diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..c7aa03cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,174 @@ +design +analysi +algorithmscs +design +analysi +algorithm +winter +instructor +richard +anderson +anderson +washington +lectur +seig +offic +hour +mondai +time +appoint +teach +assist +william +chan +wchan +washington +offic +hour +mondai +wednesdai +chateau +confer +room +sieg +floor +cubicl +somebodi +els +us +confer +room +cours +inform +prerequisit +go +assum +alreadi +undergradu +cours +algorithm +wrong +know +soon +possibl +lectur +suggest +readingtextbook +errata +list +project +realli +project +preview +check +outer +anderson +sapplet +assign +handout +written +homework +set +gener +tuesdai +class +background +quiz +post +script +homework +set +homework +solut +homework +solut +homework +solut +homework +solut +homework +solut +homework +homework +solut +homework +homework +midterm +exam +cancel +lack +interest +final +exam +told +mondai +march +probabl +verifi +time +exam +hour +close +book +class +exam +cover +materi +class +exam +willconsist +short +answer +problem +solv +question +bureaucrat +stuffgrad +base +upon +homework +exam +project +class +particip +work +togeth +homework +okai +discuss +homeworkproblem +classmat +must +write +solut +upindepend +gilligan +island +rule +could +invok +betweenani +discuss +homework +write +solut +mustwatch +least +half +hour +gilligan +island +theori +thatan +episod +gilligan +equival +reboot +anyth +thatsurv +learn +understood +anderson +washington +eduwchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..19d72637 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,315 @@ +parallel +algorithmscs +parallel +algorithm +spring +gener +inform +meet +sieg +instructor +richard +anderson +offic +hour +appointment +mail +address +anderson +offic +sieg +homework +exam +catalog +descript +design +analysi +parallel +algorithm +fundament +parallel +algorithmsfor +sort +arithmet +matrix +graph +problem +addit +select +topic +emphasi +gener +techniqu +approach +us +developingfast +effici +parallel +algorithm +limit +theirefficaci +prerequisit +equival +major +homework +assign +note +syllabu +homework +thursdai +april +homework +plu +rambl +commentsabout +cours +thursdai +april +lectur +transpar +april +code +analysisfor +list +rank +lectur +note +connect +compon +algorithmi +simpler +correct +section +latex +version +pointer +paper +pointer +referencesfor +erew +crew +connect +ullman +yannakaki +paper +homework +tuesdai +union +find +paper +homework +thursdai +certifi +write +paper +impli +exist +effici +consensu +algorithm +base +upon +swap +although +likelysometh +go +insid +next +supercomput +homework +thursdai +asynchron +refer +martel +foc +buss +manuscript +note +memori +model +real +descript +special +topic +cours +content +whim +instructor +descript +titl +year +cours +would +theori +share +memori +parallel +comput +mayb +topic +theori +smpc +cours +start +collect +basic +algorithm +spend +time +model +comput +syllabu +give +list +topic +could +cover +term +share +memori +indic +lookingat +topic +pertain +specif +interconnect +topolog +wewil +consid +situat +cost +memori +access +isnon +uniform +cours +theori +cours +sens +notconsid +particular +real +machin +prove +theorem +andyou +expect +parallel +machin +howev +topic +motiv +practic +consider +goal +indevelop +parallel +algorithm +come +algorithmswhich +could +conceiv +effici +parallel +machin +expect +three +four +problem +set +contain +routin +challeng +problem +goingto +requir +project +happi +student +outsidework +cours +relat +topic +text +cours +introduct +parallelalgorithm +nice +book +although +befollow +close +feel +exception +cheap +youcould +probabl +without +purchas +copi +origin +plan +volunt +teach +cours +year +textwould +theori +share +memori +parallel +comput +anderson +howev +book +progress +fast +volum +artof +comput +program +chose +book +instead +go +quit +flexibl +cours +taught +mychoic +topic +influenc +consid +interestingor +uninterest +also +choic +teach +cours +aseith +tradit +lectur +cours +work +researchcont +number +open +problem +mind +could +turninto +nice +research +result +could +present +half +bake +ideason +provid +other +interest +andenergi +think +anderson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..717576eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,34 @@ +home +page +automata +comput +complex +page +move +current +quarter +autumn +autumn +portion +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accuratelyquot +duli +credit +copyright +depart +comput +scienc +engin +univers +ofwashington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..13a6b9e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,25 @@ +home +page +fall +automataautumn +instructor +paul +beam +welcom +home +page +world +wide +short +hypermedia +documentfor +exam +quiz +postscript +quiz +postscript +final +postscript +latex +beam +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..3d25be69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,150 @@ +fall +automata +comput +complex +larri +ruzzo +fall +tuth +sieg +staffnameemailphoneoffic +hour +instructor +larri +ruzzo +ruzzo +sieg +nitin +sharma +nitin +csmw +sieg +class +mail +last +updat +messag +sent +class +mail +list +washington +textbook +errata +handout +administrivia +homework +midterm +latex +sourc +cours +organ +syllabu +collabor +midterm +acrobat +cours +organ +syllabu +collabor +midterm +postscript +cours +organ +syllabu +collabor +midterm +file +format +thecours +materi +provid +three +format +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +adob +acrobat +latest +greatest +free +viewer +avail +depart +unix +system +acroread +perhap +aavail +adob +acrobat +page +postscript +ghostview +ghostscript +home +page +free +viewer +window +linux +time +acrobat +support +fewer +system +isprefer +file +smaller +render +isfast +legibl +print +ghostscriptcan +exampl +cours +web +autumn +autumn +portion +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accuratelyquot +duli +credit +copyright +depart +comput +scienc +engin +univers +ofwashington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..63710625 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,142 @@ +topic +complex +autumn +proposit +theorem +prove +satisfi +test +proof +complex +gener +inform +instructor +paul +beam +meet +time +tuesdai +thursdai +loew +autom +theorem +prove +comput +aid +verif +vlsi +andsoftwar +engin +give +algorithm +attempt +decid +truthof +logic +statement +proposit +first +higher +order +logic +cours +concentr +complex +issu +proposit +casea +well +flip +side +satisfi +test +even +us +oftheorem +prove +first +order +higher +order +logic +often +involv +finitedomain +proof +interpret +proposit +logic +anywai +consid +varieti +system +proposit +theoremprov +satisfi +test +issu +system +complex +proof +within +system +good +choic +search +strategi +consider +theoret +practic +work +thesequest +concentr +theoret +issu +proof +complexityand +rel +complex +search +strategi +also +examin +anumb +implement +proposit +logic +algorithm +compar +theoryand +practic +paper +thing +urquhart +complex +proof +survei +talk +slide +instal +softwar +amus +instal +theorem +prover +sato +andboy +moor +well +satisfi +tester +gsat +june +thedirectori +cours +proversther +paper +scatter +well +process +ofinstal +theorem +prover diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..0bd2a0dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,41 @@ +home +pagecs +comput +systemperform +modelingspr +host +lazowskaandmaryvernonwelcom +home +page +comput +system +performancemodel +meet +mondai +wednesdai +fridai +loew +hall +offic +hourstent +topic +schedulecom +goingsassignmentsproject +informationmap +queue +network +solut +packag +emailoth +inform +avail +sigmetr +confer +measur +model +computersystemsuw +depart +comput +scienc +engineeringlazowska +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..43cc65eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,104 @@ +home +page +comput +system +architecturewint +instructorsusan +egger +egger +washington +sieg +offic +hour +tuth +tajoshua +redston +redston +washington +sieg +offic +hour +sieg +cours +inform +cours +overview +postscript +schedul +continu +updat +postscript +lectur +note +problem +set +previou +test +architectur +histori +postscript +specmark +rate +postscript +inform +tool +shade +instuct +simul +sparc +atom +system +build +analysi +tool +alpha +tullsen +simul +execut +driven +instruct +level +simul +simul +superscalar +architectur +close +etch +binari +rewrit +analyz +pentium +code +alpha +hardwar +monitor +multiflow +compil +alpha +pixi +user +manual +postscript +dinero +uniprocessor +cach +simul +local +machin +alpha +pentium +powerpc +sparc +applic +multiprocessor +uniprocessor +spec +benchmark +neat +page +info +center +info +current +futur +processor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..b6a83ebd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,62 @@ +home +pagecs +oper +system +instructor +hank +levi +spring +freder +pighin +meet +time +instructor +offic +hour +offic +hour +chateau +confer +room +number +unit +welcom +home +page +world +wide +short +hypermedia +document +forcs +contain +inform +class +keep +mind +thisdocu +static +inform +especi +classmessag +ad +frequent +problem +thisdocu +send +mail +pighin +announc +april +first +assign +readi +iti +april +cours +inform +cours +mail +assign +projectlevi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..fa62415f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,108 @@ +home +pagecs +comput +graphicsautumn +quarter +welcom +home +page +world +wide +hypermedia +document +whichcontain +wealth +inform +class +keep +mind +thatthi +document +static +inform +addedfrequ +problem +document +send +mail +deros +click +help +avail +inform +professor +cours +syllabu +lectur +note +written +homework +assign +solut +last +year +project +handout +project +grade +polici +test +cool +imag +last +year +addit +inform +get +class +instruct +us +indi +mvi +home +page +visitor +room +schedul +comput +scienc +engin +depart +comput +scienc +degre +program +offer +colleg +art +scienc +comput +engin +degre +program +offer +colleg +engin +mosaic +help +help +avail +follow +topic +basic +inform +mosaic +inform +hypertext +markup +languag +html +uniform +resourc +locat +read +home +usinglynx +charact +base +browser diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..ad397d07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,176 @@ +home +page +principl +digit +system +design +carl +ebel +fall +welcom +home +page +cours +inform +time +place +loew +import +announc +summari +syllabu +text +book +staff +carl +ebel +instructor +ebel +offic +hour +wednesdai +fridai +sieg +paul +franklin +paul +offic +hour +mondai +thursdai +sieg +hine +hineskj +offic +hour +tuesdai +wednesdai +fridai +sieg +larri +mcmurchi +research +staff +tool +guru +larri +document +simul +synthesi +design +pamett +board +mostli +complet +still +construct +student +work +groupsfin +exam +review +topic +cover +quarter +homework +assign +note +homework +homework +fridai +begin +class +homework +hand +class +begin +class +homework +mondai +begin +class +homework +wednesdai +begin +class +homework +fridai +begin +class +homework +mondai +begin +class +homework +fridai +begin +class +handout +combin +logic +combin +logic +sequenti +logic +sequenti +logic +fpga +fpga +memori +commun +inform +depart +comput +scienc +engin +home +page +mother +site +list +vlsi +link +comprehensivelist +icmanufactur +murphi +recent +dilbert +comic +nation +semiconductor +data +sheet +motorola +data +book +server +philip +semiconduct +data +book +server +micron +technolog +data +sheet +copyright +depart +comput +scienc +engin +univers +washington +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +ebel +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..5d196f51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,56 @@ +index +pagecs +artifici +intelligencefal +quarter +artifici +intellig +pose +fundament +andchalleng +question +comput +scienc +build +intelligentmachin +cours +address +question +provid +anin +depth +introduct +select +topic +includ +agentarchitectur +knowledg +represent +search +plan +machinelearn +reason +uncertainti +methodolog +staff +weldweld +sieg +hour +marc +friedmanfriedman +sieg +hour +nick +kushmericknick +sieg +hour +outlin +topicsread +assignmentsassign +examsgradingresourcesth +class +mailinglist +also +archiv +past +messag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..4f775c87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,141 @@ +uncertainti +decis +make +uncertainti +decis +make +artifici +intellig +winter +professor +steve +hank +hank +offic +sieg +offic +hour +whenev +around +appoint +email +address +mail +washington +goe +class +member +send +mail +request +washington +list +read +materi +pearl +probabilist +reason +intellig +systemsthi +requir +text +class +read +sever +chapter +probabl +without +bui +strappedfor +cash +though +nice +refer +book +shafer +pearl +read +uncertain +reasoningthi +nice +collect +foundat +paper +reason +uncertainti +read +sever +select +copi +avail +grail +librari +jayn +probabl +theori +logic +scienc +fragmentari +edit +juli +extrem +interest +technic +histor +look +foundationsof +probabl +theori +statist +decis +theori +definit +worth +look +refer +list +histor +perspect +alon +math +heavi +go +place +beautifulli +written +neapolitan +probabilist +reason +expert +system +theori +algorithmsa +signific +overlap +pearl +book +good +secondari +sourc +inform +graphic +model +propagationalgorithm +avail +math +research +librari +paper +arrang +cours +summari +summari +topic +cover +read +html +postscript +hank +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..88f7aa10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,425 @@ +home +pagecs +imag +understandingwelcom +home +page +world +wide +short +hypermedia +documentfor +contain +inform +theclass +keep +mind +document +static +newinform +especi +class +messag +ad +frequent +problem +document +send +mail +mock +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +assign +first +assign +read +chapter +cours +note +doexercis +second +assign +wednesdai +april +read +chapter +cours +note +exercis +next +determin +conveni +torun +khoro +cantata +run +sun +aslillith +edit +local +workstat +login +file +containxhost +lilliththen +cshrc +file +sun +follow +setenv +khoros_hom +local +khoro +setenv +manpath +local +khoro +path +khoros_hom +path +rlogin +onto +lillith +rhost +assignmentsand +displai +environ +variabl +appropri +typecantata +unix +prompt +machin +georg +haskhoro +cantata +instal +also +cours +home +page +wwwhttp +washington +educ +cours +index +htmland +follow +link +khoro +cantata +tutori +itscours +outlin +experi +least +first +twotop +imag +inform +spatial +resolut +ideal +take +tutori +read +pagesand +experi +khoro +anoth +window +noth +turn +part +assign +third +assign +read +articl +huerta +andnevatia +cvpr +proceed +also +tolook +articl +wolff +fourth +assign +mondai +april +assign +make +comparison +three +imag +process +softwar +environ +khoro +msvc +fast +oper +level +learn +effort +requir +part +announc +final +examin +mondai +june +pmin +regular +class +meet +room +exam +cover +combinationof +midterm +post +midterm +materi +list +topic +studi +remind +class +approv +sundai +june +time +final +exam +review +meet +plan +meet +insieg +outlin +select +class +period +avail +fridai +april +mondai +wednesdai +mondai +fridai +mondai +wednesdai +fridai +mondai +wednesdai +copi +overhead +transpar +lectur +onneur +net +avail +engin +librari +copi +center +floor +packet +number +trainabl +classifi +fridai +student +permit +temporari +copi +ofmatlab +cours +requirethat +fill +form +sign +contract +know +interest +term +project +import +part +ofth +cours +start +week +april +correct +cours +note +fridai +april +introduc +pentium +laboratori +includingth +msvc +softwar +develop +environ +evan +mclain +documentexplain +transform +imag +applic +current +statu +khoro +instal +sun +contain +recent +inform +get +start +withkhoro +accompani +cours +pleas +read +class +fridai +march +mondai +april +student +alreadi +comput +account +accesskhoro +cantata +contact +rene +reed +onthursdai +fridai +make +arrang +pick +youraccount +login +name +password +itov +weekend +earli +next +week +rene +hour +arelimit +plan +ahead +email +address +reed +andsh +part +sieg +back +offic +sieg +kept +lock +either +need +knock +orhav +prior +arrang +meet +mani +account +applic +card +sign +willhav +take +care +next +week +select +lectur +slide +mondai +march +wednesdai +march +inform +resourc +imag +understand +onlin +intro +imag +process +khoro +cantata +delft +univ +pattern +recognit +inform +page +comput +vision +home +page +store +inform +home +page +thedepart +comput +scienc +engin +onlin +version +comput +scienc +undergradu +brochur +onlin +version +comput +engin +undergradu +brochuremosa +help +run +mosaic +find +help +itemsund +balloon +help +menu +macmosa +home +page +itemund +navig +menu +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..7fee1043 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,257 @@ +home +pagecs +parallel +comput +imag +processingwelcom +home +page +world +wide +short +hypermedia +documentfor +contain +inform +theclass +keep +mind +document +static +newinform +ad +time +time +schedul +informationon +octob +novemb +class +begin +half +hourearli +normal +start +dai +novemb +meet +guest +speaker +prof +nian +simon +fraser +univers +burnabi +canada +tuesdai +novemb +class +novemb +bharath +modayur +guest +speaker +titl +present +effici +parallel +object +recognit +simd +mimd +machin +tuesdai +novemb +class +begin +topic +complet +discuss +pyramid +algorithm +scale +invariantoper +algorithm +segment +hierarchicalrelax +us +isodata +approach +burt +hong +rosenfeld +introduct +embed +virtual +process +overview +neural +network +architectur +algorithm +tuesdai +novemb +class +begin +topic +complet +overview +neural +network +architectur +embed +neural +network +mesh +pyramid +brief +treatment +icon +symbol +comput +thursdai +novemb +class +begin +topic +parallel +imag +analysi +digit +librari +demo +schedul +find +term +project +topicsdur +week +octob +student +activelyexplor +topic +term +project +written +descript +topic +hand +inon +tuesdai +octob +templat +writeupsi +avail +resourcespvm +parallel +virtualmachin +softwar +layear +permit +user +program +aviru +machin +made +heterogen +collect +moreworkst +conveni +implement +studydistribut +algorithm +intel +technicalpubl +includ +document +intel +paragonparallel +comput +system +languag +good +languag +implement +arrai +orient +algorithm +intel +paragon +variousvendor +supercomput +parallel +machin +info +onth +maspar +nation +supercomput +center +sweden +onlin +inform +maspar +theunivers +tennesse +resourc +found +neal +friedman +report +also +paragon +document +error +correctionsto +cours +note +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +term +project +import +part +ofth +cours +start +week +octob +review +session +final +schedul +fridai +decemb +sieg +hall +final +exam +schedul +wednesdai +decemb +normal +class +meet +room +exam +close +book +term +project +thursdai +decemb +last +updat +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..a01a5a29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,91 @@ +graphic +seminarc +rspring +numer +method +graphic +april +matrix +comput +intro +definit +properti +invers +brad +solv +linear +system +eric +april +matrix +comput +eigenvalu +eigenvector +singular +valu +decomposit +joel +april +root +find +nonlinear +equat +corei +shuichi +april +optim +intro +unconstrain +optim +kari +constrain +global +optim +kevin +linear +quadrat +program +linear +program +chuck +ronen +linear +quadrat +program +exampl +daniel +data +fit +intro +conclus +mike +linear +regress +calibr +exampl +brad +ordinari +differenti +equat +intro +method +paper +adam +method +paper +joanna +discret +method +finit +element +radios +fred +pde +finit +differ +interv +arithmet +troi +jonathan +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..f0869049 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,56 @@ +quarterscs +special +topicssteven +tanimoto +instructorcs +autumn +transcript +base +educ +winter +mathemat +experi +imag +process +spring +mathemat +experi +imag +process +autumn +technolog +collabor +learn +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +graduat +seminar +explor +varieti +topic +relat +useof +comput +educ +specif +topic +activ +varyfrom +quarter +quarter +last +updat +septemb +tanimoto +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..015c09bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,225 @@ +home +page +autumn +autumn +transcript +base +educ +wwwwelcom +home +page +world +wide +short +hypermedia +documentfor +contain +inform +theclass +keep +mind +document +static +newinform +ad +time +time +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +read +octob +mccalla +central +importanceof +student +model +intellig +tutor +read +octob +present +sandi +youngquist +meet +octob +discuss +paul +barton +davi +aboutinternet +servic +read +octob +labord +labord +problem +solv +geometri +microworld +tointellig +comput +environ +present +tessa +read +novemb +bartel +promot +mathematicsconnect +concept +map +plu +present +gari +anderson +meet +novemb +read +novemb +read +meet +onlin +first +paper +combin +degre +vision +littl +degre +technolog +noth +particularli +ambiti +descript +state +second +paper +technic +piec +promot +thethem +learner +take +respons +educ +someth +increasingli +import +futur +choic +third +read +paper +moresophist +either +first +paper +beyond +brows +elabor +possibl +group +annot +ofwww +materi +paper +toolkit +describ +intechn +term +layer +internet +infrastructur +couldmak +possibl +smart +distribut +tutori +applicationsthat +mosaic +netscap +achiev +pleas +read +either +option +advanc +educ +us +world +wide +webhttp +proceed +paper +paper +html +presentor +jeremi +baer +empow +student +inform +agehttp +ncsa +uiuc +proceed +educ +ward +ward +html +presentor +marla +baker +either +beyond +brows +share +comment +soap +trail +line +communitieshttp +proceed +paper +html +presentor +john +dietz +toolkit +enhanc +protocol +lower +layer +serviceshttp +proceed +paper +dcewebkit +html +presentor +adam +carlson +presentor +paper +concept +map +hong +zhumeet +novemb +discuss +michael +aboutcurriculum +navig +last +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..8ba6a86d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,95 @@ +home +page +autumn +autumn +technolog +collabor +learningwelcom +home +page +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +gener +descript +autumn +comput +technolog +internet +methodologiesfor +teach +learn +current +come +togeth +innew +wai +seminar +explor +read +number +paper +technolog +forcollabor +learn +particip +student +willtak +respons +make +present +group +ofthes +paper +cover +subset +paper +also +explor +possibl +applic +ofai +visual +techniqu +analysi +evid +ofstud +learn +onlin +context +meet +current +schedul +tuesdai +howev +decid +move +time +better +intopeopl +schedul +visit +meani +middl +school +washington +middl +schoolmai +schedul +depend +interest +participatingstud +last +updat +septemb +tanimoto diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..0a68c260 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,353 @@ +mvmv +global +resourc +manag +distribut +systemsprofessor +mari +vernontim +pmlocat +processor +alloc +gang +schedul +now +ousterhout +schedul +techniqu +concurr +system +inrd +conf +distribut +system +page +arpaci +dusseau +vahdat +anderson +patterson +interact +parallel +andsequenti +workload +network +workstat +proc +ofth +sigmetr +conf +processor +alloc +dynam +equi +partit +nguyen +tucker +gupta +process +control +schedul +issuesfor +multiprogram +share +memori +multiprocessor +proc +symp +oper +system +principl +page +nguyen +vaswani +zahorjan +us +runtim +measur +workloadcharacterist +parallel +processor +schedul +univ +ofwashington +technic +report +tutori +applic +processor +shun +leung +evangelo +markato +thoma +leblanc +us +processor +affin +loopschedul +share +memori +multiprocessor +proc +supercomput +expand +version +iniee +tran +parallel +distribut +system +han +zima +barbara +mari +chapman +compil +distribut +memori +system +proc +ieee +edjlali +agraw +sussman +saltz +data +parallelprogram +adapt +environ +proc +parallel +process +symp +santa +barbara +april +tutori +processor +alloc +polici +comparisonsshikharesh +majumdar +derek +eager +richard +bunt +schedul +multiprogram +parallel +system +proc +sigmetr +confer +measur +model +ofcomput +system +santa +eric +parson +kenneth +sevcik +multiprocessor +schedul +high +variabilityservic +time +distribut +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +dror +feitelson +bill +nitzberg +characterist +product +parallel +scientif +workload +thenasa +am +ipsc +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +follow +also +cover +requir +read +leutenegg +vernon +perform +multiprogram +multiprocessor +schedul +polici +proc +sigmetr +conf +measur +model +ofcomput +system +mccann +vaswani +zahorjan +dynam +processor +alloc +polici +multiprogram +share +memorymultiprocessor +transact +comput +system +processor +alloc +demand +base +schedul +patrick +sobalvarro +william +weihl +demand +base +coschedul +ofparallel +job +multiprogram +multiprocessor +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +impact +page +page +migrat +burger +hyder +miller +wood +page +tradeoff +distribut +share +memorymultiprocessor +proc +supercomput +rohit +chandra +scott +devin +verghes +anoop +gupta +mendel +rosenblum +schedul +page +migrat +multiprocessorcomput +server +proc +conf +architectur +support +programminglanguag +oper +system +asplo +jose +coordin +schedul +processor +memori +alverson +kahan +korri +mccann +smith +schedul +tera +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +eric +parson +kenneth +sevcik +coordin +alloc +memori +processor +multiprocessor +octob +demand +base +schedul +discuss +open +problem +dusseau +arpaci +culler +effectivedistribut +schedul +parallel +workload +proc +sigmetr +conf +measur +model +computersystem +philadelphia +june +appear +feitelson +rudolph +coschedul +base +runtim +identif +activ +work +set +parallel +program +theoret +result +processor +memori +alloc +karlin +paper diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..1791a5f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,91 @@ +winter +winter +algorithm +molecular +biologi +richard +karp +larri +ruzzo +martin +tompaclass +bboard +last +updat +handout +administr +lectur +note +draft +homework +html +syllabu +schedul +acrobat +titl +syllabu +schedul +postscript +titl +syllabu +schedul +slide +file +format +cours +materi +provid +sever +format +html +usual +format +load +fast +usual +readabl +mani +part +gener +automat +translat +latex +translat +faith +format +origin +adob +acrobat +latest +greatest +free +viewer +adob +acrobat +page +postscript +ghostscript +home +page +free +viewer +window +linux +time +acrobat +support +fewer +system +isprefer +file +smaller +render +isfast +legibl +print +ghostscriptcan +exampl +ruzzo +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..b2a0d514 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,236 @@ +architectur +lunchcs +architectur +lunchcours +organ +jean +loupbaermeet +time +tuesdai +cseg +architectur +lunch +continu +quarter +withalmost +format +previou +year +select +paper +discussedat +begin +quarter +distribut +week +ofth +paper +tobe +read +week +discuss +week +might +formal +present +work +progress +byesteem +member +lunch +mostlyw +hopefulli +heat +discussionson +paper +literatur +differ +quarter +previou +quartersi +start +read +posit +paper +fromparticip +recent +workshop +oncrit +issu +comput +architectur +research +copi +hereread +posit +paper +lead +morethem +paper +read +quarter +mani +thank +ruth +anderson +molli +brown +gershoni +matthai +philipos +tabular +summari +ofth +guru +posit +found +herefor +usual +format +thestud +lead +discuss +paper +either +informallyor +slide +credit +cours +variabl +credit +ifyou +present +read +first +meet +organ +meet +tuesdai +octob +tuesdai +read +valu +local +load +valu +predict +lipasti +wilkerson +shen +asplo +asplo +paper +line +follow +link +asplo +advanceprogrami +short +bibliographi +processor +memori +line +appreci +volunt +thesaulsburi +burger +machin +paper +tuesdai +readashlei +saulsburi +fong +pong +andrea +nowatzyk +miss +memori +wall +case +processor +memori +integr +isca +tuesdai +readm +fillo +keckler +dalli +machin +multicomput +micro +avail +follow +machinelink +tuesdai +readdoug +burger +stefano +kaxira +jame +goodman +datascalar +architectur +spsd +execut +model +univers +wisconsin +madison +comput +scienc +depart +technic +report +juli +avail +neton +tuesdai +read +intellig +iram +chip +rememb +comput +patterson +anderson +cardwel +fromm +keeton +kozyraki +thomasand +yelick +paper +availableher +fortun +author +prof +anderson +present +paper +subscrib +mail +list +send +email +themajordomo +mail +list +majordomo +mail +content +shouldinclud +line +subscrib +cseg +leav +subject +lineblank +shortli +receiv +messag +back +sai +welcom +baer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..2b92a5d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +home +pagecs +home +page +spring +offer +experiment +graduat +cours +human +comput +interact +born +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..de9cb450 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,151 @@ +compil +seminarcs +compil +seminarcours +organ +susan +eggersand +craigchambersmeet +time +wednesdai +offici +loew +butreal +meet +second +floor +atrium +scheduleweek +memspi +analyz +memori +system +bottleneck +program +margaretmartonosi +anoop +gupta +thoma +anderson +anderson +week +gener +approach +time +special +applic +charl +consel +francoi +noel +week +practic +data +flow +framework +arrai +refer +analysi +itsus +optim +evelyn +duesterwald +rajiv +gupta +maryl +soffa +week +valu +depend +graph +represent +without +taxat +danielweis +roger +crew +michael +ernst +bjarn +steensgaard +litvinov +week +iter +regist +coalesc +georg +andrew +appel +garrett +week +gener +machin +specif +optim +compil +roger +hooverand +kenneth +zadeck +dean +grant +week +paradigm +compil +distribut +memori +multicomput +byprivthviraj +banerje +lewi +week +minimum +cost +interprocedur +regist +alloc +stevenkurland +charl +fischer +secoski +week +data +special +todd +knoblock +erik +grove +week +lazi +strength +reduct +jen +knoop +oliv +ruth +andbernhard +steffen +mock +tullsen +subscrib +mail +list +send +email +majordomo +mail +content +includ +line +subscribecsek +leav +subject +line +blank +shortlyrec +messag +back +sai +welcom +melodi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..3b19b343 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,317 @@ +parallel +program +environmentslarri +snyderautumn +quarter +mondai +loew +welcom +home +page +quarter +read +select +paper +recent +ipp +ppopp +supercomput +icpp +lcpc +ten +schedul +quarter +atmospher +casual +andwil +hopefulli +ignit +live +discuss +everyon +attend +seminar +expect +present +thepap +still +spot +open +hurri +sign +pleas +send +mail +majordomo +subscrib +cseo +bodi +messag +subscribeto +class +mail +list +datepaperpresentor +compil +matlab +program +scalapack +exploit +task +data +parallel +ramaswami +hodg +banerje +ipp +falcon +matlab +interact +restructur +compil +deros +gallivan +gallopoulo +marsolf +padua +lcpc +compil +portabl +messag +driven +program +ramkumar +forb +kale +icpp +sung +cross +loop +reus +analysi +applic +cach +optim +cooper +kennedi +mcintosh +lcpc +ruth +global +commun +analysi +optim +chakarabarti +gupta +choi +pldi +sean +integer +compil +perform +analysi +environ +data +parallel +program +adv +input +output +characterist +scalabl +parallel +applic +crandal +aydt +chien +reed +jason +holidai +stream +librari +complex +distribut +data +structur +gotwal +sriniva +gannon +ppopp +brad +model +compil +strategi +core +data +parallel +program +bordawekar +choudahari +kennedi +koelbel +paleczni +ppopp +local +iter +comput +block +cyclic +distribut +midkiff +icpp +util +thread +data +parallel +program +fahring +hain +mehrotra +eric +cilk +effici +multithread +runtim +system +blumof +joerg +kuszmaul +leiserson +randal +zhou +ppopp +compil +gener +parallel +code +object +orient +mathemat +model +andersson +fritzson +ppopp +analysi +cross +loop +reus +analysi +applic +cach +optim +cooper +kennedi +mcintosh +lcpc +commun +optim +global +commun +analysi +optim +chakarabarti +gupta +choi +pldi +gener +realign +base +commun +program +kamachi +kusano +suehiro +tamura +sakon +ipp +commun +optim +parallel +comput +us +data +access +inform +rinard +tool +integer +compil +perform +analysi +environ +data +parallel +program +adv +rel +debug +applic +develop +larg +numer +model +abramson +foster +michalak +sosic +parallel +model +compil +strategi +core +data +parallel +program +bordawekar +choudahari +kennedi +koelbel +paleczni +ppopp +input +output +characterist +scalabl +parallel +applic +crandal +aydt +chien +reed +data +distribut +local +iter +comput +block +cyclic +distribut +midkiff +icpp +potpourri +compil +matlab +program +scalapack +exploit +task +data +parallel +ramaswami +hodg +banerje +ipp +util +thread +data +parallel +program +fahring +hain +mehrotra +sung +choi +last +modifi +tuesdai +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..f12efd0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,33 @@ +system +seminar +preliminariesif +alreadi +system +mail +list +need +variou +crucial +bit +ofinform +week +seminar +cancel +besent +list +send +mail +system +request +line +subscrib +systemsin +messag +bodi +quarterli +web +spring +summer +autumn +winter +autumn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..bc30aee2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,89 @@ +summer +quarterw +meet +fridai +loew +quarter +wewil +read +final +paper +appear +upcomingacm +symposium +oper +system +principl +sosp +pleas +read +paper +meet +havean +interact +discuss +quarter +scheduleoct +implement +global +memori +manag +workstat +cluster +present +feelei +log +virtual +memori +present +savag +autoraid +hierarch +storag +system +present +wilk +serverless +network +file +system +present +franklin +montgomeri +tiwari +hypervisor +base +fault +toler +present +chan +philipos +wolman +exploit +weak +connect +mobil +file +access +present +voelker +litvinov +perform +cach +coher +stackabl +file +present +sriram +fiuczynski +impact +architectur +trend +oper +system +perform +present +anderson +romer +return +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..6ea61383 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,219 @@ +high +perform +scientif +comput +zphigh +perform +scientif +comput +zpllarri +snyder +teamautumn +quarter +wednesdai +sieg +loew +dai +welcom +home +page +pleas +send +mail +majordomo +subscrib +csezpl +bodi +messag +subscribeto +class +mail +list +student +also +interest +join +usersmail +list +mail +list +us +distribut +informationabout +compil +librarai +releas +relatedinform +ad +mail +list +send +mail +majordomo +subscrib +user +bodi +messag +descriptionzpl +scientificprogram +languag +suitabl +comput +previous +written +infortran +arrai +languag +dramaticallysimplifi +program +elimin +nuisanc +loop +index +run +fast +modern +machin +includ +parallel +supercomput +allow +programm +develop +code +workstat +andtrivi +migrat +largest +parallel +machin +simpli +byrecompil +develop +releas +toth +scientif +comput +commun +class +design +scientist +engin +comput +scientist +want +learn +modern +languag +supercomput +effect +scientif +comput +class +cover +follow +topic +state +high +perform +comput +syntax +semant +algorithm +exploit +high +perform +parallel +machin +wysiwyg +perform +write +fast +program +easili +develop +program +workstat +supercomput +well +program +perform +scienc +faster +program +prototyp +scientif +comput +matlab +text +booknon +class +reli +materi +document +found +onin +page +specif +follow +close +zplprogram +guid +version +prerequisitesfamiliar +scientif +comput +fortran +ormatlab +program +unix +platform +assum +class +variabl +credit +audit +student +write +debug +program +select +technic +disciplin +suitabl +comput +rang +whole +applic +kernel +inner +loop +scientif +comput +us +informationcours +syllabu +includ +lectur +note +appli +ncsa +block +grant +account +faculti +staff +student +compil +program +us +remotezpl +compileroth +import +link +sung +choi +last +modifi +wednesdai +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..cc49a04f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,185 @@ +home +page +autumn +introduct +digit +design +autumn +quarter +gaetano +borriello +corei +andersonwelcom +home +page +home +page +contain +whole +bunch +us +inform +class +keep +mind +document +static +inform +especi +class +announc +messag +ad +frequent +problem +document +gener +send +mail +tocs +webmast +class +announc +notic +instructor +system +administr +last +updat +class +mail +archiv +messagess +washington +last +updat +send +mail +class +instructor +instructor +everyon +cours +administr +goal +syllabu +meet +time +lectur +final +exam +mondai +decemb +workload +grade +expect +laboratori +softwar +tool +polici +collabor +cheat +announc +mail +address +overal +schedul +lectur +topic +instructor +gaetano +borriello +gaetano +offic +hour +sieg +corei +anderson +corin +offic +hour +sieg +aweekli +assign +weekli +quizz +final +exam +lectur +onlin +version +slide +us +lectur +textbook +contemporari +logic +design +katz +benjamin +cum +addison +weslei +maintain +author +katz +maintain +publish +benjamin +cum +addison +weslei +note +topic +interest +evolut +implement +technolog +comput +aid +design +tool +logic +design +synario +feedback +tell +think +thing +go +even +anonym +desir +question +cours +evalu +complet +last +class +link +previou +quarter +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +copyright +depart +comput +scienc +engin +univers +washington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..51be713f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,41 @@ +comput +societycs +comput +societywelcom +home +page +comput +societi +cours +wintercs +comput +societi +focu +social +econom +ethic +legal +implic +present +internet +futur +nation +andglob +inform +highwai +instructor +alan +born +class +time +tue +thur +sieg +cours +syllabusclass +schedulelink +relev +sitesbook +journal +avail +referenceassignmentsassign diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..bf348fd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,56 @@ +index +pagecs +artifici +intelligencefal +quarter +artifici +intellig +pose +fundament +andchalleng +question +comput +scienc +build +intelligentmachin +cours +address +question +provid +anin +depth +introduct +select +topic +includ +agentarchitectur +knowledg +represent +search +plan +machinelearn +reason +uncertainti +methodolog +staff +weldweld +sieg +hour +marc +friedmanfriedman +sieg +hour +nick +kushmericknick +sieg +hour +outlin +topicsprojectread +assignmentsassign +examsgradingresourcesth +class +mailinglist +also +archiv +past +messag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..5d62e099 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,191 @@ +ics +intellig +inform +internet +server +meet +tuesdai +siegcreat +server +side +scriptspleas +read +guidelin +towrit +program +execut +someon +follow +link +tothem +peopl +place +collect +mail +list +gener +paul +program +check +futur +begun +updat +file +rememb +want +check +first +us +index +html +check +back +index +html +ad +phoenix +impress +older +topic +offici +releas +glimps +instal +check +manual +page +well +glimps +develop +home +page +interest +detail +glimps +work +read +winter +usenix +paper +design +implement +glimps +inform +wide +area +inform +server +wai +zwhere +mosiac +interfac +zephyr +locat +databas +show +user +current +regist +zephyr +make +guess +room +show +regist +zephyr +server +anoth +version +znol +zwatch +zlocat +extra +info +link +except +show +statu +regist +zephyr +user +on +anyon +file +note +lectur +discuss +mail +sent +mail +list +displai +belief +index +page +short +mike +releg +review +site +comment +make +good +page +anoth +page +miscellan +comment +rather +rambl +kurt +grumbl +interfac +problem +improv +mosaic +bring +class +paul +provid +luddit +perspect +idea +intellig +filter +network +inform +sourc +nick +provid +vagu +relat +comment +decemb +cacm +inform +filter +check +summari +articl +rememb +want +chang +document +check +itout +first +us +index +html +check +back +withci +index +html +send +mail +theentir +class +us +address +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..37bff6ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,37 @@ +home +page +machin +organ +program +credit +introduct +current +system +structur +control +commun +memori +processor +devic +project +involv +detail +studi +specif +small +computerhardwar +softwar +system +prerequisit +consent +instructor +open +student +taken +open +freshmen +semesterli +cours +inform +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..d252f66a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,31 @@ +page +introduct +comput +architectur +credit +design +comput +system +compon +processor +design +instruct +design +address +control +structur +microprogram +memori +manag +cach +memori +hierarchi +interrupt +structur +prerequisit +andc +semesterli +cours +inform +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..bee89134 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,30 @@ +home +page +advanc +comput +architectur +credit +advanc +techniqu +comput +design +parallel +process +andpipelin +multiprocessor +multi +comput +network +high +performancemachin +special +purpos +processor +data +flow +architectur +prerequisit +semesterli +cours +inform +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..c4bca9a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,33 @@ +home +page +advanc +comput +architectur +credit +parallel +algorithm +principl +parallel +detect +vectorizingcompil +interconnect +network +simd +mimd +machin +processorsynchron +data +coher +multi +dataflow +machin +special +purposeprocessor +prerequisit +consent +instructor +semesterli +cours +inform +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..de983b10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,446 @@ +introduct +oper +system +spring +univers +wisconsin +madisoncomput +scienc +departmentc +spring +bart +millerc +introduct +oper +systemsnew +stufffin +grade +post +problem +set +avail +program +assign +avail +quizz +answer +avail +section +lectur +note +readi +read +print +class +staffinstructor +bart +milleremail +bart +wisc +eduoffic +csphone +offic +hour +wednesdai +fridai +noonor +appoint +karuna +muthiahemail +muthiah +wisc +eduoffic +csphone +offic +hour +mondai +wednesdai +jonathan +weyersemail +weyer +wisc +eduoffic +csphone +offic +hour +mondai +fridai +wednesdai +cours +materialsth +cours +organ +around +lectur +note +thelectur +notesar +avail +class +page +need +textbookmodern +oper +system +tanenbaum +programmingassign +purchas +copi +ofobject +orient +program +us +pohl +whatev +favorit +book +lectur +note +avail +read +first +section +come +class +modern +oper +systemsandobject +orient +program +us +avail +book +store +lectur +discuss +sectionslectur +time +tuesdai +thursdai +comput +sciencesdiscuss +section +wednesdai +nolandnot +extra +wednesdai +discuss +section +section +us +mainli +recit +section +discuss +materialcov +lectur +weekli +quizz +occas +us +discuss +import +detail +ofth +program +assign +homework +make +sure +leav +room +schedul +attend +section +exam +quizzesther +midterm +final +option +week +start +second +week +class +quiz +thediscuss +section +quizz +last +minut +follow +past +quizz +answer +process +concurr +februari +semaphor +februari +monitor +februari +messag +februari +schedul +usetrac +activ +real +unix +system +drive +simul +goal +assign +learn +schedul +algorithm +learn +trace +driven +simul +experi +quantit +analyz +comput +system +written +problem +setsdur +semest +hand +severalwritten +problem +set +base +lectur +problem +set +need +turn +though +find +poorli +week +quizz +youdon +problemssolut +set +problem +hand +week +theproblem +happi +answer +question +problem +andlook +solut +problem +avail +goal +assign +learn +us +variou +synchronizationprimit +solv +problem +problem +avail +goal +assign +learn +memori +manag +hardwar +softwar +late +workassign +date +list +handout +entir +semest +havethre +late +daysof +credit +late +dai +differ +assign +eachof +three +assign +three +dai +assign +three +dai +absolut +late +work +accept +late +dai +cannot +us +assignmentthat +last +weekof +class +cheatingprogram +assign +done +partner +group +work +independ +group +cheater +receiv +maximum +penalti +includ +receivingan +grade +cours +mark +transcript +comput +facilitiesw +probabl +us +solari +unix +workstat +cours +unix +workstat +run +solari +oper +system +window +student +regist +class +account +grade +policyif +take +final +take +final +program +assign +program +assign +quizz +quizz +final +final +lowest +quiz +grade +drop +averag +beno +quiz +first +week +week +spring +break +take +final +exam +count +past +taught +class +class +inth +rang +class +scheduleth +follow +schedul +tent +could +probabl +chang +week +januari +introduct +overview +processesweek +januari +februari +dispatch +process +creationweek +februari +cooper +process +synchronizationweek +februari +semaphoresweek +februari +semaphor +monitorsweek +februari +messag +deadlocksweek +march +debug +strategi +dynam +memori +alloc +march +spring +breakweek +march +relocationweek +march +segment +page +tlbsweek +april +virtual +memori +page +replac +thrash +class +thursdai +week +april +work +set +devic +filesweek +april +disk +alloc +schedul +directoriesweek +april +protectionweek +april +secur +advanc +topic +read +week +advanc +topic +read +final +final +exam +tuesdai +last +modifi +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..61b1107f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,109 @@ +internet +honor +seminarunivers +wisconsin +madisoncomput +scienc +departmentc +spring +bart +millerc +internet +honor +seminarinstructor +bart +milleremail +bart +wisc +eduoffic +csphone +offic +hour +wednesdai +fridai +noonor +appoint +lectureslectur +time +mondai +comput +sciencesclass +schedulether +written +assign +class +requir +attendal +lectur +particip +discuss +follow +schedul +mostli +right +could +chang +week +januari +bart +miller +introduct +overviewweek +januari +larri +landweb +internet +architectur +protocolsweek +februari +week +februari +bart +miller +client +server +remot +procedur +callsweek +februari +system +securityweek +februari +eric +bach +secur +encryptionweek +march +march +spring +breakweek +march +miron +livni +imag +pictur +netweek +march +week +april +high +perform +file +systemsweek +april +week +april +david +wood +internet +supercomputerweek +april +laru +javaweek +april +week +bart +miller +discussionslast +modifi +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..2ff893b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,254 @@ +advanc +oper +system +fall +univers +wisconsin +madisoncomput +scienc +departmentc +fall +bart +millerc +advanc +oper +systemssummarythi +cours +intend +give +broad +exposur +advancedoper +system +topic +read +discuss +topic +protect +secur +memori +manag +oper +system +kernel +file +system +synchron +name +distribut +system +pleas +read +rest +inform +sheet +carefulli +textther +realli +satisfactori +textbook +graduat +level +operatingsystemsclass +current +literatur +text +cours +structur +around +read +journal +articl +andconfer +proceed +abl +purchas +read +doit +handout +class +discuss +topic +relev +current +paper +lectur +detail +detail +review +paper +willinstead +adiscuss +major +topic +theme +us +paper +focal +point +form +read +group +classmat +meetonc +twice +week +discuss +detail +assign +paper +read +especi +import +part +class +read +listaccord +post +read +schedul +formula +success +class +read +papersindepend +discuss +read +group +try +identifyth +import +issu +particip +class +discuss +thepap +class +discussionsclass +meet +form +discuss +lectur +talk +topic +discuss +besupport +comment +opinion +will +particip +activ +daili +class +geta +expect +quietli +listen +week +beveri +unhappi +class +papersdur +class +write +paper +short +page +andon +longer +first +paperwil +design +base +idea +read +work +well +understood +oper +system +facilityand +design +extens +area +second +paper +involv +project +paper +summaryof +project +aselect +project +topicsfrom +choos +write +well +import +write +good +idea +paper +review +least +twice +first +read +refere +paper +fellowstud +give +writer +critic +comment +anoth +person +giveth +reader +look +someon +els +write +paper +revis +second +pass +read +examsther +exam +paper +read +keep +busi +gradesscor +final +grade +post +assign +grade +first +paper +assignmenti +availbl +summari +score +fromth +project +proposalsi +also +availbl +final +cours +gradesar +avail +detailstim +tuesdai +thursdai +place +csoffic +hour +tuesdai +thursdai +noonlast +modifi +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..c2869d67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,262 @@ +section +home +pagec +introduct +comput +programmingsect +fortran +credit +cours +cover +basic +program +structur +need +prepar +student +elementari +engin +cours +prior +comput +program +experi +requir +basic +knowledg +comput +assum +materi +cover +enabl +write +simpl +comput +program +solv +engin +problem +elementari +cours +program +done +fortran +cours +intend +student +receiv +littl +program +instruct +high +school +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +click +cours +descript +menu +import +announc +read +lectur +instructor +grade +polici +syllabu +text +lectur +note +program +assign +problem +solv +exercis +comput +pointer +interest +lectur +section +psycholog +march +pleas +punctual +lectur +avoid +disturb +class +instructor +gareth +bestor +offic +comput +scienc +overal +structur +program +primarili +exercis +gener +problem +solv +write +fortran +code +though +want +time +solut +algorithm +even +depend +particular +program +languag +fortran +solut +class +follow +mondai +exercis +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +subroutin +click +solut +question +function +click +solut +week +question +click +solut +comput +labyou +us +vectra +comput +scienc +statist +contain +hewlett +packard +vectra +run +microsoft +window +microsoft +fortran +open +seven +dai +week +except +certain +holidai +printer +room +locat +across +hall +also +home +dorm +comput +write +program +howev +probabl +purchas +copi +microsoft +fortran +lahei +person +fortran +insid +cover +textbook +also +work +comput +lab +campu +howev +fortran +compil +pleas +first +us +softwar +us +includ +microsoft +window +microsoft +fortran +mail +netscap +pointer +interest +home +page +gareth +bestor +home +page +comput +scienc +depart +home +page +start +point +internet +explor +lyco +search +world +wide +keyword +dilbert +comic +relief +long +night +assign +copyright +copi +gareth +bestor +bestor +wisc +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..4df8da39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,613 @@ +section +home +pagec +algebra +languag +programmingsect +fortran +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +click +cours +descript +menu +import +announc +read +lectur +instructor +grade +polici +syllabu +text +lectur +note +exam +program +assign +problem +solv +exercis +comput +pointer +interest +lectur +section +psycholog +pleas +punctual +lectur +avoid +disturb +class +instructor +gareth +bestor +offic +comput +scienc +lowest +exam +score +contribut +must +complet +hand +assign +elig +receiv +pass +grade +cours +final +grade +section +grade +curv +mean +rang +curv +comput +final +exam +complet +exam +thur +februari +exam +april +comput +scienc +final +amclick +list +current +class +grade +identif +student +syllabu +tent +follow +topic +section +text +approxim +cover +week +semest +class +read +relev +section +text +come +class +abl +question +class +anyth +unsur +instead +wait +try +work +assign +discov +didn +realli +understand +someth +week +term +exam +week +term +exam +week +final +exam +text +lectur +notestext +fortran +engin +applic +edit +koffman +friedman +lectur +note +copi +lectur +note +avail +line +week +class +import +line +lectur +note +substitut +come +class +cover +show +overhead +projector +includ +exampl +addit +note +board +respons +materi +cover +class +week +week +week +week +week +week +week +week +week +week +week +week +week +week +week +exam +program +assignmentsther +three +exam +term +exam +final +exam +exam +constitut +final +grade +import +well +ensur +good +grade +regardless +perform +assign +exam +approxim +hour +long +though +stai +longer +need +extra +time +close +book +need +bring +pencil +exam +calcul +necessari +even +us +exam +solut +term +exam +term +exam +seven +program +assign +includ +program +contribut +final +grade +assign +must +complet +hand +elig +receiv +pass +grade +cours +must +attempt +complet +everi +program +assign +hand +program +even +compil +without +error +grade +automat +receiv +zero +grade +risk +fail +cours +gradesheet +handin +directori +hand +assign +onlin +late +polici +polici +academ +misconduct +cheat +assign +specif +program +mondai +program +fridai +program +wednesdai +program +wednesdai +program +fridai +program +mondai +program +fridai +pmhow +help +assign +consult +consult +comput +help +problem +wear +name +tag +duti +approxim +consult +answer +short +question +compil +error +messag +program +syntax +well +login +printer +send +mail +netscap +click +inform +consult +instructor +gener +question +assign +question +requir +long +explan +best +answer +pleas +offic +hour +send +mail +normal +offic +except +offic +hour +dissert +research +home +modem +therefor +want +outsid +offic +hour +pleas +make +appoint +first +easili +contact +mail +regularli +login +read +mail +home +click +send +mail +problem +solv +exerciseson +import +skill +learn +class +problem +solv +good +problem +solv +skill +distinguish +good +comput +programm +doesn +matter +familiar +skill +particular +program +languag +understand +solv +problem +abl +write +comput +program +languag +help +learn +problem +solv +skill +techniqu +assign +weekli +problem +solv +exercis +small +trivial +problem +give +mondai +look +problem +think +week +right +step +would +solv +problem +overal +structur +program +primarili +exercis +gener +problem +solv +write +fortran +code +though +want +time +solut +algorithm +even +depend +particular +program +languag +fortran +fridai +solut +class +exercis +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +subroutin +click +solut +question +function +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +comput +labyou +us +vectra +comput +scienc +statist +contain +hewlett +packard +vectra +run +microsoft +window +microsoft +fortran +open +seven +dai +week +except +certain +holidai +printer +room +locat +across +hall +also +home +dorm +comput +write +program +howev +probabl +purchas +copi +microsoft +fortran +lahei +person +fortran +insid +cover +textbook +also +work +comput +lab +campu +howev +fortran +compil +pleas +first +us +softwar +us +includ +microsoft +window +microsoft +fortran +mail +netscap +pointer +interest +home +page +gareth +bestor +home +page +comput +scienc +depart +home +page +start +point +internet +explor +lyco +search +world +wide +keyword +dilbert +comic +relief +long +night +assign +copyright +copi +gareth +bestor +bestor +wisc +last +modifi +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..c5e14118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,86 @@ +fall +section +fall +section +algebra +languag +program +name +dave +egglestonemail +burnett +wisc +eduoffic +offic +phone +offic +hour +announc +updat +note +origin +output +prog +page +error +dai +week +correct +valu +inform +exam +updat +question +ask +program +hourlywork +classread +scan +thursdai +class +program +avail +solut +quiz +grade +page +gener +cours +informationc +home +pagecours +objectivesvectra +labc +consultantssyllabuswork +homeclass +handout +gradeshomeworkexam +quizzesmiscellan +archivepolici +informationemail +policygrad +policyl +policyacadem +misconduct +policytextproblem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +erratalast +modifi +dave +eggleston +burnett +wisc +base +greg +sharp +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..7a8a93f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,180 @@ +spring +advanc +oper +system +spring +summari +cours +intend +give +broad +exposur +advancedoper +system +topic +import +compon +cours +read +discuss +ofvari +research +paper +project +involv +implement +anexperiment +system +research +paper +cover +topicsinclud +synchron +commun +memori +manag +file +system +protect +secur +distribut +system +project +requir +tochoos +problem +research +propos +solut +implement +prototyp +system +lectur +info +class +discuss +topic +relev +current +paper +lectur +detail +review +paper +rathera +discuss +major +topic +theme +us +paper +focal +point +activ +particip +discuss +strongli +encourag +lectur +tuesdai +thursdai +engin +halloffic +hour +tuesdai +appoint +comput +scienc +text +text +select +classic +paper +oper +system +design +implement +purchas +read +doit +formerli +macc +document +deskfor +read +semest +differ +previou +semest +pleas +copi +paper +grade +exam +cours +instead +assign +first +assign +us +benchmark +suit +measur +performanceof +variou +oper +system +suno +solari +linux +window +manya +hand +second +assign +project +involv +project +propos +implement +final +report +project +present +total +grade +class +particip +count +first +assign +count +project +count +schedul +tent +schedul +project +list +suggest +project +make +project +well +either +case +need +come +discuss +choos +project +team +peopl +allow +slide +slide +us +lectur +assig +first +assign diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..75c0256a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,87 @@ +introduct +comput +program +comput +scienc +fall +credit +cours +design +cover +basic +programmingstructur +need +prepar +student +elementaryengin +cours +materi +cover +suffici +enableth +student +write +simpl +program +solv +engin +problem +inelementari +cours +materi +essenti +first +half +list +fall +section +lectur +fortran +jeff +lampert +lectur +fortran +jeff +lampert +lectur +toni +silva +lectur +toni +silva +lectur +sidnei +hummert +lectur +sidnei +hummert +lectur +michael +birk +lectur +michael +birk +lectur +sidnei +hummert +lectur +sidnei +hummert +lectur +toni +silva +lectur +toni +silva +lectur +russel +man +lectur +russel +man +lectur +martin +reameslast +modifi +anthoni +silva diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..0f718d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,222 @@ +fall +midterm +exam +answer +keyinstructorprofessor +desautelsoffic +comput +sciencesoffic +hour +mondai +wednesdai +appoint +phone +dept +offic +mail +wisc +teach +assistantsfollow +link +home +page +name +kelli +ratliff +email +kelli +wisc +offic +offic +phone +offic +hour +section +grade +name +nathan +bockrath +email +bockrath +wisc +offic +offic +phone +offic +hour +section +grade +name +rehnuma +rahman +email +rehnuma +wisc +offic +offic +phone +offic +hour +section +grade +name +jaim +fink +email +jfink +wisc +offic +offic +phone +offic +hour +section +grade +name +ashraf +aboulnaga +email +ashraf +wisc +offic +offic +phone +offic +hour +section +grade +name +andrew +geeri +email +geeri +wisc +offic +offic +phone +offic +hour +section +grade +name +jame +herro +email +jherro +wisc +offic +offic +phone +offic +hour +section +grade +name +abhinav +gupta +email +agupta +wisc +offic +offic +phone +offic +hour +section +grade +name +jyothi +krothap +email +jyothi +wisc +offic +offic +phone +offic +hour +section +grade +name +chiang +email +suhui +wisc +offic +offic +phone +offic +hour +section +grade +name +thano +tsioli +email +tsioli +wisc +offic +offic +phone +offic +hour +section +gradesexplor +compani +whose +softwar +hardwar +borland +hewlett +packard +intel +microsoft +novel +us +link +explor +lyco +enorm +databas +site +yahoo +internet +resourc +classifi +categori +lookup +search +virtual +tourist +find +site +around +world +click +world +mother +larg +alphabet +list +site +cool +list +especi +excel +site +univers +wisconsin +madison +home +page +page +origin +creat +maintain +teitelbaum +thano +tsioli +modifi +maintain +kelli +ratliff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..0f718d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,222 @@ +fall +midterm +exam +answer +keyinstructorprofessor +desautelsoffic +comput +sciencesoffic +hour +mondai +wednesdai +appoint +phone +dept +offic +mail +wisc +teach +assistantsfollow +link +home +page +name +kelli +ratliff +email +kelli +wisc +offic +offic +phone +offic +hour +section +grade +name +nathan +bockrath +email +bockrath +wisc +offic +offic +phone +offic +hour +section +grade +name +rehnuma +rahman +email +rehnuma +wisc +offic +offic +phone +offic +hour +section +grade +name +jaim +fink +email +jfink +wisc +offic +offic +phone +offic +hour +section +grade +name +ashraf +aboulnaga +email +ashraf +wisc +offic +offic +phone +offic +hour +section +grade +name +andrew +geeri +email +geeri +wisc +offic +offic +phone +offic +hour +section +grade +name +jame +herro +email +jherro +wisc +offic +offic +phone +offic +hour +section +grade +name +abhinav +gupta +email +agupta +wisc +offic +offic +phone +offic +hour +section +grade +name +jyothi +krothap +email +jyothi +wisc +offic +offic +phone +offic +hour +section +grade +name +chiang +email +suhui +wisc +offic +offic +phone +offic +hour +section +grade +name +thano +tsioli +email +tsioli +wisc +offic +offic +phone +offic +hour +section +gradesexplor +compani +whose +softwar +hardwar +borland +hewlett +packard +intel +microsoft +novel +us +link +explor +lyco +enorm +databas +site +yahoo +internet +resourc +classifi +categori +lookup +search +virtual +tourist +find +site +around +world +click +world +mother +larg +alphabet +list +site +cool +list +especi +excel +site +univers +wisconsin +madison +home +page +page +origin +creat +maintain +teitelbaum +thano +tsioli +modifi +maintain +kelli +ratliff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..0ba84476 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,228 @@ +us +comput +lectur +us +computersinstructor +info +instructor +salli +petersonoffic +comput +sciencephon +mail +salli +wisc +slpeter +facstaff +wisc +eduoffic +hour +tuesdai +thursdai +appointmentvit +class +info +time +lectur +lectur +place +lectur +held +engin +halllectur +text +inform +technolog +societi +laudon +traver +laudonlab +text +point +click +drag +us +macintosh +petersoncours +introduct +class +design +take +zero +knowledg +computersto +crack +shot +user +us +skill +throughcolleg +arena +section +taught +us +macintoshcomput +section +us +avail +csuse +comput +lectur +cours +compon +part +lectur +lectur +discuss +comput +gener +term +gener +comput +scienc +topic +discuss +comput +work +includ +follow +topic +necessarili +order +applic +program +includ +word +processor +spreadsheet +graphic +databas +hardwar +input +output +storag +devic +oper +system +program +languag +network +telecommun +artifici +intellig +expert +system +comput +relat +social +issu +part +laboratori +discuss +section +hand +experienceon +macintosh +iici +comput +follow +program +word +process +word +electron +mail +newsgroup +world +wide +eudora +netscap +paint +draw +aldu +superpaint +spreadsheet +chart +excel +databas +filemak +present +manag +hypercard +desktop +publish +aldu +pagemak +integr +part +learn +macintosh +oper +system +system +well +addit +special +tool +scanner +avail +teach +section +thegoal +provid +high +qualiti +instruct +rich +educationalexperi +namesectiontimedai +bodner +mwnick +leavi +mwtrshannon +lloyd +trtrjeff +reminga +mwfmwira +sharenow +trtrbrian +swander +mwfmwfbrad +thayer +mwfmwfjoe +varghes +trtrgeoff +weinberg +mwftrmaria +yuin +mwfmwrecommend +background +background +necessari +cours +assign +quizz +exam +grade +base +exam +lectur +regular +assignmentsand +quizz +syllabu +glanc +syllabu +contain +nitti +gritti +class +detail +click +assign +assign +superpaintassign +excellast +modifi +octob +jonbodn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..0ba84476 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,228 @@ +us +comput +lectur +us +computersinstructor +info +instructor +salli +petersonoffic +comput +sciencephon +mail +salli +wisc +slpeter +facstaff +wisc +eduoffic +hour +tuesdai +thursdai +appointmentvit +class +info +time +lectur +lectur +place +lectur +held +engin +halllectur +text +inform +technolog +societi +laudon +traver +laudonlab +text +point +click +drag +us +macintosh +petersoncours +introduct +class +design +take +zero +knowledg +computersto +crack +shot +user +us +skill +throughcolleg +arena +section +taught +us +macintoshcomput +section +us +avail +csuse +comput +lectur +cours +compon +part +lectur +lectur +discuss +comput +gener +term +gener +comput +scienc +topic +discuss +comput +work +includ +follow +topic +necessarili +order +applic +program +includ +word +processor +spreadsheet +graphic +databas +hardwar +input +output +storag +devic +oper +system +program +languag +network +telecommun +artifici +intellig +expert +system +comput +relat +social +issu +part +laboratori +discuss +section +hand +experienceon +macintosh +iici +comput +follow +program +word +process +word +electron +mail +newsgroup +world +wide +eudora +netscap +paint +draw +aldu +superpaint +spreadsheet +chart +excel +databas +filemak +present +manag +hypercard +desktop +publish +aldu +pagemak +integr +part +learn +macintosh +oper +system +system +well +addit +special +tool +scanner +avail +teach +section +thegoal +provid +high +qualiti +instruct +rich +educationalexperi +namesectiontimedai +bodner +mwnick +leavi +mwtrshannon +lloyd +trtrjeff +reminga +mwfmwira +sharenow +trtrbrian +swander +mwfmwfbrad +thayer +mwfmwfjoe +varghes +trtrgeoff +weinberg +mwftrmaria +yuin +mwfmwrecommend +background +background +necessari +cours +assign +quizz +exam +grade +base +exam +lectur +regular +assignmentsand +quizz +syllabu +glanc +syllabu +contain +nitti +gritti +class +detail +click +assign +assign +superpaintassign +excellast +modifi +octob +jonbodn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..f0471463 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,81 @@ +home +pagecomput +scienc +algebra +languag +program +section +instructorsw +would +like +comment +suggest +complaint +feedback +provid +click +skrentni +coordin +offic +email +skrentni +csinform +section +frequent +ask +question +cours +overview +microcomput +laboratori +consult +fall +consult +schedul +tutor +mainli +polici +academ +misconduct +cours +offer +depart +softwar +section +introduct +microsoft +window +hint +window +compil +window +oper +system +email +netscap +creat +us +subdirectoriesc +inform +savitch +text +book +introduct +borland +languag +borland +integr +develop +environmentfortran +inform +jeff +lampert +home +page +section +last +updat +skrentni +coordin +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..3b8596ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,41 @@ +cours +infocours +inform +cscours +descriptionfrom +guidebook +undergradu +student +construct +algorithm +problem +solv +instruct +experi +least +procedur +orient +languag +pascal +fortran +survei +languag +advanc +program +techniqu +prereq +advanc +high +school +mathemat +prepar +colleg +work +mathemat +statist +logic +consent +instructor +open +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..f0471463 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,81 @@ +home +pagecomput +scienc +algebra +languag +program +section +instructorsw +would +like +comment +suggest +complaint +feedback +provid +click +skrentni +coordin +offic +email +skrentni +csinform +section +frequent +ask +question +cours +overview +microcomput +laboratori +consult +fall +consult +schedul +tutor +mainli +polici +academ +misconduct +cours +offer +depart +softwar +section +introduct +microsoft +window +hint +window +compil +window +oper +system +email +netscap +creat +us +subdirectoriesc +inform +savitch +text +book +introduct +borland +languag +borland +integr +develop +environmentfortran +inform +jeff +lampert +home +page +section +last +updat +skrentni +coordin +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..b0fe92c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,69 @@ +home +page +problem +solv +us +comput +fall +comput +scienc +check +follow +page +inform +instructor +teach +assist +includ +offic +hour +inform +assign +includ +suggest +copi +assign +explan +grade +check +polici +assign +work +inform +examin +copi +past +exam +inform +lab +includ +copi +handout +document +includ +syllabu +mani +document +page +postscript +need +postscript +viewer +obtain +site +check +local +servic +section +depart +home +page +local +servic +page +ghost +directori +read +readm +file +direct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..b0fe92c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,69 @@ +home +page +problem +solv +us +comput +fall +comput +scienc +check +follow +page +inform +instructor +teach +assist +includ +offic +hour +inform +assign +includ +suggest +copi +assign +explan +grade +check +polici +assign +work +inform +examin +copi +past +exam +inform +lab +includ +copi +handout +document +includ +syllabu +mani +document +page +postscript +need +postscript +viewer +obtain +site +check +local +servic +section +depart +home +page +local +servic +page +ghost +directori +read +readm +file +direct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..c48a1560 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,271 @@ +home +page +fall +fall +cours +inform +instructor +offic +hour +offic +hour +class +cancel +handout +assign +solut +exam +grade +simul +help +lectur +note +frequent +ask +question +instructor +section +jerri +tusch +offic +phone +hour +mail +jerri +wisc +tutsch +execpc +class +section +section +nolandsect +karen +miller +offic +phone +hour +mail +smoler +wisc +class +time +psycholog +sunlung +suen +offic +phone +hour +tuth +mail +ssuen +wisc +edusridevi +bhamidipati +offic +phone +hour +mail +bsri +wisc +edumohammad +asgarian +offic +phone +hour +tuth +mail +wisc +class +cancel +karen +section +class +mondai +septemb +class +wednesdai +novemb +jerri +section +cancel +schedul +handout +revis +chapter +postscript +cours +overview +jerri +section +html +assign +assign +html +html +assign +program +homework +assign +karen +section +program +homework +assign +program +homework +assign +program +homework +solut +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +programs +programm +quiz +section +assign +html +program +examsal +quizz +open +book +note +calcul +karen +section +quiz +fridai +septemb +class +quiz +fridai +septemb +class +quiz +fridai +octob +class +quiz +fridai +octob +class +quiz +fridai +novemb +class +quiz +fridai +novemb +class +quiz +probabl +fridai +decemb +last +class +jerri +section +section +syllabu +html +section +syllabu +html +previou +exam +postscript +format +fall +exam +spring +exam +fall +exam +spring +exam +fall +exam +fall +exam +spring +exam +summer +exam +fall +midterm +exam +answer +fall +final +exam +answer +grade +lookup +grade +simul +help +graphic +interfac +manual +lectur +noteskaren +miller +section +section +chapter +chapter +chapter +number +system +chapter +data +represent +chapter +integ +arithmet +chapter +float +point +arithmet +chapter +data +structur +chapter +regist +chapter +procedur +updat +wednesdai +chapter +assembl +updatedmondai +octob +chapter +chapter +except +process +chapter +featur +perform +chapter +architecur +case +studi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..73b31f2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,296 @@ +home +page +fall +fall +cours +inform +instructor +offic +hour +offic +hour +class +cancel +handout +assign +solut +exam +grade +simul +help +lectur +note +frequent +ask +question +instructor +section +jerri +tusch +offic +phone +hour +mail +jerri +wisc +tutsch +execpc +class +section +section +nolandsect +karen +miller +offic +phone +hour +mail +smoler +wisc +class +time +psycholog +sunlung +suen +offic +phone +hour +tuth +mail +ssuen +wisc +edusridevi +bhamidipati +offic +phone +hour +mail +bsri +wisc +edumohammad +asgarian +offic +phone +hour +tuth +mail +wisc +class +cancel +karen +section +class +mondai +septemb +class +wednesdai +novemb +jerri +section +cancel +schedul +handout +revis +chapter +postscript +cours +overview +jerri +section +html +assign +assign +html +html +assign +program +homework +assign +karen +section +program +homework +assign +program +homework +assign +program +homework +assign +program +homework +solut +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +programs +programm +quiz +section +assign +html +program +quiz +section +assign +html +programa +programb +quiz +section +examsal +quizz +open +book +note +calcul +karen +section +quiz +fridai +septemb +class +quiz +fridai +septemb +class +quiz +fridai +octob +class +quiz +fridai +octob +class +quiz +fridai +novemb +class +quiz +fridai +novemb +class +quiz +probabl +fridai +decemb +last +class +option +final +thursdai +decemb +difficult +cumul +final +offer +desperateto +rais +grade +sign +advanc +decemb +jerri +section +section +syllabu +html +section +syllabu +html +previou +exam +postscript +format +fall +exam +spring +exam +fall +exam +spring +exam +fall +exam +fall +exam +spring +exam +summer +exam +fall +midterm +exam +answer +fall +final +exam +answer +grade +lookup +grade +simul +help +graphic +interfac +manual +lectur +noteskaren +miller +section +section +chapter +chapter +chapter +number +system +chapter +data +represent +chapter +integ +arithmet +chapter +float +point +arithmet +chapter +data +structur +chapter +regist +chapter +procedur +updat +wednesdai +chapter +assembl +updatedmondai +octob +chapter +chapter +except +process +chapter +featur +perform +chapter +architecur +case +studi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..d47582db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,255 @@ +data +structur +lec +introduct +data +structureslectur +psychologylectur +psychologycours +inform +announc +read +assign +get +start +get +help +exam +program +assign +sampl +code +lectur +inform +cours +materi +comput +lab +home +announc +gener +announc +place +recent +announc +first +announc +problem +program +assign +found +locat +assign +page +binari +search +tree +sampl +code +onlin +sampl +page +last +makeup +exam +done +place +copi +solut +exam +reserv +kurt +wendt +librari +queue +sampl +code +onlin +sampl +page +stack +sampl +code +onlin +sampl +page +exam +topic +onlin +exam +page +list +sampl +code +onlin +sampl +page +handin +directori +creat +list +common +program +error +onlin +suggest +addit +welcom +either +vega +comput +lab +work +comput +inform +sampl +code +place +line +mondai +wednesdai +lectur +get +magic +number +error +sourc +file +must +abl +compil +otherwis +unusu +error +look +stale +page +forget +reload +page +page +updat +copi +browser +cach +becom +outdat +stale +attend +unix +tutori +need +attend +time +list +thur +thur +read +assign +futur +balanc +search +tree +chapter +page +futur +tabl +chapter +discuss +comparison +implement +lectur +lectur +tree +chapter +lectur +queue +chapter +skip +simul +lectur +overload +oper +chapter +page +lectur +hash +tabl +chapter +page +lectur +stack +chapter +lectur +link +list +chapter +lectur +pointer +dynam +memori +alloc +chapter +page +lectur +sort +search +analysi +chapter +page +lectur +sort +algorithm +chapter +page +lectur +basic +recurs +search +algorithm +chapter +lectur +basic +chapter +page +skip +focu +lectur +basic +appendix +page +lectur +skrentni +skrentni +wisc +offic +comput +scienc +offic +hour +teach +assist +baicheng +billi +liao +bail +wisc +offic +comput +scienc +offic +hour +cheng +jiacheng +wisc +offic +comput +scienc +offic +hour +pmcopyright +copi +jame +skrentni +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..4b0947a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,987 @@ +lectur +introduct +data +structuresfal +cours +email +address +wisc +cours +home +page +http +wisc +htmlinstructor +yanni +ioannidi +offic +comput +sciencesoffic +hour +tuesdai +thursdai +amoffic +phone +email +address +yanni +wisc +home +page +http +wisc +yanni +yanni +html +content +new +teach +assist +lectur +inform +languag +text +grade +exam +cours +schedul +assign +program +assign +late +polici +cheat +help +program +grade +style +extern +document +intern +document +us +unix +program +develop +cycl +newsassign +assign +readi +midterm +statisticssom +interest +exam +statist +section +median +mean +midterma +sampl +oldmidterm +avail +help +prepar +ownmidterm +assign +assign +readi +note +notat +binari +searchth +note +notat +binari +search +avail +want +print +either +open +filemenu +ghostview +window +show +document +andchoos +print +menu +item +women +comput +sciencesom +femal +faculti +graduat +student +undergradu +haveform +group +call +wic +women +comput +scienc +oneof +group +goal +encourag +women +becomecomput +scienc +major +women +thisclass +would +like +talk +someon +major +incomput +scienc +graduat +studi +comput +scienc +women +would +like +extra +help +withtheir +classwork +suzan +computersci +grad +student +offic +hour +email +tomak +appoint +suzan +mail +address +stodder +wisc +eduand +offic +hour +tuesdai +grow +tremend +field +theodd +ever +write +anoth +program +thiscours +end +abl +write +statementi +true +pascal +also +wide +avail +aniniti +startup +period +product +take +comput +scienc +cours +exceptionsy +requir +cours +textth +text +book +cours +isdata +abstract +problem +solv +wall +mirror +frank +carrano +isbn +well +written +text +cover +materi +cours +also +includ +separ +text +languag +notnecessari +lectur +often +alwai +follow +lectur +note +fall +david +dewitt +note +actual +consider +complet +simpl +lecturenot +still +short +true +text +book +isveri +littl +narr +text +exercis +recommend +addit +sourc +want +purchas +note +whichar +avail +doit +document +desk +near +dayton +street +entranceof +comput +scienc +build +dayton +first +experi +unix +needsom +inform +activ +account +log +creat +edit +manipul +file +compil +run +debug +program +handoutc +avail +doit +inform +desk +dewitt +notesar +avail +contain +inform +find +invalu +also +help +section +mention +lectur +often +follow +dewitt +note +althoughi +supplement +handout +courseof +semest +nonetheless +respons +materi +cover +lectur +exam +base +onth +lectur +materi +read +assign +note +andth +cours +assign +gradingther +even +exam +cours +semest +final +exam +five +program +assign +exam +determin +final +grade +approxim +equal +weight +programmingassign +count +exam +exam +tuesdai +octob +chemistri +exam +final +exam +wednesdai +decemb +place +cours +schedul +follow +list +topic +cover +thiscours +detail +schedul +provid +later +semest +topic +dewitt +note +wall +mirror +introduct +administr +gener +familiar +basic +stuff +lectur +function +lectur +apoint +lectur +record +equival +madison +prerequisitecours +assign +must +done +design +machin +thesear +machin +room +first +floor +thec +build +encourag +machin +prefer +home +comput +certainrestrict +must +compil +home +machin +must +univers +account +often +read +emailand +copi +data +file +final +requir +thatyou +turn +program +electron +email +youwork +home +must +make +provis +download +program +toyour +univers +account +make +sure +compil +runwith +compil +sparcstat +often +electron +mail +notifi +student +chang +inassign +hint +program +assum +read +allelectron +mail +send +late +policyno +late +assign +accept +assign +must +turn +exactli +order +avoid +late +caus +machin +load +coincid +duedat +sever +class +simpli +sure +start +right +awai +oneach +assign +thing +certain +wrong +wait +thelast +minut +start +except +must +approv +need +good +excus +troubl +soon +possibl +cheatingth +comput +scienc +depart +take +hard +linest +cheat +welcom +tocommun +design +algorithm +datastructur +butther +share +code +also +expect +learn +understand +obei +thecomput +system +policiesgovern +comput +account +helpif +problem +cours +work +program +pleas +know +earli +semest +possibl +offic +hour +policiesif +need +help +debug +program +best +help +tovisit +thec +offic +hour +take +along +currenthard +copi +program +offic +hour +intend +time +explain +conceptsthat +present +class +still +confus +answer +specif +question +cours +materi +encourag +email +reliabl +contact +problem +read +respond +emailsever +time +daili +almost +everi +week +program +gradingprogram +grade +follow +criteria +correct +program +behav +correctli +normal +typicalinput +program +behav +state +projectspecif +clariti +program +easi +read +understand +note +style +informationabout +clariti +robust +correct +behavior +extrem +unusu +situat +program +handl +situat +reason +andlog +manner +simpli +blow +qualiti +test +data +test +data +program +shoulddemonstr +facet +program +capabl +includingunusu +case +effici +avoid +unnecessarili +ineffici +algorithm +construct +howev +effici +never +pursu +expens +clariti +modular +program +modular +make +effect +useof +paramet +complet +incorpor +inform +program +need +sort +extra +paper +document +gener +program +gener +possibl +subject +considerationof +effici +clariti +avoid +arbitrari +limit +bound +size +orcomplex +input +whenev +possibl +limit +necessari +express +definedconst +near +program +easili +chang +numer +liter +appear +program +thosevalu +like +chang +styleus +meaning +identifi +name +consist +name +scheme +identifi +name +suggest +convent +follow +variable_nam +function_nam +argument +const +defined_const +enum +enumtyp +valu +valu +class +classnam +multipl +statement +singl +line +skip +line +function +group +code +clear +consist +indent +style +dewitt +notesfor +suggest +style +indent +continu +statement +loop +line +label +meaningfulli +done +extern +documentationthi +includ +long +comment +begin +yourprogram +address +typic +user +someonewho +want +know +superfici +program +work +includ +full +name +student +begin +comment +give +gener +descript +program +tell +program +call +format +data +give +limit +bug +special +featur +assumpt +made +describ +neg +well +posit +aspect +program +includ +neg +assum +unawar +inform +includ +assign +problem +descriptionne +repeat +briefli +summar +first +point +statement +refer +user +assign +document +thensuffici +note +appli +problem +descript +intern +documentationther +four +main +type +intern +document +header +comment +header +function +class +major +data +structuresshould +describ +purpos +assumpt +paramet +main +outlin +algorithm +declar +comment +next +declar +variabl +data +membershould +provid +extra +inform +convei +identifi +sname +name +variabl +tell +much +possibl +withoutmak +long +addit +inform +suppli +comment +exampl +index +last +element +ad +stackyou +comment +explain +paramet +well +local +variabl +within +segment +code +tricki +opaqu +section +code +beavoid +sometim +necessari +case +commentcan +help +reader +understand +go +segment +code +comment +clarifi +level +outlineof +algorithm +us +unix +vimani +peopl +work +unix +thefirst +time +find +take +time +becomecomfort +particularli +true +youronli +previou +program +experi +pascal +us +macpasc +macintosh +strongli +urg +inth +time +earli +semest +becom +comfort +withunix +time +pain +time +wellspent +also +wish +attend +unix +tutori +held +room +comp +session +thefollow +dai +tbayou +want +pick +copi +program +develop +cycl +program +develop +cycl +unix +environ +edit +program +program +compil +program +wall +program +compil +error +continu +program +inputfil +outputfil +look +output +outputfil +outputfil +error +break +tire +continu +print +list +take +home +program +inputfil +outputfil +goto +home +debug +program +quit +done +turn +result +submiss +instruct +given +later +yanni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..30707856 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,519 @@ +introduct +data +structur +http +wisc +html +revis +fall +jame +larusinstructor +jame +laruslaru +wisc +comput +scienc +http +wisc +laru +laru +html +offic +hour +tuesdai +fridai +amcontentsteach +assistantstextlectur +informationelectron +mailth +languagegradingexamscours +scheduleassign +assign +assign +assign +program +assignmentscours +objectivesc +object +present +concept +data +structur +gener +wide +us +structur +detail +data +structur +fundament +build +block +comput +program +cours +abl +identifi +situat +data +structur +necessari +determin +requir +data +structur +select +appropri +data +structur +cover +cours +reiter +concept +structur +program +abstract +data +type +modular +principl +introduc +essenti +write +clear +correct +maintain +softwar +close +connect +abstract +data +type +data +structur +cours +place +strong +emphasi +appli +principl +program +exercis +teach +assistantswei +zhang +chin +tang +chin +teach +assist +forthi +cours +section +grade +homework +assignmentsand +happi +answer +question +theassign +aspect +cours +give +troubl +zhangoffic +compuer +sciencesoffic +hour +wednesdai +thursdai +sundai +offic +phone +email +address +weiz +wisc +chin +tang +chin +offic +comput +sciencesoffic +hour +mondai +tuesdai +fridai +amoffic +phone +email +address +cchin +wisc +home +page +http +wisc +cchin +cchin +html +textth +text +book +cours +data +abstract +problem +solv +wall +mirror +frank +carrano +isbn +well +written +littl +long +wind +text +cover +materi +cours +also +includ +background +separ +text +languag +necessari +lectur +often +alwai +follow +david +dewitt +lectur +note +fall +note +complet +simpl +lectur +note +fall +short +true +text +book +contain +littl +narr +text +exercis +us +note +basi +lectur +feel +free +skip +portion +cover +addit +materi +want +purchas +note +avail +doit +document +desk +dayton +street +entranc +comput +scienc +build +dayton +cours +first +experi +unix +need +inform +activ +account +log +creat +edit +manipul +file +compil +run +debug +program +handout +also +avail +doit +inform +desk +contain +crucial +inform +also +also +help +section +lectur +inform +tuesdai +thursdai +psycholog +mention +lectur +often +follow +dewitt +note +lectur +attend +strongli +recommend +regularli +present +materi +appear +textbook +lectur +note +us +program +assign +exam +needless +respons +materi +cover +lectur +exam +base +lectur +materi +read +assign +note +cours +assign +electron +maili +often +electron +mail +notifi +student +chang +assign +hint +program +assum +regularli +read +electron +mail +gradingther +even +exam +semest +final +exam +five +program +assign +exam +determin +final +grade +approxim +equal +weight +program +assign +count +languag +taught +us +program +languag +program +assign +must +written +know +section +skrentni +teach +section +cover +addit +data +structur +larg +complex +languag +unless +experi +program +even +difficult +languag +learn +book +also +anoth +page +inform +program +assign +gdbthere +also +page +describ +program +debugg +exam +exam +tuesdai +chemistri +exam +final +exam +wednesdai +decemb +place +cours +schedul +follow +rough +outlin +topic +cover +cours +detail +schedul +provid +later +topic +dewitt +note +introduct +administrationbas +stuff +lectur +function +lectur +pointer +lectur +record +dynam +storagelectur +list +lectur +binari +search +notat +advanc +listslectur +stackslectur +queueslectur +hashinglectur +even +exam +lectur +recursionlectur +treesbinari +tree +sort +searchlectur +treesgraphslectur +even +exam +sortinglectur +tbaassign +absolut +requir +grade +turn +index +card +follow +inform +name +login +nameyear +school +freshman +sophomor +previou +coursesprevi +program +experiencerec +photograph +pictur +birthdai +girl +scout +trip +summer +color +black +white +size +grade +given +without +photo +assign +first +program +assign +write +simpl +abstract +data +byte +fora +bound +integ +sequenc +text +assign +line +assign +second +program +assign +write +program +maintain +databaseof +score +tenni +tournament +text +assign +line +assign +second +program +assign +write +program +produc +aconcord +us +hash +tabl +text +assign +line diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..e397724a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,234 @@ +data +structur +lec +introduct +data +structureslectur +psychologylectur +psychologycours +inform +announc +read +assign +get +start +get +help +exam +program +assign +sampl +code +lectur +inform +cours +materi +comput +lab +home +announc +gener +announc +place +recent +announc +first +announc +problem +program +assign +found +locat +assign +page +last +makeup +exam +done +place +copi +solut +exam +reserv +kurt +wendt +librari +queue +sampl +code +onlin +sampl +page +stack +sampl +code +onlin +sampl +page +exam +topic +onlin +exam +page +list +sampl +code +onlin +sampl +page +handin +directori +creat +list +common +program +error +onlin +suggest +addit +welcom +either +vega +comput +lab +work +comput +inform +sampl +code +place +line +mondai +wednesdai +lectur +get +magic +number +error +sourc +file +must +abl +compil +otherwis +unusu +error +look +stale +page +forget +reload +page +page +updat +copi +browser +cach +becom +outdat +stale +attend +unix +tutori +need +attend +time +list +thur +thur +read +assign +futur +tree +chapter +lectur +queue +chapter +skip +simul +lectur +overload +oper +chapter +page +lectur +hash +tabl +chapter +page +lectur +stack +chapter +lectur +link +list +chapter +lectur +pointer +dynam +memori +alloc +chapter +page +lectur +sort +search +analysi +chapter +page +lectur +sort +algorithm +chapter +page +lectur +basic +recurs +search +algorithm +chapter +lectur +basic +chapter +page +skip +focu +lectur +basic +appendix +page +lectur +skrentni +skrentni +wisc +offic +comput +scienc +offic +hour +teach +assist +baicheng +billi +liao +bail +wisc +offic +comput +scienc +offic +hour +cheng +jiacheng +wisc +offic +comput +scienc +offic +hour +pmcopyright +copi +jame +skrentni +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..073bcaa9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,82 @@ +introduct +theoret +comput +scienc +introduct +theoret +comput +scienc +fall +room +lectur +brian +cole +email +wisc +offic +offic +hour +mondai +fridai +teach +assist +david +sundaram +stukel +email +sundaram +wisc +offic +offic +hour +tuesdai +wednesdai +thursdai +text +introduct +languag +theori +comput +john +martin +north +dakota +state +univers +mcgraw +hill +isbn +tent +lectur +schedul +includ +exam +inform +lectur +clarif +assign +page +grade +polici +written +assign +term +examin +final +examin +archiv +mail +list +home +page +septemb +brian +cole +madison +comput +scienc +home +page +madison +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..4dd446fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,158 @@ +also +math +stat +fall +also +math +stat +linear +programmingfal +schedul +lectur +mechan +engin +open +book +midterm +exam +time +date +thursdai +octob +locat +mechan +engin +open +book +final +exam +time +date +wednesdai +decemb +locat +instructor +olvi +mangasarian +offic +comp +stat +pphone +mail +olvi +wisc +offic +hour +wednesdai +fall +semest +teach +assist +offic +comp +stat +telephon +mail +wisc +offic +hour +textbook +linear +program +matlab +ferri +mangasarian +preliminari +version +doit +madison +syllabu +cours +overview +cours +inform +cours +inform +book +reserv +kurt +wendt +librari +matlab +setup +homework +septemb +homework +septemb +homework +septemb +homework +septemb +homework +octob +homework +octob +homework +octob +homework +octob +homework +novemb +homework +novemb +homework +decemb +homework +decemb +program +project +novemb +sampl +midterm +exam +march +solut +sampl +midterm +exam +march +midterm +exam +march +solut +midterm +exam +march +midterm +exam +octob +solut +midterm +exam +octob +sampl +final +exam +final +exam +solut +final +exam +mathemat +program +home +page +cours +relev +site +searchabl +bibliograph +databas +item +link +variou +site +page +updat +period +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..0efc0266 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,87 @@ +fall +introduct +program +languag +compilersspr +stori +month +octob +schedul +lectur +tuth +comp +stat +recit +psycholog +instructor +susan +horwitz +offic +telephon +mail +horwitz +wisc +offic +hour +tuesdai +fridai +appoint +teach +assist +rahul +kapoor +offic +telephon +mail +rahul +wisc +offic +hour +mondai +wednesdai +appoint +text +reserv +wendt +librari +compil +principl +techniqu +tool +sethi +ullman +craft +compil +fischer +leblanc +check +regularli +gener +cours +inform +cours +overview +date +inform +assign +exam +grade +includ +late +polici +get +start +read +program +assign +homework +examin +lectur +note +us +program +tool +grade +email +link +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..455428a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,1005 @@ +introduct +oper +system +fall +introduct +oper +systemssect +fall +instructormarvin +solomon +offic +comput +sciencesoffic +hour +troffic +phone +email +address +solomon +wisc +tarob +mellencamp +offic +comput +sciencesoffic +hour +mwfoffic +phone +email +address +mellen +wisc +new +watch +space +latest +updat +answer +midterm +exam +summari +score +avail +detail +breakdown +grade +distributioni +also +avail +specif +forproject +avaiabl +date +project +move +thursdai +typograph +error +note +deadlock +avoid +correct +importantli +arraywa +call +place +other +call +place +popular +demand +midterm +exam +isavail +look +warn +take +exampl +larg +grain +salt +exam +long +time +courseus +differ +text +cover +topic +differ +order +semest +midterm +likelyb +quit +differ +time +place +midterm +exam +determin +room +comp +octob +specif +forproject +avaiabl +discuss +issu +presentedin +class +avail +summari +grade +project +avail +electron +hand +direct +forprogram +post +procedur +givefork +algorithm +theproject +specificationshould +contain +call +notifi +page +correct +show +sept +fix +bug +theproject +specif +minor +import +first +caus +introduct +paragraph +slightli +garbl +thank +jake +dawlei +carr +point +second +line +omit +sampl +code +algorithm +theprogram +detailssect +creat +threadschedul +start +threadschedul +sched +threadschedul +sched +start +specifi +correctli +later +section +threadschedul +detail +section +page +fix +thank +lipe +zhang +sept +test +data +file +project +avail +directori +public +srccontain +three +data +file +java +class +read +file +public +graph +javacontain +definit +classgraphdescrib +project +specif +file +public +petersoncycl +graphcontain +peterson +graph +shown +project +specif +mention +initi +placement +fork +notacycl +file +public +petersonacycl +graphcontain +peterson +graph +acycl +initi +placement +fork +file +public +star +graphcontain +star +topolog +central +philosoph +sharingfork +nine +other +sept +todd +jenner +point +typo +specif +project +fork +number +read +fork +number +maxthink +replac +maxeat +onlin +versionha +correct +thank +todd +sept +mistak +thejava +tutori +note +section +string +argument +version +string +substr +second +argumenti +offset +substr +number +charactersin +string +note +correct +thank +franco +tung +chan +point +sept +occasion +send +urgent +messag +directli +mail +listof +student +regist +cours +archiv +messag +sent +list +receiv +messag +think +sendmail +solomon +wisc +specif +forproject +avaiabl +receiv +request +makefil +java +sampl +makefil +public +makefil +copi +file +work +directori +java +sourc +file +rememb +separ +directori +project +edit +describ +comment +type +maketo +compil +program +make +class +compilewithout +run +sept +note +hand +assign +simul +preemptiv +multitask +solari +computershav +ad +sept +java +tutoriali +finish +finish +go +section +onthread +find +help +hint +structur +project +awar +weekli +seminar +oper +systemsand +network +meet +mondai +first +seminar +semest +mondai +checkth +colloquia +seminar +page +detail +sept +java +book +final +avail +theunivers +bookstor +sept +begin +ajava +tutori +avail +sept +java +depart +unix +workstat +must +creat +afil +name +cshrc +local +home +directori +contain +onelin +path +path +java +make +chang +take +effect +either +type +sourc +cshrc +localor +simpli +back +sept +specif +project +readi +sept +unix +orient +session +unix +user +schedul +forth +follow +time +tue +thur +sept +room +csmon +thur +sept +room +cslast +updat +content +new +summari +lectur +inform +text +project +grade +cours +schedul +lectur +note +summari +intend +gener +introduct +techniqu +usedto +implement +oper +system +relat +kind +system +softwar +among +topic +cover +beprocess +manag +creation +synchron +commun +processor +schedul +deadlock +prevent +avoid +recoveri +main +memori +manag +virtual +memori +manag +swap +page +segment +page +replacementalgorithm +control +disk +input +output +devic +file +system +structur +implement +protect +secur +lectur +inform +lectur +tuesdai +thursdai +comput +sciencesand +statisticsdiscuss +wednesdai +psychologyth +discuss +section +option +least +import +lectur +primari +focu +wednesdai +meet +topic +relat +theproject +includ +introduct +thejavaprogram +languag +time +also +avail +answer +anyquest +regard +point +rais +lectur +thetext +text +requir +modern +oper +systemsbi +andrew +tanenbaum +prentic +hall +strongli +recommend +java +program +languagebi +arnold +jame +gosl +addison +weslei +onlin +refer +lot +addit +help +materi +java +avail +follow +refer +collect +local +fast +access +java +tutorialth +java +languag +specificationjava +documentationwatch +spot +addit +link +project +five +program +project +thejavaprogram +languag +sparcstat +workstat +run +solari +dialect +unixoper +system +provid +anycomput +access +implement +java +programminglanguag +howev +comput +scienc +depart +comput +respons +transfer +requireddata +set +softwar +packag +comput +first +assign +easi +acquaint +exercis +designedto +help +becom +familiar +comput +environ +thejava +languag +subsequ +project +involveprocess +synchron +processor +schedul +disk +schedul +file +system +implement +first +project +student +requir +work +pair +member +pair +receiv +grade +project +feel +free +discuss +project +anyon +butyou +must +share +code +anyon +partner +cheat +vigor +punish +enough +said +assign +begin +class +dateind +entir +semest +havethre +late +daysof +credit +late +dai +differ +assign +eachof +three +assign +three +dai +assign +late +dai +us +last +assign +java +student +take +cours +familiar +java +choos +java +sever +argument +favor +java +congeni +program +environ +runtim +error +subscript +null +pointer +uniniti +variabl +caus +except +caught +languag +runtimerath +mysteri +crash +random +behavior +java +string +much +easier +char +arrai +garbag +collect +storag +manag +extrem +handi +java +trendi +java +caught +faster +languag +histori +mani +reason +java +grow +popular +littl +withth +cours +discuss +issu +class +byproduct +coursewil +knowledg +java +becom +quit +market +commod +java +oper +system +featur +built +particular +first +wide +us +program +languag +withlanguag +level +support +concurr +thread +synchron +monitor +hand +switch +program +languag +alwaysa +disloc +fortun +excel +resourc +avail +eas +thetransit +java +program +languagebi +arnold +gosl +amazingli +good +neither +introductori +program +primer +author +assum +youalreadi +know +program +refer +manual +although +arefer +manuali +avail +onlin +readabl +introduct +languag +take +wayfrom +get +start +everyth +need +write +quit +sophisticatedprogram +java +book +avail +univers +bookstor +strongli +encourag +also +gather +varieti +ofoth +resourc +togeth +includ +niceonlin +tutorialabout +java +program +anda +refer +manualfor +standard +class +librari +us +grade +midterm +final +exam +count +grade +midterm +even +wednesdai +octob +room +comput +scienc +statist +final +schedul +timet +tuesdai +decemb +first +program +project +get +start +count +yourgrad +remain +four +project +count +cours +schedul +follow +schedul +tent +updat +later +semest +check +back +frequent +sept +introduct +chapter +sept +process +synchron +processor +schedul +chapter +sept +project +learn +javaoct +project +synchronizationoct +memori +manag +virtual +memori +chapter +project +schedulingoct +midterm +exam +room +comp +devic +file +system +chapter +project +disk +schedulingdec +protect +secur +section +project +file +systemsdec +final +exam +lectur +note +introduct +histori +bottom +view +view +cours +outlin +java +programm +process +synchron +us +process +process +process +creat +process +process +state +synchron +race +condit +semaphor +bound +buffer +problem +dine +philosoph +monitor +messag +deadlock +terminolog +deadlock +detect +deadlock +recoveri +deadlock +prevent +deadlock +avoid +implement +process +implement +monitor +implement +semaphor +implement +critic +section +short +term +schedul +memori +manag +alloc +main +memori +algorithm +memori +manag +compact +garbag +collect +swap +page +disk +come +solomon +wisc +eduthu +copyright +marvin +solomon +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..84a8914b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,107 @@ +fall +home +pagec +introduct +oper +systemsfal +tuesdai +thursdai +discuss +fridai +host +maryvernon +instructor +andkarunamuthiah +welcom +home +page +note +thursdai +lectur +fridai +discuss +beinterchang +follow +date +solut +quiz +assign +offic +hour +email +textbook +read +grade +project +quizz +mail +archiveapproxim +schedul +topicsweek +oftopicsreadingsep +introduct +concurr +thread +address +space +processeschapt +thread +manag +cooper +threadschapt +synchron +implement +mutual +exclusioncont +semaphorescont +monitor +concurr +summarycont +doct +deadlock +process +schedulingchapt +memori +manag +protect +address +translat +cach +tlbschapter +demand +page +virtual +memorycont +review +survei +systemschapt +file +system +name +directorieschapt +protect +java +object +core +methodstbanov +java +thread +secur +thanksgiv +class +network +distribut +system +remot +procedur +call +chapter +distribut +file +system +global +memori +system +reviewchapt +vernon +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..977eeee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,128 @@ +lectur +home +page +fall +fall +cours +inform +instructor +offic +hour +offic +hour +handout +assign +exam +grade +instructor +david +wood +offic +hour +tuesdai +wednesdai +appoint +mail +david +wisc +phone +class +time +tuesdai +thursdai +locat +phil +atkinson +offic +hour +tuesdai +thursdai +appoint +mail +atkinson +wisc +phone +get +start +help +inform +handout +cours +descript +get +start +mentor +error +check +correct +sampl +vhdl +code +compil +simul +vhdl +mentorassign +assign +answer +question +assign +assign +select +answer +assign +assign +select +answer +assign +assign +select +answer +assign +assign +assign +projectthi +section +includ +inform +cours +project +project +descript +project +deadlin +project +report +project +demonstr +time +decemb +examsth +midterm +exam +wednesdai +room +final +exam +tuesdai +room +exam +previou +spring +midterm +fall +midterm +spring +midterm +spring +midterm +spring +midterm +solut +fall +midterm +solut +spring +midterm +solut +spring +endterm diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..7bfa728c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,144 @@ +databas +manag +system +design +implementationc +databas +manag +system +design +implementationcours +inform +postscript +version +class +fridai +instead +offic +hour +time +assign +chang +fridai +assign +text +last +updat +assign +handout +postscript +class +mail +list +solut +chapter +exercis +pleas +dont +print +solut +chapter +exercis +postscript +first +inform +overview +prerequisit +offic +hour +topic +cover +grade +import +date +import +polici +issu +minibas +home +page +check +detail +assign +assign +assign +handout +postscript +assign +html +last +updat +assign +handout +postscript +assign +html +last +updat +assign +handout +postscript +last +year +midterm +sampl +postscript +last +year +midterm +postscript +us +sybas +info +sybas +info +help +yahoo +entri +resourc +tutori +info +tree +debugg +info +tree +languag +construct +assign +handout +grade +experi +assignmentoth +handout +code +convent +instructor +raghu +ramakrishnan +offic +phone +mail +raghu +offic +hour +lectur +discuss +lectur +time +place +ingraham +teach +assist +xuemei +offic +phone +mail +xbao +offic +hour +tue +thur +last +modifi +sept +xbao diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..a8707f59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,67 @@ +lectur +home +page +welcom +home +page +page +obvious +construct +semest +progress +addinginform +need +know +page +import +thing +know +class +meetingroom +chang +current +meet +russel +labsfor +lectur +option +discuss +fridai +beenmov +still +psycholog +instructor +jeff +naughton +offic +wednesdai +lectur +discuss +lectur +time +place +russel +lab +discuss +option +time +place +psycholog +inform +lectur +taught +close +cooper +lectur +fact +assign +probabl +exam +inform +gener +minibas +assign +particular +pleas +lectur +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..147f4c5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,58 @@ +introduct +algorithm +introduct +algorithm +cours +inform +instructor +eric +bach +offic +phone +mail +bach +wisc +hour +appt +teach +assist +bill +donaldson +offic +phone +mail +wisc +hour +teach +assist +raji +gopalakrishnan +offic +phone +mail +raji +wisc +hour +midterm +exam +cours +handout +cours +descript +syllabu +book +reserv +cours +organ +homework +homework +homework +solut +homework +homework +graph +fractal +behaviour +homework +mail +archiv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..2d9fd97a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,190 @@ +comput +network +cours +professor +landweb +comput +network +cours +introduct +comput +network +advanc +comput +networksintroduct +comput +network +tabl +content +intern +connect +network +cours +offer +cours +inform +instructor +teach +assist +cours +syllabu +mail +archiv +assign +program +refer +select +readingsclick +hereto +latest +text +version +networkingcours +madisoncours +informationlecturetim +mwfplace +comp +statclass +email +listinstructor +lawrenc +landweberoffic +comp +statphon +email +wisc +eduoffic +hour +teach +assist +srinivasa +narayananoffic +phone +email +wisc +eduoffic +hour +mondai +wednesdai +time +conveni +feel +free +email +wisc +appoint +teach +assist +teitelbaumoffic +phone +naemail +wisc +eduoffic +hour +tuesdai +thursdai +time +conveni +feel +free +email +wisc +appoint +fall +cours +syllabu +fall +mail +archiv +moder +mail +archiv +complet +assign +fall +program +assign +error +warn +code +class +project +implement +network +layer +reliabl +adapt +layer +handout +postscript +project +overview +slide +postscript +powerpoint +softwar +engin +slide +postscript +powerpoint +design +document +evalu +form +postscript +html +version +pictur +project +slide +document +grade +criteria +gradingmidterm +exam +final +exam +assign +term +project +prior +midterm +fall +midterm +fall +midterm +option +refer +book +project +unix +network +program +steven +richard +prentic +hall +isbn +program +refer +socket +interfac +socket +interfac +lectur +garbler +packag +annot +bibliographyread +partial +icmp +ospf +ipng +advanc +comput +network +lectur +schedul +spring +review +form diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..a2d0a918 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,30 @@ +home +page +fall +fall +instructor +robert +meyer +wisc +time +place +comp +offic +hour +cours +descript +homework +homework +solut +note +homework +homework +solut +note +comput +project +part +comput +project +part +option diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..f4823c68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,149 @@ +fall +also +math +stat +nonlinear +program +theori +applicationsfal +schedul +lectur +cours +mail +list +wisc +cours +http +wisc +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +tuesdai +wednesdai +teach +assist +offic +telephon +mail +wisc +offic +hour +wednesdai +thursdai +class +text +nonlinear +program +olvi +mangasarian +siam +publish +philadelphia +us +text +nonlinear +program +theori +algorithm +bazaraa +sherali +shetti +second +edit +wilei +york +nonlinear +program +bertseka +athena +scientif +gener +cours +inform +cours +overview +introduct +linear +inequ +theorem +altern +convex +set +convex +concav +function +saddlepoint +optim +criteria +without +differenti +differenti +convex +concav +function +first +order +optim +criteria +differenti +second +order +optim +criteria +differenti +dualiti +nonlinear +program +gener +convex +function +optim +condit +exact +penalti +augment +lagrangian +gradient +project +book +reserv +kurt +wendt +librari +assign +grade +homework +assign +grade +assign +week +midterm +examin +novemb +grade +final +examin +grade +homework +assign +homework +homework +homework +homework +mathemat +program +home +page +cours +page +updat +period +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..f4823c68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,149 @@ +fall +also +math +stat +nonlinear +program +theori +applicationsfal +schedul +lectur +cours +mail +list +wisc +cours +http +wisc +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +tuesdai +wednesdai +teach +assist +offic +telephon +mail +wisc +offic +hour +wednesdai +thursdai +class +text +nonlinear +program +olvi +mangasarian +siam +publish +philadelphia +us +text +nonlinear +program +theori +algorithm +bazaraa +sherali +shetti +second +edit +wilei +york +nonlinear +program +bertseka +athena +scientif +gener +cours +inform +cours +overview +introduct +linear +inequ +theorem +altern +convex +set +convex +concav +function +saddlepoint +optim +criteria +without +differenti +differenti +convex +concav +function +first +order +optim +criteria +differenti +second +order +optim +criteria +differenti +dualiti +nonlinear +program +gener +convex +function +optim +condit +exact +penalti +augment +lagrangian +gradient +project +book +reserv +kurt +wendt +librari +assign +grade +homework +assign +grade +assign +week +midterm +examin +novemb +grade +final +examin +grade +homework +assign +homework +homework +homework +homework +mathemat +program +home +page +cours +page +updat +period +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..932e3407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,86 @@ +home +page +comput +system +perform +evalu +model +new +sept +assign +postscript +text +sept +mimic +librari +avail +public +mimic +cours +inform +lectur +comput +scienc +devis +softwar +home +page +html +user +manual +postscript +pleas +print +file +contain +mani +imag +take +least +half +hour +initi +instruct +text +mimic +softwar +tutori +html +postscript +onlin +help +html +qnet +exampl +devc +html +professor +miron +livni +offic +comput +scienc +hour +phone +mail +miron +wisc +teach +assist +chee +yong +chan +offic +comput +scienc +hour +phone +mail +cychan +wisc +suggest +comment +pleas +send +cychan +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..65ee6d33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,91 @@ +find +uwisc +find +inform +retriev +technolog +seek +knowledgerichard +belewvisit +professorc +lectur +univ +wisconsin +comput +scienc +departmentfal +thur +acal +room +engrthi +cours +design +student +interest +understand +aboutth +inform +retriev +knowledg +represent +machinelearn +techniqu +underli +much +excit +activ +occur +onth +world +wide +complet +descript +coures +canse +abstract +asyllabu +major +topic +consid +graphic +mapof +thesear +relat +anda +tent +schedul +semesterwil +proce +cours +resourc +read +overview +part +postscript +overview +part +postscript +polit +infidel +imag +postscript +assign +class +email +digest +hypermai +suggest +compos +email +classrel +resourc +class +minut +taken +student +student +last +modifi +belew +wisc +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..61f70813 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,195 @@ +introduct +numer +method +last +chang +introduct +numer +methodsthi +page +contain +inform +fall +cours +smile +tent +syllabu +order +score +orderli +mind +addit +materi +chang +note +cooper +assignmentson +assign +page +order +total +score +midterm +chang +assign +note +chang +date +well +slightli +chang +point +problem +diari +class +addit +materi +residu +error +condit +chang +rick +carl +offic +hour +list +errata +text +chang +diari +class +diari +class +chang +us +email +concern +problem +updat +sinc +question +comput +complex +numericalanalysi +algorithm +post +preprint +foremostmathematician +todai +subject +interest +trickytop +least +squar +solut +approxim +time +place +also +textmai +supplement +byaddit +materi +file +mention +book +areavail +organ +chapter +awar +though +site +mention +book +well +name +begin +capitallett +sometim +name +book +begin +lower +caselett +matlab +diari +class +session +present +plan +comput +assign +matlab +rather +fortran +kermit +sigmon +matlab +primer +edit +avail +doit +look +handout +student +reaction +student +edit +matlab +access +matlab +telnet +access +matlab +telnet +winor +machin +cours +overviewcours +syllabu +tent +assign +also +answer +list +word +grade +look +last +four +digit +student +email +concern +current +question +conduct +unix +orient +session +user +andp +relat +linksyou +might +wish +explor +csdepart +home +page +comput +system +frequent +ask +question +list +simpl +tutori +avail +well +advanc +referenceviva +also +good +introduct +unix diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..3883b051 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,61 @@ +numer +function +analysi +last +chang +numer +function +analysisthi +page +contain +inform +fall +version +math +current +class +note +avail +follow +directori +well +hard +copi +doit +class +recent +announc +post +grade +time +locat +statlectur +carl +boor +email +deboor +wisc +offic +hour +stat +line +classnot +viii +index +assign +none +email +concern +homework +cours +relat +question +relat +linksyou +might +wish +explor +depart +home +page +courseoff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..89c18b9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,15 @@ +approxim +theori +last +chang +approxim +theorythi +page +contain +inform +spring +version +math +cours +note +cours diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..56a70d12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,93 @@ +home +page +toni +silva +sectioncsm +instructor +toni +silva +contact +email +dsilva +wisc +offic +comput +scienc +statist +phone +offic +hour +appoint +textbookproblem +solv +object +program +walter +savitch +section +inform +sept +comp +stat +firstdai +noland +specifi +timet +sept +chamberlin +comput +comput +scienc +statist +cours +inform +handout +tent +syllabu +semest +late +polici +grade +criteria +academ +misconduct +handout +import +softwar +introduct +microsoft +windowshint +window +compilersth +window +oper +systememailmosaicnetscap +inform +introduct +borland +languageth +savitch +text +assign +program +tuesdai +program +tuesdai +program +tuesdai +program +thursdai +program +thursdai +solut +quizz +solut +quiz +solut +quiz +last +modifi +anthoni +silva +dsilva +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..eead663b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,853 @@ +comput +visionc +comput +visionfal +instructor +chuck +dyeroffic +csstelephon +email +dyer +wisc +eduoffic +hour +mondai +thursdai +appointmentteach +assist +bryan +sooffic +csstelephon +email +wisc +eduoffic +hour +wednesdai +fridai +appointmentstud +gener +cours +informationfundament +comput +vision +first +introduct +level +imag +analysi +method +includ +imag +format +edg +detect +featuredetect +segment +principl +defin +modul +forreconstruct +three +dimension +scene +inform +usingtechniqu +asshap +shade +depth +stereo +activ +method +scene +recoveri +depth +focu +andocclud +contour +detect +viewpoint +control +motion +detect +analysi +includ +track +model +base +three +dimension +object +recognit +schedul +tuesdai +thursdai +prerequisit +fundament +calculu +probabl +theori +linear +algebra +grade +midterm +exam +thursdai +novemb +homework +assign +project +class +particip +syllabu +requir +read +select +part +machin +vision +jain +kasturi +schunck +mcgraw +hill +york +collect +readingsfrom +journal +confer +proceed +sold +doit +document +small +batchessupplementari +read +sourcesonlin +informationmost +cours +inform +avail +onlin +urlhttp +wisc +dyer +html +read +assign +date +chapter +paper +doit +chapter +chapter +chapter +paper +doit +avail +handout +chapter +except +chapter +paper +doit +avail +handout +paper +doit +avail +handout +chapter +chapter +primarili +student +score +grade +homework +assignmentshomework +imag +enhanc +histogram +modif +option +make +copi +portrait +imag +public +imag +contrast +enhanc +face +byfirst +rotat +imag +crop +window +around +head +shoulder +final +interact +adjust +theintens +modif +function +color +editor +window +thewindow +button +also +free +modifi +thing +colorif +wish +found +good +grayscal +transformationsav +result +color +imag +andput +directori +whereth +origin +imag +send +email +tell +qualit +whatintens +transform +appli +improv +qualityof +imag +overal +imag +photo +board +student +class +feel +free +imag +ownweb +home +page +well +homework +skeleton +octob +learn +get +start +vista +vision +softwar +read +introduct +vista +program +manual +avail +doit +document +correct +origin +assign +method +chang +condit +least +instead +least +prevent +type +shape +disappear +altogeth +method +condit +also +count +transit +case +method +matrix +citi +block +distanc +infin +larg +constant +four +corner +method +matrix +chessboard +distanc +center +posit +test +imag +us +vision +imag +doit +vision +imag +hand +evalu +thin +result +might +want +follow +addit +experi +us +output +least +test +convert +skeleton +imag +ubyt +format +us +vconvert +edit +need +emac +clean +header +imag +file +contain +follow +line +right +repn +ubyt +line +component_interp +gradient +low_threshold +high_threshold +vlink +file +vsegedg +us +result +imag +hand +exampl +well +approach +might +us +determin +direct +index +finger +point +applic +note +thin +method +fact +delet +entir +shape +block +surround +disappear +thin +algorithm +base +paper +zhang +suen +fast +parallel +algorithm +thin +digit +pattern +comm +wang +comment +fast +parallel +algorithm +thin +digit +pattern +comm +comparison +student +result +differ +method +appli +point +hand +imag +rotat +version +test +imag +homework +imag +spline +mosaic +octob +read +paper +adelson +pyramid +method +imag +process +engin +burt +adelson +laplacian +pyramid +compact +imag +code +ieee +tran +comm +burt +adelson +multiresolut +spline +applic +imag +mosaic +tran +graphic +hint +faq +spline +imag +produc +student +class +homework +segment +snake +novemb +read +paper +kass +witkin +terzopoulo +snake +activ +contour +model +comput +vision +william +shah +fast +algorithm +activ +contour +curvatur +estim +comput +vision +graphic +imag +process +imag +understand +hint +faq +homework +project +decemb +student +project +titl +abstract +supplementari +read +addit +paper +might +help +select +topic +student +project +done +stanford +vision +cours +comput +account +account +cours +account +sparcstat +call +room +account +larg +disk +space +quota +store +imag +homework +project +sure +delet +imag +compress +other +gzip +howev +order +save +space +email +email +sent +list +goe +everyon +class +includ +instructor +printer +print +imag +laserprint +laser +laser +locat +room +altern +gener +printer +name +laser +send +output +four +printer +shortest +queue +caution +send +imag +printer +sure +check +queue +job +print +manner +send +imag +print +take +long +print +consider +vision +softwar +vista +vista +program +environ +us +homework +assign +code +locat +directori +vision +tool +vista +page +vision +tool +vista +execut +vision +tool +vista +interact +imag +displai +program +window +system +us +displai +imag +varieti +format +imgstar +basic +imag +process +oper +invok +us +unix +like +command +line +code +execut +manual +vision +tool +imgstar +khoro +khoro +imag +process +softwar +develop +environ +provid +basic +imag +process +modul +graphic +program +languag +interfac +rapid +prototyp +simpl +imag +process +algorithm +code +locat +directori +vision +tool +khoro +vision +tool +khoro +cantata +execut +start +interact +environ +netpbm +toolkit +convers +imag +larg +varieti +differ +format +base +pbmplu +packag +page +vision +tool +execut +vision +tool +matlab +matlab +numer +comput +visual +environ +signal +process +imag +process +toolbox +especi +relev +test +imag +test +imag +directori +vision +imag +although +requir +format +convers +us +imag +public +imag +numer +imag +databas +also +access +exampl +collect +test +imag +examin +examin +solut +exam +held +thursdai +novemb +regular +classroom +note +earli +start +time +exam +cover +topic +shape +shade +includ +read +textbook +paper +sold +doit +homework +assign +bring +exam +sheet +paper +note +want +side +exam +focu +main +idea +algorithm +proof +exam +type +question +ask +exam +exam +spring +exam +spring +exam +spring +exam +spring +link +interest +comput +vision +home +page +highli +recommend +chuck +dyer +link +interest +wandel +list +us +number +vision +scienc +hdtv +grand +allianc +hdtv +system +specif +advanc +televis +system +committe +atsc +atsc +document +postscript +spie +optic +scienc +engin +librari +vision +demo +project +appl +quicktim +imag +mosaic +product +panoramix +imag +mosaic +exampl +panoram +imag +mosaic +decfac +talk +synthet +face +video +rate +stereo +machin +virtual +realiti +project +qbic +imag +databas +project +miscellan +comput +vision +demo +comput +vision +relat +cours +boston +univers +cardiff +univers +khoro +digit +imag +process +onlin +cours +royal +institut +sweden +stanford +univers +univers +virginia +univers +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..1d871bde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,77 @@ +fall +section +fall +section +algebra +languag +program +instructor +dave +zimmermannemail +dzimm +wisc +educlass +meet +time +place +nolandoffic +offic +phone +offic +hour +announcementsprogram +wednesdai +octob +program +readi +fridai +novemb +gener +cours +informationc +home +pagecours +objectivesvectra +labc +consultantssyllabuswork +homeclass +handoutsprogramsexam +quizzeslectur +notesgreg +sharp +style +guidegrad +referenc +last +digit +number +quizzesprogramsexam +polici +informationemail +policygrad +policyl +policyacadem +misconduct +policytext +problem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +erratalast +modifi +dave +zimmermann +dzimm +wisc +base +greg +sharp +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..7104cbb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,42 @@ +linear +program +method +linear +program +method +gener +cours +inform +cours +offer +fall +spring +semest +page +variou +instructor +michael +ferri +spring +mangasarian +fall +graduat +cours +wisconsin +network +flow +integ +program +nonlinear +program +theori +nonlinear +program +algorithm +comput +larg +spars +system +last +modifi +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..1ac5241e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,197 @@ +spring +network +flowsspr +schedul +lectur +cours +mail +list +wisc +class +fridai +februari +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +wednesdai +teach +assist +offic +telephon +mail +leei +wisc +offic +hour +tuesdai +thursdai +requir +text +network +flow +ravindra +ahuja +thoma +magnanti +jame +orlin +prentic +hall +us +text +linear +program +chvatal +freeman +linear +network +optim +bertseka +press +gener +cours +inform +cours +overview +path +tree +cycl +data +structur +shortest +path +flow +cost +network +simplex +method +gener +flow +convex +equilibria +lagrangian +relax +multicommod +flow +applic +prerequisit +knowledg +linear +program +grade +homework +assign +grade +project +assign +grade +wednesdai +class +final +examin +grade +mondai +close +book +except +sheet +paper +allow +repres +question +assign +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +mondai +april +homework +mondai +april +homework +fridai +april +homework +fridai +comput +inform +unix +orient +session +first +time +unix +user +novic +unix +user +previous +us +unix +workstat +held +mondai +thursdai +first +week +class +room +mondai +thursdai +second +week +class +room +orient +session +last +minut +introduct +unix +login +access +gam +public +cshrc +local +cshrc +local +sourc +cshrc +local +alter +path +set +gam +directori +appropri +solari +machin +cours +machin +mathemat +program +home +page +cours +page +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..72998a6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,207 @@ +spring +also +math +comput +method +larg +spars +systemsspr +schedul +lectur +cours +mail +list +wisc +class +fridai +februari +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +wednesdai +teach +assist +offic +telephon +mail +leei +wisc +offic +hour +tuesdai +thursdai +recommend +textbook +matrix +comput +golub +loan +john +hopkinsunivers +press +second +edit +direct +method +spars +matric +duff +erisman +reid +oxford +scienc +public +finit +dimension +vector +space +halmo +springer +verlag +gener +cours +inform +cours +overview +introduct +storag +scheme +gaussian +elimin +dens +error +analysi +spars +local +pivot +strategi +matrix +modif +iter +linear +solver +spars +least +squar +spars +nonlinear +equat +optim +applic +parallel +techniqu +eigenvalu +eigenvector +prerequisit +math +consent +instructor +grade +homework +assign +grade +project +assign +grade +wednesdai +class +final +examin +grade +mondai +close +book +except +sheet +paper +allow +repres +question +assign +homework +mondai +februari +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +mondai +april +homework +mondai +april +homework +fridai +april +homework +fridai +handout +ieee +arithmet +handout +spars +handout +comput +inform +cours +machin +unix +orient +session +first +time +unix +user +novic +unix +user +previous +us +unix +workstat +held +mondai +thursdai +first +week +class +room +mondai +thursdai +second +week +class +room +orient +session +last +minut +introduct +unix +instruct +matlab +mathemat +program +home +page +cours +page +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..3ef1d088 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,82 @@ +fall +construct +compilersfal +schedul +lectur +tuth +csst +instructor +charl +fischer +offic +telephon +mail +fischer +wisc +offic +hour +mondai +wednesdai +fridai +appoint +teach +assist +krishna +kunchithapadam +offic +telephon +mail +krisna +wisc +offic +hour +tuesdai +thursdai +appoint +program +assign +homework +read +teach +assist +weyer +offic +telephon +mail +weyer +wisc +offic +hour +mondai +wednesdai +fridai +class +text +craft +compil +charl +fischer +richard +leblanc +benjamin +cum +check +regularli +gener +cours +inform +cours +overview +date +grade +examin +get +start +handout +lectur +note +us +program +tool +link +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..d7b2e584 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,67 @@ +fall +section +fall +section +algebra +languag +program +instructor +greg +sharpemail +greg +wisc +eduoffic +offic +phone +offic +hour +appt +grader +krishna +kunchithapadamemail +krisna +wisc +edugener +cours +informationc +home +pagecours +objectivesvectra +labc +consultantssyllabuscours +difficultywork +homenewsstartup +informationclass +noteshomeworkexam +quizzesstyl +guideemail +archivepolici +informationemail +policygrad +policyl +policyacadem +misconduct +polici +must +read +textproblem +solv +object +porgrammingwalt +savitchaddison +weslei +publish +compani +pleas +list +known +erratalast +modifi +greg +sharpgreg +wisc +http +wisc +greg +greg +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..018ed98d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,245 @@ +class +home +page +fall +algebra +languag +program +section +fall +chad +lane +wisc +announc +program +line +tue +import +need +version +tribbl +compil +problem +enumer +type +recogn +outsid +class +must +declar +insid +public +section +class +result +privat +section +need +bump +privat +section +bottom +shown +version +also +note +definit +randomintinrang +defin +line +need +correspond +function +bodi +tribbl +call +within +class +work +file +prog +forgot +chang +enumer +type +valu +uppercas +need +compil +everyth +chang +line +copi +prog +want +overwrit +obsolet +copi +also +us +includ +prog +chang +prog +might +also +want +check +help +link +someth +ad +comment +suggest +program +import +name +throughout +program +project +part +name +project +directori +name +file +name +updat +program +descript +consist +prog +version +us +prog +pleas +make +sure +consist +name +discrep +sampl +chri +weaver +public +directori +call +prog +shouldn +matter +program +sampl +program +line +readi +crucial +read +entir +assign +understand +class +basic +attempt +earli +start +hard +requir +time +piec +everyth +togeth +bring +question +class +tuesdai +midterm +grade +freshmen +either +mean +fine +mean +great +thumb +grade +mean +noth +freshman +disregard +stuff +class +tent +semest +syllabu +read +assign +program +assign +handout +prepar +quizz +test +quizz +test +solutionscours +inform +polici +text +problem +solv +object +program +walter +savitch +addison +weslei +publish +compani +meet +vleck +polici +administr +inform +grade +polici +late +polici +mail +inform +attend +polici +academ +misconduct +link +inform +introduct +microsoft +window +first +introduct +borland +second +home +page +vectra +sourc +code +text +consult +extra +refer +materi +mani +question +answer +work +home +sharp +lectur +note +sharp +style +guidelast +modifi +chad +lane diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..0efc0266 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,87 @@ +fall +introduct +program +languag +compilersspr +stori +month +octob +schedul +lectur +tuth +comp +stat +recit +psycholog +instructor +susan +horwitz +offic +telephon +mail +horwitz +wisc +offic +hour +tuesdai +fridai +appoint +teach +assist +rahul +kapoor +offic +telephon +mail +rahul +wisc +offic +hour +mondai +wednesdai +appoint +text +reserv +wendt +librari +compil +principl +techniqu +tool +sethi +ullman +craft +compil +fischer +leblanc +check +regularli +gener +cours +inform +cours +overview +date +inform +assign +exam +grade +includ +late +polici +get +start +read +program +assign +homework +examin +lectur +note +us +program +tool +grade +email +link +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..0eb7b07f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,70 @@ +home +page +hummert +sectionsc +instructor +hummert +contact +email +hummert +wisc +offic +comput +scienc +statist +offic +phone +home +phone +offic +hour +mondai +thursdai +announc +textbookproblem +solv +object +program +walter +savitch +section +inform +psych +psych +grade +comput +comput +scienc +statist +cours +inform +handout +tent +syllabu +semest +late +polici +grade +criteria +academ +misconduct +viewgraph +import +softwar +introduct +microsoft +windowshint +window +compilersth +window +oper +systememailmosaicnetscap +inform +introduct +borland +languageth +savitch +text +assign +program +program diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..30a8eff6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,53 @@ +infoc +info +section +name +bodnersect +offic +hour +mondai +thursdai +offic +number +comput +scienc +statist +hall +doit +phone +mail +jonb +wisc +eduher +thing +keep +mind +need +copi +guid +section +click +choos +print +file +menu +click +question +pleas +stop +offic +hour +send +mail +grade +section +avail +click +bodner +jonb +wisc +mound +madison +last +modifi +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..87d83305 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,224 @@ +intro +kunen +section +introduct +artifici +intellig +notic +inform +spring +detail +coursewil +appear +later +instructor +kunenoffic +stat +buildingtelephon +email +kunen +wisc +eduoffic +hour +appoint +email +grade +four +program +assign +count +three +exam +count +thirdexam +schedul +time +place +final +program +turn +time +midnight +thedai +late +assign +loos +late +topic +cover +follow +order +topic +entir +logic +buti +design +topic +program +assignmenti +discuss +program +introduct +lisp +program +search +game +plai +program +introduct +prolog +natur +languag +understand +program +learn +neural +network +program +logic +deduct +plan +reason +uncertain +knowledg +lisp +inform +sinc +lisp +us +program +coursewil +begin +discuss +common +lisp +would +probabl +usefulto +lisp +refer +avail +supplement +lecturesand +line +help +avail +within +lisp +manypaperback +avail +probabl +like +common +lispcraft +wilenski +anoth +possibl +ansi +common +lisp +book +graham +code +us +book +line +ultim +lisp +refer +steel +common +lisp +languag +edit +page +also +avail +line +click +inform +us +common +lisp +sun +addit +inform +textbook +artifici +intellig +modern +approach +russel +norvig +class +time +recit +session +engr +psych +essentiallli +materi +present +answer +question +give +hint +program +assign +review +exam +usual +last +minut +sinc +teach +section +attend +recit +section +differ +lectur +section +cours +directori +cours +kunen +public +alpha +beta +problem +previou +exam +cours +directori +alpha_beta +click +line +best +first +search +problem +previou +exam +cours +directori +astar +click +line +exam +fall +postscript +exam +exam +exam +final +still +older +exam +cours +directori +last +chang +novemb +kunen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..ac43ba9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,168 @@ +fall +advanc +comput +architectur +ifal +offer +cours +inform +instructor +mark +hilloffic +comp +statemail +markhil +wisc +eduoffic +hour +tuesdai +fridai +appoint +shenoffic +comp +statphon +email +mshen +wisc +eduoffic +hour +mondai +thursdai +appoint +tabl +content +reader +lectur +note +homework +project +miscellaneawhat +give +talksread +reader +tabl +content +full +paper +doit +reader +tabl +content +full +paper +doit +reader +tabl +content +full +paper +doit +reader +tabl +content +full +paper +doit +lectur +note +introduct +chapter +perform +cost +chapter +instruct +set +chapter +cach +chapter +part +memori +chapter +part +talluri +hill +basic +pipelin +chapter +part +basic +pipelin +chapter +part +instruct +level +parallel +chapter +part +instruct +level +parallel +chapter +part +input +output +chapter +interconnect +chapter +note +parallel +process +chapter +homework +homework +assign +solut +homework +assign +solut +homework +assign +solut +homework +assign +solut +homework +assign +solutionproject +assign +propos +novemb +class +talk +decemb +class +report +decemb +noonmiscellanea +give +talk +spring +final +spring +project +assign +spring +midterm +us +first +edit +hennessi +patterson +architectur +qualifi +exam +sourc +hard +question +comput +architectur +seminar +wisconsin +comput +architectur +group +world +wide +comput +architectur +inform diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..0cd45cf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,105 @@ +section +section +instructor +michael +birk +email +mbirk +wisc +offic +comp +phone +offic +home +offic +hour +offic +appoint +assign +program +program +administr +inform +text +problem +solv +object +program +walter +savitch +room +time +psycholog +tuesdai +thursdai +comp +vectra +syllabu +comput +grade +grade +standard +late +assign +polici +handin +procedur +cheat +academ +misconduct +consult +exampl +string +class +us +dynam +alloc +us +dynam +alloc +ration +class +exampl +us +oper +overload +complex +class +repres +float +point +complex +number +anoth +exampl +oper +overload +intstack +class +simpl +exampl +unlimit +size +data +structur +classinfo +exampl +us +struct +us +class +link +home +page +home +page +introduct +microsoft +window +introduct +borland +tutori +us +debugg +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..f99bfe93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,165 @@ +section +section +instructor +michael +birk +email +mbirk +wisc +offic +comp +phone +offic +home +offic +hour +offic +appoint +announc +test +case +program +avail +dice +code +discuss +class +class +rank +last +four +digit +student +number +past +exam +onlin +spring +fall +spring +note +hangman +assign +program +exam +mondai +octob +comp +room +lectur +instruct +format +syllabu +first +eight +week +avail +second +eight +week +come +soon +instruct +print +program +output +comput +outsid +late +polici +final +room +chang +meet +comp +assign +program +program +program +program +program +program +program +administr +inform +text +problem +solv +object +program +walter +savitch +room +time +comp +tuesdai +thursdai +comp +vectra +syllabu +comput +grade +grade +standard +late +assign +polici +handin +procedur +cheat +academ +misconduct +consult +exampl +string +class +us +dynam +alloc +us +dynam +alloc +ration +class +exampl +us +oper +overload +complex +class +repres +float +point +complex +number +anoth +exampl +oper +overload +intstack +class +simpl +exampl +unlimit +size +data +structur +classinfo +exampl +us +struct +us +class +link +home +page +introduct +microsoft +window +introduct +borland +tutori +us +debugg +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..0956fa48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,107 @@ +section +dave +melskithes +page +chang +frequent +press +reload +button +daili +get +start +alreadi +stuff +assign +page +uncomfort +comput +andth +softwar +page +help +link +info +instructor +david +melski +offic +comput +scienc +statist +floor +phone +offic +hour +email +melski +wisc +click +attach +pleas +section +info +section +meet +noland +section +meet +psycholog +text +problem +solv +object +program +walter +savitch +addison +wesleypublish +gener +stuff +us +us +window +usingborland +refer +materi +rough +syllabu +section +email +archiv +section +email +archiv +section +polici +info +academ +misconduct +must +read +rule +thumb +share +code +consult +help +grade +late +work +email +check +often +essenti +link +assign +solut +handout +list +tutor +avail +last +modifi +david +melski +melski +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..bb328c04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,301 @@ +home +page +section +fall +section +algebra +languag +program +instructor +milo +martin +milo +wisc +time +locat +psychologyinstructor +milo +martin +email +milo +wisc +eduoffic +offic +hour +tuesdai +thursdai +appoint +offic +phone +announcementsthi +page +chang +frequent +respons +check +page +often +novemb +quiz +take +home +given +class +todai +place +onth +homepag +novemb +ad +file +us +project +program +page +homepag +novemb +room +test +scheduledfor +wednesdai +novemb +octob +updat +current +grade +haseveryth +quiz +pleas +check +make +sure +isaccur +octob +ad +link +coupl +page +withinform +html +languag +us +page +linksar +section +titl +document +octob +program +avail +onfridai +novemb +take +home +quiz +mondai +novemb +remind +exam +wednesdai +novemb +gener +class +inform +current +class +grade +gener +inform +sheet +turn +assign +syllabu +code +style +guid +home +page +vectra +consult +fall +consult +schedul +academ +misconduct +policyclass +document +final +bankaccount +class +code +bankaccount +bankaccount +main +postscript +bankaccount +class +code +bankaccount +postscript +bankaccount +struct +code +bankaccount +postscript +minmax +exampl +code +findth +minimum +maximum +list +number +case +enteredfrom +stdin +form +code +creat +formlett +data +specifi +file +us +file +theopen_fil +function +introduc +class +call +refer +class +exampl +function +user +input +withprompt +call +refer +version +function +user +input +withprompt +call +valu +version +document +beginn +guid +html +standard +introduct +toth +html +languag +html +refer +guid +refer +guid +latest +html +standard +releas +good +time +viru +hoax +code +ethic +andprofession +conductassign +class +survei +questionar +requir +mondai +septemb +program +wednesdai +septemb +program +wednesdai +septemb +program +wednesdai +septemb +program +fridai +octob +program +mondai +octob +program +wednesdai +octob +program +fridai +novemb +program +program +program +wednesdai +decemb +quiz +quiz +solut +score +mondai +septemb +quiz +solut +score +wednesdai +octob +quiz +solut +score +mondai +octob +quiz +solut +score +take +home +quiz +mondai +novemb +exam +exam +solut +score +wednesdai +octob +exam +solut +score +wednesdai +novemb +exam +solut +score +tuesdai +decemb +textbook +problem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +errata +milo +martin +milo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..1d0c329a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,140 @@ +home +page +martin +ream +classc +section +algebra +languag +programmingspr +martin +ream +teach +assist +absolut +nothingeverydai +informationc +class +inform +pagescommon +programmingmistakesarch +section +section +class +mail +list +messag +sent +list +semest +calendar +program +tuesdai +januari +program +tuesdai +februari +program +tuesdai +februari +program +thursdai +februari +program +thursdai +march +exam +tuesdai +march +program +tuesdai +march +program +thursdai +april +program +tuesdai +april +exam +tuesdai +april +program +thursdai +april +program +thursdai +final +exam +thursdai +placeto +announcedcours +detail +contact +email +mream +wisc +offic +comput +scienc +statist +dayton +phone +offic +hour +appoint +talk +class +send +email +textbookproblem +solv +object +program +walter +savitch +section +inform +section +noland +section +noland +comput +csst +contain +vectra +run +window +andborland +addit +cours +inform +tent +syllabu +semest +extra +materi +late +polici +grade +criteria +academicmisconduct +rule +thumb +share +code +assign +work +anyform +former +student +made +bigtodd +thielwendi +staatsabout +instructor +last +modifi +martin +ream +mream +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..3c8b8e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,130 @@ +fall +section +fall +section +algebra +languag +program +nolandinstructor +mike +steeleemail +msteel +wisc +eduoffic +comp +stat +buildingoffic +hour +time +appoint +soffic +phone +import +announcementsi +extend +deadlin +program +pleas +check +mail +read +thenew +program +deadlin +informationmidterm +tuesdai +novemb +comp +stat +current +grade +line +includ +grade +everyth +hand +tuesdai +novemb +sampl +program +taken +exampl +pastfew +week +class +fill +stuff +gloss +makefulli +function +program +find +us +ifyou +miss +even +didn +understand +exampl +note +exampl +page +near +bottom +rememb +check +mail +clarif +programmingassign +gener +cours +informationc +home +pagecours +objectivesabout +vectra +labc +consultantscours +syllabu +read +assignmentsnot +work +homeclass +handoutsprogram +assignmentsexam +quizzessom +note +examplespolici +informationemail +policygrad +polici +late +polici +academ +misconduct +policyus +refer +pagesintroduct +microsoft +windowsintroduct +borland +greg +sharp +styleguid +codetextproblem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +erratalast +modifi +mike +steel +msteel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..872b7374 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,192 @@ +fall +session +infoc +fall +sessionalgebra +languag +program +instructor +andrew +prockemail +prock +wisc +eduoffic +comput +scienc +statist +offic +phone +offic +hour +thgrader +haihong +wangemail +wisc +eduoffic +comput +scienc +statist +offic +phone +consult +hour +mtwrannounc +grade +link +pleas +check +grade +verifi +score +modifi +crazi +offic +hour +todai +todai +made +minor +modif +crazi +file +assign +copi +alreadi +gotton +assign +five +onlin +exam +result +rang +ad +grade +polici +assign +sampl +quiz +test +onlin +gener +perus +test +quiz +anoth +think +give +good +idea +level +knowledg +need +rememb +topic +test +test +onlin +webpag +seem +done +email +notic +error +question +make +sure +check +assing +assign +onlin +well +like +work +ahead +final +tuesdai +decemb +mark +calendar +everyon +requir +take +final +check +link +page +feel +thing +locat +import +carefulli +read +polici +administr +inform +welcom +class +class +inform +text +problem +solv +walter +savitch +room +comput +scienc +statist +time +section +section +tent +syllabu +section +grade +section +grade +grade +polici +late +polici +mail +polici +academ +misconductcours +materi +gener +cours +info +style +guid +lectur +note +assign +email +archiv +section +email +archiv +section +info +introduct +microsoft +window +first +introduct +borland +second +home +page +vectra +sourc +code +text +consult +extra +refer +materi +mani +question +answer +prock +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..d318e3b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,115 @@ +kelli +page +kelli +ratliff +current +grade +keyword +search +mail +messag +exampl +enter +function +without +quot +everi +paragraph +us +word +function +also +wildcard +link +mail +messag +sent +semest +info +info +info +info +info +info +info +info +info +addit +inform +interest +backup +copi +disk +filesviru +inform +world +wide +faqfun +stuff +usenet +oracl +resourc +index +virtual +tourist +world +mapth +space +shuttl +clickabl +badger +herald +site +comicshumor +abort +retri +ignor +nine +type +usersfin +weeklab +jokesget +softwar +comput +home +might +interest +tryingsom +sharewar +freewar +softwar +avail +internet +archiv +program +usual +compress +need +somecompress +archiv +softwar +unpack +need +reviewsom +command +try +biggest +best +maintain +archiv +simtel +minclud +link +simtel +file +post +usenet +simtel +site +infocompress +infofavorit +site +clickher +visit +desautel +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..1bb717e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,94 @@ +home +page +russ +man +csc +introduct +comput +program +instructor +russel +manningemail +rman +wisc +eduoffic +room +comput +scienceoffic +hour +find +basement +saturdai +except +home +footbal +game +sundai +come +keep +compani +work +like +vectra +although +student +prioriti +grade +lectur +final +click +textbook +problem +solv +object +program +walter +savitch +section +inform +semest +univers +rotc +build +comput +room +comput +scienc +statist +assign +program +mondai +novemb +program +wednesdai +novemb +program +program +program +program +cours +inform +handout +syllabu +late +polici +academ +misconduct +import +softwar +introduct +microsoft +windowshint +window +compilersth +window +oper +systememailmosaicnetscap +inform +introduct +borland +languageth +savitch +textold +quizz +none diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..a63d075a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,143 @@ +introduct +artifici +intellig +introduct +artifici +intellig +gener +cours +inform +cours +offer +fall +spring +semest +academ +year +section +thefal +spring +topic +cover +principl +knowledg +base +search +techniqu +best +first +search +alpha +beta +search +knowledg +represent +us +predic +logic +semant +network +connectionist +network +frame +rule +autom +deduct +applic +problem +solv +plan +expert +system +game +plai +vision +natur +languag +understand +learn +robot +program +includ +lisp +possibl +prolog +previou +knowledg +languag +assumedprerequisit +page +variou +instructor +chuck +dyer +spring +fall +kunen +fall +spring +jude +shavlik +fall +sabbat +spring +bryan +spring +local +relat +link +madison +seminar +qualifi +exam +recent +tabl +content +abstract +journal +mostli +wendt +librari +readabl +wisc +wisc +group +wisc +comput +vision +group +wisc +machin +learn +group +wisc +robot +group +wisc +comput +biologi +includ +wisc +dept +graduat +cours +wisconsin +machin +learn +deduct +problem +solv +comput +vision +robot +motion +plan +extern +relat +link +last +modifi +jude +shavlik +shavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..6e1ba1dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,484 @@ +machin +learn +machin +learn +spring +gener +cours +inform +instructor +jude +shavlik +stat +shavlik +wisc +offic +mondai +wednesdai +teach +assist +geoff +weinberg +highwai +lab +basement +build +geoffrei +wisc +offic +mondai +wednesdai +offic +phone +home +home +cours +overview +postscript +cours +syllabu +postscript +archiv +class +email +readabl +wisc +suggest +class +project +postscript +read +assign +assign +read +chapter +theori +refin +chapter +comput +learn +theori +mitchel +textbook +feedback +author +assign +april +read +learn +logic +definit +relat +quinlan +knowledg +base +artifici +neural +network +towel +shavlik +assign +april +read +chunk +soar +laird +rosenbloom +newel +learn +knowledg +level +dietterich +assign +april +read +chapter +analyt +learn +mitchel +textbook +feedback +author +april +assign +april +read +journal +articl +unsupervis +learn +fisher +rumelhart +zipser +cogsci +lenat +assign +april +read +chapter +genet +algorithm +mitchel +textbook +feedback +author +april +assign +march +read +chapter +reinforc +learn +mitchel +textbook +feedback +author +april +assign +march +read +backpropag +basic +theori +rumelhart +assign +februari +read +chapter +neural +network +mitchel +textbook +feedback +author +march +also +read +journal +articl +shavlik +moonei +towel +empir +compar +backprop +assign +februari +read +chapter +concept +space +mitchel +textbook +feedback +author +februari +assign +januari +read +machin +learn +experiment +scienc +kibler +feedback +author +februari +assign +januari +read +chapter +introduct +mitchel +textbook +feedback +author +januari +assign +journal +articl +towel +shavlik +kbann +algorithm +mondai +journal +articl +fisher +cobweb +algorithm +wednesdai +april +journal +articl +shavlik +moonei +towel +empir +compar +backprop +wednesdai +march +sure +answer +sheet +paper +best +idea +next +summar +assign +paper +sentenc +summari +lead +instead +analyz +late +polici +brr +hand +materi +cover +lectur +homework +assign +homework +learn +reinforc +learn +wednesdai +april +homework +train +neural +network +mondai +march +homework +experiment +methodolog +mondai +februari +homework +induc +decis +tree +mondai +februari +homework +creat +person +concept +mondai +januari +late +polici +start +class +student +five +free +late +dai +semest +exhaust +penalti +measur +noon +noon +weekend +free +make +tractabl +accept +week +late +previous +us +homework +postscript +homework +spring +migrat +semest +progress +homework +induc +decis +tree +homework +heurist +search +concept +space +homework +train +neural +network +homework +learn +reinforc +learn +homework +version +space +postscript +homework +explan +base +learn +postscript +homework +cobweb +postscript +previou +exam +postscript +spring +spring +spring +spring +spring +spring +spring +spring +ineedagoodicon +relat +link +machin +learn +journal +line +page +nip +paper +premier +neural +confer +recent +tabl +content +abstract +select +journal +mostli +wendt +librari +readabl +wisc +irvin +dataset +archiv +pointer +cours +knowledg +discoveri +databas +neural +network +resourc +stuff +machin +learn +benchmark +ieee +neural +network +council +sever +journal +connect +page +intern +societi +adapt +behavior +bibliographi +server +austrian +institut +neural +network +bibliographi +server +austrian +institut +resourc +canadian +server +link +peopl +extern +refer +help +program +assign +page +us +akcl +common +lisp +department +workstat +tip +us +emac +lisp +code +write +frequent +ask +question +lisp +cours +comput +tip +us +akcl +debugg +help +lisp +novic +lisp +frequent +ask +question +steel +common +lisp +languag +edit +refer +manual +textbook +print +printer +print +page +relat +local +link +wisc +group +wisc +math +program +group +wisc +comp +biologi +includ +wisc +group +wisc +dept +wisc +librari +local +link +last +modifi +jude +shavlikshavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..671267cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,358 @@ +fall +advanc +oper +systemsfal +marvin +solomon +offic +comput +sciencesoffic +hour +troffic +phone +email +address +solomon +wisc +new +watch +space +latest +updat +last +updat +schedul +project +present +list +final +exam +mondai +inroom +comput +scienc +statist +build +project +present +room +fridai +noon +exampl +past +midterm +examtogeth +sampl +answer +midterm +exam +wednesdai +octob +pmin +room +comput +scienc +final +exam +mondai +decemb +exact +time +place +bedetermin +project +suggest +informationabout +project +avaiabl +readabl +version +figur +multic +memori +manag +paper +avail +content +new +summari +lectur +inform +text +cours +schedul +grade +project +project +present +summari +cours +intend +give +broad +exposur +advancedoper +system +topic +assum +student +good +semest +cours +onoper +system +equival +cover +topic +normal +present +cours +inconsider +detail +synchron +interprocess +commun +memori +manag +file +system +protect +secur +distribut +system +lectur +inform +lectur +tuesdai +thursdai +comput +sciencestextther +realli +satisfactori +textbook +graduat +level +operatingsystem +class +usea +select +classic +papersa +text +cours +structur +around +read +journal +articl +andconfer +proceed +purchas +read +doit +formerli +macc +document +deskfor +read +semest +similar +ident +thoseof +previou +semest +us +copi +make +avail +individu +paper +youto +copi +class +discuss +topic +relev +current +paper +click +herefor +tent +schedul +lectur +detail +detail +review +paper +willinstead +adiscuss +major +topic +theme +us +paper +focal +point +activ +particip +discuss +strongli +encourag +will +particip +activ +daili +class +geta +expect +quietli +listen +week +much +lessout +class +gradingther +exam +midterm +final +project +worth +total +grade +exam +design +verifi +carefulli +thoroughli +read +read +projecty +requir +complet +term +project +list +suggest +topic +provid +strongli +encourag +make +project +project +involv +implement +tool +experiment +implementationsof +algorithm +suggest +research +literatur +measur +studi +simul +project +must +experiment +compon +literatur +survei +unvalid +design +paper +suffici +project +done +person +group +larger +smaller +group +approv +case +basi +write +term +paper +summar +result +project +paper +must +meet +standard +research +public +grade +qualiti +write +well +content +also +make +ashort +presentationabout +project +class +project +presentationsher +schedul +project +presen +present +room +comput +scienc +statist +time +approxim +manyan +stubb +andrew +bigg +franci +salmon +gunawan +agu +qingmin +wang +chien +pang +jame +chen +eric +larsen +conroi +fritz +craig +jordan +prasad +deshpand +avinash +sodani +basnei +rajesh +raman +biswadeep +chen +taxiao +wang +yanm +xinyu +richard +zhang +todd +munson +wenjun +xinyi +wang +yufei +zeyu +chen +sridhar +gopal +michael +leesolomon +wisc +eduthu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..810f16f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +chiang +time +gradesgo +homepag +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..fc42e9c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,167 @@ +section +overal +structur +program +primarili +exercis +gener +problem +solv +write +fortran +code +though +want +time +solut +algorithm +even +depend +particular +program +languag +fortran +solut +class +follow +mondai +exercis +comput +labyou +us +vectra +comput +scienc +statist +contain +hewlett +packard +vectra +run +microsoft +window +microsoft +fortran +open +seven +dai +week +except +certain +holidai +printer +room +locat +across +hall +quota +page +print +exce +quota +must +contact +either +mail +go +room +offic +hour +prefer +mail +increas +quota +bewar +machin +vectra +aren +configur +correctli +particular +machin +along +wall +closest +outsidehallwai +toward +left +hand +part +room +avoid +also +home +dorm +comput +write +program +howev +probabl +purchas +copi +microsoft +fortran +lahei +person +fortran +insid +cover +textbook +also +work +comput +lab +campu +howev +fortran +compil +pleas +first +us +softwar +us +includ +microsoft +window +microsoft +fortran +mail +netscap +pointer +interest +home +page +jeff +lampert +home +page +comput +scienc +depart +home +page +start +point +internet +explor +lyco +search +world +wide +keyword +dilbert +comic +relief +long +night +assign +copyright +copi +modifi +gareth +bestor +bestor +wisc +last +modifi +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..2b42565f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,55 @@ +homepagec +homepagewelcom +homepag +purpos +homepag +provid +student +inform +pertain +section +sinc +page +chang +frequent +responsibilityto +check +page +often +gener +informationinstructor +todd +munsonemail +tmunson +wisc +eduoffic +comput +scienc +statisticsoffic +phone +offic +hour +appointmentsect +textbook +problem +solv +walter +savitchclass +informationexpectationssyllabusexam +schedule +mailgradingl +assignmentsextra +creditpoliciesconsult +responsibilitiesacadem +misconductoth +informationdaili +note +assignmentshomework +assignmentsprogram +document +us +classoth +program +resourcesc +homepagetmunson +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..bca2b44b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,92 @@ +section +comput +scienc +section +time +place +nolandinstructor +todd +turnidgeoffic +hour +tbalab +hour +tbaannouncementsclass +note +class +handout +struct +avail +program +avail +get +start +us +inform +read +get +start +gener +inform +text +facil +grade +polici +syllabu +tent +syllabu +cours +assign +text +read +program +assign +solut +handout +collect +class +handout +date +class +mail +list +inform +send +messag +classa +whole +cours +home +page +home +page +section +muchinform +gener +interest +includ +inform +tutor +consult +window +oper +system +email +netscap +section +find +inform +provid +byother +instructor +help +exampl +handout +gregorysharp +cours +difficulti +last +modifi +todd +turnidg +turnidg +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..09f580b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,120 @@ +section +section +algebra +languag +program +announc +exam +thursdai +psych +reload +page +everi +time +login +instructor +chri +weaver +email +weaver +wisc +email +polici +offic +comput +scienc +statist +offic +phone +offic +hour +appoint +offic +hour +first +week +grader +zhang +email +wisc +offic +comput +scienc +statist +offic +phone +locat +section +noland +section +noland +comput +vectra +comput +scienc +statist +hour +seven +dai +week +staf +consult +gener +cours +info +syllabu +text +problem +solv +object +program +walter +savitch +addison +weslei +isbn +includ +errata +sourc +code +text +misconduct +policyassign +grade +handout +program +assign +homework +read +assign +lectur +note +handout +exampl +program +exam +quiz +kei +late +polici +grade +polici +style +guidelin +still +rough +print +paper +statement +chri +weaver +comput +scienc +depart +univers +wisconsin +madison +last +chang +chri +weaver diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..742038f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,185 @@ +spring +advanc +comput +architectur +spring +offer +cours +inform +instructor +prof +jame +smith +offic +engin +hall +offic +hour +tue +thur +offic +phone +email +wisc +princeoffic +engin +hall +offic +hour +offic +phone +mail +address +princ +wisc +tabl +content +new +read +lectur +note +homework +project +miscellaneousnew +homework +soln +special +offic +hour +final +exam +biochemistri +pmread +read +tabl +content +full +paper +doit +read +tabl +content +full +paper +doit +read +tabl +content +full +paper +doit +read +tabl +content +full +paper +doit +lectur +note +cours +overview +introduct +comput +architectur +perform +cost +instruct +set +pipelin +advanc +pipelin +part +advanc +pipelin +part +vector +vliw +limit +softwar +cach +memori +advanc +cach +advanc +cach +main +memori +main +memori +system +disk +arrai +disk +arrai +interconnect +technolog +interconnect +technolog +network +network +multiprocessor +part +multiprocessor +part +multiprocessor +part +multiprocessor +part +homework +homework +assign +homework +solut +homework +assign +homework +solut +homework +assign +note +homework +homework +solut +homework +assign +homework +solut +homework +assign +homework +solut +project +project +assign +trace +inform +project +list +miscellan +us +tool +project +review +midterm +midterm +exam +specmark +consid +harm +analysi +pipelin +clock +detail +design +reserv +station +lectur +network +rout +lectur +network +rout +cach +summari +final +exam diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..380daa58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,409 @@ +main +pagecomput +scienc +comput +graphic +page +evolv +incomplet +hopefulli +us +begin +home +page +forc +deal +comput +graphic +scientif +visual +atth +level +comput +graphic +principl +practic +folei +computergraph +alan +watt +current +taught +bruceland +also +project +leader +visual +group +atth +cornel +theori +center +content +cours +summari +administrivia +textbook +error +homework +staff +schedul +relev +math +cornel +math +graphic +cours +cours +summari +comput +graphic +topic +requir +mathemat +program +artist +skill +among +other +content +comput +graphic +fundament +cours +cornel +focus +mathemat +skill +associ +cours +program +cours +designedto +help +illumin +math +cours +cover +follow +topic +year +construct +surfac +explicit +polygon +list +parametr +oper +quadric +surfac +figur +rotat +swept +surfac +tensor +product +surfac +parametr +surfac +viewer +implicit +surfac +quadric +surfac +blobbi +model +oper +surfac +tessel +comput +surfac +normal +hierarch +group +simpl +object +form +complex +surfac +scene +composit +anim +introduct +homogen +coordin +geometr +transform +build +object +kinemat +anim +hierarch +model +combin +prototyp +object +mimic +connect +rigid +part +invers +kinemat +dynam +system +anim +differenti +equat +cellular +automata +view +group +object +camera +transform +clip +view +volum +project +onto +screen +parallel +project +perspect +project +camera +simul +transform +clip +project +stereo +vision +render +shade +light +human +color +vision +color +devic +limit +light +geometr +optic +wave +gourand +phong +shade +hidden +surfac +remov +buffer +transpar +shadow +scan +convers +anti +alias +polygon +pixel +human +vernier +hyper +acuiti +imag +space +method +object +space +method +surfac +properti +modif +textur +map +bump +map +volum +textur +model +scientif +data +scientif +visual +aspect +scientif +data +dimension +field +scalar +vector +field +object +wall +channel +scalar +field +contour +line +surfac +color +color +mispercept +volum +render +vector +field +difficulti +arrow +field +line +particl +advect +multiparamet +high +dimension +data +dynam +system +administrivia +error +textbook +comput +graphic +alan +watt +homework +assign +homework +homework +homework +homework +homework +march +homework +march +homework +march +homework +april +homework +april +homework +cours +schedul +prelim +first +test +spring +serv +gener +guid +test +style +also +list +schedul +prelim +cornel +spring +break +prelim +religi +holidai +student +educationlaw +mandat +faculti +make +avail +opportun +tomak +examin +miss +religi +belief +inord +facilit +prepar +makeup +exam +student +intendingto +absent +order +observ +holidai +requestedto +notifi +instructor +last +lectur +final +schedul +exam +period +tuesdai +upson +final +mean +standard +deviat +staff +bruce +land +rhode +bruce +cornel +jing +huang +upson +huang +upson +justin +mccune +upson +jmccune +csrelev +math +cornel +univers +math +graphic +cours +univers +california +davi +univers +waterloo +univers +wale +colleg +cardiff +univers +manchest +oregon +state +universityrel +topic +final +project +anim +visual +cornel +theori +center +comment +theori +center +onlin +document +welcom +sent +todoc +comment +cornel +last +modifi +land +copyright +statement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..f0aa7c3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,133 @@ +main +pagecomput +scienc +comput +graphic +laboratori +exercisesthi +site +cornel +universityundergradu +comput +graphic +laboratori +page +contain +materi +includ +procedur +softwar +student +result +section +deal +computergraph +scientif +visual +level +computergraph +principl +practic +folei +dcomput +graphic +watt +current +taught +bruceland +also +project +leader +visual +group +atth +cornel +theori +center +page +first +place +sigucc +basededuc +train +materi +competit +exercis +get +start +build +polygon +object +parametr +surfac +transform +model +us +virtual +camera +camera +perspect +transform +light +textur +bump +map +model +scientif +visual +design +project +physic +base +anim +implicit +surfac +procedur +textur +exercis +done +order +note +exercis +mark +current +mark +areinclud +refer +current +exercis +chat +facil +commun +aboutc +relat +topic +spring +semesteraccess +restrict +enrol +student +relat +topic +final +project +anim +visual +cornel +theori +center +comment +theori +center +onlin +document +welcom +sent +todoc +comment +cornel +last +modifi +land +copyright +statement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..c4bdbe1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,75 @@ +cornel +fall +topic +comput +graphic +fall +semestereach +group +student +chose +current +research +topic +computergraph +read +appropri +paper +implement +code +group +deliv +lectur +chosen +topic +anddocu +work +document +student +topic +metabal +model +window +michael +arcuri +alex +benton +model +human +facial +express +huang +hung +content +base +imag +retriev +system +interior +design +sean +landi +interdepend +particl +system +justin +mccune +visual +diffus +distribut +pollut +us +spatial +explicit +landscap +modelsfu +tsai +antialias +video +imag +us +stochast +sampl +arun +vermach +hsun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..b7d0e299 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,75 @@ +yong +homepageto +yong +homepagey +number +visitor +sinc +yong +beij +china +current +program +comput +scienc +univers +texa +austin +settl +stai +mathemat +graduat +program +rutger +univers +brunswick +year +jersei +beauti +place +wife +tsinghua +univers +beij +china +milanitalian +soccerk +soccernba +sitefox +sportschicago +bullsmichael +jordannflnhlc +rankingmarri +childrenseinfeldcomput +sciencesutilitieshtml +convertersimag +collectionssystemshtmllatexcgitcl +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat +tmiscinternet +travel +network +unit +parcel +servic +unit +state +postal +servic +usp +fedexus +guidefun +todayu +newsstarwavesupermodel +contact +river +street +austin +finger +yonglu +utexa +page +heavi +construct +last +modifi +yong diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..0a207cde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,217 @@ +david +young +david +youngashbel +smith +professor +professor +comput +scienc +professor +ofmathemat +director +center +numer +analysi +webb +institut +naval +architectur +mathemat +mathemat +harvard +univers +honor +award +profession +servic +fellow +american +associ +advanc +scienc +award +outstand +contribut +comput +scienc +journal +linear +algebra +applic +special +issueded +chair +appli +mathemat +committe +american +mathematicalsocieti +board +truste +argonn +univers +associ +associ +editor +siam +journal +matric +matrixappl +area +interestnumer +analysi +partial +differenti +equat +numericallinear +algebra +summari +researchmi +research +activ +focus +numer +solut +partialdifferenti +equat +base +finit +differ +methodsand +iter +method +solv +associ +system +oflinear +algebra +equat +involv +matric +larg +andspars +sever +comput +softwar +packag +develop +basedon +research +part +itpack +project +research +beingextend +includ +method +suitabl +share +memori +distributedmemori +parallel +comput +rapidli +converg +iter +methodsbas +parallel +multilevel +procedur +also +beingdevelop +select +recent +publicationsd +young +kincaid +linear +stationari +second +degre +method +solut +larg +linear +system +topic +polynomi +sever +variabl +applic +rassia +srivasiava +yanushauska +world +scientif +publ +compani +singapor +young +vona +ration +iter +method +solv +larg +spars +linear +system +appli +numer +mathemat +young +search +omega +iter +method +larg +linear +system +kincaid +academ +press +young +carei +kincaid +sepehrnoori +vector +parallel +iter +solut +larg +spars +system +pde +scienc +engin +crai +comput +minneapoli +crai +research +young +search +high +level +parallel +iter +spars +linear +system +solver +parallel +supercomput +method +algorithm +applic +graham +carei +john +wilei +son +previou +profil +index +next +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..d54f1689 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,240 @@ +yoonsuck +choe +yoonsuck +choe +photo +ad +student +dept +comput +scienc +univers +texa +austin +comput +scienc +univers +texa +austin +austin +decemb +comput +scienc +yonsei +univers +seoul +korea +august +welcom +homepag +visitor +sinc +research +interest +model +cortic +structur +neural +network +current +work +hand +written +digit +recognit +systembas +laterali +interconnect +synerget +self +organ +featur +lissom +develop +joseph +sirosh +prof +risto +miikkulainen +utc +neural +network +research +group +recent +work +includ +extend +lissom +model +actualspik +event +model +call +spike +lissom +slissom +beself +organ +lissom +segment +multipl +object +retinabi +synchron +spike +within +group +repres +object +desynchron +spike +group +repres +differ +object +research +outlin +also +check +html +book +relat +research +edit +prof +miikkulainen +joseph +sirosh +later +interact +cortex +structur +function +comment +public +relat +public +research +group +utc +group +public +page +yoonsuck +choe +risto +miikkulainen +self +organ +segment +later +connect +spike +neuron +technic +report +depart +comput +scienc +univers +texa +austin +septemb +joseph +sirosh +risto +miikkulainen +yoonsuck +choe +later +interact +cortex +structur +function +electron +book +isbn +yoonsuck +choe +joseph +sirosh +risto +miikkulainen +later +interconnect +self +organ +map +hand +written +digit +recognit +appear +touretzki +mozer +hasselmo +editor +advanc +neural +inform +process +system +cambridg +press +yoonsuck +choe +later +interconnect +self +organ +featur +handwritten +digit +recognit +techic +report +depart +comput +scienc +univers +texa +austin +august +master +thesi +bunch +link +total +unord +click +find +interestingcontact +inform +offic +phone +email +yschoe +utexa +mail +address +univers +texa +austin +depart +comput +scienc +austin +page +maintain +yoonsuck +choe +yschoe +utexa +last +updat +utc +home +home +newsgroup +summari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..d4a92a24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,87 @@ +yuanj +xuwint +break +updat +addresspictur +famili +depart +comput +scienc +univers +texa +austin +austin +texa +spring +time +tabl +select +cours +schedulec +object +orient +softwar +engr +brown +logic +synthesi +aziz +client +server +system +develop +gang +previou +semest +china +pagechines +student +associ +austin +china +alumni +page +work +studi +universityof +scienc +technolog +china +hefei +china +institut +mathemat +chines +academi +scienc +beij +chinaunivers +munich +atmunich +germanyunivers +manchest +manchest +prof +nick +higham +famili +wang +lifan +hong +chen +guizhongustc +yuan +hailiang +yang +yuhongfriend +linsoftwar +program +java +java +perl +common +gatewai +interfac +link +yahoo +publish diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..196905e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +feng +feng +visitor +number +sinc +yufeng +utexa +edufing +public +ring +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..d23b70f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,86 @@ +zhii +chen +home +pagezhii +chenabout +mefrom +guangzhou +canton +peopl +republ +china +current +master +program +comput +scienc +depart +univers +texa +austin +seek +full +time +resum +click +resum +postcript +format +zhongshan +univers +pleas +view +chines +guangzhou +china +life +austin +fall +spring +calculu +fall +comput +architectur +spring +misc +china +chines +zodiac +person +associ +differ +anim +kind +anim +associ +find +page +friend +zhongshan +univers +maintain +john +dong +thank +els +world +wide +info +contact +burton +austin +zchen +utexa +page +still +construct +copyright +zhii +chen +creat +last +modifi +visitor +accord +counter +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..b74c506d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,129 @@ +maggi +xiao +zhou +maggi +xiao +zhou +home +page +educ +assistantship +graduat +student +teach +assist +databas +manag +depart +comput +sciencesat +univers +texa +austin +buaa +beij +china +life +austin +current +work +fall +multimedia +system +algorithm +data +commun +network +distribut +process +work +spring +fall +look +around +campu +kaleidoscop +china +land +beauti +visit +world +peopl +daili +china +new +digest +zhai +chines +magazin +newspag +time +magazin +magazin +entertain +movi +stamp +comput +world +world +comput +societi +ieee +comput +giant +onlin +career +center +compani +home +page +internet +search +yahoo +galaxi +lyco +internet +directori +univers +guid +html +script +onlin +librari +contact +inform +mail +zhouxiao +utexa +http +utexa +user +zhouxiao +offic +main +build +room +campu +offic +phone +address +campu +depart +comput +scienc +taylor +univers +texa +austin +austin +home +page +last +modifi +sept +comment +welcom +send +email +zhouxiao +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..9b41c86f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,110 @@ +qing +homepag +qinghi +friend +welcom +homepag +know +graduat +student +inth +depart +comput +scinc +univers +texa +austin +born +beij +capit +citi +china +bachelor +degre +peke +univers +meet +ofmi +friend +former +classmat +peke +univers +findmor +peopl +pekingunivers +alumni +home +page +oversea +html +enjoi +live +austin +texa +peek +follow +site +know +like +well +find +lot +valuabl +informationand +professionalinternetpc +relatedmac +relatedaft +worknetscap +dynam +document +testtwin +eldertwin +youngernetscap +dynam +document +testanim +danc +titledanc +titl +testanoth +netscap +dynam +document +testyet +anoth +netscap +dynam +document +testfriendsthi +china +travel +site +maintain +former +classmat +xiaohai +best +friend +china +shan +shinan +clike +clike +student +visitor +number +sinc +octob +site +construct +last +modifi +qingunivers +texa +austin +depart +comput +sciencesaustin +zhuqe +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..2b9acab7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,73 @@ +univ +washington +comput +scienc +organizationsinclud +faculti +staff +student +visitor +organ +ouraffili +program +graduat +student +regioninclud +local +inform +desktop +refer +link +elsewher +spotlightuwwin +pacif +region +intern +student +programmingcontesttwovideo +highlight +educ +initiativesourcolloquia +live +mbonemajordon +intel +corporationdickkarp +receiv +nation +medal +scienceprofessionalmast +program +applic +deadlin +autumn +departmentoverview +theimpact +research +univers +perspect +faculti +staffposit +avail +half +centuri +exponenti +progress +inform +technolog +page +peopl +region +cours +laboratori +research +newscan +handl +tabl +click +univers +washington +seattl +voic +comment +webmast +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..ec665d56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,65 @@ +andrew +berman +home +pageandrew +berman +aberman +washington +educomput +scienc +bourassa +virgil +selberg +erik +tron +process +specif +file +protect +unix +oper +system +bothpostscript +andhtml +proceed +winter +usenix +confer +berman +andrew +data +structur +fast +approxim +match +postscript +format +berman +andrew +shapiro +linda +effici +imag +retriev +multipl +distanc +measur +avail +postscript +format +appear +spie +special +link +wife +debbi +debbi +beauti +daughter +melani +miscellan +poison +donut +stupid +stupidmi +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..fa303a9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,127 @@ +adam +finkelstein +adam +finkelstein +adam +washington +depart +comput +scienc +univers +washington +seattl +washington +start +drink +cup +coffe +hair +limp +andlack +bodi +year +live +good +life +graduat +student +final +finish +doctor +comput +graphic +fall +quarter +post +earli +join +thecomput +scienc +depart +atprinceton +univers +softwar +engin +tibco +formerli +teknekron +softwar +system +palo +alto +wrote +softwar +peopl +trade +stock +undergradu +student +swarthmor +colleg +class +studi +physic +occasion +recent +research +project +find +specif +imag +alarg +databas +imag +sinc +work +someth +call +multiresolut +video +photo +plai +ultim +frisbe +team +calledumatata +address +phone +number +look +plan +file +across +photocopi +photocopi +thehilari +menu +seattl +least +visit +coffe +hous +caff +lardo +recent +chilli +night +visit +snoqualmi +pass +made +excel +view +comet +hyakutak +great +pictur +taken +friend +marcu +cool +imag +made +glass +sculptur +dither +mona +gothic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..3defba13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,291 @@ +jame +ahren +home +page +jame +ahren +home +page +address +comput +scienc +engin +depart +univers +washington +seattl +email +ahren +washington +phone +research +interest +visual +parallel +distribut +comput +scientif +databas +manag +project +univers +washington +databas +environ +vision +research +alamo +nation +laboratori +visual +project +public +jame +ahren +charl +hansen +cost +effect +data +parallel +load +balanc +intern +confer +parallel +process +august +load +balanc +algorithm +typic +improv +program +perform +onunbalanc +dataset +degrad +perform +balanc +dataset +unnecessari +load +redistribut +occur +paper +present +cost +effect +data +parallel +load +balanc +algorithm +whichperform +load +redistribut +possibl +save +outweighth +redistribut +cost +experi +data +parallel +polygonrender +show +perform +improv +factor +onunbalanc +dataset +perform +loss +percent +onbalanc +dataset +us +algorithm +linda +shapiro +steven +tanimoto +jame +brinklei +jame +ahren +jakobovit +lara +lewi +visual +databas +system +data +experi +manag +model +base +comput +vision +proceed +second +base +vision +workshop +februari +paper +present +design +visual +databas +system +data +experi +manag +system +design +gener +scientif +databas +system +motiv +intend +model +base +comput +vision +provid +unifi +data +model +highli +graphic +user +interfac +advanc +queri +facil +interact +laboratori +notebook +system +aid +scientif +experiment +promot +data +share +comput +vision +commun +frank +ortega +charl +hansen +jame +ahren +fast +data +parallel +polygon +render +supercomput +novemb +paper +describ +data +parallel +method +polygon +render +massiv +parallel +machin +method +base +simpl +shade +model +target +applic +requir +fast +render +extrem +larg +set +polygon +set +found +mani +scientif +visual +applic +render +handl +arbitrarili +complex +polygon +need +mesh +issu +involv +load +balanc +address +data +parallel +load +balanc +algorithm +present +render +toolkit +enabl +scientist +displai +shade +polygon +directli +parallel +machin +avoid +transmiss +huge +amount +data +post +process +render +system +jame +ahren +charl +hansen +cost +effect +data +parallel +load +balanc +univers +ofwashington +depart +comput +scienc +engin +april +longer +version +icpp +paper +also +describ +fast +data +parallel +load +redistribut +algorithm +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..80e92ea2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,176 @@ +databas +environ +vision +research +databas +environ +vision +research +overview +databas +environ +vision +research +wasdesign +gener +scientif +databas +system +motiv +andintend +model +base +comput +vision +provid +unifieddata +model +highli +graphic +user +interfac +advanc +queryfacil +interact +laboratori +notebook +databaseenviron +vision +research +aid +scientif +experiment +andpromot +data +share +comput +vision +commun +devr +entiti +store +hierarch +relat +datastructur +schema +entiti +contain +name +ofproperti +part +attribut +relat +among +thepart +graphic +definit +describ +buildinst +specif +visual +studi +mani +differ +imag +databas +research +topic +includ +visual +interfac +multi +level +queri +experi +manag +peopl +princip +investig +linda +shapiro +steven +tanimoto +brinklei +graduat +student +jame +ahren +jakobovit +lara +lewi +public +linda +shapiro +steven +tanimoto +jame +brinklei +jame +ahren +jakobovit +lara +lewi +visual +databas +system +data +experi +manag +inmodel +base +comput +vision +proceed +secondcad +base +vision +workshop +februari +present +overview +devr +project +lara +lewi +linda +shapiro +steven +tanimoto +flexibledata +organ +visual +support +visual +databasesystem +spie +symposium +electron +imag +scienceand +technolog +februari +jakobovit +linda +shapiro +steven +tanimoto +implement +multi +level +queri +databas +environ +vision +research +spie +symposium +electron +imag +scienc +technolog +februari +email +ahren +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..c0528f24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,101 @@ +page +bernheim +washington +start +comput +scienc +graduat +school +univers +washington +page +still +construct +graduat +william +colleg +comput +scienc +math +front +comput +plai +ultim +frisbe +autumn +quarter +class +programminglanguag +automata +principl +ofdigit +system +design +comput +graphic +seminar +parallel +program +environ +outdoor +relat +link +nation +park +home +page +gorp +guideto +outdoor +recreationfun +link +christian +scott +interact +list +abig +pile +cool +link +blast +past +scoobi +dooeduc +link +great +refer +women +undergrad +comput +scienc +peterson +educ +center +sourc +inform +graduat +school +educ +opportun +inform +distribut +mentorship +project +mentorship +project +allow +women +undergradu +spend +summerwork +research +femal +mentor +great +experi +highlyrecommend +program +back +home +pagelast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..b43384e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,151 @@ +amir +michail +amir +michailgradu +studenti +second +year +graduat +student +depart +comput +scienceat +univers +washington +research +interest +includ +followingarea +algorithm +design +softwar +engin +educ +softwar +master +degre +universityof +toronto +master +thesi +optim +broadcast +summationfor +hierarch +ring +architectur +shift +click +hereto +obtain +compress +postscript +file +recent +experi +wai +teach +algorithm +particular +built +opsi +java +appletdesign +teach +balanc +binari +tree +algorithm +combinesprogram +proof +anim +final +lunar +lander +style +gamethat +wrote +part +undergradu +graphic +cours +quotat +comput +scientist +mathematician +tend +conserv +mani +unwillingto +consid +might +better +write +proof +told +mathematician +embarrass +learn +publishedincorrect +theorem +motiv +avoid +error +believ +theywil +like +structur +proof +persuad +comput +scientist +will +explor +unconvent +proofstyl +unfortun +found +care +whether +theyhav +publish +incorrect +result +often +seem +glad +error +wasnot +caught +refere +sinc +would +meant +fewer +public +fear +comput +scientist +motiv +proof +stylethat +like +reveal +mistak +lesli +lamport +wai +construct +softwar +design +wayi +make +simpl +obvious +defici +theother +make +complic +obviou +defici +hoar diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..eed4f7e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,87 @@ +richard +anderson +home +page +richard +anderson +associ +professor +graduat +inmathemat +reed +colleg +comput +scienc +stanfordin +join +univers +washington +aon +year +postdoc +mathemat +scienc +research +institut +inberkelei +receiv +presidenti +younginvestig +award +spent +academ +yeara +visit +professor +indian +institut +scienc +bangalor +india +richard +anderson +main +research +interest +theori +implementationof +algorithm +includ +parallel +algorithm +comput +geometri +scientif +applic +work +comput +scienc +engin +depart +univers +washington +seattl +teach +paper +work +progress +research +project +qualifi +evalu +project +travel +note +year +visit +theindian +institut +scienc +resum +travel +tourist +project +pictur +recent +talksanderson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..c1f43a0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,160 @@ +anhai +doan +homepageanhai +doan +page +reconstruct +pleas +revisit +soon +born +brought +vietnam +finish +high +school +iwent +hungari +studi +comput +scienc +graduat +kossuth +lajo +univers +art +andscienc +debrecen +hungari +receiv +also +comput +scienc +univers +wisconsin +milwauke +start +fall +program +depart +comput +scienc +andengin +univers +ofwashington +seattl +research +interest +mostli +artifici +intellig +amcurr +investig +decis +make +underuncertainti +decis +theoret +plan +markov +decis +process +qualit +decis +theori +academ +interest +includ +read +travel +listen +music +mostli +jazz +blue +interest +thing +first +name +anhai +mean +calm +invietnames +made +combin +last +syllabl +name +ofmi +mother +birthplac +nghean +first +syllabl +name +ofmi +father +birthplac +haiphong +show +creativ +folkswer +thought +birth +younger +brother +theysimpli +switch +syllabl +gave +namehaian +content +research +interest +probabilist +plan +knowledg +represent +recent +paper +research +librari +curriculum +vita +research +interest +educ +employ +histori +award +honor +public +teach +data +structur +algorithm +take +cours +check +inform +offic +hour +locat +person +interest +comtemporari +vietnames +affair +literatur +write +music +paint +foreign +languag +travel +gener +purpos +librari +life +snapshotsanhai +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..1ff28b2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,165 @@ +joel +ausland +joel +ausland +joel +ausland +hope +depart +comput +scienc +engineeringat +univers +washington +pictur +spring +comput +anim +class +click +onit +origin +last +quarter +complet +qual +project +time +code +gener +multiflow +compil +offic +sieg +hall +home +univers +washington +univ +seattl +seattl +resum +written +follow +paper +fast +effect +dynam +compil +ausland +philipos +chamber +egger +andb +bershad +pldi +automat +dynam +compil +support +event +dispatch +inextens +system +chamber +egger +ausland +philipos +mock +andp +pardyak +workshop +compilersupport +system +softwar +februari +experi +control +base +automat +motion +synthesisfor +articul +figur +ausland +fukunaga +partovi +christensen +reiss +shuman +mark +acmtransact +graphic +page +paper +also +site +optim +leapfrog +ausland +benjamin +wilkerson +mathemat +magazin +page +lossili +compress +mpeg +animationthat +goe +motion +synthesi +paper +last +sequenc +show +mywork +comput +piec +togeth +cartwheel +jump +andshuffl +anim +without +figur +fall +andcollaps +brown +figur +us +algorithm +orang +figur +isjust +try +switch +motion +without +consider +tosmooth +physic +autumn +took +super +short +anim +doubl +speed +small +version +final +project +find +better +place +slide +thetalk +singular +valu +decomposit +gave +graphic +seminar +ausland +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..953de632 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,110 @@ +girl +name +brian +name +brian +boinge +boinge +boinge +brian +michalowskidepart +comput +scienc +engineeringmail +stop +univers +washingtonseattl +offic +locat +sieg +offic +phone +current +take +ling +current +edit +mossi +bitsthank +visit +page +visitor +number +page +worst +view +us +headscapewhenev +second +year +gradstud +actual +liber +artist +interest +inlinguist +confus +good +get +know +alreadi +ultrahotlist +favorit +site +ofal +time +search +look +onlin +refer +look +forsometh +glorifi +hotlist +doesn +thave +urouletteto +random +find +past +institut +ofwhich +mental +person +info +quot +file +favorit +songsand +poem +fictiti +thrash +band +puriti +test +origin +work +tokeep +touch +finger +info +mail +info +guestbook +info +pagesfrom +friend +idea +includ +aslfingerspel +snapshot +blatantli +stolen +brad +chamberlain +brian +michalowski +dept +complet +sanityerad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..918b0b55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,72 @@ +paul +beamepaul +beamepaul +beam +associ +professor +thedepart +comput +scienc +engineeringat +theunivers +washington +paul +receiv +mathemat +comput +sciencein +comput +scienc +univers +toronto +post +doctor +research +associ +academicyear +join +univers +washington +autumn +receiv +presidentialyoung +investig +award +paul +research +concern +primarili +computationalcomplex +theoret +aspect +paralleland +distribut +comput +recent +research +concentr +connect +computationalcomplex +proof +theori +particular +complex +proof +inproposit +proof +system +paul +enjoi +squash +softbal +sport +enthusiasm +cancompens +lack +talent +recent +paper +qual +project +beam +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..4ae94e38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,92 @@ +david +beckerdavid +beckercontact +info +mark +spot +stuff +spin +much +time +goe +makingspina +real +oper +system +respons +level +code +borrow +devic +model +devic +drvier +support +build +environ +code +manag +keep +develop +platform +function +somedai +getto +perform +measur +optim +sport +tri +bunch +ultim +frisbe +favorit +confer +tripl +jump +minnesota +athlet +confer +bethel +colleg +ultim +frisbe +champion +volleybal +men +grad +champion +team +sport +plai +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam +handbal +also +playracquetballgolftenni +done +bridgecampingcanoeingdisc +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat +skiingweightliftingwhitewat +raftinghorseback +ridingmountain +bikingin +line +skate +interest +librari +econom +topic +particularli +interest +free +bank +anti +trust +currenc +ssto +rlv +theologi +centurai +railroad +boot +locomot diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..a9450f80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,525 @@ +brian +bershad +brian +bershad +bershad +washington +eduwork +comput +scienc +engin +depart +univers +washington +seattl +home +street +seattl +brian +bershad +assist +professor +univers +ofwashington +sinc +receiv +univers +ofwashington +took +brief +respit +seattl +experi +post +industri +cultur +northeast +return +northwest +coffe +bershad +research +oper +system +distribut +system +network +parallel +system +architectur +work +hasappear +toc +sosp +asplo +isca +although +seem +asigmetr +paper +publish +save +life +besid +work +bershad +run +plai +squash +hang +thestairmast +project +includ +spin +extensibleoper +system +mobil +comput +memori +manag +oper +system +architectur +midwai +project +carnegi +mellon +parallelnetwork +scalabl +rocki +thesequel +etch +binari +instrument +optimizationcours +winter +look +master +qual +project +click +list +project +youmight +master +degre +qual +project +recent +paper +trace +driven +comparison +algorithm +parallel +prefetch +cachingtraci +kimbrel +andrew +tomkin +hugo +patterson +brian +bershad +edward +felten +garth +gibson +anna +karlin +sigop +usenix +associ +symposium +onoper +system +design +implemen +octob +dynam +bind +extensiblesystem +przemyslaw +pardyak +brian +bershad +osdi +sigop +usenix +associ +symposium +onoper +system +design +implemen +octob +structur +perform +interpret +theodor +romer +denni +geoffrei +voelker +alec +wolman +wayn +wong +jean +loup +baer +brian +bershad +henri +levi +asplo +octob +fast +effect +dynamiccompil +confer +program +languag +design +implementationj +ausland +philipos +chamber +egger +bershad +extens +protocol +architectur +applic +specif +network +marc +fiuczynski +brian +bershad +write +oper +system +us +modula +emin +sirer +stefan +savag +przemyslaw +pardyak +greg +defouw +mari +alapat +brian +bershad +appear +workshop +compil +support +system +softwar +februari +languag +support +extens +oper +system +wilson +hsieh +marc +fiuczynski +charl +garrett +stefan +savag +david +becker +brian +bershad +appear +workshop +compil +support +system +softwar +februari +safe +dynam +link +extens +oper +system +emin +sirer +marc +fiuczynski +przemyslaw +pardyak +brian +bershad +appear +workshop +compil +support +system +softwar +februari +automat +dynam +compil +support +event +dispatch +extens +systemsc +chamber +egger +ausland +philipos +mock +pardyak +workshop +compil +support +system +softwar +februari +extens +safeti +perform +spin +oper +system +bershad +sosp +reduc +memori +overhead +us +onlinesuperpag +promot +romer +ohlrich +karlin +bershad +isca +write +detect +distribut +sharedmemori +zekauska +sawdon +bershad +paper +appearedin +osdi +confer +dynam +page +map +polici +cach +conflictresolut +standard +hardwar +romer +bershad +chen +paper +appear +osdi +confer +mobisa +voelker +bershad +paper +appear +inth +mobil +comput +workshop +issu +extens +oper +system +savag +bershad +avoid +conflict +miss +dynam +larg +direct +mappedcach +bershad +chen +romer +asplo +spin +extens +microkernel +forappl +specif +oper +system +servic +bershad +uwtechn +report +effici +packet +demultiplex +multipl +endpoint +larg +messag +yuhara +bershad +maeda +moss +winter +usenix +impact +oper +system +structur +memori +system +perform +chen +bershad +sosp +protocol +servic +decomposit +high +perform +network +maeda +bershad +sosp +practic +consider +block +concurr +object +bershad +fast +interrupt +prioriti +manag +oper +system +kernel +usenix +microkernel +workshop +midwai +distribut +share +memori +system +bershad +zekauska +sawdon +ieee +compcon +local +area +mobil +comput +stock +hardwar +andmostli +stock +softwar +watson +bershad +usenix +moblic +consist +manag +virtual +index +cach +wheeler +bershad +asplo +fast +mutual +exclus +uniprocessor +bershad +redel +elli +asplo +us +mach +commun +primit +ginsburg +baron +bershad +machnix +us +microbenchmark +evalu +system +perform +drave +bershad +forin +wwo +network +perform +microkernel +maeda +bershad +wwo +increas +irrelev +perform +micro +kernel +base +oper +system +bershad +usenix +microkernel +workshop +system +mach +forin +golub +bershad +machnix +us +continu +implement +thread +manag +commun +inoper +system +drave +bershad +rashid +dean +sosp +inform +arpa +relat +inform +rain +citi +hash +hous +harrier +relat +inform +rel +abduct +alien diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..89bca368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,196 @@ +doorenbo +home +pagebob +doorenbo +home +page +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +sieg +hall +bobd +washington +current +work +netbot +union +place +suit +seattl +voic +daili +page +comedi +dilbert +strip +boffo +david +letterman +list +geeki +new +new +zdnet +anchordesk +magazin +good +stuff +shortcut +todai +cool +tool +research +current +postdoc +work +oren +etzioni +steve +hank +weld +softbot +project +also +particular +shopbot +internet +shop +agent +previou +soar +project +thesi +scienc +site +collect +pointer +repositori +canada +pointer +gopher +scienc +scientif +american +sigma +american +scientist +miscellan +link +meta +search +metacrawl +savvysearch +search +alta +vista +lyco +inktomi +open +text +infoseek +excit +crawler +hotbot +directori +hierarch +select +yahoo +magellan +pointcom +list +search +engin +guid +search +onlin +search +search +telephon +directori +world +yellow +page +network +yellow +switchboard +white +page +list +directori +new +cnnfn +newshour +washington +post +todai +reuter +headlin +yahoo +new +page +social +cafe +new +world +report +boston +globe +span +seattl +time +view +slate +feed +salon +atlant +monthli +harper +sport +espn +sport +zone +govern +fedworld +index +white +hous +congress +arpa +feder +budget +deficit +nation +debt +clock +concord +coalit +hand +balanc +budget +bipartisan +commiss +entitl +reform +budget +american +univers +museum +link +past +life +pittsburgh +upcom +birthdai +home +page +person +home +page +andfun +pagebobd +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..95c0bb1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,144 @@ +alan +born +home +pagealan +born +home +pagei +professor +depart +comput +scienc +engin +univers +washington +principalresearch +interest +constraint +base +languag +system +object +orient +languag +logic +program +languag +human +computerinteract +comput +societi +current +research +activitiesuwconstraint +page +contain +link +recent +paper +public +domainsourc +code +constraint +satisfact +algorithm +media +technolog +democraci +groupuw +student +also +idea +qualsproject +teachingher +page +cours +taught +recent +program +languag +autumn +concept +program +languag +autumn +comput +societi +winter +human +comput +interact +spring +informationhistori +grew +idaho +graduat +reed +colleg +mathemat +graduat +work +comput +scienc +atstanford +univers +receiv +degre +dissert +research +done +associ +xerox +paloalto +research +center +concern +constraint +orient +simulationlaboratori +receiv +spent +year +post +doctoralfellow +depart +artifici +intellig +univers +ofedinburgh +scotland +work +mechan +problem +solv +symbolicalgebra +join +comput +scienc +depart +andexcept +sabbat +spent +xerox +europarc +cambridg +england +havebeen +sinc +address +dept +comput +scienc +engin +univers +washington +seattl +phone +email +born +washington +eduwww +http +washington +home +born diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..54821524 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,23 @@ +brad +home +pagebrad +chamberlainphoto +credit +mike +perkowitzth +probabl +couldn +care +less +offic +address +thing +work +thing +like +thing +ad +subset +ofth +brad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..3ee3b9cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,151 @@ +brendan +home +pagebrendan +mumeyi +student +depart +comput +scienceat +theunivers +washington +enter +fall +expect +graduat +around +fall +contact +information +mail +brendan +washington +edufor +address +click +curriculum +vitaein +htmlorpostscriptformat +academ +interestsi +would +call +appli +theoret +comput +scientist +current +work +comput +biologi +moment +look +problem +physic +map +build +rough +locat +landmark +genom +gener +speak +interest +us +theori +math +tosolv +reason +practic +problem +also +done +work +incomput +astrophys +hpcc +groupher +onlin +papersb +mumei +power +clone +overlap +test +html +postscript +poster +present +ismb +confer +mumei +comput +aspect +physic +map +probe +html +postscript +survei +paper +written +fulfil +candidaci +requir +mumei +find +cluster +quickli +parallel +html +postscript +present +dimac +challeng +klaw +mumei +upper +lower +bound +construct +alphabet +binari +tree +html +postscript +present +soda +confer +also +siam +ofdiscret +mathemat +note +html +version +produc +latexhtml +containsom +error +readabl +part +recreationhik +cycl +ski +climb +drink +coffeeto +name +us +sailingand +hope +sometim +like +plai +bridg +older +photo +first +galleri +second +galleri +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..83a2624e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,105 @@ +lauren +brickerlauren +bricker +comput +scienc +engin +depart +univers +washington +seattl +click +need +clue +research +info +research +interest +primarli +graphic +userinterfac +although +self +proclaim +graphic +groupi +current +work +stevetanimoto +mathematicsexperi +imag +process +metip +project +goal +ofthi +project +interest +junior +high +school +kid +mathemat +usingexploratori +rather +rote +learn +method +particular +minterest +develop +comput +supportedcollabor +learn +cscl +user +interfac +us +inthi +well +project +workin +lawk +dawg +interfacea +fairli +extens +resumeschool +dazethi +quarterdoth +quartershuman +comput +interact +cours +spring +quarter +writeup +final +project +writeupwhat +asystem +cours +interest +cours +comput +insocieti +excit +hobbi +us +enjoi +busi +lifesportscookingpotteri +even +studio +garag +us +year +stuffbecaus +ask +itaddress +bricker +washington +last +modifi +mondai +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..7d8ab404 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,9 @@ +adam +carlsonadam +carlson +carlson +washington +comput +scienc +carlson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..9b47d1c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,56 @@ +andrew +certain +home +page +andrew +certain +certain +washington +server +fix +give +note +page +interest +follow +direct +download +theview +look +model +current +work +tonyderos +david +salesin +werner +stuetzl +duchamp +jovan +popov +scanningproject +current +build +viewer +download +requir +sgigraph +workstat +paper +viewer +look +model +modifi +netscap +shouldalso +work +browser +modifi +similar +comput +scienc +engin +depart +univers +washington +seattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..6f3a2d20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,99 @@ +chou +chou +welcom +homepag +grad +student +computersci +washington +seattl +pictur +right +first +school +fall +quarter +ross +think +scari +school +relat +infoth +chinook +projectmi +fall +schedulemi +list +publicationscod +workshop +codesignpersonalperson +info +taiwan +greec +resumefoodi +enjoi +cook +peopl +open +restaur +ofpeopl +favorit +dish +includ +stir +fri +rice +noodl +beefskew +recip +line +toysb +comput +geek +computersand +cool +toi +taiwanesei +also +promot +taiwaneselanguag +current +develop +comput +tool +taiwanes +sureto +check +experiment +line +taiwanesedictionari +though +absolut +requir +page +best +viewedif +instal +chines +charact +font +us +java +enabl +browser +like +netscap +beabl +java +applet +yellow +ball +bouncingov +barnei +purpl +dynosaur +last +updat +email +chou +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..aebf5dd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,153 @@ +corei +andersoncorei +andersonth +interest +late +set +home +page +research +color +percept +automat +correct +induc +color +compet +programm +month +contest +search +info +localtalk +adapt +set +linux +plai +wavelet +intern +collegi +program +contest +pacif +region +contest +problem +result +final +problem +result +html +version +previou +year +final +problem +version +duke +internet +program +contest +rsum +onlin +recent +august +review +comput +graphic +text +refer +book +thing +done +graduat +univser +washington +highlin +commun +colleg +self +tune +fpga +fall +help +polli +organ +depart +contribut +engin +open +hous +april +manag +chapter +treasuri +spring +spring +develop +read +done +macintosh +program +search +good +internet +servic +providercool +thing +found +usag +statist +lurker +guid +babylon +sunsit +linux +archiv +dilbert +zone +home +page +brother +home +pageus +link +peek +insid +term +lab +html +refer +html +featur +netscap +comput +scienc +engin +depart +home +page +univers +washinton +home +page +uwtv +tech +notesmi +autumn +schedul +mondai +tuesdai +wednesdai +thursdai +fridai +meet +meet +math +math +math +math +math +math +corin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..ad7a9714 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,75 @@ +craig +experiencecraig +kaplancurr +locat +student +depart +comput +scienc +universityof +washington +seattl +washington +well +page +copyof +homepag +univers +waterloo +time +modifi +page +appropri +tomi +current +situat +enjoy +experi +near +undergraduatewa +grad +ball +photo +fromth +ball +second +enjoy +experi +occur +saturdai +address +convoc +valedictorian +cannot +express +honour +felt +wonder +graduat +class +choos +repres +incident +didn +know +parent +found +valedictorian +minut +start +ceremoni +sai +never +forgiv +text +valedictorian +address +found +anyon +curiou +visitor +number +last +updat +cskaplan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..6f6353a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,65 @@ +codi +kwok +home +page +first +thing +thought +peopl +think +mean +aliv +asami +chiaki +welcom +codi +chung +kwok +ctkwok +washington +edui +graduat +student +work +weld +andoren +etzioni +plan +andsoftwar +agent +sanctuari +work +ingram +softbot +aiuw +contact +informationleisur +sanctuari +nausicaa +vallei +windlaputa +castl +skyhyp +futur +vision +gunnm +nausicaa +vallei +wind +arch +vile +welcom +java +applet +anim +take +load +codi +kwok +last +modifi +visitor +sinc +figur +doom +numer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..ceb203b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,37 @@ +darren +cronquist +darren +cronquist +darrenc +washington +depart +comput +scienc +engin +univers +washington +seattl +welcom +home +page +last +updat +employ +inform +current +plan +complet +myph +html +postscript +resum +resum +curriculum +vita +curriculum +vitaperson +inform +rest +homepag +current +underconstruct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..56df5147 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,239 @@ +david +christianson +second +till +midnight +second +till +nowher +babi +david +christiansondbc +washington +comput +scienc +engin +depart +univers +washington +seattl +current +workin +spare +time +third +year +graduat +student +atth +interest +inaiand +plan +gotten +user +interfac +mayb +even +graphicsa +well +current +studi +knowledg +represent +goal +directedbehavior +mix +initi +plan +activ +recognit +buzzwordacquisit +context +human +comput +interact +recent +work +prototyp +us +intellig +interfac +bobdoorenbo +shopbot +rather +build +interfac +plan +somehowintegr +interfac +variou +line +store +build +shop +assist +simpl +assist +built +applicationthat +automat +read +pars +shop +basket +order +determinewhat +product +user +interest +also +collabor +sean +anderson +weld +david +salesin +michael +cohen +develop +simpl +interact +moviethat +demonstr +automat +camera +plan +appl +intern +work +russel +plan +technologyinto +user +experi +also +develop +perpetr +graphic +debugg +theucpop +famili +planner +programm +also +client +number +peopl +whose +live +touch +grow +everi +manual +isher +spent +undergradu +career +theunivers +chicago +carboload +harold +chicken +assist +theanim +agent +publicationschristianson +anderson +salesin +weld +cohen +declar +camera +control +automat +cinematographi +appear +aaai +firbi +christianson +mcdougal +fast +local +map +support +navig +object +local +symposium +sensor +fusion +boston +novemb +find +dave +work +thechateau +cynic +offic +withfreder +william +darren +adam +gloriou +leader +juan +import +thing +favorit +activ +practic +judo +recent +compet +senior +nation +sibl +sisterjust +graduat +school +inform +librari +studi +univers +michigan +surf +cut +edg +research +inform +supercollid +realli +feel +like +slack +mirski +help +watch +hero +youth +duel +death +wwwf +grudg +match +fame +fortun +check +respons +week +check +game +domain +straight +doomgat +sai +evil +book +tick +dave +christianson diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..9673f6e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,44 @@ +home +page +dave +johnson +david +johnson +washington +comput +scienc +engin +depart +univers +washington +seattl +research +interest +navig +assist +hypertext +readersproject +activ +racquetbal +golf +basketbal +softbal +tutori +script +fit +togeth +take +theracquetbal +quiz +quiz +creat +take +look +thecreat +assess +page +form +give +last +modifi +mondai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..f6e9d637 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,232 @@ +home +page +david +dion +david +dion +ddion +washington +person +yeah +right +like +anyth +person +okai +mayb +littl +academ +current +research +past +year +work +brian +bershad +primari +respons +construct +unix +server +thespinoper +system +spin +oper +system +applic +achiev +impress +perform +dynam +extend +kernel +safeti +protect +maintain +languag +level +spin +extens +written +modula +user +level +spin +unix +server +slight +variant +unix +server +think +run +mach +spin +dynam +link +extens +us +intercept +system +call +emul +mach +kernel +environ +previou +research +spin +first +extens +system +havework +undergradu +studi +notr +dame +help +implement +user +level +memori +manag +extens +commun +subsystem +stuff +afraid +time +around +surf +wouldn +claim +know +cool +stuff +ipromis +surf +soon +netscap +enhancedthi +page +hold +breath +meanwhil +site +visit +occasion +work +distract +univers +washington +comput +scienc +engin +reason +seattl +univers +washington +homepag +featur +date +view +campu +weather +condit +spin +occupi +vast +major +time +modula +languag +program +dai +debug +manual +solv +countless +problem +univers +notr +dame +undergradu +institut +notr +dame +band +undergradu +life +notr +dame +trumpet +section +undergradu +life +racquetbal +ladder +main +outlet +athlet +espn +sportzon +stai +touch +sport +world +todai +stai +touch +rest +world +dilbert +learn +real +world +restaur +fine +seattl +eateri +recommend +other +comput +scienc +movi +seattl +region +movi +list +transport +line +guid +seattl +excel +public +transport +system +current +traffic +condit +current +statu +seattl +infam +traffic +seattl +marin +first +major +leagu +basebal +team +page +bean +shop +page +visit +david +dion +last +modifi +mondai +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..1cab1e46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,167 @@ +derrick +index +derrick +weathersbi +bullssupersonicsi +pursu +phdin +comput +scienc +univers +washington +seattl +beauti +campu +univers +li +heart +ofseattl +offer +mani +divers +graduat +student +often +fall +prei +therebyextend +time +averag +graduat +student +career +practic +experi +interestssignific +project +skill +data +sheet +share +server +project +data +parallel +arrai +languag +compil +host +token +ring +commun +protocol +develop +data +base +design +administr +system +administr +network +administr +tool +securityresearch +experi +interestsmi +research +center +around +parallel +distribut +comput +challengespres +field +on +perform +portabl +conveni +perform +typic +foremost +goal +run +parallel +ordistribut +environ +howev +portabl +suffer +final +theseenviron +offer +extra +challeng +asynchron +independ +event +daunt +task +develop +system +parallel +distributedenviron +issu +address +group +project +research +page +spaa +paper +collect +commun +gener +topic +collect +comm +dissert +topic +tool +integr +softwar +engin +projectacadem +achievementsinstructor +extens +colleg +advanc +summer +curriculum +design +advanc +cours +certif +program +extens +collegeinstructor +extens +colleg +advanc +fall +teach +assist +start +undergradu +tutor +women +minoritystud +depart +comput +scienc +engineeringoutstand +teach +assist +award +colleg +engin +person +interest +interact +cnnfinanciala +newslet +would +javaw +derrick +weathersbyderrick +washington +edutu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..23f44057 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,160 @@ +brian +dewei +home +pagebrian +deweyabout +first +year +student +depart +comput +scienceand +engin +univers +washington +doyou +need +know +like +music +book +ilov +plai +game +ride +bike +write +letter +drink +myfavorit +color +blue +favorit +ocean +atlant +oldroomm +think +like +anim +public +avail +finger +northern +irelandi +return +trip +belfast +june +carri +sixti +pagesof +note +interview +carri +luggag +need +getthos +semi +chaotic +note +readabl +hopefulli +enlighteningformat +work +draft +feel +free +read +shoulder +period +make +draft +avail +html +postscript +format +goal +encourag +frequent +feedback +soon +possibl +pleas +read +mail +pleas +note +well +much +time +work +thisproject +late +hopefulli +progress +near +ofth +summer +imag +northern +ireland +note +thecurr +draft +sinn +fein +inform +bibliographi +postscript +statist +terrorist +relat +death +northern +ireland +postscript +statist +terrorist +relat +injuri +northern +ireland +postscript +game +alreadi +addict +recuri +game +check +link +fascin +thorough +histori +develop +game +china +read +ancient +china +page +imageek +york +cuni +page +provid +mani +link +interest +site +jansteen +page +thorough +seen +brian +dewei +dewei +washington +edulast +modifi +tuesdai +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..db4558f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,145 @@ +home +pagewelcom +fasulo +home +page +dfasulo +washington +third +year +graduat +student +depart +comput +scienc +univers +washington +graduat +williamscolleg +computersci +appli +mathemat +class +note +portrait +contain +slight +inaccuraci +find +home +eastlak +seattl +work +depart +comput +scienc +engin +univers +ofwashington +seattl +offic +chateau +email +dfasulo +washington +academ +interest +comput +graphic +comput +biologi +person +interest +scienc +fiction +fantasi +written +otherwis +fact +probabl +honest +identifi +illustr +merlin +corwin +pictur +favorit +fiction +charact +mine +roger +zelazni +chronicl +amber +imag +taken +amber +drpg +publish +phage +press +would +recommend +anyon +like +book +also +recommend +seri +babylon +creativ +write +fiction +poetri +absolut +link +work +athlet +particular +order +tenni +kwon +distanc +run +role +plai +random +thing +depend +cat +random +thing +homepag +friend +fellow +william +alumnu +sean +sandi +look +woman +former +grad +student +wendi +belluomini +dress +dogbert +peopl +ask +theori +worthwhil +area +research +whether +abstract +us +better +explan +goal +futur +theori +ever +given +dfasulo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..e8b21f2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,34 @@ +martin +dickei +home +page +martin +dickeycomput +scienc +engineeringunivers +washingtonwelcom +home +page +weekli +schedulenarr +resum +blurbcs +engr +autumn +favorit +seattl +coffe +housesfavorit +internet +site +sister +bookspirograph +java +script +garg +plai +dickei +washington +eduupd +tuesdai +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..889863ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,233 @@ +denni +denni +primari +interest +high +perform +comput +architectur +researchwith +smart +peopl +likejean +loup +baer +brian +bershad +brad +calder +brad +chen +alan +eustac +dirk +grunwald +andt +romer +recent +paper +energi +manag +issu +comput +system +gener +paper +instruct +cach +fetch +polici +specul +execut +baer +calder +grunwald +isca +dynam +page +map +polici +cach +conflict +resolutionon +standard +hardwar +romer +bershad +chen +osdi +avoid +conflict +miss +dynam +larg +direct +mappedcach +bershad +romer +chen +asplo +instruct +cach +effect +differ +code +reorder +algorithm +qualifi +project +report +univers +washington +contact +work +comput +scienc +engin +depart +univers +washington +seattl +offic +sieg +dlee +washington +home +seattl +index +pointer +hotlist +good +entri +point +explor +yahoo +yellow +page +internet +lyco +realli +good +search +enginefor +seattl +guid +click +vote +million +sensibl +peopl +high +mind +conced +thatpolit +almost +alwai +choic +lesser +evil +tweedledumand +tweedlede +vote +abstain +theyar +present +presid +appoint +peopl +go +torummag +around +live +next +four +year +consid +allth +peopl +home +stew +rather +vote +huberthumphrei +show +humphrei +peopl +taught +huberthumphrei +lesson +still +enjoi +nixon +suprem +court +whentricia +juli +begin +find +silver +thread +among +gold +theblack +russel +baker +ford +without +flummeri +commit +commit +hesit +chanc +draw +back +alwai +ineffect +concern +act +initi +element +truth +ignor +kill +countless +idea +splendid +plan +moment +definit +commit +provid +move +sort +thing +occur +help +would +never +otherwis +occur +whole +stream +event +issu +decis +rais +favor +manner +unforeseen +incid +meet +materi +assist +magic +could +dream +would +come +whatev +dream +begin +goeth +dlee +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..e66bb847 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,140 @@ +anoth +unnecessari +home +pagececi +home +page +well +much +anywai +sure +casual +mention +name +home +page +buti +feel +strongli +bold +noless +doug +zongker +exhaust +list +usual +research +public +classeshow +contact +meth +unusu +toxic +custard +workshop +filesth +mento +galleryvisit +inform +supercolliderth +useless +todai +date +wast +time +cron +player +databas +wast +time +test +ground +caveat +user +sarcasm +lucki +brows +todai +avirtu +intend +treasur +trove +inform +whichmai +actual +us +realli +sarcasm +first +year +grad +student +comput +scienc +engineeringdepart +univers +ofwashington +graduat +michigan +state +univers +imajor +comput +scienc +andminor +math +dubiou +honorsjunior +apprentic +keeper +brotherhood +crouton +death +cart +pizzicato +intern +club +member +bryan +worst +execut +vice +presid +charg +emerg +backup +clicker +cruis +inform +highwai +inhigh +gear +actual +sit +buttstar +comput +screen +tast +background +stolen +depart +labor +homepag +wheremi +sister +work +depart +homepag +doug +zongker +dougz +washington +home +research +class +contact +last +edit +thursdai +novemb +hit diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..59fde30d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,128 @@ +dylan +mcname +dylan +jame +mcname +dylan +washington +comput +scienc +engin +depart +univers +washington +seattl +person +inform +research +concentr +interact +applicationsand +oper +system +implement +oper +system +mechanismthat +allow +applic +implement +page +replacementpolici +kernel +polici +caus +perform +poorli +help +implement +schedul +activ +machoper +system +schedul +activ +mechan +thathelp +user +level +thread +system +interact +properli +kernelthread +schedul +experi +lead +work +spin +project +folk +build +oper +system +architecturethat +gener +idea +applic +tailor +oper +system +current +work +carri +research +start +opal +project +thesi +investig +interact +betweenobject +orient +databas +oodb +oper +system +virtual +memori +work +demonstr +signific +improv +perform +achiv +us +commod +oper +system +differ +current +done +addit +improvementscan +come +modifi +oper +system +slightli +betterserv +oodb +paperscv +cours +geoff +voelker +built +winter +quarter +system +seminar +dedic +java +gave +lectureintroduc +java +languag +environ +slide +us diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..303fbe63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,39 @@ +christoph +lewi +home +page +christoph +lewi +graduat +student +dept +comput +scienc +engin +univers +washington +seattl +echri +washington +hello +glad +could +make +graduat +student +work +program +languag +project +offic +hour +tent +mondai +wednesdai +sieg +christoph +lewi +last +modifi +thur +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..6606c38b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page +blank +ecrock +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..cc4fe199 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,139 @@ +eddi +hong +home +page +know +second +year +graduat +comput +scienc +studentat +univers +washington +well +tosai +busi +type +peopl +littl +time +hand +hadto +includ +resum +link +postcriptand +plain +text +form +offic +room +seig +hall +current +home +current +work +anna +karlin +craig +chamber +theoret +model +dynam +compil +specif +workingon +develop +line +algorithm +work +well +fordynam +compil +plan +qual +project +access +sinc +august +histor +fact +free +time +activit +vine +branchesmi +sister +want +home +page +help +creat +also +list +variou +peopl +know +anoth +page +interest +includ +keep +comput +industri +site +give +insight +commentari +happen +knowof +place +pleas +mail +daveneti +power +macintosh +guess +make +bias +towardslik +mac +howev +think +better +comput +eveneasi +come +place +sometim +visit +appl +comput +check +seattl +freewai +traffic +look +advic +import +book +worldher +us +inform +alwai +want +know +found +find +address +domain +name +find +countri +mail +friend +stand +edhong +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..79ac4481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,99 @@ +susan +egger +susan +egger +http +washington +home +egger +depart +computersci +engin +univers +washington +seattl +voic +email +egger +washington +offic +sieg +hall +research +interest +comput +architectur +back +compil +emphasi +onexperiment +perform +analysi +current +work +issu +incompil +optim +dynam +compil +share +data +optimizationsand +instruct +schedul +processor +design +multithreadedarchitectur +current +research +project +compil +time +algorithm +reduc +fals +share +dynam +compil +multithread +architectur +spinprevi +research +cach +coher +code +schedul +compil +time +prefetch +share +memori +machin +miscellan +tool +workload +new +asplo +program +committe +call +paper +asplo +homepag +inform +asplo +look +research +project +click +list +project +might +qual +project +amast +degre +begin +thesi +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..414437a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,311 @@ +emin +sirer +emin +sirereg +washington +depart +comput +scienc +engin +univers +washington +seattl +backgroundi +current +third +year +graduat +student +univers +washington +grew +istanbul +turkei +receiv +comput +scienc +princeton +univers +current +work +toward +spinproject +prof +brian +bershad +spent +summer +bell +labswork +theplan +oper +system +help +build +prototyp +displai +thesumm +research +center +princeton +jersei +recent +summer +work +thevesta +project +projectsmi +goal +develop +safe +adapt +extens +oper +system +develop +thread +schedul +synchron +dynam +link +andprotect +domain +subsystem +spin +also +wrote +machin +specificaspect +spin +kernel +system +call +interrupt +path +andsom +miscellan +interfac +alarm +mach +compat +support +novel +aspect +spin +thread +schedul +provid +mean +ofextend +kernel +arbitrari +thread +implement +schedulingpolici +dynam +link +extens +kernel +allowsu +achiev +high +perform +interfac +call +strand +isdesign +safe +provid +fault +isol +novel +aspect +spin +protect +domain +interfac +allowsisol +safe +fine +grain +share +time +extens +withconflict +symbol +simultan +activ +system +extens +hide +code +data +beassur +possibl +access +clincher +extensionsthat +want +share +code +data +dynam +protectionenforc +overhead +also +implement +high +performanceweb +server +spin +networkingstack +main +object +design +reduc +http +latenc +andminim +load +wrote +mip +instruct +simul +coupl +year +calledmipsi +robust +enough +simul +spec +benchmark +standard +ofnew +jersei +us +educ +tool +researchplatform +page +describ +mipsi +featuresand +avail +paper +safe +dynam +link +extens +oper +system +wcsss +describ +spin +protect +namespac +manag +mechan +write +oper +system +us +modula +wcsss +describ +experi +us +modula +implement +spin +extens +safeti +perform +spin +oper +system +sosp +design +implement +perform +paper +protect +softwar +issu +hoto +posit +paper +compar +softwar +hardwar +protect +mechan +spin +extens +microkernel +applic +specif +oper +system +servic +sigop +european +workshop +oper +system +review +version +spin +extens +microkernel +applic +specif +oper +system +servic +univers +washington +technic +report +march +measur +limit +fine +grain +parallel +senior +independ +work +princeton +univers +june +talkslanguag +support +extens +oper +system +slide +present +first +workshop +compil +support +system +softwar +wcsss +tucson +arizona +interestswhenev +find +time +opportun +follow +sail +windsurf +dive +ski +bikingmak +outdoor +cloth +andhik +dylan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..6e49fa4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,174 @@ +eric +anderson +home +page +eric +andersonwher +find +sieg +hall +depart +comput +scienc +engin +univers +washington +seattl +home +street +seattl +page +longer +black +honor +recent +june +decisionin +aclu +reno +page +longer +black +mind +thedecis +mere +interim +step +could +read +take +probabl +still +commit +feloni +care +speech +freedom +themarketplac +idea +fact +third +year +graduat +student +comput +scienc +mean +imostli +panic +qual +project +also +try +write +paper +prof +henri +greensideof +duke +univers +recent +finish +master +thesi +onsteadi +state +solut +particular +nonlinear +biharmon +stabil +criterion +explicit +method +restrict +fourth +power +spatial +resolut +implicit +timesteppingmethod +backward +euler +necessari +numer +analysisissu +involv +newton +method +solv +implicit +nonlinearequ +spars +matrix +solut +method +solv +newtonstep +interplai +pictur +realli +spiffi +work +bodi +code +astrophys +simul +work +support +project +data +structuresbi +prof +richard +andersoni +work +signal +process +comput +music +project +aim +automat +transcript +acoust +signal +prof +anna +karlin +isth +musician +interest +project +like +everybodi +els +page +applet +first +link +text +small +graphic +section +materi +preparedfor +last +fall +like +everybodi +els +page +snapshot +mostli +famili +prove +brother +final +weather +seattl +eric +washington +meander +washington +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..8b1a65ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,292 @@ +oren +etzioni +home +pageoren +etzioni +home +pagedepart +comput +scienc +engin +univers +ofwashington +seattl +washington +voic +mail +etzioni +washington +offic +sieg +hall +room +brief +bioand +photo +current +research +internet +softbot +enabl +human +user +state +heor +want +accomplish +softbot +disambigu +request +anddynam +determin +satisfi +softbot +finalist +discoveraward +technolog +innov +comput +softwar +metacrawl +softbot +field +servic +enabl +searchmultipl +indic +parallel +provid +sophist +pruningopt +netrecommend +metacrawl +search +servic +choic +ahoi +softbot +white +page +servic +locatesindividu +home +page +high +accuraci +bruteforc +learn +brute +analyz +hypothes +second +whenrun +sparc +select +public +effici +inform +gather +internet +foc +move +inform +food +chain +deploi +softbot +theweb +aaai +ascal +comparison +shop +agent +world +wide +autonom +agent +multi +servic +search +comparison +us +metacrawl +postscript +html +softbot +base +interfac +internet +cacm +juli +intellig +agent +internet +fact +fiction +forecast +ieee +expert +august +intellig +without +robot +repli +brook +magazin +decemb +learn +understand +inform +internet +ijcai +sound +effici +close +world +reason +plan +toappear +first +robot +aaai +addit +paper +student +advis +richardseg +master +thesi +bernard +fileretriev +softbot +neal +lesh +master +thesi +planner +unix +softbot +keith +golden +master +thesi +plan +universalquantif +incomplet +inform +terranc +goan +master +thesi +learn +softwar +error +mikeperkowitz +master +thesi +learn +understand +inform +internet +erik +selberg +master +thesi +multi +servic +search +comparison +us +metacrawl +oren +zamir +jonathan +shake +undergradu +student +advis +stephen +soderland +program +umass +amherst +juli +roomi +hewlett +packard +bruce +lesourd +robert +spiger +lockhe +research +center +william +alford +program +univers +wisconsin +greg +fitchenholtz +hewlett +packard +guido +hunt +dymitr +mozdyniewicz +quark +machin +learn +resourc +machin +learn +databas +repositori +irvin +machin +learn +program +repositori +irvin +knowledg +discoveri +minecontain +inform +knowledg +discoveri +databas +neuroprosearch +contain +recent +paper +relev +neural +network +machin +learn +inform +servic +univers +illinoi +induct +learn +group +statlib +contain +data +algorithm +inform +relev +statist +machin +learningtoolbox +bonn +german +list +usenet +faq +access +count +sinc +etzioni +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..cf10f210 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,39 @@ +mike +feelei +home +pagemik +feelei +comput +scienc +finish +done +soon +thesi +concern +global +memori +manag +workstationclust +also +work +distribut +comput +opalproject +join +faculti +univers +british +columbia +injanuari +inform +avail +us +link +papersmi +research +summarycvsoutheast +idaholast +modifi +juli +mike +feelei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..429ef8a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,28 @@ +chri +fisher +home +page +pictur +chri +fisherdepart +comput +scienc +engineeringbox +univers +washington +seattl +fisher +washington +voic +mail +fisher +washington +sieg +hall +room +page +current +construct +return +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..5be978ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,80 @@ +depart +comput +scienc +engineeringunivers +washington +seattl +sieg +hall +washington +schedulethi +quarter +autumn +ta +cours +rather +work +gener +exam +check +scheduleto +otherwis +around +probablyb +found +librari +somewher +nice +read +paper +research +activitiesmi +main +interest +comput +algorithm +specif +areasof +parallel +comput +comput +geometri +public +meander +place +denni +outta +mind +vista +pea +music +site +chateau +galleri +fund +drive +thing +alec +wolman +might +server +seven +lost +soul +captur +html +listen +phone +booth +mofo +peopl +luci +place +paul +peach +ruel +might +look +like +moment +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..a95378d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,112 @@ +francesmari +modugno +home +pagefrancesmari +modugno +home +page +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +sieg +hall +phone +mail +washington +research +interest +main +interest +human +comput +interact +also +interest +user +program +formal +model +softwar +specif +verif +technolog +polici +welcom +opportun +collabor +relat +topic +current +research +project +softwar +safeti +human +machin +interfac +previou +research +public +avail +onlin +summari +ofmi +thesi +research +formal +model +real +time +concurr +distribut +system +parallel +algorthim +profession +activ +basic +research +symposium +chair +uist +demonstr +chair +educ +comput +scienc +carnegi +mellon +univers +march +comput +scienc +carnegi +mellon +univers +august +comput +scienc +mathemat +cornel +univers +activ +anyth +recent +interest +includecycl +ski +languag +cultur +current +spanish +previouslyitalian +vegetarian +cook +elleri +line +greet +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..bf452b2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,50 @@ +georg +forman +home +pagegeorg +forman +home +pagei +receiv +comput +scienc +optim +compil +ariadn +scalabl +pattern +match +parallel +trace +debugg +public +mobil +comput +hyperlink +librari +someth +interest +free +handi +softwar +script +written +word +puzzl +water +song +chang +netscap +anim +georg +forman +gforman +comhom +page +mail +finger +weather +dept +live +pictur +gener diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..20a26fe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,60 @@ +home +page +marc +friedman +marc +friedman +home +page +cours +page +watercolor +applet +camp +checklist +spanish +english +collabor +dictionari +poetri +research +favorit +quot +link +elsewher +occam +inform +gather +agent +keith +golden +keith +wordbot +bike +trip +artifici +intellig +codi +kwok +weld +ucpop +planner +research +tool +chang +life +work +nietzschein +english +netscap +bookmark +file +everi +page +refer +visitor +sinc +marc +friedman +friedman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..4039698b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,32 @@ +charli +garrett +home +page +charli +garrett +address +seattl +research +interest +compil +graphic +neural +network +genet +algorithm +game +plai +algorithmspap +line +genet +algorithmsformerli +member +cecil +group +univers +ofwashington +bookshelf +audio +file +garrett +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..b415d565 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,56 @@ +georg +winkenbach +georg +winkenbachdepart +comput +scienc +engineeringunivers +washingtonbox +seattl +mail +georgew +washington +eduphon +interest +comput +graphic +multimedia +thesi +work +doneund +supervis +professor +david +salesin +deal +appli +tradit +illustr +techniqu +theautomat +render +three +dimension +model +imagescr +prototyp +render +system +found +link +follow +imag +galleri +grail +graphic +imag +laboratori +depart +comput +scienc +engin +wife +home +page +taweewan +siwadun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..61f60e78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,289 @@ +greg +badro +home +pagegreg +badro +welcom +home +page +last +updat +email +washington +eduaddress +nesbit +seattl +hello +welcom +homepag +pleas +feel +free +send +feedback +email +address +page +alwai +isuppos +construct +keep +check +back +excitingfeatur +recent +graduat +dukeunivers +complet +degre +doubl +major +incomput +scienc +mathemat +spring +amcurr +emploi +part +time +senior +research +scientist +fortransworld +numer +small +compani +origin +locat +indurham +headquart +bermuda +work +myapart +seattl +washington +also +full +time +graduat +student +computersci +engin +depart +univers +ofwashington +fall +cours +home +pagecs +home +pagecs +home +pagecs +softwar +engin +seminarcs +compil +seminar +newer +stuff +philosophi +mathemat +cours +note +misc +patch +fvwm +first +place +best +show +redhat +desktop +configur +competit +entri +emac +configur +readm +file +emac +configur +archiv +file +zshell +configur +readm +file +zshell +configur +archiv +file +chronicl +duke +univers +newspap +articl +transworldnumer +spring +ieeenat +program +competit +victori +vertic +winter +issu +duke +magazin +contain +recent +articl +geneticalgorithm +person +link +rsum +data +date +first +busi +sampl +drew +bycomput +simpl +magic +creat +canterburi +progress +variou +random +pictur +life +definitelynot +work +mani +hobbi +includ +tenni +ski +especi +jackson +hole +jackson +volleybal +juggl +piano +plai +mathemat +game +rubik +cube +linux +freewar +unix +music +especi +sarahmclachlan +billi +joel +joel +yahoo +page +list +link +parliamentari +procedur +ncaa +basketbal +interest +link +lyco +search +duke +comput +scienc +home +page +duke +univers +home +page +chronicl +duke +commun +daili +newspap +univ +washington +home +page +unoffici +seattl +microsoft +corpor +world +wide +server +gatewai +user +group +histor +comput +imag +hotjava +global +network +navig +home +page +perl +practic +extract +report +languag +virtual +librari +inter +languag +unif +interest +devic +connect +write +html +sgml +seinfeld +index +page +friend +sitcom +part +materi +base +upon +work +support +nation +scienc +foundat +graduat +fellowship +opinion +find +conclus +recommend +express +public +author +necessarili +reflect +view +nation +scienc +foundat +greg +badro +washington +comput +scienc +engin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..c78a5557 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,414 @@ +greg +linden +home +page +greg +lindenmi +love +wifecorina +current +third +year +graduat +program +polit +scienceher +third +year +graduat +program +thecomput +scienc +depart +univers +ofwashington +slave +awai +toward +lofti +goal +complet +undergraduatedegre +univers +california +diego +anodd +doubl +major +comput +scienc +polit +scienc +go +leav +graduat +school +decemb +mactiv +look +posit +softwar +develop +interest +check +resum +java +applet +webview +allow +link +page +orset +page +addit +webview +famili +altavistawebviewand +metawebview +instead +enter +enter +keyword +foraltavista +metacrawl +webview +hit +search +servic +return +graph +displai +page +found +searchservic +link +page +autom +travel +assist +emul +dialog +travelag +client +gradual +elicit +flight +prefer +whileallow +brows +real +flight +data +research +prototyp +quit +function +even +earli +stage +webview +highli +rate +gamelan +andjar +wasrat +java +applet +jar +cool +gamelan +andwa +gamelan +staff +pick +webview +wasrat +java +applet +jar +cool +gamelan +andwa +gamelan +staff +pick +gamelan +staff +pick +altavistawebview +winner +thejava +repositori +java +contest +applet +also +publish +book +walsh +foundat +java +book +meilleur +java +best +java +first +java +applet +linear +ballet +oop +us +java +capabl +browser +sorri +cool +java +applet +sourc +demonstr +us +doubl +buffer +avoid +flicker +us +thread +give +time +run +applet +code +certainli +could +cleaner +though +expect +work +thought +cool +might +enough +standardsto +impress +mylgramm +particl +tree +applet +draw +tree +lgrammer +tree +look +much +realist +theparticletre +tree +interest +recent +start +posit +jar +judg +evalu +java +applet +submit +thejar +archiv +interest +work +summer +develop +dawn +civil +ademonstr +applic +show +plan +techniqu +cansuccessfulli +appli +entertain +softwar +myriadsoftwar +ucsd +work +professor +belew +filippo +menzer +latentenergi +environ +project +tool +developingartifici +life +model +experi +artifici +neuralnetwork +evolutionari +learn +softwar +enviro +paper +hank +lesh +linden +elicit +user +prefer +theautom +travel +assit +submit +user +model +majeski +linden +linden +spitzer +model +localizedinteract +spatial +constraint +iter +prison +dilemma +associ +econom +scientist +krishnamoorthi +paturi +blume +linden +liden +esen +hardwaretradeoff +boolean +concept +learn +world +congress +neuralnetwork +linden +recurr +neural +network +iter +prison +sdilemma +unpublish +honor +thesi +adam +carlson +sujai +parekh +wrote +funrai +tracer +ofth +project +inc +graduat +comput +graphic +imag +headless +horseman +closeup +headless +horseman +chess +duel +assembl +requir +sphere +imag +withreflect +transpar +shadow +distribut +trace +adaptivesampl +mess +cool +thing +pattern +thespher +transpar +reflect +causingth +rai +reflect +refract +multipl +time +surfaceand +intern +also +made +second +comput +anim +call +strike +theanim +written +us +inventor +code +manipul +thed +model +origin +movi +file +made +alow +qualiti +quicktim +movi +avail +qualiti +mbquicktim +movi +avail +sorri +doesn +compress +anyfurth +least +anyth +resembl +reason +qualiti +program +stuff +dilbert +cognit +scienc +ucsd +repositori +artifici +life +info +occasion +found +chateau +guggenheim +annex +comput +scienc +engin +univers +washington +seattl +glinden +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..c77594ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,32 @@ +brian +kri +grant +home +pagebrian +home +awai +homework +relat +infowork +backgrounduwdynam +compil +groupuw +depart +computersci +engineeringperson +stuffperson +backgroundmi +daughter +isismi +trip +singaporemi +bookmarksmi +public +keylast +updat +octob +brian +kri +grant +grant +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..0d22cac5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,111 @@ +dave +grove +dave +grove +depart +comput +scienc +engin +univers +washington +seattl +offic +chateau +sieg +worki +spend +time +plai +cecil +cecil +pure +object +orient +langaug +us +vehicl +integr +research +area +languag +design +program +environ +optim +compil +also +us +hord +consult +hang +aroundth +fring +spinproject +actual +attempt +graduat +sometim +soonish +much +less +frequent +paper +author +author +plai +wouldn +complet +without +dilbertfix +strip +thathit +littl +close +home +current +manag +underacheiv +fantasi +footbal +team +us +spend +summer +hampshir +work +gui +scoutreserv +greaterlowel +council +pictur +casunset +taken +right +cabin +kick +anoth +everi +boi +offic +someth +silli +white +water +raft +trip +especi +cool +spend +month +toronto +drove +back +toseattl +took +number +detour +along +somehihglight +trip +grove +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..3f514774 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,105 @@ +scott +hauck +scott +hauck +hauck +washington +comput +scienc +engin +depart +univers +washington +seattl +year +graduat +student +univers +washington +current +work +multi +fpga +system +rapid +prototyp +board +level +design +thoughi +also +interest +asynchron +circuit +fpga +architectur +parallel +graduat +june +person +biographi +educ +experi +public +curriculum +vitaeresearch +asynchron +circuit +survei +current +asynchron +design +methodolog +well +first +fpga +asynchron +circuit +triptych +montag +fpga +architectur +develop +triptych +montag +fpga +architectur +architectur +improv +densiti +current +commerci +fpga +multi +fpga +system +rapid +prototyp +develop +springbok +rapid +prototyp +system +board +level +design +well +partit +assign +rout +topolog +work +gener +multi +fpga +system +chinook +project +hardwar +softwar +design +synthesi +simul +system +embed +applic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..68b1569e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin +hinshaw +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..c76245f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,73 @@ +place +place +macduff +ultrasound +imag +emma +elspeth +macduff +name +subject +chang +without +notic +unborn +daughter +week +fromconcept +week +ripe +inmid +decemb +view +profil +ly +back +lookingup +head +right +upper +half +torso +theleft +busi +obsess +impend +fatherhood +master +thesi +part +chinook +project +pass +time +silli +possibl +also +rememb +myspam +unfortun +also +wast +time +html +brows +around +interest +stuff +usingwebcrawl +pointer +neat +stuff +frogstv +nationpenn +tellermus +lyricsian +macduff +washington +dept +comput +scienc +engin +univ +washingtonseattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..4cca8596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,210 @@ +herv +jamrozikherv +jamrozik +postdoc +univers +washington +sinc +septemb +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +sieg +hall +phone +mail +jamrozik +washington +research +main +interest +distribut +system +object +orient +languag +softwar +engin +current +work +global +memoi +manag +workstat +cluster +hank +levi +mari +vernon +anna +karlin +mike +feelei +geoff +voelker +high +speed +network +greatli +encourag +network +memori +cach +virtual +memori +file +page +therebi +reduc +need +disk +access +network +node +memori +intens +applic +primari +memori +lightli +load +node +temporari +back +store +introduc +level +memori +hierarchi +name +global +memori +cach +li +logic +local +memori +disk +page +fundament +transfer +access +unit +remot +memori +system +page +size +perform +factor +recent +page +size +modern +processor +increas +order +provid +coverag +amort +disk +access +cost +unfortun +high +speed +network +small +transfer +need +provid +latenc +trend +page +size +thu +odd +network +memori +high +speed +network +studi +subpag +mean +reduc +transfer +size +latenc +remot +memori +environ +reduc +network +latenc +us +subpag +global +memori +environ +jamrozik +feelei +voelker +evan +karlin +levi +vernon +inproceed +seventh +confer +architectur +support +program +languag +oper +system +octob +postscript +thesi +research +debug +distribut +object +orient +system +theuniversit +joseph +fourier +grenobl +involv +guideproject +laboratoir +bull +imag +part +imaginstitut +extrem +peopl +area +snot +visit +louvr +galleri +look +map +franc +europ +world +somefamili +pictur +somefriend +pictur +jamrozik +washington +eduv +march +decemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..b77d3ed1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,30 @@ +jason +secoski +home +pagejason +secoski +jason +washington +eduaddress +comput +scienc +engin +depart +sieg +hall +cunivers +washington +boxseattl +offic +frequent +us +page +projectseattl +weather +forecast +weather +channel +jason +secoskylast +modifi +thursdai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..5b8edf67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,93 @@ +jeremi +baerjeremi +baer +never +school +interfer +educ +mark +twain +stuff +dream +made +william +shakespearei +current +graduat +student +comput +scienceat +univers +washington +interest +includ +artifici +intellig +human +computerinteract +multimedia +educ +softwar +softwar +engineeringtool +comput +gener +music +person +creativ +stuff +cool +place +spend +signific +time +pierian +spring +softwareoregon +museum +scienc +industri +omsi +pomona +collegeher +look +current +project +mine +eight +puzzl +java +applet +work +progress +experiment +virtual +travel +applet +copi +effect +demo +applet +question +project +static +layer +analysi +program +feel +stress +realli +silli +littl +macintosh +thati +wrote +year +download +like +metacrawl +searchcopyright +jeremi +baer +jbaer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..394ad98d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,74 @@ +jeremi +buhler +home +pagejeremi +buhler +home +pagedo +attempt +adjust +browser +control +transmiss +statu +first +year +student +institut +univers +washington +depart +comput +scienc +engin +offic +sieg +hall +offic +phone +home +phone +mail +address +jbuhler +washington +finger +tako +washington +import +stufflectur +note +suffix +tree +postscript +latex +research +come +soon +cours +schedulemi +public +keycyb +activ +electron +frontier +foundat +grinsrecommend +readingmi +undergradu +alma +mater +rice +universityquot +quotesmi +home +page +return +control +browser +jeremi +buhler +jbuhler +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..527f728c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,376 @@ +jeff +dean +jeff +dean +depart +comput +scienc +engin +univers +washington +seattl +offic +chateau +sieg +view +offic +would +dang +build +weren +futur +plansi +plan +graduat +summer +join +western +research +laboratori +sunni +palo +alto +bought +hous +nearbi +menlo +park +curriculum +vita +postscript +also +summari +postscript +research +teach +experi +projectsi +work +primarili +cecil +project +cecil +pure +object +orient +langaug +us +avehicl +integr +research +area +languag +design +program +environ +optim +compil +techniqu +weintend +techniqu +scale +larg +real +world +program +andto +keep +true +goal +implement +vortexcompil +cecil +current +line +cecil +codein +compil +much +work +group +involv +track +compil +bug +also +hang +spinproject +meet +spin +extens +oper +systemmicrokernel +support +dynam +adapt +system +interfacesand +implement +direct +applic +control +stillmaintain +system +integr +inter +applic +isol +spring +quarter +organ +depart +compil +seminar +research +interest +research +concern +effici +implement +ofobject +orient +languag +compil +optim +techniqu +particular +explor +whole +program +analysi +usedto +improv +perform +affect +increment +compil +andhow +interact +optim +techniqu +especiallyprofil +guid +optim +techniqu +also +interest +howwhol +program +optim +affect +languag +design +assumedthat +compil +access +entir +program +manycompromis +made +exist +languag +design +becom +unnecessari +interest +appli +lesson +learn +explor +wholeprogram +optim +toward +design +system +programminglanguag +flexibl +effici +base +underlyingimplement +whole +program +analysi +valid +research +three +principaldesign +develop +vortex +optim +compil +forobject +orient +languag +vortex +defin +languag +independentintermedi +languag +object +orient +languag +ishigh +enough +level +permit +optim +featur +messagesend +closur +object +creation +also +contain +uniqu +wayof +compos +optim +pass +parallel +obtain +better +result +repeatedli +run +passessepar +exampl +compil +appli +intraprocedur +classanalysi +profil +guid +receiv +class +predict +inlin +aliasanalysi +split +singl +combin +pass +part +work +vortex +develop +wai +structuringoptim +pass +permit +kind +composit +stillallow +pass +develop +larg +independ +eachoth +nice +framework +specifi +iter +data +flowanalys +permit +client +develop +optim +pass +withrel +littl +effort +exampl +vortex +dead +assignmentelimin +pass +approxim +line +code +publicationssom +recent +paper +author +author +personali +love +spici +food +mild +four +letter +word +coke +probabl +kick +caffein +habit +enjoy +moment +spent +wife +heidi +daughter +victoria +would +realli +like +somedai +wife +spent +honeymoon +kauai +hurrican +iniki +recent +biplan +ride +galvin +fly +seattl +guess +never +anyth +anymor +wife +took +flight +consist +minut +flight +around +downtown +seattl +puget +sound +travel +model +biplan +feel +dare +sadli +insur +coverag +doesn +permit +passeng +walk +wing +back +enjoi +ride +view +would +fantast +even +highli +recommend +look +someth +seattl +number +rather +lengthi +hotlist +jdean +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..59d4e647 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,279 @@ +damon +rees +home +pagejon +damon +reesepost +doctor +research +safeti +critic +softwar +groupdepart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +chateau +guggenheim +annex +phone +mail +jdrees +washington +research +interest +problem +safeti +critic +softwar +awar +devic +system +structur +caus +catastroph +fail +comput +hardwar +becom +less +expens +expect +place +softwar +comput +place +control +wider +rang +applic +softwar +advantag +convent +technolog +flexibl +advantag +come +price +softwar +behavior +complex +unpredict +perhap +best +public +exampl +three +mile +island +incid +oper +great +difficulti +diagnos +state +system +emerg +requir +stage +softwar +develop +project +success +especi +respect +safeti +reason +colleagu +concentr +requir +especi +commun +requir +requir +specif +develop +state +base +languag +call +requir +state +machin +languag +rsml +valid +us +languag +specifi +tca +avion +system +doctor +thesi +invent +hazard +analysi +procedur +base +hazard +oper +hazop +studi +signific +concept +procedur +borrow +hazop +deviat +henc +name +deviat +analysi +link +html +transcript +dissert +current +project +deviat +analysi +write +confer +articl +summar +deviat +analysi +make +deviat +analysi +softwar +avail +safeti +critic +softwar +group +studi +possibl +dynam +displai +control +deviat +analysi +search +siang +rsml +tool +integr +deviat +analysi +softwar +rsml +tool +kurt +partridg +make +alpha +version +rsml +tool +publicli +avail +kurt +partridg +sean +sandi +rsml +semant +draft +semant +document +includ +discuss +rsml +variant +develop +exampl +improv +semant +academ +histori +inform +comput +scienc +univers +california +irvin +dissert +softwar +deviat +analysi +postscript +comput +scienc +linguist +rice +univers +waxahachi +high +school +waxahachi +public +nanci +leveson +mat +heimdahl +holli +hildreth +rees +requir +specif +process +control +system +ieee +transact +softwar +engin +septemb +postscript +steven +dolin +rees +curv +interpret +diagnost +techniqu +industri +process +ieee +transact +industri +applic +januari +februari +leveson +heimdahl +hildreth +rees +ortega +experi +us +statechart +system +requir +specif +sixth +intern +workshop +softwar +specif +design +como +itali +octob +jdrees +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..d2af25d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,191 @@ +home +page +jack +lojack +lojlo +washington +depart +comput +scienc +engin +univers +washington +seattl +home +page +current +construct +research +paper +convert +thread +level +parallel +instruct +level +parallel +simultan +multithread +abstract +postscript +jack +susan +egger +joel +emer +henri +levi +rebecca +stamm +anddean +tullsen +submit +public +juli +exploit +choic +instruct +fetch +issu +implement +simultan +multithread +processor +abstract +postscript +dean +tullsen +susan +egger +joel +emer +henri +levi +jack +rebecca +stamm +proceed +annual +intern +symposium +comput +architectur +philadelphia +compil +issu +simultan +multithread +processor +postscript +jack +susan +egger +henri +levi +anddean +tullsen +proceed +first +suif +compil +workshop +stanford +januari +improv +balanc +schedul +compil +optim +increas +instruct +level +parallel +abstract +postscript +jack +susan +egger +proceed +sigplan +confer +program +languag +design +implement +jolla +california +june +page +compar +static +dynam +schedul +superscalar +processor +jack +gener +examin +written +report +examin +interact +balanc +schedul +compil +optim +jack +loph +qualifi +examin +written +report +current +work +architectur +compil +support +simultan +multithread +research +interest +also +includ +static +dynam +schedul +superscalar +vliw +processor +instruct +level +parallel +issu +well +compil +multithread +architectur +particular +investig +compil +issu +simultan +multithread +person +jack +page +find +franklin +eseattl +orsieg +hall +room +phone +coupl +pictur +recent +paintbal +experi +pictur +pictur +yahoojlo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..109537b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,54 @@ +sherman +home +page +shermanjoebob +washington +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +usami +research +interest +user +interfac +designinform +navig +visual +project +activ +user +interfac +inform +local +survei +useclass +project +hcreat +impress +home +pagequ +time +sarahsoftballstuff +might +want +automat +suggest +page +link +user +interfac +research +relat +topic +directori +us +pagesif +browser +support +send +mail +tojoebob +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..8867a2ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,73 @@ +home +page +joshua +seim +home +page +joshua +seim +abstract +joshua +seim +biolog +base +neural +network +system +current +test +theunivers +washington +depart +comput +scienc +begunin +lockean +blank +slate +josh +learn +emul +observedbehavior +successfulli +accomplish +sever +task +graduatingfrom +colleg +travel +volit +recent +start +demonstr +potenti +independ +creativ +thought +current +taskw +present +josh +earn +comput +scienc +expect +take +sever +year +document +provid +overviewof +josh +cognit +ambulatori +achiev +organ +person +academichierarchi +addition +futur +work +discuss +within +context diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..a9eb5135 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,7 @@ +jovan +home +page +jovan +home +page +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..ba3116f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,78 @@ +joanna +powerjoanna +pagehi +joanna +cat +academ +interestsmi +main +interest +comput +scienc +graphic +grad +school +uwneat +stuff +alma +matercool +graphic +link +jonathan +shadegraph +research +uwduoton +reproductionmi +main +academ +interest +biologi +especi +genet +molecular +biologi +alma +matermost +recent +site +gain +employmentpubl +joanna +power +brad +west +eric +stollnitz +david +salesin +reproduc +color +imag +duoton +proceed +siggraph +page +york +real +lifepast +homesdiversionsgend +issuesstatu +women +comput +sciencenow +home +pagefeminist +major +onlineultim +frisbeefun +stufffroggi +page +sean +quotesbrad +comic +musicevan +jokes +page +pagesmi +herojpow +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..b94a0a52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,19 @@ +jonathan +shake +jonathan +shake +sieg +hall +comput +scienc +engin +univers +washington +seattl +ahoi +homepag +finderresumlinkslast +updat +august +jshake +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..5cdb1031 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan +alemanyjuan +alemani +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..de747c3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,157 @@ +kari +pulli +home +pagekari +antero +pullii +third +year +graduat +student +comput +scienc +engineeringdepart +univers +ofwashington +work +interest +comput +graphic +comput +vision +andmathemat +try +combin +aspect +thesedisciplin +research +professor +depart +work +closest +tonyderos +graphic +actual +left +uwfor +pixar +lindashapiro +vision +addition +work +werner +stuetzl +andjohn +mcdonald +statist +duchamp +mathemat +andhugu +hopp +rick +szeliski +microsoft +research +qual +project +tribor +triplet +base +object +recognitionsystem +work +linda +report +technic +report +depart +comput +scienc +engin +universityof +washington +current +work +surfacereconstruct +rang +data +multipl +baselin +camerasystem +obtain +data +subdivis +surfac +waveletanalysi +surfac +geometri +reflect +function +pass +gener +examin +topic +rigidregistr +data +click +find +class +project +year +comput +architecturesystem +class +taught +susanegg +distribut +comput +theclass +taught +brianbershad +imag +understand +theclass +taught +steven +tanimoto +present +technic +sketch +siggraph +getto +slide +subdivis +surfac +slide +remov +wavelet +herear +slide +speaker +note +eacutesum +eacut +find +sieg +hall +comput +scienc +engin +univers +washington +seattl +email +kapu +washington +home +union +seattl +folk +takavainionti +oulu +finland +kari +antero +pulli +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..80e4d185 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,20 @@ +anna +karlinanna +rochel +karlinassoci +professor +univers +washington +sincejuli +work +comput +scienc +engin +depart +univers +washington +seattl +home +page +paperskarlin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..1fac85aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,13 @@ +home +page +yeunghom +page +yeungperson +infomi +picturemi +researchtelnet +machinessend +email +back +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..ad7396ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,109 @@ +kurt +partridg +kurt +partridg +academ +inform +graduat +student +comput +scienc +depart +univers +washington +interest +includ +softwar +engin +softwar +specif +specif +usabl +readabl +applic +formal +method +specif +softwar +safeti +page +describ +other +work +area +also +dabbl +user +interfac +design +human +comput +interact +java +program +recent +public +kurt +partridg +bddtcl +environ +visual +manipul +binari +decisiondiagram +interact +poster +html +postscript +poster +preview +postscript +nanci +leveson +bauer +mat +heimdahl +wayn +ohlrich +kurt +partridg +vivek +ratan +rees +environ +safeti +critic +softwar +nasa +confer +safeti +qualiti +postscript +background +start +graduat +school +complet +comput +scienc +berkelei +live +love +suburban +life +thousand +oak +parent +sister +name +oti +right +kurt +humor +corner +univers +washington +seattl +voic +kepart +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..8a6fb88d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,53 @@ +keith +golden +home +page +questa +pagina +anch +italiano +keith +golden +researchsoftbotsplanningkrselect +publicationscurriculum +vita +also +inpostscriptrandom +hackingwordbot +collabor +dictionari +like +bicycl +tour +languag +paint +photographi +natur +coffe +godless +pinko +stuff +dislik +suit +lawyer +car +friend +ellenmarcruben +laurennickrich +joannavivek +advisor +oren +etzioni +weld +keithgolden +depart +ofcomput +scienc +engin +univers +washington +seattl +kgolden +washington +complet +list diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..5bc2a2e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,219 @@ +kingsum +chow +kingsum +chow +kingsum +washington +kingsum +washington +educomput +scienc +engin +depart +univers +washington +seattl +usathi +inform +highwai +alwai +construct +tabl +content +person +research +upcom +confer +resum +current +schedul +bridg +glossari +univers +hong +kong +suggest +feedbackresearchmi +advisor +david +notkin +asynchron +softwar +evolut +softwar +develop +toolspap +line +softwar +qualiti +manag +respons +driven +softwar +evolut +readi +kingsum +chow +david +notkin +semi +automat +updat +applic +respons +librari +chang +technic +report +revis +version +appear +icsm +kingsum +chow +david +notkin +asynchron +softwar +evolut +asia +pacif +workshop +softwar +engin +research +march +hong +kong +kingsum +chow +program +transform +asynchron +softwar +mainten +proceed +ics +workshop +program +transform +softwar +evolut +william +griswold +editor +intern +confer +softwar +engin +april +seattl +washington +us +site +pcct +sorcererpcct +home +page +pcct +page +terrenc +parr +note +pcct +newbiesresumepleasedrop +mailto +resum +specifi +text +postscript +format +univers +hong +kongchines +univers +hong +kong +univers +hong +kong +univers +scienc +technolog +hong +kong +polytechn +univers +citi +univers +hong +kongsingapor +sitessingapor +onlin +world +wide +server +alumnu +websom +campu +friendstom +liew +fook +home +page +wang +page +jiang +weidongu +relatedunivers +washington +style +polici +manual +these +dissert +graduat +school +webserv +univers +book +storeinvestmentsfre +minut +delai +quot +watch +quot +market +data +experiment +mutual +fund +chart +line +invest +center +stock +commod +technic +analysismisc +read +chines +list +thoma +china +new +servic +welcom +onlin +hong +kong +movi +movi +movi +visit +sinc +kingsum +chow +last +modifi +date diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..e64c2c1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,108 @@ +kevin +bold +kevin +boldingkwb +washington +comput +scienc +engin +depart +univers +washington +seattl +juvenil +squirt +wander +search +suitabl +rock +hunk +coral +cling +make +home +life +task +rudimentari +nervou +system +find +spot +take +root +doesn +need +brain +eat +rather +like +get +tenur +dennett +conscious +explain +research +current +work +build +high +speed +latencylan +chaotic +router +previou +researchha +chaoticrout +form +minim +adapt +rout +formass +parallel +multicomput +profession +assist +professor +electr +engin +seattl +pacif +univers +also +work +part +time +researchassoci +univers +ofwashington +signific +paper +written +archiv +ofth +chaotic +rout +group +spend +time +teach +electr +comput +engineeringat +seattl +pacif +univers +person +photo +took +comethyakutak +seattl +moustach +real +case +want +visit +home +anoth +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..2e159f9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,45 @@ +richard +ladnerrichard +ladnerprofessor +depart +comput +scienc +engin +univers +washington +seattl +mail +ladner +washington +phone +offic +sieg +hall +room +person +short +biographyresearch +public +studentsteachingcomput +program +fall +quarter +introduct +comput +commun +network +spring +quarter +introduct +formal +model +comput +scienc +winter +quarter +data +structur +spring +quarter +ladner +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..dbdfeb5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,147 @@ +larri +mcmurchi +home +page +larri +mcmurchiedepart +comput +scienc +engin +univers +ofwashington +seattl +washington +voic +mail +larri +washington +offic +sieg +hall +room +current +research +larri +mcmurchi +director +laboratori +integratedsystem +chemistri +western +washington +univers +chemistri +univers +washington +hework +area +quantum +chemistri +graduat +studi +primari +focu +number +evalu +class +ofintegr +gaussian +function +later +appli +work +theconstruct +larg +spars +hamiltonian +matric +coauthorof +comprehens +packag +comput +program +meld +us +abinitio +calcul +small +molecul +sinc +join +staff +depart +comput +scienc +andengin +larri +supervis +work +technic +staff +ofth +laboratori +integr +system +coauthor +wirec +aschemat +captur +system +allow +design +code +withschemat +symbol +creat +concis +parameteriz +representationof +design +larri +also +involv +develop +andcommerci +mactest +integr +softwar +hardwareenviron +function +test +chip +board +andsubsystem +recent +work +area +fpga +andha +develop +gener +purpos +perform +driven +router +fpga +northwest +laboratori +integr +system +mactest +cost +vlsi +chip +tester +triptych +high +densiti +fpga +architectur +public +journal +articl +upcom +confer +return +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..a6f92079 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,321 @@ +nanci +leveson +home +page +nanci +levesondepart +comput +scienc +engineeringunivers +washingtonbox +express +mail +sieg +hall +seattl +leveson +washington +nanci +leveson +professor +join +faculti +come +california +search +rain +receiv +degre +mathand +comput +scienc +ucla +spent +form +year +professor +univers +california +irvin +professor +leveson +start +area +research +softwar +safeti +concern +problem +build +softwar +real +time +system +failur +result +loss +life +properti +advantag +topic +nobodi +question +goal +except +misanthrop +matter +anywai +student +recent +produc +aform +requir +specif +tca +real +collis +avoid +system +requir +commerci +aircraft +airspac +lesson +learn +project +never +anyth +like +seem +pleas +though +adopt +theiroffici +specif +student +current +work +safeti +analysi +specifi +behavior +tca +claim +thatyou +read +anyth +fact +take +train +late +safetyresearch +project +also +work +model +analysi +autom +highwai +automobil +variou +aerospac +system +subtop +research +area +includ +model +analysi +safeti +specif +safe +softwar +design +softwar +fault +toler +verif +valid +safeti +professor +leveson +editor +chief +ieee +transact +softwareengin +elect +member +board +director +computingresearch +associ +member +nation +research +council +commissionon +engin +technic +system +member +committe +comput +public +polici +recent +chair +nation +research +council +studi +evalu +space +shuttl +softwar +process +levesoni +fellow +award +aiaa +inform +systemsaward +contribut +space +aeronaut +comput +technolog +andscienc +develop +field +softwar +safeti +promotingrespons +softwar +system +engin +practic +life +propertyar +stake +year +leveson +book +softwar +safeti +safewar +system +safeti +comput +addison +weslei +publish +recent +paper +avail +list +paper +isalso +avail +copi +favorit +paper +actual +keynoteaddress +conf +softwar +engin +melbourn +titl +high +pressur +steam +engin +comput +softwar +click +qual +project +avail +follow +topic +appli +hazardanalysi +techniqu +aircraft +collis +avoid +system +model +writtenin +state +machin +style +languag +call +rsml +determin +wai +build +fault +tree +analys +rsml +model +gener +design +newrequir +specif +languag +includ +specifi +human +comput +interfac +deriv +gener +principl +design +languag +appli +hazard +analysi +human +machin +interfac +model +model +human +machin +interfac +control +system +aircraft +cockpit +analyz +aircraft +accid +report +involv +mode +awar +problemsand +gener +issu +deriv +inform +safe +design +human +comput +interact +finger +finger +leveson +washington +inform +citi +airport +current +perhap +contact diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..2c60afd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,328 @@ +hank +levi +home +page +henri +levi +professor +join +faculti +hank +current +research +project +focu +oper +system +parallel +distribut +comput +comput +architectur +particularli +simultan +multithread +architectur +object +base +languag +environ +recent +projectcal +opal +deal +singl +address +space +oper +system +comput +architectur +theetch +projecti +produc +tool +perform +instrument +optim +binari +execut +levi +author +book +numer +paper +comput +system +includ +outstand +paper +select +four +consecutiveacm +symposia +oper +system +principl +former +chair +sigop +special +interest +group +onoper +system +program +chair +theth +symposium +oper +system +principl +tobe +held +hold +carnegi +mellon +universityand +univers +washington +come +washington +consult +engin +digit +equip +corpor +work +span +rang +oper +system +architectur +distribut +system +workstat +hank +fellow +associ +comput +machineryand +recipi +fulbright +research +scholar +award +eleven +master +student +nine +student +survivedlevi +supervis +student +haveal +escap +academ +posit +major +research +lab +glu +workstat +hank +usual +befound +ski +bike +plai +tenni +help +lead +thedepart +infam +softbal +team +smile +potato +death +sampl +dessert +seattl +mani +dessert +parlor +recent +publicationsreduc +network +latenc +us +subpag +global +memori +environ +jamrozik +feelei +voelker +evan +karlin +levi +vernon +inproceed +seventh +confer +architectur +support +program +languag +oper +system +octob +postscript +implement +global +memori +manag +workstat +cluster +michael +feelei +william +morgan +freder +pighin +anna +karlin +henri +levi +chandramohan +thekkath +appear +proc +symposium +oper +system +principl +decemb +simultan +multithread +maxim +chip +parallel +dean +tullsen +susan +egger +henri +levi +proc +annual +intern +symposium +comput +architectur +june +exploit +choic +instruct +fetch +issu +implementablesimultan +multithread +processor +dean +tullsen +susan +egger +joen +emer +henri +levi +jack +rebecca +stamm +proc +intern +symposium +comput +architectur +share +protect +singl +address +space +oper +system +jeffrei +chase +henri +levi +michael +feelei +edwardd +lazowska +transact +comput +system +novemb +integr +coher +recover +distribut +system +michael +feelei +jeffrei +chase +vivek +narasayya +henri +levi +proc +first +symposium +oper +system +design +implement +novemb +hardwar +softwar +support +effici +except +handl +thekkath +levi +proc +conf +arch +support +prog +languag +oper +system +asplo +octob +separ +data +control +transfer +distribut +oper +system +thekkath +levi +lazowska +proc +conf +arch +support +prog +languag +oper +system +asplo +octob +levi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..06073029 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,156 @@ +opal +oper +system +projectop +opal +project +explor +oper +system +structur +tunedto +need +complex +applic +numberof +cooper +program +manipul +larg +share +persist +databaseof +object +opal +code +data +exist +singl +huge +share +address +space +singl +address +space +enhanc +share +andcooper +address +uniqu +time +interpret +thu +pointer +base +data +structur +directlycommun +share +program +time +bestor +directli +secondari +storag +without +need +translat +structur +simplifi +avail +larg +addressspac +provid +alpha +mip +risc +protect +opal +independ +singl +address +space +opal +thread +execut +within +protect +domainthat +defin +virtual +page +right +access +right +access +page +easili +transmit +oneprocess +anoth +result +much +flexibl +protectionstructur +permit +differ +dynam +chang +protect +option +depend +trust +relationshipbetween +cooper +parti +believ +organ +canimprov +structur +perform +complex +cooperatingappl +opal +prototyp +built +alpha +platform +ofth +mach +oper +system +inform +sourc +list +opal +relat +paper +faculti +member +hank +levi +lazowska +jeff +chase +duke +univers +current +graduat +student +mike +feelei +ashutosh +tiwari +vivek +narasayya +dylan +mcname +relat +inform +singl +address +space +mail +list +archiv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..0505ebc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,209 @@ +lopezgu +lopezlopez +washington +school +sieg +hall +depart +comput +scienc +engin +univers +washington +seattl +home +student +univers +washington +dissertationresearch +design +implement +constraint +imper +object +orient +languag +curriculum +vita +publicationsgu +lopez +bjorn +freeman +benson +alan +born +kaleidoscop +constraint +imper +program +languag +brian +mayoh +tougu +jann +penjam +editor +constraintprogram +springer +verlag +nato +advanc +studi +instituteseri +seri +comput +system +scienc +also +publisheda +technic +report +lopez +bjorn +freeman +benson +alan +born +constraint +object +ident +inproceed +european +confer +object +orient +program +bologna +itali +juli +lopez +bjorn +freeman +benson +alan +born +implement +constraint +imper +program +languag +kaleidoscop +virtual +machin +inproceed +confer +object +orient +programmingsystem +languag +applic +portland +oregon +octob +oopsla +tutorialsi +also +tutori +chair +upcom +oopsla +conferencein +jose +california +peopl +object +orient +technolog +andsoftwar +develop +meet +speak +oopsla +confer +oopsla +well +known +breadth +depth +high +qualiti +itsextens +tutori +program +previou +year +oopsla +tutorialshav +cover +aspect +object +orient +technolog +introductorysurvei +industri +softwar +engin +practic +lead +edg +academicresearch +topic +respons +request +past +attende +weespeci +encourag +propos +engin +issu +advanc +topic +anyon +consid +submit +propos +tutori +requestguidelin +tutori +submiss +tutori +chair +theoopsla +electron +hotlin +electron +mail +submiss +propos +encourag +enthusiast +accept +tutori +proposalswithout +email +address +accept +tutori +propos +march +notif +accept +withcamera +readi +note +august +interest +link +constraint +oopsla +tutori +green +direct +jimi +hendrix +grave +star +war +collector +archiv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..d20abcac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,45 @@ +omid +home +page +omid +madani +madani +washington +comput +scienc +engin +depart +univers +washington +seattl +chateau +suit +bhello +curiou +browser +welcom +fourth +year +graduat +student +depart +enjoytheori +also +like +keep +touch +areasinclud +graphic +life +work +academ +want +look +islamicarchitectur +isfahan +best +nomine +citi +home +countri +iran diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..48409060 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,21 @@ +mike +perkowitz +page +mike +perkowitznewsflash +mike +goe +blond +area +research +academia +music +creativ +randomfavorit +sheba +voyeur +written +grooveneedl +espressoresumemik +perkowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..935c5a8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,84 @@ +marc +langheinrich +homepagemarc +langheinrich +homepagemarc +langheinrichuniversitt +bielefeld +univers +washingtontechnisch +fakultt +depart +comput +scienceemail +imlangh +techfak +bielefeld +email +marclang +washington +eduabout +myselfi +spent +last +year +depart +comput +scienc +theunivers +washington +visit +graduat +student +thefulbright +program +check +follow +link +depthinform +resum +project +short +biopost +addressa +septemb +back +germani +finish +mastersat +univers +bielefeld +pleas +contact +german +address +homeschoolgermanyringstra +maintalphon +paulusplatz +bielefeldphon +woodlawn +seattl +phone +sieg +hall +phone +browser +support +tabl +access +data +list +formatmarc +langheinrich +univers +washington +depart +comput +scienc +email +marclang +washington +http +washington +home +marclang diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..1ba24ebc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,172 @@ +home +marla +baker +marla +washington +chief +editor +depart +comput +scienc +engin +univers +washington +seattl +partner +crime +bentlei +academ +interestsgraph +user +interfac +human +comput +interact +educ +softwar +comput +support +collabor +learn +cscl +comput +graphic +visual +techniqu +visual +program +languag +current +work +current +work +stevetanimoto +lauren +bricker +coimag +project +devleop +collabor +educ +activ +order +explor +cooper +contol +object +goal +work +investig +differ +wai +multipl +user +cansimultan +share +manipul +given +object +wai +assess +interact +also +work +part +time +interfac +packard +bell +compani +resum +publicationsbak +marla +stephen +eick +space +fill +softwar +visual +journal +visual +languag +comput +june +burnett +baker +bohu +carlson +yang +scale +visual +program +languag +ieee +comput +special +issu +visual +program +march +burnett +margaret +marla +baker +classif +system +visual +program +languag +journal +visual +languag +comput +septemb +baker +marla +stephen +eick +visual +tool +larg +softwar +system +proceed +intern +confer +softwar +engin +sorento +itali +baker +marla +stephen +eick +baker +eick +method +apparatu +displai +hierarch +inform +larg +softwar +system +patent +applic +submit +octob +tutori +geometr +transform +imag +metip +program +environ +check +page +offic +sieg +marla +baker +marla +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..ac4e3a2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,118 @@ +matthai +philipos +home +page +matthai +philipos +work +dynam +compil +project +dynam +compileri +beast +gener +optim +code +runtim +shortterm +interest +figur +produc +good +code +dynam +runtim +modern +processor +architectur +applic +side +think +interpret +basedsystem +real +time +constraint +like +java +browser +canbenefit +select +runtim +compil +like +wire +asystem +runtim +compil +goe +work +withprofessor +susan +eggersand +craig +chamber +work +comput +scienc +engin +depart +univers +washington +seattl +phone +home +seattl +public +ausland +philipos +chamber +egger +bershad +fast +effect +dynam +compil +confer +onprogram +languag +design +implement +chamber +egger +ausland +philipos +mock +andp +pardyak +automaticdynam +compil +support +event +dispatch +extens +system +workshop +compil +support +system +softwar +februari +bookmark +stuff +plai +frequentlymiscellan +link +stuff +local +importancefrom +past +abuwhi +page +black +blue +ribbon +campaign +matthai +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..9a6f8499 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,229 @@ +neil +mckenzi +menu +fine +dine +index +page +contact +futur +project +current +project +past +project +public +list +person +inform +game +contact +inform +neil +mckenzi +mitsubishi +electr +research +laboratori +broadwai +floor +cambridg +phone +mail +mckenzi +merl +current +projectsgonna +teenag +lobotomi +ramonesi +live +east +coast +mile +east +seattl +andwork +merl +note +current +involv +projectconcern +real +time +volum +render +medic +data +copiou +free +time +expatri +graduat +student +work +onchaot +routingwith +faculti +advisor +carl +ebel +larri +snyder +chaotic +rout +packet +rout +algorithm +mesh +torusnetwork +dissert +design +implement +thecranium +messag +pass +interfac +compatiblewith +network +us +chaotic +rout +past +projectsi +teach +assist +summer +design +implement +chip +tester +call +mactest +maintain +carl +netlist +graph +isomorph +tool +calledgemini +industri +speak +knowna +layout +schemat +tool +gemini +avail +interest +pleas +send +mail +larri +mcmurchi +larri +washington +public +cranium +interfac +messag +pass +adapt +packetrout +network +proceed +parallel +comput +rout +andcommun +workshop +seattl +link +tomactest +home +page +gemini +user +guid +last +updat +march +person +inform +angel +marri +pictur +hous +arlington +massachusett +head +east +coast +us +livein +fashion +seattl +neighborhood +ofballard +angel +creativ +page +contain +exampl +comput +artworkcr +us +adob +photoshop +ownedthi +year +onlyth +memori +remain +shirt +correctlyguess +answer +toriddl +jour +octob +label +place +jar +mckenzi +countri +farm +honei +produc +myuncl +mckenzi +edmonton +alberta +canada +amus +neil +linkschairman +shot +linksnorm +gregori +bookmark +halcyon +eugen +spafford +link +purdu +randi +pausch +shot +link +virginia +wallach +scool +link +princeton +neil +mckenzi +mckenzi +washington +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..0ee9abae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,251 @@ +marc +fiuczynski +home +page +marc +fiuczynski +home +page +marc +fiuczynski +washington +comput +scienc +engin +depart +univers +washington +seattl +backgroundi +graduat +student +univers +washington +depart +comput +scienc +engin +grewup +germani +near +sseldorf +spent +year +highschool +princeton +receiv +comput +scienc +fromrutg +univers +spent +sever +summer +bell +lab +mitr +corpor +work +rang +ofproject +sole +proprietor +system +companythat +creat +decemb +sell +distribut +fault +toler +network +base +telephon +system +built +scratch +setof +chasi +processor +us +univoic +telephon +interfac +cardsand +vxwork +oper +system +time +spend +hack +spin +safe +adapt +extens +oper +system +primari +contribut +spin +extens +protocol +architectur +forappl +specif +network +applic +achiev +compellingperform +improv +us +structur +compar +tosimilar +applic +run +commerci +platform +demonstr +work +servic +http +request +contacthttp +spin +washington +recent +report +paper +extens +protocol +architectur +forappl +specif +network +design +implement +perform +paper +describ +anextens +protocol +architectur +allow +anyon +custom +anin +kernel +protocol +graph +enabl +applic +achiev +betterperform +compar +similar +applic +run +conventionaloper +system +demonstr +work +servic +http +request +contacthttp +spin +washington +appear +proceed +winter +usenix +technicalconfer +extens +safeti +perform +spin +oper +system +design +implement +perform +paper +appear +proceed +fifteenth +symposium +oper +system +principl +languag +support +extens +oper +system +pretti +happi +deal +shortcom +inord +languag +safe +extens +oper +system +paper +describeshow +address +shortcom +safe +dynam +link +extens +oper +system +describ +dynam +linker +load +code +kernel +point +isth +abil +creat +manag +linkabl +namespac +describ +interfac +andcollect +interfac +protect +softwar +issu +posit +paper +compar +softwar +hardwar +protect +mechan +proceed +fifth +ieee +workshop +topic +oper +system +region +analysi +parallel +elimin +method +data +flow +analysi +ieee +transact +softwar +engin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..671dfe7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,39 @@ +melani +fulgham +comput +scienc +minim +versu +minim +rout +algorithm +rout +method +model +develop +rout +model +help +predict +compar +perform +router +real +parallel +machin +deflect +rout +upper +lower +bound +practic +requir +sort +deflect +rout +algorithm +mesh +topolog +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..a2a2abd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,74 @@ +meng +heng +homepag +meng +homepagemenghe +washington +edubox +depart +comput +scienc +engineeringunivers +washingtonseattl +second +year +student +comput +scienceat +univers +washington +undergrad +univers +pennsylvania +research +interestsi +interest +imag +retriev +problem +try +findimag +huge +databas +imag +virag +andqbicar +commerci +exampl +similar +kind +stuff +work +snapshot +done +singaporesingapor +infomap +provid +fact +andstatist +singapor +singaporeonlin +guid +plan +take +trip +nation +comput +boardi +charg +transform +singapor +anintellig +island +work +graduat +strait +time +singapor +main +english +newspap +visit +sinc +menghe +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..8711a5e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,88 @@ +michael +ernst +home +pagemichael +ernsti +graduat +student +univers +washington +comput +scienc +depart +previous +lectur +riceunivers +comput +sciencedepart +research +programanalysi +group +microsoft +research +graduat +student +laboratori +comput +scienc +eec +depart +page +frequent +updat +technic +interest +includ +compil +static +analysi +slice +debug +optim +code +serial +parallel +program +program +chair +intermedi +represent +workshop +coloc +popl +intellectu +properti +particularli +comput +program +area +includ +game +theori +cryptographi +philosophi +denot +semanticsi +maintain +list +resourcesfor +confer +workshop +organ +occasion +manag +slip +awai +work +carri +real +life +link +possibleinterest +includ +page +maintain +michael +ernst +mernst +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..66ec8882 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,159 @@ +marku +mock +home +page +marku +mock +mock +washington +comput +scienc +rttemberg +grew +anotherpart +state +district +biberach +upper +swabia +oberschwaben +come +studi +comput +scienc +univers +karlsruh +whichi +obtain +diplom +comput +scienc +also +spent +year +umass +fulbright +grante +main +research +interest +parallel +distribut +comput +object +orient +system +compil +current +work +dynam +compil +interest +includ +spanish +latin +american +cultur +travel +good +book +labyrinth +solitud +chess +mainstream +movi +dieangst +torwart +beim +elfmet +know +handk +salsa +merengu +danc +still +time +left +check +els +seattl +come +publicationssepar +list +link +interest +stuff +current +chess +event +chess +olympiad +yerewan +colloquia +oopsla +volunt +page +mossi +bit +grad +journal +link +csek +home +page +link +home +page +link +csebi +home +page +link +cse +home +page +cours +graduat +studentsimag +depart +electr +engineeringy +wouldn +expect +squar +live +view +metacrawl +search +altavista +search +deutsch +well +realaudio +live +stuff +cool +linksand +quot +consid +lili +field +grow +toil +neither +spin +unto +even +solomon +glorywa +arrai +like +matthew +page +access +time +sinc +last +updat +mock +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..7d4017c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,18 @@ +vivek +narasayya +home +page +vivek +narasayya +nara +washington +comput +scienc +engin +depart +univers +washington +seattl +person +informationresearch +interestspap diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..a6b65768 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,81 @@ +himanshu +nautiy +home +pagehimanshu +nautiyalthi +page +heavi +construct +himanshu +nautiyaldept +comput +scienc +engin +mail +stop +univers +washington +seattl +offic +sieg +offic +phone +cours +take +autumn +quarter +principl +digit +system +design +artifici +intellig +finger +nautiy +washington +edugod +gift +personkind +dougla +adam +terri +pratchett +pelham +grenvil +wodehouseth +order +alphabet +last +name +impli +favorit +link +place +india +internet +radio +search +engin +india +himanshu +nautiy +name +friend +delhi +finish +tech +astronomi +skate +comput +scienc +aviat +travel +numismat +sound +much +profound +coin +collect +cook +internet +movi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..26e13552 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,95 @@ +labyrinth +mediocr +bemoan +hype +skeptic +cynic +sinc +research +current +work +automaticconstruct +wrapper +inform +resourc +alsointerest +sever +area +artifici +intellig +andcognit +scienc +paper +beeninvolv +stuff +avail +anonym +servic +provid +glbal +infrmatin +sperhighwai +preliminari +version +divers +meter +avail +pictur +hand +shortli +surgeri +need +random +number +alwai +handi +know +date +time +week +favorit +color +avail +line +lost +easili +return +page +ronald +wilson +reagan +need +temperatur +look +javascript +enabl +browser +automat +send +mail +great +republican +tell +like +miscellani +need +contact +bookmark +technolog +societi +awar +bitter +ironi +involv +nonetheless +madeavail +wendel +berri +guidelin +constitutesgood +technolog +comment +nichola +kushmerick +uwcs +seattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..4398e802 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,109 @@ +wayn +ohlrich +wayn +ohlrich +ohlrich +washington +depart +comput +scienc +engin +univers +washington +seattl +offic +guggenheim +annex +chateau +public +research +current +work +nanci +leveson +model +check +automat +test +gener +specif +spare +time +work +brian +bershad +anna +karlin +romer +memori +perform +analysi +project +local +known +sever +damag +project +group +paper +make +debut +isca +itali +summer +safeti +research +home +page +leveson +memori +sytem +home +page +isca +romer +ohlrich +bershad +karlin +reduc +memori +overhead +us +onlin +superpag +promot +univers +class +inform +cours +person +interest +game +wayn +world +wonder +inform +page +contain +sort +us +link +seattl +inform +home +page +invest +page +contain +invest +inform +research +inform +found +us +page +creat +octob +last +modifi +march diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..0d3f005e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,106 @@ +gershoni +gershoni +gershoni +washington +graduat +studentcomput +scienc +engin +departmentunivers +washingtonoffic +sieg +home +seattl +second +year +graduat +student +univers +ofwashington +move +seattl +live +california +seven +year +fouryear +berkelei +three +year +lancast +northeast +angel +origin +israel +live +haifa +holon +like +practic +kwon +plai +basketbal +hike +quarter +take +class +whole +bunch +seminar +amta +comput +architectur +usual +find +offic +sieg +class +offic +hour +aremondai +wednesdai +potenti +employ +welcom +look +resum +pictur +took +last +summer +click +tose +cool +shirt +design +made +summer +graphicsprogram +call +virtual +realiti +interest +link +time +daili +new +summari +york +time +riderlink +seattletransport +option +inform +israel +comput +scienc +mathemat +depart +univers +california +berkelei +gershoni +washington +access +sinc +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..88031629 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,90 @@ +ross +ortega +ross +ortega +wear +jean +ortega +washington +depart +comput +scienc +engin +univers +washington +seattl +knew +would +call +research +would +albert +einstein +welcom +home +pageaft +go +school +work +boston +year +decid +head +west +realli +northwest +came +fall +leav +sometim +accord +advisor +gaetano +borriello +offici +work +chinook +project +tool +real +time +embed +control +system +unoffici +brew +beer +learn +hack +try +teach +german +shepherd +tequila +behav +profession +section +chinook +project +myresum +file +educ +experi +public +paper +puppi +pictur +tequila +tequila +color +tequila +color +offic +sieg +check +page +link +find +interest +last +updatedthu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..9704812e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,138 @@ +untitl +document +flat +morri +minor +pardodepart +comput +scienc +engineeringunivers +washingtonbox +seattl +washingtonusapardo +washington +edunot +show +blue +ribbon +asimgsrc +http +graphic +icon +blueribbon +rib_trn_plain_sm +show +quiet +opposit +free +speechprohibit +nation +telecommun +bill +likewis +white +letter +black +background +beth +everybodi +need +pardo +around +sometim +pardo +sometim +beth +share +academicsom +papersi +work +find +particularli +interest +runtim +code +gener +rtcg +instruct +simul +trace +tool +home +page +oncomput +architectureandcompil +quick +link +otherpeopl +work +comput +class +thesi +stylenon +academicfeatur +item +featur +item +month +weak +site +week +doesn +blink +anymor +regular +itemsbicyclesbusinessescomputersfoodhumori +famou +thing +relat +legal +ethic +weirdnesslinux +journalmusicgoofi +politicssci +think +though +unrel +stuff +think +transport +seattl +movi +list +seattl +film +festiv +dant +search +truli +gross +stori +trepan +privaci +site +log +mail +address +wors +take +data +disk +everi +time +consid +weather +courtesei +seattl +particular +also +weather +courtesei +newhous +newspap +also +weather +courtesi +yesterdai +stuffpardo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..d5afc1a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,119 @@ +przemek +pardyak +home +page +przemyslaw +pardyak +pardi +washington +first +week +grad +school +coupl +year +later +third +year +graduat +student +comput +scienc +theunivers +washington +current +research +area +ofoper +system +also +interast +distribut +system +languag +compil +besid +grad +school +life +fill +withth +seattl +drizzl +hike +outdoor +activ +notbusi +enjoi +school +drizzl +book +music +find +short +descript +research +interest +resum +also +list +paper +outdat +happenswhen +busi +schedul +projectsspinan +extens +oper +system +built +gloriou +leadership +brian +bershad +group +mechan +object +base +systemsgroup +commun +group +mechan +emerald +object +basedprogram +languag +time +system +interest +link +polish +connect +variou +resourc +somehow +relat +poland +research +relat +project +relat +mine +univers +research +unrel +miscellan +work +comput +scienc +engin +depart +univers +washington +seattl +phone +home +seattl +phone +pardi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..3dc96102 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,212 @@ +paul +franklin +home +pagepaul +franklin +home +pagei +current +graduat +student +univ +washington +inth +depart +comput +scienc +offici +myoffic +sieg +work +rapid +project +us +first +year +student +thesumm +second +year +someon +express +concern +aboutthi +character +usual +somewher +north +seattl +iliv +school +anoth +pictur +best +oneof +dai +around +scan +better +on +norwegian +poem +likea +collect +fortun +receiv +friend +localchines +restaur +mundan +stuffi +hope +folk +might +find +stuff +us +hotlink +pagesstuff +maintainmi +schedul +rememb +updat +contact +love +travel +necessarili +tell +everyon +hire +mewher +come +high +school +diploma +live +high +school +inmorgan +hill +taught +prolog +first +time +graduat +comput +scienc +engin +fromuc +davi +taught +prolog +second +time +andy +taught +prolog +glad +ididn +year +univers +bergen +ialso +research +professor +electr +andcomput +engin +depart +stuffwhil +davi +partner +variou +relatedact +tend +matt +chri +jame +evengot +togeth +recent +us +chri +marriag +joann +anexcus +chri +brother +also +made +itin +photo +throughout +undergradu +year +kept +bike +never +davi +flat +year +exchang +student +univers +ofbergen +bike +hillier +longer +rout +returnedto +davi +took +rollerblad +sinc +bike +around +town +wasnow +easi +bike +drop +year +work +hewlettpackard +return +vengeanc +move +toseattl +done +annual +seattl +portland +bike +ride +intwo +dai +year +inseason +march +april +june +rest +year +justcommut +bike +lot +danc +particularli +lindyhop +know +everi +html +document +header +linethat +look +someth +like +doctyp +html +public +ietf +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..42a93aa1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,115 @@ +frdric +pighin +pighin +washington +comput +scienc +lcommun +wonder +citi +tourist +quarter +ta +taught +anna +karlin +gui +found +much +often +dani +qual +report +rest +like +british +movi +monti +python +swim +nick +cave +corto +maltes +italian +comic +cat +pari +berlin +venis +simpson +mpeg +rain +surpris +like +traditionn +french +marin +song +collect +otherwis +work +graphic +anna +karlin +supervis +although +formerli +studi +systemher +name +paper +implement +global +memori +manag +workstat +cluster +michael +feelei +william +morgan +freder +pighin +anna +karlin +henri +levi +chandramohan +thekkath +proceed +symposium +oper +system +principl +decemb +postscript +live +action +breath +take +pictur +door +refresh +everi +minut +lucki +might +even +look +darren +juan +note +might +dark +live +action +pictur +squar +refresh +everi +five +minut +note +might +rain diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..19efbf7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,17 @@ +ruth +anderson +home +page +ruth +anderson +washington +comput +scienc +engin +depart +univers +washington +seattl +wxyc +map +brother diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..a39b01cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,18 @@ +home +home +josh +home +page +comput +scienc +engin +depart +univers +washington +seattl +redston +washington +joshua +redston +redston +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..820dd5f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,188 @@ +jakobovit +home +page +jakobovit +washington +comput +scienc +engin +depart +univers +washington +seattl +usai +get +comput +scienc +departmentof +univers +ofwashington +wonder +citi +seattl +alwai +rain +thisup +date +weatherreport +sneak +peek +live +imag +video +camera +mount +outsid +camera +point +beauti +drumhel +fountain +clear +youcan +catch +glimps +rainier +glori +probabl +cleardai +nice +color +pictur +research +develop +base +repositori +manag +program +toolkit +build +multi +media +consol +construct +imag +databas +part +astructur +inform +framework +brain +map +build +knowledg +base +support +digit +anatomist +line +interact +atla +human +bodi +implement +databaseenviron +vision +research +local +expert +persistentprogram +languag +interest +els +proud +creator +internetracquetbal +ladder +taught +advanc +program +extens +wrote +perl +script +manag +rotisseriebasebal +leagu +stand +updat +daili +stat +fromusa +todai +rais +happi +famili +africancichlid +visit +home +town +honolulu +everi +chanc +camp +magic +kalalau +vallei +movi +gambl +stock +market +darn +good +fantasi +footbal +team +newslet +would +javafamili +link +mydad +leon +jame +professor +psycholog +univers +hawaii +whoi +write +book +traffic +psycholog +foster +live +onlin +polem +emanuel +swedenborg +step +dian +nahl +professor +librari +inform +scienc +whoprovid +great +index +onlin +librari +databas +judi +jakobovit +realtor +hawaii +uncl +eddi +jakobovit +run +site +bioscienc +profession +bookmarksif +java +click +drag +word +make +poem diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..66783e56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight +welcom +galleri +twenti +photograph +five +head +robert +grimm +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..ebb13364 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,174 @@ +romer +romer +depart +comput +scienc +engin +univers +washington +seattl +home +offic +romer +washington +eduoffic +chateau +sieg +research +interestsi +research +oper +system +supportfor +high +perform +memori +systemswith +realli +smart +peopl +likebrian +bershad +brad +chen +alan +eustac +anna +karlin +denni +wayn +ohlrich +andwayn +wong +three +recent +paper +subject +reduc +memori +overhead +us +onlinesuperpag +promot +romer +ohlrich +karlin +bershad +isca +dynam +page +map +polici +cach +conflict +resolutionon +standard +hardwar +romer +bershad +chen +osdi +avoid +conflict +miss +dynam +larg +direct +mappedcach +bershad +romer +chen +asplo +addit +friend +studi +perform +ofinterpret +learn +theproject +rockyhom +page +also +wrote +paper +togeth +structur +perform +interpret +romer +voelker +wolman +wong +baer +bershad +levi +asplo +appear +abstract +postscript +bibliographi +lobo +read +listrandom +stuffa +hous +comput +scientist +rai +limb +romer +knee +arthroscop +surgeri +mark +hill +wrist +friend +dylansaid +hair +couldn +flowbe +said +could +beingexperiment +scientist +conduct +experi +judg +result +attend +isca +travel +europ +took +somepictur +romer +eatsomeon +els +food +accompani +sincer +ration +forexampl +romer +lunch +thought +leftth +countri +would +didn +origin +unknown +edward +tuft +tip +public +speak +father +edit +american +journal +physic +place +ticker +symbol diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..abb89de2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,281 @@ +memori +system +research +univers +washingtonmemori +system +researchdepart +comput +scienc +engin +univers +washington +seattl +welcom +home +page +memori +system +research +descript +research +group +investig +techniqu +operatingsystem +improv +memori +system +perform +work +sharesth +follow +featur +reli +combin +simpl +hardwar +support +oper +system +modif +monitor +dynam +behavior +applic +monitor +mechan +incur +small +overhead +runtim +inform +collect +us +identifi +sourc +memori +system +delai +cach +miss +miss +identifi +resolv +bottleneck +overhead +monitor +mechan +also +significantli +improv +overal +system +perform +recent +project +explor +polici +monitorappl +memori +refer +pattern +order +identifi +resolvetlb +perform +problem +poor +perform +result +tlbi +small +cover +current +applic +work +severalmodern +architectur +support +superpag +page +whose +size +amultipl +system +base +page +size +system +tlbperform +improv +us +larger +page +cost +ofwast +memori +intern +fragment +simul +sever +polici +adapt +page +size +dynam +todiffer +region +applic +address +space +constructingsuperpag +copi +compon +page +contigu +region +ofmemori +develop +polici +monitor +miss +balancesth +potenti +benefit +superpag +reduct +futur +tlbmiss +cost +construct +superpag +memorycopi +construct +superpag +misspattern +warrant +polici +attain +perform +largepag +without +intern +fragment +detail +project +paper +reduc +memori +overhead +us +onlin +superpag +promot +isca +appear +look +someon +implement +algorithm +would +makea +good +qual +master +project +project +descript +peoplefaculti +brian +bershad +bershad +washington +anna +karlin +karlin +washington +current +student +denni +dlee +washington +wayn +ohlrich +ohlrich +washington +romer +romer +washington +wayn +wong +waynew +washington +paper +reduc +memori +overhead +us +onlin +superpag +promot +romer +ohlrich +karlin +bershad +isca +appear +dynam +page +map +polici +cach +conflict +resolut +standard +hardwar +romer +bershad +chen +osdi +avoid +conflict +miss +dynam +larg +direct +map +cach +bershad +romer +chen +asplo +comparison +memori +perform +mip +alpha +wong +qual +project +report +univers +washington +instruct +cach +effect +differ +code +reorder +algorithm +qual +project +report +univers +washington +memori +system +bibliographi +romer +romer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..67a9b92e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,149 @@ +project +rocki +architectur +perform +interpret +languag +project +rocki +architectur +perform +interpret +languag +project +descript +interpret +languag +becom +increasingli +popular +last +sever +year +part +demand +portabl +safeti +eas +project +examin +perform +interpret +languag +environ +sever +perspect +interpret +strategi +implement +processor +architectur +util +basi +studi +collect +benchmark +microbenchmark +implement +sever +interpret +languag +perl +java +mipsi +us +variou +instrument +trace +techniqu +evalu +perform +characterist +benchmark +order +gain +insight +similar +differ +languag +execut +environ +peoplefaculti +jean +loup +baer +baer +washington +brian +bershad +bershad +washington +henri +levi +levi +washington +student +denni +dlee +washington +romer +romer +washington +geoff +voelker +voelker +washington +alec +wolman +wolman +washington +wayn +wong +waynew +washington +papersrom +voelker +wolman +wong +baer +bershad +levi +structur +perform +interpret +asplo +appear +abstractpostscriptjava +measur +xjava +sourc +file +benchmarkstoolsto +collect +inform +perform +applic +vebeen +build +binari +rewrit +tool +call +etch +etch +yetpublicli +avail +read +etchhom +page +intern +documentationproject +intern +document +avail +peopl +last +updat +juli +romer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..fa5d9009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,125 @@ +richard +roger +home +page +richard +rogersrrog +washington +educomput +scienc +engin +departmentunivers +washington +seattl +usaoffic +chateau +sieg +phone +intellig +system +laboratri +phone +research +develop +system +softwar +systol +cellular +arrai +machin +scam +massiv +parallel +imag +process +comput +softwar +includ +compil +basic +imag +process +morpholog +librari +simul +obtain +softwar +paper +scam +current +work +document +layout +extract +intellig +system +also +help +produc +document +groundtruth +databas +optic +charact +recognit +commun +scienc +camp +director +comput +facil +northwest +center +environment +educ +ncee +offer +summer +scienc +camp +student +ag +washington +beauti +juan +island +also +work +scienc +splash +program +seattl +univers +splash +year +long +nation +scienc +foundat +fund +scienc +program +grade +minor +girl +seattl +area +interest +corn +snake +jessica +squishi +order +increas +length +kuow +nation +public +radio +stationi +bake +best +pecan +seattlelast +modifi +februari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..f3006a7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,22 @@ +mike +home +page +mike +salisburysalisbur +washington +comput +scienc +engin +depart +univers +washington +seattl +usaoffic +chateau +sieg +lifehistori +school +home +friend +vitacool +stuff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..935efcc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,307 @@ +stefan +savag +stefan +savag +savag +washington +work +comput +scienc +engin +depart +univers +washington +seattl +home +seattl +sampl +rich +post +industri +cultur +modern +pittsburghfor +year +caught +ride +bershad +migrat +mnow +gradual +student +first +rank +strongbackground +centuri +american +histori +provid +witha +firm +irrelev +platform +trash +talk +peer +fool +tocqeuvil +statement +american +find +tiresom +inconveni +exercisepolit +right +distract +industri +quit +similar +tocurr +microprocessor +architectur +trend +favor +need +ofappl +code +oper +system +work +brian +bershad +rest +merri +band +onan +oper +system +project +call +spin +projectsspinspin +extens +oper +system +omnifemtokernel +whichsupport +dynam +adapt +system +interfac +andimplement +direct +applic +control +stillmaintain +system +integr +inter +applic +isol +thing +writingspin +paper +extens +safeti +perform +spin +oper +system +proceed +symposium +oper +system +principl +sosp +copper +mountain +decemb +slide +talk +languag +support +extens +oper +system +proceed +first +workshop +compil +support +system +softwar +wcsss +tucson +write +oper +system +modula +proceed +first +workshop +compil +support +system +softwar +wcsss +tucson +protect +softwar +issu +proceed +fifth +workshop +topic +oper +system +hoto +orca +island +issu +design +extens +oper +system +proceed +first +usenix +symposium +oper +system +design +implement +osdi +monterei +novemb +panel +abstract +longer +unpublish +version +paper +spin +extens +microkernel +applic +specif +oper +system +servic +proceed +sixth +sigop +european +workshop +match +oper +system +applic +need +version +appear +oper +system +review +januari +spin +extens +microkernel +applic +specif +oper +system +servic +univers +washington +tech +report +march +afraid +paper +afraid +frequent +redund +arrai +independ +disk +proceed +winter +usenix +technic +confer +diego +januari +best +student +paper +slide +talk +reserv +paper +processor +capac +reserv +oper +system +support +multimedia +applic +proceed +first +ieee +intern +confer +multimedia +comput +system +boston +processor +capac +reserv +abstract +manag +processor +usag +proceed +fourth +workshop +workstat +oper +system +wwo +napa +octob +processor +capac +reserv +multimedia +oper +system +carnegi +mellon +tech +report +real +time +mach +paper +real +time +mach +timer +export +time +user +proceed +third +usenix +mach +symposium +santa +april +slide +talk +interest +music +hikingthi +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..fa5feed1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,12 @@ +sean +sandi +sean +david +sandi +washington +comput +scienc +washington +last +revis +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..7bec1b22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,35 @@ +richard +segal +home +page +richard +segaldepart +comput +scienc +engin +univers +washingtonbox +seattl +segal +washington +person +biographi +better +half +famili +pictur +research +overview +brute +internet +softbot +public +curriculum +vita +postscript +amus +archeri +bicycl +racquetbal +ski +softbal diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..a0fb8e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,179 @@ +stefan +bergstefan +berg +work +home +sieg +hall +univers +washington +seattl +univers +seattl +phone +phone +email +sgberg +washington +content +address +inform +past +project +activ +current +project +activ +interest +page +finger +inform +stefan +born +cologn +germani +spring +complet +mittler +reif +thgrade +schillergymnasium +cologn +come +unit +statesto +receiv +high +school +diploma +bloomington +high +school +north +indiana +stefan +receiv +bachelor +scienc +honor +distinctionin +field +comput +scienc +fromindiana +univers +momenth +work +toward +univers +washington +expect +complet +date +sometim +thiscenturi +past +project +activ +project +implement +reduct +machin +teach +assist +project +comparison +hardwar +softwar +solut +fals +share +teach +assist +project +studi +linear +time +sort +algorithm +teach +assist +current +project +activ +qual +project +comparison +hardwar +softwar +solut +fals +share +interest +page +pictur +peopl +around +raft +pictur +bookmark +excit +squar +univers +washington +moment +weather +seattl +print +yourselfsometh +crazi +didn +even +come +particularsolut +implement +done +sall +line +shouldn +contain +trail +carriag +return +byte +compil +without +warn +program +print +exact +sourc +code +itin +fewer +byte +like +putchar +char +els +els +char +main +char +putchar +char +els +els +char +main +char +printf +printf +printf +printf +stefan +resum +avail +inpostscript +andtex +format diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..0d895f78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,67 @@ +ward +shadegreet +salut +third +year +grad +student +hereat +dubcs +interact +renderingof +complex +scene +current +thing +thing +follow +project +link +walkthruproject +amonglot +pictur +anim +find +siggraph +paperdescrib +recent +work +shortcut +click +thepictur +island +lower +left +corner +page +lot +interest +work +go +mani +differ +aspectsof +comput +graphic +thegraph +imag +laboratori +get +done +contact +info +daili +schedul +travel +plan +project +public +pictur +page +look +scrunch +make +browser +least +pixel +wide diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..ddcff9a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,37 @@ +shun +leungshun +leung +student +depart +comput +scienc +andengin +univers +ofwashington +work +prof +johnzahorjan +pointer +research +research +summari +public +curriculum +vita +upon +request +shun +leung +depart +comput +scienc +engin +univers +washington +seattl +email +shuntak +washington +last +modifi +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..627f578f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,87 @@ +welcom +shuichi +home +page +shuichi +koga +skoga +washington +graduat +studentcomput +scienc +engin +departmentunivers +washington +name +shuichi +koga +haven +notic +bynow +start +graduat +studi +univers +ofwashington +haven +quit +figur +myqual +much +less +dissert +graduat +universityof +virginia +degre +mathemat +alsoheavili +involv +asian +studi +foreign +relat +andgovern +depart +origin +slate +also +degreein +asian +studi +also +heavili +involv +user +interfac +groupand +comput +sciencedepart +work +project +call +alic +sinc +anywai +take +look +pictur +smaller +shuichi +mean +finger +info +current +schedul +neat +hypertext +link +hunt +destroi +bug +shuichi +koga +skoga +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..75ac1b97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,59 @@ +soha +hassoun +home +page +soha +washington +soha +hassounit +year +graduat +school +univers +washington +comput +scienc +engin +dept +circuit +design +whoturn +develop +current +work +onarchitectur +retim +professor +carlebel +weekli +schedul +busi +current +previou +research +current +educ +experi +public +patent +chao +group +dept +profession +interest +vlsi +site +inform +littl +deede +photo +galleri +address +comput +scienc +engin +depart +univers +washington +seattl +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..01722dcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,136 @@ +sujai +parekh +home +page +sujai +parekh +work +depart +comput +scienc +engin +sieg +hall +chateau +guggenheim +annex +univers +washington +seattl +home +seattl +quarter +class +seminar +system +seminar +french +french +research +simultaneousmultithread +particular +issu +relat +tomultithread +architectur +softbot +project +evalu +simon +softbot +emploi +procedur +search +controlsystem +control +action +report +construct +design +remov +patio +convent +workspac +interest +fund +project +pleas +contact +sport +spud +soccer +right +sort +bookmark +page +like +keep +track +interest +academ +cognit +scienc +distribut +parallel +system +psycholog +philosophi +tenni +soccer +sail +squash +volleybal +ballroom +danc +food +cornel +comput +scienc +oracl +corpor +stottler +henk +associ +done +resum +random +person +info +favorit +food +oondhiu +mango +phad +thai +kung +chicken +favorit +beverag +screwdriv +scotch +long +island +ic +favorit +danc +tango +swing +east +west +coast +salsa +favorit +rock +music +dire +strait +pink +floyd +phil +collin +genesi +peter +gabriel +petti +sparekh +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..9ccc221f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,68 @@ +erik +selberg +improv +home +page +name +erik +speed +washington +academ +speed +almost +live +fishcam +address +lara +lewi +memorialhappi +link +peopl +sport +food +drink +cultur +mari +kay +offic +friend +famili +fish +disc +drive +ultim +pasti +power +ur +machin +mountain +bike +spud +softbal +ski +utah +raquetbal +pro +colleg +wedgwood +hous +diet +pepper +salt +lake +roast +compani +bean +bagel +speed +racer +star +war +tini +toon +pinki +brain +phantom +babylon +comic +erik diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..e1bc7a16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,151 @@ +home +page +sung +choiwelcom +thehomepag +ofsung +eunchoi +myschoollifemi +primari +research +interest +compil +parallel +program +languag +involv +zplcompil +project +univers +washington +late +beenspend +time +think +optim +communicationgener +us +architechtur +independ +communicationlibrari +ironman +addit +experi +simul +data +parallel +programson +superscalar +processor +goal +work +improv +nodeperform +come +gener +parallel +machin +alsobeen +seen +hang +chaosrout +group +work +simul +includ +graphic +front +visual +thatexperi +current +implement +anoth +router +simul +inzpl +final +also +littl +astronomi +quarter +ta +enjoi +watch +movi +mostli +comfort +home +like +yeah +vegetarian +sinc +myjunior +year +colleg +drink +dinner +would +samewithout +good +wine +result +must +exercis +quit +plai +twosoccerteam +cousin +scrub +divis +cooper +recdivis +last +season +scrub +came +second +place +andcoop +divis +unfortun +recent +sacrifiedmi +left +knee +game +plai +soccer +take +usualstep +aerobicsclass +instead +find +try +swim +weight +trainingclass +like +good +peopl +world +read +book +take +abit +shakespear +watch +publictelevis +listen +classicalmus +myotherlif +sung +choi +sungeun +washington +depart +comput +scienc +engin +univers +washington +seattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..295eb333 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,66 @@ +nguyen +nguyen +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +washington +work +world +research +interest +includ +oper +system +distribut +andparallel +system +network +secur +current +help +frommi +advisor +johnzahorjan +build +system +support +run +soft +real +timeappl +visual +partial +idl +workstat +innow +recent +complet +studi +runtim +measur +ofappl +characterist +us +runtim +system +tominim +applic +execut +time +uniprogram +multiprocessorsenviron +well +system +schedul +make +goodglob +schedul +decis +multiprogram +multiprocessorsenviron +cvpublic +worldvietnameseresourc +netcyclingplayground diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..1b51a436 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +tian +homepageyour +browser +rusti +yellow +turkei +frame +even +part +html +standard +click +frame +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..3cbc514d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,217 @@ +ashutosh +tiwari +ashutosh +tiwari +tiwari +washington +depart +comput +scienc +engin +univers +washington +seattl +mostli +full +time +fourth +year +graduat +student +work +area +singl +address +space +oper +system +opal +persist +object +system +object +orient +databas +applic +workload +measur +oper +system +support +databas +past +work +infrastructur +user +interfac +distribut +object +system +spare +time +work +comput +scienc +group +research +technolog +organ +boe +comput +servic +oopsla +workshop +build +larg +distribut +softwar +system +us +object +oopsla +workshop +object +larg +distribut +persist +softwar +system +projectsopali +work +distribut +opal +opal +oper +system +project +address +issu +opportun +involv +creat +singl +global +address +space +across +multipl +user +machin +jeff +chase +primari +architect +opal +hank +levi +advisor +work +close +opal +also +advisor +applic +workload +measur +also +work +measur +character +behavior +persist +object +applic +gener +techniqu +paper +work +area +distribut +object +system +work +sever +distribut +object +system +profession +career +thisexperi +basi +oopsla +workshop +build +larg +distribut +softwar +system +us +object +organ +follow +oopsla +workshop +object +larg +distrbut +persist +softwar +system +public +us +virtual +address +object +refer +chase +levi +tiwari +proc +intern +workshop +object +orient +oper +system +septemb +except +handl +parallel +distribut +environ +tiwari +levi +ecoop +workshop +except +handl +juli +build +larg +distribut +system +us +object +tiwari +bosch +addendum +proceed +oopsla +oop +messeng +octob +evalu +system +applic +benchmark +tiwari +narasayya +levi +oopsla +workshop +object +databas +behavior +benchmark +perform +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..530c3339 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,103 @@ +tessa +tessa +anoth +dead +inform +superhighwai +second +yeargradu +student +comput +scienc +univers +washington +research +interest +includ +sort +relatedgoodi +current +work +clio +system +search +andbrows +person +histori +avail +current +seek +gain +employ +myresum +onlin +curiou +kittyi +honor +share +apart +gambit +siames +cat +great +memor +inform +found +therear +pictur +page +tofind +appar +alsor +interest +scotland +classesi +still +work +qual +quarter +take +last +ofeight +class +fulfil +breadth +requir +digit +system +seminarlinux +gameseverybodi +plai +game +maintain +linux +gametom +commit +advanc +linux +pretti +coolgam +platform +also +first +attempt +java +program +simpl +maze +applet +also +java +linux +sleepingi +known +frequent +seattl +area +bookstor +also +knit +crochet +copyright +tlau +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..f03065fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,55 @@ +martin +tompa +martin +tompa +depart +comput +scienc +engin +univers +washington +seattl +phone +receptionist +lectur +note +articl +comput +trajectori +thelma +louis +recent +holidai +moon +pearl +among +wash +oyster +collabor +surrealist +electron +propheci +build +across +pierc +lane +carol +martin +photograph +photo +courtesi +health +scienc +center +educ +resourc +provid +mani +imag +univers +washington +martin +tompa +finger +tompa +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..e424c281 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,146 @@ +traci +kimbrel +held +prison +traci +kimbrel +held +prison +traci +kimbrel +held +prison +univers +washingtonsinc +without +charg +trial +move +year +toanoth +seattl +area +prison +inmat +forc +tomanufactur +airplan +escap +institut +wasrecaptur +return +univers +washington +help +hisplight +rescu +imprison +list +thing +done +curriculum +vita +imprison +detail +statement +ofwhat +promis +histori +goal +free +captor +depart +comput +scienc +engin +univers +washington +seattl +tracyk +washington +eduher +captor +forc +trace +driven +comparison +algorithm +parallel +prefetch +cachingtraci +kimbrel +andrew +tomkin +hugo +patterson +brian +bershad +edward +felten +garth +gibson +anna +karlin +appear +sigop +usenix +associ +symposium +onoper +system +design +implemen +near +optim +parallel +prefetch +cachingtraci +kimbrel +anna +karlin +appear +ieeesymposium +foundat +comput +scienc +longer +version +integr +parallel +prefetch +cach +page +extend +abstract +traci +kimbrel +edward +felten +anna +karlin +proceed +sigmetr +confer +measurementand +model +comput +system +probabilist +algorithm +verifi +matrix +product +usingo +squar +time +base +random +bit +traci +kimbrel +rakesh +kumar +sinha +inform +process +letter diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..d4347e74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,63 @@ +travi +craig +home +page +travi +craig +travi +washington +comput +scienc +engin +depart +univers +washington +seattl +research +interest +mechan +predict +real +time +system +cach +restor +queu +spin +lock +arctic +submarin +current +cours +take +quarter +dissert +work +real +time +system +time +consum +side +project +work +half +time +esca +corpor +help +keep +volvo +run +press +latest +motor +pool +statu +understand +comput +scienc +travi +craig +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..d159aac0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,16 @@ +dean +tullsen +home +page +dean +tullsen +biograph +inform +research +interest +bibliographi +home +page +download +resumemi +hobbi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..b7f99de9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,325 @@ +michael +vanhilstmichael +vanhilstvanhilst +washington +edumvh +harvard +eduperson +research +comput +scienc +engin +depart +univers +washington +seattl +usaclick +send +email +messag +mike +vanhilst +personalmik +start +year +graduat +student +univers +washington +hopefulli +littl +luck +finish +around +theend +winter +quarter +immedi +prior +come +udub +mike +work +contractor +atibm +research +wrote +motif +widget +user +unterfac +sdata +explor +mike +start +programm +fix +maintainingcomput +hardwar +smithsonian +astrophys +observatori +part +smithsonian +locat +within +harvard +learn +debug +softwar +mike +could +convinc +programm +hardwar +realli +work +correctli +mike +stai +smithsonian +year +time +wrote +program +call +saoimagewhich +us +lot +astronom +look +imag +saoimag +part +gnudistribut +mike +would +like +thank +bill +wyatt +eric +mandel +schwarz +doug +minkfor +start +guid +continu +project +mention +countless +other +contribut +mike +took +year +work +group +seismologistsin +pari +franc +data +acquisit +calibr +analysi +truli +wonder +time +pari +wife +angela +french +languag +class +theallianc +francais +angela +come +pari +year +nativ +colombiain +south +america +summer +mike +wrote +front +studentsbrows +univers +time +schedul +data +base +uwin +work +talent +staff +comput +commun +folk +brought +pine +special +thank +bill +shirei +design +traci +stenvik +wrote +uwin +screen +librari +machin +uwin +work +time +schedul +mike +also +taught +begin +program +extens +motiv +group +frommicrosoft +product +support +sacrif +summer +learn +recent +mike +present +paper +theintern +symposium +object +technolog +advanc +softwar +isota +confer +object +orient +program +system +languag +applic +oopsla +theacm +sigsoft +symposium +foundat +softwar +engin +also +present +poster +oopsla +made +present +subject +workshop +oopsla +oopsla +particip +doctor +symposium +oopsla +particip +demo +uist +thank +steve +earlier +life +mike +earn +degre +inarchitectur +wooden +kind +citi +planningfrom +mitand +work +commun +develop +director +forth +citi +grinnel +iowa +thing +work +differ +skill +visualdesign +problem +solv +continu +valu +still +get +talk +chri +alexand +seattl +mike +activ +student +chapter +washington +softwar +associ +improv +ti +student +larg +small +softwar +compani +area +enjoi +hike +cross +countri +ski +sail +andkayak +also +enjoi +swim +lake +bronson +recent +mike +free +time +taken +marco +harold +sebastien +hilst +born +mike +post +pictur +soon +locat +anoth +photo +scanner +visit +sinc +novemb +michael +vanhilst +last +modifi +fridai +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..2381e683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,122 @@ +hello +vassilylong +live +hello +start +realli +person +best +linki +come +thu +http +washington +home +vass +us +document +also +shortest +write +young +collect +quit +link +download +fewfil +expand +collect +sinc +thisstuff +select +link +quotesrussian +home +pagesvari +linksguid +html +formsoth +link +home +pageuw +home +pagencsa +mosaic +home +pagerec +join +cecilproject +cecil +cool +pure +object +orient +languag +andvortex +even +cooler +compil +infrastructur +befast +mayb +paper +written +member +staff +design +implement +themvi +system +assist +access +ourdepartment +room +visitor +databas +recent +beenupgrad +staff +peopl +qual +project +expand +thezpl +languageto +handl +irregular +data +structur +repres +graph +anddynam +repartit +data +graph +arrai +myqual +writeup +page +short +overview +check +theslidesfrom +present +page +slide +also +collect +link +toresourc +relat +project +vass +washington +eduobject +mirror +closer +appear +pastor +vybrasyvalsya +okna +pyatyi +deystvov diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..1543c597 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,77 @@ +virgil +bourassa +home +page +uwvirgil +evan +bourassavirgil +bourassa +virgil +washington +student +depart +comput +scienc +engin +theunivers +washington +seattl +washington +research +interestsinclud +comput +oper +system +architectur +join +boeingin +work +scientist +comput +scienceorgan +inform +support +servic +divis +bellevu +washington +receiv +electr +engin +arizonast +univers +temp +arizona +electricalengin +univers +washington +seattl +washington +comput +scienc +engin +theunivers +washington +accesswhat +work +interest +expertis +resum +patent +invent +public +present +profession +histori +educ +achiev +recommend +letter +statusoccasion +updat +last +modifi +virgil +bourassa +virgil +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..aa886126 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,300 @@ +vivek +ratan +home +page +vivek +ratan +particular +graduat +student +comput +scienc +depart +univers +washington +current +academ +leav +work +research +scientistat +bellcor +morristown +researchwork +bellcor +interest +distribut +comput +issu +distribut +system +applic +system +develop +high +avail +current +work +distribut +system +group +bellcor +high +avail +distributedsoftwar +system +simpli +highli +avail +system +continu +presenc +failur +group +develop +toolset +anatida +provid +high +avail +mechan +distribut +applic +adher +corba +standard +also +look +activ +replic +scheme +high +avail +indc +applic +recent +effort +focuss +enhanc +avail +fault +toler +cell +directori +servic +detail +project +foundher +also +interest +high +avail +issu +integrationof +distribut +technolog +server +univers +washington +work +murphi +project +softwar +safeti +methodolog +softwar +safeti +group +head +bydr +nanci +leveson +detail +safeti +research +found +safeti +page +much +work +involv +look +automat +fault +tree +gener +fromrequir +specif +written +rsml +well +work +rsml +languag +simul +public +list +recent +public +found +person +inform +born +brought +india +last +eight +year +undergradu +attend +angelo +state +univers +angelo +wesleyan +univers +middletown +receiv +physic +math +comput +scienc +right +came +univers +washington +seattl +purus +graduat +studi +comput +scienc +like +plai +tenni +whenev +squash +lesserext +racquetbal +suffic +ardent +folow +game +cricket +particip +ultra +cricket +plai +email +cricket +simul +game +mani +year +also +follow +exploit +superson +marin +cowboi +last +year +half +taken +keen +interest +learn +ballroom +danc +waltz +foxtrot +chacha +rhumba +tango +swing +west +coast +pleas +occasion +dabbl +mambo +area +best +place +learn +danc +seattl +center +us +band +session +everi +saturdai +involv +model +unit +nation +chapter +model +unit +nation +intern +educ +organ +simul +work +confer +held +throughout +year +current +topic +restructur +reform +part +like +secur +council +ecosoc +world +bank +rapidpopul +growth +nuclear +prolifer +home +page +chapter +interest +read +poetri +mirza +ghalib +centuryindian +poet +also +interest +english +literatur +especi +romant +victorian +period +link +obligatori +collect +sitesthat +tend +visit +often +depart +comput +scienc +engin +univers +washington +seattl +vivek +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..263ab550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,37 @@ +geoff +skywhoi +graduat +student +univers +washington +inseattl +whati +master +thesi +wireless +mobil +comput +design +andbuild +system +call +mobisa +current +avoid +settl +thesi +topic +wherechateau +guggenheim +annex +univers +washingtonseattl +washington +look +emac +window +window +geoff +voelker +voelker +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..a5a8a009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,64 @@ +home +wayn +home +comput +scienc +engin +depart +univers +washington +seattl +waynew +washington +stuff +current +look +memori +system +perform +particular +differentmemori +system +organ +investig +work +beingdon +jean +loup +baer +also +look +interpret +other +denni +geoff +alec +rightnow +thing +littl +rocki +actual +earli +version +paper +thing +keep +jump +alpha +interest +place +fish +list +cool +site +howev +list +peoplewho +list +peopl +well +test +testwayn +wong +waynew +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..655d0a4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,16 @@ +william +chan +home +pagewilliam +chan +home +pagei +spend +time +hell +spare +time +hang +heaven +wchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..169b5327 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,179 @@ +daniel +weld +daniel +weld +associ +professor +comput +scienc +engineeringat +univers +ofwashington +receiv +bachelor +degre +comput +scienc +biochemistri +yale +univers +land +artifici +intellig +receiv +presidenti +young +investig +award +offic +naval +research +younginvestig +award +weld +theadvisori +board +journal +airesearch +guest +editor +comput +intellig +edit +aaai +report +role +ofintellig +system +nation +inform +infrastructur +isco +program +chair +aaai +weld +publish +book +scad +technic +paper +person +data +reach +offic +sieg +hall +phone +work +home +mail +dept +comput +scienc +engin +univers +washington +seattlewa +research +interest +weld +current +research +interest +artifici +intellig +specif +softwar +agent +plan +exampl +weld +group +support +ucpop +planner +us +almost +hundr +sitesworldwid +mani +weld +paper +avail +electron +arehi +current +favorit +repres +sens +action +middl +ground +revisit +plan +gather +inform +aaai +august +plan +base +control +softwar +agent +aip +ascal +comparison +shop +agent +world +wide +januari +softbot +base +interfac +internet +cacm +juli +anintroduct +least +commit +plan +magazin +winter +select +exhaustivelist +recreat +absent +offic +weld +foundat +cafe +allegro +stormymountain +climb +past +enjoi +travel +theworld +like +found +plai +twin +boi +adam +galen +invit +visit +galleri +pacif +northwest +desert +wilder +photograph +also +illustr +stori +morocco +weld +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..87e4d4c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,13 @@ +wendi +belluomini +wendi +belluomini +wendi +washington +graduat +master +current +work +univ +utah +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..65814fef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,67 @@ +wilson +hsiehwilson +hsiehi +postdoc +thedepart +comput +scienc +engin +theunivers +washington +inseattl +member +thespin +project +receiv +thedepart +electr +engin +comput +sciencein +theschool +engineeringatmit +work +thelaboratori +comput +scienc +advisor +werefran +kaashoekandbil +weihl +research +compil +parallel +system +myresearch +interest +interact +among +compil +programminglanguag +runtim +oper +system +architectur +select +publicationsselect +linksperson +interestswilson +hsieh +depart +comput +scienc +engin +univers +washington +seattl +offic +sieg +move +phone +numberha +chang +voic +whsieh +washington +public +keyoctob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..d50888a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,103 @@ +alec +wolman +home +page +alec +wolmanwolman +washington +eduworkcomput +scienc +engin +departmentunivers +washingtonbox +seattl +home +seattl +current +graduat +student +thecomput +scienc +departmentat +univers +washington +offic +isroom +thechateau +gradual +school +work +fordigit +equip +corp +cambridg +research +research +interest +includ +oper +system +network +architectur +current +recent +project +scalabl +network +perform +etch +binari +instrument +optim +executablesrocki +interpret +performanceon +line +paper +firewal +applic +relai +trees +wolman +summer +usenix +latenc +analysi +network +wolman +voelker +thekkath +winter +usenix +structur +perform +interpret +romer +voelker +wolman +wong +baer +bershad +levi +appear +asplo +hungri +otter +fixha +strang +idea +nervou +habit +realli +plai +guitar +wolman +link +wolman +hallwolman +diseasewolman +pressur +treat +lumber +wolman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..ffc01734 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,132 @@ +xiaohan +xiaohan +xqin +washington +comput +scienc +engin +depart +univers +washington +seattl +offic +sieg +phone +year +graduat +student +work +jean +loup +baer +research +interest +includ +comput +architectur +parallel +distribut +system +perform +evalu +method +model +simul +short +term +goal +school +soon +possibl +paper +perform +evalu +cluster +base +architectur +baer +submit +confer +perform +explicit +communicationprimit +cach +coher +multiprocessor +system +baer +appear +proceed +hpca +compar +studi +conserv +optimist +trace +driven +simul +baer +award +paper +simul +symposium +page +optimist +trace +driven +simul +baer +tech +report +dept +comput +scienc +engin +univ +washington +parallel +trace +driven +simul +implement +perform +baer +proceed +intern +confer +parallel +process +page +graph +toolfor +monitor +visual +basedmultiprocessor +perform +zhang +nalluri +journal +parallel +distribut +comput +june +page +perform +predict +evalu +parallel +processingon +numa +multiprocessor +zhang +ieee +tran +softwar +engin +page +interest +stuff +photo +chinaread +chinesesearch +engin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..653bc703 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,82 @@ +yasushi +saitoyasushi +saito +second +year +graduat +student +atdepart +comput +scienc +engin +univers +washington +seattl +current +workingwith +brian +bershad +thespin +project +address +andperson +info +meta +link +metacrawl +yahoo +desktop +refer +index +alta +vista +lyco +archi +tech +index +research +link +spin +intern +document +modula +info +time +schedul +survei +oper +system +transact +servic +spin +qual +project +sightse +japanes +link +random +info +javascript +apprentic +page +us +linux +connect +gatewai +japan +perl +patch +touch +type +trainer +dvorak +lesson +text +yasushi +washington +want +finger +talk +trycanva +washington +desktop diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..e19067a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,85 @@ +oren +zamir +home +pageoren +zamir +home +page +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +chateau +zamir +washington +home +seattl +zamir +washington +edui +isra +graduat +student +depart +computersci +engin +univers +washington +myundergradu +degre +physic +mathemat +hebrewunivers +jerusalem +israel +interest +field +artifici +intellig +softwareengin +current +work +line +cluster +algorithmsfor +internet +document +retriev +basic +idea +help +userwith +internet +search +result +hundr +document +worki +done +part +metacrawl +parallel +search +servic +along +orenetzioni +erik +selberg +resum +avail +pictur +thing +like +dive +sinai +jeeptour +ski +pictur +last +raft +trip +interest +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..e036f21e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,175 @@ +titl +jean +loup +baer +professorand +adjunct +professor +electr +engin +receiv +diplom +ingnieur +electricalengin +doctorat +cycl +comput +scienc +theuniversit +grenobl +franc +ucla +prior +join +univers +washington +research +engin +laboratoir +decalcul +universit +grenobl +member +digit +technologygroup +ucla +present +interest +parallel +anddistribut +process +comput +system +architectur +author +coauthor +paper +thesearea +author +textbook +comput +system +architectur +comput +scienc +press +professor +baer +serv +ieee +comput +scienc +distinguishedvisitor +nation +lectur +guggenheim +fellow +ieee +fellow +editor +journal +parallel +distribut +comput +journal +comput +languag +serv +asprogram +chairman +intern +confer +parallelprocess +program +chairman +internationalsymposium +comput +architectur +gener +chairman +ofth +internationalsymposium +comput +architectur +current +chair +sigarch +eighteen +student +complet +dissert +professorba +direct +twelv +work +industri +research +laboratoriesand +inacademia +although +year +baer +hashad +difficulti +retain +french +accent +cours +recent +research +project +look +comput +architectur +page +project +involv +cach +coher +protocol +cluster +architectur +improv +protocol +singl +system +perform +softwar +primit +cluster +appear +hpca +prefetch +uniprocessor +hardwar +also +ieee +comparisonwith +block +cach +also +asplo +prefetch +multiprocessor +isca +impact +specul +execut +cach +denni +home +page +andisca +parallel +trace +driven +simul +conserv +approach +also +icpp +optimisticapproach +comparison +also +distribut +simul diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..df7bd6e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,149 @@ +craig +chamber +craig +chamber +assist +professor +join +faculti +receiv +degre +comput +scienc +comput +scienc +stanford +chamber +research +interest +design +implementationof +advanc +program +system +incorpor +express +programminglanguag +effici +implement +support +programmingenviron +current +investig +object +orient +languagesand +lead +ceciland +vortex +project +cecil +pure +object +orient +languageserv +vehicl +investig +multi +method +static +type +modul +featur +vortex +optim +compilersystem +object +orient +languag +incorpor +intra +andinterprocedur +static +analys +profil +guid +optim +withfront +end +cecil +modula +java +previous +chamberswa +member +self +project +chamber +also +member +spinproject +spin +extens +oper +system +microkernel +whichsupport +dynam +adapt +system +interfac +implementationsund +direct +applic +control +still +maintain +systemintegr +isol +applic +spin +util +dialect +themodula +languag +pointer +safe +kernel +extens +languag +spinalso +reli +dynamiccompil +achiev +high +perform +despit +fine +grainedextens +click +herefor +inform +undergradu +graduat +level +researchproject +area +contact +informationprof +craig +chambersdepart +comput +scienc +engineeringunivers +washingtonbox +seattl +chamber +washington +mail +requir +street +address +sieg +hall +room +last +updat +april +chamber +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..49f0d450 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,138 @@ +carl +ebel +home +page +carl +ebelingdepart +comput +scienc +engin +univers +ofwashington +seattl +washington +voic +mail +ebel +washington +offic +sieg +hall +room +carl +ebel +associ +professor +physic +wheatoncolleg +comput +scienc +southern +illinoisunivers +comput +scienc +carnegi +mellonunivers +join +carl +ebel +research +interest +fall +categori +vlsiarchitectur +comput +aid +design +digit +system +haswork +number +vlsi +project +includ +hitech +chessmachin +apex +graphic +chip +draw +spline +curv +andsurfac +triptych +field +programm +gate +arrai +current +hei +involv +chao +project +build +multicomput +routingnetwork +interest +focu +method +optim +theperform +circuit +us +level +sensit +latch +placementand +rout +algorithm +fpga +particularli +triptych +teachingspr +advanc +logic +designoffic +hour +mondai +thursdai +travel +april +fccm +napamai +burlington +chicagojun +vegasresearch +project +northwest +laboratori +integr +system +chao +router +project +triptych +high +densiti +fpga +architectur +public +journal +articl +confer +workshop +paper +graduat +student +soha +hassoun +neil +mckenzi +darren +cronquist +paul +franklin +amara +galleryelan +galleryebel +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..70a4127d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,48 @@ +steve +hanksunivers +washingtondepart +comput +scienc +design +agent +architecturesai +magazin +seriou +link +home +page +spring +uncertainti +confer +inform +uncertainti +page +group +page +link +seattl +restaur +seattl +symphoni +schedul +seattl +wine +opera +schedul +server +edita +gruberova +page +photo +carlo +maria +giulini +discographi +sumac +inform +tenni +new +hank +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..17acd0ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,82 @@ +titl +alistair +holden +professor +professor +electr +engin +origin +highland +scotland +receivedhi +degre +univers +glasgow +spent +year +withth +british +broadcast +corpor +engin +divis +graduateapprentic +spent +research +divis +spent +year +yale +edison +fellowship +degre +phddegre +univers +washington +dissert +learningin +artifici +intellig +interest +began +take +coursefrom +colin +cherri +imperi +colleg +london +thebbc +initi +comput +scienc +program +theuw +time +group +faculti +mostli +math +departmentsform +group +within +graduat +school +current +work +applic +knowledg +base +system +verif +expert +system +integr +symbol +neural +netmethodolog +speech +understand +comput +aid +design diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..a12ee410 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,178 @@ +dick +karprichard +karpprofessor +ofcomputersci +engin +andadjunct +professor +ofmolecularbiotechnologyunivers +ofwashington +karp +washington +eduaward +membershipsn +medal +scienc +babbag +prize +berkelei +univers +professor +fellow +ture +award +member +nation +academi +sciencesmemb +nation +academi +engineeringfellow +american +academi +art +sciencesfellow +american +associ +advanc +sciencedistinguish +teach +award +berkelei +academ +senat +class +chair +berkeleylanchest +prize +oper +research +societi +america +institut +manag +scienc +fulkerson +prize +american +mathemat +societi +mathemat +program +societi +john +neumann +theori +prize +oper +research +societi +america +institut +manag +scienc +faculti +research +lectur +berkelei +hermann +weyl +lectur +institut +advanc +studi +john +neumann +lectur +societi +industri +appliedmathemat +miller +research +professor +berkelei +honorari +doctor +georgetown +univers +univers +massachusett +technion +univers +pennsylvania +member +nation +advisori +board +comput +profession +forsoci +respons +presentmemb +board +governor +weizmann +institut +scienc +presentmemb +board +truste +intern +comput +scienceinstitut +presentselect +public +combinator +complex +random +turingaward +lectur +commun +construct +perfect +match +random +upfal +wigderson +combinatorica +probabilist +analysi +partit +algorithm +travel +salesman +problem +plane +mathemat +ofoper +research +theoret +improv +algorithm +effici +fornetwork +flow +problem +edmond +journal +theacm +reduc +among +combinatori +problem +complex +comput +comput +plenum +press +travel +salesman +problem +minimum +spanningtre +part +held +mathemat +program +karp +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..21f22b22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,405 @@ +lazowska +ahalf +centuri +exponentialprogress +inform +technolog +univers +washington +annualfaculti +lectur +vicepresid +gore +speech +eniac +thanniversari +celebr +congress +talk +like +georgejetson +support +polici +appropri +forfr +flintston +universityof +california +berkelei +invent +chines +cook +nathanmyhrvold +joinsedlazowska +theuwcs +faculti +trip +memori +lane +lazowska +lazowska +professor +chair +thedepart +comput +scienc +lazowska +mostlywearsti +push +paper +rack +frequent +flier +mile +graduat +student +seem +pick +onthi +mbquicktim +healso +host +lot +visitor +surpris +number +havefunnynos +lazowska +member +board +director +comput +researchassoci +member +includ +essenti +allgradu +depart +industri +research +laboratoriesin +field +chair +ofcra +govern +affair +committe +serv +nation +scienc +foundat +advisorycommitte +comput +inform +scienc +andengin +nation +research +council +scomputersci +telecommun +board +member +person +technic +advisori +board +formicrosoft +research +personnationalsemiconductor +comput +scienc +system +academicadvisori +council +board +director +ofdata +corpor +scientif +advisori +board +forcabl +hows +ventur +cascadia +fund +belong +stand +advisori +committeesfor +thedepart +eecsat +berkelei +andth +depart +comput +scienc +atstanford +univers +universityof +virginia +hongkong +univers +scienc +technolog +member +ture +award +select +committe +complet +servic +person +nation +research +councilpanel +review +multi +agencyhigh +perform +computingand +commun +program +brook +sutherland +committe +andha +recent +serv +chair +committe +examinersfor +graduat +record +examin +board +comput +scienc +test +chair +sigmetr +associ +comput +machineri +sspecial +interest +group +concern +comput +system +perform +chair +softwar +system +award +committe +program +chairof +symposium +oper +system +principl +andeditor +ieee +transact +comput +univers +washington +addit +servinga +chair +thedepart +comput +scienc +engin +lazowska +chair +univers +advisori +committe +onacadem +technolog +recent +serv +member +thecommitte +deanship +colleg +artsand +scienc +chair +review +committe +forth +program +molecular +biotechnolog +amemb +perform +review +committe +deanof +engin +select +deliv +theunivers +washington +annual +faculti +lectur +lazowska +fellowof +associationfor +comput +machineri +theinstitut +electr +andelectron +engin +seventeenph +student +studentshav +complet +degre +work +miscellan +link +integratedoverview +univers +washington +thedepart +region +also +apersuas +player +version +intend +forloc +consumpt +theimpact +research +univers +inform +technolog +perspect +uwcs +profession +master +program +persuas +playertopten +reason +major +inform +csebuild +project +abbrevi +cvcomputingresearch +drive +informationtechnolog +inform +industri +forwardmassi +goldmanreport +alleg +cseph +product +issu +flaw +data +medianyear +confer +boardstudi +doctor +program +think +ahalf +centuri +exponentialprogress +inform +technolog +univers +washington +annualfaculti +lectur +driver +inform +highwai +univers +washington +saturdayseminar +novemb +testimonyto +houseappropri +committe +concern +april +testimonyto +hous +scienc +committe +concern +hpcc +octob +vicepresid +gore +speech +eniac +thanniversari +celebr +februari +interestinghom +page +sometim +demo +purpos +odeto +execut +vice +presid +tallman +trask +departsfor +duke +univers +universityof +california +berkelei +invent +chines +cook +nathanmyhrvold +joinsedlazowska +theuwcs +faculti +trip +memori +lanelazowska +down +famili +home +pagedirect +houseshilshol +aquat +club +home +pagerec +discoveredreview +grade +grade +poetryfing +lazowska +washington +scheduleinform +seem +offic +reflector +home +page +http +washington +home +lazowska +lazowska +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..1285e53d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,130 @@ +titl +alan +shaw +professor +graduat +bachelor +sdegre +engin +physic +univers +toronto +amast +mathemat +stanford +univers +incomput +scienc +stanford +addit +facultyappoint +univers +washington +start +hasbeen +assist +professor +comput +scienc +cornel +univers +visit +professor +fulbright +research +scholar +univers +pari +guest +professor +informat +zurich +research +associ +atth +stanford +linear +acceler +center +system +engin +theibm +corpor +current +research +interest +real +time +system +softwar +specif +method +professor +shaw +publicationsinclud +textbook +oper +system +book +softwareengin +introductori +comput +scienc +text +andan +edit +book +document +prepar +system +serv +memberof +editori +committe +member +comput +sciencescreen +committe +fulbright +award +associateeditor +journal +real +time +system +associ +editor +ieee +transact +softwar +engin +among +thing +supervis +mani +these +project +fifteen +dissert +includ +distinguish +dissert +half +former +student +academ +posit +half +work +live +professor +shaw +hobbi +includ +good +food +trumpet +hike +bike +hobbi +tenni diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..f0fb77a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,137 @@ +titl +lawrenc +snyder +professor +receiv +bachelor +sdegre +univers +iowa +mathemat +econom +andin +receiv +carnegi +mellon +univers +computersci +visit +scholar +univers +washington +join +faculti +perman +serv +onth +faculti +yale +purdu +visit +scholarat +harvard +professor +snyder +research +rang +proof +theundecid +properti +program +design +developmentof +singl +chip +cmo +microprocessor +quarter +hors +hecreat +configur +highli +parallel +chip +architectur +thepok +parallel +program +environ +inventor +chaoticrout +follow +complet +blue +chip +project +nowprincip +investig +orca +project +nwli +professor +snyder +associ +editor +journal +computerand +system +scienc +parallel +system +editor +journal +ofth +area +editor +ieee +transact +parallel +anddistribut +system +serv +nation +scienc +foundationadvisori +committe +divis +comput +research +particip +numer +nation +advisorycommitte +futur +research +direct +parallel +comput +andcomput +scienc +polici +serv +distinguish +doctoraldissert +award +select +committe +chair +program +chair +first +symposium +parallel +algorithmsand +architectur +addit +dozen +student +complet +doctor +degreesund +direct +professor +snyder +guid +numer +master +seniorproject diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..cbc74ad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,109 @@ +arun +somani +arun +somani +professor +professor +electr +engin +earn +msee +degre +electr +engin +mcgill +univers +montreal +canada +respect +prior +work +scientif +offic +govt +india +delhi +period +design +develop +anti +submarin +warfar +system +indian +navi +professor +somani +research +interest +area +offault +toler +comput +interconnect +network +comput +architectur +parallel +comput +system +parallel +algorithm +current +involv +three +major +project +high +integr +system +design +address +issu +relat +tocach +memori +design +redund +comput +system +evalu +tool +system +congest +control +fault +toler +broadband +network +develop +proteu +architectur +multiprocessor +system +autom +classif +object +base +generalizedenhanc +hypercub +reconfigur +interconnect +network +explor +coars +grain +parallel +like +cook +indian +food +hike +plai +bridg +tabl +tenni +tenni +inform +dpcnl +proteu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..634baed9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,269 @@ +steven +tanimoto +home +page +steven +tanimoto +professor +comput +scienc +engin +adjunct +professor +electricalengin +receiv +degre +fromharvard +princeton +join +theunivers +washington +faculti +year +teach +atth +univers +connecticut +visit +professorat +institut +programm +univers +pari +franc +anda +visit +scholar +linkp +univers +sweden +sinc +hasalso +serv +adjunct +member +depart +electricalengin +visit +scientist +atkob +univers +japan +think +machin +corpor +cambridg +massachusett +linkp +univers +sweden +june +june +visit +scientist +theinstitut +research +enseign +superieur +techniquesd +electroniqu +irest +univers +nant +franc +addit +assist +variou +confer +hasrec +home +page +forimag +confer +devot +imag +processingand +commun +confer +take +place +bordeaux +franc +professor +tanimoto +research +interest +includ +comput +analysi +ofimag +particularli +us +parallel +processor +educ +technolog +visual +program +artifici +intellig +currentlydirect +sponsor +project +mathemat +experi +throughimag +process +whose +object +develop +person +comput +softwarethat +motiv +grade +student +studi +mathemat +written +coauthor +paper +edit +thebook +structur +comput +vision +author +textbook +entitl +element +artifici +intellig +introductionus +lisp +publish +common +lisp +edit +accompanyingsoftwar +current +work +book +subject +ofparallel +comput +imag +process +tanimoto +organ +ieee +comput +societi +internationalworkshop +visual +languag +held +seattl +serveda +gener +chair +meet +bergen +norwai +also +serv +program +chair +intern +conferenceon +pattern +recognit +subconfer +parallel +comput +anda +program +chair +ieee +comput +societi +conferenceon +comput +vision +pattern +recognit +serv +programcommitte +numer +confer +imag +process +patternrecognit +gener +chairman +ieee +comput +societyworkshop +comput +architectur +pattern +analysi +machineintellig +current +serv +steer +committe +theieee +symposiaon +visual +languag +serv +editorialboard +journal +pattern +recognit +journal +visual +languag +comput +cvgip +imag +understand +serv +editor +chief +ieeetransact +pattern +analysi +machin +intellig +addit +research +relat +activ +tanimoto +serv +chair +colleg +engineeringeduc +polici +committe +vice +chair +colleg +council +chair +elect +fellow +ieee +outsid +comput +scienc +steve +tanimoto +enjoi +plai +jazz +andclass +piano +music diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..beaafbd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,212 @@ +titl +paul +young +professor +graduateof +antioch +colleg +receiv +hejoin +univers +washington +seventeen +year +atpurdu +univers +earli +faculti +member +inperhap +first +comput +scienc +depart +unit +state +also +nation +scienc +foundat +postdoctor +fellow +atstanford +serv +faculti +reed +colleg +serv +briefli +aschairman +comput +inform +scienc +depart +theunivers +mexico +twice +taught +visit +professorin +comput +scienc +divis +univers +california +berkelei +becam +associ +dean +research +facil +colleg +engin +research +interest +theoret +comput +scienc +emphasi +question +comput +complex +thegener +theori +algorithm +connect +mathematicallog +author +coauthor +paper +area +iscoauthor +graduat +textbook +gener +theori +algorithm +serv +three +time +program +committe +symposiumon +theori +comput +serv +executivecommitte +nomin +committe +special +interestgroup +theori +comput +sigact +also +chairmanof +program +committe +ieee +comput +societi +annualsymposium +foundat +comput +scienc +foc +hasserv +vice +chairman +chairman +comput +societi +stechnic +committe +mathemat +foundat +comput +also +serv +program +committe +chair +programcommitte +structur +complex +theori +confer +serv +nation +scienc +foundat +advisorysubcommitte +comput +scienc +serv +chairman +thiscommitte +serv +chairman +ofth +comput +research +associ +professor +young +serv +editori +board +special +issu +inform +control +annal +histori +ofcomput +current +serv +editori +board +theoret +comput +scienc +notr +dame +journal +formallog +journal +comput +system +scienc +eleven +student +complet +doctor +dissert +underprofessor +young +direct +sever +gone +dopostdoctor +work +cornel +univers +ofcalifornia +berkelei +eight +current +hold +faculti +posit +avarieti +univers +chosen +industri +employ +professor +young +leather +motorcycl +jacket +read +ratherthan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..29fadae9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,86 @@ +titl +john +zahorjan +professor +graduat +frombrown +univers +receiv +univers +oftoronto +receiv +presidenti +young +investigatoraward +primari +research +interest +area +schedul +parallelsystem +runtim +support +parallel +comput +applic +supportfor +mobil +comput +current +focu +schedul +polici +support +continu +mediaappl +involv +real +time +audio +video +thegoal +provid +polici +system +interfac +allow +applic +torespond +easili +chang +system +load +activ +research +topic +includ +techniqu +runtim +parallelizationof +code +written +sequenti +languag +support +program +exhibit +bothcontrol +data +parallel +program +construct +develop +applic +intend +formobil +comput +platform +zahorjan +editori +board +ieee +transactionson +softwar +engin +comput +survei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..0b85a648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,265 @@ +mathemat +experi +imag +process +metip +mathemat +experi +imag +process +metip +project +director +steven +tanimoto +depart +comput +scienc +emphas +practic +us +mathemat +encourag +discuss +group +learn +encourag +exploratori +open +end +learn +goal +metip +project +digit +imag +process +help +meet +theseobject +particular +develop +seri +applicationsdesign +allow +student +manipul +digit +imag +choic +materi +intend +us +enrich +activ +rather +part +astandard +classroom +curriculum +teacher +plai +variou +role +withthes +activ +exampl +catalyz +student +learn +bylead +discuss +theconcept +student +explor +comput +metip +project +current +number +program +allow +student +toexplor +mathemat +imag +process +pixel +calcul +click +order +free +copi +todai +imag +warper +click +order +free +copi +todai +transform +programm +click +order +free +copi +todai +applic +develop +primarili +pentium +base +srun +microsoft +window +applic +pixel +calcul +alsoavail +appl +macintosh +volunt +mathematicsteach +particip +test +experiment +learn +materi +transcriptproject +current +design +record +keep +framework +willfacilit +storag +person +academicinform +hard +disk +floppi +own +student +list +peopl +work +themetip +project +close +relat +project +involv +studi +ofmultiplay +educ +activ +metip +project +work +tointegr +activ +idea +describ +prospect +forth +direct +distribut +imag +databas +educ +imag +process +current +project +collect +experi +user +itsxform +imag +transform +softwar +done +somethingfun +us +softwar +pleas +know +put +current +version +document +onlin +link +littl +demonstr +xform +beenput +togeth +graduat +student +took +seminar +winter +xform +program +environ +integr +witha +subset +common +lisp +offer +technic +essenti +newapproach +learn +teach +comput +program +fundamentalattract +us +approach +student +learn +program +thecomput +pursuit +creat +neat +visual +effect +digitalimag +portrai +peopl +thing +interest +successfulli +instal +softwar +would +like +discussteach +program +pleas +contact +link +relat +project +list +metip +support +part +nation +scienc +foundat +undergr +number +bricker +washington +tanimoto +washington +last +modifi +tuesdai +februari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..f433377b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,65 @@ +univers +washington +research +mobil +ubiquit +comput +mobil +comput +univers +washingtonher +overview +mobil +computingresearch +project +mobisa +inform +system +mobil +wireless +comput +environ +system +infrastructur +mobil +handheld +comput +task +graph +manag +applic +infrastructur +cope +resourc +variabl +paper +survei +paper +describ +fundament +challeng +field +program +methodolog +disconnect +operationdistribut +transact +mobilecomput +systemcontact +prof +brian +bershadprof +gaetano +borriellomarc +fiuczynskigeorg +formanprof +hank +levygeoff +voelkerterri +watsonprof +john +zahorjan +last +updat +forman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..fd5d0149 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,215 @@ +cecil +vortex +projectuw +cecil +vortex +projectwelcom +home +page +cecil +vortex +project +conduct +research +program +languag +design +implement +emphasi +issu +object +orient +languag +cecil +pure +object +orient +languag +intend +support +rapidconstruct +high +qualiti +extens +softwar +cecil +incorporatesmulti +method +simpl +prototyp +base +object +model +mechan +tosupport +structur +form +comput +inherit +modul +basedencapsul +flexibl +static +type +system +allowsstat +dynam +type +code +freeli +vortex +optim +compil +infrastructur +forobject +orient +high +level +languag +target +pureobject +orient +languag +like +cecil +hybrid +object +orientedlanguag +like +modula +java +vortex +current +incorporateshigh +level +optim +static +class +analysi +class +hierachyanalysi +profil +guid +receiv +class +predict +profil +guidedselect +procedur +special +intraprocedur +messag +split +automat +inlin +static +closur +analys +also +includ +acollect +standard +intraprocedur +analys +commonsubexpress +elimin +dead +assign +elimin +vortexcompil +written +entir +cecil +initi +beta +releas +cecil +vortex +system +currentlyavail +sparc +run +either +suno +suno +solari +send +mail +cecil +interest +request +messag +bodi +subscrib +like +subscrib +list +ofinterest +parti +inform +obtain +thebeta +releas +recent +finish +technic +report +describ +much +implement +research +inform +project +overview +detail +overview +project +goal +direct +postscript +version +also +avail +project +member +current +past +project +member +paper +paper +written +sampl +project +list +sampl +research +project +avail +peopl +uwcs +intern +document +project +intern +document +avail +peopl +support +page +list +sourc +support +project +relat +project +pointer +object +orient +languag +implement +projectslast +updat +august +cecil +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..216f37ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,45 @@ +grail +graphic +imag +laboratorywelcom +home +page +grail +graphic +imag +laboratori +theunivers +washington +depart +comput +scienc +engin +inform +peopl +cours +research +project +public +these +softwar +data +cool +imag +neighborhood +depart +comput +scienc +engin +univers +washington +seattl +local +interest +grail +disk +usag +polici +comment +mtwong +washington +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..aebcba96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,118 @@ +chaotic +rout +project +chaotic +rout +project +comput +scienc +engin +depart +univers +washington +seattl +chao +friend +mine +dylan +chaotic +peopleal +sort +peopl +work +chaotic +rout +project +research +paper +technic +report +repositori +research +papersand +technic +report +chaoticrout +project +avail +chao +router +chip +chao +router +chip +implement +chao +rout +algorithm +hardwar +built +test +micron +cmo +redesign +micron +process +better +perform +simul +chao +router +simul +power +simul +simul +allsort +network +rout +algorithm +includ +nice +graphicalfront +standard +present +result +pcrcw +discuss +presentationof +simul +result +rout +algorithm +abl +come +upwith +guidelin +present +result +research +group +build +list +research +group +thathav +web +describ +research +rout +interconnect +network +parallel +comput +rout +commun +workshop +pcrcw +pcrcw +held +univeristi +washington +seattl +proceed +avail +univers +washington +home +page +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..ab1562c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,273 @@ +chinook +project +chinook +project +shinook +chinook +salmon +larg +salmon +oncorhynchu +tshawytscha +pacif +amer +name +tribe +warm +wind +blow +east +rocki +mountain +warm +southerli +wind +west +rocki +mountain +rare +american +sled +doga +hardwar +softwar +synthesi +toolfor +real +time +embed +system +chinook +hardwar +softwar +synthesi +cadtool +embed +system +design +control +domin +reactivesystem +time +constraint +chinook +map +behavior +descriptionto +user +target +architectur +fill +detail +neededto +build +complet +system +enabl +design +make +inform +designdecis +high +level +earli +design +cycl +rather +reiterateaft +work +level +detail +retarget +willnot +make +design +maintain +also +enabl +designerto +take +advantag +technolog +instead +ti +legacycod +chinook +current +activ +develop +gener +currentlyw +activ +work +softwar +architectur +synthesi +interprocessorcommun +synthesi +effici +accur +simul +moredetail +becom +avail +shortli +chinook +character +follow +meet +time +constraint +ratherthan +try +maxim +averag +perform +util +assumesmanu +partit +believ +issu +intricateand +sometim +even +technic +want +design +evalu +differentarchitectur +rather +forc +singl +fix +processor +asicarchitectur +synthes +softwar +architectur +rather +reli +onoff +shelf +real +time +kernel +discourag +retarget +first +version +chinook +synthesi +tool +oper +innovemb +version +shownat +design +autom +confer +diego +june +mainfeatur +includ +automat +connect +processor +peripheraldevic +gener +sequenti +code +concurr +descript +andsynthes +devic +driver +input +verilog +output +hardwarenetlist +need +connect +hardwar +compon +togeth +softwareprogram +processor +main +topic +includ +interfacingproblem +hardwar +softwar +compon +schedul +timingconstraint +partit +function +improv +version +demonstr +nato +summer +school +swcodedesign +tremezzo +itali +june +incorpor +severalmor +interfac +synthesi +techniqu +includ +memori +map +moreeffici +code +gener +simul +chinookersfacultygaetano +borriellogradu +student +chou +ross +ortegaken +hinesian +macduff +recent +selizabeth +walkupscott +hauck +henrik +hulgaardstafflarri +mcmurchielist +paperschinook +sponsorsarpa +contract +nation +scienc +foundat +grant +nation +scienc +graduat +fellowship +walkup +patricia +robert +harri +fellowship +ortega +graduat +fellowship +chou +embed +link +depart +comput +scienc +engin +universityof +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..9498ea23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,189 @@ +emerald +emerald +projectid +develop +phase +fpga +architectur +would +makeus +reliabl +map +tool +produc +accur +performanceevalu +propos +design +unfortun +given +quickproduct +time +frame +face +develop +tool +construct +isoften +postpon +mani +architectur +featur +beenfrozen +satisfi +need +fast +tool +prototyp +havedesign +emerald +power +architectur +driven +system +quickdevelop +fpga +tool +heart +emerald +provid +basicfeatur +need +fpga +system +logic +block +analysi +synthesisand +technolog +map +global +placement +partit +anddetail +placement +rout +environ +provid +aneffici +thoroughli +specifi +fpga +rout +logic +blockarchitectur +well +architectur +specif +metric +tailorplac +rout +moreov +emerald +parameter +schematicspecif +allow +architectur +variat +quickli +capturedand +evalu +emerald +public +document +contain +page +includ +byth +contribut +author +mean +ensur +time +dissemin +ofscholarli +technic +work +commerci +basi +copyright +andal +right +therein +maintain +author +copyrighthold +notwithstand +offer +work +hereelectron +understood +person +copi +thisinform +adher +term +constraint +invok +eachauthor +copyright +work +repost +without +theexplicit +permiss +copyright +holder +definit +emerald +paper +darren +cronquist +larri +mcmurchi +emerald +architectur +driven +tool +compil +fpga +appear +proceed +sigda +fourth +intern +symposium +field +programm +gate +arrai +februari +router +us +emeraldlarri +mcmurchi +carl +ebel +pathfind +negoti +basedperform +driven +router +fpga +proceed +third +intern +symposium +field +programm +gate +arraysaid +design +februari +research +darren +cronquist +carl +ebel +larri +mcmurchi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..7deb65da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,220 @@ +northwest +laboratori +integr +system +northwest +laboratori +integr +system +depart +comput +scienc +engin +univers +washington +seattl +usath +depart +comput +scienc +engin +univers +washington +engag +larg +scale +integr +vlsi +comput +aid +design +research +develop +educ +sinc +late +todai +northwest +laboratori +integr +system +focu +wide +varieti +vlsi +architectur +embed +sytem +research +current +research +project +asynchron +circuit +verificationtim +separ +event +specif +synthesi +verif +time +asynchron +circuit +asynchron +circuit +survei +current +asynchron +design +methodolog +well +first +fpga +asynchron +circuit +fpga +rapid +prototypingtriptych +montag +fpga +architectur +develop +triptych +montag +fpga +architectur +architectur +improv +densiti +current +commerci +fpga +multi +fpga +system +rapid +prototyp +develop +springbok +rapid +prototyp +system +board +level +design +well +partit +assign +rout +topolog +work +gener +multi +fpga +system +emerald +architectur +adapt +toolset +fpga +complet +map +placement +rout +toolscan +gener +automat +descript +fpgaarchitectur +architectur +specif +metric +incorporatedinto +variou +tool +improv +result +embed +systemsth +chinook +project +hardwar +softwar +design +synthesi +simul +system +embed +applic +perform +optim +synchron +circuitsretim +level +clock +circuit +effici +algorithm +retim +circuit +uselevel +sensit +latch +improv +perform +reduc +cost +andincreas +toler +clock +skew +architectur +retim +method +improv +perform +synchronouscircuit +latenc +feedback +contraint +network +routerth +chaoticrout +project +self +tune +systemsself +tune +system +direct +kehlprevi +research +project +gemini +valid +layout +compar +specif +circuit +implement +circuit +mactest +cost +digit +function +tester +chip +circuit +cmo +voltag +level +arpa +reportsarpa +bluebook +paragraph +overview +accomplish +embed +system +high +perform diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..3daf0859 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,94 @@ +softwar +safeti +univers +washington +softwar +plai +increasingli +import +role +system +nuclear +reactor +aircraft +defenc +space +system +chemic +plant +medic +equip +consequ +malfunct +safeti +critic +system +must +pass +rigor +test +review +us +although +system +safeti +engin +techniqu +exist +decad +appli +system +contain +digit +comput +softwar +goal +univers +washington +safeti +project +develop +theoret +foundat +safeti +methodolog +build +safeti +critic +system +built +upon +foundat +safewar +system +safeti +comput +nanci +leveson +summar +issu +involv +lai +foundat +methodolog +work +safeti +analysi +techniqu +support +methodolog +prototyp +tool +us +valid +specif +analysi +techniqu +univers +washington +comput +scienc +softwar +engin +safeti +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..94ec28fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,239 @@ +internet +softbotinternet +softbotth +softbot +finalist +discoveraward +technolog +innov +comput +softwar +build +autonom +agent +interact +real +world +softwareenviron +oper +system +databas +pragmaticallyconveni +intellectu +challeng +substrat +research +support +claim +util +plan +machin +learningtechniqu +develop +internet +softbot +softwar +robot +acustomiz +moder +intellig +assist +internetaccess +softbot +accept +goal +high +level +languag +generatesand +execut +plan +achiev +goal +learn +itsexperi +softbot +enabl +human +user +state +want +accomplish +softbot +disambigu +requestand +dynam +determin +satisfyit +softbot +us +unix +shell +world +wide +interactwith +wide +rang +internet +resourc +take +tour +softbot +sgraphic +user +interfac +princip +investig +oren +etzioni +daniel +weld +also +check +metacrawl +softbot +field +servic +enabl +tosearch +multipl +indic +parallel +provid +sophisticatedprun +option +inform +contact +oren +etzioni +etzioni +washington +access +introduct +softbot +project +found +softbot +base +interfac +internet +cacm +juli +methodolog +motiv +project +found +intellig +without +robot +repli +brook +magazin +decemb +technic +softbot +paper +found +cartoonrepresent +internet +softbot +taken +blanchard +articl +appear +decemb +issu +ofcolumn +univers +washington +alumni +magazin +softbot +research +group +current +develop +graphic +user +interfac +toth +softbot +allow +user +easili +specifi +high +level +goal +extend +maintain +xiiplann +keith +golden +work +graphic +specif +search +control +keith +golden +implement +advanc +plan +space +browser +debug +planner +control +dave +christianson +compar +rule +base +versu +procedur +search +control +sujai +parekh +ilalearn +inform +resourc +design +protocol +multi +softbot +collabor +negoti +ying +experi +reactiv +system +softwar +domain +kwok +goan +build +optim +agent +ingram +inform +gather +reactiv +system +internet +kwok +softbot +hacker +info +local +access +back +home +page +back +home +page +mike +perkowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..adde1ffb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,385 @@ +spin +spin +washington +spin +oper +systemspin +extens +oper +system +kernel +thatsupport +dynam +adapt +system +interfac +andimplement +direct +applic +control +stillmaintain +system +integr +inter +applic +isol +spin +allow +applic +load +code +kernel +atruntim +run +kernel +extens +code +accesshardwar +oper +system +servic +almost +nooverhead +basic +procedur +call +data +pass +byrefer +rather +copi +spin +kernel +providesa +core +interfac +capabl +basic +resourc +thesear +us +collect +extens +implement +gener +systemservic +address +space +thread +network +allextens +code +kernel +written +typesaf +languag +modula +properti +oftypesafeti +prevent +extens +crash +system +attemptingto +manipul +arbitrari +piec +code +data +spin +run +alpha +workstat +us +writeboth +special +applic +network +video +system +wella +support +gener +purpos +unix +program +connect +machinerun +spin +kernel +spin +server +quick +result +structur +allow +program +extend +oper +system +servic +withlow +overhead +exampl +spin +run +alpha +applic +handl +recov +page +fault +microsecond +take +microsecond +creat +thread +control +executeit +termin +synchron +termin +fork +join +protectedprocedur +call +anoth +extens +kernel +function +take +microsecond +cross +machin +applic +applic +overethernet +take +microsecond +realli +oldadapt +take +less +microsecond +operationsund +mach +unix +take +time +longer +samehardwar +time +benchmark +page +saveyourself +effort +recent +report +paper +dynam +bind +extens +oper +system +invoc +mechan +provid +flexibl +effici +andsimpl +integr +extens +execut +system +appear +osdi +extens +safeti +perform +spin +oper +system +design +implement +perform +paper +appear +sosp +extens +protocol +architectur +forappl +specif +network +design +implement +perform +paper +appear +usenix +winter +confer +write +oper +system +us +modula +describ +experi +us +modula +build +high +perform +extens +system +make +clear +distinct +languag +implement +languag +support +extens +oper +system +pretti +happi +deal +shortcom +order +languag +safe +extens +oper +system +paper +describ +address +shortcom +safe +dynam +link +extens +oper +system +describ +dynam +linker +load +code +kernel +point +abil +creat +manag +linkabl +namespac +describ +interfac +collect +interfac +languag +runtim +support +dynam +interposit +system +code +describ +kernel +intern +commun +extens +facil +show +dynam +code +gener +improv +perform +critic +kernel +servic +inform +dynam +compil +wait +time +compil +code +paper +trail +project +report +talk +paper +project +member +benchmark +interest +bottom +line +modula +inform +modula +arpa +project +overview +execut +summari +regular +report +friend +gotten +assist +academia +industri +project +page +sai +involv +relat +project +pointer +extens +system +project +peopl +sai +barb +arrow +intern +document +latest +statu +project +member +avail +project +project +could +result +qualif +credit +master +degre +fund +raship +posit +undergradu +project +credit +mascot +encourag +mani +peopl +decid +adopt +ourmascot +page +maintain +brian +bershad +bershad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..b6610ae5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,167 @@ +dynam +compil +projectth +dynam +compil +projectmor +inform +dynam +compil +project +member +paper +relat +projectsuw +section +student +project +group +webdynam +compil +enabl +optim +base +valu +ofinvari +data +comput +time +us +valu +theserun +time +constant +dynam +compil +elimin +memoryload +perform +constant +propag +fold +remov +branch +theydetermin +fulli +unrol +loop +bound +howev +performancebenefit +effici +dynam +compil +code +offsetbi +time +cost +dynam +compil +approach +dynamiccompil +strive +fast +dynam +compil +high +qualitydynam +compil +code +programm +annot +region +theprogram +compil +dynam +static +optimizingcompil +automat +produc +optim +machin +code +templat +us +pair +dataflow +analys +identifi +variabl +willb +constant +time +simpl +dynam +compil +copi +thetempl +patch +comput +valu +time +constant +produc +optim +execut +code +work +target +gener +purpos +imper +program +languag +initi +initialexperi +appli +dynam +compil +program +producedspeedup +rang +part +spinproject +eventu +system +us +dynamicallycompil +code +spin +kernel +exampl +spinev +dispatch +howev +also +activ +explor +otherposs +applic +dynam +compil +invirtu +machin +interpret +prototyp +dynam +compil +systemi +describ +pldi +paper +arenow +start +design +build +second +gener +system +wewil +releas +detail +soon +last +updat +august +grant +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..065c8197 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,133 @@ +project +project +depart +comput +scienc +engin +univers +washington +seattl +info +washington +eduzpl +arrai +base +program +languag +suitabl +comput +would +previous +written +fortran +program +fast +sequenti +parallel +comput +without +modif +without +special +direct +machin +independ +recompil +necessari +program +machin +higher +level +concept +like +arrai +border +elimin +error +prone +index +tediou +loop +typic +program +shorter +easili +written +easili +understood +modifi +comput +scientist +find +concept +region +direct +border +shatter +control +flow +conclus +ideal +engin +scientif +program +walk +small +program +write +compil +program +yourmachin +scientif +programm +area +shouldconsid +enrol +zpthi +autumn +program +check +recent +chang +languag +project +overview +high +level +overview +program +walk +minut +introduct +languag +base +compil +compil +program +browser +right +paper +paper +manual +relat +detail +line +inform +sampl +program +peopl +project +member +horizon +descript +group +direct +futur +project +acknowledg +list +help +support +work +info +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..9978ad96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,255 @@ +simultan +multithread +home +pagesimultan +multithread +projectoverviewpeoplepubl +overviewth +crucial +problem +face +todai +high +speed +microprocessor +maintain +high +processor +util +face +long +instruct +memori +latenc +allevi +problem +modern +processor +issu +multipl +instruct +cycl +superscalar +interleav +execut +differentthread +differ +cycl +multithread +processor +ultim +though +techniqu +limit +amount +parallel +avail +within +singl +thread +singl +cycl +simultan +multithread +techniqu +permit +multipl +independ +thread +issu +instruct +superscalar +function +unit +singl +cycl +combin +multipl +instruct +issuefeatur +wide +superscalar +processor +latenc +hide +abilityof +multithread +architectur +processor +hardwar +contextsar +activ +simultan +compet +cycl +avail +resourc +dynam +share +processor +resourc +enabl +exploitthread +level +instruct +level +parallel +interchang +formsof +parallel +effect +us +increas +processor +util +studi +havedemonstr +simultan +multithread +significantli +improvesprocessor +throughput +perform +multiprogram +parallelworkload +shown +perform +gain +achievedin +architectur +minim +extens +modern +ordersuperscalar +processor +current +futur +work +includ +investig +fast +synchronizationtechniqu +enabl +also +conduct +research +otherarchitectur +compil +issu +simultan +multithread +peoplefaculti +susan +egger +hank +levygradu +student +jack +dean +tullsenindustri +collabor +digit +equip +corpor +joel +emer +rebecca +stamm +public +convert +thread +level +parallel +instruct +level +parallel +simultan +multithread +abstract +postscript +egger +emer +levi +stamm +andd +tullsen +submit +public +juli +exploit +choic +instruct +fetch +issu +implement +simultan +multithread +processor +abstract +postscript +tullsen +egger +emer +levi +stamm +proceed +annual +intern +symposium +comput +architectur +philadelphia +compil +issu +simultan +multithread +processor +postscript +egger +levi +andd +tullsen +proceed +first +suif +compil +workshop +stanford +januari +simultan +multithread +maxim +chip +parallel +abstract +postscript +tullsen +egger +andh +levi +proceed +annual +intern +symposium +comput +architectur +santa +margherita +ligur +itali +june +student +check +list +research +project +still +doon +student +affair +page +page +maintain +jack +lojlo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..4ce6c8e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,102 @@ +home +pagecomput +scienc +departmentabout +comput +scienc +departmentour +depart +form +consist +rank +comput +scienc +depart +countri +faculti +member +receiv +fourteen +presidenti +young +investig +award +packard +fellowship +faculti +award +women +scientist +engin +incent +excel +award +three +doctor +dissert +award +three +faculti +develop +award +comput +scienc +depart +area +research +project +inform +peopl +comput +scienc +depart +cours +offer +fall +class +futur +timet +technic +report +comput +system +answer +frequent +ask +question +comput +scienc +alumni +inform +graduat +guidebook +undergradu +guidebook +depart +annual +report +onlin +util +madison +local +servic +relat +organ +colophon +statist +server +us +infocomput +scienc +departmentunivers +wisconsin +madisona +comput +scienc +statist +west +dayton +streetmadison +wisc +voic +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..e73e8f09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,171 @@ +object +explor +purpos +viewpoint +control +object +explor +purpos +viewpoint +control +kyro +kutulako +chuck +dyer +consider +interest +recent +emploi +simpleobserv +behavior +either +make +recoveri +scene +propertieseasi +fixat +combin +simpl +behavior +order +toperform +complex +task +navig +obstacl +avoid +ourwork +focus +abil +activ +observ +control +pointof +observ +perform +task +involv +explor +object +develop +behavior +provabl +correct +makesimpl +motion +decis +base +observ +local +geometryof +scene +requir +minim +process +imag +first +consid +task +recov +local +shape +thesurfac +select +point +approach +base +generalobserv +posit +provid +inform +objectthan +other +exist +special +viewpoint +beexploit +observ +mobil +effici +anddeterminist +strategi +reach +show +localshap +recoveri +task +achiev +us +simpl +qualitativestrategi +smoothli +control +point +observ +viewingdirect +align +princip +direct +selectedpoint +second +consid +task +deriv +global +descriptionof +object +formul +global +surfac +reconstruct +thequalit +task +smoothli +control +point +observationso +visibl +slide +maxim +connect +reconstruct +region +show +task +provabl +achiev +arbitrari +smooth +surfac +attempt +maintain +well +defin +geometr +relationship +point +observationand +view +surfac +approach +suggest +abil +smoothli +control +point +observ +lead +provabl +correct +behavior +achiev +local +global +task +scene +explor +navig +also +simplifi +frame +comput diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..40fd3773 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,142 @@ +deform +contour +model +extract +detect +classif +deform +contour +model +extract +detect +classif +fung +roland +chin +develop +integr +approach +model +extract +detect +classifi +deform +contour +directli +noisi +imag +conduct +case +studi +regular +formul +initi +ofact +contour +model +snake +us +minimax +principl +deriv +regular +criterion +wherebi +valu +automat +implicitli +determin +along +contour +furthermor +formul +energi +function +yield +snake +contain +hough +transform +special +case +subsequ +consid +problem +model +extract +arbitrari +deform +contour +noisi +imag +combin +stabl +invari +anduniqu +contour +model +markov +random +field +yield +priordistribut +exert +influenc +arbitrari +global +model +allow +deform +bayesian +framework +contour +extract +turn +posterior +estim +turn +equival +energi +minim +gener +activ +contour +model +final +integr +lower +level +visual +task +withpattern +recognit +process +detect +classif +base +nearman +pearson +lemma +deriv +optim +detect +classificationtest +summat +peak +practic +applic +small +region +need +consid +margin +distribut +valid +formul +confirm +extens +rigor +experiment +gsnake +softwar +avail diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..d7bfba5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,423 @@ +data +visual +base +lattic +data +visual +base +lattic +bill +hibbard +brian +paul +chuck +dyer +defin +foundat +data +visual +base +theidea +visual +process +function +ofdata +object +call +data +model +displai +calleda +displai +model +prototyp +system +call +adha +implement +base +upon +idea +guid +follow +principl +scientist +develop +mathemat +model +natur +data +objectsrepres +object +mathemat +model +mathemat +objectsfrequ +contain +infinit +precis +real +number +functionswith +infinit +domain +wherea +comput +data +object +containfinit +amount +inform +thu +approxim +themathemat +object +repres +comput +displai +contain +finit +amount +inform +contain +finit +number +pixel +color +chosenfrom +finit +palett +anim +sequenc +contain +finit +numbersof +frame +thu +approxim +ideal +displai +close +relationship +data +comput +data +model +appropri +data +object +computationalmodel +program +languag +purpos +data +visual +commun +informationcont +data +object +particular +defin +expressivenesscondit +map +data +object +displai +thatdisplai +encod +fact +data +object +onlythos +fact +visual +system +complet +sens +itimpl +function +data +object +displai +satisfyingth +expressivenss +condit +defin +order +relat +data +object +base +onhow +precis +approxim +mathemat +object +wecan +defin +order +relat +displai +base +howprecis +approxim +ideal +displai +base +voxelresolut +order +relat +defin +lattic +ofdata +object +displai +therefor +model +visualizationprocess +function +lattic +data +objectsto +lattic +displai +interpret +expressivenesscondit +context +show +satisfi +theexpress +condit +lattic +isomorph +defin +particular +lattic +appropri +scientificdata +object +data +object +built +primitivevari +call +scalar +time +latitud +radianc +temperatur +tupl +arrai +data +type +constructor +arrai +time +temperatur +data +type +appropriatefor +time +seri +temperatur +thu +lattic +containsth +data +object +scientif +program +languag +canalso +defin +particular +lattic +displai +displayi +voxel +voxel +specifi +graphicsprimit +call +displai +scalar +pixel +locationand +size +volum +color +place +animationsequ +show +function +satisfi +expressivenesscondit +class +function +defin +map +fromth +scalar +us +primit +variabl +data +object +thedisplai +scalar +specifi +graphic +primit +voxel +ofcours +design +scientif +displai +alreadi +assum +primit +variabl +map +graphic +primit +exampl +given +data +object +type +arrai +time +temperatur +isnatur +displai +graph +time +along +axi +andtemperatur +along +anoth +remark +thing +wedo +take +design +displai +assumpt +consequ +fundament +expressivenesscondit +develop +implement +system +calledvi +adthat +allow +scientist +experi +algorithm +steer +theircomput +visual +data +object +creat +theirprogram +lattic +defin +data +object +thevi +program +languag +system +implement +lattic +vvof +displai +data +displai +us +function +thatsatisfi +express +condit +howev +implementationi +quit +precis +data +flow +system +defin +auser +interfac +control +data +displai +base +abstractionof +render +pipelin +system +defin +user +interfacefor +control +data +displai +base +abstract +ofmap +scalar +displai +scalar +possibl +defin +data +lattic +recurs +defineddata +type +complex +link +type +tree +us +ingener +purpos +program +languag +abstract +datatyp +object +class +object +orient +program +languag +lattic +provid +rigor +foundat +visual +particular +help +develop +analyt +altern +usualapproach +defin +visual +process +construct +bywrit +special +purpos +program +comput +displai +fora +specif +data +object diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..26777388 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,68 @@ +condor +project +homepag +object +goal +condor +project +develop +implement +deploi +evaluatemechan +polici +support +high +throughput +comput +larg +collect +distribut +own +comput +resourc +guid +technologicaland +sociolog +challeng +comput +environ +condor +team +build +softwar +tool +enabl +scientist +engin +increas +comput +throughput +introduct +start +high +throughput +comput +research +condor +system +condor +pool +univers +wisconsin +madison +condor +help +page +project +home +page +condor +world +mail +list +comment +suggestionscondor +admin +wisc +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..51db6034 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,11 @@ +next +homepag +next +peopl +next +project +last +modifi +septemb +miron +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..aa2e5d10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,262 @@ +coral +databas +projectcor +databas +projectdocu +content +objectiveoverviewreleas +informationse +also +public +relat +coralpeopl +work +coraloth +research +madisonobject +object +coral +project +develop +robust +efficientdeduct +databas +system +investig +variou +applic +domain +sever +algorithm +underli +coralsystem +develop +member +group +durationof +project +sinc +overview +coral +deduct +system +support +rich +declar +languag +interfac +allow +combin +declaritiveand +imper +program +declar +queri +languag +supportsgener +horn +claus +augment +complex +term +group +aggreg +negat +relat +tupl +contain +univers +quantifi +variabl +coral +declar +program +canb +organ +collect +interact +modul +coralimplement +support +wide +rang +evalu +strategi +andautomat +choos +effici +evalu +strategi +modulein +program +addit +user +permit +guid +queri +optim +desir +select +among +wide +rang +control +choic +atth +level +modul +coral +system +provid +imper +construct +updat +insertand +delet +rule +coral +also +interfac +user +canprogram +combin +declar +coral +extend +withcor +primit +high +degre +extens +provid +allowingc +programm +class +structur +enhanc +coralimplemen +coral +provid +support +main +memori +disk +resid +data +disk +resid +data +support +us +theexodusstorag +manang +also +provid +transact +manag +aclient +server +environ +releas +inform +current +releas +coral +version +releas +octob +instal +coral +system +grab +file +want +nobin +version +contain +sourc +code +requiringy +compil +coral +version +includ +made +binari +forth +indic +machin +type +click +file +grab +readm +gener +inform +instal +manual +coral +instruct +instal +coral +nobin +binari +includ +coral +hpux +seri +binari +includ +coral +suno +binari +includ +coral +solari +binari +includ +coral +solari +binari +includ +coral +linux +linux +binari +includ +stai +inform +releas +coral +announcemnt +mail +listwhich +reciev +announc +releas +relev +inform +releas +also +announc +newsgroup +comp +lang +misc +also +submit +question +comment +report +coral +send +mail +coral +wisc +edulast +modifi +octob +shawn +flisakowski +flisakow +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..0f5d6b06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,410 @@ +complementar +problem +algorithm +applic +complementar +problem +neta +result +three +decad +research +subject +complementar +problem +divers +applic +engin +econom +scienc +becom +well +establish +fruitfuldisciplin +within +mathemat +program +sever +monograph +survei +document +basic +theori +algorithm +applic +complementar +problem +role +optim +theori +page +serv +center +inform +regard +research +incomplementar +problem +list +meetingsof +interest +commun +pointer +softwar +forcomplementar +problem +well +link +page +interest +list +research +applic +area +also +given +tabl +content +directori +research +complementar +softwar +complementar +problem +applic +complementar +relev +link +directori +complementar +researcherssoftwar +complementar +problem +mcplib +collect +nonlinear +mix +complementar +problem +problemdescript +access +gam +sourc +file +forthes +problem +complementar +toolbox +matlab +evolv +freeli +avail +toolbox +consist +sever +andm +file +allow +mcplib +problem +access +frommatlab +without +access +gam +file +give +functionevalu +spars +jacobian +evalu +machin +specificvers +download +path +solver +also +avail +toolbox +detail +solver +interfac +gam +describ +librari +routin +areavail +help +hook +solver +gam +model +languag +contact +steve +gam +rutherford +colorado +ferri +wisc +edufor +detail +mile +mile +extens +classicaljosephi +newton +method +solut +linearizedsubproblem +comput +lemk +almost +complementari +pivot +algorithm +newton +point +us +defineth +newton +direct +us +dampedlinesearch +merit +function +us +measur +violat +infeas +complementar +mile +also +emploi +restartprocedur +case +newton +point +cannot +comput +totermin +secondari +everi +linear +subproblem +rescal +equilibr +elementsappear +data +subproblem +path +run +gam +mcpor +directli +matlab +path +solver +appli +techniqu +similarto +us +newton +method +smooth +system +anonsmooth +reformul +algorithmconsist +sequenc +major +iter +consist +anapproxim +linear +step +similar +mile +construct +pathto +newton +point +solut +approxim +aposs +search +path +newton +point +exist +thepath +cannot +entir +construct +step +along +partiallycomput +path +taken +problem +relinear +anonmonoton +watchdog +strategi +emploi +appli +path +search +help +avoid +converg +local +minima +norm +function +forth +underli +nonsmooth +equat +keep +number +functionevalu +requir +small +possibl +list +solver +option +given +document +algorithm +base +uponreformul +system +nonsmooth +equat +algorithm +implement +gam +solver +robustnessimprov +us +proxim +perturb +strategi +give +qpcomp +algorithm +nonsmooth +equat +ishandl +us +direct +deriv +smooth +smooth +algorithm +base +uponreformul +system +nonsmooth +equat +thenapproxim +solv +sequenc +smooth +approxim +leadto +zero +nonsmooth +system +iter +smooth +approxim +origin +system +form +theaccuraci +approxim +determin +residu +thecurr +point +implement +gam +system +solver +implement +subsystem +gam +compar +paper +applic +complementar +engineeringand +econom +applic +complementar +problem +paper +list +mani +known +applic +complementar +problem +mpsge +preprocessor +gam +model +languag +thatallow +econom +equilibrium +problem +formul +easili +thegam +home +page +inform +nemsth +nation +energi +model +system +sever +paper +relat +algorithmsand +paper +give +overview +project +relev +link +look +michael +trick +oper +research +page +interest +link +look +interior +point +inform +interior +pointmethod +argonn +nation +laboratori +archiv +last +modifi +octob +michael +ferri +ferri +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..c98b2788 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,71 @@ +exodu +project +home +pageexodu +extens +object +orient +databas +system +toolkitnot +document +construct +exodu +project +succed +theshor +project +still +provid +minim +support +user +theexodu +storag +manag +compil +persistentprogram +languag +avail +wisc +licens +requir +inform +need +contact +exodu +wisc +eduprincip +investig +mike +carei +david +dewittse +also +public +relat +exodusshor +successor +exoduslatest +exodu +storag +manag +compilercontribut +softwar +storag +managera +mail +list +exodu +user +exodus_al +wisc +benchmark +benchmark +oodbsdat +prepar +april +michael +zwill +zwill +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..0034cc71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,326 @@ +paradis +projectparadis +parallel +databas +system +applic +document +content +object +client +server +paradis +sampl +paradis +frontend +contact +informationse +also +public +relat +paradis +peopl +work +paradis +shore +project +object +manag +us +paradis +extens +optim +paradis +report +examin +sequoia +benchmark +script +us +vldb +paper +inform +madison +databas +research +group +madison +depart +serverobject +object +paradis +project +design +implement +andevalu +scalabl +parallel +geograph +inform +system +iscap +store +manipul +massiv +data +set +applyingobject +orient +parallel +databas +technolog +problem +ofstor +manipul +geograph +inform +hope +tosignificantli +advanc +size +complex +data +set +thatcan +successfulli +store +brows +queri +client +server +paradis +paradis +databasesystem +aim +handl +type +applic +paradis +providesa +graphic +user +interfac +queri +brows +databas +andsupport +subset +issu +queri +paradiseprovid +extend +relat +data +model +model +gisappl +addit +support +base +data +type +asinteg +real +string +paradis +built +support +raster +polygon +polylin +point +circl +video +mpeg +imag +data +paradis +us +shore +underli +persist +object +manag +paradis +front +allow +displai +object +spatialattribut +provid +layer +displai +foroverlap +spatial +attribut +layer +correspond +queri +custom +order +displai +layer +selectingcolor +label +displai +attribut +queri +databas +queri +either +graphic +interfac +withad +queri +graphic +interfac +us +issueimplicit +spatial +queri +zoom +click +sketch +arubb +band +queri +paradis +provid +querycompos +menu +queri +compos +provid +access +databaseschema +assist +queri +composit +queri +result +either +beview +spatial +attribut +bedisplai +tabl +brows +tupl +result +front +also +provid +context +sensit +help +paradis +support +subset +issu +queri +sqlwe +ad +abil +invok +method +defin +extendedset +type +exampl +calcul +area +polygon +byus +method +polygon +area +paradis +also +support +standarddatabas +oper +includ +creat +drop +databas +creat +anddrop +extent +creat +drop +indic +insert +updat +current +version +paradis +emploi +client +server +architectur +front +ship +queri +syntax +paradiseserv +execut +execut +queri +server +ship +theresult +object +back +client +paradis +server +ismulti +thread +multipl +client +connect +sameserv +design +implement +paradis +sever +carefulattent +paid +insur +system +could +effici +processqueri +especi +involv +spatial +attribut +largevolum +data +sampl +paradis +frontendeurop +data +sampl +pressher +contact +inform +paradis +projectattn +prof +david +dewittunivers +wisconsin +madisoncomput +scienc +depart +west +dayton +streetmadison +email +paradis +wisc +edumor +come +biswadeep +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..111a8d89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,114 @@ +scout +servic +homepagego +text +versionnewslett +newand +newli +discov +internet +resourc +network +toolsinternet +announc +updat +daili +effectiveinternet +tool +availablea +report +student +studentssurf +smarter +longer +intern +scout +project +universityof +wisconsin +madison +show +best +canchoos +best +filter +hundr +internet +annoucementseach +week +look +valuabl +onlin +resourc +networktool +organ +summar +annot +best +vefound +offer +internet +commun +sever +us +format +goal +scout +support +effect +internet +byeduc +research +howev +everyon +welcom +useth +public +site +provid +scout +encouragefeedback +suggest +entir +internet +commun +three +primari +servic +provid +includ +scout +report +happen +thescout +toolkit +ournewest +project +know +report +student +student +scout +servic +locat +depart +comput +scienc +theunivers +wisconsin +madison +project +intern +comment +suggest +feedbackscout +intern +scout +servicesfor +inform +us +internet +intern +inform +educ +servic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..420517f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,961 @@ +shore +project +home +pageshor +high +perform +scalabl +persist +object +repositorydocu +content +objectiveoverviewreleas +informationmail +listsse +also +shore +version +line +inform +sourc +binari +document +beta +releas +shore +version +public +relat +shorepeopl +work +shorelatest +research +summari +arpaparadis +project +built +shoreexodu +project +predecessor +shoreoo +benchmark +benchmark +oodbsshor +photo +albumuw +madison +databas +research +group +madison +depart +serverobject +object +shore +project +design +implement +andevalu +persist +object +system +serv +need +widevarieti +target +applic +includ +hardwar +softwar +cadsystem +persist +program +languag +geograph +informationsystem +satellit +data +repositori +multi +media +applic +shore +expand +basic +capabl +wide +usedexodusstorag +manag +develop +wisconsin +fund +arpa +number +ofwai +includ +support +type +object +multipl +programminglanguag +unix +like +hierarch +name +space +name +object +anda +unix +compat +interfac +object +text +field +thisinterfac +intend +eas +transit +applic +theunix +file +system +environ +shore +exist +unix +tool +viand +abl +store +data +shore +object +withoutmodif +basic +unix +file +becom +either +singl +shoreobject +text +field +complex +object +overview +shore +someth +hybrid +system +natur +inheritingcharacterist +object +orient +databas +system +fromfil +system +section +briefli +describ +basic +featur +ofshor +paper +shore +persist +applic +describ +shore +much +greater +detail +shore +three +major +goal +scalabilitysupport +hardwar +languag +heterogeneitysupport +exist +file +base +applicationswhen +shore +project +began +year +goal +uniqueamong +research +commerci +oodbm +commun +odmg +effort +also +concentr +provid +degre +support +languageheterogen +turn +facilit +hardwar +heterogen +shore +remain +distinguish +focu +scalabl +supportfor +applic +depend +unix +file +system +persistentstorag +furthermor +sinc +shore +data +model +basicallycompat +odmg +data +model +expect +much +technolog +develop +eventu +betransf +commerci +sector +scalabl +architectureshor +softwar +architectur +uniqu +sever +wai +first +shore +us +symmetr +peer +peer +distributedarchitectur +shore +everi +particip +processor +run +ashor +server +process +whether +processor +shore +data +disksattach +softwar +design +scalabl +singl +processor +network +workstat +larg +parallel +processor +intel +paragon +design +contrast +client +server +architectureus +exodu +oodbm +vendor +client +serverarchitectur +fine +design +environ +typicallyus +softwar +hardwar +effort +scalabl +second +uniqu +featur +shore +architectur +notionof +valu +ad +server +structur +softwar +runsin +server +extens +mind +rel +simpl +forus +build +applic +specif +server +exampl +theparadis +project +alreadi +us +shore +server +build +geograph +inform +system +nasa +seosdi +project +feel +uniqu +piec +technolog +plai +aimport +role +varieti +futur +research +commerci +endeavor +exampl +digit +librari +futur +almost +certainlydepend +avail +scalabl +persist +object +technolog +system +go +store +retriev +manipul +transmitobject +contain +video +pictur +well +text +whilecurr +oodbm +product +could +us +system +orientedtoward +deal +gigabyt +terabyt +data +customiz +equal +import +index +retriev +queri +process +mechan +need +digit +libraryar +differ +requir +geograph +informationsystem +languag +hardwar +heterogeneityobject +shore +type +shore +provid +singl +languag +neutraltyp +system +us +defin +type +shore +object +type +system +embodi +shore +data +languag +languag +shore +object +type +defin +enhanc +data +model +support +databasefeatur +bulk +type +set +list +persist +provis +type +persist +object +simplifi +task +ofsupport +heterogen +hardwar +environ +make +feasibleto +support +access +persist +object +multipl +programminglanguag +object +shore +project +mention +earlier +quit +close +relat +languag +neutral +object +type +definit +languag +wasrec +propos +standard +oodb +vendor +consortium +odmg +term +emphasi +howev +odmg +larg +concentr +onprovid +standard +interfac +exist +orient +oodb +focu +support +inter +languag +object +share +withina +larg +name +space +object +support +exist +file +base +applicationsa +major +goal +shore +enabl +applic +currentlyus +untyp +byte +orient +file +persist +data +flatten +flatten +data +time +access +stop +applic +abl +store +data +type +structuredobject +conveni +type +safe +intra +inter +program +data +share +ultim +hope +shore +displac +byte +orientedfil +system +unix +file +system +shore +provid +major +servic +file +system +standpoint +first +support +object +name +space +manag +world +manypersist +object +shore +provid +flexibl +tree +structur +unix +like +name +space +persist +object +reachabl +either +directli +indirectli +give +shore +usersa +familiar +framework +regist +individualpersist +object +term +regist +object +root +oflarg +persist +data +structur +bulk +set +unnam +object +term +anonym +object +realiz +framework +involvessever +differ +kind +shore +file +system +object +includingdirectori +pool +file +contain +anonym +object +symbol +link +cross +refer +shore +provid +mechan +eas +transit +legaci +unixappl +compil +editor +system +fromtradit +byte +stream +file +shore +first +applic +link +shore +provid +standardunix +compat +file +system +interfac +open +close +read +write +mkdir +chdir +order +make +access +shore +object +unix +file +system +callsposs +defin +shore +object +type +option +design +onevari +length +byte +string +charact +string +attribut +object +asb +object +unix +data +program +attempt +read +objectthrough +shore +counterpart +unix +file +system +callswil +portion +object +legaci +program +thatwish +without +link +possibl +mount +ashor +file +system +access +unix +datacontain +object +directli +make +feasibl +bothnew +applic +access +object +applic +access +unix +data +componentof +object +applic +defin +access +morestructur +attribut +object +releas +inform +latest +time +tabl +releas +shore +date +approxim +subject +chang +question +contact +shore_support +wisc +beta +releas +first +beta +releas +beta +releas +second +beta +rleas +shore +version +avail +sept +includ +improv +document +completeimplement +mani +featur +mani +fix +port +tosolari +linux +version +august +releas +shore +version +gzip +file +sourc +document +binari +releas +sparc +andpentium +solari +found +atftp +wisc +shore +mail +liststher +shore +relat +mail +list +shore_support +wisc +eduand +shore_al +wisc +shore_support +wisc +eduthi +mail +list +reach +shore +develop +team +usebi +shore +user +submit +question +comment +report +cannot +subscrib +mail +list +shore_al +wisc +mail +list +user +interest +shore +list +manag +listproc +softwar +madisonc +depart +current +unmoder +unlikelyev +get +clutter +junk +mail +moder +mail +messag +interest +list +mailbox +isalreadi +clutter +sign +weekli +digest +belowfor +inform +inform +list +sentwhen +subscrib +purpos +shore_al +notifi +interest +parti +releas +chang +shore +archiv +request +help +user +default +repli +sent +sender +rather +beingpost +entir +list +want +entir +list +yourrepli +copi +repli +shore_al +list +public +mail +list +thu +anyon +maysubscrib +subscrib +post +list +existenceof +list +shown +list +return +listproc +whenit +process +list +request +subscrib +yoursubscript +conceal +default +subscriberscannot +obtain +membership +list +listproc +system +subscrib +shore_al +subscrib +chang +subscript +must +mail +specialmessag +listproc +wisc +subscrib +content +messag +look +like +subscrib +shore_al +receiv +weekli +digest +rather +individu +messag +sendthi +along +subscript +send +separ +messag +shore_al +mail +digest +subscrib +content +messag +unsubscrib +shore_al +help +list +processor +content +messageshould +helplast +modifi +nanci +hall +nhall +wisc +footnot +compat +odlshor +odmg +concurr +decid +data +modelidl +start +point +data +model +henc +odlar +similar +anoth +stabilizesw +convert +compat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..db9c0734 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,40 @@ +abhinav +home +page +welcom +abhinav +gupta +agupta +wisc +gupta +wisc +page +construct +graduat +student +depart +comput +scienc +univers +wisconsin +madison +contact +residenceoffic +kendal +avenu +madison +depart +comput +scienc +dayton +street +madison +interest +link +indian +newspap +stuff +sport +finger +find +whereabout diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..4267d655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,15 @@ +alain +home +pagealain +click +larger +pictur +largest +carnivor +ever +live +last +modifi +alain +alain +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..c4371d8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,84 @@ +carolyn +allex +home +page +carolyn +allex +graduat +studentbiotechnolog +train +program +traineecomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +mail +allex +wisc +edutelephon +advisor +professor +jude +shavlikinterest +comput +biologi +sequenc +protein +fold +artifici +intellig +machin +learn +neural +networkseduc +comput +scienc +univers +wisconsin +madisonb +comput +scienc +purdu +universityb +educ +mankato +state +univers +relat +link +univers +wisconsin +depart +univers +wisconsin +group +univers +wisconsin +machin +learn +group +univers +wisconsin +comput +biologi +research +intellig +system +molecular +biologi +ismb +intellig +system +molecular +biologi +ismb +intellig +system +molecular +biologi +ismb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..e4ed9148 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,225 @@ +amir +home +page +amir +roth +delphi +maven +show +erin +madison +amir +wisc +occasionali +updat +copi +resum +cvte +truth +group +seminar +arch +group +arch +seminar +week +research +topic +implement +compil +us +preprocessor +deleg +work +project +partner +set +airport +metal +detector +out +existencei +graduat +student +depart +comput +scienc +univers +wisconsin +madison +advisor +guri +sohi +look +method +allevi +data +depend +multiscalar +program +us +distribut +regist +file +multiscalar +program +side +curli +fri +physic +yale +univers +advanc +degre +nail +design +vallei +beauti +school +physic +interest +beauti +degre +much +practic +live +girlfriend +marci +cat +charli +marci +also +went +yale +get +master +public +polici +lafollett +institut +go +presid +meantim +like +solv +linear +regress +problem +wacki +recip +find +magazin +watch +parti +five +like +eggplant +peopl +think +weird +anywai +promis +out +subba +officem +daddi +novemb +titanium +screw +desi +relaford +terri +mulholland +oxygen +carbon +dioxid +area +vagu +interest +program +languag +program +analysi +super +whack +compil +optim +parallel +algorithm +theori +good +soul +analysi +evalu +model +perform +enhanc +three +point +shot +thing +scaryarea +rabid +interestth +love +know +talk +better +leav +page +never +return +hmmm +interest +super +handyinformatik +index +comput +scienc +journal +author +madcat +architectur +resourc +minut +score +sportslin +philli +everybodi +favorit +engin +super +ickyth +new +friend +barb +write +articl +gui +go +read +want +kid +barb +friend +drew +home +page +cornel +david +home +page +think +wierd +page +featur +friend +friend +associ +kemin +last +modifi +amir +roth +amir +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..da8997bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..b24137cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,181 @@ +amo +home +page +word +search +engin +approxim +theori +spline +wavelet +boxsplin +radial +basi +function +shift +invari +space +approxim +toscatt +data +multiquadr +thin +plate +splinesthi +page +netscap +enhanc +homepag +amo +associ +professordepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +usa +mail +amo +wisc +telephon +amo +home +page +present +tabl +linksat +present +item +access +order +download +paperaffin +system +analysi +analysi +operatorof +zuowei +shen +choos +follow +version +us +unix +clickher +compress +version +otherwis +download +uncompress +version +fromher +none +work +server +copi +directlyfrom +accounther +handout +email +clickher +click +vita +want +vita +list +variou +public +includ +abstract +select +articlesof +mine +inform +anonym +site +carl +boor +maintain +site +wisc +site +containspostscript +compress +postscript +file +variou +articl +theapproxim +theori +group +also +found +technic +filesconcern +us +gener +public +recommend +click +read +file +provid +list +avail +file +click +wish +view +line +view +line +download +wish +student +includ +inform +research +andpubl +main +area +interest +togeth +short +summari +present +research +futur +goal +activ +approxim +theori +group +univeristi +ofwisconsin +madison +activ +numer +analysi +group +link +home +page +peopl +approxim +theori +commun +found +miscellan +topic +activ +final +offici +homepag +pleas +deposit +comment +mailbox diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..37089648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andi +therber +home +pageandi +therberoffic +sphone +email +andyt +wisc +eduzooresumebookmarksapplet diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..fccccea8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,22 @@ +ranga +arvind +ranganathan +erstwhil +workplac +present +workplac +indiaworld +fascin +world +escher +collect +classic +paper +comput +scienc +finger +log +arvind +ranganathan +arvind +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..ab364550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,37 @@ +ashish +home +page +ashish +thusoo +graduat +student +depart +comput +scienc +univers +wisconsin +madison +come +india +hadmi +undergradu +educ +indianinstitut +technolog +delhi +depart +iitd +fantast +place +worth +visit +like +contact +canfing +find +whereabout +altern +send +email +ashisht +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..bf382460 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,51 @@ +ashraf +aboulnaga +home +pageashraf +aboulnagacomput +scienc +depart +room +univers +wisconsin +madison +west +dayton +madison +usaphon +mail +ashraf +wisc +edueduc +comput +scienc +alexandria +univers +alexandria +egypt +juli +comput +scienc +alexandria +univers +alexandria +egypt +june +info +section +view +grade +section +view +grade +offic +hour +desautel +home +page +last +modifi +septemb +ashraf +aboulnaga +finger diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..95a011f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,29 @@ +phil +atkinson +home +page +phil +atkinson +home +pageucla +bannon +win +ncaa +basketbal +championship +seattl +gener +infooffic +phone +email +atkinson +wisc +educurr +researchsailinghors +back +ridingscuba +divingc +infooffic +hour +tuth +appoint diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..bc771092 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,162 @@ +home +page +eric +bach +eric +bach +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +bach +wisc +univers +california +berkelei +interest +theoret +comput +scienc +comput +number +theori +algebraicalgorithm +complex +theori +cryptographi +string +automata +research +summari +interest +us +comput +effici +solvealgebra +number +theoret +problem +exampl +onetel +digit +number +prime +without +examin +possiblefactor +problem +intrins +mathemat +interest +well +applic +random +number +gener +code +forreli +secur +inform +transmiss +comput +algebra +area +also +interest +appli +probabl +theori +designand +analysi +algorithm +exampl +larg +number +iscomposit +prove +simpl +test +us +auxiliarynumb +call +wit +practic +usual +find +witnessbi +direct +search +among +small +prime +lead +followingnatur +question +larg +least +wit +functionof +number +test +recent +work +given +accurateheurist +model +base +probabilist +assumpt +allowsthi +similar +question +answer +recent +public +improv +approxim +euler +product +proc +cnta +canadian +math +proceed +model +algorithm +complet +problem +condon +glaser +tanguai +proc +annual +conf +comput +complex +algorithm +number +theori +volum +effici +algorithm +shallit +press +info +click +curriculum +vita +page +creat +juli +email +bach +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..5985b197 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,70 @@ +bart +miller +home +page +barton +miller +bart +wisc +professorcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usath +follow +list +thing +research +project +paradyn +parallel +perform +tool +fuzz +random +softwar +testingteach +introduct +oper +system +spring +honor +internet +seminar +advanc +oper +system +fall +distribut +system +director +undergradu +project +graduatesprofession +symposium +parallel +distribut +tool +monona +terrac +frank +lloyd +wright +convent +center +technic +advisori +groupperson +offici +depart +home +page +famili +photosbart +wisc +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..7abedf34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,47 @@ +hyper +home +page +benjamin +teitelbaum +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +usaben +wisc +edursumquinc +internet +ultim +word +gamezillion +bookmarksspr +schedul +browser +support +tabl +look +like +garbag +click +someth +readabl +mondai +tuesdai +wednesdai +thursdai +fridai +offic +hour +offic +hour +dbseminar +osseminar +condormeet +miron +plseminar diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..946979d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,404 @@ +gareth +bestor +home +pagewelcom +gareth +bestor +home +pagegareth +bestor +dissert +teach +assist +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +telephon +mail +bestor +wisc +click +finger +world +wide +http +wisc +bestor +system +administr +data +program +librari +servic +observatori +drive +madison +telephon +mail +bestor +dpl +dacc +wisc +edueduc +comput +scienc +univers +wisconsin +madison +honor +comput +scienc +massei +univers +zealand +comput +scienc +massei +univers +zealand +curriculum +vita +postscript +resum +postscript +graduat +coursework +postscript +dissert +research +titl +structur +motion +invers +project +problem +abstract +import +problem +comput +vision +recov +structur +scene +posit +observ +within +project +imag +essenti +invers +project +problem +exist +structur +motion +techniqu +solv +problem +examin +multipl +imag +project +rigid +scene +howev +us +extens +practic +techniqu +sensit +nois +accur +model +optic +project +restrict +posit +observ +structur +scene +research +us +techniqu +solv +invers +project +problem +call +concurr +projector +model +make +assumpt +scene +rigid +assumpt +posit +observ +techniqu +us +projector +base +model +project +instead +camera +base +model +tradition +us +result +algorithm +defin +geometr +transform +dimens +perspect +project +given +transform +dimens +algorithm +identifi +invers +project +problem +constrain +specifi +minimum +number +point +imag +requir +solv +concurr +projector +model +also +examin +addit +point +imag +minim +type +project +error +occur +real +world +applic +allow +projector +approxim +intersect +techniqu +current +appli +problem +robot +navig +explor +determin +posit +robot +unknown +environ +time +environ +advisor +prof +charl +dyer +research +interest +comput +machin +vision +vision +base +robot +navig +explor +comput +graphic +virtual +realiti +artifici +intellig +group +comput +vision +group +machin +learn +research +group +robot +teach +duti +spring +introduct +comput +program +section +fortran +credit +cours +cover +basic +program +structur +need +prepar +student +elementari +engin +cours +prior +comput +program +experi +requir +basic +knowledg +comput +assum +materi +cover +enabl +student +write +simpl +comput +program +solv +engin +problem +elementari +cours +program +done +fortran +cours +intend +student +receiv +littl +program +instruct +high +school +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +section +home +page +algebra +languag +program +section +fortran +construct +algorithm +problem +solv +instruct +experi +least +procedur +orient +languag +pascal +fortran +survei +languag +advanc +program +techniqu +prereq +advanc +high +school +mathemat +prepar +colleg +work +mathemat +statist +logic +consent +instructor +open +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +section +home +page +pointer +interest +comput +scienc +depart +home +page +univers +wisconsin +madison +wiscinfo +home +page +inform +zealand +hoofer +out +club +nextstep +next +softwar +start +point +internet +explor +lyco +search +world +wide +keyword +copyright +copi +gareth +bestor +bestor +wisc +last +modifi +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..799e9b19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,38 @@ +kevin +beyer +home +pagekevin +beyerbey +wisc +caution +work +graduat +student +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +advisor +raghu +ramakrishnan +area +interest +databas +researchresearch +project +coral +local +cours +inform +project +graduat +cours +undergradu +coursesinstruct +beyer +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..15a7785a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,69 @@ +todd +bezenek +home +page +toddm +bezenek +back +introduc +actual +faster +cpu +_great +microprocessor +past +present_ +uregina +bayko +html +window +express +locomot +squeez +skateboard +size +packag +helen +custer +_insid +window +microsoft +press +current +cours +advanc +oper +system +bart +miller +pithi +pith +consist +abound +pith +take +yeah +point +skew +associ +cach +access +inform +comput +scienc +depart +univers +wisconsin +west +dayton +street +madison +offic +phone +home +phone +mail +bezenek +wisc +edubezenek +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..a878ef52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,67 @@ +nathan +bockrath +graduat +student +nathan +bockrath +teach +assist +graduat +student +averag +pictur +nate +jpeg +send +email +click +bockrath +wisc +section +section +held +section +section +grade +quiz +review +viru +info +word +macro +viru +make +page +offic +hour +anywai +mondai +wednesdai +schedul +distribut +system +simul +model +support +free +speech +onlin +info +site +anoth +dai +back +home +pageback +depart +home +pageoth +neat +stuff +condor +project +internet +oraclesend +comment +bockrath +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..fdf48b56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,392 @@ +bolobologreet +bolo +although +parent +christen +josef +thoma +burger +roll +wai +call +mebolo +everyon +includ +parent +quit +sure +bestexplan +develop +mani +wai +question +person +defin +bywhat +person +case +softwar +engin +develop +semi +real +timeoper +system +util +last +year +kernel +hacker +unix +system +administr +creat +thing +engin +engin +appli +scienc +design +construct +thing +realli +call +scientist +though +acomput +scienc +degre +scientist +pai +david +dewitt +scientist +shudder +methodolog +hand +right +time +work +design +architect +implement +newoper +system +type +thing +often +sameto +thing +thing +forth +system +woodwork +home +control +draw +brew +beer +complet +relax +sleep +work +wonder +sleep +enough +although +myroomm +disagre +pursuit +enjoi +fly +read +scienc +fiction +comic +book +railroad +prototyp +model +role +plai +game +notic +imag +sublim +stripe +creatur +thetig +appear +throughout +page +tiger +taken +william +blake +poemtyg +tyger +put +word +wonder +tiger +road +againin +tremend +leap +insan +purchas +ahous +address +place +isjosef +burger +east +gate +roadmonona +voic +number +bore +workwork +work +drive +banana +us +grung +either +matur +job +chang +year +perhapssom +seem +like +parallel +comput +everi +othermonth +beat +intosubmiss +everyth +els +moon +andstar +current +work +follow +project +fordav +dewitt +world +famou +databas +hacker +gamma +parallel +relat +databas +like +queri +interpret +object +store +paradis +geograph +inform +system +implement +shore +shore +object +orient +data +store +wiss +wisconsin +storag +system +whatev +els +need +done +whole +occur +thecomput +scienc +departmentof +themadison +campusof +univers +wisconsin +campu +locat +madison +peninsula +madison +five +lake +bore +workin +addit +work +also +consult +provid +solut +rather +advic +technicalexpertis +help +internet +provid +port +softwar +newsystem +reviv +comput +oddbal +tasksar +kind +thing +tell +tovisit +serverbut +haven +time +anyth +mostlyempti +except +home +page +friend +activitiesuwvaxi +oper +uwvax +usenet +new +uucp +site +free +time +new +that +print +someth +along +line +uwvax +depart +comput +part +usenet +uucp +internet +longer +work +along +line +also +comput +scienc +depart +svolunt +new +master +much +goe +hand +hand +run +uwvax +howev +try +take +care +new +reader +across +differentarchitectur +try +task +much +time +take +care +softwar +organizationsi +member +follow +organ +alwai +agre +oftenhav +good +benefit +member +usersof +commun +aopa +aircraft +owner +pilot +associ +experiment +aircraft +associ +usenix +associ +blitz +drinkingwhen +school +hord +friendsand +visit +local +everi +thursdai +night +place +essen +hau +import +beer +world +slowli +work +entir +select +year +develop +acquaint +mani +becam +part +loftili +labelledblitz +drink +societi +rather +divers +member +drink +meet +year +essen +hau +time +ofoctoberfest +weekend +chud +accumulateda +short +histori +whatnotof +charad +bolo +home +pagelast +modifi +bolo +josef +burger +bolo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..7506b271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,68 @@ +brad +home +page +welcom +brad +thayer +homepag +much +mail +link +link +comput +scienc +home +page +intro +network +home +page +possibl +advanc +oper +system +page +would +foolish +neglect +comput +system +model +page +interest +thec +us +comput +home +page +probabl +bore +check +oper +system +seminaranywai +aim +beaucoup +boir +pepper +badger +packer +pagesom +link +search +altavista +search +enginefind +email +adress +world +wideth +jazz +page +duan +mclaughlin +home +pageuw +athlet +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..829cdf00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,84 @@ +scott +breach +home +pagescott +breach +breach +wisc +addresseseducationresearch +interest +public +recreat +associatesaddressesscott +breachdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usatel +educationph +comput +scienc +univers +wisconsin +madison +comput +engin +carnegi +mellon +univers +advisorguri +sohiresearch +interestscomput +architecturemultiscalarpublicationsmultiscalar +processorsgurindar +sohi +scott +breach +vijaykumarnd +intern +symposium +comput +architectur +anatomi +regist +file +multiscalar +processorscott +breach +vijaykumar +gurindar +sohith +intern +symposium +microarchitectur +effici +detect +pointer +arrai +access +errorstodd +austin +scott +breach +gurindar +sohiconfer +program +languag +design +implement +recreationwingsbeersquidtvassociatestodd +austindoug +burgerbabak +falsafialain +kagit +vijaykumarlast +updat +septemb +scott +breach +breach +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..0570b69a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,91 @@ +bleed +nontrivi +waysher +temporarili +underst +page +unadorn +page +provid +section +pizza +pool +page +brief +hobbi +page +schedul +spring +stinkin +bookmark +suni +albani +fall +poor +unfortun +name +myclass +hypersensit +rockjock +cretin +brood +glare +clenchesfist +crack +knuckl +tragic +flightyfemm +get +razz +asskick +sinc +thirdgrad +perhap +smooth +skin +hardbodi +leatherboi +leer +atm +whenev +call +roll +differ +make +sinc +todayi +giggl +said +name +becam +aprostitut +societi +bigotri +pedagodi +isaac +theblack +goat +refus +stai +claw +hand +sssuuuhhh +mmuuuhhhh +dddduuuuuhhhhh +mmmmuuuhhhh +maaaahhhjaaaaaahhhhh +fffuuuhhhhh +yyyyyyyuuuuuhhhhh +mmmmmuuuuuhhhhhmmmmuuuhhhhh +uuuhhh +uuummmm +uuuhhhh +wwwwwhhhhuuuuuhhhhh +suni +albani +fall +zhang +wouldn +notic +eggleston +smile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..b4c56342 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,284 @@ +home +page +wisc +assist +professor +comput +sciencedepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usacao +wisc +eduphon +department +offic +educ +research +interest +cours +recent +paper +recent +talk +summari +collect +link +educ +princeton +univers +princeton +univers +tsinghua +univers +beij +china +research +interest +oper +system +high +perform +file +system +memori +resourc +alloc +high +perform +system +parallel +comput +research +project +optim +parallel +prefetch +cachingacf +applic +control +file +cach +prefetch +cours +research +topic +distribut +system +oper +system +fall +advanc +oper +system +spring +trace +simul +file +access +tracesrec +papersintegr +parallel +prefetch +cachingtraci +kimbrel +anna +karlin +felten +princeton +depart +tech +report +novemb +shorter +version +proceed +sigmetr +confer +applic +control +file +cach +prefetch +thesi +also +princeton +depart +tech +report +implement +perform +integr +applic +control +cach +prefetch +disk +schedulingpei +edward +felten +anna +karlin +princetonunivers +appear +toc +studi +integr +prefetch +cach +strategiespei +edward +felten +anna +karlin +princetonunivers +proceed +sigmetr +peform +implement +perform +applic +control +file +cach +edward +felten +princeton +univers +proceed +first +osdi +symposium +slide +present +osdi +applic +control +file +cach +polici +edward +felten +proceed +usenix +summer +technic +confer +tickertaip +parallel +raid +architectur +swee +boon +shivakumar +venkataraman +john +wilk +proceed +isca +recent +talksslid +applic +control +file +cach +prefetch +postscript +page +andpostscript +page +research +summarymi +research +focus +storag +manag +uniprocessor +andparallel +system +particular +investig +techniqu +improvefil +system +perform +applic +specif +replac +polici +filecach +aggress +prefetch +file +data +disk +havedevelop +system +kernel +alloc +physic +page +individualappl +applic +respons +decid +useit +physic +page +cach +prefetch +system +us +fairglob +alloc +polici +kernel +carefulli +integr +cachereplac +prefetch +disk +schedul +prototyp +implementationon +uniprocessor +system +demonstratedthat +good +applic +chosen +replac +strategi +prefetch +informationcan +significantli +improv +perform +mani +applic +current +extend +techniqu +parallel +system +amdevelop +integr +cach +prefetch +algorithm +parallel +diskarrai +addit +investig +global +resourc +managementproblem +oper +system +last +modifi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..ffecdf04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,433 @@ +mike +careymichael +careyprofessor +leav +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +research +staff +member +almaden +research +center +harri +road +jose +phone +primari +altern +mail +carei +almaden +carei +wisc +eduresearch +interestsdatabas +manag +system +parallel +distribut +comput +appli +perform +evalu +research +interest +main +area +databas +system +performanceand +next +gener +databas +system +perform +area +topicsof +current +interest +includ +perform +tradeoff +techniqu +forobject +orient +databas +system +design +evalu +algorithmsrel +transact +process +schedul +complex +multi +userdatabas +workload +base +user +specifi +perform +goal +next +gener +databas +system +area +involv +theexodu +extens +dbm +project +involv +shore +project +aimedat +develop +scalabl +repositori +storag +share +persistentobject +heterogen +environ +goal +shore +effort +whichi +build +upon +experi +exodu +project +meet +objectmanag +need +replac +unix +file +applicationssuch +case +recent +move +academia +industri +twelv +greatyear +part +becom +best +academ +databas +system +researchgroup +known +univers +time +come +tackl +anddiffer +challeng +work +almaden +research +center +thesourc +number +paper +teach +student +forth +past +twelv +year +work +relat +object +databas +signific +fraction +time +spent +rel +projectther +call +garlic +garlic +effort +build +heterogen +multimediainform +system +allow +data +live +varieti +repositori +tobe +queri +manipul +though +resid +homogen +objectdatabas +spent +sabbat +work +garlic +continuedto +work +madison +focus +graduat +student +aqueri +browser +front +tool +call +pesto +work +thegarl +project +locat +almaden +recent +public +extend +oodb +access +design +implement +experi +kiernan +proc +conf +object +orientedprogram +system +languag +applic +oopsla +austin +octob +appear +queri +multimedia +data +multipl +repositori +content +garlic +project +codi +haa +niblack +arya +fagin +flickner +petkov +schwarz +thoma +tork +roth +william +wimmer +proc +ifip +work +confer +visualdatabas +system +lausann +switzerland +march +toward +heterogen +multimedia +inform +system +garlicapproach +haa +schwarz +arya +codi +fagin +flickner +luniewski +niblack +petkov +thoma +william +and +wimmer +proc +ieee +workshop +research +issu +dataengin +ride +taipei +taiwan +march +statu +report +oodbm +benchmark +effort +withd +dewitt +kant +naughton +proc +conf +onobject +orient +program +system +languag +applic +portland +octob +toward +autom +perform +tune +complex +workload +brown +mehta +livni +proc +thint +conf +larg +data +base +santiago +chile +septemb +make +real +data +persist +initi +experi +smrc +withb +reinwald +desslock +lehman +pirahesh +srinivasan +proc +persist +object +system +workshop +tarascon +provenc +franc +septemb +shore +persist +applic +dewitt +franklin +hall +mcauliff +naughton +schuh +solomon +tsatalo +white +zwill +proc +sigmodint +conf +manag +data +minneapoli +fine +grain +share +page +server +oodbm +franklin +andm +zaharioudaki +proc +sigmod +conf +managementof +data +minneapoli +manag +memori +real +time +queri +pang +livni +proc +sigmod +conf +manag +data +minneapoli +accur +model +hybrid +hash +join +algorithm +patel +andm +vernon +proc +sigmetr +conf +measur +modelingof +comput +system +nashvil +index +altern +multivers +lock +bober +proc +conf +extend +databas +technolog +cambridg +england +march +client +server +cach +revisit +franklin +indistribut +object +manag +oszu +dayal +andp +valduriez +morgan +kaufmann +publish diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..97018a2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,42 @@ +chin +chin +tang +home +pagechin +chin +tanggradu +student +univers +wisconsin +depart +west +dayton +streetmadison +wisconsin +offic +bldg +mail +cchin +wisc +edutelephon +current +assign +introduct +data +structur +offic +hour +mondai +tuesdai +fridai +ameduc +biochemistri +univers +wisconsin +madison +biochemistri +univers +wisconsin +madison +cchin +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..9f608b0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,43 @@ +satish +chandra +home +page +satish +chandra +chandra +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaphon +research +research +interest +summari +public +come +soon +real +soon +stuff +wodehous +book +internet +movi +databas +nostalgia +york +time +altavista +italian +languag +cultur +miscellan +linksclick +log diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..3cf0f690 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,127 @@ +trishul +chilimbi +home +page +trishul +chilimbi +chilimbi +wisc +click +real +megradu +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaadvisor +laru +research +interest +program +languag +compil +architectur +parallel +comput +compil +integr +share +memori +messag +pass +parallel +program +perform +analysi +enhanc +visual +share +memori +parallel +comput +designresearch +project +wisconsin +wind +tunneleduc +univers +wisconsin +madison +tech +indian +institut +technolog +bombai +research +summari +publicationscachi +tool +automat +insert +cico +annot +trishul +chilimbi +jame +laru +intern +confer +parallel +process +icpp +august +stormwatch +tool +visual +memori +system +protocolstrishul +chilimbi +thoma +ball +stephen +eick +jame +laru +supercomput +appear +decemb +award +honor +certif +merit +state +mathemat +olympiadpresid +gold +medal +indian +nation +physic +examinationcertif +merit +state +examin +chemistrycertif +merit +state +examin +electron +miscellan +click +movi +dream +curriculum +vita +last +updat +mail +suggest +page +chilimbi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..4c186fc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,34 @@ +chandrasekaran +sashikanth +home +page +chandrasekaran +sashikanth +csashi +wisc +graduat +studentdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaadvisor +mark +hill +project +educ +btech +indian +institut +technolog +madra +june +univeristi +wisconsin +depart +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..8e427c21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,145 @@ +curt +ellmann +curt +ellmann +paradis +databas +project +depart +comput +scienc +univers +wisconsin +madison +curt +wisc +item +focu +java +relat +item +paradis +home +page +paradis +develop +page +webgnat +defect +track +paradis +prototyp +index +shore +page +previou +life +doit +eosdi +relat +opengi +consortium +global +posit +system +calmit +univers +nebraska +lincoln +feder +approach +eosdi +databas +relat +object +databas +manag +group +home +page +free +databas +list +standard +home +page +transact +process +perform +council +illustra +white +papersmiscellan +sitescampu +site +wyrm +hoard +comput +scienc +wiscinfo +wiscinfo +gopher +wiscinfo +site +madison +librari +wiscnet +netcorpor +appl +microsoft +research +land +paww +commerc +metrowerk +taligentsearch +savvi +search +webcrawl +open +text +worm +network +inform +site +intern +intern +organ +standard +internet +draft +site +dilbert +world +onlin +winsock +applic +current +weather +map +dienst +dienst +implement +geolog +survei +govern +inform +locat +gil +oakridg +nation +center +comput +scienc +stock +market +datacurt +ellmanncurt +wisc +eduparadis +databas +projectdepart +comput +sciencesunivers +wisconsin +madison diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..18e1ec33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,25 @@ +chee +yong +home +pagechan +chee +yong +graduat +student +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +email +cychan +wisc +offic +phone +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..b65d1c33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,63 @@ +sara +bauman +home +page +sara +dailei +baumandailei +wisc +edugradu +program +mathemat +comput +engin +mace +univers +wisconsin +madison +comput +scienc +depart +engin +mechan +astronaut +depart +nuclear +engin +engin +physic +educ +physic +math +comput +scienc +lewi +clark +colleg +research +work +public +current +schedul +page +link +friend +home +pagessend +mail +offic +address +univers +wisconsin +madison +comput +scienc +statist +west +dayton +street +madison +last +modifi +sara +daileytu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..9b6e391b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,633 @@ +david +wood +home +page +david +wood +david +wisc +associ +professor +comput +scienceand +electr +comput +engineeringdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usadavid +wisc +eduphon +secretari +juli +fingerson +thea +sklenar +department +offic +research +interest +comput +architectur +especi +memori +system +design +uniprocessor +multiprocessor +design +implement +program +parallel +comput +oper +system +parallel +comput +perform +evalu +tool +techniqu +especi +memori +system +analysi +vlsi +design +includ +power +design +portabl +comput +research +project +wisconsin +wind +tunnel +memori +system +perform +tool +wart +educ +univers +california +berkelei +univers +california +berkelei +current +graduat +student +babak +falsafi +steve +reinhardt +brian +toonenrec +graduat +student +rahmat +hyder +intel +alvi +lebeck +duke +univers +pfile +microsystem +mark +callaghan +informix +cours +teach +fall +introduct +comput +architecturec +machin +organ +programmingc +introduct +comput +architecturec +advanc +comput +architectur +advanc +comput +architectur +select +recent +paper +decoupl +hardwar +support +distribut +share +memorysteven +reinhardt +robert +pfile +anddavid +wood +ieee +intern +symposium +comput +architectur +isca +coher +network +interfac +fine +grain +communicationshubhendu +mukherje +babak +falsafi +mark +hill +anddavid +wood +ieee +intern +symposium +comput +architectur +isca +synchron +hardwar +network +workstat +perform +costrahmat +hyder +david +wood +ieee +intern +confer +supercomput +dynam +self +invalid +reduc +coher +overhead +share +memori +multiprocessorsalvin +lebeck +anddavid +wood +ieee +intern +symposium +comput +architectur +isca +june +activ +memori +abstract +memori +system +simulationalvin +lebeck +anddavid +wood +sigmetricsmai +accuraci +perform +parallel +simul +interconnect +network +dougla +burger +david +wood +proceed +intern +parallel +process +symposium +april +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +mark +hill +jame +laru +ann +roger +david +wood +proceed +supercomput +fine +grain +access +control +distribut +share +memori +ioanni +schoina +babak +falsafi +alvin +lebeck +steven +reinhardt +jame +laru +david +wood +proceed +asplo +tempest +typhoon +user +level +share +memori +steven +reinhardt +jame +laru +david +wood +proceed +symposium +comput +architectur +cach +profil +spec +benchmark +case +studi +alvin +lebeck +anddavid +wood +page +ieee +comput +octob +cooper +share +memori +softwar +hardwar +scalabl +multiprocessor +mark +hill +jame +laru +steven +reinhardt +david +wood +transact +comput +system +toc +novemb +wisconsin +wind +tunnel +project +annot +bibliographi +mark +hill +jame +laru +david +wood +comput +architectur +new +decemb +line +version +revis +frequent +wisconsin +architectur +research +tool +wart +mark +hill +jame +laru +alvin +lebeck +madhusudhan +talluri +david +wood +comput +architectur +new +august +research +summari +main +research +goal +develop +cost +effect +computerarchitectur +take +advantag +rapidli +chang +technolog +myresearch +program +major +thrust +evalu +perform +feasibl +correct +architectur +develop +tool +techniqu +facilit +evalu +current +research +focuss +follow +three +area +multi +paradigm +multiprocessor +effici +integr +share +memori +messag +pass +hybridprogram +paradigm +virtual +prototyp +system +exploit +similaritesof +exist +parallel +machin +simul +hypothet +parallel +machin +techniqu +understand +tune +program +perform +recent +result +includ +develop +interfac +calledtempest +user +level +protocol +handler +system +suppliedmechan +tempest +provid +mechan +allow +programm +compil +program +librari +implement +messag +pass +transpar +share +memori +hybrid +combin +tempestmechan +overhead +messag +bulk +data +transfer +virtualmemori +manag +fine +grain +access +control +novelmechan +fine +grain +access +control +allow +user +softwar +tagblock +byte +read +write +read +invalid +theloc +memori +us +transpar +cach +remot +data +explor +altern +wai +support +interfac +first +call +typhoon +propos +hardwareplatform +implement +tempest +mechan +fulli +programm +user +level +processor +network +interfac +revers +translationt +rtlb +invok +network +processor +detect +fine +grainaccess +fault +simul +typhoon +wisconsin +wind +tunnel +found +thata +transpar +share +memori +protocol +run +typhoon +performscompar +anal +hardwar +cach +coher +protocol +five +share +memoryprogram +also +develop +memori +system +simul +method +thatoptim +common +case +cach +hit +significantli +reducingsimul +time +fast +cach +tightli +integr +refer +gener +simul +byprovid +abstract +tag +memori +block +referenceinvok +user +specifi +function +depend +upon +refer +type +andmemori +block +state +simul +control +refer +processedbi +manipul +memori +block +state +specifi +special +null +functionfor +action +case +fast +cach +implement +abstract +usingbinari +rewrit +perform +tabl +lookup +memoryrefer +sparcstat +fast +cach +simul +time +tothre +time +faster +convent +trace +driven +simul +thatcal +procedur +memori +refer +simul +time +onlythre +time +slower +origin +instrument +program +also +investig +us +fast +cach +binari +rewrit +techniquesto +support +tempest +interfac +exist +hardwar +platform +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..f97fe77e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,34 @@ +doug +burger +home +page +doug +burger +home +pageprofession +inform +research +summaryresum +cvtranscriptcours +projectsadvisoraffili +project +galileo +sciwisconsin +wind +tunnelpag +maintain +comput +architectureuw +comput +architecturesimplescalar +tool +setgenericasacmperson +stuff +meus +linksphoto +galleryrid +demonhunt +damn +catsbewar +grad +school diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..551612d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,228 @@ +carl +boor +home +page +last +chang +carl +boor +professor +comput +scienc +mathematicsdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +usaoffic +hour +fall +town +schoenberg +work +madison +death +email +deboor +wisc +telephon +schedul +fall +teach +look +former +present +student +select +recent +articl +approxim +theori +written +areavail +anonym +wisc +approx +read +file +provid +access +individu +file +theclick +button +clickabl +version +read +file +small +subset +author +clickabl +click +list +errata +third +edit +cont +boor +elementari +numer +analysi +algorithm +approach +list +errata +variou +print +carl +boor +practic +guid +spline +check +latest +version +thevari +program +driver +latter +book +click +journal +ofapproxim +theori +publish +academ +press +inform +journal +includ +recent +accept +publishedpap +well +email +postal +address +mani +approxim +andmuch +much +ditto +forconstruct +approxim +publish +springer +verlag +ditto +foreast +journal +approxim +search +theirtabl +content +singli +combin +thank +paul +nevai +thishandi +tool +alsoapproxim +amo +slist +homepag +approxim +spline +bibliographi +avail +link +variou +publish +journal +peopl +resourc +ila +inform +center +seek +shall +find +organ +introduct +joi +seeviva_vi +alsoon +screen +tutori +click +great +pictur +hermit +place +also +contain +us +inform +html +thehtml +primermight +even +better +unusu +ever_chang +home +page +david +griffeath +sprimordi +soup +kitchen +variou +interest +inform +seeodd +end +thank +allan +pinku +pinku +techunix +technion +paul +nevaiif +find +us +also +check +paul +nevai +makehi +mathemat +outputavail +cours +check +inform +math +click +inform +numer +analysi +hous +next +door +occupi +taki +souganid +andthaleia +zariphopoul +szego +bust +stand +look +inscript diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..09caa7cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,249 @@ +devis +home +pagedevis +environ +data +explor +visualizationt +content +featuresexamplesin +depthpublicationsrel +workreleasecontactsfeaturesthes +featur +distinguish +devis +visual +environ +visual +queri +interfac +visual +construct +oneset +data +save +appli +input +data +data +larger +memori +effici +handl +map +data +graphic +record +level +cancontrol +color +shape +individu +record +abil +queri +data +record +us +repres +graphic +flexibl +layout +mechan +within +window +help +user +group +data +togeth +comparison +asid +need +link +ax +cursor +help +compar +relationship +differ +viewsof +data +record +base +input +data +direct +input +ascii +file +integ +float +date +string +data +type +examplescheck +follow +exampl +cool +pictur +quick +introduct +devis +tree +validationmolecular +biologi +cell +imag +soil +scienc +birch +clusteringfinanci +data +explorationfamili +medicin +nation +climatedata +centergeograph +inform +systemsoil +sciencefil +serverprogram +tracesclin +scienc +mani +moreexampl +data +viewer +famili +medicin +depthfor +detail +descript +devis +model +visualizationvisu +queri +interfaceperform +issuespublicationsmiron +livni +raghu +ramakrishnan +jussi +myllymaki +visual +explor +larg +dataset +proceed +spie +confer +visual +dataexplor +analysi +januari +michael +cheng +miron +livni +raghu +ramakrishnan +visual +analysi +stream +data +inproceed +spie +confer +visual +data +explor +andanalysi +februari +raghu +ramakrishnan +michael +cheng +miron +livni +praveenseshadri +next +sequencequeri +proceed +intern +confer +themanag +data +comad +decemb +relat +workth +seqproject +complementari +devis +design +queryrecord +base +sequenc +data +output +queri +bevisu +devis +releas +informationw +current +releas +version +devis +executablesfor +solari +platform +dynam +link +need +ld_library_path +environ +variabl +appropri +rundevis +support +architectur +execut +arestat +link +requir +shareabl +librari +time +download +devis +click +contactsfor +inform +research +project +contactmiron +livni +raghu +ramakrishnan +jussi +myllymaki +guangshun +chen +kent +wenger +user +support +hotlin +send +mail +devis +usersupport +hotlin +page +access +time +sinc +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..c5787c80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,300 @@ +home +page +david +dewitt +david +dewitt +professor +romn +fellow +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +dewitt +wisc +univers +michigan +interest +object +orient +databas +system +parallel +databas +system +databasebenchmark +geograph +inform +system +research +summari +main +research +project +shore +paradis +objectiveof +shore +design +implement +evalu +persist +objectsystem +serv +need +wide +varieti +target +applicationsinclud +hardwar +softwar +system +persist +programminglanguag +geograph +inform +system +satellit +data +repositori +multimedia +applic +shore +expand +basic +capabilitiesof +wide +us +exodu +storag +manag +develop +wisconsin +fund +arpa +number +wai +includ +support +typedobject +multipl +program +languag +unix +like +hierarchicalnam +space +name +object +unix +compat +interfaceto +object +text +field +interfac +intend +toeas +transit +applic +unix +file +systemenviron +shore +exist +unix +tool +ccwill +abl +store +data +shore +object +without +modif +basic +unix +file +becom +either +singl +shore +object +orth +text +field +complex +object +shore +target +wide +rang +hardwar +environ +scale +fromindividu +workstat +heterogen +client +server +networksto +larg +multiprocessor +intel +paragon +shore +ajoint +project +prof +carei +naughton +solomon +paradis +project +attempt +appli +technolog +developeda +part +shore +gamma +project +gamma +parallel +relationaldatabas +system +develop +univers +wisconsin +thetask +store +manipul +geograph +data +set +current +mani +geograph +inform +system +relat +databasesystem +hold +data +system +excel +formanag +busi +data +poor +match +modelingne +must +capabl +store +manipulatingmuch +complex +object +polygon +polylin +instead +paradis +emploi +object +orient +data +model +provid +muchbett +match +type +need +anoth +signific +differencefrom +current +system +paradis +emploi +parallelismto +facilit +execut +process +larg +data +set +assatellit +imag +target +hardwar +platform +projecti +cluster +sparc +connect +sampl +recent +public +benchmark +withm +carei +naughton +proceed +sigmod +confer +washington +shore +persistentappl +dewitt +franklin +hall +mcauliff +naughton +chuh +tsatalo +white +zwill +proceed +sigmod +intern +conferenceon +manag +data +minneapoli +client +server +paradis +kabra +patel +proceedingsof +larg +data +base +confer +santiego +chile +august +recent +talk +vldb +invit +talk +object +relat +summit +present +page +automat +creat +januari +email +pub +wisc +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..1dfa8306 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,625 @@ +charl +dyer +home +pagecharl +dyerprofessordepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +usa +mail +dyer +wisc +edutelephon +finger +infoph +univers +maryland +curriculum +vita +area +interest +comput +vision +three +dimension +shape +represent +appear +model +view +synthesi +activ +vision +visualizationgroup +leader +wisconsin +comput +vision +groupprogram +chair +cvpr +research +interest +view +synthesisth +goal +work +develop +basic +tool +controllingin +real +time +either +autonom +interact +virtual +cameraof +real +environ +input +imag +videostream +acquir +fix +mobil +camera +around +site +output +panoram +visual +scene +whicha +virtual +user +control +camera +move +environ +technolog +user +could +interact +navig +througha +real +environ +control +custom +path +view +thesit +predetermin +input +imag +main +researchquest +adapt +combin +basi +imag +synthesizenew +view +scene +without +model +scene +reconstructiona +intermedi +step +recent +develop +innovativetechniqu +callview +morph +take +basisimag +interpol +continu +rang +imag +correspond +view +linear +path +connect +camera +center +visual +explorationcomput +vision +research +recent +start +investig +howto +activ +control +imag +acquisit +process +controllingcamera +paramet +studi +purposefulli +control +theposit +orient +camera +order +dynam +adjustviewpoint +base +appear +three +dimension +scene +theus +real +time +viewpoint +control +behavior +import +forsolv +task +explor +unknown +object +order +findspecif +surfac +mark +build +global +model +unknownshap +recogn +object +coordin +simpl +observ +behavior +chang +appearanceof +surfac +well +defin +simplifi +imag +computationsrequir +make +precis +global +progress +algorithm +andelimin +need +accur +differenti +measur +thecamera +motion +believ +activ +approach +move +towardsviewpoint +close +relat +geometri +viewedobject +import +gener +us +thisapproach +develop +provabl +correct +algorithm +move +asid +view +surfac +revolut +order +recov +shape +reconstruct +global +surfac +unknown +smooth +arbitrarili +shape +object +visualizationin +area +visual +develop +map +techniquescap +gener +displai +possibl +data +object +defin +user +algorithm +without +need +user +defin +type +specificgraph +displai +procedur +capabl +displayingarbitrari +combin +algorithm +data +object +commonfram +refer +coupl +interact +control +algorithmexecut +provid +power +understand +algorithm +behavior +especi +interact +visual +experi +scientif +dataanalysi +algorithm +implement +system +call +forexperi +techniqu +us +visualizingintermedi +final +result +data +analysi +algorithm +forproblem +discrimin +cloud +satellit +imag +recent +public +seitz +dyer +cyclic +motion +analysi +us +period +trace +motion +base +recognit +shah +jain +kluwer +boston +appear +seitz +dyer +view +invari +analysi +cyclic +motion +comput +vision +appear +seitz +dyer +view +morph +proc +siggraph +seitz +dyer +toward +imag +base +scene +represent +us +view +morph +proc +conf +pattern +recognit +track +comput +vision +dyer +shape +recoveri +stationari +surfac +contour +control +observ +motion +advanc +imag +understand +festschrift +azriel +rosenfeld +ieee +comput +societi +press +alamito +kutulako +dyer +global +surfac +reconstruct +purpos +control +observ +motion +artifici +intellig +seitz +dyer +complet +scene +reconstruct +four +point +correspond +proc +conf +comput +vision +seitz +dyer +physic +valid +view +synthesi +imag +interpol +proc +workshop +represent +visual +scene +kutulako +dyer +recov +shape +purpos +viewpoint +adjust +comput +vision +kutulako +seal +dyer +build +global +object +model +purpos +viewpoint +control +proc +base +vision +workshop +kutulako +dyer +lumelski +provabl +strategi +vision +guid +explor +three +dimens +proc +ieee +conf +robot +autom +kutulako +dyer +occlud +contour +detect +us +affin +invari +purpos +viewpoint +control +proc +comput +vision +pattern +recognit +conf +seitz +dyer +affin +invari +detect +period +motion +proc +comput +vision +pattern +recognit +conf +seitz +dyer +detect +irregular +cyclic +motion +proc +workshop +motion +rigid +articul +object +hibbard +paul +battaiola +santek +voidrot +martinez +dyer +interact +visual +earth +space +scienc +comput +comput +juli +hibbard +dyer +paul +lattic +model +data +displai +proc +visual +recent +public +includ +abstract +wisconsin +comput +vision +groupcours +taught +introduct +artifici +intellig +spring +fall +comput +vision +fall +spring +current +student +gareth +bestor +brian +morgan +steve +seitz +liangyin +yuph +graduat +bill +hibbard +whibbard +macc +wisc +visual +scientif +comput +system +base +onlattic +structur +data +displai +model +kiriako +kutulako +kyro +rochest +explor +three +dimension +object +control +point +ofobserv +mark +allmen +allmen +iutech +imag +sequenc +descript +us +spatiotempor +flow +curv +toward +motion +base +recognit +brent +seal +seal +appear +model +three +dimensionalshap +machin +vision +graphic +harri +plantinga +wheaton +continu +viewer +center +object +representationfor +comput +vision +charl +stewart +stewart +connectionist +model +stereo +vision +bradlei +kjell +kjell +ccsua +ctstateu +orient +edg +separ +textur +measureslink +interestmi +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..55e5c771 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,24 @@ +welcom +friend +welcom +machinew +appreci +patienc +long +arduou +task +bring +better +home +page +check +educ +curriculum +vitaecheck +class +teach +home +page +section +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..dbe649ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,212 @@ +eric +hazen +home +pageer +hazennon +professorroom +comput +scienc +west +dayton +madison +current +work +fornet +scout +servic +project +intern +wait +wait +part +registr +servic +couldn +help +domain +name +problem +even +want +intern +neat +thing +monei +scout +thing +locat +comput +scienc +depart +univers +ofwisconsin +madison +see +could +never +explain +scout +elegantli +fearless +leader +susan +calcari +check +scout +page +offici +explan +design +mainten +site +email +list +scout +report +also +assist +pete +devri +withtech +support +mac +unix +machin +come +scout +spent +half +year +laboratori +molecular +biologi +time +full +time +student +well +molecular +biologi +call +technic +support +assist +digit +video +specialist +meant +around +fix +peopl +broken +mice +answer +email +question +time +lucki +make +cool +video +interest +us +digit +video +instruct +purpos +check +multimedia +servic +page +graduat +univers +wisconsin +philosophi +program +make +philosoph +real +commod +capitalist +societi +also +complet +requir +curriculum +make +shameless +pragmatist +comput +scienc +depart +email +talk +sinc +good +metaphys +discuss +link +serv +practic +purpos +makethi +page +look +standard +resum +date +multimedia +page +molecular +biologi +explain +made +anim +shown +world +among +drosophila +geneticist +told +wonder +girlfriend +page +salon +magazin +entertain +inform +ezin +creat +complet +mac +lauri +anderson +green +room +shockwav +check +kudon +page +link +know +wit +quicktimevr +documentari +plight +bosnia +uproot +popul +billi +holidai +homepag +nation +secur +archiv +check +nixon +preslei +meetingsejhazen +facstaff +wisc +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..ddb85c34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,16 @@ +tina +eliassi +home +pagetina +eliassi +univers +illinoi +urbana +champaign +univers +wisconsin +madison +offic +bldgphone +eliassi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..aa4d4a9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,118 @@ +eric +home +page +passsth +anoth +cold +budweisth +address +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +depart +electr +comput +engin +univers +wisconsin +madison +johnson +drive +madison +offic +phone +mail +ericro +wisc +research +area +comput +architectur +advisor +professor +smithresearch +topic +kestrel +multiscalar +project +instruct +level +parallel +high +bandwidth +instruct +fetch +mechan +branch +predict +confid +branch +mispredict +tolerancepubl +trace +cach +latenc +approach +high +bandwidth +instruct +fetch +eric +rotenberg +steve +bennett +jame +smith +appear +proceed +annual +intern +symposium +microarchitectur +decemb +assign +confid +condit +branch +predict +erik +jacobsen +eric +rotenberg +jame +smith +appear +proceed +annual +intern +symposium +microarchitectur +decemb +trace +cach +latenc +approach +high +bandwidth +instruct +fetch +eric +rotenberg +steve +bennett +jame +smith +univers +wisconsin +madison +technic +report +april +resum diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..ab1194a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,95 @@ +babak +falsafi +home +page +babak +falsafi +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usatel +email +falsafi +wisc +work +peopl +mentorcultresearch +interest +comput +architectur +perform +evalu +measur +comput +system +parallel +program +modelseduc +comput +scienc +univers +wisconsin +madison +decemb +comput +scienc +suni +buffalo +june +electr +comput +engin +suni +buffalo +june +miscellan +public +would +rather +drink +would +rather +would +rather +read +would +rather +listen +us +high +school +idea +like +fail +morf +shubu +dionosi +hillari +profan +phone +convers +check +american +french +queen +comput +architect +look +like +hack +partner +crime +next +gener +parallel +comput +last +updat +babak +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..2e138d05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,150 @@ +home +page +michael +ferri +michael +ferri +associ +professor +comput +scienc +industri +engineeringand +member +center +mathemat +scienc +comput +scienc +depart +univers +wisconsin +west +dayton +street +madison +telephon +email +ferri +wisc +univers +cambridg +interest +theori +algorithm +applic +mathemat +program +research +summari +look +robust +method +solv +larg +scale +variationalinequ +nonlinear +program +problem +applic +toproblem +econom +engin +pivot +path +followingtechniqu +investig +base +success +linear +emphasi +numer +properti +larg +scale +problem +andinterfac +model +languag +particular +applic +beingconsid +includ +econom +equilibria +effect +taxat +oncarbon +emiss +traffic +congest +effect +toll +structur +optim +contact +problem +chemic +process +design +consid +parallel +architectur +solvingproblem +nonlinear +optim +graph +partitioningtechniqu +determin +underli +structur +investig +tool +gener +purpos +parallel +optim +techniqu +forexploit +parallel +machin +directli +within +model +system +arealso +consider +prototyp +us +condor +system +extens +complementar +framework +also +beinginvestig +emphasi +identifi +exploit +underlyingmodel +structur +public +complet +list +paper +mostli +electron +avail +relev +link +cpnet +complementar +problem +mathemat +prgram +home +pagec +page +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..2fc1ff40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,335 @@ +david +finton +home +page +david +finton +finton +wisc +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +welcom +page +grad +student +research +nerdin +artifici +intelligenceher +univers +wisconsin +madison +grew +grand +rapid +michigan +late +show +home +offic +earn +degre +math +michigan +state +master +comput +scienceher +dissert +institut +take +littl +year +develop +traffic +measur +softwarefor +first +thesi +advisor +left +wisconsin +trusti +nextstationor +librari +enjoyplai +trumpet +piano +listen +longhair +music +plai +volleybal +intervarsityfolk +contribut +supersoak +arm +race +comment +page +feel +free +comment +form +send +mail +finger +accountto +current +plan +whether +system +gain +employ +introduct +artifici +intellig +current +project +comput +smart +understand +make +machin +intelligenti +goal +artifici +intellig +essenc +intelligencei +abil +learn +adapt +learn +actappropri +order +reach +goal +reinforc +learn +treat +problem +gener +case +system +output +control +action +chang +environ +input +sens +environ +also +input +reinforc +weak +kind +feedback +express +posit +neg +number +instead +teacher +present +thesystem +input +output +pair +system +instead +receiv +thumb +thumb +irregular +interv +work +focuss +need +todistinguish +good +action +on +direct +process +build +agood +represent +environ +term +relev +orimport +featur +note +import +basedfeatur +extract +current +appli +notion +import +problem +learn +balanc +need +explor +world +need +perform +optim +explor +exploit +also +investig +wai +us +import +make +learningprocess +effici +allow +system +specifi +start +point +learn +experi +activ +learn +goal +develop +better +understand +intelligentadapt +hope +provid +basi +intellig +action +whichwil +also +benefit +knowledg +base +task +base +work +realli +date +sorri +reinforc +learn +pagefor +inform +hotlistthi +browser +independ +hotlist +keep +copi +access +browser +platform +combin +actual +bookmark +file +omniweb +eleg +function +browser +netscap +opinion +omniwebi +current +avail +nextstep +avail +foral +openstep +variant +openstep +releas +editori +page +responseto +jehovah +wit +deiti +christwisconsin +site +intervars +graduat +fellowship +univers +wisconsin +madison +check +weatherin +citi +madison +wisc +star +trek +page +program +inform +link +page +isthmu +daili +pagesom +favorit +place +visit +nebula +nasa +pictur +world +wide +studi +bibl +crosssearch +minor +glenn +gould +homepag +farsid +daili +star +trek +star +trek +voyagerent +dilbert +zoneroam +world +virtual +tourist +stereogram +tell +head +blow +true +next +head +show +think +bill +gate +word +sponsor +last +modifi +octob +finton +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..b06df88e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,412 @@ +home +page +charl +fischer +nbsp +nbsp +nbsp +nbsp +nbsp +nbspcharl +fischer +nbsp +nbsp +nbsp +nbsp +nbsp +nbspprofessor +comput +scienc +nbsp +nbsp +nbsp +nbsp +nbsp +nbspunivers +wisconsin +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +messag +email +fischer +wisc +teach +semest +teachingc +graduat +cours +compil +spring +teachingc +undergradu +cours +compil +research +research +interest +focu +compil +design +implement +recent +interest +best +exploit +enormouscap +provid +modern +comput +architectur +issu +student +investig +includ +code +schedul +import +avoid +unnecessari +pipelin +delai +haveinvestig +issu +optim +schedul +arithmet +express +domin +model +global +procedur +level +regist +alloc +graph +color +best +model +mayb +model +explicitli +quantifi +likelihood +benefit +registerresid +attract +care +regist +alloc +vital +gener +code +unnecessari +loadsand +store +must +avoid +ultim +regist +alloc +interprocedur +regist +alloc +theprocedur +program +analyz +studiedinterprocedur +alloc +modelsthat +optimallyalloc +regist +among +procedur +polynomi +time +approach +seem +effect +practic +anyon +code +know +easi +make +mistak +error +involv +pointer +arrai +indic +especi +common +studi +wai +toautomat +check +pointer +arrai +oper +time +share +memori +multiprocessor +workstat +us +processor +program +anoth +monitor +program +possibl +routin +check +program +execut +littl +orno +appar +slowdown +recent +public +minimum +cost +interprocedur +regist +alloc +steven +kurland +charl +fischer +popl +sigplan +sigact +symposium +principl +programminglanguag +januari +cost +concurr +check +pointer +arrai +access +program +harish +patil +charl +fischer +appear +insoftwar +practic +experi +demand +driven +regist +alloc +todd +proebst +charl +fischer +appear +inacm +transact +program +languag +system +effici +instruct +schedul +delai +load +architectur +steven +kurland +todd +proebst +charl +fischer +transact +program +languag +system +zero +cost +rang +split +steven +kurland +charl +fischer +sigplan +confer +program +languag +design +implement +june +activitiesa +complet +revis +second +edit +craft +compil +author +cytronand +richard +leblanc +almost +complet +publish +benjamin +cum +look +soon +better +bookstor +everywher +short +commun +editor +transact +program +languag +system +topla +educationph +cornel +univers +pars +context +free +languag +parallel +environ +supervis +john +william +studentsdonn +milton +syntact +specif +analysi +attribut +grammar +juli +bruce +rowland +semant +attribut +evalu +syntact +analysi +juli +stephen +skedzielewski +definit +attribut +reevalu +attribut +grammar +septemb +bernard +dion +local +least +cost +error +corrector +context +free +context +sensitivepars +decemb +mahadevan +ganapathi +retarget +code +gener +optim +us +attribut +grammar +novemb +vimal +begwami +approach +attribut +evalu +error +correct +compil +august +maunei +least +cost +syntact +error +correct +us +extend +right +context +januari +gregori +johnson +context +sensit +attribut +flow +august +anil +gener +execut +facil +integr +program +environ +decemb +william +winsborough +automat +transpar +parallel +logic +program +compil +time +august +venkatesh +framework +specif +implement +program +analysi +algorithm +august +todd +proebst +code +gener +techniqu +august +steve +kurland +approach +interprocedur +regist +alloc +januari +harish +patil +effici +program +monitor +techniqu +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..8c2adf0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,547 @@ +galileo +home +page +galileo +project +wisconsint +contentsgalileoproject +descriptionpublicationsrel +projectssci +wisconsinproject +descriptionpublicationsproject +membersgalileo +wisconsingalileo +project +conduct +comput +architectur +groupat +univers +wisconsin +madison +project +focus +medium +long +term +evolut +processor +system +architectur +emphasison +memori +system +specif +studi +therelationship +processor +main +memori +futuresystem +complet +separ +todai +integr +extent +process +capabl +storag +merg +least +wai +increas +chip +penalti +issuabl +instruct +orlimit +chip +bandwidth +design +place +capacityon +processor +chip +modul +eventu +sizabl +fractionof +main +memori +resid +chip +repres +arrow +label +mopin +diagram +differ +possibl +migrat +ofprocessor +capabl +onto +dram +chip +eventuallyobvi +central +processor +iram +arrow +specif +research +current +focus +follow +area +examin +perform +impact +larg +memori +latenc +andlimit +memori +bandwidth +current +futur +microprocessor +base +systemsperform +model +variou +design +point +along +theprocessor +memori +integr +spectrumcach +hierarchi +design +integr +systemsdesign +main +memori +bank +integr +systemprogram +execut +system +multipl +integr +chip +datascalar +architectur +galileo +specif +public +exploit +optic +interconnect +elimin +serial +bottlenecksdoug +burger +jame +goodman +appear +intern +confer +massiv +parallel +process +us +optic +interconnect +octob +datascalar +architectur +spsd +execut +modeldoug +burger +stefano +kaxira +jame +goodman +univers +wisconsin +madison +comput +scienc +depart +technic +report +juli +quantifi +memori +bandwidth +limit +current +futur +microprocessorsdoug +burger +jame +goodman +alain +appear +intern +symposium +comput +architectur +declin +effect +dynam +cach +gener +purpos +microprocessorsdougla +burger +jame +goodman +alain +univers +wisconsin +madison +comput +scienc +depart +technic +report +januari +relat +project +iram +berkeleyppram +kyushu +univeristi +japansci +wisconsinour +group +also +close +involv +research +relat +cach +coherentshar +memori +multiprocessor +design +specif +studi +scalabl +coherentinterfac +coher +transport +layer +us +ieee +standard +base +platform +explor +idea +standard +specifi +link +list +base +hardwar +coher +protocol +includ +support +effici +synchron +primit +queue +lock +qolb +aswel +optim +differ +share +pattern +pairwis +share +fresh +read +share +standard +also +includ +definitionfor +extrem +high +bandwidth +latenc +transport +layer +betweenprocess +element +individu +cluster +current +perform +relat +research +follow +topic +extend +logarithm +grow +share +structureseffici +hardwar +synchron +share +memori +multiprocessorsa +scalabl +studi +base +protocol +includ +standard +extensionsaggress +consist +model +share +memori +multiprocessorswisconsin +public +mechan +minim +synchron +overhead +share +memori +applic +appear +best +architectur +paper +proceed +intern +confer +supercomput +juli +also +univers +wisconsin +madison +comput +scienc +depart +simul +transport +layer +wisconsin +wind +tunneldougla +burger +jame +goodman +proceed +second +intern +workshop +base +high +perform +cost +comput +march +also +univers +wisconsin +madison +comput +scienc +depart +technic +report +march +hierarch +extens +scijam +goodman +stefano +kaxira +univers +wisconsin +madison +comput +scienc +depart +technic +report +juli +extend +scalabl +coher +interfac +larg +scale +share +memoryross +evan +johnson +univers +wisconsin +madison +comput +scienc +depart +technic +report +februari +hardwar +support +synchron +scalabl +coher +interfac +nagi +aboulenein +stein +gjess +jame +goodman +philip +woest +univers +wisconsin +madison +comput +scienc +depart +technic +report +novemb +interconnect +topolog +point +point +ringsross +johnson +jame +goodman +univers +wisconsin +madison +comput +scienc +depart +technic +report +decemb +analysi +ringsteven +scott +jame +goodman +mari +vernon +univers +wisconsin +madison +comput +scienc +depart +technic +report +novemb +lower +bound +latenc +scalabl +link +list +cach +coherenceross +johnson +univers +wisconsin +madison +comput +scienc +depart +technic +report +june +analysi +synchron +mechan +share +memori +multiprocessorsphilip +woest +jame +goodman +univers +wisconsin +madison +comput +scienc +depart +technic +report +februari +effici +synchron +primit +larg +scale +share +memori +multiprocessorjam +goodman +mari +vernon +philip +woest +proceed +third +intern +confer +architectur +support +program +languag +oper +system +april +also +univers +wisconsin +madison +comput +scienc +depart +technic +report +project +particip +faculti +goodman +graduat +student +doug +burger +alain +stefano +kaxira +project +alumni +nagi +abouleneinross +johnsonstev +scottlast +modifi +doug +burger +dburger +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..156c7118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,38 @@ +andrew +geeri +home +page +andrew +geeri +geeri +wisc +west +dayton +street +madison +regent +madison +madisonin +comput +scienc +current +work +compsci +grade +schedul +pontif +peopl +interest +jacqu +derrida +post +structur +martin +heidegg +albert +camu +jean +paul +sartr +friedrich +nietzsch diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..0cacf901 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,167 @@ +gideon +glass +homepag +continu +tweak +homepag +thank +stop +collect +imag +find +deposit +directori +sampl +sampl +pictur +gui +claim +cooler +accord +toonion +seethi +movi +usual +suspect +walk +nearest +blockbust +note +recent +move +none +roommat +dutch +cheap +either +given +predica +dismal +prospect +improv +withno +outsid +influenc +pleas +consid +make +small +donationto +help +defrai +cost +purchas +check +monei +orderscan +sent +follow +address +pleas +send +cash +gideon +glass +monro +floor +madison +usathank +support +grad +student +sometim +find +read +paper +eventhough +shelf +feet +unread +book +wait +read +anyhow +look +someth +christian +achil +huge +index +might +also +unifi +cstechreport +index +class +project +report +otherstuff +avail +follow +program +machin +load +averagewil +grow +fast +main +fork +doofu +actual +time +share +machin +back +calvin +great +time +killer +zippi +pinheadha +reload +sever +time +justtri +last +fall +kill +time +tweak +netscap +noth +think +work +mozilla +higher +well +dabbl +object +orient +programmingin +mostli +exercis +suppos +netscap +buttonher +thing +right +suffic +case +told +somethingin +bookmark +denni +ritchi +creator +unix +wrote +anti +forward +unix +hater +handbook +send +mailand +mayb +mayb +pleas diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..1d83c8bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,608 @@ +andi +krazi +glew +univers +wisconsin +home +pageandi +krazi +glew +univers +wisconsin +home +pagethi +andi +first +wisconsin +home +page +largelyform +snippet +intel +home +page +stylishor +organ +summarycontact +touch +ship +thing +addressescalendar +arrang +meet +filesystem +access +filesystem +glew +public +html +http +wisc +glew +approach +anyon +read +file +unless +specif +arrang +open +default +scribe +minut +taker +sohi +research +group +weekli +meet +research +interest +rather +gener +form +adapt +applic +dynam +versu +static +comput +architectur +wannab +author +comput +architectur +high +level +edit +thought +higher +educ +suggest +effect +work +patent +claim +fame +miscellan +info +stuff +self +imag +configur +manag +resum +favorit +quot +sai +self +imag +beef +montreal +peopl +care +pope +priest +parson +king +william +boyn +want +coin +trust +summer +time +keep +harm +see +first +frost +snow +poor +trust +almighti +dollar +good +mapl +wood +keep +bellow +warm +church +chapel +ranter +preacher +beecher +stuff +montreal +alreadi +enough +beef +keeper +montreal +harbour +deplor +churchmen +notori +atheist +less +well +known +chariti +strand +sailor +knew +could +alwai +beef +exchang +chop +chord +firewood +meal +warm +place +sleep +print +manifesto +handbil +advertis +comput +architect +hackeralthough +aspir +comput +architect +someth +formerlyhad +fake +motorola +busi +card +ever +sinc +start +work +comput +first +thing +learn +assembl +redesign +chip +start +unix +kernel +hacker +gould +real +time +unix +andstil +think +wistfulli +hacker +beard +frequent +wear +suspend +bald +comput +architectureonc +futur +involv +microarchitectur +intel +pentium +adopt +intel +resum +wannab +author +comput +architecturei +constantli +verg +write +book +entitl +comput +architectur +grabbag +trick +techniqu +sort +antidot +hennessi +patterson +cannot +afford +diskspac +internet +servic +provideror +charg +alwai +connect +system +suggest +appreci +interest +piec +comput +architectureon +best +wai +learn +comput +architectur +read +textbook +datasheet +instruct +refer +miscellan +info +wander +netscapebookmarksstockscod +standardsroi +wilkinson +code +standardsi +disagre +mani +configur +manag +although +perhap +quit +first +real +quickli +defunct +startup +compani +call +enfopris +build +busi +workstat +first +chang +assign +unix +driver +writingto +system +integr +longstand +love +hate +relationship +configurationmanag +tool +like +scc +first +publish +paper +box +link +parallel +tree +element +configur +manag +system +first +usenix +workshop +softwar +manag +describ +central +databas +multipl +view +hardlink +clone +save +space +time +us +gould +comput +system +divis +unix +team +brian +berlin +deprec +approach +paper +mainli +advoc +optimist +concurr +control +approach +wherea +thought +advoc +lock +actual +advoc +optimist +concurr +control +also +advoc +lock +case +optimist +version +get +livelock +usual +insist +singl +identifi +serial +schedul +sourc +code +checkinsso +test +proce +linear +manner +requir +programm +test +code +work +system +previou +fix +appli +although +recogn +even +requir +relax +often +strip +version +approachin +thing +like +apolog +never +creat +truli +portabl +tool +accomplish +us +similar +approach +although +mike +fetterman +mark +aitken +deserv +credit +enhanc +sever +featur +went +notabl +version +number +becam +overal +suffici +everyth +includ +cshrc +login +even +page +calendar +wisconsinhow +arrang +meet +sinc +seem +ubiquit +calendar +schedul +programat +univers +wisconsin +depart +variou +peopl +cmtool +public +domain +ical +plan +critic +mass +anyof +calendar +isol +us +pilot +associ +softwar +least +mean +meet +get +pilot +meet +commit +least +commit +possibl +also +record +meet +us +voic +organ +therefor +arrang +meet +must +touch +prefer +email +possibl +phone +person +manuallyadd +meet +calendar +creat +download +calendar +pilot +softwareto +page +creat +download +calendar +pilot +softwareto +microsoft +watch +intelat +intel +devout +user +group +schedul +program +last +time +intel +synchron +unix +also +ontim +past +weak +disconnect +oper +email +oper +least +allow +major +peopl +schedul +meetingswith +without +manual +intervent +intel +schedul +algorithm +access +andi +calendar +us +synchron +also +tell +andi +meet +email +person +reserveth +right +meet +blindli +invit +cannot +synchron +urgent +make +meet +andi +admin +teresa +lock +synchron +possibl +check +andi +calendar +page +proposeif +meet +urgent +week +futur +avoid +bother +teresa +send +andi +email +realiz +andi +miss +meet +sent +email +enough +advanc +overallschedul +calendar +like +topic +someth +fascin +bring +effici +advantag +person +secretariesand +aid +camp +comput +user +header +wisc +glew +public +html +glew +html +glew diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..9c96b883 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,28 @@ +goodman +home +page +jame +goodman +goodman +wisc +professor +comput +sciencesdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaresearch +interest +lot +good +stuff +current +project +galileo +wisconsin +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..81e25fd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,540 @@ +greg +sharp +home +pagegreg +sharp +home +pagenam +greg +sharpemail +greg +wisc +eduoffic +offic +phone +offic +hour +appt +tgif +everi +fridai +dept +section +instructor +fall +lectur +note +spring +lectur +notesclass +fall +topic +databas +manag +system +find +aboutsearch +engin +altavista +dejanew +excit +infoseek +lyco +metacrawl +yahoo +usenet +mirror +html +format +ohioc +program +learn +todai +librari +draft +standard +april +mirror +stanford +cygnu +info +refer +mumit +newbi +guideplatform +independ +librari +portabl +develop +kit +amulet +dclap +string +attach +requir +motif +suit +string +attach +wxwindow +yacl +class +spring +introduct +comput +geometri +comput +architectur +comput +architectur +project +machin +learn +machin +learn +projectclass +fall +numer +linear +algebra +introduct +theoret +comput +scienc +comput +vision +imag +homework +comput +vision +projectmisc +freewar +sharewar +info +cygwin +softwar +internet +directori +gimp +harmonai +harmoni +unix +client +hyper +hyper +browser +imag +databas +vasc +imag +databas +altern +link +video +pic +vision +research +rang +imag +databas +shape +shade +pic +rang +imag +databas +shape +shade +pic +give +link +imag +databas +imag +databas +pretti +cool +idea +specifi +section +night +imag +japan +imag +databas +jaida +year +worth +atmospher +data +imag +multiresolut +seamless +imag +databas +cool +click +zoom +resolut +view +solar +system +nice +pic +moon +planet +comet +meteor +wyom +imag +databas +version +pic +wyom +also +lot +misc +stuff +like +pic +planet +overhead +shot +antarctica +satelit +imag +catalogu +niae +satelit +pic +japan +gothic +imag +databas +electr +postcard +card +rack +nice +select +thank +link +todd +vistex +textur +databaseartifici +gener +imag +primoridi +soup +kitchen +math +depart +awesom +site +medic +imag +databas +line +imag +histori +medicin +document +pictur +diagon +line +dermatolog +onlin +imag +atla +base +erlang +imag +databas +germani +mirror +japan +pretti +cool +idea +enter +diagnosi +back +imag +orthopaed +imag +databas +nice +databas +ecvnet +imag +data +base +list +link +optic +charact +recognit +handwrit +recognit +home +page +nici +handwrit +recognit +groupimag +process +home +page +imag +process +home +page +washington +state +univers +imag +librari +softwar +comput +graphic +softwar +raytrac +home +page +rayshad +utah +raster +toolkit +radianc +tracer +radios +packag +avalon +archiv +object +stuff +tracer +mirror +grimstead +massiv +list +trace +dsite +refer +site +comput +graphic +hardwar +graphic +board +intergraph +lockhe +glint +chipset +nvidia +chipsetcomput +geometeri +geometri +center +applic +challeng +comput +geometrylispuseless +pagescomput +architectur +comput +architectur +home +page +hennessi +patterson +resourc +text +superdlx +parallel +comput +simul +parl +mexico +state +univ +includ +databas +trace +architectur +link +univ +washingt +architectur +group +index +simul +georgia +tech +architectur +groupjapanes +comput +guid +japanes +comput +unvers +washington +monash +archiv +index +japanes +comput +stuff +infowav +edict +window +english +japanes +japanes +english +dictionari +shodouka +asiasoftinform +retrev +peregrin +robot +travers +index +written +perl +trec +text +retriev +home +page +infomin +manag +gigabyt +freewar +search +engin +text +imag +textual +imag +provid +info +internet +experi +thoma +thoma +feedback +linguist +util +repositori +inform +retriev +tool +survei +natur +languag +process +inform +retriev +nist +comput +languag +index +softwar +other_sw +info_retriev +world +wide +robot +wander +spider +jedi +project +might +strictli +relat +hartlib +paper +project +latin +stemmer +inform +retriev +multimedia +retriev +group +academ +group +inform +retriev +system +relat +cours +inform +storag +retriev +relat +cours +includ +histori +inform +retriev +relat +cours +new +pointcast +check +custom +portfolio +automat +updat +tool +literatur +mark +twainhumor +apolog +citizen +offens +link +site +threw +link +garbag +belong +invest +investorweb +networth +fundscap +brill +editori +servic +stockmastermutu +fund +brokerag +hous +fidel +invest +vanguard +row +price +jack +white +compani +schwab +charl +schwab +gabelli +fund +mutualsmisc +page +psnuplast +modifi +greg +sharpgreg +wisc +http +wisc +greg +greg +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..64572acb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar +gopalsridhar +gopalgsri +wisc +edubon +marrow +pageresumest +wisconsin +pagecalvin +hobbesbookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..c6cf7e66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,82 @@ +guangshun +home +page +welcom +guangshun +home +page +graduat +student +depart +comput +scienc +dayton +univers +wisconsin +madison +madison +phone +home +offic +educ +univers +wisconsin +madison +california +state +univers +angel +peke +univers +physic +depart +grade +research +interest +databas +manag +system +advis +raghu +ramakrishnan +miron +livni +project +data +analysi +project +famili +medicin +devis +data +explor +visual +environ +class +interest +link +stuff +comput +relat +career +plan +chines +relat +miscellani +send +email +send +email +around +weather +forecast +madison +visitor +sinc +june +visitor +number +sinc +visitor +number +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..e4bd21a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,53 @@ +guhan +viswanathan +home +page +guhan +viswanathan +gviswana +wisc +graduat +studentdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaadvisor +laru +thesi +research +thesi +focus +design +implement +data +parallellanguag +involv +design +data +parallellanguag +base +develop +local +implement +compil +target +investig +data +parallelappl +execut +effici +hand +code +parallelprogram +amor +detail +research +summari +list +public +us +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..b9a209ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,97 @@ +harit +home +page +harit +graduat +student +univers +wisconsin +madison +depart +comput +scienc +would +like +list +classmat +cours +take +fall +databas +manag +system +prof +raghu +ramakrishnan +advanc +comput +architectur +prof +mark +hill +cours +take +spring +advanc +comput +architectur +prof +jame +goodman +undergradu +student +world +famou +mvsr +engin +colleg +osmania +univers +hyderabad +india +meet +draw +line +thing +interest +indian +newspap +stuff +sport +sastri +link +roommat +home +page +saeed +mirza +murthi +link +zubber +dust +photo +photograph +univers +warn +click +year +folk +page +access +time +sinc +sept +counter +courtesi +counter +page +access +time +sinc +sept +electron +mail +mail +address +harit +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..7eca51cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,55 @@ +rebecca +hasti +home +page +rebecca +hasti +graduat +student +research +assistantcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +mail +hasti +wisc +edutelephon +telephon +dept +first +java +applet +click +fall +schedul +engr +java +noland +seminar +comput +scienc +univers +wisconsin +madison +mathemat +univers +wisconsin +madison +mathemat +carleton +colleg +interest +program +languag +basketbal +volleybal +softbal +linkag +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..fc101a2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,140 @@ +home +page +chad +lane +academ +person +info +neat +stuff +dept +comput +scienc +univers +wisconsin +madison +west +dayton +madison +dept +phone +mail +offic +address +offic +hour +offic +phone +home +phone +wisc +welcomethank +stop +page +hope +enjoi +bestbet +link +stuff +section +biggest +new +life +right +get +marri +onmai +nichol +final +want +tell +good +luck +count +academ +fall +cours +inform +retriev +technolog +seek +inform +databas +manag +system +ling +audit +advanc +semant +research +interest +comput +linguist +discours +process +us +advic +research +barwis +epigram +program +alan +perli +educ +mathemat +comput +scienc +minor +philosophi +laud +truman +state +univers +formerli +northeast +missouri +state +univers +comput +scienc +expect +univers +wisconsin +madison +person +inform +stand +neat +stuff +accord +truli +click +imag +cyber +poop +creation +unabash +brother +bart +arthur +lane +download +claud +claud +psychot +program +talk +rais +plant +internet +deep +thought +jack +handi +reload +differ +on +last +modifi +chad +lane diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..dad5c60c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,196 @@ +susan +hert +home +page +susan +hert +hert +wisc +research +assist +depart +comput +scienc +univers +wisconsin +madison +dayton +madison +telephon +curriculum +vita +postscript +research +interest +public +softwar +interest +link +research +interest +appli +experiment +comput +geometri +analysi +algorithm +design +motion +plan +algorithm +comput +graphic +geometr +algorithm +advisor +vladimir +lumelski +current +work +robot +develop +motion +plan +alogirthm +multipl +robot +common +environ +select +public +susan +hert +vladimir +lumelski +deform +curv +plane +tether +robot +motion +plan +extend +abstract +paper +appear +proceed +canadian +confer +comput +geometri +august +susan +hert +vladimir +lumelski +planar +curv +rout +tether +robot +motion +plan +appear +intern +journal +comput +geometri +applic +susan +hert +vladimir +lumelski +ti +bind +motion +plan +multipl +tether +robot +robot +autonom +system +version +paper +publish +proc +ieee +intern +confer +robot +autom +susan +hert +sanjai +tiwari +vladimir +lumelski +terrain +cover +algorithm +appear +journal +autonom +robot +special +issu +autonom +underwat +robot +susan +hert +vladimir +lumelski +move +multipl +tether +robot +arbitrari +configur +proc +intern +confer +intellig +robot +system +august +susan +hert +reznik +simul +librari +basi +anim +program +version +technic +report +univers +wisconsin +madison +robot +laboratori +juli +interest +link +comput +geometri +page +comput +scienc +educ +link +book +refer +shelf +librari +congress +line +book +page +travel +samantha +cook +epicuri +veggi +unit diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..00be66d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,64 @@ +kirk +hogenson +kirk +hogenson +graduat +studentcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +mail +wisc +edutelephon +kirk +hogenson +graduat +student +madison +madison +wisconsin +depart +section +offic +hour +tue +also +look +myschedul +none +offic +hour +workout +mail +tryto +appoint +time +finger +send +mail +visit +ghana +countri +serv +peac +corp +usernam +check +pnhp +student +group +page +maintain +wife +eilun +experi +counter +sai +accessedtim +sinc +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..b1e5c1b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,15 @@ +jeffrei +horn +jeffrei +horn +swanton +road +madison +wisconsin +phone +email +horn +wisc +wise +linear +familyemploymenteducationresearchgenealog diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..456fb2b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,244 @@ +susan +horwitzsusan +horwitzprofessorcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usa +mail +horwitz +wisc +telephon +secretari +depart +cornel +univers +research +interest +languag +base +program +environmentsprogram +slice +differenc +mergingstat +analysi +programsinterprocedur +dataflow +analysisresearch +summarymi +work +mainli +involv +design +implementationof +languag +base +program +tool +help +programm +problem +like +understand +exist +program +work +would +affectedbi +propos +modif +understand +textual +structur +semant +differ +betweentwo +version +program +retest +program +chang +combin +piec +program +produc +program +certainsemant +guarante +work +involv +program +represent +call +theprogram +depend +graph +oper +call +slice +also +work +algorithm +precis +interproceduraldataflow +analysi +previou +work +interprocedur +dataflow +analysi +mainli +concentratedeith +effici +algorithm +specif +individu +problem +necessarili +effici +algorithm +gener +class +problem +thoma +rep +mooli +sagiv +develop +implement +newalgorithm +effici +appli +larg +class +problem +recent +publicationsm +shapiro +horwitz +fast +accur +flow +insensit +point +analysi +appear +confer +record +twenti +fourth +symposium +onprincipl +program +languag +pari +franc +januari +horwitz +rep +sagiv +demand +interprocedur +dataflow +analysi +proceed +sigsoft +symposium +foundat +softwareengin +washington +octob +sagiv +rep +horwitz +precis +interprocedur +dataflow +analysi +applic +constantpropag +proceed +sixth +intern +joint +confer +theoryand +practic +softwar +develop +aarhu +denmark +rep +sagiv +horwitz +precis +interprocedur +dataflow +analysi +graph +reachabl +confer +record +twenti +second +symposium +principlesof +program +languag +francisco +januari +bate +horwitz +increment +program +test +us +program +depend +graph +confer +record +twentieth +symposium +principlesof +program +languag +charleston +januari +horwitz +rep +program +depend +graph +softwar +engin +proceed +fourteenth +intern +conferenceon +softwar +engin +melbourn +australia +horwitz +identifi +semant +textual +differ +version +aprogram +proceed +sigplan +confer +program +languagedesign +implement +white +plain +june +teach diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..0c29ed6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid +pagenam +sidnei +hummertoffic +phone +offic +email +hummert +wisc +edua +postscript +version +resum +pictur +click +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..3a830729 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,54 @@ +igor +ivanisev +home +pageigorivanisev +work +newest +project +disclaim +alien +speak +alien +particular +needless +page +construct +ever +feel +like +actual +construct +alreadi +link +mail +research +interest +robot +vision +stuff +generalgradu +slave +univers +wisconsin +departmentwa +undergrad +drake +univers +math +departmentaddress +comput +scienc +departmentunivers +wisconsin +west +dayton +streetmadison +offic +phone +home +phone +mail +iigor +wisc +eduiigor +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..27ce8d3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..a3def617 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,47 @@ +sharenow +home +page +sharenow +wisc +teach +assist +peterson +comput +scienc +depart +univers +wisconsin +madisonmadison +offic +comput +sciencestelephon +offic +hour +section +tuesdai +thursdai +pmsection +meet +section +meet +pmboth +class +meet +room +comput +sciencesc +announcementshandoutsmoth +jone +profil +sharenow +recreat +site +pleas +send +email +comment +last +modifi +tuesdai +septemb +sharenow diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..136d4a78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,49 @@ +basneyjim +basneygradu +student +research +assistantcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +email +jbasnei +wisc +eduoffic +comput +scienc +statisticsoffic +phone +research +interest +area +oper +system +andnetwork +current +work +condor +directionof +prof +miron +livni +receiv +fromoberlin +colleg +comput +scienc +english +webpag +oberlin +resum +codefrom +previou +project +avail +onlin +last +modifi +basnei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..f570b2bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,96 @@ +jerel +mackai +home +pagejerel +mackai +assist +research +comput +scienc +special +databas +oper +system +work +fulltim +comput +system +univers +wisconsin +madison +comput +scienc +depart +respons +includ +develop +support +sybas +ingr +databas +instal +backup +softwar +train +student +hourli +plai +electr +guitar +thrash +metal +specialti +also +violin +classic +baroqu +mainli +seen +error +evil +wai +click +shock +case +didn +believ +ey +first +time +work +like +record +mostli +funni +cover +stuff +abba +metallica +also +origin +soon +abl +sampl +hit +watch +favorit +show +plai +raquetbal +golf +shoot +pool +stand +around +towel +yeah +know +much +work +finger +jerellast +modifi +jerel +mackai +jerel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..daf1eee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home +page +johan +larson +homepag +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..fe795cfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,173 @@ +home +page +relief +happi +java +good +censorship +realli +mail +jherro +wisc +note +class +made +home +page +relax +let +would +cool +thing +home +page +apictur +pictur +girlfriend +half +year +afraid +date +though +almost +year +mani +year +tortur +disembody +becam +float +head +death +directori +anim +frame +seri +pictur +jpeg +format +default +name +number +start +frame +number +frame +millisecond +paus +imag +default +overriden +paus +repeat +sequenc +explicit +order +frame +stuff +vital +inform +aquir +nicknam +would +like +take +time +apolog +lame +page +pleas +bear +pretti +pictur +jack +skellington +kermit +frog +interest +someth +els +neat +link +friend +homepag +write +haiku +said +thath +go +click +stuff +roomat +mine +grad +notr +dame +memori +forgotten +time +roomat +cult +hippothi +exploratori +intervent +chaotic +exist +realiti +follow +link +enjoi +benefit +matriarch +societi +join +todai +exclus +club +hierarchi +rule +friend +homepag +semi +cool +link +notr +dame +home +page +yahooooooooooooo +work +link +contain +free +softwar +shack +bazillion +search +engin +search +engin +mpeg +movi +archiv +realli +cool +link +great +muppet +page +sound +imag +link +cool +rachel +want +select +cool +cano +trip +pictur +look +bout +cano +pictur diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..cbcf6723 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,188 @@ +bing +home +page +bing +index +gener +inform +educ +advisor +research +interest +research +project +public +pointer +hobbi +gener +informationresearch +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +jieb +wisc +edueduc +comput +scienc +univers +wisconsin +madison +advisor +prof +david +dewittresearch +interest +tertiari +storag +support +dbm +parallel +databas +manag +system +object +orient +databas +manag +system +geograph +inform +systemsresearch +project +paradis +shorepublicationsbuild +scaleabl +spatial +dbm +technolog +implment +evalu +patel +kabra +tuft +burger +hall +ramasami +lueder +ellman +kupsch +dewitt +naughton +submit +public +octob +queri +execut +batch +paradis +prong +approach +effici +process +queri +tape +resid +data +set +dewitt +submit +public +octob +process +satellit +imag +tertiari +storag +studi +impact +tile +size +perform +dewitt +appear +nasa +goddard +conferenceon +mass +storag +system +technolog +septemb +us +constraint +queri +tree +goldstein +ramakrishnan +shaft +shorter +version +appear +workshop +constraint +databas +februari +client +server +paradis +dewitt +kabra +patel +proceed +larg +data +base +confer +santiago +chile +septemb +storag +reclam +reorgan +client +serverpersist +object +store +yong +naughton +proceed +ieee +data +engin +confer +houston +februari +pointer +eosdi +sigmod +madison +dbm +research +grouphobbi +tenni +tenni +server +volleybal +volleyballweb +white +water +raft +whitewat +page +find +pictur +click +full +size +pictur +last +updat +juli +bing +jieb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..0ad8a829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,74 @@ +jignesh +home +page +jignesh +patel +jignesh +wisc +welcom +research +assist +depart +comput +scienc +univers +wisconsin +madison +west +dayton +street +madison +telephon +advisor +david +dewitt +research +interest +system +parallel +databas +system +object +relat +databas +current +work +paradis +project +public +relat +paradis +client +server +paradis +paper +publish +vldb +partit +base +spatial +merg +join +publish +sigmod +public +accur +model +hybrid +hash +join +algorithm +paper +publish +sigmetr +miscellan +stuff +virtual +tourist +inlin +skate +home +page +madhuri +kashmir +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..faaaf1b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +georg +varghes +peopl +download +netscap +page +click +warn +page +pretti +lame diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..7678ab80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,106 @@ +johann +gehrk +homepagejohann +gehrkewelcom +intern +graduat +studentat +comput +sciencesdepart +univers +ofwisconsin +madison +area +interest +databasemanag +system +work +area +data +mine +underprofessor +raghuramakrishnan +page +construct +contact +inform +public +interest +linkscontact +inform +email +johann +utexa +offic +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +room +madison +wisconsin +home +eagl +height +madison +public +stoica +abdel +wahab +jeffai +baruah +gehrk +plaxton +proport +share +resourc +alloc +algorithmfor +real +time +time +share +system +proceed +ieee +real +time +system +symposium +washington +decemb +appear +anexpand +version +baruah +gehrk +plaxton +fastschedul +period +task +multipl +resourc +inproceed +ieee +intern +parallel +processingsymposium +april +expand +version +avail +technicalreport +depart +comput +scienc +universityof +texa +austin +februari +johann +gehrk diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..66175672 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,84 @@ +home +pagewelcom +home +page +first +year +graduat +student +univers +wisconsin +madison +studi +comput +scienc +us +comput +also +maintain +frequent +ask +question +list +latest +powerbook +model +releas +appl +thing +look +student +section +click +herei +amass +list +good +site +number +catagori +check +page +madison +depart +madison +alma +mater +site +visit +often +appl +home +page +needsth +nando +time +great +new +coverageth +spot +mind +numb +soap +operaish +drivelziffnet +comput +industri +newsc +databas +manag +system +construct +compil +keep +classworktodai +dilbert +chucklejon +bodner +jonb +wisc +mound +madison +last +modifi +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..a74743cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,107 @@ +welcom +home +page +first +year +graduat +student +depart +came +frombeij +chines +academi +scienc +china +hometown +nanj +capitol +jiangsu +provinc +degre +student +depart +univers +wisconsin +madison +institut +autom +chines +academi +scienc +beij +china +specil +pattern +recognit +imag +process +biomed +engin +depart +southeast +univers +nanj +chinacurr +activ +cours +advanc +comput +network +topic +databas +manag +advanc +oper +system +teach +assist +data +structur +current +address +home +spring +madison +work +comput +scienc +depart +west +dayton +street +madison +tele +offic +home +could +finger +wisc +refer +comput +scienc +depart +inform +class +technic +stuffjava +placeshor +tutorialchina +affairchina +democracybeij +spring +place +interest +stanford +network +groupstanford +medic +informaticsmit +commun +control +signal +processingjob +site +newsyou +visitor +number +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..f27140ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,111 @@ +jeff +shabel +home +pagech +welcom +jeff +shabel +home +page +wisconsinch +theme +song +offic +hour +tue +thur +appoint +offic +person +informationmajor +comput +scienc +architectur +emphasi +statu +second +year +graduat +student +view +fall +schedul +academ +background +receiv +comput +engin +diego +electr +comput +engin +depart +home +town +cupertino +jose +high +school +monta +vista +high +school +cupertino +plan +graduat +comput +scienc +favorit +sport +team +golden +state +warrior +basketbal +jose +mercuri +new +andnando +jose +shark +hockei +jose +mercuri +new +andnando +francisco +footbal +jose +mercuri +new +andnando +oakland +favorit +link +new +jose +mercuri +newsmus +columbia +hous +find +join +columbia +hous +deal +also +tip +info +join +music +club +miscellan +view +print +postscript +document +window +send +mail +jshabel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..637957be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,311 @@ +jussi +myllymakijussi +myllymaki +research +assist +comput +scienc +depart +univers +wisconsin +west +dayton +street +madison +telephon +email +jussi +wisc +eduresearch +summaryi +interest +perform +analysi +dbm +oper +onadvanc +tape +disk +technolog +includ +disk +tape +arrai +mcurrent +studi +buffer +larg +dataset +tertiari +storageto +disk +memori +data +us +data +explor +andvisu +deviseproject +advisor +prof +mironlivni +recent +work +includ +improv +perform +relat +joinsof +larg +volum +disk +tape +resid +data +public +listbelow +appli +structur +organ +tertiari +storageto +solv +problem +associ +divers +characterist +andfunct +limit +tertiari +media +recent +paper +datavisu +explor +discuss +data +metadata +managementissu +larg +complex +data +set +involv +refere +publicationseffici +buffer +concurr +disk +andtap +miron +livni +proceed +perform +intern +confer +perform +theori +measur +evalu +comput +commun +system +octob +integr +visual +parallel +programperform +data +karen +karavan +miron +livni +bartonp +miller +proceed +third +workshop +environ +andtool +parallel +scientif +comput +august +structur +organ +tertiarystorag +daniel +ford +proceed +theintern +confer +data +engin +februari +alsoavail +almaden +research +report +visual +explor +larg +data +set +withmiron +livni +raghu +ramakrishnan +proceed +spie +theintern +societi +optic +engin +januari +disk +tape +join +synchron +disk +andtap +access +miron +livni +proceed +acmsigmetr +confer +submit +publicationdevis +integr +queri +visual +larg +dataset +livni +ramakrishnan +beyer +chen +donjerkov +lawand +myllymaki +wenger +submit +sigmod +confer +relat +join +data +tertiari +storag +jussi +myllymaki +andmiron +livni +submit +intern +confer +dataengin +publicationsdisk +tape +join +synchron +disk +tapeaccess +miron +livni +univers +wisconsin +depart +technic +report +join +tape +project +report +master +degreeproject +report +univers +wisconsin +depart +appli +client +server +model +comput +networkarchitectur +master +thesi +helsinki +univers +technolog +depart +industri +manag +finnish +documentsimplement +perform +analysi +treealgorithm +jeff +schwarz +yoav +weiss +class +report +experi +implement +structur +filesystem +trishul +chilimbi +yoav +weiss +class +report +overview +current +tape +technolog +productsoverview +raid +technolog +supplier +productssom +frequent +need +link +unifi +technic +report +search +adaptec +scsi +adapt +home +digit +alpha +workstationsandpcsandtechn +journaland +whitepap +home +technolog +researchandcyberjourn +quantum +digit +linear +tapeanddlt +faqandwhitepap +home +solarisandsparcstationsandtechn +report +home +scsi +faqandstorag +faqand +otherusenet +faqsmani +link +found +jussi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..000bcf40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepag +jyothithi +page +construct +info +student +cours +grade +other +sorri +dissappoint +email +jyothi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..49f70225 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,108 @@ +karen +karavaniceveryth +need +know +learn +public +school +karen +karavanicresearch +assist +paradyn +parallel +perform +tool +project +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +madison +karavan +wisc +current +pursu +comput +scienc +research +interest +includ +parallel +comput +environ +autom +perform +tune +process +oper +system +databasesask +madison +women +comput +scienc +wic +frontier +scienc +cool +program +dane +counti +high +school +studentstrio +student +support +servic +free +tutor +support +madison +undergradu +miss +site +page +could +save +life +safer +pagefor +chocol +lover +onlystuyves +high +school +alumni +associationstuyves +high +school +class +thoma +legisl +inform +internetth +constitut +cure +anyth +salt +water +sweat +tear +isak +dinesen +ship +port +safe +ship +sail +thing +admir +grace +hopper +comput +pioneer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..3f8a12ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,178 @@ +stefano +kaxira +home +page +stefano +kaxira +kaxira +wisc +editor +ieee +kiloprocessor +extens +sciresearch +assist +univers +wisconsin +research +interest +summari +recent +publicationsresearch +interest +share +memori +multiprocess +scalabl +coher +interfac +cach +design +aspect +parallel +processor +memori +memori +processor +architectur +galileo +research +summari +introduc +glow +kiloprocessor +extens +hierarch +extens +collaborationwith +goodman +work +examin +depth +design +option +develop +upcom +standard +incolabor +goodman +david +jame +stein +gjess +recent +public +glow +cach +coher +protocol +extens +wide +share +data +stefano +kaxira +jame +goodmanto +appear +proceed +intern +confer +supercomput +also +technic +report +kiloprocessor +extens +stefano +kaxirasto +appear +proceed +intern +parallel +process +symposium +april +implement +perform +glow +kiloprocessor +extens +wisconsin +wind +tunnel +stefano +kaxira +jame +goodmannd +intern +workshop +base +high +perform +cost +comput +march +hierarch +extens +cach +coher +stefano +kaxira +jame +goodmanst +intern +workshop +base +high +perform +cost +comput +august +hierarch +extens +jame +goodman +stefano +kaxirasunivers +wisconsin +comput +scienc +dept +juli +softwar +tool +simul +prototyp +monitor +multiprocessor +system +stafylopati +papakonstantin +kaxirasinform +softwar +technolog +autom +synthesi +parallel +dedic +architectur +us +prolog +specif +tsanaka +papakonstantin +pekmestzi +kaxirasp +greec +hardwar +synthesi +methodolog +us +prolog +tsanaka +papakonstantin +kaxirasmicroprocess +microprogram +north +holland diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..b76667aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,323 @@ +keeper +home +page +steven +foughtthei +heaven +like +perfect +littl +world +doesn +realli +need +everyth +made +light +lauri +anderson +strang +angel +note +possibl +page +make +structur +markup +indic +piec +inform +rather +look +made +inform +page +access +peopl +bitmap +displai +includ +theblind +us +standard +markup +tag +allow +whateverbrows +wish +also +note +page +make +literari +convent +ironi +satir +sarcasm +butnoth +contain +herein +meant +offens +areoffend +probabl +stupid +start +third +year +graduat +student +comput +scienc +depart +firsttwo +week +sinc +support +take +care +varieti +machin +vari +degre +success +graduat +student +side +never +publish +ever +done +anyth +impress +miracl +pass +prelim +research +addup +hill +bean +like +fizzl +result +areobtain +probabl +leav +junior +level +programm +creatingkiosk +front +end +visual +primit +system +leav +perman +skill +free +surpris +year +discov +especi +surpris +wasn +convinc +aliv +thosewho +know +well +would +probabl +argu +proof +mybe +aliv +anywai +might +want +read +thoughtson +hedgehog +contact +electron +mail +pager +reliabl +specif +locat +often +work +home +ifyou +would +like +person +write +someth +address +keeper +wisc +becom +clear +wantto +contact +short +notic +give +pager +number +variou +creation +thought +hedgehog +todo +list +updat +sporad +list +associ +rsum +postscript +document +html +section +long +outof +date +unfortun +danenet +dilhr +jobnet +databas +soon +defunct +caltech +project +caltech +institut +archiv +photonet +databas +caltech +personnel +directori +look +much +better +insid +caltech +caltech +databas +insid +caltech +anyon +enter +page +databaseus +form +interfac +slide +slide +talk +gave +slide +talk +gave +distribut +object +slide +talk +gave +call +java +danger +love +come +hell +freez +rate +inform +page +break +hierarchi +consult +inform +somewher +steven +fought +unifi +attribut +index +sfuai +informationag +intellectu +properti +everyth +index +allow +assigna +uniqu +serial +number +refer +quot +atth +page +provid +quot +sourc +contextu +inform +pointer +relev +quot +index +bui +adob +distil +translat +rsuminto +world +user +chanc +ofread +suppos +print +pinch +want +write +us +comput +certaintruth +psycholog +us +comput +softwar +peopl +eventuallypick +aren +ever +taught +explicitli +think +possibleto +would +make +start +comput +easier +cheap +shot +thing +hate +project +idea +mull +probabl +accessibleto +small +subset +user +tough +world diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..3e3bdb0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +krishna +kunchithapadamkrishna +kunchithapadamgreet +welcom +page +interest +read +languag +indian +classic +music +miscellaneouspubl +data +distribut +perform +steer +perform +toolsresum +gzip +postscript +contact +search +last +modifi +bykk diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..283429dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,42 @@ +kristin +home +pagekristin +tuft +research +assist +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +tuft +wisc +eduadvisor +david +dewitt +miscellani +inform +serveruw +madison +dbm +research +groupacm +sigmod +inform +server +home +pageeo +project +scienc +officelast +modifi +kristin +tuftekristin +tuft +tuft +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..f1db3bf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,61 @@ +krung +homepageupd +novemb +krung +homepag +underconstructioni +keep +page +short +inform +good +serf +year +cometh +follow +relat +topic +research +mathemat +program +project +pursu +cours +work +cours +work +comput +scienc +depart +comput +compani +favorit +hobbi +person +inform +person +opinion +life +madison +wisconsin +linkedth +follow +page +import +link +univers +madison +wisconsin +whole +comput +scienc +depart +uniqu +entiti +electron +librari +system +krung +sinapiromsaran +emailkrung +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..4453013c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,225 @@ +kunen +home +page +kenneth +kunen +professormath +comput +scienc +univers +wisconsin +dayton +madison +mail +kunen +wisc +edutelephon +stanford +univers +interest +autom +deduct +logic +program +theori +topolog +research +summari +research +work +involv +logic +applic +typic +applic +autom +deduct +logic +program +autom +deduct +tool +like +resolutionto +prove +mathemat +theorem +logic +program +studi +semant +languag +likeprolog +specif +topic +consid +prologus +negat +failur +semant +incompat +betweenleast +fix +point +comput +prolog +style +backtrack +comput +mathemat +logic +work +axiomat +theori +besid +interest +right +thissubject +relat +variou +abstract +area +mathemat +theoret +topolog +measur +theori +mani +basic +question +turn +independ +usualaxiom +theori +select +recent +public +follow +postscript +file +kunen +shortest +singl +axiom +group +expon +comput +mathemat +applic +hart +kunen +singl +axiom +expon +group +autom +reason +kunen +ramsei +theorem +boyer +moor +logic +appear +autom +reason +kunen +mill +measur +corson +compact +space +fundamenta +mathematica +hart +kunen +local +constant +function +fundamenta +mathematica +kunen +semant +answer +liter +technic +report +univers +wisconsin +appear +autom +reason +kunen +construct +comput +mathemat +technic +report +univers +wisconsin +appear +autom +reason +kunen +moufang +quasigroup +algebra +kunen +quasigroup +loop +associ +law +preprint +appear +algebra +kunen +structur +conjugaci +close +loop +preprint +kunen +complet +result +link +resolut +appear +press +hart +kunen +weak +measur +extens +axiom +rough +draft +book +review +hart +kunen +review +note +theori +moschovaki +american +mathemat +monthli +cours +taught +fall +math +geometr +infer +reason +math +foundat +mathemat +spring +comp +artifici +intellig +last +chang +octob +kunen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..00fcd239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,520 @@ +jame +laru +home +page +jame +laru +laru +wisc +associ +professor +comput +sciencedepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usalaru +wisc +eduphon +secretari +juli +fingerson +thea +sklenar +department +offic +educ +research +interest +research +project +upcom +cours +softwar +recent +paper +graduat +summari +educ +univers +california +berkelei +univers +california +berkelei +harvard +colleg +research +interest +program +languagesand +compil +particular +languag +compil +parallel +machin +design +program +share +memori +parallel +comput +compil +symbol +languag +program +profil +trace +program +execut +edit +research +project +wisconsin +wind +tunnel +larg +grain +data +parallel +program +languag +execut +edit +librari +cours +data +structuresc +java +softwar +spim +wartsrec +paperseffici +path +profil +thoma +ball +jame +laru +appear +micro +decemb +parallel +program +larg +grain +data +parallel +programminglanguag +jame +laru +brad +richard +guhan +viswanathan +gregori +wilson +parallel +program +us +press +teapot +languag +support +write +memori +coher +protocol +satish +chandra +brad +richard +jame +laru +sigplan +program +languag +design +implement +pldi +instruct +schedul +execut +edit +eric +schnarr +andjam +laru +appear +workshop +compil +support +system +softwar +wcsss +februari +effici +support +irregular +applic +distribut +memori +machin +shubhendu +mukherje +shamik +sharma +mark +hill +jame +laru +annerog +joel +saltz +fifth +sigplan +symposium +principl +practiceof +parallel +program +ppopp +juli +machin +independ +execut +edit +jame +laru +eric +schnarr +sigplan +confer +program +languagesdesign +implement +pldi +june +tempest +substrat +portabl +parallel +program +mark +hill +jame +laru +david +wood +compcon +spring +march +static +branch +frequenc +program +profil +analysi +youfeng +jame +laru +annual +ieee +intern +symposium +microarchitectur +micro +novemb +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +markhil +jame +laru +ann +roger +david +wood +supercomput +novemb +time +spent +messag +pass +share +memori +program +satish +chandra +jame +laru +ann +roger +sixth +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +system +support +languag +implement +jame +laru +brad +richard +guhan +viswanathan +sixth +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +fine +grain +access +control +distribut +share +memori +ioanni +schoina +babak +falsafi +alvin +lebeck +steven +reinhardt +jameslaru +david +wood +sixth +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +cachier +tool +automat +insert +cico +annot +trishul +chilimbi +jame +laru +intern +confer +parallel +program +icpp +august +wisconsin +wind +tunnel +project +annot +bibliographi +mark +hill +jame +laru +david +wood +unpublish +manuscript +revis +frequent +cooper +share +memori +softwar +hardwar +scalabl +multiprocessor +mark +hill +jame +laru +steven +reinhardt +david +wood +transact +comput +system +toc +novemb +wisconsin +architectur +research +tool +wart +mark +hill +jame +laru +alvin +lebeck +madhusudhan +talluri +david +wood +comput +architectur +new +august +graduatesbrad +richard +august +memori +system +parallel +program +first +employ +vassar +colleg +guhan +viswanathan +septemb +techniqu +compil +data +parallel +languagesfirst +employ +oracl +lorenz +huelsbergen +august +dynam +languag +parallel +first +employ +bell +lab +lorenz +research +thoma +ball +august +control +flow +control +depend +softwar +tool +first +employ +bell +lab +tball +research +research +summarymi +research +focus +problem +program +comput +part +thewisconsin +wind +tunnel +project +havehelp +develop +hybrid +softwar +hardwar +share +memori +computerarchitectur +facilit +program +compil +parallelmachin +current +student +develop +languag +compil +tool +demonstr +exploit +power +user +level +coherencepolici +also +interest +perform +evalu +tool +help +programmersunderstand +improv +program +perform +recent +ball +andi +develop +effici +path +profil +algorithm +provid +moredetail +understand +control +flow +within +routin +hasidentifi +possibl +better +compil +last +modifi +jame +laru +laru +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..72572875 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick +pagenick +pageoffic +phone +email +leavi +wisc +eduoffic +hour +tuesdai +wednessdai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..88435d57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,107 @@ +steven +huss +lederman +home +page +steven +huss +lederman +home +page +research +interest +includ +research +univ +wisconsin +madison +relat +thewisconsin +wind +tunnel +project +research +area +parallel +linear +algebra +iscov +prismproject +also +heavili +invol +mpistandard +sever +other +recent +publish +book +origin +mpistandard +inform +order +book +press +isbn +also +look +complet +refer +editor +current +draft +forum +pleas +keep +mind +work +forum +ongo +andit +document +intend +interest +ongoingwork +forum +committe +member +compress +postscript +complet +sourc +compressedtar +file +individu +sourc +file +avail +inform +would +finger +steven +huss +lederman +comput +scienc +dept +univ +wisconsin +madison +dayton +madison +phone +messag +desper +mail +lederman +wisc +http +wisc +lederman +lederman +html +offic +comput +scienc +statist +build diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..4fa0d0a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,118 @@ +home +page +lawrenc +landweb +lawrenc +landweb +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +wisc +purdu +univers +interest +comput +network +protocol +high +speed +network +electronicmail +research +summari +research +program +focus +high +speed +network +participatingin +gigabit +project +darpa +nation +project +involvesth +design +implement +network +testb +oper +atgigabit +second +data +rate +wisconsin +work +onissu +protocol +design +congest +admiss +control +visualizationof +atmospher +phenomena +virtual +conferenc +sampl +recent +public +design +implement +fast +virtual +circuit +establishmentmethod +network +olsen +proceed +theieee +infocom +confer +francisco +april +dynam +time +window +packet +admiss +control +feedback +witht +faber +mukherje +proceed +sigcommconfer +baltimor +august +dynam +time +window +gener +virtual +clock +combin +close +loop +open +loop +congest +control +mukherje +faber +proceed +ieee +infocom +confer +florenc +network +coursesconnect +tabl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..8ca180e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,71 @@ +shannon +lloyd +home +page +shannon +lloyd +work +address +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +madison +lloyd +wisc +respons +us +comput +lectur +section +comp +offic +hour +comp +wednesdai +thursdai +appoint +fall +cours +construct +compil +comput +linguist +variou +link +women +comput +scienc +univers +utah +depart +chemistri +univers +utah +depart +comput +scienc +person +engin +career +servic +comput +languag +archiv +comput +linguist +natur +languag +process +artifici +intellig +cognit +linguist +scienc +xsoft +lexdemo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..0dc93aa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,223 @@ +christoph +luka +home +pagechristoph +lukasrelev +inform +offic +phone +email +luka +wisc +edui +appar +coordin +mspl +fall +workshipi +also +defeat +rival +quest +becom +semest +sunivers +wisconsin +program +languag +seminar +czar +cours +go +take +fall +festiv +java +taught +advisor +tuft +univers +mail +list +click +site +entertain +save +tiger +number +free +prisonerthi +stock +quoteserv +maintain +fabul +wealth +todd +amus +page +check +friend +page +cool +thing +also +pagebet +polit +candid +legal +iowa +electron +market +identitycaptain +kirk +sing +lone +gui +lone +troubl +meet +women +throughamaz +technolog +longer +need +concern +withtri +interact +real +women +virtual +girlfriend +traci +teri +wait +meet +wife +incred +jump +catthi +realli +cool +fill +anywai +like +know +well +realli +entertain +pleas +fill +send +option +case +feel +someth +current +list +name +mail +address +favorit +thing +feel +check +appli +killer +buttmunchextrem +dudemichael +nesmith +fanfoolmyth +figurewick +good +basketbal +playervalu +studentment +defectivea +wkrp +cincinatti +tragic +like +figuregeek +tradesgonzo +admirernetscap +junki +search +child +pornpersonifi +organ +condom +stretch +much +readi +blowflam +testicl +outer +space +tast +goodpoetri +guruhogwildthi +kick +assman +manbig +dudeuh +ohprofession +muff +diverregress +higher +lifeformherald +alien +invas +forcechri +know +html +formsalienherpetophiletodd +turnidg +hatth +mancreepi +laugh +headsmal +planetdr +companioneast +bunnycyberweenietcl +hellbeast +option +checkbox +simpli +fill +click +submit +reload +page +mayb +figur +automat +option +cool +interact +checkbox +option +keyword +includ +includ +page +interest +search +superhighwai +drug +cosmo +irrit +gross +nake +scatolog +pervert +offspr +food +etymolog +phat +gnarli +bogu +wierd +cybermuffin +pictur +erotica +chees diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..62de1570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,123 @@ +welcom +ling +zheng +home +page +depart +comput +scienc +sheboygan +dayton +madison +madison +offic +phone +mail +lzheng +wisc +shameless +self +promot +resum +text +version +want +know +side +click +research +interest +research +assist +paradyn +group +current +hack +paradyn +onto +hpux +port +boss +barton +miller +also +charg +home +page +wuhan +univers +chinaand +want +take +look +girlfriend +pictur +temporarili +comput +architectur +educ +prese +dept +comput +scienc +univers +winsconsin +dept +comput +scienc +univers +iowa +dept +comput +scienc +wuhan +univers +officem +marcelo +goncalv +alumni +click +ignor +wuhan +univers +alumni +home +page +china +home +page +place +surf +compani +univers +hereif +want +know +best +graduat +school +comput +scienc +sthe +infom +could +take +look +america +best +graduat +schoolssend +ling +zheng +mail +suggest +homepag +bother +thank +last +updat +march +visitor +number +sinc +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..5b49647d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,50 @@ +manuvir +home +pagemanuvir +dasnow +know +name +andwhat +look +like +hello +feelfre +look +around +need +inform +somethingsend +email +like +passion +golf +anact +photo +later +manuvirwhat +know +know +gener +start +advisor +better +thisto +keep +monei +come +turn +theorigin +america +team +cours +leagu +plai +dai +sundai +round +golf +final +consin +said +manuvir +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..6c760e29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,913 @@ +mark +hill +home +pagemark +hill +markhil +wisc +associ +professor +comput +scienc +andelectr +comput +engineeringat +univers +wisconsint +content +address +offic +hour +current +teach +catalog +inform +educ +research +interest +andsummari +wisconsin +wind +tunnel +project +sampler +recent +paper +graduateslink +us +inform +world +wide +comput +architectur +inform +wisconsin +comput +architectur +group +wisconsin +architectur +research +tool +wart +stuff +like +oralpresent +advic +includ +david +patterson +show +give +talk +onlin +data +forcach +perform +spec +benchmark +suit +proof +hardwar +wisconsin +sound +address +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usamarkhil +wisc +eduphon +secretari +juli +fingerson +thea +sklenar +department +offic +offic +hour +fall +mondai +wednesdai +appoint +markhil +wisc +educurr +teachingfal +advanc +comput +architectur +ifal +topic +comput +java +languag +implement +icatalog +inform +cours +teachc +machin +organ +programmingc +introduct +comput +architecturec +advanc +comput +architectur +advanc +comput +architectur +iieduc +comput +scienc +univers +california +berkelei +comput +scienc +univers +california +berkelei +comput +engin +univers +michigan +research +interest +comput +architectur +parallel +comput +memori +system +perform +evaluationresearch +summarymi +research +target +memori +system +share +memori +multiprocessorsand +high +perform +uniprocessor +memori +system +design +import +larg +determin +comput +sustain +perform +mywork +emphas +quantit +analysi +often +requir +evaluationtechniqu +system +level +hardwar +perform +much +recent +work +part +wisconsin +windtunnel +projectwith +prof +laru +wood +manystud +project +expect +futur +massiv +parallel +computerswil +built +workstat +like +node +program +high +levelparallel +languag +like +support +share +address +space +inwhich +process +uniformli +refer +data +research +seek +todevelop +consensu +middl +level +interfac +languagesand +compil +system +softwar +hardwar +recentlypropos +tempest +interfac +enabl +programm +compil +program +librari +implement +messag +pass +transpar +share +memori +hybrid +combin +aredevelop +tempest +implement +think +machin +aclust +workstat +hypothet +hardwar +platform +wisconsin +wind +tunnel +project +name +toolsto +cull +design +space +parallel +supercomput +manner +similarto +aeronaut +engin +convent +wind +tunnel +designairplan +recent +work +madhu +talluritarget +improv +translat +lookasid +buffer +page +tabl +perform +cluster +align +group +base +page +option +requir +chang +hardwar +complet +subblock +tlb +oper +system +cluster +page +tabl +superpagesand +partial +subblock +tlb +asplosandsosppap +sampler +recent +papersth +wisconsin +wind +tunnel +project +annot +bibliographi +mark +hill +jame +laru +david +wood +unpublish +manuscript +revis +frequent +parallel +comput +research +wisconsin +wind +tunnel +project +mark +hill +jame +laru +david +wood +confer +experiment +research +comput +system +june +bidirect +technolog +transfer +sabbat +industri +mark +hill +confer +experiment +research +comput +system +june +coher +network +interfac +fine +grain +commun +shubhendu +mukherje +babak +falsafi +mark +hill +anddavid +wood +intern +symposium +comput +architectur +isca +optimist +simul +parallel +architectur +us +program +execut +sashikanth +chandrasekaran +mark +hill +workshop +parallel +distribut +simul +pad +page +tabl +address +space +madhusudhan +talluri +mark +hill +yousef +khalidi +symposium +oper +system +princip +sosp +decemb +presidenti +young +investig +award +final +report +mark +hill +juli +effici +support +irregular +applic +distribut +memori +machin +shubhendu +mukherje +shamik +sharma +mark +hill +jame +laru +ann +roger +joel +saltz +ppopp +juli +cost +effect +parallel +comput +david +wood +mark +hill +ieee +comput +februari +solv +microstructur +electrostat +propos +parallel +comput +frank +traenkl +mark +hill +sangta +comput +chemic +engin +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +mark +hill +jame +laru +ann +roger +david +wood +supercomput +surpass +perform +superpag +less +oper +system +support +madhusudhan +talluri +mark +hill +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +evalu +directori +protocol +medium +scale +share +memorymultiprocessor +shubhendu +mukherje +mark +hill +intern +confer +supercomput +juli +comparison +trace +sampl +techniqu +multi +megabyt +cach +kessler +mark +hill +david +wood +ieee +transact +comput +june +cooper +share +memori +softwar +hardwar +scalabl +multiprocessor +mark +hill +jame +laru +steven +reinhardt +david +wood +transact +comput +system +toc +novemb +wisconsin +architectur +research +tool +wart +mark +hill +jame +laru +alvin +lebeck +madhusudhan +talluri +david +wood +comput +architectur +new +august +cach +perform +spec +benchmark +suit +jeffrei +mark +hill +dionisio +pnevmatikato +alan +smith +ieee +micro +august +unifi +formal +four +share +memori +model +sarita +adv +mark +hill +ieee +transact +parallel +distribut +system +tpd +june +perform +implic +toler +cach +fault +andrea +farid +pour +mark +hill +ieee +transact +comput +march +mechan +cooper +share +memori +david +wood +satish +chandra +babak +falsafi +mark +hill +jame +laru +alvin +lebeck +jame +lewi +shubhendu +mukherje +subbarao +palacharla +steven +reinhardt +intern +symposium +comput +architectur +isca +wisconsin +wind +tunnel +virtual +prototyp +parallel +comput +steven +reinhardt +mark +hill +jame +laru +alvin +lebeck +jame +lewi +david +wood +sigmetr +page +placement +algorithm +larg +real +index +cach +kessler +mark +hill +transact +comput +system +novemb +program +differ +memori +consist +model +kourosh +gharachorloo +sarita +adv +anoop +gupta +john +hennessi +mark +hill +journal +parallel +distribut +comput +august +tradeoff +support +page +size +madhusudhan +talluri +shing +kong +mark +hill +david +patterson +intern +symposium +comput +architectur +isca +detect +data +race +weak +memori +system +sarita +adv +mark +hill +barton +miller +robert +netzer +intern +symposium +comput +architectur +isca +june +comparison +hardwar +softwar +cach +coher +scheme +sarita +adv +vikram +adv +mark +hill +mari +vernon +intern +symposium +comput +architectur +isca +june +model +estim +trace +sampl +miss +ratio +david +wood +mark +hill +kessleracm +sigmetr +implement +stack +simul +highli +associ +memori +extend +abstract +mark +hill +david +wood +sigmetr +implement +sequenti +consist +cach +base +system +sarita +adv +mark +hill +intern +confer +parallel +process +august +weak +order +definit +sarita +adv +mark +hill +intern +symposium +comput +architectur +isca +june +graduatesmadhusudhan +talluri +expect +august +superpag +subblock +address +translat +hierarchi +first +employ +microsystem +current +email +madhu +sarita +adv +novemb +design +memori +consist +model +share +memori +multiprocessor +first +employ +assist +professor +rice +univers +current +email +sarita +rice +richard +kessler +juli +analysi +multi +megabyt +secondari +cach +memori +click +tabl +content +first +employ +crai +research +current +email +richard +kessler +crai +last +updatedw +keyword +help +search +engin +rank +page +higher +page +mark +hill +home +page +comput +scienc +wisconsin +mark +hill +home +page +comput +scienc +wisconsin +mark +hill +home +page +comput +scienc +wisconsin +mark +hill +home +page +comput +scienc +wisconsin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..10a8554b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,22 @@ +home +page +marko +zaharioudaki +marko +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaphon +mail +marko +wisc +note +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..bb4c44c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,22 @@ +michael +birk +home +page +michael +birk +home +page +section +section +project +list +home +page +program +languag +link +alltraxx +home +page +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..e363721e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,70 @@ +mark +mcauliff +mark +mcauliff +comput +scienc +depart +univers +wisconsin +madison +dayton +madison +mcauliff +wisc +research +interest +design +implement +object +orient +databas +system +public +carei +dewitt +franklin +hall +mcauliff +naughton +schuh +solomon +tsatalo +white +zwill +shoringup +persist +applic +proc +sigmod +mark +mcauliff +marvin +solomon +atrac +base +simul +pointer +swizzl +techniqu +proceed +ieee +data +engin +march +mark +mcauliff +michael +carei +marvin +solomon +towardseffect +effici +free +space +manag +appear +proceed +sigmod +confer +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..fc0a95c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,141 @@ +marc +shapiro +page +marc +shapiro +believ +tautolog +tautolog +tautolog +current +obsess +fond +disappear +fear +repuls +ponder +fast +pointer +analys +watch +lot +jacki +chan +movi +think +program +languag +design +read +much +try +teach +elementari +school +student +think +term +recurs +hope +interrupt +hoar +wrote +pointer +introduct +high +level +languag +step +backward +never +recov +home +page +schedul +todd +automat +accid +gener +elain +dimasi +twisti +littl +page +amanda +peet +retreather +hyper +mode +emac +thepul +menu +doesn +cool +html +tag +submiss +softwarei +cobbl +togeth +pldi +abl +work +nowinclud +previous +mostli +miss +file +submit +html +popl +paper +fast +accur +flow +insensit +point +analysi +marc +shapiroand +susan +horwitz +appear +symposium +principl +program +languag +variou +address +marc +shapiro +dept +dayton +madison +mail +wisc +talk +hous +wisc +finger +hous +wisc +marion +madison +list +peopl +know +realli +marc +shapiro +meet +jonathan +goldstein +paul +ferguson +lawrenc +brown +last +modifi +marc +shapiro +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..04df81f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,27 @@ +home +pagerob +minimalist +home +page +last +modifi +august +mellencamp +taship +introduct +oper +system +email +mellen +wisc +offic +comput +scienc +build +offic +phone +offic +hour +appoint +mellen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..11f57785 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,96 @@ +david +melski +person +info +pagedavid +melskicurr +depart +mill +comput +scienc +statisticsmadison +dayton +madison +permen +west +ivesmarshfield +michel +awesom +person +page +current +construct +sister +kasei +great +home +page +brother +eric +semest +teach +coupl +section +also +work +rep +program +languag +myexact +schedul +still +need +determin +undergrad +major +comput +scienc +russian +studiesher +univers +wisconsin +even +spent +fall +semesterof +russia +chanc +russian +often +miss +somedai +somedai +make +back +interest +includ +chess +soccer +recent +beenbik +also +distract +work +numerousbook +hasti +rewrit +page +also +want +link +tomapquest +plan +steal +alot +map +second +give +direct +madison +marshfield +last +modifi +david +melski +melski +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..0481eebb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,297 @@ +milo +martin +home +pagemilo +martin +milo +wisc +graduat +student +teach +assistantcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usaemail +milo +wisc +eduoffic +offic +phone +offic +hour +tuesdai +thursdai +byappointmentba +comput +scienc +gustavu +adolphu +colleg +class +compil +construct +charl +fischer +advanc +comput +architectur +mark +hill +java +sit +mark +hill +jame +larusteach +algebra +languag +program +section +research +interestsi +first +year +student +interest +programminglanguag +architectur +system +specif +interest +compil +optim +technolog +beinfluenc +hardwar +oper +system +advanc +mobil +program +java +addit +challeng +present +compil +architectur +oper +system +design +mani +mani +thing +mani +even +know +interest +publicationsresearch +perform +summer +argonn +nation +laboratori +technolog +develop +divis +advis +charl +fink +fink +humm +martin +micklich +evalu +view +reconstruct +paramet +illicitsubst +detect +us +fast +neutron +transmiss +spectroscopi +ieee +nuclear +scienc +symposium +medic +imag +confer +fink +micklich +yule +humm +sagalovski +martin +evalu +neutron +techniqu +illicitsubst +detect +nucl +inst +meth +publicationsresearch +perform +school +year +gustavu +adolphu +colleg +advis +hailperin +milo +martin +hailperin +program +languageflex +determinist +dynam +parallel +comput +senior +honor +thesi +mathemat +comput +scienc +depart +gustavu +adolphu +colleg +postscript +comput +interest +java +java +resourc +next +softwar +next +comput +softwar +compani +found +anintern +scientif +educ +organ +dedic +toadvanc +scienc +engin +applic +informationtechnolog +serv +profession +public +interest +fosteringth +open +interchang +inform +promot +highestprofession +ethic +standard +direct +quot +page +person +interest +footbal +bignfl +footbal +sinc +live +minnesota +year +myfavorit +team +minnesota +vike +eventhough +live +land +chees +head +colon +conquer +multi +player +plai +mail +space +explor +combat +game +wrote +babylon +best +show +imho +atlanti +atlanti +plai +email +game +mythic +world +atlanti +player +build +armi +engaug +trade +explor +land +fight +wonder +monster +train +wizard +discov +underworld +right +player +rule +current +list +player +ultim +frisbe +ultim +player +associ +ultim +combin +element +ofsocc +footbal +basketbal +fast +pace +game +plai +afrisbe +everyon +quarterback +everyon +receiv +direct +quot +home +page +ultimatein +simpl +rule +milo +martin +milo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..df3c3596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,169 @@ +mino +home +page +mino +garofalakismino +wisc +eduphd +candid +research +assist +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaoffic +stat +phone +home +workresearch +interest +effect +resourc +manag +parallel +multimedia +databas +system +complex +queri +process +optim +parallel +algorithm +databas +theoryeduc +comput +scienc +univers +wisconsin +madison +dept +comput +scienc +decemb +comput +scienc +univers +patra +dept +comput +engin +informat +june +refere +public +multi +dimension +resourc +schedul +parallel +queri +mino +garofalaki +yanni +ioannidi +proceed +sigmod +confer +montreal +canada +june +abstract +paper +postscript +schedul +issu +multimedia +queri +optim +mino +garofalaki +yanni +ioannidi +comput +survei +decemb +paper +postscript +technic +report +resourc +schedul +enhanc +view +continu +media +databas +mino +garofalaki +banu +ozden +silberschatz +submit +public +octob +model +check +sequenti +probabilist +real +time +system +mino +garofalaki +technic +report +comput +technolog +institut +patra +februari +advisor +yanni +ioannidismor +feel +free +peek +resum +pointer +interest +stuff +madison +dbm +reasearch +home +page +madison +hellen +societi +home +page +sigmod +home +page +vldb +home +page +almaden +research +center +watson +research +centerdr +michael +bibliograpi +server +databas +logic +program +page +perpetu +construct +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..49ee2025 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,28 @@ +marcelo +gonalv +marcelo +gonalv +mjrg +wisc +associ +research +paradyn +project +addresswork +home +comput +scienc +depart +sheboygan +west +dayton +street +madison +madison +phone +phone +depart +comput +sciencesunivers +wisconsin +madison diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..47fb6c67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,39 @@ +brian +morgan +home +page +brian +morgangradu +studentcomput +scienc +depart +univers +wisconsin +madison +west +dayton +streetmadison +mail +morgan +wisc +telephon +advisor +chuck +dyerresearch +interestsvirtu +conferenc +system +imag +compress +video +conferenc +high +bandwidth +network +relat +link +interest +wisconsin +comput +vision +group diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..c0e49e14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,149 @@ +andrea +moshovo +home +page +andrea +moshovosresearch +assist +depart +comput +sciencesunivers +wisconsin +madisonadvisor +guri +sohigroup +multiscalar +wisconsin +kestrel +comput +architectur +address +leav +notese +aroundw +peek +futur +clickheremi +brother +write +poetri +click +herefor +sampl +work +current +work +data +depend +specul +processor +download +technic +report +compress +postscript +uncompress +postscript +download +talk +slide +load +balanc +multiscalar +processor +data +specul +processor +gener +interest +comput +architectur +instruct +level +parallel +compil +support +explot +vlsi +fall +spring +graduat +student +thecour +instituteof +york +univers +earn +degre +sinc +transfer +wisconsin +howev +theopportun +work +excel +peopl +meet +wife +comput +scienc +univers +crete +greec +implement +numer +algorithm +access +decoupl +architecturethat +support +softwar +pipelin +advisor +kateveni +short +descript +found +comput +scienc +univers +crete +greec +viha +like +editor +support +edit +greek +mani +mani +interest +link +hellen +resouc +network +sure +visit +obtain +instal +greek +font +local +copi +page +resid +atwww +hyper +devil +dictionari +bookmark +mess +nation +fraud +inform +centerusenet +chang +want +send +afax +free diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..5bd705db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,47 @@ +toni +home +pagewhat +newoctob +back +inmadison +updat +sever +page +chang +background +black +better +contrast +updat +contact +inform +minor +chang +variou +page +list +older +updat +prefer +keep +main +page +brief +herear +link +second +level +page +navig +index +friend +favorit +page +interest +contact +informationlast +modifi +octob +wisc +educopyright +toni diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..31288a0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,190 @@ +martin +ream +home +page +martin +ream +graduat +student +teach +assist +also +coke +poobah +finger +coke +machin +comput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +telephon +telephon +dept +email +mream +wisc +edufal +scheduleresearch +interest +databas +particular +digit +terrain +model +tin +program +languag +compil +design +logic +logicprogram +qualifi +exam +databas +spring +previou +year +exam +interest +softwar +design +develop +product +orientedenviron +exploit +comput +scienc +educ +interestsin +databas +compil +design +resum +postscriptand +html +distribut +wisconsin +affili +mathemat +comput +scienc +wesleyan +univers +faint +heart +section +page +alink +senior +honorsthesi +gener +unif +coke +poobahlook +work +mighti +afraid +dear +gone +coke +poobah +tomi +head +usual +gone +realli +want +talk +tosomeon +better +adjust +crucial +role +dept +life +youshould +probabl +elton +doesn +even +mention +poobah +page +imaginethat +besid +aforement +coke +poobahship +mental +ill +afew +thing +might +want +know +third +yeargradu +student +depart +wisconsin +concentr +indatabas +current +studi +qual +sometim +inearli +februari +exercis +relax +plai +squash +reason +well +year +round +ultim +frisbe +summer +basketbal +poorli +andinfrequ +notic +thing +might +rapidlyrid +mountain +bike +around +campu +even +chilliest +weather +alwai +helmet +wish +learn +interest +feel +free +examin +mynot +often +updat +hierarchi +stuff +ilik +enjoi +wisconsin +line +librarylast +modifi +martin +ream +mream +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..8d40b0ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,45 @@ +wisconsin +multiscalar +project +home +pagewisconsin +multiscalar +project +technic +paper +talk +given +multiscalar +peopl +contributor +fund +sourc +relat +project +avail +softwar +wisconsin +comput +architectur +group +comput +scienc +departmentat +univers +wisconsin +world +wide +comput +architectur +inform +inform +interest +local +user +last +updat +februari +guri +sohi +sohi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..b56ee27d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,169 @@ +mike +steel +homepagemik +steel +homepagemsteel +wisc +eduoffic +comp +stat +build +sit +univ +maryland +depart +comput +struggl +undergradu +oper +system +class +sometimearound +april +note +time +stamp +lower +right +corner +sai +folk +graduat +student +comput +scienc +depart +univers +wisconsin +madison +school +motto +come +freezein +land +chees +research +studi +interest +center +around +artificialintellig +comput +vision +oper +system +hope +narrow +come +year +semest +graduateinstructor +section +algebra +languag +program +receiv +bachelor +degre +theunivers +maryland +scomput +scienc +depart +publicationsgrindston +test +suit +parallel +perform +tool +jefferyk +hollingsworth +michael +steel +comput +scienc +technic +reportc +univers +maryland +gzip +postscriptfil +class +semesterc +introduct +oper +system +mari +vernonc +introduct +artifici +intellig +chuck +dyermi +pagesinform +gettingin +touch +friend +back +marylandwhom +forgot +mail +address +favorit +linksmi +favorit +sport +teamssom +friend +usenet +dave +barri +frequent +ask +question +listth +usenet +billi +joel +frequent +ask +question +listi +also +mail +list +administr +thefruit +mail +list +still +host +univers +ofmaryland +start +insidejok +around +offic +hand +sometim +andnow +member +world +wide +predat +come +kill +someinfrar +photo +know +looklik +infrar +pictur +memik +steelemsteel +wisc +eduunivers +wisconsin +madisoncomput +scienc +depart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..e61999c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,19 @@ +maria +home +pagemaria +home +pagehow +visit +univers +maryland +colleg +park +mayb +univers +wisconsin +madison +section +might +want +visit +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..fda41c84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,91 @@ +oscar +naim +home +page +bienvenido +staff +member +work +paradyn +project +finish +myph +univers +southampton +england +undergrad +universidad +simon +bolivar +caraca +venezuela +born +beauti +citi +barquisimeto +venezuela +barquisimetoi +locat +central +western +part +venezuela +popul +ofabout +million +peopl +barquisimeto +also +known +music +capit +venezuela +main +research +area +perform +analysi +visual +parallel +program +howev +apart +research +like +playclass +guitar +fact +studi +year +excellentmaestro +rodrigo +riera +antonio +lauro +like +spend +time +plai +guitar +read +good +book +sherlock +holm +stori +cook +watch +basebal +work +like +beati +pictur +venezuela +pleaseclick +finger +naim +wisc +check +around +mundo +barquisimeto diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..2780ad66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,45 @@ +anastassia +ailamaki +home +welcom +home +anastassia +ailamaki +graduat +student +comput +scienc +depart +univers +wisconsin +madison +dayton +street +madison +phone +realli +realli +want +pictur +import +notic +find +time +make +decent +home +page +nice +link +georg +rochest +alex +guid +greek +islandsar +worth +visit +send +mail +natassa +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..f91e9643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,134 @@ +jeffrei +naughtonjeffrei +naughtonnaughton +wisc +eduresearch +interestsolap +multi +dimension +data +analysi +parallel +object +relationaldbm +overal +goal +research +develop +ofdatabas +system +surpass +current +databas +system +inperform +eas +current +three +main +area +ofinterest +techniqu +improv +perform +ofmulti +dimension +data +analysi +includ +arrai +base +storag +andprocess +algorithm +indic +algorithm +computingth +cube +perform +object +relat +databas +system +includ +benchmark +dbm +algorithm +valuedattribut +techniqu +parallel +workload +parallel +spatial +inform +system +recent +public +comput +multidimension +aggreg +withsameet +agarw +rakesh +agraw +prasad +deshpand +ashish +gupta +raghu +ramakrishnan +sunita +sarawagi +proceed +thend +intern +confer +larg +databas +mumbai +bombai +storag +estim +multidimension +aggregatesin +presenc +hierarchi +amit +shukla +prasad +deshpand +karthikeyan +ramasami +intern +confer +larg +databas +mumbai +bombai +bucki +object +relat +databas +benchmark +michael +carei +david +dewitt +johann +gerhk +dhaval +shah +moham +asgarian +prepar +toward +molap +object +relat +dbm +withyihong +zhao +kristin +tuft +submit +public diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..581d9315 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +navin +kabranavin +kabragradu +student +depart +comput +scienc +univers +wisconsin +madisonadvisor +david +dewittresearch +area +databas +research +interest +customiz +queri +optim +paradis +project +plan +address +noth +better +explor +bookmark +could +look +indian +stuff +includ +among +thing +archiv +hindi +song +navin +wisc +public diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..10796fb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,31 @@ +newhalltia +newhal +newhal +wisc +graduat +student +dayton +madison +telephon +research +interest +parallel +distribut +system +perform +tool +scalabl +analysi +perform +predict +java +research +group +paradynadvisor +bart +millermummi +pictur +guanajuato +last +chang +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..581f1f49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +nanci +hallcomput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +project +shore +scalabl +heterogen +object +repositori diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..5b139f0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,472 @@ +olvi +mangasarian +home +page +olvi +mangasarian +john +neumann +professor +mathemat +comput +scienc +member +center +mathemat +scienc +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +olvi +wisc +harvard +univers +interest +mathemat +program +machin +learn +parallel +comput +research +summari +optim +theori +rich +mathemat +effectivecomputation +solv +mani +real +life +problem +interestsin +topic +rang +broad +spectrum +encompassestheoret +aspect +error +bound +mathemat +programsand +variat +inequ +converg +proof +parallelgradi +variabl +distribut +algorithm +optim +smooth +techniqu +solv +constrain +optim +problemsa +differenti +nonlinear +equat +well +applicationsto +machin +learn +gener +specif +context +animport +aspect +research +mathemat +programmingtechniqu +diagnos +breast +cancer +result +ahighli +accur +computer +diagnost +system +current +useat +univers +wisconsin +hospit +current +student +paul +bradlei +recent +public +mangasarian +solodova +linearli +converg +descent +method +strongli +monotonecomplementar +problem +mathemat +program +technic +report +octob +mangasarian +jong +pangexact +penalti +function +mathemat +programswith +linear +complementar +constraint +mathemat +program +technic +report +august +mangasarianmathemat +program +data +miningmathemat +program +technic +report +august +mangasarianerror +bound +nondifferenti +convex +inequ +strong +slater +constraint +qualif +mathemat +program +technic +report +juli +bradlei +mangasarian +street +cluster +concav +minim +mathemat +program +technic +report +submit +neural +inform +process +system +street +mangasarian +wolberg +individu +collect +prognost +predict +mathemat +program +technic +report +januari +bradlei +mangasarian +street +featur +select +mathemat +program +mathemat +program +technic +report +decemb +submit +inform +journal +comput +mangasarian +machin +learn +polyhedr +concav +minim +mathemat +program +technic +report +novemb +appli +mathemat +parallel +comput +festschrift +klau +ritter +fischer +riedmuel +schaeffler +editor +physica +verlag +germani +mangasarian +pose +linear +complementar +problem +mathemat +program +technic +report +august +submit +siam +proceed +internationalsymposium +complementar +problem +baltimor +novemb +revis +novemb +nick +street +mangasarian +improv +gener +toler +train +mathemat +program +technic +report +juli +mangasarian +mathemat +program +machin +learn +mathemat +program +technic +report +april +revis +juli +appear +proceed +nonlinear +optim +applic +workshop +eric +june +plenum +press +chunhui +chen +mangasarian +hybrid +misclassif +minim +mathemat +program +technic +report +februari +revis +juli +august +appear +advanc +comput +mathemat +mangasarian +optim +machin +learn +mathemat +program +technic +report +januari +siag +view +new +chunhui +chen +mangasarian +class +smooth +function +nonlinear +mix +complementar +problem +mathemat +program +technic +report +august +revis +octob +februari +septemb +comput +optim +applic +mangasarian +nick +street +wolberg +breast +cancer +diagnosi +prognosi +linear +program +mathemat +program +technic +report +august +revis +decemb +oper +research +juli +august +mangasarian +linear +complementar +problem +separ +bilinear +program +mathemat +program +technic +report +juli +journal +global +optim +mangasarian +solodov +backpropag +converg +determinist +nonmonoton +perturb +minim +mathemat +program +technic +report +june +advanc +neural +inform +process +system +cowan +tesauro +alspector +editor +morgan +kaufmann +publish +francisco +california +chunhui +chen +mangasarian +smooth +method +convex +inequalitiesand +linear +complementar +problem +comput +scienc +technic +report +novemb +revis +novemb +mathemat +program +mangasarian +misclassif +minim +comput +scienc +technic +report +octob +revis +septemb +journal +global +optim +decemb +mangasarian +solodov +serial +parallel +backpropag +neural +net +vianonmonoton +perturb +minimn +comput +scienc +technic +report +april +revis +decemb +optim +method +softwar +chronolog +cancer +bibliographi +page +public +group +wisconsin +paper +report +view +download +paper +report +group +view +home +page +group +period +updat +olvi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..5d4cfd3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,806 @@ +machin +learn +cancer +diagnosi +prognosismachin +learn +cancer +diagnosi +prognosisthi +page +describ +variou +linear +program +base +machin +learningapproach +appli +diagnosi +prognosi +ofbreast +cancer +work +result +collabor +theunivers +wisconsin +madison +betweenprof +olvi +mangasarian +comput +scienc +depart +anddr +william +wolbergof +depart +surgeri +human +oncolog +copi +thepress +releas +distribut +american +cancer +societi +scienc +writer +seminar +inmarch +provid +good +overview +research +tabl +content +diagnosi +prognosi +bibliographi +citat +popular +press +local +relat +link +relat +linksdiagnosisthi +work +grew +desir +wolberg +accur +diagnosebreast +mass +base +sole +fine +needl +aspir +heidentifi +nine +visual +assess +characterist +sampl +consideredrelev +diagnosi +collabor +prof +mangasarian +andtwo +graduat +student +rudi +setiono +kristin +bennett +aclassifi +construct +us +multisurfac +method +pattern +separ +nine +featur +thatsuccessfulli +diagnos +case +result +data +iswel +known +wisconsin +breast +cancer +data +imag +analysi +work +began +addit +nick +streetto +research +team +goal +diagnos +sampl +base +adigit +imag +small +section +slide +result +ofthi +research +consolid +softwar +system +known +xcyt +current +us +wolberg +clinicalpractic +diagnosi +process +perform +follow +taken +breast +mass +materi +thenmount +microscop +slide +stain +highlight +cellularnuclei +portion +slide +cell +arewel +differenti +scan +us +digit +camera +afram +grabber +board +user +isol +individu +nuclei +us +xcyt +us +mous +pointer +user +draw +approxim +boundari +ofeach +nucleu +us +comput +vision +approach +known +snake +approxim +converg +exact +nuclear +boundari +interact +process +take +five +minut +slide +imag +showingxcyt +nuclei +isol +thisfas +program +comput +valu +characterist +ofeach +nuclei +measur +size +shape +textur +mean +standarderror +extrem +valu +featur +comput +result +total +nuclear +featur +sampl +base +train +case +linear +classifi +wasconstruct +differenti +benign +malign +sampl +thisclassifi +consist +singl +separ +plane +space +threeof +featur +extrem +valu +area +extrem +valu +smooth +mean +valu +textur +project +case +onto +thenorm +separ +plane +approxim +probabl +densiti +ofth +benign +malign +point +construct +allow +simpl +bayesiancomput +probabl +malign +patient +thesedens +shown +patient +allow +judg +confid +diagnosi +comparison +hundr +previou +sampl +date +system +correctli +diagnos +consecut +newpati +benign +malign +eight +case +didxcyt +return +suspici +diagnosi +estimatedprob +malign +small +subset +sourc +imag +us +research +found +goodtest +case +imag +segment +object +recognit +algorithm +petsegment +algorithm +automat +identifi +nuclei +inthes +imag +pleas +email +street +wisc +work +togeth +prognosisth +second +problem +consid +research +prognosi +predict +long +term +behavior +diseas +haveapproach +prognosi +function +approxim +problem +us +inputfeatur +includ +comput +xcyt +predict +atim +recurr +malign +patient +us +right +censor +data +solut +term +recurr +surfac +approxim +method +util +linearprogram +construct +surfac +predict +time +recurr +fornew +patient +examin +actual +recurr +train +caseswith +similar +predict +recurr +time +plot +probabl +ofdiseas +free +surviv +variou +time +year +anindividu +patient +capabl +incorpor +intoxcyt +exampl +shown +surviv +curv +plot +probabl +diseas +free +surviv +versu +time +year +black +diseas +free +surviv +curv +repres +patient +ourorigin +studi +curv +repres +probabl +ofdiseas +free +surviv +sampl +case +particular +case +thereforeha +averag +prognosi +probabl +diseas +freeafter +year +equal +procedur +also +us +compar +predict +power +ofvari +prognost +factor +result +indic +precis +detail +cytolog +inform +type +provid +xcytgiv +better +prognost +accuraci +tradit +factor +tumors +lymph +node +statu +corrobor +research +result +could +remov +need +often +pain +axillari +lymph +node +surgeri +chronolog +bibliographylink +paper +provid +postscript +format +postscript +viewer +download +file +shift +click +netscap +print +abstract +ascii +text +obtain +paper +notlink +pleas +contact +first +author +mangasarian +setiono +wolberg +pattern +recognit +linear +program +theori +applic +medic +diagnosi +proceed +workshop +larg +scale +numer +optim +page +philadelphia +siam +mangasarian +wolberg +cancer +diagnosi +linear +program +siam +new +page +abstract +wolberg +street +mangasarian +imag +analysi +machin +learn +appli +breast +cancer +diagnosi +prognosi +analyt +quantit +cytolog +histolog +page +april +abstract +wolberg +street +heisei +mangasarian +comput +deriv +nuclear +featur +distinguish +malign +benign +breast +cytolog +human +patholog +page +abstract +wolberg +street +heisei +mangasarian +comput +deriv +nuclear +grade +breast +cancer +prognosi +analyt +quantit +cytolog +histolog +page +august +abstract +mangasarian +street +wolberg +breast +cancer +diagnosi +prognosi +linear +program +oper +research +page +juli +august +avail +mathemat +program +technic +report +abstract +street +mangasarian +wolberg +induct +learn +approach +prognost +predict +proceed +twelfth +intern +confer +machin +learn +priediti +russel +page +morgan +kaufmann +abstract +teagu +wolberg +street +mangasarian +call +page +indetermin +fine +needl +aspir +breast +imag +analysi +aid +diagnosi +cancer +submit +abstract +street +mangasarian +wolberg +individu +collect +prognost +predict +technic +report +comput +scienc +depart +univers +wisconsin +madison +januari +submit +icml +aaai +confer +abstract +citat +medic +popular +press +new +medicin +segment +prime +new +march +breast +biopsi +without +surgeri +friend +todai +march +cancer +detect +imit +prospect +man +milwauke +sentinel +march +analyz +breast +cancer +detroit +new +march +high +tech +cancer +hunt +marilynn +marchion +milwauke +journal +march +computer +interpret +breast +biopsi +progress +report +oncolog +time +april +comput +program +hunt +breast +cancer +ruth +sorel +houston +chronicl +april +comput +program +improv +interpret +aspir +oncolog +new +intern +data +suggest +needl +biopsi +could +replac +surgic +biopsi +diagnos +breast +cancer +journal +american +medic +associ +medic +new +perspect +column +june +diagnosi +imag +analysi +machin +learn +cope +septemb +octob +comput +seek +breast +cancer +madison +capit +time +januari +comput +aid +cancer +predict +angel +time +januari +local +relat +link +mathemat +program +group +machin +learn +group +medic +schooloth +relat +link +nation +librari +medicin +univers +nevada +center +biomed +model +research +oncolink +washington +univers +institut +biomed +comput +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..602a7ab2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,328 @@ +pattern +separ +mathemat +programmingpattern +separ +mathemat +programmingthi +page +describ +work +pattern +separ +linear +program +mathemat +program +section +univers +wisconsin +madison +comput +scienc +depart +brief +histori +method +outlinemathemat +optim +approach +particular +linear +program +long +us +problem +pattern +separ +linear +program +us +construct +plane +separ +linearli +separ +point +set +separ +nonlinear +surfac +us +linear +program +also +describ +whenev +surfac +paramet +appear +linearli +quadrat +polynomi +surfac +formul +howev +could +failon +set +separ +surfac +linear +paramet +multisurfac +method +avoid +difficulti +separ +disjoint +finit +point +set +dimension +euclidean +space +follow +choos +parallel +plane +dimension +euclidean +space +close +togeth +region +plane +contain +point +set +region +parallel +plane +contain +point +point +discard +point +region +parallel +plane +repeat +process +point +parallel +plane +region +parallel +plane +contain +point +point +multisurfac +method +tree +variant +multisurfac +method +develop +finit +disjoint +point +set +dimension +euclideanspac +goal +todetermin +sequenc +plane +dimension +euclideanspac +separ +set +follow +determin +plane +dimension +euclidean +space +minim +averag +distanc +misclassifi +point +point +misclassifi +li +side +separ +plane +assign +similarli +point +misclassifi +li +side +separ +plane +assign +region +assign +contain +mostli +point +stop +otherwis +gener +anoth +error +minim +plane +region +sequenc +plane +gener +view +decis +tree +eachnod +tree +best +split +point +reach +node +found +solv +node +split +branch +thesam +procedur +appli +mostli +point +oneset +node +linear +program +approach +also +view +astrain +neural +network +hidden +layer +shown +learn +concept +well +better +traditionallearn +method +cart +also +advantag +artifici +neural +network +method +backpropag +inthat +train +proce +much +faster +implement +implement +us +mino +numer +optim +packag +nick +street +kristin +bennett +also +implement +matlab +optim +packag +paul +bradlei +follow +descript +matlab +implement +togeth +file +requir +chronolog +bibliographi +mangasarian +linear +nonlinear +separ +pattern +linear +program +oper +research +june +page +mangasarian +multisurfac +method +pattern +separ +ieee +transact +inform +theori +novemb +page +bennett +decis +tree +construct +linear +program +proceed +midwest +artifici +intellig +cognit +scienc +societi +confer +page +bennett +mangasarian +robust +linear +program +discrimin +linearli +insepar +set +optim +method +softwar +page +mangasarian +mathemat +program +neural +network +orsa +journal +comput +fall +page +last +modifi +paul +bradlei +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..ca640da3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,124 @@ +paradyn +project +home +page +paradyn +parallel +perform +tool +releas +informationthi +page +contain +describ +copi +ofreleas +paradyn +tool +project +goalsth +paradyn +parallel +program +perform +tool +project +explor +newapproach +build +scalabl +tool +parallel +program +perform +technic +paper +paradyn +manualsstatu +reporta +recent +statu +report +blizzard +paradyn +project +arpa +meet +panel +presentationthi +present +made +arpa +csto +meet +insan +antonio +arpa +csto +super +symbol +tabl +presentationthi +present +made +arpa +csto +meet +inflorida +project +effort +develop +common +access +routin +tocompil +gener +inform +us +tool +high +level +parallellanguag +project +staff +super +comput +postera +hypertext +version +poster +super +comput +relat +project +elsewher +spdt +sigmetr +symposium +parallel +distribut +toolsyou +also +restaur +includ +paradyn +page +temporari +placehold +contact +informationparadyn +projectdepart +comput +sciencesunivers +wisconsin +west +dayton +streetmadison +email +paradyn +wisc +edufax +last +modifi +bart +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..7661acc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,42 @@ +steven +parker +home +page +steven +parker +graduat +student +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +wisconsin +offic +parker +wisc +depth +area +numer +analysi +employ +prism +projectfal +schedul +math +prism +relat +link +home +page +home +page +send +mail +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..f1ce9a60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,138 @@ +home +page +paul +bradleygradu +student +comput +scienc +depart +univers +wisconsin +madison +paulb +wisc +eduoffic +csphone +advisor +mangasarianinterestsmathemat +programmingmachin +learningfli +fish +interest +us +mathemat +program +techniqu +specif +nonlinear +linear +program +induct +learn +summari +work +currentlyb +done +area +univers +wisconsin +pleas +madisonmathemat +program +page +thiswork +guid +professor +olvimangasarian +nick +street +publicationsal +paper +store +postscript +format +abstract +ascii +text +postscript +viewer +download +file +shift +click +us +netscap +print +bradlei +mangasarian +street +featur +select +mathemat +program +mathemat +program +technic +report +comput +scienc +depart +univers +wisconsin +madison +wisconsin +decemb +revis +march +submit +inform +journal +comput +abstract +bradlei +mangasarian +street +cluster +concav +minim +mathemat +program +technic +report +comput +scienc +depart +univers +wisconsin +madison +wisconsin +accept +present +neural +inform +process +system +abstract +nick +street +work +paul +picksthes +site +backcountri +page +grate +dead +nasa +wisconsin +fish +frog +espnet +timesfax +uroullett +last +modifi +paul +bradlei +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..6abeb8d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,147 @@ +pete +devri +home +page +peter +devri +internet +tool +specialist +room +comput +scienc +westdayton +madison +pdevri +wisc +internet +tool +specialist +intern +scout +mean +isthat +read +everyth +internet +technolog +make +sens +andthen +write +overview +articl +scout +toolkit +great +thing +iread +everyth +think +anywai +rather +foolish +topai +tell +eric +hazen +alsoprovid +technic +webmast +servic +scout +group +although +excel +help +comput +system +folksat +intern +recent +join +scout +team +work +laboratori +molecularbiolog +integr +microscopi +resourc +biomed +resourc +nearli +eight +year +molecularbiolog +fortun +develop +prof +seancarrol +techniqu +creat +multipl +label +confoc +imag +basic +cool +look +imag +develop +embryo +lotof +journal +book +cover +also +develop +molecular +biologi +site +fortun +work +prof +johnwhit +rest +imrstaff +develop +site +receiv +star +inth +magellan +internet +guid +last +major +project +involv +develop +dimension +microscop +studi +embryo +develop +microscop +isdescrib +articl +appear +augustnd +issu +journal +scienc +photo +guest +scout +lab +standard +info +resum +relat +experi +public +present +updat +tuesdai +decemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..7a072482 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,131 @@ +yumpe +home +page +manoj +plakal +graduat +slave +dept +comput +scienc +universityofwisconsin +madison +blah +home +countri +india +though +origin +state +kerala +stai +life +calcutta +studi +bosco +school +calcutta +salesian +bosco +undergrad +kanpur +major +comput +scienc +engin +current +first +year +graduat +student +support +teach +assistantship +depart +comput +scienc +univers +wisconsin +madison +stare +barrel +either +comput +architectur +program +languag +interest +music +rock +metal +altern +blue +movi +book +stuff +acad +hack +industri +geeki +nerdi +stuff +featur +home +page +kanpur +chat +gatewai +kanpur +class +seealso +iitk +class +link +friend +snap +friend +pinup +galleri +everi +nerd +need +check +bookmark +link +page +access +log +visit +page +contact +north +randal +avenu +madison +comput +scienc +univers +wisconsin +madison +dayton +street +madison +plakal +wisc +plakal +wisc +acknowledg +suresh +venkat +nifti +tabl +igor +ivanisev +graduat +slave +wisecrack +icon +variou +corner +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..7eccb42b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,90 @@ +prasad +home +page +page +constuct +meanwhil +prasad +deshpand +graduat +student +comput +scienc +depar +univers +wisconsin +madison +home +address +princeton +madison +offic +address +comput +scienc +build +univers +wisconsin +madison +dayton +madison +academ +interest +databas +system +theori +research +area +databas +current +schedul +theori +invest +manageri +econom +meet +prof +jeff +naughton +music +introduct +music +theori +public +comput +multidimension +aggreg +vldb +storag +estim +multidimensionalaggreg +presenc +hierarchi +vldb +interest +cours +project +packag +java +download +want +spend +time +timex +world +find +india +dilbert +comix +explor +bookmark +random +link +finger +time +sinc +hakuna +matata +info +creat +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..4e7f1ca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,45 @@ +vishi +home +page +viswanath +poosala +research +assist +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +poosala +wisc +research +inform +reseach +summari +resum +html +postscript +inform +relat +databas +advisor +prof +yanni +ioannidi +inform +asha +voluntari +organ +help +improv +basic +educ +india +interestsuw +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..cb92f567 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,770 @@ +home +pageth +project +queri +sequenc +data +document +construct +time +order +databas +order +time +databas +time +databas +order +document +content +project +objectivescurr +statusmotiv +exampleseq +data +model +sequin +queri +languageoptim +techniquesseq +system +developmentpublicationsrel +workcontact +informationproject +object +number +import +databas +applic +requir +processingof +larg +amount +order +sequenc +data +domain +theseappl +includ +financi +manag +histor +analysi +econom +social +scienc +metereolog +medic +scienc +andbiolog +scienc +exist +relat +databas +inadequ +regard +data +collect +treat +set +sequenc +consequ +express +sequenc +queri +tediou +evalu +ineffici +databas +model +data +us +abstract +sequenc +allow +data +sequenc +queri +declar +manner +util +order +semanticstak +advantag +uniqu +opportun +avail +queri +optim +evaluationintegr +sequenc +data +relat +data +user +canstor +queri +combin +relat +sequencesthes +requir +serv +goal +project +variou +kind +sequenc +need +support +tempor +sequenc +themost +import +kind +queri +express +us +notion +like +next +previou +natur +consid +sequenc +queri +optim +evalu +effici +issu +need +studi +theori +databas +system +need +built +demonstr +feasibl +theoret +idea +project +statusth +current +statu +project +defin +data +model +support +import +kind +sequenc +data +also +defin +algebraicqueri +oper +compos +form +sequenc +queri +analogousto +composit +relat +algebra +oper +form +relat +queri +describ +sequenc +queri +effici +process +identifi +variou +optim +techniqu +sequenc +queri +languag +sequin +candeclar +express +queri +sequenc +sequin +queri +includ +embed +express +relat +queri +languag +likesql +vice +versa +build +disk +base +databas +system +demonstr +feasibl +propos +system +implement +model +us +nest +complex +object +architectur +built +shore +storag +manag +process +sever +megabyt +data +relat +sequenc +support +integr +extens +manner +motiv +exampl +sequenc +querya +weather +monitor +system +record +inform +variou +meteorolog +phenomena +sequenti +occurr +phenomena +variou +meteorolog +event +sequenc +time +record +scientist +ask +queri +volcano +erupt +didth +recent +earthquak +strength +greater +richter +scale +queri +express +relat +queri +languag +like +complex +featur +like +groupbi +claus +correl +subqueri +aggregatefunct +requir +convent +relat +queri +optim +would +find +effici +queri +execut +plan +even +given +knowledg +earthquak +volcano +relat +sort +time +howev +effici +plan +exist +model +data +sequencesord +time +sequenc +scan +lock +step +similar +sort +merg +join +recent +earthquak +record +scan +store +temporari +buffer +whenev +volcano +record +process +valu +recent +earthquak +record +store +buffer +check +strength +greater +possibl +gener +answer +queri +therefor +process +singl +scan +sequenc +us +littl +memori +optim +sequenti +data +queri +data +modelth +detail +data +model +aredescrib +publish +paper +click +postscript +version +present +gist +basic +model +sequenc +record +map +ordereddomain +posit +mani +mani +relationship +record +andposit +view +dual +distinct +wai +recordsmap +posit +posit +map +record +view +call +posit +record +orient +respect +give +rise +queri +oper +base +view +queri +sequenc +could +requir +oper +either +flavor +record +orient +oper +similar +relationaloper +includ +variou +kind +join +overlap +contain +andaggreg +oper +extens +explor +researchersin +tempor +databas +commun +posit +oper +includ +next +previou +offset +movingaggreg +oper +allow +zoom +oper +sequenc +mean +collaps +expand +order +domain +associ +sequenc +instanc +daili +sequenc +could +zoom +collaps +weekli +sequenc +zoom +expand +hourli +sequenc +last +part +model +deal +oper +group +set +sequenc +advantag +make +easi +model +queri +involv +sequenc +collect +case +mani +real +worldsitu +sequenc +oper +extend +work +group +similar +sequenc +instead +singl +sequenc +extensionof +model +indic +practic +implement +ofseq +would +probabl +involv +nest +complex +object +system +sequin +queri +languagew +devis +queri +languag +call +sequin +usingwhich +declar +sequenc +queri +specifi +languagei +similar +flavor +except +input +queriesa +well +result +queri +sequenc +click +descript +sequin +languag +exampl +optim +techniquesw +propos +optim +techniqu +sequenc +queri +involv +posit +oper +exist +techniqu +thathav +propos +queri +record +orient +oper +optim +queri +transform +meta +data +cach +intermedi +result +effici +evalu +queri +optim +queri +evalu +plan +gener +us +algorithm +reli +cost +estim +import +observ +access +sequenc +data +singl +stream +probabl +effici +evalu +strategi +take +account +detail +optim +techniqu +aredescrib +publish +paper +click +postscript +version +system +developmentth +databas +system +client +serverarchitectur +support +multipl +client +viaa +multi +thread +server +server +built +ontop +shore +storag +manag +sequin +subset +support +queri +languageswhich +embed +insid +data +mode +nest +complex +object +model +allow +arbitrarylevel +nest +relat +insid +sequenc +viceversa +system +also +extens +provid +supportfor +data +type +order +domain +user +defin +function +storag +implement +queri +languag +detailson +system +click +publicationssequ +queri +process +praveen +seshadri +miron +livni +raghu +ramakrishnan +proceed +sigmod +confer +data +manag +framework +sequenc +datapraveen +seshadri +miron +livni +raghu +ramakrishnan +proceed +ieee +confer +data +engin +march +design +implement +sequenc +databas +systempraveen +seshadri +miron +livni +raghu +ramakrishnan +submit +vldb +next +sequenc +queriesraghu +ramakrishnan +michael +cheng +miron +livni +praveen +seshadri +proceed +intern +confer +manag +data +comad +decemb +relat +workthedevis +project +complementari +provid +visualizationenviron +us +explor +sequenc +data +devis +front +queri +pose +databas +server +answer +examin +graphic +also +shore +project +storag +manag +us +peopl +work +relat +project +madison +databas +research +group +madison +depart +servercontact +informationfor +inform +contact +praveen +seshadri +praveen +wisc +eduraghu +ramakrishnan +raghu +wisc +edumiron +livni +miron +wisc +educomput +scienc +depart +univers +wisconsin +dayton +street +madison +last +modifi +praveen +seshadripraveen +seshadri +praveen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..ae6dca7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,26 @@ +andrew +prock +home +page +andrew +clemen +hockert +prockoffic +hour +person +histori +school +class +madison +class +school +bookmark +resum +doonesburi +trot +alta +vista +alta +vista +prock +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..a00a0b63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,174 @@ +home +page +ann +condon +ann +condon +associ +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +condon +wisc +univers +washington +interest +complex +theori +interact +proof +system +random +complexityclass +theori +parallel +comput +research +summari +interest +model +comput +interactiveproof +system +combin +nondetermin +random +suchmodel +recent +proven +surprisingli +us +solv +classicproblem +complex +theori +exampl +although +theoryof +complet +long +us +identifi +hard +computationalproblem +much +progress +understand +whichhard +problem +solut +easi +approxim +recentresult +interact +proof +system +result +novel +modelsof +turn +us +prove +approximabilityresult +sever +hard +problem +work +developingboth +posit +neg +result +approxim +hardcombinatori +problem +aris +game +theori +graph +theoryand +automata +theori +also +interest +design +analysi +parallel +algorithm +current +work +develop +parallel +algorithm +forsort +graph +problem +minimum +span +tree +goal +develop +algorithm +work +well +practic +parallel +model +commun +synchron +costscan +expens +sampl +recent +public +interact +proof +system +polynomi +bound +strategi +ladner +journal +comput +system +scienc +finit +state +automata +nondeterminist +probabilisticst +hellerstein +pottl +wigderson +proceedingsof +annual +symposium +theori +comput +pspace +provabl +prover +round +caiand +lipton +journal +comput +system +scienc +februari +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..099485ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,192 @@ +home +page +deborah +joseph +deborah +joseph +associ +professor +comput +scienc +mathemat +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +joseph +wisc +purdu +univers +interest +structur +appli +complex +theori +comput +biologi +comput +geometri +mathemat +logic +research +summari +research +concern +area +theoret +comput +scienc +studi +structur +properti +complex +class +design +analysi +algorithm +biolog +problem +last +twenti +year +great +deal +work +gone +studyingth +properti +set +decid +determinist +andnondeterminist +polynomi +time +despit +effort +stillknow +littl +class +recent +fact +computerscientist +question +adequaci +known +proof +techniquesfor +resolv +question +whether +research +investigatesth +structur +properti +set +class +exploresin +formal +type +proof +techniqu +necessari +resolveproblem +concern +complex +class +research +interest +comput +biologi +primarili +inth +area +comput +method +genom +sequenc +theseinclud +develop +dynam +data +structur +algorithmsfor +fragment +assembl +larg +scale +genom +sequenc +project +develop +specif +algorithm +techniqu +handlingrepetit +sequenc +addit +research +util +graphtheoret +method +rapid +homolog +detect +analysisof +anonym +sequenc +sampl +recent +public +collaps +degre +subexponenti +time +pruim +young +proceed +ninth +structur +complex +theoryconfer +spars +spanner +weight +graph +althof +dobkin +soar +discret +comput +geometri +obtain +global +similar +local +similar +meidanisand +tiwari +proceed +fourth +scandinavianworkshop +algorithm +springer +verlag +lectur +note +incomput +scienc +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..3fe4a0fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,143 @@ +home +page +miron +livni +miron +livni +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +miron +wisc +weizmann +institut +scienc +rehovot +israel +interest +resourc +manag +algorithm +perform +model +analysi +discret +event +simul +research +summari +major +emphasi +research +design +evaluationof +resourc +manag +polici +involv +developmentof +resourc +manag +polici +process +data +managementsystem +type +system +gener +purpos +well +asreal +time +schedul +algorithm +consid +researchinvolv +perform +studi +differ +polici +specialemphasi +interplai +properti +systemand +perform +polici +sinc +performancestudi +emploi +simul +interest +discret +event +modelingand +simul +techniqu +current +process +implementinga +simul +laboratori +base +novel +simul +languag +laboratori +includ +data +manag +util +visualizationtool +graphic +interfac +sampl +recent +public +disk +tape +join +synchron +disk +tape +access +myllymaki +proceed +sigmetr +confer +sequenc +queri +process +sashadri +ramakrishnan +proceed +sigmod +confer +foundat +visual +metaphor +schema +displai +haberand +ioannidi +journal +intellig +inform +system +juli +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..4414a99d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,145 @@ +home +page +seymour +parter +seymour +parter +professor +comput +scienc +mathemat +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +parter +wisc +york +univers +interest +numer +method +partial +differenti +equat +research +summari +time +major +emphasi +work +solutionof +indefinit +discret +ellipt +system +equat +classicalit +method +multigrid +method +work +effectivelywhen +system +posit +definit +method +also +bemad +effect +real +symmetr +part +operatori +posit +definit +hand +indefinit +casedirect +method +attempt +preserv +spars +thesystem +encount +small +pivot +thu +challengingproblem +effect +mix +concept +procedur +linearalgebra +ellipt +partial +differenti +equat +nowinvolv +sever +project +attack +class +problem +includ +precondit +studi +research +specialmultigrid +method +sampl +recent +public +precondit +chebyshev +collact +discret +ellipticparti +differenti +equat +appear +siam +journalon +numer +analysi +precondit +boundari +condit +without +estim +condit +number +distribut +singular +valu +siam +journal +numer +analysi +precondit +second +order +ellipt +oper +condit +numbersand +distribut +singular +valu +journal +scientificcomput +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..ed6bb239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,180 @@ +home +page +mari +vernon +mari +vernon +professor +comput +scienc +industri +engin +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +vernon +wisc +univers +california +angel +interest +techniqu +applic +comput +system +perform +analysi +perform +parallel +system +parallel +architectur +operatingsystem +research +summari +interest +analyt +model +techniqu +applicationto +comput +system +perform +issu +emphasi +paralleland +distribut +system +design +issu +model +techniquesi +develop +togeth +graduat +student +colleaguesinclud +gener +time +petri +customizedmean +valu +analysi +gtpn +repres +parallel +systemfeatur +synchron +prioriti +servic +precis +custom +techniqu +yield +intuit +equationsthat +repres +parallel +system +featur +approxim +butcan +solv +effici +also +recent +proposedth +techniqu +call +interpol +approximationsfor +analysi +parallel +processor +alloc +polici +techniquemai +also +broader +applic +parallel +system +performanceanalysi +current +research +project +includ +character +high +performanceparallel +workload +analysi +parallel +processor +schedulingpolici +schedul +issu +multimedia +server +memorymanag +polici +network +workstat +sampl +recent +public +fair +dqdb +network +slot +reus +brewster +proceed +ieee +infocom +confer +august +accur +model +hybrid +hash +join +algorithm +pateland +carei +proceed +sigmetr +confer +june +applic +characterist +limit +preemption +forrun +complet +parallel +processor +schedul +polici +with +chiang +mansharamani +proceed +sigmetricsconfer +june +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..df988728 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,12 @@ +qinqin +wang +home +page +welcom +qinqin +wang +home +pageqw +wisc +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..12abd19a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,364 @@ +raghu +ramakrishnan +home +page +raghu +ramakrishnan +associ +professor +comput +scienc +raghu +wisc +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +street +madison +usaphon +depart +educ +teach +activ +research +interest +research +project +graduat +educ +univers +texa +austin +tech +indian +institut +technolog +madra +teach +activ +cours +text +databas +manag +system +softwar +educ +minibaseand +coralth +text +databas +manag +system +publish +mcgraw +hill +aimedat +first +second +cours +databas +system +undergraduateand +graduat +level +minibas +relat +dbm +develop +inconjunct +text +coral +system +also +us +coursesthat +deal +logic +databas +sever +school +research +interest +integr +heterogen +data +sourc +content +base +queri +index +sequenc +imag +data +exploratori +analysi +larg +data +set +data +mine +extend +databas +queri +languag +us +constraint +deductiona +databas +grow +diversifi +increasinglyimport +abl +access +data +dispers +heterogen +independ +develop +sourc +easili +rodin +project +successor +project +investig +severalissu +formal +techniqu +practic +toolkit +forsemant +integr +support +multipl +level +serviceand +access +databas +databas +access +networkedclust +machin +joint +work +prof +ioannidi +livni +recent +work +result +visual +data +explorationfrom +next +project +joint +work +prof +livni +appli +data +integr +queri +second +area +interest +content +base +queri +complex +data +assequ +imag +set +seqsystem +deal +queri +sequenc +data +focus +dbm +design +optimizationissu +relat +sequenc +data +part +next +project +joint +work +prof +livni +import +aspect +work +identifyingtrend +data +gener +identifi +us +pattern +ofinform +project +goal +support +content +base +retriev +fromlarg +set +imag +focu +develop +implementingan +express +data +definit +languag +us +customizea +gener +imag +databas +system +take +advantag +specializedinform +given +collect +imag +indexedand +queri +interest +queri +analysi +data +cover +data +explor +andmin +develop +power +cluster +algorithm +call +birchfor +larg +dataset +visual +data +explor +tool +call +devisea +part +next +project +long +stand +research +interest +extens +relat +databasequeri +languag +logic +program +featuressuch +structur +term +recurs +ofarithmet +constraint +specifi +data +queri +morecompactli +effici +ongo +project +involv +continu +develop +coraldeduct +system +evalu +base +upon +bottom +fixpointevalu +techniqu +sever +optim +appli +make +efficientacross +broad +rang +program +research +project +coral +next +graduat +sudarshan +august +time +optim +bottom +evalu +logic +program +first +employ +bell +lab +murrai +hill +sudarsha +research +srivastava +august +deduct +object +orient +languag +first +employ +bell +lab +murrai +hill +divesh +research +august +design +evalu +transit +closur +algorithm +first +employ +bell +lab +murrai +hill +research +seshadri +august +sequenc +data +managementfirst +employ +comput +scienc +depart +cornel +univers +praveen +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..034757d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,195 @@ +rahul +home +page +rahul +kapoorhello +internet +surfer +welcom +cyber +home +hope +stai +long +enough +know +littl +offici +third +final +semest +master +student +comput +scienc +depart +univers +wisconsin +madison +came +fall +get +bachelor +degre +indianinstitut +technolog +kanpur +interest +employ +pleas +check +resum +cours +schedulemydepartmentmyuniversityiitkanpuriitkclass +india +relatedlink +menow +gone +offici +want +person +well +normal +kind +born +andrais +small +love +famili +compris +parent +elder +sister +nice +town +india +call +kanpur +came +state +fall +good +fortun +live +madison +moneymagazin +rate +livabl +citi +year +editormust +come +greenland +think +winter +livabl +guess +shouldn +complain +fall +spring +madison +isawesom +summer +jose +california +work +almaden +research +centr +cannot +much +madison +summer +suppos +great +anywai +regret +great +time +area +whatev +monei +magazin +sai +think +northern +california +place +littl +like +love +music +take +look +collect +know +kind +movi +almost +genr +though +prefer +romanc +comedi +comedi +show +file +star +trek +read +stuff +novel +philosophi +comput +surf +sport +concern +love +watch +cricket +india +plai +win +tenni +figur +skate +gymnast +semest +try +learn +swim +final +bridg +natur +lover +enjoi +go +long +walk +hike +cloudi +slightli +breezi +wish +could +job +televis +travel +show +youget +interest +place +world +paid +musicstuffmovi +televisioninternettravelotherbookmark +contact +meget +know +form +rest +contact +guestbookrahul +wisc +eduh diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..350981fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,98 @@ +rajesh +raman +home +page +old +homm +page +off +rajesh +raman +rajesh +raman +comput +scienc +depart +west +dayton +street +madison +email +raman +wisc +telephon +ohio +wesleyan +univers +major +comput +scienc +mathemat +minor +music +current +first +year +graduat +student +univers +winsonsin +madison +depart +comput +scienc +person +curriculum +vita +postscript +specif +current +cours +comput +architectur +saluja +system +perform +evalu +model +livni +distribut +system +inform +current +work +team +member +condor +project +integr +part +committe +bookmark +chimera +novelti +monster +chao +subject +contradict +prodigi +judg +thing +feebleworm +earth +depositari +truth +cloaca +uncertainti +error +theglori +shame +univers +blais +pascal +last +modifi +rajesh +raman +raman +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..27c3c047 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,88 @@ +karthik +pagekarthikeyan +ramasamyabouti +student +comput +scienc +depart +univers +wisconsin +research +interest +mostli +databas +oper +system +work +jeffrei +naughton +paradis +project +projectshack +david +dewitt +jeffrei +naughton +connectivityparadis +parallel +databas +wisconsin +pthread +wrapperspublicationsstorag +estim +multidimension +aggreg +presenc +hierarchi +amit +shukla +prasad +deshpand +jeffrei +naughton +karthikeyan +ramasami +intern +confer +larg +databas +mumbai +bombai +avail +presentationsweb +picturearchitectur +altern +scalabl +serversphoto +albumencount +leafperson +inforesum +financemonei +wall +street +journal +person +interestshack +photographycontact +informationstreet +address +dayton +comput +scienc +depart +madison +electron +mail +addresskarthik +wisc +eduoffic +phone +number +comment +suggestionspleas +tell +think +home +page +might +improv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..9f48370f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,24 @@ +kelli +home +page +kelli +ratliffoffic +phone +email +kelli +wisc +edulast +login +offic +hour +inform +student +genealog +page +interest +place +visit +space +construct +stai +tune diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..3b74f7e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,64 @@ +viresh +ratnakar +page +viresh +ratnakar +research +assist +comput +scienc +depart +univers +wisconsin +madison +research +area +digit +imag +video +compress +advisor +miron +livni +main +interest +base +compress +vector +quantiz +fractal +compress +qualiti +control +lossi +compress +product +mode +compress +public +home +page +invok +qclicauthor +avail +qclic +imag +invok +qclicbrows +avail +thing +rever +reveal +click +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +ratnakar +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..662cb242 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,97 @@ +monasteriu +omin +doominu +welcom +monasteriu +omin +doominu +brother +richard +without +beard +person +haven +address +offic +haven +address +offic +haven +address +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +madison +offic +number +rcarl +wisc +offic +hour +thur +home +page +current +cours +load +distribut +oper +system +mondai +wednesdai +underwat +fire +prevent +saturdai +advanc +comput +architectur +tuesdai +thursdai +math +introduct +whole +number +emphasi +number +sundai +subsurfac +depositori +engin +grave +dig +mondai +wednesdai +fridai +mani +shade +brother +richard +profession +omin +doom +polit +goofi +solitari +innebri +vampir +seriou +nostalg +funki +monk +fellow +order +ever +need +graphic +artist +desktop +publish +check +best diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..f7cbaaa9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,1961 @@ +thoma +rep +home +page +thoma +repsprofessorcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usa +mail +rep +wisc +telephon +secretari +depart +cornel +univers +curriculum +vita +research +interest +program +slice +differenc +merg +interprocedur +dataflow +analysi +alia +analysi +pointer +analysi +shape +analysi +languag +base +program +develop +environ +increment +comput +attribut +grammar +also +thehom +page +wisconsin +program +slice +project +content +research +summari +categor +index +public +list +public +visitor +post +doctor +associ +student +research +summarymi +research +aim +creat +tool +support +thedevelop +complex +softwar +system +object +createtool +provid +power +languag +specif +program +manipulationoper +particular +work +explor +program +slicingcan +serv +basi +program +manipul +oper +slice +program +respect +program +elementss +project +program +includ +program +element +thatmight +affect +either +directli +transit +valu +thevari +us +member +slice +allow +findsemant +meaning +decomposit +program +thedecomposit +consist +element +textual +contigu +program +slice +fundament +oper +solvingmani +softwar +engin +problem +instanc +applicationsin +program +understand +mainten +debug +test +differenc +special +reus +merg +project +worker +carri +aim +atimprov +underli +technolog +program +slice +relatedoper +implement +program +slicer +develop +method +us +slice +softwar +engin +tool +andbuild +slice +base +program +manipul +tool +clickherefor +home +page +wisconsin +program +slice +project +recent +establish +unexpect +connect +betweeninterprocedur +dataflow +analysi +previou +work +oninterprocedur +program +slice +particular +show +larg +class +interproceduraldataflow +analysi +problem +solv +transformingthem +special +kind +graph +reachabl +problem +graph +reachabl +problem +solv +precis +polynomi +timebi +algorithm +origin +develop +interprocedur +slice +also +interest +subject +increment +algorithm +increment +algorithm +mean +algorithm +make +solut +probleminst +find +solut +nearbi +problem +instanc +categor +index +publicationsprogram +slice +differenc +merg +overview +ics +slice +dagstuhl +slicing_pat +david +binklei +thesi +acta +topla +also +pldi +chop +fseb +differenc +differenc +yang +thesismerg +tosem +tosem +also +sigsoft +david +binklei +thesiswuu +yang +thesi +iwscm +topla +also +popla +esop +iwsvcc +algebra +slice +applic +program +merg +also +esop +ccpsd +amast +npfo_submiss +semant +slice +ccipl +poplb +applic +slice +dagstuhl +pepma +implement +slice +system +prog_integration_system +prog_integration_manu +note +system +describ +paper +handl +small +subsetof +pascal +system +distribut +licens +obtain +clickingher +current +retarget +implement +handl +program +andexpect +distribut +system +support +slice +chop +anddifferenc +probabl +integr +program +miscellan +thesesdavid +binklei +thesiswuu +yang +thesisphil +pfeiffer +thesisinterprocedur +dataflow +analysisdemand +idfa +bottom +logic +program +magic +set +transform +also +exhaust +demand +idfa +graph +reachabl +also +fsea +popl +diku +idfa +us +graph +reachabl +tcs_ide_pap +also +fase +ptime +complet +idfa +acta_pap +alia +analysi +pointer +analysi +shape +analysi +dagstuhl +also +popl +pepmb +pfeiffer_thesi +sigplan +languag +base +program +develop +environ +dagstuhl +synthes +gener +book +synthes +gener +manual +comput +also +lape +popl +rep +thesi +psde +topla +also +popl +popl +compcon +cacm +also +also +increment +comput +ramalingam +thesi +jalg_pap +dagstuhl +popl +popl +popl_not +synthes +gener +book +acta +comput +also +lape +popl +rep +thesi +topla +also +popl +popl +attribut +grammar +spaa +synthes +gener +book +synthes +gener +manual +acta +topla +popl +rep +thesi +psde +popl +topla +also +popl +popl +compcon +list +publicationsbooksrep +teitelbaum +synthes +gener +system +constructinglanguag +base +editor +springer +verlag +york +rep +teitelbaum +synthes +gener +refer +manual +third +edit +springer +verlag +york +chines +reprint +publish +world +publish +corpor +beij +china +rep +gener +languag +base +environ +press +cambridg +journal +publicationssagiv +rep +horwitz +precis +interprocedur +dataflow +analysi +applic +toconst +propag +appear +theoret +comput +scienc +rep +sequenti +natur +interprocedur +program +analysi +problem +appear +acta +informatica +acta +ramalingam +rep +increment +algorithm +gener +shortest +path +problem +appear +journal +algorithm +j_alg +ramalingam +rep +comput +complex +dynam +graph +problem +theoret +comput +scienc +binklei +horwitz +rep +program +integr +languag +procedur +call +transact +softwar +engin +methodolog +januari +tosem +ramalingam +rep +competit +line +algorithm +dynam +prioriti +order +problem +inform +process +letter +yang +horwitz +rep +program +integr +algorithm +accommod +semant +preservingtransform +transact +softwar +engin +methodolog +juli +rep +algebra +properti +program +integr +scienc +comput +program +horwitz +rep +effici +comparison +program +slice +acta +informatica +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +transact +program +languag +system +januari +topla +horwitz +prin +rep +integr +interf +version +program +transact +program +languag +system +juli +topla +rep +increment +evalu +attribut +grammarswith +unrestrict +movement +tree +modif +acta +informatica +rep +teitelbaum +languag +process +program +editor +ieee +comput +novemb +rep +demer +sublinear +space +evalu +algorithm +attribut +grammar +transact +program +languag +system +juli +rep +teitelbaum +demer +increment +context +depend +analysi +languag +base +editor +transact +program +languag +system +juli +teitelbaum +rep +cornel +program +synthes +syntax +direct +program +environ +commun +septemb +invit +papershorwitz +rep +program +depend +graph +softwar +engin +proceed +fourteenth +intern +conferenceon +softwar +engin +melbourn +australia +york +ics +rep +horwitz +semant +base +program +integr +proceed +second +european +symposium +program +nanci +franc +march +lectur +note +comput +scienc +ganzing +springer +verlag +york +book +chaptersrep +demand +interprocedur +program +analysi +us +logic +databas +applic +logic +databas +ramakrishnan +kluwer +academ +publish +boston +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +softwar +chang +impact +analysi +bohner +arnold +ieee +comput +societi +alamito +appear +reprint +fromacm +transact +program +languag +system +januari +topla +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +softwar +merg +slice +berzin +ieee +comput +societi +alamito +reprint +fromacm +transact +program +languag +system +januari +topla +horwitz +prin +rep +integr +interf +version +program +softwar +merg +slice +berzin +ieee +comput +societi +alamito +reprint +fromacm +transact +program +languag +system +juli +topla +ramalingam +rep +theori +program +modif +softwar +merg +slice +berzin +ieee +comput +societi +alamito +reprint +fromproceed +colloquium +combin +paradigmsfor +softwar +develop +brighton +april +lectur +note +comput +scienc +abramski +maibaum +springer +verlag +york +rep +teitelbaum +languag +process +program +editor +languag +architectur +program +environ +ichikawa +tsubotani +world +scientif +publish +compani +singapor +reprint +ieee +comput +novemb +teitelbaum +rep +cornel +program +synthes +syntax +direct +program +environ +interact +program +environ +barstow +sandewal +shrobe +mcgraw +hill +reprint +commun +septemb +teitelbaum +rep +horwitz +wherefor +cornel +program +synthes +softwar +develop +environ +wasserman +ieee +comput +societi +washington +reprint +proceed +sigplan +sigoa +symposiumon +text +manipul +portland +june +sigplan +notic +june +confer +publicationssiff +rep +program +gener +softwar +reus +appear +sigsoft +proceed +fourth +sigsoftsymposium +foundat +softwar +engin +francisco +octob +sigsoft +softwar +engin +note +rep +turnidg +program +special +program +slice +proceed +dagstuhl +seminar +partial +evalu +schloss +dagstuhl +wadern +germani +lectur +note +comput +scienc +danvi +glueck +thiemann +springer +verlag +york +dagstuhl +sagiv +rep +wilhelm +solv +shape +analysi +problem +languag +destruct +updat +confer +record +twenti +third +symposiumon +principl +program +languag +petersburg +york +popl +horwitz +rep +sagiv +demand +interprocedur +dataflow +analysi +sigsoft +proceed +third +sigsoftsymposium +foundat +softwar +engin +washington +octob +sigsoft +softwar +engin +note +fsea +rep +rosai +precis +interprocedur +chop +sigsoft +proceed +third +sigsoftsymposium +foundat +softwar +engin +washington +octob +sigsoft +softwar +engin +note +fseb +rep +hentenryck +semant +foundat +bind +time +analysi +imper +program +pepm +proceed +sigplan +symposium +onparti +evalu +semant +base +program +manipul +jolla +california +june +york +pepma +rep +shape +analysi +gener +path +problem +pepm +proceed +sigplan +symposium +onparti +evalu +semant +base +program +manipul +jolla +california +june +york +pepmb +sagiv +rep +horwitz +precis +interprocedur +dataflow +analysi +applic +toconst +propag +proceed +fase +colloquium +formalapproach +softwar +engin +aarhu +denmark +lectur +note +comput +scienc +moss +nielsen +schwartzbach +springer +verlag +york +tapsoft +rep +horwitz +sagiv +precis +interprocedur +dataflow +analysi +graph +reachabl +confer +record +twenti +second +symposiumon +principl +program +languag +francisco +popl +rep +horwitz +sagiv +rosai +speed +slice +sigsoft +proceed +second +sigsoft +symposium +onth +foundat +softwar +engin +orlean +decemb +sigsoft +softwar +engin +note +decemb +rep +solv +demand +version +interprocedur +analysi +problem +proceed +fifth +intern +confer +compilerconstruct +edinburgh +scotland +april +lectur +note +comput +scienc +fritzson +springer +verlag +york +ramalingam +rep +increment +algorithm +maintain +domin +tree +reducibleflowgraph +confer +record +twenti +first +symposiumon +principl +program +languag +portland +popl +rep +scan +grammar +parallel +attribut +evalu +data +parallel +proceed +fifth +symposium +parallel +algorithm +andarchitectur +velen +germani +june +juli +spaa +ramalingam +rep +modif +algebra +proceed +second +intern +confer +onalgebra +methodolog +softwar +technolog +amast +iowa +citi +iowa +ramalingam +rep +theori +program +modif +proceed +colloquium +combin +paradigmsfor +softwar +develop +brighton +april +lectur +note +comput +scienc +abramski +maibaum +springer +verlag +york +yang +horwitz +rep +program +integr +algorithm +accommod +semant +preserv +transform +sigsoft +proceed +fourth +sigsoft +symposiumon +softwar +develop +environ +irvin +decemb +softwar +engin +note +decemb +rep +algebra +properti +program +integr +proceed +european +symposium +program +copenhagen +denmark +lectur +note +comput +scienc +jone +springer +verlag +york +rep +bricker +illustr +interfer +interf +version +program +proceed +second +intern +workshop +softwareconfigur +manag +princeton +octob +softwar +engin +note +novemb +horwitz +pfeiffer +rep +depend +analysi +pointer +variabl +proceed +sigplan +confer +program +languagedesign +implement +portland +june +sigplan +notic +juli +rep +yang +semant +program +slice +program +integr +proceed +colloquium +current +issuesin +program +languag +barcelona +spain +march +lectur +note +comput +scienc +diaz +oreja +springer +verlag +york +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +proceed +sigplan +confer +programminglanguag +design +implement +atlanta +june +sigplan +notic +juli +rep +horwitz +prin +support +integr +program +variant +environ +forprogram +larg +proceed +intern +workshop +softwar +versionand +configur +control +grassau +germani +bericht +german +chapter +winkler +teubner +stuttgart +germani +horwitz +prin +rep +integr +interf +version +program +confer +record +fifteenth +symposium +principl +ofprogram +languag +diego +januari +york +horwitz +prin +rep +adequaci +program +depend +graph +repres +program +confer +record +fifteenth +symposium +principl +ofprogram +languag +diego +januari +york +poplb +rep +marceau +teitelbaum +remot +attribut +updat +languag +base +editor +confer +record +thirteenth +symposium +principl +ofprogram +languag +petersburg +januari +york +rep +teitelbaum +synthes +gener +proceed +sigsoft +sigplan +softwar +engineeringsymposium +practic +softwar +develop +environ +pittsburgh +april +sigplan +notic +rep +alpern +interact +proof +check +confer +record +eleventh +symposium +onprincipl +program +languag +salt +lake +citi +utah +januari +york +rep +static +semant +analysi +languag +base +editor +digest +paper +ieee +spring +compcon +francisco +march +ieee +comput +societi +washington +rep +optim +time +increment +semant +analysi +syntax +direct +editor +confer +record +ninth +symposium +principlesof +program +languag +albuquerqu +januari +york +teitelbaum +rep +horwitz +wherefor +cornel +program +synthes +proceed +sigplan +sigoa +symposium +text +manipul +portland +june +sigplan +notic +june +demer +rep +teitelbaum +increment +evalu +attribut +grammar +applic +tosyntax +direct +editor +confer +record +eighth +symposium +principlesof +program +languag +williamsburg +januari +york +softwarerep +bricker +rosai +wisconsin +program +integr +system +releas +april +releas +april +releas +juli +licens +site +click +herefor +licens +inform +patentsrep +horwitz +binklei +interprocedur +slice +comput +program +us +depend +graph +patent +number +novemb +pend +submissionsrep +sagiv +wilhelm +solv +shape +analysi +problem +languag +destruct +updat +comput +scienc +depart +univers +wisconsin +madison +august +submit +journal +public +horwitz +rep +sagiv +demand +interprocedur +dataflow +analysi +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper +click +access +latest +version +submit +journal +public +ramalingam +rep +program +comput +scienc +depart +univers +wisconsin +madison +novemb +click +access +latest +version +submit +journal +public +public +reportsrep +sagiv +wilhelm +shape +abstract +shape +analys +comput +scienc +depart +univers +wisconsin +madison +juli +rep +sagiv +wilhelm +solv +shape +analysi +problem +languag +destruct +updat +comput +scienc +depart +univers +wisconsin +madison +juli +leeuwen +mehlhorn +rep +increment +comput +dynam +algorithm +dagstuhl +seminar +report +intern +confer +research +center +comput +scienc +ibfi +schloss +dagstuhl +wadern +germani +rep +sagiv +horwitz +interprocedur +dataflow +analysi +graph +reachabl +datalogisk +institut +univers +copenhagen +copenhagen +denmark +april +diku +rep +wisconsin +program +integr +system +refer +manual +releas +comput +scienc +depart +univers +wisconsin +madison +juli +manual +psramalingam +rep +categor +bibliographi +increment +comput +confer +record +twentieth +symposiumon +principl +program +languag +charleston +york +tutori +paper +rep +increment +comput +unpublish +tutori +note +present +twentieth +symposium +principl +program +languag +charleston +klint +rep +snelt +program +environ +dagstuhl +seminar +report +intern +confer +research +center +comput +scienc +ibfi +schloss +dagstuhl +wadern +germani +binklei +horwitz +rep +identifi +semant +differ +program +procedur +extendedabstract +comput +scienc +depart +univers +wisconsin +madison +septemb +ball +horwitz +rep +correct +algorithm +reconstitut +program +depend +graph +comput +scienc +depart +univers +wisconsin +madison +juli +click +access +paper +ramalingam +rep +semant +program +represent +graph +comput +scienc +depart +univers +wisconsin +madison +decemb +click +access +paper +binklei +horwitz +rep +multi +procedur +equival +theorem +comput +scienc +depart +univers +wisconsin +madison +novemb +click +access +paper +rep +demonstr +prototyp +tool +program +integr +comput +scienc +depart +univers +wisconsin +madison +januari +click +access +paper +visitor +post +doc +studentsvisitor +mooli +sagiv +israel +scientif +center +jiazhen +robert +paig +post +doctor +associ +yang +univ +wisconsin +current +associ +professor +depart +comput +inform +scienc +nation +chiao +tung +univers +taiwan +prin +cornel +univ +current +associ +professor +depart +comput +scienc +univers +north +carolina +chapel +hill +studentsramalingam +bound +increment +comput +lectur +note +comput +scienc +springer +verlag +york +binklei +multi +procedur +program +integr +dissert +tech +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper +pfeiffer +depend +base +represent +program +refer +variabl +dissert +tech +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper +yang +algorithm +semant +base +programintegr +dissert +tech +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..312bf022 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,94 @@ +saeed +home +pagespe +function +statusclock +window +statu +date +window +settimeout +statusclock +speed +function +clearid +window +cleartimeout +saeed +mirza +tech +depart +comput +scienc +engin +indian +institut +technolog +kanpur +graduat +student +univ +wisconsin +madison +comput +scienc +depart +home +lucknow +india +like +spend +time +listn +indian +film +song +netsurf +read +comic +hero +calvin +love +peopl +beauti +seem +beauti +love +contact +home +offic +randal +apart +madison +comp +depart +univ +wisconsin +madison +dayton +street +madison +contact +best +email +saeed +wisc +friend +contact +right +pictur +wismad +suggest +send +check +guestbook +page +access +time +sinc +last +updat +saeed +copi +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..74613f0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,38 @@ +salli +peterson +home +page +salli +goodwin +peterson +lecturercomput +scienc +univers +wisconsin +dayton +madison +mail +salli +wisc +edutelephon +interest +desktop +comput +real +time +oper +system +program +languag +cours +taught +fall +comp +lectur +us +comput +last +chang +septemb +salli +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..365323cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,179 @@ +amit +home +page +warn +reach +protocol +offic +home +email +amit +wisc +snail +mail +comput +scienc +dept +univers +wisconsin +madison +princeton +madison +phone +educ +work +toward +guidanc +jeff +naughton +master +scienc +comput +scienc +univers +wisconsin +madison +bachelor +technolog +comput +scienc +engin +indian +institut +technolog +madra +research +interest +onlin +analyt +process +queri +process +perform +evalu +public +storag +estim +multidimension +aggreg +presenc +hierarchi +amit +shukla +prasad +deshpand +jeffrei +naughton +karthikeyan +ramasami +intern +confer +larg +databas +mumbai +bombai +paper +postscript +slide +present +vldb +interest +link +madison +run +boston +marathon +chicago +marathon +madison +marathon +york +marathon +seattl +marathon +georg +marathon +utah +databas +relat +pointer +research +network +activ +databas +bibliographi +server +databas +logic +program +competit +profil +spec +idea +intern +databas +confer +sort +name +univers +trier +databas +confer +sort +date +sigmod +server +larg +data +base +vldb +endow +databas +articl +archiv +massiv +digit +data +system +mdd +initi +multimedia +inform +sourc +nation +industri +inform +infrastructur +protocol +niiip +consortium +transact +process +council +transcoop +transact +manag +support +cooper +applic +olap +page +olap +introduct +pilot +softwar +interest +help +educ +needi +children +look +asha +home +page +person +pageand +bookmarksar +also +onlin +garfield diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..2b3515b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,41 @@ +ashwin +home +page +page +construct +name +ashwin +graduat +student +depart +comput +scienc +univers +wisconsin +madison +come +india +hadmi +undergradu +educ +indianinstitut +technolog +bombai +depart +iitb +fantast +place +worth +visit +like +contact +canfing +meto +find +whereabout +altern +send +email +sashwin +wisc +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..61770da4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,228 @@ +subramanya +sastri +home +pagei +subramanya +sastri +mugshot +mine +come +hospet +town +karnataka +india +year +school +near +hospet +awai +hampi +ruin +vijayanagara +empir +also +awai +tungabhadra +built +across +tungabhadra +river +place +beauti +unfortun +dont +photograph +place +would +scan +photo +album +long +undergradu +indian +institut +technolog +kanpur +depart +comput +scienc +engin +year +wonder +photo +album +fewphotograph +time +gokul +also +maintain +photo +album +contain +mani +photo +iitk +iitk +class +homepag +inform +classmatesat +iitk +presentcurr +graduat +student +comput +scienc +depart +univers +wisconsin +madison +plan +cours +registeredfor +spring +interestsmi +academ +interest +field +architectur +program +languag +compil +hope +graduat +field +cricket +favourit +sport +us +playphatta +iitk +tenni +ball +version +cricket +thati +champ +anyth +provid +entertainmentin +compani +friend +bookmark +link +cricket +site +enjoi +listen +music +anyth +pleasant +must +dont +consid +hard +rock +metal +someth +realli +pleasant +donot +watch +much +whatev +watch +like +seinfeld +sshow +much +read +goe +voraci +reader +rather +whati +like +better +like +like +horror +fantasi +neither +like +scienc +fiction +unsuccesfulli +tri +come +grip +like +someth +earth +know +mean +someth +hint +romanc +ifposs +jeffrei +archer +favourit +author +date +also +like +jane +austen +pride +prejudic +much +talk +read +ramesh +mahadeven +sarticl +make +interest +read +also +like +plai +bridg +back +kanpur +pick +wonderfulgam +compani +friend +long +time +back +itagain +also +like +solv +crossword +cryptic +kind +inth +past +year +devot +much +time +hobbi +mine +last +updat +januari +send +comment +suggest +sastri +wisc +eduunivers +wisconsin +madison +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..aeb0fa6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,347 @@ +susan +calcari +home +page +susan +calcarimanag +scout +servicescomput +scienc +departmentunivers +wisconsin +madisonsc +wisc +edumi +titl +internet +scout +manag +scout +servic +comput +scienc +depart +univers +wisconsin +madison +scoutservic +project +internicand +support +nation +sciencefound +scout +servic +support +internet +user +thehigh +educ +commun +provid +time +inform +bestresourc +tool +internet +goal +help +research +andeduc +internet +effect +work +week +filter +hundr +item +internet +edit +organ +import +present +inform +multipl +usabl +format +servic +includ +scout +report +scout +toolkit +happen +come +soonth +scout +sprout +report +newslett +written +kid +kid +peopl +receiv +scout +report +week +email +andthousand +read +scout +report +annotatedlist +best +newli +discov +internet +resourc +tool +public +kind +devot +research +andeduc +commun +scout +report +select +itemsinclud +issu +peopl +receiv +happeningspost +everi +weekdai +thousand +read +orth +newsgroup +scout +servic +page +moreinform +servic +profession +background +involv +nation +wide +internet +project +sinc +wheni +join +merit +arbor +organ +manag +thensfnet +backbon +project +work +informationservic +divis +project +spent +three +year +speak +tonat +intern +higher +educ +audienc +internetand +resourc +also +develop +produc +merit +network +seminarseri +first +nation +seminar +seri +focus +need +internetend +user +later +becam +director +network +inform +servic +forcerfnet +respect +internet +provid +base +diego +wrote +propos +result +award +part +internicproject +year +cooper +agreement +nation +sciencefound +third +year +agreement +termin +theport +intern +base +diego +chose +continu +workof +internet +scout +time +elect +reloc +wisconsin +andrequest +approv +reloc +scout +project +thecomput +scienc +depart +madison +heartilyagre +futur +plan +scout +servicesat +time +scout +servic +staff +includ +jack +solock +speciallibrarian +project +expand +theaddit +includ +open +systemadministr +posit +open +speciallibrarian +posit +june +matthew +livesei +join +staff +aproject +assist +goal +staff +includ +expans +thescout +toolkit +addit +disciplin +specif +servic +asscout +report +page +devot +specif +area +studi +scout +servic +also +branch +researcharea +collabor +intern +support +servic +group +madison +campus +potenti +research +topic +includenetwork +inform +discoveri +retriev +nidr +anddisciplin +orient +inform +gather +public +depend +onth +natur +research +addit +staff +hire +willincludecomput +scienc +research +graduat +undergradu +level +inform +visit +site +find +scout +servic +join +ofour +mail +list +ifyou +interest +appli +open +posit +theonlin +descript +systemadministr +special +librarian +send +aresum +write +sampl +address +feel +free +contactm +telephon +email +susan +calcariinternet +scout +comput +scienc +departmentunivers +wisconsin +madison +dayton +street +madison +scal +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..17ab7562 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,64 @@ +chandrasekar +home +page +welcom +chandrasekar +homepag +worri +happi +present +graduat +student +depart +comput +scienc +past +born +june +coimbator +southern +state +tamilnadu +inindia +high +school +educ +higher +secondari +school +coimbator +undergradu +educ +theindian +institut +technolog +kharagpur +major +dept +comput +scienc +engin +person +stuff +resid +kendal +avenu +madison +officedept +comput +scienc +dayton +madison +sivasankaran +chandrasekar +schandra +wisc +last +updat +finger +find +whereabout +page +access +time +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..1e03e7fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,36 @@ +eric +schnarr +home +pageer +schnarr +schnarr +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaphon +advisor +larusresearch +interest +architectur +descript +languagesfunct +languag +designinterest +link +wisconsin +wind +tunnel +sacm +hockei +club +dragon +byte +schnarr +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..42a2fe44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,91 @@ +yanni +schoina +home +page +yanni +schoina +schoina +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +note +page +construct +advisor +mark +hill +research +interest +parallel +systemspubl +fine +grain +access +control +distribut +share +memori +ioanni +schoina +babak +falsafi +alvin +lebeck +steven +reinhardt +jame +laru +david +wood +sixth +intern +confer +architectur +support +programminglanguag +oper +system +asplo +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +mark +hill +jame +laru +ann +roger +david +wood +supercomput +educ +univers +crete +iraklio +univers +crete +iraklio +last +updat +juli +cretan +cook diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..de276481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,47 @@ +beverli +seavei +home +page +beverli +seavei +current +regist +grad +student +comput +scienc +switch +special +degre +comput +biologi +comput +interest +mine +includ +asian +classic +danc +differ +version +ramayana +india +southeast +asia +danc +drama +ramayana +ramakien +india +southeast +asia +wish +could +finger +give +account +hairbal +keyboard +finger +keeper +instead diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..8da4b1f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,79 @@ +scott +colvil +home +pagescott +colvil +home +page +mail +wisc +eduoffic +address +comput +scienc +offic +dayton +madison +home +franc +madison +welcom +welcom +home +page +well +school +back +univers +wisconsin +madison +seen +largest +ball +chees +want +school +link +uwisc +home +page +uwisc +home +pagein +addit +list +page +find +interest +hopefulli +also +enjoi +beer +world +drink +game +absolut +add +caffein +rate +soda +guid +lock +pickingand +educ +artsi +page +world +fact +book +constitut +english +dictionari +roget +thesauru +poetri +databas +wisc +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..c705d419 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,74 @@ +steve +seitz +anim +writeup +imag +motion +analysi +charact +anim +control +steve +seitz +chuck +dyerour +research +motiv +problem +teachinga +graphic +model +perform +realist +motion +problem +hasit +root +cartoon +anim +modern +applic +tocomput +anim +virtual +realiti +teleconferenc +robot +task +endow +graphic +model +knowledg +performa +repertoir +interest +motion +learn +motion +beinvok +directli +high +level +cue +smile +walk +infer +anabstract +goal +store +cu +levelev +virtual +input +devic +imag +sequenc +project +includ +analysi +period +motion +track +rigid +nonrigid +object diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..4627dae4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,132 @@ +steve +seitz +view +interpol +view +synthesi +imag +interpol +investig +steve +seitz +chuck +dyerw +devis +provabl +correct +autom +techniqu +creat +view +scene +basi +view +scene +techniqu +reli +geometr +imag +interpol +known +morph +graphicscommun +produc +intermedi +imag +although +morph +techniquescurr +enjoi +widespread +theoret +validityha +establish +particular +interpol +viewsof +scene +produc +sequenc +physic +valid +view +ofthat +scene +surprisingli +answer +provid +imag +first +undergo +simplerectif +procedur +certain +assumpt +visibl +theproject +process +satisfi +view +synthesi +work +describ +us +autom +stereo +techniqu +todetermin +imag +correspond +recent +work +view +morph +consid +user +interact +us +guid +interpol +comput +interpol +three +differ +imag +pair +therectifi +origin +imag +shown +left +right +click +theinterpol +imag +center +mpeg +movi +show +computedinterpol +view +interpol +origin +interpol +origin +work +describ +physic +valid +view +synthesi +imag +interpol +seitz +dyer +proc +workshop +represent +visual +scene +last +chang +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..1eacae64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,158 @@ +view +morph +steve +seitz +view +morph +investig +steve +seitz +chuck +dyer +relat +public +view +morph +appear +siggraph +toward +imag +base +scene +represent +us +view +morph +appear +icpr +imag +morph +techniqu +gener +compel +transit +betweenimag +howev +differ +object +pose +viewpoint +often +causeunnatur +distort +imag +morph +difficult +correct +manual +us +basic +principl +projectivegeometri +paper +introduc +simpl +extens +imag +morphingthat +correctli +handl +project +camera +scene +transform +techniqu +call +view +morph +work +prewarp +imagesprior +comput +morph +postwarp +interpol +imag +knowledg +shape +requir +techniqu +appliedto +photograph +draw +well +render +scene +abil +synthes +chang +viewpoint +imag +structureafford +wide +varieti +interest +effect +simpl +imagetransform +view +morph +imag +object +taken +differ +viewpoint +produc +illus +physic +move +virtual +camera +click +mpeg +movi +interpol +morph +view +differ +face +produc +simultan +interpol +facial +shape +color +pose +click +mpeg +movi +interpol +mpeg +movi +view +morph +resolut +mona +lisa +mona +lisa +reflect +high +resolut +mpeg +movi +frame +resolut +mpeg +movi +frame +click +mpeg +movi +jude +shavlik +chuck +dyer +last +chang +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..16c08604 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,196 @@ +period +motion +inform +period +cyclic +motion +analysi +steve +seitz +chuck +dyermani +real +life +motion +period +frame +refer +instanc +human +locomotori +motion +walk +run +skip +shuffl +areperiod +frame +refer +move +person +havedevelop +approach +determin +imag +sequenc +could +beenproduc +object +whose +motion +period +refer +frame +unlik +previou +attempt +determin +period +inform +ourapproach +allow +camera +move +film +inform +poscript +paper +click +period +tracethi +imag +show +period +trace +line +recov +imagesequ +phonograph +turntabl +ramp +correspond +moment +timewher +turntabl +momentarili +slow +period +trace +shownsuperimpos +error +surfac +recov +real +repeat +motion +tend +perfectli +even +period +variesslightli +cycl +next +physic +import +changesin +scene +gener +period +defin +cyclic +motionsthat +make +period +variat +explicit +represent +call +period +trace +compact +pure +tempor +describ +evolutionof +object +scene +without +refer +spatial +quantiti +asposit +veloc +delimit +cycl +identifi +correspondencesacross +cycl +period +trace +provid +mean +tempor +parsinga +cyclic +motion +addit +sever +pure +tempor +motion +featur +canb +deriv +relat +natur +locat +irregular +period +tracecan +also +us +medic +imag +enhanc +composit +imag +fromdiffer +cycl +furthermor +period +trace +reliabl +recoveredfrom +imag +sequenc +view +invari +fashion +us +theori +affin +invari +inform +poscript +paper +clickher +heart +imag +enhanc +correspond +angiograph +imag +determin +period +trace +bottom +composit +correspond +imag +note +additionalstructur +visibl +composit +appar +singl +imag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..455cc72b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,62 @@ +steve +seitz +home +page +steve +seitz +seitz +wisc +graduat +student +berkelei +math +area +interest +imag +motion +analysi +imag +base +render +machin +vision +comput +graphic +research +project +view +morph +view +synthesi +mpeg +movi +show +comput +interpol +imag +left +click +exampl +mpeg +movi +analysi +cyclic +motion +recent +public +stuff +frequent +link +wisconsin +comput +vision +group +surreal +cach +click +closer +look +seitz +last +chang +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..b590c7e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,199 @@ +home +page +madison +machin +learn +research +group +home +page +contain +relev +inform +thememb +machin +learn +research +group +mlrg +univers +wisconsin +madison +tabl +content +group +member +mlrg +archiv +recent +paper +mlrg +archiv +dataset +domain +theori +mlrg +paper +read +schedul +seminar +machin +learn +graduat +cours +relev +local +link +us +extern +link +group +member +carolyn +allex +jonathon +bodner +kevin +cherkauer +mark +craven +tina +eliassi +richard +maclin +graduat +august +david +opitz +graduat +august +jude +shavlik +mlrg +archiv +recent +papersvisit +page +describ +recent +public +ascii +file +contain +list +recentabstractsi +also +avail +mlrg +archiv +dataset +domain +theoriesy +access +directori +contain +severalml +testb +also +access +wisconsin +breast +cancer +databas +prof +olvi +mangasarian +sgroup +mlrg +paper +read +schedul +mlrg +current +schedul +paper +read +schedul +also +line +seminar +current +schedul +local +seminar +relev +local +link +machin +learn +graduat +cours +madison +machin +learn +math +program +group +comput +biologi +madison +dept +madison +comput +neurosci +madison +group +madison +comput +vision +group +madison +robot +group +madison +dept +home +page +madison +home +page +doit +madison +center +mathemat +scienc +gopher +madison +graduat +school +madison +librari +recent +tabl +content +abstract +select +journal +mostli +wendt +librari +readabl +wisc +proc +workshop +agent +learn +agent +held +intern +machin +learn +confer +local +link +last +modifi +jude +shavlik +shavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..b5f36f8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,130 @@ +shubu +mukherje +home +page +shubu +mukherje +shubu +wisc +fiance +mimi +nephew +avirup +month +graduat +research +assist +comput +scienc +departmentunivers +wisconsin +madison +west +dayton +street +madison +usaphon +shubhendu +mukherje +click +button +jump +correspond +home +page +articl +advisor +mark +hill +research +project +wisconsin +wind +tunnel +public +wisconsin +comput +architect +world +wide +comput +architect +badger +ballroom +danc +team +person +interest +hobbi +morph +dionisio +courtesi +steve +seitz +random +interest +linkseducationph +univers +wisconsin +madison +spring +expect +univers +wisconsin +madison +tech +indian +institut +technolog +kanpur +india +research +summari +coher +network +interfac +dissert +cachabl +queue +design +space +isca +progress +distribut +share +memori +mechan +cooper +share +memori +isca +commod +workstat +submit +public +cach +coher +protocol +custom +protocol +irregular +applic +ppopp +grai +softwar +dirsw +isca +ppopp +parallel +simul +wind +tunnel +tutori +cach +simul +copyright +copi +shubu +mukherje +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..130274be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,27 @@ +michael +siff +home +page +michael +siff +philosophi +research +academ +interest +run +club +fall +midwest +seminar +wonder +wai +wast +time +new +inform +resourc +book +movi +televis +sport +humor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..6f425770 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,31 @@ +skrentni +home +page +skrentni +lecturerc +coordinatorgradu +studentcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +comput +sciencesemail +skrentni +wisc +edutelephon +relat +link +univers +wisconsin +depart +univers +wisconsin +groupskrentni +wisc +last +updat +februari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..ae4e6bf8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,79 @@ +bryan +home +page +bryan +graduat +studentcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +mail +wisc +edutelephon +comput +scienc +univers +wisconsin +madison +comput +scienc +purdu +univers +interest +intellig +help +system +human +comput +interact +knowledg +represent +oper +system +activ +select +recent +public +travi +step +toward +intellig +unix +help +system +knowledg +represent +unix +util +technic +report +univers +wisconsin +madison +april +miller +fredriksen +empir +studi +reliabl +unix +util +commun +relat +link +univers +wisconsin +depart +univers +wisconsin +group +professor +larri +travi +advisor +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..4b3b430f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,176 @@ +larri +travi +home +page +larri +travisprofessorcomput +scienc +departmentunivers +wisconsin +dayton +madison +mail +travi +wisc +edutelephon +univers +californa +angel +interest +expert +system +procedur +control +automat +deduct +comput +support +understand +complex +data +philosoph +foundat +ofartifici +intellig +comput +manag +social +implic +comput +research +summari +research +center +around +us +logic +basi +knowledg +formal +expert +system +deduct +augment +databasesystem +recent +work +focus +procedur +control +automaticdeduct +design +system +support +contruct +displai +test +high +level +abstract +pattern +form +informationcontain +larg +heterogen +databas +special +attent +beingdevot +represent +geograph +inform +waysthat +enhanc +data +integr +data +visualiz +map +activ +involv +sever +expert +system +develop +project +andwith +larg +intellig +databas +project +incorpor +databas +model +visual +aid +singl +integr +system +organiz +social +issu +associ +introduct +inform +technolog +analysi +suppositionsunderli +altern +approach +artifici +intellig +current +student +chuck +ohar +bryan +scott +swanson +andi +whitsitt +derek +zahn +recent +public +could +failur +expert +system +develop +implement +oravec +appear +journal +system +softwar +comput +metaphor +artifici +intellig +reflex +examin +falsework +west +artifici +intellig +magazin +societi +landscap +altern +metaphor +artifici +intellig +west +artifici +intellig +magazin +interest +link +wisc +dept +wisc +group +last +chang +june +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..15672c56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,86 @@ +avinash +sodani +home +page +avinash +sodani +sodani +wisc +graduat +student +depart +comput +scienc +univers +wisconsin +madison +west +dayton +street +madison +educ +comput +scienc +univers +wisconsin +madison +tech +hon +comput +scienc +indian +institut +technolog +kharagpur +india +juli +academ +interest +comput +architectur +multiscalar +kestrel +project +program +languag +compil +interest +cours +project +packag +java +download +packag +meet +batch +mate +relat +link +info +center +depart +rank +depart +india +relat +link +india +new +hindu +onlin +edit +random +link +look +kgpite +follow +toll +free +directori +will +world +cricket +page +access +time +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..df200481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,522 @@ +guri +sohi +home +page +gurindar +sohi +sohi +wisc +associ +professor +comput +scienc +andelectr +comput +engin +address +educ +research +interest +summari +current +graduat +student +recent +talk +recent +public +recent +graduatesaddress +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usasohi +wisc +eduphon +department +offic +educ +comput +scienc +univers +illinoi +urbana +elect +engin +univers +illinoi +urbana +electr +electron +engin +birla +institut +technolog +scienc +pilani +india +research +interest +instruct +level +parallel +process +compil +architectur +share +memori +multiprocessor +memori +system +research +summari +current +research +focus +design +thehighest +perform +uniprocessor +current +gener +current +investig +architectur +circa +processor +plenti +transistor +availableon +chip +challeng +resourc +getth +highest +possibl +perform +execut +sequenti +program +target +sustain +execut +ofov +instruct +cycl +ordinari +numer +applic +program +research +group +investig +sever +issu +needto +resolv +goal +achiev +studi +character +thenatur +instruct +level +parallel +numericappl +program +order +understand +avail +parallel +andhow +could +exploit +bulk +group +research +effort +expend +continu +thedevelop +multiscalar +process +model +novel +paradigm +exploit +current +develop +multiscalar +compil +andcarri +detail +simul +studi +assessth +potenti +multiscalar +concept +current +graduat +student +todd +austin +scott +breach +andrea +moshovo +vijaykumarrec +talkswil +instruct +set +import +futur +processor +given +risc +symposium +held +watson +researchcent +yorktown +height +novemb +file +compress +postscript +gener +framemak +multiscalar +processor +gener +multiscalar +talk +given +sever +place +file +compress +postscript +gener +framemak +recent +publicationshigh +bandwidth +address +translat +multipl +issu +processor +austin +sohi +appear +inrd +annual +intern +symposium +comput +architectur +appendix +ofdetail +resultsi +also +avail +zero +cycl +load +microarchitectur +support +reduc +load +latencyt +austin +sohi +annual +intern +symposium +microarchitectur +micro +microarchitectur +superscalar +processorsj +smith +sohi +proceed +ieee +decemb +hardwar +mechan +dynam +reorder +memori +referencesm +franklin +sohi +appear +ieee +transact +comput +multiscalar +processor +sohi +breach +vijaykumar +intern +symposium +comput +architectur +streamlin +data +cach +access +fast +address +calcul +austin +pnevmatikato +sohi +intern +symposium +comput +architectur +anatomi +regist +file +multiscalar +processor +breach +vijaykumar +sohi +annual +intern +symposium +microarchitectur +micro +request +combin +multiprocessor +arbitrari +interconnect +network +lebeck +sohi +ieee +transact +parallel +distribut +system +effici +detect +pointer +arrai +access +error +austin +breach +sohi +sigplan +confer +program +languag +design +implement +guard +execut +branch +predict +dynam +processor +pnevmatikato +sohi +intern +symposium +comput +architectur +memori +system +goodman +sohi +handbook +electr +engin +press +control +flow +predict +dynam +processor +pnevmatikato +franklin +sohi +annual +intern +symposium +microarchitectur +micro +regist +traffic +analysi +streamlin +inter +oper +communicationin +fine +grain +parallel +processor +franklin +sohi +annual +intern +symposium +microarchitectur +micro +expand +split +window +paradigm +exploit +fine +grain +parallel +franklin +sohi +intern +symposium +comput +architectur +dynam +depend +analysi +ordinari +program +austin +sohi +intern +symposium +comput +architectur +effici +detect +pointer +arrai +access +errorst +austin +breach +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +decemb +guard +execut +branch +predict +dynam +processorsd +pnevmatikato +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +novemb +knapsack +zero +cycl +memori +hierarchi +componentt +austin +vijaykumar +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +novemb +tetra +evalu +serial +program +perform +fine +grain +parallel +processorst +austin +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +juli +recent +gradstodd +austin +april +hardwar +softwar +mechan +reduc +load +latencydionisio +pnevmatikato +decemb +incorpor +guard +execut +exist +instruct +setsmanoj +franklin +decemb +multiscalar +architecturemark +friedman +januari +architectur +character +prolog +executionsriram +vajapeyam +decemb +instruct +level +character +crai +processormen +chow +chiang +septemb +memori +system +design +base +multiprocessor +last +updat +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..cb8489bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,183 @@ +solomon +home +page +marvin +solomon +professor +former +chair +chair +goodman +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +phone +solomon +wisc +research +interest +object +orient +databas +system +softwar +develop +support +environ +distribut +oper +system +comput +network +design +implement +program +languag +program +languag +theori +recent +publicationstoward +effect +effici +free +space +manag +proc +sigmod +conf +manag +data +june +mark +mcauliff +michael +carei +andmarvin +solomon +abstractpostscriptth +gmap +versatil +tool +physic +data +independ +proc +conf +larg +databas +septemb +odyssea +tsatalo +marvin +solomon +andyanni +ioannidi +abstractpostscriptexpand +version +appear +inth +vldb +journalv +april +abstractpostscriptshor +persist +applic +proc +sigmod +conf +manag +data +june +michael +carei +david +dewitt +michael +franklin +nanci +hall +mark +mcauliff +jeffrei +naughton +daniel +schuh +marvin +solomon +odyssea +tsatalo +seth +white +andmichael +zwillingavail +astech +report +overview +capitl +softwar +develop +environ +fourth +intern +workshop +softwar +configur +manag +paul +adam +andmarvin +solomon +avail +astech +report +updat +version +appear +lectur +note +comput +scienc +persist +object +logic +paul +adam +andmarvin +solomon +avail +astech +report +point +interest +graphic +interfac +room +program +built +us +java +home +page +spring +univ +wisconsin +comput +scienc +depart +shore +project +home +page +shore +project +photoalbum +todai +dilbert diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..3c22ba34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,10 @@ +sowmya +home +page +welcom +home +page +sowmya +subramanian +sowmya +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..06a6a8a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,91 @@ +shilpa +lawand +home +page +welcom +shilpa +lawand +home +pagei +graduat +student +depart +comput +scienc +univers +wisconsin +madison +person +stuffa +link +pastfor +info +schoolher +resum +html +ascii +comput +scienc +second +love +us +resours +stuff +want +place +syster +women +comput +scienc +stuff +relat +madisonsurf +madisonst +wisconsin +inform +serverth +hoofer +sail +clubowl +music +book +movi +java +shilpa +signatur +meet +first +lovesnowi +homepag +cool +linksher +iswher +finger +three +judg +panel +philadelphia +vote +constitut +follow +link +read +decis +page +access +time +sinc +june +send +comment +suggest +email +tossl +wisc +shilpal +wisc +thru +guest +formlast +modifi +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..40f03605 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,49 @@ +jeremi +stenglein +home +page +jeremi +stenglein +graduat +student +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +offic +comput +scienc +phone +mail +stenglei +wisc +teach +comput +scienc +section +gener +home +page +section +home +page +take +construct +compil +link +comput +scienc +depart +home +pageth +simpson +home +pageespn +sport +hotwir diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..f3771503 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,61 @@ +steve +reinhardt +home +page +steven +reinhardt +graduat +student +computerarchitectur +work +wisconsin +wind +tunnelgroup +advisor +david +wood +although +project +mark +hill +andjim +laru +often +feel +free +tell +mewhat +well +plan +finish +fall +join +faculti +ofth +univers +michigan +eec +depart +januari +interest +find +page +publicationsresearch +summari +email +stever +wisc +click +finger +phone +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usalast +updat +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..fd89c6e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,74 @@ +john +strikwerda +home +page +john +strikwerda +professor +comput +scienc +john +strikwerdadepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +email +strik +wisc +telephon +fall +teach +problem +solv +us +comput +begin +januari +assign +nation +scienc +foundat +year +click +inform +numer +analysi +qualifi +exam +research +interest +numer +analysi +comput +fluid +dynamicsmyoffici +depart +home +pageoth +stuff +field +museum +point +search +rate +home +page +inform +chicago +best +chicago +tribun +talk +radio +show +car +footballmi +kid +nathan +nathan +drew diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..c6cea69d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +subba +home +page +thing +enjoi +calvin +hobb +late +show +david +letterman +seinfeld +interest +prooocessor +histor +interest +paper +evalu +stream +buffer +secondari +cach +replac +decoupl +integ +execut +superscalar +processor +subbarao +cambridg +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..72ca79f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,93 @@ +chiang +home +page +chiang +depart +univers +wisconsin +madisonoffic +stelephon +mail +suhui +wisc +educlick +send +emailoffic +hour +thur +page +still +construct +ta +fall +public +applic +characterist +limit +preemption +complet +parallel +processor +schedul +polici +rajesh +mansharamani +mari +vernon +proc +sigmetr +conf +measur +model +comput +system +nashvil +dynam +static +quantum +base +parallel +processor +alloc +mari +vernon +workshop +schedul +strategi +parallel +process +conjunct +ipp +april +search +engin +yahoo +sourc +resourc +bibliographi +world +wide +virtual +librari +subject +catalogu +link +relat +taiwan +taiwan +network +servic +sinanet +shop +magzin +new +job +calendar +seednet +vistor +guid +taiwan +academia +sinica +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..8cc1ff73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,253 @@ +david +sundaram +stukel +homepag +david +sundaram +stukel +page +upon +effronteri +push +hand +sever +patient +femor +arteri +blood +spurt +blind +anesthetist +hall +scream +browbeck +tri +knee +groin +manag +hamstr +scalpel +crawl +floor +stab +feet +leg +voilet +baboon +assist +woman +ever +care +damn +realli +wig +climb +tabl +pois +jump +browbeck +feet +stomp +cop +rush +william +burrough +nake +lunch +construct +page +catapult +reader +page +choos +page +index +brief +class +take +class +link +relat +comput +scienc +site +dedic +smart +cloth +also +steve +mann +page +link +view +current +see +wearabl +camera +site +link +inform +artifici +life +santa +institut +specif +link +project +call +tierra +thoma +recent +dilbert +strip +technic +comput +scienc +math +joke +somewher +link +philosoph +scientif +artist +natur +physic +conscious +surviv +research +laboratori +site +info +variou +destruct +show +organ +arcosanti +arcolog +site +outsid +phoenix +krishnamurti +foundat +site +tell +centuri +philosoph +link +variou +beat +writer +includ +pictur +site +fill +info +wait +link +variou +new +sourc +packer +new +scientist +onlin +regist +harass +mail +reward +dozen +interest +factoid +astound +friend +scientif +american +onlin +take +advantag +hypertext +addit +provid +select +current +articl +print +edit +scienc +new +publish +weekli +contain +smaller +current +articl +hindu +nation +newspap +india +onion +link +local +interest +madison +astronomi +depart +page +specif +washburn +observatori +public +view +univers +len +insignific +piec +histori +astronomi +madison +madison +weather +info +obtain +follow +site +webweath +nation +weather +servic +madison +channel +new +weather +channel +new +weather +link +interest +home +page +late +timothi +leari +link +numer +articl +written +note +optimist +noam +chomski +disinform +great +list +conspiraci +theori +buri +within +ultra +trendi +movi +review +back +madison +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..d2067ec1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,11 @@ +brian +swander +home +pagebrian +swander +think +offic +offic +hour +bookmark +mark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..c546173b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,139 @@ +ariel +tamchesari +tamch +research +assistantemail +tamch +wisc +ariel +tamch +comput +scienc +depart +west +dayton +street +madison +typic +pose +angri +posei +organ +fall +oper +system +comput +scienc +colleg +park +offic +sresearch +paradyn +parallel +perform +toolsstatu +search +thesi +topic +els +interest +parallel +perform +toolsparallel +distribut +oper +systemsbluesth +simpsonsseinfeldskiingskinetkeyston +favorit +area +snowboard +joke +differ +snowboard +vacum +cleaner +dirt +attach +snowboard +greet +peopl +whoa +sorri +dude +differ +municip +bond +snowboard +municip +bond +eventu +matur +gener +incom +hate +countri +music +fortran +cool +link +yahooespncpu +infoskinetoth +stuff +talk +exokernel +oper +system +architectur +applic +level +resourc +manag +octob +paper +techniqu +tool +distribut +share +memori +perform +improv +spring +callaghan +talk +supercomput +interconnect +network +april +talk +zebra +stripe +network +file +system +need +structur +file +system +raid +paper +perform +block +wait +free +highli +concurr +object +asynchron +share +memori +multiprocessor +version +postscript +version +spring +paper +analysi +risc +instruct +enhanc +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..93da0266 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,127 @@ +jeff +lampert +home +pagejeff +lampert +home +page +ricardo +montalban +voic +welcom +home +page +know +nota +pictur +least +good +still +look +foron +incrimin +doesn +make +look +like +aconvict +babi +pictur +high +school +yearbook +lasttim +show +someon +never +heard +cute +babi +someon +think +well +found +coupl +pictur +tick +threaten +turn +intoa +human +dispens +took +pictur +henc +pictur +befound +separ +page +click +anautograph +copi +sign +name +monitor +pictureappear +choos +link +weasel +seek +take +pace +turn +click +basic +factswho +person +last +night +academ +work +relatedwhat +class +take +work +dept +resum +entertainmentbook +movi +music +program +newsgroup +import +subjectsfriendsno +show +show +sick +theme +song +hobbi +club +organizationsgroup +plu +wish +inmi +favorit +linksugh +sound +like +song +sound +music +servo +look +juli +andrew +fire +crow +good +mstk +eclect +paraphenaliai +would +miscellan +straight +forward +tick +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..2b829d36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +todd +homepagetodd +homepagein +fall +teach +section +sinc +area +mathemat +program +plug +mathematicalprogram +page +contain +wealth +inform +mathematicalprogram +tmunson +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..e975541a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian +home +pagebrian +toonen +comput +scienc +departmentunivers +wisconsin +dayton +streetmadison +offic +cswhatev +chief +seattleth +ground +tipi +medit +life +itsmean +accept +kinship +creatur +acknowledgingun +univers +thing +infus +thetru +essenc +civil +luther +stand +bear +oglala +siouxlast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..aa75f430 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,19 @@ +thano +tsioli +home +page +site +netscap +enhanc +read +shouldconsid +upgrad +browser +latest +version +netscap +ifthat +option +page +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..8c282281 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,66 @@ +todd +turnidg +todd +dougla +turnidgeschoolcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +homemuppet +babylon +milton +madison +eyesightright +axi +left +axi +graduat +student +depart +comput +sciencesat +univers +wisconsin +madison +year +work +professorthoma +rep +studyingprogram +languag +teach +section +hold +mathematicsand +computersci +case +western +reserveunivers +locat +cleveland +ohio +origin +kent +ohio +myfamili +live +judg +compani +keep +click +enough +evid +awai +long +time +amus +shortcut +last +modifi +todd +turnidg +turnidg +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..bd221275 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,54 @@ +taxiao +wang +home +page +welcom +taxiao +wang +home +page +page +heavi +construct +click +finger +contact +inform +taxiao +wang +graduat +student +teach +assist +comput +scienc +depart +univers +wisconsin +madison +offic +bldg +dayton +street +madison +offic +phone +home +phone +mail +twang +wisc +visitor +number +sinc +home +page +visit +time +sinc +visitor +number +sinc +last +updat +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..b24cc52f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,16 @@ +shaft +home +pageuri +shaft +home +pageemail +wisc +eduinterest +diversionsstart +trek +meet +microsoft +start +trek +meet +window diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..1ff9c943 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,76 @@ +venkatesh +ganti +home +pagevenkatesh +ganti +vganti +wisc +graduat +studentoffic +comput +scienc +depart +dayton +madison +usaphon +note +page +construct +past +present +graduat +student +univers +wisconsin +madison +fall +earlier +undergradu +student +madra +india +nativ +kakinada +andhra +pradesh +info +asha +asha +basic +educ +asha +madison +india +click +know +india +godav +homepag +hostel +madra +godav +yearbook +hope +onlin +sometim +research +interest +databas +work +till +btech +project +real +time +databas +want +look +real +time +genesi +madison +group +homepag +last +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..3587ea90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,113 @@ +vijai +home +page +vijaykumar +vijai +wisc +profession +affili +comput +scienc +depart +univers +wisconsin +madison +contact +address +comput +scienc +depart +dayton +street +madison +phone +email +vijai +wisc +advisor +guri +sohi +project +multiscalar +project +educ +doctor +univers +wisconsin +madison +august +undergradu +birla +institut +technolog +scienc +pilani +india +research +compil +multiscalar +architectur +dissert +distribut +regist +file +design +anatomi +regist +file +multiscalar +processor +breach +vijaykumar +sohi +annual +intern +symposium +microarchitectur +micro +compil +regist +commun +regist +commun +strategi +multiscalar +architectur +breach +vijaykumar +sohi +submit +annual +intern +symposium +microarchitectur +micro +multiscalar +processor +multiscalar +processor +sohi +breach +vijaykumar +intern +symposium +comput +architectur +schedul +regist +commun +compil +regist +commun +multiscalar +architecturet +vijaykumar +sohi +go +work +memori +data +depend +predict +person +side diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..e9344bf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,101 @@ +john +watrou +home +pagejohn +watrou +watrou +wisc +comput +scienc +departmentunivers +wisconsin +madison +dayton +streetmadison +telephon +public +john +watrou +dimension +quantum +cellular +automata +proc +symp +foundat +comput +scienc +john +watrou +polynomi +time +algorithm +artin +whapl +approxim +theorem +number +theori +fourth +confer +canadiannumb +theori +associ +assort +link +quantum +comput +link +quantum +comput +archiv +stanford +quantum +inform +home +page +oxford +particl +beam +physic +laboratori +quantum +inform +page +ucla +laboratori +theoret +quantum +comput +montreal +lanl +preprint +bibliographi +comput +scienc +bibliographi +hypertext +bibliographi +project +hypatia +gener +refer +element +stylehypertext +webster +interfaceroget +thesauru +random +link +parasol +recordsplayst +linksweath +forecast +madisonth +isthmu +daili +pagemathemat +quotat +servermathematician +biographiesgeek +site diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..5e8d5506 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,132 @@ +weiru +home +page +eiru +home +page +send +email +ppppleas +find +around +sometim +think +english +speaker +commit +asylum +verbal +insan +languag +peopl +recit +plai +plai +recit +ship +truck +send +cargo +ship +havenos +feet +smell +richard +leder +three +possibl +part +date +least +must +beoffer +entertain +food +affect +customari +begina +seri +date +great +deal +entertain +moder +amountof +food +merest +suggest +affect +amount +ofaffect +increas +entertain +reduc +proportion +affect +entertain +longer +call +date +circumst +food +omit +miss +manner +guid +excruciatingli +correct +behaviour +univers +peke +univers +friend +physic +depart +alumni +associ +atmadison +univers +world +littl +grei +cell +pictur +univers +game +entertain +late +show +david +letterman +show +univers +studio +wish +send +postcard +someon +movi +world +movi +review +favorit +hockei +player +steve +francai +dictionnairefrancai +anglai +dictionnair +softwar +relatif +lafrancophoni +test +degrammair +francais +french +lesson +weather +forecast +madison +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..310d8db4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,107 @@ +welcom +zhang +home +page +first +year +graduat +student +depart +hometown +shanghai +peopl +republ +china +educ +student +depart +comput +scienc +univers +wisconsin +madison +comput +scienc +jose +state +univers +jose +california +depart +comput +scienc +technolog +tsinghua +univers +beij +peopl +republ +chinaemail +weiz +wisc +eduwork +experiencecontractor +develop +variou +inform +manag +system +differ +platform +platform +includ +windowsnt +solari +technolog +us +includ +tuxedo +pathwai +softwar +design +tandem +comput +corpor +softwar +engin +sherpa +corpor +system +oper +nasa +am +research +center +hobbiesma +jiangbridg +card +game +tabl +tenni +pingpong +joggingth +ultim +challengesolv +mine +sweeper +expert +level +puzzl +within +second +without +cheat +quot +dayth +best +memori +manag +memori +manag +ackowledgementthi +home +page +written +us +framework +provid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..8f92cac5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,79 @@ +kent +wenger +home +page +welcom +kent +wenger +home +page +note +page +definit +still +construct +preparedfor +pothol +need +pictur +scan +kent +wengerassoci +researchercomput +scienc +departmentunivers +wisconsin +west +dayton +streetmadison +telephon +email +wenger +wisc +edufing +workth +main +project +work +arecod +cluster +data +provid +anddevis +data +explor +andvisu +come +good +acronym +importantpart +project +wouldn +agre +visualizationproduc +devis +softwar +peopl +work +yanni +ioannidi +miron +livnyraghu +ramakrishnanmor +inform +univers +wisconsin +madison +dbm +research +groupuw +madison +comput +scienc +home +pagewiscinfo +madison +home +page +personallinksimageslast +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..5fb2580c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,155 @@ +wisconsin +wind +tunnel +project +home +page +wisconsin +wind +tunnel +projectmost +futur +massiv +parallel +comput +built +fromworkst +like +node +program +high +level +parallellanguag +like +support +share +address +space +whichprocess +uniformli +refer +data +wisconsin +wind +tunnel +project +seek +develop +consensu +aboutth +middl +level +interfac +languag +compil +abovesystem +softwar +hardwar +first +propos +interfac +wascoop +share +memori +evolutionari +extens +toconvent +share +memori +softwar +hardwar +recent +havebeen +work +revolutionari +interfac +call +tempest +tempest +provid +mechan +allow +programm +compil +andprogram +librari +implement +messag +pass +transparentshar +memori +hybrid +combin +developingimplement +tempest +think +machin +cluster +ofworkst +wisconsincow +hypothet +hardwar +platform +approach +cowus +snoop +logic +implement +fpga +sram +collaboratingwith +wisconsin +paradyn +project +adapt +perform +tool +tempest +overviewand +annot +bibliographi +slide +overview +talk +novemb +slide +pageor +four +slide +page +complet +technic +paper +contributor +fund +sourc +origin +project +name +wisconsin +week +articl +paradyn +relat +project +wisconsin +comput +architectur +group +comput +scienc +departmentat +univers +wisconsin +world +wide +comput +architectur +inform +last +updat +juli +mark +hill +markhil +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..fded9407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,94 @@ +xuelin +home +page +felix +charact +creat +otto +messmer +first +base +anim +human +person +first +featur +felix +save +whichwa +shown +famou +star +rival +chaplin +keaton +princ +wale +pick +polo +team +mascot +pictur +accompani +charl +lindbergh +across +theatlant +statu +first +imag +successfulli +transmit +develop +televis +star +televis +seri +somehow +obtain +magic +trick +didn +oneev +seem +agre +whether +teeth +whisker +like +spend +time +make +film +televis +program +appear +newspap +comic +strip +advertis +hundr +product +make +thing +trick +remov +tail +ear +put +back +wish +could +finger +give +account +hairbal +keyboard +finger +keeper +instead +sui +vritabl +chat +pass +partout diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..db2ff6e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,608 @@ +yanni +ioannidisyanni +ioannidi +yanni +wisc +eduresearch +interestsdatabas +manag +system +scientif +databas +user +interfac +andinform +visual +complex +queri +optim +heterogen +databas +research +primarili +focus +area +databas +system +optim +complex +queri +databas +support +scientificdata +futur +databas +applic +pose +sever +challeng +toqueri +optim +complex +queri +ask +significantli +higher +thanin +tradit +system +number +altern +evalu +algorithm +much +highera +well +especi +parallel +attempt +tooptim +sever +valu +time +paramet +parametr +queryoptim +thu +number +altern +access +plan +process +querywil +extrem +larg +current +us +algorithmsfor +find +optimum +among +inadequ +research +investig +random +optim +algorithmsa +viabl +solut +problem +primarili +interest +simul +anneal +genet +algorithm +well +altern +take +advantag +special +propertiesof +queri +optim +also +look +complex +queri +schedul +problem +especiallythos +aris +parallel +multimedia +environ +error +propag +size +cost +estim +complex +queri +alsopart +studi +try +identifi +appropriateinform +must +maintain +databas +system +limit +thepropag +error +primarili +focus +identifi +properti +ofoptim +histogram +approxim +distribut +valu +inrel +attribut +comput +mode +investig +expect +part +manyexperi +variou +scientif +disciplin +futur +databas +gener +need +special +support +mani +aspectsthat +current +technolog +readi +provid +involv +develop +desktop +experi +managementenviron +help +scientist +throughout +life +cycl +theirexperiment +studi +primari +compon +system +databas +system +major +issu +work +address +visual +user +interfac +andsemant +heterogen +former +concentr +identifi +right +metaphor +arefor +repres +complex +databas +schema +queri +object +scientistsso +natur +also +investig +power +dynam +visual +queri +latter +concentr +develop +visual +tool +facilitatetransl +integr +differ +data +format +schema +although +issu +gener +aris +experimentalscientif +disciplin +effort +guid +need +specificproject +associ +particular +simul +basedperform +studi +comput +system +simul +base +model +plantgrowth +spectroscopi +sequenc +microscop +imag +recent +publicationsi +ioannidi +queri +optim +comput +survei +symposium +issueon +anniversari +march +garofalaki +ioannidi +schedul +issu +multimedia +queryoptim +comput +survei +symposium +issu +multimediasystem +decemb +ioannidi +ramakrishnan +contain +conjunct +queri +beyondrel +set +transact +databas +system +tod +septemb +haber +ioannidi +livni +foundat +visual +metaphor +forschema +displai +journal +intellig +inform +system +juli +special +issu +visual +inform +system +ioannidi +tsangari +design +implement +performanceevalu +bermuda +ieee +transact +knowledg +data +engin +tkde +februari +miller +ioannidi +ramakrishnan +translat +integr +ofheterogen +schema +bridg +theori +practic +inform +system +januari +ioannidi +christodoulaki +optim +histogram +limitingworst +case +error +propag +size +join +result +transact +databas +system +tod +decemb +ioannidi +ramakrishnan +winger +transit +closur +algorithmsbas +graph +travers +transact +ondatabas +system +tod +septemb +ioannidi +dynam +inform +visual +sigmod +record +decemb +ioannidi +poosala +histogram +base +solut +divers +databaseestim +problem +ieee +data +engin +septemb +ioannidi +livni +gupta +ponnekanti +desktop +experimentmanag +environ +proc +intern +vldb +confer +bombai +india +septemb +poosala +ioannidi +estim +queri +result +distribut +itsappl +parallel +join +load +balanc +proc +intern +vldbconfer +bombai +india +septemb +anjur +ioannidi +livni +frog +turtl +visual +bridgesbetween +file +object +orient +data +proc +intern +conferenceon +scientif +statist +databas +manag +stockholm +sweden +june +garofalaki +ioannidi +multi +dimension +resourc +schedul +forparallel +queri +proc +intern +sigmod +confer +montreal +canada +poosala +ioannidi +haa +shekita +improv +histogram +forselect +estim +rang +predic +proc +internationalacm +sigmod +confer +montreal +canada +ioannidi +livni +haber +user +orient +visual +layoutat +multipl +granular +proc +intern +workshop +advancedvisu +interfac +gubbio +itali +haber +ioannidi +livni +opossum +desk +schema +managementthrough +customiz +visual +proc +intern +vldbconfer +zurich +switzerland +septemb +ioannidi +poosala +balanc +histogram +optim +practicalityfor +queri +result +size +estim +proc +intern +sigmodconfer +jose +tsatalo +solomon +ioannidi +gmap +versatil +tool +forphys +data +independ +proc +intern +vldbconfer +santiago +chile +septemb +tsatalo +ioannidi +unifi +framework +index +databasesystem +proc +intern +dexa +confer +athen +greec +septemb +ioannidi +lashkari +incomplet +path +express +theirdisambigu +proc +intern +sigmod +confer +minneapoli +haber +ioannidi +livni +opossum +flexibl +schemavisu +edit +tool +proc +confer +boston +april +miller +ioannidi +ramakrishnan +translat +integr +ofheterogen +schema +bridg +theori +practic +proc +intern +edbt +confer +cambridg +england +march +ioannidi +univers +serial +histogram +proc +internationalvldb +confer +dublin +ireland +august +miller +ioannidi +ramakrishnan +inform +capacityin +schema +integr +translat +proc +intern +vldbconfer +dublin +ireland +august +wiener +ioannidi +moos +scientist +withdata +manag +problem +proc +intern +workshop +ondatabas +program +languag +york +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..2f283936 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin +zhongbin +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..98736a68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,85 @@ +yinng +home +pageindexofyinongwei +spagehi +welcom +homepag +pleas +good +look +person +info +especi +employ +give +alsolink +classmat +cours +take +good +time +telephon +work +home +address +offic +comp +stat +bldg +madison +home +univ +person +inforesumehobbiestravel +usathi +collect +pictur +took +travel +articl +wrote +trip +chicago +seattl +cours +pointersr +time +computingmacin +learningpattern +recognitioncomputatin +geometrydatabasevisionacadem +diarythi +diari +everi +month +sometim +amaz +mani +littl +read +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel +linksmi +beida +classmatespek +univers +alumni +home +page +oversea +chines +organ +madison +ciumi +bookmarkcom +press +client +support +send +comment +visitor +number +last +access +last +modifi +yinong diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..acbc20a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,52 @@ +matt +home +pageuntil +around +updat +basic +inform +offic +matthew +zeidenbergcent +wisconsin +strategi +observatori +drive +room +madison +voic +home +gilson +madison +email +zeiden +wisc +eduzeidenb +wisc +eduwhen +california +parent +hous +coho +huntington +beach +beauti +convuls +breton +nadja +beaut +sera +convuls +sera +give +food +poor +call +saint +whyth +poor +food +call +communist +helder +camara diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..b45a5712 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,308 @@ +tian +zhang +home +page +tian +zhang +gener +inform +student +research +assistantadvisor +prof +raghu +ramakrishnan +prof +miron +livni +joint +major +concentr +databas +artifici +intellig +compilerminor +financi +invest +bankingoffic +room +comput +scienc +dept +univ +wisconsin +madison +madison +wisconsin +mail +zhang +wisc +eduoffic +telephon +home +telephon +depart +research +intereststher +grow +need +exploratori +analysi +larg +dataset +discov +us +pattern +data +mine +territori +develop +purpos +interest +design +effici +data +mine +algorithm +ortool +larg +databas +integr +techniqu +databas +artifici +intellig +statist +thesi +topic +cluster +densityanalysi +larg +dataset +given +larg +multi +dimension +dataset +limit +amount +resourc +run +time +memori +design +implement +algorithm +effici +accur +identifi +spars +crowd +region +cluster +analysi +estim +densiti +function +overal +data +distribut +densiti +analysi +import +practic +branch +data +mine +appli +mani +domain +dataclassif +imag +compress +pattern +recognit +recent +research +project +birch +effici +data +cluster +densiti +analysi +system +larg +databas +select +public +data +cluster +system +birch +applic +tian +zhang +raghu +ramakrishnan +miron +livni +submit +data +mine +knowledg +discoveri +journal +june +birch +effici +data +cluster +method +larg +databas +tian +zhang +raghu +ramakrishnan +miron +livni +proc +sigmod +conf +data +manag +june +canada +interact +classif +larg +dataset +birch +tian +zhang +raghu +ramakrishnan +miron +livni +proc +workshop +research +issu +data +mine +knowledgediscoveri +cooper +sigmod +june +canada +fast +densiti +probabl +estim +us +kernel +method +larg +databas +miron +livni +raghu +ramakrishnan +tian +zhang +technic +report +juli +motion +plan +multi +joint +robot +topolog +dimensionreduct +method +zhang +ling +zhang +tian +zhang +proc +joint +confer +artifici +intellig +ijcai +findpath +algorithm +manipul +finit +divis +configur +space +zhang +jianwei +zhang +ling +zhang +tian +zhang +robot +manufactur +recent +trend +research +educ +applic +proc +symposium +robot +andmanufactur +research +educ +applic +motion +plan +robot +topolog +dimens +reduct +method +zhang +tian +zhang +jianwei +zhang +ling +zhang +journal +comput +scienc +technolog +find +collis +free +path +mobil +robot +tian +zhang +zhang +proc +symposium +young +comput +profession +beij +relev +link +technic +document +journal +confer +organ +beij +china +interest +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..8abc041b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,90 @@ +yihong +home +page +zhao +yihong +zhao +wisc +research +assist +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +advis +prof +jeff +naughton +research +interest +parallel +object +relat +dbm +line +analyt +process +olap +data +mine +financi +data +dbm +benchmark +educationb +univers +north +carolina +chapel +hillm +madison +fall +research +relat +site +wiscosin +group +sigmod +data +mine +maryland +datamin +microstrategi +rolap +arbor +molap +stock +financi +site +lombard +graph +server +pathfind +server +kiwi +club +server +daili +new +site +pathfind +todai +monei +daili +chines +taiwan +new +search +engin +lyco +excit +yahoo +surf +ters +detail +comment +pgmo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..22befd72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,27 @@ +home +page +still +construct +wang +homepag +offer +inform +home +address +johnson +madison +home +phone +offic +address +dayton +street +madison +offic +phone +email +address +wisc +zhewang +student +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..0bf30ed1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,139 @@ +zhichen +home +page +zhichen +depart +comput +scienc +dayton +madison +offic +phone +research +assist +advisor +professor +jame +larusprofessor +barton +millerawardbest +paper +award +intern +confer +supercomput +press +juli +research +interest +area +program +languag +perform +issu +parallel +anddistribut +system +recent +studi +techniqu +detect +eliminateperform +bottleneck +distribut +share +memori +system +combin +paradyn +perform +toolwith +blizzard +wisconsinwind +tunnel +think +machin +andth +cluster +workstat +recent +public +field +interestprogram +languag +environ +tool +parallel +distribut +comput +network +comput +parallel +distribut +oper +system +comput +architectur +perform +evalu +benchmark +place +studi +work +high +perform +comput +softwar +laboratori +univers +texa +antonio +studi +publish +area +ofparallel +perform +predict +model +simul +comput +scienc +departmentat +fudan +univers +particip +sever +nation +project +china +area +softwar +develop +environ +high +levelprogram +languag +object +orient +technolog +andimcrement +compil +techniqu +click +postscript +version +html +version +interest +link +asplo +programjourn +confer +compil +program +languag +researchchines +novel +friend +fudan +java diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..9afd5f7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,29 @@ +zhang +home +page +hello +name +zhang +pictur +taken +invit +supper +theth +restaur +tsinghua +univers +chen +weihai +wang +tong +univers +wisconsin +madison +depart +comput +scienc +west +dayton +street +madison +wisconsin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..54ce4c39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +krzysztof +zmudzinskikrzysztof +zmudzinskispin +inform +student +inc +pictur +inform +poland +pole +thank +stop +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..acfc3630 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,188 @@ +saluja +kewal +colleg +engineeringunivers +wisconsin +madison +kewal +salujaprofessor +engin +hall +engin +drivemadison +mail +saluja +engr +wisc +eduportrait +jpgdepartmentselectr +comput +engineeringcomput +scienc +educ +univers +iowa +research +interestsdesign +testabl +comput +architectur +data +compress +integr +circuit +vlsi +fault +toler +comput +gener +area +research +interest +test +gener +testableand +reliabl +design +digit +system +carri +research +thisarea +make +extens +vlsi +analysi +tool +theresearch +involv +model +fault +design +digit +circuit +testgener +design +modif +enhanc +testabl +built +inself +test +design +fabric +circuit +test +applic +investig +techniqu +make +test +gener +andfault +simul +process +effici +combin +andsequenti +circuit +data +compress +compact +methodsapplic +design +testabl +built +self +testenviron +investig +area +built +self +test +concentr +regularstructur +programm +logic +arrai +ram +areinvestig +self +test +algorithm +implement +inhardwar +littl +perform +area +penalti +anoth +projectw +investig +wai +built +self +test +hardwar +test +asystem +perform +normal +oper +goal +thatth +system +test +continu +oper +littl +noimpact +system +perform +much +work +perform +us +facil +vlsi +digitalsystem +laboratori +laboratori +hous +number +station +withcolor +monitor +termin +program +design +dept +center +consortia +servic +fountain +index +search +mail +credit +help +last +modifi +fridai +cdtthi +page +best +view +browser +support +tabl +photograph +colleg +engin +address +comment +webmast +engr +wisc +eduupd +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..6bdd6292 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,213 @@ +duffi +neil +colleg +engineeringunivers +wisconsin +madison +neil +duffi +professor +engin +research +build +engin +drivemadison +mail +duffi +engr +wisc +eduportrait +departmentsmechan +engineeringeducationb +univers +wisconsin +madisonm +univers +wisconsin +madisonphd +univers +wisconsin +madisonresearch +interestsrobot +comput +control +manufactur +system +precis +engr +comput +integr +manufactur +micromechanismscent +consortiamanufactur +system +engin +programwisconsin +center +space +autom +roboticsprofessor +duffi +research +manufactur +system +involv +integr +sensor +actuat +comput +data +base +advanc +autom +product +system +develop +control +self +guid +inspect +machin +weld +robot +high +perform +materi +handl +system +autom +finish +system +mold +product +rework +studi +highli +distribut +hierarch +system +control +architectur +hope +reduc +cost +complex +larg +scale +comput +control +manufactur +system +increas +flexibl +fault +toler +duffi +construct +sever +experiment +manufactur +system +incorpor +real +time +fulli +distribut +schedul +optim +control +system +develop +theori +explain +properti +perform +system +duffi +associ +director +wisconsin +center +space +autom +robot +research +nasa +fund +center +emphas +autom +agricultur +system +sensor +tactil +feedback +human +oper +telerobot +system +method +perform +evalu +well +human +factor +research +sensori +feedback +fatigu +develop +telerobot +test +experiment +work +carri +duffi +work +close +manufactur +aerospac +industri +teach +cours +manufactur +system +automat +control +comput +control +author +comput +control +machin +process +dept +center +consortia +servic +fountain +index +search +mail +credit +help +last +modifi +tuesdai +cdtthi +page +best +view +browser +support +tabl +photograph +colleg +engin +address +comment +webmast +engr +wisc +eduupd +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..e12db07f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,318 @@ +shapiro +vadim +colleg +engineeringunivers +wisconsin +madison +vadim +shapiro +assist +professor +mechan +engin +univers +avenuemadison +mail +vshapiro +engr +wisc +eduportrait +jpgurl +http +wisc +departmentscomput +sciencemechan +engineeringeducationba +york +universitym +univers +california +angelesm +cornel +universityphd +cornel +univeristyresearch +interestscomput +aid +design +manufactur +appli +comput +geometri +geometr +solid +model +physic +model +analysi +simul +design +product +automationcent +consortiamathemat +comput +engin +graduat +programmanufactur +system +engin +programspati +autom +laboratoryselect +award +honorsn +scienc +foundat +career +award +gener +motor +fellow +select +public +mainten +geometr +represent +space +decomposit +intern +journal +comput +geometri +applic +chain +model +physic +behavior +engin +analysi +design +research +engin +design +april +palmer +real +function +represent +rigid +solid +comput +aid +geometr +design +april +separ +boundari +convers +transact +graphic +januari +vossler +professor +shapiro +research +interest +center +relationship +betweengeometri +physic +phenomena +mechan +artifact +bemodel +repres +analyz +manipul +design +manufacturedbas +comput +represent +algorithm +specif +ongo +project +includ +geometr +model +abil +creat +convert +maintain +consist +ofdistinct +represent +mechan +part +major +technologicalbarri +undermin +us +reliabl +commercialgeometr +model +system +current +research +effort +focu +eliminatingambigu +commun +engin +specif +formal +model +ofparametr +famili +mechan +part +investig +novel +methodsand +comput +techniqu +support +design +manufactur +mechan +design +todai +mechan +form +function +fabric +process +cannot +bedescrib +combinatori +term +discret +simpl +interactingprimit +appar +lack +combinatori +structur +amajor +roadblock +competit +design +manufactur +mechan +system +collabor +industri +present +research +deal +withtheoret +practic +comput +aspect +mechan +designand +seek +establish +formal +basi +make +mechan +design +andmanufactur +part +systemat +competit +smoothintegr +mechan +form +model +engin +activ +physic +model +geometr +model +contain +part +inform +need +captur +thedesir +physic +behavior +artifact +process +us +tomanufactur +recent +studi +algebra +topolog +model +call +chain +model +physic +behavior +suggest +possibl +tounifi +physic +geometr +model +thu +facilit +develop +ofnew +comput +aid +engin +tool +current +investig +theseand +model +physic +behavior +develop +engin +languagesand +comput +algorithm +systemat +specif +model +simul +analysi +physicalobject +system +dept +center +consortia +servic +fountain +index +search +mail +credit +help +last +modifi +thursdai +cdtthi +page +best +view +browser +support +tabl +photograph +colleg +engin +address +comment +webmast +engr +wisc +eduupd +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..a1ba50e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,144 @@ +autom +theorem +prove +groupautom +theorem +prove +groupth +autom +theorem +prove +group +part +comput +scienc +mathemat +depart +univers +texa +ataustin +produc +method +system +intend +prove +theorem +first +higher +order +logic +intent +appli +systemsand +method +problem +primarili +mathemat +also +computersci +technolog +herei +index +electron +avail +tech +report +site +tech +report +seri +continu +current +techreport +ad +tech +reportseri +present +grouplarri +hinesmarti +mayberrybenjamin +shultsalumniprevi +student +woodi +bledso +previou +student +robert +boyer +incomplet +list +other +relat +groupth +late +woodi +bledso +comput +scienc +faculti +profil +robertboyerj +strother +moorethi +incomplet +list +past +visitor +collaboratorswhat +done +implyth +natur +deduct +proverstrivelarri +hine +first +order +logic +inequ +prover +struvelarri +hine +theori +prover +chou +geometri +proverand +variou +improv +theretoinclud +mcphee +feng +prover +theoryimplement +descript +proof +hein +borel +theoremprecondit +proverbledso +prover +analog +proof +hein +borel +theoremnqthmboy +andmoor +prover +develop +clinc +incomplet +list +iprshult +knowledg +us +prover +mathemat +incomplet +list +relatedlinksdo +feedback +want +inform +contact +benjamin +shult diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..e834f132 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +document +moveddocu +movedthi +document +perman +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..e834f132 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/ps/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +document +moveddocu +movedthi +document +perman +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..aecf508c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,174 @@ +design +analysis +algorithmscse +design +analysis +algorithms +winter +instructor +richard +anderson +anderson +washington +lectures +seig +office +hours +monday +times +appointment +teaching +assistant +william +chan +wchan +washington +office +hours +monday +wednesday +chateau +conference +room +sieg +floor +cubicle +somebody +else +using +conference +room +course +information +prerequisite +going +assuming +already +undergraduate +course +algorithms +wrong +know +soon +possible +lecture +suggested +readingtextbook +errata +list +project +really +project +preview +check +outeric +anderson +sapplet +assignments +handouts +written +homework +sets +generally +tuesdays +class +background +quiz +post +script +homework +sets +homework +solution +homework +solution +homework +solution +homework +solution +homework +solution +homework +homework +solution +homework +homework +midterm +exam +cancelled +lack +interest +final +exam +told +monday +march +probably +verify +time +exam +hour +closed +book +class +exam +covers +material +class +exam +willconsist +short +answer +problem +solving +questions +bureaucratic +stuffgrading +based +upon +homework +exams +project +class +participation +working +together +homework +okay +discuss +homeworkproblems +classmates +must +write +solutions +upindependently +gilligan +island +rule +could +invoked +betweenany +discussion +homework +writing +solution +mustwatch +least +half +hour +gilligan +island +theory +thatan +episode +gilligan +equivalent +reboot +anything +thatsurvives +learned +understood +anderson +washington +eduwchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..70cd78f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,315 @@ +parallel +algorithmscse +parallel +algorithms +spring +general +information +meets +sieg +instructor +richard +anderson +office +hours +appointmente +mail +address +anderson +office +sieg +homework +exams +catalog +description +design +analysis +parallel +algorithms +fundamental +parallel +algorithmsfor +sorting +arithmetic +matrix +graph +problems +additional +selected +topics +emphasis +general +techniques +approaches +used +developingfast +efficient +parallel +algorithms +limitations +theirefficacy +prerequisite +equivalent +majors +homework +assignments +notes +syllabus +homework +thursday +april +homework +plus +rambling +commentsabout +course +thursday +april +lecture +transparencies +april +code +analysisfor +list +ranking +lecture +notes +connected +components +algorithmis +simpler +correcter +section +latex +version +pointers +papers +pointers +referencesfor +erew +crew +connectivity +ullman +yannakakis +paper +homework +tuesday +union +find +paper +homework +thursday +certified +write +paper +implies +existence +efficient +consensus +algorithm +based +upon +swap +although +likelysomething +going +inside +next +supercomputer +homework +thursday +asynchronous +references +martel +focs +buss +manuscript +notes +memory +models +real +description +special +topics +course +content +whim +instructor +descriptive +title +year +course +would +theory +shared +memory +parallel +computing +maybe +topics +theory +smpc +course +start +collection +basic +algorithms +spend +time +models +computation +syllabus +gives +list +topics +could +covered +term +shared +memory +indicate +lookingat +topics +pertain +specific +interconnection +topologies +wewill +consider +situations +cost +memory +access +isnon +uniform +course +theory +course +sense +notconsider +particular +real +machines +prove +theorems +andyou +expected +parallel +machine +however +topics +motivated +practical +considerations +goal +indeveloping +parallel +algorithms +come +algorithmswhich +could +conceivably +efficient +parallel +machines +expecting +three +four +problem +sets +containing +routine +challenging +problems +goingto +require +project +happy +students +outsidework +course +related +topics +text +course +introduction +parallelalgorithms +nice +book +although +befollowing +closely +feeling +exceptionally +cheap +youcould +probably +without +purchasing +copy +original +plan +volunteered +teach +course +year +textwould +theory +shared +memory +parallel +computing +anderson +however +book +progressing +fast +volume +artof +computer +programming +chose +book +instead +going +quite +flexible +course +taught +mychoice +topics +influenced +considered +interestingor +uninteresting +also +choice +teach +course +aseither +traditional +lecture +course +work +researchcontent +number +open +problems +mind +could +turninto +nice +research +results +could +present +half +baked +ideason +provided +others +interest +andenergy +think +anderson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..865666be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,34 @@ +home +page +automata +computability +complexity +pages +moved +current +quarter +autumn +autumn +portions +reprinted +adapted +foracademic +nonprofit +purposes +providing +source +accuratelyquoted +duly +credited +copyright +department +computer +science +engineering +university +ofwashington +comments +webmaster +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..e5486cca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,25 @@ +home +page +fall +automataautumn +instructor +paul +beame +welcome +home +page +world +wide +short +hypermedia +documentfor +exams +quiz +postscript +quiz +postscript +final +postscript +latex +beame +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..ab1ccaaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,150 @@ +fall +automata +computability +complexity +larry +ruzzo +fall +tuth +sieg +staffnameemailphoneoffice +hours +instructor +larry +ruzzo +ruzzo +sieg +nitin +sharma +nitin +csmw +sieg +class +mail +last +update +messages +sent +class +mail +list +washington +textbook +errata +handouts +administrivia +homework +midterm +latex +source +course +organization +syllabus +collaboration +midterm +acrobat +course +organization +syllabus +collaboration +midterm +postscript +course +organization +syllabus +collaboration +midterm +file +formats +thecourse +materials +provided +three +formats +latex +plain +ascii +text +including +formating +commands +simple +things +assignments +generally +quite +legible +format +figures +complex +math +stuff +hard +impossible +read +adobe +acrobat +latest +greatest +free +viewer +available +department +unix +systems +acroread +perhaps +aavailable +adobe +acrobat +page +postscript +ghostview +ghostscript +home +page +free +viewers +windows +linux +time +acrobat +supported +fewer +systems +ispreferable +files +smaller +rendering +isfaster +legible +print +ghostscriptcan +example +course +webs +autumn +autumn +portions +reprinted +adapted +foracademic +nonprofit +purposes +providing +source +accuratelyquoted +duly +credited +copyright +department +computer +science +engineering +university +ofwashington +comments +webmaster +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..ea0071c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,142 @@ +topics +complexity +autumn +propositional +theorem +proving +satisfiability +testing +proof +complexity +general +information +instructor +paul +beame +meeting +times +tuesday +thursday +loew +automated +theorem +proving +computer +aided +verification +vlsi +andsoftware +engineering +give +algorithms +attempt +decide +truthof +logical +statements +propositional +first +higher +order +logic +course +concentrate +complexity +issues +propositional +caseas +well +flip +side +satisfiability +testing +even +uses +oftheorem +proving +first +order +higher +order +logic +often +involve +finitedomains +proofs +interpreted +propositional +logic +anyway +consider +variety +systems +propositional +theoremproving +satisfiability +testing +issues +systems +complex +proofs +within +system +good +choices +search +strategies +considerable +theoretical +practical +work +thesequestions +concentrate +theoretical +issues +proof +complexityand +relative +complexity +search +strategies +also +examine +anumber +implementations +propositional +logic +algorithms +compare +theoryand +practice +papers +things +urquhart +complexity +proofs +survey +talk +slides +installed +software +amusement +installed +theorem +provers +sato +andboyer +moore +well +satisfiability +tester +gsat +june +thedirectory +courses +proversthere +papers +scattered +well +process +ofinstalling +theorem +provers diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..c73932a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,41 @@ +home +pagecse +computer +systemperformance +modelingspring +hosts +lazowskaandmaryvernonwelcome +home +page +computer +system +performancemodeling +meets +mondays +wednesdays +fridays +loew +hall +office +hourstentative +topic +schedulecomings +goingsassignmentsproject +informationmap +queueing +network +solution +package +emailother +information +available +sigmetrics +conference +measurement +modeling +computersystemsuw +department +computer +science +engineeringlazowska +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..5d7dcd0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,104 @@ +home +page +computer +systems +architecturewinter +instructorsusan +eggers +eggers +washington +sieg +office +hours +tuth +tajoshua +redstone +redstone +washington +sieg +office +hours +sieg +course +information +course +overview +postscript +schedule +continuously +updated +postscript +lecture +notes +problem +sets +previous +tests +architecture +history +postscript +specmark +ratings +postscript +information +tools +shade +instuction +simulator +sparc +atom +system +building +analysis +tools +alpha +tullsen +simulator +execution +driven +instruction +level +simulator +simulates +superscalar +architecture +close +etch +binary +rewriter +analyze +pentium +code +alpha +hardware +monitors +multiflow +compiler +alpha +pixie +user +manual +postscript +dinero +uniprocessor +cache +simulator +local +machines +alphas +pentiums +powerpcs +sparcs +applications +multiprocessor +uniprocessor +spec +benchmarks +neat +pages +info +center +info +current +future +processors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..a87d2a74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,62 @@ +home +pagecse +operating +systems +instructor +hank +levy +spring +frederic +pighin +meeting +times +instructor +office +hours +office +hours +chateau +conference +room +number +units +welcome +home +page +world +wide +short +hypermedia +document +forcse +contains +information +class +keep +mind +thisdocument +static +information +especially +classmessages +added +frequently +problems +thisdocument +send +mail +pighin +announcements +april +first +assignment +ready +itis +april +course +information +course +mail +assignments +projectlevy +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..abd3c816 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,108 @@ +home +pagecse +computer +graphicsautumn +quarter +welcome +home +page +world +wide +hypermedia +document +whichcontains +wealth +information +class +keep +mind +thatthis +document +static +information +addedfrequently +problems +document +send +mail +derose +click +help +available +information +professor +course +syllabus +lecture +notes +written +homework +assignments +solutions +last +year +project +handouts +project +grading +policy +tests +cool +images +last +year +additional +information +getting +class +instructional +using +indys +mvis +home +page +visitor +room +scheduling +computer +science +engineering +department +computer +science +degree +program +offered +college +arts +sciences +computer +engineering +degree +program +offered +college +engineering +mosaic +help +help +available +following +topics +basic +information +mosaic +information +hypertext +markup +language +html +uniform +resource +locators +read +home +usinglynx +character +based +browser diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..914ed39f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,176 @@ +home +page +principles +digital +systems +design +carl +ebeling +fall +welcome +home +page +course +information +time +place +loew +important +announcements +summary +syllabus +text +books +staff +carl +ebeling +instructor +ebeling +office +hours +wednesday +friday +sieg +paul +franklin +paul +office +hours +monday +thursday +sieg +hines +hineskj +office +hours +tuesday +wednesday +friday +sieg +larry +mcmurchie +research +staff +tools +guru +larry +documentation +simulation +synthesis +design +pamette +board +mostly +complete +still +construction +students +working +groupsfinal +exam +review +topics +covered +quarter +homework +assignments +note +homework +homework +friday +beginning +class +homework +handed +class +beginning +class +homework +monday +beginning +class +homework +wednesday +beginning +class +homework +friday +beginning +class +homework +monday +beginning +class +homework +friday +beginning +class +handouts +combinational +logic +combinational +logic +sequential +logic +sequential +logic +fpgas +fpgas +memories +communication +information +department +computer +science +engineering +home +page +mother +site +list +vlsi +links +comprehensivelist +icmanufactures +murphy +recent +dilbert +comics +national +semiconductor +data +sheets +motorola +data +book +server +philips +semiconducter +data +book +server +micron +technology +data +sheets +copyright +department +computer +science +engineering +university +washington +portions +reprinted +adapted +academic +nonprofit +purposes +providing +source +accurately +quoted +duly +credited +ebeling +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..d5e8b4dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,56 @@ +index +pagecse +artificial +intelligencefall +quarter +artificial +intelligence +poses +fundamental +andchallenging +questions +computer +science +build +intelligentmachines +course +addresses +questions +providing +anin +depth +introduction +selected +topics +including +agentarchitectures +knowledge +representation +search +planning +machinelearning +reasoning +uncertainty +methodology +staff +weldweld +sieg +hours +marc +friedmanfriedman +sieg +hours +nick +kushmericknick +sieg +hours +outline +topicsreading +assignmentsassignments +examsgradingresourcesthe +class +mailinglist +also +archive +past +messages diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..f89139ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,141 @@ +uncertainty +decision +making +uncertainty +decision +making +artificial +intelligence +winter +professor +steve +hanks +hanks +office +sieg +office +hours +whenever +around +appointment +email +addresses +mail +washington +goes +class +members +send +mail +request +washington +list +reading +material +pearl +probabilistic +reasoning +intelligent +systemsthis +required +text +class +read +several +chapters +probably +without +buying +strappedfor +cash +though +nice +reference +book +shafer +pearl +readings +uncertain +reasoningthis +nice +collection +foundational +papers +reasoning +uncertainty +read +several +selections +copy +available +grail +library +jaynes +probability +theory +logic +science +fragmentary +edition +july +extremely +interesting +technical +historical +look +foundationsof +probability +theory +statistics +decision +theory +definitely +worth +looking +reference +list +historical +perspective +alone +math +heavy +going +places +beautifully +written +neapolitan +probabilistic +reasoning +expert +systems +theory +algorithmsa +significant +overlap +pearl +book +good +secondary +source +information +graphical +models +propagationalgorithms +available +math +research +library +papers +arranged +course +summary +summary +topics +covered +readings +html +postscript +hanks +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..3e4d1371 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,425 @@ +home +pagecse +image +understandingwelcome +home +page +world +wide +short +hypermedia +documentfor +contains +information +theclass +keep +mind +document +static +newinformation +especially +class +messages +added +frequently +problems +document +send +mail +mock +copyright +notice +material +course +subjectto +copyright +viewed +public +installed +site +otherthan +university +washington +assignments +first +assignment +read +chapter +course +notes +doexercise +second +assignment +wednesday +april +read +chapter +course +notes +exercises +next +determine +convenient +torun +khoros +cantata +runs +suns +aslillith +edit +local +workstation +login +file +containxhost +lilliththen +cshrc +file +suns +following +setenv +khoros_home +local +khoros +setenv +manpath +local +khoros +path +khoros_home +path +rlogin +onto +lillith +rhost +assignmentsand +display +environment +variable +appropriately +typecantata +unix +prompt +machine +george +haskhoros +cantata +installed +also +course +home +page +wwwhttp +washington +education +courses +index +htmland +follow +links +khoros +cantata +tutorial +itscourse +outline +experiments +least +first +twotopics +image +information +spatial +resolution +ideal +take +tutorial +read +pagesand +experiment +khoros +another +window +nothing +turn +part +assignment +third +assignment +read +article +huertas +andnevatia +cvpr +proceedings +also +tolook +article +wolff +fourth +assignment +monday +april +assignment +make +comparison +three +image +processing +software +environments +khoros +msvc +fast +operations +level +learning +effort +required +part +announcements +final +examination +monday +june +pmin +regular +class +meeting +room +exam +cover +combinationof +midterm +post +midterm +material +list +topics +study +reminder +class +approved +sunday +june +time +final +exam +review +meeting +plan +meet +insieg +outlines +selected +class +periods +available +friday +april +monday +wednesday +monday +friday +monday +wednesday +friday +monday +wednesday +copies +overhead +transparencies +lectures +onneural +nets +available +engineering +library +copy +center +floor +packet +number +trainable +classifiers +friday +students +permitted +temporary +copy +ofmatlab +course +requirethat +fill +form +sign +contract +know +interested +term +projects +important +part +ofthe +course +started +week +april +corrections +course +notes +friday +april +introduced +pentium +laboratory +includingthe +msvc +software +development +environment +evan +mclain +documentexplaining +transform +image +application +current +status +khoros +installation +suns +contains +recent +information +getting +started +withkhoros +accompanying +course +please +read +class +friday +march +monday +april +students +already +computer +account +accesskhoros +cantata +contact +renee +reed +onthursday +friday +make +arrangement +pick +youraccount +login +name +password +itover +weekend +early +next +week +renee +hours +arelimited +plan +ahead +email +address +reed +andshe +part +sieg +back +office +sieg +kept +locked +either +need +knock +orhave +prior +arrangement +meet +many +account +application +cards +signed +willhave +take +care +next +week +selected +lecture +slides +monday +march +wednesday +march +information +resources +image +understanding +online +intro +image +processing +khoros +cantata +delft +univ +pattern +recognition +information +page +computer +vision +home +page +stored +information +home +page +thedepartment +computer +science +engineering +online +version +computer +science +undergraduate +brochure +online +version +computer +engineering +undergraduate +brochuremosaic +help +running +mosaic +find +help +itemsunder +balloon +help +menu +macmosaic +home +page +itemunder +navigate +menu +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..70f9425f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,257 @@ +home +pagecse +parallel +computation +image +processingwelcome +home +page +world +wide +short +hypermedia +documentfor +contains +information +theclass +keep +mind +document +static +newinformation +added +time +time +schedule +informationon +october +november +class +begin +half +hourearlier +normal +start +days +november +meet +guest +speaker +prof +nian +simon +fraser +university +burnaby +canada +tuesday +november +class +november +bharath +modayur +guest +speaker +title +presentation +efficient +parallel +object +recognition +simd +mimd +machines +tuesday +november +class +begin +topics +completion +discussion +pyramid +algorithms +scale +invariantoperators +algorithms +segmentation +hierarchicalrelaxation +using +isodata +approach +burt +hong +rosenfeld +introduction +embedding +virtual +processing +overview +neural +network +architectures +algorithms +tuesday +november +class +begin +topics +completion +overview +neural +network +architectures +embedding +neural +networks +meshes +pyramids +brief +treatment +iconic +symbolic +computation +thursday +november +class +begin +topic +parallel +image +analysis +digital +libraries +demo +schedule +finding +term +project +topicsduring +week +october +students +activelyexploring +topics +term +project +written +descriptions +topics +handed +inon +tuesday +october +template +writeupsis +available +resourcespvm +parallel +virtualmachine +software +layear +permits +user +program +avirual +machine +made +heterogeneous +collection +moreworkstations +convenient +implement +studydistributed +algorithms +intel +technicalpublications +include +documentation +intel +paragonparallel +computer +system +language +good +language +implement +array +oriented +algorithms +intel +paragon +variousvendors +supercomputers +parallel +machines +info +onthe +maspar +national +supercomputer +center +sweden +online +information +maspar +theuniversity +tennessee +resource +found +neal +friedman +reports +also +paragon +documents +errors +correctionsto +course +notes +copyright +notice +material +course +subjectto +copyright +viewed +public +installed +site +otherthan +university +washington +term +projects +important +part +ofthe +course +started +week +october +review +session +final +schedule +friday +december +sieg +hall +final +exam +scheduled +wednesday +december +normal +class +meeting +room +exam +closed +book +term +projects +thursday +december +last +update +october diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..396d1b9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,91 @@ +graphics +seminarc +rspring +numerical +methods +graphics +april +matrix +computations +intro +definitions +properties +inversion +brad +solving +linear +systems +eric +april +matrix +computations +eigenvalues +eigenvectors +singular +value +decomposition +joel +april +root +finding +nonlinear +equations +corey +shuichi +april +optimization +intro +unconstrained +optimization +kari +constrained +global +optimization +kevin +linear +quadratic +programming +linear +programming +chuck +ronen +linear +quadratic +programming +examples +daniel +data +fitting +intro +conclusion +mike +linear +regression +calibration +example +brad +ordinary +differential +equations +intro +methods +papers +adam +methods +papers +joanna +discretization +methods +finite +elements +radiosity +fred +pdes +finite +differences +interval +arithmetic +troy +jonathan +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..b92053cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,56 @@ +quarterscse +special +topicssteven +tanimoto +instructorcse +autumn +transcript +based +education +winter +mathematics +experiences +image +processing +spring +mathematics +experiences +image +processing +autumn +technology +collaborative +learning +copyright +notice +material +course +subjectto +copyright +viewed +public +installed +site +otherthan +university +washington +graduate +seminar +explores +variety +topics +related +useof +computers +education +specific +topics +activities +varyfrom +quarter +quarter +last +update +september +tanimoto +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..995ab8d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,225 @@ +home +page +autumn +autumn +transcript +based +education +wwwwelcome +home +page +world +wide +short +hypermedia +documentfor +contains +information +theclass +keep +mind +document +static +newinformation +added +time +time +copyright +notice +material +course +subjectto +copyright +viewed +public +installed +site +otherthan +university +washington +reading +october +mccalla +central +importanceof +student +modelling +intelligent +tutoring +reading +october +presented +sandi +youngquist +meeting +october +discussion +paul +barton +davis +aboutinternet +services +reading +october +laborde +laborde +problem +solving +geometry +microworlds +tointelligent +computer +environments +presented +tessa +reading +november +bartels +promoting +mathematicsconnections +concept +mapping +plus +presentation +gary +anderson +meeting +november +reading +november +readings +meeting +online +first +paper +combines +degree +vision +little +degree +technology +nothing +particularly +ambitious +description +state +second +paper +technical +piece +promotes +thetheme +learners +taking +responsibility +education +something +increasingly +important +future +choices +third +reading +papers +moresophisticated +either +first +paper +beyond +browsing +elaborates +possibility +group +annotation +ofwww +materials +paper +toolkit +describes +intechnical +terms +layer +internet +infrastructure +couldmake +possible +smart +distributed +tutorial +applicationsthat +mosaic +netscape +achieve +please +read +either +options +advanced +educational +uses +world +wide +webhttp +proceedings +papers +paper +html +presentor +jeremy +baer +empowering +students +information +agehttp +ncsa +uiuc +proceedings +educ +ward +ward +html +presentor +marla +baker +either +beyond +browsing +shared +comments +soaps +trails +line +communitieshttp +proceedings +papers +html +presentor +john +dietz +toolkit +enhancing +protocols +lower +layer +serviceshttp +proceedings +papers +dcewebkit +html +presentor +adam +carlson +presentor +paper +concept +mapping +hong +zhumeeting +november +discussion +michael +aboutcurriculum +navigator +last +update +november diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..388181f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,95 @@ +home +page +autumn +autumn +technology +collaborative +learningwelcome +home +page +copyright +notice +material +course +subjectto +copyright +viewed +public +installed +site +otherthan +university +washington +general +description +autumn +computer +technology +internet +methodologiesfor +teaching +learning +currently +coming +together +innew +ways +seminar +explore +read +number +papers +technology +forcollaborative +learning +participating +student +willtake +responsibility +making +presentation +group +ofthese +papers +cover +subset +papers +also +explore +possible +applications +ofai +visualization +techniques +analysis +evidence +ofstudent +learning +online +contexts +meetings +currently +scheduled +tuesdays +however +decide +move +time +better +intopeople +schedules +visits +meany +middle +school +washington +middle +schoolmay +scheduled +depending +interests +participatingstudents +last +update +september +tanimoto diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..9f1255cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,353 @@ +mvmv +global +resource +management +distributed +systemsprofessor +mary +vernontime +pmlocation +processor +allocation +gang +scheduling +nows +ousterhout +scheduling +techniques +concurrent +systems +inrd +conf +distributed +systems +pages +arpaci +dusseau +vahdat +anderson +patterson +interaction +parallel +andsequential +workloads +network +workstations +proc +ofthe +sigmetrics +conf +processor +allocation +dynamic +equi +partitioning +nguyen +tucker +gupta +process +control +scheduling +issuesfor +multiprogrammed +shared +memory +multiprocessors +proc +symp +operating +system +principles +pages +nguyen +vaswani +zahorjan +using +runtime +measured +workloadcharacteristics +parallel +processor +scheduling +univ +ofwashington +technical +report +tutorial +applications +processors +shun +leung +evangelos +markatos +thomas +leblanc +using +processor +affinity +loopscheduling +shared +memory +multiprocessors +proc +supercomputing +expanded +version +inieee +trans +parallel +distributed +systems +hans +zima +barbara +mary +chapman +compiling +distributed +memory +systems +proc +ieee +edjlali +agrawal +sussman +saltz +data +parallelprogramming +adaptive +environment +proc +parallel +processing +symp +santa +barbara +april +tutorial +processor +allocation +policy +comparisonsshikharesh +majumdar +derek +eager +richard +bunt +scheduling +multiprogrammed +parallel +systems +proc +sigmetrics +conference +measurement +modeling +ofcomputer +systems +santa +eric +parsons +kenneth +sevcik +multiprocessor +scheduling +high +variabilityservice +time +distributions +proc +ipps +workshop +scheduling +strategies +parallel +systems +santa +barbara +dror +feitelson +bill +nitzberg +characteristics +production +parallel +scientific +workload +thenasa +ames +ipsc +proc +ipps +workshop +scheduling +strategies +parallel +systems +santa +barbara +following +also +covered +required +reading +leutenegger +vernon +performance +multiprogrammed +multiprocessor +scheduling +policies +proc +sigmetrics +conf +measurement +modeling +ofcomputer +systems +mccann +vaswani +zahorjan +dynamic +processor +allocation +policy +multiprogrammed +shared +memorymultiprocessors +transactions +computer +systems +processor +allocation +demand +based +scheduling +patrick +sobalvarro +william +weihl +demand +based +coscheduling +ofparallel +jobs +multiprogrammed +multiprocessors +proc +ipps +workshop +scheduling +strategies +parallel +systems +santa +barbara +impact +paging +page +migration +burger +hyder +miller +wood +paging +tradeoffs +distributed +shared +memorymultiprocessors +proc +supercomputing +rohit +chandra +scott +devine +verghese +anoop +gupta +mendel +rosenblum +scheduling +page +migration +multiprocessorcompute +servers +proc +conf +architectural +support +programminglanguages +operating +systems +asplos +jose +coordinated +scheduling +processors +memory +alverson +kahan +korry +mccann +smith +scheduling +tera +proc +ipps +workshop +scheduling +strategies +parallel +systems +santa +barbara +eric +parsons +kenneth +sevcik +coordinated +allocation +memory +processors +multiprocessors +october +demand +based +scheduling +discussion +open +problems +dusseau +arpaci +culler +effectivedistributed +scheduling +parallel +workloads +proc +sigmetrics +conf +measurement +modeling +computersystems +philadelphia +june +appear +feitelson +rudolph +coscheduling +based +runtime +identification +activity +working +sets +parallel +programming +theoretical +results +processor +memory +allocation +karlin +papers diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..6c22ea23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,91 @@ +winter +winter +algorithms +molecular +biology +richard +karp +larry +ruzzo +martin +tompaclass +bboard +last +update +handouts +administrative +lecture +notes +drafts +homework +html +syllabus +schedule +acrobat +title +syllabus +schedule +postscript +title +syllabus +schedule +slides +file +formats +course +materials +provided +several +formats +html +usual +format +loads +fast +usually +readable +many +parts +generated +automatic +translations +latex +translation +faithful +format +original +adobe +acrobat +latest +greatest +free +viewer +adobe +acrobat +page +postscript +ghostscript +home +page +free +viewers +windows +linux +time +acrobat +supported +fewer +systems +ispreferable +files +smaller +rendering +isfaster +legible +print +ghostscriptcan +example +ruzzo +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..827e7808 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,236 @@ +architecture +lunchcse +architecture +lunchcourse +organizer +jean +loupbaermeeting +time +tuesdays +cseg +architecture +lunch +continue +quarter +withalmost +format +previous +years +selection +papers +discussedat +beginning +quarter +distribution +week +ofthe +paper +tobe +read +week +discussed +week +might +formal +presentations +work +progress +byesteemed +members +lunch +mostlywe +hopefully +heated +discussionson +papers +literature +difference +quarter +previous +quartersis +start +reading +position +papers +fromparticipants +recent +workshop +oncritical +issues +computer +architecture +research +copy +herereading +positions +papers +lead +morethemes +papers +read +quarter +many +thanks +ruth +anderson +molly +brown +gershony +matthai +philipose +tabular +summary +ofthe +gurus +positions +found +herefor +usual +format +thestudents +lead +discussion +papers +either +informallyor +slides +credit +course +variable +credits +ifyou +present +read +first +meeting +organization +meeting +tuesday +october +tuesday +read +value +locality +load +value +prediction +lipasti +wilkerson +shen +asplos +asplos +papers +line +follow +links +asplos +advanceprogrami +short +bibliography +processor +memory +line +appreciate +volunteers +thesaulsbury +burger +machine +papers +tuesday +readashley +saulsbury +fong +pong +andreas +nowatzyk +missing +memory +wall +case +processor +memory +integration +isca +tuesday +readm +fillo +keckler +dally +machine +multicomputer +micro +available +follow +machinelink +tuesday +readdoug +burger +stefanos +kaxiras +james +goodman +datascalar +architectures +spsd +execution +model +university +wisconsin +madison +computer +sciences +department +technical +report +july +available +neton +tuesday +read +intelligent +iram +chips +remember +compute +patterson +anderson +cardwell +fromm +keeton +kozyrakis +thomasand +yelick +paper +availablehere +fortunate +authors +prof +anderson +present +paper +subscribe +mailing +list +send +email +themajordomo +mailing +list +majordomo +mail +contents +shouldinclude +line +subscribe +cseg +leave +subject +lineblank +shortly +receive +message +back +saying +welcome +baer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..235e7dd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +home +pagecse +home +page +spring +offering +experimental +graduate +course +human +computer +interaction +borning +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..cf3c688e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,151 @@ +compiler +seminarcse +compiler +seminarcourse +organizers +susan +eggersand +craigchambersmeeting +time +wednesdays +officially +loew +butreally +meet +second +floor +atrium +scheduleweek +memspy +analyzing +memory +system +bottlenecks +programs +margaretmartonosi +anoop +gupta +thomas +anderson +anderson +week +general +approach +time +specialization +application +charles +consel +francois +noel +week +practical +data +flow +framework +array +reference +analysis +itsuse +optimizations +evelyn +duesterwald +rajiv +gupta +marylou +soffa +week +value +dependence +graphs +representation +without +taxation +danielweise +roger +crew +michael +ernst +bjarne +steensgaard +litvinov +week +iterated +register +coalescing +george +andrew +appel +garrett +week +generating +machine +specific +optimizing +compilers +roger +hooverand +kenneth +zadeck +dean +grant +week +paradigm +compiler +distributed +memory +multicomputers +byprivthviraj +banerjee +lewis +week +minimum +cost +interprocedural +register +allocation +stevenkurlander +charles +fischer +secosky +week +data +specialization +todd +knoblock +erik +grove +week +lazy +strength +reduction +jens +knoop +oliver +ruthing +andbernhard +steffen +mock +tullsen +subscribe +mailing +list +send +email +majordomo +mail +contents +include +line +subscribecsek +leave +subject +line +blank +shortlyreceive +message +back +saying +welcome +melody +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..a5b233dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,317 @@ +parallel +programming +environmentslarry +snyderautumn +quarter +mondays +loew +welcome +home +page +quarter +reading +selected +papers +recent +ipps +ppopp +supercomputing +icpp +lcpc +tenative +schedule +quarter +atmosphere +casual +andwill +hopefully +ignite +lively +discussion +everyone +attending +seminar +expected +present +thepapers +still +spots +open +hurry +sign +please +send +mail +majordomo +subscribe +cseo +body +message +subscribeto +class +mailing +list +datepaperpresentor +compiling +matlab +programs +scalapack +exploiting +task +data +parallelism +ramaswamy +hodges +banerjee +ipps +falcon +matlab +interactive +restructuring +compiler +derose +gallivan +gallopoulos +marsolf +padua +lcpc +compiling +portable +message +driven +programs +ramkumar +forbes +kale +icpp +sung +cross +loop +reuse +analysis +application +cache +optimizations +cooper +kennedy +mcintosh +lcpc +ruth +global +communication +analysis +optimization +chakarabarti +gupta +choi +pldi +sean +integerated +compilation +performance +analysis +environment +data +parallel +programs +adve +input +output +characteristics +scalable +parallel +applications +crandall +aydt +chien +reed +jason +holiday +streams +library +complex +distributed +data +structures +gotwalls +srinivas +gannon +ppopp +brad +model +compilation +strategy +core +data +parallel +programs +bordawekar +choudahary +kennedy +koelbel +paleczny +ppopp +local +iteration +computation +block +cyclic +distributions +midkiff +icpp +utility +threads +data +parallel +programming +fahringer +haines +mehrotra +eric +cilk +efficient +multithreaded +runtime +system +blumofe +joerg +kuszmaul +leiserson +randall +zhou +ppopp +compiling +generating +parallel +code +object +oriented +mathematical +models +andersson +fritzson +ppopp +analysis +cross +loop +reuse +analysis +application +cache +optimizations +cooper +kennedy +mcintosh +lcpc +communication +optimizations +global +communication +analysis +optimization +chakarabarti +gupta +choi +pldi +generating +realignment +based +communication +programs +kamachi +kusano +suehiro +tamura +sakon +ipps +communication +optimizations +parallel +computing +using +data +access +information +rinard +tools +integerated +compilation +performance +analysis +environment +data +parallel +programs +adve +relative +debugging +application +development +large +numerical +models +abramson +foster +michalakes +sosic +parallel +model +compilation +strategy +core +data +parallel +programs +bordawekar +choudahary +kennedy +koelbel +paleczny +ppopp +input +output +characteristics +scalable +parallel +applications +crandall +aydt +chien +reed +data +distribution +local +iteration +computation +block +cyclic +distributions +midkiff +icpp +potpourri +compiling +matlab +programs +scalapack +exploiting +task +data +parallelism +ramaswamy +hodges +banerjee +ipps +utility +threads +data +parallel +programming +fahringer +haines +mehrotra +sung +choi +last +modified +tuesday +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..a99ad81c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,33 @@ +systems +seminar +preliminariesif +already +systems +mailing +list +need +various +crucial +bits +ofinformation +week +seminar +cancelled +besent +list +send +mail +systems +request +line +subscribe +systemsin +message +body +quarterly +webs +spring +summer +autumn +winter +autumn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..f85c35ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,89 @@ +summer +quarterwe +meet +fridays +loew +quarter +wewill +read +final +papers +appear +upcomingacm +symposium +operating +systems +principles +sosp +please +read +papers +meeting +havean +interactive +discussion +quarter +scheduleoct +implementing +global +memory +management +workstation +cluster +presenter +feeley +logged +virtual +memory +presenters +savage +autoraid +hierarchical +storage +system +presenter +wilkes +serverless +network +file +systems +presenters +franklin +montgomery +tiwary +hypervisor +based +fault +tolerance +presenters +chan +philipose +wolman +exploiting +weak +connectivity +mobile +file +access +presenters +voelker +litvinov +performance +cache +coherence +stackable +filing +presenters +sriram +fiuczynski +impact +architectural +trends +operating +system +performance +presenters +anderson +romer +return +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..12594714 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,219 @@ +high +performance +scientific +computing +zphigh +performance +scientific +computing +zpllarry +snyder +teamautumn +quarter +wednesday +sieg +loew +days +welcome +home +page +please +send +mail +majordomo +subscribe +csezpl +body +message +subscribeto +class +mailing +list +students +also +interested +joining +usersmailing +list +mailing +list +used +distribute +informationabout +compiler +libraray +releases +relatedinformation +added +mailing +list +send +mail +majordomo +subscribe +users +body +message +descriptionzpl +scientificprogramming +language +suitable +computations +previously +written +infortran +array +language +dramaticallysimplifies +programming +eliminating +nuisance +looping +indexing +runs +fast +modern +machines +including +parallel +supercomputers +allowing +programmers +develop +code +workstations +andtrivially +migrate +largest +parallel +machines +simply +byrecompiling +developed +released +tothe +scientific +computing +community +class +designed +scientists +engineers +computer +scientists +want +learn +modern +languages +supercomputers +effectively +scientific +computation +class +cover +following +topics +state +high +performance +computing +syntax +semantics +algorithms +exploiting +high +performance +parallel +machines +wysiwyg +performance +writing +fast +programs +easily +developing +programs +workstation +supercomputer +well +program +perform +science +faster +program +prototyping +scientific +computations +matlab +text +booknone +class +rely +materials +documents +found +onin +pages +specifically +follow +closely +zplprogramming +guide +version +prerequisitesfamiliarity +scientific +computation +fortran +ormatlab +programming +unix +platform +assumed +class +variable +credit +audit +students +write +debug +program +selected +technical +discipline +suitable +computations +range +whole +applications +kernels +inner +loops +scientific +computation +useful +informationcourse +syllabus +including +lecture +notes +apply +ncsa +block +grant +account +faculty +staff +students +compile +programs +using +remotezpl +compilerother +important +links +sung +choi +last +modified +wednesday +october diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..81bc9a9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,185 @@ +home +page +autumn +introduction +digital +design +autumn +quarter +gaetano +borriello +corey +andersonwelcome +home +page +home +page +contains +whole +bunch +useful +information +class +keep +mind +document +static +information +especially +class +announcements +messages +added +frequently +problems +document +general +send +mail +tocse +webmaster +class +announcements +notices +instructor +system +administrators +last +update +class +mail +archive +messagessent +washington +last +update +send +mail +class +instructor +instructor +everyone +course +administration +goals +syllabus +meeting +times +lectures +final +exam +monday +december +workload +grading +expectations +laboratory +software +tools +policies +collaboration +cheating +announcements +mail +addresses +overall +schedule +lecture +topics +instructor +gaetano +borriello +gaetano +office +hours +sieg +corey +anderson +corin +office +hours +sieg +aweekly +assignments +weekly +quizzes +final +exam +lectures +online +versions +slides +used +lectures +textbook +contemporary +logic +design +katz +benjamin +cummings +addison +wesley +maintained +author +katz +maintained +publisher +benjamin +cummings +addison +wesley +notes +topics +interest +evolution +implementation +technologies +computer +aided +design +tools +logic +design +synario +feedback +tell +think +things +going +even +anonymously +desire +questions +course +evaluation +completed +last +class +links +previous +quarters +portions +reprinted +adapted +academic +nonprofit +purposes +providing +source +accurately +quoted +duly +credited +copyright +department +computer +science +engineering +university +washington +comments +webmaster +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..83f0e558 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,41 @@ +computers +societycse +computers +societywelcome +home +page +computers +society +course +wintercse +computers +society +focus +social +economic +ethical +legal +implications +present +internet +future +national +andglobal +information +highway +instructor +alan +borning +class +times +tues +thurs +sieg +course +syllabusclass +schedulelinks +relevant +sitesbooks +journals +available +referenceassignmentsassignment diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..6620aa65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,56 @@ +index +pagecse +artificial +intelligencefall +quarter +artificial +intelligence +poses +fundamental +andchallenging +questions +computer +science +build +intelligentmachines +course +addresses +questions +providing +anin +depth +introduction +selected +topics +including +agentarchitectures +knowledge +representation +search +planning +machinelearning +reasoning +uncertainty +methodology +staff +weldweld +sieg +hours +marc +friedmanfriedman +sieg +hours +nick +kushmericknick +sieg +hours +outline +topicsprojectreading +assignmentsassignments +examsgradingresourcesthe +class +mailinglist +also +archive +past +messages diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..accc2535 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,191 @@ +icse +intelligent +information +internet +servers +meets +tuesdays +siegcreating +server +side +scriptsplease +read +guidelines +towrite +programs +executed +someone +follows +link +tothem +people +places +collection +mailing +list +generated +paul +program +check +future +begun +update +file +remember +want +check +first +using +index +html +check +back +index +html +added +phoenix +impressed +older +topics +official +release +glimpse +installed +check +manual +pages +well +glimpse +developer +home +page +interested +details +glimpse +works +read +winter +usenix +paper +design +implementation +glimpse +information +wide +area +information +server +wais +zwhere +mosiac +interface +zephyr +location +database +shows +users +currently +registered +zephyr +makes +guess +room +shows +registered +zephyr +server +another +version +znol +zwatch +zlocate +extra +info +links +except +shows +status +registered +zephyr +users +ones +anyone +file +notes +lectures +discussions +mail +sent +mailing +list +displaying +belief +index +page +short +mike +relegates +reviews +sites +comments +makes +good +page +another +page +miscellaneous +comments +rather +ramble +kurt +grumbles +interface +problems +improvements +mosaic +bring +class +paul +provides +luddite +perspective +idea +intelligent +filter +network +information +sources +nick +provides +vaguely +related +comments +december +cacm +information +filtering +check +summary +article +remember +want +change +document +check +itout +first +using +index +html +check +back +withci +index +html +send +mail +theentire +class +using +address +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..facba4a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,37 @@ +home +page +machine +organization +programming +credits +introduction +current +system +structures +control +communication +memories +processors +devices +projects +involve +detailed +study +specific +small +computerhardware +software +system +prerequisites +consent +instructor +open +students +taken +open +freshmen +semesterly +course +information +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..2d39b1db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,31 @@ +page +introduction +computer +architecture +credits +design +computer +systems +components +processor +design +instruction +design +addressing +control +structures +microprogramming +memory +management +caches +memory +hierarchies +interrupts +structures +prerequisites +andcs +semesterly +course +information +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..ef447912 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,30 @@ +home +page +advanced +computer +architecture +credits +advanced +techniques +computer +design +parallel +processing +andpipelining +multiprocessors +multi +computers +networks +high +performancemachines +special +purpose +processors +data +flow +architectures +prerequisites +semesterly +course +information +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..c8e633fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,33 @@ +home +page +advanced +computer +architecture +credits +parallel +algorithms +principles +parallelism +detection +vectorizingcompilers +interconnection +networks +simd +mimd +machines +processorsynchronization +data +coherence +multis +dataflow +machines +special +purposeprocessors +prerequisites +consent +instructor +semesterly +course +information +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..bd5877bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,446 @@ +introduction +operating +systems +spring +university +wisconsin +madisoncomputer +sciences +departmentcs +spring +bart +millercs +introduction +operating +systemsnew +stufffinal +grades +posted +problem +sets +available +programming +assignment +available +quizzes +answers +available +sections +lecture +notes +ready +reading +printing +class +staffinstructor +bart +milleremail +bart +wisc +eduoffice +csphone +office +hours +wednesday +friday +noonor +appointment +karuna +muthiahemail +muthiah +wisc +eduoffice +csphone +office +hours +monday +wednesday +jonathan +weyersemail +weyers +wisc +eduoffice +csphone +office +hours +monday +friday +wednesday +course +materialsthe +course +organized +around +lecture +notes +thelecture +notesare +available +class +page +need +textbookmodern +operating +systems +tanenbaum +programmingassignments +purchase +copy +ofobject +oriented +programming +using +pohl +whatever +favorite +book +lecture +notes +available +read +first +section +coming +class +modern +operating +systemsandobject +oriented +programming +using +available +book +store +lectures +discussion +sectionslecture +times +tuesday +thursday +computer +sciencesdiscussion +section +wednesday +nolandnote +extra +wednesday +discussion +section +section +used +mainly +recitation +section +discuss +materialcovered +lecture +weekly +quizzes +occasion +used +discuss +important +details +ofthe +programming +assignments +homeworks +make +sure +leave +room +schedule +attend +section +exams +quizzesthere +midterms +final +optional +week +starting +second +week +classes +quiz +thediscussion +section +quizzes +last +minutes +following +past +quizzes +answers +processes +concurrency +february +semaphores +february +monitors +february +messages +february +scheduling +usetraces +activities +real +unix +systems +drive +simulation +goals +assignment +learn +scheduling +algorithms +learn +trace +driven +simulation +experiment +quantitatively +analyzing +computer +system +written +problem +setsduring +semester +hand +severalwritten +problem +sets +based +lectures +problem +sets +need +turned +though +find +poorly +weekely +quizzes +youdon +problemssolution +sets +problem +handed +weeks +theproblem +happy +answer +questions +problems +andlook +solutions +problem +available +goal +assignment +learn +using +various +synchronizationprimitives +solve +problems +problem +available +goal +assignment +learn +memory +management +hardware +software +late +workassignments +date +listed +handout +entire +semester +havethree +late +daysof +credit +late +days +different +assignments +eachof +three +assignments +three +days +assignment +three +days +absolutely +late +work +accepted +late +days +cannot +used +assignmentthat +last +weekof +classes +cheatingprogramming +assignments +done +partners +group +work +independently +groups +cheaters +receive +maximum +penalties +include +receivingan +grade +course +marked +transcript +computer +facilitieswe +probably +using +solaris +unix +workstations +course +unix +workstations +running +solaris +operating +systems +windows +students +registered +class +account +grading +policyif +take +final +take +final +programming +assignments +programming +assignments +quizzes +quizzes +final +final +lowest +quiz +grade +dropped +average +beno +quiz +first +week +week +spring +break +take +final +exam +counted +past +taught +class +class +inthe +range +class +schedulethe +following +schedule +tentative +could +probably +change +week +january +introduction +overview +processesweek +january +february +dispatching +process +creationweek +february +cooperating +processes +synchronizationweek +february +semaphoresweek +february +semaphores +monitorsweek +february +messages +deadlocksweek +march +debugging +strategies +dynamic +memory +allocation +march +spring +breakweek +march +relocationweek +march +segmentation +paging +tlbsweek +april +virtual +memory +page +replacement +thrashing +class +thursday +week +april +working +sets +devices +filesweek +april +disk +allocation +scheduling +directoriesweek +april +protectionweek +april +security +advanced +topics +readings +week +advanced +topics +readings +finals +final +exam +tuesday +last +modified +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..2f370152 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,109 @@ +internet +honors +seminaruniversity +wisconsin +madisoncomputer +sciences +departmentcs +spring +bart +millercs +internet +honors +seminarinstructor +bart +milleremail +bart +wisc +eduoffice +csphone +office +hours +wednesday +friday +noonor +appointment +lectureslecture +time +monday +computer +sciencesclass +schedulethere +written +assignments +class +required +attendall +lectures +participate +discussions +following +schedule +mostly +right +could +change +week +january +bart +miller +introduction +overviewweek +january +larry +landweber +internet +architecture +protocolsweek +february +week +february +bart +miller +client +server +remote +procedure +callsweek +february +systems +securityweek +february +eric +bach +security +encryptionweek +march +march +spring +breakweek +march +miron +livny +images +pictures +netweek +march +week +april +high +performance +file +systemsweek +april +week +april +david +wood +internet +supercomputerweek +april +larus +javaweek +april +week +bart +miller +discussionslast +modified +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..9b1af4f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,254 @@ +advanced +operating +systems +fall +university +wisconsin +madisoncomputer +sciences +departmentcs +fall +bart +millercs +advanced +operating +systemssummarythis +course +intended +give +broad +exposure +advancedoperating +systems +topics +reading +discussing +topics +protection +security +memory +management +operating +system +kernels +file +systems +synchronization +naming +distributed +systems +please +read +rest +information +sheet +carefully +textthere +really +satisfactory +textbook +graduate +level +operatingsystemsclass +current +literature +text +course +structured +around +readings +journal +articles +andconference +proceedings +able +purchase +readings +doit +handout +class +discuss +topics +relevant +current +papers +lecture +detail +detail +review +papers +willinstead +adiscussion +major +topics +themes +using +papers +focal +point +form +reading +groups +classmates +meetonce +twice +week +discuss +details +assigned +papers +readings +especially +important +part +class +reading +listaccording +posted +reading +schedule +formula +successful +class +read +papersindependently +discuss +reading +group +trying +identifythe +important +issues +participate +class +discussion +thepapers +class +discussionsclass +meetings +form +discussion +lectures +talk +topics +discussion +besupported +comments +opinions +willing +participate +actively +daily +class +geta +expect +quietly +listen +weeks +bevery +unhappy +class +papersduring +class +write +papers +short +pages +andone +longer +first +paperwill +design +based +ideas +read +work +well +understood +operating +system +facilityand +design +extension +area +second +paper +involve +project +paper +summaryof +project +aselection +project +topicsfrom +choose +writing +well +important +writing +good +ideas +paper +reviewed +least +twice +first +reading +refereeing +paper +fellowstudents +give +writer +critical +comments +another +person +givethe +reader +look +someone +else +writing +paper +revised +second +pass +read +examsthere +exams +papers +reading +keep +busy +gradesscores +final +grades +posted +assignments +graded +first +paper +assignmentis +availble +summary +scores +fromthe +project +proposalsis +also +availble +final +course +gradesare +available +detailstime +tuesday +thursday +place +csoffice +hours +tuesday +thursday +noonlast +modified +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..3d517dce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,262 @@ +section +home +pagecs +introduction +computer +programmingsection +fortran +credit +course +covers +basic +programming +structures +needed +prepare +students +elementary +engineering +courses +prior +computer +programming +experience +required +basic +knowledge +computers +assumed +material +covered +enable +write +simple +computer +programs +solve +engineering +problems +elementary +courses +programming +done +fortran +course +intended +students +received +little +programming +instruction +high +school +section +taught +entirely +fortran +programming +language +intended +primarily +engineering +students +computer +science +majors +click +course +description +menu +important +announcements +read +lectures +instructor +grading +policy +syllabus +text +lecture +notes +programming +assignments +problem +solving +exercises +computer +pointers +interest +lectures +section +psychology +march +please +punctual +lectures +avoid +disturbing +class +instructor +gareth +bestor +office +computer +science +overall +structure +program +primarily +exercise +general +problem +solving +write +fortran +code +though +want +time +solution +algorithm +even +dependent +particular +programming +language +fortran +solution +class +following +monday +exercises +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +subroutines +click +solution +question +functions +click +solution +week +question +click +solution +computer +labyou +using +vectra +computer +science +statistics +containing +hewlett +packard +vectra +running +microsoft +windows +microsoft +fortran +open +seven +days +week +except +certain +holidays +printer +room +located +across +hall +also +home +dorm +computers +write +programs +however +probably +purchase +copy +microsoft +fortran +lahey +personal +fortran +inside +cover +textbook +also +work +computer +labs +campus +however +fortran +compilers +please +first +using +software +using +includes +microsoft +windows +microsoft +fortran +mail +netscape +pointers +interest +home +page +gareth +bestor +home +page +computer +sciences +department +home +page +starting +points +internet +exploration +lycos +search +world +wide +keyword +dilbert +comic +relief +long +nights +assignment +copyright +copy +gareth +bestor +bestor +wisc +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..e53b2139 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,613 @@ +section +home +pagecs +algebraic +language +programmingsection +fortran +section +taught +entirely +fortran +programming +language +intended +primarily +engineering +students +computer +science +majors +click +course +description +menu +important +announcements +read +lectures +instructor +grading +policy +syllabus +text +lecture +notes +exams +programming +assignments +problem +solving +exercises +computer +pointers +interest +lectures +section +psychology +please +punctual +lectures +avoid +disturbing +class +instructor +gareth +bestor +office +computer +science +lowest +exam +score +contribute +must +complete +hand +assignments +eligible +receive +passing +grade +course +final +grades +sections +graded +curve +mean +range +curve +computed +final +exam +completed +exam +thur +february +exam +april +computer +science +final +amclick +list +current +class +grades +identifed +student +syllabus +tentative +following +topics +sections +text +approximately +covered +week +semester +class +read +relevant +sections +text +coming +class +able +questions +class +anything +unsure +instead +waiting +trying +work +assignment +discover +didn +really +understand +something +weeks +term +exam +weeks +term +exam +weeks +final +exam +text +lecture +notestext +fortran +engineering +applications +edition +koffman +friedman +lecture +notes +copies +lecture +notes +available +line +week +class +important +line +lecture +notes +substitute +coming +class +cover +show +overhead +projector +include +examples +additional +notes +board +responsible +material +covered +class +week +week +week +week +week +week +week +week +week +week +week +week +week +week +week +exams +programming +assignmentsthere +three +exams +term +exams +final +exam +exams +constitute +final +grade +important +well +ensure +good +grade +regardless +performance +assignments +exams +approximately +hours +long +though +stay +longer +need +extra +time +closed +book +need +bring +pencil +exams +calculators +necessary +even +useful +exam +solutions +term +exam +term +exam +seven +programming +assignments +including +program +contributing +final +grade +assignments +must +completed +handed +eligible +receive +passing +grade +course +must +attempt +complete +every +programming +assignment +hand +program +even +compile +without +errors +graded +automatically +receive +zero +grade +risk +failing +course +gradesheets +handin +directory +hand +assignments +online +late +policy +policy +academic +misconduct +cheating +assignment +specifications +program +monday +program +friday +program +wednesday +program +wednesday +program +friday +program +monday +program +friday +pmhow +help +assignments +consultants +consultants +computer +help +problems +wear +name +tags +duty +approximately +consultants +answer +short +questions +compiler +error +messages +program +syntax +well +login +printers +send +mail +netscape +click +information +consultants +instructor +general +questions +assignment +questions +require +long +explanation +best +answered +please +office +hours +send +mail +normally +office +except +office +hours +dissertation +research +home +modem +therefore +want +outside +office +hours +please +make +appointment +first +easily +contact +mail +regularly +login +read +mail +home +click +send +mail +problem +solving +exercisesone +important +skills +learn +class +problem +solving +good +problem +solving +skills +distinguish +good +computer +programmer +doesn +matter +familiar +skilled +particular +programming +language +understand +solve +problem +able +write +computer +program +language +help +learn +problem +solving +skills +techniques +assign +weekly +problem +solving +exercises +small +trivial +problems +give +monday +look +problem +think +week +right +steps +would +solve +problem +overall +structure +program +primarily +exercise +general +problem +solving +write +fortran +code +though +want +time +solution +algorithm +even +dependent +particular +programming +language +fortran +friday +solution +class +exercises +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +subroutines +click +solution +question +functions +click +solution +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +click +solution +week +question +computer +labyou +using +vectra +computer +science +statistics +containing +hewlett +packard +vectra +running +microsoft +windows +microsoft +fortran +open +seven +days +week +except +certain +holidays +printer +room +located +across +hall +also +home +dorm +computers +write +programs +however +probably +purchase +copy +microsoft +fortran +lahey +personal +fortran +inside +cover +textbook +also +work +computer +labs +campus +however +fortran +compilers +please +first +using +software +using +includes +microsoft +windows +microsoft +fortran +mail +netscape +pointers +interest +home +page +gareth +bestor +home +page +computer +sciences +department +home +page +starting +points +internet +exploration +lycos +search +world +wide +keyword +dilbert +comic +relief +long +nights +assignment +copyright +copy +gareth +bestor +bestor +wisc +last +modified +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..2f68d366 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,86 @@ +fall +section +fall +section +algebraic +language +programming +name +dave +egglestonemail +burnett +wisc +eduoffice +office +phone +office +hours +announcements +updated +note +original +output +prog +page +errors +days +week +correct +values +information +exam +updated +questions +asked +program +hourlyworker +classreading +scan +thursday +class +program +available +solution +quiz +grades +page +general +course +informationcs +home +pagecourse +objectivesvectra +labcs +consultantssyllabusworking +homeclass +handouts +gradeshomeworkexams +quizzesmiscellaneous +archivepolicy +informationemail +policygrading +policylate +policyacademic +misconduct +policytextproblem +solving +object +programming +walter +savitchaddison +wesley +publishing +company +list +known +erratalast +modified +dave +eggleston +burnett +wisc +based +greg +sharp +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..217f0db2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,180 @@ +spring +advanced +operating +systems +spring +summary +course +intended +give +broad +exposure +advancedoperating +systems +topics +important +components +course +reading +discussion +ofvarious +research +papers +project +involving +implementation +anexperimental +system +research +papers +cover +topicsincluding +synchronization +communications +memory +management +file +systems +protection +security +distributed +systems +project +requires +tochoose +problem +research +propose +solutions +implement +prototype +system +lecture +info +class +discuss +topics +relevant +current +papers +lecture +detailed +review +papers +rathera +discussion +major +topics +themes +using +papers +focal +point +active +participation +discussion +strongly +encouraged +lecture +tuesday +thursday +engineering +halloffice +hour +tuesday +appointment +computer +sciences +text +text +selection +classic +papers +operating +system +design +implementation +purchase +readings +doit +formerly +macc +documentation +deskfor +readings +semester +different +previous +semesters +please +copy +papers +grading +exam +course +instead +assignments +first +assignment +using +benchmark +suite +measure +performanceof +various +operating +systems +sunos +solaris +linux +windows +manyas +hands +second +assignment +project +involving +project +proposal +implementation +final +report +project +presentation +total +grade +class +participation +counts +first +assignment +counts +project +counts +schedule +tentative +schedule +projects +list +suggested +projects +make +project +well +either +case +need +come +discuss +choosing +project +teams +people +allowed +slides +slides +used +lectures +assigments +first +assignment diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..937c223c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,87 @@ +introduction +computer +programming +computer +sciences +fall +credit +course +designed +cover +basic +programmingstructures +needed +prepare +students +elementaryengineering +courses +material +covered +sufficient +enablethe +student +write +simple +programs +solve +engineering +problems +inelementary +courses +material +essentially +first +half +list +fall +sections +lecture +fortran +jeff +lampert +lecture +fortran +jeff +lampert +lecture +tony +silva +lecture +tony +silva +lecture +sidney +hummert +lecture +sidney +hummert +lecture +michael +birk +lecture +michael +birk +lecture +sidney +hummert +lecture +sidney +hummert +lecture +tony +silva +lecture +tony +silva +lecture +russell +manning +lecture +russell +manning +lecture +martin +reameslast +modified +anthony +silva diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..0fc10570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,222 @@ +fall +midterm +exam +answer +keyinstructorprofessor +desautelsoffice +computer +sciencesoffice +hours +monday +wednesday +appointment +phone +dept +office +mail +wisc +teaching +assistantsfollow +links +home +page +name +kelly +ratliff +email +kelly +wisc +office +office +phone +office +hours +sections +grades +name +nathan +bockrath +email +bockrath +wisc +office +office +phone +office +hours +sections +grades +name +rehnuma +rahman +email +rehnuma +wisc +office +office +phone +office +hours +sections +grades +name +jaime +fink +email +jfink +wisc +office +office +phone +office +hours +sections +grades +name +ashraf +aboulnaga +email +ashraf +wisc +office +office +phone +office +hours +sections +grades +name +andrew +geery +email +geery +wisc +office +office +phone +office +hours +sections +grades +name +james +herro +email +jherro +wisc +office +office +phone +office +hours +sections +grades +name +abhinav +gupta +email +agupta +wisc +office +office +phone +office +hours +sections +grades +name +jyothi +krothapalli +email +jyothi +wisc +office +office +phone +office +hours +sections +grades +name +chiang +email +suhui +wisc +office +office +phone +office +hours +sections +grades +name +thanos +tsiolis +email +tsiolis +wisc +office +office +phone +office +hours +sections +gradesexplore +companies +whose +software +hardware +borland +hewlett +packard +intel +microsoft +novell +useful +links +exploration +lycos +enormous +database +sites +yahoo +internet +resources +classified +categories +lookup +search +virtual +tourist +find +sites +around +world +clicking +world +mother +large +alphabetical +list +sites +cool +lists +especially +excellent +sites +university +wisconsin +madison +home +page +page +originally +created +maintained +teitelbaum +thanos +tsiolis +modified +maintained +kelly +ratliff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..0fc10570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,222 @@ +fall +midterm +exam +answer +keyinstructorprofessor +desautelsoffice +computer +sciencesoffice +hours +monday +wednesday +appointment +phone +dept +office +mail +wisc +teaching +assistantsfollow +links +home +page +name +kelly +ratliff +email +kelly +wisc +office +office +phone +office +hours +sections +grades +name +nathan +bockrath +email +bockrath +wisc +office +office +phone +office +hours +sections +grades +name +rehnuma +rahman +email +rehnuma +wisc +office +office +phone +office +hours +sections +grades +name +jaime +fink +email +jfink +wisc +office +office +phone +office +hours +sections +grades +name +ashraf +aboulnaga +email +ashraf +wisc +office +office +phone +office +hours +sections +grades +name +andrew +geery +email +geery +wisc +office +office +phone +office +hours +sections +grades +name +james +herro +email +jherro +wisc +office +office +phone +office +hours +sections +grades +name +abhinav +gupta +email +agupta +wisc +office +office +phone +office +hours +sections +grades +name +jyothi +krothapalli +email +jyothi +wisc +office +office +phone +office +hours +sections +grades +name +chiang +email +suhui +wisc +office +office +phone +office +hours +sections +grades +name +thanos +tsiolis +email +tsiolis +wisc +office +office +phone +office +hours +sections +gradesexplore +companies +whose +software +hardware +borland +hewlett +packard +intel +microsoft +novell +useful +links +exploration +lycos +enormous +database +sites +yahoo +internet +resources +classified +categories +lookup +search +virtual +tourist +find +sites +around +world +clicking +world +mother +large +alphabetical +list +sites +cool +lists +especially +excellent +sites +university +wisconsin +madison +home +page +page +originally +created +maintained +teitelbaum +thanos +tsiolis +modified +maintained +kelly +ratliff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..dc8ff427 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,228 @@ +using +computers +lectures +using +computersinstructor +info +instructor +sally +petersonoffice +computer +sciencephone +mail +sally +wisc +slpeters +facstaff +wisc +eduoffice +hours +tuesday +thursday +appointmentvital +class +info +time +lecture +lecture +place +lectures +held +engineering +halllecture +text +information +technology +society +laudon +traver +laudonlab +text +point +click +drag +using +macintosh +petersoncourse +introduction +class +designed +take +zero +knowledge +computersto +crack +shot +user +using +skills +throughcollege +arena +sections +taught +using +macintoshcomputers +sections +using +available +csusing +computers +lectures +course +components +part +lecture +lecture +discuss +computers +generic +terms +general +computer +science +topics +discuss +computers +work +including +following +topics +necessarily +order +application +programs +including +word +processors +spreadsheets +graphics +databases +hardware +input +output +storage +devices +operating +systems +programming +languages +networks +telecommunications +artificial +intelligence +expert +systems +computer +related +social +issues +part +laboratory +discussion +sections +hands +experienceon +macintosh +iici +computers +following +programs +word +processing +word +electronic +mail +newsgroups +world +wide +eudora +netscape +painting +drawing +aldus +superpaint +spreadsheet +charting +excel +database +filemaker +presentation +manager +hypercard +desktop +publishing +aldus +pagemaker +integral +part +learning +macintosh +operating +system +system +well +addition +special +tools +scanners +available +teach +sections +thegoal +providing +high +quality +instruction +rich +educationalexperience +namesectiontimedays +bodner +mwnick +leavy +mwtrshannon +lloyd +trtrjeff +reminga +mwfmwira +sharenow +trtrbrian +swander +mwfmwfbrad +thayer +mwfmwfjoe +varghese +trtrgeoff +weinberg +mwftrmaria +yuin +mwfmwrecommended +background +background +necessary +course +assignments +quizzes +exams +grade +based +exams +lecture +regular +assignmentsand +quizzes +syllabus +glance +syllabus +contains +nitty +gritty +class +details +click +assignments +assignment +superpaintassignment +excellast +modified +october +jonbodner diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..dc8ff427 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,228 @@ +using +computers +lectures +using +computersinstructor +info +instructor +sally +petersonoffice +computer +sciencephone +mail +sally +wisc +slpeters +facstaff +wisc +eduoffice +hours +tuesday +thursday +appointmentvital +class +info +time +lecture +lecture +place +lectures +held +engineering +halllecture +text +information +technology +society +laudon +traver +laudonlab +text +point +click +drag +using +macintosh +petersoncourse +introduction +class +designed +take +zero +knowledge +computersto +crack +shot +user +using +skills +throughcollege +arena +sections +taught +using +macintoshcomputers +sections +using +available +csusing +computers +lectures +course +components +part +lecture +lecture +discuss +computers +generic +terms +general +computer +science +topics +discuss +computers +work +including +following +topics +necessarily +order +application +programs +including +word +processors +spreadsheets +graphics +databases +hardware +input +output +storage +devices +operating +systems +programming +languages +networks +telecommunications +artificial +intelligence +expert +systems +computer +related +social +issues +part +laboratory +discussion +sections +hands +experienceon +macintosh +iici +computers +following +programs +word +processing +word +electronic +mail +newsgroups +world +wide +eudora +netscape +painting +drawing +aldus +superpaint +spreadsheet +charting +excel +database +filemaker +presentation +manager +hypercard +desktop +publishing +aldus +pagemaker +integral +part +learning +macintosh +operating +system +system +well +addition +special +tools +scanners +available +teach +sections +thegoal +providing +high +quality +instruction +rich +educationalexperience +namesectiontimedays +bodner +mwnick +leavy +mwtrshannon +lloyd +trtrjeff +reminga +mwfmwira +sharenow +trtrbrian +swander +mwfmwfbrad +thayer +mwfmwfjoe +varghese +trtrgeoff +weinberg +mwftrmaria +yuin +mwfmwrecommended +background +background +necessary +course +assignments +quizzes +exams +grade +based +exams +lecture +regular +assignmentsand +quizzes +syllabus +glance +syllabus +contains +nitty +gritty +class +details +click +assignments +assignment +superpaintassignment +excellast +modified +october +jonbodner diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..486038af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,81 @@ +home +pagecomputer +sciences +algebraic +language +programming +sections +instructorswe +would +like +comments +suggestions +complaints +feedback +provided +clicking +skrentny +coordinator +office +email +skrentny +csinformation +sections +frequently +asked +questions +course +overview +microcomputer +laboratories +consultants +fall +consulting +schedule +tutors +mainly +policy +academic +misconduct +courses +offered +department +software +sections +introduction +microsoft +windows +hints +windows +compilers +windows +operating +system +email +netscape +creating +using +subdirectoriesc +information +savitch +text +book +introduction +borland +language +borland +integrated +development +environmentfortran +information +jeff +lampert +home +page +section +last +updated +skrentny +coordinator +skrentny +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..a4cda2d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,41 @@ +course +infocourse +information +cscourse +descriptionfrom +guidebook +undergraduate +students +construction +algorithms +problem +solving +instruction +experience +least +procedure +oriented +language +pascal +fortran +survey +languages +advanced +programming +techniques +prereq +advanced +high +school +mathematical +preparation +college +work +mathematics +statistics +logic +consent +instructor +open +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..486038af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,81 @@ +home +pagecomputer +sciences +algebraic +language +programming +sections +instructorswe +would +like +comments +suggestions +complaints +feedback +provided +clicking +skrentny +coordinator +office +email +skrentny +csinformation +sections +frequently +asked +questions +course +overview +microcomputer +laboratories +consultants +fall +consulting +schedule +tutors +mainly +policy +academic +misconduct +courses +offered +department +software +sections +introduction +microsoft +windows +hints +windows +compilers +windows +operating +system +email +netscape +creating +using +subdirectoriesc +information +savitch +text +book +introduction +borland +language +borland +integrated +development +environmentfortran +information +jeff +lampert +home +page +section +last +updated +skrentny +coordinator +skrentny +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..47733b5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,69 @@ +home +page +problem +solving +using +computers +fall +computer +sciences +check +following +pages +information +instructors +teaching +assistants +including +office +hours +information +assignments +including +suggestions +copies +assignments +explanations +grading +check +policy +assignments +work +information +examinations +copies +past +exams +information +labs +including +copies +handouts +documents +including +syllabus +many +documents +pages +postscript +need +postscript +viewer +obtain +site +check +local +services +section +department +home +page +local +services +page +ghost +directory +read +readme +file +directions diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..47733b5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,69 @@ +home +page +problem +solving +using +computers +fall +computer +sciences +check +following +pages +information +instructors +teaching +assistants +including +office +hours +information +assignments +including +suggestions +copies +assignments +explanations +grading +check +policy +assignments +work +information +examinations +copies +past +exams +information +labs +including +copies +handouts +documents +including +syllabus +many +documents +pages +postscript +need +postscript +viewer +obtain +site +check +local +services +section +department +home +page +local +services +page +ghost +directory +read +readme +file +directions diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..f1d54db2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,271 @@ +home +page +fall +fall +course +information +instructors +office +hours +office +hours +class +cancellations +handouts +assignments +solutions +exams +grades +simulator +help +lecture +notes +frequently +asked +questions +instructors +sections +jerry +tusch +office +phone +hours +mail +jerry +wisc +tutsch +execpc +class +section +section +nolandsection +karen +miller +office +phone +hours +mail +smoler +wisc +class +time +psychology +sunlung +suen +office +phone +hours +tuth +mail +ssuen +wisc +edusridevi +bhamidipati +office +phone +hours +mail +bsri +wisc +edumohammad +asgarian +office +phone +hours +tuth +mail +wisc +class +cancellations +karen +section +class +monday +september +class +wednesday +november +jerry +sections +cancellations +scheduled +handouts +revised +chapter +postscript +course +overview +jerry +sections +html +assignments +assignment +html +html +assignment +program +homework +assignment +karen +section +program +homework +assignment +program +homework +assignment +program +homework +solutions +quiz +sections +assignment +html +program +quiz +sections +assignment +html +program +quiz +sections +assignment +html +program +quiz +sections +assignment +html +programsal +programmal +quiz +sections +assignment +html +program +examsall +quizzes +open +book +note +calculators +karen +section +quiz +friday +september +class +quiz +friday +september +class +quiz +friday +october +class +quiz +friday +october +class +quiz +friday +november +class +quiz +friday +november +class +quiz +probably +friday +december +last +class +jerry +sections +section +syllabus +html +section +syllabus +html +previous +exams +postscript +format +fall +exam +spring +exam +fall +exam +spring +exam +fall +exam +fall +exam +spring +exam +summer +exam +fall +midterm +exam +answers +fall +final +exam +answers +grades +lookup +grade +simulator +help +graphical +interface +manual +lecture +noteskaren +miller +section +section +chapter +chapter +chapter +number +systems +chapter +data +representation +chapter +integer +arithmetic +chapter +floating +point +arithmetic +chapter +data +structures +chapter +registers +chapter +procedures +updated +wednesday +chapter +assembly +updatedmonday +october +chapter +chapter +exception +processing +chapter +features +performance +chapter +architecure +case +studies diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..73b5901c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,296 @@ +home +page +fall +fall +course +information +instructors +office +hours +office +hours +class +cancellations +handouts +assignments +solutions +exams +grades +simulator +help +lecture +notes +frequently +asked +questions +instructors +sections +jerry +tusch +office +phone +hours +mail +jerry +wisc +tutsch +execpc +class +section +section +nolandsection +karen +miller +office +phone +hours +mail +smoler +wisc +class +time +psychology +sunlung +suen +office +phone +hours +tuth +mail +ssuen +wisc +edusridevi +bhamidipati +office +phone +hours +mail +bsri +wisc +edumohammad +asgarian +office +phone +hours +tuth +mail +wisc +class +cancellations +karen +section +class +monday +september +class +wednesday +november +jerry +sections +cancellations +scheduled +handouts +revised +chapter +postscript +course +overview +jerry +sections +html +assignments +assignment +html +html +assignment +program +homework +assignment +karen +section +program +homework +assignment +program +homework +assignment +program +homework +assignment +program +homework +solutions +quiz +sections +assignment +html +program +quiz +sections +assignment +html +program +quiz +sections +assignment +html +program +quiz +sections +assignment +html +programsal +programmal +quiz +sections +assignment +html +program +quiz +sections +assignment +html +programa +programb +quiz +sections +examsall +quizzes +open +book +note +calculators +karen +section +quiz +friday +september +class +quiz +friday +september +class +quiz +friday +october +class +quiz +friday +october +class +quiz +friday +november +class +quiz +friday +november +class +quiz +probably +friday +december +last +class +optional +final +thursday +december +difficult +cumulative +final +offered +desperateto +raise +grade +sign +advance +december +jerry +sections +section +syllabus +html +section +syllabus +html +previous +exams +postscript +format +fall +exam +spring +exam +fall +exam +spring +exam +fall +exam +fall +exam +spring +exam +summer +exam +fall +midterm +exam +answers +fall +final +exam +answers +grades +lookup +grade +simulator +help +graphical +interface +manual +lecture +noteskaren +miller +section +section +chapter +chapter +chapter +number +systems +chapter +data +representation +chapter +integer +arithmetic +chapter +floating +point +arithmetic +chapter +data +structures +chapter +registers +chapter +procedures +updated +wednesday +chapter +assembly +updatedmonday +october +chapter +chapter +exception +processing +chapter +features +performance +chapter +architecure +case +studies diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..339146e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,255 @@ +data +structures +lecs +introduction +data +structureslecture +psychologylecture +psychologycourse +information +announcements +reading +assignments +getting +started +getting +help +exams +programming +assignments +sample +code +lecturer +information +course +materials +computers +labs +home +announcements +general +announcements +placed +recent +announcements +first +announcements +problems +programming +assignments +found +located +assignment +page +binary +search +tree +sample +code +online +samples +page +last +makeup +exam +done +placed +copy +solution +exam +reserve +kurt +wendt +library +queue +sample +code +online +samples +page +stack +sample +code +online +samples +page +exam +topics +online +exams +page +list +sample +code +online +samples +page +handin +directories +created +list +common +programming +errors +online +suggestions +additions +welcomed +either +vega +computer +labs +work +computers +information +sample +code +placed +line +monday +wednesday +lectures +getting +magic +number +error +source +files +must +able +compiled +otherwise +unusual +error +looking +stale +pages +forget +reload +pages +pages +updated +copies +browser +caches +become +outdated +stale +attend +unix +tutorial +need +attend +times +listed +thurs +thurs +reading +assignments +future +balanced +search +trees +chapter +pages +future +tables +chapter +discuss +comparisons +implementations +lecture +lectures +trees +chapter +lectures +queues +chapter +skip +simulation +lecture +overloading +operators +chapter +pages +lectures +hash +tables +chapter +pages +lectures +stacks +chapter +lectures +linked +lists +chapter +lecture +pointers +dynamic +memory +allocation +chapter +pages +lecture +sorting +searching +analysis +chapter +pages +lectures +sorting +algorithms +chapter +pages +lectures +basic +recursion +searching +algorithms +chapter +lectures +basic +chapter +pages +skip +focus +lectures +basic +appendix +pages +lecturer +skrentny +skrentny +wisc +office +computer +sciences +office +hours +teaching +assistants +baicheng +billy +liao +bail +wisc +office +computer +sciences +office +hours +cheng +jiacheng +wisc +office +computer +science +office +hours +pmcopyright +copy +james +skrentny +skrentny +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..9afe7282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,987 @@ +lecture +introduction +data +structuresfall +course +email +address +wisc +course +home +page +http +wisc +htmlinstructor +yannis +ioannidis +office +computer +sciencesoffice +hours +tuesday +thursday +amoffice +phone +email +address +yannis +wisc +home +page +http +wisc +yannis +yannis +html +contents +news +teaching +assistants +lecture +information +language +text +grading +exams +course +schedule +assignment +programming +assignments +late +policy +cheating +help +program +grading +style +external +documentation +internal +documentation +using +unix +program +development +cycle +newsassignment +assignment +ready +midterm +statisticssome +interesting +exam +statistics +section +median +mean +midterma +sample +oldmidterm +available +help +preparation +ownmidterm +assignment +assignment +ready +notes +notation +binary +searchthe +notes +notation +binary +search +available +want +print +either +open +filemenu +ghostview +window +shows +document +andchoose +print +menu +item +women +computer +sciencesome +female +faculty +graduate +students +undergraduates +haveformed +group +called +wics +women +computer +science +oneof +group +goals +encourage +women +becomecomputer +science +majors +women +thisclass +would +like +talk +someone +majoring +incomputer +science +graduate +studies +computer +science +women +would +like +extra +help +withtheir +classwork +suzan +computerscience +grad +student +office +hours +email +tomake +appointment +suzan +mail +address +stodder +wisc +eduand +office +hours +tuesday +growing +tremendously +field +theodds +ever +write +another +program +thiscourse +ends +able +write +statementis +true +pascal +also +widely +available +aninitial +startup +period +productive +take +computer +science +courses +exceptionsyou +required +courses +textthe +text +book +course +isdata +abstraction +problem +solving +walls +mirrors +frank +carrano +isbn +well +written +text +covers +material +course +also +includes +separate +text +language +notnecessary +lectures +often +always +following +lecture +notes +fall +david +dewitt +notes +actually +considerably +complete +simple +lecturenotes +still +short +true +text +book +isvery +little +narrative +text +exercises +recommended +additional +source +want +purchase +notes +whichare +available +doit +documentation +desk +near +dayton +street +entranceof +computer +sciences +building +dayton +first +experience +unix +needsome +information +activating +account +logging +creating +editing +manipulating +files +compiling +running +debugging +programs +handoutcs +available +doit +information +desk +dewitt +notesare +available +contains +information +find +invaluable +also +help +section +mentioned +lectures +often +follow +dewitt +notes +althoughi +supplement +handouts +courseof +semester +nonetheless +responsible +material +covered +lecture +exams +based +onthe +lecture +material +reading +assignments +notes +andthe +course +assignments +gradingthere +evening +exams +course +semester +final +exam +five +programming +assignments +exams +determine +final +grade +approximately +equal +weight +programmingassignments +count +exams +exam +tuesday +october +chemistry +exam +final +exam +wednesday +december +place +course +schedule +following +list +topics +covered +thiscourse +detailed +scheduled +provided +later +semester +topic +dewitt +notes +walls +mirrors +introduction +administration +general +familiarity +basic +stuff +lecture +functions +lecture +apointers +lecture +records +equivalent +madison +prerequisitecourse +assignments +must +done +designated +machines +theseare +machine +rooms +first +floor +thecs +building +encourage +machines +prefer +home +computer +certainrestrictions +must +compiler +home +machine +must +university +account +often +read +emailand +copies +data +files +finally +require +thatyou +turn +program +electronically +email +youwork +home +must +make +provisions +download +programs +toyour +university +account +make +sure +compile +runwith +compiler +sparcstations +often +electronic +mail +notify +students +changes +inassignments +hints +programs +assume +read +allelectronic +mail +send +late +policyno +late +assignment +accepted +assignments +must +turned +exactly +order +avoid +lateness +caused +machine +loads +coincident +duedates +several +classes +simply +sure +started +right +away +oneach +assignment +things +certain +wrong +wait +thelast +minute +start +exceptions +must +approved +need +good +excuse +trouble +soon +possible +cheatingthe +computer +science +department +takes +hard +linestance +cheating +welcome +tocommunicate +design +algorithms +datastructures +butthere +sharing +code +also +expected +learn +understand +obey +thecomputer +systems +policiesgoverning +computer +accounts +helpif +problems +course +work +programs +please +know +early +semester +possible +office +hours +policiesif +need +help +debugging +program +best +help +tovisit +thecs +office +hours +taking +along +currenthard +copy +program +office +hours +intended +time +explain +conceptsthat +presented +class +still +confused +answer +specific +questions +course +material +encourage +email +reliable +contact +problems +read +respond +emailseveral +times +daily +almost +every +week +program +gradingprograms +graded +following +criteria +correctness +program +behave +correctly +normally +typicalinput +program +behave +stated +projectspecifications +clarity +program +easy +read +understand +notes +style +informationabout +clarity +robustness +correct +behavior +extreme +unusual +situations +program +handle +situations +reasonable +andlogical +manner +simply +blow +quality +test +data +test +data +program +shoulddemonstrate +facets +program +capabilities +includingunusual +cases +efficiency +avoid +unnecessarily +inefficient +algorithms +constructs +however +efficiency +never +pursued +expense +clarity +modularity +program +modular +make +effective +useof +parameters +completeness +incorporate +information +program +need +sort +extra +paper +documentation +generality +program +general +possible +subject +considerationof +efficiency +clarity +avoid +arbitrary +limitations +bound +size +orcomplexity +input +whenever +possible +limitations +necessary +expressed +definedconstants +near +program +easily +changed +numeric +literals +appear +program +thosevalues +likely +change +styleuse +meaningful +identifier +names +consistent +naming +scheme +identifier +names +suggested +convention +follows +variable_name +function_name +argument +const +defined_constant +enum +enumtype +value +value +class +classname +multiple +statements +single +line +skip +lines +functional +groups +code +clear +consistent +indentation +style +dewitt +notesfor +suggested +style +indent +continued +statements +loops +line +label +meaningfully +done +external +documentationthis +included +long +comment +beginning +yourprogram +addressed +typical +user +someonewho +wants +know +superficially +program +works +include +full +name +student +beginning +comment +give +general +description +program +tell +program +call +format +data +give +limitations +bugs +special +features +assumptions +made +describe +negative +well +positive +aspects +program +include +negatives +assume +unaware +information +included +assignment +problem +descriptionneed +repeated +briefly +summarized +first +point +statement +referring +user +assignment +document +thensufficient +note +applies +problem +description +internal +documentationthere +four +main +types +internal +documentation +headers +comments +headers +functions +classes +major +data +structuresshould +describe +purpose +assumptions +parameters +main +outline +algorithms +declarations +comments +next +declaration +variable +data +membershould +provide +extra +information +conveyed +identifier +sname +name +variable +tell +much +possible +withoutmaking +long +additional +information +supplied +comment +example +index +last +element +added +stackyou +comments +explain +parameters +well +local +variables +within +segments +code +tricky +opaque +sections +code +beavoided +sometimes +necessary +cases +commentcan +help +reader +understand +going +segments +code +comments +clarify +level +outlineof +algorithm +using +unix +vimany +people +working +unix +thefirst +time +find +takes +time +becomecomfortable +particularly +true +youronly +previous +programming +experience +pascal +using +macpascal +macintosh +strongly +urge +inthe +time +early +semester +become +comfortable +withunix +time +painful +time +wellspent +also +wish +attend +unix +tutorial +held +rooms +comp +sessions +thefollowing +days +tbayou +want +pick +copy +program +development +cycle +program +development +cycle +unix +environment +edit +program +program +compile +program +wall +program +compilation +errors +continue +program +inputfile +outputfile +look +output +outputfile +outputfile +errors +break +tired +continue +print +listing +take +home +program +inputfile +outputfile +goto +home +debug +program +quit +done +turn +result +submission +instruction +given +later +yannis +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..4b0a8763 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,519 @@ +introduction +data +structures +http +wisc +html +revised +fall +james +larusinstructor +james +laruslarus +wisc +computer +sciences +http +wisc +larus +larus +html +office +hours +tuesday +friday +amcontentsteaching +assistantstextlecture +informationelectronic +mailthe +languagegradingexamscourse +scheduleassignment +assignment +assignment +assignment +programming +assignmentscourse +objectivescs +objectives +present +concepts +data +structures +general +widely +used +structures +detail +data +structures +fundamental +building +blocks +computer +programs +course +able +identify +situations +data +structure +necessary +determine +requirements +data +structure +select +appropriate +data +structure +covered +course +reiterate +concepts +structure +programming +abstract +data +types +modularity +principles +introduced +essential +writing +clear +correct +maintainable +software +close +connection +abstract +data +types +data +structures +course +places +strong +emphasis +applying +principles +programming +exercises +teaching +assistantswei +zhang +chin +tang +chin +teaching +assistants +forthis +course +sections +grade +homework +assignmentsand +happy +answer +questions +theassignments +aspect +course +giving +trouble +zhangoffice +compuer +sciencesoffice +hours +wednesday +thursday +sunday +office +phone +email +address +weiz +wisc +chin +tang +chin +office +computer +sciencesoffice +hours +monday +tuesday +friday +amoffice +phone +email +address +cchin +wisc +home +page +http +wisc +cchin +cchin +html +textthe +text +book +course +data +abstraction +problem +solving +walls +mirrors +frank +carrano +isbn +well +written +little +long +winded +text +covers +material +course +also +includes +background +separate +text +language +necessary +lectures +often +always +follow +david +dewitt +lecture +notes +fall +notes +complete +simple +lecture +notes +fall +short +true +text +book +contain +little +narrative +text +exercises +using +notes +basis +lectures +feel +free +skip +portions +cover +additional +material +want +purchase +notes +available +doit +documentation +desk +dayton +street +entrance +computer +sciences +building +dayton +course +first +experience +unix +need +information +activating +account +logging +creating +editing +manipulating +files +compiling +running +debugging +programs +handout +also +available +doit +information +desk +contains +crucial +information +also +also +help +section +lecture +information +tuesday +thursday +psychology +mentioned +lectures +often +follow +dewitt +notes +lecture +attendence +strongly +recommended +regularly +present +material +appear +textbook +lecture +notes +useful +programming +assignments +exams +needless +responsible +material +covered +lecture +exams +based +lecture +material +reading +assignments +notes +course +assignments +electronic +maili +often +electronic +mail +notify +students +changes +assignments +hints +programs +assume +regularly +read +electronic +mail +gradingthere +evening +exams +semester +final +exam +five +programming +assignments +exams +determine +final +grade +approximately +equal +weight +programming +assignments +count +language +taught +using +programming +language +programming +assignments +must +written +know +section +skrentny +teaching +sections +cover +addition +data +structures +large +complex +language +unless +experience +programming +even +difficult +language +learn +book +also +another +page +information +programming +assignments +gdbthere +also +page +describes +program +debugger +exams +exam +tuesday +chemistry +exam +final +exam +wednesday +december +place +course +schedule +following +rough +outline +topics +covered +course +detailed +scheduled +provided +later +topic +dewitt +notes +introduction +administrationbasic +stuff +lecture +functions +lecture +pointers +lecture +records +dynamic +storagelecture +lists +lecture +binary +search +notation +advanced +listslecture +stackslecture +queueslecture +hashinglecture +evening +exam +lecture +recursionlecture +treesbinary +trees +sort +searchlecture +treesgraphslecture +evening +exam +sortinglecture +tbaassignment +absolute +requirement +grade +turn +index +card +following +information +name +login +nameyear +school +freshman +sophomore +previous +coursesprevious +programming +experiencerecent +photograph +picture +birthday +girl +scout +trip +summer +color +black +white +size +grades +given +without +photo +assignment +first +programming +assignment +write +simple +abstract +data +byte +fora +bounded +integer +sequence +text +assignment +line +assignment +second +programming +assignment +write +program +maintain +databaseof +scores +tennis +tournament +text +assignment +line +assignment +second +programming +assignment +write +program +produce +aconcordance +using +hash +tables +text +assignment +line diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..f927a849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,234 @@ +data +structures +lecs +introduction +data +structureslecture +psychologylecture +psychologycourse +information +announcements +reading +assignments +getting +started +getting +help +exams +programming +assignments +sample +code +lecturer +information +course +materials +computers +labs +home +announcements +general +announcements +placed +recent +announcements +first +announcements +problems +programming +assignments +found +located +assignment +page +last +makeup +exam +done +placed +copy +solution +exam +reserve +kurt +wendt +library +queue +sample +code +online +samples +page +stack +sample +code +online +samples +page +exam +topics +online +exams +page +list +sample +code +online +samples +page +handin +directories +created +list +common +programming +errors +online +suggestions +additions +welcomed +either +vega +computer +labs +work +computers +information +sample +code +placed +line +monday +wednesday +lectures +getting +magic +number +error +source +files +must +able +compiled +otherwise +unusual +error +looking +stale +pages +forget +reload +pages +pages +updated +copies +browser +caches +become +outdated +stale +attend +unix +tutorial +need +attend +times +listed +thurs +thurs +reading +assignments +future +trees +chapter +lectures +queues +chapter +skip +simulation +lecture +overloading +operators +chapter +pages +lectures +hash +tables +chapter +pages +lectures +stacks +chapter +lectures +linked +lists +chapter +lecture +pointers +dynamic +memory +allocation +chapter +pages +lecture +sorting +searching +analysis +chapter +pages +lectures +sorting +algorithms +chapter +pages +lectures +basic +recursion +searching +algorithms +chapter +lectures +basic +chapter +pages +skip +focus +lectures +basic +appendix +pages +lecturer +skrentny +skrentny +wisc +office +computer +sciences +office +hours +teaching +assistants +baicheng +billy +liao +bail +wisc +office +computer +sciences +office +hours +cheng +jiacheng +wisc +office +computer +science +office +hours +pmcopyright +copy +james +skrentny +skrentny +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..e390fdc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,82 @@ +introduction +theoretical +computer +science +introduction +theoretical +computer +science +fall +room +lecturer +brian +cole +email +wisc +office +office +hours +monday +friday +teaching +assistant +david +sundaram +stukel +email +sundaram +wisc +office +office +hours +tuesday +wednesday +thursday +text +introduction +languages +theory +computation +john +martin +north +dakota +state +university +mcgraw +hill +isbn +tentative +lecture +schedule +including +exam +information +lecture +clarifications +assignments +page +grading +policy +written +assignments +term +examination +final +examination +archive +mailing +list +home +page +september +brian +cole +madison +computer +sciences +home +page +madison +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..ff046d32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,158 @@ +also +math +stat +fall +also +math +stat +linear +programmingfall +schedule +lecture +mechanical +engineering +open +book +midterm +exam +time +date +thursday +october +location +mechanical +engineering +open +book +final +exam +time +date +wednesday +december +location +instructor +olvi +mangasarian +office +comp +stat +pphone +mail +olvi +wisc +office +hours +wednesdays +fall +semester +teaching +assistant +office +comp +stat +telephone +mail +wisc +office +hours +textbook +linear +programming +matlab +ferris +mangasarian +preliminary +version +doit +madison +syllabus +course +overview +course +information +course +information +books +reserve +kurt +wendt +library +matlab +setup +homework +september +homework +september +homework +september +homework +september +homework +october +homework +october +homework +october +homework +october +homework +november +homework +november +homework +december +homework +december +programming +project +november +sample +midterm +exam +march +solution +sample +midterm +exam +march +midterm +exam +march +solution +midterm +exam +march +midterm +exam +october +solution +midterm +exam +october +sample +final +exam +final +exam +solution +final +exam +mathematical +programming +home +page +courses +relevant +sites +searchable +bibliographic +database +items +links +various +sites +page +updated +periodically +semester diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..da6d0054 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,87 @@ +fall +introduction +programming +languages +compilersspring +story +month +october +schedule +lecture +tuth +comp +stat +recitation +psychology +instructor +susan +horwitz +office +telephone +mail +horwitz +wisc +office +hours +tuesday +friday +appointment +teaching +assistant +rahul +kapoor +office +telephone +mail +rahul +wisc +office +hours +monday +wednesday +appointment +texts +reserve +wendt +library +compilers +principles +techniques +tools +sethi +ullman +crafting +compiler +fischer +leblanc +check +regularly +general +course +information +course +overview +dates +information +assignments +exams +grading +including +late +policy +getting +started +readings +programming +assignments +homeworks +examinations +lecture +notes +useful +programming +tools +grades +email +links +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..56c4cdbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,1005 @@ +introduction +operating +systems +fall +introduction +operating +systemssection +fall +instructormarvin +solomon +office +computer +sciencesoffice +hours +troffice +phone +email +address +solomon +wisc +tarob +mellencamp +office +computer +sciencesoffice +hours +mwfoffice +phone +email +address +mellen +wisc +news +watch +space +latest +updates +answers +midterm +exam +summary +scores +available +detailed +breakdown +grade +distributionis +also +available +specification +forproject +avaiable +date +project +moved +thursday +typographical +errors +notes +deadlock +avoidance +corrected +importantly +arraywas +called +places +others +called +places +popular +demand +midterm +exam +isavailable +look +warning +take +example +large +grain +salt +exam +long +time +courseused +different +text +covered +topics +different +order +semester +midterm +likelybe +quite +different +time +place +midterm +exam +determined +room +comp +october +specification +forproject +avaiable +discussion +issues +presentedin +class +available +summary +grades +project +available +electronic +hand +directions +forprogram +posted +procedure +givefork +algorithm +theproject +specificationshould +contain +call +notify +page +corrected +show +sept +fixed +bugs +theproject +specification +minor +important +first +caused +introduction +paragraph +slightly +garbled +thanks +jake +dawley +carr +pointing +second +line +omitted +sample +code +algorithm +theprogramming +detailssection +create +threadscheduler +start +threadscheduler +sched +threadscheduler +sched +start +specified +correctly +later +section +threadscheduler +details +section +page +fixed +thanks +liping +zhang +sept +test +data +files +project +available +directory +public +srccontains +three +data +files +java +class +reading +file +public +graph +javacontains +definition +classgraphdescribed +project +specification +file +public +petersoncyclic +graphcontains +peterson +graph +shown +project +specification +mentioned +initial +placement +forks +notacyclic +file +public +petersonacyclic +graphcontains +peterson +graph +acyclic +initial +placement +forks +file +public +star +graphcontains +star +topology +central +philosopher +sharingforks +nine +others +sept +todd +jenner +pointed +typos +specification +project +forks +numbers +read +forks +numbers +maxthink +replaced +maxeat +online +versionhas +corrected +thanks +todd +sept +mistake +thejava +tutorial +notes +section +strings +argument +version +string +substring +second +argumentis +offset +substring +number +charactersin +string +notes +corrected +thanks +franco +tung +chan +pointing +sept +occasionally +send +urgent +messages +directly +mailing +listof +students +registered +course +archive +messages +sent +list +receiving +messages +think +sendmail +solomon +wisc +specification +forproject +avaiable +received +requests +makefiles +java +sample +makefile +public +makefile +copy +file +working +directory +java +source +files +remember +separate +directory +project +edit +described +comments +type +maketo +compile +program +make +classes +compilewithout +running +sept +notes +handing +assignment +simulating +preemptive +multitasking +solaris +computershave +added +sept +java +tutorialis +finished +finished +going +section +onthreads +find +helpful +hints +structure +project +aware +weekly +seminar +operating +systemsand +networking +meeting +mondays +first +seminar +semester +monday +checkthe +colloquia +seminars +page +details +sept +java +book +finally +available +theuniversity +bookstore +sept +beginnings +ajava +tutorial +available +sept +java +department +unix +workstations +must +create +afile +named +cshrc +local +home +directory +containing +oneline +path +path +java +make +change +take +effect +either +type +source +cshrc +localor +simply +back +sept +specification +project +ready +sept +unix +orientation +sessions +unix +users +scheduled +forthe +following +times +tues +thurs +sept +room +csmon +thurs +sept +room +cslast +updated +contents +news +summary +lecture +information +text +projects +grading +course +schedule +lecture +notes +summary +intended +general +introduction +techniques +usedto +implement +operating +systems +related +kinds +systems +software +among +topics +covered +beprocess +management +creation +synchronization +communication +processor +scheduling +deadlock +prevention +avoidance +recovery +main +memory +management +virtual +memory +management +swapping +paging +segmentation +page +replacementalgorithms +control +disks +input +output +devices +file +system +structure +implementation +protection +security +lecture +information +lecture +tuesday +thursday +computer +sciencesand +statisticsdiscussion +wednesday +psychologythe +discussion +section +optional +least +important +lectures +primary +focus +wednesday +meetings +topics +related +theprojects +including +introduction +thejavaprogramming +language +time +also +available +answering +anyquestions +regarding +points +raised +lectures +thetext +text +required +modern +operating +systemsby +andrew +tanenbaum +prentice +hall +strongly +recommended +java +programming +languageby +arnold +james +gosling +addison +wesley +online +references +lots +additional +helpful +materials +java +available +following +references +collected +locally +fast +access +java +tutorialthe +java +language +specificationjava +documentationwatch +spot +additional +links +projects +five +programming +projects +thejavaprogramming +language +sparcstation +workstations +running +solaris +dialect +unixoperating +system +provided +anycomputer +access +implements +java +programminglanguage +however +computer +sciences +department +computers +responsible +transferring +requireddata +sets +software +packages +computer +first +assignment +easy +acquainted +exercise +designedto +help +become +familiar +computing +environment +thejava +language +subsequent +projects +involveprocess +synchronization +processor +scheduling +disk +scheduling +file +system +implementation +first +project +students +required +work +pairs +members +pair +receive +grade +project +feel +free +discuss +projects +anyone +butyou +must +share +code +anyone +partner +cheating +vigorously +punished +enough +said +assignments +beginning +class +dateindicated +entire +semester +havethree +late +daysof +credit +late +days +different +assignments +eachof +three +assignments +three +days +assignment +late +days +used +last +assignment +java +students +taking +course +familiar +java +choose +java +several +arguments +favor +java +congenial +programming +environment +runtime +errors +subscripts +null +pointers +uninitialized +variables +cause +exceptions +caught +language +runtimerather +mysterious +crashes +random +behavior +java +strings +much +easier +char +arrays +garbage +collected +storage +management +extremely +handy +java +trendy +java +caught +faster +language +history +many +reasons +java +growing +popularity +little +withthe +course +discuss +issues +class +byproduct +coursewill +knowledge +java +becoming +quite +marketable +commodity +java +operating +system +features +built +particular +first +widely +used +programming +language +withlanguage +level +support +concurrency +threads +synchronization +monitors +hand +switching +programming +language +alwaysa +dislocating +fortunately +excellent +resources +available +ease +thetransition +java +programming +languageby +arnold +gosling +amazingly +good +neither +introductory +programming +primer +authors +assume +youalready +know +program +reference +manual +although +areference +manualis +available +online +readable +introduction +language +takes +wayfrom +getting +started +everything +need +write +quite +sophisticatedprograms +java +book +available +university +bookstore +strongly +encouraged +also +gathered +variety +ofother +resources +together +including +niceonline +tutorialabout +java +programming +anda +reference +manualfor +standard +class +libraries +using +grading +midterm +final +exam +count +grade +midterm +evening +wednesday +october +room +computer +sciences +statistics +final +scheduled +timetable +tuesday +december +first +programming +project +getting +started +count +yourgrade +remaining +four +projects +count +course +schedule +following +schedule +tentative +updated +later +semester +check +back +frequently +sept +introduction +chapter +sept +processes +synchronization +processor +scheduling +chapters +sept +project +learning +javaoct +project +synchronizationoct +memory +management +virtual +memory +chapter +project +schedulingoct +midterm +exam +room +comp +devices +file +systems +chapters +project +disk +schedulingdec +protection +security +sections +project +file +systemsdec +final +exam +lecture +notes +introduction +history +bottom +view +view +course +outline +java +programmers +processes +synchronization +using +processes +process +processes +creating +processes +process +states +synchronization +race +conditions +semaphores +bounded +buffer +problem +dining +philosophers +monitors +messages +deadlock +terminology +deadlock +detection +deadlock +recovery +deadlock +prevention +deadlock +avoidance +implementing +processes +implementing +monitors +implementing +semaphores +implementing +critical +sections +short +term +scheduling +memory +management +allocating +main +memory +algorithms +memory +management +compaction +garbage +collection +swapping +paging +disks +come +solomon +wisc +eduthu +copyright +marvin +solomon +rights +reserved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..7a8c660f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,107 @@ +fall +home +pagecs +introduction +operating +systemsfall +tuesdays +thursdays +discussion +fridays +hosts +maryvernon +instructor +andkarunamuthiah +welcome +home +page +note +thursday +lecture +friday +discussion +beinterchanged +following +dates +solutions +quiz +assignment +office +hours +email +textbook +readings +grading +projects +quizzes +mail +archiveapproximate +schedule +topicsweek +oftopicsreadingsep +introduction +concurrency +threads +address +spaces +processeschapter +thread +management +cooperating +threadschapter +synchronization +implementing +mutual +exclusioncont +semaphorescont +monitors +concurrency +summarycont +doct +deadlock +process +schedulingchapter +memory +management +protection +address +translation +caching +tlbschapter +demand +paged +virtual +memorycont +review +survey +systemschapter +file +systems +naming +directorieschapters +protection +java +objects +core +methodstbanov +java +threads +security +thanksgiving +class +networks +distributed +systems +remote +procedure +call +chapter +distributed +file +systems +global +memory +systems +reviewchapter +vernon +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..c7cbb948 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,128 @@ +lecture +home +page +fall +fall +course +information +instructor +office +hours +office +hours +handouts +assignments +exams +grades +instructor +david +wood +office +hours +tuesday +wednesday +appointment +mail +david +wisc +phone +class +time +tuesday +thursday +location +phil +atkinson +office +hours +tuesday +thursday +appointment +mail +atkinson +wisc +phone +getting +started +helpful +information +handouts +course +description +getting +started +mentor +error +checking +correction +sample +vhdl +code +compiling +simulating +vhdl +mentorassignments +assignment +answer +question +assignment +assignment +selected +answers +assignment +assignment +selected +answers +assignment +assignment +selected +answers +assignment +assignment +assignment +projectthis +section +includes +information +course +project +project +description +project +deadlines +project +reports +project +demonstration +times +december +examsthe +midterm +exam +wednesday +room +final +exam +tuesday +room +exams +previous +spring +midterm +fall +midterm +spring +midterm +spring +midterm +spring +midterm +solution +fall +midterm +solution +spring +midterm +solution +spring +endterm diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..bae341b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,144 @@ +database +management +systems +design +implementationcs +database +management +systems +design +implementationcourse +information +postscript +version +class +friday +instead +office +hours +time +assignment +changed +friday +assignment +text +last +updated +assignment +handout +postscript +class +mailing +list +solutions +chapters +exercises +please +dont +print +solutions +chapters +exercises +postscript +first +information +overview +prerequisites +office +hours +topics +covered +grading +important +dates +important +policy +issues +minibase +home +page +check +details +assignment +assignments +assignment +handout +postscript +assignment +html +last +updated +assignment +handout +postscript +assignment +html +last +updated +assignment +handout +postscript +last +year +midterm +sample +postscript +last +year +midterm +postscript +using +sybase +info +sybase +info +help +yahoo +entry +resources +tutorial +info +tree +debugger +info +tree +language +construction +assignment +handout +graded +experience +assignmentother +handouts +coding +conventions +instructor +raghu +ramakrishnan +office +phone +mail +raghu +office +hours +lecture +discussion +lecture +time +place +ingraham +teaching +assistants +xuemei +office +phone +mail +xbao +office +hours +tues +thur +last +modified +sept +xbao diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..0379ba3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,67 @@ +lecture +home +page +welcome +home +page +page +obviously +construction +semester +progresses +addinginformation +need +know +page +important +thing +know +class +meetingroom +changed +currently +meeting +russell +labsfor +lectures +optional +discussion +friday +beenmoved +still +psychology +instructor +jeff +naughton +office +wednesday +lecture +discussion +lecture +time +place +russell +labs +discussion +optional +time +place +psychology +information +lecture +taught +close +cooperation +lecture +fact +assignments +probably +exams +information +general +minibase +assignments +particular +please +lecture +homepage diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..0e232d03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,58 @@ +introduction +algorithms +introduction +algorithms +course +information +instructor +eric +bach +office +phone +mail +bach +wisc +hours +appt +teaching +assistant +bill +donaldson +office +phone +mail +wisc +hours +teaching +assistant +raji +gopalakrishnan +office +phone +mail +raji +wisc +hours +midterm +exam +course +handouts +course +description +syllabus +books +reserve +course +organization +homework +homework +homework +solutions +homeworks +homework +graph +fractal +behaviour +homework +mail +archive diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..5a021297 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,190 @@ +computer +networking +courses +professor +landweber +computer +networking +courses +introduction +computer +networks +advanced +computer +networksintroduction +computer +networks +table +contents +international +connectivity +networking +courses +offered +course +information +instructor +teaching +assistants +course +syllabus +mail +archives +assignments +programming +references +selected +readingsclick +hereto +latest +text +version +networkingcourses +madisoncourse +informationlecturetime +mwfplace +comp +statclass +email +listinstructor +lawrence +landweberoffice +comp +statphone +email +wisc +eduoffice +hours +teaching +assistant +srinivasa +narayananoffice +phone +email +wisc +eduoffice +hours +monday +wednesday +times +convenient +feel +free +email +wisc +appointment +teaching +assistant +teitelbaumoffice +phone +naemail +wisc +eduoffice +hours +tuesday +thursday +times +convenient +feel +free +email +wisc +appointment +fall +course +syllabus +fall +mail +archive +moderated +mail +archive +complete +assignments +fall +programming +assignment +error +warning +codes +class +project +implementation +network +layer +reliable +adaptation +layer +handout +postscript +project +overview +slides +postscript +powerpoint +software +engineering +slides +postscript +powerpoint +design +document +evaluation +form +postscript +html +version +pictures +project +slides +document +grading +criteria +gradingmidterm +exam +final +exam +assignment +term +project +prior +midterms +fall +midterm +fall +midterm +optional +reference +book +project +unix +network +programming +stevens +richard +prentice +hall +isbn +programming +references +socket +interface +socket +interface +lecture +garbler +package +annotated +bibliographyreadings +partial +icmp +ospf +ipng +advanced +computer +networks +lecture +schedule +spring +review +form diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..e7df3ba0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,30 @@ +home +page +fall +fall +instructor +robert +meyer +wisc +time +place +comp +office +hours +course +description +homework +homework +solution +notes +homework +homework +solution +notes +computing +project +part +computing +project +part +optional diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..03931e10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,149 @@ +fall +also +math +stat +nonlinear +programming +theory +applicationsfall +schedule +lecture +course +mail +list +wisc +course +http +wisc +instructor +michael +ferris +office +telephone +mail +ferris +wisc +office +hours +monday +tuesday +wednesday +teaching +assistant +office +telephone +mail +wisc +office +hours +wednesday +thursday +class +text +nonlinear +programming +olvi +mangasarian +siam +publishers +philadelphia +useful +texts +nonlinear +programming +theory +algorithms +bazaraa +sherali +shetty +second +edition +wiley +york +nonlinear +programming +bertsekas +athena +scientific +general +course +information +course +overview +introduction +linear +inequalities +theorem +alternative +convex +sets +convex +concave +functions +saddlepoint +optimality +criteria +without +differentiability +differentiable +convex +concave +functions +first +order +optimality +criteria +differentiability +second +order +optimality +criteria +differentiability +duality +nonlinear +programming +generalizations +convex +functions +optimality +conditions +exact +penalty +augmented +lagrangians +gradient +projection +books +reserve +kurt +wendt +library +assignments +grading +homework +assignments +grade +assignment +week +midterm +examination +november +grade +final +examination +grade +homework +assignments +homework +homework +homework +homework +mathematical +programming +home +page +courses +page +updated +periodically +semester diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..03931e10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,149 @@ +fall +also +math +stat +nonlinear +programming +theory +applicationsfall +schedule +lecture +course +mail +list +wisc +course +http +wisc +instructor +michael +ferris +office +telephone +mail +ferris +wisc +office +hours +monday +tuesday +wednesday +teaching +assistant +office +telephone +mail +wisc +office +hours +wednesday +thursday +class +text +nonlinear +programming +olvi +mangasarian +siam +publishers +philadelphia +useful +texts +nonlinear +programming +theory +algorithms +bazaraa +sherali +shetty +second +edition +wiley +york +nonlinear +programming +bertsekas +athena +scientific +general +course +information +course +overview +introduction +linear +inequalities +theorem +alternative +convex +sets +convex +concave +functions +saddlepoint +optimality +criteria +without +differentiability +differentiable +convex +concave +functions +first +order +optimality +criteria +differentiability +second +order +optimality +criteria +differentiability +duality +nonlinear +programming +generalizations +convex +functions +optimality +conditions +exact +penalty +augmented +lagrangians +gradient +projection +books +reserve +kurt +wendt +library +assignments +grading +homework +assignments +grade +assignment +week +midterm +examination +november +grade +final +examination +grade +homework +assignments +homework +homework +homework +homework +mathematical +programming +home +page +courses +page +updated +periodically +semester diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..bd303b51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,86 @@ +home +page +computer +system +performance +evaluation +modeling +news +sept +assignment +postscript +text +sept +mimic +library +available +public +mimic +course +information +lecture +computer +science +devise +software +home +page +html +user +manual +postscript +please +print +file +contains +many +images +take +least +half +hour +initialization +instructions +text +mimic +software +tutorial +html +postscript +online +help +html +qnet +example +devc +html +professor +miron +livny +office +computer +sciences +hours +phone +mail +miron +wisc +teaching +assistant +chee +yong +chan +office +computer +sciences +hours +phone +mail +cychan +wisc +suggestion +comment +please +send +cychan +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..36b42cf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,91 @@ +finding +uwisc +finding +information +retrieval +technologies +seeking +knowledgerichard +belewvisiting +professorcs +lecture +univ +wisconsin +computer +science +departmentfall +thurs +acall +room +engrthis +course +designed +students +interested +understanding +aboutthe +information +retrieval +knowledge +representation +machinelearning +techniques +underlying +much +exciting +activity +occurring +onthe +world +wide +complete +description +courese +cansee +abstract +asyllabus +major +topics +considered +graphical +mapof +theseare +related +anda +tentative +schedule +semesterwill +proceed +course +resources +readings +overview +part +postscript +overview +part +postscript +political +infidelity +image +postscript +assignments +class +email +digested +hypermai +suggestions +composing +email +classrelated +resources +class +minutes +taken +students +students +last +modified +belew +wisc +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..926fcfc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,195 @@ +introduction +numerical +methods +last +change +introduction +numerical +methodsthis +page +contains +information +fall +course +smile +tentative +syllabus +ordered +scores +orderly +minds +additional +material +changes +note +cooperation +assignmentson +assignments +page +ordered +total +scores +midterm +changes +assignment +note +changed +date +well +slightly +changed +points +problems +diary +class +additional +material +residual +error +condition +changes +rick +carl +office +hour +list +errata +text +changes +diary +class +diary +class +changes +useful +email +concerning +problem +updated +since +question +computational +complexity +numericalanalysis +algorithms +posted +preprint +foremostmathematicians +today +subject +interested +trickytopic +least +squares +solution +approximation +time +place +also +textmay +supplemented +byadditional +material +files +mentioned +book +areavailable +organized +chapter +aware +though +site +mentioned +book +well +names +begin +capitalletter +sometimes +name +book +begins +lower +caseletter +matlab +diaries +class +sessions +present +plans +computing +assignments +matlab +rather +fortran +kermit +sigmon +matlab +primer +edition +available +doit +look +handout +student +reaction +student +edition +matlab +accessing +matlab +telnet +accessing +matlab +telnet +winor +machine +course +overviewcourse +syllabus +tentative +assignments +also +answers +list +words +grades +look +last +four +digits +student +email +concerning +current +questions +conduct +unix +orientation +sessions +users +andp +related +linksyou +might +wish +explore +csdepartment +home +page +computer +systems +frequently +asked +questions +list +simple +tutorial +available +well +advanced +referenceviva +also +good +introduction +unix diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..97552630 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,61 @@ +numerical +functional +analysis +last +change +numerical +functional +analysisthis +page +contains +information +fall +version +math +current +class +notes +available +following +directory +well +hard +copy +doit +classes +recent +announcement +posted +grades +time +location +statlecturer +carl +boor +email +deboor +wisc +office +hours +stat +line +classnotes +viii +index +assignments +none +email +concerning +homework +course +related +questions +related +linksyou +might +wish +explore +department +home +page +courseofferings diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..fc37a3d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,15 @@ +approximation +theory +last +change +approximation +theorythis +page +contains +information +spring +version +math +course +notes +course diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..782b0a03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,93 @@ +home +page +tony +silva +sectioncsm +instructor +tony +silva +contact +email +dsilva +wisc +office +computer +sciences +statistics +phone +office +hours +appointment +textbookproblem +solving +object +programming +walter +savitch +section +information +sept +comp +stat +firstday +noland +specified +timetable +sept +chamberlin +computer +computer +science +statistics +course +information +handout +tentative +syllabus +semester +late +policy +grading +criteria +academic +misconduct +handouts +important +software +introduction +microsoft +windowshints +windows +compilersthe +windows +operating +systememailmosaicnetscape +information +introduction +borland +languagethe +savitch +text +assignments +program +tuesday +program +tuesday +program +tuesday +program +thursday +program +thursday +solution +quizzes +solution +quiz +solution +quiz +last +modified +anthony +silva +dsilva +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..a2e22a23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,853 @@ +computer +visioncs +computer +visionfall +instructor +chuck +dyeroffice +csstelephone +email +dyer +wisc +eduoffice +hours +mondays +thursdays +appointmentteaching +assistant +bryan +sooffice +csstelephone +email +wisc +eduoffice +hours +wednesdays +fridays +appointmentstudents +general +course +informationfundamentals +computer +vision +first +introduction +level +image +analysis +methods +including +image +formation +edge +detection +featuredetection +segmentation +principles +defining +modules +forreconstructing +three +dimensional +scene +information +usingtechniques +asshape +shading +depth +stereo +active +methods +scene +recovery +depth +focus +andoccluding +contour +detection +viewpoint +control +motion +detection +analysis +including +tracking +model +based +three +dimensional +object +recognition +schedule +tuesdays +thursdays +prerequisites +fundamentals +calculus +probability +theory +linear +algebra +grading +midterm +exam +thursday +november +homework +assignments +project +class +participation +syllabus +required +readings +selected +parts +machine +vision +jain +kasturi +schunck +mcgraw +hill +york +collection +readingsfrom +journals +conference +proceedings +sold +doit +documentation +small +batchessupplementary +reading +sourcesonline +informationmost +course +information +available +online +urlhttp +wisc +dyer +html +reading +assignments +date +chapters +paper +doit +chapter +chapter +chapters +papers +doit +available +handout +chapter +except +chapter +papers +doit +available +handout +papers +doit +available +handout +chapters +chapter +primarily +student +scores +grades +homework +assignmentshomework +image +enhancement +histogram +modification +optional +make +copy +portrait +image +public +images +contrast +enhance +face +byfirst +rotating +image +cropping +window +around +head +shoulders +finally +interactively +adjusting +theintensity +modification +function +color +editor +window +thewindows +button +also +free +modify +things +colorif +wish +found +good +grayscale +transformationsave +result +color +image +andput +directory +wherethe +original +image +send +email +telling +qualitatively +whatintensity +transformation +applied +improves +qualityof +image +overall +image +photo +board +students +class +feel +free +image +ownweb +home +page +well +homework +skeletons +october +learn +getting +started +vista +vision +software +read +introduction +vista +programming +manual +available +doit +documentation +corrections +original +assignment +method +change +condition +least +instead +least +prevent +types +shapes +disappearing +altogether +method +condition +also +count +transition +case +method +matrix +city +block +distance +infinity +large +constant +four +corners +method +matrix +chessboard +distance +center +position +test +images +used +vision +images +doit +vision +images +hand +evaluate +thinning +results +might +want +following +additional +experiment +using +output +least +tests +convert +skeleton +image +ubyte +format +using +vconvert +edit +need +emacs +clean +header +image +file +contains +following +lines +right +repn +ubyte +line +component_interp +gradient +low_threshold +high_threshold +vlink +file +vsegedges +using +results +image +hand +example +well +approach +might +used +determine +direction +index +finger +pointing +application +note +thinning +method +fact +delete +entirely +shapes +block +surrounded +disappear +thinning +algorithm +based +papers +zhang +suen +fast +parallel +algorithm +thinning +digital +patterns +comm +wang +comment +fast +parallel +algorithm +thinning +digital +patterns +comm +comparison +student +results +different +methods +applied +pointing +hand +image +rotated +version +test +image +homework +image +splining +mosaics +october +read +papers +adelson +pyramid +methods +image +processing +engineer +burt +adelson +laplacian +pyramid +compact +image +code +ieee +trans +comm +burt +adelson +multiresolution +spline +application +image +mosaics +trans +graphics +hints +faqs +splined +images +produced +students +class +homework +segmentation +snakes +november +read +papers +kass +witkin +terzopoulos +snakes +active +contour +models +computer +vision +williams +shah +fast +algorithm +active +contours +curvature +estimation +computer +vision +graphics +image +processing +image +understanding +hints +faqs +homework +project +december +student +project +titles +abstracts +supplementary +readings +additional +papers +might +help +select +topic +student +projects +done +stanford +vision +course +computer +accounts +accounts +course +accounts +sparcstations +called +rooms +account +large +disk +space +quota +store +images +homeworks +project +sure +delete +images +compress +others +gzip +however +order +save +space +email +email +sent +list +goes +everyone +class +including +instructor +printers +print +images +laserprinters +laser +laser +located +room +alternatively +generic +printer +name +laser +send +output +four +printers +shortest +queue +caution +sending +images +printer +sure +check +queue +jobs +printed +manners +send +images +printed +take +long +print +considerate +vision +software +vista +vista +programming +environment +used +homework +assignments +code +located +directory +vision +tools +vista +pages +vision +tools +vista +executables +vision +tools +vista +interactive +image +display +program +window +system +useful +displaying +images +variety +formats +imgstar +basic +image +processing +operations +invoked +using +unix +like +command +lines +code +executables +manual +vision +tools +imgstar +khoros +khoros +image +processing +software +development +environment +provides +basic +image +processing +modules +graphical +programming +language +interface +rapid +prototyping +simple +image +processing +algorithms +code +located +directory +vision +tools +khoros +vision +tools +khoros +cantata +executable +starts +interactive +environment +netpbm +toolkit +conversion +images +large +variety +different +formats +based +pbmplus +package +pages +vision +tools +executables +vision +tools +matlab +matlab +numeric +computation +visualization +environment +signal +processing +image +processing +toolboxes +especially +relevant +test +images +test +images +directory +vision +images +although +require +format +conversion +used +images +public +images +numerous +image +databases +also +accessible +example +collection +test +images +examination +examination +solution +exam +held +thursday +november +regular +classroom +note +early +starting +time +exam +cover +topics +shape +shading +including +readings +textbook +papers +sold +doit +homework +assignments +bring +exam +sheet +paper +notes +want +sides +exam +focus +main +ideas +algorithms +proofs +exams +types +questions +asked +exams +exam +spring +exam +spring +exam +spring +exam +spring +links +interest +computer +vision +home +page +highly +recommended +chuck +dyer +links +interest +wandell +list +useful +numbers +vision +science +hdtv +grand +alliance +hdtv +system +specification +advanced +television +systems +committee +atsc +atsc +documents +postscript +spie +optical +science +engineering +library +vision +demos +projects +apple +quicktime +image +mosaicing +product +panoramix +image +mosaicing +example +panoramic +image +mosaicing +decface +talking +synthetic +face +video +rate +stereo +machine +virtualized +reality +project +qbic +image +database +project +miscellaneous +computer +vision +demos +computer +vision +related +courses +boston +university +cardiff +university +khoros +digital +image +processing +online +course +royal +institute +sweden +stanford +university +university +virginia +university +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..ae5d2eb1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,77 @@ +fall +section +fall +section +algebraic +language +programming +instructor +dave +zimmermannemail +dzimm +wisc +educlass +meeting +time +place +nolandoffice +office +phone +office +hours +announcementsprogram +wednesday +october +program +ready +friday +november +general +course +informationcs +home +pagecourse +objectivesvectra +labcs +consultantssyllabusworking +homeclass +handoutsprogramsexams +quizzeslecture +notesgreg +sharp +style +guidegrades +referenced +last +digits +number +quizzesprogramsexams +policy +informationemail +policygrading +policylate +policyacademic +misconduct +policytext +problem +solving +object +programming +walter +savitchaddison +wesley +publishing +company +list +known +erratalast +modified +dave +zimmermann +dzimm +wisc +based +greg +sharp +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..0a2778d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,42 @@ +linear +programming +methods +linear +programming +methods +general +course +information +course +offered +fall +spring +semester +pages +various +instructors +michael +ferris +spring +mangasarian +fall +graduate +courses +wisconsin +network +flows +integer +programming +nonlinear +programming +theory +nonlinear +programming +algorithms +computational +large +sparse +systems +last +modified +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..8067ee1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,197 @@ +spring +network +flowsspring +schedule +lecture +course +mail +list +wisc +class +friday +february +instructor +michael +ferris +office +telephone +mail +ferris +wisc +office +hours +mondays +wednesdays +teaching +assistant +office +telephone +mail +leey +wisc +office +hours +tuesdays +thursdays +required +text +network +flows +ravindra +ahuja +thomas +magnanti +james +orlin +prentice +hall +useful +texts +linear +programming +chvatal +freeman +linear +network +optimization +bertsekas +press +general +course +information +course +overview +paths +trees +cycles +data +structures +shortest +paths +flow +cost +network +simplex +method +generalized +flows +convex +equilibria +lagrangian +relaxation +multicommodity +flows +applications +prerequisite +knowledge +linear +programming +grading +homework +assignments +grade +project +assignment +grade +wednesday +class +final +examination +grade +monday +closed +book +except +sheets +paper +allowed +representative +questions +assignments +homework +friday +march +homework +friday +march +homework +friday +march +homework +friday +march +homework +monday +april +homework +monday +april +homework +friday +april +homework +friday +computing +information +unix +orientation +sessions +first +time +unix +users +novice +unix +users +previously +used +unix +workstations +held +monday +thursday +first +week +classes +room +monday +thursday +second +week +classes +room +orientation +sessions +last +minutes +introduction +unix +login +access +gams +public +cshrc +local +cshrc +local +source +cshrc +local +alters +path +sets +gams +directory +appropriate +solaris +machines +course +machines +mathematical +programming +home +page +courses +page +updated +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..9c61fb70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,207 @@ +spring +also +math +computational +methods +large +sparse +systemsspring +schedule +lecture +course +mail +list +wisc +class +friday +february +instructor +michael +ferris +office +telephone +mail +ferris +wisc +office +hours +mondays +wednesdays +teaching +assistant +office +telephone +mail +leey +wisc +office +hours +tuesdays +thursdays +recommended +textbooks +matrix +computations +golub +loan +johns +hopkinsuniversity +press +second +edition +direct +methods +sparse +matrices +duff +erisman +reid +oxford +science +publications +finite +dimensional +vector +spaces +halmos +springer +verlag +general +course +information +course +overview +introduction +storage +schemes +gaussian +elimination +dense +error +analysis +sparse +local +pivotal +strategies +matrix +modifications +iterative +linear +solvers +sparse +least +squares +sparse +nonlinear +equations +optimization +applications +parallel +techniques +eigenvalue +eigenvectors +prerequisite +math +consent +instructor +grading +homework +assignments +grade +project +assignment +grade +wednesday +class +final +examination +grade +monday +closed +book +except +sheets +paper +allowed +representative +questions +assignments +homework +monday +february +homework +friday +march +homework +friday +march +homework +friday +march +homework +friday +march +homework +monday +april +homework +monday +april +homework +friday +april +homework +friday +handouts +ieee +arithmetic +handout +sparse +handout +computing +information +course +machines +unix +orientation +sessions +first +time +unix +users +novice +unix +users +previously +used +unix +workstations +held +monday +thursday +first +week +classes +room +monday +thursday +second +week +classes +room +orientation +sessions +last +minutes +introduction +unix +instructions +matlab +mathematical +programming +home +page +courses +page +updated +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..4f3c64fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,82 @@ +fall +construction +compilersfall +schedule +lecture +tuth +csst +instructor +charles +fischer +office +telephone +mail +fischer +wisc +office +hours +mondays +wednesdays +fridays +appointment +teaching +assistant +krishna +kunchithapadam +office +telephone +mail +krisna +wisc +office +hours +tuesdays +thursdays +appointment +programming +assignments +homeworks +readings +teaching +assistant +weyers +office +telephone +mail +weyers +wisc +office +hours +mondays +wednesdays +fridays +class +text +crafting +compiler +charles +fischer +richard +leblanc +benjamin +cummings +check +regularly +general +course +information +course +overview +dates +grades +examinations +getting +started +handouts +lecture +notes +useful +programming +tools +links +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..e6461904 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,67 @@ +fall +sections +fall +sections +algebraic +language +programming +instructor +greg +sharpemail +greg +wisc +eduoffice +office +phone +office +hours +appt +grader +krishna +kunchithapadamemail +krisna +wisc +edugeneral +course +informationcs +home +pagecourse +objectivesvectra +labcs +consultantssyllabuscourse +difficultyworking +homenewsstartup +informationclass +noteshomeworkexams +quizzesstyle +guideemail +archivepolicy +informationemail +policygrading +policylate +policyacademic +misconduct +policy +must +read +textproblem +solving +object +porgrammingwalter +savitchaddison +wesley +publishing +company +please +list +known +erratalast +modified +greg +sharpgreg +wisc +http +wisc +greg +greg +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..f41ea1ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,245 @@ +class +home +page +fall +algebraic +language +programming +section +fall +chad +lane +wisc +announcements +program +line +tues +important +need +version +tribble +compile +problem +enumerated +types +recognized +outside +class +must +declared +inside +public +section +class +result +private +section +needed +bumped +private +section +bottom +shown +version +also +note +definition +randomintinrange +defined +line +need +corresponding +function +body +tribble +call +within +class +work +file +prog +forgot +change +enumerated +type +values +uppercase +need +compile +everything +changed +line +copy +prog +want +overwrite +obsolete +copy +also +used +include +prog +changed +prog +might +also +want +check +help +link +something +added +comments +suggestions +program +important +name +throughout +program +project +parts +name +project +directory +name +file +names +updated +program +description +consistent +prog +version +used +prog +please +make +sure +consistent +naming +discrepency +sample +chris +weaver +public +directory +called +prog +shouldn +matter +program +sample +program +line +ready +crucial +read +entire +assignment +understand +class +basics +attempt +early +start +hard +require +time +piece +everything +together +bring +questions +class +tuesday +midterm +grades +freshmen +either +means +fine +means +great +thumbs +grade +means +nothing +freshman +disregard +stuff +class +tentative +semester +syllabus +reading +assignments +programming +assignments +handouts +preparing +quizzes +tests +quizzes +tests +solutionscourse +information +policies +text +problem +solving +object +programming +walter +savitch +addison +wesley +publishing +company +meet +vleck +policies +administrative +information +grading +policy +late +policy +mail +information +attendance +policy +academic +misconduct +links +information +introduction +microsoft +windows +first +introduction +borland +second +home +page +vectra +source +code +text +consultants +extra +reference +material +many +questions +answered +working +home +sharp +lecture +notes +sharp +style +guidelast +modified +chad +lane diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..da6d0054 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,87 @@ +fall +introduction +programming +languages +compilersspring +story +month +october +schedule +lecture +tuth +comp +stat +recitation +psychology +instructor +susan +horwitz +office +telephone +mail +horwitz +wisc +office +hours +tuesday +friday +appointment +teaching +assistant +rahul +kapoor +office +telephone +mail +rahul +wisc +office +hours +monday +wednesday +appointment +texts +reserve +wendt +library +compilers +principles +techniques +tools +sethi +ullman +crafting +compiler +fischer +leblanc +check +regularly +general +course +information +course +overview +dates +information +assignments +exams +grading +including +late +policy +getting +started +readings +programming +assignments +homeworks +examinations +lecture +notes +useful +programming +tools +grades +email +links +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..dcf62544 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,70 @@ +home +page +hummert +sectionscs +instructor +hummert +contact +email +hummert +wisc +office +computer +sciences +statistics +office +phone +home +phone +office +hours +monday +thursday +announcements +textbookproblem +solving +object +programming +walter +savitch +section +information +psych +psych +grades +computer +computer +science +statistics +course +information +handout +tentative +syllabus +semester +late +policy +grading +criteria +academic +misconduct +viewgraphs +important +software +introduction +microsoft +windowshints +windows +compilersthe +windows +operating +systememailmosaicnetscape +information +introduction +borland +languagethe +savitch +text +assignments +program +program diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..3f35e4d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,53 @@ +infocs +info +section +name +bodnersections +office +hours +monday +thursday +office +number +computer +sciences +statistics +hall +doit +phone +mail +jonb +wisc +eduhere +things +keep +mind +need +copy +guide +sections +click +choose +print +file +menu +click +questions +please +stop +office +hours +send +mail +grades +section +available +clicking +bodner +jonb +wisc +mound +madison +last +modified +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..5564aa53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,224 @@ +intro +kunen +section +introduction +artificial +intelligence +notice +information +spring +details +coursewill +appear +later +instructor +kunenoffice +stats +buildingtelephone +email +kunen +wisc +eduoffice +hours +appointment +email +grades +four +programming +assignments +counting +three +exams +counting +thirdexam +scheduled +time +place +final +programs +turned +time +midnight +theday +late +assignments +loose +late +topics +covered +following +order +topics +entirely +logical +butis +designed +topic +programming +assignmentis +discussed +program +introduction +lisp +program +searching +game +playing +program +introduction +prolog +natural +language +understanding +program +learning +neural +networks +program +logical +deduction +planning +reasoning +uncertain +knowledge +lisp +information +since +lisp +used +programming +coursewill +begin +discussion +common +lisp +would +probably +usefulto +lisp +reference +available +supplement +lecturesand +line +help +available +within +lisp +manypaperbacks +available +probably +like +common +lispcraft +wilensky +another +possibility +ansi +common +lisp +book +graham +code +used +book +line +ultimate +lisp +reference +steele +common +lisp +language +edition +pages +also +available +line +click +information +using +common +lisp +suns +additional +information +textbook +artificial +intelligence +modern +approach +russell +norvig +class +time +recitation +sessions +engr +psych +essentiallly +material +presented +answer +questions +give +hints +programming +assignments +review +exams +usually +last +minutes +since +teaching +sections +attend +recitation +section +different +lecture +section +course +directory +course +kunen +public +alpha +beta +problem +previous +exam +course +directory +alpha_beta +click +line +best +first +search +problem +previous +exam +course +directory +astar +click +line +exams +fall +postscript +exam +exam +exam +final +still +older +exams +course +directory +last +changed +november +kunen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..eead007b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,168 @@ +fall +advanced +computer +architecture +ifall +offering +course +information +instructor +mark +hilloffice +comp +statemail +markhill +wisc +eduoffice +hours +tuesday +friday +appointment +shenoffice +comp +statphone +email +mshen +wisc +eduoffice +hours +monday +thursday +appointment +table +contents +reader +lecture +notes +homeworks +project +miscellaneawhat +giving +talksreader +reader +table +contents +full +papers +doit +reader +table +contents +full +papers +doit +reader +table +contents +full +papers +doit +reader +table +contents +full +papers +doit +lecture +notes +introduction +chapter +performance +cost +chapter +instruction +sets +chapter +caches +chapter +part +memory +chapter +part +talluri +hill +basic +pipelining +chapter +part +basic +pipelining +chapter +part +instruction +level +parallelism +chapter +part +instruction +level +parallelism +chapter +part +input +output +chapter +interconnects +chapter +notes +parallel +processing +chapter +homeworks +homework +assignment +solution +homework +assignment +solution +homework +assignment +solution +homework +assignment +solution +homework +assignment +solutionproject +assignment +proposals +november +class +talks +december +class +report +december +noonmiscellanea +giving +talks +spring +final +spring +project +assignment +spring +midterm +using +first +edition +hennessy +patterson +architecture +qualifying +exams +source +hard +questions +computer +architecture +seminar +wisconsin +computer +architecture +group +world +wide +computer +architecture +information diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..da26515a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,105 @@ +section +section +instructor +michael +birk +email +mbirk +wisc +office +comp +phone +office +home +office +hours +office +appointment +assignments +program +program +administrative +information +text +problem +solving +object +programming +walter +savitch +room +time +psychology +tuesday +thursday +comp +vectra +syllabus +computation +grades +grading +standards +late +assignments +policy +handin +procedures +cheating +academic +misconduct +consultants +examples +string +class +using +dynamic +allocation +using +dynamic +allocation +rational +class +example +using +operator +overloading +complex +class +represents +floating +point +complex +numbers +another +example +operator +overloading +intstack +class +simple +example +unlimited +size +data +structure +classinfo +example +using +structs +using +classes +links +home +page +home +page +introduction +microsoft +windows +introduction +borland +tutorial +using +debugger +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..34ca4a48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,165 @@ +section +section +instructor +michael +birk +email +mbirk +wisc +office +comp +phone +office +home +office +hours +office +appointment +announcements +test +cases +program +available +dice +code +discussed +class +class +ranking +last +four +digits +student +number +past +exams +online +spring +fall +spring +notes +hangman +assignment +program +exam +monday +october +comp +room +lecture +instructions +formatting +syllabus +first +eight +weeks +available +second +eight +weeks +coming +soon +instructions +printing +program +output +computer +outside +late +policy +finalized +room +change +meet +comp +assignments +program +program +program +program +program +program +program +administrative +information +text +problem +solving +object +programming +walter +savitch +room +time +comp +tuesday +thursday +comp +vectra +syllabus +computation +grades +grading +standards +late +assignments +policy +handin +procedures +cheating +academic +misconduct +consultants +examples +string +class +using +dynamic +allocation +using +dynamic +allocation +rational +class +example +using +operator +overloading +complex +class +represents +floating +point +complex +numbers +another +example +operator +overloading +intstack +class +simple +example +unlimited +size +data +structure +classinfo +example +using +structs +using +classes +links +home +page +introduction +microsoft +windows +introduction +borland +tutorial +using +debugger +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..dba15ec2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,107 @@ +sections +dave +melskithese +pages +change +frequently +press +reload +button +daily +getting +started +already +stuff +assignments +page +uncomfortable +computer +andthe +software +page +helpful +links +info +instructor +david +melski +office +computer +science +statistics +floor +phone +office +hours +email +melski +wisc +click +attachments +please +section +info +section +meets +noland +section +meets +psychology +text +problem +solving +object +programming +walter +savitch +addison +wesleypublishing +general +stuff +using +using +windows +usingborland +reference +material +rough +syllabus +sections +email +archive +section +email +archive +section +policy +info +academic +misconduct +must +read +rule +thumb +share +code +consultants +help +grading +late +work +email +checked +often +essential +links +assignments +solutions +handouts +list +tutors +available +last +modified +david +melski +melski +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..0b5e60d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,301 @@ +home +page +section +fall +section +algebraic +language +programming +instructor +milo +martin +milo +wisc +time +location +psychologyinstructor +milo +martin +email +milo +wisc +eduoffice +office +hours +tuesday +thursday +appointment +office +phone +announcementsthis +page +changes +frequently +responsibility +check +page +often +november +quiz +take +home +given +class +today +placed +onthe +homepage +november +added +file +using +projects +program +page +homepage +november +room +test +scheduledfor +wednesday +november +october +updated +current +grades +haseverything +quiz +please +check +make +sure +isaccurate +october +added +links +couple +pages +withinformation +html +language +used +pages +linksare +section +titled +documents +october +program +available +onfriday +november +take +home +quiz +monday +november +reminder +exam +wednesday +november +general +class +information +current +class +grades +general +information +sheet +turning +assignments +syllabus +code +style +guide +home +page +vectra +consultants +fall +consulting +schedule +academic +misconduct +policyclass +documents +final +bankaccount +class +code +bankaccount +bankaccount +main +postscript +bankaccount +class +code +bankaccount +postscript +bankaccount +struct +code +bankaccount +postscript +minmax +example +code +findthe +minimum +maximum +list +numbers +case +enteredfrom +stdin +form +code +creates +formletters +data +specified +files +uses +file +theopen_file +function +introduced +class +call +reference +class +example +functions +user +input +withprompts +call +reference +version +functions +user +input +withprompts +call +value +version +documents +beginner +guide +html +standard +introduction +tothe +html +language +html +reference +guide +reference +guide +latest +html +standard +release +good +times +virus +hoax +code +ethics +andprofessional +conductassignments +class +survey +questionare +required +monday +september +program +wednesday +september +program +wednesday +september +program +wednesday +september +program +friday +october +program +monday +october +program +wednesday +october +program +friday +november +program +program +program +wednesday +december +quizs +quiz +solutions +scores +monday +september +quiz +solutions +scores +wednesday +october +quiz +solutions +scores +monday +october +quiz +solutions +scores +take +home +quiz +monday +november +exams +exam +solutions +scores +wednesday +october +exam +solutions +scores +wednesday +november +exam +solutions +scores +tuesday +december +textbook +problem +solving +object +programming +walter +savitchaddison +wesley +publishing +company +list +known +errata +milo +martin +milo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..93ed51e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,140 @@ +home +page +martin +reames +classcs +sections +algebraic +language +programmingspring +martin +reames +teaching +assistant +absolutely +nothingeveryday +informationcs +class +information +pagescommon +programmingmistakesarchive +section +section +class +mailing +lists +messages +sent +lists +semester +calendar +program +tuesday +january +program +tuesday +february +program +tuesday +february +program +thursday +february +program +thursday +march +exam +tuesday +march +program +tuesday +march +program +thursday +april +program +tuesday +april +exam +tuesday +april +program +thursday +april +program +thursday +final +exam +thursday +placeto +announcedcourse +details +contact +email +mreames +wisc +office +computer +sciences +statistics +dayton +phone +office +hours +appointment +talk +class +send +email +textbookproblem +solving +object +programming +walter +savitch +section +information +section +noland +section +noland +computer +csst +containing +vectra +running +windows +andborland +additional +course +information +tentative +syllabus +semester +extra +material +late +policy +grading +criteria +academicmisconduct +rule +thumb +share +code +assigned +work +anyform +former +students +made +bigtodd +thielwendy +staatsabout +instructor +last +modified +martin +reames +mreames +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..19848ef0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,130 @@ +fall +section +fall +section +algebraic +language +programming +nolandinstructor +mike +steeleemail +msteele +wisc +eduoffice +comp +stat +buildingoffice +hours +times +appointment +soffice +phone +important +announcementsi +extended +deadline +program +please +check +mail +read +thenew +program +deadline +informationmidterm +tuesday +november +comp +stat +current +grades +line +includes +grades +everything +handed +tuesday +november +sample +programs +taken +examples +pastfew +weeks +class +filled +stuff +glossed +makefully +functioning +programs +find +useful +ifyou +missed +even +didn +understand +example +notes +examples +page +near +bottom +remember +check +mail +clarifications +programmingassignments +general +course +informationcs +home +pagecourse +objectivesabout +vectra +labcs +consultantscourse +syllabus +reading +assignmentsnotes +working +homeclass +handoutsprogramming +assignmentsexams +quizzessome +notes +examplespolicy +informationemail +policygrading +policy +late +policy +academic +misconduct +policyuseful +reference +pagesintroduction +microsoft +windowsintroduction +borland +greg +sharp +styleguide +codetextproblem +solving +object +programming +walter +savitchaddison +wesley +publishing +company +list +known +erratalast +modified +mike +steele +msteele +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..b17e9c9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,192 @@ +fall +session +infocs +fall +sessionalgebraic +language +programming +instructor +andrew +prockemail +prock +wisc +eduoffice +computer +science +statistics +office +phone +office +hours +thgrader +haihong +wangemail +wisc +eduoffice +computer +science +statistics +office +phone +consulting +hours +mtwrannouncements +grades +link +please +check +grade +verify +scores +modified +crazy +office +hours +today +today +made +minor +modification +crazy +file +assignment +copy +already +gotton +assignment +five +online +exam +results +range +added +grading +policy +assignment +sample +quizes +tests +online +general +perusal +tests +quizes +another +think +give +good +idea +level +knowledge +need +remember +topics +tests +tests +online +webpages +seem +done +email +notice +errors +questions +make +sure +check +assingment +assignment +online +well +like +work +ahead +final +tuesday +december +mark +calendar +everyone +required +take +final +check +links +page +feel +things +located +important +carefully +read +policies +administrative +information +welcome +class +class +information +text +problem +solving +walter +savitch +room +computer +sciences +statistics +time +section +section +tentative +syllabus +section +grades +section +grades +grading +policy +late +policy +mail +policy +academic +misconductcourse +materials +general +course +info +style +guide +lecture +notes +assignments +email +archive +section +email +archive +section +info +introduction +microsoft +windows +first +introduction +borland +second +home +page +vectra +source +code +text +consultants +extra +reference +material +many +questions +answered +prock +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..ce45d701 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,115 @@ +kelly +page +kelly +ratliff +current +grades +keyword +search +mail +messages +example +enter +functions +without +quotes +every +paragraph +used +word +functions +also +wildcard +links +mail +messages +sent +semester +info +info +info +info +info +info +info +info +info +additional +information +interest +backups +copying +disks +filesvirus +information +world +wide +faqfun +stuff +usenet +oracle +resource +index +virtual +tourist +world +mapthe +space +shuttle +clickable +badger +herald +site +comicshumor +abort +retry +ignore +nine +types +usersfinals +weeklab +jokesgetting +software +computer +home +might +interested +tryingsome +shareware +freeware +software +available +internet +archives +programs +usually +compressed +need +somecompression +archiving +software +unpack +need +reviewsome +commands +trying +biggest +best +maintained +archives +simtel +mincluding +links +simtel +files +posted +usenet +simtel +site +infocompression +infofavorite +sites +clickhere +visit +desautels +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..3209c9dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,94 @@ +home +page +russ +manning +cscs +introduction +computer +programming +instructor +russell +manningemail +rmanning +wisc +eduoffice +room +computer +scienceoffice +hours +find +basement +saturday +except +home +football +games +sunday +come +keep +company +work +like +vectra +although +students +priority +grades +lecture +finally +click +textbook +problem +solving +object +programming +walter +savitch +section +information +semester +university +rotc +building +computer +room +computer +science +statistics +assignments +program +monday +november +program +wednesday +november +program +program +program +program +course +information +handout +syllabus +late +policy +academic +misconduct +important +software +introduction +microsoft +windowshints +windows +compilersthe +windows +operating +systememailmosaicnetscape +information +introduction +borland +languagethe +savitch +textold +quizzes +none diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..b7eb9471 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,143 @@ +introduction +artificial +intelligence +introduction +artificial +intelligence +general +course +information +course +offered +fall +spring +semester +academic +year +section +thefall +spring +topics +covered +principles +knowledge +based +search +techniques +best +first +search +alpha +beta +search +knowledge +representation +using +predicate +logic +semantic +networks +connectionist +networks +frames +rules +automated +deduction +applications +problem +solving +planning +expert +systems +game +playing +vision +natural +language +understanding +learning +robotics +programming +include +lisp +possibly +prolog +previous +knowledge +languages +assumedprerequisite +pages +various +instructors +chuck +dyer +spring +fall +kunen +fall +spring +jude +shavlik +fall +sabbatical +spring +bryan +spring +local +related +links +madison +seminar +qualifying +exam +recent +table +contents +abstracts +journals +mostly +wendt +library +readable +wisc +wisc +group +wisc +computer +vision +group +wisc +machine +learning +group +wisc +robotics +group +wisc +computational +biology +includes +wisc +dept +graduate +courses +wisconsin +machine +learning +deduction +problem +solving +computer +vision +robot +motion +planning +external +related +links +last +modified +jude +shavlik +shavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..c2e0c435 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,484 @@ +machine +learning +machine +learning +spring +general +course +information +instructor +jude +shavlik +stats +shavlik +wisc +office +monday +wednesday +teaching +assistant +geoff +weinberg +highway +labs +basement +building +geoffrey +wisc +office +monday +wednesday +office +phone +home +home +course +overview +postscript +course +syllabus +postscript +archive +class +email +readable +wisc +suggested +class +projects +postscript +reading +assignments +assigned +read +chapter +theory +refinement +chapter +computational +learning +theory +mitchell +textbook +feedback +author +assigned +april +read +learning +logical +definitions +relations +quinlan +knowledge +based +artificial +neural +networks +towell +shavlik +assigned +april +read +chunking +soar +laird +rosenbloom +newell +learning +knowledge +level +dietterich +assigned +april +read +chapter +analytical +learning +mitchell +textbook +feedback +author +april +assigned +april +read +journal +articles +unsupervised +learning +fisher +rumelhart +zipser +cogsci +lenat +assigned +april +read +chapter +genetic +algorithms +mitchell +textbook +feedback +author +april +assigned +march +read +chapter +reinforcement +learning +mitchell +textbook +feedback +author +april +assigned +march +read +backpropagation +basic +theory +rumelhart +assigned +february +read +chapter +neural +networks +mitchell +textbook +feedback +author +march +also +read +journal +article +shavlik +mooney +towell +empirically +compares +backprop +assigned +february +read +chapter +concept +space +mitchell +textbook +feedback +author +february +assigned +january +read +machine +learning +experimental +science +kibler +feedback +author +february +assigned +january +read +chapter +introduction +mitchell +textbook +feedback +author +january +assignments +journal +article +towell +shavlik +kbann +algorithm +monday +journal +article +fisher +cobweb +algorithm +wednesday +april +journal +article +shavlik +mooney +towell +empirically +compares +backprop +wednesday +march +sure +answer +sheet +paper +best +idea +next +summarize +assigned +paper +sentence +summary +lead +instead +analyze +late +policy +brrs +handed +material +covered +lecture +homework +assignments +homework +learning +reinforcements +learning +wednesday +april +homework +training +neural +networks +monday +march +homework +experimental +methodology +monday +february +homework +inducing +decision +trees +monday +february +homework +creating +personal +concept +monday +january +late +policy +start +class +student +five +free +late +days +semester +exhausted +penalty +measured +noon +noon +weekends +free +make +tractable +accepted +week +late +previously +used +homeworks +postscript +homeworks +spring +migrate +semester +progresses +homework +inducing +decision +trees +homework +heuristically +searching +concept +space +homework +training +neural +networks +homework +learning +reinforcements +learning +homework +version +space +postscript +homework +explanation +based +learning +postscript +homework +cobweb +postscript +previous +exams +postscript +spring +spring +spring +spring +spring +spring +spring +spring +ineedagoodicon +related +links +machine +learning +journal +line +page +nips +papers +premier +neural +conference +recent +table +contents +abstracts +selected +journals +mostly +wendt +library +readable +wisc +irvine +dataset +archive +pointers +courses +knowledge +discovery +databases +neural +network +resources +stuff +machine +learning +benchmarking +ieee +neural +networks +council +several +journals +connected +page +international +society +adaptive +behavior +bibliography +server +austrian +institute +neural +networks +bibliography +server +austrian +institute +resources +canadian +server +links +people +external +references +help +programming +assignments +page +using +akcl +common +lisp +departmental +workstations +tips +using +emacs +lisp +code +writing +frequently +asked +questions +lisp +course +computers +tips +using +akcl +debugger +help +lisp +novices +lisp +frequently +asked +questions +steele +common +lisp +language +edition +reference +manual +textbook +printing +printers +print +pages +related +local +links +wisc +group +wisc +math +programming +group +wisc +comp +biology +includes +wisc +group +wisc +dept +wisc +library +local +links +last +modified +jude +shavlikshavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..beef7c8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,358 @@ +fall +advanced +operating +systemsfall +marvin +solomon +office +computer +sciencesoffice +hours +troffice +phone +email +address +solomon +wisc +news +watch +space +latest +updates +last +updated +schedule +project +presentations +listed +final +exam +monday +inroom +computer +sciences +statistics +building +project +presentations +room +friday +noon +example +past +midterm +examtogether +sample +answers +midterm +exam +wednesday +october +pmin +room +computer +sciences +final +exam +monday +december +exact +time +place +bedetermined +project +suggestions +informationabout +project +avaiable +readable +version +figure +multics +memory +management +paper +available +contents +news +summary +lecture +information +text +course +schedule +grading +project +project +presentations +summary +course +intended +give +broad +exposure +advancedoperating +systems +topics +assume +students +good +semester +course +onoperating +systems +equivalent +cover +topics +normally +presented +course +inconsiderably +detail +synchronization +interprocess +communication +memory +management +file +systems +protection +security +distributed +systems +lecture +information +lecture +tuesday +thursday +computer +sciencestextthere +really +satisfactory +textbook +graduate +level +operatingsystems +class +usea +selection +classic +papersas +text +course +structured +around +readings +journal +articles +andconference +proceedings +purchase +readings +doit +formerly +macc +documentation +deskfor +readings +semester +similar +identical +thoseof +previous +semesters +used +copy +make +available +individual +papers +youto +copy +class +discuss +topics +relevant +current +papers +click +herefor +tentative +schedule +lecture +detail +detail +review +papers +willinstead +adiscussion +major +topics +themes +using +papers +focal +point +active +participation +discussion +strongly +encouraged +willing +participate +actively +daily +class +geta +expect +quietly +listen +weeks +much +lessout +class +gradingthere +exams +midterm +final +project +worth +total +grade +exams +designed +verify +carefully +thoroughly +read +readings +projectyou +required +complete +term +project +list +suggested +topics +provided +strongly +encouraged +make +project +projects +involve +implementation +tools +experimental +implementationsof +algorithms +suggested +research +literature +measurement +studies +simulations +projects +must +experimental +component +literature +surveys +unvalidated +design +papers +sufficient +projects +done +person +groups +larger +smaller +groups +approved +case +basis +write +term +paper +summarizing +results +project +paper +must +meet +standards +research +publication +graded +quality +writing +well +content +also +make +ashort +presentationabout +project +class +project +presentationshere +schedule +project +presenations +presentations +room +computer +sciences +statistics +times +approximate +manyan +stubbs +andrew +biggs +francis +salmon +gunawan +agus +qingmin +wang +chien +pang +james +chen +eric +larsen +conroy +fritz +craig +jordan +prasad +deshpande +avinash +sodani +basney +rajesh +raman +biswadeep +chen +taxiao +wang +yanming +xinyu +richard +zhang +todd +munson +wenjun +xinyi +wang +yufei +zeyu +chen +sridhar +gopal +michael +leesolomon +wisc +eduthu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..2e23a039 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +chiang +time +gradesgo +homepage +homepage diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..d44d22f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,167 @@ +sections +overall +structure +program +primarily +exercise +general +problem +solving +write +fortran +code +though +want +time +solution +algorithm +even +dependent +particular +programming +language +fortran +solution +class +following +monday +exercises +computer +labyou +using +vectra +computer +science +statistics +containing +hewlett +packard +vectra +running +microsoft +windows +microsoft +fortran +open +seven +days +week +except +certain +holidays +printer +room +located +across +hall +quota +pages +print +exceed +quota +must +contact +either +mail +going +room +office +hours +prefer +mail +increase +quota +beware +machines +vectra +aren +configured +correctly +particular +machines +along +wall +closest +outsidehallway +towards +left +hand +part +room +avoid +also +home +dorm +computers +write +programs +however +probably +purchase +copy +microsoft +fortran +lahey +personal +fortran +inside +cover +textbook +also +work +computer +labs +campus +however +fortran +compilers +please +first +using +software +using +includes +microsoft +windows +microsoft +fortran +mail +netscape +pointers +interest +home +page +jeff +lampert +home +page +computer +sciences +department +home +page +starting +points +internet +exploration +lycos +search +world +wide +keyword +dilbert +comic +relief +long +nights +assignment +copyright +copy +modified +gareth +bestor +bestor +wisc +last +modified +october diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..7c1b93da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,55 @@ +homepagecs +homepagewelcome +homepage +purpose +homepage +provide +students +information +pertaining +sections +since +page +changes +frequently +responsibilityto +check +page +often +general +informationinstructor +todd +munsonemail +tmunson +wisc +eduoffice +computer +science +statisticsoffice +phone +office +hours +appointmentsections +textbook +problem +solving +walter +savitchclass +informationexpectationssyllabusexam +schedulee +mailgradinglate +assignmentsextra +creditpoliciesconsultant +responsibilitiesacademic +misconductother +informationdaily +notes +assignmentshomework +assignmentsprograms +documents +used +classother +programs +resourcescs +homepagetmunson +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..05945105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,92 @@ +section +computer +science +section +time +place +nolandinstructor +todd +turnidgeoffice +hours +tbalab +hours +tbaannouncementsclass +notes +classes +handout +structs +available +program +available +getting +started +useful +information +read +getting +started +general +information +text +facilities +grading +policies +syllabus +tentative +syllabus +course +assignments +text +reading +programming +assignments +solutions +handouts +collection +class +handouts +date +class +mailing +list +information +send +messages +classas +whole +course +home +page +home +page +sections +muchinformation +general +interest +including +information +tutors +consultants +windows +operating +system +email +netscape +sections +find +information +provided +byother +instructors +helpful +example +handout +gregorysharp +course +difficulty +last +modified +todd +turnidge +turnidge +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..bdf5956c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,120 @@ +sections +sections +algebraic +language +programming +announcements +exam +thursday +psych +reload +page +every +time +login +instructor +chris +weaver +email +weaver +wisc +email +policy +office +computer +science +statistics +office +phone +office +hours +appointment +office +hours +first +weeks +grader +zhang +email +wisc +office +computer +science +statistics +office +phone +location +section +noland +section +noland +computer +vectra +computer +science +statistics +hours +seven +days +week +staffed +consultants +general +course +info +syllabus +text +problem +solving +object +programming +walter +savitch +addison +wesley +isbn +includes +errata +source +code +text +misconduct +policyassignments +grading +handouts +programming +assignments +homework +reading +assignments +lecture +notes +handouts +example +programs +exam +quiz +keys +late +policy +grading +policy +style +guidelines +still +rough +printing +paper +statement +chris +weaver +computer +sciences +department +university +wisconsin +madison +last +change +chris +weaver diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..89e0931b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,185 @@ +spring +advanced +computer +architecture +spring +offering +course +information +instructor +prof +james +smith +office +engineering +hall +office +hours +tues +thurs +office +phone +email +wisc +princeoffice +engineering +hall +office +hours +office +phone +mail +address +princed +wisc +table +contents +news +readings +lecture +notes +homeworks +project +miscellaneousnews +homework +solns +special +office +hours +final +exam +biochemistry +pmreadings +readings +table +contents +full +papers +doit +readings +table +contents +full +papers +doit +readings +table +contents +full +papers +doit +readings +table +contents +full +papers +doit +lecture +notes +course +overview +introduction +computer +architecture +performance +cost +instruction +sets +pipelining +advanced +pipelining +part +advanced +pipelining +part +vectors +vliw +limits +software +cache +memories +advanced +caches +advanced +caches +main +memory +main +memory +systems +disk +arrays +disk +arrays +interconnect +technology +interconnect +technology +networks +networks +multiprocessors +part +multiprocessors +part +multiprocessors +part +multiprocessors +part +homeworks +homework +assignment +homework +solution +homework +assignment +homework +solution +homework +assignment +note +homework +homework +solution +homework +assignment +homework +solution +homework +assignment +homework +solution +project +project +assignment +trace +information +project +list +miscellaneous +useful +tools +projects +review +midterm +midterm +exam +specmarks +considered +harmful +analysis +pipeline +clocking +detailed +design +reservation +station +lecture +network +routing +lecture +network +routing +cache +summary +final +exam diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..df0df1c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,409 @@ +main +pagecomputer +science +computer +graphics +page +evolving +incomplete +hopefully +useful +beginnings +home +page +forcs +deals +computer +graphics +scientific +visualization +atthe +level +computer +graphics +principles +practice +foley +computergraphics +alan +watt +currently +taught +bruceland +also +project +leader +visualization +group +atthe +cornell +theory +center +contents +course +summary +administrivia +textbook +errors +homework +staff +schedule +relevant +math +cornell +math +graphics +courses +course +summary +computer +graphics +topic +requires +mathematical +programming +artistic +skills +among +others +content +computer +graphics +fundamentals +course +cornell +focuses +mathematical +skills +associated +course +programming +course +designedto +help +illuminate +math +course +covers +following +topics +year +construction +surfaces +explicit +polygon +lists +parametric +operations +quadric +surfaces +figures +rotation +swept +surfaces +tensor +product +surfaces +parametric +surface +viewer +implicit +surfaces +quadric +surfaces +blobby +models +operations +surface +tesselation +computing +surface +normal +hierarchical +grouping +simple +objects +form +complex +surfaces +scene +composition +animation +introduction +homogeneous +coordinates +geometric +transforms +building +objects +kinematic +animation +hierarchical +modeling +combining +prototype +objects +mimic +connected +rigid +parts +inverse +kinematics +dynamical +systems +animation +differential +equations +cellular +automata +viewing +group +objects +camera +transforms +clipping +view +volume +projection +onto +screen +parallel +projection +perspective +projection +camera +simulator +transform +clip +project +stereo +vision +rendering +shading +lighting +human +color +vision +color +device +limitations +light +geometric +optics +waves +gourand +phong +shading +hidden +surface +removal +buffer +transparency +shadows +scan +conversion +anti +aliasing +polygons +pixels +human +vernier +hyper +acuity +image +space +methods +object +space +methods +surface +property +modifications +texture +mapping +bump +mapping +volume +textures +modeling +scientific +data +scientific +visualization +aspects +scientific +data +dimensionality +fields +scalar +vector +fields +objects +walls +channel +scalar +fields +contour +lines +surfaces +colors +color +misperception +volume +rendering +vector +fields +difficulties +arrows +field +lines +particle +advection +multiparameter +high +dimensional +data +dynamic +systems +administrivia +errors +textbook +computer +graphics +alan +watt +homework +assignments +homework +homework +homework +homework +homework +march +homework +march +homework +march +homework +april +homework +april +homework +course +schedule +prelim +first +test +spring +serve +general +guide +test +style +also +list +scheduled +prelims +cornell +spring +break +prelim +religious +holiday +students +educationlaw +mandates +faculty +make +available +opportunity +tomake +examination +missed +religious +beliefs +inorder +facilitate +preparation +makeup +exams +students +intendingto +absent +order +observe +holiday +requestedto +notify +instructor +last +lecture +final +scheduled +exam +period +tuesday +upson +final +mean +standard +deviation +staff +bruce +land +rhodes +bruce +cornell +jing +huang +upson +huang +upson +justin +mccune +upson +jmccune +csrelevant +math +cornell +university +math +graphics +courses +university +california +davis +university +waterloo +university +wales +college +cardiff +university +manchester +oregon +state +universityrelated +topics +final +project +animations +visualization +cornell +theory +center +comments +theory +center +online +documents +welcome +sent +todoc +comments +cornell +last +modified +land +copyright +statement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..0c5a0104 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,133 @@ +main +pagecomputer +science +computer +graphics +laboratory +exercisesthis +site +cornell +universityundergraduate +computer +graphics +laboratory +pages +contain +materials +including +procedures +software +student +results +section +deals +computergraphics +scientific +visualization +level +computergraphics +principles +practice +foley +dcomputer +graphics +watt +currently +taught +bruceland +also +project +leader +visualization +group +atthe +cornell +theory +center +pages +first +place +siguccs +basededucation +training +materials +competition +exercises +getting +started +building +polygon +objects +parametric +surfaces +transformations +modeling +using +virtual +camera +camera +perspective +transforms +lighting +texture +bump +mapping +modeling +scientific +visualization +design +project +physics +based +animation +implicit +surfaces +procedural +textures +exercises +done +order +note +exercises +marked +current +marked +areincluded +reference +current +exercises +chat +facility +communication +aboutcs +related +topics +spring +semesteraccess +restricted +enrolled +students +related +topics +final +project +animations +visualization +cornell +theory +center +comments +theory +center +online +documents +welcome +sent +todoc +comments +cornell +last +modified +land +copyright +statement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..64209cae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,75 @@ +cornell +fall +topics +computer +graphics +fall +semestereach +group +students +chose +current +research +topic +computergraphics +read +appropriate +papers +implemented +code +group +delivered +lecture +chosen +topic +anddocumented +work +document +student +topics +metaball +modeler +windows +michael +arcuri +alex +benton +modeling +human +facial +expressions +huang +hung +content +based +image +retrieval +systems +interior +design +sean +landis +interdependent +particle +systems +justin +mccune +visualization +diffusely +distributed +pollutants +using +spatially +explicit +landscape +modelsfu +tsai +antialiasing +videos +images +using +stochastic +sampling +arun +vermache +hsun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..d701faa7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,75 @@ +yong +homepageto +yong +homepageyou +number +visitor +since +yong +beijing +china +currently +program +computer +sciences +university +texas +austin +settled +stayed +mathematics +graduate +program +rutgers +university +brunswick +year +jersey +beautiful +place +wife +tsinghua +university +beijing +china +milanitalian +soccerking +soccernba +sitefox +sportschicago +bullsmichael +jordannflnhlcs +rankingmarried +childrenseinfeldcomputer +sciencesutilitieshtml +convertersimage +collectionssystemshtmllatexcgitcl +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat +tmiscinternet +travel +network +united +parcel +service +united +states +postal +service +usps +fedexused +guidefun +todayus +newsstarwavesupermodels +contact +river +street +austin +finger +yonglu +utexas +page +heavy +construction +last +modified +yong diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..cdadc391 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,217 @@ +david +young +david +youngashbel +smith +professor +professor +computer +sciences +professor +ofmathematics +director +center +numerical +analysis +webb +institute +naval +architecture +mathematics +mathematics +harvard +university +honors +awards +professional +service +fellow +american +association +advancement +science +award +outstanding +contribution +computer +science +journal +linear +algebra +applications +special +issuededication +chair +applied +mathematics +committee +american +mathematicalsociety +board +trustees +argonne +universities +association +associate +editor +siam +journal +matrices +matrixapplications +areas +interestnumerical +analysis +partial +differential +equations +numericallinear +algebra +summary +researchmy +research +activity +focuses +numerical +solution +partialdifferential +equations +based +finite +difference +methodsand +iterative +methods +solve +associated +systems +oflinear +algebraic +equations +involving +matrices +large +andsparse +several +computer +software +packages +developed +basedon +research +part +itpack +project +research +beingextended +include +methods +suitable +shared +memory +distributedmemory +parallel +computers +rapidly +convergent +iterative +methodsbased +parallel +multilevel +procedures +also +beingdeveloped +selected +recent +publicationsd +young +kincaid +linear +stationary +second +degree +methods +solution +large +linear +systems +topics +polynomials +several +variables +applications +rassias +srivasiava +yanushauska +world +scientific +publ +company +singapore +young +vona +rational +iterative +methods +solving +large +sparse +linear +systems +applied +numerical +mathematics +young +search +omega +iterative +methods +large +linear +systems +kincaid +academic +press +young +carey +kincaid +sepehrnoori +vector +parallel +iterative +solutions +large +sparse +systems +pdes +science +engineering +cray +computers +minneapolis +cray +research +young +search +high +level +parallelism +iterative +sparse +linear +systems +solvers +parallel +supercomputing +methods +algorithms +applications +graham +carey +john +wiley +sons +previous +profile +index +next +profile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..f05f4341 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,240 @@ +yoonsuck +choe +yoonsuck +choe +photo +added +student +dept +computer +sciences +university +texas +austin +computer +science +university +texas +austin +austin +december +computer +science +yonsei +university +seoul +korea +august +welcome +homepage +visitor +since +research +interested +modeling +cortical +structures +neural +networks +currently +working +hand +written +digit +recognition +systembased +lateraly +interconnected +synergetically +self +organizing +feature +lissom +developed +joseph +sirosh +prof +risto +miikkulainen +utcs +neural +networks +research +group +recent +work +includes +extending +lissom +model +actualspiking +events +model +called +spiking +lissom +slissom +beself +organized +lissom +segment +multiple +objects +retinaby +synchronizing +spikes +within +group +representing +object +desynchronizing +spikes +groups +representing +different +objects +research +outline +also +check +html +book +related +research +editing +prof +miikkulainen +joseph +sirosh +lateral +interactions +cortex +structure +function +commented +publications +related +publications +research +group +utcs +group +publications +page +yoonsuck +choe +risto +miikkulainen +self +organization +segmentation +laterally +connected +spiking +neurons +technical +report +department +computer +sciences +university +texas +austin +september +joseph +sirosh +risto +miikkulainen +yoonsuck +choe +lateral +interactions +cortex +structure +function +electronic +book +isbn +yoonsuck +choe +joseph +sirosh +risto +miikkulainen +laterally +interconnected +self +organizing +maps +hand +written +digit +recognition +appear +touretzky +mozer +hasselmo +editors +advances +neural +information +processing +systems +cambridge +press +yoonsuck +choe +laterally +interconnected +self +organizing +feature +handwritten +digit +recognition +techical +report +department +computer +sciences +university +texas +austin +august +masters +thesis +bunch +links +totally +unordered +click +find +interestingcontact +information +office +phone +email +yschoe +utexas +mailing +address +university +texas +austin +department +computer +sciences +austin +page +maintained +yoonsuck +choe +yschoe +utexas +last +updated +utcs +home +home +newsgroup +summary diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..f3886f4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,87 @@ +yuanjing +xuwinter +break +updated +addresspictures +family +department +computer +sciences +university +texas +austin +austin +texas +spring +time +table +selected +course +schedulecs +object +oriented +software +engr +browne +logic +synthesis +aziz +client +server +systems +development +gang +previous +semesters +china +pagechinese +students +association +austin +china +alumni +page +worked +studied +universityof +science +technology +china +hefei +china +institute +mathematics +chinese +academy +sciences +beijing +chinauniversity +munich +atmunich +germanyuniversity +manchester +manchester +prof +nick +higham +family +wang +lifan +hong +chen +guizhongustc +yuan +hailiang +yang +yuhongfriends +linsoftware +programming +java +java +perl +common +gateway +interface +links +yahoo +publisher diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..68404d75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +feng +feng +visitor +number +since +yufeng +utexas +edufinger +public +ring +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..29dd6199 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,86 @@ +zhiying +chen +home +pagezhiying +chenabout +mefrom +guangzhou +canton +people +republic +china +currently +master +program +computer +sciences +department +university +texas +austin +seeking +full +time +resume +click +resume +postcript +format +zhongshan +university +please +view +chinese +guangzhou +china +life +austin +fall +spring +calculus +fall +computer +architecture +spring +misc +china +chinese +zodiac +person +associated +different +animals +kind +animal +associated +find +page +friends +zhongshan +university +maintained +john +dong +thanks +else +world +wide +info +contact +burton +austin +zchen +utexas +page +still +construction +copyright +zhiying +chen +created +last +modified +visitor +according +counter +since diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..7f07be90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,129 @@ +maggie +xiao +zhou +maggie +xiao +zhou +home +page +education +assistantship +graduate +student +teaching +assistant +database +management +department +computer +sciencesat +university +texas +austin +buaa +beijing +china +life +austin +current +work +fall +multimedia +systems +algorithms +data +communication +networks +distributed +processes +work +spring +fall +look +around +campus +kaleidoscope +china +land +beauty +visit +world +people +daily +china +news +digest +zhai +chinese +magazine +newspage +time +magazine +magazine +entertainment +movies +stamps +computer +world +world +computer +society +ieee +computer +giants +online +career +center +company +home +pages +internet +search +yahoo +galaxy +lycos +internet +directory +universities +guide +html +scripts +online +library +contact +information +mail +zhouxiao +utexas +http +utexas +users +zhouxiao +office +main +building +room +campus +office +phone +address +campus +department +computer +sciences +taylor +university +texas +austin +austin +home +page +last +modified +sept +comments +welcome +send +email +zhouxiao +utexas diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..b978310f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,110 @@ +qing +homepage +qinghi +friend +welcome +homepage +know +graduate +student +inthe +department +computer +scinces +university +texas +austin +born +beijing +capital +city +china +bachelor +degree +peking +university +meet +ofmy +friends +former +classmates +peking +university +findmore +people +pekinguniversity +alumni +home +page +overseas +html +enjoy +living +austin +texas +peeking +following +sites +know +like +well +find +lots +valuable +informationand +professionalinternetpc +relatedmac +relatedafter +worknetscape +dynamic +document +testtwins +eldertwins +youngernetscape +dynamic +document +testanimation +dancing +titledancing +title +testanother +netscape +dynamic +document +testyet +another +netscape +dynamic +document +testfriendsthis +china +travel +site +maintained +former +classmate +xiaohai +best +friend +china +shan +shinan +clike +clike +student +visitor +number +since +october +site +construction +last +modified +qinguniversity +texas +austin +department +computer +sciencesaustin +zhuqing +utexas diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..58d45985 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,73 @@ +univ +washington +computer +science +organizationsincluding +faculty +staff +students +visitors +organizations +ouraffiliates +program +graduating +students +regionincluding +local +information +desktop +references +links +elsewhere +spotlightuwwins +pacific +regionals +international +student +programmingcontesttwovideos +highlighting +educational +initiativesourcolloquia +live +mbonemajordonation +intel +corporationdickkarp +receives +national +medal +scienceprofessionalmasters +program +application +deadline +autumn +departmentoverview +theimpact +research +university +perspective +faculty +staffpositions +available +half +century +exponential +progress +information +technology +pages +people +region +courses +laboratory +research +newscan +handle +tables +click +university +washington +seattle +voice +comments +webmaster +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..b02044e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,65 @@ +andrew +berman +home +pageandrew +berman +aberman +washington +educomputer +science +bourassa +virgil +selberg +erik +tron +process +specific +file +protection +unix +operating +system +bothpostscript +andhtml +proceedings +winter +usenix +conference +berman +andrew +data +structure +fast +approximate +matching +postscript +format +berman +andrew +shapiro +linda +efficient +image +retrieval +multiple +distance +measures +available +postscript +format +appear +spie +special +links +wife +debbie +debbie +beautiful +daughter +melanie +miscellaneous +poison +donuts +stupid +stupidmy +bookmarks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..f33d3407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,127 @@ +adam +finkelstein +adam +finkelstein +adam +washington +department +computer +science +university +washington +seattle +washington +started +drinking +cups +coffee +hair +limp +andlacked +body +years +living +good +life +graduate +student +finally +finished +doctorate +computer +graphics +fall +quarter +post +early +join +thecomputer +science +department +atprinceton +university +software +engineer +tibco +formerly +teknekron +software +systems +palo +alto +wrote +software +people +trade +stock +undergraduate +student +swarthmore +college +class +studied +physics +occasionally +recent +research +project +finding +specific +images +alarge +database +images +since +working +something +called +multiresolution +video +photos +play +ultimate +frisbee +team +calledumatata +address +phone +number +look +plan +file +across +photocopy +photocopy +thehilarious +menu +seattle +least +visited +coffee +house +caffe +lardo +recent +chilly +night +visit +snoqualmie +pass +made +excellent +view +comet +hyakutake +great +pictures +taken +friend +marcus +cool +images +made +glass +sculpture +dithering +mona +gothic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..6c4bfc7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,291 @@ +james +ahrens +home +page +james +ahrens +home +page +address +computer +science +engineering +department +university +washington +seattle +email +ahrens +washington +phone +research +interests +visualization +parallel +distributed +computing +scientific +database +management +projects +university +washington +database +environment +vision +research +alamos +national +laboratory +visualization +project +publications +james +ahrens +charles +hansen +cost +effective +data +parallel +load +balancing +international +conference +parallel +processing +august +load +balancing +algorithms +typically +improve +program +performance +onunbalanced +datasets +degrade +performance +balanced +datasets +unnecessary +load +redistributions +occur +paper +presents +cost +effective +data +parallel +load +balancing +algorithm +whichperforms +load +redistributions +possible +savings +outweighthe +redistribution +costs +experiments +data +parallel +polygonrenderer +show +performance +improvement +factor +onunbalanced +datasets +performance +loss +percent +onbalanced +datasets +using +algorithm +linda +shapiro +steven +tanimoto +james +brinkley +james +ahrens +jakobovits +lara +lewis +visual +database +system +data +experiment +management +model +based +computer +vision +proceedings +second +based +vision +workshop +february +paper +presents +design +visual +database +system +data +experiment +management +system +designed +general +scientific +database +system +motivated +intended +model +based +computer +vision +provide +unified +data +model +highly +graphical +user +interface +advanced +query +facility +interactive +laboratory +notebook +system +aids +scientific +experimentation +promote +data +sharing +computer +vision +community +frank +ortega +charles +hansen +james +ahrens +fast +data +parallel +polygon +rendering +supercomputing +november +paper +describes +data +parallel +method +polygon +rendering +massively +parallel +machine +method +based +simple +shading +model +targeted +applications +require +fast +rendering +extremely +large +sets +polygons +sets +found +many +scientific +visualization +applications +renderer +handle +arbitrarily +complex +polygons +need +meshed +issues +involving +load +balancing +addressed +data +parallel +load +balancing +algorithm +presented +rendering +toolkit +enables +scientist +display +shaded +polygons +directly +parallel +machine +avoiding +transmission +huge +amounts +data +post +processing +rendering +system +james +ahrens +charles +hansen +cost +effective +data +parallel +load +balancing +university +ofwashington +department +computer +science +engineering +april +longer +version +icpp +paper +also +describes +fast +data +parallel +load +redistribution +algorithm +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..2f731fbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,176 @@ +database +environment +vision +research +database +environment +vision +research +overview +database +environment +vision +research +wasdesigned +general +scientific +database +system +motivated +andintended +model +based +computer +vision +provide +unifieddata +model +highly +graphical +user +interface +advanced +queryfacility +interactive +laboratory +notebook +databaseenvironment +vision +research +aids +scientific +experimentation +andpromotes +data +sharing +computer +vision +community +devr +entities +stored +hierarchical +relational +datastructures +schema +entity +contains +name +ofproperties +parts +attributed +relations +among +theparts +graphic +definitions +describe +buildinstance +specific +visualizations +studying +many +different +image +database +research +topics +including +visual +interfaces +multi +level +queries +experiment +management +people +principal +investigators +linda +shapiro +steven +tanimoto +brinkley +graduate +students +james +ahrens +jakobovits +lara +lewis +publications +linda +shapiro +steven +tanimoto +james +brinkley +james +ahrens +jakobovits +lara +lewis +visual +database +system +data +experiment +management +inmodel +based +computer +vision +proceedings +secondcad +based +vision +workshop +february +presents +overview +devr +project +lara +lewis +linda +shapiro +steven +tanimoto +flexibledata +organization +visualization +support +visual +databasesystem +spie +symposium +electronic +imaging +scienceand +technology +february +jakobovits +linda +shapiro +steven +tanimoto +implementing +multi +level +queries +database +environment +vision +research +spie +symposium +electronic +imaging +science +technology +february +email +ahrens +washington +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..e4195788 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,101 @@ +page +bernheim +washington +started +computer +science +graduate +school +university +washington +page +still +construction +graduated +williams +college +computer +science +math +front +computer +playing +ultimate +frisbee +autumn +quarter +classes +programminglanguages +automata +principles +ofdigital +systems +design +computer +graphics +seminar +parallel +programming +environments +outdoor +related +links +national +parks +home +page +gorp +guideto +outdoor +recreationfun +links +christian +scott +interactive +list +abig +pile +cool +links +blast +past +scooby +dooeducational +links +great +reference +women +undergrads +computer +science +peterson +education +center +source +information +graduate +schools +educational +opportunities +information +distributed +mentorship +project +mentorship +project +allows +women +undergraduates +spend +summerworking +research +female +mentor +great +experience +highlyrecommend +program +back +home +pagelast +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..0f5eed2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,151 @@ +amir +michail +amir +michailgraduate +studenti +second +year +graduate +student +department +computer +scienceat +university +washington +research +interests +include +followingareas +algorithm +design +software +engineering +educational +software +master +degree +universityof +toronto +master +thesis +optimal +broadcast +summationfor +hierarchical +ring +architectures +shift +click +hereto +obtain +compressed +postscript +file +recently +experimenting +ways +teach +algorithms +particular +built +opsis +java +appletdesigned +teach +balanced +binary +tree +algorithms +combinesprogramming +proof +animation +finally +lunar +lander +style +gamethat +wrote +part +undergraduate +graphics +course +quotations +computer +scientists +mathematicians +tend +conservative +many +unwillingto +consider +might +better +writing +proofs +told +mathematicians +embarrassed +learn +publishedincorrect +theorems +motivated +avoid +errors +believe +theywill +like +structured +proofs +persuaded +computer +scientists +willing +explore +unconventional +proofstyles +unfortunately +found +care +whether +theyhave +published +incorrect +results +often +seem +glad +error +wasnot +caught +referees +since +would +meant +fewer +publication +fear +computer +scientists +motivated +proof +stylethat +likely +reveal +mistakes +leslie +lamport +ways +constructing +software +design +wayis +make +simple +obviously +deficiencies +theother +make +complicated +obvious +deficiencies +hoare diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..193c7e59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,87 @@ +richard +anderson +home +page +richard +anderson +associate +professor +graduated +inmathematics +reed +college +computer +science +stanfordin +joined +university +washington +aone +year +postdoc +mathematical +science +research +institute +inberkeley +received +presidential +younginvestigator +award +spent +academic +yearas +visiting +professor +indian +institute +science +bangalore +india +richard +anderson +main +research +interests +theory +implementationof +algorithms +including +parallel +algorithms +computational +geometry +scientific +applications +work +computer +science +engineering +department +university +washington +seattle +teaching +papers +work +progress +research +projects +qualifying +evaluation +projects +travel +notes +year +visiting +theindian +institute +science +resume +travelling +tourist +project +pictures +recent +talksanderson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..c42777bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,160 @@ +anhai +doan +homepageanhai +doan +page +reconstructed +please +revisit +soon +born +brought +vietnam +finishing +high +school +iwent +hungary +study +computer +science +graduated +kossuth +lajos +university +arts +andsciences +debrecen +hungary +received +also +computer +science +university +wisconsin +milwaukee +starting +fall +program +department +computer +science +andengineering +university +ofwashington +seattle +research +interests +mostly +artificial +intelligence +amcurrently +investigating +decision +making +underuncertainty +decision +theoretic +planning +markov +decision +processes +qualitative +decision +theory +academic +interests +include +reading +traveling +listening +music +mostly +jazz +blues +interesting +thing +first +name +anhai +meaning +calm +invietnamese +made +combining +last +syllable +name +ofmy +mother +birthplace +nghean +first +syllable +name +ofmy +father +birthplace +haiphong +shows +creative +folkswere +thought +birth +younger +brother +theysimply +switched +syllables +gave +namehaian +contents +research +interests +probabilistic +planning +knowledge +representation +recent +papers +research +library +curriculum +vitae +research +interests +education +employment +history +awards +honors +publications +teaching +data +structures +algorithms +taking +course +check +information +office +hours +locations +personal +interests +comtemporary +vietnamese +affairs +literature +writing +music +paintings +foreign +languages +traveling +general +purpose +library +life +snapshotsanhai +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..bc6434c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,165 @@ +joel +auslander +joel +auslander +joel +auslander +hopeful +department +computer +science +engineeringat +university +washington +picture +spring +computer +animation +class +click +onit +original +last +quarter +completed +quals +project +time +code +generation +multiflow +compiler +office +sieg +hall +home +university +washington +univ +seattle +seattle +resume +written +following +papers +fast +effective +dynamic +compilation +auslander +philipose +chambers +eggers +andb +bershad +pldi +automatic +dynamic +compilation +support +event +dispatching +inextensible +systems +chambers +eggers +auslander +philipose +mock +andp +pardyak +workshop +compilersupport +systems +software +february +experience +controller +based +automatic +motion +synthesisfor +articulated +figures +auslander +fukunaga +partovi +christensen +reiss +shuman +marks +acmtransactions +graphics +pages +paper +also +site +optimal +leapfrogging +auslander +benjamin +wilkerson +mathematics +magazine +pages +lossily +compressed +mpeg +animationthat +goes +motion +synthesis +paper +last +sequence +shows +mywork +computer +piece +together +cartwheels +jump +andshuffle +animation +without +figure +fall +andcollapse +brown +figure +using +algorithm +orange +figure +isjust +trying +switch +motions +without +consideration +tosmoothness +physics +autumn +took +super +short +animation +double +speed +small +version +final +project +find +better +place +slides +thetalk +singular +value +decomposition +gave +graphics +seminar +ausland +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..dfa2b2e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,110 @@ +girl +named +brian +named +brian +boingee +boingee +boingee +brian +michalowskidepartment +computer +science +engineeringmail +stop +university +washingtonseattle +office +location +sieg +office +phone +currently +taking +ling +currently +editing +mossy +bitsthank +visiting +page +visitor +number +page +worst +viewed +using +headscapewhenever +second +year +gradstudent +actually +liberal +artist +interested +inlinguistics +confused +good +getting +know +already +ultrahotlist +favorite +sites +ofall +time +search +look +online +references +looking +forsomething +glorified +hotlist +doesn +thave +urouletteto +random +find +past +institutions +ofwhich +mental +personal +info +quote +file +favorite +songsand +poems +fictitious +thrash +band +purity +test +original +works +tokeep +touch +finger +info +mail +info +guestbook +info +pagesfrom +friends +idea +including +aslfingerspelling +snapshots +blatantly +stolen +brad +chamberlain +brian +michalowski +dept +complete +sanityeradication +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..66e8fe65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,72 @@ +paul +beamepaul +beamepaul +beame +associate +professor +thedepartment +computer +science +engineeringat +theuniversity +washington +paul +received +mathematics +computer +sciencein +computer +science +university +toronto +post +doctoral +research +associate +academicyear +joined +university +washington +autumn +received +presidentialyoung +investigator +award +paul +research +concerned +primarily +computationalcomplexity +theoretical +aspects +paralleland +distributed +computing +recent +research +concentrated +connections +computationalcomplexity +proof +theory +particular +complexity +proofs +inpropositional +proof +systems +paul +enjoys +squash +softball +sports +enthusiasm +cancompensate +lack +talent +recent +papers +quals +projects +beame +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..8149c6d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,92 @@ +david +beckerdavid +beckercontact +info +marks +spot +stuff +spin +much +time +goes +makingspina +real +operating +system +responsibility +level +code +borrow +device +model +device +drvier +support +build +environment +code +management +keeping +development +platforms +functioning +someday +getto +performance +measurement +optimization +sport +tried +bunch +ultimate +frisbee +favorite +conference +triple +jump +minnesota +athletic +conference +bethel +college +ultimate +frisbee +champions +volleyball +mens +grad +champions +team +sports +play +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam +handball +also +playracquetballgolftennis +done +bridgecampingcanoeingdisc +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwater +skiingweightliftingwhitewater +raftinghorseback +ridingmountain +bikingin +line +skating +interests +library +economic +topics +particularly +interest +free +banking +anti +trust +currency +ssto +rlvs +theology +centuray +railroads +boot +locomotive diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..c648d7b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,525 @@ +brian +bershad +brian +bershad +bershad +washington +eduwork +computer +science +engineering +department +university +washington +seattle +home +street +seattle +brian +bershad +assistant +professor +university +ofwashington +since +receiving +university +ofwashington +took +brief +respite +seattle +experiment +post +industrial +culture +northeast +returned +northwest +coffee +bershad +research +operating +systems +distributed +systems +networking +parallel +systems +architecture +work +hasappeared +tocs +sosp +asplos +isca +although +seem +asigmetrics +paper +published +save +life +besides +working +bershad +runs +plays +squash +hangs +thestairmaster +projects +include +spin +extensibleoperating +systems +mobile +computing +memory +management +operating +systems +architectures +midway +project +carnegie +mellon +parallelnetworking +scalable +rocky +thesequel +etch +binary +instrumentation +optimizationcourses +winter +looking +masters +quals +project +click +list +projects +youmight +masters +degree +quals +project +recent +papers +trace +driven +comparison +algorithms +parallel +prefetching +cachingtracy +kimbrel +andrew +tomkins +hugo +patterson +brian +bershad +edward +felten +garth +gibson +anna +karlin +sigops +usenix +association +symposium +onoperating +system +design +implemenation +october +dynamic +binding +extensiblesystems +przemyslaw +pardyak +brian +bershad +osdi +sigops +usenix +association +symposium +onoperating +system +design +implemenation +october +structure +performance +interpreters +theodore +romer +dennis +geoffrey +voelker +alec +wolman +wayne +wong +jean +loup +baer +brian +bershad +henry +levy +asplos +october +fast +effective +dynamiccompilation +conference +programming +language +design +implementationj +auslander +philipose +chambers +eggers +bershad +extensible +protocol +architecture +application +specific +networking +marc +fiuczynski +brian +bershad +writing +operating +system +using +modula +emin +sirer +stefan +savage +przemyslaw +pardyak +greg +defouw +mary +alapat +brian +bershad +appeared +workshop +compiler +support +system +software +february +language +support +extensible +operating +systems +wilson +hsieh +marc +fiuczynski +charles +garrett +stefan +savage +david +becker +brian +bershad +appeared +workshop +compiler +support +system +software +february +safe +dynamic +linking +extensible +operating +system +emin +sirer +marc +fiuczynski +przemyslaw +pardyak +brian +bershad +appeared +workshop +compiler +support +system +software +february +automatic +dynamic +compilation +support +event +dispatching +extensible +systemsc +chambers +eggers +auslander +philipose +mock +pardyak +workshop +compiler +support +systems +software +february +extensibility +safety +performance +spin +operating +system +bershad +sosp +reducing +memory +overhead +using +onlinesuperpage +promotion +romer +ohlrich +karlin +bershad +isca +write +detection +distributed +sharedmemory +zekauskas +sawdon +bershad +paper +appearedin +osdi +conference +dynamic +page +mapping +policies +cache +conflictresolution +standard +hardware +romer +bershad +chen +paper +appeared +osdi +conference +mobisaic +voelker +bershad +paper +appeared +inthe +mobile +computing +workshop +issues +extensible +operating +systems +savage +bershad +avoiding +conflict +misses +dynamically +large +direct +mappedcaches +bershad +chen +romer +asplos +spin +extensible +microkernel +forapplication +specific +operating +system +services +bershad +uwtechnical +report +efficient +packet +demultiplexing +multiple +endpoints +large +messages +yuhara +bershad +maeda +moss +winter +usenix +impact +operating +system +structure +memory +system +performance +chen +bershad +sosp +protocol +service +decomposition +high +performance +networking +maeda +bershad +sosp +practical +considerations +blocking +concurrent +objects +bershad +fast +interrupt +priority +management +operating +system +kernels +usenix +microkernels +workshop +midway +distributed +shared +memory +system +bershad +zekauskas +sawdon +ieee +compcon +local +area +mobile +computing +stock +hardware +andmostly +stock +software +watson +bershad +usenix +moblic +consistency +management +virtually +indexed +caches +wheeler +bershad +asplos +fast +mutual +exclusion +uniprocessors +bershad +redell +ellis +asplos +using +mach +communication +primitives +ginsburg +baron +bershad +machnix +using +microbenchmarks +evaluate +system +performance +draves +bershad +forin +wwos +networking +performance +microkernels +maeda +bershad +wwos +increasing +irrelevance +performance +micro +kernel +based +operating +systems +bershad +usenix +microkernels +workshop +system +mach +forin +golub +bershad +machnix +using +continuations +implement +thread +management +communication +inoperating +systems +draves +bershad +rashid +dean +sosp +information +arpa +related +information +rain +city +hash +house +harriers +related +information +relative +abducted +alien diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..b4058377 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,196 @@ +doorenbos +home +pagebob +doorenbos +home +page +department +computer +science +engineeringuniversity +washingtonbox +seattle +office +sieg +hall +bobd +washington +currently +working +netbot +union +place +suite +seattle +voice +daily +pages +comedy +dilbert +strips +boffo +david +letterman +list +geeky +news +news +zdnet +anchordesk +magazines +good +stuff +shortcut +today +cool +tool +research +current +postdoc +working +oren +etzioni +steve +hanks +weld +softbot +project +also +particular +shopbot +internet +shopping +agent +previous +soar +project +thesis +science +sites +collections +pointers +repository +canada +pointers +gophers +science +scientific +american +sigma +american +scientist +miscellaneous +links +meta +searching +metacrawler +savvysearch +searching +alta +vista +lycos +inktomi +open +text +infoseek +excite +crawler +hotbot +directories +hierarchical +selective +yahoo +magellan +pointcom +lists +search +engines +guide +searching +online +searches +search +telephone +directories +world +yellow +pages +network +yellow +switchboard +white +pages +list +directories +news +cnnfn +newshour +washington +post +today +reuters +headlines +yahoo +news +page +social +cafe +news +world +report +boston +globe +span +seattle +times +views +slate +feed +salon +atlantic +monthly +harper +sports +espn +sports +zone +government +fedworld +index +white +house +congress +arpa +federal +budget +deficit +national +debt +clock +concord +coalition +hand +balancing +budget +bipartisan +commission +entitlement +reform +budget +american +universities +museums +links +past +life +pittsburgh +upcoming +birthdays +home +page +personal +home +pages +andfun +pagebobd +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..875da6d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,144 @@ +alan +borning +home +pagealan +borning +home +pagei +professor +department +computer +science +engineering +university +washington +principalresearch +interests +constraint +based +languages +systems +object +oriented +languages +logic +programming +languages +human +computerinteraction +computers +society +current +research +activitiesuwconstraints +page +contains +links +recent +papers +public +domainsource +code +constraint +satisfaction +algorithms +media +technology +democracy +groupuw +students +also +ideas +qualsprojects +teachinghere +pages +courses +taught +recently +programming +languages +autumn +concepts +programming +languages +autumn +computers +society +winter +human +computer +interaction +spring +informationhistory +grew +idaho +graduated +reed +college +mathematics +graduate +work +computer +science +atstanford +university +receiving +degree +dissertation +research +done +association +xerox +paloalto +research +center +concerned +constraint +oriented +simulationlaboratory +receiving +spent +year +post +doctoralfellow +department +artificial +intelligence +university +ofedinburgh +scotland +working +mechanics +problem +solving +symbolicalgebra +joined +computer +science +department +andexcept +sabbatical +spent +xerox +europarc +cambridge +england +havebeen +since +address +dept +computer +science +engineering +university +washington +seattle +phone +email +borning +washington +eduwww +http +washington +homes +borning diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..ff1efc83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,23 @@ +brad +home +pagebrad +chamberlainphoto +credit +mike +perkowitzthings +probably +couldn +care +less +office +address +things +work +things +like +things +added +subset +ofthe +brad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..babec909 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,151 @@ +brendan +home +pagebrendan +mumeyi +student +department +computer +scienceat +theuniversity +washington +entered +fall +expect +graduate +around +fall +contact +informatione +mail +brendan +washington +edufor +address +click +curriculum +vitaein +htmlorpostscriptformat +academic +interestsi +would +call +applied +theoretical +computer +scientist +current +work +computational +biology +moment +looking +problem +physical +mapping +building +rough +location +landmarks +genome +generally +speaking +interested +using +theory +math +tosolve +reasonably +practical +problems +also +done +work +incomputational +astrophysics +hpcc +grouphere +online +papersb +mumey +powerful +clone +overlap +test +html +postscript +poster +presentation +ismb +conference +mumey +computational +aspects +physical +mapping +probes +html +postscript +survey +paper +written +fulfill +candidacy +requirement +mumey +finding +clusters +quickly +parallel +html +postscript +presented +dimacs +challenge +klawe +mumey +upper +lower +bounds +constructing +alphabetic +binary +trees +html +postscript +presented +soda +conference +also +siam +ofdiscrete +mathematics +note +html +versions +produced +latexhtml +containsome +errors +readable +part +recreationhiking +cycling +skiing +climbing +drinking +coffeeto +name +used +sailingand +hope +sometime +like +play +bridge +older +photos +first +gallery +second +gallery +bookmarks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..1b69429f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,105 @@ +lauren +brickerlauren +bricker +computer +science +engineering +department +university +washington +seattle +click +need +clue +research +info +research +interest +primarly +graphical +userinterfaces +although +self +proclaimed +graphics +groupie +currently +working +stevetanimoto +mathematicsexperiences +image +processing +metip +project +goal +ofthis +project +interest +junior +high +school +kids +mathematics +usingexploratory +rather +rote +learning +methods +particular +minterested +developing +computer +supportedcollaborative +learning +cscl +user +interfaces +used +inthis +well +projects +workin +lawk +dawg +interfacea +fairly +extensive +resumeschool +dazethis +quarterdother +quartershuman +computer +interaction +course +spring +quarter +writeups +final +project +writeupwhat +asystems +course +interesting +course +computers +insociety +exciting +hobbies +used +enjoy +busy +lifesportscookingpottery +even +studio +garage +used +years +stuffbecause +asked +itaddresses +bricker +washington +last +modified +monday +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..c9bca0cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,9 @@ +adam +carlsonadam +carlson +carlson +washington +computer +science +carlson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..dae126b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,56 @@ +andrew +certain +home +page +andrew +certain +certain +washington +server +fixed +give +note +page +interesting +follow +directions +download +theviewer +look +models +currently +working +tonyderose +david +salesin +werner +stuetzle +duchamp +jovan +popovic +scanningproject +currently +building +viewer +download +requires +sgigraphics +workstation +paper +viewer +look +models +modified +netscape +shouldalso +work +browsers +modify +similar +computer +science +engineering +department +university +washington +seattle diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..d66711fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,99 @@ +chou +chou +welcome +homepage +grad +student +computerscience +washington +seattle +picture +right +first +school +fall +quarter +ross +thinks +scary +school +related +infothe +chinook +projectmy +fall +schedulemy +list +publicationscodes +workshop +codesignpersonalpersonal +info +taiwan +greece +resumefoodi +enjoy +cooking +people +open +restaurant +ofpeople +favorite +dishes +include +stir +fried +rice +noodles +beefskewers +recipes +line +toysbeing +computer +geek +computersand +cool +toys +taiwanesei +also +promoting +taiwaneselanguage +currently +developing +computer +tools +taiwanese +sureto +check +experimental +line +taiwanesedictionary +though +absolutely +required +pages +best +viewedif +install +chinese +character +fonts +using +java +enabled +browser +like +netscape +beable +java +applet +yellow +ball +bouncingover +barney +purple +dynosaur +last +updated +email +chou +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..8cc83cd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,153 @@ +corey +andersoncorey +andersonthings +interested +lately +setting +home +page +researching +color +perception +automatically +correcting +induced +color +competing +programmer +month +contest +searching +info +localtalk +adapter +setting +linux +playing +wavelets +international +collegiate +programming +contest +pacific +regional +contest +problems +results +final +problems +results +html +version +previous +years +final +problems +version +duke +internet +programming +contest +rsum +online +recent +august +reviewing +computer +graphics +text +reference +books +things +done +graduate +univserity +washington +highline +community +college +self +tuning +fpga +fall +helping +polly +organize +department +contribution +engineering +open +house +april +managing +chapter +treasury +spring +spring +developement +read +done +macintosh +programming +searching +good +internet +service +providercool +things +found +usage +statistics +lurker +guide +babylon +sunsite +linux +archive +dilbert +zone +home +page +brother +home +pageuseful +links +peek +inside +term +labs +html +reference +html +features +netscape +computer +science +engineering +department +home +page +university +washinton +home +page +uwtv +tech +notesmy +autumn +schedule +monday +tuesday +wednesday +thursday +friday +meeting +meeting +math +math +math +math +math +math +corin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..1bb6d69d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,75 @@ +craig +experiencecraig +kaplancurrent +location +student +department +computer +science +universityof +washington +seattle +washington +well +page +copyof +homepage +university +waterloo +time +modify +page +appropriate +tomy +current +situation +enjoyable +experience +near +undergraduatewas +grad +ball +photos +fromthe +ball +second +enjoyable +experience +occured +saturday +addressed +convocation +valedictorian +cannot +express +honoured +felt +wonderful +graduating +class +choose +represent +incidentally +didn +know +parents +found +valedictorian +minutes +start +ceremony +says +never +forgive +text +valedictorian +address +found +anyone +curious +visitor +number +last +updated +cskaplan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..54d884c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,65 @@ +cody +kwok +home +page +first +thing +thought +people +think +means +alive +asami +chiaki +welcome +cody +chung +kwok +ctkwok +washington +edui +graduate +student +working +weld +andoren +etzioni +planning +andsoftware +agents +sanctuaries +work +ingram +softbot +aiuw +contact +informationleisure +sanctuary +nausicaa +valley +windlaputa +castle +skyhyper +future +vision +gunnm +nausicaa +valley +wind +arch +vile +welcomes +java +applet +animator +takes +load +cody +kwok +last +modified +visitors +since +figure +doom +numerals diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..2171b951 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,37 @@ +darren +cronquist +darren +cronquist +darrenc +washington +department +computer +science +engineering +university +washington +seattle +welcome +home +page +last +updated +employment +information +currently +plan +complete +myph +html +postscript +resume +resume +curriculum +vita +curriculum +vitapersonal +information +rest +homepage +currently +underconstruction diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..0ddfcdbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,239 @@ +david +christianson +seconds +till +midnight +seconds +till +nowhere +baby +david +christiansondbc +washington +computer +science +engineering +department +university +washington +seattle +current +workin +spare +time +third +year +graduate +student +atthe +interested +inaiand +planning +gotten +user +interfaces +maybe +even +graphicsas +well +currently +studying +knowledge +representation +goal +directedbehavior +mixed +initiative +planning +activity +recognition +buzzwordacquisition +context +human +computer +interaction +recent +work +prototype +useful +intelligent +interfaces +bobdoorenbos +shopbot +rather +build +interface +plan +somehowintegrate +interfaces +various +line +stores +building +shopping +assistant +simple +assistant +built +applicationthat +automatically +reads +parses +shopping +baskets +order +determinewhat +products +user +interested +also +collaborated +sean +anderson +weld +david +salesin +michael +cohen +develop +simple +interactive +moviethat +demonstrates +automatic +camera +planning +apple +intern +worked +russell +planning +technologyinto +user +experience +also +developer +perpetrator +graphical +debugger +theucpop +family +planners +programmer +also +client +number +people +whose +lives +touched +grows +every +manual +ishere +spent +undergraduate +career +theuniversity +chicago +carboloading +harold +chicken +assisting +theanimate +agent +publicationschristianson +anderson +salesin +weld +cohen +declarative +camera +control +automatic +cinematography +appear +aaai +firby +christianson +mcdougal +fast +local +mapping +support +navigation +object +localization +symposium +sensor +fusion +boston +november +find +dave +work +thechateau +cynical +office +withfrederic +william +darren +adam +glorious +leader +juan +important +thing +favorite +activities +practice +judo +recently +competed +senior +nationals +sibling +sisterjust +graduated +school +information +library +studies +university +michigan +surfing +cutting +edge +research +information +supercollider +really +feel +like +slacking +mirsky +help +watch +heroes +youth +duel +death +wwwf +grudge +match +fame +fortune +check +response +week +check +games +domain +straight +doomgate +says +evil +book +tick +dave +christianson diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..89ef2f39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,44 @@ +home +page +dave +johnson +david +johnson +washington +computer +science +engineering +department +university +washington +seattle +research +interests +navigational +assistance +hypertext +readersprojects +activities +racquetball +golf +basketball +softball +tutorials +scripts +fits +together +take +theracquetball +quiz +quiz +created +take +look +thecreate +assessment +page +form +give +last +modified +monday diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..c8de5bff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,232 @@ +home +page +david +dion +david +dion +ddion +washington +personal +yeah +right +like +anything +personal +okay +maybe +little +academics +current +research +past +year +working +brian +bershad +primary +responsibility +construction +unix +server +thespinoperating +system +spin +operating +system +applications +achieve +impressive +performance +dynamically +extending +kernel +safety +protection +maintained +language +level +spin +extensions +written +modula +user +level +spin +unix +server +slight +variant +unix +server +thinks +running +mach +spin +dynamically +linked +extensions +used +intercept +system +calls +emulate +mach +kernel +environment +previous +research +spin +first +extensible +system +haveworked +undergraduate +studies +notre +dame +helped +implement +user +level +memory +management +extensible +communications +subsystem +stuff +afraid +time +around +surf +wouldn +claim +know +cool +stuff +ipromise +surfing +soon +netscape +enhancedthis +page +hold +breath +meanwhile +sites +visit +occasionally +work +distraction +university +washington +computer +science +engineering +reason +seattle +university +washington +homepage +featuring +date +view +campus +weather +conditions +spin +occupies +vast +majority +time +modula +language +programming +days +debugging +manual +solved +countless +problems +university +notre +dame +undergraduate +institution +notre +dame +bands +undergraduate +life +notre +dame +trumpet +section +undergraduate +life +racquetball +ladder +main +outlet +athletics +espn +sportzone +stay +touch +sports +world +today +stay +touch +rest +world +dilbert +learn +real +world +restaurants +fine +seattle +eateries +recommended +others +computer +science +movies +seattle +regional +movie +listings +transportation +line +guide +seattle +excellent +public +transportation +system +current +traffic +conditions +current +status +seattle +infamous +traffic +seattle +mariners +first +major +league +baseball +team +page +bean +shopping +page +visit +david +dion +last +modified +monday +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..2ee814a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,167 @@ +derrick +index +derrick +weathersby +bullssupersonicsi +pursuing +phdin +computer +science +university +washington +seattle +beautiful +campus +university +lies +heart +ofseattle +offering +many +diversions +graduate +students +often +fall +prey +therebyextending +time +average +graduate +student +career +practical +experience +interestssignificant +projects +skills +data +sheet +shared +server +project +data +parallel +array +language +compiler +host +token +ring +communication +protocol +development +data +base +design +administration +system +administration +network +administration +tools +securityresearch +experience +interestsmy +research +centers +around +parallel +distributed +computing +challengespresented +fields +ones +performance +portability +convenience +performance +typically +foremost +goal +running +parallel +ordistributed +environment +however +portability +suffers +finally +theseenvironments +offer +extra +challenges +asynchronous +independent +events +daunting +task +develop +systems +parallel +distributedenvironments +issues +address +group +project +research +page +spaa +paper +collective +communications +generals +topic +collective +comm +dissertation +topic +tool +integration +software +engineering +projectacademic +achievementsinstructor +extension +college +advanced +summer +curriculum +design +advanced +course +certificate +program +extension +collegeinstructor +extension +college +advanced +fall +teaching +assistant +started +undergraduate +tutoring +women +minoritystudents +department +computer +science +engineeringoutstanding +teaching +assistant +award +college +engineering +personal +interests +interactive +cnnfinanciala +newslet +would +javaw +derrick +weathersbyderrick +washington +edutue diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..a3696aac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,160 @@ +brian +dewey +home +pagebrian +deweyabout +first +year +student +department +computer +scienceand +engineering +university +washington +doyou +need +know +like +music +books +ilove +play +games +ride +bike +write +letters +drink +myfavorite +color +blue +favorite +ocean +atlantic +oldroommate +thinks +like +animal +public +available +finger +northern +irelandi +returned +trip +belfast +june +carrying +sixty +pagesof +notes +interviews +carry +luggage +need +getthose +semi +chaotic +notes +readable +hopefully +enlighteningformat +work +draft +feel +free +read +shoulder +periodically +make +drafts +available +html +postscript +formats +goal +encourage +frequent +feedback +soon +possible +please +read +mail +please +note +well +much +time +work +thisproject +lately +hopefully +progress +near +ofthe +summer +images +northern +ireland +notes +thecurrent +draft +sinn +fein +information +bibliography +postscript +statistics +terrorist +related +deaths +northern +ireland +postscript +statistics +terrorist +related +injuries +northern +ireland +postscript +game +already +addicted +recurious +game +check +links +fascinating +thorough +history +development +game +china +read +ancient +china +page +imageek +york +cuny +page +provides +many +links +interesting +sites +jansteen +page +thorough +seen +brian +dewey +dewey +washington +edulast +modified +tuesday +october diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..57789c15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,145 @@ +home +pagewelcome +fasulo +home +page +dfasulo +washington +third +year +graduate +student +department +computer +science +university +washington +graduate +williamscollege +computerscience +applied +mathematics +class +note +portrait +contain +slight +inaccuracies +finding +home +eastlake +seattle +work +department +computer +science +engineering +university +ofwashington +seattle +office +chateau +email +dfasulo +washington +academic +interests +computer +graphics +computational +biology +personal +interests +science +fiction +fantasy +written +otherwise +fact +probably +honest +identify +illustration +merlin +corwin +pictured +favorite +fictional +character +mine +roger +zelazny +chronicles +amber +image +taken +amber +drpg +published +phage +press +would +recommend +anyone +likes +books +also +recommend +series +babylon +creative +writing +fiction +poetry +absolutely +links +work +athletics +particular +order +tennis +kwon +distance +running +role +playing +random +things +depending +cats +random +things +homepage +friend +fellow +williams +alumnus +sean +sandys +look +woman +former +grad +student +wendy +belluomini +dressed +dogbert +people +asked +theory +worthwhile +area +research +whether +abstract +useful +better +explanation +goals +future +theory +ever +given +dfasulo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..70dbe30d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,34 @@ +martin +dickey +home +page +martin +dickeycomputer +science +engineeringuniversity +washingtonwelcome +home +page +weekly +schedulenarrative +resume +blurbcse +engr +autumn +favorite +seattle +coffee +housesfavorite +internet +site +sister +bookspirograph +java +script +garg +play +dickey +washington +eduupdated +tuesday +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..40e4934e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,233 @@ +dennis +dennis +primary +interest +high +performance +computer +architecture +researchwith +smart +people +likejean +loup +baer +brian +bershad +brad +calder +brad +chen +alan +eustace +dirk +grunwald +andted +romer +recent +papers +energy +management +issues +computer +systems +generals +paper +instruction +cache +fetch +policies +speculative +execution +baer +calder +grunwald +isca +dynamic +page +mapping +policies +cache +conflict +resolutionon +standard +hardware +romer +bershad +chen +osdi +avoiding +conflict +misses +dynamically +large +direct +mappedcaches +bershad +romer +chen +asplos +instruction +cache +effects +different +code +reordering +algorithms +qualifying +project +report +university +washington +contact +work +computer +science +engineering +department +university +washington +seattle +office +sieg +dlee +washington +home +seattle +index +pointer +hotlist +good +entry +points +exploration +yahoo +yellow +pages +internet +lycos +really +good +search +enginefor +seattle +guide +click +voting +millions +sensible +people +high +minded +concede +thatpolitics +almost +always +choice +lesser +evil +tweedledumand +tweedledee +vote +abstained +theyare +presented +president +appoints +people +going +torummage +around +lives +next +four +years +consider +allthe +people +home +stew +rather +vote +huberthumphrey +showed +humphrey +people +taught +huberthumphrey +lesson +still +enjoying +nixon +supreme +court +whentricia +julie +begin +find +silver +threads +among +gold +theblack +russel +baker +ford +without +flummery +commitment +committed +hesitancy +chance +draw +back +always +ineffectiveness +concerning +acts +initiative +elemental +truth +ignorance +kills +countless +ideas +splendid +plans +moment +definitely +commits +providence +moves +sorts +things +occur +help +would +never +otherwise +occurred +whole +stream +events +issues +decision +raising +favor +manner +unforeseen +incidents +meetings +material +assistance +magic +could +dreamed +would +come +whatever +dream +begin +goethe +dlee +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..56873124 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,140 @@ +another +unnecessary +home +pagececi +home +page +well +much +anyway +sure +casually +mention +name +home +page +buti +feel +strongly +bold +noless +doug +zongker +exhaustive +list +usual +research +publications +classeshow +contact +methe +unusual +toxic +custard +workshop +filesthe +mentos +galleryvisit +information +supercolliderthe +useless +today +date +wasting +time +cron +player +database +wasting +time +testing +grounds +caveat +user +sarcasm +lucky +browsing +today +avirtual +intended +treasure +trove +information +whichmay +actually +useful +really +sarcasm +first +year +grad +student +computer +science +engineeringdepartment +university +ofwashington +graduated +michigan +state +university +imajored +computer +science +andminored +math +dubious +honorsjunior +apprentice +keeper +brotherhood +crouton +death +carte +pizzicato +international +club +member +bryan +worst +executive +vice +president +charge +emergency +backup +clicker +cruising +information +highway +inhigh +gear +actually +sitting +buttstaring +computer +screen +tasteful +background +stolen +department +labor +homepage +wheremy +sister +works +department +homepage +doug +zongker +dougz +washington +home +research +classes +contact +last +edited +thursday +november +hits diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..c4b08420 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,128 @@ +dylan +mcnamee +dylan +james +mcnamee +dylan +washington +computer +science +engineering +department +university +washington +seattle +personal +information +research +concentrated +interaction +applicationsand +operating +systems +implemented +operating +system +mechanismthat +allowed +applications +implement +page +replacementpolicy +kernel +policy +caused +perform +poorly +helped +implement +scheduler +activations +machoperating +system +scheduler +activations +mechanism +thathelp +user +level +threads +systems +interact +properly +kernelthread +scheduler +experiences +lead +work +spin +project +folks +building +operating +system +architecturethat +generalizes +idea +application +tailorable +operating +systems +current +work +carries +research +started +opal +project +thesis +investigating +interaction +betweenobject +oriented +databases +oodbs +operating +system +virtual +memory +work +demonstrating +significant +improvements +performance +achived +using +commodity +operating +systems +different +currently +done +additional +improvementscan +come +modifying +operating +system +slightly +betterserve +oodbs +paperscvs +course +geoff +voelker +built +winter +quarter +systems +seminar +dedicated +java +gave +lectureintroducing +java +language +environment +slides +used diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..bb543bd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,39 @@ +christopher +lewis +home +page +christopher +lewis +graduate +student +dept +computer +science +engineering +university +washington +seattle +echris +washington +hello +glad +could +make +graduate +student +work +programming +language +project +office +hours +tentatively +monday +wednesday +sieg +christopher +lewis +last +modified +thur +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..1fba0c7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page +blank +ecrocke +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..9dc303a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,139 @@ +eddie +hong +home +page +know +second +year +graduate +computer +science +studentat +university +washington +well +tosay +business +type +people +little +time +hands +hadto +include +resume +link +postcriptand +plain +text +form +office +room +seig +hall +current +home +currently +working +anna +karlin +craig +chambers +theoretical +model +dynamic +compilation +specifically +workingon +developing +line +algorithm +work +well +fordynamic +compilation +plan +quals +project +accesses +since +august +historical +facts +free +time +activites +vine +branchesmy +sister +wanted +home +page +helped +create +also +list +various +people +know +another +page +interests +includes +keeping +computer +industry +sites +give +insightful +commentary +happening +knowof +places +please +mail +daveneti +power +macintosh +guess +makes +biased +towardsliking +macs +however +think +better +computers +eveneasier +come +places +sometimes +visit +apple +computer +check +seattle +freeway +traffic +look +advice +important +book +worldhere +useful +information +always +wanted +know +found +find +address +domain +names +find +country +mail +friends +stand +edhong +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..7c785b15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,99 @@ +susan +eggers +susan +eggers +http +washington +homes +eggers +department +computerscience +engineering +university +washington +seattle +voice +email +eggers +washington +office +sieg +hall +research +interests +computer +architecture +back +compilation +emphasis +onexperimental +performance +analysis +current +work +issues +incompiler +optimizations +dynamic +compilation +shared +data +optimizationsand +instruction +scheduling +processor +design +multithreadedarchitectures +current +research +projects +compile +time +algorithms +reduce +false +sharing +dynamic +compilation +multithreaded +architectures +spinprevious +research +cache +coherency +code +scheduling +compile +time +prefetching +shared +memory +machines +miscellaneous +tools +workload +news +asplos +program +committee +call +papers +asplos +homepage +information +asplos +looking +research +project +click +list +projects +might +quals +project +amasters +degree +beginning +thesis +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..41b592a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,311 @@ +emin +sirer +emin +sireregs +washington +department +computer +science +engineering +university +washington +seattle +backgroundi +currently +third +year +graduate +student +university +washington +grew +istanbul +turkey +received +computer +science +princeton +university +currently +working +towards +spinproject +prof +brian +bershad +spent +summer +bell +labsworking +theplan +operating +system +helped +build +prototype +display +thesummer +research +center +princeton +jersey +recently +summer +worked +thevesta +project +projectsmy +goal +develop +safe +adaptable +extensible +operating +systems +developed +threads +scheduling +synchronization +dynamic +linking +andprotection +domain +subsystems +spin +also +wrote +machine +specificaspects +spin +kernel +system +call +interrupt +paths +andsome +miscellaneous +interfaces +alarms +mach +compatibility +support +novel +aspects +spin +threads +scheduling +provide +means +ofextending +kernel +arbitrary +thread +implementations +schedulingpolicies +dynamically +linking +extensions +kernel +allowsus +achieve +high +performance +interface +called +strands +isdesigned +safe +provide +fault +isolation +novel +aspect +spin +protection +domain +interface +allowsisolation +safe +fine +grain +sharing +time +extensions +withconflicting +symbols +simultaneously +active +system +extensions +hide +code +data +beassured +possibly +access +clincher +extensionsthat +want +share +code +data +dynamic +protectionenforcement +overhead +also +implemented +high +performanceweb +server +spin +networkingstack +main +objectives +design +reduced +http +latency +andminimal +load +wrote +mips +instruction +simulator +couple +years +calledmipsi +robust +enough +simulate +spec +benchmarks +standard +ofnew +jersey +used +educational +tool +researchplatform +page +describes +mipsi +featuresand +availability +papers +safe +dynamic +linking +extensible +operating +system +wcsss +describes +spin +protection +namespace +management +mechanism +writing +operating +system +using +modula +wcsss +describes +experience +using +modula +implement +spin +extensibility +safety +performance +spin +operating +system +sosp +design +implementation +performance +paper +protection +software +issue +hotos +position +paper +comparing +software +hardware +protection +mechanisms +spin +extensible +microkernel +application +specific +operating +system +services +sigops +european +workshop +operating +systems +review +version +spin +extensible +microkernel +application +specific +operating +system +services +university +washington +technical +report +march +measuring +limits +fine +grained +parallelism +senior +independent +work +princeton +university +june +talkslanguage +support +extensible +operating +systems +slides +presentation +first +workshop +compiler +support +systems +software +wcsss +tucson +arizona +interestswhenever +find +time +opportunity +following +sailing +windsurfing +diving +skiing +bikingmaking +outdoor +clothing +andhiking +dylan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..0a971ec3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,174 @@ +eric +anderson +home +page +eric +andersonwhere +find +sieg +hall +department +computer +science +engineering +university +washington +seattle +home +street +seattle +page +longer +black +honor +recent +june +decisionin +aclu +reno +page +longer +black +mind +thedecision +merely +interim +step +could +read +take +probably +still +committing +felony +care +speech +freedom +themarketplace +ideas +facts +third +year +graduate +student +computer +science +means +imostly +panic +quals +project +also +trying +write +papers +prof +henry +greensideof +duke +university +recently +finished +master +thesis +onsteady +state +solutions +particular +nonlinear +biharmonic +stability +criterion +explicit +methods +restrictive +fourth +power +spatial +resolution +implicit +timesteppingmethods +backwards +euler +necessary +numerical +analysisissues +involve +newton +method +solving +implicit +nonlinearequations +sparse +matrix +solution +methods +solving +newtonstep +interplay +pictures +really +spiffy +working +body +code +astrophysics +simulation +work +support +project +data +structuresby +prof +richard +andersoni +working +signal +processing +computer +music +project +aiming +automatic +transcription +acoustic +signal +prof +anna +karlin +isthe +musician +interested +project +like +everybody +else +page +applets +first +link +text +small +graphics +section +material +preparedfor +last +fall +like +everybody +else +page +snapshots +mostly +family +prove +brothers +finally +weather +seattle +eric +washington +meanders +washington +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..902a2009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,292 @@ +oren +etzioni +home +pageoren +etzioni +home +pagedepartment +computer +science +engineering +university +ofwashington +seattle +washington +voice +mail +etzioni +washington +office +sieg +hall +room +brief +bioand +photo +current +research +internet +softbot +enables +human +user +state +heor +wants +accomplished +softbot +disambiguates +request +anddynamically +determines +satisfy +softbot +finalists +discoverawards +technological +innovation +computer +software +metacrawler +softbot +fielded +service +enables +searchmultiple +indices +parallel +provides +sophisticated +pruningoptions +netrecommends +metacrawler +search +service +choice +ahoy +softbot +white +page +service +locatesindividual +home +pages +high +accuracy +bruteforce +learning +brute +analyze +hypotheses +second +whenrun +sparc +selected +publications +efficient +information +gathering +internet +focs +moving +information +food +chain +deploying +softbots +theweb +aaai +ascalable +comparison +shopping +agent +world +wide +autonomous +agents +multi +service +search +comparison +using +metacrawler +postscript +html +softbot +based +interface +internet +cacm +july +intelligent +agents +internet +fact +fiction +forecast +ieee +expert +august +intelligence +without +robots +reply +brooks +magazine +december +learning +understand +information +internet +ijcai +sound +efficient +closed +world +reasoning +planning +toappear +first +robotics +aaai +additional +papers +students +advised +richardsegal +master +thesis +bernard +fileretrieving +softbot +neal +lesh +master +thesis +planner +unix +softbot +keith +golden +master +thesis +planning +universalquantification +incomplete +information +terrance +goan +master +thesis +learning +software +errors +mikeperkowitz +master +thesis +learning +understand +information +internet +erik +selberg +master +thesis +multi +service +search +comparison +using +metacrawler +oren +zamir +jonathan +shakes +undergraduate +students +advised +stephen +soderland +program +umass +amherst +julie +roomy +hewlett +packard +bruce +lesourd +robert +spiger +lockheed +research +center +william +alford +program +university +wisconsin +greg +fitchenholtz +hewlett +packard +guido +hunt +dymitr +mozdyniewicz +quark +machine +learning +resources +machine +learning +database +repository +irvine +machine +learning +programs +repository +irvine +knowledge +discovery +minecontaining +information +knowledge +discovery +databases +neuroprosearchive +containing +recent +papers +relevant +neural +networks +machine +learning +information +services +university +illinois +inductive +learning +group +statlib +containing +data +algorithms +information +relevant +statistics +machine +learningtoolbox +bonn +german +list +usenet +faqs +access +count +since +etzioni +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..99546999 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,39 @@ +mike +feeley +home +pagemike +feeley +computer +science +finishing +done +soon +thesis +concerns +global +memory +management +workstationclusters +also +work +distributed +computing +opalprojects +joining +faculty +university +british +columbia +injanuary +information +available +using +links +papersmy +research +summarycvsoutheast +idaholast +modified +july +mike +feeley diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..1dfabd87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,28 @@ +chris +fisher +home +page +picture +chris +fisherdepartment +computer +science +engineeringbox +university +washington +seattle +fisher +washington +voice +mail +fisher +washington +sieg +hall +room +page +currently +construction +return +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..755fbfcf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,80 @@ +department +computer +science +engineeringuniversity +washington +seattle +sieg +hall +washington +schedulethis +quarter +autumn +taing +courses +rather +working +generals +exam +check +scheduleto +otherwise +around +probablybe +found +library +somewhere +nice +reading +papers +research +activitiesmy +main +interest +computer +algorithms +specifically +areasof +parallel +computation +computational +geometry +publications +meanderings +places +denny +outta +mind +vista +peas +music +sites +chateau +gallery +fund +drive +things +alec +wolman +might +server +seven +lost +souls +captured +html +listen +phone +booth +mofo +people +lucy +place +paul +peach +ruel +might +look +like +moment +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..4d060c4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,112 @@ +francesmary +modugno +home +pagefrancesmary +modugno +home +page +department +computer +science +engineeringuniversity +washingtonbox +seattle +office +sieg +hall +phone +mail +washington +research +interests +main +interest +human +computer +interaction +also +interested +user +programming +formal +modelling +software +specification +verification +technology +policy +welcome +opportunity +collaborate +related +topics +current +research +projects +software +safety +human +machine +interface +previous +research +publications +available +online +summary +ofmy +thesis +research +formal +models +real +time +concurrent +distributed +systems +parallel +algorthims +professional +activities +basic +research +symposium +chair +uist +demonstrations +chair +education +computer +science +carnegie +mellon +university +march +computer +science +carnegie +mellon +university +august +computer +science +mathematics +cornell +university +activities +anything +recent +interests +includecycling +skiing +languages +cultures +currently +spanish +previouslyitalian +vegetarian +cooking +ellery +line +greetings +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..6c71f0b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,50 @@ +george +forman +home +pagegeorge +forman +home +pagei +received +computer +science +optimizing +compiler +ariadne +scalable +pattern +matching +parallel +trace +debugger +publications +mobile +computing +hyperlink +library +something +interesting +free +handy +software +scripts +written +word +puzzle +water +song +change +netscape +animation +george +forman +gforman +comhome +page +mail +finger +weather +dept +live +picture +generated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..d835dbba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,60 @@ +home +page +marc +friedman +marc +friedman +home +page +course +page +watercolors +applets +camping +checklists +spanish +english +collaborative +dictionary +poetry +research +favorite +quotes +links +elsewhere +occam +information +gathering +agent +keith +golden +keith +wordbot +bike +trip +artificial +intelligence +cody +kwok +weld +ucpop +planner +research +tool +changed +life +works +nietzschein +english +netscape +bookmarks +file +every +page +reference +visitor +since +marc +friedman +friedman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..eb63543a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,32 @@ +charlie +garrett +home +page +charlie +garrett +address +seattle +research +interests +compilers +graphics +neural +networks +genetic +algorithms +game +playing +algorithmspapers +line +genetic +algorithmsformerly +member +cecil +group +university +ofwashington +bookshelf +audio +file +garrett +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..93e07433 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,56 @@ +georges +winkenbach +georges +winkenbachdepartment +computer +science +engineeringuniversity +washingtonbox +seattle +mail +georgew +washington +eduphone +interests +computer +graphics +multimedia +thesis +work +doneunder +supervision +professor +david +salesin +deals +applying +traditional +illustration +techniques +theautomatic +rendering +three +dimensional +models +imagescreated +prototype +rendering +system +found +links +follow +image +gallery +grail +graphics +imaging +laboratory +department +computer +science +engineering +wife +home +page +taweewan +siwadune diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..b9b9ff22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,289 @@ +greg +badros +home +pagegreg +badros +welcome +home +page +last +updated +email +washington +eduaddress +nesbit +seattle +hello +welcome +homepage +please +feel +free +send +feedback +email +address +page +always +isuppose +construction +keep +checking +back +excitingfeatures +recent +graduate +dukeuniversity +completed +degree +double +major +incomputer +science +mathematics +spring +amcurrently +employed +part +time +senior +research +scientist +fortransworld +numerics +small +company +originally +located +indurham +headquarters +bermuda +work +myapartment +seattle +washington +also +full +time +graduate +student +computerscience +engineering +department +university +ofwashington +fall +courses +home +pagecse +home +pagecse +home +pagecse +software +engineering +seminarcse +compiler +seminar +newer +stuff +philosophy +mathematics +course +notes +misc +patches +fvwm +first +place +best +show +redhat +desktop +configuration +competition +entry +emacs +configuration +readme +file +emacs +configuration +archive +file +zshell +configuration +readme +file +zshell +configuration +archive +file +chronicle +duke +university +newspaper +article +transworldnumerics +spring +ieeenational +programming +competition +victory +vertices +winter +issue +duke +magazine +contains +recent +article +geneticalgorithms +personal +links +rsum +data +date +first +business +sample +drew +bycomputer +simple +magic +created +canterbury +progressive +various +random +pictures +life +definitelynot +work +many +hobbies +including +tennis +skiing +especially +jackson +hole +jackson +volleyball +juggling +piano +playing +mathematical +games +rubik +cube +linux +freeware +unix +music +especially +sarahmclachlan +billy +joel +joel +yahoo +page +lists +links +parliamentary +procedure +ncaa +basketball +interesting +links +lycos +search +duke +computer +science +home +page +duke +university +home +page +chronicle +duke +community +daily +newspaper +univ +washington +home +page +unofficial +seattle +microsoft +corporation +world +wide +server +gateway +users +group +historic +computer +images +hotjava +global +network +navigator +home +page +perl +practical +extraction +report +language +virtual +library +inter +language +unification +interesting +devices +connected +writing +html +sgml +seinfeld +index +page +friends +sitcom +parts +material +based +upon +work +supported +national +science +foundation +graduate +fellowship +opinions +findings +conclusions +recommendations +expressed +publication +author +necessarily +reflect +views +national +science +foundation +greg +badros +washington +computer +science +engineering +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..396ddd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,414 @@ +greg +linden +home +page +greg +lindenmy +lovely +wifecorina +currently +third +year +graduate +program +political +sciencehere +third +year +graduate +program +thecomputer +science +department +university +ofwashington +slaving +away +toward +lofty +goal +completed +undergraduatedegree +university +california +diego +anodd +double +major +computer +science +political +science +going +leave +graduate +school +december +mactively +looking +position +software +development +interested +check +resume +java +applets +webview +allows +links +page +orset +pages +additions +webview +family +altavistawebviewand +metawebview +instead +entering +enter +keywords +foraltavista +metacrawler +webview +hits +search +service +returning +graph +displaying +pages +found +searchservices +links +pages +automated +travel +assistant +emulates +dialog +travelagent +client +gradually +eliciting +flight +preferences +whileallowing +browse +real +flight +data +research +prototype +quite +functionality +even +early +stage +webview +highly +rated +gamelan +andjars +wasrated +java +applets +jars +cool +gamelan +andwas +gamelan +staff +pick +webview +wasrated +java +applets +jars +cool +gamelan +andwas +gamelan +staff +pick +gamelan +staff +pick +altavistawebview +winners +thejava +repository +java +contest +applets +also +published +books +walsh +foundations +java +books +meilleur +java +best +java +first +java +applet +linear +ballet +oops +using +java +capable +browser +sorry +cool +java +applets +source +demonstrates +using +double +buffering +avoid +flicker +using +threads +give +time +running +applets +code +certainly +could +cleaner +though +expect +work +thought +cool +might +enough +standardsto +impressed +mylgrammer +particle +tree +applets +draw +trees +lgrammer +trees +look +much +realistic +theparticletree +trees +interesting +recently +started +position +jars +judge +evaluating +java +applets +submitted +thejars +archive +interesting +work +summer +developed +dawn +civilization +ademonstration +application +showing +planning +techniques +cansuccessfully +applied +entertainment +software +myriadsoftware +ucsd +worked +professor +belew +filippo +menzer +latentenergy +environments +project +tool +developingartificial +life +models +experimenting +artificial +neuralnetwork +evolutionary +learning +software +enviroments +papers +hanks +lesh +linden +eliciting +user +preferences +theautomated +travel +assitant +submitted +user +modeling +majeski +linden +linden +spitzer +modeling +localizedinteraction +spatial +constraints +iterated +prisoner +dilemma +association +economic +scientists +krishnamoorthy +paturi +blume +linden +liden +esener +hardwaretradeoffs +boolean +concept +learning +world +congress +neuralnetworks +linden +recurrent +neural +networks +iterated +prisoner +sdilemma +unpublished +honor +thesis +adam +carlson +sujay +parekh +wrote +funray +tracer +ofthe +projects +incs +graduate +computer +graphics +images +headless +horseman +closeup +headless +horseman +chess +duel +assembly +required +spheres +image +withreflection +transparency +shadows +distributed +tracing +adaptivesampling +mess +cool +things +patterns +thespheres +transparent +reflective +causingthe +rays +reflect +refract +multiple +times +surfaceand +internally +also +made +second +computer +animation +called +strike +theanimation +written +using +inventor +code +manipulate +thed +models +original +movie +file +made +alower +quality +quicktime +movie +available +quality +mbquicktime +movie +available +sorry +doesn +compress +anyfurther +least +anything +resembling +reasonable +quality +programming +stuff +dilbert +cognitive +science +ucsd +repository +artificial +life +info +occasionally +found +chateau +guggenheim +annex +computer +science +engineering +university +washington +seattle +glinden +washington +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..90a2e048 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,32 @@ +brian +kris +grant +home +pagebrian +home +away +homework +related +infowork +backgrounduwdynamic +compilation +groupuw +department +computerscience +engineeringpersonal +stuffpersonal +backgroundmy +daughter +isismy +trip +singaporemy +bookmarksmy +public +keylast +updated +october +brian +kris +grant +grant +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..1e5e947b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,111 @@ +dave +grove +dave +grove +department +computer +science +engineering +university +washington +seattle +office +chateau +sieg +worki +spend +time +playing +cecil +cecil +pure +object +oriented +langauge +using +vehicle +integrated +research +areas +language +design +programming +environments +optimizing +compilers +also +used +hordes +consultants +hanging +aroundthe +fringes +spinproject +actually +attempting +graduate +sometime +soonish +much +less +frequently +papers +authored +authored +play +wouldn +complete +without +dilbertfix +strip +thathits +little +close +home +currently +managing +underacheiving +fantasy +football +team +used +spend +summers +hampshire +working +guys +scoutreservation +greaterlowell +council +picture +casunset +taken +right +cabin +kicks +another +every +boys +office +something +silly +white +water +rafting +trip +especially +cool +spending +months +toronto +drove +back +toseattle +took +number +detours +along +somehihglights +trip +grove +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..54dbba89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,105 @@ +scott +hauck +scott +hauck +hauck +washington +computer +science +engineering +department +university +washington +seattle +year +graduate +student +university +washington +currently +working +multi +fpga +systems +rapid +prototyping +board +level +designs +thoughi +also +interested +asynchronous +circuits +fpga +architectures +parallelism +graduate +june +personal +biography +education +experience +publications +curriculum +vitaeresearch +asynchronous +circuits +survey +current +asynchronous +design +methodologies +well +first +fpga +asynchronous +circuits +triptych +montage +fpga +architectures +development +triptych +montage +fpga +architectures +architectures +improved +densities +current +commercial +fpgas +multi +fpga +systems +rapid +prototyping +development +springbok +rapid +prototyping +system +board +level +designs +well +partitioning +assignment +routing +topology +work +general +multi +fpga +systems +chinook +project +hardware +software +design +synthesis +simulation +system +embedded +applications diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..68b1569e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin +hinshaw +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..31b95cce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,73 @@ +place +place +macduff +ultrasound +image +emma +elspeth +macduff +name +subject +change +without +notice +unborn +daughter +weeks +fromconception +weeks +ripe +inmid +december +view +profile +lying +back +lookingup +head +right +upper +half +torso +theleft +busy +obsessing +impending +fatherhood +master +thesis +part +chinook +project +pass +time +silly +possible +also +remember +myspam +unfortunately +also +waste +time +html +browse +around +interesting +stuff +usingwebcrawler +pointers +neat +stuff +frogstv +nationpenn +tellermusic +lyricsian +macduff +washington +dept +computer +science +engineering +univ +washingtonseattle diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..6f531488 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,210 @@ +herve +jamrozikherve +jamrozik +postdoc +university +washington +since +september +department +computer +science +engineeringuniversity +washingtonbox +seattle +office +sieg +hall +phone +mail +jamrozik +washington +research +main +interests +distributed +systems +object +oriented +languages +software +engineering +currently +working +global +memoy +management +workstation +cluster +hank +levy +mary +vernon +anna +karlin +mike +feeley +geoff +voelker +high +speed +networks +greatly +encourage +network +memory +cache +virtual +memory +file +pages +thereby +reducing +need +disk +access +network +nodes +memory +intensive +applications +primary +memory +lightly +loaded +nodes +temporary +backing +store +introduce +level +memory +hierarchy +namely +global +memory +cache +lies +logically +local +memory +disk +pages +fundamental +transfer +access +units +remote +memory +systems +page +size +performance +factor +recently +page +sizes +modern +processors +increasing +order +provide +coverage +amortize +disk +access +costs +unfortunately +high +speed +networks +small +transfers +needed +provide +latency +trend +page +size +thus +odds +network +memory +high +speed +networks +studied +subpages +means +reducing +transfer +size +latency +remote +memory +environment +reducing +network +latency +using +subpages +global +memory +environment +jamrozik +feeley +voelker +evans +karlin +levy +vernon +inproceedings +seventh +conference +architectural +support +programming +languages +operating +systems +october +postscript +thesis +research +debugging +distributed +object +oriented +system +theuniversite +joseph +fourier +grenoble +involved +guideproject +laboratoire +bull +imag +part +imaginstitut +extreme +people +area +snot +visit +louvre +gallery +look +maps +france +europe +world +somefamily +pictures +somefriends +pictures +jamrozik +washington +eduv +march +december diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..f1c005a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,30 @@ +jason +secosky +home +pagejason +secosky +jasons +washington +eduaddress +computer +science +engineering +department +sieg +hall +cuniversity +washington +boxseattle +office +frequently +used +pages +projectseattle +weather +forecast +weather +channel +jason +secoskylast +modified +thursday diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..ff231fc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,93 @@ +jeremy +baerjeremy +baer +never +schooling +interfere +education +mark +twain +stuff +dreams +made +william +shakespearei +currently +graduate +student +computer +scienceat +university +washington +interests +include +artificial +intelligence +human +computerinteraction +multimedia +educational +software +software +engineeringtools +computer +generated +music +personal +creative +stuff +cool +places +spend +significant +time +pierian +spring +softwareoregon +museum +science +industry +omsi +pomona +collegehere +look +current +projects +mine +eight +puzzle +java +applet +work +progress +experimental +virtual +travel +applet +copy +effects +demo +applet +questioner +project +static +layer +analysis +programs +feeling +stressed +really +silly +little +macintosh +thati +wrote +years +download +like +metacrawler +searchcopyright +jeremy +baer +jbaer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..e3699159 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,74 @@ +jeremy +buhler +home +pagejeremy +buhler +home +pagedo +attempt +adjust +browser +controlling +transmission +status +first +year +student +institution +university +washington +department +computer +science +engineering +office +sieg +hall +office +phone +home +phone +mail +address +jbuhler +washington +finger +tako +washington +important +stufflecture +notes +suffix +trees +postscript +latex +research +coming +soon +course +schedulemy +public +keycyber +activism +electronic +frontier +foundation +grinsrecommended +readingmy +undergraduate +alma +mater +rice +universityquotable +quotesmy +home +page +return +control +browser +jeremy +buhler +jbuhler +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..208b0cc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,376 @@ +jeff +dean +jeff +dean +department +computer +science +engineering +university +washington +seattle +office +chateau +sieg +view +office +would +danged +buildings +weren +future +plansi +plan +graduate +summer +joining +western +research +laboratory +sunny +palo +alto +bought +house +nearby +menlo +park +curriculum +vita +postscript +also +summary +postscript +research +teaching +experience +projectsi +work +primarily +cecil +project +cecil +pure +object +oriented +langauge +using +avehicle +integrated +research +areas +language +design +programming +environments +optimizing +compiler +techniques +weintend +techniques +scale +large +real +world +programs +andto +keep +true +goal +implementing +vortexcompiler +cecil +currently +lines +cecil +codein +compiler +much +work +group +involves +tracking +compiler +bugs +also +hang +spinproject +meetings +spin +extensible +operating +systemmicrokernel +supports +dynamic +adaptation +system +interfacesand +implementations +direct +application +control +stillmaintaining +system +integrity +inter +application +isolation +spring +quarter +organizers +department +compiler +seminar +research +interests +research +concerns +efficient +implementation +ofobject +oriented +languages +compiler +optimization +techniques +particular +exploring +whole +program +analysis +usedto +improve +performance +affects +incremental +compilation +andhow +interacts +optimization +techniques +especiallyprofile +guided +optimization +techniques +also +interested +howwhole +program +optimization +affects +language +design +assumedthat +compiler +access +entire +program +manycompromises +made +existing +language +designs +become +unnecessary +interested +applying +lessons +learned +exploring +wholeprogram +optimization +towards +designing +systems +programminglanguage +flexible +efficient +based +underlyingimplementation +whole +program +analysis +validate +research +three +principaldesigners +developers +vortex +optimizing +compiler +forobject +oriented +languages +vortex +defines +language +independentintermediate +language +object +oriented +languages +ishigh +enough +level +permit +optimization +features +messagesends +closures +object +creations +also +contains +unique +wayof +composing +optimization +passes +parallel +obtaining +better +results +repeatedly +running +passesseparately +example +compiler +applies +intraprocedural +classanalysis +profile +guided +receiver +class +prediction +inlining +aliasanalysis +splitting +single +combined +pass +part +work +vortex +develop +ways +structuringoptimization +passes +permit +kind +composition +stillallowing +passes +developed +largely +independently +eachother +nice +framework +specifying +iterative +data +flowanalyses +permits +clients +develop +optimization +passes +withrelatively +little +effort +example +vortex +dead +assignmentelimination +pass +approximately +lines +code +publicationssome +recent +papers +authored +authored +personali +love +spicy +food +mild +four +letter +word +coke +probably +kick +caffeine +habit +enjoyable +moments +spent +wife +heidi +daughter +victoria +would +really +like +someday +wife +spent +honeymoon +kauai +hurricane +iniki +recently +biplane +ride +galvin +flying +seattle +guess +never +anything +anymore +wife +took +flight +consisted +minute +flight +around +downtown +seattle +puget +sound +travel +model +biplane +feeling +daring +sadly +insurance +coverage +doesn +permit +passengers +walk +wing +back +enjoyed +ride +view +would +fantastic +even +highly +recommended +looking +something +seattle +number +rather +lengthy +hotlist +jdean +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..7394007d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,279 @@ +damon +reese +home +pagejon +damon +reesepost +doctoral +researcher +safety +critical +software +groupdepartment +computer +science +engineeringuniversity +washingtonbox +seattle +office +chateau +guggenheim +annex +phone +mail +jdreese +washington +research +interested +problem +safety +critical +software +aware +devices +systems +structures +cause +catastrophes +fail +computer +hardware +becomes +less +expensive +expectations +placed +software +computers +placed +control +wider +range +applications +software +advantages +conventional +technologies +flexibility +advantages +come +price +software +behavior +complex +unpredictable +perhaps +best +publicized +example +three +mile +island +incident +operators +great +difficulty +diagnosing +state +system +emergency +requirements +stage +software +development +project +success +especially +respect +safety +reason +colleagues +concentrated +requirements +especially +communication +requirements +requirements +specification +developed +state +based +language +called +requirements +state +machine +language +rsml +validated +usefulness +language +specifying +tcas +avionics +system +doctoral +thesis +invented +hazard +analysis +procedure +based +hazard +operability +hazop +study +significant +concept +procedure +borrows +hazop +deviation +hence +name +deviation +analysis +link +html +transcription +dissertation +current +projects +deviation +analysis +write +conference +article +summarizing +deviation +analysis +make +deviation +analysis +software +available +safety +critical +software +group +study +possibility +dynamic +display +control +deviation +analysis +searches +siang +rsml +tool +integrate +deviation +analysis +software +rsml +tool +kurt +partridge +make +alpha +version +rsml +tool +publicly +available +kurt +partridge +sean +sandys +rsml +semantics +draft +semantics +document +including +discussion +rsml +variants +develop +example +improved +semantics +academic +history +information +computer +science +university +california +irvine +dissertation +software +deviation +analysis +postscript +computer +science +linguistics +rice +university +waxahachie +high +school +waxahachie +publications +nancy +leveson +mats +heimdahl +holly +hildreth +reese +requirements +specification +process +control +systems +ieee +transactions +software +engineering +september +postscript +steven +dolins +reese +curve +interpretation +diagnostic +technique +industrial +processes +ieee +transactions +industry +applications +january +february +leveson +heimdahl +hildreth +reese +ortega +experiences +using +statecharts +system +requirements +specification +sixth +international +workshop +software +specification +design +como +italy +october +jdreese +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..4aa096fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,191 @@ +home +page +jack +lojack +lojlo +washington +department +computer +science +engineering +university +washington +seattle +home +page +currently +construction +research +papers +converting +thread +level +parallelism +instruction +level +parallelism +simultaneous +multithreading +abstract +postscript +jack +susan +eggers +joel +emer +henry +levy +rebecca +stamm +anddean +tullsen +submitted +publication +july +exploiting +choice +instruction +fetch +issue +implementable +simultaneous +multithreading +processor +abstract +postscript +dean +tullsen +susan +eggers +joel +emer +henry +levy +jack +rebecca +stamm +proceedings +annual +international +symposium +computer +architecture +philadelphia +compilation +issues +simultaneous +multithreading +processor +postscript +jack +susan +eggers +henry +levy +anddean +tullsen +proceedings +first +suif +compiler +workshop +stanford +january +improving +balanced +scheduling +compiler +optimizations +increase +instruction +level +parallelism +abstract +postscript +jack +susan +eggers +proceedings +sigplan +conference +programming +language +design +implementation +jolla +california +june +pages +comparing +static +dynamic +scheduling +superscalar +processors +jack +general +examination +written +report +examining +interaction +balanced +scheduling +compiler +optimizations +jack +loph +qualifying +examination +written +report +currently +working +architectural +compiler +support +simultaneous +multithreading +research +interests +also +include +static +dynamic +scheduling +superscalar +vliw +processors +instruction +level +parallelism +issues +well +compilation +multithreaded +architectures +particular +investigating +compilation +issues +simultaneous +multithreading +personal +jack +pages +find +franklin +eseattle +orsieg +hall +room +phone +couple +pictures +recent +paintball +experience +picture +picture +yahoojlo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..437ab056 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,54 @@ +sherman +home +page +shermanjoebob +washington +department +computer +science +engineeringuniversity +washingtonbox +seattle +usamy +research +interests +user +interface +designinformation +navigation +visualization +projects +activities +user +interfaces +informal +local +survey +useclass +project +hcreating +impressive +home +pagequality +time +sarahsoftballstuff +might +want +automatic +suggestions +page +links +user +interface +research +related +topics +directory +useful +pagesif +browser +supports +send +mail +tojoebob +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..089cabad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,73 @@ +home +page +joshua +seims +home +page +joshua +seims +abstract +joshua +seims +biologically +based +neural +network +system +currently +tested +theuniversity +washington +department +computer +science +begunin +lockean +blank +slate +josh +learned +emulate +observedbehavior +successfully +accomplished +several +tasks +graduatingfrom +college +traveling +volition +recently +started +demonstrate +potential +independent +creative +thought +current +taskwe +presented +josh +earn +computer +science +expect +take +several +years +document +provides +overviewof +josh +cognitive +ambulatory +achievements +organized +personal +academichierarchies +additionally +future +work +discussed +within +contexts diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..434a838b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,7 @@ +jovan +home +page +jovan +home +page +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..4f1b722a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,78 @@ +joanna +powerjoanna +pagehi +joanna +cats +academic +interestsmy +main +interest +computer +science +graphics +grad +school +uwneat +stuff +alma +matercool +graphics +links +jonathan +shadegraphics +research +uwduotone +reproductionmy +main +academic +interest +biology +especially +genetics +molecular +biology +alma +matermost +recent +site +gainful +employmentpublications +joanna +power +brad +west +eric +stollnitz +david +salesin +reproducing +color +images +duotones +proceedings +siggraph +pages +york +real +lifepast +homesdiversionsgender +issuesstatus +women +computer +sciencenow +home +pagefeminist +majority +onlineultimate +frisbeefun +stufffroggy +page +sean +quotesbrad +comics +musicevan +jokesed +page +pagesmy +herojpower +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..2737396d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,19 @@ +jonathan +shakes +jonathan +shakes +sieg +hall +computer +science +engineering +university +washington +seattle +ahoy +homepage +finderresumlinkslast +update +august +jshakes +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..843175d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan +alemanyjuan +alemany +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..7b7c0992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,157 @@ +kari +pulli +home +pagekari +antero +pullii +third +year +graduate +student +computer +science +engineeringdepartment +university +ofwashington +working +interested +computer +graphics +computer +vision +andmathematics +trying +combine +aspects +thesedisciplines +research +professors +department +work +closest +tonyderose +graphics +actually +left +uwfor +pixar +lindashapiro +vision +additionally +work +werner +stuetzle +andjohn +mcdonald +statistics +duchamp +mathematics +andhugues +hoppe +rick +szeliski +microsoft +research +quals +project +tribors +triplet +based +object +recognitionsystem +working +linda +report +technical +report +department +computer +science +engineering +universityof +washington +currently +working +surfacereconstruction +range +data +multiple +baseline +camerasystem +obtaining +data +subdivision +surfaces +waveletanalysis +surface +geometry +reflectance +functions +passed +general +examination +topic +rigidregistration +data +click +find +class +projects +year +computer +architecturesystems +class +taught +susaneggers +distributed +computing +theclass +taught +brianbershad +image +understanding +theclass +taught +steven +tanimoto +presented +technical +sketches +siggraph +getto +slides +subdivision +surfaces +slides +removing +wavelets +hereare +slides +speaker +notes +eacutesum +eacute +find +sieg +hall +computer +science +engineering +university +washington +seattle +email +kapu +washington +home +union +seattle +folks +takavainiontie +oulu +finland +kari +antero +pulli +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..7f5caf4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,20 @@ +anna +karlinanna +rochelle +karlinassociate +professor +university +washington +sincejuly +work +computer +science +engineering +department +university +washington +seattle +home +page +paperskarlin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..2e34c142 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,13 @@ +home +page +yeunghome +page +yeungpersonal +infomy +picturemy +researchtelnet +machinessend +emails +back +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..3a3f2cc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,109 @@ +kurt +partridge +kurt +partridge +academic +information +graduate +student +computer +science +department +university +washington +interests +include +software +engineering +software +specifications +specification +usability +readability +applications +formal +methods +specifications +software +safety +page +describes +others +work +areas +also +dabbled +user +interface +design +human +computer +interaction +java +programming +recent +publications +kurt +partridge +bddtcl +environment +visualizing +manipulating +binary +decisiondiagrams +interactive +poster +html +postscript +poster +preview +postscript +nancy +leveson +bauer +mats +heimdahl +wayne +ohlrich +kurt +partridge +vivek +ratan +reese +environment +safety +critical +software +nasa +conference +safety +quality +postscript +background +started +graduate +school +completing +computer +science +berkeley +lived +lovely +suburban +life +thousand +oaks +parents +sister +named +otis +right +kurt +humor +corner +university +washington +seattle +voice +kepart +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..c1cc0822 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,53 @@ +keith +golden +home +page +questa +pagina +anche +italiano +keith +golden +researchsoftbotsplanningkrselected +publicationscurriculum +vitae +also +inpostscriptrandom +hackingwordbot +collaborative +dictionaries +likes +bicycle +touring +languages +painting +photography +nature +coffee +godless +pinko +stuff +dislikes +suits +lawyers +cars +friends +ellenmarcruben +laurennickrich +joannavivek +advisors +oren +etzioni +weld +keithgolden +department +ofcomputer +science +engineering +university +washington +seattle +kgolden +washington +complete +list diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..474281a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,219 @@ +kingsum +chow +kingsum +chow +kingsum +washington +kingsum +washington +educomputer +science +engineering +department +university +washington +seattle +usathis +information +highway +always +construction +table +contents +personal +research +upcoming +conferences +resume +current +schedule +bridge +glossary +universities +hong +kong +suggestions +feedbackresearchmy +advisor +david +notkin +asynchronous +software +evolution +software +development +toolspapers +line +software +quality +management +responsibility +driven +software +evolution +ready +kingsum +chow +david +notkin +semi +automatic +update +applications +response +library +changes +technical +report +revised +version +appear +icsm +kingsum +chow +david +notkin +asynchronous +software +evolution +asia +pacific +workshop +software +engineering +research +march +hong +kong +kingsum +chow +program +transformation +asynchronous +software +maintenance +proceedings +icse +workshop +program +transformation +software +evolution +william +griswold +editor +international +conference +software +engineering +april +seattle +washington +useful +sites +pccts +sorcererpccts +home +page +pccts +page +terrence +parr +notes +pccts +newbiesresumepleasedrop +mailto +resume +specify +text +postscript +format +universities +hong +kongchinese +university +hong +kong +university +hong +kong +university +science +technology +hong +kong +polytechnic +university +city +university +hong +kongsingapore +sitessingapore +online +world +wide +server +alumnus +websome +campus +friendstom +liew +fook +home +page +wang +page +jiang +weidongu +relateduniversity +washington +style +policy +manual +theses +dissertations +graduate +school +webserver +university +book +storeinvestmentsfree +minute +delayed +quote +watch +quote +market +data +experimental +mutual +fund +charts +line +investment +center +stocks +commodities +technical +analysismisc +read +chinese +list +thomas +china +news +services +welcome +online +hong +kong +movies +movies +movies +visits +since +kingsum +chow +last +modified +date diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..ef739c6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,108 @@ +kevin +bolding +kevin +boldingkwb +washington +computer +science +engineering +department +university +washington +seattle +juvenile +squirt +wanders +searching +suitable +rock +hunk +coral +cling +make +home +life +task +rudimentary +nervous +system +finds +spot +takes +root +doesn +need +brain +eats +rather +like +getting +tenure +dennett +consciousness +explained +research +currently +working +building +high +speed +latencylan +chaotic +routers +previous +researchhas +chaoticrouting +form +minimal +adaptive +routing +formassively +parallel +multicomputers +professional +assistant +professor +electrical +engineering +seattle +pacific +university +also +working +part +time +researchassociate +university +ofwashington +significant +papers +written +archives +ofthe +chaotic +routing +group +spend +time +teaching +electrical +computer +engineeringat +seattle +pacific +university +personal +photos +took +comethyakutake +seattle +moustache +real +case +want +visit +home +another +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..e3a15fff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,45 @@ +richard +ladnerrichard +ladnerprofessor +department +computer +science +engineering +university +washington +seattle +mail +ladner +washington +phone +office +sieg +hall +room +personal +short +biographyresearch +publications +studentsteachingcomputer +programming +fall +quarter +introduction +computer +communication +networks +spring +quarter +introduction +formal +model +computer +science +winter +quarter +data +structures +spring +quarter +ladner +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..400949b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,147 @@ +larry +mcmurchie +home +page +larry +mcmurchiedepartment +computer +science +engineering +university +ofwashington +seattle +washington +voice +mail +larry +washington +office +sieg +hall +room +current +research +larry +mcmurchie +director +laboratory +integratedsystems +chemistry +western +washington +university +chemistry +university +washington +heworked +area +quantum +chemistry +graduate +studies +primary +focus +numberical +evaluation +class +ofintegrals +gaussian +functions +later +applied +work +theconstruction +large +sparse +hamiltonian +matrices +coauthorof +comprehensive +package +computer +programs +meld +used +abinitio +calculations +small +molecules +since +joining +staff +department +computer +science +andengineering +larry +supervised +work +technical +staff +ofthe +laboratory +integrated +systems +coauthor +wirec +aschematic +capture +system +allows +designers +code +withschematic +symbols +creat +concise +parameterizable +representationof +design +larry +also +involved +development +andcommercialization +mactester +integrated +software +hardwareenvironment +functional +testing +chips +boards +andsubsystems +recently +worked +area +fpgas +andhas +developed +general +purpose +performance +driven +router +fpgas +northwest +laboratory +integrated +systems +mactester +cost +vlsi +chip +tester +triptych +high +density +fpga +architecture +publications +journal +articles +upcoming +conferences +return +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..fa313c8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,321 @@ +nancy +leveson +home +page +nancy +levesondepartment +computer +science +engineeringuniversity +washingtonbox +express +mail +sieg +hall +seattle +leveson +washington +nancy +leveson +professor +joined +faculty +coming +california +search +rain +received +degrees +mathand +computer +science +ucla +spent +formative +years +professor +university +california +irvine +professor +leveson +started +area +research +software +safety +concerned +problems +building +software +real +time +systems +failures +result +loss +life +property +advantage +topic +nobody +questions +goals +except +misanthropes +matter +anyway +students +recently +produced +aformal +requirements +specification +tcas +real +collision +avoidance +system +required +commercial +aircraft +airspace +lessons +learned +project +never +anything +like +seems +pleased +though +adopted +theirofficial +specification +students +currently +working +safety +analysis +specified +behavior +tcas +claims +thatyou +read +anything +fact +taking +train +lately +safetyresearch +project +also +working +modeling +analysis +automated +highways +automobiles +various +aerospace +systems +subtopics +research +area +include +modeling +analysis +safety +specification +safe +software +design +software +fault +tolerance +verification +validation +safety +professor +leveson +editor +chief +ieee +transactions +softwareengineering +elected +member +board +directors +computingresearch +association +member +national +research +council +commissionon +engineering +technical +systems +member +committee +computers +public +policy +recently +chaired +national +research +council +study +evaluating +space +shuttle +software +process +levesonis +fellow +awarded +aiaa +information +systemsaward +contributions +space +aeronautics +computer +technology +andscience +developing +field +software +safety +promotingresponsible +software +system +engineering +practices +life +propertyare +stake +year +leveson +book +software +safety +safeware +system +safety +computers +addison +wesley +published +recent +papers +available +list +papers +isalso +available +copy +favorite +paper +actually +keynoteaddress +conf +software +engineering +melbourne +titled +high +pressure +steam +engines +computer +software +click +quals +projects +available +following +topics +applying +hazardanalysis +techniques +aircraft +collision +avoidance +system +model +writtenin +state +machine +style +language +called +rsml +determining +ways +build +fault +trees +analyses +rsml +models +general +designing +newrequirements +specification +languages +including +specifying +human +computer +interface +deriving +general +principles +designing +languages +applying +hazard +analysis +human +machine +interface +models +modeling +human +machine +interface +control +systems +aircraft +cockpit +analyzing +aircraft +accident +reports +involving +mode +awareness +problemsand +general +issues +derive +information +safe +design +human +computer +interaction +finger +finger +leveson +washington +information +city +airport +currently +perhaps +contact diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..2a7584df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,328 @@ +hank +levy +home +page +henry +levy +professor +joined +faculty +hank +current +research +projects +focus +operating +systems +parallel +distributed +computing +computer +architecture +particularly +simultaneous +multithreading +architectures +object +based +languages +environments +recent +projectcalled +opal +deals +single +address +space +operating +systems +computer +architectures +theetch +projectis +producing +tool +performance +instrumentation +optimization +binary +executables +levy +author +books +numerous +papers +computer +systems +including +outstanding +paper +selections +four +consecutiveacm +symposia +operating +systems +principles +former +chair +sigops +special +interest +group +onoperating +systems +program +chair +theth +symposium +operating +systems +principles +tobe +held +holds +carnegie +mellon +universityand +university +washington +coming +washington +consulting +engineer +digital +equipment +corporation +work +spanned +range +operating +systems +architectures +distributed +systems +workstations +hank +fellow +association +computing +machineryand +recipient +fulbright +research +scholar +award +eleven +master +students +nine +students +survivedlevy +supervision +students +haveall +escaped +academic +positions +major +research +labs +glued +workstation +hank +usually +befound +skiing +biking +playing +tennis +helping +lead +thedepartment +infamous +softball +team +smiling +potatoes +death +sampling +desserts +seattle +many +dessert +parlors +recent +publicationsreducing +network +latency +using +subpages +global +memory +environment +jamrozik +feeley +voelker +evans +karlin +levy +vernon +inproceedings +seventh +conference +architectural +support +programming +languages +operating +systems +october +postscript +implementing +global +memory +management +workstation +cluster +michael +feeley +william +morgan +frederic +pighin +anna +karlin +henry +levy +chandramohan +thekkath +appear +proc +symposium +operating +systems +principles +december +simultaneous +multithreading +maximizing +chip +parallelism +dean +tullsen +susan +eggers +henry +levy +proc +annual +international +symposium +computer +architecture +june +exploiting +choice +instruction +fetch +issue +implementablesimultaneous +multithreading +processor +dean +tullsen +susan +eggers +joen +emer +henry +levy +jack +rebecca +stamm +proc +international +symposium +computer +architecture +sharing +protection +single +address +space +operating +system +jeffrey +chase +henry +levy +michael +feeley +edwardd +lazowska +transactions +computer +systems +november +integrating +coherency +recoverability +distributed +systems +michael +feeley +jeffrey +chase +vivek +narasayya +henry +levy +proc +first +symposium +operating +systems +design +implementation +november +hardware +software +support +efficient +exception +handling +thekkath +levy +proc +conf +arch +support +prog +languages +operating +systems +asplos +october +separating +data +control +transfer +distributed +operating +systems +thekkath +levy +lazowska +proc +conf +arch +support +prog +languages +operating +systems +asplos +october +levy +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..c74fd96b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,156 @@ +opal +operating +system +projectopal +opal +project +exploring +operating +system +structure +tunedto +needs +complex +applications +numberof +cooperating +programs +manipulate +large +shared +persistent +databaseof +objects +opal +code +data +exists +single +huge +shared +address +space +single +address +space +enhances +sharing +andcooperation +addresses +unique +time +interpretation +thus +pointer +based +data +structures +directlycommunicated +shared +programs +time +bestored +directly +secondary +storage +without +need +translation +structure +simplified +availability +large +addressspace +provided +alpha +mips +risc +protection +opal +independent +single +address +space +opal +thread +executes +within +protection +domainthat +defines +virtual +pages +right +access +rights +access +page +easily +transmitted +oneprocess +another +result +much +flexible +protectionstructure +permitting +different +dynamically +changing +protection +options +depending +trust +relationshipbetween +cooperating +parties +believe +organization +canimprove +structure +performance +complex +cooperatingapplications +opal +prototype +built +alpha +platform +ofthe +mach +operating +system +information +sources +list +opal +related +papers +faculty +members +hank +levy +lazowska +jeff +chase +duke +university +current +graduate +students +mike +feeley +ashutosh +tiwary +vivek +narasayya +dylan +mcnamee +related +information +single +address +space +mailing +list +archive diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..5da302a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,209 @@ +lopezgus +lopezlopez +washington +school +sieg +hall +department +computer +science +engineering +university +washington +seattle +home +student +university +washington +dissertationresearch +design +implementation +constraint +imperative +object +oriented +languages +curriculum +vita +publicationsgus +lopez +bjorn +freeman +benson +alan +borning +kaleidoscope +constraint +imperative +programming +language +brian +mayoh +tougu +jann +penjam +editors +constraintprogramming +springer +verlag +nato +advanced +study +instituteseries +series +computer +system +sciences +also +publishedas +technical +report +lopez +bjorn +freeman +benson +alan +borning +constraints +object +identity +inproceedings +european +conference +object +oriented +programming +bologna +italy +july +lopez +bjorn +freeman +benson +alan +borning +implementing +constraint +imperative +programming +languages +kaleidoscope +virtual +machine +inproceedings +conference +object +oriented +programmingsystems +languages +applications +portland +oregon +october +oopsla +tutorialsi +also +tutorials +chair +upcoming +oopsla +conferencein +jose +california +people +object +oriented +technologies +andsoftware +development +meet +speak +oopsla +conference +oopsla +well +known +breadth +depth +high +quality +itsextensive +tutorial +program +previous +years +oopsla +tutorialshave +covered +aspects +object +oriented +technology +introductorysurveys +industrial +software +engineering +practices +leading +edge +academicresearch +topics +response +requests +past +attendees +weespecially +encourage +proposals +engineering +issues +advanced +topics +anyone +considering +submitting +proposal +tutorial +requestguidelines +tutorial +submissions +tutorials +chair +theoopsla +electronic +hotline +electronic +mail +submissions +proposals +encouraged +enthusiastically +accepted +tutorial +proposalswithout +email +addresses +accepted +tutorial +proposals +march +notification +acceptance +withcamera +ready +notes +august +interesting +links +constraints +oopsla +tutorials +green +directions +jimi +hendrix +grave +star +wars +collectors +archive diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..144dda67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,45 @@ +omid +home +page +omid +madani +madani +washington +computer +science +engineering +department +university +washington +seattle +chateau +suite +bhello +curious +browsers +welcome +fourth +year +graduate +student +department +enjoytheory +also +like +keep +touch +areasincluding +graphics +life +work +academics +want +look +islamicarchitecture +isfahan +best +nominee +cities +home +country +iran diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..e7ab6b82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,21 @@ +mike +perkowitz +page +mike +perkowitznewsflash +mike +goes +blond +areas +research +academia +music +creativity +randomfavorites +sheba +voyeur +written +grooveneedle +espressoresumemike +perkowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..dc532d9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,84 @@ +marc +langheinrich +homepagemarc +langheinrich +homepagemarc +langheinrichuniversitt +bielefeld +university +washingtontechnische +fakultt +department +computer +scienceemail +imlanghe +techfak +bielefeld +email +marclang +washington +eduabout +myselfi +spent +last +year +department +computer +science +theuniversity +washington +visiting +graduate +student +thefulbright +program +check +following +links +depthinformation +resume +projects +short +biopostal +addressas +september +back +germany +finishing +mastersat +university +bielefeld +please +contact +german +address +homeschoolgermanyringstrae +maintalphone +paulusplatz +bielefeldphone +woodlawn +seattle +phone +sieg +hall +phone +browser +support +tables +access +data +list +formatmarc +langheinrich +university +washington +department +computer +science +email +marclang +washington +http +washington +homes +marclang diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..5a3b447b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,172 @@ +home +marla +baker +marla +washington +chief +editor +department +computer +science +engineering +university +washington +seattle +partner +crime +bentley +academic +interestsgraphical +user +interfaces +human +computer +interaction +educational +software +computer +supported +collaborative +learning +cscl +computer +graphics +visualization +techniques +visual +programming +languages +current +work +currently +working +stevetanimoto +lauren +bricker +coimage +project +devleoped +collaborative +educational +activities +order +explore +cooperatively +contolled +objects +goal +work +investigate +different +ways +multiple +users +cansimultaneously +share +manipulate +given +object +ways +assessing +interactions +also +work +part +time +interface +packard +bell +company +resume +publicationsbaker +marla +stephen +eick +space +filling +software +visualization +journal +visual +languages +computing +june +burnett +baker +bohus +carlson +yang +scaling +visual +programming +languages +ieee +computer +special +issue +visual +programming +march +burnett +margaret +marla +baker +classification +system +visual +programming +languages +journal +visual +languages +computing +september +baker +marla +stephen +eick +visualization +tool +large +software +systems +proceedings +international +conference +software +engineering +sorento +italy +baker +marla +stephen +eick +baker +eick +method +apparatus +displaying +hierarchical +information +large +software +system +patent +application +submitted +october +tutorial +geometric +transformations +images +metip +programming +environment +check +page +office +sieg +marla +baker +marla +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..2d1e7c7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,118 @@ +matthai +philipose +home +page +matthai +philipose +working +dynamic +compilation +project +dynamic +compileris +beast +generates +optimizes +code +runtime +shortterm +interested +figuring +produce +good +code +dynamically +runtime +modern +processor +architectures +applications +side +think +interpreter +basedsystems +real +time +constraints +like +java +browser +canbenefit +selective +runtime +compilation +like +wire +asystem +runtime +compiler +goes +working +withprofessors +susan +eggersand +craig +chambers +work +computer +science +engineering +department +university +washington +seattle +phone +home +seattle +publications +auslander +philipose +chambers +eggers +bershad +fast +effective +dynamic +compilation +conference +onprogramming +language +design +implementation +chambers +eggers +auslander +philipose +mock +andp +pardyak +automaticdynamic +compilation +support +event +dispatching +extensible +systems +workshop +compiler +support +systems +software +february +bookmarks +stuff +play +frequentlymiscellaneous +links +stuff +local +importancefrom +past +abuwhy +page +black +blue +ribbon +campaign +matthai +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..1e1c540d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,229 @@ +neil +mckenzie +menu +fine +dining +index +page +contact +future +projects +current +projects +past +projects +publication +list +personal +information +games +contact +information +neil +mckenzie +mitsubishi +electric +research +laboratories +broadway +floor +cambridge +phone +mail +mckenzie +merl +current +projectsgonna +teenage +lobotomy +ramonesi +living +east +coast +miles +east +seattle +andworking +merl +noted +currently +involved +projectconcerning +real +time +volume +rendering +medical +data +copious +free +time +expatriate +graduate +student +working +onchaotic +routingwith +faculty +advisors +carl +ebeling +larry +snyder +chaotic +routing +packet +routing +algorithm +mesh +torusnetworks +dissertation +design +implementation +thecranium +message +passing +interface +compatiblewith +network +using +chaotic +routing +past +projectsi +teaching +assistant +summer +designed +implemented +chip +tester +called +mactester +maintainer +carl +netlist +graph +isomorphism +tool +calledgemini +industry +speak +knownas +layout +schematic +tool +gemini +available +interested +please +send +mail +larry +mcmurchie +larry +washington +publications +cranium +interface +message +passing +adaptive +packetrouting +networks +proceedings +parallel +computer +routing +andcommunication +workshop +seattle +link +tomactester +home +page +gemini +user +guide +last +update +march +personal +information +angel +married +picture +house +arlington +massachusetts +heading +east +coast +used +livein +fashionable +seattle +neighborhood +ofballard +angel +creative +page +contains +examples +computer +artworkcreated +using +adobe +photoshop +ownedthis +years +onlythe +memories +remain +shirt +correctlyguessing +answer +toriddle +jour +october +label +placed +jars +mckenzie +country +farm +honey +produced +myuncle +mckenzie +edmonton +alberta +canada +amusement +neil +linkschairmaned +shot +linksnorm +gregory +bookmarks +halcyon +eugene +spafford +links +purdue +randy +pausch +shot +links +virginia +wallach +scool +links +princeton +neil +mckenzie +mckenzie +washington +last +update +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..34e79350 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,251 @@ +marc +fiuczynski +home +page +marc +fiuczynski +home +page +marc +fiuczynski +washington +computer +science +engineering +department +university +washington +seattle +backgroundi +graduate +student +university +washington +department +computer +science +engineering +grewup +germany +near +sseldorf +spent +years +highschool +princeton +received +computer +science +fromrutgers +university +spent +several +summers +bell +labs +mitre +corporation +working +range +ofprojects +sole +proprietor +systems +companythat +created +december +sell +distributed +fault +tolerant +network +based +telephone +system +built +scratch +setof +chasis +processors +using +univoice +telephone +interface +cardsand +vxworks +operating +system +time +spend +hacking +spin +safe +adaptable +extensible +operating +system +primary +contribution +spin +extensible +protocol +architecture +forapplication +specific +networking +applications +achieve +compellingperformance +improvements +using +structure +compared +tosimilar +applications +running +commercial +platform +demonstration +work +services +http +requests +contacthttp +spin +washington +recent +reports +papers +extensible +protocol +architecture +forapplication +specific +networking +design +implementation +performance +paper +describes +anextensible +protocol +architecture +allows +anyone +customize +anin +kernel +protocol +graph +enables +applications +achieves +betterperformance +compared +similar +applications +running +conventionaloperating +systems +demonstration +work +services +http +requests +contacthttp +spin +washington +appeared +proceedings +winter +usenix +technicalconference +extensibility +safety +performance +spin +operating +system +design +implementation +performance +paper +appeared +proceedings +fifteenth +symposium +operating +systems +principles +language +support +extensible +operating +systems +pretty +happy +deal +shortcomings +inorder +language +safe +extensible +operating +system +paper +describeshow +addressed +shortcomings +safe +dynamic +linking +extensible +operating +system +describes +dynamic +linker +load +code +kernel +point +isthe +ability +create +manage +linkable +namespaces +describe +interfaces +andcollections +interfaces +protection +software +issue +position +paper +comparing +software +hardware +protection +mechanisms +proceedings +fifth +ieee +workshop +topics +operating +systems +region +analysis +parallel +elimination +method +data +flow +analysis +ieee +transactions +software +engineering +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..399d16b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,39 @@ +melanie +fulgham +computer +science +minimal +versus +minimal +routing +algorithms +routing +methods +models +development +routing +models +help +predict +compare +performance +routers +real +parallel +machines +deflection +routing +upper +lower +bounds +practical +require +sorting +deflection +routing +algorithms +mesh +topology +washington +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..e8da8789 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,74 @@ +meng +heng +homepage +meng +homepagemenghee +washington +edubox +department +computer +science +engineeringuniversity +washingtonseattle +second +year +student +computer +scienceat +university +washington +undergrad +university +pennsylvania +research +interestsi +interested +image +retrieval +problem +trying +findimages +huge +database +images +virage +andqbicare +commercial +examples +similar +kind +stuff +working +snapshots +done +singaporesingapore +infomap +provides +facts +andstatistics +singapore +singaporeonline +guide +plan +take +trip +national +computer +boardis +charge +transforming +singapore +anintelligent +island +work +graduate +straits +times +singapore +main +english +newspaper +visits +since +menghee +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..276d865a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,88 @@ +michael +ernst +home +pagemichael +ernsti +graduate +student +university +washington +computer +science +department +previously +lecturer +riceuniversity +computer +sciencedepartment +researcher +programanalysis +group +microsoft +research +graduate +student +laboratory +computer +science +eecs +department +pages +frequently +updated +technical +interests +include +compilation +static +analysis +slicing +debugging +optimized +code +serialization +parallel +programs +program +chair +intermediate +representation +workshop +colocated +popl +intellectual +property +particularly +computer +programs +areas +including +game +theory +cryptography +philosophy +denotational +semanticsi +maintain +list +resourcesfor +conference +workshop +organizers +occasionally +manage +slip +away +work +carry +real +life +links +possibleinterest +including +pages +maintain +michael +ernst +mernst +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..53874320 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,159 @@ +markus +mock +home +page +markus +mock +mock +washington +computer +science +rttemberg +grew +anotherpart +state +district +biberach +upper +swabia +oberschwaben +coming +studied +computer +science +university +karlsruhe +whichi +obtained +diplom +computer +science +also +spent +year +umass +fulbright +grantee +main +research +interests +parallel +distributed +computing +object +oriented +systems +compilers +currently +working +dynamic +compilation +interests +include +spanish +latin +american +culture +travel +good +books +labyrinth +solitude +chess +mainstream +movies +dieangst +torwarts +beim +elfmeter +know +handke +salsa +merengue +dancing +still +time +left +check +else +seattle +come +publicationsseparate +list +links +interesting +stuff +current +chess +event +chess +olympiad +yerewan +colloquia +oopsla +volunteers +page +mossy +bits +grad +journal +link +csek +home +page +link +home +page +link +csebi +home +page +link +cses +home +page +courses +graduate +studentsimages +department +electrical +engineeringyou +wouldn +expect +square +live +view +metacrawler +searching +altavista +searching +deutsche +welle +realaudio +live +stuff +cool +linksand +quote +consider +lilies +field +grow +toil +neither +spin +unto +even +solomon +glorywas +arrayed +like +matthew +page +accessed +times +since +last +updated +mock +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..f930dada --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,18 @@ +vivek +narasayya +home +page +vivek +narasayya +nara +washington +computer +science +engineering +department +university +washington +seattle +personal +informationresearch +interestspapers diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..60a585ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,81 @@ +himanshu +nautiyal +home +pagehimanshu +nautiyalthis +page +heavy +construction +himanshu +nautiyaldept +computer +science +engineering +mail +stop +university +washington +seattle +office +sieg +office +phone +courses +taking +autumn +quarter +principles +digital +systems +design +artificial +intelligence +finger +nautiyal +washington +edugod +gifts +personkind +douglas +adams +terry +pratchett +pelham +grenvile +wodehousethe +order +alphabetical +last +names +implied +favorite +links +place +india +internet +radio +search +engines +india +himanshu +nautiyal +name +friends +delhi +finished +tech +astronomy +skating +computer +science +aviation +travel +numismatics +sounds +much +profound +coin +collecting +cooking +internet +movies diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..d2623c3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,95 @@ +labyrinth +mediocrity +bemoaning +hype +skeptical +cynicism +since +research +currently +working +automaticconstruction +wrappers +information +resources +alsointerested +several +areas +artificial +intelligence +andcognitive +science +papers +beeninvolved +stuff +available +anonymous +services +provides +glbal +infrmatin +sperhighway +preliminary +version +diverse +meter +available +pictures +hand +shortly +surgery +need +random +number +always +handy +know +date +time +week +favorite +color +available +line +lost +easily +return +page +ronald +wilson +reagan +need +temperature +looking +javascript +enabled +browser +automatically +send +mail +great +republicans +tell +like +miscellany +need +contact +bookmarks +technology +society +aware +bitter +irony +involved +nonetheless +madeavailable +wendell +berry +guidelines +constitutesgood +technology +comments +nicholas +kushmerick +uwcse +seattle diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..9bd1209d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,109 @@ +wayne +ohlrich +wayne +ohlrich +ohlrich +washington +department +computer +science +engineering +university +washington +seattle +office +guggenheim +annex +chateau +publications +research +currently +working +nancy +leveson +model +checking +automatic +test +generation +specifications +spare +time +working +brian +bershad +anna +karlin +romer +memory +performance +analysis +project +locally +known +severe +damage +project +group +paper +make +debut +isca +italy +summer +safety +research +home +page +leveson +memory +sytems +home +page +isca +romer +ohlrich +bershad +karlin +reducing +memory +overhead +using +online +superpage +promotion +university +class +information +course +personal +interests +games +wayne +world +wonder +information +page +contains +sorts +useful +links +seattle +information +home +page +investment +page +contains +investment +information +research +information +found +useful +page +created +october +last +modified +march diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..2b8fb968 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,106 @@ +gershony +gershony +gershony +washington +graduate +studentcomputer +science +engineering +departmentuniversity +washingtonoffice +sieg +home +seattle +second +year +graduate +student +university +ofwashington +moving +seattle +lived +california +seven +years +fouryears +berkeley +three +years +lancaster +northeast +angeles +originally +israel +lived +haifa +holon +like +practice +kwon +play +basketball +hiking +quarter +taking +class +whole +bunch +seminars +amtaing +computer +architecture +usually +find +office +sieg +class +office +hours +aremonday +wednesday +potential +employers +welcome +look +resume +pictures +took +last +summer +click +tosee +cool +shirt +design +made +summer +graphicsprogram +called +virtual +reality +interesting +links +time +daily +news +summary +york +times +riderlink +seattletransportation +options +information +israel +computer +science +mathematics +departments +university +california +berkeley +gershony +washington +accesses +since +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..5577cc43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,90 @@ +ross +ortega +ross +ortega +wearing +jeans +ortega +washington +department +computer +science +engineering +university +washington +seattle +knew +would +called +research +would +albert +einstein +welcome +home +pageafter +going +school +working +boston +years +decided +head +west +really +northwest +came +fall +leaving +sometime +according +advisor +gaetano +borriello +officially +working +chinook +project +tool +real +time +embedded +controller +systems +unofficially +brewing +beer +learning +hack +trying +teach +german +shepherd +tequila +behave +professional +section +chinook +project +myresume +file +education +experience +publications +papers +puppy +pictures +tequila +tequila +color +tequila +color +office +sieg +check +page +links +find +interesting +last +updatedthu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..ab248a97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,138 @@ +untitled +document +flat +morris +minor +pardodepartment +computer +science +engineeringuniversity +washingtonbox +seattle +washingtonusapardo +washington +edunote +show +blue +ribbon +asimgsrc +http +graphics +icons +blueribbon +rib_trn_plain_sm +showing +quiet +opposition +free +speechprohibition +national +telecommunications +bill +likewise +white +letters +black +background +beth +everybody +needs +pardo +around +sometimes +pardo +sometimes +beth +share +academicsome +papersi +worked +find +particularly +interesting +runtime +code +generation +rtcg +instruction +simulation +tracing +tools +home +pages +oncomputer +architectureandcompilers +quick +links +otherpeople +working +computers +classes +thesis +stylenon +academicfeatured +items +featured +item +month +weak +site +week +doesn +blink +anymore +regular +itemsbicyclesbusinessescomputersfoodhumori +famous +things +related +legal +ethical +weirdnesslinux +journalmusicgoofy +politicsscience +thinking +though +unrelated +stuff +thinking +transportation +seattle +movie +listings +seattle +film +festival +dante +search +truly +gross +story +trepanation +privacy +sites +logging +mail +address +worse +taking +data +disks +every +time +consider +weather +courtesey +seattle +particular +also +weather +courtesey +newhouse +newspapers +also +weather +courtesy +yesterday +stuffpardo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..a6c5bb7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,119 @@ +przemek +pardyak +home +page +przemyslaw +pardyak +pardy +washington +first +weeks +grad +school +couple +years +later +third +year +graduate +student +computer +science +theuniversity +washington +currently +research +area +ofoperating +systems +also +interasted +distributed +systems +languages +compilers +besides +grad +school +life +filled +withthe +seattle +drizzle +hiking +outdoors +activities +notbusy +enjoying +school +drizzle +books +music +find +short +description +research +interests +resume +also +list +papers +outdated +happenswhen +busy +schedule +projectsspinan +extensible +operating +system +built +glorious +leadership +brian +bershad +group +mechanisms +object +based +systemsgroup +communication +group +mechanisms +emerald +object +basedprogramming +language +time +system +interesting +links +polish +connection +various +resources +somehow +related +poland +research +related +projects +related +mine +universities +research +unrelated +miscellaneous +work +computer +science +engineering +department +university +washington +seattle +phone +home +seattle +phone +pardy +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..68caceb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,212 @@ +paul +franklin +home +pagepaul +franklin +home +pagei +currently +graduate +student +univ +washington +inthe +department +computer +science +officially +myoffice +sieg +working +rapid +project +used +first +year +student +thesummer +second +year +someone +expressed +concern +aboutthis +characterization +usually +somewhere +north +seattle +ilive +school +another +picture +best +oneof +days +around +scanning +better +ones +norwegian +poem +likea +collection +fortunes +received +friends +localchinese +restaurants +mundane +stuffi +hope +folks +might +find +stuff +useful +hotlinks +pagesstuff +maintainmy +schedule +remembered +update +contacting +love +travel +necessarily +tell +everyone +hiring +mewhere +come +high +school +diploma +live +high +school +inmorgan +hill +taught +prolog +first +time +graduated +computer +science +engineering +fromuc +davis +taught +prolog +second +time +andyes +taught +prolog +glad +ididn +year +university +bergen +ialso +research +professor +electrical +andcomputer +engineering +department +stuffwhile +davis +partners +various +relatedactivities +tended +matt +chris +james +evengot +together +recently +using +chris +marriage +joanne +anexcuse +chris +brother +also +made +itin +photo +throughout +undergraduate +years +kept +biking +never +davis +flat +year +exchange +student +university +ofbergen +biked +hillier +longer +routes +returnedto +davis +took +rollerblading +since +biking +around +town +wasnow +easy +biking +dropped +year +worked +hewlettpackard +returned +vengeance +moved +toseattle +done +annual +seattle +portland +bike +ride +intwo +days +year +inseason +march +april +june +rest +year +justcommute +bike +lots +dancing +particularly +lindyhop +know +every +html +document +header +linethat +looks +something +like +doctype +html +public +ietf +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..64865f88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,115 @@ +frdric +pighin +pighin +washington +computer +science +lcommunications +wonderful +city +tourist +quarter +taing +taught +anna +karlin +guys +found +much +often +dani +quals +report +rest +like +british +movies +monty +python +swimming +nick +cave +corto +maltese +italian +comics +cats +paris +berlin +venise +simpsons +mpeg +rain +surprise +like +traditionnal +french +marine +songs +collect +otherwise +working +graphics +anna +karlin +supervision +although +formerly +studying +systemhere +name +paper +implementing +global +memory +management +workstation +cluster +michael +feeley +william +morgan +frederic +pighin +anna +karlin +henry +levy +chandramohan +thekkath +proceedings +symposium +operating +systems +principles +december +postscript +live +action +breath +taking +picture +door +refreshed +every +minutes +lucky +might +even +look +darren +juan +note +might +dark +live +action +picture +square +refreshed +every +five +minutes +note +might +rain diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..e46240d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,17 @@ +ruth +anderson +home +page +ruth +anderson +washington +computer +science +engineering +department +university +washington +seattle +wxyc +maps +brother diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..1c9f4807 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,18 @@ +home +home +josh +home +page +computer +science +engineering +department +university +washington +seattle +redstone +washington +joshua +redstone +redstone +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..ac957507 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,188 @@ +jakobovits +home +page +jakobovits +washington +computer +science +engineering +department +university +washington +seattle +usai +getting +computer +science +departmentof +university +ofwashington +wonderful +city +seattle +always +raining +thisup +date +weatherreport +sneak +peek +live +image +video +camera +mounted +outside +camera +pointed +beautiful +drumheller +fountain +clear +youcan +catch +glimpse +rainier +glory +probably +clearday +nice +color +picture +research +developing +based +repository +manager +programming +toolkit +building +multi +media +consoles +constructing +image +database +part +astructural +information +framework +brain +mapping +building +knowledge +base +support +digital +anatomist +line +interactive +atlas +human +body +implemented +databaseenvironment +vision +research +local +expert +persistentprogramming +languages +interests +else +proud +creator +internetracquetball +ladder +taught +advanced +programming +extension +wrote +perl +scripts +manage +rotisseriebaseball +league +standings +updated +daily +stats +fromusa +today +raising +happy +family +africancichlids +visit +home +town +honolulu +every +chance +camp +magical +kalalau +valley +movies +gamble +stock +market +darn +good +fantasy +football +team +newslet +would +javafamily +links +mydad +leon +james +professor +psychology +university +hawaii +whois +writing +book +traffic +psychology +fostering +lively +online +polemic +emanuel +swedenborg +step +diane +nahl +professor +library +information +sciences +whoprovides +great +index +online +libraries +databases +judy +jakobovits +realtor +hawaii +uncle +eddy +jakobovits +running +site +bioscience +professionals +bookmarksif +java +click +drag +words +make +poem diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..e69a50f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight +welcome +gallery +twenty +photographs +five +headings +robert +grimm +rights +reserved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..86a2c2de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,174 @@ +romer +romer +department +computer +science +engineering +university +washington +seattle +home +office +romer +washington +eduoffice +chateau +sieg +research +interestsi +research +operating +system +supportfor +high +performance +memory +systemswith +really +smart +people +likebrian +bershad +brad +chen +alan +eustace +anna +karlin +dennis +wayne +ohlrich +andwayne +wong +three +recent +papers +subject +reducing +memory +overhead +using +onlinesuperpage +promotion +romer +ohlrich +karlin +bershad +isca +dynamic +page +mapping +policies +cache +conflict +resolutionon +standard +hardware +romer +bershad +chen +osdi +avoiding +conflict +misses +dynamically +large +direct +mappedcaches +bershad +romer +chen +asplos +addition +friends +studying +performance +ofinterpreters +learn +theproject +rockyhome +page +also +wrote +paper +together +structure +performance +interpreters +romer +voelker +wolman +wong +baer +bershad +levy +asplos +appear +abstract +postscript +bibliography +lobo +reading +listrandom +stuffa +house +computer +scientists +rays +limbs +romer +knee +arthroscopic +surgery +mark +hill +wrist +friend +dylansaid +hair +couldn +flowbee +said +could +beingexperimental +scientists +conducted +experiment +judge +results +attending +isca +travelled +europe +took +somepictures +romer +eatsomeone +else +food +accompanied +sincere +rationalization +forexample +romered +lunch +thought +leftthe +country +would +didn +origin +unknown +edward +tufte +tips +public +speaking +father +edits +american +journal +physics +places +ticker +symbol diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..9125b355 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,281 @@ +memory +systems +research +university +washingtonmemory +systems +researchdepartment +computer +science +engineering +university +washington +seattle +welcome +home +page +memory +systems +research +description +research +group +investigating +techniques +operatingsystem +improve +memory +system +performance +work +sharesthe +following +features +rely +combination +simple +hardware +support +operating +system +modifications +monitor +dynamic +behavior +applications +monitoring +mechanisms +incur +small +overhead +runtime +information +collect +used +identify +sources +memory +system +delays +cache +misses +misses +identifying +resolving +bottlenecks +overhead +monitoring +mechanisms +also +significantly +improve +overall +system +performance +recent +project +explored +policies +monitorapplication +memory +reference +patterns +order +identify +resolvetlb +performance +problems +poor +performance +results +tlbis +small +cover +current +application +working +severalmodern +architectures +support +superpages +pages +whose +size +amultiple +system +base +page +size +systems +tlbperformance +improved +using +larger +pages +cost +ofwasted +memory +internal +fragmentation +simulated +several +policies +adapt +page +size +dynamically +todifferent +regions +application +address +space +constructingsuperpages +copying +component +pages +contiguous +region +ofmemory +developed +policy +monitors +misses +balancesthe +potential +benefit +superpage +reduction +future +tlbmisses +cost +constructing +superpage +memorycopy +constructing +superpages +misspatterns +warrant +policy +attains +performance +largepages +without +internal +fragmentation +details +project +paper +reducing +memory +overhead +using +online +superpage +promotion +isca +appear +looking +someone +implement +algorithms +would +makea +good +quals +masters +project +project +description +peoplefaculty +brian +bershad +bershad +washington +anna +karlin +karlin +washington +current +students +dennis +dlee +washington +wayne +ohlrich +ohlrich +washington +romer +romer +washington +wayne +wong +waynew +washington +papers +reducing +memory +overhead +using +online +superpage +promotion +romer +ohlrich +karlin +bershad +isca +appear +dynamic +page +mapping +policies +cache +conflict +resolution +standard +hardware +romer +bershad +chen +osdi +avoiding +conflict +misses +dynamically +large +direct +mapped +caches +bershad +romer +chen +asplos +comparison +memory +performance +mips +alpha +wong +quals +project +report +university +washington +instruction +cache +effects +different +code +reordering +algorithms +quals +project +report +university +washington +memory +systems +bibliography +romer +romer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..92bb1fe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,149 @@ +project +rocky +architectural +performance +interpreted +languages +project +rocky +architectural +performance +interpreted +languages +project +description +interpreted +languages +become +increasingly +popular +last +several +years +part +demands +portability +safety +ease +project +examines +performance +interpreted +languages +environments +several +perspectives +interpretation +strategy +implementation +processor +architecture +utilization +basis +study +collected +benchmarks +microbenchmarks +implemented +several +interpreted +languages +perl +java +mipsi +using +various +instrumentation +tracing +techniques +evaluate +performance +characteristics +benchmarks +order +gain +insight +similarities +differences +languages +execution +environments +peoplefaculty +jean +loup +baer +baer +washington +brian +bershad +bershad +washington +henry +levy +levy +washington +students +dennis +dlee +washington +romer +romer +washington +geoff +voelker +voelker +washington +alec +wolman +wolman +washington +wayne +wong +waynew +washington +papersromer +voelker +wolman +wong +baer +bershad +levy +structure +performance +interpreters +asplos +appear +abstractpostscriptjava +measurements +xjava +source +files +benchmarkstoolsto +collect +information +performance +applications +vebeen +building +binary +rewriting +tool +called +etch +etch +yetpublicly +available +read +etchhome +page +internal +documentationproject +internal +documentation +available +people +last +updated +july +romer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..906a5336 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,125 @@ +richard +rogers +home +page +richard +rogersrrogers +washington +educomputer +science +engineering +departmentuniversity +washington +seattle +usaoffice +chateau +sieg +phone +intelligent +systems +laboratry +phone +research +developed +system +software +systolic +cellular +array +machine +scam +massively +parallel +image +processing +computer +software +includes +compiler +basic +image +processing +morphology +libraries +simulator +obtain +software +papers +scam +currently +work +document +layout +extraction +intelligent +systems +also +helping +produce +document +groundtruth +database +optical +character +recognition +community +science +camps +director +computer +facilities +northwest +center +environmental +education +ncee +offers +summer +science +camp +students +ages +washington +beautiful +juan +islands +also +work +science +splash +program +seattle +university +splash +year +long +national +science +foundation +funded +science +program +grade +minority +girls +seattle +area +interests +corn +snakes +jessica +squishy +order +increasing +length +kuow +national +public +radio +stationi +bake +best +pecan +seattlelast +modified +february diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..4287cb77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,22 @@ +mike +home +page +mike +salisburysalisbur +washington +computer +science +engineering +department +university +washington +seattle +usaoffice +chateau +sieg +lifehistory +school +home +friends +vitacool +stuff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..7478ff53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,307 @@ +stefan +savage +stefan +savage +savage +washington +work +computer +science +engineering +department +university +washington +seattle +home +seattle +sampling +rich +post +industrial +culture +modern +pittsburghfor +years +caught +ride +bershad +migration +mnow +gradual +student +first +rank +strongbackground +century +american +history +provides +witha +firm +irrelevant +platform +trash +talk +peers +fool +tocqeuville +statement +americans +find +tiresome +inconvenience +exercisepolitical +rights +distract +industry +quite +similar +tocurrent +microprocessor +architectural +trends +favoring +needs +ofapplication +code +operating +systems +work +brian +bershad +rest +merry +band +onan +operating +system +project +called +spin +projectsspinspin +extensible +operating +system +omnifemtokernel +whichsupports +dynamic +adaptation +system +interfaces +andimplementations +direct +application +control +stillmaintaining +system +integrity +inter +application +isolation +things +writingspin +papers +extensibility +safety +performance +spin +operating +system +proceedings +symposium +operating +system +principles +sosp +copper +mountain +december +slides +talk +language +support +extensible +operating +systems +proceedings +first +workshop +compiler +support +system +software +wcsss +tucson +writing +operating +system +modula +proceedings +first +workshop +compiler +support +system +software +wcsss +tucson +protection +software +issue +proceedings +fifth +workshop +topics +operating +systems +hotos +orcas +island +issues +design +extensible +operating +system +proceedings +first +usenix +symposium +operating +system +design +implementation +osdi +monterey +november +panel +abstract +longer +unpublished +version +paper +spin +extensible +microkernel +application +specific +operating +system +services +proceedings +sixth +sigops +european +workshop +matching +operating +systems +application +needs +version +appeared +operating +systems +review +january +spin +extensible +microkernel +application +specific +operating +system +services +university +washington +tech +report +march +afraid +paper +afraid +frequently +redundant +array +independent +disks +proceedings +winter +usenix +technical +conference +diego +january +best +student +paper +slides +talk +reservation +papers +processor +capacity +reserves +operating +system +support +multimedia +applications +proceedings +first +ieee +international +conference +multimedia +computing +systems +boston +processor +capacity +reserves +abstraction +managing +processor +usage +proceedings +fourth +workshop +workstation +operating +systems +wwos +napa +october +processor +capacity +reserves +multimedia +operating +systems +carnegie +mellon +tech +report +real +time +mach +paper +real +time +mach +timers +exporting +time +user +proceedings +third +usenix +mach +symposium +santa +april +slides +talk +interests +music +hikingthis +construction diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..0d6f7d4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,12 @@ +sean +sandys +sean +david +sandys +washington +computer +science +washington +last +revised +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..676f33e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,35 @@ +richard +segal +home +page +richard +segaldepartment +computer +science +engineering +university +washingtonbox +seattle +segal +washington +personal +biography +better +half +family +pictures +research +overview +brute +internet +softbot +publications +curriculum +vitae +postscript +amusements +archery +bicycling +racquetball +skiing +softball diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..b3c5ad55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,179 @@ +stefan +bergstefan +berg +work +home +sieg +hall +university +washington +seattle +university +seattle +phone +phone +email +sgberg +washington +contents +address +information +past +projects +activities +current +projects +activities +interesting +pages +finger +information +stefan +born +cologne +germany +spring +completed +mittlere +reife +thgrade +schillergymnasium +cologne +coming +united +statesto +receive +high +school +diploma +bloomington +high +school +north +indiana +stefan +received +bachelor +science +honors +distinctionin +field +computer +science +fromindiana +university +momenthe +working +towards +university +washington +expected +completion +date +sometime +thiscentury +past +projects +activities +project +implementation +reduction +machine +teaching +assistant +project +comparison +hardware +software +solutions +false +sharing +teaching +assistant +project +study +linear +time +sorting +algorithms +teaching +assistant +current +projects +activities +quals +project +comparison +hardware +software +solutions +false +sharing +interesting +pages +pictures +people +around +rafting +pictures +bookmarks +exciting +square +university +washington +moment +weather +seattle +print +yourselfsomething +crazy +didn +even +come +particularsolution +implementation +done +sall +line +shouldn +contain +trailing +carriage +return +bytes +compiles +without +warning +program +print +exact +source +code +itin +fewer +bytes +like +putchar +char +else +else +char +main +char +putchar +char +else +else +char +main +char +printf +printf +printf +printf +stefan +resume +available +inpostscript +andtex +format diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..9e5f7d1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,67 @@ +ward +shadegreetings +salutations +third +year +grad +student +hereat +dubcse +interactive +renderingof +complex +scenes +currently +thing +thing +follow +projects +link +walkthruproject +amonglots +pictures +animations +find +siggraph +paperdescribing +recent +work +shortcut +click +thepicture +island +lower +left +corner +page +lots +interesting +work +going +many +different +aspectsof +computer +graphics +thegraphics +imaging +laboratory +gets +done +contact +info +daily +schedule +travel +plans +projects +publications +pictures +page +looks +scrunched +make +browser +least +pixels +wide diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..45cf442e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,37 @@ +shun +leungshun +leung +student +department +computer +science +andengineering +university +ofwashington +working +prof +johnzahorjan +pointers +research +research +summary +publications +curriculum +vitae +upon +request +shun +leung +department +computer +science +engineering +university +washington +seattle +email +shuntak +washington +last +modified +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..4f2edf1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,87 @@ +welcome +shuichi +home +page +shuichi +koga +skoga +washington +graduate +studentcomputer +science +engineering +departmentuniversity +washington +name +shuichi +koga +haven +noticed +bynow +started +graduate +studies +university +ofwashington +haven +quite +figured +myquals +much +less +dissertation +graduated +universityof +virginia +degree +mathematics +alsoheavily +involved +asian +studies +foreign +relations +andgovernment +departments +originally +slated +also +degreein +asian +studies +also +heavily +involved +user +interface +groupand +computer +sciencedepartment +working +project +called +alice +since +anyways +take +look +picture +smaller +shuichi +means +finger +info +current +schedule +neat +hypertext +links +hunt +destroy +bugs +shuichi +koga +skoga +washington +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..57f895ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,59 @@ +soha +hassoun +home +page +soha +washington +soha +hassounit +year +graduate +school +university +washington +computer +science +engineering +dept +circuit +designer +whoturned +developer +currently +working +onarchitectural +retiming +professor +carlebeling +weekly +schedule +business +current +previous +research +current +education +experience +publications +patent +chaos +groups +dept +professional +interesting +vlsi +sites +information +little +deedee +photo +gallery +address +computer +science +engineering +department +university +washington +seattle +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..b4ba0354 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,136 @@ +sujay +parekh +home +page +sujay +parekh +work +department +computer +science +engineering +sieg +hall +chateau +guggenheim +annex +university +washington +seattle +home +seattle +quarter +classes +seminar +systems +seminar +french +french +research +simultaneousmultithreading +particular +issues +related +tomultithreaded +architectures +softbot +project +evaluated +simon +softbot +employs +procedural +search +controlsystem +control +actions +report +construction +design +removable +patio +conventional +workspaces +interested +funding +project +please +contact +sports +spuds +soccer +right +sort +bookmarks +pages +like +keep +track +interests +academic +cognitive +science +distributed +parallel +systems +psychology +philosophy +tennis +soccer +sailing +squash +volleyball +ballroom +dancing +food +cornell +computer +science +oracle +corporation +stottler +henke +associates +done +resume +random +personal +info +favorite +foods +oondhiu +mangoes +phad +thai +kung +chicken +favorite +beverages +screwdriver +scotch +long +island +iced +favorite +dances +tango +swing +east +west +coast +salsa +favorite +rock +music +dire +straits +pink +floyd +phil +collins +genesis +peter +gabriel +petty +sparekh +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..b08118f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,68 @@ +erik +selberg +improved +home +page +name +erik +speed +washington +academic +speed +almost +live +fishcam +addresses +lara +lewis +memorialhappy +link +people +sports +food +drink +culture +mary +kaye +office +friends +family +fish +disc +drives +ultimate +pasty +powered +uring +machines +mountain +biking +spuds +softball +skiing +utah +raquetball +pros +college +wedgwood +house +diet +pepper +salt +lake +roasting +company +bean +bagel +speed +racer +star +wars +tiny +toons +pinky +brain +phantom +babylon +comics +erik diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..c5eea59c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,151 @@ +home +page +sung +choiwelcome +thehomepage +ofsung +eunchoi +myschoollifemy +primary +research +interest +compiling +parallel +programming +languages +involved +zplcompiler +project +university +washington +lately +beenspending +time +thinking +optimized +communicationgeneration +using +architechture +independent +communicationlibrary +ironman +addition +experimenting +simulating +data +parallel +programson +superscalar +processors +goal +work +improve +nodeperformance +coming +generations +parallel +machines +alsobeen +seen +hanging +chaosrouter +group +work +simulator +including +graphical +front +visualization +thatexperience +currently +implementing +another +router +simulator +inzpl +finally +also +little +astronomy +quarter +taing +enjoy +watching +movies +mostly +comfort +home +like +yeah +vegetarian +since +myjunior +year +college +drink +dinner +would +samewithout +good +wine +result +must +exercise +quite +play +twosoccerteams +cousin +scrubs +division +cooper +recdivision +last +season +scrubs +came +second +place +andcooper +division +unfortunately +recently +sacrifiedmy +left +knee +game +playing +soccer +taking +usualstep +aerobicsclass +instead +find +trying +swim +weight +trainingclass +like +good +people +world +read +books +take +abit +shakespeare +watch +publictelevision +listen +classicalmusic +myotherlife +sung +choi +sungeun +washington +department +computer +science +engineering +university +washington +seattle diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..a8c1a5c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,66 @@ +nguyen +nguyen +department +computer +science +engineeringuniversity +washingtonbox +seattle +washington +work +world +research +interests +include +operating +systems +distributed +andparallel +systems +networking +security +currently +help +frommy +advisor +johnzahorjan +building +system +support +running +soft +real +timeapplications +visualization +partially +idle +workstations +innows +recently +completed +study +runtime +measurements +ofapplication +characteristics +used +runtime +system +tominimize +application +execution +time +uniprogrammed +multiprocessorsenvironments +well +system +scheduler +make +goodglobal +scheduling +decisions +multiprogrammed +multiprocessorsenvironments +cvpublications +worldvietnameseresources +netcyclingplayground diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..b34e0c5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +tian +homepageyour +browser +rusty +yellow +turkey +frames +even +part +html +standard +click +frames +homepage diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..e561e778 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,217 @@ +ashutosh +tiwary +ashutosh +tiwary +tiwary +washington +department +computer +science +engineering +university +washington +seattle +mostly +full +time +fourth +year +graduate +student +work +area +single +address +space +operating +systems +opal +persistent +object +systems +object +oriented +databases +application +workload +measurement +operating +system +support +databases +past +worked +infrastructures +user +interfaces +distributed +object +systems +spare +time +work +computer +science +group +research +technology +organization +boeing +computer +services +oopsla +workshop +building +large +distributed +software +systems +using +objects +oopsla +workshop +objects +large +distributed +persistent +software +systems +projectsopali +working +distribution +opal +opal +operating +systems +project +addresses +issues +opportunities +involved +creating +single +global +address +space +across +multiple +users +machines +jeff +chase +primary +architect +opal +hank +levy +advisor +working +closely +opal +also +advisor +application +workload +measurement +also +work +measuring +characterizing +behavior +persistent +object +applications +general +techniques +paper +work +area +distributed +object +systems +worked +several +distributed +object +systems +professional +career +thisexperience +basis +oopsla +workshop +building +large +distributed +software +systems +using +objects +organized +follow +oopsla +workshop +objects +large +distrbuted +persistent +software +systems +publications +using +virtual +addresses +object +references +chase +levy +tiwary +proc +international +workshop +object +orientation +operating +systems +september +exception +handling +parallel +distributed +environment +tiwary +levy +ecoop +workshop +exception +handling +july +building +large +distributed +systems +using +objects +tiwary +bosch +addendum +proceedings +oopsla +oops +messenger +october +evaluation +system +application +benchmark +tiwary +narasayya +levy +oopsla +workshop +object +database +behavior +benchmarks +performance +october diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..871cb108 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,103 @@ +tessa +tessa +another +dead +information +superhighway +second +yeargraduate +student +computer +science +university +washington +research +interests +include +sorts +relatedgoodies +currently +working +clio +system +searching +andbrowsing +personal +history +available +currently +seeking +gainful +employment +myresume +online +curious +kittyi +honor +sharing +apartment +gambit +siamese +cats +great +memore +information +found +thereare +pictures +page +tofind +apparently +alsoreally +interested +scotland +classesi +still +working +quals +quarter +taking +last +ofeight +classes +fulfill +breadth +requirement +digital +systems +seminarlinux +gameseverybody +plays +games +maintain +linux +gametome +committed +advancement +linux +pretty +coolgaming +platform +also +first +attempt +java +programming +simple +maze +applet +also +java +linux +sleepingi +known +frequent +seattle +area +bookstores +also +knit +crochet +copyright +tlau +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..46937913 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,55 @@ +martin +tompa +martin +tompa +department +computer +science +engineering +university +washington +seattle +phone +receptionist +lecture +notes +articles +computing +trajectory +thelma +louise +recent +holiday +moon +pearls +among +wash +oysters +collaborative +surrealistic +electronic +prophecy +building +across +pierce +lane +carol +martin +photographer +photo +courtesy +health +sciences +center +educational +resources +provide +many +images +university +washington +martin +tompa +finger +tompa +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..370d0e81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,146 @@ +tracy +kimbrel +held +prisoner +tracy +kimbrel +held +prisoner +tracy +kimbrel +held +prisoner +university +washingtonsince +without +charge +trial +moved +years +toanother +seattle +area +prison +inmates +forced +tomanufacture +airplanes +escaped +institution +wasrecaptured +returned +university +washington +help +hisplight +rescue +imprisonment +list +things +done +curriculum +vitae +imprisoned +details +statement +ofwhat +promises +history +goals +free +captors +department +computer +science +engineering +university +washington +seattle +tracyk +washington +eduhere +captors +force +trace +driven +comparison +algorithms +parallel +prefetching +cachingtracy +kimbrel +andrew +tomkins +hugo +patterson +brian +bershad +edward +felten +garth +gibson +anna +karlin +appear +sigops +usenix +association +symposium +onoperating +system +design +implemenation +near +optimal +parallel +prefetching +cachingtracy +kimbrel +anna +karlin +appear +ieeesymposium +foundations +computer +science +longer +version +integrated +parallel +prefetching +caching +page +extended +abstract +tracy +kimbrel +edward +felten +anna +karlin +proceedings +sigmetrics +conference +measurementand +modeling +computer +systems +probabilistic +algorithm +verifying +matrix +products +usingo +squared +time +base +random +bits +tracy +kimbrel +rakesh +kumar +sinha +information +processing +letters diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..ef0aa733 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,63 @@ +travis +craig +home +page +travis +craig +travis +washington +computer +science +engineering +department +university +washington +seattle +research +interests +mechanisms +predictability +real +time +systems +cache +restoration +queuing +spin +locks +arctic +submarine +currents +courses +taking +quarter +dissertation +working +real +time +systems +time +consuming +side +projects +working +half +time +esca +corporation +helping +keep +volvo +running +press +latest +motor +pool +status +understand +computer +science +travis +craig +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..39de039e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,16 @@ +dean +tullsen +home +page +dean +tullsen +biographical +information +research +interests +bibliography +home +page +download +resumemy +hobbies diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..22f3110e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,325 @@ +michael +vanhilstmichael +vanhilstvanhilst +washington +edumvh +harvard +edupersonal +research +computer +science +engineering +department +university +washington +seattle +usaclick +send +email +message +mike +vanhilst +personalmike +starting +year +graduate +student +university +washington +hopefully +little +luck +finished +around +theend +winter +quarter +immediately +prior +coming +udub +mike +worked +contractor +atibm +research +wrote +motif +widgets +user +unterface +sdata +explorer +mike +start +programmer +fixing +maintainingcomputer +hardware +smithsonian +astrophysical +observatory +part +smithsonian +located +within +harvard +learning +debug +software +mike +could +convince +programmers +hardware +really +working +correctly +mike +stayed +smithsonian +years +time +wrote +program +called +saoimagewhich +used +lots +astronomers +look +images +saoimage +part +gnudistribution +mike +would +like +thank +bill +wyatt +eric +mandel +schwarz +doug +minkfor +starting +guiding +continuing +project +mention +countless +others +contributed +mike +took +year +work +group +seismologistsin +paris +france +data +acquisition +calibration +analysis +truly +wonderful +time +paris +wife +angela +french +language +class +thealliance +francaise +angela +come +paris +year +native +colombiain +south +america +summer +mike +wrote +front +studentsbrowse +university +time +schedule +data +base +uwin +working +talented +staff +computing +communications +folks +brought +pine +special +thanks +bill +shirey +design +tracy +stenvik +wrote +uwin +screen +library +machine +uwin +working +time +schedule +mike +also +taught +beginning +programming +extension +motivated +group +frommicrosoft +product +support +sacrificed +summer +learn +recently +mike +presented +papers +theinternational +symposium +object +technologies +advanced +software +isotas +conference +object +oriented +programming +systems +languages +applications +oopsla +theacm +sigsoft +symposium +foundations +software +engineering +also +presented +poster +oopsla +made +presentations +subjectivity +workshops +oopsla +oopsla +participated +doctoral +symposium +oopsla +participated +demo +uist +thanks +steve +earlier +life +mike +earned +degrees +inarchitecture +wooden +kind +city +planningfrom +mitand +worked +community +development +director +forthe +city +grinnell +iowa +things +works +different +skills +visualdesign +problem +solving +continue +value +still +gets +talk +chris +alexander +seattle +mike +active +student +chapter +washington +software +association +improving +ties +students +large +small +software +companies +area +enjoys +hiking +cross +country +skiing +sailing +andkayaking +also +enjoys +swimming +lake +bronson +recently +mike +free +time +taken +marco +harold +sebastien +hilst +born +mike +post +pictures +soon +locates +another +photo +scanner +visits +since +november +michael +vanhilst +last +modified +friday +november diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..da91b4dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,122 @@ +hello +vassilylong +live +hello +start +really +person +best +linki +come +thus +http +washington +homes +vass +useful +documents +also +shortest +write +young +collected +quite +links +downloaded +fewfiles +expanding +collection +since +thisstuff +selected +links +quotesrussian +home +pagesvarious +linksguides +html +formsother +links +home +pageuw +home +pagencsa +mosaic +home +pagerecently +joined +cecilproject +cecil +cool +pure +object +oriented +language +andvortex +even +cooler +compiler +infrastructure +befaster +maybe +papers +written +member +staff +designed +implemented +themvis +system +assisted +access +ourdepartmental +room +visitor +database +recently +beenupgraded +staff +people +quals +project +expanding +thezpl +languageto +handle +irregular +data +structures +represented +graphs +anddynamically +repartitioned +data +graphs +arrays +myquals +writeup +pages +short +overview +check +theslidesfrom +presentation +pages +slides +also +collected +links +toresources +related +project +vass +washington +eduobjects +mirror +closer +appear +pastor +vybrasyvalsya +okna +pyatyi +deystvoval diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..4aed6054 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,77 @@ +virgil +bourassa +home +page +uwvirgil +evan +bourassavirgil +bourassa +virgil +washington +student +department +computer +science +engineering +theuniversity +washington +seattle +washington +research +interestsinclude +computer +operating +systems +architectures +joined +boeingin +works +scientist +computer +scienceorganization +information +support +services +division +bellevue +washington +received +electrical +engineering +arizonastate +university +tempe +arizona +electricalengineering +university +washington +seattle +washington +computer +science +engineering +theuniversity +washington +accesswhat +working +interests +expertise +resume +patents +inventions +publications +presentations +professional +history +education +achievements +recommendation +letter +statusoccasional +updates +last +modified +virgil +bourassa +virgil +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..9f5fa970 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,300 @@ +vivek +ratan +home +page +vivek +ratan +particulars +graduate +student +computer +science +department +university +washington +currently +academic +leave +working +research +scientistat +bellcore +morristown +researchwork +bellcore +interested +distributed +computing +issues +distributed +systems +application +system +development +high +availability +currently +work +distributed +systems +group +bellcore +high +availability +distributedsoftware +systems +simply +highly +available +system +continues +presence +failures +group +developing +toolset +anatidae +provides +high +availability +mechanisms +distributed +applications +adhere +corba +standard +also +looked +active +replication +schemes +high +availability +indce +applications +recent +effort +focussed +enhancing +availability +fault +tolerance +cell +directory +service +details +projects +foundhere +also +interested +high +availability +issues +integrationof +distributed +technology +servers +university +washington +worked +murphy +project +software +safety +methodologies +software +safety +group +headed +bydr +nancy +leveson +details +safety +research +found +safety +page +much +work +involved +looking +automatic +fault +tree +generation +fromrequirements +specifications +written +rsml +well +working +rsml +language +simulator +publications +list +recent +publications +found +personal +information +born +brought +india +last +eight +years +undergraduate +attended +angelo +state +university +angelo +wesleyan +university +middletown +received +physics +math +computer +science +right +came +university +washington +seattle +puruse +graduate +studies +computer +science +like +play +tennis +whenever +squash +lesserextent +racquetball +suffice +ardently +folow +game +cricket +participating +ultra +cricket +play +email +cricket +simulation +game +many +years +also +follow +exploits +supersonics +mariners +cowboys +last +year +half +taken +keen +interest +learning +ballroom +dancing +waltz +foxtrot +chacha +rhumba +tango +swing +west +coast +please +occasional +dabbles +mambo +area +best +places +learning +dancing +seattle +center +used +band +session +every +saturday +involved +model +united +nations +chapter +model +united +nations +international +educational +organization +simulates +workings +conferences +held +throughout +year +current +topics +restructuring +reforming +parts +like +security +council +ecosoc +world +bank +rapidpopulation +growth +nuclear +proliferation +home +page +chapter +interests +reading +poetry +mirza +ghalib +centuryindian +poet +also +interested +english +literature +especially +romantic +victorian +periods +links +obligatory +collection +sitesthat +tend +visit +often +department +computer +science +engineering +university +washington +seattle +vivek +washington +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..54528a2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,37 @@ +geoff +skywhoi +graduate +student +university +washington +inseattle +whati +master +thesis +wireless +mobile +computing +designing +andbuilding +system +called +mobisaic +currently +avoiding +settling +thesis +topic +wherechateau +guggenheim +annex +university +washingtonseattle +washington +looking +emacs +windows +windows +geoff +voelker +voelker +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..9b7d663c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,64 @@ +home +wayne +home +computer +science +engineering +department +university +washington +seattle +waynew +washington +stuff +currently +looking +memory +system +performance +particular +differentmemory +system +organizations +investigated +work +beingdone +jean +loup +baer +also +looking +interpreters +others +dennis +geoff +alec +rightnow +things +little +rocky +actually +early +version +paper +things +keep +jumping +alpha +interesting +places +fishing +list +cool +sites +however +list +peoplewho +list +people +well +testing +testwayne +wong +waynew +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..655d0a4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,16 @@ +william +chan +home +pagewilliam +chan +home +pagei +spend +time +hell +spare +time +hang +heaven +wchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..a4c31cce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,179 @@ +daniel +weld +daniel +weld +associate +professor +computer +science +engineeringat +university +ofwashington +received +bachelor +degrees +computer +science +biochemistry +yale +university +landed +artificial +intelligence +received +presidential +young +investigator +award +office +naval +research +younginvestigator +award +weld +theadvisory +board +journal +airesearch +guest +editor +computational +intelligence +edited +aaai +report +role +ofintelligent +systems +national +information +infrastructure +isco +program +chair +aaai +weld +published +books +scads +technical +papers +personal +data +reach +office +sieg +hall +phone +work +home +mail +dept +computer +science +engineering +university +washington +seattlewa +research +interests +weld +current +research +interests +artificial +intelligence +specifically +software +agents +planning +example +weld +group +supports +ucpop +planner +used +almost +hundred +sitesworldwide +many +weld +papers +available +electronically +arehis +current +favorites +representing +sensing +actions +middle +ground +revisited +planning +gather +information +aaai +august +planning +based +control +software +agents +aips +ascalable +comparison +shopping +agent +world +wide +january +softbot +based +interface +internet +cacm +july +anintroduction +least +commitment +planning +magazine +winter +select +exhaustivelist +recreation +absent +office +weld +foundat +cafe +allegro +stormymountains +climbing +past +enjoyed +traveling +theworld +likely +found +playing +twin +boys +adam +galen +invited +visit +gallery +pacific +northwest +desert +wilderness +photographs +also +illustrated +story +morocco +weld +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..df6346bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,13 @@ +wendy +belluomini +wendy +belluomini +wendy +washington +graduated +masters +currently +working +univ +utah +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..074ab669 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,67 @@ +wilson +hsiehwilson +hsiehi +postdoc +thedepartment +computer +science +engineering +theuniversity +washington +inseattle +member +thespin +project +received +thedepartment +electrical +engineering +computer +sciencein +theschool +engineeringatmit +worked +thelaboratory +computer +science +advisors +werefrans +kaashoekandbill +weihl +research +compilation +parallel +systems +myresearch +interests +interactions +among +compilers +programminglanguages +runtime +operating +systems +architectures +selected +publicationsselected +linkspersonal +interestswilson +hsieh +department +computer +science +engineering +university +washington +seattle +office +sieg +moved +phone +numberhas +changed +voice +whsieh +washington +public +keyoctober diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..1849bcc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,103 @@ +alec +wolman +home +page +alec +wolmanwolman +washington +eduworkcomputer +science +engineering +departmentuniversity +washingtonbox +seattle +home +seattle +currently +graduate +student +thecomputer +science +departmentat +university +washington +office +isroom +thechateau +gradual +school +worked +fordigital +equipment +corp +cambridge +research +research +interests +include +operating +systems +networking +architecture +current +recent +projects +scalable +networking +performance +etch +binary +instrumentation +optimization +executablesrocky +interpreter +performanceon +line +papers +firewall +application +relays +treese +wolman +summer +usenix +latency +analysis +network +wolman +voelker +thekkath +winter +usenix +structure +performance +interpreters +romer +voelker +wolman +wong +baer +bershad +levy +appear +asplos +hungry +otter +fixhas +strange +ideas +nervous +habit +really +play +guitar +wolman +links +wolman +hallwolman +diseasewolmanized +pressure +treated +lumber +wolman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..0c49408a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,132 @@ +xiaohan +xiaohan +xqin +washington +computer +science +engineering +department +university +washington +seattle +office +sieg +phone +year +graduate +student +working +jean +loup +baer +research +interests +include +computer +architectures +parallel +distributed +systems +performance +evaluation +methods +modeling +simulation +short +term +goal +school +soon +possible +papers +performance +evaluation +cluster +based +architectures +baer +submitted +conference +performance +explicit +communicationprimitives +cache +coherent +multiprocessor +systems +baer +appear +proceedings +hpca +comparative +study +conservative +optimistic +trace +driven +simulations +baer +award +paper +simulation +symposium +page +optimistic +trace +driven +simulation +baer +tech +report +dept +computer +science +engineering +univ +washington +parallel +trace +driven +simulator +implementation +performance +baer +proceedings +international +conference +parallel +processing +page +graph +toolfor +monitoring +visualizing +basedmultiprocessor +performance +zhang +nalluri +journal +parallel +distributed +computing +june +page +performance +prediction +evaluation +parallel +processingon +numa +multiprocessor +zhang +ieee +trans +software +engineering +page +interesting +stuff +photos +chinareadings +chinesesearch +engine diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..579111d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,82 @@ +yasushi +saitoyasushi +saito +second +year +graduate +student +atdepartment +computer +science +engineering +university +washington +seattle +currently +workingwith +brian +bershad +thespin +project +address +andpersonal +info +meta +links +metacrawler +yahoo +desktop +reference +index +alta +vista +lycos +archie +tech +index +research +links +spin +internal +documentation +modula +info +time +schedule +surveying +operating +systems +transaction +service +spin +qual +project +sightseeing +japanese +links +random +info +javascript +apprentice +page +using +linux +connect +gateway +japanized +perl +patch +touch +type +trainer +dvorak +lesson +texts +yasushi +washington +want +finger +talk +trycanvas +washington +desktop diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..07b903a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,85 @@ +oren +zamir +home +pageoren +zamir +home +page +department +computer +science +engineeringuniversity +washingtonbox +seattle +office +chateau +zamir +washington +home +seattle +zamir +washington +edui +israeli +graduate +student +department +computerscience +engineering +university +washington +myundergraduate +degree +physics +mathematics +hebrewuniversity +jerusalem +israel +interests +fields +artificial +intelligence +softwareengineering +currently +working +line +clustering +algorithmsfor +internet +document +retrievals +basic +idea +help +userwith +internet +searches +resulting +hundreds +documents +workis +done +part +metacrawler +parallel +search +service +along +orenetzioni +erik +selberg +resume +available +pictures +things +like +diving +sinai +jeeptouring +skiing +pictures +last +rafting +trip +interesting +links diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..256daffe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,175 @@ +title +jean +loup +baer +professorand +adjunct +professor +electrical +engineering +received +diplome +ingnieur +electricalenginering +doctorat +cycle +computer +science +theuniversit +grenoble +france +ucla +prior +joining +university +washington +research +engineer +laboratoire +decalcul +universit +grenoble +member +digital +technologygroup +ucla +present +interests +parallel +anddistributed +processing +computer +systems +architecture +author +coauthor +papers +theseareas +author +textbook +computer +systems +architecture +computer +science +press +professor +baer +served +ieee +computer +science +distinguishedvisitor +national +lecturer +guggenheim +fellow +ieee +fellow +editor +journal +parallel +distributed +computing +journal +computer +languages +served +asprogram +chairman +international +conference +parallelprocessing +program +chairman +internationalsymposium +computer +architecture +general +chairman +ofthe +internationalsymposium +computer +architecture +currently +chair +sigarch +eighteen +students +completed +dissertation +professorbaer +direction +twelve +work +industry +research +laboratoriesand +inacademia +although +years +baer +hashad +difficulty +retaining +french +accent +courses +recent +research +projects +look +computer +architecture +page +projects +involved +cache +coherence +protocols +cluster +architectures +improved +protocols +single +systems +performance +software +primitives +clusters +appear +hpca +prefetching +uniprocessors +hardware +also +ieee +comparisonwith +blocking +caches +also +asplos +prefetching +multiprocessors +isca +impact +speculative +execution +caches +dennis +home +page +andisca +parallel +trace +driven +simulations +conservative +approach +also +icpp +optimisticapproach +comparison +also +distributed +simulation diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..3a4a7638 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,149 @@ +craig +chambers +craig +chambers +assistant +professor +joined +faculty +received +degree +computer +science +computer +science +stanford +chambers +research +interests +design +implementationof +advanced +programming +systems +incorporating +expressive +programminglanguages +efficient +implementations +supportive +programmingenvironments +currently +investigating +object +oriented +languagesand +leads +ceciland +vortex +projects +cecil +purely +object +oriented +languageserving +vehicle +investigating +multi +methods +static +typing +modules +features +vortex +optimizing +compilersystem +object +oriented +languages +incorporating +intra +andinterprocedural +static +analyses +profile +guided +optimizations +withfront +ends +cecil +modula +java +previously +chamberswas +member +self +project +chambers +also +member +spinproject +spin +extensible +operating +system +microkernel +whichsupports +dynamic +adaptation +system +interfaces +implementationsunder +direct +application +control +still +maintaining +systemintegrity +isolating +applications +spin +utilizes +dialect +themodula +language +pointer +safe +kernel +extension +language +spinalso +relies +dynamiccompilation +achieve +high +performance +despite +fine +grainedextensibility +click +herefor +information +undergraduate +graduate +level +researchprojects +areas +contact +informationprof +craig +chambersdepartment +computer +science +engineeringuniversity +washingtonbox +seattle +chambers +washington +mail +requiring +street +address +sieg +hall +room +last +updated +april +chambers +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..314d4935 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,138 @@ +carl +ebeling +home +page +carl +ebelingdepartment +computer +science +engineering +university +ofwashington +seattle +washington +voice +mail +ebeling +washington +office +sieg +hall +room +carl +ebeling +associate +professor +physics +wheatoncollege +computer +science +southern +illinoisuniversity +computer +science +carnegie +mellonuniversity +joined +carl +ebeling +research +interests +fall +categories +vlsiarchitectures +computer +aided +design +digital +systems +hasworked +number +vlsi +projects +including +hitech +chessmachine +apex +graphics +chip +drawing +spline +curves +andsurfaces +triptych +field +programmable +gate +array +currently +heis +involved +chaos +project +building +multicomputer +routingnetwork +interests +focus +methods +optimizing +theperformance +circuits +using +level +sensitive +latches +placementand +routing +algorithms +fpgas +particularly +triptych +teachingspring +advanced +logic +designoffice +hours +monday +thursday +travel +april +fccm +napamay +burlington +chicagojune +vegasresearch +projects +northwest +laboratory +integrated +systems +chaos +router +project +triptych +high +density +fpga +architecture +publications +journal +articles +conference +workshop +papers +graduate +students +soha +hassoun +neil +mckenzie +darren +cronquist +paul +franklin +amara +galleryelan +galleryebeling +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..03c8dcbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,48 @@ +steve +hanksuniversity +washingtondepartment +computer +science +design +agent +architecturesai +magazine +serious +links +home +page +spring +uncertainty +conference +information +uncertainty +page +group +page +links +seattle +restaurants +seattle +symphony +schedule +seattle +wine +opera +schedule +server +edita +gruberova +page +photos +carlo +maria +giulini +discography +sumac +information +tennis +news +hanks +washington +last +update diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..a51ea38e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,82 @@ +title +alistair +holden +professor +professor +electrical +engineering +originated +highlands +scotland +receivedhis +degree +university +glasgow +spent +years +withthe +british +broadcasting +corporation +engineering +division +graduateapprentice +spent +research +division +spent +year +yale +edison +fellowship +degree +phddegree +university +washington +dissertation +learningin +artificial +intelligence +interest +began +taking +coursefrom +colin +cherry +imperial +college +london +thebbc +initiated +computer +science +program +theuw +time +group +faculty +mostly +math +departmentsformed +group +within +graduate +school +currently +working +applications +knowledge +based +systems +verification +expert +systems +integrated +symbolic +neural +netmethodology +speech +understanding +computer +aided +design diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..bf672e46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,178 @@ +dick +karprichard +karpprofessor +ofcomputerscience +engineering +andadjunct +professor +ofmolecularbiotechnologyuniversity +ofwashington +karp +washington +eduawards +membershipsnational +medal +science +babbage +prize +berkeley +university +professor +fellow +turing +award +member +national +academy +sciencesmember +national +academy +engineeringfellow +american +academy +arts +sciencesfellow +american +association +advancement +sciencedistinguished +teaching +award +berkeley +academic +senate +class +chair +berkeleylanchester +prize +operations +research +society +america +institute +management +science +fulkerson +prize +american +mathematical +society +mathematical +programming +society +john +neumann +theory +prize +operations +research +society +america +institute +management +science +faculty +research +lecturer +berkeley +hermann +weyl +lecturer +institute +advanced +study +john +neumann +lecturer +society +industrial +appliedmathematics +miller +research +professor +berkeley +honorary +doctorates +georgetown +university +university +massachusetts +technion +university +pennsylvania +member +national +advisory +board +computer +professionals +forsocial +responsibility +presentmember +board +governors +weizmann +institute +science +presentmember +board +trustees +international +computer +scienceinstitute +presentselected +publications +combinatorics +complexity +randomness +turingaward +lecture +communications +constructing +perfect +matching +random +upfal +wigderson +combinatorica +probabilistic +analysis +partitioning +algorithms +traveling +salesman +problem +plane +mathematics +ofoperations +research +theoretical +improvements +algorithmic +efficiency +fornetwork +flow +problems +edmonds +journal +theacm +reducibility +among +combinatorial +problems +complexity +computer +computations +plenum +press +traveling +salesman +problem +minimum +spanningtrees +part +held +mathematical +programming +karp +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..6ff5d62e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,405 @@ +lazowska +ahalf +century +exponentialprogress +information +technology +university +washington +annualfaculty +lecture +vicepresident +gore +speech +eniac +thanniversary +celebration +congress +talks +like +georgejetson +support +policies +appropriate +forfred +flintstone +universityof +california +berkeley +invents +chinese +cooking +nathanmyhrvold +joinsedlazowska +theuwcse +faculty +trip +memory +laneed +lazowskaed +lazowska +professor +chair +thedepartment +computer +science +lazowska +mostlywearsties +pushes +paper +racks +frequent +flier +miles +graduate +students +seem +picked +onthis +mbquicktime +healso +hosts +lots +visitors +surprising +number +havefunnynoses +lazowska +member +board +directors +computing +researchassociation +members +include +essentially +allgraduate +departments +industrial +research +laboratoriesin +field +chair +ofcra +government +affairs +committee +serves +national +science +foundation +advisorycommittee +computer +information +science +andengineering +national +research +council +scomputerscience +telecommunications +board +member +person +technical +advisory +board +formicrosoft +research +personnationalsemiconductor +computer +science +systems +academicadvisory +council +board +directors +ofdata +corporation +scientific +advisory +board +forcable +howse +ventures +cascadia +fund +belongs +standing +advisory +committeesfor +thedepartment +eecsat +berkeley +andthe +departments +computer +science +atstanford +university +universityof +virginia +hongkong +university +science +technology +member +turing +award +selection +committee +completed +service +person +national +research +councilpanel +reviewing +multi +agencyhigh +performance +computingand +communications +program +brooks +sutherland +committee +andhas +recently +served +chair +committee +examinersfor +graduate +record +examinations +board +computer +science +test +chair +sigmetrics +association +computing +machinery +sspecial +interest +group +concerned +computer +system +performance +chair +software +systems +award +committee +program +chairof +symposium +operating +systems +principles +andeditor +ieee +transactions +computers +university +washington +addition +servingas +chair +thedepartment +computer +science +engineering +lazowska +chair +university +advisory +committee +onacademic +technology +recently +served +member +thecommittee +deanship +college +artsand +sciences +chair +review +committee +forthe +program +molecular +biotechnology +amember +performance +review +committee +deanof +engineering +selected +deliver +theuniversity +washington +annual +faculty +lecture +lazowska +fellowof +associationfor +computing +machinery +theinstitute +electrical +andelectronics +engineers +seventeenph +students +studentshave +completed +degrees +working +miscellaneous +links +integratedoverview +university +washington +thedepartment +region +also +apersuasion +player +version +intended +forlocal +consumption +theimpact +research +university +information +technology +perspective +uwcse +professional +masters +program +persuasion +playertopten +reasons +major +information +csebuilding +project +abbreviated +cvcomputingresearch +driving +informationtechnology +information +industry +forwardmassy +goldmanreport +alleging +cseph +production +issued +flawed +data +medianyears +conference +boardstudy +doctorate +programs +think +ahalf +century +exponentialprogress +information +technology +university +washington +annualfaculty +lecture +driver +information +highway +university +washington +saturdayseminar +november +testimonyto +houseappropriations +committee +concerning +april +testimonyto +house +science +committee +concerning +hpcc +october +vicepresident +gore +speech +eniac +thanniversary +celebration +february +interestinghome +pages +sometimes +demo +purposes +odeto +executive +vice +president +tallman +trask +departsfor +duke +university +universityof +california +berkeley +invents +chinese +cooking +nathanmyhrvold +joinsedlazowska +theuwcse +faculty +trip +memory +lanelazowska +downs +family +home +pagedirections +houseshilshole +aquatic +club +home +pagerecently +discoveredreview +grade +grade +poetryfinger +lazowska +washington +scheduleinformation +seeme +office +reflector +home +page +http +washington +homes +lazowska +lazowska +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..af579eae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,130 @@ +title +alan +shaw +professor +graduated +bachelor +sdegree +engineering +physics +university +toronto +amaster +mathematics +stanford +university +incomputer +science +stanford +addition +facultyappointments +university +washington +starting +hasbeen +assistant +professor +computer +science +cornell +university +visiting +professor +fulbright +research +scholar +university +paris +guest +professor +informatics +zurich +research +associate +atthe +stanford +linear +accelerator +center +systems +engineer +theibm +corporation +current +research +interests +real +time +systems +software +specification +methods +professor +shaw +publicationsinclude +textbook +operating +systems +book +softwareengineering +introductory +computer +science +text +andan +edited +book +document +preparation +systems +served +memberof +editorial +committee +member +computer +sciencescreening +committee +fulbright +awards +associateeditor +journal +real +time +systems +associate +editor +ieee +transactions +software +engineering +among +things +supervised +many +theses +projects +fifteen +dissertations +including +distinguished +dissertation +half +former +students +academic +positions +half +work +living +professor +shaw +hobbies +include +good +food +trumpet +hiking +biking +hobbies +tennis diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..41cb7593 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,137 @@ +title +lawrence +snyder +professor +received +bachelor +sdegree +university +iowa +mathematics +economics +andin +received +carnegie +mellon +university +computerscience +visiting +scholar +university +washington +joined +faculty +permanently +serving +onthe +faculties +yale +purdue +visiting +scholarat +harvard +professor +snyder +research +ranged +proofs +theundecidability +properties +programs +design +developmentof +single +chip +cmos +microprocessor +quarter +horse +hecreated +configurable +highly +parallel +chip +architecture +thepoker +parallel +programming +environment +inventor +chaoticrouting +following +completion +blue +chip +project +nowprincipal +investigator +orca +project +nwlis +professor +snyder +associate +editor +journal +computerand +systems +sciences +parallel +systems +editor +journal +ofthe +area +editor +ieee +transactions +parallel +anddistributed +systems +served +national +science +foundationadvisory +committee +division +computer +research +participates +numerous +national +advisorycommittees +future +research +directions +parallel +computation +andcomputer +science +policy +served +distinguished +doctoraldissertation +award +selection +committee +chairing +program +chair +first +symposium +parallel +algorithmsand +architectures +addition +dozen +students +completed +doctoral +degreesunder +direction +professor +snyder +guided +numerous +masters +seniorprojects diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..eedd930c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,109 @@ +arun +somani +arun +somani +professor +professor +electrical +engineering +earned +msee +degrees +electrical +engineering +mcgill +university +montreal +canada +respectively +prior +worked +scientific +officer +govt +india +delhi +period +designed +developed +anti +submarine +warfare +system +indian +navy +professor +somani +research +interests +area +offault +tolerant +computing +interconnection +networks +computer +architecture +parallel +computer +systems +parallel +algorithms +currently +involved +three +major +projects +high +integrity +system +design +addressing +issues +related +tocache +memory +design +redundant +computer +systems +evaluation +tools +systems +congestion +control +fault +tolerance +broadband +networks +development +proteus +architecture +multiprocessor +system +automated +classification +objects +based +generalizedenhanced +hypercube +reconfigurable +interconnection +network +exploring +coarse +grain +parallelism +likes +cook +indian +food +hiking +play +bridge +table +tennis +tennis +information +dpcnl +proteus diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..a47792be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,269 @@ +steven +tanimoto +home +page +steven +tanimoto +professor +computer +science +engineering +adjunct +professor +electricalengineering +received +degree +fromharvard +princeton +joined +theuniversity +washington +faculty +years +teaching +atthe +university +connecticut +visiting +professorat +institut +programmation +university +paris +france +anda +visiting +scholar +linkping +university +sweden +since +hasalso +served +adjunct +member +department +electricalengineering +visiting +scientist +atkobe +university +japan +thinking +machines +corporation +cambridge +massachusetts +linkping +university +sweden +june +june +visiting +scientist +theinstitut +research +enseignement +superieur +techniquesde +electronique +ireste +university +nantes +france +addition +assisting +various +conferences +hasrecently +home +page +forimage +conference +devoted +image +processingand +communication +conference +take +place +bordeaux +france +professor +tanimoto +research +interests +include +computer +analysis +ofimages +particularly +using +parallel +processors +educational +technology +visual +programming +artificial +intelligence +currentlydirects +sponsored +project +mathematics +experiences +throughimage +processing +whose +objective +develop +personal +computer +softwarethat +motivates +grade +students +study +mathematics +written +coauthored +papers +edited +thebook +structured +computer +vision +author +textbook +entitled +elements +artificial +intelligence +introductionusing +lisp +published +common +lisp +editions +accompanyingsoftware +currently +working +book +subject +ofparallel +computation +image +processing +tanimoto +organized +ieee +computer +society +internationalworkshop +visual +languages +held +seattle +servedas +general +chair +meeting +bergen +norway +also +served +program +chair +international +conferenceon +pattern +recognition +subconference +parallel +computation +andas +program +chair +ieee +computer +society +conferenceon +computer +vision +pattern +recognition +served +programcommittees +numerous +conferences +image +processing +patternrecognition +general +chairman +ieee +computer +societyworkshop +computer +architecture +pattern +analysis +machineintelligence +currently +serves +steering +committee +theieee +symposiaon +visual +languages +served +editorialboards +journals +pattern +recognition +journal +visual +languages +computing +cvgip +image +understanding +served +editor +chief +ieeetransactions +pattern +analysis +machine +intelligence +addition +research +related +activities +tanimoto +served +chair +college +engineeringeducational +policy +committee +vice +chair +college +council +chair +elected +fellow +ieee +outside +computer +science +steve +tanimoto +enjoys +playing +jazz +andclassical +piano +music diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..58ca81c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,212 @@ +title +paul +young +professor +graduateof +antioch +college +received +hejoined +university +washington +seventeen +years +atpurdue +university +early +faculty +members +inperhaps +first +computer +science +department +united +states +also +national +science +foundation +postdoctoral +fellow +atstanford +served +faculty +reed +college +served +briefly +aschairman +computing +information +sciences +department +theuniversity +mexico +twice +taught +visiting +professorin +computer +sciences +division +university +california +berkeley +became +associate +dean +research +facilities +college +engineering +research +interests +theoretical +computer +science +emphasis +questions +computational +complexity +thegeneral +theory +algorithms +connections +mathematicallogic +author +coauthor +papers +area +iscoauthor +graduate +textbook +general +theory +algorithms +served +three +times +program +committee +symposiumon +theory +computing +served +executivecommittee +nominating +committee +special +interestgroup +theory +computing +sigact +also +chairmanof +program +committee +ieee +computer +society +annualsymposium +foundations +computer +science +focs +hasserved +vice +chairman +chairman +computer +society +stechnical +committee +mathematical +foundations +computing +also +served +program +committee +chair +programcommittee +structural +complexity +theory +conference +served +national +science +foundation +advisorysubcommittee +computer +science +served +chairman +thiscommittee +served +chairman +ofthe +computing +research +association +professor +young +served +editorial +boards +special +issues +information +control +annals +history +ofcomputing +currently +serves +editorial +boards +theoretical +computer +science +notre +dame +journal +formallogic +journal +computing +system +science +eleven +students +completed +doctoral +dissertations +underprofessor +young +direction +several +gone +dopostdoctoral +work +cornell +university +ofcalifornia +berkeley +eight +currently +hold +faculty +positions +avariety +universities +chosen +industrial +employment +professor +young +leather +motorcycle +jacket +reads +ratherthan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..b637b462 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,86 @@ +title +john +zahorjan +professor +graduated +frombrown +university +received +university +oftoronto +received +presidential +young +investigatoraward +primary +research +interests +areas +scheduling +parallelsystems +runtime +support +parallel +computations +applications +supportfor +mobile +computing +current +focus +scheduling +policies +support +continuous +mediaapplications +involving +real +time +audio +video +thegoal +provide +policy +system +interface +allows +applications +torespond +easily +changes +system +load +active +research +topics +include +techniques +runtime +parallelizationof +code +written +sequential +language +support +programs +exhibiting +bothcontrol +data +parallelism +programming +constructs +development +applications +intended +formobile +computing +platforms +zahorjan +editorial +boards +ieee +transactionson +software +engineering +computing +surveys diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..14a55ba1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,265 @@ +mathematics +experiences +image +processing +metip +mathematics +experiences +image +processing +metip +project +director +steven +tanimoto +department +computer +science +emphasize +practical +uses +mathematics +encourage +discussions +group +learning +encourage +exploratory +open +ended +learning +goal +metip +project +digital +image +processing +help +meet +theseobjectives +particular +developed +series +applicationsdesigned +allow +students +manipulate +digitized +images +choice +materials +intended +used +enrichment +activities +rather +part +astandard +classroom +curriculum +teachers +play +various +roles +withthese +activities +example +catalyze +student +learning +byleading +discussions +theconcepts +students +explored +computer +metip +project +currently +number +programs +allow +students +toexplore +mathematics +image +processing +pixel +calculator +click +order +free +copy +today +image +warper +click +order +free +copy +today +transform +programmer +click +order +free +copy +today +applications +developed +primarily +pentium +based +srunning +microsoft +windows +application +pixel +calculator +alsoavailable +apple +macintosh +volunteer +mathematicsteachers +participate +testing +experimental +learning +materials +transcriptproject +currently +designing +record +keeping +framework +willfacilitate +storage +personal +academicinformation +hard +disk +floppy +owned +student +list +people +working +themetip +project +closely +related +project +involved +study +ofmultiplayer +educational +activities +metip +project +working +tointegrate +activities +ideas +described +prospects +forthe +direct +distributed +image +databases +educational +image +processing +currently +project +collecting +experiences +users +itsxform +image +transformation +software +done +somethingfun +useful +software +please +know +putting +current +version +documentation +online +link +little +demonstrations +xform +beenput +together +graduate +students +took +seminar +winter +xform +programming +environment +integrated +witha +subset +common +lisp +offers +technical +essentials +newapproach +learning +teaching +computer +programming +fundamentalattraction +using +approach +students +learn +program +thecomputer +pursuit +creating +neat +visual +effects +digitalimages +portraying +people +things +interest +successfully +installed +software +would +like +discussteaching +programming +please +contact +links +related +projects +listed +metip +supported +part +national +science +foundation +undergrant +number +bricker +washington +tanimoto +washington +last +modified +tuesday +february diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..0c7b0344 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,65 @@ +university +washington +research +mobile +ubiquitous +computing +mobile +computing +university +washingtonhere +overview +mobile +computingresearch +projects +mobisaic +information +system +mobile +wireless +computing +environment +system +infrastructure +mobile +handheld +computing +task +graph +manager +application +infrastructure +coping +resource +variability +papers +survey +paper +describing +fundamental +challenges +field +programming +methodology +disconnected +operationdistributed +transactions +mobilecomputing +systemcontacts +prof +brian +bershadprof +gaetano +borriellomarc +fiuczynskigeorge +formanprof +hank +levygeoff +voelkerterri +watsonprof +john +zahorjan +last +updated +forman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..4f257915 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,215 @@ +cecil +vortex +projectuw +cecil +vortex +projectwelcome +home +page +cecil +vortex +projects +conduct +research +programming +language +design +implementation +emphasis +issues +object +oriented +languages +cecil +purely +object +oriented +language +intended +support +rapidconstruction +high +quality +extensible +software +cecil +incorporatesmulti +methods +simple +prototype +based +object +model +mechanism +tosupport +structured +form +computed +inheritance +module +basedencapsulation +flexible +static +type +system +allowsstatically +dynamically +typed +code +freely +vortex +optimizing +compiler +infrastructure +forobject +oriented +high +level +languages +targets +pureobject +oriented +languages +like +cecil +hybrid +object +orientedlanguages +like +modula +java +vortex +currently +incorporateshigh +level +optimizations +static +class +analysis +class +hierachyanalysis +profile +guided +receiver +class +prediction +profile +guidedselective +procedure +specialization +intraprocedural +message +splitting +automatic +inlining +static +closure +analyses +also +includes +acollection +standard +intraprocedural +analyses +commonsubexpression +elimination +dead +assignment +elimination +vortexcompiler +written +entirely +cecil +initial +beta +release +cecil +vortex +system +currentlyavailable +sparcs +running +either +sunos +sunos +solaris +send +mail +cecil +interest +request +message +body +subscribe +like +subscribe +list +ofinterested +parties +information +obtain +thebeta +release +recently +finished +technical +report +describes +much +implementation +research +information +project +overview +detailed +overview +project +goals +direction +postscript +version +also +available +project +members +current +past +project +members +papers +papers +written +sample +projects +list +sample +research +projects +available +people +uwcse +internal +documentation +project +internal +documentation +available +people +support +page +lists +sources +support +project +related +projects +pointers +object +oriented +language +implementation +projectslast +updated +august +cecil +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..611dcfec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,45 @@ +grail +graphics +imaging +laboratorywelcome +home +page +grail +graphics +imaging +laboratory +theuniversity +washington +department +computer +science +engineering +information +people +courses +research +projects +publications +theses +software +data +cool +images +neighborhood +department +computer +science +engineering +university +washington +seattle +local +interest +grail +disk +usage +policy +comments +mtwong +washington +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..3795069c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,118 @@ +chaotic +routing +project +chaotic +routing +project +computer +science +engineering +department +university +washington +seattle +chaos +friend +mine +dylan +chaotic +peopleall +sorts +people +work +chaotic +routing +project +research +papers +technical +reports +repository +research +papersand +technical +reports +chaoticrouting +project +available +chaos +router +chip +chaos +router +chip +implementation +chaos +routing +algorithm +hardware +built +tested +micron +cmos +redesigned +micron +process +better +performance +simulator +chaos +router +simulator +powerful +simulator +simulate +allsorts +networks +routing +algorithms +includes +nice +graphicalfront +standards +presentation +results +pcrcw +discussed +presentationof +simulation +results +routing +algorithms +able +come +upwith +guidelines +presentation +results +research +groups +building +list +research +groups +thathave +webs +describing +research +routing +interconnection +networks +parallel +computer +routing +communication +workshop +pcrcw +pcrcw +held +univeristy +washington +seattle +proceedings +available +university +washington +home +page +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..1fdd2d93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,273 @@ +chinook +project +chinook +project +shinook +chinook +salmon +large +salmon +oncorhynchus +tshawytscha +pacific +amer +name +tribe +warm +wind +blows +east +rocky +mountains +warm +southerly +wind +west +rocky +mountains +rare +american +sled +doga +hardware +software +synthesis +toolfor +real +time +embedded +systems +chinook +hardware +software +synthesis +cadtool +embedded +systems +designed +control +dominated +reactivesystems +timing +constraints +chinook +maps +behavioral +descriptionto +user +target +architecture +filling +details +neededto +build +complete +system +enables +designers +make +informed +designdecisions +high +level +early +design +cycle +rather +reiterateafter +worked +level +details +retargetability +willnot +make +designs +maintainable +also +enables +designerto +take +advantage +technologies +instead +tied +legacycode +chinook +currently +active +development +generation +currentlywe +actively +working +software +architecture +synthesis +interprocessorcommunication +synthesis +efficient +accurate +simulation +moredetails +become +available +shortly +chinook +characterized +follows +meets +timing +constraints +ratherthan +trying +maximize +average +performance +utilization +assumesmanual +partitioning +believe +issues +intricateand +sometimes +even +technical +want +designers +evaluate +differentarchitectures +rather +forcing +single +fixed +processor +asicarchitecture +synthesize +software +architectures +rather +rely +onoff +shelf +real +time +kernels +discourage +retargeting +first +version +chinook +synthesis +tool +operational +innovember +version +shownat +design +automation +conference +diego +june +mainfeatures +include +automatic +connection +processor +peripheraldevices +generation +sequential +code +concurrent +description +andsynthesizing +device +drivers +inputs +verilog +outputs +hardwarenetlist +needed +connect +hardware +components +together +softwareprogram +processor +main +topics +include +interfacingproblem +hardware +software +components +scheduling +timingconstraints +partitioning +functionality +improved +version +demonstrated +nato +summer +school +swcodedesign +tremezzo +italy +june +incorporated +severalmore +interface +synthesis +techniques +including +memory +mapped +moreefficient +code +generation +simulator +chinookersfacultygaetano +borriellograduate +students +chou +ross +ortegaken +hinesian +macduff +recent +selizabeth +walkupscott +hauck +henrik +hulgaardstafflarry +mcmurchielist +paperschinook +sponsorsarpa +contract +national +science +foundation +grant +national +science +graduate +fellowship +walkup +patricia +roberts +harris +fellowship +ortega +graduate +fellowship +chou +embedded +links +department +computer +science +engineering +universityof +washington +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..4490fce3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,189 @@ +emerald +emerald +projectideally +development +phase +fpga +architecture +would +makeuse +reliable +mapping +tools +produce +accurate +performanceevaluations +proposed +designs +unfortunately +given +quickproduction +time +frames +faced +developers +tool +construction +isoften +postponed +many +architectural +features +beenfrozen +satisfy +need +fast +tool +prototyping +havedesigned +emerald +powerful +architecture +driven +system +quickdevelopment +fpga +tools +heart +emerald +provides +basicfeatures +needed +fpga +systems +logic +block +analysis +synthesisand +technology +mapping +global +placement +partitioning +anddetailed +placement +routing +environment +provides +anefficient +thoroughly +specify +fpga +routing +logic +blockarchitectures +well +architecture +specific +metrics +tailorplacement +routing +moreover +emerald +parameterized +schematicspecifications +allow +architectural +variations +quickly +capturedand +evaluated +emerald +publications +documents +contained +page +included +bythe +contributing +authors +means +ensure +timely +dissemination +ofscholarly +technical +work +commercial +basis +copyright +andall +rights +therein +maintained +authors +copyrightholders +notwithstanding +offered +works +hereelectronically +understood +persons +copying +thisinformation +adhere +terms +constraints +invoked +eachauthor +copyright +works +reposted +without +theexplicit +permission +copyright +holder +definitive +emerald +paper +darren +cronquist +larry +mcmurchie +emerald +architecture +driven +tool +compiler +fpgas +appear +proceedings +sigda +fourth +international +symposium +field +programmable +gate +arrays +february +router +used +emeraldlarry +mcmurchie +carl +ebeling +pathfinder +negotiation +basedperformance +driven +router +fpgas +proceedings +third +international +symposium +field +programmable +gate +arraysaided +design +february +researchers +darren +cronquist +carl +ebeling +larry +mcmurchie diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..db839e6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,220 @@ +northwest +laboratory +integrated +systems +northwest +laboratory +integrated +systems +department +computer +science +engineering +university +washington +seattle +usathe +department +computer +science +engineering +university +washington +engaged +large +scale +integration +vlsi +computer +aided +design +research +development +education +since +late +today +northwest +laboratory +integrated +systems +focus +wide +variety +vlsi +architectures +embedded +sytems +research +current +research +projects +asynchronous +circuits +verificationtime +separation +events +specification +synthesis +verification +timed +asynchronous +circuits +asynchronous +circuits +survey +current +asynchronous +design +methodologies +well +first +fpga +asynchronous +circuits +fpgas +rapid +prototypingtriptych +montage +fpga +architectures +development +triptych +montage +fpga +architectures +architectures +improved +densities +current +commercial +fpgas +multi +fpga +systems +rapid +prototyping +development +springbok +rapid +prototyping +system +board +level +designs +well +partitioning +assignment +routing +topology +work +general +multi +fpga +systems +emerald +architecture +adaptive +toolset +fpgas +complete +mapping +placement +routing +toolscan +generated +automatically +description +fpgaarchitecture +architecture +specific +metrics +incorporatedinto +various +tools +improve +results +embedded +systemsthe +chinook +project +hardware +software +design +synthesis +simulation +system +embedded +applications +performance +optimization +synchronous +circuitsretiming +level +clocked +circuits +efficient +algorithms +retiming +circuits +uselevel +sensitive +latches +improve +performance +reduce +cost +andincrease +tolerance +clock +skew +architectural +retiming +methods +improving +performance +synchronouscircuits +latency +feedback +contraints +network +routerthe +chaoticrouting +project +self +tuned +systemsself +tuned +systems +directed +kehlprevious +research +projects +gemini +validating +layout +comparing +specification +circuit +implemented +circuit +mactester +cost +digital +functional +tester +chips +circuits +cmos +voltage +levels +arpa +reportsarpa +bluebook +paragraphs +overview +accomplishments +embedded +systems +high +performance diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..2134b495 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,94 @@ +software +safety +university +washington +software +playing +increasingly +important +role +systems +nuclear +reactors +aircraft +defence +space +systems +chemical +plants +medical +equipment +consequences +malfunction +safety +critical +systems +must +pass +rigorous +tests +reviews +used +although +system +safety +engineering +techniques +existed +decades +apply +systems +contain +digital +computers +software +goal +university +washington +safety +project +develop +theoretical +foundation +safety +methodology +building +safety +critical +systems +built +upon +foundation +safeware +system +safety +computers +nancy +leveson +summarizes +issues +involved +lays +foundation +methodology +working +safety +analysis +techniques +support +methodology +prototype +tools +used +validate +specification +analysis +techniques +university +washington +computer +science +software +engineering +safety +links diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..41ecf860 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,239 @@ +internet +softbotinternet +softbotthe +softbot +finalists +discoverawards +technological +innovation +computer +software +building +autonomous +agents +interact +real +world +softwareenvironments +operating +systems +databases +pragmaticallyconvenient +intellectually +challenging +substrate +research +support +claim +utilizing +planning +machine +learningtechniques +develop +internet +softbot +software +robot +acustomizable +moderately +intelligent +assistant +internetaccess +softbot +accepts +goals +high +level +language +generatesand +executes +plans +achieve +goals +learns +itsexperience +softbot +enables +human +user +state +wants +accomplished +softbot +disambiguates +requestand +dynamically +determines +satisfyit +softbot +uses +unix +shell +world +wide +interactwith +wide +range +internet +resources +take +tour +softbot +sgraphical +user +interface +principal +investigators +oren +etzioni +daniel +weld +also +check +metacrawler +softbot +fielded +service +enables +tosearch +multiple +indices +parallel +provides +sophisticatedpruning +options +information +contact +oren +etzioni +etzioni +washington +accessible +introduction +softbot +project +found +softbot +based +interface +internet +cacm +july +methodological +motivation +project +found +intelligence +without +robots +reply +brooks +magazine +december +technical +softbot +papers +found +cartoonrepresentation +internet +softbot +taken +blanchard +article +appeared +december +issue +ofcolumns +university +washington +alumni +magazine +softbots +research +group +currently +developing +graphical +user +interface +tothe +softbot +allow +user +easily +specify +high +level +goals +extending +maintaining +xiiplanner +keith +golden +working +graphical +specification +search +control +keith +golden +implementing +advanced +plan +space +browser +debug +planner +control +dave +christianson +comparing +rule +based +versus +procedural +search +control +sujay +parekh +ilalearns +information +resources +designing +protocol +multi +softbot +collaboration +negotiation +ying +experimenting +reactive +system +software +domain +kwok +goan +building +optimized +agent +ingram +information +gathering +reactive +system +internet +kwok +softbot +hackers +info +local +access +back +home +page +back +home +page +mike +perkowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..5d4b6bbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,385 @@ +spin +spin +washington +spin +operating +systemspin +extensible +operating +system +kernel +thatsupports +dynamic +adaptation +system +interfaces +andimplementations +direct +application +control +stillmaintaining +system +integrity +inter +application +isolation +spin +allows +applications +load +code +kernel +atruntime +runs +kernel +extension +code +accesshardware +operating +systems +services +almost +nooverhead +basically +procedure +call +data +passed +byreference +rather +copied +spin +kernel +providesa +core +interfaces +capabilities +basic +resources +theseare +used +collections +extensions +implement +general +systemservices +address +spaces +threads +networking +allextension +code +kernel +written +typesafe +language +modula +property +oftypesafety +prevents +extensions +crashing +system +attemptingto +manipulate +arbitrary +pieces +code +data +spin +runs +alpha +workstations +using +writeboth +specialized +applications +network +video +systems +wellas +supporting +general +purpose +unix +programs +connect +machinerunning +spin +kernel +spin +server +quick +results +structure +allows +programs +extend +operating +system +services +withlow +overhead +example +spin +running +alpha +application +handle +recover +page +fault +microseconds +takes +microseconds +create +thread +control +executeit +terminate +synchronize +termination +fork +join +protectedprocedure +call +another +extension +kernel +function +takes +microseconds +cross +machine +application +application +overethernet +takes +microseconds +really +oldadapters +takes +less +microseconds +operationsunder +mach +unix +take +times +longer +samehardware +time +benchmarks +page +saveyourself +effort +recent +reports +papers +dynamic +binding +extensible +operating +systems +invocation +mechanism +provides +flexible +efficient +andsimple +integrating +extensions +executing +system +appeared +osdi +extensibility +safety +performance +spin +operating +system +design +implementation +performance +paper +appeared +sosp +extensible +protocol +architecture +forapplication +specific +networking +design +implementation +performance +paper +appeared +usenix +winter +conference +writing +operating +system +using +modula +describes +experiences +using +modula +build +high +performance +extensible +system +makes +clear +distinction +language +implementation +language +support +extensible +operating +systems +pretty +happy +deal +shortcomings +order +language +safe +extensible +operating +system +paper +describes +addressed +shortcomings +safe +dynamic +linking +extensible +operating +system +describes +dynamic +linker +load +code +kernel +point +ability +create +manage +linkable +namespaces +describe +interfaces +collections +interfaces +language +runtime +support +dynamic +interposition +system +code +describes +kernel +internal +communication +extension +facilities +shows +dynamic +code +generation +improve +performance +critical +kernel +service +information +dynamic +compilation +waiting +time +compile +code +paper +trail +project +reports +talks +papers +project +members +benchmarks +interested +bottom +line +modula +information +modula +arpa +project +overview +executive +summary +regular +reports +friends +gotten +assistance +academia +industry +project +page +says +involved +related +projects +pointers +extensible +systems +projects +people +saying +barbs +arrows +internal +documentation +latest +status +project +members +available +projects +projects +could +result +qualifications +credit +masters +degree +funded +raship +position +undergraduate +project +credit +mascot +encouraged +many +people +decided +adopt +ourmascot +page +maintained +brian +bershad +bershad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..c3acfdfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,167 @@ +dynamic +compilation +projectthe +dynamic +compilation +projectmore +information +dynamic +compiler +project +members +papers +related +projectsuw +sections +student +projects +group +webdynamic +compilation +enables +optimizations +based +values +ofinvariant +data +computed +time +using +values +theserun +time +constants +dynamic +compiler +eliminate +memoryloads +perform +constant +propagation +folding +remove +branches +theydetermine +fully +unroll +loops +bound +however +performancebenefits +efficient +dynamically +compiled +code +offsetby +time +cost +dynamic +compile +approach +dynamiccompilation +strives +fast +dynamic +compilation +high +qualitydynamically +compiled +code +programmer +annotates +regions +theprograms +compiled +dynamically +static +optimizingcompiler +automatically +produces +optimized +machine +code +templates +using +pair +dataflow +analyses +identify +variables +willbe +constant +time +simple +dynamic +compiler +copies +thetemplates +patching +computed +values +time +constants +produce +optimized +executable +code +work +targets +general +purpose +imperative +programming +languages +initially +initialexperiments +applying +dynamic +compilation +programs +producedspeedups +ranging +part +spinproject +eventually +system +used +dynamicallycompile +code +spin +kernel +example +spinevent +dispatcher +however +also +actively +exploring +otherpossible +applications +dynamic +compilation +invirtual +machine +interpreters +prototype +dynamic +compilation +systemis +described +pldi +paper +arenow +starting +design +build +second +generation +system +wewill +release +details +soon +last +updated +august +grant +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..1386bd49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +moved +permanentlymoved +permanentlythe +document +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..ab2a640d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,133 @@ +project +project +department +computer +science +engineering +university +washington +seattle +info +washington +eduzpl +array +based +programming +language +suitable +computations +would +previously +written +fortran +programs +fast +sequential +parallel +computers +without +modifications +without +special +directives +machine +independent +recompilation +necessary +program +machine +higher +level +concepts +like +arrays +borders +eliminate +error +prone +indexing +tedious +looping +typical +programs +shorter +easily +written +easily +understood +modified +computer +scientists +find +concepts +regions +directions +borders +shattered +control +flow +conclusion +ideal +engineering +scientific +programs +walk +small +program +write +compile +program +yourmachine +scientific +programmers +area +shouldconsider +enrolling +zpthis +autumn +programmed +check +recent +changes +language +project +overview +high +level +overview +program +walk +minute +introduction +language +based +compiler +compile +programs +browser +right +papers +papers +manuals +related +details +line +information +sample +programs +people +project +members +horizon +description +group +direction +future +projects +acknowledgments +list +helped +support +work +info +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..beb3363b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,255 @@ +simultaneous +multithreading +home +pagesimultaneous +multithreading +projectoverviewpeoplepublications +overviewthe +crucial +problem +facing +today +high +speed +microprocessors +maintaining +high +processor +utilization +face +long +instruction +memory +latencies +alleviate +problem +modern +processors +issue +multiple +instructions +cycle +superscalars +interleave +execution +differentthreads +different +cycles +multithreaded +processors +ultimately +though +techniques +limited +amount +parallelism +available +within +single +thread +single +cycle +simultaneous +multithreading +technique +permits +multiple +independent +threads +issue +instructions +superscalar +functional +units +single +cycle +combines +multiple +instruction +issuefeatures +wide +superscalar +processors +latency +hiding +abilityof +multithreaded +architectures +processor +hardware +contextsare +active +simultaneously +competing +cycle +available +resources +dynamic +sharing +processor +resources +enables +exploitthread +level +instruction +level +parallelism +interchangeably +formsof +parallelism +effectively +used +increase +processor +utilization +studies +havedemonstrated +simultaneous +multithreading +significantly +improvesprocessor +throughput +performance +multiprogrammed +parallelworkloads +shown +performance +gains +achievedin +architecture +minimal +extensions +modern +ordersuperscalar +processors +current +future +work +includes +investigations +fast +synchronizationtechniques +enabled +also +conducting +research +otherarchitectural +compiler +issues +simultaneous +multithreading +peoplefaculty +susan +eggers +hank +levygraduate +students +jack +dean +tullsenindustrial +collaborators +digital +equipment +corporation +joel +emer +rebecca +stamm +publications +converting +thread +level +parallelism +instruction +level +parallelism +simultaneous +multithreading +abstract +postscript +eggers +emer +levy +stamm +andd +tullsen +submitted +publication +july +exploiting +choice +instruction +fetch +issue +implementable +simultaneous +multithreading +processor +abstract +postscript +tullsen +eggers +emer +levy +stamm +proceedings +annual +international +symposium +computer +architecture +philadelphia +compilation +issues +simultaneous +multithreading +processor +postscript +eggers +levy +andd +tullsen +proceedings +first +suif +compiler +workshop +stanford +january +simultaneous +multithreading +maximizing +chip +parallelism +abstract +postscript +tullsen +eggers +andh +levy +proceedings +annual +international +symposium +computer +architecture +santa +margherita +ligure +italy +june +students +check +list +research +projects +still +doon +student +affairs +page +page +maintained +jack +lojlo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..e9cccb25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,102 @@ +home +pagecomputer +sciences +departmentabout +computer +sciences +departmentour +department +formed +consistently +ranked +computer +science +departments +country +faculty +members +received +fourteen +presidential +young +investigator +awards +packard +fellowships +faculty +award +women +scientists +engineers +incentives +excellence +award +three +doctoral +dissertation +awards +three +faculty +development +awards +computer +sciences +department +area +research +projects +information +people +computer +sciences +department +courses +offered +fall +classes +future +timetables +technical +reports +computer +systems +answers +frequently +asked +questions +computer +sciences +alumni +information +graduate +guidebook +undergraduate +guidebook +department +annual +report +online +utilities +madison +local +services +related +organizations +colophon +statistics +server +useful +infocomputer +sciences +departmentuniversity +wisconsin +madisona +computer +sciences +statistics +west +dayton +streetmadison +wisc +voice +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..9afc7826 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,171 @@ +object +exploration +purposive +viewpoint +control +object +exploration +purposive +viewpoint +control +kyros +kutulakos +chuck +dyer +considerable +interest +recently +employing +simpleobserver +behaviors +either +make +recovery +scene +propertieseasier +fixation +combine +simple +behaviors +order +toperform +complex +tasks +navigation +obstacle +avoidance +ourwork +focuses +ability +active +observer +control +pointof +observation +perform +tasks +involving +exploration +object +developed +behaviors +provably +correct +makesimple +motion +decisions +based +observed +local +geometryof +scene +require +minimal +processing +image +first +consider +task +recovering +local +shape +thesurface +selected +point +approach +based +generalobservation +positions +provide +information +objectthan +others +existence +special +viewpoints +beexploited +observer +mobile +efficient +anddeterministic +strategy +reaching +show +localshape +recovery +task +achieved +using +simple +qualitativestrategy +smoothly +controlling +point +observation +viewingdirection +aligned +principal +direction +selectedpoint +second +consider +task +deriving +global +descriptionof +object +formulate +global +surface +reconstruction +thequalitative +task +smoothly +controlling +point +observationso +visible +slides +maximal +connected +reconstructible +region +show +task +provably +achieved +arbitrary +smooth +surfaces +attempting +maintain +well +defined +geometric +relationship +point +observationand +viewed +surface +approach +suggests +ability +smoothly +control +point +observation +lead +provably +correct +behaviors +achieving +local +global +tasks +scene +exploration +navigation +also +simplifying +frame +computations diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..fc23e946 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,142 @@ +deformable +contours +modeling +extraction +detection +classification +deformable +contours +modeling +extraction +detection +classification +fung +roland +chin +developed +integrated +approach +modeling +extracting +detecting +classifying +deformable +contours +directly +noisy +images +conducted +case +study +regularization +formulation +initialization +ofactive +contour +models +snakes +using +minimax +principle +derived +regularization +criterion +whereby +values +automatically +implicitly +determined +along +contour +furthermore +formulated +energy +functionals +yield +snakes +contain +hough +transform +special +case +subsequently +considered +problem +modeling +extracting +arbitrary +deformable +contours +noisy +images +combined +stable +invariant +andunique +contour +model +markov +random +fields +yield +priordistribution +exerts +influence +arbitrary +global +model +allowing +deformation +bayesian +framework +contour +extraction +turns +posterior +estimation +turn +equivalent +energy +minimization +generalized +active +contour +model +finally +integrated +lower +level +visual +tasks +withpattern +recognition +processes +detection +classification +based +nearman +pearson +lemma +derived +optimal +detection +classificationtests +summation +peaked +practical +applications +small +regions +need +considered +marginalizing +distribution +validity +formulation +confirmed +extensive +rigorous +experimentation +gsnake +software +available diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..ea35cd39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,423 @@ +data +visualization +based +lattices +data +visualization +based +lattices +bill +hibbard +brian +paul +chuck +dyer +defining +foundation +data +visualization +based +theidea +visualization +process +function +ofdata +objects +called +data +model +displays +calleda +display +model +prototype +system +called +adhas +implemented +based +upon +ideas +guided +following +principles +scientists +develop +mathematical +models +nature +data +objectsrepresent +objects +mathematical +models +mathematical +objectsfrequently +contain +infinite +precision +real +numbers +functionswith +infinite +domains +whereas +computer +data +objects +containfinite +amounts +information +thus +approximations +themathematical +objects +represent +computer +displays +contain +finite +amounts +information +contain +finite +numbers +pixels +colors +chosenfrom +finite +palettes +animation +sequences +contain +finite +numbersof +frames +thus +approximations +ideal +displays +close +relationship +data +computation +data +model +appropriate +data +objects +computationalmodel +programming +language +purpose +data +visualization +communicate +informationcontent +data +objects +particular +define +expressivenessconditions +mapping +data +objects +displays +thatdisplays +encode +facts +data +objects +onlythose +facts +visualization +system +complete +sense +itimplements +function +data +objects +displays +satisfyingthe +expressivenss +conditions +define +order +relation +data +objects +based +onhow +precisely +approximate +mathematical +objects +wecan +define +order +relation +displays +based +howprecisely +approximate +ideal +displays +based +voxelresolution +order +relations +define +lattices +ofdata +objects +displays +therefore +model +visualizationprocess +function +lattice +data +objectsto +lattice +displays +interpret +expressivenessconditions +context +show +satisfies +theexpressiveness +conditions +lattice +isomorphism +define +particular +lattice +appropriate +scientificdata +objects +data +objects +built +primitivevariables +call +scalars +time +latitude +radiance +temperature +tuple +array +data +type +constructors +array +time +temperature +data +type +appropriatefor +time +series +temperatures +thus +lattice +containsthe +data +objects +scientific +programming +language +canalso +define +particular +lattice +displays +displayis +voxels +voxel +specified +graphicsprimitives +call +display +scalars +pixel +locationand +size +volume +color +place +animationsequence +show +functions +satisfying +expressivenessconditions +class +functions +defined +mappings +fromthe +scalars +used +primitive +variables +data +objects +thedisplay +scalars +specify +graphics +primitives +voxels +ofcourse +designers +scientific +displays +already +assume +primitive +variables +mapped +graphics +primitives +example +given +data +object +type +array +time +temperature +isnatural +display +graph +time +along +axis +andtemperature +along +another +remarkable +thing +wedo +take +designing +displays +assumption +consequence +fundamental +expressivenessconditions +developing +implementing +system +calledvis +adthat +allows +scientists +experiment +algorithms +steer +theircomputations +visualize +data +objects +create +theirprograms +lattice +defines +data +objects +thevis +programming +language +system +implements +lattice +vvof +displays +data +displayed +using +function +thatsatisfies +expressiveness +conditions +however +implementationis +quite +precise +data +flow +systems +define +auser +interface +controlling +data +display +based +abstractionof +rendering +pipeline +system +defines +user +interfacefor +controlling +data +display +based +abstraction +ofmappings +scalars +display +scalars +possible +define +data +lattices +recursively +defineddata +types +complex +linked +types +trees +used +ingeneral +purpose +programming +languages +abstract +datatypes +object +classes +object +oriented +programming +languages +lattices +provide +rigorous +foundation +visualization +particular +help +develop +analytical +alternative +usualapproach +defining +visualization +process +constructively +bywriting +special +purpose +program +computing +display +fora +specific +data +object diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..9668d80e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,68 @@ +condor +project +homepage +objective +goal +condor +project +develop +implement +deploy +evaluatemechanisms +policies +support +high +throughput +computing +large +collections +distributively +owned +computing +resources +guided +technologicaland +sociological +challenges +computing +environment +condor +team +building +software +tools +enable +scientists +engineers +increase +computing +throughput +introduction +start +high +throughput +computing +research +condor +system +condor +pools +university +wisconsin +madison +condor +help +page +project +home +page +condor +world +mailing +list +comments +suggestionscondor +admin +wisc +edulast +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..75eb555a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,11 @@ +next +homepage +next +people +next +project +last +modified +september +miron +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..c12ec8cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,262 @@ +coral +database +projectcoral +database +projectdocument +contents +objectiveoverviewrelease +informationsee +also +publications +related +coralpeople +working +coralother +research +madisonobjective +objective +coral +project +develop +robust +efficientdeductive +database +system +investigate +various +application +domains +several +algorithms +underlying +coralsystem +developed +members +group +durationof +project +since +overview +coral +deductive +system +supports +rich +declarative +language +interface +allows +combination +declaritiveand +imperative +programming +declarative +query +language +supportsgeneral +horn +clauses +augmented +complex +terms +grouping +aggregation +negation +relations +tuples +contain +universally +quantified +variables +coral +declarative +program +canbe +organized +collection +interacting +modules +coralimplementation +supports +wide +range +evaluation +strategies +andautomatically +chooses +efficient +evaluation +strategy +modulein +program +addition +users +permitted +guide +query +optimization +desired +selecting +among +wide +range +control +choices +atthe +level +module +coral +system +provides +imperative +constructs +update +insertand +delete +rules +coral +also +interface +users +canprogram +combination +declarative +coral +extended +withcoral +primitives +high +degree +extensibility +provided +allowingc +programmers +class +structure +enhance +coralimplemenation +coral +provides +support +main +memory +disk +resident +data +disk +resident +data +supported +using +theexodusstorage +mananger +also +provides +transaction +management +aclient +server +environment +release +information +current +release +coral +version +released +october +install +coral +system +grab +file +want +nobin +version +contains +source +code +requiringyou +compile +coral +versions +include +made +binaries +forthe +indicated +machine +type +click +file +grab +readme +general +information +installation +manual +coral +instructions +installation +coral +nobin +binaries +included +coral +hpux +series +binaries +included +coral +sunos +binaries +included +coral +solaris +binaries +included +coral +solaris +binaries +included +coral +linux +linux +binaries +included +stay +informed +releases +coral +announcemnt +mailing +listwhich +recieves +announcements +releases +relevant +information +releases +also +announced +newsgroup +comp +lang +misc +also +submit +questions +comments +reports +coral +send +mail +coral +wisc +edulast +modified +october +shawn +flisakowski +flisakow +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..5fb2efa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,410 @@ +complementarity +problem +algorithms +applications +complementarity +problem +netas +result +three +decades +research +subject +complementarity +problems +diverse +applications +engineering +economics +sciences +become +well +established +fruitfuldiscipline +within +mathematical +programming +several +monographs +surveys +documented +basic +theory +algorithms +applications +complementarity +problems +role +optimization +theory +page +serves +center +information +regarding +research +incomplementarity +problems +listing +meetingsof +interest +community +pointers +software +forcomplementarity +problems +well +links +pages +interest +listing +researchers +application +areas +also +given +table +contents +directory +researchers +complementarity +software +complementarity +problems +applications +complementarity +relevant +links +directory +complementarity +researcherssoftware +complementarity +problems +mcplib +collection +nonlinear +mixed +complementarity +problems +problemdescription +access +gams +source +files +forthese +problems +complementarity +toolbox +matlab +evolving +freely +available +toolbox +consists +several +andm +files +allow +mcplib +problems +accessed +frommatlab +without +access +gams +files +give +functionevaluations +sparse +jacobian +evaluations +machine +specificversions +downloaded +path +solver +also +available +toolbox +details +solver +interface +gams +describes +library +routines +areavailable +help +hook +solver +gams +modeling +language +contact +steve +gams +rutherford +colorado +ferris +wisc +edufor +details +miles +miles +extension +classicaljosephy +newton +method +solution +linearizedsubproblem +computed +lemke +almost +complementary +pivot +algorithm +newton +point +used +definethe +newton +direction +used +dampedlinesearch +merit +function +used +measures +violation +infeasibility +complementarity +miles +also +employs +restartprocedure +cases +newton +point +cannot +computed +totermination +secondary +every +linearized +subproblem +rescaled +equilibrate +elementsappearing +data +subproblem +path +runs +gams +mcpor +directly +matlab +path +solver +applies +techniques +similarto +used +newton +methods +smooth +systems +anonsmooth +reformulation +algorithmconsists +sequence +major +iterations +consisting +anapproximation +linearization +step +similar +miles +construction +pathto +newton +point +solution +approximation +apossible +search +path +newton +point +exists +thepath +cannot +entirely +constructed +step +along +partiallycomputed +path +taken +problem +relinearized +anonmonotone +watchdog +strategy +employed +applying +path +search +helps +avoid +convergence +local +minima +norm +function +forthe +underlying +nonsmooth +equation +keeps +number +functionevaluations +required +small +possible +list +solver +options +given +document +algorithm +based +uponreformulating +system +nonsmooth +equations +algorithm +implemented +gams +solver +robustnessimproved +using +proximal +perturbation +strategy +giving +qpcomp +algorithm +nonsmoothness +equations +ishandled +using +directional +derivatives +smooth +smooth +algorithm +based +uponreformulating +system +nonsmooth +equations +thenapproximately +solving +sequence +smooth +approximations +leadto +zero +nonsmooth +system +iteration +smooth +approximation +original +system +formed +theaccuracy +approximation +determined +residual +thecurrent +point +implemented +gams +system +solvers +implemented +subsystems +gams +compared +paper +applications +complementarity +engineeringand +economic +applications +complementarity +problems +paper +lists +many +known +applications +complementarity +problems +mpsge +preprocessor +gams +modeling +language +thatallows +economic +equilibrium +problems +formulated +easily +thegams +home +page +information +nemsthe +national +energy +modeling +system +several +papers +related +algorithmsand +paper +giving +overview +project +relevant +links +look +michael +trick +operations +research +page +interesting +links +look +interior +point +information +interior +pointmethods +argonne +national +laboratories +archive +last +modified +october +michael +ferris +ferris +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..44083206 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,71 @@ +exodus +project +home +pageexodus +extensible +object +oriented +database +system +toolkitnote +document +construction +exodus +project +succeded +theshore +project +still +provide +minimal +support +users +theexodus +storage +manager +compiler +persistentprogramming +language +available +wisc +licenses +required +information +needed +contact +exodus +wisc +eduprincipal +investigators +mike +carey +david +dewittsee +also +publications +related +exodusshore +successor +exoduslatest +exodus +storage +manager +compilercontributed +software +storage +managera +mailing +list +exodus +users +exodus_all +wisc +benchmark +benchmark +oodbsdate +prepared +april +michael +zwilling +zwilling +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..06406a2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,326 @@ +paradise +projectparadise +parallel +database +system +applications +document +contents +objective +client +server +paradise +samples +paradise +frontend +contact +informationsee +also +publications +related +paradise +people +working +paradise +shore +project +object +manager +used +paradise +extensible +optimizer +paradise +reporting +examine +sequoia +benchmark +scripts +used +vldb +paper +information +madison +database +research +group +madison +department +serverobjective +objective +paradise +project +design +implement +andevaluate +scalable +parallel +geographic +information +system +iscapable +storing +manipulating +massive +data +sets +applyingobject +oriented +parallel +database +technologies +problem +ofstoring +manipulating +geographic +information +hope +tosignificantly +advance +size +complexity +data +sets +thatcan +successfully +stored +browsed +queried +client +server +paradise +paradise +databasesystem +aimed +handling +types +applications +paradise +providesa +graphical +user +interface +querying +browsing +database +andsupports +subset +issuing +queries +paradiseprovides +extended +relational +data +model +modeling +gisapplications +addition +support +base +data +types +asinteger +real +string +paradise +built +support +raster +polygon +polyline +point +circle +video +mpeg +images +data +paradise +uses +shore +underlying +persistent +object +manager +paradise +front +allows +display +objects +spatialattributes +provides +layered +display +foroverlapping +spatial +attributes +layer +corresponds +query +customize +ordering +display +layers +selectingcolor +labelling +display +attributes +query +database +queried +either +graphical +interface +withad +queries +graphical +interface +used +issueimplicit +spatial +queries +zooming +clicking +sketching +arubber +banded +queries +paradise +provides +querycomposer +menus +query +composer +provide +access +databaseschema +assist +query +composition +query +results +either +beviewed +spatial +attributes +bedisplayed +table +browse +tuples +result +front +also +provides +context +sensitive +help +paradise +supports +subset +issuing +queries +sqlwe +added +ability +invoke +methods +defined +extendedset +types +example +calculate +area +polygon +byusing +method +polygon +area +paradise +also +supports +standarddatabase +operations +including +create +drop +database +create +anddrop +extent +create +drop +indices +insert +update +current +version +paradise +employs +client +server +architecture +front +ships +queries +syntax +paradiseserver +execution +executing +query +server +ships +theresult +objects +back +client +paradise +server +ismulti +threaded +multiple +clients +connect +sameserver +designing +implementing +paradise +sever +carefulattention +paid +insure +system +could +efficiently +processqueries +especially +involving +spatial +attributes +largevolumes +data +samples +paradise +frontendeurope +data +samples +presshere +contact +information +paradise +projectattn +prof +david +dewittuniversity +wisconsin +madisoncomputer +sciences +department +west +dayton +streetmadison +email +paradise +wisc +edumore +come +biswadeep +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..40e8c782 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,114 @@ +scout +services +homepagego +text +versionnewsletter +newand +newly +discovered +internet +resources +network +toolsinternet +announcements +updated +daily +effectiveinternet +tools +availablea +report +students +studentssurf +smarter +longer +internic +scout +project +universityof +wisconsin +madison +show +best +canchoose +best +filter +hundreds +internet +annoucementseach +week +looking +valuable +online +resources +networktools +organize +summarize +annotate +best +vefound +offer +internet +community +several +useful +formats +goal +scout +support +effective +internet +byeducators +researchers +however +everyone +welcome +usethe +publications +sites +provided +scout +encouragefeedback +suggestions +entire +internet +community +three +primary +services +provided +include +scout +report +happenings +thescout +toolkit +ournewest +project +know +report +students +students +scout +services +located +department +computer +sciences +theuniversity +wisconsin +madison +project +internic +comments +suggestions +feedbackscout +internic +scout +servicesfor +information +using +internet +internic +information +education +services diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..25b0f235 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,961 @@ +shore +project +home +pageshore +high +performance +scalable +persistent +object +repositorydocument +contents +objectiveoverviewrelease +informationmailing +listssee +also +shore +version +line +information +source +binaries +documentation +beta +release +shore +version +publications +related +shorepeople +working +shorelatest +research +summary +arpaparadise +project +built +shoreexodus +project +predecessor +shoreoo +benchmark +benchmark +oodbsshore +photo +albumuw +madison +database +research +group +madison +department +serverobjective +objective +shore +project +design +implement +andevaluate +persistent +object +system +serve +needs +widevariety +target +applications +including +hardware +software +cadsystems +persistent +programming +languages +geographic +informationsystems +satellite +data +repositories +multi +media +applications +shore +expands +basic +capabilities +widely +usedexodusstorage +manager +developed +wisconsin +funded +arpa +number +ofways +including +support +typed +objects +multiple +programminglanguages +unix +like +hierarchical +name +space +named +objects +anda +unix +compatible +interface +objects +text +field +thisinterface +intended +ease +transition +applications +theunix +file +system +environment +shore +existing +unix +tools +viand +able +store +data +shore +objects +withoutmodification +basically +unix +file +becomes +either +single +shoreobject +text +field +complex +object +overview +shore +something +hybrid +system +nature +inheritingcharacteristics +object +oriented +database +systems +fromfile +systems +section +briefly +describe +basic +features +ofshore +paper +shoring +persistent +applications +describes +shore +much +greater +detail +shore +three +major +goals +scalabilitysupport +hardware +language +heterogeneitysupport +existing +file +based +applicationswhen +shore +project +began +years +goals +uniqueamong +research +commercial +oodbms +community +odmg +effort +also +concentrated +providing +degree +support +languageheterogeneity +turn +facilitates +hardware +heterogeneity +shore +remains +distinguished +focus +scalability +supportfor +applications +depend +unix +file +system +persistentstorage +furthermore +since +shore +data +model +basicallycompatible +odmg +data +model +expect +much +technology +develop +eventually +betransferred +commercial +sector +scalable +architectureshore +software +architecture +unique +several +ways +first +shore +uses +symmetric +peer +peer +distributedarchitecture +shore +every +participating +processor +runs +ashore +server +process +whether +processor +shore +data +disksattached +software +designed +scalable +single +processor +network +workstations +large +parallel +processor +intel +paragon +design +contrast +client +server +architectureused +exodus +oodbms +vendors +client +serverarchitecture +fine +design +environment +typicallyused +software +hardware +efforts +scalable +second +unique +feature +shore +architecture +notionof +value +added +server +structuring +software +runsin +server +extensibility +mind +relatively +simple +forusers +build +application +specific +servers +example +theparadise +project +already +using +shore +server +build +geographic +information +system +nasa +seosdis +project +feel +unique +pieces +technology +play +aimportant +role +variety +future +research +commercial +endeavors +example +digital +libraries +future +almost +certainlydepend +availability +scalable +persistent +object +technology +systems +going +store +retrieve +manipulate +transmitobjects +containing +video +pictures +well +text +whilecurrent +oodbms +products +could +used +systems +orientedtoward +dealing +gigabytes +terabytes +data +customizability +equally +important +indexing +retrieval +query +processing +mechanisms +needed +digital +libraryare +different +required +geographic +informationsystem +language +hardware +heterogeneityobjects +shore +typed +shore +provides +single +language +neutraltype +system +used +define +types +shore +objects +type +system +embodied +shore +data +language +language +shore +object +types +defined +enhances +data +model +support +databasefeatures +bulk +types +sets +lists +persistence +provision +typed +persistent +objects +simplifies +task +ofsupporting +heterogeneous +hardware +environments +makes +feasibleto +support +access +persistent +objects +multiple +programminglanguages +objective +shore +project +mentioned +earlier +quite +closely +related +language +neutral +object +type +definition +language +wasrecently +proposed +standard +oodb +vendor +consortium +odmg +terms +emphasis +however +odmg +largely +concentrated +onproviding +standardized +interface +existing +oriented +oodbs +focus +support +inter +language +object +sharing +withina +large +name +space +objects +support +existing +file +based +applicationsa +major +goal +shore +enable +applications +currentlyuse +untyped +byte +oriented +files +persistent +data +flattening +flattening +data +time +accessed +stop +applications +able +store +data +typed +structuredobjects +convenient +type +safe +intra +inter +program +data +sharing +ultimate +hope +shore +displace +byte +orientedfile +systems +unix +file +system +shore +provides +major +services +file +system +standpoint +first +support +object +naming +space +management +world +manypersistent +objects +shore +provides +flexible +tree +structured +unix +like +name +space +persistent +objects +reachable +either +directly +indirectly +gives +shore +usersa +familiar +framework +register +individualpersistent +objects +termed +registered +objects +roots +oflarge +persistent +data +structures +bulk +sets +unnamed +objects +termed +anonymous +objects +realization +framework +involvesseveral +different +kinds +shore +file +system +objects +includingdirectories +pools +files +containing +anonymous +objects +symbolic +links +cross +references +shore +provides +mechanisms +ease +transition +legacy +unixapplications +compilers +editors +systems +fromtraditional +byte +stream +files +shore +first +applications +linked +shore +provides +standardunix +compatible +file +system +interface +open +close +read +write +mkdir +chdir +order +make +access +shore +objects +unix +file +system +callspossible +definer +shore +object +type +optionally +designate +onevariable +length +byte +string +character +string +attribute +object +asbeing +object +unix +data +programs +attempt +read +objectthrough +shore +counterparts +unix +file +system +callswill +portion +object +legacy +programs +thatwish +without +linked +possible +mount +ashore +file +system +access +unix +datacontained +objects +directly +makes +feasible +bothnew +applications +access +objects +applications +access +unix +data +componentof +object +applications +define +access +morestructured +attributes +object +release +information +latest +time +table +release +shore +dates +approximate +subject +change +questions +contact +shore_support +wisc +beta +release +first +beta +release +beta +release +second +beta +rlease +shore +version +available +sept +includes +improved +documentation +completeimplementations +many +features +many +fixes +ports +tosolaris +linux +version +august +released +shore +version +gzip +files +source +documentation +binary +release +sparc +andpentium +solaris +found +atftp +wisc +shore +mailing +liststhere +shore +related +mailing +lists +shore_support +wisc +eduand +shore_all +wisc +shore_support +wisc +eduthis +mailing +list +reaches +shore +development +team +useby +shore +users +submit +questions +comments +reports +cannot +subscribe +mailing +list +shore_all +wisc +mailing +list +users +interested +shore +list +managed +listproc +software +madisoncs +department +currently +unmoderated +unlikelyevent +gets +cluttered +junk +mail +moderate +mail +messages +interested +list +mailbox +isalready +cluttered +sign +weekly +digests +belowfor +information +information +list +sentwhen +subscribe +purpose +shore_all +notifying +interested +parties +releases +changes +shore +archive +requests +help +users +default +replies +sent +sender +rather +beingposted +entire +list +want +entire +list +yourreply +copy +reply +shore_all +list +public +mailing +list +thus +anyone +maysubscribe +subscribers +post +list +existenceof +list +shown +listing +returned +listproc +whenit +processes +lists +request +subscribe +yoursubscription +concealed +default +subscriberscannot +obtain +membership +list +listproc +system +subscribing +shore_all +subscribe +change +subscription +must +mail +specialmessage +listproc +wisc +subscribe +content +message +look +like +subscribe +shore_all +receive +weekly +digests +rather +individual +messages +sendthis +along +subscription +send +separate +message +shore_all +mail +digest +subscribe +content +message +unsubscribe +shore_all +help +list +processor +content +messageshould +helplast +modified +nancy +hall +nhall +wisc +footnotes +compatibility +odlshore +odmg +concurrently +decided +data +modelidl +starting +point +data +models +hence +odlare +similar +another +stabilizeswe +convert +compatible diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..2a3b0ca7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,40 @@ +abhinav +home +page +welcome +abhinav +gupta +agupta +wisc +gupta +wisc +page +construction +graduate +student +department +computer +science +university +wisconsin +madison +contact +residenceoffice +kendall +avenue +madison +department +computer +sciences +dayton +street +madison +interesting +links +indian +newspapers +stuff +sports +finger +find +whereabouts diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..1c1c8840 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,15 @@ +alain +home +pagealain +click +larger +picture +largest +carnivore +ever +lived +last +modified +alain +alain +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..d23b33e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,84 @@ +carolyn +allex +home +page +carolyn +allex +graduate +studentbiotechnology +training +program +traineecomputer +sciences +departmentuniversity +wisconsin +madison +west +dayton +streetmadison +wisconsin +mail +allex +wisc +edutelephone +advisor +professor +jude +shavlikinterests +computational +biology +sequencing +protein +folding +artificial +intelligence +machine +learning +neural +networkseducation +computer +sciences +university +wisconsin +madisonbs +computer +science +purdue +universitybs +education +mankato +state +university +related +links +university +wisconsin +department +university +wisconsin +group +university +wisconsin +machine +learning +group +university +wisconsin +computational +biology +research +intelligent +systems +molecular +biology +ismb +intelligent +systems +molecular +biology +ismb +intelligent +systems +molecular +biology +ismb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..a61d2dd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,225 @@ +amir +home +page +amir +roth +delphi +maven +show +erin +madison +amir +wisc +occasionaly +updated +copy +resume +cvte +truth +group +seminar +arch +group +arch +seminar +week +research +topics +implementing +compiler +using +preprocessor +delegating +work +project +partners +setting +airport +metal +detectors +outs +existencei +graduate +student +department +computer +science +university +wisconsin +madison +advisor +guri +sohi +looking +methods +alleviating +data +dependences +multiscalar +programs +using +distributed +register +file +multiscalar +programs +side +curly +fries +physics +yale +university +advanced +degree +nail +design +valley +beauty +school +physics +interesting +beauty +degree +much +practical +live +girlfriend +marci +cats +charlie +marci +also +went +yale +getting +masters +public +policy +lafollette +institute +going +president +meantime +likes +solve +linear +regression +problems +wacky +recipes +finds +magazines +watch +party +five +like +eggplant +people +think +weird +anyway +promised +outs +subba +officemates +daddy +november +titanium +screws +desi +relaford +terry +mulholland +oxygen +carbon +dioxide +areas +vague +interest +programming +languages +program +analysis +super +whack +compiler +optimizations +parallel +algorithms +theory +good +soul +analysis +evaluation +modeling +performance +enhancement +three +point +shot +things +scaryareas +rabid +interestthe +love +know +talking +better +leave +page +never +return +hmmm +interested +super +handyinformatik +index +computer +science +journals +authors +madcat +architecture +resources +minute +scores +sportsline +philly +everybody +favorite +engineer +super +ickythe +news +friend +barb +writes +articles +guys +going +read +want +kidding +barb +friend +drew +home +page +cornell +david +home +page +think +wierd +page +featuring +friend +friend +association +kemin +last +modified +amir +roth +amir +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..7978843e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page +construction diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..96943f19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,181 @@ +amos +home +page +words +search +engines +approximation +theory +splines +wavelets +boxsplines +radial +basis +functions +shift +invariant +spaces +approximation +toscattered +data +multiquadrics +thin +plate +splinesthis +page +netscape +enhanced +homepage +amos +associate +professordepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +wisconsin +usae +mail +amos +wisc +telephone +amos +home +page +presently +table +linksat +present +items +accessible +order +download +paperaffine +systems +analysis +analysis +operatorof +zuowei +shen +choose +following +versions +using +unix +clickhere +compressed +version +otherwise +download +uncompressed +version +fromhere +none +works +server +copy +directlyfrom +accounthere +handouts +email +clickhere +click +vita +want +vita +list +various +publications +including +abstracts +selected +articlesof +mine +information +anonymous +site +carl +boor +maintain +site +wisc +site +containspostscript +compressed +postscript +files +various +articles +theapproximation +theory +group +also +found +technical +filesconcerning +useful +general +public +recommend +click +read +file +provides +list +available +files +click +wish +view +line +view +line +download +wish +students +including +information +research +andpublications +main +areas +interest +together +short +summary +present +research +future +goals +activity +approximation +theory +group +univeristy +ofwisconsin +madison +activity +numerical +analysis +group +links +home +pages +people +approximation +theory +community +found +miscellaneous +topics +activities +finally +official +homepage +please +deposit +comments +mailbox diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..a2a593ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andy +therber +home +pageandy +therberoffice +sphone +email +andyt +wisc +eduzooresumebookmarksapplets diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..87d53106 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,22 @@ +ranga +arvind +ranganathan +erstwhile +workplace +present +workplace +indiaworld +fascinating +world +escher +collection +classic +papers +computer +science +finger +logged +arvind +ranganathan +arvind +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..4f8d62f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,37 @@ +ashish +home +page +ashish +thusoo +graduate +student +department +computer +science +university +wisconsin +madison +come +india +hadmy +undergraduate +education +indianinstitute +technology +delhi +department +iitd +fantastic +place +worth +visiting +like +contact +canfinger +find +whereabouts +alternatively +send +email +ashisht +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..dd30cbdb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,51 @@ +ashraf +aboulnaga +home +pageashraf +aboulnagacomputer +sciences +department +room +university +wisconsin +madison +west +dayton +madison +usaphone +mail +ashraf +wisc +edueducation +computer +science +alexandria +university +alexandria +egypt +july +computer +science +alexandria +university +alexandria +egypt +june +info +section +view +grades +section +view +grades +office +hours +desautels +home +page +last +modified +september +ashraf +aboulnaga +finger diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..aedc8776 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,29 @@ +phil +atkinson +home +page +phil +atkinson +home +pageucla +bannon +winning +ncaa +basketball +championship +seattle +general +infooffice +phone +email +atkinson +wisc +educurrent +researchsailinghorse +back +ridingscuba +divingcs +infooffice +hours +tuth +appointment diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..b7a2d136 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,162 @@ +home +page +eric +bach +eric +bach +professor +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +bach +wisc +university +california +berkeley +interests +theoretical +computer +science +computational +number +theory +algebraicalgorithms +complexity +theory +cryptography +string +automata +research +summary +interested +uses +computers +efficiently +solvealgebraic +number +theoretic +problems +example +onetell +digit +number +prime +without +examining +possiblefactors +problems +intrinsic +mathematical +interest +well +applications +random +number +generation +codes +forreliable +secure +information +transmission +computer +algebra +areas +also +interested +applying +probability +theory +designand +analysis +algorithms +example +large +number +iscomposite +proved +simple +test +uses +auxiliarynumber +called +witness +practice +usually +finds +witnessby +direct +search +among +small +primes +leads +followingnatural +question +large +least +witness +functionof +number +tested +recent +work +given +accurateheuristic +model +based +probabilistic +assumptions +allowsthis +similar +questions +answered +recent +publications +improved +approximations +euler +products +proc +cnta +canadian +math +proceedings +models +algorithms +complete +problems +condon +glaser +tanguay +proc +annual +conf +computational +complexity +algorithmic +number +theory +volume +efficient +algorithms +shallit +press +info +click +curriculum +vitae +page +created +july +email +bach +wisc +eduto +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..523be599 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,70 @@ +bart +miller +home +page +barton +miller +bart +wisc +professorcomputer +sciences +departmentuniversity +wisconsin +madison +west +dayton +streetmadison +usathe +following +list +things +research +projects +paradyn +parallel +performance +tools +fuzz +random +software +testingteaching +introduction +operating +systems +spring +honors +internet +seminar +advanced +operating +systems +fall +distributed +systems +director +undergraduate +projects +graduatesprofessional +symposium +parallel +distributed +tools +monona +terrace +frank +lloyd +wright +convention +center +technical +advisory +grouppersonal +official +department +home +page +family +photosbart +wisc +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..95e61875 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,47 @@ +hyper +home +page +benjamin +teitelbaum +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +usaben +wisc +edursumquince +internet +ultimate +word +gamezillions +bookmarksspring +schedule +browser +support +tables +look +like +garbage +click +something +readable +monday +tuesday +wednesday +thursday +friday +office +hours +office +hours +dbseminar +osseminar +condormeeting +miron +plseminar diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..c38ed75e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,404 @@ +gareth +bestor +home +pagewelcome +gareth +bestor +home +pagegareth +bestor +dissertator +teaching +assistant +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +telephone +mail +bestor +wisc +click +finger +world +wide +http +wisc +bestor +systems +administrator +data +program +library +service +observatory +drive +madison +telephone +mail +bestor +dpls +dacc +wisc +edueducation +computer +sciences +university +wisconsin +madison +honors +computer +science +massey +university +zealand +computer +science +massey +university +zealand +curriculum +vitae +postscript +resume +postscript +graduate +coursework +postscript +dissertation +research +title +structure +motion +inverse +projection +problem +abstract +important +problem +computer +vision +recovering +structure +scene +position +observer +within +projected +images +essentially +inverse +projection +problem +existing +structure +motion +techniques +solve +problem +examining +multiple +images +projected +rigid +scene +however +used +extensively +practice +techniques +sensitive +noise +accurately +model +optical +projection +restrict +position +observer +structure +scene +research +uses +technique +solving +inverse +projection +problem +called +concurrent +projector +model +makes +assumptions +scene +rigid +assumptions +position +observer +technique +uses +projector +based +model +projection +instead +camera +based +model +traditionally +used +result +algorithm +defined +geometric +transformation +dimension +perspective +projection +given +transformation +dimension +algorithm +identifies +inverse +projection +problem +constrained +specifies +minimum +number +points +images +required +solve +concurrent +projector +model +also +examine +additional +points +images +minimize +types +projection +errors +occur +real +world +applications +allowing +projectors +approximately +intersect +technique +currently +applied +problem +robot +navigation +exploration +determine +position +robot +unknown +environment +time +environment +advisor +prof +charles +dyer +research +interests +computer +machine +vision +vision +based +robot +navigation +exploration +computer +graphics +virtual +reality +artificial +intelligence +group +computer +vision +group +machine +learning +research +group +robotics +teaching +duties +spring +introduction +computer +programming +sections +fortran +credit +course +covers +basic +programming +structures +needed +prepare +students +elementary +engineering +courses +prior +computer +programming +experience +required +basic +knowledge +computers +assumed +material +covered +enables +students +write +simple +computer +programs +solve +engineering +problems +elementary +courses +programming +done +fortran +course +intended +students +received +little +programming +instruction +high +school +sections +taught +entirely +fortran +programming +language +intended +primarily +engineering +students +computer +science +majors +sections +home +page +algebraic +language +programming +section +fortran +construction +algorithms +problem +solving +instruction +experience +least +procedure +oriented +language +pascal +fortran +survey +languages +advanced +programming +techniques +prereq +advanced +high +school +mathematical +preparation +college +work +mathematics +statistics +logic +consent +instructor +open +section +taught +entirely +fortran +programming +language +intended +primarily +engineering +students +computer +science +majors +section +home +page +pointers +interest +computer +sciences +department +home +page +university +wisconsin +madison +wiscinfo +home +page +information +zealand +hoofer +outing +club +nextstep +next +software +starting +points +internet +exploration +lycos +search +world +wide +keyword +copyright +copy +gareth +bestor +bestor +wisc +last +modified +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..ea28e68c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,38 @@ +kevin +beyer +home +pagekevin +beyerbeyer +wisc +caution +work +graduate +student +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +advisor +raghu +ramakrishnan +area +interest +database +researchresearch +projects +coral +local +course +information +projects +graduate +courses +undergraduate +coursesinstructing +beyer +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..2f832333 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,69 @@ +todd +bezenek +home +page +toddm +bezenek +back +introduced +actually +faster +cpus +_great +microprocessors +past +present_ +uregina +bayko +html +windows +express +locomotive +squeezed +skateboard +sized +package +helen +custer +_inside +windows +microsoft +press +current +courses +advanced +operating +systems +bart +miller +pithy +pith +consisting +abounding +pith +taking +yeah +point +skewed +associative +caches +access +information +computer +sciences +department +university +wisconsin +west +dayton +street +madison +office +phone +home +phone +mail +bezenek +wisc +edubezenek +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..9530ee52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,67 @@ +nathan +bockrath +graduate +student +nathan +bockrath +teaching +assistant +graduate +student +average +picture +nate +jpeg +send +email +click +bockrath +wisc +sections +sections +held +section +section +grades +quiz +reviews +virus +info +word +macro +virus +make +pages +office +hours +anyway +monday +wednesday +schedule +distributed +systems +simulation +modeling +support +free +speech +online +info +site +another +days +back +home +pageback +department +home +pageother +neat +stuff +condor +project +internet +oraclesend +comments +bockrath +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..89d9b76c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,392 @@ +bolobologreetings +bolo +although +parents +christened +josef +thomas +burger +rolled +ways +call +mebolo +everyone +including +parents +quite +sure +bestexplanation +developed +many +ways +question +person +defined +bywhat +person +case +software +engineer +develops +semi +real +timeoperating +systems +utilities +last +years +kernel +hacker +unix +system +administrator +create +things +engineer +engineer +applies +science +design +construction +things +really +call +scientist +though +acomputer +sciences +degree +scientist +pays +david +dewitt +scientist +shudders +methodology +hand +right +time +works +designing +architecting +implementing +newoperating +system +type +things +often +sameto +things +things +forth +system +woodworking +home +control +drawing +brewing +beer +completely +relax +sleep +works +wonders +sleep +enough +although +myroommate +disagrees +pursuits +enjoy +flying +reading +science +fiction +comic +books +railroading +prototype +model +role +playing +games +notice +images +sublime +striped +creature +thetiger +appearing +throughout +pages +tiger +taken +william +blake +poemtyger +tyger +puts +words +wonder +tiger +road +againin +tremendous +leap +insanity +purchased +ahouse +address +place +isjosef +burger +east +gate +roadmonona +voice +number +boring +workwork +work +drives +bananas +used +grunge +either +matured +jobs +changed +years +perhapssome +seems +like +parallel +computers +every +othermonth +beat +intosubmission +everything +else +moon +andstars +currently +working +following +projects +fordave +dewitt +world +famous +database +hacker +gamma +parallel +relational +database +like +query +interpreter +object +stores +paradise +geographic +information +system +implemented +shore +shore +object +oriented +data +store +wiss +wisconsin +storage +system +whatever +else +needs +done +whole +occurs +thecomputer +sciences +departmentof +themadison +campusof +university +wisconsin +campus +located +madison +peninsula +madison +five +lakes +boring +workin +addition +work +also +consult +provide +solutions +rather +advice +technicalexpertise +helping +internet +providers +porting +software +newsystems +reviving +computers +oddball +tasksare +kinds +things +tell +tovisit +serverbut +haven +time +anything +mostlyempty +except +home +pages +friends +activitiesuwvaxi +operate +uwvax +usenet +news +uucp +site +free +time +news +thats +print +something +along +lines +uwvax +department +computer +part +usenet +uucp +internet +longer +worked +along +lines +also +computer +sciences +department +svolunteer +news +master +much +goes +hand +hand +running +uwvax +however +trying +take +care +news +readers +across +differentarchitectures +trying +task +much +time +take +care +software +organizationsi +member +following +organizations +always +agree +oftenhave +good +benefits +members +usersof +communities +aopa +aircraft +owners +pilots +association +experimental +aircraft +association +usenix +association +blitz +drinkingwhen +school +horde +friendsand +visited +local +every +thursday +night +place +essen +haus +imports +beers +world +slowly +worked +entire +selection +years +developed +acquaintances +many +became +part +loftily +labelledblitz +drinking +society +rather +diverse +members +drink +meet +year +essen +haus +time +ofoctoberfest +weekend +chud +accumulateda +short +history +whatnotof +charade +bolo +home +pagelast +modified +bolo +josef +burger +bolo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..5c40d931 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,68 @@ +brad +home +page +welcome +brad +thayer +homepage +much +mail +links +link +computer +sciences +home +page +intro +networking +home +page +possibly +advanced +operating +systems +page +would +foolish +neglect +computer +systems +modeling +page +interested +thecs +using +computers +home +page +probably +bored +check +operating +systems +seminaranyway +aime +beaucoup +boire +pepper +badgers +packers +pagesome +links +search +altavista +search +enginefind +email +adresses +world +widethe +jazz +page +duane +mclaughlin +home +pageuw +athletics +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..41680bb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,84 @@ +scott +breach +home +pagescott +breach +breach +wisc +addresseseducationresearch +interests +publications +recreation +associatesaddressesscott +breachdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usatel +educationph +computer +science +university +wisconsin +madison +computer +engineering +carnegie +mellon +university +advisorguri +sohiresearch +interestscomputer +architecturemultiscalarpublicationsmultiscalar +processorsgurindar +sohi +scott +breach +vijaykumarnd +international +symposium +computer +architecture +anatomy +register +file +multiscalar +processorscott +breach +vijaykumar +gurindar +sohith +international +symposium +microarchitecture +efficient +detection +pointer +array +access +errorstodd +austin +scott +breach +gurindar +sohiconference +programming +language +design +implementation +recreationwingsbeersquidtvassociatestodd +austindoug +burgerbabak +falsafialain +kagit +vijaykumarlast +updated +september +scott +breach +breach +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..766a7038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,91 @@ +bleed +nontrivial +wayshere +temporarily +understated +page +unadorned +page +provide +section +pizza +pool +page +brief +hobbies +page +schedule +spring +stinkin +bookmarks +suny +albany +fall +poor +unfortunately +named +myclass +hypersensitive +rockjock +cretin +broods +glares +clenchesfist +cracks +knuckle +tragically +flightyfemme +getting +razzed +asskicked +since +thirdgrade +perhaps +smooth +skinned +hardbody +leatherboy +leers +atme +whenever +call +roll +difference +make +since +todayi +giggled +said +name +became +aprostitute +society +bigotries +pedagody +isaac +theblack +goat +refused +stay +clawed +hand +sssuuuhhh +mmuuuhhhh +dddduuuuuhhhhh +mmmmuuuhhhh +maaaahhhjaaaaaahhhhh +fffuuuhhhhh +yyyyyyyuuuuuhhhhh +mmmmmuuuuuhhhhhmmmmuuuhhhhh +uuuhhh +uuummmm +uuuhhhh +wwwwwhhhhuuuuuhhhhh +suny +albany +fall +zhang +wouldn +notice +eggleston +smile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..aa8fac2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,284 @@ +home +page +wisc +assistant +professor +computer +sciencedepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usacao +wisc +eduphone +departmental +office +education +research +interests +courses +recent +papers +recent +talks +summary +collection +links +education +princeton +university +princeton +university +tsinghua +university +beijing +china +research +interests +operating +systems +high +performance +file +systems +memory +resource +allocation +high +performance +systems +parallel +computers +research +projects +optimal +parallel +prefetching +cachingacfs +application +controlled +file +caching +prefetching +courses +research +topics +distributed +systems +operating +systems +fall +advanced +operating +system +spring +traces +simulators +file +access +tracesrecent +papersintegrated +parallel +prefetching +cachingtracy +kimbrel +anna +karlin +felten +princeton +department +tech +report +november +shorter +version +proceedings +sigmetrics +conference +application +controlled +file +caching +prefetching +thesis +also +princeton +department +tech +report +implementation +performance +integrated +application +controlled +caching +prefetching +disk +schedulingpei +edward +felten +anna +karlin +princetonuniversity +appear +tocs +study +integrated +prefetching +caching +strategiespei +edward +felten +anna +karlin +princetonuniversity +proceedings +sigmetrics +peformance +implementation +performance +application +controlled +file +cache +edward +felten +princeton +university +proceedings +first +osdi +symposium +slides +presentation +osdi +application +controlled +file +caching +policies +edward +felten +proceedings +usenix +summer +technical +conference +tickertaip +parallel +raid +architecture +swee +boon +shivakumar +venkataraman +john +wilkes +proceedings +isca +recent +talksslides +application +controlled +file +caching +prefetching +postscript +page +andpostscript +page +research +summarymy +research +focuses +storage +management +uniprocessor +andparallel +systems +particular +investigating +techniques +improvefile +system +performance +application +specific +replacement +policies +filecache +aggressive +prefetching +file +data +disk +havedeveloped +system +kernel +allocates +physical +pages +individualapplications +application +responsible +deciding +useits +physical +pages +caching +prefetching +system +uses +fairglobal +allocation +policy +kernel +carefully +integrates +cachereplacement +prefetching +disk +scheduling +prototype +implementationon +uniprocessor +systems +demonstratedthat +good +application +chosen +replacement +strategies +prefetching +informationcan +significantly +improve +performance +many +applications +currently +extending +techniques +parallel +systems +amdeveloping +integrated +caching +prefetching +algorithms +parallel +diskarrays +addition +investigating +global +resource +managementproblems +operating +systems +last +modified +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..d6461b35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,433 @@ +mike +careymichael +careyprofessor +leave +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +research +staff +member +almaden +research +center +harry +road +jose +phone +primary +alternate +mail +carey +almaden +carey +wisc +eduresearch +interestsdatabase +management +systems +parallel +distributed +computing +applied +performance +evaluation +research +interests +main +areas +database +system +performanceand +next +generation +database +systems +performance +area +topicsof +current +interest +include +performance +tradeoffs +techniques +forobject +oriented +database +systems +design +evaluation +algorithmsrelated +transaction +processing +scheduling +complex +multi +userdatabase +workloads +based +user +specified +performance +goals +next +generation +database +system +area +involved +theexodus +extensible +dbms +project +involved +shore +project +aimedat +developing +scalable +repository +storage +sharing +persistentobjects +heterogeneous +environment +goal +shore +effort +whichis +building +upon +experience +exodus +project +meet +objectmanagement +needs +replace +unix +files +applicationssuch +case +recently +moved +academia +industry +twelve +greatyears +part +become +best +academic +database +systems +researchgroup +known +universe +time +come +tackle +anddifferent +challenges +working +almaden +research +center +thesource +number +papers +teaching +students +forthe +past +twelve +years +work +related +objects +databases +significant +fraction +time +spent +relatively +projectthere +called +garlic +garlic +effort +build +heterogeneous +multimediainformation +system +allows +data +living +variety +repositories +tobe +queried +manipulated +though +resided +homogeneous +objectdatabase +spent +sabbatical +working +garlic +continuedto +work +madison +focusing +graduate +student +aquery +browser +front +tool +called +pesto +working +thegarlic +project +location +almaden +recent +publications +extending +oodb +access +design +implementation +experience +kiernan +proc +conf +object +orientedprogramming +systems +languages +applications +oopsla +austin +october +appear +querying +multimedia +data +multiple +repositories +content +garlic +project +cody +haas +niblack +arya +fagin +flickner +petkovic +schwarz +thomas +tork +roth +williams +wimmers +proc +ifip +working +conference +visualdatabase +systems +lausanne +switzerland +march +towards +heterogeneous +multimedia +information +systems +garlicapproach +haas +schwarz +arya +cody +fagin +flickner +luniewski +niblack +petkovic +thomas +williams +ande +wimmers +proc +ieee +workshop +research +issues +dataengineering +ride +taipei +taiwan +march +status +report +oodbms +benchmarking +effort +withd +dewitt +kant +naughton +proc +conf +onobject +oriented +programming +systems +languages +applications +portland +october +towards +automated +performance +tuning +complex +workloads +brown +mehta +livny +proc +thint +conf +large +data +bases +santiago +chile +september +making +real +data +persistent +initial +experiences +smrc +withb +reinwald +desslock +lehman +pirahesh +srinivasan +proc +persistent +object +systems +workshop +tarascon +provence +france +september +shoring +persistent +applications +dewitt +franklin +hall +mcauliffe +naughton +schuh +solomon +tsatalos +white +zwilling +proc +sigmodint +conf +management +data +minneapolis +fine +grained +sharing +page +server +oodbms +franklin +andm +zaharioudakis +proc +sigmod +conf +managementof +data +minneapolis +managing +memory +real +time +queries +pang +livny +proc +sigmod +conf +management +data +minneapolis +accurate +modeling +hybrid +hash +join +algorithm +patel +andm +vernon +proc +sigmetrics +conf +measurement +modelingof +computer +systems +nashville +indexing +alternatives +multiversion +locking +bober +proc +conf +extending +database +technology +cambridge +england +march +client +server +caching +revisited +franklin +indistributed +object +management +oszu +dayal +andp +valduriez +morgan +kaufmann +publishers diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..6b0e6c9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,42 @@ +chin +chin +tang +home +pagechin +chin +tanggraduate +student +university +wisconsin +department +west +dayton +streetmadison +wisconsin +office +bldg +mail +cchin +wisc +edutelephone +current +assignment +introduction +data +structures +office +hours +monday +tuesday +friday +ameducation +biochemistry +university +wisconsin +madison +biochemistry +university +wisconsin +madison +cchin +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..f462b7d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,43 @@ +satish +chandra +home +page +satish +chandra +chandra +wisc +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaphone +research +research +interests +summary +publications +coming +soon +real +soon +stuff +wodehouse +books +internet +movie +database +nostalgia +york +times +altavista +italian +language +culture +miscellaneous +linksclick +logged diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..8e91e7b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,127 @@ +trishul +chilimbi +home +page +trishul +chilimbi +chilimbi +wisc +click +real +megraduate +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaadvisor +larus +research +interests +programming +languages +compilers +architectures +parallel +computing +compiling +integrated +shared +memory +message +passing +parallel +program +performance +analysis +enhancement +visualization +shared +memory +parallel +computer +designresearch +projects +wisconsin +wind +tunneleducation +university +wisconsin +madison +tech +indian +institute +technology +bombay +research +summary +publicationscachier +tool +automatically +inserting +cico +annotations +trishul +chilimbi +james +larus +international +conference +parallel +processing +icpp +august +stormwatch +tool +visualizing +memory +system +protocolstrishul +chilimbi +thomas +ball +stephen +eick +james +larus +supercomputing +appear +december +awards +honors +certificate +merit +state +mathematics +olympiadpresidents +gold +medal +indian +national +physics +examinationcertificate +merit +state +examination +chemistrycertificate +merit +state +examination +electronics +miscellaneous +click +movie +dream +curriculum +vitae +last +updated +mail +suggestions +page +chilimbi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..bc102982 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,34 @@ +chandrasekaran +sashikanth +home +page +chandrasekaran +sashikanth +csashi +wisc +graduate +studentdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaadvisor +mark +hill +project +education +btech +indian +institute +technology +madras +june +univeristy +wisconsin +department +last +updated +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..7352ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,145 @@ +curt +ellmann +curt +ellmann +paradise +database +project +department +computer +sciences +university +wisconsin +madison +curt +wisc +items +focus +java +related +items +paradise +home +page +paradise +developers +page +webgnats +defect +tracking +paradise +prototype +index +shore +pages +previous +life +doit +eosdis +related +opengis +consortium +global +positioning +system +calmit +university +nebraska +lincoln +federated +approach +eosdis +database +related +object +database +management +group +home +page +free +database +list +standards +home +page +transaction +processing +performance +council +illustra +white +papersmiscellaneous +sitescampus +sites +wyrm +hoard +computer +sciences +wiscinfo +wiscinfo +gopher +wiscinfo +site +madison +library +wiscnet +netcorporate +apple +microsoft +research +lands +pawws +commerce +metrowerks +taligentsearching +savvy +search +webcrawler +open +text +worm +network +information +sites +internic +international +organization +standardization +internet +drafts +site +dilbert +world +online +winsock +applications +current +weather +maps +dienst +dienst +implementation +geologic +survey +government +information +locator +gils +oakridge +national +center +computing +sciences +stock +market +datacurt +ellmanncurt +wisc +eduparadise +database +projectdepartment +computer +sciencesuniversity +wisconsin +madison diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..2de6a1ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,25 @@ +chee +yong +home +pagechan +chee +yong +graduate +student +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +email +cychan +wisc +office +phone +page +construction diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..391dabdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,63 @@ +sara +bauman +home +page +sara +dailey +baumandailey +wisc +edugraduate +program +mathematics +computation +engineering +mace +university +wisconsin +madison +computer +sciences +department +engineering +mechanics +astronautics +department +nuclear +engineering +engineering +physics +education +physics +math +computer +science +lewis +clark +college +research +work +publication +current +schedule +page +links +friends +home +pagessend +mail +office +address +university +wisconsin +madison +computer +sciences +statistics +west +dayton +street +madison +last +modified +sara +daileytues diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..5a049178 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,633 @@ +david +wood +home +page +david +wood +david +wisc +associate +professor +computer +scienceand +electrical +computer +engineeringdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usadavid +wisc +eduphone +secretary +julie +fingerson +thea +sklenar +departmental +office +research +interests +computer +architecture +especially +memory +system +design +uniprocessors +multiprocessors +design +implementation +programming +parallel +computers +operating +systems +parallel +computers +performance +evaluation +tools +techniques +especially +memory +system +analysis +vlsi +design +including +power +design +portable +computers +research +projects +wisconsin +wind +tunnel +memory +system +performance +tools +warts +education +university +california +berkeley +university +california +berkeley +current +graduate +students +babak +falsafi +steve +reinhardt +brian +toonenrecently +graduated +students +rahmat +hyder +intel +alvy +lebeck +duke +university +pfile +microsystems +mark +callaghan +informix +courses +teach +fall +introduction +computer +architecturecs +machine +organization +programmingcs +introduction +computer +architecturecs +advanced +computer +architecture +advanced +computer +architecture +selected +recent +papers +decoupled +hardware +support +distributed +shared +memorysteven +reinhardt +robert +pfile +anddavid +wood +ieee +international +symposium +computer +architecture +isca +coherent +network +interfaces +fine +grain +communicationshubhendu +mukherjee +babak +falsafi +mark +hill +anddavid +wood +ieee +international +symposium +computer +architecture +isca +synchronization +hardware +networks +workstations +performance +costrahmat +hyder +david +wood +ieee +international +conference +supercomputing +dynamic +self +invalidation +reducing +coherence +overhead +shared +memory +multiprocessorsalvin +lebeck +anddavid +wood +ieee +international +symposium +computer +architecture +isca +june +active +memory +abstraction +memory +system +simulationalvin +lebeck +anddavid +wood +sigmetricsmay +accuracy +performance +parallel +simulation +interconnection +networks +douglas +burger +david +wood +proceedings +international +parallel +processing +symposium +april +application +specific +protocols +user +level +shared +memory +babak +falsafi +alvin +lebeck +steven +reinhardt +ioannis +schoinas +mark +hill +james +larus +anne +rogers +david +wood +proceedings +supercomputing +fine +grain +access +control +distributed +shared +memory +ioannis +schoinas +babak +falsafi +alvin +lebeck +steven +reinhardt +james +larus +david +wood +proceedings +asplos +tempest +typhoon +user +level +shared +memory +steven +reinhardt +james +larus +david +wood +proceedings +symposium +computer +architecture +cache +profiling +spec +benchmarks +case +study +alvin +lebeck +anddavid +wood +pages +ieee +computer +october +cooperative +shared +memory +software +hardware +scalable +multiprocessors +mark +hill +james +larus +steven +reinhardt +david +wood +transactions +computer +systems +tocs +november +wisconsin +wind +tunnel +project +annotated +bibliography +mark +hill +james +larus +david +wood +computer +architecture +news +december +line +version +revised +frequently +wisconsin +architectural +research +tool +warts +mark +hill +james +larus +alvin +lebeck +madhusudhan +talluri +david +wood +computer +architecture +news +august +research +summary +main +research +goals +developing +cost +effective +computerarchitectures +take +advantage +rapidly +changing +technologies +myresearch +program +major +thrusts +evaluating +performance +feasibility +correctness +architectures +developing +tools +techniques +facilitate +evaluation +currently +research +focusses +following +three +areas +multi +paradigm +multiprocessors +efficiently +integrate +shared +memory +message +passing +hybridprogramming +paradigms +virtual +prototyping +system +exploits +similaritesof +existing +parallel +machine +simulate +hypothetical +parallel +machine +techniques +understanding +tuning +program +performance +recent +results +include +developing +interface +calledtempest +user +level +protocol +handlers +system +suppliedmechanisms +tempest +provides +mechanisms +allow +programmers +compilers +program +libraries +implement +message +passing +transparent +shared +memory +hybrid +combinations +tempestmechanisms +overhead +messages +bulk +data +transfer +virtualmemory +management +fine +grain +access +control +novelmechanism +fine +grain +access +control +allows +user +software +tagblocks +bytes +read +write +read +invalid +thelocal +memory +used +transparently +cache +remote +data +exploring +alternative +ways +support +interface +first +called +typhoon +proposed +hardwareplatform +implements +tempest +mechanisms +fully +programmable +user +level +processor +network +interface +reverse +translationtable +rtlb +invokes +network +processor +detects +fine +grainaccess +fault +simulated +typhoon +wisconsin +wind +tunnel +found +thata +transparent +shared +memory +protocol +running +typhoon +performscomparably +anall +hardware +cache +coherence +protocol +five +shared +memoryprograms +also +developed +memory +system +simulation +method +thatoptimizes +common +case +cache +hits +significantly +reducingsimulation +time +fast +cache +tightly +integrates +reference +generation +simulation +byproviding +abstraction +tagged +memory +blocks +referenceinvokes +user +specified +function +depending +upon +reference +type +andmemory +block +state +simulator +controls +references +processedby +manipulating +memory +block +states +specifying +special +null +functionfor +action +cases +fast +cache +implements +abstraction +usingbinary +rewriting +perform +table +lookup +memoryreference +sparcstation +fast +cache +simulation +times +tothree +times +faster +conventional +trace +driven +simulator +thatcalls +procedure +memory +reference +simulation +times +onlythree +times +slower +original +instrumented +program +also +investigating +using +fast +cache +binary +rewriting +techniquesto +support +tempest +interface +existing +hardware +platforms +last +updated +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..8fbb395e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,34 @@ +doug +burger +home +page +doug +burger +home +pageprofessional +information +research +summaryresume +cvtranscriptcourse +projectsadvisoraffiliated +projects +galileo +sciwisconsin +wind +tunnelpages +maintain +computer +architectureuw +computer +architecturesimplescalar +tool +setgenericasacmpersonal +stuff +meuseful +linksphoto +galleryriding +demonhunting +damn +catsbeware +grad +school diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..068d36ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,228 @@ +carl +boor +home +page +last +change +carl +boor +professor +computer +sciences +mathematicsdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +wisconsin +usaoffice +hours +fall +town +schoenberg +worked +madison +death +email +deboor +wisc +telephone +schedule +fall +teaching +look +former +present +students +selected +recent +articles +approximation +theory +written +areavailable +anonymous +wisc +approx +read +file +provides +access +individual +files +theclick +button +clickable +version +read +file +small +subset +authored +clickable +click +list +errata +third +edition +conte +boor +elementary +numerical +analysis +algorithmic +approach +list +errata +various +printings +carl +boor +practical +guide +splines +check +latest +versions +thevarious +programs +drivers +latter +book +click +journal +ofapproximation +theory +published +academic +press +information +journal +including +recently +accepted +publishedpapers +well +email +postal +addresses +many +approximators +andmuch +much +ditto +forconstructive +approximation +published +springer +verlag +ditto +foreast +journal +approximations +search +theirtables +content +singly +combined +thank +paul +nevai +thishandy +tool +alsoapproximation +amos +slist +homepages +approximators +spline +bibliography +available +links +various +publishers +journals +people +resources +ilas +information +center +seek +shall +find +organized +introduction +joys +seeviva_vi +alsoon +screen +tutorials +click +great +picture +hermite +place +also +contains +useful +information +html +thehtml +primermight +even +better +unusual +ever_changing +home +page +david +griffeath +sprimordial +soup +kitchen +various +interesting +information +seeodds +ends +thank +allan +pinkus +pinkus +techunix +technion +paul +nevaiif +find +useful +also +check +paul +nevai +makehis +mathematical +outputavailable +courses +check +information +math +click +information +numerical +analysis +house +next +door +occupied +takis +souganides +andthaleia +zariphopoulou +szego +bust +stands +look +inscription diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..2fcfcdb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,249 @@ +devise +home +pagedevise +environment +data +exploration +visualizationtable +contents +featuresexamplesin +depthpublicationsrelated +workreleasecontactsfeaturesthese +features +distinguish +devise +visualization +environments +visual +query +interface +visualizations +constructed +oneset +data +saved +applied +input +data +data +larger +memory +efficiently +handled +mapping +data +graphics +record +level +cancontrol +color +shape +individual +records +ability +query +data +records +used +represent +graphics +flexible +layout +mechanisms +within +windows +helps +user +group +data +together +comparison +aside +needed +linked +axes +cursor +helps +compare +relationship +different +viewsof +data +record +based +input +data +direct +input +ascii +files +integer +float +date +string +data +types +examplescheck +following +examples +cool +pictures +quick +introduction +devise +tree +validationmolecular +biology +cell +images +soil +science +birch +clusteringfinancial +data +explorationfamily +medicine +national +climatedata +centergeographical +information +systemsoil +sciencefile +serverprogram +tracesclinical +sciences +many +moreexamples +data +viewer +family +medicine +depthfor +detailed +description +devise +model +visualizationvisual +query +interfaceperformance +issuespublicationsmiron +livny +raghu +ramakrishnan +jussi +myllymaki +visual +exploration +large +datasets +proceedings +spie +conference +visual +dataexploration +analysis +january +michael +cheng +miron +livny +raghu +ramakrishnan +visual +analysis +stream +data +inproceedings +spie +conference +visual +data +exploration +andanalysis +february +raghu +ramakrishnan +michael +cheng +miron +livny +praveenseshadri +next +sequencequeries +proceedings +international +conference +themanagement +data +comad +december +related +workthe +seqproject +complementary +devise +designed +queryrecord +based +sequenced +data +output +queries +bevisualized +devise +release +informationwe +currently +releasing +version +devise +executablesfor +solaris +platforms +dynamically +linked +need +ld_library_path +environment +variable +appropriately +rundevise +supported +architectures +executables +arestatically +linked +require +shareable +libraries +time +download +devise +click +contactsfor +information +research +project +contactmiron +livny +raghu +ramakrishnan +jussi +myllymaki +guangshun +chen +kent +wenger +user +support +hotline +send +mail +devise +usersupport +hotline +page +accessed +times +since +october diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..2e105f10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,300 @@ +home +page +david +dewitt +david +dewitt +professor +romnes +fellow +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +dewitt +wisc +university +michigan +interests +object +oriented +database +systems +parallel +database +systems +databasebenchmarking +geographic +information +systems +research +summary +main +research +projects +shore +paradise +objectiveof +shore +design +implement +evaluate +persistent +objectsystem +serve +needs +wide +variety +target +applicationsincluding +hardware +software +systems +persistent +programminglanguages +geographic +information +systems +satellite +data +repositories +multimedia +applications +shore +expands +basic +capabilitiesof +widely +used +exodus +storage +manager +developed +wisconsin +funded +arpa +number +ways +including +support +typedobjects +multiple +programming +languages +unix +like +hierarchicalname +space +named +objects +unix +compatible +interfaceto +objects +text +field +interface +intended +toease +transition +applications +unix +file +systemenvironment +shore +existing +unix +tools +ccwill +able +store +data +shore +objects +without +modification +basically +unix +file +becomes +either +single +shore +object +orthe +text +field +complex +object +shore +targeted +wide +range +hardware +environments +scaling +fromindividual +workstations +heterogeneous +client +server +networksto +large +multiprocessors +intel +paragon +shore +ajoint +project +profs +carey +naughton +solomon +paradise +project +attempting +apply +technology +developedas +part +shore +gamma +projects +gamma +parallel +relationaldatabase +system +developed +university +wisconsin +thetask +storing +manipulating +geographic +data +sets +currently +many +geographic +information +systems +relational +databasesystems +hold +data +systems +excellent +formanaging +business +data +poor +match +modelingneeds +must +capable +storing +manipulatingmuch +complex +objects +polygons +polylines +instead +paradise +employs +object +oriented +data +model +providing +muchbetter +match +type +needs +another +significant +differencefrom +current +systems +paradise +employs +parallelismto +facilitate +executing +processing +large +data +sets +assatellite +images +target +hardware +platform +projectis +cluster +sparc +connected +sample +recent +publications +benchmark +withm +carey +naughton +proceedings +sigmod +conference +washington +shoring +persistentapplications +dewitt +franklin +hall +mcauliffe +naughton +chuh +tsatalos +white +zwilling +proceedings +sigmod +international +conferenceon +management +data +minneapolis +client +server +paradise +kabra +patel +proceedingsof +large +data +base +conference +santiego +chile +august +recent +talks +vldb +invited +talk +object +relational +summit +presentation +page +automatically +created +january +email +pubs +wisc +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..9a2d0041 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,625 @@ +charles +dyer +home +pagecharles +dyerprofessordepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +wisconsin +usae +mail +dyer +wisc +edutelephone +finger +infoph +university +maryland +curriculum +vitae +areas +interest +computer +vision +three +dimensional +shape +representation +appearance +modeling +view +synthesis +active +vision +visualizationgroup +leader +wisconsin +computer +vision +groupprogram +chair +cvpr +research +interests +view +synthesisthe +goal +work +develop +basic +tools +controllingin +real +time +either +autonomously +interactively +virtual +cameraof +real +environment +input +images +videostreams +acquired +fixed +mobile +cameras +around +site +output +panoramic +visualization +scene +whicha +virtual +user +controlled +camera +moved +environment +technology +user +could +interactively +navigate +througha +real +environment +controlling +customized +path +views +thesite +predetermined +input +images +main +researchquestion +adaptively +combine +basis +images +synthesizenew +views +scene +without +models +scene +reconstructionas +intermediate +step +recently +developed +innovativetechnique +callview +morphing +takes +basisimages +interpolates +continuous +range +images +corresponding +views +linear +path +connecting +camera +centers +visual +explorationcomputer +vision +researchers +recently +started +investigate +howto +actively +control +image +acquisition +process +controllingcamera +parameters +studying +purposefully +control +theposition +orientation +camera +order +dynamically +adjustviewpoint +based +appearance +three +dimensional +scene +theuse +real +time +viewpoint +control +behaviors +important +forsolving +tasks +exploring +unknown +object +order +findspecific +surface +markings +building +global +model +unknownshape +recognizing +object +coordinating +simple +observer +behaviors +change +appearanceof +surfaces +well +defined +simplify +image +computationsrequired +make +precise +global +progress +algorithm +andeliminate +need +accurate +differential +measurements +thecamera +motion +believe +active +approach +moving +towardsviewpoints +closely +related +geometry +viewedobjects +important +general +used +thisapproach +develop +provably +correct +algorithms +moving +aside +view +surface +revolution +order +recover +shape +reconstructing +global +surface +unknown +smooth +arbitrarily +shaped +object +visualizationin +area +visualization +developed +mapping +techniquescapable +generating +displays +possible +data +objects +defined +user +algorithm +without +need +user +defined +type +specificgraphics +display +procedures +capability +displayingarbitrary +combinations +algorithm +data +objects +commonframe +reference +coupled +interactive +control +algorithmexecution +provides +powerful +understand +algorithm +behavior +especially +interactive +visual +experiments +scientific +dataanalysis +algorithms +implemented +system +called +forexperimenting +techniques +used +visualizingintermediate +final +results +data +analysis +algorithms +forproblems +discriminating +clouds +satellite +images +recent +publications +seitz +dyer +cyclic +motion +analysis +using +period +trace +motion +based +recognition +shah +jain +kluwer +boston +appear +seitz +dyer +view +invariant +analysis +cyclic +motion +computer +vision +appear +seitz +dyer +view +morphing +proc +siggraph +seitz +dyer +toward +image +based +scene +representation +using +view +morphing +proc +conf +pattern +recognition +track +computer +vision +dyer +shape +recovery +stationary +surface +contours +controlled +observer +motion +advances +image +understanding +festschrift +azriel +rosenfeld +ieee +computer +society +press +alamitos +kutulakos +dyer +global +surface +reconstruction +purposive +control +observer +motion +artificial +intelligence +seitz +dyer +complete +scene +reconstruction +four +point +correspondences +proc +conf +computer +vision +seitz +dyer +physically +valid +view +synthesis +image +interpolation +proc +workshop +representation +visual +scenes +kutulakos +dyer +recovering +shape +purposive +viewpoint +adjustment +computer +vision +kutulakos +seales +dyer +building +global +object +models +purposive +viewpoint +control +proc +based +vision +workshop +kutulakos +dyer +lumelsky +provable +strategies +vision +guided +exploration +three +dimensions +proc +ieee +conf +robotics +automation +kutulakos +dyer +occluding +contour +detection +using +affine +invariants +purposive +viewpoint +control +proc +computer +vision +pattern +recognition +conf +seitz +dyer +affine +invariant +detection +periodic +motion +proc +computer +vision +pattern +recognition +conf +seitz +dyer +detecting +irregularities +cyclic +motion +proc +workshop +motion +rigid +articulated +objects +hibbard +paul +battaiola +santek +voidrot +martinez +dyer +interactive +visualization +earth +space +science +computations +computer +july +hibbard +dyer +paul +lattice +model +data +display +proc +visualization +recent +publications +including +abstracts +wisconsin +computer +vision +groupcourses +taught +introduction +artificial +intelligence +spring +fall +computer +vision +fall +spring +current +students +gareth +bestor +brian +morgan +steve +seitz +liangyin +yuph +graduates +bill +hibbard +whibbard +macc +wisc +visualizing +scientific +computations +system +based +onlattice +structured +data +display +models +kiriakos +kutulakos +kyros +rochester +exploring +three +dimensional +objects +controlling +point +ofobservation +mark +allmen +allmen +iutech +image +sequence +description +using +spatiotemporal +flow +curves +toward +motion +based +recognition +brent +seales +seales +appearance +models +three +dimensionalshape +machine +vision +graphics +harry +plantinga +wheaton +continuous +viewer +centered +object +representationfor +computer +vision +charles +stewart +stewart +connectionist +models +stereo +vision +bradley +kjell +kjell +ccsua +ctstateu +oriented +edge +separation +texture +measureslinks +interestmy +bookmarks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..d59f2f8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,24 @@ +welcome +friends +welcome +machinewe +appreciate +patience +long +arduous +task +bringing +better +home +page +check +education +curriculum +vitaecheck +class +teaching +home +page +section +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..b606d26d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,212 @@ +eric +hazen +home +pageeric +hazennon +professorroom +computer +sciences +west +dayton +madison +currently +working +fornet +scout +services +project +internic +wait +wait +part +registration +services +couldn +help +domain +name +problems +even +wanted +internic +neat +things +money +scout +things +located +computer +sciences +department +university +ofwisconsin +madison +seeing +could +never +explain +scout +elegantly +fearless +leader +susan +calcari +check +scout +page +official +explanation +design +maintenance +site +email +list +scout +report +also +assist +pete +devries +withtech +support +macs +unix +machines +coming +scout +spent +half +years +laboratory +molecular +biology +time +full +time +student +well +molecular +biology +called +technical +support +assistant +digital +video +specialist +meant +around +fixing +people +broken +mice +answering +email +questions +time +lucky +make +cool +videos +interested +using +digital +video +instructional +purposes +check +multimedia +services +page +graduate +university +wisconsin +philosophy +program +makes +philosopher +real +commodity +capitalist +society +also +completed +requirements +curriculum +makes +shameless +pragmatist +computer +science +department +email +talk +since +good +metaphysical +discussion +links +serve +practical +purpose +makethis +page +look +standard +resume +date +multimedia +page +molecular +biology +explains +made +animations +shown +world +among +drosophila +geneticists +told +wonderful +girlfriend +page +salon +magazine +entertaining +informative +ezine +created +completely +macs +laurie +anderson +green +room +shockwave +check +kudon +page +link +knowing +witness +quicktimevr +documentary +plight +bosnia +uprooted +population +billie +holiday +homepage +national +security +archives +check +nixon +presley +meetingsejhazen +facstaff +wisc +edulast +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..0638c0bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,16 @@ +tina +eliassi +home +pagetina +eliassi +university +illinois +urbana +champaign +university +wisconsin +madison +office +bldgphone +eliassi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..a7037062 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,118 @@ +eric +home +page +passsth +another +cold +budweisther +address +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +department +electrical +computer +engineering +university +wisconsin +madison +johnson +drive +madison +office +phone +mail +ericro +wisc +research +area +computer +architecture +advisor +professor +smithresearch +topics +kestrel +multiscalar +project +instruction +level +parallelism +high +bandwidth +instruction +fetch +mechanisms +branch +prediction +confidence +branch +mispredict +tolerancepublications +trace +cache +latency +approach +high +bandwidth +instruction +fetching +eric +rotenberg +steve +bennett +james +smith +appear +proceedings +annual +international +symposium +microarchitecture +december +assigning +confidence +conditional +branch +predictions +erik +jacobsen +eric +rotenberg +james +smith +appear +proceedings +annual +international +symposium +microarchitecture +december +trace +cache +latency +approach +high +bandwidth +instruction +fetching +eric +rotenberg +steve +bennett +james +smith +university +wisconsin +madison +technical +report +april +resume diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..b1d89f54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,95 @@ +babak +falsafi +home +page +babak +falsafi +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usatel +email +falsafi +wisc +work +people +mentorcultresearch +interests +computer +architecture +performance +evaluation +measurement +computer +systems +parallel +programming +modelseducation +computer +sciences +university +wisconsin +madison +december +computer +sciences +suny +buffalo +june +electrical +computer +engineering +suny +buffalo +june +miscellaneous +publications +would +rather +drinking +would +rather +would +rather +reading +would +rather +listening +used +high +school +ideas +like +fail +morf +shubu +dionosi +hillarious +profane +phone +conversations +check +american +french +queen +computer +architects +look +like +hacking +partners +crime +next +generation +parallel +computer +last +updated +babak +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..a010d600 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,150 @@ +home +page +michael +ferris +michael +ferris +associate +professor +computer +sciences +industrial +engineeringand +member +center +mathematical +sciences +computer +sciences +department +university +wisconsin +west +dayton +street +madison +telephone +email +ferris +wisc +university +cambridge +interests +theory +algorithms +applications +mathematical +programming +research +summary +looking +robust +methods +solving +large +scale +variationalinequality +nonlinear +programming +problems +applications +toproblems +economics +engineering +pivotal +path +followingtechniques +investigated +based +successive +linearization +emphasis +numerical +properties +large +scale +problems +andinterfaces +modeling +languages +particular +applications +beingconsidered +include +economic +equilibria +effects +taxation +oncarbon +emissions +traffic +congestion +effects +tolling +structural +optimization +contact +problems +chemical +process +design +considering +parallel +architectures +solvingproblems +nonlinear +optimization +graph +partitioningtechniques +determine +underlying +structure +investigated +tool +general +purpose +parallel +optimization +techniques +forexploiting +parallel +machines +directly +within +modeling +systems +arealso +consideration +prototypes +using +condor +system +extensions +complementarity +framework +also +beinginvestigated +emphasis +identifying +exploiting +underlyingmodel +structure +publications +complete +list +papers +mostly +electronically +available +relevant +links +cpnet +complementarity +problem +mathematical +prgramming +home +pagecs +page +updated +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..58a0c655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,335 @@ +david +finton +home +page +david +finton +finton +wisc +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +welcome +page +grad +student +research +nerdin +artificial +intelligencehere +university +wisconsin +madison +grew +grand +rapids +michigan +late +show +home +office +earned +degree +math +michigan +state +master +computer +sciencehere +dissertator +institution +taking +little +year +develop +traffic +measurements +softwarefor +first +thesis +advisor +left +wisconsin +trusty +nextstationor +library +enjoyplaying +trumpet +piano +listening +longhair +music +playing +volleyball +intervarsityfolks +contributing +supersoaker +arms +race +comments +pages +feel +free +comment +form +send +mail +finger +accountto +current +plan +whether +system +gainful +employment +introduction +artificial +intelligence +current +project +computers +smart +understand +making +machines +intelligentis +goal +artificial +intelligence +essence +intelligenceis +ability +learn +adapt +learn +actappropriately +order +reach +goals +reinforcement +learning +treats +problem +general +case +system +outputs +control +actions +change +environment +inputs +senses +environment +also +input +reinforcement +weak +kind +feedback +expressed +positive +negative +number +instead +teacher +present +thesystem +input +output +pairs +system +instead +receives +thumbs +thumbs +irregular +intervals +work +focussed +need +todistinguish +good +actions +ones +direct +process +building +agood +representation +environment +terms +relevant +orimportant +features +note +importance +basedfeature +extraction +currently +applying +notion +importance +problem +learning +balance +need +explore +world +need +perform +optimally +exploration +exploitation +also +investigating +ways +using +importance +make +learningprocess +efficient +allowing +system +specify +starting +points +learning +experiments +active +learning +goal +develop +better +understanding +intelligentadaptation +hope +provide +basis +intelligent +action +whichwill +also +benefit +knowledge +based +task +based +work +really +date +sorry +reinforcement +learning +pagefor +information +hotlistthis +browser +independent +hotlist +keep +copy +access +browser +platform +combinations +actually +bookmarks +file +omniweb +elegant +functional +browser +netscape +opinion +omniwebis +currently +available +nextstep +available +forall +openstep +variants +openstep +released +editorial +pages +responseto +jehovah +witnesses +deity +christwisconsin +sites +intervarsity +graduate +fellowship +university +wisconsin +madison +check +weatherin +city +madison +wisc +star +trek +page +programming +information +links +pages +isthmus +daily +pagesome +favorite +places +visit +nebulae +nasa +pictures +world +wide +study +bible +crosssearch +minor +glenn +gould +homepage +farside +daily +star +trek +star +trek +voyagerenter +dilbert +zoneroam +world +virtual +tourist +stereograms +tell +head +blow +true +next +head +showing +thinks +bill +gates +word +sponsor +last +modified +october +finton +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..a74fcb21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,412 @@ +home +page +charles +fischer +nbsp +nbsp +nbsp +nbsp +nbsp +nbspcharles +fischer +nbsp +nbsp +nbsp +nbsp +nbsp +nbspprofessor +computer +sciences +nbsp +nbsp +nbsp +nbsp +nbsp +nbspuniversity +wisconsin +computer +sciences +department +university +wisconsin +dayton +madison +telephone +messages +email +fischer +wisc +teaching +semester +teachingcs +graduate +course +compilers +spring +teachingcs +undergraduate +course +compilers +research +research +interests +focus +compiler +design +implementation +recently +interested +best +exploit +enormouscapabilities +provided +modern +computer +architectures +issues +students +investigated +include +code +scheduling +important +avoid +unnecessary +pipeline +delays +haveinvestigated +issues +optimally +scheduling +arithmetic +expressions +dominant +model +global +procedure +level +register +allocation +graph +coloring +best +model +maybe +models +explicitly +quantify +likelihood +benefit +registerresidence +attractive +careful +register +allocation +vital +generating +code +unnecessary +loadsand +stores +must +avoided +ultimate +register +allocation +interprocedural +register +allocation +theprocedures +program +analyzed +studiedinterprocedural +allocation +modelsthat +optimallyallocate +register +among +procedures +polynomial +time +approach +seems +effective +practice +anyone +codes +knows +easy +make +mistakes +errors +involving +pointers +array +indices +especially +common +studied +ways +toautomatically +check +pointer +array +operations +time +shared +memory +multiprocessor +workstations +using +processor +program +another +monitor +program +possible +routinely +check +programs +execute +little +orno +apparent +slowdown +recent +publications +minimum +cost +interprocedural +register +allocation +steven +kurlander +charles +fischer +popl +sigplan +sigact +symposium +principles +programminglanguages +january +cost +concurrent +checking +pointer +array +accesses +programs +harish +patil +charles +fischer +appear +insoftware +practice +experience +demand +driven +register +allocation +todd +proebsting +charles +fischer +appear +inacm +transactions +programming +languages +systems +efficient +instruction +scheduling +delayed +load +architectures +steven +kurlander +todd +proebsting +charles +fischer +transactions +programming +languages +systems +zero +cost +range +splitting +steven +kurlander +charles +fischer +sigplan +conference +programming +language +design +implementation +june +activitiesa +completely +revised +second +edition +crafting +compiler +authored +cytronand +richard +leblanc +almost +complete +published +benjamin +cummings +look +soon +better +bookstores +everywhere +short +communications +editor +transactions +programming +languages +systems +toplas +educationph +cornell +university +parsing +context +free +languages +parallel +environments +supervised +john +williams +studentsdonn +milton +syntactic +specification +analysis +attributed +grammars +july +bruce +rowland +semantic +attribute +evaluation +syntactic +analysis +july +stephen +skedzielewski +definition +attribute +reevaluation +attributed +grammars +september +bernard +dion +locally +least +cost +error +correctors +context +free +context +sensitiveparsers +december +mahadevan +ganapathi +retargetable +code +generation +optimization +using +attribute +grammars +november +vimal +begwami +approach +attribute +evaluation +error +correction +compilers +august +mauney +least +cost +syntactic +error +correction +using +extended +right +context +january +gregory +johnson +context +sensitive +attribute +flow +august +anil +generating +execution +facilities +integrated +programming +environments +december +william +winsborough +automatic +transparent +parallelization +logic +programs +compile +time +august +venkatesh +framework +specification +implementation +program +analysis +algorithms +august +todd +proebsting +code +generation +techniques +august +steve +kurlander +approaches +interprocedural +register +allocation +january +harish +patil +efficient +program +monitoring +techniques +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..5ef44ff6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,547 @@ +galileo +home +page +galileo +projects +wisconsintable +contentsgalileoproject +descriptionpublicationsrelated +projectssci +wisconsinproject +descriptionpublicationsproject +membersgalileo +wisconsingalileo +project +conducted +computer +architecture +groupat +university +wisconsin +madison +project +focuses +medium +long +term +evolution +processor +system +architectures +emphasison +memory +system +specifically +studying +therelationship +processor +main +memory +futuresystems +completely +separate +today +integrated +extent +processing +capability +storage +merge +least +ways +increasing +chip +penalties +issuable +instructions +orlimited +chip +bandwidth +designers +place +capacityon +processor +chip +module +eventually +sizable +fractionof +main +memory +resides +chip +represented +arrow +labeled +mopin +diagram +different +possibility +migration +ofprocessor +capability +onto +dram +chips +eventuallyobviating +central +processor +iram +arrow +specific +research +currently +focuses +following +areas +examining +performance +impact +large +memory +latencies +andlimited +memory +bandwidth +current +future +microprocessor +based +systemsperformance +modeling +various +design +points +along +theprocessor +memory +integration +spectrumcache +hierarchy +design +integrated +systemsdesign +main +memory +banks +integrated +systemprogram +execution +systems +multiple +integrated +chips +datascalar +architectures +galileo +specific +publications +exploiting +optical +interconnects +eliminate +serial +bottlenecksdoug +burger +james +goodman +appears +international +conference +massively +parallel +processing +using +optical +interconnects +october +datascalar +architectures +spsd +execution +modeldoug +burger +stefanos +kaxiras +james +goodman +university +wisconsin +madison +computer +sciences +department +technical +report +july +quantifying +memory +bandwidth +limitations +current +future +microprocessorsdoug +burger +james +goodman +alain +appears +international +symposium +computer +architecture +declining +effectiveness +dynamic +caching +general +purpose +microprocessorsdouglas +burger +james +goodman +alain +university +wisconsin +madison +computer +sciences +department +technical +report +january +related +projects +iram +berkeleyppram +kyushu +univeristy +japansci +wisconsinour +group +also +closely +involved +research +relating +cache +coherentshared +memory +multiprocessor +design +specifically +studying +scalable +coherentinterface +coherence +transport +layers +using +ieee +standard +base +platform +exploration +ideas +standard +specifies +linked +list +based +hardware +coherence +protocol +includes +support +efficient +synchronization +primitives +queue +lock +qolb +aswell +optimizations +different +sharing +patterns +pairwise +sharing +fresh +read +sharing +standard +also +includes +definitionfor +extremely +high +bandwidth +latency +transport +layer +betweenprocessing +elements +individual +clusters +currently +performing +related +research +following +topics +extending +logarithmically +growing +sharing +structuresefficient +hardware +synchronization +shared +memory +multiprocessorsa +scalability +study +base +protocol +including +standard +extensionsaggressive +consistency +models +shared +memory +multiprocessorswisconsin +publications +mechanisms +minimizing +synchronization +overheads +shared +memory +applications +appears +best +architecture +paper +proceedings +international +conference +supercomputing +july +also +university +wisconsin +madison +computer +sciences +department +simulation +transport +layer +wisconsin +wind +tunneldouglas +burger +james +goodman +proceedings +second +international +workshop +based +high +performance +cost +computing +march +also +university +wisconsin +madison +computer +sciences +department +technical +report +march +hierarchical +extensions +scijames +goodman +stefanos +kaxiras +university +wisconsin +madison +computer +sciences +department +technical +report +july +extending +scalable +coherent +interface +large +scale +shared +memoryross +evan +johnson +university +wisconsin +madison +computer +sciences +department +technical +report +february +hardware +support +synchronization +scalable +coherent +interface +nagi +aboulenein +stein +gjessing +james +goodman +philip +woest +university +wisconsin +madison +computer +sciences +department +technical +report +november +interconnect +topologies +point +point +ringsross +johnson +james +goodman +university +wisconsin +madison +computer +sciences +department +technical +report +december +analysis +ringsteven +scott +james +goodman +mary +vernon +university +wisconsin +madison +computer +sciences +department +technical +report +november +lower +bounds +latency +scalable +linked +list +cache +coherenceross +johnson +university +wisconsin +madison +computer +sciences +department +technical +report +june +analysis +synchronization +mechanisms +shared +memory +multiprocessorsphilip +woest +james +goodman +university +wisconsin +madison +computer +sciences +department +technical +report +february +efficient +synchronization +primitives +large +scale +shared +memory +multiprocessorjames +goodman +mary +vernon +philip +woest +proceedings +third +international +conference +architectural +support +programming +languages +operating +systems +april +also +university +wisconsin +madison +computer +sciences +department +technical +report +project +participants +faculty +goodman +graduate +students +doug +burger +alain +stefanos +kaxiras +project +alumni +nagi +abouleneinross +johnsonsteve +scottlast +modified +doug +burger +dburger +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..35f773cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,38 @@ +andrew +geery +home +page +andrew +geery +geery +wisc +west +dayton +street +madison +regent +madison +madisonin +computer +science +currently +working +compsci +grades +schedule +pontifications +people +interest +jacques +derrida +post +structuralism +martin +heidegger +albert +camus +jean +paul +sartre +friedrich +nietzsche diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..a19e7ac0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,167 @@ +gideon +glass +homepage +continually +tweaked +homepage +thanks +stopping +collect +images +find +deposit +directory +samples +sample +picture +guys +claimed +cooler +according +toonion +seethis +movie +usual +suspects +walk +nearest +blockbuster +note +recently +moved +none +roommates +dutch +cheap +either +given +predicament +dismal +prospects +improvement +withno +outside +influence +please +consider +making +small +donationto +help +defray +cost +purchase +checks +money +orderscan +sent +following +address +please +send +cash +gideon +glass +monroe +floor +madison +usathank +support +grad +student +sometimes +find +reading +papers +eventhough +shelf +feet +unread +books +waiting +read +anyhow +looking +something +christian +achilles +huge +index +might +also +unified +cstechreports +index +class +project +reports +otherstuff +available +following +program +machine +load +averagewill +grow +fast +main +fork +doofus +actually +time +sharing +machine +back +calvin +great +time +killer +zippy +pinheadhas +reloading +several +times +justtry +last +fall +killed +time +tweaking +netscape +nothing +think +works +mozilla +higher +well +dabbled +object +oriented +programmingin +mostly +exercise +suppose +netscape +buttonhere +thing +right +suffice +case +told +somethingin +bookmarks +dennis +ritchie +creator +unix +wrote +anti +forward +unix +haters +handbook +send +mailand +maybe +maybe +please diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..007fcf2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,608 @@ +andy +krazy +glew +university +wisconsin +home +pageandy +krazy +glew +university +wisconsin +home +pagethis +andy +first +wisconsin +home +page +largelyformed +snippets +intel +home +page +stylishor +organized +summarycontact +touch +ship +things +addressescalendar +arrange +meetings +filesystem +access +filesystem +glew +public +html +http +wisc +glew +approach +anyone +read +files +unless +specifically +arrange +open +default +scribe +minutes +taker +sohi +research +group +weekly +meetings +research +interests +rather +generic +form +adapted +application +dynamic +versus +static +computer +architecture +wannabe +author +computer +architecture +high +level +editting +thoughts +higher +education +suggestions +effectively +work +patents +claims +fame +miscellaneous +info +stuff +self +image +configuration +management +resume +favorite +quotes +sayings +self +image +beef +montreal +people +cares +pope +priest +parson +king +william +boyne +wants +coin +trusts +summer +time +keep +harm +sees +first +frost +snow +poor +trusts +almighty +dollar +good +maple +wood +keep +bellow +warm +churches +chapels +ranters +preachers +beechers +stuff +montreal +already +enough +beef +keeper +montreal +harbour +deplored +churchmen +notorious +atheist +less +well +known +charity +stranded +sailors +knew +could +always +beef +exchange +chopping +chords +firewood +meal +warm +place +sleep +printed +manifesto +handbills +advertisements +computer +architect +hackeralthough +aspiring +computer +architect +something +formerlyhad +fake +motorola +business +card +ever +since +started +working +computers +first +thing +learning +assembly +redesign +chip +started +unix +kernel +hacker +gould +real +time +unix +andstill +think +wistfully +hacker +beard +frequently +wear +suspenders +balding +computer +architectureonce +future +involved +microarchitecture +intel +pentium +adoption +intel +resume +wannabe +author +computer +architecturei +constantly +verge +writing +book +entitled +computer +architecture +grabbag +tricks +techniques +sort +antidote +hennessy +patterson +cannot +afford +diskspace +internet +service +provideror +charges +always +connected +system +suggestions +appreciated +interesting +pieces +computer +architectureone +best +ways +learn +computer +architecture +read +textbooks +datasheets +instruction +references +miscellaneous +info +wanderings +netscapebookmarksstockscoding +standardsroy +wilkinson +coding +standardsi +disagree +many +configuration +management +although +perhaps +quit +first +real +quickly +defunct +startup +company +called +enfoprise +building +business +workstations +first +changed +assignment +unix +driver +writingto +systems +integration +longstanding +love +hate +relationship +configurationmanagement +tools +like +sccs +first +published +paper +boxes +links +parallel +trees +elements +configuration +management +system +first +usenix +workshop +software +management +described +centralized +database +multiple +views +hardlink +cloning +save +space +time +used +gould +computer +systems +division +unix +team +brian +berliner +deprecates +approach +papers +mainly +advocates +optimistic +concurrency +control +approach +whereas +thought +advocated +locking +actually +advocate +optimistic +concurrency +control +also +advocate +locking +case +optimistic +version +gets +livelock +usually +insist +single +identified +serial +schedule +source +code +checkinsso +testing +proceed +linear +manner +require +programmers +test +code +works +system +previous +fixes +applied +although +recognize +even +requirement +relaxed +often +stripped +version +approachin +things +like +apologize +never +created +truly +portable +tools +accomplish +uses +similar +approach +although +mike +fetterman +mark +aitken +deserve +credit +enhanced +several +features +went +notably +version +number +became +overall +sufficiently +everything +including +cshrc +login +even +page +calendar +wisconsinhow +arrange +meetings +since +seems +ubiquitous +calendaring +scheduling +programat +university +wisconsin +department +various +people +cmtool +public +domain +ical +plan +critical +mass +anyof +calendaring +isolation +using +pilot +associated +software +least +means +meetings +gets +pilot +meeting +committed +least +committed +possible +also +record +meetings +using +voice +organizer +therefore +arrange +meeting +must +touch +preferably +email +possibly +phone +person +manuallyadd +meeting +calendar +created +download +calendar +pilot +softwareto +page +created +download +calendar +pilot +softwareto +microsoft +watch +intelat +intel +devout +user +group +scheduling +program +last +time +intel +synchronize +unix +also +ontime +past +weak +disconnected +operation +email +operation +least +allowed +majority +people +schedule +meetingswith +without +manual +intervention +intel +scheduling +algorithm +access +andy +calendar +using +synchronize +also +tell +andy +meeting +email +person +reservethe +right +meetings +blindly +invited +cannot +synchronize +urgent +make +meeting +andy +admin +teresa +locke +synchronize +possible +check +andy +calendar +page +proposeif +meeting +urgent +weeks +future +avoid +bothering +teresa +sending +andy +email +realize +andy +miss +meeting +sent +email +enough +advance +overallscheduling +calendaring +like +topics +something +fascinated +bring +efficiency +advantages +personal +secretariesand +aides +camp +computer +users +header +wisc +glew +public +html +glew +html +glew diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..1aa3e80c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,28 @@ +goodman +home +page +james +goodman +goodman +wisc +professor +computer +sciencesdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaresearch +interests +lots +good +stuff +current +projects +galileo +wisconsin +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..d6ad18e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,540 @@ +greg +sharp +home +pagegreg +sharp +home +pagename +greg +sharpemail +greg +wisc +eduoffice +office +phone +office +hours +appt +tgif +every +friday +dept +section +instructors +fall +lecture +notes +spring +lecture +notesclasses +fall +topics +database +management +systems +finding +aboutsearch +engines +altavista +dejanews +excite +infoseek +lycos +metacrawler +yahoo +usenet +mirrors +html +format +ohioc +programming +learn +today +library +draft +standard +april +mirrors +stanford +cygnus +info +reference +mumit +newbie +guideplatform +independant +libraries +portable +development +kits +amulet +dclap +strings +attached +requires +motif +suit +strings +attached +wxwindows +yacl +classes +spring +introduction +computational +geometry +computer +architecture +computer +architecture +project +machine +learning +machine +learning +projectclasses +fall +numerical +linear +algebra +introduction +theoretical +computer +science +computer +vision +images +homework +computer +vision +projectmisc +freeware +shareware +info +cygwin +software +internet +directory +gimp +harmonay +harmony +unix +client +hyper +hyper +browser +image +databases +vasc +image +database +alternate +link +video +pics +vision +research +range +image +database +shape +shading +pics +range +image +database +shape +shading +pics +gives +links +image +databases +image +database +pretty +cool +idea +specify +section +night +image +japan +image +database +jaidas +years +worth +atmospheric +data +images +multiresolution +seamless +image +database +cool +click +zoom +resolution +views +solar +system +nice +pics +moon +planets +comets +meteors +wyoming +image +database +version +pics +wyoming +also +lots +misc +stuff +like +pics +planets +overhead +shots +antarctica +satelite +image +catalogue +niaes +satelite +pics +japan +gothic +image +database +electric +postcard +card +rack +nice +selection +thanks +link +todd +vistex +texture +databaseartificially +generated +images +primoridial +soup +kitchen +math +department +awesome +site +medical +image +databases +line +images +history +medicine +documentation +pictures +diagonal +lines +dermatologic +online +image +atlas +based +erlanger +image +database +germany +mirror +japan +pretty +cool +idea +enter +diagnosis +back +image +orthopaedic +image +database +nice +database +ecvnet +image +data +bases +list +links +optical +character +recognition +handwriting +recognition +home +page +nici +handwriting +recognition +groupimage +processing +home +page +image +processing +home +page +washington +state +university +image +libraries +softwared +computer +graphics +software +raytracer +home +page +rayshade +utah +raster +toolkit +radiance +tracer +radiosity +package +avalon +archive +objects +stuff +tracer +mirror +grimstead +massive +list +tracing +dsite +reference +sited +computer +graphics +hardware +graphic +board +intergraph +lockheed +glint +chipset +nvidia +chipsetcomputational +geometery +geometry +center +application +challenges +computational +geometrylispuseless +pagescomputer +architecture +computer +architecture +home +page +hennessy +patterson +resources +text +superdlx +parallel +computing +simulators +parl +mexico +state +univ +includes +database +traces +architecture +links +univ +washingtion +architecture +group +index +simulators +georgia +tech +architecture +groupjapanese +computing +guide +japanese +computing +unversity +washington +monash +archive +index +japanese +computing +stuff +infowave +edict +windows +english +japanese +japanese +english +dictionary +shodouka +asiasoftinformation +retreval +peregrinator +robot +traverses +indexes +written +perl +trec +text +retrieval +home +page +infomine +managing +gigabytes +freeware +search +engine +text +images +textual +images +providing +info +internet +experiences +thomas +thomas +feedback +linguistic +utilities +repository +information +retrieval +tools +survey +natural +language +processing +information +retrieval +nist +computers +language +index +software +other_sw +info_retrieval +world +wide +robots +wanderers +spiders +jedi +project +might +strictly +related +hartlib +papers +project +latin +stemmer +information +retrieval +multimedia +retrieval +group +academic +group +information +retrieval +systems +related +course +information +storage +retrieval +related +course +includes +history +information +retrieval +related +course +news +pointcast +check +customized +portfolio +automatic +update +tools +literature +mark +twainhumor +apologize +citizens +offensive +links +site +threw +links +garbage +belong +investing +investorweb +networth +fundscape +brill +editorial +services +stockmastermutual +funds +brokerage +houses +fidelity +investments +vanguard +rowe +price +jack +white +company +schwab +charles +schwab +gabelli +funds +mutualsmisc +pages +psnuplast +modified +greg +sharpgreg +wisc +http +wisc +greg +greg +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..24c85852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar +gopalsridhar +gopalgsri +wisc +edubone +marrow +pageresumestate +wisconsin +pagecalvin +hobbesbookmarks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..564763e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,82 @@ +guangshun +home +page +welcome +guangshun +home +page +graduate +student +department +computer +science +dayton +university +wisconsin +madison +madison +phone +home +office +education +university +wisconsin +madison +california +state +university +angeles +peking +university +physics +department +grade +research +interests +database +management +systems +advisers +raghu +ramakrishnan +miron +livny +projects +data +analysis +project +family +medicine +devise +data +exploration +visualization +environment +classes +interesting +links +stuff +computer +related +career +planning +chinese +related +miscellany +send +email +send +email +around +weather +forecast +madison +visitor +since +june +visitor +number +since +visitor +number +since diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..8381c36c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,53 @@ +guhan +viswanathan +home +page +guhan +viswanathan +gviswana +wisc +graduate +studentdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaadvisor +larus +thesis +research +thesis +focuses +design +implementation +data +parallellanguages +involved +design +data +parallellanguage +based +developed +locally +implemented +compiler +targeting +investigating +data +parallelapplications +executed +efficiently +hand +coded +parallelprograms +amore +detailed +research +summary +list +publications +useful +links diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..6b021ba1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,97 @@ +harit +home +page +harit +graduate +student +university +wisconsin +madison +department +computer +sciences +would +like +list +classmates +courses +taking +fall +database +management +systems +prof +raghu +ramakrishnan +advanced +computer +architecture +prof +mark +hill +courses +taking +spring +advanced +computer +architecture +prof +james +goodman +undergraduate +student +world +famous +mvsr +engineering +college +osmania +university +hyderabad +india +meet +draws +line +things +interest +indian +newspapers +stuff +sports +sastry +links +roommate +home +page +saeed +mirza +murthy +links +zubber +dust +photos +photographs +university +warning +click +years +folks +page +accessed +times +since +sept +counter +courtesy +counter +page +accessed +times +since +sept +electronic +mailing +mail +address +harit +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..ac5e0512 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,55 @@ +rebecca +hasti +home +page +rebecca +hasti +graduate +student +research +assistantcomputer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +office +mail +hasti +wisc +edutelephone +telephone +dept +first +java +applet +click +fall +schedule +engr +java +noland +seminar +computer +sciences +university +wisconsin +madison +mathematics +university +wisconsin +madison +mathematics +carleton +college +interests +programming +languages +basketball +volleyball +softball +linkage +last +updated +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..2a5b2f9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,140 @@ +home +page +chad +lane +academics +personal +info +neat +stuff +dept +computer +sciences +university +wisconsin +madison +west +dayton +madison +dept +phone +mail +office +address +office +hours +office +phone +home +phone +wisc +welcomethanks +stopping +page +hope +enjoy +bestbet +links +stuff +section +biggest +news +life +right +getting +married +onmay +nichole +finally +want +tell +good +luck +counting +academics +fall +courses +information +retrieval +technologies +seeking +information +database +management +systems +ling +audit +advanced +semantics +research +interests +computational +linguistics +discourse +processing +useful +advice +research +barwise +epigrams +programming +alan +perlis +education +mathematics +computer +science +minor +philosophy +laude +truman +state +university +formerly +northeast +missouri +state +university +computer +sciences +expected +university +wisconsin +madison +personal +information +stand +neat +stuff +according +truly +click +image +cyber +poop +creation +unabashed +brother +bart +arthur +lane +download +claude +claude +psychotic +program +talk +raise +plant +internet +deep +thoughts +jack +handy +reload +different +ones +last +modified +chad +lane diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..019ad25b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,196 @@ +susan +hert +home +page +susan +hert +hert +wisc +research +assistant +department +computer +sciences +university +wisconsin +madison +dayton +madison +telephone +curriculum +vita +postscript +research +interests +publications +software +interesting +links +research +interests +applied +experimental +computational +geometry +analysis +algorithms +design +motion +planning +algorithms +computer +graphics +geometric +algorithms +advisor +vladimir +lumelsky +currently +work +robotics +developing +motion +planning +alogirthms +multiple +robots +common +environment +selected +publications +susan +hert +vladimir +lumelsky +deforming +curves +plane +tethered +robot +motion +planning +extended +abstract +paper +appeared +proceedings +canadian +conference +computational +geometry +august +susan +hert +vladimir +lumelsky +planar +curve +routing +tethered +robot +motion +planning +appear +international +journal +computational +geometry +applications +susan +hert +vladimir +lumelsky +ties +bind +motion +planning +multiple +tethered +robots +robotics +autonomous +systems +version +paper +published +proc +ieee +international +conference +robotics +automation +susan +hert +sanjay +tiwari +vladimir +lumelsky +terrain +covering +algorithm +appear +journal +autonomous +robots +special +issue +autonomous +underwater +robots +susan +hert +vladimir +lumelsky +moving +multiple +tethered +robots +arbitrary +configurations +proc +international +conference +intelligent +robots +systems +august +susan +hert +reznik +simulation +library +basis +animation +programs +version +technical +report +university +wisconsin +madison +robotics +laboratory +july +interesting +links +computational +geometry +pages +computer +science +education +links +books +reference +shelf +library +congress +line +books +page +travels +samantha +cooking +epicurious +veggies +unite diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..56587736 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,64 @@ +kirk +hogenson +kirk +hogenson +graduate +studentcomputer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +office +mail +wisc +edutelephone +kirk +hogenson +graduate +student +madison +madison +wisconsin +department +sections +office +hours +tues +also +look +myschedule +none +office +hours +workout +mail +tryto +appointment +time +finger +send +mail +visit +ghana +country +serving +peace +corps +username +check +pnhp +student +group +page +maintained +wife +eiluned +experimenting +counter +says +accessedtimes +since +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..7509efde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,15 @@ +jeffrey +horn +jeffrey +horn +swanton +road +madison +wisconsin +phone +email +horn +wisc +wise +linearize +familyemploymenteducationresearchgenealogy diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..dcb4165f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,244 @@ +susan +horwitzsusan +horwitzprofessorcomputer +sciences +departmentuniversity +wisconsin +madison +west +dayton +streetmadison +usae +mail +horwitz +wisc +telephone +secretary +department +cornell +university +research +interests +language +based +programming +environmentsprogram +slicing +differencing +mergingstatic +analysis +programsinterprocedural +dataflow +analysisresearch +summarymy +work +mainly +involves +design +implementationof +language +based +programming +tools +help +programmers +problems +like +understanding +existing +programs +work +would +affectedby +proposed +modifications +understanding +textual +structural +semantic +differences +betweentwo +versions +program +retesting +program +changing +combining +pieces +programs +produce +program +certainsemantic +guarantees +work +involved +program +representation +called +theprogram +dependence +graph +operation +called +slicing +also +working +algorithms +precise +interproceduraldataflow +analysis +previous +work +interprocedural +dataflow +analysis +mainly +concentratedeither +efficient +algorithms +specific +individual +problems +necessarily +efficient +algorithms +general +class +problems +thomas +reps +mooly +sagiv +developed +implemented +newalgorithm +efficient +applies +large +class +problems +recent +publicationsm +shapiro +horwitz +fast +accurate +flow +insensitive +points +analysis +appear +conference +record +twenty +fourth +symposium +onprinciples +programming +languages +paris +france +january +horwitz +reps +sagiv +demand +interprocedural +dataflow +analysis +proceedings +sigsoft +symposium +foundations +softwareengineering +washington +october +sagiv +reps +horwitz +precise +interprocedural +dataflow +analysis +applications +constantpropagation +proceedings +sixth +international +joint +conference +theoryand +practice +software +development +aarhus +denmark +reps +sagiv +horwitz +precise +interprocedural +dataflow +analysis +graph +reachability +conference +record +twenty +second +symposium +principlesof +programming +languages +francisco +january +bates +horwitz +incremental +program +testing +using +program +dependence +graphs +conference +record +twentieth +symposium +principlesof +programming +languages +charleston +january +horwitz +reps +program +dependence +graphs +software +engineering +proceedings +fourteenth +international +conferenceon +software +engineering +melbourne +australia +horwitz +identifying +semantic +textual +differences +versions +aprogram +proceedings +sigplan +conference +programming +languagedesign +implementation +white +plains +june +teaching diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..21d7dc96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid +pagename +sidney +hummertoffice +phone +office +email +hummert +wisc +edua +postscript +version +resume +pictures +click +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..9060068c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,54 @@ +igor +ivanisevic +home +pageigorivanisevic +working +newest +project +disclaimer +alien +speak +aliens +particular +needless +page +construction +ever +feel +like +actually +constructing +already +links +mail +research +interests +robotics +vision +stuff +generalgraduate +slave +university +wisconsin +departmentwas +undergrad +drake +university +math +departmentaddress +computer +sciences +departmentuniversity +wisconsin +west +dayton +streetmadison +office +phone +home +phone +mail +iigor +wisc +eduiigor +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..27ce8d3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..713ddb10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,47 @@ +sharenow +home +page +sharenow +wisc +teaching +assistant +peterson +computer +sciences +department +university +wisconsin +madisonmadison +office +computer +sciencestelephone +office +hours +sections +tuesdays +thursdays +pmsection +meets +section +meets +pmboth +classes +meet +room +computer +sciencescs +announcementshandoutsmother +jones +profile +sharenow +recreational +site +please +send +email +comments +last +modified +tuesday +september +sharenow diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..0a3e61a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,49 @@ +basneyjim +basneygraduate +student +research +assistantcomputer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +email +jbasney +wisc +eduoffice +computer +science +statisticsoffice +phone +research +interests +area +operating +systems +andnetworks +currently +working +condor +directionof +prof +miron +livny +received +fromoberlin +college +computer +science +english +webpages +oberlin +resume +codefrom +previous +projects +available +online +last +modified +basney diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..d3dc8e3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,96 @@ +jerel +mackay +home +pagejerel +mackay +assistant +researcher +computer +sciences +specializing +databases +operating +systems +work +fulltime +computer +systems +university +wisconsin +madison +computer +sciences +department +responsibilities +include +developing +supporting +sybase +ingres +database +installations +backup +software +training +student +hourlies +play +electric +guitar +thrash +metal +specialty +also +violin +classical +baroque +mainly +seen +error +evil +ways +click +shocking +case +didn +believe +eyes +first +time +working +like +record +mostly +funny +covers +stuff +abba +metallica +also +originals +soon +able +sample +hits +watch +favorite +show +play +raquetball +golf +shoot +pool +stand +around +towel +yeah +know +much +working +finger +jerellast +modified +jerel +mackay +jerel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..14edf18d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home +page +johan +larson +homepage +construction diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..89770824 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,173 @@ +home +page +relief +happiness +java +good +censorship +really +mail +jherro +wisc +notes +classes +made +home +page +relax +lets +would +cool +thing +home +page +apicture +picture +girlfriend +half +years +afraid +dated +though +almost +years +many +years +torture +disembodyment +became +floating +head +death +directory +animation +frames +series +pictures +jpeg +format +default +named +number +starting +frame +number +frame +milliseconds +pause +images +default +overriden +pauses +repeat +sequence +explicit +order +frames +stuff +vital +information +aquired +nickname +would +like +take +time +apologize +lameness +page +please +bear +pretty +pictures +jack +skellington +kermit +frog +interesting +something +else +neat +link +friend +homepage +writes +haiku +said +thathe +going +click +stuff +roomate +mine +grad +notre +dame +memories +forgotten +time +roomates +cult +hippothis +exploratory +intervention +chaotic +existence +reality +follow +link +enjoy +benefits +matriarchical +society +join +today +exclusive +club +hierarchy +rules +friends +homepages +semi +cool +links +notre +dame +home +page +yahooooooooooooo +work +link +contains +free +software +shack +bazillion +search +engines +search +engines +mpeg +movie +archive +really +cool +link +great +muppet +page +sounds +images +links +cool +rachel +want +select +cool +canoe +trip +pictures +look +bout +canoe +pictures diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..4cca6507 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,188 @@ +bing +home +page +bing +index +general +information +education +advisor +research +interests +research +projects +publications +pointers +hobbies +general +informationresearch +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +jiebing +wisc +edueducation +computer +science +university +wisconsin +madison +advisor +prof +david +dewittresearch +interests +tertiary +storage +support +dbms +parallel +database +management +systems +object +oriented +database +management +systems +geographic +information +systemsresearch +projects +paradise +shorepublicationsbuilding +scaleable +spatial +dbms +technology +implmentation +evaluation +patel +kabra +tufte +burger +hall +ramasamy +lueder +ellman +kupsch +dewitt +naughton +submitted +publication +october +query +execution +batching +paradise +pronged +approach +efficient +processing +queries +tape +resident +data +sets +dewitt +submitted +publication +october +processing +satellite +images +tertiary +storage +study +impact +tile +size +performance +dewitt +appear +nasa +goddard +conferenceon +mass +storage +systems +technologies +september +using +constraints +query +tree +goldstein +ramakrishnan +shaft +shorter +version +appear +workshop +constraints +databases +february +client +server +paradise +dewitt +kabra +patel +proceedings +large +data +bases +conference +santiago +chile +september +storage +reclamation +reorganization +client +serverpersistent +object +store +yong +naughton +proceedings +ieee +data +engineering +conference +houston +february +pointers +eosdis +sigmod +madison +dbms +research +grouphobbies +tennis +tennis +server +volleyball +volleyballweb +white +water +rafting +whitewater +page +find +picture +click +full +sized +picture +last +updated +july +bing +jiebing +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..bed4aa1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,74 @@ +jignesh +home +page +jignesh +patel +jignesh +wisc +welcome +research +assistant +department +computer +sciences +university +wisconsin +madison +west +dayton +street +madison +telephone +advisor +david +dewitt +research +interests +systems +parallel +database +systems +object +relational +databases +currently +working +paradise +project +publications +related +paradise +client +server +paradise +paper +published +vldb +partition +based +spatial +merge +join +published +sigmod +publications +accurate +modeling +hybrid +hash +join +algorithm +paper +published +sigmetrics +miscellaneous +stuff +virtual +tourist +inline +skating +home +page +madhuri +kashmir +bookmarks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..81e3e40d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +george +varghese +people +download +netscape +page +clicking +warning +page +pretty +lame diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..47fba69a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,106 @@ +johannes +gehrke +homepagejohannes +gehrkewelcome +international +graduate +studentat +computer +sciencesdepartment +university +ofwisconsin +madison +area +interest +databasemanagement +systems +working +area +data +mining +underprofessor +raghuramakrishnan +page +construction +contact +information +publications +interesting +linkscontact +information +email +johannes +utexas +office +university +wisconsin +madison +computer +sciences +department +west +dayton +street +room +madison +wisconsin +home +eagle +heights +madison +publications +stoica +abdel +wahab +jeffay +baruah +gehrke +plaxton +proportional +share +resource +allocation +algorithmfor +real +time +time +shared +systems +proceedings +ieee +real +time +systems +symposium +washington +december +appear +anexpanded +version +baruah +gehrke +plaxton +fastscheduling +periodic +tasks +multiple +resources +inproceedings +ieee +international +parallel +processingsymposium +april +expanded +version +available +technicalreport +department +computer +sciences +universityof +texas +austin +february +johannes +gehrke diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..d8c2fc56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,84 @@ +home +pagewelcome +home +page +first +year +graduate +student +university +wisconsin +madison +studying +computer +science +using +computers +also +maintaining +frequently +asked +questions +list +latest +powerbook +models +released +apple +things +look +students +sections +click +herei +amassed +list +good +sites +number +catagories +check +pages +madison +department +madison +alma +mater +sites +visit +often +apple +home +page +needsthe +nando +times +great +news +coveragethe +spot +mind +numbing +soap +operaish +drivelziffnet +computer +industry +newscs +database +management +systems +construction +compilers +keeping +classworktoday +dilbert +chucklejon +bodner +jonb +wisc +mound +madison +last +modified +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..f0780585 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,107 @@ +welcome +home +page +first +year +graduate +student +department +came +frombeijing +chinese +academy +sciences +china +hometown +nanjing +capitol +jiangsu +province +degree +student +department +university +wisconsin +madison +institute +automation +chinese +academy +sciences +beijing +china +specilized +pattern +recognition +image +processing +biomedical +engineering +department +southeast +university +nanjing +chinacurrent +activities +courses +advanced +computer +networks +topics +database +management +advanced +operating +systems +teaching +assistant +data +structure +current +address +home +spring +madison +work +computer +sciences +department +west +dayton +street +madison +tele +office +home +could +finger +wisc +refer +computer +science +department +information +class +technical +stuffjava +placeshore +tutorialchina +affairchina +democracybeijing +spring +places +interest +stanford +network +groupstanford +medical +informaticsmit +communication +control +signal +processingjob +sites +newsyou +visitor +number +since diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..6663df26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,111 @@ +jeff +shabel +home +pagecheers +welcome +jeff +shabel +home +page +wisconsincheers +theme +song +office +hours +tues +thurs +appointment +office +personal +informationmajor +computer +science +architecture +emphasis +status +second +year +graduate +student +view +fall +schedule +academic +background +received +computer +engineering +diego +electrical +computer +engineering +department +home +town +cupertino +jose +high +school +monta +vista +high +school +cupertino +plans +graduate +computer +science +favorite +sports +teams +golden +state +warriors +basketball +jose +mercury +news +andnando +jose +sharks +hockey +jose +mercury +news +andnando +francisco +football +jose +mercury +news +andnando +oakland +favorite +links +news +jose +mercury +newsmusic +columbia +house +find +join +columbia +house +deal +also +tips +info +join +music +clubs +miscellaneous +view +print +postscript +documents +windows +send +mail +jshabel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..99fe3d24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,311 @@ +jussi +myllymakijussi +myllymaki +research +assistant +computer +sciences +department +university +wisconsin +west +dayton +street +madison +telephone +email +jussi +wisc +eduresearch +summaryi +interested +performance +analysis +dbms +operations +onadvanced +tape +disk +technology +including +disk +tape +arrays +mcurrently +studying +buffer +large +datasets +tertiary +storageto +disk +memory +data +used +data +exploration +andvisualization +deviseproject +advisor +prof +mironlivny +recent +work +includes +improving +performance +relational +joinsof +large +volumes +disk +tape +resident +data +publication +listbelow +applying +structured +organization +tertiary +storageto +solve +problems +associated +diverse +characteristics +andfunctional +limitations +tertiary +media +recent +paper +datavisualization +exploration +discusses +data +metadata +managementissues +large +complex +data +sets +involved +refereed +publicationsefficient +buffering +concurrent +disk +andtape +miron +livny +proceedings +performance +international +conference +performance +theory +measurement +evaluation +computer +communication +systems +october +integrated +visualization +parallel +programperformance +data +karen +karavanic +miron +livny +bartonp +miller +proceedings +third +workshop +environments +andtools +parallel +scientific +computing +august +structured +organization +tertiarystorage +daniel +ford +proceedings +theinternational +conference +data +engineering +february +alsoavailable +almaden +research +report +visual +exploration +large +data +sets +withmiron +livny +raghu +ramakrishnan +proceedings +spie +theinternational +society +optical +engineering +january +disk +tape +joins +synchronizing +disk +andtape +access +miron +livny +proceedings +acmsigmetrics +conference +submitted +publicationdevise +integrated +querying +visualization +large +datasets +livny +ramakrishnan +beyer +chen +donjerkovic +lawande +myllymaki +wenger +submitted +sigmod +conference +relational +joins +data +tertiary +storage +jussi +myllymaki +andmiron +livny +submitted +international +conference +dataengineering +publicationsdisk +tape +joins +synchronizing +disk +tapeaccess +miron +livny +university +wisconsin +department +technical +report +joins +tapes +project +report +master +degreeproject +report +university +wisconsin +department +applying +client +server +model +computer +networkarchitectures +master +thesis +helsinki +university +technology +department +industrial +management +finnish +documentsimplementation +performance +analysis +treealgorithms +jeff +schwarz +yoav +weiss +class +report +experiences +implementing +structured +filesystem +trishul +chilimbi +yoav +weiss +class +report +overview +current +tape +technologies +productsoverview +raid +technology +suppliers +productssome +frequently +needed +links +unified +technical +report +search +adaptec +scsi +adapters +home +digital +alpha +workstationsandpcsandtechnical +journaland +whitepapers +home +technology +researchandcyberjournal +quantum +digital +linear +tapeanddlt +faqandwhitepapers +home +solarisandsparcstationsandtechnical +reports +home +scsi +faqandstorage +faqand +otherusenet +faqsmany +links +found +jussi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..7f935019 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepage +jyothithis +page +construction +info +students +course +grades +others +sorry +dissappoint +email +jyothi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..83039ff1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,108 @@ +karen +karavaniceverything +need +know +learned +public +school +karen +karavanicresearch +assistant +paradyn +parallel +performance +tools +project +university +wisconsin +madison +computer +sciences +department +west +dayton +street +madison +karavan +wisc +currently +pursuing +computer +science +research +interests +include +parallel +computing +environments +automating +performance +tuning +process +operating +systems +databasesask +madison +women +computer +sciences +wics +frontiers +science +cool +program +dane +county +high +school +studentstrio +student +support +services +free +tutoring +support +madison +undergraduates +miss +sites +page +could +save +life +safer +pagefor +chocolate +lovers +onlystuyvesant +high +school +alumni +associationstuyvesant +high +school +class +thomas +legislative +information +internetthe +constitution +cure +anything +salt +water +sweat +tears +isak +dinesen +ship +port +safe +ships +sail +things +admiral +grace +hopper +computer +pioneer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..173be322 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,178 @@ +stefanos +kaxiras +home +page +stefanos +kaxiras +kaxiras +wisc +editor +ieee +kiloprocessor +extensions +sciresearch +assistant +university +wisconsin +research +interests +summary +recent +publicationsresearch +interests +shared +memory +multiprocessing +scalable +coherent +interface +cache +design +aspects +parallelism +processor +memory +memory +processor +architectures +galileo +research +summary +introduced +glow +kiloprocessor +extensions +hierarchical +extensions +collaborationwith +goodman +working +examine +depth +design +options +develop +upcoming +standard +incolaboration +goodman +david +james +stein +gjessing +recent +publications +glow +cache +coherence +protocol +extensions +widely +shared +data +stefanos +kaxiras +james +goodmanto +appear +proceedings +international +conference +supercomputing +also +technical +report +kiloprocessor +extensions +stefanos +kaxirasto +appear +proceedings +international +parallel +processing +symposium +april +implementation +performance +glow +kiloprocessor +extensions +wisconsin +wind +tunnel +stefanos +kaxiras +james +goodmannd +international +workshop +based +high +performance +cost +computing +march +hierarchical +extensions +cache +coherence +stefanos +kaxiras +james +goodmanst +international +workshop +based +high +performance +cost +computing +august +hierarchical +extensions +james +goodman +stefanos +kaxirasuniversity +wisconsin +computer +sciences +dept +july +software +tool +simulating +prototyping +monitoring +multiprocessor +systems +stafylopatis +papakonstantinou +kaxirasinformation +software +technology +automated +synthesis +parallel +dedicated +architectures +using +prolog +specifications +tsanakas +papakonstantinou +pekmestzi +kaxirasp +greece +hardware +synthesis +methodology +using +prolog +tsanakas +papakonstantinou +kaxirasmicroprocessing +microprogramming +north +holland diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..8bc2c13c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,323 @@ +keeper +home +page +steven +foughtthey +heaven +like +perfect +little +world +doesn +really +need +everything +made +light +laurie +anderson +strange +angels +note +possible +pages +make +structural +markup +indicating +piece +information +rather +look +made +information +pages +accessible +people +bitmapped +displays +including +theblind +using +standard +markup +tags +allowing +whateverbrowser +wish +also +note +pages +make +literary +conventions +irony +satire +sarcasm +butnothing +contained +herein +meant +offensive +areoffended +probably +stupid +started +third +year +graduate +student +computer +sciences +department +firsttwo +weeks +since +supported +taking +care +variety +machines +varying +degrees +success +graduate +student +side +never +published +ever +done +anything +impressive +miracle +pass +prelim +research +addup +hill +beans +likely +fizzle +results +areobtained +probably +leave +junior +level +programmer +creatingkiosk +front +ends +visual +primitive +systems +leave +permanently +skill +free +surprise +year +discovered +especially +surprising +wasn +convinced +alive +thosewho +know +well +would +probably +argue +proof +mybeing +alive +anyway +might +want +read +thoughtson +hedgehog +contacting +electronic +mail +pager +reliably +specific +location +often +work +home +ifyou +would +like +person +write +something +address +keeper +wisc +becomes +clear +wantto +contact +short +notice +give +pager +number +various +creations +thoughts +hedgehog +todo +list +updated +sporadically +list +associates +rsum +postscript +documentation +html +section +long +outof +date +unfortunately +danenet +dilhr +jobnet +database +soon +defunct +caltech +projects +caltech +institute +archives +photonet +database +caltech +personnel +directory +looks +much +better +inside +caltech +caltech +database +inside +caltech +anyone +enter +pages +databaseusing +forms +interface +slides +slides +talk +gave +slides +talk +gave +distributed +objects +slides +talk +gave +called +java +dangers +love +coming +hell +freezes +rate +information +page +break +hierarchy +consulting +information +somewhere +steven +fought +unified +attribution +index +sfuai +informationage +intellectual +property +everything +index +allow +assigna +unique +serial +number +references +quote +atthe +page +provide +quote +source +contextual +information +pointers +relevant +quotes +index +buys +adobe +distiller +translate +rsuminto +world +users +chance +ofreading +suppose +print +pinch +want +writing +using +computers +certaintruths +psychology +using +computer +software +people +eventuallypick +aren +ever +taught +explicitly +think +possibleto +would +make +starting +computers +easier +cheap +shots +things +hate +project +ideas +mulling +probably +accessibleto +small +subset +users +tough +world diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..bbf5654c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +krishna +kunchithapadamkrishna +kunchithapadamgreetings +welcome +page +interests +reading +languages +indian +classical +music +miscellaneouspublications +data +distributions +performance +steering +performance +toolsresume +gzipped +postscript +contacting +search +last +modified +bykk diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..77f31940 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,42 @@ +kristin +home +pagekristin +tufte +research +assistant +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +tufte +wisc +eduadvisor +david +dewitt +miscellany +information +serveruw +madison +dbms +research +groupacm +sigmod +information +server +home +pageeos +project +science +officelast +modified +kristin +tuftekristin +tufte +tufte +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..4ce140e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,61 @@ +krung +homepageupdated +november +krung +homepage +underconstructioni +keep +page +short +informative +good +serf +year +comethe +following +related +topic +research +mathematical +programmings +project +pursuing +course +works +course +works +computer +science +department +computer +companies +favorite +hobby +personal +information +personal +opinion +life +madison +wisconsin +linkedthe +following +pages +important +links +university +madison +wisconsin +whole +computer +sciences +department +unique +entity +electronic +library +system +krung +sinapiromsaran +emailkrung +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..1a23d708 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,225 @@ +kunen +home +page +kenneth +kunen +professormath +computer +sciences +university +wisconsin +dayton +madison +mail +kunen +wisc +edutelephone +stanford +university +interests +automated +deduction +logic +programing +theory +topology +research +summary +research +work +involves +logic +applications +typical +applications +automated +deduction +logic +programming +automated +deduction +tools +like +resolutionto +prove +mathematical +theorems +logic +programming +study +semantics +languages +likeprolog +specific +topics +considering +prologuse +negation +failure +semantic +incompatibilities +betweenleast +fixed +point +computations +prolog +style +backtracking +computation +mathematical +logic +work +axiomatic +theory +besides +interest +right +thissubject +relates +various +abstract +areas +mathematics +theoretic +topology +measure +theory +many +basic +questions +turn +independent +usualaxioms +theory +selected +recent +publications +following +postscript +files +kunen +shortest +single +axioms +groups +exponent +computers +mathematics +applications +hart +kunen +single +axioms +exponent +groups +automated +reasoning +kunen +ramsey +theorem +boyer +moore +logic +appear +automated +reasoning +kunen +mill +measures +corson +compact +spaces +fundamenta +mathematica +hart +kunen +locally +constant +functions +fundamenta +mathematica +kunen +semantics +answer +literals +technical +report +university +wisconsin +appear +automated +reasoning +kunen +constructive +computational +mathematics +technical +report +university +wisconsin +appear +automated +reasoning +kunen +moufang +quasigroups +algebra +kunen +quasigroups +loops +associative +laws +preprint +appear +algebra +kunen +structure +conjugacy +closed +loops +preprint +kunen +completeness +result +linked +resolution +appear +press +hart +kunen +weak +measure +extension +axioms +rough +draft +book +review +hart +kunen +review +notes +theory +moschovakis +american +mathematical +monthly +courses +taught +fall +math +geometrical +inference +reasoning +math +foundations +mathematics +spring +comp +artificial +intelligence +last +changed +october +kunen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..ec85d47f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,520 @@ +james +larus +home +page +james +larus +larus +wisc +associate +professor +computer +sciencedepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usalarus +wisc +eduphone +secretary +julie +fingerson +thea +sklenar +departmental +office +education +research +interests +research +projects +upcomming +courses +software +recent +papers +graduates +summary +education +university +california +berkeley +university +california +berkeley +harvard +college +research +interests +programming +languagesand +compilers +particular +languages +compilers +parallel +machines +design +programming +shared +memory +parallel +computers +compiling +symbolic +languages +program +profiling +tracing +program +executable +editing +research +projects +wisconsin +wind +tunnel +large +grain +data +parallel +programming +language +executable +editing +library +courses +data +structurescs +java +software +spim +wartsrecent +papersefficient +path +profiling +thomas +ball +james +larus +appear +micro +december +parallel +programming +large +grain +data +parallel +programminglanguage +james +larus +brad +richards +guhan +viswanathan +gregory +wilson +parallel +programming +using +press +teapot +language +support +writing +memory +coherence +protocols +satish +chandra +brad +richards +james +larus +sigplan +programming +language +design +implementation +pldi +instruction +scheduling +executable +editing +eric +schnarr +andjames +larus +appear +workshop +compiler +support +system +software +wcsss +february +efficient +support +irregular +applications +distributed +memory +machines +shubhendu +mukherjee +shamik +sharma +mark +hill +james +larus +annerogers +joel +saltz +fifth +sigplan +symposium +principles +practiceof +parallel +programming +ppopp +july +machine +independent +executable +editing +james +larus +eric +schnarr +sigplan +conferences +programming +languagesdesign +implementation +pldi +june +tempest +substrate +portable +parallel +programs +mark +hill +james +larus +david +wood +compcon +spring +march +static +branch +frequency +program +profile +analysis +youfeng +james +larus +annual +ieee +international +symposium +microarchitecture +micro +november +application +specific +protocols +user +level +shared +memory +babak +falsafi +alvin +lebeck +steven +reinhardt +ioannis +schoinas +markhill +james +larus +anne +rogers +david +wood +supercomputing +november +time +spent +message +passing +shared +memory +programs +satish +chandra +james +larus +anne +rogers +sixth +international +conference +architectural +support +forprogramming +languages +operating +systems +asplos +october +system +support +language +implementation +james +larus +brad +richards +guhan +viswanathan +sixth +international +conference +architectural +support +forprogramming +languages +operating +systems +asplos +october +fine +grain +access +control +distributed +shared +memory +ioannis +schoinas +babak +falsafi +alvin +lebeck +steven +reinhardt +jameslarus +david +wood +sixth +international +conference +architectural +support +forprogramming +languages +operating +systems +asplos +october +cachier +tool +automatically +inserting +cico +annotations +trishul +chilimbi +james +larus +international +conference +parallel +programming +icpp +august +wisconsin +wind +tunnel +project +annotated +bibliography +mark +hill +james +larus +david +wood +unpublished +manuscript +revised +frequently +cooperative +shared +memory +software +hardware +scalable +multiprocessors +mark +hill +james +larus +steven +reinhardt +david +wood +transactions +computer +systems +tocs +november +wisconsin +architectural +research +tool +warts +mark +hill +james +larus +alvin +lebeck +madhusudhan +talluri +david +wood +computer +architecture +news +august +graduatesbrad +richards +august +memory +systems +parallel +programming +first +employment +vassar +college +guhan +viswanathan +september +techniques +compiling +data +parallel +languagesfirst +employment +oracle +lorenz +huelsbergen +august +dynamic +language +parallelization +first +employment +bell +labs +lorenz +research +thomas +ball +august +control +flow +control +dependence +software +tools +first +employment +bell +labs +tball +research +research +summarymy +research +focuses +problems +programming +computers +part +thewisconsin +wind +tunnel +project +havehelped +develop +hybrid +software +hardware +shared +memory +computerarchitecture +facilitates +programming +compiling +parallelmachines +currently +students +developing +languages +compilers +tools +demonstrate +exploit +power +user +level +coherencepolicies +also +interested +performance +evaluation +tools +help +programmersunderstand +improve +programs +performance +recently +ball +andi +developed +efficient +path +profiling +algorithm +provides +moredetailed +understanding +control +flow +within +routines +hasidentified +possibilities +better +compilers +last +modified +james +larus +larus +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..a491b6c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick +pagenick +pageoffice +phone +email +leavy +wisc +eduoffice +hours +tuesday +wednessday diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..139f79b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,107 @@ +steven +huss +lederman +home +page +steven +huss +lederman +home +page +research +interests +include +research +univ +wisconsin +madison +relates +thewisconsin +wind +tunnel +project +research +area +parallel +linear +algebra +iscovered +prismproject +also +heavily +involed +mpistandard +several +others +recently +published +book +original +mpistandard +information +ordering +book +press +isbn +also +look +complete +reference +editor +current +draft +forum +please +keep +mind +work +forum +ongoing +andits +documents +intended +interested +ongoingwork +forum +committee +members +compressed +postscript +complete +sources +compressedtar +file +individual +source +files +available +information +would +finger +steven +huss +lederman +computer +science +dept +univ +wisconsin +madison +dayton +madison +phone +message +desperate +mail +lederman +wisc +http +wisc +lederman +lederman +html +office +computer +science +statistics +building diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..ed5777b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,118 @@ +home +page +lawrence +landweber +lawrence +landweber +professor +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +wisc +purdue +university +interests +computer +networks +protocols +high +speed +networks +electronicmail +research +summary +research +program +focuses +high +speed +networks +participatingin +gigabit +project +darpa +national +project +involvesthe +design +implementation +network +testbeds +operating +atgigabit +second +data +rates +wisconsin +working +onissues +protocol +design +congestion +admission +control +visualizationof +atmospheric +phenomena +virtual +conferencing +sample +recent +publications +design +implementation +fast +virtual +circuit +establishmentmethod +networks +olsen +proceedings +theieee +infocom +conference +francisco +april +dynamic +time +windows +packet +admission +control +feedback +witht +faber +mukherjee +proceedings +sigcommconference +baltimore +august +dynamic +time +windows +generalized +virtual +clock +combined +closed +loop +open +loop +congestion +control +mukherjee +faber +proceedings +ieee +infocom +conference +florence +networking +coursesconnectivity +table diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..9e2c052b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,71 @@ +shannon +lloyd +home +page +shannon +lloyd +work +address +university +wisconsin +madison +computer +science +department +west +dayton +street +madison +lloyd +wisc +responsibilities +using +computers +lectures +sections +comp +office +hours +comp +wednesday +thursday +appointment +fall +courses +construction +compilers +computational +linguistics +various +links +women +computer +science +university +utah +department +chemistry +university +utah +department +computer +science +personal +engineering +career +services +computation +language +archive +computational +linguistics +natural +language +processing +artificial +intelligence +cognitive +linguistic +science +xsoft +lexdemo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..f2258fe7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,223 @@ +christopher +lukas +home +pagechristopher +lukasrelevant +information +office +phone +email +lukas +wisc +edui +apparently +coordinator +mspls +fall +workshipi +also +defeated +rivals +quest +become +semester +suniversity +wisconsin +programming +languages +seminar +czar +courses +going +take +fall +festival +java +taught +advisor +tufts +university +mail +list +click +sites +entertainment +save +tigers +number +free +prisonerthis +stock +quoteserver +maintain +fabulous +wealth +todd +amusing +page +check +friends +page +cool +things +also +pagebet +political +candidates +legally +iowa +electronic +market +identitycaptain +kirk +sings +lonely +guys +lonely +trouble +meeting +women +throughamazing +technology +longer +need +concern +withtrying +interact +real +women +virtual +girlfriend +tracy +teri +waiting +meet +wife +incredible +jumping +catthis +really +cool +fill +anyway +like +know +well +really +entertaining +please +fill +send +option +case +feel +something +currently +listed +name +mail +address +favorite +thing +feel +check +apply +killer +buttmunchextreme +dudemichael +nesmith +fanfoolmythical +figurewicked +good +basketball +playervalued +studentmental +defectivea +wkrp +cincinatti +tragic +like +figuregeek +tradesgonzo +admirernetscape +junkie +searches +child +pornpersonified +organic +condom +stretched +much +ready +blowflaming +testicle +outer +space +tastes +goodpoetry +guruhogwildthis +kicks +assman +manbig +dudeuh +ohprofessional +muff +diverregressing +higher +lifeformherald +alien +invasion +forcechris +knows +html +formsalienherpetophiletodd +turnidges +hatthe +mancreepy +laughing +headsmall +planetdr +companioneaster +bunnycyberweenietcl +hellbeast +option +checkboxes +simply +fill +click +submit +reload +page +maybe +figure +automatically +option +cool +interactive +checkbox +option +keywords +included +include +page +interesting +searches +superhighway +drugs +cosmos +irritating +gross +naked +scatology +pervert +offspring +food +etymology +phat +gnarly +bogus +wierd +cybermuffin +pictures +erotica +cheese diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..ba9de928 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,123 @@ +welcom +ling +zheng +home +page +department +computer +sciences +sheboygan +dayton +madison +madison +office +phone +mail +lzheng +wisc +shameless +self +promotion +resume +text +version +want +know +side +click +research +interest +research +assistant +paradyn +group +currently +hacking +paradyn +onto +hpux +port +boss +barton +miller +also +charge +home +page +wuhan +university +chinaand +want +take +look +girlfriend +picture +temporarily +computer +architecture +education +prese +dept +computer +science +university +winsconsin +dept +computer +science +university +iowa +dept +computer +science +wuhan +university +officemate +marcelo +goncalves +alumni +click +ignore +wuhan +university +alumni +home +page +china +home +page +place +surf +companies +universities +hereif +want +know +best +graduate +school +computer +science +sthe +infomation +could +take +look +america +best +graduate +schoolssend +ling +zheng +mail +suggestion +homepage +bother +thanks +last +updated +march +visitor +number +since +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..f43c7cc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,50 @@ +manuvir +home +pagemanuvir +dasnow +know +name +andwhat +look +like +hello +feelfree +look +around +need +information +somethingsend +email +like +passion +golf +anaction +photo +later +manuvirwhat +know +know +general +start +advisor +better +thisto +keep +money +coming +turn +theoriginal +america +team +course +league +play +days +sunday +round +golf +finally +consin +said +manuvir +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..cfcc9c98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,913 @@ +mark +hill +home +pagemark +hill +markhill +wisc +associate +professor +computer +sciences +andelectrical +computer +engineeringat +university +wisconsintable +contents +addresses +office +hours +current +teaching +catalog +information +education +research +interests +andsummary +wisconsin +wind +tunnel +project +sampler +recent +papers +graduateslinks +useful +information +world +wide +computer +architecture +information +wisconsin +computer +architecture +group +wisconsin +architectural +research +tool +warts +stuff +like +oralpresentation +advice +including +david +patterson +show +give +talk +online +data +forcache +performance +spec +benchmark +suite +proof +hardware +wisconsin +sound +addresses +department +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usamarkhill +wisc +eduphone +secretary +julie +fingerson +thea +sklenar +departmental +office +office +hours +fall +monday +wednesday +appointment +markhill +wisc +educurrent +teachingfall +advanced +computer +architecture +ifall +topics +computing +java +language +implementation +icatalog +information +courses +teachcs +machine +organization +programmingcs +introduction +computer +architecturecs +advanced +computer +architecture +advanced +computer +architecture +iieducation +computer +science +university +california +berkeley +computer +science +university +california +berkeley +computer +engineering +university +michigan +research +interests +computer +architecture +parallel +computing +memory +systems +performance +evaluationresearch +summarymy +research +targets +memory +systems +shared +memory +multiprocessorsand +high +performance +uniprocessors +memory +system +design +important +largely +determines +computer +sustained +performance +mywork +emphasizes +quantitative +analysis +often +requiring +evaluationtechniques +system +level +hardware +performance +much +recent +work +part +wisconsin +windtunnel +projectwith +profs +larus +wood +manystudents +project +expects +future +massively +parallel +computerswill +built +workstation +like +nodes +programmed +high +levelparallel +languages +like +support +shared +address +space +inwhich +processes +uniformly +reference +data +research +seeks +todevelop +consensus +middle +level +interface +languagesand +compilers +system +software +hardware +recentlyproposed +tempest +interface +enables +programmers +compilers +program +libraries +implement +message +passing +transparent +shared +memory +hybrid +combinations +aredeveloping +tempest +implementations +thinking +machines +acluster +workstations +hypothetical +hardware +platforms +wisconsin +wind +tunnel +project +named +toolsto +cull +design +space +parallel +supercomputers +manner +similarto +aeronautical +engineers +conventional +wind +tunnels +designairplanes +recent +work +madhu +talluritargets +improving +translation +lookaside +buffer +page +table +performance +clustering +aligned +groups +base +pages +options +require +changes +hardware +complete +subblocked +tlbs +operating +system +clustered +page +tables +superpagesand +partial +subblocked +tlbs +asplosandsosppapers +sampler +recent +papersthe +wisconsin +wind +tunnel +project +annotated +bibliography +mark +hill +james +larus +david +wood +unpublished +manuscript +revised +frequently +parallel +computer +research +wisconsin +wind +tunnel +project +mark +hill +james +larus +david +wood +conference +experimental +research +computer +systems +june +bidirectional +technology +transfer +sabbaticals +industry +mark +hill +conference +experimental +research +computer +systems +june +coherent +network +interfaces +fine +grain +communication +shubhendu +mukherjee +babak +falsafi +mark +hill +anddavid +wood +international +symposium +computer +architecture +isca +optimistic +simulation +parallel +architectures +using +program +executables +sashikanth +chandrasekaran +mark +hill +workshop +parallel +distributed +simulation +pads +page +table +address +spaces +madhusudhan +talluri +mark +hill +yousef +khalidi +symposium +operating +systems +principals +sosp +december +presidential +young +investigator +award +final +report +mark +hill +july +efficient +support +irregular +applications +distributed +memory +machines +shubhendu +mukherjee +shamik +sharma +mark +hill +james +larus +anne +rogers +joel +saltz +ppopp +july +cost +effective +parallel +computing +david +wood +mark +hill +ieee +computer +february +solving +microstructure +electrostatics +proposed +parallel +computer +frank +traenkle +mark +hill +sangtae +computers +chemical +engineering +application +specific +protocols +user +level +shared +memory +babak +falsafi +alvin +lebeck +steven +reinhardt +ioannis +schoinas +mark +hill +james +larus +anne +rogers +david +wood +supercomputing +surpassing +performance +superpages +less +operating +system +support +madhusudhan +talluri +mark +hill +international +conference +architectural +support +forprogramming +languages +operating +systems +asplos +october +evaluation +directory +protocols +medium +scale +shared +memorymultiprocessors +shubhendu +mukherjee +mark +hill +international +conference +supercomputing +july +comparison +trace +sampling +techniques +multi +megabyte +caches +kessler +mark +hill +david +wood +ieee +transactions +computers +june +cooperative +shared +memory +software +hardware +scalable +multiprocessors +mark +hill +james +larus +steven +reinhardt +david +wood +transactions +computer +systems +tocs +november +wisconsin +architectural +research +tool +warts +mark +hill +james +larus +alvin +lebeck +madhusudhan +talluri +david +wood +computer +architecture +news +august +cache +performance +spec +benchmark +suite +jeffrey +mark +hill +dionisios +pnevmatikatos +alan +smith +ieee +micro +august +unified +formalization +four +shared +memory +models +sarita +adve +mark +hill +ieee +transactions +parallel +distributed +systems +tpds +june +performance +implications +tolerating +cache +faults +andreas +farid +pour +mark +hill +ieee +transactions +computers +march +mechanisms +cooperative +shared +memory +david +wood +satish +chandra +babak +falsafi +mark +hill +james +larus +alvin +lebeck +james +lewis +shubhendu +mukherjee +subbarao +palacharla +steven +reinhardt +international +symposium +computer +architecture +isca +wisconsin +wind +tunnel +virtual +prototyping +parallel +computers +steven +reinhardt +mark +hill +james +larus +alvin +lebeck +james +lewis +david +wood +sigmetrics +page +placement +algorithms +large +real +index +caches +kessler +mark +hill +transactions +computer +systems +november +programming +different +memory +consistency +models +kourosh +gharachorloo +sarita +adve +anoop +gupta +john +hennessy +mark +hill +journal +parallel +distributed +computing +august +tradeoffs +supporting +page +sizes +madhusudhan +talluri +shing +kong +mark +hill +david +patterson +international +symposium +computer +architecture +isca +detecting +data +races +weak +memory +systems +sarita +adve +mark +hill +barton +miller +robert +netzer +international +symposium +computer +architecture +isca +june +comparison +hardware +software +cache +coherence +schemes +sarita +adve +vikram +adve +mark +hill +mary +vernon +international +symposium +computer +architecture +isca +june +model +estimating +trace +sample +miss +ratios +david +wood +mark +hill +kessleracm +sigmetrics +implementing +stack +simulation +highly +associative +memories +extended +abstract +mark +hill +david +wood +sigmetrics +implementing +sequential +consistency +cache +based +systems +sarita +adve +mark +hill +international +conference +parallel +processing +august +weak +ordering +definition +sarita +adve +mark +hill +international +symposium +computer +architecture +isca +june +graduatesmadhusudhan +talluri +expected +august +superpages +subblocking +address +translation +hierarchy +first +employment +microsystems +current +email +madhu +sarita +adve +november +designing +memory +consistency +models +shared +memory +multiprocessors +first +employment +assistant +professor +rice +university +current +email +sarita +rice +richard +kessler +july +analysis +multi +megabyte +secondary +cache +memories +click +table +contents +first +employment +cray +research +current +email +richard +kessler +cray +last +updatedwed +keywords +help +search +engines +rank +page +higher +pages +mark +hill +home +page +computer +sciences +wisconsin +mark +hill +home +page +computer +sciences +wisconsin +mark +hill +home +page +computer +sciences +wisconsin +mark +hill +home +page +computer +sciences +wisconsin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..84d62517 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,22 @@ +home +page +markos +zaharioudakis +markos +wisc +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaphone +mail +markos +wisc +note +page +construction diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..8b20919d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,22 @@ +michael +birk +home +page +michael +birk +home +page +section +section +project +list +home +page +programming +languages +links +alltraxx +home +page +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..3a3f59a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,70 @@ +mark +mcauliffe +mark +mcauliffe +computer +sciences +department +university +wisconsin +madison +dayton +madison +mcauliff +wisc +research +interests +design +implementation +object +oriented +database +systems +publications +carey +dewitt +franklin +hall +mcauliffe +naughton +schuh +solomon +tsatalos +white +zwilling +shoringup +persistent +applications +proc +sigmod +mark +mcauliffe +marvin +solomon +atrace +based +simulation +pointer +swizzling +techniques +proceedings +ieee +data +engineering +march +mark +mcauliffe +michael +carey +marvin +solomon +towardseffective +efficient +free +space +management +appear +proceedings +sigmod +conference +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..519b36cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,141 @@ +marc +shapiro +page +marc +shapiro +believes +tautology +tautology +tautology +currently +obsessively +fond +disappear +fear +repulsed +pondering +fast +pointer +analyses +watching +lots +jackie +chan +movies +thinking +programming +language +design +reading +much +trying +teach +elementary +school +students +think +terms +recursion +hoping +interrupted +hoare +wrote +pointers +introduction +high +level +languages +step +backward +never +recover +home +page +schedule +todd +automatic +accident +generator +elaine +dimasi +twisty +little +page +amanda +peet +retreathere +hyper +mode +emacs +thepull +menus +doesn +cool +html +tags +submissions +softwarei +cobbled +together +pldi +able +work +nowincludes +previously +mostly +missing +file +submit +html +popl +paper +fast +accurate +flow +insensitive +points +analysis +marc +shapiroand +susan +horwitz +appear +symposium +principles +programming +languages +various +addresses +marc +shapiro +dept +dayton +madison +mail +wisc +talk +house +wisc +finger +house +wisc +marion +madison +list +people +know +really +marc +shapiro +meet +jonathan +goldstein +paul +ferguson +lawrence +brown +last +modified +marc +shapiro +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..6eff6d3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,27 @@ +home +pagerob +minimalist +home +page +last +modified +august +mellencamp +taship +introduction +operating +systems +email +mellen +wisc +office +computer +science +building +office +phone +office +hours +appointment +mellen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..5ccff262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,96 @@ +david +melski +personal +info +pagedavid +melskicurrent +department +mills +computer +science +statisticsmadison +dayton +madison +permenant +west +ivesmarshfield +michelle +awesome +person +page +currently +construction +sister +kasey +great +home +page +brother +eric +semester +teaching +couple +sections +also +working +reps +programming +languages +myexact +schedule +still +needs +determined +undergrad +majored +computer +science +russian +studieshere +university +wisconsin +even +spent +fall +semesterof +russia +chance +russian +often +miss +someday +someday +make +back +interests +include +chess +soccer +recently +beenbiking +also +distracted +work +numerousbooks +hasty +rewrites +page +also +want +link +tomapquest +plan +stealing +alot +maps +second +give +directions +madison +marshfield +last +modified +david +melski +melski +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..e4de9979 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,297 @@ +milo +martin +home +pagemilo +martin +milo +wisc +graduate +student +teaching +assistantcomputer +sciences +departmentuniversity +wisconsin +madison +west +dayton +streetmadison +usaemail +milo +wisc +eduoffice +office +phone +office +hours +tuesday +thursday +byappointmentba +computer +science +gustavus +adolphus +college +classes +compiler +construction +charles +fischer +advanced +computer +architecture +mark +hill +java +sitting +mark +hill +james +larusteaching +algebraic +language +programming +section +research +interestsi +first +year +student +interested +programminglanguages +architecture +systems +specifically +interested +compiler +optimization +technology +beinfluenced +hardware +operating +systems +advances +mobile +programming +java +additional +challenges +presents +compilers +architecture +operating +system +designers +many +many +things +many +even +know +interested +publicationsresearch +performed +summer +argonne +national +laboratory +technology +development +division +advisement +charles +fink +fink +humm +martin +micklich +evaluation +view +reconstruction +parameters +illicitsubstance +detection +using +fast +neutron +transmission +spectroscopy +ieee +nuclear +science +symposium +medical +imaging +conference +fink +micklich +yule +humm +sagalovsky +martin +evaluation +neutron +techniques +illicitsubstance +detection +nucl +inst +meth +publicationsresearch +performed +school +year +gustavus +adolphus +college +advisement +hailperin +milo +martin +hailperin +programming +languageflexibility +deterministic +dynamic +parallel +computation +senior +honors +thesis +mathematics +computer +science +department +gustavus +adolphus +college +postscript +computing +interests +java +java +resources +next +software +next +computer +software +company +founded +aninternational +scientific +educational +organization +dedicated +toadvancing +science +engineering +application +informationtechnology +serving +professional +public +interests +fosteringthe +open +interchange +information +promoting +highestprofessional +ethical +standards +direct +quote +page +personal +interests +football +bignfl +football +since +lived +minnesota +years +myfavorite +team +minnesota +vikings +eventhough +live +land +cheese +heads +colonize +conquer +multi +player +play +mail +space +exploration +combat +game +wrote +babylon +best +show +imho +atlantis +atlantis +play +email +game +mythical +world +atlantis +players +build +armies +engauge +trade +explore +lands +fight +wondering +monsters +train +wizards +discover +underworld +right +players +rules +current +list +players +ultimate +frisbee +ultimate +players +association +ultimate +combines +elements +ofsoccer +football +basketball +fast +paced +game +played +afrisbee +everyone +quarterback +everyone +receiver +direct +quote +home +page +ultimatein +simple +rules +milo +martin +milo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..b9aa97c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,169 @@ +minos +home +page +minos +garofalakisminos +wisc +eduphd +candidate +research +assistant +department +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaoffice +stat +phone +home +workresearch +interests +effective +resource +management +parallel +multimedia +database +systems +complex +query +processing +optimization +parallel +algorithms +database +theoryeducation +computer +science +university +wisconsin +madison +dept +computer +sciences +december +computer +science +university +patras +dept +computer +engineering +informatics +june +refereed +publications +multi +dimensional +resource +scheduling +parallel +queries +minos +garofalakis +yannis +ioannidis +proceedings +sigmod +conference +montreal +canada +june +abstract +paper +postscript +scheduling +issues +multimedia +query +optimization +minos +garofalakis +yannis +ioannidis +computing +surveys +december +paper +postscript +technical +reports +resource +scheduling +enhanced +view +continuous +media +databases +minos +garofalakis +banu +ozden +silberschatz +submitted +publication +october +model +checking +sequential +probabilistic +real +time +systems +minos +garofalakis +technical +report +computer +technology +institute +patras +february +advisor +yannis +ioannidismore +feel +free +peek +resume +pointers +interesting +stuff +madison +dbms +reasearch +home +page +madison +hellenic +society +home +page +sigmod +home +page +vldb +home +page +almaden +research +center +watson +research +centerdr +michael +bibliograpy +server +databases +logic +programming +page +perpetually +construction +last +updated +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..bd72bafe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,28 @@ +marcelo +gonalves +marcelo +gonalves +mjrg +wisc +associate +researcher +paradyn +project +addresswork +home +computer +sciences +department +sheboygan +west +dayton +street +madison +madison +phone +phone +department +computer +sciencesuniversity +wisconsin +madison diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..4bed39dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,39 @@ +brian +morgan +home +page +brian +morgangraduate +studentcomputer +sciences +department +university +wisconsin +madison +west +dayton +streetmadison +mail +morgan +wisc +telephone +advisor +chuck +dyerresearch +interestsvirtual +conferencing +systems +image +compression +video +conferencing +high +bandwidth +networking +related +links +interest +wisconsin +computer +vision +group diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..92a52b84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,149 @@ +andreas +moshovos +home +page +andreas +moshovosresearch +assistant +department +computer +sciencesuniversity +wisconsin +madisonadvisor +guri +sohigroups +multiscalar +wisconsin +kestrel +computer +architecture +address +leave +notesee +aroundwant +peek +future +clickheremy +brother +writes +poetry +click +herefor +sample +work +currently +working +data +dependence +speculation +processors +download +technical +report +compressed +postscript +uncompressed +postscript +download +talk +slides +load +balancing +multiscalar +processors +data +speculation +processors +general +interested +computer +architecture +instruction +level +parallelism +compiler +support +explotation +vlsi +fall +spring +graduate +student +thecourant +instituteof +york +university +earned +degree +since +transfered +wisconsin +however +theopportunity +work +excellent +people +meet +wife +computer +science +university +crete +greece +implementing +numerical +algorithms +access +decoupled +architecturethat +supports +software +pipelining +advisor +katevenis +short +description +found +computer +science +university +crete +greece +viha +like +editor +supports +editing +greek +many +many +interesting +links +hellenic +resouces +network +sure +visit +obtaining +installing +greek +fonts +local +copy +page +residing +atwww +hyper +devil +dictionary +bookmarks +mess +national +fraud +information +centerusenet +changes +want +send +afax +free diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..2bd51d24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,47 @@ +tony +home +pagewhat +newoctober +back +inmadison +update +several +pages +changed +background +black +better +contrast +updated +contact +information +minor +changes +various +pages +list +older +updates +prefer +keep +main +page +brief +hereare +links +second +level +pages +navigational +index +friends +favorite +pages +interests +contact +informationlast +modified +october +wisc +educopyright +tony diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..de9f6a0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,190 @@ +martin +reames +home +page +martin +reames +graduate +student +teaching +assistant +also +coke +poobah +finger +coke +machine +computer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +office +telephone +telephone +dept +email +mreames +wisc +edufall +scheduleresearch +interests +databases +particular +digital +terrain +modelling +tins +programming +languages +compiler +design +logic +logicprogramming +qualifying +exam +databases +spring +previous +years +exams +interests +software +design +development +product +orientedenvironment +exploits +computer +science +education +interestsin +databases +compiler +design +resume +postscriptand +html +distributed +wisconsin +affiliates +mathematics +computer +science +wesleyan +university +faint +heart +section +page +alink +senior +honorsthesis +general +unification +coke +poobahlook +works +mighty +afraid +dear +gone +coke +poobah +tomy +head +usually +gone +really +want +talk +tosomeone +better +adjusted +crucial +role +dept +life +youshould +probably +elton +doesn +even +mention +poobah +page +imaginethat +besides +aforementioned +coke +poobahship +mental +illness +afew +things +might +want +know +third +yeargraduate +student +department +wisconsin +concentrating +indatabases +currently +studying +qual +sometime +inearly +february +exercise +relaxation +play +squash +reasonably +well +year +round +ultimate +frisbee +summer +basketball +poorly +andinfrequently +notice +things +might +rapidlyriding +mountain +bike +around +campus +even +chilliest +weather +always +helmet +wish +learn +interests +feel +free +examine +mynot +often +updated +hierarchy +stuff +ilike +enjoy +wisconsin +line +librarylast +modified +martin +reames +mreames +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..f157038a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,45 @@ +wisconsin +multiscalar +project +home +pagewisconsin +multiscalar +project +technical +papers +talks +given +multiscalar +people +contributors +funding +sources +related +projects +available +software +wisconsin +computer +architecture +group +computer +sciences +departmentat +university +wisconsin +world +wide +computer +architecture +information +information +interest +local +users +last +updated +february +guri +sohi +sohi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..99af23dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,169 @@ +mike +steele +homepagemike +steele +homepagemsteele +wisc +eduoffice +comp +stat +building +sitting +univ +maryland +department +computer +struggling +undergraduate +operating +systems +class +sometimearound +april +note +time +stamp +lower +right +corner +says +folks +graduate +student +computer +sciences +department +university +wisconsin +madison +school +motto +come +freezein +land +cheese +research +study +interests +center +around +artificialintelligence +computer +vision +operating +systems +hope +narrow +coming +years +semester +graduateinstructor +section +algebraic +language +programming +received +bachelor +degree +theuniversity +maryland +scomputer +science +department +publicationsgrindstone +test +suite +parallel +performance +tools +jefferyk +hollingsworth +michael +steele +computer +science +technical +reportcs +university +maryland +gzipped +postscriptfile +classes +semestercs +introduction +operating +systems +mary +vernoncs +introduction +artificial +intelligence +chuck +dyermy +pagesinformation +gettingin +touch +friends +back +marylandwhom +forgot +mail +addresses +favorite +linksmy +favorite +sports +teamssome +friends +usenet +dave +barry +frequently +asked +questions +listthe +usenet +billy +joel +frequently +asked +questions +listi +also +mailing +list +administrator +thefruit +mailing +list +still +hosted +university +ofmaryland +started +insidejoke +around +office +hand +sometime +andnow +members +world +wide +predator +coming +kill +someinfrared +photos +know +looklike +infrared +picture +memike +steelemsteele +wisc +eduuniversity +wisconsin +madisoncomputer +sciences +department diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..1f227e53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,19 @@ +maria +home +pagemaria +home +pagehow +visit +university +maryland +college +park +maybe +university +wisconsin +madison +sections +might +want +visit +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..837fa5d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,91 @@ +oscar +naim +home +page +bienvenido +staff +member +working +paradyn +project +finished +myph +university +southampton +england +undergrad +universidad +simon +bolivar +caracas +venezuela +born +beautiful +city +barquisimeto +venezuela +barquisimetois +located +central +western +part +venezuela +population +ofabout +million +people +barquisimeto +also +known +musical +capital +venezuela +main +research +area +performance +analysis +visualization +parallel +programs +however +apart +researcher +like +playclassical +guitar +fact +studied +years +excellentmaestros +rodrigo +riera +antonio +lauro +like +spend +time +playing +guitar +reading +good +books +sherlock +holmes +stories +cooking +watching +baseball +working +like +beatiful +pictures +venezuela +pleaseclick +finger +naim +wisc +check +around +mundo +barquisimeto diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..1bec80a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,45 @@ +anastassia +ailamaki +home +welcome +home +anastassia +ailamaki +graduate +student +computer +sciences +department +university +wisconsin +madison +dayton +street +madison +phone +really +really +want +picture +important +notice +finds +time +make +decent +home +page +nice +links +george +rochester +alex +guide +greek +islandsare +worth +visiting +send +mail +natassa +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..3673ceb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,134 @@ +jeffrey +naughtonjeffrey +naughtonnaughton +wisc +eduresearch +interestsolap +multi +dimensional +data +analysis +parallel +object +relationaldbms +overall +goal +research +development +ofdatabase +systems +surpass +current +database +systems +inperformance +ease +currently +three +main +areas +ofinterest +techniques +improving +performance +ofmulti +dimensional +data +analysis +including +array +based +storage +andprocessing +algorithms +indices +algorithms +computingthe +cube +performance +object +relational +database +systems +including +benchmarking +dbms +algorithms +valuedattributes +techniques +parallelizing +workloads +parallel +spatial +information +systems +recent +publications +computation +multidimensional +aggregates +withsameet +agarwal +rakesh +agrawal +prasad +deshpande +ashish +gupta +raghu +ramakrishnan +sunita +sarawagi +proceedings +thend +international +conference +large +databases +mumbai +bombay +storage +estimation +multidimensional +aggregatesin +presence +hierarchies +amit +shukla +prasad +deshpande +karthikeyan +ramasamy +international +conference +large +databases +mumbai +bombay +bucky +object +relational +database +benchmark +michael +carey +david +dewitt +johannes +gerhke +dhaval +shah +mohammed +asgarian +preparation +toward +molap +object +relational +dbms +withyihong +zhao +kristin +tufte +submitted +publication diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..a121c662 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +navin +kabranavin +kabragraduate +student +department +computer +sciences +university +wisconsin +madisonadvisor +david +dewittresearch +area +databases +research +interests +customizable +query +optimization +paradise +project +plan +address +nothing +better +explore +bookmarks +could +look +indian +stuff +includes +among +things +archive +hindi +songs +navin +wisc +public diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..dc4184e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,31 @@ +newhalltia +newhall +newhall +wisc +graduate +student +dayton +madison +telephone +research +interests +parallel +distributed +systems +performance +tools +scalability +analysis +performance +prediction +java +research +group +paradynadvisor +bart +millermummy +pictures +guanajuato +last +changed +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..6708aa27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +nancy +hallcomputer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +project +shore +scalable +heterogeneous +object +repository diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..c7792c71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,472 @@ +olvi +mangasarian +home +page +olvi +mangasarian +john +neumann +professor +mathematics +computer +sciences +member +center +mathematical +sciences +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +olvi +wisc +harvard +university +interests +mathematical +programming +machine +learning +parallel +computing +research +summary +optimization +theory +rich +mathematically +effectivecomputationally +solving +many +real +life +problems +interestsin +topic +ranged +broad +spectrum +encompassestheoretical +aspects +error +bounds +mathematical +programsand +variational +inequalities +convergence +proofs +parallelgradient +variable +distribution +algorithms +optimization +smoothing +techniques +solving +constrained +optimization +problemsas +differentiable +nonlinear +equations +well +applicationsto +machine +learning +general +specific +contexts +animportant +aspect +research +mathematical +programmingtechniques +diagnosing +breast +cancer +resulted +ahighly +accurate +computerized +diagnostic +system +current +useat +university +wisconsin +hospitals +current +students +paul +bradley +recent +publications +mangasarian +solodova +linearly +convergent +descent +method +strongly +monotonecomplementarity +problems +mathematical +programming +technical +report +october +mangasarian +jong +pangexact +penalty +functions +mathematical +programswith +linear +complementarity +constraints +mathematical +programming +technical +report +august +mangasarianmathematical +programming +data +miningmathematical +programming +technical +report +august +mangasarianerror +bounds +nondifferentiable +convex +inequalities +strong +slater +constraint +qualification +mathematical +programming +technical +report +july +bradley +mangasarian +street +clustering +concave +minimization +mathematical +programming +technical +report +submitted +neural +information +processing +systems +street +mangasarian +wolberg +individual +collective +prognostic +prediction +mathematical +programming +technical +report +january +bradley +mangasarian +street +feature +selection +mathematical +programming +mathematical +programming +technical +report +december +submitted +informs +journal +computing +mangasarian +machine +learning +polyhedral +concave +minimization +mathematical +programming +technical +report +november +applied +mathematics +parallel +computing +festschrift +klaus +ritter +fischer +riedmueller +schaeffler +editors +physica +verlag +germany +mangasarian +posed +linear +complementarity +problem +mathematical +programming +technical +report +august +submitted +siam +proceedings +internationalsymposium +complementarity +problems +baltimore +november +revised +november +nick +street +mangasarian +improved +generalization +tolerant +training +mathematical +programming +technical +report +july +mangasarian +mathematical +programming +machine +learning +mathematical +programming +technical +report +april +revised +july +appear +proceedings +nonlinear +optimization +applications +workshop +erice +june +plenum +press +chunhui +chen +mangasarian +hybrid +misclassification +minimization +mathematical +programming +technical +report +february +revised +july +august +appear +advances +computational +mathematics +mangasarian +optimization +machine +learning +mathematical +programming +technical +report +january +siag +views +news +chunhui +chen +mangasarian +class +smoothing +functions +nonlinear +mixed +complementarity +problems +mathematical +programming +technical +report +august +revised +october +february +september +computational +optimization +applications +mangasarian +nick +street +wolberg +breast +cancer +diagnosis +prognosis +linear +programming +mathematical +programming +technical +report +august +revised +december +operations +research +july +august +mangasarian +linear +complementarity +problem +separable +bilinear +program +mathematical +programming +technical +report +july +journal +global +optimization +mangasarian +solodov +backpropagation +convergence +deterministic +nonmonotone +perturbed +minimization +mathematical +programming +technical +report +june +advances +neural +information +processing +systems +cowan +tesauro +alspector +editors +morgan +kaufmann +publishers +francisco +california +chunhui +chen +mangasarian +smoothing +methods +convex +inequalitiesand +linear +complementarity +problems +computer +sciences +technical +report +november +revised +november +mathematical +programming +mangasarian +misclassification +minimization +computer +sciences +technical +report +october +revised +september +journal +global +optimization +december +mangasarian +solodov +serial +parallel +backpropagation +neural +nets +vianonmonotone +perturbed +minimnization +computer +sciences +technical +report +april +revised +december +optimization +methods +software +chronological +cancer +bibliography +page +publications +group +wisconsin +papers +reports +view +download +papers +reports +group +view +home +page +group +periodically +updated +olvi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..ecdcb829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,806 @@ +machine +learning +cancer +diagnosis +prognosismachine +learning +cancer +diagnosis +prognosisthis +page +describes +various +linear +programming +based +machine +learningapproaches +applied +diagnosis +prognosis +ofbreast +cancer +work +result +collaboration +theuniversity +wisconsin +madison +betweenprof +olvi +mangasarian +computer +sciences +department +anddr +william +wolbergof +departments +surgery +human +oncology +copy +thepress +release +distributed +american +cancer +society +science +writers +seminar +inmarch +provides +good +overview +research +table +contents +diagnosis +prognosis +bibliography +citation +popular +press +local +related +links +related +linksdiagnosisthis +work +grew +desire +wolberg +accurately +diagnosebreast +masses +based +solely +fine +needle +aspiration +heidentified +nine +visually +assessed +characteristics +sample +consideredrelevant +diagnosis +collaboration +prof +mangasarian +andtwo +graduate +students +rudy +setiono +kristin +bennett +aclassifier +constructed +using +multisurface +method +pattern +separation +nine +features +thatsuccessfully +diagnosed +cases +resulting +data +iswell +known +wisconsin +breast +cancer +data +image +analysis +work +began +addition +nick +streetto +research +team +goal +diagnose +sample +based +adigital +image +small +section +slide +results +ofthis +research +consolidated +software +system +known +xcyt +currently +used +wolberg +clinicalpractice +diagnosis +process +performed +follows +taken +breast +mass +material +thenmounted +microscope +slide +stained +highlight +cellularnuclei +portion +slide +cells +arewell +differentiated +scanned +using +digital +camera +aframe +grabber +board +user +isolates +individual +nuclei +using +xcyt +using +mouse +pointer +user +draws +approximate +boundary +ofeach +nucleus +using +computer +vision +approach +known +snakes +approximations +converge +exact +nuclear +boundaries +interactive +process +takes +five +minutes +slide +image +showingxcyt +nuclei +isolated +thisfasion +program +computes +values +characteristics +ofeach +nuclei +measuring +size +shape +texture +mean +standarderror +extreme +values +features +computed +resulting +total +nuclear +features +sample +based +training +cases +linear +classifier +wasconstructed +differentiate +benign +malignant +samples +thisclassifier +consists +single +separating +plane +space +threeof +features +extreme +value +area +extreme +value +smoothness +mean +value +texture +projecting +cases +onto +thenormal +separating +plane +approximate +probability +densities +ofthe +benign +malignant +points +constructed +allow +simple +bayesiancomputation +probability +malignancy +patients +thesedensities +shown +patient +allowing +judge +confidence +diagnosis +comparison +hundreds +previous +samples +date +system +correctly +diagnosed +consecutive +newpatients +benign +malignant +eight +cases +didxcyt +return +suspicious +diagnosis +estimatedprobability +malignancy +small +subset +source +images +used +research +found +goodtest +cases +image +segmentation +object +recognition +algorithms +petsegmentation +algorithm +automatically +identify +nuclei +inthese +images +please +email +street +wisc +work +together +prognosisthe +second +problem +considered +research +prognosis +prediction +long +term +behavior +disease +haveapproached +prognosis +function +approximation +problem +using +inputfeatures +including +computed +xcyt +predict +atime +recurrence +malignant +patients +using +right +censored +data +solution +termed +recurrence +surface +approximation +method +utilizes +linearprogram +construct +surface +predicts +time +recurrence +fornew +patients +examining +actual +recurrence +training +caseswith +similar +predicted +recurrence +times +plot +probability +ofdisease +free +survival +various +times +years +anindividual +patient +capability +incorporated +intoxcyt +example +shown +survival +curves +plot +probability +disease +free +survival +versus +time +years +black +disease +free +survival +curve +represents +patients +ouroriginal +study +curve +represents +probability +ofdisease +free +survival +sample +case +particular +case +thereforehas +average +prognosis +probability +disease +freeafter +years +equal +procedure +also +used +compare +predictive +power +ofvarious +prognostic +factors +results +indicate +precise +detailed +cytological +information +type +provided +xcytgives +better +prognostic +accuracy +traditional +factors +tumorsize +lymph +node +status +corroborated +researchers +result +could +remove +need +often +painful +axillary +lymph +node +surgery +chronological +bibliographylinked +papers +provided +postscript +format +postscript +viewer +download +file +shift +click +netscape +print +abstracts +ascii +text +obtain +papers +notlinked +please +contact +first +author +mangasarian +setiono +wolberg +pattern +recognition +linear +programming +theory +application +medical +diagnosis +proceedings +workshop +large +scale +numerical +optimization +pages +philadelphia +siam +mangasarian +wolberg +cancer +diagnosis +linear +programming +siam +news +pages +abstract +wolberg +street +mangasarian +image +analysis +machine +learning +applied +breast +cancer +diagnosis +prognosis +analytical +quantitative +cytology +histology +pages +april +abstract +wolberg +street +heisey +mangasarian +computer +derived +nuclear +features +distinguish +malignant +benign +breast +cytology +human +pathology +pages +abstract +wolberg +street +heisey +mangasarian +computer +derived +nuclear +grade +breast +cancer +prognosis +analytical +quantitative +cytology +histology +pages +august +abstract +mangasarian +street +wolberg +breast +cancer +diagnosis +prognosis +linear +programming +operations +research +pages +july +august +available +mathematical +programming +technical +report +abstract +street +mangasarian +wolberg +inductive +learning +approach +prognostic +prediction +proceedings +twelfth +international +conference +machine +learning +prieditis +russell +pages +morgan +kaufmann +abstract +teague +wolberg +street +mangasarian +call +page +indeterminate +fine +needle +aspiration +breast +image +analysis +aided +diagnosis +cancer +submitted +abstract +street +mangasarian +wolberg +individual +collective +prognostic +prediction +technical +report +computer +sciences +department +university +wisconsin +madison +january +submitted +icml +aaai +conferences +abstract +citation +medical +popular +press +news +medicine +segment +prime +news +march +breast +biopsy +without +surgery +friend +today +march +cancer +detection +imitates +prospecting +manning +milwaukee +sentinel +march +analyzing +breast +cancer +detroit +news +march +high +tech +cancer +hunt +marilynn +marchione +milwaukee +journal +march +computerized +interpretation +breast +biopsies +progress +reported +oncology +times +april +computer +program +hunts +breast +cancer +ruth +sorelle +houston +chronicle +april +computer +program +improve +interpretation +aspirate +oncology +news +international +data +suggest +needle +biopsies +could +replace +surgical +biopsy +diagnosing +breast +cancer +journal +american +medical +association +medical +news +perspectives +column +june +diagnosis +image +analysis +machine +learning +cope +september +october +computer +seeks +breast +cancer +madison +capital +times +january +computer +aided +cancer +prediction +angeles +times +january +local +related +links +mathematical +programming +group +machine +learning +group +medical +schoolother +related +links +national +library +medicine +university +nevada +center +biomedical +modeling +research +oncolink +washington +university +institute +biomedical +computing +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..4579a8a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,328 @@ +pattern +separation +mathematical +programmingpattern +separation +mathematical +programmingthis +page +describes +work +pattern +separation +linear +programming +mathematical +programming +section +university +wisconsin +madison +computer +sciences +department +brief +history +method +outlinemathematical +optimization +approaches +particular +linear +programming +long +used +problems +pattern +separation +linear +programs +used +construct +planes +separate +linearly +separable +point +sets +separation +nonlinear +surface +using +linear +programming +also +described +whenever +surface +parameters +appeared +linearly +quadratic +polynomial +surface +formulations +however +could +failon +sets +separable +surface +linear +parameters +multisurface +method +avoided +difficulty +separates +disjoint +finite +point +sets +dimensional +euclidean +space +follows +choose +parallel +planes +dimensional +euclidean +space +close +together +region +planes +contains +points +sets +regions +parallel +planes +contain +points +points +discard +points +regions +parallel +planes +repeat +process +points +parallel +planes +region +parallel +planes +contains +points +points +multisurface +method +tree +variant +multisurface +method +developed +finite +disjoint +point +sets +dimensional +euclideanspace +goal +todetermine +sequence +planes +dimensional +euclideanspace +separate +sets +follows +determine +plane +dimensional +euclidean +space +minimizes +average +distances +misclassified +points +point +misclassified +lies +side +separating +plane +assigned +similarly +point +misclassified +lies +side +separating +plane +assigned +regions +assigned +contain +mostly +points +stop +otherwise +generate +another +error +minimizing +plane +region +sequence +planes +generated +viewed +decision +tree +eachnode +tree +best +split +points +reaching +node +found +solving +node +split +branches +thesame +procedure +applied +mostly +points +oneset +node +linear +programming +approach +also +viewed +astraining +neural +network +hidden +layer +shown +learn +concepts +well +better +traditionallearning +methods +cart +also +advantage +artificial +neural +network +methods +backpropagation +inthat +training +proceeds +much +faster +implementations +implemented +using +minos +numerical +optimization +package +nick +street +kristin +bennett +also +implemented +matlab +optimization +package +paul +bradley +following +description +matlab +implementation +together +files +required +chronological +bibliography +mangasarian +linear +nonlinear +separation +patterns +linear +programming +operations +research +june +pages +mangasarian +multisurface +method +pattern +separation +ieee +transactions +information +theory +november +pages +bennett +decision +tree +construction +linear +programming +proceedings +midwest +artificial +intelligence +cognitive +science +society +conference +pages +bennett +mangasarian +robust +linear +programming +discrimination +linearly +inseparable +sets +optimization +methods +software +pages +mangasarian +mathematical +programming +neural +networks +orsa +journal +computing +fall +pages +last +modified +paul +bradley +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..dfe4aa0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,124 @@ +paradyn +project +home +page +paradyn +parallel +performance +tools +release +informationthis +page +contains +describes +copy +ofrelease +paradyn +tools +project +goalsthe +paradyn +parallel +program +performance +tools +project +exploring +newapproaches +building +scalable +tools +parallel +program +performance +technical +papers +paradyn +manualsstatus +reporta +recent +status +report +blizzard +paradyn +project +arpa +meeting +panel +presentationthis +presentation +made +arpa +csto +meeting +insan +antonio +arpa +csto +super +symbol +table +presentationthis +presentation +made +arpa +csto +meeting +inflorida +project +effort +develop +common +access +routines +tocompiler +generated +information +used +tools +high +level +parallellanguages +project +staff +super +computing +postera +hypertext +version +poster +super +computing +related +projects +elsewhere +spdt +sigmetrics +symposium +parallel +distributed +toolsyou +also +restaurants +included +paradyn +page +temporary +placeholder +contact +informationparadyn +projectdepartment +computer +sciencesuniversity +wisconsin +west +dayton +streetmadison +email +paradyn +wisc +edufax +last +modified +bart +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..56d72bbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,42 @@ +steven +parker +home +page +steven +parker +graduate +student +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +wisconsin +office +parker +wisc +depth +area +numerical +analysis +employment +prism +projectfall +schedule +math +prism +related +links +home +page +home +page +send +mail +last +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..9803d605 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,138 @@ +home +page +paul +bradleygraduate +student +computer +sciences +department +university +wisconsin +madison +paulb +wisc +eduoffice +csphone +advisor +mangasarianinterestsmathematical +programmingmachine +learningfly +fishing +interested +using +mathematical +programming +techniques +specifically +nonlinear +linear +programming +inductive +learning +summary +work +currentlybeing +done +area +university +wisconsin +please +madisonmathematical +programming +page +thiswork +guided +professor +olvimangasarian +nick +street +publicationsall +papers +stored +postscript +format +abstracts +ascii +text +postscript +viewer +download +file +shift +click +using +netscape +print +bradley +mangasarian +street +feature +selection +mathematical +programming +mathematical +programming +technical +report +computer +sciences +department +university +wisconsin +madison +wisconsin +december +revised +march +submitted +informs +journal +computing +abstract +bradley +mangasarian +street +clustering +concave +minimization +mathematical +programming +technical +report +computer +sciences +department +university +wisconsin +madison +wisconsin +accepted +presentation +neural +information +processing +systems +abstract +nick +street +work +paul +picksthese +sites +backcountry +page +grateful +dead +nasa +wisconsin +fishing +frogs +espnet +timesfax +uroullette +last +modified +paul +bradley +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..3939428c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,147 @@ +pete +devries +home +page +peter +devries +internet +tools +specialist +room +computer +sciences +westdayton +madison +pdevries +wisc +internet +tools +specialist +internic +scout +means +isthat +read +everything +internet +technology +make +sense +andthen +write +overview +articles +scout +toolkit +great +thing +iread +everything +think +anyway +rather +foolish +topay +tell +eric +hazen +alsoprovide +technical +webmaster +services +scout +group +although +excellent +help +computer +systems +folksat +internic +recently +joining +scout +team +worked +laboratory +molecularbiology +integrated +microscopy +resource +biomedical +resource +nearly +eight +years +molecularbiology +fortunate +develop +prof +seancarroll +technique +creating +multiple +label +confocal +images +basically +cool +looking +images +developing +embryos +lotof +journal +book +covers +also +developed +molecular +biology +site +fortunate +work +prof +johnwhite +rest +imrstaff +developed +site +received +stars +inthe +magellan +internet +guide +last +major +project +involved +development +dimensional +microscope +studying +embryos +develop +microscope +isdescribed +article +appeared +augustnd +issue +journal +science +photos +guests +scout +labs +standard +info +resume +related +experience +publications +presentations +updated +tuesday +december diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..50a013a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,131 @@ +yumpee +home +page +manoj +plakal +graduate +slave +dept +computer +science +universityofwisconsin +madison +blah +home +country +india +though +originally +state +kerala +stayed +life +calcutta +studied +bosco +school +calcutta +salesians +bosco +undergrad +kanpur +major +computer +science +engineering +currently +first +year +graduate +student +supported +teaching +assistantship +department +computer +sciences +university +wisconsin +madison +staring +barrel +either +computer +architecture +programming +languages +interests +music +rock +metal +alternative +blues +movies +books +stuff +acads +hacking +industry +geeky +nerdy +stuff +featured +home +page +kanpur +chat +gateway +kanpur +class +seealso +iitk +class +links +friends +snaps +friends +pinup +gallery +every +nerd +needs +check +bookmarks +links +page +access +logs +visiting +pages +contact +north +randall +avenue +madison +computer +sciences +university +wisconsin +madison +dayton +street +madison +plakal +wisc +plakal +wisc +acknowledgements +suresh +venkat +nifty +table +igor +ivanisevic +graduate +slave +wisecrack +icons +various +corners +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..7f2f7f6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,90 @@ +prasad +home +page +page +constuction +meanwhile +prasad +deshpande +graduate +student +computer +sciences +deparment +university +wisconsin +madison +home +address +princeton +madison +office +address +computer +sciences +building +university +wisconsin +madison +dayton +madison +academic +interests +database +systems +theory +research +area +databases +current +schedule +theory +investments +managerial +economics +meetings +prof +jeff +naughton +music +introduction +music +theory +publications +computation +multidimensional +aggregates +vldb +storage +estimation +multidimensionalaggregates +presence +hierarchies +vldb +interesting +course +projects +package +java +download +want +spend +time +timex +world +find +india +dilbert +comix +explore +bookmarks +random +link +finger +times +since +hakuna +matata +info +creating +pages diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..3801d8e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,45 @@ +vishy +home +page +viswanath +poosala +research +assistant +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +poosala +wisc +research +information +reseach +summary +resume +html +postscript +information +related +databases +advisor +prof +yannis +ioannidis +information +asha +voluntary +organization +help +improve +basic +education +india +interestsuw +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..971fb3e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,770 @@ +home +pagethe +project +querying +sequence +data +document +construction +time +order +database +order +time +database +time +database +order +document +contents +project +objectivescurrent +statusmotivating +exampleseq +data +model +sequin +query +languageoptimization +techniquesseq +system +developmentpublicationsrelated +workcontact +informationproject +objectives +number +important +database +applications +require +processingof +large +amounts +ordered +sequence +data +domains +theseapplications +include +financial +management +historical +analysis +economic +social +sciences +metereology +medical +sciences +andbiological +sciences +existing +relational +databases +inadequate +regard +data +collections +treated +sets +sequences +consequently +expressing +sequence +queries +tedious +evaluating +inefficient +databases +model +data +using +abstraction +sequences +allow +data +sequences +queried +declarative +manner +utilizing +ordered +semanticstake +advantage +unique +opportunities +available +query +optimization +evaluationintegrate +sequence +data +relational +data +users +canstore +query +combination +relation +sequencesthese +requirements +serve +goals +project +various +kinds +sequences +need +supported +temporal +sequences +themost +important +kind +queries +expressible +using +notions +like +next +previous +natural +considering +sequences +queries +optimized +evaluated +efficiently +issues +need +studied +theory +database +system +needs +built +demonstrates +feasibility +theoretical +ideas +project +statusthe +current +status +project +defined +data +model +support +important +kinds +sequence +data +also +defined +algebraicquery +operators +composed +form +sequence +queries +analogousto +composition +relational +algebra +operators +form +relation +queries +described +sequence +queries +efficiently +processed +identified +various +optimization +techniques +sequence +query +language +sequin +candeclaratively +express +queries +sequences +sequin +query +include +embedded +expressions +relational +query +language +likesql +vice +versa +building +disk +based +database +system +demonstrate +feasibility +proposals +system +implements +model +using +nested +complex +object +architecture +built +shore +storage +manager +process +several +megabytes +data +relations +sequences +supported +integrated +extensible +manner +motivating +example +sequence +querya +weather +monitoring +system +records +information +various +meteorological +phenomena +sequentiality +occurrence +phenomena +various +meteorological +events +sequenced +time +recorded +scientist +asks +query +volcano +eruptions +didthe +recent +earthquake +strength +greater +richter +scale +query +expressed +relational +query +language +like +complex +features +like +groupby +clauses +correlated +subqueries +aggregatefunctions +required +conventional +relational +query +optimizer +would +find +efficient +query +execution +plan +even +given +knowledge +earthquakes +volcano +relations +sorted +time +however +efficient +plan +exists +models +data +sequencesordered +time +sequences +scanned +lock +step +similar +sort +merge +join +recent +earthquake +record +scanned +stored +temporary +buffer +whenever +volcano +record +processed +value +recent +earthquake +record +stored +buffer +checked +strength +greater +possibly +generating +answer +query +therefore +processed +single +scan +sequences +using +little +memory +optimization +sequentiality +data +query +data +modelthe +details +data +model +aredescribed +published +paper +click +postscript +version +present +gist +basic +model +sequence +records +mapped +ordereddomain +positions +many +many +relationship +records +andpositions +viewed +dual +distinct +ways +recordsmapped +position +positions +mapped +record +views +called +positional +record +oriented +respectively +gives +rise +query +operators +based +view +queries +sequences +could +require +operators +either +flavors +record +oriented +operators +similar +relationaloperators +include +various +kinds +joins +overlap +containment +andaggregates +operators +extensively +explored +researchersin +temporal +database +community +positional +operators +include +next +previous +offset +movingaggregates +operators +allow +zooming +operations +sequences +means +collapsing +expanding +ordering +domains +associated +sequence +instance +daily +sequence +could +zoomed +collapsed +weekly +sequence +zoomed +expanded +hourly +sequence +last +part +model +deals +operations +groups +sets +sequences +advantage +makes +easy +model +queries +involving +sequence +collections +case +many +real +worldsituations +sequence +operators +extended +work +groups +similar +sequences +instead +single +sequences +extensionof +model +indicates +practical +implementation +ofseq +would +probably +involve +nested +complex +object +system +sequin +query +languagewe +devised +query +language +called +sequin +usingwhich +declarative +sequence +queries +specified +languageis +similar +flavor +except +inputs +queriesas +well +results +queries +sequences +click +description +sequin +language +examples +optimization +techniqueswe +proposed +optimization +techniques +sequence +queries +involving +positional +operators +existing +techniques +thathave +proposed +queries +record +oriented +operators +optimizations +query +transformations +meta +data +caching +intermediate +results +efficiently +evaluate +query +optimal +query +evaluation +plan +generated +using +algorithm +relies +cost +estimates +important +observations +accessing +sequence +data +single +stream +probably +efficient +evaluation +strategies +take +account +details +optimization +techniques +aredescribed +published +paper +click +postscript +version +system +developmentthe +database +system +client +serverarchitecture +supporting +multiple +clients +viaa +multi +threaded +server +server +built +ontop +shore +storage +manager +sequin +subset +supported +query +languageswhich +embedded +inside +data +modelis +nested +complex +object +model +allows +arbitrarylevels +nesting +relations +inside +sequences +viceversa +system +also +extensible +providing +supportfor +data +types +ordering +domains +user +defined +functions +storage +implementations +query +languages +detailson +system +click +publicationssequence +query +processing +praveen +seshadri +miron +livny +raghu +ramakrishnan +proceedings +sigmod +conference +data +management +framework +sequence +datapraveen +seshadri +miron +livny +raghu +ramakrishnan +proceedings +ieee +conference +data +engineering +march +design +implementation +sequence +database +systempraveen +seshadri +miron +livny +raghu +ramakrishnan +submitted +vldb +next +sequence +queriesraghu +ramakrishnan +michael +cheng +miron +livny +praveen +seshadri +proceedings +international +conference +management +data +comad +december +related +workthedevise +project +complementary +provides +visualizationenvironment +used +explore +sequence +data +devise +front +queries +posed +database +server +answers +examined +graphically +also +shore +project +storage +manager +used +people +working +related +projects +madison +database +research +group +madison +department +servercontact +informationfor +information +contact +praveen +seshadri +praveen +wisc +eduraghu +ramakrishnan +raghu +wisc +edumiron +livny +miron +wisc +educomputer +sciences +department +university +wisconsin +dayton +street +madison +last +modified +praveen +seshadripraveen +seshadri +praveen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..dded804a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,26 @@ +andrew +prock +home +page +andrew +clemens +hockert +prockoffice +hours +personal +history +school +classes +madison +class +school +bookmarks +resume +doonesbury +trot +alta +vista +alta +vista +prock +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..fa6fb369 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,174 @@ +home +page +anne +condon +anne +condon +associate +professor +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +condon +wisc +university +washington +interests +complexity +theory +interactive +proof +systems +randomized +complexityclasses +theory +parallel +computation +research +summary +interested +models +computation +interactiveproof +systems +combine +nondeterminism +randomness +suchmodels +recently +proven +surprisingly +useful +solving +classicproblems +complexity +theory +example +although +theoryof +completeness +long +used +identify +hard +computationalproblems +much +progress +understanding +whichhard +problems +solutions +easy +approximate +recentresults +interactive +proof +systems +resulted +novel +modelsof +turn +used +prove +approximabilityresults +several +hard +problems +work +developingboth +positive +negative +results +approximability +hardcombinatorial +problems +arise +game +theory +graph +theoryand +automata +theory +also +interested +design +analysis +parallel +algorithms +currently +working +development +parallel +algorithms +forsorting +graph +problems +minimum +spanning +tree +goal +develop +algorithms +work +well +practical +parallel +models +communication +synchronization +costscan +expensive +sample +recent +publications +interactive +proof +systems +polynomially +bounded +strategies +ladner +journal +computer +system +sciences +finite +state +automata +nondeterministic +probabilisticstates +hellerstein +pottle +wigderson +proceedingsof +annual +symposium +theory +computing +pspace +provable +provers +round +caiand +lipton +journal +computer +system +sciences +february +page +automatically +created +october +email +pubs +wisc +eduto +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..e46e8042 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,192 @@ +home +page +deborah +joseph +deborah +joseph +associate +professor +computer +sciences +mathematics +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +joseph +wisc +purdue +university +interests +structural +applied +complexity +theory +computational +biology +computational +geometry +mathematical +logic +research +summary +research +concerns +areas +theoretical +computer +science +study +structural +properties +complexity +classes +design +analysis +algorithms +biological +problems +last +twenty +years +great +deal +work +gone +studyingthe +properties +sets +decidable +deterministic +andnondeterministic +polynomial +time +despite +effort +stillknow +little +classes +recently +fact +computerscientists +questioned +adequacy +known +proof +techniquesfor +resolving +questions +whether +research +investigatesthe +structural +properties +sets +classes +exploresin +formal +types +proof +techniques +necessary +resolveproblems +concerning +complexity +classes +research +interests +computational +biology +primarily +inthe +area +computational +methods +genome +sequencing +theseincluded +development +dynamic +data +structures +algorithmsfor +fragment +assembly +large +scale +genome +sequencing +projects +development +specific +algorithmic +techniques +handlingrepetitive +sequences +addition +research +utilized +graphtheoretic +methods +rapid +homology +detection +analysisof +anonymous +sequences +sample +recent +publications +collapsing +degrees +subexponential +time +pruim +young +proceedings +ninth +structure +complexity +theoryconference +sparse +spanners +weighted +graphs +althofer +dobkin +soares +discrete +computational +geometry +obtaining +global +similarity +local +similarity +meidanisand +tiwari +proceedings +fourth +scandinavianworkshop +algorithms +springer +verlag +lecture +notes +incomputer +science +page +automatically +created +october +email +pubs +wisc +eduto +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..213c481a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,143 @@ +home +page +miron +livny +miron +livny +professor +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +miron +wisc +weizmann +institute +science +rehovot +israel +interests +resource +management +algorithms +performance +modeling +analysis +discrete +event +simulation +research +summary +major +emphasis +research +design +evaluationof +resource +management +policies +involved +developmentof +resource +management +policies +processing +data +managementsystems +types +systems +general +purpose +well +asreal +time +scheduling +algorithms +considered +researchinvolves +performance +studies +different +policies +specialemphasis +interplay +properties +systemand +performance +policy +since +performancestudies +employ +simulation +interested +discrete +event +modelingand +simulation +techniques +currently +process +implementinga +simulation +laboratory +based +novel +simulation +language +laboratory +includes +data +management +utilities +visualizationtools +graphical +interfaces +sample +recent +publications +disk +tape +joins +synchronizing +disk +tape +access +myllymaki +proceedings +sigmetrics +conference +sequence +query +processing +sashadri +ramakrishnan +proceedings +sigmod +conference +foundations +visual +metaphors +schema +display +haberand +ioannidis +journal +intelligent +information +systems +july +page +automatically +created +october +email +pubs +wisc +eduto +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..fa8fc25d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,145 @@ +home +page +seymour +parter +seymour +parter +professor +computer +sciences +mathematics +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +parter +wisc +york +university +interests +numerical +methods +partial +differential +equations +research +summary +time +major +emphasis +work +solutionof +indefinite +discrete +elliptic +systems +equations +classicaliterative +methods +multigrid +methods +work +effectivelywhen +system +positive +definite +methods +also +bemade +effective +real +symmetric +part +operatoris +positive +definite +hand +indefinite +casedirect +methods +attempt +preserve +sparseness +thesystem +encounter +small +pivots +thus +challengingproblem +effectively +mixes +concepts +procedures +linearalgebra +elliptic +partial +differential +equations +nowinvolved +several +projects +attack +class +problems +include +preconditioning +studies +research +specialmultigrid +methods +sample +recent +publications +preconditioning +chebyshev +collaction +discretization +ellipticpartial +differential +equations +appear +siam +journalon +numerical +analysis +preconditioning +boundary +conditions +without +estimates +condition +numbers +distribution +singular +values +siam +journal +numerical +analysis +preconditioning +second +order +elliptic +operators +condition +numbersand +distribution +singular +values +journal +scientificcomputing +page +automatically +created +october +email +pubs +wisc +eduto +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..38bd9bd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,180 @@ +home +page +mary +vernon +mary +vernon +professor +computer +sciences +industrial +engineering +computer +sciences +department +university +wisconsin +dayton +madison +telephone +email +vernon +wisc +university +california +angeles +interests +techniques +applications +computer +systems +performance +analysis +performance +parallel +systems +parallel +architectures +operatingsystems +research +summary +interested +analytic +modeling +techniques +applicationto +computer +system +performance +issues +emphasis +paralleland +distributed +system +design +issues +modeling +techniquesi +developed +together +graduate +students +colleaguesinclude +generalized +timed +petri +customizedmean +value +analysis +gtpn +represent +parallel +systemfeatures +synchronization +priority +service +precisely +customized +technique +yields +intuitive +equationsthat +represent +parallel +system +features +approximately +butcan +solved +efficiently +also +recently +proposedthe +technique +called +interpolation +approximationsfor +analysis +parallel +processor +allocation +policies +techniquemay +also +broader +applicability +parallel +systems +performanceanalysis +current +research +projects +include +characterization +high +performanceparallel +workloads +analysis +parallel +processor +schedulingpolicies +scheduling +issues +multimedia +servers +memorymanagement +policies +networks +workstations +sample +recent +publications +fairness +dqdb +networks +slot +reuse +brewster +proceedings +ieee +infocom +conference +august +accurate +modeling +hybrid +hash +join +algorithm +pateland +carey +proceedings +sigmetrics +conference +june +application +characteristics +limited +preemption +forrun +completion +parallel +processor +scheduling +policies +withs +chiang +mansharamani +proceedings +sigmetricsconference +june +page +automatically +created +october +email +pubs +wisc +eduto +report +errors diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..29188480 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,12 @@ +qinqin +wang +home +page +welcome +qinqin +wang +home +pageqw +wisc +edulast +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..41336acb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,364 @@ +raghu +ramakrishnan +home +page +raghu +ramakrishnan +associate +professor +computer +science +raghu +wisc +department +computer +sciencesuniversity +wisconsin +madison +west +dayton +street +madison +usaphone +department +education +teaching +activities +research +interests +research +projects +graduates +education +university +texas +austin +tech +indian +institute +technology +madras +teaching +activities +course +text +database +management +systems +software +educational +minibaseand +coralthe +text +database +management +systems +published +mcgraw +hill +aimedat +first +second +courses +database +systems +undergraduateand +graduate +levels +minibase +relational +dbms +developed +inconjunction +text +coral +system +also +used +coursesthat +deal +logic +databases +several +schools +research +interests +integrating +heterogeneous +data +sources +content +based +querying +indexing +sequence +image +data +exploratory +analysis +large +data +sets +data +mining +extending +database +query +languages +using +constraints +deductionas +databases +grows +diversifies +increasinglyimportant +able +access +data +dispersed +heterogeneous +independently +developed +sources +easily +rodin +project +successor +project +investigating +severalissues +formal +techniques +practical +toolkits +forsemantic +integration +supporting +multiple +levels +serviceand +access +database +database +access +networkedcluster +machines +joint +work +profs +ioannidis +livny +recent +work +results +visual +data +explorationfrom +next +project +joint +work +prof +livny +applied +data +integration +querying +second +area +interest +content +based +querying +complex +data +assequences +image +sets +seqsystem +deals +queries +sequence +data +focuses +dbms +design +optimizationissues +related +sequence +data +part +next +project +joint +work +prof +livny +important +aspect +work +identifyingtrends +data +general +identifying +useful +patterns +ofinformation +project +goal +support +content +based +retrieval +fromlarge +sets +images +focus +developing +implementingan +expressive +data +definition +language +used +customizea +general +image +database +system +take +advantage +specializedinformation +given +collection +images +indexedand +queried +interest +querying +analysis +data +covers +data +exploration +andmining +developed +powerful +clustering +algorithm +called +birchfor +large +datasets +visual +data +exploration +tool +called +deviseas +part +next +project +long +standing +research +interest +extension +relational +databasequery +languages +logic +programming +featuressuch +structured +terms +recursion +ofarithmetic +constraints +specify +data +queries +morecompactly +efficiently +ongoing +project +involves +continued +development +coraldeductive +system +evaluation +based +upon +bottom +fixpointevaluation +techniques +several +optimizations +applied +make +efficientacross +broad +range +programs +research +projects +coral +next +graduates +sudarshan +august +time +optimizations +bottom +evaluation +logic +programs +first +employment +bell +labs +murray +hill +sudarsha +research +srivastava +august +deductive +object +oriented +languages +first +employment +bell +labs +murray +hill +divesh +research +august +design +evaluation +transitive +closure +algorithms +first +employment +bell +labs +murray +hill +research +seshadri +august +sequence +data +managementfirst +employment +computer +sciences +department +cornell +university +praveen +cornell diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..a1397934 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,195 @@ +rahul +home +page +rahul +kapoorhello +internet +surfer +welcome +cyber +home +hope +stay +long +enough +know +little +officially +third +final +semester +master +student +computer +sciences +department +university +wisconsin +madison +came +fall +getting +bachelor +degree +indianinstitute +technology +kanpur +interested +employers +please +check +resume +courses +schedulemydepartmentmyuniversityiitkanpuriitkclass +india +relatedlinks +menow +gone +official +want +personal +well +normal +kind +born +andraised +small +loving +family +comprising +parents +elder +sister +nice +town +india +called +kanpur +came +states +fall +good +fortune +living +madison +moneymagazine +rated +livable +city +year +editormust +come +greenland +think +winter +livable +guess +shouldn +complain +fall +spring +madison +isawesome +summer +jose +california +working +almaden +research +centre +cannot +much +madison +summers +supposed +great +anyway +regret +great +time +area +whatever +money +magazine +says +think +northern +california +place +little +likes +love +music +take +look +collection +know +kind +movies +almost +genres +though +prefer +romance +comedy +comedy +shows +files +star +trek +reading +stuff +novels +philosophy +computers +surfing +sports +concerned +love +watching +cricket +india +playing +winning +tennis +figure +skating +gymnastics +semester +trying +learn +swimming +finally +bridge +nature +lover +enjoy +going +long +walks +hikes +cloudy +slightly +breezy +wish +could +jobs +television +travel +shows +youget +interesting +places +world +paid +musicstuffmovies +televisioninternettravelotherbookmarks +contact +meget +know +form +rest +contact +guestbookrahul +wisc +eduh diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..a40c51b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,98 @@ +rajesh +raman +home +page +olde +homme +page +offe +rajesh +raman +rajesh +raman +computer +sciences +department +west +dayton +street +madison +email +raman +wisc +telephone +ohio +wesleyan +university +majors +computer +science +mathematics +minor +music +current +first +year +graduate +student +university +winsonsin +madison +department +computer +science +personal +curriculum +vitae +postscript +specifications +current +courses +computer +architecture +saluja +system +performance +evaluation +modeling +livny +distributed +systems +information +current +work +team +member +condor +project +integral +part +committee +bookmarks +chimera +novelty +monster +chaos +subject +contradiction +prodigy +judge +things +feebleworm +earth +depositary +truth +cloaca +uncertainty +error +theglory +shame +universe +blaise +pascal +last +modified +rajesh +raman +raman +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..c1801767 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,88 @@ +karthik +pagekarthikeyan +ramasamyabouti +student +computer +sciences +department +university +wisconsin +research +interests +mostly +database +operating +systems +work +jeffrey +naughton +paradise +project +projectshacking +david +dewitt +jeffrey +naughton +connectivityparadise +parallel +database +wisconsin +pthread +wrapperspublicationsstorage +estimation +multidimensional +aggregates +presence +hierarchies +amit +shukla +prasad +deshpande +jeffrey +naughton +karthikeyan +ramasamy +international +conference +large +databases +mumbai +bombay +available +presentationsweb +picturearchitecture +alternatives +scalable +serversphoto +albumencounter +leafpersonal +inforesume +financemoney +wall +street +journal +personal +interestshacking +photographycontact +informationstreet +address +dayton +computer +sciences +department +madison +electronic +mail +addresskarthik +wisc +eduoffice +phone +number +comments +suggestionsplease +tell +think +home +page +might +improve diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..4ab90c66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,24 @@ +kelly +home +page +kelly +ratliffoffice +phone +email +kelly +wisc +edulast +login +office +hours +information +students +genealogy +page +interesting +places +visit +space +construction +stay +tuned diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..6795f54e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,64 @@ +viresh +ratnakar +page +viresh +ratnakar +research +assistant +computer +sciences +department +university +wisconsin +madison +research +area +digital +image +video +compression +advisor +miron +livny +main +interests +based +compression +vector +quantization +fractal +compression +quality +control +lossy +compression +production +mode +compression +publications +home +page +invoke +qclicauthor +available +qclic +images +invoke +qclicbrowse +available +things +revere +revealed +clicking +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +ratnakar +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..79f91480 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,97 @@ +monasterius +ominous +doominus +welcome +monasterius +ominous +doominus +brother +richard +without +beard +personal +haven +address +office +haven +address +office +haven +address +university +wisconsin +madison +computer +sciences +department +west +dayton +street +madison +office +number +rcarl +wisc +office +hours +thur +home +page +current +course +load +distributed +operating +systems +monday +wednesday +underwater +fire +prevention +saturday +advanced +computer +architecture +tuesday +thursday +math +introduction +whole +numbers +emphasis +number +sunday +subsurface +depository +engineering +grave +digging +monday +wednesday +friday +many +shades +brother +richard +professional +ominously +doomed +political +goofy +solitary +innebriated +vampiric +serious +nostalgic +funky +monk +fellows +order +ever +need +graphic +artist +desktop +publisher +check +best diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..181d0b15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,1961 @@ +thomas +reps +home +page +thomas +repsprofessorcomputer +sciences +departmentuniversity +wisconsin +madison +west +dayton +streetmadison +usae +mail +reps +wisc +telephone +secretary +department +cornell +university +curriculum +vitae +research +interests +program +slicing +differencing +merging +interprocedural +dataflow +analysis +alias +analysis +pointer +analysis +shape +analysis +language +based +program +development +environments +incremental +computing +attribute +grammars +also +thehome +page +wisconsin +program +slicing +project +contents +research +summary +categorized +index +publications +list +publications +visitors +post +doctoral +associates +students +research +summarymy +research +aimed +creating +tools +support +thedevelopment +complex +software +systems +objective +createtools +provide +powerful +language +specific +program +manipulationoperations +particular +work +explored +program +slicingcan +serve +basis +program +manipulation +operations +slice +program +respect +program +elementss +projection +program +includes +program +elements +thatmight +affect +either +directly +transitively +values +thevariables +used +members +slicing +allows +findsemantically +meaningful +decompositions +programs +thedecompositions +consist +elements +textually +contiguous +program +slicing +fundamental +operation +solvingmany +software +engineering +problems +instance +applicationsin +program +understanding +maintenance +debugging +testing +differencing +specialization +reuse +merging +projects +workers +carrying +aimed +atimproving +underlying +technology +program +slicing +relatedoperations +implementing +program +slicers +developing +methods +using +slicing +software +engineering +tools +andbuilding +slicing +based +program +manipulation +tools +clickherefor +home +page +wisconsin +program +slicing +project +recently +established +unexpected +connections +betweeninterprocedural +dataflow +analysis +previous +work +oninterprocedural +program +slicing +particular +showed +large +class +interproceduraldataflow +analysis +problems +solved +transformingthem +special +kind +graph +reachability +problem +graph +reachability +problem +solved +precisely +polynomial +timeby +algorithm +originally +developed +interprocedural +slicing +also +interested +subject +incremental +algorithms +incremental +algorithm +mean +algorithm +makes +solution +probleminstance +find +solution +nearby +problem +instance +categorized +index +publicationsprogram +slicing +differencing +merging +overview +icse +slicing +dagstuhl +slicing_patent +david +binkley +thesis +acta +toplas +also +pldi +chopping +fseb +differencing +differencing +yang +thesismerging +tosem +tosem +also +sigsoft +david +binkley +thesiswuu +yang +thesis +iwscm +toplas +also +popla +esop +iwsvcc +algebra +slices +applications +program +merging +also +esop +ccpsd +amast +npfo_submission +semantics +slicing +ccipl +poplb +applications +slicing +dagstuhl +pepma +implemented +slicing +system +prog_integration_system +prog_integration_manual +note +system +described +papers +handles +small +subsetof +pascal +system +distributed +license +obtain +clickinghere +currently +retargeting +implementation +handle +programs +andexpect +distribute +system +supports +slicing +chopping +anddifferencing +probably +integration +programs +miscellaneous +thesesdavid +binkley +thesiswuu +yang +thesisphil +pfeiffer +thesisinterprocedural +dataflow +analysisdemand +idfa +bottom +logic +programming +magic +sets +transformation +also +exhaustive +demand +idfa +graph +reachability +also +fsea +popl +diku +idfa +using +graph +reachability +tcs_ide_paper +also +fase +ptime +completeness +idfa +acta_paper +alias +analysis +pointer +analysis +shape +analysis +dagstuhl +also +popl +pepmb +pfeiffer_thesis +sigplan +language +based +program +development +environments +dagstuhl +synthesizer +generator +book +synthesizer +generator +manual +computer +also +lape +popl +reps +thesis +psde +toplas +also +popl +popl +compcon +cacm +also +also +incremental +computing +ramalingam +thesis +jalg_paper +dagstuhl +popl +popl +popl_notes +synthesizer +generator +book +acta +computer +also +lape +popl +reps +thesis +toplas +also +popl +popl +attribute +grammars +spaa +synthesizer +generator +book +synthesizer +generator +manual +acta +toplas +popl +reps +thesis +psde +popl +toplas +also +popl +popl +compcon +list +publicationsbooksreps +teitelbaum +synthesizer +generator +system +constructinglanguage +based +editors +springer +verlag +york +reps +teitelbaum +synthesizer +generator +reference +manual +third +edition +springer +verlag +york +chinese +reprint +published +world +publishing +corporation +beijing +china +reps +generating +language +based +environments +press +cambridge +journal +publicationssagiv +reps +horwitz +precise +interprocedural +dataflow +analysis +applications +toconstant +propagation +appear +theoretical +computer +science +reps +sequential +nature +interprocedural +program +analysis +problems +appear +acta +informatica +acta +ramalingam +reps +incremental +algorithm +generalization +shortest +path +problem +appear +journal +algorithms +j_alg +ramalingam +reps +computational +complexity +dynamic +graph +problems +theoretical +computer +science +binkley +horwitz +reps +program +integration +languages +procedure +calls +transactions +software +engineering +methodology +january +tosem +ramalingam +reps +competitive +line +algorithms +dynamic +priority +ordering +problem +information +processing +letters +yang +horwitz +reps +program +integration +algorithm +accommodates +semantics +preservingtransformations +transactions +software +engineering +methodology +july +reps +algebraic +properties +program +integration +science +computer +programming +horwitz +reps +efficient +comparison +program +slices +acta +informatica +horwitz +reps +binkley +interprocedural +slicing +using +dependence +graphs +transactions +programming +languages +systems +january +toplas +horwitz +prins +reps +integrating +interfering +versions +programs +transactions +programming +languages +systems +july +toplas +reps +incremental +evaluation +attribute +grammarswith +unrestricted +movement +tree +modifications +acta +informatica +reps +teitelbaum +language +processing +program +editors +ieee +computer +november +reps +demers +sublinear +space +evaluation +algorithms +attribute +grammars +transactions +programming +languages +systems +july +reps +teitelbaum +demers +incremental +context +dependent +analysis +language +based +editors +transactions +programming +languages +systems +july +teitelbaum +reps +cornell +program +synthesizer +syntax +directed +programming +environment +communications +september +invited +papershorwitz +reps +program +dependence +graphs +software +engineering +proceedings +fourteenth +international +conferenceon +software +engineering +melbourne +australia +york +icse +reps +horwitz +semantics +based +program +integration +proceedings +second +european +symposium +programming +nancy +france +march +lecture +notes +computer +science +ganzinger +springer +verlag +york +book +chaptersreps +demand +interprocedural +program +analysis +using +logic +databases +applications +logic +databases +ramakrishnan +kluwer +academic +publishers +boston +horwitz +reps +binkley +interprocedural +slicing +using +dependence +graphs +software +change +impact +analysis +bohner +arnold +ieee +computer +society +alamitos +appear +reprinted +fromacm +transactions +programming +languages +systems +january +toplas +horwitz +reps +binkley +interprocedural +slicing +using +dependence +graphs +software +merging +slicing +berzins +ieee +computer +society +alamitos +reprinted +fromacm +transactions +programming +languages +systems +january +toplas +horwitz +prins +reps +integrating +interfering +versions +programs +software +merging +slicing +berzins +ieee +computer +society +alamitos +reprinted +fromacm +transactions +programming +languages +systems +july +toplas +ramalingam +reps +theory +program +modifications +software +merging +slicing +berzins +ieee +computer +society +alamitos +reprinted +fromproceedings +colloquium +combining +paradigmsfor +software +development +brighton +april +lecture +notes +computer +science +abramsky +maibaum +springer +verlag +york +reps +teitelbaum +language +processing +program +editors +language +architectures +programming +environments +ichikawa +tsubotani +world +scientific +publishing +company +singapore +reprinted +ieee +computer +november +teitelbaum +reps +cornell +program +synthesizer +syntax +directed +programming +environment +interactive +programming +environments +barstow +sandewall +shrobe +mcgraw +hill +reprinted +communications +september +teitelbaum +reps +horwitz +wherefore +cornell +program +synthesizer +software +development +environments +wasserman +ieee +computer +society +washington +reprinted +proceedings +sigplan +sigoa +symposiumon +text +manipulation +portland +june +sigplan +notices +june +conference +publicationssiff +reps +program +generalization +software +reuse +appear +sigsoft +proceedings +fourth +sigsoftsymposium +foundations +software +engineering +francisco +october +sigsoft +software +engineering +notes +reps +turnidge +program +specialization +program +slicing +proceedings +dagstuhl +seminar +partial +evaluation +schloss +dagstuhl +wadern +germany +lecture +notes +computer +science +danvy +glueck +thiemann +springer +verlag +york +dagstuhl +sagiv +reps +wilhelm +solving +shape +analysis +problems +languages +destructive +updating +conference +record +twenty +third +symposiumon +principles +programming +languages +petersburg +york +popl +horwitz +reps +sagiv +demand +interprocedural +dataflow +analysis +sigsoft +proceedings +third +sigsoftsymposium +foundations +software +engineering +washington +october +sigsoft +software +engineering +notes +fsea +reps +rosay +precise +interprocedural +chopping +sigsoft +proceedings +third +sigsoftsymposium +foundations +software +engineering +washington +october +sigsoft +software +engineering +notes +fseb +reps +hentenryck +semantic +foundations +binding +time +analysis +imperative +programs +pepm +proceedings +sigplan +symposium +onpartial +evaluation +semantics +based +program +manipulation +jolla +california +june +york +pepma +reps +shape +analysis +generalized +path +problem +pepm +proceedings +sigplan +symposium +onpartial +evaluation +semantics +based +program +manipulation +jolla +california +june +york +pepmb +sagiv +reps +horwitz +precise +interprocedural +dataflow +analysis +applications +toconstant +propagation +proceedings +fase +colloquium +formalapproaches +software +engineering +aarhus +denmark +lecture +notes +computer +science +mosses +nielsen +schwartzbach +springer +verlag +york +tapsoft +reps +horwitz +sagiv +precise +interprocedural +dataflow +analysis +graph +reachability +conference +record +twenty +second +symposiumon +principles +programming +languages +francisco +popl +reps +horwitz +sagiv +rosay +speeding +slicing +sigsoft +proceedings +second +sigsoft +symposium +onthe +foundations +software +engineering +orleans +december +sigsoft +software +engineering +notes +december +reps +solving +demand +versions +interprocedural +analysis +problems +proceedings +fifth +international +conference +compilerconstruction +edinburgh +scotland +april +lecture +notes +computer +science +fritzson +springer +verlag +york +ramalingam +reps +incremental +algorithm +maintaining +dominator +tree +reducibleflowgraph +conference +record +twenty +first +symposiumon +principles +programming +languages +portland +popl +reps +scan +grammars +parallel +attribute +evaluation +data +parallelism +proceedings +fifth +symposium +parallel +algorithms +andarchitectures +velen +germany +june +july +spaa +ramalingam +reps +modification +algebras +proceedings +second +international +conference +onalgebraic +methodology +software +technology +amast +iowa +city +iowa +ramalingam +reps +theory +program +modifications +proceedings +colloquium +combining +paradigmsfor +software +development +brighton +april +lecture +notes +computer +science +abramsky +maibaum +springer +verlag +york +yang +horwitz +reps +program +integration +algorithm +accommodates +semantics +preserving +transformations +sigsoft +proceedings +fourth +sigsoft +symposiumon +software +development +environments +irvine +december +software +engineering +notes +december +reps +algebraic +properties +program +integration +proceedings +european +symposium +programming +copenhagen +denmark +lecture +notes +computer +science +jones +springer +verlag +york +reps +bricker +illustrating +interference +interfering +versions +programs +proceedings +second +international +workshop +softwareconfiguration +management +princeton +october +software +engineering +notes +november +horwitz +pfeiffer +reps +dependence +analysis +pointer +variables +proceedings +sigplan +conference +programming +languagedesign +implementation +portland +june +sigplan +notices +july +reps +yang +semantics +program +slicing +program +integration +proceedings +colloquium +current +issuesin +programming +languages +barcelona +spain +march +lecture +notes +computer +science +diaz +orejas +springer +verlag +york +horwitz +reps +binkley +interprocedural +slicing +using +dependence +graphs +proceedings +sigplan +conference +programminglanguage +design +implementation +atlanta +june +sigplan +notices +july +reps +horwitz +prins +support +integrating +program +variants +environment +forprogramming +large +proceedings +international +workshop +software +versionand +configuration +control +grassau +germany +berichte +german +chapter +winkler +teubner +stuttgart +germany +horwitz +prins +reps +integrating +interfering +versions +programs +conference +record +fifteenth +symposium +principles +ofprogramming +languages +diego +january +york +horwitz +prins +reps +adequacy +program +dependence +graphs +representing +programs +conference +record +fifteenth +symposium +principles +ofprogramming +languages +diego +january +york +poplb +reps +marceau +teitelbaum +remote +attribute +updating +language +based +editors +conference +record +thirteenth +symposium +principles +ofprogramming +languages +petersburg +january +york +reps +teitelbaum +synthesizer +generator +proceedings +sigsoft +sigplan +software +engineeringsymposium +practical +software +development +environments +pittsburgh +april +sigplan +notices +reps +alpern +interactive +proof +checking +conference +record +eleventh +symposium +onprinciples +programming +languages +salt +lake +city +utah +january +york +reps +static +semantic +analysis +language +based +editors +digest +papers +ieee +spring +compcon +francisco +march +ieee +computer +society +washington +reps +optimal +time +incremental +semantic +analysis +syntax +directed +editors +conference +record +ninth +symposium +principlesof +programming +languages +albuquerque +january +york +teitelbaum +reps +horwitz +wherefore +cornell +program +synthesizer +proceedings +sigplan +sigoa +symposium +text +manipulation +portland +june +sigplan +notices +june +demers +reps +teitelbaum +incremental +evaluation +attribute +grammars +application +tosyntax +directed +editors +conference +record +eighth +symposium +principlesof +programming +languages +williamsburg +january +york +softwarereps +bricker +rosay +wisconsin +program +integration +system +release +april +release +april +release +july +licensed +sites +click +herefor +license +information +patentsreps +horwitz +binkley +interprocedural +slicing +computer +programs +using +dependence +graphs +patent +number +november +pending +submissionsreps +sagiv +wilhelm +solving +shape +analysis +problems +languages +destructive +updating +computer +sciences +department +university +wisconsin +madison +august +submitted +journal +publication +horwitz +reps +sagiv +demand +interprocedural +dataflow +analysis +computer +sciences +department +university +wisconsin +madison +august +click +access +paper +click +access +latest +version +submitted +journal +publication +ramalingam +reps +programs +computer +sciences +department +university +wisconsin +madison +november +click +access +latest +version +submitted +journal +publication +publications +reportsreps +sagiv +wilhelm +shape +abstractions +shape +analyses +computer +sciences +department +university +wisconsin +madison +july +reps +sagiv +wilhelm +solving +shape +analysis +problems +languages +destructive +updating +computer +sciences +department +university +wisconsin +madison +july +leeuwen +mehlhorn +reps +incremental +computation +dynamic +algorithms +dagstuhl +seminar +report +international +conference +research +center +computer +science +ibfi +schloss +dagstuhl +wadern +germany +reps +sagiv +horwitz +interprocedural +dataflow +analysis +graph +reachability +datalogisk +institut +university +copenhagen +copenhagen +denmark +april +diku +reps +wisconsin +program +integration +system +reference +manual +release +computer +sciences +department +university +wisconsin +madison +july +manual +psramalingam +reps +categorized +bibliography +incremental +computation +conference +record +twentieth +symposiumon +principles +programming +languages +charleston +york +tutorial +paper +reps +incremental +computation +unpublished +tutorial +notes +presented +twentieth +symposium +principles +programming +languages +charleston +klint +reps +snelting +programming +environments +dagstuhl +seminar +report +international +conference +research +center +computer +science +ibfi +schloss +dagstuhl +wadern +germany +binkley +horwitz +reps +identifying +semantic +differences +programs +procedures +extendedabstract +computer +sciences +department +university +wisconsin +madison +september +ball +horwitz +reps +correctness +algorithm +reconstituting +program +dependence +graph +computer +sciences +department +university +wisconsin +madison +july +click +access +paper +ramalingam +reps +semantics +program +representation +graphs +computer +sciences +department +university +wisconsin +madison +december +click +access +paper +binkley +horwitz +reps +multi +procedure +equivalence +theorem +computer +sciences +department +university +wisconsin +madison +november +click +access +paper +reps +demonstration +prototype +tool +program +integration +computer +sciences +department +university +wisconsin +madison +january +click +access +paper +visitors +post +docs +studentsvisitors +mooly +sagiv +israel +scientific +center +jiazhen +robert +paige +post +doctoral +associates +yang +univ +wisconsin +currently +associate +professor +department +computer +information +science +national +chiao +tung +university +taiwan +prins +cornell +univ +currently +associate +professor +department +computer +science +university +north +carolina +chapel +hill +studentsramalingam +bounded +incremental +computation +lecture +notes +computer +science +springer +verlag +york +binkley +multi +procedure +program +integration +dissertation +tech +computer +sciences +department +university +wisconsin +madison +august +click +access +paper +pfeiffer +dependence +based +representations +programs +reference +variables +dissertation +tech +computer +sciences +department +university +wisconsin +madison +august +click +access +paper +yang +algorithm +semantics +based +programintegration +dissertation +tech +computer +sciences +department +university +wisconsin +madison +august +click +access +paper diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..45c8a794 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,94 @@ +saeed +home +pagespeed +function +statusclock +window +status +date +window +settimeout +statusclock +speed +function +clearid +window +cleartimeout +saeed +mirza +tech +department +computer +science +engineering +indian +institute +technology +kanpur +graduate +student +univ +wisconsin +madison +computer +science +department +home +lucknow +india +like +spend +time +listning +indian +film +songs +netsurfing +reading +comics +hero +calvin +love +people +beautiful +seem +beautiful +love +contact +home +office +randall +apartment +madison +comp +department +univ +wisconsin +madison +dayton +street +madison +contact +best +email +saeed +wisc +friends +contact +right +pictures +wismad +suggestions +send +check +guestbook +page +accessed +times +since +last +updated +saeed +copy +rights +reserved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..4114cf9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,38 @@ +sally +peterson +home +page +sally +goodwin +peterson +lecturercomputer +sciences +university +wisconsin +dayton +madison +mail +sally +wisc +edutelephone +interests +desktop +computing +real +time +operating +systems +programming +languages +courses +taught +fall +comp +lectures +using +computers +last +changed +september +sally +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..896c42c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,179 @@ +amit +home +page +warning +reaching +protocol +office +home +email +amit +wisc +snail +mail +computer +sciences +dept +university +wisconsin +madison +princeton +madison +phone +education +working +towards +guidance +jeff +naughton +master +science +computer +sciences +university +wisconsin +madison +bachelor +technology +computer +sciences +engineering +indian +institute +technology +madras +research +interests +online +analytical +processing +query +processing +performance +evaluation +publications +storage +estimation +multidimensional +aggregates +presence +hierarchies +amit +shukla +prasad +deshpande +jeffrey +naughton +karthikeyan +ramasamy +international +conference +large +databases +mumbai +bombay +paper +postscript +slides +presentation +vldb +interesting +links +madison +running +boston +marathon +chicago +marathon +madison +marathon +york +marathon +seattle +marathon +george +marathon +utah +database +related +pointers +research +network +active +databases +bibliography +server +databases +logic +programming +competitive +profiles +spec +ideas +international +database +conferences +sorted +name +university +trier +database +conferences +sorted +date +sigmod +server +large +data +bases +vldb +endowment +database +article +archive +massive +digital +data +systems +mdds +initiative +multimedia +information +sources +national +industrial +information +infrastructure +protocols +niiip +consortium +transaction +processing +council +transcoop +transaction +management +support +cooperative +applications +olap +page +olap +introduction +pilot +software +interested +helping +educate +needy +children +look +asha +home +page +personal +pageand +bookmarksare +also +online +garfield diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..8c95735b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,41 @@ +ashwin +home +page +page +construction +name +ashwin +graduate +student +department +computer +science +university +wisconsin +madison +come +india +hadmy +undergraduate +education +indianinstitute +technology +bombay +department +iitb +fantastic +place +worth +visiting +like +contact +canfinger +meto +find +whereabouts +alternatively +send +email +sashwin +wisc +bookmarks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..c2570825 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,228 @@ +subramanya +sastry +home +pagei +subramanya +sastry +mugshot +mine +come +hospet +town +karnataka +india +years +schooling +near +hospet +away +hampi +ruins +vijayanagara +empire +also +away +tungabhadra +built +across +tungabhadra +river +places +beautiful +unfortunately +dont +photographs +places +would +scanned +photo +album +long +undergraduate +indian +institute +technology +kanpur +department +computer +science +engineering +years +wonderful +photo +album +fewphotographs +time +gokul +also +maintaining +photo +album +contains +many +photos +iitk +iitk +class +homepage +information +classmatesat +iitk +presentcurrently +graduate +student +computer +sciences +department +university +wisconsin +madison +plan +courses +registeredfor +spring +interestsmy +academic +interests +fields +architecture +programming +languages +compilers +hope +graduate +fields +cricket +favourite +sports +used +playphatta +iitk +tennis +ball +version +cricket +thati +champ +anything +provided +entertainmentin +company +friends +bookmarks +links +cricket +sites +enjoy +listening +music +anything +pleasant +must +dont +consider +hard +rock +metal +something +really +pleasant +donot +watch +much +whatever +watched +liked +seinfeld +sshow +much +reading +goes +voracious +reader +rather +whati +like +better +like +like +horror +fantasy +neither +like +science +fiction +unsuccesfully +tried +come +grips +like +something +earth +know +mean +something +hint +romance +ifpossible +jeffrey +archer +favourite +author +date +also +liked +jane +austen +pride +prejudice +much +talking +reading +ramesh +mahadeven +sarticles +make +interesting +reading +also +like +playing +bridge +back +kanpur +picked +wonderfulgame +company +friends +long +time +back +itagain +also +like +solving +crosswords +cryptic +kind +inthe +past +year +devoted +much +time +hobby +mine +last +updated +january +send +comments +suggestions +sastry +wisc +eduuniversity +wisconsin +madison +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..afbc6236 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,347 @@ +susan +calcari +home +page +susan +calcarimanager +scout +servicescomputer +sciences +departmentuniversity +wisconsin +madisonscal +wisc +edumy +title +internet +scout +manage +scout +services +computer +sciences +department +university +wisconsin +madison +scoutservices +project +internicand +supported +national +sciencefoundation +scout +services +supports +internet +users +thehigher +education +community +providing +timely +information +bestresources +tools +internet +goal +help +researchers +andeducators +internet +effectively +work +week +filter +hundreds +items +internet +edit +organize +important +present +information +multiple +usable +formats +services +include +scout +report +scout +toolkit +happenings +coming +soonthe +scout +sprouts +report +newsletter +written +kids +kids +people +receive +scout +report +week +email +andthousands +read +scout +report +annotatedlisting +best +newly +discovered +internet +resources +tools +publication +kind +devoted +research +andeducation +community +scout +report +selective +itemsincluded +issue +people +receive +happeningspostings +every +weekday +thousands +read +orthe +newsgroup +scout +services +page +moreinformation +services +professional +background +involved +nation +wide +internet +projects +since +wheni +joined +merit +arbor +organization +managed +thensfnet +backbone +project +worked +informationservices +division +project +spent +three +years +speaking +tonational +international +higher +education +audiences +internetand +resources +also +developed +produced +merit +network +seminarseries +first +national +seminar +series +focused +needs +internetend +users +later +became +director +network +information +services +forcerfnet +respected +internet +provider +based +diego +wrote +proposal +resulted +award +part +internicproject +year +cooperative +agreement +national +sciencefoundation +third +year +agreement +terminated +theportion +internic +based +diego +chose +continue +workof +internet +scout +time +elected +relocate +wisconsin +andrequested +approve +relocation +scout +project +thecomputer +sciences +department +madison +heartilyagreed +future +plans +scout +servicesat +time +scout +services +staff +includes +jack +solock +speciallibrarian +project +expand +theaddition +including +open +systemadministrator +position +open +speciallibrarian +position +june +matthew +livesey +join +staff +aproject +assistant +goals +staff +include +expansion +thescout +toolkit +addition +discipline +specific +services +asscout +reports +pages +devoted +specific +areas +study +scout +services +also +branch +researchareas +collaboration +internic +support +services +groups +madison +campuses +potential +research +topics +includenetwork +information +discovery +retrieval +nidr +anddiscipline +oriented +information +gathering +publication +depending +onthe +nature +research +additional +staff +hired +willincludecomputer +science +researchers +graduate +undergraduate +levels +information +visit +site +find +scout +services +join +ofour +mailing +lists +ifyou +interested +applying +open +positions +theonline +descriptions +systemadministrator +special +librarian +send +aresume +writing +sample +address +feel +free +contactme +telephone +email +susan +calcariinternet +scout +computer +sciences +departmentuniversity +wisconsin +madison +dayton +street +madison +scal +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..d8d8d855 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,64 @@ +chandrasekar +home +page +welcome +chandrasekar +homepage +worry +happy +present +graduate +student +department +computer +science +past +born +june +coimbatore +southern +state +tamilnadu +inindia +high +school +education +higher +secondary +school +coimbatore +undergraduate +education +theindian +institute +technology +kharagpur +majoring +dept +computer +science +engineering +personal +stuff +residence +kendall +avenue +madison +officedept +computer +science +dayton +madison +sivasankaran +chandrasekar +schandra +wisc +last +updated +finger +find +whereabouts +page +accessed +times +since diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..646225e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,36 @@ +eric +schnarr +home +pageeric +schnarr +schnarr +wisc +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usaphone +advisor +larusresearch +interests +architecture +description +languagesfunctional +language +designinteresting +links +wisconsin +wind +tunnel +sacm +hockey +club +dragon +bytes +schnarr +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..c8b49551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,91 @@ +yannis +schoinas +home +page +yannis +schoinas +schoinas +wisc +research +assistantdepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +note +page +construction +advisor +mark +hill +research +interests +parallel +systemspublications +fine +grain +access +control +distributed +shared +memory +ioannis +schoinas +babak +falsafi +alvin +lebeck +steven +reinhardt +james +larus +david +wood +sixth +international +conference +architectural +support +programminglanguages +operating +systems +asplos +application +specific +protocols +user +level +shared +memory +babak +falsafi +alvin +lebeck +steven +reinhardt +ioannis +schoinas +mark +hill +james +larus +anne +rogers +david +wood +supercomputing +education +university +crete +iraklio +university +crete +iraklio +last +updated +july +cretan +cooking diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..8812ac8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,47 @@ +beverly +seavey +home +page +beverly +seavey +currently +registered +grad +student +computer +science +switch +special +degree +computational +biology +computational +interests +mine +include +asian +classical +dance +different +versions +ramayana +india +southeast +asia +dance +dramas +ramayana +ramakien +india +southeast +asia +wish +could +finger +give +account +hairballs +keyboards +finger +keeper +instead diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..0b397038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,79 @@ +scott +colville +home +pagescott +colville +home +pagee +mail +wisc +eduoffice +address +computer +science +office +dayton +madison +home +frances +madison +welcome +welcome +home +page +well +school +back +university +wisconsin +madison +seen +largest +ball +cheese +want +school +links +uwisc +home +page +uwisc +home +pagein +addition +list +pages +find +interest +hopefully +also +enjoy +beer +world +drinking +games +absolut +adds +caffeine +ratings +soda +guide +lock +pickingand +educational +artsy +pages +world +fact +book +constitution +english +dictionary +roget +thesaurus +poetry +database +wisc +last +updated +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..fb93b6cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,74 @@ +steve +seitz +animation +writeup +image +motion +analysis +character +animation +control +steve +seitz +chuck +dyerour +research +motivated +problem +teachinga +graphical +model +perform +realistic +motion +problem +hasits +roots +cartoon +animation +modern +applications +tocomputer +animation +virtual +reality +teleconferencing +robotics +task +endow +graphical +model +knowledge +performa +repertoire +interesting +motions +learned +motions +beinvoked +directly +high +level +cues +smile +walk +inferred +anabstract +goal +store +cued +levelevents +virtual +input +device +image +sequence +projects +include +analysis +periodic +motion +tracking +rigid +nonrigid +objects diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..4b271283 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,132 @@ +steve +seitz +view +interpolations +view +synthesis +image +interpolation +investigators +steve +seitz +chuck +dyerwe +devised +provably +correct +automated +technique +creating +views +scene +basis +views +scene +technique +relies +geometric +image +interpolation +known +morphing +graphicscommunity +produce +intermediate +images +although +morphing +techniquescurrently +enjoy +widespread +theoretical +validityhas +established +particular +interpolation +viewsof +scene +produce +sequence +physically +valid +views +ofthat +scene +surprisingly +answer +providing +images +first +undergo +simplerectification +procedure +certain +assumptions +visibility +theprojection +process +satisfied +view +synthesis +work +described +uses +automated +stereo +techniques +todetermine +image +correspondences +recent +work +view +morphing +considers +user +interaction +used +guide +interpolation +computed +interpolations +three +different +image +pairs +therectified +original +images +shown +left +right +click +theinterpolated +images +center +mpeg +movies +showing +computedinterpolations +view +interpolations +original +interpolated +original +work +described +physically +valid +view +synthesis +image +interpolation +seitz +dyer +proc +workshop +representation +visual +scenes +last +changed +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..30c4657e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,158 @@ +view +morphing +steve +seitz +view +morphing +investigators +steve +seitz +chuck +dyer +related +publications +view +morphing +appear +siggraph +toward +image +based +scene +representation +using +view +morphing +appear +icpr +image +morphing +techniques +generate +compelling +transitions +betweenimages +however +differences +object +pose +viewpoint +often +causeunnatural +distortions +image +morphs +difficult +correct +manually +using +basic +principles +projectivegeometry +paper +introduces +simple +extension +image +morphingthat +correctly +handles +projective +camera +scene +transformations +technique +called +view +morphing +works +prewarping +imagesprior +computing +morph +postwarping +interpolated +images +knowledge +shape +required +technique +appliedto +photographs +drawings +well +rendered +scenes +ability +synthesize +changes +viewpoint +image +structureaffords +wide +variety +interesting +effects +simple +imagetransformations +view +morphing +images +object +taken +different +viewpoints +produces +illusion +physically +moving +virtual +camera +click +mpeg +movie +interpolation +morph +views +different +faces +produces +simultaneous +interpolation +facial +shape +color +pose +click +mpeg +movie +interpolation +mpeg +movies +view +morphs +resolutions +mona +lisa +mona +lisa +reflection +high +resolution +mpeg +movie +frames +resolution +mpeg +movie +frames +click +mpeg +movie +jude +shavlik +chuck +dyer +last +changed +september diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..ba4f234b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,196 @@ +periodic +motion +information +periodic +cyclic +motion +analysis +steve +seitz +chuck +dyermany +real +life +motions +periodic +frame +reference +instance +human +locomotory +motions +walking +running +skipping +shuffling +areperiodic +frame +reference +moves +person +havedeveloped +approach +determining +image +sequence +could +beenproduced +object +whose +motion +periodic +reference +frame +unlike +previous +attempts +determine +periodicity +information +ourapproach +allows +camera +move +filming +information +poscript +paper +click +period +tracethis +image +shows +period +trace +lines +recovered +imagesequence +phonograph +turntable +ramps +correspond +moments +timewhere +turntable +momentarily +slowed +period +trace +shownsuperimposed +error +surface +recovered +real +repeating +motions +tend +perfectly +even +period +variesslightly +cycle +next +physically +important +changesin +scene +generalization +period +defined +cyclic +motionsthat +makes +periodic +variation +explicit +representation +called +period +trace +compact +purely +temporal +describing +evolutionof +object +scene +without +reference +spatial +quantities +asposition +velocity +delimiting +cycles +identifying +correspondencesacross +cycles +period +trace +provides +means +temporally +parsinga +cyclic +motion +addition +several +purely +temporal +motion +features +canbe +derived +relating +nature +location +irregularities +period +tracecan +also +used +medical +image +enhancement +compositing +images +fromdifferent +cycles +furthermore +period +trace +reliably +recoveredfrom +image +sequences +view +invariant +fashion +using +theory +affine +invariance +information +poscript +paper +clickhere +heart +image +enhancement +corresponding +angiographic +images +determined +period +trace +bottom +composite +corresponding +images +note +additionalstructure +visible +composite +apparent +single +images diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..a7586dcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,62 @@ +steve +seitz +home +page +steve +seitz +seitz +wisc +graduate +student +berkeley +math +areas +interest +image +motion +analysis +image +based +rendering +machine +vision +computer +graphics +research +projects +view +morphing +view +synthesis +mpeg +movie +showing +computed +interpolation +images +left +click +examples +mpeg +movies +analysis +cyclic +motion +recent +publications +stuff +frequented +links +wisconsin +computer +vision +group +surreal +caching +click +closer +look +seitz +last +changed +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..dc0f7acb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,199 @@ +home +page +madison +machine +learning +research +group +home +page +contains +relevant +information +themembers +machine +learning +research +group +mlrg +university +wisconsin +madison +table +contents +group +members +mlrg +archive +recent +papers +mlrg +archive +datasets +domain +theories +mlrg +paper +reading +schedule +seminar +machine +learning +graduate +course +relevant +local +links +useful +external +links +group +members +carolyn +allex +jonathon +bodner +kevin +cherkauer +mark +craven +tina +eliassi +richard +maclin +graduated +august +david +opitz +graduated +august +jude +shavlik +mlrg +archive +recent +papersvisit +page +describing +recent +publications +ascii +file +containing +list +recentabstractsis +also +available +mlrg +archive +datasets +domain +theoriesyou +access +directory +contains +severalml +testbeds +also +access +wisconsin +breast +cancer +database +prof +olvi +mangasarian +sgroup +mlrg +paper +reading +schedule +mlrg +current +schedule +papers +read +schedules +also +line +seminar +current +schedule +local +seminar +relevant +local +links +machine +learning +graduate +course +madison +machine +learning +math +programming +group +computational +biology +madison +dept +madison +computational +neuroscience +madison +group +madison +computer +vision +group +madison +robotics +group +madison +dept +home +page +madison +home +page +doit +madison +center +mathematical +sciences +gopher +madison +graduate +school +madison +library +recent +table +contents +abstracts +selected +journals +mostly +wendt +library +readable +wisc +procs +workshop +agents +learn +agents +held +international +machine +learning +conference +local +links +last +modified +jude +shavlik +shavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..ceccfd2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,130 @@ +shubu +mukherjee +home +page +shubu +mukherjee +shubu +wisc +fiancee +mimi +nephew +avirup +months +graduate +research +assistant +computer +sciences +departmentuniversity +wisconsin +madison +west +dayton +street +madison +usaphone +shubhendu +mukherjee +click +button +jump +corresponding +home +page +article +advisor +mark +hill +research +project +wisconsin +wind +tunnel +publications +wisconsin +computer +architects +world +wide +computer +architects +badger +ballroom +dance +team +personal +interests +hobbies +morph +dionisios +courtesy +steve +seitz +random +interesting +linkseducationph +university +wisconsin +madison +spring +expected +university +wisconsin +madison +tech +indian +institute +technology +kanpur +india +research +summary +coherent +network +interfaces +dissertation +cachable +queues +design +space +isca +progress +distributed +shared +memory +mechanisms +cooperative +shared +memory +isca +commodity +workstations +submitted +publication +cache +coherence +protocols +custom +protocols +irregular +applications +ppopp +gray +software +dirsw +isca +ppopp +parallel +simulation +wind +tunnel +tutorial +cache +simulation +copyright +copy +shubu +mukherjee +rights +reserved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..d9120ecb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,27 @@ +michael +siff +home +page +michael +siff +philosophy +research +academic +interests +running +club +fall +midwest +seminar +wonderful +ways +waste +time +news +information +resources +books +movies +television +sports +humor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..d0550c32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,31 @@ +skrentny +home +page +skrentny +lecturercs +coordinatorgraduate +studentcomputer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +office +computer +sciencesemail +skrentny +wisc +edutelephone +related +links +university +wisconsin +department +university +wisconsin +groupskrentny +wisc +last +updated +february diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..758dd2e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,79 @@ +bryan +home +page +bryan +graduate +studentcomputer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +office +mail +wisc +edutelephone +computer +sciences +university +wisconsin +madison +computer +science +purdue +university +interests +intelligent +help +systems +human +computer +interaction +knowledge +representation +operating +system +activities +selected +recent +publications +travis +step +toward +intelligent +unix +help +system +knowledge +representation +unix +utilities +technical +report +university +wisconsin +madison +april +miller +fredriksen +empirical +study +reliability +unix +utilities +communications +related +links +university +wisconsin +department +university +wisconsin +group +professor +larry +travis +advisor +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..fdb93e64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,176 @@ +larry +travis +home +page +larry +travisprofessorcomputer +sciences +departmentuniversity +wisconsin +dayton +madison +mail +travis +wisc +edutelephone +university +californa +angeles +interests +expert +systems +procedural +control +automatic +deduction +computer +support +understanding +complex +data +philosophical +foundations +ofartificial +intelligence +computing +management +social +implications +computing +research +summary +research +centers +around +using +logic +basis +knowledge +formalization +expert +systems +deductive +augmentation +databasesystems +recent +work +focused +procedural +control +automaticdeduction +design +systems +support +contruction +display +testing +high +level +abstractions +patterns +formed +informationcontained +large +heterogeneous +databases +special +attention +beingdevoted +representation +geographic +information +waysthat +enhance +data +integration +data +visualizability +maps +actively +involved +several +expert +system +development +projects +andwith +large +intelligent +database +project +incorporates +databases +modeling +visualization +aids +single +integrated +system +organizational +social +issues +associated +introduction +information +technology +analysis +suppositionsunderlying +alternative +approaches +artificial +intelligence +current +students +chuck +ohare +bryan +scott +swanson +andy +whitsitt +derek +zahn +recent +publications +could +failures +expert +system +development +implementation +oravec +appear +journal +systems +software +computational +metaphor +artificial +intelligence +reflexive +examination +falsework +west +artificial +intelligence +magazine +society +landscape +alternative +metaphors +artificial +intelligence +west +artificial +intelligence +magazine +interesting +links +wisc +dept +wisc +group +last +changed +june +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..e2ced256 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,86 @@ +avinash +sodani +home +page +avinash +sodani +sodani +wisc +graduate +student +department +computer +sciences +university +wisconsin +madison +west +dayton +street +madison +education +computer +science +university +wisconsin +madison +tech +hons +computer +science +indian +institute +technology +kharagpur +india +july +academic +interests +computer +architecture +multiscalar +kestrel +project +programming +languages +compilers +interesting +course +projects +package +java +download +package +meet +batch +mates +related +links +info +center +departments +ranks +departments +india +related +links +india +news +hindu +online +edition +random +links +looking +kgpite +follow +toll +free +directory +wills +world +cricket +page +accessed +times +since diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..6d033e07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,522 @@ +guri +sohi +home +page +gurindar +sohi +sohi +wisc +associate +professor +computer +sciences +andelectrical +computer +engineering +addresses +education +research +interests +summary +current +graduate +students +recent +talks +recent +publications +recent +graduatesaddresses +department +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usasohi +wisc +eduphone +departmental +office +education +computer +science +university +illinois +urbana +electical +engineering +university +illinois +urbana +electrical +electronics +engineering +birla +institute +technology +science +pilani +india +research +interests +instruction +level +parallel +processing +compiling +architectures +shared +memory +multiprocessors +memory +systems +research +summary +current +research +focuses +design +thehighest +performance +uniprocessors +current +generation +currently +investigating +architecture +circa +processor +plenty +transistors +availableon +chip +challenge +resources +getthe +highest +possible +performance +executing +sequential +program +target +sustain +execution +ofover +instructions +cycle +ordinary +numeric +application +programs +research +group +investigating +several +issues +needto +resolved +goals +achieved +studying +characterizing +thenature +instruction +level +parallelism +numericapplication +programs +order +understand +available +parallelism +andhow +could +exploited +bulk +group +research +effort +expended +continuing +thedevelopment +multiscalar +processing +model +novel +paradigm +exploiting +currently +developing +multiscalar +compiler +andcarrying +detailed +simulation +studies +assessthe +potential +multiscalar +concept +current +graduate +students +todd +austin +scott +breach +andreas +moshovos +vijaykumarrecent +talkswill +instruction +sets +important +future +processors +given +risc +symposium +held +watson +researchcenter +yorktown +heights +november +file +compressed +postscript +generated +framemaker +multiscalar +processors +generic +multiscalar +talk +given +several +places +file +compressed +postscript +generated +framemaker +recent +publicationshigh +bandwidth +address +translation +multiple +issue +processors +austin +sohi +appear +inrd +annual +international +symposium +computer +architecture +appendix +ofdetailed +resultsis +also +available +zero +cycle +loads +microarchitecture +support +reducing +load +latencyt +austin +sohi +annual +international +symposium +microarchitecture +micro +microarchitecture +superscalar +processorsj +smith +sohi +proceedings +ieee +december +hardware +mechanism +dynamic +reordering +memory +referencesm +franklin +sohi +appear +ieee +transactions +computers +multiscalar +processors +sohi +breach +vijaykumar +international +symposium +computer +architecture +streamlining +data +cache +access +fast +address +calculation +austin +pnevmatikatos +sohi +international +symposium +computer +architecture +anatomy +register +file +multiscalar +processor +breach +vijaykumar +sohi +annual +international +symposium +microarchitecture +micro +request +combining +multiprocessors +arbitrary +interconnection +networks +lebeck +sohi +ieee +transactions +parallel +distributed +systems +efficient +detection +pointer +array +access +errors +austin +breach +sohi +sigplan +conference +programming +language +design +implementation +guarded +execution +branch +prediction +dynamic +processors +pnevmatikatos +sohi +international +symposium +computer +architecture +memory +systems +goodman +sohi +handbook +electrical +engineering +press +control +flow +prediction +dynamic +processors +pnevmatikatos +franklin +sohi +annual +international +symposium +microarchitecture +micro +register +traffic +analysis +streamlining +inter +operation +communicationin +fine +grain +parallel +processors +franklin +sohi +annual +international +symposium +microarchitecture +micro +expandable +split +window +paradigm +exploiting +fine +grain +parallelism +franklin +sohi +international +symposium +computer +architecture +dynamic +dependency +analysis +ordinary +programs +austin +sohi +international +symposium +computer +architecture +efficient +detection +pointer +array +access +errorst +austin +breach +sohi +technical +report +computer +sciences +department +university +wisconsin +madison +december +guarded +execution +branch +prediction +dynamic +processorsd +pnevmatikatos +sohi +technical +report +computer +sciences +department +university +wisconsin +madison +november +knapsack +zero +cycle +memory +hierarchy +componentt +austin +vijaykumar +sohi +technical +report +computer +sciences +department +university +wisconsin +madison +november +tetra +evaluation +serial +program +performance +fine +grain +parallel +processorst +austin +sohi +technical +report +computer +sciences +department +university +wisconsin +madison +july +recent +gradstodd +austin +april +hardware +software +mechanisms +reducing +load +latencydionisios +pnevmatikatos +december +incorporating +guarded +execution +existing +instruction +setsmanoj +franklin +december +multiscalar +architecturemark +friedman +january +architectural +characterization +prolog +executionsriram +vajapeyam +december +instruction +level +characterization +cray +processormen +chow +chiang +september +memory +system +design +based +multiprocessors +last +updated +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..eca554a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,183 @@ +solomon +home +page +marvin +solomon +professor +former +chair +chair +goodman +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +phone +solomon +wisc +research +interests +object +oriented +database +systems +software +development +support +environments +distributed +operating +systems +computer +networks +design +implementation +programming +languages +programming +language +theory +recent +publicationstowards +effective +efficient +free +space +management +proc +sigmod +conf +management +data +june +mark +mcauliffe +michael +carey +andmarvin +solomon +abstractpostscriptthe +gmap +versatile +tool +physical +data +independence +proc +conf +large +databases +september +odysseas +tsatalos +marvin +solomon +andyannis +ioannidis +abstractpostscriptexpanded +version +appears +inthe +vldb +journalv +april +abstractpostscriptshoring +persistent +applications +proc +sigmod +conf +management +data +june +michael +carey +david +dewitt +michael +franklin +nancy +hall +mark +mcauliffe +jeffrey +naughton +daniel +schuh +marvin +solomon +odysseas +tsatalos +seth +white +andmichael +zwillingavailable +astech +report +overview +capitl +software +development +environment +fourth +international +workshop +software +configuration +management +paul +adams +andmarvin +solomon +available +astech +report +updated +version +appear +lecture +notes +computer +science +persistent +objects +logic +paul +adams +andmarvin +solomon +available +astech +report +points +interest +graphical +interface +rooms +program +built +using +java +home +page +spring +univ +wisconsin +computer +sciences +department +shore +project +home +page +shore +project +photoalbum +today +dilbert diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..1d71baa0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,10 @@ +sowmya +home +page +welcome +home +page +sowmya +subramanian +sowmya +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..a699f627 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,91 @@ +shilpa +lawande +home +page +welcome +shilpa +lawande +home +pagei +graduate +student +department +computer +science +university +wisconsin +madison +personal +stuffa +link +pastfor +info +schoolhere +resume +html +ascii +computer +science +second +love +useful +resourses +stuff +wanted +place +systers +women +computer +science +stuff +related +madisonsurf +madisonstate +wisconsin +information +serverthe +hoofer +sailing +clubowls +music +books +movies +java +shilpa +signatures +meet +first +lovesnowy +homepage +cool +linkshere +iswhere +fingered +three +judge +panel +philadelphia +votes +constitutional +follow +link +read +decision +page +accessed +times +since +june +send +comments +suggestions +email +tossl +wisc +shilpal +wisc +thru +guest +formlast +modified +july diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..c41d7cd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,49 @@ +jeremy +stenglein +home +page +jeremy +stenglein +graduate +student +computer +sciences +department +university +wisconsin +madison +west +dayton +street +madison +office +computer +science +phone +mail +stenglei +wisc +teaching +computer +science +section +general +home +page +section +home +page +taking +construction +compilers +links +computer +science +department +home +pagethe +simpson +home +pageespn +sports +hotwired diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..faa65cbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,61 @@ +steve +reinhardt +home +page +steven +reinhardt +graduate +student +computerarchitecture +working +wisconsin +wind +tunnelgroup +advisor +david +wood +although +project +mark +hill +andjim +larus +often +feel +free +tell +mewhat +well +planning +finishing +fall +joining +faculty +ofthe +university +michigan +eecs +department +january +interested +find +pages +publicationsresearch +summary +email +stever +wisc +click +finger +phone +department +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +usalast +updated +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..632c221e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,74 @@ +john +strikwerda +home +page +john +strikwerda +professor +computer +sciences +john +strikwerdadepartment +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +wisconsin +email +strik +wisc +telephone +fall +teaching +problem +solving +using +computers +beginning +january +assignment +national +science +foundation +years +click +information +numerical +analysis +qualifying +exams +research +interests +numerical +analysis +computational +fluid +dynamicsmyofficial +department +home +pageother +stuff +field +museum +point +searching +rating +home +pages +information +chicago +best +chicago +tribune +talk +radio +show +cars +footballmy +kids +nathan +nathan +drew diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..832477ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +subba +home +page +things +enjoy +calvin +hobbes +late +show +david +letterman +seinfeld +interests +prooocessors +historical +interest +papers +evaluating +stream +buffers +secondary +cache +replacement +decoupling +integer +execution +superscalar +processors +subbarao +cambridge +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..b12b4111 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,93 @@ +chiang +home +page +chiang +department +university +wisconsin +madisonoffice +stelephone +mail +suhui +wisc +educlick +send +emailoffice +hours +thurs +page +still +construction +taing +fall +publications +application +characteristics +limited +preemption +completion +parallel +processor +scheduling +policies +rajesh +mansharamani +mary +vernon +proc +sigmetrics +conf +measurement +modeling +computer +systems +nashville +dynamic +static +quantum +based +parallel +processor +allocation +mary +vernon +workshop +scheduling +strategies +parallel +processing +conjunction +ipps +april +search +engines +yahoo +sources +resources +bibliographies +world +wide +virtual +library +subject +catalogue +links +related +taiwan +taiwan +network +service +sinanet +shopping +magzines +news +jobs +calendar +seednet +vistors +guide +taiwan +academia +sinica +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..af025153 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,253 @@ +david +sundaram +stukel +homepage +david +sundaram +stukel +page +upon +effrontery +push +hand +severing +patient +femoral +artery +blood +spurted +blinded +anesthetist +halls +screaming +browbeck +tried +knee +groin +managed +hamstring +scalpel +crawled +floor +stabbing +feet +legs +voilet +baboon +assistant +woman +ever +cared +damn +really +wigged +climbed +table +poise +jump +browbeck +feet +stomp +cops +rushed +william +burroughs +naked +lunch +constructed +page +catapult +readers +pages +choosing +page +index +brief +classes +taking +class +links +relating +computer +science +site +dedicated +smart +clothing +also +steve +mann +page +links +view +currently +seeing +wearable +camera +site +links +information +artificial +life +santa +institute +specifically +link +project +called +tierra +thomas +recent +dilbert +strips +technically +computer +science +math +jokes +somewhere +links +philosophic +scientific +artistic +nature +physics +consciousness +survival +research +laboratories +site +info +various +destructive +shows +organization +arcosanti +arcological +site +outside +phoenix +krishnamurti +foundation +site +tells +century +philosopher +link +various +beat +writers +including +pictured +site +filled +info +waits +links +various +news +sources +packer +news +scientist +online +register +harass +mail +rewarded +dozens +interesting +factoids +astound +friends +scientific +american +online +takes +advantage +hypertext +addition +providing +selection +current +articles +print +edition +science +news +published +weekly +contains +smaller +current +articles +hindu +national +newspaper +india +onion +links +local +interests +madison +astronomy +department +page +specifically +washburn +observatory +public +view +universe +lens +insignificant +piece +history +astronomy +madison +madison +weather +info +obtained +following +sites +webweather +national +weather +service +madison +channel +news +weather +channel +news +weather +links +interest +home +page +late +timothy +leary +links +numerous +articles +written +noted +optimist +noam +chomsky +disinformation +great +listing +conspiracy +theories +buried +within +ultra +trendy +movie +reviews +back +madison +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..35759996 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,11 @@ +brian +swander +home +pagebrian +swander +think +office +office +hours +bookmarks +marks diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..08044ed9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,139 @@ +ariel +tamchesari +tamches +research +assistantemail +tamches +wisc +ariel +tamches +computer +sciences +department +west +dayton +street +madison +typical +pose +angry +posei +organizing +fall +operating +systems +computer +science +college +park +office +sresearch +paradyn +parallel +performance +toolsstatus +searching +thesis +topic +else +interests +parallel +performance +toolsparallel +distributed +operating +systemsbluesthe +simpsonsseinfeldskiingskinetkeystone +favorite +area +snowboarder +jokes +difference +snowboarder +vacum +cleaner +dirt +attached +snowboarders +greet +people +whoa +sorry +dude +difference +municipal +bonds +snowboarders +municipal +bonds +eventually +mature +generate +income +hates +country +music +fortran +cool +links +yahooespncpu +infoskinetother +stuff +talk +exokernel +operating +system +architecture +application +level +resource +management +october +paper +techniques +tools +distributed +shared +memory +performance +improvement +spring +callaghan +talk +supercomputer +interconnection +networks +april +talk +zebra +striped +network +file +system +need +structured +file +system +raid +paper +performance +blocking +wait +free +highly +concurrent +objects +asynchronous +shared +memory +multiprocessors +version +postscript +version +spring +paper +analysis +risc +instruction +enhancement +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..b163fac0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,127 @@ +jeff +lampert +home +pagejeff +lampert +home +page +ricardo +montalban +voice +welcome +home +page +know +nota +picture +least +good +still +looking +forone +incriminating +doesn +make +look +like +aconvict +baby +picture +high +school +yearbook +lasttime +showed +someone +never +heard +cute +baby +someone +thinking +well +found +couple +pictures +tick +threatened +turn +intoa +human +dispenser +took +picture +hence +pictures +befound +separate +page +clicking +anautographed +copy +sign +name +monitor +pictureappears +choose +link +weasel +seek +take +paces +turn +click +basic +factswho +person +last +night +academic +work +relatedwhat +classes +taking +work +dept +resume +entertainmentbooks +movies +music +programs +newsgroups +important +subjectsfriendsno +show +show +sick +theme +song +hobbies +clubs +organizationsgroups +plus +wish +inmy +favorite +linksugh +sounds +like +song +sound +music +servo +look +julie +andrews +fire +crow +good +mstk +eclectic +paraphenaliai +would +miscellaneous +straight +forward +tick +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..62c3a90d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +todd +homepagetodd +homepagein +fall +teaching +sections +since +area +mathematical +programming +plug +mathematicalprogramming +pages +contain +wealth +information +mathematicalprogramming +tmunson +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..15014afe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian +home +pagebrian +toonen +computer +sciences +departmentuniversity +wisconsin +dayton +streetmadison +office +cswhatever +chief +seattlethe +ground +tipi +meditating +life +itsmeaning +accepting +kinship +creatures +acknowledgingunity +universe +things +infusing +thetrue +essence +civilization +luther +standing +bear +oglala +siouxlast +modified diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..04def577 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,19 @@ +thanos +tsiolis +home +page +site +netscape +enhanced +read +shouldconsider +upgrading +browser +latest +version +netscape +ifthat +option +page +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..7ed11dbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,66 @@ +todd +turnidge +todd +douglas +turnidgeschoolcomputer +sciences +departmentuniversity +wisconsin +madison +dayton +madison +homemuppet +babylon +milton +madison +eyesightright +axis +left +axis +graduate +student +department +computer +sciencesat +university +wisconsin +madison +years +working +professorthomas +reps +studyingprogramming +languages +teach +section +hold +mathematicsand +computerscience +case +western +reserveuniversity +located +cleveland +ohio +originally +kent +ohio +myfamily +lives +judge +company +keeps +click +enough +evidence +away +long +time +amusements +shortcuts +last +modified +todd +turnidge +turnidge +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..04ef99f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,54 @@ +taxiao +wang +home +page +welcome +taxiao +wang +home +page +page +heavy +construction +click +finger +contact +information +taxiao +wang +graduate +student +teaching +assistant +computer +science +department +university +wisconsin +madison +office +bldg +dayton +street +madison +office +phone +home +phone +mail +twang +wisc +visitor +number +since +home +page +visited +times +since +visitor +number +since +last +updated +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..d0e8f607 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,16 @@ +shaft +home +pageuri +shaft +home +pageemail +wisc +eduinteresting +diversionsstart +trek +meets +microsoft +start +trek +meets +windows diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..573b9d20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,76 @@ +venkatesh +ganti +home +pagevenkatesh +ganti +vganti +wisc +graduate +studentoffice +computer +sciences +department +dayton +madison +usaphone +note +page +construction +past +present +graduate +student +university +wisconsin +madison +fall +earlier +undergraduate +student +madras +india +native +kakinada +andhra +pradesh +info +asha +asha +basic +education +asha +madison +india +click +know +india +godav +homepage +hostel +madras +godav +yearbook +hope +online +sometime +research +interested +databases +work +till +btech +project +real +time +databases +want +look +real +time +genesis +madison +group +homepage +last +updated +january diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..77174982 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,113 @@ +vijay +home +page +vijaykumar +vijay +wisc +professional +affiliation +computer +sciences +department +university +wisconsin +madison +contact +address +computer +sciences +department +dayton +street +madison +phone +email +vijay +wisc +advisor +guri +sohi +project +multiscalar +project +education +doctorate +university +wisconsin +madison +august +undergraduate +birla +institute +technology +science +pilani +india +research +compiling +multiscalar +architecture +dissertation +distributed +register +file +design +anatomy +register +file +multiscalar +processor +breach +vijaykumar +sohi +annual +international +symposium +microarchitecture +micro +compiling +register +communication +register +communication +strategies +multiscalar +architectures +breach +vijaykumar +sohi +submitted +annual +international +symposium +microarchitecture +micro +multiscalar +processors +multiscalar +processors +sohi +breach +vijaykumar +international +symposium +computer +architecture +scheduling +register +communication +compiling +register +communication +multiscalar +architecturet +vijaykumar +sohi +going +work +memory +data +dependence +prediction +personal +side diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..8cb232b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,101 @@ +john +watrous +home +pagejohn +watrous +watrous +wisc +computer +sciences +departmentuniversity +wisconsin +madison +dayton +streetmadison +telephone +publications +john +watrous +dimensional +quantum +cellular +automata +proc +symp +foundations +computer +science +john +watrous +polynomial +time +algorithm +artin +whaples +approximation +theorem +number +theory +fourth +conference +canadiannumber +theory +association +assorted +links +quantum +computation +links +quantum +computation +archive +stanford +quantum +information +home +page +oxford +particle +beam +physics +laboratory +quantum +information +page +ucla +laboratory +theoretical +quantum +computing +montreal +lanl +preprints +bibliographies +computer +science +bibliographies +hypertext +bibliography +project +hypatia +general +reference +elements +stylehypertext +webster +interfaceroget +thesaurus +random +links +parasol +recordsplaystation +linksweather +forecast +madisonthe +isthmus +daily +pagemathematical +quotation +servermathematician +biographiesgeek +site diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..7f879a2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,132 @@ +weiru +home +page +eiru +home +page +send +email +pppplease +find +around +sometimes +think +english +speakers +committed +asylum +verbally +insane +language +people +recite +play +play +recital +ship +truck +send +cargo +ship +havenoses +feet +smell +richard +lederer +three +possible +parts +date +least +must +beoffered +entertainment +food +affection +customary +begina +series +dates +great +deal +entertainment +moderate +amountof +food +merest +suggestion +affection +amount +ofaffection +increases +entertainment +reduced +proportionately +affection +entertainment +longer +call +dating +circumstances +food +omitted +miss +manners +guide +excruciatingly +correct +behaviour +universities +peking +university +friends +physics +department +alumni +association +atmadison +university +world +little +grey +cells +picture +universe +game +entertainment +late +show +david +letterman +shows +universal +studios +wish +send +postcard +someone +movie +world +movie +reviews +favorite +hockey +player +steve +francais +dictionnairefrancais +anglais +dictionnaire +softwares +relatifs +lafrancophonie +test +degrammaire +francaise +french +lessons +weather +forecast +madison +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..fb9a5eaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,107 @@ +welcome +zhang +home +page +first +year +graduate +student +department +hometown +shanghai +people +republic +china +education +student +department +computer +science +university +wisconsin +madison +computer +science +jose +state +university +jose +california +department +computer +science +technology +tsinghua +university +beijing +people +republic +chinaemail +weiz +wisc +eduworking +experiencecontractor +developing +various +information +management +system +different +platforms +platforms +included +windowsnt +solaris +technologies +used +included +tuxedo +pathway +software +designer +tandem +computers +corporation +software +engineer +sherpa +corporation +system +operator +nasa +ames +research +center +hobbiesma +jiangbridge +card +game +table +tennis +pingpong +joggingthe +ultimate +challengesolve +mine +sweeper +expert +level +puzzle +within +seconds +without +cheating +quote +daythe +best +memory +management +memory +management +ackowledgementthis +home +page +written +using +framework +provided diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..4b477146 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,79 @@ +kent +wenger +home +page +welcome +kent +wenger +home +page +note +page +definitely +still +construction +preparedfor +potholes +need +picture +scanned +kent +wengerassociate +researchercomputer +sciences +departmentuniversity +wisconsin +west +dayton +streetmadison +telephone +email +wenger +wisc +edufinger +workthe +main +projects +working +arecod +clusters +data +providers +anddevise +data +exploration +andvisualization +coming +good +acronym +importantparts +project +wouldn +agree +visualizationproduced +devise +software +people +work +yannis +ioannidis +miron +livnyraghu +ramakrishnanmore +information +university +wisconsin +madison +dbms +research +groupuw +madison +computer +sciences +home +pagewiscinfo +madison +home +page +personallinksimageslast +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..b2d7793d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,155 @@ +wisconsin +wind +tunnel +project +home +page +wisconsin +wind +tunnel +projectmost +future +massively +parallel +computers +built +fromworkstation +like +nodes +programmed +high +level +parallellanguages +like +support +shared +address +space +whichprocesses +uniformly +reference +data +wisconsin +wind +tunnel +project +seeks +develop +consensus +aboutthe +middle +level +interface +languages +compilers +abovesystem +software +hardware +first +proposed +interface +wascooperative +shared +memory +evolutionary +extension +toconventional +shared +memory +software +hardware +recently +havebeen +working +revolutionary +interface +called +tempest +tempest +provides +mechanisms +allow +programmers +compilers +andprogram +libraries +implement +message +passing +transparentshared +memory +hybrid +combinations +developingimplementations +tempest +thinking +machines +cluster +ofworkstations +wisconsincow +hypothetical +hardware +platform +approach +cowuses +snooping +logic +implemented +fpgas +sram +collaboratingwith +wisconsin +paradyn +project +adapt +performance +tools +tempest +overviewand +annotated +bibliography +slides +overview +talk +november +slide +pageor +four +slides +page +complete +technical +papers +contributors +funding +sources +origin +project +name +wisconsin +week +article +paradyn +related +projects +wisconsin +computer +architecture +group +computer +sciences +departmentat +university +wisconsin +world +wide +computer +architecture +information +last +updated +july +mark +hill +markhill +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..dd12df47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,94 @@ +xuelin +home +page +felix +character +created +otto +messmer +first +based +animal +human +personality +first +feature +felix +saves +whichwas +shown +famous +stars +rivaling +chaplin +keaton +prince +wales +picked +polo +team +mascot +picture +accompanied +charles +lindbergh +across +theatlantic +statue +first +image +successfully +transmitted +development +television +star +television +series +somehow +obtained +magic +tricks +didn +oneever +seems +agree +whether +teeth +whiskers +like +spend +time +making +films +television +programs +appearing +newspaper +comic +strips +advertising +hundreds +products +making +things +tricks +removing +tail +ears +putting +back +wish +could +finger +give +account +hairballs +keyboards +finger +keeper +instead +suis +vritable +chat +passe +partout diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..e9d00a0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,608 @@ +yannis +ioannidisyannis +ioannidis +yannis +wisc +eduresearch +interestsdatabase +management +systems +scientific +databases +user +interfaces +andinformation +visualization +complex +query +optimization +heterogeneous +databases +research +primarily +focuses +areas +database +systems +optimization +complex +queries +database +support +scientificdata +future +database +applications +pose +several +challenges +toquery +optimization +complexity +queries +asked +significantly +higher +thanin +traditional +systems +number +alternative +evaluation +algorithms +much +higheras +well +especially +parallelism +attempts +tooptimize +several +values +time +parameters +parametric +queryoptimization +thus +number +alternative +access +plan +processing +querywill +extremely +large +currently +used +algorithmsfor +finding +optimum +among +inadequate +research +investigates +randomized +optimization +algorithmsas +viable +solution +problem +primarily +interested +simulated +annealing +genetic +algorithms +well +alternatives +take +advantage +special +propertiesof +query +optimization +also +looking +complex +query +scheduling +problems +especiallythose +arise +parallel +multimedia +environments +error +propagation +size +cost +estimates +complex +queries +alsopart +studies +trying +identify +appropriateinformation +must +maintained +database +system +limit +thepropagation +error +primarily +focusing +identifying +properties +ofoptimal +histograms +approximate +distribution +values +inrelation +attributes +computational +mode +investigation +expected +part +manyexperiments +various +scientific +disciplines +future +databases +generated +need +specialized +support +many +aspectsthat +current +technology +ready +provide +involved +development +desktop +experiment +managementenvironment +help +scientists +throughout +life +cycle +theirexperimental +studies +primary +component +system +database +system +major +issues +work +addresses +visual +user +interfaces +andsemantic +heterogeneity +former +concentrating +identifying +right +metaphors +arefor +representing +complex +database +schemas +queries +objects +scientistsso +natural +also +investigating +power +dynamic +visual +queries +latter +concentrating +developing +visual +tools +facilitatetranslation +integration +different +data +formats +schemas +although +issues +generic +arise +experimentalscientific +disciplines +efforts +guided +needs +specificprojects +associated +particular +simulation +basedperformance +studies +computer +systems +simulation +based +modeling +plantgrowth +spectroscopy +sequencing +microscopic +imaging +recent +publicationsy +ioannidis +query +optimization +computing +surveys +symposium +issueon +anniversary +march +garofalakis +ioannidis +scheduling +issues +multimedia +queryoptimization +computing +surveys +symposium +issue +multimediasystems +december +ioannidis +ramakrishnan +containment +conjunctive +queries +beyondrelations +sets +transactions +database +systems +tods +september +haber +ioannidis +livny +foundations +visual +metaphors +forschema +display +journal +intelligent +information +systems +july +special +issue +visual +information +systems +ioannidis +tsangaris +design +implementation +performanceevaluation +bermuda +ieee +transactions +knowledge +data +engineering +tkde +february +miller +ioannidis +ramakrishnan +translation +integration +ofheterogeneous +schemas +bridging +theory +practice +information +systems +january +ioannidis +christodoulakis +optimal +histograms +limitingworst +case +error +propagation +size +join +results +transactions +database +systems +tods +december +ioannidis +ramakrishnan +winger +transitive +closure +algorithmsbased +graph +traversal +transactions +ondatabase +systems +tods +september +ioannidis +dynamic +information +visualization +sigmod +record +december +ioannidis +poosala +histogram +based +solutions +diverse +databaseestimation +problems +ieee +data +engineering +september +ioannidis +livny +gupta +ponnekanti +desktop +experimentmanagement +environment +proc +international +vldb +conference +bombay +india +september +poosala +ioannidis +estimation +query +result +distribution +itsapplication +parallel +join +load +balancing +proc +international +vldbconference +bombay +india +september +anjur +ioannidis +livny +frog +turtle +visual +bridgesbetween +files +object +oriented +data +proc +international +conferenceon +scientific +statistical +database +management +stockholm +sweden +june +garofalakis +ioannidis +multi +dimensional +resource +scheduling +forparallel +queries +proc +international +sigmod +conference +montreal +canada +poosala +ioannidis +haas +shekita +improved +histograms +forselectivity +estimation +range +predicates +proc +internationalacm +sigmod +conference +montreal +canada +ioannidis +livny +haber +user +oriented +visual +layoutat +multiple +granularities +proc +international +workshop +advancedvisual +interfaces +gubbio +italy +haber +ioannidis +livny +opossum +desk +schema +managementthrough +customizable +visualization +proc +international +vldbconference +zurich +switzerland +september +ioannidis +poosala +balancing +histogram +optimality +practicalityfor +query +result +size +estimation +proc +international +sigmodconference +jose +tsatalos +solomon +ioannidis +gmap +versatile +tool +forphysical +data +independence +proc +international +vldbconference +santiago +chile +september +tsatalos +ioannidis +unified +framework +indexing +databasesystems +proc +international +dexa +conference +athens +greece +september +ioannidis +lashkari +incomplete +path +expressions +theirdisambiguation +proc +international +sigmod +conference +minneapolis +haber +ioannidis +livny +opossum +flexible +schemavisualization +editing +tool +proc +conference +boston +april +miller +ioannidis +ramakrishnan +translation +integration +ofheterogeneous +schemas +bridging +theory +practice +proc +international +edbt +conference +cambridge +england +march +ioannidis +universality +serial +histograms +proc +internationalvldb +conference +dublin +ireland +august +miller +ioannidis +ramakrishnan +information +capacityin +schema +integration +translation +proc +international +vldbconference +dublin +ireland +august +wiener +ioannidis +moose +scientists +withdata +management +problems +proc +international +workshop +ondatabase +programming +languages +york +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..7b6b2e83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin +zhongbin +homepage diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..8c658ff8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,85 @@ +yinng +home +pageindexofyinongwei +spagehi +welcome +homepage +please +good +look +personal +info +especially +employer +give +alsolinks +classmate +courses +taking +good +time +telephone +work +home +address +office +comp +stat +bldg +madison +home +univ +personal +inforesumehobbiestravel +usathis +collection +pictures +took +travel +articles +wrote +trip +chicago +seattle +course +pointersreal +time +computingmacine +learningpattern +recognitioncomputatinal +geometrydatabasevisionacademic +diarythis +diary +every +month +sometime +amaze +many +little +read +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrelated +linksmy +beida +classmatespeking +university +alumni +home +page +overseas +chinese +organization +madison +ciumy +bookmarkcomments +press +clients +support +send +comments +visitor +number +last +access +last +modified +yinong diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..4b4b1f30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,52 @@ +matt +home +pageuntil +around +updating +basic +information +office +matthew +zeidenbergcenter +wisconsin +strategy +observatory +drive +room +madison +voice +home +gilson +madison +email +zeiden +wisc +eduzeidenbe +wisc +eduwhen +california +parents +house +coho +huntington +beach +beauty +convulsive +breton +nadja +beaute +sera +convulsive +sera +give +food +poor +call +saint +whythe +poor +food +call +communist +helder +camara diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..a5e76bff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,308 @@ +tian +zhang +home +page +tian +zhang +general +information +student +research +assistantadvisor +prof +raghu +ramakrishnan +prof +miron +livny +joint +major +concentrations +database +artificial +intelligence +compilerminor +financial +investment +bankingoffice +room +computer +sciences +dept +univ +wisconsin +madison +madison +wisconsin +mail +zhang +wisc +eduoffice +telephone +home +telephone +department +research +intereststhere +growing +need +exploratory +analysis +large +datasets +discover +useful +patterns +data +mining +territory +developed +purpose +interested +designing +efficient +data +mining +algorithms +ortools +large +databases +integrating +techniques +databases +artificial +intelligence +statistics +thesis +topic +clustering +densityanalysis +large +datasets +given +large +multi +dimensional +dataset +limited +amount +resources +running +time +memory +design +implement +algorithms +efficiently +accurately +identify +sparse +crowded +regions +clustering +analysis +estimate +density +function +overall +data +distribution +density +analysis +important +practical +branches +data +mining +applied +many +domains +dataclassification +image +compression +pattern +recognition +recent +research +project +birch +efficient +data +clustering +density +analysis +system +large +databases +selected +publications +data +clustering +system +birch +applications +tian +zhang +raghu +ramakrishnan +miron +livny +submitted +data +mining +knowledge +discovery +journal +june +birch +efficient +data +clustering +method +large +databases +tian +zhang +raghu +ramakrishnan +miron +livny +proc +sigmod +conf +data +management +june +canada +interactive +classification +large +datasets +birch +tian +zhang +raghu +ramakrishnan +miron +livny +proc +workshop +research +issues +data +mining +knowledgediscovery +cooperation +sigmod +june +canada +fast +density +probability +estimations +using +kernel +method +large +databases +miron +livny +raghu +ramakrishnan +tian +zhang +technical +report +july +motion +planning +multi +joint +robotic +topological +dimensionreduction +method +zhang +ling +zhang +tian +zhang +proc +joint +conference +artificial +intelligence +ijcai +findpath +algorithm +manipulator +finite +division +configuration +space +zhang +jianwei +zhang +ling +zhang +tian +zhang +robotics +manufacturing +recent +trends +research +education +applications +proc +symposium +robotics +andmanufacturing +research +education +applications +motion +planning +robots +topological +dimension +reduction +method +zhang +tian +zhang +jianwei +zhang +ling +zhang +journal +computer +science +technology +finding +collision +free +paths +mobile +robots +tian +zhang +zhang +proc +symposium +young +computer +professionals +beijing +relevant +links +technical +documents +journals +conferences +organizations +beijing +china +interests +last +updated diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..87ba08de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,90 @@ +yihong +home +page +zhao +yihong +zhao +wisc +research +assistant +department +computer +sciencesuniversity +wisconsin +madison +west +dayton +streetmadison +adviser +prof +jeff +naughton +research +interests +parallel +object +relational +dbms +line +analytical +processing +olap +data +mining +financial +data +dbms +benchmark +educationb +university +north +carolina +chapel +hillm +madison +fall +research +related +sites +wiscosin +group +sigmod +data +mining +maryland +datamining +microstrategy +rolap +arbor +molap +stocks +financial +sites +lombard +graph +server +pathfinder +server +kiwi +club +server +daily +news +sites +pathfinder +today +money +daily +chinese +taiwan +news +search +engines +lycos +excite +yahoo +surfing +terse +detail +comments +pgmos diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..18182b26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,27 @@ +home +page +still +construction +wang +homepage +offer +information +home +address +johnson +madison +home +phone +office +address +dayton +street +madison +office +phone +email +address +wisc +zhewang +students +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..855686f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,139 @@ +zhichen +home +page +zhichen +department +computer +sciences +dayton +madison +office +phone +research +assistant +advisors +professor +james +larusprofessor +barton +millerawardbest +paper +award +international +conference +supercomputing +press +july +research +interest +area +programming +languages +performance +issues +parallel +anddistributed +systems +recently +studying +techniques +detect +eliminateperformance +bottlenecks +distributed +shared +memory +systems +combined +paradyn +performance +toolwith +blizzard +wisconsinwind +tunnel +thinking +machine +andthe +cluster +workstations +recent +publications +fields +interestprogramming +languages +environments +tools +parallel +distributed +computing +network +computing +parallel +distributed +operating +system +computer +architecture +performance +evaluation +benchmarks +places +studied +worked +high +performance +computing +software +laboratory +university +texas +antonio +studied +published +area +ofparallel +performance +predictions +modeling +simulations +computer +sciences +departmentat +fudan +university +participated +several +national +projects +china +area +software +development +environment +high +levelprogramming +languages +object +oriented +technologies +andimcremental +compilation +techniques +click +postscript +version +html +version +interesting +links +asplos +programjournals +conferences +compilers +programming +language +researchchinese +novels +friends +fudan +java diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..0d31458a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,29 @@ +zhang +home +page +hello +name +zhang +picture +taken +invited +supper +theth +restaurant +tsinghua +university +chen +weihai +wang +tong +university +wisconsin +madison +department +computer +sciences +west +dayton +street +madison +wisconsin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..0ff2d4e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +krzysztof +zmudzinskikrzysztof +zmudzinskispin +information +students +incs +pictures +information +poland +poles +thanks +stopping +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..5633e484 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,188 @@ +saluja +kewal +college +engineeringuniversity +wisconsin +madison +kewal +salujaprofessor +engineering +hall +engineering +drivemadison +mail +saluja +engr +wisc +eduportrait +jpgdepartmentselectrical +computer +engineeringcomputer +science +education +university +iowa +research +interestsdesign +testability +computer +architecture +data +compression +integrated +circuits +vlsi +fault +tolerant +computing +general +area +research +interest +test +generation +testableand +reliable +design +digital +systems +carry +research +thisarea +make +extensive +vlsi +analysis +tools +theresearch +involves +modeling +faults +designing +digital +circuits +testgeneration +design +modification +enhancing +testability +built +inself +testing +designs +fabrication +circuits +test +application +investigating +techniques +make +test +generation +andfault +simulation +process +efficient +combinational +andsequential +circuits +data +compression +compaction +methodsapplicable +design +testability +built +self +testenvironment +investigated +area +built +self +test +concentrating +regularstructures +programmable +logic +arrays +rams +areinvestigating +self +test +algorithms +implemented +inhardware +little +performance +area +penalty +another +projectwe +investigating +ways +built +self +test +hardware +test +asystem +performing +normal +operation +goal +thatthe +system +tested +continuously +operates +little +noimpact +system +performance +much +work +performed +using +facilities +vlsi +digitalsystem +laboratory +laboratory +houses +number +stations +withcolor +monitors +terminals +programming +design +depts +centers +consortia +services +fountain +index +search +mail +credits +help +last +modified +friday +cdtthis +page +best +viewed +browsers +support +tables +photographs +college +engineering +address +comments +webmaster +engr +wisc +eduupdate +profile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..6c650a22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,213 @@ +duffie +neil +college +engineeringuniversity +wisconsin +madison +neil +duffie +professor +engineering +research +building +engineering +drivemadison +mail +duffie +engr +wisc +eduportrait +departmentsmechanical +engineeringeducationbs +university +wisconsin +madisonms +university +wisconsin +madisonphd +university +wisconsin +madisonresearch +interestsrobotics +computer +control +manufacturing +systems +precision +engr +computer +integrated +manufacturing +micromechanismscenters +consortiamanufacturing +systems +engineering +programwisconsin +center +space +automation +roboticsprofessor +duffie +research +manufacturing +systems +involves +integrating +sensors +actuators +computers +data +bases +advanced +automated +production +systems +developed +controls +self +guided +inspection +machines +welding +robots +high +performance +material +handling +systems +automated +finishing +systems +mold +production +rework +studying +highly +distributed +hierarchical +system +control +architectures +hope +reducing +cost +complexity +large +scale +computer +controlled +manufacturing +systems +increasing +flexibility +fault +tolerance +duffie +constructed +several +experimental +manufacturing +systems +incorporate +real +time +fully +distributed +scheduling +optimization +control +systems +developing +theories +explaining +properties +performance +systems +duffie +associate +director +wisconsin +center +space +automation +robotics +research +nasa +funded +center +emphasizes +automated +agriculture +systems +sensors +tactile +feedback +human +operators +telerobotic +systems +methods +performance +evaluation +well +human +factors +research +sensory +feedback +fatigue +developed +telerobotics +test +experimental +work +carried +duffie +works +closely +manufacturing +aerospace +industries +teaches +courses +manufacturing +systems +automatic +controls +computer +controls +authored +computer +control +machines +processes +depts +centers +consortia +services +fountain +index +search +mail +credits +help +last +modified +tuesday +cdtthis +page +best +viewed +browsers +support +tables +photographs +college +engineering +address +comments +webmaster +engr +wisc +eduupdate +profile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..7220378b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,318 @@ +shapiro +vadim +college +engineeringuniversity +wisconsin +madison +vadim +shapiro +assistant +professor +mechanical +engineering +university +avenuemadison +mail +vshapiro +engr +wisc +eduportrait +jpgurl +http +wisc +departmentscomputer +sciencemechanical +engineeringeducationba +york +universityms +university +california +angelesms +cornell +universityphd +cornell +univeristyresearch +interestscomputer +aided +design +manufacturing +applied +computational +geometry +geometric +solid +modeling +physical +modeling +analysis +simulation +design +production +automationcenters +consortiamathematics +computation +engineering +graduate +programmanufacturing +systems +engineering +programspatial +automation +laboratoryselected +awards +honorsnational +science +foundation +career +award +general +motors +fellow +selected +publications +maintenance +geometric +representations +space +decompositions +international +journal +computational +geometry +applications +chain +models +physical +behavior +engineering +analysis +design +research +engineering +design +april +palmer +real +functions +representation +rigid +solids +computer +aided +geometric +design +april +separation +boundary +conversion +transactions +graphics +january +vossler +professor +shapiro +research +interests +center +relationships +betweengeometry +physical +phenomena +mechanical +artifacts +bemodeled +represented +analyzed +manipulated +designed +manufacturedbased +computer +representations +algorithms +specific +ongoing +projects +include +geometric +modeling +ability +create +convert +maintain +consistency +ofdistinct +representations +mechanical +parts +major +technologicalbarrier +undermines +usefulness +reliability +commercialgeometric +modeling +systems +current +research +efforts +focus +eliminatingambiguity +communicating +engineering +specifications +formal +modeling +ofparametric +families +mechanical +parts +investigating +novel +methodsand +computational +techniques +support +design +manufacturing +mechanical +design +today +mechanical +forms +functions +fabrication +processes +cannot +bedescribed +combinatorially +terms +discrete +simple +interactingprimitives +apparent +lack +combinatorial +structure +amajor +roadblock +competitive +design +manufacturing +mechanical +systems +collaboration +industry +present +research +deals +withtheoretical +practical +computational +aspects +mechanical +designand +seeks +establish +formal +basis +making +mechanical +design +andmanufacturing +parts +systematic +competitive +smoothintegration +mechanical +form +modeling +engineering +activities +physical +modeling +geometric +models +contain +part +information +needed +capture +thedesired +physical +behavior +artifact +processes +used +tomanufacture +recent +study +algebraic +topological +models +called +chain +models +physical +behavior +suggests +possible +tounify +physical +geometric +modeling +thus +facilitate +development +ofnew +computer +aided +engineering +tools +current +investigations +theseand +models +physical +behavior +develop +engineering +languagesand +computer +algorithms +systematic +specification +modeling +simulation +analysis +physicalobjects +systems +depts +centers +consortia +services +fountain +index +search +mail +credits +help +last +modified +thursday +cdtthis +page +best +viewed +browsers +support +tables +photographs +college +engineering +address +comments +webmaster +engr +wisc +eduupdate +profile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..e404cae9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,144 @@ +automated +theorem +proving +groupautomated +theorem +proving +groupthe +automated +theorem +proving +group +part +computer +science +mathematics +departments +university +texas +ataustin +produce +methods +systems +intended +prove +theorems +first +higher +order +logic +intention +applying +systemsand +methods +problems +primarily +mathematics +also +computerscience +technology +hereis +index +electronically +available +tech +reports +site +tech +report +series +continued +currently +techreports +added +tech +reportseries +present +grouplarry +hinesmarty +mayberrybenjamin +shultsalumniprevious +students +woody +bledsoe +previous +students +robert +boyer +incomplete +list +others +related +groupthe +late +woody +bledsoe +computer +science +faculty +profile +robertboyerj +strother +moorethis +incomplete +list +past +visitors +collaboratorswhat +done +implythe +natural +deduction +proverstrivelarry +hines +first +order +logic +inequality +prover +struvelarry +hines +theory +prover +chou +geometry +proverand +various +improvements +theretoincluding +mcphee +feng +prover +theoryimplementation +description +proof +heine +borel +theoremprecondition +proverbledsoe +prover +analogy +proof +heine +borel +theoremnqthmboyer +andmoore +prover +developed +clinc +incomplete +list +iprshults +knowledge +using +prover +mathematics +incomplete +list +relatedlinksdo +feedback +want +information +contact +benjamin +shults diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..212e75b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +document +moveddocument +movedthis +document +permanently +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..212e75b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/data/sw/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +document +moveddocument +movedthis +document +permanently +moved diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/idf/idf.txt b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/idf/idf.txt new file mode 100644 index 00000000..5ea667bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/idf/idf.txt @@ -0,0 +1,20655 @@ +term, in documents count, idf, in c count, in nc count, wordid; documentcount = 1051 +home 672 0.000000 0 0 1 +comput 775 0.000000 1 0 2 +page 705 0.000000 0 0 3 +scienc 640 0.000000 0 0 4 +univers 571 0.000000 0 0 5 +system 443 0.693147 1 0 6 +program 374 0.693147 0 0 7 +inform 412 0.693147 0 0 8 +work 380 0.693147 1 0 9 +research 431 0.693147 1 0 10 +interest 384 0.693147 0 0 11 +depart 457 0.693147 0 0 12 +offic 299 1.098612 0 0 13 +last 314 1.098612 0 0 14 +cours 273 1.098612 0 0 15 +us 329 1.098612 1 0 16 +time 293 1.098612 0 0 17 +project 340 1.098612 0 0 18 +student 343 1.098612 1 0 19 +engin 297 1.098612 0 0 20 +current 284 1.098612 1 0 21 +mail 238 1.386294 0 0 22 +cornel 215 1.386294 1 0 23 +link 247 1.386294 0 0 24 +design 213 1.386294 1 0 25 +languag 227 1.386294 0 0 26 +gener 220 1.386294 0 0 27 +also 259 1.386294 0 0 28 +email 220 1.386294 0 0 29 +softwar 220 1.386294 0 0 30 +graduat 215 1.386294 0 0 31 +washington 236 1.386294 0 0 32 +wisc 242 1.386294 0 0 33 +oper 180 1.609438 0 0 34 +modifi 178 1.609438 0 0 35 +group 183 1.609438 1 0 36 +class 199 1.609438 0 0 37 +paper 205 1.609438 1 0 38 +list 201 1.609438 0 0 39 +fall 181 1.609438 1 0 40 +updat 191 1.609438 0 0 41 +includ 208 1.609438 0 0 42 +public 202 1.609438 0 0 43 +utexa 189 1.609438 0 0 44 +phone 175 1.791759 0 0 45 +hour 165 1.791759 0 0 46 +read 154 1.791759 1 0 47 +avail 169 1.791759 0 0 48 +data 170 1.791759 0 0 49 +base 165 1.791759 1 0 50 +distribut 162 1.791759 1 0 51 +implement 152 1.791759 1 0 52 +develop 174 1.791759 0 0 53 +wisconsin 169 1.791759 0 0 54 +madison 165 1.791759 0 0 55 +applic 170 1.791759 0 0 56 +algorithm 162 1.791759 0 0 57 +recent 167 1.791759 0 0 58 +contact 153 1.791759 0 0 59 +parallel 169 1.791759 0 0 60 +network 168 1.791759 0 0 61 +address 170 1.791759 0 0 62 +austin 168 1.791759 0 0 63 +texa 160 1.791759 0 0 64 +hall 146 1.945910 0 0 65 +assign 135 1.945910 0 0 66 +note 142 1.945910 0 0 67 +relat 139 1.945910 0 0 68 +model 145 1.945910 1 0 69 +file 132 1.945910 0 0 70 +first 140 1.945910 0 0 71 +process 142 1.945910 0 0 72 +lectur 135 1.945910 1 0 73 +perform 143 1.945910 0 0 74 +problem 147 1.945910 0 0 75 +professor 137 1.945910 0 0 76 +architectur 139 1.945910 0 0 77 +click 142 1.945910 0 0 78 +object 138 1.945910 0 0 79 +area 144 1.945910 0 0 80 +like 132 1.945910 0 0 81 +construct 139 1.945910 0 0 82 +support 132 1.945910 0 0 83 +year 148 1.945910 0 0 84 +schedul 119 2.079442 0 0 85 +databas 122 2.079442 0 0 86 +introduct 126 2.079442 0 0 87 +spring 131 2.079442 0 0 88 +document 121 2.079442 1 1 89 +postscript 131 2.079442 0 0 90 +studi 120 2.079442 0 0 91 +report 131 2.079442 0 0 92 +tool 117 2.079442 0 0 93 +provid 121 2.079442 0 0 94 +machin 129 2.079442 0 0 95 +compil 122 2.079442 0 0 96 +number 130 2.079442 0 0 97 +analysi 124 2.079442 0 0 98 +welcom 122 2.079442 0 0 99 +confer 126 2.079442 0 0 100 +high 130 2.079442 0 0 101 +technolog 131 2.079442 0 0 102 +seattl 120 2.079442 0 0 103 +dayton 119 2.079442 0 0 104 +structur 106 2.197225 0 0 105 +specif 106 2.197225 0 0 106 +instructor 108 2.197225 0 0 107 +final 116 2.197225 0 0 108 +send 114 2.197225 0 0 109 +topic 114 2.197225 1 0 110 +find 111 2.197225 0 0 111 +teach 108 2.197225 0 0 112 +assist 112 2.197225 0 0 113 +pleas 113 2.197225 0 0 114 +look 107 2.197225 0 0 115 +code 108 2.197225 1 0 116 +person 111 2.197225 0 0 117 +check 115 2.197225 0 0 118 +site 106 2.197225 0 0 119 +make 111 2.197225 0 0 120 +well 109 2.197225 0 0 121 +version 113 2.197225 0 0 122 +mathemat 108 2.197225 0 0 123 +place 106 2.197225 0 0 124 +manag 114 2.197225 0 0 125 +world 115 2.197225 0 0 126 +theori 111 2.197225 0 0 127 +intern 108 2.197225 0 0 128 +part 98 2.302585 0 0 129 +advanc 99 2.302585 0 0 130 +book 99 2.302585 0 0 131 +peopl 96 2.302585 0 0 132 +text 98 2.302585 0 0 133 +take 97 2.302585 0 0 134 +need 98 2.302585 0 0 135 +access 102 2.302585 0 0 136 +user 104 2.302585 0 0 137 +techniqu 99 2.302585 0 0 138 +memori 101 2.302585 0 0 139 +technic 100 2.302585 0 0 140 +question 91 2.397895 0 0 141 +grade 90 2.397895 0 0 142 +follow 92 2.397895 0 0 143 +real 93 2.397895 0 0 144 +present 91 2.397895 0 0 145 +comment 93 2.397895 0 0 146 +graphic 90 2.397895 1 0 147 +homepag 93 2.397895 0 0 148 +section 94 2.397895 0 0 149 +mani 92 2.397895 0 0 150 +associ 93 2.397895 0 0 151 +proceed 93 2.397895 0 0 152 +call 91 2.397895 0 0 153 +select 91 2.397895 0 0 154 +search 95 2.397895 0 0 155 +octob 89 2.397895 0 0 156 +commun 95 2.397895 0 0 157 +center 88 2.397895 0 0 158 +sinc 90 2.397895 0 0 159 +pictur 89 2.397895 0 0 160 +imag 91 2.397895 1 0 161 +solut 82 2.484907 0 0 162 +chang 82 2.484907 0 0 163 +control 82 2.484907 0 0 164 +member 84 2.484907 0 0 165 +second 81 2.484907 0 0 166 +requir 81 2.484907 0 0 167 +larg 82 2.484907 0 0 168 +exam 86 2.484907 0 0 169 +learn 86 2.484907 0 0 170 +stuff 87 2.484907 0 0 171 +resourc 81 2.484907 0 0 172 +start 83 2.484907 0 0 173 +contain 81 2.484907 0 0 174 +help 83 2.484907 0 0 175 +info 85 2.484907 0 0 176 +environ 84 2.484907 0 0 177 +academ 82 2.484907 0 0 178 +novemb 81 2.484907 0 0 179 +level 87 2.484907 0 0 180 +librari 87 2.484907 0 0 181 +activ 84 2.484907 0 0 182 +journal 83 2.484907 0 0 183 +build 85 2.484907 0 0 184 +wide 84 2.484907 0 0 185 +internet 83 2.484907 0 0 186 +institut 84 2.484907 0 0 187 +school 84 2.484907 0 0 188 +thing 84 2.484907 0 0 189 +ieee 86 2.484907 0 0 190 +educ 86 2.484907 0 0 191 +west 83 2.484907 0 0 192 +homework 79 2.564949 0 0 193 +dynam 76 2.564949 0 0 194 +exampl 77 2.564949 0 0 195 +april 77 2.564949 0 0 196 +optim 79 2.564949 0 0 197 +know 80 2.564949 0 0 198 +want 79 2.564949 0 0 199 +good 77 2.564949 0 0 200 +sourc 77 2.564949 0 0 201 +come 78 2.564949 0 0 202 +refer 78 2.564949 0 0 203 +server 76 2.564949 0 0 204 +orient 80 2.564949 0 0 205 +mondai 77 2.564949 0 0 206 +state 76 2.564949 0 0 207 +complet 77 2.564949 0 0 208 +interfac 79 2.564949 0 0 209 +appear 78 2.564949 0 0 210 +issu 78 2.564949 0 0 211 +messag 76 2.564949 0 0 212 +method 80 2.564949 0 0 213 +june 79 2.564949 0 0 214 +decemb 80 2.564949 0 0 215 +master 76 2.564949 0 0 216 +resum 79 2.564949 0 0 217 +upson 71 2.639057 0 0 218 +tuesdai 73 2.639057 0 0 219 +name 72 2.639057 0 0 220 +materi 75 2.639057 0 0 221 +write 72 2.639057 0 0 222 +onlin 75 2.639057 0 0 223 +free 73 2.639057 0 0 224 +intellig 72 2.639057 0 0 225 +appli 71 2.639057 0 0 226 +involv 71 2.639057 0 0 227 +addit 74 2.639057 0 0 228 +meet 72 2.639057 0 0 229 +logic 71 2.639057 0 0 230 +line 75 2.639057 0 0 231 +david 71 2.639057 0 0 232 +effici 73 2.639057 0 0 233 +solv 73 2.639057 0 0 234 +html 75 2.639057 0 0 235 +servic 72 2.639057 0 0 236 +summari 73 2.639057 0 0 237 +symposium 72 2.639057 0 0 238 +workshop 71 2.639057 0 0 239 +nation 74 2.639057 0 0 240 +thursdai 70 2.708050 0 0 241 +window 68 2.708050 1 0 242 +knowledg 67 2.708050 0 0 243 +receiv 66 2.708050 0 0 244 +integr 67 2.708050 0 0 245 +practic 70 2.708050 0 0 246 +syllabu 67 2.708050 0 0 247 +java 70 2.708050 0 0 248 +order 69 2.708050 0 0 249 +goal 66 2.708050 0 0 250 +would 67 2.708050 0 0 251 +test 66 2.708050 0 0 252 +differ 66 2.708050 0 0 253 +view 70 2.708050 0 0 254 +simul 66 2.708050 0 0 255 +main 67 2.708050 0 0 256 +august 66 2.708050 0 0 257 +multimedia 68 2.708050 0 0 258 +degre 69 2.708050 0 0 259 +sieg 69 2.708050 0 0 260 +wednesdai 64 2.772589 0 0 261 +new 64 2.772589 0 0 262 +handout 64 2.772589 0 0 263 +januari 62 2.772589 0 0 264 +organ 65 2.772589 0 0 265 +evalu 64 2.772589 0 0 266 +guid 63 2.772589 0 0 267 +collect 65 2.772589 0 0 268 +complex 64 2.772589 0 0 269 +interact 62 2.772589 0 0 270 +descript 64 2.772589 0 0 271 +plan 65 2.772589 0 0 272 +prof 64 2.772589 0 0 273 +septemb 65 2.772589 0 0 274 +function 62 2.772589 0 0 275 +abstract 62 2.772589 0 0 276 +creat 63 2.772589 0 0 277 +written 63 2.772589 0 0 278 +polici 64 2.772589 0 0 279 +artifici 63 2.772589 0 0 280 +result 65 2.772589 0 0 281 +import 65 2.772589 0 0 282 +experi 64 2.772589 0 0 283 +copi 63 2.772589 0 0 284 +virtual 62 2.772589 0 0 285 +foundat 62 2.772589 0 0 286 +taylor 63 2.772589 0 0 287 +visit 63 2.772589 0 0 288 +improv 62 2.772589 0 0 289 +previou 62 2.772589 0 0 290 +dept 64 2.772589 0 0 291 +laboratori 63 2.772589 0 0 292 +street 63 2.772589 0 0 293 +ithaca 65 2.772589 0 0 294 +march 61 2.833213 0 0 295 +type 61 2.833213 0 0 296 +back 60 2.833213 0 0 297 +simpl 60 2.833213 0 0 298 +best 59 2.833213 0 0 299 +colleg 61 2.833213 0 0 300 +room 59 2.833213 0 0 301 +content 59 2.833213 1 0 302 +locat 59 2.833213 0 0 303 +share 59 2.833213 0 0 304 +juli 60 2.833213 0 0 305 +automat 61 2.833213 0 0 306 +plai 60 2.833213 0 0 307 +unix 58 2.890372 0 0 308 +index 56 2.890372 0 0 309 +space 57 2.890372 0 0 310 +summer 56 2.890372 0 0 311 +semest 58 2.890372 0 0 312 +browser 56 2.890372 0 0 313 +think 57 2.890372 0 0 314 +major 56 2.890372 0 0 315 +direct 57 2.890372 0 0 316 +variou 56 2.890372 0 0 317 +reason 57 2.890372 0 0 318 +point 58 2.890372 0 0 319 +special 56 2.890372 0 0 320 +detail 57 2.890372 0 0 321 +sever 56 2.890372 0 0 322 +overview 56 2.890372 0 0 323 +explor 58 2.890372 0 0 324 +faculti 56 2.890372 0 0 325 +publish 57 2.890372 0 0 326 +thesi 57 2.890372 0 0 327 +februari 54 2.944439 0 0 328 +cover 55 2.944439 0 0 329 +three 54 2.944439 0 0 330 +suggest 53 2.944439 0 0 331 +instruct 53 2.944439 0 0 332 +allow 53 2.944439 0 0 333 +local 55 2.944439 0 0 334 +processor 54 2.944439 0 0 335 +talk 53 2.944439 0 0 336 +found 53 2.944439 0 0 337 +undergradu 54 2.944439 0 0 338 +sampl 53 2.944439 1 0 339 +extens 53 2.944439 0 0 340 +scientif 53 2.944439 0 0 341 +maintain 51 2.995732 0 0 342 +week 52 2.995732 0 0 343 +date 51 2.995732 0 0 344 +profession 51 2.995732 0 0 345 +tabl 51 2.995732 0 0 346 +run 51 2.995732 0 0 347 +digit 52 2.995732 0 0 348 +much 52 2.995732 0 0 349 +hardwar 51 2.995732 0 0 350 +case 51 2.995732 0 0 351 +particular 51 2.995732 0 0 352 +investig 51 2.995732 0 0 353 +finger 52 2.995732 0 0 354 +advisor 51 2.995732 0 0 355 +format 48 3.044522 0 0 356 +principl 48 3.044522 0 0 357 +appoint 49 3.044522 0 0 358 +give 50 3.044522 0 0 359 +basic 50 3.044522 0 0 360 +set 50 3.044522 0 0 361 +still 50 3.044522 0 0 362 +right 48 3.044522 0 0 363 +archiv 49 3.044522 0 0 364 +standard 48 3.044522 0 0 365 +approach 48 3.044522 0 0 366 +frequent 49 3.044522 0 0 367 +pointer 48 3.044522 0 0 368 +numer 49 3.044522 0 0 369 +without 50 3.044522 0 0 370 +visitor 49 3.044522 0 0 371 +visual 48 3.044522 1 0 372 +telephon 50 3.044522 0 0 373 +cool 49 3.044522 0 0 374 +life 50 3.044522 0 0 375 +friend 48 3.044522 0 0 376 +physic 47 3.091042 0 0 377 +possibl 47 3.091042 0 0 378 +electron 47 3.091042 0 0 379 +get 46 3.091042 0 0 380 +done 47 3.091042 0 0 381 +move 47 3.091042 0 1 382 +could 46 3.091042 0 0 383 +understand 47 3.091042 0 0 384 +effect 46 3.091042 0 0 385 +featur 46 3.091042 0 0 386 +adapt 46 3.091042 0 0 387 +california 46 3.091042 0 0 388 +quarter 47 3.091042 0 0 389 +fridai 44 3.135494 0 0 390 +answer 45 3.135494 0 0 391 +midterm 45 3.135494 0 0 392 +even 45 3.135494 0 0 393 +algebra 45 3.135494 0 0 394 +netscap 44 3.135494 0 0 395 +directori 45 3.135494 0 0 396 +textbook 44 3.135494 0 0 397 +made 44 3.135494 0 0 398 +discuss 45 3.135494 0 0 399 +describ 45 3.135494 0 0 400 +better 45 3.135494 0 0 401 +math 44 3.135494 0 0 402 +mark 44 3.135494 0 0 403 +execut 45 3.135494 0 0 404 +video 44 3.135494 1 0 405 +natur 44 3.135494 0 0 406 +protocol 45 3.135494 0 0 407 +anoth 45 3.135494 0 0 408 +keep 44 3.135494 0 0 409 +favorit 44 3.135494 0 0 410 +term 43 3.178054 0 0 411 +third 43 3.178054 0 0 412 +long 43 3.178054 0 0 413 +offer 43 3.178054 0 0 414 +around 43 3.178054 0 0 415 +mechan 43 3.178054 0 0 416 +show 43 3.178054 0 0 417 +edit 42 3.218876 0 0 418 +press 42 3.218876 0 0 419 +http 41 3.218876 0 0 420 +combin 42 3.218876 0 0 421 +howev 41 3.218876 0 0 422 +compani 41 3.218876 0 0 423 +examin 42 3.218876 0 0 424 +review 42 3.218876 0 0 425 +might 41 3.218876 0 0 426 +futur 41 3.218876 0 0 427 +past 42 3.218876 0 0 428 +fast 42 3.218876 0 0 429 +vision 41 3.218876 0 0 430 +linear 41 3.218876 0 0 431 +cach 41 3.218876 0 0 432 +editor 41 3.218876 0 0 433 +autom 41 3.218876 0 0 434 +york 41 3.218876 0 0 435 +music 42 3.218876 0 0 436 +tutori 39 3.258097 0 0 437 +transact 39 3.258097 0 0 438 +late 40 3.258097 0 0 439 +submit 39 3.258097 0 0 440 +announc 40 3.258097 0 0 441 +must 40 3.258097 0 0 442 +form 39 3.258097 0 0 443 +realli 40 3.258097 0 0 444 +programm 39 3.258097 0 0 445 +theoret 39 3.258097 0 0 446 +small 39 3.258097 0 0 447 +continu 39 3.258097 0 0 448 +error 40 3.258097 0 0 449 +author 39 3.258097 0 0 450 +live 40 3.258097 0 0 451 +map 39 3.258097 0 0 452 +multipl 39 3.258097 0 0 453 +littl 39 3.258097 0 0 454 +probabl 40 3.258097 0 0 455 +societi 40 3.258097 0 0 456 +join 39 3.258097 0 0 457 +annual 40 3.258097 0 0 458 +movi 40 3.258097 0 0 459 +credit 38 3.295837 0 0 460 +electr 38 3.295837 0 0 461 +correct 38 3.295837 0 0 462 +prototyp 38 3.295837 0 0 463 +industri 38 3.295837 0 0 464 +close 38 3.295837 0 0 465 +brian 38 3.295837 0 0 466 +slide 38 3.295837 0 0 467 +microsoft 38 3.295837 0 0 468 +open 38 3.295837 0 0 469 +seminar 38 3.295837 0 0 470 +paul 38 3.295837 0 0 471 +origin 38 3.295837 0 0 472 +vita 38 3.295837 0 0 473 +streetmadison 38 3.295837 0 0 474 +hand 37 3.332205 0 0 475 +respons 37 3.332205 0 0 476 +mean 37 3.332205 0 0 477 +formal 37 3.332205 0 0 478 +workstat 37 3.332205 0 0 479 +cost 37 3.332205 0 0 480 +purpos 37 3.332205 0 0 481 +field 37 3.332205 0 0 482 +feel 37 3.332205 0 0 483 +expect 37 3.332205 0 0 484 +connect 37 3.332205 0 0 485 +sciencesunivers 37 3.332205 0 0 486 +china 37 3.332205 0 0 487 +procedur 36 3.367296 0 0 488 +download 36 3.367296 0 0 489 +staff 36 3.367296 0 0 490 +ofth 36 3.367296 0 0 491 +tree 36 3.367296 0 0 492 +multi 36 3.367296 0 0 493 +soon 36 3.367296 0 0 494 +copyright 36 3.367296 0 0 495 +especi 36 3.367296 0 0 496 +robot 36 3.367296 0 0 497 +game 36 3.367296 0 0 498 +short 36 3.367296 0 0 499 +winter 36 3.367296 0 0 500 +concurr 34 3.401197 0 0 501 +return 34 3.401197 0 0 502 +print 34 3.401197 0 0 503 +manual 35 3.401197 0 0 504 +post 35 3.401197 0 0 505 +either 35 3.401197 0 0 506 +jame 35 3.401197 0 0 507 +word 34 3.401197 0 0 508 +approxim 35 3.401197 0 0 509 +singl 34 3.401197 0 0 510 +random 34 3.401197 0 0 511 +represent 35 3.401197 0 0 512 +survei 35 3.401197 0 0 513 +michael 35 3.401197 1 0 514 +tech 35 3.401197 0 0 515 +least 35 3.401197 0 0 516 +next 34 3.401197 0 0 517 +bibliographi 34 3.401197 0 0 518 +everi 34 3.401197 0 0 519 +global 34 3.401197 0 0 520 +statist 35 3.401197 0 0 521 +committe 34 3.401197 0 0 522 +award 34 3.401197 0 0 523 +queri 33 3.433987 0 0 524 +within 33 3.433987 0 0 525 +taught 33 3.433987 0 0 526 +product 33 3.433987 0 0 527 +board 33 3.433987 0 0 528 +go 33 3.433987 0 0 529 +articl 33 3.433987 0 0 530 +eduoffic 33 3.433987 0 0 531 +john 33 3.433987 0 0 532 +toler 33 3.433987 0 0 533 +obtain 33 3.433987 0 0 534 +curriculum 33 3.433987 0 0 535 +chapter 32 3.465736 0 0 536 +concept 32 3.465736 0 0 537 +given 32 3.465736 0 0 538 +extend 32 3.465736 0 0 539 +express 32 3.465736 1 0 540 +kind 32 3.465736 0 0 541 +transform 32 3.465736 0 0 542 +collabor 32 3.465736 0 0 543 +ad 32 3.465736 0 0 544 +idea 32 3.465736 0 0 545 +human 32 3.465736 1 0 546 +fault 32 3.465736 0 0 547 +independ 32 3.465736 0 0 548 +dissert 32 3.465736 0 0 549 +india 32 3.465736 0 0 550 +often 31 3.496508 0 0 551 +posit 31 3.496508 0 0 552 +storag 31 3.496508 0 0 553 +someth 31 3.496508 0 0 554 +taken 31 3.496508 0 0 555 +titl 31 3.496508 0 0 556 +anim 31 3.496508 0 0 557 +autumn 31 3.496508 0 0 558 +richard 31 3.496508 0 0 559 +scientist 31 3.496508 0 0 560 +photo 31 3.496508 0 0 561 +computersci 30 3.555348 0 0 562 +hard 30 3.555348 0 0 563 +domain 30 3.555348 0 0 564 +rang 30 3.555348 0 0 565 +abl 30 3.555348 0 0 566 +robert 30 3.555348 0 0 567 +specifi 30 3.555348 0 0 568 +exist 30 3.555348 0 0 569 +compon 30 3.555348 0 0 570 +focu 30 3.555348 0 0 571 +produc 30 3.555348 0 0 572 +power 30 3.555348 0 0 573 +common 30 3.555348 0 0 574 +option 30 3.555348 0 0 575 +graph 30 3.555348 0 0 576 +secur 30 3.555348 0 0 577 +neural 30 3.555348 0 0 578 +travel 30 3.555348 0 0 579 +postal 30 3.555348 0 0 580 +profil 30 3.555348 0 0 581 +quot 29 3.583519 0 0 582 +depend 29 3.583519 0 0 583 +focus 29 3.583519 0 0 584 +limit 29 3.583519 0 0 585 +turn 29 3.583519 0 0 586 +semant 29 3.583519 0 0 587 +synchron 29 3.583519 0 0 588 +particip 29 3.583519 0 0 589 +consid 29 3.583519 0 0 590 +platform 29 3.583519 0 0 591 +built 29 3.583519 0 0 592 +art 29 3.583519 0 0 593 +steve 29 3.583519 0 0 594 +chines 29 3.583519 0 0 595 +chair 29 3.583519 0 0 596 +ask 28 3.610918 0 0 597 +progress 28 3.610918 0 0 598 +intend 28 3.610918 0 0 599 +held 28 3.610918 0 0 600 +load 28 3.610918 0 0 601 +propos 28 3.610918 0 0 602 +becom 28 3.610918 0 0 603 +actual 28 3.610918 0 0 604 +multiprocessor 28 3.610918 0 0 605 +framework 28 3.610918 0 0 606 +except 28 3.610918 0 0 607 +usual 28 3.610918 0 0 608 +measur 28 3.610918 0 0 609 +hope 28 3.610918 0 0 610 +pass 28 3.610918 0 0 611 +cluster 28 3.610918 0 0 612 +scale 28 3.610918 0 0 613 +packag 28 3.610918 0 0 614 +full 28 3.610918 0 0 615 +releas 28 3.610918 0 0 616 +univ 28 3.610918 0 0 617 +weather 28 3.610918 0 0 618 +static 27 3.637586 0 0 619 +symbol 27 3.637586 0 0 620 +retriev 27 3.637586 1 0 621 +though 27 3.637586 0 0 622 +campu 27 3.637586 0 0 623 +manipul 27 3.637586 0 0 624 +team 27 3.637586 0 0 625 +great 27 3.637586 0 0 626 +arrai 27 3.637586 0 0 627 +administr 27 3.637586 0 0 628 +utc 27 3.637586 0 0 629 +determin 27 3.637586 0 0 630 +linux 27 3.637586 0 0 631 +mind 27 3.637586 0 0 632 +quit 27 3.637586 0 0 633 +american 27 3.637586 0 0 634 +request 26 3.688879 0 0 635 +constraint 26 3.688879 0 0 636 +relev 26 3.688879 0 0 637 +rule 26 3.688879 0 0 638 +bookmark 26 3.688879 0 0 639 +revis 26 3.688879 0 0 640 +altern 26 3.688879 0 0 641 +rather 26 3.688879 0 0 642 +session 26 3.688879 0 0 643 +enhanc 26 3.688879 0 0 644 +experiment 26 3.688879 0 0 645 +detect 26 3.688879 0 0 646 +subject 26 3.688879 0 0 647 +compar 26 3.688879 0 0 648 +proc 26 3.688879 0 0 649 +comp 26 3.688879 0 0 650 +consist 26 3.688879 0 0 651 +effort 26 3.688879 0 0 652 +challeng 26 3.688879 0 0 653 +mine 26 3.688879 0 0 654 +enabl 26 3.688879 0 0 655 +repres 26 3.688879 0 0 656 +berkelei 26 3.688879 0 0 657 +pagecs 26 3.688879 0 0 658 +bound 26 3.688879 0 0 659 +enjoi 26 3.688879 0 0 660 +fundament 25 3.737670 0 0 661 +wai 25 3.737670 0 0 662 +aspect 25 3.737670 0 0 663 +background 25 3.737670 0 0 664 +valu 25 3.737670 0 0 665 +concern 25 3.737670 0 0 666 +although 25 3.737670 0 0 667 +toward 25 3.737670 0 0 668 +primari 25 3.737670 0 0 669 +hill 25 3.737670 0 0 670 +never 25 3.737670 0 0 671 +todai 25 3.737670 0 0 672 +jeff 25 3.737670 0 0 673 +reliabl 25 3.737670 0 0 674 +notic 25 3.737670 0 0 675 +spent 25 3.737670 0 0 676 +trace 25 3.737670 0 0 677 +task 25 3.737670 0 0 678 +client 25 3.737670 0 0 679 +accur 25 3.737670 0 0 680 +supercomput 25 3.737670 0 0 681 +strategi 25 3.737670 0 0 682 +sport 25 3.737670 0 0 683 +frame 24 3.761200 0 0 684 +handl 24 3.761200 0 0 685 +interpret 24 3.761200 0 0 686 +consult 24 3.761200 0 0 687 +reach 24 3.761200 0 0 688 +pattern 24 3.761200 0 0 689 +higher 24 3.761200 0 0 690 +alwai 24 3.761200 0 0 691 +wish 24 3.761200 0 0 692 +store 24 3.761200 0 0 693 +demonstr 24 3.761200 0 0 694 +greg 24 3.761200 0 0 695 +sometim 24 3.761200 0 0 696 +other 24 3.761200 0 0 697 +lab 24 3.761200 0 0 698 +motion 24 3.761200 0 0 699 +flow 24 3.761200 0 0 700 +fellow 24 3.761200 0 0 701 +known 24 3.761200 0 0 702 +mike 24 3.761200 0 0 703 +magazin 24 3.761200 0 0 704 +scalabl 24 3.761200 0 0 705 +daili 24 3.761200 0 0 706 +yahoo 24 3.761200 0 0 707 +seri 24 3.761200 0 0 708 +doctor 24 3.761200 0 0 709 +universityithaca 24 3.761200 0 0 710 +departmentunivers 24 3.761200 0 0 711 +displai 23 3.806662 0 0 712 +size 23 3.806662 0 0 713 +togeth 23 3.806662 0 0 714 +variabl 23 3.806662 0 0 715 +begin 23 3.806662 0 0 716 +initi 23 3.806662 0 0 717 +lead 23 3.806662 0 0 718 +compress 23 3.806662 0 0 719 +proof 23 3.806662 0 0 720 +thank 23 3.806662 0 0 721 +thread 23 3.806662 0 0 722 +recognit 23 3.806662 0 0 723 +equat 23 3.806662 0 0 724 +highli 23 3.806662 0 0 725 +brows 23 3.806662 0 0 726 +input 23 3.806662 0 0 727 +decis 23 3.806662 0 0 728 +honor 23 3.806662 0 0 729 +mobil 23 3.806662 0 0 730 +miscellan 23 3.806662 0 0 731 +head 23 3.806662 0 0 732 +methodolog 23 3.806662 0 0 733 +sequenc 23 3.806662 0 0 734 +famili 23 3.806662 0 0 735 +deal 22 3.850148 0 0 736 +recommend 22 3.850148 0 0 737 +sort 22 3.850148 0 0 738 +tent 22 3.850148 0 0 739 +varieti 22 3.850148 0 0 740 +inth 22 3.850148 0 0 741 +almost 22 3.850148 0 0 742 +period 22 3.850148 0 0 743 +hierarchi 22 3.850148 0 0 744 +sequenti 22 3.850148 0 0 745 +defin 22 3.850148 0 0 746 +disk 22 3.850148 0 0 747 +finish 22 3.850148 0 0 748 +properti 22 3.850148 0 0 749 +springer 22 3.850148 0 0 750 +verlag 22 3.850148 0 0 751 +geometri 22 3.850148 0 0 752 +dai 22 3.850148 0 0 753 +instal 22 3.850148 0 0 754 +emphasi 22 3.850148 0 0 755 +instead 22 3.850148 0 0 756 +cooper 22 3.850148 0 0 757 +serv 22 3.850148 0 0 758 +reduc 22 3.850148 0 0 759 +identifi 22 3.850148 0 0 760 +self 22 3.850148 0 0 761 +color 22 3.850148 0 0 762 +sent 22 3.850148 0 0 763 +try 22 3.850148 0 0 764 +william 22 3.850148 0 0 765 +ofwashington 22 3.850148 0 0 766 +director 22 3.850148 0 0 767 +sciencecornel 22 3.850148 0 0 768 +indian 22 3.850148 0 0 769 +chip 21 3.912023 0 0 770 +similar 21 3.912023 0 0 771 +leav 21 3.912023 0 0 772 +thu 21 3.912023 0 0 773 +util 21 3.912023 0 0 774 +annot 21 3.912023 0 0 775 +half 21 3.912023 0 0 776 +tell 21 3.912023 0 0 777 +path 21 3.912023 0 0 778 +unit 21 3.912023 0 0 779 +fact 21 3.912023 0 0 780 +among 21 3.912023 0 0 781 +programminglanguag 21 3.912023 0 0 782 +newsgroup 21 3.912023 0 0 783 +busi 21 3.912023 0 0 784 +latest 21 3.912023 0 0 785 +theorem 21 3.912023 0 0 786 +viewer 21 3.912023 0 0 787 +output 21 3.912023 0 0 788 +watch 21 3.912023 0 0 789 +wang 21 3.912023 0 0 790 +chen 21 3.912023 0 0 791 +flexibl 21 3.912023 0 0 792 +rout 21 3.912023 0 0 793 +martin 21 3.912023 0 0 794 +vlsi 21 3.912023 0 0 795 +navig 21 3.912023 0 0 796 +theunivers 21 3.912023 0 0 797 +born 21 3.912023 0 0 798 +avoid 21 3.912023 0 0 799 +siam 21 3.912023 0 0 800 +hous 21 3.912023 0 0 801 +corpor 21 3.912023 0 0 802 +divis 21 3.912023 0 0 803 +love 21 3.912023 0 0 804 +fund 21 3.912023 0 0 805 +voic 21 3.912023 0 0 806 +alumni 21 3.912023 0 0 807 +reserv 20 3.951244 0 0 808 +nice 20 3.951244 0 0 809 +minut 20 3.951244 0 0 810 +entir 20 3.951244 0 0 811 +break 20 3.951244 0 0 812 +sure 20 3.951244 0 0 813 +facil 20 3.951244 0 0 814 +wonder 20 3.951244 0 0 815 +longer 20 3.951244 0 0 816 +safeti 20 3.951244 0 0 817 +scheme 20 3.951244 0 0 818 +portabl 20 3.951244 0 0 819 +smith 20 3.951244 0 0 820 +alloc 20 3.951244 0 0 821 +fine 20 3.951244 0 0 822 +binari 20 3.951244 0 0 823 +prepar 20 3.951244 0 0 824 +kernel 20 3.951244 0 0 825 +verif 20 3.951244 0 0 826 +applet 20 3.951244 0 0 827 +basi 20 3.951244 0 0 828 +increas 20 3.951244 0 0 829 +wrote 20 3.951244 0 0 830 +mpeg 20 3.951244 0 0 831 +qualiti 20 3.951244 0 0 832 +expert 20 3.951244 0 0 833 +synthesi 20 3.951244 0 0 834 +toolkit 20 3.951244 0 0 835 +exploit 20 3.951244 0 0 836 +grad 20 3.951244 0 0 837 +tenni 20 3.951244 0 0 838 +department 20 3.951244 0 0 839 +supervis 20 3.951244 0 0 840 +five 19 4.007333 0 0 841 +exercis 19 4.007333 0 0 842 +els 19 4.007333 0 0 843 +separ 19 4.007333 0 0 844 +assum 19 4.007333 0 0 845 +prerequisit 19 4.007333 0 0 846 +thur 19 4.007333 0 0 847 +prove 19 4.007333 0 0 848 +andrew 19 4.007333 0 0 849 +spend 19 4.007333 0 0 850 +left 19 4.007333 0 0 851 +geometr 19 4.007333 0 0 852 +histori 19 4.007333 0 0 853 +feedback 19 4.007333 0 0 854 +predict 19 4.007333 0 0 855 +item 19 4.007333 0 0 856 +log 19 4.007333 0 0 857 +runtim 19 4.007333 0 0 858 +benchmark 19 4.007333 0 0 859 +anderson 19 4.007333 0 0 860 +media 19 4.007333 0 0 861 +boston 19 4.007333 0 0 862 +comparison 19 4.007333 0 0 863 +definit 19 4.007333 0 0 864 +hypertext 19 4.007333 0 0 865 +miss 19 4.007333 0 0 866 +particularli 19 4.007333 0 0 867 +excel 19 4.007333 0 0 868 +mostli 19 4.007333 0 0 869 +eric 19 4.007333 0 0 870 +lyco 19 4.007333 0 0 871 +ever 19 4.007333 0 0 872 +north 19 4.007333 0 0 873 +citi 19 4.007333 0 0 874 +region 19 4.007333 0 0 875 +beij 19 4.007333 0 0 876 +sigmod 19 4.007333 0 0 877 +along 18 4.060443 0 0 878 +accept 18 4.060443 0 0 879 +encourag 18 4.060443 0 0 880 +behavior 18 4.060443 0 0 881 +account 18 4.060443 0 0 882 +appropri 18 4.060443 1 0 883 +scott 18 4.060443 0 0 884 +statu 18 4.060443 0 0 885 +lower 18 4.060443 0 0 886 +minim 18 4.060443 0 0 887 +demo 18 4.060443 0 0 888 +lot 18 4.060443 0 0 889 +record 18 4.060443 0 0 890 +stand 18 4.060443 0 0 891 +less 18 4.060443 0 0 892 +attend 18 4.060443 0 0 893 +offici 18 4.060443 0 0 894 +element 18 4.060443 0 0 895 +event 18 4.060443 0 0 896 +lisp 18 4.060443 0 0 897 +failur 18 4.060443 0 0 898 +seem 18 4.060443 0 0 899 +partial 18 4.060443 0 0 900 +thoma 18 4.060443 0 0 901 +bershad 18 4.060443 0 0 902 +figur 18 4.060443 0 0 903 +aid 18 4.060443 0 0 904 +four 18 4.060443 0 0 905 +concentr 18 4.060443 0 0 906 +listen 18 4.060443 0 0 907 +wind 18 4.060443 0 0 908 +dimension 18 4.060443 0 0 909 +agent 18 4.060443 0 0 910 +speed 18 4.060443 0 0 911 +beauti 18 4.060443 0 0 912 +modif 17 4.110874 0 0 913 +outlin 17 4.110874 0 0 914 +intro 17 4.110874 0 0 915 +anyon 17 4.110874 0 0 916 +attempt 17 4.110874 0 0 917 +whether 17 4.110874 0 0 918 +weekli 17 4.110874 0 0 919 +macintosh 17 4.110874 0 0 920 +differenti 17 4.110874 0 0 921 +otherwis 17 4.110874 0 0 922 +previous 17 4.110874 0 0 923 +stat 17 4.110874 0 0 924 +analyz 17 4.110874 0 0 925 +layer 17 4.110874 0 0 926 +edulast 17 4.110874 0 0 927 +expand 17 4.110874 0 0 928 +regular 17 4.110874 0 0 929 +estim 17 4.110874 0 0 930 +segment 17 4.110874 0 0 931 +repositori 17 4.110874 0 0 932 +matrix 17 4.110874 0 0 933 +adam 17 4.110874 0 0 934 +protect 17 4.110874 0 0 935 +moor 17 4.110874 0 0 936 +interconnect 17 4.110874 0 0 937 +regist 17 4.110874 0 0 938 +coupl 17 4.110874 0 0 939 +whole 17 4.110874 0 0 940 +monitor 17 4.110874 0 0 941 +stop 17 4.110874 0 0 942 +ultim 17 4.110874 0 0 943 +debug 17 4.110874 0 0 944 +thought 17 4.110874 0 0 945 +germani 17 4.110874 0 0 946 +render 17 4.110874 0 0 947 +asplo 17 4.110874 0 0 948 +miller 17 4.110874 0 0 949 +walter 17 4.110874 0 0 950 +white 17 4.110874 0 0 951 +sept 17 4.110874 0 0 952 +steven 17 4.110874 0 0 953 +seek 17 4.110874 0 0 954 +stanford 17 4.110874 0 0 955 +former 17 4.110874 0 0 956 +bachelor 17 4.110874 0 0 957 +medic 17 4.110874 0 0 958 +engineeringunivers 17 4.110874 0 0 959 +practicum 16 4.174387 0 0 960 +vector 16 4.174387 0 0 961 +permit 16 4.174387 0 0 962 +alreadi 16 4.174387 0 0 963 +choos 16 4.174387 0 0 964 +match 16 4.174387 0 0 965 +modern 16 4.174387 0 0 966 +transfer 16 4.174387 0 0 967 +earli 16 4.174387 0 0 968 +easi 16 4.174387 0 0 969 +sign 16 4.174387 0 0 970 +portion 16 4.174387 0 0 971 +ramakrishnan 16 4.174387 0 0 972 +sheet 16 4.174387 0 0 973 +across 16 4.174387 0 0 974 +condit 16 4.174387 0 0 975 +jose 16 4.174387 0 0 976 +brown 16 4.174387 0 0 977 +upon 16 4.174387 0 0 978 +choic 16 4.174387 0 0 979 +zhang 16 4.174387 0 0 980 +letter 16 4.174387 0 0 981 +critic 16 4.174387 0 0 982 +weslei 16 4.174387 0 0 983 +partit 16 4.174387 0 0 984 +explan 16 4.174387 0 0 985 +cognit 16 4.174387 0 0 986 +advantag 16 4.174387 0 0 987 +spatial 16 4.174387 1 0 988 +spars 16 4.174387 0 0 989 +quiz 16 4.174387 0 0 990 +young 16 4.174387 0 0 991 +diego 16 4.174387 0 0 992 +latenc 16 4.174387 0 0 993 +georg 16 4.174387 0 0 994 +normal 16 4.174387 0 0 995 +dilbert 16 4.174387 0 0 996 +chateau 16 4.174387 0 0 997 +anyth 16 4.174387 0 0 998 +fourth 16 4.174387 0 0 999 +intel 16 4.174387 0 0 1000 +brief 16 4.174387 0 0 1001 +devic 16 4.174387 0 0 1002 +misconduct 16 4.174387 0 0 1003 +women 16 4.174387 0 0 1004 +commerci 16 4.174387 0 0 1005 +taiwan 16 4.174387 0 0 1006 +stock 16 4.174387 0 0 1007 +cambridg 16 4.174387 0 0 1008 +hobbi 16 4.174387 0 0 1009 +took 16 4.174387 0 0 1010 +pagec 15 4.248495 0 0 1011 +configur 15 4.248495 0 0 1012 +indic 15 4.248495 0 0 1013 +mayb 15 4.248495 0 0 1014 +stream 15 4.248495 0 0 1015 +capabl 15 4.248495 0 0 1016 +score 15 4.248495 0 0 1017 +hierarch 15 4.248495 0 0 1018 +atth 15 4.248495 0 0 1019 +piec 15 4.248495 0 0 1020 +contribut 15 4.248495 0 0 1021 +side 15 4.248495 0 0 1022 +precis 15 4.248495 0 0 1023 +carl 15 4.248495 0 0 1024 +month 15 4.248495 0 0 1025 +massiv 15 4.248495 0 0 1026 +fortran 15 4.248495 0 0 1027 +charact 15 4.248495 0 0 1028 +track 15 4.248495 0 0 1029 +purchas 15 4.248495 0 0 1030 +micro 15 4.248495 0 0 1031 +ascii 15 4.248495 0 0 1032 +english 15 4.248495 0 0 1033 +reflect 15 4.248495 0 0 1034 +overhead 15 4.248495 0 0 1035 +style 15 4.248495 0 0 1036 +rate 15 4.248495 0 0 1037 +action 15 4.248495 0 0 1038 +novel 15 4.248495 0 0 1039 +enough 15 4.248495 0 0 1040 +remot 15 4.248495 0 0 1041 +princeton 15 4.248495 0 0 1042 +later 15 4.248495 0 0 1043 +goe 15 4.248495 0 0 1044 +webmast 15 4.248495 0 0 1045 +transit 15 4.248495 0 0 1046 +anywai 15 4.248495 0 0 1047 +driven 15 4.248495 0 0 1048 +biologi 15 4.248495 0 0 1049 +susan 15 4.248495 0 0 1050 +todd 15 4.248495 0 0 1051 +drive 15 4.248495 0 0 1052 +livni 15 4.248495 0 0 1053 +psycholog 15 4.248495 0 0 1054 +doesn 15 4.248495 0 0 1055 +photograph 15 4.248495 0 0 1056 +hybrid 15 4.248495 0 0 1057 +club 15 4.248495 0 0 1058 +countri 15 4.248495 0 0 1059 +eduphon 15 4.248495 0 0 1060 +universityof 15 4.248495 0 0 1061 +qual 15 4.248495 0 0 1062 +rank 14 4.317488 0 0 1063 +latex 14 4.317488 0 0 1064 +conduct 14 4.317488 0 0 1065 +train 14 4.317488 0 0 1066 +borland 14 4.317488 0 0 1067 +warn 14 4.317488 0 0 1068 +polynomi 14 4.317488 0 0 1069 +floor 14 4.317488 0 0 1070 +hopefulli 14 4.317488 0 0 1071 +convent 14 4.317488 0 0 1072 +demand 14 4.317488 0 0 1073 +manner 14 4.317488 0 0 1074 +decid 14 4.317488 0 0 1075 +consider 14 4.317488 0 0 1076 +easili 14 4.317488 0 0 1077 +split 14 4.317488 0 0 1078 +happi 14 4.317488 0 0 1079 +shown 14 4.317488 0 0 1080 +matlab 14 4.317488 0 0 1081 +squar 14 4.317488 0 0 1082 +command 14 4.317488 0 0 1083 +classic 14 4.317488 0 0 1084 +draft 14 4.317488 0 0 1085 +draw 14 4.317488 0 0 1086 +stori 14 4.317488 0 0 1087 +achiev 14 4.317488 0 0 1088 +topolog 14 4.317488 0 0 1089 +heterogen 14 4.317488 0 0 1090 +near 14 4.317488 0 0 1091 +attribut 14 4.317488 0 0 1092 +levi 14 4.317488 0 0 1093 +audio 14 4.317488 0 0 1094 +francisco 14 4.317488 0 0 1095 +incomput 14 4.317488 0 0 1096 +reprint 14 4.317488 0 0 1097 +dave 14 4.317488 0 0 1098 +save 14 4.317488 0 0 1099 +anonym 14 4.317488 0 0 1100 +role 14 4.317488 0 0 1101 +embed 14 4.317488 0 0 1102 +comic 14 4.317488 0 0 1103 +dean 14 4.317488 0 0 1104 +hong 14 4.317488 0 0 1105 +finit 14 4.317488 0 0 1106 +nonlinear 14 4.317488 0 0 1107 +chuck 14 4.317488 0 0 1108 +coher 14 4.317488 0 0 1109 +miron 14 4.317488 0 0 1110 +doit 14 4.317488 0 0 1111 +balanc 14 4.317488 0 0 1112 +trip 14 4.317488 0 0 1113 +scene 14 4.317488 0 0 1114 +camera 14 4.317488 0 0 1115 +horu 14 4.317488 0 0 1116 +becam 14 4.317488 0 0 1117 +massachusett 14 4.317488 0 0 1118 +researchmi 14 4.317488 0 0 1119 +senior 14 4.317488 0 0 1120 +spin 14 4.317488 0 0 1121 +convert 13 4.382027 0 0 1122 +social 13 4.382027 0 0 1123 +misc 13 4.382027 0 0 1124 +signific 13 4.382027 0 0 1125 +individu 13 4.382027 0 0 1126 +recurs 13 4.382027 0 0 1127 +someon 13 4.382027 0 0 1128 +suit 13 4.382027 0 0 1129 +joint 13 4.382027 0 0 1130 +circuit 13 4.382027 0 0 1131 +difficulti 13 4.382027 0 0 1132 +thorsten 13 4.382027 0 0 1133 +eicken 13 4.382027 0 0 1134 +automata 13 4.382027 0 0 1135 +dbm 13 4.382027 0 0 1136 +essenti 13 4.382027 0 0 1137 +step 13 4.382027 0 0 1138 +introduc 13 4.382027 0 0 1139 +earlier 13 4.382027 0 0 1140 +directli 13 4.382027 0 0 1141 +larri 13 4.382027 0 0 1142 +emac 13 4.382027 0 0 1143 +cannot 13 4.382027 0 0 1144 +deriv 13 4.382027 0 0 1145 +alan 13 4.382027 0 0 1146 +necessari 13 4.382027 0 0 1147 +everyon 13 4.382027 0 0 1148 +charl 13 4.382027 0 0 1149 +composit 13 4.382027 0 0 1150 +quizz 13 4.382027 0 0 1151 +carri 13 4.382027 0 0 1152 +context 13 4.382027 0 0 1153 +front 13 4.382027 0 0 1154 +prolog 13 4.382027 0 0 1155 +menu 13 4.382027 0 0 1156 +station 13 4.382027 0 0 1157 +canada 13 4.382027 0 0 1158 +weak 13 4.382027 0 0 1159 +central 13 4.382027 0 0 1160 +opportun 13 4.382027 0 0 1161 +johnson 13 4.382027 0 0 1162 +incorpor 13 4.382027 0 0 1163 +translat 13 4.382027 0 0 1164 +discret 13 4.382027 0 0 1165 +whose 13 4.382027 0 0 1166 +philosophi 13 4.382027 0 0 1167 +wait 13 4.382027 0 0 1168 +everyth 13 4.382027 0 0 1169 +unfortun 13 4.382027 0 0 1170 +script 13 4.382027 0 0 1171 +resolut 13 4.382027 0 0 1172 +sigmetr 13 4.382027 0 0 1173 +jonathan 13 4.382027 0 0 1174 +sai 13 4.382027 0 0 1175 +karlin 13 4.382027 0 0 1176 +care 13 4.382027 0 0 1177 +bodi 13 4.382027 0 0 1178 +mellon 13 4.382027 0 0 1179 +nick 13 4.382027 0 0 1180 +conf 13 4.382027 0 0 1181 +coordin 13 4.382027 0 0 1182 +block 13 4.382027 0 0 1183 +econom 13 4.382027 0 0 1184 +primarili 13 4.382027 0 0 1185 +forth 13 4.382027 0 0 1186 +believ 13 4.382027 0 0 1187 +nasa 13 4.382027 0 0 1188 +brother 13 4.382027 0 0 1189 +sigplan 13 4.382027 0 0 1190 +pretti 13 4.382027 0 0 1191 +galleri 13 4.382027 0 0 1192 +edui 13 4.382027 0 0 1193 +affili 13 4.382027 0 0 1194 +tsinghua 13 4.382027 0 0 1195 +wife 13 4.382027 0 0 1196 +came 13 4.382027 0 0 1197 +usavoic 13 4.382027 0 0 1198 +hotlist 13 4.382027 0 0 1199 +washingtonbox 13 4.382027 0 0 1200 +prelim 12 4.465908 0 0 1201 +huang 12 4.465908 1 0 1202 +calculu 12 4.465908 0 0 1203 +weight 12 4.465908 0 0 1204 +skill 12 4.465908 0 0 1205 +iter 12 4.465908 0 0 1206 +assembl 12 4.465908 0 0 1207 +amount 12 4.465908 0 0 1208 +grow 12 4.465908 0 0 1209 +workload 12 4.465908 0 0 1210 +buffer 12 4.465908 0 0 1211 +raghu 12 4.465908 0 0 1212 +pascal 12 4.465908 0 0 1213 +meng 12 4.465908 0 0 1214 +stai 12 4.465908 0 0 1215 +grant 12 4.465908 0 0 1216 +rememb 12 4.465908 0 0 1217 +onth 12 4.465908 0 0 1218 +outsid 12 4.465908 0 0 1219 +guest 12 4.465908 0 0 1220 +optic 12 4.465908 0 0 1221 +speech 12 4.465908 0 0 1222 +evolv 12 4.465908 0 0 1223 +holidai 12 4.465908 0 0 1224 +remov 12 4.465908 0 0 1225 +bruce 12 4.465908 0 0 1226 +tune 12 4.465908 0 0 1227 +uniqu 12 4.465908 0 0 1228 +asynchron 12 4.465908 0 0 1229 +addison 12 4.465908 0 0 1230 +replic 12 4.465908 0 0 1231 +captur 12 4.465908 0 0 1232 +daniel 12 4.465908 0 0 1233 +infrastructur 12 4.465908 0 0 1234 +promot 12 4.465908 0 0 1235 +deduct 12 4.465908 0 0 1236 +minor 12 4.465908 0 0 1237 +solari 12 4.465908 0 0 1238 +count 12 4.465908 0 0 1239 +usenix 12 4.465908 0 0 1240 +gupta 12 4.465908 0 0 1241 +readi 12 4.465908 0 0 1242 +scan 12 4.465908 0 0 1243 +philadelphia 12 4.465908 0 0 1244 +shape 12 4.465908 0 0 1245 +reader 12 4.465908 0 0 1246 +hypermedia 12 4.465908 0 0 1247 +duli 12 4.465908 0 0 1248 +web 12 4.465908 0 0 1249 +permanentlymov 12 4.465908 0 0 1250 +permanentlyth 12 4.465908 0 0 1251 +loew 12 4.465908 0 0 1252 +hank 12 4.465908 0 0 1253 +overal 12 4.465908 0 0 1254 +denis 12 4.465908 0 0 1255 +nanci 12 4.465908 0 0 1256 +characterist 12 4.465908 0 0 1257 +readabl 12 4.465908 0 0 1258 +rest 12 4.465908 0 0 1259 +carnegi 12 4.465908 0 0 1260 +verifi 12 4.465908 0 0 1261 +insid 12 4.465908 0 0 1262 +neat 12 4.465908 0 0 1263 +brad 12 4.465908 0 0 1264 +kenneth 12 4.465908 0 0 1265 +mari 12 4.465908 0 0 1266 +vectra 12 4.465908 0 0 1267 +calcul 12 4.465908 0 0 1268 +savitch 12 4.465908 0 0 1269 +dewitt 12 4.465908 0 0 1270 +robust 12 4.465908 0 0 1271 +realiti 12 4.465908 0 0 1272 +land 12 4.465908 0 0 1273 +safe 12 4.465908 0 0 1274 +pageif 12 4.465908 0 0 1275 +franc 12 4.465908 0 0 1276 +avenu 12 4.465908 0 0 1277 +danc 12 4.465908 0 0 1278 +went 12 4.465908 0 0 1279 +newspap 12 4.465908 0 0 1280 +walk 12 4.465908 0 0 1281 +target 12 4.465908 0 0 1282 +speak 12 4.465908 0 0 1283 +emploi 12 4.465908 0 0 1284 +food 12 4.465908 0 0 1285 +entertain 12 4.465908 0 0 1286 +career 12 4.465908 0 0 1287 +touch 12 4.465908 0 0 1288 +basketbal 12 4.465908 0 0 1289 +fromindividu 12 4.465908 0 0 1290 +employ 12 4.465908 0 0 1291 +anna 12 4.465908 0 0 1292 +systemsc 11 4.553877 0 0 1293 +worth 11 4.553877 0 0 1294 +summar 11 4.553877 0 0 1295 +benjamin 11 4.553877 0 0 1296 +bill 11 4.553877 0 0 1297 +submiss 11 4.553877 0 0 1298 +valid 11 4.553877 0 0 1299 +literatur 11 4.553877 0 0 1300 +surf 11 4.553877 0 0 1301 +broad 11 4.553877 0 0 1302 +appl 11 4.553877 0 0 1303 +induct 11 4.553877 0 0 1304 +sens 11 4.553877 0 0 1305 +host 11 4.553877 0 0 1306 +tour 11 4.553877 0 0 1307 +tue 11 4.553877 0 0 1308 +regard 11 4.553877 0 0 1309 +loop 11 4.553877 0 0 1310 +chri 11 4.553877 0 0 1311 +extra 11 4.553877 0 0 1312 +statement 11 4.553877 0 0 1313 +evolut 11 4.553877 0 0 1314 +multithread 11 4.553877 0 0 1315 +peter 11 4.553877 0 0 1316 +primit 11 4.553877 0 0 1317 +branch 11 4.553877 0 0 1318 +night 11 4.553877 0 0 1319 +clock 11 4.553877 0 0 1320 +denni 11 4.553877 0 0 1321 +instanc 11 4.553877 0 0 1322 +faster 11 4.553877 0 0 1323 +vladimir 11 4.553877 0 0 1324 +transpar 11 4.553877 0 0 1325 +node 11 4.553877 0 0 1326 +fix 11 4.553877 0 0 1327 +noth 11 4.553877 0 0 1328 +excit 11 4.553877 0 0 1329 +extrem 11 4.553877 0 0 1330 +eight 11 4.553877 0 0 1331 +perl 11 4.553877 0 0 1332 +reness 11 4.553877 0 0 1333 +impact 11 4.553877 0 0 1334 +cycl 11 4.553877 0 0 1335 +rice 11 4.553877 0 0 1336 +see 11 4.553877 0 0 1337 +regularli 11 4.553877 0 0 1338 +nonprofit 11 4.553877 0 0 1339 +string 11 4.553877 0 0 1340 +abil 11 4.553877 0 0 1341 +stephen 11 4.553877 0 0 1342 +probabilist 11 4.553877 0 0 1343 +pagewelcom 11 4.553877 0 0 1344 +island 11 4.553877 0 0 1345 +motiv 11 4.553877 0 0 1346 +volum 11 4.553877 0 0 1347 +alpha 11 4.553877 0 0 1348 +fill 11 4.553877 0 0 1349 +thedepart 11 4.553877 0 0 1350 +mesh 11 4.553877 0 0 1351 +smart 11 4.553877 0 0 1352 +baer 11 4.553877 0 0 1353 +isca 11 4.553877 0 0 1354 +wood 11 4.553877 0 0 1355 +keyword 11 4.553877 0 0 1356 +distinguish 11 4.553877 0 0 1357 +magic 11 4.553877 0 0 1358 +arbitrari 11 4.553877 0 0 1359 +typic 11 4.553877 0 0 1360 +market 11 4.553877 0 0 1361 +qualit 11 4.553877 0 0 1362 +refin 11 4.553877 0 0 1363 +council 11 4.553877 0 0 1364 +bandwidth 11 4.553877 0 0 1365 +israel 11 4.553877 0 0 1366 +persist 11 4.553877 0 0 1367 +michigan 11 4.553877 0 0 1368 +arpa 11 4.553877 0 0 1369 +america 11 4.553877 0 0 1370 +player 11 4.553877 0 0 1371 +perman 11 4.553877 0 1 1372 +lake 11 4.553877 0 0 1373 +road 11 4.553877 0 0 1374 +scienceat 11 4.553877 0 0 1375 +mapl 11 4.553877 0 0 1376 +shore 11 4.553877 0 0 1377 +itali 11 4.553877 0 0 1378 +moment 11 4.553877 0 0 1379 +song 11 4.553877 0 0 1380 +cheng 10 4.653960 0 0 1381 +correspond 10 4.653960 0 0 1382 +relationship 10 4.653960 0 0 1383 +forc 10 4.653960 0 0 1384 +werner 10 4.653960 0 0 1385 +queue 10 4.653960 0 0 1386 +sundai 10 4.653960 0 0 1387 +arithmet 10 4.653960 0 0 1388 +stack 10 4.653960 0 0 1389 +success 10 4.653960 0 0 1390 +guarante 10 4.653960 0 0 1391 +modular 10 4.653960 0 0 1392 +certain 10 4.653960 0 0 1393 +linda 10 4.653960 0 0 1394 +cheat 10 4.653960 0 0 1395 +rich 10 4.653960 0 0 1396 +princip 10 4.653960 0 0 1397 +total 10 4.653960 0 0 1398 +matric 10 4.653960 0 0 1399 +length 10 4.653960 0 0 1400 +operatingsystem 10 4.653960 0 0 1401 +nuprl 10 4.653960 0 0 1402 +errata 10 4.653960 0 0 1403 +tradit 10 4.653960 0 0 1404 +penalti 10 4.653960 0 0 1405 +strongli 10 4.653960 0 0 1406 +hello 10 4.653960 0 0 1407 +thecomput 10 4.653960 0 0 1408 +genet 10 4.653960 0 0 1409 +underli 10 4.653960 0 0 1410 +reli 10 4.653960 0 0 1411 +facilit 10 4.653960 0 0 1412 +sentenc 10 4.653960 0 0 1413 +custom 10 4.653960 0 0 1414 +packet 10 4.653960 0 0 1415 +sosp 10 4.653960 0 0 1416 +henri 10 4.653960 0 0 1417 +black 10 4.653960 0 0 1418 +hint 10 4.653960 0 0 1419 +placement 10 4.653960 0 0 1420 +traffic 10 4.653960 0 0 1421 +true 10 4.653960 0 0 1422 +label 10 4.653960 0 0 1423 +equal 10 4.653960 0 0 1424 +subset 10 4.653960 0 0 1425 +mosaic 10 4.653960 0 0 1426 +engr 10 4.653960 0 0 1427 +invit 10 4.653960 0 0 1428 +tanimoto 10 4.653960 0 0 1429 +bring 10 4.653960 0 0 1430 +catalog 10 4.653960 0 0 1431 +mainli 10 4.653960 0 0 1432 +fpga 10 4.653960 0 0 1433 +modul 10 4.653960 0 0 1434 +card 10 4.653960 0 0 1435 +franklin 10 4.653960 0 0 1436 +perspect 10 4.653960 0 0 1437 +prior 10 4.653960 0 0 1438 +decomposit 10 4.653960 0 0 1439 +jean 10 4.653960 0 0 1440 +santa 10 4.653960 0 0 1441 +ofcomput 10 4.653960 0 0 1442 +guess 10 4.653960 0 0 1443 +packard 10 4.653960 0 0 1444 +desktop 10 4.653960 0 0 1445 +wendt 10 4.653960 0 0 1446 +awai 10 4.653960 0 0 1447 +grain 10 4.653960 0 0 1448 +paragraph 10 4.653960 0 0 1449 +naughton 10 4.653960 0 0 1450 +devis 10 4.653960 0 0 1451 +vista 10 4.653960 0 0 1452 +rapid 10 4.653960 0 0 1453 +enter 10 4.653960 0 0 1454 +metacrawl 10 4.653960 0 0 1455 +mountain 10 4.653960 0 0 1456 +death 10 4.653960 0 0 1457 +town 10 4.653960 0 0 1458 +equip 10 4.653960 0 0 1459 +fellowship 10 4.653960 0 0 1460 +resid 10 4.653960 0 0 1461 +interestsmi 10 4.653960 0 0 1462 +earth 10 4.653960 0 0 1463 +cook 10 4.653960 0 0 1464 +acquisit 10 4.653960 0 0 1465 +purdu 10 4.653960 0 0 1466 +consortium 10 4.653960 0 0 1467 +bike 10 4.653960 0 0 1468 +shop 10 4.653960 0 0 1469 +vldb 10 4.653960 0 0 1470 +ski 10 4.653960 0 0 1471 +mepost 10 4.653960 0 0 1472 +edutelephon 10 4.653960 0 0 1473 +recoveri 9 4.753590 0 0 1474 +recit 9 4.753590 0 0 1475 +admin 9 4.753590 0 0 1476 +criteria 9 4.753590 0 0 1477 +correctli 9 4.753590 0 0 1478 +introductori 9 4.753590 0 0 1479 +preliminari 9 4.753590 0 0 1480 +andth 9 4.753590 0 0 1481 +kevin 9 4.753590 0 0 1482 +herefor 9 4.753590 0 0 1483 +morgan 9 4.753590 0 0 1484 +familiar 9 4.753590 0 0 1485 +suitabl 9 4.753590 0 0 1486 +rel 9 4.753590 0 0 1487 +clear 9 4.753590 0 0 1488 +notat 9 4.753590 0 0 1489 +entitl 9 4.753590 0 0 1490 +prefer 9 4.753590 0 0 1491 +mode 9 4.753590 0 0 1492 +debugg 9 4.753590 0 0 1493 +strength 9 4.753590 0 0 1494 +plain 9 4.753590 0 0 1495 +equival 9 4.753590 0 0 1496 +intermedi 9 4.753590 0 0 1497 +pick 9 4.753590 0 0 1498 +hang 9 4.753590 0 0 1499 +distanc 9 4.753590 0 0 1500 +face 9 4.753590 0 0 1501 +deadlin 9 4.753590 0 0 1502 +pair 9 4.753590 0 0 1503 +float 9 4.753590 0 0 1504 +meta 9 4.753590 0 0 1505 +kumar 9 4.753590 0 0 1506 +russel 9 4.753590 0 0 1507 +significantli 9 4.753590 0 0 1508 +exact 9 4.753590 0 0 1509 +donald 9 4.753590 0 0 1510 +french 9 4.753590 0 0 1511 +cryptographi 9 4.753590 0 0 1512 +imposs 9 4.753590 0 0 1513 +assumpt 9 4.753590 0 0 1514 +informationabout 9 4.753590 0 0 1515 +classmat 9 4.753590 0 0 1516 +doug 9 4.753590 0 0 1517 +calvin 9 4.753590 0 0 1518 +tuth 9 4.753590 0 0 1519 +moonei 9 4.753590 0 0 1520 +novak 9 4.753590 0 0 1521 +latter 9 4.753590 0 0 1522 +risto 9 4.753590 0 0 1523 +sister 9 4.753590 0 0 1524 +explicit 9 4.753590 1 0 1525 +declar 9 4.753590 0 0 1526 +compos 9 4.753590 0 0 1527 +hundr 9 4.753590 0 0 1528 +robbert 9 4.753590 0 0 1529 +inter 9 4.753590 0 0 1530 +birman 9 4.753590 0 0 1531 +establish 9 4.753590 0 0 1532 +light 9 4.753590 0 0 1533 +osdi 9 4.753590 0 0 1534 +pose 9 4.753590 0 0 1535 +wilson 9 4.753590 0 0 1536 +classifi 9 4.753590 0 0 1537 +weld 9 4.753590 0 0 1538 +departmentof 9 4.753590 0 0 1539 +leveson 9 4.753590 0 0 1540 +subscrib 9 4.753590 0 0 1541 +desir 9 4.753590 0 0 1542 +mainten 9 4.753590 0 0 1543 +factor 9 4.753590 0 0 1544 +respect 9 4.753590 0 0 1545 +motorola 9 4.753590 0 0 1546 +cecil 9 4.753590 0 0 1547 +kurt 9 4.753590 0 0 1548 +routin 9 4.753590 0 0 1549 +login 9 4.753590 0 0 1550 +lock 9 4.753590 0 0 1551 +tutor 9 4.753590 0 0 1552 +wall 9 4.753590 0 0 1553 +patterson 9 4.753590 0 0 1554 +minimum 9 4.753590 0 0 1555 +vernon 9 4.753590 0 0 1556 +voelker 9 4.753590 0 0 1557 +elimin 9 4.753590 0 0 1558 +bart 9 4.753590 0 0 1559 +laru 9 4.753590 0 0 1560 +seven 9 4.753590 0 0 1561 +discov 9 4.753590 0 0 1562 +didn 9 4.753590 0 0 1563 +informationemail 9 4.753590 0 0 1564 +telecommun 9 4.753590 0 0 1565 +unusu 9 4.753590 0 0 1566 +end 9 4.753590 0 0 1567 +frank 9 4.753590 0 0 1568 +mention 9 4.753590 0 0 1569 +mangasarian 9 4.753590 0 0 1570 +said 9 4.753590 0 0 1571 +occur 9 4.753590 0 0 1572 +dyer 9 4.753590 0 0 1573 +surfac 9 4.753590 0 0 1574 +incomplet 9 4.753590 0 0 1575 +leader 9 4.753590 0 0 1576 +screen 9 4.753590 0 0 1577 +observ 9 4.753590 0 0 1578 +rhode 9 4.753590 0 0 1579 +juan 9 4.753590 0 0 1580 +govern 9 4.753590 0 0 1581 +charg 9 4.753590 0 0 1582 +trust 9 4.753590 0 0 1583 +tempor 9 4.753590 0 0 1584 +utah 9 4.753590 0 0 1585 +classif 9 4.753590 0 0 1586 +jersei 9 4.753590 0 0 1587 +transmiss 9 4.753590 0 0 1588 +undergrad 9 4.753590 0 0 1589 +palo 9 4.753590 0 0 1590 +alto 9 4.753590 0 0 1591 +congress 9 4.753590 0 0 1592 +linguist 9 4.753590 0 0 1593 +softbal 9 4.753590 0 0 1594 +conferenceon 9 4.753590 0 0 1595 +poetri 9 4.753590 0 0 1596 +folk 9 4.753590 0 0 1597 +volleybal 9 4.753590 0 0 1598 +swim 9 4.753590 0 0 1599 +usaphon 9 4.753590 0 0 1600 +yellow 9 4.753590 0 0 1601 +kong 9 4.753590 0 0 1602 +jump 9 4.753590 0 0 1603 +vice 9 4.753590 0 0 1604 +sound 9 4.753590 0 0 1605 +candid 9 4.753590 0 0 1606 +drink 9 4.753590 0 0 1607 +ball 9 4.753590 0 0 1608 +wong 9 4.753590 0 0 1609 +ataustin 9 4.753590 0 0 1610 +editori 9 4.753590 0 0 1611 +jeffrei 9 4.753590 0 0 1612 +modula 9 4.753590 0 0 1613 +paradyn 9 4.753590 0 0 1614 +tunnel 9 4.753590 0 0 1615 +crash 8 4.875197 0 0 1616 +databasesystem 8 4.875197 0 0 1617 +hash 8 4.875197 0 0 1618 +cum 8 4.875197 0 0 1619 +joke 8 4.875197 0 0 1620 +printer 8 4.875197 0 0 1621 +vogel 8 4.875197 0 0 1622 +andcomput 8 4.875197 0 0 1623 +architect 8 4.875197 0 0 1624 +dylan 8 4.875197 0 0 1625 +simpli 8 4.875197 0 0 1626 +matter 8 4.875197 0 0 1627 +on 8 4.875197 0 0 1628 +combinatori 8 4.875197 0 0 1629 +ideal 8 4.875197 0 0 1630 +leon 8 4.875197 0 0 1631 +rivl 8 4.875197 0 0 1632 +evan 8 4.875197 0 0 1633 +manufactur 8 4.875197 0 0 1634 +competit 8 4.875197 0 0 1635 +depth 8 4.875197 0 0 1636 +contrast 8 4.875197 0 0 1637 +brain 8 4.875197 0 0 1638 +vineet 8 4.875197 0 0 1639 +spec 8 4.875197 0 0 1640 +filter 8 4.875197 0 0 1641 +dictionari 8 4.875197 0 0 1642 +closur 8 4.875197 0 0 1643 +judg 8 4.875197 0 0 1644 +hold 8 4.875197 0 0 1645 +absolut 8 4.875197 0 0 1646 +edg 8 4.875197 0 0 1647 +partner 8 4.875197 0 0 1648 +calendar 8 4.875197 0 0 1649 +root 8 4.875197 0 0 1650 +attent 8 4.875197 0 0 1651 +yang 8 4.875197 0 0 1652 +prover 8 4.875197 0 0 1653 +quantit 8 4.875197 0 0 1654 +fail 8 4.875197 0 0 1655 +curv 8 4.875197 0 0 1656 +driver 8 4.875197 0 0 1657 +told 8 4.875197 0 0 1658 +virginia 8 4.875197 0 0 1659 +irvin 8 4.875197 0 0 1660 +reus 8 4.875197 0 0 1661 +paradigm 8 4.875197 0 0 1662 +isol 8 4.875197 0 0 1663 +claim 8 4.875197 0 0 1664 +realist 8 4.875197 0 0 1665 +analys 8 4.875197 0 0 1666 +miikkulainen 8 4.875197 0 0 1667 +replac 8 4.875197 0 0 1668 +mach 8 4.875197 0 0 1669 +inproceed 8 4.875197 0 0 1670 +presenc 8 4.875197 0 0 1671 +transport 8 4.875197 0 0 1672 +convers 8 4.875197 0 0 1673 +pacif 8 4.875197 0 0 1674 +grove 8 4.875197 0 0 1675 +parti 8 4.875197 0 0 1676 +textur 8 4.875197 0 0 1677 +entri 8 4.875197 0 0 1678 +illustr 8 4.875197 0 0 1679 +marc 8 4.875197 0 0 1680 +besid 8 4.875197 0 0 1681 +reload 8 4.875197 0 0 1682 +pagei 8 4.875197 0 0 1683 +ring 8 4.875197 0 0 1684 +upcom 8 4.875197 0 0 1685 +shapiro 8 4.875197 0 0 1686 +insert 8 4.875197 0 0 1687 +integ 8 4.875197 0 0 1688 +risk 8 4.875197 0 0 1689 +potenti 8 4.875197 0 0 1690 +watson 8 4.875197 0 0 1691 +chamber 8 4.875197 0 0 1692 +perhap 8 4.875197 0 0 1693 +satisfi 8 4.875197 0 0 1694 +egger 8 4.875197 0 0 1695 +uniprocessor 8 4.875197 0 0 1696 +simon 8 4.875197 0 0 1697 +joel 8 4.875197 0 0 1698 +readm 8 4.875197 0 0 1699 +lewi 8 4.875197 0 0 1700 +erik 8 4.875197 0 0 1701 +job 8 4.875197 0 0 1702 +cross 8 4.875197 0 0 1703 +pldi 8 4.875197 0 0 1704 +sean 8 4.875197 1 0 1705 +romer 8 4.875197 0 0 1706 +theme 8 4.875197 0 0 1707 +opinion 8 4.875197 0 0 1708 +hewlett 8 4.875197 0 0 1709 +tourist 8 4.875197 0 0 1710 +rais 8 4.875197 0 0 1711 +forget 8 4.875197 0 0 1712 +yanni 8 4.875197 0 0 1713 +ioannidi 8 4.875197 0 0 1714 +ferri 8 4.875197 0 0 1715 +solomon 8 4.875197 0 0 1716 +star 8 4.875197 0 0 1717 +switch 8 4.875197 0 0 1718 +gather 8 4.875197 0 0 1719 +lane 8 4.875197 0 0 1720 +qualifi 8 4.875197 0 0 1721 +empir 8 4.875197 0 0 1722 +polygon 8 4.875197 0 0 1723 +postdoc 8 4.875197 0 0 1724 +xerox 8 4.875197 0 0 1725 +sensit 8 4.875197 0 0 1726 +sigop 8 4.875197 0 0 1727 +extract 8 4.875197 0 0 1728 +heart 8 4.875197 0 0 1729 +gain 8 4.875197 0 0 1730 +babylon 8 4.875197 0 0 1731 +mass 8 4.875197 0 0 1732 +formul 8 4.875197 0 0 1733 +colloquium 8 4.875197 0 0 1734 +academi 8 4.875197 0 0 1735 +poor 8 4.875197 0 0 1736 +presidenti 8 4.875197 0 0 1737 +wayn 8 4.875197 0 0 1738 +realiz 8 4.875197 0 0 1739 +capac 8 4.875197 0 0 1740 +ride 8 4.875197 0 0 1741 +grew 8 4.875197 0 0 1742 +mile 8 4.875197 0 0 1743 +kanpur 8 4.875197 0 0 1744 +gold 8 4.875197 0 0 1745 +coast 8 4.875197 0 0 1746 +wire 8 4.875197 0 0 1747 +invari 8 4.875197 0 0 1748 +autonom 8 4.875197 0 0 1749 +aaai 8 4.875197 0 0 1750 +span 8 4.875197 0 0 1751 +soccer 8 4.875197 0 0 1752 +chao 8 4.875197 0 0 1753 +univeristi 8 4.875197 0 0 1754 +accomplish 8 4.875197 0 0 1755 +researchi 8 4.875197 0 0 1756 +hallcornel 8 4.875197 0 0 1757 +guitar 8 4.875197 0 0 1758 +guggenheim 8 4.875197 0 0 1759 +hockei 8 4.875197 0 0 1760 +film 8 4.875197 0 0 1761 +japan 8 4.875197 0 0 1762 +european 8 4.875197 0 0 1763 +bridg 8 4.875197 0 0 1764 +counter 8 4.875197 0 0 1765 +port 8 4.875197 0 0 1766 +character 8 4.875197 0 0 1767 +irregular 8 4.875197 0 0 1768 +round 8 4.875197 0 0 1769 +madra 8 4.875197 0 0 1770 +elect 8 4.875197 0 0 1771 +router 8 4.875197 0 0 1772 +siggraph 8 4.875197 0 0 1773 +unifi 8 4.875197 0 0 1774 +secretari 8 4.875197 0 0 1775 +pure 8 4.875197 0 0 1776 +creativ 8 4.875197 0 0 1777 +oop 8 4.875197 0 0 1778 +angel 8 4.875197 0 0 1779 +jack 8 4.875197 0 0 1780 +carei 8 4.875197 0 0 1781 +paradis 8 4.875197 0 0 1782 +multiscalar 8 4.875197 0 0 1783 +assistantdepart 8 4.875197 0 0 1784 +attach 7 5.010635 0 0 1785 +ethic 7 5.010635 0 0 1786 +header 7 5.010635 0 0 1787 +earn 7 5.010635 0 0 1788 +justin 7 5.010635 1 0 1789 +happen 7 5.010635 0 0 1790 +dispatch 7 5.010635 0 0 1791 +prioriti 7 5.010635 0 0 1792 +interrupt 7 5.010635 0 0 1793 +saturdai 7 5.010635 0 0 1794 +slightli 7 5.010635 0 0 1795 +paramet 7 5.010635 0 0 1796 +planner 7 5.010635 0 0 1797 +hunt 7 5.010635 0 0 1798 +remind 7 5.010635 0 0 1799 +awar 7 5.010635 0 0 1800 +bug 7 5.010635 0 0 1801 +fromth 7 5.010635 0 0 1802 +seshadri 7 5.010635 0 0 1803 +noon 7 5.010635 0 0 1804 +henc 7 5.010635 0 0 1805 +predic 7 5.010635 0 0 1806 +smile 7 5.010635 0 0 1807 +microprocessor 7 5.010635 0 0 1808 +core 7 5.010635 0 0 1809 +usabl 7 5.010635 0 0 1810 +none 7 5.010635 0 0 1811 +baker 7 5.010635 0 0 1812 +implementationof 7 5.010635 0 0 1813 +poster 7 5.010635 0 0 1814 +trade 7 5.010635 0 0 1815 +explain 7 5.010635 0 0 1816 +exactli 7 5.010635 0 0 1817 +stereo 7 5.010635 0 0 1818 +parametr 7 5.010635 0 0 1819 +ramin 7 5.010635 0 0 1820 +tag 7 5.010635 0 0 1821 +therefor 7 5.010635 0 0 1822 +interpol 7 5.010635 0 0 1823 +newton 7 5.010635 0 0 1824 +elementari 7 5.010635 0 0 1825 +accord 7 5.010635 0 0 1826 +prevent 7 5.010635 0 0 1827 +surpris 7 5.010635 0 0 1828 +chief 7 5.010635 0 0 1829 +pipelin 7 5.010635 0 0 1830 +metric 7 5.010635 0 0 1831 +guidelin 7 5.010635 0 0 1832 +bit 7 5.010635 0 0 1833 +beyond 7 5.010635 0 0 1834 +encrypt 7 5.010635 0 0 1835 +channel 7 5.010635 0 0 1836 +bookstor 7 5.010635 0 0 1837 +prentic 7 5.010635 0 0 1838 +usenet 7 5.010635 0 0 1839 +dead 7 5.010635 0 0 1840 +heavi 7 5.010635 0 0 1841 +trend 7 5.010635 0 0 1842 +dedic 7 5.010635 0 0 1843 +converg 7 5.010635 0 0 1844 +uniform 7 5.010635 0 0 1845 +canb 7 5.010635 0 0 1846 +intellectu 7 5.010635 0 0 1847 +delai 7 5.010635 0 0 1848 +harrick 7 5.010635 0 0 1849 +peterson 7 5.010635 0 0 1850 +migrat 7 5.010635 0 0 1851 +keshav 7 5.010635 0 0 1852 +chiang 7 5.010635 0 0 1853 +supportfor 7 5.010635 0 0 1854 +smooth 7 5.010635 0 0 1855 +misra 7 5.010635 0 0 1856 +conferenc 7 5.010635 0 0 1857 +largest 7 5.010635 0 0 1858 +feelei 7 5.010635 0 0 1859 +sparc 7 5.010635 0 0 1860 +bunch 7 5.010635 0 0 1861 +merg 7 5.010635 0 0 1862 +tip 7 5.010635 0 0 1863 +digest 7 5.010635 0 0 1864 +documentfor 7 5.010635 0 0 1865 +legibl 7 5.010635 0 0 1866 +ghostscript 7 5.010635 0 0 1867 +clip 7 5.010635 0 0 1868 +successfulli 7 5.010635 0 0 1869 +ruth 7 5.010635 0 0 1870 +throughout 7 5.010635 0 0 1871 +fortun 7 5.010635 0 0 1872 +adob 7 5.010635 0 0 1873 +smaller 7 5.010635 0 0 1874 +larger 7 5.010635 0 0 1875 +chan 7 5.010635 0 0 1876 +reduct 7 5.010635 0 0 1877 +portland 7 5.010635 0 0 1878 +craig 7 5.010635 0 0 1879 +foc 7 5.010635 0 0 1880 +shade 7 5.010635 0 0 1881 +uncertainti 7 5.010635 0 0 1882 +whenev 7 5.010635 0 0 1883 +secondari 7 5.010635 0 0 1884 +sweden 7 5.010635 0 0 1885 +friedman 7 5.010635 0 0 1886 +molecular 7 5.010635 0 0 1887 +davi 7 5.010635 0 0 1888 +burger 7 5.010635 0 0 1889 +multicomput 7 5.010635 0 0 1890 +goodman 7 5.010635 0 0 1891 +roger 7 5.010635 0 0 1892 +fischer 7 5.010635 0 0 1893 +spot 7 5.010635 0 0 1894 +refere 7 5.010635 0 0 1895 +zero 7 5.010635 0 0 1896 +suffici 7 5.010635 0 0 1897 +shot 7 5.010635 0 0 1898 +necessarili 7 5.010635 0 0 1899 +pagecomput 7 5.010635 0 0 1900 +isbn 7 5.010635 0 0 1901 +pursu 7 5.010635 0 0 1902 +scout 7 5.010635 0 0 1903 +philosoph 7 5.010635 0 0 1904 +occasion 7 5.010635 0 0 1905 +bottom 7 5.010635 0 0 1906 +compact 7 5.010635 0 0 1907 +lawrenc 7 5.010635 0 0 1908 +corner 7 5.010635 0 0 1909 +signal 7 5.010635 0 0 1910 +solver 7 5.010635 0 0 1911 +footbal 7 5.010635 0 0 1912 +analyt 7 5.010635 0 0 1913 +dataset 7 5.010635 0 0 1914 +discoveri 7 5.010635 0 0 1915 +cornellunivers 7 5.010635 0 0 1916 +sixth 7 5.010635 0 0 1917 +pronounc 7 5.010635 0 0 1918 +foreign 7 5.010635 0 0 1919 +sensor 7 5.010635 0 0 1920 +perfect 7 5.010635 0 0 1921 +gave 7 5.010635 0 0 1922 +synchroni 7 5.010635 0 0 1923 +aris 7 5.010635 0 0 1924 +brought 7 5.010635 0 0 1925 +harvard 7 5.010635 0 0 1926 +densiti 7 5.010635 0 0 1927 +vehicl 7 5.010635 0 0 1928 +boundari 7 5.010635 0 0 1929 +dimens 7 5.010635 0 0 1930 +fifth 7 5.010635 0 0 1931 +pennsylvania 7 5.010635 0 0 1932 +athlet 7 5.010635 0 0 1933 +monei 7 5.010635 0 0 1934 +centuri 7 5.010635 0 0 1935 +apart 7 5.010635 0 0 1936 +morph 7 5.010635 0 0 1937 +pittsburgh 7 5.010635 0 0 1938 +pageth 7 5.010635 0 0 1939 +hear 7 5.010635 0 0 1940 +illinoi 7 5.010635 0 0 1941 +gatewai 7 5.010635 0 0 1942 +daughter 7 5.010635 0 0 1943 +maxim 7 5.010635 0 0 1944 +cricket 7 5.010635 0 0 1945 +marri 7 5.010635 0 0 1946 +notion 7 5.010635 0 0 1947 +bore 7 5.010635 0 0 1948 +christian 7 5.010635 0 0 1949 +hack 7 5.010635 0 0 1950 +cultur 7 5.010635 0 0 1951 +chronicl 7 5.010635 0 0 1952 +courtesi 7 5.010635 0 0 1953 +instrument 7 5.010635 0 0 1954 +ground 7 5.010635 0 0 1955 +eduto 7 5.010635 0 0 1956 +capit 7 5.010635 0 0 1957 +appar 7 5.010635 0 0 1958 +vallei 7 5.010635 0 0 1959 +chanc 7 5.010635 0 0 1960 +montreal 7 5.010635 0 0 1961 +golden 7 5.010635 0 0 1962 +studentcomput 7 5.010635 0 0 1963 +chung 7 5.010635 0 0 1964 +hit 7 5.010635 0 0 1965 +austinaustin 7 5.010635 0 0 1966 +centenni 7 5.010635 0 0 1967 +sciencesat 7 5.010635 0 0 1968 +interestsi 7 5.010635 0 0 1969 +wouldn 7 5.010635 0 0 1970 +iowa 7 5.010635 0 0 1971 +bombai 7 5.010635 0 0 1972 +northwest 7 5.010635 0 0 1973 +softbot 7 5.010635 0 0 1974 +serial 7 5.010635 0 0 1975 +seitz 7 5.010635 0 0 1976 +phase 6 5.164786 0 0 1977 +silberschatz 6 5.164786 0 0 1978 +textual 6 5.164786 0 0 1979 +alphabet 6 5.164786 0 0 1980 +theproject 6 5.164786 0 0 1981 +gopher 6 5.164786 0 0 1982 +huttenloch 6 5.164786 0 0 1983 +chosen 6 5.164786 1 0 1984 +contract 6 5.164786 0 0 1985 +garbag 6 5.164786 0 0 1986 +hidden 6 5.164786 0 0 1987 +schema 6 5.164786 0 0 1988 +consequ 6 5.164786 0 0 1989 +neither 6 5.164786 0 0 1990 +huge 6 5.164786 0 0 1991 +ifyou 6 5.164786 0 0 1992 +beta 6 5.164786 0 0 1993 +lack 6 5.164786 0 0 1994 +tobe 6 5.164786 0 0 1995 +praveen 6 5.164786 0 0 1996 +ture 6 5.164786 0 0 1997 +rubinfeld 6 5.164786 0 0 1998 +price 6 5.164786 0 0 1999 +quickli 6 5.164786 0 0 2000 +vari 6 5.164786 0 0 2001 +troubl 6 5.164786 0 0 2002 +yale 6 5.164786 0 0 2003 +plu 6 5.164786 0 0 2004 +philip 6 5.164786 0 0 2005 +classroom 6 5.164786 0 0 2006 +spline 6 5.164786 0 0 2007 +drop 6 5.164786 0 0 2008 +otherthan 6 5.164786 0 0 2009 +multiprogram 6 5.164786 0 0 2010 +pace 6 5.164786 0 0 2011 +ensur 6 5.164786 0 0 2012 +boyer 6 5.164786 0 0 2013 +freeli 6 5.164786 0 0 2014 +subsystem 6 5.164786 0 0 2015 +risc 6 5.164786 0 0 2016 +put 6 5.164786 0 0 2017 +banerje 6 5.164786 0 0 2018 +assignmentsprogram 6 5.164786 0 0 2019 +sciencesdepart 6 5.164786 0 0 2020 +slate 6 5.164786 0 0 2021 +distributedsystem 6 5.164786 0 0 2022 +arrang 6 5.164786 0 0 2023 +causal 6 5.164786 0 0 2024 +wrong 6 5.164786 0 0 2025 +constitut 6 5.164786 0 0 2026 +forum 6 5.164786 0 0 2027 +mirror 6 5.164786 0 0 2028 +strong 6 5.164786 0 0 2029 +syntax 6 5.164786 0 0 2030 +snow 6 5.164786 0 0 2031 +gordon 6 5.164786 0 0 2032 +thegoal 6 5.164786 0 0 2033 +determinist 6 5.164786 0 0 2034 +difficult 6 5.164786 0 0 2035 +academia 6 5.164786 0 0 2036 +promis 6 5.164786 0 0 2037 +emerg 6 5.164786 0 0 2038 +prefetch 6 5.164786 0 0 2039 +infer 6 5.164786 0 0 2040 +conflict 6 5.164786 0 0 2041 +constrain 6 5.164786 0 0 2042 +variant 6 5.164786 0 0 2043 +affect 6 5.164786 0 0 2044 +carefulli 6 5.164786 0 0 2045 +apolog 6 5.164786 0 0 2046 +nine 6 5.164786 0 0 2047 +onoper 6 5.164786 0 0 2048 +thompson 6 5.164786 0 0 2049 +edward 6 5.164786 0 0 2050 +internationalconfer 6 5.164786 0 0 2051 +versu 6 5.164786 0 0 2052 +jpeg 6 5.164786 0 0 2053 +symposiumon 6 5.164786 0 0 2054 +meant 6 5.164786 0 0 2055 +whichi 6 5.164786 0 0 2056 +indiana 6 5.164786 0 0 2057 +grammar 6 5.164786 0 0 2058 +markup 6 5.164786 0 0 2059 +theclass 6 5.164786 0 0 2060 +handbook 6 5.164786 0 0 2061 +ladner 6 5.164786 0 0 2062 +acrobat 6 5.164786 0 0 2063 +strang 6 5.164786 0 0 2064 +ann 6 5.164786 0 0 2065 +majordomo 6 5.164786 0 0 2066 +transcript 6 5.164786 0 0 2067 +gaetano 6 5.164786 0 0 2068 +creation 6 5.164786 0 0 2069 +deliv 6 5.164786 1 0 2070 +trail 6 5.164786 0 0 2071 +fred 6 5.164786 0 0 2072 +greatest 6 5.164786 0 0 2073 +fewer 6 5.164786 0 0 2074 +sung 6 5.164786 0 0 2075 +silicon 6 5.164786 0 0 2076 +pentium 6 5.164786 0 0 2077 +approv 6 5.164786 0 0 2078 +invok 6 5.164786 0 0 2079 +consensu 6 5.164786 0 0 2080 +tullsen 6 5.164786 0 0 2081 +superscalar 6 5.164786 0 0 2082 +mother 6 5.164786 0 0 2083 +machinelearn 6 5.164786 0 0 2084 +histor 6 5.164786 0 0 2085 +reed 6 5.164786 0 0 2086 +mock 6 5.164786 0 0 2087 +conveni 6 5.164786 0 0 2088 +onto 6 5.164786 0 0 2089 +temporari 6 5.164786 0 0 2090 +chandra 6 5.164786 0 0 2091 +zhou 6 5.164786 0 0 2092 +wolman 6 5.164786 0 0 2093 +legal 6 5.164786 0 0 2094 +highwai 6 5.164786 0 0 2095 +impress 6 5.164786 0 0 2096 +whatev 6 5.164786 0 0 2097 +averag 6 5.164786 0 0 2098 +bestor 6 5.164786 0 0 2099 +sharp 6 5.164786 0 0 2100 +sciencesoffic 6 5.164786 0 0 2101 +teitelbaum 6 5.164786 0 0 2102 +lloyd 6 5.164786 0 0 2103 +skrentni 6 5.164786 0 0 2104 +extern 6 5.164786 0 0 2105 +notifi 6 5.164786 0 0 2106 +rough 6 5.164786 0 0 2107 +byte 6 5.164786 0 0 2108 +olvi 6 5.164786 0 0 2109 +mistak 6 5.164786 0 0 2110 +handi 6 5.164786 0 0 2111 +moder 6 5.164786 0 0 2112 +inequ 6 5.164786 0 0 2113 +proce 6 5.164786 0 0 2114 +polit 6 5.164786 0 0 2115 +viewpoint 6 5.164786 0 0 2116 +gzip 6 5.164786 0 0 2117 +televis 6 5.164786 0 0 2118 +spie 6 5.164786 0 0 2119 +relax 6 5.164786 0 0 2120 +oxford 6 5.164786 0 0 2121 +dens 6 5.164786 0 0 2122 +jude 6 5.164786 0 0 2123 +geoff 6 5.164786 0 0 2124 +heurist 6 5.164786 0 0 2125 +prasad 6 5.164786 0 0 2126 +artist 6 5.164786 0 0 2127 +chat 6 5.164786 0 0 2128 +restrict 6 5.164786 0 0 2129 +alex 6 5.164786 1 0 2130 +atcornel 6 5.164786 0 0 2131 +edumi 6 5.164786 0 0 2132 +sponsor 6 5.164786 0 0 2133 +oren 6 5.164786 0 0 2134 +etzioni 6 5.164786 0 0 2135 +outstand 6 5.164786 0 0 2136 +rain 6 5.164786 0 0 2137 +zabih 6 5.164786 0 0 2138 +corp 6 5.164786 0 0 2139 +maryland 6 5.164786 0 0 2140 +furthermor 6 5.164786 0 0 2141 +carolina 6 5.164786 0 0 2142 +kluwer 6 5.164786 0 0 2143 +privaci 6 5.164786 0 0 2144 +recruit 6 5.164786 0 0 2145 +photographi 6 5.164786 0 0 2146 +biolog 6 5.164786 0 0 2147 +advisori 6 5.164786 0 0 2148 +chicago 6 5.164786 0 0 2149 +prize 6 5.164786 0 0 2150 +nest 6 5.164786 0 0 2151 +brook 6 5.164786 0 0 2152 +invest 6 5.164786 0 0 2153 +patel 6 5.164786 0 0 2154 +simultan 6 5.164786 0 0 2155 +toronto 6 5.164786 0 0 2156 +feasibl 6 5.164786 0 0 2157 +pari 6 5.164786 0 0 2158 +usaoffic 6 5.164786 0 0 2159 +microsystem 6 5.164786 0 0 2160 +railroad 6 5.164786 0 0 2161 +softwareengin 6 5.164786 0 0 2162 +lucki 6 5.164786 0 0 2163 +rock 6 5.164786 0 0 2164 +dream 6 5.164786 0 0 2165 +tri 6 5.164786 0 0 2166 +south 6 5.164786 0 0 2167 +goldstein 6 5.164786 0 0 2168 +peek 6 5.164786 0 0 2169 +reconstruct 6 5.164786 0 0 2170 +forecast 6 5.164786 0 0 2171 +sciencedepart 6 5.164786 0 0 2172 +advis 6 5.164786 0 0 2173 +rebecca 6 5.164786 0 0 2174 +srinivasan 6 5.164786 0 0 2175 +somewher 6 5.164786 0 0 2176 +spare 6 5.164786 0 0 2177 +golf 6 5.164786 0 0 2178 +truth 6 5.164786 0 0 2179 +scholar 6 5.164786 0 0 2180 +postcard 6 5.164786 0 0 2181 +gate 6 5.164786 0 0 2182 +layout 6 5.164786 0 0 2183 +quick 6 5.164786 0 0 2184 +famou 6 5.164786 0 0 2185 +antonio 6 5.164786 0 0 2186 +plane 6 5.164786 0 0 2187 +infoseek 6 5.164786 0 0 2188 +soda 6 5.164786 0 0 2189 +german 6 5.164786 0 0 2190 +southern 6 5.164786 0 0 2191 +nativ 6 5.164786 0 0 2192 +matthew 6 5.164786 0 0 2193 +cat 6 5.164786 0 0 2194 +yeah 6 5.164786 0 0 2195 +presid 6 5.164786 0 0 2196 +financi 6 5.164786 0 0 2197 +band 6 5.164786 0 0 2198 +myresum 6 5.164786 0 0 2199 +mix 6 5.164786 0 0 2200 +piano 6 5.164786 0 0 2201 +seen 6 5.164786 0 0 2202 +strip 6 5.164786 0 0 2203 +parent 6 5.164786 0 0 2204 +eduresearch 6 5.164786 0 0 2205 +increment 6 5.164786 0 0 2206 +fish 6 5.164786 0 0 2207 +greec 6 5.164786 0 0 2208 +usag 6 5.164786 0 0 2209 +vivek 6 5.164786 0 0 2210 +sleep 6 5.164786 0 0 2211 +sigact 6 5.164786 0 0 2212 +benefit 6 5.164786 0 0 2213 +quantum 6 5.164786 0 0 2214 +ongo 6 5.164786 0 0 2215 +beer 6 5.164786 0 0 2216 +fiction 6 5.164786 0 0 2217 +park 6 5.164786 0 0 2218 +aggreg 6 5.164786 0 0 2219 +river 6 5.164786 0 0 2220 +oopsla 6 5.164786 0 0 2221 +altavista 6 5.164786 0 0 2222 +squash 6 5.164786 0 0 2223 +bell 6 5.164786 0 0 2224 +pool 6 5.164786 0 0 2225 +unpublish 6 5.164786 0 0 2226 +blue 6 5.164786 0 0 2227 +loup 6 5.164786 0 0 2228 +chinook 6 5.164786 0 0 2229 +restaur 6 5.164786 0 0 2230 +duke 6 5.164786 0 0 2231 +divers 6 5.164786 0 0 2232 +commit 6 5.164786 0 0 2233 +hike 6 5.164786 0 0 2234 +recov 6 5.164786 0 0 2235 +geograph 6 5.164786 0 0 2236 +sohi 6 5.164786 0 0 2237 +microarchitectur 6 5.164786 0 0 2238 +pub 6 5.164786 0 0 2239 +lili 5 5.347108 0 0 2240 +hoca 5 5.347108 0 0 2241 +phrase 5 5.347108 0 0 2242 +ross 5 5.347108 0 0 2243 +tupl 5 5.347108 0 0 2244 +conot 5 5.347108 0 0 2245 +hardcopi 5 5.347108 0 0 2246 +substitut 5 5.347108 0 0 2247 +variat 5 5.347108 0 0 2248 +registr 5 5.347108 0 0 2249 +bind 5 5.347108 0 0 2250 +constant 5 5.347108 0 0 2251 +seriou 5 5.347108 0 0 2252 +clarif 5 5.347108 0 0 2253 +kaufmann 5 5.347108 0 0 2254 +solid 5 5.347108 0 0 2255 +valuabl 5 5.347108 0 0 2256 +thrive 5 5.347108 0 0 2257 +greater 5 5.347108 0 0 2258 +fraction 5 5.347108 0 0 2259 +interestedin 5 5.347108 0 0 2260 +categori 5 5.347108 0 0 2261 +mcgraw 5 5.347108 0 0 2262 +morrisett 5 5.347108 0 0 2263 +gentl 5 5.347108 0 0 2264 +ronitt 5 5.347108 0 0 2265 +feder 5 5.347108 0 0 2266 +eas 5 5.347108 0 0 2267 +suffer 5 5.347108 0 0 2268 +matur 5 5.347108 0 0 2269 +vertic 5 5.347108 0 0 2270 +focuss 5 5.347108 0 0 2271 +buch 5 5.347108 0 0 2272 +contest 5 5.347108 0 0 2273 +cell 5 5.347108 0 0 2274 +departmentcornel 5 5.347108 0 0 2275 +notabl 5 5.347108 0 0 2276 +willb 5 5.347108 0 0 2277 +remain 5 5.347108 0 0 2278 +correl 5 5.347108 0 0 2279 +markov 5 5.347108 0 0 2280 +snake 5 5.347108 0 0 2281 +corpu 5 5.347108 0 0 2282 +circumst 5 5.347108 0 0 2283 +karp 5 5.347108 0 0 2284 +fit 5 5.347108 0 0 2285 +stabil 5 5.347108 0 0 2286 +worst 5 5.347108 0 0 2287 +ignor 5 5.347108 0 0 2288 +hennessi 5 5.347108 0 0 2289 +computerarchitectur 5 5.347108 0 0 2290 +door 5 5.347108 0 0 2291 +mac 5 5.347108 0 0 2292 +porter 5 5.347108 0 0 2293 +lang 5 5.347108 0 0 2294 +rotat 5 5.347108 0 0 2295 +scope 5 5.347108 0 0 2296 +desk 5 5.347108 0 0 2297 +caus 5 5.347108 0 0 2298 +opengl 5 5.347108 0 0 2299 +fussel 5 5.347108 0 0 2300 +ousterhout 5 5.347108 0 0 2301 +recogn 5 5.347108 0 0 2302 +snapshot 5 5.347108 0 0 2303 +colleagu 5 5.347108 0 0 2304 +multicast 5 5.347108 0 0 2305 +authent 5 5.347108 0 0 2306 +volunt 5 5.347108 0 0 2307 +explicitli 5 5.347108 0 0 2308 +stabl 5 5.347108 0 0 2309 +exchang 5 5.347108 0 0 2310 +templat 5 5.347108 0 0 2311 +appt 5 5.347108 0 0 2312 +raymond 5 5.347108 0 0 2313 +allegro 5 5.347108 0 0 2314 +revolut 5 5.347108 0 0 2315 +ofdistribut 5 5.347108 0 0 2316 +despit 5 5.347108 0 0 2317 +unknown 5 5.347108 0 0 2318 +distinct 5 5.347108 0 0 2319 +corba 5 5.347108 0 0 2320 +pars 5 5.347108 0 0 2321 +fairli 5 5.347108 0 0 2322 +ofinterest 5 5.347108 0 0 2323 +blumoferdb 5 5.347108 0 0 2324 +theth 5 5.347108 0 0 2325 +oncomput 5 5.347108 0 0 2326 +joseph 5 5.347108 0 0 2327 +steer 5 5.347108 0 0 2328 +sigcomm 5 5.347108 0 0 2329 +row 5 5.347108 0 0 2330 +proceedingsof 5 5.347108 0 0 2331 +jain 5 5.347108 0 0 2332 +fair 5 5.347108 0 0 2333 +consum 5 5.347108 0 0 2334 +default 5 5.347108 0 0 2335 +pagethi 5 5.347108 0 0 2336 +button 5 5.347108 0 0 2337 +ahead 5 5.347108 0 0 2338 +proposit 5 5.347108 0 0 2339 +highlight 5 5.347108 0 0 2340 +foracadem 5 5.347108 0 0 2341 +newinform 5 5.347108 0 0 2342 +bulletin 5 5.347108 0 0 2343 +beam 5 5.347108 0 0 2344 +ruzzo 5 5.347108 0 0 2345 +diagram 5 5.347108 0 0 2346 +latexhtml 5 5.347108 0 0 2347 +scienceand 5 5.347108 0 0 2348 +borriello 5 5.347108 0 0 2349 +augment 5 5.347108 0 0 2350 +carlson 5 5.347108 0 0 2351 +writeup 5 5.347108 0 0 2352 +shell 5 5.347108 0 0 2353 +respond 5 5.347108 0 0 2354 +supplement 5 5.347108 0 0 2355 +attract 5 5.347108 0 0 2356 +shift 5 5.347108 0 0 2357 +lost 5 5.347108 0 0 2358 +snyder 5 5.347108 0 0 2359 +jeremi 5 5.347108 0 0 2360 +forprogram 5 5.347108 0 0 2361 +vortex 5 5.347108 0 0 2362 +travers 5 5.347108 0 0 2363 +understood 5 5.347108 0 0 2364 +situat 5 5.347108 0 0 2365 +amus 5 5.347108 0 0 2366 +rewrit 5 5.347108 0 0 2367 +overlap 5 5.347108 0 0 2368 +subjectto 5 5.347108 0 0 2369 +speaker 5 5.347108 0 0 2370 +barton 5 5.347108 0 0 2371 +middl 5 5.347108 0 0 2372 +philipos 5 5.347108 0 0 2373 +appreci 5 5.347108 0 0 2374 +andrea 5 5.347108 0 0 2375 +symp 5 5.347108 0 0 2376 +leblanc 5 5.347108 0 0 2377 +affin 5 5.347108 0 0 2378 +parallelprogram 5 5.347108 0 0 2379 +ofparallel 5 5.347108 0 0 2380 +culler 5 5.347108 0 0 2381 +icpp 5 5.347108 0 0 2382 +cyclic 5 5.347108 0 0 2383 +crucial 5 5.347108 0 0 2384 +tiwari 5 5.347108 0 0 2385 +begun 5 5.347108 0 0 2386 +older 5 5.347108 0 0 2387 +cacm 5 5.347108 0 0 2388 +consent 5 5.347108 0 0 2389 +dataflow 5 5.347108 0 0 2390 +madisoncomput 5 5.347108 0 0 2391 +gareth 5 5.347108 0 0 2392 +handin 5 5.347108 0 0 2393 +informationc 5 5.347108 0 0 2394 +pagecours 5 5.347108 0 0 2395 +savitchaddison 5 5.347108 0 0 2396 +formerli 5 5.347108 0 0 2397 +lampert 5 5.347108 0 0 2398 +lookup 5 5.347108 0 0 2399 +paint 5 5.347108 0 0 2400 +bodner 5 5.347108 0 0 2401 +skip 5 5.347108 0 0 2402 +overload 5 5.347108 0 0 2403 +billi 5 5.347108 0 0 2404 +chemistri 5 5.347108 0 0 2405 +sparcstat 5 5.347108 0 0 2406 +blow 5 5.347108 0 0 2407 +chin 5 5.347108 0 0 2408 +tang 5 5.347108 0 0 2409 +girl 5 5.347108 0 0 2410 +horwitz 5 5.347108 0 0 2411 +craft 5 5.347108 0 0 2412 +salt 5 5.347108 0 0 2413 +favor 5 5.347108 0 0 2414 +commod 5 5.347108 0 0 2415 +anda 5 5.347108 0 0 2416 +race 5 5.347108 0 0 2417 +mutual 5 5.347108 0 0 2418 +phil 5 5.347108 0 0 2419 +noland 5 5.347108 0 0 2420 +bryan 5 5.347108 0 0 2421 +adjust 5 5.347108 0 0 2422 +multiresolut 5 5.347108 0 0 2423 +shortest 5 5.347108 0 0 2424 +grand 5 5.347108 0 0 2425 +pivot 5 5.347108 0 0 2426 +ration 5 5.347108 0 0 2427 +clickher 5 5.347108 0 0 2428 +shavlik 5 5.347108 0 0 2429 +connectionist 5 5.347108 0 0 2430 +deshpand 5 5.347108 0 0 2431 +rigid 5 5.347108 0 0 2432 +cellular 5 5.347108 0 0 2433 +anti 5 5.347108 0 0 2434 +hyper 5 5.347108 0 0 2435 +particl 5 5.347108 1 0 2436 +oregon 5 5.347108 0 0 2437 +facial 5 5.347108 1 0 2438 +interior 5 5.347108 1 0 2439 +fluid 5 5.347108 0 0 2440 +selberg 5 5.347108 0 0 2441 +actuat 5 5.347108 0 0 2442 +isi 5 5.347108 0 0 2443 +elsewher 5 5.347108 0 0 2444 +knew 5 5.347108 0 0 2445 +hair 5 5.347108 0 0 2446 +ohio 5 5.347108 0 0 2447 +medicin 5 5.347108 0 0 2448 +eduph 5 5.347108 0 0 2449 +accuraci 5 5.347108 0 0 2450 +synthes 5 5.347108 0 0 2451 +suni 5 5.347108 0 0 2452 +broadcast 5 5.347108 0 0 2453 +activitieseditor 5 5.347108 0 0 2454 +decad 5 5.347108 0 0 2455 +testb 5 5.347108 0 0 2456 +chapel 5 5.347108 0 0 2457 +yield 5 5.347108 0 0 2458 +ifip 5 5.347108 0 0 2459 +houston 5 5.347108 0 0 2460 +argonn 5 5.347108 0 0 2461 +compet 5 5.347108 0 0 2462 +panel 5 5.347108 0 0 2463 +seventh 5 5.347108 0 0 2464 +mission 5 5.347108 0 0 2465 +merit 5 5.347108 0 0 2466 +adopt 5 5.347108 0 0 2467 +webster 5 5.347108 0 0 2468 +minnesota 5 5.347108 0 0 2469 +allen 5 5.347108 0 0 2470 +dougla 5 5.347108 0 0 2471 +east 5 5.347108 0 0 2472 +ashish 5 5.347108 0 0 2473 +hypothet 5 5.347108 0 0 2474 +guestbook 5 5.347108 0 0 2475 +truli 5 5.347108 0 0 2476 +aim 5 5.347108 0 0 2477 +australia 5 5.347108 0 0 2478 +frog 5 5.347108 0 0 2479 +minneapoli 5 5.347108 0 0 2480 +upper 5 5.347108 0 0 2481 +these 5 5.347108 0 0 2482 +poem 5 5.347108 0 0 2483 +amherst 5 5.347108 0 0 2484 +compat 5 5.347108 0 0 2485 +chess 5 5.347108 0 0 2486 +singapor 5 5.347108 0 0 2487 +stage 5 5.347108 0 0 2488 +stupid 5 5.347108 0 0 2489 +holland 5 5.347108 0 0 2490 +stoc 5 5.347108 0 0 2491 +feet 5 5.347108 0 0 2492 +babi 5 5.347108 0 0 2493 +interfer 5 5.347108 0 0 2494 +million 5 5.347108 0 0 2495 +elain 5 5.347108 0 0 2496 +plant 5 5.347108 0 0 2497 +began 5 5.347108 0 0 2498 +sing 5 5.347108 0 0 2499 +li 5 5.347108 0 0 2500 +optimist 5 5.347108 0 0 2501 +ucla 5 5.347108 0 0 2502 +czar 5 5.347108 0 0 2503 +educurr 5 5.347108 0 0 2504 +studentdepart 5 5.347108 0 0 2505 +unnecessari 5 5.347108 0 0 2506 +puzzl 5 5.347108 0 0 2507 +atlant 5 5.347108 0 0 2508 +advic 5 5.347108 0 0 2509 +semi 5 5.347108 0 0 2510 +almaden 5 5.347108 0 0 2511 +christoph 5 5.347108 0 0 2512 +departmentat 5 5.347108 0 0 2513 +outdoor 5 5.347108 0 0 2514 +carlo 5 5.347108 0 0 2515 +kid 5 5.347108 0 0 2516 +everybodi 5 5.347108 0 0 2517 +hole 5 5.347108 0 0 2518 +junior 5 5.347108 0 0 2519 +licens 5 5.347108 0 0 2520 +treat 5 5.347108 0 0 2521 +dual 5 5.347108 0 0 2522 +complementari 5 5.347108 0 0 2523 +educomput 5 5.347108 0 0 2524 +quantifi 5 5.347108 0 0 2525 +florida 5 5.347108 0 0 2526 +jazz 5 5.347108 0 0 2527 +keith 5 5.347108 0 0 2528 +hate 5 5.347108 0 0 2529 +delhi 5 5.347108 0 0 2530 +own 5 5.347108 0 0 2531 +isth 5 5.347108 0 0 2532 +humor 5 5.347108 0 0 2533 +ship 5 5.347108 0 0 2534 +water 5 5.347108 0 0 2535 +settimeout 5 5.347108 0 0 2536 +wast 5 5.347108 0 0 2537 +coral 5 5.347108 0 0 2538 +peke 5 5.347108 0 0 2539 +twenti 5 5.347108 0 0 2540 +curiou 5 5.347108 0 0 2541 +lifschitz 5 5.347108 0 0 2542 +mirank 5 5.347108 0 0 2543 +emeritu 5 5.347108 0 0 2544 +camp 5 5.347108 0 0 2545 +british 5 5.347108 0 0 2546 +emerson 5 5.347108 0 0 2547 +tempest 5 5.347108 0 0 2548 +groupth 5 5.347108 0 0 2549 +orlean 5 5.347108 0 0 2550 +regent 5 5.347108 0 0 2551 +chemic 5 5.347108 0 0 2552 +ioanni 5 5.347108 0 0 2553 +bradlei 5 5.347108 0 0 2554 +alert 5 5.347108 0 0 2555 +coffe 5 5.347108 0 0 2556 +england 5 5.347108 0 0 2557 +provabl 5 5.347108 0 0 2558 +ortega 5 5.347108 0 0 2559 +frisbe 5 5.347108 0 0 2560 +engineeringat 5 5.347108 0 0 2561 +toc 5 5.347108 0 0 2562 +alec 5 5.347108 0 0 2563 +ohlrich 5 5.347108 0 0 2564 +darren 5 5.347108 0 0 2565 +chaotic 5 5.347108 0 0 2566 +fetch 5 5.347108 0 0 2567 +lesson 5 5.347108 0 0 2568 +keeper 5 5.347108 0 0 2569 +spinproject 5 5.347108 0 0 2570 +sail 5 5.347108 0 0 2571 +annex 5 5.347108 0 0 2572 +gui 5 5.347108 0 0 2573 +patent 5 5.347108 0 0 2574 +tuft 5 5.347108 0 0 2575 +nuclear 5 5.347108 0 0 2576 +condor 5 5.347108 0 0 2577 +guri 5 5.347108 0 0 2578 +girlfriend 5 5.347108 0 0 2579 +summarymi 5 5.347108 0 0 2580 +tsatalo 5 5.347108 0 0 2581 +lebeck 5 5.347108 0 0 2582 +reinhardt 5 5.347108 0 0 2583 +babak 5 5.347108 0 0 2584 +falsafi 5 5.347108 0 0 2585 +mukherje 5 5.347108 0 0 2586 +filesystem 4 5.568345 0 0 2587 +lorenzo 4 5.568345 0 0 2588 +marco 4 5.568345 0 0 2589 +clearli 4 5.568345 0 0 2590 +mentor 4 5.568345 0 0 2591 +sole 4 5.568345 0 0 2592 +addition 4 5.568345 0 0 2593 +password 4 5.568345 0 0 2594 +toth 4 5.568345 0 0 2595 +infinit 4 5.568345 0 0 2596 +wherea 4 5.568345 0 0 2597 +exposur 4 5.568345 0 0 2598 +midnight 4 5.568345 0 0 2599 +amaz 4 5.568345 0 0 2600 +thiscours 4 5.568345 0 0 2601 +catch 4 5.568345 0 0 2602 +illus 4 5.568345 0 0 2603 +microprogram 4 5.568345 0 0 2604 +clair 4 5.568345 0 0 2605 +shouldn 4 5.568345 0 0 2606 +unless 4 5.568345 0 0 2607 +minibas 4 5.568345 0 0 2608 +surprisingli 4 5.568345 0 0 2609 +behind 4 5.568345 0 0 2610 +suppli 4 5.568345 0 0 2611 +asystem 4 5.568345 0 0 2612 +enrol 4 5.568345 0 0 2613 +twice 4 5.568345 0 0 2614 +fold 4 5.568345 0 0 2615 +thati 4 5.568345 0 0 2616 +witha 4 5.568345 0 0 2617 +haskel 4 5.568345 0 0 2618 +kozen 4 5.568345 0 0 2619 +cut 4 5.568345 0 0 2620 +shelf 4 5.568345 0 0 2621 +slice 4 5.568345 0 0 2622 +pierc 4 5.568345 0 0 2623 +ofprogram 4 5.568345 0 0 2624 +flavor 4 5.568345 0 0 2625 +cuinfo 4 5.568345 0 0 2626 +polymorph 4 5.568345 0 0 2627 +gotten 4 5.568345 0 0 2628 +chose 4 5.568345 1 0 2629 +cuc 4 5.568345 0 0 2630 +scribe 4 5.568345 0 0 2631 +maximum 4 5.568345 0 0 2632 +hausdorff 4 5.568345 0 0 2633 +union 4 5.568345 0 0 2634 +push 4 5.568345 0 0 2635 +ford 4 5.568345 0 0 2636 +niko 4 5.568345 0 0 2637 +backward 4 5.568345 0 0 2638 +indupraka 4 5.568345 0 0 2639 +kodukula 4 5.568345 0 0 2640 +deadlock 4 5.568345 0 0 2641 +permiss 4 5.568345 0 0 2642 +usedto 4 5.568345 0 0 2643 +abraham 4 5.568345 0 0 2644 +backup 4 5.568345 0 0 2645 +rick 4 5.568345 0 0 2646 +reveal 4 5.568345 0 0 2647 +queu 4 5.568345 0 0 2648 +buss 4 5.568345 0 0 2649 +hypothesi 4 5.568345 0 0 2650 +bear 4 5.568345 0 0 2651 +glanc 4 5.568345 0 0 2652 +chart 4 5.568345 0 0 2653 +turnin 4 5.568345 0 0 2654 +welch 4 5.568345 0 0 2655 +coverag 4 5.568345 0 0 2656 +glad 4 5.568345 0 0 2657 +arora 4 5.568345 0 0 2658 +somewhat 4 5.568345 0 0 2659 +webpag 4 5.568345 0 0 2660 +hasbeen 4 5.568345 0 0 2661 +makefil 4 5.568345 0 0 2662 +basement 4 5.568345 0 0 2663 +disconnect 4 5.568345 0 0 2664 +subsequ 4 5.568345 0 0 2665 +accompani 4 5.568345 0 0 2666 +jacob 4 5.568345 0 0 2667 +gokul 4 5.568345 0 0 2668 +wilei 4 5.568345 0 0 2669 +sowmya 4 5.568345 0 0 2670 +bayesian 4 5.568345 0 0 2671 +emphas 4 5.568345 0 0 2672 +intelligencec 4 5.568345 0 0 2673 +reinforc 4 5.568345 0 0 2674 +resolv 4 5.568345 0 0 2675 +asymptot 4 5.568345 0 0 2676 +vijaya 4 5.568345 0 0 2677 +expens 4 5.568345 0 0 2678 +ofworkst 4 5.568345 0 0 2679 +algorithmsand 4 5.568345 0 0 2680 +havedevelop 4 5.568345 0 0 2681 +floyd 4 5.568345 0 0 2682 +systemsfal 4 5.568345 0 0 2683 +firm 4 5.568345 0 0 2684 +thecours 4 5.568345 0 0 2685 +theprogram 4 5.568345 0 0 2686 +dale 4 5.568345 0 0 2687 +wewil 4 5.568345 0 0 2688 +repli 4 5.568345 0 0 2689 +batori 4 5.568345 0 0 2690 +delet 4 5.568345 0 0 2691 +coursesc 4 5.568345 0 0 2692 +wireless 4 5.568345 0 0 2693 +lazowska 4 5.568345 0 0 2694 +breadth 4 5.568345 0 0 2695 +implic 4 5.568345 0 0 2696 +fora 4 5.568345 0 0 2697 +theacm 4 5.568345 0 0 2698 +zhao 4 5.568345 0 0 2699 +batch 4 5.568345 0 0 2700 +multimediasystem 4 5.568345 0 0 2701 +venkat 4 5.568345 0 0 2702 +forparallel 4 5.568345 0 0 2703 +admiss 4 5.568345 0 0 2704 +clark 4 5.568345 0 0 2705 +andevalu 4 5.568345 0 0 2706 +ofinform 4 5.568345 0 0 2707 +bach 4 5.568345 0 0 2708 +disjoint 4 5.568345 0 0 2709 +patch 4 5.568345 0 0 2710 +devot 4 5.568345 0 0 2711 +chain 4 5.568345 0 0 2712 +edufing 4 5.568345 0 0 2713 +whichcontain 4 5.568345 0 0 2714 +bounti 4 5.568345 0 0 2715 +insensit 4 5.568345 0 0 2716 +employe 4 5.568345 0 0 2717 +corei 4 5.568345 0 0 2718 +contemporari 4 5.568345 0 0 2719 +aboutth 4 5.568345 0 0 2720 +assignmentshomework 4 5.568345 0 0 2721 +assur 4 5.568345 0 0 2722 +isthat 4 5.568345 0 0 2723 +assess 4 5.568345 0 0 2724 +freeman 4 5.568345 0 0 2725 +websit 4 5.568345 0 0 2726 +csoffic 4 5.568345 0 0 2727 +knight 4 5.568345 0 0 2728 +isprefer 4 5.568345 0 0 2729 +isfast 4 5.568345 0 0 2730 +ghostscriptcan 4 5.568345 0 0 2731 +choi 4 5.568345 0 0 2732 +vital 4 5.568345 0 0 2733 +surviv 4 5.568345 0 0 2734 +pighin 4 5.568345 0 0 2735 +arun 4 5.568345 1 0 2736 +murphi 4 5.568345 0 0 2737 +mip 4 5.568345 0 0 2738 +appendix 4 5.568345 0 0 2739 +prog 4 5.568345 0 0 2740 +net 4 5.568345 0 0 2741 +peer 4 5.568345 0 0 2742 +screenshot 4 5.568345 0 0 2743 +kent 4 5.568345 0 0 2744 +comprehens 4 5.568345 0 0 2745 +cancel 4 5.568345 0 0 2746 +episod 4 5.568345 0 0 2747 +algorithmsfor 4 5.568345 0 0 2748 +ullman 4 5.568345 0 0 2749 +manuscript 4 5.568345 0 0 2750 +cheap 4 5.568345 0 0 2751 +sharma 4 5.568345 0 0 2752 +andsoftwar 4 5.568345 0 0 2753 +tester 4 5.568345 0 0 2754 +etch 4 5.568345 0 0 2755 +ebel 4 5.568345 0 0 2756 +mcmurchi 4 5.568345 0 0 2757 +uncertain 4 5.568345 0 0 2758 +cshrc 4 5.568345 0 0 2759 +assignmentsand 4 5.568345 0 0 2760 +cvpr 4 5.568345 0 0 2761 +kept 4 5.568345 0 0 2762 +tennesse 4 5.568345 0 0 2763 +invers 4 5.568345 0 0 2764 +sandi 4 5.568345 0 0 2765 +increasingli 4 5.568345 0 0 2766 +ncsa 4 5.568345 0 0 2767 +evid 4 5.568345 0 0 2768 +bottleneck 4 5.568345 0 0 2769 +anoop 4 5.568345 0 0 2770 +interprocedur 4 5.568345 0 0 2771 +hyder 4 5.568345 0 0 2772 +identif 4 5.568345 0 0 2773 +ppopp 4 5.568345 0 0 2774 +restructur 4 5.568345 0 0 2775 +randal 4 5.568345 0 0 2776 +savag 4 5.568345 0 0 2777 +glimps 4 5.568345 0 0 2778 +ics 4 5.568345 0 0 2779 +semesterli 4 5.568345 0 0 2780 +poorli 4 5.568345 0 0 2781 +will 4 5.568345 0 0 2782 +writer 4 5.568345 0 0 2783 +relief 4 5.568345 0 0 2784 +wear 4 5.568345 0 0 2785 +trivial 4 5.568345 0 0 2786 +labc 4 5.568345 0 0 2787 +policygrad 4 5.568345 0 0 2788 +erratalast 4 5.568345 0 0 2789 +suno 4 5.568345 0 0 2790 +birk 4 5.568345 0 0 2791 +anthoni 4 5.568345 0 0 2792 +kelli 4 5.568345 0 0 2793 +nathan 4 5.568345 0 0 2794 +complaint 4 5.568345 0 0 2795 +karen 4 5.568345 0 0 2796 +outdat 4 5.568345 0 0 2797 +repeat 4 5.568345 0 0 2798 +tire 4 5.568345 0 0 2799 +birthdai 4 5.568345 0 0 2800 +fork 4 5.568345 0 0 2801 +popular 4 5.568345 0 0 2802 +multitask 4 5.568345 0 0 2803 +systemsand 4 5.568345 0 0 2804 +withth 4 5.568345 0 0 2805 +marvin 4 5.568345 0 0 2806 +convex 4 5.568345 0 0 2807 +concav 4 5.568345 0 0 2808 +yong 4 5.568345 0 0 2809 +areavail 4 5.568345 0 0 2810 +andp 4 5.568345 0 0 2811 +contour 4 5.568345 0 0 2812 +sold 4 5.568345 0 0 2813 +shah 4 5.568345 0 0 2814 +novic 4 5.568345 0 0 2815 +thumb 4 5.568345 0 0 2816 +graham 4 5.568345 0 0 2817 +steel 4 5.568345 0 0 2818 +markhil 4 5.568345 0 0 2819 +talluri 4 5.568345 0 0 2820 +struct 4 5.568345 0 0 2821 +crazi 4 5.568345 0 0 2822 +oracl 4 5.568345 0 0 2823 +sabbat 4 5.568345 0 0 2824 +exhaust 4 5.568345 0 0 2825 +ident 4 5.568345 0 0 2826 +raman 4 5.568345 0 0 2827 +closest 4 5.568345 0 0 2828 +turnidg 4 5.568345 0 0 2829 +implicit 4 5.568345 0 0 2830 +pixel 4 5.568345 0 0 2831 +orthogon 4 5.568345 0 0 2832 +satish 4 5.568345 0 0 2833 +medium 4 5.568345 0 0 2834 +transmit 4 5.568345 0 0 2835 +lawyer 4 5.568345 0 0 2836 +lumelski 4 5.568345 0 0 2837 +underwat 4 5.568345 0 0 2838 +redund 4 5.568345 0 0 2839 +skin 4 5.568345 0 0 2840 +neil 4 5.568345 0 0 2841 +myresearch 4 5.568345 0 0 2842 +basu 4 5.568345 0 0 2843 +hayden 4 5.568345 0 0 2844 +hickei 4 5.568345 0 0 2845 +vaysburd 4 5.568345 0 0 2846 +disclaim 4 5.568345 0 0 2847 +green 4 5.568345 0 0 2848 +allan 4 5.568345 0 0 2849 +rapidli 4 5.568345 0 0 2850 +machineri 4 5.568345 0 0 2851 +termin 4 5.568345 0 0 2852 +ireland 4 5.568345 0 0 2853 +weyl 4 5.568345 0 0 2854 +dexter 4 5.568345 0 0 2855 +technion 4 5.568345 0 0 2856 +substrat 4 5.568345 0 0 2857 +weizmann 4 5.568345 0 0 2858 +publicationsth 4 5.568345 0 0 2859 +monograph 4 5.568345 0 0 2860 +fals 4 5.568345 0 0 2861 +compris 4 5.568345 0 0 2862 +align 4 5.568345 0 0 2863 +stodghil 4 5.568345 0 0 2864 +sigcs 4 5.568345 0 0 2865 +stoller 4 5.568345 0 0 2866 +nashvil 4 5.568345 0 0 2867 +schneider 4 5.568345 0 0 2868 +suffic 4 5.568345 0 0 2869 +conserv 4 5.568345 0 0 2870 +dagstuhl 4 5.568345 0 0 2871 +aircraft 4 5.568345 0 0 2872 +newslett 4 5.568345 0 0 2873 +wavelet 4 5.568345 0 0 2874 +analog 4 5.568345 0 0 2875 +publicationsresearch 4 5.568345 0 0 2876 +commonli 4 5.568345 0 0 2877 +swartz 4 5.568345 0 0 2878 +zippel 4 5.568345 0 0 2879 +notr 4 5.568345 0 0 2880 +dame 4 5.568345 0 0 2881 +alfr 4 5.568345 0 0 2882 +sinanet 4 5.568345 0 0 2883 +engg 4 5.568345 0 0 2884 +hell 4 5.568345 0 0 2885 +mess 4 5.568345 0 0 2886 +swing 4 5.568345 0 0 2887 +album 4 5.568345 0 0 2888 +ultra 4 5.568345 0 0 2889 +height 4 5.568345 0 0 2890 +gear 4 5.568345 0 0 2891 +thecornel 4 5.568345 0 0 2892 +hobb 4 5.568345 0 0 2893 +hallithaca 4 5.568345 0 0 2894 +heard 4 5.568345 0 0 2895 +wander 4 5.568345 0 0 2896 +chase 4 5.568345 0 0 2897 +decoupl 4 5.568345 0 0 2898 +disambigu 4 5.568345 0 0 2899 +hallphon 4 5.568345 0 0 2900 +ijcai 4 5.568345 0 0 2901 +complic 4 5.568345 0 0 2902 +triangul 4 5.568345 0 0 2903 +engineeringc 4 5.568345 0 0 2904 +biomed 4 5.568345 0 0 2905 +lord 4 5.568345 0 0 2906 +soul 4 5.568345 0 0 2907 +symmetr 4 5.568345 0 0 2908 +cyber 4 5.568345 0 0 2909 +dark 4 5.568345 0 0 2910 +fear 4 5.568345 0 0 2911 +planet 4 5.568345 0 0 2912 +bloom 4 5.568345 0 0 2913 +turkei 4 5.568345 0 0 2914 +combinator 4 5.568345 0 0 2915 +snail 4 5.568345 0 0 2916 +engineeringdepart 4 5.568345 0 0 2917 +proud 4 5.568345 0 0 2918 +queen 4 5.568345 0 0 2919 +naval 4 5.568345 0 0 2920 +substanti 4 5.568345 0 0 2921 +ping 4 5.568345 0 0 2922 +grzegorz 4 5.568345 0 0 2923 +czajkowski 4 5.568345 0 0 2924 +shanghai 4 5.568345 0 0 2925 +taipei 4 5.568345 0 0 2926 +spam 4 5.568345 0 0 2927 +gregori 4 5.568345 0 0 2928 +encod 4 5.568345 0 0 2929 +dalla 4 5.568345 0 0 2930 +car 4 5.568345 0 0 2931 +fulfil 4 5.568345 0 0 2932 +innov 4 5.568345 0 0 2933 +japanes 4 5.568345 0 0 2934 +sell 4 5.568345 0 0 2935 +climb 4 5.568345 0 0 2936 +enjoy 4 5.568345 0 0 2937 +colorado 4 5.568345 0 0 2938 +insur 4 5.568345 0 0 2939 +cube 4 5.568345 0 0 2940 +conform 4 5.568345 0 0 2941 +ratio 4 5.568345 0 0 2942 +superhighwai 4 5.568345 0 0 2943 +darpa 4 5.568345 0 0 2944 +metadata 4 5.568345 0 0 2945 +breath 4 5.568345 0 0 2946 +exclus 4 5.568345 0 0 2947 +essai 4 5.568345 0 0 2948 +vrml 4 5.568345 0 0 2949 +highest 4 5.568345 0 0 2950 +doubl 4 5.568345 0 0 2951 +festiv 4 5.568345 0 0 2952 +vote 4 5.568345 0 0 2953 +maria 4 5.568345 0 0 2954 +bernoulli 4 5.568345 0 0 2955 +pingali 4 5.568345 0 0 2956 +richter 4 5.568345 0 0 2957 +compcon 4 5.568345 0 0 2958 +tape 4 5.568345 0 0 2959 +vijai 4 5.568345 0 0 2960 +zoom 4 5.568345 0 0 2961 +phenomena 4 5.568345 0 0 2962 +fulbright 4 5.568345 0 0 2963 +inlin 4 5.568345 0 0 2964 +paus 4 5.568345 0 0 2965 +customiz 4 5.568345 0 0 2966 +trick 4 5.568345 0 0 2967 +bean 4 5.568345 0 0 2968 +basebal 4 5.568345 0 0 2969 +keyboard 4 5.568345 0 0 2970 +korea 4 5.568345 0 0 2971 +amit 4 5.568345 0 0 2972 +nist 4 5.568345 0 0 2973 +somehow 4 5.568345 0 0 2974 +tick 4 5.568345 0 0 2975 +hire 4 5.568345 0 0 2976 +couldn 4 5.568345 0 0 2977 +pagescott 4 5.568345 0 0 2978 +mehom 4 5.568345 0 0 2979 +drew 4 5.568345 0 0 2980 +theintern 4 5.568345 0 0 2981 +champion 4 5.568345 0 0 2982 +bldg 4 5.568345 0 0 2983 +seed 4 5.568345 0 0 2984 +timertwo 4 5.568345 0 0 2985 +fulli 4 5.568345 0 0 2986 +wart 4 5.568345 0 0 2987 +jolla 4 5.568345 0 0 2988 +petersburg 4 5.568345 0 0 2989 +kestrel 4 5.568345 0 0 2990 +moon 4 5.568345 0 0 2991 +theus 4 5.568345 0 0 2992 +throughput 4 5.568345 0 0 2993 +visibl 4 5.568345 0 0 2994 +arch 4 5.568345 0 0 2995 +hide 4 5.568345 0 0 2996 +gradual 4 5.568345 0 0 2997 +commonsens 4 5.568345 0 0 2998 +bledso 4 5.568345 0 0 2999 +kill 4 5.568345 0 0 3000 +fire 4 5.568345 0 0 3001 +suppos 4 5.568345 0 0 3002 +oftexa 4 5.568345 0 0 3003 +werth 4 5.568345 0 0 3004 +ti 4 5.568345 0 0 3005 +jayadev 4 5.568345 0 0 3006 +blvd 4 5.568345 0 0 3007 +austindepart 4 5.568345 0 0 3008 +provinc 4 5.568345 0 0 3009 +worki 4 5.568345 0 0 3010 +church 4 5.568345 0 0 3011 +crai 4 5.568345 0 0 3012 +prospect 4 5.568345 0 0 3013 +toolset 4 5.568345 0 0 3014 +sytem 4 5.568345 0 0 3015 +escap 4 5.568345 0 0 3016 +spanish 4 5.568345 0 0 3017 +avion 4 5.568345 0 0 3018 +aspir 4 5.568345 0 0 3019 +countless 4 5.568345 0 0 3020 +gouda 4 5.568345 0 0 3021 +lanc 4 5.568345 0 0 3022 +nonmonoton 4 5.568345 0 0 3023 +insight 4 5.568345 0 0 3024 +radio 4 5.568345 0 0 3025 +presentarea 4 5.568345 0 0 3026 +diagnosi 4 5.568345 0 0 3027 +invent 4 5.568345 0 0 3028 +andimplement 4 5.568345 0 0 3029 +rigor 4 5.568345 0 0 3030 +anddistribut 4 5.568345 0 0 3031 +republ 4 5.568345 0 0 3032 +chou 4 5.568345 0 0 3033 +harri 4 5.568345 0 0 3034 +melbourn 4 5.568345 0 0 3035 +sigsoft 4 5.568345 0 0 3036 +haven 4 5.568345 0 0 3037 +silli 4 5.568345 0 0 3038 +alta 4 5.568345 0 0 3039 +leagu 4 5.568345 0 0 3040 +tend 4 5.568345 0 0 3041 +andengin 4 5.568345 0 0 3042 +pardyak 4 5.568345 0 0 3043 +washingtonseattl 4 5.568345 0 0 3044 +ling 4 5.568345 0 0 3045 +skate 4 5.568345 0 0 3046 +microkernel 4 5.568345 0 0 3047 +rocki 4 5.568345 0 0 3048 +scotland 4 5.568345 0 0 3049 +bricker 4 5.568345 0 0 3050 +salesin 4 5.568345 0 0 3051 +racquetbal 4 5.568345 0 0 3052 +afraid 4 5.568345 0 0 3053 +pagebrian 4 5.568345 0 0 3054 +fantasi 4 5.568345 0 0 3055 +eduupd 4 5.568345 0 0 3056 +opal 4 5.568345 0 0 3057 +ta 4 5.568345 0 0 3058 +sorri 4 5.568345 0 0 3059 +raft 4 5.568345 0 0 3060 +triptych 4 5.568345 0 0 3061 +western 4 5.568345 0 0 3062 +chaoticrout 4 5.568345 0 0 3063 +coauthor 4 5.568345 0 0 3064 +narasayya 4 5.568345 0 0 3065 +simplifi 4 5.568345 0 0 3066 +imper 4 5.568345 0 0 3067 +popl 4 5.568345 0 0 3068 +fountain 4 5.568345 0 0 3069 +observatori 4 5.568345 0 0 3070 +languagesand 4 5.568345 0 0 3071 +gone 4 5.568345 0 0 3072 +exploratori 4 5.568345 0 0 3073 +eventu 4 5.568345 0 0 3074 +exodu 4 5.568345 0 0 3075 +zwill 4 5.568345 0 0 3076 +satellit 4 5.568345 0 0 3077 +whereabout 4 5.568345 0 0 3078 +metal 4 5.568345 0 0 3079 +usa 4 5.568345 0 0 3080 +andi 4 5.568345 0 0 3081 +chile 4 5.568345 0 0 3082 +mcauliff 4 5.568345 0 0 3083 +alvin 4 5.568345 0 0 3084 +schoina 4 5.568345 0 0 3085 +galileo 4 5.568345 0 0 3086 +rep 4 5.568345 0 0 3087 +ramasami 4 5.568345 0 0 3088 +kristin 4 5.568345 0 0 3089 +chees 4 5.568345 0 0 3090 +multidimension 4 5.568345 0 0 3091 +groupcours 3 5.857933 0 0 3092 +ychuang 3 5.857933 0 0 3093 +penn 3 5.857933 0 0 3094 +alvisi 3 5.857933 0 0 3095 +entiti 3 5.857933 0 0 3096 +roughli 3 5.857933 0 0 3097 +singhal 3 5.857933 0 0 3098 +sendmail 3 5.857933 0 0 3099 +pfile 3 5.857933 0 0 3100 +sumedh 3 5.857933 0 0 3101 +theywil 3 5.857933 0 0 3102 +contentspag 3 5.857933 0 0 3103 +saluja 3 5.857933 0 0 3104 +duedat 3 5.857933 0 0 3105 +wiscinfo 3 5.857933 0 0 3106 +preced 3 5.857933 0 0 3107 +szewczyk 3 5.857933 0 0 3108 +voskuhl 3 5.857933 0 0 3109 +useth 3 5.857933 0 0 3110 +programsand 3 5.857933 0 0 3111 +toolbox 3 5.857933 0 0 3112 +programmingtechniqu 3 5.857933 0 0 3113 +kimbal 3 5.857933 0 0 3114 +andon 3 5.857933 0 0 3115 +requirementsstud 3 5.857933 0 0 3116 +immedi 3 5.857933 0 0 3117 +jointli 3 5.857933 0 0 3118 +doubt 3 5.857933 0 0 3119 +argument 3 5.857933 0 0 3120 +quotat 3 5.857933 0 0 3121 +inherit 3 5.857933 0 0 3122 +heap 3 5.857933 0 0 3123 +exit 3 5.857933 0 0 3124 +eickenfal 3 5.857933 0 0 3125 +helpif 3 5.857933 0 0 3126 +mate 3 5.857933 0 0 3127 +encount 3 5.857933 0 0 3128 +meanwhil 3 5.857933 0 0 3129 +worri 3 5.857933 0 0 3130 +add 3 5.857933 0 0 3131 +thec 3 5.857933 0 0 3132 +pagesc 3 5.857933 0 0 3133 +incorrect 3 5.857933 0 0 3134 +predat 3 5.857933 0 0 3135 +comfort 3 5.857933 0 0 3136 +giant 3 5.857933 0 0 3137 +explos 3 5.857933 0 0 3138 +alon 3 5.857933 0 0 3139 +scratch 3 5.857933 0 0 3140 +parser 3 5.857933 0 0 3141 +aproject 3 5.857933 0 0 3142 +bibl 3 5.857933 0 0 3143 +confus 3 5.857933 0 0 3144 +principlesof 3 5.857933 0 0 3145 +deeper 3 5.857933 0 0 3146 +denot 3 5.857933 0 0 3147 +systemscomput 3 5.857933 0 0 3148 +janosi 3 5.857933 0 0 3149 +addendum 3 5.857933 0 0 3150 +moran 3 5.857933 0 0 3151 +rajeev 3 5.857933 0 0 3152 +leverag 3 5.857933 0 0 3153 +cardiff 3 5.857933 0 0 3154 +samuel 3 5.857933 0 0 3155 +weber 3 5.857933 0 0 3156 +linker 3 5.857933 0 0 3157 +theworld 3 5.857933 0 0 3158 +foster 3 5.857933 0 0 3159 +tripl 3 5.857933 0 0 3160 +walker 3 5.857933 0 0 3161 +tocomput 3 5.857933 0 0 3162 +ghostview 3 5.857933 0 0 3163 +maker 3 5.857933 0 0 3164 +grader 3 5.857933 0 0 3165 +administrivia 3 5.857933 0 0 3166 +informationcours 3 5.857933 0 0 3167 +attack 3 5.857933 0 0 3168 +memberof 3 5.857933 0 0 3169 +off 3 5.857933 0 0 3170 +cont 3 5.857933 0 0 3171 +likelihood 3 5.857933 0 0 3172 +dijkstra 3 5.857933 0 0 3173 +euler 3 5.857933 0 0 3174 +pitsiani 3 5.857933 0 0 3175 +rack 3 5.857933 0 0 3176 +uncompress 3 5.857933 0 0 3177 +prereq 3 5.857933 0 0 3178 +theimpact 3 5.857933 0 0 3179 +audienc 3 5.857933 0 0 3180 +serverless 3 5.857933 0 0 3181 +todetermin 3 5.857933 0 0 3182 +csc 3 5.857933 0 0 3183 +neal 3 5.857933 0 0 3184 +ravi 3 5.857933 0 0 3185 +constabl 3 5.857933 0 0 3186 +vicki 3 5.857933 0 0 3187 +lego 3 5.857933 0 0 3188 +oral 3 5.857933 0 0 3189 +sawada 3 5.857933 0 0 3190 +hazard 3 5.857933 0 0 3191 +evaluationof 3 5.857933 0 0 3192 +tertiari 3 5.857933 0 0 3193 +mpp 3 5.857933 0 0 3194 +labor 3 5.857933 0 0 3195 +obsolet 3 5.857933 0 0 3196 +dwip 3 5.857933 0 0 3197 +ansi 3 5.857933 0 0 3198 +forthes 3 5.857933 0 0 3199 +moreov 3 5.857933 0 0 3200 +luck 3 5.857933 0 0 3201 +boolean 3 5.857933 0 0 3202 +experienc 3 5.857933 0 0 3203 +xlib 3 5.857933 0 0 3204 +zuckerman 3 5.857933 0 0 3205 +frequenc 3 5.857933 0 0 3206 +agreement 3 5.857933 0 0 3207 +pertain 3 5.857933 0 0 3208 +commerc 3 5.857933 0 0 3209 +credibl 3 5.857933 0 0 3210 +violat 3 5.857933 0 0 3211 +urg 3 5.857933 0 0 3212 +agener 3 5.857933 0 0 3213 +conceptu 3 5.857933 0 0 3214 +kornerup 3 5.857933 0 0 3215 +faq 3 5.857933 0 0 3216 +lavend 3 5.857933 0 0 3217 +cline 3 5.857933 0 0 3218 +gamma 3 5.857933 0 0 3219 +hotjava 3 5.857933 0 0 3220 +javascript 3 5.857933 0 0 3221 +polytechn 3 5.857933 0 0 3222 +jar 3 5.857933 0 0 3223 +informationclick 3 5.857933 0 0 3224 +networkfor 3 5.857933 0 0 3225 +dialect 3 5.857933 0 0 3226 +gambit 3 5.857933 0 0 3227 +macintoshcomput 3 5.857933 0 0 3228 +treasur 3 5.857933 0 0 3229 +gradingmidterm 3 5.857933 0 0 3230 +guidefin 3 5.857933 0 0 3231 +programmingc 3 5.857933 0 0 3232 +ordinari 3 5.857933 0 0 3233 +lightweight 3 5.857933 0 0 3234 +popul 3 5.857933 0 0 3235 +embodi 3 5.857933 0 0 3236 +blumof 3 5.857933 0 0 3237 +citizen 3 5.857933 0 0 3238 +dramat 3 5.857933 0 0 3239 +aggress 3 5.857933 0 0 3240 +similarli 3 5.857933 0 0 3241 +neighborhood 3 5.857933 0 0 3242 +intra 3 5.857933 0 0 3243 +enumer 3 5.857933 0 0 3244 +compliant 3 5.857933 0 0 3245 +andsemant 3 5.857933 0 0 3246 +surpass 3 5.857933 0 0 3247 +rivest 3 5.857933 0 0 3248 +parallelalgorithm 3 5.857933 0 0 3249 +rscheme 3 5.857933 0 0 3250 +lauren 3 5.857933 0 0 3251 +nichola 3 5.857933 0 0 3252 +interv 3 5.857933 0 0 3253 +thepap 3 5.857933 0 0 3254 +guadalup 3 5.857933 0 0 3255 +andyou 3 5.857933 0 0 3256 +gripe 3 5.857933 0 0 3257 +tong 3 5.857933 0 0 3258 +cheriton 3 5.857933 0 0 3259 +synopsi 3 5.857933 0 0 3260 +formobil 3 5.857933 0 0 3261 +theperform 3 5.857933 0 0 3262 +berlin 3 5.857933 0 0 3263 +terri 3 5.857933 0 0 3264 +golub 3 5.857933 0 0 3265 +tokuda 3 5.857933 0 0 3266 +kistler 3 5.857933 0 0 3267 +goyal 3 5.857933 0 0 3268 +shenoi 3 5.857933 0 0 3269 +rangan 3 5.857933 0 0 3270 +anaheim 3 5.857933 0 0 3271 +campbel 3 5.857933 0 0 3272 +mccann 3 5.857933 0 0 3273 +multimediaappl 3 5.857933 0 0 3274 +ftc 3 5.857933 0 0 3275 +katz 3 5.857933 0 0 3276 +reddi 3 5.857933 0 0 3277 +deliveri 3 5.857933 0 0 3278 +durham 3 5.857933 0 0 3279 +hampshir 3 5.857933 0 0 3280 +chow 3 5.857933 0 0 3281 +london 3 5.857933 0 0 3282 +infocom 3 5.857933 0 0 3283 +weihl 3 5.857933 0 0 3284 +networkprotocol 3 5.857933 0 0 3285 +aswel 3 5.857933 0 0 3286 +franci 3 5.857933 0 0 3287 +axiomat 3 5.857933 0 0 3288 +how 3 5.857933 0 0 3289 +nguyen 3 5.857933 0 0 3290 +nearbi 3 5.857933 0 0 3291 +ofoper 3 5.857933 0 0 3292 +proport 3 5.857933 0 0 3293 +qbic 3 5.857933 0 0 3294 +qing 3 5.857933 0 0 3295 +outof 3 5.857933 0 0 3296 +ters 3 5.857933 0 0 3297 +theoremprov 3 5.857933 0 0 3298 +ajit 3 5.857933 0 0 3299 +feng 3 5.857933 0 0 3300 +warren 3 5.857933 0 0 3301 +edudepart 3 5.857933 0 0 3302 +mathematica 3 5.857933 0 0 3303 +quotedand 3 5.857933 0 0 3304 +tompa 3 5.857933 0 0 3305 +preview 3 5.857933 0 0 3306 +moreinform 3 5.857933 0 0 3307 +rambl 3 5.857933 0 0 3308 +condon 3 5.857933 0 0 3309 +tompaclass 3 5.857933 0 0 3310 +corin 3 5.857933 0 0 3311 +aweekli 3 5.857933 0 0 3312 +punctual 3 5.857933 0 0 3313 +holden 3 5.857933 0 0 3314 +alistair 3 5.857933 0 0 3315 +urgent 3 5.857933 0 0 3316 +duti 3 5.857933 0 0 3317 +boe 3 5.857933 0 0 3318 +specialist 3 5.857933 0 0 3319 +leadership 3 5.857933 0 0 3320 +expertis 3 5.857933 0 0 3321 +listof 3 5.857933 0 0 3322 +proper 3 5.857933 0 0 3323 +interview 3 5.857933 0 0 3324 +mailinglist 3 5.857933 0 0 3325 +militari 3 5.857933 0 0 3326 +defens 3 5.857933 0 0 3327 +andit 3 5.857933 0 0 3328 +orpostscript 3 5.857933 0 0 3329 +beginn 3 5.857933 0 0 3330 +insieg 3 5.857933 0 0 3331 +redston 3 5.857933 0 0 3332 +joshua 3 5.857933 0 0 3333 +patrick 3 5.857933 0 0 3334 +andwil 3 5.857933 0 0 3335 +thisdocu 3 5.857933 0 0 3336 +accommod 3 5.857933 0 0 3337 +wchan 3 5.857933 0 0 3338 +semiconductor 3 5.857933 0 0 3339 +semiconduct 3 5.857933 0 0 3340 +micron 3 5.857933 0 0 3341 +assignmentsassign 3 5.857933 0 0 3342 +litvinov 3 5.857933 0 0 3343 +projectth 3 5.857933 0 0 3344 +notkin 3 5.857933 0 0 3345 +partridg 3 5.857933 0 0 3346 +crew 3 5.857933 0 0 3347 +impli 3 5.857933 0 0 3348 +influenc 3 5.857933 0 0 3349 +theoryand 3 5.857933 0 0 3350 +scatter 3 5.857933 0 0 3351 +freder 3 5.857933 0 0 3352 +wealth 3 5.857933 0 0 3353 +anin 3 5.857933 0 0 3354 +cash 3 5.857933 0 0 3355 +grail 3 5.857933 0 0 3356 +weekend 3 5.857933 0 0 3357 +pyramid 3 5.857933 0 0 3358 +paragon 3 5.857933 0 0 3359 +simd 3 5.857933 0 0 3360 +mimd 3 5.857933 0 0 3361 +icon 3 5.857933 0 0 3362 +faith 3 5.857933 0 0 3363 +eigenvalu 3 5.857933 0 0 3364 +eigenvector 3 5.857933 0 0 3365 +singular 3 5.857933 0 0 3366 +conclus 3 5.857933 0 0 3367 +useof 3 5.857933 0 0 3368 +lunch 3 5.857933 0 0 3369 +shen 3 5.857933 0 0 3370 +pong 3 5.857933 0 0 3371 +stefano 3 5.857933 0 0 3372 +kaxira 3 5.857933 0 0 3373 +yelick 3 5.857933 0 0 3374 +shortli 3 5.857933 0 0 3375 +noel 3 5.857933 0 0 3376 +garrett 3 5.857933 0 0 3377 +jen 3 5.857933 0 0 3378 +blank 3 5.857933 0 0 3379 +barbara 3 5.857933 0 0 3380 +ipp 3 5.857933 0 0 3381 +dusseau 3 5.857933 0 0 3382 +zahorjan 3 5.857933 0 0 3383 +tran 3 5.857933 0 0 3384 +saltz 3 5.857933 0 0 3385 +am 3 5.857933 0 0 3386 +tradeoff 3 5.857933 0 0 3387 +atmospher 3 5.857933 0 0 3388 +jason 3 5.857933 0 0 3389 +fiuczynski 3 5.857933 0 0 3390 +audit 3 5.857933 0 0 3391 +disciplin 3 5.857933 0 0 3392 +vagu 3 5.857933 0 0 3393 +csphone 3 5.857933 0 0 3394 +departmentc 3 5.857933 0 0 3395 +millerc 3 5.857933 0 0 3396 +tanenbaum 3 5.857933 0 0 3397 +programmingassign 3 5.857933 0 0 3398 +ofobject 3 5.857933 0 0 3399 +thrash 3 5.857933 0 0 3400 +bybart 3 5.857933 0 0 3401 +landweb 3 5.857933 0 0 3402 +advancedoper 3 5.857933 0 0 3403 +focal 3 5.857933 0 0 3404 +formula 3 5.857933 0 0 3405 +labyou 3 5.857933 0 0 3406 +dorm 3 5.857933 0 0 3407 +lahei 3 5.857933 0 0 3408 +projector 3 5.857933 0 0 3409 +objectivesvectra 3 5.857933 0 0 3410 +homeclass 3 5.857933 0 0 3411 +policyl 3 5.857933 0 0 3412 +policyacadem 3 5.857933 0 0 3413 +macc 3 5.857933 0 0 3414 +toni 3 5.857933 0 0 3415 +hummert 3 5.857933 0 0 3416 +man 3 5.857933 0 0 3417 +tsioli 3 5.857933 0 0 3418 +ratliff 3 5.857933 0 0 3419 +bockrath 3 5.857933 0 0 3420 +ashraf 3 5.857933 0 0 3421 +geeri 3 5.857933 0 0 3422 +jyothi 3 5.857933 0 0 3423 +thano 3 5.857933 0 0 3424 +fink 3 5.857933 0 0 3425 +aboulnaga 3 5.857933 0 0 3426 +jherro 3 5.857933 0 0 3427 +abhinav 3 5.857933 0 0 3428 +agupta 3 5.857933 0 0 3429 +suhui 3 5.857933 0 0 3430 +enorm 3 5.857933 0 0 3431 +salli 3 5.857933 0 0 3432 +facstaff 3 5.857933 0 0 3433 +drag 3 5.857933 0 0 3434 +crack 3 5.857933 0 0 3435 +iici 3 5.857933 0 0 3436 +scanner 3 5.857933 0 0 3437 +leavi 3 5.857933 0 0 3438 +sharenow 3 5.857933 0 0 3439 +swander 3 5.857933 0 0 3440 +thayer 3 5.857933 0 0 3441 +varghes 3 5.857933 0 0 3442 +weinberg 3 5.857933 0 0 3443 +microcomput 3 5.857933 0 0 3444 +jerri 3 5.857933 0 0 3445 +suen 3 5.857933 0 0 3446 +asgarian 3 5.857933 0 0 3447 +architecur 3 5.857933 0 0 3448 +makeup 3 5.857933 0 0 3449 +vega 3 5.857933 0 0 3450 +neg 3 5.857933 0 0 3451 +eduand 3 5.857933 0 0 3452 +tremend 3 5.857933 0 0 3453 +narr 3 5.857933 0 0 3454 +gradingther 3 5.857933 0 0 3455 +thesear 3 5.857933 0 0 3456 +ineffici 3 5.857933 0 0 3457 +meaning 3 5.857933 0 0 3458 +briefli 3 5.857933 0 0 3459 +pain 3 5.857933 0 0 3460 +reiter 3 5.857933 0 0 3461 +freshman 3 5.857933 0 0 3462 +sundaram 3 5.857933 0 0 3463 +rahul 3 5.857933 0 0 3464 +caught 3 5.857933 0 0 3465 +omit 3 5.857933 0 0 3466 +offset 3 5.857933 0 0 3467 +acquaint 3 5.857933 0 0 3468 +subscript 3 5.857933 0 0 3469 +easier 3 5.857933 0 0 3470 +timet 3 5.857933 0 0 3471 +dine 3 5.857933 0 0 3472 +dont 3 5.857933 0 0 3473 +obvious 3 5.857933 0 0 3474 +fractal 3 5.857933 0 0 3475 +hereto 3 5.857933 0 0 3476 +bertseka 3 5.857933 0 0 3477 +lagrangian 3 5.857933 0 0 3478 +gradient 3 5.857933 0 0 3479 +chee 3 5.857933 0 0 3480 +preprint 3 5.857933 0 0 3481 +boor 3 5.857933 0 0 3482 +textbookproblem 3 5.857933 0 0 3483 +windowshint 3 5.857933 0 0 3484 +compilersth 3 5.857933 0 0 3485 +systememailmosaicnetscap 3 5.857933 0 0 3486 +languageth 3 5.857933 0 0 3487 +thin 3 5.857933 0 0 3488 +visionc 3 5.857933 0 0 3489 +histogram 3 5.857933 0 0 3490 +portrait 3 5.857933 0 0 3491 +surround 3 5.857933 0 0 3492 +quicktim 3 5.857933 0 0 3493 +ahuja 3 5.857933 0 0 3494 +krishna 3 5.857933 0 0 3495 +privat 3 5.857933 0 0 3496 +bump 3 5.857933 0 0 3497 +psych 3 5.857933 0 0 3498 +eduher 3 5.857933 0 0 3499 +kunen 3 5.857933 0 0 3500 +mbirk 3 5.857933 0 0 3501 +badger 3 5.857933 0 0 3502 +sharewar 3 5.857933 0 0 3503 +freewar 3 5.857933 0 0 3504 +geoffrei 3 5.857933 0 0 3505 +soar 3 5.857933 0 0 3506 +backpropag 3 5.857933 0 0 3507 +canadian 3 5.857933 0 0 3508 +pang 3 5.857933 0 0 3509 +avinash 3 5.857933 0 0 3510 +rajesh 3 5.857933 0 0 3511 +aren 3 5.857933 0 0 3512 +biochemistri 3 5.857933 0 0 3513 +vliw 3 5.857933 0 0 3514 +harm 3 5.857933 0 0 3515 +kinemat 3 5.857933 0 0 3516 +computergraph 3 5.857933 1 0 3517 +wave 3 5.857933 0 0 3518 +shadow 3 5.857933 0 0 3519 +arrow 3 5.857933 0 0 3520 +jing 3 5.857933 0 0 3521 +mccune 3 5.857933 1 0 3522 +waterloo 3 5.857933 0 0 3523 +hung 3 5.857933 1 0 3524 +landscap 3 5.857933 1 0 3525 +vavasi 3 5.857933 0 0 3526 +hough 3 5.857933 0 0 3527 +trefethen 3 5.857933 0 0 3528 +exponenti 3 5.857933 0 0 3529 +ncstrl 3 5.857933 0 0 3530 +owner 3 5.857933 0 0 3531 +ahoi 3 5.857933 0 0 3532 +concert 3 5.857933 0 0 3533 +interfacefor 3 5.857933 0 0 3534 +anindya 3 5.857933 0 0 3535 +copper 3 5.857933 0 0 3536 +glade 3 5.857933 0 0 3537 +takako 3 5.857933 0 0 3538 +woman 3 5.857933 0 0 3539 +redesign 3 5.857933 0 0 3540 +greatli 3 5.857933 0 0 3541 +child 3 5.857933 0 0 3542 +csrvl 3 5.857933 0 0 3543 +binghamton 3 5.857933 0 0 3544 +sophist 3 5.857933 0 0 3545 +genom 3 5.857933 0 0 3546 +trec 3 5.857933 0 0 3547 +gigabyt 3 5.857933 0 0 3548 +activitiesmemb 3 5.857933 0 0 3549 +zurich 3 5.857933 0 0 3550 +switzerland 3 5.857933 0 0 3551 +softwareth 3 5.857933 0 0 3552 +syracus 3 5.857933 0 0 3553 +haifa 3 5.857933 0 0 3554 +aerospac 3 5.857933 0 0 3555 +reconfigur 3 5.857933 0 0 3556 +act 3 5.857933 0 0 3557 +alamito 3 5.857933 0 0 3558 +gould 3 5.857933 0 0 3559 +nondeterminist 3 5.857933 0 0 3560 +algorithmica 3 5.857933 0 0 3561 +armi 3 5.857933 0 0 3562 +topla 3 5.857933 0 0 3563 +chelmsford 3 5.857933 0 0 3564 +detroit 3 5.857933 0 0 3565 +rutger 3 5.857933 0 0 3566 +brunswick 3 5.857933 0 0 3567 +redmond 3 5.857933 0 0 3568 +gri 3 5.857933 0 0 3569 +munich 3 5.857933 0 0 3570 +stoni 3 5.857933 0 0 3571 +heavili 3 5.857933 0 0 3572 +streamlin 3 5.857933 0 0 3573 +dimac 3 5.857933 0 0 3574 +reactiv 3 5.857933 0 0 3575 +successor 3 5.857933 0 0 3576 +hoto 3 5.857933 0 0 3577 +orca 3 5.857933 0 0 3578 +wilkinson 3 5.857933 0 0 3579 +zeno 3 5.857933 0 0 3580 +thetim 3 5.857933 0 0 3581 +magnitud 3 5.857933 0 0 3582 +rival 3 5.857933 0 0 3583 +stuart 3 5.857933 0 0 3584 +predecessor 3 5.857933 0 0 3585 +jackson 3 5.857933 0 0 3586 +nato 3 5.857933 0 0 3587 +coursework 3 5.857933 0 0 3588 +heaven 3 5.857933 0 0 3589 +hindu 3 5.857933 0 0 3590 +fernandez 3 5.857933 0 0 3591 +dutch 3 5.857933 0 0 3592 +win 3 5.857933 0 0 3593 +birth 3 5.857933 0 0 3594 +greek 3 5.857933 0 0 3595 +bright 3 5.857933 0 0 3596 +pack 3 5.857933 0 0 3597 +asian 3 5.857933 0 0 3598 +schauser 3 5.857933 0 0 3599 +avula 3 5.857933 0 0 3600 +educornel 3 5.857933 0 0 3601 +universitydept 3 5.857933 0 0 3602 +galaxi 3 5.857933 0 0 3603 +underground 3 5.857933 0 0 3604 +spider 3 5.857933 0 0 3605 +bhringer 3 5.857933 0 0 3606 +fabric 3 5.857933 0 0 3607 +artificialintellig 3 5.857933 0 0 3608 +scream 3 5.857933 0 0 3609 +microfabr 3 5.857933 0 0 3610 +daniela 3 5.857933 0 0 3611 +educlick 3 5.857933 0 0 3612 +tosupport 3 5.857933 0 0 3613 +teachingc 3 5.857933 0 0 3614 +fourteenth 3 5.857933 0 0 3615 +ninth 3 5.857933 0 0 3616 +citat 3 5.857933 0 0 3617 +chew 3 5.857933 0 0 3618 +delaunai 3 5.857933 0 0 3619 +implicitli 3 5.857933 0 0 3620 +andoper 3 5.857933 0 0 3621 +tokyo 3 5.857933 0 0 3622 +karl 3 5.857933 0 0 3623 +jesu 3 5.857933 0 0 3624 +ctctr 3 5.857933 0 0 3625 +parallelprocess 3 5.857933 0 0 3626 +recipi 3 5.857933 0 0 3627 +preserv 3 5.857933 0 0 3628 +mobilecomput 3 5.857933 0 0 3629 +dread 3 5.857933 0 0 3630 +wise 3 5.857933 0 0 3631 +romanc 3 5.857933 0 0 3632 +passion 3 5.857933 0 0 3633 +tortur 3 5.857933 0 0 3634 +diseas 3 5.857933 0 0 3635 +blame 3 5.857933 0 0 3636 +cold 3 5.857933 0 0 3637 +krafft 3 5.857933 0 0 3638 +archi 3 5.857933 0 0 3639 +dienst 3 5.857933 0 0 3640 +halldepart 3 5.857933 0 0 3641 +metatheori 3 5.857933 0 0 3642 +allevi 3 5.857933 0 0 3643 +checker 3 5.857933 0 0 3644 +funda 3 5.857933 0 0 3645 +stein 3 5.857933 0 0 3646 +planar 3 5.857933 0 0 3647 +thegener 3 5.857933 0 0 3648 +combinatorica 3 5.857933 0 0 3649 +netherland 3 5.857933 0 0 3650 +universitycomput 3 5.857933 0 0 3651 +cohen 3 5.857933 0 0 3652 +yuan 3 5.857933 0 0 3653 +dive 3 5.857933 0 0 3654 +straight 3 5.857933 0 0 3655 +indira 3 5.857933 0 0 3656 +twin 3 5.857933 0 0 3657 +biographi 3 5.857933 0 0 3658 +laugh 3 5.857933 0 0 3659 +assistantship 3 5.857933 0 0 3660 +langaug 3 5.857933 0 0 3661 +blind 3 5.857933 0 0 3662 +serious 3 5.857933 0 0 3663 +researchassoci 3 5.857933 0 0 3664 +poland 3 5.857933 0 0 3665 +tast 3 5.857933 0 0 3666 +engineeringclass 3 5.857933 0 0 3667 +recip 3 5.857933 0 0 3668 +pagepaul 3 5.857933 0 0 3669 +bout 3 5.857933 0 0 3670 +elsevi 3 5.857933 0 0 3671 +pai 3 5.857933 0 0 3672 +shouldb 3 5.857933 0 0 3673 +stone 3 5.857933 0 0 3674 +informationresearch 3 5.857933 0 0 3675 +denmark 3 5.857933 0 0 3676 +hongkong 3 5.857933 0 0 3677 +stamp 3 5.857933 0 0 3678 +ryan 3 5.857933 0 0 3679 +tian 3 5.857933 0 0 3680 +classesc 3 5.857933 0 0 3681 +counti 3 5.857933 0 0 3682 +emilio 3 5.857933 0 0 3683 +summit 3 5.857933 0 0 3684 +orth 3 5.857933 0 0 3685 +backcountri 3 5.857933 0 0 3686 +publicli 3 5.857933 0 0 3687 +sale 3 5.857933 0 0 3688 +karlsruh 3 5.857933 0 0 3689 +kwon 3 5.857933 0 0 3690 +confid 3 5.857933 0 0 3691 +temporarili 3 5.857933 0 0 3692 +parallelmachin 3 5.857933 0 0 3693 +lai 3 5.857933 0 0 3694 +inventor 3 5.857933 0 0 3695 +flame 3 5.857933 0 0 3696 +arm 3 5.857933 0 0 3697 +argu 3 5.857933 0 0 3698 +fashion 3 5.857933 0 0 3699 +arizona 3 5.857933 0 0 3700 +onprincipl 3 5.857933 0 0 3701 +berger 3 5.857933 0 0 3702 +jone 3 5.857933 0 0 3703 +worldwid 3 5.857933 0 0 3704 +luci 3 5.857933 0 0 3705 +fresh 3 5.857933 0 0 3706 +fudan 3 5.857933 0 0 3707 +legion 3 5.857933 0 0 3708 +automobil 3 5.857933 0 0 3709 +headlin 3 5.857933 0 0 3710 +hero 3 5.857933 0 0 3711 +ming 3 5.857933 0 0 3712 +henzing 3 5.857933 0 0 3713 +professorcomput 3 5.857933 0 0 3714 +stockholm 3 5.857933 0 0 3715 +nicknam 3 5.857933 0 0 3716 +lame 3 5.857933 0 0 3717 +imac 3 5.857933 0 0 3718 +cyberspac 3 5.857933 0 0 3719 +neumann 3 5.857933 0 0 3720 +lattic 3 5.857933 0 0 3721 +usaemail 3 5.857933 0 0 3722 +roll 3 5.857933 0 0 3723 +espn 3 5.857933 0 0 3724 +borrow 3 5.857933 0 0 3725 +interplai 3 5.857933 0 0 3726 +schloss 3 5.857933 0 0 3727 +packer 3 5.857933 0 0 3728 +collaps 3 5.857933 0 0 3729 +inadequ 3 5.857933 0 0 3730 +tediou 3 5.857933 0 0 3731 +megabyt 3 5.857933 0 0 3732 +claus 3 5.857933 0 0 3733 +hourli 3 5.857933 0 0 3734 +thathav 3 5.857933 0 0 3735 +serverarchitectur 3 5.857933 0 0 3736 +comad 3 5.857933 0 0 3737 +informationfor 3 5.857933 0 0 3738 +percept 3 5.857933 0 0 3739 +recurr 3 5.857933 0 0 3740 +latin 3 5.857933 0 0 3741 +ramachandran 3 5.857933 0 0 3742 +conjunct 3 5.857933 0 0 3743 +tina 3 5.857933 0 0 3744 +detector 3 5.857933 0 0 3745 +horizon 3 5.857933 0 0 3746 +zone 3 5.857933 0 0 3747 +forfault 3 5.857933 0 0 3748 +kerala 3 5.857933 0 0 3749 +deploi 3 5.857933 0 0 3750 +membership 3 5.857933 0 0 3751 +motif 3 5.857933 0 0 3752 +hindi 3 5.857933 0 0 3753 +supervisor 3 5.857933 0 0 3754 +expans 3 5.857933 0 0 3755 +district 3 5.857933 0 0 3756 +father 3 5.857933 0 0 3757 +johann 3 5.857933 0 0 3758 +glass 3 5.857933 0 0 3759 +twentieth 3 5.857933 0 0 3760 +europ 3 5.857933 0 0 3761 +eduaddress 3 5.857933 0 0 3762 +thesumm 3 5.857933 0 0 3763 +internship 3 5.857933 0 0 3764 +categor 3 5.857933 0 0 3765 +sector 3 5.857933 0 0 3766 +children 3 5.857933 0 0 3767 +talent 3 5.857933 0 0 3768 +peac 3 5.857933 0 0 3769 +asid 3 5.857933 0 0 3770 +frontier 3 5.857933 0 0 3771 +abridg 3 5.857933 0 0 3772 +crete 3 5.857933 0 0 3773 +ellipt 3 5.857933 0 0 3774 +cleaner 3 5.857933 0 0 3775 +pageer 3 5.857933 0 0 3776 +habit 3 5.857933 0 0 3777 +atlanta 3 5.857933 0 0 3778 +engineeringand 3 5.857933 0 0 3779 +hpux 3 5.857933 0 0 3780 +systemat 3 5.857933 0 0 3781 +beach 3 5.857933 0 0 3782 +seoul 3 5.857933 0 0 3783 +forward 3 5.857933 0 0 3784 +nota 3 5.857933 0 0 3785 +newli 3 5.857933 0 0 3786 +health 3 5.857933 0 0 3787 +advertis 3 5.857933 0 0 3788 +neta 3 5.857933 0 0 3789 +let 3 5.857933 0 0 3790 +michel 3 5.857933 0 0 3791 +matt 3 5.857933 0 0 3792 +fame 3 5.857933 0 0 3793 +kuiper 3 5.857933 0 0 3794 +souther 3 5.857933 0 0 3795 +mathematicallog 3 5.857933 0 0 3796 +belong 3 5.857933 0 0 3797 +neuron 3 5.857933 0 0 3798 +coin 3 5.857933 0 0 3799 +wasn 3 5.857933 0 0 3800 +tenur 3 5.857933 0 0 3801 +deutsch 3 5.857933 0 0 3802 +harold 3 5.857933 0 0 3803 +carbon 3 5.857933 0 0 3804 +loss 3 5.857933 0 0 3805 +interestparallel 3 5.857933 0 0 3806 +narrow 3 5.857933 0 0 3807 +publicationsj 3 5.857933 0 0 3808 +baltimor 3 5.857933 0 0 3809 +guangtian 3 5.857933 0 0 3810 +haiku 3 5.857933 0 0 3811 +uniti 3 5.857933 0 0 3812 +alsointerest 3 5.857933 0 0 3813 +sciencestaylor 3 5.857933 0 0 3814 +republican 3 5.857933 0 0 3815 +gloriou 3 5.857933 0 0 3816 +hometown 3 5.857933 0 0 3817 +tower 3 5.857933 0 0 3818 +sinica 3 5.857933 0 0 3819 +poon 3 5.857933 0 0 3820 +meemail 3 5.857933 0 0 3821 +edupost 3 5.857933 0 0 3822 +irrelev 3 5.857933 0 0 3823 +eleven 3 5.857933 0 0 3824 +pleasant 3 5.857933 0 0 3825 +multifunct 3 5.857933 0 0 3826 +publicationsi 3 5.857933 0 0 3827 +sciencesaustin 3 5.857933 0 0 3828 +aloysiu 3 5.857933 0 0 3829 +cindi 3 5.857933 0 0 3830 +groupunivers 3 5.857933 0 0 3831 +primarilyin 3 5.857933 0 0 3832 +diagnost 3 5.857933 0 0 3833 +georgia 3 5.857933 0 0 3834 +systemsth 3 5.857933 0 0 3835 +informationtechnolog 3 5.857933 0 0 3836 +unrel 3 5.857933 0 0 3837 +intereststh 3 5.857933 0 0 3838 +informat 3 5.857933 0 0 3839 +beat 3 5.857933 0 0 3840 +multimediacomput 3 5.857933 0 0 3841 +mitsubishi 3 5.857933 0 0 3842 +merl 3 5.857933 0 0 3843 +preprocessor 3 5.857933 0 0 3844 +crow 3 5.857933 0 0 3845 +bold 3 5.857933 0 0 3846 +acad 3 5.857933 0 0 3847 +moham 3 5.857933 0 0 3848 +plapack 3 5.857933 0 0 3849 +amir 3 5.857933 0 0 3850 +smaragdaki 3 5.857933 0 0 3851 +myfavorit 3 5.857933 0 0 3852 +flat 3 5.857933 0 0 3853 +moredetail 3 5.857933 0 0 3854 +isaac 3 5.857933 0 0 3855 +cortex 3 5.857933 0 0 3856 +cortic 3 5.857933 0 0 3857 +meyour 3 5.857933 0 0 3858 +certif 3 5.857933 0 0 3859 +interestmathemat 3 5.857933 0 0 3860 +stationari 3 5.857933 0 0 3861 +qsim 3 5.857933 0 0 3862 +lockhe 3 5.857933 0 0 3863 +aliv 3 5.857933 0 0 3864 +pete 3 5.857933 0 0 3865 +inconveni 3 5.857933 0 0 3866 +lauri 3 5.857933 0 0 3867 +fifteenth 3 5.857933 0 0 3868 +glenn 3 5.857933 0 0 3869 +down 3 5.857933 0 0 3870 +informationi 3 5.857933 0 0 3871 +marku 3 5.857933 0 0 3872 +groupand 3 5.857933 0 0 3873 +byth 3 5.857933 0 0 3874 +hoar 3 5.857933 0 0 3875 +hermjakob 3 5.857933 0 0 3876 +signll 3 5.857933 0 0 3877 +ucpop 3 5.857933 0 0 3878 +urbana 3 5.857933 0 0 3879 +myph 3 5.857933 0 0 3880 +thesystem 3 5.857933 0 0 3881 +scrollit_rl 3 5.857933 0 0 3882 +swizzl 3 5.857933 0 0 3883 +providesa 3 5.857933 0 0 3884 +sudarshan 3 5.857933 0 0 3885 +plaxton 3 5.857933 0 0 3886 +alsoavail 3 5.857933 0 0 3887 +hawaii 3 5.857933 0 0 3888 +underconstruct 3 5.857933 0 0 3889 +freedom 3 5.857933 0 0 3890 +shall 3 5.857933 0 0 3891 +teacher 3 5.857933 0 0 3892 +interchang 3 5.857933 0 0 3893 +slight 3 5.857933 0 0 3894 +wine 3 5.857933 0 0 3895 +eddi 3 5.857933 0 0 3896 +tiger 3 5.857933 0 0 3897 +evolutionari 3 5.857933 0 0 3898 +twelv 3 5.857933 0 0 3899 +truste 3 5.857933 0 0 3900 +andm 3 5.857933 0 0 3901 +louisiana 3 5.857933 0 0 3902 +thedevelop 3 5.857933 0 0 3903 +warm 3 5.857933 0 0 3904 +dozen 3 5.857933 0 0 3905 +spaa 3 5.857933 0 0 3906 +prison 3 5.857933 0 0 3907 +civil 3 5.857933 0 0 3908 +motor 3 5.857933 0 0 3909 +monthli 3 5.857933 0 0 3910 +ofmi 3 5.857933 0 0 3911 +medal 3 5.857933 0 0 3912 +jakobovit 3 5.857933 0 0 3913 +lara 3 5.857933 0 0 3914 +revisit 3 5.857933 0 0 3915 +affair 3 5.857933 0 0 3916 +ausland 3 5.857933 0 0 3917 +super 3 5.857933 0 0 3918 +somedai 3 5.857933 0 0 3919 +bank 3 5.857933 0 0 3920 +stefan 3 5.857933 0 0 3921 +northeast 3 5.857933 0 0 3922 +cachingtraci 3 5.857933 0 0 3923 +kimbrel 3 5.857933 0 0 3924 +felten 3 5.857933 0 0 3925 +dynamiccompil 3 5.857933 0 0 3926 +mobisa 3 5.857933 0 0 3927 +mappedcach 3 5.857933 0 0 3928 +forappl 3 5.857933 0 0 3929 +alien 3 5.857933 0 0 3930 +letterman 3 5.857933 0 0 3931 +shortcut 3 5.857933 0 0 3932 +museum 3 5.857933 0 0 3933 +groupuw 3 5.857933 0 0 3934 +atstanford 3 5.857933 0 0 3935 +astrophys 3 5.857933 0 0 3936 +metip 3 5.857933 0 0 3937 +workin 3 5.857933 0 0 3938 +rsum 3 5.857933 0 0 3939 +codi 3 5.857933 0 0 3940 +kwok 3 5.857933 0 0 3941 +cronquist 3 5.857933 0 0 3942 +evil 3 5.857933 0 0 3943 +emul 3 5.857933 0 0 3944 +distract 3 5.857933 0 0 3945 +trumpet 3 5.857933 0 0 3946 +marin 3 5.857933 0 0 3947 +fascin 3 5.857933 0 0 3948 +dlee 3 5.857933 0 0 3949 +energi 3 5.857933 0 0 3950 +specul 3 5.857933 0 0 3951 +reorder 3 5.857933 0 0 3952 +sit 3 5.857933 0 0 3953 +oodb 3 5.857933 0 0 3954 +amast 3 5.857933 0 0 3955 +wcsss 3 5.857933 0 0 3956 +namespac 3 5.857933 0 0 3957 +seinfeld 3 5.857933 0 0 3958 +slave 3 5.857933 0 0 3959 +sujai 3 5.857933 0 0 3960 +parekh 3 5.857933 0 0 3961 +kick 3 5.857933 0 0 3962 +alma 3 5.857933 0 0 3963 +stillmaintain 3 5.857933 0 0 3964 +forobject 3 5.857933 0 0 3965 +fantast 3 5.857933 0 0 3966 +rsml 3 5.857933 0 0 3967 +diagnos 3 5.857933 0 0 3968 +emer 3 5.857933 0 0 3969 +stamm 3 5.857933 0 0 3970 +dabbl 3 5.857933 0 0 3971 +mactest 3 5.857933 0 0 3972 +thekkath 3 5.857933 0 0 3973 +astronomi 3 5.857933 0 0 3974 +surgeri 3 5.857933 0 0 3975 +miscellani 3 5.857933 0 0 3976 +uwcs 3 5.857933 0 0 3977 +superpag 3 5.857933 0 0 3978 +emerald 3 5.857933 0 0 3979 +knee 3 5.857933 0 0 3980 +peoplefaculti 3 5.857933 0 0 3981 +waynew 3 5.857933 0 0 3982 +ballroom 3 5.857933 0 0 3983 +traci 3 5.857933 0 0 3984 +travi 3 5.857933 0 0 3985 +schwarz 3 5.857933 0 0 3986 +electricalengin 3 5.857933 0 0 3987 +wesleyan 3 5.857933 0 0 3988 +mirza 3 5.857933 0 0 3989 +recreat 3 5.857933 0 0 3990 +chairman 3 5.857933 0 0 3991 +cmo 3 5.857933 0 0 3992 +congest 3 5.857933 0 0 3993 +retarget 3 5.857933 0 0 3994 +domin 3 5.857933 0 0 3995 +hacker 3 5.857933 0 0 3996 +propag 3 5.857933 0 0 3997 +shorter 3 5.857933 0 0 3998 +complementar 3 5.857933 0 0 3999 +bulk 3 5.857933 0 0 4000 +reachabl 3 5.857933 0 0 4001 +ofwisconsin 3 5.857933 0 0 4002 +indianinstitut 3 5.857933 0 0 4003 +edueduc 3 5.857933 0 0 4004 +wit 3 5.857933 0 0 4005 +insan 3 5.857933 0 0 4006 +agre 3 5.857933 0 0 4007 +pilot 3 5.857933 0 0 4008 +breach 3 5.857933 0 0 4009 +anatomi 3 5.857933 0 0 4010 +vijaykumar 3 5.857933 0 0 4011 +raid 3 5.857933 0 0 4012 +santiago 3 5.857933 0 0 4013 +schuh 3 5.857933 0 0 4014 +chilimbi 3 5.857933 0 0 4015 +trishul 3 5.857933 0 0 4016 +usaadvisor 3 5.857933 0 0 4017 +fingerson 3 5.857933 0 0 4018 +thea 3 5.857933 0 0 4019 +sklenar 3 5.857933 0 0 4020 +madhusudhan 3 5.857933 0 0 4021 +myllymaki 3 5.857933 0 0 4022 +wenger 3 5.857933 0 0 4023 +bennett 3 5.857933 0 0 4024 +trek 3 5.857933 0 0 4025 +parallellanguag 3 5.857933 0 0 4026 +assistantcomput 3 5.857933 0 0 4027 +shubhendu 3 5.857933 0 0 4028 +mumbai 3 5.857933 0 0 4029 +shukla 3 5.857933 0 0 4030 +karthikeyan 3 5.857933 0 0 4031 +cancer 3 5.857933 0 0 4032 +breast 3 5.857933 0 0 4033 +chronolog 3 5.857933 0 0 4034 +microscop 3 5.857933 0 0 4035 +paulb 3 5.857933 0 0 4036 +asha 3 5.857933 0 0 4037 +metaphor 3 5.857933 0 0 4038 +eduportrait 3 5.857933 0 0 4039 +consortia 3 5.857933 0 0 4040 +cdtthi 3 5.857933 0 0 4041 +budiu 2 6.263398 0 0 4042 +systemkenneth 2 6.263398 0 0 4043 +birmanc 2 6.263398 0 0 4044 +syllabuslectur 2 6.263398 0 0 4045 +taslili 2 6.263398 0 0 4046 +mihai 2 6.263398 0 0 4047 +consol 2 6.263398 0 0 4048 +systemsselect 2 6.263398 0 0 4049 +postcript 2 6.263398 0 0 4050 +korth 2 6.263398 0 0 4051 +aguilera 2 6.263398 0 0 4052 +amith 2 6.263398 0 0 4053 +thegroup 2 6.263398 0 0 4054 +universityspr 2 6.263398 0 0 4055 +introductionthi 2 6.263398 0 0 4056 +queryoptim 2 6.263398 0 0 4057 +prerequisitesc 2 6.263398 0 0 4058 +elmasri 2 6.263398 0 0 4059 +salton 2 6.263398 0 0 4060 +amitsingh 2 6.263398 0 0 4061 +yamasani 2 6.263398 0 0 4062 +ofyour 2 6.263398 0 0 4063 +ofcours 2 6.263398 0 0 4064 +throughth 2 6.263398 0 0 4065 +iti 2 6.263398 0 0 4066 +dole 2 6.263398 0 0 4067 +schedulethi 2 6.263398 0 0 4068 +enscript 2 6.263398 0 0 4069 +incl 2 6.263398 0 0 4070 +offersa 2 6.263398 0 0 4071 +kewal 2 6.263398 0 0 4072 +studentsenrol 2 6.263398 0 0 4073 +goofi 2 6.263398 0 0 4074 +parter 2 6.263398 0 0 4075 +rangeof 2 6.263398 0 0 4076 +standalon 2 6.263398 0 0 4077 +developedat 2 6.263398 0 0 4078 +orientedlanguag 2 6.263398 0 0 4079 +therewil 2 6.263398 0 0 4080 +combinationof 2 6.263398 0 0 4081 +programmingproblem 2 6.263398 0 0 4082 +youwork 2 6.263398 0 0 4083 +growth 2 6.263398 0 0 4084 +btopic 2 6.263398 0 0 4085 +pagecsfound 2 6.263398 0 0 4086 +nikolai 2 6.263398 0 0 4087 +weitsang 2 6.263398 0 0 4088 +databasemanag 2 6.263398 0 0 4089 +certainli 2 6.263398 0 0 4090 +proportion 2 6.263398 0 0 4091 +thefirst 2 6.263398 0 0 4092 +youto 2 6.263398 0 0 4093 +builton 2 6.263398 0 0 4094 +thehigh 2 6.263398 0 0 4095 +secondedit 2 6.263398 0 0 4096 +ingr 2 6.263398 0 0 4097 +grai 2 6.263398 0 0 4098 +reuter 2 6.263398 0 0 4099 +likewis 2 6.263398 0 0 4100 +confirm 2 6.263398 0 0 4101 +noteshomework 2 6.263398 0 0 4102 +profici 2 6.263398 0 0 4103 +andlog 2 6.263398 0 0 4104 +competillo 2 6.263398 0 0 4105 +lfar 2 6.263398 0 0 4106 +erlingsson 2 6.263398 0 0 4107 +indexdocument 2 6.263398 0 0 4108 +toolsa 2 6.263398 0 0 4109 +pagecsmultimedia 2 6.263398 0 0 4110 +anounc 2 6.263398 0 0 4111 +bugcom 2 6.263398 0 0 4112 +heat 2 6.263398 0 0 4113 +glorifi 2 6.263398 0 0 4114 +farm 2 6.263398 0 0 4115 +adequ 2 6.263398 0 0 4116 +horizont 2 6.263398 0 0 4117 +marshal 2 6.263398 0 0 4118 +representationof 2 6.263398 0 0 4119 +toon 2 6.263398 0 0 4120 +yourgrad 2 6.263398 0 0 4121 +yaron 2 6.263398 0 0 4122 +minski 2 6.263398 0 0 4123 +remark 2 6.263398 0 0 4124 +codewarrior 2 6.263398 0 0 4125 +kwan 2 6.263398 0 0 4126 +stuffit 2 6.263398 0 0 4127 +thesecond 2 6.263398 0 0 4128 +datatyp 2 6.263398 0 0 4129 +csdepart 2 6.263398 0 0 4130 +metrowerk 2 6.263398 0 0 4131 +arriv 2 6.263398 0 0 4132 +subdirectori 2 6.263398 0 0 4133 +thorough 2 6.263398 0 0 4134 +programmingin 2 6.263398 0 0 4135 +anneal 2 6.263398 0 0 4136 +brill 2 6.263398 0 0 4137 +treebank 2 6.263398 0 0 4138 +schedulewhat 2 6.263398 0 0 4139 +materialcov 2 6.263398 0 0 4140 +monika 2 6.263398 0 0 4141 +rauch 2 6.263398 0 0 4142 +greedi 2 6.263398 0 0 4143 +edmond 2 6.263398 0 0 4144 +scientificcomput 2 6.263398 0 0 4145 +stress 2 6.263398 0 0 4146 +loan 2 6.263398 0 0 4147 +renssela 2 6.263398 0 0 4148 +examsther 2 6.263398 0 0 4149 +hermit 2 6.263398 0 0 4150 +multivari 2 6.263398 0 0 4151 +folder 2 6.263398 0 0 4152 +nawaaz 2 6.263398 0 0 4153 +ahm 2 6.263398 0 0 4154 +praka 2 6.263398 0 0 4155 +anintroduct 2 6.263398 0 0 4156 +emphasison 2 6.263398 0 0 4157 +memorymanag 2 6.263398 0 0 4158 +thetradit 2 6.263398 0 0 4159 +galvin 2 6.263398 0 0 4160 +languagesfal 2 6.263398 0 0 4161 +glew 2 6.263398 0 0 4162 +informationhandout 2 6.263398 0 0 4163 +pavel 2 6.263398 0 0 4164 +almstrum 2 6.263398 0 0 4165 +otter 2 6.263398 0 0 4166 +ofmathemat 2 6.263398 0 0 4167 +nelson 2 6.263398 0 0 4168 +ortool 2 6.263398 0 0 4169 +bowen 2 6.263398 0 0 4170 +guyer 2 6.263398 0 0 4171 +insystem 2 6.263398 0 0 4172 +dram 2 6.263398 0 0 4173 +interprocess 2 6.263398 0 0 4174 +yurkanan 2 6.263398 0 0 4175 +dragon 2 6.263398 0 0 4176 +yoonsuck 2 6.263398 0 0 4177 +choe 2 6.263398 0 0 4178 +yschoe 2 6.263398 0 0 4179 +typo 2 6.263398 0 0 4180 +constantli 2 6.263398 0 0 4181 +edmondson 2 6.263398 0 0 4182 +gzhang 2 6.263398 0 0 4183 +rare 2 6.263398 0 0 4184 +thanksgiv 2 6.263398 0 0 4185 +appeal 2 6.263398 0 0 4186 +painter 2 6.263398 0 0 4187 +nimar 2 6.263398 0 0 4188 +disregard 2 6.263398 0 0 4189 +schedulec 2 6.263398 0 0 4190 +newgroup 2 6.263398 0 0 4191 +delphi 2 6.263398 0 0 4192 +dell 2 6.263398 0 0 4193 +mesa 2 6.263398 0 0 4194 +cscomput 2 6.263398 0 0 4195 +anopengl 2 6.263398 0 0 4196 +billthecat 2 6.263398 0 0 4197 +repair 2 6.263398 0 0 4198 +hqliu 2 6.263398 0 0 4199 +huiqun 2 6.263398 0 0 4200 +drastic 2 6.263398 0 0 4201 +joshi 2 6.263398 0 0 4202 +byzantin 2 6.263398 0 0 4203 +requiredtextbook 2 6.263398 0 0 4204 +checkpoint 2 6.263398 0 0 4205 +replica 2 6.263398 0 0 4206 +towrit 2 6.263398 0 0 4207 +algorithmi 2 6.263398 0 0 4208 +moreeffici 2 6.263398 0 0 4209 +simpler 2 6.263398 0 0 4210 +setup 2 6.263398 0 0 4211 +infocours 2 6.263398 0 0 4212 +coop 2 6.263398 0 0 4213 +materiali 2 6.263398 0 0 4214 +drawn 2 6.263398 0 0 4215 +elli 2 6.263398 0 0 4216 +helm 2 6.263398 0 0 4217 +reusabl 2 6.263398 0 0 4218 +sourcesth 2 6.263398 0 0 4219 +javasoft 2 6.263398 0 0 4220 +gamelan 2 6.263398 0 0 4221 +centr 2 6.263398 0 0 4222 +compilersfal 2 6.263398 0 0 4223 +tera 2 6.263398 0 0 4224 +skeleton 2 6.263398 0 0 4225 +ironman 2 6.263398 0 0 4226 +logp 2 6.263398 0 0 4227 +grid 2 6.263398 0 0 4228 +mooneytim 2 6.263398 0 0 4229 +sheetand 2 6.263398 0 0 4230 +placetu 2 6.263398 0 0 4231 +informationon 2 6.263398 0 0 4232 +unsupervis 2 6.263398 0 0 4233 +peano 2 6.263398 0 0 4234 +turtl 2 6.263398 0 0 4235 +plot 2 6.263398 0 0 4236 +compilersc 2 6.263398 0 0 4237 +powerpc 2 6.263398 0 0 4238 +syllabusprogram 2 6.263398 0 0 4239 +actor 2 6.263398 0 0 4240 +problemssolut 2 6.263398 0 0 4241 +cilk 2 6.263398 0 0 4242 +alamo 2 6.263398 0 0 4243 +chill 2 6.263398 0 0 4244 +theform 2 6.263398 0 0 4245 +sciencefal 2 6.263398 0 0 4246 +andresearch 2 6.263398 0 0 4247 +government 2 6.263398 0 0 4248 +andcollect 2 6.263398 0 0 4249 +todramat 2 6.263398 0 0 4250 +thedesign 2 6.263398 0 0 4251 +har 2 6.263398 0 0 4252 +undergo 2 6.263398 0 0 4253 +applicationsto 2 6.263398 0 0 4254 +offailur 2 6.263398 0 0 4255 +idl 2 6.263398 0 0 4256 +indistribut 2 6.263398 0 0 4257 +andmap 2 6.263398 0 0 4258 +equilibrium 2 6.263398 0 0 4259 +trajectori 2 6.263398 0 0 4260 +sufficientto 2 6.263398 0 0 4261 +logicprogram 2 6.263398 0 0 4262 +thesetechniqu 2 6.263398 0 0 4263 +ofneur 2 6.263398 0 0 4264 +neuro 2 6.263398 0 0 4265 +resourcemanag 2 6.263398 0 0 4266 +anobject 2 6.263398 0 0 4267 +anticip 2 6.263398 0 0 4268 +corpora 2 6.263398 0 0 4269 +foidl 2 6.263398 0 0 4270 +andanalysi 2 6.263398 0 0 4271 +straightforward 2 6.263398 0 0 4272 +lengthi 2 6.263398 0 0 4273 +andform 2 6.263398 0 0 4274 +succinctli 2 6.263398 0 0 4275 +concret 2 6.263398 0 0 4276 +analysisof 2 6.263398 0 0 4277 +tarjan 2 6.263398 0 0 4278 +maspar 2 6.263398 0 0 4279 +workon 2 6.263398 0 0 4280 +gooti 2 6.263398 0 0 4281 +subramanyam 2 6.263398 0 0 4282 +bednar 2 6.263398 0 0 4283 +jbednar 2 6.263398 0 0 4284 +cliff 2 6.263398 0 0 4285 +edusun 2 6.263398 0 0 4286 +commentari 2 6.263398 0 0 4287 +suzi 2 6.263398 0 0 4288 +wella 2 6.263398 0 0 4289 +foral 2 6.263398 0 0 4290 +riski 2 6.263398 0 0 4291 +nowher 2 6.263398 0 0 4292 +gallagh 2 6.263398 0 0 4293 +elicit 2 6.263398 0 0 4294 +append 2 6.263398 0 0 4295 +synopsisc 2 6.263398 0 0 4296 +systemdesign 2 6.263398 0 0 4297 +theinstructor 2 6.263398 0 0 4298 +anexperiment 2 6.263398 0 0 4299 +afip 2 6.263398 0 0 4300 +hansen 2 6.263398 0 0 4301 +nucleu 2 6.263398 0 0 4302 +bensoussan 2 6.263398 0 0 4303 +multic 2 6.263398 0 0 4304 +virtualmemori 2 6.263398 0 0 4305 +ritchi 2 6.263398 0 0 4306 +tucker 2 6.263398 0 0 4307 +bunt 2 6.263398 0 0 4308 +barrera 2 6.263398 0 0 4309 +acmtransact 2 6.263398 0 0 4310 +cristian 2 6.263398 0 0 4311 +systemsr 2 6.263398 0 0 4312 +goldberg 2 6.263398 0 0 4313 +rosenblum 2 6.263398 0 0 4314 +ieeetransact 2 6.263398 0 0 4315 +oninform 2 6.263398 0 0 4316 +baron 2 6.263398 0 0 4317 +rashid 2 6.263398 0 0 4318 +preemptiv 2 6.263398 0 0 4319 +ondistribut 2 6.263398 0 0 4320 +kandlur 2 6.263398 0 0 4321 +ofmultimedia 2 6.263398 0 0 4322 +icmc 2 6.263398 0 0 4323 +jacobson 2 6.263398 0 0 4324 +prerequisitesgradu 2 6.263398 0 0 4325 +madeavail 2 6.263398 0 0 4326 +thetop 2 6.263398 0 0 4327 +critiqu 2 6.263398 0 0 4328 +ofpap 2 6.263398 0 0 4329 +andclass 2 6.263398 0 0 4330 +prashant 2 6.263398 0 0 4331 +gemmel 2 6.263398 0 0 4332 +ieeeintern 2 6.263398 0 0 4333 +inmulti 2 6.263398 0 0 4334 +annualintern 2 6.263398 0 0 4335 +pasadena 2 6.263398 0 0 4336 +multimediai 2 6.263398 0 0 4337 +acmmultimedia 2 6.263398 0 0 4338 +sanfrancisco 2 6.263398 0 0 4339 +shenker 2 6.263398 0 0 4340 +verma 2 6.263398 0 0 4341 +delaybound 2 6.263398 0 0 4342 +toappear 2 6.263398 0 0 4343 +nossdav 2 6.263398 0 0 4344 +acmsigcomm 2 6.263398 0 0 4345 +andd 2 6.263398 0 0 4346 +shepherd 2 6.263398 0 0 4347 +basedcommun 2 6.263398 0 0 4348 +incommun 2 6.263398 0 0 4349 +govindan 2 6.263398 0 0 4350 +forcontinu 2 6.263398 0 0 4351 +formultimedia 2 6.263398 0 0 4352 +zellweg 2 6.263398 0 0 4353 +swinehart 2 6.263398 0 0 4354 +etherphon 2 6.263398 0 0 4355 +deer 2 6.263398 0 0 4356 +jeffai 2 6.263398 0 0 4357 +redel 2 6.263398 0 0 4358 +lan 2 6.263398 0 0 4359 +computersystem 2 6.263398 0 0 4360 +mbone 2 6.263398 0 0 4361 +monterei 2 6.263398 0 0 4362 +timeoper 2 6.263398 0 0 4363 +niblack 2 6.263398 0 0 4364 +managementsystem 2 6.263398 0 0 4365 +knowledgeand 2 6.263398 0 0 4366 +onveri 2 6.263398 0 0 4367 +omega 2 6.263398 0 0 4368 +sigma 2 6.263398 0 0 4369 +amort 2 6.263398 0 0 4370 +donovan 2 6.263398 0 0 4371 +kolbl 2 6.263398 0 0 4372 +youcan 2 6.263398 0 0 4373 +indent 2 6.263398 0 0 4374 +subtyp 2 6.263398 0 0 4375 +xfeng 2 6.263398 0 0 4376 +natarajan 2 6.263398 0 0 4377 +quarterwelcom 2 6.263398 0 0 4378 +thatthi 2 6.263398 0 0 4379 +addedfrequ 2 6.263398 0 0 4380 +personnel 2 6.263398 0 0 4381 +mvi 2 6.263398 0 0 4382 +usinglynx 2 6.263398 0 0 4383 +raini 2 6.263398 0 0 4384 +intact 2 6.263398 0 0 4385 +nonmajor 2 6.263398 0 0 4386 +itemsund 2 6.263398 0 0 4387 +balloon 2 6.263398 0 0 4388 +dickei 2 6.263398 0 0 4389 +nowitz 2 6.263398 0 0 4390 +fasulo 2 6.263398 0 0 4391 +ofcs 2 6.263398 0 0 4392 +informationth 2 6.263398 0 0 4393 +listinfo 2 6.263398 0 0 4394 +pagehom 2 6.263398 0 0 4395 +engineeringport 2 6.263398 0 0 4396 +academicnonprofit 2 6.263398 0 0 4397 +dulycredit 2 6.263398 0 0 4398 +overviewcours 2 6.263398 0 0 4399 +andersonwelcom 2 6.263398 0 0 4400 +tocs 2 6.263398 0 0 4401 +messagess 2 6.263398 0 0 4402 +synario 2 6.263398 0 0 4403 +anhai 2 6.263398 0 0 4404 +doan 2 6.263398 0 0 4405 +mscc 2 6.263398 0 0 4406 +breakdown 2 6.263398 0 0 4407 +portfolio 2 6.263398 0 0 4408 +educours 2 6.263398 0 0 4409 +terminolog 2 6.263398 0 0 4410 +beavoid 2 6.263398 0 0 4411 +thenorm 2 6.263398 0 0 4412 +clariti 2 6.263398 0 0 4413 +petri 2 6.263398 0 0 4414 +token 2 6.263398 0 0 4415 +mileston 2 6.263398 0 0 4416 +ofread 2 6.263398 0 0 4417 +glossari 2 6.263398 0 0 4418 +referenceon 2 6.263398 0 0 4419 +usingcommon 2 6.263398 0 0 4420 +themathemat 2 6.263398 0 0 4421 +yacc 2 6.263398 0 0 4422 +franz 2 6.263398 0 0 4423 +thelaboratori 2 6.263398 0 0 4424 +onthursdai 2 6.263398 0 0 4425 +pencil 2 6.263398 0 0 4426 +noonta 2 6.263398 0 0 4427 +touretzki 2 6.263398 0 0 4428 +aberman 2 6.263398 0 0 4429 +wisdom 2 6.263398 0 0 4430 +indi 2 6.263398 0 0 4431 +somani 2 6.263398 0 0 4432 +cslectur 2 6.263398 0 0 4433 +havea 2 6.263398 0 0 4434 +eduwchan 2 6.263398 0 0 4435 +designt 2 6.263398 0 0 4436 +kehl 2 6.263398 0 0 4437 +aaron 2 6.263398 0 0 4438 +comprehensivelist 2 6.263398 0 0 4439 +icmanufactur 2 6.263398 0 0 4440 +verilog 2 6.263398 0 0 4441 +judi 2 6.263398 0 0 4442 +andorgan 2 6.263398 0 0 4443 +youdon 2 6.263398 0 0 4444 +inour 2 6.263398 0 0 4445 +ofproject 2 6.263398 0 0 4446 +burn 2 6.263398 0 0 4447 +serverth 2 6.263398 0 0 4448 +vass 2 6.263398 0 0 4449 +informationmeet 2 6.263398 0 0 4450 +cubicl 2 6.263398 0 0 4451 +archivesslid 2 6.263398 0 0 4452 +tutorialth 2 6.263398 0 0 4453 +onmark 2 6.263398 0 0 4454 +jdean 2 6.263398 0 0 4455 +optimizingcompil 2 6.263398 0 0 4456 +cecilproject 2 6.263398 0 0 4457 +lambda 2 6.263398 0 0 4458 +kepart 2 6.263398 0 0 4459 +monash 2 6.263398 0 0 4460 +algorithmscs 2 6.263398 0 0 4461 +seig 2 6.263398 0 0 4462 +somebodi 2 6.263398 0 0 4463 +outer 2 6.263398 0 0 4464 +okai 2 6.263398 0 0 4465 +swap 2 6.263398 0 0 4466 +exception 2 6.263398 0 0 4467 +bake 2 6.263398 0 0 4468 +ideason 2 6.263398 0 0 4469 +accuratelyquot 2 6.263398 0 0 4470 +specmark 2 6.263398 0 0 4471 +atom 2 6.263398 0 0 4472 +multiflow 2 6.263398 0 0 4473 +deros 2 6.263398 0 0 4474 +hine 2 6.263398 0 0 4475 +guru 2 6.263398 0 0 4476 +intelligencefal 2 6.263398 0 0 4477 +andchalleng 2 6.263398 0 0 4478 +intelligentmachin 2 6.263398 0 0 4479 +agentarchitectur 2 6.263398 0 0 4480 +weldweld 2 6.263398 0 0 4481 +friedmanfriedman 2 6.263398 0 0 4482 +kushmericknick 2 6.263398 0 0 4483 +examsgradingresourcesth 2 6.263398 0 0 4484 +pearl 2 6.263398 0 0 4485 +bui 2 6.263398 0 0 4486 +algorithmsa 2 6.263398 0 0 4487 +khoro 2 6.263398 0 0 4488 +cantata 2 6.263398 0 0 4489 +sun 2 6.263398 0 0 4490 +setenv 2 6.263398 0 0 4491 +pmin 2 6.263398 0 0 4492 +includingth 2 6.263398 0 0 4493 +burt 2 6.263398 0 0 4494 +rosenfeld 2 6.263398 0 0 4495 +inon 2 6.263398 0 0 4496 +quadrat 2 6.263398 0 0 4497 +shuichi 2 6.263398 0 0 4498 +unconstrain 2 6.263398 0 0 4499 +kari 2 6.263398 0 0 4500 +regress 2 6.263398 0 0 4501 +calibr 2 6.263398 0 0 4502 +joanna 2 6.263398 0 0 4503 +radios 2 6.263398 0 0 4504 +pde 2 6.263398 0 0 4505 +ward 2 6.263398 0 0 4506 +tessa 2 6.263398 0 0 4507 +learner 2 6.263398 0 0 4508 +uiuc 2 6.263398 0 0 4509 +marla 2 6.263398 0 0 4510 +soap 2 6.263398 0 0 4511 +innew 2 6.263398 0 0 4512 +gershoni 2 6.263398 0 0 4513 +matthai 2 6.263398 0 0 4514 +tabular 2 6.263398 0 0 4515 +wilkerson 2 6.263398 0 0 4516 +dalli 2 6.263398 0 0 4517 +datascalar 2 6.263398 0 0 4518 +spsd 2 6.263398 0 0 4519 +iram 2 6.263398 0 0 4520 +seminarcs 2 6.263398 0 0 4521 +eggersand 2 6.263398 0 0 4522 +francoi 2 6.263398 0 0 4523 +taxat 2 6.263398 0 0 4524 +ernst 2 6.263398 0 0 4525 +secoski 2 6.263398 0 0 4526 +lazi 2 6.263398 0 0 4527 +parson 2 6.263398 0 0 4528 +memorymultiprocessor 2 6.263398 0 0 4529 +gang 2 6.263398 0 0 4530 +inrd 2 6.263398 0 0 4531 +andsequenti 2 6.263398 0 0 4532 +shun 2 6.263398 0 0 4533 +leung 2 6.263398 0 0 4534 +han 2 6.263398 0 0 4535 +agraw 2 6.263398 0 0 4536 +derek 2 6.263398 0 0 4537 +lcpc 2 6.263398 0 0 4538 +kennedi 2 6.263398 0 0 4539 +adv 2 6.263398 0 0 4540 +chien 2 6.263398 0 0 4541 +casual 2 6.263398 0 0 4542 +subscribeto 2 6.263398 0 0 4543 +padua 2 6.263398 0 0 4544 +kale 2 6.263398 0 0 4545 +fritzson 2 6.263398 0 0 4546 +potpourri 2 6.263398 0 0 4547 +wilk 2 6.263398 0 0 4548 +hypervisor 2 6.263398 0 0 4549 +sriram 2 6.263398 0 0 4550 +inner 2 6.263398 0 0 4551 +phoenix 2 6.263398 0 0 4552 +belief 2 6.263398 0 0 4553 +freshmen 2 6.263398 0 0 4554 +semaphor 2 6.263398 0 0 4555 +milleremail 2 6.263398 0 0 4556 +noonor 2 6.263398 0 0 4557 +weyer 2 6.263398 0 0 4558 +notesar 2 6.263398 0 0 4559 +theproblem 2 6.263398 0 0 4560 +andlook 2 6.263398 0 0 4561 +havethre 2 6.263398 0 0 4562 +daysof 2 6.263398 0 0 4563 +eachof 2 6.263398 0 0 4564 +lowest 2 6.263398 0 0 4565 +breakweek 2 6.263398 0 0 4566 +satisfactori 2 6.263398 0 0 4567 +andconfer 2 6.263398 0 0 4568 +willinstead 2 6.263398 0 0 4569 +adiscuss 2 6.263398 0 0 4570 +geta 2 6.263398 0 0 4571 +quietli 2 6.263398 0 0 4572 +assignmenti 2 6.263398 0 0 4573 +programmingsect 2 6.263398 0 0 4574 +disturb 2 6.263398 0 0 4575 +subroutin 2 6.263398 0 0 4576 +regardless 2 6.263398 0 0 4577 +burnett 2 6.263398 0 0 4578 +consultantssyllabuswork 2 6.263398 0 0 4579 +archivepolici 2 6.263398 0 0 4580 +eggleston 2 6.263398 0 0 4581 +ofvari 2 6.263398 0 0 4582 +halloffic 2 6.263398 0 0 4583 +deskfor 2 6.263398 0 0 4584 +performanceof 2 6.263398 0 0 4585 +silva 2 6.263398 0 0 4586 +sidnei 2 6.263398 0 0 4587 +rehnuma 2 6.263398 0 0 4588 +keyinstructorprofessor 2 6.263398 0 0 4589 +desautelsoffic 2 6.263398 0 0 4590 +assistantsfollow 2 6.263398 0 0 4591 +rahman 2 6.263398 0 0 4592 +jaim 2 6.263398 0 0 4593 +jfink 2 6.263398 0 0 4594 +herro 2 6.263398 0 0 4595 +krothap 2 6.263398 0 0 4596 +gradesexplor 2 6.263398 0 0 4597 +spreadsheet 2 6.263398 0 0 4598 +aldu 2 6.263398 0 0 4599 +computersinstructor 2 6.263398 0 0 4600 +petersonoffic 2 6.263398 0 0 4601 +sciencephon 2 6.263398 0 0 4602 +slpeter 2 6.263398 0 0 4603 +appointmentvit 2 6.263398 0 0 4604 +halllectur 2 6.263398 0 0 4605 +laudon 2 6.263398 0 0 4606 +traver 2 6.263398 0 0 4607 +laudonlab 2 6.263398 0 0 4608 +petersoncours 2 6.263398 0 0 4609 +computersto 2 6.263398 0 0 4610 +throughcolleg 2 6.263398 0 0 4611 +arena 2 6.263398 0 0 4612 +csuse 2 6.263398 0 0 4613 +experienceon 2 6.263398 0 0 4614 +eudora 2 6.263398 0 0 4615 +superpaint 2 6.263398 0 0 4616 +filemak 2 6.263398 0 0 4617 +hypercard 2 6.263398 0 0 4618 +pagemak 2 6.263398 0 0 4619 +educationalexperi 2 6.263398 0 0 4620 +namesectiontimedai 2 6.263398 0 0 4621 +mwnick 2 6.263398 0 0 4622 +mwtrshannon 2 6.263398 0 0 4623 +trtrjeff 2 6.263398 0 0 4624 +reminga 2 6.263398 0 0 4625 +mwfmwira 2 6.263398 0 0 4626 +trtrbrian 2 6.263398 0 0 4627 +mwfmwfbrad 2 6.263398 0 0 4628 +mwfmwfjoe 2 6.263398 0 0 4629 +trtrgeoff 2 6.263398 0 0 4630 +mwftrmaria 2 6.263398 0 0 4631 +yuin 2 6.263398 0 0 4632 +mwfmwrecommend 2 6.263398 0 0 4633 +nitti 2 6.263398 0 0 4634 +gritti 2 6.263398 0 0 4635 +superpaintassign 2 6.263398 0 0 4636 +excellast 2 6.263398 0 0 4637 +jonbodn 2 6.263398 0 0 4638 +instructorsw 2 6.263398 0 0 4639 +csinform 2 6.263398 0 0 4640 +subdirectoriesc 2 6.263398 0 0 4641 +environmentfortran 2 6.263398 0 0 4642 +guidebook 2 6.263398 0 0 4643 +ghost 2 6.263398 0 0 4644 +tusch 2 6.263398 0 0 4645 +tutsch 2 6.263398 0 0 4646 +execpc 2 6.263398 0 0 4647 +nolandsect 2 6.263398 0 0 4648 +smoler 2 6.263398 0 0 4649 +sunlung 2 6.263398 0 0 4650 +ssuen 2 6.263398 0 0 4651 +edusridevi 2 6.263398 0 0 4652 +bhamidipati 2 6.263398 0 0 4653 +bsri 2 6.263398 0 0 4654 +edumohammad 2 6.263398 0 0 4655 +programs 2 6.263398 0 0 4656 +examsal 2 6.263398 0 0 4657 +noteskaren 2 6.263398 0 0 4658 +updatedmondai 2 6.263398 0 0 4659 +stale 2 6.263398 0 0 4660 +lec 2 6.263398 0 0 4661 +structureslectur 2 6.263398 0 0 4662 +psychologylectur 2 6.263398 0 0 4663 +psychologycours 2 6.263398 0 0 4664 +baicheng 2 6.263398 0 0 4665 +liao 2 6.263398 0 0 4666 +bail 2 6.263398 0 0 4667 +jiacheng 2 6.263398 0 0 4668 +pmcopyright 2 6.263398 0 0 4669 +behav 2 6.263398 0 0 4670 +amoffic 2 6.263398 0 0 4671 +femal 2 6.263398 0 0 4672 +wic 2 6.263398 0 0 4673 +oneof 2 6.263398 0 0 4674 +tomak 2 6.263398 0 0 4675 +startup 2 6.263398 0 0 4676 +textth 2 6.263398 0 0 4677 +carrano 2 6.263398 0 0 4678 +lecturenot 2 6.263398 0 0 4679 +invalu 2 6.263398 0 0 4680 +nonetheless 2 6.263398 0 0 4681 +thatyou 2 6.263398 0 0 4682 +provis 2 6.263398 0 0 4683 +excus 2 6.263398 0 0 4684 +datastructur 2 6.263398 0 0 4685 +tovisit 2 6.263398 0 0 4686 +facet 2 6.263398 0 0 4687 +unnecessarili 2 6.263398 0 0 4688 +liter 2 6.263398 0 0 4689 +convei 2 6.263398 0 0 4690 +cchin 2 6.263398 0 0 4691 +compuer 2 6.263398 0 0 4692 +weiz 2 6.263398 0 0 4693 +needless 2 6.263398 0 0 4694 +sophomor 2 6.263398 0 0 4695 +databaseof 2 6.263398 0 0 4696 +cole 2 6.263398 0 0 4697 +stukel 2 6.263398 0 0 4698 +bibliograph 2 6.263398 0 0 4699 +compilersspr 2 6.263398 0 0 4700 +kapoor 2 6.263398 0 0 4701 +sethi 2 6.263398 0 0 4702 +avaiabl 2 6.263398 0 0 4703 +thejava 2 6.263398 0 0 4704 +arnold 2 6.263398 0 0 4705 +troffic 2 6.263398 0 0 4706 +mellencamp 2 6.263398 0 0 4707 +mellen 2 6.263398 0 0 4708 +tung 2 6.263398 0 0 4709 +colloquia 2 6.263398 0 0 4710 +sciencesand 2 6.263398 0 0 4711 +designedto 2 6.263398 0 0 4712 +congeni 2 6.263398 0 0 4713 +null 2 6.263398 0 0 4714 +mysteri 2 6.263398 0 0 4715 +char 2 6.263398 0 0 4716 +trendi 2 6.263398 0 0 4717 +coursewil 2 6.263398 0 0 4718 +primer 2 6.263398 0 0 4719 +manualfor 2 6.263398 0 0 4720 +eduthu 2 6.263398 0 0 4721 +atkinson 2 6.263398 0 0 4722 +sybas 2 6.263398 0 0 4723 +behaviour 2 6.263398 0 0 4724 +socket 2 6.263398 0 0 4725 +statphon 2 6.263398 0 0 4726 +ipng 2 6.263398 0 0 4727 +meyer 2 6.263398 0 0 4728 +applicationsfal 2 6.263398 0 0 4729 +bazaraa 2 6.263398 0 0 4730 +sherali 2 6.263398 0 0 4731 +shetti 2 6.263398 0 0 4732 +athena 2 6.263398 0 0 4733 +saddlepoint 2 6.263398 0 0 4734 +dualiti 2 6.263398 0 0 4735 +mimic 2 6.263398 0 0 4736 +cychan 2 6.263398 0 0 4737 +uwisc 2 6.263398 0 0 4738 +belew 2 6.263398 0 0 4739 +diari 2 6.263398 0 0 4740 +residu 2 6.263398 0 0 4741 +kermit 2 6.263398 0 0 4742 +linksyou 2 6.263398 0 0 4743 +deboor 2 6.263398 0 0 4744 +chamberlin 2 6.263398 0 0 4745 +comm 2 6.263398 0 0 4746 +laser 2 6.263398 0 0 4747 +disappear 2 6.263398 0 0 4748 +visionfal 2 6.263398 0 0 4749 +shoulder 2 6.263398 0 0 4750 +altogeth 2 6.263398 0 0 4751 +supplementari 2 6.263398 0 0 4752 +quota 2 6.263398 0 0 4753 +caution 2 6.263398 0 0 4754 +panoram 2 6.263398 0 0 4755 +royal 2 6.263398 0 0 4756 +referenc 2 6.263398 0 0 4757 +gam 2 6.263398 0 0 4758 +leei 2 6.263398 0 0 4759 +equilibria 2 6.263398 0 0 4760 +multicommod 2 6.263398 0 0 4761 +systemsspr 2 6.263398 0 0 4762 +gaussian 2 6.263398 0 0 4763 +csst 2 6.263398 0 0 4764 +krisna 2 6.263398 0 0 4765 +sharpemail 2 6.263398 0 0 4766 +sharpgreg 2 6.263398 0 0 4767 +chad 2 6.263398 0 0 4768 +forgot 2 6.263398 0 0 4769 +weaver 2 6.263398 0 0 4770 +jonb 2 6.263398 0 0 4771 +infoc 2 6.263398 0 0 4772 +mound 2 6.263398 0 0 4773 +loos 2 6.263398 0 0 4774 +buti 2 6.263398 0 0 4775 +ifal 2 6.263398 0 0 4776 +intstack 2 6.263398 0 0 4777 +unlimit 2 6.263398 0 0 4778 +classinfo 2 6.263398 0 0 4779 +melski 2 6.263398 0 0 4780 +milo 2 6.263398 0 0 4781 +viru 2 6.263398 0 0 4782 +ream 2 6.263398 0 0 4783 +mream 2 6.263398 0 0 4784 +nolandinstructor 2 6.263398 0 0 4785 +prock 2 6.263398 0 0 4786 +shuttl 2 6.263398 0 0 4787 +clickabl 2 6.263398 0 0 4788 +herald 2 6.263398 0 0 4789 +biggest 2 6.263398 0 0 4790 +desautel 2 6.263398 0 0 4791 +mitchel 2 6.263398 0 0 4792 +towel 2 6.263398 0 0 4793 +fisher 2 6.263398 0 0 4794 +induc 2 6.263398 0 0 4795 +akcl 2 6.263398 0 0 4796 +quinlan 2 6.263398 0 0 4797 +cogsci 2 6.263398 0 0 4798 +tractabl 2 6.263398 0 0 4799 +usea 2 6.263398 0 0 4800 +thoroughli 2 6.263398 0 0 4801 +salmon 2 6.263398 0 0 4802 +sodani 2 6.263398 0 0 4803 +basnei 2 6.263398 0 0 4804 +biswadeep 2 6.263398 0 0 4805 +taxiao 2 6.263398 0 0 4806 +sridhar 2 6.263398 0 0 4807 +homepagewelcom 2 6.263398 0 0 4808 +tmunson 2 6.263398 0 0 4809 +statisticsoffic 2 6.263398 0 0 4810 +muchinform 2 6.263398 0 0 4811 +kei 2 6.263398 0 0 4812 +princ 2 6.263398 0 0 4813 +watt 2 6.263398 0 0 4814 +scalar 2 6.263398 0 0 4815 +religi 2 6.263398 0 0 4816 +folei 2 6.263398 0 0 4817 +bruceland 2 6.263398 0 0 4818 +illumin 2 6.263398 0 0 4819 +blobbi 2 6.263398 0 0 4820 +homogen 2 6.263398 0 0 4821 +phong 2 6.263398 0 0 4822 +alias 2 6.263398 0 0 4823 +inord 2 6.263398 0 0 4824 +absent 2 6.263398 0 0 4825 +deviat 2 6.263398 0 0 4826 +wale 2 6.263398 0 0 4827 +manchest 2 6.263398 0 0 4828 +todoc 2 6.263398 0 0 4829 +landi 2 6.263398 1 0 4830 +tsai 2 6.263398 1 0 4831 +stochast 2 6.263398 1 0 4832 +linearalgebra 2 6.263398 0 0 4833 +anal 2 6.263398 0 0 4834 +pointmethod 2 6.263398 0 0 4835 +driscol 2 6.263398 0 0 4836 +spectral 2 6.263398 0 0 4837 +interoper 2 6.263398 0 0 4838 +enterpris 2 6.263398 0 0 4839 +informationand 2 6.263398 0 0 4840 +worker 2 6.263398 0 0 4841 +avenuemadison 2 6.263398 0 0 4842 +maze 2 6.263398 0 0 4843 +tether 2 6.263398 0 0 4844 +duffi 2 6.263398 0 0 4845 +lorenz 2 6.263398 0 0 4846 +telerobot 2 6.263398 0 0 4847 +hert 2 6.263398 0 0 4848 +mace 2 6.263398 0 0 4849 +amwork 2 6.263398 0 0 4850 +katherin 2 6.263398 0 0 4851 +dalia 2 6.263398 0 0 4852 +malki 2 6.263398 0 0 4853 +ensembl 2 6.263398 0 0 4854 +oppos 2 6.263398 0 0 4855 +egypt 2 6.263398 0 0 4856 +groupwar 2 6.263398 0 0 4857 +toconstruct 2 6.263398 0 0 4858 +communicationarchitectur 2 6.263398 0 0 4859 +ofreleas 2 6.263398 0 0 4860 +transi 2 6.263398 0 0 4861 +froma 2 6.263398 0 0 4862 +mighti 2 6.263398 0 0 4863 +wing 2 6.263398 0 0 4864 +stir 2 6.263398 0 0 4865 +lament 2 6.263398 0 0 4866 +papersand 2 6.263398 0 0 4867 +silvano 2 6.263398 0 0 4868 +mytholog 2 6.263398 0 0 4869 +court 2 6.263398 0 0 4870 +universitydepart 2 6.263398 0 0 4871 +season 2 6.263398 0 0 4872 +sigir 2 6.263398 0 0 4873 +bucklei 2 6.263398 0 0 4874 +nevada 2 6.263398 0 0 4875 +gerard 2 6.263398 0 0 4876 +decreas 2 6.263398 0 0 4877 +absenc 2 6.263398 0 0 4878 +unrestrict 2 6.263398 0 0 4879 +excerpt 2 6.263398 0 0 4880 +activitiesassoci 2 6.263398 0 0 4881 +systemsprogram 2 6.263398 0 0 4882 +dublin 2 6.263398 0 0 4883 +moscow 2 6.263398 0 0 4884 +publicationsa 2 6.263398 0 0 4885 +dawson 2 6.263398 0 0 4886 +microstorag 2 6.263398 0 0 4887 +activitieseditori 2 6.263398 0 0 4888 +softwareprogram 2 6.263398 0 0 4889 +irreduc 2 6.263398 0 0 4890 +rehovot 2 6.263398 0 0 4891 +albani 2 6.263398 0 0 4892 +benign 2 6.263398 0 0 4893 +activitieschair 2 6.263398 0 0 4894 +isat 2 6.263398 0 0 4895 +law 2 6.263398 0 0 4896 +aaa 2 6.263398 0 0 4897 +banquet 2 6.263398 0 0 4898 +publicationson 2 6.263398 0 0 4899 +johan 2 6.263398 0 0 4900 +commiss 2 6.263398 0 0 4901 +engineeringfellow 2 6.263398 0 0 4902 +sciencesfellow 2 6.263398 0 0 4903 +professorphd 2 6.263398 0 0 4904 +numa 2 6.263398 0 0 4905 +knit 2 6.263398 0 0 4906 +kotlyar 2 6.263398 0 0 4907 +norwai 2 6.263398 0 0 4908 +tacoma 2 6.263398 0 0 4909 +warfar 2 6.263398 0 0 4910 +widespread 2 6.263398 0 0 4911 +annal 2 6.263398 0 0 4912 +hebrew 2 6.263398 0 0 4913 +grante 2 6.263398 0 0 4914 +contractor 2 6.263398 0 0 4915 +mason 2 6.263398 0 0 4916 +airplan 2 6.263398 0 0 4917 +jerusalem 2 6.263398 0 0 4918 +marzullo 2 6.263398 0 0 4919 +household 2 6.263398 0 0 4920 +intuit 2 6.263398 0 0 4921 +nearest 2 6.263398 0 0 4922 +activitiescomput 2 6.263398 0 0 4923 +intereststeachingselect 2 6.263398 0 0 4924 +andprocess 2 6.263398 0 0 4925 +withlarg 2 6.263398 0 0 4926 +needto 2 6.263398 0 0 4927 +thecommun 2 6.263398 0 0 4928 +availableonlin 2 6.263398 0 0 4929 +aredevelop 2 6.263398 0 0 4930 +animplement 2 6.263398 0 0 4931 +insoftwar 2 6.263398 0 0 4932 +asif 2 6.263398 0 0 4933 +ghia 2 6.263398 0 0 4934 +hum 2 6.263398 0 0 4935 +decod 2 6.263398 0 0 4936 +engag 2 6.263398 0 0 4937 +ventur 2 6.263398 0 0 4938 +polya 2 6.263398 0 0 4939 +programmingand 2 6.263398 0 0 4940 +aitken 2 6.263398 0 0 4941 +possibleto 2 6.263398 0 0 4942 +aprogram 2 6.263398 0 0 4943 +thiswil 2 6.263398 0 0 4944 +anniversari 2 6.263398 0 0 4945 +celebr 2 6.263398 0 0 4946 +buffalo 2 6.263398 0 0 4947 +andmathemat 2 6.263398 0 0 4948 +manfr 2 6.263398 0 0 4949 +florenc 2 6.263398 0 0 4950 +worthwhil 2 6.263398 0 0 4951 +hasbrouck 2 6.263398 0 0 4952 +androbbert 2 6.263398 0 0 4953 +partition 2 6.263398 0 0 4954 +lausann 2 6.263398 0 0 4955 +cupertino 2 6.263398 0 0 4956 +pyramania 2 6.263398 0 0 4957 +clara 2 6.263398 0 0 4958 +cave 2 6.263398 0 0 4959 +softwarei 2 6.263398 0 0 4960 +nano 2 6.263398 0 0 4961 +snap 2 6.263398 0 0 4962 +thed 2 6.263398 0 0 4963 +pal 2 6.263398 0 0 4964 +joselui 2 6.263398 0 0 4965 +ankit 2 6.263398 0 0 4966 +endpoint 2 6.263398 0 0 4967 +broker 2 6.263398 0 0 4968 +sciencemast 2 6.263398 0 0 4969 +berg 2 6.263398 0 0 4970 +resumemi 2 6.263398 0 0 4971 +hodja 2 6.263398 0 0 4972 +fledg 2 6.263398 0 0 4973 +diagon 2 6.263398 0 0 4974 +caveat 2 6.263398 0 0 4975 +sugata 2 6.263398 0 0 4976 +dude 2 6.263398 0 0 4977 +felt 2 6.263398 0 0 4978 +fratern 2 6.263398 0 0 4979 +border 2 6.263398 0 0 4980 +mukhopadhyai 2 6.263398 0 0 4981 +surfer 2 6.263398 0 0 4982 +captain 2 6.263398 0 0 4983 +mugshot 2 6.263398 0 0 4984 +goof 2 6.263398 0 0 4985 +projectwith 2 6.263398 0 0 4986 +thegreat 2 6.263398 0 0 4987 +pelham 2 6.263398 0 0 4988 +grenvil 2 6.263398 0 0 4989 +wodehous 2 6.263398 0 0 4990 +metallica 2 6.263398 0 0 4991 +fanci 2 6.263398 0 0 4992 +monti 2 6.263398 0 0 4993 +python 2 6.263398 0 0 4994 +beavi 2 6.263398 0 0 4995 +meiko 2 6.263398 0 0 4996 +untrust 2 6.263398 0 0 4997 +seth 2 6.263398 0 0 4998 +klau 2 6.263398 0 0 4999 +veena 2 6.263398 0 0 5000 +homepagelast 2 6.263398 0 0 5001 +intertext 2 6.263398 0 0 5002 +solar 2 6.263398 0 0 5003 +martial 2 6.263398 0 0 5004 +aastha 2 6.263398 0 0 5005 +macdonald 2 6.263398 0 0 5006 +mem 2 6.263398 0 0 5007 +brigg 2 6.263398 0 0 5008 +ree 2 6.263398 0 0 5009 +nanofabr 2 6.263398 0 0 5010 +ofmobil 2 6.263398 0 0 5011 +internationalworkshop 2 6.263398 0 0 5012 +crystal 2 6.263398 0 0 5013 +electro 2 6.263398 0 0 5014 +reif 2 6.263398 0 0 5015 +furnitur 2 6.263398 0 0 5016 +actuatorarrai 2 6.263398 0 0 5017 +mihailovich 2 6.263398 0 0 5018 +automationnic 2 6.263398 0 0 5019 +andj 2 6.263398 0 0 5020 +latomb 2 6.263398 0 0 5021 +doc 2 6.263398 0 0 5022 +catalogc 2 6.263398 0 0 5023 +apictur 2 6.263398 0 0 5024 +swallow 2 6.263398 0 0 5025 +interestscours 2 6.263398 0 0 5026 +tandem 2 6.263398 0 0 5027 +learningtechniqu 2 6.263398 0 0 5028 +gabriel 2 6.263398 0 0 5029 +jointconfer 2 6.263398 0 0 5030 +eleventh 2 6.263398 0 0 5031 +newark 2 6.263398 0 0 5032 +bias 2 6.263398 0 0 5033 +bloomington 2 6.263398 0 0 5034 +twelfth 2 6.263398 0 0 5035 +voronoi 2 6.263398 0 0 5036 +agenda 2 6.263398 0 0 5037 +scientificsoftwar 2 6.263398 0 0 5038 +acollect 2 6.263398 0 0 5039 +messageslow 2 6.263398 0 0 5040 +coleman 2 6.263398 0 0 5041 +atyp 2 6.263398 0 0 5042 +pagekarl 2 6.263398 0 0 5043 +intract 2 6.263398 0 0 5044 +anapproxim 2 6.263398 0 0 5045 +unavail 2 6.263398 0 0 5046 +thenuprl 2 6.263398 0 0 5047 +hereat 2 6.263398 0 0 5048 +papersoth 2 6.263398 0 0 5049 +lurker 2 6.263398 0 0 5050 +andwith 2 6.263398 0 0 5051 +pagedepart 2 6.263398 0 0 5052 +professorthoma 2 6.263398 0 0 5053 +defici 2 6.263398 0 0 5054 +idaho 2 6.263398 0 0 5055 +solutionof 2 6.263398 0 0 5056 +key 2 6.263398 0 0 5057 +dongarra 2 6.263398 0 0 5058 +postdoctor 2 6.263398 0 0 5059 +honest 2 6.263398 0 0 5060 +ofvirtu 2 6.263398 0 0 5061 +communicatewith 2 6.263398 0 0 5062 +unlik 2 6.263398 0 0 5063 +sender 2 6.263398 0 0 5064 +adversari 2 6.263398 0 0 5065 +securityand 2 6.263398 0 0 5066 +blink 2 6.263398 0 0 5067 +ey 2 6.263398 0 0 5068 +mice 2 6.263398 0 0 5069 +autobiographi 2 6.263398 0 0 5070 +ear 2 6.263398 0 0 5071 +soft 2 6.263398 0 0 5072 +belov 2 6.263398 0 0 5073 +broken 2 6.263398 0 0 5074 +horror 2 6.263398 0 0 5075 +tear 2 6.263398 0 0 5076 +deed 2 6.263398 0 0 5077 +frozen 2 6.263398 0 0 5078 +cern 2 6.263398 0 0 5079 +dissemin 2 6.263398 0 0 5080 +lagoz 2 6.263398 0 0 5081 +gorgeou 2 6.263398 0 0 5082 +geek 2 6.263398 0 0 5083 +snowboard 2 6.263398 0 0 5084 +lnc 2 6.263398 0 0 5085 +inher 2 6.263398 0 0 5086 +ergun 2 6.263398 0 0 5087 +angri 2 6.263398 0 0 5088 +dog 2 6.263398 0 0 5089 +tardo 2 6.263398 0 0 5090 +lovasz 2 6.263398 0 0 5091 +hopp 2 6.263398 0 0 5092 +kleinberg 2 6.263398 0 0 5093 +julia 2 6.263398 0 0 5094 +broadli 2 6.263398 0 0 5095 +appearedin 2 6.263398 0 0 5096 +leighton 2 6.263398 0 0 5097 +inmathemat 2 6.263398 0 0 5098 +hasappear 2 6.263398 0 0 5099 +goeman 2 6.263398 0 0 5100 +williamson 2 6.263398 0 0 5101 +diamet 2 6.263398 0 0 5102 +felix 2 6.263398 0 0 5103 +erni 2 6.263398 0 0 5104 +epicuri 2 6.263398 0 0 5105 +karnataka 2 6.263398 0 0 5106 +bharat 2 6.263398 0 0 5107 +cute 2 6.263398 0 0 5108 +incident 2 6.263398 0 0 5109 +bangalor 2 6.263398 0 0 5110 +that 2 6.263398 0 0 5111 +conquer 2 6.263398 0 0 5112 +mywww 2 6.263398 0 0 5113 +pagedavid 2 6.263398 0 0 5114 +weapon 2 6.263398 0 0 5115 +degreein 2 6.263398 0 0 5116 +bauer 2 6.263398 0 0 5117 +cake 2 6.263398 0 0 5118 +booth 2 6.263398 0 0 5119 +theamerican 2 6.263398 0 0 5120 +andt 2 6.263398 0 0 5121 +spoken 2 6.263398 0 0 5122 +researchinterest 2 6.263398 0 0 5123 +acta 2 6.263398 0 0 5124 +informatica 2 6.263398 0 0 5125 +andtool 2 6.263398 0 0 5126 +pond 2 6.263398 0 0 5127 +bachelorand 2 6.263398 0 0 5128 +chinami 2 6.263398 0 0 5129 +coimbator 2 6.263398 0 0 5130 +cornelluniversityfal 2 6.263398 0 0 5131 +cspracticum 2 6.263398 0 0 5132 +carpet 2 6.263398 0 0 5133 +colloqium 2 6.263398 0 0 5134 +manageri 2 6.263398 0 0 5135 +knowledgebas 2 6.263398 0 0 5136 +associatecornel 2 6.263398 0 0 5137 +eduwww 2 6.263398 0 0 5138 +linkag 2 6.263398 0 0 5139 +hani 2 6.263398 0 0 5140 +harper 2 6.263398 0 0 5141 +multiprocess 2 6.263398 0 0 5142 +linksperson 2 6.263398 0 0 5143 +herlihi 2 6.263398 0 0 5144 +copenhagen 2 6.263398 0 0 5145 +shing 2 6.263398 0 0 5146 +nankai 2 6.263398 0 0 5147 +tianjin 2 6.263398 0 0 5148 +barri 2 6.263398 0 0 5149 +sciencefound 2 6.263398 0 0 5150 +chinaand 2 6.263398 0 0 5151 +sceneri 2 6.263398 0 0 5152 +sheng 2 6.263398 0 0 5153 +liber 2 6.263398 0 0 5154 +navi 2 6.263398 0 0 5155 +com 2 6.263398 0 0 5156 +cornellopoli 2 6.263398 0 0 5157 +techniquec 2 6.263398 0 0 5158 +methodsc 2 6.263398 0 0 5159 +colloquiumc 2 6.263398 0 0 5160 +magazinepc 2 6.263398 0 0 5161 +morn 2 6.263398 0 0 5162 +orang 2 6.263398 0 0 5163 +lui 2 6.263398 0 0 5164 +bought 2 6.263398 0 0 5165 +accel 2 6.263398 0 0 5166 +plug 2 6.263398 0 0 5167 +adress 2 6.263398 0 0 5168 +occup 2 6.263398 0 0 5169 +fulltim 2 6.263398 0 0 5170 +ethernet 2 6.263398 0 0 5171 +blast 2 6.263398 0 0 5172 +theatr 2 6.263398 0 0 5173 +bellcor 2 6.263398 0 0 5174 +friedrich 2 6.263398 0 0 5175 +sculptur 2 6.263398 0 0 5176 +wright 2 6.263398 0 0 5177 +prone 2 6.263398 0 0 5178 +thehoru 2 6.263398 0 0 5179 +withprofessor 2 6.263398 0 0 5180 +ofhoru 2 6.263398 0 0 5181 +haswork 2 6.263398 0 0 5182 +distributedenviron 2 6.263398 0 0 5183 +toi 2 6.263398 0 0 5184 +linksfor 2 6.263398 0 0 5185 +resumesom 2 6.263398 0 0 5186 +suspect 2 6.263398 0 0 5187 +anyhow 2 6.263398 0 0 5188 +mathematician 2 6.263398 0 0 5189 +terrorist 2 6.263398 0 0 5190 +icdc 2 6.263398 0 0 5191 +ucsd 2 6.263398 0 0 5192 +amazon 2 6.263398 0 0 5193 +dessert 2 6.263398 0 0 5194 +infoth 2 6.263398 0 0 5195 +tetra 2 6.263398 0 0 5196 +attiya 2 6.263398 0 0 5197 +euclidean 2 6.263398 0 0 5198 +sdsc 2 6.263398 0 0 5199 +melco 2 6.263398 0 0 5200 +advert 2 6.263398 0 0 5201 +heng 2 6.263398 0 0 5202 +quiet 2 6.263398 0 0 5203 +protocolsfor 2 6.263398 0 0 5204 +developeda 2 6.263398 0 0 5205 +interfacesand 2 6.263398 0 0 5206 +cano 2 6.263398 0 0 5207 +joi 2 6.263398 0 0 5208 +fight 2 6.263398 0 0 5209 +buyer 2 6.263398 0 0 5210 +resours 2 6.263398 0 0 5211 +sunris 2 6.263398 0 0 5212 +edmund 2 6.263398 0 0 5213 +succe 2 6.263398 0 0 5214 +villag 2 6.263398 0 0 5215 +computingc 2 6.263398 0 0 5216 +castl 2 6.263398 0 0 5217 +nausicaa 2 6.263398 0 0 5218 +galact 2 6.263398 0 0 5219 +loui 2 6.263398 0 0 5220 +badminton 2 6.263398 0 0 5221 +sunlab 2 6.263398 0 0 5222 +caltech 2 6.263398 0 0 5223 +lowel 2 6.263398 0 0 5224 +coursesfal 2 6.263398 0 0 5225 +sheldon 2 6.263398 0 0 5226 +aclu 2 6.263398 0 0 5227 +reno 2 6.263398 0 0 5228 +coolest 2 6.263398 0 0 5229 +pagemi 2 6.263398 0 0 5230 +nerd 2 6.263398 0 0 5231 +suck 2 6.263398 0 0 5232 +donnel 2 6.263398 0 0 5233 +spirit 2 6.263398 0 0 5234 +harmoni 2 6.263398 0 0 5235 +reset 2 6.263398 0 0 5236 +chrisochoid 2 6.263398 0 0 5237 +prema 2 6.263398 0 0 5238 +aiaa 2 6.263398 0 0 5239 +programmingenviron 2 6.263398 0 0 5240 +and 2 6.263398 0 0 5241 +nikosc 2 6.263398 0 0 5242 +suppot 2 6.263398 0 0 5243 +cinema 2 6.263398 0 0 5244 +pearson 2 6.263398 0 0 5245 +molecul 2 6.263398 0 0 5246 +ticker 2 6.263398 0 0 5247 +outlet 2 6.263398 0 0 5248 +menon 2 6.263398 0 0 5249 +sequin 2 6.263398 0 0 5250 +earthquak 2 6.263398 0 0 5251 +volcano 2 6.263398 0 0 5252 +meteorolog 2 6.263398 0 0 5253 +aredescrib 2 6.263398 0 0 5254 +objectivescurr 2 6.263398 0 0 5255 +statusmotiv 2 6.263398 0 0 5256 +exampleseq 2 6.263398 0 0 5257 +languageoptim 2 6.263398 0 0 5258 +techniquesseq 2 6.263398 0 0 5259 +developmentpublicationsrel 2 6.263398 0 0 5260 +workcontact 2 6.263398 0 0 5261 +informationproject 2 6.263398 0 0 5262 +processingof 2 6.263398 0 0 5263 +theseappl 2 6.263398 0 0 5264 +metereolog 2 6.263398 0 0 5265 +andbiolog 2 6.263398 0 0 5266 +semanticstak 2 6.263398 0 0 5267 +evaluationintegr 2 6.263398 0 0 5268 +canstor 2 6.263398 0 0 5269 +sequencesthes 2 6.263398 0 0 5270 +themost 2 6.263398 0 0 5271 +statusth 2 6.263398 0 0 5272 +algebraicqueri 2 6.263398 0 0 5273 +analogousto 2 6.263398 0 0 5274 +candeclar 2 6.263398 0 0 5275 +likesql 2 6.263398 0 0 5276 +versa 2 6.263398 0 0 5277 +querya 2 6.263398 0 0 5278 +occurr 2 6.263398 0 0 5279 +erupt 2 6.263398 0 0 5280 +didth 2 6.263398 0 0 5281 +groupbi 2 6.263398 0 0 5282 +subqueri 2 6.263398 0 0 5283 +aggregatefunct 2 6.263398 0 0 5284 +sequencesord 2 6.263398 0 0 5285 +modelth 2 6.263398 0 0 5286 +gist 2 6.263398 0 0 5287 +ordereddomain 2 6.263398 0 0 5288 +andposit 2 6.263398 0 0 5289 +recordsmap 2 6.263398 0 0 5290 +rise 2 6.263398 0 0 5291 +relationaloper 2 6.263398 0 0 5292 +andaggreg 2 6.263398 0 0 5293 +researchersin 2 6.263398 0 0 5294 +movingaggreg 2 6.263398 0 0 5295 +worldsitu 2 6.263398 0 0 5296 +extensionof 2 6.263398 0 0 5297 +ofseq 2 6.263398 0 0 5298 +languagew 2 6.263398 0 0 5299 +usingwhich 2 6.263398 0 0 5300 +languagei 2 6.263398 0 0 5301 +queriesa 2 6.263398 0 0 5302 +techniquesw 2 6.263398 0 0 5303 +developmentth 2 6.263398 0 0 5304 +viaa 2 6.263398 0 0 5305 +ontop 2 6.263398 0 0 5306 +languageswhich 2 6.263398 0 0 5307 +arbitrarylevel 2 6.263398 0 0 5308 +viceversa 2 6.263398 0 0 5309 +detailson 2 6.263398 0 0 5310 +publicationssequ 2 6.263398 0 0 5311 +datapraveen 2 6.263398 0 0 5312 +systempraveen 2 6.263398 0 0 5313 +queriesraghu 2 6.263398 0 0 5314 +workthedevis 2 6.263398 0 0 5315 +visualizationenviron 2 6.263398 0 0 5316 +servercontact 2 6.263398 0 0 5317 +eduraghu 2 6.263398 0 0 5318 +edumiron 2 6.263398 0 0 5319 +seshadripraveen 2 6.263398 0 0 5320 +monoton 2 6.263398 0 0 5321 +logarithm 2 6.263398 0 0 5322 +expon 2 6.263398 0 0 5323 +ofintegr 2 6.263398 0 0 5324 +summat 2 6.263398 0 0 5325 +justa 2 6.263398 0 0 5326 +glori 2 6.263398 0 0 5327 +width 2 6.263398 0 0 5328 +alexand 2 6.263398 0 0 5329 +cytacki 2 6.263398 0 0 5330 +wasserman 2 6.263398 0 0 5331 +nephew 2 6.263398 0 0 5332 +scienceatcornel 2 6.263398 0 0 5333 +withken 2 6.263398 0 0 5334 +formor 2 6.263398 0 0 5335 +distributedcomput 2 6.263398 0 0 5336 +bakker 2 6.263398 0 0 5337 +hadzilaco 2 6.263398 0 0 5338 +toueg 2 6.263398 0 0 5339 +mere 2 6.263398 0 0 5340 +slow 2 6.263398 0 0 5341 +systemswith 2 6.263398 0 0 5342 +amajor 2 6.263398 0 0 5343 +clickherefor 2 6.263398 0 0 5344 +stratu 2 6.263398 0 0 5345 +artifact 2 6.263398 0 0 5346 +widget 2 6.263398 0 0 5347 +hors 2 6.263398 0 0 5348 +stumbl 2 6.263398 0 0 5349 +leadto 2 6.263398 0 0 5350 +navin 2 6.263398 0 0 5351 +agarw 2 6.263398 0 0 5352 +fool 2 6.263398 0 0 5353 +korean 2 6.263398 0 0 5354 +acoust 2 6.263398 0 0 5355 +sang 2 6.263398 0 0 5356 +onthi 2 6.263398 0 0 5357 +chopin 2 6.263398 0 0 5358 +miser 2 6.263398 0 0 5359 +kang 2 6.263398 0 0 5360 +foremost 2 6.263398 0 0 5361 +degrad 2 6.263398 0 0 5362 +slowli 2 6.263398 0 0 5363 +snoop 2 6.263398 0 0 5364 +ought 2 6.263398 0 0 5365 +goeth 2 6.263398 0 0 5366 +obviou 2 6.263398 0 0 5367 +forest 2 6.263398 0 0 5368 +shack 2 6.263398 0 0 5369 +withno 2 6.263398 0 0 5370 +pile 2 6.263398 0 0 5371 +purpl 2 6.263398 0 0 5372 +silk 2 6.263398 0 0 5373 +silver 2 6.263398 0 0 5374 +ocean 2 6.263398 0 0 5375 +nowadai 2 6.263398 0 0 5376 +pointcast 2 6.263398 0 0 5377 +haveth 2 6.263398 0 0 5378 +neededto 2 6.263398 0 0 5379 +agood 2 6.263398 0 0 5380 +carleton 2 6.263398 0 0 5381 +eduthi 2 6.263398 0 0 5382 +alia 2 6.263398 0 0 5383 +persuad 2 6.263398 0 0 5384 +declin 2 6.263398 0 0 5385 +portrai 2 6.263398 0 0 5386 +bitter 2 6.263398 0 0 5387 +rebel 2 6.263398 0 0 5388 +imperi 2 6.263398 0 0 5389 +andclassif 2 6.263398 0 0 5390 +cue 2 6.263398 0 0 5391 +forthcom 2 6.263398 0 0 5392 +dartmouth 2 6.263398 0 0 5393 +montral 2 6.263398 0 0 5394 +srivastava 2 6.263398 0 0 5395 +dick 2 6.263398 0 0 5396 +tender 2 6.263398 0 0 5397 +tropic 2 6.263398 0 0 5398 +fifteen 2 6.263398 0 0 5399 +marvel 2 6.263398 0 0 5400 +defeat 2 6.263398 0 0 5401 +reward 2 6.263398 0 0 5402 +andwork 2 6.263398 0 0 5403 +relai 2 6.263398 0 0 5404 +spectrum 2 6.263398 0 0 5405 +bibtex 2 6.263398 0 0 5406 +firewal 2 6.263398 0 0 5407 +krishnamurthi 2 6.263398 0 0 5408 +lumetta 2 6.263398 0 0 5409 +orlando 2 6.263398 0 0 5410 +acceler 2 6.263398 0 0 5411 +polyhedr 2 6.263398 0 0 5412 +andautomat 2 6.263398 0 0 5413 +vinc 2 6.263398 0 0 5414 +functionof 2 6.263398 0 0 5415 +todayth 2 6.263398 0 0 5416 +bowl 2 6.263398 0 0 5417 +reject 2 6.263398 0 0 5418 +cachet 2 6.263398 0 0 5419 +anni 2 6.263398 0 0 5420 +auxiliari 2 6.263398 0 0 5421 +yanhong 2 6.263398 0 0 5422 +fuzzi 2 6.263398 0 0 5423 +eduhttp 2 6.263398 0 0 5424 +infom 2 6.263398 0 0 5425 +veggi 2 6.263398 0 0 5426 +lisa 2 6.263398 0 0 5427 +theidea 2 6.263398 0 0 5428 +admit 2 6.263398 0 0 5429 +thegam 2 6.263398 0 0 5430 +differencebetween 2 6.263398 0 0 5431 +pleaseclick 2 6.263398 0 0 5432 +focuseson 2 6.263398 0 0 5433 +thenetwork 2 6.263398 0 0 5434 +microsecond 2 6.263398 0 0 5435 +tominim 2 6.263398 0 0 5436 +mainstream 2 6.263398 0 0 5437 +contactthorsten 2 6.263398 0 0 5438 +laboratorywelcom 2 6.263398 0 0 5439 +windowsnt 2 6.263398 0 0 5440 +homolog 2 6.263398 0 0 5441 +hing 2 6.263398 0 0 5442 +retain 2 6.263398 0 0 5443 +isdescrib 2 6.263398 0 0 5444 +waysthat 2 6.263398 0 0 5445 +informationag 2 6.263398 0 0 5446 +hyperlink 2 6.263398 0 0 5447 +todevelop 2 6.263398 0 0 5448 +communicationprimit 2 6.263398 0 0 5449 +thorstenvon 2 6.263398 0 0 5450 +expend 2 6.263398 0 0 5451 +levelprogram 2 6.263398 0 0 5452 +palmer 2 6.263398 0 0 5453 +properli 2 6.263398 0 0 5454 +multiprocessorsa 2 6.263398 0 0 5455 +knowwhat 2 6.263398 0 0 5456 +junki 2 6.263398 0 0 5457 +nando 2 6.263398 0 0 5458 +woodi 2 6.263398 0 0 5459 +simmon 2 6.263398 0 0 5460 +ajita 2 6.263398 0 0 5461 +papersmi 2 6.263398 0 0 5462 +woodwork 2 6.263398 0 0 5463 +spurt 2 6.263398 0 0 5464 +plenti 2 6.263398 0 0 5465 +patienc 2 6.263398 0 0 5466 +bayardo 2 6.263398 0 0 5467 +roberto 2 6.263398 0 0 5468 +overviewof 2 6.263398 0 0 5469 +toss 2 6.263398 0 0 5470 +bogu 2 6.263398 0 0 5471 +imagin 2 6.263398 0 0 5472 +combat 2 6.263398 0 0 5473 +nobodi 2 6.263398 0 0 5474 +voltag 2 6.263398 0 0 5475 +invalid 2 6.263398 0 0 5476 +informationthi 2 6.263398 0 0 5477 +empti 2 6.263398 0 0 5478 +edufax 2 6.263398 0 0 5479 +knowna 2 6.263398 0 0 5480 +webth 2 6.263398 0 0 5481 +projectmi 2 6.263398 0 0 5482 +andsom 2 6.263398 0 0 5483 +thereof 2 6.263398 0 0 5484 +steal 2 6.263398 0 0 5485 +amor 2 6.263398 0 0 5486 +rudi 2 6.263398 0 0 5487 +verg 2 6.263398 0 0 5488 +texan 2 6.263398 0 0 5489 +hendrix 2 6.263398 0 0 5490 +anabstract 2 6.263398 0 0 5491 +researchth 2 6.263398 0 0 5492 +sigda 2 6.263398 0 0 5493 +disinform 2 6.263398 0 0 5494 +carruth 2 6.263398 0 0 5495 +mydissert 2 6.263398 0 0 5496 +fiance 2 6.263398 0 0 5497 +deji 2 6.263398 0 0 5498 +chenabout 2 6.263398 0 0 5499 +bullet 2 6.263398 0 0 5500 +mstk 2 6.263398 0 0 5501 +northwestern 2 6.263398 0 0 5502 +weird 2 6.263398 0 0 5503 +nifti 2 6.263398 0 0 5504 +numb 2 6.263398 0 0 5505 +pope 2 6.263398 0 0 5506 +instituteof 2 6.263398 0 0 5507 +pinbal 2 6.263398 0 0 5508 +inthi 2 6.263398 0 0 5509 +ckpoon 2 6.263398 0 0 5510 +hungri 2 6.263398 0 0 5511 +byprof 2 6.263398 0 0 5512 +descriptionof 2 6.263398 0 0 5513 +followingtechniqu 2 6.263398 0 0 5514 +emeri 2 6.263398 0 0 5515 +wilder 2 6.263398 0 0 5516 +symmetri 2 6.263398 0 0 5517 +newest 2 6.263398 0 0 5518 +reproduc 2 6.263398 0 0 5519 +publicationscod 2 6.263398 0 0 5520 +grabber 2 6.263398 0 0 5521 +spain 2 6.263398 0 0 5522 +stringent 2 6.263398 0 0 5523 +scenario 2 6.263398 0 0 5524 +tsou 2 6.263398 0 0 5525 +clement 2 6.263398 0 0 5526 +austini 2 6.263398 0 0 5527 +deep 2 6.263398 0 0 5528 +exhibit 2 6.263398 0 0 5529 +cthomp 2 6.263398 0 0 5530 +informationassist 2 6.263398 0 0 5531 +teachingfal 2 6.263398 0 0 5532 +garg 2 6.263398 0 0 5533 +dane 2 6.263398 0 0 5534 +probe 2 6.263398 0 0 5535 +dian 2 6.263398 0 0 5536 +patra 2 6.263398 0 0 5537 +reasearch 2 6.263398 0 0 5538 +federalinstitut 2 6.263398 0 0 5539 +parameter 2 6.263398 0 0 5540 +encapsul 2 6.263398 0 0 5541 +decent 2 6.263398 0 0 5542 +leeuwen 2 6.263398 0 0 5543 +groupi 2 6.263398 0 0 5544 +linksth 2 6.263398 0 0 5545 +emma 2 6.263398 0 0 5546 +zhongshan 2 6.263398 0 0 5547 +mini 2 6.263398 0 0 5548 +mehi 2 6.263398 0 0 5549 +andinform 2 6.263398 0 0 5550 +dust 2 6.263398 0 0 5551 +syntact 2 6.263398 0 0 5552 +peak 2 6.263398 0 0 5553 +estlin 2 6.263398 0 0 5554 +tara 2 6.263398 0 0 5555 +researchinvolv 2 6.263398 0 0 5556 +acquir 2 6.263398 0 0 5557 +amparticularli 2 6.263398 0 0 5558 +tulan 2 6.263398 0 0 5559 +polic 2 6.263398 0 0 5560 +crawl 2 6.263398 0 0 5561 +trammel 2 6.263398 0 0 5562 +collegem 2 6.263398 0 0 5563 +useless 2 6.263398 0 0 5564 +odd 2 6.263398 0 0 5565 +oak 2 6.263398 0 0 5566 +democrat 2 6.263398 0 0 5567 +plakal 2 6.263398 0 0 5568 +perfectli 2 6.263398 0 0 5569 +hyderabad 2 6.263398 0 0 5570 +andhra 2 6.263398 0 0 5571 +pradesh 2 6.263398 0 0 5572 +osmania 2 6.263398 0 0 5573 +nebraska 2 6.263398 0 0 5574 +lincoln 2 6.263398 0 0 5575 +addict 2 6.263398 0 0 5576 +neeraj 2 6.263398 0 0 5577 +shailesh 2 6.263398 0 0 5578 +vipin 2 6.263398 0 0 5579 +fornetwork 2 6.263398 0 0 5580 +ordistribut 2 6.263398 0 0 5581 +redistribut 2 6.263398 0 0 5582 +hail 2 6.263398 0 0 5583 +addresspictur 2 6.263398 0 0 5584 +ceremoni 2 6.263398 0 0 5585 +jiangsu 2 6.263398 0 0 5586 +pagealan 2 6.263398 0 0 5587 +researchgroup 2 6.263398 0 0 5588 +wuhan 2 6.263398 0 0 5589 +intramur 2 6.263398 0 0 5590 +kansa 2 6.263398 0 0 5591 +marathon 2 6.263398 0 0 5592 +vietnames 2 6.263398 0 0 5593 +researchwith 2 6.263398 0 0 5594 +rosett 2 6.263398 0 0 5595 +interestscommonsens 2 6.263398 0 0 5596 +actionlog 2 6.263398 0 0 5597 +reasoningmi 2 6.263398 0 0 5598 +yanbin 2 6.263398 0 0 5599 +reciev 2 6.263398 0 0 5600 +holli 2 6.263398 0 0 5601 +dejanew 2 6.263398 0 0 5602 +pagejohn 2 6.263398 0 0 5603 +universityph 2 6.263398 0 0 5604 +lissom 2 6.263398 0 0 5605 +testabl 2 6.263398 0 0 5606 +hypothes 2 6.263398 0 0 5607 +nearli 2 6.263398 0 0 5608 +sirosh 2 6.263398 0 0 5609 +novelschines 2 6.263398 0 0 5610 +registrar 2 6.263398 0 0 5611 +gradaut 2 6.263398 0 0 5612 +studiesut 2 6.263398 0 0 5613 +magzin 2 6.263398 0 0 5614 +technicalreport 2 6.263398 0 0 5615 +visitorsinc 2 6.263398 0 0 5616 +kincaid 2 6.263398 0 0 5617 +subprogram 2 6.263398 0 0 5618 +itpack 2 6.263398 0 0 5619 +rassia 2 6.263398 0 0 5620 +swarthmor 2 6.263398 0 0 5621 +thequalit 2 6.263398 0 0 5622 +backbon 2 6.263398 0 0 5623 +professordepart 2 6.263398 0 0 5624 +biograph 2 6.263398 0 0 5625 +disc 2 6.263398 0 0 5626 +aboutthi 2 6.263398 0 0 5627 +addr 2 6.263398 0 0 5628 +performanceanalysi 2 6.263398 0 0 5629 +universit 2 6.263398 0 0 5630 +bologna 2 6.263398 0 0 5631 +honour 2 6.263398 0 0 5632 +madhukar 2 6.263398 0 0 5633 +espnet 2 6.263398 0 0 5634 +mallori 2 6.263398 0 0 5635 +forev 2 6.263398 0 0 5636 +isvia 2 6.263398 0 0 5637 +johnston 2 6.263398 0 0 5638 +somerset 2 6.263398 0 0 5639 +isalso 2 6.263398 0 0 5640 +mayberri 2 6.263398 0 0 5641 +downtown 2 6.263398 0 0 5642 +norm 2 6.263398 0 0 5643 +austinresearchmi 2 6.263398 0 0 5644 +mecaliff 2 6.263398 0 0 5645 +bareiss 2 6.263398 0 0 5646 +murrai 2 6.263398 0 0 5647 +rickel 2 6.263398 0 0 5648 +forconstruct 2 6.263398 0 0 5649 +arealso 2 6.263398 0 0 5650 +lexicon 2 6.263398 0 0 5651 +brant 2 6.263398 0 0 5652 +aroundth 2 6.263398 0 0 5653 +leap 2 6.263398 0 0 5654 +venu 2 6.263398 0 0 5655 +satisfact 2 6.263398 0 0 5656 +obermey 2 6.263398 0 0 5657 +vaidyaraman 2 6.263398 0 0 5658 +warshaw 2 6.263398 0 0 5659 +powerlist 2 6.263398 0 0 5660 +chandi 2 6.263398 0 0 5661 +seuss 2 6.263398 0 0 5662 +abduct 2 6.263398 0 0 5663 +califf 2 6.263398 0 0 5664 +dirk 2 6.263398 0 0 5665 +subramanian 2 6.263398 0 0 5666 +georgetown 2 6.263398 0 0 5667 +drake 2 6.263398 0 0 5668 +icml 2 6.263398 0 0 5669 +prodigi 2 6.263398 0 0 5670 +champaign 2 6.263398 0 0 5671 +highschool 2 6.263398 0 0 5672 +geneticalgorithm 2 6.263398 0 0 5673 +retir 2 6.263398 0 0 5674 +singh 2 6.263398 0 0 5675 +clearer 2 6.263398 0 0 5676 +groupat 2 6.263398 0 0 5677 +bobbi 2 6.263398 0 0 5678 +marti 2 6.263398 0 0 5679 +rupert 2 6.263398 0 0 5680 +andcognit 2 6.263398 0 0 5681 +panic 2 6.263398 0 0 5682 +collector 2 6.263398 0 0 5683 +sheetal 2 6.263398 0 0 5684 +kakkad 2 6.263398 0 0 5685 +macro 2 6.263398 0 0 5686 +damag 2 6.263398 0 0 5687 +programmingsystem 2 6.263398 0 0 5688 +export 2 6.263398 0 0 5689 +everywher 2 6.263398 0 0 5690 +curli 2 6.263398 0 0 5691 +quantiz 2 6.263398 0 0 5692 +lemk 2 6.263398 0 0 5693 +thephys 2 6.263398 0 0 5694 +blah 2 6.263398 0 0 5695 +andpostscript 2 6.263398 0 0 5696 +warrant 2 6.263398 0 0 5697 +fourier 2 6.263398 0 0 5698 +offspr 2 6.263398 0 0 5699 +basedprogram 2 6.263398 0 0 5700 +junk 2 6.263398 0 0 5701 +helsinki 2 6.263398 0 0 5702 +rong 2 6.263398 0 0 5703 +rajaraman 2 6.263398 0 0 5704 +rraj 2 6.263398 0 0 5705 +rajmohan 2 6.263398 0 0 5706 +mypubl 2 6.263398 0 0 5707 +linkscontact 2 6.263398 0 0 5708 +zhai 2 6.263398 0 0 5709 +miracl 2 6.263398 0 0 5710 +holi 2 6.263398 0 0 5711 +stimul 2 6.263398 0 0 5712 +truck 2 6.263398 0 0 5713 +wash 2 6.263398 0 0 5714 +geijn 2 6.263398 0 0 5715 +appliedmathemat 2 6.263398 0 0 5716 +interestnumer 2 6.263398 0 0 5717 +musician 2 6.263398 0 0 5718 +hamilton 2 6.263398 0 0 5719 +nate 2 6.263398 0 0 5720 +fring 2 6.263398 0 0 5721 +unpredict 2 6.263398 0 0 5722 +grab 2 6.263398 0 0 5723 +bookshelf 2 6.263398 0 0 5724 +danger 2 6.263398 0 0 5725 +strictli 2 6.263398 0 0 5726 +choiwelcom 2 6.263398 0 0 5727 +nanj 2 6.263398 0 0 5728 +zodiac 2 6.263398 0 0 5729 +twang 2 6.263398 0 0 5730 +eagl 2 6.263398 0 0 5731 +publicationss 2 6.263398 0 0 5732 +thirteenth 2 6.263398 0 0 5733 +modelingof 2 6.263398 0 0 5734 +andsurfac 2 6.263398 0 0 5735 +publicationsr 2 6.263398 0 0 5736 +king 2 6.263398 0 0 5737 +guard 2 6.263398 0 0 5738 +stewart 2 6.263398 0 0 5739 +edsger 2 6.263398 0 0 5740 +honorari 2 6.263398 0 0 5741 +sciencesmemb 2 6.263398 0 0 5742 +interestcomput 2 6.263398 0 0 5743 +southwestern 2 6.263398 0 0 5744 +beinginvestig 2 6.263398 0 0 5745 +parallelsystem 2 6.263398 0 0 5746 +areasinclud 2 6.263398 0 0 5747 +astronaut 2 6.263398 0 0 5748 +ozden 2 6.263398 0 0 5749 +eighth 2 6.263398 0 0 5750 +andnetwork 2 6.263398 0 0 5751 +fordigit 2 6.263398 0 0 5752 +baruah 2 6.263398 0 0 5753 +sinha 2 6.263398 0 0 5754 +andarchitectur 2 6.263398 0 0 5755 +russia 2 6.263398 0 0 5756 +convoc 2 6.263398 0 0 5757 +nomin 2 6.263398 0 0 5758 +burton 2 6.263398 0 0 5759 +neutral 2 6.263398 0 0 5760 +howto 2 6.263398 0 0 5761 +mackai 2 6.263398 0 0 5762 +grinnel 2 6.263398 0 0 5763 +edua 2 6.263398 0 0 5764 +novelti 2 6.263398 0 0 5765 +christ 2 6.263398 0 0 5766 +arthur 2 6.263398 0 0 5767 +intent 2 6.263398 0 0 5768 +hei 2 6.263398 0 0 5769 +forgiv 2 6.263398 0 0 5770 +andto 2 6.263398 0 0 5771 +wedo 2 6.263398 0 0 5772 +sick 2 6.263398 0 0 5773 +hesit 2 6.263398 0 0 5774 +worm 2 6.263398 0 0 5775 +eduperson 2 6.263398 0 0 5776 +medit 2 6.263398 0 0 5777 +settl 2 6.263398 0 0 5778 +methodsand 2 6.263398 0 0 5779 +kaleidoscop 2 6.263398 0 0 5780 +oversea 2 6.263398 0 0 5781 +bourassa 2 6.263398 0 0 5782 +virgil 2 6.263398 0 0 5783 +melani 2 6.263398 0 0 5784 +comet 2 6.263398 0 0 5785 +mona 2 6.263398 0 0 5786 +gothic 2 6.263398 0 0 5787 +ahren 2 6.263398 0 0 5788 +brinklei 2 6.263398 0 0 5789 +notebook 2 6.263398 0 0 5790 +arbitrarili 2 6.263398 0 0 5791 +databaseenviron 2 6.263398 0 0 5792 +pagelast 2 6.263398 0 0 5793 +younginvestig 2 6.263398 0 0 5794 +theindian 2 6.263398 0 0 5795 +andscienc 2 6.263398 0 0 5796 +milwauke 2 6.263398 0 0 5797 +amcurr 2 6.263398 0 0 5798 +articul 2 6.263398 0 0 5799 +mywork 2 6.263398 0 0 5800 +mossi 2 6.263398 0 0 5801 +mental 2 6.263398 0 0 5802 +stolen 2 6.263398 0 0 5803 +sciencein 2 6.263398 0 0 5804 +paralleland 2 6.263398 0 0 5805 +getto 2 6.263398 0 0 5806 +locomot 2 6.263398 0 0 5807 +przemyslaw 2 6.263398 0 0 5808 +implemen 2 6.263398 0 0 5809 +emin 2 6.263398 0 0 5810 +sirer 2 6.263398 0 0 5811 +wwo 2 6.263398 0 0 5812 +eduwork 2 6.263398 0 0 5813 +tomkin 2 6.263398 0 0 5814 +hugo 2 6.263398 0 0 5815 +garth 2 6.263398 0 0 5816 +gibson 2 6.263398 0 0 5817 +hsieh 2 6.263398 0 0 5818 +onlinesuperpag 2 6.263398 0 0 5819 +moss 2 6.263398 0 0 5820 +microbenchmark 2 6.263398 0 0 5821 +comedi 2 6.263398 0 0 5822 +geeki 2 6.263398 0 0 5823 +shopbot 2 6.263398 0 0 5824 +magellan 2 6.263398 0 0 5825 +cafe 2 6.263398 0 0 5826 +salon 2 6.263398 0 0 5827 +reform 2 6.263398 0 0 5828 +computerinteract 2 6.263398 0 0 5829 +havebeen 2 6.263398 0 0 5830 +edufor 2 6.263398 0 0 5831 +hpcc 2 6.263398 0 0 5832 +clone 2 6.263398 0 0 5833 +ismb 2 6.263398 0 0 5834 +stevetanimoto 2 6.263398 0 0 5835 +ofthi 2 6.263398 0 0 5836 +cscl 2 6.263398 0 0 5837 +studio 2 6.263398 0 0 5838 +tonyderos 2 6.263398 0 0 5839 +stuetzl 2 6.263398 0 0 5840 +duchamp 2 6.263398 0 0 5841 +jovan 2 6.263398 0 0 5842 +schedulemi 2 6.263398 0 0 5843 +fri 2 6.263398 0 0 5844 +font 2 6.263398 0 0 5845 +tomi 2 6.263398 0 0 5846 +ingram 2 6.263398 0 0 5847 +doom 2 6.263398 0 0 5848 +christianson 2 6.263398 0 0 5849 +till 2 6.263398 0 0 5850 +chicken 2 6.263398 0 0 5851 +theanim 2 6.263398 0 0 5852 +thechateau 2 6.263398 0 0 5853 +cynic 2 6.263398 0 0 5854 +duel 2 6.263398 0 0 5855 +dion 2 6.263398 0 0 5856 +occupi 2 6.263398 0 0 5857 +ladder 2 6.263398 0 0 5858 +infam 2 6.263398 0 0 5859 +newslet 2 6.263398 0 0 5860 +northern 2 6.263398 0 0 5861 +thecurr 2 6.263398 0 0 5862 +alumnu 2 6.263398 0 0 5863 +wendi 2 6.263398 0 0 5864 +belluomini 2 6.263398 0 0 5865 +eustac 2 6.263398 0 0 5866 +resolutionon 2 6.263398 0 0 5867 +nixon 2 6.263398 0 0 5868 +theblack 2 6.263398 0 0 5869 +incid 2 6.263398 0 0 5870 +sarcasm 2 6.263398 0 0 5871 +meth 2 6.263398 0 0 5872 +apprentic 2 6.263398 0 0 5873 +cart 2 6.263398 0 0 5874 +mcname 2 6.263398 0 0 5875 +architecturethat 2 6.263398 0 0 5876 +studentat 2 6.263398 0 0 5877 +backgroundi 2 6.263398 0 0 5878 +schedulingpolici 2 6.263398 0 0 5879 +strand 2 6.263398 0 0 5880 +ofnew 2 6.263398 0 0 5881 +mipsi 2 6.263398 0 0 5882 +tucson 2 6.263398 0 0 5883 +cloth 2 6.263398 0 0 5884 +criterion 2 6.263398 0 0 5885 +preparedfor 2 6.263398 0 0 5886 +meander 2 6.263398 0 0 5887 +pageoren 2 6.263398 0 0 5888 +anddynam 2 6.263398 0 0 5889 +finalist 2 6.263398 0 0 5890 +discoveraward 2 6.263398 0 0 5891 +brute 2 6.263398 0 0 5892 +ascal 2 6.263398 0 0 5893 +bernard 2 6.263398 0 0 5894 +lesh 2 6.263398 0 0 5895 +goan 2 6.263398 0 0 5896 +zamir 2 6.263398 0 0 5897 +shake 2 6.263398 0 0 5898 +umass 2 6.263398 0 0 5899 +columbia 2 6.263398 0 0 5900 +uist 2 6.263398 0 0 5901 +vegetarian 2 6.263398 0 0 5902 +greet 2 6.263398 0 0 5903 +forman 2 6.263398 0 0 5904 +charli 2 6.263398 0 0 5905 +pagegreg 2 6.263398 0 0 5906 +bermuda 2 6.263398 0 0 5907 +ncaa 2 6.263398 0 0 5908 +unoffici 2 6.263398 0 0 5909 +unif 2 6.263398 0 0 5910 +flight 2 6.263398 0 0 5911 +scienceher 2 6.263398 0 0 5912 +tracer 2 6.263398 0 0 5913 +inc 2 6.263398 0 0 5914 +rai 2 6.263398 0 0 5915 +mbquicktim 2 6.263398 0 0 5916 +hord 2 6.263398 0 0 5917 +boi 2 6.263398 0 0 5918 +toseattl 2 6.263398 0 0 5919 +hauck 2 6.263398 0 0 5920 +montag 2 6.263398 0 0 5921 +springbok 2 6.263398 0 0 5922 +macduff 2 6.263398 0 0 5923 +obsess 2 6.263398 0 0 5924 +jamrozik 2 6.263398 0 0 5925 +subpag 2 6.263398 0 0 5926 +theuniversit 2 6.263398 0 0 5927 +grenobl 2 6.263398 0 0 5928 +laboratoir 2 6.263398 0 0 5929 +mater 2 6.263398 0 0 5930 +projectsi 2 6.263398 0 0 5931 +vortexcompil 2 6.263398 0 0 5932 +andhow 2 6.263398 0 0 5933 +intraprocedur 2 6.263398 0 0 5934 +coke 2 6.263398 0 0 5935 +caffein 2 6.263398 0 0 5936 +fly 2 6.263398 0 0 5937 +anymor 2 6.263398 0 0 5938 +rees 2 6.263398 0 0 5939 +heimdahl 2 6.263398 0 0 5940 +tca 2 6.263398 0 0 5941 +mat 2 6.263398 0 0 5942 +lojlo 2 6.263398 0 0 5943 +suif 2 6.263398 0 0 5944 +josh 2 6.263398 0 0 5945 +sketch 2 6.263398 0 0 5946 +herear 2 6.263398 0 0 5947 +ratan 2 6.263398 0 0 5948 +thousand 2 6.263398 0 0 5949 +bicycl 2 6.263398 0 0 5950 +usathi 2 6.263398 0 0 5951 +asia 2 6.263398 0 0 5952 +nervou 2 6.263398 0 0 5953 +conscious 2 6.263398 0 0 5954 +andha 2 6.263398 0 0 5955 +collis 2 6.263398 0 0 5956 +computingresearch 2 6.263398 0 0 5957 +aeronaut 2 6.263398 0 0 5958 +safewar 2 6.263398 0 0 5959 +pressur 2 6.263398 0 0 5960 +accid 2 6.263398 0 0 5961 +airport 2 6.263398 0 0 5962 +projecti 2 6.263398 0 0 5963 +befound 2 6.263398 0 0 5964 +chandramohan 2 6.263398 0 0 5965 +ashutosh 2 6.263398 0 0 5966 +hotlin 2 6.263398 0 0 5967 +grave 2 6.263398 0 0 5968 +war 2 6.263398 0 0 5969 +perkowitz 2 6.263398 0 0 5970 +eick 2 6.263398 0 0 5971 +crime 2 6.263398 0 0 5972 +ribbon 2 6.263398 0 0 5973 +mckenzi 2 6.263398 0 0 5974 +gemini 2 6.263398 0 0 5975 +isomorph 2 6.263398 0 0 5976 +shirt 2 6.263398 0 0 5977 +shortcom 2 6.263398 0 0 5978 +linkabl 2 6.263398 0 0 5979 +strait 2 6.263398 0 0 5980 +eec 2 6.263398 0 0 5981 +diplom 2 6.263398 0 0 5982 +labyrinth 2 6.263398 0 0 5983 +salsa 2 6.263398 0 0 5984 +temperatur 2 6.263398 0 0 5985 +ironi 2 6.263398 0 0 5986 +albert 2 6.263398 0 0 5987 +brew 2 6.263398 0 0 5988 +gross 2 6.263398 0 0 5989 +diploma 2 6.263398 0 0 5990 +bergen 2 6.263398 0 0 5991 +itin 2 6.263398 0 0 5992 +italian 2 6.263398 0 0 5993 +simpson 2 6.263398 0 0 5994 +mount 2 6.263398 0 0 5995 +atla 2 6.263398 0 0 5996 +persistentprogram 2 6.263398 0 0 5997 +creator 2 6.263398 0 0 5998 +stuffa 2 6.263398 0 0 5999 +fragment 2 6.263398 0 0 6000 +contigu 2 6.263398 0 0 6001 +ofappl 2 6.263398 0 0 6002 +whichsupport 2 6.263398 0 0 6003 +johnzahorjan 2 6.263398 0 0 6004 +myqual 2 6.263398 0 0 6005 +soha 2 6.263398 0 0 6006 +hassoun 2 6.263398 0 0 6007 +retim 2 6.263398 0 0 6008 +spud 2 6.263398 0 0 6009 +tango 2 6.263398 0 0 6010 +genesi 2 6.263398 0 0 6011 +raquetbal 2 6.263398 0 0 6012 +pepper 2 6.263398 0 0 6013 +andparallel 2 6.263398 0 0 6014 +yeargradu 2 6.263398 0 0 6015 +tomanufactur 2 6.263398 0 0 6016 +rakesh 2 6.263398 0 0 6017 +submarin 2 6.263398 0 0 6018 +convinc 2 6.263398 0 0 6019 +francais 2 6.263398 0 0 6020 +pageuw 2 6.263398 0 0 6021 +pagerec 2 6.263398 0 0 6022 +cooler 2 6.263398 0 0 6023 +closer 2 6.263398 0 0 6024 +adher 2 6.263398 0 0 6025 +inseattl 2 6.263398 0 0 6026 +whati 2 6.263398 0 0 6027 +andbuild 2 6.263398 0 0 6028 +thespin 2 6.263398 0 0 6029 +hpca 2 6.263398 0 0 6030 +toolfor 2 6.263398 0 0 6031 +internationalsymposium 2 6.263398 0 0 6032 +adjunct 2 6.263398 0 0 6033 +latch 2 6.263398 0 0 6034 +wigderson 2 6.263398 0 0 6035 +plenum 2 6.263398 0 0 6036 +advisorycommitte 2 6.263398 0 0 6037 +ofdata 2 6.263398 0 0 6038 +theinstitut 2 6.263398 0 0 6039 +sdegre 2 6.263398 0 0 6040 +developmentof 2 6.263398 0 0 6041 +programcommitte 2 6.263398 0 0 6042 +theieee 2 6.263398 0 0 6043 +mexico 2 6.263398 0 0 6044 +underprofessor 2 6.263398 0 0 6045 +ratherthan 2 6.263398 0 0 6046 +newapproach 2 6.263398 0 0 6047 +pursuit 2 6.263398 0 0 6048 +ubiquit 2 6.263398 0 0 6049 +cope 2 6.263398 0 0 6050 +differentarchitectur 2 6.263398 0 0 6051 +performanceevalu 2 6.263398 0 0 6052 +pathfind 2 6.263398 0 0 6053 +negoti 2 6.263398 0 0 6054 +basedperform 2 6.263398 0 0 6055 +usath 2 6.263398 0 0 6056 +skew 2 6.263398 0 0 6057 +barb 2 6.263398 0 0 6058 +qualif 2 6.263398 0 0 6059 +mascot 2 6.263398 0 0 6060 +shouldconsid 2 6.263398 0 0 6061 +acknowledg 2 6.263398 0 0 6062 +kyro 2 6.263398 0 0 6063 +kutulako 2 6.263398 0 0 6064 +deform 2 6.263398 0 0 6065 +hibbard 2 6.263398 0 0 6066 +scientificdata 2 6.263398 0 0 6067 +radianc 2 6.263398 0 0 6068 +axi 2 6.263398 0 0 6069 +objectiveoverviewreleas 2 6.263398 0 0 6070 +informationse 2 6.263398 0 0 6071 +horn 2 6.263398 0 0 6072 +negat 2 6.263398 0 0 6073 +similarto 2 6.263398 0 0 6074 +perturb 2 6.263398 0 0 6075 +theexodu 2 6.263398 0 0 6076 +serverobject 2 6.263398 0 0 6077 +raster 2 6.263398 0 0 6078 +polylin 2 6.263398 0 0 6079 +extent 2 6.263398 0 0 6080 +paid 2 6.263398 0 0 6081 +thescout 2 6.263398 0 0 6082 +oodbm 2 6.263398 0 0 6083 +mailbox 2 6.263398 0 0 6084 +kendal 2 6.263398 0 0 6085 +alain 2 6.263398 0 0 6086 +allex 2 6.263398 0 0 6087 +carolyn 2 6.263398 0 0 6088 +roth 2 6.263398 0 0 6089 +out 2 6.263398 0 0 6090 +subba 2 6.263398 0 0 6091 +officem 2 6.263398 0 0 6092 +wierd 2 6.263398 0 0 6093 +amo 2 6.263398 0 0 6094 +deposit 2 6.263398 0 0 6095 +pageandi 2 6.263398 0 0 6096 +hadmi 2 6.263398 0 0 6097 +canfing 2 6.263398 0 0 6098 +prime 2 6.263398 0 0 6099 +designand 2 6.263398 0 0 6100 +hoofer 2 6.263398 0 0 6101 +nextstep 2 6.263398 0 0 6102 +beyer 2 6.263398 0 0 6103 +pageoth 2 6.263398 0 0 6104 +disagre 2 6.263398 0 0 6105 +stripe 2 6.263398 0 0 6106 +creatur 2 6.263398 0 0 6107 +foolish 2 6.263398 0 0 6108 +pagesom 2 6.263398 0 0 6109 +gurindar 2 6.263398 0 0 6110 +usatel 2 6.263398 0 0 6111 +educationph 2 6.263398 0 0 6112 +interestscomput 2 6.263398 0 0 6113 +tragic 2 6.263398 0 0 6114 +haa 2 6.263398 0 0 6115 +interestsdatabas 2 6.263398 0 0 6116 +aimedat 2 6.263398 0 0 6117 +dataengin 2 6.263398 0 0 6118 +zaharioudaki 2 6.263398 0 0 6119 +cico 2 6.263398 0 0 6120 +chandrasekaran 2 6.263398 0 0 6121 +sashikanth 2 6.263398 0 0 6122 +btech 2 6.263398 0 0 6123 +eosdi 2 6.263398 0 0 6124 +projectdepart 2 6.263398 0 0 6125 +anddavid 2 6.263398 0 0 6126 +architecturec 2 6.263398 0 0 6127 +callaghan 2 6.263398 0 0 6128 +damn 2 6.263398 0 0 6129 +thevari 2 6.263398 0 0 6130 +soup 2 6.263398 0 0 6131 +kitchen 2 6.263398 0 0 6132 +jussi 2 6.263398 0 0 6133 +oneset 2 6.263398 0 0 6134 +viewsof 2 6.263398 0 0 6135 +birch 2 6.263398 0 0 6136 +workth 2 6.263398 0 0 6137 +guangshun 2 6.263398 0 0 6138 +kabra 2 6.263398 0 0 6139 +discrimin 2 6.263398 0 0 6140 +festschrift 2 6.263398 0 0 6141 +rochest 2 6.263398 0 0 6142 +hazen 2 6.263398 0 0 6143 +calcari 2 6.263398 0 0 6144 +devri 2 6.263398 0 0 6145 +shameless 2 6.263398 0 0 6146 +eliassi 2 6.263398 0 0 6147 +shubu 2 6.263398 0 0 6148 +toll 2 6.263398 0 0 6149 +essenc 2 6.263398 0 0 6150 +pagefor 2 6.263398 0 0 6151 +isthmu 2 6.263398 0 0 6152 +milton 2 6.263398 0 0 6153 +venkatesh 2 6.263398 0 0 6154 +wisconsint 2 6.263398 0 0 6155 +gjess 2 6.263398 0 0 6156 +roommat 2 6.263398 0 0 6157 +eventhough 2 6.263398 0 0 6158 +killer 2 6.263398 0 0 6159 +chop 2 6.263398 0 0 6160 +beard 2 6.263398 0 0 6161 +defunct 2 6.263398 0 0 6162 +intervent 2 6.263398 0 0 6163 +bother 2 6.263398 0 0 6164 +pagenam 2 6.263398 0 0 6165 +catalogu 2 6.263398 0 0 6166 +awesom 2 6.263398 0 0 6167 +offens 2 6.263398 0 0 6168 +guhan 2 6.263398 0 0 6169 +viswanathan 2 6.263398 0 0 6170 +sastri 2 6.263398 0 0 6171 +saeed 2 6.263398 0 0 6172 +hasti 2 6.263398 0 0 6173 +terrain 2 6.263398 0 0 6174 +kirk 2 6.263398 0 0 6175 +sagiv 2 6.263398 0 0 6176 +differenc 2 6.263398 0 0 6177 +interproceduraldataflow 2 6.263398 0 0 6178 +mooli 2 6.263398 0 0 6179 +aarhu 2 6.263398 0 0 6180 +charleston 2 6.263398 0 0 6181 +languagedesign 2 6.263398 0 0 6182 +igor 2 6.263398 0 0 6183 +ivanisev 2 6.263398 0 0 6184 +dewittresearch 2 6.263398 0 0 6185 +shaft 2 6.263398 0 0 6186 +herei 2 6.263398 0 0 6187 +southeast 2 6.263398 0 0 6188 +andvisu 2 6.263398 0 0 6189 +karavan 2 6.263398 0 0 6190 +lawand 2 6.263398 0 0 6191 +lover 2 6.263398 0 0 6192 +mill 2 6.263398 0 0 6193 +schnarr 2 6.263398 0 0 6194 +shamik 2 6.263398 0 0 6195 +thewisconsin 2 6.263398 0 0 6196 +nake 2 6.263398 0 0 6197 +sheboygan 2 6.263398 0 0 6198 +marcelo 2 6.263398 0 0 6199 +andelectr 2 6.263398 0 0 6200 +sustain 2 6.263398 0 0 6201 +uniformli 2 6.263398 0 0 6202 +dionisio 2 6.263398 0 0 6203 +pnevmatikato 2 6.263398 0 0 6204 +subbarao 2 6.263398 0 0 6205 +spectroscopi 2 6.263398 0 0 6206 +monster 2 6.263398 0 0 6207 +mino 2 6.263398 0 0 6208 +garofalaki 2 6.263398 0 0 6209 +hellen 2 6.263398 0 0 6210 +moshovo 2 6.263398 0 0 6211 +madisonadvisor 2 6.263398 0 0 6212 +interestsin 2 6.263398 0 0 6213 +contributor 2 6.263398 0 0 6214 +linksmi 2 6.263398 0 0 6215 +eduunivers 2 6.263398 0 0 6216 +molap 2 6.263398 0 0 6217 +wolberg 2 6.263398 0 0 6218 +computer 2 6.263398 0 0 6219 +linearli 2 6.263398 0 0 6220 +prognost 2 6.263398 0 0 6221 +prognosi 2 6.263398 0 0 6222 +patient 2 6.263398 0 0 6223 +multisurfac 2 6.263398 0 0 6224 +midwest 2 6.263398 0 0 6225 +blizzard 2 6.263398 0 0 6226 +iitk 2 6.263398 0 0 6227 +poosala 2 6.263398 0 0 6228 +adequaci 2 6.263398 0 0 6229 +preemption 2 6.263398 0 0 6230 +mansharamani 2 6.263398 0 0 6231 +destruct 2 6.263398 0 0 6232 +olap 2 6.263398 0 0 6233 +endow 2 6.263398 0 0 6234 +arbor 2 6.263398 0 0 6235 +kharagpur 2 6.263398 0 0 6236 +hairbal 2 6.263398 0 0 6237 +thesauru 2 6.263398 0 0 6238 +birla 2 6.263398 0 0 6239 +pilani 2 6.263398 0 0 6240 +gmap 2 6.263398 0 0 6241 +versatil 2 6.263398 0 0 6242 +yearbook 2 6.263398 0 0 6243 +andmanufactur 2 6.263398 0 0 6244 +drivemadison 2 6.263398 0 0 6245 +moveddocu 2 6.263398 0 1 6246 +movedthi 2 6.263398 0 1 6247 +postcriptdocu 1 6.957497 0 0 6248 +hocacours 1 6.957497 0 0 6249 +broccoli 1 6.957497 0 0 6250 +fileth 1 6.957497 0 0 6251 +systemth 1 6.957497 0 0 6252 +availablethursdai 1 6.957497 0 0 6253 +duetuesdai 1 6.957497 0 0 6254 +regrad 1 6.957497 0 0 6255 +retrievalthursdai 1 6.957497 0 0 6256 +retrievaldepart 1 6.957497 0 0 6257 +gradeshav 1 6.957497 0 0 6258 +twothird 1 6.957497 0 0 6259 +systemsinclud 1 6.957497 0 0 6260 +transactionprocess 1 6.957497 0 0 6261 +usefulinform 1 6.957497 0 0 6262 +willcov 1 6.957497 0 0 6263 +invert 1 6.957497 0 0 6264 +smartsystem 1 6.957497 0 0 6265 +relevancefeedback 1 6.957497 0 0 6266 +thesaurusconstruct 1 6.957497 0 0 6267 +automatictext 1 6.957497 0 0 6268 +placetuesdai 1 6.957497 0 0 6269 +thurston 1 6.957497 0 0 6270 +booksdatabas 1 6.957497 0 0 6271 +mcgrawhil 1 6.957497 0 0 6272 +andnavath 1 6.957497 0 0 6273 +byullman 1 6.957497 0 0 6274 +photocopiedmateri 1 6.957497 0 0 6275 +sophia 1 6.957497 0 0 6276 +georgiakaki 1 6.957497 0 0 6277 +officehour 1 6.957497 0 0 6278 +gradingexam 1 6.957497 0 0 6279 +yourfin 1 6.957497 0 0 6280 +policiesy 1 6.957497 0 0 6281 +samegrad 1 6.957497 0 0 6282 +tuesdayand 1 6.957497 0 0 6283 +illeg 1 6.957497 0 0 6284 +latexif 1 6.957497 0 0 6285 +goodopportun 1 6.957497 0 0 6286 +submissionpleas 1 6.957497 0 0 6287 +clinton 1 6.957497 0 0 6288 +perot 1 6.957497 0 0 6289 +homeworksgrad 1 6.957497 0 0 6290 +sortedalphabet 1 6.957497 0 0 6291 +thecov 1 6.957497 0 0 6292 +pagefollow 1 6.957497 0 0 6293 +policyal 1 6.957497 0 0 6294 +inwrit 1 6.957497 0 0 6295 +referto 1 6.957497 0 0 6296 +modelhomework 1 6.957497 0 0 6297 +weightingthursdai 1 6.957497 0 0 6298 +indexinghomework 1 6.957497 0 0 6299 +evaluationtuesdai 1 6.957497 0 0 6300 +feedbackthursdai 1 6.957497 0 0 6301 +clusteringhomework 1 6.957497 0 0 6302 +systemsor 1 6.957497 0 0 6303 +dirti 1 6.957497 0 0 6304 +internetworkingto 1 6.957497 0 0 6305 +teamsof 1 6.957497 0 0 6306 +trough 1 6.957497 0 0 6307 +complexityof 1 6.957497 0 0 6308 +offcial 1 6.957497 0 0 6309 +pageslink 1 6.957497 0 0 6310 +sorin 1 6.957497 0 0 6311 +generalinform 1 6.957497 0 0 6312 +midtermsyllabu 1 6.957497 0 0 6313 +midtermi 1 6.957497 0 0 6314 +caeworkst 1 6.957497 0 0 6315 +whomai 1 6.957497 0 0 6316 +throughbold_brows 1 6.957497 0 0 6317 +gettingstart 1 6.957497 0 0 6318 +workbook 1 6.957497 0 0 6319 +quicksim 1 6.957497 0 0 6320 +trainingworkbook 1 6.957497 0 0 6321 +exersis 1 6.957497 0 0 6322 +thesedocu 1 6.957497 0 0 6323 +uwengin 1 6.957497 0 0 6324 +pmcst 1 6.957497 0 0 6325 +herelink 1 6.957497 0 0 6326 +motw 1 6.957497 0 0 6327 +stuffnot 1 6.957497 0 0 6328 +edupag 1 6.957497 0 0 6329 +noodll 1 6.957497 0 0 6330 +inconsist 1 6.957497 0 0 6331 +partnerjoin 1 6.957497 0 0 6332 +ugrad 1 6.957497 0 0 6333 +idand 1 6.957497 0 0 6334 +tobia 1 6.957497 0 0 6335 +mayr 1 6.957497 0 0 6336 +hamblin 1 6.957497 0 0 6337 +mutabl 1 6.957497 0 0 6338 +informationaugust 1 6.957497 0 0 6339 +courseabout 1 6.957497 0 0 6340 +notationthat 1 6.957497 0 0 6341 +takec 1 6.957497 0 0 6342 +programmingparadigm 1 6.957497 0 0 6343 +imperativeprogram 1 6.957497 0 0 6344 +goodform 1 6.957497 0 0 6345 +probablytak 1 6.957497 0 0 6346 +questionsor 1 6.957497 0 0 6347 +serverwhich 1 6.957497 0 0 6348 +answersa 1 6.957497 0 0 6349 +thisweek 1 6.957497 0 0 6350 +edubut 1 6.957497 0 0 6351 +aboutproblem 1 6.957497 0 0 6352 +upsonjam 1 6.957497 0 0 6353 +tarobert 1 6.957497 0 0 6354 +tajustin 1 6.957497 0 0 6355 +taandra 1 6.957497 0 0 6356 +ferencz 1 6.957497 0 0 6357 +melissa 1 6.957497 0 0 6358 +consultantwhen 1 6.957497 0 0 6359 +meetlectur 1 6.957497 0 0 6360 +andrecit 1 6.957497 0 0 6361 +recitationsexpand 1 6.957497 0 0 6362 +opportunityto 1 6.957497 0 0 6363 +eachproblem 1 6.957497 0 0 6364 +setsdu 1 6.957497 0 0 6365 +mondayeven 1 6.957497 0 0 6366 +consultinghour 1 6.957497 0 0 6367 +voskuhltba 1 6.957497 0 0 6368 +materialsther 1 6.957497 0 0 6369 +handoutsand 1 6.957497 0 0 6370 +implementedin 1 6.957497 0 0 6371 +downloadonto 1 6.957497 0 0 6372 +ontoyour 1 6.957497 0 0 6373 +recentvers 1 6.957497 0 0 6374 +gradeswil 1 6.957497 0 0 6375 +thetot 1 6.957497 0 0 6376 +willgener 1 6.957497 0 0 6377 +followingclass 1 6.957497 0 0 6378 +sittingdown 1 6.957497 0 0 6379 +sink 1 6.957497 0 0 6380 +beforesit 1 6.957497 0 0 6381 +workmuch 1 6.957497 0 0 6382 +jointassign 1 6.957497 0 0 6383 +circumstancesmai 1 6.957497 0 0 6384 +yourown 1 6.957497 0 0 6385 +yougot 1 6.957497 0 0 6386 +whenpeopl 1 6.957497 0 0 6387 +lifeunpleas 1 6.957497 0 0 6388 +facilitiescit 1 6.957497 0 0 6389 +andpc 1 6.957497 0 0 6390 +upsonmac 1 6.957497 0 0 6391 +datesal 1 6.957497 0 0 6392 +mondaynight 1 6.957497 0 0 6393 +submityour 1 6.957497 0 0 6394 +multimethod 1 6.957497 0 0 6395 +heapsort 1 6.957497 0 0 6396 +metacircular 1 6.957497 0 0 6397 +nonloc 1 6.957497 0 0 6398 +throw 1 6.957497 0 0 6399 +quicksort 1 6.957497 0 0 6400 +organizationthorsten 1 6.957497 0 0 6401 +materialsal 1 6.957497 0 0 6402 +listlist 1 6.957497 0 0 6403 +csuglab 1 6.957497 0 0 6404 +dodg 1 6.957497 0 0 6405 +notethat 1 6.957497 0 0 6406 +rubix 1 6.957497 0 0 6407 +thefunct 1 6.957497 0 0 6408 +rearrang 1 6.957497 0 0 6409 +appeas 1 6.957497 0 0 6410 +var 1 6.957497 0 0 6411 +bracket 1 6.957497 0 0 6412 +youus 1 6.957497 0 0 6413 +machinesshould 1 6.957497 0 0 6414 +sbin 1 6.957497 0 0 6415 +ksaunder 1 6.957497 0 0 6416 +sbinfor 1 6.957497 0 0 6417 +gremlin 1 6.957497 0 0 6418 +codefor 1 6.957497 0 0 6419 +andget 1 6.957497 0 0 6420 +uponcomplet 1 6.957497 0 0 6421 +thoseus 1 6.957497 0 0 6422 +zeroon 1 6.957497 0 0 6423 +asspecifi 1 6.957497 0 0 6424 +oneassign 1 6.957497 0 0 6425 +vanto 1 6.957497 0 0 6426 +thisclarif 1 6.957497 0 0 6427 +newhomework 1 6.957497 0 0 6428 +coursemateri 1 6.957497 0 0 6429 +theorywelcom 1 6.957497 0 0 6430 +guideannounc 1 6.957497 0 0 6431 +erratum 1 6.957497 0 0 6432 +hourscod 1 6.957497 0 0 6433 +outsidefirewal 1 6.957497 0 0 6434 +stonebrak 1 6.957497 0 0 6435 +samplequest 1 6.957497 0 0 6436 +predatordbm 1 6.957497 0 0 6437 +currentproject 1 6.957497 0 0 6438 +coursedescript 1 6.957497 0 0 6439 +intendedto 1 6.957497 0 0 6440 +slargest 1 6.957497 0 0 6441 +piecesof 1 6.957497 0 0 6442 +knowledgeabledatabas 1 6.957497 0 0 6443 +researchcommun 1 6.957497 0 0 6444 +addressedbecaus 1 6.957497 0 0 6445 +informedus 1 6.957497 0 0 6446 +teller 1 6.957497 0 0 6447 +newcours 1 6.957497 0 0 6448 +quickreview 1 6.957497 0 0 6449 +abreadth 1 6.957497 0 0 6450 +advancedtop 1 6.957497 0 0 6451 +thepurpos 1 6.957497 0 0 6452 +coursei 1 6.957497 0 0 6453 +weeksaft 1 6.957497 0 0 6454 +requireread 1 6.957497 0 0 6455 +engineeringlibrari 1 6.957497 0 0 6456 +pursueaddit 1 6.957497 0 0 6457 +forinform 1 6.957497 0 0 6458 +examtim 1 6.957497 0 0 6459 +developmentproject 1 6.957497 0 0 6460 +involvea 1 6.957497 0 0 6461 +wishto 1 6.957497 0 0 6462 +willinvolv 1 6.957497 0 0 6463 +andmodifi 1 6.957497 0 0 6464 +andrar 1 6.957497 0 0 6465 +luxuri 1 6.957497 0 0 6466 +thediffer 1 6.957497 0 0 6467 +inevit 1 6.957497 0 0 6468 +varioussystem 1 6.957497 0 0 6469 +buffermanag 1 6.957497 0 0 6470 +enginethat 1 6.957497 0 0 6471 +possibleproject 1 6.957497 0 0 6472 +likecomplex 1 6.957497 0 0 6473 +becauseth 1 6.957497 0 0 6474 +betweenminibas 1 6.957497 0 0 6475 +somegener 1 6.957497 0 0 6476 +ideaon 1 6.957497 0 0 6477 +advanceof 1 6.957497 0 0 6478 +submitan 1 6.957497 0 0 6479 +discussth 1 6.957497 0 0 6480 +particularsystem 1 6.957497 0 0 6481 +documentwil 1 6.957497 0 0 6482 +picki 1 6.957497 0 0 6483 +geton 1 6.957497 0 0 6484 +oftest 1 6.957497 0 0 6485 +coursetextbook 1 6.957497 0 0 6486 +bookcontain 1 6.957497 0 0 6487 +databasebook 1 6.957497 0 0 6488 +thecampu 1 6.957497 0 0 6489 +collectedand 1 6.957497 0 0 6490 +postgr 1 6.957497 0 0 6491 +andillustra 1 6.957497 0 0 6492 +corearea 1 6.957497 0 0 6493 +navath 1 6.957497 0 0 6494 +tellsyou 1 6.957497 0 0 6495 +wonderfulrefer 1 6.957497 0 0 6496 +debuggingwith 1 6.957497 0 0 6497 +gradingpolici 1 6.957497 0 0 6498 +percentag 1 6.957497 0 0 6499 +anextra 1 6.957497 0 0 6500 +thefin 1 6.957497 0 0 6501 +willfocu 1 6.957497 0 0 6502 +coveredin 1 6.957497 0 0 6503 +professorpraveen 1 6.957497 0 0 6504 +teachingassist 1 6.957497 0 0 6505 +prerequisiteshandoutsscrib 1 6.957497 0 0 6506 +assignmentscontact 1 6.957497 0 0 6507 +informationrelev 1 6.957497 0 0 6508 +goalof 1 6.957497 0 0 6509 +multipleinherit 1 6.957497 0 0 6510 +subsum 1 6.957497 0 0 6511 +thestudi 1 6.957497 0 0 6512 +abstractli 1 6.957497 0 0 6513 +howprogram 1 6.957497 0 0 6514 +asnot 1 6.957497 0 0 6515 +preciser 1 6.957497 0 0 6516 +forform 1 6.957497 0 0 6517 +somethingabout 1 6.957497 0 0 6518 +tomanipul 1 6.957497 0 0 6519 +gunter 1 6.957497 0 0 6520 +paulson 1 6.957497 0 0 6521 +undergraduatemathemat 1 6.957497 0 0 6522 +mathematicalmatur 1 6.957497 0 0 6523 +anmeng 1 6.957497 0 0 6524 +ifth 1 6.957497 0 0 6525 +ulfar 1 6.957497 0 0 6526 +pmrelev 1 6.957497 0 0 6527 +comint 1 6.957497 0 0 6528 +inupson 1 6.957497 0 0 6529 +tome 1 6.957497 0 0 6530 +reschedul 1 6.957497 0 0 6531 +motwani 1 6.957497 0 0 6532 +debat 1 6.957497 0 0 6533 +pagefronti 1 6.957497 0 0 6534 +pmoffic 1 6.957497 0 0 6535 +pmcours 1 6.957497 0 0 6536 +descriptionparallel 1 6.957497 0 0 6537 +underscor 1 6.957497 0 0 6538 +erad 1 6.957497 0 0 6539 +competitor 1 6.957497 0 0 6540 +dash 1 6.957497 0 0 6541 +materialscours 1 6.957497 0 0 6542 +formatlectur 1 6.957497 0 0 6543 +pageintroduct 1 6.957497 0 0 6544 +sequentialcircuit 1 6.957497 0 0 6545 +andmicroprogram 1 6.957497 0 0 6546 +theappropri 1 6.957497 0 0 6547 +gethelp 1 6.957497 0 0 6548 +informationcoursemateri 1 6.957497 0 0 6549 +announcementsannounc 1 6.957497 0 0 6550 +onlinean 1 6.957497 0 0 6551 +forpeopl 1 6.957497 0 0 6552 +cclass 1 6.957497 0 0 6553 +learnc 1 6.957497 0 0 6554 +theyahoo 1 6.957497 0 0 6555 +ofmor 1 6.957497 0 0 6556 +inansw 1 6.957497 0 0 6557 +voneicken 1 6.957497 0 0 6558 +techniquescomput 1 6.957497 0 0 6559 +loader 1 6.957497 0 0 6560 +frequentlyfor 1 6.957497 0 0 6561 +onsundai 1 6.957497 0 0 6562 +personalmac 1 6.957497 0 0 6563 +gofer 1 6.957497 0 0 6564 +macgof 1 6.957497 0 0 6565 +jfoster 1 6.957497 0 0 6566 +hollist 1 6.957497 0 0 6567 +binhqx 1 6.957497 0 0 6568 +dynamicdata 1 6.957497 0 0 6569 +curri 1 6.957497 0 0 6570 +olin 1 6.957497 0 0 6571 +ahal 1 6.957497 0 0 6572 +walkerwednesdai 1 6.957497 0 0 6573 +kaykylesteveericvasantha 1 6.957497 0 0 6574 +danerickaychrisdan 1 6.957497 0 0 6575 +earlyvers 1 6.957497 0 0 6576 +announcetim 1 6.957497 0 0 6577 +theprelim 1 6.957497 0 0 6578 +wereannounc 1 6.957497 0 0 6579 +lastnam 1 6.957497 0 0 6580 +covereveryth 1 6.957497 0 0 6581 +topicsconv 1 6.957497 0 0 6582 +daywhenwherewhomondai 1 6.957497 0 0 6583 +davetuesdai 1 6.957497 0 0 6584 +jeffwednesdai 1 6.957497 0 0 6585 +davethursdai 1 6.957497 0 0 6586 +halfridai 1 6.957497 0 0 6587 +halsaturdai 1 6.957497 0 0 6588 +breview 1 6.957497 0 0 6589 +chrisand 1 6.957497 0 0 6590 +engrd 1 6.957497 0 0 6591 +bothcom 1 6.957497 0 0 6592 +programmingexperi 1 6.957497 0 0 6593 +ofalgorithm 1 6.957497 0 0 6594 +perkin 1 6.957497 0 0 6595 +sectionsdaytimeroominstructortuesdai 1 6.957497 0 0 6596 +perkinstuesdai 1 6.957497 0 0 6597 +perkinswednesdai 1 6.957497 0 0 6598 +walkerthursdai 1 6.957497 0 0 6599 +fosterfridai 1 6.957497 0 0 6600 +ofclass 1 6.957497 0 0 6601 +consultingsundaymondaytuesdaywednesdaythursdayfridai 1 6.957497 0 0 6602 +steveerickylechrisjpkyl 1 6.957497 0 0 6603 +steveerickylechrisjpvasantha 1 6.957497 0 0 6604 +josejosekayjosejpvasantha 1 6.957497 0 0 6605 +josejosekayjosejp 1 6.957497 0 0 6606 +macbinari 1 6.957497 0 0 6607 +parseabl 1 6.957497 0 0 6608 +waspost 1 6.957497 0 0 6609 +foraladdin 1 6.957497 0 0 6610 +armandonunez 1 6.957497 0 0 6611 +anylas 1 6.957497 0 0 6612 +applicationlik 1 6.957497 0 0 6613 +ishaskel 1 6.957497 0 0 6614 +systemsz 1 6.957497 0 0 6615 +ofgof 1 6.957497 0 0 6616 +itavail 1 6.957497 0 0 6617 +enhance_assign 1 6.957497 0 0 6618 +aladdin 1 6.957497 0 0 6619 +interpretationof 1 6.957497 0 0 6620 +programscomput 1 6.957497 0 0 6621 +macmarlai 1 6.957497 0 0 6622 +descriptionhandoutsadministriviaweb 1 6.957497 0 0 6623 +ofmodern 1 6.957497 0 0 6624 +connectionsto 1 6.957497 0 0 6625 +pmweb 1 6.957497 0 0 6626 +systemspract 1 6.957497 0 0 6627 +takingc 1 6.957497 0 0 6628 +logist 1 6.957497 0 0 6629 +homeworkshomework 1 6.957497 0 0 6630 +amexaminationsmidterm 1 6.957497 0 0 6631 +bibliographiesselect 1 6.957497 0 0 6632 +pagehigh 1 6.957497 0 0 6633 +eickenspr 1 6.957497 0 0 6634 +sessionthu 1 6.957497 0 0 6635 +tbdpleas 1 6.957497 0 0 6636 +willdetermin 1 6.957497 0 0 6637 +postersess 1 6.957497 0 0 6638 +cindywilliam 1 6.957497 0 0 6639 +ithorizont 1 6.957497 0 0 6640 +corridor 1 6.957497 0 0 6641 +presentyour 1 6.957497 0 0 6642 +asens 1 6.957497 0 0 6643 +contempl 1 6.957497 0 0 6644 +presentationswil 1 6.957497 0 0 6645 +nativespeak 1 6.957497 0 0 6646 +thelongest 1 6.957497 0 0 6647 +tocom 1 6.957497 0 0 6648 +finalreport 1 6.957497 0 0 6649 +aretri 1 6.957497 0 0 6650 +thesolut 1 6.957497 0 0 6651 +youreject 1 6.957497 0 0 6652 +webread 1 6.957497 0 0 6653 +convic 1 6.957497 0 0 6654 +bestsolut 1 6.957497 0 0 6655 +showcas 1 6.957497 0 0 6656 +ampl 1 6.957497 0 0 6657 +goodexplan 1 6.957497 0 0 6658 +whatyou 1 6.957497 0 0 6659 +projectsproject 1 6.957497 0 0 6660 +reportsproject 1 6.957497 0 0 6661 +proposalsiniti 1 6.957497 0 0 6662 +ideascours 1 6.957497 0 0 6663 +materialshomework 1 6.957497 0 0 6664 +pagebefor 1 6.957497 0 0 6665 +introc 1 6.957497 0 0 6666 +casec 1 6.957497 0 0 6667 +technologyc 1 6.957497 0 0 6668 +cachesc 1 6.957497 0 0 6669 +netsc 1 6.957497 0 0 6670 +spc 1 6.957497 0 0 6671 +cyou 1 6.957497 0 0 6672 +emdc 1 6.957497 0 0 6673 +sortingc 1 6.957497 0 0 6674 +spamc 1 6.957497 0 0 6675 +msgpassc 1 6.957497 0 0 6676 +mpic 1 6.957497 0 0 6677 +cachecohc 1 6.957497 0 0 6678 +locksc 1 6.957497 0 0 6679 +threadsc 1 6.957497 0 0 6680 +atmc 1 6.957497 0 0 6681 +netc 1 6.957497 0 0 6682 +scoreboardc 1 6.957497 0 0 6683 +tomasuloc 1 6.957497 0 0 6684 +predc 1 6.957497 0 0 6685 +superscalarc 1 6.957497 0 0 6686 +busesc 1 6.957497 0 0 6687 +pentiummaintain 1 6.957497 0 0 6688 +zabihteach 1 6.957497 0 0 6689 +millerclass 1 6.957497 0 0 6690 +phillip 1 6.957497 0 0 6691 +suggestionsproblem 1 6.957497 0 0 6692 +mestim 1 6.957497 0 0 6693 +censu 1 6.957497 0 0 6694 +eigenhausdorff 1 6.957497 0 0 6695 +recognitionsect 1 6.957497 0 0 6696 +equationoth 1 6.957497 0 0 6697 +wordnet 1 6.957497 0 0 6698 +pagecsintroduct 1 6.957497 0 0 6699 +understandingcomput 1 6.957497 0 0 6700 +announcementsher 1 6.957497 0 0 6701 +taggerbrown 1 6.957497 0 0 6702 +withpart 1 6.957497 0 0 6703 +wnsearchdir 1 6.957497 0 0 6704 +dict 1 6.957497 0 0 6705 +iicollect 1 6.957497 0 0 6706 +canus 1 6.957497 0 0 6707 +francisabout 1 6.957497 0 0 6708 +computationallinguist 1 6.957497 0 0 6709 +announcementsroom 1 6.957497 0 0 6710 +unforseen 1 6.957497 0 0 6711 +unableto 1 6.957497 0 0 6712 +maclab 1 6.957497 0 0 6713 +maxflow 1 6.957497 0 0 6714 +matroid 1 6.957497 0 0 6715 +binomi 1 6.957497 0 0 6716 +preflow 1 6.957497 0 0 6717 +henzingeremail 1 6.957497 0 0 6718 +informationhomework 1 6.957497 0 0 6719 +bellman 1 6.957497 0 0 6720 +fibonacci 1 6.957497 0 0 6721 +treap 1 6.957497 0 0 6722 +randomizedsearch 1 6.957497 0 0 6723 +mincut 1 6.957497 0 0 6724 +dinitz 1 6.957497 0 0 6725 +scmv 1 6.957497 0 0 6726 +quadratur 1 6.957497 0 0 6727 +ozan 1 6.957497 0 0 6728 +siblei 1 6.957497 0 0 6729 +martha 1 6.957497 0 0 6730 +cubic 1 6.957497 0 0 6731 +zcat 1 6.957497 0 0 6732 +computationsumm 1 6.957497 0 0 6733 +setsan 1 6.957497 0 0 6734 +andnonlinear 1 6.957497 0 0 6735 +ordinarydifferenti 1 6.957497 0 0 6736 +informationstaff 1 6.957497 0 0 6737 +hafizogullari 1 6.957497 0 0 6738 +lecturesclass 1 6.957497 0 0 6739 +administrationlauri 1 6.957497 0 0 6740 +buck 1 6.957497 0 0 6741 +addressedto 1 6.957497 0 0 6742 +corequisit 1 6.957497 0 0 6743 +materialstext 1 6.957497 0 0 6744 +approachus 1 6.957497 0 0 6745 +eitherth 1 6.957497 0 0 6746 +labsthi 1 6.957497 0 0 6747 +setsther 1 6.957497 0 0 6748 +orfrom 1 6.957497 0 0 6749 +computingproblem 1 6.957497 0 0 6750 +behandl 1 6.957497 0 0 6751 +gradefrom 1 6.957497 0 0 6752 +printyour 1 6.957497 0 0 6753 +firstpag 1 6.957497 0 0 6754 +partnernam 1 6.957497 0 0 6755 +gradingyour 1 6.957497 0 0 6756 +beassign 1 6.957497 0 0 6757 +onyour 1 6.957497 0 0 6758 +vandermond 1 6.957497 0 0 6759 +piecewis 1 6.957497 0 0 6760 +cote 1 6.957497 0 0 6761 +choleski 1 6.957497 0 0 6762 +rung 1 6.957497 0 0 6763 +kutta 1 6.957497 0 0 6764 +computingat 1 6.957497 0 0 6765 +rennselaerhal 1 6.957497 0 0 6766 +untar 1 6.957497 0 0 6767 +randperm 1 6.957497 0 0 6768 +motd 1 6.957497 0 0 6769 +lldiscuss 1 6.957497 0 0 6770 +prerequsit 1 6.957497 0 0 6771 +processsynchron 1 6.957497 0 0 6772 +requiringconst 1 6.957497 0 0 6773 +prerequsitescomplet 1 6.957497 0 0 6774 +inparticular 1 6.957497 0 0 6775 +theintroductori 1 6.957497 0 0 6776 +thatwil 1 6.957497 0 0 6777 +outlineth 1 6.957497 0 0 6778 +theorder 1 6.957497 0 0 6779 +mutualexclus 1 6.957497 0 0 6780 +timepermit 1 6.957497 0 0 6781 +textbooksth 1 6.957497 0 0 6782 +conceptsbook 1 6.957497 0 0 6783 +distributeclass 1 6.957497 0 0 6784 +noteswil 1 6.957497 0 0 6785 +pageat 1 6.957497 0 0 6786 +mondaythru 1 6.957497 0 0 6787 +thesewil 1 6.957497 0 0 6788 +thursdayat 1 6.957497 0 0 6789 +gradingeach 1 6.957497 0 0 6790 +weightag 1 6.957497 0 0 6791 +combinedweightag 1 6.957497 0 0 6792 +twomidterm 1 6.957497 0 0 6793 +collaborationat 1 6.957497 0 0 6794 +eachhomework 1 6.957497 0 0 6795 +thehomework 1 6.957497 0 0 6796 +closednot 1 6.957497 0 0 6797 +induprakaskodukula 1 6.957497 0 0 6798 +henzingerupson 1 6.957497 0 0 6799 +glewupson 1 6.957497 0 0 6800 +handoutshandout 1 6.957497 0 0 6801 +mlhandout 1 6.957497 0 0 6802 +lambdahomeworkshomework 1 6.957497 0 0 6803 +grieshomework 1 6.957497 0 0 6804 +notesraw 1 6.957497 0 0 6805 +noteslectur 1 6.957497 0 0 6806 +mllectur 1 6.957497 0 0 6807 +grieslectur 1 6.957497 0 0 6808 +linyuan 1 6.957497 0 0 6809 +blurb 1 6.957497 0 0 6810 +theobject 1 6.957497 0 0 6811 +formalizationof 1 6.957497 0 0 6812 +creationof 1 6.957497 0 0 6813 +systemsfor 1 6.957497 0 0 6814 +formalmethod 1 6.957497 0 0 6815 +suchsystem 1 6.957497 0 0 6816 +imp 1 6.957497 0 0 6817 +mizar 1 6.957497 0 0 6818 +quaif 1 6.957497 0 0 6819 +coqstud 1 6.957497 0 0 6820 +aboutthes 1 6.957497 0 0 6821 +projecthtml 1 6.957497 0 0 6822 +theqe 1 6.957497 0 0 6823 +manifestoplain 1 6.957497 0 0 6824 +qedmanifestobowen 1 6.957497 0 0 6825 +localform 1 6.957497 0 0 6826 +tannei 1 6.957497 0 0 6827 +trevor 1 6.957497 0 0 6828 +hick 1 6.957497 0 0 6829 +ruben 1 6.957497 0 0 6830 +gamboa 1 6.957497 0 0 6831 +circal 1 6.957497 0 0 6832 +turpin 1 6.957497 0 0 6833 +galoi 1 6.957497 0 0 6834 +stedit 1 6.957497 0 0 6835 +predictionch 1 6.957497 0 0 6836 +revieww 1 6.957497 0 0 6837 +architecturethi 1 6.957497 0 0 6838 +benchmarksto 1 6.957497 0 0 6839 +highperform 1 6.957497 0 0 6840 +memoryhierarchi 1 6.957497 0 0 6841 +studentswil 1 6.957497 0 0 6842 +undertak 1 6.957497 0 0 6843 +oftheir 1 6.957497 0 0 6844 +informationuniqu 1 6.957497 0 0 6845 +mikedahlinoffic 1 6.957497 0 0 6846 +tbdtaoffic 1 6.957497 0 0 6847 +tbdreadingstextbook 1 6.957497 0 0 6848 +patteson 1 6.957497 0 0 6849 +sheetfor 1 6.957497 0 0 6850 +pattersonin 1 6.957497 0 0 6851 +currentcomput 1 6.957497 0 0 6852 +readinglist 1 6.957497 0 0 6853 +scheduleweekdatetopicreadingduejan 1 6.957497 0 0 6854 +perf 1 6.957497 0 0 6855 +amdahl 1 6.957497 0 0 6856 +trendsch 1 6.957497 0 0 6857 +isa 1 6.957497 0 0 6858 +mlkholidayf 1 6.957497 0 0 6859 +proposalfeb 1 6.957497 0 0 6860 +scoreboard 1 6.957497 0 0 6861 +tomasulu 1 6.957497 0 0 6862 +speculationch 1 6.957497 0 0 6863 +processorsch 1 6.957497 0 0 6864 +dfeb 1 6.957497 0 0 6865 +hierarchych 1 6.957497 0 0 6866 +surveyfeb 1 6.957497 0 0 6867 +banksf 1 6.957497 0 0 6868 +breakm 1 6.957497 0 0 6869 +breakmar 1 6.957497 0 0 6870 +raidch 1 6.957497 0 0 6871 +networksf 1 6.957497 0 0 6872 +networksch 1 6.957497 0 0 6873 +checkpointapr 1 6.957497 0 0 6874 +architecturesf 1 6.957497 0 0 6875 +mppsch 1 6.957497 0 0 6876 +preseantationsm 1 6.957497 0 0 6877 +presentationsfri 1 6.957497 0 0 6878 +classesm 1 6.957497 0 0 6879 +reportaddit 1 6.957497 0 0 6880 +resourcescours 1 6.957497 0 0 6881 +reportsyahoo 1 6.957497 0 0 6882 +businessand 1 6.957497 0 0 6883 +economi 1 6.957497 0 0 6884 +systemsuniqu 1 6.957497 0 0 6885 +resultedin 1 6.957497 0 0 6886 +contextof 1 6.957497 0 0 6887 +understandingof 1 6.957497 0 0 6888 +addressproblem 1 6.957497 0 0 6889 +theissu 1 6.957497 0 0 6890 +addressedin 1 6.957497 0 0 6891 +occasionallyread 1 6.957497 0 0 6892 +understandingcurr 1 6.957497 0 0 6893 +reportspoint 1 6.957497 0 0 6894 +rosterhandout 1 6.957497 0 0 6895 +sslprotocol 1 6.957497 0 0 6896 +proofsketch 1 6.957497 0 0 6897 +fantasm 1 6.957497 0 0 6898 +p_global 1 6.957497 0 0 6899 +bonu 1 6.957497 0 0 6900 +electronc 1 6.957497 0 0 6901 +macsbug 1 6.957497 0 0 6902 +electoron 1 6.957497 0 0 6903 +edum 1 6.957497 0 0 6904 +withdraw 1 6.957497 0 0 6905 +rightmost 1 6.957497 0 0 6906 +procudur 1 6.957497 0 0 6907 +powermac 1 6.957497 0 0 6908 +quadra 1 6.957497 0 0 6909 +onmon 1 6.957497 0 0 6910 +cynthia 1 6.957497 0 0 6911 +deepa 1 6.957497 0 0 6912 +ramani 1 6.957497 0 0 6913 +dparam 1 6.957497 0 0 6914 +eduw 1 6.957497 0 0 6915 +eduf 1 6.957497 0 0 6916 +refund 1 6.957497 0 0 6917 +extenu 1 6.957497 0 0 6918 +boxin 1 6.957497 0 0 6919 +endia 1 6.957497 0 0 6920 +func 1 6.957497 0 0 6921 +practiv 1 6.957497 0 0 6922 +questionair 1 6.957497 0 0 6923 +brett 1 6.957497 0 0 6924 +subroutine_fil 1 6.957497 0 0 6925 +exception_fil 1 6.957497 0 0 6926 +avali 1 6.957497 0 0 6927 +turbo 1 6.957497 0 0 6928 +andther 1 6.957497 0 0 6929 +unabl 1 6.957497 0 0 6930 +luckfor 1 6.957497 0 0 6931 +dependon 1 6.957497 0 0 6932 +availib 1 6.957497 0 0 6933 +uptoth 1 6.957497 0 0 6934 +resolutio 1 6.957497 0 0 6935 +porterquest 1 6.957497 0 0 6936 +thecont 1 6.957497 0 0 6937 +atugl 1 6.957497 0 0 6938 +sostai 1 6.957497 0 0 6939 +iinstructorbruc 1 6.957497 0 0 6940 +tasoffic 1 6.957497 0 0 6941 +hourslab 1 6.957497 0 0 6942 +descriptionclass 1 6.957497 0 0 6943 +scheduleclass 1 6.957497 0 0 6944 +articlesclass 1 6.957497 0 0 6945 +newsgroupprogram 1 6.957497 0 0 6946 +pascaltutori 1 6.957497 0 0 6947 +faqyou 1 6.957497 0 0 6948 +zipe 1 6.957497 0 0 6949 +isocomp 1 6.957497 0 0 6950 +maccomp 1 6.957497 0 0 6951 +borlandcomp 1 6.957497 0 0 6952 +misccomp 1 6.957497 0 0 6953 +miscfj 1 6.957497 0 0 6954 +serverto 1 6.957497 0 0 6955 +importantstuff 1 6.957497 0 0 6956 +graphicsspr 1 6.957497 0 0 6957 +oneor 1 6.957497 0 0 6958 +examwil 1 6.957497 0 0 6959 +bothmai 1 6.957497 0 0 6960 +willcount 1 6.957497 0 0 6961 +reinstal 1 6.957497 0 0 6962 +gcomput 1 6.957497 0 0 6963 +graphicsfal 1 6.957497 0 0 6964 +libtcl 1 6.957497 0 0 6965 +libtk 1 6.957497 0 0 6966 +tclsh 1 6.957497 0 0 6967 +ciphertext 1 6.957497 0 0 6968 +digram 1 6.957497 0 0 6969 +fifo 1 6.957497 0 0 6970 +ispr 1 6.957497 0 0 6971 +alvisiteach 1 6.957497 0 0 6972 +joshicont 1 6.957497 0 0 6973 +stafflorenzo 1 6.957497 0 0 6974 +mechanicsi 1 6.957497 0 0 6975 +remaind 1 6.957497 0 0 6976 +classat 1 6.957497 0 0 6977 +isutexa 1 6.957497 0 0 6978 +mullend 1 6.957497 0 0 6979 +acmpress 1 6.957497 0 0 6980 +contentc 1 6.957497 0 0 6981 +tomorrow 1 6.957497 0 0 6982 +messagedeliveri 1 6.957497 0 0 6983 +backupapproach 1 6.957497 0 0 6984 +thepresent 1 6.957497 0 0 6985 +exemplifi 1 6.957497 0 0 6986 +principleshav 1 6.957497 0 0 6987 +meor 1 6.957497 0 0 6988 +apresent 1 6.957497 0 0 6989 +networksgradingther 1 6.957497 0 0 6990 +begrad 1 6.957497 0 0 6991 +onbehalf 1 6.957497 0 0 6992 +willrec 1 6.957497 0 0 6993 +ispermit 1 6.957497 0 0 6994 +acollabor 1 6.957497 0 0 6995 +forgrad 1 6.957497 0 0 6996 +collaborationswil 1 6.957497 0 0 6997 +nocollabor 1 6.957497 0 0 6998 +issuesthat 1 6.957497 0 0 6999 +bedistribut 1 6.957497 0 0 7000 +tocomplet 1 6.957497 0 0 7001 +twolectur 1 6.957497 0 0 7002 +choosethi 1 6.957497 0 0 7003 +asingl 1 6.957497 0 0 7004 +warmli 1 6.957497 0 0 7005 +toconsid 1 6.957497 0 0 7006 +excellentopportun 1 6.957497 0 0 7007 +setsin 1 6.957497 0 0 7008 +shouldconform 1 6.957497 0 0 7009 +synonym 1 6.957497 0 0 7010 +isrequir 1 6.957497 0 0 7011 +thatmak 1 6.957497 0 0 7012 +insuffici 1 6.957497 0 0 7013 +ofcorrect 1 6.957497 0 0 7014 +thetextbook 1 6.957497 0 0 7015 +asnapshot 1 6.957497 0 0 7016 +theprotocol 1 6.957497 0 0 7017 +atmost 1 6.957497 0 0 7018 +mattern 1 6.957497 0 0 7019 +thatcontain 1 6.957497 0 0 7020 +resist 1 6.957497 0 0 7021 +thetempt 1 6.957497 0 0 7022 +monitorprocess 1 6.957497 0 0 7023 +basedsnapshot 1 6.957497 0 0 7024 +nowonlin 1 6.957497 0 0 7025 +filedescrib 1 6.957497 0 0 7026 +examth 1 6.957497 0 0 7027 +fridaymai 1 6.957497 0 0 7028 +thepostscript 1 6.957497 0 0 7029 +freeto 1 6.957497 0 0 7030 +yoursuggest 1 6.957497 0 0 7031 +edurajeev 1 6.957497 0 0 7032 +bywil 1 6.957497 0 0 7033 +linea 1 6.957497 0 0 7034 +inhomework 1 6.957497 0 0 7035 +crude 1 6.957497 0 0 7036 +newsgrouputexa 1 6.957497 0 0 7037 +takesplac 1 6.957497 0 0 7038 +pascalprogramm 1 6.957497 0 0 7039 +viewinginform 1 6.957497 0 0 7040 +projecthow 1 6.957497 0 0 7041 +examand 1 6.957497 0 0 7042 +stroustrup 1 6.957497 0 0 7043 +libg 1 6.957497 0 0 7044 +descriptionc 1 6.957497 0 0 7045 +programminglast 1 6.957497 0 0 7046 +rajaram 1 6.957497 0 0 7047 +lavendercours 1 6.957497 0 0 7048 +syllabusannouncementslectur 1 6.957497 0 0 7049 +solutionsprogram 1 6.957497 0 0 7050 +assignmentsgnu 1 6.957497 0 0 7051 +manualsstandard 1 6.957497 0 0 7052 +codesocket 1 6.957497 0 0 7053 +manualdescript 1 6.957497 0 0 7054 +anintroductori 1 6.957497 0 0 7055 +reusablepattern 1 6.957497 0 0 7056 +typehierarchi 1 6.957497 0 0 7057 +professionallyus 1 6.957497 0 0 7058 +horstmann 1 6.957497 0 0 7059 +cargil 1 6.957497 0 0 7060 +lomow 1 6.957497 0 0 7061 +coplien 1 6.957497 0 0 7062 +idiom 1 6.957497 0 0 7063 +plauger 1 6.957497 0 0 7064 +vlissid 1 6.957497 0 0 7065 +announcementsabout 1 6.957497 0 0 7066 +linediscuss 1 6.957497 0 0 7067 +lavendery 1 6.957497 0 0 7068 +helpjava 1 6.957497 0 0 7069 +advocaci 1 6.957497 0 0 7070 +oopth 1 6.957497 0 0 7071 +objectspac 1 6.957497 0 0 7072 +libstdc 1 6.957497 0 0 7073 +mitgnu 1 6.957497 0 0 7074 +cygnusgnu 1 6.957497 0 0 7075 +ftpobject 1 6.957497 0 0 7076 +developmentindex 1 6.957497 0 0 7077 +librariesth 1 6.957497 0 0 7078 +libraryindex 1 6.957497 0 0 7079 +talig 1 6.957497 0 0 7080 +frameworkjava 1 6.957497 0 0 7081 +registri 1 6.957497 0 0 7082 +espresso 1 6.957497 0 0 7083 +kafura 1 6.957497 0 0 7084 +techdoug 1 6.957497 0 0 7085 +schmidt 1 6.957497 0 0 7086 +universitydoug 1 6.957497 0 0 7087 +sunyintroductori 1 6.957497 0 0 7088 +groningen 1 6.957497 0 0 7089 +compilerscst 1 6.957497 0 0 7090 +posix 1 6.957497 0 0 7091 +hierarchieslast 1 6.957497 0 0 7092 +linlin 1 6.957497 0 0 7093 +csintroduct 1 6.957497 0 0 7094 +thot 1 6.957497 0 0 7095 +systemsfil 1 6.957497 0 0 7096 +systemstopolog 1 6.957497 0 0 7097 +systemselectron 1 6.957497 0 0 7098 +commenrcefailur 1 6.957497 0 0 7099 +detectorsdistribut 1 6.957497 0 0 7100 +objectsconsistencysecuregroup 1 6.957497 0 0 7101 +communicationlanguag 1 6.957497 0 0 7102 +dsmmobil 1 6.957497 0 0 7103 +databasesprof 1 6.957497 0 0 7104 +mirankernew 1 6.957497 0 0 7105 +seminarschedul 1 6.957497 0 0 7106 +overviewtentativeread 1 6.957497 0 0 7107 +homeworkproject 1 6.957497 0 0 7108 +placespr 1 6.957497 0 0 7109 +mooneyteach 1 6.957497 0 0 7110 +ramachandrantim 1 6.957497 0 0 7111 +alsout 1 6.957497 0 0 7112 +textparadigm 1 6.957497 0 0 7113 +lispassignmentsse 1 6.957497 0 0 7114 +textmachinelearninglectur 1 6.957497 0 0 7115 +learningassignmentsse 1 6.957497 0 0 7116 +sciencec 1 6.957497 0 0 7117 +treesassign 1 6.957497 0 0 7118 +atleast 1 6.957497 0 0 7119 +programmingcours 1 6.957497 0 0 7120 +precalculu 1 6.957497 0 0 7121 +theschem 1 6.957497 0 0 7122 +coursesand 1 6.957497 0 0 7123 +learninga 1 6.957497 0 0 7124 +tutorcopi 1 6.957497 0 0 7125 +pcassign 1 6.957497 0 0 7126 +simulationassign 1 6.957497 0 0 7127 +webassign 1 6.957497 0 0 7128 +schemeassign 1 6.957497 0 0 7129 +gamblingassign 1 6.957497 0 0 7130 +graphicsassign 1 6.957497 0 0 7131 +manipulationstudi 1 6.957497 0 0 7132 +vocabulari 1 6.957497 0 0 7133 +thickensassign 1 6.957497 0 0 7134 +huntassign 1 6.957497 0 0 7135 +algebraassign 1 6.957497 0 0 7136 +matricesstudi 1 6.957497 0 0 7137 +unparsingassign 1 6.957497 0 0 7138 +translationstudi 1 6.957497 0 0 7139 +descriptionsprogram 1 6.957497 0 0 7140 +codei 1 6.957497 0 0 7141 +dedicatetheir 1 6.957497 0 0 7142 +guidegordon 1 6.957497 0 0 7143 +intelligenceartifici 1 6.957497 0 0 7144 +todupl 1 6.957497 0 0 7145 +connectspercept 1 6.957497 0 0 7146 +andknowledg 1 6.957497 0 0 7147 +withbrief 1 6.957497 0 0 7148 +descriptionsmidterm 1 6.957497 0 0 7149 +guidepred 1 6.957497 0 0 7150 +problemsnot 1 6.957497 0 0 7151 +braingordon 1 6.957497 0 0 7152 +programmingautomat 1 6.957497 0 0 7153 +programsfrom 1 6.957497 0 0 7154 +illustrateth 1 6.957497 0 0 7155 +requirelearn 1 6.957497 0 0 7156 +partof 1 6.957497 0 0 7157 +syllabusbibliographyassign 1 6.957497 0 0 7158 +handpattern 1 6.957497 0 0 7159 +matchingobject 1 6.957497 0 0 7160 +programmingintroduct 1 6.957497 0 0 7161 +glispview 1 6.957497 0 0 7162 +programminggordon 1 6.957497 0 0 7163 +symbiot 1 6.957497 0 0 7164 +pram 1 6.957497 0 0 7165 +programmingoctob 1 6.957497 0 0 7166 +wilsonextens 1 6.957497 0 0 7167 +ramachandranth 1 6.957497 0 0 7168 +theapplic 1 6.957497 0 0 7169 +sane 1 6.957497 0 0 7170 +datasourc 1 6.957497 0 0 7171 +theabstract 1 6.957497 0 0 7172 +sciencecst 1 6.957497 0 0 7173 +apass 1 6.957497 0 0 7174 +beregist 1 6.957497 0 0 7175 +schedulespeakertitleseptemb 1 6.957497 0 0 7176 +mirankeralamo 1 6.957497 0 0 7177 +warehouseseptemb 1 6.957497 0 0 7178 +kuipersth 1 6.957497 0 0 7179 +humanand 1 6.957497 0 0 7180 +mapsseptemb 1 6.957497 0 0 7181 +blumofecilk 1 6.957497 0 0 7182 +reliableparallel 1 6.957497 0 0 7183 +workstationsseptemb 1 6.957497 0 0 7184 +miikkulainenlearn 1 6.957497 0 0 7185 +throughsymbiot 1 6.957497 0 0 7186 +networksoctob 1 6.957497 0 0 7187 +lifschitzmathemat 1 6.957497 0 0 7188 +reflectionoctob 1 6.957497 0 0 7189 +mooneylearn 1 6.957497 0 0 7190 +usinginduct 1 6.957497 0 0 7191 +dahlindistribut 1 6.957497 0 0 7192 +internetsnovemb 1 6.957497 0 0 7193 +novaksoftwar 1 6.957497 0 0 7194 +genericprocedur 1 6.957497 0 0 7195 +viewsnovemb 1 6.957497 0 0 7196 +parallelalgorithmsnovemb 1 6.957497 0 0 7197 +alvisilighweight 1 6.957497 0 0 7198 +tolerancenovemb 1 6.957497 0 0 7199 +linadapt 1 6.957497 0 0 7200 +optimizationdecemb 1 6.957497 0 0 7201 +plaxtonanalysi 1 6.957497 0 0 7202 +algorithmslighweight 1 6.957497 0 0 7203 +tolerancelorenzo 1 6.957497 0 0 7204 +alvisidistribut 1 6.957497 0 0 7205 +confin 1 6.957497 0 0 7206 +revolution 1 6.957497 0 0 7207 +beyondth 1 6.957497 0 0 7208 +toleranttechniqu 1 6.957497 0 0 7209 +willceas 1 6.957497 0 0 7210 +exot 1 6.957497 0 0 7211 +distributedinform 1 6.957497 0 0 7212 +acompetit 1 6.957497 0 0 7213 +criticalinform 1 6.957497 0 0 7214 +engineerfault 1 6.957497 0 0 7215 +negligibleimpact 1 6.957497 0 0 7216 +communicatethrough 1 6.957497 0 0 7217 +onnetwork 1 6.957497 0 0 7218 +workstationsrobert 1 6.957497 0 0 7219 +blumofethi 1 6.957497 0 0 7220 +pronouncedsilk 1 6.957497 0 0 7221 +andcilk 1 6.957497 0 0 7222 +functionalsubset 1 6.957497 0 0 7223 +providesadapt 1 6.957497 0 0 7224 +tranpar 1 6.957497 0 0 7225 +touser 1 6.957497 0 0 7226 +shrinkdynam 1 6.957497 0 0 7227 +cilkprogram 1 6.957497 0 0 7228 +workstationscrash 1 6.957497 0 0 7229 +andrecov 1 6.957497 0 0 7230 +livedemonstr 1 6.957497 0 0 7231 +internetsmik 1 6.957497 0 0 7232 +dahlinthi 1 6.957497 0 0 7233 +applicationsmotiv 1 6.957497 0 0 7234 +inclust 1 6.957497 0 0 7235 +servicei 1 6.957497 0 0 7236 +nodesto 1 6.957497 0 0 7237 +centralserv 1 6.957497 0 0 7238 +goodperform 1 6.957497 0 0 7239 +networkperform 1 6.957497 0 0 7240 +projectwil 1 6.957497 0 0 7241 +mapsbenjamin 1 6.957497 0 0 7242 +kuipershuman 1 6.957497 0 0 7243 +forlarg 1 6.957497 0 0 7244 +ontolog 1 6.957497 0 0 7245 +varietyof 1 6.957497 0 0 7246 +cast 1 6.957497 0 0 7247 +diverserepresent 1 6.957497 0 0 7248 +spatialsemant 1 6.957497 0 0 7249 +andassumpt 1 6.957497 0 0 7250 +thecontrol 1 6.957497 0 0 7251 +beabstract 1 6.957497 0 0 7252 +givinga 1 6.957497 0 0 7253 +causalgraph 1 6.957497 0 0 7254 +topologicalnetwork 1 6.957497 0 0 7255 +occupancygrid 1 6.957497 0 0 7256 +theframework 1 6.957497 0 0 7257 +ofglob 1 6.957497 0 0 7258 +programmingvladimir 1 6.957497 0 0 7259 +lifschitzlog 1 6.957497 0 0 7260 +functionalprogram 1 6.957497 0 0 7261 +notne 1 6.957497 0 0 7262 +itcan 1 6.957497 0 0 7263 +executedus 1 6.957497 0 0 7264 +withdefin 1 6.957497 0 0 7265 +thereason 1 6.957497 0 0 7266 +thesound 1 6.957497 0 0 7267 +optimizationcalvin 1 6.957497 0 0 7268 +linthi 1 6.957497 0 0 7269 +andtheir 1 6.957497 0 0 7270 +differenthardwar 1 6.957497 0 0 7271 +efficientand 1 6.957497 0 0 7272 +suchlibrari 1 6.957497 0 0 7273 +weexplain 1 6.957497 0 0 7274 +networksristo 1 6.957497 0 0 7275 +miikkulainena 1 6.957497 0 0 7276 +neuronsthrough 1 6.957497 0 0 7277 +anddiscourag 1 6.957497 0 0 7278 +suboptim 1 6.957497 0 0 7279 +toextract 1 6.957497 0 0 7280 +sequentialdecis 1 6.957497 0 0 7281 +warehousedan 1 6.957497 0 0 7282 +mirankerth 1 6.957497 0 0 7283 +andint 1 6.957497 0 0 7284 +theuser 1 6.957497 0 0 7285 +byqueri 1 6.957497 0 0 7286 +interfacethat 1 6.957497 0 0 7287 +ofabstract 1 6.957497 0 0 7288 +clever 1 6.957497 0 0 7289 +anddata 1 6.957497 0 0 7290 +activedatabas 1 6.957497 0 0 7291 +constructedus 1 6.957497 0 0 7292 +databasefacil 1 6.957497 0 0 7293 +thealamo 1 6.957497 0 0 7294 +dataintegr 1 6.957497 0 0 7295 +elementsof 1 6.957497 0 0 7296 +furthercomposit 1 6.957497 0 0 7297 +answerhigh 1 6.957497 0 0 7298 +logicprogrammingraymond 1 6.957497 0 0 7299 +mooneyinduct 1 6.957497 0 0 7300 +learningprolog 1 6.957497 0 0 7301 +offirst 1 6.957497 0 0 7302 +learningmethod 1 6.957497 0 0 7303 +areappli 1 6.957497 0 0 7304 +believethi 1 6.957497 0 0 7305 +richer 1 6.957497 0 0 7306 +parsersfrom 1 6.957497 0 0 7307 +superior 1 6.957497 0 0 7308 +onsever 1 6.957497 0 0 7309 +networkmethod 1 6.957497 0 0 7310 +ati 1 6.957497 0 0 7311 +ofairlin 1 6.957497 0 0 7312 +automaticallydevelop 1 6.957497 0 0 7313 +englishdatabas 1 6.957497 0 0 7314 +moreaccur 1 6.957497 0 0 7315 +smallgeograph 1 6.957497 0 0 7316 +tens 1 6.957497 0 0 7317 +treemethod 1 6.957497 0 0 7318 +throughviewsgordon 1 6.957497 0 0 7319 +toachiev 1 6.957497 0 0 7320 +thesoftwar 1 6.957497 0 0 7321 +typesus 1 6.957497 0 0 7322 +specifyview 1 6.957497 0 0 7323 +adesir 1 6.957497 0 0 7324 +algorithmsgreg 1 6.957497 0 0 7325 +plaxtona 1 6.957497 0 0 7326 +forspecif 1 6.957497 0 0 7327 +notuncommon 1 6.957497 0 0 7328 +havelittl 1 6.957497 0 0 7329 +suchpap 1 6.957497 0 0 7330 +gapsinher 1 6.957497 0 0 7331 +inadequatefor 1 6.957497 0 0 7332 +straightforwardalgorithm 1 6.957497 0 0 7333 +theconceptu 1 6.957497 0 0 7334 +trivialclass 1 6.957497 0 0 7335 +blum 1 6.957497 0 0 7336 +pratt 1 6.957497 0 0 7337 +algorithmsvijaya 1 6.957497 0 0 7338 +forcombinatori 1 6.957497 0 0 7339 +recentyear 1 6.957497 0 0 7340 +willdescrib 1 6.957497 0 0 7341 +thesealgorithm 1 6.957497 0 0 7342 +thendescrib 1 6.957497 0 0 7343 +wepropos 1 6.957497 0 0 7344 +parallelshar 1 6.957497 0 0 7345 +reflectionpaul 1 6.957497 0 0 7346 +addnew 1 6.957497 0 0 7347 +structureaccordingli 1 6.957497 0 0 7348 +adapat 1 6.957497 0 0 7349 +extensiblelanguag 1 6.957497 0 0 7350 +crypt 1 6.957497 0 0 7351 +multiplemap 1 6.957497 0 0 7352 +themap 1 6.957497 0 0 7353 +decrypt 1 6.957497 0 0 7354 +solutionsread 1 6.957497 0 0 7355 +networksfal 1 6.957497 0 0 7356 +fausett 1 6.957497 0 0 7357 +englewood 1 6.957497 0 0 7358 +prenticehal 1 6.957497 0 0 7359 +schedulehomework 1 6.957497 0 0 7360 +assignmentsexamsclass 1 6.957497 0 0 7361 +resourcesa 1 6.957497 0 0 7362 +versionof 1 6.957497 0 0 7363 +syllabusristo 1 6.957497 0 0 7364 +asher 1 6.957497 0 0 7365 +waggen 1 6.957497 0 0 7366 +nasher 1 6.957497 0 0 7367 +berti 1 6.957497 0 0 7368 +posner 1 6.957497 0 0 7369 +mitpress 1 6.957497 0 0 7370 +withanoth 1 6.957497 0 0 7371 +alsorequir 1 6.957497 0 0 7372 +descriptioncours 1 6.957497 0 0 7373 +schedulediscuss 1 6.957497 0 0 7374 +notesperson 1 6.957497 0 0 7375 +adscollabor 1 6.957497 0 0 7376 +paperclass 1 6.957497 0 0 7377 +resourcesstud 1 6.957497 0 0 7378 +questionnaireus 1 6.957497 0 0 7379 +sciencefaculti 1 6.957497 0 0 7380 +thenewsgroup 1 6.957497 0 0 7381 +thetest 1 6.957497 0 0 7382 +programmingcsp 1 6.957497 0 0 7383 +pascalintroductori 1 6.957497 0 0 7384 +programminginstructor 1 6.957497 0 0 7385 +gallagherwelcom 1 6.957497 0 0 7386 +cspi 1 6.957497 0 0 7387 +andso 1 6.957497 0 0 7388 +otherdeadlin 1 6.957497 0 0 7389 +thesyllabu 1 6.957497 0 0 7390 +jenn 1 6.957497 0 0 7391 +takethi 1 6.957497 0 0 7392 +courseeach 1 6.957497 0 0 7393 +howwel 1 6.957497 0 0 7394 +deadlineto 1 6.957497 0 0 7395 +thursdayeven 1 6.957497 0 0 7396 +intosmal 1 6.957497 0 0 7397 +ateach 1 6.957497 0 0 7398 +thatlaboratori 1 6.957497 0 0 7399 +thatgrad 1 6.957497 0 0 7400 +thattest 1 6.957497 0 0 7401 +limitedand 1 6.957497 0 0 7402 +foravail 1 6.957497 0 0 7403 +proctor 1 6.957497 0 0 7404 +hoursbefor 1 6.957497 0 0 7405 +andquizz 1 6.957497 0 0 7406 +betaken 1 6.957497 0 0 7407 +prescrib 1 6.957497 0 0 7408 +openedfor 1 6.957497 0 0 7409 +yourstud 1 6.957497 0 0 7410 +orsak 1 6.957497 0 0 7411 +weem 1 6.957497 0 0 7412 +liabl 1 6.957497 0 0 7413 +ret_into 1 6.957497 0 0 7414 +mdb 1 6.957497 0 0 7415 +diff 1 6.957497 0 0 7416 +medec 1 6.957497 0 0 7417 +individualfaculti 1 6.957497 0 0 7418 +contactgloria 1 6.957497 0 0 7419 +dalei 1 6.957497 0 0 7420 +formultiprogram 1 6.957497 0 0 7421 +karshmer 1 6.957497 0 0 7422 +nehmer 1 6.957497 0 0 7423 +schroeder 1 6.957497 0 0 7424 +needham 1 6.957497 0 0 7425 +trigger 1 6.957497 0 0 7426 +prerequisitegradu 1 6.957497 0 0 7427 +systemssuch 1 6.957497 0 0 7428 +materialin 1 6.957497 0 0 7429 +andsilberschatz 1 6.957497 0 0 7430 +coveringboth 1 6.957497 0 0 7431 +anemphasi 1 6.957497 0 0 7432 +anddiscuss 1 6.957497 0 0 7433 +aterm 1 6.957497 0 0 7434 +systemsfernando 1 6.957497 0 0 7435 +corbato 1 6.957497 0 0 7436 +marjori 1 6.957497 0 0 7437 +merwin 1 6.957497 0 0 7438 +daggett 1 6.957497 0 0 7439 +brinch 1 6.957497 0 0 7440 +clingen 1 6.957497 0 0 7441 +tannenbaum 1 6.957497 0 0 7442 +andexampl 1 6.957497 0 0 7443 +managementa 1 6.957497 0 0 7444 +forshar 1 6.957497 0 0 7445 +schedulingr 1 6.957497 0 0 7446 +communicationj 1 6.957497 0 0 7447 +birel 1 6.957497 0 0 7448 +rpc 1 6.957497 0 0 7449 +lightweightremot 1 6.957497 0 0 7450 +migrationf 1 6.957497 0 0 7451 +dougli 1 6.957497 0 0 7452 +spriteoper 1 6.957497 0 0 7453 +theimer 1 6.957497 0 0 7454 +lantz 1 6.957497 0 0 7455 +preemptabl 1 6.957497 0 0 7456 +tolerancef 1 6.957497 0 0 7457 +sand 1 6.957497 0 0 7458 +sandberg 1 6.957497 0 0 7459 +kleiman 1 6.957497 0 0 7460 +ofsun 1 6.957497 0 0 7461 +mckusick 1 6.957497 0 0 7462 +leffler 1 6.957497 0 0 7463 +fabri 1 6.957497 0 0 7464 +fastfil 1 6.957497 0 0 7465 +alog 1 6.957497 0 0 7466 +systemsm 1 6.957497 0 0 7467 +gifford 1 6.957497 0 0 7468 +securityr 1 6.957497 0 0 7469 +inlarg 1 6.957497 0 0 7470 +butler 1 6.957497 0 0 7471 +lampson 1 6.957497 0 0 7472 +accetta 1 6.957497 0 0 7473 +boloski 1 6.957497 0 0 7474 +tevanian 1 6.957497 0 0 7475 +systemsh 1 6.957497 0 0 7476 +kopetz 1 6.957497 0 0 7477 +timesystem 1 6.957497 0 0 7478 +layland 1 6.957497 0 0 7479 +ramamritham 1 6.957497 0 0 7480 +stankov 1 6.957497 0 0 7481 +schedulingund 1 6.957497 0 0 7482 +mercer 1 6.957497 0 0 7483 +computingb 1 6.957497 0 0 7484 +badrinath 1 6.957497 0 0 7485 +acharya 1 6.957497 0 0 7486 +imielinski 1 6.957497 0 0 7487 +satyanarayanan 1 6.957497 0 0 7488 +okasaki 1 6.957497 0 0 7489 +siegel 1 6.957497 0 0 7490 +coda 1 6.957497 0 0 7491 +distributedworkst 1 6.957497 0 0 7492 +steinmetz 1 6.957497 0 0 7493 +sitaram 1 6.957497 0 0 7494 +coulson 1 6.957497 0 0 7495 +descriptiongener 1 6.957497 0 0 7496 +boththeoret 1 6.957497 0 0 7497 +systemsupport 1 6.957497 0 0 7498 +transportprotocol 1 6.957497 0 0 7499 +designissu 1 6.957497 0 0 7500 +textbooka 1 6.957497 0 0 7501 +requirementsth 1 6.957497 0 0 7502 +relatedpap 1 6.957497 0 0 7503 +tounderstand 1 6.957497 0 0 7504 +asemest 1 6.957497 0 0 7505 +vintuesdai 1 6.957497 0 0 7506 +assistantmr 1 6.957497 0 0 7507 +eduread 1 6.957497 0 0 7508 +cntain 1 6.957497 0 0 7509 +theread 1 6.957497 0 0 7510 +speedwai 1 6.957497 0 0 7511 +dobi 1 6.957497 0 0 7512 +mall 1 6.957497 0 0 7513 +callthem 1 6.957497 0 0 7514 +compressionr 1 6.957497 0 0 7515 +wallac 1 6.957497 0 0 7516 +gall 1 6.957497 0 0 7517 +anastassi 1 6.957497 0 0 7518 +digitaltelevis 1 6.957497 0 0 7519 +serversoverview 1 6.957497 0 0 7520 +serverdesign 1 6.957497 0 0 7521 +chiueh 1 6.957497 0 0 7522 +groupedsweep 1 6.957497 0 0 7523 +ofthird 1 6.957497 0 0 7524 +narasimha 1 6.957497 0 0 7525 +wylli 1 6.957497 0 0 7526 +admissioncontrol 1 6.957497 0 0 7527 +designinglarg 1 6.957497 0 0 7528 +inmultimedia 1 6.957497 0 0 7529 +interactivevideo 1 6.957497 0 0 7530 +playout 1 6.957497 0 0 7531 +shahabuddin 1 6.957497 0 0 7532 +foran 1 6.957497 0 0 7533 +demandvideo 1 6.957497 0 0 7534 +papadimitri 1 6.957497 0 0 7535 +ramanathan 1 6.957497 0 0 7536 +informationcach 1 6.957497 0 0 7537 +homeentertain 1 6.957497 0 0 7538 +multimedianetwork 1 6.957497 0 0 7539 +ferrari 1 6.957497 0 0 7540 +channelestablish 1 6.957497 0 0 7541 +areasin 1 6.957497 0 0 7542 +servicedisciplin 1 6.957497 0 0 7543 +workshopon 1 6.957497 0 0 7544 +losslesssmooth 1 6.957497 0 0 7545 +salehi 1 6.957497 0 0 7546 +kuros 1 6.957497 0 0 7547 +towslei 1 6.957497 0 0 7548 +storedvideo 1 6.957497 0 0 7549 +requirementsthrough 1 6.957497 0 0 7550 +grossglaus 1 6.957497 0 0 7551 +rcbr 1 6.957497 0 0 7552 +efficientservic 1 6.957497 0 0 7553 +kanakia 1 6.957497 0 0 7554 +reibman 1 6.957497 0 0 7555 +congestioncontrol 1 6.957497 0 0 7556 +tennenhous 1 6.957497 0 0 7557 +newgener 1 6.957497 0 0 7558 +hutchison 1 6.957497 0 0 7559 +servicearchitectur 1 6.957497 0 0 7560 +turner 1 6.957497 0 0 7561 +reliablemulticast 1 6.957497 0 0 7562 +levelfram 1 6.957497 0 0 7563 +deffner 1 6.957497 0 0 7564 +schulzrinn 1 6.957497 0 0 7565 +blakowski 1 6.957497 0 0 7566 +onselect 1 6.957497 0 0 7567 +januaryoper 1 6.957497 0 0 7568 +multimediag 1 6.957497 0 0 7569 +robin 1 6.957497 0 0 7570 +blair 1 6.957497 0 0 7571 +papathoma 1 6.957497 0 0 7572 +choru 1 6.957497 0 0 7573 +druschel 1 6.957497 0 0 7574 +abbott 1 6.957497 0 0 7575 +pagel 1 6.957497 0 0 7576 +systemssupport 1 6.957497 0 0 7577 +conferencingh 1 6.957497 0 0 7578 +venkatrangan 1 6.957497 0 0 7579 +packetvideo 1 6.957497 0 0 7580 +crowcroft 1 6.957497 0 0 7581 +warldersburg 1 6.957497 0 0 7582 +synopsisthi 1 6.957497 0 0 7583 +bediscuss 1 6.957497 0 0 7584 +andmultimedia 1 6.957497 0 0 7585 +multimediadatabas 1 6.957497 0 0 7586 +determinedbas 1 6.957497 0 0 7587 +orcarri 1 6.957497 0 0 7588 +hoursfridai 1 6.957497 0 0 7589 +flexibleframework 1 6.957497 0 0 7590 +handlei 1 6.957497 0 0 7591 +wakeman 1 6.957497 0 0 7592 +controlchannel 1 6.957497 0 0 7593 +cccp 1 6.957497 0 0 7594 +conferencecontrol 1 6.957497 0 0 7595 +gajewska 1 6.957497 0 0 7596 +manass 1 6.957497 0 0 7597 +argo 1 6.957497 0 0 7598 +systemfor 1 6.957497 0 0 7599 +gong 1 6.957497 0 0 7600 +multipoint 1 6.957497 0 0 7601 +basedmultimedia 1 6.957497 0 0 7602 +ieeecomput 1 6.957497 0 0 7603 +datagraminternetwork 1 6.957497 0 0 7604 +ballardi 1 6.957497 0 0 7605 +thyagarajan 1 6.957497 0 0 7606 +widyono 1 6.957497 0 0 7607 +msthesi 1 6.957497 0 0 7608 +kompella 1 6.957497 0 0 7609 +pasqual 1 6.957497 0 0 7610 +polyzo 1 6.957497 0 0 7611 +multimediacommun 1 6.957497 0 0 7612 +weightsess 1 6.957497 0 0 7613 +ofacm 1 6.957497 0 0 7614 +holbrook 1 6.957497 0 0 7615 +fordistribut 1 6.957497 0 0 7616 +herzog 1 6.957497 0 0 7617 +estrin 1 6.957497 0 0 7618 +timecommun 1 6.957497 0 0 7619 +servicesj 1 6.957497 0 0 7620 +guyton 1 6.957497 0 0 7621 +schwartz 1 6.957497 0 0 7622 +mogul 1 6.957497 0 0 7623 +forpersist 1 6.957497 0 0 7624 +supportc 1 6.957497 0 0 7625 +lotteri 1 6.957497 0 0 7626 +flexibleproport 1 6.957497 0 0 7627 +mangement 1 6.957497 0 0 7628 +strideschedul 1 6.957497 0 0 7629 +golestani 1 6.957497 0 0 7630 +speedappl 1 6.957497 0 0 7631 +timeproduc 1 6.957497 0 0 7632 +ofeffici 1 6.957497 0 0 7633 +sigapp 1 6.957497 0 0 7634 +intim 1 6.957497 0 0 7635 +databasesw 1 6.957497 0 0 7636 +contentus 1 6.957497 0 0 7637 +cawkel 1 6.957497 0 0 7638 +weymouth 1 6.957497 0 0 7639 +vimsi 1 6.957497 0 0 7640 +submatrix 1 6.957497 0 0 7641 +ramachandranuniqu 1 6.957497 0 0 7642 +onsigma 1 6.957497 0 0 7643 +oroth 1 6.957497 0 0 7644 +isther 1 6.957497 0 0 7645 +paragraphof 1 6.957497 0 0 7646 +containdistinct 1 6.957497 0 0 7647 +cancontain 1 6.957497 0 0 7648 +unclear 1 6.957497 0 0 7649 +somek 1 6.957497 0 0 7650 +bepost 1 6.957497 0 0 7651 +youhav 1 6.957497 0 0 7652 +yourbest 1 6.957497 0 0 7653 +judgment 1 6.957497 0 0 7654 +meroon 1 6.957497 0 0 7655 +runschem 1 6.957497 0 0 7656 +orani 1 6.957497 0 0 7657 +andinstal 1 6.957497 0 0 7658 +itfrom 1 6.957497 0 0 7659 +friendlier 1 6.957497 0 0 7660 +fornewbi 1 6.957497 0 0 7661 +gettinggambit 1 6.957497 0 0 7662 +bestschem 1 6.957497 0 0 7663 +guil 1 6.957497 0 0 7664 +mzscheme 1 6.957497 0 0 7665 +doingobject 1 6.957497 0 0 7666 +tous 1 6.957497 0 0 7667 +freeimplement 1 6.957497 0 0 7668 +getinterest 1 6.957497 0 0 7669 +paulwilson 1 6.957497 0 0 7670 +yourbrows 1 6.957497 0 0 7671 +mostrec 1 6.957497 0 0 7672 +ondeclar 1 6.957497 0 0 7673 +arereason 1 6.957497 0 0 7674 +willchang 1 6.957497 0 0 7675 +islik 1 6.957497 0 0 7676 +adventur 1 6.957497 0 0 7677 +usinga 1 6.957497 0 0 7678 +throughchapt 1 6.957497 0 0 7679 +sanoth 1 6.957497 0 0 7680 +thanprint 1 6.957497 0 0 7681 +weget 1 6.957497 0 0 7682 +onlinebrows 1 6.957497 0 0 7683 +coursenot 1 6.957497 0 0 7684 +miscellanousfunct 1 6.957497 0 0 7685 +shouldconsult 1 6.957497 0 0 7686 +itsens 1 6.957497 0 0 7687 +andnot 1 6.957497 0 0 7688 +setofrul 1 6.957497 0 0 7689 +ofanim 1 6.957497 0 0 7690 +simpleobject 1 6.957497 0 0 7691 +metaclass 1 6.957497 0 0 7692 +circular 1 6.957497 0 0 7693 +onclass 1 6.957497 0 0 7694 +decimalinteg 1 6.957497 0 0 7695 +hexinteg 1 6.957497 0 0 7696 +octalinteg 1 6.957497 0 0 7697 +xunnow 1 6.957497 0 0 7698 +homeworksreview 1 6.957497 0 0 7699 +slidesth 1 6.957497 0 0 7700 +onlineif 1 6.957497 0 0 7701 +updatedhomework 1 6.957497 0 0 7702 +filemidterm 1 6.957497 0 0 7703 +webta 1 6.957497 0 0 7704 +timetableta 1 6.957497 0 0 7705 +guana 1 6.957497 0 0 7706 +eduxun 1 6.957497 0 0 7707 +wordlist 1 6.957497 0 0 7708 +wwang 1 6.957497 0 0 7709 +afternoon 1 6.957497 0 0 7710 +schwetman 1 6.957497 0 0 7711 +mesquit 1 6.957497 0 0 7712 +yangyang 1 6.957497 0 0 7713 +herb 1 6.957497 0 0 7714 +appointmentcontact 1 6.957497 0 0 7715 +statisticsassign 1 6.957497 0 0 7716 +asga 1 6.957497 0 0 7717 +statisticsyour 1 6.957497 0 0 7718 +gradesect 1 6.957497 0 0 7719 +microsparc 1 6.957497 0 0 7720 +datasheetonlin 1 6.957497 0 0 7721 +ruiliu 1 6.957497 0 0 7722 +postmessag 1 6.957497 0 0 7723 +csnet 1 6.957497 0 0 7724 +wensdai 1 6.957497 0 0 7725 +netsim 1 6.957497 0 0 7726 +corejava 1 6.957497 0 0 7727 +fengyufeng 1 6.957497 0 0 7728 +pageclick 1 6.957497 0 0 7729 +gradesoth 1 6.957497 0 0 7730 +browserport 1 6.957497 0 0 7731 +dugan 1 6.957497 0 0 7732 +hypermediadocu 1 6.957497 0 0 7733 +schedulesth 1 6.957497 0 0 7734 +glanceweek 1 6.957497 0 0 7735 +schedulecomput 1 6.957497 0 0 7736 +includinglab 1 6.957497 0 0 7737 +andta 1 6.957497 0 0 7738 +audiofrom 1 6.957497 0 0 7739 +midtermand 1 6.957497 0 0 7740 +originallyschedul 1 6.957497 0 0 7741 +andtim 1 6.957497 0 0 7742 +usingth 1 6.957497 0 0 7743 +intactand 1 6.957497 0 0 7744 +forinst 1 6.957497 0 0 7745 +andrel 1 6.957497 0 0 7746 +instructorpaul 1 6.957497 0 0 7747 +edulectur 1 6.957497 0 0 7748 +assistantjonathan 1 6.957497 0 0 7749 +edusect 1 6.957497 0 0 7750 +ladnerclass 1 6.957497 0 0 7751 +construc 1 6.957497 0 0 7752 +halt 1 6.957497 0 0 7753 +undecidableexam 1 6.957497 0 0 7754 +edufix 1 6.957497 0 0 7755 +findhomework 1 6.957497 0 0 7756 +userid 1 6.957497 0 0 7757 +edukaye 1 6.957497 0 0 7758 +informationlab 1 6.957497 0 0 7759 +technot 1 6.957497 0 0 7760 +questionnaireloc 1 6.957497 0 0 7761 +cdeletemin 1 6.957497 0 0 7762 +treeshomework 1 6.957497 0 0 7763 +structuresrichard 1 6.957497 0 0 7764 +instructordan 1 6.957497 0 0 7765 +assistantthi 1 6.957497 0 0 7766 +overheadsport 1 6.957497 0 0 7767 +smalltalk 1 6.957497 0 0 7768 +transcipt 1 6.957497 0 0 7769 +htmlpostscript 1 6.957497 0 0 7770 +languagesspr 1 6.957497 0 0 7771 +hanks 1 6.957497 0 0 7772 +documentsgeneralintroduct 1 6.957497 0 0 7773 +relatedrun 1 6.957497 0 0 7774 +pagecurr 1 6.957497 0 0 7775 +quarterth 1 6.957497 0 0 7776 +quarterscours 1 6.957497 0 0 7777 +younotic 1 6.957497 0 0 7778 +algorithmsautumn 1 6.957497 0 0 7779 +shaffer 1 6.957497 0 0 7780 +examinform 1 6.957497 0 0 7781 +exambas 1 6.957497 0 0 7782 +compilerassignmentssolut 1 6.957497 0 0 7783 +assignmentsteach 1 6.957497 0 0 7784 +informationscheduleweb 1 6.957497 0 0 7785 +algorithmsspr 1 6.957497 0 0 7786 +funnowitz 1 6.957497 0 0 7787 +enclos 1 6.957497 0 0 7788 +siegtelephon 1 6.957497 0 0 7789 +algorithmswint 1 6.957497 0 0 7790 +shapirooffic 1 6.957497 0 0 7791 +pinneloffic 1 6.957497 0 0 7792 +denisep 1 6.957497 0 0 7793 +syllabustransparencieshomework 1 6.957497 0 0 7794 +inquot 1 6.957497 0 0 7795 +associatedvalu 1 6.957497 0 0 7796 +linebegin 1 6.957497 0 0 7797 +isfollow 1 6.957497 0 0 7798 +graphimag 1 6.957497 0 0 7799 +graphreview 1 6.957497 0 0 7800 +listsfin 1 6.957497 0 0 7801 +classhomethi 1 6.957497 0 0 7802 +inmind 1 6.957497 0 0 7803 +deliver 1 6.957497 0 0 7804 +mockup 1 6.957497 0 0 7805 +descriptioninstruct 1 6.957497 0 0 7806 +softwaresystem 1 6.957497 0 0 7807 +tocreat 1 6.957497 0 0 7808 +effectiveor 1 6.957497 0 0 7809 +topicsar 1 6.957497 0 0 7810 +employersand 1 6.957497 0 0 7811 +realbo 1 6.957497 0 0 7812 +largegroup 1 6.957497 0 0 7813 +cannotlearn 1 6.957497 0 0 7814 +devotedto 1 6.957497 0 0 7815 +isto 1 6.957497 0 0 7816 +effectivelytogeth 1 6.957497 0 0 7817 +disast 1 6.957497 0 0 7818 +worktogeth 1 6.957497 0 0 7819 +requirementsanalysi 1 6.957497 0 0 7820 +areal 1 6.957497 0 0 7821 +engineeringinstitut 1 6.957497 0 0 7822 +providedat 1 6.957497 0 0 7823 +playthat 1 6.957497 0 0 7824 +projectso 1 6.957497 0 0 7825 +softwaredevelop 1 6.957497 0 0 7826 +responsiblefor 1 6.957497 0 0 7827 +duri 1 6.957497 0 0 7828 +engineeringmeet 1 6.957497 0 0 7829 +eduta 1 6.957497 0 0 7830 +descriptionthi 1 6.957497 0 0 7831 +textbookghezzi 1 6.957497 0 0 7832 +jazayeri 1 6.957497 0 0 7833 +mandrioli 1 6.957497 0 0 7834 +cohes 1 6.957497 0 0 7835 +departmentsuggest 1 6.957497 0 0 7836 +inthompson 1 6.957497 0 0 7837 +koch 1 6.957497 0 0 7838 +andersonmeet 1 6.957497 0 0 7839 +andpars 1 6.957497 0 0 7840 +incommon 1 6.957497 0 0 7841 +purchasedsepar 1 6.957497 0 0 7842 +fordigitool 1 6.957497 0 0 7843 +thatmaintain 1 6.957497 0 0 7844 +currentinform 1 6.957497 0 0 7845 +introductionto 1 6.957497 0 0 7846 +thatdoesn 1 6.957497 0 0 7847 +promptli 1 6.957497 0 0 7848 +theirimplement 1 6.957497 0 0 7849 +buildingprogram 1 6.957497 0 0 7850 +tointepret 1 6.957497 0 0 7851 +alsolook 1 6.957497 0 0 7852 +programmingfacil 1 6.957497 0 0 7853 +thebas 1 6.957497 0 0 7854 +allegrocommon 1 6.957497 0 0 7855 +powerfulenviron 1 6.957497 0 0 7856 +graphicsand 1 6.957497 0 0 7857 +machinesof 1 6.957497 0 0 7858 +theirown 1 6.957497 0 0 7859 +xlisp 1 6.957497 0 0 7860 +theseresourc 1 6.957497 0 0 7861 +thatxlisp 1 6.957497 0 0 7862 +bare 1 6.957497 0 0 7863 +bone 1 6.957497 0 0 7864 +nothav 1 6.957497 0 0 7865 +disadvantag 1 6.957497 0 0 7866 +labunless 1 6.957497 0 0 7867 +fromdigitool 1 6.957497 0 0 7868 +dealallow 1 6.957497 0 0 7869 +lispfor 1 6.957497 0 0 7870 +thistim 1 6.957497 0 0 7871 +regardingread 1 6.957497 0 0 7872 +printout 1 6.957497 0 0 7873 +becov 1 6.957497 0 0 7874 +announcedearli 1 6.957497 0 0 7875 +snowflak 1 6.957497 0 0 7876 +projectgener 1 6.957497 0 0 7877 +aboutdemonstr 1 6.957497 0 0 7878 +onmondai 1 6.957497 0 0 7879 +exercisestokenizerassign 1 6.957497 0 0 7880 +andpart 1 6.957497 0 0 7881 +parsertokenizerpart 1 6.957497 0 0 7882 +snowflakeassign 1 6.957497 0 0 7883 +ondemonstr 1 6.957497 0 0 7884 +refcard 1 6.957497 0 0 7885 +intelligencecs 1 6.957497 0 0 7886 +msoffic 1 6.957497 0 0 7887 +symboliccomput 1 6.957497 0 0 7888 +emacsinterfac 1 6.957497 0 0 7889 +standalonelisp 1 6.957497 0 0 7890 +gradesredston 1 6.957497 0 0 7891 +midtem 1 6.957497 0 0 7892 +jayram 1 6.957497 0 0 7893 +thathachar 1 6.957497 0 0 7894 +systemscs 1 6.957497 0 0 7895 +crowlei 1 6.957497 0 0 7896 +pcrowlei 1 6.957497 0 0 7897 +unisql 1 6.957497 0 0 7898 +webcs 1 6.957497 0 0 7899 +adminth 1 6.957497 0 0 7900 +andoth 1 6.957497 0 0 7901 +projectsdescript 1 6.957497 0 0 7902 +solutionsto 1 6.957497 0 0 7903 +notesnot 1 6.957497 0 0 7904 +watchthi 1 6.957497 0 0 7905 +andgrad 1 6.957497 0 0 7906 +onproject 1 6.957497 0 0 7907 +keepin 1 6.957497 0 0 7908 +informationwil 1 6.957497 0 0 7909 +classpersonnel 1 6.957497 0 0 7910 +syllabuscours 1 6.957497 0 0 7911 +calendarta 1 6.957497 0 0 7912 +hourshandout 1 6.957497 0 0 7913 +assignmentslectur 1 6.957497 0 0 7914 +notesread 1 6.957497 0 0 7915 +assignmentsprojectsproject 1 6.957497 0 0 7916 +handoutsproject 1 6.957497 0 0 7917 +artifactsproject 1 6.957497 0 0 7918 +sessionsproject 1 6.957497 0 0 7919 +policyproject 1 6.957497 0 0 7920 +upslibui 1 6.957497 0 0 7921 +documentationoth 1 6.957497 0 0 7922 +informationget 1 6.957497 0 0 7923 +classhearn 1 6.957497 0 0 7924 +erratath 1 6.957497 0 0 7925 +labus 1 6.957497 0 0 7926 +pagegraph 1 6.957497 0 0 7927 +linkssgi 1 6.957497 0 0 7928 +surfgrafica 1 6.957497 0 0 7929 +obscurasiggraphgrailgraph 1 6.957497 0 0 7930 +indexoth 1 6.957497 0 0 7931 +linksmvi 1 6.957497 0 0 7932 +departmentth 1 6.957497 0 0 7933 +programth 1 6.957497 0 0 7934 +programweb 1 6.957497 0 0 7935 +helpbas 1 6.957497 0 0 7936 +helpmosa 1 6.957497 0 0 7937 +lynxus 1 6.957497 0 0 7938 +indyspighin 1 6.957497 0 0 7939 +disabl 1 6.957497 0 0 7940 +jari 1 6.957497 0 0 7941 +networksautumn 1 6.957497 0 0 7942 +eebphon 1 6.957497 0 0 7943 +kristensen 1 6.957497 0 0 7944 +tomatch 1 6.957497 0 0 7945 +andprovid 1 6.957497 0 0 7946 +timewindow 1 6.957497 0 0 7947 +overheadshomeworksprojectsinterest 1 6.957497 0 0 7948 +stuffattentionif 1 6.957497 0 0 7949 +pleasecontact 1 6.957497 0 0 7950 +schmitz 1 6.957497 0 0 7951 +requiresacadem 1 6.957497 0 0 7952 +networksspr 1 6.957497 0 0 7953 +overheadshomeworksprojectsabout 1 6.957497 0 0 7954 +optionlab 1 6.957497 0 0 7955 +syllabusschedul 1 6.957497 0 0 7956 +savoi 1 6.957497 0 0 7957 +savac 1 6.957497 0 0 7958 +chinn 1 6.957497 0 0 7959 +richin 1 6.957497 0 0 7960 +howard 1 6.957497 0 0 7961 +shchang 1 6.957497 0 0 7962 +csjason 1 6.957497 0 0 7963 +quarterhomework 1 6.957497 0 0 7964 +assignmentsweb 1 6.957497 0 0 7965 +duehomework 1 6.957497 0 0 7966 +abel 1 6.957497 0 0 7967 +fixtur 1 6.957497 0 0 7968 +communicationoth 1 6.957497 0 0 7969 +sheetsth 1 6.957497 0 0 7970 +skim 1 6.957497 0 0 7971 +jwatson 1 6.957497 0 0 7972 +chenoffic 1 6.957497 0 0 7973 +thursdays 1 6.957497 0 0 7974 +chensg 1 6.957497 0 0 7975 +laboratoryproject 1 6.957497 0 0 7976 +setprocessor 1 6.957497 0 0 7977 +chap 1 6.957497 0 0 7978 +referencesthi 1 6.957497 0 0 7979 +csor 1 6.957497 0 0 7980 +pnew 1 6.957497 0 0 7981 +baermeet 1 6.957497 0 0 7982 +windowsimplement 1 6.957497 0 0 7983 +programdevelop 1 6.957497 0 0 7984 +theintel 1 6.957497 0 0 7985 +isfor 1 6.957497 0 0 7986 +bedownload 1 6.957497 0 0 7987 +givenaccord 1 6.957497 0 0 7988 +alist 1 6.957497 0 0 7989 +coversboth 1 6.957497 0 0 7990 +logicalreason 1 6.957497 0 0 7991 +clo 1 6.957497 0 0 7992 +programmingpart 1 6.957497 0 0 7993 +ofhow 1 6.957497 0 0 7994 +circul 1 6.957497 0 0 7995 +orturn 1 6.957497 0 0 7996 +wrap 1 6.957497 0 0 7997 +designstev 1 6.957497 0 0 7998 +casei 1 6.957497 0 0 7999 +studentslab 1 6.957497 0 0 8000 +mchc 1 6.957497 0 0 8001 +martinrobot 1 6.957497 0 0 8002 +societyoth 1 6.957497 0 0 8003 +bevi 1 6.957497 0 0 8004 +relatingto 1 6.957497 0 0 8005 +frequentlychang 1 6.957497 0 0 8006 +bswest 1 6.957497 0 0 8007 +csif 1 6.957497 0 0 8008 +classpersonnelsyllabuslectur 1 6.957497 0 0 8009 +scheduleguest 1 6.957497 0 0 8010 +scheduleoffic 1 6.957497 0 0 8011 +hoursproject 1 6.957497 0 0 8012 +projectoth 1 6.957497 0 0 8013 +erratarefer 1 6.957497 0 0 8014 +pagesmidterm 1 6.957497 0 0 8015 +questionnairebswest 1 6.957497 0 0 8016 +languageswint 1 6.957497 0 0 8017 +craigchamb 1 6.957497 0 0 8018 +archivedher 1 6.957497 0 0 8019 +closedbook 1 6.957497 0 0 8020 +wereask 1 6.957497 0 0 8021 +tutorialsth 1 6.957497 0 0 8022 +tutorialhow 1 6.957497 0 0 8023 +enda 1 6.957497 0 0 8024 +interestdead 1 6.957497 0 0 8025 +elim 1 6.957497 0 0 8026 +idfacfg 1 6.957497 0 0 8027 +frameworkvortex 1 6.957497 0 0 8028 +grammarcecil 1 6.957497 0 0 8029 +documentationdocument 1 6.957497 0 0 8030 +resourcesth 1 6.957497 0 0 8031 +languagesimport 1 6.957497 0 0 8032 +turori 1 6.957497 0 0 8033 +andtransform 1 6.957497 0 0 8034 +resourcesmor 1 6.957497 0 0 8035 +engineeringdavid 1 6.957497 0 0 8036 +kwic 1 6.957497 0 0 8037 +projectsnotkin 1 6.957497 0 0 8038 +languagesautumn 1 6.957497 0 0 8039 +byappoint 1 6.957497 0 0 8040 +cubiclescours 1 6.957497 0 0 8041 +readingsmail 1 6.957497 0 0 8042 +archivesw 1 6.957497 0 0 8043 +instructionalpurpos 1 6.957497 0 0 8044 +emailto 1 6.957497 0 0 8045 +csegener 1 6.957497 0 0 8046 +pagesprogram 1 6.957497 0 0 8047 +critiquesgari 1 6.957497 0 0 8048 +leaven 1 6.957497 0 0 8049 +pagefunct 1 6.957497 0 0 8050 +resourcesmit 1 6.957497 0 0 8051 +pagecmu 1 6.957497 0 0 8052 +pagea 1 6.957497 0 0 8053 +mlhaskel 1 6.957497 0 0 8054 +universityobject 1 6.957497 0 0 8055 +geneva 1 6.957497 0 0 8056 +gilligan 1 6.957497 0 0 8057 +readingtextbook 1 6.957497 0 0 8058 +sapplet 1 6.957497 0 0 8059 +willconsist 1 6.957497 0 0 8060 +bureaucrat 1 6.957497 0 0 8061 +stuffgrad 1 6.957497 0 0 8062 +homeworkproblem 1 6.957497 0 0 8063 +upindepend 1 6.957497 0 0 8064 +betweenani 1 6.957497 0 0 8065 +mustwatch 1 6.957497 0 0 8066 +thatan 1 6.957497 0 0 8067 +reboot 1 6.957497 0 0 8068 +thatsurv 1 6.957497 0 0 8069 +appointment 1 6.957497 0 0 8070 +developingfast 1 6.957497 0 0 8071 +theirefficaci 1 6.957497 0 0 8072 +commentsabout 1 6.957497 0 0 8073 +analysisfor 1 6.957497 0 0 8074 +referencesfor 1 6.957497 0 0 8075 +erew 1 6.957497 0 0 8076 +yannakaki 1 6.957497 0 0 8077 +certifi 1 6.957497 0 0 8078 +likelysometh 1 6.957497 0 0 8079 +martel 1 6.957497 0 0 8080 +whim 1 6.957497 0 0 8081 +smpc 1 6.957497 0 0 8082 +lookingat 1 6.957497 0 0 8083 +isnon 1 6.957497 0 0 8084 +notconsid 1 6.957497 0 0 8085 +indevelop 1 6.957497 0 0 8086 +algorithmswhich 1 6.957497 0 0 8087 +conceiv 1 6.957497 0 0 8088 +goingto 1 6.957497 0 0 8089 +outsidework 1 6.957497 0 0 8090 +befollow 1 6.957497 0 0 8091 +youcould 1 6.957497 0 0 8092 +textwould 1 6.957497 0 0 8093 +artof 1 6.957497 0 0 8094 +mychoic 1 6.957497 0 0 8095 +interestingor 1 6.957497 0 0 8096 +uninterest 1 6.957497 0 0 8097 +aseith 1 6.957497 0 0 8098 +researchcont 1 6.957497 0 0 8099 +turninto 1 6.957497 0 0 8100 +andenergi 1 6.957497 0 0 8101 +automataautumn 1 6.957497 0 0 8102 +nitin 1 6.957497 0 0 8103 +staffnameemailphoneoffic 1 6.957497 0 0 8104 +csmw 1 6.957497 0 0 8105 +acroread 1 6.957497 0 0 8106 +aavail 1 6.957497 0 0 8107 +truthof 1 6.957497 0 0 8108 +casea 1 6.957497 0 0 8109 +flip 1 6.957497 0 0 8110 +oftheorem 1 6.957497 0 0 8111 +finitedomain 1 6.957497 0 0 8112 +thesequest 1 6.957497 0 0 8113 +complexityand 1 6.957497 0 0 8114 +anumb 1 6.957497 0 0 8115 +urquhart 1 6.957497 0 0 8116 +sato 1 6.957497 0 0 8117 +andboy 1 6.957497 0 0 8118 +gsat 1 6.957497 0 0 8119 +thedirectori 1 6.957497 0 0 8120 +proversther 1 6.957497 0 0 8121 +ofinstal 1 6.957497 0 0 8122 +systemperform 1 6.957497 0 0 8123 +modelingspr 1 6.957497 0 0 8124 +lazowskaandmaryvernonwelcom 1 6.957497 0 0 8125 +performancemodel 1 6.957497 0 0 8126 +hourstent 1 6.957497 0 0 8127 +schedulecom 1 6.957497 0 0 8128 +goingsassignmentsproject 1 6.957497 0 0 8129 +informationmap 1 6.957497 0 0 8130 +emailoth 1 6.957497 0 0 8131 +computersystemsuw 1 6.957497 0 0 8132 +engineeringlazowska 1 6.957497 0 0 8133 +architecturewint 1 6.957497 0 0 8134 +instructorsusan 1 6.957497 0 0 8135 +tajoshua 1 6.957497 0 0 8136 +instuct 1 6.957497 0 0 8137 +pixi 1 6.957497 0 0 8138 +dinero 1 6.957497 0 0 8139 +forcs 1 6.957497 0 0 8140 +classmessag 1 6.957497 0 0 8141 +projectlevi 1 6.957497 0 0 8142 +graphicsautumn 1 6.957497 0 0 8143 +hineskj 1 6.957497 0 0 8144 +pamett 1 6.957497 0 0 8145 +groupsfin 1 6.957497 0 0 8146 +topicsread 1 6.957497 0 0 8147 +systemsthi 1 6.957497 0 0 8148 +strappedfor 1 6.957497 0 0 8149 +shafer 1 6.957497 0 0 8150 +reasoningthi 1 6.957497 0 0 8151 +jayn 1 6.957497 0 0 8152 +fragmentari 1 6.957497 0 0 8153 +foundationsof 1 6.957497 0 0 8154 +beautifulli 1 6.957497 0 0 8155 +neapolitan 1 6.957497 0 0 8156 +propagationalgorithm 1 6.957497 0 0 8157 +khoros_hom 1 6.957497 0 0 8158 +msvc 1 6.957497 0 0 8159 +rene 1 6.957497 0 0 8160 +understandingwelcom 1 6.957497 0 0 8161 +doexercis 1 6.957497 0 0 8162 +torun 1 6.957497 0 0 8163 +aslillith 1 6.957497 0 0 8164 +containxhost 1 6.957497 0 0 8165 +lilliththen 1 6.957497 0 0 8166 +manpath 1 6.957497 0 0 8167 +rlogin 1 6.957497 0 0 8168 +lillith 1 6.957497 0 0 8169 +rhost 1 6.957497 0 0 8170 +typecantata 1 6.957497 0 0 8171 +prompt 1 6.957497 0 0 8172 +haskhoro 1 6.957497 0 0 8173 +wwwhttp 1 6.957497 0 0 8174 +htmland 1 6.957497 0 0 8175 +itscours 1 6.957497 0 0 8176 +twotop 1 6.957497 0 0 8177 +pagesand 1 6.957497 0 0 8178 +huerta 1 6.957497 0 0 8179 +andnevatia 1 6.957497 0 0 8180 +tolook 1 6.957497 0 0 8181 +wolff 1 6.957497 0 0 8182 +onneur 1 6.957497 0 0 8183 +trainabl 1 6.957497 0 0 8184 +ofmatlab 1 6.957497 0 0 8185 +requirethat 1 6.957497 0 0 8186 +mclain 1 6.957497 0 0 8187 +documentexplain 1 6.957497 0 0 8188 +withkhoro 1 6.957497 0 0 8189 +accesskhoro 1 6.957497 0 0 8190 +youraccount 1 6.957497 0 0 8191 +itov 1 6.957497 0 0 8192 +arelimit 1 6.957497 0 0 8193 +andsh 1 6.957497 0 0 8194 +knock 1 6.957497 0 0 8195 +orhav 1 6.957497 0 0 8196 +willhav 1 6.957497 0 0 8197 +delft 1 6.957497 0 0 8198 +brochur 1 6.957497 0 0 8199 +brochuremosa 1 6.957497 0 0 8200 +macmosa 1 6.957497 0 0 8201 +itemund 1 6.957497 0 0 8202 +processingwelcom 1 6.957497 0 0 8203 +hourearli 1 6.957497 0 0 8204 +nian 1 6.957497 0 0 8205 +fraser 1 6.957497 0 0 8206 +burnabi 1 6.957497 0 0 8207 +bharath 1 6.957497 0 0 8208 +modayur 1 6.957497 0 0 8209 +invariantoper 1 6.957497 0 0 8210 +hierarchicalrelax 1 6.957497 0 0 8211 +isodata 1 6.957497 0 0 8212 +treatment 1 6.957497 0 0 8213 +topicsdur 1 6.957497 0 0 8214 +activelyexplor 1 6.957497 0 0 8215 +writeupsi 1 6.957497 0 0 8216 +resourcespvm 1 6.957497 0 0 8217 +virtualmachin 1 6.957497 0 0 8218 +layear 1 6.957497 0 0 8219 +aviru 1 6.957497 0 0 8220 +moreworkst 1 6.957497 0 0 8221 +studydistribut 1 6.957497 0 0 8222 +technicalpubl 1 6.957497 0 0 8223 +paragonparallel 1 6.957497 0 0 8224 +variousvendor 1 6.957497 0 0 8225 +correctionsto 1 6.957497 0 0 8226 +bboard 1 6.957497 0 0 8227 +seminarc 1 6.957497 0 0 8228 +rspring 1 6.957497 0 0 8229 +ronen 1 6.957497 0 0 8230 +troi 1 6.957497 0 0 8231 +quarterscs 1 6.957497 0 0 8232 +topicssteven 1 6.957497 0 0 8233 +instructorcs 1 6.957497 0 0 8234 +varyfrom 1 6.957497 0 0 8235 +presentor 1 6.957497 0 0 8236 +labord 1 6.957497 0 0 8237 +wwwwelcom 1 6.957497 0 0 8238 +mccalla 1 6.957497 0 0 8239 +importanceof 1 6.957497 0 0 8240 +youngquist 1 6.957497 0 0 8241 +aboutinternet 1 6.957497 0 0 8242 +microworld 1 6.957497 0 0 8243 +tointellig 1 6.957497 0 0 8244 +bartel 1 6.957497 0 0 8245 +mathematicsconnect 1 6.957497 0 0 8246 +gari 1 6.957497 0 0 8247 +ambiti 1 6.957497 0 0 8248 +thethem 1 6.957497 0 0 8249 +moresophist 1 6.957497 0 0 8250 +elabor 1 6.957497 0 0 8251 +ofwww 1 6.957497 0 0 8252 +intechn 1 6.957497 0 0 8253 +couldmak 1 6.957497 0 0 8254 +applicationsthat 1 6.957497 0 0 8255 +webhttp 1 6.957497 0 0 8256 +empow 1 6.957497 0 0 8257 +agehttp 1 6.957497 0 0 8258 +communitieshttp 1 6.957497 0 0 8259 +dietz 1 6.957497 0 0 8260 +serviceshttp 1 6.957497 0 0 8261 +dcewebkit 1 6.957497 0 0 8262 +zhumeet 1 6.957497 0 0 8263 +aboutcurriculum 1 6.957497 0 0 8264 +learningwelcom 1 6.957497 0 0 8265 +methodologiesfor 1 6.957497 0 0 8266 +forcollabor 1 6.957497 0 0 8267 +willtak 1 6.957497 0 0 8268 +ofthes 1 6.957497 0 0 8269 +ofai 1 6.957497 0 0 8270 +ofstud 1 6.957497 0 0 8271 +intopeopl 1 6.957497 0 0 8272 +meani 1 6.957497 0 0 8273 +schoolmai 1 6.957497 0 0 8274 +participatingstud 1 6.957497 0 0 8275 +cseg 1 6.957497 0 0 8276 +lunchcs 1 6.957497 0 0 8277 +lunchcours 1 6.957497 0 0 8278 +loupbaermeet 1 6.957497 0 0 8279 +withalmost 1 6.957497 0 0 8280 +discussedat 1 6.957497 0 0 8281 +byesteem 1 6.957497 0 0 8282 +mostlyw 1 6.957497 0 0 8283 +discussionson 1 6.957497 0 0 8284 +quartersi 1 6.957497 0 0 8285 +fromparticip 1 6.957497 0 0 8286 +oncrit 1 6.957497 0 0 8287 +hereread 1 6.957497 0 0 8288 +morethem 1 6.957497 0 0 8289 +molli 1 6.957497 0 0 8290 +thestud 1 6.957497 0 0 8291 +informallyor 1 6.957497 0 0 8292 +lipasti 1 6.957497 0 0 8293 +advanceprogrami 1 6.957497 0 0 8294 +thesaulsburi 1 6.957497 0 0 8295 +readashlei 1 6.957497 0 0 8296 +saulsburi 1 6.957497 0 0 8297 +fong 1 6.957497 0 0 8298 +nowatzyk 1 6.957497 0 0 8299 +fillo 1 6.957497 0 0 8300 +keckler 1 6.957497 0 0 8301 +machinelink 1 6.957497 0 0 8302 +readdoug 1 6.957497 0 0 8303 +neton 1 6.957497 0 0 8304 +cardwel 1 6.957497 0 0 8305 +fromm 1 6.957497 0 0 8306 +keeton 1 6.957497 0 0 8307 +kozyraki 1 6.957497 0 0 8308 +thomasand 1 6.957497 0 0 8309 +availableher 1 6.957497 0 0 8310 +themajordomo 1 6.957497 0 0 8311 +shouldinclud 1 6.957497 0 0 8312 +lineblank 1 6.957497 0 0 8313 +seminarcours 1 6.957497 0 0 8314 +craigchambersmeet 1 6.957497 0 0 8315 +butreal 1 6.957497 0 0 8316 +atrium 1 6.957497 0 0 8317 +scheduleweek 1 6.957497 0 0 8318 +memspi 1 6.957497 0 0 8319 +margaretmartonosi 1 6.957497 0 0 8320 +consel 1 6.957497 0 0 8321 +itsus 1 6.957497 0 0 8322 +evelyn 1 6.957497 0 0 8323 +duesterwald 1 6.957497 0 0 8324 +rajiv 1 6.957497 0 0 8325 +maryl 1 6.957497 0 0 8326 +soffa 1 6.957497 0 0 8327 +danielweis 1 6.957497 0 0 8328 +bjarn 1 6.957497 0 0 8329 +steensgaard 1 6.957497 0 0 8330 +coalesc 1 6.957497 0 0 8331 +appel 1 6.957497 0 0 8332 +hooverand 1 6.957497 0 0 8333 +zadeck 1 6.957497 0 0 8334 +byprivthviraj 1 6.957497 0 0 8335 +stevenkurland 1 6.957497 0 0 8336 +knoblock 1 6.957497 0 0 8337 +knoop 1 6.957497 0 0 8338 +oliv 1 6.957497 0 0 8339 +andbernhard 1 6.957497 0 0 8340 +steffen 1 6.957497 0 0 8341 +subscribecsek 1 6.957497 0 0 8342 +shortlyrec 1 6.957497 0 0 8343 +melodi 1 6.957497 0 0 8344 +arpaci 1 6.957497 0 0 8345 +vaswani 1 6.957497 0 0 8346 +sevcik 1 6.957497 0 0 8347 +feitelson 1 6.957497 0 0 8348 +coschedul 1 6.957497 0 0 8349 +mvmv 1 6.957497 0 0 8350 +systemsprofessor 1 6.957497 0 0 8351 +vernontim 1 6.957497 0 0 8352 +pmlocat 1 6.957497 0 0 8353 +now 1 6.957497 0 0 8354 +vahdat 1 6.957497 0 0 8355 +equi 1 6.957497 0 0 8356 +issuesfor 1 6.957497 0 0 8357 +workloadcharacterist 1 6.957497 0 0 8358 +evangelo 1 6.957497 0 0 8359 +markato 1 6.957497 0 0 8360 +loopschedul 1 6.957497 0 0 8361 +iniee 1 6.957497 0 0 8362 +zima 1 6.957497 0 0 8363 +chapman 1 6.957497 0 0 8364 +edjlali 1 6.957497 0 0 8365 +sussman 1 6.957497 0 0 8366 +comparisonsshikharesh 1 6.957497 0 0 8367 +majumdar 1 6.957497 0 0 8368 +eager 1 6.957497 0 0 8369 +variabilityservic 1 6.957497 0 0 8370 +dror 1 6.957497 0 0 8371 +nitzberg 1 6.957497 0 0 8372 +thenasa 1 6.957497 0 0 8373 +ipsc 1 6.957497 0 0 8374 +leutenegg 1 6.957497 0 0 8375 +sobalvarro 1 6.957497 0 0 8376 +rohit 1 6.957497 0 0 8377 +devin 1 6.957497 0 0 8378 +verghes 1 6.957497 0 0 8379 +mendel 1 6.957497 0 0 8380 +multiprocessorcomput 1 6.957497 0 0 8381 +alverson 1 6.957497 0 0 8382 +kahan 1 6.957497 0 0 8383 +korri 1 6.957497 0 0 8384 +effectivedistribut 1 6.957497 0 0 8385 +rudolph 1 6.957497 0 0 8386 +scalapack 1 6.957497 0 0 8387 +ramaswami 1 6.957497 0 0 8388 +hodg 1 6.957497 0 0 8389 +mcintosh 1 6.957497 0 0 8390 +chakarabarti 1 6.957497 0 0 8391 +integer 1 6.957497 0 0 8392 +crandal 1 6.957497 0 0 8393 +aydt 1 6.957497 0 0 8394 +bordawekar 1 6.957497 0 0 8395 +choudahari 1 6.957497 0 0 8396 +koelbel 1 6.957497 0 0 8397 +paleczni 1 6.957497 0 0 8398 +midkiff 1 6.957497 0 0 8399 +fahring 1 6.957497 0 0 8400 +hain 1 6.957497 0 0 8401 +mehrotra 1 6.957497 0 0 8402 +environmentslarri 1 6.957497 0 0 8403 +snyderautumn 1 6.957497 0 0 8404 +ten 1 6.957497 0 0 8405 +ignit 1 6.957497 0 0 8406 +hurri 1 6.957497 0 0 8407 +cseo 1 6.957497 0 0 8408 +datepaperpresentor 1 6.957497 0 0 8409 +falcon 1 6.957497 0 0 8410 +gallivan 1 6.957497 0 0 8411 +gallopoulo 1 6.957497 0 0 8412 +marsolf 1 6.957497 0 0 8413 +ramkumar 1 6.957497 0 0 8414 +forb 1 6.957497 0 0 8415 +gotwal 1 6.957497 0 0 8416 +sriniva 1 6.957497 0 0 8417 +gannon 1 6.957497 0 0 8418 +joerg 1 6.957497 0 0 8419 +kuszmaul 1 6.957497 0 0 8420 +leiserson 1 6.957497 0 0 8421 +andersson 1 6.957497 0 0 8422 +realign 1 6.957497 0 0 8423 +kamachi 1 6.957497 0 0 8424 +kusano 1 6.957497 0 0 8425 +suehiro 1 6.957497 0 0 8426 +tamura 1 6.957497 0 0 8427 +sakon 1 6.957497 0 0 8428 +rinard 1 6.957497 0 0 8429 +abramson 1 6.957497 0 0 8430 +michalak 1 6.957497 0 0 8431 +sosic 1 6.957497 0 0 8432 +preliminariesif 1 6.957497 0 0 8433 +besent 1 6.957497 0 0 8434 +systemsin 1 6.957497 0 0 8435 +quarterli 1 6.957497 0 0 8436 +quarterw 1 6.957497 0 0 8437 +upcomingacm 1 6.957497 0 0 8438 +havean 1 6.957497 0 0 8439 +scheduleoct 1 6.957497 0 0 8440 +autoraid 1 6.957497 0 0 8441 +montgomeri 1 6.957497 0 0 8442 +stackabl 1 6.957497 0 0 8443 +zphigh 1 6.957497 0 0 8444 +zpllarri 1 6.957497 0 0 8445 +teamautumn 1 6.957497 0 0 8446 +csezpl 1 6.957497 0 0 8447 +usersmail 1 6.957497 0 0 8448 +librarai 1 6.957497 0 0 8449 +relatedinform 1 6.957497 0 0 8450 +descriptionzpl 1 6.957497 0 0 8451 +scientificprogram 1 6.957497 0 0 8452 +infortran 1 6.957497 0 0 8453 +dramaticallysimplifi 1 6.957497 0 0 8454 +nuisanc 1 6.957497 0 0 8455 +andtrivi 1 6.957497 0 0 8456 +byrecompil 1 6.957497 0 0 8457 +wysiwyg 1 6.957497 0 0 8458 +booknon 1 6.957497 0 0 8459 +onin 1 6.957497 0 0 8460 +zplprogram 1 6.957497 0 0 8461 +prerequisitesfamiliar 1 6.957497 0 0 8462 +ormatlab 1 6.957497 0 0 8463 +remotezpl 1 6.957497 0 0 8464 +compileroth 1 6.957497 0 0 8465 +societycs 1 6.957497 0 0 8466 +societywelcom 1 6.957497 0 0 8467 +wintercs 1 6.957497 0 0 8468 +andglob 1 6.957497 0 0 8469 +syllabusclass 1 6.957497 0 0 8470 +schedulelink 1 6.957497 0 0 8471 +sitesbook 1 6.957497 0 0 8472 +referenceassignmentsassign 1 6.957497 0 0 8473 +topicsprojectread 1 6.957497 0 0 8474 +zephyr 1 6.957497 0 0 8475 +siegcreat 1 6.957497 0 0 8476 +scriptspleas 1 6.957497 0 0 8477 +tothem 1 6.957497 0 0 8478 +zwhere 1 6.957497 0 0 8479 +mosiac 1 6.957497 0 0 8480 +znol 1 6.957497 0 0 8481 +zwatch 1 6.957497 0 0 8482 +zlocat 1 6.957497 0 0 8483 +releg 1 6.957497 0 0 8484 +grumbl 1 6.957497 0 0 8485 +luddit 1 6.957497 0 0 8486 +itout 1 6.957497 0 0 8487 +withci 1 6.957497 0 0 8488 +theentir 1 6.957497 0 0 8489 +computerhardwar 1 6.957497 0 0 8490 +andc 1 6.957497 0 0 8491 +andpipelin 1 6.957497 0 0 8492 +performancemachin 1 6.957497 0 0 8493 +vectorizingcompil 1 6.957497 0 0 8494 +processorsynchron 1 6.957497 0 0 8495 +purposeprocessor 1 6.957497 0 0 8496 +systemsnew 1 6.957497 0 0 8497 +stufffin 1 6.957497 0 0 8498 +staffinstructor 1 6.957497 0 0 8499 +karuna 1 6.957497 0 0 8500 +muthiahemail 1 6.957497 0 0 8501 +muthiah 1 6.957497 0 0 8502 +weyersemail 1 6.957497 0 0 8503 +materialsth 1 6.957497 0 0 8504 +thelectur 1 6.957497 0 0 8505 +textbookmodern 1 6.957497 0 0 8506 +pohl 1 6.957497 0 0 8507 +systemsandobject 1 6.957497 0 0 8508 +sectionslectur 1 6.957497 0 0 8509 +sciencesdiscuss 1 6.957497 0 0 8510 +nolandnot 1 6.957497 0 0 8511 +occas 1 6.957497 0 0 8512 +quizzesther 1 6.957497 0 0 8513 +thediscuss 1 6.957497 0 0 8514 +usetrac 1 6.957497 0 0 8515 +setsdur 1 6.957497 0 0 8516 +severalwritten 1 6.957497 0 0 8517 +synchronizationprimit 1 6.957497 0 0 8518 +workassign 1 6.957497 0 0 8519 +assignmentthat 1 6.957497 0 0 8520 +weekof 1 6.957497 0 0 8521 +cheatingprogram 1 6.957497 0 0 8522 +cheater 1 6.957497 0 0 8523 +receivingan 1 6.957497 0 0 8524 +facilitiesw 1 6.957497 0 0 8525 +policyif 1 6.957497 0 0 8526 +beno 1 6.957497 0 0 8527 +scheduleth 1 6.957497 0 0 8528 +processesweek 1 6.957497 0 0 8529 +creationweek 1 6.957497 0 0 8530 +synchronizationweek 1 6.957497 0 0 8531 +semaphoresweek 1 6.957497 0 0 8532 +monitorsweek 1 6.957497 0 0 8533 +deadlocksweek 1 6.957497 0 0 8534 +relocationweek 1 6.957497 0 0 8535 +tlbsweek 1 6.957497 0 0 8536 +filesweek 1 6.957497 0 0 8537 +directoriesweek 1 6.957497 0 0 8538 +protectionweek 1 6.957497 0 0 8539 +seminarunivers 1 6.957497 0 0 8540 +seminarinstructor 1 6.957497 0 0 8541 +lectureslectur 1 6.957497 0 0 8542 +sciencesclass 1 6.957497 0 0 8543 +schedulether 1 6.957497 0 0 8544 +attendal 1 6.957497 0 0 8545 +overviewweek 1 6.957497 0 0 8546 +protocolsweek 1 6.957497 0 0 8547 +callsweek 1 6.957497 0 0 8548 +securityweek 1 6.957497 0 0 8549 +encryptionweek 1 6.957497 0 0 8550 +netweek 1 6.957497 0 0 8551 +systemsweek 1 6.957497 0 0 8552 +supercomputerweek 1 6.957497 0 0 8553 +javaweek 1 6.957497 0 0 8554 +discussionslast 1 6.957497 0 0 8555 +availbl 1 6.957497 0 0 8556 +systemssummarythi 1 6.957497 0 0 8557 +textther 1 6.957497 0 0 8558 +operatingsystemsclass 1 6.957497 0 0 8559 +meetonc 1 6.957497 0 0 8560 +listaccord 1 6.957497 0 0 8561 +papersindepend 1 6.957497 0 0 8562 +identifyth 1 6.957497 0 0 8563 +discussionsclass 1 6.957497 0 0 8564 +besupport 1 6.957497 0 0 8565 +beveri 1 6.957497 0 0 8566 +unhappi 1 6.957497 0 0 8567 +papersdur 1 6.957497 0 0 8568 +paperwil 1 6.957497 0 0 8569 +facilityand 1 6.957497 0 0 8570 +summaryof 1 6.957497 0 0 8571 +aselect 1 6.957497 0 0 8572 +topicsfrom 1 6.957497 0 0 8573 +fellowstud 1 6.957497 0 0 8574 +giveth 1 6.957497 0 0 8575 +gradesscor 1 6.957497 0 0 8576 +proposalsi 1 6.957497 0 0 8577 +gradesar 1 6.957497 0 0 8578 +detailstim 1 6.957497 0 0 8579 +noonlast 1 6.957497 0 0 8580 +elig 1 6.957497 0 0 8581 +amclick 1 6.957497 0 0 8582 +unsur 1 6.957497 0 0 8583 +notestext 1 6.957497 0 0 8584 +koffman 1 6.957497 0 0 8585 +assignmentsther 1 6.957497 0 0 8586 +gradesheet 1 6.957497 0 0 8587 +pmhow 1 6.957497 0 0 8588 +modem 1 6.957497 0 0 8589 +exerciseson 1 6.957497 0 0 8590 +egglestonemail 1 6.957497 0 0 8591 +hourlywork 1 6.957497 0 0 8592 +classread 1 6.957497 0 0 8593 +gradeshomeworkexam 1 6.957497 0 0 8594 +quizzesmiscellan 1 6.957497 0 0 8595 +policytextproblem 1 6.957497 0 0 8596 +topicsinclud 1 6.957497 0 0 8597 +tochoos 1 6.957497 0 0 8598 +rathera 1 6.957497 0 0 8599 +manya 1 6.957497 0 0 8600 +assig 1 6.957497 0 0 8601 +programmingstructur 1 6.957497 0 0 8602 +elementaryengin 1 6.957497 0 0 8603 +enableth 1 6.957497 0 0 8604 +inelementari 1 6.957497 0 0 8605 +reameslast 1 6.957497 0 0 8606 +cscours 1 6.957497 0 0 8607 +descriptionfrom 1 6.957497 0 0 8608 +programa 1 6.957497 0 0 8609 +programb 1 6.957497 0 0 8610 +cumul 1 6.957497 0 0 8611 +desperateto 1 6.957497 0 0 8612 +outputfil 1 6.957497 0 0 8613 +suzan 1 6.957497 0 0 8614 +inputfil 1 6.957497 0 0 8615 +structuresfal 1 6.957497 0 0 8616 +htmlinstructor 1 6.957497 0 0 8617 +newsassign 1 6.957497 0 0 8618 +statisticssom 1 6.957497 0 0 8619 +median 1 6.957497 0 0 8620 +midterma 1 6.957497 0 0 8621 +oldmidterm 1 6.957497 0 0 8622 +ownmidterm 1 6.957497 0 0 8623 +searchth 1 6.957497 0 0 8624 +filemenu 1 6.957497 0 0 8625 +andchoos 1 6.957497 0 0 8626 +sciencesom 1 6.957497 0 0 8627 +haveform 1 6.957497 0 0 8628 +becomecomput 1 6.957497 0 0 8629 +thisclass 1 6.957497 0 0 8630 +withtheir 1 6.957497 0 0 8631 +classwork 1 6.957497 0 0 8632 +stodder 1 6.957497 0 0 8633 +theodd 1 6.957497 0 0 8634 +statementi 1 6.957497 0 0 8635 +aniniti 1 6.957497 0 0 8636 +exceptionsy 1 6.957497 0 0 8637 +isdata 1 6.957497 0 0 8638 +notnecessari 1 6.957497 0 0 8639 +isveri 1 6.957497 0 0 8640 +whichar 1 6.957497 0 0 8641 +entranceof 1 6.957497 0 0 8642 +needsom 1 6.957497 0 0 8643 +handoutc 1 6.957497 0 0 8644 +althoughi 1 6.957497 0 0 8645 +courseof 1 6.957497 0 0 8646 +apoint 1 6.957497 0 0 8647 +prerequisitecours 1 6.957497 0 0 8648 +certainrestrict 1 6.957497 0 0 8649 +emailand 1 6.957497 0 0 8650 +toyour 1 6.957497 0 0 8651 +runwith 1 6.957497 0 0 8652 +inassign 1 6.957497 0 0 8653 +allelectron 1 6.957497 0 0 8654 +policyno 1 6.957497 0 0 8655 +coincid 1 6.957497 0 0 8656 +oneach 1 6.957497 0 0 8657 +thelast 1 6.957497 0 0 8658 +cheatingth 1 6.957497 0 0 8659 +linest 1 6.957497 0 0 8660 +tocommun 1 6.957497 0 0 8661 +butther 1 6.957497 0 0 8662 +obei 1 6.957497 0 0 8663 +policiesgovern 1 6.957497 0 0 8664 +policiesif 1 6.957497 0 0 8665 +currenthard 1 6.957497 0 0 8666 +conceptsthat 1 6.957497 0 0 8667 +emailsever 1 6.957497 0 0 8668 +gradingprogram 1 6.957497 0 0 8669 +typicalinput 1 6.957497 0 0 8670 +projectspecif 1 6.957497 0 0 8671 +shoulddemonstr 1 6.957497 0 0 8672 +includingunusu 1 6.957497 0 0 8673 +considerationof 1 6.957497 0 0 8674 +orcomplex 1 6.957497 0 0 8675 +definedconst 1 6.957497 0 0 8676 +thosevalu 1 6.957497 0 0 8677 +styleus 1 6.957497 0 0 8678 +variable_nam 1 6.957497 0 0 8679 +function_nam 1 6.957497 0 0 8680 +const 1 6.957497 0 0 8681 +defined_const 1 6.957497 0 0 8682 +enum 1 6.957497 0 0 8683 +enumtyp 1 6.957497 0 0 8684 +classnam 1 6.957497 0 0 8685 +notesfor 1 6.957497 0 0 8686 +meaningfulli 1 6.957497 0 0 8687 +documentationthi 1 6.957497 0 0 8688 +yourprogram 1 6.957497 0 0 8689 +someonewho 1 6.957497 0 0 8690 +superfici 1 6.957497 0 0 8691 +unawar 1 6.957497 0 0 8692 +descriptionne 1 6.957497 0 0 8693 +thensuffici 1 6.957497 0 0 8694 +documentationther 1 6.957497 0 0 8695 +structuresshould 1 6.957497 0 0 8696 +membershould 1 6.957497 0 0 8697 +sname 1 6.957497 0 0 8698 +withoutmak 1 6.957497 0 0 8699 +stackyou 1 6.957497 0 0 8700 +tricki 1 6.957497 0 0 8701 +opaqu 1 6.957497 0 0 8702 +commentcan 1 6.957497 0 0 8703 +clarifi 1 6.957497 0 0 8704 +outlineof 1 6.957497 0 0 8705 +vimani 1 6.957497 0 0 8706 +becomecomfort 1 6.957497 0 0 8707 +youronli 1 6.957497 0 0 8708 +macpasc 1 6.957497 0 0 8709 +withunix 1 6.957497 0 0 8710 +wellspent 1 6.957497 0 0 8711 +thefollow 1 6.957497 0 0 8712 +tbayou 1 6.957497 0 0 8713 +goto 1 6.957497 0 0 8714 +larusinstructor 1 6.957497 0 0 8715 +laruslaru 1 6.957497 0 0 8716 +amcontentsteach 1 6.957497 0 0 8717 +assistantstextlectur 1 6.957497 0 0 8718 +informationelectron 1 6.957497 0 0 8719 +mailth 1 6.957497 0 0 8720 +languagegradingexamscours 1 6.957497 0 0 8721 +scheduleassign 1 6.957497 0 0 8722 +assignmentscours 1 6.957497 0 0 8723 +objectivesc 1 6.957497 0 0 8724 +assistantswei 1 6.957497 0 0 8725 +forthi 1 6.957497 0 0 8726 +theassign 1 6.957497 0 0 8727 +zhangoffic 1 6.957497 0 0 8728 +entranc 1 6.957497 0 0 8729 +maili 1 6.957497 0 0 8730 +gdbthere 1 6.957497 0 0 8731 +administrationbas 1 6.957497 0 0 8732 +storagelectur 1 6.957497 0 0 8733 +listslectur 1 6.957497 0 0 8734 +stackslectur 1 6.957497 0 0 8735 +queueslectur 1 6.957497 0 0 8736 +hashinglectur 1 6.957497 0 0 8737 +recursionlectur 1 6.957497 0 0 8738 +treesbinari 1 6.957497 0 0 8739 +searchlectur 1 6.957497 0 0 8740 +treesgraphslectur 1 6.957497 0 0 8741 +sortinglectur 1 6.957497 0 0 8742 +tbaassign 1 6.957497 0 0 8743 +nameyear 1 6.957497 0 0 8744 +coursesprevi 1 6.957497 0 0 8745 +experiencerec 1 6.957497 0 0 8746 +tournament 1 6.957497 0 0 8747 +aconcord 1 6.957497 0 0 8748 +dakota 1 6.957497 0 0 8749 +programmingfal 1 6.957497 0 0 8750 +pphone 1 6.957497 0 0 8751 +searchabl 1 6.957497 0 0 8752 +threadschedul 1 6.957497 0 0 8753 +forproject 1 6.957497 0 0 8754 +graphcontain 1 6.957497 0 0 8755 +sched 1 6.957497 0 0 8756 +substr 1 6.957497 0 0 8757 +thejavaprogram 1 6.957497 0 0 8758 +languagebi 1 6.957497 0 0 8759 +gosl 1 6.957497 0 0 8760 +systemssect 1 6.957497 0 0 8761 +instructormarvin 1 6.957497 0 0 8762 +tarob 1 6.957497 0 0 8763 +mwfoffic 1 6.957497 0 0 8764 +distributioni 1 6.957497 0 0 8765 +typograph 1 6.957497 0 0 8766 +importantli 1 6.957497 0 0 8767 +arraywa 1 6.957497 0 0 8768 +isavail 1 6.957497 0 0 8769 +courseus 1 6.957497 0 0 8770 +likelyb 1 6.957497 0 0 8771 +presentedin 1 6.957497 0 0 8772 +givefork 1 6.957497 0 0 8773 +specificationshould 1 6.957497 0 0 8774 +garbl 1 6.957497 0 0 8775 +jake 1 6.957497 0 0 8776 +dawlei 1 6.957497 0 0 8777 +carr 1 6.957497 0 0 8778 +detailssect 1 6.957497 0 0 8779 +lipe 1 6.957497 0 0 8780 +srccontain 1 6.957497 0 0 8781 +javacontain 1 6.957497 0 0 8782 +classgraphdescrib 1 6.957497 0 0 8783 +petersoncycl 1 6.957497 0 0 8784 +notacycl 1 6.957497 0 0 8785 +petersonacycl 1 6.957497 0 0 8786 +acycl 1 6.957497 0 0 8787 +sharingfork 1 6.957497 0 0 8788 +jenner 1 6.957497 0 0 8789 +maxthink 1 6.957497 0 0 8790 +maxeat 1 6.957497 0 0 8791 +versionha 1 6.957497 0 0 8792 +argumenti 1 6.957497 0 0 8793 +charactersin 1 6.957497 0 0 8794 +franco 1 6.957497 0 0 8795 +maketo 1 6.957497 0 0 8796 +compilewithout 1 6.957497 0 0 8797 +computershav 1 6.957497 0 0 8798 +tutoriali 1 6.957497 0 0 8799 +onthread 1 6.957497 0 0 8800 +checkth 1 6.957497 0 0 8801 +ajava 1 6.957497 0 0 8802 +afil 1 6.957497 0 0 8803 +onelin 1 6.957497 0 0 8804 +localor 1 6.957497 0 0 8805 +csmon 1 6.957497 0 0 8806 +cslast 1 6.957497 0 0 8807 +beprocess 1 6.957497 0 0 8808 +replacementalgorithm 1 6.957497 0 0 8809 +statisticsdiscuss 1 6.957497 0 0 8810 +psychologyth 1 6.957497 0 0 8811 +anyquest 1 6.957497 0 0 8812 +thetext 1 6.957497 0 0 8813 +systemsbi 1 6.957497 0 0 8814 +specificationjava 1 6.957497 0 0 8815 +documentationwatch 1 6.957497 0 0 8816 +unixoper 1 6.957497 0 0 8817 +anycomput 1 6.957497 0 0 8818 +requireddata 1 6.957497 0 0 8819 +involveprocess 1 6.957497 0 0 8820 +butyou 1 6.957497 0 0 8821 +vigor 1 6.957497 0 0 8822 +punish 1 6.957497 0 0 8823 +dateind 1 6.957497 0 0 8824 +uniniti 1 6.957497 0 0 8825 +runtimerath 1 6.957497 0 0 8826 +byproduct 1 6.957497 0 0 8827 +withlanguag 1 6.957497 0 0 8828 +alwaysa 1 6.957497 0 0 8829 +disloc 1 6.957497 0 0 8830 +thetransit 1 6.957497 0 0 8831 +amazingli 1 6.957497 0 0 8832 +youalreadi 1 6.957497 0 0 8833 +arefer 1 6.957497 0 0 8834 +manuali 1 6.957497 0 0 8835 +wayfrom 1 6.957497 0 0 8836 +sophisticatedprogram 1 6.957497 0 0 8837 +ofoth 1 6.957497 0 0 8838 +niceonlin 1 6.957497 0 0 8839 +tutorialabout 1 6.957497 0 0 8840 +javaoct 1 6.957497 0 0 8841 +synchronizationoct 1 6.957497 0 0 8842 +schedulingoct 1 6.957497 0 0 8843 +schedulingdec 1 6.957497 0 0 8844 +systemsdec 1 6.957497 0 0 8845 +maryvernon 1 6.957497 0 0 8846 +andkarunamuthiah 1 6.957497 0 0 8847 +beinterchang 1 6.957497 0 0 8848 +archiveapproxim 1 6.957497 0 0 8849 +topicsweek 1 6.957497 0 0 8850 +oftopicsreadingsep 1 6.957497 0 0 8851 +processeschapt 1 6.957497 0 0 8852 +threadschapt 1 6.957497 0 0 8853 +exclusioncont 1 6.957497 0 0 8854 +semaphorescont 1 6.957497 0 0 8855 +summarycont 1 6.957497 0 0 8856 +doct 1 6.957497 0 0 8857 +schedulingchapt 1 6.957497 0 0 8858 +tlbschapter 1 6.957497 0 0 8859 +memorycont 1 6.957497 0 0 8860 +systemschapt 1 6.957497 0 0 8861 +directorieschapt 1 6.957497 0 0 8862 +methodstbanov 1 6.957497 0 0 8863 +reviewchapt 1 6.957497 0 0 8864 +vhdl 1 6.957497 0 0 8865 +mentorassign 1 6.957497 0 0 8866 +projectthi 1 6.957497 0 0 8867 +examsth 1 6.957497 0 0 8868 +endterm 1 6.957497 0 0 8869 +xbao 1 6.957497 0 0 8870 +implementationc 1 6.957497 0 0 8871 +implementationcours 1 6.957497 0 0 8872 +assignmentoth 1 6.957497 0 0 8873 +ingraham 1 6.957497 0 0 8874 +xuemei 1 6.957497 0 0 8875 +addinginform 1 6.957497 0 0 8876 +meetingroom 1 6.957497 0 0 8877 +labsfor 1 6.957497 0 0 8878 +beenmov 1 6.957497 0 0 8879 +raji 1 6.957497 0 0 8880 +donaldson 1 6.957497 0 0 8881 +gopalakrishnan 1 6.957497 0 0 8882 +powerpoint 1 6.957497 0 0 8883 +networksintroduct 1 6.957497 0 0 8884 +readingsclick 1 6.957497 0 0 8885 +networkingcours 1 6.957497 0 0 8886 +madisoncours 1 6.957497 0 0 8887 +informationlecturetim 1 6.957497 0 0 8888 +mwfplace 1 6.957497 0 0 8889 +statclass 1 6.957497 0 0 8890 +listinstructor 1 6.957497 0 0 8891 +landweberoffic 1 6.957497 0 0 8892 +srinivasa 1 6.957497 0 0 8893 +narayananoffic 1 6.957497 0 0 8894 +teitelbaumoffic 1 6.957497 0 0 8895 +naemail 1 6.957497 0 0 8896 +garbler 1 6.957497 0 0 8897 +bibliographyread 1 6.957497 0 0 8898 +icmp 1 6.957497 0 0 8899 +ospf 1 6.957497 0 0 8900 +qnet 1 6.957497 0 0 8901 +devc 1 6.957497 0 0 8902 +knowledgerichard 1 6.957497 0 0 8903 +belewvisit 1 6.957497 0 0 8904 +professorc 1 6.957497 0 0 8905 +departmentfal 1 6.957497 0 0 8906 +acal 1 6.957497 0 0 8907 +engrthi 1 6.957497 0 0 8908 +coures 1 6.957497 0 0 8909 +canse 1 6.957497 0 0 8910 +asyllabu 1 6.957497 0 0 8911 +mapof 1 6.957497 0 0 8912 +semesterwil 1 6.957497 0 0 8913 +infidel 1 6.957497 0 0 8914 +hypermai 1 6.957497 0 0 8915 +classrel 1 6.957497 0 0 8916 +telnet 1 6.957497 0 0 8917 +methodsthi 1 6.957497 0 0 8918 +orderli 1 6.957497 0 0 8919 +assignmentson 1 6.957497 0 0 8920 +numericalanalysi 1 6.957497 0 0 8921 +foremostmathematician 1 6.957497 0 0 8922 +trickytop 1 6.957497 0 0 8923 +textmai 1 6.957497 0 0 8924 +byaddit 1 6.957497 0 0 8925 +capitallett 1 6.957497 0 0 8926 +caselett 1 6.957497 0 0 8927 +sigmon 1 6.957497 0 0 8928 +reaction 1 6.957497 0 0 8929 +winor 1 6.957497 0 0 8930 +referenceviva 1 6.957497 0 0 8931 +analysisthi 1 6.957497 0 0 8932 +statlectur 1 6.957497 0 0 8933 +classnot 1 6.957497 0 0 8934 +viii 1 6.957497 0 0 8935 +courseoff 1 6.957497 0 0 8936 +theorythi 1 6.957497 0 0 8937 +dsilva 1 6.957497 0 0 8938 +sectioncsm 1 6.957497 0 0 8939 +firstdai 1 6.957497 0 0 8940 +adelson 1 6.957497 0 0 8941 +csstelephon 1 6.957497 0 0 8942 +ubyt 1 6.957497 0 0 8943 +imgstar 1 6.957497 0 0 8944 +hdtv 1 6.957497 0 0 8945 +atsc 1 6.957497 0 0 8946 +dyeroffic 1 6.957497 0 0 8947 +appointmentteach 1 6.957497 0 0 8948 +sooffic 1 6.957497 0 0 8949 +appointmentstud 1 6.957497 0 0 8950 +informationfundament 1 6.957497 0 0 8951 +featuredetect 1 6.957497 0 0 8952 +forreconstruct 1 6.957497 0 0 8953 +usingtechniqu 1 6.957497 0 0 8954 +asshap 1 6.957497 0 0 8955 +andocclud 1 6.957497 0 0 8956 +kasturi 1 6.957497 0 0 8957 +schunck 1 6.957497 0 0 8958 +readingsfrom 1 6.957497 0 0 8959 +batchessupplementari 1 6.957497 0 0 8960 +sourcesonlin 1 6.957497 0 0 8961 +informationmost 1 6.957497 0 0 8962 +urlhttp 1 6.957497 0 0 8963 +byfirst 1 6.957497 0 0 8964 +crop 1 6.957497 0 0 8965 +theintens 1 6.957497 0 0 8966 +thewindow 1 6.957497 0 0 8967 +colorif 1 6.957497 0 0 8968 +grayscal 1 6.957497 0 0 8969 +transformationsav 1 6.957497 0 0 8970 +andput 1 6.957497 0 0 8971 +whereth 1 6.957497 0 0 8972 +whatintens 1 6.957497 0 0 8973 +qualityof 1 6.957497 0 0 8974 +ownweb 1 6.957497 0 0 8975 +infin 1 6.957497 0 0 8976 +chessboard 1 6.957497 0 0 8977 +vconvert 1 6.957497 0 0 8978 +clean 1 6.957497 0 0 8979 +repn 1 6.957497 0 0 8980 +component_interp 1 6.957497 0 0 8981 +low_threshold 1 6.957497 0 0 8982 +high_threshold 1 6.957497 0 0 8983 +vlink 1 6.957497 0 0 8984 +vsegedg 1 6.957497 0 0 8985 +laplacian 1 6.957497 0 0 8986 +kass 1 6.957497 0 0 8987 +witkin 1 6.957497 0 0 8988 +terzopoulo 1 6.957497 0 0 8989 +curvatur 1 6.957497 0 0 8990 +laserprint 1 6.957497 0 0 8991 +netpbm 1 6.957497 0 0 8992 +pbmplu 1 6.957497 0 0 8993 +wandel 1 6.957497 0 0 8994 +allianc 1 6.957497 0 0 8995 +panoramix 1 6.957497 0 0 8996 +decfac 1 6.957497 0 0 8997 +synthet 1 6.957497 0 0 8998 +dzimm 1 6.957497 0 0 8999 +zimmermannemail 1 6.957497 0 0 9000 +educlass 1 6.957497 0 0 9001 +nolandoffic 1 6.957497 0 0 9002 +announcementsprogram 1 6.957497 0 0 9003 +handoutsprogramsexam 1 6.957497 0 0 9004 +quizzeslectur 1 6.957497 0 0 9005 +notesgreg 1 6.957497 0 0 9006 +guidegrad 1 6.957497 0 0 9007 +quizzesprogramsexam 1 6.957497 0 0 9008 +policytext 1 6.957497 0 0 9009 +zimmermann 1 6.957497 0 0 9010 +flowsspr 1 6.957497 0 0 9011 +ravindra 1 6.957497 0 0 9012 +magnanti 1 6.957497 0 0 9013 +orlin 1 6.957497 0 0 9014 +chvatal 1 6.957497 0 0 9015 +simplex 1 6.957497 0 0 9016 +alter 1 6.957497 0 0 9017 +hopkinsunivers 1 6.957497 0 0 9018 +duff 1 6.957497 0 0 9019 +erisman 1 6.957497 0 0 9020 +reid 1 6.957497 0 0 9021 +halmo 1 6.957497 0 0 9022 +kunchithapadam 1 6.957497 0 0 9023 +kunchithapadamemail 1 6.957497 0 0 9024 +edugener 1 6.957497 0 0 9025 +consultantssyllabuscours 1 6.957497 0 0 9026 +difficultywork 1 6.957497 0 0 9027 +homenewsstartup 1 6.957497 0 0 9028 +informationclass 1 6.957497 0 0 9029 +noteshomeworkexam 1 6.957497 0 0 9030 +quizzesstyl 1 6.957497 0 0 9031 +guideemail 1 6.957497 0 0 9032 +textproblem 1 6.957497 0 0 9033 +porgrammingwalt 1 6.957497 0 0 9034 +tribbl 1 6.957497 0 0 9035 +randomintinrang 1 6.957497 0 0 9036 +uppercas 1 6.957497 0 0 9037 +overwrit 1 6.957497 0 0 9038 +discrep 1 6.957497 0 0 9039 +solutionscours 1 6.957497 0 0 9040 +vleck 1 6.957497 0 0 9041 +guidelast 1 6.957497 0 0 9042 +sectionsc 1 6.957497 0 0 9043 +viewgraph 1 6.957497 0 0 9044 +bodnersect 1 6.957497 0 0 9045 +kunenoffic 1 6.957497 0 0 9046 +buildingtelephon 1 6.957497 0 0 9047 +thirdexam 1 6.957497 0 0 9048 +thedai 1 6.957497 0 0 9049 +usefulto 1 6.957497 0 0 9050 +lecturesand 1 6.957497 0 0 9051 +manypaperback 1 6.957497 0 0 9052 +lispcraft 1 6.957497 0 0 9053 +wilenski 1 6.957497 0 0 9054 +norvig 1 6.957497 0 0 9055 +essentiallli 1 6.957497 0 0 9056 +alpha_beta 1 6.957497 0 0 9057 +astar 1 6.957497 0 0 9058 +hilloffic 1 6.957497 0 0 9059 +statemail 1 6.957497 0 0 9060 +shenoffic 1 6.957497 0 0 9061 +mshen 1 6.957497 0 0 9062 +miscellaneawhat 1 6.957497 0 0 9063 +talksread 1 6.957497 0 0 9064 +solutionproject 1 6.957497 0 0 9065 +noonmiscellanea 1 6.957497 0 0 9066 +dice 1 6.957497 0 0 9067 +hangman 1 6.957497 0 0 9068 +melskithes 1 6.957497 0 0 9069 +uncomfort 1 6.957497 0 0 9070 +wesleypublish 1 6.957497 0 0 9071 +usingborland 1 6.957497 0 0 9072 +bankaccount 1 6.957497 0 0 9073 +withprompt 1 6.957497 0 0 9074 +psychologyinstructor 1 6.957497 0 0 9075 +announcementsthi 1 6.957497 0 0 9076 +scheduledfor 1 6.957497 0 0 9077 +haseveryth 1 6.957497 0 0 9078 +isaccur 1 6.957497 0 0 9079 +withinform 1 6.957497 0 0 9080 +linksar 1 6.957497 0 0 9081 +onfridai 1 6.957497 0 0 9082 +policyclass 1 6.957497 0 0 9083 +minmax 1 6.957497 0 0 9084 +findth 1 6.957497 0 0 9085 +enteredfrom 1 6.957497 0 0 9086 +stdin 1 6.957497 0 0 9087 +formlett 1 6.957497 0 0 9088 +theopen_fil 1 6.957497 0 0 9089 +hoax 1 6.957497 0 0 9090 +andprofession 1 6.957497 0 0 9091 +conductassign 1 6.957497 0 0 9092 +questionar 1 6.957497 0 0 9093 +classc 1 6.957497 0 0 9094 +programmingspr 1 6.957497 0 0 9095 +nothingeverydai 1 6.957497 0 0 9096 +pagescommon 1 6.957497 0 0 9097 +programmingmistakesarch 1 6.957497 0 0 9098 +placeto 1 6.957497 0 0 9099 +announcedcours 1 6.957497 0 0 9100 +andborland 1 6.957497 0 0 9101 +academicmisconduct 1 6.957497 0 0 9102 +anyform 1 6.957497 0 0 9103 +bigtodd 1 6.957497 0 0 9104 +thielwendi 1 6.957497 0 0 9105 +staatsabout 1 6.957497 0 0 9106 +msteel 1 6.957497 0 0 9107 +steeleemail 1 6.957497 0 0 9108 +buildingoffic 1 6.957497 0 0 9109 +soffic 1 6.957497 0 0 9110 +announcementsi 1 6.957497 0 0 9111 +thenew 1 6.957497 0 0 9112 +informationmidterm 1 6.957497 0 0 9113 +pastfew 1 6.957497 0 0 9114 +gloss 1 6.957497 0 0 9115 +makefulli 1 6.957497 0 0 9116 +objectivesabout 1 6.957497 0 0 9117 +consultantscours 1 6.957497 0 0 9118 +assignmentsnot 1 6.957497 0 0 9119 +handoutsprogram 1 6.957497 0 0 9120 +assignmentsexam 1 6.957497 0 0 9121 +quizzessom 1 6.957497 0 0 9122 +examplespolici 1 6.957497 0 0 9123 +policyus 1 6.957497 0 0 9124 +pagesintroduct 1 6.957497 0 0 9125 +windowsintroduct 1 6.957497 0 0 9126 +styleguid 1 6.957497 0 0 9127 +codetextproblem 1 6.957497 0 0 9128 +sessionalgebra 1 6.957497 0 0 9129 +prockemail 1 6.957497 0 0 9130 +thgrader 1 6.957497 0 0 9131 +haihong 1 6.957497 0 0 9132 +wangemail 1 6.957497 0 0 9133 +mtwrannounc 1 6.957497 0 0 9134 +gotton 1 6.957497 0 0 9135 +perus 1 6.957497 0 0 9136 +assing 1 6.957497 0 0 9137 +misconductcours 1 6.957497 0 0 9138 +simtel 1 6.957497 0 0 9139 +wildcard 1 6.957497 0 0 9140 +filesviru 1 6.957497 0 0 9141 +faqfun 1 6.957497 0 0 9142 +mapth 1 6.957497 0 0 9143 +comicshumor 1 6.957497 0 0 9144 +abort 1 6.957497 0 0 9145 +retri 1 6.957497 0 0 9146 +usersfin 1 6.957497 0 0 9147 +weeklab 1 6.957497 0 0 9148 +jokesget 1 6.957497 0 0 9149 +tryingsom 1 6.957497 0 0 9150 +somecompress 1 6.957497 0 0 9151 +unpack 1 6.957497 0 0 9152 +reviewsom 1 6.957497 0 0 9153 +minclud 1 6.957497 0 0 9154 +infocompress 1 6.957497 0 0 9155 +infofavorit 1 6.957497 0 0 9156 +russ 1 6.957497 0 0 9157 +manningemail 1 6.957497 0 0 9158 +rman 1 6.957497 0 0 9159 +scienceoffic 1 6.957497 0 0 9160 +rotc 1 6.957497 0 0 9161 +textold 1 6.957497 0 0 9162 +thefal 1 6.957497 0 0 9163 +assumedprerequisit 1 6.957497 0 0 9164 +rumelhart 1 6.957497 0 0 9165 +backprop 1 6.957497 0 0 9166 +cobweb 1 6.957497 0 0 9167 +austrian 1 6.957497 0 0 9168 +chunk 1 6.957497 0 0 9169 +laird 1 6.957497 0 0 9170 +rosenbloom 1 6.957497 0 0 9171 +newel 1 6.957497 0 0 9172 +dietterich 1 6.957497 0 0 9173 +zipser 1 6.957497 0 0 9174 +lenat 1 6.957497 0 0 9175 +kibler 1 6.957497 0 0 9176 +kbann 1 6.957497 0 0 9177 +brr 1 6.957497 0 0 9178 +ineedagoodicon 1 6.957497 0 0 9179 +nip 1 6.957497 0 0 9180 +premier 1 6.957497 0 0 9181 +shavlikshavlik 1 6.957497 0 0 9182 +inroom 1 6.957497 0 0 9183 +examtogeth 1 6.957497 0 0 9184 +bedetermin 1 6.957497 0 0 9185 +inconsider 1 6.957497 0 0 9186 +sciencestextther 1 6.957497 0 0 9187 +papersa 1 6.957497 0 0 9188 +thoseof 1 6.957497 0 0 9189 +lessout 1 6.957497 0 0 9190 +projecty 1 6.957497 0 0 9191 +implementationsof 1 6.957497 0 0 9192 +unvalid 1 6.957497 0 0 9193 +ashort 1 6.957497 0 0 9194 +presentationabout 1 6.957497 0 0 9195 +presentationsher 1 6.957497 0 0 9196 +presen 1 6.957497 0 0 9197 +manyan 1 6.957497 0 0 9198 +stubb 1 6.957497 0 0 9199 +bigg 1 6.957497 0 0 9200 +gunawan 1 6.957497 0 0 9201 +agu 1 6.957497 0 0 9202 +qingmin 1 6.957497 0 0 9203 +larsen 1 6.957497 0 0 9204 +conroi 1 6.957497 0 0 9205 +fritz 1 6.957497 0 0 9206 +jordan 1 6.957497 0 0 9207 +yanm 1 6.957497 0 0 9208 +xinyu 1 6.957497 0 0 9209 +munson 1 6.957497 0 0 9210 +wenjun 1 6.957497 0 0 9211 +xinyi 1 6.957497 0 0 9212 +yufei 1 6.957497 0 0 9213 +zeyu 1 6.957497 0 0 9214 +gopal 1 6.957497 0 0 9215 +leesolomon 1 6.957497 0 0 9216 +gradesgo 1 6.957497 0 0 9217 +exce 1 6.957497 0 0 9218 +bewar 1 6.957497 0 0 9219 +outsidehallwai 1 6.957497 0 0 9220 +homepagec 1 6.957497 0 0 9221 +responsibilityto 1 6.957497 0 0 9222 +informationinstructor 1 6.957497 0 0 9223 +munsonemail 1 6.957497 0 0 9224 +appointmentsect 1 6.957497 0 0 9225 +savitchclass 1 6.957497 0 0 9226 +informationexpectationssyllabusexam 1 6.957497 0 0 9227 +schedule 1 6.957497 0 0 9228 +mailgradingl 1 6.957497 0 0 9229 +assignmentsextra 1 6.957497 0 0 9230 +creditpoliciesconsult 1 6.957497 0 0 9231 +responsibilitiesacadem 1 6.957497 0 0 9232 +misconductoth 1 6.957497 0 0 9233 +informationdaili 1 6.957497 0 0 9234 +classoth 1 6.957497 0 0 9235 +resourcesc 1 6.957497 0 0 9236 +homepagetmunson 1 6.957497 0 0 9237 +turnidgeoffic 1 6.957497 0 0 9238 +tbalab 1 6.957497 0 0 9239 +tbaannouncementsclass 1 6.957497 0 0 9240 +classa 1 6.957497 0 0 9241 +byother 1 6.957497 0 0 9242 +gregorysharp 1 6.957497 0 0 9243 +staf 1 6.957497 0 0 9244 +policyassign 1 6.957497 0 0 9245 +princeoffic 1 6.957497 0 0 9246 +miscellaneousnew 1 6.957497 0 0 9247 +soln 1 6.957497 0 0 9248 +pmread 1 6.957497 0 0 9249 +quadric 1 6.957497 0 0 9250 +swept 1 6.957497 0 0 9251 +tensor 1 6.957497 0 0 9252 +tessel 1 6.957497 0 0 9253 +gourand 1 6.957497 0 0 9254 +vernier 1 6.957497 0 0 9255 +acuiti 1 6.957497 0 0 9256 +mispercept 1 6.957497 0 0 9257 +advect 1 6.957497 0 0 9258 +multiparamet 1 6.957497 0 0 9259 +educationlaw 1 6.957497 0 0 9260 +mandat 1 6.957497 0 0 9261 +intendingto 1 6.957497 0 0 9262 +requestedto 1 6.957497 0 0 9263 +jmccune 1 6.957497 0 0 9264 +csrelev 1 6.957497 0 0 9265 +universityrel 1 6.957497 0 0 9266 +exercisesthi 1 6.957497 0 0 9267 +universityundergradu 1 6.957497 0 0 9268 +dcomput 1 6.957497 0 0 9269 +sigucc 1 6.957497 0 0 9270 +basededuc 1 6.957497 0 0 9271 +areinclud 1 6.957497 0 0 9272 +aboutc 1 6.957497 0 0 9273 +semesteraccess 1 6.957497 0 0 9274 +semestereach 1 6.957497 1 0 9275 +anddocu 1 6.957497 1 0 9276 +metabal 1 6.957497 1 0 9277 +arcuri 1 6.957497 1 0 9278 +benton 1 6.957497 1 0 9279 +interdepend 1 6.957497 1 0 9280 +diffus 1 6.957497 1 0 9281 +pollut 1 6.957497 1 0 9282 +modelsfu 1 6.957497 1 0 9283 +antialias 1 6.957497 1 0 9284 +vermach 1 6.957497 1 0 9285 +hsun 1 6.957497 1 0 9286 +sandia 1 6.957497 0 0 9287 +livermor 1 6.957497 0 0 9288 +patti 1 6.957497 0 0 9289 +houghpatti 1 6.957497 0 0 9290 +frankh 1 6.957497 0 0 9291 +nicktrefethen 1 6.957497 0 0 9292 +schatz 1 6.957497 0 0 9293 +optimizationi 1 6.957497 0 0 9294 +meza 1 6.957497 0 0 9295 +nationallaboratori 1 6.957497 0 0 9296 +ofweight 1 6.957497 0 0 9297 +baggett 1 6.957497 0 0 9298 +turbul 1 6.957497 0 0 9299 +baggettjeff 1 6.957497 0 0 9300 +hydrodynam 1 6.957497 0 0 9301 +blend 1 6.957497 0 0 9302 +iwould 1 6.957497 0 0 9303 +abscissa 1 6.957497 0 0 9304 +andphillip 1 6.957497 0 0 9305 +subcrit 1 6.957497 0 0 9306 +libraryncstrl 1 6.957497 0 0 9307 +ancestr 1 6.957497 0 0 9308 +internationalcollect 1 6.957497 0 0 9309 +departmentsand 1 6.957497 0 0 9310 +availablefor 1 6.957497 0 0 9311 +eduat 1 6.957497 0 0 9312 +ncstrlcollect 1 6.957497 0 0 9313 +serversoper 1 6.957497 0 0 9314 +participatinginstitut 1 6.957497 0 0 9315 +ncstrlpress 1 6.957497 0 0 9316 +theparticip 1 6.957497 0 0 9317 +moreread 1 6.957497 0 0 9318 +forinstitut 1 6.957497 0 0 9319 +informationfind 1 6.957497 0 0 9320 +snew 1 6.957497 0 0 9321 +totech 1 6.957497 0 0 9322 +instituteabout 1 6.957497 0 0 9323 +researchersat 1 6.957497 0 0 9324 +searchal 1 6.957497 0 0 9325 +reportssearch 1 6.957497 0 0 9326 +ipic 1 6.957497 0 0 9327 +itisingapor 1 6.957497 0 0 9328 +altavistaforum 1 6.957497 0 0 9329 +institutejim 1 6.957497 0 0 9330 +davisxerox 1 6.957497 0 0 9331 +corporationphd 1 6.957497 0 0 9332 +improvecommun 1 6.957497 0 0 9333 +andcont 1 6.957497 0 0 9334 +reformat 1 6.957497 0 0 9335 +inhypertext 1 6.957497 0 0 9336 +thecstr 1 6.957497 0 0 9337 +anarpa 1 6.957497 0 0 9338 +moreeasili 1 6.957497 0 0 9339 +electronicsystem 1 6.957497 0 0 9340 +ofor 1 6.957497 0 0 9341 +memoryinclud 1 6.957497 0 0 9342 +sscreenplai 1 6.957497 0 0 9343 +producedth 1 6.957497 0 0 9344 +andjustif 1 6.957497 0 0 9345 +developingcorpor 1 6.957497 0 0 9346 +sharedannot 1 6.957497 0 0 9347 +howpeopl 1 6.957497 0 0 9348 +inelectron 1 6.957497 0 0 9349 +prototypeimplement 1 6.957497 0 0 9350 +shareddocu 1 6.957497 0 0 9351 +nnotat 1 6.957497 0 0 9352 +berequest 1 6.957497 0 0 9353 +orcorrect 1 6.957497 0 0 9354 +aus 1 6.957497 0 0 9355 +willfind 1 6.957497 0 0 9356 +whetherstud 1 6.957497 0 0 9357 +usefulmean 1 6.957497 0 0 9358 +designof 1 6.957497 0 0 9359 +proxi 1 6.957497 0 0 9360 +reliablycarri 1 6.957497 0 0 9361 +toeither 1 6.957497 0 0 9362 +alsopap 1 6.957497 0 0 9363 +publicatiion 1 6.957497 0 0 9364 +thedrimi 1 6.957497 0 0 9365 +meprofession 1 6.957497 0 0 9366 +historythi 1 6.957497 0 0 9367 +improvisationi 1 6.957497 0 0 9368 +resumeno 1 6.957497 0 0 9369 +likeit 1 6.957497 0 0 9370 +searchingmetacrawlerbi 1 6.957497 0 0 9371 +lauckhartand 1 6.957497 0 0 9372 +etzioniif 1 6.957497 0 0 9373 +wordssort 1 6.957497 0 0 9374 +locationcontrol 1 6.957497 0 0 9375 +problemswebmast 1 6.957497 0 0 9376 +comcopyright 1 6.957497 0 0 9377 +lauckhart 1 6.957497 0 0 9378 +jogger 1 6.957497 0 0 9379 +decentr 1 6.957497 0 0 9380 +lumelskyprofessormechan 1 6.957497 0 0 9381 +workshopconnamoran 1 6.957497 0 0 9382 +ierland 1 6.957497 0 0 9383 +researchera 1 6.957497 0 0 9384 +halldept 1 6.957497 0 0 9385 +thehorusand 1 6.957497 0 0 9386 +bandwith 1 6.957497 0 0 9387 +horuswith 1 6.957497 0 0 9388 +fallen 1 6.957497 0 0 9389 +latencyfor 1 6.957497 0 0 9390 +protocolsar 1 6.957497 0 0 9391 +structureand 1 6.957497 0 0 9392 +guarant 1 6.957497 0 0 9393 +acur 1 6.957497 0 0 9394 +aglob 1 6.957497 0 0 9395 +supportfailur 1 6.957497 0 0 9396 +suspis 1 6.957497 0 0 9397 +workwith 1 6.957497 0 0 9398 +middlewar 1 6.957497 0 0 9399 +brainchild 1 6.957497 0 0 9400 +andken 1 6.957497 0 0 9401 +withthorsten 1 6.957497 0 0 9402 +horusexperi 1 6.957497 0 0 9403 +lectureson 1 6.957497 0 0 9404 +virtuallysynchron 1 6.957497 0 0 9405 +princpl 1 6.957497 0 0 9406 +hpc 1 6.957497 0 0 9407 +kati 1 6.957497 0 0 9408 +osiri 1 6.957497 0 0 9409 +egyptian 1 6.957497 0 0 9410 +god 1 6.957497 0 0 9411 +rejoic 1 6.957497 0 0 9412 +groupcommun 1 6.957497 0 0 9413 +triumphant 1 6.957497 0 0 9414 +ofisi 1 6.957497 0 0 9415 +heir 1 6.957497 0 0 9416 +appealedstrongli 1 6.957497 0 0 9417 +becausein 1 6.957497 0 0 9418 +possess 1 6.957497 0 0 9419 +renew 1 6.957497 0 0 9420 +movementa 1 6.957497 0 0 9421 +inact 1 6.957497 0 0 9422 +applicationsbas 1 6.957497 0 0 9423 +infault 1 6.957497 0 0 9424 +thatexploit 1 6.957497 0 0 9425 +theoveral 1 6.957497 0 0 9426 +applicationprotocol 1 6.957497 0 0 9427 +applicationrequir 1 6.957497 0 0 9428 +launch 1 6.957497 0 0 9429 +theisi 1 6.957497 0 0 9430 +robustdistribut 1 6.957497 0 0 9431 +unsuit 1 6.957497 0 0 9432 +asappl 1 6.957497 0 0 9433 +besidesth 1 6.957497 0 0 9434 +usedfor 1 6.957497 0 0 9435 +sametim 1 6.957497 0 0 9436 +lighter 1 6.957497 0 0 9437 +beus 1 6.957497 0 0 9438 +commericalright 1 6.957497 0 0 9439 +manyoth 1 6.957497 0 0 9440 +nofe 1 6.957497 0 0 9441 +ensemblewil 1 6.957497 0 0 9442 +groupwareappl 1 6.957497 0 0 9443 +differentclass 1 6.957497 0 0 9444 +onnext 1 6.957497 0 0 9445 +speedcommun 1 6.957497 0 0 9446 +systemsproject 1 6.957497 0 0 9447 +navtech 1 6.957497 0 0 9448 +stormcast 1 6.957497 0 0 9449 +tacomaproject 1 6.957497 0 0 9450 +thesepag 1 6.957497 0 0 9451 +begotten 1 6.957497 0 0 9452 +sorrow 1 6.957497 0 0 9453 +herhusband 1 6.957497 0 0 9454 +goddess 1 6.957497 0 0 9455 +distress 1 6.957497 0 0 9456 +equippedwith 1 6.957497 0 0 9457 +utter 1 6.957497 0 0 9458 +mighthav 1 6.957497 0 0 9459 +secret 1 6.957497 0 0 9460 +suckl 1 6.957497 0 0 9461 +rear 1 6.957497 0 0 9462 +horusvisit 1 6.957497 0 0 9463 +abstractpag 1 6.957497 0 0 9464 +relatedto 1 6.957497 0 0 9465 +maffei 1 6.957497 0 0 9466 +flexiblegroup 1 6.957497 0 0 9467 +hyme 1 6.957497 0 0 9468 +osirisfrom 1 6.957497 0 0 9469 +papyru 1 6.957497 0 0 9470 +walli 1 6.957497 0 0 9471 +budg 1 6.957497 0 0 9472 +studiesin 1 6.957497 0 0 9473 +infoget 1 6.957497 0 0 9474 +contactswithin 1 6.957497 0 0 9475 +facultyfind 1 6.957497 0 0 9476 +ortheir 1 6.957497 0 0 9477 +researchcheck 1 6.957497 0 0 9478 +aboutour 1 6.957497 0 0 9479 +publicationsfind 1 6.957497 0 0 9480 +researcherseith 1 6.957497 0 0 9481 +degreeslook 1 6.957497 0 0 9482 +orundergradu 1 6.957497 0 0 9483 +academicsrefer 1 6.957497 0 0 9484 +webfor 1 6.957497 0 0 9485 +generalcoursedescript 1 6.957497 0 0 9486 +peopleget 1 6.957497 0 0 9487 +directorylist 1 6.957497 0 0 9488 +activitiesfind 1 6.957497 0 0 9489 +theassoci 1 6.957497 0 0 9490 +excellenthockei 1 6.957497 0 0 9491 +serverscheck 1 6.957497 0 0 9492 +gopherserv 1 6.957497 0 0 9493 +ftpserver 1 6.957497 0 0 9494 +sitesquest 1 6.957497 0 0 9495 +informationpres 1 6.957497 0 0 9496 +syosset 1 6.957497 0 0 9497 +californialockhe 1 6.957497 0 0 9498 +yorkaltera 1 6.957497 0 0 9499 +californiafun 1 6.957497 0 0 9500 +domainvth 1 6.957497 0 0 9501 +siteoth 1 6.957497 0 0 9502 +worldcareermosaictop 1 6.957497 0 0 9503 +kmai 1 6.957497 0 0 9504 +encyclopedia 1 6.957497 0 0 9505 +bethesda 1 6.957497 0 0 9506 +columbu 1 6.957497 0 0 9507 +saltongerard 1 6.957497 0 0 9508 +saltonprofessorg 1 6.957497 0 0 9509 +cheapli 1 6.957497 0 0 9510 +funk 1 6.957497 0 0 9511 +wagnal 1 6.957497 0 0 9512 +committeeprofession 1 6.957497 0 0 9513 +seventeenth 1 6.957497 0 0 9514 +darmstadt 1 6.957497 0 0 9515 +lecturesautomat 1 6.957497 0 0 9516 +konstanz 1 6.957497 0 0 9517 +asi 1 6.957497 0 0 9518 +publicationsapproach 1 6.957497 0 0 9519 +passag 1 6.957497 0 0 9520 +nonlinearli 1 6.957497 0 0 9521 +yui 1 6.957497 0 0 9522 +liyui 1 6.957497 0 0 9523 +liresearch 1 6.957497 0 0 9524 +associateyui 1 6.957497 0 0 9525 +lecturesan 1 6.957497 0 0 9526 +landau 1 6.957497 0 0 9527 +zippelrichard 1 6.957497 0 0 9528 +zippelsenior 1 6.957497 0 0 9529 +associaterz 1 6.957497 0 0 9530 +computationlecturesalgebra 1 6.957497 0 0 9531 +publicationseffect 1 6.957497 0 0 9532 +birmankenneth 1 6.957497 0 0 9533 +birmanprofessorphd 1 6.957497 0 0 9534 +greenbergdonald 1 6.957497 0 0 9535 +greenberg 1 6.957497 0 0 9536 +schurman 1 6.957497 0 0 9537 +perceptu 1 6.957497 0 0 9538 +activitiesdirector 1 6.957497 0 0 9539 +visualizationprofession 1 6.957497 0 0 9540 +acmreturn 1 6.957497 0 0 9541 +eatc 1 6.957497 0 0 9542 +juri 1 6.957497 0 0 9543 +hartmanisjuri 1 6.957497 0 0 9544 +hartmani 1 6.957497 0 0 9545 +strateg 1 6.957497 0 0 9546 +representativeschair 1 6.957497 0 0 9547 +committeehonorsacm 1 6.957497 0 0 9548 +stearn 1 6.957497 0 0 9549 +latvian 1 6.957497 0 0 9550 +charter 1 6.957497 0 0 9551 +presseditori 1 6.957497 0 0 9552 +sciencegoedel 1 6.957497 0 0 9553 +awardshonorari 1 6.957497 0 0 9554 +dortmund 1 6.957497 0 0 9555 +lecturessom 1 6.957497 0 0 9556 +benni 1 6.957497 0 0 9557 +chor 1 6.957497 0 0 9558 +od 1 6.957497 0 0 9559 +goldreich 1 6.957497 0 0 9560 +hastad 1 6.957497 0 0 9561 +desh 1 6.957497 0 0 9562 +ranjan 1 6.957497 0 0 9563 +pankaj 1 6.957497 0 0 9564 +rohatgi 1 6.957497 0 0 9565 +kolmogorov 1 6.957497 0 0 9566 +hopcroftjohn 1 6.957497 0 0 9567 +hopcroftjoseph 1 6.957497 0 0 9568 +silbert 1 6.957497 0 0 9569 +engineeringprofessor 1 6.957497 0 0 9570 +sciencephd 1 6.957497 0 0 9571 +overse 1 6.957497 0 0 9572 +applicationsmemb 1 6.957497 0 0 9573 +boardmemb 1 6.957497 0 0 9574 +forcememb 1 6.957497 0 0 9575 +machinerychairman 1 6.957497 0 0 9576 +trusteesmemb 1 6.957497 0 0 9577 +lucil 1 6.957497 0 0 9578 +foundationmemb 1 6.957497 0 0 9579 +sloan 1 6.957497 0 0 9580 +committeeadvisori 1 6.957497 0 0 9581 +analysiseditor 1 6.957497 0 0 9582 +geometryassoci 1 6.957497 0 0 9583 +sciencesreturn 1 6.957497 0 0 9584 +pingalikeshav 1 6.957497 0 0 9585 +pingaliassoci 1 6.957497 0 0 9586 +radic 1 6.957497 0 0 9587 +sparsiti 1 6.957497 0 0 9588 +krylov 1 6.957497 0 0 9589 +petsc 1 6.957497 0 0 9590 +activitiespanel 1 6.957497 0 0 9591 +ballist 1 6.957497 0 0 9592 +odyssei 1 6.957497 0 0 9593 +computereditori 1 6.957497 0 0 9594 +awardsn 1 6.957497 0 0 9595 +lecturesfast 1 6.957497 0 0 9596 +publicationssolv 1 6.957497 0 0 9597 +gianfranco 1 6.957497 0 0 9598 +bilardi 1 6.957497 0 0 9599 +trom 1 6.957497 0 0 9600 +marktoberdorf 1 6.957497 0 0 9601 +lubeck 1 6.957497 0 0 9602 +johansen 1 6.957497 0 0 9603 +engineeringeditor 1 6.957497 0 0 9604 +schneiderfr 1 6.957497 0 0 9605 +assert 1 6.957497 0 0 9606 +undefin 1 6.957497 0 0 9607 +bressoud 1 6.957497 0 0 9608 +instantli 1 6.957497 0 0 9609 +freed 1 6.957497 0 0 9610 +roam 1 6.957497 0 0 9611 +activitiessabbat 1 6.957497 0 0 9612 +computingeditor 1 6.957497 0 0 9613 +letterseditor 1 6.957497 0 0 9614 +systemseditor 1 6.957497 0 0 9615 +surveysco 1 6.957497 0 0 9616 +verlagprogram 1 6.957497 0 0 9617 +constructionprogram 1 6.957497 0 0 9618 +resili 1 6.957497 0 0 9619 +applicationsprogram 1 6.957497 0 0 9620 +sixteenth 1 6.957497 0 0 9621 +symposiumprogram 1 6.957497 0 0 9622 +systemsst 1 6.957497 0 0 9623 +chissa 1 6.957497 0 0 9624 +technologymemb 1 6.957497 0 0 9625 +agencyreview 1 6.957497 0 0 9626 +leibniz 1 6.957497 0 0 9627 +universitymemb 1 6.957497 0 0 9628 +awardsfellow 1 6.957497 0 0 9629 +sciencefellow 1 6.957497 0 0 9630 +machinerylecturesproof 1 6.957497 0 0 9631 +afosr 1 6.957497 0 0 9632 +panelist 1 6.957497 0 0 9633 +publicationsreason 1 6.957497 0 0 9634 +icalp 1 6.957497 0 0 9635 +boll 1 6.957497 0 0 9636 +limor 1 6.957497 0 0 9637 +ultradepend 1 6.957497 0 0 9638 +dehn 1 6.957497 0 0 9639 +primu 1 6.957497 0 0 9640 +kroneck 1 6.957497 0 0 9641 +umea 1 6.957497 0 0 9642 +loancharl 1 6.957497 0 0 9643 +loanprofessorphd 1 6.957497 0 0 9644 +inhomogen 1 6.957497 0 0 9645 +committeedepart 1 6.957497 0 0 9646 +meetingfreshman 1 6.957497 0 0 9647 +analysismemb 1 6.957497 0 0 9648 +diprima 1 6.957497 0 0 9649 +lecturesappl 1 6.957497 0 0 9650 +linkop 1 6.957497 0 0 9651 +publicationsoptim 1 6.957497 0 0 9652 +ellerbroek 1 6.957497 0 0 9653 +plemmon 1 6.957497 0 0 9654 +catherin 1 6.957497 0 0 9655 +wagnercatherin 1 6.957497 0 0 9656 +wagnersenior 1 6.957497 0 0 9657 +lecturerphd 1 6.957497 0 0 9658 +fileserv 1 6.957497 0 0 9659 +playback 1 6.957497 0 0 9660 +decompress 1 6.957497 0 0 9661 +transcod 1 6.957497 0 0 9662 +ketan 1 6.957497 0 0 9663 +bsmith 1 6.957497 0 0 9664 +talksmisc 1 6.957497 0 0 9665 +linksresearch 1 6.957497 0 0 9666 +ourcomput 1 6.957497 0 0 9667 +commercialand 1 6.957497 0 0 9668 +aredesign 1 6.957497 0 0 9669 +premis 1 6.957497 0 0 9670 +infrastructurei 1 6.957497 0 0 9671 +andappl 1 6.957497 0 0 9672 +workingsystem 1 6.957497 0 0 9673 +zenodistribut 1 6.957497 0 0 9674 +anethernet 1 6.957497 0 0 9675 +serverof 1 6.957497 0 0 9676 +videostor 1 6.957497 0 0 9677 +severalserv 1 6.957497 0 0 9678 +effortdeliveri 1 6.957497 0 0 9679 +resourcereserv 1 6.957497 0 0 9680 +communicationinfrastructur 1 6.957497 0 0 9681 +forbandwidth 1 6.957497 0 0 9682 +networkenviron 1 6.957497 0 0 9683 +accessiblebi 1 6.957497 0 0 9684 +latterenviron 1 6.957497 0 0 9685 +datagram 1 6.957497 0 0 9686 +audioand 1 6.957497 0 0 9687 +metropolitan 1 6.957497 0 0 9688 +andwid 1 6.957497 0 0 9689 +todeliv 1 6.957497 0 0 9690 +compressedrepresent 1 6.957497 0 0 9691 +reducesth 1 6.957497 0 0 9692 +indicatesthat 1 6.957497 0 0 9693 +fasterthan 1 6.957497 0 0 9694 +currentlyextend 1 6.957497 0 0 9695 +onecompress 1 6.957497 0 0 9696 +compresseddomain 1 6.957497 0 0 9697 +simplifyexperiment 1 6.957497 0 0 9698 +calledrivl 1 6.957497 0 0 9699 +allowsvideo 1 6.957497 0 0 9700 +resolutionand 1 6.957497 0 0 9701 +whatpostscript 1 6.957497 0 0 9702 +resolutionindepend 1 6.957497 0 0 9703 +sameprogram 1 6.957497 0 0 9704 +whileedit 1 6.957497 0 0 9705 +qualityfinish 1 6.957497 0 0 9706 +bepreview 1 6.957497 0 0 9707 +dpiprint 1 6.957497 0 0 9708 +onvideo 1 6.957497 0 0 9709 +videous 1 6.957497 0 0 9710 +teachingat 1 6.957497 0 0 9711 +logan 1 6.957497 0 0 9712 +ontario 1 6.957497 0 0 9713 +annett 1 6.957497 0 0 9714 +hanna 1 6.957497 0 0 9715 +mmcn 1 6.957497 0 0 9716 +documentationth 1 6.957497 0 0 9717 +priceweb 1 6.957497 0 0 9718 +leeser 1 6.957497 0 0 9719 +eaton 1 6.957497 0 0 9720 +computationeditor 1 6.957497 0 0 9721 +betweencomput 1 6.957497 0 0 9722 +constablerobert 1 6.957497 0 0 9723 +constabledepart 1 6.957497 0 0 9724 +professorrc 1 6.957497 0 0 9725 +researchw 1 6.957497 0 0 9726 +providemechan 1 6.957497 0 0 9727 +implementedthre 1 6.957497 0 0 9728 +lispprogram 1 6.957497 0 0 9729 +sucha 1 6.957497 0 0 9730 +canexpress 1 6.957497 0 0 9731 +asmetalevel 1 6.957497 0 0 9732 +canevalu 1 6.957497 0 0 9733 +nuprli 1 6.957497 0 0 9734 +fomal 1 6.957497 0 0 9735 +iscal 1 6.957497 0 0 9736 +termeditor 1 6.957497 0 0 9737 +itsintern 1 6.957497 0 0 9738 +hedefinit 1 6.957497 0 0 9739 +inconstruct 1 6.957497 0 0 9740 +mechanismha 1 6.957497 0 0 9741 +rebuilt 1 6.957497 0 0 9742 +thework 1 6.957497 0 0 9743 +isan 1 6.957497 0 0 9744 +builtprincip 1 6.957497 0 0 9745 +internaldescript 1 6.957497 0 0 9746 +withmiriam 1 6.957497 0 0 9747 +davidgri 1 6.957497 0 0 9748 +richardzippel 1 6.957497 0 0 9749 +withless 1 6.957497 0 0 9750 +aagard 1 6.957497 0 0 9751 +thecorrect 1 6.957497 0 0 9752 +bedrocsystem 1 6.957497 0 0 9753 +widelyus 1 6.957497 0 0 9754 +efforttaught 1 6.957497 0 0 9755 +themann 1 6.957497 0 0 9756 +programmingprocess 1 6.957497 0 0 9757 +givn 1 6.957497 0 0 9758 +ofpolya 1 6.957497 0 0 9759 +tryingto 1 6.957497 0 0 9760 +thepolya 1 6.957497 0 0 9761 +conal 1 6.957497 0 0 9762 +mannion 1 6.957497 0 0 9763 +ofus 1 6.957497 0 0 9764 +discussingproblem 1 6.957497 0 0 9765 +ssymbol 1 6.957497 0 0 9766 +computingsoftwar 1 6.957497 0 0 9767 +isbuild 1 6.957497 0 0 9768 +presseditor 1 6.957497 0 0 9769 +pressgener 1 6.957497 0 0 9770 +licsprogram 1 6.957497 0 0 9771 +jumelageprogram 1 6.957497 0 0 9772 +softwarerefere 1 6.957497 0 0 9773 +nserc 1 6.957497 0 0 9774 +scienceunivers 1 6.957497 0 0 9775 +committeecomput 1 6.957497 0 0 9776 +committeeprovost 1 6.957497 0 0 9777 +mathematicslecturesform 1 6.957497 0 0 9778 +inria 1 6.957497 0 0 9779 +bengurion 1 6.957497 0 0 9780 +sheva 1 6.957497 0 0 9781 +aviv 1 6.957497 0 0 9782 +metaprogram 1 6.957497 0 0 9783 +engineeringworkshop 1 6.957497 0 0 9784 +publicationsform 1 6.957497 0 0 9785 +tendenc 1 6.957497 0 0 9786 +verju 1 6.957497 0 0 9787 +metalevel 1 6.957497 0 0 9788 +broi 1 6.957497 0 0 9789 +aflorenc 1 6.957497 0 0 9790 +professionalinterest 1 6.957497 0 0 9791 +academicsresearchworkinterest 1 6.957497 0 0 9792 +mewith 1 6.957497 0 0 9793 +kawazo 1 6.957497 0 0 9794 +algorithmsrandom 1 6.957497 0 0 9795 +consensusgo 1 6.957497 0 0 9796 +brazil 1 6.957497 0 0 9797 +constructionmarco 1 6.957497 0 0 9798 +dizzi 1 6.957497 0 0 9799 +nandonet 1 6.957497 0 0 9800 +sunworld 1 6.957497 0 0 9801 +javaworldcours 1 6.957497 0 0 9802 +bibliographyc 1 6.957497 0 0 9803 +reportalfr 1 6.957497 0 0 9804 +ahong 1 6.957497 0 0 9805 +pagevineet 1 6.957497 0 0 9806 +ahujam 1 6.957497 0 0 9807 +apt 1 6.957497 0 0 9808 +reportfal 1 6.957497 0 0 9809 +ching 1 6.957497 0 0 9810 +chinglan 1 6.957497 0 0 9811 +edumast 1 6.957497 0 0 9812 +beau 1 6.957497 0 0 9813 +seneca 1 6.957497 0 0 9814 +examplegraph 1 6.957497 0 0 9815 +alexei 1 6.957497 0 0 9816 +pagealexei 1 6.957497 0 0 9817 +vaysburdalexei 1 6.957497 0 0 9818 +andobject 1 6.957497 0 0 9819 +ecol 1 6.957497 0 0 9820 +polytechniqu 1 6.957497 0 0 9821 +federal 1 6.957497 0 0 9822 +cornellcornel 1 6.957497 0 0 9823 +directorycornel 1 6.957497 0 0 9824 +directorycours 1 6.957497 0 0 9825 +examscornel 1 6.957497 0 0 9826 +calendarcornel 1 6.957497 0 0 9827 +musicbailei 1 6.957497 0 0 9828 +concertscornel 1 6.957497 0 0 9829 +ithacaworld 1 6.957497 0 0 9830 +odessa 1 6.957497 0 0 9831 +odessaweb 1 6.957497 0 0 9832 +yama 1 6.957497 0 0 9833 +coursesvlsi 1 6.957497 0 0 9834 +downto 1 6.957497 0 0 9835 +imparti 1 6.957497 0 0 9836 +amithyamasanim 1 6.957497 0 0 9837 +yorki 1 6.957497 0 0 9838 +garcia 1 6.957497 0 0 9839 +mailstop 1 6.957497 0 0 9840 +ucup 1 6.957497 0 0 9841 +warburton 1 6.957497 0 0 9842 +comi 1 6.957497 0 0 9843 +currentlyemploi 1 6.957497 0 0 9844 +workingin 1 6.957497 0 0 9845 +javamedia 1 6.957497 0 0 9846 +groupeducationfal 1 6.957497 0 0 9847 +cssoftwar 1 6.957497 0 0 9848 +csspring 1 6.957497 0 0 9849 +csproject 1 6.957497 0 0 9850 +railroadsystem 1 6.957497 0 0 9851 +onyx 1 6.957497 0 0 9852 +openinventord 1 6.957497 0 0 9853 +through 1 6.957497 0 0 9854 +documentimag 1 6.957497 0 0 9855 +chipdevelop 1 6.957497 0 0 9856 +basicallycompress 1 6.957497 0 0 9857 +thisalgorithm 1 6.957497 0 0 9858 +cachecam 1 6.957497 0 0 9859 +inputstream 1 6.957497 0 0 9860 +capableof 1 6.957497 0 0 9861 +nowai 1 6.957497 0 0 9862 +rivlproposalpresentationand 1 6.957497 0 0 9863 +dthi 1 6.957497 0 0 9864 +itswritten 1 6.957497 0 0 9865 +parallelomania 1 6.957497 0 0 9866 +resumehtmlpostscript 1 6.957497 0 0 9867 +satyaprasad 1 6.957497 0 0 9868 +avinashgupta 1 6.957497 0 0 9869 +kartikh 1 6.957497 0 0 9870 +kapadia 1 6.957497 0 0 9871 +hrishikeshdixit 1 6.957497 0 0 9872 +vineetahuja 1 6.957497 0 0 9873 +apatel 1 6.957497 0 0 9874 +galleria 1 6.957497 0 0 9875 +chronologia 1 6.957497 0 0 9876 +universityresumedepart 1 6.957497 0 0 9877 +enrolledgradu 1 6.957497 0 0 9878 +canvasd 1 6.957497 0 0 9879 +conferencingmultimedia 1 6.957497 0 0 9880 +assignmentsproject 1 6.957497 0 0 9881 +netan 1 6.957497 0 0 9882 +skeen 1 6.957497 0 0 9883 +scienceworld 1 6.957497 0 0 9884 +multimediamaharaja 1 6.957497 0 0 9885 +sayajirao 1 6.957497 0 0 9886 +academicsfriend 1 6.957497 0 0 9887 +techoreli 1 6.957497 0 0 9888 +limitedjob 1 6.957497 0 0 9889 +profilelif 1 6.957497 0 0 9890 +relianc 1 6.957497 0 0 9891 +jhaveriashish 1 6.957497 0 0 9892 +jhaveridepart 1 6.957497 0 0 9893 +engineeringresumehtmlpost 1 6.957497 0 0 9894 +scriptcourseworkadvanc 1 6.957497 0 0 9895 +systemscsmultimedia 1 6.957497 0 0 9896 +systemscsengin 1 6.957497 0 0 9897 +networkscsprogram 1 6.957497 0 0 9898 +csashish 1 6.957497 0 0 9899 +jhaveri 1 6.957497 0 0 9900 +aswin 1 6.957497 0 0 9901 +skyacr 1 6.957497 0 0 9902 +systemmi 1 6.957497 0 0 9903 +annek 1 6.957497 0 0 9904 +deejay 1 6.957497 0 0 9905 +isdn 1 6.957497 0 0 9906 +hop 1 6.957497 0 0 9907 +nederlands 1 6.957497 0 0 9908 +clubi 1 6.957497 0 0 9909 +trap 1 6.957497 0 0 9910 +welcomeavinash 1 6.957497 0 0 9911 +guptam 1 6.957497 0 0 9912 +streetcambridg 1 6.957497 0 0 9913 +thecia 1 6.957497 0 0 9914 +presentationpent 1 6.957497 0 0 9915 +skillpent 1 6.957497 0 0 9916 +oppon 1 6.957497 0 0 9917 +gamedownload 1 6.957497 0 0 9918 +ipvimpl 1 6.957497 0 0 9919 +proposalprogress 1 6.957497 0 0 9920 +reportsam 1 6.957497 0 0 9921 +pageon 1 6.957497 0 0 9922 +internethytelnetth 1 6.957497 0 0 9923 +catalogeinet 1 6.957497 0 0 9924 +galaxyplanet 1 6.957497 0 0 9925 +pagejoel 1 6.957497 0 0 9926 +indexyahoo 1 6.957497 0 0 9927 +wwwwebcrawlerlycosmi 1 6.957497 0 0 9928 +barber 1 6.957497 0 0 9929 +bulli 1 6.957497 0 0 9930 +ponch 1 6.957497 0 0 9931 +inde 1 6.957497 0 0 9932 +bush 1 6.957497 0 0 9933 +fleshpooooooooooooooch 1 6.957497 0 0 9934 +inclin 1 6.957497 0 0 9935 +callm 1 6.957497 0 0 9936 +orpooch 1 6.957497 0 0 9937 +guppi 1 6.957497 0 0 9938 +mama 1 6.957497 0 0 9939 +phin 1 6.957497 0 0 9940 +attendedmontgomeri 1 6.957497 0 0 9941 +collegetown 1 6.957497 0 0 9942 +adjac 1 6.957497 0 0 9943 +sublet 1 6.957497 0 0 9944 +oncolleg 1 6.957497 0 0 9945 +radiu 1 6.957497 0 0 9946 +epsilon 1 6.957497 0 0 9947 +cayuga 1 6.957497 0 0 9948 +thefilt 1 6.957497 0 0 9949 +ofc 1 6.957497 0 0 9950 +intereststhi 1 6.957497 0 0 9951 +rivlan 1 6.957497 0 0 9952 +tracker 1 6.957497 0 0 9953 +rivli 1 6.957497 0 0 9954 +smpd 1 6.957497 0 0 9955 +generatorfor 1 6.957497 0 0 9956 +webar 1 6.957497 0 0 9957 +buddi 1 6.957497 0 0 9958 +resourceful 1 6.957497 0 0 9959 +pipe 1 6.957497 0 0 9960 +meanth 1 6.957497 0 0 9961 +comrad 1 6.957497 0 0 9962 +ofhi 1 6.957497 0 0 9963 +swirl 1 6.957497 0 0 9964 +nefari 1 6.957497 0 0 9965 +toilet 1 6.957497 0 0 9966 +mukhopadyai 1 6.957497 0 0 9967 +bonei 1 6.957497 0 0 9968 +magoo 1 6.957497 0 0 9969 +fletop 1 6.957497 0 0 9970 +bigro 1 6.957497 0 0 9971 +koster 1 6.957497 0 0 9972 +bot 1 6.957497 0 0 9973 +tffl 1 6.957497 0 0 9974 +pageuuencod 1 6.957497 0 0 9975 +pagetar 1 6.957497 0 0 9976 +zip 1 6.957497 0 0 9977 +downloadsgraphicsbarb 1 6.957497 0 0 9978 +gifponch 1 6.957497 0 0 9979 +htmlres_htmlres_curemmittemmitt 1 6.957497 0 0 9980 +delawar 1 6.957497 0 0 9981 +eduwhat 1 6.957497 0 0 9982 +musicor 1 6.957497 0 0 9983 +coollik 1 6.957497 0 0 9984 +indiawho 1 6.957497 0 0 9985 +hardpink 1 6.957497 0 0 9986 +floydfanat 1 6.957497 0 0 9987 +childhood 1 6.957497 0 0 9988 +livelast 1 6.957497 0 0 9989 +plum 1 6.957497 0 0 9990 +unwash 1 6.957497 0 0 9991 +attendedwoodstock 1 6.957497 0 0 9992 +onlinewoodstock 1 6.957497 0 0 9993 +woodstock 1 6.957497 0 0 9994 +thesocc 1 6.957497 0 0 9995 +worldcup 1 6.957497 0 0 9996 +butunfortun 1 6.957497 0 0 9997 +putsomerecip 1 6.957497 0 0 9998 +connoisseurof 1 6.957497 0 0 9999 +whiski 1 6.957497 0 0 10000 +malt 1 6.957497 0 0 10001 +cheer 1 6.957497 0 0 10002 +buttheadoth 1 6.957497 0 0 10003 +gopherand 1 6.957497 0 0 10004 +projectwhich 1 6.957497 0 0 10005 +acheiv 1 6.957497 0 0 10006 +passinglay 1 6.957497 0 0 10007 +thatshow 1 6.957497 0 0 10008 +saturateth 1 6.957497 0 0 10009 +fibr 1 6.957497 0 0 10010 +specificationfor 1 6.957497 0 0 10011 +processesboth 1 6.957497 0 0 10012 +abridgedvers 1 6.957497 0 0 10013 +jpop 1 6.957497 0 0 10014 +homepageben 1 6.957497 0 0 10015 +haogradu 1 6.957497 0 0 10016 +studentbhao 1 6.957497 0 0 10017 +flea 1 6.957497 0 0 10018 +taylorwhen 1 6.957497 0 0 10019 +itsgorg 1 6.957497 0 0 10020 +cornellwhat 1 6.957497 0 0 10021 +dissectionmagazin 1 6.957497 0 0 10022 +magazinea 1 6.957497 0 0 10023 +shoemak 1 6.957497 0 0 10024 +weblouvr 1 6.957497 0 0 10025 +xmorphia 1 6.957497 0 0 10026 +kaleidospac 1 6.957497 0 0 10027 +bonsai 1 6.957497 0 0 10028 +seiyuu 1 6.957497 0 0 10029 +archivenetwork 1 6.957497 0 0 10030 +edgelibrari 1 6.957497 0 0 10031 +infonih 1 6.957497 0 0 10032 +courseth 1 6.957497 0 0 10033 +guidecern 1 6.957497 0 0 10034 +bhao 1 6.957497 0 0 10035 +pageaastha 1 6.957497 0 0 10036 +bhardwajdepart 1 6.957497 0 0 10037 +ofengineeeringresumehtmlpost 1 6.957497 0 0 10038 +scriptcourseworkadvanceddatabas 1 6.957497 0 0 10039 +csmultimediasystem 1 6.957497 0 0 10040 +csengineeringcomput 1 6.957497 0 0 10041 +cssoftwareengin 1 6.957497 0 0 10042 +cscontact 1 6.957497 0 0 10043 +bhardwaj 1 6.957497 0 0 10044 +tommi 1 6.957497 0 0 10045 +feeder 1 6.957497 0 0 10046 +vibratori 1 6.957497 0 0 10047 +kinodynam 1 6.957497 0 0 10048 +xavier 1 6.957497 0 0 10049 +ourlab 1 6.957497 0 0 10050 +toulous 1 6.957497 0 0 10051 +icra 1 6.957497 0 0 10052 +provablygood 1 6.957497 0 0 10053 +couch 1 6.957497 0 0 10054 +donaldbruc 1 6.957497 0 0 10055 +donaldassoci 1 6.957497 0 0 10056 +professorbrd 1 6.957497 0 0 10057 +laboratorydan 1 6.957497 0 0 10058 +microactu 1 6.957497 0 0 10059 +arrayi 1 6.957497 0 0 10060 +squarecentemet 1 6.957497 0 0 10061 +sensoryfeedback 1 6.957497 0 0 10062 +buildself 1 6.957497 0 0 10063 +propel 1 6.957497 0 0 10064 +amybrigg 1 6.957497 0 0 10065 +surveil 1 6.957497 0 0 10066 +andintercept 1 6.957497 0 0 10067 +developedbi 1 6.957497 0 0 10068 +informationalon 1 6.957497 0 0 10069 +andlow 1 6.957497 0 0 10070 +memsand 1 6.957497 0 0 10071 +thealgorithm 1 6.957497 0 0 10072 +robustgeometr 1 6.957497 0 0 10073 +andimprov 1 6.957497 0 0 10074 +partsfeed 1 6.957497 0 0 10075 +partii 1 6.957497 0 0 10076 +robotswith 1 6.957497 0 0 10077 +forcartesian 1 6.957497 0 0 10078 +canni 1 6.957497 0 0 10079 +inpress 1 6.957497 0 0 10080 +supermodular 1 6.957497 0 0 10081 +andtheoret 1 6.957497 0 0 10082 +jetai 1 6.957497 0 0 10083 +firstquart 1 6.957497 0 0 10084 +inminim 1 6.957497 0 0 10085 +iser 1 6.957497 0 0 10086 +automon 1 6.957497 0 0 10087 +ofjapan 1 6.957497 0 0 10088 +iro 1 6.957497 0 0 10089 +sensorlessmanipul 1 6.957497 0 0 10090 +andautom 1 6.957497 0 0 10091 +ofrobot 1 6.957497 0 0 10092 +otherpubl 1 6.957497 0 0 10093 +dinesh 1 6.957497 0 0 10094 +aval 1 6.957497 0 0 10095 +indexobtain 1 6.957497 0 0 10096 +paperscopi 1 6.957497 0 0 10097 +teamof 1 6.957497 0 0 10098 +movefurnitur 1 6.957497 0 0 10099 +mobot 1 6.957497 0 0 10100 +loretta 1 6.957497 0 0 10101 +pompilio 1 6.957497 0 0 10102 +discoverychannel 1 6.957497 0 0 10103 +funa 1 6.957497 0 0 10104 +moreoth 1 6.957497 0 0 10105 +tallest 1 6.957497 0 0 10106 +darkest 1 6.957497 0 0 10107 +hollywood 1 6.957497 0 0 10108 +merian 1 6.957497 0 0 10109 +wrai 1 6.957497 0 0 10110 +cardi 1 6.957497 0 0 10111 +kenmor 1 6.957497 0 0 10112 +pronoun 1 6.957497 0 0 10113 +naturallanguag 1 6.957497 0 0 10114 +knowledgeacquisit 1 6.957497 0 0 10115 +riloff 1 6.957497 0 0 10116 +tenth 1 6.957497 0 0 10117 +erlbaumassoci 1 6.957497 0 0 10118 +lehnert 1 6.957497 0 0 10119 +cardieclair 1 6.957497 0 0 10120 +teachselect 1 6.957497 0 0 10121 +publicationsnlp 1 6.957497 0 0 10122 +amalgam 1 6.957497 0 0 10123 +westi 1 6.957497 0 0 10124 +interestsalthough 1 6.957497 0 0 10125 +subfield 1 6.957497 0 0 10126 +cognitivemodel 1 6.957497 0 0 10127 +forexplor 1 6.957497 0 0 10128 +tworel 1 6.957497 0 0 10129 +reliablyextract 1 6.957497 0 0 10130 +cstr 1 6.957497 0 0 10131 +kenmoreacquir 1 6.957497 0 0 10132 +tworeal 1 6.957497 0 0 10133 +andconcept 1 6.957497 0 0 10134 +anteced 1 6.957497 0 0 10135 +disambiguationtask 1 6.957497 0 0 10136 +learningcompon 1 6.957497 0 0 10137 +isembed 1 6.957497 0 0 10138 +inartifici 1 6.957497 0 0 10139 +understandingselect 1 6.957497 0 0 10140 +publicationsautom 1 6.957497 0 0 10141 +wermter 1 6.957497 0 0 10142 +scheler 1 6.957497 0 0 10143 +andsymbol 1 6.957497 0 0 10144 +tolearn 1 6.957497 0 0 10145 +conceptualsent 1 6.957497 0 0 10146 +cmpsci 1 6.957497 0 0 10147 +onconstrain 1 6.957497 0 0 10148 +plausibl 1 6.957497 0 0 10149 +linkscomput 1 6.957497 0 0 10150 +linguistics 1 6.957497 0 0 10151 +aclspeci 1 6.957497 0 0 10152 +learningmachin 1 6.957497 0 0 10153 +digestmachinelearn 1 6.957497 0 0 10154 +researchersmachin 1 6.957497 0 0 10155 +associatephd 1 6.957497 0 0 10156 +eduappletsy 1 6.957497 0 0 10157 +asnetscap 1 6.957497 0 0 10158 +avoronoi 1 6.957497 0 0 10159 +onpract 1 6.957497 0 0 10160 +includedplac 1 6.957497 0 0 10161 +thataris 1 6.957497 0 0 10162 +isspecifi 1 6.957497 0 0 10163 +ofphys 1 6.957497 0 0 10164 +techniquesar 1 6.957497 0 0 10165 +effectiveprogram 1 6.957497 0 0 10166 +myonlin 1 6.957497 0 0 10167 +reportscornel 1 6.957497 0 0 10168 +computerscienceth 1 6.957497 0 0 10169 +simlabprojectaddress 1 6.957497 0 0 10170 +chichao 1 6.957497 0 0 10171 +isthorsten 1 6.957497 0 0 10172 +addressesand 1 6.957497 0 0 10173 +overheterogen 1 6.957497 0 0 10174 +tham 1 6.957497 0 0 10175 +multimatlab 1 6.957497 0 0 10176 +newsbraziliansocc 1 6.957497 0 0 10177 +portugues 1 6.957497 0 0 10178 +andhomepagesoliv 1 6.957497 0 0 10179 +lubrasa 1 6.957497 0 0 10180 +luso 1 6.957497 0 0 10181 +brazilian 1 6.957497 0 0 10182 +associationu 1 6.957497 0 0 10183 +centerjorn 1 6.957497 0 0 10184 +brasilmi 1 6.957497 0 0 10185 +carstockmasterjayhawk 1 6.957497 0 0 10186 +basketballwww 1 6.957497 0 0 10187 +tennisserverback 1 6.957497 0 0 10188 +keong 1 6.957497 0 0 10189 +liau 1 6.957497 0 0 10190 +liauwelcom 1 6.957497 0 0 10191 +networksc 1 6.957497 0 0 10192 +systemsbaccalaur 1 6.957497 0 0 10193 +japanhomei 1 6.957497 0 0 10194 +colemanthoma 1 6.957497 0 0 10195 +colemancornel 1 6.957497 0 0 10196 +universityi 1 6.957497 0 0 10197 +professcp 1 6.957497 0 0 10198 +ccop 1 6.957497 0 0 10199 +broadfield 1 6.957497 0 0 10200 +programmi 1 6.957497 0 0 10201 +computationalmethod 1 6.957497 0 0 10202 +calculi 1 6.957497 0 0 10203 +crari 1 6.957497 0 0 10204 +crarycrari 1 6.957497 0 0 10205 +researchbroadli 1 6.957497 0 0 10206 +implementationand 1 6.957497 0 0 10207 +kmlwhich 1 6.957497 0 0 10208 +richworld 1 6.957497 0 0 10209 +newprogram 1 6.957497 0 0 10210 +aminterest 1 6.957497 0 0 10211 +deepen 1 6.957497 0 0 10212 +mitig 1 6.957497 0 0 10213 +modelallow 1 6.957497 0 0 10214 +allowsth 1 6.957497 0 0 10215 +andcorrect 1 6.957497 0 0 10216 +additionaloptim 1 6.957497 0 0 10217 +automatedreason 1 6.957497 0 0 10218 +ofrobert 1 6.957497 0 0 10219 +jasonhickei 1 6.957497 0 0 10220 +linksmark 1 6.957497 0 0 10221 +cansearch 1 6.957497 0 0 10222 +biblestudi 1 6.957497 0 0 10223 +thelord 1 6.957497 0 0 10224 +neighbor 1 6.957497 0 0 10225 +commandmentgreat 1 6.957497 0 0 10226 +multifront 1 6.957497 0 0 10227 +pothen 1 6.957497 0 0 10228 +chunguang 1 6.957497 0 0 10229 +processingfor 1 6.957497 0 0 10230 +cliqu 1 6.957497 0 0 10231 +sunchunguang 1 6.957497 0 0 10232 +sunphd 1 6.957497 0 0 10233 +ppcx 1 6.957497 0 0 10234 +pssl 1 6.957497 0 0 10235 +psspd 1 6.957497 0 0 10236 +systemsrec 1 6.957497 0 0 10237 +lecturesparallel 1 6.957497 0 0 10238 +coeur 1 6.957497 0 0 10239 +alen 1 6.957497 0 0 10240 +bailei 1 6.957497 0 0 10241 +bjorstad 1 6.957497 0 0 10242 +gilbert 1 6.957497 0 0 10243 +mascagni 1 6.957497 0 0 10244 +schreiber 1 6.957497 0 0 10245 +torczon 1 6.957497 0 0 10246 +choleskyfactor 1 6.957497 0 0 10247 +matriceson 1 6.957497 0 0 10248 +sinovec 1 6.957497 0 0 10249 +leuz 1 6.957497 0 0 10250 +petzold 1 6.957497 0 0 10251 +messina 1 6.957497 0 0 10252 +sorensen 1 6.957497 0 0 10253 +voigt 1 6.957497 0 0 10254 +structuresin 1 6.957497 0 0 10255 +csun 1 6.957497 0 0 10256 +relationshipsamong 1 6.957497 0 0 10257 +dcooper 1 6.957497 0 0 10258 +securityarchitectur 1 6.957497 0 0 10259 +horuswhich 1 6.957497 0 0 10260 +kerberosnetwork 1 6.957497 0 0 10261 +cryptograph 1 6.957497 0 0 10262 +toprovid 1 6.957497 0 0 10263 +originalimplement 1 6.957497 0 0 10264 +failuremodel 1 6.957497 0 0 10265 +anyprocess 1 6.957497 0 0 10266 +isposs 1 6.957497 0 0 10267 +weaker 1 6.957497 0 0 10268 +untrustedprocess 1 6.957497 0 0 10269 +clientsto 1 6.957497 0 0 10270 +horussecur 1 6.957497 0 0 10271 +keymanag 1 6.957497 0 0 10272 +impersonateanoth 1 6.957497 0 0 10273 +achieveth 1 6.957497 0 0 10274 +asclient 1 6.957497 0 0 10275 +inherentin 1 6.957497 0 0 10276 +contentsof 1 6.957497 0 0 10277 +hiddenwith 1 6.957497 0 0 10278 +outsidersfrom 1 6.957497 0 0 10279 +maintainingth 1 6.957497 0 0 10280 +unlink 1 6.957497 0 0 10281 +chaum 1 6.957497 0 0 10282 +severaloth 1 6.957497 0 0 10283 +staticnetwork 1 6.957497 0 0 10284 +mobilecommun 1 6.957497 0 0 10285 +themessag 1 6.957497 0 0 10286 +advisorken 1 6.957497 0 0 10287 +internaland 1 6.957497 0 0 10288 +apriv 1 6.957497 0 0 10289 +deidr 1 6.957497 0 0 10290 +pandora 1 6.957497 0 0 10291 +abodedan 1 6.957497 0 0 10292 +abodegreet 1 6.957497 0 0 10293 +humbl 1 6.957497 0 0 10294 +prithe 1 6.957497 0 0 10295 +teari 1 6.957497 0 0 10296 +weari 1 6.957497 0 0 10297 +thyselv 1 6.957497 0 0 10298 +abod 1 6.957497 0 0 10299 +emot 1 6.957497 0 0 10300 +simnet 1 6.957497 0 0 10301 +builder 1 6.957497 0 0 10302 +faiththei 1 6.957497 0 0 10303 +hardli 1 6.957497 0 0 10304 +ferro 1 6.957497 0 0 10305 +scorn 1 6.957497 0 0 10306 +bend 1 6.957497 0 0 10307 +tone 1 6.957497 0 0 10308 +unseen 1 6.957497 0 0 10309 +unheard 1 6.957497 0 0 10310 +untouch 1 6.957497 0 0 10311 +silenc 1 6.957497 0 0 10312 +yearn 1 6.957497 0 0 10313 +lordlovewarm 1 6.957497 0 0 10314 +friendship 1 6.957497 0 0 10315 +mindless 1 6.957497 0 0 10316 +infatu 1 6.957497 0 0 10317 +sensual 1 6.957497 0 0 10318 +sigh 1 6.957497 0 0 10319 +hopemyth 1 6.957497 0 0 10320 +beauteou 1 6.957497 0 0 10321 +demon 1 6.957497 0 0 10322 +astrai 1 6.957497 0 0 10323 +glimmer 1 6.957497 0 0 10324 +tread 1 6.957497 0 0 10325 +amidst 1 6.957497 0 0 10326 +thorn 1 6.957497 0 0 10327 +filthi 1 6.957497 0 0 10328 +miseri 1 6.957497 0 0 10329 +etern 1 6.957497 0 0 10330 +urlsdean 1 6.957497 0 0 10331 +interestcornel 1 6.957497 0 0 10332 +dimund 1 6.957497 0 0 10333 +librarysearch 1 6.957497 0 0 10334 +veronica 1 6.957497 0 0 10335 +faqsvari 1 6.957497 0 0 10336 +folkbook 1 6.957497 0 0 10337 +facilitiesaddress 1 6.957497 0 0 10338 +guis 1 6.957497 0 0 10339 +anadministr 1 6.957497 0 0 10340 +andworri 1 6.957497 0 0 10341 +spart 1 6.957497 0 0 10342 +thecorpor 1 6.957497 0 0 10343 +cnri 1 6.957497 0 0 10344 +technicalresearch 1 6.957497 0 0 10345 +theexist 1 6.957497 0 0 10346 +disseminationov 1 6.957497 0 0 10347 +atechn 1 6.957497 0 0 10348 +ondienst 1 6.957497 0 0 10349 +togethera 1 6.957497 0 0 10350 +url 1 6.957497 0 0 10351 +divakar 1 6.957497 0 0 10352 +pagedivakar 1 6.957497 0 0 10353 +viswanathdivakar 1 6.957497 0 0 10354 +isnumer 1 6.957497 0 0 10355 +diyu 1 6.957497 0 0 10356 +pagediyu 1 6.957497 0 0 10357 +daisi 1 6.957497 0 0 10358 +translatorsfal 1 6.957497 0 0 10359 +systemc 1 6.957497 0 0 10360 +systemsel 1 6.957497 0 0 10361 +telecommunicationsm 1 6.957497 0 0 10362 +projectorigin 1 6.957497 0 0 10363 +projectsinc 1 6.957497 0 0 10364 +unviers 1 6.957497 0 0 10365 +linksjava 1 6.957497 0 0 10366 +tkfavorit 1 6.957497 0 0 10367 +sitestimecnnlondon 1 6.957497 0 0 10368 +timeswashington 1 6.957497 0 0 10369 +postchines 1 6.957497 0 0 10370 +digestchina 1 6.957497 0 0 10371 +digestfeng 1 6.957497 0 0 10372 +yuanxin 1 6.957497 0 0 10373 +siart 1 6.957497 0 0 10374 +chinaloc 1 6.957497 0 0 10375 +connectionsctc 1 6.957497 0 0 10376 +sunlabweathermovi 1 6.957497 0 0 10377 +professordph 1 6.957497 0 0 10378 +eigenspac 1 6.957497 0 0 10379 +digipap 1 6.957497 0 0 10380 +viewabl 1 6.957497 0 0 10381 +parc 1 6.957497 0 0 10382 +attitud 1 6.957497 0 0 10383 +dsouza 1 6.957497 0 0 10384 +ashvin 1 6.957497 0 0 10385 +bard 1 6.957497 0 0 10386 +oftool 1 6.957497 0 0 10387 +andverif 1 6.957497 0 0 10388 +withrespect 1 6.957497 0 0 10389 +immediatelyavail 1 6.957497 0 0 10390 +duplic 1 6.957497 0 0 10391 +gso 1 6.957497 0 0 10392 +loto 1 6.957497 0 0 10393 +exploringappl 1 6.957497 0 0 10394 +bdd 1 6.957497 0 0 10395 +algebraterm 1 6.957497 0 0 10396 +postscipt 1 6.957497 0 0 10397 +lite 1 6.957497 0 0 10398 +presentedth 1 6.957497 0 0 10399 +compass 1 6.957497 0 0 10400 +pagefunda 1 6.957497 0 0 10401 +ergn 1 6.957497 0 0 10402 +eduhi 1 6.957497 0 0 10403 +studentin 1 6.957497 0 0 10404 +programcheck 1 6.957497 0 0 10405 +researchpag 1 6.957497 0 0 10406 +izmir 1 6.957497 0 0 10407 +bilkentunivers 1 6.957497 0 0 10408 +ankara 1 6.957497 0 0 10409 +encounterpag 1 6.957497 0 0 10410 +turkish 1 6.957497 0 0 10411 +ori 1 6.957497 0 0 10412 +shmoi 1 6.957497 0 0 10413 +plotkin 1 6.957497 0 0 10414 +approximationalgorithm 1 6.957497 0 0 10415 +klein 1 6.957497 0 0 10416 +grotschel 1 6.957497 0 0 10417 +tardosassoci 1 6.957497 0 0 10418 +engineeringphon 1 6.957497 0 0 10419 +researchrec 1 6.957497 0 0 10420 +mostlywork 1 6.957497 0 0 10421 +networkproblem 1 6.957497 0 0 10422 +paperssurvei 1 6.957497 0 0 10423 +cutratio 1 6.957497 0 0 10424 +fasterapproxim 1 6.957497 0 0 10425 +problemwith 1 6.957497 0 0 10426 +makedon 1 6.957497 0 0 10427 +tragouda 1 6.957497 0 0 10428 +flowproblem 1 6.957497 0 0 10429 +annualacm 1 6.957497 0 0 10430 +thefound 1 6.957497 0 0 10431 +designproblem 1 6.957497 0 0 10432 +discretealgorithm 1 6.957497 0 0 10433 +someevacu 1 6.957497 0 0 10434 +ondiscret 1 6.957497 0 0 10435 +quickest 1 6.957497 0 0 10436 +transship 1 6.957497 0 0 10437 +theproceed 1 6.957497 0 0 10438 +steiner 1 6.957497 0 0 10439 +multicut 1 6.957497 0 0 10440 +pathsproblem 1 6.957497 0 0 10441 +annualiee 1 6.957497 0 0 10442 +rabani 1 6.957497 0 0 10443 +fleischer 1 6.957497 0 0 10444 +comb 1 6.957497 0 0 10445 +ipco 1 6.957497 0 0 10446 +kort 1 6.957497 0 0 10447 +lovaszand 1 6.957497 0 0 10448 +schrijver 1 6.957497 0 0 10449 +inoptim 1 6.957497 0 0 10450 +ofmathematician 1 6.957497 0 0 10451 +kyoto 1 6.957497 0 0 10452 +inproc 1 6.957497 0 0 10453 +maastricht 1 6.957497 0 0 10454 +networkoptim 1 6.957497 0 0 10455 +netflow 1 6.957497 0 0 10456 +miniato 1 6.957497 0 0 10457 +berkeleymathemat 1 6.957497 0 0 10458 +departmentcomput 1 6.957497 0 0 10459 +departmenthumorfcc 1 6.957497 0 0 10460 +frederick 1 6.957497 0 0 10461 +kleen 1 6.957497 0 0 10462 +homepagefrederick 1 6.957497 0 0 10463 +zine 1 6.957497 0 0 10464 +cartalk 1 6.957497 0 0 10465 +clack 1 6.957497 0 0 10466 +adelstein 1 6.957497 0 0 10467 +checkout 1 6.957497 0 0 10468 +apollo 1 6.957497 0 0 10469 +scramo 1 6.957497 0 0 10470 +midi 1 6.957497 0 0 10471 +choreograph 1 6.957497 0 0 10472 +vpla 1 6.957497 0 0 10473 +animationlink 1 6.957497 0 0 10474 +packardlink 1 6.957497 0 0 10475 +laboratoryinterest 1 6.957497 0 0 10476 +cello 1 6.957497 0 0 10477 +aquarium 1 6.957497 0 0 10478 +burl 1 6.957497 0 0 10479 +fredhsu 1 6.957497 0 0 10480 +deepak 1 6.957497 0 0 10481 +balakrishna 1 6.957497 0 0 10482 +balakrishnamast 1 6.957497 0 0 10483 +resumeeducationcoursesperson 1 6.957497 0 0 10484 +surathk 1 6.957497 0 0 10485 +specialis 1 6.957497 0 0 10486 +godfrei 1 6.957497 0 0 10487 +chubbi 1 6.957497 0 0 10488 +weigh 1 6.957497 0 0 10489 +pound 1 6.957497 0 0 10490 +divin 1 6.957497 0 0 10491 +aishwarya 1 6.957497 0 0 10492 +miniscul 1 6.957497 0 0 10493 +krec 1 6.957497 0 0 10494 +here 1 6.957497 0 0 10495 +uddin 1 6.957497 0 0 10496 +ghiasasif 1 6.957497 0 0 10497 +constructioni 1 6.957497 0 0 10498 +karachi 1 6.957497 0 0 10499 +pakistan 1 6.957497 0 0 10500 +installationso 1 6.957497 0 0 10501 +astronomyasif 1 6.957497 0 0 10502 +dglaser 1 6.957497 0 0 10503 +htmlpleas 1 6.957497 0 0 10504 +grieswilliam 1 6.957497 0 0 10505 +engineeringdr 1 6.957497 0 0 10506 +formaldevelop 1 6.957497 0 0 10507 +asinterest 1 6.957497 0 0 10508 +researchin 1 6.957497 0 0 10509 +taughta 1 6.957497 0 0 10510 +anoverrid 1 6.957497 0 0 10511 +edushort 1 6.957497 0 0 10512 +griesi 1 6.957497 0 0 10513 +flush 1 6.957497 0 0 10514 +iescap 1 6.957497 0 0 10515 +workfor 1 6.957497 0 0 10516 +civilian 1 6.957497 0 0 10517 +amathematician 1 6.957497 0 0 10518 +fewmonth 1 6.957497 0 0 10519 +twogerman 1 6.957497 0 0 10520 +ruedig 1 6.957497 0 0 10521 +wiehl 1 6.957497 0 0 10522 +algol 1 6.957497 0 0 10523 +compilerfor 1 6.957497 0 0 10524 +implementrecurs 1 6.957497 0 0 10525 +stoer 1 6.957497 0 0 10526 +wasin 1 6.957497 0 0 10527 +notyet 1 6.957497 0 0 10528 +kosher 1 6.957497 0 0 10529 +thebirthdai 1 6.957497 0 0 10530 +intown 1 6.957497 0 0 10531 +whichha 1 6.957497 0 0 10532 +wasdepart 1 6.957497 0 0 10533 +lewisprofessor 1 6.957497 0 0 10534 +contentsi 1 6.957497 0 0 10535 +mytext 1 6.957497 0 0 10536 +writingand 1 6.957497 0 0 10537 +thewond 1 6.957497 0 0 10538 +wherey 1 6.957497 0 0 10539 +contributionsto 1 6.957497 0 0 10540 +sigcseaward 1 6.957497 0 0 10541 +clarkaward 1 6.957497 0 0 10542 +advise 1 6.957497 0 0 10543 +susanowicki 1 6.957497 0 0 10544 +laid 1 6.957497 0 0 10545 +freeness 1 6.957497 0 0 10546 +bestpap 1 6.957497 0 0 10547 +sthesi 1 6.957497 0 0 10548 +designedand 1 6.957497 0 0 10549 +printedor 1 6.957497 0 0 10550 +speakmathemat 1 6.957497 0 0 10551 +audiocassett 1 6.957497 0 0 10552 +officein 1 6.957497 0 0 10553 +taulbe 1 6.957497 0 0 10554 +responsesfrom 1 6.957497 0 0 10555 +noother 1 6.957497 0 0 10556 +itrequir 1 6.957497 0 0 10557 +sendin 1 6.957497 0 0 10558 +questionnair 1 6.957497 0 0 10559 +forchair 1 6.957497 0 0 10560 +andrespons 1 6.957497 0 0 10561 +takean 1 6.957497 0 0 10562 +willsuggest 1 6.957497 0 0 10563 +servewher 1 6.957497 0 0 10564 +fredb 1 6.957497 0 0 10565 +andmonograph 1 6.957497 0 0 10566 +isplit 1 6.957497 0 0 10567 +pant 1 6.957497 0 0 10568 +alectur 1 6.957497 0 0 10569 +turnedaround 1 6.957497 0 0 10570 +spoke 1 6.957497 0 0 10571 +everyonelaugh 1 6.957497 0 0 10572 +justsaid 1 6.957497 0 0 10573 +barbershop 1 6.957497 0 0 10574 +andgilbert 1 6.957497 0 0 10575 +sullivan 1 6.957497 0 0 10576 +carpentri 1 6.957497 0 0 10577 +remodel 1 6.957497 0 0 10578 +considerablesatisfact 1 6.957497 0 0 10579 +grinzayd 1 6.957497 0 0 10580 +homepagealex 1 6.957497 0 0 10581 +grinzaydm 1 6.957497 0 0 10582 +universitytel 1 6.957497 0 0 10583 +necx 1 6.957497 0 0 10584 +directinternet 1 6.957497 0 0 10585 +networkcomput 1 6.957497 0 0 10586 +damarkwarn 1 6.957497 0 0 10587 +grze 1 6.957497 0 0 10588 +czajkowskidepart 1 6.957497 0 0 10589 +krakow 1 6.957497 0 0 10590 +administ 1 6.957497 0 0 10591 +halpern 1 6.957497 0 0 10592 +pagejoseph 1 6.957497 0 0 10593 +professorcornel 1 6.957497 0 0 10594 +economist 1 6.957497 0 0 10595 +abouta 1 6.957497 0 0 10596 +sequel 1 6.957497 0 0 10597 +tsuneshi 1 6.957497 0 0 10598 +hashimoto 1 6.957497 0 0 10599 +hashimototsuneshi 1 6.957497 0 0 10600 +hashimotothi 1 6.957497 0 0 10601 +cstsuneshi 1 6.957497 0 0 10602 +hashi 1 6.957497 0 0 10603 +heji 1 6.957497 0 0 10604 +evangel 1 6.957497 0 0 10605 +deyu 1 6.957497 0 0 10606 +icchen 1 6.957497 0 0 10607 +nctu 1 6.957497 0 0 10608 +malik 1 6.957497 0 0 10609 +imalik 1 6.957497 0 0 10610 +tap 1 6.957497 0 0 10611 +vidyaprakash 1 6.957497 0 0 10612 +vidyaprakashmast 1 6.957497 0 0 10613 +universitywelcom 1 6.957497 0 0 10614 +financesumm 1 6.957497 0 0 10615 +tracingin 1 6.957497 0 0 10616 +perspectivetransform 1 6.957497 0 0 10617 +myresumeclick 1 6.957497 0 0 10618 +transformssom 1 6.957497 0 0 10619 +sgamelan 1 6.957497 0 0 10620 +calvinand 1 6.957497 0 0 10621 +gif 1 6.957497 0 0 10622 +chicker 1 6.957497 0 0 10623 +pageioi 1 6.957497 0 0 10624 +homeless 1 6.957497 0 0 10625 +lamioi 1 6.957497 0 0 10626 +multim 1 6.957497 0 0 10627 +htmladdress 1 6.957497 0 0 10628 +intereststheorem 1 6.957497 0 0 10629 +andhardwar 1 6.957497 0 0 10630 +informationmi 1 6.957497 0 0 10631 +developmentsystem 1 6.957497 0 0 10632 +bundi 1 6.957497 0 0 10633 +automateddeduct 1 6.957497 0 0 10634 +artif 1 6.957497 0 0 10635 +stavrid 1 6.957497 0 0 10636 +melham 1 6.957497 0 0 10637 +transactionsa 1 6.957497 0 0 10638 +theadvanc 1 6.957497 0 0 10639 +nuprlth 1 6.957497 0 0 10640 +getround 1 6.957497 0 0 10641 +thetheori 1 6.957497 0 0 10642 +foreach 1 6.957497 0 0 10643 +andtheorem 1 6.957497 0 0 10644 +thepolynomi 1 6.957497 0 0 10645 +tibor 1 6.957497 0 0 10646 +jnositibor 1 6.957497 0 0 10647 +jnosiwelcom 1 6.957497 0 0 10648 +constructionoffic 1 6.957497 0 0 10649 +zenotibor 1 6.957497 0 0 10650 +jnosi 1 6.957497 0 0 10651 +janwun 1 6.957497 0 0 10652 +tarditi 1 6.957497 0 0 10653 +tolmach 1 6.957497 0 0 10654 +papersrel 1 6.957497 0 0 10655 +ofadvanc 1 6.957497 0 0 10656 +forbuild 1 6.957497 0 0 10657 +safelanguag 1 6.957497 0 0 10658 +toolsfrom 1 6.957497 0 0 10659 +systemssoftwar 1 6.957497 0 0 10660 +paperssemant 1 6.957497 0 0 10661 +safetythrough 1 6.957497 0 0 10662 +yasuhiko 1 6.957497 0 0 10663 +minamid 1 6.957497 0 0 10664 +matthia 1 6.957497 0 0 10665 +felleisen 1 6.957497 0 0 10666 +reportcmu 1 6.957497 0 0 10667 +notecmu 1 6.957497 0 0 10668 +intensionaltyp 1 6.957497 0 0 10669 +parallelizationgreg 1 6.957497 0 0 10670 +mauric 1 6.957497 0 0 10671 +scienceperson 1 6.957497 0 0 10672 +informationhom 1 6.957497 0 0 10673 +jiun 1 6.957497 0 0 10674 +jhlin 1 6.957497 0 0 10675 +edujerri 1 6.957497 0 0 10676 +wangphd 1 6.957497 0 0 10677 +jiawang 1 6.957497 0 0 10678 +goldwat 1 6.957497 0 0 10679 +cbnet 1 6.957497 0 0 10680 +chinanet 1 6.957497 0 0 10681 +chinesecalendar 1 6.957497 0 0 10682 +mediainform 1 6.957497 0 0 10683 +hongkonglaserdisccent 1 6.957497 0 0 10684 +internetdistribut 1 6.957497 0 0 10685 +multilingu 1 6.957497 0 0 10686 +smovieplex 1 6.957497 0 0 10687 +diwww 1 6.957497 0 0 10688 +thesenior 1 6.957497 0 0 10689 +worldmap 1 6.957497 0 0 10690 +mandarin 1 6.957497 0 0 10691 +cssa 1 6.957497 0 0 10692 +ofengin 1 6.957497 0 0 10693 +assistantwork 1 6.957497 0 0 10694 +ismachin 1 6.957497 0 0 10695 +informationsom 1 6.957497 0 0 10696 +rant 1 6.957497 0 0 10697 +hillsboro 1 6.957497 0 0 10698 +moorejeff 1 6.957497 0 0 10699 +moorewel 1 6.957497 0 0 10700 +mastersof 1 6.957497 0 0 10701 +lafayett 1 6.957497 0 0 10702 +suburb 1 6.957497 0 0 10703 +employmentmi 1 6.957497 0 0 10704 +classesnba 1 6.957497 0 0 10705 +sectorc 1 6.957497 0 0 10706 +researchfal 1 6.957497 0 0 10707 +paperc 1 6.957497 0 0 10708 +opendoc 1 6.957497 0 0 10709 +mfcoptim 1 6.957497 0 0 10710 +researchsoftwar 1 6.957497 0 0 10711 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 1 6.957497 0 0 10712 +companiesintelsilicon 1 6.957497 0 0 10713 +graphicsibmsunapplemagazinespc 1 6.957497 0 0 10714 +weekpc 1 6.957497 0 0 10715 +computingcomput 1 6.957497 0 0 10716 +shopperwindow 1 6.957497 0 0 10717 +sourcescomput 1 6.957497 0 0 10718 +lifemacusermacweekinteract 1 6.957497 0 0 10719 +weekfamili 1 6.957497 0 0 10720 +pccomput 1 6.957497 0 0 10721 +worldelectron 1 6.957497 0 0 10722 +newspapersusa 1 6.957497 0 0 10723 +todaywal 1 6.957497 0 0 10724 +journalnew 1 6.957497 0 0 10725 +timesphiladelphia 1 6.957497 0 0 10726 +onlineth 1 6.957497 0 0 10727 +worldwideth 1 6.957497 0 0 10728 +opinionsth 1 6.957497 0 0 10729 +gopherth 1 6.957497 0 0 10730 +knoxvil 1 6.957497 0 0 10731 +sentinelth 1 6.957497 0 0 10732 +onlinelat 1 6.957497 0 0 10733 +serviceth 1 6.957497 0 0 10734 +nugget 1 6.957497 0 0 10735 +oregonrworld 1 6.957497 0 0 10736 +registerth 1 6.957497 0 0 10737 +examinersan 1 6.957497 0 0 10738 +mercuryth 1 6.957497 0 0 10739 +timesnando 1 6.957497 0 0 10740 +netusa 1 6.957497 0 0 10741 +todayboston 1 6.957497 0 0 10742 +globeportland 1 6.957497 0 0 10743 +telegramvisitor 1 6.957497 0 0 10744 +fdithaca 1 6.957497 0 0 10745 +pagejos 1 6.957497 0 0 10746 +fernandezjos 1 6.957497 0 0 10747 +fernandezmast 1 6.957497 0 0 10748 +ebithaca 1 6.957497 0 0 10749 +scroll 1 6.957497 0 0 10750 +presentationc 1 6.957497 0 0 10751 +spaceship 1 6.957497 0 0 10752 +battl 1 6.957497 0 0 10753 +picturesmusiccomputerswrit 1 6.957497 0 0 10754 +giel 1 6.957497 0 0 10755 +hurtado 1 6.957497 0 0 10756 +julin 1 6.957497 0 0 10757 +pagejulin 1 6.957497 0 0 10758 +universitymast 1 6.957497 0 0 10759 +managementmast 1 6.957497 0 0 10760 +science 1 6.957497 0 0 10761 +colombia 1 6.957497 0 0 10762 +er 1 6.957497 0 0 10763 +janeen 1 6.957497 0 0 10764 +homepagejaneen 1 6.957497 0 0 10765 +reich 1 6.957497 0 0 10766 +jreich 1 6.957497 0 0 10767 +camaro 1 6.957497 0 0 10768 +chevi 1 6.957497 0 0 10769 +jodi 1 6.957497 0 0 10770 +shapirojodi 1 6.957497 0 0 10771 +shapiroeduc 1 6.957497 0 0 10772 +engineeringe 1 6.957497 0 0 10773 +telecommunicationc 1 6.957497 0 0 10774 +researchspr 1 6.957497 0 0 10775 +systemse 1 6.957497 0 0 10776 +networksnba 1 6.957497 0 0 10777 +revolutionc 1 6.957497 0 0 10778 +researchma 1 6.957497 0 0 10779 +automot 1 6.957497 0 0 10780 +engineeringinterest 1 6.957497 0 0 10781 +animationlow 1 6.957497 0 0 10782 +videoconferenc 1 6.957497 0 0 10783 +recognitioninterest 1 6.957497 0 0 10784 +firebird 1 6.957497 0 0 10785 +yourselfelectron 1 6.957497 0 0 10786 +fuel 1 6.957497 0 0 10787 +inject 1 6.957497 0 0 10788 +alwayshav 1 6.957497 0 0 10789 +designingan 1 6.957497 0 0 10790 +pageefi 1 6.957497 0 0 10791 +pagethes 1 6.957497 0 0 10792 +gearsmodif 1 6.957497 0 0 10793 +hypertech 1 6.957497 0 0 10794 +flowmast 1 6.957497 0 0 10795 +hurst 1 6.957497 0 0 10796 +shifter 1 6.957497 0 0 10797 +wheel 1 6.957497 0 0 10798 +mustang 1 6.957497 0 0 10799 +speedmodif 1 6.957497 0 0 10800 +motorsport 1 6.957497 0 0 10801 +wiresbest 1 6.957497 0 0 10802 +mphbest 1 6.957497 0 0 10803 +pagenumb 1 6.957497 0 0 10804 +pagejulia 1 6.957497 0 0 10805 +komissarchik 1 6.957497 0 0 10806 +juliak 1 6.957497 0 0 10807 +julian 1 6.957497 0 0 10808 +pelenur 1 6.957497 0 0 10809 +centerithaca 1 6.957497 0 0 10810 +wfinger 1 6.957497 0 0 10811 +cyberserv 1 6.957497 0 0 10812 +httpserver 1 6.957497 0 0 10813 +prvf 1 6.957497 0 0 10814 +poss 1 6.957497 0 0 10815 +screenmot 1 6.957497 0 0 10816 +showthat 1 6.957497 0 0 10817 +snarf 1 6.957497 0 0 10818 +transferwith 1 6.957497 0 0 10819 +robertconst 1 6.957497 0 0 10820 +thefox 1 6.957497 0 0 10821 +markleon 1 6.957497 0 0 10822 +formalsystem 1 6.957497 0 0 10823 +levelmodul 1 6.957497 0 0 10824 +cornella 1 6.957497 0 0 10825 +publishedat 1 6.957497 0 0 10826 +havegiven 1 6.957497 0 0 10827 +theygiv 1 6.957497 0 0 10828 +galleryof 1 6.957497 0 0 10829 +tryth 1 6.957497 0 0 10830 +fineart 1 6.957497 0 0 10831 +cucshockei 1 6.957497 0 0 10832 +thebackcountri 1 6.957497 0 0 10833 +maintainedsoftwar 1 6.957497 0 0 10834 +hockeyfor 1 6.957497 0 0 10835 +kamijo 1 6.957497 0 0 10836 +koichi 1 6.957497 0 0 10837 +kamijokoichi 1 6.957497 0 0 10838 +papershometownseduc 1 6.957497 0 0 10839 +experienceskoichi 1 6.957497 0 0 10840 +muriel 1 6.957497 0 0 10841 +kkamijoh 1 6.957497 0 0 10842 +vnet 1 6.957497 0 0 10843 +andassembl 1 6.957497 0 0 10844 +dipl 1 6.957497 0 0 10845 +implementmicro 1 6.957497 0 0 10846 +withprogramm 1 6.957497 0 0 10847 +professorbruc 1 6.957497 0 0 10848 +founder 1 6.957497 0 0 10849 +macdonaldand 1 6.957497 0 0 10850 +hisresearch 1 6.957497 0 0 10851 +invis 1 6.957497 0 0 10852 +cantilev 1 6.957497 0 0 10853 +fallingwat 1 6.957497 0 0 10854 +outin 1 6.957497 0 0 10855 +lindseth 1 6.957497 0 0 10856 +karr 1 6.957497 0 0 10857 +karrdavid 1 6.957497 0 0 10858 +karrphd 1 6.957497 0 0 10859 +birmananddr 1 6.957497 0 0 10860 +protocolsmi 1 6.957497 0 0 10861 +formalspecif 1 6.957497 0 0 10862 +variousinterest 1 6.957497 0 0 10863 +usedin 1 6.957497 0 0 10864 +stylefor 1 6.957497 0 0 10865 +itsinterfac 1 6.957497 0 0 10866 +andbelow 1 6.957497 0 0 10867 +agiven 1 6.957497 0 0 10868 +unusualcombin 1 6.957497 0 0 10869 +systemsshould 1 6.957497 0 0 10870 +constructcustom 1 6.957497 0 0 10871 +theirassoci 1 6.957497 0 0 10872 +thesecur 1 6.957497 0 0 10873 +harden 1 6.957497 0 0 10874 +ofverifi 1 6.957497 0 0 10875 +stem 1 6.957497 0 0 10876 +thepromis 1 6.957497 0 0 10877 +variousguarante 1 6.957497 0 0 10878 +passingenviron 1 6.957497 0 0 10879 +delayedor 1 6.957497 0 0 10880 +componentswer 1 6.957497 0 0 10881 +considerablepromis 1 6.957497 0 0 10882 +consistencywhil 1 6.957497 0 0 10883 +filesin 1 6.957497 0 0 10884 +partitionedinto 1 6.957497 0 0 10885 +wouldallow 1 6.957497 0 0 10886 +performancemi 1 6.957497 0 0 10887 +andeffici 1 6.957497 0 0 10888 +ofsystem 1 6.957497 0 0 10889 +suscept 1 6.957497 0 0 10890 +javath 1 6.957497 0 0 10891 +applicationwith 1 6.957497 0 0 10892 +tonavig 1 6.957497 0 0 10893 +myweb 1 6.957497 0 0 10894 +anetscap 1 6.957497 0 0 10895 +abirthdai 1 6.957497 0 0 10896 +forverifi 1 6.957497 0 0 10897 +affiliationsi 1 6.957497 0 0 10898 +andmaa 1 6.957497 0 0 10899 +informationseemi 1 6.957497 0 0 10900 +hogman 1 6.957497 0 0 10901 +qualcomm 1 6.957497 0 0 10902 +pagekartik 1 6.957497 0 0 10903 +kapadiamast 1 6.957497 0 0 10904 +dabnei 1 6.957497 0 0 10905 +kkapadia 1 6.957497 0 0 10906 +comcurr 1 6.957497 0 0 10907 +incorporatedmi 1 6.957497 0 0 10908 +projectshoca 1 6.957497 0 0 10909 +chiphoca 1 6.957497 0 0 10910 +gameboard 1 6.957497 0 0 10911 +rivlrivl 1 6.957497 0 0 10912 +writingsi 1 6.957497 0 0 10913 +proudof 1 6.957497 0 0 10914 +morethought 1 6.957497 0 0 10915 +conscienti 1 6.957497 0 0 10916 +objector 1 6.957497 0 0 10917 +myfirst 1 6.957497 0 0 10918 +fewyear 1 6.957497 0 0 10919 +gulf 1 6.957497 0 0 10920 +vestart 1 6.957497 0 0 10921 +lest 1 6.957497 0 0 10922 +dprobabl 1 6.957497 0 0 10923 +essayist 1 6.957497 0 0 10924 +byron 1 6.957497 0 0 10925 +asuburb 1 6.957497 0 0 10926 +unabomb 1 6.957497 0 0 10927 +mathematiciansar 1 6.957497 0 0 10928 +infodistribut 1 6.957497 0 0 10929 +infocompani 1 6.957497 0 0 10930 +lisboa 1 6.957497 0 0 10931 +httpd 1 6.957497 0 0 10932 +xmosaic 1 6.957497 0 0 10933 +guokguo 1 6.957497 0 0 10934 +multicastprotocol 1 6.957497 0 0 10935 +publicationskatherin 1 6.957497 0 0 10936 +connemara 1 6.957497 0 0 10937 +rodrigu 1 6.957497 0 0 10938 +sargento 1 6.957497 0 0 10939 +paulo 1 6.957497 0 0 10940 +verisimo 1 6.957497 0 0 10941 +niagara 1 6.957497 0 0 10942 +networkscool 1 6.957497 0 0 10943 +toolsbibliographyconferencesjournalsacademia 1 6.957497 0 0 10944 +infoschool 1 6.957497 0 0 10945 +infojob 1 6.957497 0 0 10946 +searchinterest 1 6.957497 0 0 10947 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 1 6.957497 0 0 10948 +inforesearch 1 6.957497 0 0 10949 +systempointershoru 1 6.957497 0 0 10950 +productspringtotemtransisx 1 6.957497 0 0 10951 +microsystemslab 1 6.957497 0 0 10952 +networksmulticast 1 6.957497 0 0 10953 +protocolsn 1 6.957497 0 0 10954 +fromlblgun 1 6.957497 0 0 10955 +sguid 1 6.957497 0 0 10956 +quickrefer 1 6.957497 0 0 10957 +htmldocument 1 6.957497 0 0 10958 +finder 1 6.957497 0 0 10959 +bibliographybibliographi 1 6.957497 0 0 10960 +oldindex 1 6.957497 0 0 10961 +hpdc 1 6.957497 0 0 10962 +srd 1 6.957497 0 0 10963 +jsac 1 6.957497 0 0 10964 +scienceacademia 1 6.957497 0 0 10965 +openingsibmdelltandemtiapplebel 1 6.957497 0 0 10966 +gradschool 1 6.957497 0 0 10967 +gradjob 1 6.957497 0 0 10968 +ukinterest 1 6.957497 0 0 10969 +moviesbailei 1 6.957497 0 0 10970 +concertslibrari 1 6.957497 0 0 10971 +hightechin 1 6.957497 0 0 10972 +inesc 1 6.957497 0 0 10973 +resort 1 6.957497 0 0 10974 +coloradooth 1 6.957497 0 0 10975 +infoart 1 6.957497 0 0 10976 +weblouvreth 1 6.957497 0 0 10977 +linebook 1 6.957497 0 0 10978 +calvinhobb 1 6.957497 0 0 10979 +archivecardsmagicchinaart 1 6.957497 0 0 10980 +gourmetl 1 6.957497 0 0 10981 +cordonbleu 1 6.957497 0 0 10982 +fashional 1 6.957497 0 0 10983 +linksa 1 6.957497 0 0 10984 +cjlutz 1 6.957497 0 0 10985 +wwweb 1 6.957497 0 0 10986 +pagewith 1 6.957497 0 0 10987 +tmexpressfirst 1 6.957497 0 0 10988 +wireirc 1 6.957497 0 0 10989 +faqfashion 1 6.957497 0 0 10990 +nethair 1 6.957497 0 0 10991 +diesel 1 6.957497 0 0 10992 +guessfriend 1 6.957497 0 0 10993 +deng 1 6.957497 0 0 10994 +shiji 1 6.957497 0 0 10995 +edulibrari 1 6.957497 0 0 10996 +congressmagazin 1 6.957497 0 0 10997 +timegeorg 1 6.957497 0 0 10998 +gilder 1 6.957497 0 0 10999 +archivesinanet 1 6.957497 0 0 11000 +newsworld 1 6.957497 0 0 11001 +olymp 1 6.957497 0 0 11002 +streetheadlin 1 6.957497 0 0 11003 +weatherhunt 1 6.957497 0 0 11004 +informationglob 1 6.957497 0 0 11005 +navigatorhom 1 6.957497 0 0 11006 +wanderersand 1 6.957497 0 0 11007 +kguo 1 6.957497 0 0 11008 +wirefram 1 6.957497 0 0 11009 +desgin 1 6.957497 0 0 11010 +threader 1 6.957497 0 0 11011 +speific 1 6.957497 0 0 11012 +react 1 6.957497 0 0 11013 +cone 1 6.957497 0 0 11014 +cylind 1 6.957497 0 0 11015 +toru 1 6.957497 0 0 11016 +kleinber 1 6.957497 0 0 11017 +disjointpath 1 6.957497 0 0 11018 +anddisjoint 1 6.957497 0 0 11019 +stabilityof 1 6.957497 0 0 11020 +particularlyth 1 6.957497 0 0 11021 +seeselect 1 6.957497 0 0 11022 +publicationsmiscellan 1 6.957497 0 0 11023 +linkspapersapproxim 1 6.957497 0 0 11024 +unsplitt 1 6.957497 0 0 11025 +aggarw 1 6.957497 0 0 11026 +improvedapproxim 1 6.957497 0 0 11027 +thetafunct 1 6.957497 0 0 11028 +vertex 1 6.957497 0 0 11029 +simplepolygon 1 6.957497 0 0 11030 +serverbalanc 1 6.957497 0 0 11031 +yaniv 1 6.957497 0 0 11032 +serveralgorithm 1 6.957497 0 0 11033 +robotnavig 1 6.957497 0 0 11034 +awerbuch 1 6.957497 0 0 11035 +borodin 1 6.957497 0 0 11036 +raghavan 1 6.957497 0 0 11037 +sudan 1 6.957497 0 0 11038 +lynch 1 6.957497 0 0 11039 +offsbetween 1 6.957497 0 0 11040 +quiesc 1 6.957497 0 0 11041 +managementprotocol 1 6.957497 0 0 11042 +mullainathan 1 6.957497 0 0 11043 +boundsand 1 6.957497 0 0 11044 +athre 1 6.957497 0 0 11045 +kedem 1 6.957497 0 0 11046 +pointset 1 6.957497 0 0 11047 +invariantsof 1 6.957497 0 0 11048 +linkssearch 1 6.957497 0 0 11049 +bibliographiesaltavista 1 6.957497 0 0 11050 +nynex 1 6.957497 0 0 11051 +sitescornel 1 6.957497 0 0 11052 +computingtc 1 6.957497 0 0 11053 +crescenzi 1 6.957497 0 0 11054 +kann 1 6.957497 0 0 11055 +compendium 1 6.957497 0 0 11056 +biologycomput 1 6.957497 0 0 11057 +carb 1 6.957497 0 0 11058 +biocomput 1 6.957497 0 0 11059 +geometrydavid 1 6.957497 0 0 11060 +eppstein 1 6.957497 0 0 11061 +junkyard 1 6.957497 0 0 11062 +erickson 1 6.957497 0 0 11063 +securitymitr 1 6.957497 0 0 11064 +miscellaneousnetscap 1 6.957497 0 0 11065 +intellicast 1 6.957497 0 0 11066 +kleinbergdepart 1 6.957497 0 0 11067 +scienceupson 1 6.957497 0 0 11068 +kazushi 1 6.957497 0 0 11069 +otakota 1 6.957497 0 0 11070 +edukazushi 1 6.957497 0 0 11071 +mitusbishi 1 6.957497 0 0 11072 +isund 1 6.957497 0 0 11073 +kozendext 1 6.957497 0 0 11074 +kozenjoseph 1 6.957497 0 0 11075 +engineeringphd 1 6.957497 0 0 11076 +especiallycomplex 1 6.957497 0 0 11077 +onlinekleen 1 6.957497 0 0 11078 +algebraautomata 1 6.957497 0 0 11079 +logicbibliographylist 1 6.957497 0 0 11080 +reportscours 1 6.957497 0 0 11081 +notesc 1 6.957497 0 0 11082 +programsc 1 6.957497 0 0 11083 +theoryfun 1 6.957497 0 0 11084 +stufffamili 1 6.957497 0 0 11085 +rugbi 1 6.957497 0 0 11086 +effectcomput 1 6.957497 0 0 11087 +departmentupson 1 6.957497 0 0 11088 +usakozen 1 6.957497 0 0 11089 +kreitz 1 6.957497 0 0 11090 +lehr 1 6.957497 0 0 11091 +lernen 1 6.957497 0 0 11092 +vorlesungsskript 1 6.957497 0 0 11093 +medienunterst 1 6.957497 0 0 11094 +uumltzt 1 6.957497 0 0 11095 +lehren 1 6.957497 0 0 11096 +kuen 1 6.957497 0 0 11097 +myproject 1 6.957497 0 0 11098 +groupin 1 6.957497 0 0 11099 +ourgroup 1 6.957497 0 0 11100 +adistribut 1 6.957497 0 0 11101 +collaborateson 1 6.957497 0 0 11102 +thedienstsoftwar 1 6.957497 0 0 11103 +providesdistribut 1 6.957497 0 0 11104 +initiativesto 1 6.957497 0 0 11105 +iso 1 6.957497 0 0 11106 +dlib 1 6.957497 0 0 11107 +dlibwork 1 6.957497 0 0 11108 +iiin 1 6.957497 0 0 11109 +warwick 1 6.957497 0 0 11110 +amveri 1 6.957497 0 0 11111 +distributedobject 1 6.957497 0 0 11112 +paperfor 1 6.957497 0 0 11113 +codeworkshop 1 6.957497 0 0 11114 +meetm 1 6.957497 0 0 11115 +moreabout 1 6.957497 0 0 11116 +outsideof 1 6.957497 0 0 11117 +toddler 1 6.957497 0 0 11118 +lucyg 1 6.957497 0 0 11119 +avid 1 6.957497 0 0 11120 +movingwat 1 6.957497 0 0 11121 +lakeand 1 6.957497 0 0 11122 +itch 1 6.957497 0 0 11123 +ridingalong 1 6.957497 0 0 11124 +backwood 1 6.957497 0 0 11125 +sparehour 1 6.957497 0 0 11126 +shoe 1 6.957497 0 0 11127 +deeplyth 1 6.957497 0 0 11128 +physicalnor 1 6.957497 0 0 11129 +itspreserv 1 6.957497 0 0 11130 +lidong 1 6.957497 0 0 11131 +auto 1 6.957497 0 0 11132 +oasi 1 6.957497 0 0 11133 +adag 1 6.957497 0 0 11134 +sirac 1 6.957497 0 0 11135 +kerbero 1 6.957497 0 0 11136 +ocaml 1 6.957497 0 0 11137 +jobtrak 1 6.957497 0 0 11138 +hunter 1 6.957497 0 0 11139 +careermosa 1 6.957497 0 0 11140 +jobweb 1 6.957497 0 0 11141 +xjob 1 6.957497 0 0 11142 +yingjun 1 6.957497 0 0 11143 +isso 1 6.957497 0 0 11144 +autosit 1 6.957497 0 0 11145 +agenc 1 6.957497 0 0 11146 +indexlast 1 6.957497 0 0 11147 +hsian 1 6.957497 0 0 11148 +orwel 1 6.957497 0 0 11149 +totoro 1 6.957497 0 0 11150 +wangthi 1 6.957497 0 0 11151 +constructionlin 1 6.957497 0 0 11152 +fangliao 1 6.957497 0 0 11153 +videoe 1 6.957497 0 0 11154 +networkse 1 6.957497 0 0 11155 +amidonc 1 6.957497 0 0 11156 +transcrib 1 6.957497 0 0 11157 +zabihspr 1 6.957497 0 0 11158 +processingc 1 6.957497 0 0 11159 +managementc 1 6.957497 0 0 11160 +colloquimc 1 6.957497 0 0 11161 +webspac 1 6.957497 0 0 11162 +stuffscornel 1 6.957497 0 0 11163 +reportiee 1 6.957497 0 0 11164 +societytaiwan 1 6.957497 0 0 11165 +comth 1 6.957497 0 0 11166 +musicmovi 1 6.957497 0 0 11167 +movieweb 1 6.957497 0 0 11168 +moviemania 1 6.957497 0 0 11169 +picturesth 1 6.957497 0 0 11170 +linkstcl 1 6.957497 0 0 11171 +hacksth 1 6.957497 0 0 11172 +pagemiscellan 1 6.957497 0 0 11173 +cja 1 6.957497 0 0 11174 +calanimag 1 6.957497 0 0 11175 +pagelaputa 1 6.957497 0 0 11176 +conan 1 6.957497 0 0 11177 +slump 1 6.957497 0 0 11178 +kiki 1 6.957497 0 0 11179 +legend 1 6.957497 0 0 11180 +pagecampu 1 6.957497 0 0 11181 +uptown 1 6.957497 0 0 11182 +eithaca 1 6.957497 0 0 11183 +linhsian 1 6.957497 0 0 11184 +libbi 1 6.957497 0 0 11185 +lista 1 6.957497 0 0 11186 +projectemail 1 6.957497 0 0 11187 +mehit 1 6.957497 0 0 11188 +trefethenprofessorlnt 1 6.957497 0 0 11189 +thecent 1 6.957497 0 0 11190 +numericalsolut 1 6.957497 0 0 11191 +notorthogon 1 6.957497 0 0 11192 +textbooksfinit 1 6.957497 0 0 11193 +papersmultimatlab 1 6.957497 0 0 11194 +processorsmatrix 1 6.957497 0 0 11195 +gap 1 6.957497 0 0 11196 +betweenpotenti 1 6.957497 0 0 11197 +convergencepseudospectra 1 6.957497 0 0 11198 +operatorssom 1 6.957497 0 0 11199 +itemsclass 1 6.957497 0 0 11200 +analysiscurriculum 1 6.957497 0 0 11201 +vitaepseudospectra 1 6.957497 0 0 11202 +alfeldcurr 1 6.957497 0 0 11203 +howlegubjrn 1 6.957497 0 0 11204 +jnsson 1 6.957497 0 0 11205 +yohan 1 6.957497 0 0 11206 +kimdivakar 1 6.957497 0 0 11207 +viswanathprevi 1 6.957497 0 0 11208 +baggetttobi 1 6.957497 0 0 11209 +driscollalan 1 6.957497 0 0 11210 +edelman 1 6.957497 0 0 11211 +howel 1 6.957497 0 0 11212 +mascarenhasnoel 1 6.957497 0 0 11213 +nachtigalsatish 1 6.957497 0 0 11214 +chuan 1 6.957497 0 0 11215 +tohsom 1 6.957497 0 0 11216 +colleaguesjim 1 6.957497 0 0 11217 +demmelann 1 6.957497 0 0 11218 +greenbaummartin 1 6.957497 0 0 11219 +gutknechtd 1 6.957497 0 0 11220 +highamann 1 6.957497 0 0 11221 +trefethenandr 1 6.957497 0 0 11222 +weideman 1 6.957497 0 0 11223 +whiz 1 6.957497 0 0 11224 +systemscontact 1 6.957497 0 0 11225 +yuwu 1 6.957497 0 0 11226 +tkcgi 1 6.957497 0 0 11227 +securitypc 1 6.957497 0 0 11228 +lube 1 6.957497 0 0 11229 +ipngip_atmcomput 1 6.957497 0 0 11230 +sapient 1 6.957497 0 0 11231 +jobtrack 1 6.957497 0 0 11232 +artvark 1 6.957497 0 0 11233 +universitylinda 1 6.957497 0 0 11234 +lxwu 1 6.957497 0 0 11235 +univsers 1 6.957497 0 0 11236 +banyan 1 6.957497 0 0 11237 +mulitimedia 1 6.957497 0 0 11238 +kramer 1 6.957497 0 0 11239 +mart 1 6.957497 0 0 11240 +photoesus 1 6.957497 0 0 11241 +mateevnikolai 1 6.957497 0 0 11242 +mateevgradu 1 6.957497 0 0 11243 +studentmateev 1 6.957497 0 0 11244 +morgenstern 1 6.957497 0 0 11245 +pagematthew 1 6.957497 0 0 11246 +morgensternresearch 1 6.957497 0 0 11247 +leaderaddress 1 6.957497 0 0 11248 +centerxerox 1 6.957497 0 0 11249 +institutecornel 1 6.957497 0 0 11250 +edustatu 1 6.957497 0 0 11251 +scienceproject 1 6.957497 0 0 11252 +fundedresearch 1 6.957497 0 0 11253 +homepagemonika 1 6.957497 0 0 11254 +henzingerassist 1 6.957497 0 0 11255 +centerhomepageresearch 1 6.957497 0 0 11256 +interestscombinatori 1 6.957497 0 0 11257 +pageprogram 1 6.957497 0 0 11258 +stanlei 1 6.957497 0 0 11259 +kentucki 1 6.957497 0 0 11260 +huangmast 1 6.957497 0 0 11261 +studentmhuang 1 6.957497 0 0 11262 +courtcornel 1 6.957497 0 0 11263 +systemsdistribut 1 6.957497 0 0 11264 +systemsdatabas 1 6.957497 0 0 11265 +retrievalgraph 1 6.957497 0 0 11266 +interfacesoth 1 6.957497 0 0 11267 +horse_back 1 6.957497 0 0 11268 +myadvisor 1 6.957497 0 0 11269 +robbertvan 1 6.957497 0 0 11270 +planplan 1 6.957497 0 0 11271 +distributionplan 1 6.957497 0 0 11272 +updateplan 1 6.957497 0 0 11273 +faqhorusc 1 6.957497 0 0 11274 +memorydistribut 1 6.957497 0 0 11275 +memorysom 1 6.957497 0 0 11276 +communicationsnapshotu 1 6.957497 0 0 11277 +architecturejobscar 1 6.957497 0 0 11278 +pathbai 1 6.957497 0 0 11279 +jobscyberezumescar 1 6.957497 0 0 11280 +opportunitiesus 1 6.957497 0 0 11281 +stufftechn 1 6.957497 0 0 11282 +searchbel 1 6.957497 0 0 11283 +labsspbsd 1 6.957497 0 0 11284 +sourcesjavarfclast 1 6.957497 0 0 11285 +mhuang 1 6.957497 0 0 11286 +millett 1 6.957497 0 0 11287 +lynett 1 6.957497 0 0 11288 +millettdepart 1 6.957497 0 0 11289 +participatoryform 1 6.957497 0 0 11290 +internetdeserv 1 6.957497 0 0 11291 +intrus 1 6.957497 0 0 11292 +skit 1 6.957497 0 0 11293 +femin 1 6.957497 0 0 11294 +feminist 1 6.957497 0 0 11295 +whenver 1 6.957497 0 0 11296 +sentiment 1 6.957497 0 0 11297 +doormat 1 6.957497 0 0 11298 +prostitut 1 6.957497 0 0 11299 +newgroupc 1 6.957497 0 0 11300 +mishaal 1 6.957497 0 0 11301 +pagemisha 1 6.957497 0 0 11302 +kuwaiti 1 6.957497 0 0 11303 +mengc 1 6.957497 0 0 11304 +worcest 1 6.957497 0 0 11305 +inworcest 1 6.957497 0 0 11306 +bearaccess 1 6.957497 0 0 11307 +newgroupnba 1 6.957497 0 0 11308 +newgroupoptim 1 6.957497 0 0 11309 +kuwait 1 6.957497 0 0 11310 +quotescool 1 6.957497 0 0 11311 +cann 1 6.957497 0 0 11312 +accus 1 6.957497 0 0 11313 +almashanmisha 1 6.957497 0 0 11314 +korbi 1 6.957497 0 0 11315 +myguestbook 1 6.957497 0 0 11316 +poll 1 6.957497 0 0 11317 +vitya 1 6.957497 0 0 11318 +corbett 1 6.957497 0 0 11319 +eryn 1 6.957497 0 0 11320 +crave 1 6.957497 0 0 11321 +guttermouth 1 6.957497 0 0 11322 +byjust 1 6.957497 0 0 11323 +peic 1 6.957497 0 0 11324 +accuar 1 6.957497 0 0 11325 +atmak 1 6.957497 0 0 11326 +edubas 1 6.957497 0 0 11327 +nobuhiko 1 6.957497 0 0 11328 +mukai 1 6.957497 0 0 11329 +mukainobuhiko 1 6.957497 0 0 11330 +compressionon 1 6.957497 0 0 11331 +magicon 1 6.957497 0 0 11332 +nihow 1 6.957497 0 0 11333 +housti 1 6.957497 0 0 11334 +ellpack 1 6.957497 0 0 11335 +sukup 1 6.957497 0 0 11336 +mississippi 1 6.957497 0 0 11337 +papachi 1 6.957497 0 0 11338 +florian 1 6.957497 0 0 11339 +reza 1 6.957497 0 0 11340 +behforooz 1 6.957497 0 0 11341 +animesh 1 6.957497 0 0 11342 +chatterje 1 6.957497 0 0 11343 +rajani 1 6.957497 0 0 11344 +vaidyanathan 1 6.957497 0 0 11345 +bowyer 1 6.957497 0 0 11346 +offifth 1 6.957497 0 0 11347 +kohl 1 6.957497 0 0 11348 +yellick 1 6.957497 0 0 11349 +unstructur 1 6.957497 0 0 11350 +collid 1 6.957497 0 0 11351 +haupt 1 6.957497 0 0 11352 +scalableparallel 1 6.957497 0 0 11353 +engineeringresearch 1 6.957497 0 0 11354 +parallelhardwar 1 6.957497 0 0 11355 +differentialequ 1 6.957497 0 0 11356 +vichnevetski 1 6.957497 0 0 11357 +decompos 1 6.957497 0 0 11358 +kortesi 1 6.957497 0 0 11359 +domaindecomposit 1 6.957497 0 0 11360 +ussr 1 6.957497 0 0 11361 +glowinski 1 6.957497 0 0 11362 +karathanas 1 6.957497 0 0 11363 +samartzi 1 6.957497 0 0 11364 +vavali 1 6.957497 0 0 11365 +weerawarana 1 6.957497 0 0 11366 +onsupercomput 1 6.957497 0 0 11367 +andproblem 1 6.957497 0 0 11368 +computingappl 1 6.957497 0 0 11369 +pdecomput 1 6.957497 0 0 11370 +pcrc 1 6.957497 0 0 11371 +naumov 1 6.957497 0 0 11372 +orplai 1 6.957497 0 0 11373 +vazirani 1 6.957497 0 0 11374 +bipartit 1 6.957497 0 0 11375 +consistingof 1 6.957497 0 0 11376 +theubiquit 1 6.957497 0 0 11377 +heed 1 6.957497 0 0 11378 +lawsof 1 6.957497 0 0 11379 +layoutand 1 6.957497 0 0 11380 +accomplishedbi 1 6.957497 0 0 11381 +ihav 1 6.957497 0 0 11382 +couldb 1 6.957497 0 0 11383 +thisarchitectur 1 6.957497 0 0 11384 +designfor 1 6.957497 0 0 11385 +proteinstructur 1 6.957497 0 0 11386 +parallelcomput 1 6.957497 0 0 11387 +commodityand 1 6.957497 0 0 11388 +architectureand 1 6.957497 0 0 11389 +hideth 1 6.957497 0 0 11390 +underlyingvon 1 6.957497 0 0 11391 +architectureha 1 6.957497 0 0 11392 +easyto 1 6.957497 0 0 11393 +dunten 1 6.957497 0 0 11394 +kiewit 1 6.957497 0 0 11395 +pillai 1 6.957497 0 0 11396 +irregularli 1 6.957497 0 0 11397 +allerton 1 6.957497 0 0 11398 +peskin 1 6.957497 0 0 11399 +acacia 1 6.957497 0 0 11400 +andyour 1 6.957497 0 0 11401 +workeda 1 6.957497 0 0 11402 +cornellundergradu 1 6.957497 0 0 11403 +theirfield 1 6.957497 0 0 11404 +isrun 1 6.957497 0 0 11405 +valentin 1 6.957497 0 0 11406 +familycurr 1 6.957497 0 0 11407 +halfwai 1 6.957497 0 0 11408 +andharrisburg 1 6.957497 0 0 11409 +younev 1 6.957497 0 0 11410 +sinceit 1 6.957497 0 0 11411 +throughpittsburgh 1 6.957497 0 0 11412 +imperfectli 1 6.957497 0 0 11413 +tothat 1 6.957497 0 0 11414 +andmultiprocessor 1 6.957497 0 0 11415 +fromscientif 1 6.957497 0 0 11416 +withibm 1 6.957497 0 0 11417 +hasinterest 1 6.957497 0 0 11418 +athp 1 6.957497 0 0 11419 +wasabout 1 6.957497 0 0 11420 +necess 1 6.957497 0 0 11421 +looptransform 1 6.957497 0 0 11422 +loopparallel 1 6.957497 0 0 11423 +regardingdata 1 6.957497 0 0 11424 +centric 1 6.957497 0 0 11425 +availableund 1 6.957497 0 0 11426 +departmentmachin 1 6.957497 0 0 11427 +andfind 1 6.957497 0 0 11428 +alsofind 1 6.957497 0 0 11429 +adt 1 6.957497 0 0 11430 +ranjani 1 6.957497 0 0 11431 +ramamurthi 1 6.957497 0 0 11432 +ralph 1 6.957497 0 0 11433 +benzingerralph 1 6.957497 0 0 11434 +benzingerw 1 6.957497 0 0 11435 +sich 1 6.957497 0 0 11436 +seinen 1 6.957497 0 0 11437 +lorbeeren 1 6.957497 0 0 11438 +ausruht 1 6.957497 0 0 11439 +trgt 1 6.957497 0 0 11440 +derfalschen 1 6.957497 0 0 11441 +stell 1 6.957497 0 0 11442 +studienstiftung 1 6.957497 0 0 11443 +deutschen 1 6.957497 0 0 11444 +volk 1 6.957497 0 0 11445 +siemen 1 6.957497 0 0 11446 +international 1 6.957497 0 0 11447 +studentenkrei 1 6.957497 0 0 11448 +alumnusat 1 6.957497 0 0 11449 +aster 1 6.957497 0 0 11450 +bruno 1 6.957497 0 0 11451 +superscript 1 6.957497 0 0 11452 +knuth 1 6.957497 0 0 11453 +unambigu 1 6.957497 0 0 11454 +inton 1 6.957497 0 0 11455 +intermix 1 6.957497 0 0 11456 +demonstrationi 1 6.957497 0 0 11457 +forrend 1 6.957497 0 0 11458 +myphd 1 6.957497 0 0 11459 +dectalk 1 6.957497 0 0 11460 +mulaw 1 6.957497 0 0 11461 +mono 1 6.957497 0 0 11462 +dvip 1 6.957497 0 0 11463 +andround 1 6.957497 0 0 11464 +faad 1 6.957497 0 0 11465 +casey 1 6.957497 0 0 11466 +examplessinc 1 6.957497 0 0 11467 +inflect 1 6.957497 0 0 11468 +toconvei 1 6.957497 0 0 11469 +renderingsub 1 6.957497 0 0 11470 +audiost 1 6.957497 0 0 11471 +dimensionus 1 6.957497 0 0 11472 +verbatim 1 6.957497 0 0 11473 +layoutoper 1 6.957497 0 0 11474 +verydiffer 1 6.957497 0 0 11475 +monotonicchang 1 6.957497 0 0 11476 +trigonometr 1 6.957497 0 0 11477 +ambigu 1 6.957497 0 0 11478 +parenthesi 1 6.957497 0 0 11479 +asexpon 1 6.957497 0 0 11480 +isfulli 1 6.957497 0 0 11481 +innocu 1 6.957497 0 0 11482 +mostdifficult 1 6.957497 0 0 11483 +theintegr 1 6.957497 0 0 11484 +ofhuman 1 6.957497 0 0 11485 +ofcross 1 6.957497 0 0 11486 +referenceableobject 1 6.957497 0 0 11487 +latercross 1 6.957497 0 0 11488 +followingdeepli 1 6.957497 0 0 11489 +fledgedsymbol 1 6.957497 0 0 11490 +thematrix 1 6.957497 0 0 11491 +commenc 1 6.957497 0 0 11492 +aseach 1 6.957497 0 0 11493 +secondsto 1 6.957497 0 0 11494 +spacenot 1 6.957497 0 0 11495 +changeth 1 6.957497 0 0 11496 +techniquefor 1 6.957497 0 0 11497 +renderingsconvei 1 6.957497 0 0 11498 +thesub 1 6.957497 0 0 11499 +denomin 1 6.957497 0 0 11500 +uumln 1 6.957497 0 0 11501 +sivakumar 1 6.957497 0 0 11502 +jeyakumar 1 6.957497 0 0 11503 +muthukumarasami 1 6.957497 0 0 11504 +umakishor 1 6.957497 0 0 11505 +gautam 1 6.957497 0 0 11506 +pageramin 1 6.957497 0 0 11507 +zabihassist 1 6.957497 0 0 11508 +professorrdz 1 6.957497 0 0 11509 +agr 1 6.957497 0 0 11510 +studentsi 1 6.957497 0 0 11511 +vera 1 6.957497 0 0 11512 +kettnak 1 6.957497 0 0 11513 +olga 1 6.957497 0 0 11514 +veksler 1 6.957497 0 0 11515 +publicationsmost 1 6.957497 0 0 11516 +sarasota 1 6.957497 0 0 11517 +woodfil 1 6.957497 0 0 11518 +teachingi 1 6.957497 0 0 11519 +activitiesi 1 6.957497 0 0 11520 +comitte 1 6.957497 0 0 11521 +acknowledgementsthi 1 6.957497 0 0 11522 +huttenlocherlast 1 6.957497 0 0 11523 +roderick 1 6.957497 0 0 11524 +moten 1 6.957497 0 0 11525 +homepageronitt 1 6.957497 0 0 11526 +rubinfeldi 1 6.957497 0 0 11527 +rubinfeldcomput 1 6.957497 0 0 11528 +edupictur 1 6.957497 0 0 11529 +eitan 1 6.957497 0 0 11530 +thetechnion 1 6.957497 0 0 11531 +friedmanroi 1 6.957497 0 0 11532 +friedmanpost 1 6.957497 0 0 11533 +universityroi 1 6.957497 0 0 11534 +rennessein 1 6.957497 0 0 11535 +washagit 1 6.957497 0 0 11536 +wasconsist 1 6.957497 0 0 11537 +themilliped 1 6.957497 0 0 11538 +withassaf 1 6.957497 0 0 11539 +schuster 1 6.957497 0 0 11540 +papersr 1 6.957497 0 0 11541 +scalabledistribut 1 6.957497 0 0 11542 +coprocessor 1 6.957497 0 0 11543 +infodesign 1 6.957497 0 0 11544 +brand 1 6.957497 0 0 11545 +renesserobbert 1 6.957497 0 0 11546 +renessesenior 1 6.957497 0 0 11547 +universityrvr 1 6.957497 0 0 11548 +universityinithaca 1 6.957497 0 0 11549 +birmanin 1 6.957497 0 0 11550 +wasandi 1 6.957497 0 0 11551 +caml 1 6.957497 0 0 11552 +nynetth 1 6.957497 0 0 11553 +ageless 1 6.957497 0 0 11554 +accordion 1 6.957497 0 0 11555 +stuffcornel 1 6.957497 0 0 11556 +ithacaithacanet 1 6.957497 0 0 11557 +spinner 1 6.957497 0 0 11558 +paperssoftwar 1 6.957497 0 0 11559 +sabel 1 6.957497 0 0 11560 +laura 1 6.957497 0 0 11561 +asynchronousdistribut 1 6.957497 0 0 11562 +jelli 1 6.957497 0 0 11563 +bingo 1 6.957497 0 0 11564 +professorkeith 1 6.957497 0 0 11565 +tushar 1 6.957497 0 0 11566 +sfailur 1 6.957497 0 0 11567 +subcut 1 6.957497 0 0 11568 +wdag 1 6.957497 0 0 11569 +cow 1 6.957497 0 0 11570 +strawberri 1 6.957497 0 0 11571 +tart 1 6.957497 0 0 11572 +torch 1 6.957497 0 0 11573 +alpacanet 1 6.957497 0 0 11574 +gourmet 1 6.957497 0 0 11575 +thebobbi 1 6.957497 0 0 11576 +belli 1 6.957497 0 0 11577 +canplai 1 6.957497 0 0 11578 +roever 1 6.957497 0 0 11579 +rozenberg 1 6.957497 0 0 11580 +amdur 1 6.957497 0 0 11581 +wortman 1 6.957497 0 0 11582 +jayanti 1 6.957497 0 0 11583 +failuredetector 1 6.957497 0 0 11584 +unreli 1 6.957497 0 0 11585 +weakest 1 6.957497 0 0 11586 +neiger 1 6.957497 0 0 11587 +professorph 1 6.957497 0 0 11588 +toleranceand 1 6.957497 0 0 11589 +andshar 1 6.957497 0 0 11590 +gapbetween 1 6.957497 0 0 11591 +practicalsolut 1 6.957497 0 0 11592 +withtushar 1 6.957497 0 0 11593 +chandraand 1 6.957497 0 0 11594 +onunreli 1 6.957497 0 0 11595 +computingst 1 6.957497 0 0 11596 +adeterminist 1 6.957497 0 0 11597 +impossibilityresult 1 6.957497 0 0 11598 +aprocess 1 6.957497 0 0 11599 +wefirst 1 6.957497 0 0 11600 +canmak 1 6.957497 0 0 11601 +solveconsensu 1 6.957497 0 0 11602 +practicalityof 1 6.957497 0 0 11603 +theircorrect 1 6.957497 0 0 11604 +sharedobject 1 6.957497 0 0 11605 +accessesthi 1 6.957497 0 0 11606 +otherprocess 1 6.957497 0 0 11607 +thatcorrespond 1 6.957497 0 0 11608 +atani 1 6.957497 0 0 11609 +whetherrobust 1 6.957497 0 0 11610 +bracha 1 6.957497 0 0 11611 +srikanth 1 6.957497 0 0 11612 +abbadi 1 6.957497 0 0 11613 +detectorfor 1 6.957497 0 0 11614 +vancouv 1 6.957497 0 0 11615 +orbix 1 6.957497 0 0 11616 +landissean 1 6.957497 0 0 11617 +sciencewelcom 1 6.957497 0 0 11618 +weanalyz 1 6.957497 0 0 11619 +patternsprofession 1 6.957497 0 0 11620 +acorba 1 6.957497 0 0 11621 +iona 1 6.957497 0 0 11622 +alpin 1 6.957497 0 0 11623 +collectingi 1 6.957497 0 0 11624 +comeduc 1 6.957497 0 0 11625 +seena 1 6.957497 0 0 11626 +cherangara 1 6.957497 0 0 11627 +cherangaramast 1 6.957497 0 0 11628 +homepagecurr 1 6.957497 0 0 11629 +trivandrum 1 6.957497 0 0 11630 +processingalgorithm 1 6.957497 0 0 11631 +rosen 1 6.957497 0 0 11632 +sharmila 1 6.957497 0 0 11633 +vxtreme 1 6.957497 0 0 11634 +imagefram 1 6.957497 0 0 11635 +modifiedigmp 1 6.957497 0 0 11636 +unicast 1 6.957497 0 0 11637 +sitn 1 6.957497 0 0 11638 +microwav 1 6.957497 0 0 11639 +chaddha 1 6.957497 0 0 11640 +avneesh 1 6.957497 0 0 11641 +asilomar 1 6.957497 0 0 11642 +igmp 1 6.957497 0 0 11643 +internetdraft 1 6.957497 0 0 11644 +fenner 1 6.957497 0 0 11645 +niten 1 6.957497 0 0 11646 +malhan 1 6.957497 0 0 11647 +delhiunpublish 1 6.957497 0 0 11648 +preform 1 6.957497 0 0 11649 +blur 1 6.957497 0 0 11650 +speckl 1 6.957497 0 0 11651 +subband 1 6.957497 0 0 11652 +estmat 1 6.957497 0 0 11653 +writen 1 6.957497 0 0 11654 +flavour 1 6.957497 0 0 11655 +ifram 1 6.957497 0 0 11656 +nodisplai 1 6.957497 0 0 11657 +filenam 1 6.957497 0 0 11658 +putimageincanva 1 6.957497 0 0 11659 +dummi 1 6.957497 0 0 11660 +snooper 1 6.957497 0 0 11661 +doesnt 1 6.957497 0 0 11662 +replai 1 6.957497 0 0 11663 +kludg 1 6.957497 0 0 11664 +dissalow 1 6.957497 0 0 11665 +gaveth 1 6.957497 0 0 11666 +tongu 1 6.957497 0 0 11667 +sharm 1 6.957497 0 0 11668 +shyness 1 6.957497 0 0 11669 +actress 1 6.957497 0 0 11670 +tagor 1 6.957497 0 0 11671 +ealri 1 6.957497 0 0 11672 +jewish 1 6.957497 0 0 11673 +shim 1 6.957497 0 0 11674 +shimmast 1 6.957497 0 0 11675 +dryden 1 6.957497 0 0 11676 +irvinestudi 1 6.957497 0 0 11677 +classi 1 6.957497 0 0 11678 +stan 1 6.957497 0 0 11679 +getz 1 6.957497 0 0 11680 +jobim 1 6.957497 0 0 11681 +coltran 1 6.957497 0 0 11682 +earl 1 6.957497 0 0 11683 +klugh 1 6.957497 0 0 11684 +metheni 1 6.957497 0 0 11685 +archemi 1 6.957497 0 0 11686 +paradiso 1 6.957497 0 0 11687 +kiss 1 6.957497 0 0 11688 +saigon 1 6.957497 0 0 11689 +newswant 1 6.957497 0 0 11690 +anybodi 1 6.957497 0 0 11691 +hana 1 6.957497 0 0 11692 +jung 1 6.957497 0 0 11693 +hwan 1 6.957497 0 0 11694 +victor 1 6.957497 0 0 11695 +jiyang 1 6.957497 0 0 11696 +timessinc 1 6.957497 0 0 11697 +mandar 1 6.957497 0 0 11698 +gerardsalton 1 6.957497 0 0 11699 +lengthnorm 1 6.957497 0 0 11700 +mandarmitra 1 6.957497 0 0 11701 +mitra 1 6.957497 0 0 11702 +pageamit 1 6.957497 0 0 11703 +singhaldepart 1 6.957497 0 0 11704 +universitysingh 1 6.957497 0 0 11705 +andtext 1 6.957497 0 0 11706 +clairecardieher 1 6.957497 0 0 11707 +beenon 1 6.957497 0 0 11708 +informationretriev 1 6.957497 0 0 11709 +thirti 1 6.957497 0 0 11710 +thateffect 1 6.957497 0 0 11711 +chancessimilar 1 6.957497 0 0 11712 +normalizationfunct 1 6.957497 0 0 11713 +retrievaleffect 1 6.957497 0 0 11714 +normalizationtechniqu 1 6.957497 0 0 11715 +trecparticipationtext 1 6.957497 0 0 11716 +sponsoredeffort 1 6.957497 0 0 11717 +retrievaltechniqu 1 6.957497 0 0 11718 +hasconsist 1 6.957497 0 0 11719 +somepap 1 6.957497 0 0 11720 +summarizationnon 1 6.957497 0 0 11721 +expositori 1 6.957497 0 0 11722 +tocov 1 6.957497 0 0 11723 +selectiveaccess 1 6.957497 0 0 11724 +toanalyz 1 6.957497 0 0 11725 +texttravers 1 6.957497 0 0 11726 +papersnorm 1 6.957497 0 0 11727 +documentlength 1 6.957497 0 0 11728 +mitraand 1 6.957497 0 0 11729 +usingsmart 1 6.957497 0 0 11730 +textthem 1 6.957497 0 0 11731 +andmanag 1 6.957497 0 0 11732 +vectorspac 1 6.957497 0 0 11733 +machineread 1 6.957497 0 0 11734 +groupmemb 1 6.957497 0 0 11735 +fluctuat 1 6.957497 0 0 11736 +iinstal 1 6.957497 0 0 11737 +skeshav 1 6.957497 0 0 11738 +idlinet 1 6.957497 0 0 11739 +keshavemail 1 6.957497 0 0 11740 +spentfiv 1 6.957497 0 0 11741 +xunet 1 6.957497 0 0 11742 +incollabor 1 6.957497 0 0 11743 +fore 1 6.957497 0 0 11744 +zeitnet 1 6.957497 0 0 11745 +idlinetsourc 1 6.957497 0 0 11746 +applicationget 1 6.957497 0 0 11747 +linkspapersher 1 6.957497 0 0 11748 +linkto 1 6.957497 0 0 11749 +reali 1 6.957497 0 0 11750 +beout 1 6.957497 0 0 11751 +native_mod 1 6.957497 0 0 11752 +namein 1 6.957497 0 0 11753 +thanjavur 1 6.957497 0 0 11754 +beprecis 1 6.957497 0 0 11755 +prefix 1 6.957497 0 0 11756 +sonli 1 6.957497 0 0 11757 +surnam 1 6.957497 0 0 11758 +myfath 1 6.957497 0 0 11759 +intoth 1 6.957497 0 0 11760 +beconfus 1 6.957497 0 0 11761 +quotabl 1 6.957497 0 0 11762 +wolfgang 1 6.957497 0 0 11763 +presum 1 6.957497 0 0 11764 +heha 1 6.957497 0 0 11765 +beaver 1 6.957497 0 0 11766 +perri 1 6.957497 0 0 11767 +clearinglook 1 6.957497 0 0 11768 +thanyou 1 6.957497 0 0 11769 +onal 1 6.957497 0 0 11770 +theweath 1 6.957497 0 0 11771 +overcast 1 6.957497 0 0 11772 +ifit 1 6.957497 0 0 11773 +quiteclear 1 6.957497 0 0 11774 +bird 1 6.957497 0 0 11775 +chirp 1 6.957497 0 0 11776 +theymai 1 6.957497 0 0 11777 +louder 1 6.957497 0 0 11778 +nearbywaterfal 1 6.957497 0 0 11779 +gotta 1 6.957497 0 0 11780 +apath 1 6.957497 0 0 11781 +asign 1 6.957497 0 0 11782 +hillschool 1 6.957497 0 0 11783 +wormhol 1 6.957497 0 0 11784 +nearbyhous 1 6.957497 0 0 11785 +clearinginsid 1 6.957497 0 0 11786 +offand 1 6.957497 0 0 11787 +theclear 1 6.957497 0 0 11788 +rhyme 1 6.957497 0 0 11789 +reasonto 1 6.957497 0 0 11790 +sortsof 1 6.957497 0 0 11791 +betteridea 1 6.957497 0 0 11792 +itseem 1 6.957497 0 0 11793 +importantth 1 6.957497 0 0 11794 +clearingh 1 6.957497 0 0 11795 +oftendescrib 1 6.957497 0 0 11796 +mostdistinct 1 6.957497 0 0 11797 +quitelong 1 6.957497 0 0 11798 +elfin 1 6.957497 0 0 11799 +peoplebefor 1 6.957497 0 0 11800 +theresoon 1 6.957497 0 0 11801 +startstel 1 6.957497 0 0 11802 +whynichola 1 6.957497 0 0 11803 +negropont 1 6.957497 0 0 11804 +moron 1 6.957497 0 0 11805 +thenh 1 6.957497 0 0 11806 +obscur 1 6.957497 0 0 11807 +hetend 1 6.957497 0 0 11808 +appearanceinstead 1 6.957497 0 0 11809 +turquois 1 6.957497 0 0 11810 +linen 1 6.957497 0 0 11811 +imageof 1 6.957497 0 0 11812 +fromhim 1 6.957497 0 0 11813 +pewter 1 6.957497 0 0 11814 +pentacl 1 6.957497 0 0 11815 +neck 1 6.957497 0 0 11816 +hippi 1 6.957497 0 0 11817 +asclass 1 6.957497 0 0 11818 +clearingdan 1 6.957497 0 0 11819 +briani 1 6.957497 0 0 11820 +anundergrad 1 6.957497 0 0 11821 +newsprovid 1 6.957497 0 0 11822 +ancamosoiu 1 6.957497 0 0 11823 +schwa 1 6.957497 0 0 11824 +backwhen 1 6.957497 0 0 11825 +twoand 1 6.957497 0 0 11826 +wegradu 1 6.957497 0 0 11827 +shejust 1 6.957497 0 0 11828 +onewav 1 6.957497 0 0 11829 +issomeon 1 6.957497 0 0 11830 +severalmonth 1 6.957497 0 0 11831 +becamemuch 1 6.957497 0 0 11832 +eedepart 1 6.957497 0 0 11833 +multimediastud 1 6.957497 0 0 11834 +friendof 1 6.957497 0 0 11835 +finlei 1 6.957497 0 0 11836 +notanymor 1 6.957497 0 0 11837 +steelcas 1 6.957497 0 0 11838 +dserver 1 6.957497 0 0 11839 +kinda 1 6.957497 0 0 11840 +cheesi 1 6.957497 0 0 11841 +thebuild 1 6.957497 0 0 11842 +ius 1 6.957497 0 0 11843 +programcal 1 6.957497 0 0 11844 +graduatedfrom 1 6.957497 0 0 11845 +cuter 1 6.957497 0 0 11846 +thanth 1 6.957497 0 0 11847 +blurri 1 6.957497 0 0 11848 +dreamer 1 6.957497 0 0 11849 +ofdream 1 6.957497 0 0 11850 +aphex 1 6.957497 0 0 11851 +twindan 1 6.957497 0 0 11852 +snowman 1 6.957497 0 0 11853 +lookin 1 6.957497 0 0 11854 +upkeep 1 6.957497 0 0 11855 +setuup 1 6.957497 0 0 11856 +doingajaymanishanujmom 1 6.957497 0 0 11857 +daddepart 1 6.957497 0 0 11858 +sciencesearch 1 6.957497 0 0 11859 +netentertain 1 6.957497 0 0 11860 +weeklycricket 1 6.957497 0 0 11861 +soni 1 6.957497 0 0 11862 +sonia 1 6.957497 0 0 11863 +padif 1 6.957497 0 0 11864 +scottdawson 1 6.957497 0 0 11865 +shomebas 1 6.957497 0 0 11866 +stodghillstodghil 1 6.957497 0 0 11867 +acri 1 6.957497 0 0 11868 +projectinterest 1 6.957497 0 0 11869 +hyplan 1 6.957497 0 0 11870 +htmllast 1 6.957497 0 0 11871 +ritu 1 6.957497 0 0 11872 +mailsugata 1 6.957497 0 0 11873 +hichori 1 6.957497 0 0 11874 +estat 1 6.957497 0 0 11875 +owego 1 6.957497 0 0 11876 +sukhpal 1 6.957497 0 0 11877 +sanghera 1 6.957497 0 0 11878 +gater 1 6.957497 0 0 11879 +bilth 1 6.957497 0 0 11880 +empirewritten 1 6.957497 0 0 11881 +kanetkaremail 1 6.957497 0 0 11882 +kanetkar 1 6.957497 0 0 11883 +atmicrosoft 1 6.957497 0 0 11884 +artwork 1 6.957497 0 0 11885 +leak 1 6.957497 0 0 11886 +theful 1 6.957497 0 0 11887 +stripi 1 6.957497 0 0 11888 +themicrosoft 1 6.957497 0 0 11889 +perceiv 1 6.957497 0 0 11890 +problemand 1 6.957497 0 0 11891 +evilempir 1 6.957497 0 0 11892 +comicstrip 1 6.957497 0 0 11893 +theoutsid 1 6.957497 0 0 11894 +eitherbil 1 6.957497 0 0 11895 +heck 1 6.957497 0 0 11896 +summersof 1 6.957497 0 0 11897 +anyoneinterest 1 6.957497 0 0 11898 +thateveri 1 6.957497 0 0 11899 +theyshow 1 6.957497 0 0 11900 +trivia 1 6.957497 0 0 11901 +makey 1 6.957497 0 0 11902 +disclosur 1 6.957497 0 0 11903 +agreeement 1 6.957497 0 0 11904 +theymad 1 6.957497 0 0 11905 +theirheart 1 6.957497 0 0 11906 +pledg 1 6.957497 0 0 11907 +alleig 1 6.957497 0 0 11908 +everydesk 1 6.957497 0 0 11909 +roosterepisod 1 6.957497 0 0 11910 +threatepisod 1 6.957497 0 0 11911 +insigniaepisod 1 6.957497 0 0 11912 +kristen 1 6.957497 0 0 11913 +divid 1 6.957497 0 0 11914 +accessresearch 1 6.957497 0 0 11915 +mylong 1 6.957497 0 0 11916 +forsophist 1 6.957497 0 0 11917 +manipulationtool 1 6.957497 0 0 11918 +logicalstructur 1 6.957497 0 0 11919 +documentrepresent 1 6.957497 0 0 11920 +hierarchyof 1 6.957497 0 0 11921 +postscriptvers 1 6.957497 0 0 11922 +croft 1 6.957497 0 0 11923 +determininglog 1 6.957497 0 0 11924 +soin 1 6.957497 0 0 11925 +ofmultipl 1 6.957497 0 0 11926 +browsingco 1 6.957497 0 0 11927 +nabil 1 6.957497 0 0 11928 +bhargava 1 6.957497 0 0 11929 +yelena 1 6.957497 0 0 11930 +yesha 1 6.957497 0 0 11931 +seeheim 1 6.957497 0 0 11932 +podp 1 6.957497 0 0 11933 +taxonomi 1 6.957497 0 0 11934 +structureselectron 1 6.957497 0 0 11935 +dag 1 6.957497 0 0 11936 +scholaraward 1 6.957497 0 0 11937 +wordless 1 6.957497 0 0 11938 +analysisand 1 6.957497 0 0 11939 +masafumi 1 6.957497 0 0 11940 +suzukither 1 6.957497 0 0 11941 +suzukisuzuki 1 6.957497 0 0 11942 +educlassesfal 1 6.957497 0 0 11943 +heredevelopingrivl 1 6.957497 0 0 11944 +myaddress 1 6.957497 0 0 11945 +brighten 1 6.957497 0 0 11946 +dayjon 1 6.957497 0 0 11947 +connectioncool 1 6.957497 0 0 11948 +siteslast 1 6.957497 0 0 11949 +sunil 1 6.957497 0 0 11950 +srivastavamast 1 6.957497 0 0 11951 +linkscom 1 6.957497 0 0 11952 +sxsriva 1 6.957497 0 0 11953 +huangszu 1 6.957497 0 0 11954 +defend 1 6.957497 0 0 11955 +justic 1 6.957497 0 0 11956 +nevermind 1 6.957497 0 0 11957 +iarriv 1 6.957497 0 0 11958 +soundslik 1 6.957497 0 0 11959 +mobi 1 6.957497 0 0 11960 +nointent 1 6.957497 0 0 11961 +whale 1 6.957497 0 0 11962 +digress 1 6.957497 0 0 11963 +wholefamili 1 6.957497 0 0 11964 +philippin 1 6.957497 0 0 11965 +aroundsix 1 6.957497 0 0 11966 +fluentli 1 6.957497 0 0 11967 +bilingu 1 6.957497 0 0 11968 +thepoetri 1 6.957497 0 0 11969 +dynasti 1 6.957497 0 0 11970 +arabian 1 6.957497 0 0 11971 +doveright 1 6.957497 0 0 11972 +ienter 1 6.957497 0 0 11973 +philippineswith 1 6.957497 0 0 11974 +unabashedli 1 6.957497 0 0 11975 +alsoin 1 6.957497 0 0 11976 +whirlwind 1 6.957497 0 0 11977 +awoman 1 6.957497 0 0 11978 +effortlessli 1 6.957497 0 0 11979 +eek 1 6.957497 0 0 11980 +blunt 1 6.957497 0 0 11981 +ienrol 1 6.957497 0 0 11982 +segreg 1 6.957497 0 0 11983 +everydaygeek 1 6.957497 0 0 11984 +fromactu 1 6.957497 0 0 11985 +happili 1 6.957497 0 0 11986 +myspar 1 6.957497 0 0 11987 +linuxnet 1 6.957497 0 0 11988 +plastic 1 6.957497 0 0 11989 +suspens 1 6.957497 0 0 11990 +thriller 1 6.957497 0 0 11991 +sting 1 6.957497 0 0 11992 +sesam 1 6.957497 0 0 11993 +offend 1 6.957497 0 0 11994 +bysom 1 6.957497 0 0 11995 +blatant 1 6.957497 0 0 11996 +highlyinterest 1 6.957497 0 0 11997 +compatiblecomput 1 6.957497 0 0 11998 +metaballsund 1 6.957497 0 0 11999 +techniquesin 1 6.957497 0 0 12000 +andport 1 6.957497 0 0 12001 +hytechhytech 1 6.957497 0 0 12002 +toolw 1 6.957497 0 0 12003 +henzingerthoma 1 6.957497 0 0 12004 +movedassist 1 6.957497 0 0 12005 +researchform 1 6.957497 0 0 12006 +researchat 1 6.957497 0 0 12007 +cornelland 1 6.957497 0 0 12008 +resumepublicationsreact 1 6.957497 0 0 12009 +systemsclock 1 6.957497 0 0 12010 +systemshybrid 1 6.957497 0 0 12011 +systemsbibliographi 1 6.957497 0 0 12012 +publicationstoolshytech 1 6.957497 0 0 12013 +systemscoursesc 1 6.957497 0 0 12014 +languagesconferenceshybrid 1 6.957497 0 0 12015 +systemscav 1 6.957497 0 0 12016 +verificationlast 1 6.957497 0 0 12017 +byrobbert 1 6.957497 0 0 12018 +andfr 1 6.957497 0 0 12019 +tim_teitelbaum 1 6.957497 0 0 12020 +teitelbaumassoci 1 6.957497 0 0 12021 +adavita 1 6.957497 0 0 12022 +departement 1 6.957497 0 0 12023 +eickenassist 1 6.957497 0 0 12024 +eduprojectsth 1 6.957497 0 0 12025 +architectureprovid 1 6.957497 0 0 12026 +latencyand 1 6.957497 0 0 12027 +currentimplement 1 6.957497 0 0 12028 +tonon 1 6.957497 0 0 12029 +spmd 1 6.957497 0 0 12030 +extensionto 1 6.957497 0 0 12031 +newplatform 1 6.957497 0 0 12032 +multprocessor 1 6.957497 0 0 12033 +computerorgan 1 6.957497 0 0 12034 +maynd 1 6.957497 0 0 12035 +pagestv 1 6.957497 0 0 12036 +macpppwhich 1 6.957497 0 0 12037 +everhav 1 6.957497 0 0 12038 +passwordssuddenli 1 6.957497 0 0 12039 +installationinstruct 1 6.957497 0 0 12040 +publicationsu 1 6.957497 0 0 12041 +atmnetwork 1 6.957497 0 0 12042 +controlledthread 1 6.957497 0 0 12043 +spertu 1 6.957497 0 0 12044 +modelof 1 6.957497 0 0 12045 +sahai 1 6.957497 0 0 12046 +santo 1 6.957497 0 0 12047 +subramonian 1 6.957497 0 0 12048 +dataflowmultiprocess 1 6.957497 0 0 12049 +forintegr 1 6.957497 0 0 12050 +forleni 1 6.957497 0 0 12051 +minimalhardwar 1 6.957497 0 0 12052 +wawrzynek 1 6.957497 0 0 12053 +architecturesfor 1 6.957497 0 0 12054 +saavedra 1 6.957497 0 0 12055 +pagelfar 1 6.957497 0 0 12056 +specificationi 1 6.957497 0 0 12057 +incongruousiceland 1 6.957497 0 0 12058 +implementationbackgroundwher 1 6.957497 0 0 12059 +activitieswhat 1 6.957497 0 0 12060 +schedulewher 1 6.957497 0 0 12061 +researchwhat 1 6.957497 0 0 12062 +interestswhat 1 6.957497 0 0 12063 +acquaintancesthos 1 6.957497 0 0 12064 +infohow 1 6.957497 0 0 12065 +threedimens 1 6.957497 0 0 12066 +themesh 1 6.957497 0 0 12067 +softwaredownload 1 6.957497 0 0 12068 +andqmg 1 6.957497 0 0 12069 +mcphedran 1 6.957497 0 0 12070 +offinit 1 6.957497 0 0 12071 +ofsoftwar 1 6.957497 0 0 12072 +computationalgeometri 1 6.957497 0 0 12073 +shewchuk 1 6.957497 0 0 12074 +triangl 1 6.957497 0 0 12075 +onsabbat 1 6.957497 0 0 12076 +cass 1 6.957497 0 0 12077 +tsure 1 6.957497 0 0 12078 +essaybi 1 6.957497 0 0 12079 +issuesnumer 1 6.957497 0 0 12080 +problemsgeometr 1 6.957497 0 0 12081 +computingspars 1 6.957497 0 0 12082 +computationsi 1 6.957497 0 0 12083 +primal 1 6.957497 0 0 12084 +interiorpoint 1 6.957497 0 0 12085 +decompositionfor 1 6.957497 0 0 12086 +gridcut 1 6.957497 0 0 12087 +hyperplan 1 6.957497 0 0 12088 +packagei 1 6.957497 0 0 12089 +verycompl 1 6.957497 0 0 12090 +unstructuredtetrahedr 1 6.957497 0 0 12091 +boundaryvalu 1 6.957497 0 0 12092 +iswritten 1 6.957497 0 0 12093 +distributedfor 1 6.957497 0 0 12094 +distributionbegan 1 6.957497 0 0 12095 +manyimprov 1 6.957497 0 0 12096 +compatibilitywith 1 6.957497 0 0 12097 +pleasese 1 6.957497 0 0 12098 +reportback 1 6.957497 0 0 12099 +vitrano 1 6.957497 0 0 12100 +pagehei 1 6.957497 0 0 12101 +kolla 1 6.957497 0 0 12102 +scrollit 1 6.957497 0 0 12103 +unwant 1 6.957497 0 0 12104 +warrante 1 6.957497 0 0 12105 +zillion 1 6.957497 0 0 12106 +thoughtsfriend 1 6.957497 0 0 12107 +foeslinksa 1 6.957497 0 0 12108 +tryalta 1 6.957497 0 0 12109 +theinktomiresumein 1 6.957497 0 0 12110 +htmlin 1 6.957497 0 0 12111 +postscriptin 1 6.957497 0 0 12112 +perfectin 1 6.957497 0 0 12113 +asciith 1 6.957497 0 0 12114 +wanna 1 6.957497 0 0 12115 +vlad 1 6.957497 0 0 12116 +pagevladimir 1 6.957497 0 0 12117 +kotlyarvladimir 1 6.957497 0 0 12118 +wereteach 1 6.957497 0 0 12119 +andindu 1 6.957497 0 0 12120 +kodukulapubl 1 6.957497 0 0 12121 +kissing 1 6.957497 0 0 12122 +profess 1 6.957497 0 0 12123 +abritish 1 6.957497 0 0 12124 +sveri 1 6.957497 0 0 12125 +den 1 6.957497 0 0 12126 +asolut 1 6.957497 0 0 12127 +outpac 1 6.957497 0 0 12128 +ofsolut 1 6.957497 0 0 12129 +hardenough 1 6.957497 0 0 12130 +menonvijai 1 6.957497 0 0 12131 +pagewei 1 6.957497 0 0 12132 +weichen 1 6.957497 0 0 12133 +inpartition 1 6.957497 0 0 12134 +lwhere 1 6.957497 0 0 12135 +fromwhat 1 6.957497 0 0 12136 +watchwhat 1 6.957497 0 0 12137 +likec 1 6.957497 0 0 12138 +wrotepictur 1 6.957497 0 0 12139 +drawa 1 6.957497 0 0 12140 +motifcomput 1 6.957497 0 0 12141 +theoryhom 1 6.957497 0 0 12142 +vimi 1 6.957497 0 0 12143 +tsearch 1 6.957497 0 0 12144 +webcoolest 1 6.957497 0 0 12145 +sitessharewar 1 6.957497 0 0 12146 +archivem 1 6.957497 0 0 12147 +onlineunivers 1 6.957497 0 0 12148 +glavin 1 6.957497 0 0 12149 +billiard 1 6.957497 0 0 12150 +brave 1 6.957497 0 0 12151 +anferne 1 6.957497 0 0 12152 +hardawai 1 6.957497 0 0 12153 +warp 1 6.957497 0 0 12154 +webpaint 1 6.957497 0 0 12155 +whkao 1 6.957497 0 0 12156 +concerto 1 6.957497 0 0 12157 +sciencefrom 1 6.957497 0 0 12158 +carneig 1 6.957497 0 0 12159 +didresearch 1 6.957497 0 0 12160 +institu 1 6.957497 0 0 12161 +xsro 1 6.957497 0 0 12162 +atft 1 6.957497 0 0 12163 +lauderdal 1 6.957497 0 0 12164 +usta 1 6.957497 0 0 12165 +tournment 1 6.957497 0 0 12166 +faviorit 1 6.957497 0 0 12167 +boca 1 6.957497 0 0 12168 +ratonkei 1 6.957497 0 0 12169 +beethoven 1 6.957497 0 0 12170 +gershwin 1 6.957497 0 0 12171 +liszt 1 6.957497 0 0 12172 +mendelssohn 1 6.957497 0 0 12173 +mozart 1 6.957497 0 0 12174 +rachmaninoff 1 6.957497 0 0 12175 +ravel 1 6.957497 0 0 12176 +tchaikovski 1 6.957497 0 0 12177 +violinconcerto 1 6.957497 0 0 12178 +purifi 1 6.957497 0 0 12179 +computerc 1 6.957497 0 0 12180 +sectorcool 1 6.957497 0 0 12181 +links_leap 1 6.957497 0 0 12182 +frogski 1 6.957497 0 0 12183 +serverident 1 6.957497 0 0 12184 +crisi 1 6.957497 0 0 12185 +testweath 1 6.957497 0 0 12186 +undergroundinktomi 1 6.957497 0 0 12187 +enginequest 1 6.957497 0 0 12188 +archiveslast 1 6.957497 0 0 12189 +ecithaca 1 6.957497 0 0 12190 +wwlee 1 6.957497 0 0 12191 +xichun 1 6.957497 0 0 12192 +zhejiang 1 6.957497 0 0 12193 +jennif 1 6.957497 0 0 12194 +hangzhou 1 6.957497 0 0 12195 +javaworldsunhigh 1 6.957497 0 0 12196 +alumnimeng 1 6.957497 0 0 12197 +gouraud 1 6.957497 0 0 12198 +systeme 1 6.957497 0 0 12199 +communicationby 1 6.957497 0 0 12200 +computationderiv 1 6.957497 0 0 12201 +programsa 1 6.957497 0 0 12202 +themeprogram 1 6.957497 0 0 12203 +usessystemat 1 6.957497 0 0 12204 +deriveincrement 1 6.957497 0 0 12205 +peoplei 1 6.957497 0 0 12206 +liutim 1 6.957497 0 0 12207 +teitelbaumkeyword 1 6.957497 0 0 12208 +cacheti 1 6.957497 0 0 12209 +pageyanhong 1 6.957497 0 0 12210 +forincrement 1 6.957497 0 0 12211 +interactivesystem 1 6.957497 0 0 12212 +systemorgan 1 6.957497 0 0 12213 +talksph 1 6.957497 0 0 12214 +basedsystemat 1 6.957497 0 0 12215 +abstractjourn 1 6.957497 0 0 12216 +inexact 1 6.957497 0 0 12217 +wakayama 1 6.957497 0 0 12218 +oggeb 1 6.957497 0 0 12219 +basin 1 6.957497 0 0 12220 +ri 1 6.957497 0 0 12221 +tshinghua 1 6.957497 0 0 12222 +lindlei 1 6.957497 0 0 12223 +hallindiana 1 6.957497 0 0 12224 +universitybloomington 1 6.957497 0 0 12225 +huangyi 1 6.957497 0 0 12226 +documentscoursesprojectaccess 1 6.957497 0 0 12227 +byvisitorslast 1 6.957497 0 0 12228 +ilbo 1 6.957497 0 0 12229 +myoung 1 6.957497 0 0 12230 +husband 1 6.957497 0 0 12231 +chungyou 1 6.957497 0 0 12232 +thvisitor 1 6.957497 0 0 12233 +universitywher 1 6.957497 0 0 12234 +kindergarten 1 6.957497 0 0 12235 +universityin 1 6.957497 0 0 12236 +happiest 1 6.957497 0 0 12237 +forsaic 1 6.957497 0 0 12238 +shin 1 6.957497 0 0 12239 +seung 1 6.957497 0 0 12240 +hoon 1 6.957497 0 0 12241 +newpap 1 6.957497 0 0 12242 +hangook 1 6.957497 0 0 12243 +chosun 1 6.957497 0 0 12244 +joongang 1 6.957497 0 0 12245 +appletyoosun 1 6.957497 0 0 12246 +triphamm 1 6.957497 0 0 12247 +sbithaca 1 6.957497 0 0 12248 +ychung 1 6.957497 0 0 12249 +yooschung 1 6.957497 0 0 12250 +flapdragon 1 6.957497 0 0 12251 +yminski 1 6.957497 0 0 12252 +comstock 1 6.957497 0 0 12253 +onfault 1 6.957497 0 0 12254 +thetacoma 1 6.957497 0 0 12255 +livether 1 6.957497 0 0 12256 +anopen 1 6.957497 0 0 12257 +recommendit 1 6.957497 0 0 12258 +ancientchines 1 6.957497 0 0 12259 +extremlysimpl 1 6.957497 0 0 12260 +thannoth 1 6.957497 0 0 12261 +cgoban 1 6.957497 0 0 12262 +nicest 1 6.957497 0 0 12263 +goboard 1 6.957497 0 0 12264 +thenet 1 6.957497 0 0 12265 +minutesof 1 6.957497 0 0 12266 +favoritepoem 1 6.957497 0 0 12267 +lafiglia 1 6.957497 0 0 12268 +piang 1 6.957497 0 0 12269 +advicefor 1 6.957497 0 0 12270 +interestinglink 1 6.957497 0 0 12271 +alarmingli 1 6.957497 0 0 12272 +firefli 1 6.957497 0 0 12273 +bakeri 1 6.957497 0 0 12274 +bigbook 1 6.957497 0 0 12275 +bigyellow 1 6.957497 0 0 12276 +kinslei 1 6.957497 0 0 12277 +discount 1 6.957497 0 0 12278 +booksel 1 6.957497 0 0 12279 +mailcrypt 1 6.957497 0 0 12280 +interfacemqbtazgjohoaaaedalfhlgjmdg 1 6.957497 0 0 12281 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 1 6.957497 0 0 12282 +rbylf 1 6.957497 0 0 12283 +zwqujcioczoecv 1 6.957497 0 0 12284 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 1 6.957497 0 0 12285 +gkgarsokrinnoazihja 1 6.957497 0 0 12286 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 1 6.957497 0 0 12287 +wumjgzsnvispwkrvzgdrojswmc 1 6.957497 0 0 12288 +eigsqsb 1 6.957497 0 0 12289 +bsbpw 1 6.957497 0 0 12290 +jcwz 1 6.957497 0 0 12291 +yuichi 1 6.957497 0 0 12292 +tsuchimoto 1 6.957497 0 0 12293 +translatorsc 1 6.957497 0 0 12294 +pageyuichi 1 6.957497 0 0 12295 +workfal 1 6.957497 0 0 12296 +engineeringspr 1 6.957497 0 0 12297 +computingi 1 6.957497 0 0 12298 +activemessag 1 6.957497 0 0 12299 +secondpart 1 6.957497 0 0 12300 +messagescornel 1 6.957497 0 0 12301 +implementationsact 1 6.957497 0 0 12302 +codereleas 1 6.957497 0 0 12303 +instructionson 1 6.957497 0 0 12304 +releasenot 1 6.957497 0 0 12305 +fileto 1 6.957497 0 0 12306 +currentvers 1 6.957497 0 0 12307 +libmpci 1 6.957497 0 0 12308 +thedistribut 1 6.957497 0 0 12309 +fordetail 1 6.957497 0 0 12310 +briefnot 1 6.957497 0 0 12311 +ibmrisc 1 6.957497 0 0 12312 +hawblitzel 1 6.957497 0 0 12313 +ieeesupercomput 1 6.957497 0 0 12314 +spiteof 1 6.957497 0 0 12315 +scommun 1 6.957497 0 0 12316 +inferior 1 6.957497 0 0 12317 +tmccm 1 6.957497 0 0 12318 +standardmessag 1 6.957497 0 0 12319 +tooffer 1 6.957497 0 0 12320 +networkadapt 1 6.957497 0 0 12321 +yieldsa 1 6.957497 0 0 12322 +communicationsubstr 1 6.957497 0 0 12323 +cbenchmark 1 6.957497 0 0 12324 +lowmessag 1 6.957497 0 0 12325 +compens 1 6.957497 0 0 12326 +networklat 1 6.957497 0 0 12327 +availablempich 1 6.957497 0 0 12328 +implementationbenchmark 1 6.957497 0 0 12329 +firmwar 1 6.957497 0 0 12330 +butdo 1 6.957497 0 0 12331 +assumefamiliar 1 6.957497 0 0 12332 +mainperform 1 6.957497 0 0 12333 +timeof 1 6.957497 0 0 12334 +smessag 1 6.957497 0 0 12335 +theu 1 6.957497 0 0 12336 +themeiko 1 6.957497 0 0 12337 +thehpam 1 6.957497 0 0 12338 +fddi 1 6.957497 0 0 12339 +theparagon 1 6.957497 0 0 12340 +thesp 1 6.957497 0 0 12341 +networksus 1 6.957497 0 0 12342 +anyndia 1 6.957497 0 0 12343 +ascompar 1 6.957497 0 0 12344 +anatm 1 6.957497 0 0 12345 +systemsoftwar 1 6.957497 0 0 12346 +streamcommun 1 6.957497 0 0 12347 +flowcontrol 1 6.957497 0 0 12348 +builtfrom 1 6.957497 0 0 12349 +artmultiprocessor 1 6.957497 0 0 12350 +systemcoordin 1 6.957497 0 0 12351 +andrequir 1 6.957497 0 0 12352 +clusterinterconnect 1 6.957497 0 0 12353 +showappl 1 6.957497 0 0 12354 +smallmessag 1 6.957497 0 0 12355 +messagesimplement 1 6.957497 0 0 12356 +abstractth 1 6.957497 0 0 12357 +overlapcomput 1 6.957497 0 0 12358 +sacrificingprocessor 1 6.957497 0 0 12359 +passingmultiprocessor 1 6.957497 0 0 12360 +researchprototyp 1 6.957497 0 0 12361 +communicationoverhead 1 6.957497 0 0 12362 +simplecommun 1 6.957497 0 0 12363 +isintrins 1 6.957497 0 0 12364 +thehardwar 1 6.957497 0 0 12365 +ncube 1 6.957497 0 0 12366 +memoryextens 1 6.957497 0 0 12367 +messagesar 1 6.957497 0 0 12368 +forwhich 1 6.957497 0 0 12369 +hardwaresupport 1 6.957497 0 0 12370 +ofenhanc 1 6.957497 0 0 12371 +efficientcommun 1 6.957497 0 0 12372 +sitesact 1 6.957497 0 0 12373 +messagesin 1 6.957497 0 0 12374 +projectfor 1 6.957497 0 0 12375 +csrvlcornel 1 6.957497 0 0 12376 +nich 1 6.957497 0 0 12377 +rrentli 1 6.957497 0 0 12378 +ofresearch 1 6.957497 0 0 12379 +pictor 1 6.957497 0 0 12380 +projectsth 1 6.957497 0 0 12381 +byramin 1 6.957497 0 0 12382 +allowingscen 1 6.957497 0 0 12383 +onplatform 1 6.957497 0 0 12384 +nynet 1 6.957497 0 0 12385 +foru 1 6.957497 0 0 12386 +sproject 1 6.957497 0 0 12387 +currentlyconsid 1 6.957497 0 0 12388 +theissuesher 1 6.957497 0 0 12389 +thecsrvl 1 6.957497 0 0 12390 +serverar 1 6.957497 0 0 12391 +sensorless 1 6.957497 0 0 12392 +oiso 1 6.957497 0 0 12393 +micromechan 1 6.957497 0 0 12394 +quebc 1 6.957497 0 0 12395 +authorthes 1 6.957497 0 0 12396 +pedro 1 6.957497 0 0 12397 +felzenszwalb 1 6.957497 0 0 12398 +lilien 1 6.957497 0 0 12399 +maharbiz 1 6.957497 0 0 12400 +scharstein 1 6.957497 0 0 12401 +stump 1 6.957497 0 0 12402 +fernando 1 6.957497 0 0 12403 +viton 1 6.957497 0 0 12404 +wayt 1 6.957497 0 0 12405 +welsh 1 6.957497 0 0 12406 +whelan 1 6.957497 0 0 12407 +environmenthoru 1 6.957497 0 0 12408 +shoru 1 6.957497 0 0 12409 +reliabledistribut 1 6.957497 0 0 12410 +demonstrategroupwar 1 6.957497 0 0 12411 +foundto 1 6.957497 0 0 12412 +synchronousprocess 1 6.957497 0 0 12413 +importantresearch 1 6.957497 0 0 12414 +performancer 1 6.957497 0 0 12415 +calledact 1 6.957497 0 0 12416 +messageswith 1 6.957497 0 0 12417 +playbacksystem 1 6.957497 0 0 12418 +calledcontinu 1 6.957497 0 0 12419 +multimediaserv 1 6.957497 0 0 12420 +telemedicin 1 6.957497 0 0 12421 +videoon 1 6.957497 0 0 12422 +andsecur 1 6.957497 0 0 12423 +expectrapid 1 6.957497 0 0 12424 +uptak 1 6.957497 0 0 12425 +spana 1 6.957497 0 0 12426 +financialtrad 1 6.957497 0 0 12427 +factori 1 6.957497 0 0 12428 +fordiscret 1 6.957497 0 0 12429 +beingexplor 1 6.957497 0 0 12430 +othernon 1 6.957497 0 0 12431 +hiper 1 6.957497 0 0 12432 +systemthat 1 6.957497 0 0 12433 +aegi 1 6.957497 0 0 12434 +battleradar 1 6.957497 0 0 12435 +benefitfrom 1 6.957497 0 0 12436 +migrateisi 1 6.957497 0 0 12437 +communityin 1 6.957497 0 0 12438 +agreementswith 1 6.957497 0 0 12439 +subsidiari 1 6.957497 0 0 12440 +mixtur 1 6.957497 0 0 12441 +technologieswil 1 6.957497 0 0 12442 +beseen 1 6.957497 0 0 12443 +belowshow 1 6.957497 0 0 12444 +andus 1 6.957497 0 0 12445 +asset 1 6.957497 0 0 12446 +thissort 1 6.957497 0 0 12447 +utmost 1 6.957497 0 0 12448 +whilealso 1 6.957497 0 0 12449 +civilianand 1 6.957497 0 0 12450 +projectinform 1 6.957497 0 0 12451 +accessth 1 6.957497 0 0 12452 +ofonlin 1 6.957497 0 0 12453 +forhuman 1 6.957497 0 0 12454 +hopcroft 1 6.957497 0 0 12455 +davisin 1 6.957497 0 0 12456 +researchextract 1 6.957497 0 0 12457 +thestructur 1 6.957497 0 0 12458 +extractinginform 1 6.957497 0 0 12459 +collectionsof 1 6.957497 0 0 12460 +nationwid 1 6.957497 0 0 12461 +sciencetechn 1 6.957497 0 0 12462 +moreaccess 1 6.957497 0 0 12463 +toit 1 6.957497 0 0 12464 +visitingscientist 1 6.957497 0 0 12465 +jimdavi 1 6.957497 0 0 12466 +jrdpublicationsjam 1 6.957497 0 0 12467 +medianet 1 6.957497 0 0 12468 +projectmedianet 1 6.957497 0 0 12469 +protocolsth 1 6.957497 0 0 12470 +communicationmak 1 6.957497 0 0 12471 +foradvanc 1 6.957497 0 0 12472 +includeaudio 1 6.957497 0 0 12473 +technologyofficefor 1 6.957497 0 0 12474 +vaughn 1 6.957497 0 0 12475 +askaltavista 1 6.957497 0 0 12476 +simlab 1 6.957497 0 0 12477 +oncomplex 1 6.957497 0 0 12478 +bringingtogeth 1 6.957497 0 0 12479 +symbolicmathemat 1 6.957497 0 0 12480 +levelat 1 6.957497 0 0 12481 +softwarepackag 1 6.957497 0 0 12482 +microstoragearchitectur 1 6.957497 0 0 12483 +computeralgebra 1 6.957497 0 0 12484 +thechain 1 6.957497 0 0 12485 +thearpa 1 6.957497 0 0 12486 +madefast 1 6.957497 0 0 12487 +ofnon 1 6.957497 0 0 12488 +contemporan 1 6.957497 0 0 12489 +chainsprogram 1 6.957497 0 0 12490 +complextopolog 1 6.957497 0 0 12491 +numericalalgorithm 1 6.957497 0 0 12492 +granita 1 6.957497 0 0 12493 +bench 1 6.957497 0 0 12494 +am_run 1 6.957497 0 0 12495 +tcsh 1 6.957497 0 0 12496 +bash 1 6.957497 0 0 12497 +gmake 1 6.957497 0 0 12498 +ampicc 1 6.957497 0 0 12499 +granitathrough 1 6.957497 0 0 12500 +asinteract 1 6.957497 0 0 12501 +problemsdur 1 6.957497 0 0 12502 +stufffrom 1 6.957497 0 0 12503 +unam 1 6.957497 0 0 12504 +manyou 1 6.957497 0 0 12505 +infoexplor 1 6.957497 0 0 12506 +commandsand 1 6.957497 0 0 12507 +activemassag 1 6.957497 0 0 12508 +peor 1 6.957497 0 0 12509 +messagesor 1 6.957497 0 0 12510 +homegrown 1 6.957497 0 0 12511 +softwarein 1 6.957497 0 0 12512 +besur 1 6.957497 0 0 12513 +csplit 1 6.957497 0 0 12514 +globalpoint 1 6.957497 0 0 12515 +dereferenc 1 6.957497 0 0 12516 +latencyof 1 6.957497 0 0 12517 +shellsshould 1 6.957497 0 0 12518 +asact 1 6.957497 0 0 12519 +scriptsloc 1 6.957497 0 0 12520 +programfoo 1 6.957497 0 0 12521 +foodebug 1 6.957497 0 0 12522 +splitc_debug 1 6.957497 0 0 12523 +aftersplitc_main 1 6.957497 0 0 12524 +ongranita 1 6.957497 0 0 12525 +youwant 1 6.957497 0 0 12526 +thenattach 1 6.957497 0 0 12527 +breakpoint 1 6.957497 0 0 12528 +messagesact 1 6.957497 0 0 12529 +layerthat 1 6.957497 0 0 12530 +triplat 1 6.957497 0 0 12531 +libspgam 1 6.957497 0 0 12532 +aand 1 6.957497 0 0 12533 +beforerun 1 6.957497 0 0 12534 +runningprgm 1 6.957497 0 0 12535 +mpimpi 1 6.957497 0 0 12536 +popularmessag 1 6.957497 0 0 12537 +mpich 1 6.957497 0 0 12538 +overact 1 6.957497 0 0 12539 +easiest 1 6.957497 0 0 12540 +fooyou 1 6.957497 0 0 12541 +lookat 1 6.957497 0 0 12542 +examplesin 1 6.957497 0 0 12543 +ampi 1 6.957497 0 0 12544 +likeordinari 1 6.957497 0 0 12545 +softwaresoftwar 1 6.957497 0 0 12546 +xpdbx 1 6.957497 0 0 12547 +bison 1 6.957497 0 0 12548 +problemsif 1 6.957497 0 0 12549 +ccornel 1 6.957497 0 0 12550 +implementationssplit 1 6.957497 0 0 12551 +isimpl 1 6.957497 0 0 12552 +messagesfor 1 6.957497 0 0 12553 +ofsplit 1 6.957497 0 0 12554 +distr 1 6.957497 0 0 12555 +implementedon 1 6.957497 0 0 12556 +contactchi 1 6.957497 0 0 12557 +runningsolari 1 6.957497 0 0 12558 +mattwelsh 1 6.957497 0 0 12559 +cparallel 1 6.957497 0 0 12560 +abstractproject 1 6.957497 0 0 12561 +sitessplit 1 6.957497 0 0 12562 +chome 1 6.957497 0 0 12563 +redirect 1 6.957497 0 0 12564 +groupzeno 1 6.957497 0 0 12565 +curricula 1 6.957497 0 0 12566 +pagegener 1 6.957497 0 0 12567 +schedulespag 1 6.957497 0 0 12568 +directoryth 1 6.957497 0 0 12569 +universitywww 1 6.957497 0 0 12570 +informationgrip 1 6.957497 0 0 12571 +seligman 1 6.957497 0 0 12572 +pageadam 1 6.957497 0 0 12573 +specifiedth 1 6.957497 0 0 12574 +fileor 1 6.957497 0 0 12575 +pagemart 1 6.957497 0 0 12576 +fromreut 1 6.957497 0 0 12577 +agapito 1 6.957497 0 0 12578 +sustaita 1 6.957497 0 0 12579 +austincognit 1 6.957497 0 0 12580 +connection 1 6.957497 0 0 12581 +reasoningschoolingph 1 6.957497 0 0 12582 +miscellaneouspost 1 6.957497 0 0 12583 +addressth 1 6.957497 0 0 12584 +laboratoryut 1 6.957497 0 0 12585 +laboratoryth 1 6.957497 0 0 12586 +austinha 1 6.957497 0 0 12587 +andgradu 1 6.957497 0 0 12588 +causei 1 6.957497 0 0 12589 +deceas 1 6.957497 0 0 12590 +memoriam 1 6.957497 0 0 12591 +porterpoint 1 6.957497 0 0 12592 +agenciescontact 1 6.957497 0 0 12593 +johnajita 1 6.957497 0 0 12594 +programmingframework 1 6.957497 0 0 12595 +parallelprocedur 1 6.957497 0 0 12596 +brownemi 1 6.957497 0 0 12597 +ajohn 1 6.957497 0 0 12598 +uppsala 1 6.957497 0 0 12599 +almstrumabout 1 6.957497 0 0 12600 +doctoralresearch 1 6.957497 0 0 12601 +ispent 1 6.957497 0 0 12602 +pagether 1 6.957497 0 0 12603 +garden 1 6.957497 0 0 12604 +sew 1 6.957497 0 0 12605 +hubbi 1 6.957497 0 0 12606 +torgni 1 6.957497 0 0 12607 +stadler 1 6.957497 0 0 12608 +itics 1 6.957497 0 0 12609 +educationjun 1 6.957497 0 0 12610 +swedenoth 1 6.957497 0 0 12611 +frenzi 1 6.957497 0 0 12612 +educationsigsoft 1 6.957497 0 0 12613 +engineeringacm 1 6.957497 0 0 12614 +machineryieeeth 1 6.957497 0 0 12615 +engineerscpsrcomput 1 6.957497 0 0 12616 +responsibilityconnect 1 6.957497 0 0 12617 +elsewhereto 1 6.957497 0 0 12618 +seldom 1 6.957497 0 0 12619 +forewarn 1 6.957497 0 0 12620 +pagehung 1 6.957497 0 0 12621 +aruna 1 6.957497 0 0 12622 +addalacurr 1 6.957497 0 0 12623 +studentth 1 6.957497 0 0 12624 +sciencess 1 6.957497 0 0 12625 +engineeringmysorework 1 6.957497 0 0 12626 +mysoreindiai 1 6.957497 0 0 12627 +mysor 1 6.957497 0 0 12628 +cityindiato 1 6.957497 0 0 12629 +eduvoic 1 6.957497 0 0 12630 +ashi 1 6.957497 0 0 12631 +tarafdarashi 1 6.957497 0 0 12632 +tarafdarabout 1 6.957497 0 0 12633 +pageroberto 1 6.957497 0 0 12634 +infosleuth 1 6.957497 0 0 12635 +satisfactionmi 1 6.957497 0 0 12636 +generatingand 1 6.957497 0 0 12637 +bert 1 6.957497 0 0 12638 +imprecis 1 6.957497 0 0 12639 +kayresearch 1 6.957497 0 0 12640 +vitami 1 6.957497 0 0 12641 +stuffsonia 1 6.957497 0 0 12642 +andnina 1 6.957497 0 0 12643 +springbank 1 6.957497 0 0 12644 +scotchdrinksof 1 6.957497 0 0 12645 +bhanu 1 6.957497 0 0 12646 +homepagethi 1 6.957497 0 0 12647 +akhil 1 6.957497 0 0 12648 +reddythank 1 6.957497 0 0 12649 +austinm 1 6.957497 0 0 12650 +datacommun 1 6.957497 0 0 12651 +anitish 1 6.957497 0 0 12652 +barua 1 6.957497 0 0 12653 +schwetmani 1 6.957497 0 0 12654 +bogon 1 6.957497 0 0 12655 +avers 1 6.957497 0 0 12656 +outcom 1 6.957497 0 0 12657 +bogo 1 6.957497 0 0 12658 +bogomolnymichael 1 6.957497 0 0 12659 +bogomolni 1 6.957497 0 0 12660 +interestsnot 1 6.957497 0 0 12661 +jenef 1 6.957497 0 0 12662 +husman 1 6.957497 0 0 12663 +bet 1 6.957497 0 0 12664 +diminish 1 6.957497 0 0 12665 +tverski 1 6.957497 0 0 12666 +kahneman 1 6.957497 0 0 12667 +verbatimfrom 1 6.957497 0 0 12668 +outbreak 1 6.957497 0 0 12669 +beenpropos 1 6.957497 0 0 12670 +programsar 1 6.957497 0 0 12671 +besav 1 6.957497 0 0 12672 +digitalif 1 6.957497 0 0 12673 +electrochem 1 6.957497 0 0 12674 +axon 1 6.957497 0 0 12675 +shaki 1 6.957497 0 0 12676 +inaccur 1 6.957497 0 0 12677 +subtract 1 6.957497 0 0 12678 +checkbook 1 6.957497 0 0 12679 +nevertheless 1 6.957497 0 0 12680 +misfir 1 6.957497 0 0 12681 +italic 1 6.957497 0 0 12682 +researchcognit 1 6.957497 0 0 12683 +sciencearitifici 1 6.957497 0 0 12684 +intelligencemathemat 1 6.957497 0 0 12685 +logictopolog 1 6.957497 0 0 12686 +ghrist 1 6.957497 0 0 12687 +wilshir 1 6.957497 0 0 12688 +parkwai 1 6.957497 0 0 12689 +talentsdefinit 1 6.957497 0 0 12690 +bogodynamicsdefinit 1 6.957497 0 0 12691 +sortwhil 1 6.957497 0 0 12692 +bogos 1 6.957497 0 0 12693 +bogomet 1 6.957497 0 0 12694 +flux 1 6.957497 0 0 12695 +bogotifi 1 6.957497 0 0 12696 +autobogotiphobia 1 6.957497 0 0 12697 +blinkenlight 1 6.957497 0 0 12698 +lasher 1 6.957497 0 0 12699 +boyerhom 1 6.957497 0 0 12700 +philosophydepart 1 6.957497 0 0 12701 +austinhow 1 6.957497 0 0 12702 +mepap 1 6.957497 0 0 12703 +locationsclassescurriculum 1 6.957497 0 0 12704 +vitaeperson 1 6.957497 0 0 12705 +dataeducationpublicationshonorsjobsgradu 1 6.957497 0 0 12706 +studentsth 1 6.957497 0 0 12707 +nqthm 1 6.957497 0 0 12708 +mccarthi 1 6.957497 0 0 12709 +moffett 1 6.957497 0 0 12710 +controversyni 1 6.957497 0 0 12711 +robbin 1 6.957497 0 0 12712 +permitsth 1 6.957497 0 0 12713 +administrativeoverhead 1 6.957497 0 0 12714 +howthi 1 6.957497 0 0 12715 +confess 1 6.957497 0 0 12716 +acanon 1 6.957497 0 0 12717 +thumper 1 6.957497 0 0 12718 +universitiesstandard 1 6.957497 0 0 12719 +aweb 1 6.957497 0 0 12720 +anind 1 6.957497 0 0 12721 +endors 1 6.957497 0 0 12722 +habitu 1 6.957497 0 0 12723 +hislectur 1 6.957497 0 0 12724 +militaryacademi 1 6.957497 0 0 12725 +incens 1 6.957497 0 0 12726 +hisformalist 1 6.957497 0 0 12727 +hispromis 1 6.957497 0 0 12728 +turin 1 6.957497 0 0 12729 +sincomplet 1 6.957497 0 0 12730 +rucker 1 6.957497 0 0 12731 +extinct 1 6.957497 0 0 12732 +kroto 1 6.957497 0 0 12733 +britain 1 6.957497 0 0 12734 +sussex 1 6.957497 0 0 12735 +chemistrypr 1 6.957497 0 0 12736 +curl 1 6.957497 0 0 12737 +smallei 1 6.957497 0 0 12738 +inhouston 1 6.957497 0 0 12739 +asocc 1 6.957497 0 0 12740 +upup 1 6.957497 0 0 12741 +brownereg 1 6.957497 0 0 12742 +collegeph 1 6.957497 0 0 12743 +austinhonor 1 6.957497 0 0 12744 +societyarea 1 6.957497 0 0 12745 +sciencewith 1 6.957497 0 0 12746 +tenyear 1 6.957497 0 0 12747 +computation 1 6.957497 0 0 12748 +includesmethod 1 6.957497 0 0 12749 +highlevel 1 6.957497 0 0 12750 +throughdata 1 6.957497 0 0 12751 +compositionalapproach 1 6.957497 0 0 12752 +intelligenceprocess 1 6.957497 0 0 12753 +fluiddynam 1 6.957497 0 0 12754 +domaincompil 1 6.957497 0 0 12755 +basedlanguag 1 6.957497 0 0 12756 +timedecis 1 6.957497 0 0 12757 +andpract 1 6.957497 0 0 12758 +fourthworkshop 1 6.957497 0 0 12759 +santacruz 1 6.957497 0 0 12760 +theeffect 1 6.957497 0 0 12761 +parallelizingcompil 1 6.957497 0 0 12762 +kleyn 1 6.957497 0 0 12763 +thakur 1 6.957497 0 0 12764 +addressdepart 1 6.957497 0 0 12765 +chenyao 1 6.957497 0 0 12766 +yung 1 6.957497 0 0 12767 +fang 1 6.957497 0 0 12768 +shashidhar 1 6.957497 0 0 12769 +groupcan 1 6.957497 0 0 12770 +austinclick 1 6.957497 0 0 12771 +canfieldhom 1 6.957497 0 0 12772 +businessmi 1 6.957497 0 0 12773 +flaviu 1 6.957497 0 0 12774 +ther 1 6.957497 0 0 12775 +pleasuredomest 1 6.957497 0 0 12776 +bliss 1 6.957497 0 0 12777 +carla 1 6.957497 0 0 12778 +newborn 1 6.957497 0 0 12779 +parenthood 1 6.957497 0 0 12780 +struck 1 6.957497 0 0 12781 +peel 1 6.957497 0 0 12782 +bottl 1 6.957497 0 0 12783 +sofaspher 1 6.957497 0 0 12784 +olestra 1 6.957497 0 0 12785 +canfield 1 6.957497 0 0 12786 +peterst 1 6.957497 0 0 12787 +carruthpleas 1 6.957497 0 0 12788 +boundson 1 6.957497 0 0 12789 +ordersemant 1 6.957497 0 0 12790 +bufferinsert 1 6.957497 0 0 12791 +syllabustopicschung 1 6.957497 0 0 12792 +clen 1 6.957497 0 0 12793 +mehello 1 6.957497 0 0 12794 +tongji 1 6.957497 0 0 12795 +chinaa 1 6.957497 0 0 12796 +usahom 1 6.957497 0 0 12797 +chaputcliff 1 6.957497 0 0 12798 +chaputth 1 6.957497 0 0 12799 +robotlab 1 6.957497 0 0 12800 +dullchaput 1 6.957497 0 0 12801 +gothimself 1 6.957497 0 0 12802 +anemail 1 6.957497 0 0 12803 +odesta 1 6.957497 0 0 12804 +thelearn 1 6.957497 0 0 12805 +hewrot 1 6.957497 0 0 12806 +trane 1 6.957497 0 0 12807 +thenimpl 1 6.957497 0 0 12808 +studentscal 1 6.957497 0 0 12809 +gamesproject 1 6.957497 0 0 12810 +labannoi 1 6.957497 0 0 12811 +farka 1 6.957497 0 0 12812 +medeski 1 6.957497 0 0 12813 +rerun 1 6.957497 0 0 12814 +korg 1 6.957497 0 0 12815 +turnon 1 6.957497 0 0 12816 +breakfast 1 6.957497 0 0 12817 +raspi 1 6.957497 0 0 12818 +starfleet 1 6.957497 0 0 12819 +turnoff 1 6.957497 0 0 12820 +hangov 1 6.957497 0 0 12821 +fave 1 6.957497 0 0 12822 +eventsdaili 1 6.957497 0 0 12823 +reutersintellicast 1 6.957497 0 0 12824 +weatheraustin 1 6.957497 0 0 12825 +txchicago 1 6.957497 0 0 12826 +ilperiodicalssucksalonmirski 1 6.957497 0 0 12827 +onionmacweekmacuserreferencehypertext 1 6.957497 0 0 12828 +interfaceyahooalta 1 6.957497 0 0 12829 +vistacardiff 1 6.957497 0 0 12830 +databaselyco 1 6.957497 0 0 12831 +mapalt 1 6.957497 0 0 12832 +culturemacintosh 1 6.957497 0 0 12833 +dataappl 1 6.957497 0 0 12834 +computercyberdogquicktimequickdraw 1 6.957497 0 0 12835 +dappl 1 6.957497 0 0 12836 +supportmacintouchmacintosh 1 6.957497 0 0 12837 +resourcecyberdog 1 6.957497 0 0 12838 +poundinfo 1 6.957497 0 0 12839 +rootcool 1 6.957497 0 0 12840 +stufffringewareth 1 6.957497 0 0 12841 +actlabpbsnprnow 1 6.957497 0 0 12842 +catalogpap 1 6.957497 0 0 12843 +rsumsymbol 1 6.957497 0 0 12844 +groundingrobotmap 1 6.957497 0 0 12845 +peopledav 1 6.957497 0 0 12846 +falooncharl 1 6.957497 0 0 12847 +lewisjeff 1 6.957497 0 0 12848 +lindjeff 1 6.957497 0 0 12849 +sherwoodbrian 1 6.957497 0 0 12850 +slatorsandi 1 6.957497 0 0 12851 +chuanjun 1 6.957497 0 0 12852 +diamond 1 6.957497 0 0 12853 +stun 1 6.957497 0 0 12854 +hubei 1 6.957497 0 0 12855 +beautifulunivers 1 6.957497 0 0 12856 +faceless 1 6.957497 0 0 12857 +brilliant 1 6.957497 0 0 12858 +miner 1 6.957497 0 0 12859 +unemploi 1 6.957497 0 0 12860 +dobb 1 6.957497 0 0 12861 +prose 1 6.957497 0 0 12862 +porsch 1 6.957497 0 0 12863 +tser 1 6.957497 0 0 12864 +systemsexperiencei 1 6.957497 0 0 12865 +usinghidden 1 6.957497 0 0 12866 +friendli 1 6.957497 0 0 12867 +toexecut 1 6.957497 0 0 12868 +automatictag 1 6.957497 0 0 12869 +improvedbecaus 1 6.957497 0 0 12870 +interestsmovi 1 6.957497 0 0 12871 +semiolog 1 6.957497 0 0 12872 +siteschina 1 6.957497 0 0 12873 +timesminsheng 1 6.957497 0 0 12874 +dailyth 1 6.957497 0 0 12875 +timesusa 1 6.957497 0 0 12876 +economistth 1 6.957497 0 0 12877 +monthlymak 1 6.957497 0 0 12878 +chuang 1 6.957497 0 0 12879 +meyou 1 6.957497 0 0 12880 +cilkcilkcilk 1 6.957497 0 0 12881 +languageand 1 6.957497 0 0 12882 +thecilk 1 6.957497 0 0 12883 +keung 1 6.957497 0 0 12884 +poondepart 1 6.957497 0 0 12885 +askvinc 1 6.957497 0 0 12886 +gogan 1 6.957497 0 0 12887 +problemsom 1 6.957497 0 0 12888 +harmonica 1 6.957497 0 0 12889 +wongchung 1 6.957497 0 0 12890 +wonglast 1 6.957497 0 0 12891 +labwhich 1 6.957497 0 0 12892 +clearinghous 1 6.957497 0 0 12893 +rbac 1 6.957497 0 0 12894 +ckwong 1 6.957497 0 0 12895 +hyde 1 6.957497 0 0 12896 +baptist 1 6.957497 0 0 12897 +netbsd 1 6.957497 0 0 12898 +freebsd 1 6.957497 0 0 12899 +openbsd 1 6.957497 0 0 12900 +tockwong 1 6.957497 0 0 12901 +clanci 1 6.957497 0 0 12902 +clancyresearch 1 6.957497 0 0 12903 +containinga 1 6.957497 0 0 12904 +frequentlyi 1 6.957497 0 0 12905 +incomprehens 1 6.957497 0 0 12906 +simulationto 1 6.957497 0 0 12907 +distinctionsof 1 6.957497 0 0 12908 +whichaddress 1 6.957497 0 0 12909 +abstractiontechniqu 1 6.957497 0 0 12910 +jimbo 1 6.957497 0 0 12911 +chuwelcom 1 6.957497 0 0 12912 +myselfmi 1 6.957497 0 0 12913 +chuemail 1 6.957497 0 0 12914 +cnchu 1 6.957497 0 0 12915 +systemmast 1 6.957497 0 0 12916 +lawless 1 6.957497 0 0 12917 +codeless 1 6.957497 0 0 12918 +myriad 1 6.957497 0 0 12919 +tennysoncod 1 6.957497 0 0 12920 +sequentialprogram 1 6.957497 0 0 12921 +wheredata 1 6.957497 0 0 12922 +arc 1 6.957497 0 0 12923 +thesequenti 1 6.957497 0 0 12924 +sequent 1 6.957497 0 0 12925 +smp 1 6.957497 0 0 12926 +macdraw 1 6.957497 0 0 12927 +subgraph 1 6.957497 0 0 12928 +hpcwire 1 6.957497 0 0 12929 +backend 1 6.957497 0 0 12930 +xcodelib 1 6.957497 0 0 12931 +lieu 1 6.957497 0 0 12932 +groupgroup 1 6.957497 0 0 12933 +leaderprofessor 1 6.957497 0 0 12934 +bergerstud 1 6.957497 0 0 12935 +correlstev 1 6.957497 0 0 12936 +correlresearchph 1 6.957497 0 0 12937 +reportcontact 1 6.957497 0 0 12938 +esterel 1 6.957497 0 0 12939 +pucholcarlo 1 6.957497 0 0 12940 +pucholresearch 1 6.957497 0 0 12941 +mawl 1 6.957497 0 0 12942 +forbrows 1 6.957497 0 0 12943 +verifyingsafeti 1 6.957497 0 0 12944 +thequantavisionfram 1 6.957497 0 0 12945 +thejoystickdevic 1 6.957497 0 0 12946 +informationoffic 1 6.957497 0 0 12947 +dreal 1 6.957497 0 0 12948 +phun 1 6.957497 0 0 12949 +interestsmemb 1 6.957497 0 0 12950 +interesti 1 6.957497 0 0 12951 +fromgandia 1 6.957497 0 0 12952 +valencia 1 6.957497 0 0 12953 +modechart 1 6.957497 0 0 12954 +groundworkfor 1 6.957497 0 0 12955 +enforc 1 6.957497 0 0 12956 +timetool 1 6.957497 0 0 12957 +languagepublicationsabstract 1 6.957497 0 0 12958 +puchol 1 6.957497 0 0 12959 +yangalumni 1 6.957497 0 0 12960 +chih 1 6.957497 0 0 12961 +farn 1 6.957497 0 0 12962 +supoj 1 6.957497 0 0 12963 +suthandavibul 1 6.957497 0 0 12964 +farnam 1 6.957497 0 0 12965 +jahanian 1 6.957497 0 0 12966 +lexic 1 6.957497 0 0 12967 +thompsoncindi 1 6.957497 0 0 12968 +thompsonmachin 1 6.957497 0 0 12969 +candlelight 1 6.957497 0 0 12970 +vigil 1 6.957497 0 0 12971 +internetto 1 6.957497 0 0 12972 +violenc 1 6.957497 0 0 12973 +semanticrepresent 1 6.957497 0 0 12974 +atrobofest 1 6.957497 0 0 12975 +wolv 1 6.957497 0 0 12976 +counsel 1 6.957497 0 0 12977 +xingshan 1 6.957497 0 0 12978 +downloadth 1 6.957497 0 0 12979 +dahlin 1 6.957497 0 0 12980 +dahlingener 1 6.957497 0 0 12981 +architectureeveryon 1 6.957497 0 0 12982 +researchxf 1 6.957497 0 0 12983 +systemweb 1 6.957497 0 0 12984 +pagesummar 1 6.957497 0 0 12985 +compter 1 6.957497 0 0 12986 +includinghistor 1 6.957497 0 0 12987 +informationif 1 6.957497 0 0 12988 +damani 1 6.957497 0 0 12989 +howdi 1 6.957497 0 0 12990 +pagal 1 6.957497 0 0 12991 +dekho 1 6.957497 0 0 12992 +updateth 1 6.957497 0 0 12993 +oblig 1 6.957497 0 0 12994 +providesometh 1 6.957497 0 0 12995 +guadulp 1 6.957497 0 0 12996 +austinphon 1 6.957497 0 0 12997 +marshalldan 1 6.957497 0 0 12998 +electromechan 1 6.957497 0 0 12999 +thelogist 1 6.957497 0 0 13000 +attractor 1 6.957497 0 0 13001 +burnet 1 6.957497 0 0 13002 +pastur 1 6.957497 0 0 13003 +jupit 1 6.957497 0 0 13004 +pagedoug 1 6.957497 0 0 13005 +oflinksto 1 6.957497 0 0 13006 +aboutsport 1 6.957497 0 0 13007 +booksin 1 6.957497 0 0 13008 +fewjok 1 6.957497 0 0 13009 +testof 1 6.957497 0 0 13010 +aweath 1 6.957497 0 0 13011 +mapandcondit 1 6.957497 0 0 13012 +austinandnew 1 6.957497 0 0 13013 +amgraci 1 6.957497 0 0 13014 +puttingit 1 6.957497 0 0 13015 +webbrows 1 6.957497 0 0 13016 +thisi 1 6.957497 0 0 13017 +stuffmom 1 6.957497 0 0 13018 +calendarlink 1 6.957497 0 0 13019 +fictionbooksjokessportsfoodvideout 1 6.957497 0 0 13020 +libraryresumelast 1 6.957497 0 0 13021 +dasdastuart 1 6.957497 0 0 13022 +swhich 1 6.957497 0 0 13023 +annoi 1 6.957497 0 0 13024 +thisorthi 1 6.957497 0 0 13025 +lawdian 1 6.957497 0 0 13026 +problemnavig 1 6.957497 0 0 13027 +washingtonst 1 6.957497 0 0 13028 +stateunivers 1 6.957497 0 0 13029 +dianelaw 1 6.957497 0 0 13030 +gann 1 6.957497 0 0 13031 +illig 1 6.957497 0 0 13032 +dionisi 1 6.957497 0 0 13033 +papadopoulosdionisi 1 6.957497 0 0 13034 +papadopoulosabout 1 6.957497 0 0 13035 +medionisi 1 6.957497 0 0 13036 +papadopoulo 1 6.957497 0 0 13037 +panhellen 1 6.957497 0 0 13038 +associationpanathinaiko 1 6.957497 0 0 13039 +clubgreek 1 6.957497 0 0 13040 +newshellen 1 6.957497 0 0 13041 +networkeveryth 1 6.957497 0 0 13042 +sicomp 1 6.957497 0 0 13043 +zuckermandavid 1 6.957497 0 0 13044 +zuckermanassist 1 6.957497 0 0 13045 +cryptographyresearch 1 6.957497 0 0 13046 +myprofil 1 6.957497 0 0 13047 +transposit 1 6.957497 0 0 13048 +extractor 1 6.957497 0 0 13049 +jcss 1 6.957497 0 0 13050 +logspac 1 6.957497 0 0 13051 +tight 1 6.957497 0 0 13052 +derandom 1 6.957497 0 0 13053 +constructionand 1 6.957497 0 0 13054 +setfor 1 6.957497 0 0 13055 +rectangl 1 6.957497 0 0 13056 +unapproxim 1 6.957497 0 0 13057 +currentresearch 1 6.957497 0 0 13058 +andmultiresolut 1 6.957497 0 0 13059 +dmcl 1 6.957497 0 0 13060 +microsystemsinc 1 6.957497 0 0 13061 +yourcom 1 6.957497 0 0 13062 +jakarta 1 6.957497 0 0 13063 +batorydon 1 6.957497 0 0 13064 +batorysoftwar 1 6.957497 0 0 13065 +pluggabl 1 6.957497 0 0 13066 +schlumberg 1 6.957497 0 0 13067 +banerjeeabout 1 6.957497 0 0 13068 +methi 1 6.957497 0 0 13069 +programminggroup 1 6.957497 0 0 13070 +graphicalparallel 1 6.957497 0 0 13071 +departmentpost 1 6.957497 0 0 13072 +homepost 1 6.957497 0 0 13073 +enfield 1 6.957497 0 0 13074 +camahort 1 6.957497 0 0 13075 +gurrea 1 6.957497 0 0 13076 +mmmmm 1 6.957497 0 0 13077 +lose 1 6.957497 0 0 13078 +ecamahor 1 6.957497 0 0 13079 +posnak 1 6.957497 0 0 13080 +isod 1 6.957497 0 0 13081 +emersonbruton 1 6.957497 0 0 13082 +publications 1 6.957497 0 0 13083 +sistla 1 6.957497 0 0 13084 +sadler 1 6.957497 0 0 13085 +jutla 1 6.957497 0 0 13086 +determinaci 1 6.957497 0 0 13087 +modal 1 6.957497 0 0 13088 +amsterdam 1 6.957497 0 0 13089 +analyst 1 6.957497 0 0 13090 +ticam 1 6.957497 0 0 13091 +evangelist 1 6.957497 0 0 13092 +pageemeri 1 6.957497 0 0 13093 +randomli 1 6.957497 0 0 13094 +uttr 1 6.957497 0 0 13095 +othermi 1 6.957497 0 0 13096 +youngest 1 6.957497 0 0 13097 +handiwork 1 6.957497 0 0 13098 +systemtexbook 1 6.957497 0 0 13099 +exchangegrac 1 6.957497 0 0 13100 +macaddict 1 6.957497 0 0 13101 +wuabout 1 6.957497 0 0 13102 +myselfhi 1 6.957497 0 0 13103 +inibm 1 6.957497 0 0 13104 +costom 1 6.957497 0 0 13105 +manyalumni 1 6.957497 0 0 13106 +instrumentsinc 1 6.957497 0 0 13107 +computingmanag 1 6.957497 0 0 13108 +informationautomat 1 6.957497 0 0 13109 +baseyahoogalaxi 1 6.957497 0 0 13110 +universityyellow 1 6.957497 0 0 13111 +infoleisur 1 6.957497 0 0 13112 +timenewspagepeopl 1 6.957497 0 0 13113 +dailyartstim 1 6.957497 0 0 13114 +magazinechines 1 6.957497 0 0 13115 +magazinec 1 6.957497 0 0 13116 +antoniosan 1 6.957497 0 0 13117 +franciscomarina 1 6.957497 0 0 13118 +citysan 1 6.957497 0 0 13119 +pointemail 1 6.957497 0 0 13120 +emmawu 1 6.957497 0 0 13121 +remolinaemilio 1 6.957497 0 0 13122 +remolinaabout 1 6.957497 0 0 13123 +eremolin 1 6.957497 0 0 13124 +levent 1 6.957497 0 0 13125 +sayfasi 1 6.957497 0 0 13126 +erkok 1 6.957497 0 0 13127 +inturkei 1 6.957497 0 0 13128 +ceng 1 6.957497 0 0 13129 +metu 1 6.957497 0 0 13130 +erkokto 1 6.957497 0 0 13131 +esra 1 6.957497 0 0 13132 +erdem 1 6.957497 0 0 13133 +bilkent 1 6.957497 0 0 13134 +learninginduct 1 6.957497 0 0 13135 +sciencelearningreason 1 6.957497 0 0 13136 +reasoningknowledg 1 6.957497 0 0 13137 +representationemotionsphilosophi 1 6.957497 0 0 13138 +mindcontact 1 6.957497 0 0 13139 +pagestephen 1 6.957497 0 0 13140 +carlpardon 1 6.957497 0 0 13141 +planmi 1 6.957497 0 0 13142 +interestsa 1 6.957497 0 0 13143 +psuedo 1 6.957497 0 0 13144 +dose 1 6.957497 0 0 13145 +pike 1 6.957497 0 0 13146 +bandget 1 6.957497 0 0 13147 +touchpost 1 6.957497 0 0 13148 +esteban 1 6.957497 0 0 13149 +edureturn 1 6.957497 0 0 13150 +estlintara 1 6.957497 0 0 13151 +estlinmachin 1 6.957497 0 0 13152 +austinresearchcontrol 1 6.957497 0 0 13153 +byguid 1 6.957497 0 0 13154 +barbanson 1 6.957497 0 0 13155 +tank 1 6.957497 0 0 13156 +versionhom 1 6.957497 0 0 13157 +versionthi 1 6.957497 0 0 13158 +spool 1 6.957497 0 0 13159 +francoisabout 1 6.957497 0 0 13160 +mecurr 1 6.957497 0 0 13161 +genuin 1 6.957497 0 0 13162 +pastri 1 6.957497 0 0 13163 +fruit 1 6.957497 0 0 13164 +mouss 1 6.957497 0 0 13165 +groceri 1 6.957497 0 0 13166 +shed 1 6.957497 0 0 13167 +lighton 1 6.957497 0 0 13168 +hyogo 1 6.957497 0 0 13169 +atdominion 1 6.957497 0 0 13170 +hqcheck 1 6.957497 0 0 13171 +dilberti 1 6.957497 0 0 13172 +mentionthat 1 6.957497 0 0 13173 +edufrancoi 1 6.957497 0 0 13174 +eduinform 1 6.957497 0 0 13175 +fussellb 1 6.957497 0 0 13176 +georgemi 1 6.957497 0 0 13177 +youand 1 6.957497 0 0 13178 +goodthat 1 6.957497 0 0 13179 +wickersham 1 6.957497 0 0 13180 +gajit 1 6.957497 0 0 13181 +foundus 1 6.957497 0 0 13182 +geeta 1 6.957497 0 0 13183 +tofigur 1 6.957497 0 0 13184 +theexcess 1 6.957497 0 0 13185 +verbos 1 6.957497 0 0 13186 +thecollect 1 6.957497 0 0 13187 +putonli 1 6.957497 0 0 13188 +barest 1 6.957497 0 0 13189 +adieu 1 6.957497 0 0 13190 +outpour 1 6.957497 0 0 13191 +hag 1 6.957497 0 0 13192 +hopey 1 6.957497 0 0 13193 +untroubl 1 6.957497 0 0 13194 +conscienc 1 6.957497 0 0 13195 +untim 1 6.957497 0 0 13196 +demis 1 6.957497 0 0 13197 +vitriol 1 6.957497 0 0 13198 +reinstat 1 6.957497 0 0 13199 +signin 1 6.957497 0 0 13200 +lesscrit 1 6.957497 0 0 13201 +hideout 1 6.957497 0 0 13202 +wont 1 6.957497 0 0 13203 +disappoint 1 6.957497 0 0 13204 +geographi 1 6.957497 0 0 13205 +gala 1 6.957497 0 0 13206 +wasjust 1 6.957497 0 0 13207 +mehul 1 6.957497 0 0 13208 +shantanu 1 6.957497 0 0 13209 +likechess 1 6.957497 0 0 13210 +carrom 1 6.957497 0 0 13211 +racquet 1 6.957497 0 0 13212 +definetli 1 6.957497 0 0 13213 +horoscop 1 6.957497 0 0 13214 +compatabil 1 6.957497 0 0 13215 +destini 1 6.957497 0 0 13216 +hardwork 1 6.957497 0 0 13217 +dispos 1 6.957497 0 0 13218 +pragmat 1 6.957497 0 0 13219 +goudanetwork 1 6.957497 0 0 13220 +goudaacm 1 6.957497 0 0 13221 +surveysa 1 6.957497 0 0 13222 +specificationsand 1 6.957497 0 0 13223 +implementationsmoham 1 6.957497 0 0 13224 +goudath 1 6.957497 0 0 13225 +usagouda 1 6.957497 0 0 13226 +htmlabstract 1 6.957497 0 0 13227 +bridgeth 1 6.957497 0 0 13228 +implementationsaddit 1 6.957497 0 0 13229 +methodologypubl 1 6.957497 0 0 13230 +digitalor 1 6.957497 0 0 13231 +classroomus 1 6.957497 0 0 13232 +profit 1 6.957497 0 0 13233 +bearthi 1 6.957497 0 0 13234 +forcompon 1 6.957497 0 0 13235 +torepublish 1 6.957497 0 0 13236 +requiresprior 1 6.957497 0 0 13237 +frompubl 1 6.957497 0 0 13238 +orpermiss 1 6.957497 0 0 13239 +goudagouda 1 6.957497 0 0 13240 +gunnel 1 6.957497 0 0 13241 +transpos 1 6.957497 0 0 13242 +drank 1 6.957497 0 0 13243 +depict 1 6.957497 0 0 13244 +computationsif 1 6.957497 0 0 13245 +pageam 1 6.957497 0 0 13246 +towni 1 6.957497 0 0 13247 +tropschuhfrank 1 6.957497 0 0 13248 +tropschuh 1 6.957497 0 0 13249 +gunther 1 6.957497 0 0 13250 +schweiz 1 6.957497 0 0 13251 +clayton 1 6.957497 0 0 13252 +waldhofstrass 1 6.957497 0 0 13253 +rheinfelden 1 6.957497 0 0 13254 +vitaeenglishdeutschlinkscarnegi 1 6.957497 0 0 13255 +universitterlangen 1 6.957497 0 0 13256 +nrnberg 1 6.957497 0 0 13257 +abroad 1 6.957497 0 0 13258 +mathematisch 1 6.957497 0 0 13259 +maschinen 1 6.957497 0 0 13260 +datenverarbeitung 1 6.957497 0 0 13261 +tropschuhgunth 1 6.957497 0 0 13262 +yongxiang 1 6.957497 0 0 13263 +pagemerri 1 6.957497 0 0 13264 +christmashappi 1 6.957497 0 0 13265 +homepagegao 1 6.957497 0 0 13266 +yongxiangsever 1 6.957497 0 0 13267 +pointsto 1 6.957497 0 0 13268 +chinadepart 1 6.957497 0 0 13269 +male 1 6.957497 0 0 13270 +huanan 1 6.957497 0 0 13271 +tenniseduc 1 6.957497 0 0 13272 +schoolth 1 6.957497 0 0 13273 +semestercoursesc 1 6.957497 0 0 13274 +linc 1 6.957497 0 0 13275 +alvis 1 6.957497 0 0 13276 +mirankerfil 1 6.957497 0 0 13277 +databs 1 6.957497 0 0 13278 +formthank 1 6.957497 0 0 13279 +pnueli 1 6.957497 0 0 13280 +aprofessor 1 6.957497 0 0 13281 +prestig 1 6.957497 0 0 13282 +incompletelist 1 6.957497 0 0 13283 +publicationshai 1 6.957497 0 0 13284 +forriv 1 6.957497 0 0 13285 +crosstalk 1 6.957497 0 0 13286 +optimalnon 1 6.957497 0 0 13287 +elmor 1 6.957497 0 0 13288 +acmintern 1 6.957497 0 0 13289 +austintaylor 1 6.957497 0 0 13290 +staustin 1 6.957497 0 0 13291 +haizhou 1 6.957497 0 0 13292 +myselfnow 1 6.957497 0 0 13293 +pre 1 6.957497 0 0 13294 +alumnihom 1 6.957497 0 0 13295 +pagecontact 1 6.957497 0 0 13296 +haosun 1 6.957497 0 0 13297 +edunow 1 6.957497 0 0 13298 +micheal 1 6.957497 0 0 13299 +hewett 1 6.957497 0 0 13300 +hewetthewett 1 6.957497 0 0 13301 +fingerm 1 6.957497 0 0 13302 +stanfordunivers 1 6.957497 0 0 13303 +washburnunivers 1 6.957497 0 0 13304 +collegiateprogram 1 6.957497 0 0 13305 +wahlutc 1 6.957497 0 0 13306 +hanoi 1 6.957497 0 0 13307 +tokudaut 1 6.957497 0 0 13308 +locatem 1 6.957497 0 0 13309 +learnabout 1 6.957497 0 0 13310 +downloadmi 1 6.957497 0 0 13311 +learnmor 1 6.957497 0 0 13312 +timefax 1 6.957497 0 0 13313 +hewettemail 1 6.957497 0 0 13314 +hiep 1 6.957497 0 0 13315 +xwindow 1 6.957497 0 0 13316 +gunu 1 6.957497 0 0 13317 +netrek 1 6.957497 0 0 13318 +factoryx 1 6.957497 0 0 13319 +nguyenhiep 1 6.957497 0 0 13320 +nguyenabout 1 6.957497 0 0 13321 +meabout 1 6.957497 0 0 13322 +texasfor 1 6.957497 0 0 13323 +providinghigh 1 6.957497 0 0 13324 +hypertextresum 1 6.957497 0 0 13325 +con 1 6.957497 0 0 13326 +nsplace 1 6.957497 0 0 13327 +rexi 1 6.957497 0 0 13328 +emptiv 1 6.957497 0 0 13329 +gdraw 1 6.957497 0 0 13330 +specular 1 6.957497 0 0 13331 +sonar 1 6.957497 0 0 13332 +xgcl 1 6.957497 0 0 13333 +xakcl 1 6.957497 0 0 13334 +anonlin 1 6.957497 0 0 13335 +andmaintain 1 6.957497 0 0 13336 +currentlyact 1 6.957497 0 0 13337 +internetsoftwar 1 6.957497 0 0 13338 +mappingroutin 1 6.957497 0 0 13339 +potteri 1 6.957497 0 0 13340 +vrmlto 1 6.957497 0 0 13341 +hudson 1 6.957497 0 0 13342 +pagehudson 1 6.957497 0 0 13343 +turnerphd 1 6.957497 0 0 13344 +ofcommonsens 1 6.957497 0 0 13345 +msc 1 6.957497 0 0 13346 +mli 1 6.957497 0 0 13347 +linkseuropean 1 6.957497 0 0 13348 +spatialand 1 6.957497 0 0 13349 +reasoningto 1 6.957497 0 0 13350 +hyanbin 1 6.957497 0 0 13351 +cutti 1 6.957497 0 0 13352 +webmuseum 1 6.957497 0 0 13353 +homeland 1 6.957497 0 0 13354 +tarlor 1 6.957497 0 0 13355 +isheldon 1 6.957497 0 0 13356 +reciv 1 6.957497 0 0 13357 +unives 1 6.957497 0 0 13358 +schlaeor 1 6.957497 0 0 13359 +mellor 1 6.957497 0 0 13360 +bsptree 1 6.957497 0 0 13361 +butt 1 6.957497 0 0 13362 +adair 1 6.957497 0 0 13363 +crinkum 1 6.957497 0 0 13364 +crankum 1 6.957497 0 0 13365 +compound 1 6.957497 0 0 13366 +eileen 1 6.957497 0 0 13367 +mengerink 1 6.957497 0 0 13368 +fanat 1 6.957497 0 0 13369 +traylen 1 6.957497 0 0 13370 +jadair 1 6.957497 0 0 13371 +chamberssenior 1 6.957497 0 0 13372 +specialistb 1 6.957497 0 0 13373 +paso 1 6.957497 0 0 13374 +aftereffect 1 6.957497 0 0 13375 +tilt 1 6.957497 0 0 13376 +bednarjim 1 6.957497 0 0 13377 +ofcognit 1 6.957497 0 0 13378 +fewdecad 1 6.957497 0 0 13379 +thehuman 1 6.957497 0 0 13380 +beavail 1 6.957497 0 0 13381 +becomingpract 1 6.957497 0 0 13382 +refut 1 6.957497 0 0 13383 +makecognit 1 6.957497 0 0 13384 +purelyphilosoph 1 6.957497 0 0 13385 +psychologist 1 6.957497 0 0 13386 +inhibit 1 6.957497 0 0 13387 +indirect 1 6.957497 0 0 13388 +visualbehavior 1 6.957497 0 0 13389 +departmentmi 1 6.957497 0 0 13390 +ofjunfanghi 1 6.957497 0 0 13391 +sysadm 1 6.957497 0 0 13392 +unixish 1 6.957497 0 0 13393 +kristina 1 6.957497 0 0 13394 +jfang 1 6.957497 0 0 13395 +jprior 1 6.957497 0 0 13396 +priorjohn 1 6.957497 0 0 13397 +priormi 1 6.957497 0 0 13398 +accumul 1 6.957497 0 0 13399 +hurt 1 6.957497 0 0 13400 +nacho 1 6.957497 0 0 13401 +swisher 1 6.957497 0 0 13402 +homepagejeff 1 6.957497 0 0 13403 +homepagecontact 1 6.957497 0 0 13404 +informationpublicationssoftwar 1 6.957497 0 0 13405 +groupphoto 1 6.957497 0 0 13406 +albumfavorit 1 6.957497 0 0 13407 +sitesuniversityof 1 6.957497 0 0 13408 +departmentappliedresearch 1 6.957497 0 0 13409 +electricaland 1 6.957497 0 0 13410 +departmentedsfinanci 1 6.957497 0 0 13411 +fttc 1 6.957497 0 0 13412 +utacademiccalendarsut 1 6.957497 0 0 13413 +sportshook 1 6.957497 0 0 13414 +longhorn 1 6.957497 0 0 13415 +utfootbal 1 6.957497 0 0 13416 +scheduleaustintexa 1 6.957497 0 0 13417 +jthoma 1 6.957497 0 0 13418 +jiani 1 6.957497 0 0 13419 +indepart 1 6.957497 0 0 13420 +ofpek 1 6.957497 0 0 13421 +chinesechines 1 6.957497 0 0 13422 +scenerychines 1 6.957497 0 0 13423 +classicschines 1 6.957497 0 0 13424 +magazineschines 1 6.957497 0 0 13425 +newspapersus 1 6.957497 0 0 13426 +libraryut 1 6.957497 0 0 13427 +campusutaccessabout 1 6.957497 0 0 13428 +citylimit 1 6.957497 0 0 13429 +miscellaneousyahoojava 1 6.957497 0 0 13430 +sunjavascript 1 6.957497 0 0 13431 +netscapeth 1 6.957497 0 0 13432 +associationcomput 1 6.957497 0 0 13433 +webnetwork 1 6.957497 0 0 13434 +libraryth 1 6.957497 0 0 13435 +bibliographiesintern 1 6.957497 0 0 13436 +jyluo 1 6.957497 0 0 13437 +suggestionswould 1 6.957497 0 0 13438 +kedar 1 6.957497 0 0 13439 +namjoshiabout 1 6.957497 0 0 13440 +distributedalgorithm 1 6.957497 0 0 13441 +automatatheori 1 6.957497 0 0 13442 +amul 1 6.957497 0 0 13443 +adkedar 1 6.957497 0 0 13444 +harker 1 6.957497 0 0 13445 +kharker 1 6.957497 0 0 13446 +amateur 1 6.957497 0 0 13447 +rocketri 1 6.957497 0 0 13448 +lecturerassoci 1 6.957497 0 0 13449 +lamar 1 6.957497 0 0 13450 +technicalinnov 1 6.957497 0 0 13451 +andappli 1 6.957497 0 0 13452 +coeffici 1 6.957497 0 0 13453 +publicationsw 1 6.957497 0 0 13454 +chenei 1 6.957497 0 0 13455 +hay 1 6.957497 0 0 13456 +coput 1 6.957497 0 0 13457 +theperson 1 6.957497 0 0 13458 +productsdivis 1 6.957497 0 0 13459 +backgroundba 1 6.957497 0 0 13460 +susquehanna 1 6.957497 0 0 13461 +selinsgrov 1 6.957497 0 0 13462 +stern 1 6.957497 0 0 13463 +businessnew 1 6.957497 0 0 13464 +iwith 1 6.957497 0 0 13465 +algebrawith 1 6.957497 0 0 13466 +pflugervil 1 6.957497 0 0 13467 +kornerupjacob 1 6.957497 0 0 13468 +kuipersbenjamin 1 6.957497 0 0 13469 +kuipersbruton 1 6.957497 0 0 13470 +withparticular 1 6.957497 0 0 13471 +grouphom 1 6.957497 0 0 13472 +andavail 1 6.957497 0 0 13473 +qualitativereason 1 6.957497 0 0 13474 +kata 1 6.957497 0 0 13475 +submissionnew 1 6.957497 0 0 13476 +empt 1 6.957497 0 0 13477 +statesman 1 6.957497 0 0 13478 +currentinterest 1 6.957497 0 0 13479 +nsaunivers 1 6.957497 0 0 13480 +videoservic 1 6.957497 0 0 13481 +landrum 1 6.957497 0 0 13482 +viruspictur 1 6.957497 0 0 13483 +empirepch 1 6.957497 0 0 13484 +retreattexa 1 6.957497 0 0 13485 +rockrsumfamilyinterest 1 6.957497 0 0 13486 +councillandrum 1 6.957497 0 0 13487 +austinr 1 6.957497 0 0 13488 +lavenderadjunct 1 6.957497 0 0 13489 +anddepart 1 6.957497 0 0 13490 +engineeringth 1 6.957497 0 0 13491 +informationsuggest 1 6.957497 0 0 13492 +pageyeap 1 6.957497 0 0 13493 +designalgorithm 1 6.957497 0 0 13494 +communityi 1 6.957497 0 0 13495 +lovesto 1 6.957497 0 0 13496 +leekk 1 6.957497 0 0 13497 +buildreli 1 6.957497 0 0 13498 +projectsmemb 1 6.957497 0 0 13499 +lablessss 1 6.957497 0 0 13500 +seriessponsorslast 1 6.957497 0 0 13501 +sciencesth 1 6.957497 0 0 13502 +lincalvin 1 6.957497 0 0 13503 +linassist 1 6.957497 0 0 13504 +iswhat 1 6.957497 0 0 13505 +_study_ 1 6.957497 0 0 13506 +_play_ 1 6.957497 0 0 13507 +carrilresearch 1 6.957497 0 0 13508 +interestscompil 1 6.957497 0 0 13509 +biologyalgorithm 1 6.957497 0 0 13510 +dikaiako 1 6.957497 0 0 13511 +manoussaki 1 6.957497 0 0 13512 +woodward 1 6.957497 0 0 13513 +internationalparallel 1 6.957497 0 0 13514 +sublanguag 1 6.957497 0 0 13515 +compilersfor 1 6.957497 0 0 13516 +gelernt 1 6.957497 0 0 13517 +nicolau 1 6.957497 0 0 13518 +withl 1 6.957497 0 0 13519 +liugt 1 6.957497 0 0 13520 +homepagehi 1 6.957497 0 0 13521 +timeschedul 1 6.957497 0 0 13522 +laurea 1 6.957497 0 0 13523 +agrav 1 6.957497 0 0 13524 +taylorhal 1 6.957497 0 0 13525 +campusshow 1 6.957497 0 0 13526 +luxu 1 6.957497 0 0 13527 +networksoth 1 6.957497 0 0 13528 +studyut 1 6.957497 0 0 13529 +universityaustin 1 6.957497 0 0 13530 +siglink 1 6.957497 0 0 13531 +sigmm 1 6.957497 0 0 13532 +newsjob 1 6.957497 0 0 13533 +forcast 1 6.957497 0 0 13534 +xuelu 1 6.957497 0 0 13535 +werthlauri 1 6.957497 0 0 13536 +werthlectur 1 6.957497 0 0 13537 +lwerth 1 6.957497 0 0 13538 +scienceprofession 1 6.957497 0 0 13539 +servicevic 1 6.957497 0 0 13540 +presentco 1 6.957497 0 0 13541 +interestsoftwar 1 6.957497 0 0 13542 +andenviron 1 6.957497 0 0 13543 +publicationsl 1 6.957497 0 0 13544 +tomayko 1 6.957497 0 0 13545 +pagefaculti 1 6.957497 0 0 13546 +profilesc 1 6.957497 0 0 13547 +classeslast 1 6.957497 0 0 13548 +korupoluwelcom 1 6.957497 0 0 13549 +ahom 1 6.957497 0 0 13550 +madrashomepag 1 6.957497 0 0 13551 +ganga 1 6.957497 0 0 13552 +alumniclass 1 6.957497 0 0 13553 +utalgorithm 1 6.957497 0 0 13554 +sportszon 1 6.957497 0 0 13555 +batchu 1 6.957497 0 0 13556 +korupoluemail 1 6.957497 0 0 13557 +malloryrichard 1 6.957497 0 0 13558 +malloryresearchthesi 1 6.957497 0 0 13559 +quasi 1 6.957497 0 0 13560 +qsimsimul 1 6.957497 0 0 13561 +legitim 1 6.957497 0 0 13562 +illegitim 1 6.957497 0 0 13563 +schneidermarco 1 6.957497 0 0 13564 +schneiderph 1 6.957497 0 0 13565 +austinresearchth 1 6.957497 0 0 13566 +itsstat 1 6.957497 0 0 13567 +whenregardless 1 6.957497 0 0 13568 +systemwhich 1 6.957497 0 0 13569 +tolerantr 1 6.957497 0 0 13570 +anish 1 6.957497 0 0 13571 +silent 1 6.957497 0 0 13572 +shlomi 1 6.957497 0 0 13573 +dolev 1 6.957497 0 0 13574 +ctaylor 1 6.957497 0 0 13575 +usamarco 1 6.957497 0 0 13576 +johnstonemark 1 6.957497 0 0 13577 +johnstonecontact 1 6.957497 0 0 13578 +markj 1 6.957497 0 0 13579 +byrichard 1 6.957497 0 0 13580 +brice 1 6.957497 0 0 13581 +analysisclass 1 6.957497 0 0 13582 +somersetdesign 1 6.957497 0 0 13583 +centerresearch 1 6.957497 0 0 13584 +garbagecollector 1 6.957497 0 0 13585 +ofstudi 1 6.957497 0 0 13586 +dissertationpropos 1 6.957497 0 0 13587 +timingof 1 6.957497 0 0 13588 +markng 1 6.957497 0 0 13589 +kaltenbachmarku 1 6.957497 0 0 13590 +kaltenbachintroductionwelcom 1 6.957497 0 0 13591 +iapolog 1 6.957497 0 0 13592 +spsp 1 6.957497 0 0 13593 +stempor 1 6.957497 0 0 13594 +checkerfor 1 6.957497 0 0 13595 +avisit 1 6.957497 0 0 13596 +theut 1 6.957497 0 0 13597 +departmenthom 1 6.957497 0 0 13598 +archivefor 1 6.957497 0 0 13599 +sworld 1 6.957497 0 0 13600 +supporthom 1 6.957497 0 0 13601 +actansit 1 6.957497 0 0 13602 +theatt 1 6.957497 0 0 13603 +memarti 1 6.957497 0 0 13604 +researchal 1 6.957497 0 0 13605 +martym 1 6.957497 0 0 13606 +anywher 1 6.957497 0 0 13607 +virtualc 1 6.957497 0 0 13608 +internetrestaur 1 6.957497 0 0 13609 +tnstechnolog 1 6.957497 0 0 13610 +mccain 1 6.957497 0 0 13611 +mccainabout 1 6.957497 0 0 13612 +mephd 1 6.957497 0 0 13613 +baylor 1 6.957497 0 0 13614 +califfmari 1 6.957497 0 0 13615 +califfmachin 1 6.957497 0 0 13616 +especiallyinduct 1 6.957497 0 0 13617 +prado 1 6.957497 0 0 13618 +lester 1 6.957497 0 0 13619 +callawai 1 6.957497 0 0 13620 +andersen 1 6.957497 0 0 13621 +acker 1 6.957497 0 0 13622 +eilert 1 6.957497 0 0 13623 +groupknowledg 1 6.957497 0 0 13624 +overviewour 1 6.957497 0 0 13625 +atuniv 1 6.957497 0 0 13626 +currentexpert 1 6.957497 0 0 13627 +broadknowledg 1 6.957497 0 0 13628 +toexplain 1 6.957497 0 0 13629 +answeringa 1 6.957497 0 0 13630 +formallyrepres 1 6.957497 0 0 13631 +thebiolog 1 6.957497 0 0 13632 +andthos 1 6.957497 0 0 13633 +beanswer 1 6.957497 0 0 13634 +jeffrickel 1 6.957497 0 0 13635 +taskof 1 6.957497 0 0 13636 +thesimplest 1 6.957497 0 0 13637 +dauntingrequir 1 6.957497 0 0 13638 +manymodel 1 6.957497 0 0 13639 +compilerand 1 6.957497 0 0 13640 +bybuild 1 6.957497 0 0 13641 +computingenviron 1 6.957497 0 0 13642 +deskassist 1 6.957497 0 0 13643 +squestion 1 6.957497 0 0 13644 +projectsour 1 6.957497 0 0 13645 +kned 1 6.957497 0 0 13646 +kastl 1 6.957497 0 0 13647 +fare 1 6.957497 0 0 13648 +lex 1 6.957497 0 0 13649 +tripel 1 6.957497 0 0 13650 +theorist 1 6.957497 0 0 13651 +searcher 1 6.957497 0 0 13652 +alumna 1 6.957497 0 0 13653 +lian 1 6.957497 0 0 13654 +blumenth 1 6.957497 0 0 13655 +eolu 1 6.957497 0 0 13656 +uwyo 1 6.957497 0 0 13657 +clarkp 1 6.957497 0 0 13658 +redwood 1 6.957497 0 0 13659 +ncsu 1 6.957497 0 0 13660 +publicationsclick 1 6.957497 0 0 13661 +projectsclick 1 6.957497 0 0 13662 +rete 1 6.957497 0 0 13663 +belat 1 6.957497 0 0 13664 +fashionwai 1 6.957497 0 0 13665 +itscomparison 1 6.957497 0 0 13666 +encompass 1 6.957497 0 0 13667 +fundamentalcomput 1 6.957497 0 0 13668 +corollari 1 6.957497 0 0 13669 +thatgoal 1 6.957497 0 0 13670 +gadboi 1 6.957497 0 0 13671 +vasili 1 6.957497 0 0 13672 +samoladi 1 6.957497 0 0 13673 +schrag 1 6.957497 0 0 13674 +andrewsdavid 1 6.957497 0 0 13675 +brantchin 1 6.957497 0 0 13676 +kuoshiow 1 6.957497 0 0 13677 +salvator 1 6.957497 0 0 13678 +stolfo 1 6.957497 0 0 13679 +misrareg 1 6.957497 0 0 13680 +hopkin 1 6.957497 0 0 13681 +fellowarea 1 6.957497 0 0 13682 +asynchronoussystem 1 6.957497 0 0 13683 +otherpap 1 6.957497 0 0 13684 +anoverview 1 6.957497 0 0 13685 +apostscript 1 6.957497 0 0 13686 +versionaccess 1 6.957497 0 0 13687 +baff 1 6.957497 0 0 13688 +mahonei 1 6.957497 0 0 13689 +speedup 1 6.957497 0 0 13690 +knowledgerefin 1 6.957497 0 0 13691 +scicomp 1 6.957497 0 0 13692 +firstadvisor 1 6.957497 0 0 13693 +hwee 1 6.957497 0 0 13694 +nhweetou 1 6.957497 0 0 13695 +trantor 1 6.957497 0 0 13696 +ourston 1 6.957497 0 0 13697 +dirk_ourston 1 6.957497 0 0 13698 +cpqm 1 6.957497 0 0 13699 +saic 1 6.957497 0 0 13700 +furtwangen 1 6.957497 0 0 13701 +siddarth 1 6.957497 0 0 13702 +zell 1 6.957497 0 0 13703 +reasoningher 1 6.957497 0 0 13704 +fort 1 6.957497 0 0 13705 +chillin 1 6.957497 0 0 13706 +dolphin 1 6.957497 0 0 13707 +ilpnet 1 6.957497 0 0 13708 +sigart 1 6.957497 0 0 13709 +aritfici 1 6.957497 0 0 13710 +biblio 1 6.957497 0 0 13711 +jair 1 6.957497 0 0 13712 +foil 1 6.957497 0 0 13713 +homepageraymond 1 6.957497 0 0 13714 +mooneyassoci 1 6.957497 0 0 13715 +informationfal 1 6.957497 0 0 13716 +learningspr 1 6.957497 0 0 13717 +iiperson 1 6.957497 0 0 13718 +historyi 1 6.957497 0 0 13719 +fallon 1 6.957497 0 0 13720 +wherestart 1 6.957497 0 0 13721 +fallontownship 1 6.957497 0 0 13722 +urbanato 1 6.957497 0 0 13723 +learninggroup 1 6.957497 0 0 13724 +gerald 1 6.957497 0 0 13725 +dejong 1 6.957497 0 0 13726 +meadowfir 1 6.957497 0 0 13727 +moriarti 1 6.957497 0 0 13728 +moriartydav 1 6.957497 0 0 13729 +researchsequenti 1 6.957497 0 0 13730 +problemsinclud 1 6.957497 0 0 13731 +stateof 1 6.957497 0 0 13732 +selectanoth 1 6.957497 0 0 13733 +payoff 1 6.957497 0 0 13734 +madeor 1 6.957497 0 0 13735 +thesequ 1 6.957497 0 0 13736 +cumulativepayoff 1 6.957497 0 0 13737 +iscurr 1 6.957497 0 0 13738 +costli 1 6.957497 0 0 13739 +havestudi 1 6.957497 0 0 13740 +constraintsatisfact 1 6.957497 0 0 13741 +wade 1 6.957497 0 0 13742 +mwbarn 1 6.957497 0 0 13743 +barnesm 1 6.957497 0 0 13744 +barnesmwbarn 1 6.957497 0 0 13745 +workhelp 1 6.957497 0 0 13746 +pagestyp 1 6.957497 0 0 13747 +literatureliteratur 1 6.957497 0 0 13748 +notesclassesbackground 1 6.957497 0 0 13749 +informationph 1 6.957497 0 0 13750 +tanglebriar 1 6.957497 0 0 13751 +yete 1 6.957497 0 0 13752 +eduauthor 1 6.957497 0 0 13753 +barnesemail 1 6.957497 0 0 13754 +nell 1 6.957497 0 0 13755 +pagesunivers 1 6.957497 0 0 13756 +departmentwelcom 1 6.957497 0 0 13757 +utaustin 1 6.957497 0 0 13758 +fromful 1 6.957497 0 0 13759 +falland 1 6.957497 0 0 13760 +ofdissert 1 6.957497 0 0 13761 +memento 1 6.957497 0 0 13762 +nontechn 1 6.957497 0 0 13763 +anycorrespond 1 6.957497 0 0 13764 +ndale 1 6.957497 0 0 13765 +profilepublicationsresearch 1 6.957497 0 0 13766 +interestsperson 1 6.957497 0 0 13767 +interestsnel 1 6.957497 0 0 13768 +westlak 1 6.957497 0 0 13769 +obnoxi 1 6.957497 0 0 13770 +chartreus 1 6.957497 0 0 13771 +gnan 1 6.957497 0 0 13772 +pagegnana 1 6.957497 0 0 13773 +edufind 1 6.957497 0 0 13774 +bookmarksto 1 6.957497 0 0 13775 +acquist 1 6.957497 0 0 13776 +groupnatur 1 6.957497 0 0 13777 +austinw 1 6.957497 0 0 13778 +acquisitionand 1 6.957497 0 0 13779 +havedrawn 1 6.957497 0 0 13780 +bryant 1 6.957497 0 0 13781 +ataustinlast 1 6.957497 0 0 13782 +ristomiikkulainen 1 6.957497 0 0 13783 +basedvis 1 6.957497 0 0 13784 +mapbelow 1 6.957497 0 0 13785 +thecortex 1 6.957497 0 0 13786 +linkswusagemartym 1 6.957497 0 0 13787 +intelligencelaboratori 1 6.957497 0 0 13788 +genericalgorithmssolv 1 6.957497 0 0 13789 +specifiedinformallyartifici 1 6.957497 0 0 13790 +intelligencecurriculum 1 6.957497 0 0 13791 +publicationsemploymentgrantsprofession 1 6.957497 0 0 13792 +honorscurriculum 1 6.957497 0 0 13793 +vitaefre 1 6.957497 0 0 13794 +tmycin 1 6.957497 0 0 13795 +emycin 1 6.957497 0 0 13796 +lispconvers 1 6.957497 0 0 13797 +measurementsoftwar 1 6.957497 0 0 13798 +schemec 1 6.957497 0 0 13799 +programmingweb 1 6.957497 0 0 13800 +linksweatheraddress 1 6.957497 0 0 13801 +ctai 1 6.957497 0 0 13802 +austinaustintexa 1 6.957497 0 0 13803 +faxnovak 1 6.957497 0 0 13804 +meghan 1 6.957497 0 0 13805 +insult 1 6.957497 0 0 13806 +brienhi 1 6.957497 0 0 13807 +wipe 1 6.957497 0 0 13808 +crappi 1 6.957497 0 0 13809 +obrien 1 6.957497 0 0 13810 +oguer 1 6.957497 0 0 13811 +gutierrezogu 1 6.957497 0 0 13812 +gutierrezth 1 6.957497 0 0 13813 +austinprojectsomioswwhlinksconfer 1 6.957497 0 0 13814 +worldemail 1 6.957497 0 0 13815 +neeli 1 6.957497 0 0 13816 +groupoop 1 6.957497 0 0 13817 +groupthi 1 6.957497 0 0 13818 +studentsin 1 6.957497 0 0 13819 +kaplan 1 6.957497 0 0 13820 +wieren 1 6.957497 0 0 13821 +toimplement 1 6.957497 0 0 13822 +whichattempt 1 6.957497 0 0 13823 +unsoundstudi 1 6.957497 0 0 13824 +generationaland 1 6.957497 0 0 13825 +ongarbag 1 6.957497 0 0 13826 +managementfor 1 6.957497 0 0 13827 +andcompress 1 6.957497 0 0 13828 +noteson 1 6.957497 0 0 13829 +rawascii 1 6.957497 0 0 13830 +andrschemear 1 6.957497 0 0 13831 +thesiscontain 1 6.957497 0 0 13832 +whicharen 1 6.957497 0 0 13833 +sometimesoon 1 6.957497 0 0 13834 +htmlformat 1 6.957497 0 0 13835 +materialfrom 1 6.957497 0 0 13836 +expandedpresent 1 6.957497 0 0 13837 +texinfo 1 6.957497 0 0 13838 +metaobject 1 6.957497 0 0 13839 +backgroundread 1 6.957497 0 0 13840 +fortexa 1 6.957497 0 0 13841 +sftp 1 6.957497 0 0 13842 +notb 1 6.957497 0 0 13843 +boehm 1 6.957497 0 0 13844 +severalgarbag 1 6.957497 0 0 13845 +otuomagieaddress 1 6.957497 0 0 13846 +emailotu 1 6.957497 0 0 13847 +eduuniververs 1 6.957497 0 0 13848 +infouniversityth 1 6.957497 0 0 13849 +txa 1 6.957497 0 0 13850 +padgett 1 6.957497 0 0 13851 +padgettdon 1 6.957497 0 0 13852 +softar 1 6.957497 0 0 13853 +powerpointvers 1 6.957497 0 0 13854 +usafax 1 6.957497 0 0 13855 +battlebal 1 6.957497 0 0 13856 +hardinphilip 1 6.957497 0 0 13857 +hardinabout 1 6.957497 0 0 13858 +fallback 1 6.957497 0 0 13859 +multiplay 1 6.957497 0 0 13860 +runsund 1 6.957497 0 0 13861 +graphicssoftwar 1 6.957497 0 0 13862 +programmingto 1 6.957497 0 0 13863 +pahardin 1 6.957497 0 0 13864 +usanetrek 1 6.957497 0 0 13865 +pita 1 6.957497 0 0 13866 +digitaldisast 1 6.957497 0 0 13867 +plaster 1 6.957497 0 0 13868 +congradul 1 6.957497 0 0 13869 +smartest 1 6.957497 0 0 13870 +mcquestenpaul 1 6.957497 0 0 13871 +mcquestenphd 1 6.957497 0 0 13872 +bepract 1 6.957497 0 0 13873 +paulmcq 1 6.957497 0 0 13874 +forcsp 1 6.957497 0 0 13875 +programmingmor 1 6.957497 0 0 13876 +inmoriarti 1 6.957497 0 0 13877 +atcnr 1 6.957497 0 0 13878 +rome 1 6.957497 0 0 13879 +tout 1 6.957497 0 0 13880 +winer 1 6.957497 0 0 13881 +cynb 1 6.957497 0 0 13882 +humong 1 6.957497 0 0 13883 +knick 1 6.957497 0 0 13884 +knack 1 6.957497 0 0 13885 +nut 1 6.957497 0 0 13886 +pawang 1 6.957497 0 0 13887 +pawan 1 6.957497 0 0 13888 +pecina 1 6.957497 0 0 13889 +orpecina 1 6.957497 0 0 13890 +pecinaabout 1 6.957497 0 0 13891 +innuclear 1 6.957497 0 0 13892 +workedinvestig 1 6.957497 0 0 13893 +gaug 1 6.957497 0 0 13894 +graviti 1 6.957497 0 0 13895 +gravit 1 6.957497 0 0 13896 +unitari 1 6.957497 0 0 13897 +yuval 1 6.957497 0 0 13898 +eman 1 6.957497 0 0 13899 +jurgen 1 6.957497 0 0 13900 +fromcologn 1 6.957497 0 0 13901 +bureau 1 6.957497 0 0 13902 +geologi 1 6.957497 0 0 13903 +seismic 1 6.957497 0 0 13904 +tomographi 1 6.957497 0 0 13905 +hardag 1 6.957497 0 0 13906 +geophys 1 6.957497 0 0 13907 +geophysicist 1 6.957497 0 0 13908 +comerci 1 6.957497 0 0 13909 +solutionsin 1 6.957497 0 0 13910 +chromodynamicsmi 1 6.957497 0 0 13911 +defo 1 6.957497 0 0 13912 +phy 1 6.957497 0 0 13913 +woof 1 6.957497 0 0 13914 +nettl 1 6.957497 0 0 13915 +cornerinfolik 1 6.957497 0 0 13916 +blahblah 1 6.957497 0 0 13917 +eeek 1 6.957497 0 0 13918 +ibm 1 6.957497 0 0 13919 +afteri 1 6.957497 0 0 13920 +theinnoc 1 6.957497 0 0 13921 +buena 1 6.957497 0 0 13922 +movieplex 1 6.957497 0 0 13923 +employan 1 6.957497 0 0 13924 +improb 1 6.957497 0 0 13925 +plaxtongreg 1 6.957497 0 0 13926 +plaxtoncontact 1 6.957497 0 0 13927 +sciencetaylor 1 6.957497 0 0 13928 +profilepubl 1 6.957497 0 0 13929 +plaxtonplaxton 1 6.957497 0 0 13930 +porterassoci 1 6.957497 0 0 13931 +interestartifici 1 6.957497 0 0 13932 +researchhead 1 6.957497 0 0 13933 +basesand 1 6.957497 0 0 13934 +aait 1 6.957497 0 0 13935 +holt 1 6.957497 0 0 13936 +abstractand 1 6.957497 0 0 13937 +reportport 1 6.957497 0 0 13938 +kaltenbach 1 6.957497 0 0 13939 +thepowerlist 1 6.957497 0 0 13940 +austinpsp 1 6.957497 0 0 13941 +austinthi 1 6.957497 0 0 13942 +deriveparallel 1 6.957497 0 0 13943 +issupervis 1 6.957497 0 0 13944 +groupinclud 1 6.957497 0 0 13945 +edgar 1 6.957497 0 0 13946 +knapp 1 6.957497 0 0 13947 +ingolf 1 6.957497 0 0 13948 +krger 1 6.957497 0 0 13949 +josyula 1 6.957497 0 0 13950 +staskauska 1 6.957497 0 0 13951 +publicationsbelow 1 6.957497 0 0 13952 +wherev 1 6.957497 0 0 13953 +topap 1 6.957497 0 0 13954 +thenot 1 6.957497 0 0 13955 +inchandi 1 6.957497 0 0 13956 +amanuscript 1 6.957497 0 0 13957 +newun 1 6.957497 0 0 13958 +operatorco 1 6.957497 0 0 13959 +forrefer 1 6.957497 0 0 13960 +asymbol 1 6.957497 0 0 13961 +forfinit 1 6.957497 0 0 13962 +unityverifi 1 6.957497 0 0 13963 +toinclud 1 6.957497 0 0 13964 +twodiffer 1 6.957497 0 0 13965 +succinct 1 6.957497 0 0 13966 +givesnumer 1 6.957497 0 0 13967 +batcher 1 6.957497 0 0 13968 +asadd 1 6.957497 0 0 13969 +multipli 1 6.957497 0 0 13970 +addercircuit 1 6.957497 0 0 13971 +programscan 1 6.957497 0 0 13972 +speciallyhypercub 1 6.957497 0 0 13973 +caninterfer 1 6.957497 0 0 13974 +adisciplin 1 6.957497 0 0 13975 +genrat 1 6.957497 0 0 13976 +callsfor 1 6.957497 0 0 13977 +anexperi 1 6.957497 0 0 13978 +ingolfkrg 1 6.957497 0 0 13979 +thepsp 1 6.957497 0 0 13980 +sitejacob 1 6.957497 0 0 13981 +qime 1 6.957497 0 0 13982 +edureceiv 1 6.957497 0 0 13983 +univeris 1 6.957497 0 0 13984 +manoa 1 6.957497 0 0 13985 +hawaiiwork 1 6.957497 0 0 13986 +austincours 1 6.957497 0 0 13987 +teamweb 1 6.957497 0 0 13988 +utcssadaili 1 6.957497 0 0 13989 +texanstock 1 6.957497 0 0 13990 +picturesimageschines 1 6.957497 0 0 13991 +popsend 1 6.957497 0 0 13992 +jobtrakut 1 6.957497 0 0 13993 +gopherftp 1 6.957497 0 0 13994 +newstelnet 1 6.957497 0 0 13995 +cschen 1 6.957497 0 0 13996 +staffyour 1 6.957497 0 0 13997 +commentsguest 1 6.957497 0 0 13998 +ourresearch 1 6.957497 0 0 13999 +utexasqualit 1 6.957497 0 0 14000 +utexasth 1 6.957497 0 0 14001 +intelligentrobot 1 6.957497 0 0 14002 +knowledgerepresent 1 6.957497 0 0 14003 +algernon 1 6.957497 0 0 14004 +benjaminkuip 1 6.957497 0 0 14005 +areadescript 1 6.957497 0 0 14006 +qiang 1 6.957497 0 0 14007 +seriousjunk 1 6.957497 0 0 14008 +realjunk 1 6.957497 0 0 14009 +struggleforliv 1 6.957497 0 0 14010 +qzuo 1 6.957497 0 0 14011 +blumoferobert 1 6.957497 0 0 14012 +blumofei 1 6.957497 0 0 14013 +cilkmultithread 1 6.957497 0 0 14014 +hallpost 1 6.957497 0 0 14015 +wangwelcom 1 6.957497 0 0 14016 +rhwang 1 6.957497 0 0 14017 +miikkulainenristo 1 6.957497 0 0 14018 +miikkulainenassoci 1 6.957497 0 0 14019 +processeswith 1 6.957497 0 0 14020 +languageacquisit 1 6.957497 0 0 14021 +networkswith 1 6.957497 0 0 14022 +discoversequenti 1 6.957497 0 0 14023 +classessumm 1 6.957497 0 0 14024 +bigfoot 1 6.957497 0 0 14025 +pagerajmohan 1 6.957497 0 0 14026 +gregplaxton 1 6.957497 0 0 14027 +incombinator 1 6.957497 0 0 14028 +sciencemiscellan 1 6.957497 0 0 14029 +ephon 1 6.957497 0 0 14030 +utaccess 1 6.957497 0 0 14031 +homepagea 1 6.957497 0 0 14032 +chinachina 1 6.957497 0 0 14033 +jinan 1 6.957497 0 0 14034 +myhometown 1 6.957497 0 0 14035 +shandong 1 6.957497 0 0 14036 +gotmi 1 6.957497 0 0 14037 +ofchina 1 6.957497 0 0 14038 +texasaustin 1 6.957497 0 0 14039 +rent 1 6.957497 0 0 14040 +utcsth 1 6.957497 0 0 14041 +utnetcat 1 6.957497 0 0 14042 +browsabl 1 6.957497 0 0 14043 +austininform 1 6.957497 0 0 14044 +utcat 1 6.957497 0 0 14045 +systemsdynam 1 6.957497 0 0 14046 +graphicsc 1 6.957497 0 0 14047 +logicc 1 6.957497 0 0 14048 +moviesaustin 1 6.957497 0 0 14049 +filmsmicrosoft 1 6.957497 0 0 14050 +cinemania 1 6.957497 0 0 14051 +onlineal 1 6.957497 0 0 14052 +guidehollywood 1 6.957497 0 0 14053 +onlineinternet 1 6.957497 0 0 14054 +databaserog 1 6.957497 0 0 14055 +ebert 1 6.957497 0 0 14056 +moviesvisit 1 6.957497 0 0 14057 +contactmail 1 6.957497 0 0 14058 +aaustin 1 6.957497 0 0 14059 +emailrtan 1 6.957497 0 0 14060 +fingerclick 1 6.957497 0 0 14061 +strangl 1 6.957497 0 0 14062 +curious 1 6.957497 0 0 14063 +inquiri 1 6.957497 0 0 14064 +delic 1 6.957497 0 0 14065 +depriv 1 6.957497 0 0 14066 +distast 1 6.957497 0 0 14067 +deni 1 6.957497 0 0 14068 +duress 1 6.957497 0 0 14069 +fate 1 6.957497 0 0 14070 +messi 1 6.957497 0 0 14071 +ruwei 1 6.957497 0 0 14072 +rvdg 1 6.957497 0 0 14073 +geijnassoci 1 6.957497 0 0 14074 +oftradit 1 6.957497 0 0 14075 +sequentialmachin 1 6.957497 0 0 14076 +inoth 1 6.957497 0 0 14077 +researchconcentr 1 6.957497 0 0 14078 +forimpl 1 6.957497 0 0 14079 +allowssuch 1 6.957497 0 0 14080 +parallelprocessor 1 6.957497 0 0 14081 +intercom 1 6.957497 0 0 14082 +sl_librari 1 6.957497 0 0 14083 +sundeep 1 6.957497 0 0 14084 +sundeepabraham 1 6.957497 0 0 14085 +calicut 1 6.957497 0 0 14086 +tinkerwith 1 6.957497 0 0 14087 +sammi 1 6.957497 0 0 14088 +startingpoint 1 6.957497 0 0 14089 +religion 1 6.957497 0 0 14090 +orprohibit 1 6.957497 0 0 14091 +ofspeech 1 6.957497 0 0 14092 +peaceabl 1 6.957497 0 0 14093 +toassembl 1 6.957497 0 0 14094 +petit 1 6.957497 0 0 14095 +redress 1 6.957497 0 0 14096 +grievanc 1 6.957497 0 0 14097 +herbarium 1 6.957497 0 0 14098 +anagram 1 6.957497 0 0 14099 +nil 1 6.957497 0 0 14100 +reker 1 6.957497 0 0 14101 +pop 1 6.957497 0 0 14102 +anthropolog 1 6.957497 0 0 14103 +kate 1 6.957497 0 0 14104 +showbiz 1 6.957497 0 0 14105 +pollstar 1 6.957497 0 0 14106 +ryder 1 6.957497 0 0 14107 +laptop 1 6.957497 0 0 14108 +traveloc 1 6.957497 0 0 14109 +eduth 1 6.957497 0 0 14110 +sawadajun 1 6.957497 0 0 14111 +sawadacontact 1 6.957497 0 0 14112 +wooten 1 6.957497 0 0 14113 +kbresourc 1 6.957497 0 0 14114 +pvsother 1 6.957497 0 0 14115 +genvoca 1 6.957497 0 0 14116 +ssgrg 1 6.957497 0 0 14117 +professorangela 1 6.957497 0 0 14118 +dappert 1 6.957497 0 0 14119 +studentguillermo 1 6.957497 0 0 14120 +jimenez 1 6.957497 0 0 14121 +perezph 1 6.957497 0 0 14122 +studentjeff 1 6.957497 0 0 14123 +thomasph 1 6.957497 0 0 14124 +studentl 1 6.957497 0 0 14125 +studentyanni 1 6.957497 0 0 14126 +studentk 1 6.957497 0 0 14127 +shepherdresearch 1 6.957497 0 0 14128 +associateform 1 6.957497 0 0 14129 +datesdinesh 1 6.957497 0 0 14130 +dasph 1 6.957497 0 0 14131 +milli 1 6.957497 0 0 14132 +villarrealph 1 6.957497 0 0 14133 +geracipostdoc 1 6.957497 0 0 14134 +sirkinph 1 6.957497 0 0 14135 +sankar 1 6.957497 0 0 14136 +dasarim 1 6.957497 0 0 14137 +starter 1 6.957497 0 0 14138 +reengin 1 6.957497 0 0 14139 +generatorsautom 1 6.957497 0 0 14140 +pageokai 1 6.957497 0 0 14141 +overdu 1 6.957497 0 0 14142 +mead 1 6.957497 0 0 14143 +psion 1 6.957497 0 0 14144 +palmtop 1 6.957497 0 0 14145 +anastasi 1 6.957497 0 0 14146 +poke 1 6.957497 0 0 14147 +ala 1 6.957497 0 0 14148 +bebox 1 6.957497 0 0 14149 +sfkaplan 1 6.957497 0 0 14150 +shaob 1 6.957497 0 0 14151 +cyberhom 1 6.957497 0 0 14152 +hardvar 1 6.957497 0 0 14153 +verifc 1 6.957497 0 0 14154 +shma 1 6.957497 0 0 14155 +kumarshailesh 1 6.957497 0 0 14156 +kumarth 1 6.957497 0 0 14157 +skumar 1 6.957497 0 0 14158 +resumeresearch 1 6.957497 0 0 14159 +publicationscontact 1 6.957497 0 0 14160 +mesrcm 1 6.957497 0 0 14161 +spiritu 1 6.957497 0 0 14162 +offersom 1 6.957497 0 0 14163 +linkscognit 1 6.957497 0 0 14164 +scienceutc 1 6.957497 0 0 14165 +researchutc 1 6.957497 0 0 14166 +groupresearch 1 6.957497 0 0 14167 +neuroevolut 1 6.957497 0 0 14168 +predistort 1 6.957497 0 0 14169 +goetz 1 6.957497 0 0 14170 +bari 1 6.957497 0 0 14171 +bord 1 6.957497 0 0 14172 +aprl 1 6.957497 0 0 14173 +whiti 1 6.957497 0 0 14174 +offernet 1 6.957497 0 0 14175 +assistancesearch 1 6.957497 0 0 14176 +institutewww 1 6.957497 0 0 14177 +infoindia 1 6.957497 0 0 14178 +alchemi 1 6.957497 0 0 14179 +sitessmaragd 1 6.957497 0 0 14180 +eduyanni 1 6.957497 0 0 14181 +smaragdakisunivers 1 6.957497 0 0 14182 +departmenttai 1 6.957497 0 0 14183 +minion 1 6.957497 0 0 14184 +asphalt 1 6.957497 0 0 14185 +moonlight 1 6.957497 0 0 14186 +nerv 1 6.957497 0 0 14187 +dy 1 6.957497 0 0 14188 +pania 1 6.957497 0 0 14189 +leaf 1 6.957497 0 0 14190 +afloat 1 6.957497 0 0 14191 +waterfal 1 6.957497 0 0 14192 +southerart 1 6.957497 0 0 14193 +southerresearchbuild 1 6.957497 0 0 14194 +reportsouth 1 6.957497 0 0 14195 +ramachandransowmya 1 6.957497 0 0 14196 +ramachandranmachin 1 6.957497 0 0 14197 +ofartif 1 6.957497 0 0 14198 +learningbayesian 1 6.957497 0 0 14199 +withhidden 1 6.957497 0 0 14200 +thisproblem 1 6.957497 0 0 14201 +raocurr 1 6.957497 0 0 14202 +systemoper 1 6.957497 0 0 14203 +multimediagroup 1 6.957497 0 0 14204 +harrickvinpublicationsminegroupcontact 1 6.957497 0 0 14205 +informationofficetai 1 6.957497 0 0 14206 +miscellaneousotherinterest 1 6.957497 0 0 14207 +pagespicturesof 1 6.957497 0 0 14208 +toweraustin 1 6.957497 0 0 14209 +kannada 1 6.957497 0 0 14210 +koota 1 6.957497 0 0 14211 +tamil 1 6.957497 0 0 14212 +sangam 1 6.957497 0 0 14213 +sunghe 1 6.957497 0 0 14214 +choisunghe 1 6.957497 0 0 14215 +nuec 1 6.957497 0 0 14216 +choiemail 1 6.957497 0 0 14217 +kakkadsheet 1 6.957497 0 0 14218 +kakkadcontact 1 6.957497 0 0 14219 +storagesystem 1 6.957497 0 0 14220 +faulttim 1 6.957497 0 0 14221 +whilefinish 1 6.957497 0 0 14222 +svkakkad 1 6.957497 0 0 14223 +shengm 1 6.957497 0 0 14224 +homepageabout 1 6.957497 0 0 14225 +classmatesclass 1 6.957497 0 0 14226 +ustc 1 6.957497 0 0 14227 +sciencesus 1 6.957497 0 0 14228 +linksut 1 6.957497 0 0 14229 +libraryutaccesschines 1 6.957497 0 0 14230 +associationchina 1 6.957497 0 0 14231 +chinesechinainternet 1 6.957497 0 0 14232 +magazinestsinghua 1 6.957497 0 0 14233 +bbsncic 1 6.957497 0 0 14234 +bbschines 1 6.957497 0 0 14235 +classicsabout 1 6.957497 0 0 14236 +austinwhat 1 6.957497 0 0 14237 +citylimitsclassifi 1 6.957497 0 0 14238 +austinto 1 6.957497 0 0 14239 +renu 1 6.957497 0 0 14240 +tewarirenu 1 6.957497 0 0 14241 +tewariwhat 1 6.957497 0 0 14242 +addresshom 1 6.957497 0 0 14243 +tewari 1 6.957497 0 0 14244 +tumlin 1 6.957497 0 0 14245 +brenda 1 6.957497 0 0 14246 +ladd 1 6.957497 0 0 14247 +authenticationprotocol 1 6.957497 0 0 14248 +jiao 1 6.957497 0 0 14249 +lucent 1 6.957497 0 0 14250 +nank 1 6.957497 0 0 14251 +summerluc 1 6.957497 0 0 14252 +thissumm 1 6.957497 0 0 14253 +plexton 1 6.957497 0 0 14254 +libari 1 6.957497 0 0 14255 +glut 1 6.957497 0 0 14256 +mariah 1 6.957497 0 0 14257 +boyz 1 6.957497 0 0 14258 +babyfac 1 6.957497 0 0 14259 +haiq 1 6.957497 0 0 14260 +shenfeng 1 6.957497 0 0 14261 +deskmat 1 6.957497 0 0 14262 +pageulf 1 6.957497 0 0 14263 +hermjakobhello 1 6.957497 0 0 14264 +thedept 1 6.957497 0 0 14265 +austinand 1 6.957497 0 0 14266 +aboutexampl 1 6.957497 0 0 14267 +translationund 1 6.957497 0 0 14268 +einet 1 6.957497 0 0 14269 +dernir 1 6.957497 0 0 14270 +nouvel 1 6.957497 0 0 14271 +alsac 1 6.957497 0 0 14272 +spiegel 1 6.957497 0 0 14273 +svenska 1 6.957497 0 0 14274 +dagbladet 1 6.957497 0 0 14275 +tagesspiegel 1 6.957497 0 0 14276 +vanguardia 1 6.957497 0 0 14277 +welt 1 6.957497 0 0 14278 +zeitplusacm 1 6.957497 0 0 14279 +moltkestr 1 6.957497 0 0 14280 +bnde 1 6.957497 0 0 14281 +germanyphon 1 6.957497 0 0 14282 +unicron 1 6.957497 0 0 14283 +financ 1 6.957497 0 0 14284 +heeia 1 6.957497 0 0 14285 +kaneoh 1 6.957497 0 0 14286 +woodrow 1 6.957497 0 0 14287 +bledsoepet 1 6.957497 0 0 14288 +americanmathemat 1 6.957497 0 0 14289 +onartifici 1 6.957497 0 0 14290 +interestautomat 1 6.957497 0 0 14291 +theoremproof 1 6.957497 0 0 14292 +levelplan 1 6.957497 0 0 14293 +werthsenior 1 6.957497 0 0 14294 +emori 1 6.957497 0 0 14295 +accredit 1 6.957497 0 0 14296 +sobek 1 6.957497 0 0 14297 +daleno 1 6.957497 0 0 14298 +exet 1 6.957497 0 0 14299 +interestdatabas 1 6.957497 0 0 14300 +stagei 1 6.957497 0 0 14301 +studiedinclud 1 6.957497 0 0 14302 +brumfield 1 6.957497 0 0 14303 +brumfieldsenior 1 6.957497 0 0 14304 +interestperform 1 6.957497 0 0 14305 +designersof 1 6.957497 0 0 14306 +eachresourc 1 6.957497 0 0 14307 +tasksawait 1 6.957497 0 0 14308 +computationof 1 6.957497 0 0 14309 +graf 1 6.957497 0 0 14310 +verdi 1 6.957497 0 0 14311 +renka 1 6.957497 0 0 14312 +clinedavid 1 6.957497 0 0 14313 +bruton 1 6.957497 0 0 14314 +statisticalcomput 1 6.957497 0 0 14315 +socialrespons 1 6.957497 0 0 14316 +whichcan 1 6.957497 0 0 14317 +constructionof 1 6.957497 0 0 14318 +formathemat 1 6.957497 0 0 14319 +developmentha 1 6.957497 0 0 14320 +tension 1 6.957497 0 0 14321 +buoi 1 6.957497 0 0 14322 +barrier 1 6.957497 0 0 14323 +moler 1 6.957497 0 0 14324 +wybe 1 6.957497 0 0 14325 +dijkstraschlumberg 1 6.957497 0 0 14326 +sciencesprofessor 1 6.957497 0 0 14327 +mathematicskandidaatsexamen 1 6.957497 0 0 14328 +doctora 1 6.957497 0 0 14329 +examen 1 6.957497 0 0 14330 +leydenph 1 6.957497 0 0 14331 +amsterdamhonor 1 6.957497 0 0 14332 +awardsacm 1 6.957497 0 0 14333 +sciencesdistinguish 1 6.957497 0 0 14334 +societyafip 1 6.957497 0 0 14335 +honori 1 6.957497 0 0 14336 +causa 1 6.957497 0 0 14337 +belfastarea 1 6.957497 0 0 14338 +systemssummari 1 6.957497 0 0 14339 +argumentso 1 6.957497 0 0 14340 +ofform 1 6.957497 0 0 14341 +yurkananlectur 1 6.957497 0 0 14342 +internetwork 1 6.957497 0 0 14343 +cobb 1 6.957497 0 0 14344 +informaticsconfer 1 6.957497 0 0 14345 +gallagherlectur 1 6.957497 0 0 14346 +loyola 1 6.957497 0 0 14347 +necc 1 6.957497 0 0 14348 +andretent 1 6.957497 0 0 14349 +scienceeduc 1 6.957497 0 0 14350 +jenevein 1 6.957497 0 0 14351 +wafer 1 6.957497 0 0 14352 +menez 1 6.957497 0 0 14353 +malek 1 6.957497 0 0 14354 +interestinterconnect 1 6.957497 0 0 14355 +interconnectionnetwork 1 6.957497 0 0 14356 +restsin 1 6.957497 0 0 14357 +interconnectionstructur 1 6.957497 0 0 14358 +kindof 1 6.957497 0 0 14359 +beingappli 1 6.957497 0 0 14360 +communicationswitch 1 6.957497 0 0 14361 +iscontinu 1 6.957497 0 0 14362 +performanceport 1 6.957497 0 0 14363 +tobenchmark 1 6.957497 0 0 14364 +memorysystem 1 6.957497 0 0 14365 +kyklo 1 6.957497 0 0 14366 +laranjeira 1 6.957497 0 0 14367 +ullah 1 6.957497 0 0 14368 +metrix 1 6.957497 0 0 14369 +norman 1 6.957497 0 0 14370 +martinprofessor 1 6.957497 0 0 14371 +ofphilosophi 1 6.957497 0 0 14372 +asinterpret 1 6.957497 0 0 14373 +whichexploit 1 6.957497 0 0 14374 +intension 1 6.957497 0 0 14375 +significantearli 1 6.957497 0 0 14376 +missil 1 6.957497 0 0 14377 +trackingalgorithm 1 6.957497 0 0 14378 +radar 1 6.957497 0 0 14379 +inmani 1 6.957497 0 0 14380 +mokassoci 1 6.957497 0 0 14381 +professorfaculti 1 6.957497 0 0 14382 +federationof 1 6.957497 0 0 14383 +interestfault 1 6.957497 0 0 14384 +includespecif 1 6.957497 0 0 14385 +forguarante 1 6.957497 0 0 14386 +thetrad 1 6.957497 0 0 14387 +criticalsystem 1 6.957497 0 0 14388 +theanalysi 1 6.957497 0 0 14389 +industrialprocess 1 6.957497 0 0 14390 +ofnav 1 6.957497 0 0 14391 +forreal 1 6.957497 0 0 14392 +tilborg 1 6.957497 0 0 14393 +heitmey 1 6.957497 0 0 14394 +labaw 1 6.957497 0 0 14395 +aptl 1 6.957497 0 0 14396 +lecturerb 1 6.957497 0 0 14397 +aero 1 6.957497 0 0 14398 +universityprofession 1 6.957497 0 0 14399 +servicecoordin 1 6.957497 0 0 14400 +vol 1 6.957497 0 0 14401 +educationsummari 1 6.957497 0 0 14402 +potentialfor 1 6.957497 0 0 14403 +infal 1 6.957497 0 0 14404 +sectionof 1 6.957497 0 0 14405 +onfunct 1 6.957497 0 0 14406 +rastogi 1 6.957497 0 0 14407 +pod 1 6.957497 0 0 14408 +silberschatzprofessorship 1 6.957497 0 0 14409 +sciencesm 1 6.957497 0 0 14410 +brookhonor 1 6.957497 0 0 14411 +serviceiee 1 6.957497 0 0 14412 +futureof 1 6.957497 0 0 14413 +basedsystemssummari 1 6.957497 0 0 14414 +recentresearch 1 6.957497 0 0 14415 +multidatabas 1 6.957497 0 0 14416 +transactionmanag 1 6.957497 0 0 14417 +ganguli 1 6.957497 0 0 14418 +tsur 1 6.957497 0 0 14419 +datalog 1 6.957497 0 0 14420 +programexecut 1 6.957497 0 0 14421 +jagadish 1 6.957497 0 0 14422 +lieuwen 1 6.957497 0 0 14423 +dali 1 6.957497 0 0 14424 +biliri 1 6.957497 0 0 14425 +storageserv 1 6.957497 0 0 14426 +storageand 1 6.957497 0 0 14427 +relationaldata 1 6.957497 0 0 14428 +simmonsquinci 1 6.957497 0 0 14429 +professoremeritu 1 6.957497 0 0 14430 +psychologymai 1 6.957497 0 0 14431 +balayoghanv 1 6.957497 0 0 14432 +balayoghancontact 1 6.957497 0 0 14433 +ineosdi 1 6.957497 0 0 14434 +bookmarksvbb 1 6.957497 0 0 14435 +eurograph 1 6.957497 0 0 14436 +vinharrick 1 6.957497 0 0 14437 +electronicimag 1 6.957497 0 0 14438 +kaohsiung 1 6.957497 0 0 14439 +rostock 1 6.957497 0 0 14440 +interestmultimedia 1 6.957497 0 0 14441 +anend 1 6.957497 0 0 14442 +thintern 1 6.957497 0 0 14443 +designingmultimedia 1 6.957497 0 0 14444 +foundationresearch 1 6.957497 0 0 14445 +electricresearch 1 6.957497 0 0 14446 +electrospacesystem 1 6.957497 0 0 14447 +undergraduatefrom 1 6.957497 0 0 14448 +interestsreportsy 1 6.957497 0 0 14449 +ramachandranvijaya 1 6.957497 0 0 14450 +ramachandranblakemor 1 6.957497 0 0 14451 +sanjoi 1 6.957497 0 0 14452 +kelsen 1 6.957497 0 0 14453 +ramgop 1 6.957497 0 0 14454 +suel 1 6.957497 0 0 14455 +yuke 1 6.957497 0 0 14456 +grouput 1 6.957497 0 0 14457 +emba 1 6.957497 0 0 14458 +tsan 1 6.957497 0 0 14459 +tshsu 1 6.957497 0 0 14460 +pierr 1 6.957497 0 0 14461 +korupolu 1 6.957497 0 0 14462 +mackenzi 1 6.957497 0 0 14463 +philmac 1 6.957497 0 0 14464 +idbsu 1 6.957497 0 0 14465 +mettu 1 6.957497 0 0 14466 +santanu 1 6.957497 0 0 14467 +ssinha 1 6.957497 0 0 14468 +torsten 1 6.957497 0 0 14469 +lowvolum 1 6.957497 0 0 14470 +themidsouth 1 6.957497 0 0 14471 +midsouthwest 1 6.957497 0 0 14472 +keynot 1 6.957497 0 0 14473 +atut 1 6.957497 0 0 14474 +organizedanoth 1 6.957497 0 0 14475 +methodist 1 6.957497 0 0 14476 +oklahoma 1 6.957497 0 0 14477 +beheld 1 6.957497 0 0 14478 +algorithmsmail 1 6.957497 0 0 14479 +usuallytri 1 6.957497 0 0 14480 +ofaustin 1 6.957497 0 0 14481 +thatinclud 1 6.957497 0 0 14482 +sponsorsth 1 6.957497 0 0 14483 +interestar 1 6.957497 0 0 14484 +thesigact 1 6.957497 0 0 14485 +eccc 1 6.957497 0 0 14486 +rolodex 1 6.957497 0 0 14487 +lifschitzwhen 1 6.957497 0 0 14488 +burden 1 6.957497 0 0 14489 +downcast 1 6.957497 0 0 14490 +gladli 1 6.957497 0 0 14491 +therealm 1 6.957497 0 0 14492 +lucid 1 6.957497 0 0 14493 +grasp 1 6.957497 0 0 14494 +isobtain 1 6.957497 0 0 14495 +pleasantli 1 6.957497 0 0 14496 +conceptform 1 6.957497 0 0 14497 +bernai 1 6.957497 0 0 14498 +lifschitzgottesman 1 6.957497 0 0 14499 +texasat 1 6.957497 0 0 14500 +forartifici 1 6.957497 0 0 14501 +intelligenceb 1 6.957497 0 0 14502 +branchof 1 6.957497 0 0 14503 +steklov 1 6.957497 0 0 14504 +interesttempor 1 6.957497 0 0 14505 +reasoningand 1 6.957497 0 0 14506 +aboutactionslog 1 6.957497 0 0 14507 +reasoningteachingoth 1 6.957497 0 0 14508 +activitiespap 1 6.957497 0 0 14509 +bylifschitz 1 6.957497 0 0 14510 +studentsrecommend 1 6.957497 0 0 14511 +speechgood 1 6.957497 0 0 14512 +madelein 1 6.957497 0 0 14513 +albright 1 6.957497 0 0 14514 +regain 1 6.957497 0 0 14515 +soviet 1 6.957497 0 0 14516 +recycl 1 6.957497 0 0 14517 +actbad 1 6.957497 0 0 14518 +sequest 1 6.957497 0 0 14519 +archeolog 1 6.957497 0 0 14520 +societynot 1 6.957497 0 0 14521 +redrawn 1 6.957497 0 0 14522 +basisoth 1 6.957497 0 0 14523 +amnesti 1 6.957497 0 0 14524 +monthcontact 1 6.957497 0 0 14525 +vurgun 1 6.957497 0 0 14526 +sengul 1 6.957497 0 0 14527 +sengulvurgun 1 6.957497 0 0 14528 +ammainli 1 6.957497 0 0 14529 +evolutionaryalgorithm 1 6.957497 0 0 14530 +ofprefer 1 6.957497 0 0 14531 +skillacquisit 1 6.957497 0 0 14532 +mindto 1 6.957497 0 0 14533 +walbourn 1 6.957497 0 0 14534 +walbournmi 1 6.957497 0 0 14535 +charybdi 1 6.957497 0 0 14536 +grin 1 6.957497 0 0 14537 +professorwalk 1 6.957497 0 0 14538 +teachand 1 6.957497 0 0 14539 +atgrinnel 1 6.957497 0 0 14540 +robertson 1 6.957497 0 0 14541 +warshawlan 1 6.957497 0 0 14542 +laboratoryinvolv 1 6.957497 0 0 14543 +andat 1 6.957497 0 0 14544 +arlut 1 6.957497 0 0 14545 +wchen 1 6.957497 0 0 14546 +ltwilson 1 6.957497 0 0 14547 +headshot 1 6.957497 0 0 14548 +workson 1 6.957497 0 0 14549 +teachingin 1 6.957497 0 0 14550 +sciencesnot 1 6.957497 0 0 14551 +sin 1 6.957497 0 0 14552 +cent 1 6.957497 0 0 14553 +christiani 1 6.957497 0 0 14554 +alittl 1 6.957497 0 0 14555 +totallyunexpect 1 6.957497 0 0 14556 +compulsori 1 6.957497 0 0 14557 +thechristian 1 6.957497 0 0 14558 +tobecom 1 6.957497 0 0 14559 +slife 1 6.957497 0 0 14560 +deepli 1 6.957497 0 0 14561 +mylif 1 6.957497 0 0 14562 +misconcept 1 6.957497 0 0 14563 +christianwa 1 6.957497 0 0 14564 +christianand 1 6.957497 0 0 14565 +lovedeveri 1 6.957497 0 0 14566 +achristian 1 6.957497 0 0 14567 +virtuou 1 6.957497 0 0 14568 +thefellowship 1 6.957497 0 0 14569 +flesh 1 6.957497 0 0 14570 +sinless 1 6.957497 0 0 14571 +sympath 1 6.957497 0 0 14572 +weconfess 1 6.957497 0 0 14573 +cleans 1 6.957497 0 0 14574 +unright 1 6.957497 0 0 14575 +astheir 1 6.957497 0 0 14576 +saviour 1 6.957497 0 0 14577 +gratefulli 1 6.957497 0 0 14578 +redempt 1 6.957497 0 0 14579 +fortheir 1 6.957497 0 0 14580 +justifi 1 6.957497 0 0 14581 +roman 1 6.957497 0 0 14582 +thecontrari 1 6.957497 0 0 14583 +givesu 1 6.957497 0 0 14584 +physician 1 6.957497 0 0 14585 +onour 1 6.957497 0 0 14586 +thetruth 1 6.957497 0 0 14587 +thankgod 1 6.957497 0 0 14588 +wkmak 1 6.957497 0 0 14589 +ultrason 1 6.957497 0 0 14590 +rhino 1 6.957497 0 0 14591 +robokreta 1 6.957497 0 0 14592 +wyle 1 6.957497 0 0 14593 +clarinet 1 6.957497 0 0 14594 +mobilerobot 1 6.957497 0 0 14595 +rover 1 6.957497 0 0 14596 +tall 1 6.957497 0 0 14597 +robocac 1 6.957497 0 0 14598 +robofest 1 6.957497 0 0 14599 +besar 1 6.957497 0 0 14600 +kicik 1 6.957497 0 0 14601 +chassi 1 6.957497 0 0 14602 +andqualit 1 6.957497 0 0 14603 +malaysia 1 6.957497 0 0 14604 +interestsavid 1 6.957497 0 0 14605 +usba 1 6.957497 0 0 14606 +miscellaneousinterest 1 6.957497 0 0 14607 +qaustin 1 6.957497 0 0 14608 +xingang 1 6.957497 0 0 14609 +delight 1 6.957497 0 0 14610 +temporaryresort 1 6.957497 0 0 14611 +llgradual 1 6.957497 0 0 14612 +havesometh 1 6.957497 0 0 14613 +foliag 1 6.957497 0 0 14614 +miata 1 6.957497 0 0 14615 +xguo 1 6.957497 0 0 14616 +pagemov 1 6.957497 0 0 14617 +wwwdavid 1 6.957497 0 0 14618 +assad 1 6.957497 0 0 14619 +brothersin 1 6.957497 0 0 14620 +parkeningi 1 6.957497 0 0 14621 +guitarist 1 6.957497 0 0 14622 +ofconcert 1 6.957497 0 0 14623 +reconcili 1 6.957497 0 0 14624 +rekindl 1 6.957497 0 0 14625 +theamsterdam 1 6.957497 0 0 14626 +trio 1 6.957497 0 0 14627 +flair 1 6.957497 0 0 14628 +platini 1 6.957497 0 0 14629 +magazinepublish 1 6.957497 0 0 14630 +minist 1 6.957497 0 0 14631 +absmiddl 1 6.957497 0 0 14632 +homepageto 1 6.957497 0 0 14633 +homepagey 1 6.957497 0 0 14634 +milanitalian 1 6.957497 0 0 14635 +soccerk 1 6.957497 0 0 14636 +soccernba 1 6.957497 0 0 14637 +sitefox 1 6.957497 0 0 14638 +sportschicago 1 6.957497 0 0 14639 +bullsmichael 1 6.957497 0 0 14640 +jordannflnhlc 1 6.957497 0 0 14641 +rankingmarri 1 6.957497 0 0 14642 +childrenseinfeldcomput 1 6.957497 0 0 14643 +sciencesutilitieshtml 1 6.957497 0 0 14644 +convertersimag 1 6.957497 0 0 14645 +collectionssystemshtmllatexcgitcl 1 6.957497 0 0 14646 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 1 6.957497 0 0 14647 +tmiscinternet 1 6.957497 0 0 14648 +parcel 1 6.957497 0 0 14649 +usp 1 6.957497 0 0 14650 +fedexus 1 6.957497 0 0 14651 +guidefun 1 6.957497 0 0 14652 +todayu 1 6.957497 0 0 14653 +newsstarwavesupermodel 1 6.957497 0 0 14654 +yonglu 1 6.957497 0 0 14655 +youngashbel 1 6.957497 0 0 14656 +webb 1 6.957497 0 0 14657 +issueded 1 6.957497 0 0 14658 +mathematicalsocieti 1 6.957497 0 0 14659 +matrixappl 1 6.957497 0 0 14660 +numericallinear 1 6.957497 0 0 14661 +partialdifferenti 1 6.957497 0 0 14662 +oflinear 1 6.957497 0 0 14663 +andspars 1 6.957497 0 0 14664 +basedon 1 6.957497 0 0 14665 +beingextend 1 6.957497 0 0 14666 +distributedmemori 1 6.957497 0 0 14667 +methodsbas 1 6.957497 0 0 14668 +multilevel 1 6.957497 0 0 14669 +beingdevelop 1 6.957497 0 0 14670 +publicationsd 1 6.957497 0 0 14671 +srivasiava 1 6.957497 0 0 14672 +yanushauska 1 6.957497 0 0 14673 +publ 1 6.957497 0 0 14674 +vona 1 6.957497 0 0 14675 +sepehrnoori 1 6.957497 0 0 14676 +son 1 6.957497 0 0 14677 +spike 1 6.957497 0 0 14678 +yonsei 1 6.957497 0 0 14679 +systembas 1 6.957497 0 0 14680 +laterali 1 6.957497 0 0 14681 +synerget 1 6.957497 0 0 14682 +actualspik 1 6.957497 0 0 14683 +slissom 1 6.957497 0 0 14684 +beself 1 6.957497 0 0 14685 +retinabi 1 6.957497 0 0 14686 +desynchron 1 6.957497 0 0 14687 +mozer 1 6.957497 0 0 14688 +hasselmo 1 6.957497 0 0 14689 +handwritten 1 6.957497 0 0 14690 +techic 1 6.957497 0 0 14691 +unord 1 6.957497 0 0 14692 +interestingcontact 1 6.957497 0 0 14693 +yuanj 1 6.957497 0 0 14694 +xuwint 1 6.957497 0 0 14695 +aziz 1 6.957497 0 0 14696 +pagechines 1 6.957497 0 0 14697 +hefei 1 6.957497 0 0 14698 +chinaunivers 1 6.957497 0 0 14699 +atmunich 1 6.957497 0 0 14700 +germanyunivers 1 6.957497 0 0 14701 +higham 1 6.957497 0 0 14702 +lifan 1 6.957497 0 0 14703 +guizhongustc 1 6.957497 0 0 14704 +hailiang 1 6.957497 0 0 14705 +yuhongfriend 1 6.957497 0 0 14706 +linsoftwar 1 6.957497 0 0 14707 +yufeng 1 6.957497 0 0 14708 +zhii 1 6.957497 0 0 14709 +guangzhou 1 6.957497 0 0 14710 +pagezhii 1 6.957497 0 0 14711 +mefrom 1 6.957497 0 0 14712 +canton 1 6.957497 0 0 14713 +dong 1 6.957497 0 0 14714 +zchen 1 6.957497 0 0 14715 +zhouxiao 1 6.957497 0 0 14716 +maggi 1 6.957497 0 0 14717 +xiao 1 6.957497 0 0 14718 +buaa 1 6.957497 0 0 14719 +newspag 1 6.957497 0 0 14720 +clike 1 6.957497 0 0 14721 +qinghi 1 6.957497 0 0 14722 +scinc 1 6.957497 0 0 14723 +findmor 1 6.957497 0 0 14724 +pekingunivers 1 6.957497 0 0 14725 +professionalinternetpc 1 6.957497 0 0 14726 +relatedmac 1 6.957497 0 0 14727 +relatedaft 1 6.957497 0 0 14728 +worknetscap 1 6.957497 0 0 14729 +testtwin 1 6.957497 0 0 14730 +eldertwin 1 6.957497 0 0 14731 +youngernetscap 1 6.957497 0 0 14732 +testanim 1 6.957497 0 0 14733 +titledanc 1 6.957497 0 0 14734 +testanoth 1 6.957497 0 0 14735 +testyet 1 6.957497 0 0 14736 +testfriendsthi 1 6.957497 0 0 14737 +xiaohai 1 6.957497 0 0 14738 +shan 1 6.957497 0 0 14739 +shinan 1 6.957497 0 0 14740 +qingunivers 1 6.957497 0 0 14741 +zhuqe 1 6.957497 0 0 14742 +organizationsinclud 1 6.957497 0 0 14743 +ouraffili 1 6.957497 0 0 14744 +regioninclud 1 6.957497 0 0 14745 +spotlightuwwin 1 6.957497 0 0 14746 +programmingcontesttwovideo 1 6.957497 0 0 14747 +initiativesourcolloquia 1 6.957497 0 0 14748 +mbonemajordon 1 6.957497 0 0 14749 +corporationdickkarp 1 6.957497 0 0 14750 +scienceprofessionalmast 1 6.957497 0 0 14751 +departmentoverview 1 6.957497 0 0 14752 +staffposit 1 6.957497 0 0 14753 +newscan 1 6.957497 0 0 14754 +berman 1 6.957497 0 0 14755 +debbi 1 6.957497 0 0 14756 +pageandrew 1 6.957497 0 0 14757 +tron 1 6.957497 0 0 14758 +bothpostscript 1 6.957497 0 0 14759 +andhtml 1 6.957497 0 0 14760 +poison 1 6.957497 0 0 14761 +donut 1 6.957497 0 0 14762 +stupidmi 1 6.957497 0 0 14763 +finkelstein 1 6.957497 0 0 14764 +photocopi 1 6.957497 0 0 14765 +cup 1 6.957497 0 0 14766 +limp 1 6.957497 0 0 14767 +andlack 1 6.957497 0 0 14768 +atprinceton 1 6.957497 0 0 14769 +tibco 1 6.957497 0 0 14770 +teknekron 1 6.957497 0 0 14771 +alarg 1 6.957497 0 0 14772 +calledumatata 1 6.957497 0 0 14773 +thehilari 1 6.957497 0 0 14774 +caff 1 6.957497 0 0 14775 +lardo 1 6.957497 0 0 14776 +chilli 1 6.957497 0 0 14777 +snoqualmi 1 6.957497 0 0 14778 +hyakutak 1 6.957497 0 0 14779 +marcu 1 6.957497 0 0 14780 +dither 1 6.957497 0 0 14781 +onunbalanc 1 6.957497 0 0 14782 +whichperform 1 6.957497 0 0 14783 +outweighth 1 6.957497 0 0 14784 +polygonrender 1 6.957497 0 0 14785 +percent 1 6.957497 0 0 14786 +onbalanc 1 6.957497 0 0 14787 +devr 1 6.957497 0 0 14788 +wasdesign 1 6.957497 0 0 14789 +andintend 1 6.957497 0 0 14790 +unifieddata 1 6.957497 0 0 14791 +queryfacil 1 6.957497 0 0 14792 +andpromot 1 6.957497 0 0 14793 +ofproperti 1 6.957497 0 0 14794 +thepart 1 6.957497 0 0 14795 +buildinst 1 6.957497 0 0 14796 +inmodel 1 6.957497 0 0 14797 +secondcad 1 6.957497 0 0 14798 +flexibledata 1 6.957497 0 0 14799 +mentorship 1 6.957497 0 0 14800 +bernheim 1 6.957497 0 0 14801 +ofdigit 1 6.957497 0 0 14802 +gorp 1 6.957497 0 0 14803 +guideto 1 6.957497 0 0 14804 +recreationfun 1 6.957497 0 0 14805 +abig 1 6.957497 0 0 14806 +scoobi 1 6.957497 0 0 14807 +dooeduc 1 6.957497 0 0 14808 +summerwork 1 6.957497 0 0 14809 +highlyrecommend 1 6.957497 0 0 14810 +michail 1 6.957497 0 0 14811 +michailgradu 1 6.957497 0 0 14812 +studenti 1 6.957497 0 0 14813 +followingarea 1 6.957497 0 0 14814 +summationfor 1 6.957497 0 0 14815 +opsi 1 6.957497 0 0 14816 +appletdesign 1 6.957497 0 0 14817 +combinesprogram 1 6.957497 0 0 14818 +lunar 1 6.957497 0 0 14819 +lander 1 6.957497 0 0 14820 +gamethat 1 6.957497 0 0 14821 +unwillingto 1 6.957497 0 0 14822 +embarrass 1 6.957497 0 0 14823 +publishedincorrect 1 6.957497 0 0 14824 +unconvent 1 6.957497 0 0 14825 +proofstyl 1 6.957497 0 0 14826 +theyhav 1 6.957497 0 0 14827 +wasnot 1 6.957497 0 0 14828 +stylethat 1 6.957497 0 0 14829 +lesli 1 6.957497 0 0 14830 +lamport 1 6.957497 0 0 14831 +wayi 1 6.957497 0 0 14832 +theother 1 6.957497 0 0 14833 +stanfordin 1 6.957497 0 0 14834 +aon 1 6.957497 0 0 14835 +inberkelei 1 6.957497 0 0 14836 +yeara 1 6.957497 0 0 14837 +talksanderson 1 6.957497 0 0 14838 +syllabl 1 6.957497 0 0 14839 +hungari 1 6.957497 0 0 14840 +birthplac 1 6.957497 0 0 14841 +homepageanhai 1 6.957497 0 0 14842 +vietnam 1 6.957497 0 0 14843 +iwent 1 6.957497 0 0 14844 +kossuth 1 6.957497 0 0 14845 +lajo 1 6.957497 0 0 14846 +debrecen 1 6.957497 0 0 14847 +underuncertainti 1 6.957497 0 0 14848 +calm 1 6.957497 0 0 14849 +invietnames 1 6.957497 0 0 14850 +nghean 1 6.957497 0 0 14851 +haiphong 1 6.957497 0 0 14852 +folkswer 1 6.957497 0 0 14853 +younger 1 6.957497 0 0 14854 +theysimpli 1 6.957497 0 0 14855 +namehaian 1 6.957497 0 0 14856 +comtemporari 1 6.957497 0 0 14857 +snapshotsanhai 1 6.957497 0 0 14858 +onit 1 6.957497 0 0 14859 +andb 1 6.957497 0 0 14860 +inextens 1 6.957497 0 0 14861 +compilersupport 1 6.957497 0 0 14862 +synthesisfor 1 6.957497 0 0 14863 +fukunaga 1 6.957497 0 0 14864 +partovi 1 6.957497 0 0 14865 +christensen 1 6.957497 0 0 14866 +reiss 1 6.957497 0 0 14867 +shuman 1 6.957497 0 0 14868 +leapfrog 1 6.957497 0 0 14869 +lossili 1 6.957497 0 0 14870 +animationthat 1 6.957497 0 0 14871 +cartwheel 1 6.957497 0 0 14872 +andshuffl 1 6.957497 0 0 14873 +andcollaps 1 6.957497 0 0 14874 +isjust 1 6.957497 0 0 14875 +tosmooth 1 6.957497 0 0 14876 +thetalk 1 6.957497 0 0 14877 +boinge 1 6.957497 0 0 14878 +michalowskidepart 1 6.957497 0 0 14879 +engineeringmail 1 6.957497 0 0 14880 +bitsthank 1 6.957497 0 0 14881 +headscapewhenev 1 6.957497 0 0 14882 +gradstud 1 6.957497 0 0 14883 +inlinguist 1 6.957497 0 0 14884 +ultrahotlist 1 6.957497 0 0 14885 +ofal 1 6.957497 0 0 14886 +forsometh 1 6.957497 0 0 14887 +thave 1 6.957497 0 0 14888 +urouletteto 1 6.957497 0 0 14889 +ofwhich 1 6.957497 0 0 14890 +songsand 1 6.957497 0 0 14891 +fictiti 1 6.957497 0 0 14892 +puriti 1 6.957497 0 0 14893 +tokeep 1 6.957497 0 0 14894 +pagesfrom 1 6.957497 0 0 14895 +aslfingerspel 1 6.957497 0 0 14896 +blatantli 1 6.957497 0 0 14897 +chamberlain 1 6.957497 0 0 14898 +michalowski 1 6.957497 0 0 14899 +sanityerad 1 6.957497 0 0 14900 +beamepaul 1 6.957497 0 0 14901 +computationalcomplex 1 6.957497 0 0 14902 +academicyear 1 6.957497 0 0 14903 +presidentialyoung 1 6.957497 0 0 14904 +inproposit 1 6.957497 0 0 14905 +enthusiasm 1 6.957497 0 0 14906 +cancompens 1 6.957497 0 0 14907 +beckerdavid 1 6.957497 0 0 14908 +beckercontact 1 6.957497 0 0 14909 +makingspina 1 6.957497 0 0 14910 +drvier 1 6.957497 0 0 14911 +bethel 1 6.957497 0 0 14912 +men 1 6.957497 0 0 14913 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 1 6.957497 0 0 14914 +handbal 1 6.957497 0 0 14915 +playracquetballgolftenni 1 6.957497 0 0 14916 +bridgecampingcanoeingdisc 1 6.957497 0 0 14917 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 1 6.957497 0 0 14918 +skiingweightliftingwhitewat 1 6.957497 0 0 14919 +raftinghorseback 1 6.957497 0 0 14920 +ridingmountain 1 6.957497 0 0 14921 +bikingin 1 6.957497 0 0 14922 +currenc 1 6.957497 0 0 14923 +ssto 1 6.957497 0 0 14924 +rlv 1 6.957497 0 0 14925 +theologi 1 6.957497 0 0 14926 +centurai 1 6.957497 0 0 14927 +boot 1 6.957497 0 0 14928 +maeda 1 6.957497 0 0 14929 +midwai 1 6.957497 0 0 14930 +zekauska 1 6.957497 0 0 14931 +sawdon 1 6.957497 0 0 14932 +machnix 1 6.957497 0 0 14933 +drave 1 6.957497 0 0 14934 +forin 1 6.957497 0 0 14935 +respit 1 6.957497 0 0 14936 +asigmetr 1 6.957497 0 0 14937 +thestairmast 1 6.957497 0 0 14938 +extensibleoper 1 6.957497 0 0 14939 +parallelnetwork 1 6.957497 0 0 14940 +thesequel 1 6.957497 0 0 14941 +optimizationcours 1 6.957497 0 0 14942 +youmight 1 6.957497 0 0 14943 +extensiblesystem 1 6.957497 0 0 14944 +theodor 1 6.957497 0 0 14945 +implementationj 1 6.957497 0 0 14946 +defouw 1 6.957497 0 0 14947 +alapat 1 6.957497 0 0 14948 +becker 1 6.957497 0 0 14949 +sharedmemori 1 6.957497 0 0 14950 +conflictresolut 1 6.957497 0 0 14951 +uwtechn 1 6.957497 0 0 14952 +demultiplex 1 6.957497 0 0 14953 +yuhara 1 6.957497 0 0 14954 +andmostli 1 6.957497 0 0 14955 +moblic 1 6.957497 0 0 14956 +wheeler 1 6.957497 0 0 14957 +ginsburg 1 6.957497 0 0 14958 +inoper 1 6.957497 0 0 14959 +harrier 1 6.957497 0 0 14960 +budget 1 6.957497 0 0 14961 +doorenbo 1 6.957497 0 0 14962 +pagebob 1 6.957497 0 0 14963 +bobd 1 6.957497 0 0 14964 +netbot 1 6.957497 0 0 14965 +boffo 1 6.957497 0 0 14966 +zdnet 1 6.957497 0 0 14967 +anchordesk 1 6.957497 0 0 14968 +savvysearch 1 6.957497 0 0 14969 +inktomi 1 6.957497 0 0 14970 +crawler 1 6.957497 0 0 14971 +hotbot 1 6.957497 0 0 14972 +pointcom 1 6.957497 0 0 14973 +switchboard 1 6.957497 0 0 14974 +cnnfn 1 6.957497 0 0 14975 +newshour 1 6.957497 0 0 14976 +globe 1 6.957497 0 0 14977 +feed 1 6.957497 0 0 14978 +fedworld 1 6.957497 0 0 14979 +deficit 1 6.957497 0 0 14980 +debt 1 6.957497 0 0 14981 +concord 1 6.957497 0 0 14982 +coalit 1 6.957497 0 0 14983 +bipartisan 1 6.957497 0 0 14984 +andfun 1 6.957497 0 0 14985 +pagebobd 1 6.957497 0 0 14986 +principalresearch 1 6.957497 0 0 14987 +activitiesuwconstraint 1 6.957497 0 0 14988 +domainsourc 1 6.957497 0 0 14989 +democraci 1 6.957497 0 0 14990 +qualsproject 1 6.957497 0 0 14991 +teachingher 1 6.957497 0 0 14992 +informationhistori 1 6.957497 0 0 14993 +paloalto 1 6.957497 0 0 14994 +simulationlaboratori 1 6.957497 0 0 14995 +doctoralfellow 1 6.957497 0 0 14996 +ofedinburgh 1 6.957497 0 0 14997 +symbolicalgebra 1 6.957497 0 0 14998 +andexcept 1 6.957497 0 0 14999 +europarc 1 6.957497 0 0 15000 +pagebrad 1 6.957497 0 0 15001 +chamberlainphoto 1 6.957497 0 0 15002 +perkowitzth 1 6.957497 0 0 15003 +mumei 1 6.957497 0 0 15004 +brendan 1 6.957497 0 0 15005 +pagebrendan 1 6.957497 0 0 15006 +mumeyi 1 6.957497 0 0 15007 +information 1 6.957497 0 0 15008 +vitaein 1 6.957497 0 0 15009 +htmlorpostscriptformat 1 6.957497 0 0 15010 +landmark 1 6.957497 0 0 15011 +tosolv 1 6.957497 0 0 15012 +groupher 1 6.957497 0 0 15013 +papersb 1 6.957497 0 0 15014 +candidaci 1 6.957497 0 0 15015 +klaw 1 6.957497 0 0 15016 +ofdiscret 1 6.957497 0 0 15017 +containsom 1 6.957497 0 0 15018 +recreationhik 1 6.957497 0 0 15019 +coffeeto 1 6.957497 0 0 15020 +sailingand 1 6.957497 0 0 15021 +brickerlauren 1 6.957497 0 0 15022 +clue 1 6.957497 0 0 15023 +primarli 1 6.957497 0 0 15024 +userinterfac 1 6.957497 0 0 15025 +proclaim 1 6.957497 0 0 15026 +mathematicsexperi 1 6.957497 0 0 15027 +usingexploratori 1 6.957497 0 0 15028 +rote 1 6.957497 0 0 15029 +minterest 1 6.957497 0 0 15030 +supportedcollabor 1 6.957497 0 0 15031 +lawk 1 6.957497 0 0 15032 +dawg 1 6.957497 0 0 15033 +interfacea 1 6.957497 0 0 15034 +resumeschool 1 6.957497 0 0 15035 +dazethi 1 6.957497 0 0 15036 +quarterdoth 1 6.957497 0 0 15037 +quartershuman 1 6.957497 0 0 15038 +writeupwhat 1 6.957497 0 0 15039 +insocieti 1 6.957497 0 0 15040 +lifesportscookingpotteri 1 6.957497 0 0 15041 +garag 1 6.957497 0 0 15042 +stuffbecaus 1 6.957497 0 0 15043 +itaddress 1 6.957497 0 0 15044 +carlsonadam 1 6.957497 0 0 15045 +theview 1 6.957497 0 0 15046 +popov 1 6.957497 0 0 15047 +scanningproject 1 6.957497 0 0 15048 +sgigraph 1 6.957497 0 0 15049 +shouldalso 1 6.957497 0 0 15050 +scari 1 6.957497 0 0 15051 +codesignpersonalperson 1 6.957497 0 0 15052 +resumefoodi 1 6.957497 0 0 15053 +ofpeopl 1 6.957497 0 0 15054 +dish 1 6.957497 0 0 15055 +noodl 1 6.957497 0 0 15056 +beefskew 1 6.957497 0 0 15057 +toysb 1 6.957497 0 0 15058 +computersand 1 6.957497 0 0 15059 +taiwanesei 1 6.957497 0 0 15060 +taiwaneselanguag 1 6.957497 0 0 15061 +taiwanes 1 6.957497 0 0 15062 +sureto 1 6.957497 0 0 15063 +taiwanesedictionari 1 6.957497 0 0 15064 +viewedif 1 6.957497 0 0 15065 +beabl 1 6.957497 0 0 15066 +bouncingov 1 6.957497 0 0 15067 +barnei 1 6.957497 0 0 15068 +dynosaur 1 6.957497 0 0 15069 +andersoncorei 1 6.957497 0 0 15070 +andersonth 1 6.957497 0 0 15071 +localtalk 1 6.957497 0 0 15072 +collegi 1 6.957497 0 0 15073 +univser 1 6.957497 0 0 15074 +highlin 1 6.957497 0 0 15075 +polli 1 6.957497 0 0 15076 +treasuri 1 6.957497 0 0 15077 +providercool 1 6.957497 0 0 15078 +sunsit 1 6.957497 0 0 15079 +pageus 1 6.957497 0 0 15080 +washinton 1 6.957497 0 0 15081 +uwtv 1 6.957497 0 0 15082 +notesmi 1 6.957497 0 0 15083 +valedictorian 1 6.957497 0 0 15084 +experiencecraig 1 6.957497 0 0 15085 +kaplancurr 1 6.957497 0 0 15086 +copyof 1 6.957497 0 0 15087 +undergraduatewa 1 6.957497 0 0 15088 +cskaplan 1 6.957497 0 0 15089 +sanctuari 1 6.957497 0 0 15090 +asami 1 6.957497 0 0 15091 +chiaki 1 6.957497 0 0 15092 +ctkwok 1 6.957497 0 0 15093 +andoren 1 6.957497 0 0 15094 +aiuw 1 6.957497 0 0 15095 +informationleisur 1 6.957497 0 0 15096 +windlaputa 1 6.957497 0 0 15097 +skyhyp 1 6.957497 0 0 15098 +gunnm 1 6.957497 0 0 15099 +vile 1 6.957497 0 0 15100 +darrenc 1 6.957497 0 0 15101 +vitaperson 1 6.957497 0 0 15102 +christiansondbc 1 6.957497 0 0 15103 +inaiand 1 6.957497 0 0 15104 +graphicsa 1 6.957497 0 0 15105 +directedbehavior 1 6.957497 0 0 15106 +buzzwordacquisit 1 6.957497 0 0 15107 +bobdoorenbo 1 6.957497 0 0 15108 +somehowintegr 1 6.957497 0 0 15109 +applicationthat 1 6.957497 0 0 15110 +basket 1 6.957497 0 0 15111 +determinewhat 1 6.957497 0 0 15112 +moviethat 1 6.957497 0 0 15113 +technologyinto 1 6.957497 0 0 15114 +perpetr 1 6.957497 0 0 15115 +theucpop 1 6.957497 0 0 15116 +isher 1 6.957497 0 0 15117 +carboload 1 6.957497 0 0 15118 +publicationschristianson 1 6.957497 0 0 15119 +cinematographi 1 6.957497 0 0 15120 +firbi 1 6.957497 0 0 15121 +mcdougal 1 6.957497 0 0 15122 +fusion 1 6.957497 0 0 15123 +withfreder 1 6.957497 0 0 15124 +judo 1 6.957497 0 0 15125 +sibl 1 6.957497 0 0 15126 +sisterjust 1 6.957497 0 0 15127 +supercollid 1 6.957497 0 0 15128 +slack 1 6.957497 0 0 15129 +mirski 1 6.957497 0 0 15130 +youth 1 6.957497 0 0 15131 +wwwf 1 6.957497 0 0 15132 +grudg 1 6.957497 0 0 15133 +doomgat 1 6.957497 0 0 15134 +readersproject 1 6.957497 0 0 15135 +theracquetbal 1 6.957497 0 0 15136 +thecreat 1 6.957497 0 0 15137 +ddion 1 6.957497 0 0 15138 +thespinoper 1 6.957497 0 0 15139 +intercept 1 6.957497 0 0 15140 +havework 1 6.957497 0 0 15141 +ipromis 1 6.957497 0 0 15142 +enhancedthi 1 6.957497 0 0 15143 +vast 1 6.957497 0 0 15144 +sportzon 1 6.957497 0 0 15145 +eateri 1 6.957497 0 0 15146 +derrick 1 6.957497 0 0 15147 +weathersbi 1 6.957497 0 0 15148 +bullssupersonicsi 1 6.957497 0 0 15149 +phdin 1 6.957497 0 0 15150 +ofseattl 1 6.957497 0 0 15151 +prei 1 6.957497 0 0 15152 +therebyextend 1 6.957497 0 0 15153 +interestssignific 1 6.957497 0 0 15154 +securityresearch 1 6.957497 0 0 15155 +challengespres 1 6.957497 0 0 15156 +theseenviron 1 6.957497 0 0 15157 +daunt 1 6.957497 0 0 15158 +projectacadem 1 6.957497 0 0 15159 +achievementsinstructor 1 6.957497 0 0 15160 +collegeinstructor 1 6.957497 0 0 15161 +minoritystud 1 6.957497 0 0 15162 +engineeringoutstand 1 6.957497 0 0 15163 +cnnfinanciala 1 6.957497 0 0 15164 +javaw 1 6.957497 0 0 15165 +weathersbyderrick 1 6.957497 0 0 15166 +edutu 1 6.957497 0 0 15167 +dewei 1 6.957497 0 0 15168 +deweyabout 1 6.957497 0 0 15169 +doyou 1 6.957497 0 0 15170 +ilov 1 6.957497 0 0 15171 +oldroomm 1 6.957497 0 0 15172 +irelandi 1 6.957497 0 0 15173 +belfast 1 6.957497 0 0 15174 +sixti 1 6.957497 0 0 15175 +pagesof 1 6.957497 0 0 15176 +luggag 1 6.957497 0 0 15177 +getthos 1 6.957497 0 0 15178 +enlighteningformat 1 6.957497 0 0 15179 +thisproject 1 6.957497 0 0 15180 +sinn 1 6.957497 0 0 15181 +fein 1 6.957497 0 0 15182 +injuri 1 6.957497 0 0 15183 +recuri 1 6.957497 0 0 15184 +ancient 1 6.957497 0 0 15185 +imageek 1 6.957497 0 0 15186 +cuni 1 6.957497 0 0 15187 +jansteen 1 6.957497 0 0 15188 +dfasulo 1 6.957497 0 0 15189 +amber 1 6.957497 0 0 15190 +williamscolleg 1 6.957497 0 0 15191 +inaccuraci 1 6.957497 0 0 15192 +eastlak 1 6.957497 0 0 15193 +merlin 1 6.957497 0 0 15194 +corwin 1 6.957497 0 0 15195 +zelazni 1 6.957497 0 0 15196 +drpg 1 6.957497 0 0 15197 +phage 1 6.957497 0 0 15198 +dress 1 6.957497 0 0 15199 +dogbert 1 6.957497 0 0 15200 +dickeycomput 1 6.957497 0 0 15201 +washingtonwelcom 1 6.957497 0 0 15202 +schedulenarr 1 6.957497 0 0 15203 +blurbcs 1 6.957497 0 0 15204 +housesfavorit 1 6.957497 0 0 15205 +bookspirograph 1 6.957497 0 0 15206 +calder 1 6.957497 0 0 15207 +grunwald 1 6.957497 0 0 15208 +huberthumphrei 1 6.957497 0 0 15209 +likejean 1 6.957497 0 0 15210 +enginefor 1 6.957497 0 0 15211 +sensibl 1 6.957497 0 0 15212 +conced 1 6.957497 0 0 15213 +thatpolit 1 6.957497 0 0 15214 +lesser 1 6.957497 0 0 15215 +tweedledumand 1 6.957497 0 0 15216 +tweedlede 1 6.957497 0 0 15217 +abstain 1 6.957497 0 0 15218 +theyar 1 6.957497 0 0 15219 +torummag 1 6.957497 0 0 15220 +allth 1 6.957497 0 0 15221 +stew 1 6.957497 0 0 15222 +humphrei 1 6.957497 0 0 15223 +suprem 1 6.957497 0 0 15224 +whentricia 1 6.957497 0 0 15225 +flummeri 1 6.957497 0 0 15226 +ineffect 1 6.957497 0 0 15227 +splendid 1 6.957497 0 0 15228 +unforeseen 1 6.957497 0 0 15229 +zongker 1 6.957497 0 0 15230 +pagececi 1 6.957497 0 0 15231 +noless 1 6.957497 0 0 15232 +classeshow 1 6.957497 0 0 15233 +toxic 1 6.957497 0 0 15234 +custard 1 6.957497 0 0 15235 +filesth 1 6.957497 0 0 15236 +mento 1 6.957497 0 0 15237 +galleryvisit 1 6.957497 0 0 15238 +supercolliderth 1 6.957497 0 0 15239 +cron 1 6.957497 0 0 15240 +avirtu 1 6.957497 0 0 15241 +trove 1 6.957497 0 0 15242 +whichmai 1 6.957497 0 0 15243 +imajor 1 6.957497 0 0 15244 +andminor 1 6.957497 0 0 15245 +dubiou 1 6.957497 0 0 15246 +honorsjunior 1 6.957497 0 0 15247 +brotherhood 1 6.957497 0 0 15248 +crouton 1 6.957497 0 0 15249 +pizzicato 1 6.957497 0 0 15250 +clicker 1 6.957497 0 0 15251 +cruis 1 6.957497 0 0 15252 +inhigh 1 6.957497 0 0 15253 +buttstar 1 6.957497 0 0 15254 +wheremi 1 6.957497 0 0 15255 +dougz 1 6.957497 0 0 15256 +applicationsand 1 6.957497 0 0 15257 +mechanismthat 1 6.957497 0 0 15258 +replacementpolici 1 6.957497 0 0 15259 +machoper 1 6.957497 0 0 15260 +thathelp 1 6.957497 0 0 15261 +kernelthread 1 6.957497 0 0 15262 +tailor 1 6.957497 0 0 15263 +betweenobject 1 6.957497 0 0 15264 +achiv 1 6.957497 0 0 15265 +improvementscan 1 6.957497 0 0 15266 +betterserv 1 6.957497 0 0 15267 +paperscv 1 6.957497 0 0 15268 +lectureintroduc 1 6.957497 0 0 15269 +echri 1 6.957497 0 0 15270 +ecrock 1 6.957497 0 0 15271 +tosai 1 6.957497 0 0 15272 +hadto 1 6.957497 0 0 15273 +postcriptand 1 6.957497 0 0 15274 +workingon 1 6.957497 0 0 15275 +fordynam 1 6.957497 0 0 15276 +activit 1 6.957497 0 0 15277 +vine 1 6.957497 0 0 15278 +branchesmi 1 6.957497 0 0 15279 +knowof 1 6.957497 0 0 15280 +daveneti 1 6.957497 0 0 15281 +towardslik 1 6.957497 0 0 15282 +eveneasi 1 6.957497 0 0 15283 +freewai 1 6.957497 0 0 15284 +worldher 1 6.957497 0 0 15285 +edhong 1 6.957497 0 0 15286 +onexperiment 1 6.957497 0 0 15287 +incompil 1 6.957497 0 0 15288 +optimizationsand 1 6.957497 0 0 15289 +multithreadedarchitectur 1 6.957497 0 0 15290 +spinprevi 1 6.957497 0 0 15291 +sirereg 1 6.957497 0 0 15292 +istanbul 1 6.957497 0 0 15293 +labswork 1 6.957497 0 0 15294 +theplan 1 6.957497 0 0 15295 +thevesta 1 6.957497 0 0 15296 +projectsmi 1 6.957497 0 0 15297 +andprotect 1 6.957497 0 0 15298 +specificaspect 1 6.957497 0 0 15299 +alarm 1 6.957497 0 0 15300 +ofextend 1 6.957497 0 0 15301 +allowsu 1 6.957497 0 0 15302 +isdesign 1 6.957497 0 0 15303 +allowsisol 1 6.957497 0 0 15304 +withconflict 1 6.957497 0 0 15305 +beassur 1 6.957497 0 0 15306 +clincher 1 6.957497 0 0 15307 +extensionsthat 1 6.957497 0 0 15308 +protectionenforc 1 6.957497 0 0 15309 +performanceweb 1 6.957497 0 0 15310 +networkingstack 1 6.957497 0 0 15311 +andminim 1 6.957497 0 0 15312 +calledmipsi 1 6.957497 0 0 15313 +researchplatform 1 6.957497 0 0 15314 +featuresand 1 6.957497 0 0 15315 +talkslanguag 1 6.957497 0 0 15316 +interestswhenev 1 6.957497 0 0 15317 +windsurf 1 6.957497 0 0 15318 +bikingmak 1 6.957497 0 0 15319 +andhik 1 6.957497 0 0 15320 +andersonwher 1 6.957497 0 0 15321 +decisionin 1 6.957497 0 0 15322 +thedecis 1 6.957497 0 0 15323 +interim 1 6.957497 0 0 15324 +feloni 1 6.957497 0 0 15325 +themarketplac 1 6.957497 0 0 15326 +imostli 1 6.957497 0 0 15327 +greensideof 1 6.957497 0 0 15328 +onsteadi 1 6.957497 0 0 15329 +biharmon 1 6.957497 0 0 15330 +timesteppingmethod 1 6.957497 0 0 15331 +analysisissu 1 6.957497 0 0 15332 +nonlinearequ 1 6.957497 0 0 15333 +newtonstep 1 6.957497 0 0 15334 +spiffi 1 6.957497 0 0 15335 +structuresbi 1 6.957497 0 0 15336 +andersoni 1 6.957497 0 0 15337 +bioand 1 6.957497 0 0 15338 +heor 1 6.957497 0 0 15339 +searchmultipl 1 6.957497 0 0 15340 +pruningopt 1 6.957497 0 0 15341 +netrecommend 1 6.957497 0 0 15342 +locatesindividu 1 6.957497 0 0 15343 +bruteforc 1 6.957497 0 0 15344 +whenrun 1 6.957497 0 0 15345 +theweb 1 6.957497 0 0 15346 +richardseg 1 6.957497 0 0 15347 +fileretriev 1 6.957497 0 0 15348 +universalquantif 1 6.957497 0 0 15349 +terranc 1 6.957497 0 0 15350 +mikeperkowitz 1 6.957497 0 0 15351 +soderland 1 6.957497 0 0 15352 +roomi 1 6.957497 0 0 15353 +lesourd 1 6.957497 0 0 15354 +spiger 1 6.957497 0 0 15355 +alford 1 6.957497 0 0 15356 +fitchenholtz 1 6.957497 0 0 15357 +guido 1 6.957497 0 0 15358 +dymitr 1 6.957497 0 0 15359 +mozdyniewicz 1 6.957497 0 0 15360 +quark 1 6.957497 0 0 15361 +minecontain 1 6.957497 0 0 15362 +neuroprosearch 1 6.957497 0 0 15363 +statlib 1 6.957497 0 0 15364 +learningtoolbox 1 6.957497 0 0 15365 +bonn 1 6.957497 0 0 15366 +pagemik 1 6.957497 0 0 15367 +workstationclust 1 6.957497 0 0 15368 +opalproject 1 6.957497 0 0 15369 +injanuari 1 6.957497 0 0 15370 +summarycvsoutheast 1 6.957497 0 0 15371 +idaholast 1 6.957497 0 0 15372 +fisherdepart 1 6.957497 0 0 15373 +engineeringbox 1 6.957497 0 0 15374 +scheduleto 1 6.957497 0 0 15375 +probablyb 1 6.957497 0 0 15376 +activitiesmi 1 6.957497 0 0 15377 +areasof 1 6.957497 0 0 15378 +outta 1 6.957497 0 0 15379 +pea 1 6.957497 0 0 15380 +mofo 1 6.957497 0 0 15381 +peach 1 6.957497 0 0 15382 +ruel 1 6.957497 0 0 15383 +modugno 1 6.957497 0 0 15384 +francesmari 1 6.957497 0 0 15385 +pagefrancesmari 1 6.957497 0 0 15386 +algorthim 1 6.957497 0 0 15387 +includecycl 1 6.957497 0 0 15388 +previouslyitalian 1 6.957497 0 0 15389 +elleri 1 6.957497 0 0 15390 +pagegeorg 1 6.957497 0 0 15391 +ariadn 1 6.957497 0 0 15392 +gforman 1 6.957497 0 0 15393 +comhom 1 6.957497 0 0 15394 +watercolor 1 6.957497 0 0 15395 +checklist 1 6.957497 0 0 15396 +occam 1 6.957497 0 0 15397 +wordbot 1 6.957497 0 0 15398 +nietzschein 1 6.957497 0 0 15399 +algorithmspap 1 6.957497 0 0 15400 +algorithmsformerli 1 6.957497 0 0 15401 +winkenbach 1 6.957497 0 0 15402 +winkenbachdepart 1 6.957497 0 0 15403 +georgew 1 6.957497 0 0 15404 +doneund 1 6.957497 0 0 15405 +theautomat 1 6.957497 0 0 15406 +imagescr 1 6.957497 0 0 15407 +taweewan 1 6.957497 0 0 15408 +siwadun 1 6.957497 0 0 15409 +badro 1 6.957497 0 0 15410 +zshell 1 6.957497 0 0 15411 +nesbit 1 6.957497 0 0 15412 +isuppos 1 6.957497 0 0 15413 +excitingfeatur 1 6.957497 0 0 15414 +dukeunivers 1 6.957497 0 0 15415 +fortransworld 1 6.957497 0 0 15416 +indurham 1 6.957497 0 0 15417 +headquart 1 6.957497 0 0 15418 +myapart 1 6.957497 0 0 15419 +newer 1 6.957497 0 0 15420 +fvwm 1 6.957497 0 0 15421 +redhat 1 6.957497 0 0 15422 +transworldnumer 1 6.957497 0 0 15423 +ieeenat 1 6.957497 0 0 15424 +victori 1 6.957497 0 0 15425 +bycomput 1 6.957497 0 0 15426 +canterburi 1 6.957497 0 0 15427 +definitelynot 1 6.957497 0 0 15428 +juggl 1 6.957497 0 0 15429 +rubik 1 6.957497 0 0 15430 +sarahmclachlan 1 6.957497 0 0 15431 +parliamentari 1 6.957497 0 0 15432 +sgml 1 6.957497 0 0 15433 +sitcom 1 6.957497 0 0 15434 +linden 1 6.957497 0 0 15435 +webview 1 6.957497 0 0 15436 +wasrat 1 6.957497 0 0 15437 +andwa 1 6.957497 0 0 15438 +neuralnetwork 1 6.957497 0 0 15439 +headless 1 6.957497 0 0 15440 +horseman 1 6.957497 0 0 15441 +lindenmi 1 6.957497 0 0 15442 +wifecorina 1 6.957497 0 0 15443 +lofti 1 6.957497 0 0 15444 +undergraduatedegre 1 6.957497 0 0 15445 +anodd 1 6.957497 0 0 15446 +mactiv 1 6.957497 0 0 15447 +orset 1 6.957497 0 0 15448 +altavistawebviewand 1 6.957497 0 0 15449 +metawebview 1 6.957497 0 0 15450 +foraltavista 1 6.957497 0 0 15451 +searchservic 1 6.957497 0 0 15452 +dialog 1 6.957497 0 0 15453 +travelag 1 6.957497 0 0 15454 +whileallow 1 6.957497 0 0 15455 +andjar 1 6.957497 0 0 15456 +altavistawebview 1 6.957497 0 0 15457 +winner 1 6.957497 0 0 15458 +walsh 1 6.957497 0 0 15459 +meilleur 1 6.957497 0 0 15460 +ballet 1 6.957497 0 0 15461 +flicker 1 6.957497 0 0 15462 +standardsto 1 6.957497 0 0 15463 +mylgramm 1 6.957497 0 0 15464 +lgrammer 1 6.957497 0 0 15465 +theparticletre 1 6.957497 0 0 15466 +thejar 1 6.957497 0 0 15467 +dawn 1 6.957497 0 0 15468 +ademonstr 1 6.957497 0 0 15469 +cansuccessfulli 1 6.957497 0 0 15470 +myriadsoftwar 1 6.957497 0 0 15471 +filippo 1 6.957497 0 0 15472 +menzer 1 6.957497 0 0 15473 +latentenergi 1 6.957497 0 0 15474 +developingartifici 1 6.957497 0 0 15475 +enviro 1 6.957497 0 0 15476 +theautom 1 6.957497 0 0 15477 +assit 1 6.957497 0 0 15478 +majeski 1 6.957497 0 0 15479 +spitzer 1 6.957497 0 0 15480 +localizedinteract 1 6.957497 0 0 15481 +dilemma 1 6.957497 0 0 15482 +krishnamoorthi 1 6.957497 0 0 15483 +paturi 1 6.957497 0 0 15484 +blume 1 6.957497 0 0 15485 +liden 1 6.957497 0 0 15486 +esen 1 6.957497 0 0 15487 +hardwaretradeoff 1 6.957497 0 0 15488 +sdilemma 1 6.957497 0 0 15489 +funrai 1 6.957497 0 0 15490 +closeup 1 6.957497 0 0 15491 +sphere 1 6.957497 0 0 15492 +withreflect 1 6.957497 0 0 15493 +adaptivesampl 1 6.957497 0 0 15494 +thespher 1 6.957497 0 0 15495 +causingth 1 6.957497 0 0 15496 +refract 1 6.957497 0 0 15497 +surfaceand 1 6.957497 0 0 15498 +strike 1 6.957497 0 0 15499 +alow 1 6.957497 0 0 15500 +anyfurth 1 6.957497 0 0 15501 +resembl 1 6.957497 0 0 15502 +glinden 1 6.957497 0 0 15503 +kri 1 6.957497 0 0 15504 +infowork 1 6.957497 0 0 15505 +backgrounduwdynam 1 6.957497 0 0 15506 +engineeringperson 1 6.957497 0 0 15507 +stuffperson 1 6.957497 0 0 15508 +backgroundmi 1 6.957497 0 0 15509 +isismi 1 6.957497 0 0 15510 +singaporemi 1 6.957497 0 0 15511 +bookmarksmi 1 6.957497 0 0 15512 +keylast 1 6.957497 0 0 15513 +soonish 1 6.957497 0 0 15514 +dilbertfix 1 6.957497 0 0 15515 +thathit 1 6.957497 0 0 15516 +underacheiv 1 6.957497 0 0 15517 +scoutreserv 1 6.957497 0 0 15518 +greaterlowel 1 6.957497 0 0 15519 +casunset 1 6.957497 0 0 15520 +cabin 1 6.957497 0 0 15521 +drove 1 6.957497 0 0 15522 +detour 1 6.957497 0 0 15523 +somehihglight 1 6.957497 0 0 15524 +thoughi 1 6.957497 0 0 15525 +vitaeresearch 1 6.957497 0 0 15526 +hinshaw 1 6.957497 0 0 15527 +ultrasound 1 6.957497 0 0 15528 +elspeth 1 6.957497 0 0 15529 +unborn 1 6.957497 0 0 15530 +fromconcept 1 6.957497 0 0 15531 +ripe 1 6.957497 0 0 15532 +inmid 1 6.957497 0 0 15533 +ly 1 6.957497 0 0 15534 +lookingup 1 6.957497 0 0 15535 +torso 1 6.957497 0 0 15536 +theleft 1 6.957497 0 0 15537 +impend 1 6.957497 0 0 15538 +fatherhood 1 6.957497 0 0 15539 +myspam 1 6.957497 0 0 15540 +usingwebcrawl 1 6.957497 0 0 15541 +frogstv 1 6.957497 0 0 15542 +nationpenn 1 6.957497 0 0 15543 +tellermus 1 6.957497 0 0 15544 +lyricsian 1 6.957497 0 0 15545 +herv 1 6.957497 0 0 15546 +jamrozikherv 1 6.957497 0 0 15547 +memoi 1 6.957497 0 0 15548 +therebi 1 6.957497 0 0 15549 +intens 1 6.957497 0 0 15550 +lightli 1 6.957497 0 0 15551 +guideproject 1 6.957497 0 0 15552 +bull 1 6.957497 0 0 15553 +imaginstitut 1 6.957497 0 0 15554 +snot 1 6.957497 0 0 15555 +louvr 1 6.957497 0 0 15556 +somefamili 1 6.957497 0 0 15557 +somefriend 1 6.957497 0 0 15558 +eduv 1 6.957497 0 0 15559 +pagejason 1 6.957497 0 0 15560 +cunivers 1 6.957497 0 0 15561 +boxseattl 1 6.957497 0 0 15562 +projectseattl 1 6.957497 0 0 15563 +secoskylast 1 6.957497 0 0 15564 +baerjeremi 1 6.957497 0 0 15565 +twain 1 6.957497 0 0 15566 +shakespearei 1 6.957497 0 0 15567 +engineeringtool 1 6.957497 0 0 15568 +pierian 1 6.957497 0 0 15569 +softwareoregon 1 6.957497 0 0 15570 +omsi 1 6.957497 0 0 15571 +pomona 1 6.957497 0 0 15572 +collegeher 1 6.957497 0 0 15573 +searchcopyright 1 6.957497 0 0 15574 +jbaer 1 6.957497 0 0 15575 +buhler 1 6.957497 0 0 15576 +jbuhler 1 6.957497 0 0 15577 +pagejeremi 1 6.957497 0 0 15578 +pagedo 1 6.957497 0 0 15579 +tako 1 6.957497 0 0 15580 +stufflectur 1 6.957497 0 0 15581 +suffix 1 6.957497 0 0 15582 +keycyb 1 6.957497 0 0 15583 +grinsrecommend 1 6.957497 0 0 15584 +readingmi 1 6.957497 0 0 15585 +universityquot 1 6.957497 0 0 15586 +quotesmi 1 6.957497 0 0 15587 +biplan 1 6.957497 0 0 15588 +dang 1 6.957497 0 0 15589 +weren 1 6.957497 0 0 15590 +plansi 1 6.957497 0 0 15591 +sunni 1 6.957497 0 0 15592 +menlo 1 6.957497 0 0 15593 +avehicl 1 6.957497 0 0 15594 +weintend 1 6.957497 0 0 15595 +codein 1 6.957497 0 0 15596 +systemmicrokernel 1 6.957497 0 0 15597 +especiallyprofil 1 6.957497 0 0 15598 +howwhol 1 6.957497 0 0 15599 +assumedthat 1 6.957497 0 0 15600 +manycompromis 1 6.957497 0 0 15601 +wholeprogram 1 6.957497 0 0 15602 +underlyingimplement 1 6.957497 0 0 15603 +principaldesign 1 6.957497 0 0 15604 +independentintermedi 1 6.957497 0 0 15605 +ishigh 1 6.957497 0 0 15606 +messagesend 1 6.957497 0 0 15607 +wayof 1 6.957497 0 0 15608 +repeatedli 1 6.957497 0 0 15609 +passessepar 1 6.957497 0 0 15610 +classanalysi 1 6.957497 0 0 15611 +aliasanalysi 1 6.957497 0 0 15612 +structuringoptim 1 6.957497 0 0 15613 +stillallow 1 6.957497 0 0 15614 +eachoth 1 6.957497 0 0 15615 +flowanalys 1 6.957497 0 0 15616 +withrel 1 6.957497 0 0 15617 +assignmentelimin 1 6.957497 0 0 15618 +publicationssom 1 6.957497 0 0 15619 +personali 1 6.957497 0 0 15620 +spici 1 6.957497 0 0 15621 +mild 1 6.957497 0 0 15622 +heidi 1 6.957497 0 0 15623 +victoria 1 6.957497 0 0 15624 +honeymoon 1 6.957497 0 0 15625 +kauai 1 6.957497 0 0 15626 +hurrican 1 6.957497 0 0 15627 +iniki 1 6.957497 0 0 15628 +puget 1 6.957497 0 0 15629 +dare 1 6.957497 0 0 15630 +sadli 1 6.957497 0 0 15631 +passeng 1 6.957497 0 0 15632 +damon 1 6.957497 0 0 15633 +jdrees 1 6.957497 0 0 15634 +hazop 1 6.957497 0 0 15635 +waxahachi 1 6.957497 0 0 15636 +hildreth 1 6.957497 0 0 15637 +pagejon 1 6.957497 0 0 15638 +reesepost 1 6.957497 0 0 15639 +groupdepart 1 6.957497 0 0 15640 +catastroph 1 6.957497 0 0 15641 +wider 1 6.957497 0 0 15642 +siang 1 6.957497 0 0 15643 +dolin 1 6.957497 0 0 15644 +statechart 1 6.957497 0 0 15645 +como 1 6.957497 0 0 15646 +anddean 1 6.957497 0 0 15647 +lojack 1 6.957497 0 0 15648 +loph 1 6.957497 0 0 15649 +eseattl 1 6.957497 0 0 15650 +orsieg 1 6.957497 0 0 15651 +paintbal 1 6.957497 0 0 15652 +yahoojlo 1 6.957497 0 0 15653 +sherman 1 6.957497 0 0 15654 +shermanjoebob 1 6.957497 0 0 15655 +usami 1 6.957497 0 0 15656 +designinform 1 6.957497 0 0 15657 +useclass 1 6.957497 0 0 15658 +hcreat 1 6.957497 0 0 15659 +pagequ 1 6.957497 0 0 15660 +sarahsoftballstuff 1 6.957497 0 0 15661 +pagesif 1 6.957497 0 0 15662 +tojoebob 1 6.957497 0 0 15663 +seim 1 6.957497 0 0 15664 +begunin 1 6.957497 0 0 15665 +lockean 1 6.957497 0 0 15666 +observedbehavior 1 6.957497 0 0 15667 +graduatingfrom 1 6.957497 0 0 15668 +volit 1 6.957497 0 0 15669 +taskw 1 6.957497 0 0 15670 +ambulatori 1 6.957497 0 0 15671 +academichierarchi 1 6.957497 0 0 15672 +powerjoanna 1 6.957497 0 0 15673 +pagehi 1 6.957497 0 0 15674 +uwneat 1 6.957497 0 0 15675 +matercool 1 6.957497 0 0 15676 +shadegraph 1 6.957497 0 0 15677 +uwduoton 1 6.957497 0 0 15678 +reproductionmi 1 6.957497 0 0 15679 +matermost 1 6.957497 0 0 15680 +employmentpubl 1 6.957497 0 0 15681 +stollnitz 1 6.957497 0 0 15682 +duoton 1 6.957497 0 0 15683 +lifepast 1 6.957497 0 0 15684 +homesdiversionsgend 1 6.957497 0 0 15685 +issuesstatu 1 6.957497 0 0 15686 +sciencenow 1 6.957497 0 0 15687 +pagefeminist 1 6.957497 0 0 15688 +onlineultim 1 6.957497 0 0 15689 +frisbeefun 1 6.957497 0 0 15690 +stufffroggi 1 6.957497 0 0 15691 +quotesbrad 1 6.957497 0 0 15692 +musicevan 1 6.957497 0 0 15693 +jokes 1 6.957497 0 0 15694 +pagesmi 1 6.957497 0 0 15695 +herojpow 1 6.957497 0 0 15696 +finderresumlinkslast 1 6.957497 0 0 15697 +jshake 1 6.957497 0 0 15698 +alemanyjuan 1 6.957497 0 0 15699 +alemani 1 6.957497 0 0 15700 +pulli 1 6.957497 0 0 15701 +antero 1 6.957497 0 0 15702 +subdivis 1 6.957497 0 0 15703 +pagekari 1 6.957497 0 0 15704 +pullii 1 6.957497 0 0 15705 +thesedisciplin 1 6.957497 0 0 15706 +uwfor 1 6.957497 0 0 15707 +pixar 1 6.957497 0 0 15708 +lindashapiro 1 6.957497 0 0 15709 +andjohn 1 6.957497 0 0 15710 +mcdonald 1 6.957497 0 0 15711 +andhugu 1 6.957497 0 0 15712 +szeliski 1 6.957497 0 0 15713 +tribor 1 6.957497 0 0 15714 +triplet 1 6.957497 0 0 15715 +recognitionsystem 1 6.957497 0 0 15716 +surfacereconstruct 1 6.957497 0 0 15717 +baselin 1 6.957497 0 0 15718 +camerasystem 1 6.957497 0 0 15719 +waveletanalysi 1 6.957497 0 0 15720 +rigidregistr 1 6.957497 0 0 15721 +architecturesystem 1 6.957497 0 0 15722 +susanegg 1 6.957497 0 0 15723 +brianbershad 1 6.957497 0 0 15724 +eacutesum 1 6.957497 0 0 15725 +eacut 1 6.957497 0 0 15726 +kapu 1 6.957497 0 0 15727 +takavainionti 1 6.957497 0 0 15728 +oulu 1 6.957497 0 0 15729 +finland 1 6.957497 0 0 15730 +karlinanna 1 6.957497 0 0 15731 +rochel 1 6.957497 0 0 15732 +karlinassoci 1 6.957497 0 0 15733 +sincejuli 1 6.957497 0 0 15734 +paperskarlin 1 6.957497 0 0 15735 +yeunghom 1 6.957497 0 0 15736 +yeungperson 1 6.957497 0 0 15737 +infomi 1 6.957497 0 0 15738 +picturemi 1 6.957497 0 0 15739 +researchtelnet 1 6.957497 0 0 15740 +machinessend 1 6.957497 0 0 15741 +bddtcl 1 6.957497 0 0 15742 +decisiondiagram 1 6.957497 0 0 15743 +suburban 1 6.957497 0 0 15744 +oti 1 6.957497 0 0 15745 +questa 1 6.957497 0 0 15746 +pagina 1 6.957497 0 0 15747 +anch 1 6.957497 0 0 15748 +italiano 1 6.957497 0 0 15749 +researchsoftbotsplanningkrselect 1 6.957497 0 0 15750 +publicationscurriculum 1 6.957497 0 0 15751 +inpostscriptrandom 1 6.957497 0 0 15752 +hackingwordbot 1 6.957497 0 0 15753 +godless 1 6.957497 0 0 15754 +pinko 1 6.957497 0 0 15755 +dislik 1 6.957497 0 0 15756 +ellenmarcruben 1 6.957497 0 0 15757 +laurennickrich 1 6.957497 0 0 15758 +joannavivek 1 6.957497 0 0 15759 +keithgolden 1 6.957497 0 0 15760 +kgolden 1 6.957497 0 0 15761 +kingsum 1 6.957497 0 0 15762 +pcct 1 6.957497 0 0 15763 +feedbackresearchmi 1 6.957497 0 0 15764 +toolspap 1 6.957497 0 0 15765 +icsm 1 6.957497 0 0 15766 +griswold 1 6.957497 0 0 15767 +sorcererpcct 1 6.957497 0 0 15768 +terrenc 1 6.957497 0 0 15769 +parr 1 6.957497 0 0 15770 +newbiesresumepleasedrop 1 6.957497 0 0 15771 +mailto 1 6.957497 0 0 15772 +kongchines 1 6.957497 0 0 15773 +kongsingapor 1 6.957497 0 0 15774 +sitessingapor 1 6.957497 0 0 15775 +websom 1 6.957497 0 0 15776 +friendstom 1 6.957497 0 0 15777 +liew 1 6.957497 0 0 15778 +fook 1 6.957497 0 0 15779 +jiang 1 6.957497 0 0 15780 +weidongu 1 6.957497 0 0 15781 +relatedunivers 1 6.957497 0 0 15782 +webserv 1 6.957497 0 0 15783 +storeinvestmentsfre 1 6.957497 0 0 15784 +analysismisc 1 6.957497 0 0 15785 +boldingkwb 1 6.957497 0 0 15786 +juvenil 1 6.957497 0 0 15787 +squirt 1 6.957497 0 0 15788 +hunk 1 6.957497 0 0 15789 +cling 1 6.957497 0 0 15790 +rudimentari 1 6.957497 0 0 15791 +eat 1 6.957497 0 0 15792 +dennett 1 6.957497 0 0 15793 +latencylan 1 6.957497 0 0 15794 +researchha 1 6.957497 0 0 15795 +formass 1 6.957497 0 0 15796 +comethyakutak 1 6.957497 0 0 15797 +moustach 1 6.957497 0 0 15798 +ladnerrichard 1 6.957497 0 0 15799 +ladnerprofessor 1 6.957497 0 0 15800 +biographyresearch 1 6.957497 0 0 15801 +studentsteachingcomput 1 6.957497 0 0 15802 +mcmurchiedepart 1 6.957497 0 0 15803 +integratedsystem 1 6.957497 0 0 15804 +hework 1 6.957497 0 0 15805 +theconstruct 1 6.957497 0 0 15806 +hamiltonian 1 6.957497 0 0 15807 +coauthorof 1 6.957497 0 0 15808 +meld 1 6.957497 0 0 15809 +abinitio 1 6.957497 0 0 15810 +wirec 1 6.957497 0 0 15811 +aschemat 1 6.957497 0 0 15812 +withschemat 1 6.957497 0 0 15813 +concis 1 6.957497 0 0 15814 +parameteriz 1 6.957497 0 0 15815 +andcommerci 1 6.957497 0 0 15816 +hardwareenviron 1 6.957497 0 0 15817 +andsubsystem 1 6.957497 0 0 15818 +levesondepart 1 6.957497 0 0 15819 +mathand 1 6.957497 0 0 15820 +misanthrop 1 6.957497 0 0 15821 +aform 1 6.957497 0 0 15822 +airspac 1 6.957497 0 0 15823 +theiroffici 1 6.957497 0 0 15824 +safetyresearch 1 6.957497 0 0 15825 +subtop 1 6.957497 0 0 15826 +commissionon 1 6.957497 0 0 15827 +levesoni 1 6.957497 0 0 15828 +systemsaward 1 6.957497 0 0 15829 +promotingrespons 1 6.957497 0 0 15830 +propertyar 1 6.957497 0 0 15831 +stake 1 6.957497 0 0 15832 +keynoteaddress 1 6.957497 0 0 15833 +steam 1 6.957497 0 0 15834 +hazardanalysi 1 6.957497 0 0 15835 +writtenin 1 6.957497 0 0 15836 +newrequir 1 6.957497 0 0 15837 +cockpit 1 6.957497 0 0 15838 +problemsand 1 6.957497 0 0 15839 +projectcal 1 6.957497 0 0 15840 +theetch 1 6.957497 0 0 15841 +consecutiveacm 1 6.957497 0 0 15842 +symposia 1 6.957497 0 0 15843 +universityand 1 6.957497 0 0 15844 +machineryand 1 6.957497 0 0 15845 +survivedlevi 1 6.957497 0 0 15846 +haveal 1 6.957497 0 0 15847 +glu 1 6.957497 0 0 15848 +potato 1 6.957497 0 0 15849 +parlor 1 6.957497 0 0 15850 +publicationsreduc 1 6.957497 0 0 15851 +implementablesimultan 1 6.957497 0 0 15852 +joen 1 6.957497 0 0 15853 +edwardd 1 6.957497 0 0 15854 +recover 1 6.957497 0 0 15855 +projectop 1 6.957497 0 0 15856 +tunedto 1 6.957497 0 0 15857 +numberof 1 6.957497 0 0 15858 +andcooper 1 6.957497 0 0 15859 +directlycommun 1 6.957497 0 0 15860 +addressspac 1 6.957497 0 0 15861 +domainthat 1 6.957497 0 0 15862 +oneprocess 1 6.957497 0 0 15863 +protectionstructur 1 6.957497 0 0 15864 +relationshipbetween 1 6.957497 0 0 15865 +canimprov 1 6.957497 0 0 15866 +cooperatingappl 1 6.957497 0 0 15867 +lopez 1 6.957497 0 0 15868 +bjorn 1 6.957497 0 0 15869 +benson 1 6.957497 0 0 15870 +lopezgu 1 6.957497 0 0 15871 +lopezlopez 1 6.957497 0 0 15872 +dissertationresearch 1 6.957497 0 0 15873 +publicationsgu 1 6.957497 0 0 15874 +mayoh 1 6.957497 0 0 15875 +tougu 1 6.957497 0 0 15876 +jann 1 6.957497 0 0 15877 +penjam 1 6.957497 0 0 15878 +constraintprogram 1 6.957497 0 0 15879 +instituteseri 1 6.957497 0 0 15880 +publisheda 1 6.957497 0 0 15881 +tutorialsi 1 6.957497 0 0 15882 +conferencein 1 6.957497 0 0 15883 +itsextens 1 6.957497 0 0 15884 +tutorialshav 1 6.957497 0 0 15885 +introductorysurvei 1 6.957497 0 0 15886 +academicresearch 1 6.957497 0 0 15887 +attende 1 6.957497 0 0 15888 +weespeci 1 6.957497 0 0 15889 +requestguidelin 1 6.957497 0 0 15890 +theoopsla 1 6.957497 0 0 15891 +enthusiast 1 6.957497 0 0 15892 +proposalswithout 1 6.957497 0 0 15893 +notif 1 6.957497 0 0 15894 +withcamera 1 6.957497 0 0 15895 +jimi 1 6.957497 0 0 15896 +omid 1 6.957497 0 0 15897 +madani 1 6.957497 0 0 15898 +bhello 1 6.957497 0 0 15899 +enjoytheori 1 6.957497 0 0 15900 +islamicarchitectur 1 6.957497 0 0 15901 +isfahan 1 6.957497 0 0 15902 +nomine 1 6.957497 0 0 15903 +iran 1 6.957497 0 0 15904 +perkowitznewsflash 1 6.957497 0 0 15905 +blond 1 6.957497 0 0 15906 +randomfavorit 1 6.957497 0 0 15907 +sheba 1 6.957497 0 0 15908 +voyeur 1 6.957497 0 0 15909 +grooveneedl 1 6.957497 0 0 15910 +espressoresumemik 1 6.957497 0 0 15911 +langheinrich 1 6.957497 0 0 15912 +bielefeld 1 6.957497 0 0 15913 +marclang 1 6.957497 0 0 15914 +homepagemarc 1 6.957497 0 0 15915 +langheinrichuniversitt 1 6.957497 0 0 15916 +washingtontechnisch 1 6.957497 0 0 15917 +fakultt 1 6.957497 0 0 15918 +scienceemail 1 6.957497 0 0 15919 +imlangh 1 6.957497 0 0 15920 +techfak 1 6.957497 0 0 15921 +eduabout 1 6.957497 0 0 15922 +myselfi 1 6.957497 0 0 15923 +thefulbright 1 6.957497 0 0 15924 +depthinform 1 6.957497 0 0 15925 +biopost 1 6.957497 0 0 15926 +addressa 1 6.957497 0 0 15927 +mastersat 1 6.957497 0 0 15928 +homeschoolgermanyringstra 1 6.957497 0 0 15929 +maintalphon 1 6.957497 0 0 15930 +paulusplatz 1 6.957497 0 0 15931 +bielefeldphon 1 6.957497 0 0 15932 +woodlawn 1 6.957497 0 0 15933 +formatmarc 1 6.957497 0 0 15934 +bentlei 1 6.957497 0 0 15935 +interestsgraph 1 6.957497 0 0 15936 +coimag 1 6.957497 0 0 15937 +devleop 1 6.957497 0 0 15938 +contol 1 6.957497 0 0 15939 +cansimultan 1 6.957497 0 0 15940 +publicationsbak 1 6.957497 0 0 15941 +bohu 1 6.957497 0 0 15942 +margaret 1 6.957497 0 0 15943 +sorento 1 6.957497 0 0 15944 +apparatu 1 6.957497 0 0 15945 +compileri 1 6.957497 0 0 15946 +beast 1 6.957497 0 0 15947 +shortterm 1 6.957497 0 0 15948 +basedsystem 1 6.957497 0 0 15949 +canbenefit 1 6.957497 0 0 15950 +onprogram 1 6.957497 0 0 15951 +automaticdynam 1 6.957497 0 0 15952 +frequentlymiscellan 1 6.957497 0 0 15953 +importancefrom 1 6.957497 0 0 15954 +abuwhi 1 6.957497 0 0 15955 +campaign 1 6.957497 0 0 15956 +broadwai 1 6.957497 0 0 15957 +projectsgonna 1 6.957497 0 0 15958 +teenag 1 6.957497 0 0 15959 +lobotomi 1 6.957497 0 0 15960 +ramonesi 1 6.957497 0 0 15961 +projectconcern 1 6.957497 0 0 15962 +copiou 1 6.957497 0 0 15963 +expatri 1 6.957497 0 0 15964 +onchaot 1 6.957497 0 0 15965 +routingwith 1 6.957497 0 0 15966 +torusnetwork 1 6.957497 0 0 15967 +thecranium 1 6.957497 0 0 15968 +compatiblewith 1 6.957497 0 0 15969 +netlist 1 6.957497 0 0 15970 +calledgemini 1 6.957497 0 0 15971 +schemat 1 6.957497 0 0 15972 +cranium 1 6.957497 0 0 15973 +packetrout 1 6.957497 0 0 15974 +andcommun 1 6.957497 0 0 15975 +tomactest 1 6.957497 0 0 15976 +arlington 1 6.957497 0 0 15977 +livein 1 6.957497 0 0 15978 +ofballard 1 6.957497 0 0 15979 +artworkcr 1 6.957497 0 0 15980 +photoshop 1 6.957497 0 0 15981 +ownedthi 1 6.957497 0 0 15982 +onlyth 1 6.957497 0 0 15983 +correctlyguess 1 6.957497 0 0 15984 +toriddl 1 6.957497 0 0 15985 +jour 1 6.957497 0 0 15986 +honei 1 6.957497 0 0 15987 +myuncl 1 6.957497 0 0 15988 +edmonton 1 6.957497 0 0 15989 +alberta 1 6.957497 0 0 15990 +linkschairman 1 6.957497 0 0 15991 +linksnorm 1 6.957497 0 0 15992 +halcyon 1 6.957497 0 0 15993 +eugen 1 6.957497 0 0 15994 +spafford 1 6.957497 0 0 15995 +randi 1 6.957497 0 0 15996 +pausch 1 6.957497 0 0 15997 +wallach 1 6.957497 0 0 15998 +scool 1 6.957497 0 0 15999 +contacthttp 1 6.957497 0 0 16000 +grewup 1 6.957497 0 0 16001 +sseldorf 1 6.957497 0 0 16002 +fromrutg 1 6.957497 0 0 16003 +mitr 1 6.957497 0 0 16004 +proprietor 1 6.957497 0 0 16005 +companythat 1 6.957497 0 0 16006 +setof 1 6.957497 0 0 16007 +chasi 1 6.957497 0 0 16008 +univoic 1 6.957497 0 0 16009 +cardsand 1 6.957497 0 0 16010 +vxwork 1 6.957497 0 0 16011 +compellingperform 1 6.957497 0 0 16012 +tosimilar 1 6.957497 0 0 16013 +anextens 1 6.957497 0 0 16014 +betterperform 1 6.957497 0 0 16015 +conventionaloper 1 6.957497 0 0 16016 +technicalconfer 1 6.957497 0 0 16017 +describeshow 1 6.957497 0 0 16018 +deflect 1 6.957497 0 0 16019 +fulgham 1 6.957497 0 0 16020 +homepagemenghe 1 6.957497 0 0 16021 +edubox 1 6.957497 0 0 16022 +findimag 1 6.957497 0 0 16023 +virag 1 6.957497 0 0 16024 +andqbicar 1 6.957497 0 0 16025 +singaporesingapor 1 6.957497 0 0 16026 +infomap 1 6.957497 0 0 16027 +andstatist 1 6.957497 0 0 16028 +singaporeonlin 1 6.957497 0 0 16029 +boardi 1 6.957497 0 0 16030 +anintellig 1 6.957497 0 0 16031 +menghe 1 6.957497 0 0 16032 +pagemichael 1 6.957497 0 0 16033 +ernsti 1 6.957497 0 0 16034 +riceunivers 1 6.957497 0 0 16035 +programanalysi 1 6.957497 0 0 16036 +coloc 1 6.957497 0 0 16037 +semanticsi 1 6.957497 0 0 16038 +resourcesfor 1 6.957497 0 0 16039 +slip 1 6.957497 0 0 16040 +possibleinterest 1 6.957497 0 0 16041 +mernst 1 6.957497 0 0 16042 +rttemberg 1 6.957497 0 0 16043 +anotherpart 1 6.957497 0 0 16044 +biberach 1 6.957497 0 0 16045 +swabia 1 6.957497 0 0 16046 +oberschwaben 1 6.957497 0 0 16047 +solitud 1 6.957497 0 0 16048 +dieangst 1 6.957497 0 0 16049 +torwart 1 6.957497 0 0 16050 +beim 1 6.957497 0 0 16051 +elfmet 1 6.957497 0 0 16052 +handk 1 6.957497 0 0 16053 +merengu 1 6.957497 0 0 16054 +publicationssepar 1 6.957497 0 0 16055 +olympiad 1 6.957497 0 0 16056 +yerewan 1 6.957497 0 0 16057 +csek 1 6.957497 0 0 16058 +csebi 1 6.957497 0 0 16059 +cse 1 6.957497 0 0 16060 +studentsimag 1 6.957497 0 0 16061 +engineeringy 1 6.957497 0 0 16062 +realaudio 1 6.957497 0 0 16063 +linksand 1 6.957497 0 0 16064 +toil 1 6.957497 0 0 16065 +unto 1 6.957497 0 0 16066 +glorywa 1 6.957497 0 0 16067 +nara 1 6.957497 0 0 16068 +interestspap 1 6.957497 0 0 16069 +himanshu 1 6.957497 0 0 16070 +nautiy 1 6.957497 0 0 16071 +pagehimanshu 1 6.957497 0 0 16072 +nautiyalthi 1 6.957497 0 0 16073 +nautiyaldept 1 6.957497 0 0 16074 +edugod 1 6.957497 0 0 16075 +gift 1 6.957497 0 0 16076 +personkind 1 6.957497 0 0 16077 +pratchett 1 6.957497 0 0 16078 +wodehouseth 1 6.957497 0 0 16079 +aviat 1 6.957497 0 0 16080 +numismat 1 6.957497 0 0 16081 +profound 1 6.957497 0 0 16082 +mediocr 1 6.957497 0 0 16083 +bemoan 1 6.957497 0 0 16084 +hype 1 6.957497 0 0 16085 +skeptic 1 6.957497 0 0 16086 +automaticconstruct 1 6.957497 0 0 16087 +wrapper 1 6.957497 0 0 16088 +beeninvolv 1 6.957497 0 0 16089 +glbal 1 6.957497 0 0 16090 +infrmatin 1 6.957497 0 0 16091 +sperhighwai 1 6.957497 0 0 16092 +meter 1 6.957497 0 0 16093 +ronald 1 6.957497 0 0 16094 +reagan 1 6.957497 0 0 16095 +wendel 1 6.957497 0 0 16096 +berri 1 6.957497 0 0 16097 +constitutesgood 1 6.957497 0 0 16098 +kushmerick 1 6.957497 0 0 16099 +debut 1 6.957497 0 0 16100 +washingtonoffic 1 6.957497 0 0 16101 +fouryear 1 6.957497 0 0 16102 +lancast 1 6.957497 0 0 16103 +holon 1 6.957497 0 0 16104 +amta 1 6.957497 0 0 16105 +aremondai 1 6.957497 0 0 16106 +tose 1 6.957497 0 0 16107 +graphicsprogram 1 6.957497 0 0 16108 +riderlink 1 6.957497 0 0 16109 +seattletransport 1 6.957497 0 0 16110 +tequila 1 6.957497 0 0 16111 +einstein 1 6.957497 0 0 16112 +pageaft 1 6.957497 0 0 16113 +puppi 1 6.957497 0 0 16114 +updatedthu 1 6.957497 0 0 16115 +beth 1 6.957497 0 0 16116 +pardo 1 6.957497 0 0 16117 +courtesei 1 6.957497 0 0 16118 +untitl 1 6.957497 0 0 16119 +morri 1 6.957497 0 0 16120 +pardodepart 1 6.957497 0 0 16121 +washingtonusapardo 1 6.957497 0 0 16122 +edunot 1 6.957497 0 0 16123 +asimgsrc 1 6.957497 0 0 16124 +blueribbon 1 6.957497 0 0 16125 +rib_trn_plain_sm 1 6.957497 0 0 16126 +opposit 1 6.957497 0 0 16127 +speechprohibit 1 6.957497 0 0 16128 +academicsom 1 6.957497 0 0 16129 +papersi 1 6.957497 0 0 16130 +rtcg 1 6.957497 0 0 16131 +architectureandcompil 1 6.957497 0 0 16132 +otherpeopl 1 6.957497 0 0 16133 +stylenon 1 6.957497 0 0 16134 +academicfeatur 1 6.957497 0 0 16135 +itemsbicyclesbusinessescomputersfoodhumori 1 6.957497 0 0 16136 +weirdnesslinux 1 6.957497 0 0 16137 +journalmusicgoofi 1 6.957497 0 0 16138 +politicssci 1 6.957497 0 0 16139 +dant 1 6.957497 0 0 16140 +trepan 1 6.957497 0 0 16141 +wors 1 6.957497 0 0 16142 +newhous 1 6.957497 0 0 16143 +yesterdai 1 6.957497 0 0 16144 +stuffpardo 1 6.957497 0 0 16145 +pardi 1 6.957497 0 0 16146 +drizzl 1 6.957497 0 0 16147 +przemek 1 6.957497 0 0 16148 +interast 1 6.957497 0 0 16149 +notbusi 1 6.957497 0 0 16150 +happenswhen 1 6.957497 0 0 16151 +projectsspinan 1 6.957497 0 0 16152 +systemsgroup 1 6.957497 0 0 16153 +polish 1 6.957497 0 0 16154 +myoffic 1 6.957497 0 0 16155 +iliv 1 6.957497 0 0 16156 +norwegian 1 6.957497 0 0 16157 +likea 1 6.957497 0 0 16158 +localchines 1 6.957497 0 0 16159 +mundan 1 6.957497 0 0 16160 +stuffi 1 6.957497 0 0 16161 +hotlink 1 6.957497 0 0 16162 +pagesstuff 1 6.957497 0 0 16163 +maintainmi 1 6.957497 0 0 16164 +mewher 1 6.957497 0 0 16165 +inmorgan 1 6.957497 0 0 16166 +fromuc 1 6.957497 0 0 16167 +andy 1 6.957497 0 0 16168 +ididn 1 6.957497 0 0 16169 +ialso 1 6.957497 0 0 16170 +stuffwhil 1 6.957497 0 0 16171 +relatedact 1 6.957497 0 0 16172 +evengot 1 6.957497 0 0 16173 +marriag 1 6.957497 0 0 16174 +joann 1 6.957497 0 0 16175 +anexcus 1 6.957497 0 0 16176 +ofbergen 1 6.957497 0 0 16177 +hillier 1 6.957497 0 0 16178 +returnedto 1 6.957497 0 0 16179 +rollerblad 1 6.957497 0 0 16180 +wasnow 1 6.957497 0 0 16181 +hewlettpackard 1 6.957497 0 0 16182 +vengeanc 1 6.957497 0 0 16183 +intwo 1 6.957497 0 0 16184 +inseason 1 6.957497 0 0 16185 +justcommut 1 6.957497 0 0 16186 +lindyhop 1 6.957497 0 0 16187 +linethat 1 6.957497 0 0 16188 +doctyp 1 6.957497 0 0 16189 +ietf 1 6.957497 0 0 16190 +refresh 1 6.957497 0 0 16191 +frdric 1 6.957497 0 0 16192 +lcommun 1 6.957497 0 0 16193 +dani 1 6.957497 0 0 16194 +corto 1 6.957497 0 0 16195 +maltes 1 6.957497 0 0 16196 +venis 1 6.957497 0 0 16197 +traditionn 1 6.957497 0 0 16198 +systemher 1 6.957497 0 0 16199 +wxyc 1 6.957497 0 0 16200 +usai 1 6.957497 0 0 16201 +thisup 1 6.957497 0 0 16202 +weatherreport 1 6.957497 0 0 16203 +sneak 1 6.957497 0 0 16204 +drumhel 1 6.957497 0 0 16205 +rainier 1 6.957497 0 0 16206 +cleardai 1 6.957497 0 0 16207 +astructur 1 6.957497 0 0 16208 +anatomist 1 6.957497 0 0 16209 +internetracquetbal 1 6.957497 0 0 16210 +rotisseriebasebal 1 6.957497 0 0 16211 +fromusa 1 6.957497 0 0 16212 +africancichlid 1 6.957497 0 0 16213 +honolulu 1 6.957497 0 0 16214 +kalalau 1 6.957497 0 0 16215 +gambl 1 6.957497 0 0 16216 +darn 1 6.957497 0 0 16217 +javafamili 1 6.957497 0 0 16218 +mydad 1 6.957497 0 0 16219 +whoi 1 6.957497 0 0 16220 +polem 1 6.957497 0 0 16221 +emanuel 1 6.957497 0 0 16222 +swedenborg 1 6.957497 0 0 16223 +nahl 1 6.957497 0 0 16224 +whoprovid 1 6.957497 0 0 16225 +realtor 1 6.957497 0 0 16226 +uncl 1 6.957497 0 0 16227 +bioscienc 1 6.957497 0 0 16228 +bookmarksif 1 6.957497 0 0 16229 +twilight 1 6.957497 0 0 16230 +grimm 1 6.957497 0 0 16231 +likebrian 1 6.957497 0 0 16232 +andwayn 1 6.957497 0 0 16233 +ofinterpret 1 6.957497 0 0 16234 +rockyhom 1 6.957497 0 0 16235 +lobo 1 6.957497 0 0 16236 +listrandom 1 6.957497 0 0 16237 +limb 1 6.957497 0 0 16238 +arthroscop 1 6.957497 0 0 16239 +wrist 1 6.957497 0 0 16240 +dylansaid 1 6.957497 0 0 16241 +flowbe 1 6.957497 0 0 16242 +beingexperiment 1 6.957497 0 0 16243 +somepictur 1 6.957497 0 0 16244 +eatsomeon 1 6.957497 0 0 16245 +sincer 1 6.957497 0 0 16246 +forexampl 1 6.957497 0 0 16247 +leftth 1 6.957497 0 0 16248 +washingtonmemori 1 6.957497 0 0 16249 +researchdepart 1 6.957497 0 0 16250 +sharesth 1 6.957497 0 0 16251 +incur 1 6.957497 0 0 16252 +monitorappl 1 6.957497 0 0 16253 +resolvetlb 1 6.957497 0 0 16254 +tlbi 1 6.957497 0 0 16255 +severalmodern 1 6.957497 0 0 16256 +amultipl 1 6.957497 0 0 16257 +tlbperform 1 6.957497 0 0 16258 +ofwast 1 6.957497 0 0 16259 +todiffer 1 6.957497 0 0 16260 +constructingsuperpag 1 6.957497 0 0 16261 +ofmemori 1 6.957497 0 0 16262 +balancesth 1 6.957497 0 0 16263 +tlbmiss 1 6.957497 0 0 16264 +memorycopi 1 6.957497 0 0 16265 +misspattern 1 6.957497 0 0 16266 +attain 1 6.957497 0 0 16267 +largepag 1 6.957497 0 0 16268 +makea 1 6.957497 0 0 16269 +papersrom 1 6.957497 0 0 16270 +abstractpostscriptjava 1 6.957497 0 0 16271 +xjava 1 6.957497 0 0 16272 +benchmarkstoolsto 1 6.957497 0 0 16273 +vebeen 1 6.957497 0 0 16274 +yetpublicli 1 6.957497 0 0 16275 +etchhom 1 6.957497 0 0 16276 +documentationproject 1 6.957497 0 0 16277 +scam 1 6.957497 0 0 16278 +splash 1 6.957497 0 0 16279 +rogersrrog 1 6.957497 0 0 16280 +laboratri 1 6.957497 0 0 16281 +systol 1 6.957497 0 0 16282 +morpholog 1 6.957497 0 0 16283 +groundtruth 1 6.957497 0 0 16284 +environment 1 6.957497 0 0 16285 +ncee 1 6.957497 0 0 16286 +ag 1 6.957497 0 0 16287 +corn 1 6.957497 0 0 16288 +jessica 1 6.957497 0 0 16289 +squishi 1 6.957497 0 0 16290 +kuow 1 6.957497 0 0 16291 +stationi 1 6.957497 0 0 16292 +pecan 1 6.957497 0 0 16293 +seattlelast 1 6.957497 0 0 16294 +salisburysalisbur 1 6.957497 0 0 16295 +lifehistori 1 6.957497 0 0 16296 +vitacool 1 6.957497 0 0 16297 +pittsburghfor 1 6.957497 0 0 16298 +mnow 1 6.957497 0 0 16299 +strongbackground 1 6.957497 0 0 16300 +trash 1 6.957497 0 0 16301 +tocqeuvil 1 6.957497 0 0 16302 +tiresom 1 6.957497 0 0 16303 +exercisepolit 1 6.957497 0 0 16304 +tocurr 1 6.957497 0 0 16305 +merri 1 6.957497 0 0 16306 +onan 1 6.957497 0 0 16307 +projectsspinspin 1 6.957497 0 0 16308 +omnifemtokernel 1 6.957497 0 0 16309 +writingspin 1 6.957497 0 0 16310 +napa 1 6.957497 0 0 16311 +timer 1 6.957497 0 0 16312 +hikingthi 1 6.957497 0 0 16313 +segal 1 6.957497 0 0 16314 +segaldepart 1 6.957497 0 0 16315 +archeri 1 6.957497 0 0 16316 +printf 1 6.957497 0 0 16317 +cologn 1 6.957497 0 0 16318 +putchar 1 6.957497 0 0 16319 +bergstefan 1 6.957497 0 0 16320 +sgberg 1 6.957497 0 0 16321 +mittler 1 6.957497 0 0 16322 +thgrade 1 6.957497 0 0 16323 +schillergymnasium 1 6.957497 0 0 16324 +statesto 1 6.957497 0 0 16325 +distinctionin 1 6.957497 0 0 16326 +fromindiana 1 6.957497 0 0 16327 +momenth 1 6.957497 0 0 16328 +thiscenturi 1 6.957497 0 0 16329 +yourselfsometh 1 6.957497 0 0 16330 +particularsolut 1 6.957497 0 0 16331 +sall 1 6.957497 0 0 16332 +carriag 1 6.957497 0 0 16333 +inpostscript 1 6.957497 0 0 16334 +andtex 1 6.957497 0 0 16335 +shadegreet 1 6.957497 0 0 16336 +salut 1 6.957497 0 0 16337 +dubcs 1 6.957497 0 0 16338 +renderingof 1 6.957497 0 0 16339 +walkthruproject 1 6.957497 0 0 16340 +amonglot 1 6.957497 0 0 16341 +paperdescrib 1 6.957497 0 0 16342 +thepictur 1 6.957497 0 0 16343 +aspectsof 1 6.957497 0 0 16344 +thegraph 1 6.957497 0 0 16345 +scrunch 1 6.957497 0 0 16346 +leungshun 1 6.957497 0 0 16347 +shuntak 1 6.957497 0 0 16348 +koga 1 6.957497 0 0 16349 +skoga 1 6.957497 0 0 16350 +bynow 1 6.957497 0 0 16351 +alsoheavili 1 6.957497 0 0 16352 +andgovern 1 6.957497 0 0 16353 +alic 1 6.957497 0 0 16354 +destroi 1 6.957497 0 0 16355 +hassounit 1 6.957497 0 0 16356 +whoturn 1 6.957497 0 0 16357 +onarchitectur 1 6.957497 0 0 16358 +carlebel 1 6.957497 0 0 16359 +deede 1 6.957497 0 0 16360 +simultaneousmultithread 1 6.957497 0 0 16361 +tomultithread 1 6.957497 0 0 16362 +controlsystem 1 6.957497 0 0 16363 +patio 1 6.957497 0 0 16364 +workspac 1 6.957497 0 0 16365 +stottler 1 6.957497 0 0 16366 +henk 1 6.957497 0 0 16367 +oondhiu 1 6.957497 0 0 16368 +mango 1 6.957497 0 0 16369 +phad 1 6.957497 0 0 16370 +thai 1 6.957497 0 0 16371 +kung 1 6.957497 0 0 16372 +beverag 1 6.957497 0 0 16373 +screwdriv 1 6.957497 0 0 16374 +scotch 1 6.957497 0 0 16375 +ic 1 6.957497 0 0 16376 +dire 1 6.957497 0 0 16377 +pink 1 6.957497 0 0 16378 +collin 1 6.957497 0 0 16379 +petti 1 6.957497 0 0 16380 +sparekh 1 6.957497 0 0 16381 +fishcam 1 6.957497 0 0 16382 +memorialhappi 1 6.957497 0 0 16383 +kay 1 6.957497 0 0 16384 +pasti 1 6.957497 0 0 16385 +ur 1 6.957497 0 0 16386 +pro 1 6.957497 0 0 16387 +wedgwood 1 6.957497 0 0 16388 +diet 1 6.957497 0 0 16389 +roast 1 6.957497 0 0 16390 +bagel 1 6.957497 0 0 16391 +racer 1 6.957497 0 0 16392 +tini 1 6.957497 0 0 16393 +pinki 1 6.957497 0 0 16394 +phantom 1 6.957497 0 0 16395 +scrub 1 6.957497 0 0 16396 +thehomepag 1 6.957497 0 0 16397 +ofsung 1 6.957497 0 0 16398 +eunchoi 1 6.957497 0 0 16399 +myschoollifemi 1 6.957497 0 0 16400 +zplcompil 1 6.957497 0 0 16401 +beenspend 1 6.957497 0 0 16402 +communicationgener 1 6.957497 0 0 16403 +architechtur 1 6.957497 0 0 16404 +communicationlibrari 1 6.957497 0 0 16405 +programson 1 6.957497 0 0 16406 +nodeperform 1 6.957497 0 0 16407 +alsobeen 1 6.957497 0 0 16408 +chaosrout 1 6.957497 0 0 16409 +thatexperi 1 6.957497 0 0 16410 +inzpl 1 6.957497 0 0 16411 +myjunior 1 6.957497 0 0 16412 +dinner 1 6.957497 0 0 16413 +samewithout 1 6.957497 0 0 16414 +twosoccerteam 1 6.957497 0 0 16415 +cousin 1 6.957497 0 0 16416 +recdivis 1 6.957497 0 0 16417 +andcoop 1 6.957497 0 0 16418 +sacrifiedmi 1 6.957497 0 0 16419 +usualstep 1 6.957497 0 0 16420 +aerobicsclass 1 6.957497 0 0 16421 +trainingclass 1 6.957497 0 0 16422 +abit 1 6.957497 0 0 16423 +shakespear 1 6.957497 0 0 16424 +publictelevis 1 6.957497 0 0 16425 +classicalmus 1 6.957497 0 0 16426 +myotherlif 1 6.957497 0 0 16427 +sungeun 1 6.957497 0 0 16428 +multiprocessorsenviron 1 6.957497 0 0 16429 +frommi 1 6.957497 0 0 16430 +timeappl 1 6.957497 0 0 16431 +innow 1 6.957497 0 0 16432 +uniprogram 1 6.957497 0 0 16433 +goodglob 1 6.957497 0 0 16434 +cvpublic 1 6.957497 0 0 16435 +worldvietnameseresourc 1 6.957497 0 0 16436 +netcyclingplayground 1 6.957497 0 0 16437 +homepageyour 1 6.957497 0 0 16438 +rusti 1 6.957497 0 0 16439 +projectsopali 1 6.957497 0 0 16440 +thisexperi 1 6.957497 0 0 16441 +distrbut 1 6.957497 0 0 16442 +ecoop 1 6.957497 0 0 16443 +bosch 1 6.957497 0 0 16444 +messeng 1 6.957497 0 0 16445 +relatedgoodi 1 6.957497 0 0 16446 +clio 1 6.957497 0 0 16447 +andbrows 1 6.957497 0 0 16448 +kittyi 1 6.957497 0 0 16449 +siames 1 6.957497 0 0 16450 +memor 1 6.957497 0 0 16451 +therear 1 6.957497 0 0 16452 +tofind 1 6.957497 0 0 16453 +alsor 1 6.957497 0 0 16454 +classesi 1 6.957497 0 0 16455 +ofeight 1 6.957497 0 0 16456 +seminarlinux 1 6.957497 0 0 16457 +gameseverybodi 1 6.957497 0 0 16458 +gametom 1 6.957497 0 0 16459 +coolgam 1 6.957497 0 0 16460 +sleepingi 1 6.957497 0 0 16461 +crochet 1 6.957497 0 0 16462 +tlau 1 6.957497 0 0 16463 +receptionist 1 6.957497 0 0 16464 +thelma 1 6.957497 0 0 16465 +louis 1 6.957497 0 0 16466 +oyster 1 6.957497 0 0 16467 +surrealist 1 6.957497 0 0 16468 +propheci 1 6.957497 0 0 16469 +carol 1 6.957497 0 0 16470 +imprison 1 6.957497 0 0 16471 +captor 1 6.957497 0 0 16472 +washingtonsinc 1 6.957497 0 0 16473 +trial 1 6.957497 0 0 16474 +toanoth 1 6.957497 0 0 16475 +inmat 1 6.957497 0 0 16476 +wasrecaptur 1 6.957497 0 0 16477 +hisplight 1 6.957497 0 0 16478 +rescu 1 6.957497 0 0 16479 +ofwhat 1 6.957497 0 0 16480 +tracyk 1 6.957497 0 0 16481 +ieeesymposium 1 6.957497 0 0 16482 +measurementand 1 6.957497 0 0 16483 +usingo 1 6.957497 0 0 16484 +restor 1 6.957497 0 0 16485 +arctic 1 6.957497 0 0 16486 +esca 1 6.957497 0 0 16487 +volvo 1 6.957497 0 0 16488 +smithsonian 1 6.957497 0 0 16489 +uwin 1 6.957497 0 0 16490 +vanhilst 1 6.957497 0 0 16491 +angela 1 6.957497 0 0 16492 +vanhilstmichael 1 6.957497 0 0 16493 +vanhilstvanhilst 1 6.957497 0 0 16494 +edumvh 1 6.957497 0 0 16495 +usaclick 1 6.957497 0 0 16496 +personalmik 1 6.957497 0 0 16497 +theend 1 6.957497 0 0 16498 +udub 1 6.957497 0 0 16499 +atibm 1 6.957497 0 0 16500 +unterfac 1 6.957497 0 0 16501 +sdata 1 6.957497 0 0 16502 +maintainingcomput 1 6.957497 0 0 16503 +saoimagewhich 1 6.957497 0 0 16504 +astronom 1 6.957497 0 0 16505 +saoimag 1 6.957497 0 0 16506 +gnudistribut 1 6.957497 0 0 16507 +wyatt 1 6.957497 0 0 16508 +mandel 1 6.957497 0 0 16509 +minkfor 1 6.957497 0 0 16510 +seismologistsin 1 6.957497 0 0 16511 +theallianc 1 6.957497 0 0 16512 +colombiain 1 6.957497 0 0 16513 +studentsbrows 1 6.957497 0 0 16514 +pine 1 6.957497 0 0 16515 +shirei 1 6.957497 0 0 16516 +stenvik 1 6.957497 0 0 16517 +frommicrosoft 1 6.957497 0 0 16518 +sacrif 1 6.957497 0 0 16519 +isota 1 6.957497 0 0 16520 +inarchitectur 1 6.957497 0 0 16521 +wooden 1 6.957497 0 0 16522 +planningfrom 1 6.957497 0 0 16523 +mitand 1 6.957497 0 0 16524 +visualdesign 1 6.957497 0 0 16525 +andkayak 1 6.957497 0 0 16526 +bronson 1 6.957497 0 0 16527 +sebastien 1 6.957497 0 0 16528 +hilst 1 6.957497 0 0 16529 +vassilylong 1 6.957497 0 0 16530 +linki 1 6.957497 0 0 16531 +fewfil 1 6.957497 0 0 16532 +thisstuff 1 6.957497 0 0 16533 +quotesrussian 1 6.957497 0 0 16534 +pagesvari 1 6.957497 0 0 16535 +linksguid 1 6.957497 0 0 16536 +formsoth 1 6.957497 0 0 16537 +pagencsa 1 6.957497 0 0 16538 +andvortex 1 6.957497 0 0 16539 +befast 1 6.957497 0 0 16540 +themvi 1 6.957497 0 0 16541 +ourdepartment 1 6.957497 0 0 16542 +beenupgrad 1 6.957497 0 0 16543 +thezpl 1 6.957497 0 0 16544 +languageto 1 6.957497 0 0 16545 +repartit 1 6.957497 0 0 16546 +theslidesfrom 1 6.957497 0 0 16547 +toresourc 1 6.957497 0 0 16548 +eduobject 1 6.957497 0 0 16549 +pastor 1 6.957497 0 0 16550 +vybrasyvalsya 1 6.957497 0 0 16551 +okna 1 6.957497 0 0 16552 +pyatyi 1 6.957497 0 0 16553 +deystvov 1 6.957497 0 0 16554 +uwvirgil 1 6.957497 0 0 16555 +bourassavirgil 1 6.957497 0 0 16556 +interestsinclud 1 6.957497 0 0 16557 +boeingin 1 6.957497 0 0 16558 +scienceorgan 1 6.957497 0 0 16559 +bellevu 1 6.957497 0 0 16560 +arizonast 1 6.957497 0 0 16561 +temp 1 6.957497 0 0 16562 +accesswhat 1 6.957497 0 0 16563 +statusoccasion 1 6.957497 0 0 16564 +angelo 1 6.957497 0 0 16565 +scientistat 1 6.957497 0 0 16566 +morristown 1 6.957497 0 0 16567 +researchwork 1 6.957497 0 0 16568 +distributedsoftwar 1 6.957497 0 0 16569 +anatida 1 6.957497 0 0 16570 +indc 1 6.957497 0 0 16571 +foundher 1 6.957497 0 0 16572 +integrationof 1 6.957497 0 0 16573 +bydr 1 6.957497 0 0 16574 +fromrequir 1 6.957497 0 0 16575 +middletown 1 6.957497 0 0 16576 +purus 1 6.957497 0 0 16577 +lesserext 1 6.957497 0 0 16578 +ardent 1 6.957497 0 0 16579 +folow 1 6.957497 0 0 16580 +superson 1 6.957497 0 0 16581 +cowboi 1 6.957497 0 0 16582 +keen 1 6.957497 0 0 16583 +waltz 1 6.957497 0 0 16584 +foxtrot 1 6.957497 0 0 16585 +chacha 1 6.957497 0 0 16586 +rhumba 1 6.957497 0 0 16587 +mambo 1 6.957497 0 0 16588 +ecosoc 1 6.957497 0 0 16589 +rapidpopul 1 6.957497 0 0 16590 +prolifer 1 6.957497 0 0 16591 +ghalib 1 6.957497 0 0 16592 +centuryindian 1 6.957497 0 0 16593 +poet 1 6.957497 0 0 16594 +romant 1 6.957497 0 0 16595 +victorian 1 6.957497 0 0 16596 +obligatori 1 6.957497 0 0 16597 +sitesthat 1 6.957497 0 0 16598 +skywhoi 1 6.957497 0 0 16599 +wherechateau 1 6.957497 0 0 16600 +differentmemori 1 6.957497 0 0 16601 +beingdon 1 6.957497 0 0 16602 +rightnow 1 6.957497 0 0 16603 +peoplewho 1 6.957497 0 0 16604 +testwayn 1 6.957497 0 0 16605 +pagewilliam 1 6.957497 0 0 16606 +theadvisori 1 6.957497 0 0 16607 +airesearch 1 6.957497 0 0 16608 +ofintellig 1 6.957497 0 0 16609 +isco 1 6.957497 0 0 16610 +scad 1 6.957497 0 0 16611 +seattlewa 1 6.957497 0 0 16612 +sitesworldwid 1 6.957497 0 0 16613 +arehi 1 6.957497 0 0 16614 +aip 1 6.957497 0 0 16615 +exhaustivelist 1 6.957497 0 0 16616 +stormymountain 1 6.957497 0 0 16617 +galen 1 6.957497 0 0 16618 +desert 1 6.957497 0 0 16619 +morocco 1 6.957497 0 0 16620 +hsiehwilson 1 6.957497 0 0 16621 +hsiehi 1 6.957497 0 0 16622 +theschool 1 6.957497 0 0 16623 +engineeringatmit 1 6.957497 0 0 16624 +werefran 1 6.957497 0 0 16625 +kaashoekandbil 1 6.957497 0 0 16626 +publicationsselect 1 6.957497 0 0 16627 +interestswilson 1 6.957497 0 0 16628 +numberha 1 6.957497 0 0 16629 +whsieh 1 6.957497 0 0 16630 +keyoctob 1 6.957497 0 0 16631 +wolmanwolman 1 6.957497 0 0 16632 +eduworkcomput 1 6.957497 0 0 16633 +isroom 1 6.957497 0 0 16634 +executablesrocki 1 6.957497 0 0 16635 +performanceon 1 6.957497 0 0 16636 +trees 1 6.957497 0 0 16637 +fixha 1 6.957497 0 0 16638 +hallwolman 1 6.957497 0 0 16639 +diseasewolman 1 6.957497 0 0 16640 +lumber 1 6.957497 0 0 16641 +xiaohan 1 6.957497 0 0 16642 +xqin 1 6.957497 0 0 16643 +basedmultiprocessor 1 6.957497 0 0 16644 +nalluri 1 6.957497 0 0 16645 +processingon 1 6.957497 0 0 16646 +chinaread 1 6.957497 0 0 16647 +chinesesearch 1 6.957497 0 0 16648 +yasushi 1 6.957497 0 0 16649 +saitoyasushi 1 6.957497 0 0 16650 +saito 1 6.957497 0 0 16651 +atdepart 1 6.957497 0 0 16652 +workingwith 1 6.957497 0 0 16653 +andperson 1 6.957497 0 0 16654 +sightse 1 6.957497 0 0 16655 +trainer 1 6.957497 0 0 16656 +dvorak 1 6.957497 0 0 16657 +trycanva 1 6.957497 0 0 16658 +isra 1 6.957497 0 0 16659 +myundergradu 1 6.957497 0 0 16660 +hebrewunivers 1 6.957497 0 0 16661 +userwith 1 6.957497 0 0 16662 +orenetzioni 1 6.957497 0 0 16663 +sinai 1 6.957497 0 0 16664 +jeeptour 1 6.957497 0 0 16665 +professorand 1 6.957497 0 0 16666 +ingnieur 1 6.957497 0 0 16667 +doctorat 1 6.957497 0 0 16668 +decalcul 1 6.957497 0 0 16669 +technologygroup 1 6.957497 0 0 16670 +thesearea 1 6.957497 0 0 16671 +distinguishedvisitor 1 6.957497 0 0 16672 +asprogram 1 6.957497 0 0 16673 +sigarch 1 6.957497 0 0 16674 +eighteen 1 6.957497 0 0 16675 +professorba 1 6.957497 0 0 16676 +laboratoriesand 1 6.957497 0 0 16677 +inacademia 1 6.957497 0 0 16678 +hashad 1 6.957497 0 0 16679 +accent 1 6.957497 0 0 16680 +comparisonwith 1 6.957497 0 0 16681 +andisca 1 6.957497 0 0 16682 +optimisticapproach 1 6.957497 0 0 16683 +ceciland 1 6.957497 0 0 16684 +languageserv 1 6.957497 0 0 16685 +compilersystem 1 6.957497 0 0 16686 +andinterprocedur 1 6.957497 0 0 16687 +withfront 1 6.957497 0 0 16688 +chamberswa 1 6.957497 0 0 16689 +implementationsund 1 6.957497 0 0 16690 +systemintegr 1 6.957497 0 0 16691 +themodula 1 6.957497 0 0 16692 +spinalso 1 6.957497 0 0 16693 +grainedextens 1 6.957497 0 0 16694 +researchproject 1 6.957497 0 0 16695 +informationprof 1 6.957497 0 0 16696 +chambersdepart 1 6.957497 0 0 16697 +ebelingdepart 1 6.957497 0 0 16698 +wheatoncolleg 1 6.957497 0 0 16699 +illinoisunivers 1 6.957497 0 0 16700 +mellonunivers 1 6.957497 0 0 16701 +vlsiarchitectur 1 6.957497 0 0 16702 +hitech 1 6.957497 0 0 16703 +chessmachin 1 6.957497 0 0 16704 +apex 1 6.957497 0 0 16705 +routingnetwork 1 6.957497 0 0 16706 +placementand 1 6.957497 0 0 16707 +teachingspr 1 6.957497 0 0 16708 +designoffic 1 6.957497 0 0 16709 +fccm 1 6.957497 0 0 16710 +napamai 1 6.957497 0 0 16711 +burlington 1 6.957497 0 0 16712 +chicagojun 1 6.957497 0 0 16713 +vegasresearch 1 6.957497 0 0 16714 +amara 1 6.957497 0 0 16715 +galleryelan 1 6.957497 0 0 16716 +galleryebel 1 6.957497 0 0 16717 +hanksunivers 1 6.957497 0 0 16718 +washingtondepart 1 6.957497 0 0 16719 +architecturesai 1 6.957497 0 0 16720 +symphoni 1 6.957497 0 0 16721 +opera 1 6.957497 0 0 16722 +edita 1 6.957497 0 0 16723 +gruberova 1 6.957497 0 0 16724 +giulini 1 6.957497 0 0 16725 +discographi 1 6.957497 0 0 16726 +sumac 1 6.957497 0 0 16727 +highland 1 6.957497 0 0 16728 +receivedhi 1 6.957497 0 0 16729 +glasgow 1 6.957497 0 0 16730 +graduateapprentic 1 6.957497 0 0 16731 +edison 1 6.957497 0 0 16732 +phddegre 1 6.957497 0 0 16733 +learningin 1 6.957497 0 0 16734 +coursefrom 1 6.957497 0 0 16735 +colin 1 6.957497 0 0 16736 +cherri 1 6.957497 0 0 16737 +thebbc 1 6.957497 0 0 16738 +theuw 1 6.957497 0 0 16739 +departmentsform 1 6.957497 0 0 16740 +netmethodolog 1 6.957497 0 0 16741 +presentmemb 1 6.957497 0 0 16742 +salesman 1 6.957497 0 0 16743 +karprichard 1 6.957497 0 0 16744 +karpprofessor 1 6.957497 0 0 16745 +ofcomputersci 1 6.957497 0 0 16746 +andadjunct 1 6.957497 0 0 16747 +ofmolecularbiotechnologyunivers 1 6.957497 0 0 16748 +eduaward 1 6.957497 0 0 16749 +membershipsn 1 6.957497 0 0 16750 +babbag 1 6.957497 0 0 16751 +sciencedistinguish 1 6.957497 0 0 16752 +senat 1 6.957497 0 0 16753 +berkeleylanchest 1 6.957497 0 0 16754 +fulkerson 1 6.957497 0 0 16755 +hermann 1 6.957497 0 0 16756 +forsoci 1 6.957497 0 0 16757 +governor 1 6.957497 0 0 16758 +scienceinstitut 1 6.957497 0 0 16759 +presentselect 1 6.957497 0 0 16760 +turingaward 1 6.957497 0 0 16761 +upfal 1 6.957497 0 0 16762 +spanningtre 1 6.957497 0 0 16763 +ahalf 1 6.957497 0 0 16764 +exponentialprogress 1 6.957497 0 0 16765 +annualfaculti 1 6.957497 0 0 16766 +vicepresid 1 6.957497 0 0 16767 +gore 1 6.957497 0 0 16768 +eniac 1 6.957497 0 0 16769 +thanniversari 1 6.957497 0 0 16770 +nathanmyhrvold 1 6.957497 0 0 16771 +joinsedlazowska 1 6.957497 0 0 16772 +theuwcs 1 6.957497 0 0 16773 +testimonyto 1 6.957497 0 0 16774 +georgejetson 1 6.957497 0 0 16775 +forfr 1 6.957497 0 0 16776 +flintston 1 6.957497 0 0 16777 +mostlywearsti 1 6.957497 0 0 16778 +flier 1 6.957497 0 0 16779 +healso 1 6.957497 0 0 16780 +havefunnynos 1 6.957497 0 0 16781 +allgradu 1 6.957497 0 0 16782 +laboratoriesin 1 6.957497 0 0 16783 +ofcra 1 6.957497 0 0 16784 +scomputersci 1 6.957497 0 0 16785 +formicrosoft 1 6.957497 0 0 16786 +personnationalsemiconductor 1 6.957497 0 0 16787 +academicadvisori 1 6.957497 0 0 16788 +forcabl 1 6.957497 0 0 16789 +hows 1 6.957497 0 0 16790 +cascadia 1 6.957497 0 0 16791 +committeesfor 1 6.957497 0 0 16792 +eecsat 1 6.957497 0 0 16793 +councilpanel 1 6.957497 0 0 16794 +agencyhigh 1 6.957497 0 0 16795 +computingand 1 6.957497 0 0 16796 +sutherland 1 6.957497 0 0 16797 +examinersfor 1 6.957497 0 0 16798 +sspecial 1 6.957497 0 0 16799 +chairof 1 6.957497 0 0 16800 +andeditor 1 6.957497 0 0 16801 +servinga 1 6.957497 0 0 16802 +onacadem 1 6.957497 0 0 16803 +thecommitte 1 6.957497 0 0 16804 +deanship 1 6.957497 0 0 16805 +artsand 1 6.957497 0 0 16806 +biotechnolog 1 6.957497 0 0 16807 +amemb 1 6.957497 0 0 16808 +deanof 1 6.957497 0 0 16809 +fellowof 1 6.957497 0 0 16810 +associationfor 1 6.957497 0 0 16811 +andelectron 1 6.957497 0 0 16812 +seventeenph 1 6.957497 0 0 16813 +studentshav 1 6.957497 0 0 16814 +integratedoverview 1 6.957497 0 0 16815 +apersuas 1 6.957497 0 0 16816 +forloc 1 6.957497 0 0 16817 +consumpt 1 6.957497 0 0 16818 +persuas 1 6.957497 0 0 16819 +playertopten 1 6.957497 0 0 16820 +csebuild 1 6.957497 0 0 16821 +abbrevi 1 6.957497 0 0 16822 +cvcomputingresearch 1 6.957497 0 0 16823 +forwardmassi 1 6.957497 0 0 16824 +goldmanreport 1 6.957497 0 0 16825 +alleg 1 6.957497 0 0 16826 +cseph 1 6.957497 0 0 16827 +flaw 1 6.957497 0 0 16828 +medianyear 1 6.957497 0 0 16829 +boardstudi 1 6.957497 0 0 16830 +saturdayseminar 1 6.957497 0 0 16831 +houseappropri 1 6.957497 0 0 16832 +interestinghom 1 6.957497 0 0 16833 +odeto 1 6.957497 0 0 16834 +tallman 1 6.957497 0 0 16835 +trask 1 6.957497 0 0 16836 +departsfor 1 6.957497 0 0 16837 +lanelazowska 1 6.957497 0 0 16838 +pagedirect 1 6.957497 0 0 16839 +houseshilshol 1 6.957497 0 0 16840 +aquat 1 6.957497 0 0 16841 +discoveredreview 1 6.957497 0 0 16842 +poetryfing 1 6.957497 0 0 16843 +scheduleinform 1 6.957497 0 0 16844 +reflector 1 6.957497 0 0 16845 +shaw 1 6.957497 0 0 16846 +facultyappoint 1 6.957497 0 0 16847 +theibm 1 6.957497 0 0 16848 +publicationsinclud 1 6.957497 0 0 16849 +andan 1 6.957497 0 0 16850 +sciencescreen 1 6.957497 0 0 16851 +associateeditor 1 6.957497 0 0 16852 +andin 1 6.957497 0 0 16853 +scholarat 1 6.957497 0 0 16854 +theundecid 1 6.957497 0 0 16855 +hecreat 1 6.957497 0 0 16856 +thepok 1 6.957497 0 0 16857 +nowprincip 1 6.957497 0 0 16858 +nwli 1 6.957497 0 0 16859 +computerand 1 6.957497 0 0 16860 +foundationadvisori 1 6.957497 0 0 16861 +doctoraldissert 1 6.957497 0 0 16862 +degreesund 1 6.957497 0 0 16863 +seniorproject 1 6.957497 0 0 16864 +proteu 1 6.957497 0 0 16865 +msee 1 6.957497 0 0 16866 +mcgill 1 6.957497 0 0 16867 +govt 1 6.957497 0 0 16868 +offault 1 6.957497 0 0 16869 +tocach 1 6.957497 0 0 16870 +broadband 1 6.957497 0 0 16871 +generalizedenhanc 1 6.957497 0 0 16872 +hypercub 1 6.957497 0 0 16873 +coars 1 6.957497 0 0 16874 +dpcnl 1 6.957497 0 0 16875 +linkp 1 6.957497 0 0 16876 +fromharvard 1 6.957497 0 0 16877 +connecticut 1 6.957497 0 0 16878 +professorat 1 6.957497 0 0 16879 +hasalso 1 6.957497 0 0 16880 +atkob 1 6.957497 0 0 16881 +enseign 1 6.957497 0 0 16882 +superieur 1 6.957497 0 0 16883 +techniquesd 1 6.957497 0 0 16884 +electroniqu 1 6.957497 0 0 16885 +irest 1 6.957497 0 0 16886 +nant 1 6.957497 0 0 16887 +hasrec 1 6.957497 0 0 16888 +forimag 1 6.957497 0 0 16889 +processingand 1 6.957497 0 0 16890 +bordeaux 1 6.957497 0 0 16891 +ofimag 1 6.957497 0 0 16892 +currentlydirect 1 6.957497 0 0 16893 +throughimag 1 6.957497 0 0 16894 +softwarethat 1 6.957497 0 0 16895 +thebook 1 6.957497 0 0 16896 +introductionus 1 6.957497 0 0 16897 +accompanyingsoftwar 1 6.957497 0 0 16898 +serveda 1 6.957497 0 0 16899 +subconfer 1 6.957497 0 0 16900 +patternrecognit 1 6.957497 0 0 16901 +societyworkshop 1 6.957497 0 0 16902 +machineintellig 1 6.957497 0 0 16903 +symposiaon 1 6.957497 0 0 16904 +editorialboard 1 6.957497 0 0 16905 +cvgip 1 6.957497 0 0 16906 +engineeringeduc 1 6.957497 0 0 16907 +graduateof 1 6.957497 0 0 16908 +antioch 1 6.957497 0 0 16909 +hejoin 1 6.957497 0 0 16910 +seventeen 1 6.957497 0 0 16911 +atpurdu 1 6.957497 0 0 16912 +inperhap 1 6.957497 0 0 16913 +aschairman 1 6.957497 0 0 16914 +professorin 1 6.957497 0 0 16915 +iscoauthor 1 6.957497 0 0 16916 +executivecommitte 1 6.957497 0 0 16917 +interestgroup 1 6.957497 0 0 16918 +chairmanof 1 6.957497 0 0 16919 +annualsymposium 1 6.957497 0 0 16920 +hasserv 1 6.957497 0 0 16921 +stechnic 1 6.957497 0 0 16922 +advisorysubcommitte 1 6.957497 0 0 16923 +thiscommitte 1 6.957497 0 0 16924 +formallog 1 6.957497 0 0 16925 +dopostdoctor 1 6.957497 0 0 16926 +ofcalifornia 1 6.957497 0 0 16927 +avarieti 1 6.957497 0 0 16928 +leather 1 6.957497 0 0 16929 +motorcycl 1 6.957497 0 0 16930 +jacket 1 6.957497 0 0 16931 +frombrown 1 6.957497 0 0 16932 +oftoronto 1 6.957497 0 0 16933 +investigatoraward 1 6.957497 0 0 16934 +mediaappl 1 6.957497 0 0 16935 +torespond 1 6.957497 0 0 16936 +parallelizationof 1 6.957497 0 0 16937 +bothcontrol 1 6.957497 0 0 16938 +transactionson 1 6.957497 0 0 16939 +xform 1 6.957497 0 0 16940 +theseobject 1 6.957497 0 0 16941 +applicationsdesign 1 6.957497 0 0 16942 +enrich 1 6.957497 0 0 16943 +astandard 1 6.957497 0 0 16944 +withthes 1 6.957497 0 0 16945 +catalyz 1 6.957497 0 0 16946 +bylead 1 6.957497 0 0 16947 +theconcept 1 6.957497 0 0 16948 +toexplor 1 6.957497 0 0 16949 +warper 1 6.957497 0 0 16950 +srun 1 6.957497 0 0 16951 +mathematicsteach 1 6.957497 0 0 16952 +transcriptproject 1 6.957497 0 0 16953 +willfacilit 1 6.957497 0 0 16954 +academicinform 1 6.957497 0 0 16955 +floppi 1 6.957497 0 0 16956 +themetip 1 6.957497 0 0 16957 +ofmultiplay 1 6.957497 0 0 16958 +tointegr 1 6.957497 0 0 16959 +itsxform 1 6.957497 0 0 16960 +somethingfun 1 6.957497 0 0 16961 +beenput 1 6.957497 0 0 16962 +fundamentalattract 1 6.957497 0 0 16963 +digitalimag 1 6.957497 0 0 16964 +discussteach 1 6.957497 0 0 16965 +undergr 1 6.957497 0 0 16966 +washingtonher 1 6.957497 0 0 16967 +handheld 1 6.957497 0 0 16968 +operationdistribut 1 6.957497 0 0 16969 +systemcontact 1 6.957497 0 0 16970 +bershadprof 1 6.957497 0 0 16971 +borriellomarc 1 6.957497 0 0 16972 +fiuczynskigeorg 1 6.957497 0 0 16973 +formanprof 1 6.957497 0 0 16974 +levygeoff 1 6.957497 0 0 16975 +voelkerterri 1 6.957497 0 0 16976 +watsonprof 1 6.957497 0 0 16977 +projectuw 1 6.957497 0 0 16978 +projectwelcom 1 6.957497 0 0 16979 +rapidconstruct 1 6.957497 0 0 16980 +incorporatesmulti 1 6.957497 0 0 16981 +basedencapsul 1 6.957497 0 0 16982 +allowsstat 1 6.957497 0 0 16983 +pureobject 1 6.957497 0 0 16984 +incorporateshigh 1 6.957497 0 0 16985 +hierachyanalysi 1 6.957497 0 0 16986 +guidedselect 1 6.957497 0 0 16987 +commonsubexpress 1 6.957497 0 0 16988 +currentlyavail 1 6.957497 0 0 16989 +thebeta 1 6.957497 0 0 16990 +projectslast 1 6.957497 0 0 16991 +mtwong 1 6.957497 0 0 16992 +pcrcw 1 6.957497 0 0 16993 +peopleal 1 6.957497 0 0 16994 +allsort 1 6.957497 0 0 16995 +graphicalfront 1 6.957497 0 0 16996 +presentationof 1 6.957497 0 0 16997 +upwith 1 6.957497 0 0 16998 +shinook 1 6.957497 0 0 16999 +oncorhynchu 1 6.957497 0 0 17000 +tshawytscha 1 6.957497 0 0 17001 +amer 1 6.957497 0 0 17002 +tribe 1 6.957497 0 0 17003 +southerli 1 6.957497 0 0 17004 +sled 1 6.957497 0 0 17005 +doga 1 6.957497 0 0 17006 +cadtool 1 6.957497 0 0 17007 +reactivesystem 1 6.957497 0 0 17008 +descriptionto 1 6.957497 0 0 17009 +designdecis 1 6.957497 0 0 17010 +reiterateaft 1 6.957497 0 0 17011 +willnot 1 6.957497 0 0 17012 +designerto 1 6.957497 0 0 17013 +legacycod 1 6.957497 0 0 17014 +currentlyw 1 6.957497 0 0 17015 +interprocessorcommun 1 6.957497 0 0 17016 +assumesmanu 1 6.957497 0 0 17017 +intricateand 1 6.957497 0 0 17018 +asicarchitectur 1 6.957497 0 0 17019 +onoff 1 6.957497 0 0 17020 +discourag 1 6.957497 0 0 17021 +innovemb 1 6.957497 0 0 17022 +shownat 1 6.957497 0 0 17023 +mainfeatur 1 6.957497 0 0 17024 +peripheraldevic 1 6.957497 0 0 17025 +andsynthes 1 6.957497 0 0 17026 +hardwarenetlist 1 6.957497 0 0 17027 +interfacingproblem 1 6.957497 0 0 17028 +timingconstraint 1 6.957497 0 0 17029 +swcodedesign 1 6.957497 0 0 17030 +tremezzo 1 6.957497 0 0 17031 +severalmor 1 6.957497 0 0 17032 +chinookersfacultygaetano 1 6.957497 0 0 17033 +borriellogradu 1 6.957497 0 0 17034 +ortegaken 1 6.957497 0 0 17035 +hinesian 1 6.957497 0 0 17036 +selizabeth 1 6.957497 0 0 17037 +walkupscott 1 6.957497 0 0 17038 +henrik 1 6.957497 0 0 17039 +hulgaardstafflarri 1 6.957497 0 0 17040 +mcmurchielist 1 6.957497 0 0 17041 +paperschinook 1 6.957497 0 0 17042 +sponsorsarpa 1 6.957497 0 0 17043 +walkup 1 6.957497 0 0 17044 +patricia 1 6.957497 0 0 17045 +projectid 1 6.957497 0 0 17046 +makeus 1 6.957497 0 0 17047 +quickproduct 1 6.957497 0 0 17048 +isoften 1 6.957497 0 0 17049 +postpon 1 6.957497 0 0 17050 +beenfrozen 1 6.957497 0 0 17051 +havedesign 1 6.957497 0 0 17052 +quickdevelop 1 6.957497 0 0 17053 +basicfeatur 1 6.957497 0 0 17054 +synthesisand 1 6.957497 0 0 17055 +anddetail 1 6.957497 0 0 17056 +aneffici 1 6.957497 0 0 17057 +blockarchitectur 1 6.957497 0 0 17058 +tailorplac 1 6.957497 0 0 17059 +schematicspecif 1 6.957497 0 0 17060 +capturedand 1 6.957497 0 0 17061 +ofscholarli 1 6.957497 0 0 17062 +andal 1 6.957497 0 0 17063 +therein 1 6.957497 0 0 17064 +copyrighthold 1 6.957497 0 0 17065 +notwithstand 1 6.957497 0 0 17066 +hereelectron 1 6.957497 0 0 17067 +thisinform 1 6.957497 0 0 17068 +eachauthor 1 6.957497 0 0 17069 +repost 1 6.957497 0 0 17070 +theexplicit 1 6.957497 0 0 17071 +holder 1 6.957497 0 0 17072 +emeraldlarri 1 6.957497 0 0 17073 +arraysaid 1 6.957497 0 0 17074 +verificationtim 1 6.957497 0 0 17075 +prototypingtriptych 1 6.957497 0 0 17076 +toolscan 1 6.957497 0 0 17077 +fpgaarchitectur 1 6.957497 0 0 17078 +incorporatedinto 1 6.957497 0 0 17079 +circuitsretim 1 6.957497 0 0 17080 +uselevel 1 6.957497 0 0 17081 +andincreas 1 6.957497 0 0 17082 +synchronouscircuit 1 6.957497 0 0 17083 +contraint 1 6.957497 0 0 17084 +routerth 1 6.957497 0 0 17085 +systemsself 1 6.957497 0 0 17086 +kehlprevi 1 6.957497 0 0 17087 +reportsarpa 1 6.957497 0 0 17088 +bluebook 1 6.957497 0 0 17089 +reactor 1 6.957497 0 0 17090 +defenc 1 6.957497 0 0 17091 +malfunct 1 6.957497 0 0 17092 +softbotinternet 1 6.957497 0 0 17093 +softbotth 1 6.957497 0 0 17094 +softwareenviron 1 6.957497 0 0 17095 +pragmaticallyconveni 1 6.957497 0 0 17096 +acustomiz 1 6.957497 0 0 17097 +internetaccess 1 6.957497 0 0 17098 +generatesand 1 6.957497 0 0 17099 +itsexperi 1 6.957497 0 0 17100 +requestand 1 6.957497 0 0 17101 +satisfyit 1 6.957497 0 0 17102 +interactwith 1 6.957497 0 0 17103 +sgraphic 1 6.957497 0 0 17104 +tosearch 1 6.957497 0 0 17105 +sophisticatedprun 1 6.957497 0 0 17106 +cartoonrepresent 1 6.957497 0 0 17107 +blanchard 1 6.957497 0 0 17108 +ofcolumn 1 6.957497 0 0 17109 +xiiplann 1 6.957497 0 0 17110 +ilalearn 1 6.957497 0 0 17111 +ying 1 6.957497 0 0 17112 +systemspin 1 6.957497 0 0 17113 +thatsupport 1 6.957497 0 0 17114 +atruntim 1 6.957497 0 0 17115 +accesshardwar 1 6.957497 0 0 17116 +nooverhead 1 6.957497 0 0 17117 +byrefer 1 6.957497 0 0 17118 +systemservic 1 6.957497 0 0 17119 +allextens 1 6.957497 0 0 17120 +typesaf 1 6.957497 0 0 17121 +oftypesafeti 1 6.957497 0 0 17122 +attemptingto 1 6.957497 0 0 17123 +writeboth 1 6.957497 0 0 17124 +machinerun 1 6.957497 0 0 17125 +withlow 1 6.957497 0 0 17126 +executeit 1 6.957497 0 0 17127 +protectedprocedur 1 6.957497 0 0 17128 +overethernet 1 6.957497 0 0 17129 +oldadapt 1 6.957497 0 0 17130 +operationsund 1 6.957497 0 0 17131 +samehardwar 1 6.957497 0 0 17132 +saveyourself 1 6.957497 0 0 17133 +invoc 1 6.957497 0 0 17134 +andsimpl 1 6.957497 0 0 17135 +interposit 1 6.957497 0 0 17136 +raship 1 6.957497 0 0 17137 +ourmascot 1 6.957497 0 0 17138 +projectmor 1 6.957497 0 0 17139 +projectsuw 1 6.957497 0 0 17140 +webdynam 1 6.957497 0 0 17141 +ofinvari 1 6.957497 0 0 17142 +theserun 1 6.957497 0 0 17143 +memoryload 1 6.957497 0 0 17144 +theydetermin 1 6.957497 0 0 17145 +unrol 1 6.957497 0 0 17146 +performancebenefit 1 6.957497 0 0 17147 +offsetbi 1 6.957497 0 0 17148 +strive 1 6.957497 0 0 17149 +qualitydynam 1 6.957497 0 0 17150 +thetempl 1 6.957497 0 0 17151 +initialexperi 1 6.957497 0 0 17152 +producedspeedup 1 6.957497 0 0 17153 +dynamicallycompil 1 6.957497 0 0 17154 +spinev 1 6.957497 0 0 17155 +otherposs 1 6.957497 0 0 17156 +invirtu 1 6.957497 0 0 17157 +systemi 1 6.957497 0 0 17158 +arenow 1 6.957497 0 0 17159 +eduzpl 1 6.957497 0 0 17160 +recompil 1 6.957497 0 0 17161 +shatter 1 6.957497 0 0 17162 +yourmachin 1 6.957497 0 0 17163 +zpthi 1 6.957497 0 0 17164 +pagesimultan 1 6.957497 0 0 17165 +projectoverviewpeoplepubl 1 6.957497 0 0 17166 +overviewth 1 6.957497 0 0 17167 +interleav 1 6.957497 0 0 17168 +differentthread 1 6.957497 0 0 17169 +issuefeatur 1 6.957497 0 0 17170 +abilityof 1 6.957497 0 0 17171 +contextsar 1 6.957497 0 0 17172 +exploitthread 1 6.957497 0 0 17173 +formsof 1 6.957497 0 0 17174 +havedemonstr 1 6.957497 0 0 17175 +improvesprocessor 1 6.957497 0 0 17176 +parallelworkload 1 6.957497 0 0 17177 +achievedin 1 6.957497 0 0 17178 +ordersuperscalar 1 6.957497 0 0 17179 +synchronizationtechniqu 1 6.957497 0 0 17180 +otherarchitectur 1 6.957497 0 0 17181 +levygradu 1 6.957497 0 0 17182 +tullsenindustri 1 6.957497 0 0 17183 +andh 1 6.957497 0 0 17184 +margherita 1 6.957497 0 0 17185 +ligur 1 6.957497 0 0 17186 +doon 1 6.957497 0 0 17187 +departmentabout 1 6.957497 0 0 17188 +departmentour 1 6.957497 0 0 17189 +fourteen 1 6.957497 0 0 17190 +incent 1 6.957497 0 0 17191 +colophon 1 6.957497 0 0 17192 +infocomput 1 6.957497 0 0 17193 +madisona 1 6.957497 0 0 17194 +smoothli 1 6.957497 0 0 17195 +simpleobserv 1 6.957497 0 0 17196 +propertieseasi 1 6.957497 0 0 17197 +fixat 1 6.957497 0 0 17198 +toperform 1 6.957497 0 0 17199 +obstacl 1 6.957497 0 0 17200 +ourwork 1 6.957497 0 0 17201 +pointof 1 6.957497 0 0 17202 +makesimpl 1 6.957497 0 0 17203 +geometryof 1 6.957497 0 0 17204 +thesurfac 1 6.957497 0 0 17205 +generalobserv 1 6.957497 0 0 17206 +objectthan 1 6.957497 0 0 17207 +beexploit 1 6.957497 0 0 17208 +anddeterminist 1 6.957497 0 0 17209 +localshap 1 6.957497 0 0 17210 +qualitativestrategi 1 6.957497 0 0 17211 +viewingdirect 1 6.957497 0 0 17212 +selectedpoint 1 6.957497 0 0 17213 +observationso 1 6.957497 0 0 17214 +observationand 1 6.957497 0 0 17215 +noisi 1 6.957497 0 0 17216 +fung 1 6.957497 0 0 17217 +roland 1 6.957497 0 0 17218 +ofact 1 6.957497 0 0 17219 +minimax 1 6.957497 0 0 17220 +wherebi 1 6.957497 0 0 17221 +anduniqu 1 6.957497 0 0 17222 +priordistribut 1 6.957497 0 0 17223 +exert 1 6.957497 0 0 17224 +posterior 1 6.957497 0 0 17225 +withpattern 1 6.957497 0 0 17226 +nearman 1 6.957497 0 0 17227 +lemma 1 6.957497 0 0 17228 +classificationtest 1 6.957497 0 0 17229 +margin 1 6.957497 0 0 17230 +gsnake 1 6.957497 0 0 17231 +expressivenesscondit 1 6.957497 0 0 17232 +voxel 1 6.957497 0 0 17233 +calleda 1 6.957497 0 0 17234 +adha 1 6.957497 0 0 17235 +objectsrepres 1 6.957497 0 0 17236 +objectsfrequ 1 6.957497 0 0 17237 +functionswith 1 6.957497 0 0 17238 +containfinit 1 6.957497 0 0 17239 +chosenfrom 1 6.957497 0 0 17240 +palett 1 6.957497 0 0 17241 +numbersof 1 6.957497 0 0 17242 +computationalmodel 1 6.957497 0 0 17243 +informationcont 1 6.957497 0 0 17244 +thatdisplai 1 6.957497 0 0 17245 +onlythos 1 6.957497 0 0 17246 +itimpl 1 6.957497 0 0 17247 +satisfyingth 1 6.957497 0 0 17248 +expressivenss 1 6.957497 0 0 17249 +onhow 1 6.957497 0 0 17250 +wecan 1 6.957497 0 0 17251 +howprecis 1 6.957497 0 0 17252 +voxelresolut 1 6.957497 0 0 17253 +visualizationprocess 1 6.957497 0 0 17254 +objectsto 1 6.957497 0 0 17255 +theexpress 1 6.957497 0 0 17256 +primitivevari 1 6.957497 0 0 17257 +latitud 1 6.957497 0 0 17258 +constructor 1 6.957497 0 0 17259 +appropriatefor 1 6.957497 0 0 17260 +containsth 1 6.957497 0 0 17261 +canalso 1 6.957497 0 0 17262 +displayi 1 6.957497 0 0 17263 +graphicsprimit 1 6.957497 0 0 17264 +locationand 1 6.957497 0 0 17265 +animationsequ 1 6.957497 0 0 17266 +thedisplai 1 6.957497 0 0 17267 +isnatur 1 6.957497 0 0 17268 +andtemperatur 1 6.957497 0 0 17269 +calledvi 1 6.957497 0 0 17270 +adthat 1 6.957497 0 0 17271 +theircomput 1 6.957497 0 0 17272 +theirprogram 1 6.957497 0 0 17273 +thevi 1 6.957497 0 0 17274 +vvof 1 6.957497 0 0 17275 +thatsatisfi 1 6.957497 0 0 17276 +implementationi 1 6.957497 0 0 17277 +auser 1 6.957497 0 0 17278 +abstractionof 1 6.957497 0 0 17279 +ofmap 1 6.957497 0 0 17280 +defineddata 1 6.957497 0 0 17281 +ingener 1 6.957497 0 0 17282 +usualapproach 1 6.957497 0 0 17283 +bywrit 1 6.957497 0 0 17284 +evaluatemechan 1 6.957497 0 0 17285 +technologicaland 1 6.957497 0 0 17286 +sociolog 1 6.957497 0 0 17287 +suggestionscondor 1 6.957497 0 0 17288 +nobin 1 6.957497 0 0 17289 +projectcor 1 6.957497 0 0 17290 +projectdocu 1 6.957497 0 0 17291 +coralpeopl 1 6.957497 0 0 17292 +coraloth 1 6.957497 0 0 17293 +madisonobject 1 6.957497 0 0 17294 +efficientdeduct 1 6.957497 0 0 17295 +coralsystem 1 6.957497 0 0 17296 +durationof 1 6.957497 0 0 17297 +declaritiveand 1 6.957497 0 0 17298 +supportsgener 1 6.957497 0 0 17299 +coralimplement 1 6.957497 0 0 17300 +modulein 1 6.957497 0 0 17301 +insertand 1 6.957497 0 0 17302 +canprogram 1 6.957497 0 0 17303 +withcor 1 6.957497 0 0 17304 +allowingc 1 6.957497 0 0 17305 +coralimplemen 1 6.957497 0 0 17306 +theexodusstorag 1 6.957497 0 0 17307 +manang 1 6.957497 0 0 17308 +aclient 1 6.957497 0 0 17309 +requiringy 1 6.957497 0 0 17310 +announcemnt 1 6.957497 0 0 17311 +listwhich 1 6.957497 0 0 17312 +shawn 1 6.957497 0 0 17313 +flisakowski 1 6.957497 0 0 17314 +flisakow 1 6.957497 0 0 17315 +nonsmooth 1 6.957497 0 0 17316 +mcplib 1 6.957497 0 0 17317 +functionevalu 1 6.957497 0 0 17318 +subproblem 1 6.957497 0 0 17319 +uponreformul 1 6.957497 0 0 17320 +fruitfuldisciplin 1 6.957497 0 0 17321 +incomplementar 1 6.957497 0 0 17322 +meetingsof 1 6.957497 0 0 17323 +forcomplementar 1 6.957497 0 0 17324 +researcherssoftwar 1 6.957497 0 0 17325 +problemdescript 1 6.957497 0 0 17326 +frommatlab 1 6.957497 0 0 17327 +jacobian 1 6.957497 0 0 17328 +specificvers 1 6.957497 0 0 17329 +hook 1 6.957497 0 0 17330 +rutherford 1 6.957497 0 0 17331 +classicaljosephi 1 6.957497 0 0 17332 +linearizedsubproblem 1 6.957497 0 0 17333 +defineth 1 6.957497 0 0 17334 +dampedlinesearch 1 6.957497 0 0 17335 +infeas 1 6.957497 0 0 17336 +restartprocedur 1 6.957497 0 0 17337 +totermin 1 6.957497 0 0 17338 +rescal 1 6.957497 0 0 17339 +equilibr 1 6.957497 0 0 17340 +elementsappear 1 6.957497 0 0 17341 +mcpor 1 6.957497 0 0 17342 +anonsmooth 1 6.957497 0 0 17343 +reformul 1 6.957497 0 0 17344 +algorithmconsist 1 6.957497 0 0 17345 +pathto 1 6.957497 0 0 17346 +aposs 1 6.957497 0 0 17347 +thepath 1 6.957497 0 0 17348 +partiallycomput 1 6.957497 0 0 17349 +relinear 1 6.957497 0 0 17350 +anonmonoton 1 6.957497 0 0 17351 +watchdog 1 6.957497 0 0 17352 +minima 1 6.957497 0 0 17353 +robustnessimprov 1 6.957497 0 0 17354 +proxim 1 6.957497 0 0 17355 +qpcomp 1 6.957497 0 0 17356 +ishandl 1 6.957497 0 0 17357 +thenapproxim 1 6.957497 0 0 17358 +theaccuraci 1 6.957497 0 0 17359 +mpsge 1 6.957497 0 0 17360 +thatallow 1 6.957497 0 0 17361 +nemsth 1 6.957497 0 0 17362 +pageexodu 1 6.957497 0 0 17363 +toolkitnot 1 6.957497 0 0 17364 +succed 1 6.957497 0 0 17365 +theshor 1 6.957497 0 0 17366 +eduprincip 1 6.957497 0 0 17367 +dewittse 1 6.957497 0 0 17368 +exodusshor 1 6.957497 0 0 17369 +exoduslatest 1 6.957497 0 0 17370 +compilercontribut 1 6.957497 0 0 17371 +managera 1 6.957497 0 0 17372 +exodus_al 1 6.957497 0 0 17373 +oodbsdat 1 6.957497 0 0 17374 +projectparadis 1 6.957497 0 0 17375 +frontend 1 6.957497 0 0 17376 +sequoia 1 6.957497 0 0 17377 +iscap 1 6.957497 0 0 17378 +applyingobject 1 6.957497 0 0 17379 +ofstor 1 6.957497 0 0 17380 +tosignificantli 1 6.957497 0 0 17381 +thatcan 1 6.957497 0 0 17382 +andsupport 1 6.957497 0 0 17383 +paradiseprovid 1 6.957497 0 0 17384 +gisappl 1 6.957497 0 0 17385 +asinteg 1 6.957497 0 0 17386 +circl 1 6.957497 0 0 17387 +spatialattribut 1 6.957497 0 0 17388 +foroverlap 1 6.957497 0 0 17389 +selectingcolor 1 6.957497 0 0 17390 +withad 1 6.957497 0 0 17391 +issueimplicit 1 6.957497 0 0 17392 +arubb 1 6.957497 0 0 17393 +querycompos 1 6.957497 0 0 17394 +databaseschema 1 6.957497 0 0 17395 +beview 1 6.957497 0 0 17396 +bedisplai 1 6.957497 0 0 17397 +sqlwe 1 6.957497 0 0 17398 +extendedset 1 6.957497 0 0 17399 +byus 1 6.957497 0 0 17400 +standarddatabas 1 6.957497 0 0 17401 +anddrop 1 6.957497 0 0 17402 +paradiseserv 1 6.957497 0 0 17403 +theresult 1 6.957497 0 0 17404 +ismulti 1 6.957497 0 0 17405 +sameserv 1 6.957497 0 0 17406 +carefulattent 1 6.957497 0 0 17407 +processqueri 1 6.957497 0 0 17408 +largevolum 1 6.957497 0 0 17409 +frontendeurop 1 6.957497 0 0 17410 +pressher 1 6.957497 0 0 17411 +projectattn 1 6.957497 0 0 17412 +dewittunivers 1 6.957497 0 0 17413 +edumor 1 6.957497 0 0 17414 +homepagego 1 6.957497 0 0 17415 +versionnewslett 1 6.957497 0 0 17416 +newand 1 6.957497 0 0 17417 +toolsinternet 1 6.957497 0 0 17418 +effectiveinternet 1 6.957497 0 0 17419 +availablea 1 6.957497 0 0 17420 +studentssurf 1 6.957497 0 0 17421 +smarter 1 6.957497 0 0 17422 +canchoos 1 6.957497 0 0 17423 +annoucementseach 1 6.957497 0 0 17424 +networktool 1 6.957497 0 0 17425 +vefound 1 6.957497 0 0 17426 +byeduc 1 6.957497 0 0 17427 +encouragefeedback 1 6.957497 0 0 17428 +ournewest 1 6.957497 0 0 17429 +feedbackscout 1 6.957497 0 0 17430 +servicesfor 1 6.957497 0 0 17431 +shore_al 1 6.957497 0 0 17432 +odmg 1 6.957497 0 0 17433 +listproc 1 6.957497 0 0 17434 +shore_support 1 6.957497 0 0 17435 +informationsystem 1 6.957497 0 0 17436 +ashor 1 6.957497 0 0 17437 +vendor 1 6.957497 0 0 17438 +flatten 1 6.957497 0 0 17439 +legaci 1 6.957497 0 0 17440 +clutter 1 6.957497 0 0 17441 +pageshor 1 6.957497 0 0 17442 +repositorydocu 1 6.957497 0 0 17443 +informationmail 1 6.957497 0 0 17444 +listsse 1 6.957497 0 0 17445 +shorepeopl 1 6.957497 0 0 17446 +shorelatest 1 6.957497 0 0 17447 +arpaparadis 1 6.957497 0 0 17448 +shoreexodu 1 6.957497 0 0 17449 +shoreoo 1 6.957497 0 0 17450 +oodbsshor 1 6.957497 0 0 17451 +albumuw 1 6.957497 0 0 17452 +widevarieti 1 6.957497 0 0 17453 +cadsystem 1 6.957497 0 0 17454 +usedexodusstorag 1 6.957497 0 0 17455 +ofwai 1 6.957497 0 0 17456 +thisinterfac 1 6.957497 0 0 17457 +theunix 1 6.957497 0 0 17458 +viand 1 6.957497 0 0 17459 +withoutmodif 1 6.957497 0 0 17460 +shoreobject 1 6.957497 0 0 17461 +inheritingcharacterist 1 6.957497 0 0 17462 +fromfil 1 6.957497 0 0 17463 +ofshor 1 6.957497 0 0 17464 +scalabilitysupport 1 6.957497 0 0 17465 +heterogeneitysupport 1 6.957497 0 0 17466 +applicationswhen 1 6.957497 0 0 17467 +uniqueamong 1 6.957497 0 0 17468 +languageheterogen 1 6.957497 0 0 17469 +persistentstorag 1 6.957497 0 0 17470 +basicallycompat 1 6.957497 0 0 17471 +betransf 1 6.957497 0 0 17472 +architectureshor 1 6.957497 0 0 17473 +distributedarchitectur 1 6.957497 0 0 17474 +disksattach 1 6.957497 0 0 17475 +architectureus 1 6.957497 0 0 17476 +typicallyus 1 6.957497 0 0 17477 +notionof 1 6.957497 0 0 17478 +runsin 1 6.957497 0 0 17479 +forus 1 6.957497 0 0 17480 +theparadis 1 6.957497 0 0 17481 +seosdi 1 6.957497 0 0 17482 +aimport 1 6.957497 0 0 17483 +endeavor 1 6.957497 0 0 17484 +certainlydepend 1 6.957497 0 0 17485 +transmitobject 1 6.957497 0 0 17486 +whilecurr 1 6.957497 0 0 17487 +orientedtoward 1 6.957497 0 0 17488 +terabyt 1 6.957497 0 0 17489 +libraryar 1 6.957497 0 0 17490 +heterogeneityobject 1 6.957497 0 0 17491 +neutraltyp 1 6.957497 0 0 17492 +databasefeatur 1 6.957497 0 0 17493 +ofsupport 1 6.957497 0 0 17494 +feasibleto 1 6.957497 0 0 17495 +wasrec 1 6.957497 0 0 17496 +onprovid 1 6.957497 0 0 17497 +withina 1 6.957497 0 0 17498 +applicationsa 1 6.957497 0 0 17499 +currentlyus 1 6.957497 0 0 17500 +untyp 1 6.957497 0 0 17501 +structuredobject 1 6.957497 0 0 17502 +displac 1 6.957497 0 0 17503 +orientedfil 1 6.957497 0 0 17504 +standpoint 1 6.957497 0 0 17505 +manypersist 1 6.957497 0 0 17506 +indirectli 1 6.957497 0 0 17507 +usersa 1 6.957497 0 0 17508 +individualpersist 1 6.957497 0 0 17509 +oflarg 1 6.957497 0 0 17510 +unnam 1 6.957497 0 0 17511 +involvessever 1 6.957497 0 0 17512 +includingdirectori 1 6.957497 0 0 17513 +unixappl 1 6.957497 0 0 17514 +fromtradit 1 6.957497 0 0 17515 +standardunix 1 6.957497 0 0 17516 +mkdir 1 6.957497 0 0 17517 +chdir 1 6.957497 0 0 17518 +callsposs 1 6.957497 0 0 17519 +onevari 1 6.957497 0 0 17520 +asb 1 6.957497 0 0 17521 +objectthrough 1 6.957497 0 0 17522 +counterpart 1 6.957497 0 0 17523 +callswil 1 6.957497 0 0 17524 +thatwish 1 6.957497 0 0 17525 +datacontain 1 6.957497 0 0 17526 +bothnew 1 6.957497 0 0 17527 +componentof 1 6.957497 0 0 17528 +morestructur 1 6.957497 0 0 17529 +rleas 1 6.957497 0 0 17530 +completeimplement 1 6.957497 0 0 17531 +tosolari 1 6.957497 0 0 17532 +andpentium 1 6.957497 0 0 17533 +atftp 1 6.957497 0 0 17534 +liststher 1 6.957497 0 0 17535 +usebi 1 6.957497 0 0 17536 +madisonc 1 6.957497 0 0 17537 +unmoder 1 6.957497 0 0 17538 +unlikelyev 1 6.957497 0 0 17539 +isalreadi 1 6.957497 0 0 17540 +belowfor 1 6.957497 0 0 17541 +sentwhen 1 6.957497 0 0 17542 +beingpost 1 6.957497 0 0 17543 +yourrepli 1 6.957497 0 0 17544 +maysubscrib 1 6.957497 0 0 17545 +existenceof 1 6.957497 0 0 17546 +whenit 1 6.957497 0 0 17547 +yoursubscript 1 6.957497 0 0 17548 +conceal 1 6.957497 0 0 17549 +subscriberscannot 1 6.957497 0 0 17550 +specialmessag 1 6.957497 0 0 17551 +sendthi 1 6.957497 0 0 17552 +unsubscrib 1 6.957497 0 0 17553 +messageshould 1 6.957497 0 0 17554 +helplast 1 6.957497 0 0 17555 +nhall 1 6.957497 0 0 17556 +footnot 1 6.957497 0 0 17557 +odlshor 1 6.957497 0 0 17558 +modelidl 1 6.957497 0 0 17559 +odlar 1 6.957497 0 0 17560 +stabilizesw 1 6.957497 0 0 17561 +residenceoffic 1 6.957497 0 0 17562 +pagealain 1 6.957497 0 0 17563 +carnivor 1 6.957497 0 0 17564 +studentbiotechnolog 1 6.957497 0 0 17565 +traineecomput 1 6.957497 0 0 17566 +shavlikinterest 1 6.957497 0 0 17567 +protein 1 6.957497 0 0 17568 +networkseduc 1 6.957497 0 0 17569 +madisonb 1 6.957497 0 0 17570 +universityb 1 6.957497 0 0 17571 +mankato 1 6.957497 0 0 17572 +marci 1 6.957497 0 0 17573 +maven 1 6.957497 0 0 17574 +erin 1 6.957497 0 0 17575 +occasionali 1 6.957497 0 0 17576 +cvte 1 6.957497 0 0 17577 +deleg 1 6.957497 0 0 17578 +existencei 1 6.957497 0 0 17579 +nail 1 6.957497 0 0 17580 +lafollett 1 6.957497 0 0 17581 +meantim 1 6.957497 0 0 17582 +wacki 1 6.957497 0 0 17583 +eggplant 1 6.957497 0 0 17584 +daddi 1 6.957497 0 0 17585 +titanium 1 6.957497 0 0 17586 +screw 1 6.957497 0 0 17587 +desi 1 6.957497 0 0 17588 +relaford 1 6.957497 0 0 17589 +mulholland 1 6.957497 0 0 17590 +oxygen 1 6.957497 0 0 17591 +dioxid 1 6.957497 0 0 17592 +whack 1 6.957497 0 0 17593 +scaryarea 1 6.957497 0 0 17594 +rabid 1 6.957497 0 0 17595 +interestth 1 6.957497 0 0 17596 +hmmm 1 6.957497 0 0 17597 +handyinformatik 1 6.957497 0 0 17598 +madcat 1 6.957497 0 0 17599 +sportslin 1 6.957497 0 0 17600 +philli 1 6.957497 0 0 17601 +ickyth 1 6.957497 0 0 17602 +kemin 1 6.957497 0 0 17603 +boxsplin 1 6.957497 0 0 17604 +radial 1 6.957497 0 0 17605 +toscatt 1 6.957497 0 0 17606 +multiquadr 1 6.957497 0 0 17607 +plate 1 6.957497 0 0 17608 +splinesthi 1 6.957497 0 0 17609 +linksat 1 6.957497 0 0 17610 +paperaffin 1 6.957497 0 0 17611 +operatorof 1 6.957497 0 0 17612 +zuowei 1 6.957497 0 0 17613 +fromher 1 6.957497 0 0 17614 +directlyfrom 1 6.957497 0 0 17615 +accounther 1 6.957497 0 0 17616 +articlesof 1 6.957497 0 0 17617 +containspostscript 1 6.957497 0 0 17618 +theapproxim 1 6.957497 0 0 17619 +filesconcern 1 6.957497 0 0 17620 +andpubl 1 6.957497 0 0 17621 +therber 1 6.957497 0 0 17622 +therberoffic 1 6.957497 0 0 17623 +sphone 1 6.957497 0 0 17624 +andyt 1 6.957497 0 0 17625 +eduzooresumebookmarksapplet 1 6.957497 0 0 17626 +arvind 1 6.957497 0 0 17627 +ranganathan 1 6.957497 0 0 17628 +workplac 1 6.957497 0 0 17629 +ranga 1 6.957497 0 0 17630 +erstwhil 1 6.957497 0 0 17631 +indiaworld 1 6.957497 0 0 17632 +escher 1 6.957497 0 0 17633 +thusoo 1 6.957497 0 0 17634 +iitd 1 6.957497 0 0 17635 +ashisht 1 6.957497 0 0 17636 +alexandria 1 6.957497 0 0 17637 +pageashraf 1 6.957497 0 0 17638 +aboulnagacomput 1 6.957497 0 0 17639 +infooffic 1 6.957497 0 0 17640 +pageucla 1 6.957497 0 0 17641 +bannon 1 6.957497 0 0 17642 +championship 1 6.957497 0 0 17643 +researchsailinghors 1 6.957497 0 0 17644 +ridingscuba 1 6.957497 0 0 17645 +divingc 1 6.957497 0 0 17646 +algebraicalgorithm 1 6.957497 0 0 17647 +solvealgebra 1 6.957497 0 0 17648 +onetel 1 6.957497 0 0 17649 +possiblefactor 1 6.957497 0 0 17650 +intrins 1 6.957497 0 0 17651 +forreli 1 6.957497 0 0 17652 +iscomposit 1 6.957497 0 0 17653 +auxiliarynumb 1 6.957497 0 0 17654 +witnessbi 1 6.957497 0 0 17655 +followingnatur 1 6.957497 0 0 17656 +accurateheurist 1 6.957497 0 0 17657 +allowsthi 1 6.957497 0 0 17658 +cnta 1 6.957497 0 0 17659 +glaser 1 6.957497 0 0 17660 +tanguai 1 6.957497 0 0 17661 +shallit 1 6.957497 0 0 17662 +fuzz 1 6.957497 0 0 17663 +testingteach 1 6.957497 0 0 17664 +graduatesprofession 1 6.957497 0 0 17665 +monona 1 6.957497 0 0 17666 +terrac 1 6.957497 0 0 17667 +groupperson 1 6.957497 0 0 17668 +photosbart 1 6.957497 0 0 17669 +usaben 1 6.957497 0 0 17670 +edursumquinc 1 6.957497 0 0 17671 +gamezillion 1 6.957497 0 0 17672 +bookmarksspr 1 6.957497 0 0 17673 +dbseminar 1 6.957497 0 0 17674 +osseminar 1 6.957497 0 0 17675 +condormeet 1 6.957497 0 0 17676 +plseminar 1 6.957497 0 0 17677 +zealand 1 6.957497 0 0 17678 +massei 1 6.957497 0 0 17679 +pagegareth 1 6.957497 0 0 17680 +dpl 1 6.957497 0 0 17681 +dacc 1 6.957497 0 0 17682 +nois 1 6.957497 0 0 17683 +tradition 1 6.957497 0 0 17684 +intersect 1 6.957497 0 0 17685 +pagekevin 1 6.957497 0 0 17686 +beyerbey 1 6.957497 0 0 17687 +researchresearch 1 6.957497 0 0 17688 +coursesinstruct 1 6.957497 0 0 17689 +bezenek 1 6.957497 0 0 17690 +pith 1 6.957497 0 0 17691 +toddm 1 6.957497 0 0 17692 +cpu 1 6.957497 0 0 17693 +_great 1 6.957497 0 0 17694 +present_ 1 6.957497 0 0 17695 +uregina 1 6.957497 0 0 17696 +bayko 1 6.957497 0 0 17697 +squeez 1 6.957497 0 0 17698 +skateboard 1 6.957497 0 0 17699 +helen 1 6.957497 0 0 17700 +custer 1 6.957497 0 0 17701 +_insid 1 6.957497 0 0 17702 +pithi 1 6.957497 0 0 17703 +abound 1 6.957497 0 0 17704 +edubezenek 1 6.957497 0 0 17705 +pageback 1 6.957497 0 0 17706 +oraclesend 1 6.957497 0 0 17707 +bolo 1 6.957497 0 0 17708 +uwvax 1 6.957497 0 0 17709 +josef 1 6.957497 0 0 17710 +uucp 1 6.957497 0 0 17711 +essen 1 6.957497 0 0 17712 +hau 1 6.957497 0 0 17713 +bolobologreet 1 6.957497 0 0 17714 +christen 1 6.957497 0 0 17715 +mebolo 1 6.957497 0 0 17716 +bestexplan 1 6.957497 0 0 17717 +bywhat 1 6.957497 0 0 17718 +acomput 1 6.957497 0 0 17719 +shudder 1 6.957497 0 0 17720 +newoper 1 6.957497 0 0 17721 +sameto 1 6.957497 0 0 17722 +myroomm 1 6.957497 0 0 17723 +sublim 1 6.957497 0 0 17724 +thetig 1 6.957497 0 0 17725 +blake 1 6.957497 0 0 17726 +poemtyg 1 6.957497 0 0 17727 +tyger 1 6.957497 0 0 17728 +againin 1 6.957497 0 0 17729 +ahous 1 6.957497 0 0 17730 +isjosef 1 6.957497 0 0 17731 +roadmonona 1 6.957497 0 0 17732 +workwork 1 6.957497 0 0 17733 +banana 1 6.957497 0 0 17734 +grung 1 6.957497 0 0 17735 +perhapssom 1 6.957497 0 0 17736 +othermonth 1 6.957497 0 0 17737 +intosubmiss 1 6.957497 0 0 17738 +andstar 1 6.957497 0 0 17739 +fordav 1 6.957497 0 0 17740 +wiss 1 6.957497 0 0 17741 +themadison 1 6.957497 0 0 17742 +campusof 1 6.957497 0 0 17743 +peninsula 1 6.957497 0 0 17744 +technicalexpertis 1 6.957497 0 0 17745 +newsystem 1 6.957497 0 0 17746 +reviv 1 6.957497 0 0 17747 +oddbal 1 6.957497 0 0 17748 +tasksar 1 6.957497 0 0 17749 +serverbut 1 6.957497 0 0 17750 +mostlyempti 1 6.957497 0 0 17751 +activitiesuwvaxi 1 6.957497 0 0 17752 +svolunt 1 6.957497 0 0 17753 +organizationsi 1 6.957497 0 0 17754 +oftenhav 1 6.957497 0 0 17755 +usersof 1 6.957497 0 0 17756 +aopa 1 6.957497 0 0 17757 +blitz 1 6.957497 0 0 17758 +drinkingwhen 1 6.957497 0 0 17759 +friendsand 1 6.957497 0 0 17760 +loftili 1 6.957497 0 0 17761 +labelledblitz 1 6.957497 0 0 17762 +ofoctoberfest 1 6.957497 0 0 17763 +chud 1 6.957497 0 0 17764 +accumulateda 1 6.957497 0 0 17765 +whatnotof 1 6.957497 0 0 17766 +charad 1 6.957497 0 0 17767 +neglect 1 6.957497 0 0 17768 +seminaranywai 1 6.957497 0 0 17769 +beaucoup 1 6.957497 0 0 17770 +boir 1 6.957497 0 0 17771 +enginefind 1 6.957497 0 0 17772 +wideth 1 6.957497 0 0 17773 +duan 1 6.957497 0 0 17774 +mclaughlin 1 6.957497 0 0 17775 +addresseseducationresearch 1 6.957497 0 0 17776 +associatesaddressesscott 1 6.957497 0 0 17777 +breachdepart 1 6.957497 0 0 17778 +advisorguri 1 6.957497 0 0 17779 +sohiresearch 1 6.957497 0 0 17780 +architecturemultiscalarpublicationsmultiscalar 1 6.957497 0 0 17781 +processorsgurindar 1 6.957497 0 0 17782 +vijaykumarnd 1 6.957497 0 0 17783 +processorscott 1 6.957497 0 0 17784 +sohith 1 6.957497 0 0 17785 +errorstodd 1 6.957497 0 0 17786 +sohiconfer 1 6.957497 0 0 17787 +recreationwingsbeersquidtvassociatestodd 1 6.957497 0 0 17788 +austindoug 1 6.957497 0 0 17789 +burgerbabak 1 6.957497 0 0 17790 +falsafialain 1 6.957497 0 0 17791 +kagit 1 6.957497 0 0 17792 +vijaykumarlast 1 6.957497 0 0 17793 +bleed 1 6.957497 0 0 17794 +nontrivi 1 6.957497 0 0 17795 +waysher 1 6.957497 0 0 17796 +underst 1 6.957497 0 0 17797 +unadorn 1 6.957497 0 0 17798 +pizza 1 6.957497 0 0 17799 +stinkin 1 6.957497 0 0 17800 +myclass 1 6.957497 0 0 17801 +hypersensit 1 6.957497 0 0 17802 +rockjock 1 6.957497 0 0 17803 +cretin 1 6.957497 0 0 17804 +brood 1 6.957497 0 0 17805 +glare 1 6.957497 0 0 17806 +clenchesfist 1 6.957497 0 0 17807 +knuckl 1 6.957497 0 0 17808 +flightyfemm 1 6.957497 0 0 17809 +razz 1 6.957497 0 0 17810 +asskick 1 6.957497 0 0 17811 +thirdgrad 1 6.957497 0 0 17812 +hardbodi 1 6.957497 0 0 17813 +leatherboi 1 6.957497 0 0 17814 +leer 1 6.957497 0 0 17815 +atm 1 6.957497 0 0 17816 +todayi 1 6.957497 0 0 17817 +giggl 1 6.957497 0 0 17818 +aprostitut 1 6.957497 0 0 17819 +bigotri 1 6.957497 0 0 17820 +pedagodi 1 6.957497 0 0 17821 +goat 1 6.957497 0 0 17822 +refus 1 6.957497 0 0 17823 +claw 1 6.957497 0 0 17824 +sssuuuhhh 1 6.957497 0 0 17825 +mmuuuhhhh 1 6.957497 0 0 17826 +dddduuuuuhhhhh 1 6.957497 0 0 17827 +mmmmuuuhhhh 1 6.957497 0 0 17828 +maaaahhhjaaaaaahhhhh 1 6.957497 0 0 17829 +fffuuuhhhhh 1 6.957497 0 0 17830 +yyyyyyyuuuuuhhhhh 1 6.957497 0 0 17831 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 1 6.957497 0 0 17832 +uuuhhh 1 6.957497 0 0 17833 +uuummmm 1 6.957497 0 0 17834 +uuuhhhh 1 6.957497 0 0 17835 +wwwwwhhhhuuuuuhhhhh 1 6.957497 0 0 17836 +princetonunivers 1 6.957497 0 0 17837 +usacao 1 6.957497 0 0 17838 +cachingacf 1 6.957497 0 0 17839 +tracesrec 1 6.957497 0 0 17840 +papersintegr 1 6.957497 0 0 17841 +schedulingpei 1 6.957497 0 0 17842 +strategiespei 1 6.957497 0 0 17843 +peform 1 6.957497 0 0 17844 +tickertaip 1 6.957497 0 0 17845 +swee 1 6.957497 0 0 17846 +boon 1 6.957497 0 0 17847 +shivakumar 1 6.957497 0 0 17848 +venkataraman 1 6.957497 0 0 17849 +talksslid 1 6.957497 0 0 17850 +improvefil 1 6.957497 0 0 17851 +filecach 1 6.957497 0 0 17852 +individualappl 1 6.957497 0 0 17853 +useit 1 6.957497 0 0 17854 +fairglob 1 6.957497 0 0 17855 +cachereplac 1 6.957497 0 0 17856 +implementationon 1 6.957497 0 0 17857 +demonstratedthat 1 6.957497 0 0 17858 +informationcan 1 6.957497 0 0 17859 +amdevelop 1 6.957497 0 0 17860 +diskarrai 1 6.957497 0 0 17861 +managementproblem 1 6.957497 0 0 17862 +garlic 1 6.957497 0 0 17863 +arya 1 6.957497 0 0 17864 +fagin 1 6.957497 0 0 17865 +flickner 1 6.957497 0 0 17866 +petkov 1 6.957497 0 0 17867 +wimmer 1 6.957497 0 0 17868 +careymichael 1 6.957497 0 0 17869 +careyprofessor 1 6.957497 0 0 17870 +performanceand 1 6.957497 0 0 17871 +topicsof 1 6.957497 0 0 17872 +algorithmsrel 1 6.957497 0 0 17873 +userdatabas 1 6.957497 0 0 17874 +persistentobject 1 6.957497 0 0 17875 +objectmanag 1 6.957497 0 0 17876 +applicationssuch 1 6.957497 0 0 17877 +greatyear 1 6.957497 0 0 17878 +tackl 1 6.957497 0 0 17879 +anddiffer 1 6.957497 0 0 17880 +thesourc 1 6.957497 0 0 17881 +projectther 1 6.957497 0 0 17882 +multimediainform 1 6.957497 0 0 17883 +objectdatabas 1 6.957497 0 0 17884 +continuedto 1 6.957497 0 0 17885 +aqueri 1 6.957497 0 0 17886 +pesto 1 6.957497 0 0 17887 +thegarl 1 6.957497 0 0 17888 +kiernan 1 6.957497 0 0 17889 +orientedprogram 1 6.957497 0 0 17890 +tork 1 6.957497 0 0 17891 +visualdatabas 1 6.957497 0 0 17892 +garlicapproach 1 6.957497 0 0 17893 +luniewski 1 6.957497 0 0 17894 +withd 1 6.957497 0 0 17895 +kant 1 6.957497 0 0 17896 +onobject 1 6.957497 0 0 17897 +mehta 1 6.957497 0 0 17898 +thint 1 6.957497 0 0 17899 +smrc 1 6.957497 0 0 17900 +withb 1 6.957497 0 0 17901 +reinwald 1 6.957497 0 0 17902 +desslock 1 6.957497 0 0 17903 +lehman 1 6.957497 0 0 17904 +pirahesh 1 6.957497 0 0 17905 +tarascon 1 6.957497 0 0 17906 +provenc 1 6.957497 0 0 17907 +sigmodint 1 6.957497 0 0 17908 +managementof 1 6.957497 0 0 17909 +multivers 1 6.957497 0 0 17910 +bober 1 6.957497 0 0 17911 +oszu 1 6.957497 0 0 17912 +dayal 1 6.957497 0 0 17913 +valduriez 1 6.957497 0 0 17914 +pagechin 1 6.957497 0 0 17915 +tanggradu 1 6.957497 0 0 17916 +ameduc 1 6.957497 0 0 17917 +nostalgia 1 6.957497 0 0 17918 +linksclick 1 6.957497 0 0 17919 +megradu 1 6.957497 0 0 17920 +designresearch 1 6.957497 0 0 17921 +tunneleduc 1 6.957497 0 0 17922 +publicationscachi 1 6.957497 0 0 17923 +stormwatch 1 6.957497 0 0 17924 +protocolstrishul 1 6.957497 0 0 17925 +olympiadpresid 1 6.957497 0 0 17926 +examinationcertif 1 6.957497 0 0 17927 +chemistrycertif 1 6.957497 0 0 17928 +csashi 1 6.957497 0 0 17929 +curt 1 6.957497 0 0 17930 +ellmann 1 6.957497 0 0 17931 +webgnat 1 6.957497 0 0 17932 +defect 1 6.957497 0 0 17933 +opengi 1 6.957497 0 0 17934 +calmit 1 6.957497 0 0 17935 +illustra 1 6.957497 0 0 17936 +papersmiscellan 1 6.957497 0 0 17937 +sitescampu 1 6.957497 0 0 17938 +wyrm 1 6.957497 0 0 17939 +hoard 1 6.957497 0 0 17940 +wiscnet 1 6.957497 0 0 17941 +netcorpor 1 6.957497 0 0 17942 +paww 1 6.957497 0 0 17943 +taligentsearch 1 6.957497 0 0 17944 +savvi 1 6.957497 0 0 17945 +webcrawl 1 6.957497 0 0 17946 +winsock 1 6.957497 0 0 17947 +geolog 1 6.957497 0 0 17948 +gil 1 6.957497 0 0 17949 +oakridg 1 6.957497 0 0 17950 +datacurt 1 6.957497 0 0 17951 +ellmanncurt 1 6.957497 0 0 17952 +eduparadis 1 6.957497 0 0 17953 +pagechan 1 6.957497 0 0 17954 +sara 1 6.957497 0 0 17955 +bauman 1 6.957497 0 0 17956 +dailei 1 6.957497 0 0 17957 +baumandailei 1 6.957497 0 0 17958 +edugradu 1 6.957497 0 0 17959 +pagessend 1 6.957497 0 0 17960 +daileytu 1 6.957497 0 0 17961 +typhoon 1 6.957497 0 0 17962 +usadavid 1 6.957497 0 0 17963 +toonenrec 1 6.957497 0 0 17964 +rahmat 1 6.957497 0 0 17965 +alvi 1 6.957497 0 0 17966 +informix 1 6.957497 0 0 17967 +memorysteven 1 6.957497 0 0 17968 +communicationshubhendu 1 6.957497 0 0 17969 +costrahmat 1 6.957497 0 0 17970 +multiprocessorsalvin 1 6.957497 0 0 17971 +simulationalvin 1 6.957497 0 0 17972 +sigmetricsmai 1 6.957497 0 0 17973 +thrust 1 6.957497 0 0 17974 +hybridprogram 1 6.957497 0 0 17975 +similaritesof 1 6.957497 0 0 17976 +calledtempest 1 6.957497 0 0 17977 +handler 1 6.957497 0 0 17978 +suppliedmechan 1 6.957497 0 0 17979 +tempestmechan 1 6.957497 0 0 17980 +novelmechan 1 6.957497 0 0 17981 +tagblock 1 6.957497 0 0 17982 +theloc 1 6.957497 0 0 17983 +hardwareplatform 1 6.957497 0 0 17984 +revers 1 6.957497 0 0 17985 +translationt 1 6.957497 0 0 17986 +rtlb 1 6.957497 0 0 17987 +grainaccess 1 6.957497 0 0 17988 +thata 1 6.957497 0 0 17989 +performscompar 1 6.957497 0 0 17990 +memoryprogram 1 6.957497 0 0 17991 +thatoptim 1 6.957497 0 0 17992 +reducingsimul 1 6.957497 0 0 17993 +tightli 1 6.957497 0 0 17994 +byprovid 1 6.957497 0 0 17995 +referenceinvok 1 6.957497 0 0 17996 +andmemori 1 6.957497 0 0 17997 +processedbi 1 6.957497 0 0 17998 +functionfor 1 6.957497 0 0 17999 +usingbinari 1 6.957497 0 0 18000 +memoryrefer 1 6.957497 0 0 18001 +tothre 1 6.957497 0 0 18002 +thatcal 1 6.957497 0 0 18003 +onlythre 1 6.957497 0 0 18004 +slower 1 6.957497 0 0 18005 +techniquesto 1 6.957497 0 0 18006 +pageprofession 1 6.957497 0 0 18007 +summaryresum 1 6.957497 0 0 18008 +cvtranscriptcours 1 6.957497 0 0 18009 +projectsadvisoraffili 1 6.957497 0 0 18010 +sciwisconsin 1 6.957497 0 0 18011 +tunnelpag 1 6.957497 0 0 18012 +architectureuw 1 6.957497 0 0 18013 +architecturesimplescalar 1 6.957497 0 0 18014 +setgenericasacmperson 1 6.957497 0 0 18015 +meus 1 6.957497 0 0 18016 +linksphoto 1 6.957497 0 0 18017 +galleryrid 1 6.957497 0 0 18018 +demonhunt 1 6.957497 0 0 18019 +catsbewar 1 6.957497 0 0 18020 +ditto 1 6.957497 0 0 18021 +nevai 1 6.957497 0 0 18022 +pinku 1 6.957497 0 0 18023 +mathematicsdepart 1 6.957497 0 0 18024 +schoenberg 1 6.957497 0 0 18025 +approx 1 6.957497 0 0 18026 +theclick 1 6.957497 0 0 18027 +ofapproxim 1 6.957497 0 0 18028 +publishedpap 1 6.957497 0 0 18029 +andmuch 1 6.957497 0 0 18030 +foreast 1 6.957497 0 0 18031 +theirtabl 1 6.957497 0 0 18032 +singli 1 6.957497 0 0 18033 +thishandi 1 6.957497 0 0 18034 +alsoapproxim 1 6.957497 0 0 18035 +slist 1 6.957497 0 0 18036 +ila 1 6.957497 0 0 18037 +seeviva_vi 1 6.957497 0 0 18038 +alsoon 1 6.957497 0 0 18039 +thehtml 1 6.957497 0 0 18040 +primermight 1 6.957497 0 0 18041 +ever_chang 1 6.957497 0 0 18042 +griffeath 1 6.957497 0 0 18043 +sprimordi 1 6.957497 0 0 18044 +seeodd 1 6.957497 0 0 18045 +techunix 1 6.957497 0 0 18046 +nevaiif 1 6.957497 0 0 18047 +makehi 1 6.957497 0 0 18048 +outputavail 1 6.957497 0 0 18049 +taki 1 6.957497 0 0 18050 +souganid 1 6.957497 0 0 18051 +andthaleia 1 6.957497 0 0 18052 +zariphopoul 1 6.957497 0 0 18053 +szego 1 6.957497 0 0 18054 +bust 1 6.957497 0 0 18055 +inscript 1 6.957497 0 0 18056 +pagedevis 1 6.957497 0 0 18057 +visualizationt 1 6.957497 0 0 18058 +featuresexamplesin 1 6.957497 0 0 18059 +depthpublicationsrel 1 6.957497 0 0 18060 +workreleasecontactsfeaturesthes 1 6.957497 0 0 18061 +cancontrol 1 6.957497 0 0 18062 +ax 1 6.957497 0 0 18063 +cursor 1 6.957497 0 0 18064 +examplescheck 1 6.957497 0 0 18065 +validationmolecular 1 6.957497 0 0 18066 +soil 1 6.957497 0 0 18067 +clusteringfinanci 1 6.957497 0 0 18068 +explorationfamili 1 6.957497 0 0 18069 +climatedata 1 6.957497 0 0 18070 +centergeograph 1 6.957497 0 0 18071 +systemsoil 1 6.957497 0 0 18072 +sciencefil 1 6.957497 0 0 18073 +serverprogram 1 6.957497 0 0 18074 +tracesclin 1 6.957497 0 0 18075 +moreexampl 1 6.957497 0 0 18076 +depthfor 1 6.957497 0 0 18077 +visualizationvisu 1 6.957497 0 0 18078 +interfaceperform 1 6.957497 0 0 18079 +issuespublicationsmiron 1 6.957497 0 0 18080 +dataexplor 1 6.957497 0 0 18081 +praveenseshadri 1 6.957497 0 0 18082 +sequencequeri 1 6.957497 0 0 18083 +themanag 1 6.957497 0 0 18084 +seqproject 1 6.957497 0 0 18085 +queryrecord 1 6.957497 0 0 18086 +bevisu 1 6.957497 0 0 18087 +informationw 1 6.957497 0 0 18088 +executablesfor 1 6.957497 0 0 18089 +ld_library_path 1 6.957497 0 0 18090 +rundevis 1 6.957497 0 0 18091 +arestat 1 6.957497 0 0 18092 +shareabl 1 6.957497 0 0 18093 +contactsfor 1 6.957497 0 0 18094 +contactmiron 1 6.957497 0 0 18095 +usersupport 1 6.957497 0 0 18096 +romn 1 6.957497 0 0 18097 +databasebenchmark 1 6.957497 0 0 18098 +objectiveof 1 6.957497 0 0 18099 +objectsystem 1 6.957497 0 0 18100 +applicationsinclud 1 6.957497 0 0 18101 +capabilitiesof 1 6.957497 0 0 18102 +typedobject 1 6.957497 0 0 18103 +hierarchicalnam 1 6.957497 0 0 18104 +interfaceto 1 6.957497 0 0 18105 +toeas 1 6.957497 0 0 18106 +systemenviron 1 6.957497 0 0 18107 +ccwill 1 6.957497 0 0 18108 +networksto 1 6.957497 0 0 18109 +ajoint 1 6.957497 0 0 18110 +relationaldatabas 1 6.957497 0 0 18111 +thetask 1 6.957497 0 0 18112 +formanag 1 6.957497 0 0 18113 +modelingne 1 6.957497 0 0 18114 +manipulatingmuch 1 6.957497 0 0 18115 +muchbett 1 6.957497 0 0 18116 +differencefrom 1 6.957497 0 0 18117 +parallelismto 1 6.957497 0 0 18118 +assatellit 1 6.957497 0 0 18119 +withm 1 6.957497 0 0 18120 +persistentappl 1 6.957497 0 0 18121 +chuh 1 6.957497 0 0 18122 +santiego 1 6.957497 0 0 18123 +seal 1 6.957497 0 0 18124 +allmen 1 6.957497 0 0 18125 +kjell 1 6.957497 0 0 18126 +pagecharl 1 6.957497 0 0 18127 +dyerprofessordepart 1 6.957497 0 0 18128 +infoph 1 6.957497 0 0 18129 +visualizationgroup 1 6.957497 0 0 18130 +groupprogram 1 6.957497 0 0 18131 +synthesisth 1 6.957497 0 0 18132 +controllingin 1 6.957497 0 0 18133 +cameraof 1 6.957497 0 0 18134 +videostream 1 6.957497 0 0 18135 +whicha 1 6.957497 0 0 18136 +througha 1 6.957497 0 0 18137 +thesit 1 6.957497 0 0 18138 +predetermin 1 6.957497 0 0 18139 +researchquest 1 6.957497 0 0 18140 +synthesizenew 1 6.957497 0 0 18141 +reconstructiona 1 6.957497 0 0 18142 +innovativetechniqu 1 6.957497 0 0 18143 +callview 1 6.957497 0 0 18144 +basisimag 1 6.957497 0 0 18145 +explorationcomput 1 6.957497 0 0 18146 +controllingcamera 1 6.957497 0 0 18147 +purposefulli 1 6.957497 0 0 18148 +theposit 1 6.957497 0 0 18149 +adjustviewpoint 1 6.957497 0 0 18150 +forsolv 1 6.957497 0 0 18151 +findspecif 1 6.957497 0 0 18152 +unknownshap 1 6.957497 0 0 18153 +appearanceof 1 6.957497 0 0 18154 +computationsrequir 1 6.957497 0 0 18155 +andelimin 1 6.957497 0 0 18156 +thecamera 1 6.957497 0 0 18157 +towardsviewpoint 1 6.957497 0 0 18158 +viewedobject 1 6.957497 0 0 18159 +thisapproach 1 6.957497 0 0 18160 +visualizationin 1 6.957497 0 0 18161 +techniquescap 1 6.957497 0 0 18162 +specificgraph 1 6.957497 0 0 18163 +displayingarbitrari 1 6.957497 0 0 18164 +commonfram 1 6.957497 0 0 18165 +algorithmexecut 1 6.957497 0 0 18166 +dataanalysi 1 6.957497 0 0 18167 +forexperi 1 6.957497 0 0 18168 +visualizingintermedi 1 6.957497 0 0 18169 +forproblem 1 6.957497 0 0 18170 +cloud 1 6.957497 0 0 18171 +azriel 1 6.957497 0 0 18172 +occlud 1 6.957497 0 0 18173 +battaiola 1 6.957497 0 0 18174 +santek 1 6.957497 0 0 18175 +voidrot 1 6.957497 0 0 18176 +martinez 1 6.957497 0 0 18177 +liangyin 1 6.957497 0 0 18178 +yuph 1 6.957497 0 0 18179 +whibbard 1 6.957497 0 0 18180 +onlattic 1 6.957497 0 0 18181 +kiriako 1 6.957497 0 0 18182 +ofobserv 1 6.957497 0 0 18183 +iutech 1 6.957497 0 0 18184 +spatiotempor 1 6.957497 0 0 18185 +brent 1 6.957497 0 0 18186 +dimensionalshap 1 6.957497 0 0 18187 +plantinga 1 6.957497 0 0 18188 +wheaton 1 6.957497 0 0 18189 +representationfor 1 6.957497 0 0 18190 +ccsua 1 6.957497 0 0 18191 +ctstateu 1 6.957497 0 0 18192 +measureslink 1 6.957497 0 0 18193 +interestmi 1 6.957497 0 0 18194 +machinew 1 6.957497 0 0 18195 +arduou 1 6.957497 0 0 18196 +vitaecheck 1 6.957497 0 0 18197 +hazennon 1 6.957497 0 0 18198 +professorroom 1 6.957497 0 0 18199 +fornet 1 6.957497 0 0 18200 +elegantli 1 6.957497 0 0 18201 +fearless 1 6.957497 0 0 18202 +withtech 1 6.957497 0 0 18203 +capitalist 1 6.957497 0 0 18204 +pragmatist 1 6.957497 0 0 18205 +metaphys 1 6.957497 0 0 18206 +makethi 1 6.957497 0 0 18207 +drosophila 1 6.957497 0 0 18208 +geneticist 1 6.957497 0 0 18209 +ezin 1 6.957497 0 0 18210 +shockwav 1 6.957497 0 0 18211 +kudon 1 6.957497 0 0 18212 +quicktimevr 1 6.957497 0 0 18213 +documentari 1 6.957497 0 0 18214 +plight 1 6.957497 0 0 18215 +bosnia 1 6.957497 0 0 18216 +uproot 1 6.957497 0 0 18217 +preslei 1 6.957497 0 0 18218 +meetingsejhazen 1 6.957497 0 0 18219 +pagetina 1 6.957497 0 0 18220 +bldgphone 1 6.957497 0 0 18221 +rotenberg 1 6.957497 0 0 18222 +passsth 1 6.957497 0 0 18223 +budweisth 1 6.957497 0 0 18224 +ericro 1 6.957497 0 0 18225 +smithresearch 1 6.957497 0 0 18226 +mispredict 1 6.957497 0 0 18227 +tolerancepubl 1 6.957497 0 0 18228 +jacobsen 1 6.957497 0 0 18229 +mentorcultresearch 1 6.957497 0 0 18230 +modelseduc 1 6.957497 0 0 18231 +morf 1 6.957497 0 0 18232 +dionosi 1 6.957497 0 0 18233 +hillari 1 6.957497 0 0 18234 +profan 1 6.957497 0 0 18235 +variationalinequ 1 6.957497 0 0 18236 +toproblem 1 6.957497 0 0 18237 +andinterfac 1 6.957497 0 0 18238 +beingconsid 1 6.957497 0 0 18239 +oncarbon 1 6.957497 0 0 18240 +emiss 1 6.957497 0 0 18241 +solvingproblem 1 6.957497 0 0 18242 +partitioningtechniqu 1 6.957497 0 0 18243 +forexploit 1 6.957497 0 0 18244 +underlyingmodel 1 6.957497 0 0 18245 +cpnet 1 6.957497 0 0 18246 +prgram 1 6.957497 0 0 18247 +finton 1 6.957497 0 0 18248 +openstep 1 6.957497 0 0 18249 +nerdin 1 6.957497 0 0 18250 +intelligenceher 1 6.957497 0 0 18251 +softwarefor 1 6.957497 0 0 18252 +trusti 1 6.957497 0 0 18253 +nextstationor 1 6.957497 0 0 18254 +enjoyplai 1 6.957497 0 0 18255 +longhair 1 6.957497 0 0 18256 +intervarsityfolk 1 6.957497 0 0 18257 +supersoak 1 6.957497 0 0 18258 +accountto 1 6.957497 0 0 18259 +intelligenti 1 6.957497 0 0 18260 +intelligencei 1 6.957497 0 0 18261 +actappropri 1 6.957497 0 0 18262 +todistinguish 1 6.957497 0 0 18263 +orimport 1 6.957497 0 0 18264 +basedfeatur 1 6.957497 0 0 18265 +learningprocess 1 6.957497 0 0 18266 +intelligentadapt 1 6.957497 0 0 18267 +whichwil 1 6.957497 0 0 18268 +hotlistthi 1 6.957497 0 0 18269 +omniweb 1 6.957497 0 0 18270 +eleg 1 6.957497 0 0 18271 +omniwebi 1 6.957497 0 0 18272 +responseto 1 6.957497 0 0 18273 +jehovah 1 6.957497 0 0 18274 +deiti 1 6.957497 0 0 18275 +christwisconsin 1 6.957497 0 0 18276 +intervars 1 6.957497 0 0 18277 +weatherin 1 6.957497 0 0 18278 +nebula 1 6.957497 0 0 18279 +crosssearch 1 6.957497 0 0 18280 +farsid 1 6.957497 0 0 18281 +voyagerent 1 6.957497 0 0 18282 +zoneroam 1 6.957497 0 0 18283 +stereogram 1 6.957497 0 0 18284 +nbsp 1 6.957497 0 0 18285 +kurland 1 6.957497 0 0 18286 +proebst 1 6.957497 0 0 18287 +harish 1 6.957497 0 0 18288 +patil 1 6.957497 0 0 18289 +nbspcharl 1 6.957497 0 0 18290 +nbspprofessor 1 6.957497 0 0 18291 +nbspunivers 1 6.957497 0 0 18292 +enormouscap 1 6.957497 0 0 18293 +haveinvestig 1 6.957497 0 0 18294 +registerresid 1 6.957497 0 0 18295 +loadsand 1 6.957497 0 0 18296 +theprocedur 1 6.957497 0 0 18297 +studiedinterprocedur 1 6.957497 0 0 18298 +modelsthat 1 6.957497 0 0 18299 +optimallyalloc 1 6.957497 0 0 18300 +toautomat 1 6.957497 0 0 18301 +orno 1 6.957497 0 0 18302 +slowdown 1 6.957497 0 0 18303 +inacm 1 6.957497 0 0 18304 +activitiesa 1 6.957497 0 0 18305 +cytronand 1 6.957497 0 0 18306 +studentsdonn 1 6.957497 0 0 18307 +rowland 1 6.957497 0 0 18308 +skedzielewski 1 6.957497 0 0 18309 +reevalu 1 6.957497 0 0 18310 +corrector 1 6.957497 0 0 18311 +sensitivepars 1 6.957497 0 0 18312 +mahadevan 1 6.957497 0 0 18313 +ganapathi 1 6.957497 0 0 18314 +vimal 1 6.957497 0 0 18315 +begwami 1 6.957497 0 0 18316 +maunei 1 6.957497 0 0 18317 +anil 1 6.957497 0 0 18318 +winsborough 1 6.957497 0 0 18319 +woest 1 6.957497 0 0 18320 +nagi 1 6.957497 0 0 18321 +contentsgalileoproject 1 6.957497 0 0 18322 +descriptionpublicationsrel 1 6.957497 0 0 18323 +projectssci 1 6.957497 0 0 18324 +wisconsinproject 1 6.957497 0 0 18325 +descriptionpublicationsproject 1 6.957497 0 0 18326 +membersgalileo 1 6.957497 0 0 18327 +wisconsingalileo 1 6.957497 0 0 18328 +therelationship 1 6.957497 0 0 18329 +futuresystem 1 6.957497 0 0 18330 +issuabl 1 6.957497 0 0 18331 +orlimit 1 6.957497 0 0 18332 +capacityon 1 6.957497 0 0 18333 +sizabl 1 6.957497 0 0 18334 +fractionof 1 6.957497 0 0 18335 +mopin 1 6.957497 0 0 18336 +ofprocessor 1 6.957497 0 0 18337 +eventuallyobvi 1 6.957497 0 0 18338 +andlimit 1 6.957497 0 0 18339 +systemsperform 1 6.957497 0 0 18340 +theprocessor 1 6.957497 0 0 18341 +spectrumcach 1 6.957497 0 0 18342 +systemsdesign 1 6.957497 0 0 18343 +systemprogram 1 6.957497 0 0 18344 +bottlenecksdoug 1 6.957497 0 0 18345 +modeldoug 1 6.957497 0 0 18346 +microprocessorsdoug 1 6.957497 0 0 18347 +microprocessorsdougla 1 6.957497 0 0 18348 +berkeleyppram 1 6.957497 0 0 18349 +kyushu 1 6.957497 0 0 18350 +japansci 1 6.957497 0 0 18351 +wisconsinour 1 6.957497 0 0 18352 +coherentshar 1 6.957497 0 0 18353 +coherentinterfac 1 6.957497 0 0 18354 +qolb 1 6.957497 0 0 18355 +pairwis 1 6.957497 0 0 18356 +definitionfor 1 6.957497 0 0 18357 +betweenprocess 1 6.957497 0 0 18358 +structureseffici 1 6.957497 0 0 18359 +extensionsaggress 1 6.957497 0 0 18360 +multiprocessorswisconsin 1 6.957497 0 0 18361 +tunneldougla 1 6.957497 0 0 18362 +scijam 1 6.957497 0 0 18363 +memoryross 1 6.957497 0 0 18364 +aboulenein 1 6.957497 0 0 18365 +ringsross 1 6.957497 0 0 18366 +ringsteven 1 6.957497 0 0 18367 +coherenceross 1 6.957497 0 0 18368 +multiprocessorsphilip 1 6.957497 0 0 18369 +multiprocessorjam 1 6.957497 0 0 18370 +abouleneinross 1 6.957497 0 0 18371 +johnsonstev 1 6.957497 0 0 18372 +scottlast 1 6.957497 0 0 18373 +dburger 1 6.957497 0 0 18374 +madisonin 1 6.957497 0 0 18375 +compsci 1 6.957497 0 0 18376 +pontif 1 6.957497 0 0 18377 +jacqu 1 6.957497 0 0 18378 +derrida 1 6.957497 0 0 18379 +heidegg 1 6.957497 0 0 18380 +camu 1 6.957497 0 0 18381 +sartr 1 6.957497 0 0 18382 +nietzsch 1 6.957497 0 0 18383 +gideon 1 6.957497 0 0 18384 +tweak 1 6.957497 0 0 18385 +toonion 1 6.957497 0 0 18386 +seethi 1 6.957497 0 0 18387 +blockbust 1 6.957497 0 0 18388 +predica 1 6.957497 0 0 18389 +dismal 1 6.957497 0 0 18390 +donationto 1 6.957497 0 0 18391 +defrai 1 6.957497 0 0 18392 +orderscan 1 6.957497 0 0 18393 +monro 1 6.957497 0 0 18394 +usathank 1 6.957497 0 0 18395 +unread 1 6.957497 0 0 18396 +achil 1 6.957497 0 0 18397 +cstechreport 1 6.957497 0 0 18398 +otherstuff 1 6.957497 0 0 18399 +averagewil 1 6.957497 0 0 18400 +doofu 1 6.957497 0 0 18401 +zippi 1 6.957497 0 0 18402 +pinheadha 1 6.957497 0 0 18403 +justtri 1 6.957497 0 0 18404 +mozilla 1 6.957497 0 0 18405 +buttonher 1 6.957497 0 0 18406 +somethingin 1 6.957497 0 0 18407 +hater 1 6.957497 0 0 18408 +mailand 1 6.957497 0 0 18409 +advoc 1 6.957497 0 0 18410 +beef 1 6.957497 0 0 18411 +krazi 1 6.957497 0 0 18412 +wannab 1 6.957497 0 0 18413 +softwareto 1 6.957497 0 0 18414 +teresa 1 6.957497 0 0 18415 +largelyform 1 6.957497 0 0 18416 +snippet 1 6.957497 0 0 18417 +stylishor 1 6.957497 0 0 18418 +summarycontact 1 6.957497 0 0 18419 +addressescalendar 1 6.957497 0 0 18420 +taker 1 6.957497 0 0 18421 +priest 1 6.957497 0 0 18422 +boyn 1 6.957497 0 0 18423 +frost 1 6.957497 0 0 18424 +almighti 1 6.957497 0 0 18425 +dollar 1 6.957497 0 0 18426 +bellow 1 6.957497 0 0 18427 +ranter 1 6.957497 0 0 18428 +preacher 1 6.957497 0 0 18429 +beecher 1 6.957497 0 0 18430 +harbour 1 6.957497 0 0 18431 +deplor 1 6.957497 0 0 18432 +churchmen 1 6.957497 0 0 18433 +notori 1 6.957497 0 0 18434 +atheist 1 6.957497 0 0 18435 +chariti 1 6.957497 0 0 18436 +sailor 1 6.957497 0 0 18437 +chord 1 6.957497 0 0 18438 +firewood 1 6.957497 0 0 18439 +meal 1 6.957497 0 0 18440 +manifesto 1 6.957497 0 0 18441 +handbil 1 6.957497 0 0 18442 +hackeralthough 1 6.957497 0 0 18443 +formerlyhad 1 6.957497 0 0 18444 +fake 1 6.957497 0 0 18445 +andstil 1 6.957497 0 0 18446 +wistfulli 1 6.957497 0 0 18447 +suspend 1 6.957497 0 0 18448 +bald 1 6.957497 0 0 18449 +architectureonc 1 6.957497 0 0 18450 +architecturei 1 6.957497 0 0 18451 +grabbag 1 6.957497 0 0 18452 +antidot 1 6.957497 0 0 18453 +afford 1 6.957497 0 0 18454 +diskspac 1 6.957497 0 0 18455 +provideror 1 6.957497 0 0 18456 +architectureon 1 6.957497 0 0 18457 +datasheet 1 6.957497 0 0 18458 +netscapebookmarksstockscod 1 6.957497 0 0 18459 +standardsroi 1 6.957497 0 0 18460 +standardsi 1 6.957497 0 0 18461 +enfopris 1 6.957497 0 0 18462 +writingto 1 6.957497 0 0 18463 +longstand 1 6.957497 0 0 18464 +configurationmanag 1 6.957497 0 0 18465 +scc 1 6.957497 0 0 18466 +box 1 6.957497 0 0 18467 +hardlink 1 6.957497 0 0 18468 +deprec 1 6.957497 0 0 18469 +livelock 1 6.957497 0 0 18470 +insist 1 6.957497 0 0 18471 +checkinsso 1 6.957497 0 0 18472 +approachin 1 6.957497 0 0 18473 +fetterman 1 6.957497 0 0 18474 +deserv 1 6.957497 0 0 18475 +wisconsinhow 1 6.957497 0 0 18476 +programat 1 6.957497 0 0 18477 +cmtool 1 6.957497 0 0 18478 +ical 1 6.957497 0 0 18479 +anyof 1 6.957497 0 0 18480 +manuallyadd 1 6.957497 0 0 18481 +intelat 1 6.957497 0 0 18482 +devout 1 6.957497 0 0 18483 +ontim 1 6.957497 0 0 18484 +meetingswith 1 6.957497 0 0 18485 +reserveth 1 6.957497 0 0 18486 +blindli 1 6.957497 0 0 18487 +proposeif 1 6.957497 0 0 18488 +overallschedul 1 6.957497 0 0 18489 +secretariesand 1 6.957497 0 0 18490 +usaresearch 1 6.957497 0 0 18491 +pic 1 6.957497 0 0 18492 +wyom 1 6.957497 0 0 18493 +satelit 1 6.957497 0 0 18494 +handwrit 1 6.957497 0 0 18495 +schwab 1 6.957497 0 0 18496 +tgif 1 6.957497 0 0 18497 +notesclass 1 6.957497 0 0 18498 +aboutsearch 1 6.957497 0 0 18499 +ohioc 1 6.957497 0 0 18500 +cygnu 1 6.957497 0 0 18501 +mumit 1 6.957497 0 0 18502 +newbi 1 6.957497 0 0 18503 +guideplatform 1 6.957497 0 0 18504 +kit 1 6.957497 0 0 18505 +amulet 1 6.957497 0 0 18506 +dclap 1 6.957497 0 0 18507 +wxwindow 1 6.957497 0 0 18508 +yacl 1 6.957497 0 0 18509 +projectclass 1 6.957497 0 0 18510 +projectmisc 1 6.957497 0 0 18511 +cygwin 1 6.957497 0 0 18512 +gimp 1 6.957497 0 0 18513 +harmonai 1 6.957497 0 0 18514 +vasc 1 6.957497 0 0 18515 +jaida 1 6.957497 0 0 18516 +seamless 1 6.957497 0 0 18517 +meteor 1 6.957497 0 0 18518 +antarctica 1 6.957497 0 0 18519 +niae 1 6.957497 0 0 18520 +vistex 1 6.957497 0 0 18521 +databaseartifici 1 6.957497 0 0 18522 +primoridi 1 6.957497 0 0 18523 +dermatolog 1 6.957497 0 0 18524 +erlang 1 6.957497 0 0 18525 +orthopaed 1 6.957497 0 0 18526 +ecvnet 1 6.957497 0 0 18527 +nici 1 6.957497 0 0 18528 +groupimag 1 6.957497 0 0 18529 +raytrac 1 6.957497 0 0 18530 +rayshad 1 6.957497 0 0 18531 +avalon 1 6.957497 0 0 18532 +grimstead 1 6.957497 0 0 18533 +dsite 1 6.957497 0 0 18534 +intergraph 1 6.957497 0 0 18535 +glint 1 6.957497 0 0 18536 +chipset 1 6.957497 0 0 18537 +nvidia 1 6.957497 0 0 18538 +chipsetcomput 1 6.957497 0 0 18539 +geometeri 1 6.957497 0 0 18540 +geometrylispuseless 1 6.957497 0 0 18541 +pagescomput 1 6.957497 0 0 18542 +superdlx 1 6.957497 0 0 18543 +parl 1 6.957497 0 0 18544 +washingt 1 6.957497 0 0 18545 +groupjapanes 1 6.957497 0 0 18546 +unvers 1 6.957497 0 0 18547 +infowav 1 6.957497 0 0 18548 +edict 1 6.957497 0 0 18549 +shodouka 1 6.957497 0 0 18550 +asiasoftinform 1 6.957497 0 0 18551 +retrev 1 6.957497 0 0 18552 +peregrin 1 6.957497 0 0 18553 +infomin 1 6.957497 0 0 18554 +other_sw 1 6.957497 0 0 18555 +info_retriev 1 6.957497 0 0 18556 +jedi 1 6.957497 0 0 18557 +hartlib 1 6.957497 0 0 18558 +stemmer 1 6.957497 0 0 18559 +twainhumor 1 6.957497 0 0 18560 +threw 1 6.957497 0 0 18561 +investorweb 1 6.957497 0 0 18562 +networth 1 6.957497 0 0 18563 +fundscap 1 6.957497 0 0 18564 +stockmastermutu 1 6.957497 0 0 18565 +brokerag 1 6.957497 0 0 18566 +fidel 1 6.957497 0 0 18567 +vanguard 1 6.957497 0 0 18568 +gabelli 1 6.957497 0 0 18569 +mutualsmisc 1 6.957497 0 0 18570 +psnuplast 1 6.957497 0 0 18571 +gopalsridhar 1 6.957497 0 0 18572 +gopalgsri 1 6.957497 0 0 18573 +edubon 1 6.957497 0 0 18574 +marrow 1 6.957497 0 0 18575 +pageresumest 1 6.957497 0 0 18576 +pagecalvin 1 6.957497 0 0 18577 +hobbesbookmark 1 6.957497 0 0 18578 +gviswana 1 6.957497 0 0 18579 +parallelappl 1 6.957497 0 0 18580 +harit 1 6.957497 0 0 18581 +mvsr 1 6.957497 0 0 18582 +murthi 1 6.957497 0 0 18583 +zubber 1 6.957497 0 0 18584 +claud 1 6.957497 0 0 18585 +welcomethank 1 6.957497 0 0 18586 +bestbet 1 6.957497 0 0 18587 +onmai 1 6.957497 0 0 18588 +nichol 1 6.957497 0 0 18589 +discours 1 6.957497 0 0 18590 +barwis 1 6.957497 0 0 18591 +epigram 1 6.957497 0 0 18592 +perli 1 6.957497 0 0 18593 +laud 1 6.957497 0 0 18594 +truman 1 6.957497 0 0 18595 +missouri 1 6.957497 0 0 18596 +poop 1 6.957497 0 0 18597 +unabash 1 6.957497 0 0 18598 +psychot 1 6.957497 0 0 18599 +alogirthm 1 6.957497 0 0 18600 +sanjai 1 6.957497 0 0 18601 +reznik 1 6.957497 0 0 18602 +samantha 1 6.957497 0 0 18603 +hogenson 1 6.957497 0 0 18604 +myschedul 1 6.957497 0 0 18605 +workout 1 6.957497 0 0 18606 +tryto 1 6.957497 0 0 18607 +ghana 1 6.957497 0 0 18608 +usernam 1 6.957497 0 0 18609 +pnhp 1 6.957497 0 0 18610 +eilun 1 6.957497 0 0 18611 +accessedtim 1 6.957497 0 0 18612 +swanton 1 6.957497 0 0 18613 +familyemploymenteducationresearchgenealog 1 6.957497 0 0 18614 +horwitzsusan 1 6.957497 0 0 18615 +horwitzprofessorcomput 1 6.957497 0 0 18616 +environmentsprogram 1 6.957497 0 0 18617 +mergingstat 1 6.957497 0 0 18618 +programsinterprocedur 1 6.957497 0 0 18619 +analysisresearch 1 6.957497 0 0 18620 +affectedbi 1 6.957497 0 0 18621 +betweentwo 1 6.957497 0 0 18622 +retest 1 6.957497 0 0 18623 +certainsemant 1 6.957497 0 0 18624 +concentratedeith 1 6.957497 0 0 18625 +newalgorithm 1 6.957497 0 0 18626 +publicationsm 1 6.957497 0 0 18627 +constantpropag 1 6.957497 0 0 18628 +bate 1 6.957497 0 0 18629 +pagesid 1 6.957497 0 0 18630 +hummertoffic 1 6.957497 0 0 18631 +pageigorivanisev 1 6.957497 0 0 18632 +generalgradu 1 6.957497 0 0 18633 +departmentwa 1 6.957497 0 0 18634 +departmentaddress 1 6.957497 0 0 18635 +iigor 1 6.957497 0 0 18636 +eduiigor 1 6.957497 0 0 18637 +pageireland 1 6.957497 0 0 18638 +madisonmadison 1 6.957497 0 0 18639 +sciencestelephon 1 6.957497 0 0 18640 +pmsection 1 6.957497 0 0 18641 +pmboth 1 6.957497 0 0 18642 +sciencesc 1 6.957497 0 0 18643 +announcementshandoutsmoth 1 6.957497 0 0 18644 +basneyjim 1 6.957497 0 0 18645 +basneygradu 1 6.957497 0 0 18646 +jbasnei 1 6.957497 0 0 18647 +directionof 1 6.957497 0 0 18648 +fromoberlin 1 6.957497 0 0 18649 +oberlin 1 6.957497 0 0 18650 +codefrom 1 6.957497 0 0 18651 +jerel 1 6.957497 0 0 18652 +pagejerel 1 6.957497 0 0 18653 +specialti 1 6.957497 0 0 18654 +violin 1 6.957497 0 0 18655 +baroqu 1 6.957497 0 0 18656 +shock 1 6.957497 0 0 18657 +funni 1 6.957497 0 0 18658 +abba 1 6.957497 0 0 18659 +shoot 1 6.957497 0 0 18660 +jerellast 1 6.957497 0 0 18661 +larson 1 6.957497 0 0 18662 +roomat 1 6.957497 0 0 18663 +censorship 1 6.957497 0 0 18664 +disembody 1 6.957497 0 0 18665 +millisecond 1 6.957497 0 0 18666 +overriden 1 6.957497 0 0 18667 +aquir 1 6.957497 0 0 18668 +skellington 1 6.957497 0 0 18669 +thath 1 6.957497 0 0 18670 +forgotten 1 6.957497 0 0 18671 +cult 1 6.957497 0 0 18672 +hippothi 1 6.957497 0 0 18673 +matriarch 1 6.957497 0 0 18674 +yahooooooooooooo 1 6.957497 0 0 18675 +bazillion 1 6.957497 0 0 18676 +muppet 1 6.957497 0 0 18677 +rachel 1 6.957497 0 0 18678 +bing 1 6.957497 0 0 18679 +jieb 1 6.957497 0 0 18680 +systemsresearch 1 6.957497 0 0 18681 +shorepublicationsbuild 1 6.957497 0 0 18682 +scaleabl 1 6.957497 0 0 18683 +implment 1 6.957497 0 0 18684 +lueder 1 6.957497 0 0 18685 +ellman 1 6.957497 0 0 18686 +kupsch 1 6.957497 0 0 18687 +prong 1 6.957497 0 0 18688 +tile 1 6.957497 0 0 18689 +goddard 1 6.957497 0 0 18690 +reclam 1 6.957497 0 0 18691 +reorgan 1 6.957497 0 0 18692 +serverpersist 1 6.957497 0 0 18693 +grouphobbi 1 6.957497 0 0 18694 +volleyballweb 1 6.957497 0 0 18695 +whitewat 1 6.957497 0 0 18696 +jignesh 1 6.957497 0 0 18697 +madhuri 1 6.957497 0 0 18698 +kashmir 1 6.957497 0 0 18699 +gehrk 1 6.957497 0 0 18700 +homepagejohann 1 6.957497 0 0 18701 +gehrkewelcom 1 6.957497 0 0 18702 +raghuramakrishnan 1 6.957497 0 0 18703 +stoica 1 6.957497 0 0 18704 +abdel 1 6.957497 0 0 18705 +wahab 1 6.957497 0 0 18706 +algorithmfor 1 6.957497 0 0 18707 +anexpand 1 6.957497 0 0 18708 +fastschedul 1 6.957497 0 0 18709 +processingsymposium 1 6.957497 0 0 18710 +powerbook 1 6.957497 0 0 18711 +amass 1 6.957497 0 0 18712 +catagori 1 6.957497 0 0 18713 +needsth 1 6.957497 0 0 18714 +coverageth 1 6.957497 0 0 18715 +operaish 1 6.957497 0 0 18716 +drivelziffnet 1 6.957497 0 0 18717 +newsc 1 6.957497 0 0 18718 +classworktodai 1 6.957497 0 0 18719 +chucklejon 1 6.957497 0 0 18720 +frombeij 1 6.957497 0 0 18721 +capitol 1 6.957497 0 0 18722 +specil 1 6.957497 0 0 18723 +chinacurr 1 6.957497 0 0 18724 +tele 1 6.957497 0 0 18725 +stuffjava 1 6.957497 0 0 18726 +placeshor 1 6.957497 0 0 18727 +tutorialchina 1 6.957497 0 0 18728 +affairchina 1 6.957497 0 0 18729 +democracybeij 1 6.957497 0 0 18730 +groupstanford 1 6.957497 0 0 18731 +informaticsmit 1 6.957497 0 0 18732 +processingjob 1 6.957497 0 0 18733 +newsyou 1 6.957497 0 0 18734 +mercuri 1 6.957497 0 0 18735 +andnando 1 6.957497 0 0 18736 +shabel 1 6.957497 0 0 18737 +pagech 1 6.957497 0 0 18738 +wisconsinch 1 6.957497 0 0 18739 +informationmajor 1 6.957497 0 0 18740 +monta 1 6.957497 0 0 18741 +warrior 1 6.957497 0 0 18742 +shark 1 6.957497 0 0 18743 +oakland 1 6.957497 0 0 18744 +newsmus 1 6.957497 0 0 18745 +jshabel 1 6.957497 0 0 18746 +storageto 1 6.957497 0 0 18747 +andtap 1 6.957497 0 0 18748 +yoav 1 6.957497 0 0 18749 +weiss 1 6.957497 0 0 18750 +scsi 1 6.957497 0 0 18751 +myllymakijussi 1 6.957497 0 0 18752 +summaryi 1 6.957497 0 0 18753 +onadvanc 1 6.957497 0 0 18754 +mcurrent 1 6.957497 0 0 18755 +deviseproject 1 6.957497 0 0 18756 +mironlivni 1 6.957497 0 0 18757 +joinsof 1 6.957497 0 0 18758 +listbelow 1 6.957497 0 0 18759 +andfunct 1 6.957497 0 0 18760 +datavisu 1 6.957497 0 0 18761 +managementissu 1 6.957497 0 0 18762 +publicationseffici 1 6.957497 0 0 18763 +programperform 1 6.957497 0 0 18764 +bartonp 1 6.957497 0 0 18765 +tertiarystorag 1 6.957497 0 0 18766 +withmiron 1 6.957497 0 0 18767 +acmsigmetr 1 6.957497 0 0 18768 +publicationdevis 1 6.957497 0 0 18769 +donjerkov 1 6.957497 0 0 18770 +andmiron 1 6.957497 0 0 18771 +publicationsdisk 1 6.957497 0 0 18772 +tapeaccess 1 6.957497 0 0 18773 +degreeproject 1 6.957497 0 0 18774 +networkarchitectur 1 6.957497 0 0 18775 +finnish 1 6.957497 0 0 18776 +documentsimplement 1 6.957497 0 0 18777 +treealgorithm 1 6.957497 0 0 18778 +productsoverview 1 6.957497 0 0 18779 +supplier 1 6.957497 0 0 18780 +productssom 1 6.957497 0 0 18781 +adaptec 1 6.957497 0 0 18782 +workstationsandpcsandtechn 1 6.957497 0 0 18783 +journaland 1 6.957497 0 0 18784 +whitepap 1 6.957497 0 0 18785 +researchandcyberjourn 1 6.957497 0 0 18786 +tapeanddlt 1 6.957497 0 0 18787 +faqandwhitepap 1 6.957497 0 0 18788 +solarisandsparcstationsandtechn 1 6.957497 0 0 18789 +faqandstorag 1 6.957497 0 0 18790 +faqand 1 6.957497 0 0 18791 +otherusenet 1 6.957497 0 0 18792 +faqsmani 1 6.957497 0 0 18793 +jyothithi 1 6.957497 0 0 18794 +dissappoint 1 6.957497 0 0 18795 +karavaniceveryth 1 6.957497 0 0 18796 +karavanicresearch 1 6.957497 0 0 18797 +databasesask 1 6.957497 0 0 18798 +studentstrio 1 6.957497 0 0 18799 +safer 1 6.957497 0 0 18800 +chocol 1 6.957497 0 0 18801 +onlystuyves 1 6.957497 0 0 18802 +associationstuyves 1 6.957497 0 0 18803 +legisl 1 6.957497 0 0 18804 +internetth 1 6.957497 0 0 18805 +cure 1 6.957497 0 0 18806 +sweat 1 6.957497 0 0 18807 +isak 1 6.957497 0 0 18808 +dinesen 1 6.957497 0 0 18809 +admir 1 6.957497 0 0 18810 +grace 1 6.957497 0 0 18811 +hopper 1 6.957497 0 0 18812 +pioneer 1 6.957497 0 0 18813 +kiloprocessor 1 6.957497 0 0 18814 +glow 1 6.957497 0 0 18815 +papakonstantin 1 6.957497 0 0 18816 +tsanaka 1 6.957497 0 0 18817 +sciresearch 1 6.957497 0 0 18818 +collaborationwith 1 6.957497 0 0 18819 +incolabor 1 6.957497 0 0 18820 +goodmanto 1 6.957497 0 0 18821 +kaxirasto 1 6.957497 0 0 18822 +goodmannd 1 6.957497 0 0 18823 +goodmanst 1 6.957497 0 0 18824 +kaxirasunivers 1 6.957497 0 0 18825 +stafylopati 1 6.957497 0 0 18826 +kaxirasinform 1 6.957497 0 0 18827 +pekmestzi 1 6.957497 0 0 18828 +kaxirasp 1 6.957497 0 0 18829 +kaxirasmicroprocess 1 6.957497 0 0 18830 +hedgehog 1 6.957497 0 0 18831 +pager 1 6.957497 0 0 18832 +foughtthei 1 6.957497 0 0 18833 +bitmap 1 6.957497 0 0 18834 +theblind 1 6.957497 0 0 18835 +whateverbrows 1 6.957497 0 0 18836 +literari 1 6.957497 0 0 18837 +satir 1 6.957497 0 0 18838 +butnoth 1 6.957497 0 0 18839 +herein 1 6.957497 0 0 18840 +areoffend 1 6.957497 0 0 18841 +firsttwo 1 6.957497 0 0 18842 +addup 1 6.957497 0 0 18843 +fizzl 1 6.957497 0 0 18844 +areobtain 1 6.957497 0 0 18845 +creatingkiosk 1 6.957497 0 0 18846 +thosewho 1 6.957497 0 0 18847 +mybe 1 6.957497 0 0 18848 +thoughtson 1 6.957497 0 0 18849 +wantto 1 6.957497 0 0 18850 +todo 1 6.957497 0 0 18851 +sporad 1 6.957497 0 0 18852 +danenet 1 6.957497 0 0 18853 +dilhr 1 6.957497 0 0 18854 +jobnet 1 6.957497 0 0 18855 +photonet 1 6.957497 0 0 18856 +databaseus 1 6.957497 0 0 18857 +freez 1 6.957497 0 0 18858 +fought 1 6.957497 0 0 18859 +sfuai 1 6.957497 0 0 18860 +assigna 1 6.957497 0 0 18861 +contextu 1 6.957497 0 0 18862 +distil 1 6.957497 0 0 18863 +rsuminto 1 6.957497 0 0 18864 +pinch 1 6.957497 0 0 18865 +certaintruth 1 6.957497 0 0 18866 +eventuallypick 1 6.957497 0 0 18867 +mull 1 6.957497 0 0 18868 +accessibleto 1 6.957497 0 0 18869 +tough 1 6.957497 0 0 18870 +kunchithapadamkrishna 1 6.957497 0 0 18871 +kunchithapadamgreet 1 6.957497 0 0 18872 +miscellaneouspubl 1 6.957497 0 0 18873 +toolsresum 1 6.957497 0 0 18874 +bykk 1 6.957497 0 0 18875 +pagekristin 1 6.957497 0 0 18876 +eduadvisor 1 6.957497 0 0 18877 +serveruw 1 6.957497 0 0 18878 +groupacm 1 6.957497 0 0 18879 +pageeo 1 6.957497 0 0 18880 +officelast 1 6.957497 0 0 18881 +tuftekristin 1 6.957497 0 0 18882 +krung 1 6.957497 0 0 18883 +homepageupd 1 6.957497 0 0 18884 +underconstructioni 1 6.957497 0 0 18885 +serf 1 6.957497 0 0 18886 +cometh 1 6.957497 0 0 18887 +linkedth 1 6.957497 0 0 18888 +sinapiromsaran 1 6.957497 0 0 18889 +emailkrung 1 6.957497 0 0 18890 +hart 1 6.957497 0 0 18891 +axiom 1 6.957497 0 0 18892 +fundamenta 1 6.957497 0 0 18893 +quasigroup 1 6.957497 0 0 18894 +professormath 1 6.957497 0 0 18895 +resolutionto 1 6.957497 0 0 18896 +likeprolog 1 6.957497 0 0 18897 +prologus 1 6.957497 0 0 18898 +incompat 1 6.957497 0 0 18899 +betweenleast 1 6.957497 0 0 18900 +backtrack 1 6.957497 0 0 18901 +thissubject 1 6.957497 0 0 18902 +usualaxiom 1 6.957497 0 0 18903 +ramsei 1 6.957497 0 0 18904 +corson 1 6.957497 0 0 18905 +moufang 1 6.957497 0 0 18906 +conjugaci 1 6.957497 0 0 18907 +moschovaki 1 6.957497 0 0 18908 +usalaru 1 6.957497 0 0 18909 +structuresc 1 6.957497 0 0 18910 +spim 1 6.957497 0 0 18911 +wartsrec 1 6.957497 0 0 18912 +paperseffici 1 6.957497 0 0 18913 +teapot 1 6.957497 0 0 18914 +andjam 1 6.957497 0 0 18915 +annerog 1 6.957497 0 0 18916 +practiceof 1 6.957497 0 0 18917 +languagesdesign 1 6.957497 0 0 18918 +youfeng 1 6.957497 0 0 18919 +jameslaru 1 6.957497 0 0 18920 +cachier 1 6.957497 0 0 18921 +graduatesbrad 1 6.957497 0 0 18922 +vassar 1 6.957497 0 0 18923 +languagesfirst 1 6.957497 0 0 18924 +huelsbergen 1 6.957497 0 0 18925 +tball 1 6.957497 0 0 18926 +havehelp 1 6.957497 0 0 18927 +coherencepolici 1 6.957497 0 0 18928 +programmersunderstand 1 6.957497 0 0 18929 +hasidentifi 1 6.957497 0 0 18930 +pagenick 1 6.957497 0 0 18931 +pageoffic 1 6.957497 0 0 18932 +wednessdai 1 6.957497 0 0 18933 +lederman 1 6.957497 0 0 18934 +huss 1 6.957497 0 0 18935 +mpistandard 1 6.957497 0 0 18936 +iscov 1 6.957497 0 0 18937 +prismproject 1 6.957497 0 0 18938 +invol 1 6.957497 0 0 18939 +ongoingwork 1 6.957497 0 0 18940 +compressedtar 1 6.957497 0 0 18941 +desper 1 6.957497 0 0 18942 +faber 1 6.957497 0 0 18943 +electronicmail 1 6.957497 0 0 18944 +participatingin 1 6.957497 0 0 18945 +gigabit 1 6.957497 0 0 18946 +involvesth 1 6.957497 0 0 18947 +atgigabit 1 6.957497 0 0 18948 +onissu 1 6.957497 0 0 18949 +visualizationof 1 6.957497 0 0 18950 +establishmentmethod 1 6.957497 0 0 18951 +olsen 1 6.957497 0 0 18952 +witht 1 6.957497 0 0 18953 +sigcommconfer 1 6.957497 0 0 18954 +coursesconnect 1 6.957497 0 0 18955 +shannon 1 6.957497 0 0 18956 +xsoft 1 6.957497 0 0 18957 +lexdemo 1 6.957497 0 0 18958 +luka 1 6.957497 0 0 18959 +lone 1 6.957497 0 0 18960 +checkbox 1 6.957497 0 0 18961 +pagechristoph 1 6.957497 0 0 18962 +lukasrelev 1 6.957497 0 0 18963 +mspl 1 6.957497 0 0 18964 +workshipi 1 6.957497 0 0 18965 +quest 1 6.957497 0 0 18966 +sunivers 1 6.957497 0 0 18967 +prisonerthi 1 6.957497 0 0 18968 +quoteserv 1 6.957497 0 0 18969 +fabul 1 6.957497 0 0 18970 +pagebet 1 6.957497 0 0 18971 +identitycaptain 1 6.957497 0 0 18972 +throughamaz 1 6.957497 0 0 18973 +withtri 1 6.957497 0 0 18974 +teri 1 6.957497 0 0 18975 +incred 1 6.957497 0 0 18976 +catthi 1 6.957497 0 0 18977 +buttmunchextrem 1 6.957497 0 0 18978 +dudemichael 1 6.957497 0 0 18979 +nesmith 1 6.957497 0 0 18980 +fanfoolmyth 1 6.957497 0 0 18981 +figurewick 1 6.957497 0 0 18982 +playervalu 1 6.957497 0 0 18983 +studentment 1 6.957497 0 0 18984 +defectivea 1 6.957497 0 0 18985 +wkrp 1 6.957497 0 0 18986 +cincinatti 1 6.957497 0 0 18987 +figuregeek 1 6.957497 0 0 18988 +tradesgonzo 1 6.957497 0 0 18989 +admirernetscap 1 6.957497 0 0 18990 +pornpersonifi 1 6.957497 0 0 18991 +condom 1 6.957497 0 0 18992 +stretch 1 6.957497 0 0 18993 +blowflam 1 6.957497 0 0 18994 +testicl 1 6.957497 0 0 18995 +goodpoetri 1 6.957497 0 0 18996 +guruhogwildthi 1 6.957497 0 0 18997 +assman 1 6.957497 0 0 18998 +manbig 1 6.957497 0 0 18999 +dudeuh 1 6.957497 0 0 19000 +ohprofession 1 6.957497 0 0 19001 +muff 1 6.957497 0 0 19002 +diverregress 1 6.957497 0 0 19003 +lifeformherald 1 6.957497 0 0 19004 +invas 1 6.957497 0 0 19005 +forcechri 1 6.957497 0 0 19006 +formsalienherpetophiletodd 1 6.957497 0 0 19007 +hatth 1 6.957497 0 0 19008 +mancreepi 1 6.957497 0 0 19009 +headsmal 1 6.957497 0 0 19010 +planetdr 1 6.957497 0 0 19011 +companioneast 1 6.957497 0 0 19012 +bunnycyberweenietcl 1 6.957497 0 0 19013 +hellbeast 1 6.957497 0 0 19014 +drug 1 6.957497 0 0 19015 +cosmo 1 6.957497 0 0 19016 +irrit 1 6.957497 0 0 19017 +scatolog 1 6.957497 0 0 19018 +pervert 1 6.957497 0 0 19019 +etymolog 1 6.957497 0 0 19020 +phat 1 6.957497 0 0 19021 +gnarli 1 6.957497 0 0 19022 +cybermuffin 1 6.957497 0 0 19023 +erotica 1 6.957497 0 0 19024 +zheng 1 6.957497 0 0 19025 +lzheng 1 6.957497 0 0 19026 +boss 1 6.957497 0 0 19027 +prese 1 6.957497 0 0 19028 +winsconsin 1 6.957497 0 0 19029 +goncalv 1 6.957497 0 0 19030 +hereif 1 6.957497 0 0 19031 +sthe 1 6.957497 0 0 19032 +schoolssend 1 6.957497 0 0 19033 +manuvir 1 6.957497 0 0 19034 +pagemanuvir 1 6.957497 0 0 19035 +dasnow 1 6.957497 0 0 19036 +andwhat 1 6.957497 0 0 19037 +feelfre 1 6.957497 0 0 19038 +somethingsend 1 6.957497 0 0 19039 +anact 1 6.957497 0 0 19040 +manuvirwhat 1 6.957497 0 0 19041 +thisto 1 6.957497 0 0 19042 +theorigin 1 6.957497 0 0 19043 +consin 1 6.957497 0 0 19044 +sarita 1 6.957497 0 0 19045 +kessler 1 6.957497 0 0 19046 +subblock 1 6.957497 0 0 19047 +sampler 1 6.957497 0 0 19048 +madhu 1 6.957497 0 0 19049 +tlb 1 6.957497 0 0 19050 +pagemark 1 6.957497 0 0 19051 +andsummari 1 6.957497 0 0 19052 +graduateslink 1 6.957497 0 0 19053 +oralpresent 1 6.957497 0 0 19054 +forcach 1 6.957497 0 0 19055 +usamarkhil 1 6.957497 0 0 19056 +icatalog 1 6.957497 0 0 19057 +teachc 1 6.957497 0 0 19058 +iieduc 1 6.957497 0 0 19059 +evaluationresearch 1 6.957497 0 0 19060 +multiprocessorsand 1 6.957497 0 0 19061 +evaluationtechniqu 1 6.957497 0 0 19062 +windtunnel 1 6.957497 0 0 19063 +manystud 1 6.957497 0 0 19064 +computerswil 1 6.957497 0 0 19065 +levelparallel 1 6.957497 0 0 19066 +inwhich 1 6.957497 0 0 19067 +recentlypropos 1 6.957497 0 0 19068 +aclust 1 6.957497 0 0 19069 +toolsto 1 6.957497 0 0 19070 +cull 1 6.957497 0 0 19071 +designairplan 1 6.957497 0 0 19072 +talluritarget 1 6.957497 0 0 19073 +lookasid 1 6.957497 0 0 19074 +superpagesand 1 6.957497 0 0 19075 +asplosandsosppap 1 6.957497 0 0 19076 +papersth 1 6.957497 0 0 19077 +bidirect 1 6.957497 0 0 19078 +pad 1 6.957497 0 0 19079 +yousef 1 6.957497 0 0 19080 +khalidi 1 6.957497 0 0 19081 +microstructur 1 6.957497 0 0 19082 +electrostat 1 6.957497 0 0 19083 +traenkl 1 6.957497 0 0 19084 +sangta 1 6.957497 0 0 19085 +tpd 1 6.957497 0 0 19086 +farid 1 6.957497 0 0 19087 +pour 1 6.957497 0 0 19088 +palacharla 1 6.957497 0 0 19089 +kourosh 1 6.957497 0 0 19090 +gharachorloo 1 6.957497 0 0 19091 +netzer 1 6.957497 0 0 19092 +vikram 1 6.957497 0 0 19093 +kessleracm 1 6.957497 0 0 19094 +graduatesmadhusudhan 1 6.957497 0 0 19095 +updatedw 1 6.957497 0 0 19096 +marko 1 6.957497 0 0 19097 +alltraxx 1 6.957497 0 0 19098 +shoringup 1 6.957497 0 0 19099 +atrac 1 6.957497 0 0 19100 +towardseffect 1 6.957497 0 0 19101 +tautolog 1 6.957497 0 0 19102 +fond 1 6.957497 0 0 19103 +repuls 1 6.957497 0 0 19104 +ponder 1 6.957497 0 0 19105 +jacki 1 6.957497 0 0 19106 +dimasi 1 6.957497 0 0 19107 +twisti 1 6.957497 0 0 19108 +amanda 1 6.957497 0 0 19109 +peet 1 6.957497 0 0 19110 +retreather 1 6.957497 0 0 19111 +thepul 1 6.957497 0 0 19112 +cobbl 1 6.957497 0 0 19113 +nowinclud 1 6.957497 0 0 19114 +shapiroand 1 6.957497 0 0 19115 +marion 1 6.957497 0 0 19116 +ferguson 1 6.957497 0 0 19117 +pagerob 1 6.957497 0 0 19118 +minimalist 1 6.957497 0 0 19119 +taship 1 6.957497 0 0 19120 +russian 1 6.957497 0 0 19121 +melskicurr 1 6.957497 0 0 19122 +statisticsmadison 1 6.957497 0 0 19123 +permen 1 6.957497 0 0 19124 +ivesmarshfield 1 6.957497 0 0 19125 +kasei 1 6.957497 0 0 19126 +myexact 1 6.957497 0 0 19127 +studiesher 1 6.957497 0 0 19128 +semesterof 1 6.957497 0 0 19129 +beenbik 1 6.957497 0 0 19130 +numerousbook 1 6.957497 0 0 19131 +tomapquest 1 6.957497 0 0 19132 +alot 1 6.957497 0 0 19133 +marshfield 1 6.957497 0 0 19134 +gustavu 1 6.957497 0 0 19135 +adolphu 1 6.957497 0 0 19136 +atlanti 1 6.957497 0 0 19137 +humm 1 6.957497 0 0 19138 +micklich 1 6.957497 0 0 19139 +illicitsubst 1 6.957497 0 0 19140 +neutron 1 6.957497 0 0 19141 +hailperin 1 6.957497 0 0 19142 +pagemilo 1 6.957497 0 0 19143 +byappointmentba 1 6.957497 0 0 19144 +larusteach 1 6.957497 0 0 19145 +beinfluenc 1 6.957497 0 0 19146 +yule 1 6.957497 0 0 19147 +sagalovski 1 6.957497 0 0 19148 +nucl 1 6.957497 0 0 19149 +inst 1 6.957497 0 0 19150 +languageflex 1 6.957497 0 0 19151 +anintern 1 6.957497 0 0 19152 +toadvanc 1 6.957497 0 0 19153 +fosteringth 1 6.957497 0 0 19154 +highestprofession 1 6.957497 0 0 19155 +bignfl 1 6.957497 0 0 19156 +vike 1 6.957497 0 0 19157 +colon 1 6.957497 0 0 19158 +imho 1 6.957497 0 0 19159 +mythic 1 6.957497 0 0 19160 +engaug 1 6.957497 0 0 19161 +wizard 1 6.957497 0 0 19162 +underworld 1 6.957497 0 0 19163 +ofsocc 1 6.957497 0 0 19164 +afrisbe 1 6.957497 0 0 19165 +quarterback 1 6.957497 0 0 19166 +ultimatein 1 6.957497 0 0 19167 +garofalakismino 1 6.957497 0 0 19168 +eduphd 1 6.957497 0 0 19169 +workresearch 1 6.957497 0 0 19170 +theoryeduc 1 6.957497 0 0 19171 +banu 1 6.957497 0 0 19172 +ioannidismor 1 6.957497 0 0 19173 +centerdr 1 6.957497 0 0 19174 +bibliograpi 1 6.957497 0 0 19175 +perpetu 1 6.957497 0 0 19176 +gonalv 1 6.957497 0 0 19177 +mjrg 1 6.957497 0 0 19178 +addresswork 1 6.957497 0 0 19179 +morgangradu 1 6.957497 0 0 19180 +dyerresearch 1 6.957497 0 0 19181 +interestsvirtu 1 6.957497 0 0 19182 +moshovosresearch 1 6.957497 0 0 19183 +sohigroup 1 6.957497 0 0 19184 +notese 1 6.957497 0 0 19185 +aroundw 1 6.957497 0 0 19186 +clickheremi 1 6.957497 0 0 19187 +explot 1 6.957497 0 0 19188 +thecour 1 6.957497 0 0 19189 +theopportun 1 6.957497 0 0 19190 +kateveni 1 6.957497 0 0 19191 +viha 1 6.957497 0 0 19192 +resouc 1 6.957497 0 0 19193 +atwww 1 6.957497 0 0 19194 +devil 1 6.957497 0 0 19195 +fraud 1 6.957497 0 0 19196 +centerusenet 1 6.957497 0 0 19197 +afax 1 6.957497 0 0 19198 +pagewhat 1 6.957497 0 0 19199 +newoctob 1 6.957497 0 0 19200 +inmadison 1 6.957497 0 0 19201 +informationlast 1 6.957497 0 0 19202 +educopyright 1 6.957497 0 0 19203 +poobah 1 6.957497 0 0 19204 +edufal 1 6.957497 0 0 19205 +scheduleresearch 1 6.957497 0 0 19206 +tin 1 6.957497 0 0 19207 +orientedenviron 1 6.957497 0 0 19208 +postscriptand 1 6.957497 0 0 19209 +faint 1 6.957497 0 0 19210 +alink 1 6.957497 0 0 19211 +honorsthesi 1 6.957497 0 0 19212 +poobahlook 1 6.957497 0 0 19213 +dear 1 6.957497 0 0 19214 +tosomeon 1 6.957497 0 0 19215 +youshould 1 6.957497 0 0 19216 +elton 1 6.957497 0 0 19217 +imaginethat 1 6.957497 0 0 19218 +aforement 1 6.957497 0 0 19219 +poobahship 1 6.957497 0 0 19220 +ill 1 6.957497 0 0 19221 +afew 1 6.957497 0 0 19222 +indatabas 1 6.957497 0 0 19223 +inearli 1 6.957497 0 0 19224 +andinfrequ 1 6.957497 0 0 19225 +rapidlyrid 1 6.957497 0 0 19226 +chilliest 1 6.957497 0 0 19227 +helmet 1 6.957497 0 0 19228 +mynot 1 6.957497 0 0 19229 +ilik 1 6.957497 0 0 19230 +librarylast 1 6.957497 0 0 19231 +pagewisconsin 1 6.957497 0 0 19232 +homepagemik 1 6.957497 0 0 19233 +homepagemsteel 1 6.957497 0 0 19234 +struggl 1 6.957497 0 0 19235 +sometimearound 1 6.957497 0 0 19236 +motto 1 6.957497 0 0 19237 +freezein 1 6.957497 0 0 19238 +graduateinstructor 1 6.957497 0 0 19239 +scomput 1 6.957497 0 0 19240 +publicationsgrindston 1 6.957497 0 0 19241 +jefferyk 1 6.957497 0 0 19242 +hollingsworth 1 6.957497 0 0 19243 +reportc 1 6.957497 0 0 19244 +postscriptfil 1 6.957497 0 0 19245 +semesterc 1 6.957497 0 0 19246 +vernonc 1 6.957497 0 0 19247 +dyermi 1 6.957497 0 0 19248 +pagesinform 1 6.957497 0 0 19249 +gettingin 1 6.957497 0 0 19250 +marylandwhom 1 6.957497 0 0 19251 +teamssom 1 6.957497 0 0 19252 +listth 1 6.957497 0 0 19253 +listi 1 6.957497 0 0 19254 +thefruit 1 6.957497 0 0 19255 +ofmaryland 1 6.957497 0 0 19256 +insidejok 1 6.957497 0 0 19257 +andnow 1 6.957497 0 0 19258 +someinfrar 1 6.957497 0 0 19259 +looklik 1 6.957497 0 0 19260 +infrar 1 6.957497 0 0 19261 +memik 1 6.957497 0 0 19262 +steelemsteel 1 6.957497 0 0 19263 +pagemaria 1 6.957497 0 0 19264 +pagehow 1 6.957497 0 0 19265 +venezuela 1 6.957497 0 0 19266 +barquisimeto 1 6.957497 0 0 19267 +naim 1 6.957497 0 0 19268 +oscar 1 6.957497 0 0 19269 +bienvenido 1 6.957497 0 0 19270 +southampton 1 6.957497 0 0 19271 +universidad 1 6.957497 0 0 19272 +bolivar 1 6.957497 0 0 19273 +caraca 1 6.957497 0 0 19274 +barquisimetoi 1 6.957497 0 0 19275 +ofabout 1 6.957497 0 0 19276 +playclass 1 6.957497 0 0 19277 +excellentmaestro 1 6.957497 0 0 19278 +rodrigo 1 6.957497 0 0 19279 +riera 1 6.957497 0 0 19280 +lauro 1 6.957497 0 0 19281 +sherlock 1 6.957497 0 0 19282 +holm 1 6.957497 0 0 19283 +beati 1 6.957497 0 0 19284 +mundo 1 6.957497 0 0 19285 +anastassia 1 6.957497 0 0 19286 +ailamaki 1 6.957497 0 0 19287 +islandsar 1 6.957497 0 0 19288 +natassa 1 6.957497 0 0 19289 +naughtonjeffrei 1 6.957497 0 0 19290 +naughtonnaughton 1 6.957497 0 0 19291 +interestsolap 1 6.957497 0 0 19292 +relationaldbm 1 6.957497 0 0 19293 +ofdatabas 1 6.957497 0 0 19294 +inperform 1 6.957497 0 0 19295 +ofmulti 1 6.957497 0 0 19296 +computingth 1 6.957497 0 0 19297 +valuedattribut 1 6.957497 0 0 19298 +withsameet 1 6.957497 0 0 19299 +sunita 1 6.957497 0 0 19300 +sarawagi 1 6.957497 0 0 19301 +thend 1 6.957497 0 0 19302 +aggregatesin 1 6.957497 0 0 19303 +bucki 1 6.957497 0 0 19304 +gerhk 1 6.957497 0 0 19305 +dhaval 1 6.957497 0 0 19306 +withyihong 1 6.957497 0 0 19307 +kabranavin 1 6.957497 0 0 19308 +kabragradu 1 6.957497 0 0 19309 +newhal 1 6.957497 0 0 19310 +newhalltia 1 6.957497 0 0 19311 +paradynadvisor 1 6.957497 0 0 19312 +millermummi 1 6.957497 0 0 19313 +guanajuato 1 6.957497 0 0 19314 +hallcomput 1 6.957497 0 0 19315 +chunhui 1 6.957497 0 0 19316 +misclassif 1 6.957497 0 0 19317 +solodov 1 6.957497 0 0 19318 +effectivecomputation 1 6.957497 0 0 19319 +encompassestheoret 1 6.957497 0 0 19320 +parallelgradi 1 6.957497 0 0 19321 +problemsa 1 6.957497 0 0 19322 +animport 1 6.957497 0 0 19323 +ahighli 1 6.957497 0 0 19324 +useat 1 6.957497 0 0 19325 +hospit 1 6.957497 0 0 19326 +solodova 1 6.957497 0 0 19327 +descent 1 6.957497 0 0 19328 +monotonecomplementar 1 6.957497 0 0 19329 +jong 1 6.957497 0 0 19330 +pangexact 1 6.957497 0 0 19331 +programswith 1 6.957497 0 0 19332 +mangasarianmathemat 1 6.957497 0 0 19333 +miningmathemat 1 6.957497 0 0 19334 +mangasarianerror 1 6.957497 0 0 19335 +nondifferenti 1 6.957497 0 0 19336 +slater 1 6.957497 0 0 19337 +ritter 1 6.957497 0 0 19338 +riedmuel 1 6.957497 0 0 19339 +schaeffler 1 6.957497 0 0 19340 +physica 1 6.957497 0 0 19341 +siag 1 6.957497 0 0 19342 +bilinear 1 6.957497 0 0 19343 +cowan 1 6.957497 0 0 19344 +tesauro 1 6.957497 0 0 19345 +alspector 1 6.957497 0 0 19346 +inequalitiesand 1 6.957497 0 0 19347 +vianonmonoton 1 6.957497 0 0 19348 +minimn 1 6.957497 0 0 19349 +malign 1 6.957497 0 0 19350 +nuclei 1 6.957497 0 0 19351 +cytolog 1 6.957497 0 0 19352 +biopsi 1 6.957497 0 0 19353 +oncolog 1 6.957497 0 0 19354 +needl 1 6.957497 0 0 19355 +xcyt 1 6.957497 0 0 19356 +setiono 1 6.957497 0 0 19357 +ofeach 1 6.957497 0 0 19358 +ofdiseas 1 6.957497 0 0 19359 +lymph 1 6.957497 0 0 19360 +histolog 1 6.957497 0 0 19361 +heisei 1 6.957497 0 0 19362 +prognosismachin 1 6.957497 0 0 19363 +prognosisthi 1 6.957497 0 0 19364 +learningapproach 1 6.957497 0 0 19365 +ofbreast 1 6.957497 0 0 19366 +betweenprof 1 6.957497 0 0 19367 +anddr 1 6.957497 0 0 19368 +wolbergof 1 6.957497 0 0 19369 +thepress 1 6.957497 0 0 19370 +inmarch 1 6.957497 0 0 19371 +linksdiagnosisthi 1 6.957497 0 0 19372 +diagnosebreast 1 6.957497 0 0 19373 +heidentifi 1 6.957497 0 0 19374 +consideredrelev 1 6.957497 0 0 19375 +andtwo 1 6.957497 0 0 19376 +aclassifi 1 6.957497 0 0 19377 +thatsuccessfulli 1 6.957497 0 0 19378 +iswel 1 6.957497 0 0 19379 +streetto 1 6.957497 0 0 19380 +adigit 1 6.957497 0 0 19381 +consolid 1 6.957497 0 0 19382 +clinicalpractic 1 6.957497 0 0 19383 +thenmount 1 6.957497 0 0 19384 +stain 1 6.957497 0 0 19385 +cellularnuclei 1 6.957497 0 0 19386 +arewel 1 6.957497 0 0 19387 +afram 1 6.957497 0 0 19388 +mous 1 6.957497 0 0 19389 +showingxcyt 1 6.957497 0 0 19390 +thisfas 1 6.957497 0 0 19391 +standarderror 1 6.957497 0 0 19392 +wasconstruct 1 6.957497 0 0 19393 +thisclassifi 1 6.957497 0 0 19394 +threeof 1 6.957497 0 0 19395 +bayesiancomput 1 6.957497 0 0 19396 +thesedens 1 6.957497 0 0 19397 +consecut 1 6.957497 0 0 19398 +newpati 1 6.957497 0 0 19399 +didxcyt 1 6.957497 0 0 19400 +suspici 1 6.957497 0 0 19401 +estimatedprob 1 6.957497 0 0 19402 +goodtest 1 6.957497 0 0 19403 +petsegment 1 6.957497 0 0 19404 +inthes 1 6.957497 0 0 19405 +prognosisth 1 6.957497 0 0 19406 +haveapproach 1 6.957497 0 0 19407 +inputfeatur 1 6.957497 0 0 19408 +atim 1 6.957497 0 0 19409 +censor 1 6.957497 0 0 19410 +linearprogram 1 6.957497 0 0 19411 +fornew 1 6.957497 0 0 19412 +caseswith 1 6.957497 0 0 19413 +anindividu 1 6.957497 0 0 19414 +intoxcyt 1 6.957497 0 0 19415 +ourorigin 1 6.957497 0 0 19416 +thereforeha 1 6.957497 0 0 19417 +freeafter 1 6.957497 0 0 19418 +xcytgiv 1 6.957497 0 0 19419 +tumors 1 6.957497 0 0 19420 +corrobor 1 6.957497 0 0 19421 +axillari 1 6.957497 0 0 19422 +bibliographylink 1 6.957497 0 0 19423 +notlink 1 6.957497 0 0 19424 +patholog 1 6.957497 0 0 19425 +priediti 1 6.957497 0 0 19426 +teagu 1 6.957497 0 0 19427 +indetermin 1 6.957497 0 0 19428 +imit 1 6.957497 0 0 19429 +sentinel 1 6.957497 0 0 19430 +marilynn 1 6.957497 0 0 19431 +marchion 1 6.957497 0 0 19432 +sorel 1 6.957497 0 0 19433 +surgic 1 6.957497 0 0 19434 +column 1 6.957497 0 0 19435 +schooloth 1 6.957497 0 0 19436 +oncolink 1 6.957497 0 0 19437 +misclassifi 1 6.957497 0 0 19438 +euclideanspac 1 6.957497 0 0 19439 +programmingpattern 1 6.957497 0 0 19440 +programmingthi 1 6.957497 0 0 19441 +outlinemathemat 1 6.957497 0 0 19442 +failon 1 6.957497 0 0 19443 +discard 1 6.957497 0 0 19444 +eachnod 1 6.957497 0 0 19445 +thesam 1 6.957497 0 0 19446 +astrain 1 6.957497 0 0 19447 +traditionallearn 1 6.957497 0 0 19448 +inthat 1 6.957497 0 0 19449 +insepar 1 6.957497 0 0 19450 +orsa 1 6.957497 0 0 19451 +csto 1 6.957497 0 0 19452 +presentationthi 1 6.957497 0 0 19453 +goalsth 1 6.957497 0 0 19454 +manualsstatu 1 6.957497 0 0 19455 +reporta 1 6.957497 0 0 19456 +inflorida 1 6.957497 0 0 19457 +tocompil 1 6.957497 0 0 19458 +postera 1 6.957497 0 0 19459 +spdt 1 6.957497 0 0 19460 +toolsyou 1 6.957497 0 0 19461 +placehold 1 6.957497 0 0 19462 +informationparadyn 1 6.957497 0 0 19463 +parker 1 6.957497 0 0 19464 +prism 1 6.957497 0 0 19465 +projectfal 1 6.957497 0 0 19466 +bradleygradu 1 6.957497 0 0 19467 +mangasarianinterestsmathemat 1 6.957497 0 0 19468 +programmingmachin 1 6.957497 0 0 19469 +learningfli 1 6.957497 0 0 19470 +currentlyb 1 6.957497 0 0 19471 +madisonmathemat 1 6.957497 0 0 19472 +thiswork 1 6.957497 0 0 19473 +olvimangasarian 1 6.957497 0 0 19474 +publicationsal 1 6.957497 0 0 19475 +picksthes 1 6.957497 0 0 19476 +grate 1 6.957497 0 0 19477 +timesfax 1 6.957497 0 0 19478 +uroullett 1 6.957497 0 0 19479 +molecularbiolog 1 6.957497 0 0 19480 +embryo 1 6.957497 0 0 19481 +westdayton 1 6.957497 0 0 19482 +pdevri 1 6.957497 0 0 19483 +andthen 1 6.957497 0 0 19484 +iread 1 6.957497 0 0 19485 +topai 1 6.957497 0 0 19486 +alsoprovid 1 6.957497 0 0 19487 +folksat 1 6.957497 0 0 19488 +microscopi 1 6.957497 0 0 19489 +seancarrol 1 6.957497 0 0 19490 +confoc 1 6.957497 0 0 19491 +lotof 1 6.957497 0 0 19492 +johnwhit 1 6.957497 0 0 19493 +imrstaff 1 6.957497 0 0 19494 +augustnd 1 6.957497 0 0 19495 +calcutta 1 6.957497 0 0 19496 +bosco 1 6.957497 0 0 19497 +yumpe 1 6.957497 0 0 19498 +manoj 1 6.957497 0 0 19499 +universityofwisconsin 1 6.957497 0 0 19500 +salesian 1 6.957497 0 0 19501 +stare 1 6.957497 0 0 19502 +barrel 1 6.957497 0 0 19503 +nerdi 1 6.957497 0 0 19504 +seealso 1 6.957497 0 0 19505 +pinup 1 6.957497 0 0 19506 +suresh 1 6.957497 0 0 19507 +wisecrack 1 6.957497 0 0 19508 +constuct 1 6.957497 0 0 19509 +depar 1 6.957497 0 0 19510 +multidimensionalaggreg 1 6.957497 0 0 19511 +timex 1 6.957497 0 0 19512 +comix 1 6.957497 0 0 19513 +hakuna 1 6.957497 0 0 19514 +matata 1 6.957497 0 0 19515 +vishi 1 6.957497 0 0 19516 +viswanath 1 6.957497 0 0 19517 +reseach 1 6.957497 0 0 19518 +voluntari 1 6.957497 0 0 19519 +interestsuw 1 6.957497 0 0 19520 +clemen 1 6.957497 0 0 19521 +hockert 1 6.957497 0 0 19522 +prockoffic 1 6.957497 0 0 19523 +doonesburi 1 6.957497 0 0 19524 +trot 1 6.957497 0 0 19525 +complexityclass 1 6.957497 0 0 19526 +interactiveproof 1 6.957497 0 0 19527 +nondetermin 1 6.957497 0 0 19528 +suchmodel 1 6.957497 0 0 19529 +proven 1 6.957497 0 0 19530 +classicproblem 1 6.957497 0 0 19531 +theoryof 1 6.957497 0 0 19532 +computationalproblem 1 6.957497 0 0 19533 +whichhard 1 6.957497 0 0 19534 +recentresult 1 6.957497 0 0 19535 +modelsof 1 6.957497 0 0 19536 +approximabilityresult 1 6.957497 0 0 19537 +developingboth 1 6.957497 0 0 19538 +hardcombinatori 1 6.957497 0 0 19539 +forsort 1 6.957497 0 0 19540 +costscan 1 6.957497 0 0 19541 +probabilisticst 1 6.957497 0 0 19542 +hellerstein 1 6.957497 0 0 19543 +pottl 1 6.957497 0 0 19544 +pspace 1 6.957497 0 0 19545 +caiand 1 6.957497 0 0 19546 +lipton 1 6.957497 0 0 19547 +deborah 1 6.957497 0 0 19548 +studyingth 1 6.957497 0 0 19549 +andnondeterminist 1 6.957497 0 0 19550 +stillknow 1 6.957497 0 0 19551 +computerscientist 1 6.957497 0 0 19552 +techniquesfor 1 6.957497 0 0 19553 +investigatesth 1 6.957497 0 0 19554 +exploresin 1 6.957497 0 0 19555 +resolveproblem 1 6.957497 0 0 19556 +theseinclud 1 6.957497 0 0 19557 +handlingrepetit 1 6.957497 0 0 19558 +graphtheoret 1 6.957497 0 0 19559 +subexponenti 1 6.957497 0 0 19560 +pruim 1 6.957497 0 0 19561 +theoryconfer 1 6.957497 0 0 19562 +spanner 1 6.957497 0 0 19563 +althof 1 6.957497 0 0 19564 +dobkin 1 6.957497 0 0 19565 +meidanisand 1 6.957497 0 0 19566 +scandinavianworkshop 1 6.957497 0 0 19567 +asreal 1 6.957497 0 0 19568 +specialemphasi 1 6.957497 0 0 19569 +systemand 1 6.957497 0 0 19570 +performancestudi 1 6.957497 0 0 19571 +modelingand 1 6.957497 0 0 19572 +implementinga 1 6.957497 0 0 19573 +visualizationtool 1 6.957497 0 0 19574 +sashadri 1 6.957497 0 0 19575 +haberand 1 6.957497 0 0 19576 +precondit 1 6.957497 0 0 19577 +seymour 1 6.957497 0 0 19578 +indefinit 1 6.957497 0 0 19579 +classicalit 1 6.957497 0 0 19580 +multigrid 1 6.957497 0 0 19581 +effectivelywhen 1 6.957497 0 0 19582 +bemad 1 6.957497 0 0 19583 +operatori 1 6.957497 0 0 19584 +casedirect 1 6.957497 0 0 19585 +challengingproblem 1 6.957497 0 0 19586 +nowinvolv 1 6.957497 0 0 19587 +specialmultigrid 1 6.957497 0 0 19588 +chebyshev 1 6.957497 0 0 19589 +collact 1 6.957497 0 0 19590 +ellipticparti 1 6.957497 0 0 19591 +journalon 1 6.957497 0 0 19592 +numbersand 1 6.957497 0 0 19593 +applicationto 1 6.957497 0 0 19594 +techniquesi 1 6.957497 0 0 19595 +colleaguesinclud 1 6.957497 0 0 19596 +customizedmean 1 6.957497 0 0 19597 +gtpn 1 6.957497 0 0 19598 +systemfeatur 1 6.957497 0 0 19599 +equationsthat 1 6.957497 0 0 19600 +butcan 1 6.957497 0 0 19601 +proposedth 1 6.957497 0 0 19602 +approximationsfor 1 6.957497 0 0 19603 +techniquemai 1 6.957497 0 0 19604 +broader 1 6.957497 0 0 19605 +performanceparallel 1 6.957497 0 0 19606 +dqdb 1 6.957497 0 0 19607 +slot 1 6.957497 0 0 19608 +brewster 1 6.957497 0 0 19609 +pateland 1 6.957497 0 0 19610 +forrun 1 6.957497 0 0 19611 +with 1 6.957497 0 0 19612 +sigmetricsconfer 1 6.957497 0 0 19613 +qinqin 1 6.957497 0 0 19614 +pageqw 1 6.957497 0 0 19615 +minibaseand 1 6.957497 0 0 19616 +coralth 1 6.957497 0 0 19617 +undergraduateand 1 6.957497 0 0 19618 +inconjunct 1 6.957497 0 0 19619 +coursesthat 1 6.957497 0 0 19620 +deductiona 1 6.957497 0 0 19621 +diversifi 1 6.957497 0 0 19622 +increasinglyimport 1 6.957497 0 0 19623 +dispers 1 6.957497 0 0 19624 +rodin 1 6.957497 0 0 19625 +severalissu 1 6.957497 0 0 19626 +forsemant 1 6.957497 0 0 19627 +serviceand 1 6.957497 0 0 19628 +networkedclust 1 6.957497 0 0 19629 +explorationfrom 1 6.957497 0 0 19630 +assequ 1 6.957497 0 0 19631 +seqsystem 1 6.957497 0 0 19632 +optimizationissu 1 6.957497 0 0 19633 +identifyingtrend 1 6.957497 0 0 19634 +fromlarg 1 6.957497 0 0 19635 +implementingan 1 6.957497 0 0 19636 +customizea 1 6.957497 0 0 19637 +specializedinform 1 6.957497 0 0 19638 +indexedand 1 6.957497 0 0 19639 +andmin 1 6.957497 0 0 19640 +birchfor 1 6.957497 0 0 19641 +devisea 1 6.957497 0 0 19642 +databasequeri 1 6.957497 0 0 19643 +featuressuch 1 6.957497 0 0 19644 +ofarithmet 1 6.957497 0 0 19645 +morecompactli 1 6.957497 0 0 19646 +coraldeduct 1 6.957497 0 0 19647 +fixpointevalu 1 6.957497 0 0 19648 +efficientacross 1 6.957497 0 0 19649 +sudarsha 1 6.957497 0 0 19650 +divesh 1 6.957497 0 0 19651 +managementfirst 1 6.957497 0 0 19652 +livabl 1 6.957497 0 0 19653 +kapoorhello 1 6.957497 0 0 19654 +schedulemydepartmentmyuniversityiitkanpuriitkclass 1 6.957497 0 0 19655 +relatedlink 1 6.957497 0 0 19656 +menow 1 6.957497 0 0 19657 +andrais 1 6.957497 0 0 19658 +elder 1 6.957497 0 0 19659 +moneymagazin 1 6.957497 0 0 19660 +editormust 1 6.957497 0 0 19661 +greenland 1 6.957497 0 0 19662 +complain 1 6.957497 0 0 19663 +isawesom 1 6.957497 0 0 19664 +regret 1 6.957497 0 0 19665 +genr 1 6.957497 0 0 19666 +gymnast 1 6.957497 0 0 19667 +cloudi 1 6.957497 0 0 19668 +breezi 1 6.957497 0 0 19669 +youget 1 6.957497 0 0 19670 +musicstuffmovi 1 6.957497 0 0 19671 +televisioninternettravelotherbookmark 1 6.957497 0 0 19672 +meget 1 6.957497 0 0 19673 +guestbookrahul 1 6.957497 0 0 19674 +eduh 1 6.957497 0 0 19675 +old 1 6.957497 0 0 19676 +homm 1 6.957497 0 0 19677 +winsonsin 1 6.957497 0 0 19678 +chimera 1 6.957497 0 0 19679 +contradict 1 6.957497 0 0 19680 +feebleworm 1 6.957497 0 0 19681 +depositari 1 6.957497 0 0 19682 +cloaca 1 6.957497 0 0 19683 +theglori 1 6.957497 0 0 19684 +shame 1 6.957497 0 0 19685 +blais 1 6.957497 0 0 19686 +karthik 1 6.957497 0 0 19687 +pagekarthikeyan 1 6.957497 0 0 19688 +ramasamyabouti 1 6.957497 0 0 19689 +projectshack 1 6.957497 0 0 19690 +connectivityparadis 1 6.957497 0 0 19691 +pthread 1 6.957497 0 0 19692 +wrapperspublicationsstorag 1 6.957497 0 0 19693 +presentationsweb 1 6.957497 0 0 19694 +picturearchitectur 1 6.957497 0 0 19695 +serversphoto 1 6.957497 0 0 19696 +albumencount 1 6.957497 0 0 19697 +leafperson 1 6.957497 0 0 19698 +inforesum 1 6.957497 0 0 19699 +financemonei 1 6.957497 0 0 19700 +interestshack 1 6.957497 0 0 19701 +photographycontact 1 6.957497 0 0 19702 +informationstreet 1 6.957497 0 0 19703 +addresskarthik 1 6.957497 0 0 19704 +suggestionspleas 1 6.957497 0 0 19705 +ratliffoffic 1 6.957497 0 0 19706 +genealog 1 6.957497 0 0 19707 +ratnakar 1 6.957497 0 0 19708 +viresh 1 6.957497 0 0 19709 +lossi 1 6.957497 0 0 19710 +qclicauthor 1 6.957497 0 0 19711 +qclic 1 6.957497 0 0 19712 +qclicbrows 1 6.957497 0 0 19713 +rever 1 6.957497 0 0 19714 +omin 1 6.957497 0 0 19715 +monasteriu 1 6.957497 0 0 19716 +doominu 1 6.957497 0 0 19717 +rcarl 1 6.957497 0 0 19718 +subsurfac 1 6.957497 0 0 19719 +depositori 1 6.957497 0 0 19720 +dig 1 6.957497 0 0 19721 +solitari 1 6.957497 0 0 19722 +innebri 1 6.957497 0 0 19723 +vampir 1 6.957497 0 0 19724 +nostalg 1 6.957497 0 0 19725 +funki 1 6.957497 0 0 19726 +monk 1 6.957497 0 0 19727 +binklei 1 6.957497 0 0 19728 +ramalingam 1 6.957497 0 0 19729 +prin 1 6.957497 0 0 19730 +idfa 1 6.957497 0 0 19731 +interf 1 6.957497 0 0 19732 +wilhelm 1 6.957497 0 0 19733 +tosem 1 6.957497 0 0 19734 +pfeiffer 1 6.957497 0 0 19735 +demer 1 6.957497 0 0 19736 +fromacm 1 6.957497 0 0 19737 +berzin 1 6.957497 0 0 19738 +sigsoftsymposium 1 6.957497 0 0 19739 +wadern 1 6.957497 0 0 19740 +rosai 1 6.957497 0 0 19741 +fseb 1 6.957497 0 0 19742 +thesiswuu 1 6.957497 0 0 19743 +esop 1 6.957497 0 0 19744 +poplb 1 6.957497 0 0 19745 +pepma 1 6.957497 0 0 19746 +fsea 1 6.957497 0 0 19747 +diku 1 6.957497 0 0 19748 +fase 1 6.957497 0 0 19749 +pepmb 1 6.957497 0 0 19750 +lape 1 6.957497 0 0 19751 +psde 1 6.957497 0 0 19752 +toconst 1 6.957497 0 0 19753 +paradigmsfor 1 6.957497 0 0 19754 +brighton 1 6.957497 0 0 19755 +abramski 1 6.957497 0 0 19756 +maibaum 1 6.957497 0 0 19757 +wherefor 1 6.957497 0 0 19758 +sigoa 1 6.957497 0 0 19759 +pepm 1 6.957497 0 0 19760 +onparti 1 6.957497 0 0 19761 +ibfi 1 6.957497 0 0 19762 +repsprofessorcomput 1 6.957497 0 0 19763 +thehom 1 6.957497 0 0 19764 +createtool 1 6.957497 0 0 19765 +manipulationoper 1 6.957497 0 0 19766 +slicingcan 1 6.957497 0 0 19767 +elementss 1 6.957497 0 0 19768 +thatmight 1 6.957497 0 0 19769 +findsemant 1 6.957497 0 0 19770 +thedecomposit 1 6.957497 0 0 19771 +solvingmani 1 6.957497 0 0 19772 +applicationsin 1 6.957497 0 0 19773 +atimprov 1 6.957497 0 0 19774 +relatedoper 1 6.957497 0 0 19775 +slicer 1 6.957497 0 0 19776 +unexpect 1 6.957497 0 0 19777 +betweeninterprocedur 1 6.957497 0 0 19778 +oninterprocedur 1 6.957497 0 0 19779 +transformingthem 1 6.957497 0 0 19780 +timebi 1 6.957497 0 0 19781 +probleminst 1 6.957497 0 0 19782 +publicationsprogram 1 6.957497 0 0 19783 +slicing_pat 1 6.957497 0 0 19784 +thesismerg 1 6.957497 0 0 19785 +iwscm 1 6.957497 0 0 19786 +popla 1 6.957497 0 0 19787 +iwsvcc 1 6.957497 0 0 19788 +ccpsd 1 6.957497 0 0 19789 +npfo_submiss 1 6.957497 0 0 19790 +ccipl 1 6.957497 0 0 19791 +prog_integration_system 1 6.957497 0 0 19792 +prog_integration_manu 1 6.957497 0 0 19793 +subsetof 1 6.957497 0 0 19794 +clickingher 1 6.957497 0 0 19795 +andexpect 1 6.957497 0 0 19796 +anddifferenc 1 6.957497 0 0 19797 +thesesdavid 1 6.957497 0 0 19798 +thesisphil 1 6.957497 0 0 19799 +thesisinterprocedur 1 6.957497 0 0 19800 +analysisdemand 1 6.957497 0 0 19801 +tcs_ide_pap 1 6.957497 0 0 19802 +ptime 1 6.957497 0 0 19803 +acta_pap 1 6.957497 0 0 19804 +pfeiffer_thesi 1 6.957497 0 0 19805 +jalg_pap 1 6.957497 0 0 19806 +popl_not 1 6.957497 0 0 19807 +publicationsbooksrep 1 6.957497 0 0 19808 +constructinglanguag 1 6.957497 0 0 19809 +publicationssagiv 1 6.957497 0 0 19810 +j_alg 1 6.957497 0 0 19811 +preservingtransform 1 6.957497 0 0 19812 +grammarswith 1 6.957497 0 0 19813 +movement 1 6.957497 0 0 19814 +sublinear 1 6.957497 0 0 19815 +papershorwitz 1 6.957497 0 0 19816 +ganzing 1 6.957497 0 0 19817 +chaptersrep 1 6.957497 0 0 19818 +bohner 1 6.957497 0 0 19819 +fromproceed 1 6.957497 0 0 19820 +ichikawa 1 6.957497 0 0 19821 +tsubotani 1 6.957497 0 0 19822 +barstow 1 6.957497 0 0 19823 +sandewal 1 6.957497 0 0 19824 +shrobe 1 6.957497 0 0 19825 +publicationssiff 1 6.957497 0 0 19826 +danvi 1 6.957497 0 0 19827 +glueck 1 6.957497 0 0 19828 +thiemann 1 6.957497 0 0 19829 +hentenryck 1 6.957497 0 0 19830 +formalapproach 1 6.957497 0 0 19831 +nielsen 1 6.957497 0 0 19832 +schwartzbach 1 6.957497 0 0 19833 +tapsoft 1 6.957497 0 0 19834 +compilerconstruct 1 6.957497 0 0 19835 +edinburgh 1 6.957497 0 0 19836 +reducibleflowgraph 1 6.957497 0 0 19837 +velen 1 6.957497 0 0 19838 +onalgebra 1 6.957497 0 0 19839 +softwareconfigur 1 6.957497 0 0 19840 +issuesin 1 6.957497 0 0 19841 +barcelona 1 6.957497 0 0 19842 +diaz 1 6.957497 0 0 19843 +oreja 1 6.957497 0 0 19844 +versionand 1 6.957497 0 0 19845 +grassau 1 6.957497 0 0 19846 +bericht 1 6.957497 0 0 19847 +winkler 1 6.957497 0 0 19848 +teubner 1 6.957497 0 0 19849 +stuttgart 1 6.957497 0 0 19850 +marceau 1 6.957497 0 0 19851 +engineeringsymposium 1 6.957497 0 0 19852 +alpern 1 6.957497 0 0 19853 +albuquerqu 1 6.957497 0 0 19854 +tosyntax 1 6.957497 0 0 19855 +williamsburg 1 6.957497 0 0 19856 +softwarerep 1 6.957497 0 0 19857 +patentsrep 1 6.957497 0 0 19858 +pend 1 6.957497 0 0 19859 +submissionsrep 1 6.957497 0 0 19860 +reportsrep 1 6.957497 0 0 19861 +mehlhorn 1 6.957497 0 0 19862 +datalogisk 1 6.957497 0 0 19863 +psramalingam 1 6.957497 0 0 19864 +klint 1 6.957497 0 0 19865 +snelt 1 6.957497 0 0 19866 +extendedabstract 1 6.957497 0 0 19867 +reconstitut 1 6.957497 0 0 19868 +studentsvisitor 1 6.957497 0 0 19869 +jiazhen 1 6.957497 0 0 19870 +paig 1 6.957497 0 0 19871 +chiao 1 6.957497 0 0 19872 +studentsramalingam 1 6.957497 0 0 19873 +programintegr 1 6.957497 0 0 19874 +statusclock 1 6.957497 0 0 19875 +pagespe 1 6.957497 0 0 19876 +clearid 1 6.957497 0 0 19877 +cleartimeout 1 6.957497 0 0 19878 +lucknow 1 6.957497 0 0 19879 +listn 1 6.957497 0 0 19880 +netsurf 1 6.957497 0 0 19881 +wismad 1 6.957497 0 0 19882 +goodwin 1 6.957497 0 0 19883 +lecturercomput 1 6.957497 0 0 19884 +guidanc 1 6.957497 0 0 19885 +trier 1 6.957497 0 0 19886 +mdd 1 6.957497 0 0 19887 +niiip 1 6.957497 0 0 19888 +transcoop 1 6.957497 0 0 19889 +needi 1 6.957497 0 0 19890 +pageand 1 6.957497 0 0 19891 +bookmarksar 1 6.957497 0 0 19892 +garfield 1 6.957497 0 0 19893 +ashwin 1 6.957497 0 0 19894 +iitb 1 6.957497 0 0 19895 +meto 1 6.957497 0 0 19896 +sashwin 1 6.957497 0 0 19897 +subramanya 1 6.957497 0 0 19898 +hospet 1 6.957497 0 0 19899 +tungabhadra 1 6.957497 0 0 19900 +favourit 1 6.957497 0 0 19901 +hampi 1 6.957497 0 0 19902 +ruin 1 6.957497 0 0 19903 +vijayanagara 1 6.957497 0 0 19904 +fewphotograph 1 6.957497 0 0 19905 +classmatesat 1 6.957497 0 0 19906 +presentcurr 1 6.957497 0 0 19907 +registeredfor 1 6.957497 0 0 19908 +playphatta 1 6.957497 0 0 19909 +champ 1 6.957497 0 0 19910 +entertainmentin 1 6.957497 0 0 19911 +donot 1 6.957497 0 0 19912 +sshow 1 6.957497 0 0 19913 +voraci 1 6.957497 0 0 19914 +unsuccesfulli 1 6.957497 0 0 19915 +grip 1 6.957497 0 0 19916 +ifposs 1 6.957497 0 0 19917 +archer 1 6.957497 0 0 19918 +jane 1 6.957497 0 0 19919 +austen 1 6.957497 0 0 19920 +pride 1 6.957497 0 0 19921 +prejudic 1 6.957497 0 0 19922 +ramesh 1 6.957497 0 0 19923 +mahadeven 1 6.957497 0 0 19924 +sarticl 1 6.957497 0 0 19925 +wonderfulgam 1 6.957497 0 0 19926 +itagain 1 6.957497 0 0 19927 +crossword 1 6.957497 0 0 19928 +cryptic 1 6.957497 0 0 19929 +andeduc 1 6.957497 0 0 19930 +reloc 1 6.957497 0 0 19931 +speciallibrarian 1 6.957497 0 0 19932 +systemadministr 1 6.957497 0 0 19933 +calcarimanag 1 6.957497 0 0 19934 +servicescomput 1 6.957497 0 0 19935 +madisonsc 1 6.957497 0 0 19936 +scoutservic 1 6.957497 0 0 19937 +internicand 1 6.957497 0 0 19938 +bestresourc 1 6.957497 0 0 19939 +soonth 1 6.957497 0 0 19940 +sprout 1 6.957497 0 0 19941 +andthousand 1 6.957497 0 0 19942 +annotatedlist 1 6.957497 0 0 19943 +itemsinclud 1 6.957497 0 0 19944 +happeningspost 1 6.957497 0 0 19945 +weekdai 1 6.957497 0 0 19946 +wheni 1 6.957497 0 0 19947 +thensfnet 1 6.957497 0 0 19948 +informationservic 1 6.957497 0 0 19949 +tonat 1 6.957497 0 0 19950 +internetand 1 6.957497 0 0 19951 +seminarseri 1 6.957497 0 0 19952 +internetend 1 6.957497 0 0 19953 +forcerfnet 1 6.957497 0 0 19954 +internicproject 1 6.957497 0 0 19955 +theport 1 6.957497 0 0 19956 +workof 1 6.957497 0 0 19957 +andrequest 1 6.957497 0 0 19958 +heartilyagre 1 6.957497 0 0 19959 +servicesat 1 6.957497 0 0 19960 +solock 1 6.957497 0 0 19961 +theaddit 1 6.957497 0 0 19962 +livesei 1 6.957497 0 0 19963 +asscout 1 6.957497 0 0 19964 +researcharea 1 6.957497 0 0 19965 +campus 1 6.957497 0 0 19966 +includenetwork 1 6.957497 0 0 19967 +nidr 1 6.957497 0 0 19968 +anddisciplin 1 6.957497 0 0 19969 +willincludecomput 1 6.957497 0 0 19970 +ofour 1 6.957497 0 0 19971 +theonlin 1 6.957497 0 0 19972 +librarian 1 6.957497 0 0 19973 +aresum 1 6.957497 0 0 19974 +contactm 1 6.957497 0 0 19975 +calcariinternet 1 6.957497 0 0 19976 +scal 1 6.957497 0 0 19977 +chandrasekar 1 6.957497 0 0 19978 +tamilnadu 1 6.957497 0 0 19979 +inindia 1 6.957497 0 0 19980 +officedept 1 6.957497 0 0 19981 +sivasankaran 1 6.957497 0 0 19982 +schandra 1 6.957497 0 0 19983 +larusresearch 1 6.957497 0 0 19984 +languagesfunct 1 6.957497 0 0 19985 +designinterest 1 6.957497 0 0 19986 +sacm 1 6.957497 0 0 19987 +iraklio 1 6.957497 0 0 19988 +systemspubl 1 6.957497 0 0 19989 +cretan 1 6.957497 0 0 19990 +beverli 1 6.957497 0 0 19991 +seavei 1 6.957497 0 0 19992 +ramayana 1 6.957497 0 0 19993 +drama 1 6.957497 0 0 19994 +ramakien 1 6.957497 0 0 19995 +colvil 1 6.957497 0 0 19996 +pagein 1 6.957497 0 0 19997 +pickingand 1 6.957497 0 0 19998 +artsi 1 6.957497 0 0 19999 +roget 1 6.957497 0 0 20000 +dyerour 1 6.957497 0 0 20001 +teachinga 1 6.957497 0 0 20002 +hasit 1 6.957497 0 0 20003 +cartoon 1 6.957497 0 0 20004 +teleconferenc 1 6.957497 0 0 20005 +performa 1 6.957497 0 0 20006 +repertoir 1 6.957497 0 0 20007 +beinvok 1 6.957497 0 0 20008 +cu 1 6.957497 0 0 20009 +levelev 1 6.957497 0 0 20010 +nonrigid 1 6.957497 0 0 20011 +dyerw 1 6.957497 0 0 20012 +graphicscommun 1 6.957497 0 0 20013 +techniquescurr 1 6.957497 0 0 20014 +validityha 1 6.957497 0 0 20015 +ofthat 1 6.957497 0 0 20016 +simplerectif 1 6.957497 0 0 20017 +therectifi 1 6.957497 0 0 20018 +theinterpol 1 6.957497 0 0 20019 +computedinterpol 1 6.957497 0 0 20020 +icpr 1 6.957497 0 0 20021 +compel 1 6.957497 0 0 20022 +betweenimag 1 6.957497 0 0 20023 +causeunnatur 1 6.957497 0 0 20024 +distort 1 6.957497 0 0 20025 +projectivegeometri 1 6.957497 0 0 20026 +morphingthat 1 6.957497 0 0 20027 +prewarp 1 6.957497 0 0 20028 +imagesprior 1 6.957497 0 0 20029 +postwarp 1 6.957497 0 0 20030 +appliedto 1 6.957497 0 0 20031 +structureafford 1 6.957497 0 0 20032 +imagetransform 1 6.957497 0 0 20033 +poscript 1 6.957497 0 0 20034 +turntabl 1 6.957497 0 0 20035 +dyermani 1 6.957497 0 0 20036 +locomotori 1 6.957497 0 0 20037 +shuffl 1 6.957497 0 0 20038 +areperiod 1 6.957497 0 0 20039 +beenproduc 1 6.957497 0 0 20040 +ourapproach 1 6.957497 0 0 20041 +tracethi 1 6.957497 0 0 20042 +imagesequ 1 6.957497 0 0 20043 +phonograph 1 6.957497 0 0 20044 +ramp 1 6.957497 0 0 20045 +timewher 1 6.957497 0 0 20046 +momentarili 1 6.957497 0 0 20047 +shownsuperimpos 1 6.957497 0 0 20048 +variesslightli 1 6.957497 0 0 20049 +changesin 1 6.957497 0 0 20050 +motionsthat 1 6.957497 0 0 20051 +evolutionof 1 6.957497 0 0 20052 +quantiti 1 6.957497 0 0 20053 +asposit 1 6.957497 0 0 20054 +veloc 1 6.957497 0 0 20055 +delimit 1 6.957497 0 0 20056 +correspondencesacross 1 6.957497 0 0 20057 +parsinga 1 6.957497 0 0 20058 +tracecan 1 6.957497 0 0 20059 +fromdiffer 1 6.957497 0 0 20060 +recoveredfrom 1 6.957497 0 0 20061 +angiograph 1 6.957497 0 0 20062 +additionalstructur 1 6.957497 0 0 20063 +surreal 1 6.957497 0 0 20064 +mlrg 1 6.957497 0 0 20065 +thememb 1 6.957497 0 0 20066 +jonathon 1 6.957497 0 0 20067 +cherkauer 1 6.957497 0 0 20068 +craven 1 6.957497 0 0 20069 +maclin 1 6.957497 0 0 20070 +opitz 1 6.957497 0 0 20071 +papersvisit 1 6.957497 0 0 20072 +recentabstractsi 1 6.957497 0 0 20073 +theoriesy 1 6.957497 0 0 20074 +severalml 1 6.957497 0 0 20075 +sgroup 1 6.957497 0 0 20076 +neurosci 1 6.957497 0 0 20077 +mimi 1 6.957497 0 0 20078 +avirup 1 6.957497 0 0 20079 +linkseducationph 1 6.957497 0 0 20080 +cachabl 1 6.957497 0 0 20081 +dirsw 1 6.957497 0 0 20082 +siff 1 6.957497 0 0 20083 +lecturerc 1 6.957497 0 0 20084 +coordinatorgradu 1 6.957497 0 0 20085 +sciencesemail 1 6.957497 0 0 20086 +groupskrentni 1 6.957497 0 0 20087 +hon 1 6.957497 0 0 20088 +kgpite 1 6.957497 0 0 20089 +framemak 1 6.957497 0 0 20090 +graduatesaddress 1 6.957497 0 0 20091 +usasohi 1 6.957497 0 0 20092 +thehighest 1 6.957497 0 0 20093 +circa 1 6.957497 0 0 20094 +transistor 1 6.957497 0 0 20095 +availableon 1 6.957497 0 0 20096 +getth 1 6.957497 0 0 20097 +ofov 1 6.957497 0 0 20098 +thenatur 1 6.957497 0 0 20099 +numericappl 1 6.957497 0 0 20100 +andcarri 1 6.957497 0 0 20101 +assessth 1 6.957497 0 0 20102 +vijaykumarrec 1 6.957497 0 0 20103 +talkswil 1 6.957497 0 0 20104 +researchcent 1 6.957497 0 0 20105 +yorktown 1 6.957497 0 0 20106 +publicationshigh 1 6.957497 0 0 20107 +ofdetail 1 6.957497 0 0 20108 +resultsi 1 6.957497 0 0 20109 +latencyt 1 6.957497 0 0 20110 +processorsj 1 6.957497 0 0 20111 +referencesm 1 6.957497 0 0 20112 +communicationin 1 6.957497 0 0 20113 +errorst 1 6.957497 0 0 20114 +processorsd 1 6.957497 0 0 20115 +knapsack 1 6.957497 0 0 20116 +componentt 1 6.957497 0 0 20117 +processorst 1 6.957497 0 0 20118 +gradstodd 1 6.957497 0 0 20119 +latencydionisio 1 6.957497 0 0 20120 +setsmanoj 1 6.957497 0 0 20121 +architecturemark 1 6.957497 0 0 20122 +executionsriram 1 6.957497 0 0 20123 +vajapeyam 1 6.957497 0 0 20124 +processormen 1 6.957497 0 0 20125 +andmarvin 1 6.957497 0 0 20126 +astech 1 6.957497 0 0 20127 +odyssea 1 6.957497 0 0 20128 +publicationstoward 1 6.957497 0 0 20129 +abstractpostscriptth 1 6.957497 0 0 20130 +andyanni 1 6.957497 0 0 20131 +abstractpostscriptexpand 1 6.957497 0 0 20132 +journalv 1 6.957497 0 0 20133 +abstractpostscriptshor 1 6.957497 0 0 20134 +andmichael 1 6.957497 0 0 20135 +zwillingavail 1 6.957497 0 0 20136 +capitl 1 6.957497 0 0 20137 +photoalbum 1 6.957497 0 0 20138 +fredriksen 1 6.957497 0 0 20139 +travisprofessorcomput 1 6.957497 0 0 20140 +californa 1 6.957497 0 0 20141 +ofartifici 1 6.957497 0 0 20142 +automaticdeduct 1 6.957497 0 0 20143 +contruct 1 6.957497 0 0 20144 +informationcontain 1 6.957497 0 0 20145 +beingdevot 1 6.957497 0 0 20146 +visualiz 1 6.957497 0 0 20147 +organiz 1 6.957497 0 0 20148 +suppositionsunderli 1 6.957497 0 0 20149 +ohar 1 6.957497 0 0 20150 +swanson 1 6.957497 0 0 20151 +whitsitt 1 6.957497 0 0 20152 +zahn 1 6.957497 0 0 20153 +oravec 1 6.957497 0 0 20154 +reflex 1 6.957497 0 0 20155 +falsework 1 6.957497 0 0 20156 +shilpa 1 6.957497 0 0 20157 +pastfor 1 6.957497 0 0 20158 +schoolher 1 6.957497 0 0 20159 +syster 1 6.957497 0 0 20160 +madisonsurf 1 6.957497 0 0 20161 +madisonst 1 6.957497 0 0 20162 +clubowl 1 6.957497 0 0 20163 +signatur 1 6.957497 0 0 20164 +lovesnowi 1 6.957497 0 0 20165 +linksher 1 6.957497 0 0 20166 +iswher 1 6.957497 0 0 20167 +tossl 1 6.957497 0 0 20168 +shilpal 1 6.957497 0 0 20169 +thru 1 6.957497 0 0 20170 +formlast 1 6.957497 0 0 20171 +stenglein 1 6.957497 0 0 20172 +stenglei 1 6.957497 0 0 20173 +pageespn 1 6.957497 0 0 20174 +hotwir 1 6.957497 0 0 20175 +tunnelgroup 1 6.957497 0 0 20176 +andjim 1 6.957497 0 0 20177 +mewhat 1 6.957497 0 0 20178 +stever 1 6.957497 0 0 20179 +usalast 1 6.957497 0 0 20180 +strikwerda 1 6.957497 0 0 20181 +strikwerdadepart 1 6.957497 0 0 20182 +strik 1 6.957497 0 0 20183 +dynamicsmyoffici 1 6.957497 0 0 20184 +tribun 1 6.957497 0 0 20185 +footballmi 1 6.957497 0 0 20186 +prooocessor 1 6.957497 0 0 20187 +madisonoffic 1 6.957497 0 0 20188 +stelephon 1 6.957497 0 0 20189 +emailoffic 1 6.957497 0 0 20190 +seednet 1 6.957497 0 0 20191 +vistor 1 6.957497 0 0 20192 +browbeck 1 6.957497 0 0 20193 +effronteri 1 6.957497 0 0 20194 +femor 1 6.957497 0 0 20195 +arteri 1 6.957497 0 0 20196 +blood 1 6.957497 0 0 20197 +anesthetist 1 6.957497 0 0 20198 +groin 1 6.957497 0 0 20199 +hamstr 1 6.957497 0 0 20200 +scalpel 1 6.957497 0 0 20201 +stab 1 6.957497 0 0 20202 +leg 1 6.957497 0 0 20203 +voilet 1 6.957497 0 0 20204 +baboon 1 6.957497 0 0 20205 +wig 1 6.957497 0 0 20206 +pois 1 6.957497 0 0 20207 +stomp 1 6.957497 0 0 20208 +cop 1 6.957497 0 0 20209 +rush 1 6.957497 0 0 20210 +burrough 1 6.957497 0 0 20211 +catapult 1 6.957497 0 0 20212 +mann 1 6.957497 0 0 20213 +wearabl 1 6.957497 0 0 20214 +tierra 1 6.957497 0 0 20215 +arcosanti 1 6.957497 0 0 20216 +arcolog 1 6.957497 0 0 20217 +krishnamurti 1 6.957497 0 0 20218 +harass 1 6.957497 0 0 20219 +factoid 1 6.957497 0 0 20220 +astound 1 6.957497 0 0 20221 +onion 1 6.957497 0 0 20222 +washburn 1 6.957497 0 0 20223 +len 1 6.957497 0 0 20224 +insignific 1 6.957497 0 0 20225 +webweath 1 6.957497 0 0 20226 +timothi 1 6.957497 0 0 20227 +leari 1 6.957497 0 0 20228 +noam 1 6.957497 0 0 20229 +chomski 1 6.957497 0 0 20230 +conspiraci 1 6.957497 0 0 20231 +buri 1 6.957497 0 0 20232 +tamch 1 6.957497 0 0 20233 +ariel 1 6.957497 0 0 20234 +municip 1 6.957497 0 0 20235 +bond 1 6.957497 0 0 20236 +tamchesari 1 6.957497 0 0 20237 +assistantemail 1 6.957497 0 0 20238 +posei 1 6.957497 0 0 20239 +sresearch 1 6.957497 0 0 20240 +toolsstatu 1 6.957497 0 0 20241 +toolsparallel 1 6.957497 0 0 20242 +systemsbluesth 1 6.957497 0 0 20243 +simpsonsseinfeldskiingskinetkeyston 1 6.957497 0 0 20244 +vacum 1 6.957497 0 0 20245 +dirt 1 6.957497 0 0 20246 +whoa 1 6.957497 0 0 20247 +incom 1 6.957497 0 0 20248 +yahooespncpu 1 6.957497 0 0 20249 +infoskinetoth 1 6.957497 0 0 20250 +exokernel 1 6.957497 0 0 20251 +zebra 1 6.957497 0 0 20252 +pagejeff 1 6.957497 0 0 20253 +ricardo 1 6.957497 0 0 20254 +montalban 1 6.957497 0 0 20255 +foron 1 6.957497 0 0 20256 +incrimin 1 6.957497 0 0 20257 +aconvict 1 6.957497 0 0 20258 +lasttim 1 6.957497 0 0 20259 +threaten 1 6.957497 0 0 20260 +intoa 1 6.957497 0 0 20261 +dispens 1 6.957497 0 0 20262 +anautograph 1 6.957497 0 0 20263 +pictureappear 1 6.957497 0 0 20264 +weasel 1 6.957497 0 0 20265 +factswho 1 6.957497 0 0 20266 +relatedwhat 1 6.957497 0 0 20267 +entertainmentbook 1 6.957497 0 0 20268 +subjectsfriendsno 1 6.957497 0 0 20269 +organizationsgroup 1 6.957497 0 0 20270 +inmi 1 6.957497 0 0 20271 +linksugh 1 6.957497 0 0 20272 +servo 1 6.957497 0 0 20273 +eclect 1 6.957497 0 0 20274 +paraphenaliai 1 6.957497 0 0 20275 +mathematicalprogram 1 6.957497 0 0 20276 +homepagetodd 1 6.957497 0 0 20277 +homepagein 1 6.957497 0 0 20278 +toonen 1 6.957497 0 0 20279 +cswhatev 1 6.957497 0 0 20280 +seattleth 1 6.957497 0 0 20281 +tipi 1 6.957497 0 0 20282 +itsmean 1 6.957497 0 0 20283 +kinship 1 6.957497 0 0 20284 +acknowledgingun 1 6.957497 0 0 20285 +infus 1 6.957497 0 0 20286 +thetru 1 6.957497 0 0 20287 +luther 1 6.957497 0 0 20288 +oglala 1 6.957497 0 0 20289 +siouxlast 1 6.957497 0 0 20290 +upgrad 1 6.957497 0 0 20291 +ifthat 1 6.957497 0 0 20292 +turnidgeschoolcomput 1 6.957497 0 0 20293 +homemuppet 1 6.957497 0 0 20294 +eyesightright 1 6.957497 0 0 20295 +studyingprogram 1 6.957497 0 0 20296 +mathematicsand 1 6.957497 0 0 20297 +reserveunivers 1 6.957497 0 0 20298 +cleveland 1 6.957497 0 0 20299 +myfamili 1 6.957497 0 0 20300 +pageuri 1 6.957497 0 0 20301 +pageemail 1 6.957497 0 0 20302 +eduinterest 1 6.957497 0 0 20303 +diversionsstart 1 6.957497 0 0 20304 +ganti 1 6.957497 0 0 20305 +godav 1 6.957497 0 0 20306 +pagevenkatesh 1 6.957497 0 0 20307 +vganti 1 6.957497 0 0 20308 +studentoffic 1 6.957497 0 0 20309 +kakinada 1 6.957497 0 0 20310 +hostel 1 6.957497 0 0 20311 +architecturet 1 6.957497 0 0 20312 +watrou 1 6.957497 0 0 20313 +artin 1 6.957497 0 0 20314 +whapl 1 6.957497 0 0 20315 +canadiannumb 1 6.957497 0 0 20316 +assort 1 6.957497 0 0 20317 +lanl 1 6.957497 0 0 20318 +hypatia 1 6.957497 0 0 20319 +stylehypertext 1 6.957497 0 0 20320 +interfaceroget 1 6.957497 0 0 20321 +parasol 1 6.957497 0 0 20322 +recordsplayst 1 6.957497 0 0 20323 +linksweath 1 6.957497 0 0 20324 +madisonth 1 6.957497 0 0 20325 +pagemathemat 1 6.957497 0 0 20326 +servermathematician 1 6.957497 0 0 20327 +biographiesgeek 1 6.957497 0 0 20328 +weiru 1 6.957497 0 0 20329 +eiru 1 6.957497 0 0 20330 +ppppleas 1 6.957497 0 0 20331 +asylum 1 6.957497 0 0 20332 +verbal 1 6.957497 0 0 20333 +cargo 1 6.957497 0 0 20334 +havenos 1 6.957497 0 0 20335 +smell 1 6.957497 0 0 20336 +leder 1 6.957497 0 0 20337 +beoffer 1 6.957497 0 0 20338 +customari 1 6.957497 0 0 20339 +begina 1 6.957497 0 0 20340 +amountof 1 6.957497 0 0 20341 +merest 1 6.957497 0 0 20342 +ofaffect 1 6.957497 0 0 20343 +excruciatingli 1 6.957497 0 0 20344 +atmadison 1 6.957497 0 0 20345 +grei 1 6.957497 0 0 20346 +francai 1 6.957497 0 0 20347 +dictionnairefrancai 1 6.957497 0 0 20348 +anglai 1 6.957497 0 0 20349 +dictionnair 1 6.957497 0 0 20350 +relatif 1 6.957497 0 0 20351 +lafrancophoni 1 6.957497 0 0 20352 +degrammair 1 6.957497 0 0 20353 +chinaemail 1 6.957497 0 0 20354 +experiencecontractor 1 6.957497 0 0 20355 +tuxedo 1 6.957497 0 0 20356 +pathwai 1 6.957497 0 0 20357 +sherpa 1 6.957497 0 0 20358 +hobbiesma 1 6.957497 0 0 20359 +jiangbridg 1 6.957497 0 0 20360 +pingpong 1 6.957497 0 0 20361 +joggingth 1 6.957497 0 0 20362 +challengesolv 1 6.957497 0 0 20363 +sweeper 1 6.957497 0 0 20364 +dayth 1 6.957497 0 0 20365 +ackowledgementthi 1 6.957497 0 0 20366 +pothol 1 6.957497 0 0 20367 +wengerassoci 1 6.957497 0 0 20368 +researchercomput 1 6.957497 0 0 20369 +arecod 1 6.957497 0 0 20370 +anddevis 1 6.957497 0 0 20371 +acronym 1 6.957497 0 0 20372 +importantpart 1 6.957497 0 0 20373 +visualizationproduc 1 6.957497 0 0 20374 +livnyraghu 1 6.957497 0 0 20375 +ramakrishnanmor 1 6.957497 0 0 20376 +pagewiscinfo 1 6.957497 0 0 20377 +personallinksimageslast 1 6.957497 0 0 20378 +projectmost 1 6.957497 0 0 20379 +fromworkst 1 6.957497 0 0 20380 +whichprocess 1 6.957497 0 0 20381 +abovesystem 1 6.957497 0 0 20382 +wascoop 1 6.957497 0 0 20383 +toconvent 1 6.957497 0 0 20384 +revolutionari 1 6.957497 0 0 20385 +andprogram 1 6.957497 0 0 20386 +transparentshar 1 6.957497 0 0 20387 +developingimplement 1 6.957497 0 0 20388 +wisconsincow 1 6.957497 0 0 20389 +cowus 1 6.957497 0 0 20390 +sram 1 6.957497 0 0 20391 +collaboratingwith 1 6.957497 0 0 20392 +overviewand 1 6.957497 0 0 20393 +pageor 1 6.957497 0 0 20394 +xuelin 1 6.957497 0 0 20395 +otto 1 6.957497 0 0 20396 +messmer 1 6.957497 0 0 20397 +whichwa 1 6.957497 0 0 20398 +chaplin 1 6.957497 0 0 20399 +keaton 1 6.957497 0 0 20400 +polo 1 6.957497 0 0 20401 +lindbergh 1 6.957497 0 0 20402 +theatlant 1 6.957497 0 0 20403 +oneev 1 6.957497 0 0 20404 +teeth 1 6.957497 0 0 20405 +whisker 1 6.957497 0 0 20406 +tail 1 6.957497 0 0 20407 +sui 1 6.957497 0 0 20408 +vritabl 1 6.957497 0 0 20409 +partout 1 6.957497 0 0 20410 +haber 1 6.957497 0 0 20411 +vldbconfer 1 6.957497 0 0 20412 +tod 1 6.957497 0 0 20413 +ofheterogen 1 6.957497 0 0 20414 +ondatabas 1 6.957497 0 0 20415 +opossum 1 6.957497 0 0 20416 +ioannidisyanni 1 6.957497 0 0 20417 +toqueri 1 6.957497 0 0 20418 +thanin 1 6.957497 0 0 20419 +highera 1 6.957497 0 0 20420 +tooptim 1 6.957497 0 0 20421 +querywil 1 6.957497 0 0 20422 +optimum 1 6.957497 0 0 20423 +viabl 1 6.957497 0 0 20424 +propertiesof 1 6.957497 0 0 20425 +especiallythos 1 6.957497 0 0 20426 +alsopart 1 6.957497 0 0 20427 +appropriateinform 1 6.957497 0 0 20428 +thepropag 1 6.957497 0 0 20429 +ofoptim 1 6.957497 0 0 20430 +inrel 1 6.957497 0 0 20431 +manyexperi 1 6.957497 0 0 20432 +aspectsthat 1 6.957497 0 0 20433 +managementenviron 1 6.957497 0 0 20434 +theirexperiment 1 6.957497 0 0 20435 +arefor 1 6.957497 0 0 20436 +scientistsso 1 6.957497 0 0 20437 +facilitatetransl 1 6.957497 0 0 20438 +experimentalscientif 1 6.957497 0 0 20439 +specificproject 1 6.957497 0 0 20440 +plantgrowth 1 6.957497 0 0 20441 +issueon 1 6.957497 0 0 20442 +beyondrel 1 6.957497 0 0 20443 +forschema 1 6.957497 0 0 20444 +tsangari 1 6.957497 0 0 20445 +tkde 1 6.957497 0 0 20446 +christodoulaki 1 6.957497 0 0 20447 +limitingworst 1 6.957497 0 0 20448 +winger 1 6.957497 0 0 20449 +algorithmsbas 1 6.957497 0 0 20450 +databaseestim 1 6.957497 0 0 20451 +ponnekanti 1 6.957497 0 0 20452 +experimentmanag 1 6.957497 0 0 20453 +itsappl 1 6.957497 0 0 20454 +anjur 1 6.957497 0 0 20455 +bridgesbetween 1 6.957497 0 0 20456 +shekita 1 6.957497 0 0 20457 +forselect 1 6.957497 0 0 20458 +internationalacm 1 6.957497 0 0 20459 +layoutat 1 6.957497 0 0 20460 +granular 1 6.957497 0 0 20461 +advancedvisu 1 6.957497 0 0 20462 +gubbio 1 6.957497 0 0 20463 +managementthrough 1 6.957497 0 0 20464 +practicalityfor 1 6.957497 0 0 20465 +sigmodconfer 1 6.957497 0 0 20466 +forphys 1 6.957497 0 0 20467 +dexa 1 6.957497 0 0 20468 +athen 1 6.957497 0 0 20469 +lashkari 1 6.957497 0 0 20470 +theirdisambigu 1 6.957497 0 0 20471 +schemavisu 1 6.957497 0 0 20472 +edbt 1 6.957497 0 0 20473 +internationalvldb 1 6.957497 0 0 20474 +capacityin 1 6.957497 0 0 20475 +wiener 1 6.957497 0 0 20476 +moos 1 6.957497 0 0 20477 +withdata 1 6.957497 0 0 20478 +yinng 1 6.957497 0 0 20479 +pageindexofyinongwei 1 6.957497 0 0 20480 +spagehi 1 6.957497 0 0 20481 +alsolink 1 6.957497 0 0 20482 +inforesumehobbiestravel 1 6.957497 0 0 20483 +pointersr 1 6.957497 0 0 20484 +computingmacin 1 6.957497 0 0 20485 +learningpattern 1 6.957497 0 0 20486 +recognitioncomputatin 1 6.957497 0 0 20487 +geometrydatabasevisionacadem 1 6.957497 0 0 20488 +diarythi 1 6.957497 0 0 20489 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 1 6.957497 0 0 20490 +beida 1 6.957497 0 0 20491 +classmatespek 1 6.957497 0 0 20492 +ciumi 1 6.957497 0 0 20493 +bookmarkcom 1 6.957497 0 0 20494 +yinong 1 6.957497 0 0 20495 +zhongbin 1 6.957497 0 0 20496 +convuls 1 6.957497 0 0 20497 +sera 1 6.957497 0 0 20498 +pageuntil 1 6.957497 0 0 20499 +zeidenbergcent 1 6.957497 0 0 20500 +gilson 1 6.957497 0 0 20501 +zeiden 1 6.957497 0 0 20502 +eduzeidenb 1 6.957497 0 0 20503 +eduwhen 1 6.957497 0 0 20504 +coho 1 6.957497 0 0 20505 +huntington 1 6.957497 0 0 20506 +breton 1 6.957497 0 0 20507 +nadja 1 6.957497 0 0 20508 +beaut 1 6.957497 0 0 20509 +saint 1 6.957497 0 0 20510 +whyth 1 6.957497 0 0 20511 +communist 1 6.957497 0 0 20512 +helder 1 6.957497 0 0 20513 +camara 1 6.957497 0 0 20514 +jianwei 1 6.957497 0 0 20515 +assistantadvisor 1 6.957497 0 0 20516 +compilerminor 1 6.957497 0 0 20517 +bankingoffic 1 6.957497 0 0 20518 +intereststher 1 6.957497 0 0 20519 +territori 1 6.957497 0 0 20520 +densityanalysi 1 6.957497 0 0 20521 +crowd 1 6.957497 0 0 20522 +dataclassif 1 6.957497 0 0 20523 +knowledgediscoveri 1 6.957497 0 0 20524 +dimensionreduct 1 6.957497 0 0 20525 +findpath 1 6.957497 0 0 20526 +yihong 1 6.957497 0 0 20527 +educationb 1 6.957497 0 0 20528 +hillm 1 6.957497 0 0 20529 +wiscosin 1 6.957497 0 0 20530 +datamin 1 6.957497 0 0 20531 +microstrategi 1 6.957497 0 0 20532 +rolap 1 6.957497 0 0 20533 +lombard 1 6.957497 0 0 20534 +kiwi 1 6.957497 0 0 20535 +pgmo 1 6.957497 0 0 20536 +zhewang 1 6.957497 0 0 20537 +zhichen 1 6.957497 0 0 20538 +larusprofessor 1 6.957497 0 0 20539 +millerawardbest 1 6.957497 0 0 20540 +eliminateperform 1 6.957497 0 0 20541 +toolwith 1 6.957497 0 0 20542 +wisconsinwind 1 6.957497 0 0 20543 +interestprogram 1 6.957497 0 0 20544 +andimcrement 1 6.957497 0 0 20545 +programjourn 1 6.957497 0 0 20546 +researchchines 1 6.957497 0 0 20547 +supper 1 6.957497 0 0 20548 +weihai 1 6.957497 0 0 20549 +krzysztof 1 6.957497 0 0 20550 +zmudzinskikrzysztof 1 6.957497 0 0 20551 +zmudzinskispin 1 6.957497 0 0 20552 +pole 1 6.957497 0 0 20553 +salujaprofessor 1 6.957497 0 0 20554 +jpgdepartmentselectr 1 6.957497 0 0 20555 +engineeringcomput 1 6.957497 0 0 20556 +interestsdesign 1 6.957497 0 0 20557 +testableand 1 6.957497 0 0 20558 +thisarea 1 6.957497 0 0 20559 +theresearch 1 6.957497 0 0 20560 +testgener 1 6.957497 0 0 20561 +inself 1 6.957497 0 0 20562 +andfault 1 6.957497 0 0 20563 +methodsapplic 1 6.957497 0 0 20564 +testenviron 1 6.957497 0 0 20565 +regularstructur 1 6.957497 0 0 20566 +ram 1 6.957497 0 0 20567 +areinvestig 1 6.957497 0 0 20568 +inhardwar 1 6.957497 0 0 20569 +projectw 1 6.957497 0 0 20570 +thatth 1 6.957497 0 0 20571 +noimpact 1 6.957497 0 0 20572 +digitalsystem 1 6.957497 0 0 20573 +withcolor 1 6.957497 0 0 20574 +departmentsmechan 1 6.957497 0 0 20575 +engineeringeducationb 1 6.957497 0 0 20576 +madisonm 1 6.957497 0 0 20577 +madisonphd 1 6.957497 0 0 20578 +madisonresearch 1 6.957497 0 0 20579 +interestsrobot 1 6.957497 0 0 20580 +micromechanismscent 1 6.957497 0 0 20581 +consortiamanufactur 1 6.957497 0 0 20582 +programwisconsin 1 6.957497 0 0 20583 +roboticsprofessor 1 6.957497 0 0 20584 +inspect 1 6.957497 0 0 20585 +mold 1 6.957497 0 0 20586 +rework 1 6.957497 0 0 20587 +agricultur 1 6.957497 0 0 20588 +tactil 1 6.957497 0 0 20589 +sensori 1 6.957497 0 0 20590 +fatigu 1 6.957497 0 0 20591 +vadim 1 6.957497 0 0 20592 +vshapiro 1 6.957497 0 0 20593 +jpgurl 1 6.957497 0 0 20594 +departmentscomput 1 6.957497 0 0 20595 +sciencemechan 1 6.957497 0 0 20596 +engineeringeducationba 1 6.957497 0 0 20597 +universitym 1 6.957497 0 0 20598 +angelesm 1 6.957497 0 0 20599 +universityphd 1 6.957497 0 0 20600 +univeristyresearch 1 6.957497 0 0 20601 +automationcent 1 6.957497 0 0 20602 +consortiamathemat 1 6.957497 0 0 20603 +programmanufactur 1 6.957497 0 0 20604 +programspati 1 6.957497 0 0 20605 +laboratoryselect 1 6.957497 0 0 20606 +honorsn 1 6.957497 0 0 20607 +vossler 1 6.957497 0 0 20608 +betweengeometri 1 6.957497 0 0 20609 +bemodel 1 6.957497 0 0 20610 +manufacturedbas 1 6.957497 0 0 20611 +ofdistinct 1 6.957497 0 0 20612 +technologicalbarri 1 6.957497 0 0 20613 +undermin 1 6.957497 0 0 20614 +commercialgeometr 1 6.957497 0 0 20615 +eliminatingambigu 1 6.957497 0 0 20616 +ofparametr 1 6.957497 0 0 20617 +bedescrib 1 6.957497 0 0 20618 +interactingprimit 1 6.957497 0 0 20619 +roadblock 1 6.957497 0 0 20620 +withtheoret 1 6.957497 0 0 20621 +smoothintegr 1 6.957497 0 0 20622 +thedesir 1 6.957497 0 0 20623 +tounifi 1 6.957497 0 0 20624 +theseand 1 6.957497 0 0 20625 +physicalobject 1 6.957497 0 0 20626 +hein 1 6.957497 0 0 20627 +borel 1 6.957497 0 0 20628 +groupautom 1 6.957497 0 0 20629 +techreport 1 6.957497 0 0 20630 +reportseri 1 6.957497 0 0 20631 +grouplarri 1 6.957497 0 0 20632 +hinesmarti 1 6.957497 0 0 20633 +mayberrybenjamin 1 6.957497 0 0 20634 +shultsalumniprevi 1 6.957497 0 0 20635 +robertboyerj 1 6.957497 0 0 20636 +strother 1 6.957497 0 0 20637 +moorethi 1 6.957497 0 0 20638 +collaboratorswhat 1 6.957497 0 0 20639 +implyth 1 6.957497 0 0 20640 +proverstrivelarri 1 6.957497 0 0 20641 +struvelarri 1 6.957497 0 0 20642 +proverand 1 6.957497 0 0 20643 +theretoinclud 1 6.957497 0 0 20644 +mcphee 1 6.957497 0 0 20645 +theoryimplement 1 6.957497 0 0 20646 +theoremprecondit 1 6.957497 0 0 20647 +proverbledso 1 6.957497 0 0 20648 +theoremnqthmboy 1 6.957497 0 0 20649 +andmoor 1 6.957497 0 0 20650 +clinc 1 6.957497 0 0 20651 +iprshult 1 6.957497 0 0 20652 +relatedlinksdo 1 6.957497 0 0 20653 +shult 1 6.957497 0 0 20654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..7b8e89c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,73 @@ +home +pagec +system +program +oper +systemsc +practicum +oper +systemkenneth +birmanc +new +groupcours +syllabuslectur +note +unix +filesystem +structur +link +static +dynam +assign +homework +homework +homework +homework +homework +assign +solut +solut +solut +solut +solut +prelim +solut +prelim +solut +taslili +upson +hall +phone +mail +lili +cornel +offic +hour +wednesdai +fridai +cheng +huang +upson +hall +phone +mail +ychuang +cornel +offic +hour +tuesdai +thursdai +mihai +budiu +upson +hall +phone +mail +budiu +cornel +offic +hour +wednesdai +thursdai +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..843dbdfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,59 @@ +home +pagec +system +program +oper +systemsc +practicum +oper +systemsselect +displai +symbol +correspond +postcriptdocu +hand +phase +hocacours +inform +cours +schedul +last +chang +group +handout +handout +format +postcript +format +penn +broccoli +question +answer +last +chang +chip +comput +system +consol +window +exampl +us +chip +chip +consol +tutori +principl +oper +configur +fileth +hoca +oper +systemth +hoca +oper +system +specif +page +maintain +lorenzo +alvisi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..bc371e90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,491 @@ +home +pagec +databas +system +inform +retrievaldepart +computersci +cornel +universityspr +gradeshav +nice +summer +introductionthi +three +credit +cours +cover +fundament +databasesystem +inform +retriev +cours +roughli +twothird +databas +third +inform +retriev +topic +cover +databas +systemsinclud +follow +data +model +entiti +relationship +model +relat +model +physic +organ +index +hash +relat +databas +design +databas +queri +languag +queryoptim +crash +recoveri +concurr +control +transactionprocess +inform +retriev +part +deal +find +usefulinform +larg +textual +databas +part +cours +willcov +invert +file +system +vector +space +model +smartsystem +vector +similar +index +weight +rank +relevancefeedback +phrase +gener +term +relationship +thesaurusconstruct +retriev +evalu +time +permit +automatictext +structur +summar +link +cours +materi +class +note +homework +solut +class +time +placetuesdai +thursdai +minut +thurston +prerequisitesc +recommend +booksdatabas +system +concept +korth +silberschatz +mcgrawhil +second +edit +requir +cover +fundament +databas +system +elmasri +andnavath +benjamin +cum +second +edit +reserv +principl +databas +knowledg +base +system +byullman +comput +scienc +press +reserv +inform +retriev +part +cours +photocopiedmateri +salton +book +research +paper +instructor +amitsingh +singhal +cornel +upson +offic +hour +tuesdai +thursdai +teach +assist +sophia +georgiakaki +cornel +offic +hour +wednesdai +upson +appoint +send +mail +marco +aguilera +aguilera +cornel +forc +amith +yamasani +amith +cornel +officehour +appoint +send +mail +gradingexam +midterm +worth +ofyour +final +grade +final +exam +worth +yourfin +grade +homework +five +homework +semest +worth +final +grade +homework +policiesy +work +group +peopl +homework +work +group +clearli +indic +name +thegroup +member +homework +entir +group +receiv +samegrad +homework +avail +home +page +tuesdayand +class +thursdai +follow +week +solut +along +grade +guid +avail +ofcours +date +throughth +cours +home +page +late +homework +accept +illeg +homework +hard +grade +even +though +iti +requir +encourag +type +homework +latexif +possibl +alreadi +know +goodopportun +learn +latex +homework +submissionpleas +attach +cover +page +homework +name +thegroup +member +sort +alphabet +last +name +also +write +homework +cover +page +exampl +bill +clinton +dole +ross +perot +homework +grade +homeworksgrad +homework +return +class +sortedalphabet +last +name +first +group +member +thecov +page +grade +list +first +pagefollow +cover +page +want +homework +return +pleas +sendmail +instructor +regrad +policyal +regrad +request +submit +instructor +inwrit +within +week +back +grade +homework +cours +schedulethi +tent +schedul +cours +chapter +referto +korth +silberschatz +tuesdai +januari +introduct +entiti +relationship +model +read +chapter +thursdai +januari +entiti +relationship +model +relat +model +read +chapter +tuesdai +januari +relat +algebra +read +chapter +homework +availablethursdai +februari +tupl +relat +calculu +domain +relat +calculu +read +chapter +tuesdai +februari +read +chapter +thursdai +februari +integr +constraint +relat +databas +design +read +chapter +homework +duetuesdai +februari +relat +databas +design +read +chapter +homework +availablethursdai +februari +relat +databas +design +read +chapter +tuesdai +februari +file +structur +read +chapter +thursdai +februari +index +read +chapter +homework +duetuesdai +februari +queri +optim +read +chapter +thursdai +februari +prelim +tuesdai +march +queri +optim +read +chapter +homework +availablethursdai +march +crash +recoveri +read +chapter +tuesdai +march +crash +recoveri +concurr +control +read +chapter +thursdai +march +concurr +control +read +chapter +homework +spring +break +tuesdai +march +transact +process +read +chapter +homework +availablethursdai +march +transact +process +read +chapter +tuesdai +april +introduct +inform +retrievalthursdai +april +vector +space +modelhomework +duetuesdai +april +term +weightingthursdai +april +prelim +tuesdai +april +indexinghomework +availablethursdai +april +evaluationtuesdai +april +relev +feedbackthursdai +april +document +clusteringhomework +duetuesdai +april +advanc +inform +retrievalthursdai +advanc +inform +retriev diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..fccad365 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,87 @@ +home +page +home +page +look +admin +handout +inform +incl +offic +hour +lectur +note +assign +inform +recit +inform +ethic +profession +social +respons +page +mayb +electron +submiss +procedur +group +perform +evalu +resourc +quot +stuff +collect +joke +start +submit +sumedh +offic +hour +break +new +recit +misc +stuff +convert +text +postscript +peopl +ask +convert +text +postscript +simpl +unix +program +enscript +suggest +wai +enscript +pfile +file +enscript +pfile +file +first +print +file +good +sourc +code +second +print +give +nice +header +gener +postscript +file +file +leav +pfile +send +file +printer +come +last +modif diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..ebcd5413 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,82 @@ +practicum +distribut +systemsor +hand +dirti +real +work +cours +practic +aspect +distribut +system +studi +design +implement +signific +system +practicum +student +also +take +practic +distribut +system +cours +offersa +varieti +project +rang +simpl +project +internetworkingto +complex +project +distribut +system +student +work +teamsof +person +choos +project +interest +theywil +work +trough +semest +credit +hour +earn +cours +rang +depend +size +complexityof +project +develop +cours +us +offcial +inform +interact +pageslink +inform +page +find +basic +inform +instruct +project +descript +design +plan +progress +report +final +present +tabl +contentspag +comment +werner +vogel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..f490b7ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,137 @@ +home +page +intro +comput +architectur +fall +professor +saluja +note +page +contain +link +inform +cours +electr +andcomput +engin +depart +univers +wisconsin +madison +materi +intend +sole +studentsenrol +cours +fall +semest +professor +prof +kewal +saluja +sorin +generalinform +cours +outlin +cours +conduct +midtermsyllabu +avail +midtermi +avail +project +specif +avail +homework +assign +fall +problem +solut +problem +solut +part +solut +avail +problem +solut +part +solut +part +problem +valid +problem +solut +mentor +help +theproject +need +mentor +graphic +tool +avail +caeworkst +pleas +refer +cours +outlin +project +duedat +follow +literatur +mentor +tool +assist +anyon +whomai +problem +manual +avail +onlin +throughbold_brows +check +gettingstart +design +architect +train +workbook +page +get +start +quicksim +trainingworkbook +exersis +addition +thesedocu +contain +refer +document +help +click +send +email +professor +kewal +saluja +sorin +surf +link +engin +homepag +announc +new +group +wiscinfo +gopher +site +uwengin +server +file +last +modifi +septemb +pmcst +email +question +comment +sorin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..5c9edd21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,55 @@ +homepag +fall +run +netscap +click +herelink +individu +page +frame +cours +info +gener +stuff +section +info +offic +hour +motw +homework +download +homework +handout +info +class +handout +syllabu +cours +syllabu +exam +info +exam +requir +grade +criteria +grade +homework +schedul +link +refer +goofi +stuffnot +preced +page +contain +tabl +browser +abl +handl +tabl +pleas +email +cornel +edupag +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..eb0eac44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,54 @@ +home +pagec +structur +interpret +comput +program +comput +scienc +depart +cornel +univers +fall +cours +materi +access +cours +materi +requir +user +password +request +attempt +access +materi +cours +info +dylan +interpret +window +dylan +interpret +window +note +browser +includ +netscap +correctli +check +chang +java +class +file +thu +noodll +chang +inconsist +behavior +work +parter +link +directori +partnerjoin +util +announc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..2d4a8499 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,626 @@ +comput +scienc +comput +scienc +fall +cours +informationaugust +introductori +cours +cover +broad +rang +computersci +concept +techniqu +includ +data +abstract +recurs +program +correct +gener +function +object +orient +program +pattern +match +languag +evalu +useth +dylan +languag +object +orient +dynam +languag +developedat +appl +comput +well +suit +cover +broad +rangeof +introductori +comput +scienc +topic +courseabout +dylan +languag +happen +notationthat +chosen +write +program +major +goal +ofth +cours +teach +student +think +clearli +programsand +program +provid +toolbox +modern +programmingtechniqu +applic +languag +cours +take +student +often +wonder +whether +takec +focus +program +skill +object +orientedlanguag +java +wherea +provid +exposur +broad +rangeof +comput +program +problem +us +number +programmingparadigm +includ +function +object +orient +imperativeprogram +techniqu +good +background +goodform +skill +mathemat +physic +probablytak +transfer +either +direct +encourag +first +week +reach +best +reach +cours +staff +post +questionsor +comment +us +site +http +cornel +info +cours +current +site +contain +cours +materi +run +conot +serverwhich +allow +student +staff +post +question +answersa +annot +handout +problem +set +order +access +site +need +request +user +idand +password +conot +system +simpli +toth +home +page +follow +instruct +user +idand +password +creat +mondai +request +thisweek +also +reach +cours +staff +send +email +cornel +edubut +us +ask +question +aboutproblem +set +handout +site +huttenloch +professor +upson +tobia +mayr +upsonjam +hamblin +ugrad +tarobert +szewczyk +ugrad +tajustin +voskuhl +ugrad +taandra +ferencz +consult +melissa +consultantwhen +meetlectur +tuesdai +thursdai +kimbal +andrecit +mondai +wednesdai +recitationsexpand +materi +lectur +provid +opportunityto +question +consult +hour +help +problem +set +question +held +midnight +even +eachproblem +schedul +thu +problem +setsdu +tuesdai +consult +hour +sundai +mondayeven +thursdai +consultinghour +tuesdai +wednesdai +even +consult +hour +inth +public +upson +offic +hour +jame +hamblin +huttenloch +tobia +mayr +robert +szewczyk +justin +voskuhltba +cours +materialsther +textbook +cours +cours +handoutsand +lectur +note +avail +hardcopi +andon +cours +site +dylan +interpret +avail +free +cours +site +develop +justin +voskuhl +class +implementedin +java +thu +java +capabl +browser +best +current +browser +chang +almost +weekli +netscap +window +borland +time +java +compil +also +standalon +version +avail +downloadonto +comput +want +browser +version +word +warn +download +standalon +dylan +ontoyour +machin +make +sure +us +recentvers +check +site +cours +requirementsstud +respons +materi +assign +read +well +cover +lectur +recit +therewil +problem +set +preliminari +exam +final +exam +schedul +given +problem +combinationof +written +exercis +program +assign +cours +gradeswil +base +combin +problem +set +exam +score +problem +set +account +approxim +half +thetot +grade +late +assign +accept +willgener +grade +assign +immedi +return +followingclass +period +complet +program +assign +earli +accept +late +work +best +time +andth +machin +time +think +problem +sittingdown +comput +matter +mani +time +take +long +time +sink +think +problem +beforesit +comput +polici +joint +workmuch +learn +cours +come +programmingproblem +work +jointli +person +assign +peopl +work +togeth +howev +youwork +togeth +someon +must +submit +singl +jointassign +name +circumstancesmai +hand +work +done +someon +els +yourown +name +doubt +credit +person +yougot +help +would +amaz +easi +tell +whenpeopl +work +togeth +problem +set +pleas +make +lifeunpleas +break +rule +public +facilitiescit +variou +colleg +campu +provid +public +macintosh +andpc +facil +machin +public +on +depart +provid +comput +facil +thiscours +cours +consult +avail +upsonmac +problem +date +exam +datesal +problem +set +date +exampl +assign +tuesdai +must +submit +electron +toth +cours +server +tuesdai +late +mondaynight +late +assign +accept +make +sure +submityour +final +solut +correct +time +date +assign +assign +assign +prelim +assign +assign +prelim +assign +final +exam +exam +schedul +lectur +outlin +studi +comput +introduct +dylan +function +abstract +substitut +model +evalu +procedur +process +iter +recurs +induct +higher +order +procedur +function +argument +valu +analysi +algorithm +order +growth +data +abstract +structur +contract +implement +hierarch +data +list +tree +need +quotat +recurs +list +process +reason +list +symbol +differenti +extend +exampl +gener +oper +type +dispatch +gener +function +gener +oper +polynomi +arithmet +system +assign +environ +model +evalu +assign +local +state +variabl +object +state +object +orient +program +object +orient +program +inherit +multimethod +object +orient +program +mutabl +data +stack +queue +mutabl +data +heap +heapsort +prioriti +queue +metacircular +evalu +dylan +dylan +variat +express +evalu +compil +optim +stream +infinit +stream +nonloc +exit +catch +throw +garbag +collect +illus +infinit +memori +topic +random +quicksort +topic +comput diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..8a67c6b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,122 @@ +home +page +introduct +digit +system +comput +organizationthorsten +eickenfal +kimbal +btopic +includ +represent +inform +machin +assembl +languag +processor +organ +interrupt +memori +hierarchi +combinatori +sequenti +circuit +data +path +control +unit +design +microprogram +helpif +problem +relat +lectur +homework +project +best +help +annot +appropri +point +cours +materi +allow +help +cours +staff +also +class +mate +otherwis +send +email +cornel +talk +consult +cours +materialsal +cours +materi +separ +server +want +bookmark +us +conot +allow +annot +document +cours +materi +includ +lectur +note +section +note +homework +project +file +cours +inform +date +date +inform +pleas +check +cours +materi +conot +account +request +post +saturdai +process +pleas +send +email +encount +difficulti +read +cours +inform +homework +follow +hidden +instruct +sign +cours +materi +section +start +mondai +conot +registr +listlist +made +registr +process +page +maintain +thorsten +eicken diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..9ae04548 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,92 @@ +home +page +home +page +look +admin +handout +inform +incl +offic +hour +lectur +note +assign +inform +recit +inform +ethic +profession +social +respons +page +mayb +electron +submiss +procedur +group +perform +evalu +resourc +quot +stuff +collect +joke +start +submit +sumedh +offic +hour +break +new +recit +mondai +held +csuglab +floor +upson +misc +stuff +convert +text +postscript +peopl +ask +convert +text +postscript +simpl +unix +program +enscript +suggest +wai +enscript +pfile +file +enscript +pfile +file +first +print +file +good +sourc +code +second +print +give +nice +header +gener +postscript +file +file +leav +pfile +send +file +printer +come +last +modif diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..0cac944d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,72 @@ +home +pagec +system +program +oper +systemsc +practicum +oper +systemkenneth +birmanc +new +groupcours +syllabuslectur +note +unix +filesystem +structur +link +static +dynam +assign +assign +assign +assign +assign +assign +solut +solut +solut +solut +solut +prelim +solut +prelim +solut +taslili +upson +hall +phone +mail +lili +cornel +offic +hour +wednesdai +fridai +cheng +huang +upson +hall +phone +mail +ychuang +cornel +offic +hour +tuesdai +thursdai +mihai +budiu +upson +hall +phone +mail +budiu +cornel +offic +hour +wednesdai +thursdai +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..3162986a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,196 @@ +home +pagecsfound +artifici +intellig +comput +scienc +depart +cornel +univers +fall +welcom +cours +inform +cours +materi +code +academ +integr +pleas +read +announc +clair +date +program +move +mondai +atth +begin +class +scott +solut +homework +kevin +code +program +assign +notethat +file +rubix +oper +need +load +thefunct +rearrang +slightli +also +variabl +previous +defin +constant +chang +paramet +appeas +compil +var +still +bracket +youus +file +us +machinesshould +file +netscap +kevin +find +bind +sbin +found +ksaunder +find +bind +sbinfor +account +kevin +us +seriou +gremlin +codefor +program +assign +still +abl +look +code +andget +start +assign +planner +success +uponcomplet +appli +oper +schema +meanwhil +hunt +thoseus +machin +shouldn +problem +kevin +special +offer +limit +time +concern +get +zeroon +third +program +worri +longer +complet +assign +asspecifi +cours +materi +page +posit +grade +result +guarante +offer +avail +novemb +oneassign +group +pleas +clair +clarif +problem +homework +unless +otherwis +specifi +assum +either +system +add +fact +dodg +vanto +queri +time +fact +dodg +alreadi +exist +postscript +document +modifi +includ +thisclarif +scott +homework +newhomework +kevin +solut +program +assign +avail +coursemateri +page +clair +solut +midterm +avail +homework +solut +portion +home +page +clair +inform +statu +report +tuesdai +avail +thec +section +home +page +clair +remind +class +mondai +scott +solut +homework +right +postscript +cours +home +pagesc +depart +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..8e715b0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,44 @@ +fall +home +pagec +fall +automata +comput +theorywelcom +click +cours +inform +lectur +note +homework +exam +studi +guideannounc +cours +note +avail +hardcopi +homework +set +note +offic +hour +incorrect +date +prelim +prelim +revis +homework +homework +erratum +chang +room +nikolai +offic +hourscod +academ +integr +pleas +read +cours +depart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..ca500608 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,737 @@ +advanc +databas +system +advanc +databas +system +time +tuesdai +thursdai +place +upson +survei +propos +project +propos +prelim +exam +paper +survei +project +evalu +project +complet +final +exam +prelim +result +stat +content +lectur +schedul +prelim +samplequest +answer +outsidefirewal +prelim +result +stat +project +survei +project +info +predatordbm +currentproject +survei +list +outsidefirewal +refer +materi +list +handout +note +mail +archiv +cours +descript +prerequisit +textbook +inform +grade +professor +teach +assist +coursedescript +offer +first +time +fall +intendedto +give +student +solid +background +design +develop +databasemanag +system +dbm +databas +system +possibl +world +slargest +piec +softwar +certainli +among +valuabl +piecesof +softwar +dbm +sens +giant +applic +program +surprisingli +mani +principl +behind +develop +databas +industri +grow +thrive +demand +knowledgeabledatabas +engin +much +greater +suppli +databas +researchcommun +also +activ +alwai +problem +addressedbecaus +explos +amount +data +peopl +wish +access +thiscours +form +essenti +background +anyon +want +becom +asystem +engin +databas +develop +compani +becom +informedus +databas +system +becom +databas +system +research +develop +system +domain +manipul +larg +amount +data +find +teller +machin +realli +work +number +advanc +fundament +dbm +concept +cover +although +intend +introductori +cours +newcours +student +class +differ +background +consequ +discuss +variou +topic +begin +quickreview +basic +materi +taught +click +herefor +tent +list +topic +cover +term +workload +cours +involv +midterm +final +examin +test +abreadth +basic +concept +student +survei +paper +specif +advancedtop +list +possibl +topic +thepurpos +awar +taught +coursei +fraction +paper +three +weeksaft +midterm +complet +refer +requireread +paper +journal +confer +proceed +engineeringlibrari +suggest +initi +refer +pursueaddit +refer +click +forinform +refer +materi +librari +develop +project +involv +program +look +herefor +inform +written +homework +assign +student +take +turn +write +lectur +note +depend +enrol +mean +person +take +note +twice +semest +hopefulli +addit +work +turn +us +around +examtim +detail +developmentproject +term +project +import +part +cours +involvea +signific +amount +program +choos +whether +wishto +work +alon +team +howev +person +project +willinvolv +proportion +work +goal +project +fold +hand +experi +build +specif +dbm +compon +comfort +work +larg +exist +code +base +andmodifi +modular +manner +second +goal +import +thefirst +databas +system +huge +softwar +system +andrar +luxuri +start +scratch +forc +youto +write +modular +code +also +understand +interact +thediffer +system +compon +inevit +bug +appear +research +dbm +prototyp +us +project +minibas +softwar +associ +textbook +simpl +singl +user +databas +system +provid +compon +fromth +parser +disk +manag +hopefulli +avail +depend +abl +compil +comput +environ +import +part +minibas +interfac +descript +varioussystem +compon +actual +code +come +class +project +consequ +project +us +minibas +involv +write +compon +like +buffermanag +base +specif +interfac +prototyp +predat +queri +process +enginethat +develop +research +number +possibleproject +could +lead +research +topic +builton +predat +focu +high +function +likecomplex +queri +data +type +familiar +recommend +minibas +project +becauseth +amount +design +need +minim +think +interestedin +databas +system +research +predat +project +ifyou +fall +neither +categori +decid +whether +want +aproject +lower +level +dbm +storag +access +buffer +area +thehigh +level +queri +process +optim +area +choos +betweenminibas +lower +level +predat +higher +level +also +somegener +project +involv +neither +system +ideaon +suitabl +project +talk +well +advanceof +project +propos +date +tent +list +possibl +project +project +certain +step +follow +part +project +propos +discuss +submitan +order +list +piec +function +project +produc +project +review +meet +discussth +progress +made +toward +complet +project +code +write +must +follow +code +convent +particularsystem +work +detail +code +convent +documentwil +provid +follow +close +someth +thati +picki +contribut +grade +geton +project +project +submiss +includ +demo +reason +amount +oftest +data +us +refer +home +page +predat +homepag +minibas +coursetextbook +primari +text +beta +edit +book +databas +system +databas +manag +system +raghu +ramakrishnan +bookcontain +mani +detail +introductori +databasebook +also +associ +free +softwar +instruct +databasesystem +minibas +might +class +assign +textbook +avail +thecampu +store +textbook +could +us +refer +korth +silberschatz +databas +system +concept +mcgraw +hill +secondedit +standard +introductori +databas +text +lack +detail +tobe +us +graduat +cours +michael +stonebrak +read +databas +system +morgan +kaufmann +second +edit +collect +rel +recent +paper +area +collectedand +introduc +stonebrak +develop +ingr +postgr +andillustra +databas +system +mani +fundament +paper +corearea +elmasri +navath +fundament +databas +system +benjamin +cum +second +edit +altern +introductori +databas +text +grai +reuter +transact +process +concept +techniqu +morgan +kaufmann +bibl +transact +process +page +long +tellsyou +know +transact +wonderfulrefer +clear +confus +aspect +concurr +control +recoveri +transact +semant +refer +materi +also +place +librari +click +herefor +detail +inform +resourc +program +tutori +languag +construct +debuggingwith +make +gradingpolici +grade +cours +assign +base +follow +percentag +prelim +term +exam +final +exam +survei +paper +lectur +note +term +project +prelim +exam +even +upson +comfort +finish +anextra +half +hour +provid +need +likewis +thefin +exam +final +exam +period +thur +tent +need +confirm +final +exam +willfocu +materi +test +term +materi +coveredin +earlier +part +cours +form +background +question +professorpraveen +seshadri +offic +upson +phone +mail +praveen +offic +hour +tuesdai +thursdai +teachingassist +weitsang +offic +upson +hall +phone +mail +weitsang +offic +hour +noon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..d669175e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,270 @@ +semant +program +languag +semant +program +languag +content +descript +text +prerequisiteshandoutsscrib +lectur +noteshomework +assignmentscontact +informationrelev +link +descript +lectur +upson +though +call +advanc +program +languag +cours +book +better +entitl +semant +program +languag +goalof +cours +conduct +broad +survei +tech +programminglanguag +like +java +directli +studi +implement +mechan +languag +compress +dispatch +tabl +multipleinherit +rather +goal +cours +studi +principlesof +formal +notat +describ +comput +tool +analyz +prove +properti +comput +concern +subsum +thestudi +specif +program +languag +implement +mechan +henc +lead +deeper +understand +program +specif +logic +mathemat +proof +theori +exampl +studi +notat +abstractli +specifi +howprogram +comput +oper +semant +well +asnot +describ +program +comput +denot +semant +turn +abstract +preciser +notat +allow +studi +techniqu +induct +logic +relat +forform +prove +interest +relev +properti +program +languag +type +safeti +compil +correct +ideal +student +come +cours +learn +somethingabout +make +inform +concept +notat +precis +tomanipul +notat +demonstr +us +properti +textbook +semant +program +languag +carl +gunter +work +programm +second +edit +larri +paulson +prerequisit +program +side +assum +experi +least +pascal +like +languag +prefer +student +knowledg +experi +work +witha +function +languag +scheme +haskel +theoret +side +assum +basic +profici +undergraduatemathemat +logic +comput +scienc +basic +knowledg +comput +ture +machin +recurs +function +andlog +predic +calculu +well +mathematicalmatur +requir +cours +design +student +math +meng +undergradu +student +anmeng +undergradu +student +must +talk +instructor +find +ifth +cours +suitabl +contact +inform +newsgroup +cornel +class +instructor +greg +morrisett +upson +cornel +offic +hour +class +appoint +admin +assist +linda +competillo +upson +lfar +erlingsson +upson +ulfar +cornel +offic +hour +pmrelev +link +mark +leon +resourc +program +languag +research +emac +mode +comint +need +project +line +inform +standard +refer +inform +postscript +user +guid +descript +base +environ +descript +system +environ +librari +document +document +avail +tool +refer +indexdocument +avail +toolsa +gentl +introduct +andrew +cum +info diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..95e957cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,36 @@ +home +pagecsmultimedia +systemscomput +scienc +depart +cornel +univers +fall +final +project +present +schedul +cours +staff +cours +info +cours +materi +student +page +project +page +us +link +newsgroup +anounc +newsgroup +access +rivl +bugcom +question +page +send +mail +janosi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..e5131a10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,32 @@ +home +pagecsmultimedia +systemscomput +scienc +depart +cornel +univers +fall +cours +staff +cours +info +cours +materi +student +page +project +page +us +link +newsgroup +anounc +newsgroup +access +rivl +bugcom +question +page +send +mail +janosi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..79565455 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,87 @@ +design +analysi +algorithm +homepag +instructor +ronitt +rubinfeld +evan +moran +time +locat +upson +text +kozen +design +analysi +algorithm +springer +verlag +handout +cours +announc +syllabu +homework +homework +last +modifi +homework +last +modifi +homework +last +modifi +homework +last +modifi +addendum +last +modifi +homework +last +modifi +addendum +last +modifi +homework +addendum +copi +homework +last +modifi +homework +last +modifi +solut +solut +solut +solut +solut +solut +solut +solut +announc +exam +thursdai +inupson +talk +tome +evan +reschedul +cannot +make +time +refer +kozen +text +cheat +sheet +class +note +homework +rajeev +motwani +lectur +note +approxim +paper diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..4841c7db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,204 @@ +home +pagefronti +parallel +system +thorsten +eickenfal +locat +upson +pmoffic +hour +pmcours +descriptionparallel +machin +stai +underscor +fact +system +manufactur +offer +multiprocessor +product +line +howev +debat +parallel +machin +futur +look +like +heat +consider +past +month +feder +spend +cut +erad +perform +price +massiv +parallel +processor +manufactur +competitor +glorifi +workstat +farm +smile +machin +cannot +offer +level +perform +eas +busi +suffer +much +machin +price +competit +leverag +latest +microprocessor +develop +quickli +core +technolog +debat +larg +number +system +issu +integr +larg +number +shelf +processor +cost +effect +system +easili +program +high +level +parallel +program +languag +host +vari +applic +workload +cours +program +parallel +machin +although +topic +first +week +parallel +algorithm +languag +architectur +matur +consider +last +year +point +parallel +machin +almost +usabl +system +support +adequ +allow +gener +purpos +cours +focu +architectur +oper +system +aspect +requir +support +featur +taken +grant +sequenti +comput +portabl +parallel +program +power +debugg +multi +user +machin +access +virtual +memori +fast +first +part +cours +examin +complet +parallel +system +split +network +workstat +us +vertic +approach +studi +interact +algorithm +model +languag +associ +execut +model +oper +system +architectur +hardwar +implement +focuss +support +requir +layer +second +part +cours +focu +specif +topic +slice +horizont +across +system +select +issu +analysi +design +altern +depth +studi +dash +provid +share +memori +contrast +cours +cours +materialscours +formatlectur +note +problem +set +term +project diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..89a85c05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,146 @@ +home +pageintroduct +digit +system +comput +organ +thorsten +eickenfal +kimbal +btopic +includ +representationof +inform +machin +assembl +languag +processor +organ +interrupt +memori +hierarchi +combinatori +sequentialcircuit +data +path +control +unit +design +andmicroprogram +helpif +problem +relat +lectur +homework +aproject +best +help +annot +theappropri +point +cours +materi +allow +gethelp +cours +staff +also +class +mate +otherwis +send +email +cornel +talk +toon +consult +cours +informationcoursemateri +announcementsannounc +lectur +note +lectur +video +assign +part +us +conot +allow +annot +document +small +get +start +document +avail +case +never +us +conot +tutori +onlinean +introduct +marshal +brain +great +introduct +forpeopl +know +procedur +languag +like +pascal +fortran +program +david +marshal +cours +note +cclass +cardiff +univers +lot +exampl +program +learnc +todai +guid +book +exampl +programsand +onlin +tutori +refer +well +annot +theyahoo +page +wish +surf +search +ofmor +materi +place +start +frequent +ask +question +page +us +inansw +common +question +come +learn +us +also +contain +link +sever +onlin +tutori +newsgroup +page +maintain +thorsten +voneicken diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..8a2fa10c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,45 @@ +home +pagecsfound +artifici +intellig +comput +scienc +depart +cornel +univers +fall +welcom +cours +inform +cours +materi +code +academ +integr +pleas +read +announc +clair +final +grade +avail +sometim +saturdai +send +yourgrad +mail +request +mail +clair +final +exam +upson +altern +date +upson +cours +home +pagesc +depart +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..874729f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,63 @@ +home +pagec +softwar +engin +technolog +techniquescomput +scienc +depart +cornel +univers +fall +cours +staff +samuel +weber +professor +upson +weber +cornel +offic +hour +upson +cornel +offic +hour +vineet +buch +upson +buch +cornel +offic +hour +thursdai +yaron +minski +offic +hour +none +cours +materi +cours +overview +overview +cours +handout +lectur +note +recit +note +line +resourc +assign +grade +remark +stuff +frequent +ask +question +borland +samuel +cornel +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..e74b48d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,40 @@ +home +page +home +page +brian +smith +tour +guid +cours +inform +homework +assign +assign +assign +assign +assign +project +project +project +spec +lectur +tabl +content +postscript +slide +introduct +comput +system +organ +program +procedur +recurs +stack +assembl +linker +loader +interrupt +introduct +logic +design diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..df2e4a72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,67 @@ +home +page +spring +home +page +spring +messag +welcom +home +page +rememb +check +frequentlyfor +import +inform +regard +cours +prelim +tuesdai +april +review +session +held +onsundai +april +baker +cours +inform +instructor +teach +assist +offic +hour +get +cours +materi +theworld +wide +codewarrior +personalmac +program +lectur +program +program +program +program +program +program +program +exam +prelim +tue +februari +prelim +thur +march +prelim +tue +april +final +exam +last +updat +april +spring +pierc +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..9a3d0f72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,652 @@ +spring +home +page +comput +program +comput +scienc +depart +cornel +univers +spring +question +problem +page +email +jeff +foster +jfoster +cornel +troubl +view +tabl +us +earlyvers +netscap +contest +first +prelim +thursdai +march +announcetim +place +soon +topic +cover +theprelim +second +prelim +tuesdai +april +room +wereannounc +lectur +topic +cover +thesecond +prelim +final +exam +mondai +noon +olin +lastnam +olin +last +name +final +exam +covereveryth +cours +addit +topicsconv +final +offic +hour +daywhenwherewhomondai +upson +davetuesdai +upson +jeffwednesdai +upson +davethursdai +upson +halfridai +upson +halsaturdai +upson +breview +session +consult +chri +jose +thank +chrisand +jose +take +time +studi +cours +descript +cours +staff +lectur +note +handout +code +sampl +gofer +enhanc +server +cours +descript +comput +program +also +engrd +fall +spring +summer +credit +credit +grant +bothcom +prerequisit +equival +programmingexperi +intermedi +program +high +level +languag +introduct +tocomput +scienc +topic +includ +program +develop +proof +ofprogram +correct +program +structur +recurs +abstract +datatyp +object +orient +program +data +structur +analysi +ofalgorithm +princip +program +languag +cours +staff +offic +hour +instructor +perkin +upson +offic +hour +thursdai +appoint +email +cornel +teach +assist +correct +offic +hour +held +offic +jeff +foster +upson +offic +hour +tuesdai +email +jfoster +cornel +alan +kwan +upson +email +kwan +cornel +david +walker +upson +offic +hour +mondai +email +walker +cornel +section +spring +sectionsdaytimeroominstructortuesdai +upson +ahal +perkinstuesdai +upson +ahal +perkinswednesdai +hollist +david +walkerwednesdai +hollist +david +walkerwednesdai +upson +david +walkerthursdai +upson +jeff +fosterfridai +hollist +jeff +foster +consult +consult +hour +upson +regular +consult +schedul +effect +last +ofclass +sundai +thursdai +fridai +spring +consultingsundaymondaytuesdaywednesdaythursdayfridai +steveerickylechrisjpkyl +steveerickylechrisjpvasantha +josejosekayjosejpvasantha +josejosekayjosejp +none +kaykylesteveericvasantha +none +kaykylesteveericvasantha +none +danerickaychrisdan +none +danerickaychrisdan +none +lectur +note +lectur +note +avail +three +format +binhqx +macbinari +file +contain +microsoft +word +file +rich +text +format +file +parseabl +microsoft +word +other +plain +text +date +list +next +lectur +date +lectur +waspost +date +lectur +given +macintosh +binhqx +lectur +preliminari +lectur +basic +class +lectur +class +lectur +pointer +arrai +lectur +dynam +storag +alloc +lectur +class +dynamicdata +lectur +fine +point +class +lectur +introduct +lectur +deriv +class +lectur +program +correct +algorithm +notat +lectur +tripl +assign +lectur +assign +condit +loop +lectur +prove +loop +correct +lectur +function +program +lectur +type +gofer +lectur +curri +filter +lectur +recurs +iter +lectur +applic +architectur +framework +lectur +link +list +lectur +algorithm +analysi +lectur +link +list +lectur +binari +tree +lectur +class +link +data +structur +lectur +industri +strength +lectur +java +binhqx +file +process +stuffit +expand +http +address +foraladdin +system +find +window +version +thank +armandonunez +rich +text +lectur +preliminari +lectur +basic +class +lectur +class +lectur +pointer +arrai +lectur +dynam +storag +alloc +lectur +class +dynamicdata +lectur +fine +point +class +lectur +introduct +lectur +deriv +class +lectur +program +correct +algorithm +notat +lectur +tripl +assign +lectur +assign +condit +loop +lectur +prove +loop +correct +lectur +function +program +lectur +type +gofer +lectur +curri +filter +lectur +recurs +iter +lectur +applic +architectur +framework +lectur +link +list +lectur +algorithm +analysi +lectur +link +list +lectur +binari +tree +lectur +class +link +data +structur +lectur +industri +strength +lectur +java +plain +text +lectur +preliminari +lectur +basic +class +lectur +class +lectur +pointer +arrai +lectur +dynam +storag +alloc +lectur +class +dynamicdata +lectur +fine +point +class +lectur +introduct +lectur +deriv +class +lectur +program +correct +algorithm +notat +lectur +tripl +assign +lectur +assign +condit +loop +lectur +prove +loop +correct +lectur +function +program +lectur +type +gofer +lectur +curri +filter +lectur +recurs +iter +lectur +applic +architectur +framework +lectur +link +list +lectur +algorithm +analysi +lectur +link +list +lectur +binari +tree +lectur +class +link +data +structur +lectur +industri +strength +lectur +java +handout +handout +postscript +format +print +almost +anylas +printer +want +view +need +applicationlik +ghostview +handout +preliminari +handout +assign +handout +codewarrior +intro +section +handout +assign +handout +assign +handout +assign +code +sampl +lectur +cell +class +lectur +complex +class +lectur +set +charact +lectur +simpl +list +class +lectur +dictionari +class +gofer +flavor +gofer +avail +macgof +avail +inth +public +lab +macgof +gofer +unix +gofer +site +gofer +ishaskel +systemsz +yale +haskel +gofer +site +macgof +pleas +help +find +piec +ofgof +think +gener +interest +know +make +itavail +macgof +sourc +manual +avail +onth +depart +server +cornel +jfoster +server +final +project +line +avail +csdepart +server +cornel +enhance_assign +server +comput +scienc +depart +cuinfo +metrowerk +codewarrior +homepag +aladdin +system +maker +stuffit +expand +home +page +comment +suggest +mail +jeff +foster diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..20e6fa60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,35 @@ +home +pagec +structur +interpretationof +comput +programscomput +scienc +departmentcornel +universityspr +cours +staff +cours +info +cours +materi +announc +emac +macmarlai +demo +section +room +chang +prelim +time +place +make +grader +happi +gener +exam +announc +extens +date +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..c0fa5291 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,125 @@ +advanc +languag +implement +advanc +languag +implement +content +descriptionhandoutsadministriviaweb +link +descript +modern +program +languag +java +haskel +dylan +provid +high +level +featur +object +closur +polymorph +abstract +data +type +class +garbag +collect +except +continu +thread +synchron +construct +class +survei +modern +techniqu +effici +implementationof +featur +focu +implement +ofmodern +function +languag +make +connectionsto +kind +languag +notabl +object +orient +handout +handout +separ +page +administrivia +instructor +greg +morrisett +offic +upson +email +cornel +phone +admin +assist +linda +competillo +upson +offic +hour +send +email +appoint +evan +moran +offic +upson +email +evan +cornel +phone +offic +hour +tuesdai +thursdai +pmweb +link +mark +leon +resourc +program +languag +research +project +line +inform +standard +refer +inform +postscript +user +guid +descript +base +environ +descript +system +environ +librari +document +document +avail +tool +refer +indexdocument +avail +toolsa +gentl +introduct +andrew +cum +info diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..3acd370c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,30 @@ +practic +distribut +systemspract +distribut +system +registr +necessari +student +takingc +gener +informationcours +overview +logist +read +homeworkshomework +homework +postscript +homework +amexaminationsmidterm +examin +postscript +final +examin +postscript +annot +bibliographiesselect +annot +bibliographi +prepar +class diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..058f3b25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,204 @@ +home +pagehigh +perform +comput +system +thorsten +eickenspr +poster +sessionthu +upson +upson +tbdpleas +sign +session +outsid +upson +willdetermin +present +order +begin +postersess +pleas +arriv +late +instruct +poster +pick +poster +board +cindywilliam +limit +board +pleas +hold +ithorizont +hang +board +corridor +instruct +poster +session +minut +presentyour +poster +plu +minut +question +give +everyon +asens +problem +attack +solut +contempl +andth +result +gotten +learn +presentationswil +judg +well +messag +across +everi +memberof +group +particip +present +nativespeak +difficulti +taken +consider +instruct +final +report +final +report +info +cours +current +mondai +noon +absolut +page +must +subdirectori +willb +import +contribut +project +well +thelongest +last +page +remain +server +year +tocom +mani +peopl +find +search +engin +finalreport +start +usual +introduct +problem +aretri +solv +follow +thorough +discuss +trade +off +import +part +need +explain +chose +thesolut +option +consid +youreject +project +judg +futur +webread +well +convic +arriv +bestsolut +showcas +work +us +ampl +experiment +data +goodexplan +exactli +measur +final +know +whatyou +think +shown +left +open +futur +work +projectsproject +reportsproject +proposalsiniti +project +ideascours +materialshomework +homework +pleas +check +cuc +pagebefor +start +split +machin +might +also +check +sampl +program +homework +homework +introc +casec +technologyc +cachesc +netsc +spc +split +cyou +find +inform +split +paper +parallel +programmingin +split +emdc +sortingc +spamc +msgpassc +mpic +cachecohc +locksc +threadsc +atmc +netc +scoreboardc +tomasuloc +predc +superscalarc +busesc +pentiummaintain +thorsten +eicken diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..9df79a4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,119 @@ +machin +vision +machin +vision +cours +staff +instructor +ramin +zabihteach +assist +justin +millerclass +time +place +phillip +project +suggestionsproblem +set +problem +problem +cours +class +note +scribe +week +januari +regular +januari +simul +anneal +februari +comput +motion +regular +februari +calculu +variat +februari +maximum +likelihood +estim +februari +markov +random +field +februari +snake +februari +stereo +motion +februari +introduct +correl +februari +mestim +march +parametr +method +march +guest +lectur +march +parametr +transform +cont +march +correl +censu +transform +march +lectur +cont +march +stereo +geometri +cont +april +geometr +transform +april +motion +segment +april +track +april +motion +track +cont +april +edg +detect +april +continu +model +base +vision +april +hausdorff +distanc +april +guest +lectur +eigenhausdorff +april +face +recognitionsect +note +optic +flow +constraint +equationoth +sourc +comput +vision +home +page +histori +object +recognit diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..232ff69a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,98 @@ +home +pagecsintroduct +natur +languag +understandingcomput +scienc +depart +cornel +univers +spring +welcom +cours +inform +cours +materi +code +academ +integr +pleas +read +announcementsher +list +resourc +avail +project +brill +directori +brill +part +speech +taggerbrown +directori +brown +corpu +part +speech +tag +directori +small +corpu +annot +withpart +speech +inform +text +corpu +execut +wordnet +sure +environ +variabl +wnsearchdir +archiv +wordnet +dict +final +site +contain +descript +ofth +content +penn +treebank +iicollect +annot +text +cornel +canus +part +like +project +talk +francisabout +access +us +inform +project +databas +recent +paper +computationallinguist +repositori +contain +pointer +code +variou +system +compon +present +schedulewhat +turn +project +cours +home +pagesc +depart +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..df9a7f32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,48 @@ +cours +home +page +info +syllabu +lab +link +announcementsroom +updat +unforseen +circumst +still +unableto +upson +class +therefor +follow +room +assign +tuesdai +wednesdai +section +philip +section +upson +thursdai +class +meet +maclab +usual +prelim +first +class +prelim +wednesdai +class +close +book +exam +cover +cours +materialcov +first +assign +lab +tuesdai +need +comput diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..b793ebdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,82 @@ +fall +fall +professor +monika +rauch +henzingeremail +cornel +cours +informationhomework +solut +lectur +lectur +graph +explor +lectur +greedi +algorithm +lectur +matroid +lectur +matroid +lectur +dijkstra +algorithm +lectur +bellman +ford +algorithm +lectur +matrix +closur +lectur +binomi +heap +lectur +binomi +heap +lectur +fibonacci +heap +lectur +treap +lectur +randomizedsearch +tree +lectur +union +find +lectur +union +find +lectur +union +find +lectur +maxflow +lectur +maxflow +mincut +theorem +lectur +maxflow +edmond +karp +algorithm +lectur +maxflow +dinitz +algorithm +lectur +maxflow +preflow +push +lectur +maxflow +preflow +push +lectur +maxflow +dynam +tree +implement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..c5a57ebb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,377 @@ +introduct +scientif +comput +introduct +scientif +computationsumm +class +inform +syllabu +sourc +code +handout +problem +setsan +introduct +elementari +numer +analysi +scientificcomput +topic +includ +interpol +quadratur +linear +andnonlinear +equat +solv +least +squar +fit +ordinarydifferenti +equat +matlab +comput +environ +us +vector +effici +reliabl +stabil +stress +class +informationstaff +niko +pitsiani +instructor +offic +upson +hall +niko +cornel +offic +hour +time +appoint +ozan +hafizogullari +teach +assist +offic +upson +hall +ozan +cornel +offic +hour +time +appoint +lecturesclass +meet +everi +upson +hall +cours +administrationlauri +buck +upson +question +concern +grade +record +account +addressedto +cours +administr +prerequisitesc +corequisit +math +math +cours +materialstext +introduct +scientif +comput +matrix +vector +approachus +matlab +charl +loan +distribut +class +softwar +matlab +purchas +student +matlab +eitherth +macintosh +version +though +comput +labsthi +cours +design +three +comput +lab +upson +siblei +martha +renssela +problem +setsther +assign +hand +lectur +orfrom +page +extra +avail +rack +outsid +upson +assign +collect +class +computingproblem +done +matlab +return +grade +work +behandl +class +assign +begin +class +duedat +late +assign +accept +credit +worst +gradefrom +assign +ignor +final +grade +assign +done +alon +partner +printyour +name +copi +name +work +pair +firstpag +includ +student +chang +addit +partnernam +assign +hand +examsther +midterm +final +exam +dai +time +list +gradingyour +final +total +score +comput +follow +best +assign +midterm +final +final +grade +beassign +accord +rel +rank +class +base +onyour +final +total +score +syllabu +calendar +june +introduct +june +program +matlab +june +error +june +float +point +number +registr +deadlin +june +polynomi +interpol +juli +vandermond +newton +juli +piecewis +interpol +juli +linear +cubic +hermit +juli +class +juli +cubic +spline +cours +deadlin +juli +numer +integr +juli +newton +cote +juli +composit +rule +chang +credit +grade +deadlin +juli +adapt +quadratur +juli +review +drop +cours +deadlin +juli +midterm +exam +classroom +juli +matric +oper +juli +linear +system +juli +least +squar +juli +given +juli +choleski +juli +find +root +juli +minim +function +variabl +juli +minim +multivari +function +juli +solv +linear +system +juli +initi +valu +problem +juli +euler +backward +euler +juli +rung +kutta +method +adam +method +review +class +final +exam +classroom +sourc +code +exampl +introduct +scientif +computingat +lab +upson +siblei +martha +rennselaerhal +sourc +code +locat +folder +applic +matlab +chapter +plan +work +stand +alon +comput +otherthan +assign +on +sourc +code +exampl +scmv +file +system +unix +scmv +uncompress +untar +unix +command +zcat +scmv +highli +recommend +zcat +brows +session +need +postscript +file +viewer +instal +comput +order +file +handout +grade +randperm +length +grade +problem +set +assign +assign +assign +assign diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..246c3747 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,282 @@ +summer +home +page +system +program +oper +system +summer +prereq +permiss +instructor +instructor +indupraka +kodukula +praka +cornel +teach +assist +nawaaz +ahm +nawaaz +cornel +motd +motd +archiv +subject +descript +prerequsit +cours +outlin +textbook +cours +schedul +quizz +grade +polici +statement +collabor +offic +hour +cours +materi +send +comment +subject +descript +system +program +oper +system +anintroduct +logic +design +system +program +emphasison +multiprogram +oper +system +topic +includ +processsynchron +deadlock +memori +manag +input +output +method +inform +share +protect +secur +file +system +theimpact +network +distribut +comput +environ +operatingsystem +also +discuss +fast +pace +subject +requiringconst +attent +prerequsitescomplet +familiar +materi +assum +inparticular +knowledg +comput +architectur +assembl +programminglanguag +program +structur +requir +cover +theintroductori +materi +class +requir +purpos +thatwil +remind +audienc +materi +cours +outlineth +cours +organ +roughli +follow +roughli +depend +feedback +class +chang +theorder +content +particular +section +start +overview +concurr +issu +lldiscuss +synchron +issu +particular +ensur +mutualexclus +deadlock +detect +prevent +algorithm +lldiscuss +multiprocessor +issu +well +next +memorymanag +discuss +virtual +memori +variou +method +usedto +implement +virtual +memori +page +segment +next +cover +file +system +final +look +evolut +thetradit +oper +system +look +micro +kernel +timepermit +lectur +advanc +topic +multithread +serverless +file +system +textbooksth +princip +text +book +class +oper +system +conceptsbook +abraham +silberschatz +peter +galvin +also +distributeclass +note +cover +materi +complet +class +noteswil +also +avail +world +wide +class +home +pageat +class +cours +schedul +meet +mondaythru +thursdai +week +class +quizz +first +second +final +addit +weekli +assign +first +week +class +thesewil +hand +thursdai +follow +thursdayat +start +class +quizz +gradingeach +homework +carri +weightag +combinedweightag +final +worth +twomidterm +worth +also +surpris +quizz +todetermin +understand +cours +materi +class +polici +statement +collaborationat +peopl +form +group +collabor +eachhomework +group +need +submit +copi +thehomework +quizz +final +close +book +closednot +offic +hour +indupraka +kodukula +mondai +nawaaz +ahm +tuesdai +wednesdai +thursdai +upson +cours +materi +send +comment +maintain +induprakaskodukula diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..275a0f7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,174 @@ +csc +advanc +program +languagesfal +upson +instructor +henzingerupson +cornel +offic +hour +class +appoint +teach +assist +neal +glewupson +glew +cornel +offic +hour +appoint +handoutshandout +septemb +cours +informationhandout +septemb +get +start +mlhandout +octob +meta +lambdahomeworkshomework +septemb +solut +homework +septemb +solut +homework +octob +solut +homework +octob +solut +homework +novemb +solut +homework +grieshomework +novemb +solut +homework +decemb +solut +homework +decemb +solut +notesraw +note +septemb +note +septemb +note +septemb +note +septemb +note +septemb +note +septemb +note +septemb +note +septemb +note +octob +note +octob +note +octob +note +octob +note +octob +note +octob +note +octob +note +novemb +note +novemb +note +novemb +note +novemb +note +novemb +note +decemb +note +decemb +scribe +noteslectur +septemb +lectur +septemb +lectur +septemb +lectur +septemb +lectur +septemb +lectur +septemb +lectur +introduct +mllectur +septemb +lectur +septemb +lectur +septemb +lectur +septemb +lectur +septemb +lectur +septemb +lectur +octob +lectur +octob +lectur +octob +lectur +octob +lectur +octob +lectur +octob +midterm +octob +solut +lectur +octob +lectur +octob +lectur +grieslectur +novemb +lectur +novemb +lectur +novemb +lectur +novemb +lectur +novemb +lectur +novemb +lectur +novemb +lectur +novemb +lectur +novemb +lectur +decemb +lectur +decemb +lectur +decemb +lectur +decemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..d05111cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,42 @@ +fall +advanc +program +languag +fall +faculti +prof +robert +constabl +upson +offic +hour +mondai +teach +assist +ravi +kumar +ravi +upson +offic +hour +thur +fall +note +fall +assign +fall +note +fall +note +fall +note +nuprl +classic +comment +question +suggest +page +pleas +mail +pavel +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..2ebf1690 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,17 @@ +coursesc +cours +fall +spring +addit +cours +inform +maintain +individualfaculti +member +consult +class +page +addit +inform +contactgloria +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..24efed1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,43 @@ +analysi +program +analysi +program +fall +instructor +vicki +almstrum +linyuan +yang +syllabu +announc +homework +assign +handout +interest +tutori +new +utexa +class +instructor +almstrum +homepag +last +updat +page +prepar +vicki +almstrum +suggest +comment +welcom +click +send +mail +almstrum +utexa +linyuan +utexa +depart +comput +scienc +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..bf90fe43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,155 @@ +spring +foundat +ofmathemat +taylor +cours +blurb +mani +approach +formal +reason +theobject +specifi +comput +program +includ +formalizationof +world +program +interact +creationof +numer +tool +formal +reason +examin +systemsfor +formal +reason +examin +number +mechan +formalmethod +tool +support +differ +system +exampl +suchsystem +tool +pair +system +tool +primit +recurs +arithmet +boyer +moor +prover +first +order +logic +otter +nelson +higher +order +logic +imp +equat +reason +theori +mizar +quaif +otter +type +theori +nuprl +lego +coqstud +choos +help +instructor +system +ortool +examin +grade +base +upon +present +aboutthes +projecthtml +version +theqe +manifestoplain +text +version +qedmanifestobowen +formal +method +page +backup +copi +chief +assign +select +formal +method +system +bowen +formal +method +page +report +class +oral +present +either +logic +foundat +upon +mani +system +good +freeli +avail +implement +consult +make +final +choic +test +final +present +hope +number +guest +present +localform +method +commun +tent +schedul +april +rick +tannei +continu +april +trevor +hick +otter +april +ruben +gamboa +squar +root +april +samuel +guyer +circal +process +algebra +april +sawada +russel +turpin +galoi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..0219f9b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,196 @@ +advanc +comput +architectur +advanc +comput +architecturethi +cours +focus +techniqu +quantit +analysi +evaluationof +modern +comput +system +select +appropri +benchmarksto +reveal +compar +perform +altern +design +choic +insystem +design +emphasi +major +compon +subsystem +highperform +comput +pipelin +instruct +level +parallel +memoryhierarchi +input +output +network +orient +interconnect +studentswil +undertak +major +comput +system +analysi +design +project +oftheir +choos +administr +informationuniqu +number +meet +place +instructor +mikedahlinoffic +hour +appoint +tbdtaoffic +hour +tbdreadingstextbook +hennessi +patteson +computerarchitectur +quantit +approach +second +edit +note +edit +significantli +differ +stedit +recommend +attempt +stedit +textbook +cours +errata +sheetfor +hennessi +pattersonin +addit +read +current +paper +variou +aspect +currentcomput +architectur +research +watch +space +pointer +readinglist +grade +class +particip +homework +work +pair +exam +midterm +project +work +pair +cours +scheduleweekdatetopicreadingduejan +intro +admin +review +perf +cost +amdahl +tech +trendsch +cach +memori +isa +pipelin +hazard +branch +predictionch +mlkholidayf +pipelin +hazard +static +branch +predictionch +project +proposalfeb +scoreboard +tomasulu +speculationch +dynam +predict +limit +vector +processorsch +dfeb +memori +hierarchych +project +surveyfeb +memori +dram +banksf +memori +revieww +midterm +spring +breakm +spring +breakmar +metric +queu +buss +disk +raidch +tertiari +networksf +networksch +project +checkpointapr +network +parallel +architecturesf +mppsch +mpp +revieww +midterm +project +preseantationsm +project +presentationsfri +last +classesm +project +written +reportaddit +resourcescours +page +product +research +confer +bibliographi +tech +reportsyahoo +businessand +economi +compani +comput +system diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..c9b915e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,91 @@ +oper +system +oper +systemsuniqu +recent +explos +interest +world +wide +resultedin +evolv +protocol +us +protocol +address +thetradit +concern +oper +system +interprocess +commun +resourc +alloc +secur +gener +contextof +internet +goal +class +provid +understandingof +current +state +oper +system +also +addressproblem +must +solv +provid +matur +gener +purpos +operatingsystem +hypothesi +behind +design +class +mani +theissu +address +context +also +addressedin +tradit +area +oper +system +occasionallyread +relat +paper +bear +understandingcurr +problem +class +project +reportspoint +internet +research +refer +inform +class +syllabu +read +list +schedul +longer +less +organ +list +paper +class +rosterhandout +verif +sslprotocol +proofsketch +guidelin +final +project +talk +report diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..638c2560 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,728 @@ +fall +comput +organ +program +fall +page +constantli +construct +last +updat +onmon +cours +titl +comput +organ +program +prerequisit +grade +least +professor +chri +edmondson +yurkanan +dragon +utexa +import +date +final +exam +lectur +noon +tue +handout +final +exam +lectur +noon +handout +new +program +exampl +solut +post +program +solut +section +exam +object +final +handout +check +extra +offic +hour +fantasm +page +visit +fantasm +updat +version +content +class +info +meet +time +place +lectur +discuss +session +professor +cours +oper +info +syllabu +email +dragon +utexa +offic +hour +back +content +offic +hour +locat +offic +hour +offic +hour +thursdai +attend +discuss +section +regist +email +offic +hour +place +yoonsuck +choe +yschoe +utexa +edum +cynthia +utexa +edum +deepa +ramani +dparam +utexa +eduw +zhang +gzhang +utexa +eduf +discuss +session +conduct +pleas +meet +time +place +back +content +calendar +import +date +labor +holidai +last +us +class +last +drop +us +last +drop +refund +last +rare +extenu +circumst +automat +drop +period +begin +last +drop +cours +academ +penalti +period +start +last +drop +cours +academ +reason +last +withdraw +univers +last +chang +registr +cours +pass +fail +deadlin +appli +graduat +thanksgiv +holidai +last +class +last +appeal +academ +drop +withdraw +lectur +homework +test +schedul +glanc +homework +correct +version +mondai +typo +bit +remov +rightmost +make +bit +homework +mondai +program +part +wednesdai +earli +date +tue +bonu +program +part +program +part +fridai +earli +date +thursdai +late +date +saturdai +program +part +homework +wednesdai +program +exam +object +homework +solut +practic +problem +night +exam +program +part +mondai +late +exam +solut +program +announc +date +chang +slide +door +earli +date +late +date +late +date +program +test +turn +procudur +program +thur +exam +wednesdai +object +exam +practic +problem +avail +electronc +program +earli +final +exam +noon +exam +object +lectur +lectur +final +exam +noon +exam +object +back +content +lectur +handout +back +content +handout +class +note +homework +program +assign +handout +avail +electron +pleas +check +boxin +front +prof +yurkanan +offic +class +handout +handout +letter +student +handout +overview +descript +cours +topic +handout +cours +oper +lectur +note +slide +singl +page +ascii +code +chart +handout +home +work +mondai +class +typo +bit +remov +rightmost +make +bit +handout +class +info +offic +hour +discuss +section +handout +homework +mondai +handout +endia +memori +hierarchi +avail +electron +pick +front +handout +program +part +wednesdai +earli +date +tue +bonu +handout +program +part +handout +program +part +fridai +earli +date +thursdai +bonu +late +date +saturdai +submit +handout +program +part +handout +homework +wednesdai +handout +program +exam +object +handout +homework +solut +handout +practic +problem +handout +program +part +mondai +late +handout +exam +solut +handout +program +announc +date +chang +slide +door +earli +date +late +date +late +date +handout +program +test +turn +procudur +handout +fantasm +user +manual +also +avail +system +disk +fantasm +document +folder +powermac +quadra +handout +fantasm +program +exampl +also +avail +mac +start +p_global +handout +program +thur +handout +class +note +pick +handout +class +note +pass +paramet +us +regist +stack +function +avail +electron +pick +handout +exampl +paramet +pass +handout +discuss +section +macsbug +instruct +handout +import +fantasm +info +turn +requir +handout +comment +real +compil +proc +func +call +class +note +pick +handout +exampl +compil +might +regist +risc +architectur +includ +call +valu +paramet +pass +class +note +pick +handout +exam +wednesdai +object +handout +exam +practic +problem +avail +electronc +pick +handout +exam +practiv +problem +solut +avail +electronc +pick +handout +program +obsolet +handout +handout +program +spec +turnin +procedur +earli +handout +handout +handout +program +model +gener +disk +interfac +avail +electoron +pick +handout +problem +charact +orient +model +avail +electoron +pick +handout +exam +solut +avail +electoron +pick +handout +exam +object +noon +lectur +lectur +noon +discuss +session +handout +cheat +polici +questionair +handout +homework +solut +fantasm +user +manual +user +manual +also +avail +system +disk +fantasm +document +folder +powermac +quadra +exampl +program +also +avail +mac +start +p_global +macsbug +instruct +back +content +discuss +session +must +discuss +session +offici +regist +except +go +discuss +session +held +regist +meet +time +place +handout +info +back +content +utexa +class +newsgroup +class +newsgroup +utexa +class +back +content +homework +solut +homework +solut +homework +solut +homework +solut +back +content +program +solut +free +solut +toward +next +program +assign +program +exampl +solut +thank +brett +jame +pascal +sourc +output +test +test +data +output +test +program +exampl +solut +pascal +sourc +test +output +sourc +program +exampl +solut +p_global +start +p_global +start +p_global +start +program +exampl +solut +p_global +start +macsbug +p_global +start +subroutine_fil +exception_fil +back +content +test +solut +exam +solut +exam +solut +handout +avali +electron +pick +back +content +program +resourc +check +fantasm +page +back +content +exam +studi +guid +check +handout +exam +object +exam +handout +practic +problem +handout +exam +object +exam +handout +practic +problem +avail +electron +handout +exam +object +exam +back +content +page +maintain +yoonsuck +choe +yschoe +utexa +austin +utc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..70e7243e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,371 @@ +porter +homepag +warn +page +construct +link +us +right +becom +activ +semest +progress +andther +relev +inform +regard +link +topic +import +announc +take +todai +home +unabl +maintain +page +forthes +last +coupl +dai +howev +put +link +porter +page +class +relat +announc +avail +follow +good +luckfor +final +next +week +special +class +review +import +topic +cover +class +class +held +painter +hall +exact +locat +dependon +room +availib +howev +note +post +door +offic +therewil +someon +offic +inform +time +post +moreov +almost +total +coverag +next +week +offic +porter +right +uptoth +time +final +exam +need +help +feel +free +come +glad +help +good +luck +final +special +review +next +week +resolutio +topic +bruce +porter +complex +theori +nimar +arora +parallel +process +dwip +banerje +boolean +circuit +bruce +porterquest +review +sheet +rotat +bit +disregard +somewhat +beyond +scope +cover +class +slide +present +lectur +decemb +summar +thecont +whole +semest +avail +reserv +desk +atugl +hope +post +webpag +soon +experienc +technic +difficulti +caus +length +file +click +schedul +offic +final +week +also +pleas +check +room +assign +post +click +addendum +review +question +question +final +review +question +html +version +postscript +version +final +exam +question +ad +sostai +tune +last +program +assign +avail +download +tutori +prolog +program +porter +lectur +week +avail +porter +comput +scienc +iinstructorbruc +porter +porter +utexa +offic +mondai +email +porter +utexa +tasoffic +hourslab +discuss +section +schedulec +porter +final +exam +thursdai +decemb +room +assign +exam +room +uniqu +number +welch +welch +welch +cours +descriptionclass +scheduleclass +lectur +note +porter +class +discuss +session +includ +note +discuss +import +new +articlesclass +newsgroupprogram +assignmentsprogram +solut +studi +guid +midterm +test +solut +midterm +midterm +test +html +version +postscript +version +review +question +midterm +review +question +html +version +postscript +version +final +exam +also +addendum +us +link +pascal +pascaltutori +text +format +ansi +pascal +faqyou +sampl +pascal +program +programm +page +gener +turbo +pascal +program +languag +materi +frequent +ask +question +turbo +pascal +turbo +pascal +zipe +turbo +pascal +tutori +program +concept +structur +pascal +base +newsgroup +pascal +relat +newgroup +might +interest +comp +lang +pascal +ansi +isocomp +lang +pascal +maccomp +lang +pascal +borlandcomp +lang +pascal +misccomp +lang +pascal +delphi +miscfj +lang +pascal +rememb +access +newsgroup +dell +need +new +serverto +new +utexa +mail +new +prefer +item +option +menu +take +look +import +new +articl +link +usual +lead +importantstuff +home +descript +discuss +session +new +articl +newsgroup +program +assign +studi +guid +test +solut +send +comment +critic +suggest +addit +us +link +dwip +dwip +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..f13ccef2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,107 @@ +comput +graphic +spring +cscomput +graphicsspr +instructor +donald +fussel +gener +cours +inform +cours +descript +cours +syllabu +mesa +graphic +librari +year +us +mesa +graphic +librari +provid +anopengl +like +platform +cours +librari +hasbeen +instal +public +workstat +comput +sciencesdepart +instruct +us +mesa +librari +utc +sampl +makefil +mesa +utc +machin +opengl +page +opengl +specif +mesa +home +page +opengl +center +inform +gener +refer +manual +page +ousterhout +book +welch +book +assign +turn +assign +assign +assign +assign +note +assign +option +second +exam +oneor +requir +student +show +second +examwil +requir +submit +assign +student +wish +bothmai +higher +score +curv +willcount +exampl +xlib +exampl +code +updat +exampl +code +updat +exampl +driver +updat +billthecat +copi +file +directori +contain +slate diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..d37b3f92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,107 @@ +comput +graphic +fall +gcomput +graphicsfal +instructor +donald +fussel +gener +cours +inform +cours +descript +cours +syllabu +inform +mesa +graphic +librari +year +us +mesa +graphic +librari +provid +anopengl +like +platform +cours +librari +hasbeen +instal +public +workstat +comput +sciencesdepart +instruct +us +mesa +librari +utc +sampl +makefil +mesa +utc +machin +opengl +page +opengl +specif +mesa +home +page +opengl +center +inform +gener +refer +manual +page +ousterhout +book +welch +book +program +exampl +welch +book +assign +turn +assign +assign +assign +assign +exampl +xlib +exampl +code +exampl +code +exampl +driver +billthecat +copi +file +directori +contain +slate +new +turnin +work +graphic +machin +libtcl +libtk +reinstal +tclsh +wish +reinstal +compil +walker +demo +graphic +machin +repair +walker +sourc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..ca51e538 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,59 @@ +cryptographi +professor +david +zuckerman +offic +hour +taylor +email +utexa +huiqun +offic +hour +station +taylor +hall +basement +email +hqliu +utexa +syllabu +mathemat +background +homework +homework +textbook +ciphertext +problem +notic +answer +last +problem +french +abl +recogn +french +word +canada +appear +frequenc +common +letter +french +chang +drastic +english +howev +digram +like +appear +page +last +modifi +septemb +comment +welcom +send +email +hqliu +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..043415e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,503 @@ +distribut +comput +distribut +comput +ispr +instructor +lorenzo +alvisiteach +assist +rajeev +joshicont +offic +hour +locat +mechan +requir +textbook +cours +content +grade +problem +set +inform +pertain +final +exam +suggest +solut +midterm +exam +newsgroup +utexa +class +instruct +stafflorenzo +alvisi +taylor +hall +phone +offic +hour +tuesdai +rajeev +joshi +phone +offic +hour +mondai +thursdai +meet +lorenzo +rajeev +arrang +appoint +mechanicsi +expect +class +cover +materi +requiredtextbook +remaind +come +sourc +paper +textbook +refer +sourc +given +classat +appropri +time +lectur +mondai +wednesdai +robert +moor +hall +newsgroup +class +isutexa +class +requir +textbook +distribut +system +second +edit +mullend +editor +acmpress +addison +weslei +publish +compani +read +cours +contentc +cover +abstract +prove +us +expect +tobe +us +design +build +tomorrow +distributedsystem +includ +global +state +cut +logic +vector +clock +causal +messagedeliveri +global +properti +detect +messag +log +checkpoint +replic +manag +state +machin +approach +primari +backupapproach +agreement +protocol +byzantin +agreement +order +multicast +group +program +techniqu +applic +distribut +file +system +cach +disconnect +oper +time +servic +byzantin +clock +synchron +secur +encrypt +authent +secur +group +program +integr +discuss +gener +principl +thepresent +case +studi +exemplifi +principleshav +us +design +implement +real +system +topic +depend +time +interest +present +meor +size +class +allow +give +apresent +topic +includ +distribut +share +memori +distribut +object +kernel +support +distribut +system +weak +consist +replica +manag +protocol +electron +commerc +protocol +wide +area +networksgradingther +written +homework +assign +solut +begrad +solut +demonstr +credibl +effort +onbehalf +author +whether +solut +right +wrong +willrec +better +collabor +homework +assign +three +student +ispermit +encourag +requir +acollabor +singl +solut +submit +forgrad +name +collabor +collaborationswil +consid +violat +academ +integr +written +take +home +midterm +examin +nocollabor +allow +final +exam +student +howev +requir +towrit +final +paper +page +survei +issuesthat +discuss +class +list +suggest +topic +bedistribut +class +mondai +paper +start +ofth +last +class +wednesdai +henc +week +tocomplet +paper +also +team +colleagu +prepar +twolectur +topic +previous +cover +class +choosethi +option +colleagu +requir +write +asingl +survei +paper +page +warmli +encourag +toconsid +volunt +present +give +excellentopportun +improv +commun +skill +problem +setsin +subsequ +problem +set +shouldconform +follow +gener +guidelin +prove +show +synonym +precis +proof +isrequir +ask +prove +show +someth +show +someth +imposs +give +proof +thatmak +clear +problem +cannot +solv +matter +algorithmi +insuffici +show +particular +algorithm +work +algorithm +develop +must +accompani +proof +ofcorrect +unless +explicitli +told +otherwis +problem +snapshot +protocol +discuss +class +thetextbook +assum +commun +channel +fifo +deriv +asnapshot +protocol +asynchron +system +depend +onth +fifo +assumpt +prove +correct +prove +theprotocol +produc +consist +global +state +assum +atmost +snapshot +comput +point +note +book +contain +refer +paper +mattern +thatcontain +solut +problem +urg +resist +thetempt +solv +problem +visit +librari +problem +take +snapshot +distribut +comput +agener +techniqu +comput +stabl +global +predic +moreeffici +protocol +deriv +comput +specif +predic +often +conceptu +simpler +effici +term +ofth +number +messag +exchang +snapshot +base +solut +problem +requir +deriv +special +protocol +detect +deadlock +asynchron +distributedsystem +ideal +protocol +would +need +central +monitorprocess +would +messag +cost +number +process +distribut +system +monitor +basedsnapshot +protocol +detect +deadlock +cost +suggest +solut +problem +nowonlin +link +point +postscript +file +link +point +postscript +filedescrib +second +homework +assign +final +examth +assign +constitut +final +exam +fridaymai +link +point +thepostscript +file +describ +assign +question +feel +freeto +send +email +lorenzo +rajeev +idea +improv +page +pleas +send +yoursuggest +joshi +utexa +edurajeev +joshi +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..4782b472 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,73 @@ +fall +fall +note +page +reflect +taught +fall +current +version +class +differ +content +scope +welcom +homepag +austin +class +taught +bywil +adam +jacob +kornerup +practic +informationabout +cours +look +syllabu +avail +linea +technic +note +compil +program +turn +inhomework +electron +homework +solut +time +crude +interfac +newsgrouputexa +class +correspond +class +takesplac +exampl +textbook +pascalprogramm +organ +chapter +read +link +home +page +requir +read +jacob +kornerup +overhead +avail +viewinginform +class +projecthow +find +jacob +kornerup +offic +studi +midterm +midterm +examand +answer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..ac06e8c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,344 @@ +cours +descriptionc +object +orient +design +programminglast +updat +professor +greg +lavend +lavend +utexa +eduoffic +hour +class +appt +gokul +rajaram +gokul +utexa +eduoffic +hour +mondai +wednesdai +station +newsgroup +utexa +class +lavendercours +infocours +syllabusannouncementslectur +noteshomework +solutionsprogram +assignmentsgnu +manualsstandard +templat +librari +manual +sourc +codesocket +sourc +code +manualdescript +cours +intend +student +alreadi +anintroductori +program +cours +offer +introduct +object +cours +give +student +opportun +think +solut +comput +problem +object +orient +manner +captur +reusablepattern +design +construct +polymorph +typehierarchi +write +program +profici +professionallyus +student +opportun +program +solut +challeng +problem +us +java +cours +text +follow +text +avail +coop +bookstor +us +cours +horstmann +master +object +orient +design +john +wilei +associ +relat +materiali +drawn +lectur +materi +follow +sourc +stroustrup +program +languag +edit +addison +weslei +elli +stroustrup +annot +refer +manual +addison +weslei +stroustrup +design +evolut +addison +weslei +cargil +program +style +addison +weslei +cline +lomow +faq +addison +weslei +coplien +advanc +program +style +idiom +addison +weslei +plauger +draft +standard +librari +prentic +hall +gamma +helm +johnson +vlissid +design +pattern +element +reusabl +object +orient +softwar +addison +weslei +newsgroup +cours +newsgroup +setup +forum +open +discuss +announcementsabout +cours +strongli +encourag +particip +linediscuss +fellow +classmat +utexa +class +lavendery +also +interest +follow +newsgroup +usenet +newsgroup +comp +lang +comp +helpjava +newsgroup +hotjava +comp +lang +java +advocaci +comp +lang +java +comp +lang +java +misc +comp +lang +java +programm +comp +lang +java +secur +comp +lang +java +setup +comp +lang +java +tech +comp +lang +javascript +lang +java +internet +inform +java +oopth +link +date +pleas +know +link +dead +relat +site +ansi +draft +standard +standard +templat +librari +manual +postscript +home +page +sourc +code +lab +sourc +code +renssela +polytechn +institut +product +info +objectspac +compil +librari +libg +libstdc +server +mitgnu +server +cygnusgnu +postscript +document +doug +librari +libg +pagec +faq +compil +comp +lang +archiv +mirror +list +librari +avail +ftpobject +orient +system +developmentindex +program +librariesth +virtual +libraryindex +object +orient +inform +sourcesth +talig +frameworkjava +relat +site +javasoft +home +java +gamelan +huge +registri +java +applet +digit +espresso +good +summari +current +inform +java +java +centr +inform +java +applet +new +event +jar +rate +java +applet +java +faq +java +java +java +archiv +cours +denni +kafura +cours +virginia +techdoug +schmidt +cours +note +irvin +washington +universitydoug +cours +note +sunyintroductori +cours +univers +groningen diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..4e48ea75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,74 @@ +parallel +languag +compilerscst +parallel +languag +compilersfal +lectur +tuesdai +thursdai +instructor +calvin +offic +taylor +phone +email +utexa +offic +hour +tuesdai +thursdai +handout +gener +inform +case +tera +comput +copyright +program +assign +posix +thread +skeleton +code +program +assign +tutori +exampl +hello +world +commun +exampl +ironman +commun +interfac +onlin +manual +manual +postscript +logp +paper +time +spent +messag +pass +share +memori +program +program +assign +foundat +practic +parallel +program +languag +partit +dynam +adapt +grid +hierarchieslast +modifi +decemb +calvin +linlin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..fbdd2bb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,19 @@ +home +page +csintroduct +oper +system +class +inform +handout +assign +read +project +inform +group +send +mail +prof +send +mail +newsgroup diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..418b7dc7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,17 @@ +read +list +fall +thot +topic +distribut +systemsfil +systemstopolog +distribut +systemselectron +commenrcefailur +detectorsdistribut +objectsconsistencysecuregroup +communicationlanguag +system +dsmmobil +comput diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..53d8efec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,15 @@ +mine +monitor +databas +mine +monitor +databasesprof +daniel +mirankernew +seminarschedul +term +project +materi +overviewtentativeread +list +homeworkproject diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..6f86223d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,28 @@ +introduct +artifici +intellig +instructor +raymond +mooneytim +placespr +tuth +taylor +hall +cours +informationclick +cours +inform +sheetand +cours +syllabu +last +year +updat +file +moonei +code +depart +networkfor +code +trace +assign diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..5662d41c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,45 @@ +lisp +symbol +program +instructor +raymond +mooneyteach +assist +sowmya +ramachandrantim +placetu +cours +informationclick +cours +inform +sheet +cours +syllabu +informationon +lisp +alsout +allegro +info +page +textparadigm +artifici +intellig +program +case +studi +common +lispassignmentsse +file +moonei +code +depart +networkfor +code +trace +homework +homework +homework +homework +test +test +test diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..ae7f0c9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,71 @@ +machin +learn +instructor +raymond +mooneytim +placetu +cours +informationclick +cours +inform +sheetand +cours +syllabu +textmachinelearninglectur +slide +introduct +machin +learn +concept +learn +gener +order +decis +tree +learn +experiment +evalu +comput +learn +theori +rule +learn +induct +logic +program +neural +network +learn +cluster +unsupervis +learn +bayesian +learn +instanc +base +learn +explan +base +learningassignmentsse +file +moonei +code +depart +networkfor +code +trace +homework +homework +homework +homework +homework +final +project +project +suggest +spring +paper +format +outlin +talk +version diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..febe2bf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,126 @@ +foundat +comput +sciencec +foundat +comput +sciencec +introduct +comput +scienc +program +section +intend +major +atleast +semest +program +high +school +cours +number +willb +chang +next +year +never +taken +programmingcours +take +porter +section +instead +strong +math +background +least +precalculu +requir +program +languag +scheme +dialect +lisp +theschem +implement +call +gambit +run +macintoshcomput +cours +move +faster +previou +coursesand +emphas +concept +program +languag +syntax +program +work +hard +hopefulli +learninga +syllabu +directori +softwar +scheme +tutorcopi +scheme +pcassign +machin +languag +simulationassign +surf +webassign +basic +schemeassign +plai +peano +gamblingassign +turtl +graphicsassign +snow +treesassign +list +manipulationstudi +guid +exam +vocabulari +exam +assign +plot +thickensassign +treasur +huntassign +symbol +algebraassign +data +abstract +matricesstudi +guid +exam +assign +draw +treesassign +express +unparsingassign +languag +translationstudi +guid +final +exam +thur +gordon +novak +program +assignmentsprogram +file +descriptionsprogram +submiss +gradingmidterm +studi +guidefin +exam +studi +guid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..53655e04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,52 @@ +compilersc +compilersc +cover +design +construct +compil +programminglanguag +student +write +compil +pascal +codei +gener +powerpc +processor +server +incorpor +powerpc +chip +cours +heavi +program +workload +especi +summer +student +plan +take +cours +summer +expect +dedicatetheir +live +cours +five +week +syllabusprogram +assignmentsprogram +file +descript +directori +program +file +program +submiss +gradingmidterm +studi +guidefin +exam +studi +guidegordon +novak diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..b19748ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,60 @@ +artifici +intelligencec +artifici +intelligenceartifici +intellig +defin +studi +thecomput +requir +intellig +behavior +attempt +todupl +comput +us +comput +intellig +connectspercept +environ +action +appropri +achiev +thegoal +actor +cours +survei +major +topic +includ +search +logic +andknowledg +represent +natur +languag +process +withbrief +coverag +brain +machin +vision +syllabusprogram +assignmentsprogram +file +descriptionsmidterm +studi +guidefin +exam +studi +guidepred +calculu +stori +problemssolut +select +stori +problemsnot +bibliographi +human +braingordon +novak diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..14ad68c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,59 @@ +automat +programmingc +automat +programmingautomat +program +gener +execut +programsfrom +specif +higher +level +ordinari +program +languag +cours +consist +lectur +first +third +semest +homework +problem +program +assign +given +illustrateth +lectur +materi +program +long +requirelearn +sever +kind +program +system +latter +partof +semest +cover +read +research +literatur +student +expect +present +paper +class +syllabusbibliographyassign +compil +optim +done +handpattern +matchingobject +orient +programmingintroduct +glispview +graphic +programminggordon +novak diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..8b3b5c53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,1171 @@ +introduct +graduat +comput +sciencecst +introduct +graduat +comput +sciencefal +lectur +mondai +instructor +robert +blumof +offic +taylor +phone +email +utexa +offic +hour +thursdai +feel +free +stop +time +semest +seminar +cours +taken +apass +fail +basi +graduat +student +cours +introduct +graduat +comput +scienc +undergradu +cours +undergradu +topic +comput +scienc +honor +receiv +credit +cours +student +must +beregist +cours +attend +least +lectur +schedulespeakertitleseptemb +mirankeralamo +data +warehouseseptemb +kuipersth +spatial +semant +hierarchi +humanand +robot +cognit +mapsseptemb +robert +blumofecilk +cilk +adapt +reliableparallel +comput +network +workstationsseptemb +risto +miikkulainenlearn +sequenti +decis +task +throughsymbiot +evolut +neural +networksoctob +vladimir +lifschitzmathemat +principl +logic +programmingoctob +paul +wilsonextens +languag +open +compil +reflectionoctob +mooneylearn +process +natur +languag +usinginduct +logic +programmingoctob +mike +dahlindistribut +cluster +internetsnovemb +gordon +novaksoftwar +reus +special +genericprocedur +viewsnovemb +vijaya +ramachandranth +design +evalu +parallelalgorithmsnovemb +lorenzo +alvisilighweight +fault +tolerancenovemb +calvin +linadapt +librari +high +level +optimizationdecemb +greg +plaxtonanalysi +algorithmslighweight +fault +tolerancelorenzo +alvisidistribut +system +move +beyond +confin +academia +andresearch +lab +revolution +busi +government +organ +simpl +citizen +process +andcollect +inform +current +technolog +trend +promis +todramat +increas +pace +revolut +enabl +thedesign +highli +cooper +distribut +applic +beyondth +client +server +paradigm +har +comput +power +ofdistribut +system +environ +scope +emphasi +fault +toleranttechniqu +undergo +dramat +chang +fault +toler +willceas +expens +featur +requir +hand +applicationsto +toler +exot +failur +user +highli +distributedinform +infrastructur +fault +toler +translat +acompetit +advantag +guarante +reliabl +access +criticalinform +talk +explor +design +engineerfault +toler +solut +call +lightweight +fault +toler +goal +lightweight +fault +toler +requir +dedic +resourc +negligibleimpact +perform +failur +free +execut +scale +cost +depend +sever +number +offailur +need +toler +integr +applic +transpar +theapplic +programm +enabl +support +emerg +applic +communicatethrough +messag +well +file +address +softwar +gener +fault +effect +cilk +cilk +adapt +reliabl +parallel +comput +onnetwork +workstationsrobert +blumofethi +present +overview +cilk +pronouncedsilk +algorithm +parallel +multithread +languag +andcilk +runtim +system +support +functionalsubset +cilk +network +workstat +cilk +providesadapt +parallel +fault +toler +tranpar +touser +program +adapt +parallel +mean +ofworkst +cilk +program +run +grow +shrinkdynam +depend +avail +idl +workstat +onth +amount +parallel +within +program +addit +cilkprogram +continu +execut +even +workstationscrash +cilk +runtim +system +automat +detect +andrecov +failur +present +includ +livedemonstr +distribut +cluster +internetsmik +dahlinthi +present +give +overview +current +issu +indistribut +file +system +technolog +trend +applicationsmotiv +aggress +cluster +wide +area +network +system +inclust +fast +network +allow +machin +cooper +close +servicei +request +file +system +us +close +cooper +among +nodesto +provid +better +perform +avail +singl +centralserv +wide +area +network +challeng +provid +goodperform +avail +consist +despit +limit +networkperform +node +network +failur +file +system +projectwil +explor +issu +spatial +semant +hierarchi +human +robot +cognit +mapsbenjamin +kuipershuman +cognit +map +reli +sever +differ +represent +forlarg +scale +space +ontolog +similarli +varietyof +differ +approach +propos +robot +explor +andmap +unknown +environ +cast +diverserepresent +natur +structur +call +spatialsemant +hierarchi +object +relat +andassumpt +level +abstract +level +level +mathemat +foundat +thecontrol +level +allow +robot +environ +formal +continu +dynam +system +whose +stabl +equilibrium +point +beabstract +discret +distinct +state +trajectori +link +state +abstract +action +givinga +discret +causal +graph +represent +state +space +causalgraph +state +action +turn +abstract +topologicalnetwork +place +path +local +metric +model +occupancygrid +neighborhood +place +path +built +theframework +topolog +network +without +usual +problem +ofglob +consist +mathemat +principl +logic +programmingvladimir +lifschitzlog +program +well +sister +approach +functionalprogram +base +view +comput +program +notne +contain +explicit +oper +instruct +instead +itcan +simpli +provid +fact +problem +sufficientto +solv +declar +program +executedus +method +autom +reason +prolog +best +known +logicprogram +languag +mathemat +theori +logic +program +concern +withdefin +semant +logic +program +languag +describ +thereason +algorithm +us +implement +investig +thesound +algorithm +adapt +librari +high +level +optimizationcalvin +linthi +talk +describ +approach +build +softwar +librari +make +librari +adapt +implement +andtheir +interfac +differ +applic +need +differenthardwar +platform +produc +librari +efficientand +wide +usabl +describ +framework +build +suchlibrari +describ +three +plan +experi +appli +thesetechniqu +librari +parallel +scientif +comput +weexplain +approach +facilit +high +level +optim +learn +sequenti +decis +task +symbiot +evolut +ofneur +networksristo +miikkulainena +novel +reinforc +learn +method +call +sane +symbiot +adapt +neuro +evolut +evolv +popul +neuronsthrough +genet +algorithm +form +neural +network +given +task +symbiot +evolut +promot +cooper +special +inth +popul +result +fast +effici +genet +search +anddiscourag +converg +suboptim +solut +sane +abl +toextract +domain +specif +inform +even +spars +reinforc +make +effect +approach +broad +rang +sequentialdecis +task +robot +control +game +plai +resourcemanag +alamo +data +warehousedan +mirankerth +alamo +effort +direct +intra +develop +andint +user +enumer +interest +site +datasourc +goal +integr +data +sourc +provid +theuser +illus +singl +virtual +databas +follow +byqueri +analysi +present +tool +central +alamo +architectur +softwar +call +theabstract +search +machin +corba +compliant +interfacethat +provid +uniform +interfac +heterogen +datasourc +beyond +simpl +data +access +embodi +higher +level +ofabstract +enabl +effici +code +clever +search +algorithmsand +separ +isol +system +concern +includ +buffer +anddata +prefetch +broad +claim +high +perform +often +optim +implement +advanc +databas +facil +anobject +orient +queri +engin +deduct +infer +engin +activedatabas +engin +data +mine +facil +constructedus +common +interfac +final +sinc +output +advanc +databasefacil +serv +data +sourc +compon +thealamo +architectur +compos +resolv +higher +level +dataintegr +problem +particular +anticip +us +elementsof +alamo +repres +meta +data +resolv +structur +andsemant +conflict +among +data +sourc +ultim +furthercomposit +embodi +complex +knowledg +base +abl +answerhigh +level +queri +learn +process +natur +languag +us +induct +logicprogrammingraymond +mooneyinduct +logic +program +address +problem +learningprolog +program +exampl +represent +power +offirst +order +logic +offer +advantag +standard +machin +learningmethod +constrain +fix +length +featur +vector +areappli +method +natur +languag +learn +believethi +richer +represent +offer +import +advantag +havedevelop +system +chill +learn +determinist +parsersfrom +corpu +pars +sentenc +chill +obtain +superior +result +onsever +artifici +corpora +previous +us +test +neural +networkmethod +encourag +result +realist +ati +corpu +ofairlin +queri +chill +also +us +automaticallydevelop +complet +natur +languag +interfac +translat +englishdatabas +queri +execut +prolog +form +produc +moreaccur +parser +hand +built +system +queri +smallgeograph +databas +also +develop +system +foidl +appli +learn +past +tens +english +surpass +previou +result +neural +network +decis +treemethod +problem +softwar +reus +special +gener +procedur +throughviewsgordon +novak +softwar +reus +clearli +good +idea +difficult +toachiev +practic +data +assumpt +thesoftwar +reus +softwar +difficult +approach +view +describ +applic +data +type +implement +abstract +typesus +gener +procedur +compil +process +special +agener +procedur +produc +version +custom +theapplic +data +graphic +user +interfac +make +easi +specifyview +automat +program +server +implement +theworld +wide +write +special +program +user +adesir +languag +serv +sourc +code +user +file +analysi +algorithmsgreg +plaxtona +major +focu +theoret +comput +scienc +design +andanalysi +asymptot +effici +algorithm +sequenti +parallel +distribut +determinist +random +forspecif +comput +problem +research +area +notuncommon +come +across +well +written +paper +inform +main +underli +idea +conceptu +straightforward +theform +present +surprisingli +lengthi +theform +deal +minor +side +issu +special +case +havelittl +noth +main +underli +idea +suchpap +seem +signific +conceptu +andform +difficulti +algorithm +present +gapsinher +convent +mathemat +notat +simpli +inadequatefor +succinctli +formal +certain +conceptu +straightforwardalgorithm +idea +talk +describ +notat +asymptot +analysi +call +notat +significantli +reduc +theconceptu +formal +associ +trivialclass +algorithm +concret +exampl +consid +analysisof +well +known +linear +time +select +algorithm +blum +floyd +pratt +rivest +tarjan +design +evalu +parallel +algorithmsvijaya +ramachandranth +design +analysi +effici +parallel +algorithm +forcombinatori +problem +area +extens +studi +recentyear +larg +number +algorithm +develop +theabstract +pram +model +parallel +comput +talk +willdescrib +work +design +effici +parallelalgorithm +experi +implement +evalu +thesealgorithm +massiv +parallel +machin +maspar +thendescrib +queu +variant +pram +model +wepropos +appropri +model +current +avail +parallelshar +memori +machin +tradit +pram +model +extens +languag +open +compil +reflectionpaul +wilsonextens +languag +allow +interest +featur +ad +languag +portabl +within +languag +open +compil +allow +fairli +easi +modif +compil +addnew +featur +analys +optim +reflect +allow +program +examin +represent +ofinterest +part +affect +structureaccordingli +discuss +thing +us +build +modular +portabl +adapat +softwar +also +discuss +recent +workon +rscheme +compil +open +compil +extensiblelanguag +last +modifi +novemb +robert +blumoferdb +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..4b75c408 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,135 @@ +introduct +oper +systemsc +introduct +oper +systemsfal +lectur +mondai +wednesdai +instructor +robert +blumof +offic +taylor +phone +email +utexa +offic +hour +thursdai +feel +free +stop +time +teach +assist +subramanyam +gooti +offic +phone +email +gooti +utexa +offic +hour +tuesdai +thursdai +station +exampl +solut +program +assign +solari +canb +found +crypt +crypt +implement +support +multiplemap +assum +map +file +least +long +themap +exampl +test +program +found +encrypt +decrypt +assign +problem +problem +problem +program +assign +problem +handout +gener +inform +problem +problem +problem +solut +problem +solut +topic +cover +midterm +exam +midterm +exam +solut +problem +program +assign +problem +solut +problem +topic +cover +final +exam +problem +solutionsread +book +chapter +lectur +date +chapter +septemb +chapter +except +septemb +chapter +except +septemb +chapter +except +septemb +octob +chapter +except +octob +chapter +octob +chapter +octob +novemb +chapter +novemb +chapter +except +novemb +chapter +except +decemb +last +modifi +decemb +robert +blumoferdb +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..a7e083e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,54 @@ +neural +network +fall +neural +networksfal +uniqu +number +instructor +risto +miikkulainen +risto +utexa +offic +bednar +jbednar +utexa +offic +station +text +lauren +fausett +fundament +ofneur +network +architectur +algorithm +applic +englewood +cliff +prenticehal +select +paper +class +note +copi +slide +us +lectur +grade +homework +midterm +final +detail +neural +network +class +schedulehomework +assignmentsexamsclass +resourcesa +postscript +versionof +syllabusristo +utexa +edusun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..73c8bd88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,103 @@ +introduct +cognit +scienc +fall +introduct +cognit +sciencefal +instructor +nichola +asher +philosophi +dept +waggen +hall +nasher +berti +utexa +offic +hour +appt +risto +miikkulainen +dept +comput +scienc +taylor +hall +risto +utexa +offic +hour +appt +text +posner +foundat +cognit +scienc +mitpress +packet +read +requir +student +regular +interv +submit +discuss +note +short +page +critic +commentari +read +collabor +withanoth +student +also +write +short +paper +approxim +page +discuss +signific +research +topic +find +ofinterest +discuss +note +count +toward +final +grade +thepap +class +attend +particip +read +alsorequir +detail +cours +descriptioncours +schedulediscuss +notesperson +adscollabor +paperclass +resourcesstud +questionnaireus +link +cognit +scienc +center +list +cognit +sciencefaculti +pointer +cognit +scienc +resourc +gener +risto +utexa +edusun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..29eb615e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,259 @@ +pascal +programmingcsp +pascalintroductori +comput +programminginstructor +suzi +gallagherwelcom +pascal +program +excit +intellectu +challeng +cspi +design +give +firm +foundat +pascal +program +andso +need +effort +read +page +thecours +syllabu +carefulli +page +summari +cours +syllabu +contain +detail +assign +requir +wella +import +polici +schedul +date +exam +otherdeadlin +respons +everyth +thesyllabu +without +delai +avail +jenn +copi +guadalup +hundr +student +takethi +courseeach +semest +highli +structur +respons +foral +detail +monitor +page +thenewsgroup +utexa +class +frequent +updat +cours +take +work +expect +depend +howwel +prepar +event +cours +becom +extrem +difficult +behind +grade +procedur +riski +wait +near +deadlineto +take +quiz +turn +program +late +quiz +program +get +half +credit +wait +line +becom +long +hour +deadlin +warn +unfortun +page +construct +semest +mani +link +nowher +apolog +everyon +attend +lectur +suzi +gallagh +everi +thursdayeven +welch +mondai +wednesdai +break +intosmal +group +section +discuss +ofth +materi +ateach +assist +nine +program +assign +written +debug +grade +theprogram +laboratori +note +thatlaboratori +hour +limit +thatgrad +hour +even +less +andyou +respons +schedul +work +within +limit +take +eight +quizz +thetest +room +note +thattest +room +hour +limitedand +respons +take +quizz +deadlin +warn +often +long +wait +foravail +proctor +grade +assign +quizz +sever +hoursbefor +deadlin +respons +submit +program +andquizz +earli +enough +grade +three +exam +must +betaken +prescrib +time +make +exam +soon +possibl +begin +semest +need +thetest +room +file +openedfor +uniqu +identifi +assign +yourstud +access +orsak +requiredtextbook +pascal +dale +weem +wewil +cover +chapter +individu +student +background +vari +consider +thiscours +partial +self +pace +feel +well +prepar +click +detail +howev +still +liabl +requir +syllabu +thenewsgroup +utexa +class +commun +student +class +exampl +could +form +studi +group +mani +student +also +gripe +cours +thought +articl +gener +interest +elicit +repli +staff +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..e4a7b109 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,110 @@ +databas +system +implement +databas +system +implement +fall +professor +batori +tong +wang +syllabu +homework +problem +solut +problem +solut +problem +solut +project +pleas +read +first +test +file +project +retriev +sampl +output +ret_into +sampl +output +replac +sampl +output +append +sampl +output +delet +sampl +output +test +file +project +recoveri +recoveri +recoveri +recoveri +data +file +project +data +dept +data +student +data +contest +contest +mdb +benchmark +script +data +file +data +student +data +dept +data +time +us +measur +run +time +sinc +order +tupl +attribut +tupl +differ +output +anoth +wrote +perl +script +transform +differ +output +order +compar +diff +turn +program +pass +benchmark +without +error +program +fail +pass +reason +test +sampl +output +pleas +email +suggest +comment +medec +tong +wang diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..cb82efe5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,597 @@ +advanc +oper +system +advanc +oper +system +instructor +harrick +tabl +content +cours +descript +gener +inform +prerequisit +synopsi +textbook +cours +requir +read +list +fall +earli +idea +system +distribut +file +oper +system +real +time +oper +system +system +support +wireless +mobil +comput +cours +descript +gener +inform +cours +titl +advanc +oper +system +instructor +professor +harrick +last +offer +fall +prerequisitegradu +stand +undergradu +cours +oper +systemssuch +student +expect +familiar +materialin +chapter +oper +system +concept +peterson +andsilberschatz +synopsisc +breadth +cours +advanc +oper +system +coveringboth +theoret +practic +issu +oper +systemdesign +topic +cover +includ +design +implement +ofdistribut +real +time +oper +system +system +support +formobil +wireless +comput +environ +case +studi +anemphasi +place +current +design +issu +research +topic +textbook +collect +research +articl +made +avail +theinstructor +cours +requirementsstud +requir +read +number +paper +area +anddiscuss +grade +determin +examin +aterm +project +project +present +read +list +fall +earli +idea +systemsfernando +corbato +marjori +merwin +daggett +robert +dalei +anexperiment +time +share +system +afip +proceed +spring +joint +comput +confer +page +brinch +hansen +nucleu +multiprogram +system +commun +page +april +bensoussan +clingen +dalei +multic +virtualmemori +concept +design +commun +page +denni +ritchi +thompson +unix +time +share +system +commun +page +juli +distribut +file +oper +system +overview +paper +andrew +tannenbaum +robbert +reness +distribut +operatingsystem +comput +survei +page +decemb +levi +silberschatz +distribut +file +system +concept +andexampl +comput +survei +page +decemb +process +thread +managementa +tucker +gupta +process +control +schedul +issu +formultiprogram +share +memori +multiprocessor +proceed +theth +sosp +oper +system +review +page +decemb +thoma +anderson +edward +lazowska +henri +levi +theperform +implic +thread +manag +altern +forshar +memori +multiprocessor +ieee +transact +comput +page +decemb +schedulingr +bunt +schedul +techniqu +oper +system +ieee +comput +page +octob +black +schedul +support +concurr +parallel +inth +mach +oper +system +ieee +comput +page +inter +process +communicationj +barrera +fast +mach +network +implement +inproceed +usenix +mach +symposium +novemb +cheriton +distribut +process +group +kernel +acmtransact +comput +system +page +remot +procedur +call +andrew +birel +bruce +nelson +implement +rpc +transact +oncomput +system +page +februari +bershad +anderson +lazowska +levi +lightweightremot +procedur +call +proceed +symposium +onoper +system +principl +oper +system +review +page +decemb +process +migrationf +dougli +ousterhout +process +migrat +spriteoper +system +proceed +ieee +internationalconfer +distribut +comput +system +berlin +germani +page +septemb +theimer +lantz +cheriton +preemptabl +remot +execut +proceed +sosp +oper +system +review +page +decemb +fault +tolerancef +cristian +basic +concept +issu +fault +toler +distributedsystem +intern +workshop +oper +system +sand +beyond +karshmer +nehmer +springer +verlag +birman +joseph +reliabl +commun +presenc +offailur +transact +comput +system +page +februari +file +systemsr +sandberg +goldberg +kleiman +design +implement +ofsun +proceed +summer +usenix +confer +page +june +mckusick +leffler +fabri +fastfil +system +unix +transact +comput +system +page +august +rosenblum +ousterhout +design +implement +alog +structur +file +system +proceed +sosp +operatingsystem +review +novemb +cach +distribut +systemsm +schroeder +gifford +needham +cach +file +system +fora +programm +workstat +proceed +sosp +page +terri +cach +hint +distribut +system +ieeetransact +softwar +engin +page +januari +protect +securityr +needham +schroeder +us +encrypt +authent +inlarg +network +comput +commun +page +decemb +butler +lampson +protect +oper +system +review +page +januari +origin +proc +princeton +symposium +oninform +scienc +system +system +princeton +march +kernel +cheriton +distribut +system +commun +page +march +accetta +baron +boloski +golub +rashid +tevanian +young +mach +kernel +foundat +unix +develop +proceed +usenix +summer +confer +page +june +real +time +oper +systemsh +kopetz +event +trigger +versu +time +trigger +real +timesystem +proceed +intern +workshop +onoper +system +beyond +germani +karshmer +nehmer +springer +verlag +page +layland +schedul +algorithm +formultiprogram +hard +real +time +environ +journal +theacm +page +januari +zhao +ramamritham +stankov +preemptiv +schedulingund +time +resourc +constraint +ieee +transact +comput +page +august +tokuda +mercer +art +distribut +real +time +kernel +oper +system +review +page +juli +system +support +wireless +mobil +computingb +badrinath +acharya +imielinski +impact +mobil +ondistribut +comput +oper +system +review +page +april +satyanarayanan +kistler +kumar +okasaki +siegel +steer +coda +highli +avail +file +system +distributedworkst +environ +ieee +transact +comput +page +april +harrick diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..c93a33d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,827 @@ +multimedia +system +multimedia +system +instructor +harrick +tabl +content +cours +descript +gener +inform +prerequisit +synopsi +textbook +cours +requir +offic +hour +teach +assist +read +list +spring +overview +technolog +trend +problem +media +compress +multimedia +storag +server +network +architectur +multimedia +applic +protocol +oper +system +support +multimedia +multimedia +databas +class +handout +note +postscript +format +cours +descript +read +list +overview +technolog +trend +problem +septemb +video +compress +fundament +septemb +jpeg +mpeg +compress +algorithm +septemb +scalabl +compress +algorithm +septemb +multimedia +server +design +issu +septemb +placement +issu +multimedia +server +design +septemb +retriev +issu +multimedia +server +design +octob +list +possibl +project +octob +cach +batch +techniqu +octob +integr +servic +network +introduct +novemb +algorithm +real +time +channel +establish +novemb +packet +schedul +algorithm +analysi +novemb +oper +system +support +multimedia +novemb +processor +schedul +techniqu +novemb +transport +protocol +issu +decemb +cours +descriptiongener +informationcours +titl +multimedia +system +instructor +professor +harrick +cours +detail +offer +fall +uniqu +number +time +place +taylor +hall +prerequisitesgradu +stand +familiar +basic +concept +incomput +network +oper +system +design +synopsisc +cours +advanc +system +cover +boththeoret +practic +issu +design +multimediasystem +topic +cover +includ +introduct +multimediasystem +digit +video +compress +techniqu +oper +systemsupport +digit +audio +video +well +network +transportprotocol +multimedia +emphasi +place +current +designissu +research +topic +textbooka +collect +recent +research +articl +madeavail +instructor +cours +requirementsth +instructor +introduc +basic +concept +thetop +follow +class +discuss +relatedpap +question +answer +format +student +expect +tounderstand +describ +critiqu +research +contribut +ofpap +addition +student +expect +carri +asemest +long +implement +project +grade +determin +project +examin +andclass +particip +offic +hour +harrick +vintuesdai +appoint +phone +mail +utexa +teach +assistantmr +prashant +shenoi +offic +hour +appoint +mail +shenoi +utexa +eduread +list +fall +cours +packet +cntain +copi +paper +theread +list +avail +mondai +septemb +speedwai +copi +locat +dobi +mall +guadalup +austin +phone +number +pleas +callthem +make +sure +packag +readi +video +compressionr +steinmetz +data +compress +techniqu +multimedia +comput +principl +techniqu +multimedia +system +page +wallac +jpeg +still +pictur +compress +standard +commun +page +april +gall +mpeg +video +compress +standard +multimediaappl +commun +page +april +chiang +anastassi +hierarch +code +digitaltelevis +ieee +commun +magazin +multimedia +storag +serversoverview +shenoi +goyal +issu +multimedia +serverdesign +comput +survei +page +decemb +gemmel +kandlur +venkat +rangan +row +multimedia +storag +server +tutori +survei +ieee +comput +page +effici +placement +techniqu +goyal +optim +placement +ofmultimedia +object +disk +arrai +proceed +ieeeintern +confer +multimedia +comput +system +icmc +washington +page +shenoi +effici +failur +recoveri +inmulti +disk +multimedia +server +proceed +annualintern +symposium +fault +toler +comput +ftc +pasadena +california +page +june +chiueh +katz +multi +resolut +video +represent +forparallel +disk +arrai +proceed +multimedia +anaheim +page +august +retriev +techniqu +admiss +control +algorithm +chen +kandlur +design +analysi +groupedsweep +scheme +multimedia +storag +manag +proceed +ofthird +intern +workshop +network +oper +system +supportfor +digit +audio +video +diego +page +novemb +narasimha +reddi +wylli +disk +schedul +multimediai +system +proceed +multimedia +anaheim +page +august +goyal +goyal +goyal +statist +admissioncontrol +algorithm +multimedia +server +proceed +acmmultimedia +francisco +page +octob +goyal +goyal +algorithm +designinglarg +scale +multimedia +server +comput +commun +page +march +shenoi +effici +support +scan +oper +inmultimedia +server +proceed +multimedia +sanfrancisco +page +novemb +chen +kandlur +stream +convers +support +interactivevideo +playout +ieee +multimedia +magazin +page +summer +buffer +space +manag +cach +sitaram +shahabuddin +dynam +batch +polici +foran +demand +video +server +multimedia +system +page +june +sitaram +buffer +manag +polici +demandvideo +server +research +report +octob +papadimitri +ramanathan +venkat +rangan +informationcach +deliveri +person +video +program +homeentertain +channel +proceed +internationalconfer +multimedia +comput +system +icmc +boston +page +network +architectur +multimedianetwork +layer +issu +multimedia +shenker +fundament +design +issu +futur +internet +ieee +journal +select +area +commun +page +septemb +ferrari +verma +scheme +real +time +channelestablish +wide +area +network +ieee +journal +select +areasin +commun +page +april +zhang +keshav +comparison +rate +base +servicedisciplin +proceed +sigcomm +august +goyal +determin +delaybound +heterogen +network +multimedia +system +toappear +also +proceed +intern +workshopon +network +oper +system +support +digit +audio +video +nossdav +durham +hampshir +page +april +chow +algorithm +losslesssmooth +mpeg +video +proceed +sigcomm +london +septemb +salehi +zhang +kuros +towslei +support +storedvideo +reduc +rate +variabl +resourc +requirementsthrough +optim +smooth +proceed +sigmetr +philadelphia +grossglaus +keshav +rcbr +simpl +efficientservic +multipl +time +scale +traffic +proceed +acmsigcomm +page +august +kanakia +misra +reibman +adapt +congestioncontrol +scheme +real +time +packet +video +transport +proceedingsof +sigcomm +comput +commun +review +page +octob +multimedia +transport +protocol +clark +tennenhous +architectur +consider +newgener +protocol +proceed +sigcomm +campbel +coulson +hutchison +qualiti +servicearchitectur +comput +commun +review +page +april +turner +peterson +imag +transfer +design +proceed +sigcomm +comput +commun +review +page +octob +floyd +jacobson +mccann +zhang +reliablemulticast +framework +light +weight +session +applic +levelfram +proceed +sigcomm +boston +page +august +buss +deffner +schulzrinn +dynam +control +ofmultimedia +applic +base +comput +commun +januari +blakowski +steinmetz +media +synchron +survei +refer +model +specif +case +studi +ieee +journal +onselect +area +commun +page +januaryoper +system +support +multimediag +coulson +campbel +robin +blair +papathoma +andd +shepherd +design +control +basedcommun +system +choru +ieee +journal +select +area +incommun +page +druschel +abbott +pagel +peterson +analysi +subsystem +design +multimedia +workstat +proceed +ofth +third +intern +workshop +network +oper +systemssupport +digit +video +audio +diego +page +novemb +govindan +anderson +schedul +mechan +forcontinu +media +proceed +symposium +operatingsystem +principl +pacif +grove +page +octob +goyal +hierarch +schedul +formultimedia +oper +system +proceed +second +symposiumon +oper +system +design +implement +osdi +seattl +washington +octob +applic +multimedia +conferencingh +zellweg +swinehart +venkatrangan +multimedia +conferenc +etherphon +environ +ieee +comput +page +octob +mccann +jacobson +flexibl +framework +packetvideo +proceed +multimedia +francisco +page +novemb +harrick diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..7c1ca6c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,460 @@ +multimedia +commun +databas +multimedia +commun +databas +fall +instructor +harrick +tabl +content +cours +descript +gener +inform +prerequisit +synopsi +textbook +cours +requir +offic +hour +read +list +multimedia +conferenc +rout +multicast +internet +servic +processor +schedul +support +multimedia +databas +cours +descript +gener +inform +cours +titl +multimedia +commun +databas +instructor +professor +harrick +semest +fall +meet +time +fridai +prerequisitesgradu +stand +familiar +basic +concept +networkprotocol +oper +system +design +multimediasystem +cours +synopsisthi +advanc +cours +multimedia +system +topic +bediscuss +cours +includ +transport +protocol +design +formultimedia +rout +multicast +mobil +network +andmultimedia +oper +system +support +multimedia +multimediadatabas +emphasi +place +current +design +issu +andresearch +topic +textbook +collect +research +articl +made +avail +theinstructor +cours +requirementsstud +requir +read +number +paper +area +aswel +present +discuss +class +grade +determinedbas +paper +present +class +particip +studentsenrol +letter +grade +requir +submit +paper +orcarri +project +offic +hoursfridai +appoint +phone +mail +utexa +read +list +multimedia +conferenc +mccann +jacobson +flexibleframework +packet +video +proceed +acmmultimedia +francisco +novemb +handlei +wakeman +crowcroft +confer +controlchannel +protocol +cccp +scalabl +base +build +conferencecontrol +applic +proceed +sigcomm +boston +gajewska +kistler +manass +redel +argo +systemfor +distribut +collabor +proceed +acmmultimedia +francisco +novemb +gong +multipoint +audio +video +control +packet +basedmultimedia +conferenc +proceed +acmmultimedia +francisco +novemb +zellweg +swinehart +venkat +rangan +multimedia +conferenc +etherphon +environ +ieeecomput +octob +rout +multicast +deer +cheriton +multicast +rout +datagraminternetwork +extend +lan +transact +computersystem +page +ballardi +franci +crowcroft +core +base +tree +architectur +scalabl +inter +domain +multicast +rout +proceed +sigcomm +page +thyagarajan +deer +hierarch +distanc +vector +multicast +rout +mbone +proceed +sigcomm +boston +widyono +design +andevalu +rout +algorithm +real +time +channel +msthesi +berkelei +kompella +pasqual +polyzo +multicast +rout +multimediacommun +technic +report +univers +california +diego +floyd +jacobson +mccann +zhang +reliabl +multicast +framework +light +weightsess +applic +level +frame +proceed +ofacm +sigcomm +boston +holbrook +singhal +cheriton +base +receiv +reliabl +multicast +fordistribut +interact +simul +proceed +theacm +sigcomm +boston +herzog +estrin +shenker +share +cost +multicast +tree +axiomat +analysi +proceed +sigcomm +boston +gupta +how +moran +nguyen +resourc +share +multi +parti +real +timecommun +proceed +infocom +internet +servicesj +guyton +schwartz +locat +nearbi +copi +replic +internet +server +proceed +sigcomm +mogul +case +forpersist +connect +http +proceed +acmsigcomm +processor +schedul +supportc +warldersburg +weihl +lotteri +schedul +effici +flexibleproport +share +resourc +mangement +proceed +ofoper +system +design +implement +osdi +warldersburg +weihl +strideschedul +determinist +proport +share +resourcemanag +technic +report +golestani +self +clock +fair +queue +scheme +high +speedappl +proceed +infocom +govindan +anderson +schedul +mechan +forcontinu +media +proceed +symposium +onoper +system +principl +sosp +monterei +jeffai +real +timeproduc +consum +paradigm +paradigm +construct +ofeffici +predict +real +time +system +proceed +ofth +sigapp +symposium +appli +comput +jeffai +latenc +manag +intim +share +oper +system +workshop +real +timeoper +system +softwar +seattl +multimedia +databasesw +niblack +qbic +project +queri +imag +contentus +color +textur +shape +technic +report +februari +cawkel +pictur +queri +pictur +databas +journal +ofinform +scienc +page +bach +paul +jain +interact +imag +managementsystem +face +inform +retriev +ieee +transact +knowledgeand +data +engin +page +august +gupta +weymouth +jain +semant +queri +pictur +vimsi +model +proceed +intern +confer +onveri +larg +databas +harrick diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..fad6a0e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,147 @@ +algorithm +techniqu +theori +fall +algorithm +techniqu +theori +fall +instructor +vijaya +ramachandranuniqu +number +cours +descript +cours +handout +cours +handout +home +work +home +work +home +work +home +work +home +work +home +work +final +exam +instruct +final +exam +updat +respons +question +receiv +question +pose +quot +problem +last +sentenc +take +time +sigma +onsigma +word +time +refer +amort +time +total +time +oroth +answer +total +time +problem +well +known +face +data +structur +disjoint +set +requir +omega +time +inth +worst +case +text +book +omega +isther +differ +answer +meant +omega +first +sentenc +second +paragraphof +chapter +note +problem +last +sentenc +vertic +cycl +containdistinct +label +vertic +cycl +cancontain +label +right +answer +right +think +problem +final +exam +littl +unclear +denot +size +largest +true +submatrix +size +largest +true +submatrix +equal +answer +submatrix +true +somek +entri +true +updat +fridai +decemb +updat +mondai +decemb +receiv +question +final +exam +bepost +either +question +answer +question +youhav +sent +question +pleas +address +us +yourbest +judgment diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..b94ca9e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,519 @@ +wilson +class +pagec +program +languag +wilson +class +pagethi +home +page +paulwilson +class +page +construct +thing +subject +chang +thing +chang +reload +button +yourbrows +come +page +make +sure +see +mostrec +version +onlin +cours +read +refer +materi +syllabu +lectur +note +ondeclar +program +cours +note +scheme +implement +html +brows +note +construct +first +section +arereason +well +index +brows +later +section +willchang +cours +goe +along +read +ahead +class +islik +adventur +suggest +page +browser +usinga +scheme +system +interact +especi +work +throughchapt +tutori +allow +text +outof +document +past +run +scheme +system +error +text +especi +later +chapter +sanoth +reason +read +html +version +browser +rather +thanprint +hardcopi +correct +error +weget +chapter +scheme +languag +definit +html +format +onlinebrows +list +standard +featur +scheme +ters +describ +stuff +cours +scheme +coursenot +help +want +know +miscellanousfunct +exactli +author +first +practic +question +also +version +answer +second +practic +question +also +version +answer +quiz +answer +third +practic +question +also +version +answer +note +name +convent +indent +scheme +shouldconsult +homework +read +code +grade +itsens +andnot +draw +data +structur +homework +assign +write +merg +sort +scheme +solut +first +three +problem +comment +scheme +code +note +simpl +reader +includ +regular +express +grammar +actual +code +scheme +scheme +code +simpl +backward +chain +proposit +calculu +theoremprov +essenti +littl +subset +prolog +setofrul +classifi +anim +logic +program +kind +ofanim +plai +us +theorem +prover +pictur +class +object +instanc +illustr +simpleobject +system +scheme +note +anoth +pictur +show +class +object +metaclass +object +illustr +circular +make +latter +self +describ +scheme +code +simpl +object +base +program +system +base +onclass +gener +procedur +inherit +implement +note +type +system +includ +inherit +subtyp +take +home +quiz +answer +explan +test +version +test +answer +explan +main +languag +us +cours +scheme +default +implement +rscheme +whichi +instal +public +sparc +run +solari +run +machin +rscheme +command +runschem +machin +run +linux +solari +orani +sever +implement +unix +rscheme +andinstal +machin +free +find +itfrom +donovan +kolbl +home +page +sure +version +later +qing +patch +make +friendlier +fornewbi +us +machin +recommend +gettinggambit +marc +feelei +implement +scheme +free +youcan +scheme +repositori +us +run +window +sure +bestschem +bunch +avail +scheme +repositori +scheme +guil +standard +scheme +might +gambit +mark +feelei +mzscheme +rice +someth +besid +rscheme +recommend +get +meroon +version +object +system +scheme +repositori +start +doingobject +orient +program +assign +tous +rscheme +unix +take +advantag +rscheme +object +system +thing +abl +meroon +scheme +repositori +scheme +repositori +univers +indiana +lot +freeimplement +scheme +variou +document +scheme +getinterest +learn +scheme +cover +class +place +look +comp +lang +scheme +internet +newsgroup +devot +scheme +comp +lang +scheme +main +languag +us +cours +scheme +default +implement +rscheme +whichi +instal +public +sparc +run +solari +run +machin +rscheme +command +runschem +machin +run +linux +solari +orani +sever +implement +unix +rscheme +andinstal +machin +free +find +itfrom +donovan +kolbl +home +page +sure +version +later +qing +patch +make +friendlier +fornewbi +us +machin +recommend +gettinggambit +marc +feelei +implement +scheme +free +youcan +scheme +repositori +us +run +window +sure +bestschem +bunch +avail +scheme +repositori +scheme +guil +standard +scheme +might +gambit +mark +feelei +mzscheme +rice +someth +besid +rscheme +recommend +get +meroon +version +object +system +scheme +repositori +start +doingobject +orient +program +assign +tous +rscheme +unix +take +advantag +rscheme +object +system +thing +abl +meroon +scheme +repositori +scheme +repositori +univers +indiana +lot +freeimplement +scheme +variou +document +scheme +getinterest +learn +scheme +cover +class +place +look +comp +lang +scheme +internet +newsgroup +devot +scheme +comp +lang +scheme diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..d976e049 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,188 @@ +fall +comput +program +fall +welcom +homepag +austin +class +taught +adam +ajit +georg +cours +announc +final +surpris +contact +xunnow +make +test +like +solut +homeworksreview +session +slidesth +slide +second +half +semest +pleas +view +onlineif +possibl +print +realli +need +print +file +found +updatedhomework +sourc +filemidterm +solut +webta +inform +weekli +timetableta +section +offic +hour +locat +contact +guana +kumar +natarajan +tuesdai +thursdai +station +utexa +eduxun +feng +tuesdai +thursdai +station +xfeng +utexa +detail +weekli +time +tabl +cours +cours +guid +new +group +class +new +utexa +class +note +class +homework +also +tip +postscript +file +homework +fridai +file +homework +homework +file +download +file +homework +homework +solut +score +homework +solut +homework +solut +model +solut +homework +requir +wordlist +linux +provid +warren +wang +wwang +utexa +note +midterm +test +note +review +session +answer +exercis +kumar +note +review +session +model +solut +made +modif +mondai +afternoon +postscript +file +model +solut +ajit +postscript +file +postscript +file +midterm +solut +homework +file +decimalinteg +decimalinteg +hexinteg +hexinteg +octalinteg +octalinteg +slide +section +postscript +format +slide +section +postscript +format +homework +assign +postscript +format +kumar +review +session +review +session +homework +solut +homework +solut +kumar +homework +solut +homework +solut +kumar +homework +solut +maintain +feng +xfeng +utexa +edudepart +comput +sciencesunivers +texa +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..cfb68325 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,75 @@ +comput +system +architectur +fall +instructor +herb +schwetman +mesquit +softwar +offic +hour +class +appointmentcontact +mesquit +offic +hour +station +contact +utexa +yang +yang +offic +hour +station +contact +yangyang +utexa +syllabu +assign +assign +solut +solut +file +print +statist +program +solut +statist +program +statisticsassign +solut +solut +file +print +statist +program +asga +statist +assign +solut +statisticsyour +final +gradesect +section +microsparc +datasheetonlin +resourc +classmat +email +schwetman +ruiliu +yang +yang +postmessag +new +group +pagei +creat +august +comment +welcom +send +email +yangyang +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..720f999b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,105 @@ +csnet +csnet +network +protocol +implement +gener +inform +professor +offic +hour +tuesdai +thursdai +teach +assist +offic +hour +mondai +wensdai +station +class +descript +text +background +read +newsgroup +utexa +class +prerequisit +grade +refer +draft +multicast +rout +protocol +texa +internet +platform +netsim +corejava +class +mobil +support +specif +address +alloc +manag +tutori +draft +http +http +digest +access +authent +newsgroup +comp +protocol +java +schedul +present +schedul +individu +paper +handout +handout +handout +handout +handout +handout +handout +handout +handout +handout +handout +handout +handout +homework +turn +homework +homework +sampl +solut +homework +homework +homework +group +project +project +info +group +project +configur +file +group +project +pleas +read +note +first +fengyufeng +utexa +edufing +public +ring +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..ba9471e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,127 @@ +home +pageclick +help +comput +techniqu +spring +quarterwelcom +home +page +world +wide +hypermedia +document +whichcontain +bounti +inform +class +keep +mind +thatthi +document +static +inform +addedfrequ +problem +document +send +mail +weld +click +highlight +item +inform +class +personnel +professor +cours +syllabu +read +assign +homework +polici +comput +announc +check +regularli +last +chang +handout +lectur +note +homework +assign +gradesoth +us +link +offici +mathematica +page +mvi +home +page +visitor +room +schedul +comput +scienc +engin +depart +comput +scienc +degre +program +offer +colleg +art +scienc +comput +engin +degre +program +offer +colleg +engin +mosaic +help +help +avail +follow +topic +basic +inform +mosaic +inform +hypertext +markup +languag +html +uniform +resourc +locat +read +home +usinglynx +charact +base +browserport +engr +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accur +quotedand +duli +credit +engr +copyright +departmentof +comput +scienc +engin +univers +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..88e187f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,208 @@ +engr +home +page +autumn +engr +comput +program +dugan +martin +tompa +autumn +welcom +home +page +world +wide +short +hypermedia +documentfor +engr +contain +bounti +inform +theclass +keep +mind +document +static +newinform +especi +class +messag +ad +frequent +problem +document +send +mail +webmast +click +highlight +item +inform +click +help +class +messag +check +class +messag +frequent +last +updat +cours +syllabu +offic +hour +staff +lectur +slide +homework +midterm +exam +studi +guid +exam +solut +final +exam +studi +guid +exam +solut +tip +mac +think +think +debugg +netscap +help +bulletin +board +comput +home +textbook +code +refer +final +exam +regularli +schedul +time +place +special +demo +raini +activ +cours +web +earlier +quarter +intact +less +notic +problem +instanc +link +work +pleas +send +mail +webmast +washington +might +like +assign +test +like +us +previous +autumn +winter +spring +summer +search +autumn +page +previou +quarter +tip +miscellan +info +raini +activ +case +insensit +match +whole +word +inform +avail +univers +washington +comput +scienc +engin +depart +comput +scienc +degre +program +offer +colleg +art +scienc +comput +engin +degre +program +offer +colleg +engin +relat +cours +major +nonmajor +consid +take +preview +netscap +help +run +netscap +find +help +itemsund +balloon +help +menu +particular +onlin +handbook +portion +engr +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accur +quotedand +duli +credit +engr +copyright +departmentof +comput +scienc +engin +univers +washington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..7f9b9dd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,145 @@ +engr +home +page +autumn +engr +comput +program +martin +dickei +richard +ladner +autumn +welcom +home +page +world +wide +short +hypermediadocu +engr +contain +bounti +informationabout +class +click +highlight +item +moreinform +class +messag +check +frequent +cours +syllabu +sort +schedulesth +week +glanceweek +week +activ +schedulecomput +lab +includinglab +hour +watch +hour +chang +staff +includ +instructor +andta +offic +hour +lectur +slide +lectur +slide +audiofrom +summer +homework +examin +midtermand +final +originallyschedul +place +time +studi +guid +test +place +andtim +kind +tip +usingth +compil +macintosh +user +comput +home +textbook +code +refer +tutori +special +demo +cours +web +earlier +quarter +less +intactand +invit +brows +notic +problem +forinst +link +work +pleas +send +mail +webmast +might +like +look +assign +test +andth +like +us +previous +autumn +winter +spring +summer +inform +avail +univers +washington +comput +scienc +engin +depart +comput +scienc +degre +program +colleg +art +scienc +comput +engin +degre +program +colleg +engin +andrel +cours +major +nonmajor +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..fd771ea8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,84 @@ +autumn +discret +structur +autumn +instructorpaul +beam +beam +washington +edulectur +offic +sieg +phone +offic +hour +thursdai +appoint +teach +assistantjonathan +nowitz +nowitz +washington +edusect +thursdai +johnson +section +thursdai +loew +offic +hour +tuesdai +sieg +wednesdai +sieg +handout +syllabu +induct +recurs +defin +set +postscript +acrobat +acrobat +reader +part +ofyour +browser +midterm +wednesdai +novemb +class +sampl +question +midterm +homework +assign +assign +assign +assign +assign +assign +assign +assign +previou +cours +web +fall +karp +spring +ruzzo +winter +leveson +spring +beam +comput +scienc +engin +depart +univers +washington +seattl +beam +washington +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..d2da2849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,187 @@ +winter +intro +formal +model +winter +richard +ladnerclass +messag +check +email +frequent +last +updat +handout +cours +syllabu +latex +handout +latex +construct +regular +grammar +regular +express +midterm +extra +latex +rambl +regard +question +exam +handout +latex +construc +pars +grammar +handout +latex +review +final +exam +handout +latex +proof +halt +problem +undecidableexam +midterm +exam +latex +solut +latex +final +exam +latex +homework +homework +latex +solut +latex +homework +latex +solut +latex +homework +latex +solut +latex +comment +notat +homework +latex +solut +latex +homework +latex +solut +latex +homework +latex +solut +text +latex +text +state +diagram +homework +latex +solut +text +latex +text +state +diagram +homework +latex +solut +text +latex +text +state +diagram +homework +latex +solut +latex +homework +latex +solut +latex +text +file +format +cours +materi +provid +three +format +html +hypertext +markup +languag +html +document +format +handl +browser +fact +current +look +view +html +document +mani +handout +provid +origin +latex +document +convert +html +us +latexhtml +strang +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +postscript +ghostscript +home +page +free +viewer +window +linux +web +previou +quarter +autumn +winter +autumn +autumn +ladner +washington +edufix +washington +last +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..04023f79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,70 @@ +introduct +formal +model +fall +intro +formal +model +fall +ann +condon +welcom +home +page +check +page +regularli +findhomework +solut +set +pointer +upcom +lectur +exam +class +messag +messag +sent +class +mail +list +willb +log +subscrib +mail +list +send +mail +majordomo +includ +messag +subscrib +userid +check +email +frequent +last +updat +homework +handout +upcom +lectur +list +content +previou +lectur +web +previou +quarter +winter +autumn +winter +autumn +autumn +condon +washington +edukaye +washington +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..5239fd3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,32 @@ +autumn +data +structur +autumn +martin +tompaclass +messag +check +mail +frequent +last +updat +cours +informationlab +technot +unix +questionnaireloc +cdeletemin +algorithm +treeshomework +web +previou +quarter +winter +spring +autumn +winter +spring +request +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..5dddebab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,69 @@ +spring +home +pagecs +spring +data +structuresrichard +ladner +instructordan +fasulo +teach +assistantthi +world +wide +short +hypermedia +documentfor +contain +inform +theclass +taught +spring +keep +mind +document +static +newinform +especi +class +messag +ad +frequent +click +help +class +messag +check +frequent +offic +hour +suggest +read +project +homework +exam +lectur +overheadsport +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accur +quotedand +duli +credit +copyright +departmentof +comput +scienc +engin +univers +washington +seattl +ladner +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..1ff8cb9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,67 @@ +home +pagecs +program +languagesfal +quarter +current +offer +home +page +autumn +informationth +languag +listinfo +everi +program +languag +program +languag +research +pagehom +page +spring +offer +ofcs +home +page +winter +offer +ofcs +home +page +autumn +offer +ofcs +home +page +spring +offer +ofcs +home +page +winter +offer +ofcs +home +page +depart +computersci +engineeringport +reprint +adapt +academicnonprofit +purpos +provid +sourc +accur +quot +dulycredit +copyright +depart +comput +scienceand +engin +univers +washington +webmast +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..70f89675 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,323 @@ +home +pagecs +program +languagesspr +quarter +lectur +section +sieg +sieg +final +exam +review +session +mondai +june +sieg +tuesdai +june +sieg +final +exam +thursdai +june +instructor +steve +hanks +mail +hank +washington +eduoffic +sieg +offic +hour +dave +grove +mail +grove +washington +eduoffic +sieg +offic +hour +administr +cours +syllabu +cours +overviewcours +newsgroup +help +documentsgeneralintroduct +new +us +netscap +last +updat +us +unix +last +updat +us +turnin +electron +submiss +homework +assign +last +updat +us +emac +last +updat +emac +clip +save +last +updat +lisp +relatedrun +lisp +emac +last +updat +reason +dave +grove +thing +last +updat +smalltalk +relat +us +smalltalk +last +updat +assign +lisp +assign +html +postscript +last +updat +lisp +assign +html +postscript +sampl +solut +last +updat +lisp +assign +html +postscript +last +updat +smalltalk +assign +done +section +html +postscript +last +updat +smalltalk +assign +html +postscript +last +updat +smalltalk +assign +html +postscript +last +updat +prolog +assign +assign +hand +html +postscript +last +updat +prolog +assign +assign +html +miss +figur +postscript +includ +figur +partial +solut +last +updat +quiz +test +lisp +quiz +htmlpostscript +sampl +solut +smalltalk +quiz +htmlpostscript +sampl +solut +daili +class +note +note +code +transcript +march +note +code +transcript +march +note +code +transcript +suggest +read +april +note +code +transcript +suggest +read +april +note +code +transcript +suggest +read +april +note +code +transcript +suggest +read +april +note +code +transcript +suggest +read +april +note +code +april +note +code +transcript +suggest +read +april +note +code +transcript +suggest +read +april +note +april +note +april +smalltalk +transcipt +note +april +smalltalk +transcipt +note +april +smalltalk +transcipt +note +april +smalltalk +transcipt +note +suggest +smalltalk +read +note +note +note +full +note +interfac +build +note +code +prolog +databas +method +note +method +code +method +note +method +code +suggest +prolog +read +prolog +code +employe +informationth +languag +listinfo +everi +program +languag +program +languag +research +pagehom +page +winter +offer +ofcs +home +page +autumn +offer +ofcs +home +page +spring +offer +ofcs +home +page +winter +offer +ofcs +home +page +depart +computersci +engineeringport +reprint +adapt +academicnonprofit +purpos +provid +sourc +accur +quot +dulycredit +copyright +depart +comput +scienceand +engin +univers +washington +webmast +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..fadaab3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,72 @@ +index +page +index +pagecurr +quarterth +current +quarter +previou +quarterscours +web +earlier +quarter +intact +less +younotic +problem +instanc +link +work +pleas +send +mail +webmast +washington +spring +inform +avail +univers +washington +depart +comput +scienc +engin +comput +scienc +degre +program +offer +colleg +art +scienc +comput +engin +degre +program +offer +colleg +engin +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +copyright +depart +comput +scienc +engin +univers +washington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..ead133ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,179 @@ +home +page +autumn +introduct +digit +design +autumn +quarter +gaetano +borriello +corei +andersonwelcom +home +page +home +page +contain +whole +bunch +us +inform +class +keep +mind +document +static +inform +especi +class +announc +messag +ad +frequent +problem +document +gener +send +mail +tocs +webmast +class +announc +notic +instructor +system +administr +last +updat +class +mail +archiv +messagess +washington +last +updat +send +mail +class +instructor +instructor +everyon +cours +administr +goal +syllabu +meet +time +lectur +final +exam +mondai +decemb +workload +grade +expect +laboratori +softwar +tool +polici +collabor +cheat +announc +mail +address +overal +schedul +lectur +topic +instructor +gaetano +borriello +gaetano +offic +hour +sieg +corei +anderson +corin +offic +hour +sieg +aweekli +assign +weekli +quizz +final +exam +lectur +onlin +version +slide +us +lectur +textbook +contemporari +logic +design +katz +benjamin +cum +addison +weslei +maintain +author +katz +maintain +publish +benjamin +cum +addison +weslei +note +topic +interest +evolut +implement +technolog +comput +aid +design +tool +logic +design +synario +feedback +tell +think +thing +go +even +anonym +desir +link +previou +quarter +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +copyright +depart +comput +scienc +engin +univers +washington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..1e34b575 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,95 @@ +cours +pagecs +data +structur +algorithmsautumn +basic +inform +instructor +steve +tanimoto +tanimoto +washington +sieg +hall +room +offic +hour +appoint +teach +assist +anhai +doan +offic +hour +announc +place +dai +time +smith +comput +facil +unix +account +mscc +option +student +languag +requir +lisp +option +textbook +shaffer +practic +introduct +data +structur +algorithm +analysi +publish +summer +prentic +hall +grade +breakdown +tent +assign +assign +midterm +project +final +late +polici +keep +grade +manag +encourag +punctual +work +point +deduct +late +assign +assign +penalti +schedul +updat +inform +aboutth +project +topic +studi +midterm +examinform +final +exambas +inform +us +compilerassignmentssolut +assignmentsteach +assist +informationscheduleweb +previou +offer +winter +autumn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..a0f00852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,57 @@ +home +pagecs +data +structur +algorithmsspr +instructor +alistair +holden +holden +washington +jonathan +nowitz +nowitz +washington +class +messag +last +updat +mondai +cours +materi +comput +syllabu +homework +demo +exam +inform +depart +comput +scienc +engin +comput +scienc +degre +program +offer +colleg +art +scienc +comput +engin +degre +program +offer +colleg +engin +relat +cours +major +major +mosaic +help +interest +page +raini +funnowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..24964a46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,97 @@ +cours +pagecs +data +structur +algorithmswint +meet +time +meet +place +sieg +instructor +linda +shapirooffic +siegtelephon +email +shapiro +washington +eduoffic +hour +denis +pinneloffic +siegtelephon +email +denisep +washington +eduoffic +hour +syllabustransparencieshomework +assignmentshomework +answer +homework +homework +homework +program +assignmentsprogram +assign +test +file +test +file +test +file +test +file +program +assign +note +data +set +follow +line +begin +indic +insert +tree +next +enclos +inquot +state +assign +charact +long +next +come +integ +indic +length +associatedvalu +final +string +valu +also +enclos +quot +linebegin +indic +find +search +isfollow +search +enclos +quot +test +file +test +file +test +file +program +assign +object +model +graphimag +graphreview +listsfin +studi +sheet diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..81d5b2d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,27 @@ +compil +classhomethi +world +wide +hypermedia +documentfor +contain +inform +class +keep +inmind +document +static +inform +willb +ad +frequent +urgent +announc +assign +onlin +class +meet +admin +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..2390eaed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,467 @@ +home +pagecs +experiment +boe +softwar +engin +project +cours +professor +nanci +leveson +offic +sieg +phone +hour +appoint +mail +leveson +washington +educours +descriptioninstruct +object +teach +terminolog +concept +softwar +engin +teach +fundament +softwar +project +manag +work +team +provid +experi +real +industri +softwar +engin +project +teach +oral +written +commun +skill +produc +portfolio +cours +studi +concept +method +tool +specif +design +construct +test +analysi +document +larg +softwaresystem +includ +also +technic +topic +essenti +tocreat +complex +softwar +system +successfulli +project +manag +effectiveor +written +commun +group +interact +latter +topicsar +industri +feedback +sai +import +employersand +often +lack +graduat +experiment +version +last +quarter +realbo +project +exampl +particip +work +largegroup +sever +reason +try +approach +first +isthat +quarter +short +realist +project +student +cannotlearn +enough +softwar +engin +class +session +devotedto +discuss +organ +project +regular +class +thegroup +project +also +usual +learn +experi +hard +isto +work +group +enough +learn +work +effectivelytogeth +group +head +instructor +disast +beavoid +experi +provid +correct +worktogeth +addit +student +experi +requirementsanalysi +real +softwar +develop +possibl +thenorm +class +set +quarter +student +portfolio +areal +softwar +engin +project +search +exampl +portfolio +done +softwar +engineeringinstitut +master +softwar +engin +program +providedat +first +class +meet +class +student +assign +specif +role +playthat +allow +experi +leadership +posit +attach +listof +role +howev +everyon +particip +phase +projectso +learn +part +instructor +theproject +manag +outlin +topic +natur +qualiti +principl +softwar +softwar +engin +manag +softwar +project +process +model +work +group +project +plan +risk +assess +cost +estim +metric +requir +analysi +specif +softwar +design +verif +valid +test +analysi +configur +manag +review +mainten +evolut +reus +ethic +profession +softwar +engin +embed +system +safeti +role +class +project +student +take +leadership +role +project +howev +everyon +particip +activ +review +ofth +activ +experi +aspect +softwaredevelop +team +addit +class +member +responsiblefor +present +boe +review +meet +project +administr +assist +program +manag +respons +project +plan +control +primari +duti +includ +write +updat +project +manag +plan +track +project +statu +make +sure +proper +meet +held +work +get +done +time +princip +architect +respons +creation +softwar +product +primari +respons +includ +overal +consist +design +hardwar +softwar +platform +issu +transit +plan +design +respons +exist +system +primari +duti +includ +learn +augment +document +necessari +evalu +current +system +design +primari +respons +organ +develop +system +design +configur +secur +manag +respons +chang +control +secur +primari +duti +includ +write +configur +manag +plan +set +configur +manag +system +project +document +control +organ +chang +document +ensur +design +secur +human +factor +specialist +respons +user +interfac +interact +duti +includ +design +human +interfac +evalu +respect +requir +plan +user +survei +interview +boe +employe +qualiti +assur +specialist +respons +overal +qualiti +releas +product +primari +duri +includ +conduct +review +product +ensur +design +requir +characterist +conduct +review +deliver +normal +qualiti +assur +duti +program +manag +respons +mockup +prototyp +primari +duti +provid +expertis +program +languag +implement +organ +develop +prototyp +mockup +handl +technic +issu +document +specialist +respons +appear +clariti +document +creation +user +manual +duti +includ +determin +tool +us +provid +expertis +make +deliver +document +readabl +understand +plan +user +support +system +mainten +reliabl +engin +primari +respons +creat +guid +mainten +deliv +product +reliabl +plan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..ccc451aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,128 @@ +home +pagecs +softwar +engineeringmeet +time +locat +loew +time +mondai +wednesdai +fridai +professor +nanci +leveson +offic +sieg +phone +hour +appoint +mail +leveson +washington +eduta +adam +carlson +offic +sieg +hour +appoint +mail +carlson +washington +educours +descriptionthi +cours +studi +concept +method +tool +specif +design +construct +test +analysi +document +larg +softwar +system +includ +also +technic +topic +essenti +creat +complex +softwar +system +successfulli +includ +project +manag +textbookghezzi +jazayeri +mandrioli +fundament +softwar +engin +prentic +hall +note +note +requir +sampl +requir +interview +question +produc +consum +petri +axiomat +specif +note +note +coupl +cohes +note +assign +assign +assign +assign +assign +assign +link +interest +cours +syllabu +updat +pleas +read +newsgroup +access +machin +cours +send +mail +class +mailinglist +new +comp +risk +militari +standard +defens +system +softwar +develop +inform +avail +winter +spring +winter +comput +scienc +engin +departmentsuggest +feedback +request +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..7663ddfc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,706 @@ +winter +home +pagecs +winter +program +languag +implement +instructor +steve +tanimoto +depart +comput +scienc +engin +univers +washington +seattl +ruth +andersonmeet +tuesdai +thursdai +sieg +except +thursdai +meet +thompson +hall +room +time +schedul +steve +offic +hour +tuesdai +sieg +thursdai +sieg +ruth +offic +hour +mondai +sieg +wednesdai +sieg +cours +mail +list +mail +list +archiv +schedul +tent +schedul +topic +examin +transpar +past +lectur +also +post +number +credit +grade +homework +midterm +exam +final +exam +project +class +particip +hardcopi +read +text +select +section +mani +read +cours +onlin +first +ofread +part +recent +book +lisp +artifici +intellig +sever +read +book +throughout +cours +includ +read +lisp +logic +program +prolog +grammar +andpars +languag +understand +system +shell +expert +system +either +purchas +whole +book +bookstor +approxim +copi +order +combin +get +copi +fromth +copi +center +read +librari +book +entitl +element +artifici +intellig +us +common +lisp +edit +freeman +chapter +introduct +program +incommon +lisp +lisp +glossari +purchasedsepar +copi +center +basement +commun +build +onlin +refer +materi +lisp +lisp +lisp +provid +answer +mani +question +lisp +andit +implement +common +lisp +languag +edit +standard +referenceon +common +lisp +seem +best +access +tabl +contentspag +rather +try +download +entir +html +file +orpostscript +sourc +code +lisp +program +element +artifici +intellig +usingcommon +lisp +edit +site +fordigitool +compani +thatmaintain +support +macintosh +common +lisp +provid +currentinform +interest +link +info +lisp +program +world +wide +applic +onlin +refer +materi +introductionto +program +onlin +tutori +program +anoth +onlin +tutori +websit +thatdoesn +alwai +respond +promptli +sever +tutori +list +onlin +refer +materi +java +java +languag +trail +announc +januari +welcom +cours +cours +cours +program +languag +implement +depart +list +call +languag +compil +although +catalog +call +program +languag +theirimplement +cover +interpret +compil +techniqu +buildingprogram +system +attent +focu +mainli +particular +programminglanguag +lisp +lisp +explor +mani +issu +relev +tointepret +languag +investig +compil +addit +tradit +languag +issu +techniqu +alsolook +current +issu +visual +program +system +programmingfacil +world +wide +januari +facil +quarter +student +mscc +macintosh +run +macintosh +common +lisp +mscc +unix +system +mscc +themathemat +scienc +comput +center +mscc +mac +locat +thebas +thompson +hall +mac +room +room +mac +offer +varieti +languag +mathematica +mscc +unix +host +offer +yacc +allegrocommon +lisp +softwar +facil +macintosh +common +lisp +provid +particularli +powerfulenviron +includ +full +implement +common +lisp +standard +integr +editor +call +fred +extens +facil +graphicsand +user +interfac +construct +macintosh +comput +network +file +easili +transfer +unix +host +machinesof +mscc +student +difficulti +get +macintosh +inthompson +hall +supplement +macintosh +work +work +theirown +us +packag +xlisp +stat +window +micro +emac +howev +limit +abil +support +altern +facil +student +wish +work +need +access +theseresourc +archiv +internet +also +note +thatxlisp +stat +bare +bone +implement +common +lisp +nothav +extens +program +develop +support +fortun +power +system +cours +disadvantag +us +must +mscc +labunless +purchas +copi +fromdigitool +normal +cost +copi +special +student +dealallow +student +purchas +point +free +version +allegro +common +lispfor +window +download +franz +attract +tool +student +final +examin +final +exam +given +mondai +march +sieg +close +book +test +januari +note +thursdai +januari +meet +inthompson +hall +room +introduc +thelaboratori +facil +cours +thompson +thistim +instead +go +regular +classroom +januari +week +ruth +wednesdai +offic +hour +move +thursdai +sieg +januari +beginn +guid +html +might +help +assign +januari +post +messag +sent +class +new +group +regardingread +file +path +januari +remind +pleas +email +assign +ruth +todai +even +turn +hard +copi +class +click +info +email +file +mac +januari +assign +pleas +turn +printout +token +program +sever +exampl +also +pleas +email +copi +token +ruth +februari +midterm +examin +onthursdai +februari +cover +topic +option +review +session +midterm +exam +review +session +held +mondai +insieg +hall +room +pleas +bring +question +materi +becov +homework +help +click +info +convert +string +number +symbol +check +mail +archiv +import +messag +archiv +found +page +send +email +receiv +mail +mail +list +februari +note +modif +assign +page +deadlin +announcedearli +email +plu +onlin +read +materi +part +februari +free +postscript +viewer +window +avail +februari +pictur +koch +snowflak +march +detail +complet +project +given +projectgener +descript +page +check +find +aboutdemonstr +writeup +review +session +final +exam +schedul +fridai +march +sieg +march +final +examin +onmondai +march +cover +topic +part +exam +multipl +choic +format +bring +mark +sens +form +pencil +exam +close +book +assign +tuesdai +januari +assign +tuesdai +januari +assign +tuesdai +januari +solut +part +part +assign +tuesdai +januari +solut +exercisestokenizerassign +part +tuesdai +februari +andpart +thursdai +februari +part +solut +parsertokenizerpart +solut +koch +snowflakeassign +tuesdai +march +java +tutori +local +copi +assign +help +project +gener +descript +includ +info +ondemonstr +writeup +project +mileston +februari +project +mileston +februari +file +show +displai +text +window +project +demonstr +schedul +thursdai +march +project +writeup +fridai +march +turn +review +session +final +exam +tanimoto +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..cad2781a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,112 @@ +introduct +artifici +intelligencecs +introduct +artifici +intellig +spring +professor +alistair +holden +holden +csoffic +offic +hour +noon +noonta +joshua +redston +redston +msoffic +thompson +offic +hour +mondai +hour +mondai +thursdai +text +rich +knight +artifici +intellig +secondedit +touretzki +common +lisp +gentl +introduct +symboliccomput +gener +inform +basic +comput +inform +cours +outlin +inform +turnin +instruct +project +first +program +assign +april +solut +program +project +first +homework +assign +april +second +homework +assign +april +second +program +assign +april +solut +second +program +assign +third +program +assign +sampl +solut +third +program +assign +final +program +assign +june +final +exam +inform +introduct +us +allegro +emacsinterfac +postscript +emac +refcard +postscript +allegro +emac +interfac +refcard +postscript +instruct +download +standalonelisp +macintosh +note +check +grade +record +type +gradesredston +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..57cb73b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,105 @@ +winter +intro +algorithm +winter +larri +ruzzo +martin +tompaclass +messag +check +email +frequent +last +updat +text +book +errata +list +handout +latex +syllabu +midtem +solut +acrobat +syllabu +midtem +solut +postscript +syllabu +midtem +solut +homework +latex +acrobat +postscript +web +previou +quarter +winter +karlin +file +format +thecours +materi +provid +three +format +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +adob +acrobat +latest +greatest +free +viewer +adob +acrobat +page +postscript +ghostscript +home +page +free +viewer +window +linux +time +acrobat +support +fewer +system +isprefer +file +smaller +render +isfast +legibl +print +ghostscriptcan +exampl +ruzzo +tompa +aberman +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..81b29fc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,139 @@ +intro +theori +comput +spring +introduct +theori +comput +larri +ruzzo +spring +gener +inform +instructor +larri +ruzzo +jayram +thathachar +meet +time +instructor +offic +hour +tent +sieg +offic +hour +sieg +welcom +home +page +problem +document +sendmail +jayram +washington +class +mail +last +updat +messag +sent +class +mail +list +washington +textbook +errata +handout +admin +problem +solut +everyth +latex +sourc +syllabu +midterm +final +midterm +final +everyth +acrobat +syllabu +midterm +final +midterm +final +everyth +postscript +syllabu +midterm +final +midterm +final +everyth +file +format +thecours +materi +provid +three +format +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +adob +acrobat +latest +greatest +free +viewer +adob +acrobat +page +postscript +ghostscript +home +page +free +viewer +window +linux +time +acrobat +support +fewer +system +isprefer +file +smaller +render +isfast +legibl +print +ghostscriptcan +exampl +cours +web +spring +spring +jayram +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..1ed64194 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,60 @@ +introduct +databas +systemscs +introduct +databas +systemsfal +quarter +instructor +prof +linda +shapiro +shapiro +offic +sieg +telephon +offic +hour +patrick +crowlei +pcrowlei +offic +hour +sieg +announc +syllabu +assign +homework +word +shift +left +click +link +save +potenti +us +link +unisql +home +page +qbic +queri +imag +content +imag +databas +manag +system +link +dbm +page +probabl +interest +want +know +window +back +cours +webcs +request +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..46624821 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,111 @@ +home +page +introduct +oper +system +autumn +instructor +brian +bershad +instructor +washington +lectur +offic +hour +sung +choi +washington +section +section +offic +hour +sieg +appoint +cours +intro +brian +bershad +cours +adminth +class +outlin +administr +info +textbook +grade +andoth +word +wisdom +cours +messag +mail +sent +archiv +midterm +solut +scale +solut +first +midterm +lectur +schedulewhat +cover +schedul +aggress +andwil +updat +regularli +reflect +actual +pace +lectur +note +handout +slide +lectur +projectsdescript +project +relat +project +materi +solutionsto +project +project +solut +avail +project +avail +section +notesnot +materi +section +watchthi +space +carefulli +inform +vital +surviv +andgrad +project +hint +appear +person +page +person +home +page +receiv +feedback +onproject +feedback +page +send +feedback +instructor +anonym +wish +page +lost +click +webmast +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..2d77309b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,110 @@ +home +page +autumn +quarterwelcom +world +wide +hypermedia +document +contain +bounti +inform +class +keepin +mind +document +mean +static +informationwil +ad +frequent +problem +thisdocu +send +mail +pighin +click +help +classpersonnel +professor +student +cours +syllabuscours +calendarta +offic +hourshandout +assignmentslectur +notesread +assignmentshomework +assignmentsprojectsproject +handoutsproject +artifactsproject +help +sessionsproject +grade +policyproject +write +upslibui +documentationoth +cours +relat +informationget +classhearn +baker +erratath +instruct +labus +indi +guid +opengl +exampl +program +spring +quarter +home +page +autumn +quarter +home +page +spring +quarter +home +page +winter +quarter +home +pagegraph +linkssgi +silicon +surfgrafica +obscurasiggraphgrailgraph +site +indexoth +us +linksmvi +home +page +visitor +room +schedul +comput +scienc +engin +departmentth +comput +scienc +degre +programth +comput +engin +degre +programweb +helpbas +helpmosa +netscap +lynxus +netscap +indyspighin +washington +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..fbfae5c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,137 @@ +autumn +intro +comput +commun +networksautumn +instructor +arun +somani +somani +cslectur +offic +sieg +eebphon +offic +hour +jari +kristensen +jari +csoffic +offic +hour +offic +hour +chang +tomatch +prof +somani +offic +hour +thu +cover +everi +week +andprovid +larger +timewindow +consult +class +messag +check +email +frequent +lectur +overheadshomeworksprojectsinterest +stuffattentionif +would +like +request +academ +accommod +disabl +pleasecontact +disabl +student +servic +schmitz +havea +letter +disabl +student +servic +indic +disabl +requiresacadem +accommod +pleas +present +letter +discuss +accommod +might +need +class +file +format +cours +materi +provid +three +format +html +hypertext +markup +languag +html +document +format +handl +browser +fact +current +look +view +html +document +mani +handout +provid +origin +latex +document +convert +html +us +latexhtml +strang +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +postscript +ghostscript +home +page +free +viewer +window +linux +jari +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..6bb9fece --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,97 @@ +spring +intro +comput +commun +networksspr +instructor +richard +ladner +ladner +cslectur +sieg +offic +sieg +phone +offic +hour +noonta +william +chan +wchan +csoffic +hour +sieg +sieg +class +messag +check +email +frequent +lectur +overheadshomeworksprojectsabout +file +format +cours +materi +provid +three +format +html +hypertext +markup +languag +html +document +format +handl +browser +fact +current +look +view +html +document +mani +handout +provid +origin +latex +document +convert +html +us +latexhtml +strang +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +postscript +ghostscript +home +page +free +viewer +window +linux +ladner +washington +eduwchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..45854090 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,11 @@ +home +pagecs +advanc +digit +designt +kehl +fall +page +fall +found +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..7579ec80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,159 @@ +home +pagecs +advanc +digit +designt +kehl +fall +welcom +home +page +cours +inform +time +place +johnson +import +announc +last +updat +summari +syllabusschedul +polici +hour +staff +kehl +instructor +offic +mark +savoi +tue +savac +richard +chinn +thur +richin +howard +chang +gener +shchang +csjason +aaron +scott +stephen +hardwar +laboratori +manag +offic +hour +student +work +group +final +exam +review +topic +cover +quarterhomework +assignmentsweb +page +march +homework +homework +duehomework +fridai +homework +fridai +march +writeup +februari +midnight +march +written +februari +midnight +assign +final +project +sampl +abel +state +machin +sampl +test +fixtur +option +option +optionlab +optionlab +handout +combin +logic +combin +logic +sequenti +logic +sequenti +logic +fpga +fpga +memori +communicationoth +inform +depart +comput +scienc +engin +home +page +murphi +recent +dilbert +comic +collect +resourc +care +gaetano +borriello +list +vlsi +link +comprehensivelist +icmanufactur +nation +semiconductor +data +sheet +motorola +data +book +server +philip +semiconduct +data +book +server +micron +technolog +data +sheetsth +copyright +depart +comput +scienc +engin +univers +washington +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..ca83193b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,217 @@ +comput +design +organ +comput +design +organ +gener +inform +meet +loew +instructor +larri +snyder +offic +hour +appoint +mail +address +snyder +offic +sieg +assist +judi +watson +jwatson +sieg +robert +chenoffic +hour +sieg +tuesdai +thursdays +mail +address +chensg +catalog +descript +instruct +address +model +structur +function +comput +arithmet +logic +unit +regist +transfer +level +design +hardwar +microprogram +control +memori +hierarchi +design +andorgan +system +compon +interconnect +laboratoryproject +involv +design +simul +instruct +setprocessor +prerequisit +class +note +mondai +postscript +read +wednesdai +postscript +read +fridai +postscript +read +mondai +postscript +review +sheet +answer +sheet +wednesdai +postscript +fridai +postscript +mondai +postscript +homework +html +read +skim +appendix +wednesdai +postscript +fridai +color +postscript +read +mondai +postscript +read +wednesdai +postscript +homework +read +fridai +postscript +read +mondai +postscript +homework +read +skim +chap +wednesdai +postscript +fridai +postscript +mondai +revis +review +wednesdai +postscript +review +answer +fridai +midterm +fast +answer +holidai +mondai +wednesdai +postscript +homework +read +fridai +postscript +mondai +postscript +wednesdai +postscript +homework +fridai +postscript +mondai +postscript +wednesdai +postscript +holidai +fridai +postscript +mondai +postscript +wednesdai +postscript +fridai +postscript +mondai +postscript +wednesdai +postscript +materi +follow +file +avail +verilog +pipelin +design +verilog +simul +mip +pipelin +pipelin +addit +modul +pipelin +design +common +sampl +program +sourc +form +test +sampl +program +program +segment +binari +prog +sampl +program +data +segment +binari +data +simpl +assembl +mip +assembl +languag +page +assembl +previou +quarter +fall +fall +verilog +referencesthi +free +postscript +verilog +refer +card +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..b2be21cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,424 @@ +spring +home +pagecs +spring +introduct +artifici +intellig +instructor +steve +tanimoto +depart +comput +scienc +engin +univers +washington +seattl +jeremi +baermeet +mondai +wednesdai +fridai +sieg +hall +room +steve +offic +hour +tuesdai +sieg +wednesdai +sieg +jeremi +offic +hour +wednesdai +sieg +thursdai +sieg +cours +newsgroup +newsgroup +cours +creat +newsgroup +access +machin +messag +newsgroup +post +send +mail +post +csor +us +new +interfac +pnew +cours +mail +list +mail +list +archiv +implement +schedul +tent +schedul +topic +number +credit +grade +homework +midterm +exam +final +exam +project +class +particip +hardcopi +read +requir +text +element +artifici +intellig +us +common +lisp +edit +freeman +work +hard +creat +textbook +self +contain +purchas +separ +book +lisp +separ +book +exampl +program +youdon +purchas +separ +book +mathemat +theori +onlin +refer +materi +lisp +lisp +lisp +provid +answer +mani +question +lisp +andit +implement +common +lisp +languag +edit +standard +referenceon +common +lisp +seem +best +access +tabl +contentspag +rather +try +download +entir +html +file +orpostscript +sourc +code +lisp +program +element +artifici +intellig +usingcommon +lisp +edit +interest +link +info +lisp +forprogram +world +wide +applic +facil +order +take +advantag +allegro +common +lisp +windowsimplement +lisp +excel +featur +programdevelop +interfac +construct +us +theintel +pentium +laboratori +sieg +hall +free +version +allegro +common +lisp +isfor +window +window +bedownload +franz +kind +comput +home +least +us +attract +option +final +examin +final +exam +givenaccord +schedul +final +examin +close +book +multipl +choic +test +bring +mark +sens +form +test +alist +topic +know +final +announc +march +welcom +cours +cours +cours +artifici +intellig +coversboth +mathemat +theori +implementationof +techniqu +lisp +topic +includ +lisp +programmingtechniqu +knowledg +represent +search +logicalreason +probabilist +reason +case +base +reason +plan +learn +languag +understand +vision +neural +net +expert +system +march +read +clo +link +assign +page +cours +newsgroup +staff +newsgroup +cours +creat +newsgroup +access +machin +messag +newsgroup +post +send +mail +post +csor +us +new +interfac +pnew +april +term +examin +given +wednesdai +april +review +session +midterm +exam +review +held +tuesdai +april +inour +regular +meet +room +april +note +assign +continu +programmingpart +assign +midterm +mondai +april +turn +paper +follow +solut +part +exercis +state +represent +part +screenshot +user +interfac +descript +ofhow +go +gener +move +search +program +current +statu +program +workload +reduct +propos +circul +email +approv +schedul +rest +term +fridai +lectur +neural +net +preliminari +demo +ofproject +either +give +preliminari +demo +right +class +orturn +page +progress +report +class +mondai +memori +holidai +class +tuesdai +review +session +final +exam +insieg +wednesdai +lectur +expert +system +explan +peer +evalu +system +project +fridai +wrap +demonstr +project +peer +evaluationof +project +june +tuesdai +final +exam +sieg +bring +mark +sens +form +test +assignmentsassign +fridai +march +class +assign +fridai +april +class +assign +fridai +april +class +assign +mondai +april +class +assign +mondai +april +class +assign +mondai +class +project +portion +class +tanimoto +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..67f86d2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,107 @@ +home +pagecs +digit +system +designstev +burn +spring +welcom +home +page +cours +inform +time +place +loew +import +announc +last +updat +summari +syllabu +schedul +polici +hour +staff +steve +burn +instructor +kent +smith +casei +anderson +stephen +hardwar +laboratori +manag +offic +hour +studentslab +assign +mchc +info +nice +introduct +fred +martinrobot +societi +seattl +robot +societi +portland +area +robot +societyoth +inform +depart +comput +scienc +engin +home +page +murphi +recent +dilbert +comic +collect +resourc +care +gaetano +borriello +comprehens +list +sourc +nation +semiconductor +data +sheet +motorola +data +book +server +philip +semiconduct +data +book +serverth +copyright +depart +comput +scienc +engin +univers +washington +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +burn +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..03ebf136 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,37 @@ +home +pagewelcom +cours +contain +bevi +inform +relatingto +cours +usual +document +frequentlychang +send +mail +bswest +csif +encount +problem +classpersonnelsyllabuslectur +scheduleguest +lectur +scheduleoffic +hoursproject +project +handout +schedul +project +help +session +final +projectoth +inform +bug +erratarefer +pagesmidterm +questionnairebswest +washington +march diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..74b75980 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,152 @@ +home +pagecs +implement +program +languageswint +quarter +import +cours +informationmeet +time +instructor +craigchamb +chamber +offic +hour +start +second +week +sieg +vass +litvinov +vass +offic +hour +sieg +cubicl +floor +archivesslid +lectur +avail +handout +avail +read +assign +avail +full +read +list +class +avail +homework +assign +avail +messag +sent +mail +list +archivedher +last +year +midterm +exam +answer +avail +last +year +final +exam +answer +avail +note +test +closedbook +affect +kind +question +wereask +year +midterm +exam +avail +sampl +solut +year +final +exam +answer +avail +cecil +vortex +informationhandout +tutorialsth +cecil +languag +tutorialth +vortex +compil +tutorialhow +vortex +front +enda +list +compil +file +interestdead +assign +elim +cecil +simpl +exampl +idfacfg +travers +interfac +cecil +travers +frameworkvortex +textual +descript +grammarcecil +refer +documentationdocument +avail +html +postscript +format +cecil +languag +refer +manual +postscript +version +vortex +compil +postscript +version +cecil +standard +librari +refer +manual +postscript +version +resourcesth +previou +quarter +page +includ +slide +avail +inform +compil +languag +research +found +onmark +leon +program +languag +research +page +chamber +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..d05801ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,94 @@ +home +pagecs +implement +program +languagesimport +cours +informationmeet +time +sieg +instructor +craig +chamber +chamber +offic +hour +sieg +jeff +dean +jdean +dave +grove +grove +offic +hour +come +find +chateau +chateau +confer +room +archivesslid +lectur +avail +slide +cecil +languag +turori +avail +handout +avail +read +assign +avail +full +read +list +class +avail +messag +sent +mail +list +archiv +projectth +cours +project +implement +sort +program +analysi +andtransform +vortex +compil +vortex +optimizingcompil +object +orient +languag +written +cecil +inform +vortex +compil +found +cecilproject +home +page +cecil +manual +found +resourcesmor +inform +compil +languag +research +found +onmark +leon +program +languag +research +page +jdean +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..39ddf80c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,17 @@ +home +pagecs +softwar +engineeringdavid +notkin +spring +introductori +handout +kwic +assign +assign +assign +assign +assign +sampl +projectsnotkin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..0b21f14c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,123 @@ +home +pagecs +concept +program +languagesautumn +loew +instructor +david +notkin +notkin +offic +hour +sieg +byappoint +kurt +partridg +kepart +offic +hour +floor +cubiclescours +handout +introductori +handout +html +postscript +assign +readingsmail +list +thread +archivesw +us +mail +list +administr +instructionalpurpos +wish +refer +previous +sent +messag +thread +mail +list +archiv +send +mail +list +washington +subscrib +list +send +emailto +majordomo +washington +singl +line +messag +bodi +subject +subscrib +csegener +program +languag +research +resourc +yahoo +page +mark +leon +excel +program +languag +research +pagesprogram +languag +critiquesgari +leaven +languag +self +studi +pagefunct +languag +resourcesmit +scheme +home +pagecmu +standard +pagea +gentl +introduct +mlhaskel +page +lambda +calculu +washington +univers +lambda +calculu +page +monash +universityobject +orient +languag +resourc +univers +geneva +object +orient +program +info +page +cecil +project +dylan +page +carnegi +mellon +appl +comput +question +send +mail diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..c7aa03cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,174 @@ +design +analysi +algorithmscs +design +analysi +algorithm +winter +instructor +richard +anderson +anderson +washington +lectur +seig +offic +hour +mondai +time +appoint +teach +assist +william +chan +wchan +washington +offic +hour +mondai +wednesdai +chateau +confer +room +sieg +floor +cubicl +somebodi +els +us +confer +room +cours +inform +prerequisit +go +assum +alreadi +undergradu +cours +algorithm +wrong +know +soon +possibl +lectur +suggest +readingtextbook +errata +list +project +realli +project +preview +check +outer +anderson +sapplet +assign +handout +written +homework +set +gener +tuesdai +class +background +quiz +post +script +homework +set +homework +solut +homework +solut +homework +solut +homework +solut +homework +solut +homework +homework +solut +homework +homework +midterm +exam +cancel +lack +interest +final +exam +told +mondai +march +probabl +verifi +time +exam +hour +close +book +class +exam +cover +materi +class +exam +willconsist +short +answer +problem +solv +question +bureaucrat +stuffgrad +base +upon +homework +exam +project +class +particip +work +togeth +homework +okai +discuss +homeworkproblem +classmat +must +write +solut +upindepend +gilligan +island +rule +could +invok +betweenani +discuss +homework +write +solut +mustwatch +least +half +hour +gilligan +island +theori +thatan +episod +gilligan +equival +reboot +anyth +thatsurv +learn +understood +anderson +washington +eduwchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..19d72637 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,315 @@ +parallel +algorithmscs +parallel +algorithm +spring +gener +inform +meet +sieg +instructor +richard +anderson +offic +hour +appointment +mail +address +anderson +offic +sieg +homework +exam +catalog +descript +design +analysi +parallel +algorithm +fundament +parallel +algorithmsfor +sort +arithmet +matrix +graph +problem +addit +select +topic +emphasi +gener +techniqu +approach +us +developingfast +effici +parallel +algorithm +limit +theirefficaci +prerequisit +equival +major +homework +assign +note +syllabu +homework +thursdai +april +homework +plu +rambl +commentsabout +cours +thursdai +april +lectur +transpar +april +code +analysisfor +list +rank +lectur +note +connect +compon +algorithmi +simpler +correct +section +latex +version +pointer +paper +pointer +referencesfor +erew +crew +connect +ullman +yannakaki +paper +homework +tuesdai +union +find +paper +homework +thursdai +certifi +write +paper +impli +exist +effici +consensu +algorithm +base +upon +swap +although +likelysometh +go +insid +next +supercomput +homework +thursdai +asynchron +refer +martel +foc +buss +manuscript +note +memori +model +real +descript +special +topic +cours +content +whim +instructor +descript +titl +year +cours +would +theori +share +memori +parallel +comput +mayb +topic +theori +smpc +cours +start +collect +basic +algorithm +spend +time +model +comput +syllabu +give +list +topic +could +cover +term +share +memori +indic +lookingat +topic +pertain +specif +interconnect +topolog +wewil +consid +situat +cost +memori +access +isnon +uniform +cours +theori +cours +sens +notconsid +particular +real +machin +prove +theorem +andyou +expect +parallel +machin +howev +topic +motiv +practic +consider +goal +indevelop +parallel +algorithm +come +algorithmswhich +could +conceiv +effici +parallel +machin +expect +three +four +problem +set +contain +routin +challeng +problem +goingto +requir +project +happi +student +outsidework +cours +relat +topic +text +cours +introduct +parallelalgorithm +nice +book +although +befollow +close +feel +exception +cheap +youcould +probabl +without +purchas +copi +origin +plan +volunt +teach +cours +year +textwould +theori +share +memori +parallel +comput +anderson +howev +book +progress +fast +volum +artof +comput +program +chose +book +instead +go +quit +flexibl +cours +taught +mychoic +topic +influenc +consid +interestingor +uninterest +also +choic +teach +cours +aseith +tradit +lectur +cours +work +researchcont +number +open +problem +mind +could +turninto +nice +research +result +could +present +half +bake +ideason +provid +other +interest +andenergi +think +anderson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..717576eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,34 @@ +home +page +automata +comput +complex +page +move +current +quarter +autumn +autumn +portion +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accuratelyquot +duli +credit +copyright +depart +comput +scienc +engin +univers +ofwashington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..13a6b9e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,25 @@ +home +page +fall +automataautumn +instructor +paul +beam +welcom +home +page +world +wide +short +hypermedia +documentfor +exam +quiz +postscript +quiz +postscript +final +postscript +latex +beam +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..3d25be69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,150 @@ +fall +automata +comput +complex +larri +ruzzo +fall +tuth +sieg +staffnameemailphoneoffic +hour +instructor +larri +ruzzo +ruzzo +sieg +nitin +sharma +nitin +csmw +sieg +class +mail +last +updat +messag +sent +class +mail +list +washington +textbook +errata +handout +administrivia +homework +midterm +latex +sourc +cours +organ +syllabu +collabor +midterm +acrobat +cours +organ +syllabu +collabor +midterm +postscript +cours +organ +syllabu +collabor +midterm +file +format +thecours +materi +provid +three +format +latex +plain +ascii +text +includ +format +command +simpl +thing +assign +gener +quit +legibl +format +figur +complex +math +stuff +hard +imposs +read +adob +acrobat +latest +greatest +free +viewer +avail +depart +unix +system +acroread +perhap +aavail +adob +acrobat +page +postscript +ghostview +ghostscript +home +page +free +viewer +window +linux +time +acrobat +support +fewer +system +isprefer +file +smaller +render +isfast +legibl +print +ghostscriptcan +exampl +cours +web +autumn +autumn +portion +reprint +adapt +foracadem +nonprofit +purpos +provid +sourc +accuratelyquot +duli +credit +copyright +depart +comput +scienc +engin +univers +ofwashington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..63710625 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,142 @@ +topic +complex +autumn +proposit +theorem +prove +satisfi +test +proof +complex +gener +inform +instructor +paul +beam +meet +time +tuesdai +thursdai +loew +autom +theorem +prove +comput +aid +verif +vlsi +andsoftwar +engin +give +algorithm +attempt +decid +truthof +logic +statement +proposit +first +higher +order +logic +cours +concentr +complex +issu +proposit +casea +well +flip +side +satisfi +test +even +us +oftheorem +prove +first +order +higher +order +logic +often +involv +finitedomain +proof +interpret +proposit +logic +anywai +consid +varieti +system +proposit +theoremprov +satisfi +test +issu +system +complex +proof +within +system +good +choic +search +strategi +consider +theoret +practic +work +thesequest +concentr +theoret +issu +proof +complexityand +rel +complex +search +strategi +also +examin +anumb +implement +proposit +logic +algorithm +compar +theoryand +practic +paper +thing +urquhart +complex +proof +survei +talk +slide +instal +softwar +amus +instal +theorem +prover +sato +andboy +moor +well +satisfi +tester +gsat +june +thedirectori +cours +proversther +paper +scatter +well +process +ofinstal +theorem +prover diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..0bd2a0dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,41 @@ +home +pagecs +comput +systemperform +modelingspr +host +lazowskaandmaryvernonwelcom +home +page +comput +system +performancemodel +meet +mondai +wednesdai +fridai +loew +hall +offic +hourstent +topic +schedulecom +goingsassignmentsproject +informationmap +queue +network +solut +packag +emailoth +inform +avail +sigmetr +confer +measur +model +computersystemsuw +depart +comput +scienc +engineeringlazowska +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..43cc65eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,104 @@ +home +page +comput +system +architecturewint +instructorsusan +egger +egger +washington +sieg +offic +hour +tuth +tajoshua +redston +redston +washington +sieg +offic +hour +sieg +cours +inform +cours +overview +postscript +schedul +continu +updat +postscript +lectur +note +problem +set +previou +test +architectur +histori +postscript +specmark +rate +postscript +inform +tool +shade +instuct +simul +sparc +atom +system +build +analysi +tool +alpha +tullsen +simul +execut +driven +instruct +level +simul +simul +superscalar +architectur +close +etch +binari +rewrit +analyz +pentium +code +alpha +hardwar +monitor +multiflow +compil +alpha +pixi +user +manual +postscript +dinero +uniprocessor +cach +simul +local +machin +alpha +pentium +powerpc +sparc +applic +multiprocessor +uniprocessor +spec +benchmark +neat +page +info +center +info +current +futur +processor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..b6a83ebd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,62 @@ +home +pagecs +oper +system +instructor +hank +levi +spring +freder +pighin +meet +time +instructor +offic +hour +offic +hour +chateau +confer +room +number +unit +welcom +home +page +world +wide +short +hypermedia +document +forcs +contain +inform +class +keep +mind +thisdocu +static +inform +especi +classmessag +ad +frequent +problem +thisdocu +send +mail +pighin +announc +april +first +assign +readi +iti +april +cours +inform +cours +mail +assign +projectlevi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..fa62415f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,108 @@ +home +pagecs +comput +graphicsautumn +quarter +welcom +home +page +world +wide +hypermedia +document +whichcontain +wealth +inform +class +keep +mind +thatthi +document +static +inform +addedfrequ +problem +document +send +mail +deros +click +help +avail +inform +professor +cours +syllabu +lectur +note +written +homework +assign +solut +last +year +project +handout +project +grade +polici +test +cool +imag +last +year +addit +inform +get +class +instruct +us +indi +mvi +home +page +visitor +room +schedul +comput +scienc +engin +depart +comput +scienc +degre +program +offer +colleg +art +scienc +comput +engin +degre +program +offer +colleg +engin +mosaic +help +help +avail +follow +topic +basic +inform +mosaic +inform +hypertext +markup +languag +html +uniform +resourc +locat +read +home +usinglynx +charact +base +browser diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..ad397d07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,176 @@ +home +page +principl +digit +system +design +carl +ebel +fall +welcom +home +page +cours +inform +time +place +loew +import +announc +summari +syllabu +text +book +staff +carl +ebel +instructor +ebel +offic +hour +wednesdai +fridai +sieg +paul +franklin +paul +offic +hour +mondai +thursdai +sieg +hine +hineskj +offic +hour +tuesdai +wednesdai +fridai +sieg +larri +mcmurchi +research +staff +tool +guru +larri +document +simul +synthesi +design +pamett +board +mostli +complet +still +construct +student +work +groupsfin +exam +review +topic +cover +quarter +homework +assign +note +homework +homework +fridai +begin +class +homework +hand +class +begin +class +homework +mondai +begin +class +homework +wednesdai +begin +class +homework +fridai +begin +class +homework +mondai +begin +class +homework +fridai +begin +class +handout +combin +logic +combin +logic +sequenti +logic +sequenti +logic +fpga +fpga +memori +commun +inform +depart +comput +scienc +engin +home +page +mother +site +list +vlsi +link +comprehensivelist +icmanufactur +murphi +recent +dilbert +comic +nation +semiconductor +data +sheet +motorola +data +book +server +philip +semiconduct +data +book +server +micron +technolog +data +sheet +copyright +depart +comput +scienc +engin +univers +washington +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +ebel +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..5d196f51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,56 @@ +index +pagecs +artifici +intelligencefal +quarter +artifici +intellig +pose +fundament +andchalleng +question +comput +scienc +build +intelligentmachin +cours +address +question +provid +anin +depth +introduct +select +topic +includ +agentarchitectur +knowledg +represent +search +plan +machinelearn +reason +uncertainti +methodolog +staff +weldweld +sieg +hour +marc +friedmanfriedman +sieg +hour +nick +kushmericknick +sieg +hour +outlin +topicsread +assignmentsassign +examsgradingresourcesth +class +mailinglist +also +archiv +past +messag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..4f775c87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,141 @@ +uncertainti +decis +make +uncertainti +decis +make +artifici +intellig +winter +professor +steve +hank +hank +offic +sieg +offic +hour +whenev +around +appoint +email +address +mail +washington +goe +class +member +send +mail +request +washington +list +read +materi +pearl +probabilist +reason +intellig +systemsthi +requir +text +class +read +sever +chapter +probabl +without +bui +strappedfor +cash +though +nice +refer +book +shafer +pearl +read +uncertain +reasoningthi +nice +collect +foundat +paper +reason +uncertainti +read +sever +select +copi +avail +grail +librari +jayn +probabl +theori +logic +scienc +fragmentari +edit +juli +extrem +interest +technic +histor +look +foundationsof +probabl +theori +statist +decis +theori +definit +worth +look +refer +list +histor +perspect +alon +math +heavi +go +place +beautifulli +written +neapolitan +probabilist +reason +expert +system +theori +algorithmsa +signific +overlap +pearl +book +good +secondari +sourc +inform +graphic +model +propagationalgorithm +avail +math +research +librari +paper +arrang +cours +summari +summari +topic +cover +read +html +postscript +hank +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..88f7aa10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,425 @@ +home +pagecs +imag +understandingwelcom +home +page +world +wide +short +hypermedia +documentfor +contain +inform +theclass +keep +mind +document +static +newinform +especi +class +messag +ad +frequent +problem +document +send +mail +mock +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +assign +first +assign +read +chapter +cours +note +doexercis +second +assign +wednesdai +april +read +chapter +cours +note +exercis +next +determin +conveni +torun +khoro +cantata +run +sun +aslillith +edit +local +workstat +login +file +containxhost +lilliththen +cshrc +file +sun +follow +setenv +khoros_hom +local +khoro +setenv +manpath +local +khoro +path +khoros_hom +path +rlogin +onto +lillith +rhost +assignmentsand +displai +environ +variabl +appropri +typecantata +unix +prompt +machin +georg +haskhoro +cantata +instal +also +cours +home +page +wwwhttp +washington +educ +cours +index +htmland +follow +link +khoro +cantata +tutori +itscours +outlin +experi +least +first +twotop +imag +inform +spatial +resolut +ideal +take +tutori +read +pagesand +experi +khoro +anoth +window +noth +turn +part +assign +third +assign +read +articl +huerta +andnevatia +cvpr +proceed +also +tolook +articl +wolff +fourth +assign +mondai +april +assign +make +comparison +three +imag +process +softwar +environ +khoro +msvc +fast +oper +level +learn +effort +requir +part +announc +final +examin +mondai +june +pmin +regular +class +meet +room +exam +cover +combinationof +midterm +post +midterm +materi +list +topic +studi +remind +class +approv +sundai +june +time +final +exam +review +meet +plan +meet +insieg +outlin +select +class +period +avail +fridai +april +mondai +wednesdai +mondai +fridai +mondai +wednesdai +fridai +mondai +wednesdai +copi +overhead +transpar +lectur +onneur +net +avail +engin +librari +copi +center +floor +packet +number +trainabl +classifi +fridai +student +permit +temporari +copi +ofmatlab +cours +requirethat +fill +form +sign +contract +know +interest +term +project +import +part +ofth +cours +start +week +april +correct +cours +note +fridai +april +introduc +pentium +laboratori +includingth +msvc +softwar +develop +environ +evan +mclain +documentexplain +transform +imag +applic +current +statu +khoro +instal +sun +contain +recent +inform +get +start +withkhoro +accompani +cours +pleas +read +class +fridai +march +mondai +april +student +alreadi +comput +account +accesskhoro +cantata +contact +rene +reed +onthursdai +fridai +make +arrang +pick +youraccount +login +name +password +itov +weekend +earli +next +week +rene +hour +arelimit +plan +ahead +email +address +reed +andsh +part +sieg +back +offic +sieg +kept +lock +either +need +knock +orhav +prior +arrang +meet +mani +account +applic +card +sign +willhav +take +care +next +week +select +lectur +slide +mondai +march +wednesdai +march +inform +resourc +imag +understand +onlin +intro +imag +process +khoro +cantata +delft +univ +pattern +recognit +inform +page +comput +vision +home +page +store +inform +home +page +thedepart +comput +scienc +engin +onlin +version +comput +scienc +undergradu +brochur +onlin +version +comput +engin +undergradu +brochuremosa +help +run +mosaic +find +help +itemsund +balloon +help +menu +macmosa +home +page +itemund +navig +menu +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..7fee1043 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,257 @@ +home +pagecs +parallel +comput +imag +processingwelcom +home +page +world +wide +short +hypermedia +documentfor +contain +inform +theclass +keep +mind +document +static +newinform +ad +time +time +schedul +informationon +octob +novemb +class +begin +half +hourearli +normal +start +dai +novemb +meet +guest +speaker +prof +nian +simon +fraser +univers +burnabi +canada +tuesdai +novemb +class +novemb +bharath +modayur +guest +speaker +titl +present +effici +parallel +object +recognit +simd +mimd +machin +tuesdai +novemb +class +begin +topic +complet +discuss +pyramid +algorithm +scale +invariantoper +algorithm +segment +hierarchicalrelax +us +isodata +approach +burt +hong +rosenfeld +introduct +embed +virtual +process +overview +neural +network +architectur +algorithm +tuesdai +novemb +class +begin +topic +complet +overview +neural +network +architectur +embed +neural +network +mesh +pyramid +brief +treatment +icon +symbol +comput +thursdai +novemb +class +begin +topic +parallel +imag +analysi +digit +librari +demo +schedul +find +term +project +topicsdur +week +octob +student +activelyexplor +topic +term +project +written +descript +topic +hand +inon +tuesdai +octob +templat +writeupsi +avail +resourcespvm +parallel +virtualmachin +softwar +layear +permit +user +program +aviru +machin +made +heterogen +collect +moreworkst +conveni +implement +studydistribut +algorithm +intel +technicalpubl +includ +document +intel +paragonparallel +comput +system +languag +good +languag +implement +arrai +orient +algorithm +intel +paragon +variousvendor +supercomput +parallel +machin +info +onth +maspar +nation +supercomput +center +sweden +onlin +inform +maspar +theunivers +tennesse +resourc +found +neal +friedman +report +also +paragon +document +error +correctionsto +cours +note +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +term +project +import +part +ofth +cours +start +week +octob +review +session +final +schedul +fridai +decemb +sieg +hall +final +exam +schedul +wednesdai +decemb +normal +class +meet +room +exam +close +book +term +project +thursdai +decemb +last +updat +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..a01a5a29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,91 @@ +graphic +seminarc +rspring +numer +method +graphic +april +matrix +comput +intro +definit +properti +invers +brad +solv +linear +system +eric +april +matrix +comput +eigenvalu +eigenvector +singular +valu +decomposit +joel +april +root +find +nonlinear +equat +corei +shuichi +april +optim +intro +unconstrain +optim +kari +constrain +global +optim +kevin +linear +quadrat +program +linear +program +chuck +ronen +linear +quadrat +program +exampl +daniel +data +fit +intro +conclus +mike +linear +regress +calibr +exampl +brad +ordinari +differenti +equat +intro +method +paper +adam +method +paper +joanna +discret +method +finit +element +radios +fred +pde +finit +differ +interv +arithmet +troi +jonathan +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..f0869049 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,56 @@ +quarterscs +special +topicssteven +tanimoto +instructorcs +autumn +transcript +base +educ +winter +mathemat +experi +imag +process +spring +mathemat +experi +imag +process +autumn +technolog +collabor +learn +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +graduat +seminar +explor +varieti +topic +relat +useof +comput +educ +specif +topic +activ +varyfrom +quarter +quarter +last +updat +septemb +tanimoto +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..015c09bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,225 @@ +home +page +autumn +autumn +transcript +base +educ +wwwwelcom +home +page +world +wide +short +hypermedia +documentfor +contain +inform +theclass +keep +mind +document +static +newinform +ad +time +time +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +read +octob +mccalla +central +importanceof +student +model +intellig +tutor +read +octob +present +sandi +youngquist +meet +octob +discuss +paul +barton +davi +aboutinternet +servic +read +octob +labord +labord +problem +solv +geometri +microworld +tointellig +comput +environ +present +tessa +read +novemb +bartel +promot +mathematicsconnect +concept +map +plu +present +gari +anderson +meet +novemb +read +novemb +read +meet +onlin +first +paper +combin +degre +vision +littl +degre +technolog +noth +particularli +ambiti +descript +state +second +paper +technic +piec +promot +thethem +learner +take +respons +educ +someth +increasingli +import +futur +choic +third +read +paper +moresophist +either +first +paper +beyond +brows +elabor +possibl +group +annot +ofwww +materi +paper +toolkit +describ +intechn +term +layer +internet +infrastructur +couldmak +possibl +smart +distribut +tutori +applicationsthat +mosaic +netscap +achiev +pleas +read +either +option +advanc +educ +us +world +wide +webhttp +proceed +paper +paper +html +presentor +jeremi +baer +empow +student +inform +agehttp +ncsa +uiuc +proceed +educ +ward +ward +html +presentor +marla +baker +either +beyond +brows +share +comment +soap +trail +line +communitieshttp +proceed +paper +html +presentor +john +dietz +toolkit +enhanc +protocol +lower +layer +serviceshttp +proceed +paper +dcewebkit +html +presentor +adam +carlson +presentor +paper +concept +map +hong +zhumeet +novemb +discuss +michael +aboutcurriculum +navig +last +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..8ba6a86d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,95 @@ +home +page +autumn +autumn +technolog +collabor +learningwelcom +home +page +copyright +notic +materi +cours +subjectto +copyright +view +public +instal +site +otherthan +univers +washington +gener +descript +autumn +comput +technolog +internet +methodologiesfor +teach +learn +current +come +togeth +innew +wai +seminar +explor +read +number +paper +technolog +forcollabor +learn +particip +student +willtak +respons +make +present +group +ofthes +paper +cover +subset +paper +also +explor +possibl +applic +ofai +visual +techniqu +analysi +evid +ofstud +learn +onlin +context +meet +current +schedul +tuesdai +howev +decid +move +time +better +intopeopl +schedul +visit +meani +middl +school +washington +middl +schoolmai +schedul +depend +interest +participatingstud +last +updat +septemb +tanimoto diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..0a68c260 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,353 @@ +mvmv +global +resourc +manag +distribut +systemsprofessor +mari +vernontim +pmlocat +processor +alloc +gang +schedul +now +ousterhout +schedul +techniqu +concurr +system +inrd +conf +distribut +system +page +arpaci +dusseau +vahdat +anderson +patterson +interact +parallel +andsequenti +workload +network +workstat +proc +ofth +sigmetr +conf +processor +alloc +dynam +equi +partit +nguyen +tucker +gupta +process +control +schedul +issuesfor +multiprogram +share +memori +multiprocessor +proc +symp +oper +system +principl +page +nguyen +vaswani +zahorjan +us +runtim +measur +workloadcharacterist +parallel +processor +schedul +univ +ofwashington +technic +report +tutori +applic +processor +shun +leung +evangelo +markato +thoma +leblanc +us +processor +affin +loopschedul +share +memori +multiprocessor +proc +supercomput +expand +version +iniee +tran +parallel +distribut +system +han +zima +barbara +mari +chapman +compil +distribut +memori +system +proc +ieee +edjlali +agraw +sussman +saltz +data +parallelprogram +adapt +environ +proc +parallel +process +symp +santa +barbara +april +tutori +processor +alloc +polici +comparisonsshikharesh +majumdar +derek +eager +richard +bunt +schedul +multiprogram +parallel +system +proc +sigmetr +confer +measur +model +ofcomput +system +santa +eric +parson +kenneth +sevcik +multiprocessor +schedul +high +variabilityservic +time +distribut +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +dror +feitelson +bill +nitzberg +characterist +product +parallel +scientif +workload +thenasa +am +ipsc +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +follow +also +cover +requir +read +leutenegg +vernon +perform +multiprogram +multiprocessor +schedul +polici +proc +sigmetr +conf +measur +model +ofcomput +system +mccann +vaswani +zahorjan +dynam +processor +alloc +polici +multiprogram +share +memorymultiprocessor +transact +comput +system +processor +alloc +demand +base +schedul +patrick +sobalvarro +william +weihl +demand +base +coschedul +ofparallel +job +multiprogram +multiprocessor +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +impact +page +page +migrat +burger +hyder +miller +wood +page +tradeoff +distribut +share +memorymultiprocessor +proc +supercomput +rohit +chandra +scott +devin +verghes +anoop +gupta +mendel +rosenblum +schedul +page +migrat +multiprocessorcomput +server +proc +conf +architectur +support +programminglanguag +oper +system +asplo +jose +coordin +schedul +processor +memori +alverson +kahan +korri +mccann +smith +schedul +tera +proc +ipp +workshop +schedul +strategi +parallel +system +santa +barbara +eric +parson +kenneth +sevcik +coordin +alloc +memori +processor +multiprocessor +octob +demand +base +schedul +discuss +open +problem +dusseau +arpaci +culler +effectivedistribut +schedul +parallel +workload +proc +sigmetr +conf +measur +model +computersystem +philadelphia +june +appear +feitelson +rudolph +coschedul +base +runtim +identif +activ +work +set +parallel +program +theoret +result +processor +memori +alloc +karlin +paper diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..1791a5f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,91 @@ +winter +winter +algorithm +molecular +biologi +richard +karp +larri +ruzzo +martin +tompaclass +bboard +last +updat +handout +administr +lectur +note +draft +homework +html +syllabu +schedul +acrobat +titl +syllabu +schedul +postscript +titl +syllabu +schedul +slide +file +format +cours +materi +provid +sever +format +html +usual +format +load +fast +usual +readabl +mani +part +gener +automat +translat +latex +translat +faith +format +origin +adob +acrobat +latest +greatest +free +viewer +adob +acrobat +page +postscript +ghostscript +home +page +free +viewer +window +linux +time +acrobat +support +fewer +system +isprefer +file +smaller +render +isfast +legibl +print +ghostscriptcan +exampl +ruzzo +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..b2a0d514 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,236 @@ +architectur +lunchcs +architectur +lunchcours +organ +jean +loupbaermeet +time +tuesdai +cseg +architectur +lunch +continu +quarter +withalmost +format +previou +year +select +paper +discussedat +begin +quarter +distribut +week +ofth +paper +tobe +read +week +discuss +week +might +formal +present +work +progress +byesteem +member +lunch +mostlyw +hopefulli +heat +discussionson +paper +literatur +differ +quarter +previou +quartersi +start +read +posit +paper +fromparticip +recent +workshop +oncrit +issu +comput +architectur +research +copi +hereread +posit +paper +lead +morethem +paper +read +quarter +mani +thank +ruth +anderson +molli +brown +gershoni +matthai +philipos +tabular +summari +ofth +guru +posit +found +herefor +usual +format +thestud +lead +discuss +paper +either +informallyor +slide +credit +cours +variabl +credit +ifyou +present +read +first +meet +organ +meet +tuesdai +octob +tuesdai +read +valu +local +load +valu +predict +lipasti +wilkerson +shen +asplo +asplo +paper +line +follow +link +asplo +advanceprogrami +short +bibliographi +processor +memori +line +appreci +volunt +thesaulsburi +burger +machin +paper +tuesdai +readashlei +saulsburi +fong +pong +andrea +nowatzyk +miss +memori +wall +case +processor +memori +integr +isca +tuesdai +readm +fillo +keckler +dalli +machin +multicomput +micro +avail +follow +machinelink +tuesdai +readdoug +burger +stefano +kaxira +jame +goodman +datascalar +architectur +spsd +execut +model +univers +wisconsin +madison +comput +scienc +depart +technic +report +juli +avail +neton +tuesdai +read +intellig +iram +chip +rememb +comput +patterson +anderson +cardwel +fromm +keeton +kozyraki +thomasand +yelick +paper +availableher +fortun +author +prof +anderson +present +paper +subscrib +mail +list +send +email +themajordomo +mail +list +majordomo +mail +content +shouldinclud +line +subscrib +cseg +leav +subject +lineblank +shortli +receiv +messag +back +sai +welcom +baer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..2b92a5d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +home +pagecs +home +page +spring +offer +experiment +graduat +cours +human +comput +interact +born +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..de9cb450 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,151 @@ +compil +seminarcs +compil +seminarcours +organ +susan +eggersand +craigchambersmeet +time +wednesdai +offici +loew +butreal +meet +second +floor +atrium +scheduleweek +memspi +analyz +memori +system +bottleneck +program +margaretmartonosi +anoop +gupta +thoma +anderson +anderson +week +gener +approach +time +special +applic +charl +consel +francoi +noel +week +practic +data +flow +framework +arrai +refer +analysi +itsus +optim +evelyn +duesterwald +rajiv +gupta +maryl +soffa +week +valu +depend +graph +represent +without +taxat +danielweis +roger +crew +michael +ernst +bjarn +steensgaard +litvinov +week +iter +regist +coalesc +georg +andrew +appel +garrett +week +gener +machin +specif +optim +compil +roger +hooverand +kenneth +zadeck +dean +grant +week +paradigm +compil +distribut +memori +multicomput +byprivthviraj +banerje +lewi +week +minimum +cost +interprocedur +regist +alloc +stevenkurland +charl +fischer +secoski +week +data +special +todd +knoblock +erik +grove +week +lazi +strength +reduct +jen +knoop +oliv +ruth +andbernhard +steffen +mock +tullsen +subscrib +mail +list +send +email +majordomo +mail +content +includ +line +subscribecsek +leav +subject +line +blank +shortlyrec +messag +back +sai +welcom +melodi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..3b19b343 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,317 @@ +parallel +program +environmentslarri +snyderautumn +quarter +mondai +loew +welcom +home +page +quarter +read +select +paper +recent +ipp +ppopp +supercomput +icpp +lcpc +ten +schedul +quarter +atmospher +casual +andwil +hopefulli +ignit +live +discuss +everyon +attend +seminar +expect +present +thepap +still +spot +open +hurri +sign +pleas +send +mail +majordomo +subscrib +cseo +bodi +messag +subscribeto +class +mail +list +datepaperpresentor +compil +matlab +program +scalapack +exploit +task +data +parallel +ramaswami +hodg +banerje +ipp +falcon +matlab +interact +restructur +compil +deros +gallivan +gallopoulo +marsolf +padua +lcpc +compil +portabl +messag +driven +program +ramkumar +forb +kale +icpp +sung +cross +loop +reus +analysi +applic +cach +optim +cooper +kennedi +mcintosh +lcpc +ruth +global +commun +analysi +optim +chakarabarti +gupta +choi +pldi +sean +integer +compil +perform +analysi +environ +data +parallel +program +adv +input +output +characterist +scalabl +parallel +applic +crandal +aydt +chien +reed +jason +holidai +stream +librari +complex +distribut +data +structur +gotwal +sriniva +gannon +ppopp +brad +model +compil +strategi +core +data +parallel +program +bordawekar +choudahari +kennedi +koelbel +paleczni +ppopp +local +iter +comput +block +cyclic +distribut +midkiff +icpp +util +thread +data +parallel +program +fahring +hain +mehrotra +eric +cilk +effici +multithread +runtim +system +blumof +joerg +kuszmaul +leiserson +randal +zhou +ppopp +compil +gener +parallel +code +object +orient +mathemat +model +andersson +fritzson +ppopp +analysi +cross +loop +reus +analysi +applic +cach +optim +cooper +kennedi +mcintosh +lcpc +commun +optim +global +commun +analysi +optim +chakarabarti +gupta +choi +pldi +gener +realign +base +commun +program +kamachi +kusano +suehiro +tamura +sakon +ipp +commun +optim +parallel +comput +us +data +access +inform +rinard +tool +integer +compil +perform +analysi +environ +data +parallel +program +adv +rel +debug +applic +develop +larg +numer +model +abramson +foster +michalak +sosic +parallel +model +compil +strategi +core +data +parallel +program +bordawekar +choudahari +kennedi +koelbel +paleczni +ppopp +input +output +characterist +scalabl +parallel +applic +crandal +aydt +chien +reed +data +distribut +local +iter +comput +block +cyclic +distribut +midkiff +icpp +potpourri +compil +matlab +program +scalapack +exploit +task +data +parallel +ramaswami +hodg +banerje +ipp +util +thread +data +parallel +program +fahring +hain +mehrotra +sung +choi +last +modifi +tuesdai +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..f12efd0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,33 @@ +system +seminar +preliminariesif +alreadi +system +mail +list +need +variou +crucial +bit +ofinform +week +seminar +cancel +besent +list +send +mail +system +request +line +subscrib +systemsin +messag +bodi +quarterli +web +spring +summer +autumn +winter +autumn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..bc30aee2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,89 @@ +summer +quarterw +meet +fridai +loew +quarter +wewil +read +final +paper +appear +upcomingacm +symposium +oper +system +principl +sosp +pleas +read +paper +meet +havean +interact +discuss +quarter +scheduleoct +implement +global +memori +manag +workstat +cluster +present +feelei +log +virtual +memori +present +savag +autoraid +hierarch +storag +system +present +wilk +serverless +network +file +system +present +franklin +montgomeri +tiwari +hypervisor +base +fault +toler +present +chan +philipos +wolman +exploit +weak +connect +mobil +file +access +present +voelker +litvinov +perform +cach +coher +stackabl +file +present +sriram +fiuczynski +impact +architectur +trend +oper +system +perform +present +anderson +romer +return +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..6ea61383 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,219 @@ +high +perform +scientif +comput +zphigh +perform +scientif +comput +zpllarri +snyder +teamautumn +quarter +wednesdai +sieg +loew +dai +welcom +home +page +pleas +send +mail +majordomo +subscrib +csezpl +bodi +messag +subscribeto +class +mail +list +student +also +interest +join +usersmail +list +mail +list +us +distribut +informationabout +compil +librarai +releas +relatedinform +ad +mail +list +send +mail +majordomo +subscrib +user +bodi +messag +descriptionzpl +scientificprogram +languag +suitabl +comput +previous +written +infortran +arrai +languag +dramaticallysimplifi +program +elimin +nuisanc +loop +index +run +fast +modern +machin +includ +parallel +supercomput +allow +programm +develop +code +workstat +andtrivi +migrat +largest +parallel +machin +simpli +byrecompil +develop +releas +toth +scientif +comput +commun +class +design +scientist +engin +comput +scientist +want +learn +modern +languag +supercomput +effect +scientif +comput +class +cover +follow +topic +state +high +perform +comput +syntax +semant +algorithm +exploit +high +perform +parallel +machin +wysiwyg +perform +write +fast +program +easili +develop +program +workstat +supercomput +well +program +perform +scienc +faster +program +prototyp +scientif +comput +matlab +text +booknon +class +reli +materi +document +found +onin +page +specif +follow +close +zplprogram +guid +version +prerequisitesfamiliar +scientif +comput +fortran +ormatlab +program +unix +platform +assum +class +variabl +credit +audit +student +write +debug +program +select +technic +disciplin +suitabl +comput +rang +whole +applic +kernel +inner +loop +scientif +comput +us +informationcours +syllabu +includ +lectur +note +appli +ncsa +block +grant +account +faculti +staff +student +compil +program +us +remotezpl +compileroth +import +link +sung +choi +last +modifi +wednesdai +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..cc49a04f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,185 @@ +home +page +autumn +introduct +digit +design +autumn +quarter +gaetano +borriello +corei +andersonwelcom +home +page +home +page +contain +whole +bunch +us +inform +class +keep +mind +document +static +inform +especi +class +announc +messag +ad +frequent +problem +document +gener +send +mail +tocs +webmast +class +announc +notic +instructor +system +administr +last +updat +class +mail +archiv +messagess +washington +last +updat +send +mail +class +instructor +instructor +everyon +cours +administr +goal +syllabu +meet +time +lectur +final +exam +mondai +decemb +workload +grade +expect +laboratori +softwar +tool +polici +collabor +cheat +announc +mail +address +overal +schedul +lectur +topic +instructor +gaetano +borriello +gaetano +offic +hour +sieg +corei +anderson +corin +offic +hour +sieg +aweekli +assign +weekli +quizz +final +exam +lectur +onlin +version +slide +us +lectur +textbook +contemporari +logic +design +katz +benjamin +cum +addison +weslei +maintain +author +katz +maintain +publish +benjamin +cum +addison +weslei +note +topic +interest +evolut +implement +technolog +comput +aid +design +tool +logic +design +synario +feedback +tell +think +thing +go +even +anonym +desir +question +cours +evalu +complet +last +class +link +previou +quarter +portion +reprint +adapt +academ +nonprofit +purpos +provid +sourc +accur +quot +duli +credit +copyright +depart +comput +scienc +engin +univers +washington +comment +webmast +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..51be713f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,41 @@ +comput +societycs +comput +societywelcom +home +page +comput +societi +cours +wintercs +comput +societi +focu +social +econom +ethic +legal +implic +present +internet +futur +nation +andglob +inform +highwai +instructor +alan +born +class +time +tue +thur +sieg +cours +syllabusclass +schedulelink +relev +sitesbook +journal +avail +referenceassignmentsassign diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..bf348fd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,56 @@ +index +pagecs +artifici +intelligencefal +quarter +artifici +intellig +pose +fundament +andchalleng +question +comput +scienc +build +intelligentmachin +cours +address +question +provid +anin +depth +introduct +select +topic +includ +agentarchitectur +knowledg +represent +search +plan +machinelearn +reason +uncertainti +methodolog +staff +weldweld +sieg +hour +marc +friedmanfriedman +sieg +hour +nick +kushmericknick +sieg +hour +outlin +topicsprojectread +assignmentsassign +examsgradingresourcesth +class +mailinglist +also +archiv +past +messag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..5d62e099 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,191 @@ +ics +intellig +inform +internet +server +meet +tuesdai +siegcreat +server +side +scriptspleas +read +guidelin +towrit +program +execut +someon +follow +link +tothem +peopl +place +collect +mail +list +gener +paul +program +check +futur +begun +updat +file +rememb +want +check +first +us +index +html +check +back +index +html +ad +phoenix +impress +older +topic +offici +releas +glimps +instal +check +manual +page +well +glimps +develop +home +page +interest +detail +glimps +work +read +winter +usenix +paper +design +implement +glimps +inform +wide +area +inform +server +wai +zwhere +mosiac +interfac +zephyr +locat +databas +show +user +current +regist +zephyr +make +guess +room +show +regist +zephyr +server +anoth +version +znol +zwatch +zlocat +extra +info +link +except +show +statu +regist +zephyr +user +on +anyon +file +note +lectur +discuss +mail +sent +mail +list +displai +belief +index +page +short +mike +releg +review +site +comment +make +good +page +anoth +page +miscellan +comment +rather +rambl +kurt +grumbl +interfac +problem +improv +mosaic +bring +class +paul +provid +luddit +perspect +idea +intellig +filter +network +inform +sourc +nick +provid +vagu +relat +comment +decemb +cacm +inform +filter +check +summari +articl +rememb +want +chang +document +check +itout +first +us +index +html +check +back +withci +index +html +send +mail +theentir +class +us +address +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..37bff6ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,37 @@ +home +page +machin +organ +program +credit +introduct +current +system +structur +control +commun +memori +processor +devic +project +involv +detail +studi +specif +small +computerhardwar +softwar +system +prerequisit +consent +instructor +open +student +taken +open +freshmen +semesterli +cours +inform +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..d252f66a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,31 @@ +page +introduct +comput +architectur +credit +design +comput +system +compon +processor +design +instruct +design +address +control +structur +microprogram +memori +manag +cach +memori +hierarchi +interrupt +structur +prerequisit +andc +semesterli +cours +inform +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..bee89134 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,30 @@ +home +page +advanc +comput +architectur +credit +advanc +techniqu +comput +design +parallel +process +andpipelin +multiprocessor +multi +comput +network +high +performancemachin +special +purpos +processor +data +flow +architectur +prerequisit +semesterli +cours +inform +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..c4bca9a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,33 @@ +home +page +advanc +comput +architectur +credit +parallel +algorithm +principl +parallel +detect +vectorizingcompil +interconnect +network +simd +mimd +machin +processorsynchron +data +coher +multi +dataflow +machin +special +purposeprocessor +prerequisit +consent +instructor +semesterli +cours +inform +info +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..de983b10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,446 @@ +introduct +oper +system +spring +univers +wisconsin +madisoncomput +scienc +departmentc +spring +bart +millerc +introduct +oper +systemsnew +stufffin +grade +post +problem +set +avail +program +assign +avail +quizz +answer +avail +section +lectur +note +readi +read +print +class +staffinstructor +bart +milleremail +bart +wisc +eduoffic +csphone +offic +hour +wednesdai +fridai +noonor +appoint +karuna +muthiahemail +muthiah +wisc +eduoffic +csphone +offic +hour +mondai +wednesdai +jonathan +weyersemail +weyer +wisc +eduoffic +csphone +offic +hour +mondai +fridai +wednesdai +cours +materialsth +cours +organ +around +lectur +note +thelectur +notesar +avail +class +page +need +textbookmodern +oper +system +tanenbaum +programmingassign +purchas +copi +ofobject +orient +program +us +pohl +whatev +favorit +book +lectur +note +avail +read +first +section +come +class +modern +oper +systemsandobject +orient +program +us +avail +book +store +lectur +discuss +sectionslectur +time +tuesdai +thursdai +comput +sciencesdiscuss +section +wednesdai +nolandnot +extra +wednesdai +discuss +section +section +us +mainli +recit +section +discuss +materialcov +lectur +weekli +quizz +occas +us +discuss +import +detail +ofth +program +assign +homework +make +sure +leav +room +schedul +attend +section +exam +quizzesther +midterm +final +option +week +start +second +week +class +quiz +thediscuss +section +quizz +last +minut +follow +past +quizz +answer +process +concurr +februari +semaphor +februari +monitor +februari +messag +februari +schedul +usetrac +activ +real +unix +system +drive +simul +goal +assign +learn +schedul +algorithm +learn +trace +driven +simul +experi +quantit +analyz +comput +system +written +problem +setsdur +semest +hand +severalwritten +problem +set +base +lectur +problem +set +need +turn +though +find +poorli +week +quizz +youdon +problemssolut +set +problem +hand +week +theproblem +happi +answer +question +problem +andlook +solut +problem +avail +goal +assign +learn +us +variou +synchronizationprimit +solv +problem +problem +avail +goal +assign +learn +memori +manag +hardwar +softwar +late +workassign +date +list +handout +entir +semest +havethre +late +daysof +credit +late +dai +differ +assign +eachof +three +assign +three +dai +assign +three +dai +absolut +late +work +accept +late +dai +cannot +us +assignmentthat +last +weekof +class +cheatingprogram +assign +done +partner +group +work +independ +group +cheater +receiv +maximum +penalti +includ +receivingan +grade +cours +mark +transcript +comput +facilitiesw +probabl +us +solari +unix +workstat +cours +unix +workstat +run +solari +oper +system +window +student +regist +class +account +grade +policyif +take +final +take +final +program +assign +program +assign +quizz +quizz +final +final +lowest +quiz +grade +drop +averag +beno +quiz +first +week +week +spring +break +take +final +exam +count +past +taught +class +class +inth +rang +class +scheduleth +follow +schedul +tent +could +probabl +chang +week +januari +introduct +overview +processesweek +januari +februari +dispatch +process +creationweek +februari +cooper +process +synchronizationweek +februari +semaphoresweek +februari +semaphor +monitorsweek +februari +messag +deadlocksweek +march +debug +strategi +dynam +memori +alloc +march +spring +breakweek +march +relocationweek +march +segment +page +tlbsweek +april +virtual +memori +page +replac +thrash +class +thursdai +week +april +work +set +devic +filesweek +april +disk +alloc +schedul +directoriesweek +april +protectionweek +april +secur +advanc +topic +read +week +advanc +topic +read +final +final +exam +tuesdai +last +modifi +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..61b1107f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,109 @@ +internet +honor +seminarunivers +wisconsin +madisoncomput +scienc +departmentc +spring +bart +millerc +internet +honor +seminarinstructor +bart +milleremail +bart +wisc +eduoffic +csphone +offic +hour +wednesdai +fridai +noonor +appoint +lectureslectur +time +mondai +comput +sciencesclass +schedulether +written +assign +class +requir +attendal +lectur +particip +discuss +follow +schedul +mostli +right +could +chang +week +januari +bart +miller +introduct +overviewweek +januari +larri +landweb +internet +architectur +protocolsweek +februari +week +februari +bart +miller +client +server +remot +procedur +callsweek +februari +system +securityweek +februari +eric +bach +secur +encryptionweek +march +march +spring +breakweek +march +miron +livni +imag +pictur +netweek +march +week +april +high +perform +file +systemsweek +april +week +april +david +wood +internet +supercomputerweek +april +laru +javaweek +april +week +bart +miller +discussionslast +modifi +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..2ff893b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,254 @@ +advanc +oper +system +fall +univers +wisconsin +madisoncomput +scienc +departmentc +fall +bart +millerc +advanc +oper +systemssummarythi +cours +intend +give +broad +exposur +advancedoper +system +topic +read +discuss +topic +protect +secur +memori +manag +oper +system +kernel +file +system +synchron +name +distribut +system +pleas +read +rest +inform +sheet +carefulli +textther +realli +satisfactori +textbook +graduat +level +operatingsystemsclass +current +literatur +text +cours +structur +around +read +journal +articl +andconfer +proceed +abl +purchas +read +doit +handout +class +discuss +topic +relev +current +paper +lectur +detail +detail +review +paper +willinstead +adiscuss +major +topic +theme +us +paper +focal +point +form +read +group +classmat +meetonc +twice +week +discuss +detail +assign +paper +read +especi +import +part +class +read +listaccord +post +read +schedul +formula +success +class +read +papersindepend +discuss +read +group +try +identifyth +import +issu +particip +class +discuss +thepap +class +discussionsclass +meet +form +discuss +lectur +talk +topic +discuss +besupport +comment +opinion +will +particip +activ +daili +class +geta +expect +quietli +listen +week +beveri +unhappi +class +papersdur +class +write +paper +short +page +andon +longer +first +paperwil +design +base +idea +read +work +well +understood +oper +system +facilityand +design +extens +area +second +paper +involv +project +paper +summaryof +project +aselect +project +topicsfrom +choos +write +well +import +write +good +idea +paper +review +least +twice +first +read +refere +paper +fellowstud +give +writer +critic +comment +anoth +person +giveth +reader +look +someon +els +write +paper +revis +second +pass +read +examsther +exam +paper +read +keep +busi +gradesscor +final +grade +post +assign +grade +first +paper +assignmenti +availbl +summari +score +fromth +project +proposalsi +also +availbl +final +cours +gradesar +avail +detailstim +tuesdai +thursdai +place +csoffic +hour +tuesdai +thursdai +noonlast +modifi +bybart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..c2869d67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,262 @@ +section +home +pagec +introduct +comput +programmingsect +fortran +credit +cours +cover +basic +program +structur +need +prepar +student +elementari +engin +cours +prior +comput +program +experi +requir +basic +knowledg +comput +assum +materi +cover +enabl +write +simpl +comput +program +solv +engin +problem +elementari +cours +program +done +fortran +cours +intend +student +receiv +littl +program +instruct +high +school +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +click +cours +descript +menu +import +announc +read +lectur +instructor +grade +polici +syllabu +text +lectur +note +program +assign +problem +solv +exercis +comput +pointer +interest +lectur +section +psycholog +march +pleas +punctual +lectur +avoid +disturb +class +instructor +gareth +bestor +offic +comput +scienc +overal +structur +program +primarili +exercis +gener +problem +solv +write +fortran +code +though +want +time +solut +algorithm +even +depend +particular +program +languag +fortran +solut +class +follow +mondai +exercis +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +subroutin +click +solut +question +function +click +solut +week +question +click +solut +comput +labyou +us +vectra +comput +scienc +statist +contain +hewlett +packard +vectra +run +microsoft +window +microsoft +fortran +open +seven +dai +week +except +certain +holidai +printer +room +locat +across +hall +also +home +dorm +comput +write +program +howev +probabl +purchas +copi +microsoft +fortran +lahei +person +fortran +insid +cover +textbook +also +work +comput +lab +campu +howev +fortran +compil +pleas +first +us +softwar +us +includ +microsoft +window +microsoft +fortran +mail +netscap +pointer +interest +home +page +gareth +bestor +home +page +comput +scienc +depart +home +page +start +point +internet +explor +lyco +search +world +wide +keyword +dilbert +comic +relief +long +night +assign +copyright +copi +gareth +bestor +bestor +wisc +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..4df8da39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,613 @@ +section +home +pagec +algebra +languag +programmingsect +fortran +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +click +cours +descript +menu +import +announc +read +lectur +instructor +grade +polici +syllabu +text +lectur +note +exam +program +assign +problem +solv +exercis +comput +pointer +interest +lectur +section +psycholog +pleas +punctual +lectur +avoid +disturb +class +instructor +gareth +bestor +offic +comput +scienc +lowest +exam +score +contribut +must +complet +hand +assign +elig +receiv +pass +grade +cours +final +grade +section +grade +curv +mean +rang +curv +comput +final +exam +complet +exam +thur +februari +exam +april +comput +scienc +final +amclick +list +current +class +grade +identif +student +syllabu +tent +follow +topic +section +text +approxim +cover +week +semest +class +read +relev +section +text +come +class +abl +question +class +anyth +unsur +instead +wait +try +work +assign +discov +didn +realli +understand +someth +week +term +exam +week +term +exam +week +final +exam +text +lectur +notestext +fortran +engin +applic +edit +koffman +friedman +lectur +note +copi +lectur +note +avail +line +week +class +import +line +lectur +note +substitut +come +class +cover +show +overhead +projector +includ +exampl +addit +note +board +respons +materi +cover +class +week +week +week +week +week +week +week +week +week +week +week +week +week +week +week +exam +program +assignmentsther +three +exam +term +exam +final +exam +exam +constitut +final +grade +import +well +ensur +good +grade +regardless +perform +assign +exam +approxim +hour +long +though +stai +longer +need +extra +time +close +book +need +bring +pencil +exam +calcul +necessari +even +us +exam +solut +term +exam +term +exam +seven +program +assign +includ +program +contribut +final +grade +assign +must +complet +hand +elig +receiv +pass +grade +cours +must +attempt +complet +everi +program +assign +hand +program +even +compil +without +error +grade +automat +receiv +zero +grade +risk +fail +cours +gradesheet +handin +directori +hand +assign +onlin +late +polici +polici +academ +misconduct +cheat +assign +specif +program +mondai +program +fridai +program +wednesdai +program +wednesdai +program +fridai +program +mondai +program +fridai +pmhow +help +assign +consult +consult +comput +help +problem +wear +name +tag +duti +approxim +consult +answer +short +question +compil +error +messag +program +syntax +well +login +printer +send +mail +netscap +click +inform +consult +instructor +gener +question +assign +question +requir +long +explan +best +answer +pleas +offic +hour +send +mail +normal +offic +except +offic +hour +dissert +research +home +modem +therefor +want +outsid +offic +hour +pleas +make +appoint +first +easili +contact +mail +regularli +login +read +mail +home +click +send +mail +problem +solv +exerciseson +import +skill +learn +class +problem +solv +good +problem +solv +skill +distinguish +good +comput +programm +doesn +matter +familiar +skill +particular +program +languag +understand +solv +problem +abl +write +comput +program +languag +help +learn +problem +solv +skill +techniqu +assign +weekli +problem +solv +exercis +small +trivial +problem +give +mondai +look +problem +think +week +right +step +would +solv +problem +overal +structur +program +primarili +exercis +gener +problem +solv +write +fortran +code +though +want +time +solut +algorithm +even +depend +particular +program +languag +fortran +fridai +solut +class +exercis +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +subroutin +click +solut +question +function +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +click +solut +week +question +comput +labyou +us +vectra +comput +scienc +statist +contain +hewlett +packard +vectra +run +microsoft +window +microsoft +fortran +open +seven +dai +week +except +certain +holidai +printer +room +locat +across +hall +also +home +dorm +comput +write +program +howev +probabl +purchas +copi +microsoft +fortran +lahei +person +fortran +insid +cover +textbook +also +work +comput +lab +campu +howev +fortran +compil +pleas +first +us +softwar +us +includ +microsoft +window +microsoft +fortran +mail +netscap +pointer +interest +home +page +gareth +bestor +home +page +comput +scienc +depart +home +page +start +point +internet +explor +lyco +search +world +wide +keyword +dilbert +comic +relief +long +night +assign +copyright +copi +gareth +bestor +bestor +wisc +last +modifi +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..c5e14118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,86 @@ +fall +section +fall +section +algebra +languag +program +name +dave +egglestonemail +burnett +wisc +eduoffic +offic +phone +offic +hour +announc +updat +note +origin +output +prog +page +error +dai +week +correct +valu +inform +exam +updat +question +ask +program +hourlywork +classread +scan +thursdai +class +program +avail +solut +quiz +grade +page +gener +cours +informationc +home +pagecours +objectivesvectra +labc +consultantssyllabuswork +homeclass +handout +gradeshomeworkexam +quizzesmiscellan +archivepolici +informationemail +policygrad +policyl +policyacadem +misconduct +policytextproblem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +erratalast +modifi +dave +eggleston +burnett +wisc +base +greg +sharp +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..7a8a93f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,180 @@ +spring +advanc +oper +system +spring +summari +cours +intend +give +broad +exposur +advancedoper +system +topic +import +compon +cours +read +discuss +ofvari +research +paper +project +involv +implement +anexperiment +system +research +paper +cover +topicsinclud +synchron +commun +memori +manag +file +system +protect +secur +distribut +system +project +requir +tochoos +problem +research +propos +solut +implement +prototyp +system +lectur +info +class +discuss +topic +relev +current +paper +lectur +detail +review +paper +rathera +discuss +major +topic +theme +us +paper +focal +point +activ +particip +discuss +strongli +encourag +lectur +tuesdai +thursdai +engin +halloffic +hour +tuesdai +appoint +comput +scienc +text +text +select +classic +paper +oper +system +design +implement +purchas +read +doit +formerli +macc +document +deskfor +read +semest +differ +previou +semest +pleas +copi +paper +grade +exam +cours +instead +assign +first +assign +us +benchmark +suit +measur +performanceof +variou +oper +system +suno +solari +linux +window +manya +hand +second +assign +project +involv +project +propos +implement +final +report +project +present +total +grade +class +particip +count +first +assign +count +project +count +schedul +tent +schedul +project +list +suggest +project +make +project +well +either +case +need +come +discuss +choos +project +team +peopl +allow +slide +slide +us +lectur +assig +first +assign diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..75c0256a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,87 @@ +introduct +comput +program +comput +scienc +fall +credit +cours +design +cover +basic +programmingstructur +need +prepar +student +elementaryengin +cours +materi +cover +suffici +enableth +student +write +simpl +program +solv +engin +problem +inelementari +cours +materi +essenti +first +half +list +fall +section +lectur +fortran +jeff +lampert +lectur +fortran +jeff +lampert +lectur +toni +silva +lectur +toni +silva +lectur +sidnei +hummert +lectur +sidnei +hummert +lectur +michael +birk +lectur +michael +birk +lectur +sidnei +hummert +lectur +sidnei +hummert +lectur +toni +silva +lectur +toni +silva +lectur +russel +man +lectur +russel +man +lectur +martin +reameslast +modifi +anthoni +silva diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..0f718d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,222 @@ +fall +midterm +exam +answer +keyinstructorprofessor +desautelsoffic +comput +sciencesoffic +hour +mondai +wednesdai +appoint +phone +dept +offic +mail +wisc +teach +assistantsfollow +link +home +page +name +kelli +ratliff +email +kelli +wisc +offic +offic +phone +offic +hour +section +grade +name +nathan +bockrath +email +bockrath +wisc +offic +offic +phone +offic +hour +section +grade +name +rehnuma +rahman +email +rehnuma +wisc +offic +offic +phone +offic +hour +section +grade +name +jaim +fink +email +jfink +wisc +offic +offic +phone +offic +hour +section +grade +name +ashraf +aboulnaga +email +ashraf +wisc +offic +offic +phone +offic +hour +section +grade +name +andrew +geeri +email +geeri +wisc +offic +offic +phone +offic +hour +section +grade +name +jame +herro +email +jherro +wisc +offic +offic +phone +offic +hour +section +grade +name +abhinav +gupta +email +agupta +wisc +offic +offic +phone +offic +hour +section +grade +name +jyothi +krothap +email +jyothi +wisc +offic +offic +phone +offic +hour +section +grade +name +chiang +email +suhui +wisc +offic +offic +phone +offic +hour +section +grade +name +thano +tsioli +email +tsioli +wisc +offic +offic +phone +offic +hour +section +gradesexplor +compani +whose +softwar +hardwar +borland +hewlett +packard +intel +microsoft +novel +us +link +explor +lyco +enorm +databas +site +yahoo +internet +resourc +classifi +categori +lookup +search +virtual +tourist +find +site +around +world +click +world +mother +larg +alphabet +list +site +cool +list +especi +excel +site +univers +wisconsin +madison +home +page +page +origin +creat +maintain +teitelbaum +thano +tsioli +modifi +maintain +kelli +ratliff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..0f718d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,222 @@ +fall +midterm +exam +answer +keyinstructorprofessor +desautelsoffic +comput +sciencesoffic +hour +mondai +wednesdai +appoint +phone +dept +offic +mail +wisc +teach +assistantsfollow +link +home +page +name +kelli +ratliff +email +kelli +wisc +offic +offic +phone +offic +hour +section +grade +name +nathan +bockrath +email +bockrath +wisc +offic +offic +phone +offic +hour +section +grade +name +rehnuma +rahman +email +rehnuma +wisc +offic +offic +phone +offic +hour +section +grade +name +jaim +fink +email +jfink +wisc +offic +offic +phone +offic +hour +section +grade +name +ashraf +aboulnaga +email +ashraf +wisc +offic +offic +phone +offic +hour +section +grade +name +andrew +geeri +email +geeri +wisc +offic +offic +phone +offic +hour +section +grade +name +jame +herro +email +jherro +wisc +offic +offic +phone +offic +hour +section +grade +name +abhinav +gupta +email +agupta +wisc +offic +offic +phone +offic +hour +section +grade +name +jyothi +krothap +email +jyothi +wisc +offic +offic +phone +offic +hour +section +grade +name +chiang +email +suhui +wisc +offic +offic +phone +offic +hour +section +grade +name +thano +tsioli +email +tsioli +wisc +offic +offic +phone +offic +hour +section +gradesexplor +compani +whose +softwar +hardwar +borland +hewlett +packard +intel +microsoft +novel +us +link +explor +lyco +enorm +databas +site +yahoo +internet +resourc +classifi +categori +lookup +search +virtual +tourist +find +site +around +world +click +world +mother +larg +alphabet +list +site +cool +list +especi +excel +site +univers +wisconsin +madison +home +page +page +origin +creat +maintain +teitelbaum +thano +tsioli +modifi +maintain +kelli +ratliff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..0ba84476 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,228 @@ +us +comput +lectur +us +computersinstructor +info +instructor +salli +petersonoffic +comput +sciencephon +mail +salli +wisc +slpeter +facstaff +wisc +eduoffic +hour +tuesdai +thursdai +appointmentvit +class +info +time +lectur +lectur +place +lectur +held +engin +halllectur +text +inform +technolog +societi +laudon +traver +laudonlab +text +point +click +drag +us +macintosh +petersoncours +introduct +class +design +take +zero +knowledg +computersto +crack +shot +user +us +skill +throughcolleg +arena +section +taught +us +macintoshcomput +section +us +avail +csuse +comput +lectur +cours +compon +part +lectur +lectur +discuss +comput +gener +term +gener +comput +scienc +topic +discuss +comput +work +includ +follow +topic +necessarili +order +applic +program +includ +word +processor +spreadsheet +graphic +databas +hardwar +input +output +storag +devic +oper +system +program +languag +network +telecommun +artifici +intellig +expert +system +comput +relat +social +issu +part +laboratori +discuss +section +hand +experienceon +macintosh +iici +comput +follow +program +word +process +word +electron +mail +newsgroup +world +wide +eudora +netscap +paint +draw +aldu +superpaint +spreadsheet +chart +excel +databas +filemak +present +manag +hypercard +desktop +publish +aldu +pagemak +integr +part +learn +macintosh +oper +system +system +well +addit +special +tool +scanner +avail +teach +section +thegoal +provid +high +qualiti +instruct +rich +educationalexperi +namesectiontimedai +bodner +mwnick +leavi +mwtrshannon +lloyd +trtrjeff +reminga +mwfmwira +sharenow +trtrbrian +swander +mwfmwfbrad +thayer +mwfmwfjoe +varghes +trtrgeoff +weinberg +mwftrmaria +yuin +mwfmwrecommend +background +background +necessari +cours +assign +quizz +exam +grade +base +exam +lectur +regular +assignmentsand +quizz +syllabu +glanc +syllabu +contain +nitti +gritti +class +detail +click +assign +assign +superpaintassign +excellast +modifi +octob +jonbodn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..0ba84476 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,228 @@ +us +comput +lectur +us +computersinstructor +info +instructor +salli +petersonoffic +comput +sciencephon +mail +salli +wisc +slpeter +facstaff +wisc +eduoffic +hour +tuesdai +thursdai +appointmentvit +class +info +time +lectur +lectur +place +lectur +held +engin +halllectur +text +inform +technolog +societi +laudon +traver +laudonlab +text +point +click +drag +us +macintosh +petersoncours +introduct +class +design +take +zero +knowledg +computersto +crack +shot +user +us +skill +throughcolleg +arena +section +taught +us +macintoshcomput +section +us +avail +csuse +comput +lectur +cours +compon +part +lectur +lectur +discuss +comput +gener +term +gener +comput +scienc +topic +discuss +comput +work +includ +follow +topic +necessarili +order +applic +program +includ +word +processor +spreadsheet +graphic +databas +hardwar +input +output +storag +devic +oper +system +program +languag +network +telecommun +artifici +intellig +expert +system +comput +relat +social +issu +part +laboratori +discuss +section +hand +experienceon +macintosh +iici +comput +follow +program +word +process +word +electron +mail +newsgroup +world +wide +eudora +netscap +paint +draw +aldu +superpaint +spreadsheet +chart +excel +databas +filemak +present +manag +hypercard +desktop +publish +aldu +pagemak +integr +part +learn +macintosh +oper +system +system +well +addit +special +tool +scanner +avail +teach +section +thegoal +provid +high +qualiti +instruct +rich +educationalexperi +namesectiontimedai +bodner +mwnick +leavi +mwtrshannon +lloyd +trtrjeff +reminga +mwfmwira +sharenow +trtrbrian +swander +mwfmwfbrad +thayer +mwfmwfjoe +varghes +trtrgeoff +weinberg +mwftrmaria +yuin +mwfmwrecommend +background +background +necessari +cours +assign +quizz +exam +grade +base +exam +lectur +regular +assignmentsand +quizz +syllabu +glanc +syllabu +contain +nitti +gritti +class +detail +click +assign +assign +superpaintassign +excellast +modifi +octob +jonbodn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..f0471463 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,81 @@ +home +pagecomput +scienc +algebra +languag +program +section +instructorsw +would +like +comment +suggest +complaint +feedback +provid +click +skrentni +coordin +offic +email +skrentni +csinform +section +frequent +ask +question +cours +overview +microcomput +laboratori +consult +fall +consult +schedul +tutor +mainli +polici +academ +misconduct +cours +offer +depart +softwar +section +introduct +microsoft +window +hint +window +compil +window +oper +system +email +netscap +creat +us +subdirectoriesc +inform +savitch +text +book +introduct +borland +languag +borland +integr +develop +environmentfortran +inform +jeff +lampert +home +page +section +last +updat +skrentni +coordin +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..3b8596ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,41 @@ +cours +infocours +inform +cscours +descriptionfrom +guidebook +undergradu +student +construct +algorithm +problem +solv +instruct +experi +least +procedur +orient +languag +pascal +fortran +survei +languag +advanc +program +techniqu +prereq +advanc +high +school +mathemat +prepar +colleg +work +mathemat +statist +logic +consent +instructor +open +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..f0471463 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,81 @@ +home +pagecomput +scienc +algebra +languag +program +section +instructorsw +would +like +comment +suggest +complaint +feedback +provid +click +skrentni +coordin +offic +email +skrentni +csinform +section +frequent +ask +question +cours +overview +microcomput +laboratori +consult +fall +consult +schedul +tutor +mainli +polici +academ +misconduct +cours +offer +depart +softwar +section +introduct +microsoft +window +hint +window +compil +window +oper +system +email +netscap +creat +us +subdirectoriesc +inform +savitch +text +book +introduct +borland +languag +borland +integr +develop +environmentfortran +inform +jeff +lampert +home +page +section +last +updat +skrentni +coordin +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..b0fe92c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,69 @@ +home +page +problem +solv +us +comput +fall +comput +scienc +check +follow +page +inform +instructor +teach +assist +includ +offic +hour +inform +assign +includ +suggest +copi +assign +explan +grade +check +polici +assign +work +inform +examin +copi +past +exam +inform +lab +includ +copi +handout +document +includ +syllabu +mani +document +page +postscript +need +postscript +viewer +obtain +site +check +local +servic +section +depart +home +page +local +servic +page +ghost +directori +read +readm +file +direct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..b0fe92c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,69 @@ +home +page +problem +solv +us +comput +fall +comput +scienc +check +follow +page +inform +instructor +teach +assist +includ +offic +hour +inform +assign +includ +suggest +copi +assign +explan +grade +check +polici +assign +work +inform +examin +copi +past +exam +inform +lab +includ +copi +handout +document +includ +syllabu +mani +document +page +postscript +need +postscript +viewer +obtain +site +check +local +servic +section +depart +home +page +local +servic +page +ghost +directori +read +readm +file +direct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..c48a1560 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,271 @@ +home +page +fall +fall +cours +inform +instructor +offic +hour +offic +hour +class +cancel +handout +assign +solut +exam +grade +simul +help +lectur +note +frequent +ask +question +instructor +section +jerri +tusch +offic +phone +hour +mail +jerri +wisc +tutsch +execpc +class +section +section +nolandsect +karen +miller +offic +phone +hour +mail +smoler +wisc +class +time +psycholog +sunlung +suen +offic +phone +hour +tuth +mail +ssuen +wisc +edusridevi +bhamidipati +offic +phone +hour +mail +bsri +wisc +edumohammad +asgarian +offic +phone +hour +tuth +mail +wisc +class +cancel +karen +section +class +mondai +septemb +class +wednesdai +novemb +jerri +section +cancel +schedul +handout +revis +chapter +postscript +cours +overview +jerri +section +html +assign +assign +html +html +assign +program +homework +assign +karen +section +program +homework +assign +program +homework +assign +program +homework +solut +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +programs +programm +quiz +section +assign +html +program +examsal +quizz +open +book +note +calcul +karen +section +quiz +fridai +septemb +class +quiz +fridai +septemb +class +quiz +fridai +octob +class +quiz +fridai +octob +class +quiz +fridai +novemb +class +quiz +fridai +novemb +class +quiz +probabl +fridai +decemb +last +class +jerri +section +section +syllabu +html +section +syllabu +html +previou +exam +postscript +format +fall +exam +spring +exam +fall +exam +spring +exam +fall +exam +fall +exam +spring +exam +summer +exam +fall +midterm +exam +answer +fall +final +exam +answer +grade +lookup +grade +simul +help +graphic +interfac +manual +lectur +noteskaren +miller +section +section +chapter +chapter +chapter +number +system +chapter +data +represent +chapter +integ +arithmet +chapter +float +point +arithmet +chapter +data +structur +chapter +regist +chapter +procedur +updat +wednesdai +chapter +assembl +updatedmondai +octob +chapter +chapter +except +process +chapter +featur +perform +chapter +architecur +case +studi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..73b31f2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,296 @@ +home +page +fall +fall +cours +inform +instructor +offic +hour +offic +hour +class +cancel +handout +assign +solut +exam +grade +simul +help +lectur +note +frequent +ask +question +instructor +section +jerri +tusch +offic +phone +hour +mail +jerri +wisc +tutsch +execpc +class +section +section +nolandsect +karen +miller +offic +phone +hour +mail +smoler +wisc +class +time +psycholog +sunlung +suen +offic +phone +hour +tuth +mail +ssuen +wisc +edusridevi +bhamidipati +offic +phone +hour +mail +bsri +wisc +edumohammad +asgarian +offic +phone +hour +tuth +mail +wisc +class +cancel +karen +section +class +mondai +septemb +class +wednesdai +novemb +jerri +section +cancel +schedul +handout +revis +chapter +postscript +cours +overview +jerri +section +html +assign +assign +html +html +assign +program +homework +assign +karen +section +program +homework +assign +program +homework +assign +program +homework +assign +program +homework +solut +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +program +quiz +section +assign +html +programs +programm +quiz +section +assign +html +program +quiz +section +assign +html +programa +programb +quiz +section +examsal +quizz +open +book +note +calcul +karen +section +quiz +fridai +septemb +class +quiz +fridai +septemb +class +quiz +fridai +octob +class +quiz +fridai +octob +class +quiz +fridai +novemb +class +quiz +fridai +novemb +class +quiz +probabl +fridai +decemb +last +class +option +final +thursdai +decemb +difficult +cumul +final +offer +desperateto +rais +grade +sign +advanc +decemb +jerri +section +section +syllabu +html +section +syllabu +html +previou +exam +postscript +format +fall +exam +spring +exam +fall +exam +spring +exam +fall +exam +fall +exam +spring +exam +summer +exam +fall +midterm +exam +answer +fall +final +exam +answer +grade +lookup +grade +simul +help +graphic +interfac +manual +lectur +noteskaren +miller +section +section +chapter +chapter +chapter +number +system +chapter +data +represent +chapter +integ +arithmet +chapter +float +point +arithmet +chapter +data +structur +chapter +regist +chapter +procedur +updat +wednesdai +chapter +assembl +updatedmondai +octob +chapter +chapter +except +process +chapter +featur +perform +chapter +architecur +case +studi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..d47582db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,255 @@ +data +structur +lec +introduct +data +structureslectur +psychologylectur +psychologycours +inform +announc +read +assign +get +start +get +help +exam +program +assign +sampl +code +lectur +inform +cours +materi +comput +lab +home +announc +gener +announc +place +recent +announc +first +announc +problem +program +assign +found +locat +assign +page +binari +search +tree +sampl +code +onlin +sampl +page +last +makeup +exam +done +place +copi +solut +exam +reserv +kurt +wendt +librari +queue +sampl +code +onlin +sampl +page +stack +sampl +code +onlin +sampl +page +exam +topic +onlin +exam +page +list +sampl +code +onlin +sampl +page +handin +directori +creat +list +common +program +error +onlin +suggest +addit +welcom +either +vega +comput +lab +work +comput +inform +sampl +code +place +line +mondai +wednesdai +lectur +get +magic +number +error +sourc +file +must +abl +compil +otherwis +unusu +error +look +stale +page +forget +reload +page +page +updat +copi +browser +cach +becom +outdat +stale +attend +unix +tutori +need +attend +time +list +thur +thur +read +assign +futur +balanc +search +tree +chapter +page +futur +tabl +chapter +discuss +comparison +implement +lectur +lectur +tree +chapter +lectur +queue +chapter +skip +simul +lectur +overload +oper +chapter +page +lectur +hash +tabl +chapter +page +lectur +stack +chapter +lectur +link +list +chapter +lectur +pointer +dynam +memori +alloc +chapter +page +lectur +sort +search +analysi +chapter +page +lectur +sort +algorithm +chapter +page +lectur +basic +recurs +search +algorithm +chapter +lectur +basic +chapter +page +skip +focu +lectur +basic +appendix +page +lectur +skrentni +skrentni +wisc +offic +comput +scienc +offic +hour +teach +assist +baicheng +billi +liao +bail +wisc +offic +comput +scienc +offic +hour +cheng +jiacheng +wisc +offic +comput +scienc +offic +hour +pmcopyright +copi +jame +skrentni +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..4b0947a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,987 @@ +lectur +introduct +data +structuresfal +cours +email +address +wisc +cours +home +page +http +wisc +htmlinstructor +yanni +ioannidi +offic +comput +sciencesoffic +hour +tuesdai +thursdai +amoffic +phone +email +address +yanni +wisc +home +page +http +wisc +yanni +yanni +html +content +new +teach +assist +lectur +inform +languag +text +grade +exam +cours +schedul +assign +program +assign +late +polici +cheat +help +program +grade +style +extern +document +intern +document +us +unix +program +develop +cycl +newsassign +assign +readi +midterm +statisticssom +interest +exam +statist +section +median +mean +midterma +sampl +oldmidterm +avail +help +prepar +ownmidterm +assign +assign +readi +note +notat +binari +searchth +note +notat +binari +search +avail +want +print +either +open +filemenu +ghostview +window +show +document +andchoos +print +menu +item +women +comput +sciencesom +femal +faculti +graduat +student +undergradu +haveform +group +call +wic +women +comput +scienc +oneof +group +goal +encourag +women +becomecomput +scienc +major +women +thisclass +would +like +talk +someon +major +incomput +scienc +graduat +studi +comput +scienc +women +would +like +extra +help +withtheir +classwork +suzan +computersci +grad +student +offic +hour +email +tomak +appoint +suzan +mail +address +stodder +wisc +eduand +offic +hour +tuesdai +grow +tremend +field +theodd +ever +write +anoth +program +thiscours +end +abl +write +statementi +true +pascal +also +wide +avail +aniniti +startup +period +product +take +comput +scienc +cours +exceptionsy +requir +cours +textth +text +book +cours +isdata +abstract +problem +solv +wall +mirror +frank +carrano +isbn +well +written +text +cover +materi +cours +also +includ +separ +text +languag +notnecessari +lectur +often +alwai +follow +lectur +note +fall +david +dewitt +note +actual +consider +complet +simpl +lecturenot +still +short +true +text +book +isveri +littl +narr +text +exercis +recommend +addit +sourc +want +purchas +note +whichar +avail +doit +document +desk +near +dayton +street +entranceof +comput +scienc +build +dayton +first +experi +unix +needsom +inform +activ +account +log +creat +edit +manipul +file +compil +run +debug +program +handoutc +avail +doit +inform +desk +dewitt +notesar +avail +contain +inform +find +invalu +also +help +section +mention +lectur +often +follow +dewitt +note +althoughi +supplement +handout +courseof +semest +nonetheless +respons +materi +cover +lectur +exam +base +onth +lectur +materi +read +assign +note +andth +cours +assign +gradingther +even +exam +cours +semest +final +exam +five +program +assign +exam +determin +final +grade +approxim +equal +weight +programmingassign +count +exam +exam +tuesdai +octob +chemistri +exam +final +exam +wednesdai +decemb +place +cours +schedul +follow +list +topic +cover +thiscours +detail +schedul +provid +later +semest +topic +dewitt +note +wall +mirror +introduct +administr +gener +familiar +basic +stuff +lectur +function +lectur +apoint +lectur +record +equival +madison +prerequisitecours +assign +must +done +design +machin +thesear +machin +room +first +floor +thec +build +encourag +machin +prefer +home +comput +certainrestrict +must +compil +home +machin +must +univers +account +often +read +emailand +copi +data +file +final +requir +thatyou +turn +program +electron +email +youwork +home +must +make +provis +download +program +toyour +univers +account +make +sure +compil +runwith +compil +sparcstat +often +electron +mail +notifi +student +chang +inassign +hint +program +assum +read +allelectron +mail +send +late +policyno +late +assign +accept +assign +must +turn +exactli +order +avoid +late +caus +machin +load +coincid +duedat +sever +class +simpli +sure +start +right +awai +oneach +assign +thing +certain +wrong +wait +thelast +minut +start +except +must +approv +need +good +excus +troubl +soon +possibl +cheatingth +comput +scienc +depart +take +hard +linest +cheat +welcom +tocommun +design +algorithm +datastructur +butther +share +code +also +expect +learn +understand +obei +thecomput +system +policiesgovern +comput +account +helpif +problem +cours +work +program +pleas +know +earli +semest +possibl +offic +hour +policiesif +need +help +debug +program +best +help +tovisit +thec +offic +hour +take +along +currenthard +copi +program +offic +hour +intend +time +explain +conceptsthat +present +class +still +confus +answer +specif +question +cours +materi +encourag +email +reliabl +contact +problem +read +respond +emailsever +time +daili +almost +everi +week +program +gradingprogram +grade +follow +criteria +correct +program +behav +correctli +normal +typicalinput +program +behav +state +projectspecif +clariti +program +easi +read +understand +note +style +informationabout +clariti +robust +correct +behavior +extrem +unusu +situat +program +handl +situat +reason +andlog +manner +simpli +blow +qualiti +test +data +test +data +program +shoulddemonstr +facet +program +capabl +includingunusu +case +effici +avoid +unnecessarili +ineffici +algorithm +construct +howev +effici +never +pursu +expens +clariti +modular +program +modular +make +effect +useof +paramet +complet +incorpor +inform +program +need +sort +extra +paper +document +gener +program +gener +possibl +subject +considerationof +effici +clariti +avoid +arbitrari +limit +bound +size +orcomplex +input +whenev +possibl +limit +necessari +express +definedconst +near +program +easili +chang +numer +liter +appear +program +thosevalu +like +chang +styleus +meaning +identifi +name +consist +name +scheme +identifi +name +suggest +convent +follow +variable_nam +function_nam +argument +const +defined_const +enum +enumtyp +valu +valu +class +classnam +multipl +statement +singl +line +skip +line +function +group +code +clear +consist +indent +style +dewitt +notesfor +suggest +style +indent +continu +statement +loop +line +label +meaningfulli +done +extern +documentationthi +includ +long +comment +begin +yourprogram +address +typic +user +someonewho +want +know +superfici +program +work +includ +full +name +student +begin +comment +give +gener +descript +program +tell +program +call +format +data +give +limit +bug +special +featur +assumpt +made +describ +neg +well +posit +aspect +program +includ +neg +assum +unawar +inform +includ +assign +problem +descriptionne +repeat +briefli +summar +first +point +statement +refer +user +assign +document +thensuffici +note +appli +problem +descript +intern +documentationther +four +main +type +intern +document +header +comment +header +function +class +major +data +structuresshould +describ +purpos +assumpt +paramet +main +outlin +algorithm +declar +comment +next +declar +variabl +data +membershould +provid +extra +inform +convei +identifi +sname +name +variabl +tell +much +possibl +withoutmak +long +addit +inform +suppli +comment +exampl +index +last +element +ad +stackyou +comment +explain +paramet +well +local +variabl +within +segment +code +tricki +opaqu +section +code +beavoid +sometim +necessari +case +commentcan +help +reader +understand +go +segment +code +comment +clarifi +level +outlineof +algorithm +us +unix +vimani +peopl +work +unix +thefirst +time +find +take +time +becomecomfort +particularli +true +youronli +previou +program +experi +pascal +us +macpasc +macintosh +strongli +urg +inth +time +earli +semest +becom +comfort +withunix +time +pain +time +wellspent +also +wish +attend +unix +tutori +held +room +comp +session +thefollow +dai +tbayou +want +pick +copi +program +develop +cycl +program +develop +cycl +unix +environ +edit +program +program +compil +program +wall +program +compil +error +continu +program +inputfil +outputfil +look +output +outputfil +outputfil +error +break +tire +continu +print +list +take +home +program +inputfil +outputfil +goto +home +debug +program +quit +done +turn +result +submiss +instruct +given +later +yanni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..30707856 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,519 @@ +introduct +data +structur +http +wisc +html +revis +fall +jame +larusinstructor +jame +laruslaru +wisc +comput +scienc +http +wisc +laru +laru +html +offic +hour +tuesdai +fridai +amcontentsteach +assistantstextlectur +informationelectron +mailth +languagegradingexamscours +scheduleassign +assign +assign +assign +program +assignmentscours +objectivesc +object +present +concept +data +structur +gener +wide +us +structur +detail +data +structur +fundament +build +block +comput +program +cours +abl +identifi +situat +data +structur +necessari +determin +requir +data +structur +select +appropri +data +structur +cover +cours +reiter +concept +structur +program +abstract +data +type +modular +principl +introduc +essenti +write +clear +correct +maintain +softwar +close +connect +abstract +data +type +data +structur +cours +place +strong +emphasi +appli +principl +program +exercis +teach +assistantswei +zhang +chin +tang +chin +teach +assist +forthi +cours +section +grade +homework +assignmentsand +happi +answer +question +theassign +aspect +cours +give +troubl +zhangoffic +compuer +sciencesoffic +hour +wednesdai +thursdai +sundai +offic +phone +email +address +weiz +wisc +chin +tang +chin +offic +comput +sciencesoffic +hour +mondai +tuesdai +fridai +amoffic +phone +email +address +cchin +wisc +home +page +http +wisc +cchin +cchin +html +textth +text +book +cours +data +abstract +problem +solv +wall +mirror +frank +carrano +isbn +well +written +littl +long +wind +text +cover +materi +cours +also +includ +background +separ +text +languag +necessari +lectur +often +alwai +follow +david +dewitt +lectur +note +fall +note +complet +simpl +lectur +note +fall +short +true +text +book +contain +littl +narr +text +exercis +us +note +basi +lectur +feel +free +skip +portion +cover +addit +materi +want +purchas +note +avail +doit +document +desk +dayton +street +entranc +comput +scienc +build +dayton +cours +first +experi +unix +need +inform +activ +account +log +creat +edit +manipul +file +compil +run +debug +program +handout +also +avail +doit +inform +desk +contain +crucial +inform +also +also +help +section +lectur +inform +tuesdai +thursdai +psycholog +mention +lectur +often +follow +dewitt +note +lectur +attend +strongli +recommend +regularli +present +materi +appear +textbook +lectur +note +us +program +assign +exam +needless +respons +materi +cover +lectur +exam +base +lectur +materi +read +assign +note +cours +assign +electron +maili +often +electron +mail +notifi +student +chang +assign +hint +program +assum +regularli +read +electron +mail +gradingther +even +exam +semest +final +exam +five +program +assign +exam +determin +final +grade +approxim +equal +weight +program +assign +count +languag +taught +us +program +languag +program +assign +must +written +know +section +skrentni +teach +section +cover +addit +data +structur +larg +complex +languag +unless +experi +program +even +difficult +languag +learn +book +also +anoth +page +inform +program +assign +gdbthere +also +page +describ +program +debugg +exam +exam +tuesdai +chemistri +exam +final +exam +wednesdai +decemb +place +cours +schedul +follow +rough +outlin +topic +cover +cours +detail +schedul +provid +later +topic +dewitt +note +introduct +administrationbas +stuff +lectur +function +lectur +pointer +lectur +record +dynam +storagelectur +list +lectur +binari +search +notat +advanc +listslectur +stackslectur +queueslectur +hashinglectur +even +exam +lectur +recursionlectur +treesbinari +tree +sort +searchlectur +treesgraphslectur +even +exam +sortinglectur +tbaassign +absolut +requir +grade +turn +index +card +follow +inform +name +login +nameyear +school +freshman +sophomor +previou +coursesprevi +program +experiencerec +photograph +pictur +birthdai +girl +scout +trip +summer +color +black +white +size +grade +given +without +photo +assign +first +program +assign +write +simpl +abstract +data +byte +fora +bound +integ +sequenc +text +assign +line +assign +second +program +assign +write +program +maintain +databaseof +score +tenni +tournament +text +assign +line +assign +second +program +assign +write +program +produc +aconcord +us +hash +tabl +text +assign +line diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..e397724a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,234 @@ +data +structur +lec +introduct +data +structureslectur +psychologylectur +psychologycours +inform +announc +read +assign +get +start +get +help +exam +program +assign +sampl +code +lectur +inform +cours +materi +comput +lab +home +announc +gener +announc +place +recent +announc +first +announc +problem +program +assign +found +locat +assign +page +last +makeup +exam +done +place +copi +solut +exam +reserv +kurt +wendt +librari +queue +sampl +code +onlin +sampl +page +stack +sampl +code +onlin +sampl +page +exam +topic +onlin +exam +page +list +sampl +code +onlin +sampl +page +handin +directori +creat +list +common +program +error +onlin +suggest +addit +welcom +either +vega +comput +lab +work +comput +inform +sampl +code +place +line +mondai +wednesdai +lectur +get +magic +number +error +sourc +file +must +abl +compil +otherwis +unusu +error +look +stale +page +forget +reload +page +page +updat +copi +browser +cach +becom +outdat +stale +attend +unix +tutori +need +attend +time +list +thur +thur +read +assign +futur +tree +chapter +lectur +queue +chapter +skip +simul +lectur +overload +oper +chapter +page +lectur +hash +tabl +chapter +page +lectur +stack +chapter +lectur +link +list +chapter +lectur +pointer +dynam +memori +alloc +chapter +page +lectur +sort +search +analysi +chapter +page +lectur +sort +algorithm +chapter +page +lectur +basic +recurs +search +algorithm +chapter +lectur +basic +chapter +page +skip +focu +lectur +basic +appendix +page +lectur +skrentni +skrentni +wisc +offic +comput +scienc +offic +hour +teach +assist +baicheng +billi +liao +bail +wisc +offic +comput +scienc +offic +hour +cheng +jiacheng +wisc +offic +comput +scienc +offic +hour +pmcopyright +copi +jame +skrentni +skrentni +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..073bcaa9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,82 @@ +introduct +theoret +comput +scienc +introduct +theoret +comput +scienc +fall +room +lectur +brian +cole +email +wisc +offic +offic +hour +mondai +fridai +teach +assist +david +sundaram +stukel +email +sundaram +wisc +offic +offic +hour +tuesdai +wednesdai +thursdai +text +introduct +languag +theori +comput +john +martin +north +dakota +state +univers +mcgraw +hill +isbn +tent +lectur +schedul +includ +exam +inform +lectur +clarif +assign +page +grade +polici +written +assign +term +examin +final +examin +archiv +mail +list +home +page +septemb +brian +cole +madison +comput +scienc +home +page +madison +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..4dd446fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,158 @@ +also +math +stat +fall +also +math +stat +linear +programmingfal +schedul +lectur +mechan +engin +open +book +midterm +exam +time +date +thursdai +octob +locat +mechan +engin +open +book +final +exam +time +date +wednesdai +decemb +locat +instructor +olvi +mangasarian +offic +comp +stat +pphone +mail +olvi +wisc +offic +hour +wednesdai +fall +semest +teach +assist +offic +comp +stat +telephon +mail +wisc +offic +hour +textbook +linear +program +matlab +ferri +mangasarian +preliminari +version +doit +madison +syllabu +cours +overview +cours +inform +cours +inform +book +reserv +kurt +wendt +librari +matlab +setup +homework +septemb +homework +septemb +homework +septemb +homework +septemb +homework +octob +homework +octob +homework +octob +homework +octob +homework +novemb +homework +novemb +homework +decemb +homework +decemb +program +project +novemb +sampl +midterm +exam +march +solut +sampl +midterm +exam +march +midterm +exam +march +solut +midterm +exam +march +midterm +exam +octob +solut +midterm +exam +octob +sampl +final +exam +final +exam +solut +final +exam +mathemat +program +home +page +cours +relev +site +searchabl +bibliograph +databas +item +link +variou +site +page +updat +period +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..0efc0266 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,87 @@ +fall +introduct +program +languag +compilersspr +stori +month +octob +schedul +lectur +tuth +comp +stat +recit +psycholog +instructor +susan +horwitz +offic +telephon +mail +horwitz +wisc +offic +hour +tuesdai +fridai +appoint +teach +assist +rahul +kapoor +offic +telephon +mail +rahul +wisc +offic +hour +mondai +wednesdai +appoint +text +reserv +wendt +librari +compil +principl +techniqu +tool +sethi +ullman +craft +compil +fischer +leblanc +check +regularli +gener +cours +inform +cours +overview +date +inform +assign +exam +grade +includ +late +polici +get +start +read +program +assign +homework +examin +lectur +note +us +program +tool +grade +email +link +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..455428a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,1005 @@ +introduct +oper +system +fall +introduct +oper +systemssect +fall +instructormarvin +solomon +offic +comput +sciencesoffic +hour +troffic +phone +email +address +solomon +wisc +tarob +mellencamp +offic +comput +sciencesoffic +hour +mwfoffic +phone +email +address +mellen +wisc +new +watch +space +latest +updat +answer +midterm +exam +summari +score +avail +detail +breakdown +grade +distributioni +also +avail +specif +forproject +avaiabl +date +project +move +thursdai +typograph +error +note +deadlock +avoid +correct +importantli +arraywa +call +place +other +call +place +popular +demand +midterm +exam +isavail +look +warn +take +exampl +larg +grain +salt +exam +long +time +courseus +differ +text +cover +topic +differ +order +semest +midterm +likelyb +quit +differ +time +place +midterm +exam +determin +room +comp +octob +specif +forproject +avaiabl +discuss +issu +presentedin +class +avail +summari +grade +project +avail +electron +hand +direct +forprogram +post +procedur +givefork +algorithm +theproject +specificationshould +contain +call +notifi +page +correct +show +sept +fix +bug +theproject +specif +minor +import +first +caus +introduct +paragraph +slightli +garbl +thank +jake +dawlei +carr +point +second +line +omit +sampl +code +algorithm +theprogram +detailssect +creat +threadschedul +start +threadschedul +sched +threadschedul +sched +start +specifi +correctli +later +section +threadschedul +detail +section +page +fix +thank +lipe +zhang +sept +test +data +file +project +avail +directori +public +srccontain +three +data +file +java +class +read +file +public +graph +javacontain +definit +classgraphdescrib +project +specif +file +public +petersoncycl +graphcontain +peterson +graph +shown +project +specif +mention +initi +placement +fork +notacycl +file +public +petersonacycl +graphcontain +peterson +graph +acycl +initi +placement +fork +file +public +star +graphcontain +star +topolog +central +philosoph +sharingfork +nine +other +sept +todd +jenner +point +typo +specif +project +fork +number +read +fork +number +maxthink +replac +maxeat +onlin +versionha +correct +thank +todd +sept +mistak +thejava +tutori +note +section +string +argument +version +string +substr +second +argumenti +offset +substr +number +charactersin +string +note +correct +thank +franco +tung +chan +point +sept +occasion +send +urgent +messag +directli +mail +listof +student +regist +cours +archiv +messag +sent +list +receiv +messag +think +sendmail +solomon +wisc +specif +forproject +avaiabl +receiv +request +makefil +java +sampl +makefil +public +makefil +copi +file +work +directori +java +sourc +file +rememb +separ +directori +project +edit +describ +comment +type +maketo +compil +program +make +class +compilewithout +run +sept +note +hand +assign +simul +preemptiv +multitask +solari +computershav +ad +sept +java +tutoriali +finish +finish +go +section +onthread +find +help +hint +structur +project +awar +weekli +seminar +oper +systemsand +network +meet +mondai +first +seminar +semest +mondai +checkth +colloquia +seminar +page +detail +sept +java +book +final +avail +theunivers +bookstor +sept +begin +ajava +tutori +avail +sept +java +depart +unix +workstat +must +creat +afil +name +cshrc +local +home +directori +contain +onelin +path +path +java +make +chang +take +effect +either +type +sourc +cshrc +localor +simpli +back +sept +specif +project +readi +sept +unix +orient +session +unix +user +schedul +forth +follow +time +tue +thur +sept +room +csmon +thur +sept +room +cslast +updat +content +new +summari +lectur +inform +text +project +grade +cours +schedul +lectur +note +summari +intend +gener +introduct +techniqu +usedto +implement +oper +system +relat +kind +system +softwar +among +topic +cover +beprocess +manag +creation +synchron +commun +processor +schedul +deadlock +prevent +avoid +recoveri +main +memori +manag +virtual +memori +manag +swap +page +segment +page +replacementalgorithm +control +disk +input +output +devic +file +system +structur +implement +protect +secur +lectur +inform +lectur +tuesdai +thursdai +comput +sciencesand +statisticsdiscuss +wednesdai +psychologyth +discuss +section +option +least +import +lectur +primari +focu +wednesdai +meet +topic +relat +theproject +includ +introduct +thejavaprogram +languag +time +also +avail +answer +anyquest +regard +point +rais +lectur +thetext +text +requir +modern +oper +systemsbi +andrew +tanenbaum +prentic +hall +strongli +recommend +java +program +languagebi +arnold +jame +gosl +addison +weslei +onlin +refer +lot +addit +help +materi +java +avail +follow +refer +collect +local +fast +access +java +tutorialth +java +languag +specificationjava +documentationwatch +spot +addit +link +project +five +program +project +thejavaprogram +languag +sparcstat +workstat +run +solari +dialect +unixoper +system +provid +anycomput +access +implement +java +programminglanguag +howev +comput +scienc +depart +comput +respons +transfer +requireddata +set +softwar +packag +comput +first +assign +easi +acquaint +exercis +designedto +help +becom +familiar +comput +environ +thejava +languag +subsequ +project +involveprocess +synchron +processor +schedul +disk +schedul +file +system +implement +first +project +student +requir +work +pair +member +pair +receiv +grade +project +feel +free +discuss +project +anyon +butyou +must +share +code +anyon +partner +cheat +vigor +punish +enough +said +assign +begin +class +dateind +entir +semest +havethre +late +daysof +credit +late +dai +differ +assign +eachof +three +assign +three +dai +assign +late +dai +us +last +assign +java +student +take +cours +familiar +java +choos +java +sever +argument +favor +java +congeni +program +environ +runtim +error +subscript +null +pointer +uniniti +variabl +caus +except +caught +languag +runtimerath +mysteri +crash +random +behavior +java +string +much +easier +char +arrai +garbag +collect +storag +manag +extrem +handi +java +trendi +java +caught +faster +languag +histori +mani +reason +java +grow +popular +littl +withth +cours +discuss +issu +class +byproduct +coursewil +knowledg +java +becom +quit +market +commod +java +oper +system +featur +built +particular +first +wide +us +program +languag +withlanguag +level +support +concurr +thread +synchron +monitor +hand +switch +program +languag +alwaysa +disloc +fortun +excel +resourc +avail +eas +thetransit +java +program +languagebi +arnold +gosl +amazingli +good +neither +introductori +program +primer +author +assum +youalreadi +know +program +refer +manual +although +arefer +manuali +avail +onlin +readabl +introduct +languag +take +wayfrom +get +start +everyth +need +write +quit +sophisticatedprogram +java +book +avail +univers +bookstor +strongli +encourag +also +gather +varieti +ofoth +resourc +togeth +includ +niceonlin +tutorialabout +java +program +anda +refer +manualfor +standard +class +librari +us +grade +midterm +final +exam +count +grade +midterm +even +wednesdai +octob +room +comput +scienc +statist +final +schedul +timet +tuesdai +decemb +first +program +project +get +start +count +yourgrad +remain +four +project +count +cours +schedul +follow +schedul +tent +updat +later +semest +check +back +frequent +sept +introduct +chapter +sept +process +synchron +processor +schedul +chapter +sept +project +learn +javaoct +project +synchronizationoct +memori +manag +virtual +memori +chapter +project +schedulingoct +midterm +exam +room +comp +devic +file +system +chapter +project +disk +schedulingdec +protect +secur +section +project +file +systemsdec +final +exam +lectur +note +introduct +histori +bottom +view +view +cours +outlin +java +programm +process +synchron +us +process +process +process +creat +process +process +state +synchron +race +condit +semaphor +bound +buffer +problem +dine +philosoph +monitor +messag +deadlock +terminolog +deadlock +detect +deadlock +recoveri +deadlock +prevent +deadlock +avoid +implement +process +implement +monitor +implement +semaphor +implement +critic +section +short +term +schedul +memori +manag +alloc +main +memori +algorithm +memori +manag +compact +garbag +collect +swap +page +disk +come +solomon +wisc +eduthu +copyright +marvin +solomon +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..84a8914b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,107 @@ +fall +home +pagec +introduct +oper +systemsfal +tuesdai +thursdai +discuss +fridai +host +maryvernon +instructor +andkarunamuthiah +welcom +home +page +note +thursdai +lectur +fridai +discuss +beinterchang +follow +date +solut +quiz +assign +offic +hour +email +textbook +read +grade +project +quizz +mail +archiveapproxim +schedul +topicsweek +oftopicsreadingsep +introduct +concurr +thread +address +space +processeschapt +thread +manag +cooper +threadschapt +synchron +implement +mutual +exclusioncont +semaphorescont +monitor +concurr +summarycont +doct +deadlock +process +schedulingchapt +memori +manag +protect +address +translat +cach +tlbschapter +demand +page +virtual +memorycont +review +survei +systemschapt +file +system +name +directorieschapt +protect +java +object +core +methodstbanov +java +thread +secur +thanksgiv +class +network +distribut +system +remot +procedur +call +chapter +distribut +file +system +global +memori +system +reviewchapt +vernon +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..977eeee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,128 @@ +lectur +home +page +fall +fall +cours +inform +instructor +offic +hour +offic +hour +handout +assign +exam +grade +instructor +david +wood +offic +hour +tuesdai +wednesdai +appoint +mail +david +wisc +phone +class +time +tuesdai +thursdai +locat +phil +atkinson +offic +hour +tuesdai +thursdai +appoint +mail +atkinson +wisc +phone +get +start +help +inform +handout +cours +descript +get +start +mentor +error +check +correct +sampl +vhdl +code +compil +simul +vhdl +mentorassign +assign +answer +question +assign +assign +select +answer +assign +assign +select +answer +assign +assign +select +answer +assign +assign +assign +projectthi +section +includ +inform +cours +project +project +descript +project +deadlin +project +report +project +demonstr +time +decemb +examsth +midterm +exam +wednesdai +room +final +exam +tuesdai +room +exam +previou +spring +midterm +fall +midterm +spring +midterm +spring +midterm +spring +midterm +solut +fall +midterm +solut +spring +midterm +solut +spring +endterm diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..7bfa728c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,144 @@ +databas +manag +system +design +implementationc +databas +manag +system +design +implementationcours +inform +postscript +version +class +fridai +instead +offic +hour +time +assign +chang +fridai +assign +text +last +updat +assign +handout +postscript +class +mail +list +solut +chapter +exercis +pleas +dont +print +solut +chapter +exercis +postscript +first +inform +overview +prerequisit +offic +hour +topic +cover +grade +import +date +import +polici +issu +minibas +home +page +check +detail +assign +assign +assign +handout +postscript +assign +html +last +updat +assign +handout +postscript +assign +html +last +updat +assign +handout +postscript +last +year +midterm +sampl +postscript +last +year +midterm +postscript +us +sybas +info +sybas +info +help +yahoo +entri +resourc +tutori +info +tree +debugg +info +tree +languag +construct +assign +handout +grade +experi +assignmentoth +handout +code +convent +instructor +raghu +ramakrishnan +offic +phone +mail +raghu +offic +hour +lectur +discuss +lectur +time +place +ingraham +teach +assist +xuemei +offic +phone +mail +xbao +offic +hour +tue +thur +last +modifi +sept +xbao diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..a8707f59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,67 @@ +lectur +home +page +welcom +home +page +page +obvious +construct +semest +progress +addinginform +need +know +page +import +thing +know +class +meetingroom +chang +current +meet +russel +labsfor +lectur +option +discuss +fridai +beenmov +still +psycholog +instructor +jeff +naughton +offic +wednesdai +lectur +discuss +lectur +time +place +russel +lab +discuss +option +time +place +psycholog +inform +lectur +taught +close +cooper +lectur +fact +assign +probabl +exam +inform +gener +minibas +assign +particular +pleas +lectur +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..147f4c5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,58 @@ +introduct +algorithm +introduct +algorithm +cours +inform +instructor +eric +bach +offic +phone +mail +bach +wisc +hour +appt +teach +assist +bill +donaldson +offic +phone +mail +wisc +hour +teach +assist +raji +gopalakrishnan +offic +phone +mail +raji +wisc +hour +midterm +exam +cours +handout +cours +descript +syllabu +book +reserv +cours +organ +homework +homework +homework +solut +homework +homework +graph +fractal +behaviour +homework +mail +archiv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..2d9fd97a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,190 @@ +comput +network +cours +professor +landweb +comput +network +cours +introduct +comput +network +advanc +comput +networksintroduct +comput +network +tabl +content +intern +connect +network +cours +offer +cours +inform +instructor +teach +assist +cours +syllabu +mail +archiv +assign +program +refer +select +readingsclick +hereto +latest +text +version +networkingcours +madisoncours +informationlecturetim +mwfplace +comp +statclass +email +listinstructor +lawrenc +landweberoffic +comp +statphon +email +wisc +eduoffic +hour +teach +assist +srinivasa +narayananoffic +phone +email +wisc +eduoffic +hour +mondai +wednesdai +time +conveni +feel +free +email +wisc +appoint +teach +assist +teitelbaumoffic +phone +naemail +wisc +eduoffic +hour +tuesdai +thursdai +time +conveni +feel +free +email +wisc +appoint +fall +cours +syllabu +fall +mail +archiv +moder +mail +archiv +complet +assign +fall +program +assign +error +warn +code +class +project +implement +network +layer +reliabl +adapt +layer +handout +postscript +project +overview +slide +postscript +powerpoint +softwar +engin +slide +postscript +powerpoint +design +document +evalu +form +postscript +html +version +pictur +project +slide +document +grade +criteria +gradingmidterm +exam +final +exam +assign +term +project +prior +midterm +fall +midterm +fall +midterm +option +refer +book +project +unix +network +program +steven +richard +prentic +hall +isbn +program +refer +socket +interfac +socket +interfac +lectur +garbler +packag +annot +bibliographyread +partial +icmp +ospf +ipng +advanc +comput +network +lectur +schedul +spring +review +form diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..a2d0a918 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,30 @@ +home +page +fall +fall +instructor +robert +meyer +wisc +time +place +comp +offic +hour +cours +descript +homework +homework +solut +note +homework +homework +solut +note +comput +project +part +comput +project +part +option diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..f4823c68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,149 @@ +fall +also +math +stat +nonlinear +program +theori +applicationsfal +schedul +lectur +cours +mail +list +wisc +cours +http +wisc +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +tuesdai +wednesdai +teach +assist +offic +telephon +mail +wisc +offic +hour +wednesdai +thursdai +class +text +nonlinear +program +olvi +mangasarian +siam +publish +philadelphia +us +text +nonlinear +program +theori +algorithm +bazaraa +sherali +shetti +second +edit +wilei +york +nonlinear +program +bertseka +athena +scientif +gener +cours +inform +cours +overview +introduct +linear +inequ +theorem +altern +convex +set +convex +concav +function +saddlepoint +optim +criteria +without +differenti +differenti +convex +concav +function +first +order +optim +criteria +differenti +second +order +optim +criteria +differenti +dualiti +nonlinear +program +gener +convex +function +optim +condit +exact +penalti +augment +lagrangian +gradient +project +book +reserv +kurt +wendt +librari +assign +grade +homework +assign +grade +assign +week +midterm +examin +novemb +grade +final +examin +grade +homework +assign +homework +homework +homework +homework +mathemat +program +home +page +cours +page +updat +period +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..f4823c68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,149 @@ +fall +also +math +stat +nonlinear +program +theori +applicationsfal +schedul +lectur +cours +mail +list +wisc +cours +http +wisc +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +tuesdai +wednesdai +teach +assist +offic +telephon +mail +wisc +offic +hour +wednesdai +thursdai +class +text +nonlinear +program +olvi +mangasarian +siam +publish +philadelphia +us +text +nonlinear +program +theori +algorithm +bazaraa +sherali +shetti +second +edit +wilei +york +nonlinear +program +bertseka +athena +scientif +gener +cours +inform +cours +overview +introduct +linear +inequ +theorem +altern +convex +set +convex +concav +function +saddlepoint +optim +criteria +without +differenti +differenti +convex +concav +function +first +order +optim +criteria +differenti +second +order +optim +criteria +differenti +dualiti +nonlinear +program +gener +convex +function +optim +condit +exact +penalti +augment +lagrangian +gradient +project +book +reserv +kurt +wendt +librari +assign +grade +homework +assign +grade +assign +week +midterm +examin +novemb +grade +final +examin +grade +homework +assign +homework +homework +homework +homework +mathemat +program +home +page +cours +page +updat +period +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..932e3407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,86 @@ +home +page +comput +system +perform +evalu +model +new +sept +assign +postscript +text +sept +mimic +librari +avail +public +mimic +cours +inform +lectur +comput +scienc +devis +softwar +home +page +html +user +manual +postscript +pleas +print +file +contain +mani +imag +take +least +half +hour +initi +instruct +text +mimic +softwar +tutori +html +postscript +onlin +help +html +qnet +exampl +devc +html +professor +miron +livni +offic +comput +scienc +hour +phone +mail +miron +wisc +teach +assist +chee +yong +chan +offic +comput +scienc +hour +phone +mail +cychan +wisc +suggest +comment +pleas +send +cychan +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..65ee6d33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,91 @@ +find +uwisc +find +inform +retriev +technolog +seek +knowledgerichard +belewvisit +professorc +lectur +univ +wisconsin +comput +scienc +departmentfal +thur +acal +room +engrthi +cours +design +student +interest +understand +aboutth +inform +retriev +knowledg +represent +machinelearn +techniqu +underli +much +excit +activ +occur +onth +world +wide +complet +descript +coures +canse +abstract +asyllabu +major +topic +consid +graphic +mapof +thesear +relat +anda +tent +schedul +semesterwil +proce +cours +resourc +read +overview +part +postscript +overview +part +postscript +polit +infidel +imag +postscript +assign +class +email +digest +hypermai +suggest +compos +email +classrel +resourc +class +minut +taken +student +student +last +modifi +belew +wisc +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..61f70813 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,195 @@ +introduct +numer +method +last +chang +introduct +numer +methodsthi +page +contain +inform +fall +cours +smile +tent +syllabu +order +score +orderli +mind +addit +materi +chang +note +cooper +assignmentson +assign +page +order +total +score +midterm +chang +assign +note +chang +date +well +slightli +chang +point +problem +diari +class +addit +materi +residu +error +condit +chang +rick +carl +offic +hour +list +errata +text +chang +diari +class +diari +class +chang +us +email +concern +problem +updat +sinc +question +comput +complex +numericalanalysi +algorithm +post +preprint +foremostmathematician +todai +subject +interest +trickytop +least +squar +solut +approxim +time +place +also +textmai +supplement +byaddit +materi +file +mention +book +areavail +organ +chapter +awar +though +site +mention +book +well +name +begin +capitallett +sometim +name +book +begin +lower +caselett +matlab +diari +class +session +present +plan +comput +assign +matlab +rather +fortran +kermit +sigmon +matlab +primer +edit +avail +doit +look +handout +student +reaction +student +edit +matlab +access +matlab +telnet +access +matlab +telnet +winor +machin +cours +overviewcours +syllabu +tent +assign +also +answer +list +word +grade +look +last +four +digit +student +email +concern +current +question +conduct +unix +orient +session +user +andp +relat +linksyou +might +wish +explor +csdepart +home +page +comput +system +frequent +ask +question +list +simpl +tutori +avail +well +advanc +referenceviva +also +good +introduct +unix diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..3883b051 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,61 @@ +numer +function +analysi +last +chang +numer +function +analysisthi +page +contain +inform +fall +version +math +current +class +note +avail +follow +directori +well +hard +copi +doit +class +recent +announc +post +grade +time +locat +statlectur +carl +boor +email +deboor +wisc +offic +hour +stat +line +classnot +viii +index +assign +none +email +concern +homework +cours +relat +question +relat +linksyou +might +wish +explor +depart +home +page +courseoff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..89c18b9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,15 @@ +approxim +theori +last +chang +approxim +theorythi +page +contain +inform +spring +version +math +cours +note +cours diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..56a70d12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,93 @@ +home +page +toni +silva +sectioncsm +instructor +toni +silva +contact +email +dsilva +wisc +offic +comput +scienc +statist +phone +offic +hour +appoint +textbookproblem +solv +object +program +walter +savitch +section +inform +sept +comp +stat +firstdai +noland +specifi +timet +sept +chamberlin +comput +comput +scienc +statist +cours +inform +handout +tent +syllabu +semest +late +polici +grade +criteria +academ +misconduct +handout +import +softwar +introduct +microsoft +windowshint +window +compilersth +window +oper +systememailmosaicnetscap +inform +introduct +borland +languageth +savitch +text +assign +program +tuesdai +program +tuesdai +program +tuesdai +program +thursdai +program +thursdai +solut +quizz +solut +quiz +solut +quiz +last +modifi +anthoni +silva +dsilva +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..eead663b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,853 @@ +comput +visionc +comput +visionfal +instructor +chuck +dyeroffic +csstelephon +email +dyer +wisc +eduoffic +hour +mondai +thursdai +appointmentteach +assist +bryan +sooffic +csstelephon +email +wisc +eduoffic +hour +wednesdai +fridai +appointmentstud +gener +cours +informationfundament +comput +vision +first +introduct +level +imag +analysi +method +includ +imag +format +edg +detect +featuredetect +segment +principl +defin +modul +forreconstruct +three +dimension +scene +inform +usingtechniqu +asshap +shade +depth +stereo +activ +method +scene +recoveri +depth +focu +andocclud +contour +detect +viewpoint +control +motion +detect +analysi +includ +track +model +base +three +dimension +object +recognit +schedul +tuesdai +thursdai +prerequisit +fundament +calculu +probabl +theori +linear +algebra +grade +midterm +exam +thursdai +novemb +homework +assign +project +class +particip +syllabu +requir +read +select +part +machin +vision +jain +kasturi +schunck +mcgraw +hill +york +collect +readingsfrom +journal +confer +proceed +sold +doit +document +small +batchessupplementari +read +sourcesonlin +informationmost +cours +inform +avail +onlin +urlhttp +wisc +dyer +html +read +assign +date +chapter +paper +doit +chapter +chapter +chapter +paper +doit +avail +handout +chapter +except +chapter +paper +doit +avail +handout +paper +doit +avail +handout +chapter +chapter +primarili +student +score +grade +homework +assignmentshomework +imag +enhanc +histogram +modif +option +make +copi +portrait +imag +public +imag +contrast +enhanc +face +byfirst +rotat +imag +crop +window +around +head +shoulder +final +interact +adjust +theintens +modif +function +color +editor +window +thewindow +button +also +free +modifi +thing +colorif +wish +found +good +grayscal +transformationsav +result +color +imag +andput +directori +whereth +origin +imag +send +email +tell +qualit +whatintens +transform +appli +improv +qualityof +imag +overal +imag +photo +board +student +class +feel +free +imag +ownweb +home +page +well +homework +skeleton +octob +learn +get +start +vista +vision +softwar +read +introduct +vista +program +manual +avail +doit +document +correct +origin +assign +method +chang +condit +least +instead +least +prevent +type +shape +disappear +altogeth +method +condit +also +count +transit +case +method +matrix +citi +block +distanc +infin +larg +constant +four +corner +method +matrix +chessboard +distanc +center +posit +test +imag +us +vision +imag +doit +vision +imag +hand +evalu +thin +result +might +want +follow +addit +experi +us +output +least +test +convert +skeleton +imag +ubyt +format +us +vconvert +edit +need +emac +clean +header +imag +file +contain +follow +line +right +repn +ubyt +line +component_interp +gradient +low_threshold +high_threshold +vlink +file +vsegedg +us +result +imag +hand +exampl +well +approach +might +us +determin +direct +index +finger +point +applic +note +thin +method +fact +delet +entir +shape +block +surround +disappear +thin +algorithm +base +paper +zhang +suen +fast +parallel +algorithm +thin +digit +pattern +comm +wang +comment +fast +parallel +algorithm +thin +digit +pattern +comm +comparison +student +result +differ +method +appli +point +hand +imag +rotat +version +test +imag +homework +imag +spline +mosaic +octob +read +paper +adelson +pyramid +method +imag +process +engin +burt +adelson +laplacian +pyramid +compact +imag +code +ieee +tran +comm +burt +adelson +multiresolut +spline +applic +imag +mosaic +tran +graphic +hint +faq +spline +imag +produc +student +class +homework +segment +snake +novemb +read +paper +kass +witkin +terzopoulo +snake +activ +contour +model +comput +vision +william +shah +fast +algorithm +activ +contour +curvatur +estim +comput +vision +graphic +imag +process +imag +understand +hint +faq +homework +project +decemb +student +project +titl +abstract +supplementari +read +addit +paper +might +help +select +topic +student +project +done +stanford +vision +cours +comput +account +account +cours +account +sparcstat +call +room +account +larg +disk +space +quota +store +imag +homework +project +sure +delet +imag +compress +other +gzip +howev +order +save +space +email +email +sent +list +goe +everyon +class +includ +instructor +printer +print +imag +laserprint +laser +laser +locat +room +altern +gener +printer +name +laser +send +output +four +printer +shortest +queue +caution +send +imag +printer +sure +check +queue +job +print +manner +send +imag +print +take +long +print +consider +vision +softwar +vista +vista +program +environ +us +homework +assign +code +locat +directori +vision +tool +vista +page +vision +tool +vista +execut +vision +tool +vista +interact +imag +displai +program +window +system +us +displai +imag +varieti +format +imgstar +basic +imag +process +oper +invok +us +unix +like +command +line +code +execut +manual +vision +tool +imgstar +khoro +khoro +imag +process +softwar +develop +environ +provid +basic +imag +process +modul +graphic +program +languag +interfac +rapid +prototyp +simpl +imag +process +algorithm +code +locat +directori +vision +tool +khoro +vision +tool +khoro +cantata +execut +start +interact +environ +netpbm +toolkit +convers +imag +larg +varieti +differ +format +base +pbmplu +packag +page +vision +tool +execut +vision +tool +matlab +matlab +numer +comput +visual +environ +signal +process +imag +process +toolbox +especi +relev +test +imag +test +imag +directori +vision +imag +although +requir +format +convers +us +imag +public +imag +numer +imag +databas +also +access +exampl +collect +test +imag +examin +examin +solut +exam +held +thursdai +novemb +regular +classroom +note +earli +start +time +exam +cover +topic +shape +shade +includ +read +textbook +paper +sold +doit +homework +assign +bring +exam +sheet +paper +note +want +side +exam +focu +main +idea +algorithm +proof +exam +type +question +ask +exam +exam +spring +exam +spring +exam +spring +exam +spring +link +interest +comput +vision +home +page +highli +recommend +chuck +dyer +link +interest +wandel +list +us +number +vision +scienc +hdtv +grand +allianc +hdtv +system +specif +advanc +televis +system +committe +atsc +atsc +document +postscript +spie +optic +scienc +engin +librari +vision +demo +project +appl +quicktim +imag +mosaic +product +panoramix +imag +mosaic +exampl +panoram +imag +mosaic +decfac +talk +synthet +face +video +rate +stereo +machin +virtual +realiti +project +qbic +imag +databas +project +miscellan +comput +vision +demo +comput +vision +relat +cours +boston +univers +cardiff +univers +khoro +digit +imag +process +onlin +cours +royal +institut +sweden +stanford +univers +univers +virginia +univers +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..1d871bde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,77 @@ +fall +section +fall +section +algebra +languag +program +instructor +dave +zimmermannemail +dzimm +wisc +educlass +meet +time +place +nolandoffic +offic +phone +offic +hour +announcementsprogram +wednesdai +octob +program +readi +fridai +novemb +gener +cours +informationc +home +pagecours +objectivesvectra +labc +consultantssyllabuswork +homeclass +handoutsprogramsexam +quizzeslectur +notesgreg +sharp +style +guidegrad +referenc +last +digit +number +quizzesprogramsexam +polici +informationemail +policygrad +policyl +policyacadem +misconduct +policytext +problem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +erratalast +modifi +dave +zimmermann +dzimm +wisc +base +greg +sharp +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..7104cbb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,42 @@ +linear +program +method +linear +program +method +gener +cours +inform +cours +offer +fall +spring +semest +page +variou +instructor +michael +ferri +spring +mangasarian +fall +graduat +cours +wisconsin +network +flow +integ +program +nonlinear +program +theori +nonlinear +program +algorithm +comput +larg +spars +system +last +modifi +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..1ac5241e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,197 @@ +spring +network +flowsspr +schedul +lectur +cours +mail +list +wisc +class +fridai +februari +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +wednesdai +teach +assist +offic +telephon +mail +leei +wisc +offic +hour +tuesdai +thursdai +requir +text +network +flow +ravindra +ahuja +thoma +magnanti +jame +orlin +prentic +hall +us +text +linear +program +chvatal +freeman +linear +network +optim +bertseka +press +gener +cours +inform +cours +overview +path +tree +cycl +data +structur +shortest +path +flow +cost +network +simplex +method +gener +flow +convex +equilibria +lagrangian +relax +multicommod +flow +applic +prerequisit +knowledg +linear +program +grade +homework +assign +grade +project +assign +grade +wednesdai +class +final +examin +grade +mondai +close +book +except +sheet +paper +allow +repres +question +assign +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +mondai +april +homework +mondai +april +homework +fridai +april +homework +fridai +comput +inform +unix +orient +session +first +time +unix +user +novic +unix +user +previous +us +unix +workstat +held +mondai +thursdai +first +week +class +room +mondai +thursdai +second +week +class +room +orient +session +last +minut +introduct +unix +login +access +gam +public +cshrc +local +cshrc +local +sourc +cshrc +local +alter +path +set +gam +directori +appropri +solari +machin +cours +machin +mathemat +program +home +page +cours +page +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..72998a6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,207 @@ +spring +also +math +comput +method +larg +spars +systemsspr +schedul +lectur +cours +mail +list +wisc +class +fridai +februari +instructor +michael +ferri +offic +telephon +mail +ferri +wisc +offic +hour +mondai +wednesdai +teach +assist +offic +telephon +mail +leei +wisc +offic +hour +tuesdai +thursdai +recommend +textbook +matrix +comput +golub +loan +john +hopkinsunivers +press +second +edit +direct +method +spars +matric +duff +erisman +reid +oxford +scienc +public +finit +dimension +vector +space +halmo +springer +verlag +gener +cours +inform +cours +overview +introduct +storag +scheme +gaussian +elimin +dens +error +analysi +spars +local +pivot +strategi +matrix +modif +iter +linear +solver +spars +least +squar +spars +nonlinear +equat +optim +applic +parallel +techniqu +eigenvalu +eigenvector +prerequisit +math +consent +instructor +grade +homework +assign +grade +project +assign +grade +wednesdai +class +final +examin +grade +mondai +close +book +except +sheet +paper +allow +repres +question +assign +homework +mondai +februari +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +fridai +march +homework +mondai +april +homework +mondai +april +homework +fridai +april +homework +fridai +handout +ieee +arithmet +handout +spars +handout +comput +inform +cours +machin +unix +orient +session +first +time +unix +user +novic +unix +user +previous +us +unix +workstat +held +mondai +thursdai +first +week +class +room +mondai +thursdai +second +week +class +room +orient +session +last +minut +introduct +unix +instruct +matlab +mathemat +program +home +page +cours +page +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..3ef1d088 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,82 @@ +fall +construct +compilersfal +schedul +lectur +tuth +csst +instructor +charl +fischer +offic +telephon +mail +fischer +wisc +offic +hour +mondai +wednesdai +fridai +appoint +teach +assist +krishna +kunchithapadam +offic +telephon +mail +krisna +wisc +offic +hour +tuesdai +thursdai +appoint +program +assign +homework +read +teach +assist +weyer +offic +telephon +mail +weyer +wisc +offic +hour +mondai +wednesdai +fridai +class +text +craft +compil +charl +fischer +richard +leblanc +benjamin +cum +check +regularli +gener +cours +inform +cours +overview +date +grade +examin +get +start +handout +lectur +note +us +program +tool +link +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..d7b2e584 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,67 @@ +fall +section +fall +section +algebra +languag +program +instructor +greg +sharpemail +greg +wisc +eduoffic +offic +phone +offic +hour +appt +grader +krishna +kunchithapadamemail +krisna +wisc +edugener +cours +informationc +home +pagecours +objectivesvectra +labc +consultantssyllabuscours +difficultywork +homenewsstartup +informationclass +noteshomeworkexam +quizzesstyl +guideemail +archivepolici +informationemail +policygrad +policyl +policyacadem +misconduct +polici +must +read +textproblem +solv +object +porgrammingwalt +savitchaddison +weslei +publish +compani +pleas +list +known +erratalast +modifi +greg +sharpgreg +wisc +http +wisc +greg +greg +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..018ed98d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,245 @@ +class +home +page +fall +algebra +languag +program +section +fall +chad +lane +wisc +announc +program +line +tue +import +need +version +tribbl +compil +problem +enumer +type +recogn +outsid +class +must +declar +insid +public +section +class +result +privat +section +need +bump +privat +section +bottom +shown +version +also +note +definit +randomintinrang +defin +line +need +correspond +function +bodi +tribbl +call +within +class +work +file +prog +forgot +chang +enumer +type +valu +uppercas +need +compil +everyth +chang +line +copi +prog +want +overwrit +obsolet +copi +also +us +includ +prog +chang +prog +might +also +want +check +help +link +someth +ad +comment +suggest +program +import +name +throughout +program +project +part +name +project +directori +name +file +name +updat +program +descript +consist +prog +version +us +prog +pleas +make +sure +consist +name +discrep +sampl +chri +weaver +public +directori +call +prog +shouldn +matter +program +sampl +program +line +readi +crucial +read +entir +assign +understand +class +basic +attempt +earli +start +hard +requir +time +piec +everyth +togeth +bring +question +class +tuesdai +midterm +grade +freshmen +either +mean +fine +mean +great +thumb +grade +mean +noth +freshman +disregard +stuff +class +tent +semest +syllabu +read +assign +program +assign +handout +prepar +quizz +test +quizz +test +solutionscours +inform +polici +text +problem +solv +object +program +walter +savitch +addison +weslei +publish +compani +meet +vleck +polici +administr +inform +grade +polici +late +polici +mail +inform +attend +polici +academ +misconduct +link +inform +introduct +microsoft +window +first +introduct +borland +second +home +page +vectra +sourc +code +text +consult +extra +refer +materi +mani +question +answer +work +home +sharp +lectur +note +sharp +style +guidelast +modifi +chad +lane diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..0efc0266 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,87 @@ +fall +introduct +program +languag +compilersspr +stori +month +octob +schedul +lectur +tuth +comp +stat +recit +psycholog +instructor +susan +horwitz +offic +telephon +mail +horwitz +wisc +offic +hour +tuesdai +fridai +appoint +teach +assist +rahul +kapoor +offic +telephon +mail +rahul +wisc +offic +hour +mondai +wednesdai +appoint +text +reserv +wendt +librari +compil +principl +techniqu +tool +sethi +ullman +craft +compil +fischer +leblanc +check +regularli +gener +cours +inform +cours +overview +date +inform +assign +exam +grade +includ +late +polici +get +start +read +program +assign +homework +examin +lectur +note +us +program +tool +grade +email +link +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..0eb7b07f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,70 @@ +home +page +hummert +sectionsc +instructor +hummert +contact +email +hummert +wisc +offic +comput +scienc +statist +offic +phone +home +phone +offic +hour +mondai +thursdai +announc +textbookproblem +solv +object +program +walter +savitch +section +inform +psych +psych +grade +comput +comput +scienc +statist +cours +inform +handout +tent +syllabu +semest +late +polici +grade +criteria +academ +misconduct +viewgraph +import +softwar +introduct +microsoft +windowshint +window +compilersth +window +oper +systememailmosaicnetscap +inform +introduct +borland +languageth +savitch +text +assign +program +program diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..30a8eff6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,53 @@ +infoc +info +section +name +bodnersect +offic +hour +mondai +thursdai +offic +number +comput +scienc +statist +hall +doit +phone +mail +jonb +wisc +eduher +thing +keep +mind +need +copi +guid +section +click +choos +print +file +menu +click +question +pleas +stop +offic +hour +send +mail +grade +section +avail +click +bodner +jonb +wisc +mound +madison +last +modifi +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..87d83305 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,224 @@ +intro +kunen +section +introduct +artifici +intellig +notic +inform +spring +detail +coursewil +appear +later +instructor +kunenoffic +stat +buildingtelephon +email +kunen +wisc +eduoffic +hour +appoint +email +grade +four +program +assign +count +three +exam +count +thirdexam +schedul +time +place +final +program +turn +time +midnight +thedai +late +assign +loos +late +topic +cover +follow +order +topic +entir +logic +buti +design +topic +program +assignmenti +discuss +program +introduct +lisp +program +search +game +plai +program +introduct +prolog +natur +languag +understand +program +learn +neural +network +program +logic +deduct +plan +reason +uncertain +knowledg +lisp +inform +sinc +lisp +us +program +coursewil +begin +discuss +common +lisp +would +probabl +usefulto +lisp +refer +avail +supplement +lecturesand +line +help +avail +within +lisp +manypaperback +avail +probabl +like +common +lispcraft +wilenski +anoth +possibl +ansi +common +lisp +book +graham +code +us +book +line +ultim +lisp +refer +steel +common +lisp +languag +edit +page +also +avail +line +click +inform +us +common +lisp +sun +addit +inform +textbook +artifici +intellig +modern +approach +russel +norvig +class +time +recit +session +engr +psych +essentiallli +materi +present +answer +question +give +hint +program +assign +review +exam +usual +last +minut +sinc +teach +section +attend +recit +section +differ +lectur +section +cours +directori +cours +kunen +public +alpha +beta +problem +previou +exam +cours +directori +alpha_beta +click +line +best +first +search +problem +previou +exam +cours +directori +astar +click +line +exam +fall +postscript +exam +exam +exam +final +still +older +exam +cours +directori +last +chang +novemb +kunen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..ac43ba9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,168 @@ +fall +advanc +comput +architectur +ifal +offer +cours +inform +instructor +mark +hilloffic +comp +statemail +markhil +wisc +eduoffic +hour +tuesdai +fridai +appoint +shenoffic +comp +statphon +email +mshen +wisc +eduoffic +hour +mondai +thursdai +appoint +tabl +content +reader +lectur +note +homework +project +miscellaneawhat +give +talksread +reader +tabl +content +full +paper +doit +reader +tabl +content +full +paper +doit +reader +tabl +content +full +paper +doit +reader +tabl +content +full +paper +doit +lectur +note +introduct +chapter +perform +cost +chapter +instruct +set +chapter +cach +chapter +part +memori +chapter +part +talluri +hill +basic +pipelin +chapter +part +basic +pipelin +chapter +part +instruct +level +parallel +chapter +part +instruct +level +parallel +chapter +part +input +output +chapter +interconnect +chapter +note +parallel +process +chapter +homework +homework +assign +solut +homework +assign +solut +homework +assign +solut +homework +assign +solut +homework +assign +solutionproject +assign +propos +novemb +class +talk +decemb +class +report +decemb +noonmiscellanea +give +talk +spring +final +spring +project +assign +spring +midterm +us +first +edit +hennessi +patterson +architectur +qualifi +exam +sourc +hard +question +comput +architectur +seminar +wisconsin +comput +architectur +group +world +wide +comput +architectur +inform diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..0cd45cf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,105 @@ +section +section +instructor +michael +birk +email +mbirk +wisc +offic +comp +phone +offic +home +offic +hour +offic +appoint +assign +program +program +administr +inform +text +problem +solv +object +program +walter +savitch +room +time +psycholog +tuesdai +thursdai +comp +vectra +syllabu +comput +grade +grade +standard +late +assign +polici +handin +procedur +cheat +academ +misconduct +consult +exampl +string +class +us +dynam +alloc +us +dynam +alloc +ration +class +exampl +us +oper +overload +complex +class +repres +float +point +complex +number +anoth +exampl +oper +overload +intstack +class +simpl +exampl +unlimit +size +data +structur +classinfo +exampl +us +struct +us +class +link +home +page +home +page +introduct +microsoft +window +introduct +borland +tutori +us +debugg +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..f99bfe93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,165 @@ +section +section +instructor +michael +birk +email +mbirk +wisc +offic +comp +phone +offic +home +offic +hour +offic +appoint +announc +test +case +program +avail +dice +code +discuss +class +class +rank +last +four +digit +student +number +past +exam +onlin +spring +fall +spring +note +hangman +assign +program +exam +mondai +octob +comp +room +lectur +instruct +format +syllabu +first +eight +week +avail +second +eight +week +come +soon +instruct +print +program +output +comput +outsid +late +polici +final +room +chang +meet +comp +assign +program +program +program +program +program +program +program +administr +inform +text +problem +solv +object +program +walter +savitch +room +time +comp +tuesdai +thursdai +comp +vectra +syllabu +comput +grade +grade +standard +late +assign +polici +handin +procedur +cheat +academ +misconduct +consult +exampl +string +class +us +dynam +alloc +us +dynam +alloc +ration +class +exampl +us +oper +overload +complex +class +repres +float +point +complex +number +anoth +exampl +oper +overload +intstack +class +simpl +exampl +unlimit +size +data +structur +classinfo +exampl +us +struct +us +class +link +home +page +introduct +microsoft +window +introduct +borland +tutori +us +debugg +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..0956fa48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,107 @@ +section +dave +melskithes +page +chang +frequent +press +reload +button +daili +get +start +alreadi +stuff +assign +page +uncomfort +comput +andth +softwar +page +help +link +info +instructor +david +melski +offic +comput +scienc +statist +floor +phone +offic +hour +email +melski +wisc +click +attach +pleas +section +info +section +meet +noland +section +meet +psycholog +text +problem +solv +object +program +walter +savitch +addison +wesleypublish +gener +stuff +us +us +window +usingborland +refer +materi +rough +syllabu +section +email +archiv +section +email +archiv +section +polici +info +academ +misconduct +must +read +rule +thumb +share +code +consult +help +grade +late +work +email +check +often +essenti +link +assign +solut +handout +list +tutor +avail +last +modifi +david +melski +melski +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..bb328c04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,301 @@ +home +page +section +fall +section +algebra +languag +program +instructor +milo +martin +milo +wisc +time +locat +psychologyinstructor +milo +martin +email +milo +wisc +eduoffic +offic +hour +tuesdai +thursdai +appoint +offic +phone +announcementsthi +page +chang +frequent +respons +check +page +often +novemb +quiz +take +home +given +class +todai +place +onth +homepag +novemb +ad +file +us +project +program +page +homepag +novemb +room +test +scheduledfor +wednesdai +novemb +octob +updat +current +grade +haseveryth +quiz +pleas +check +make +sure +isaccur +octob +ad +link +coupl +page +withinform +html +languag +us +page +linksar +section +titl +document +octob +program +avail +onfridai +novemb +take +home +quiz +mondai +novemb +remind +exam +wednesdai +novemb +gener +class +inform +current +class +grade +gener +inform +sheet +turn +assign +syllabu +code +style +guid +home +page +vectra +consult +fall +consult +schedul +academ +misconduct +policyclass +document +final +bankaccount +class +code +bankaccount +bankaccount +main +postscript +bankaccount +class +code +bankaccount +postscript +bankaccount +struct +code +bankaccount +postscript +minmax +exampl +code +findth +minimum +maximum +list +number +case +enteredfrom +stdin +form +code +creat +formlett +data +specifi +file +us +file +theopen_fil +function +introduc +class +call +refer +class +exampl +function +user +input +withprompt +call +refer +version +function +user +input +withprompt +call +valu +version +document +beginn +guid +html +standard +introduct +toth +html +languag +html +refer +guid +refer +guid +latest +html +standard +releas +good +time +viru +hoax +code +ethic +andprofession +conductassign +class +survei +questionar +requir +mondai +septemb +program +wednesdai +septemb +program +wednesdai +septemb +program +wednesdai +septemb +program +fridai +octob +program +mondai +octob +program +wednesdai +octob +program +fridai +novemb +program +program +program +wednesdai +decemb +quiz +quiz +solut +score +mondai +septemb +quiz +solut +score +wednesdai +octob +quiz +solut +score +mondai +octob +quiz +solut +score +take +home +quiz +mondai +novemb +exam +exam +solut +score +wednesdai +octob +exam +solut +score +wednesdai +novemb +exam +solut +score +tuesdai +decemb +textbook +problem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +errata +milo +martin +milo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..1d0c329a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,140 @@ +home +page +martin +ream +classc +section +algebra +languag +programmingspr +martin +ream +teach +assist +absolut +nothingeverydai +informationc +class +inform +pagescommon +programmingmistakesarch +section +section +class +mail +list +messag +sent +list +semest +calendar +program +tuesdai +januari +program +tuesdai +februari +program +tuesdai +februari +program +thursdai +februari +program +thursdai +march +exam +tuesdai +march +program +tuesdai +march +program +thursdai +april +program +tuesdai +april +exam +tuesdai +april +program +thursdai +april +program +thursdai +final +exam +thursdai +placeto +announcedcours +detail +contact +email +mream +wisc +offic +comput +scienc +statist +dayton +phone +offic +hour +appoint +talk +class +send +email +textbookproblem +solv +object +program +walter +savitch +section +inform +section +noland +section +noland +comput +csst +contain +vectra +run +window +andborland +addit +cours +inform +tent +syllabu +semest +extra +materi +late +polici +grade +criteria +academicmisconduct +rule +thumb +share +code +assign +work +anyform +former +student +made +bigtodd +thielwendi +staatsabout +instructor +last +modifi +martin +ream +mream +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..3c8b8e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,130 @@ +fall +section +fall +section +algebra +languag +program +nolandinstructor +mike +steeleemail +msteel +wisc +eduoffic +comp +stat +buildingoffic +hour +time +appoint +soffic +phone +import +announcementsi +extend +deadlin +program +pleas +check +mail +read +thenew +program +deadlin +informationmidterm +tuesdai +novemb +comp +stat +current +grade +line +includ +grade +everyth +hand +tuesdai +novemb +sampl +program +taken +exampl +pastfew +week +class +fill +stuff +gloss +makefulli +function +program +find +us +ifyou +miss +even +didn +understand +exampl +note +exampl +page +near +bottom +rememb +check +mail +clarif +programmingassign +gener +cours +informationc +home +pagecours +objectivesabout +vectra +labc +consultantscours +syllabu +read +assignmentsnot +work +homeclass +handoutsprogram +assignmentsexam +quizzessom +note +examplespolici +informationemail +policygrad +polici +late +polici +academ +misconduct +policyus +refer +pagesintroduct +microsoft +windowsintroduct +borland +greg +sharp +styleguid +codetextproblem +solv +object +program +walter +savitchaddison +weslei +publish +compani +list +known +erratalast +modifi +mike +steel +msteel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..872b7374 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,192 @@ +fall +session +infoc +fall +sessionalgebra +languag +program +instructor +andrew +prockemail +prock +wisc +eduoffic +comput +scienc +statist +offic +phone +offic +hour +thgrader +haihong +wangemail +wisc +eduoffic +comput +scienc +statist +offic +phone +consult +hour +mtwrannounc +grade +link +pleas +check +grade +verifi +score +modifi +crazi +offic +hour +todai +todai +made +minor +modif +crazi +file +assign +copi +alreadi +gotton +assign +five +onlin +exam +result +rang +ad +grade +polici +assign +sampl +quiz +test +onlin +gener +perus +test +quiz +anoth +think +give +good +idea +level +knowledg +need +rememb +topic +test +test +onlin +webpag +seem +done +email +notic +error +question +make +sure +check +assing +assign +onlin +well +like +work +ahead +final +tuesdai +decemb +mark +calendar +everyon +requir +take +final +check +link +page +feel +thing +locat +import +carefulli +read +polici +administr +inform +welcom +class +class +inform +text +problem +solv +walter +savitch +room +comput +scienc +statist +time +section +section +tent +syllabu +section +grade +section +grade +grade +polici +late +polici +mail +polici +academ +misconductcours +materi +gener +cours +info +style +guid +lectur +note +assign +email +archiv +section +email +archiv +section +info +introduct +microsoft +window +first +introduct +borland +second +home +page +vectra +sourc +code +text +consult +extra +refer +materi +mani +question +answer +prock +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..d318e3b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,115 @@ +kelli +page +kelli +ratliff +current +grade +keyword +search +mail +messag +exampl +enter +function +without +quot +everi +paragraph +us +word +function +also +wildcard +link +mail +messag +sent +semest +info +info +info +info +info +info +info +info +info +addit +inform +interest +backup +copi +disk +filesviru +inform +world +wide +faqfun +stuff +usenet +oracl +resourc +index +virtual +tourist +world +mapth +space +shuttl +clickabl +badger +herald +site +comicshumor +abort +retri +ignor +nine +type +usersfin +weeklab +jokesget +softwar +comput +home +might +interest +tryingsom +sharewar +freewar +softwar +avail +internet +archiv +program +usual +compress +need +somecompress +archiv +softwar +unpack +need +reviewsom +command +try +biggest +best +maintain +archiv +simtel +minclud +link +simtel +file +post +usenet +simtel +site +infocompress +infofavorit +site +clickher +visit +desautel +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..1bb717e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,94 @@ +home +page +russ +man +csc +introduct +comput +program +instructor +russel +manningemail +rman +wisc +eduoffic +room +comput +scienceoffic +hour +find +basement +saturdai +except +home +footbal +game +sundai +come +keep +compani +work +like +vectra +although +student +prioriti +grade +lectur +final +click +textbook +problem +solv +object +program +walter +savitch +section +inform +semest +univers +rotc +build +comput +room +comput +scienc +statist +assign +program +mondai +novemb +program +wednesdai +novemb +program +program +program +program +cours +inform +handout +syllabu +late +polici +academ +misconduct +import +softwar +introduct +microsoft +windowshint +window +compilersth +window +oper +systememailmosaicnetscap +inform +introduct +borland +languageth +savitch +textold +quizz +none diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..a63d075a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,143 @@ +introduct +artifici +intellig +introduct +artifici +intellig +gener +cours +inform +cours +offer +fall +spring +semest +academ +year +section +thefal +spring +topic +cover +principl +knowledg +base +search +techniqu +best +first +search +alpha +beta +search +knowledg +represent +us +predic +logic +semant +network +connectionist +network +frame +rule +autom +deduct +applic +problem +solv +plan +expert +system +game +plai +vision +natur +languag +understand +learn +robot +program +includ +lisp +possibl +prolog +previou +knowledg +languag +assumedprerequisit +page +variou +instructor +chuck +dyer +spring +fall +kunen +fall +spring +jude +shavlik +fall +sabbat +spring +bryan +spring +local +relat +link +madison +seminar +qualifi +exam +recent +tabl +content +abstract +journal +mostli +wendt +librari +readabl +wisc +wisc +group +wisc +comput +vision +group +wisc +machin +learn +group +wisc +robot +group +wisc +comput +biologi +includ +wisc +dept +graduat +cours +wisconsin +machin +learn +deduct +problem +solv +comput +vision +robot +motion +plan +extern +relat +link +last +modifi +jude +shavlik +shavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..6e1ba1dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,484 @@ +machin +learn +machin +learn +spring +gener +cours +inform +instructor +jude +shavlik +stat +shavlik +wisc +offic +mondai +wednesdai +teach +assist +geoff +weinberg +highwai +lab +basement +build +geoffrei +wisc +offic +mondai +wednesdai +offic +phone +home +home +cours +overview +postscript +cours +syllabu +postscript +archiv +class +email +readabl +wisc +suggest +class +project +postscript +read +assign +assign +read +chapter +theori +refin +chapter +comput +learn +theori +mitchel +textbook +feedback +author +assign +april +read +learn +logic +definit +relat +quinlan +knowledg +base +artifici +neural +network +towel +shavlik +assign +april +read +chunk +soar +laird +rosenbloom +newel +learn +knowledg +level +dietterich +assign +april +read +chapter +analyt +learn +mitchel +textbook +feedback +author +april +assign +april +read +journal +articl +unsupervis +learn +fisher +rumelhart +zipser +cogsci +lenat +assign +april +read +chapter +genet +algorithm +mitchel +textbook +feedback +author +april +assign +march +read +chapter +reinforc +learn +mitchel +textbook +feedback +author +april +assign +march +read +backpropag +basic +theori +rumelhart +assign +februari +read +chapter +neural +network +mitchel +textbook +feedback +author +march +also +read +journal +articl +shavlik +moonei +towel +empir +compar +backprop +assign +februari +read +chapter +concept +space +mitchel +textbook +feedback +author +februari +assign +januari +read +machin +learn +experiment +scienc +kibler +feedback +author +februari +assign +januari +read +chapter +introduct +mitchel +textbook +feedback +author +januari +assign +journal +articl +towel +shavlik +kbann +algorithm +mondai +journal +articl +fisher +cobweb +algorithm +wednesdai +april +journal +articl +shavlik +moonei +towel +empir +compar +backprop +wednesdai +march +sure +answer +sheet +paper +best +idea +next +summar +assign +paper +sentenc +summari +lead +instead +analyz +late +polici +brr +hand +materi +cover +lectur +homework +assign +homework +learn +reinforc +learn +wednesdai +april +homework +train +neural +network +mondai +march +homework +experiment +methodolog +mondai +februari +homework +induc +decis +tree +mondai +februari +homework +creat +person +concept +mondai +januari +late +polici +start +class +student +five +free +late +dai +semest +exhaust +penalti +measur +noon +noon +weekend +free +make +tractabl +accept +week +late +previous +us +homework +postscript +homework +spring +migrat +semest +progress +homework +induc +decis +tree +homework +heurist +search +concept +space +homework +train +neural +network +homework +learn +reinforc +learn +homework +version +space +postscript +homework +explan +base +learn +postscript +homework +cobweb +postscript +previou +exam +postscript +spring +spring +spring +spring +spring +spring +spring +spring +ineedagoodicon +relat +link +machin +learn +journal +line +page +nip +paper +premier +neural +confer +recent +tabl +content +abstract +select +journal +mostli +wendt +librari +readabl +wisc +irvin +dataset +archiv +pointer +cours +knowledg +discoveri +databas +neural +network +resourc +stuff +machin +learn +benchmark +ieee +neural +network +council +sever +journal +connect +page +intern +societi +adapt +behavior +bibliographi +server +austrian +institut +neural +network +bibliographi +server +austrian +institut +resourc +canadian +server +link +peopl +extern +refer +help +program +assign +page +us +akcl +common +lisp +department +workstat +tip +us +emac +lisp +code +write +frequent +ask +question +lisp +cours +comput +tip +us +akcl +debugg +help +lisp +novic +lisp +frequent +ask +question +steel +common +lisp +languag +edit +refer +manual +textbook +print +printer +print +page +relat +local +link +wisc +group +wisc +math +program +group +wisc +comp +biologi +includ +wisc +group +wisc +dept +wisc +librari +local +link +last +modifi +jude +shavlikshavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..671267cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,358 @@ +fall +advanc +oper +systemsfal +marvin +solomon +offic +comput +sciencesoffic +hour +troffic +phone +email +address +solomon +wisc +new +watch +space +latest +updat +last +updat +schedul +project +present +list +final +exam +mondai +inroom +comput +scienc +statist +build +project +present +room +fridai +noon +exampl +past +midterm +examtogeth +sampl +answer +midterm +exam +wednesdai +octob +pmin +room +comput +scienc +final +exam +mondai +decemb +exact +time +place +bedetermin +project +suggest +informationabout +project +avaiabl +readabl +version +figur +multic +memori +manag +paper +avail +content +new +summari +lectur +inform +text +cours +schedul +grade +project +project +present +summari +cours +intend +give +broad +exposur +advancedoper +system +topic +assum +student +good +semest +cours +onoper +system +equival +cover +topic +normal +present +cours +inconsider +detail +synchron +interprocess +commun +memori +manag +file +system +protect +secur +distribut +system +lectur +inform +lectur +tuesdai +thursdai +comput +sciencestextther +realli +satisfactori +textbook +graduat +level +operatingsystem +class +usea +select +classic +papersa +text +cours +structur +around +read +journal +articl +andconfer +proceed +purchas +read +doit +formerli +macc +document +deskfor +read +semest +similar +ident +thoseof +previou +semest +us +copi +make +avail +individu +paper +youto +copi +class +discuss +topic +relev +current +paper +click +herefor +tent +schedul +lectur +detail +detail +review +paper +willinstead +adiscuss +major +topic +theme +us +paper +focal +point +activ +particip +discuss +strongli +encourag +will +particip +activ +daili +class +geta +expect +quietli +listen +week +much +lessout +class +gradingther +exam +midterm +final +project +worth +total +grade +exam +design +verifi +carefulli +thoroughli +read +read +projecty +requir +complet +term +project +list +suggest +topic +provid +strongli +encourag +make +project +project +involv +implement +tool +experiment +implementationsof +algorithm +suggest +research +literatur +measur +studi +simul +project +must +experiment +compon +literatur +survei +unvalid +design +paper +suffici +project +done +person +group +larger +smaller +group +approv +case +basi +write +term +paper +summar +result +project +paper +must +meet +standard +research +public +grade +qualiti +write +well +content +also +make +ashort +presentationabout +project +class +project +presentationsher +schedul +project +presen +present +room +comput +scienc +statist +time +approxim +manyan +stubb +andrew +bigg +franci +salmon +gunawan +agu +qingmin +wang +chien +pang +jame +chen +eric +larsen +conroi +fritz +craig +jordan +prasad +deshpand +avinash +sodani +basnei +rajesh +raman +biswadeep +chen +taxiao +wang +yanm +xinyu +richard +zhang +todd +munson +wenjun +xinyi +wang +yufei +zeyu +chen +sridhar +gopal +michael +leesolomon +wisc +eduthu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..810f16f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +chiang +time +gradesgo +homepag +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..fc42e9c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,167 @@ +section +overal +structur +program +primarili +exercis +gener +problem +solv +write +fortran +code +though +want +time +solut +algorithm +even +depend +particular +program +languag +fortran +solut +class +follow +mondai +exercis +comput +labyou +us +vectra +comput +scienc +statist +contain +hewlett +packard +vectra +run +microsoft +window +microsoft +fortran +open +seven +dai +week +except +certain +holidai +printer +room +locat +across +hall +quota +page +print +exce +quota +must +contact +either +mail +go +room +offic +hour +prefer +mail +increas +quota +bewar +machin +vectra +aren +configur +correctli +particular +machin +along +wall +closest +outsidehallwai +toward +left +hand +part +room +avoid +also +home +dorm +comput +write +program +howev +probabl +purchas +copi +microsoft +fortran +lahei +person +fortran +insid +cover +textbook +also +work +comput +lab +campu +howev +fortran +compil +pleas +first +us +softwar +us +includ +microsoft +window +microsoft +fortran +mail +netscap +pointer +interest +home +page +jeff +lampert +home +page +comput +scienc +depart +home +page +start +point +internet +explor +lyco +search +world +wide +keyword +dilbert +comic +relief +long +night +assign +copyright +copi +modifi +gareth +bestor +bestor +wisc +last +modifi +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..2b42565f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,55 @@ +homepagec +homepagewelcom +homepag +purpos +homepag +provid +student +inform +pertain +section +sinc +page +chang +frequent +responsibilityto +check +page +often +gener +informationinstructor +todd +munsonemail +tmunson +wisc +eduoffic +comput +scienc +statisticsoffic +phone +offic +hour +appointmentsect +textbook +problem +solv +walter +savitchclass +informationexpectationssyllabusexam +schedule +mailgradingl +assignmentsextra +creditpoliciesconsult +responsibilitiesacadem +misconductoth +informationdaili +note +assignmentshomework +assignmentsprogram +document +us +classoth +program +resourcesc +homepagetmunson +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..bca2b44b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,92 @@ +section +comput +scienc +section +time +place +nolandinstructor +todd +turnidgeoffic +hour +tbalab +hour +tbaannouncementsclass +note +class +handout +struct +avail +program +avail +get +start +us +inform +read +get +start +gener +inform +text +facil +grade +polici +syllabu +tent +syllabu +cours +assign +text +read +program +assign +solut +handout +collect +class +handout +date +class +mail +list +inform +send +messag +classa +whole +cours +home +page +home +page +section +muchinform +gener +interest +includ +inform +tutor +consult +window +oper +system +email +netscap +section +find +inform +provid +byother +instructor +help +exampl +handout +gregorysharp +cours +difficulti +last +modifi +todd +turnidg +turnidg +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..09f580b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,120 @@ +section +section +algebra +languag +program +announc +exam +thursdai +psych +reload +page +everi +time +login +instructor +chri +weaver +email +weaver +wisc +email +polici +offic +comput +scienc +statist +offic +phone +offic +hour +appoint +offic +hour +first +week +grader +zhang +email +wisc +offic +comput +scienc +statist +offic +phone +locat +section +noland +section +noland +comput +vectra +comput +scienc +statist +hour +seven +dai +week +staf +consult +gener +cours +info +syllabu +text +problem +solv +object +program +walter +savitch +addison +weslei +isbn +includ +errata +sourc +code +text +misconduct +policyassign +grade +handout +program +assign +homework +read +assign +lectur +note +handout +exampl +program +exam +quiz +kei +late +polici +grade +polici +style +guidelin +still +rough +print +paper +statement +chri +weaver +comput +scienc +depart +univers +wisconsin +madison +last +chang +chri +weaver diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..742038f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,185 @@ +spring +advanc +comput +architectur +spring +offer +cours +inform +instructor +prof +jame +smith +offic +engin +hall +offic +hour +tue +thur +offic +phone +email +wisc +princeoffic +engin +hall +offic +hour +offic +phone +mail +address +princ +wisc +tabl +content +new +read +lectur +note +homework +project +miscellaneousnew +homework +soln +special +offic +hour +final +exam +biochemistri +pmread +read +tabl +content +full +paper +doit +read +tabl +content +full +paper +doit +read +tabl +content +full +paper +doit +read +tabl +content +full +paper +doit +lectur +note +cours +overview +introduct +comput +architectur +perform +cost +instruct +set +pipelin +advanc +pipelin +part +advanc +pipelin +part +vector +vliw +limit +softwar +cach +memori +advanc +cach +advanc +cach +main +memori +main +memori +system +disk +arrai +disk +arrai +interconnect +technolog +interconnect +technolog +network +network +multiprocessor +part +multiprocessor +part +multiprocessor +part +multiprocessor +part +homework +homework +assign +homework +solut +homework +assign +homework +solut +homework +assign +note +homework +homework +solut +homework +assign +homework +solut +homework +assign +homework +solut +project +project +assign +trace +inform +project +list +miscellan +us +tool +project +review +midterm +midterm +exam +specmark +consid +harm +analysi +pipelin +clock +detail +design +reserv +station +lectur +network +rout +lectur +network +rout +cach +summari +final +exam diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..380daa58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,409 @@ +main +pagecomput +scienc +comput +graphic +page +evolv +incomplet +hopefulli +us +begin +home +page +forc +deal +comput +graphic +scientif +visual +atth +level +comput +graphic +principl +practic +folei +computergraph +alan +watt +current +taught +bruceland +also +project +leader +visual +group +atth +cornel +theori +center +content +cours +summari +administrivia +textbook +error +homework +staff +schedul +relev +math +cornel +math +graphic +cours +cours +summari +comput +graphic +topic +requir +mathemat +program +artist +skill +among +other +content +comput +graphic +fundament +cours +cornel +focus +mathemat +skill +associ +cours +program +cours +designedto +help +illumin +math +cours +cover +follow +topic +year +construct +surfac +explicit +polygon +list +parametr +oper +quadric +surfac +figur +rotat +swept +surfac +tensor +product +surfac +parametr +surfac +viewer +implicit +surfac +quadric +surfac +blobbi +model +oper +surfac +tessel +comput +surfac +normal +hierarch +group +simpl +object +form +complex +surfac +scene +composit +anim +introduct +homogen +coordin +geometr +transform +build +object +kinemat +anim +hierarch +model +combin +prototyp +object +mimic +connect +rigid +part +invers +kinemat +dynam +system +anim +differenti +equat +cellular +automata +view +group +object +camera +transform +clip +view +volum +project +onto +screen +parallel +project +perspect +project +camera +simul +transform +clip +project +stereo +vision +render +shade +light +human +color +vision +color +devic +limit +light +geometr +optic +wave +gourand +phong +shade +hidden +surfac +remov +buffer +transpar +shadow +scan +convers +anti +alias +polygon +pixel +human +vernier +hyper +acuiti +imag +space +method +object +space +method +surfac +properti +modif +textur +map +bump +map +volum +textur +model +scientif +data +scientif +visual +aspect +scientif +data +dimension +field +scalar +vector +field +object +wall +channel +scalar +field +contour +line +surfac +color +color +mispercept +volum +render +vector +field +difficulti +arrow +field +line +particl +advect +multiparamet +high +dimension +data +dynam +system +administrivia +error +textbook +comput +graphic +alan +watt +homework +assign +homework +homework +homework +homework +homework +march +homework +march +homework +march +homework +april +homework +april +homework +cours +schedul +prelim +first +test +spring +serv +gener +guid +test +style +also +list +schedul +prelim +cornel +spring +break +prelim +religi +holidai +student +educationlaw +mandat +faculti +make +avail +opportun +tomak +examin +miss +religi +belief +inord +facilit +prepar +makeup +exam +student +intendingto +absent +order +observ +holidai +requestedto +notifi +instructor +last +lectur +final +schedul +exam +period +tuesdai +upson +final +mean +standard +deviat +staff +bruce +land +rhode +bruce +cornel +jing +huang +upson +huang +upson +justin +mccune +upson +jmccune +csrelev +math +cornel +univers +math +graphic +cours +univers +california +davi +univers +waterloo +univers +wale +colleg +cardiff +univers +manchest +oregon +state +universityrel +topic +final +project +anim +visual +cornel +theori +center +comment +theori +center +onlin +document +welcom +sent +todoc +comment +cornel +last +modifi +land +copyright +statement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..f0aa7c3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,133 @@ +main +pagecomput +scienc +comput +graphic +laboratori +exercisesthi +site +cornel +universityundergradu +comput +graphic +laboratori +page +contain +materi +includ +procedur +softwar +student +result +section +deal +computergraph +scientif +visual +level +computergraph +principl +practic +folei +dcomput +graphic +watt +current +taught +bruceland +also +project +leader +visual +group +atth +cornel +theori +center +page +first +place +sigucc +basededuc +train +materi +competit +exercis +get +start +build +polygon +object +parametr +surfac +transform +model +us +virtual +camera +camera +perspect +transform +light +textur +bump +map +model +scientif +visual +design +project +physic +base +anim +implicit +surfac +procedur +textur +exercis +done +order +note +exercis +mark +current +mark +areinclud +refer +current +exercis +chat +facil +commun +aboutc +relat +topic +spring +semesteraccess +restrict +enrol +student +relat +topic +final +project +anim +visual +cornel +theori +center +comment +theori +center +onlin +document +welcom +sent +todoc +comment +cornel +last +modifi +land +copyright +statement diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..c4bdbe1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,75 @@ +cornel +fall +topic +comput +graphic +fall +semestereach +group +student +chose +current +research +topic +computergraph +read +appropri +paper +implement +code +group +deliv +lectur +chosen +topic +anddocu +work +document +student +topic +metabal +model +window +michael +arcuri +alex +benton +model +human +facial +express +huang +hung +content +base +imag +retriev +system +interior +design +sean +landi +interdepend +particl +system +justin +mccune +visual +diffus +distribut +pollut +us +spatial +explicit +landscap +modelsfu +tsai +antialias +video +imag +us +stochast +sampl +arun +vermach +hsun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..4e3356ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,74 @@ +patti +houghpatti +hough +sandia +nation +laboratori +livermor +cornel +student +center +appli +mathemat +whichi +hous +frankh +rhode +hall +cornellunivers +thesi +advisor +steve +vavasi +member +committe +nicktrefethen +schatz +research +interest +fall +area +numer +linearalgebra +scientif +comput +optimizationi +current +work +postdoc +juan +meza +scientif +comput +depart +sandia +nationallaboratori +livermor +resum +statement +research +goal +tech +report +complet +orthogon +decomposit +weight +least +squar +vavasi +appear +siam +matrix +anal +stabl +effici +solut +ofweight +least +squar +problem +applic +interior +pointmethod +thesi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..1d0f6c14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,112 @@ +jeff +baggettjeff +baggett +center +appli +mathemat +frank +rhode +hall +cornel +univers +ithaca +baggett +cornel +sixth +year +graduat +student +depart +mathemat +atcornel +univers +supervis +nick +trefethen +expect +finish +thesi +titl +normal +dynam +applic +hydrodynam +stabil +summer +would +like +continu +research +seek +research +posit +detail +outlin +postscript +page +thesi +curriculum +vita +postscript +page +interest +activ +interest +background +unusu +blend +scientif +comput +dynam +system +fluid +mechan +research +propos +postscript +page +work +iwould +like +next +coupl +year +paper +mostli +linear +model +transit +turbul +postscript +driscol +trefethen +physic +fluid +april +exponenti +type +versu +spectral +abscissa +hill +andphillip +exampl +postscript +submit +integr +equat +oper +theori +dimension +model +subcrit +transit +turbul +postscript +trefethen +submit +physic +fluid +misc +link +satish diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..c235bb9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,104 @@ +network +comput +scienc +technic +report +librari +network +comput +scienc +technic +report +libraryncstrl +pronounc +ancestr +internationalcollect +comput +scienc +technic +report +departmentsand +industri +govern +research +laboratori +made +availablefor +commerci +eduat +ncstrlcollect +distribut +among +interoper +serversoper +participatinginstitut +read +offici +ncstrlpress +packag +descript +background +goal +andorgan +ncstrl +search +ncstrl +collect +field +search +form +allow +perform +search +sever +field +bibliograph +data +limit +search +specif +institut +enter +sever +word +list +document +collect +whose +author +titl +abstract +contain +search +word +brows +report +theparticip +institut +want +join +ncstrl +tell +moreread +forinstitut +interest +particip +ncstrl +collect +informationfind +snew +ncstrl +brows +list +document +relat +ncstrl +ncstrl +cornel +comput +scienc +send +email +totech +report +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..dc9fa02a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^dri.cornell.edu @@ -0,0 +1,37 @@ +design +research +institut +design +research +instituteabout +researchersat +brows +searchal +public +file +technic +reportssearch +technic +report +institut +ipic +home +page +intern +work +confer +integr +enterpris +informationand +process +anoth +site +inform +itisingapor +altavistaforum +send +question +comment +server +mike +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..379232b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,235 @@ +davi +design +research +institutejim +davisxerox +corporationphd +media +davi +cornel +edumi +goal +gener +build +softwar +system +improvecommun +among +peopl +believ +commun +medium +ofth +futur +increas +understand +structur +andcont +messag +transmit +manipul +reformat +even +gener +content +interest +inhypertext +system +network +inform +access +collabor +work +thecstr +project +anarpa +sponsor +effort +make +comput +scienc +technic +report +moreeasili +access +part +work +design +distribut +technic +report +server +whichi +run +mani +univers +interest +corpor +group +memori +mean +electronicsystem +captur +access +knowledg +us +produc +worker +institut +order +increas +qualiti +ofor +reduc +time +requir +futur +work +corpor +memoryinclud +intellectu +product +institut +engin +design +lawyer +contract +author +sscreenplai +also +knowledg +process +producedth +product +knowledg +dead +end +explor +tool +us +andjustif +support +final +decis +also +begun +project +huttenloch +developingcorpor +memori +sharedannot +structur +document +project +investig +howpeopl +share +inform +read +write +annot +inelectron +document +share +group +initi +prototypeimplement +us +cornel +class +shareddocu +problem +set +cours +note +nnotat +might +berequest +clarif +student +technic +question +orcorrect +made +staff +question +whether +aus +mean +student +obtain +answer +whether +student +willfind +question +us +sourc +learn +whetherstud +often +abl +answer +question +correctli +whether +cours +staff +find +usefulmean +feedback +improv +cours +evid +isthat +also +interest +natur +languag +gener +designof +comput +proxi +agent +safe +reliablycarri +remot +comput +foreign +machin +without +risk +toeither +owner +remot +machin +alsopap +onlin +copi +publicatiion +resourc +list +resourc +seem +especi +us +thedrimi +resourc +resourc +collect +seem +us +meprofession +historythi +narr +resum +contact +improvisationi +sport +resumeno +market +thank +ask +likeit +fine +xerox diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..0e9b28c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,36 @@ +metacrawl +searchingmetacrawlerbi +erik +selberg +greg +lauckhartand +prof +oren +etzioniif +search +person +home +page +ahoi +search +phrase +word +wordssort +result +relev +locationcontrol +search +java +home +configur +problemswebmast +metacrawl +comcopyright +erik +selberg +oren +etzioni +greg +lauckhart +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..54cd37c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,139 @@ +univers +wisconsin +robot +home +page +madison +robot +mechan +engin +univers +avenuemadison +vladimir +lumelski +director +brief +review +research +activ +peopl +technic +report +compress +postscript +avail +simul +librari +relat +cours +robot +seminar +recent +project +select +public +abstract +maze +search +algorithm +effect +kinemat +sensor +base +motion +plan +dynam +sensor +base +control +jogger +model +sens +plan +decentr +intellig +group +robot +special +topic +sensor +base +motion +plan +tether +robot +underwat +robot +kinemat +redund +sensit +skin +project +human +center +system +comput +geometri +relat +research +madison +neil +duffi +manufactur +system +chuck +dyer +comput +vision +robert +lorenz +sensor +actuat +jude +shavlik +machin +learn +robot +link +ieee +societi +robot +autom +ieee +ieee +tech +committe +robot +motion +path +plan +robot +internet +resourc +page +robot +server +robot +nasa +telerobot +research +program +robot +frequent +ask +question +list +local +link +madison +dept +madison +group +madison +colleg +engin +comment +suggest +errata +hert +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..1fe012d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,62 @@ +vladimir +lumelski +home +page +vladimir +lumelskyprofessormechan +engin +comput +scienc +underwat +robot +kinemat +redund +sensit +skin +project +human +center +system +comput +geometri +global +link +ieee +societi +robot +autom +ieee +ieee +tech +committe +robot +motion +path +plan +link +wisconsin +robot +home +page +colleg +engin +mechan +engin +dept +electr +comput +engin +dept +comput +scienc +dept +mathemat +dept +mathemat +comput +engin +graduat +program +mace +grant +institut diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..6d85583c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,300 @@ +werner +vogel +werner +vogel +researchera +upson +halldept +comput +sciencecornel +univers +ithaca +phone +email +vogel +cornel +protocol +design +perfect +reach +noth +left +noth +left +take +awai +involv +major +system +project +cornel +comput +scienc +depart +thehorusand +cornel +cluster +project +think +myresearch +interest +best +describ +latenc +high +bandwith +commun +support +highli +reliabl +distribut +system +real +time +requir +focu +system +design +engin +issu +thing +amwork +highli +predict +execut +environ +horu +integr +horuswith +real +time +environ +need +lead +situat +reason +advanc +oper +guarante +mechan +effici +data +transfer +high +speed +network +devic +applic +level +latenc +messag +high +bandwidth +small +messag +issu +fallen +behind +softwar +design +high +speed +network +adapt +high +speed +cluster +commun +protocol +achiev +desir +latencyfor +messag +pass +system +protocolsar +abl +exploit +need +think +structureand +interact +pattern +method +deal +guarante +failur +miss +deadlin +support +abl +meet +guarant +gave +tell +anyon +acur +failur +detect +want +take +distribut +system +aglob +scope +need +find +gener +mechan +supportfailur +suspis +detect +manag +process +node +network +experi +group +system +extract +mechan +workwith +middlewar +packag +regardless +function +horu +brainchild +robbert +reness +andken +birman +cluster +work +done +cooper +withthorsten +eicken +multimedia +video +demand +horusexperi +concert +brian +smith +respons +practicum +distribut +system +teach +number +lectureson +network +protocol +high +speed +network +technolog +practic +distribut +system +recent +public +world +wide +failur +werner +vogel +appear +proceed +sigop +workshopconnamoran +ierland +septemb +structur +virtual +synchroni +explor +bound +virtuallysynchron +group +commun +katherin +werner +vogel +robbert +reness +appear +proceed +sigop +workshopconnamoran +ierland +septemb +user +level +network +interfacefor +parallel +distribut +comput +anindya +basu +vineet +buch +werner +vogel +thorsten +eicken +proceed +symposium +oper +system +princpl +copper +mountain +decemb +deliv +high +perform +commun +applic +level +werner +vogel +thorsten +eicken +proceed +third +ieee +workshop +architectur +implementationof +high +perform +commun +subsystem +hpc +august +horu +flexibl +group +commun +system +robbert +reness +kenneth +birman +brad +glade +kati +mark +hayden +takako +hickei +dalia +malki +alex +vaysburd +werner +vogel +march diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..4b6e7098 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,350 @@ +horu +projectth +compani +ofth +god +rejoic +rejoic +come +horu +osiri +whose +heart +firm +triumphant +ofisi +heir +osiri +horu +project +develop +modular +extens +process +groupcommun +system +address +requir +wide +varieti +robust +distribut +applic +horu +isi +osiri +whose +attribut +appealedstrongli +egyptian +egypt +becausein +everi +woman +type +wish +possess +renew +life +life +oppos +death +movementa +oppos +inact +horu +provid +framework +develop +distribut +applicationsbas +group +commun +style +comput +aris +infault +toler +system +manag +distribut +system +applic +thatexploit +data +replic +coher +cach +groupwar +within +theoveral +horu +framework +larg +collect +system +applicationprotocol +develop +allow +applic +design +toconstruct +commun +modul +exactli +meet +applicationrequir +minim +cost +horu +project +origin +launch +effort +redesign +theisi +groupcommun +system +evolv +gener +purpos +communicationarchitectur +advanc +support +develop +robustdistribut +system +set +isi +unsuit +asappl +special +secur +real +time +requir +besidesth +practic +us +softwar +project +contribut +toward +theori +virtual +synchroni +runtim +model +usedfor +implement +data +replic +fault +toler +sametim +softwar +much +faster +lighter +weight +isi +system +horu +exist +system +initi +version +code +beus +research +purpos +restrict +commericalright +version +call +ensembl +written +usabl +manyoth +languag +avail +class +user +nofe +ensembl +activ +develop +seri +ofreleas +fall +spring +earli +ensemblewil +outstand +environ +build +java +base +groupwareappl +multimedia +conferenc +horu +ensembl +design +platform +independ +areavail +differentclass +workstat +person +comput +parallel +processor +onnext +gener +cluster +environ +us +standard +high +speedcommun +network +horu +effort +collabor +close +mani +distribut +systemsproject +includ +transi +navtech +stormcast +tacomaproject +link +project +found +elsewher +thesepag +circumst +horu +begotten +gain +good +idea +froma +osiri +sorrow +mother +isi +death +herhusband +describ +goddess +greatli +distress +equippedwith +mighti +word +power +knew +utter +mighthav +greatest +effect +search +dead +bodi +osiri +never +rest +found +hair +made +light +wing +stir +made +lament +brother +osiri +length +brought +bodi +state +activ +unit +thu +becam +child +horu +born +secret +place +suckl +rear +introduct +horusvisit +papersand +abstractpag +overview +public +report +relatedto +horu +project +follow +recent +articl +present +high +level +introduct +horu +kenneth +birman +robbert +reness +softwar +reliabl +network +scientif +american +robbert +reness +kenneth +birman +silvano +maffei +horu +flexiblegroup +commun +system +commun +april +final +sentenc +hyme +osirisfrom +papyru +better +know +book +dead +walli +budg +god +egyptian +studiesin +egyptian +mytholog +volum +page +open +court +publish +compani +london +comment +werner +vogel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..dd5c84c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,132 @@ +cornel +depart +comput +scienc +gener +info +academ +info +faculti +research +project +tech +report +annual +report +welcom +cornel +universitydepart +comput +scienc +site +feel +free +brows +around +know +depart +gener +infoget +gener +inform +depart +locat +size +also +find +inform +contactswithin +depart +standard +disclaim +facultyfind +list +faculti +check +offici +annual +report +home +page +ortheir +person +home +page +researchcheck +research +project +go +depart +find +aboutour +research +research +collabor +publicationsfind +link +public +depart +faculti +researcherseith +technic +report +projector +annual +report +degreeslook +degre +program +doctor +master +engin +orundergradu +academicsrefer +cours +home +page +taught +webfor +semest +read +generalcoursedescript +appear +cours +studi +peopleget +know +outstand +peopl +keep +depart +go +includ +staff +student +also +directorylist +peopl +depart +activitiesfind +activ +depart +theassoci +undergradu +excellenthockei +team +serverscheck +server +depart +cornel +gopherserv +cornel +anonym +ftpserver +check +server +page +cornel +sitesquest +comment +informationpres +direct +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..52eb89ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,417 @@ +gerard +saltongerard +saltonprofessorg +cornel +eduph +harvard +univers +natur +languag +text +process +rapidli +expand +field +research +develop +larg +mass +machin +readabl +text +exist +cheapli +store +high +densiti +optic +storag +media +rapidli +retriev +demand +furthermor +sophist +method +avail +analyz +document +text +formul +appropri +user +queri +conduct +rapid +file +search +rank +retriev +item +decreas +order +import +user +cornel +design +oper +larg +gener +purpos +text +process +environ +text +handl +without +restrict +size +subject +matter +absenc +knowledg +base +would +us +unrestrict +text +databas +corpu +base +text +analysi +system +determin +mean +word +express +refin +context +analysi +us +statist +probabilist +criteria +us +corpu +base +approach +abl +determin +text +similar +high +degre +accuraci +main +applic +automat +gener +structur +text +collect +hypertext +semant +similar +piec +text +automat +link +hypertext +represent +larg +databas +provid +flexibl +brows +capabl +gener +purpos +text +access +automat +retriev +interest +text +excerpt +respons +avail +search +queri +done +extens +work +autom +encyclopedia +consist +encyclopedia +articl +funk +wagnal +encyclopedia +addit +also +process +trec +collect +consist +full +text +document +cover +number +differ +subject +area +gigabyt +text +sophist +search +retriev +servic +exist +well +text +link +system +capabl +relat +differ +text +section +paragraph +sentenc +main +test +vehicl +continu +current +version +smart +text +analysi +retriev +system +oper +unix +sparc +station +termin +equip +univers +activitiesmemb +engin +colleg +librari +committeeprofession +activitiesassoci +editor +transact +inform +systemsprogram +committe +sigir +seventeenth +confer +research +develop +inform +retriev +dublin +ireland +electron +publish +darmstadt +germani +inform +retriev +genom +nation +librari +medicin +bethesda +maryland +multimedia +hypermedia +virtual +realiti +moscow +septemb +lecturesautomat +construct +hypertext +link +feder +institut +technolog +zurich +switzerland +june +progress +inform +retriev +research +univers +konstanz +germani +june +hypertext +inform +retriev +asi +nation +meet +columbu +ohio +octob +automat +text +util +larg +full +text +databas +comput +scienc +colloquium +ohio +state +univers +columbu +ohio +octob +automat +inform +retriev +lectur +cours +hypertext +seattl +washington +novemb +full +text +inform +retriev +microsoft +corpor +seattl +washington +novemb +automat +text +util +workshop +inform +genom +nation +librari +medicin +bethesda +maryland +publicationsapproach +passag +retriev +inform +system +proceed +annual +nation +confer +research +develop +inform +retriev +sigir +associ +comput +machineri +york +allan +bucklei +select +text +util +text +travers +proceed +hypertext +associ +comput +machineri +york +novemb +allan +automat +structur +retriev +larg +text +file +commun +februari +allan +bucklei +text +retriev +us +vector +process +model +proceed +third +annual +symposium +document +analysi +inform +retriev +univers +nevada +vega +nevada +april +allan +softwareth +smart +text +analysi +retriev +system +made +avail +free +charg +research +purpos +sever +hundr +copi +smart +version +distribut +us +around +world +return +list +faculti +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..3d3289fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,143 @@ +yui +liyui +liresearch +associateyui +cornel +eduph +univers +waterloo +gener +research +interest +numer +optim +scientif +comput +also +interest +appli +optim +techniqu +solv +real +world +engin +problem +current +interest +includ +gener +trust +region +theori +unconstrain +minim +nonlinearli +constrain +minim +particular +nonlinearli +constrain +problem +exist +accept +condit +optim +investig +affin +scale +trust +region +method +minim +nonlinear +function +includ +trust +region +converg +analysi +method +us +solv +gener +nonlinearli +constrain +minim +problem +us +exact +penalti +approach +applic +method +consid +imag +enhanc +problem +lecturesan +interior +trust +region +method +nonlinear +minim +subject +bound +confer +scientif +engin +comput +chines +young +scientist +beij +china +august +publicationsa +global +converg +method +problem +siam +journal +optim +center +trust +region +reflect +techniqu +nonlinear +minim +subject +bound +proceed +confer +scientif +engin +comput +chines +young +scientist +return +list +research +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..12943561 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,297 @@ +richard +zippelrichard +zippelsenior +research +associaterz +cornel +eduph +research +focus +us +symbol +mathemat +modern +softwar +techniqu +autom +gener +scientif +softwar +current +focus +comput +fluid +dynam +work +allow +scientist +specifi +differenti +equat +studi +mathemat +techniqu +us +perform +numer +comput +architectur +machin +calcul +done +toolkit +convert +differenti +equat +suppli +method +special +code +architectur +colleagu +depart +mechan +aerospac +engin +toolkit +us +gener +dynam +system +aris +studi +boundari +layer +fluid +compon +toolkit +comput +algebra +substrat +call +weyl +extend +data +structur +avail +common +lisp +includ +object +like +polynomi +matric +ration +function +ring +vector +space +ideal +introduct +object +program +languag +provid +number +challeng +languag +type +system +provid +opportun +deduct +reason +pursu +profession +activitieseditori +board +journal +symbol +comput +transact +mathemat +softwareprogram +committe +principl +practic +constraint +program +workshop +refere +review +journal +algebra +algorithm +error +correct +code +intern +symposium +symbol +algebra +computationlecturesalgebra +function +decomposit +american +mathemat +societi +region +meet +syracus +york +septemb +dexter +kozen +susan +landau +dexter +kozen +present +american +mathemat +societi +region +meet +syracus +york +susan +landau +present +effect +algorithm +polynomi +irreduc +test +american +mathemat +societi +region +meet +syracus +york +septemb +vista +microstorag +architectur +intern +workshop +object +orient +oper +system +durham +north +carolina +decemb +joint +dawson +dean +dawson +dean +present +scientif +center +haifa +israel +januari +depart +electr +engin +technion +haifa +israel +januari +synthes +scientif +program +us +weyl +depart +comput +scienc +weizmann +institut +rehovot +israel +decemb +depart +electr +engin +technion +haifa +israel +januari +depart +comput +scienc +suni +albani +york +april +modular +interpol +algorithm +factor +multivari +polynomi +algebra +number +theori +symposium +ithaca +york +joint +ronitt +rubinfeld +ronitt +rubinfeld +present +publicationseffect +polynomi +comput +kluwer +academ +publish +boston +massachusett +june +page +vista +microstorag +architectur +implement +file +system +object +databas +proceed +intern +workshop +object +orient +databas +decemb +dawson +dean +return +list +research +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..a8215282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,345 @@ +kenneth +birmankenneth +birmanprofessorphd +univ +california +berkelei +research +concern +fault +toler +distribut +comput +oper +system +focu +us +distribut +program +model +base +upon +virtual +synchron +process +group +solv +problem +manag +replic +data +coordin +action +distribut +set +perform +dynam +reconfigur +done +provid +fault +toler +although +limit +certain +class +reason +benign +failur +effort +theoret +practic +side +practic +work +start +develop +comput +system +call +isi +toolkit +isi +softwar +tool +support +virtual +synchroni +fault +toler +becam +wide +popular +develop +system +horu +intend +flexibl +isi +address +issu +real +time +commun +secur +import +featur +horu +extens +layer +permit +reconfigur +special +purpos +basic +idea +horu +user +featur +actual +avail +broad +collect +option +horu +also +seek +leverag +emerg +network +technolog +commun +techniqu +activ +messag +origin +work +parallel +supercomput +horu +embodi +advanc +secur +technolog +develop +graduat +student +mike +reiter +david +cooper +unusu +combin +secur +privaci +high +avail +singl +packag +fundament +side +effort +horu +group +look +techniqu +specifi +prove +properti +process +group +structur +system +us +languag +develop +execut +refer +implement +major +horu +layer +goal +us +constabl +nuprl +system +prove +latter +correctli +implement +former +also +studi +extens +virtual +synchroni +model +notabl +work +friedman +ad +real +time +guarante +horu +work +mark +hayden +support +probabilist +broadcast +primit +program +tool +horu +much +collabor +architectur +develop +side +effort +head +robbert +reness +werner +vogel +friedman +graduat +student +work +aspect +system +includ +develop +object +orient +program +tool +multimedia +commun +applic +secur +privaci +high +speed +protocol +exploit +problem +also +collabor +within +depart +notabl +thorsten +eicken +brian +smith +univers +activitieschair +engin +polici +committe +act +head +comput +scienc +depart +master +engin +program +member +comput +scienc +depart +faculti +recruit +committe +member +cornel +academ +leadership +committe +profession +activitieseditor +chief +transact +comput +system +chief +scientist +isi +distribut +system +member +isat +studi +group +robust +critic +element +nation +inform +infrastructur +publicationsth +process +group +approach +reliabl +distribut +comput +commun +decemb +integr +runtim +consist +model +distribut +comput +journal +parallel +distribut +comput +reliabl +distribut +comput +us +isi +toolkit +birman +reness +ieee +comput +societi +press +alamito +california +reliabl +consist +ieee +softwar +glade +distribut +softwar +horu +system +return +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..5ddfe6c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,236 @@ +donald +greenbergdonald +greenberg +jacob +gould +schurman +prof +comput +scienc +director +scienc +technolog +center +comput +graphic +scientif +visual +cornel +univers +past +decad +comput +graphic +activ +involv +develop +wide +rang +graphic +input +displai +techniqu +number +input +method +implement +progress +made +larg +varieti +displai +routin +graphic +research +topic +previous +investig +includ +polygon +clip +hidden +surfac +algorithm +textur +spatial +tempor +alias +problem +geometr +model +parametr +surfac +descript +color +scienc +current +focu +graphic +research +involv +three +dimension +model +complex +environ +algorithm +realist +imag +synthesi +modular +testb +suffici +flexibl +evalu +differ +model +imag +gener +techniqu +creat +laboratori +research +conduct +light +reflect +model +method +determin +interact +reflect +surfac +techniqu +improv +comput +effici +trace +parallel +process +strategi +perceptu +studi +micro +geometri +surfac +model +motion +control +dynam +constraint +model +anti +alias +strategi +host +topic +relat +complex +model +realist +imag +displai +applic +research +start +volum +render +medic +imag +digit +photographi +anim +develop +gener +tool +scientif +visual +well +core +technolog +multi +media +environ +research +conduct +within +facil +program +comput +graphic +member +nation +scienc +foundat +scienc +technolog +center +comput +graphic +scientif +visual +particip +univers +brown +univers +california +institut +technolog +univers +north +carolina +chapel +hill +univers +utah +univers +activitiesdirector +program +comput +graphic +director +nation +scienc +foundat +scienc +technolog +center +comput +graphic +scientif +visualizationprofession +activitieseditori +board +comput +graphic +journal +nation +academi +engin +found +fellow +american +institut +medic +biolog +engin +fellow +acmreturn +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..5960737a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,346 @@ +juri +hartmanisjuri +hartmani +walter +read +professor +engin +california +institut +technolog +strateg +goal +research +contribut +develop +comprehens +theori +comput +complex +comput +complex +studi +quantit +law +govern +comput +essenti +part +scienc +base +need +guid +har +exploit +explos +grow +comput +technolog +comput +complex +classifi +problem +amount +variou +comput +resourc +need +solv +classif +yield +complex +class +consist +problem +solv +within +given +comput +resourc +bound +gain +deeper +understand +make +problem +hard +comput +explor +variou +complex +class +relat +class +intern +structur +class +also +studi +trade +off +differ +comput +resourc +problem +solv +particular +attent +sequenti +time +parallel +time +nondeterminist +time +memori +requir +random +comput +resourc +interact +comput +univers +activitiesmemb +faculti +council +representativeschair +comput +scienc +depart +recruit +committeehonorsacm +ture +award +stearn +member +nation +academi +engin +foreign +member +latvian +academi +scienc +fellow +american +academi +art +scienc +fellow +york +state +academi +scienc +fellow +american +associ +advanc +scienc +aaa +charter +fellow +profession +activitieseditor +springer +verlag +lectur +note +comput +scienc +siam +journal +comput +journal +comput +system +scienc +advisori +board +eatc +monograph +theoret +comput +scienc +springer +verlag +board +director +comput +research +associ +ifip +technic +committe +foundat +comput +scienc +advisori +council +georg +brown +school +engin +rice +univers +houston +texa +nation +academi +engin +peer +committe +comput +scienc +engin +visit +committe +physic +scienc +divis +univers +chicago +eatc +council +board +advisor +intern +journal +foundat +comput +scienc +world +scientif +presseditori +board +chicago +journal +theoret +comput +scienc +electron +journal +foundat +comput +scienc +press +foundat +editor +electron +journal +univers +comput +sciencegoedel +prize +committe +member +comput +scienc +telecommun +board +nation +research +council +awardshonorari +doctor +degre +univers +dortmund +germani +lecturessom +observ +comput +scienc +banquet +speech +intern +logic +program +symposium +cornel +univers +novemb +comput +complex +scope +natur +futur +distinguish +lectur +seri +univers +virginia +februari +distinguish +lectur +seri +univers +tennesse +april +publicationson +comput +complex +natur +comput +scienc +ture +award +lectur +commun +octob +random +oracl +hypothesi +fals +journal +comput +system +scienc +august +richard +chang +benni +chor +od +goldreich +johan +hastad +desh +ranjan +pankaj +rohatgi +hausdorff +topolog +dimens +kolmogorov +complex +real +line +journal +comput +system +scienc +decemb +weight +comput +eatc +bulletin +februari +return +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..811f1398 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,183 @@ +john +hopcroftjohn +hopcroftjoseph +silbert +dean +engineeringprofessor +comput +sciencephd +stanford +univers +januari +appoint +dean +colleg +engin +overse +academ +depart +compris +colleg +well +variou +research +unit +involv +comput +scienc +depart +continu +research +robust +geometr +algorithm +model +simul +inform +captur +access +collabor +design +research +institut +develop +technolog +facilit +inform +captur +access +within +engin +design +environ +among +technolog +research +distribut +databas +persist +object +storag +document +imag +process +manag +multimedia +user +interfac +technolog +inform +scienc +heterogen +data +knowledg +represent +organ +remot +collabor +technolog +profession +activitiesmemb +nation +research +council +commiss +physic +scienc +mathemat +applicationsmemb +nation +scienc +boardmemb +scientif +advisori +board +unit +state +forcememb +nation +academi +engineeringfellow +american +academi +art +sciencesfellow +american +associ +advanc +scienc +aaa +fellow +institut +electr +electron +engin +ieee +fellow +associ +comput +machinerychairman +siam +board +trusteesmemb +scientif +advisori +committe +david +lucil +packard +foundationmemb +sloan +research +fellowship +committeeadvisori +board +supercomput +research +center +institut +defens +analysiseditor +oxford +univers +press +intern +seri +comput +scienc +algorithmica +discret +comput +geometryassoci +editor +intern +journal +comput +geometri +applic +journal +comput +system +scienc +journal +inform +sciencesreturn +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..9a489ddf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,435 @@ +keshav +pingalikeshav +pingaliassoci +professorphd +research +group +work +area +program +languag +compil +parallel +architectur +goal +develop +tool +gener +parallel +code +applic +program +deal +larg +spars +matric +scientif +applic +involv +numer +solut +partial +differenti +equat +techniqu +us +almost +alwai +produc +system +algebra +equat +involv +larg +spars +matric +unfortun +exist +compil +technolog +poor +parallel +spars +matrix +program +take +radic +differ +approach +problem +compil +produc +parallel +spars +matrix +program +sequenti +dens +matrix +program +us +inform +user +sparsiti +structur +matric +program +enabl +tool +restructur +compil +area +preliminari +experi +krylov +space +solver +show +code +produc +compil +competit +hand +parallel +code +librari +like +argonn +petsc +librari +extend +approach +direct +method +solv +linear +system +applic +requir +adapt +mesh +refin +project +build +earlier +work +restructur +compil +techniqu +dens +matrix +program +develop +restructur +techniqu +compil +program +distribut +memori +uniform +memori +access +numa +architectur +like +processor +access +local +memori +faster +local +memori +good +perform +compil +must +parallel +must +also +ensur +local +refer +match +code +data +distribut +local +refer +must +made +block +transfer +prefer +mani +small +messag +recent +develop +best +algorithm +known +automat +align +comput +data +incorpor +compil +test +earlier +work +develop +novel +loop +restructur +techniqu +call +access +normal +transform +loop +nest +increas +local +potenti +block +transfer +implement +lambda +loop +transform +toolkit +paper +summar +result +best +paper +prize +asplo +work +hewlett +packard +transfer +technolog +fortran +compil +product +line +uniprocessor +multiprocessor +develop +framework +program +analysi +optim +base +depend +flow +graph +knit +togeth +data +control +depend +inform +program +permit +develop +optim +algorithm +gener +better +code +possibl +compet +approach +result +independ +interest +exampl +recent +develop +optim +algorithm +control +depend +problem +answer +foundat +question +open +almost +decad +work +develop +linear +time +algorithm +comput +static +singl +assign +form +program +result +incorpor +number +compil +includ +microsoft +flavor +profession +activitiespanel +member +organ +symposium +principl +practic +parallel +program +member +nation +young +investig +award +panel +consult +hewlett +packard +lab +intel +corpor +armi +ballist +research +lab +odyssei +research +math +scienc +institut +refere +review +topla +ieee +transact +comput +journal +parallel +distribut +comput +journal +supercomput +ieee +computereditori +board +intern +journal +parallel +program +awardsn +scienc +foundat +presidenti +young +investig +faculti +develop +award +best +paper +prize +asplo +lecturesfast +algorithm +control +depend +problem +hewlett +packard +corpor +chelmsford +massachusett +januari +comput +scienc +depart +wayn +state +univers +detroit +michigan +februari +rutger +univers +brunswick +jersei +microsoft +research +laboratori +redmond +washington +june +publicationssolv +align +us +elementari +linear +algebra +proceed +seventh +annual +workshop +languag +compil +parallel +comput +lcpc +lectur +note +comput +scienc +ithaca +august +david +indupraka +kodukula +vladimir +kotlyar +paul +stodghil +data +structur +optim +control +depend +comput +sigplan +confer +program +languag +design +implement +pldi +june +gianfranco +bilardi +return +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..00966a19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,739 @@ +fred +schneiderfr +schneider +professorphd +state +univ +stoni +brook +techniqu +understand +concurr +program +becom +increasingli +import +distribut +comput +system +becom +widespread +mission +critic +applic +research +focus +develop +techniqu +heavili +involv +appli +assert +reason +design +concurr +distribut +fault +toler +real +time +program +complet +textbook +subject +along +david +gri +continu +investig +concern +first +order +equat +logic +past +year +streamlin +infer +rule +evalu +number +techniqu +handl +undefin +term +partial +function +thoma +bressoud +complet +build +analyz +hypervisor +base +implement +replic +manag +risc +architectur +protocol +ensur +sequenc +instruct +execut +virtual +machin +run +differ +physic +processor +ident +protocol +also +coordin +issu +virtual +machin +hypervisor +implement +replica +coordin +attract +least +theori +replica +coordin +implement +hypervisor +instantli +becom +avail +hardwar +realiz +given +instruct +architectur +includ +realiz +exist +hypervisor +written +second +replica +coordin +implement +hypervisor +singl +implement +suffic +everi +oper +system +execut +instruct +architectur +final +implement +replica +coordin +hypervisor +applic +programm +freed +task +jointli +johansen +univers +trom +norwai +robbert +reness +start +tacoma +project +trom +cornel +move +agent +investig +support +mobil +process +build +mission +critic +applic +structur +system +term +agent +applic +construct +commun +network +bandwidth +conserv +data +access +agent +execut +site +data +resid +agent +typic +filter +otherwis +reduc +data +read +carri +relev +inform +roam +network +tacoma +prototyp +complet +implement +third +system +base +experi +final +develop +scott +stoller +algorithm +detect +whether +particular +comput +asynchron +distribut +system +could +pass +global +state +satisfi +given +state +predic +algorithm +allow +effici +detect +possibl +previou +algorithm +univers +activitiessabbat +leav +profession +activitieseditor +chief +distribut +computingeditor +inform +process +letterseditor +ieee +transact +softwar +engineeringeditor +high +integr +systemseditor +annal +softwar +engineeringeditor +comput +surveysco +editor +text +monograph +comput +scienc +springer +verlagprogram +committe +member +intern +school +symposium +formal +techniqu +real +time +fault +toler +systemsprogram +committe +member +intern +confer +mathemat +program +constructionprogram +committe +member +intern +workshop +respons +comput +systemsprogram +committe +member +workshop +compos +fault +resili +real +time +systemsprogram +committe +member +fifth +ifip +work +confer +depend +comput +critic +applicationsprogram +committe +member +sixteenth +ieee +intern +real +time +system +symposiumprogram +committe +member +dimac +workshop +verif +control +hybrid +systemsst +committe +center +high +integr +softwar +system +assur +chissa +nation +institut +standard +technologymemb +isat +defens +inform +warfar +studi +group +advanc +research +project +agencyreview +committe +leibniz +center +hebrew +universitymemb +ifip +work +group +program +methodolog +awardsfellow +american +associ +advanc +sciencefellow +associ +comput +machinerylecturesproof +outlin +program +lectur +intern +summer +school +marktoberdorf +germani +juli +origin +tradit +banquet +speech +intern +summer +school +marktoberdorf +germani +juli +reason +program +exploit +environ +afosr +grante +contractor +meet +softwar +system +washington +sept +verifi +hybrid +system +exploit +environ +symposium +formal +techniqu +real +time +fault +toler +system +lubeck +germani +sept +panelist +compar +merit +synchron +partial +synchron +asynchron +model +safeti +critic +real +time +system +symposium +formal +techniqu +real +time +fault +toler +system +lubeck +germani +sept +moder +issu +write +formal +specif +specif +refin +reactiv +system +intern +confer +research +center +comput +scienc +dagstuhl +germani +sept +merg +polici +workshop +comput +support +polici +analysi +design +georg +mason +univers +virginia +avoid +mistak +invit +speaker +traffic +manag +workshop +nasa +am +research +center +reason +program +exploit +environ +technic +univers +munich +munich +germani +proof +outlin +past +univers +north +carolina +chapel +hill +north +carolina +march +ad +fault +toler +virtual +distinguish +lectur +seri +univers +north +carolina +chapel +hill +north +carolina +march +moder +panel +organ +teach +logic +tool +sigcs +technic +symposium +comput +scienc +educ +nashvil +tennesse +march +proof +outlin +past +technion +haifa +israel +march +ad +fault +toler +virtual +univers +trom +trom +norwai +april +concurr +program +specif +univers +trom +trom +norwai +april +place +agent +airplan +view +successor +arpa +isat +defens +inform +warfar +studi +group +meet +washington +june +publicationsreason +program +exploit +environ +proceed +intern +colloquium +icalp +jerusalem +israel +juli +lectur +note +comput +scienc +springer +verlag +york +note +proof +outlin +logic +work +materi +intern +summer +school +marktoberdorf +germani +juli +research +fault +toler +real +time +comput +softwar +system +program +summari +boll +forc +base +washington +sept +forc +offic +scientif +research +hybrid +verif +exploit +environ +formal +techniqu +real +time +fault +toler +system +lubeck +germani +septemb +lectur +note +comput +scienc +volum +springer +verlag +york +limor +equat +proposit +logic +inform +process +letter +februari +gri +refin +fault +toler +aircraft +hand +protocol +foundat +ultradepend +parallel +distribut +comput +paradigm +depend +applic +kluwer +academ +publish +marzullo +dehn +teach +logic +tool +proceed +sigcs +technic +symposium +comput +scienc +educ +nashvil +tennesse +march +sigcs +bulletin +gri +oper +system +support +mobil +agent +proceed +fifth +workshop +topic +oper +system +hoto +orca +island +washington +johansen +robbert +reness +verifi +program +causal +order +messag +pass +scienc +comput +program +stoller +teach +proof +art +scienc +newslett +spring +gri +approach +discret +teach +mathemat +primu +june +gri +return +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..32f7f52c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,174 @@ +charl +loancharl +loanprofessorphd +univers +michigan +continu +develop +method +variou +kroneck +product +comput +particular +method +solv +nearest +kroneck +product +problem +factor +matric +subject +inhomogen +constraint +applic +signal +process +markov +process +solv +result +kroneck +product +descript +variou +fast +wavelet +transform +also +deriv +proce +analog +descript +plai +import +role +develop +high +perform +algorithm +undergradu +text +comput +scienc +work +last +four +year +product +current +translat +anticip +fall +semest +univers +activitiescomput +scienc +depart +undergradu +curriculum +committeedepart +repres +art +scienc +chair +meetingfreshman +admiss +reader +art +scienc +profession +activitieseditor +siam +journal +matrix +analysismemb +wilkinson +prize +committe +siam +member +diprima +prize +committe +siam +member +organ +committe +household +confer +member +household +prize +committe +lecturesappl +kroneck +product +linkop +univers +sweden +januari +umea +univers +sweden +januari +build +intuit +comput +scienc +umea +univers +sweden +januari +applic +kroneck +product +ohio +state +univers +april +build +intuit +comput +scienc +ohio +state +univers +april +publicationsoptim +close +loop +adapt +optic +perform +multipl +control +bandwidth +journal +optic +societi +america +ellerbroek +pitsiani +plemmon +return +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..ab89c463 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,69 @@ +catherin +wagnercatherin +wagnersenior +lecturerphd +cornel +univers +primari +respons +comput +scienc +depart +teach +work +other +depart +revis +curriculum +lower +level +cours +specif +interest +develop +cours +student +prepar +introductori +cours +program +univers +activitiescomput +scienc +undergradu +curriculum +committe +profession +activitiesassoci +symbol +logic +associ +comput +machineri +institut +electr +electron +engin +associ +women +mathemat +return +annual +report +home +page +department +home +pageif +question +comment +pleas +contact +cornel +last +modifi +novemb +denis +moor +denis +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..4790db77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,673 @@ +faculti +research +interest +brian +smith +brian +smith +bsmith +cornel +xerox +professor +comput +scienc +univers +california +berkelei +offic +upson +hall +offic +phone +offic +hour +semest +tue +thur +research +intereststeachingselect +publicationsresearch +talksmisc +linksresearch +interestsmi +research +goal +make +video +first +class +data +type +ourcomput +environ +research +group +project +zeno +build +technolog +support +storag +commun +andprocess +continu +media +data +contrast +commercialand +research +approach +requir +special +hardwar +operatingsystem +network +usabl +technolog +aredesign +current +research +environ +premis +isthat +current +hardwar +softwar +commun +infrastructurei +suffici +support +research +continu +media +system +andappl +verifi +hypothesi +build +workingsystem +research +storag +system +direct +toward +build +zenodistribut +video +file +server +zeno +architectur +us +network +ofworkst +connect +gener +local +area +network +anethernet +common +environ +comput +research +laboratori +workstat +simultan +client +serverof +continu +media +data +client +workstat +plai +videostor +server +server +workstat +fileserv +video +data +client +receiv +video +store +severalserv +server +servic +sever +client +compar +withlarg +central +server +advantag +design +scalabl +client +ad +server +automat +ad +load +balanc +load +gener +serv +video +distribut +across +machin +across +network +case +server +locat +differ +network +initi +invest +util +exist +infrastructur +zeno +architectur +promot +earli +adopt +research +environ +almost +initi +invest +research +commun +system +center +around +best +effortdeliveri +protocol +protocol +built +exist +networkprotocol +contrast +mani +research +effort +needto +reserv +network +resourc +establish +connect +resourcereserv +protocol +well +suit +nation +communicationinfrastructur +user +charg +call +basi +forbandwidth +connect +poorli +suit +networkenviron +network +share +resourc +equal +accessiblebi +research +approach +appropri +latterenviron +commonli +found +research +laboratori +thecommun +protocol +develop +call +cyclic +builton +datagram +protocol +design +transport +audioand +video +data +playback +applic +local +metropolitan +andwid +area +network +cyclic +us +zeno +file +server +todeliv +audio +video +data +client +paper +describ +cyclic +availableonlin +well +slide +research +talk +research +process +video +data +fold +first +aredevelop +algorithm +process +video +data +compressedrepresent +process +video +without +decompress +lead +todramat +speed +process +perform +sinc +remov +thetim +consum +process +compress +decompress +reducesth +amount +data +must +process +experi +animplement +idea +jpeg +compress +imag +data +indicatesthat +data +process +order +magnitud +fasterthan +possibl +previou +approach +currentlyextend +idea +parallel +algorithm +us +network +ofworkst +develop +method +transcod +video +insoftwar +video +transcod +video +translat +onecompress +format +anoth +us +oper +video +fileserv +must +servic +heterogen +client +paper +describ +compresseddomain +process +avail +onlin +research +video +process +also +explor +method +simplifyexperiment +video +process +develop +programminglanguag +video +first +class +data +type +languag +calledrivl +pronounc +rival +allowsvideo +process +effect +specifi +independ +resolutionand +format +sourc +materi +languag +video +whatpostscript +text +graphic +provid +resolutionindepend +method +specifi +video +process +thu +sameprogram +process +qualiti +quicktim +video +quickli +whileedit +decis +made +us +format +high +qualityfinish +product +line +much +postscript +bepreview +workstat +qualiti +sent +dpiprint +camera +readi +copi +paper +describ +avail +onlin +talk +review +research +onvideo +process +compress +domain +process +rivl +avail +onlin +common +theme +effort +provid +tool +make +videous +research +environ +talk +review +research +also +avail +onlin +teachingat +cornel +univers +teach +undergradu +cours +computerarchitectur +graduat +cours +multimedia +system +select +public +jonathan +swartz +brian +smith +resolut +independ +video +languag +proc +third +intern +confer +multimedia +francisco +novemb +html +version +also +avail +asif +ghia +jonathan +logan +david +chamberlin +brian +smith +queri +hum +larg +music +inform +retriev +audio +databas +proc +third +intern +confer +multimedia +francisco +novemb +html +version +also +avail +peter +brian +smith +lawrenc +row +name +server +proc +workshop +toronto +ontario +canada +juli +brian +smith +cyclic +prioriti +driven +best +effort +brian +smith +fast +softwar +process +motion +jpeg +video +proc +second +intern +confer +multimedia +sanfrancisco +octob +lawrenc +row +ketan +patel +brian +smith +mpeg +video +softwar +represent +transmiss +playback +spie +symposium +electron +imag +scienc +technolog +jose +california +februari +brian +smith +lawrenc +row +stephen +distribut +program +proc +workshop +berkelei +june +ketan +patel +brian +smith +lawrenc +row +perform +softwar +mpeg +video +decod +proc +first +internationalconfer +multimedia +anaheim +august +brian +smith +lawrenc +row +famili +algorithm +manipul +compress +imag +ieee +comput +graphic +applic +septemb +lawrenc +row +brian +smith +continu +media +player +third +intern +workshop +network +oper +system +supportfor +digit +audio +video +diego +select +research +talk +distribut +multimedia +system +research +cornel +recent +research +resolut +independ +video +languag +talk +xerox +webster +research +center +june +best +effort +transmiss +protocol +continu +media +data +comput +video +xerox +site +review +univers +minnesota +colloquium +talk +misc +link +multimedia +cours +work +annett +hanna +manual +mmcn +electron +proceed +documentationth +priceweb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..f28a7f8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,457 @@ +robert +constablerobert +constabledepart +chair +professorrc +cornel +eduph +univers +wisconsin +madison +researchw +engag +studi +comput +system +providemechan +assist +problem +solv +especi +programmingand +mathemat +involv +long +term +studi +wai +make +theform +mathemat +feasibl +us +implementedthre +system +past +year +nuprl +major +experiment +nuprl +line +lispprogram +implement +construct +theori +type +system +sucha +nuprl +us +formal +mathemat +canexpress +wide +varieti +proof +program +build +method +asmetalevel +program +system +provid +consider +theoremprov +power +moreov +nuprl +especi +us +canevalu +comput +content +theorem +principl +nuprli +fomal +system +mathemat +program +languag +continu +improv +nuprl +current +version +us +cornel +iscal +nuprl +differ +predecessor +termeditor +design +stuart +allen +implement +richard +eaton +itsintern +structur +modular +make +system +suitabl +hedefinit +wide +varieti +logic +beyond +built +inconstruct +type +theori +also +entir +theorem +prove +mechanismha +rebuilt +stream +line +paul +jackson +build +thework +dougla +how +contribut +gener +natur +nuprl +final +version +system +refer +isan +intern +descript +languag +logic +builtprincip +william +aitken +us +theori +develop +allen +how +richard +eaton +design +link +internaldescript +logic +logic +make +possibleto +prove +theorem +process +prove +theorem +also +engag +three +excit +joint +ventur +withmiriam +leeser +electr +engin +incomput +scienc +davidgri +polya +richardzippel +weyl +withless +involv +hardwar +synthesi +verif +leeser +student +mark +aagard +us +nuprl +prove +thecorrect +line +boolean +circuit +minim +packag +us +circuit +design +compon +leeser +bedrocsystem +implement +weak +divis +algorithm +widelyus +circuit +design +system +major +theorem +prove +efforttaught +great +deal +effect +technolog +inth +hand +expert +user +applic +domain +second +joint +ventur +involv +build +model +polya +program +languag +aprogram +refin +mechan +design +david +gri +enabl +write +handbook +algorithm +themann +devis +year +studi +programmingprocess +stuart +allen +givn +formal +type +theoret +definit +ofpolya +expect +experi +soon +transform +tryingto +captur +program +style +gri +want +recent +begun +collabor +hope +relat +thepolya +effort +conal +mannion +explor +possibl +ofus +nuprl +comput +scienc +discussingproblem +richard +zippel +hope +connect +zippel +ssymbol +algebra +system +weyl +nuprl +near +futur +thiswil +us +explor +develop +scientif +computingsoftwar +us +weyl +nuprl +togeth +tool +zippel +isbuild +profession +activitieseditor +journal +symbol +computationeditor +academ +presseditor +journal +logic +computationeditor +oxford +univers +pressgener +chair +licsprogram +committe +north +american +jumelageprogram +committe +theoret +aspect +comput +softwarerefere +review +nserc +canada +theoret +comput +scienceunivers +activitieschair +comput +scienc +recruit +committeecomput +scienc +comput +facil +committeeprovost +studi +committe +mathematicslecturesform +theori +softwar +system +fundament +connect +betweencomput +scienc +logic +inria +anniversari +celebr +pari +franc +decemb +nuprl +softwar +develop +system +comput +scienc +colloquium +bengurion +univers +sheva +israel +januari +formal +theori +softwar +system +state +israel +symposium +aviv +israel +januari +associ +symbol +logic +annual +meet +notr +dame +univers +notr +dame +indiana +march +metaprogram +type +theori +state +univers +york +buffalo +york +march +formal +explan +softwar +formal +method +softwar +engineeringworkshop +univers +pennsylvania +philadelphia +pennsylvania +publicationsform +theori +softwar +system +fundament +connect +betweencomput +scienc +logic +futur +tendenc +comput +scienc +control +appli +mathemat +bensoussan +verju +lectur +note +comput +scienc +springer +verlag +decemb +metalevel +program +construct +type +theori +program +andmathemat +method +manfr +broi +nato +seri +springer +verlag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..ce6acfa1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,35 @@ +adam +florenc +adam +florenc +upson +hall +cornel +univers +ithaca +aflorenc +cornel +first +year +student +incomput +scienc +atcornel +univers +professionalinterest +includ +simul +numer +analysi +follow +link +find +academicsresearchworkinterest +athlet +last +updat +sept +mail +mewith +comment +correct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..1671449c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,45 @@ +marco +kawazo +aguilera +home +page +marco +kawazo +aguilera +marco +kawazo +aguilera +depart +comput +scienc +cornel +univers +ithaca +aguilera +cornel +second +year +student +cornel +univers +distribut +system +algorithmsrandom +failur +detect +hybrid +approach +solv +consensusgo +tour +brazil +check +page +suggest +warn +perman +constructionmarco +kawazo +aguilera +aguilera +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..4dde89ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,38 @@ +alfr +home +page +alfr +hong +get +dizzi +offic +upson +hall +cornel +univers +ithaca +usaoffic +phone +worthwhil +site +check +new +sinanet +taiwan +new +chines +chines +requir +nandonet +sunworld +javaworldcours +stuff +corba +essenti +annot +bibliographyc +project +reportalfr +hong +ahong +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..4f494b04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,74 @@ +vineet +home +pagevineet +ahujam +engg +depart +comput +sciencecornel +universityithaca +address +hasbrouck +apt +ithaca +mail +ahuja +cornel +academ +student +depart +comput +scienc +cornel +univers +main +area +interest +parallel +process +advanc +architectur +oper +system +object +orient +program +coursework +cornel +fall +automata +comput +theori +engin +comput +network +spring +high +perform +system +final +project +report +softwar +design +high +perform +architectur +final +project +reportfal +high +capac +inform +network +multimedia +system +resum +postscript +postscript +recent +html +page +access +time +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..cfcc017e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,22 @@ +ching +ching +chinglan +cornel +edumast +engin +student +comput +scienc +depart +beau +pair +address +seneca +ithaca +telephon +page +still +construct +java +examplegraph +project diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..b20a19b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,102 @@ +alexei +home +pagealexei +vaysburdalexei +cornel +program +comput +scienc +cornel +univers +research +interest +includ +distribut +system +group +commun +larg +scale +wide +area +system +andobject +orient +tool +distribut +comput +work +within +horu +group +lead +birman +androbbert +reness +paper +implement +replic +state +machin +partition +network +friedman +alexei +vaysburd +cornel +univers +link +search +engin +lyco +technic +report +cornel +technic +report +broadcast +technic +report +seri +ecol +polytechniqu +federal +lausann +hebrew +univers +transi +home +page +cornel +cuinfo +cornel +gopher +direct +cornellcornel +mail +directorycornel +staff +directori +cornel +student +directorycours +class +examscornel +calendarcornel +art +musicbailei +hall +concertscornel +music +event +new +weather +ithaca +weather +current +condit +ithacaworld +new +brief +odessa +odessaweb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..ec46d0d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,218 @@ +yama +yama +hell +accord +hindu +mytholog +yama +death +come +downto +earth +give +ride +heaven +hell +though +kind +imparti +better +mess +amithyamasanim +engg +depart +comput +scienc +cornel +univers +ithaca +yorki +make +monei +microsystem +garcia +avenu +mountain +view +mailstop +ucup +cupertino +javasoft +watch +warburton +avenu +santa +clara +mail +amith +yamasani +comi +currentlyemploi +javasoft +microsystem +cupertino +california +workingin +javamedia +groupeducationfal +coursesvlsi +multimedia +cssoftwar +engin +csspring +coursesvlsi +high +perform +comput +system +cscomput +graphic +csproject +ride +heaven +train +ride +simul +graphic +final +project +mpeg +descript +parallel +trace +propos +virtual +realiti +railroad +simul +master +project +involv +simul +railroadsystem +includ +train +track +switch +station +landscap +user +interact +wall +cave +environ +stereo +vision +simul +done +processor +onyx +system +softwarei +written +us +openinventord +graphic +librari +silicon +graphic +simul +includ +landscap +gener +dynam +ofth +train +environ +through +documentimag +compress +chipdevelop +imag +compress +chip +vlsi +cours +chip +basicallycompress +data +stream +us +compress +algorithm +thisalgorithm +better +suit +imag +chip +control +extern +cachecam +content +address +memori +store +pattern +inputstream +hope +pattern +might +occur +chip +capableof +compress +data +rate +byte +nano +second +nowai +softwar +routin +compress +data +speed +snap +chip +imag +video +transit +rivlproposalpresentationand +sampl +pyramania +dthi +game +develop +softwar +engin +cours +itswritten +us +interfac +xlib +thed +render +take +look +snapshot +game +screen +pyramania +parallelomania +resumehtmlpostscript +memori +past +present +futur +pal +home +page +satyaprasad +avinashgupta +kartikh +kapadia +hrishikeshdixit +joselui +fernandez +vineetahuja +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..84662dbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,98 @@ +ankit +patel +homepag +ankit +patel +east +state +street +ithaca +apatel +cornel +person +photo +galleria +chronologia +curriculum +vita +cornel +universityresumedepart +comput +scienc +cours +enrolledgradu +research +assist +prof +brian +smith +multimedia +group +project +zeno +canvasd +toolkit +interact +platform +independ +develop +high +perform +graphic +multimedia +virtual +realiti +applic +virtual +realiti +video +conferencingmultimedia +system +read +project +assignmentsproject +kernel +endpoint +netan +annot +bibliographi +common +object +request +broker +architectur +corba +critiqu +understand +limit +causal +total +order +commun +david +cheriton +dale +skeen +carnegi +mellon +summer +school +comput +scienceworld +wide +technolog +spring +cours +link +real +time +support +multimediamaharaja +sayajirao +univers +academicsfriend +techoreli +industri +limitedjob +profilelif +relianc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..9ad617fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,16 @@ +ashish +jhaveriashish +jhaveridepart +comput +sciencemast +engineeringresumehtmlpost +scriptcourseworkadvanc +databas +systemscsmultimedia +systemscsengin +comput +networkscsprogram +languag +softwareengin +csashish +jhaveri diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..75336dbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,57 @@ +aswin +berg +aswin +berg +graduat +student +aswin +cornel +eduoffic +upson +hall +phone +home +skyacr +drive +ithaca +phone +interest +program +languag +program +transform +transform +systemmi +person +life +famili +album +annek +page +swing +danc +swing +danc +server +jean +aswin +swing +danc +page +deejay +guid +isdn +record +hop +person +swing +pictur +nederlands +club +cornel +cornel +dutch +clubi +graduat +student +atcornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..81a8a35a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,164 @@ +welcomeavinash +guptam +engg +depart +comput +sciencecornel +universityithaca +home +address +magazin +streetcambridg +mail +avinash +thecia +resumemi +main +area +interest +comput +graphic +multimedia +distribut +system +cours +oper +system +multimedia +softwar +engin +distribut +system +high +perform +system +project +hoca +oper +system +chip +hoca +pronounc +hodja +full +fledg +oper +system +chip +cornel +hypothet +instruct +processor +support +featur +like +multipl +user +multitask +virtual +memori +scene +transit +effect +rivl +presentationpent +network +game +skillpent +mean +five +board +game +skill +requir +player +object +game +five +point +five +piec +horizont +vertic +diagon +point +earn +trap +oppon +piec +piec +even +number +piec +trap +either +pair +piec +trap +point +player +reach +five +point +first +win +requir +window +screenshot +gamedownload +game +ipng +user +space +implement +next +gener +ipvimpl +ipng +next +gener +internet +protocol +us +interfac +make +abl +user +space +network +interfac +like +stream +proposalprogress +reportsam +caveat +appli +home +page +almost +everi +pageon +still +construct +brows +internethytelnetth +librari +subject +catalogeinet +galaxyplanet +earth +home +pagejoel +hierarch +subject +indexyahoo +guid +wwwwebcrawlerlycosmi +friend +link +sign +guestbook +page +access +time +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..f4b7a26e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,351 @@ +jonathan +barber +fleshpooooooooooooooch +jonathan +barber +upson +hall +cours +research +interest +futur +cool +dude +page +felt +inclin +chang +first +sentenc +birth +name +jonathan +barber +peopl +usual +callm +peopl +also +call +ponch +orpooch +peopl +us +call +lake +fork +guppi +skin +mama +phin +list +goe +finish +degre +graduat +student +master +engin +program +computersci +cornellunivers +also +undergrad +cornel +comput +scienc +well +grew +coupl +town +jersei +attendedmontgomeri +high +school +current +live +collegetown +adjac +cornel +campu +cheap +summer +sublet +oncolleg +ultra +conveni +sinc +work +plai +half +mile +radiu +student +cornel +went +greek +spent +time +epsilon +fratern +border +cayuga +height +kind +hard +believ +dai +hand +time +finish +master +engin +project +prof +thorsten +eicken +develop +packet +filter +thefilt +work +high +bandwidth +network +protocol +bring +parallel +comput +home +person +comput +also +taught +week +summer +session +ofc +fundament +program +concept +introductori +cours +comput +program +past +year +teach +assist +forc +introduct +digit +system +comput +organ +fall +spring +cours +research +intereststhi +past +semest +work +rivl +resolut +independ +video +languag +product +multimedia +research +group +cornel +final +product +labor +project +report +parallel +implement +continu +media +rivlan +improv +implement +object +tracker +rivli +also +taken +interest +high +perform +compil +good +friend +sugata +mukhopadhyai +still +finish +class +project +implement +smpd +code +generatorfor +high +perform +fortran +base +linear +algebra +framework +paper +come +soon +previou +semest +good +friend +greg +weber +develop +webar +encrypt +smart +encrypt +protocol +frame +depend +video +stream +mpeg +unfortun +paper +electron +form +trust +futur +look +bright +summer +pack +gear +take +perman +road +trip +oregon +hopefulli +buddi +surfer +oregon +inform +like +share +good +pleas +write +work +intel +corp +ahead +live +pictur +taken +futur +home +portland +clarif +actual +locat +hous +peopl +thought +would +inde +pretti +resourceful +camera +live +room +window +pipe +page +home +meanth +gener +locat +live +hous +favorit +peopl +line +comrad +upson +georg +bush +realli +nice +page +keep +list +ofhi +favorit +peopl +found +page +inde +lucki +also +consid +keep +list +least +favorit +peopl +know +decid +otherwis +goe +georg +bush +like +call +captain +swirl +nefari +toilet +sugata +mukhopadyai +bonei +magoo +fletop +bigro +dave +koster +bot +ponch +offici +tffl +bulli +home +pageuuencod +graphic +need +bulli +pagetar +zip +graphic +need +bulli +pageif +brows +feel +urg +write +send +mail +downloadsgraphicsbarb +gifponch +htmlres_htmlres_curemmittemmitt diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..b87e7dad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,276 @@ +anindya +homepag +anindya +homepag +realli +look +like +click +photograph +recent +mugshot +locat +anindya +work +home +upson +hall +delawar +avenu +depart +comput +scienc +cornel +univers +ithaca +ithaca +phone +offic +phone +mail +basu +cornel +eduwhat +anindya +graduat +student +comput +scienceat +cornel +univers +hope +complet +turn +centuri +listen +rock +musicor +goof +work +thecornel +cluster +projectwith +advisor +thorsten +eicken +anindya +think +coollik +graduat +student +indiawho +went +indian +institut +technolog +kanpur +hardpink +floydfanat +final +realiz +childhood +dream +see +perform +livelast +summer +philadelphia +love +plum +known +thegreat +unwash +pelham +grenvil +wodehous +also +attendedwoodstock +truli +motiv +onlinewoodstock +review +photograph +woodstock +experienc +metallica +live +first +time +would +love +thesocc +worldcup +last +year +butunfortun +happen +like +cook +tri +putsomerecip +like +onlin +also +fanci +connoisseurof +good +whiski +especi +singl +malt +link +cool +stuff +calvin +hobb +archiv +south +asian +writer +cheer +monti +python +beavi +buttheadoth +us +stuff +cuinfo +gopherand +someth +complet +differ +work +cornel +cluster +projectwhich +aim +provid +high +perform +commun +layer +cluster +ofworkst +parallel +program +perform +compar +state +mpp +acheiv +cluster +develop +fast +messag +passinglay +call +implement +activ +messag +split +thatshow +perform +parallel +program +run +meiko +addit +implement +show +realli +latenc +saturateth +fibr +segment +size +also +work +collabor +withth +berkelei +project +team +develop +specificationfor +commun +layer +would +enabl +fast +commun +processesboth +trust +untrust +environ +public +activ +messag +activ +messag +mechan +integr +commun +comput +thorsten +eicken +david +culler +seth +goldstein +klau +schauser +proceed +symp +comput +architectur +gold +coast +australia +latenc +commun +network +us +activ +messag +thorsten +eicken +veena +avula +anindya +basu +vineet +buch +present +interconnect +palo +alto +abridgedvers +paper +appear +ieee +micro +februari +user +level +network +interfac +parallel +distribut +comput +thorsten +eicken +anindya +basu +vineet +buch +werner +vogel +proceed +sosp +appear +back +cornel +comput +scienc +homepagelast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..7f1175cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,125 @@ +simpl +homepageben +haogradu +studentbhao +cornel +educornel +universitydept +comput +scienc +upson +hallithaca +insid +cornel +flea +forth +year +student +incomput +scienc +cornel +work +code +synthesi +massiv +parallel +processor +advisor +richard +zippel +first +heard +georg +taylorwhen +microsystem +famili +francisco +area +find +inform +aboutth +area +read +local +newspap +forget +check +itsgorg +weather +life +cornellwhat +cornel +check +ithaca +weather +movi +studi +librari +dept +home +page +check +gener +neat +stuff +stock +weather +map +interact +frog +dissectionmagazin +intertext +magazin +wire +magazinea +peek +galaxi +view +solar +system +shoemak +levi +music +weblouvr +xmorphia +galleri +interact +line +geometri +kaleidospac +jpop +overview +bonsai +jpop +seiyuu +jpop +internet +underground +music +archivenetwork +navig +global +network +navig +wander +spider +edgelibrari +librari +congress +martial +scientif +comput +info +tutori +internet +infonih +internet +courseth +intern +info +guidecern +seminar +last +modifi +bhao +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..b55b33d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,23 @@ +aastha +home +pageaastha +bhardwajdepart +comput +sciencemast +ofengineeeringresumehtmlpost +scriptcourseworkadvanceddatabas +system +csmultimediasystem +csengineeringcomput +network +cssoftwareengin +program +languag +cscontact +inform +hasbrouck +apart +ithaca +york +bhardwaj +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..bc424098 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,524 @@ +bruce +randal +donaldbruc +randal +donaldassoci +professorbrd +cornel +eduph +weather +palo +alto +offici +department +home +page +cornel +robot +vision +laboratorydan +huttenloch +found +thecornel +robot +vision +laboratori +researchmi +interest +includ +professor +noel +macdonald +build +massiv +parallel +arrai +microactu +thecornel +nation +nanofabr +laboratori +arrayi +scream +chip +contain +actuat +squarecentemet +orient +small +part +without +sensoryfeedback +microfabr +actuat +arrai +could +us +toconstruct +programm +part +feeder +scale +buildself +propel +walk +vlsi +chip +graduat +student +amybrigg +work +huttenloch +vision +group +develop +sensor +plan +surveil +system +team +ofmobil +robot +robot +board +vision +detect +andintercept +target +demo +massiv +parallel +micro +fabric +actuat +arrai +mpeg +video +tommi +chase +lili +tommi +lili +mobil +robot +built +us +algorithm +developedbi +vision +group +ourlab +lili +track +tommi +follow +us +visual +informationalon +video +show +lili +view +chase +face +morph +select +recent +public +bhringer +donald +macdonald +upper +andlow +bound +programm +vector +field +applic +memsand +vibratori +part +feeder +intern +workshop +thealgorithm +foundat +robot +toulous +franc +brigg +donald +robustgeometr +algorithm +sensor +plan +internationalworkshop +algorithm +foundat +robot +toulous +franc +bhringer +donald +macdonald +singl +crystal +silicon +actuat +arrai +micro +manipul +task +ieee +workshop +micro +electro +mechan +system +mem +diego +california +februari +bhringer +donald +macdonald +classif +lower +bound +mem +arrai +vibratori +part +feeder +programm +vector +field +cannot +part +ieee +intern +confer +robot +autom +icra +minneapoli +minnesota +april +bhringer +donald +macdonald +andimprov +manipul +algorithm +mem +arrai +vibratori +partsfeed +programm +vector +field +cannot +partii +ieee +intern +confer +robot +autom +icra +minneapoli +minnesota +april +provablygood +approxim +algorithm +optim +kinodynam +plan +robotswith +decoupl +dynam +bound +xavier +algorithmica +provablygood +approxim +algorithm +optim +kinodynam +plan +forcartesian +robot +open +chain +manipul +xavier +algorithmica +kinodynam +motion +plan +xavier +canni +reif +journal +inform +invari +distribut +manipul +jen +intern +journal +robot +research +inpress +donald +jen +minim +distribut +supermodular +journal +experiment +andtheoret +artifici +intellig +jetai +press +write +book +entitl +inform +invari +robot +draft +firstquart +book +appear +paper +artificialintellig +inform +invari +robot +revis +base +paper +inform +invari +robot +artificialintellig +paper +distribut +robot +manipul +experi +inminim +intern +symposium +experiment +robot +iser +stanford +move +furnitur +team +automon +mobil +robot +jen +proc +ieee +robot +societi +ofjapan +intern +workshop +intellig +robot +system +iro +pittsburgh +sensorlessmanipul +us +massiv +parallel +micro +fabric +actuatorarrai +bhringer +mihailovich +noel +macdonald +proc +ieee +intern +confer +robot +andautom +diego +demo +detail +explan +program +mobil +robot +scheme +ree +proc +ieee +intern +confer +robot +automationnic +franc +inform +invari +distribut +manipul +jen +first +workshop +algorithm +foundat +ofrobot +peter +boston +wilson +andj +latomb +automat +sensor +configur +task +direct +plan +brigg +proceed +ieee +intern +confer +robot +autom +diego +otherpubl +recent +these +paper +student +post +doc +train +ourlab +daniela +jonathan +ree +dinesh +paper +aval +cornel +server +paper +list +cornel +version +onlin +tech +report +cornel +librari +catalogc +indexobtain +copi +paperscopi +paper +avail +anonym +pictur +develop +teamof +small +autonom +mobil +robot +movefurnitur +around +group +portrait +robot +click +hereto +pictur +tommi +lili +mobot +push +couch +click +hereto +pictur +tommi +lili +rotat +couch +click +apictur +tommi +mobil +robot +drawn +loretta +pompilio +click +hereto +pictur +peopl +robot +work +discoverychannel +beyond +find +funa +poem +alfr +mail +agent +famili +pictur +plai +harm +swallow +ithaca +sometim +plai +california +moreoth +peopl +robot +cornel +robot +vision +laboratori +cornel +robot +vision +laboratori +home +page +peopl +cornel +depart +click +herefor +search +tool +inform +access +stuff +return +cornel +level +clickher +tallest +darkest +lead +hollywood +merian +cooper +wrai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..12bdc809 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,455 @@ +clair +cardieclair +cardi +assist +professor +upson +hallphon +email +cardi +cornel +educlick +research +interestscours +teachselect +publicationsnlp +link +entri +depart +annual +report +amalgam +softbal +stat +finger +lake +tandem +tour +westi +research +interestsalthough +research +span +number +subfield +within +artifici +intellig +includ +machin +learn +case +base +reason +cognitivemodel +focu +research +area +naturallanguag +understand +group +cornel +primarili +interest +investig +machinelearn +techniqu +tool +guid +natur +languag +system +develop +forexplor +mechan +underli +languag +acquisit +work +focus +tworel +area +design +user +train +system +effici +reliablyextract +import +inform +document +machin +learn +naturallanguag +inform +extract +part +cornel +cstr +project +us +inform +extract +techniqu +tosupport +content +base +brows +technic +text +kenmor +project +focu +kenmor +project +develop +techniqu +autom +knowledgeacquisit +task +compris +build +system +gener +kenmoreacquir +linguist +knowledg +us +combin +symbol +machin +learningtechniqu +robust +sentenc +analysi +us +corpora +tworeal +world +domain +perform +part +speech +tag +semant +featur +tag +andconcept +activ +find +anteced +rel +pronoun +current +work +extend +kenmor +handl +larger +text +corpora +addit +disambiguationtask +work +evalu +languag +learningcompon +context +larger +applic +isembed +goal +project +determin +condit +machin +learn +techniqu +expect +offer +cost +effect +approach +knowledgeacquisit +system +teachingc +natur +languag +understand +spring +foundat +artifici +intellig +practicum +inartifici +intellig +fall +seminar +natur +languag +understandingselect +publicationsautom +featur +select +case +base +learn +linguist +knowledg +cardi +proceed +conferenceon +empir +method +natur +languag +process +univers +pennsylvania +embed +machin +learn +system +natur +languag +process +agener +framework +cardi +wermter +riloff +scheler +gabriel +connectionist +statist +andsymbol +approach +learn +natur +languag +process +lectur +note +artifici +intellig +springer +origin +present +workshop +approach +tolearn +natur +languag +process +intern +jointconfer +artifici +intellig +ijcai +aaai +press +chapter +introduct +thesi +cardi +domain +specif +knowledg +acquisit +conceptu +sentenc +analysi +thesi +univers +massachusett +amherst +note +file +contain +introductori +chapter +thesi +domain +specif +knowledg +acquisit +conceptualsent +analysi +cardi +thesi +univers +massachusett +amherst +avail +univers +massachusett +cmpsci +technic +report +page +case +base +approach +knowledg +acquisit +domain +specif +sentenc +analysi +cardi +proceed +eleventh +nation +confer +artifici +intellig +washington +aaai +press +press +us +decis +tree +improv +case +base +learn +cardi +proceed +tenth +intern +confer +machin +learn +amherst +morgan +kaufmann +corpu +base +acquisit +rel +pronoun +disambigu +heurist +cardi +proceed +annual +confer +associ +comput +linguist +newark +associ +comput +linguist +learn +disambigu +rel +pronoun +cardi +proceed +tenth +nation +confer +artifici +intellig +jose +aaai +press +press +us +cognit +bias +guid +featur +select +cardi +proceed +fourteenth +annual +confer +cognit +scienc +societi +bloomington +lawrenc +erlbaumassoci +work +note +aaai +workshop +onconstrain +learn +prior +knowledg +jose +cognit +plausibl +approach +understand +complic +syntax +cardi +lehnert +proceed +ninth +nation +confer +artifici +intellig +anaheim +aaai +press +press +analyz +research +paper +us +citat +sentenc +lehnert +cardi +riloff +proceed +twelfth +annual +confer +cognit +scienc +societi +cambridg +lawrenc +erlbaumassoci +machin +learn +linkscomput +linguistics +print +archiv +databas +recent +paper +aclspeci +interest +group +natur +languag +learningmachin +learn +digestmachinelearn +resourc +researchersmachin +learn +link +home +page +research +penn +treebank +repositori +contain +pointer +code +variou +system +compon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..cb039cac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,108 @@ +paul +chew +paul +chew +senior +research +associatephd +purdu +univers +chew +cornel +eduappletsy +need +java +compat +beta +version +browser +asnetscap +make +work +voronoi +delaunai +applet +creat +avoronoi +diagram +delaunai +triangul +click +point +research +agenda +primari +interest +geometr +algorithm +emphasi +onpract +applic +practic +applic +includedplac +motion +plan +shape +comparison +vision +sens +mesh +gener +work +mesh +gener +exampl +geometr +issu +thataris +part +problem +automat +gener +scientificsoftwar +goal +rais +level +softwar +isspecifi +develop +environ +scientif +softwar +canb +creat +us +natur +high +level +mathemat +concept +ofphys +engin +thu +program +specifi +implicitli +acollect +mathemat +equat +geometr +symbol +techniquesar +us +transform +mathemat +express +effectiveprogram +myonlin +tech +reportscornel +depart +computerscienceth +simlabprojectaddress +rhode +hall +cornel +univers +ithaca diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..f8a9d3b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,95 @@ +chao +chang +home +page +chao +chang +chichao +cornel +edui +student +thedepart +comput +scienceat +cornel +univers +faculti +advisor +isthorsten +eicken +summer +microsoft +network +group +click +addressesand +phone +number +research +interest +interest +interact +compil +runtim +andoper +system +toward +effici +concurr +program +overheterogen +network +tham +composit +activ +messageslow +latenc +commun +risc +system +multimatlab +matlab +multipl +processor +design +perform +activ +messag +soccer +anyon +sport +server +latest +soccer +newsbraziliansocc +page +portugues +world +soccer +result +andhomepagesoliv +soccer +guid +cool +stuff +lubrasa +cornel +luso +brazilian +student +associationu +chess +centerjorn +brasilmi +carstockmasterjayhawk +basketballwww +tennisserverback +cornel +comput +scienc +homepagelast +modifi +chao +chang +chichao +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..f82efdb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,70 @@ +chee +keong +liau +chee +keong +liauwelcom +home +page +graduat +student +master +engin +programm +apolog +forth +construct +work +hopefulli +thing +improv +soon +graduat +school +comput +scienc +depart +cornel +univers +ithaca +fall +class +foundat +artifici +intelligencec +program +languag +softwar +engineeringc +engin +comput +networksc +advanc +databas +systemsbaccalaur +cours +depart +inform +scienc +tokyo +institut +technolog +tokyo +japanhomei +come +small +countri +call +singapor +know +find +inform +avail +singapor +homepag +mapl +avenu +ithaca +cornel +edulast +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..b84fb269 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,114 @@ +thoma +colemanthoma +colemancornel +universityi +professcp +match +http +cornel +comput +scienc +depart +center +appli +mathemat +also +strong +affili +theori +center +director +research +applic +group +advanc +comput +research +institut +final +member +cornel +comput +optim +project +ccop +group +cornel +faculti +research +interest +broadfield +comput +optim +discret +continu +research +programmi +research +program +concern +design +understand +practic +effici +numer +algorithm +continu +optim +problem +primari +interest +develop +computationalmethod +tool +larg +scale +problem +project +automat +differenti +imag +reconstruct +biomed +imag +parallel +linear +program +larg +scale +minim +linear +inequ +nonlinear +equal +constraint +student +postdoc +profession +activ +recent +paper +book +current +former +student +research +associ +postdoc +softwar +comput +mathemat +link +curriculum +vita +best +thoma +coleman +rhode +hall +cornel +univers +ithaca +york +coleman +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..2158f4f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,232 @@ +karl +crari +home +pagekarl +crarycrari +cornel +eduoffic +address +upson +halloffic +phone +researchbroadli +speak +primari +research +interest +type +theori +design +implementationand +semant +program +languag +recent +emphasi +area +subtyp +object +orient +program +modular +implement +practic +program +languag +kmlwhich +combin +featur +function +program +languag +formul +atyp +theoret +semant +interest +view +program +languag +tractabl +approxim +intract +richworld +foundat +type +theori +whole +mathemat +perform +newprogram +languag +develop +often +result +map +type +theoret +construct +anapproxim +program +languag +set +convers +paradigm +programminglanguag +often +well +understood +formul +type +theoret +aminterest +deepen +understand +relationship +type +theori +programminglanguag +particularli +issu +tractabl +approxim +mitig +also +interest +model +compil +view +seri +translat +lower +intermedi +calculi +intermedi +calculu +embed +type +theori +andth +correspond +interpret +program +invari +translat +modelallow +relat +stage +compil +origin +type +theoret +semant +allowsth +standard +compil +techniqu +optim +guarante +safeti +andcorrect +also +care +formul +intermedi +calculi +make +possibl +additionaloptim +unavail +compil +strategi +work +form +part +thenuprl +project +hereat +cornel +project +name +come +nuprl +system +formal +mathemat +logic +atyp +theori +base +type +theori +martin +also +implement +automatedreason +system +committe +consist +ofrobert +constabl +greg +morrisett +dexter +kozen +also +work +close +jasonhickei +select +papersoth +linksmark +leon +maintain +collect +ofprogram +languag +research +resourc +cansearch +comput +scienc +technic +report +onlin +cornel +grad +life +biblestudi +home +pageth +lurker +guid +babylon +command +import +import +answer +jesu +hear +israel +thelord +lord +love +lord +heart +andwith +soul +mind +strength +thesecond +love +neighbor +commandmentgreat +mark +cornel +univers +home +pagedepart +comput +scienc +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..a26ed466 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,287 @@ +chunguang +sunchunguang +sunphd +pennsylvania +state +univers +welcom +home +page +research +associ +advanc +comput +research +institut +cornel +theori +center +also +affili +thecornel +comput +optim +project +work +close +professorthoma +coleman +research +interest +parallel +scientif +comput +spars +matrix +algorithm +numer +linear +algebra +mathemat +softwar +current +project +ppcx +parallel +linear +program +parallel +solut +rank +defici +spars +linear +least +squar +problem +parallel +solut +spars +least +squar +problem +bound +softwar +packag +spars +matrix +comput +pssl +parallel +solut +spars +least +squar +problem +psspd +parallel +solut +spars +symmetr +posit +definit +systemsrec +lecturesparallel +solut +spars +linear +least +squar +problem +contain +dens +row +second +siam +confer +spars +matric +coeur +alen +idaho +octob +parallel +multifront +solut +spars +linear +least +squar +problem +ondistribut +memori +multiprocessor +seventh +siam +confer +parallelprocess +scientif +comput +francisco +februari +select +public +parallel +spars +orthogon +factor +distribut +memori +multiprocessor +siam +journal +scientif +comput +deal +dens +row +solutionof +spars +linear +least +squar +problem +cornel +theori +center +technic +report +ctctr +cornel +univers +decemb +parallel +solut +spars +linear +least +squar +problem +distribut +memori +multiprocessor +cornel +theori +center +technic +report +ctctr +cornellunivers +parallel +multifront +solut +spars +linear +least +squar +problem +distribut +memori +multiprocessor +proceed +seventh +siam +conferenceon +parallel +process +scientif +comput +bailei +bjorstad +gilbert +mascagni +schreiber +simon +torczon +watson +siam +philadelphia +map +algorithm +parallel +spars +choleskyfactor +pothen +siam +journal +scientif +comput +septemb +parallel +orthogon +factor +larg +spars +matriceson +distribut +memori +multiprocessor +coleman +proceed +sixth +siam +confer +parallel +processingfor +scientif +comput +sinovec +key +leuz +petzold +reed +siam +philadelphia +distribut +multifront +factor +us +cliqu +tree +pothen +proceed +fifth +siam +confer +parallel +processingfor +scientif +comput +dongarra +kennedi +messina +sorensen +voigt +siam +philadelphia +compact +cliqu +tree +data +structuresin +spars +matrix +factor +pothen +larg +scale +numer +optim +coleman +siam +philadelphia +chunguang +advanc +comput +research +institut +cornel +theori +center +cornel +univeristi +ithaca +mail +csun +cornel +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..4d512b7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,317 @@ +david +cooper +home +page +david +cooper +postdoctor +associ +upson +hall +phone +email +dcooper +cornel +current +research +current +research +involv +design +implement +securityarchitectur +horu +goal +work +provid +layer +horuswhich +interact +kerberosnetwork +authent +servic +cryptograph +tool +order +toprovid +privaci +authent +servic +process +group +set +origin +secur +architectur +horu +implement +mike +reiter +secur +architectur +fault +toler +system +originalimplement +horu +process +group +support +virtual +synchroni +model +ofcomput +order +maintain +virtual +synchroni +crash +failuremodel +us +horu +necessari +process +within +group +honest +result +origin +secur +architectur +make +assumpt +anyprocess +allow +join +group +trust +group +member +current +version +horu +isposs +maintain +process +group +whose +semant +weaker +ofvirtu +synchroni +group +desir +permit +untrustedprocess +join +exampl +might +involv +allow +untrust +clientsto +join +client +server +group +set +server +would +communicatewith +untrust +client +would +accept +limit +command +fromth +client +would +respons +screen +messag +horussecur +architectur +permit +arbitrari +trust +relationshipsamong +process +within +group +accomplish +us +keymanag +scheme +allow +process +group +impersonateanoth +group +member +us +scheme +process +group +trivial +achieveth +semant +provid +origin +secur +architectur +howev +witha +slightli +higher +overhead +howev +unlik +origin +secur +architectur +architectur +enabl +implement +group +asclient +server +group +mani +complic +trust +relationshipsamong +group +member +thesi +research +thesi +propos +solut +privaci +problem +inherentin +mobil +network +static +network +basic +type +ofinform +user +wish +keep +privat +first +contentsof +messag +send +user +inform +hiddenwith +proper +encrypt +user +also +wish +prevent +outsidersfrom +determin +commun +solut +maintainingth +unlink +messag +sender +recipi +first +propos +david +chaum +commun +februari +sinc +severaloth +made +improv +origin +scheme +mobil +network +addit +type +inform +staticnetwork +also +locat +inform +user +carri +mobilecommun +devic +gener +desir +privaci +howev +themessag +devic +send +receiv +reveal +privat +informationabout +devic +owner +research +develop +along +advisorken +birman +protocol +prevent +attack +internaland +extern +adversari +public +david +cooper +kenneth +birman +preserv +privaci +network +ofmobil +comput +proceed +ieee +symposium +securityand +privaci +page +david +cooper +kenneth +birman +design +implement +apriv +messag +servic +mobil +comput +wireless +network +david +anthoni +cooper +design +implement +privat +messag +servic +mobilecomput +dissert +cornel +univers +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..8ae6b457 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,149 @@ +cyber +abodedan +cyber +abodegreet +travel +digit +space +welcom +mine +humbl +home +prithe +gentl +surf +shore +instead +blink +teari +ey +rest +weari +kei +mice +born +hand +make +thyselv +home +mine +abod +brief +autobiographi +resum +project +deidr +model +facial +express +project +deidr +model +univers +emot +project +simnet +builder +type +game +faiththei +hardli +faith +prize +ferro +concret +sai +wise +scorn +bend +instead +ear +lawyer +tone +scientist +word +need +unseen +unheard +untouch +silenc +night +dread +unknown +question +uncertain +yearn +true +faith +direct +field +lordlovewarm +friendship +mindless +infatu +sensual +romanc +burn +passion +love +love +love +soft +sigh +belov +poetri +hopemyth +favor +beauteou +pandora +ever +place +fault +human +role +releas +dread +demon +hope +mani +astrai +pretti +glimmer +fals +hope +tread +tortur +broken +road +amidst +thorn +dark +filthi +soul +diseas +pain +horror +suffer +reach +fear +tear +pandora +cannot +blame +ever +deed +told +burn +hope +heart +take +hold +world +would +never +ever +frozen +miseri +cold +spring +etern diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..47b15584 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,107 @@ +dean +krafft +select +urlsdean +point +interestcornel +server +cornel +home +page +cuinfo +server +engin +librari +page +cornel +legal +inform +institut +cornel +directori +project +public +server +page +dimund +document +imag +understand +charact +recognit +server +siam +gopher +server +inform +captur +access +cours +illinoi +digit +librari +stanford +digit +librari +michigan +digit +librari +berkelei +digit +librarysearch +tool +lyco +search +engin +veronica +gopher +search +tool +archi +gatewai +search +anonym +site +depart +togeth +excel +collect +search +tool +network +inform +scout +report +intern +planet +earth +whole +internet +catalog +part +global +network +navig +cern +refer +page +faqsvari +stuff +head +mail +list +audio +gear +folkbook +folk +music +home +page +ithaca +weather +forecast +weather +elsewher +secur +refer +index diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..80bcbc23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,128 @@ +dean +krafft +home +page +dean +cornel +dean +krafft +director +comput +facilitiesaddress +upson +halldepart +comput +sciencecornel +universityithaca +phone +current +serv +research +administr +inth +comput +scienc +depart +cornel +guis +anadministr +manag +comput +facil +support +group +andworri +number +issu +includ +comput +secur +network +build +servic +research +side +princip +investig +cornel +spart +project +arpa +fund +consortium +five +comput +sciencedepart +thecorpor +nation +research +initi +cnri +researchi +intend +rapid +dissemin +break +technicalresearch +internet +well +make +avail +line +theexist +librari +technic +report +consortium +member +part +project +research +work +cornel +davi +xerox +employe +thedesign +research +institut +carl +lagoz +emploi +project +develop +implement +protocol +system +disseminationov +technic +report +similar +materi +system +call +dienst +eight +univers +site +atechn +report +dienst +avail +inform +ondienst +project +pleas +send +email +cornel +togethera +page +select +url +relat +research +comput +facil +thing +interestedin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..efd1594a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,28 @@ +divakar +home +pagedivakar +viswanathdivakar +cornel +address +upson +hall +cornel +univers +ithaca +graduat +student +comput +scienc +area +interest +isnumer +analysi +advis +home +page +good +place +find +numer +analysi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..da780871 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,104 @@ +diyu +home +pagediyu +daisi +welcom +home +spring +cours +practic +distribut +comput +databas +system +compil +translat +practicum +compil +translatorsfal +coursesc +oper +systemc +softwar +engineeringc +advanc +databas +systemsel +comput +network +telecommunicationsm +projectorigin +virtual +realiti +railroad +projectsinc +mayb +like +know +littl +doubt +anywai +current +master +engin +student +depart +comput +scienc +cornel +univers +cornel +locat +ithaca +central +york +gorgeou +place +live +except +winter +last +year +receiv +appli +physic +jersei +institut +technolog +newark +jersei +brought +beauti +campu +tsinghua +unviers +beij +china +also +receiv +engin +physic +want +friend +china +miss +us +linksjava +html +tkfavorit +sitestimecnnlondon +timeswashington +postchines +digestchina +new +digestfeng +yuanxin +siart +chinaloc +connectionsctc +sunlabweathermovi +miller +ithaca +diyu +cornel +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..6bc5396f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,159 @@ +huttenloch +home +page +daniel +huttenloch +associ +professordph +cornel +research +main +area +research +visual +match +recognit +work +area +rang +theoret +algorithm +us +techniqu +comput +geometri +applic +visual +match +system +remot +collabor +view +document +imag +wide +area +network +video +monitor +target +recognit +also +interest +us +type +electron +document +commun +remot +collabor +educ +algorithm +compar +geometr +structur +hausdorff +base +method +visual +match +recognit +implement +avail +fast +index +us +eigenspac +approxim +hausdorff +fraction +matlab +implement +avail +perform +evalu +model +recognit +method +video +monitor +object +track +identif +digipap +highli +compact +univers +viewabl +document +imag +format +conot +system +support +collabor +share +document +teach +brian +smith +develop +cours +author +document +offer +first +time +spring +also +teach +introduct +comput +program +cours +comput +vision +profession +activ +work +xerox +parc +electron +document +imag +process +start +small +group +investig +problem +cornel +program +chair +cvpr +ieee +confer +comput +vision +pattern +recognit +held +juan +june +interest +favorit +comput +geek +activ +snowboard +mountain +bike +without +extrem +sport +cool +stupid +attitud +last +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..6806d2e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,119 @@ +ashvin +dsouza +ashvin +dsouza +dsouza +cornel +edui +graduat +student +work +bard +bloom +focu +thesi +research +develop +oftool +support +process +algebra +method +specif +andverif +concurr +system +design +tool +withrespect +metatheori +process +algebra +becom +immediatelyavail +wide +class +process +algebra +allevi +theproblem +duplic +effort +inher +custom +tool +exampl +design +prototyp +base +calculu +model +checker +simpl +gso +process +algebra +semant +process +algebra +form +part +input +make +tool +applic +mani +commonli +us +process +algebra +includ +basic +loto +addit +investig +express +power +process +algebra +order +better +understand +compar +final +exploringappl +techniqu +work +gener +bdd +process +algebraterm +full +postscipt +lite +postscript +version +also +written +express +result +postscript +presentedth +former +comput +aid +verif +lnc +latter +foundat +softwar +technolog +theoret +computersci +lnc +june +present +work +verifi +specif +compass diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..7c0b24ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,59 @@ +funda +ever +improv +pagefunda +ergn +mail +ergun +cornel +eduhi +welcom +home +page +name +funda +ergun +studentin +comput +scienc +dept +cornel +work +programcheck +prof +ronitt +rubinfeld +researchpag +also +minor +paint +depart +fine +art +origin +come +izmir +turkei +undergrad +bilkentunivers +ankara +turkei +research +relat +stuff +warn +might +encounterpag +written +turkish +angri +dog +risk +person +visit +page +sinc +page +alwai +heavi +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..45b2683e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,374 @@ +activ +tardo +tardosassoci +professor +depart +comput +scienc +upson +hallcornel +universityithaca +phone +email +cornel +school +oper +research +industri +engineeringphon +ori +cornel +click +daughter +rebecca +julia +shmoi +current +activ +current +researchrec +public +current +research +broadli +speak +research +interest +theori +algorithm +includ +mani +aspect +comput +complex +theori +mostlywork +combinatori +optim +problem +particular +networkproblem +approxim +algorithm +linear +integ +programmingproblem +recent +public +research +paperssurvei +paper +research +paper +shmoi +tardo +approxim +algorithm +thegener +assign +problem +mathemat +program +preliminari +version +appear +proceed +annual +siam +symposium +discret +algorithm +januari +plotkin +tardo +improv +bound +flow +cutratio +multicommod +flow +appear +combinatorica +preliminari +version +appear +proceed +annual +symposium +theori +comput +ori +klein +plotkin +stein +tardo +fasterapproxim +algorithm +unit +capac +concurr +flow +problemwith +applic +rout +find +spars +cut +siam +journal +oncomput +preliminari +version +appearedin +proceed +annual +symposium +theori +comput +leighton +makedon +plotkin +stein +tardo +tragouda +fast +approxim +algorithm +multicommod +flowproblem +journal +comput +system +scienc +stoc +special +issu +preliminari +version +appear +proceed +annualacm +symposium +theori +comput +plotkin +shmoi +tardo +fast +approximationalgorithm +fraction +pack +cover +problem +appear +inmathemat +oper +research +ori +preliminari +version +hasappear +proceed +annual +ieee +symposium +thefound +comput +scienc +goeman +goldberg +plotkin +shmoi +tardo +williamson +improv +approxim +algorithm +network +designproblem +proceed +annual +siam +symposium +discretealgorithm +januari +ori +hopp +tardo +polynomi +time +algorithm +someevacu +problem +proceed +annual +siam +symposium +ondiscret +algorithm +januari +ori +hopp +tardo +quickest +transship +problem +theproceed +annual +siam +symposium +discret +algorithm +ori +klein +plotkin +tardo +approximationalgorithm +steiner +direct +multicut +ori +kleinberg +tardo +approxim +disjoint +pathsproblem +high +diamet +planar +network +proceedingsof +annual +symposium +theori +comput +ori +kleinberg +tardo +disjoint +path +dens +embed +graph +proceed +annualiee +symposium +foundat +comput +scienc +version +ori +rabani +tardo +distribut +packet +switch +arbitrari +network +symposium +theori +comput +version +fleischer +tardo +separ +maxim +violat +comb +inequ +planar +graph +appear +ipco +june +ori +survei +paper +goldberg +tardo +tarjan +network +flow +algorithm +sept +path +flow +vlsi +design +kort +lovaszand +schrijver +springer +verlag +tardo +strongli +polynomi +combinatori +algorithm +inoptim +proceed +intern +congress +ofmathematician +kyoto +springer +verlag +tokyo +shmoi +tardo +comput +complex +handbook +combinator +graham +grotschel +lovasz +north +holland +appear +lovasz +shmoi +tardo +combinator +computersci +appear +handbook +combinator +graham +grotschel +lovasz +north +holland +appear +tardo +approxim +theorem +fast +approximationalgorithm +multicommod +flow +problem +annot +bibliographi +inproc +summer +school +combinatori +optim +maastricht +netherland +tardo +approxim +theorem +fast +approximationalgorithm +multicommod +flow +problem +proc +networkoptim +theori +practic +netflow +miniato +itali diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..df247056 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,16 @@ +franci +home +page +franci +graduat +student +univers +california +berkeleymathemat +departmentcomput +scienc +departmentcornel +universitycomput +scienc +departmenthumorfcc +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..ecfc1b48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,2 @@ +felix +world diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..7a1fd110 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,78 @@ +frederick +smith +homepagefrederick +smith +grad +student +cornel +upson +halldepart +comput +sciencecornel +universityithaca +paper +erni +cohen +dexter +kozen +frederick +smith +complex +kleen +algebra +test +technic +report +cornel +univers +juli +dexter +kozen +frederick +smith +kleen +algebra +test +complet +decid +technic +report +cornel +univers +april +person +us +link +dexter +kozen +homepag +greg +morrisett +homepag +program +languag +research +page +class +take +system +multimedia +semant +program +languag +math +introduct +analysi +link +epicuri +food +zine +cartalk +home +click +clack +catch +sundai +last +updat +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..60f60159 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,35 @@ +frank +page +frank +page +frank +planet +either +matter +frank +adelstein +post +doctor +associ +cornel +xerox +design +research +institut +offic +phone +electron +mail +frank +cornel +frank +cornel +actual +inform +frank +checkout +improv +happi +page +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..8ab450ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,92 @@ +yuan +fred +softwar +engin +yuan +fred +softwar +engin +current +version +feet +shown +fred +fred +fred +resum +project +other +appear +soon +distribut +http +server +scramo +midi +choreograph +anim +model +postscript +vpla +visual +program +languag +animationlink +current +affili +massachusett +languag +comput +languag +hewlett +packardlink +previou +affili +cornel +comput +scienc +cornel +theori +center +visual +group +cornel +univers +binghamton +univers +lawrenc +berkelei +laboratoryinterest +hobbi +section +develop +soon +mayb +next +year +photographi +cello +guitar +aquarium +cornel +comput +sciencecornel +theori +center +home +burl +work +chelmsford +email +fredhsu +apollo +snail +apollo +drive +chelmsford +http +apollo +peopl +fred +fred +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..4b3493cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,213 @@ +deepak +balakrishna +deepak +balakrishnamast +engineeringdepart +comput +sciencecornel +univers +resumeeducationcoursesperson +deepak +balakrishna +cornel +resum +html +postscript +back +main +page +educ +undergradu +complet +undergradu +june +karnataka +region +engin +colleg +surathk +india +major +comput +scienc +major +interest +multimedia +cours +relat +comput +scienc +oper +system +artifici +intellig +compil +construct +data +commun +comput +graphic +graduat +present +cornel +univers +pursu +master +engin +degre +comput +scienc +specialis +multimedia +graduat +want +involv +project +deal +multimedia +server +program +back +main +page +cours +follow +cours +list +taken +fall +semest +multimedia +system +prof +brian +smith +advanc +databas +system +prof +praveen +seshadri +engin +comput +network +prof +srinivasan +keshav +softwar +engin +prof +michael +godfrei +back +main +page +person +well +start +goe +upon +time +long +long +actual +novemb +land +call +bharat +india +outsid +world +precis +born +cute +chubbi +littl +babi +weigh +approxim +four +pound +took +name +deepak +mean +light +went +actual +still +process +chang +world +incident +probabl +divin +interfer +aishwarya +miss +world +born +lucki +born +leav +miniscul +detail +earlier +life +dive +straight +high +school +well +lucki +nation +public +school +bangalor +greater +part +school +place +someon +colleg +noth +better +krec +that +major +comput +scienc +wonder +long +anoth +four +year +holidai +part +conquer +class +never +match +cornel +univers +pursu +master +degre +comput +scienc +well +hope +get +somewher +final +here +link +friend +ashish +aastha +indira +ankit +vineet +back +main +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..24bd4f50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,76 @@ +asif +uddin +ghiasasif +uddin +ghia +welcom +mywww +home +page +warn +constructioni +student +comput +scienc +area +interest +distribut +system +multimedia +system +bachelor +degre +comput +system +engin +univers +engin +technolog +karachi +pakistan +sinc +work +global +inform +solut +system +engin +present +studi +leav +master +program +cornel +respons +includ +system +applic +program +unix +system +administr +support +educ +network +manag +installationso +work +number +interest +project +cornel +plan +onlin +good +hopefulli +year +publicationsth +follow +interest +music +cricket +astronomyasif +uddin +ghia +ghia +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..08a39412 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,12 @@ +home +page +move +home +page +http +berkelei +dglaser +home +htmlpleas +visit +million diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..8cf55089 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,449 @@ +david +gri +home +pagedavid +grieswilliam +lewi +professor +engineeringdr +munich +institut +technolog +interest +program +methodolog +particular +formaldevelop +program +relat +area +programminglanguag +program +languag +semant +logic +asinterest +teach +topic +researchin +fact +understand +logic +formal +taughta +us +tool +freshman +sophomor +colleg +level +anoverrid +concern +mine +click +follow +item +inform +teach +logic +tool +curriculum +vita +short +biographi +text +written +gri +program +languag +polya +announc +dimac +symposium +teach +logic +link +paper +symposium +cornel +depart +home +pagecomput +scienc +upson +hallcornel +universityithaca +gri +cornel +edushort +biographi +david +griesi +born +flush +york +spent +year +iescap +receiv +queen +colleg +went +workfor +naval +weapon +laboratori +civilian +amathematician +programm +wife +elain +fewmonth +later +marri +novemb +went +illinoi +educ +receiv +master +degreein +math +illinoi +assistantship +help +twogerman +manfr +paul +ruedig +wiehl +write +full +algol +compilerfor +comput +figur +implementrecurs +effici +mani +paper +topic +end +wife +go +munich +almost +three +year +receiv +doctor +bauer +joseph +stoer +munich +institut +technolog +germani +june +wasin +math +numer +analysi +sinc +comput +scienc +these +notyet +kosher +assist +professor +comput +scienc +stanford +stanford +twin +paul +susan +born +made +excit +usual +born +thebirthdai +twin +april +twin +intown +elain +make +four +birthdai +cake +left +stanford +weather +move +cornel +whichha +weather +snow +ever +sinc +wasdepart +chair +becam +william +lewisprofessor +engin +guggenheim +fellowship +return +tabl +contentsi +better +known +mytext +writingand +contribut +educ +thewond +research +good +bloom +wherey +plant +receiv +number +award +contributionsto +educ +ieee +taylor +booth +award +sigcseaward +cornel +outstand +educ +award +clarkaward +cornel +colleg +art +scienc +theamerican +feder +inform +process +societi +afip +educ +award +proud +advise +stand +susanowicki +thesi +laid +foundat +proof +correct +ofparallel +program +notion +interfer +freeness +paper +author +topic +award +bestpap +program +langaug +system +andt +raman +sthesi +best +dissert +award +raman +designedand +implement +system +speak +latex +document +includ +technic +articl +book +document +printedor +spoken +abl +speakmathemat +effect +manner +import +goal +work +read +blind +alreadi +us +system +produc +audiocassett +serv +chair +comput +research +associ +thecomput +scienc +board +late +open +officein +washington +began +serious +repres +comput +researchinterest +also +conduct +taulbe +survei +period +proud +obtain +essenti +complet +responsesfrom +grant +comput +scienc +depart +period +noother +compar +survei +respons +rate +year +itrequir +telephon +call +depart +sendin +questionnair +receiv +comput +researchassoci +servic +award +work +survei +forchair +associ +move +toward +respect +andrespons +current +editor +acta +informatica +formal +aspect +comput +softwar +concept +andtool +edit +keep +busi +enjoi +takean +interest +individu +paper +know +area +willsuggest +substanti +rewrit +believ +help +servewher +best +serv +fredb +schneider +editor +springer +verlag +text +andmonograph +comput +scienc +spare +time +us +sport +like +golf +softbal +volleybal +swim +tabl +tenni +china +isplit +pant +plai +ping +pong +hour +later +give +alectur +mention +audienc +laugh +turnedaround +explain +interpret +spoke +everyonelaugh +howev +know +whether +told +truth +justsaid +gri +made +joke +laugh +also +us +sing +barbershop +andgilbert +sullivan +work +around +hous +carpentri +wire +remodel +taken +time +yield +considerablesatisfact +return +tabl +content diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..e1d4488b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,25 @@ +alex +grinzayd +homepagealex +grinzaydm +student +comput +sciencecornel +universitytel +email +grinzayd +cornel +first +week +cornel +link +necx +directinternet +shop +networkcomput +express +damarkwarn +page +bore +learn +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..c9eed105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,52 @@ +grzegorz +czajkowski +homepag +grzegorz +czajkowskidepart +comput +sciencecornel +universityithaca +offic +grze +cornel +second +year +student +program +depart +comput +scienceat +cornel +univers +ithaca +york +complet +master +degre +comput +scienc +krakow +poland +current +involv +sever +project +also +charg +administ +cuc +advisor +thorsten +eicken +link +relat +research +architectur +activ +messag +split +last +modifi +novemb +grze +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..6d3ef82f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,96 @@ +halpern +home +pagejoseph +halpern +professorcornel +universitycomput +scienc +depart +upson +hallithaca +halpern +cornel +research +focus +reason +knowledg +uncertainti +applic +distribut +comput +game +theori +although +also +done +work +continu +interest +topic +fault +toler +distribut +comput +program +languag +semant +work +li +boundari +number +field +recent +gave +talk +econom +depart +princeton +describ +someon +mathemat +call +comput +scientist +give +talk +economist +abouta +subject +mainli +studi +philosoph +probabl +best +sentenc +descript +give +like +detail +check +list +public +pointer +abstract +paper +mani +case +paper +also +avail +detail +activ +resum +fall +teach +cours +reason +knowledg +fall +teach +sequel +reason +uncertainti +last +updat +septemb +halpern diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..e5473ca0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,12 @@ +tsuneshi +hashimototsuneshi +hashimotothi +home +page +tsuneshi +hashimoto +construct +cstsuneshi +hashimoto +hashi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..98500cc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,32 @@ +mark +hayden +mark +hayden +hayden +cornel +offic +upson +cornel +univers +ithaca +fall +teach +tast +unix +interest +horu +distribut +commun +system +ensembl +distribut +commun +system +nuprl +proof +develop +system +hockei +last +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..1c8a6cf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,7 @@ +heji +improv +home +page +heji +cyber +pond diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..7c4a4e59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,35 @@ +deyu +home +page +deyu +graduat +student +cornel +cornel +universitydept +comput +scienc +upson +hallithaca +third +year +student +comput +scienc +cornel +born +shanghai +china +receiv +undergradu +degre +berkelei +faculti +advisor +thorsten +eicken +come +last +modifi +deyu +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..17c93a9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,82 @@ +jing +huang +home +page +jing +huang +upson +hall +depart +comput +scienc +cornel +univers +ithaca +huang +cornel +student +thedepart +comput +scienceat +cornel +univers +receiv +bachelorand +master +degre +depart +appli +mathemat +tsinghua +univers +beij +chinami +academ +interest +comput +vision +multimedia +system +work +professor +ramin +zabih +imag +retriev +video +process +motion +track +us +link +annot +comput +vision +bibliographi +pattern +recognit +relat +machin +learn +optim +check +cornel +chines +christian +fellowship +cornel +univers +evangel +fellowship +chines +christian +resourc +center +chines +christian +mission +back +cornel +comput +scienc +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..8adaa8bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,44 @@ +chin +chen +home +page +chin +chen +welcom +home +page +current +address +mapl +ithaca +mail +icchen +cornel +perman +address +sung +taipei +taiwan +class +spring +comput +graphic +practic +distribut +system +practic +distribut +system +practicum +databas +manag +album +resum +new +china +time +nctu +page +construct +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..37843245 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,33 @@ +home +page +indira +malik +depart +comput +scienc +master +engin +imalik +cornel +resum +post +script +cours +program +system +softwar +engin +advanc +databas +system +engin +comput +network +multimedia +system +visit +high +school +tap +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..c5c44cf8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,108 @@ +indira +vidyaprakash +indira +vidyaprakashmast +engineeringclass +dept +comput +sciencecornel +universitywelcom +homepag +current +student +comput +scienc +depart +cornel +univers +ithaca +degre +comput +scienc +engin +colleg +technolog +coimbator +india +inform +cours +taken +cornelluniversityfal +oper +system +practicum +oper +system +project +specif +hoca +oper +system +softwar +engin +multimedia +system +project +audio +process +toolkit +manag +inform +system +polici +spring +comput +graphic +cspracticum +comput +graphic +project +anim +magic +carpet +comput +scienc +colloqium +manageri +financesumm +independ +research +tracingin +comput +graphic +project +camera +perspectivetransform +java +click +postscript +version +myresumeclick +java +applet +camera +perspect +transformssom +interest +site +cool +applet +java +sgamelan +directori +calvinand +hobb +galleri +gif +indian +recip +chicker +wood +drive +nashvil +tennesse +indira +cornel +last +updat +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..1d999bfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,110 @@ +homeless +pageioi +home +pageioi +lamioi +cornel +current +research +assist +prof +brian +smith +comput +scienc +depart +cornel +univers +interest +multi +media +parallel +program +us +softwar +system +instruct +set +environ +system +softwar +knowledg +base +engin +manual +guidelin +write +good +extens +code +doesn +cover +much +write +script +section +write +test +suit +valuabl +programm +postscript +version +complet +engin +manual +packag +includ +templat +sourc +file +document +short +introduct +us +system +short +descript +user +manual +us +remot +machin +index +inform +tutori +right +week +put +togeth +knowledgebas +repositori +try +packag +spam +site +test +multim +home +directori +get +start +virtual +realiti +conferenc +detail +come +work +prototyp +mpeg +video +file +server +base +http +protocol +detail +come +spring +homework +solut diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..9a07c87d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,205 @@ +paul +jackson +home +pagepaul +jackson +post +doctor +associatecornel +univers +mail +jackson +cornel +eduwww +http +cornel +info +peopl +jackson +jackson +htmladdress +depart +comput +scienc +upson +hall +cornel +univers +ithaca +usaphon +depart +research +intereststheorem +prove +environ +formal +method +softwar +andhardwar +develop +comput +algebra +synthesi +scientif +program +linkag +softwar +tool +engin +design +thesi +informationmi +thesi +entitl +enhanc +nuprl +proof +developmentsystem +appli +comput +abstract +algebra +abstract +avail +full +text +postscript +format +paper +paul +jackson +explor +abstract +algebra +construct +type +theori +bundi +editor +intern +confer +automateddeduct +lectur +note +artif +intellig +springer +verlag +june +abstract +avail +full +text +postscript +format +paul +jackson +nuprl +circuit +design +bout +stavrid +melham +editor +proceed +inter +confer +theorem +prover +circuit +design +ifip +transactionsa +north +holland +abstract +avail +full +text +postscript +format +paul +jackson +develop +toolkit +float +point +hardwar +thenuprl +proof +develop +system +proceed +theadvanc +research +workshop +correct +hardwar +design +methodolog +elsevi +nuprlth +nuprl +project +world +wide +home +page +access +document +nuprl +commun +live +nuprl +session +basic +theori +load +collect +nuprl +page +still +need +workon +make +access +someon +els +getround +pai +attent +sometim +next +month +hypertext +list +thetheori +develop +thesi +avail +list +foreach +theori +includ +introduct +summari +definit +andtheorem +format +proof +list +thepolynomi +relat +theori +includ +moment +shouldb +next +coupl +dai +last +modifi +paul +jackson +jackson +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..bc7b7068 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,3 @@ +hani +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..e1d51fb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,22 @@ +tibor +jnositibor +jnosiwelcom +mywww +home +page +perman +constructionoffic +upson +hall +cornel +univers +ithaca +usaoffic +phone +interest +site +project +zenotibor +jnosi +janosi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..6907a412 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,15 @@ +janwun +janwun +janwun +cornel +master +engin +student +comput +scienc +depart +address +mapl +avenu +ithaca +telephon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..6e79f2d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,333 @@ +faculti +research +interest +greg +morrisett +greg +morrisett +cornel +assist +professor +comput +scienc +cornel +univers +ithaca +offic +upson +hall +offic +phone +tabl +content +research +intereststeachingselect +papersrel +research +linksperson +informationresearch +interestsmi +primari +research +interest +develop +ofadvanc +program +languag +particularli +interestedin +high +level +languag +standard +forbuild +system +softwar +includ +time +system +operatingsystem +distribut +system +late +focus +onth +implement +issu +kept +high +level +safelanguag +us +construct +system +softwar +research +concentr +produc +code +high +level +languag +faster +consum +less +memori +support +hack +bit +also +interest +bring +power +semant +base +toolsfrom +program +languag +theori +type +direct +compil +partial +evalu +abstract +interpret +time +code +gener +design +specif +construct +real +systemssoftwar +teachingc +semant +program +languag +fall +advanc +languag +implement +spring +select +paperssemant +memori +manag +polymorph +languag +greg +morrisett +robert +harper +technic +report +also +appear +septemb +compil +type +greg +morrisett +gzip +postscript +thesi +publish +technic +report +decemb +type +direct +optim +compil +tarditi +morrisett +cheng +stone +harper +sigplan +confer +program +languag +design +implement +compil +perform +safetythrough +type +morrisett +tarditi +cheng +stone +harper +workshop +compil +support +system +softwar +type +closur +convers +yasuhiko +minamid +greg +morrisett +robert +harper +appear +symposium +principl +program +languag +extend +version +publish +technic +report +juli +abstract +model +memori +manag +version +greg +morrisett +matthia +felleisen +robert +harper +conf +function +program +languag +andcomput +architectur +extend +version +publish +technic +reportcmu +version +also +notecmu +compil +polymorph +us +intensionaltyp +analysi +robert +harper +greg +morrisett +proc +annual +symposium +principl +program +languag +francisco +januari +optimist +parallelizationgreg +morrisett +mauric +herlihi +octob +refin +first +class +store +gregori +morrisett +proceed +sigplan +workshop +state +program +languag +copenhagen +denmark +june +proc +lock +portabl +multiprocess +platform +standard +jersei +gregori +morrisett +andrew +tolmach +proceed +fourth +sigplan +symposium +principl +practic +parallel +program +diego +portabl +multiprocess +interfac +standard +jersei +gregori +morrisett +andrew +tolmach +june +also +appear +princeton +ad +thread +standard +eric +cooper +gregori +morrisett +decemb +relat +research +link +mark +leon +resourc +program +languag +research +us +member +project +carnegi +mellon +line +inform +standard +home +page +research +program +languag +home +page +research +project +program +languag +program +languag +orient +bibliographi +cornel +depart +comput +scienceperson +informationhom +address +warren +road +ithaca +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..217ea8f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,17 @@ +jiun +jiun +resum +java +current +address +mapl +avenu +ithaca +email +jhlin +cornel +perman +address +shing +taipei +taiwan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..1884e890 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,6 @@ +jerri +cornel +edujerri +project +jerri +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..97ddd926 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,186 @@ +wang +home +page +twin +sister +wangphd +student +depart +comput +scienc +cornel +univers +upson +hallithaca +offic +phone +home +phone +email +jiawang +cornel +first +year +student +graduat +depart +comput +scienc +state +univers +york +binghamton +degre +comput +scienc +transfer +suni +binghamton +student +depart +mathemat +nankai +univers +tianjin +china +honor +award +barri +goldwat +scholar +mathemat +scienc +engin +nation +sciencefound +graduat +research +fellowship +famili +twin +sister +cool +link +hongkong +taiwan +beij +review +china +chinaand +chines +relat +site +china +daili +cbnet +china +internet +forum +chinanet +china +stamp +china +time +china +window +chinesecalendar +chines +internet +directori +chines +magazin +chines +mediainform +chines +music +chines +new +digest +chines +homepag +servic +educ +taiwan +histori +cultur +china +hongkonglaserdisccent +internetdistribut +chines +magazin +multilingu +softwar +digest +taiwan +ryan +smovieplex +sceneri +pictur +china +china +tour +entertain +china +sheng +tian +diwww +page +futur +interest +site +america +best +graduat +school +new +rank +new +graduat +liber +art +rank +comput +scienc +film +music +ieee +comput +societi +monei +nation +scienc +foundat +peterson +guid +postcard +servic +rank +program +incomput +scienc +thesenior +homepag +virtual +tourist +worldmap +yahoo +cornel +chines +christian +fellowship +mandarin +cssa +cornel +univers +weather +univers +binghamton +univers +cornel +univers +suni +stoni +brook +ucla +univers +china diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..04d95fcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,53 @@ +justin +miller +home +page +justin +miller +home +page +navi +uniform +current +master +engin +student +comput +scienc +colleg +ofengin +cornel +univers +semest +teach +assist +com +machin +vision +mani +mani +long +night +found +robot +vision +csrvl +research +assistantwork +prof +ramin +zabih +primari +research +interest +ismachin +vision +particularli +level +imag +process +gener +informationsom +rant +project +info diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..6821a7f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,164 @@ +jeff +moorejeff +moorewel +current +graduat +student +cornel +work +mastersof +engin +comput +scienc +receiv +comput +scienc +purdu +univers +west +lafayett +indiana +went +high +school +hillsboro +high +school +hillsboro +oregon +suburb +portland +employmentmi +resum +anyon +interest +work +intel +internet +product +divis +creat +cool +softwar +spring +classesnba +thrive +inform +revolut +entertain +sectorc +practic +distribut +systemsc +practicum +distribut +system +cornellopoli +game +high +perform +comput +architectur +network +comput +optim +parallel +mpeg +encod +researchfal +classesc +softwar +engin +technolog +techniquec +formal +methodsc +multimedia +system +research +paperc +comput +scienc +colloquiumc +cool +softwar +tool +seminar +present +opendoc +mfcoptim +parallel +mpeg +encod +researchsoftwar +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar +companiesintelsilicon +graphicsibmsunapplemagazinespc +magazinepc +weekpc +computingcomput +shopperwindow +sourcescomput +lifemacusermacweekinteract +weekfamili +pccomput +game +worldelectron +newspapersusa +todaywal +street +journalnew +york +timesphiladelphia +onlineth +daili +new +worldwideth +dalla +morn +new +opinionsth +detroit +free +press +gopherth +knoxvil +new +sentinelth +leader +onlinelat +new +serviceth +nugget +newspap +sister +oregonrworld +orang +counti +registerth +francisco +chronicl +examinersan +jose +mercuryth +seattl +timesnando +netusa +todayboston +globeportland +press +herald +main +sundai +telegramvisitor +sinc +januari +campu +address +mapl +fdithaca +york +last +updat +april +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..8d060eee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,109 @@ +jose +lui +fernandez +home +pagejos +lui +fernandezjos +lui +fernandezmast +engin +depart +comput +sciencecornel +universityithaca +home +address +mapl +avenu +ebithaca +mail +joselui +cornel +us +java +enabl +browser +would +anim +scroll +text +sign +resum +curriculum +vita +interest +current +interest +distribut +system +multimedia +comput +graphic +project +project +imag +video +transit +rivl +exampl +page +presentationc +project +pyramania +game +spaceship +battl +project +report +project +hoca +design +code +oper +system +implement +multitask +virtual +memori +meng +project +project +distribut +system +autonom +vehicl +simul +hobbi +photographi +click +view +picturesmusiccomputerswrit +direct +crazi +movi +video +actor +recruit +friend +current +time +us +java +enabl +browser +would +anim +clock +clock +courtesi +bill +giel +visitor +number +page +better +view +java +enabl +browser diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..be4dc8aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,34 @@ +julin +hurtado +home +pagejulin +hurtado +click +curriculum +vita +cornel +universitymast +busi +administr +johnson +graduat +school +managementmast +engin +depart +comput +science +mail +cornel +colombia +linda +er +master +engin +project +project +distribut +system +autonom +vehicl +simul diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..28b9371e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,32 @@ +janeen +homepagejaneen +reich +welcom +home +page +current +cornel +univers +complet +comput +scienc +august +septemb +join +system +group +texa +instrument +dalla +texa +send +email +jreich +cornel +edumi +resum +current +ad +page +favorit +thing diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..7b355f4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,155 @@ +jodi +shapirojodi +shapiroeduc +receiv +comput +system +engin +univers +massachusett +amherst +current +cornel +get +meng +comput +scienc +graduat +resum +fall +cours +multimedia +systemsc +softwar +engineeringe +comput +network +telecommunicationc +master +researchspr +cours +machin +visionc +high +perform +comput +systemse +high +capac +comput +networksnba +thrive +inform +revolutionc +master +researchma +automot +engineeringinterest +project +design +implement +dynam +gener +synchron +comput +speech +facial +animationlow +cost +portabl +desktop +videoconferenc +window +parallel +object +recognit +applic +facial +recognitioninterest +main +interest +comput +obvious +car +memberof +bodi +mail +list +camaro +firebird +yourselfelectron +fuel +inject +mail +list +although +alwayshav +time +particip +also +mail +list +designingan +system +ground +page +list +bodi +home +page +home +pageefi +home +pagethes +pictur +car +chevi +camaro +sold +stock +engin +speed +gearsmodif +hypertech +stage +chip +flowmast +exhaust +hurst +shifter +grant +steer +wheel +filter +ford +mustang +bought +septemb +still +stock +engin +speedmodif +gear +accel +plug +ford +motorsport +wiresbest +mile +mphbest +chevi +camaro +come +januari +bodi +home +page +chevi +come +septemb +home +pagenumb +visit +sinc +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..cb89ae47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,8 @@ +julia +home +pagejulia +komissarchik +juliak +cornel +eduto +continu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..800239dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,139 @@ +julian +pelenur +julian +emilio +pelenur +julian +cornel +better +pictur +avail +master +engin +cornel +univers +comput +scienc +graduat +cornel +comput +scienc +engin +campu +adress +summit +ithaca +offic +theori +center +engin +theori +centerithaca +offic +comput +scienc +upson +hallcornel +univers +current +occup +fulltim +student +teach +assist +databas +administr +cornel +theori +center +recent +project +global +pointer +complet +toolkit +write +parallel +program +network +workstat +independ +platform +network +topolog +compil +develop +sparcstat +ethernet +network +wfinger +system +search +home +page +document +world +wide +current +develop +cyberserv +grow +need +faster +httpserver +fulfil +increas +demand +servic +addit +grow +commerci +fault +toler +high +avail +becom +critic +paper +describ +design +implement +distribut +fault +toler +http +server +us +horu +prvf +poss +realli +fast +video +thegoal +design +implement +techniqu +achiev +full +screenmot +video +cluster +workstat +network +showthat +innov +snarf +blast +techniqu +capit +hardwar +produc +fast +video +transferwith +compress +full +color +full +screen diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..90f700a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,127 @@ +jason +hickei +home +page +graduat +student +cornel +comput +scienc +depart +supervis +robertconst +summari +current +statu +interest +includ +theori +practic +program +languag +great +resourc +thefox +project +home +page +especi +markleon +resourc +program +languag +research +work +program +languag +mainli +softwar +verif +tool +specif +type +theori +formalsystem +nuprl +develop +cornel +universitydepart +comput +scienc +research +make +higher +levelmodul +abstract +data +type +relat +type +theori +recent +paper +paper +publish +cornella +bibliographi +paper +publishedat +bellcor +also +slide +talk +havegiven +nuprl +seminar +slide +pretti +technic +theygiv +overview +work +done +cornel +want +sequenc +identif +interest +includ +fine +art +galleryof +mine +resourc +fine +art +tryth +orth +fineart +forum +cucshockei +backcountri +take +look +thebackcountri +home +page +perform +servic +depart +publicli +maintainedsoftwar +czar +hockei +hockei +equip +czar +back +hockeyfor +info +theatr +czar +schedul +forth +cornel +center +theatr +art diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..8c5e534e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,45 @@ +koichi +kamijokoichi +kamijo +welcom +home +page +koichi +kamijo +construct +reach +juli +english +japanes +us +thing +sell +back +japan +juli +sold +click +like +sale +class +papershometownseduc +work +experienceskoichi +kamijo +muriel +ithaca +kamijo +cornel +kkamijoh +vnet +go +back +japan +accept +english +japanes +access +time +sinc +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..3c87509c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,146 @@ +karl +friedrich +bhringer +karl +friedrich +bhringer +cornel +univers +dipl +inform +karlsruh +graduat +student +cornel +univers +dept +comput +scienc +upson +hall +ithaca +email +karl +cornel +educurr +address +stanford +univers +robot +laboratori +gate +build +stanford +current +research +interest +micro +robot +manipul +andassembl +cornel +nanofabr +facil +build +microfabr +actuat +arrai +implementmicro +manipul +strategi +gener +interest +innew +devic +handl +andassembl +part +manipul +strategi +withprogramm +forc +vector +field +also +investig +design +autom +micro +structur +earlier +work +univers +karlsruh +germani +includ +develop +better +graph +layout +algorithm +thesi +advisor +professorbruc +donald +founder +director +cornel +comput +scienc +robot +vision +laboratori +project +close +collabor +professor +noel +macdonaldand +hisresearch +group +public +document +confer +announc +call +paper +anim +video +micro +sculptur +invis +cantilev +microfabr +model +frank +lloyd +wright +fallingwat +articl +york +time +magazin +march +wire +octob +offic +nano +work +outin +kwon +club +find +lindseth +climb +wall +navig +page +previou +page +higher +level +deeper +level +next +pagekarl +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..3c3d9657 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,351 @@ +david +karrdavid +karrphd +studentdepart +comput +sciencecornel +univers +upson +hall +ithaca +mail +karr +cornel +edui +student +depart +comput +scienceat +cornel +univers +work +thehoru +project +layer +architectur +reliabl +distribut +system +withprofessor +kenneth +birmananddr +robbert +reness +minor +field +mathemat +concentr +statist +research +interest +engin +layer +commun +protocol +weak +consist +perform +html +java +engin +layer +commun +protocolsmi +research +interest +includ +problem +specifi +implement +verifi +applic +distribut +comput +system +dissert +work +concentr +formalspecif +verif +properti +ofhoru +protocol +layer +us +tempor +logic +action +specifi +variousinterest +fundament +properti +protocol +layer +usedin +horu +protocol +stack +furthermor +write +formula +assum +guarante +stylefor +layer +specifi +properti +might +provid +itsinterfac +depend +properti +layer +andbelow +stack +emploi +straightforward +techniqu +verifi +agiven +stack +provid +certain +desir +properti +thetop +stack +specifi +condit +even +unusualcombin +layer +layer +stack +atyp +stack +order +ultim +user +horu +layer +commun +systemsshould +abl +call +verif +techniqu +help +constructcustom +stack +omit +unnecessari +layer +avoid +theirassoci +cost +confid +includ +layer +stack +order +sufficientto +provid +desir +guarante +work +intend +part +basi +thesecur +harden +horu +project +develop +java +applet +give +rough +demonstr +propos +method +ofverifi +properti +horu +protocol +stack +initi +interest +horu +project +stem +thepromis +horu +protocol +suit +provid +variousguarante +consist +programm +messag +passingenviron +host +crash +messag +delayedor +lost +softwar +develop +engin +haswork +distribut +applic +whose +componentswer +prone +failur +feel +featur +horu +offer +considerablepromis +applic +develop +weak +consistencywhil +cornel +becom +interest +problem +ofdistribut +comput +wide +area +network +look +problem +revis +control +filesin +wide +area +environ +gener +distributedenviron +whose +network +prone +partitionedinto +disconnect +portion +gener +interest +notion +weak +consist +wouldallow +multipl +temporarili +disconnect +site +make +progress +concurr +performancemi +research +cornel +concentr +correct +protocol +measur +high +avail +respons +time +andeffici +resourc +clearli +equal +import +larg +part +problem +appar +random +time +ofsystem +load +activ +distribut +applic +notabl +except +dedic +parallelmachin +behavior +also +suscept +mathemat +analysi +though +differ +kind +encourag +interest +statist +html +javath +world +wide +interest +distribut +applicationwith +mani +possibl +explor +experi +simpl +wai +hypertext +tonavig +inform +appear +myweb +site +lego +toi +hack +java +applet +execut +code +anetscap +browser +download +exampl +abirthdai +puzzl +calcul +tool +forverifi +properti +horu +protocol +stack +profession +affiliationsi +member +ieee +andmaa +informationseemi +linksfor +topic +find +interest +us +last +updat +june +david +karr +karr +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..c8925f0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,191 @@ +welcom +home +pagekartik +kapadiamast +engineeringclass +depart +comput +sciencecornel +universityithaca +home +address +dabnei +drive +diego +california +phone +home +work +mail +kkapadia +qualcomm +comcurr +work +softwar +engin +qualcomm +incorporatedmi +main +area +interest +comput +graphic +distribut +system +comput +network +cours +took +spring +distribut +system +comput +graphic +architectur +high +capac +inform +network +cours +took +fall +oper +system +multimedia +system +softwar +engin +projectshoca +oper +system +chiphoca +pronounc +hodja +full +fledg +oper +system +chip +cornel +hypothet +instruct +processor +support +featur +like +multitask +virtual +memori +hogman +enjoy +game +quick +break +work +hogman +singl +player +game +window +platform +code +interfac +hogman +good +sourc +entertain +take +break +work +click +screenshot +gameboard +click +screenshot +help +screen +click +view +postscript +design +document +scene +transit +effect +rivlrivl +stand +resolut +independ +video +languag +rivl +develop +cornel +univers +jonathan +swartz +brian +smith +excel +languag +develop +multimedia +applic +project +enhanc +rivl +incorpor +primit +implement +scene +transit +effect +click +present +simul +railroad +system +master +engin +project +visual +captur +scientif +aspect +lai +track +vehicl +model +dynam +motion +graphic +code +us +combin +open +inventor +opengl +interfac +virtual +realiti +facil +cornel +click +view +postscript +resumesom +favorit +site +favorit +star +cool +mpeg +clip +lot +lot +music +cool +graphic +page +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..168cb551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,73 @@ +publish +true +writingsi +much +publish +written +thing +proudof +flame +mine +other +morethought +conscienti +objector +arm +conflict +myfirst +publish +letter +publish +grand +rapid +press +fewyear +back +gulf +sinc +time +howev +vestart +keep +work +onlin +lest +wonder +also +write +poetri +thing +dprobabl +rather +famou +essayist +anyhow +like +argu +write +dread +mess +go +byron +center +asuburb +mile +went +high +school +wrote +letter +unabomb +suspect +mathematician +mathematiciansar +suspect +terrorist +think +wrote +letter +editor +newspap +wide +read +letter diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..519ac530 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,360 @@ +katherin +home +page +katherin +guokguo +cornel +educornel +universitydept +comput +scienc +upson +hall +ithaca +student +comput +scienc +cornel +work +distribut +system +interest +scalabl +reliabl +multicastprotocol +work +horu +project +direct +birman +robbert +reness +werner +vogel +recent +publicationskatherin +werner +vogel +robbert +reness +structur +virtual +synchroni +explor +bound +ofvirtu +synchron +group +commun +appear +proceed +sigop +european +workshop +connemara +ireland +septemb +lui +rodrigu +katherin +antonio +sargento +robbert +reness +brad +glade +paulo +verisimo +birman +transpar +light +weight +group +servic +appear +proceed +ieee +symposiumon +reliabl +distribut +system +niagara +lake +canada +octob +also +avail +technic +report +depart +comput +scienc +cornel +univers +robbert +reness +kenneth +birman +brad +glade +katherin +mark +hayden +takako +hickei +dalia +malki +alex +vaysburd +werner +vogel +horu +flexibl +group +commun +system +depart +comput +scienc +cornel +univers +march +research +relat +infodistribut +systemscomput +networkscool +toolsbibliographyconferencesjournalsacademia +industri +infocompani +infoschool +infojob +searchinterest +place +ithaca +austin +lisboa +colorado +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt +inforesearch +relat +infodistribut +systempointershoru +commerci +productspringtotemtransisx +kernel +arizona +microsystemslab +comput +networksmulticast +protocolsn +fromlblgun +cool +tool +consortium +lectur +gener +info +theproject +lectur +html +simpl +html +beginn +sguid +html +html +quickrefer +htmldocument +tabl +content +info +common +gatewai +interfac +httpd +ncsa +httpd +overview +find +imag +html +file +imag +finder +mosaic +sourc +binari +document +xmosaic +ncsa +uiuc +xmosaic +other +info +cern +java +hotjava +bibliographybibliographi +distribut +system +oldindex +index +other +confer +pointer +hpdc +ftc +sosp +sigop +srd +icdc +jsac +journal +ieee +elsevi +scienceacademia +industri +infocompani +info +motorola +openingsibmdelltandemtiapplebel +atlant +school +info +texa +austin +dept +ucsd +gradschool +advic +gradjob +search +databas +databas +ukinterest +place +ithaca +life +cornel +ithaca +weather +moviesbailei +hall +concertslibrari +austin +hightechin +austin +institut +lisboa +inesc +colorado +copper +mountain +resort +summit +counti +coloradooth +infoart +weblouvreth +world +women +linebook +amazon +book +calvinhobb +archivecardsmagicchinaart +china +cook +electron +gourmetl +cordonbleu +cook +school +itali +dessert +fashional +linksa +cjlutz +fashion +wwweb +pagewith +heart +tmexpressfirst +view +fashion +women +wireirc +faqfashion +nethair +crew +diesel +jean +guessfriend +alan +cheng +david +deng +shiji +insur +plan +email +grove +edulibrari +librari +congressmagazin +intertext +wire +timegeorg +gilder +discoveri +mail +electron +postcard +map +music +internet +underground +music +archivesinanet +newsworld +new +brief +sport +open +olymp +stock +wall +streetheadlin +wall +street +journal +sourc +weatherhunt +infoth +lyco +home +page +hunt +informationglob +network +navigatorhom +page +global +network +navig +scout +report +wanderersand +spider +edg +yahoo +refer +netscap +home +page +last +modifi +katherin +kguo +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..93b97a62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,51 @@ +wirefram +rotat +wirefram +rotat +introductionthi +project +desgin +learn +tool +comput +graphic +provid +understand +polygon +form +list +vertic +written +java +simpl +power +java +languag +creat +safe +portabl +interact +object +orient +multi +threader +program +java +program +environ +platform +speific +java +creat +applet +program +react +user +input +dynam +chang +cone +cube +cylind +tetra +toru diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..f83cb391 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,454 @@ +kleinberg +homepag +kleinberg +kleinber +cornel +assist +professor +comput +scienc +cornel +univers +ithaca +research +interest +algorithm +combinatori +optim +emphasi +approxim +comput +geometri +network +optim +distribut +comput +algorithm +molecular +biologi +recent +work +includ +approxim +algorithm +rout +anddisjoint +path +problem +network +adversari +queue +theori +approach +analyz +stabilityof +network +rout +protocol +without +probabilist +assumpt +geometr +method +combinatori +optim +particularlyth +posit +semi +definit +program +geometr +algorithm +studi +molecular +conform +spend +academ +year +visit +almaden +research +center +click +seeselect +publicationsmiscellan +linkspapersapproxim +algorithm +combinatori +optim +kleinberg +singl +sourc +unsplitt +flow +proc +ieee +symposium +foundat +comput +scienc +appear +kleinberg +rubinfeld +short +path +expand +graph +proc +ieee +symposium +foundat +comput +scienc +appear +kleinberg +tardo +disjointpath +dens +embed +graph +proc +ieee +symposium +foundat +comput +scienc +kleinberg +tardo +approxim +disjoint +path +problem +high +diamet +planar +network +proc +symposium +theori +comput +aggarw +kleinberg +williamson +node +disjointpath +mesh +trade +vlsi +layout +proc +symposium +theori +comput +goeman +kleinberg +improvedapproxim +ratio +minimum +latenc +problem +proc +siam +symposium +discret +algorithm +kleinberg +goeman +lovasz +thetafunct +semi +definit +program +relax +vertex +cover +appear +siam +discret +math +line +algorithm +kleinberg +local +problem +formobil +robot +proc +ieee +symposium +foundat +computersci +kleinberg +line +search +simplepolygon +proc +siam +symposium +discret +algorithm +kleinberg +lower +bound +serverbalanc +algorithm +inform +process +letter +yaniv +kleinberg +geometr +serveralgorithm +inform +process +letter +kleinberg +line +algorithm +robotnavig +server +problem +master +thesi +parallel +distribut +comput +andrew +awerbuch +fernandez +kleinberg +leighton +univers +stabil +result +greedi +content +resolut +protocol +proc +ieee +symposium +foundat +comput +scienc +appear +borodin +kleinberg +raghavan +sudan +williamson +adversari +queue +theori +proc +symposium +theori +comput +kleinberg +attiya +lynch +trade +offsbetween +messag +deliveri +quiesc +time +connect +managementprotocol +proc +israel +symposium +theori +comput +system +kleinberg +mullainathan +resourc +boundsand +combin +consensu +object +proc +symposium +onprincipl +distribut +comput +geometr +algorithm +berger +kleinberg +leighton +reconstruct +athre +dimension +model +arbitrari +error +proc +symposium +theori +comput +huttenloch +kleinberg +compar +point +set +project +proc +siam +symposium +discret +algorithm +huttenloch +kedem +kleinberg +dynam +voronoi +diagram +minimum +hausdorff +distanc +pointset +euclidean +motion +plane +proc +symposiumon +comput +geometri +huttenloch +kleinberg +invariantsof +point +line +segment +project +cornel +universitycomput +scienc +technic +report +juli +linkssearch +tool +bibliographiesaltavista +infoseek +excit +yahoo +nynex +yellow +page +glimps +comput +scienc +bibliographi +ncstrl +network +comput +scienc +technic +report +librari +david +jone +hypertext +bibliographi +project +academ +sitescornel +univers +cornel +comput +scienc +cornel +oper +research +comput +scienc +theori +comput +group +stanford +comput +scienc +berkelei +comput +scienc +comput +research +associ +nation +scienc +foundat +theori +computingtc +virtual +address +book +bibliographi +theori +foundat +comput +scienc +crescenzi +kann +compendium +optim +problem +foc +confer +soda +confer +stoc +confer +comput +biologycomput +biologi +carb +biocomput +resourc +sdsc +list +comput +biologi +server +comput +geometrydavid +eppstein +geometri +junkyard +jeff +erickson +comput +geometri +page +internet +securitymitr +corp +secur +inform +resourc +princeton +safe +internet +program +group +rivest +cryptographi +secur +link +miscellaneousnetscap +intellicast +interact +tenni +associ +chess +onlin +talk +kleinbergdepart +comput +scienceupson +hallcornel +universityithaca +kleinber +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..91294a48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,60 @@ +homepag +kazushi +otakota +cornel +edukazushi +melco +current +master +engin +student +comput +scienc +cornellunivers +receiv +univers +tokyo +back +march +work +mitusbishi +electr +corpor +japan +return +degre +worth +page +isund +construct +homepag +start +assign +acquaint +html +forc +cornel +depart +inform +superhighwai +cours +homepag +interest +pictur +music +move +sale +come +take +japan +februari +thing +want +sell +think +advert +cornel +comput +scienc +peopl +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..311385c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,65 @@ +faculti +research +interest +dexter +kozendext +kozenjoseph +newton +professor +engineeringphd +cornel +univers +research +interestsmi +research +interest +includ +algorithm +complex +especiallycomplex +decis +problem +logic +algebra +logic +andsemant +program +languag +paper +avail +onlinekleen +algebra +constraint +type +infer +comput +algebraautomata +theori +algorithm +complex +logicbibliographylist +public +cornel +technic +reportscours +notesc +structur +interpret +comput +programsc +automata +comput +theoryfun +stufffamili +pictur +rugbi +effectcomput +scienc +departmentupson +hallcornel +universityithaca +york +usakozen +cornel +work +home diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..e87b1192 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,56 @@ +home +page +christoph +kreitz +pictur +soon +christoph +kreitz +research +associ +depart +comput +scienc +cornel +univers +ithaca +offic +phone +email +upson +hall +kreitz +cornel +research +topic +program +synthesi +autom +deduct +type +theori +theori +comput +teach +learn +german +lehr +lernen +vorlesungsskript +medienunterst +uumltzt +lehren +person +home +page +inform +avail +soon +last +modifi +novemb +christoph +kreitz +kreitz +cornel +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..bae2e19e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,39 @@ +kuen +heng +kuen +heng +kuen +cornel +kuen +isi +master +engin +comput +scienc +depart +cornel +univers +address +grove +street +newton +telephon +welcom +visit +place +myproject +multimedia +system +cours +would +like +read +daili +new +taiwan +home +countri +enjoi +page +still +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..67b6e606 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,311 @@ +carl +lagoz +person +home +page +carl +lagoz +project +leader +digit +librari +research +group +depart +comput +scienc +upson +hall +cornel +universityithaca +phone +internet +lagoz +cornel +edui +lead +digit +librari +research +groupin +comput +scienc +departmentat +cornel +univers +ourgroup +manag +oper +technic +develop +network +comput +scienc +technic +report +librari +ncstrl +intern +consortium +maintain +adistribut +digit +librari +comput +scienc +research +collaborateson +number +digit +librari +research +issu +develop +davi +thedienstsoftwar +protocol +refer +implement +providesdistribut +digit +librari +server +access +worldwid +dienst +current +enabl +technolog +ncstrl +author +author +number +paper +dienst +drop +publish +world +wide +confer +dienst +architectur +distribut +document +librari +commun +april +page +protocol +server +distribut +technic +report +librari +cornel +comput +scienc +technic +report +dienst +implement +refer +manual +cornel +comput +scienc +technic +report +dienst +build +product +technic +report +server +chapter +advanc +digit +librari +springer +verlag +primari +research +involv +defin +servic +protocolsfor +interoper +digit +librari +infrastructur +area +collabor +corpor +nation +research +initiativesto +extend +implement +digit +object +framework +developeda +part +darpa +fund +comput +scienc +technic +report +project +author +author +number +paper +area +implement +issu +open +architectur +framework +digit +object +servic +cornel +comput +scienc +technic +report +design +interoper +secur +object +store +iso +cornel +comput +scienc +technic +report +secur +repositori +design +digit +librari +dlib +magazin +decemb +part +work +also +member +dlibwork +group +repositori +interfacesand +author +releas +final +report +metadata +workshop +iiin +warwick +final +also +part +work +amveri +interest +extend +world +wide +us +distributedobject +technolog +read +posit +paperfor +joint +workshop +distribut +object +mobil +codeworkshop +research +know +meetm +confer +workshop +meet +find +poor +substitut +person +contact +littl +moreabout +charact +pictur +pagei +luci +daughter +rule +major +life +outsideof +work +time +toddler +constant +challeng +lucyg +mean +life +never +provid +work +also +avid +outdoor +person +site +fast +movingwat +quiet +lakeand +itch +cano +give +beauti +think +bike +ridingalong +quiet +road +backwood +trail +tell +sparehour +run +shoe +breath +deeplyth +fresh +spend +much +time +digit +world +never +substitut +joi +physicalnor +ever +interfer +desir +fight +itspreserv +hope +meet +sometim +carl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..987fb576 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,135 @@ +lidong +zhou +homepag +welcom +lidong +zhou +homepag +depart +comput +sciencecornel +universityithaca +fall +cours +system +concept +local +access +multimedia +system +research +relat +materi +sigop +paper +oasi +cambridg +research +report +adag +author +distribut +applic +group +project +level +secur +java +safe +internet +program +legion +project +sirac +kerbero +network +authent +servic +massiv +distribut +system +ocaml +homepag +advanc +materi +internet +standard +robot +exclus +comput +network +secur +career +document +cornel +career +opportun +jobtrak +colleg +grad +hunter +open +career +center +onlin +servic +careermosa +page +jobweb +home +page +career +xjob +page +friend +homepag +yingjun +fudan +classmat +friend +cornel +inform +resours +tutori +comput +languag +tool +yellow +book +cornel +isso +homepag +sunris +chines +soccer +world +edmund +automobil +buyer +guid +autosit +ultim +auto +buyer +guid +auto +insur +basic +legal +surviv +guid +link +travel +agenc +rank +depart +succe +graduat +school +fudan +homepag +back +indexlast +updat +homepag +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..7b675a58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,197 @@ +hsian +wangthi +page +major +constructionlin +hsian +wang +master +student +comput +scienceat +cornel +univers +degre +comput +inform +scienceat +ohio +state +univers +born +fangliao +small +villag +southern +coast +taiwan +still +construct +fall +classesc +multimedia +system +final +project +orwel +remov +track +object +digit +videoe +comput +networkse +comput +vision +final +project +moment +base +edg +oper +amidonc +autom +video +transcrib +annot +research +advisor +prof +ramin +zabihspr +classesc +natur +languag +processingc +practic +distribut +computingc +practicum +distribut +system +project +distribut +network +visual +managementc +machin +visionc +comput +scienc +colloquimc +project +orwel +object +remov +move +scene +high +perform +system +audit +program +java +webspac +interest +site +link +us +stuffscornel +info +link +depart +annual +reportiee +comput +societytaiwan +headlin +new +sinanet +comth +musicmovi +connect +swartz +movieweb +cool +movi +site +moviemania +cool +link +also +swartz +click +collect +think +picturesth +list +best +sell +book +releas +taiwan +publish +world +journal +bookstor +quot +chines +classic +linux +linkstcl +line +resourc +softwar +engin +galleri +hacksth +earth +home +pagemiscellan +hongkong +bridg +hong +kong +linksfor +like +japanes +anim +take +look +carlo +home +page +jump +anim +link +cja +calanimag +alpha +chapter +berkelei +totoro +home +pagelaputa +castl +nausicaa +vallei +wind +conan +totoro +slump +kiki +legend +galact +hero +ming +anim +pagecampu +address +uptown +eithaca +york +linhsian +cornel +edulast +updat +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..ff01e17d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,43 @@ +libbi +home +pagewelcom +libbi +home +page +collect +thought +essai +last +updat +sept +show +download +theme +song +check +video +clip +show +take +look +read +lista +littl +essai +thing +septemb +thought +june +thought +april +interest +take +look +projectemail +mehit +page +page +counter +courtesi +http +digit diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..329fd89f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,14 @@ +name +offic +upson +hall +offic +hour +offic +mail +lili +cornel +oper +system +take +cours diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..5ceb073f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,122 @@ +lloyd +trefethen +lloyd +trefethenprofessorlnt +cornel +edumi +appoint +cornel +thecomput +scienc +depart +also +affili +thecent +appli +mathemat +depart +mathemat +thecornel +theori +center +field +numer +analysi +scientif +comput +havea +person +view +mean +specif +interest +includ +numer +linear +algebra +numericalsolut +numer +conform +map +approxim +theori +fluid +mechan +recent +year +much +work +hasbeen +relat +normal +matric +oper +matric +oper +whose +eigenvector +notorthogon +applic +textbooksfinit +differ +spectral +method +textbook +numer +linear +algebra +textbook +trefethen +siam +recent +papersmultimatlab +matlab +multipl +processorsmatrix +iter +gap +betweenpotenti +theori +convergencepseudospectra +linear +operatorssom +recent +papersoth +itemsclass +paper +numer +analysiscurriculum +vitaepseudospectra +bibliographi +peter +alfeldcurr +student +cornel +vicki +howlegubjrn +jnsson +yohan +kimdivakar +viswanathprevi +student +cornel +jeff +baggetttobi +driscollalan +edelman +loui +howel +walter +mascarenhasnoel +nachtigalsatish +reddi +chuan +tohsom +colleaguesjim +demmelann +greenbaummartin +gutknechtd +nick +highamann +trefethenandr +weideman diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..82f45122 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,126 @@ +luci +home +page +luci +welcom +student +comput +scienc +depart +cornel +univers +ithaca +interest +comput +topic +network +distribut +system +programminglanguag +internet +applic +hobbi +ping +pong +badminton +swim +travel +photograph +read +music +resum +whiz +stock +search +analysi +tool +degre +project +spring +class +practic +distribut +comput +practicum +distribut +system +databas +system +databas +manag +fall +class +oper +system +softwar +engin +multimedia +systemscontact +yuwu +cornel +favorit +site +softwar +stuff +java +spring +system +corba +home +page +silvano +home +page +tkcgi +html +vrml +object +orient +languag +databas +languag +oper +system +network +network +manag +network +product +server +securitypc +lube +tune +internet +ipngip_atmcomput +compani +netscap +busi +cube +sapient +microsoft +novel +china +china +chines +relat +site +chines +stuff +misc +jobtrack +new +artvark +galleri +internet +underground +music +archiv +person +connect +cornel +librari +catalog +travel +mail +sunlab +caltech diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..3cd8021d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,56 @@ +linda +home +page +cornel +universitylinda +lxwu +cornel +master +engin +student +comput +scienc +depart +cornel +univsers +receiv +univers +massachusett +lowel +sinc +work +digit +equip +corp +banyan +system +main +research +interest +network +mulitimedia +click +resum +project +nativ +protocol +stack +window +us +multicast +group +layer +video +electron +commerc +kramer +mart +coursesfal +oper +system +engin +comput +network +multimedia +photoesus +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..00090d6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,13 @@ +nikolai +mateevnikolai +mateevgradu +studentmateev +cornel +upson +halldepart +comput +sciencecornel +universityithaca +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..0797e2e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,61 @@ +matthew +morgenstern +home +pagematthew +morgensternresearch +project +leaderaddress +engin +theori +centerxerox +design +research +institutecornel +universityithaca +phone +email +morgenstern +cornel +edustatu +visit +fellow +cornel +comput +scienc +princip +scientist +xerox +laboratori +comput +scienceproject +distribut +heterogen +databas +system +arpa +fund +project +metadata +manag +databas +multimedia +document +arpa +supervis +select +student +project +relat +area +fundedresearch +work +academ +year +summer +avail +stop +chat +inform +come +page +soon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..fb426793 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,48 @@ +monika +henzing +homepagemonika +rauch +henzingerassist +professorcomput +scienc +departmentcornel +universityithaca +email +cornel +eduphon +current +leav +digit +equip +corpor +system +research +centerhomepageresearch +interestscombinatori +graph +algorithm +especi +dynam +graph +algorithm +random +data +structur +graph +theori +data +structur +lower +bound +recent +public +dynam +graph +algorithm +project +pageprogram +committe +stoc +soda +homepag +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..cbc571ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,100 @@ +stanlei +huang +home +page +stanlei +huangmast +engin +studentmhuang +cornel +sheldon +courtcornel +univers +ithaca +master +engin +comput +scienc +cornel +univers +class +bachelor +scienc +comput +scienc +univers +kentucki +kentucki +area +interest +oper +systemsdistribut +systemsdatabas +system +inform +retrievalgraph +user +interfacesoth +interest +movi +tenni +horse_back +ride +travel +read +work +project +distribut +comput +plan +myadvisor +werner +vogel +robbertvan +reness +object +project +integr +horu +plan +link +relat +project +planplan +distributionplan +updateplan +faqhorusc +final +exam +paper +collect +distribut +share +memorydistribut +share +memorysom +technic +paper +interest +group +communicationsnapshotu +user +level +network +interfac +architecturejobscar +pathbai +area +jobscyberezumescar +opportunitiesus +stufftechn +paper +field +searchbel +labsspbsd +sourcesjavarfclast +modifi +stanlei +huang +mhuang +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..53e93436 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,72 @@ +lynett +millett +homepag +lynett +millettdepart +comput +sciencecornel +universityithaca +millett +cornel +participatoryform +mass +speech +develop +internetdeserv +highest +protect +government +intrus +decis +aclu +reno +challeng +script +second +year +skit +cuc +holidai +parti +person +inform +pictur +cat +last +updat +list +person +homepag +list +link +never +abl +find +precis +femin +know +peopl +call +feminist +whenver +express +sentiment +differenti +doormat +prostitut +rebecca +west +last +modifi +octob +comment +welcom +copi +public +pleas +look +millett +cornel +copyright +lynett +millett diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..77709323 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,112 @@ +mishaal +home +pagemisha +home +page +kuwaiti +student +cornel +univers +master +engin +computersci +program +mengc +graduat +doubl +major +electr +engin +andcomput +scienc +worcest +polytechn +institut +inworcest +coolest +place +earth +list +stuff +involv +interest +temporari +link +cornel +servic +bearaccess +menu +link +cours +take +machin +vision +newgroupc +practic +distribut +system +practicum +newgroupc +high +perform +comput +system +newgroupc +advanc +languag +implement +newgroup +high +capac +network +newgroupnba +databas +manag +newgroupoptim +video +transmiss +meng +project +extens +kuwait +home +pagemi +resum +check +stock +quotescool +link +public +server +hope +offer +conot +soon +weather +ithaca +latest +new +cann +intern +film +festiv +everyth +wrong +reason +want +ever +accus +nerd +well +sure +home +interest +cool +link +home +page +mishaal +almashanmisha +cornel +educornel +univers diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..7596b0ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,173 @@ +mike +pagethi +page +yeah +date +version +page +pleas +recent +browser +stuff +quit +date +would +like +sign +myguestbook +would +cool +chanc +make +opinion +count +dark +stockholm +right +take +look +vote +look +poll +result +coupl +link +peopl +know +guess +pretti +cool +ryan +call +beavi +vitya +better +construct +danc +frog +stuff +maria +pleas +mark +korbi +realli +know +last +name +andrew +corbett +suck +card +cool +nicknam +kevin +donnel +last +name +complet +love +pictur +eryn +crave +attent +want +movi +make +graphic +class +golf +amaz +anim +plai +golf +mpeg +link +place +univers +stop +expand +click +find +world +realli +need +beavi +click +person +pick +cool +site +look +made +onlin +student +homepag +list +lame +list +guttermouth +link +page +brought +byjust +kid +cours +kid +member +internet +link +exchang +peic +page +connect +whole +bunch +other +line +thing +like +come +join +hand +sing +togeth +spirit +harmoni +someth +visitor +number +thing +keep +reset +check +complet +statist +page +thing +pretti +cool +accuar +stuff +mike +korbi +pleas +send +mail +atmak +cornel +edubas +page +pretti +much +coupl +random +imag +realli +relat +anyth +thank +everybodi +page +idea +us +make diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..aba58fd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,25 @@ +nobuhiko +mukainobuhiko +mukai +home +page +nobuhiko +mukai +construct +research +effect +jpeg +compressionon +multimedia +system +last +fall +semest +made +anim +titl +magicon +comput +graphic +spring +semest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..56ccd03c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,24 @@ +homepag +nichola +how +nichola +how +click +photo +imag +graduat +studentdepart +comput +sciencecornel +universityithaca +email +nihow +cornel +eduoffic +upson +hall +phone +resum +refer +person +info diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..f85c6b33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,27 @@ +niko +home +page +research +niko +pitsiani +upson +hall +dept +comput +scienc +cornel +univers +ithaca +work +home +niko +cornel +public +lectur +teach +java +vita +pointer +sinc +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..86764b58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,325 @@ +niko +chrisochoid +niko +chrisochoid +comput +scienc +depart +purdu +univers +chrisochoid +work +touch +mani +facet +parallel +distribut +comput +work +includ +research +parallel +runtim +support +back +system +parallel +compil +problem +solv +environ +multicomput +parallel +algorithm +schedul +load +balanc +data +distribut +adapt +comput +design +implement +mani +softwar +compon +well +known +simul +environ +parallel +ellpack +current +project +workshop +bernoulli +project +prema +portabl +runtim +environ +multicomput +architecur +comput +sensit +messag +dynam +load +balanc +grid +gener +project +cornel +binari +black +hole +grand +challeng +parallel +ellpack +student +project +master +engin +project +student +florian +sukup +kodukula +indupraka +pingali +vineet +ahuja +master +reza +behforooz +undergradu +former +student +animesh +chatterje +rajani +vaidyanathan +select +paper +task +parallel +implement +bowyer +watson +algorithm +chrisochoid +sukup +appear +proceed +offifth +intern +confer +numer +grid +gener +incomput +fluid +dynam +relat +field +multithread +model +dynam +load +balanc +parallel +adapt +comput +niko +chrisochoid +ctctr +octob +appear +appli +numer +mathemat +journal +parallel +object +orient +softwar +tool +kale +chrisochoid +kohl +yellick +appear +journal +scientif +program +menu +map +environ +unstructur +structur +numer +parallel +grid +gener +chrisochoid +thompson +contemporari +mathemat +key +map +algorithm +softwar +environ +data +parallel +iter +solver +chrisochoid +housti +rice +special +issu +journal +parallel +distribut +comput +data +parallel +algorithm +program +april +comput +toolkit +collid +black +hole +chrisochoid +haupt +proceed +aiaa +fluid +dynam +confer +colorado +spring +june +altern +data +map +scalabl +iter +solver +parallel +grid +gener +chrisochoid +proceed +scalableparallel +librari +confer +nation +scienc +foundat +engineeringresearch +center +comput +field +simul +mississippi +state +mississippi +partit +heurist +comput +base +parallelhardwar +geometri +characterist +chrisochoid +rice +advanc +comput +method +partial +differentialequ +vichnevetski +knight +richter +imac +brunswick +page +domain +decompos +softwar +tool +map +comput +parallel +architectur +chrisochoid +housti +housti +papachi +kortesi +rice +domain +decomposit +method +partial +differenti +equat +proceed +intern +symposium +domaindecomposit +method +moscow +ussr +glowinski +siam +public +page +parallel +ellpack +numer +simul +programmingenviron +parallel +mimd +machin +housti +rice +chrisochoid +karathanas +papachi +samartzi +vavali +yang +wang +and +weerawarana +proceed +intern +confer +onsupercomput +public +page +nikosc +cornel +advanc +comput +research +institut +comput +scienc +cornel +theori +center +cornel +univeristi +rhode +hall +room +ithaca +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..fe69afc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,105 @@ +welcom +prema +parallel +runtim +support +system +prema +portabl +runtim +environ +multicomput +architectur +prema +portabl +runtim +environ +multicomput +architectur +advanc +comput +research +institut +cornel +theori +center +cornel +univers +overview +prema +runtim +support +system +parallel +compil +andproblem +solv +environ +target +scientif +computingappl +prema +build +port +cornel +implement +design +varieti +comput +prema +suppot +global +address +space +memori +model +data +task +parallel +program +model +multi +thread +style +execut +automat +work +share +mechan +dynam +load +balanc +paper +multithread +model +dynam +load +balanc +parallel +adapt +pdecomput +niko +chrisochoid +ctctr +journal +appli +numer +mathemat +relat +research +port +portabl +runtim +system +group +pcrc +parallel +compil +runtim +consortium +copyright +copi +niko +chrisochoid +nikosc +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..b4e60b80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,31 @@ +pavel +naumov +welcom +home +page +invit +learn +locat +cyberspac +real +world +look +work +nuprl +project +tire +take +rest +galleri +visit +cinema +look +photo +orplai +game +java +written +sundai +send +mail +place diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..a5e914df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,313 @@ +david +pearson +david +pearson +research +interest +thesi +investig +highli +scalabl +parallel +comput +consistingof +simpl +processor +connect +dimension +mesh +guid +vision +work +time +perhap +year +henc +materi +scienc +taken +place +comput +architectur +comput +crystal +processor +molecul +lattic +long +goal +realiz +prepar +theubiquit +parallel +offer +algorithm +must +heed +lawsof +physic +attent +chip +design +spatial +layoutand +current +hidden +cost +commun +accomplishedbi +design +algorithm +mesh +pursu +vision +requir +theoret +practic +work +work +could +character +feasibl +studi +ihav +produc +cellular +architectur +couldb +effici +realiz +current +hardwar +simul +thisarchitectur +algorithm +program +oper +system +designfor +gener +purpos +comput +believ +gener +purpos +comput +problem +like +proteinstructur +grand +challeng +parallel +architectur +parallelcomput +power +realli +succe +becom +commodityand +sold +desktop +machin +video +game +direct +futur +research +includ +vlsi +implement +architectureand +design +program +languag +wide +us +languag +hideth +detail +machin +instruct +reflect +underlyingvon +neumann +architectur +believ +connect +architectureha +good +thing +algorithm +design +realli +exploit +parallelmachin +need +languag +cost +oper +easyto +estim +neumann +machin +public +dunten +pearson +arm +kiewit +network +high +speed +campu +network +ieee +comput +societi +intern +confer +ieee +compcon +fall +pearson +pillai +algorithm +near +optim +placement +sensor +element +ieee +transact +inform +theori +pearson +vazirani +fast +parallel +algorithm +find +maxim +bipartit +foundat +softwar +technolog +theoret +comput +scienc +publish +lectur +note +comput +scienc +pearson +vazirani +effici +sequenti +parallel +algorithm +maxim +bipartit +set +journal +algorithm +johnson +pearson +pingali +find +region +fast +singl +entri +singl +exit +control +region +linear +time +cornel +tech +report +johnson +pearson +pingali +program +structur +tree +comput +control +region +linear +time +proceed +sigplan +confer +program +languag +design +implement +pldi +publish +sigplan +notic +pearson +polynomi +time +algorithm +chang +make +problem +cornel +tech +report +pearson +instruct +schedul +global +regist +alloc +simd +multiprocessor +intern +workshop +parallel +algorithm +irregularli +structur +problem +irregular +sept +publish +lectur +note +comput +scienc +pearson +zippel +global +regist +alloc +simd +multiprocessor +journal +comput +scienc +technolog +allerton +press +pearson +parallel +implement +select +area +cryptographi +appear +comput +scienc +depart +upson +hallcornel +universityithaca +york +usaemail +pearson +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..88379d08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,191 @@ +todd +peskin +page +todd +peskin +page +pictur +content +work +favorit +site +resum +cours +taken +student +year +meng +program +cornel +univers +joint +degre +program +offer +jointli +colleg +engin +johnson +graduat +school +manag +receiv +master +engin +comput +scienc +follow +year +receiv +master +busi +administr +also +current +presid +cornel +chapter +acacia +fratern +brother +cornel +chapter +chapter +acacia +would +like +becom +part +mail +list +pleas +contact +mail +cornel +eduand +soon +suppli +chapter +andyour +roll +number +abl +complet +request +quickli +well +best +reach +mail +cornel +check +mail +year +round +alwai +contact +sinc +meng +student +also +found +mani +time +log +depart +comput +work +fall +semest +junior +year +follow +semest +workeda +intern +intern +throughth +engin +cooper +program +cornel +program +enabl +cornellundergradu +student +colleg +engin +supplement +classroom +knowledg +practic +experi +work +compani +theirfield +experi +work +develop +enhanc +nation +level +client +server +databas +system +server +softwar +isrun +microsystem +comput +client +softwar +run +window +favorit +site +espn +page +stock +quot +onlin +java +page +final +project +larg +mpeg +file +client +comput +page +creat +visitor +sinc +februari +page +still +construct +hope +content +page +possibl +includ +us +java +applet +case +wonder +creat +ticker +tape +applet +construct +applet +borrow +permiss +java +develop +todd +peskin +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..04034ec2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,74 @@ +dave +pierc +home +page +david +pierc +student +cornel +univers +comput +scienc +address +stuff +offic +upson +hall +home +valentin +ithaca +person +stuff +second +year +student +comput +scienc +recent +home +pittsburgh +pennsylvania +although +familycurr +resid +read +pennsylvania +read +citi +approxim +halfwai +philadelphia +andharrisburg +famou +shop +outlet +otherwis +younev +want +howev +pittsburgh +great +place +mani +alreadi +know +sinceit +imposs +ithaca +without +go +throughpittsburgh +stuff +month +favorit +cornel +quot +list +work +stuff +home +page +david +pierc +pierc +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..e8a59d93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,166 @@ +indupraka +kodukula +home +page +indupraka +kodukula +engin +theori +center +cornel +univers +ithaca +praka +cornel +student +depart +comput +scienc +cornel +univers +prior +tothat +undergradu +comput +scienc +madra +cornel +work +bernoulli +group +prof +keshav +pingali +member +group +nawaaz +ahm +vladimir +kotlyar +vijai +menon +paul +stodghil +also +affili +advanc +comput +research +institut +cornel +theori +center +research +work +center +interplai +applic +compil +runtim +system +tradit +andmultiprocessor +architectur +applic +deriv +fromscientif +comput +imag +process +multimedia +withibm +vliw +group +hasinterest +comput +architectur +well +public +talk +given +seri +talk +dens +compil +technolog +first +talk +athp +chelmsford +wasabout +necess +deal +imperfectli +nest +looptransform +abl +handl +trivial +code +present +framework +perform +imperfectli +nest +loop +transform +loopparallel +seminar +schloss +dagstuhl +april +summer +present +talk +watson +regard +useof +loop +transform +vliw +compil +octob +present +talk +lab +palo +alto +regardingdata +centric +multi +level +block +teach +taught +system +program +andoper +system +summer +project +czar +instal +maintain +support +packag +availableund +gener +public +licens +departmentmachin +check +home +andfind +handi +tip +abl +alsofind +extens +info +packag +support +random +link +person +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..03d547e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,56 @@ +praveen +seshadri +home +page +praveen +seshadri +assist +professor +comput +scienc +depart +cornel +univers +upson +hall +ithaca +offic +advanc +databas +system +fall +predat +dbm +project +adt +know +case +enhanc +abstract +data +type +sigmod +submiss +profession +public +project +time +databas +order +manag +sequenc +data +postscript +thesi +page +tree +save +format +person +warren +road +ithaca +ranjani +ramamurthi +green +packer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..cb92f567 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,770 @@ +home +pageth +project +queri +sequenc +data +document +construct +time +order +databas +order +time +databas +time +databas +order +document +content +project +objectivescurr +statusmotiv +exampleseq +data +model +sequin +queri +languageoptim +techniquesseq +system +developmentpublicationsrel +workcontact +informationproject +object +number +import +databas +applic +requir +processingof +larg +amount +order +sequenc +data +domain +theseappl +includ +financi +manag +histor +analysi +econom +social +scienc +metereolog +medic +scienc +andbiolog +scienc +exist +relat +databas +inadequ +regard +data +collect +treat +set +sequenc +consequ +express +sequenc +queri +tediou +evalu +ineffici +databas +model +data +us +abstract +sequenc +allow +data +sequenc +queri +declar +manner +util +order +semanticstak +advantag +uniqu +opportun +avail +queri +optim +evaluationintegr +sequenc +data +relat +data +user +canstor +queri +combin +relat +sequencesthes +requir +serv +goal +project +variou +kind +sequenc +need +support +tempor +sequenc +themost +import +kind +queri +express +us +notion +like +next +previou +natur +consid +sequenc +queri +optim +evalu +effici +issu +need +studi +theori +databas +system +need +built +demonstr +feasibl +theoret +idea +project +statusth +current +statu +project +defin +data +model +support +import +kind +sequenc +data +also +defin +algebraicqueri +oper +compos +form +sequenc +queri +analogousto +composit +relat +algebra +oper +form +relat +queri +describ +sequenc +queri +effici +process +identifi +variou +optim +techniqu +sequenc +queri +languag +sequin +candeclar +express +queri +sequenc +sequin +queri +includ +embed +express +relat +queri +languag +likesql +vice +versa +build +disk +base +databas +system +demonstr +feasibl +propos +system +implement +model +us +nest +complex +object +architectur +built +shore +storag +manag +process +sever +megabyt +data +relat +sequenc +support +integr +extens +manner +motiv +exampl +sequenc +querya +weather +monitor +system +record +inform +variou +meteorolog +phenomena +sequenti +occurr +phenomena +variou +meteorolog +event +sequenc +time +record +scientist +ask +queri +volcano +erupt +didth +recent +earthquak +strength +greater +richter +scale +queri +express +relat +queri +languag +like +complex +featur +like +groupbi +claus +correl +subqueri +aggregatefunct +requir +convent +relat +queri +optim +would +find +effici +queri +execut +plan +even +given +knowledg +earthquak +volcano +relat +sort +time +howev +effici +plan +exist +model +data +sequencesord +time +sequenc +scan +lock +step +similar +sort +merg +join +recent +earthquak +record +scan +store +temporari +buffer +whenev +volcano +record +process +valu +recent +earthquak +record +store +buffer +check +strength +greater +possibl +gener +answer +queri +therefor +process +singl +scan +sequenc +us +littl +memori +optim +sequenti +data +queri +data +modelth +detail +data +model +aredescrib +publish +paper +click +postscript +version +present +gist +basic +model +sequenc +record +map +ordereddomain +posit +mani +mani +relationship +record +andposit +view +dual +distinct +wai +recordsmap +posit +posit +map +record +view +call +posit +record +orient +respect +give +rise +queri +oper +base +view +queri +sequenc +could +requir +oper +either +flavor +record +orient +oper +similar +relationaloper +includ +variou +kind +join +overlap +contain +andaggreg +oper +extens +explor +researchersin +tempor +databas +commun +posit +oper +includ +next +previou +offset +movingaggreg +oper +allow +zoom +oper +sequenc +mean +collaps +expand +order +domain +associ +sequenc +instanc +daili +sequenc +could +zoom +collaps +weekli +sequenc +zoom +expand +hourli +sequenc +last +part +model +deal +oper +group +set +sequenc +advantag +make +easi +model +queri +involv +sequenc +collect +case +mani +real +worldsitu +sequenc +oper +extend +work +group +similar +sequenc +instead +singl +sequenc +extensionof +model +indic +practic +implement +ofseq +would +probabl +involv +nest +complex +object +system +sequin +queri +languagew +devis +queri +languag +call +sequin +usingwhich +declar +sequenc +queri +specifi +languagei +similar +flavor +except +input +queriesa +well +result +queri +sequenc +click +descript +sequin +languag +exampl +optim +techniquesw +propos +optim +techniqu +sequenc +queri +involv +posit +oper +exist +techniqu +thathav +propos +queri +record +orient +oper +optim +queri +transform +meta +data +cach +intermedi +result +effici +evalu +queri +optim +queri +evalu +plan +gener +us +algorithm +reli +cost +estim +import +observ +access +sequenc +data +singl +stream +probabl +effici +evalu +strategi +take +account +detail +optim +techniqu +aredescrib +publish +paper +click +postscript +version +system +developmentth +databas +system +client +serverarchitectur +support +multipl +client +viaa +multi +thread +server +server +built +ontop +shore +storag +manag +sequin +subset +support +queri +languageswhich +embed +insid +data +mode +nest +complex +object +model +allow +arbitrarylevel +nest +relat +insid +sequenc +viceversa +system +also +extens +provid +supportfor +data +type +order +domain +user +defin +function +storag +implement +queri +languag +detailson +system +click +publicationssequ +queri +process +praveen +seshadri +miron +livni +raghu +ramakrishnan +proceed +sigmod +confer +data +manag +framework +sequenc +datapraveen +seshadri +miron +livni +raghu +ramakrishnan +proceed +ieee +confer +data +engin +march +design +implement +sequenc +databas +systempraveen +seshadri +miron +livni +raghu +ramakrishnan +submit +vldb +next +sequenc +queriesraghu +ramakrishnan +michael +cheng +miron +livni +praveen +seshadri +proceed +intern +confer +manag +data +comad +decemb +relat +workthedevis +project +complementari +provid +visualizationenviron +us +explor +sequenc +data +devis +front +queri +pose +databas +server +answer +examin +graphic +also +shore +project +storag +manag +us +peopl +work +relat +project +madison +databas +research +group +madison +depart +servercontact +informationfor +inform +contact +praveen +seshadri +praveen +wisc +eduraghu +ramakrishnan +raghu +wisc +edumiron +livni +miron +wisc +educomput +scienc +depart +univers +wisconsin +dayton +street +madison +last +modifi +praveen +seshadripraveen +seshadri +praveen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..f224f2a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,56 @@ +ralph +benzingerralph +benzingerw +sich +seinen +lorbeeren +ausruht +trgt +derfalschen +stell +stori +exchang +student +univers +karlsruh +germani +german +comput +scienc +august +fellow +studienstiftung +deutschen +volk +fulbright +fellow +member +siemen +international +studentenkrei +alumnusat +cornel +graduat +student +depart +comput +scienc +cours +taken +advanc +program +languag +design +analysi +algorithm +reason +knowledg +contact +inform +mail +ralph +cornel +offic +upson +hall +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..22fe7158 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,759 @@ +mathemat +comput +gener +spoken +document +titl +audio +aster +demonstrationi +dedic +guid +aster +audio +system +technic +read +comput +system +forrend +technic +document +audio +aster +develop +myphd +page +audio +format +version +thesi +approxim +hour +produc +aster +made +avail +record +blind +first +comput +gener +talk +book +abstract +print +audio +format +version +hypertext +document +demonstr +audio +render +gener +aster +enhanc +demo +us +inlin +imag +exampl +made +three +compon +origin +latex +input +audio +format +output +produc +aster +speech +produc +dectalk +digit +mulaw +aster +us +stereo +render +tabl +effect +convei +mono +encod +visual +format +version +produc +latex +dvip +demo +exampl +demonstr +progress +difficult +suggest +initi +section +sequenti +short +demo +typic +show +peopl +first +three +section +andround +continu +fraction +section +quick +overview +faad +bruno +formula +postscript +file +contain +exampl +casey +want +look +first +place +singl +file +contain +audio +examplessinc +would +section +simpl +fraction +express +exampl +demonstr +voic +inflect +paus +toconvei +group +express +succinctli +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +state +vari +along +dimens +audio +space +renderingsub +express +section +superscript +subscript +convei +subscript +superscript +visual +attribut +vari +audiost +along +dimens +orthogon +independ +dimensionus +convei +express +allow +nest +mutual +independ +concept +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +knuth +exampl +fraction +expon +exampl +taken +verbatim +book +donald +knuth +us +book +demonstr +power +layoutoper +notic +exampl +compris +symbol +verydiffer +aster +render +unambigu +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +continu +fraction +move +along +dimens +audio +space +defin +percept +monotonicchang +notion +percept +monoton +vital +convei +nest +audio +latex +postscript +section +simpl +school +algebra +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +squar +root +notic +choic +unambigu +render +follow +express +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +trigonometr +ident +written +mathemat +notat +ambigu +hard +recogn +notic +complet +absenc +parenthesi +exampl +aster +us +sever +heurist +construct +correct +tree +structur +forthes +express +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +logarithm +notic +context +specif +render +speak +base +logarithm +render +chosen +reduc +cognit +load +base +oppos +base +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +seri +context +specif +render +rule +allow +aster +interpret +superscript +asexpon +interpret +hard +wire +render +isfulli +customiz +user +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +integr +first +exampl +probabl +innocu +also +mostdifficult +recogn +imposs +determin +variabl +ofintegr +notic +aster +interpret +tripl +integr +nest +applic +theintegr +oper +user +brows +tripl +integr +listen +piec +integr +shown +exampl +trick +experienc +ofhuman +reader +error +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +summat +notic +express +written +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +limit +audio +latex +postscript +audio +latex +postscript +section +cross +referenc +equat +follow +section +meant +illustr +aster +render +ofcross +refer +effect +aster +us +interact +aster +enabl +listen +give +meaning +name +cross +referenceableobject +us +name +refer +object +latercross +refer +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +distanc +formula +notic +aster +produc +good +inton +structur +speak +text +thati +intermix +mathemat +audio +latex +postscript +section +quantifi +express +quantifi +present +interest +challeng +aster +recogn +audio +latex +postscript +section +exponenti +percept +monoton +allow +aster +convei +followingdeepli +nest +express +succinctli +exampl +produc +emac +calcul +full +fledgedsymbol +algebra +system +aster +interfac +directli +calcul +render +output +justa +well +render +document +audio +latex +postscript +audio +latex +postscript +audio +latex +postscript +section +gener +matrix +aster +us +stereo +effect +convei +dimension +structur +thematrix +render +commenc +left +move +progress +right +aseach +element +spoken +audio +latex +postscript +section +bruno +formula +section +present +bruno +formula +taken +knuth +ofcomput +program +first +heard +spoken +reader +talk +book +took +secondsto +speak +sinc +render +produc +aster +util +featur +audio +spacenot +avail +human +reader +still +reader +changeth +size +shape +head +talk +render +take +second +hear +soon +even +long +forget +begin +thetim +hear +later +present +render +us +variabl +substitut +power +techniquefor +convei +level +structur +complex +express +notic +proper +inton +structur +produc +text +intermix +mathemat +audio +latex +postscript +audio +latex +postscript +bruno +formula +glori +audio +second +latex +postscript +aster +process +complex +express +like +upon +request +replac +complex +express +meaning +identifi +renderingsconvei +level +structur +listen +listen +thesub +express +separ +sinc +substitut +process +perform +aster +latex +orpostscript +equival +audio +output +case +level +formula +audio +second +lower +constraint +audio +second +numer +audio +second +denomin +audio +second +raman +raman +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..b44b464b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,100 @@ +ravi +kumar +ravi +kumar +depart +comput +scienc +cornel +univers +ithaca +ravi +cornel +program +check +approxim +check +polynomi +function +equat +ieee +foundat +comput +scienc +octob +funda +uumln +ronitt +rubinfeld +effici +self +test +self +correct +linear +recurr +ieee +foundat +comput +scienc +octob +sivakumar +self +test +without +gener +bottleneck +foundat +softwar +technolog +theoret +comput +scienc +lnc +decemb +sivakumar +learn +theori +learn +bound +width +branch +program +confer +comput +learn +theori +juli +funda +uumln +ronitt +rubinfeld +combinator +approxim +latin +squar +extens +confer +comput +combinator +lnc +june +alexand +russel +ravi +sundaram +parallel +process +scalabl +studi +intern +confer +parallel +process +august +jeyakumar +muthukumarasami +umakishor +ramachandran +gautam +shah diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..5bc2b01a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,210 @@ +ramin +zabih +home +pageramin +zabihassist +professorrdz +cornel +researchmi +research +interest +comput +vision +multimedia +current +interest +construct +search +engin +imag +us +method +develop +recent +think +econom +impact +freeli +avail +price +inform +essai +subject +appear +phil +agr +electron +newslett +network +observ +march +studentsi +work +student +jing +huang +vera +kettnak +olga +veksler +also +spend +fair +amount +time +variou +undergradu +princip +greg +pass +justin +voskuhl +undergradu +includ +scott +cytacki +justin +miller +szewczyk +publicationsmost +public +avail +postscript +acrobat +format +free +reader +varieti +differ +architectur +avail +adob +histogram +refin +content +base +imag +retriev +greg +pass +ramin +zabih +ieee +workshop +applic +comput +vision +sarasota +florida +decemb +compar +imag +us +color +coher +vector +greg +pass +ramin +zabih +justin +miller +fourth +confer +multimedia +boston +massachusett +novemb +featur +base +algorithm +detect +classifi +scene +break +ramin +zabih +justin +miller +kevin +third +confer +multimedia +francisco +california +novemb +parametr +local +transform +comput +visual +correspond +ramin +zabih +john +woodfil +third +european +confer +comput +vision +stockholm +sweden +teachingi +current +teach +introduct +comput +program +spring +teach +cours +comput +vision +interest +cours +scribe +note +lectur +avail +page +also +taught +introduct +comput +program +profession +activitiesi +program +comitte +cvpr +ieee +confer +comput +vision +pattern +recognit +held +juan +june +also +organ +committe +ieee +workshop +content +base +access +imag +video +librari +held +conjunct +cvpr +acknowledgementsthi +page +design +courtesi +huttenlocherlast +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..ea642b8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,4 @@ +roderick +moten +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..736019bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,55 @@ +ronitt +rubinfeld +homepageronitt +rubinfeldi +assist +professor +cornel +depart +comput +scienc +recent +paper +talk +cours +random +comput +spring +fall +engin +fall +graduat +student +funda +ergun +ravi +kumar +comput +scienc +fair +homepag +wasserman +page +describ +work +research +area +result +check +address +ronitt +rubinfeldcomput +scienc +depart +upson +hallcornel +universityithaca +york +telephon +email +ronitt +cornel +edupictur +nephew +eitan +rubinfeld diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..d6230feb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,104 @@ +friedmanroi +friedmanpost +doctor +associ +cornel +universityroi +cornel +edui +post +doctor +associ +depart +comput +scienceatcornel +univers +work +withken +birman +androbbert +rennessein +area +distribut +system +mainli +thehoru +project +receiv +thedepart +comput +scienc +thetechnion +israel +institut +technolog +advisor +washagit +attiya +thesi +titl +wasconsist +condit +distribut +share +memori +current +also +involv +themilliped +project +work +withassaf +schuster +thedepart +comput +scienc +thetechnion +israel +institut +technolog +recent +papersr +friedman +birman +trade +consist +avail +distribut +system +technic +report +depart +comput +scienc +cornellunivers +friedman +birman +us +group +commun +technolog +implement +reliabl +scalabledistribut +coprocessor +appear +tina +friedman +vaysburd +implement +replic +state +machin +partition +network +technic +report +depart +comput +scienc +cornellunivers +full +list +public +clickher diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..daef0d32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,36 @@ +daniela +home +page +daniela +research +associ +comput +scienc +cornel +photograph +address +upson +hall +depart +comput +scienc +cornel +univers +ithaca +model +simul +home +page +recent +paper +version +onlin +tech +report +cornel +librari +catalogc +dept +infodesign +research +institut diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..ad27eeb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,126 @@ +robbert +renesserobbert +renessesenior +research +associatecornel +universityrvr +cornel +edui +senior +research +associ +depart +comput +scienceatcornel +universityinithaca +work +withken +birmanin +area +distribut +system +advisor +wasandi +tanenbaum +interestsmi +brand +babi +girl +brand +hous +horu +system +tacoma +project +caml +applet +nynetth +ithaca +ageless +jazz +band +ithaca +swing +danc +network +jazz +guitar +accordion +sharewar +dutch +stuffcornel +dutch +club +contain +mani +link +netherland +dutch +jazz +ithacaithacanet +spinner +ithaca +market +place +paperssoftwar +reliabl +network +scientif +american +design +perform +horu +lightweight +group +commun +system +html +version +design +perform +horu +lightweight +group +commun +system +version +framework +protocol +composit +horu +protocol +composit +horu +incorpor +system +resourc +inform +flow +control +strong +weak +virtual +synchroni +horu +horu +flexibl +group +commun +system +secur +architectur +fault +toler +system +support +complex +multi +media +applic +us +horu +system +oper +support +mobil +agent diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..7c210c5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,168 @@ +laura +sabel +laura +sabel +sabel +cornel +profession +inform +doctor +research +professorkeith +marzullo +univers +california +diego +failur +detect +asynchron +distribut +system +formor +research +failur +detector +tushar +chandra +sfailur +detect +page +final +finish +thesi +approxim +perfect +failur +detector +asynchronousdistribut +system +public +elect +consensu +asynchron +system +laura +sabel +keith +marzullo +cornel +univers +comput +scienc +technic +report +februari +submit +inform +process +letter +postscript +copi +click +approxim +perfect +failur +detector +asynchronousdistribut +system +laura +sabel +keith +marzullo +version +appear +proceed +annual +symposium +principl +distributedcomput +august +proceed +symposium +reliabl +distribut +system +octob +cornel +univers +comput +scienc +technic +report +march +revis +june +postscript +copi +click +us +consist +subcut +detect +stabl +properti +keith +marzullo +laura +sabel +version +appear +distribut +comput +intern +workshop +distribut +algorithm +wdag +octob +proceed +publish +springer +verlag +lecturenot +comput +scienc +seri +cornel +univers +comput +scienc +technic +report +postscript +copi +click +expand +horizon +cow +interest +page +strawberri +tart +blow +torch +experi +alpacanet +electron +gourmet +guid +thebobbi +award +especi +spam +page +jelli +belli +jelli +bean +page +free +sampl +answer +survei +bingo +zone +canplai +bingo +cash +prize diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..6b17ddd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,292 @@ +faculti +research +interest +toueg +toueg +professorph +princeton +univers +research +interestsmi +research +interest +includ +distribut +comput +fault +toleranceand +real +time +work +methodolog +paradigm +algorithm +forfault +toler +distribut +system +messag +pass +andshar +memori +system +long +term +goal +bridg +gapbetween +theoret +result +need +effici +practicalsolut +collabor +withtushar +chandraand +prasad +jayanti +comput +scienc +student +continu +work +onunreli +failuredetector +messag +pass +system +wait +free +object +share +memori +system +fundament +result +fault +toler +distribut +computingst +consensu +problem +cannot +solv +adeterminist +algorithm +asynchron +system +impossibilityresult +inher +difficulti +determin +whether +aprocess +crash +mere +slow +system +inour +work +abl +determin +exactli +much +informationabout +failur +necessari +suffici +solv +consensu +wefirst +show +unreli +failur +detector +canmak +infinit +number +mistak +solv +consensu +systemswith +major +correct +process +prove +solveconsensu +failur +detector +provid +least +muchinform +failur +thu +weakest +failuredetector +solv +consensu +asynchron +system +amajor +correct +process +explor +practicalityof +implement +applic +reli +theircorrect +concurr +system +consist +process +commun +sharedobject +share +object +wait +free +process +accessesthi +object +guarante +respons +even +otherprocess +crash +explor +wait +free +hierarchi +ofobject +type +object +type +assign +level +thatcorrespond +abil +implement +wait +free +object +particular +prasad +jayanti +shown +well +known +hierarchi +herlihi +robust +inform +hierarchi +anobject +level +us +implement +wait +free +object +atani +level +explor +question +whetherrobust +wait +free +hierarchi +exist +select +public +bracha +toueg +asynchron +consensu +broadcast +protocol +journal +srikanth +toueg +optim +clock +synchron +journal +abbadi +toueg +maintain +avail +partit +replic +databas +transact +databas +system +neiger +toueg +automat +increas +fault +toler +distribut +algorithm +journal +algorithm +chandra +toueg +unreli +failuredetector +asynchron +system +proceed +symposium +principl +distribut +comput +august +montreal +canada +chandra +hadzilaco +toueg +weakest +failur +detectorfor +solv +consensu +proceed +symposium +principl +distribut +comput +august +vancouv +canada +jayanti +chandra +toueg +fault +toler +wait +free +share +object +proceed +ieee +symposium +foundat +comput +scienc +octob +pittsburgh +pennsylvania +neiger +toueg +simul +synchron +clock +common +knowledg +distribut +system +journal diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..2fdb69ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,176 @@ +samuel +weber +samuel +weber +act +assist +professor +upson +hallphon +email +samuel +cornel +educurr +act +assist +professor +cornel +univers +assist +director +master +engin +program +comput +scienc +research +interest +softwar +design +specif +verif +program +languag +design +semant +distribut +system +cours +softwar +engin +technolog +techniqu +fall +introduct +comput +program +spring +public +weber +bloom +metatheori +calculu +technic +report +cornel +univers +submit +confer +public +weber +bloom +brown +compil +silicon +formal +verifi +compil +delai +insensit +circuit +technic +report +cornellunivers +submit +journal +public +weber +process +algebra +meta +algebra +theori +practic +thesi +cornel +univers +august +weber +bloom +brown +compil +silicon +exercis +appli +structur +oper +semant +workshop +semant +foundat +applic +bakker +roever +rozenberg +editor +lectur +note +comput +scienc +springer +verlag +page +weber +bloom +brown +compil +silicon +verifi +silicon +compil +scheme +knight +savag +editor +proceed +advanc +research +vlsi +parallel +system +confer +page +amdur +weber +hadzilaco +messag +complex +binari +byzantin +agreement +crash +failur +distribut +comput +page +weber +bound +messag +complex +byzantin +agreement +master +thesi +univers +toronto +septemb +seshadri +wortman +weber +small +semant +analysi +concurr +compil +proceed +sigplan +confer +program +languag +design +implement +page +samuel +weber +samuel +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..266245a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,128 @@ +sean +landissean +landi +master +engin +comput +sciencewelcom +cornel +home +page +resum +cornel +inform +current +cours +advanc +databas +systemsc +master +engin +project +render +system +window +past +cours +machin +percept +final +project +system +analyz +color +book +draw +clickherefor +postscript +version +project +sampl +draw +weanalyz +topic +comput +graphic +content +base +imag +retriev +system +interior +design +master +engin +project +window +base +graphic +render +system +comput +graphic +comput +graphic +educ +interest +comput +graphic +window +object +orient +program +object +orient +design +patternsprofession +interest +work +isi +distribut +system +divis +stratu +comput +project +lead +orbix +isi +develop +team +product +combin +orbix +acorba +compliant +object +request +broker +iona +technolog +isi +current +work +releas +orbix +isi +person +interest +basebal +favorit +team +alpin +ski +golf +plai +softbal +basebal +card +collectingi +reach +isi +comeduc +sheet +last +modifi +sean +landi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..1ec5e338 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,87 @@ +seena +cherangara +seena +cherangaramast +engineeringclass +dept +comput +sciencecornel +univers +welcom +homepagecurr +student +comput +scienc +depart +cornel +univers +ithaca +tech +degre +comput +scienc +engin +colleg +engin +trivandrum +kerala +india +inform +cours +taken +cornelluniversityfal +oper +system +practicum +oper +system +project +specif +hoca +oper +system +softwar +engin +multimedia +system +project +post +processingalgorithm +jpeg +artifact +reduct +spring +comput +graphic +cspracticum +comput +graphic +project +anim +magic +carpet +distribut +system +comput +scienc +colloqium +summer +project +graphic +model +java +parametr +equat +viewer +click +postscript +version +myresum +mapl +ithaca +york +seena +cornel +last +updat +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..0b36be25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,350 @@ +rosen +sharma +offic +upson +hall +email +sharma +cornel +came +cornel +stanford +spent +coupl +year +student +work +research +stanford +stream +live +media +like +audio +video +internet +part +modifiedigmp +multicast +protocol +develop +protocol +multicast +unicast +layer +video +develop +virtual +classroom +system +initi +prototyp +deploi +spring +fall +quarter +us +altern +stanford +instruct +televis +network +sitn +program +us +microwav +link +student +remot +site +us +softwar +asynchron +access +class +lectur +note +internet +also +stumbl +upon +fact +media +like +text +slide +link +portion +video +enhanc +us +greatli +leadto +format +compani +vxtreme +palo +alto +vxtreme +silicon +vallei +start +develop +client +sever +applic +stream +multimedia +deliveri +internet +paper +signal +oper +system +support +nativ +mode +applic +rosen +sharma +keshav +sigcomm +text +segment +mix +mode +imag +navin +chaddha +rosen +sharma +avneesh +agarw +anoop +gupta +asilomar +igmp +internet +group +membership +protocol +design +rosen +sharma +steve +deer +releas +part +multicast +internetdraft +written +bill +fenner +optic +charact +recognit +us +statist +structur +method +rosen +sharma +niten +malhan +bachelor +thesi +dept +comput +scienc +indian +institut +technolog +delhiunpublish +stuff +character +variabl +rate +sourc +rosen +sharma +term +paper +preform +softwar +video +conferenc +system +rosen +sharma +intern +report +cool +stuff +imag +video +manipul +languag +languag +video +imag +first +class +data +type +allow +oper +imag +sequenc +like +blur +speckl +transform +like +affin +subband +motion +estmat +also +make +manipul +imag +video +data +fast +effici +implement +writen +current +test +machin +support +displai +give +flavour +languag +look +like +imag +imag +imagefram +width +height +ifram +nodisplai +file +filenam +imag +imag +putimageincanva +imagefram +imag +predecessor +implement +hate +us +motif +languag +interpret +written +yacc +limit +shell +like +script +cool +shell +script +gener +widget +gener +file +given +name +widget +file +compil +give +dummi +widget +event +snooper +record +player +consid +coolest +thing +ever +someth +similar +microsoft +window +releas +doesnt +script +languag +replai +record +lot +kludg +fool +server +postscript +fractal +creat +file +call +directori +us +hole +postscript +viewer +fix +dissalow +write +call +write +semant +name +rosen +indian +name +question +often +ask +peopl +gaveth +name +interpret +chines +friend +make +wonder +claim +mean +small +hors +smart +hindi +nativ +tongu +sharm +mean +shyness +sharmila +mean +actress +call +sharmila +tagor +nicknam +sharmila +frozen +sharmila +stupid +hors +peopl +claim +ealri +jewish +leader +name diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..8e0409f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,147 @@ +eric +shim +home +pagewelcom +eric +home +page +eric +young +sang +shimmast +engin +comput +sciencecornel +univers +view +cornel +univers +address +dryden +citi +ithaca +phone +shim +cornel +korean +version +home +page +click +onthi +receiv +comput +scienc +degre +univers +california +irvinestudi +inform +comput +scienc +meng +project +view +system +camera +transform +meng +project +abstract +final +project +comput +graphic +classi +love +plai +follow +music +instrument +acoust +guitar +piano +keyboard +listen +music +stan +getz +antonio +carlo +jobim +john +coltran +mile +davi +earl +klugh +metheni +acoust +archemi +chopin +watch +movi +music +cinema +paradiso +french +kiss +miser +miss +saigon +favorit +page +korea +newswant +know +korean +graduat +student +associ +cornel +anybodi +like +jazz +check +jazz +interest +java +cyberspac +friend +hana +work +melco +last +time +went +movi +friend +world +jung +hwan +middl +school +friend +back +victor +kwan +hong +jiyang +kang +homepag +kwan +page +access +timessinc +page +still +construct +resum +avail +near +futur +also +page +korean +version +soon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..75892692 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,324 @@ +amit +singhal +home +pageamit +singhaldepart +comput +scienc +cornel +universitysingh +cornel +eduphon +research +interest +area +inform +retriev +andtext +process +thesi +advisor +late +prof +gerardsalton +current +thesi +supervisor +chri +bucklei +prof +clairecardieher +postscript +copi +resum +smart +group +cornel +depart +beenon +foremost +research +group +field +informationretriev +last +thirti +year +current +research +involv +document +length +normal +inform +retriev +fairli +retriev +text +vari +size +document +lengthnorm +commonli +us +term +weight +show +thateffect +system +retriev +document +size +chancessimilar +likelihood +relev +propos +pivot +techniqu +us +modifi +exist +normalizationfunct +yield +substanti +improv +retrievaleffect +also +propos +effect +normalizationtechniqu +paper +trecparticipationtext +retriev +confer +trec +nist +arpa +sponsoredeffort +object +evalu +variou +inform +retrievaltechniqu +independ +testb +smart +system +hasconsist +best +system +trec +somepap +automat +text +structur +summarizationnon +expositori +text +usual +read +cover +tocov +reader +help +circumst +provid +selectiveaccess +text +excerpt +need +develop +techniqu +toanalyz +structur +text +provid +tool +select +texttravers +paper +papersnorm +paper +pivot +document +length +normal +amit +singhal +chri +bucklei +mandarmitra +gerard +salton +documentlength +normal +amit +singhal +gerard +salton +mandar +mitraand +chri +bucklei +lengthnorm +degrad +text +collect +amit +singhal +gerardsalton +chri +bucklei +trec +paper +come +soon +retriev +approach +usingsmart +trec +chri +bucklei +amit +singhal +mandarmitra +gerard +salton +automat +queri +expans +us +smart +trec +chri +bucklei +gerard +salton +jame +allan +amit +singhal +proceedingsof +third +text +retriev +confer +nist +special +public +text +structur +paper +automat +text +decomposit +us +text +segment +textthem +gerard +salton +amit +singhal +chri +bucklei +mandar +mitra +hypertext +appear +automat +text +decomposit +structur +gerard +salton +jame +allan +amit +singhal +inform +process +andmanag +appear +automat +text +brows +us +vectorspac +model +amit +singhal +gerard +salton +proceed +ofth +dual +technolog +applic +confer +select +text +travers +gerard +salton +amit +singhal +automat +text +theme +gener +analysi +text +structur +gerard +salton +amit +singhal +automat +analysi +theme +gener +summar +machineread +text +gerard +salton +jame +allan +chri +bucklei +amitsingh +scienc +june +smart +groupmemb +smart +group +chri +bucklei +senior +research +associ +amit +singhal +student +mandar +mitra +student +david +field +master +engin +student +other +group +slowli +fluctuat +thank +visit +home +page +visitor +sinc +iinstal +counter diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..5f8d250c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,192 @@ +keshav +keshavemail +skeshav +cornel +work +upson +hall +depart +comput +scienc +cornel +univers +ithaca +home +christoph +lane +ithaca +skeshav +cornel +edui +current +associ +professor +comput +scienc +depart +cornel +univers +spentfiv +year +work +sinc +build +network +xunet +wide +area +network +built +scratch +router +switch +softwar +oper +idlinet +incollabor +delhi +base +name +equip +fore +systemsand +zeitnet +idlinetsourc +code +public +domain +avail +nativ +mode +home +page +featur +idlinet +protocol +stack +nativ +mode +applicationget +write +directli +virtual +circuit +also +support +independ +signal +span +compliant +final +goof +build +talk +head +facial +anim +snoop +send +video +format +mbone +canb +driven +remot +site +internet +linkspapersher +linkto +paper +paper +avail +postscript +reali +build +real +packet +level +simul +sinc +still +maintain +instal +site +idea +mani +peopl +actual +link +real +version +also +simul +latest +version +version +releas +fall +version +work +includ +base +beout +goe +well +native_mod +home +pagemi +namein +part +world +come +south +india +thanjavur +district +beprecis +peopl +name +prefix +father +sonli +name +sometim +villag +name +surnam +thu +name +keshav +myfath +name +srinivasan +unfortun +round +intoth +squar +hole +custom +first +name +last +beconfus +quotabl +quot +ought +everi +least +hear +littl +song +read +good +poem +possibl +speak +reason +word +johann +wolfgang +goeth +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..ec89cb74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,30 @@ +kenneth +home +page +kenneth +road +success +alwai +construct +meng +electr +engin +depart +sinc +work +prof +zabih +depart +place +student +page +came +engin +univers +wisconsin +madison +cornel +cornel +sunlab +cornel +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..38d794b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,430 @@ +come +clear +step +forest +around +clearinglook +around +realiz +must +walk +much +thanyou +plan +wide +varieti +tree +surround +onal +side +especi +tree +theweath +seem +fairli +overcast +somehow +figur +ifit +go +rain +snow +perhap +distanc +larg +mountain +quiteclear +snow +question +hear +bird +chirp +quit +near +cours +theymai +respond +sound +insid +clear +much +much +louder +sound +come +nearbywaterfal +gotta +could +follow +sound +want +seem +apath +leav +direct +path +leav +clear +label +asign +sai +hillschool +label +sign +hell +school +also +small +wormhol +seem +connect +nearbyhous +presum +traffic +place +insid +clearinginsid +clear +coupl +structur +shack +door +fall +offand +complet +modern +glass +hous +withno +stone +front +pile +book +score +magazin +random +paper +scatter +throughout +theclear +well +seem +like +vagu +rhyme +reasonto +glanc +topic +seem +sortsof +topic +figur +look +recent +read +book +probabl +betteridea +make +person +clear +tick +also +resum +itseem +somewhat +corner +presum +importantth +worri +seem +kind +strang +clearingh +smile +sai +hello +oftendescrib +thin +especi +mother +mostdistinct +featur +hair +bright +golden +quitelong +look +vagu +elfin +seen +like +peoplebefor +warn +link +next +paragraph +written +theresoon +obviou +alwai +thing +startstel +stori +stop +hum +tune +start +tell +whynichola +negropont +moron +ifyou +never +heard +start +talk +polit +late +twentieth +centuri +america +thenh +paus +start +talk +obscur +theorem +theoret +comput +scienc +obviou +rather +well +read +listen +hetend +much +start +look +appearanceinstead +wear +mostli +color +purpl +dark +turquois +everyth +wear +seem +either +silk +linen +contrast +nice +hair +heha +glass +well +gold +ring +imageof +beaver +right +ring +finger +beaver +point +awai +fromhim +left +ring +finger +silver +ocean +wave +pattern +also +wear +pewter +pentacl +around +neck +andlook +altogeth +like +hippi +someon +asclass +intellectu +sound +sound +like +peopl +visit +clearingdan +start +talk +peopl +occasion +visit +heha +spent +time +briani +friend +us +live +anundergrad +nowadai +work +pointcast +internet +newsprovid +ancamosoiu +pronounc +schwa +best +friend +backwhen +us +mani +time +week +twoand +half +year +went +europ +togeth +summer +wegradu +inner +child +shejust +quit +work +onewav +blame +dread +name +actual +usedto +busi +object +power +daniel +issomeon +gotten +know +quit +better +last +severalmonth +think +probabl +sometim +last +novemb +becamemuch +better +friend +summer +switch +eedepart +quit +wise +much +multimediastud +dept +commun +reason +inth +school +also +small +internet +busi +own +friendof +us +link +perri +finlei +page +notanymor +instead +brian +pictur +perri +pictur +page +work +steelcas +isth +largest +manufactur +offic +furnitur +world +dserver +kinda +cheesi +us +haveth +pictur +pyramid +still +neat +thebuild +work +ius +sing +cornel +left +japan +programcal +hire +teach +english +high +school +student +ideal +world +visit +next +winter +hani +graduatedfrom +june +couldn +happen +someon +neededto +place +great +even +nick +agood +year +half +cuter +thanth +somewhat +blurri +pictur +page +would +indic +music +maker +dreamer +ofdream +aphex +twindan +brown +snowman +cornel +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..4179b8e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,28 @@ +autobiographi +upkeep +lookin +lot +inform +ultra +cool +home +page +soon +keep +lookin +home +page +take +long +setuup +doingajaymanishanujmom +daddepart +comput +sciencesearch +netentertain +weeklycricket +rate +ashish +soni +sonia +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..2881995a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,11 @@ +scott +dawson +padif +us +form +capabl +browser +would +better +scottdawson +shomebas diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..268d1036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,27 @@ +paul +stodghil +home +pagepaul +stodghillstodghil +cornel +rhode +hall +affili +depart +comput +scienc +atcornel +univers +advanc +comput +research +institut +acri +cornel +theori +center +bernoulli +projectinterest +ultim +hockei +scheme diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..623076b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,19 @@ +scott +stoller +former +home +pagescott +stoller +former +home +page +home +page +move +http +indiana +hyplan +stoller +htmllast +updat +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..3bdeb81b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,63 @@ +sugata +mukhopadhyai +home +page +sugata +mukhopadhyai +welcom +home +page +graduat +student +depart +ofcomput +scienc +cornel +univers +work +multimedia +system +prof +brian +smith +marri +wonder +person +earth +ritu +spring +take +high +perform +comput +system +compil +design +high +perform +architectur +also +take +advanc +option +price +theori +czar +work +progress +seminar +previou +semest +contact +mehom +phone +work +phone +mailsugata +cornel +eduaddress +hichori +estat +owego +sugata +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..82691438 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,34 @@ +home +page +paul +sukhpal +sanghera +paul +sukhpal +sanghera +physic +carleton +univers +present +student +comput +scienc +cornel +univers +ithaca +sukhpal +cornel +background +project +philosophi +life +resum +rout +clock +tick +need +java +capabl +browser +view +anim diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..c44f596d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,162 @@ +bilth +gater +galact +empir +bilth +gater +galact +empirewritten +illustr +sumedh +kanetkaremail +kanetkar +cornel +eduthi +seri +weekli +comic +strip +drew +intern +atmicrosoft +summer +strip +post +weekli +theintern +social +alia +read +regularli +peopl +thesumm +progress +notic +artwork +begun +leak +theful +time +employe +well +whether +high +lord +gater +read +never +found +episod +first +first +stripi +drew +within +week +arriv +redmond +tri +persuad +themicrosoft +newslett +print +perceiv +problemand +declin +didn +want +microsoft +portrai +evilempir +understand +viewpoint +told +comicstrip +attempt +show +compani +view +mani +theoutsid +world +person +bitter +feel +toward +eitherbil +gate +corpor +heck +realli +enjoi +summersof +work +strongli +recommend +internship +program +anyoneinterest +work +industri +first +strip +make +employe +orient +thateveri +employe +suffer +long +session +theyshow +video +fill +kind +trivia +also +makey +sign +disclosur +agreeement +would +fit +theymad +everyon +stand +place +hand +theirheart +microsoft +pledg +alleig +comput +everydesk +everi +home +run +microsoft +softwar +anywai +episod +space +roosterepisod +rebel +threatepisod +flame +episod +lord +gater +parti +part +episod +imperi +insigniaepisod +lord +gater +parti +part +episod +lord +gater +parti +part diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..e1395393 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,227 @@ +kristen +summer +kristen +summer +student +cornel +univers +summer +cornel +upson +hall +research +interest +work +inform +captur +accessresearch +group +document +analysi +mylong +term +goal +provid +support +forsophist +electron +document +manipulationtool +index +brows +link +primari +interest +discov +logicalstructur +arbitrari +electron +document +goal +take +electron +documentrepresent +input +return +hierarchyof +logic +piec +document +output +exampl +given +scan +postscriptvers +technic +report +would +like +tobe +abl +divid +section +paragraph +similarli +busi +letter +address +head +bodi +close +identifi +problem +primari +compon +segment +divid +document +logic +piec +andclassif +categor +piec +also +rais +question +evalu +previou +work +differ +descript +correct +hierarchi +type +logic +structur +theoret +limit +task +relev +bruce +croft +stop +research +issu +inform +retriev +novemb +issu +magazin +number +interfac +brows +number +effici +flexibl +index +retriev +determininglog +structur +enabl +flexibl +hierarch +brows +soin +gener +support +system +flexibl +handl +ofmultipl +document +type +paper +us +textual +cue +electron +document +browsingco +author +daniela +digit +librari +current +issu +nabil +adam +bharat +bhargava +yelena +yesha +editor +chapter +lectur +note +comput +scienc +seri +springer +verlag +version +geometr +algorithm +experi +autom +document +structur +mathemat +comput +model +forthcom +us +white +space +autom +document +structur +cornel +univers +comput +scienc +technic +report +proceed +workshop +principl +document +process +seeheim +podp +toward +taxonomi +logic +document +structureselectron +publish +inform +superhighwai +proceed +dartmouth +institut +advanc +graduat +studi +boston +donald +johnson +memori +dag +scholaraward +best +student +paper +recipi +near +wordless +document +structur +classif +proceed +intern +confer +document +analysisand +recognit +montral +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..3d5af0e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,74 @@ +masafumi +suzukither +would +applet +browser +suppot +java +masafumi +suzukisuzuki +cornel +educlassesfal +case +studi +oper +research +optim +engin +probabl +statist +design +analysi +simul +system +project +spring +oper +research +introduct +stochast +model +project +comput +graphic +comput +graphic +summer +data +structur +fall +softwar +engin +technolog +techniqu +system +program +oper +system +multimedia +system +project +report +comput +network +telecommun +polici +spring +introduct +databas +system +thrive +inform +revolut +sector +site +databas +manag +independ +project +polygon +displai +us +java +prototyp +resum diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..71f3a0ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,36 @@ +jonathan +swartz +home +page +jonathan +swartz +swartz +cornel +edui +student +departmentof +comput +scienc +cornel +univers +spend +time +heredevelopingrivl +languag +multimedia +process +myaddress +phone +number +littl +humor +brighten +dayjon +movi +connectioncool +siteslast +modifi +jonathan +swartz +swartz +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..b4456dcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,27 @@ +sunil +srivastava +home +pagewelcom +home +page +sunil +srivastavamast +engin +studentcomput +scienc +departmentcornel +univers +academ +class +class +project +person +inform +us +linkscom +question +page +send +mail +sxsriva +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..7d35a396 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,254 @@ +huangszu +huang +defend +truth +champion +justic +around +nice +steven +year +nevermind +long +exactli +iarriv +second +proud +parent +soundslik +mobi +dick +assur +nointent +find +ship +hunt +whale +digress +brought +taiwan +tender +seven +wholefamili +migrat +south +tropic +island +philippin +made +home +live +fifteen +year +pictur +aroundsix +year +thu +becam +quit +fluentli +bilingu +love +read +thepoetri +tang +dynasti +arabian +night +quit +natur +children +version +host +stori +somewhat +fulfil +name +mean +literatur +class +grade +seven +cours +life +doveright +marvel +comput +four +year +later +ienter +univers +philippineswith +major +like +career +found +talent +draw +scienc +ofcomput +unabashedli +knew +quit +good +alsoin +colleg +whirlwind +happi +peac +three +year +run +awoman +becom +import +part +life +effortlessli +defeat +hobbi +love +eek +career +ideal +higher +pai +blunt +ienrol +cornel +univers +graduat +program +thu +reward +almost +everyth +ever +want +andwork +lucki +septemb +welcom +home +page +segreg +everydaygeek +like +think +mani +interest +asid +fromactu +write +program +happili +myspar +time +read +anyth +calvin +hobb +unix +network +program +internet +relai +chat +gener +found +steven +linuxnet +build +lego +thing +cours +practic +object +orient +design +build +plastic +model +weapon +watch +suspens +thriller +film +listen +music +sting +mani +other +watch +sesam +street +discoveri +channel +railroad +model +rich +enough +field +comput +also +rather +broad +spectrum +ofinterest +though +studi +concentr +area +ofcomput +graphic +network +wish +offend +bysom +blatant +self +promot +want +check +myresum +also +highlyinterest +linux +freeli +avail +oper +system +intel +compatiblecomput +master +engin +project +model +blobbi +model +metaballsund +supervis +bruce +land +current +involv +anoth +cours +project +involv +survei +techniquesin +model +human +face +resolut +independ +andport +audio +effect +editor +last +modifi +octob +huang diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..df43fb21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,6 @@ +henzing +hytechhytech +hybrid +technolog +toolw +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..335dd1c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,79 @@ +henzingerthoma +henzing +movedassist +professorcomput +scienc +departmentcornel +universityithaca +email +cornel +eduphon +researchform +support +develop +analysi +concurr +real +time +embed +system +relat +researchat +cornelland +worldwid +resumepublicationsreact +modul +formal +methodolog +analysi +concurr +systemsr +time +logic +time +transit +system +formal +methodolog +analysi +real +time +systemsclock +system +time +automata +formal +methodolog +analysi +real +time +systemshybrid +automata +formal +methodolog +analysi +embed +systemsbibliographi +bibtex +list +publicationstoolshytech +symbol +model +checker +linear +hybrid +systemscoursesc +fall +advanc +program +languagesconferenceshybrid +verif +control +hybrid +systemscav +comput +aid +verificationlast +updat +septemb +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..f711d43b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,42 @@ +takako +hickei +homepag +takako +hickei +email +takako +cornel +eduoffic +upson +hallphon +student +depart +comput +scienc +atcornel +univers +advis +byrobbert +reness +andfr +schneider +research +interest +distribut +system +program +environ +resourc +manag +horu +project +previou +life +interest +social +psycholog +backcountri +hockei +quot +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..e0b33cb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,26 @@ +tim_teitelbaum +teitelbaumassoci +professor +depart +comput +scienc +cornel +univers +cornel +eduresearch +interest +increment +comput +transform +program +program +environ +languag +base +editor +compil +attribut +grammar +adavita +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..cd1df94b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,361 @@ +thorsten +eicken +thorsten +eickenassist +professor +upson +hallphon +email +cornel +eduprojectsth +architectureprovid +user +level +network +interfacefor +cluster +workstat +offer +latencyand +high +bandwidth +commun +high +speed +lan +currentimplement +us +workstat +interconnect +activ +messag +sever +project +port +activ +messag +platform +includingth +cluster +extend +model +tonon +spmd +program +split +simpl +extensionto +parallel +comput +split +port +sever +newplatform +includ +share +memori +multprocessor +run +coursesc +introduct +digit +system +computerorgan +fall +high +perform +comput +architectur +spring +frontier +parallel +system +fall +user +level +network +architectur +guest +lectur +maynd +department +talk +report +departement +annual +report +entri +fall +forum +talk +slide +departement +annual +report +entri +fall +forum +talk +slide +person +pagestv +pond +real +water +fish +plant +tire +firewal +macpppwhich +gener +time +password +automat +without +everhav +think +well +month +passwordssuddenli +installationinstruct +select +publicationsu +user +level +network +interfac +parallel +distributedcomput +thorsten +eicken +anindya +basu +vineet +buch +werner +vogel +appear +june +latenc +commun +atmnetwork +us +activ +messag +eicken +avula +basu +buch +present +interconnect +palo +alto +abridg +version +paper +appear +ieee +micro +magazin +activ +messag +mechan +integr +commun +andcomput +eicken +culler +goldstein +schauser +proceed +symp +comput +architectur +gold +coast +australia +activ +messag +effici +communicationarchitectur +multiprocessor +eicken +thesi +novemb +univers +california +berkelei +paper +publish +berkelei +link +lead +postscript +version +paper +parallel +program +split +culler +dusseau +goldstein +krishnamurthi +lumetta +eicken +yelick +proceed +supercomput +novemb +compil +controlledthread +abstract +machin +culler +goldstein +schauser +eicken +journal +parallel +distribut +comput +special +issu +dataflow +june +evalu +mechan +fine +grain +parallel +program +machin +spertu +goldstein +schauser +eicken +culler +dalli +proc +symp +comput +architectur +diego +logp +toward +realist +modelof +parallel +comput +culler +karp +patterson +sahai +schauser +santo +subramonian +eicken +proc +fourth +sigplan +symp +principl +practic +parallel +program +diego +fundament +limit +dataflowmultiprocess +culler +schauser +eicken +proceed +ifip +work +conf +architectur +compil +techniqu +fine +medium +grain +parallel +orlando +activ +messag +mechan +forintegr +commun +comput +eicken +culler +goldstein +schauser +proc +symposium +comput +architectur +gold +coast +australia +compil +control +multithread +forleni +parallel +languag +schauser +culler +eicken +proceed +confer +function +program +languag +comput +architectur +cambridg +august +fine +grain +parallel +minimalhardwar +support +compil +control +thread +abstract +machin +culler +schauser +eicken +wawrzynek +proc +conf +architectur +support +program +languag +oper +system +santa +clara +april +analysi +multithread +architecturesfor +parallel +comput +saavedra +barrera +culler +eicken +proceed +annual +symp +parallel +algorithm +architectur +crete +greec +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..3aa16808 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,46 @@ +lfar +erlingsson +pagelfar +erlingsson +specificationi +lfar +erlingsson +student +incomput +scienc +cornel +univers +apart +enjoi +somewhat +incongruousiceland +link +inform +implementationbackgroundwher +come +current +activitieswhat +moment +schedulewher +time +researchwhat +real +work +done +interestswhat +actual +like +acquaintancesthos +know +contact +infohow +touch +pleas +note +page +often +date +gener +assum +disclaim +appli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..03e0fd9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,73 @@ +project +mesh +gener +relat +softwar +packag +finit +element +mesh +gener +threedimens +packag +includ +geometr +model +softwar +themesh +gener +finit +element +solver +free +softwaredownload +run +unix +window +releas +releas +andqmg +releas +novemb +us +websit +mesh +gener +geometr +softwar +robert +schneider +mesh +gener +home +page +mcphedran +page +offinit +element +resourc +univers +minnesota +geometri +center +list +ofsoftwar +computationalgeometri +jonathan +shewchuk +triangl +packag +back +vavasi +home +page +stephen +vavasi +comput +scienc +depart +cornel +univers +ithaca +vavasi +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..faa3773c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,200 @@ +stephen +vavasi +stephen +vavasi +associ +professor +depart +comput +scienc +rhode +hall +cornel +univers +ithaca +email +vavasi +cornel +phone +period +onsabbat +divis +bldg +argonn +nation +laboratori +cass +argonn +email +vavasi +phone +note +chang +area +code +effect +research +interest +numer +analysi +aren +tsure +numer +analysi +pleas +essaybi +colleagu +trefethen +specif +interest +numer +optim +complex +issuesnumer +method +boundari +valu +problemsgeometr +problem +aris +scientif +computingspars +matrix +computationsi +recent +manuscript +avail +line +vavasi +primal +dual +acceler +interiorpoint +method +whose +run +time +depend +click +hough +vavasi +complet +orthogon +decompositionfor +weight +least +squar +click +mitchel +vavasi +aspect +ratio +bound +triangul +gridcut +hyperplan +click +driscol +vavasi +numer +conform +map +us +cross +ratio +delaunai +triangul +click +packagei +recent +complet +softwar +project +mesh +gener +forth +finit +element +method +three +dimens +softwar +packag +call +avail +sourc +code +level +anonym +construct +polyhedr +geometr +object +verycompl +topolog +hole +intern +boundari +andautomat +creat +unstructuredtetrahedr +mesh +mesh +gener +base +algorithm +work +scott +mitchel +also +solv +ellipt +boundaryvalu +problem +grad +domain +packag +iswritten +matlab +distributedfor +free +sourc +code +level +anonym +distributionbegan +releas +novemb +featur +manyimprov +includ +faster +mesh +gener +algorithm +vrml +graphic +much +cleaner +code +boundari +mesh +gener +algorithm +compat +microsoft +window +well +unix +compatibilitywith +well +matlab +pleasese +line +document +vavasi +page +annual +reportback +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..091d15f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,10 @@ +arun +verma +homepag +need +browser +support +frame +netscap +higher +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..29abd944 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,10 @@ +vinc +browser +us +suck +download +date +netscap +read +page +thank diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..21064bd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,19 @@ +vitrano +vitrano +home +pagehei +start +thing +give +break +internet +engin +pagec +advanc +databas +pagec +multimedia +pageer +vitrano +vitrano +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..be5cb342 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,129 @@ +kolla +home +ground +copyright +right +reserv +held +respons +unwant +effect +usag +applet +deriv +warrante +usabl +specif +applic +given +impli +function +scrollit +seed +reach +page +vivek +page +view +million +zillion +wonder +reach +call +send +mail +kolla +cornel +dont +expect +back +seed +seed +scrollit +seed +timertwo +window +settimeout +els +seed +visitor +number +home +page +happi +contact +info +transmit +thoughtsfriend +foeslinksa +small +collect +relev +life +us +maintain +larg +list +favorit +link +think +wast +time +search +someth +specif +might +tryalta +vista +yahoo +theinktomiresumein +htmlin +postscriptin +word +perfectin +asciith +current +time +good +browser +browser +know +java +dont +clock +time +wanna +know +time +around +world +need +java +capabl +browser +view +anim +site +construct +wonder +mani +imag +heavi +file +like +note +home +page +us +java +anim +promis +made +regard +qualiti +visit +us +browser +fulli +support +technolog diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..cb30dce0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,98 @@ +vlad +home +pagevladimir +kotlyarvladimir +cornel +look +like +fall +david +wereteach +might +guess +graduat +student +depart +comput +scienc +cornellunivers +work +prof +keshav +pingali +research +interest +compil +high +perform +architectur +particular +work +parallel +spars +matrix +code +work +part +bernoulli +project +member +group +paul +stodghil +andindu +kodukulapubl +henri +kissing +said +lawyer +professor +friend +legal +profess +like +remind +comment +abritish +judg +differ +lawyer +professor +sveri +simpl +said +lord +den +function +lawyer +find +asolut +everi +difficulti +present +wherea +functionof +professor +find +difficulti +everi +solut +todayth +number +difficulti +seem +outpac +number +ofsolut +either +lawyer +friend +work +hardenough +mani +professor +govern +support +privaci +strong +encrypt diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..021c2874 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,20 @@ +vijai +menonvijai +menon +graduat +student +cornel +offic +address +home +address +rhode +hall +mapl +cornel +univers +ithaca +ithaca +last +updat +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..466999d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,58 @@ +chen +home +pagewei +chen +upson +hall +depart +comput +sciencecornel +universityithaca +weichen +cornel +current +third +year +student +depart +computersci +cornel +univers +receiv +bachelorand +master +degre +depart +comput +scienc +tsinghua +univers +beij +chinami +interest +academ +distributedsystem +fault +toler +algorithm +work +professor +toueg +failur +detect +group +membership +inpartition +network +system +interest +spare +time +spare +time +soccer +resum +bookmark +last +updat +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..ede91d71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,22 @@ +weitsang +homepag +lwhere +fromwhat +watchwhat +movi +likec +page +wrotepictur +drawa +window +motifcomput +theoryhom +page +vimi +tsearch +webcoolest +sitessharewar +archivem +newspap +onlineunivers +site diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..48d7bff1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,99 @@ +hung +hung +glavin +address +mapl +avenu +ithaca +telephon +photo +academ +background +graduat +nation +taiwan +univers +comput +scienc +plan +graduat +cornel +univers +comput +scienc +habit +sport +basketbal +billiard +tabl +tenni +bowl +tenni +swim +volleybal +other +sing +drive +danc +except +studi +favorit +team +orlando +magic +atlanta +brave +favorit +player +anferne +hardawai +glavin +technic +skill +understand +distribut +oper +system +comput +graphic +multimedia +comput +network +databas +system +comput +vision +financi +calcul +extens +window +java +program +multimedia +final +project +paper +warp +morph +rivl +partial +result +meng +project +webpaint +job +interest +market +relat +field +comput +scienc +softwar +develop +resum +page +still +construct +email +whkao +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..fcd94c91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,191 @@ +william +visitor +william +jersei +exit +minut +awai +princeton +master +student +comput +scienceat +cornel +univers +degre +comput +engineeringand +mathemat +comput +sciencefrom +carneig +mellon +univers +didresearch +project +engin +design +research +center +robot +institu +spent +year +write +oper +system +xsro +hpux +motorola +atft +lauderdal +florida +besid +sleep +school +work +project +like +optim +parallel +mpeg +encod +cornellopoli +network +comput +databas +sector +analysi +research +partner +system +compet +usta +tenni +tournment +south +florida +could +never +somehow +parti +enjoi +weekli +match +mani +beauti +place +plai +tenni +south +florida +faviorit +on +boca +ratonkei +west +get +coral +spring +live +collect +piano +concerto +beethoven +chopin +gershwin +liszt +mendelssohn +mozart +rachmaninoff +ravel +tchaikovski +also +collect +violinconcerto +probabl +guess +concerto +type +even +wrote +graduat +school +applic +essai +base +piano +concerto +probabl +reason +reject +school +fall +classesc +softwar +engin +technolog +techniquec +formal +methodsc +multimedia +systemsc +comput +scienc +colloquiumc +cool +softwar +tool +seminar +purifi +quantifi +wart +present +optim +parallel +mpeg +encod +research +spring +classesc +practic +distribut +computingc +practicum +distribut +comput +cornellopoli +high +perform +system +network +computerc +optim +parallel +mpeg +encod +research +thrive +inform +revolut +databas +sectorcool +links_leap +copi +frogski +serverident +crisi +testweath +undergroundinktomi +search +enginequest +week +archiveslast +updat +campu +address +mapl +ecithaca +york +wwlee +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..51b67b24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,73 @@ +welcom +xichun +jennif +home +page +welcom +xichun +jennif +upson +hall +depart +comput +sciencecornel +universityithaca +offic +home +xichun +cornel +edui +current +master +engin +student +comput +scienc +atcornel +receiv +bachelor +master +degre +depart +comput +scienc +zhejiang +univers +hangzhou +zhejiang +china +site +javaworldsunhigh +school +alumni +zhejiang +univers +alumnimeng +project +phong +shade +gouraud +shade +current +cours +spring +comput +graphic +high +capac +inform +network +databas +manag +cours +taken +fall +multimedia +systemsc +softwar +engineeringc +oper +systeme +comput +network +communicationby diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..ce38e397 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,2 @@ +topic +interest diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..5303f48c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,133 @@ +cachet +relat +project +effici +increment +computationderiv +increment +programsa +gener +systemat +transform +approach +improv +effici +comput +themeprogram +analysi +transform +increment +cachet +increment +attribut +base +interact +system +usessystemat +program +analysi +transform +techniqu +deriveincrement +program +written +function +languag +select +public +teitelbaum +systemat +deriv +increment +program +scienc +comput +program +februari +teitelbaum +cach +intermedi +result +program +improv +proceed +sigplan +symposium +partial +evalu +semant +base +program +manipul +page +jolla +california +june +stoller +teitelbaum +discov +auxiliari +inform +increment +comput +proceed +annual +sigplan +sigact +symposium +principl +program +languag +petersburg +beach +florida +januari +cachet +interact +increment +attribut +base +program +transform +system +deriv +increment +program +proceed +knowledg +base +softwar +engin +confer +boston +massachusett +novemb +ieee +comput +societi +press +principl +strength +reduct +juli +peoplei +anni +liutim +teitelbaumkeyword +increment +comput +increment +program +effici +improv +optim +program +analysi +program +transform +cacheti +anni +yanhong +cornel +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..bc622167 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,522 @@ +yanhong +anni +home +pageyanhong +anni +post +doctor +associ +work +professor +teitelbaum +research +interest +gener +systemat +approach +improv +effici +ofcomput +program +analysi +transform +techniqu +forincrement +comput +parallel +concurr +comput +applic +optim +compil +languag +base +interactivesystem +algorithm +design +program +develop +softwar +systemorgan +softwar +mainten +select +public +talksph +dissert +yanhong +increment +comput +semant +basedsystemat +transform +approach +cornel +univers +ithaca +york +januari +also +appear +cornel +technic +report +octob +abstractjourn +public +teitelbaum +systemat +deriv +increment +program +scienc +comput +program +februari +refere +confer +public +stoller +teitelbaum +discov +auxiliari +inform +increment +comput +proceed +annual +sigplan +sigact +symposium +principl +program +languag +page +petersburg +beach +florida +januari +cachet +interact +increment +attribut +base +program +transform +system +deriv +increment +program +proceed +knowledg +base +softwar +engin +confer +page +boston +massachusett +novemb +ieee +comput +societi +press +select +cach +intermedi +result +increment +comput +proceed +intern +confer +young +comput +scientist +page +beij +china +juli +peke +univers +press +teitelbaum +cach +intermedi +result +program +improv +proceed +sigplan +symposium +partial +evalu +semant +base +program +manipul +page +jolla +california +june +deriv +increment +program +proceed +intern +confer +young +comput +scientist +beij +china +juli +tsinghua +univers +press +zhang +wang +formal +uncertainti +reason +model +combin +qualit +partit +quantit +descript +multi +factor +combin +problem +proceed +intern +fuzzi +system +associ +world +congress +seattl +washington +august +zhang +wang +quantit +qualit +inexact +reason +multi +factor +combin +problem +proceed +intern +symposium +young +comput +profession +beij +china +august +publish +hous +survei +map +technic +report +teitelbaum +increment +comput +transform +softwar +develop +technic +report +depart +comput +scienc +cornel +univers +ithaca +york +march +teitelbaum +cach +intermedi +result +program +improv +technic +report +depart +comput +scienc +cornel +univers +ithaca +york +march +teitelbaum +systemat +deriv +increment +program +technic +report +depart +comput +scienc +cornel +univers +ithaca +york +august +teitelbaum +deriv +increment +program +technic +report +depart +comput +scienc +cornel +univers +ithaca +york +septemb +revis +octob +wakayama +increment +line +break +algorithm +technic +report +xerox +webster +research +center +webster +york +august +talk +discov +auxiliari +inform +increment +comput +annual +sigplan +sigact +symposium +principl +program +languag +petersburg +beach +florida +januari +cachet +system +deriv +increment +program +knowledg +base +softwar +engin +confer +boston +massachusett +novemb +select +cach +intermedi +result +increment +comput +intern +confer +young +comput +scientist +beij +china +juli +cach +intermedi +result +program +improv +symposium +partial +evalu +semant +base +program +manipul +jolla +california +june +systemat +deriv +increment +program +kestrel +institut +palo +alto +california +juli +systemat +deriv +increment +program +dagstuhl +seminar +increment +comput +dynam +algorithm +intern +confer +research +center +comput +scienc +schloss +dagstuhl +germani +deriv +increment +program +intern +confer +young +comput +scientist +beij +china +juli +automat +deriv +increment +program +system +scienc +laboratori +xerox +webster +research +center +webster +york +juli +softwar +system +document +cachet +increment +attribut +base +interact +system +us +systemat +program +analysi +transform +techniqu +obtain +effici +increment +program +depart +comput +scienc +cornel +univers +present +oggeb +expert +system +evalu +gener +basin +principl +report +implement +techniqu +test +report +usag +manual +expert +knowledg +summari +research +institut +explor +develop +scienc +ri +tshinghua +univers +beij +author +song +huang +zhang +wang +current +project +deriv +increment +program +gener +systemat +transform +approach +improv +effici +comput +compos +effici +program +optim +select +techniqu +build +effici +program +compon +depart +comput +scienc +upson +hallcornel +universityithaca +offic +home +yanhong +cornel +last +updat +novemb +move +august +tocomput +scienc +department +lindlei +hallindiana +universitybloomington +offic +home +indiana +eduhttp +indiana +peopl +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..aa663e0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,35 @@ +home +page +cheng +huangyi +cheng +huang +upson +hall +depart +comput +scienc +cornel +univers +ithaca +email +ychuang +cornel +edui +graduat +student +depart +comput +scienc +cornel +univers +favorit +link +onlin +documentscoursesprojectaccess +byvisitorslast +modifi +cheng +huang +ychuang +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..446b1e18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,172 @@ +chung +home +pagewelcom +everybodi +name +chungyou +thvisitor +sinc +april +studi +comput +scienc +master +degre +cornel +univers +pleas +check +school +master +engin +comput +sciencecornel +universitywher +origin +came +seoul +korea +graduat +high +school +korea +came +america +studi +school +went +korea +kindergarten +elementari +school +sang +myoung +junior +high +school +kang +junior +high +school +myoung +sung +women +high +school +came +america +degre +comput +scienc +georg +mason +universityin +virginia +happiest +thing +life +marri +april +wonder +husband +chang +work +forsaic +virginia +comput +programm +husband +pictur +would +like +beauti +moment +java +avail +browser +click +free +plai +keyboard +actual +like +plai +piano +better +keyboard +piano +korea +listen +music +kind +music +like +korean +music +love +shin +seung +hoon +moon +like +classic +music +forth +brows +world +wide +us +link +interest +java +search +want +us +search +engin +korean +onlin +newpap +hangook +ilbo +chosun +ilbo +joongang +ilbo +would +like +hire +resum +resum +word +perfect +version +meng +project +still +work +titl +imag +process +java +appletyoosun +person +infom +triphamm +sbithaca +phone +emerg +chang +email +ychung +cornel +email +forward +cornel +yooschung +automat +page +construct +last +modifi +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..00df3a4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,275 @@ +yaron +minski +home +page +yaron +minski +graduat +student +yminski +cornel +edudepart +computersci +upson +hall +ithaca +cornel +univers +phone +comstock +place +syracus +graduat +student +current +focus +onfault +toler +distribut +comput +particular +work +thetacoma +project +attempt +build +oper +system +support +forfault +toler +agent +base +comput +flapdragon +longer +year +longer +livether +still +great +veggi +coop +crash +often +nowadai +slightli +outof +date +webpag +howev +time +notic +flapdragon +anopen +start +need +place +live +highli +recommendit +though +plai +much +ancientchines +game +extremlysimpl +rule +complic +satisfi +strategi +like +learn +great +intro +page +also +want +plai +internet +know +nota +good +game +real +live +person +front +better +thannoth +unix +machin +take +look +cgoban +nicest +goboard +program +seen +make +trivial +easi +plai +thenet +newli +marri +wife +lisa +live +syracus +go +medic +school +suni +health +scienc +center +uniqu +qualifi +medic +school +within +hour +twenti +minutesof +cornel +love +everi +favoritepoem +lafiglia +piang +theidea +order +west +advicefor +good +love +resumesom +interestinglink +movi +critic +impress +good +exampl +fairli +simpl +technolog +us +great +effect +site +engin +take +rate +movi +compar +rate +other +come +recommend +found +almost +alarmingli +good +contrast +firefli +tri +thing +fail +miser +yahoo +yellow +page +home +address +give +direct +seven +closest +bakeri +perfect +better +yellow +page +tri +bigbook +bigyellow +know +advertis +york +time +know +great +read +free +plu +save +paper +note +take +much +time +download +text +page +slate +magazin +much +hate +admit +microsoft +someth +right +everi +thought +well +execut +expect +michael +kinslei +run +linux +great +compani +make +linux +easi +instal +maintain +ithaca +movi +list +amazon +book +solid +discount +virtual +bookstor +hope +local +booksel +town +brother +follow +public +begin +public +block +version +comment +process +mailcrypt +emac +interfacemqbtazgjohoaaaedalfhlgjmdg +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea +rbylf +zwqujcioczoecv +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc +gkgarsokrinnoazihja +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv +wumjgzsnvispwkrvzgdrojswmc +eigsqsb +bsbpw +jcwz +public +block diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..3b936383 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,63 @@ +yuichi +tsuchimoto +home +pageyuichi +tsuchimoto +home +pagecours +workfal +current +semest +system +program +oper +systemsc +program +languag +softwar +engineeringspr +introduct +compil +translatorsc +practicum +compil +translatorsc +machin +visionfal +foundat +artifici +intelligencec +practicum +artifici +intelligencec +introduct +theori +computingi +current +look +unit +state +resum +format +postscript +format +yuichi +tsuchimoto +address +yuichi +cornel +eduhttp +cornel +info +peopl +yuichi +last +modif +novemb +http +cornel +info +peopl +yuichi +welcom +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..d88622c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,539 @@ +cornel +activ +messagescornel +activ +messag +cornel +activ +messag +implementationsact +messag +neta +sourc +code +releas +activ +messag +part +thegener +releas +conform +spec +moreinform +project +page +activ +messag +object +codereleas +avail +conform +thegam +spec +pleas +read +readm +instal +file +distribut +instructionson +us +inform +contact +chao +chang +grzegorz +czajkowski +thorsten +eicken +pleas +read +releasenot +fileto +find +chang +previou +version +currentvers +also +releas +major +differencebetween +releas +modifi +libmpci +includ +thedistribut +pleas +read +document +packag +fordetail +interest +know +current +us +pleaseclick +send +briefnot +let +know +someth +organ +theus +intend +select +public +activ +messageslow +latenc +commun +ibmrisc +system +chao +chang +grzegorz +czajkowski +chri +hawblitzel +thorsten +eicken +appear +ieeesupercomput +pittsburgh +novemb +abstract +power +commerci +mpp +spiteof +fast +processor +high +network +bandwidth +scommun +latenc +inferior +older +machin +tmccm +meiko +paper +investig +activ +messag +commun +primit +altern +standardmessag +pass +order +reduc +commun +overhead +tooffer +good +build +block +higher +layer +softwar +first +part +paper +describ +implement +activemessag +layer +directli +networkadapt +compar +bandwidth +overhead +yieldsa +round +trip +latenc +lower +secondpart +paper +demonstr +power +communicationsubstr +layer +split +well +split +cbenchmark +us +compar +mpp +show +lowmessag +overhead +high +throughput +compens +high +networklat +implement +base +freeli +availablempich +version +achiev +perform +equival +onth +benchmark +design +perform +activ +messag +chao +chang +grzegorz +czajkowski +thorsten +eicken +cornel +technic +report +februari +abstract +technic +report +describ +design +implement +andevalu +activ +messag +implementationbenchmark +us +standard +network +adapt +firmwar +butdo +softwar +power +processor +assumefamiliar +concept +underli +activ +messag +mainperform +characterist +word +messag +round +trip +timeof +asymptot +network +bandwidth +present +select +implement +detail +paper +focuseson +detail +perform +analysi +includ +comparison +smessag +pass +layer +split +benchmark +gener +activ +messag +specif +version +gener +activ +messag +specif +version +defin +activemessag +interfac +portabl +across +varieti +parallel +machin +implement +avail +theu +cluster +themeiko +thehpam +fddi +ring +theparagon +thesp +latenc +commun +networksus +activ +messag +thorsten +eicken +veena +avula +anyndia +basu +vineet +buch +present +interconnect +palo +alto +abridg +version +paper +appear +ieee +micro +magazin +slide +interconnect +talk +abstract +recent +develop +commun +architectur +forparallel +machin +made +signific +progress +reduc +thecommun +overhead +latenc +order +magnitud +ascompar +earlier +propos +paper +examin +whether +thesetechniqu +carri +cluster +workstat +connect +anatm +network +even +though +cluster +standard +oper +systemsoftwar +equip +network +interfac +optim +streamcommun +allow +direct +protect +user +level +access +thenetwork +network +without +reliabl +transmiss +flowcontrol +first +part +paper +describ +differ +incommun +characterist +cluster +workstat +builtfrom +standard +hardwar +softwar +compon +state +artmultiprocessor +lack +flow +control +oper +systemcoordin +affect +commun +layer +design +significantli +andrequir +larger +buffer +multiprocessor +secondpart +evalu +prototyp +implement +latenc +activemessag +commun +model +workstat +clusterinterconnect +network +measur +showappl +applic +latenc +microsecond +smallmessag +roughli +compar +activ +messagesimplement +think +machin +multiprocessor +activ +messag +mechan +integr +commun +andcomput +eicken +culler +goldstein +schauser +proceed +symp +comput +architectur +gold +coast +australia +abstractth +design +challeng +larg +scale +multiprocessor +tominim +commun +overhead +allow +commun +overlapcomput +coordin +without +sacrificingprocessor +cost +perform +show +exist +messag +passingmultiprocessor +unnecessarili +high +commun +cost +researchprototyp +messag +driven +machin +demonstr +communicationoverhead +poor +processor +cost +perform +introduc +simplecommun +mechan +activ +messag +show +isintrins +architectur +allow +cost +effect +thehardwar +offer +tremend +flexibl +implement +ncube +describ +evalu +us +split +phase +share +memoryextens +split +show +activ +messagesar +suffici +implement +dynam +schedul +languag +forwhich +messag +driven +machin +design +mechan +latenc +toler +becom +program +compil +concern +hardwaresupport +activ +messag +desir +outlin +rang +ofenhanc +mainstream +processor +activ +messag +efficientcommun +architectur +multiprocessor +eicken +thesi +novemb +univers +california +berkelei +project +sitesact +messagesin +berkelei +projectfor +inform +contactthorsten +eicken diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..b2574e20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,300 @@ +horu +distribut +comput +environmenthoru +distribut +comput +environ +kenneth +birman +cornel +robbert +reness +cornel +cornel +shoru +effort +develop +program +environ +reliabledistribut +comput +last +year +horu +us +demonstrategroupwar +fault +toler +high +perform +network +foundto +offer +higher +perform +similar +system +novel +featur +ofhoru +flexibl +softwar +architectur +applic +featur +support +virtual +synchronousprocess +group +technolog +develop +prior +work +isi +toolkit +becom +signific +commerci +success +horu +also +offersa +fault +toler +securityand +privaci +technolog +view +importantresearch +advanc +extend +horu +provid +extrem +latenc +high +performancer +time +capabl +approach +combin +element +commun +technolog +calledact +messageswith +multi +media +playbacksystem +calledcontinu +media +year +expect +demonstr +high +speed +interact +applic +remot +multimediaserv +might +us +remot +telemedicin +applic +videoon +demand +system +retain +exist +fault +toler +andsecur +option +horu +virtual +synchroni +program +model +prior +work +isi +creat +substanti +user +base +expectrapid +uptak +horu +within +commun +matur +isi +user +spana +wide +rang +industri +includ +telecommun +system +financialtrad +system +stock +market +autom +factori +floor +process +control +fordiscret +electron +compon +manufactur +traffic +control +space +basedcommun +system +manag +control +applic +isi +beingexplor +sever +branch +militari +well +othernon +militari +govern +branch +among +visibl +militari +effort +isth +naval +hiper +project +explor +isi +systemthat +prototyp +technolog +futur +enhanc +aegi +battleradar +system +demand +applic +user +base +would +benefitfrom +access +horu +initi +plan +make +possibl +migrateisi +applic +horu +chang +thu +benefit +communityin +direct +technolog +transit +occur +licens +agreementswith +isi +distribut +system +subsidiari +stratu +comput +howev +cornel +work +also +avail +research +isdescrib +detail +public +program +manual +look +futur +hope +mixtur +isi +horu +technologieswil +permit +develop +demand +applic +beseen +next +gener +groupwar +plan +system +illustr +belowshow +applic +militari +mission +control +plan +system +integr +data +varieti +space +ground +resourc +andus +coordin +action +variou +theatr +asset +system +thissort +demand +utmost +perform +reliabl +secur +whilealso +toler +failur +rapidli +reconfigur +respond +chang +demand +success +project +thu +impact +wide +rang +civilianand +militari +technolog +effort +dept +comput +scienc +cornel +univers +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..15a21ef9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,107 @@ +cornel +medianet +projectmedianet +high +perform +platform +network +media +process +medianet +combin +technolog +develop +research +cornel +todevelop +flexibl +high +perform +testb +store +transport +process +us +multimedia +data +medianet +combin +user +level +network +architectur +user +level +access +network +dramat +improv +perform +facilit +develop +commun +protocolsth +order +magnitud +improv +communicationmak +parallel +comput +workstat +cluster +practic +horu +group +communicationprimit +multimedia +adapt +industri +strength +group +commun +tool +horu +multimedia +applic +secur +reliabl +group +commun +primit +critic +foradvanc +militari +commerci +multimedia +applic +toolkit +approach +reliabl +distribut +audio +video +applic +portabl +toolkit +build +applic +includeaudio +video +facilit +rapid +prototyp +multimediaappl +fund +project +provid +contract +fromth +darpa +inform +technologyofficefor +inform +contact +thorstenvon +eicken +brian +smith diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..45f14001 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,49 @@ +cornel +nuprl +autom +reason +project +nuprl +project +introduct +nuprl +nuprl +theori +theorem +browser +design +written +vaughn +nuprl +theori +articl +nuprl +theori +articl +nuprl +user +document +relat +public +link +class +note +nuprl +linux +announc +suggest +feedback +help +nuprl +browser +main +index +nuprl +project +nuprl +cornel +curiou +mani +link +page +askaltavista diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..3d99a03d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,358 @@ +cuc +comput +scienc +us +cuc +machin +call +granita +eight +node +granitathrough +granita +cuc +login +granita +granita +design +asinteract +node +shell +instal +tcsh +bash +experi +problemsdur +first +login +remov +oper +system +specif +stufffrom +shell +configur +file +exampl +haveth +arch +command +unam +instead +file +readm +contain +informationabout +releas +us +addit +manyou +infoexplor +inform +commandsand +usag +machin +program +remot +displai +properli +type +info +parallel +job +neither +activemassag +split +info +peor +read +read +parallel +program +activ +messagesor +split +inform +hardwar +cornel +theori +center +homegrown +softwarein +gener +local +softwar +instal +besur +path +split +csplit +simpl +extens +forparallel +comput +provid +global +address +space +though +globalpoint +dereferenc +like +regular +pointer +split +phase +assign +statement +allow +programm +hide +latencyof +remot +access +overlap +comput +commun +exampl +makefil +found +split +bench +bench +work +split +sourc +setenv +user +shellsshould +execut +command +setenv +compil +split +program +creat +makefil +look +sampl +variou +directori +split +bench +bench +type +gmake +must +includ +make +split +makefil +split +program +asact +messag +program +us +scriptsloc +exampl +programfoo +processor +type +foodebug +split +debug +split +program +follow +step +need +done +includ +split +debug +insert +splitc_debug +first +statement +execut +aftersplitc_main +compil +program +describ +previou +section +follow +messag +node +commonli +ongranita +debug +split +enter +continu +hit +return +onto +node +want +debug +youwant +debug +master +node +open +shell +directori +program +sourc +locat +insid +file +am_run +thenattach +theth +proc +am_run +process +node +debug +return +node +comput +proce +attach +am_run +am_run +stop +andyou +breakpoint +look +stack +frame +activ +messagesact +messag +overhead +commun +layerthat +offer +high +perform +commun +mani +parallel +machin +nativ +activ +messag +layer +spam +avail +main +perform +characterist +word +round +triplat +asymptot +network +bandwidth +spam +librari +found +libspgam +aand +header +file +includ +beforerun +program +activ +messag +sourc +setenv +read +runningprgm +script +locat +also +mpimpi +popularmessag +pass +interfac +portabl +parallel +program +animplement +base +mpich +librari +run +overact +messag +header +file +locat +includ +librari +file +locat +easiest +compil +link +script +file +ampicc +whichi +built +ampicc +fooyou +also +compil +program +split +pleas +lookat +examplesin +directori +ampi +exampl +inform +program +exactli +likeordinari +activ +messag +program +sure +sourc +setenv +softwaresoftwar +avail +granita +granita +also +includ +tcsh +bash +fortran +xpdbx +matlab +softwar +instal +includ +emac +gmake +bison +replic +local +local +problemsif +experi +difficulti +pleas +contact +czar +grzegorz +czajkowski diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..1f9163de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,139 @@ +cornel +model +simul +project +home +page +cornel +model +simul +project +enorm +effort +current +expend +creat +scientificsoftwar +particularli +simul +physic +system +defin +oncomplex +geometri +us +advanc +comput +hardwar +thegoal +simlab +project +reduc +effort +bringingtogeth +technolog +geometr +model +symbolicmathemat +numer +analysi +compil +code +gener +andform +method +creat +tool +rais +semant +levelat +possibl +creat +scientif +softwar +overview +project +simlab +softwarepackag +select +research +activ +collabor +mathemat +environ +research +propos +postscript +version +guarante +qualiti +mesh +gener +microstoragearchitectur +weyl +computeralgebra +substrat +high +levelprogram +languag +synthes +scientif +softwar +thechain +algebra +topolog +program +languag +select +present +simlab +compon +thearpa +nist +madefast +collabor +design +manufactur +exercis +longer +version +direct +insystem +research +richard +zippel +present +ideason +system +research +proce +includ +brief +discuss +ofnon +contemporan +commun +microstorag +architectur +theus +program +transform +chainsprogram +languag +languag +comput +complextopolog +system +engin +model +numericalalgorithm +rick +palmer +peopl +select +public +report +paul +chew +chew +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..8e2d2b8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,76 @@ +cornel +split +ccornel +split +cornel +split +implementationssplit +neta +sourc +code +releas +split +prepar +isimpl +activ +messagesfor +inform +contact +thorsten +eicken +split +sourc +code +releas +ofsplit +split +distr +implementedon +spam +inform +contactchi +chao +chang +grzegorz +czajkowski +thorstenvon +eicken +split +share +memori +multiprocessorsa +sourc +code +releas +split +multiprocessor +runningsolari +prepar +inform +mattwelsh +inform +page +select +public +split +cparallel +program +split +culler +dusseau +goldstein +krishnamurthi +lumetta +eicken +yelick +proceed +supercomput +novemb +abstractproject +sitessplit +chome +page +berkelei +inform +contactthorsten +eicken diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..393e8093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,9 @@ +page +move +browser +redirect +second +http +cornel +default +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..1fa24588 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,345 @@ +cornel +csrvlcornel +robot +vision +laboratorywelcom +nich +cornel +robot +vision +laboratori +match +match +rrentli +develop +pleas +hard +question +comment +direct +cornel +thank +csrvl +cornel +comput +scienc +robot +vision +laboratori +locat +cornel +univers +ithaca +three +main +area +ofresearch +comput +vision +prof +daniel +huttenloch +multimedia +applic +comput +vision +prof +ramin +zabih +robot +includ +distribut +manipul +micro +electro +mechan +system +mem +prof +bruce +donald +pictor +tour +csrvl +current +projectsth +follow +project +activ +csrvl +supervis +byramin +zabih +automat +detect +andclassif +scene +break +digit +video +mpeg +browser +allowingscen +break +global +motion +base +queri +real +time +video +sourc +transmiss +full +frame +video +parallel +comput +onplatform +cornel +nynet +cluster +number +project +involv +high +perform +imag +applic +includ +parallel +implement +split +foru +symmetr +multiprocessor +list +potenti +master +sproject +maintain +justin +miller +work +done +unix +currentlyconsid +move +windowsnt +discuss +theissuesher +hope +move +support +microsoft +select +publicationsth +follow +list +select +paper +research +done +thecsrvl +mani +paper +avail +anonym +mani +public +cornel +robot +vision +laboratori +avail +cornel +tech +report +server +paper +avail +serverar +list +program +mobil +robot +scheme +donald +ree +proc +ieee +intern +confer +robot +automationnic +franc +complex +comput +homolog +type +triangul +donald +chang +revis +base +paper +ieee +symposium +foundat +comput +scienc +juan +octob +inform +invari +distribut +manipul +donald +jen +first +workshop +algorithm +foundat +robot +peter +boston +wilson +andj +latomb +inform +invari +robot +donald +revis +base +paper +submit +artifici +intellig +automat +sensor +configur +task +direct +plan +donald +brigg +proceed +ieee +intern +confer +robot +autom +diego +sensorless +manipul +us +massiv +parallel +microfabr +actuatorarrai +bhringer +donald +mihailovich +macdonald +proc +ieee +intern +confer +robot +autom +diego +theori +manipul +control +microfabr +actuat +arrai +bhringer +donald +mihailovich +macdonald +proceed +ieee +workshop +micro +electro +mechan +system +oiso +japan +januari +comput +approach +design +micromechan +hing +structur +extend +abstract +bhringer +proceed +siggraph +symposium +solid +model +applic +montral +quebc +canada +paper +list +technic +report +authorthes +list +gener +dynam +cornel +server +server +index +search +technic +report +author +titl +keyword +scott +cytacki +bruce +donald +associ +professor +pedro +felzenszwalb +daniel +huttenloch +associ +professor +ryan +lilien +michel +maharbiz +justin +miller +greg +pass +daniel +scharstein +aaron +stump +szewczyk +fernando +viton +justin +voskuhl +wayt +matt +welsh +greg +whelan +ramin +zabih +assist +professor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..1e180cb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,114 @@ +inform +captur +access +projectinform +captur +accessth +inform +captur +access +research +group +work +waysthat +comput +locat +inform +ever +increas +volum +ofonlin +data +determin +structur +extract +inform +forhuman +user +group +found +john +hopcroft +davisin +current +area +researchextract +structur +materi +onlin +document +thestructur +explicit +document +extractinginform +present +tabular +form +relat +databas +construct +summari +overview +collectionsof +text +construct +nationwid +librari +comput +sciencetechn +report +begun +digit +cornel +computersci +technic +report +collect +order +make +work +moreaccess +internet +collect +avail +server +addit +toit +util +gener +research +commun +thisdocu +collect +test +materi +research +inform +access +group +consist +cornel +research +dean +krafft +visitingscientist +jimdavi +well +number +graduat +undergradu +student +fall +project +activ +longer +jrdpublicationsjam +allan +informationag +build +hyperlink +proceed +confer +oninform +knowledg +manag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..da58a74f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,17 @@ +zeno +research +groupzeno +cornel +multimedia +research +group +peopl +mission +project +paper +softwar +multimedia +curricula +develop +potpourri +direct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..82c06f26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,118 @@ +home +page +home +pagewelcom +depart +issu +pictur +left +see +quit +date +page +frame +challeng +viewer +syosset +york +town +long +island +receiv +bachelor +scienc +degre +decemb +work +month +california +decid +come +back +fall +current +work +master +engin +degre +leav +land +come +back +ithaca +mayb +miss +season +rain +wind +snow +actual +enough +rain +santa +barbara +anywai +plan +graduat +current +work +meng +project +prof +ramin +zabih +cornel +robot +vision +csrvl +interest +topic +motion +video +segment +gener +video +process +paper +relat +research +area +link +compani +green +hill +softwar +santa +barbara +californialockhe +martin +control +system +binghamton +yorkaltera +corp +jose +californiafun +stuff +game +domainvth +babylon +siteoth +place +univers +worldcareermosaictop +site +student +page +email +kmai +cornel +cours +page +still +construct +last +modifi +januari +access +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..03f53ee8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,48 @@ +utc +home +pagegener +inform +faculti +recruit +depart +overview +research +group +relat +program +academ +inform +admiss +requir +cours +descript +catalog +depart +public +comput +facil +upcom +event +public +calendar +seminar +utc +talk +visitor +schedulespag +peopl +class +person +page +student +organ +alumni +link +find +peopl +faculti +staff +directoryth +universitywww +informationgrip +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..bd522a1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,97 @@ +woodrow +bledso +woodrow +bledsoepet +donnel +centenni +chair +emeritu +comput +system +professor +emeritu +mathemat +mathemat +univers +utah +salt +lake +citi +mathemat +univers +california +berkelei +honor +award +profession +servic +third +mileston +award +autom +theorem +prove +americanmathemat +societi +distinguish +servic +award +intern +jointconfer +presid +american +associ +artifici +intellig +board +truste +intern +joint +confer +artificialintellig +chair +board +truste +intern +joint +confer +onartifici +intellig +board +editor +intern +journal +artificialintellig +presentarea +interestautomat +theorem +prove +artifici +intellig +summari +researchmi +research +focus +autom +theorem +prove +automat +theoremproof +check +involv +heurist +higher +levelplan +well +exampl +analog +alsointerest +research +analog +learn +artificialintellig +previou +profil +index +next +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..b1880478 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,153 @@ +john +werth +john +werthsenior +lectur +research +scientist +mathemat +mathemat +emori +univers +mathemat +univers +washington +profession +servic +chair +educ +board +comput +research +associ +board +comput +scienc +accredit +board +vice +chair +educ +technic +committe +softwareengin +ieee +present +chair +area +interestparallel +program +softwar +engin +compil +computersci +educ +summari +researchmi +current +interest +program +environ +parallelprogram +associ +softwar +engin +compil +andimplement +issu +also +activ +set +direct +incomput +scienc +educ +local +nation +level +select +recent +publicationss +hyder +werth +brown +unifi +model +concurr +debug +proceed +intern +confer +parallel +process +ieee +comput +societi +august +werth +brown +sobek +newton +jain +interact +formal +practic +parallel +program +environ +develop +code +lectur +note +comput +scienc +york +springer +verlag +jain +werth +brown +schedul +parallel +oper +multipl +system +journal +parallel +distribut +comput +decemb +jain +werth +brown +gener +model +schedul +parallel +comput +applic +parallel +oper +proceed +intern +confer +parallel +process +august +werth +werth +direct +softwar +engin +educ +proceed +thirteenth +intern +confer +softwar +engin +previou +profil +index +next +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..642aaf90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,62 @@ +alfr +dale +alfr +daleno +person +page +trammel +crow +regent +professor +emeritu +comput +scienc +exet +colleg +oxford +england +univers +texa +austin +area +interestdatabas +manag +system +databas +architectur +summari +researchmi +area +interest +involv +applic +parallel +multi +stagei +architectur +databas +manag +problem +studiedinclud +data +distribut +strategi +distribut +index +andmap +relat +algebra +oper +architectur +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..41e8df0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,127 @@ +jeffrei +brumfield +jeffrei +brumfieldsenior +lectur +math +comput +scienc +mathemat +univers +georgia +comput +scienc +comput +scienc +purdu +univers +honor +award +colleg +natur +scienc +teach +excel +award +area +interestperform +analysi +distribut +system +oper +system +summari +researchi +interest +role +model +plai +comput +scienc +designersof +comput +system +mathemat +model +studi +performanceof +exist +propos +system +queue +network +model +eachresourc +comput +system +repres +queue +tasksawait +servic +solut +model +involv +computationof +respons +time +queue +length +throughput +select +recent +publicationsj +brumfield +shen +richter +graf +verdi +visual +environ +design +distribut +system +journal +ofparallel +distribut +comput +brumfield +miller +chou +perform +modelingof +distribut +object +orient +databas +system +intern +symposium +databas +parallel +distributedsystem +austin +texa +decemb +brumfield +concurr +program +modula +inproceed +sigcs +technic +symposium +loui +sigcs +bulletin +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..afffcb64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,164 @@ +alan +cline +alan +clinedavid +bruton +centenni +professor +comput +scienc +professor +mathemat +appli +mathemat +mathemat +mathemat +univers +michigan +profession +servic +editor +algorithm +commun +associ +editor +transact +mathemat +softwar +editori +board +siam +journal +scientif +statisticalcomput +director +special +interest +group +numer +mathemat +southern +region +director +comput +profession +socialrespons +area +interestmathemat +softwar +numer +analysi +summari +researchi +interest +transform +mathemat +tool +whichcan +appli +scientif +problem +involv +constructionof +mathemat +softwar +explor +methodolog +formathemat +softwar +particular +major +softwar +developmentha +packag +hundr +subprogram +curv +andsurfac +fit +emploi +tension +spline +select +recent +publicationsr +renka +cline +scatter +data +fit +us +constrain +delaunai +triangul +imac +transact +scientif +comput +expert +system +symbol +comput +north +holland +cline +king +meyer +rout +schedul +coast +guard +buoi +tender +interfac +cline +renka +constrain +dimension +triangul +solut +closest +node +problem +presenc +barrier +siam +journal +numer +analysi +cline +counter +exampl +three +condit +number +estim +siam +journal +scientif +statist +comput +cline +moler +stewart +wilkinson +estim +condit +number +matrix +siam +journal +numer +analysi +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..feddad34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,83 @@ +edsger +wybe +dijkstra +edsger +wybe +dijkstraschlumberg +centenni +chair +comput +sciencesprofessor +mathematicskandidaatsexamen +mathemat +physic +doctora +examen +theoret +physic +univers +leydenph +univers +amsterdamhonor +awardsacm +ture +award +foreign +honorari +member +american +academi +art +sciencesmemb +royal +netherland +academi +art +sciencesdistinguish +fellow +british +comput +societyafip +harri +good +memori +award +doctor +scienc +honori +causa +queen +univers +belfastarea +interest +program +correct +mathemat +methodolog +algorithm +systemssummari +research +area +interest +focus +streamlin +mathemat +argumentso +increas +power +reason +particular +ofform +techniqu +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..6b1dfcf1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,74 @@ +chri +edmondson +yurkanan +chri +edmondson +yurkananlectur +mathemat +comput +scienc +comput +scienc +univers +texa +austin +profession +servic +secretari +treasur +sigcomm +area +interestcomput +network +comput +scienc +educ +manag +larg +softwar +project +mobil +network +databas +design +summari +researchmi +research +interest +protocol +high +speed +commun +protocol +specif +internetwork +select +recent +public +cobb +edmondson +yurkanan +andm +gouda +univers +mobil +address +internet +inproceed +annual +comput +theori +informaticsconfer +press +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..b8d55bad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,66 @@ +suzi +gallagh +suzi +gallagherlectur +coordin +academ +program +secondari +educ +loyola +univers +comput +scienc +univers +southwestern +louisiana +profession +servic +sigcs +confer +committe +necc +confer +committe +confer +committe +area +interestcomput +scienc +educ +librari +inform +process +summari +researchmi +interest +area +student +servic +recruit +andretent +women +minor +improv +comput +scienceeduc +secondari +school +local +area +univers +system +retriev +techniqu +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..f1165024 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,173 @@ +jenevein +jenevein +senior +lectur +chemistri +chemistri +louisiana +state +univers +orlean +area +interestinterconnect +network +parallel +process +comput +architectur +summari +researchmi +research +comput +architectur +focus +interconnectionnetwork +success +failur +parallel +comput +system +restsin +abil +devis +appropri +cost +perform +interconnectionstructur +recent +work +interconnect +involv +thedevelop +wafer +scale +optic +interconnect +special +kindof +laser +wave +guid +design +beinginvestig +techniqu +lead +fault +toler +parallelsystem +wafer +optic +interconnect +beingappli +optic +system +buss +optic +communicationswitch +work +perform +processor +system +iscontinu +methodolog +measur +processor +performanceport +across +machin +develop +contrast +tobenchmark +repres +true +measur +processor +memorysystem +select +recent +publicationsr +jenevein +menez +kyklo +multicomput +network +interconnect +strategi +properti +applic +ieee +transact +comput +june +jenevein +laranjeira +malek +nest +nest +predic +scheme +fault +toler +ieee +transact +comput +press +jenevein +ullah +metrix +precis +methodolog +comput +system +perform +measur +proceed +intern +confer +comput +applic +industri +engin +decemb +jenevein +menez +johnson +malek +fault +impact +fault +toler +multiprocessor +interconnect +network +journal +qualiti +reliabl +engin +octob +jenevein +campbel +wafer +scale +optic +interconnect +prototyp +proceed +intern +confer +wafer +scale +integr +januari +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..05c29525 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,81 @@ +norman +martin +norman +martinprofessor +emeritu +comput +scienc +professor +emeritu +ofphilosophi +philosophi +univers +chicago +philosophi +univers +california +angel +area +interestmathemat +logic +comput +architectur +summari +researchmi +current +activ +concentr +abstract +structur +asinterpret +logic +theori +center +closur +space +whichexploit +notion +deduct +closur +logic +oper +andon +intension +model +classic +mathemat +significantearli +research +comput +architectur +logic +design +especi +missil +space +vehicl +applic +trackingalgorithm +track +scan +radar +function +complet +inmani +valu +delai +logic +logic +metatheori +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..f304aae5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,248 @@ +aloysiu +aloysiu +mokassoci +professorfaculti +fellow +comput +scienc +electr +engin +massachusett +institut +technolog +profession +servic +associ +editor +real +time +system +intern +journal +time +critic +comput +system +present +editori +board +intern +journal +formal +method +systemdesign +present +program +committe +intern +comput +symposium +taiwan +vice +chair +ieee +technic +committe +real +time +system +chair +ieee +technic +committe +real +time +system +present +work +group +real +time +program +intern +federationof +automat +control +presentarea +interestfault +toler +hard +real +time +system +system +architectur +comput +aid +system +design +tool +softwar +engin +summari +researchi +current +conduct +fundament +research +area +ofdistribut +real +time +system +primari +concern +includespecif +techniqu +real +time +system +algorithm +forguarante +stringent +time +constraint +understand +thetrad +robust +respons +time +time +criticalsystem +goal +develop +formal +framework +autom +theanalysi +synthesi +robust +real +time +system +applic +areasinclud +robot +control +system +avion +softwar +industrialprocess +control +system +fund +provid +offic +ofnav +research +develop +highli +autom +design +environ +forreal +time +system +select +recent +publicationsa +toward +mechan +real +time +system +design +foundat +real +time +comput +formal +specif +method +tilborg +kluwer +academ +publish +heitmey +labaw +clement +engin +case +tool +support +formal +method +real +time +softwar +develop +proceed +fifth +intern +workshop +comput +aid +softwar +engin +montreal +juli +wang +emerson +formal +specif +asynchron +distribut +real +time +system +aptl +proceed +intern +confer +softwar +engin +melbourn +load +adjust +adapt +real +time +system +proceed +real +time +system +symposium +antonio +decemb +wang +tsou +wang +aloysiu +brown +autom +analysi +bound +respons +time +nasa +expert +system +proceed +ieee +sigsoft +confer +orlean +decemb +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..c11c9236 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,90 @@ +hamilton +richard +hamilton +richard +senior +lecturerb +engin +appli +physic +harvard +collegem +aero +astronaut +engin +stanford +universityph +comput +scienc +iowa +state +universityprofession +servicecoordin +univers +texa +austin +year +program +seri +editor +vol +addison +weslei +area +interest +function +program +concurr +process +object +orient +program +undergradu +educationsummari +research +maintain +long +stand +interest +function +program +potentialfor +concurr +process +suitabl +formal +reason +infal +us +function +program +languag +teach +sectionof +work +time +permit +function +languag +implementationof +real +microcomput +applic +longer +term +project +book +onfunct +algorithm +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..e867e70a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,187 @@ +abraham +silberschatz +abraham +silberschatzprofessorship +comput +sciencesm +stoni +brookhonor +award +profession +serviceiee +comput +societi +outstand +paper +award +ieee +journal +paper +advisori +committe +nation +scienc +foundat +divis +inform +robot +intellig +system +gener +confer +chair +seventh +eighth +sigact +sigmod +symposiumon +principl +databas +system +pod +organ +ullman +invit +workshop +futureof +databas +system +research +program +chair +pod +ieee +symposium +parallel +distributedsystem +intern +confer +knowledg +manag +area +interest +databas +system +oper +system +distribut +system +knowledg +basedsystemssummari +research +main +area +special +concurr +process +recentresearch +concentr +area +multidatabas +transactionmanag +parallel +process +knowledg +base +system +real +time +databasesystem +multiresolut +databas +system +continu +media +storag +server +high +perform +transact +system +select +recent +publicationss +ganguli +silberschatz +tsur +map +datalog +programexecut +network +processor +ieee +transact +knowledgeand +data +engin +june +jagadish +lieuwen +rastogi +silberschatz +sudarshan +dali +high +perform +main +memori +storag +manag +internationalconfer +larg +databas +septemb +ozden +biliri +rastogi +silberschatz +cost +storageserv +movi +demand +databas +intern +confer +onveri +larg +databas +septemb +ozden +rastogi +silberschatz +framework +storageand +retriev +continu +media +data +ieee +intern +conferenceon +multimedia +comput +system +read +fussel +silberschatz +multi +resolut +relationaldata +model +intern +confer +larg +databas +august +addit +inform +obtain +fromindividu +faculti +member +home +page +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..b40c530c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,19 @@ +robert +simmon +robert +simmonsquinci +centenni +professor +emeritu +comput +scienc +professoremeritu +psychologymai +novemb +bledso +simmon +rememb +back +list +faculti +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..3606c838 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,59 @@ +adam +seligman +home +pageadam +seligman +home +page +click +log +gradual +student +austin +program +aweekli +happi +hour +depart +claim +fame +undergradu +thesi +specifiedth +type +rule +oper +semant +core +avail +gzip +fileor +gzip +postscript +file +knowwhat +think +touch +email +adam +utexa +call +page +email +pagemart +graphic +phone +number +read +progress +vrml +paper +new +junki +fromreut +yahoo +altern +could +check +nando +time diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..a15d5aa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,49 @@ +agapito +sustaita +agapito +sustaita +univers +texa +austincognit +scienc +interest +machin +learn +languag +acquisit +chill +specif +connection +commonsens +reasoningschoolingph +comput +scienc +univers +texa +austin +hopefulli +comput +scienc +texa +univers +colleg +station +comput +scienc +univers +california +santa +barbara +miscellaneouspost +addressth +univers +texa +austin +depart +comput +scienc +austin +mail +agapito +utexa +eduphon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..978cbcb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,116 @@ +artifici +intellig +laboratoryut +artifici +intellig +laboratoryth +artifici +intellig +laboratori +atth +univers +texa +austinha +distinguish +histori +larg +number +excel +faculti +andgradu +student +new +world +report +rank +program +nation +close +link +comput +scienc +depart +faculti +boyer +autom +theorem +prove +robert +causei +logic +philosoph +foundat +benjamin +kuiper +qualit +reason +vladimir +lifschitz +reason +action +risto +miikkulainen +neural +network +mirank +rule +base +system +moonei +machin +learn +gordon +novak +automat +program +physic +problem +solv +bruce +porter +multi +function +knowledg +base +emeritu +faculti +woodi +bledso +deceas +autom +theorem +prove +dream +aaai +presidenti +address +robert +simmon +memoriam +postdoc +peter +clark +souther +technic +report +softwar +directori +current +avail +technic +report +autom +theorem +prove +technic +report +kuiper +miikkulainen +moonei +novak +porterpoint +lab +fund +agenciescontact +novak +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..00c1b26e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,51 @@ +ajita +johnajita +john +candid +parallel +program +group +depart +comput +scienc +univers +texa +austin +hello +research +work +system +automat +parallel +programmingframework +base +constraint +compil +parallelprocedur +program +advisor +professor +brownemi +papersmi +work +us +translat +routin +code +parallel +program +system +want +contact +postal +comput +scienc +austin +austin +usavoic +main +offic +offic +taylor +ajohn +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..b39774d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,162 @@ +vicki +almstrum +utc +home +page +vicki +almstrumabout +educ +comput +scientist +interest +understand +peopl +learn +learn +particularli +interestedin +learn +mathemat +logic +formal +method +doctoralresearch +topic +limit +understand +mathematicallog +novic +comput +scienc +student +lectur +univers +texa +austin +addit +ispent +fall +semest +teach +uppsala +sweden +home +pagether +link +univers +interest +includ +encourag +other +excel +mathemat +computersci +garden +travel +craft +sew +woodwork +pictur +hubbi +torgni +stadler +check +site +itics +confer +integr +technolog +comput +scienc +educationjun +work +group +june +uppsala +swedenoth +page +maintain +class +teach +austin +technolog +camp +field +comput +scienc +educ +includ +research +method +evalu +mentor +issu +interest +jump +point +area +suffer +spurt +construct +frenzi +home +page +organ +belong +sigcs +special +interest +group +comput +scienc +educationsigsoft +special +interest +group +softwar +engineeringacm +associ +comput +machineryieeeth +institut +electr +electron +engineerscpsrcomput +profession +social +responsibilityconnect +home +page +austin +comput +scienc +austin +texa +elsewhereto +contact +offic +depart +comput +scienc +univers +texa +austin +austin +main +offic +direct +seldom +home +alwai +connect +need +forewarn +leav +plenti +time +email +address +almstrum +utexa +almstrum +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..85546ffe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,21 @@ +hung +hing +anthoni +pang +home +pagehung +hing +anthoni +pang +offic +offic +hour +mondai +wednesdai +email +anthoni +utexa +anthoni +inform +compil +cours diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..7d655c3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,39 @@ +aruna +homepag +aruna +addalacurr +graduat +studentth +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +educ +bachelor +engin +comput +sciencess +colleg +engineeringmysorework +experi +lectur +fall +fall +depart +studi +comput +sciencesunivers +mysoreindiai +come +mysor +cityindiato +contact +email +aruna +utexa +eduvoic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..2c388d42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,24 @@ +home +page +ashi +tarafdarashi +tarafdarabout +get +round +let +exist +known +patienc +pleas +contact +mepost +comput +scienc +austin +austin +usavoic +main +offic +offic +ashi +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..c7d179f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,82 @@ +roberto +bayardo +home +pageroberto +bayardo +candid +expect +complet +date +fall +depart +comput +scienc +univers +texa +austin +current +also +work +within +infosleuth +project +research +interest +queri +process +activ +expert +databas +system +data +mine +constraint +satisfactionmi +thesi +advisor +prof +daniel +mirank +research +paper +line +along +toolkit +generatingand +solv +exception +hard +instanc +contact +inform +mail +address +bayardo +utexa +campu +mail +address +univers +texa +austin +dept +comput +scienc +taylor +hall +austin +histori +comput +scienc +engin +electr +engin +comput +scienc +work +center +coordin +scienc +number +sinc +march diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..270270cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,48 @@ +bert +bert +kayresearch +reason +refin +imprecis +model +physic +process +overviewof +research +vitami +network +retriev +paper +dissert +entitl +refin +imprecis +model +behavior +abstract +stuffsonia +andnina +page +drink +ofth +month +springbank +scotchdrinksof +month +past +contact +informationemail +address +bert +utexa +offic +taylor +hall +address +depart +comput +scienc +univers +texa +austin +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..8e50d962 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,42 @@ +home +page +bhanu +welcom +bhanu +homepagethi +akhil +reddythank +visit +homepag +visitor +number +school +univers +texa +austinm +comput +scienc +third +semest +coursesc +multimedia +system +harrick +vinc +introduct +mathemat +logic +vladimir +lifschitz +datacommun +network +anitish +barua +comput +system +architectur +schwetmani +term +project +databas +manag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..685a3c27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,302 @@ +michael +bogomolnymichael +bogomolni +cogsci +advert +although +pictur +sometim +feel +read +articl +current +first +semest +comput +scienc +program +univers +texa +austin +physic +comput +scienc +amherst +colleg +research +interestsnot +intend +work +jenef +husman +risk +avers +decis +final +project +risk +avers +decis +quarter +outcom +coin +toss +would +probabl +accept +coin +toss +would +probabl +reject +peopl +reject +fair +bet +well +sound +econom +theori +involv +maxim +util +diminish +return +explain +howev +explain +peopl +ask +question +formul +risk +take +risk +prevent +manner +respond +differ +exampl +belov +tverski +kahneman +taken +almost +verbatimfrom +frame +decis +psycholog +choic +scienc +imagin +prepar +outbreak +unusu +diseas +expect +kill +peopl +altern +program +combat +diseas +beenpropos +assum +exact +scientif +estim +consequ +programsar +follow +problem +program +adopt +peopl +save +program +adopt +probabl +peopl +besav +probabl +peopl +save +problem +program +adopt +peopl +program +adopt +probabl +nobodi +probabl +peopl +program +would +favor +analog +digitalif +human +brain +made +neuron +neuron +fire +depend +level +electrochem +charg +built +axon +make +brain +analog +biolog +foundat +shaki +ahead +scream +hypothesi +wrong +comput +transfer +inform +particular +wire +either +high +voltag +interpret +make +comput +analog +well +accur +process +inaccur +outcom +sometim +simpli +come +wrong +answer +make +mistak +subtract +balanc +checkbook +rememb +invalid +telephon +number +mayb +gave +number +wasn +real +telephon +number +anoth +stori +nevertheless +would +hard +press +point +misfir +neuron +account +error +correct +process +lead +incorrect +result +human +cognit +process +sound +even +complet +remind +make +quot +italic +class +append +introduct +cours +graduat +comput +scienc +researchcognit +sciencearitifici +intelligencemathemat +logictopolog +ghrist +oper +system +cogsci +paper +symbol +differenti +puzzl +theorem +prover +contact +inform +email +bogo +utexa +better +send +postcard +phone +postal +address +wilshir +parkwai +austin +updat +informationthi +page +written +us +text +editor +last +updat +insert +empti +promis +construct +updat +soon +suppos +list +hidden +talentsdefinit +quantum +bogodynamicsdefinit +bogo +sortwhil +feel +free +look +bogos +bogomet +bogu +bogon +bogon +filter +bogon +flux +bogu +bogotifi +autobogotiphobia +blinkenlight +lasher +pleas +work +connect +stupid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..c33d3a03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,223 @@ +home +page +robert +boyerhom +page +robert +stephen +boyer +professor +comput +scienc +mathemat +philosophydepart +univers +texa +austinhow +reach +mepap +mail +boyer +comput +scienc +dept +univ +texa +austin +usaemail +boyer +utexa +edufax +physic +locationsclassescurriculum +vitaeperson +dataeducationpublicationshonorsjobsgradu +studentsth +boyer +moor +prover +also +knowna +nqthm +photo +recommend +read +project +vote +smart +webth +projectmi +view +undergradu +educ +comput +scienc +john +mccarthi +pageth +moffett +build +controversyni +time +articl +mccune +robbin +algebra +result +andsom +technic +detail +verif +float +point +divis +algorithm +microprocessor +wonder +softwar +licens +polici +permitsth +public +licens +close +zero +administrativeoverhead +short +cours +howthi +work +much +intellectu +properti +thegreat +book +variou +enumer +thereof +confess +acanon +thumper +possibl +end +tenur +universitiesstandard +disclaim +natur +noth +page +shouldb +taken +repres +offici +posit +univers +oftexa +austin +part +govern +state +oftexa +furthermor +steal +joke +peter +deutsch +aweb +page +govern +own +comput +taken +anind +endors +everyth +govern +formal +method +alwai +riski +peano +first +call +symbol +logic +introduc +instanc +symbol +mean +habitu +wrote +hislectur +note +symbol +teach +militaryacademi +time +student +incens +hisformalist +approach +mathemat +rebel +despit +hispromis +pass +fire +subsequ +found +amor +congeni +set +univers +turin +sincomplet +theorem +rudi +rucker +death +fundament +scienc +fundament +scienc +verg +extinct +said +harold +kroto +britain +sussex +univers +share +chemistrypr +robert +curl +richard +smallei +rice +univers +inhouston +discoveri +carbon +atom +bound +shape +asocc +ball +articl +scientist +lament +loss +fund +associ +press +decemb +daili +texan +upup +univers +texa +austin +comput +scienc +depart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..b70d37d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,287 @@ +jame +brown +jame +brownereg +chair +comput +scienc +professor +physic +professor +electr +comput +engin +hendrix +collegeph +univers +texa +austinhonor +award +fellow +british +comput +societi +fellow +american +physic +societyarea +interestparallel +comput +major +focu +parallel +program +high +level +specif +languag +integr +comput +sciencewith +applic +area +summari +researchi +work +parallel +program +tenyear +computation +orient +displai +environ +code +anabstract +declar +graphic +environ +parallel +program +evolv +three +gener +ongo +research +includesmethod +optim +parallel +comput +structur +highlevel +abstract +integr +parallel +structur +throughdata +partit +gener +data +flow +model +code +debug +graphic +visual +environ +compositionalapproach +parallel +program +addit +intelligenceprocess +control +parallel +program +comput +fluiddynam +also +work +design +develop +narrow +domaincompil +high +level +specif +languag +includ +logic +basedlanguag +robust +method +program +intellig +real +timedecis +system +select +recent +publicationsj +brown +hyder +dongarra +moor +newton +visual +program +debug +parallel +comput +ieee +parallel +distribut +technolog +spring +volum +number +compar +visual +parallel +program +environ +henc +code +brown +hyder +dongarra +moor +newton +visual +program +debug +parallel +comput +technic +report +dept +comput +scienc +univ +texa +austin +compar +visual +parallel +program +environ +henc +code +longer +version +paper +refer +brown +werth +interact +formal +andpract +develop +parallel +program +environ +code +parallel +program +system +proceed +fourthworkshop +languag +compil +parallel +comput +santacruz +california +august +brown +jain +werth +experiment +studi +theeffect +high +level +parallel +program +proceed +ofth +siam +confer +parallel +process +brown +mirank +parallelizingcompil +rule +base +program +proceed +intern +confer +parallel +process +august +hyder +werth +brown +unifi +model +concurr +debug +proceed +intern +confer +parallel +process +ieee +comput +societi +august +kleyn +brown +high +level +languag +specifi +graph +base +languag +program +environ +intern +confer +softwar +engin +baltimor +april +postscript +file +extend +version +paper +newton +brown +code +graphic +parallel +program +languag +proc +conf +supercomput +juli +paper +describ +prototyp +implement +code +notat +chang +idea +paper +remain +good +broad +introduct +code +brief +brown +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..dee06fdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,88 @@ +utc +home +page +vlsi +research +group +addressdepart +comput +scienc +univers +texa +austin +austin +peopl +group +supervis +prof +martin +wong +member +group +chang +chung +ping +chenyao +ping +chen +yung +ming +fang +depart +shashidhar +thakur +zhou +researchth +current +interest +group +wide +rang +area +vlsi +area +broadli +classifi +follow +fpga +placement +rout +fpga +architectur +partit +architectur +logic +synthesi +issu +high +perform +vlsi +abstract +recent +public +groupcan +found +trace +link +link +interest +sigda +special +interest +group +design +autom +ieee +institut +electr +electron +engin +inform +comment +inform +depart +austinclick +comment +mail +thakur +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..bbf08b4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,102 @@ +bill +canfieldhom +businessmi +resum +postscript +spring +give +report +softwar +highli +avail +distribut +system +class +slide +talk +effort +mach +implement +flaviu +cristian +distribut +algorithm +work +done +prof +built +work +guangtian +current +work +hardwar +verif +project +ther +divis +ti +research +professor +allen +emerson +pleasuredomest +bliss +depart +photo +wife +carla +newborn +daughter +ruth +clair +parenthood +struck +travel +beer +high +prioriti +somewher +li +enjoy +peel +beer +label +bottl +foreign +land +humor +variou +sourcesth +sofaspher +project +haiku +olestra +approv +substitut +speak +poetri +interest +women +disinform +dole +canfield +utexa +last +updat +april +thank +todd +peter +peterst +mail +utexa +mani +humor +link +andth +home +pictur +cool +page +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..3e1410ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,79 @@ +carruth +carruthpleas +send +mail +carruth +utexa +question +suggest +introduct +candid +austin +depart +computersci +supervis +professor +jayadev +misra +mydissert +topic +real +time +uniti +member +professor +misra +research +group +extend +uniti +theori +order +express +finit +time +boundson +usual +uniti +oper +progress +safeti +alsointerest +function +program +languag +partial +ordersemant +autom +theorem +prove +contact +inform +person +home +page +offic +address +offic +phone +home +phone +email +address +carruth +utexa +mail +carruth +depart +comput +scienc +taylor +hall +univers +texa +austin +austin +link +world +wide +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..b238ee00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,49 @@ +chung +ping +chen +chung +ping +chen +student +depart +comput +scienc +univers +texa +austin +fiance +meng +tsai +current +intel +summer +intern +work +bufferinsert +problem +syllabu +grade +polici +exam +schedul +homework +exercis +schedul +offic +hour +locat +new +utexa +class +fall +syllabustopicschung +ping +clen +last +updat +idea +improv +page +send +suggest +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..fcbe161e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,44 @@ +home +page +deji +chen +deji +chenabout +mehello +homepag +student +tongji +univers +shanghai +chinaa +bullet +list +easi +includ +well +first +item +anoth +third +anoth +paragraph +forget +paragraph +break +contact +mepost +comput +scienc +austin +austin +usahom +lake +austin +blvd +austin +usaphon +main +offic +offic +home +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..11a826b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,155 @@ +cliff +chaputcliff +chaputth +univers +texa +austindepart +comput +sciencestaylor +hall +austin +robotlab +home +dullchaput +utexa +cliff +comput +studi +northwestern +univers +gothimself +emploi +programm +anywai +spent +year +write +anemail +client +portabl +visual +object +librari +odesta +system +corpor +left +institut +thelearn +scienc +hewrot +educ +trane +softwar +macintosh +common +lisp +thenimpl +simul +environ +educ +high +school +studentscal +gamesproject +cliff +graduat +student +comput +scienc +program +austin +hang +robot +labannoi +peopl +hair +brain +scheme +mean +symbol +represent +artifici +life +program +cliff +sleep +dream +read +write +fiction +listen +farka +tour +medeski +martin +wood +watch +mstk +rerun +plai +korg +ride +bike +turnon +includ +breakfast +system +version +raspi +voic +starfleet +captain +turnoff +republican +microsoft +hangov +fave +site +current +eventsdaili +new +reutersintellicast +weatheraustin +txchicago +ilperiodicalssucksalonmirski +worst +webth +onionmacweekmacuserreferencehypertext +webster +interfaceyahooalta +vistacardiff +movi +databaselyco +road +mapalt +culturemacintosh +dataappl +computercyberdogquicktimequickdraw +dappl +supportmacintouchmacintosh +resourcecyberdog +poundinfo +archiv +rootcool +weird +stufffringewareth +actlabpbsnprnow +plai +mstk +catch +phrase +catalogpap +softwareth +rsumsymbol +emerg +symbol +groundingrobotmap +macintosh +peopledav +falooncharl +lewisjeff +lindjeff +sherwoodbrian +slatorsandi +stone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..f205ea3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,120 @@ +chuanjun +wang +homepag +welcom +chuanjun +wang +page +pictur +captur +gloriou +moment +came +earth +stun +detail +place +origin +come +orient +countri +call +china +check +page +know +hometown +hubei +provinc +china +graduat +student +tsinghua +univ +decid +time +chang +better +place +thought +texa +end +beautifulunivers +texa +austin +current +work +comput +scienc +take +break +read +page +enjoi +view +tower +opinion +nifti +thing +like +televis +surf +mind +numb +faceless +howev +find +brilliant +us +materi +inform +search +like +miner +search +diamond +among +million +rock +unemploi +internet +philosoph +well +person +diamond +look +real +unix +program +magazin +jump +dobb +journal +word +need +fresh +view +world +hard +check +fine +graphic +design +unusu +prose +cours +list +would +complet +without +link +find +pope +porsch +page +write +austin +return +depart +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..4b26f3c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,118 @@ +welcom +homepag +chin +tser +huang +last +updat +decemb +educ +june +degre +dept +comput +scienc +inform +engin +nation +taiwan +univers +taipei +taiwan +current +master +student +depart +comput +scienc +univers +texa +austin +research +interest +natur +languag +process +human +comput +interfac +network +distribut +systemsexperiencei +ever +work +chines +knowledg +inform +process +group +instituteof +inform +scienc +academia +sinica +research +assist +major +worki +design +system +capabl +word +segment +categori +tag +usinghidden +markov +model +improv +user +friendli +tool +allow +user +toexecut +line +proof +read +result +automat +tag +automatictag +system +reach +accuraci +improvedbecaus +continu +expans +train +data +person +interestsmovi +book +music +literatur +semiolog +basebal +basketbal +tabl +tenni +pinbal +favorit +siteschina +timesminsheng +dailyth +york +timesusa +todayth +economistth +atlant +monthlymak +contact +austin +texa +chuang +utexa +edufing +meyou +visitor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..52fc63bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,24 @@ +cilkcilkcilk +pronounc +silk +parallel +multithread +base +languageand +runtim +system +find +time +us +inform +inthi +page +check +thecilk +page +last +modifi +august +robert +blumoferdb +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..e61eb8d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,36 @@ +chung +keung +poon +home +page +chung +keung +poondepart +comput +sciencesunivers +texa +austinaustin +offic +ckpoon +utexa +edumi +plan +hungri +fish +askvinc +gogan +pleas +thesi +complex +connect +problemsom +interest +site +theoret +comput +scienc +hong +kong +harmonica +high +school diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..8460ebbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,84 @@ +home +page +chung +wongchung +wonglast +modifi +graduat +student +thedepart +comput +scienc +univers +texa +austin +member +thenetwork +research +labwhich +head +byprof +simon +research +relat +link +java +secur +project +nist +comput +secur +divis +comput +secur +resourc +clearinghous +role +base +access +control +rbac +prof +rivest +cryptographi +secur +page +contact +meemail +ckwong +utexa +edupost +comput +scienc +austin +austin +usavoic +offic +dept +offic +link +hyde +park +baptist +church +chines +mission +hong +kong +student +associ +austin +linux +home +page +netbsd +project +freebsd +home +page +openbsd +project +send +email +tockwong +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..1b22ce29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,95 @@ +clanci +clancyresearch +qualit +reason +us +incomplet +knowledg +comput +descriptionof +possibl +behavior +dynam +system +complex +system +containinga +larg +number +variabl +constraint +simul +frequentlyi +intract +result +larg +incomprehens +behavior +descript +abstract +aggreg +techniqu +requir +simulationto +elimin +irrelev +detail +focu +simul +distinctionsof +interest +develop +abstract +aggreg +techniqu +whichaddress +problem +particular +interest +abstractiontechniqu +automat +appli +simul +thiswil +facilit +integr +qualit +simul +techniqu +withlarg +scale +knowledg +base +automat +model +build +followingtechniqu +develop +address +issu +vita +list +network +retriev +real +paper +contact +informationemail +address +clanci +utexa +offic +taylor +hall +address +depart +comput +scienc +univers +texa +austin +austin +finger +inform +hotlist +netscap diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..26481aa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,11 @@ +page +construct +jimbo +click +three +four +five +seven +eight +nine +eleven diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..6cac2183 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,51 @@ +chri +chuwelcom +chri +home +page +myselfmi +photo +student +phone +number +address +call +offic +home +mail +address +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +interest +link +austin +chines +campu +christian +fellowship +austin +chines +church +depart +comput +scienc +austin +austin +hong +kong +china +author +chri +chuemail +cnchu +utexa +edulast +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..754b6d21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,286 @@ +code +visual +parallel +program +systemmast +lawless +scienc +codeless +myriad +preced +wilder +singl +instanc +alfr +lord +tennysoncod +visual +parallel +program +system +allow +user +compos +sequentialprogram +parallel +parallel +program +direct +graph +wheredata +flow +arc +connect +node +repres +sequenti +program +thesequenti +program +written +languag +code +produc +parallelprogram +varieti +architectur +model +architectur +independ +click +screen +shot +tutori +code +system +produc +parallel +program +base +network +machin +well +sequent +symmetri +newest +version +releas +avail +support +crai +smp +announc +releas +version +code +avail +free +click +download +softwar +major +revis +code +click +screen +shot +featur +sophist +user +interfac +provid +mani +improv +previou +version +code +make +easier +pleasant +featur +includ +like +interfac +macdraw +multipl +window +subgraph +edit +hierarchi +browser +articl +code +hpcwire +line +journal +high +perform +comput +recent +publish +articl +code +entitl +visual +parallel +program +come +code +kind +enough +reproduc +code +tutori +line +provid +introduct +us +code +code +mail +list +current +prospect +code +user +notifi +releas +backend +join +mail +list +fill +form +download +code +also +ad +mail +list +first +name +last +name +mail +address +relat +softwar +xcodelib +compon +librari +system +code +document +publicationscod +tutori +line +construct +directori +compress +postscript +file +document +previou +version +code +made +avail +lieu +document +prepar +stage +despit +chang +user +interfac +manual +still +quit +us +code +user +manual +code +refer +manual +list +code +relat +public +includ +link +postscript +version +contact +informationfor +specif +comment +regard +code +send +mail +emeri +berger +emeri +utexa +send +snail +mail +group +member +address +depart +comput +scienc +univers +texa +austin +austin +research +groupgroup +leaderprofessor +jame +brown +affili +faculti +member +john +werth +project +manag +emeri +bergerstud +member +dwip +banerje +incorpor +dynam +data +partit +code +model +ajita +john +develop +program +system +base +constraint +automat +parallel +code +alumni +overview +announc +softwar +research +public +contact +code +home +page +emeri +utexa +last +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..aad25c17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,48 @@ +steve +correlstev +correlresearchph +student +work +multifunct +knowledg +base +group +addit +inform +current +construct +hotlist +search +site +search +page +search +email +address +search +public +search +tech +reportcontact +inform +mail +correl +utexa +offic +mail +comput +scienc +depart +univers +texa +austin +taylor +hall +austin +texa +home +address +austin +home +correl +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..9264b68b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,96 @@ +carlo +pucholcarlo +pucholresearch +interest +respons +real +time +reactiv +system +gener +formal +method +specif +implement +real +time +system +distribut +control +robot +esterel +synchron +program +languag +mawl +languag +applic +develop +check +utc +real +time +system +group +home +page +publicationsi +list +public +avail +forbrows +softwareth +tempest +toolset +packag +verifyingsafeti +properti +program +written +esterel +program +languag +wrote +half +linux +devic +driver +thequantavisionfram +grabber +part +thejoystickdevic +driver +contact +informationoffic +dreal +time +system +taylor +hall +offic +offic +univers +texa +austindepart +comput +sciencesaustin +utexa +home +austin +lot +phun +interestsmemb +theth +robot +group +check +group +page +latest +interesti +origin +fromgandia +inth +provinc +valencia +spain diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..46ecc818 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,106 @@ +utc +real +time +system +research +groupth +real +time +system +research +group +head +byprof +aloysiu +past +year +work +toward +lai +groundworkfor +establish +firm +theoret +foundat +real +time +systemsand +also +build +design +tool +base +foundat +work +canb +categor +three +area +follow +specif +model +precis +formul +real +time +properti +system +analysi +verif +reason +real +time +properti +synthesi +enforc +stringent +time +constraint +real +time +properti +project +real +time +logic +modechart +toolset +modechart +editor +modechart +verifi +modechart +simul +modechart +compil +timetool +scenario +languagepublicationsabstract +ofth +group +paper +availableonlin +postscript +current +member +deji +chen +carlo +puchol +doug +stuart +chung +tsou +guangtian +wang +yangalumni +paul +clement +chih +wang +farn +wang +supoj +suthandavibul +farnam +jahanian diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..86e2d8d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,174 @@ +cindi +thompsoncindi +thompsonmachin +learn +research +groupunivers +texa +austini +particip +candlelight +vigil +across +internetto +help +increas +awar +violenc +women +researchmi +current +research +interest +artifici +intellig +primarilyin +area +machin +learn +specif +interestedin +natur +languag +acquisit +learn +produc +deep +semanticrepresent +input +sentenc +would +us +mani +task +propos +corpu +base +lexic +acquisit +wrote +master +thesi +system +learn +rule +suitabl +diagnost +expert +system +also +interest +mobil +robot +exhibit +atrobofest +spring +semest +build +intellig +agent +finger +inform +pictur +vita +list +public +also +machin +learn +page +inform +group +educ +comput +scienc +univers +texa +austin +comput +scienc +north +carolina +state +univers +contact +inform +offic +taylor +hall +phone +email +address +cthomp +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin +hotlist +start +point +internet +explor +misc +comput +scienc +consortium +lexic +research +collect +comput +scienc +bibliographi +project +collect +resourc +women +comput +comput +research +associ +robot +internet +resourc +page +artifici +intellig +repositori +knowledg +system +laboratori +home +page +georgia +tech +page +journal +artifici +intellig +research +associ +comput +linguist +home +page +folk +cognit +scienc +resourc +page +miscellan +stuff +wolv +truth +evalu +counsel +home +page +expand +horizon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..7713d29b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,21 @@ +home +page +xingshan +welcom +xingshan +home +page +browser +doesn +seem +support +frame +want +downloadth +latest +netscap +school +work +famili +friend +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..bbc4f56a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,88 @@ +mike +dahlin +mike +dahlingener +informationassist +professor +comput +scienc +univers +texa +austin +comput +scienc +berkelei +comput +scienc +berkelei +electr +engin +rice +univers +teachingfal +oper +systemsspr +advanc +comput +architectureeveryon +read +technic +classic +researchxf +serverless +network +file +systemweb +oper +systemsth +experiment +softwar +system +less +public +list +informationtechnolog +trend +pagethi +pagesummar +recent +technolog +trend +interest +operatingsystem +research +compter +architect +includinghistor +data +gather +price +capac +price +disk +memori +person +informationif +page +seem +bore +probabl +want +work +internet +root +page +link +world +email +dahlin +utexa +offic +taylor +hall +postal +taylor +hall +univers +texa +austinaustin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..bfa52ddc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,52 @@ +home +page +damani +howdi +pagal +dekho +student +busi +read +lazi +updateth +homepag +regularli +suffic +anyth +crazi +appeal +phrase +us +probabl +meant +research +activ +work +parallel +distribut +sytem +laboratori +vijai +garg +interest +distribut +system +network +public +follow +time +honor +tradit +feel +oblig +providesometh +servic +contact +mehom +guadulp +austin +offic +austinphon +dept +damani +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..3f0d99e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,73 @@ +dane +marshalldan +marshal +student +depart +comput +scienc +univers +texa +austin +research +multiresolut +render +system +autom +model +tree +real +time +system +global +illumin +electromechan +pinbal +machin +mainten +view +thelogist +equat +escap +attractor +view +complex +plane +main +area +research +make +nice +imag +contact +inform +work +address +appli +research +laboratori +austin +burnet +austin +texa +phone +email +address +dane +utexa +school +address +univers +texa +austin +depart +comput +scienc +austin +unrel +link +pinbal +pastur +jupit +probe +happi +station diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..750943ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,77 @@ +doug +stuart +welcom +pagedoug +stuart +home +pagewelcom +page +construct +page +bear +sure +number +oflinksto +interest +place +well +inform +aboutsport +scienc +fiction +booksin +gener +fewjok +testof +latexhtml +aweath +mapandcondit +austinandnew +orlean +guess +sort +us +person +archiv +amgraci +share +sure +perhap +link +process +provid +index +puttingit +simpl +keep +webbrows +provid +us +databas +browser +well +know +thisi +good +idea +go +save +someth +justa +easi +save +access +manner +stuffmom +click +comput +scienc +calendarlink +video +link +scienc +fictionbooksjokessportsfoodvideout +libraryresumelast +updat +dasdastuart +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..0a8b9512 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,4 @@ +doug +swhich +annoi +thisorthi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..65f7ca2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,94 @@ +dian +lawdian +student +depart +comput +scienc +univers +texa +austin +research +intereststh +symbol +ground +problemnavig +robot +agent +us +neural +network +evolv +theus +genet +algorithm +educ +comput +scienc +universityof +texa +austin +comput +scienc +universityof +texa +austin +spanish +literatur +washingtonst +univers +fine +art +washington +stateunivers +contact +inform +offic +taylor +hall +phone +email +address +dianelaw +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin +local +link +utc +neural +network +homepag +home +page +utc +home +page +austin +home +page +genet +algorithm +link +gann +genet +algorithm +neural +network +illig +home +page +santa +institut +digest +archiv +univers +michigan +research +group diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..557af27e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,61 @@ +home +page +dionisi +papadopoulosdionisi +papadopoulosabout +graduat +student +depart +comput +scienc +undergradu +student +depart +comput +engin +informat +univers +patra +greec +also +work +comput +technolog +institut +patra +greec +member +softwar +engin +applic +reasearch +unit +contact +medionisi +papadopoulo +univers +texa +austin +comput +scienc +depart +austin +mail +dionisi +utexa +link +mine +monitor +databas +homework +panhellen +student +associationpanathinaiko +athlet +clubgreek +newshellen +resourc +networkeveryth +alwai +want +know diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..989c53a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,176 @@ +david +zuckermandavid +zuckermanassist +professor +comput +scienc +univers +texa +austin +contact +inform +offic +taylor +hall +email +address +utexa +postal +address +depart +comput +scienc +univers +texa +austin +austin +texa +finger +cours +fall +cryptographyresearch +intereststh +role +random +comput +complex +theori +expand +applic +random +walk +graph +cryptographi +paragraph +descript +well +inform +myprofil +annual +report +also +look +recent +public +asymptot +good +code +correct +insert +delet +transposit +soda +random +optim +sampl +extractor +construct +leader +elect +stoc +multipl +cover +time +random +structur +algorithm +appear +random +linear +space +jcss +preliminari +version +call +determinist +simul +logspac +stoc +simul +us +gener +weak +random +sourc +algorithmica +preliminari +version +foc +tight +analys +local +load +balanc +algorithm +stoc +derandom +graph +product +comput +complex +comput +weak +random +sourc +foc +revis +expand +beat +eigenvalu +bound +explicit +constructionand +applic +combinatorica +appear +utc +technic +report +preliminari +version +stoc +effici +construct +small +hit +setfor +combinatori +rectangl +high +dimens +combinatorica +appear +revis +preliminari +version +stoc +lower +bound +random +mutual +exclus +sicomp +appear +preliminari +version +stoc +unapproxim +version +complet +problem +sicomp +appear +preliminari +version +structur +complet +list +public +abstract +visit +page +sinc +april +last +modifi +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..d7aa80ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,100 @@ +multimedia +home +page +distribut +multimedia +comput +laboratori +univers +texa +austin +welcom +distribut +multimedia +comput +laboratori +main +object +research +investig +wide +rangeof +research +issu +area +multimedia +system +currentresearch +focus +design +multimedia +storag +server +network +transport +protocol +digit +audio +video +andmultiresolut +multimedia +databas +distribut +multimediacomput +laboratori +dmcl +part +departmentof +comput +scienc +univers +texa +austin +sponsor +research +work +carri +distribut +multimediacomput +laboratori +sponsor +variou +industri +federalinstitut +includ +foundat +intel +nation +scienc +foundat +nasa +microsoft +mitsubishi +electr +research +laboratori +merl +microsystemsinc +univers +texa +austin +tabl +content +research +agenda +paper +relev +technic +report +list +member +call +paper +would +like +hear +send +yourcom +suggest +multimedia +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..cf9f8611 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,114 @@ +batorydon +batorysoftwar +gener +improv +programm +product +reduc +mainten +cost +enhanc +applic +perform +student +investig +wai +realiz +practic +domain +specif +compon +base +design +methodolog +technolog +larg +scale +softwar +system +synthesi +span +topic +softwar +architectur +design +pattern +extens +languag +subject +domain +model +parameter +program +object +orient +program +framework +domain +current +interest +databas +manag +data +structur +avion +current +research +build +program +languag +support +softwar +gener +goal +jakarta +project +build +extens +preprocessor +java +languag +domain +specif +gener +would +encapsul +pluggabl +extens +jakarta +research +fund +darpa +microsoft +research +univers +texa +appli +research +laboratori +schlumberg +public +project +student +softwar +contact +inform +offic +taylor +hall +email +address +batori +utexa +phone +number +offic +postal +address +univers +texa +austin +depart +comput +scienc +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..5cf05efd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,49 @@ +home +page +dwip +banerje +photograph +dwip +banerjeeabout +methi +info +work +code +parallel +programminggroup +methodolog +includ +data +partit +graphicalparallel +program +system +paper +present +theintern +parallel +process +symposium +list +favorit +site +info +insert +know +contact +departmentpost +comput +scienc +austin +austin +usavoic +main +offic +offic +homepost +enfield +road +austin +usavoic +dwip +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..db4c4a6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,61 @@ +emilio +camahort +gurrea +emilio +camahort +gurrea +promis +set +decent +home +page +summer +mmmmm +multipl +complaint +meet +previou +home +page +deadlin +come +anoth +excus +know +siggraph +paper +finish +januari +thing +think +bout +right +make +promis +time +els +lose +credibl +left +first +item +anoth +third +anoth +paragraph +forget +paragraph +break +contact +mepost +comput +scienc +taylor +austin +austin +usavoic +main +offic +offic +ecamahor +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..6d25fdd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,38 @@ +posnak +posnak +graduat +student +comput +scienc +univers +texa +austin +interest +network +oper +system +support +multimedia +system +work +distribut +multimediacomput +laboratori +head +harrick +research +supervis +greg +lavend +isod +consortium +austin +base +view +research +summari +view +public +posnak +utexa +eduphon diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..7118c1f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,102 @@ +allen +emerson +allen +emersonbruton +centenni +professor +comput +scienc +depart +taylor +hall +univers +texa +austin +austin +texa +mail +emerson +utexa +phone +direct +secretari +area +research +interest +formal +method +comput +aid +verif +tempor +logic +automata +infinit +object +concurr +distribut +systemsselect +recent +publications +emerson +sistla +srinivasan +quantit +tempor +reason +journal +real +time +system +emerson +sadler +srinivasan +effici +tempor +satisfi +journal +logic +comput +emerson +real +time +calculu +real +time +theori +practic +bakker +york +springer +verlag +lectur +note +comput +scienc +emerson +jutla +tree +automata +calculu +determinaci +annual +ieee +symposium +foundat +comput +foc +juan +emerson +tempor +modal +logic +handbook +theoret +comput +scienc +leeuwen +elsevi +press +amsterdam +cambridg +mass diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..81fb6e3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,153 @@ +emeri +berger +home +pageemeri +berger +person +contact +info +mail +address +dept +comput +scienc +taylor +hall +univers +texa +austin +austin +phone +work +home +mail +emeri +utexa +work +system +analyst +parallel +program +groupi +system +analyst +parallel +program +research +group +austin +work +code +visual +parallel +program +system +inform +code +code +home +page +ticam +composit +materi +groupi +also +affili +ticam +work +composit +materi +group +inform +project +click +group +name +tool +search +lyco +databas +file +randomli +select +info +mirror +near +view +unix +page +utexa +user +academ +function +program +haskel +emeri +berger +uttr +abstract +program +languag +haskel +add +object +orient +function +us +concept +known +type +class +pure +function +program +framework +paper +describ +extens +analyz +accomplish +well +problem +compress +postscript +html +othermi +youngest +brother +doug +aspir +artist +page +code +graphic +doug +handiwork +linksth +code +visual +parallel +program +systemtexbook +textbook +exchangegrac +graduat +repres +associ +comput +scienc +emeri +utexa +last +updat +octob +believ +macintosh +check +http +evangelist +macaddict +join +evangelist +mail +list diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..ae41557c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,130 @@ +emma +home +page +emma +wuabout +myselfhi +welcom +emma +home +page +emma +chines +girl +come +august +studi +depart +comput +scienc +univers +texa +austin +interest +china +immedi +degre +comput +scienc +zhongshan +univers +becam +market +repres +inibm +china +compani +south +china +branch +try +deliv +solut +small +planet +costom +telecommun +media +industri +zhongshan +univers +would +surpris +find +manyalumni +enter +alumni +club +nice +thing +attend +graduat +school +austin +lot +intern +opportun +engin +student +semest +work +part +time +programm +nation +instrumentsinc +cours +schedul +spring +distribut +computingmanag +informationautomat +program +tool +baseyahoogalaxi +librari +onlin +universityyellow +page +mini +librari +introduct +us +fortran +tutori +infoleisur +timenewspagepeopl +dailyartstim +magazinechines +magazinepc +magazinec +visit +orlean +houston +antoniosan +franciscomarina +peac +citysan +jose +capit +silicon +vallei +love +francisco +contact +pointemail +emmawu +utexa +eduphon +mail +depart +comput +scienc +univers +texa +austin +austin +last +date +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..f13d21f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,37 @@ +home +page +emilio +remolinaemilio +remolinaabout +mehi +emilio +first +page +bullet +list +easi +includ +well +first +item +anoth +third +anoth +paragraph +forget +paragraph +break +curriculum +vita +contact +mepost +comput +scienc +austin +austin +usavoic +main +offic +offic +eremolin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..c560850e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,29 @@ +levent +sayfasi +welcom +home +page +levent +erkok +graduat +student +depart +comput +sciencesat +universityof +texa +austin +former +home +page +locat +inturkei +person +inform +reach +http +ceng +metu +erkokto +find +thank diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..82b44ad7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,63 @@ +esra +erdem +homepag +esra +erdem +esra +erdem +student +thedepart +comput +scienc +univers +texa +austin +esra +erdem +educ +comput +scienc +depart +comput +engin +andinform +scienc +bilkent +univers +turkei +area +interest +machin +learninginduct +logic +program +monoton +reason +topic +interest +cognit +sciencelearningreason +children +theori +mind +monoton +reason +commonsens +reasoningknowledg +representationemotionsphilosophi +mindcontact +inform +postal +depart +comput +scienc +univers +texa +austin +austin +voic +mail +esra +utexa +esra +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..284da02d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,77 @@ +carl +home +pagestephen +carlpardon +dust +current +student +work +toward +master +art +degre +comput +scienc +depart +univers +texa +thesi +describ +system +perform +syntact +extens +scheme +program +languag +wasn +alwai +student +life +myresum +believ +item +person +interest +planmi +resum +research +interestsa +psuedo +random +collect +linksth +carl +household +daili +dose +thing +world +wide +snow +pike +peak +houston +chronicl +interact +sport +worth +rice +athlet +rice +univers +march +bandget +touchpost +comput +scienc +austin +austin +usavoic +main +offic +know +offic +esteban +utexa +edureturn diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..d69a5b17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,83 @@ +tara +estlintara +estlinmachin +learn +research +groupth +univers +texa +austinresearchcontrol +knowledg +improv +perform +problem +solver +byguid +effici +accur +solut +researchinvolv +us +combin +analyt +induct +machinelearn +techniqu +acquir +control +inform +amparticularli +interest +us +method +improv +theperform +plan +schedul +system +inform +includ +detail +descript +myresearch +vita +list +public +also +check +machin +learn +research +group +page +educ +comput +scienc +univers +texa +austin +comput +scienc +tulan +univers +contact +inform +offic +taylor +hall +phone +email +address +estlin +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin +estlin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..f1e89f26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,107 @@ +home +page +francoi +barbanson +utc +versionhom +francoi +barbanson +utc +versionthi +page +locat +directori +spool +user +francoi +francoisabout +mecurr +research +interest +black +forest +cake +central +market +genuin +find +real +pastri +fruit +mouss +austin +pack +groceri +well +swim +forthcom +trip +central +market +shed +lighton +interest +research +issu +central +market +stop +shop +food +women +current +research +interest +crawl +join +foreign +legion +todai +chines +wisdom +suggest +watch +plai +basketbal +hyogo +japan +check +tank +polic +action +atdominion +tank +polic +hqcheck +todai +dilberti +knew +databas +class +would +noth +troubl +mentionthat +parallel +comput +class +contact +mepost +francoi +barbanson +guadalup +street +suit +austin +texa +voic +theori +number +assum +machin +work +mail +francoi +utexa +edufrancoi +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..c6d5fac8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,88 @@ +donald +fussel +donald +fussel +trammel +crow +regent +professor +depart +comput +scienc +director +advanc +technolog +divis +inform +technolog +group +appli +research +laboratori +member +comput +engin +research +center +depart +electr +comput +engin +texa +institut +comput +appli +mathemat +univers +texa +austin +austin +phone +mail +fussel +utexa +eduinform +http +utexa +user +fussellb +mathemat +social +scienc +dartmouth +collegem +comput +scienc +univers +texa +dalla +area +interest +comput +architectur +comput +graphic +databas +system +design +autom +fault +toler +comput +cours +introduct +comput +graphic +comput +graphic +journal +public +confer +public +research +group +work +progress +current +former +student diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..1f9afee4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,37 @@ +ajit +georgemi +gener +useless +pagethi +gener +useless +page +go +youand +construct +someth +odd +goodthat +find +anyth +start +research +address +ajit +georg +wickersham +lane +austin +gajit +utexa +eduher +file +softwar +document +foundus +recent +file +david +last +updat +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..ab4325b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,28 @@ +geeta +arora +home +page +graduat +student +current +year +still +try +tofigur +research +undergrad +indian +institut +technolog +kanpur +india +contact +mehom +river +oak +medic +art +austin +phone +geeta +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..d3ab3e0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,75 @@ +gokul +home +page +final +receiv +countless +flame +gripe +theexcess +verbos +home +page +decid +thecollect +wish +mass +democrat +world +putonli +barest +minimum +adieu +outpour +critic +head +plakal +hag +hopey +sleep +easi +untroubl +conscienc +send +perfectli +good +home +page +untim +demis +actual +quit +want +kind +page +could +merit +vitriol +click +risk +mayb +comment +help +reinstat +earlier +page +signin +lesscrit +comment +click +contact +medic +art +austin +visitor +number +send +comment +suggest +critic +flame +gokul +utexa +last +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..669468d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,238 @@ +gooti +home +page +subramanyam +gooti +intro +past +present +like +futur +hideout +visitor +number +intro +welcom +home +page +subramanyam +gooti +bold +name +start +suggest +continu +read +know +hopefulli +wont +disappoint +gold +well +born +sept +somebodi +interest +hyderabad +place +andhra +pradesh +peopl +interest +geographi +southern +state +india +peopl +curiou +know +famili +school +join +osmania +univers +colleg +technolog +bachelor +chemic +engin +came +contact +peopl +vari +background +thought +made +friend +alwai +proud +call +batch +came +univers +nebraska +lincoln +great +gala +time +becam +addict +american +footbal +except +cold +winter +everi +thing +els +wasjust +great +studi +year +comput +scienc +transfer +univers +texa +austin +life +present +enrol +master +program +comput +scienc +depart +univers +texa +austin +austin +real +cool +place +hang +around +especi +like +weather +austin +also +made +friend +usual +love +acad +care +peopl +list +alphabet +order +abraham +gokul +kumar +mehul +neeraj +shantanu +shailesh +vipin +like +thing +like +best +keep +chat +friend +know +mani +know +also +like +make +friend +travel +around +plai +game +anoth +plai +game +likechess +question +carrom +board +racquet +ball +tenni +tabl +tenni +cricket +soccer +love +watch +game +like +read +book +definetli +text +book +want +check +horoscop +todai +check +compatabil +love +sign +also +like +listen +hindi +song +well +write +would +like +bore +also +narrow +option +like +golden +futur +goe +without +sai +control +destini +ever +success +life +cours +attribut +hardwork +power +good +thing +thing +happen +alwai +propos +dispos +pleas +spend +time +fill +valuabl +comment +guest +book +hide +medic +art +austin +gooti +utexa +finger +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..865a14c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,149 @@ +posit +statement +goudaacm +comput +surveysa +decemb +http +survei +goudanetwork +copyright +associ +comput +machineri +permiss +statement +citat +page +fornetwork +protocol +exact +specificationsand +pragmat +implementationsmoham +goudath +univers +texa +austin +depart +comput +sciencesaustin +texa +usagouda +utexa +http +utexa +user +utc +report +profil +gouda +htmlabstract +argu +studi +protocol +evolv +bridgeth +exact +specif +pragmat +implement +networkprotocol +gener +term +network +protocol +formal +specif +implementationsaddit +word +phrase +compil +softwar +tool +protocol +develop +methodologypubl +inform +citat +gouda +network +protocol +exact +specif +pragmat +implement +comput +survei +decemb +http +survei +goudanetwork +submiss +date +june +revis +date +octob +accept +date +octob +public +sourc +html +avail +permiss +make +digitalor +hard +copi +part +work +person +classroomus +grant +without +provid +copi +made +ordistribut +profit +commerci +advantag +copi +bearthi +notic +full +citat +first +page +copyright +forcompon +work +own +other +must +honor +abstract +credit +permit +copi +otherwis +torepublish +post +server +redistribut +list +requiresprior +specif +permiss +request +permiss +frompubl +dept +orpermiss +last +modifi +moham +goudagouda +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..a4cd3032 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,73 @@ +john +gunnel +john +gunnel +depart +comput +scienc +univers +texa +austin +gunnel +utexa +plapack +minut +transpos +case +assign +transpos +case +assign +connect +imag +report +except +guess +drank +depict +product +latter +report +author +collect +code +spars +matrix +computationsif +would +like +meet +best +friend +take +look +data +pageam +log +check +class +also +glimps +mysteri +land +hail +central +oregon +home +towni +less +redmond +doesn +much +home +page +look +talk +visitor +rememb +test +plan +file +long +bore +plan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..6424a322 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,44 @@ +frank +tropschuhfrank +tropschuh +gunther +utexa +schweiz +clayton +austin +waldhofstrass +rheinfelden +curriculum +vitaeenglishdeutschlinkscarnegi +mellon +univers +undergradu +student +school +comput +scienc +universitterlangen +nrnberg +junior +year +abroad +institut +mathematisch +maschinen +datenverarbeitung +depart +comput +scienc +oper +system +univers +texa +austin +graduat +student +depart +comput +scienc +frank +tropschuhgunth +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..b594a999 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,47 @@ +yongxiang +home +pagemerri +christmashappi +year +welcom +homepagegao +yongxiangsever +pointsto +contact +addresspictur +mine +ceremoni +grant +master +degre +chinadepart +comput +scienc +univers +texa +austin +austin +texa +gener +inform +name +yongxiang +male +birthdai +birth +place +huanan +jiangsu +china +hobbi +tabl +tenniseduc +background +juli +comput +softwar +univers +scienc +directori +servic +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..966d0302 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,30 @@ +home +page +zhang +zhang +schoolth +univers +texa +austin +comput +scienc +second +semestercoursesc +comput +languag +linc +distribut +comput +alvis +databas +manag +mirankerfil +term +project +databs +manag +queri +formthank +stop +gzhang +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..387d51b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,137 @@ +zhou +home +pagealan +zhou +headlin +new +year +ture +award +given +amir +pnueli +aprofessor +weizmann +institut +scienc +israel +comput +scienc +theoret +compuer +scienc +tsinghua +univers +prestig +institut +china +incompletelist +undergradu +classmat +kept +alex +zhao +current +student +depart +comput +scienc +univers +texa +austin +research +interest +focus +vlsi +find +mani +applic +mathemat +scienc +algorithm +design +analysi +combinatori +optim +comput +complex +even +mathematicallog +vlsi +researchgroup +head +prof +martin +wong +publicationshai +zhou +wong +optim +algorithm +forriv +rout +crosstalk +constraint +ieee +internationalconfer +comput +aid +design +jose +chen +zhou +wong +optimalnon +uniform +wire +size +elmor +delai +model +ieee +acmintern +confer +comput +aid +design +jose +studi +room +academ +refer +techniqu +refer +industri +directori +bulletin +live +room +period +chines +staff +movi +search +engin +internet +contact +inform +campu +depart +comput +sciencesunivers +texa +austintaylor +hall +austin +campu +staustin +voic +mail +haizhou +utexa +edulast +modifi +number +visit +homepag +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..42761d36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,58 @@ +welcom +home +page +construct +myselfnow +first +year +student +departmentof +comput +scienc +universityof +texa +austin +want +know +click +hear +educ +pre +dept +comput +scienc +univ +texa +austin +nation +softwar +engin +wuhan +univ +china +dept +comput +scienc +wuhan +univ +china +alumni +pal +wuhan +univers +alumnihom +page +china +home +pagecontact +austin +texa +wait +email +haosun +utexa +edunow +call +visitor +sinc +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..2a80a628 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,140 @@ +micheal +hewett +micheal +hewetthewett +utexa +educlick +fingerm +click +email +fourth +year +student +departmentof +comput +scienc +universityof +texa +austin +educ +comput +scienc +stanfordunivers +comput +scienc +universityof +kansa +electr +engin +universityof +kansa +mathemat +honor +washburnunivers +honor +first +place +intern +collegiateprogram +contest +first +place +nation +mathemat +competit +utc +comput +bowl +champion +sawada +ioanni +smaragdaki +thoma +wahlutc +comput +bowl +tower +hanoi +champion +lanc +tokudaut +intramur +volleybal +champion +faculti +grad +divis +fall +intramur +volleybal +champion +open +divis +summer +intramur +volleybal +champion +faculti +grad +divis +spring +intramur +volleybal +champion +open +divis +summer +intramur +volleybal +champion +club +divis +fall +finish +motorola +austin +marathon +hour +minut +finish +might +want +visit +myfavorit +page +locatem +learnabout +research +interest +view +downloadmi +public +learnmor +address +phone +number +call +offic +home +central +timefax +mail +address +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +author +micheal +hewettemail +hewett +utexa +edulast +updat +wednesdai +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..b49f41b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,239 @@ +hiep +nguyenhiep +nguyenabout +meabout +vietnames +american +born +came +unit +state +five +resid +texasfor +life +current +live +austin +texa +current +work +contract +programm +activ +seek +client +process +start +busi +providinghigh +internet +softwar +solut +product +rang +video +game +databas +current +work +detail +resum +link +hypertextresum +occup +current +gordon +novak +compil +class +educ +receiv +receiv +univers +texa +austin +softwar +packag +softwar +packag +havedevelop +year +resum +databas +onlin +resum +databas +natur +scienc +placement +center +address +http +utexa +con +nsplace +rexi +real +time +emptiv +oper +system +board +us +robot +research +gdraw +object +orient +cross +platform +graphic +librari +xwindow +postscript +legion +data +flow +languag +us +robot +control +flat +graphic +robot +simul +realist +specular +reflect +sonar +xgcl +xakcl +gunu +common +lisp +xwindow +function +interfac +akcl +gunu +common +lisp +standalon +packag +written +provid +function +packag +john +ousterhout +current +work +current +contract +theunivers +databas +work +specif +anonlin +resum +databas +access +student +prototyp +moredetail +researchwith +java +like +languag +allow +easi +build +andmaintain +network +program +port +netrek +java +explor +methodolog +port +larg +softwar +system +written +java +also +currentlyact +search +contract +expertis +internetsoftwar +solut +might +best +leverag +technic +interest +also +interest +graphic +game +program +especi +window +work +fast +textur +mappingroutin +anim +processor +us +assembl +interest +write +poetri +make +potteri +also +like +outdoor +activ +list +spot +robot +work +austin +robot +group +java +page +http +java +netrek +page +http +factoryx +factoryx +virtual +realiti +vrml +page +http +sdsc +vrmlto +contact +mepost +comput +scienc +austin +austin +usavoic +main +offic +offic +hiep +utexa +edulast +updat +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..532cca3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,98 @@ +huiqun +huiqun +home +page +nice +meet +student +depart +comput +scienc +univers +texa +austin +member +vlsi +design +research +group +guid +professor +martin +wong +inform +world +new +virtual +world +tour +austin +citi +collect +chines +site +sunris +stuff +internet +research +world +comput +societi +ieee +comput +giant +depart +search +tool +yahoo +infoseek +internet +directori +univers +onlin +career +center +career +mosaic +bookmark +entertain +languag +unix +book +java +java +book +perl +expect +rosett +refer +manual +rosett +program +exampl +contact +inform +mail +hqliu +utexa +phone +address +campu +depart +comput +scienc +taylor +univers +texa +austin +austin +home +page +last +modifi +comment +welcom +send +email +hqliu +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..5a7608d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,77 @@ +hudson +home +pagehudson +turnerphd +student +comput +scienc +colleg +natur +sciencesat +univers +texa +austin +advisor +vladimir +lifschitz +comput +scienc +expect +austin +thesi +titl +infer +rule +causal +represent +ofcommonsens +knowledg +action +msc +comput +scienc +austin +mli +librari +inform +scienc +austin +english +philosophi +austin +vita +postscript +avail +onlin +draft +dissert +also +avail +research +interestscommonsens +reason +actionlog +program +nonmonoton +reasoningmi +paper +avail +onlin +research +linkseuropean +colloquium +spatialand +tempor +reasoningto +contact +mepost +comput +scienc +austin +austin +usavoic +main +offic +offic +hudson +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..3138310a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,93 @@ +yanbin +zhang +welcom +yanbin +zhang +home +page +littl +cutti +allen +graduat +student +depart +comput +scienc +univers +texa +austin +graduat +current +seek +part +time +full +time +spring +cours +left +spring +full +time +resum +click +postscript +format +comput +world +ieee +onlin +career +center +compani +home +page +comput +compani +help +languag +internet +librari +webmuseum +world +travel +beauti +homeland +contact +mail +hyanbin +utexa +phone +home +offic +address +campu +depart +comput +scienc +tarlor +univers +texa +austin +austin +home +address +lake +austin +blvd +austin +number +visit +homepag +sinc +home +page +last +modifi +septemb +comment +welcom +send +email +hyanbin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..e9f90968 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,75 @@ +isaac +sheldon +isaac +sheldon +contact +inform +phone +mail +isheldon +utexa +inform +http +utexa +user +isheldon +profession +inform +current +graduat +student +univeristi +texa +austin +depart +comput +scienc +reciv +master +scienc +decemb +coursework +concentr +comput +graphic +reciev +undergradu +degre +comput +scienc +unives +mass +lowel +summer +intern +scientif +engin +softwar +small +austin +compani +creat +schlaeor +mellor +case +tool +project +construct +solid +geometri +us +bsptree +modular +trace +framework +butt +person +inform +babi +page +isaac +sheldon +isheldon +utexa +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..4f1f0287 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,49 @@ +john +adair +john +crinkum +crankum +homepag +live +compound +wife +holli +eileen +taylor +evan +jame +taylor +adair +rice +alumni +friend +live +includ +carl +white +also +internet +consult +matthew +mengerink +fish +fanat +work +dejanew +steve +traylen +get +doctor +book +email +jadair +utexa +back +graduat +student +page +back +home +page +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..4f905035 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,26 @@ +john +chamber +home +pagejohn +chamberssenior +oper +system +specialistb +physic +univers +texa +paso +comput +scienc +yale +universityph +oper +research +univers +texa +austin +research +paper +vita +link +mail diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..f01fb239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,261 @@ +bednarjim +bednar +candid +dept +comput +scienc +univers +texa +austin +comput +scienc +univers +texa +austin +austin +texa +philosophi +univers +texa +austin +austin +texa +electr +comput +engin +univers +texa +austin +austin +texa +decemb +research +research +concentr +biolog +realist +model +ofcognit +process +us +artifici +neural +network +seek +useth +dramat +advanc +comput +technolog +past +fewdecad +make +equal +dramat +advanc +understand +thehuman +mind +comput +power +avail +soon +beavail +realist +simul +cortic +process +becomingpract +enabl +make +necessari +refut +testabl +hypothes +brain +function +overal +goal +makecognit +research +empir +scienc +rather +purelyphilosoph +domain +centuri +master +thesi +tilt +aftereffect +self +organ +model +ofth +primari +visual +cortex +nearli +complet +abstract +visual +illus +aftereffect +long +studi +psychologist +vision +research +appar +function +failur +might +offer +insight +visual +process +carri +brain +particular +class +visual +illus +call +tilt +illus +tilt +aftereffect +thought +aris +primari +visual +cortex +human +thu +serv +test +case +theori +area +brain +specif +sever +research +propos +result +later +inhibit +neuron +receiv +visual +input +thesi +examin +tilt +illus +aftereffect +lissom +sirosh +miikkulainen +self +organ +model +primari +visual +cortex +incorpor +later +interact +demonstr +self +organ +principl +drive +lissom +result +aftereffect +qualit +quantit +similar +measur +human +basi +result +explan +call +indirect +effect +interact +line +differ +orient +propos +thesi +self +organ +model +also +appli +figur +aftereffect +spatial +frequenc +aftereffect +predict +result +later +interact +process +preliminari +report +research +avail +postscript +file +also +begin +doctor +research +includ +simul +detail +level +visualbehavior +us +extens +lissom +model +contact +inform +email +jbednar +utexa +mail +address +univers +texa +austin +depart +comput +scienc +austin +inform +finger +command +machin +log +departmentmi +resum +postscript +ascii +format +link +probabl +outdat +paper +interest +jbednar +utexa +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..c0a9571d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,118 @@ +homepag +home +ofjunfanghi +welcom +homepag +test +frame +graduat +student +comput +scienc +depart +univers +texa +austin +librari +largest +academ +librari +north +america +catalog +resum +cours +professor +novak +assign +topic +comput +excel +sourc +ethernet +info +technolog +special +sysadm +topic +domain +name +system +inform +free +unixish +oper +system +linux +document +project +home +page +info +us +debug +transfer +latex +file +html +help +unix +html +email +stuff +visit +utc +visit +kristina +ross +tutori +learn +construct +page +take +jeff +cours +comput +network +system +administr +last +summer +florida +state +univers +visit +edmund +automobil +buyer +guid +want +document +java +packag +java +languag +specif +public +ascii +format +look +pretti +good +privaci +help +like +sceneri +pictur +jfang +utexa +start +construct +homepag +visitor +number +sinc +test +test +java +applet diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..73d2f903 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,48 @@ +john +priorjohn +priormi +resum +john +accumul +knowledg +year +dog +good +someth +long +enough +start +hurt +probabl +chew +nacho +chip +swallow +beer +good +good +beer +good +sleep +good +contact +inform +email +jprior +utexa +mail +address +univers +texa +austin +depart +comput +scienc +austin +home +address +phone +swisher +austin +jprior +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..b00a2625 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,48 @@ +jeff +thoma +homepagejeff +thoma +homepagecontact +informationpublicationssoftwar +system +gener +research +groupphoto +albumfavorit +internet +sitesuniversityof +texa +comput +scienc +departmentappliedresearch +laboratori +electricaland +comput +engin +departmentedsfinanci +trade +technolog +center +fttc +keyword +search +utacademiccalendarsut +sportshook +ultim +longhorn +site +utfootbal +scheduleaustintexa +jeff +thoma +comput +scienc +depart +univers +texa +austin +last +modifi +octob +jthoma +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..9f6e5c6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,89 @@ +jiani +homepagewelcom +jiani +homepag +first +year +student +indepart +comput +scienc +univers +texa +ataustin +comput +scienc +peke +univers +beij +china +peke +univers +alumni +comput +scienc +depart +peke +univers +depart +ofpek +univers +china +chinesechines +scenerychines +novelschines +classicschines +magazineschines +newspapersus +link +registrar +gradaut +studiesut +libraryut +campusutaccessabout +austin +weather +todai +austin +citylimit +lot +excit +stuff +austin +miscellaneousyahoojava +page +sunjavascript +page +netscapeth +perl +languag +home +pagecomput +research +associationcomput +journal +magzin +webnetwork +comput +scienc +technicalreport +libraryth +collect +comput +scienc +bibliographiesintern +student +contact +street +austin +texa +jyluo +utexa +finger +meyour +comment +suggestionswould +highli +appreci +visitorsinc +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..66c1fa72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,64 @@ +kedar +namjoshiabout +mehi +thank +check +doctor +student +austin +depart +comput +scienc +research +advisor +professor +allen +emerson +interest +tempor +logic +reason +concurr +program +semant +concurr +distributedalgorithm +automatatheori +came +fall +receiv +bachelor +degre +comput +scienc +indian +institut +technolog +madra +wonder +madra +home +page +lot +stuff +would +like +know +interest +person +inform +contact +inform +offic +phone +home +phone +home +address +west +street +austin +todai +amul +adkedar +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..ae2cb91d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,72 @@ +kenneth +harker +kenneth +harker +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +kharker +utexa +amateur +radio +babylon +linux +rocketri +cyberspac +public +academ +work +polit +stuff +resum +public +last +updat +kharker +utexa +world +wide +facil +utexa +provid +servic +faculti +student +staff +guest +depart +comput +scienc +univers +texa +austin +view +opinion +express +page +sole +respons +author +kenneth +harker +necessarili +reflect +view +opinion +depart +comput +scienc +univers +texa +austin +univers +texa +system +board +regent diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..550d4887 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,130 @@ +david +kincaid +david +kincaid +senior +lecturerassoci +director +center +numer +analysi +lamar +univers +univers +texa +austin +honor +award +profession +servic +certif +recognit +creativ +develop +technicalinnov +basic +linear +algebra +subprogram +nasa +technic +committe +comput +linear +algebra +imac +session +organ +imac +world +congress +comput +andappli +mathemat +area +interestmathemat +softwar +high +perform +comput +numer +analysi +summari +researchmi +interest +focus +research +us +iter +algorithm +solv +system +linear +algebra +equat +larg +spars +coeffici +matric +system +aris +solut +ellipt +partial +differenti +equat +develop +implement +numer +algorithm +softwar +parallel +comput +anoth +area +interest +select +recent +publicationsw +chenei +kincaid +numer +mathemat +comput +pacif +grove +brook +cole +kincaid +hay +young +itpack +imac +proceed +world +congress +coput +mathemat +atlanta +young +kincaid +linear +stationari +second +degre +method +solut +larg +linear +system +topic +polynomi +sever +variabl +applic +rassia +world +scientif +river +edg +jersei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..465930a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,89 @@ +mike +kistler +home +page +mike +kistler +home +page +page +construct +first +year +student +univers +texa +ataustin +thedepart +comput +scienc +also +current +emploi +theperson +softwar +productsdivis +academ +backgroundba +mathemat +comput +scienc +susquehanna +univers +selinsgrov +comput +inform +scienc +syracus +univers +syracus +master +busi +administr +stern +school +businessnew +york +univers +york +academ +interestsi +interest +parallel +parallel +algorithm +particularli +us +commerci +data +process +press +random +collect +link +inform +parallel +comput +coursesfal +distribut +comput +iwith +prof +jayadev +misra +numer +analysi +linear +algebrawith +prof +alan +cline +visitor +number +contact +juli +walk +pflugervil +email +kistler +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..7b89bf7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,11 @@ +jacob +kornerupjacob +kornerup +welcom +home +page +time +sinc +march +jacob +kornerup diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..9990de39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,107 @@ +benjamin +kuipersbenjamin +kuipersbruton +centenni +professor +comput +scienc +univers +texa +austin +mathemat +swarthmor +colleg +mathemat +research +interest +represent +commonsens +expert +knowledg +withparticular +emphasi +effect +incomplet +knowledg +thequalit +reason +research +grouphom +page +describ +research +topic +paper +student +andavail +softwar +consider +detail +research +accomplish +includ +tour +model +spatial +knowledg +cognit +qsim +algorithm +qualit +simul +access +limit +logic +knowledg +represent +robot +explor +map +strategi +base +qualit +recognit +distinct +place +kuiper +qualitativereason +model +simul +incomplet +knowledg +cambridg +press +teach +plan +spring +build +intellig +agent +fall +commonsens +reason +physic +world +spring +build +intellig +agent +contact +inform +mail +prof +benjamin +kuiper +comput +scienc +depart +univers +texa +austin +austin +texa +email +kuiper +utexa +phone +finger diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..e8b7785b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,61 @@ +simon +simon +professor +comput +sciencesdepart +comput +sciencesunivers +texa +austin +texa +email +utexa +eduphon +offic +taylor +hall +campu +mail +comput +scienc +photo +profil +network +research +laboratori +fall +spring +administr +assist +also +editori +assist +ieee +transact +network +kata +carbon +email +kata +utexa +eduphon +inform +electron +submissionnew +clip +tune +turn +toss +internet +empt +austin +american +statesman +februari +front +page +compress +postscript +cont +compress +postscript diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..2a329ff6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,81 @@ +network +research +laboratori +austin +network +research +laboratori +depart +comput +scienc +univers +texa +austin +research +activ +laboratori +span +entir +develop +cycl +network +protocol +design +specif +verif +test +perform +analysi +implement +perform +tune +currentinterest +architectur +protocol +address +chang +underli +commun +technolog +well +network +applic +laboratori +research +project +supervis +simon +professor +comput +scienc +research +fund +provid +nation +scienc +foundat +nsaunivers +research +program +texa +advanc +research +program +foundat +lockhe +current +research +project +recent +paper +network +support +videoservic +network +secur +protocol +theori +workshop +integr +novemb +research diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..b91e5c6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,40 @@ +home +page +robert +landrum +stori +mail +viruspictur +mountain +empirepch +retreattexa +republican +convent +backbon +rockrsumfamilyinterest +christian +council +home +page +home +page +awai +home +graham +gordon +landrum +home +pageth +comput +scienc +depart +ofth +univers +texa +austin +christian +councillandrum +utexa +edulast +updat +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..43708054 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,38 @@ +greg +lavend +univers +texa +austinr +greg +lavenderadjunct +assist +professordepart +comput +scienc +anddepart +electr +comput +engineeringth +univers +texa +austin +contact +address +research +activ +comput +scienc +comput +engin +cours +recommend +read +biograph +informationsuggest +improv +page +welcom +last +updat +lavend +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..5495e391 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,89 @@ +home +page +jame +welcom +home +pageyeap +normal +look +jame +student +depart +comput +sciencesat +univers +texa +austin +bachelor +scienc +master +scienc +comput +scienc +atth +depart +inform +system +comput +scienc +disc +nation +univers +singapor +research +interest +algorithm +data +structur +vlsi +designalgorithm +small +tropic +island +call +singapor +locat +degre +north +equat +singapor +internet +communityi +much +aliv +welcom +particip +know +aboutthi +island +nation +peopl +wife +come +hong +kong +come +month +activ +lovesto +smile +contact +inform +mail +leekk +utexa +phone +home +home +campu +addr +depart +comput +scienc +taylor +univers +texa +austin +austin +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..dc33c9a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,43 @@ +laboratori +experiment +softwar +system +less +laboratori +experiment +softwar +system +less +main +object +research +investig +wai +buildreli +high +perform +softwar +parallel +distributedsystem +laboratori +experiment +softwar +system +less +apart +depart +computersci +univers +oftexa +austin +research +projectsmemb +lablessss +less +seminar +seriessponsorslast +modifi +decemb +robert +blumoferdb +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..79767171 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,102 @@ +calvin +lincalvin +linassist +professor +comput +sciencesth +import +thing +iswhat +studi +_study_ +plai +_play_ +pete +carrilresearch +interestscompil +languag +parallel +comput +parallel +performanceanalysi +scientif +comput +program +languag +project +home +page +select +publicationsth +portabl +parallel +implement +novel +mathemat +biologyalgorithm +dikaiako +manoussaki +woodward +conf +supercomput +accommod +polymorph +data +decomposit +explicitli +parallelprogram +snyder +proceed +internationalparallel +process +symposium +april +arrai +sublanguag +snyder +languag +compilersfor +parallel +comput +banerje +gelernt +nicolau +padua +springer +verlag +portabl +implement +simpl +snyder +intern +journal +parallel +program +comparison +program +model +share +memori +multiprocessor +withl +snyder +proceed +intern +confer +parallelprocess +contact +inform +offic +taylor +hall +email +address +utexa +postal +address +depart +comput +sciencesth +univers +texa +austinaustin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..0f392062 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,82 @@ +guangtian +home +page +guangtian +homepagehi +welcom +home +page +current +construct +page +content +time +permit +apolog +incomplet +result +inconveni +current +graduat +student +depart +comput +scienc +theunivers +texa +austin +researchi +member +professor +real +time +system +research +group +research +interest +includ +real +timeschedul +algorithm +oper +system +network +perform +distribut +system +also +work +data +replic +knowledg +mine +last +summer +internship +contact +inform +offic +good +view +phone +email +liugt +utexa +mail +address +univers +texa +austin +depart +comput +scienc +austin +page +last +updat +pleas +send +comment +liugt +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..b37a9224 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,62 @@ +lorenzo +alvisi +home +page +lorenzo +alvisi +assist +professor +depart +comput +scienc +comput +scienc +cornel +comput +scienc +cornel +laurea +physic +universit +agrav +bologna +itali +offic +taylorhal +campusshow +locat +taylor +hall +phone +mail +lorenzo +utexa +research +interestsi +interest +distribut +comput +special +emphasi +fault +toler +cours +distribut +comput +spring +oper +system +fall +topic +distribut +sytem +fall +public +photo +maria +last +modifi +lorenzo +alvisi +lorenzo +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..084d91b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,79 @@ +home +page +home +page +graduat +student +current +work +work +comput +networksoth +class +tsinghua +univers +undergradu +institut +china +depart +comput +scienc +studyut +austin +universityaustin +texa +live +academ +link +internet +comput +scienc +area +comput +languag +system +ieee +sigcomm +sigmod +siglink +siggraph +sigmm +sigir +comput +compani +link +onlin +shop +cool +site +chines +music +current +newsjob +hunt +weather +forcast +dictionari +contact +inform +campu +dept +univ +texa +austin +austin +current +addr +microsoft +corpor +mail +luxu +utexa +xuelu +microsoft +thank +come +last +modifi +luxu +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..0b181312 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,129 @@ +lauri +honour +werthlauri +honour +werthlectur +lwerth +utexa +educurr +semest +fall +offic +hour +fall +time +offic +taylor +phone +link +class +softwar +engineeringc +contemporari +issu +comput +scienceprofession +servicevic +chair +educ +ieee +technic +committe +softwar +engin +presentco +chair +confer +chair +profession +develop +committe +presentarea +interestsoftwar +engin +cognit +scienc +summari +researchmi +current +work +center +develop +softwar +tool +andenviron +area +includ +comput +human +interfac +andsoftwar +metric +select +recent +publicationsl +werth +qualiti +assur +softwar +engin +project +ieee +transact +educ +januari +werth +lectur +note +softwar +process +improv +werth +industri +strength +case +tool +softwar +engin +class +softwar +engin +educ +tomayko +springer +verlag +werth +john +werth +direct +softwar +engin +educ +proceed +workshop +direct +softwar +engin +ics +werth +object +orient +program +macintosh +journal +object +orient +program +us +link +univers +texa +comput +scienc +depart +home +pagefaculti +profilesc +classeslast +updat +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..f1600e2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,56 @@ +madhukar +reddi +korupoluwelcom +madhukar +home +page +reach +home +avenu +austin +texa +offic +taylor +hall +dept +comp +scienc +univ +texa +austin +austin +texa +ahom +offic +madhukar +utexa +link +offici +madrashomepag +ganga +alumniclass +madra +utalgorithm +comput +theori +group +colloquium +oncomput +complex +info +cricket +worldwid +offici +site +espnet +sportszon +interact +batchu +india +page +author +madhukar +reddi +korupoluemail +madhukar +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..208cd66c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,23 @@ +richard +malloryrichard +malloryresearchthesi +research +produc +quasi +natur +languag +explan +qsimsimul +current +implement +work +simpl +system +contact +email +mallori +utexa +offic +taylor +austin +home diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..6695d67f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,206 @@ +home +page +marco +schneidermarco +schneiderph +candid +depart +comput +scienc +univers +texa +austinresearchth +titl +dissert +flow +rout +comput +network +research +interest +area +network +protocol +distribut +comput +fault +toler +particular +self +stabil +system +implicit +design +system +label +itsstat +legitim +illegitim +identifi +legitim +state +occur +correct +intend +execut +system +state +consid +illegitim +system +said +self +stabil +whenregardless +initi +state +guarante +converg +legitim +state +finit +number +step +systemwhich +self +stabil +stai +illegitim +state +forev +vita +postscript +public +self +stabil +comput +survei +march +self +stabil +real +time +decis +system +respons +comput +system +step +toward +fault +tolerantr +time +system +kluwer +academ +publish +earlier +version +appear +proceed +third +intern +workshop +respons +comput +system +octob +stabil +maximum +flow +tree +invit +talk +proceed +third +annual +joint +confer +inform +scienc +novemb +submit +inform +scienc +journal +author +moham +gouda +maximum +flow +rout +proceed +second +workshop +self +stabil +system +author +moham +gouda +minimum +depth +flow +rout +prepar +author +moham +gouda +anish +arora +memori +requir +silent +stabil +appear +fifteenth +symposium +principl +distribut +comput +author +shlomi +dolev +moham +gouda +stabil +minimum +span +tree +prepar +author +moham +gouda +implement +flow +rout +internet +prepar +author +moham +gouda +person +interest +list +link +construct +contact +inform +offic +taylor +hall +email +marco +utexa +postal +address +univers +texa +austin +depart +comput +scienc +ctaylor +austin +usamarco +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..8773d809 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,120 @@ +home +page +mark +johnstonemark +johnstonecontact +inform +offic +taylor +hall +postal +address +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +usual +find +offic +best +reach +isvia +email +markj +utexa +mark +johnston +also +look +full +finger +inform +semest +oper +system +taught +byrichard +brice +addit +object +orient +design +analysisclass +taught +glenn +down +appl +somerset +compani +pleas +page +class +graduat +comput +scienc +spring +work +motorola +appl +somersetdesign +centerresearch +informationi +member +oop +research +group +depart +comput +scienc +univers +texa +austin +part +research +build +real +time +garbagecollector +addit +perform +number +ofstudi +memori +alloc +routin +postscript +copi +dissertationpropos +avail +inform +pleas +listof +public +along +brief +descript +develop +class +librari +allow +precis +timingof +routin +intel +pentium +run +linux +code +publicli +avail +stuff +relat +research +depart +comput +scienc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..804c7fb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,15 @@ +mark +home +page +point +interest +visit +also +friend +home +page +click +last +modifi +markng +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..ca5ee610 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,110 @@ +marku +kaltenbachmarku +kaltenbachintroductionwelcom +home +page +current +construct +page +andwil +entri +link +time +permit +time +iapolog +incomplet +result +inconveni +researchi +member +prof +misra +spsp +research +groupand +prof +emerson +stempor +reason +group +part +work +develop +model +checkerfor +finit +state +uniti +program +proposit +uniti +logic +uniti +verifi +system +recent +version +thesi +isalso +avail +contact +inform +offic +taylor +hall +phone +taylor +hall +phone +email +address +marku +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin +place +interest +find +interest +place +internet +worth +avisit +theut +comput +scienc +departmenthom +page +softwar +archivefor +macintosh +comput +appl +comput +sworld +wide +technic +supporthom +page +link +actansit +comprehens +archiv +network +link +theatt +distribut +archiv +page +last +updat +marku +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..1738e8bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,74 @@ +memarti +mayberri +student +depart +comput +scienc +univers +texa +austin +researchal +kind +stuff +educ +comput +scienc +univers +texa +austin +math +comput +scienc +univers +texa +austin +contact +inform +offic +taylor +hall +phone +email +address +martym +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin +click +applet +paus +resum +displai +local +link +utc +neural +network +homepag +home +page +utc +home +page +austin +home +page +hotlist +downtown +anywher +virtualc +internetrestaur +guid +virtual +tnstechnolog +demonstr +read +daili +texan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..c84f0c63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,62 @@ +home +page +norm +mccain +norm +mccainabout +mephd +student +comput +scienc +colleg +natur +sciencesat +univers +texa +austin +advisor +vladimir +lifschitz +comput +scienc +expect +austin +thesi +titl +causal +commonsens +reason +action +comput +scienc +univers +kansa +philosophi +baker +univers +vita +postscript +avail +onlin +research +interestscommonsens +reason +actionlog +program +nonmonoton +reasoningmi +paper +avail +onlin +contact +mepost +comput +scienc +austin +austin +usavoic +main +offic +offic +mccain +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..858e22c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,56 @@ +mari +elain +califfmari +elain +califfmachin +learn +research +groupunivers +texa +austinresearchmi +current +research +interest +us +machin +learn +especiallyinduct +logic +program +natur +languag +acquisit +formor +info +check +vita +educ +comput +scienc +baylor +univers +english +baylor +univers +english +baylor +univers +contact +inform +offic +taylor +hall +phone +email +address +mecaliff +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..e4c0303e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,356 @@ +knowledg +base +system +groupknowledg +base +system +group +bruce +porter +rich +mallori +peter +clark +souther +fred +prado +charl +callawai +shown +carl +andersen +steve +correl +overviewour +group +part +depart +comput +scienc +atuniv +texa +austin +long +term +goal +research +develop +technolog +forconstruct +us +larg +multifunct +knowledg +base +oncomput +knowledg +base +would +significantli +improv +currentexpert +system +tutor +system +contain +broadknowledg +domain +requir +perform +multipl +task +toexplain +domain +knowledg +multipl +viewpoint +past +eight +year +built +larg +knowledg +base +inon +area +biologi +develop +method +automat +answeringa +varieti +question +us +knowledg +base +contain +fact +concern +concept +knowledg +base +ofth +largest +kind +content +structur +formallyrepres +addit +expand +knowledg +base +arealso +begin +construct +similar +knowledg +base +domain +notabl +domain +distribut +comput +especi +encourag +result +us +knowledgebas +varieti +task +recent +jame +lester +us +thebiolog +knowledg +base +test +system +explan +gener +system +gener +explan +express +english +concern +biolog +object +event +control +experi +domain +expert +found +littl +differ +explan +andthos +written +colleagu +current +extend +type +question +beanswer +us +autom +reason +larg +knowledg +base +jeffrickel +develop +method +composit +model +taskof +construct +model +appropri +answer +predict +question +perform +task +well +requir +build +thesimplest +model +adequ +answer +question +dauntingrequir +sinc +knowledg +base +like +implicitli +contain +manymodel +numer +level +detail +qualit +process +compilerand +qsim +us +simul +model +built +jeff +program +final +test +gener +research +result +bybuild +knowledg +base +anoth +domain +distribut +computingenviron +focuss +construct +help +deskassist +automat +answer +proport +custom +squestion +would +otherwis +phone +normal +help +desk +research +projectsour +complet +ongo +research +project +includ +knowledg +represent +languag +kned +knowledg +base +editor +biologi +knowledg +base +kastl +viewpoint +retriev +knight +explan +text +plan +fare +natur +languag +gener +text +plan +lex +comput +aid +mainten +lexicon +tripel +composit +model +answer +predict +question +help +desk +assist +help +desk +assist +project +research +bruce +porter +porter +utexa +rich +mallori +mallori +utexa +souther +souther +utexa +charl +callawai +theorist +utexa +fred +prado +prado +utexa +carl +andersen +searcher +utexa +steve +correl +correl +utexa +recent +alumni +alumna +lian +acker +acker +austin +erik +eilert +eilert +utexa +bareiss +bareiss +brad +blumenth +karl +brant +karl +eolu +uwyo +peter +clark +clarkp +redwood +boe +jame +lester +lester +ncsu +murrai +murrai +jeff +rickel +rickel +publicationsclick +select +public +group +relat +projectsclick +herefor +extens +collect +pointer +project +aroundth +world +porter +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..09efa6e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,123 @@ +daniel +mirank +home +page +welcom +daniel +mirank +belat +presenc +construct +someth +like +finish +hold +breath +send +note +mirank +utexa +fashionwai +student +page +wouldn +place +either +rule +match +learn +treat +algorithm +itscomparison +rete +warn +render +obsolet +byth +leap +algorithm +current +research +interest +current +research +goal +encompass +venu +rule +languag +usea +basi +activ +distribut +databas +fundamentalcomput +scienc +problem +corollari +evolv +thatgoal +constraint +satisfact +search +queri +optim +relat +object +orient +parallel +execut +rule +base +program +knowledg +compil +bibliographi +sometim +link +paper +come +soon +mirank +group +text +bibtex +rule +match +text +bibtex +rete +treat +text +bibtex +student +current +candid +roberto +bayardo +david +gadboi +lanc +obermey +vasili +samoladi +robert +schrag +master +candid +srinivasan +vaidyaraman +lane +warshaw +past +student +archi +andrewsdavid +brantchin +ming +kuoshiow +yang +past +student +salvator +stolfo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..50fcbeef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,107 @@ +jayadev +misra +jayadev +misrareg +chair +comput +scienc +depart +chair +tech +indian +institut +technolog +kanpur +john +hopkin +univers +honor +award +profession +servic +john +simon +guggenheim +fellow +ieee +fellow +fellowarea +interestparallel +program +summari +researchmi +interest +appli +formal +method +practic +particularli +inth +specif +design +synchron +asynchronoussystem +select +recent +publicationsj +misra +powerlist +structur +parallel +recurs +classic +mind +essai +honor +hoar +prentic +hall +januari +misra +loos +coupl +process +futur +gener +comput +system +north +holland +misra +phase +synchron +inform +process +letter +misra +equat +reason +nondeterminist +process +formal +aspect +comput +misra +chandi +parallel +program +design +foundat +addison +weslei +research +group +group +homepag +inform +work +electron +access +otherpap +current +research +project +seuss +anoverview +apostscript +versionaccess diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..a63ca0fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,326 @@ +utc +machin +learn +research +group +machin +learn +research +group +supervis +professor +moonei +research +focuseson +combin +empir +knowledg +base +learn +techniqu +includ +applic +natur +languag +acquisit +knowledgerefin +learn +plan +group +part +artifici +intellig +comput +scienc +depart +atth +univers +texa +ataustin +pictur +group +click +graduat +student +mari +elain +califf +mecaliff +utexa +tara +estlin +estlin +utexa +hermjakob +utexa +sowmya +ramachandran +sowmya +utexa +cindi +thompson +cthomp +utexa +alumni +paul +baff +baff +scicomp +jeff +mahonei +mahonei +firstadvisor +hwee +nhweetou +trantor +dirk +ourston +dirk_ourston +cpqm +saic +bradlei +richard +bradlei +furtwangen +siddarth +subramanian +georgetown +john +zell +acad +drake +research +area +public +abduct +induct +logic +program +natur +languag +acquisit +qualit +model +diagnosi +speedup +learn +learn +plan +student +model +intellig +tutor +system +theori +refin +uncertain +reasoningher +complet +list +public +research +softwar +accel +abduct +reason +system +neither +theori +revis +system +proposit +rule +base +fort +theori +revis +system +first +order +rule +base +chillin +induct +logic +program +system +predic +invent +foidl +induct +logic +program +system +first +order +decis +list +dolphin +speedup +learn +system +ad +search +control +prolog +program +program +standard +induct +classif +algorithm +softwar +autom +experiment +system +comparison +data +repositori +languag +learn +data +form +relat +site +associ +group +aaai +american +associ +associ +comput +linguist +ilpnet +european +induct +logic +program +scientif +network +sigart +special +interest +group +artifici +intellig +signll +special +interest +group +natur +languag +learn +confer +intern +joint +confer +aritfici +intellig +ijcai +nation +confer +artifici +intellig +aaai +intern +confer +machin +learn +icml +fourth +european +confer +plan +inform +sourc +subject +index +biblio +queri +index +machinelearn +home +page +machin +learn +inform +servic +comput +languag +paper +archiv +journal +artifici +intellig +journal +comput +linguist +journal +jair +journal +artifici +intellig +research +machin +learn +journal +learn +system +foil +quinlan +induct +learner +prodigi +problem +solv +system +plan +learn +carnegi +mellon +ucpop +partial +order +planner +univers +washington +research +group +explan +base +learn +group +univers +illinoi +induct +learn +group +univers +illinoi +machin +learn +research +group +oxford +univers +machin +learn +research +group +irvin +natur +languag +acquisit +group +univers +texa +austin +machin +learn +research +group +univers +wisconsin +madison +estlin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..5d5737a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,166 @@ +raymond +moonei +homepageraymond +mooneyassoci +professor +computersci +univers +texa +austin +comput +engin +univers +illinoi +urbana +champaign +comput +scienc +univers +illinoi +urbana +champaign +comput +scienc +univers +illinoi +urbana +champaign +research +interestsmi +current +research +interest +artifici +intellig +primarilyin +area +machin +learn +includ +natur +languag +acquisit +learn +parser +lexicon +inform +extract +word +sens +disambigu +exampl +induct +logic +program +learn +prolog +program +exampl +knowledg +base +theori +refin +automat +modifi +rule +base +bayesian +network +empir +data +search +control +acquisit +learn +improv +plan +effici +qualiti +compar +combin +neural +network +symbol +learn +inform +public +machin +learn +home +page +vita +finger +inform +cours +informationfal +lisp +symbol +program +machin +learningspr +artifici +intellig +iiperson +historyi +grew +small +town +fallon +illinoi +wherestart +attend +fallontownship +highschool +start +fall +went +univers +illinoi +champaign +urbanato +obtain +degre +list +decemb +complet +myph +thesi +explan +base +learninggroup +direct +prof +gerald +dejong +began +posit +depart +comput +scienc +univers +texa +austin +contact +inform +offic +taylor +hall +email +address +moonei +utexa +postal +address +depart +comput +scienc +univers +texa +austin +austin +texa +home +address +meadowfir +austin +texa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..39c5fd4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,143 @@ +dave +moriartydav +moriarti +student +depart +comput +scienc +univers +texa +austin +researchsequenti +decis +task +appear +mani +practic +real +world +problemsinclud +control +resourc +alloc +rout +task +canb +character +follow +scenario +agent +observ +stateof +dynam +system +select +finit +action +thesystem +enter +state +upon +agent +must +selectanoth +action +system +return +payoff +decis +madeor +sequenc +decis +object +select +thesequ +action +return +highest +total +cumulativepayoff +research +evolv +neural +network +geneticalgorithm +learn +perform +sequenti +decis +task +amparticularli +interest +task +problem +specif +knowledg +iscurr +unavail +costli +obtain +domain +havestudi +includ +game +plai +intellig +control +constraintsatisfact +inform +list +public +educ +comput +scienc +universityof +texa +austin +comput +scienc +tulan +univers +contact +inform +offic +taylor +hall +phone +email +address +moriarti +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin +local +link +utc +neural +network +homepag +home +page +utc +home +page +austin +home +page +us +link +research +link +sport +link +misc +link +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..6b3604ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,54 @@ +wade +barnesm +wade +barnesmwbarn +utexa +eduresearch +workhelp +pagestyp +map +literatureliteratur +research +notesclassesbackground +informationph +student +depart +comput +scienc +univers +texa +austin +educ +comput +scienc +univers +texa +austin +mine +engin +univers +utah +mine +engin +univers +utah +reach +mehom +tanglebriar +trail +austin +campu +offic +yete +mail +mwbarn +utexa +eduauthor +wade +barnesemail +mwbarn +utexa +edulast +updat +mondai +decemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..3eb872bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,108 @@ +professor +nell +dale +websit +seed +scrollit_rl +seed +timertwo +window +settimeout +els +seed +professor +nell +dale +home +pagesunivers +texa +comput +scienc +departmentwelcom +home +page +reach +address +nell +dale +retir +senior +lectur +comput +scienc +univers +oftexa +austin +receiv +comput +scienc +utaustin +faculti +sinc +retir +fromful +time +teach +summer +teach +full +load +falland +spend +spring +summer +write +travel +pleas +feel +free +brows +room +resum +room +contain +curriculum +vita +bibliographi +room +contain +inform +text +book +author +author +research +room +contain +abstract +ofdissert +chair +recent +person +room +whichcontain +memento +nontechn +interest +pleas +direct +anycorrespond +mail +account +ndale +utexa +profession +profilepublicationsresearch +interestsperson +interestsnel +dale +westlak +austin +offic +document +creat +assist +right +reserv +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..6aac491c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,38 @@ +neeraj +page +browser +support +frame +view +home +page +either +download +netscap +navig +view +without +frame +note +choos +latter +pleas +keep +mind +page +design +view +netscap +pretti +background +color +chosen +page +work +browser +obnoxi +chartreus +color +page +blame +netscap diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..e053a3f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,23 @@ +gnan +home +pagegnana +kumar +natarajan +home +page +depart +comput +sciencesunivers +texa +austini +graduat +student +comput +sciencedepart +univers +texa +austin +mail +utexa +edufind +log diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..b22b7ce5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,38 @@ +nimar +singh +arora +home +page +nimar +arora +medic +art +austin +home +typic +first +year +student +doesn +quit +knowwhat +area +interest +look +resum +know +altern +look +bookmarksto +clearer +pictur +contact +click +queri +hit +term +score +ters +output +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..ab8ce5b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,106 @@ +utc +natur +languag +acquist +groupnatur +languag +acquisit +groupat +depart +comput +scienc +univers +texa +austinw +explor +discuss +paper +area +natur +languag +acquisitionand +learn +fall +meet +usual +everi +wednesdai +havedrawn +close +probabl +resum +meet +second +third +week +januari +current +propos +paper +previous +discuss +paper +current +particip +includ +prof +moonei +prof +risto +miikkulainen +bobbi +bryant +mari +elain +califf +hermjakob +marti +mayberri +rupert +tang +poon +cindi +thompson +inform +pleas +contact +group +coordin +hermjakob +utexa +relat +site +associ +comput +linguist +signll +special +interest +group +natur +languag +learn +comput +languag +print +archiv +utc +machin +learn +research +group +utc +neural +network +research +group +depart +comput +scienc +ofth +univers +texa +ataustinlast +updat +novemb +hermjakob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..784c160f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,80 @@ +utc +neural +net +research +groupth +utc +neural +net +research +group +supervis +prof +ristomiikkulainen +group +part +artifici +intellig +comput +scienc +depart +univers +texa +ataustin +research +concentr +artifici +intellig +andcognit +scienc +includ +natur +languag +process +schema +basedvis +cortic +self +organ +episod +memori +decis +make +evolv +neural +network +genet +algorithm +click +mapbelow +detail +check +hypertext +book +later +interact +thecortex +structur +function +risto +miikkulainen +graduat +student +alumni +visitor +public +demo +poster +softwar +home +page +confer +newsgroup +archiv +inform +sourc +gener +tool +privat +linkswusagemartym +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..e78fa9cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,119 @@ +gordon +novak +gordon +novak +support +free +speech +internet +associ +professor +ofcomput +scienc +atth +univers +texa +austin +director +artifici +intelligencelaboratori +highest +honor +univers +texa +austin +comput +scienc +comput +scienc +research +automat +program +reus +genericalgorithmssolv +physic +problem +specifiedinformallyartifici +intelligencecurriculum +vita +publicationsemploymentgrantsprofession +activ +honorscurriculum +vitaefre +softwar +tmycin +emycin +like +expert +system +shell +interfac +common +lispconvers +unit +measurementsoftwar +demo +construct +automat +program +server +creat +program +lisp +demo +write +program +physic +connect +diagram +unit +convers +demo +convert +unit +measur +isaac +demo +solv +physic +problem +state +english +lisp +server +interact +lisp +lisp +demo +interact +graphic +lisp +draw +interact +draw +us +graphic +lisp +class +comput +scienc +us +schemec +compilersc +artifici +intelligencec +automat +programmingweb +linksweatheraddress +gordon +novak +comput +scienc +ctai +univ +texa +austinaustintexa +offic +offic +faxnovak +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..8f30d8c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,56 @@ +meghan +brienhi +meghan +welcom +home +page +pleas +wipe +feet +enter +mani +complaint +crappi +laugh +stock +internet +page +remov +link +chanc +work +thank +sent +insult +panic +panic +still +download +gorgeou +pictur +blow +poster +size +beauti +queen +date +wait +hear +paul +get +marri +august +hope +come +except +insult +page +anyon +want +give +feel +free +resum +email +obrien +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..a3faf004 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,14 @@ +oguer +gutierrezogu +gutierrezth +depart +comput +scienc +theunivers +texa +austinprojectsomioswwhlinksconfer +databas +systemsth +worldemail +oguer +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..fc64af38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,338 @@ +oop +research +groupoop +research +groupthi +home +page +oop +research +group +supervis +prof +paul +wilson +graduat +studentsin +group +stephen +paul +carl +ajit +georg +mark +johnston +sheetal +kakkad +scott +kaplan +donovan +kolbl +michael +neeli +qing +dougla +wieren +research +area +memori +hierarchi +especi +persist +object +store +distribut +virtual +memori +cach +develop +simpl +high +perform +persist +store +call +texa +us +pointer +swizzl +page +fault +time +toimplement +larg +address +space +effici +stock +hardwar +andoper +system +us +standard +compil +basic +studi +program +behavior +memori +alloc +whichattempt +repair +damag +done +three +decad +mostli +unsoundstudi +memori +alloc +extens +alloc +survei +mike +neeli +master +thesi +automat +storag +manag +especi +real +time +generationaland +distribut +wilson +larg +small +survei +ongarbag +collect +paper +effici +local +garbag +collector +recent +develop +hard +real +time +garbag +collector +written +smart +pointer +interfac +object +orient +scheme +adapt +memori +managementfor +virtual +memori +file +system +especi +dynam +group +andcompress +structur +file +system +checkpoint +forfault +toler +time +travel +debug +implement +highli +extens +portabl +programmingsystem +includ +object +orient +extend +scheme +system +rscheme +thread +socket +real +time +interfac +donovan +kolbl +homepag +info +alpha +releas +sourc +code +paul +wilson +cours +noteson +scheme +scheme +interpret +compil +rawascii +text +andrschemear +also +avail +descript +integr +macro +process +algorithm +tosupport +extens +languag +open +compil +stephen +carl +master +thesi +note +thesiscontain +refer +coupl +paper +write +whicharen +avail +draft +form +anywai +sometimesoon +also +onlin +draft +paul +wilson +book +progress +introduct +scheme +implement +htmlformat +brows +contain +materialfrom +ascii +cours +note +scheme +much +improv +expandedpresent +standard +texinfo +page +materiali +work +includ +intro +object +system +metaobject +besid +good +gener +introduct +scheme +languag +scheme +program +interpret +compil +providesa +gener +introduct +thing +like +macro +make +good +backgroundread +stephen +carl +master +thesi +list +paper +brief +descript +also +avail +paper +bibliographi +heap +manag +sourc +code +fortexa +persist +store +avail +anonym +utexa +garbag +readm +file +list +avail +materi +includ +subdirectori +whichcontain +collect +paper +oopsla +garbag +collect +memori +manag +workshop +peopl +interest +garbag +collect +also +interest +henri +baker +sftp +site +although +overload +site +notb +access +keep +try +anoth +site +great +interest +han +boehm +whichcontain +sever +paper +well +free +sourc +code +severalgarbag +collector +us +languag +sheetal +kakkad diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..ed0adbcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,12 @@ +robert +otuomagieaddress +austin +phone +emailotu +utexa +eduuniververs +depart +infouniversityth +depart +univers +txa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..f70e5e66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,133 @@ +padgettdon +padgett +dissert +researchi +work +professor +brown +design +andimplement +devic +control +softwar +devic +driver +investig +creation +domain +specif +program +environ +construct +class +softwar +focu +work +thu +languag +compil +technolog +devis +prototyp +domain +specif +languag +specifi +devic +control +softar +call +virtual +devic +languag +languag +contain +variou +featur +reduc +effort +requir +construct +class +softwar +refer +manualfor +current +construct +postscript +draft +manual +avail +view +exampl +specif +also +avail +view +specif +counter +compon +avail +follow +file +physic +compon +interfac +file +virtual +compon +implement +file +virtual +compon +export +file +virtual +compon +interfac +file +specif +multifunct +devic +avail +follow +file +physic +devic +interfac +file +virtual +devic +implement +file +virtual +devic +export +file +virtual +devic +interfac +file +transpar +us +recent +present +also +avail +view +creat +us +microsoft +powerpointvers +window +contact +meemail +padgett +utexa +edupost +depart +comput +scienc +austin +austin +usafax diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..0eaa29a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,113 @@ +home +page +philip +hardinphilip +hardinabout +plan +elimin +bug +softwar +everywher +fail +fallback +plan +write +game +battlebal +access +page +binari +execut +file +oper +system +unfortun +avail +want +port +suno +solari +work +go +school +time +battlebal +multiplay +game +wrote +runsund +window +line +code +us +standard +templat +librari +anda +campbel +binari +space +partit +tree +librari +imag +page +screenshot +battlebal +student +univers +texa +austin +guess +comput +scienc +depart +interest +research +area +geometr +model +graphic +tree +realli +cool +geometr +model +main +research +interest +comput +graphicssoftwar +reus +engin +softwar +system +gener +research +group +automat +programmingto +contact +meemail +pahardin +utexa +edupost +comput +scienc +austin +austin +usanetrek +server +pita +curli +utexa +handl +digitaldisast +look +get +plaster +congradul +smartest +person +inth +univers diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..b2a2c1e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,98 @@ +paul +mcquestenpaul +mcquestenphd +studentdepart +comput +scienc +univers +texa +austin +interest +interact +learn +evolut +think +mechan +natur +evolut +might +bepract +addit +current +comput +techniqu +exampl +death +usual +studi +explicitli +email +paulmcq +utexa +offic +taylor +hall +phone +postal +address +univers +texa +austindepart +comput +scienc +austin +spring +head +forcsp +introduct +pascal +programmingmor +neuro +evolut +inmoriarti +research +link +learn +evolut +atcnr +rome +neural +network +research +group +artifici +intellig +cours +schedul +comput +scienc +depart +handi +access +tout +librari +onlin +univers +texa +austin +seriou +reflect +dave +winer +websit +need +pointer +wast +hour +surf +check +cynb +humong +hotlist +mix +knowledg +knick +knack +nut +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..87bf1ab3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,48 @@ +pawan +goyal +home +page +research +summari +public +multimedia +network +protocol +multimedia +file +system +multimedia +oper +system +affili +multimedia +group +depart +comput +scienc +univers +texa +austin +get +touch +email +pawang +utexa +austin +univers +texa +austin +depart +comput +scienc +austin +inform +finger +pawang +utexa +also +check +log +comment +pawang +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..bfb2c116 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,166 @@ +jose +pecina +jose +pecinaabout +obtain +physic +univers +texa +austin +previous +complet +master +innuclear +engin +current +finish +thesi +obtain +comput +scienc +depart +univers +texa +austin +graduat +studi +physic +workedinvestig +gaug +theori +graviti +base +group +quantiz +gravit +field +calcul +invari +group +unitari +irreduc +represent +publish +three +joint +paper +dissert +advisor +yuval +eman +dissert +advisor +georg +sudarshan +jurgen +lemk +fromcologn +germani +previou +posit +bureau +econom +geologi +spent +year +half +work +seismic +invers +tomographi +supervisor +hardag +editor +geophys +journal +societi +explor +geophysicist +research +interest +comput +scienc +algorithm +numer +analysi +parallel +comput +cryptographi +quantum +comput +also +research +visitor +theori +group +thephys +depart +carnegi +mellon +univers +pittsburgh +current +open +compani +scientif +softwar +develop +interest +fill +scientif +comerci +softwar +current +interest +physic +symmetri +algebra +group +represent +invari +also +explor +numer +sequenti +parallel +solutionsin +gener +rel +problem +also +quantum +chromodynamicsmi +curriculum +vita +click +want +print +contact +center +particl +theori +physic +depart +univers +texa +austin +austin +comput +scienc +austin +austin +usavoic +main +offic +home +austin +home +mail +pecina +utexa +orpecina +physic +utexa +orpecina +defo +phy diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..57761223 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,64 @@ +nettl +welcom +unix +cornerinfolik +anyon +would +want +know +make +realli +short +blahblah +system +woof +woof +window +blah +stuff +eeek +unix +staff +escap +floor +mean +ibm +know +file +afteri +instal +stori +true +name +chang +protect +theinnoc +experiment +work +pleas +know +think +us +neat +page +page +cool +comic +page +buena +vista +movieplex +find +meyour +chanc +find +would +increas +drastic +could +employan +improb +drive +send +mail +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..65742f4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,30 @@ +greg +plaxtongreg +plaxtongreg +plaxtoncontact +informationemail +plaxton +utexa +eduphon +offic +taylor +hall +postal +depart +comput +sciencetaylor +hall +univers +texa +austinaustin +texa +inform +annual +report +profilepubl +last +modifi +decemb +greg +plaxtonplaxton +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..4d79ce08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,115 @@ +bruce +porter +bruce +porterassoci +professor +faculti +fellow +comput +scienc +comput +scienc +univers +california +irvin +honor +award +profession +servic +presidenti +young +investig +editor +machin +learn +presentarea +interestartifici +intellig +machin +learn +knowledg +base +system +summari +researchhead +knowledg +base +system +research +group +research +develop +method +build +larg +knowledg +basesand +us +solv +problem +answer +question +researchinterest +machin +learn +case +base +learn +select +recent +public +rickel +porter +autom +model +answer +predict +question +select +thetim +scale +system +boundari +aaai +cambridg +aait +press +abstract +andpostscript +brant +porter +rule +preced +complementari +warrant +aaai +abstract +bareiss +porter +holt +concept +learn +heurist +classif +weak +theori +domain +artifici +intellig +journal +abstractand +postscript +hotlist +search +site +search +page +search +email +address +search +public +search +tech +reportport +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..a64829ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,300 @@ +group +austinpsp +group +austinthi +home +page +group +depart +comput +scienc +univers +texa +ataustin +stand +program +specif +proof +emphasi +work +group +deriveparallel +distribut +program +rigor +manner +group +issupervis +jayadev +misra +develop +theori +work +research +area +uniti +powerlist +seuss +current +former +member +groupinclud +jayadev +misra +adam +carruth +erni +cohen +graduat +rajeev +joshi +marku +kaltenbach +graduat +edgar +knapp +graduat +jacob +kornerup +ingolf +krger +graduat +josyula +graduat +mark +staskauska +graduat +publicationsbelow +summar +area +work +wherev +possibl +give +link +topap +avail +electron +uniti +uniti +program +notat +logic +reason +parallel +anddistribut +program +uniti +present +book +misra +chandi +parallel +program +design +foundat +addison +weslei +note +uniti +seri +ofpap +present +variou +result +uniti +applic +thenot +assum +basic +understand +uniti +theori +present +inchandi +misra +book +sinc +public +book +sever +improv +made +inth +theori +reflect +note +uniti +jayadev +misra +written +amanuscript +book +present +newun +includ +introduct +tempor +operatorco +specifi +safeti +uniti +refer +forrefer +paper +implement +marku +kaltenbach +current +write +asymbol +model +checker +forfinit +state +uniti +program +call +unityverifi +carruth +extend +uniti +logic +toinclud +real +time +aspect +comput +hybrid +system +powerlist +powerlist +notat +synchron +parallel +program +circuit +data +structur +list +length +equal +power +twodiffer +oper +balanc +divis +list +mani +parallelalgorithm +succinct +present +simpl +proof +thepowerlist +notat +jayadev +misra +paper +powerlist +structur +parallel +recurs +present +notat +givesnumer +exampl +algorithm +proof +correct +includ +fast +fourier +transform +batcher +sort +network +adam +studi +differ +arithmet +circuit +asadd +multipli +specifi +prove +correct +thepowerlist +notat +paper +verifi +addercircuit +us +powerlist +avail +jacob +kornerup +studi +powerlist +programscan +map +effici +differ +parallel +architectur +speciallyhypercub +list +ofpap +detail +seuss +seuss +offspr +work +uniti +address +issu +ofprogram +composit +restrict +program +compon +caninterfer +introduct +seuss +read +overview +seuss +chapter +froma +monograph +adisciplin +multiprogram +written +jayadev +misra +alsoavail +compil +seuss +genrat +code +callsfor +messag +commun +network +describ +thesi +anexperi +compil +design +concurr +object +basedprogram +languag +ingolfkrg +site +mani +paper +found +thepsp +sitejacob +kornerup diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..22c2b4c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,96 @@ +qime +huang +qime +huang +depart +comput +sciencesunivers +texa +austin +austin +texa +phone +email +qime +utexa +edureceiv +comput +scienc +nankai +univers +tianjin +inform +comput +scienc +univeris +hawaii +manoa +hawaiiwork +comput +scienc +univers +texa +austin +austincours +spring +advanc +telecommun +client +server +system +develop +appli +data +commun +system +cours +academ +resourc +teamweb +inform +center +utcssadaili +texanstock +room +attract +picturesimageschines +popsend +card +electr +postcard +line +job +jobtrakut +placement +center +connect +gopherftp +csc +newstelnet +cschen +junk +staffyour +person +visit +page +pleas +sign +guest +book +guest +inform +commentsguest +name +guest +email +guest +comment +page +construct +last +modifi +march +mail +comment +qime +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..e55d8ee6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,95 @@ +qualit +reason +research +utexasqualit +reason +research +utexasth +qualit +reason +research +group +research +sever +area +qualit +reason +thephys +world +qsim +index +bibliographi +ourresearch +paper +qsim +user +applic +qsim +like +system +spatial +reason +intelligentrobot +tour +index +bibliographi +ourresearch +paper +access +limit +logic +knowledgerepresent +algernon +index +bibliographi +ourresearch +paper +group +supervis +professor +benjaminkuip +kuiper +utexa +part +artifici +intellig +comput +scienc +depart +atth +univers +texa +ataustin +pointer +qualit +reason +book +graduat +student +qualit +reason +robot +knowledg +represent +alumni +includ +dissert +visitor +world +dissert +abstract +yellow +page +paper +softwar +easili +access +research +areadescript +also +visit +directori +paper +directori +softwar +document diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..4674f63d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,82 @@ +qiang +john +home +page +welcom +home +page +pleas +feel +free +brows +around +leav +comment +suggest +joke +thank +visit +pleas +come +seed +scrollit_rl +seed +timertwo +window +settimeout +els +seed +qiang +john +home +page +sinc +thank +com +austin +time +self +introduct +current +master +program +comput +scienc +depart +univers +texa +austin +pleas +click +inform +seriousjunk +comput +cours +languag +unix +comput +graphic +linux +window +program +program +technic +java +realjunk +sport +game +new +struggleforliv +institut +qzuo +utexa +guestbook +page +still +underconstruct +welcom +back +last +modif +copyright +qiang diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..8d4ff350 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,76 @@ +robert +blumoferobert +blumofei +gener +bobbi +last +name +pronounc +bloom +gener +informationassist +professor +ofcomput +scienc +univers +texa +austin +comput +scienc +massachusett +institut +technolog +comput +scienc +massachusett +institut +technolog +comput +scienc +brown +univers +work +cilkmultithread +languag +runtim +system +laboratori +experiment +softwar +system +less +compil +list +paper +paper +document +also +avail +directori +semest +spring +teach +abstract +data +type +contact +informationemail +utexa +eduphon +offic +taylor +hallpost +depart +comput +sciencestaylor +hall +univers +texa +austinaustin +texa +last +modifi +decemb +robert +blumoferdb +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..9ab441f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,47 @@ +wang +homepag +wangwelcom +current +construct +page +content +time +permit +current +candid +depart +comput +scienc +theunivers +texa +austin +research +interest +includ +real +time +system +rule +base +program +time +analysi +softwar +engin +artifici +intellig +comput +publicationsi +list +public +avail +brows +interest +page +last +updat +pleas +send +comment +rhwang +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..1475873a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,124 @@ +risto +miikkulainenristo +miikkulainenassoci +professor +computersci +univers +oftexa +austin +comput +scienc +ucla +appli +mathemat +helsinki +univers +technolog +research +intereststh +research +group +concentr +model +cognit +processeswith +artifici +neural +network +current +work +includ +model +languageacquisit +episod +memori +self +organ +visual +cortex +schema +base +vision +also +work +evolv +neural +networkswith +genet +algorithm +goal +automat +discoversequenti +decis +strategi +problem +solv +robot +detail +utc +neural +network +research +group +home +page +classessumm +artifici +intellig +undergradu +lectur +cours +fall +neural +network +undergradu +lectur +cours +fall +neural +network +graduat +seminar +spring +artifici +intellig +graduat +lectur +cours +fall +neural +network +undergradu +lectur +cours +fall +cognit +scienc +graduat +seminar +spring +artifici +intellig +graduat +lectur +cours +contact +inform +offic +taylor +hall +email +address +risto +utexa +phone +postal +address +depart +comput +scienc +univers +texa +austin +austin +finger diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..4c35f220 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,2 @@ +rong +bigfoot diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..fc33dd36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,67 @@ +rajmohan +rajaraman +home +pagerajmohan +rajaraman +graduat +student +depart +comput +scienc +atth +univers +texa +ataustin +plan +complet +spring +mydissert +supervisor +gregplaxton +research +member +algorithm +andcomput +theori +group +particularli +interest +incombinator +distribut +network +algorithm +onlin +algorithm +parallel +model +comput +random +list +mypubl +curriculum +vita +us +link +relat +comput +sciencemiscellan +linkscontact +inform +email +rraj +utexa +home +austin +phone +offic +ephon +postal +univers +texa +austin +depart +comput +scienc +austin +rraj +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..929525aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,104 @@ +rong +homepagea +rong +homepag +chinachina +homepag +jinan +myhometown +capit +shandong +provinc +tsinghua +univers +gotmi +depart +computersci +technolog +beij +capit +ofth +peopl +republ +ofchina +spent +five +colleg +year +zhai +sinanet +homepag +austin +texasaustin +citi +live +weather +austin +apart +rent +austin +graduat +utcsth +univers +texa +austin +depart +comput +scienc +utnetcat +browsabl +onlin +catalog +librari +utaccess +austininform +servic +login +utcat +utaccess +cours +fall +oper +systemsdynam +file +replic +final +project +comput +graphicsc +introduct +mathemat +logicc +introduct +graduat +comput +scienc +moviesaustin +chronicl +film +time +yahoo +entertain +movi +filmsmicrosoft +cinemania +onlineal +movi +guidehollywood +onlineinternet +movi +databaserog +ebert +moviesvisit +movi +page +contactmail +address +river +aaustin +telephon +emailrtan +utexa +fingerclick +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..f1c4af63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,81 @@ +rupert +tang +home +page +rupert +tang +student +dept +comput +scienc +univers +texa +austin +comput +scienc +univers +texa +austin +austin +almost +miracl +modern +teach +method +entir +strangl +holi +curious +inquiri +delic +littl +plant +need +anyth +besid +stimul +freedom +think +life +would +realli +empti +depriv +opportun +alwai +choos +altern +distast +deni +actual +littl +wish +aspir +fear +duress +fate +would +think +life +much +differ +truck +wash +machin +nice +meet +complet +cool +servic +know +academ +interest +research +messi +area +page +construct +alwai +rupert +utexa +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..b3cf3339 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,18 @@ +ruwei +homepag +frame +alert +see +messag +us +browser +support +frame +click +frame +version +document +could +download +netscap +navig diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..a51e074b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,121 @@ +robert +geijn +robert +geijnassoci +professor +depart +comput +scienc +texa +institut +comput +appliedmathemat +univers +texa +austin +austin +phone +mail +rvdg +utexa +http +utexa +user +rvdg +mathemat +comput +scienc +univers +wisconsin +madison +appli +mathemat +univers +maryland +colleg +park +area +interestnumer +analysi +parallel +supercomput +scientif +comput +summari +researchth +introduct +parallel +comput +forc +evalu +oftradit +numer +method +develop +sequentialmachin +case +techniqu +continu +us +inoth +case +method +prove +perform +better +researchconcentr +develop +parallel +techniqu +forimpl +numer +method +well +environ +allowssuch +method +easili +implement +variou +parallelprocessor +inform +graduat +program +comput +scienc +comput +appli +mathemat +workshop +parallel +infrastructur +applic +april +austin +current +project +intercom +project +plapack +project +sl_librari +public +book +journal +public +confer +public +technic +report +tutori +major +softwar +effort +class +fall +schedul +current +former +student +meet +famili diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..5ecd30a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,48 @@ +sundeep +abraham +home +page +seed +scrollit_rl +seed +timertwo +window +settimeout +els +seed +sundeepabraham +master +student +computersci +dept +universityof +texa +austin +undergradu +studi +comput +sciencesand +engin +region +engg +colleg +calicut +india +home +countri +india +india +hail +state +kerala +know +contact +click +home +page +construct +tinkerwith +time +time +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..c418f3a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,173 @@ +sammi +altavista +startingpoint +yahoo +lyco +map +refer +weather +white +page +yellow +page +depart +comput +scienc +univers +texa +austin +sammi +utexa +congress +shall +make +respect +establish +religion +orprohibit +free +exercis +thereof +abridg +freedom +ofspeech +press +right +peopl +peaceabl +toassembl +petit +govern +redress +grievanc +constitut +unit +state +america +research +prof +calvin +prof +robert +geijn +project +parallel +linear +algebra +packag +schedul +artifici +intellig +topic +distribut +comput +parallel +languag +compil +advanc +oper +system +project +experi +world +wide +herbarium +plapack +sign +page +hypertext +code +anagram +server +univers +texa +depart +comput +scienc +librari +librari +friend +nil +virtual +reker +yanni +musician +jeff +hockei +andrea +hamilton +technolog +famili +pop +guyer +public +health +northwestern +anthropolog +kate +nate +activ +texa +swim +swim +link +yellow +page +swim +entertain +showbiz +austin +chronicl +pollstar +concert +databas +comput +html +quick +refer +guid +research +dell +fring +ryder +laptop +info +world +wide +consortium +miscellan +boston +hotlist +list +traveloc +construct +construct +offic +address +taylor +hall +depart +comput +scienc +univers +texa +austin +austin +home +address +great +hill +austin +sammi +utexa +eduth +opinion +express +mine +necessarili +repres +view +univers +texa +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..2c24e1b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,63 @@ +sawadajun +sawadacontact +inform +email +address +sawada +utexa +offic +main +mail +address +univ +texa +depart +comput +scienc +taylor +hall +austin +home +mail +address +wooten +austin +dissert +oral +propos +time +place +abstract +propos +paper +file +supplementari +technic +report +file +kbresourc +common +lisp +languag +edit +bowen +formal +method +page +formal +method +around +world +pvsother +frequent +access +page +austin +depart +teacher +fellow +comput +logic +boyer +class +inform diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..92456d61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,243 @@ +ssgrg +titl +pagewelcom +home +page +softwar +system +gener +research +group +softwar +system +gener +tool +assembl +complex +softwar +interchang +reusabl +compon +develop +genvoca +domain +independ +model +softwar +construct +defin +system +algebra +equat +term +compon +genvoca +successfulli +appli +mani +domain +includ +databas +manag +system +avion +data +structur +result +demonstr +genvoca +gener +substanti +improv +product +applic +time +perform +first +visit +question +best +place +start +take +look +get +start +research +group +member +get +start +project +index +public +softwar +distribut +relat +page +utc +gener +member +batori +professorangela +dappert +studentguillermo +jimenez +perezph +studentjeff +thomasph +studentl +tokuda +studentyanni +smaragdaki +studentk +shepherdresearch +associateform +member +graduat +datesdinesh +dasph +milli +villarrealph +decemb +bart +geracipostdoc +marti +sirkinph +march +sankar +dasarim +overview +get +start +softwar +compon +us +gener +build +softwar +system +typic +softwar +modul +compon +encapsul +featur +domain +mani +system +domain +share +possibl +compon +must +encapsul +refin +mani +differ +part +class +softwar +system +refin +requir +manipul +metadata +reflect +comput +thu +like +basic +approach +goe +beyond +simpl +object +orient +larg +scale +program +transform +feel +basic +issu +involv +breadth +genvoca +applic +recommend +follow +paper +starter +read +order +scalabl +softwar +librari +creat +refer +architectur +design +implement +composit +valid +subject +look +specif +result +improv +product +perform +deliv +gener +relationship +work +design +pattern +check +order +reengin +complex +applic +lightweight +dbm +gener +memori +simul +softwar +generatorsautom +softwar +evolut +design +pattern +inform +pleas +contact +batori +batori +utexa +period +releas +lectur +note +tutori +softwar +system +gener +architectur +reus +avail +lectur +note +distribut +file +contain +compress +postscript +file +last +modifi +decemb +batori +batori +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..d07b70d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,192 @@ +scott +realli +basic +home +pagescott +realli +basic +home +pageokai +long +overdu +slight +updat +page +doesn +mean +go +look +fanci +expect +littl +least +littl +current +interest +inform +work +stuff +univers +texa +austin +depart +finish +year +try +think +mani +oop +group +research +group +work +within +utc +interest +system +languag +research +think +class +object +orient +design +instructor +class +glenn +down +amherst +colleg +great +place +spend +undergradu +dai +time +spent +unpredict +amherst +colleg +depart +santa +institut +amaz +place +spent +summer +work +differ +kind +research +orient +hobbi +stuffit +perfect +linux +invalu +texa +squash +home +pageth +mead +home +page +like +wine +beer +doesn +stuff +psion +maker +cool +palmtop +without +would +forget +name +peopl +stuff +anastasi +want +peopl +check +page +well +try +sharp +amaz +home +page +much +free +time +neat +stuff +miscellan +item +particular +order +austin +check +austin +dine +guid +actual +page +inform +austin +think +import +part +citizen +poke +good +humor +amherst +publish +appl +comput +still +thing +right +iici +last +forev +ala +longer +sure +bright +futur +type +machin +bebox +look +like +could +neat +simpl +page +better +noth +probabl +didn +wait +five +minut +load +right +want +send +email +page +maintain +sfkaplan +utexa +might +want +grab +includ +link +inform +encrypt +gener diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..7b5751fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,52 @@ +prashant +shenoi +home +page +prashant +shenoi +welcom +home +page +get +touch +email +shenoi +utexa +river +austin +univers +texa +austin +depart +comput +scienc +austin +offic +main +tower +floor +main +build +inform +finger +shenoi +utexa +also +check +log +multimedia +group +depart +comput +scienc +univers +texa +austin +list +recent +public +avail +onlin +comment +shenoi +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..59b5f1e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,40 @@ +shaob +cyberhom +welcom +cyberhom +shaob +current +student +depart +comput +scienc +univers +texa +austin +want +know +check +ongo +work +hardvar +verifc +comput +network +vlsi +final +project +fall +bookshelf +coffe +tabl +campu +austin +citi +make +contact +pleasant +vallei +austin +shma +utexa +finger diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..7b871f4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,115 @@ +shailesh +kumarshailesh +kumarth +univers +texa +austindepart +comput +sciencestaylor +hall +austin +skumar +utexa +edumi +resumeresearch +interest +publicationscontact +mesrcm +spiritu +affili +internet +offersom +linkscognit +scienceutc +neural +network +researchutc +machin +learn +research +groupresearch +interest +artifici +intellig +artifici +life +neuroevolut +neural +network +applic +genet +algorithm +cellular +automata +chao +nonlinear +dynam +fuzzi +logic +massiv +parallel +processor +publicationson +line +adapt +signal +predistort +dual +reinforc +learn +page +patrick +goetz +shailesh +kumar +risto +miikkulainen +comput +appli +mathemat +univers +texa +austin +depart +comput +scienc +univers +texa +austin +machin +learn +proceed +annual +confer +bari +itali +object +base +evolut +program +shailesh +kumar +bord +singh +symposium +genet +algorithm +aprl +india +contact +snail +mail +whiti +avenu +austin +phone +home +offic +internet +offernet +assistancesearch +tool +qualiti +institutewww +infoindia +music diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..a6a4e553 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,58 @@ +smaragdaki +yanni +utc +yanni +smaragdaki +utc +posit +graduat +student +interest +alchemi +project +turn +lead +gold +moder +success +plan +scheme +make +arrang +someth +project +program +webster +world +dictionari +serious +student +depart +comput +scienc +main +research +interest +area +meta +program +system +applic +particularli +softwar +gener +research +photo +album +favorit +sitessmaragd +utexa +eduyanni +smaragdakisunivers +texa +austin +comput +scienc +departmenttai +austin +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..b900cade --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,66 @@ +homepag +edward +danger +construct +site +fall +asphalt +bodi +resum +tell +like +danc +shadow +moonlight +click +email +email +utexa +person +data +strictli +need +know +basi +pleas +send +effort +duli +note +pictur +interest +link +greatest +experi +stimul +nerv +center +wouldn +mean +absolut +pleas +ever +dy +ignor +previou +link +link +minion +minion +minion +recent +addit +field +trip +pania +haiku +like +leaf +afloat +wind +stream +eddi +waterfal +mean +life +visitor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..788ef002 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,37 @@ +southerart +southerresearchbuild +member +knowledg +base +system +research +group +contact +inform +email +souther +utexa +work +mail +comput +scienc +depart +univers +texa +austin +austin +texa +hotlist +search +site +search +page +search +email +address +search +public +search +tech +reportsouth +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..6cacbd00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,75 @@ +sowmya +ramachandransowmya +ramachandranmachin +learn +research +groupunivers +texa +austinresearchmi +research +area +machin +learn +field +ofartif +intellig +interest +problem +learningbayesian +network +exampl +learn +bayesian +network +withhidden +variabl +challeng +approach +appli +symbol +connectionist +theori +revis +techniqu +address +thisproblem +also +interest +design +creat +multimediaappl +resum +list +paper +educ +comput +scienc +rutger +univers +tech +comput +scienc +indian +institut +technolog +madra +india +contact +inform +offic +taylor +hall +phone +email +address +sowmya +utexa +postal +address +univers +texa +austin +depart +comput +scienc +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..fdad8341 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,62 @@ +sriram +home +page +sriram +raocurr +research +involv +design +implement +multimedia +file +systemoper +system +support +multimediai +work +multimediagroup +comput +sciencesdepart +univers +texa +austin +advisor +prof +harrickvinpublicationsminegroupcontact +informationofficetai +email +sriram +utexa +edudepart +comput +scienc +univers +texa +austinaustin +miscellaneousotherinterest +pagespicturesof +toweraustin +isth +capit +texa +locat +central +texa +hill +countri +click +herefor +inform +austin +kannada +koota +click +informationabout +austin +tamil +sangam +comment +pleas +free +send +mail diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..d2a076f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,17 @@ +tiger +frame +alert +see +messag +us +frame +challeng +browser +click +frame +version +document +could +download +netscap +navig diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..db704abf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,70 @@ +sunghe +choisunghe +choiwelcom +home +page +page +construct +master +program +depart +comput +scienc +univers +texa +austin +educ +comput +engin +seoul +nation +univers +seoul +korea +august +work +experi +present +system +administr +depart +chemic +engin +univers +texa +austin +graduat +research +assist +prof +aloysiu +utc +real +time +system +research +group +contact +inform +work +home +nuec +austin +click +list +machin +current +log +click +finger +author +sunghe +choiemail +sunghe +utexa +edulast +updat +septemb +utc +home +home diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..96914aa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,101 @@ +home +page +sheetal +kakkadsheet +kakkadcontact +inform +offic +taylor +hall +postal +address +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +usual +avail +offic +best +reach +isvia +email +full +finger +inform +research +informationi +member +oop +research +group +inth +depart +comput +scienc +univers +texa +austin +part +research +implement +persist +storagesystem +call +texa +provid +easi +persist +us +novel +techniqu +call +pointer +swizzl +page +faulttim +effici +support +larg +address +standard +hardwar +inform +pleas +list +mypubl +along +brief +descript +plan +graduat +comput +scienc +myresum +avail +postscript +current +work +motorola +somerset +design +center +whilefinish +comput +scienc +januari +sheetal +kakkad +depart +comput +scienc +univers +texa +austin +svkakkad +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..b3ad5e9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,70 @@ +home +page +shengm +welcom +shengm +homepageabout +student +depart +comput +scienc +univers +texa +austin +want +know +check +classmatesclass +univers +scienc +technolog +china +ustc +class +graduat +school +chines +academi +sciencesus +linksut +campu +registrar +gradaut +studiesut +libraryutaccesschines +student +scholar +associationchina +chinesechinainternet +distribut +chines +magazinestsinghua +bbsncic +bbschines +novelschines +classicsabout +austinwhat +weather +todai +austin +citylimitsclassifi +item +sale +austinto +contact +address +medic +art +austin +texa +voic +email +utexa +finger +meyour +comment +suggest +highli +appreci +visitorsinc +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..e0322ce1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,17 @@ +wang +frame +alert +see +messag +us +browser +support +frame +click +frame +version +document +could +download +netscap +navig diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..4bc961cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,5 @@ +welcom +home +page +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..ba4dc653 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,34 @@ +renu +tewarirenu +tewariwhat +addresshom +austin +home +email +tewari +utexa +work +multimedia +comput +dept +comput +scienc +univers +texa +austin +austin +public +work +done +internship +watson +research +center +plai +interest +site +bore +send +comment +name +option diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..9cd2fc3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,109 @@ +tumlin +pierc +photo +brenda +ladd +photographi +tumlin +pierc +interest +random +stuff +come +soon +stai +tune +research +interest +issu +secur +distribut +system +present +studi +formal +logic +develop +analyz +authenticationprotocol +done +work +formal +method +synthesi +specif +verif +parallel +distribut +system +amwork +paper +synthesi +resourc +control +communicatewith +client +mean +queu +messag +draft +paper +synthesi +distribut +control +system +addit +student +research +assist +appli +research +laboratori +current +work +project +investig +us +evolutionari +comput +techniqu +genet +algorithm +develop +finit +state +machin +inform +project +click +resum +avail +html +postscript +format +contact +inform +offic +taylor +hall +phone +email +address +tumlin +utexa +postal +address +metric +blvd +austin +page +last +updat +novemb +page +access +time +sinc +novemb +tumlin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..3c78039f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,273 @@ +tong +wang +home +page +netscap +recommend +view +chines +listen +page +page +contain +java +applet +visit +mpeg +viewer +demo +page +written +pure +java +tong +wang +nanj +nank +peopl +republ +china +current +program +comput +scienc +depart +univers +texa +austin +seek +full +time +resum +html +format +click +postscript +format +shanghai +jiao +tong +univeristi +shanghai +china +tsinghua +univers +beij +china +jersei +summerluc +technolog +system +technolog +compani +form +result +plan +restructur +bell +laboratori +compani +work +thissumm +anoth +homepag +lucent +technolog +life +austin +cours +work +spring +fall +fall +distribut +comput +prof +misra +commun +network +prof +gouda +theori +comput +prof +zuckerman +distribut +comput +prof +misra +databas +system +implement +prof +batori +algorithm +techniqu +theori +prof +plexton +comput +graphic +prof +fussel +network +perform +prof +multimedia +system +prof +teach +assist +introduct +oper +system +fall +program +spring +databas +implement +fall +present +fault +toler +clock +synchron +distribut +real +time +system +april +mobil +host +protocol +project +mini +databas +manag +system +unix +cours +project +network +design +tool +unix +standard +compon +libari +lucent +robot +opengl +glut +unix +cours +project +mpeg +decod +player +java +java +unix +cours +project +work +plai +mpeg +page +semest +know +troubl +made +visit +mpeg +viewer +demo +page +music +favorit +recommend +mariah +boyz +babyfac +movi +sound +clip +sampl +misc +chines +netscap +china +chines +zodiac +person +associ +differ +anim +kind +anim +associ +find +page +friend +high +school +attach +nanj +normal +univers +friend +haiq +friend +shanghai +jiao +tong +univers +maintain +shenfeng +chen +thank +friend +tsinghua +univers +quit +chines +learn +info +perl +java +tutori +reach +lake +austin +blvd +austin +austin +austin +twang +utexa +page +still +underconstruct +check +time +like +never +leav +eagl +copyright +tong +wang +creat +last +modifi +background +song +deskmat +perform +lang +visitor +accord +counter +sinc +trust +book diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..c3174c44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,96 @@ +hermjakob +home +pageulf +hermjakobhello +welcom +graduat +student +thedept +comput +scienc +univers +texa +austinand +work +dissert +aboutexampl +base +decis +make +context +orient +pars +machin +translationund +supervis +prof +raymond +moonei +activ +utc +natur +languag +acquisit +groupand +utc +machin +learn +research +group +place +interest +associ +comput +linguist +signll +special +interest +group +natur +languag +learn +comput +languag +print +archiv +european +student +associ +search +engin +altavista +einet +galaxi +infoseek +lyco +yahoo +new +thing +consid +dernir +nouvel +alsac +deutsch +well +focu +york +time +spiegel +svenska +dagbladet +tagesspiegel +vanguardia +welt +zeitplusacm +austin +weather +resourc +perman +address +moltkestr +bnde +germanyphon +voic +last +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..5af546a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,51 @@ +lanc +tokuda +univers +texa +austin +lanc +tokuda +softwar +system +gener +research +group +depart +comput +scienc +univers +texa +austin +unicron +utexa +twelv +time +intramur +sport +champion +directori +entertain +financ +intramur +sport +magic +gather +new +organ +peopl +refer +research +schedul +softwar +sport +home +austin +texa +offic +taylor +schedul +perman +heeia +street +kaneoh +hawaii diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..9879e371 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,32 @@ +home +page +balayoghanv +balayoghancontact +informationemail +utexa +eduoffic +painter +hall +telephon +postal +address +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +click +send +email +finger +account +find +whether +log +ineosdi +bookmarksvbb +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..0a3e2837 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,288 @@ +harrick +vinharrick +assist +professor +comput +scienc +atth +univers +texa +ataustin +director +distribut +multimediacomput +laboratori +educ +tech +comput +scienc +engin +indian +institut +technolog +bombai +comput +scienc +colorado +state +univers +comput +scienc +univers +california +diego +honor +award +profession +servic +career +award +faculti +develop +award +research +initi +award +diego +supercomput +center +creativ +comput +award +innov +award +editori +board +ieee +multimedia +vice +chair +area +distribut +multimedia +system +intern +confer +distribut +comput +system +icdc +chair +program +committe +multimedia +comput +andnetwork +chair +program +committe +multimedia +comput +andnetwork +member +program +committe +multimedia +electronicimag +multimedia +system +beij +china +novemb +intern +confer +network +multimedia +kaohsiung +taiwan +decemb +eurograph +workshop +multimedia +rostock +germani +multimedia +second +internationalconfer +distribut +multimedia +system +applic +third +intern +eurograph +symposium +multimediasystem +area +interestmultimedia +system +high +speed +network +databas +mobilecomput +distribut +system +summari +research +main +object +research +design +implement +anend +system +architectur +enabl +wide +rang +ofdistribut +multimedia +applic +specif +develop +integr +multimedia +file +system +algorithm +protocolsfor +effici +transmiss +digit +audio +video +network +larg +scale +multimedia +databas +select +recent +public +shenoi +effici +failur +recoveri +inmulti +disk +multimedia +server +proceed +annualintern +symposium +fault +toler +comput +ftc +pasadena +california +page +june +gemmel +kandlur +venkat +rangan +row +multimedia +storag +server +tutori +ieee +comput +page +goyal +optim +placement +ofmultimedia +object +disk +arrai +proceed +ieeeintern +confer +multimedia +comput +system +icmc +washington +page +goyal +determin +delaybound +heterogen +network +proceed +thintern +workshop +network +oper +system +support +fordigit +audio +video +nossdav +durham +hampshir +april +goyal +goyal +algorithm +designingmultimedia +server +comput +commun +page +march +sponsor +research +work +sponsor +variou +industri +federalinstitut +includ +intel +nation +scienc +foundationresearch +initi +award +career +award +nasa +mitsubishi +electricresearch +laboratori +merl +microsystem +electrospacesystem +univers +texa +austin +cours +advanc +oper +system +multimedia +system +multimedia +commun +databas +contact +inform +email +utexa +phone +mail +address +depart +comput +scienc +taylor +hall +univers +texa +austin +austin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..fc59b813 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,74 @@ +vipin +home +page +vipin +home +haven +found +time +thing +shall +updat +soon +yeah +know +color +match +pictur +cours +interest +report +activ +guestbook +resum +pleas +give +graduat +student +univers +texa +austin +depart +comput +scienc +undergraduatefrom +delhi +india +academ +interestscours +work +list +less +incomplet +haven +pass +semest +take +risk +put +interestsreportsy +visitor +number +go +reset +number +increas +never +decreas +contact +medic +art +street +austin +log +sure +sign +guestbook +though +mani +guest +comment +suggest +pleas +send +email diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..0121e188 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,173 @@ +vladimir +lifschitzwhen +feel +burden +downcast +human +mind +gladli +turn +therealm +mathemat +lucid +precis +grasp +object +isobtain +insight +gain +pleasantli +appropri +conceptform +human +spirit +feel +home +paul +bernai +vladimir +lifschitzgottesman +famili +centenni +professor +incomput +sciencesat +univers +texasat +austin +fellow +theamerican +associ +forartifici +intelligenceb +mathemat +petersburg +univers +russia +mathemat +petersburg +branchof +steklov +mathemat +institut +russia +area +interesttempor +reasoningand +reason +aboutactionslog +programmingand +nonmonoton +reasoningteachingoth +profession +activitiespap +line +lectur +note +survei +research +paper +lifschitz +research +paper +dissert +bylifschitz +studentsrecommend +read +edsger +dijkstra +convoc +speechgood +new +madelein +albright +nomin +secretari +state +germani +becam +better +place +live +germani +regain +book +taken +soviet +armi +daniel +ortega +lost +need +recycl +paper +helm +burton +actbad +new +wang +sentenc +year +prison +tortur +us +polic +democrat +countri +sequest +import +archeolog +evid +world +close +societynot +race +problem +america +black +elect +recent +redrawn +district +california +civil +right +initi +ratio +white +black +finish +high +school +admit +student +race +neutral +basisoth +note +amnesti +intern +scientist +scienc +favorit +stori +three +silli +joke +quot +monthcontact +inform +offic +taylor +hall +phone +number +offic +postal +address +depart +comput +sciencesunivers +texa +austinaustin +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..1d4c6f72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,76 @@ +vijaya +ramachandranvijaya +ramachandranblakemor +regent +professor +comput +scienc +univers +texa +austin +princeton +univers +research +interestsmi +research +interest +algorithm +comput +theori +primarilyin +area +parallel +comput +algorithm +design +includ +design +analysi +effici +parallel +sequenti +algorithm +design +evalu +model +parallel +machin +experiment +evalu +algorithm +access +copi +recent +paper +mine +complet +list +public +avail +vita +offici +faculti +profil +contact +inform +offic +taylor +hall +email +address +utexa +postal +address +depart +comput +scienc +univers +texa +austin +austin +texa +number +visit +page +sinc +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..4d9b2077 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,369 @@ +algorithm +comput +theori +grouput +algorithm +comput +theori +groupth +algorithm +comput +theori +group +focus +theoret +foundat +comput +scienc +current +research +interest +faculti +group +includ +algorithm +design +complex +theori +parallel +comput +graph +theori +probabilist +method +major +focu +group +design +analysi +provabl +effici +algorithm +solv +fundament +comput +problem +effici +measur +term +differ +resourc +time +space +number +processor +number +random +bit +faculti +greg +plaxton +plaxton +utexa +parallel +comput +algorithm +design +analysi +combinator +lower +bound +random +vijaya +ramachandran +utexa +parallel +comput +algorithm +design +analysi +machin +model +graph +theori +graph +algorithm +david +zuckerman +utexa +random +comput +complex +theori +random +walk +graph +theori +cryptographi +affili +folk +postdoc +student +alumni +sanjoi +baruah +sanjoi +emba +tsan +sheng +tshsu +sinica +pierr +kelsen +kelsen +madhukar +korupolu +madhukar +utexa +phil +mackenzi +philmac +idbsu +ramgop +mettu +ramgop +utexa +poon +ckpoon +utexa +rajmohan +rajaraman +rraj +utexa +santanu +sinha +ssinha +utexa +torsten +suel +suel +berkelei +yuke +zhou +yuke +utexa +algorithm +mail +list +algorithm +mail +list +electron +mail +list +announc +relat +seminar +activ +theoret +comput +scienc +post +lowvolum +mail +list +typic +dozen +messag +semest +mail +list +send +mail +messag +utexa +express +interest +ad +algorithm +mail +list +remov +name +mail +list +time +send +messag +request +remov +gripe +utexa +warm +warm +stand +workshop +algorithm +research +themidsouth +midsouthwest +forum +research +theoret +comput +scienc +texa +surround +state +meet +twice +year +differ +locat +meet +consist +sever +talk +research +region +recent +research +result +often +distinguish +keynot +speaker +first +warm +organ +vijaya +ramachandran +atut +austin +spring +greg +plaxton +organizedanoth +warm +austin +fall +warm +held +sever +locat +includ +texa +dalla +southern +methodist +univers +univers +north +texa +louisiana +state +univers +univers +southwestern +louisiana +univers +oklahoma +next +warm +schedul +beheld +dalla +novemb +program +announc +warm +sent +algorithmsmail +list +becom +avail +also +usuallytri +pool +attend +warm +take +place +outsid +ofaustin +announc +regard +arrang +also +sent +algorithm +mail +list +sigact +sigact +stand +special +interest +group +algorithmsand +comput +theori +activ +group +thatinclud +mani +distinguish +comput +scientist +sigact +sponsorsth +symposium +theori +comput +stoc +sponsor +siam +symposium +discret +algorithm +soda +symposium +parallel +algorithm +andarchitectur +spaa +import +confer +interestar +ieee +symposium +foundat +comput +scienc +foc +ieee +symposium +comput +complex +vijaya +ramachandran +serv +elect +member +thesigact +execut +committe +us +pointer +sigact +theori +calendar +eccc +electron +colloquium +comput +complex +virtual +rolodex +bibliographi +theori +foundat +comput +scienc +hypertext +bibliographi +project +univers +dept +comput +scienc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..05383f31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,11 @@ +srinivasan +vaidyaraman +srinivasan +vaidyaraman +email +utexa +offic +phone +offic +phone +home diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..cf9b3724 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,50 @@ +sengul +vurgun +sengulvurgun +background +interestsi +student +comput +scienc +depart +ammainli +interest +artifici +intellig +neural +network +evolutionaryalgorithm +term +paper +topic +interest +order +ofprefer +memori +represent +knowledg +howto +retriev +learn +theori +problem +solv +cognit +skillacquisit +search +understand +visual +attent +connectionist +approach +architectur +mindto +contact +mepost +comput +scienc +austin +austin +usavoic +vurgun +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..04e98ee5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,11 @@ +walbourn +home +page +chuck +walbournmi +person +page +locat +charybdi +enterpris +server diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..b17d2d28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,76 @@ +henri +mackai +walker +henri +mackai +walker +visit +senior +lectur +depart +comput +scienc +univers +texa +austin +professor +mathemat +comput +scienc +depart +mathemat +comput +scienc +grinnel +colleg +walker +math +grin +edua +regular +tenur +member +grinnel +colleg +faculti +professorwalk +visit +univers +texa +austin +period +teachand +variou +profession +activ +formal +appoint +comput +scienc +depart +follow +academ +year +summer +fall +complet +inform +avail +professor +walker +home +page +atgrinnel +colleg +http +math +grin +walker +creat +septemb +last +revis +septemb +photograph +jack +robertson diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..084da79c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,89 @@ +home +page +lane +warshawlan +warshaw +mike +cool +senior +comput +scienc +student +recent +accept +graduat +school +univers +texa +austin +graduat +work +area +activ +databas +rule +base +system +current +posit +appli +research +laboratoryinvolv +maintain +venu +rule +base +languag +developedat +univers +texa +austin +mirank +andat +appli +research +laboratori +lanc +obermey +first +item +anoth +third +follow +list +research +paper +warshaw +mirank +case +studi +venu +declar +basi +rule +modul +unpublish +warshaw +mirank +case +studi +venu +declar +basi +rule +modul +confer +inform +knowledg +manag +contact +mepost +comput +scienc +austin +austin +usavoic +home +warshaw +arlut +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..850581f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,65 @@ +chen +chen +master +student +comput +scienc +dept +austin +texa +comput +scienc +austin +decemb +mathemat +austin +august +mathemat +fudan +univers +china +juli +offic +phone +email +math +utexa +wchen +utexa +mail +address +center +numer +analysi +univers +texa +austin +austin +us +link +unix +book +expect +perl +site +java +java +demo +rosett +refer +manual +rosett +program +exampl +common +gatewai +interfac +sampl +program +pleas +click +load +file +comment +wchen +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..459daa10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,69 @@ +home +page +paul +wilson +paul +wilson +contact +inform +offic +taylor +hall +postal +address +univers +texa +austin +depart +comput +scienc +taylor +hall +austin +best +reach +email +ltwilson +utexa +usual +headshot +novelti +thought +cross +section +research +informationi +lead +oop +research +group +depart +comput +scienc +univers +texa +austin +oop +object +orient +program +system +research +group +workson +memori +manag +program +languag +design +implement +teachingin +fall +teach +program +languag +depart +comput +sciencesnot +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..0913e69c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,177 @@ +arthur +homepag +cent +becam +christiani +made +major +decis +life +believ +jesu +christ +alittl +month +come +totallyunexpect +though +religi +studi +compulsori +class +thechristian +high +school +attend +hong +kong +intent +tobecom +christian +time +thought +record +jesu +slife +bibl +quit +credibl +teach +good +take +time +think +deepli +relat +mylif +also +misconcept +mean +becom +christianwa +clear +came +past +thought +differ +christianand +wrong +thought +lovedeveri +matter +whether +decid +achristian +also +us +think +true +christian +shouldb +virtuou +holi +good +enough +christian +howev +listen +friend +church +thefellowship +realiz +thing +true +christian +also +sin +born +theywil +also +stumbl +wrong +howev +becam +flesh +though +hei +sinless +abl +sympath +weak +weconfess +sin +faith +forgiv +sin +andto +cleans +unright +john +differencebetween +christian +peopl +trust +jesu +christ +astheir +saviour +gratefulli +accept +christ +redempt +fortheir +sin +therefor +justifi +faith +without +deed +ofth +roman +doubt +christian +live +holi +life +wedo +holi +becom +christian +thecontrari +reli +strength +givesu +becom +holi +jesu +said +whole +need +nota +physician +sick +matthew +count +onour +strength +abl +becom +holi +know +number +dai +ought +seek +thetruth +earli +hesit +start +seek +thankgod +lead +give +opportun +realli +know +wkmak +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..09cecfec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,270 @@ +hello +visitor +number +topic +research +interest +interest +pictur +robot +work +research +robot +work +educ +robot +work +public +hotlist +person +interest +miscellan +send +comment +contact +inform +research +interest +mobil +robot +explor +learn +navig +intellig +control +research +worki +primarili +spatial +semant +hierarchi +approach +mobilerobot +explor +learn +navig +interest +softwar +engin +develop +area +rang +artifici +intellig +includ +machin +learn +neural +network +qualit +reason +machin +vision +oper +system +network +system +embed +system +graphic +user +interfac +multimedia +pictur +robot +work +research +robot +work +spot +real +world +interfac +robot +ring +ultrason +rang +sensor +rover +home +built +tall +robot +ring +ultrason +rang +sensor +rhino +robot +manipul +commerci +robot +manipul +rhino +robot +educ +robot +work +robocac +robot +worm +built +specif +robofest +organ +robot +group +austin +robokreta +besar +robokreta +kicik +intellig +autonom +car +built +us +chassi +motor +fast +remot +control +race +car +public +paper +robot +andqualit +reason +research +avail +onlin +doctor +dissert +titl +spatial +semant +hierarchi +fora +physic +mobil +robot +also +onlin +hotlist +robot +group +home +page +offici +robot +group +home +page +start +point +internet +explor +engin +technolog +resourc +world +wide +catalog +usenet +frequent +ask +question +robot +internet +resourc +page +internet +resourc +meta +index +ncsa +malaysia +onlin +badminton +homepag +yahoo +send +comment +comment +note +topic +common +interest +pleas +email +wyle +utexa +eduperson +interestsavid +badminton +player +member +unit +state +badminton +associ +usba +badminton +page +member +robot +group +love +plai +guitar +clarinet +interest +page +guitar +interest +page +clarinet +miscellaneousinterest +martial +art +well +martial +art +movi +contact +inform +email +wyle +utexa +offic +taylor +hall +phone +phone +mail +comput +scienc +depart +univers +texa +austin +austin +texa +finger +back +topic +list +utexa +austin +qualit +reason +research +group +page +utexa +austin +robot +research +group +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..e38083c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,10 @@ +contact +xfeng +utexa +address +qaustin +phone +address +west +austin +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..e85fe59b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,111 @@ +xingang +home +page +xingang +photographi +classic +music +audio +sport +travel +visual +welcom +home +page +hang +around +univers +texa +austin +student +depart +comput +scienc +work +distribut +multimedia +comput +head +harrick +year +graduat +program +rank +nation +delight +surpris +realli +surpris +pretti +well +pictur +well +hard +take +pictur +work +pictur +aswel +interest +stuff +line +soon +right +temporaryresort +imagin +hopefulli +time +goe +llgradual +walk +paper +present +clearer +imag +creat +link +find +interest +creat +page +feel +havesometh +watch +frequent +access +pointer +serious +austin +foliag +marvel +search +engin +alta +vista +string +search +infoseek +keyword +search +miata +club +unit +morn +newspap +american +express +financi +travel +student +card +york +time +atlant +monthli +page +china +soccer +major +leagu +group +xguo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..18d77d98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,4 @@ +geoffrei +home +pagemov +address diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..76500ccb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,18 @@ +yang +yang +frame +alert +see +messag +us +frame +challeng +browser +click +frame +version +document +could +download +netscap +navig diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..8f0c9dbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,96 @@ +david +corner +wwwdavid +corner +hello +whatev +took +welcom +make +home +establish +contact +street +austin +depart +comput +scienc +univers +texa +austin +austin +internet +utexa +home +utc +utc +get +busi +research +outsid +work +still +seriou +favorit +quot +word +hope +daili +medit +stuff +classic +guitar +concert +byth +assad +brothersin +hong +kong +art +festiv +must +first +introduc +beauti +classic +guitar +christoph +parkeningi +guitarist +interest +life +stori +tell +grew +tire +ofconcert +retir +reconcili +jesu +christ +rekindl +passion +also +theamsterdam +guitar +trio +french +rich +artist +flair +like +nation +footbal +team +michel +platini +label +franc +magazinepublish +minist +align +absmiddl +sinc +sept +utc +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..6fbb9fea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,17 @@ +yanbin +frame +alert +see +messag +us +frame +challeng +browser +click +frame +version +document +could +download +netscap +navig diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..d3c42fe3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,9 @@ +yuan +home +page +oop +us +browser +pleas +click +continu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..b7d0e299 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,75 @@ +yong +homepageto +yong +homepagey +number +visitor +sinc +yong +beij +china +current +program +comput +scienc +univers +texa +austin +settl +stai +mathemat +graduat +program +rutger +univers +brunswick +year +jersei +beauti +place +wife +tsinghua +univers +beij +china +milanitalian +soccerk +soccernba +sitefox +sportschicago +bullsmichael +jordannflnhlc +rankingmarri +childrenseinfeldcomput +sciencesutilitieshtml +convertersimag +collectionssystemshtmllatexcgitcl +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat +tmiscinternet +travel +network +unit +parcel +servic +unit +state +postal +servic +usp +fedexus +guidefun +todayu +newsstarwavesupermodel +contact +river +street +austin +finger +yonglu +utexa +page +heavi +construct +last +modifi +yong diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..0a207cde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,217 @@ +david +young +david +youngashbel +smith +professor +professor +comput +scienc +professor +ofmathemat +director +center +numer +analysi +webb +institut +naval +architectur +mathemat +mathemat +harvard +univers +honor +award +profession +servic +fellow +american +associ +advanc +scienc +award +outstand +contribut +comput +scienc +journal +linear +algebra +applic +special +issueded +chair +appli +mathemat +committe +american +mathematicalsocieti +board +truste +argonn +univers +associ +associ +editor +siam +journal +matric +matrixappl +area +interestnumer +analysi +partial +differenti +equat +numericallinear +algebra +summari +researchmi +research +activ +focus +numer +solut +partialdifferenti +equat +base +finit +differ +methodsand +iter +method +solv +associ +system +oflinear +algebra +equat +involv +matric +larg +andspars +sever +comput +softwar +packag +develop +basedon +research +part +itpack +project +research +beingextend +includ +method +suitabl +share +memori +distributedmemori +parallel +comput +rapidli +converg +iter +methodsbas +parallel +multilevel +procedur +also +beingdevelop +select +recent +publicationsd +young +kincaid +linear +stationari +second +degre +method +solut +larg +linear +system +topic +polynomi +sever +variabl +applic +rassia +srivasiava +yanushauska +world +scientif +publ +compani +singapor +young +vona +ration +iter +method +solv +larg +spars +linear +system +appli +numer +mathemat +young +search +omega +iter +method +larg +linear +system +kincaid +academ +press +young +carei +kincaid +sepehrnoori +vector +parallel +iter +solut +larg +spars +system +pde +scienc +engin +crai +comput +minneapoli +crai +research +young +search +high +level +parallel +iter +spars +linear +system +solver +parallel +supercomput +method +algorithm +applic +graham +carei +john +wilei +son +previou +profil +index +next +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..d54f1689 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,240 @@ +yoonsuck +choe +yoonsuck +choe +photo +ad +student +dept +comput +scienc +univers +texa +austin +comput +scienc +univers +texa +austin +austin +decemb +comput +scienc +yonsei +univers +seoul +korea +august +welcom +homepag +visitor +sinc +research +interest +model +cortic +structur +neural +network +current +work +hand +written +digit +recognit +systembas +laterali +interconnect +synerget +self +organ +featur +lissom +develop +joseph +sirosh +prof +risto +miikkulainen +utc +neural +network +research +group +recent +work +includ +extend +lissom +model +actualspik +event +model +call +spike +lissom +slissom +beself +organ +lissom +segment +multipl +object +retinabi +synchron +spike +within +group +repres +object +desynchron +spike +group +repres +differ +object +research +outlin +also +check +html +book +relat +research +edit +prof +miikkulainen +joseph +sirosh +later +interact +cortex +structur +function +comment +public +relat +public +research +group +utc +group +public +page +yoonsuck +choe +risto +miikkulainen +self +organ +segment +later +connect +spike +neuron +technic +report +depart +comput +scienc +univers +texa +austin +septemb +joseph +sirosh +risto +miikkulainen +yoonsuck +choe +later +interact +cortex +structur +function +electron +book +isbn +yoonsuck +choe +joseph +sirosh +risto +miikkulainen +later +interconnect +self +organ +map +hand +written +digit +recognit +appear +touretzki +mozer +hasselmo +editor +advanc +neural +inform +process +system +cambridg +press +yoonsuck +choe +later +interconnect +self +organ +featur +handwritten +digit +recognit +techic +report +depart +comput +scienc +univers +texa +austin +august +master +thesi +bunch +link +total +unord +click +find +interestingcontact +inform +offic +phone +email +yschoe +utexa +mail +address +univers +texa +austin +depart +comput +scienc +austin +page +maintain +yoonsuck +choe +yschoe +utexa +last +updat +utc +home +home +newsgroup +summari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..d4a92a24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,87 @@ +yuanj +xuwint +break +updat +addresspictur +famili +depart +comput +scienc +univers +texa +austin +austin +texa +spring +time +tabl +select +cours +schedulec +object +orient +softwar +engr +brown +logic +synthesi +aziz +client +server +system +develop +gang +previou +semest +china +pagechines +student +associ +austin +china +alumni +page +work +studi +universityof +scienc +technolog +china +hefei +china +institut +mathemat +chines +academi +scienc +beij +chinaunivers +munich +atmunich +germanyunivers +manchest +manchest +prof +nick +higham +famili +wang +lifan +hong +chen +guizhongustc +yuan +hailiang +yang +yuhongfriend +linsoftwar +program +java +java +perl +common +gatewai +interfac +link +yahoo +publish diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..196905e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +feng +feng +visitor +number +sinc +yufeng +utexa +edufing +public +ring +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..d23b70f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,86 @@ +zhii +chen +home +pagezhii +chenabout +mefrom +guangzhou +canton +peopl +republ +china +current +master +program +comput +scienc +depart +univers +texa +austin +seek +full +time +resum +click +resum +postcript +format +zhongshan +univers +pleas +view +chines +guangzhou +china +life +austin +fall +spring +calculu +fall +comput +architectur +spring +misc +china +chines +zodiac +person +associ +differ +anim +kind +anim +associ +find +page +friend +zhongshan +univers +maintain +john +dong +thank +els +world +wide +info +contact +burton +austin +zchen +utexa +page +still +construct +copyright +zhii +chen +creat +last +modifi +visitor +accord +counter +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..b74c506d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,129 @@ +maggi +xiao +zhou +maggi +xiao +zhou +home +page +educ +assistantship +graduat +student +teach +assist +databas +manag +depart +comput +sciencesat +univers +texa +austin +buaa +beij +china +life +austin +current +work +fall +multimedia +system +algorithm +data +commun +network +distribut +process +work +spring +fall +look +around +campu +kaleidoscop +china +land +beauti +visit +world +peopl +daili +china +new +digest +zhai +chines +magazin +newspag +time +magazin +magazin +entertain +movi +stamp +comput +world +world +comput +societi +ieee +comput +giant +onlin +career +center +compani +home +page +internet +search +yahoo +galaxi +lyco +internet +directori +univers +guid +html +script +onlin +librari +contact +inform +mail +zhouxiao +utexa +http +utexa +user +zhouxiao +offic +main +build +room +campu +offic +phone +address +campu +depart +comput +scienc +taylor +univers +texa +austin +austin +home +page +last +modifi +sept +comment +welcom +send +email +zhouxiao +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..9b41c86f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,110 @@ +qing +homepag +qinghi +friend +welcom +homepag +know +graduat +student +inth +depart +comput +scinc +univers +texa +austin +born +beij +capit +citi +china +bachelor +degre +peke +univers +meet +ofmi +friend +former +classmat +peke +univers +findmor +peopl +pekingunivers +alumni +home +page +oversea +html +enjoi +live +austin +texa +peek +follow +site +know +like +well +find +lot +valuabl +informationand +professionalinternetpc +relatedmac +relatedaft +worknetscap +dynam +document +testtwin +eldertwin +youngernetscap +dynam +document +testanim +danc +titledanc +titl +testanoth +netscap +dynam +document +testyet +anoth +netscap +dynam +document +testfriendsthi +china +travel +site +maintain +former +classmat +xiaohai +best +friend +china +shan +shinan +clike +clike +student +visitor +number +sinc +octob +site +construct +last +modifi +qingunivers +texa +austin +depart +comput +sciencesaustin +zhuqe +utexa diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..2b9acab7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,73 @@ +univ +washington +comput +scienc +organizationsinclud +faculti +staff +student +visitor +organ +ouraffili +program +graduat +student +regioninclud +local +inform +desktop +refer +link +elsewher +spotlightuwwin +pacif +region +intern +student +programmingcontesttwovideo +highlight +educ +initiativesourcolloquia +live +mbonemajordon +intel +corporationdickkarp +receiv +nation +medal +scienceprofessionalmast +program +applic +deadlin +autumn +departmentoverview +theimpact +research +univers +perspect +faculti +staffposit +avail +half +centuri +exponenti +progress +inform +technolog +page +peopl +region +cours +laboratori +research +newscan +handl +tabl +click +univers +washington +seattl +voic +comment +webmast +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..ec665d56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,65 @@ +andrew +berman +home +pageandrew +berman +aberman +washington +educomput +scienc +bourassa +virgil +selberg +erik +tron +process +specif +file +protect +unix +oper +system +bothpostscript +andhtml +proceed +winter +usenix +confer +berman +andrew +data +structur +fast +approxim +match +postscript +format +berman +andrew +shapiro +linda +effici +imag +retriev +multipl +distanc +measur +avail +postscript +format +appear +spie +special +link +wife +debbi +debbi +beauti +daughter +melani +miscellan +poison +donut +stupid +stupidmi +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..fa303a9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,127 @@ +adam +finkelstein +adam +finkelstein +adam +washington +depart +comput +scienc +univers +washington +seattl +washington +start +drink +cup +coffe +hair +limp +andlack +bodi +year +live +good +life +graduat +student +final +finish +doctor +comput +graphic +fall +quarter +post +earli +join +thecomput +scienc +depart +atprinceton +univers +softwar +engin +tibco +formerli +teknekron +softwar +system +palo +alto +wrote +softwar +peopl +trade +stock +undergradu +student +swarthmor +colleg +class +studi +physic +occasion +recent +research +project +find +specif +imag +alarg +databas +imag +sinc +work +someth +call +multiresolut +video +photo +plai +ultim +frisbe +team +calledumatata +address +phone +number +look +plan +file +across +photocopi +photocopi +thehilari +menu +seattl +least +visit +coffe +hous +caff +lardo +recent +chilli +night +visit +snoqualmi +pass +made +excel +view +comet +hyakutak +great +pictur +taken +friend +marcu +cool +imag +made +glass +sculptur +dither +mona +gothic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..3defba13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,291 @@ +jame +ahren +home +page +jame +ahren +home +page +address +comput +scienc +engin +depart +univers +washington +seattl +email +ahren +washington +phone +research +interest +visual +parallel +distribut +comput +scientif +databas +manag +project +univers +washington +databas +environ +vision +research +alamo +nation +laboratori +visual +project +public +jame +ahren +charl +hansen +cost +effect +data +parallel +load +balanc +intern +confer +parallel +process +august +load +balanc +algorithm +typic +improv +program +perform +onunbalanc +dataset +degrad +perform +balanc +dataset +unnecessari +load +redistribut +occur +paper +present +cost +effect +data +parallel +load +balanc +algorithm +whichperform +load +redistribut +possibl +save +outweighth +redistribut +cost +experi +data +parallel +polygonrender +show +perform +improv +factor +onunbalanc +dataset +perform +loss +percent +onbalanc +dataset +us +algorithm +linda +shapiro +steven +tanimoto +jame +brinklei +jame +ahren +jakobovit +lara +lewi +visual +databas +system +data +experi +manag +model +base +comput +vision +proceed +second +base +vision +workshop +februari +paper +present +design +visual +databas +system +data +experi +manag +system +design +gener +scientif +databas +system +motiv +intend +model +base +comput +vision +provid +unifi +data +model +highli +graphic +user +interfac +advanc +queri +facil +interact +laboratori +notebook +system +aid +scientif +experiment +promot +data +share +comput +vision +commun +frank +ortega +charl +hansen +jame +ahren +fast +data +parallel +polygon +render +supercomput +novemb +paper +describ +data +parallel +method +polygon +render +massiv +parallel +machin +method +base +simpl +shade +model +target +applic +requir +fast +render +extrem +larg +set +polygon +set +found +mani +scientif +visual +applic +render +handl +arbitrarili +complex +polygon +need +mesh +issu +involv +load +balanc +address +data +parallel +load +balanc +algorithm +present +render +toolkit +enabl +scientist +displai +shade +polygon +directli +parallel +machin +avoid +transmiss +huge +amount +data +post +process +render +system +jame +ahren +charl +hansen +cost +effect +data +parallel +load +balanc +univers +ofwashington +depart +comput +scienc +engin +april +longer +version +icpp +paper +also +describ +fast +data +parallel +load +redistribut +algorithm +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..80e92ea2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,176 @@ +databas +environ +vision +research +databas +environ +vision +research +overview +databas +environ +vision +research +wasdesign +gener +scientif +databas +system +motiv +andintend +model +base +comput +vision +provid +unifieddata +model +highli +graphic +user +interfac +advanc +queryfacil +interact +laboratori +notebook +databaseenviron +vision +research +aid +scientif +experiment +andpromot +data +share +comput +vision +commun +devr +entiti +store +hierarch +relat +datastructur +schema +entiti +contain +name +ofproperti +part +attribut +relat +among +thepart +graphic +definit +describ +buildinst +specif +visual +studi +mani +differ +imag +databas +research +topic +includ +visual +interfac +multi +level +queri +experi +manag +peopl +princip +investig +linda +shapiro +steven +tanimoto +brinklei +graduat +student +jame +ahren +jakobovit +lara +lewi +public +linda +shapiro +steven +tanimoto +jame +brinklei +jame +ahren +jakobovit +lara +lewi +visual +databas +system +data +experi +manag +inmodel +base +comput +vision +proceed +secondcad +base +vision +workshop +februari +present +overview +devr +project +lara +lewi +linda +shapiro +steven +tanimoto +flexibledata +organ +visual +support +visual +databasesystem +spie +symposium +electron +imag +scienceand +technolog +februari +jakobovit +linda +shapiro +steven +tanimoto +implement +multi +level +queri +databas +environ +vision +research +spie +symposium +electron +imag +scienc +technolog +februari +email +ahren +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..c0528f24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,101 @@ +page +bernheim +washington +start +comput +scienc +graduat +school +univers +washington +page +still +construct +graduat +william +colleg +comput +scienc +math +front +comput +plai +ultim +frisbe +autumn +quarter +class +programminglanguag +automata +principl +ofdigit +system +design +comput +graphic +seminar +parallel +program +environ +outdoor +relat +link +nation +park +home +page +gorp +guideto +outdoor +recreationfun +link +christian +scott +interact +list +abig +pile +cool +link +blast +past +scoobi +dooeduc +link +great +refer +women +undergrad +comput +scienc +peterson +educ +center +sourc +inform +graduat +school +educ +opportun +inform +distribut +mentorship +project +mentorship +project +allow +women +undergradu +spend +summerwork +research +femal +mentor +great +experi +highlyrecommend +program +back +home +pagelast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..b43384e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,151 @@ +amir +michail +amir +michailgradu +studenti +second +year +graduat +student +depart +comput +scienceat +univers +washington +research +interest +includ +followingarea +algorithm +design +softwar +engin +educ +softwar +master +degre +universityof +toronto +master +thesi +optim +broadcast +summationfor +hierarch +ring +architectur +shift +click +hereto +obtain +compress +postscript +file +recent +experi +wai +teach +algorithm +particular +built +opsi +java +appletdesign +teach +balanc +binari +tree +algorithm +combinesprogram +proof +anim +final +lunar +lander +style +gamethat +wrote +part +undergradu +graphic +cours +quotat +comput +scientist +mathematician +tend +conserv +mani +unwillingto +consid +might +better +write +proof +told +mathematician +embarrass +learn +publishedincorrect +theorem +motiv +avoid +error +believ +theywil +like +structur +proof +persuad +comput +scientist +will +explor +unconvent +proofstyl +unfortun +found +care +whether +theyhav +publish +incorrect +result +often +seem +glad +error +wasnot +caught +refere +sinc +would +meant +fewer +public +fear +comput +scientist +motiv +proof +stylethat +like +reveal +mistak +lesli +lamport +wai +construct +softwar +design +wayi +make +simpl +obvious +defici +theother +make +complic +obviou +defici +hoar diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..eed4f7e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,87 @@ +richard +anderson +home +page +richard +anderson +associ +professor +graduat +inmathemat +reed +colleg +comput +scienc +stanfordin +join +univers +washington +aon +year +postdoc +mathemat +scienc +research +institut +inberkelei +receiv +presidenti +younginvestig +award +spent +academ +yeara +visit +professor +indian +institut +scienc +bangalor +india +richard +anderson +main +research +interest +theori +implementationof +algorithm +includ +parallel +algorithm +comput +geometri +scientif +applic +work +comput +scienc +engin +depart +univers +washington +seattl +teach +paper +work +progress +research +project +qualifi +evalu +project +travel +note +year +visit +theindian +institut +scienc +resum +travel +tourist +project +pictur +recent +talksanderson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..c1f43a0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,160 @@ +anhai +doan +homepageanhai +doan +page +reconstruct +pleas +revisit +soon +born +brought +vietnam +finish +high +school +iwent +hungari +studi +comput +scienc +graduat +kossuth +lajo +univers +art +andscienc +debrecen +hungari +receiv +also +comput +scienc +univers +wisconsin +milwauke +start +fall +program +depart +comput +scienc +andengin +univers +ofwashington +seattl +research +interest +mostli +artifici +intellig +amcurr +investig +decis +make +underuncertainti +decis +theoret +plan +markov +decis +process +qualit +decis +theori +academ +interest +includ +read +travel +listen +music +mostli +jazz +blue +interest +thing +first +name +anhai +mean +calm +invietnames +made +combin +last +syllabl +name +ofmi +mother +birthplac +nghean +first +syllabl +name +ofmi +father +birthplac +haiphong +show +creativ +folkswer +thought +birth +younger +brother +theysimpli +switch +syllabl +gave +namehaian +content +research +interest +probabilist +plan +knowledg +represent +recent +paper +research +librari +curriculum +vita +research +interest +educ +employ +histori +award +honor +public +teach +data +structur +algorithm +take +cours +check +inform +offic +hour +locat +person +interest +comtemporari +vietnames +affair +literatur +write +music +paint +foreign +languag +travel +gener +purpos +librari +life +snapshotsanhai +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..1ff28b2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,165 @@ +joel +ausland +joel +ausland +joel +ausland +hope +depart +comput +scienc +engineeringat +univers +washington +pictur +spring +comput +anim +class +click +onit +origin +last +quarter +complet +qual +project +time +code +gener +multiflow +compil +offic +sieg +hall +home +univers +washington +univ +seattl +seattl +resum +written +follow +paper +fast +effect +dynam +compil +ausland +philipos +chamber +egger +andb +bershad +pldi +automat +dynam +compil +support +event +dispatch +inextens +system +chamber +egger +ausland +philipos +mock +andp +pardyak +workshop +compilersupport +system +softwar +februari +experi +control +base +automat +motion +synthesisfor +articul +figur +ausland +fukunaga +partovi +christensen +reiss +shuman +mark +acmtransact +graphic +page +paper +also +site +optim +leapfrog +ausland +benjamin +wilkerson +mathemat +magazin +page +lossili +compress +mpeg +animationthat +goe +motion +synthesi +paper +last +sequenc +show +mywork +comput +piec +togeth +cartwheel +jump +andshuffl +anim +without +figur +fall +andcollaps +brown +figur +us +algorithm +orang +figur +isjust +try +switch +motion +without +consider +tosmooth +physic +autumn +took +super +short +anim +doubl +speed +small +version +final +project +find +better +place +slide +thetalk +singular +valu +decomposit +gave +graphic +seminar +ausland +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..953de632 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,110 @@ +girl +name +brian +name +brian +boinge +boinge +boinge +brian +michalowskidepart +comput +scienc +engineeringmail +stop +univers +washingtonseattl +offic +locat +sieg +offic +phone +current +take +ling +current +edit +mossi +bitsthank +visit +page +visitor +number +page +worst +view +us +headscapewhenev +second +year +gradstud +actual +liber +artist +interest +inlinguist +confus +good +get +know +alreadi +ultrahotlist +favorit +site +ofal +time +search +look +onlin +refer +look +forsometh +glorifi +hotlist +doesn +thave +urouletteto +random +find +past +institut +ofwhich +mental +person +info +quot +file +favorit +songsand +poem +fictiti +thrash +band +puriti +test +origin +work +tokeep +touch +finger +info +mail +info +guestbook +info +pagesfrom +friend +idea +includ +aslfingerspel +snapshot +blatantli +stolen +brad +chamberlain +brian +michalowski +dept +complet +sanityerad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..918b0b55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,72 @@ +paul +beamepaul +beamepaul +beam +associ +professor +thedepart +comput +scienc +engineeringat +theunivers +washington +paul +receiv +mathemat +comput +sciencein +comput +scienc +univers +toronto +post +doctor +research +associ +academicyear +join +univers +washington +autumn +receiv +presidentialyoung +investig +award +paul +research +concern +primarili +computationalcomplex +theoret +aspect +paralleland +distribut +comput +recent +research +concentr +connect +computationalcomplex +proof +theori +particular +complex +proof +inproposit +proof +system +paul +enjoi +squash +softbal +sport +enthusiasm +cancompens +lack +talent +recent +paper +qual +project +beam +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..4ae94e38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,92 @@ +david +beckerdavid +beckercontact +info +mark +spot +stuff +spin +much +time +goe +makingspina +real +oper +system +respons +level +code +borrow +devic +model +devic +drvier +support +build +environ +code +manag +keep +develop +platform +function +somedai +getto +perform +measur +optim +sport +tri +bunch +ultim +frisbe +favorit +confer +tripl +jump +minnesota +athlet +confer +bethel +colleg +ultim +frisbe +champion +volleybal +men +grad +champion +team +sport +plai +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam +handbal +also +playracquetballgolftenni +done +bridgecampingcanoeingdisc +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat +skiingweightliftingwhitewat +raftinghorseback +ridingmountain +bikingin +line +skate +interest +librari +econom +topic +particularli +interest +free +bank +anti +trust +currenc +ssto +rlv +theologi +centurai +railroad +boot +locomot diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..a9450f80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,525 @@ +brian +bershad +brian +bershad +bershad +washington +eduwork +comput +scienc +engin +depart +univers +washington +seattl +home +street +seattl +brian +bershad +assist +professor +univers +ofwashington +sinc +receiv +univers +ofwashington +took +brief +respit +seattl +experi +post +industri +cultur +northeast +return +northwest +coffe +bershad +research +oper +system +distribut +system +network +parallel +system +architectur +work +hasappear +toc +sosp +asplo +isca +although +seem +asigmetr +paper +publish +save +life +besid +work +bershad +run +plai +squash +hang +thestairmast +project +includ +spin +extensibleoper +system +mobil +comput +memori +manag +oper +system +architectur +midwai +project +carnegi +mellon +parallelnetwork +scalabl +rocki +thesequel +etch +binari +instrument +optimizationcours +winter +look +master +qual +project +click +list +project +youmight +master +degre +qual +project +recent +paper +trace +driven +comparison +algorithm +parallel +prefetch +cachingtraci +kimbrel +andrew +tomkin +hugo +patterson +brian +bershad +edward +felten +garth +gibson +anna +karlin +sigop +usenix +associ +symposium +onoper +system +design +implemen +octob +dynam +bind +extensiblesystem +przemyslaw +pardyak +brian +bershad +osdi +sigop +usenix +associ +symposium +onoper +system +design +implemen +octob +structur +perform +interpret +theodor +romer +denni +geoffrei +voelker +alec +wolman +wayn +wong +jean +loup +baer +brian +bershad +henri +levi +asplo +octob +fast +effect +dynamiccompil +confer +program +languag +design +implementationj +ausland +philipos +chamber +egger +bershad +extens +protocol +architectur +applic +specif +network +marc +fiuczynski +brian +bershad +write +oper +system +us +modula +emin +sirer +stefan +savag +przemyslaw +pardyak +greg +defouw +mari +alapat +brian +bershad +appear +workshop +compil +support +system +softwar +februari +languag +support +extens +oper +system +wilson +hsieh +marc +fiuczynski +charl +garrett +stefan +savag +david +becker +brian +bershad +appear +workshop +compil +support +system +softwar +februari +safe +dynam +link +extens +oper +system +emin +sirer +marc +fiuczynski +przemyslaw +pardyak +brian +bershad +appear +workshop +compil +support +system +softwar +februari +automat +dynam +compil +support +event +dispatch +extens +systemsc +chamber +egger +ausland +philipos +mock +pardyak +workshop +compil +support +system +softwar +februari +extens +safeti +perform +spin +oper +system +bershad +sosp +reduc +memori +overhead +us +onlinesuperpag +promot +romer +ohlrich +karlin +bershad +isca +write +detect +distribut +sharedmemori +zekauska +sawdon +bershad +paper +appearedin +osdi +confer +dynam +page +map +polici +cach +conflictresolut +standard +hardwar +romer +bershad +chen +paper +appear +osdi +confer +mobisa +voelker +bershad +paper +appear +inth +mobil +comput +workshop +issu +extens +oper +system +savag +bershad +avoid +conflict +miss +dynam +larg +direct +mappedcach +bershad +chen +romer +asplo +spin +extens +microkernel +forappl +specif +oper +system +servic +bershad +uwtechn +report +effici +packet +demultiplex +multipl +endpoint +larg +messag +yuhara +bershad +maeda +moss +winter +usenix +impact +oper +system +structur +memori +system +perform +chen +bershad +sosp +protocol +servic +decomposit +high +perform +network +maeda +bershad +sosp +practic +consider +block +concurr +object +bershad +fast +interrupt +prioriti +manag +oper +system +kernel +usenix +microkernel +workshop +midwai +distribut +share +memori +system +bershad +zekauska +sawdon +ieee +compcon +local +area +mobil +comput +stock +hardwar +andmostli +stock +softwar +watson +bershad +usenix +moblic +consist +manag +virtual +index +cach +wheeler +bershad +asplo +fast +mutual +exclus +uniprocessor +bershad +redel +elli +asplo +us +mach +commun +primit +ginsburg +baron +bershad +machnix +us +microbenchmark +evalu +system +perform +drave +bershad +forin +wwo +network +perform +microkernel +maeda +bershad +wwo +increas +irrelev +perform +micro +kernel +base +oper +system +bershad +usenix +microkernel +workshop +system +mach +forin +golub +bershad +machnix +us +continu +implement +thread +manag +commun +inoper +system +drave +bershad +rashid +dean +sosp +inform +arpa +relat +inform +rain +citi +hash +hous +harrier +relat +inform +rel +abduct +alien diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..89bca368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,196 @@ +doorenbo +home +pagebob +doorenbo +home +page +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +sieg +hall +bobd +washington +current +work +netbot +union +place +suit +seattl +voic +daili +page +comedi +dilbert +strip +boffo +david +letterman +list +geeki +new +new +zdnet +anchordesk +magazin +good +stuff +shortcut +todai +cool +tool +research +current +postdoc +work +oren +etzioni +steve +hank +weld +softbot +project +also +particular +shopbot +internet +shop +agent +previou +soar +project +thesi +scienc +site +collect +pointer +repositori +canada +pointer +gopher +scienc +scientif +american +sigma +american +scientist +miscellan +link +meta +search +metacrawl +savvysearch +search +alta +vista +lyco +inktomi +open +text +infoseek +excit +crawler +hotbot +directori +hierarch +select +yahoo +magellan +pointcom +list +search +engin +guid +search +onlin +search +search +telephon +directori +world +yellow +page +network +yellow +switchboard +white +page +list +directori +new +cnnfn +newshour +washington +post +todai +reuter +headlin +yahoo +new +page +social +cafe +new +world +report +boston +globe +span +seattl +time +view +slate +feed +salon +atlant +monthli +harper +sport +espn +sport +zone +govern +fedworld +index +white +hous +congress +arpa +feder +budget +deficit +nation +debt +clock +concord +coalit +hand +balanc +budget +bipartisan +commiss +entitl +reform +budget +american +univers +museum +link +past +life +pittsburgh +upcom +birthdai +home +page +person +home +page +andfun +pagebobd +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..95c0bb1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,144 @@ +alan +born +home +pagealan +born +home +pagei +professor +depart +comput +scienc +engin +univers +washington +principalresearch +interest +constraint +base +languag +system +object +orient +languag +logic +program +languag +human +computerinteract +comput +societi +current +research +activitiesuwconstraint +page +contain +link +recent +paper +public +domainsourc +code +constraint +satisfact +algorithm +media +technolog +democraci +groupuw +student +also +idea +qualsproject +teachingher +page +cours +taught +recent +program +languag +autumn +concept +program +languag +autumn +comput +societi +winter +human +comput +interact +spring +informationhistori +grew +idaho +graduat +reed +colleg +mathemat +graduat +work +comput +scienc +atstanford +univers +receiv +degre +dissert +research +done +associ +xerox +paloalto +research +center +concern +constraint +orient +simulationlaboratori +receiv +spent +year +post +doctoralfellow +depart +artifici +intellig +univers +ofedinburgh +scotland +work +mechan +problem +solv +symbolicalgebra +join +comput +scienc +depart +andexcept +sabbat +spent +xerox +europarc +cambridg +england +havebeen +sinc +address +dept +comput +scienc +engin +univers +washington +seattl +phone +email +born +washington +eduwww +http +washington +home +born diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..54821524 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,23 @@ +brad +home +pagebrad +chamberlainphoto +credit +mike +perkowitzth +probabl +couldn +care +less +offic +address +thing +work +thing +like +thing +ad +subset +ofth +brad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..3ee3b9cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,151 @@ +brendan +home +pagebrendan +mumeyi +student +depart +comput +scienceat +theunivers +washington +enter +fall +expect +graduat +around +fall +contact +information +mail +brendan +washington +edufor +address +click +curriculum +vitaein +htmlorpostscriptformat +academ +interestsi +would +call +appli +theoret +comput +scientist +current +work +comput +biologi +moment +look +problem +physic +map +build +rough +locat +landmark +genom +gener +speak +interest +us +theori +math +tosolv +reason +practic +problem +also +done +work +incomput +astrophys +hpcc +groupher +onlin +papersb +mumei +power +clone +overlap +test +html +postscript +poster +present +ismb +confer +mumei +comput +aspect +physic +map +probe +html +postscript +survei +paper +written +fulfil +candidaci +requir +mumei +find +cluster +quickli +parallel +html +postscript +present +dimac +challeng +klaw +mumei +upper +lower +bound +construct +alphabet +binari +tree +html +postscript +present +soda +confer +also +siam +ofdiscret +mathemat +note +html +version +produc +latexhtml +containsom +error +readabl +part +recreationhik +cycl +ski +climb +drink +coffeeto +name +us +sailingand +hope +sometim +like +plai +bridg +older +photo +first +galleri +second +galleri +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..83a2624e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,105 @@ +lauren +brickerlauren +bricker +comput +scienc +engin +depart +univers +washington +seattl +click +need +clue +research +info +research +interest +primarli +graphic +userinterfac +although +self +proclaim +graphic +groupi +current +work +stevetanimoto +mathematicsexperi +imag +process +metip +project +goal +ofthi +project +interest +junior +high +school +kid +mathemat +usingexploratori +rather +rote +learn +method +particular +minterest +develop +comput +supportedcollabor +learn +cscl +user +interfac +us +inthi +well +project +workin +lawk +dawg +interfacea +fairli +extens +resumeschool +dazethi +quarterdoth +quartershuman +comput +interact +cours +spring +quarter +writeup +final +project +writeupwhat +asystem +cours +interest +cours +comput +insocieti +excit +hobbi +us +enjoi +busi +lifesportscookingpotteri +even +studio +garag +us +year +stuffbecaus +ask +itaddress +bricker +washington +last +modifi +mondai +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..7d8ab404 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,9 @@ +adam +carlsonadam +carlson +carlson +washington +comput +scienc +carlson +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..9b47d1c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,56 @@ +andrew +certain +home +page +andrew +certain +certain +washington +server +fix +give +note +page +interest +follow +direct +download +theview +look +model +current +work +tonyderos +david +salesin +werner +stuetzl +duchamp +jovan +popov +scanningproject +current +build +viewer +download +requir +sgigraph +workstat +paper +viewer +look +model +modifi +netscap +shouldalso +work +browser +modifi +similar +comput +scienc +engin +depart +univers +washington +seattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..6f3a2d20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,99 @@ +chou +chou +welcom +homepag +grad +student +computersci +washington +seattl +pictur +right +first +school +fall +quarter +ross +think +scari +school +relat +infoth +chinook +projectmi +fall +schedulemi +list +publicationscod +workshop +codesignpersonalperson +info +taiwan +greec +resumefoodi +enjoi +cook +peopl +open +restaur +ofpeopl +favorit +dish +includ +stir +fri +rice +noodl +beefskew +recip +line +toysb +comput +geek +computersand +cool +toi +taiwanesei +also +promot +taiwaneselanguag +current +develop +comput +tool +taiwanes +sureto +check +experiment +line +taiwanesedictionari +though +absolut +requir +page +best +viewedif +instal +chines +charact +font +us +java +enabl +browser +like +netscap +beabl +java +applet +yellow +ball +bouncingov +barnei +purpl +dynosaur +last +updat +email +chou +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..aebf5dd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,153 @@ +corei +andersoncorei +andersonth +interest +late +set +home +page +research +color +percept +automat +correct +induc +color +compet +programm +month +contest +search +info +localtalk +adapt +set +linux +plai +wavelet +intern +collegi +program +contest +pacif +region +contest +problem +result +final +problem +result +html +version +previou +year +final +problem +version +duke +internet +program +contest +rsum +onlin +recent +august +review +comput +graphic +text +refer +book +thing +done +graduat +univser +washington +highlin +commun +colleg +self +tune +fpga +fall +help +polli +organ +depart +contribut +engin +open +hous +april +manag +chapter +treasuri +spring +spring +develop +read +done +macintosh +program +search +good +internet +servic +providercool +thing +found +usag +statist +lurker +guid +babylon +sunsit +linux +archiv +dilbert +zone +home +page +brother +home +pageus +link +peek +insid +term +lab +html +refer +html +featur +netscap +comput +scienc +engin +depart +home +page +univers +washinton +home +page +uwtv +tech +notesmi +autumn +schedul +mondai +tuesdai +wednesdai +thursdai +fridai +meet +meet +math +math +math +math +math +math +corin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..ad7a9714 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,75 @@ +craig +experiencecraig +kaplancurr +locat +student +depart +comput +scienc +universityof +washington +seattl +washington +well +page +copyof +homepag +univers +waterloo +time +modifi +page +appropri +tomi +current +situat +enjoy +experi +near +undergraduatewa +grad +ball +photo +fromth +ball +second +enjoy +experi +occur +saturdai +address +convoc +valedictorian +cannot +express +honour +felt +wonder +graduat +class +choos +repres +incident +didn +know +parent +found +valedictorian +minut +start +ceremoni +sai +never +forgiv +text +valedictorian +address +found +anyon +curiou +visitor +number +last +updat +cskaplan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..6f6353a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,65 @@ +codi +kwok +home +page +first +thing +thought +peopl +think +mean +aliv +asami +chiaki +welcom +codi +chung +kwok +ctkwok +washington +edui +graduat +student +work +weld +andoren +etzioni +plan +andsoftwar +agent +sanctuari +work +ingram +softbot +aiuw +contact +informationleisur +sanctuari +nausicaa +vallei +windlaputa +castl +skyhyp +futur +vision +gunnm +nausicaa +vallei +wind +arch +vile +welcom +java +applet +anim +take +load +codi +kwok +last +modifi +visitor +sinc +figur +doom +numer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..ceb203b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,37 @@ +darren +cronquist +darren +cronquist +darrenc +washington +depart +comput +scienc +engin +univers +washington +seattl +welcom +home +page +last +updat +employ +inform +current +plan +complet +myph +html +postscript +resum +resum +curriculum +vita +curriculum +vitaperson +inform +rest +homepag +current +underconstruct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..56df5147 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,239 @@ +david +christianson +second +till +midnight +second +till +nowher +babi +david +christiansondbc +washington +comput +scienc +engin +depart +univers +washington +seattl +current +workin +spare +time +third +year +graduat +student +atth +interest +inaiand +plan +gotten +user +interfac +mayb +even +graphicsa +well +current +studi +knowledg +represent +goal +directedbehavior +mix +initi +plan +activ +recognit +buzzwordacquisit +context +human +comput +interact +recent +work +prototyp +us +intellig +interfac +bobdoorenbo +shopbot +rather +build +interfac +plan +somehowintegr +interfac +variou +line +store +build +shop +assist +simpl +assist +built +applicationthat +automat +read +pars +shop +basket +order +determinewhat +product +user +interest +also +collabor +sean +anderson +weld +david +salesin +michael +cohen +develop +simpl +interact +moviethat +demonstr +automat +camera +plan +appl +intern +work +russel +plan +technologyinto +user +experi +also +develop +perpetr +graphic +debugg +theucpop +famili +planner +programm +also +client +number +peopl +whose +live +touch +grow +everi +manual +isher +spent +undergradu +career +theunivers +chicago +carboload +harold +chicken +assist +theanim +agent +publicationschristianson +anderson +salesin +weld +cohen +declar +camera +control +automat +cinematographi +appear +aaai +firbi +christianson +mcdougal +fast +local +map +support +navig +object +local +symposium +sensor +fusion +boston +novemb +find +dave +work +thechateau +cynic +offic +withfreder +william +darren +adam +gloriou +leader +juan +import +thing +favorit +activ +practic +judo +recent +compet +senior +nation +sibl +sisterjust +graduat +school +inform +librari +studi +univers +michigan +surf +cut +edg +research +inform +supercollid +realli +feel +like +slack +mirski +help +watch +hero +youth +duel +death +wwwf +grudg +match +fame +fortun +check +respons +week +check +game +domain +straight +doomgat +sai +evil +book +tick +dave +christianson diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..9673f6e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,44 @@ +home +page +dave +johnson +david +johnson +washington +comput +scienc +engin +depart +univers +washington +seattl +research +interest +navig +assist +hypertext +readersproject +activ +racquetbal +golf +basketbal +softbal +tutori +script +fit +togeth +take +theracquetbal +quiz +quiz +creat +take +look +thecreat +assess +page +form +give +last +modifi +mondai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..f6e9d637 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,232 @@ +home +page +david +dion +david +dion +ddion +washington +person +yeah +right +like +anyth +person +okai +mayb +littl +academ +current +research +past +year +work +brian +bershad +primari +respons +construct +unix +server +thespinoper +system +spin +oper +system +applic +achiev +impress +perform +dynam +extend +kernel +safeti +protect +maintain +languag +level +spin +extens +written +modula +user +level +spin +unix +server +slight +variant +unix +server +think +run +mach +spin +dynam +link +extens +us +intercept +system +call +emul +mach +kernel +environ +previou +research +spin +first +extens +system +havework +undergradu +studi +notr +dame +help +implement +user +level +memori +manag +extens +commun +subsystem +stuff +afraid +time +around +surf +wouldn +claim +know +cool +stuff +ipromis +surf +soon +netscap +enhancedthi +page +hold +breath +meanwhil +site +visit +occasion +work +distract +univers +washington +comput +scienc +engin +reason +seattl +univers +washington +homepag +featur +date +view +campu +weather +condit +spin +occupi +vast +major +time +modula +languag +program +dai +debug +manual +solv +countless +problem +univers +notr +dame +undergradu +institut +notr +dame +band +undergradu +life +notr +dame +trumpet +section +undergradu +life +racquetbal +ladder +main +outlet +athlet +espn +sportzon +stai +touch +sport +world +todai +stai +touch +rest +world +dilbert +learn +real +world +restaur +fine +seattl +eateri +recommend +other +comput +scienc +movi +seattl +region +movi +list +transport +line +guid +seattl +excel +public +transport +system +current +traffic +condit +current +statu +seattl +infam +traffic +seattl +marin +first +major +leagu +basebal +team +page +bean +shop +page +visit +david +dion +last +modifi +mondai +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..1cab1e46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,167 @@ +derrick +index +derrick +weathersbi +bullssupersonicsi +pursu +phdin +comput +scienc +univers +washington +seattl +beauti +campu +univers +li +heart +ofseattl +offer +mani +divers +graduat +student +often +fall +prei +therebyextend +time +averag +graduat +student +career +practic +experi +interestssignific +project +skill +data +sheet +share +server +project +data +parallel +arrai +languag +compil +host +token +ring +commun +protocol +develop +data +base +design +administr +system +administr +network +administr +tool +securityresearch +experi +interestsmi +research +center +around +parallel +distribut +comput +challengespres +field +on +perform +portabl +conveni +perform +typic +foremost +goal +run +parallel +ordistribut +environ +howev +portabl +suffer +final +theseenviron +offer +extra +challeng +asynchron +independ +event +daunt +task +develop +system +parallel +distributedenviron +issu +address +group +project +research +page +spaa +paper +collect +commun +gener +topic +collect +comm +dissert +topic +tool +integr +softwar +engin +projectacadem +achievementsinstructor +extens +colleg +advanc +summer +curriculum +design +advanc +cours +certif +program +extens +collegeinstructor +extens +colleg +advanc +fall +teach +assist +start +undergradu +tutor +women +minoritystud +depart +comput +scienc +engineeringoutstand +teach +assist +award +colleg +engin +person +interest +interact +cnnfinanciala +newslet +would +javaw +derrick +weathersbyderrick +washington +edutu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..23f44057 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,160 @@ +brian +dewei +home +pagebrian +deweyabout +first +year +student +depart +comput +scienceand +engin +univers +washington +doyou +need +know +like +music +book +ilov +plai +game +ride +bike +write +letter +drink +myfavorit +color +blue +favorit +ocean +atlant +oldroomm +think +like +anim +public +avail +finger +northern +irelandi +return +trip +belfast +june +carri +sixti +pagesof +note +interview +carri +luggag +need +getthos +semi +chaotic +note +readabl +hopefulli +enlighteningformat +work +draft +feel +free +read +shoulder +period +make +draft +avail +html +postscript +format +goal +encourag +frequent +feedback +soon +possibl +pleas +read +mail +pleas +note +well +much +time +work +thisproject +late +hopefulli +progress +near +ofth +summer +imag +northern +ireland +note +thecurr +draft +sinn +fein +inform +bibliographi +postscript +statist +terrorist +relat +death +northern +ireland +postscript +statist +terrorist +relat +injuri +northern +ireland +postscript +game +alreadi +addict +recuri +game +check +link +fascin +thorough +histori +develop +game +china +read +ancient +china +page +imageek +york +cuni +page +provid +mani +link +interest +site +jansteen +page +thorough +seen +brian +dewei +dewei +washington +edulast +modifi +tuesdai +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..db4558f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,145 @@ +home +pagewelcom +fasulo +home +page +dfasulo +washington +third +year +graduat +student +depart +comput +scienc +univers +washington +graduat +williamscolleg +computersci +appli +mathemat +class +note +portrait +contain +slight +inaccuraci +find +home +eastlak +seattl +work +depart +comput +scienc +engin +univers +ofwashington +seattl +offic +chateau +email +dfasulo +washington +academ +interest +comput +graphic +comput +biologi +person +interest +scienc +fiction +fantasi +written +otherwis +fact +probabl +honest +identifi +illustr +merlin +corwin +pictur +favorit +fiction +charact +mine +roger +zelazni +chronicl +amber +imag +taken +amber +drpg +publish +phage +press +would +recommend +anyon +like +book +also +recommend +seri +babylon +creativ +write +fiction +poetri +absolut +link +work +athlet +particular +order +tenni +kwon +distanc +run +role +plai +random +thing +depend +cat +random +thing +homepag +friend +fellow +william +alumnu +sean +sandi +look +woman +former +grad +student +wendi +belluomini +dress +dogbert +peopl +ask +theori +worthwhil +area +research +whether +abstract +us +better +explan +goal +futur +theori +ever +given +dfasulo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..e8b21f2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,34 @@ +martin +dickei +home +page +martin +dickeycomput +scienc +engineeringunivers +washingtonwelcom +home +page +weekli +schedulenarr +resum +blurbcs +engr +autumn +favorit +seattl +coffe +housesfavorit +internet +site +sister +bookspirograph +java +script +garg +plai +dickei +washington +eduupd +tuesdai +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..889863ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,233 @@ +denni +denni +primari +interest +high +perform +comput +architectur +researchwith +smart +peopl +likejean +loup +baer +brian +bershad +brad +calder +brad +chen +alan +eustac +dirk +grunwald +andt +romer +recent +paper +energi +manag +issu +comput +system +gener +paper +instruct +cach +fetch +polici +specul +execut +baer +calder +grunwald +isca +dynam +page +map +polici +cach +conflict +resolutionon +standard +hardwar +romer +bershad +chen +osdi +avoid +conflict +miss +dynam +larg +direct +mappedcach +bershad +romer +chen +asplo +instruct +cach +effect +differ +code +reorder +algorithm +qualifi +project +report +univers +washington +contact +work +comput +scienc +engin +depart +univers +washington +seattl +offic +sieg +dlee +washington +home +seattl +index +pointer +hotlist +good +entri +point +explor +yahoo +yellow +page +internet +lyco +realli +good +search +enginefor +seattl +guid +click +vote +million +sensibl +peopl +high +mind +conced +thatpolit +almost +alwai +choic +lesser +evil +tweedledumand +tweedlede +vote +abstain +theyar +present +presid +appoint +peopl +go +torummag +around +live +next +four +year +consid +allth +peopl +home +stew +rather +vote +huberthumphrei +show +humphrei +peopl +taught +huberthumphrei +lesson +still +enjoi +nixon +suprem +court +whentricia +juli +begin +find +silver +thread +among +gold +theblack +russel +baker +ford +without +flummeri +commit +commit +hesit +chanc +draw +back +alwai +ineffect +concern +act +initi +element +truth +ignor +kill +countless +idea +splendid +plan +moment +definit +commit +provid +move +sort +thing +occur +help +would +never +otherwis +occur +whole +stream +event +issu +decis +rais +favor +manner +unforeseen +incid +meet +materi +assist +magic +could +dream +would +come +whatev +dream +begin +goeth +dlee +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..e66bb847 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,140 @@ +anoth +unnecessari +home +pagececi +home +page +well +much +anywai +sure +casual +mention +name +home +page +buti +feel +strongli +bold +noless +doug +zongker +exhaust +list +usual +research +public +classeshow +contact +meth +unusu +toxic +custard +workshop +filesth +mento +galleryvisit +inform +supercolliderth +useless +todai +date +wast +time +cron +player +databas +wast +time +test +ground +caveat +user +sarcasm +lucki +brows +todai +avirtu +intend +treasur +trove +inform +whichmai +actual +us +realli +sarcasm +first +year +grad +student +comput +scienc +engineeringdepart +univers +ofwashington +graduat +michigan +state +univers +imajor +comput +scienc +andminor +math +dubiou +honorsjunior +apprentic +keeper +brotherhood +crouton +death +cart +pizzicato +intern +club +member +bryan +worst +execut +vice +presid +charg +emerg +backup +clicker +cruis +inform +highwai +inhigh +gear +actual +sit +buttstar +comput +screen +tast +background +stolen +depart +labor +homepag +wheremi +sister +work +depart +homepag +doug +zongker +dougz +washington +home +research +class +contact +last +edit +thursdai +novemb +hit diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..59fde30d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,128 @@ +dylan +mcname +dylan +jame +mcname +dylan +washington +comput +scienc +engin +depart +univers +washington +seattl +person +inform +research +concentr +interact +applicationsand +oper +system +implement +oper +system +mechanismthat +allow +applic +implement +page +replacementpolici +kernel +polici +caus +perform +poorli +help +implement +schedul +activ +machoper +system +schedul +activ +mechan +thathelp +user +level +thread +system +interact +properli +kernelthread +schedul +experi +lead +work +spin +project +folk +build +oper +system +architecturethat +gener +idea +applic +tailor +oper +system +current +work +carri +research +start +opal +project +thesi +investig +interact +betweenobject +orient +databas +oodb +oper +system +virtual +memori +work +demonstr +signific +improv +perform +achiv +us +commod +oper +system +differ +current +done +addit +improvementscan +come +modifi +oper +system +slightli +betterserv +oodb +paperscv +cours +geoff +voelker +built +winter +quarter +system +seminar +dedic +java +gave +lectureintroduc +java +languag +environ +slide +us diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..303fbe63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,39 @@ +christoph +lewi +home +page +christoph +lewi +graduat +student +dept +comput +scienc +engin +univers +washington +seattl +echri +washington +hello +glad +could +make +graduat +student +work +program +languag +project +offic +hour +tent +mondai +wednesdai +sieg +christoph +lewi +last +modifi +thur +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..6606c38b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page +blank +ecrock +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..cc4fe199 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,139 @@ +eddi +hong +home +page +know +second +year +graduat +comput +scienc +studentat +univers +washington +well +tosai +busi +type +peopl +littl +time +hand +hadto +includ +resum +link +postcriptand +plain +text +form +offic +room +seig +hall +current +home +current +work +anna +karlin +craig +chamber +theoret +model +dynam +compil +specif +workingon +develop +line +algorithm +work +well +fordynam +compil +plan +qual +project +access +sinc +august +histor +fact +free +time +activit +vine +branchesmi +sister +want +home +page +help +creat +also +list +variou +peopl +know +anoth +page +interest +includ +keep +comput +industri +site +give +insight +commentari +happen +knowof +place +pleas +mail +daveneti +power +macintosh +guess +make +bias +towardslik +mac +howev +think +better +comput +eveneasi +come +place +sometim +visit +appl +comput +check +seattl +freewai +traffic +look +advic +import +book +worldher +us +inform +alwai +want +know +found +find +address +domain +name +find +countri +mail +friend +stand +edhong +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..79ac4481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,99 @@ +susan +egger +susan +egger +http +washington +home +egger +depart +computersci +engin +univers +washington +seattl +voic +email +egger +washington +offic +sieg +hall +research +interest +comput +architectur +back +compil +emphasi +onexperiment +perform +analysi +current +work +issu +incompil +optim +dynam +compil +share +data +optimizationsand +instruct +schedul +processor +design +multithreadedarchitectur +current +research +project +compil +time +algorithm +reduc +fals +share +dynam +compil +multithread +architectur +spinprevi +research +cach +coher +code +schedul +compil +time +prefetch +share +memori +machin +miscellan +tool +workload +new +asplo +program +committe +call +paper +asplo +homepag +inform +asplo +look +research +project +click +list +project +might +qual +project +amast +degre +begin +thesi +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..414437a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,311 @@ +emin +sirer +emin +sirereg +washington +depart +comput +scienc +engin +univers +washington +seattl +backgroundi +current +third +year +graduat +student +univers +washington +grew +istanbul +turkei +receiv +comput +scienc +princeton +univers +current +work +toward +spinproject +prof +brian +bershad +spent +summer +bell +labswork +theplan +oper +system +help +build +prototyp +displai +thesumm +research +center +princeton +jersei +recent +summer +work +thevesta +project +projectsmi +goal +develop +safe +adapt +extens +oper +system +develop +thread +schedul +synchron +dynam +link +andprotect +domain +subsystem +spin +also +wrote +machin +specificaspect +spin +kernel +system +call +interrupt +path +andsom +miscellan +interfac +alarm +mach +compat +support +novel +aspect +spin +thread +schedul +provid +mean +ofextend +kernel +arbitrari +thread +implement +schedulingpolici +dynam +link +extens +kernel +allowsu +achiev +high +perform +interfac +call +strand +isdesign +safe +provid +fault +isol +novel +aspect +spin +protect +domain +interfac +allowsisol +safe +fine +grain +share +time +extens +withconflict +symbol +simultan +activ +system +extens +hide +code +data +beassur +possibl +access +clincher +extensionsthat +want +share +code +data +dynam +protectionenforc +overhead +also +implement +high +performanceweb +server +spin +networkingstack +main +object +design +reduc +http +latenc +andminim +load +wrote +mip +instruct +simul +coupl +year +calledmipsi +robust +enough +simul +spec +benchmark +standard +ofnew +jersei +us +educ +tool +researchplatform +page +describ +mipsi +featuresand +avail +paper +safe +dynam +link +extens +oper +system +wcsss +describ +spin +protect +namespac +manag +mechan +write +oper +system +us +modula +wcsss +describ +experi +us +modula +implement +spin +extens +safeti +perform +spin +oper +system +sosp +design +implement +perform +paper +protect +softwar +issu +hoto +posit +paper +compar +softwar +hardwar +protect +mechan +spin +extens +microkernel +applic +specif +oper +system +servic +sigop +european +workshop +oper +system +review +version +spin +extens +microkernel +applic +specif +oper +system +servic +univers +washington +technic +report +march +measur +limit +fine +grain +parallel +senior +independ +work +princeton +univers +june +talkslanguag +support +extens +oper +system +slide +present +first +workshop +compil +support +system +softwar +wcsss +tucson +arizona +interestswhenev +find +time +opportun +follow +sail +windsurf +dive +ski +bikingmak +outdoor +cloth +andhik +dylan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..6e49fa4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,174 @@ +eric +anderson +home +page +eric +andersonwher +find +sieg +hall +depart +comput +scienc +engin +univers +washington +seattl +home +street +seattl +page +longer +black +honor +recent +june +decisionin +aclu +reno +page +longer +black +mind +thedecis +mere +interim +step +could +read +take +probabl +still +commit +feloni +care +speech +freedom +themarketplac +idea +fact +third +year +graduat +student +comput +scienc +mean +imostli +panic +qual +project +also +try +write +paper +prof +henri +greensideof +duke +univers +recent +finish +master +thesi +onsteadi +state +solut +particular +nonlinear +biharmon +stabil +criterion +explicit +method +restrict +fourth +power +spatial +resolut +implicit +timesteppingmethod +backward +euler +necessari +numer +analysisissu +involv +newton +method +solv +implicit +nonlinearequ +spars +matrix +solut +method +solv +newtonstep +interplai +pictur +realli +spiffi +work +bodi +code +astrophys +simul +work +support +project +data +structuresbi +prof +richard +andersoni +work +signal +process +comput +music +project +aim +automat +transcript +acoust +signal +prof +anna +karlin +isth +musician +interest +project +like +everybodi +els +page +applet +first +link +text +small +graphic +section +materi +preparedfor +last +fall +like +everybodi +els +page +snapshot +mostli +famili +prove +brother +final +weather +seattl +eric +washington +meander +washington +sept diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..8b1a65ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,292 @@ +oren +etzioni +home +pageoren +etzioni +home +pagedepart +comput +scienc +engin +univers +ofwashington +seattl +washington +voic +mail +etzioni +washington +offic +sieg +hall +room +brief +bioand +photo +current +research +internet +softbot +enabl +human +user +state +heor +want +accomplish +softbot +disambigu +request +anddynam +determin +satisfi +softbot +finalist +discoveraward +technolog +innov +comput +softwar +metacrawl +softbot +field +servic +enabl +searchmultipl +indic +parallel +provid +sophist +pruningopt +netrecommend +metacrawl +search +servic +choic +ahoi +softbot +white +page +servic +locatesindividu +home +page +high +accuraci +bruteforc +learn +brute +analyz +hypothes +second +whenrun +sparc +select +public +effici +inform +gather +internet +foc +move +inform +food +chain +deploi +softbot +theweb +aaai +ascal +comparison +shop +agent +world +wide +autonom +agent +multi +servic +search +comparison +us +metacrawl +postscript +html +softbot +base +interfac +internet +cacm +juli +intellig +agent +internet +fact +fiction +forecast +ieee +expert +august +intellig +without +robot +repli +brook +magazin +decemb +learn +understand +inform +internet +ijcai +sound +effici +close +world +reason +plan +toappear +first +robot +aaai +addit +paper +student +advis +richardseg +master +thesi +bernard +fileretriev +softbot +neal +lesh +master +thesi +planner +unix +softbot +keith +golden +master +thesi +plan +universalquantif +incomplet +inform +terranc +goan +master +thesi +learn +softwar +error +mikeperkowitz +master +thesi +learn +understand +inform +internet +erik +selberg +master +thesi +multi +servic +search +comparison +us +metacrawl +oren +zamir +jonathan +shake +undergradu +student +advis +stephen +soderland +program +umass +amherst +juli +roomi +hewlett +packard +bruce +lesourd +robert +spiger +lockhe +research +center +william +alford +program +univers +wisconsin +greg +fitchenholtz +hewlett +packard +guido +hunt +dymitr +mozdyniewicz +quark +machin +learn +resourc +machin +learn +databas +repositori +irvin +machin +learn +program +repositori +irvin +knowledg +discoveri +minecontain +inform +knowledg +discoveri +databas +neuroprosearch +contain +recent +paper +relev +neural +network +machin +learn +inform +servic +univers +illinoi +induct +learn +group +statlib +contain +data +algorithm +inform +relev +statist +machin +learningtoolbox +bonn +german +list +usenet +faq +access +count +sinc +etzioni +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..cf10f210 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,39 @@ +mike +feelei +home +pagemik +feelei +comput +scienc +finish +done +soon +thesi +concern +global +memori +manag +workstationclust +also +work +distribut +comput +opalproject +join +faculti +univers +british +columbia +injanuari +inform +avail +us +link +papersmi +research +summarycvsoutheast +idaholast +modifi +juli +mike +feelei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..429ef8a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,28 @@ +chri +fisher +home +page +pictur +chri +fisherdepart +comput +scienc +engineeringbox +univers +washington +seattl +fisher +washington +voic +mail +fisher +washington +sieg +hall +room +page +current +construct +return +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..5be978ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,80 @@ +depart +comput +scienc +engineeringunivers +washington +seattl +sieg +hall +washington +schedulethi +quarter +autumn +ta +cours +rather +work +gener +exam +check +scheduleto +otherwis +around +probablyb +found +librari +somewher +nice +read +paper +research +activitiesmi +main +interest +comput +algorithm +specif +areasof +parallel +comput +comput +geometri +public +meander +place +denni +outta +mind +vista +pea +music +site +chateau +galleri +fund +drive +thing +alec +wolman +might +server +seven +lost +soul +captur +html +listen +phone +booth +mofo +peopl +luci +place +paul +peach +ruel +might +look +like +moment +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..a95378d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,112 @@ +francesmari +modugno +home +pagefrancesmari +modugno +home +page +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +sieg +hall +phone +mail +washington +research +interest +main +interest +human +comput +interact +also +interest +user +program +formal +model +softwar +specif +verif +technolog +polici +welcom +opportun +collabor +relat +topic +current +research +project +softwar +safeti +human +machin +interfac +previou +research +public +avail +onlin +summari +ofmi +thesi +research +formal +model +real +time +concurr +distribut +system +parallel +algorthim +profession +activ +basic +research +symposium +chair +uist +demonstr +chair +educ +comput +scienc +carnegi +mellon +univers +march +comput +scienc +carnegi +mellon +univers +august +comput +scienc +mathemat +cornel +univers +activ +anyth +recent +interest +includecycl +ski +languag +cultur +current +spanish +previouslyitalian +vegetarian +cook +elleri +line +greet +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..bf452b2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,50 @@ +georg +forman +home +pagegeorg +forman +home +pagei +receiv +comput +scienc +optim +compil +ariadn +scalabl +pattern +match +parallel +trace +debugg +public +mobil +comput +hyperlink +librari +someth +interest +free +handi +softwar +script +written +word +puzzl +water +song +chang +netscap +anim +georg +forman +gforman +comhom +page +mail +finger +weather +dept +live +pictur +gener diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..20a26fe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,60 @@ +home +page +marc +friedman +marc +friedman +home +page +cours +page +watercolor +applet +camp +checklist +spanish +english +collabor +dictionari +poetri +research +favorit +quot +link +elsewher +occam +inform +gather +agent +keith +golden +keith +wordbot +bike +trip +artifici +intellig +codi +kwok +weld +ucpop +planner +research +tool +chang +life +work +nietzschein +english +netscap +bookmark +file +everi +page +refer +visitor +sinc +marc +friedman +friedman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..4039698b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,32 @@ +charli +garrett +home +page +charli +garrett +address +seattl +research +interest +compil +graphic +neural +network +genet +algorithm +game +plai +algorithmspap +line +genet +algorithmsformerli +member +cecil +group +univers +ofwashington +bookshelf +audio +file +garrett +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..b415d565 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,56 @@ +georg +winkenbach +georg +winkenbachdepart +comput +scienc +engineeringunivers +washingtonbox +seattl +mail +georgew +washington +eduphon +interest +comput +graphic +multimedia +thesi +work +doneund +supervis +professor +david +salesin +deal +appli +tradit +illustr +techniqu +theautomat +render +three +dimension +model +imagescr +prototyp +render +system +found +link +follow +imag +galleri +grail +graphic +imag +laboratori +depart +comput +scienc +engin +wife +home +page +taweewan +siwadun diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..61f60e78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,289 @@ +greg +badro +home +pagegreg +badro +welcom +home +page +last +updat +email +washington +eduaddress +nesbit +seattl +hello +welcom +homepag +pleas +feel +free +send +feedback +email +address +page +alwai +isuppos +construct +keep +check +back +excitingfeatur +recent +graduat +dukeunivers +complet +degre +doubl +major +incomput +scienc +mathemat +spring +amcurr +emploi +part +time +senior +research +scientist +fortransworld +numer +small +compani +origin +locat +indurham +headquart +bermuda +work +myapart +seattl +washington +also +full +time +graduat +student +computersci +engin +depart +univers +ofwashington +fall +cours +home +pagecs +home +pagecs +home +pagecs +softwar +engin +seminarcs +compil +seminar +newer +stuff +philosophi +mathemat +cours +note +misc +patch +fvwm +first +place +best +show +redhat +desktop +configur +competit +entri +emac +configur +readm +file +emac +configur +archiv +file +zshell +configur +readm +file +zshell +configur +archiv +file +chronicl +duke +univers +newspap +articl +transworldnumer +spring +ieeenat +program +competit +victori +vertic +winter +issu +duke +magazin +contain +recent +articl +geneticalgorithm +person +link +rsum +data +date +first +busi +sampl +drew +bycomput +simpl +magic +creat +canterburi +progress +variou +random +pictur +life +definitelynot +work +mani +hobbi +includ +tenni +ski +especi +jackson +hole +jackson +volleybal +juggl +piano +plai +mathemat +game +rubik +cube +linux +freewar +unix +music +especi +sarahmclachlan +billi +joel +joel +yahoo +page +list +link +parliamentari +procedur +ncaa +basketbal +interest +link +lyco +search +duke +comput +scienc +home +page +duke +univers +home +page +chronicl +duke +commun +daili +newspap +univ +washington +home +page +unoffici +seattl +microsoft +corpor +world +wide +server +gatewai +user +group +histor +comput +imag +hotjava +global +network +navig +home +page +perl +practic +extract +report +languag +virtual +librari +inter +languag +unif +interest +devic +connect +write +html +sgml +seinfeld +index +page +friend +sitcom +part +materi +base +upon +work +support +nation +scienc +foundat +graduat +fellowship +opinion +find +conclus +recommend +express +public +author +necessarili +reflect +view +nation +scienc +foundat +greg +badro +washington +comput +scienc +engin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..c78a5557 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,414 @@ +greg +linden +home +page +greg +lindenmi +love +wifecorina +current +third +year +graduat +program +polit +scienceher +third +year +graduat +program +thecomput +scienc +depart +univers +ofwashington +slave +awai +toward +lofti +goal +complet +undergraduatedegre +univers +california +diego +anodd +doubl +major +comput +scienc +polit +scienc +go +leav +graduat +school +decemb +mactiv +look +posit +softwar +develop +interest +check +resum +java +applet +webview +allow +link +page +orset +page +addit +webview +famili +altavistawebviewand +metawebview +instead +enter +enter +keyword +foraltavista +metacrawl +webview +hit +search +servic +return +graph +displai +page +found +searchservic +link +page +autom +travel +assist +emul +dialog +travelag +client +gradual +elicit +flight +prefer +whileallow +brows +real +flight +data +research +prototyp +quit +function +even +earli +stage +webview +highli +rate +gamelan +andjar +wasrat +java +applet +jar +cool +gamelan +andwa +gamelan +staff +pick +webview +wasrat +java +applet +jar +cool +gamelan +andwa +gamelan +staff +pick +gamelan +staff +pick +altavistawebview +winner +thejava +repositori +java +contest +applet +also +publish +book +walsh +foundat +java +book +meilleur +java +best +java +first +java +applet +linear +ballet +oop +us +java +capabl +browser +sorri +cool +java +applet +sourc +demonstr +us +doubl +buffer +avoid +flicker +us +thread +give +time +run +applet +code +certainli +could +cleaner +though +expect +work +thought +cool +might +enough +standardsto +impress +mylgramm +particl +tree +applet +draw +tree +lgrammer +tree +look +much +realist +theparticletre +tree +interest +recent +start +posit +jar +judg +evalu +java +applet +submit +thejar +archiv +interest +work +summer +develop +dawn +civil +ademonstr +applic +show +plan +techniqu +cansuccessfulli +appli +entertain +softwar +myriadsoftwar +ucsd +work +professor +belew +filippo +menzer +latentenergi +environ +project +tool +developingartifici +life +model +experi +artifici +neuralnetwork +evolutionari +learn +softwar +enviro +paper +hank +lesh +linden +elicit +user +prefer +theautom +travel +assit +submit +user +model +majeski +linden +linden +spitzer +model +localizedinteract +spatial +constraint +iter +prison +dilemma +associ +econom +scientist +krishnamoorthi +paturi +blume +linden +liden +esen +hardwaretradeoff +boolean +concept +learn +world +congress +neuralnetwork +linden +recurr +neural +network +iter +prison +sdilemma +unpublish +honor +thesi +adam +carlson +sujai +parekh +wrote +funrai +tracer +ofth +project +inc +graduat +comput +graphic +imag +headless +horseman +closeup +headless +horseman +chess +duel +assembl +requir +sphere +imag +withreflect +transpar +shadow +distribut +trace +adaptivesampl +mess +cool +thing +pattern +thespher +transpar +reflect +causingth +rai +reflect +refract +multipl +time +surfaceand +intern +also +made +second +comput +anim +call +strike +theanim +written +us +inventor +code +manipul +thed +model +origin +movi +file +made +alow +qualiti +quicktim +movi +avail +qualiti +mbquicktim +movi +avail +sorri +doesn +compress +anyfurth +least +anyth +resembl +reason +qualiti +program +stuff +dilbert +cognit +scienc +ucsd +repositori +artifici +life +info +occasion +found +chateau +guggenheim +annex +comput +scienc +engin +univers +washington +seattl +glinden +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..c77594ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,32 @@ +brian +kri +grant +home +pagebrian +home +awai +homework +relat +infowork +backgrounduwdynam +compil +groupuw +depart +computersci +engineeringperson +stuffperson +backgroundmi +daughter +isismi +trip +singaporemi +bookmarksmi +public +keylast +updat +octob +brian +kri +grant +grant +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..0d22cac5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,111 @@ +dave +grove +dave +grove +depart +comput +scienc +engin +univers +washington +seattl +offic +chateau +sieg +worki +spend +time +plai +cecil +cecil +pure +object +orient +langaug +us +vehicl +integr +research +area +languag +design +program +environ +optim +compil +also +us +hord +consult +hang +aroundth +fring +spinproject +actual +attempt +graduat +sometim +soonish +much +less +frequent +paper +author +author +plai +wouldn +complet +without +dilbertfix +strip +thathit +littl +close +home +current +manag +underacheiv +fantasi +footbal +team +us +spend +summer +hampshir +work +gui +scoutreserv +greaterlowel +council +pictur +casunset +taken +right +cabin +kick +anoth +everi +boi +offic +someth +silli +white +water +raft +trip +especi +cool +spend +month +toronto +drove +back +toseattl +took +number +detour +along +somehihglight +trip +grove +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..3f514774 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,105 @@ +scott +hauck +scott +hauck +hauck +washington +comput +scienc +engin +depart +univers +washington +seattl +year +graduat +student +univers +washington +current +work +multi +fpga +system +rapid +prototyp +board +level +design +thoughi +also +interest +asynchron +circuit +fpga +architectur +parallel +graduat +june +person +biographi +educ +experi +public +curriculum +vitaeresearch +asynchron +circuit +survei +current +asynchron +design +methodolog +well +first +fpga +asynchron +circuit +triptych +montag +fpga +architectur +develop +triptych +montag +fpga +architectur +architectur +improv +densiti +current +commerci +fpga +multi +fpga +system +rapid +prototyp +develop +springbok +rapid +prototyp +system +board +level +design +well +partit +assign +rout +topolog +work +gener +multi +fpga +system +chinook +project +hardwar +softwar +design +synthesi +simul +system +embed +applic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..68b1569e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin +hinshaw +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..c76245f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,73 @@ +place +place +macduff +ultrasound +imag +emma +elspeth +macduff +name +subject +chang +without +notic +unborn +daughter +week +fromconcept +week +ripe +inmid +decemb +view +profil +ly +back +lookingup +head +right +upper +half +torso +theleft +busi +obsess +impend +fatherhood +master +thesi +part +chinook +project +pass +time +silli +possibl +also +rememb +myspam +unfortun +also +wast +time +html +brows +around +interest +stuff +usingwebcrawl +pointer +neat +stuff +frogstv +nationpenn +tellermus +lyricsian +macduff +washington +dept +comput +scienc +engin +univ +washingtonseattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..4cca8596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,210 @@ +herv +jamrozikherv +jamrozik +postdoc +univers +washington +sinc +septemb +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +sieg +hall +phone +mail +jamrozik +washington +research +main +interest +distribut +system +object +orient +languag +softwar +engin +current +work +global +memoi +manag +workstat +cluster +hank +levi +mari +vernon +anna +karlin +mike +feelei +geoff +voelker +high +speed +network +greatli +encourag +network +memori +cach +virtual +memori +file +page +therebi +reduc +need +disk +access +network +node +memori +intens +applic +primari +memori +lightli +load +node +temporari +back +store +introduc +level +memori +hierarchi +name +global +memori +cach +li +logic +local +memori +disk +page +fundament +transfer +access +unit +remot +memori +system +page +size +perform +factor +recent +page +size +modern +processor +increas +order +provid +coverag +amort +disk +access +cost +unfortun +high +speed +network +small +transfer +need +provid +latenc +trend +page +size +thu +odd +network +memori +high +speed +network +studi +subpag +mean +reduc +transfer +size +latenc +remot +memori +environ +reduc +network +latenc +us +subpag +global +memori +environ +jamrozik +feelei +voelker +evan +karlin +levi +vernon +inproceed +seventh +confer +architectur +support +program +languag +oper +system +octob +postscript +thesi +research +debug +distribut +object +orient +system +theuniversit +joseph +fourier +grenobl +involv +guideproject +laboratoir +bull +imag +part +imaginstitut +extrem +peopl +area +snot +visit +louvr +galleri +look +map +franc +europ +world +somefamili +pictur +somefriend +pictur +jamrozik +washington +eduv +march +decemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..b77d3ed1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,30 @@ +jason +secoski +home +pagejason +secoski +jason +washington +eduaddress +comput +scienc +engin +depart +sieg +hall +cunivers +washington +boxseattl +offic +frequent +us +page +projectseattl +weather +forecast +weather +channel +jason +secoskylast +modifi +thursdai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..5b8edf67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,93 @@ +jeremi +baerjeremi +baer +never +school +interfer +educ +mark +twain +stuff +dream +made +william +shakespearei +current +graduat +student +comput +scienceat +univers +washington +interest +includ +artifici +intellig +human +computerinteract +multimedia +educ +softwar +softwar +engineeringtool +comput +gener +music +person +creativ +stuff +cool +place +spend +signific +time +pierian +spring +softwareoregon +museum +scienc +industri +omsi +pomona +collegeher +look +current +project +mine +eight +puzzl +java +applet +work +progress +experiment +virtual +travel +applet +copi +effect +demo +applet +question +project +static +layer +analysi +program +feel +stress +realli +silli +littl +macintosh +thati +wrote +year +download +like +metacrawl +searchcopyright +jeremi +baer +jbaer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..394ad98d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,74 @@ +jeremi +buhler +home +pagejeremi +buhler +home +pagedo +attempt +adjust +browser +control +transmiss +statu +first +year +student +institut +univers +washington +depart +comput +scienc +engin +offic +sieg +hall +offic +phone +home +phone +mail +address +jbuhler +washington +finger +tako +washington +import +stufflectur +note +suffix +tree +postscript +latex +research +come +soon +cours +schedulemi +public +keycyb +activ +electron +frontier +foundat +grinsrecommend +readingmi +undergradu +alma +mater +rice +universityquot +quotesmi +home +page +return +control +browser +jeremi +buhler +jbuhler +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..527f728c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,376 @@ +jeff +dean +jeff +dean +depart +comput +scienc +engin +univers +washington +seattl +offic +chateau +sieg +view +offic +would +dang +build +weren +futur +plansi +plan +graduat +summer +join +western +research +laboratori +sunni +palo +alto +bought +hous +nearbi +menlo +park +curriculum +vita +postscript +also +summari +postscript +research +teach +experi +projectsi +work +primarili +cecil +project +cecil +pure +object +orient +langaug +us +avehicl +integr +research +area +languag +design +program +environ +optim +compil +techniqu +weintend +techniqu +scale +larg +real +world +program +andto +keep +true +goal +implement +vortexcompil +cecil +current +line +cecil +codein +compil +much +work +group +involv +track +compil +bug +also +hang +spinproject +meet +spin +extens +oper +systemmicrokernel +support +dynam +adapt +system +interfacesand +implement +direct +applic +control +stillmaintain +system +integr +inter +applic +isol +spring +quarter +organ +depart +compil +seminar +research +interest +research +concern +effici +implement +ofobject +orient +languag +compil +optim +techniqu +particular +explor +whole +program +analysi +usedto +improv +perform +affect +increment +compil +andhow +interact +optim +techniqu +especiallyprofil +guid +optim +techniqu +also +interest +howwhol +program +optim +affect +languag +design +assumedthat +compil +access +entir +program +manycompromis +made +exist +languag +design +becom +unnecessari +interest +appli +lesson +learn +explor +wholeprogram +optim +toward +design +system +programminglanguag +flexibl +effici +base +underlyingimplement +whole +program +analysi +valid +research +three +principaldesign +develop +vortex +optim +compil +forobject +orient +languag +vortex +defin +languag +independentintermedi +languag +object +orient +languag +ishigh +enough +level +permit +optim +featur +messagesend +closur +object +creation +also +contain +uniqu +wayof +compos +optim +pass +parallel +obtain +better +result +repeatedli +run +passessepar +exampl +compil +appli +intraprocedur +classanalysi +profil +guid +receiv +class +predict +inlin +aliasanalysi +split +singl +combin +pass +part +work +vortex +develop +wai +structuringoptim +pass +permit +kind +composit +stillallow +pass +develop +larg +independ +eachoth +nice +framework +specifi +iter +data +flowanalys +permit +client +develop +optim +pass +withrel +littl +effort +exampl +vortex +dead +assignmentelimin +pass +approxim +line +code +publicationssom +recent +paper +author +author +personali +love +spici +food +mild +four +letter +word +coke +probabl +kick +caffein +habit +enjoy +moment +spent +wife +heidi +daughter +victoria +would +realli +like +somedai +wife +spent +honeymoon +kauai +hurrican +iniki +recent +biplan +ride +galvin +fly +seattl +guess +never +anyth +anymor +wife +took +flight +consist +minut +flight +around +downtown +seattl +puget +sound +travel +model +biplan +feel +dare +sadli +insur +coverag +doesn +permit +passeng +walk +wing +back +enjoi +ride +view +would +fantast +even +highli +recommend +look +someth +seattl +number +rather +lengthi +hotlist +jdean +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..59d4e647 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,279 @@ +damon +rees +home +pagejon +damon +reesepost +doctor +research +safeti +critic +softwar +groupdepart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +chateau +guggenheim +annex +phone +mail +jdrees +washington +research +interest +problem +safeti +critic +softwar +awar +devic +system +structur +caus +catastroph +fail +comput +hardwar +becom +less +expens +expect +place +softwar +comput +place +control +wider +rang +applic +softwar +advantag +convent +technolog +flexibl +advantag +come +price +softwar +behavior +complex +unpredict +perhap +best +public +exampl +three +mile +island +incid +oper +great +difficulti +diagnos +state +system +emerg +requir +stage +softwar +develop +project +success +especi +respect +safeti +reason +colleagu +concentr +requir +especi +commun +requir +requir +specif +develop +state +base +languag +call +requir +state +machin +languag +rsml +valid +us +languag +specifi +tca +avion +system +doctor +thesi +invent +hazard +analysi +procedur +base +hazard +oper +hazop +studi +signific +concept +procedur +borrow +hazop +deviat +henc +name +deviat +analysi +link +html +transcript +dissert +current +project +deviat +analysi +write +confer +articl +summar +deviat +analysi +make +deviat +analysi +softwar +avail +safeti +critic +softwar +group +studi +possibl +dynam +displai +control +deviat +analysi +search +siang +rsml +tool +integr +deviat +analysi +softwar +rsml +tool +kurt +partridg +make +alpha +version +rsml +tool +publicli +avail +kurt +partridg +sean +sandi +rsml +semant +draft +semant +document +includ +discuss +rsml +variant +develop +exampl +improv +semant +academ +histori +inform +comput +scienc +univers +california +irvin +dissert +softwar +deviat +analysi +postscript +comput +scienc +linguist +rice +univers +waxahachi +high +school +waxahachi +public +nanci +leveson +mat +heimdahl +holli +hildreth +rees +requir +specif +process +control +system +ieee +transact +softwar +engin +septemb +postscript +steven +dolin +rees +curv +interpret +diagnost +techniqu +industri +process +ieee +transact +industri +applic +januari +februari +leveson +heimdahl +hildreth +rees +ortega +experi +us +statechart +system +requir +specif +sixth +intern +workshop +softwar +specif +design +como +itali +octob +jdrees +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..d2af25d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,191 @@ +home +page +jack +lojack +lojlo +washington +depart +comput +scienc +engin +univers +washington +seattl +home +page +current +construct +research +paper +convert +thread +level +parallel +instruct +level +parallel +simultan +multithread +abstract +postscript +jack +susan +egger +joel +emer +henri +levi +rebecca +stamm +anddean +tullsen +submit +public +juli +exploit +choic +instruct +fetch +issu +implement +simultan +multithread +processor +abstract +postscript +dean +tullsen +susan +egger +joel +emer +henri +levi +jack +rebecca +stamm +proceed +annual +intern +symposium +comput +architectur +philadelphia +compil +issu +simultan +multithread +processor +postscript +jack +susan +egger +henri +levi +anddean +tullsen +proceed +first +suif +compil +workshop +stanford +januari +improv +balanc +schedul +compil +optim +increas +instruct +level +parallel +abstract +postscript +jack +susan +egger +proceed +sigplan +confer +program +languag +design +implement +jolla +california +june +page +compar +static +dynam +schedul +superscalar +processor +jack +gener +examin +written +report +examin +interact +balanc +schedul +compil +optim +jack +loph +qualifi +examin +written +report +current +work +architectur +compil +support +simultan +multithread +research +interest +also +includ +static +dynam +schedul +superscalar +vliw +processor +instruct +level +parallel +issu +well +compil +multithread +architectur +particular +investig +compil +issu +simultan +multithread +person +jack +page +find +franklin +eseattl +orsieg +hall +room +phone +coupl +pictur +recent +paintbal +experi +pictur +pictur +yahoojlo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..109537b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,54 @@ +sherman +home +page +shermanjoebob +washington +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +usami +research +interest +user +interfac +designinform +navig +visual +project +activ +user +interfac +inform +local +survei +useclass +project +hcreat +impress +home +pagequ +time +sarahsoftballstuff +might +want +automat +suggest +page +link +user +interfac +research +relat +topic +directori +us +pagesif +browser +support +send +mail +tojoebob +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..8867a2ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,73 @@ +home +page +joshua +seim +home +page +joshua +seim +abstract +joshua +seim +biolog +base +neural +network +system +current +test +theunivers +washington +depart +comput +scienc +begunin +lockean +blank +slate +josh +learn +emul +observedbehavior +successfulli +accomplish +sever +task +graduatingfrom +colleg +travel +volit +recent +start +demonstr +potenti +independ +creativ +thought +current +taskw +present +josh +earn +comput +scienc +expect +take +sever +year +document +provid +overviewof +josh +cognit +ambulatori +achiev +organ +person +academichierarchi +addition +futur +work +discuss +within +context diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..a9eb5135 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,7 @@ +jovan +home +page +jovan +home +page +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..ba3116f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,78 @@ +joanna +powerjoanna +pagehi +joanna +cat +academ +interestsmi +main +interest +comput +scienc +graphic +grad +school +uwneat +stuff +alma +matercool +graphic +link +jonathan +shadegraph +research +uwduoton +reproductionmi +main +academ +interest +biologi +especi +genet +molecular +biologi +alma +matermost +recent +site +gain +employmentpubl +joanna +power +brad +west +eric +stollnitz +david +salesin +reproduc +color +imag +duoton +proceed +siggraph +page +york +real +lifepast +homesdiversionsgend +issuesstatu +women +comput +sciencenow +home +pagefeminist +major +onlineultim +frisbeefun +stufffroggi +page +sean +quotesbrad +comic +musicevan +jokes +page +pagesmi +herojpow +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..b94a0a52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,19 @@ +jonathan +shake +jonathan +shake +sieg +hall +comput +scienc +engin +univers +washington +seattl +ahoi +homepag +finderresumlinkslast +updat +august +jshake +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..5cdb1031 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan +alemanyjuan +alemani +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..de747c3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,157 @@ +kari +pulli +home +pagekari +antero +pullii +third +year +graduat +student +comput +scienc +engineeringdepart +univers +ofwashington +work +interest +comput +graphic +comput +vision +andmathemat +try +combin +aspect +thesedisciplin +research +professor +depart +work +closest +tonyderos +graphic +actual +left +uwfor +pixar +lindashapiro +vision +addition +work +werner +stuetzl +andjohn +mcdonald +statist +duchamp +mathemat +andhugu +hopp +rick +szeliski +microsoft +research +qual +project +tribor +triplet +base +object +recognitionsystem +work +linda +report +technic +report +depart +comput +scienc +engin +universityof +washington +current +work +surfacereconstruct +rang +data +multipl +baselin +camerasystem +obtain +data +subdivis +surfac +waveletanalysi +surfac +geometri +reflect +function +pass +gener +examin +topic +rigidregistr +data +click +find +class +project +year +comput +architecturesystem +class +taught +susanegg +distribut +comput +theclass +taught +brianbershad +imag +understand +theclass +taught +steven +tanimoto +present +technic +sketch +siggraph +getto +slide +subdivis +surfac +slide +remov +wavelet +herear +slide +speaker +note +eacutesum +eacut +find +sieg +hall +comput +scienc +engin +univers +washington +seattl +email +kapu +washington +home +union +seattl +folk +takavainionti +oulu +finland +kari +antero +pulli +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..80e4d185 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,20 @@ +anna +karlinanna +rochel +karlinassoci +professor +univers +washington +sincejuli +work +comput +scienc +engin +depart +univers +washington +seattl +home +page +paperskarlin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..1fac85aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,13 @@ +home +page +yeunghom +page +yeungperson +infomi +picturemi +researchtelnet +machinessend +email +back +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..ad7396ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,109 @@ +kurt +partridg +kurt +partridg +academ +inform +graduat +student +comput +scienc +depart +univers +washington +interest +includ +softwar +engin +softwar +specif +specif +usabl +readabl +applic +formal +method +specif +softwar +safeti +page +describ +other +work +area +also +dabbl +user +interfac +design +human +comput +interact +java +program +recent +public +kurt +partridg +bddtcl +environ +visual +manipul +binari +decisiondiagram +interact +poster +html +postscript +poster +preview +postscript +nanci +leveson +bauer +mat +heimdahl +wayn +ohlrich +kurt +partridg +vivek +ratan +rees +environ +safeti +critic +softwar +nasa +confer +safeti +qualiti +postscript +background +start +graduat +school +complet +comput +scienc +berkelei +live +love +suburban +life +thousand +oak +parent +sister +name +oti +right +kurt +humor +corner +univers +washington +seattl +voic +kepart +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..8a6fb88d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,53 @@ +keith +golden +home +page +questa +pagina +anch +italiano +keith +golden +researchsoftbotsplanningkrselect +publicationscurriculum +vita +also +inpostscriptrandom +hackingwordbot +collabor +dictionari +like +bicycl +tour +languag +paint +photographi +natur +coffe +godless +pinko +stuff +dislik +suit +lawyer +car +friend +ellenmarcruben +laurennickrich +joannavivek +advisor +oren +etzioni +weld +keithgolden +depart +ofcomput +scienc +engin +univers +washington +seattl +kgolden +washington +complet +list diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..5bc2a2e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,219 @@ +kingsum +chow +kingsum +chow +kingsum +washington +kingsum +washington +educomput +scienc +engin +depart +univers +washington +seattl +usathi +inform +highwai +alwai +construct +tabl +content +person +research +upcom +confer +resum +current +schedul +bridg +glossari +univers +hong +kong +suggest +feedbackresearchmi +advisor +david +notkin +asynchron +softwar +evolut +softwar +develop +toolspap +line +softwar +qualiti +manag +respons +driven +softwar +evolut +readi +kingsum +chow +david +notkin +semi +automat +updat +applic +respons +librari +chang +technic +report +revis +version +appear +icsm +kingsum +chow +david +notkin +asynchron +softwar +evolut +asia +pacif +workshop +softwar +engin +research +march +hong +kong +kingsum +chow +program +transform +asynchron +softwar +mainten +proceed +ics +workshop +program +transform +softwar +evolut +william +griswold +editor +intern +confer +softwar +engin +april +seattl +washington +us +site +pcct +sorcererpcct +home +page +pcct +page +terrenc +parr +note +pcct +newbiesresumepleasedrop +mailto +resum +specifi +text +postscript +format +univers +hong +kongchines +univers +hong +kong +univers +hong +kong +univers +scienc +technolog +hong +kong +polytechn +univers +citi +univers +hong +kongsingapor +sitessingapor +onlin +world +wide +server +alumnu +websom +campu +friendstom +liew +fook +home +page +wang +page +jiang +weidongu +relatedunivers +washington +style +polici +manual +these +dissert +graduat +school +webserv +univers +book +storeinvestmentsfre +minut +delai +quot +watch +quot +market +data +experiment +mutual +fund +chart +line +invest +center +stock +commod +technic +analysismisc +read +chines +list +thoma +china +new +servic +welcom +onlin +hong +kong +movi +movi +movi +visit +sinc +kingsum +chow +last +modifi +date diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..e64c2c1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,108 @@ +kevin +bold +kevin +boldingkwb +washington +comput +scienc +engin +depart +univers +washington +seattl +juvenil +squirt +wander +search +suitabl +rock +hunk +coral +cling +make +home +life +task +rudimentari +nervou +system +find +spot +take +root +doesn +need +brain +eat +rather +like +get +tenur +dennett +conscious +explain +research +current +work +build +high +speed +latencylan +chaotic +router +previou +researchha +chaoticrout +form +minim +adapt +rout +formass +parallel +multicomput +profession +assist +professor +electr +engin +seattl +pacif +univers +also +work +part +time +researchassoci +univers +ofwashington +signific +paper +written +archiv +ofth +chaotic +rout +group +spend +time +teach +electr +comput +engineeringat +seattl +pacif +univers +person +photo +took +comethyakutak +seattl +moustach +real +case +want +visit +home +anoth +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..2e159f9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,45 @@ +richard +ladnerrichard +ladnerprofessor +depart +comput +scienc +engin +univers +washington +seattl +mail +ladner +washington +phone +offic +sieg +hall +room +person +short +biographyresearch +public +studentsteachingcomput +program +fall +quarter +introduct +comput +commun +network +spring +quarter +introduct +formal +model +comput +scienc +winter +quarter +data +structur +spring +quarter +ladner +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..dbdfeb5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,147 @@ +larri +mcmurchi +home +page +larri +mcmurchiedepart +comput +scienc +engin +univers +ofwashington +seattl +washington +voic +mail +larri +washington +offic +sieg +hall +room +current +research +larri +mcmurchi +director +laboratori +integratedsystem +chemistri +western +washington +univers +chemistri +univers +washington +hework +area +quantum +chemistri +graduat +studi +primari +focu +number +evalu +class +ofintegr +gaussian +function +later +appli +work +theconstruct +larg +spars +hamiltonian +matric +coauthorof +comprehens +packag +comput +program +meld +us +abinitio +calcul +small +molecul +sinc +join +staff +depart +comput +scienc +andengin +larri +supervis +work +technic +staff +ofth +laboratori +integr +system +coauthor +wirec +aschemat +captur +system +allow +design +code +withschemat +symbol +creat +concis +parameteriz +representationof +design +larri +also +involv +develop +andcommerci +mactest +integr +softwar +hardwareenviron +function +test +chip +board +andsubsystem +recent +work +area +fpga +andha +develop +gener +purpos +perform +driven +router +fpga +northwest +laboratori +integr +system +mactest +cost +vlsi +chip +tester +triptych +high +densiti +fpga +architectur +public +journal +articl +upcom +confer +return +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..a6f92079 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,321 @@ +nanci +leveson +home +page +nanci +levesondepart +comput +scienc +engineeringunivers +washingtonbox +express +mail +sieg +hall +seattl +leveson +washington +nanci +leveson +professor +join +faculti +come +california +search +rain +receiv +degre +mathand +comput +scienc +ucla +spent +form +year +professor +univers +california +irvin +professor +leveson +start +area +research +softwar +safeti +concern +problem +build +softwar +real +time +system +failur +result +loss +life +properti +advantag +topic +nobodi +question +goal +except +misanthrop +matter +anywai +student +recent +produc +aform +requir +specif +tca +real +collis +avoid +system +requir +commerci +aircraft +airspac +lesson +learn +project +never +anyth +like +seem +pleas +though +adopt +theiroffici +specif +student +current +work +safeti +analysi +specifi +behavior +tca +claim +thatyou +read +anyth +fact +take +train +late +safetyresearch +project +also +work +model +analysi +autom +highwai +automobil +variou +aerospac +system +subtop +research +area +includ +model +analysi +safeti +specif +safe +softwar +design +softwar +fault +toler +verif +valid +safeti +professor +leveson +editor +chief +ieee +transact +softwareengin +elect +member +board +director +computingresearch +associ +member +nation +research +council +commissionon +engin +technic +system +member +committe +comput +public +polici +recent +chair +nation +research +council +studi +evalu +space +shuttl +softwar +process +levesoni +fellow +award +aiaa +inform +systemsaward +contribut +space +aeronaut +comput +technolog +andscienc +develop +field +softwar +safeti +promotingrespons +softwar +system +engin +practic +life +propertyar +stake +year +leveson +book +softwar +safeti +safewar +system +safeti +comput +addison +weslei +publish +recent +paper +avail +list +paper +isalso +avail +copi +favorit +paper +actual +keynoteaddress +conf +softwar +engin +melbourn +titl +high +pressur +steam +engin +comput +softwar +click +qual +project +avail +follow +topic +appli +hazardanalysi +techniqu +aircraft +collis +avoid +system +model +writtenin +state +machin +style +languag +call +rsml +determin +wai +build +fault +tree +analys +rsml +model +gener +design +newrequir +specif +languag +includ +specifi +human +comput +interfac +deriv +gener +principl +design +languag +appli +hazard +analysi +human +machin +interfac +model +model +human +machin +interfac +control +system +aircraft +cockpit +analyz +aircraft +accid +report +involv +mode +awar +problemsand +gener +issu +deriv +inform +safe +design +human +comput +interact +finger +finger +leveson +washington +inform +citi +airport +current +perhap +contact diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..2c60afd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,328 @@ +hank +levi +home +page +henri +levi +professor +join +faculti +hank +current +research +project +focu +oper +system +parallel +distribut +comput +comput +architectur +particularli +simultan +multithread +architectur +object +base +languag +environ +recent +projectcal +opal +deal +singl +address +space +oper +system +comput +architectur +theetch +projecti +produc +tool +perform +instrument +optim +binari +execut +levi +author +book +numer +paper +comput +system +includ +outstand +paper +select +four +consecutiveacm +symposia +oper +system +principl +former +chair +sigop +special +interest +group +onoper +system +program +chair +theth +symposium +oper +system +principl +tobe +held +hold +carnegi +mellon +universityand +univers +washington +come +washington +consult +engin +digit +equip +corpor +work +span +rang +oper +system +architectur +distribut +system +workstat +hank +fellow +associ +comput +machineryand +recipi +fulbright +research +scholar +award +eleven +master +student +nine +student +survivedlevi +supervis +student +haveal +escap +academ +posit +major +research +lab +glu +workstat +hank +usual +befound +ski +bike +plai +tenni +help +lead +thedepart +infam +softbal +team +smile +potato +death +sampl +dessert +seattl +mani +dessert +parlor +recent +publicationsreduc +network +latenc +us +subpag +global +memori +environ +jamrozik +feelei +voelker +evan +karlin +levi +vernon +inproceed +seventh +confer +architectur +support +program +languag +oper +system +octob +postscript +implement +global +memori +manag +workstat +cluster +michael +feelei +william +morgan +freder +pighin +anna +karlin +henri +levi +chandramohan +thekkath +appear +proc +symposium +oper +system +principl +decemb +simultan +multithread +maxim +chip +parallel +dean +tullsen +susan +egger +henri +levi +proc +annual +intern +symposium +comput +architectur +june +exploit +choic +instruct +fetch +issu +implementablesimultan +multithread +processor +dean +tullsen +susan +egger +joen +emer +henri +levi +jack +rebecca +stamm +proc +intern +symposium +comput +architectur +share +protect +singl +address +space +oper +system +jeffrei +chase +henri +levi +michael +feelei +edwardd +lazowska +transact +comput +system +novemb +integr +coher +recover +distribut +system +michael +feelei +jeffrei +chase +vivek +narasayya +henri +levi +proc +first +symposium +oper +system +design +implement +novemb +hardwar +softwar +support +effici +except +handl +thekkath +levi +proc +conf +arch +support +prog +languag +oper +system +asplo +octob +separ +data +control +transfer +distribut +oper +system +thekkath +levi +lazowska +proc +conf +arch +support +prog +languag +oper +system +asplo +octob +levi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..06073029 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,156 @@ +opal +oper +system +projectop +opal +project +explor +oper +system +structur +tunedto +need +complex +applic +numberof +cooper +program +manipul +larg +share +persist +databaseof +object +opal +code +data +exist +singl +huge +share +address +space +singl +address +space +enhanc +share +andcooper +address +uniqu +time +interpret +thu +pointer +base +data +structur +directlycommun +share +program +time +bestor +directli +secondari +storag +without +need +translat +structur +simplifi +avail +larg +addressspac +provid +alpha +mip +risc +protect +opal +independ +singl +address +space +opal +thread +execut +within +protect +domainthat +defin +virtual +page +right +access +right +access +page +easili +transmit +oneprocess +anoth +result +much +flexibl +protectionstructur +permit +differ +dynam +chang +protect +option +depend +trust +relationshipbetween +cooper +parti +believ +organ +canimprov +structur +perform +complex +cooperatingappl +opal +prototyp +built +alpha +platform +ofth +mach +oper +system +inform +sourc +list +opal +relat +paper +faculti +member +hank +levi +lazowska +jeff +chase +duke +univers +current +graduat +student +mike +feelei +ashutosh +tiwari +vivek +narasayya +dylan +mcname +relat +inform +singl +address +space +mail +list +archiv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..0505ebc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,209 @@ +lopezgu +lopezlopez +washington +school +sieg +hall +depart +comput +scienc +engin +univers +washington +seattl +home +student +univers +washington +dissertationresearch +design +implement +constraint +imper +object +orient +languag +curriculum +vita +publicationsgu +lopez +bjorn +freeman +benson +alan +born +kaleidoscop +constraint +imper +program +languag +brian +mayoh +tougu +jann +penjam +editor +constraintprogram +springer +verlag +nato +advanc +studi +instituteseri +seri +comput +system +scienc +also +publisheda +technic +report +lopez +bjorn +freeman +benson +alan +born +constraint +object +ident +inproceed +european +confer +object +orient +program +bologna +itali +juli +lopez +bjorn +freeman +benson +alan +born +implement +constraint +imper +program +languag +kaleidoscop +virtual +machin +inproceed +confer +object +orient +programmingsystem +languag +applic +portland +oregon +octob +oopsla +tutorialsi +also +tutori +chair +upcom +oopsla +conferencein +jose +california +peopl +object +orient +technolog +andsoftwar +develop +meet +speak +oopsla +confer +oopsla +well +known +breadth +depth +high +qualiti +itsextens +tutori +program +previou +year +oopsla +tutorialshav +cover +aspect +object +orient +technolog +introductorysurvei +industri +softwar +engin +practic +lead +edg +academicresearch +topic +respons +request +past +attende +weespeci +encourag +propos +engin +issu +advanc +topic +anyon +consid +submit +propos +tutori +requestguidelin +tutori +submiss +tutori +chair +theoopsla +electron +hotlin +electron +mail +submiss +propos +encourag +enthusiast +accept +tutori +proposalswithout +email +address +accept +tutori +propos +march +notif +accept +withcamera +readi +note +august +interest +link +constraint +oopsla +tutori +green +direct +jimi +hendrix +grave +star +war +collector +archiv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..d20abcac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,45 @@ +omid +home +page +omid +madani +madani +washington +comput +scienc +engin +depart +univers +washington +seattl +chateau +suit +bhello +curiou +browser +welcom +fourth +year +graduat +student +depart +enjoytheori +also +like +keep +touch +areasinclud +graphic +life +work +academ +want +look +islamicarchitectur +isfahan +best +nomine +citi +home +countri +iran diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..48409060 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,21 @@ +mike +perkowitz +page +mike +perkowitznewsflash +mike +goe +blond +area +research +academia +music +creativ +randomfavorit +sheba +voyeur +written +grooveneedl +espressoresumemik +perkowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..935c5a8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,84 @@ +marc +langheinrich +homepagemarc +langheinrich +homepagemarc +langheinrichuniversitt +bielefeld +univers +washingtontechnisch +fakultt +depart +comput +scienceemail +imlangh +techfak +bielefeld +email +marclang +washington +eduabout +myselfi +spent +last +year +depart +comput +scienc +theunivers +washington +visit +graduat +student +thefulbright +program +check +follow +link +depthinform +resum +project +short +biopost +addressa +septemb +back +germani +finish +mastersat +univers +bielefeld +pleas +contact +german +address +homeschoolgermanyringstra +maintalphon +paulusplatz +bielefeldphon +woodlawn +seattl +phone +sieg +hall +phone +browser +support +tabl +access +data +list +formatmarc +langheinrich +univers +washington +depart +comput +scienc +email +marclang +washington +http +washington +home +marclang diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..1ba24ebc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,172 @@ +home +marla +baker +marla +washington +chief +editor +depart +comput +scienc +engin +univers +washington +seattl +partner +crime +bentlei +academ +interestsgraph +user +interfac +human +comput +interact +educ +softwar +comput +support +collabor +learn +cscl +comput +graphic +visual +techniqu +visual +program +languag +current +work +current +work +stevetanimoto +lauren +bricker +coimag +project +devleop +collabor +educ +activ +order +explor +cooper +contol +object +goal +work +investig +differ +wai +multipl +user +cansimultan +share +manipul +given +object +wai +assess +interact +also +work +part +time +interfac +packard +bell +compani +resum +publicationsbak +marla +stephen +eick +space +fill +softwar +visual +journal +visual +languag +comput +june +burnett +baker +bohu +carlson +yang +scale +visual +program +languag +ieee +comput +special +issu +visual +program +march +burnett +margaret +marla +baker +classif +system +visual +program +languag +journal +visual +languag +comput +septemb +baker +marla +stephen +eick +visual +tool +larg +softwar +system +proceed +intern +confer +softwar +engin +sorento +itali +baker +marla +stephen +eick +baker +eick +method +apparatu +displai +hierarch +inform +larg +softwar +system +patent +applic +submit +octob +tutori +geometr +transform +imag +metip +program +environ +check +page +offic +sieg +marla +baker +marla +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..ac4e3a2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,118 @@ +matthai +philipos +home +page +matthai +philipos +work +dynam +compil +project +dynam +compileri +beast +gener +optim +code +runtim +shortterm +interest +figur +produc +good +code +dynam +runtim +modern +processor +architectur +applic +side +think +interpret +basedsystem +real +time +constraint +like +java +browser +canbenefit +select +runtim +compil +like +wire +asystem +runtim +compil +goe +work +withprofessor +susan +eggersand +craig +chamber +work +comput +scienc +engin +depart +univers +washington +seattl +phone +home +seattl +public +ausland +philipos +chamber +egger +bershad +fast +effect +dynam +compil +confer +onprogram +languag +design +implement +chamber +egger +ausland +philipos +mock +andp +pardyak +automaticdynam +compil +support +event +dispatch +extens +system +workshop +compil +support +system +softwar +februari +bookmark +stuff +plai +frequentlymiscellan +link +stuff +local +importancefrom +past +abuwhi +page +black +blue +ribbon +campaign +matthai +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..9a6f8499 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,229 @@ +neil +mckenzi +menu +fine +dine +index +page +contact +futur +project +current +project +past +project +public +list +person +inform +game +contact +inform +neil +mckenzi +mitsubishi +electr +research +laboratori +broadwai +floor +cambridg +phone +mail +mckenzi +merl +current +projectsgonna +teenag +lobotomi +ramonesi +live +east +coast +mile +east +seattl +andwork +merl +note +current +involv +projectconcern +real +time +volum +render +medic +data +copiou +free +time +expatri +graduat +student +work +onchaot +routingwith +faculti +advisor +carl +ebel +larri +snyder +chaotic +rout +packet +rout +algorithm +mesh +torusnetwork +dissert +design +implement +thecranium +messag +pass +interfac +compatiblewith +network +us +chaotic +rout +past +projectsi +teach +assist +summer +design +implement +chip +tester +call +mactest +maintain +carl +netlist +graph +isomorph +tool +calledgemini +industri +speak +knowna +layout +schemat +tool +gemini +avail +interest +pleas +send +mail +larri +mcmurchi +larri +washington +public +cranium +interfac +messag +pass +adapt +packetrout +network +proceed +parallel +comput +rout +andcommun +workshop +seattl +link +tomactest +home +page +gemini +user +guid +last +updat +march +person +inform +angel +marri +pictur +hous +arlington +massachusett +head +east +coast +us +livein +fashion +seattl +neighborhood +ofballard +angel +creativ +page +contain +exampl +comput +artworkcr +us +adob +photoshop +ownedthi +year +onlyth +memori +remain +shirt +correctlyguess +answer +toriddl +jour +octob +label +place +jar +mckenzi +countri +farm +honei +produc +myuncl +mckenzi +edmonton +alberta +canada +amus +neil +linkschairman +shot +linksnorm +gregori +bookmark +halcyon +eugen +spafford +link +purdu +randi +pausch +shot +link +virginia +wallach +scool +link +princeton +neil +mckenzi +mckenzi +washington +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..0ee9abae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,251 @@ +marc +fiuczynski +home +page +marc +fiuczynski +home +page +marc +fiuczynski +washington +comput +scienc +engin +depart +univers +washington +seattl +backgroundi +graduat +student +univers +washington +depart +comput +scienc +engin +grewup +germani +near +sseldorf +spent +year +highschool +princeton +receiv +comput +scienc +fromrutg +univers +spent +sever +summer +bell +lab +mitr +corpor +work +rang +ofproject +sole +proprietor +system +companythat +creat +decemb +sell +distribut +fault +toler +network +base +telephon +system +built +scratch +setof +chasi +processor +us +univoic +telephon +interfac +cardsand +vxwork +oper +system +time +spend +hack +spin +safe +adapt +extens +oper +system +primari +contribut +spin +extens +protocol +architectur +forappl +specif +network +applic +achiev +compellingperform +improv +us +structur +compar +tosimilar +applic +run +commerci +platform +demonstr +work +servic +http +request +contacthttp +spin +washington +recent +report +paper +extens +protocol +architectur +forappl +specif +network +design +implement +perform +paper +describ +anextens +protocol +architectur +allow +anyon +custom +anin +kernel +protocol +graph +enabl +applic +achiev +betterperform +compar +similar +applic +run +conventionaloper +system +demonstr +work +servic +http +request +contacthttp +spin +washington +appear +proceed +winter +usenix +technicalconfer +extens +safeti +perform +spin +oper +system +design +implement +perform +paper +appear +proceed +fifteenth +symposium +oper +system +principl +languag +support +extens +oper +system +pretti +happi +deal +shortcom +inord +languag +safe +extens +oper +system +paper +describeshow +address +shortcom +safe +dynam +link +extens +oper +system +describ +dynam +linker +load +code +kernel +point +isth +abil +creat +manag +linkabl +namespac +describ +interfac +andcollect +interfac +protect +softwar +issu +posit +paper +compar +softwar +hardwar +protect +mechan +proceed +fifth +ieee +workshop +topic +oper +system +region +analysi +parallel +elimin +method +data +flow +analysi +ieee +transact +softwar +engin +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..671dfe7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,39 @@ +melani +fulgham +comput +scienc +minim +versu +minim +rout +algorithm +rout +method +model +develop +rout +model +help +predict +compar +perform +router +real +parallel +machin +deflect +rout +upper +lower +bound +practic +requir +sort +deflect +rout +algorithm +mesh +topolog +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..a2a2abd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,74 @@ +meng +heng +homepag +meng +homepagemenghe +washington +edubox +depart +comput +scienc +engineeringunivers +washingtonseattl +second +year +student +comput +scienceat +univers +washington +undergrad +univers +pennsylvania +research +interestsi +interest +imag +retriev +problem +try +findimag +huge +databas +imag +virag +andqbicar +commerci +exampl +similar +kind +stuff +work +snapshot +done +singaporesingapor +infomap +provid +fact +andstatist +singapor +singaporeonlin +guid +plan +take +trip +nation +comput +boardi +charg +transform +singapor +anintellig +island +work +graduat +strait +time +singapor +main +english +newspap +visit +sinc +menghe +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..8711a5e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,88 @@ +michael +ernst +home +pagemichael +ernsti +graduat +student +univers +washington +comput +scienc +depart +previous +lectur +riceunivers +comput +sciencedepart +research +programanalysi +group +microsoft +research +graduat +student +laboratori +comput +scienc +eec +depart +page +frequent +updat +technic +interest +includ +compil +static +analysi +slice +debug +optim +code +serial +parallel +program +program +chair +intermedi +represent +workshop +coloc +popl +intellectu +properti +particularli +comput +program +area +includ +game +theori +cryptographi +philosophi +denot +semanticsi +maintain +list +resourcesfor +confer +workshop +organ +occasion +manag +slip +awai +work +carri +real +life +link +possibleinterest +includ +page +maintain +michael +ernst +mernst +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..66ec8882 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,159 @@ +marku +mock +home +page +marku +mock +mock +washington +comput +scienc +rttemberg +grew +anotherpart +state +district +biberach +upper +swabia +oberschwaben +come +studi +comput +scienc +univers +karlsruh +whichi +obtain +diplom +comput +scienc +also +spent +year +umass +fulbright +grante +main +research +interest +parallel +distribut +comput +object +orient +system +compil +current +work +dynam +compil +interest +includ +spanish +latin +american +cultur +travel +good +book +labyrinth +solitud +chess +mainstream +movi +dieangst +torwart +beim +elfmet +know +handk +salsa +merengu +danc +still +time +left +check +els +seattl +come +publicationssepar +list +link +interest +stuff +current +chess +event +chess +olympiad +yerewan +colloquia +oopsla +volunt +page +mossi +bit +grad +journal +link +csek +home +page +link +home +page +link +csebi +home +page +link +cse +home +page +cours +graduat +studentsimag +depart +electr +engineeringy +wouldn +expect +squar +live +view +metacrawl +search +altavista +search +deutsch +well +realaudio +live +stuff +cool +linksand +quot +consid +lili +field +grow +toil +neither +spin +unto +even +solomon +glorywa +arrai +like +matthew +page +access +time +sinc +last +updat +mock +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..7d4017c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,18 @@ +vivek +narasayya +home +page +vivek +narasayya +nara +washington +comput +scienc +engin +depart +univers +washington +seattl +person +informationresearch +interestspap diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..a6b65768 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,81 @@ +himanshu +nautiy +home +pagehimanshu +nautiyalthi +page +heavi +construct +himanshu +nautiyaldept +comput +scienc +engin +mail +stop +univers +washington +seattl +offic +sieg +offic +phone +cours +take +autumn +quarter +principl +digit +system +design +artifici +intellig +finger +nautiy +washington +edugod +gift +personkind +dougla +adam +terri +pratchett +pelham +grenvil +wodehouseth +order +alphabet +last +name +impli +favorit +link +place +india +internet +radio +search +engin +india +himanshu +nautiy +name +friend +delhi +finish +tech +astronomi +skate +comput +scienc +aviat +travel +numismat +sound +much +profound +coin +collect +cook +internet +movi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..26e13552 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,95 @@ +labyrinth +mediocr +bemoan +hype +skeptic +cynic +sinc +research +current +work +automaticconstruct +wrapper +inform +resourc +alsointerest +sever +area +artifici +intellig +andcognit +scienc +paper +beeninvolv +stuff +avail +anonym +servic +provid +glbal +infrmatin +sperhighwai +preliminari +version +divers +meter +avail +pictur +hand +shortli +surgeri +need +random +number +alwai +handi +know +date +time +week +favorit +color +avail +line +lost +easili +return +page +ronald +wilson +reagan +need +temperatur +look +javascript +enabl +browser +automat +send +mail +great +republican +tell +like +miscellani +need +contact +bookmark +technolog +societi +awar +bitter +ironi +involv +nonetheless +madeavail +wendel +berri +guidelin +constitutesgood +technolog +comment +nichola +kushmerick +uwcs +seattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..4398e802 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,109 @@ +wayn +ohlrich +wayn +ohlrich +ohlrich +washington +depart +comput +scienc +engin +univers +washington +seattl +offic +guggenheim +annex +chateau +public +research +current +work +nanci +leveson +model +check +automat +test +gener +specif +spare +time +work +brian +bershad +anna +karlin +romer +memori +perform +analysi +project +local +known +sever +damag +project +group +paper +make +debut +isca +itali +summer +safeti +research +home +page +leveson +memori +sytem +home +page +isca +romer +ohlrich +bershad +karlin +reduc +memori +overhead +us +onlin +superpag +promot +univers +class +inform +cours +person +interest +game +wayn +world +wonder +inform +page +contain +sort +us +link +seattl +inform +home +page +invest +page +contain +invest +inform +research +inform +found +us +page +creat +octob +last +modifi +march diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..0d3f005e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,106 @@ +gershoni +gershoni +gershoni +washington +graduat +studentcomput +scienc +engin +departmentunivers +washingtonoffic +sieg +home +seattl +second +year +graduat +student +univers +ofwashington +move +seattl +live +california +seven +year +fouryear +berkelei +three +year +lancast +northeast +angel +origin +israel +live +haifa +holon +like +practic +kwon +plai +basketbal +hike +quarter +take +class +whole +bunch +seminar +amta +comput +architectur +usual +find +offic +sieg +class +offic +hour +aremondai +wednesdai +potenti +employ +welcom +look +resum +pictur +took +last +summer +click +tose +cool +shirt +design +made +summer +graphicsprogram +call +virtual +realiti +interest +link +time +daili +new +summari +york +time +riderlink +seattletransport +option +inform +israel +comput +scienc +mathemat +depart +univers +california +berkelei +gershoni +washington +access +sinc +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..88031629 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,90 @@ +ross +ortega +ross +ortega +wear +jean +ortega +washington +depart +comput +scienc +engin +univers +washington +seattl +knew +would +call +research +would +albert +einstein +welcom +home +pageaft +go +school +work +boston +year +decid +head +west +realli +northwest +came +fall +leav +sometim +accord +advisor +gaetano +borriello +offici +work +chinook +project +tool +real +time +embed +control +system +unoffici +brew +beer +learn +hack +try +teach +german +shepherd +tequila +behav +profession +section +chinook +project +myresum +file +educ +experi +public +paper +puppi +pictur +tequila +tequila +color +tequila +color +offic +sieg +check +page +link +find +interest +last +updatedthu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..9704812e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,138 @@ +untitl +document +flat +morri +minor +pardodepart +comput +scienc +engineeringunivers +washingtonbox +seattl +washingtonusapardo +washington +edunot +show +blue +ribbon +asimgsrc +http +graphic +icon +blueribbon +rib_trn_plain_sm +show +quiet +opposit +free +speechprohibit +nation +telecommun +bill +likewis +white +letter +black +background +beth +everybodi +need +pardo +around +sometim +pardo +sometim +beth +share +academicsom +papersi +work +find +particularli +interest +runtim +code +gener +rtcg +instruct +simul +trace +tool +home +page +oncomput +architectureandcompil +quick +link +otherpeopl +work +comput +class +thesi +stylenon +academicfeatur +item +featur +item +month +weak +site +week +doesn +blink +anymor +regular +itemsbicyclesbusinessescomputersfoodhumori +famou +thing +relat +legal +ethic +weirdnesslinux +journalmusicgoofi +politicssci +think +though +unrel +stuff +think +transport +seattl +movi +list +seattl +film +festiv +dant +search +truli +gross +stori +trepan +privaci +site +log +mail +address +wors +take +data +disk +everi +time +consid +weather +courtesei +seattl +particular +also +weather +courtesei +newhous +newspap +also +weather +courtesi +yesterdai +stuffpardo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..d5afc1a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,119 @@ +przemek +pardyak +home +page +przemyslaw +pardyak +pardi +washington +first +week +grad +school +coupl +year +later +third +year +graduat +student +comput +scienc +theunivers +washington +current +research +area +ofoper +system +also +interast +distribut +system +languag +compil +besid +grad +school +life +fill +withth +seattl +drizzl +hike +outdoor +activ +notbusi +enjoi +school +drizzl +book +music +find +short +descript +research +interest +resum +also +list +paper +outdat +happenswhen +busi +schedul +projectsspinan +extens +oper +system +built +gloriou +leadership +brian +bershad +group +mechan +object +base +systemsgroup +commun +group +mechan +emerald +object +basedprogram +languag +time +system +interest +link +polish +connect +variou +resourc +somehow +relat +poland +research +relat +project +relat +mine +univers +research +unrel +miscellan +work +comput +scienc +engin +depart +univers +washington +seattl +phone +home +seattl +phone +pardi +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..3dc96102 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,212 @@ +paul +franklin +home +pagepaul +franklin +home +pagei +current +graduat +student +univ +washington +inth +depart +comput +scienc +offici +myoffic +sieg +work +rapid +project +us +first +year +student +thesumm +second +year +someon +express +concern +aboutthi +character +usual +somewher +north +seattl +iliv +school +anoth +pictur +best +oneof +dai +around +scan +better +on +norwegian +poem +likea +collect +fortun +receiv +friend +localchines +restaur +mundan +stuffi +hope +folk +might +find +stuff +us +hotlink +pagesstuff +maintainmi +schedul +rememb +updat +contact +love +travel +necessarili +tell +everyon +hire +mewher +come +high +school +diploma +live +high +school +inmorgan +hill +taught +prolog +first +time +graduat +comput +scienc +engin +fromuc +davi +taught +prolog +second +time +andy +taught +prolog +glad +ididn +year +univers +bergen +ialso +research +professor +electr +andcomput +engin +depart +stuffwhil +davi +partner +variou +relatedact +tend +matt +chri +jame +evengot +togeth +recent +us +chri +marriag +joann +anexcus +chri +brother +also +made +itin +photo +throughout +undergradu +year +kept +bike +never +davi +flat +year +exchang +student +univers +ofbergen +bike +hillier +longer +rout +returnedto +davi +took +rollerblad +sinc +bike +around +town +wasnow +easi +bike +drop +year +work +hewlettpackard +return +vengeanc +move +toseattl +done +annual +seattl +portland +bike +ride +intwo +dai +year +inseason +march +april +june +rest +year +justcommut +bike +lot +danc +particularli +lindyhop +know +everi +html +document +header +linethat +look +someth +like +doctyp +html +public +ietf +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..42a93aa1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,115 @@ +frdric +pighin +pighin +washington +comput +scienc +lcommun +wonder +citi +tourist +quarter +ta +taught +anna +karlin +gui +found +much +often +dani +qual +report +rest +like +british +movi +monti +python +swim +nick +cave +corto +maltes +italian +comic +cat +pari +berlin +venis +simpson +mpeg +rain +surpris +like +traditionn +french +marin +song +collect +otherwis +work +graphic +anna +karlin +supervis +although +formerli +studi +systemher +name +paper +implement +global +memori +manag +workstat +cluster +michael +feelei +william +morgan +freder +pighin +anna +karlin +henri +levi +chandramohan +thekkath +proceed +symposium +oper +system +principl +decemb +postscript +live +action +breath +take +pictur +door +refresh +everi +minut +lucki +might +even +look +darren +juan +note +might +dark +live +action +pictur +squar +refresh +everi +five +minut +note +might +rain diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..19efbf7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,17 @@ +ruth +anderson +home +page +ruth +anderson +washington +comput +scienc +engin +depart +univers +washington +seattl +wxyc +map +brother diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..a39b01cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,18 @@ +home +home +josh +home +page +comput +scienc +engin +depart +univers +washington +seattl +redston +washington +joshua +redston +redston +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..820dd5f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,188 @@ +jakobovit +home +page +jakobovit +washington +comput +scienc +engin +depart +univers +washington +seattl +usai +get +comput +scienc +departmentof +univers +ofwashington +wonder +citi +seattl +alwai +rain +thisup +date +weatherreport +sneak +peek +live +imag +video +camera +mount +outsid +camera +point +beauti +drumhel +fountain +clear +youcan +catch +glimps +rainier +glori +probabl +cleardai +nice +color +pictur +research +develop +base +repositori +manag +program +toolkit +build +multi +media +consol +construct +imag +databas +part +astructur +inform +framework +brain +map +build +knowledg +base +support +digit +anatomist +line +interact +atla +human +bodi +implement +databaseenviron +vision +research +local +expert +persistentprogram +languag +interest +els +proud +creator +internetracquetbal +ladder +taught +advanc +program +extens +wrote +perl +script +manag +rotisseriebasebal +leagu +stand +updat +daili +stat +fromusa +todai +rais +happi +famili +africancichlid +visit +home +town +honolulu +everi +chanc +camp +magic +kalalau +vallei +movi +gambl +stock +market +darn +good +fantasi +footbal +team +newslet +would +javafamili +link +mydad +leon +jame +professor +psycholog +univers +hawaii +whoi +write +book +traffic +psycholog +foster +live +onlin +polem +emanuel +swedenborg +step +dian +nahl +professor +librari +inform +scienc +whoprovid +great +index +onlin +librari +databas +judi +jakobovit +realtor +hawaii +uncl +eddi +jakobovit +run +site +bioscienc +profession +bookmarksif +java +click +drag +word +make +poem diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..66783e56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight +welcom +galleri +twenti +photograph +five +head +robert +grimm +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..ebb13364 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,174 @@ +romer +romer +depart +comput +scienc +engin +univers +washington +seattl +home +offic +romer +washington +eduoffic +chateau +sieg +research +interestsi +research +oper +system +supportfor +high +perform +memori +systemswith +realli +smart +peopl +likebrian +bershad +brad +chen +alan +eustac +anna +karlin +denni +wayn +ohlrich +andwayn +wong +three +recent +paper +subject +reduc +memori +overhead +us +onlinesuperpag +promot +romer +ohlrich +karlin +bershad +isca +dynam +page +map +polici +cach +conflict +resolutionon +standard +hardwar +romer +bershad +chen +osdi +avoid +conflict +miss +dynam +larg +direct +mappedcach +bershad +romer +chen +asplo +addit +friend +studi +perform +ofinterpret +learn +theproject +rockyhom +page +also +wrote +paper +togeth +structur +perform +interpret +romer +voelker +wolman +wong +baer +bershad +levi +asplo +appear +abstract +postscript +bibliographi +lobo +read +listrandom +stuffa +hous +comput +scientist +rai +limb +romer +knee +arthroscop +surgeri +mark +hill +wrist +friend +dylansaid +hair +couldn +flowbe +said +could +beingexperiment +scientist +conduct +experi +judg +result +attend +isca +travel +europ +took +somepictur +romer +eatsomeon +els +food +accompani +sincer +ration +forexampl +romer +lunch +thought +leftth +countri +would +didn +origin +unknown +edward +tuft +tip +public +speak +father +edit +american +journal +physic +place +ticker +symbol diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..abb89de2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,281 @@ +memori +system +research +univers +washingtonmemori +system +researchdepart +comput +scienc +engin +univers +washington +seattl +welcom +home +page +memori +system +research +descript +research +group +investig +techniqu +operatingsystem +improv +memori +system +perform +work +sharesth +follow +featur +reli +combin +simpl +hardwar +support +oper +system +modif +monitor +dynam +behavior +applic +monitor +mechan +incur +small +overhead +runtim +inform +collect +us +identifi +sourc +memori +system +delai +cach +miss +miss +identifi +resolv +bottleneck +overhead +monitor +mechan +also +significantli +improv +overal +system +perform +recent +project +explor +polici +monitorappl +memori +refer +pattern +order +identifi +resolvetlb +perform +problem +poor +perform +result +tlbi +small +cover +current +applic +work +severalmodern +architectur +support +superpag +page +whose +size +amultipl +system +base +page +size +system +tlbperform +improv +us +larger +page +cost +ofwast +memori +intern +fragment +simul +sever +polici +adapt +page +size +dynam +todiffer +region +applic +address +space +constructingsuperpag +copi +compon +page +contigu +region +ofmemori +develop +polici +monitor +miss +balancesth +potenti +benefit +superpag +reduct +futur +tlbmiss +cost +construct +superpag +memorycopi +construct +superpag +misspattern +warrant +polici +attain +perform +largepag +without +intern +fragment +detail +project +paper +reduc +memori +overhead +us +onlin +superpag +promot +isca +appear +look +someon +implement +algorithm +would +makea +good +qual +master +project +project +descript +peoplefaculti +brian +bershad +bershad +washington +anna +karlin +karlin +washington +current +student +denni +dlee +washington +wayn +ohlrich +ohlrich +washington +romer +romer +washington +wayn +wong +waynew +washington +paper +reduc +memori +overhead +us +onlin +superpag +promot +romer +ohlrich +karlin +bershad +isca +appear +dynam +page +map +polici +cach +conflict +resolut +standard +hardwar +romer +bershad +chen +osdi +avoid +conflict +miss +dynam +larg +direct +map +cach +bershad +romer +chen +asplo +comparison +memori +perform +mip +alpha +wong +qual +project +report +univers +washington +instruct +cach +effect +differ +code +reorder +algorithm +qual +project +report +univers +washington +memori +system +bibliographi +romer +romer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..67a9b92e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,149 @@ +project +rocki +architectur +perform +interpret +languag +project +rocki +architectur +perform +interpret +languag +project +descript +interpret +languag +becom +increasingli +popular +last +sever +year +part +demand +portabl +safeti +eas +project +examin +perform +interpret +languag +environ +sever +perspect +interpret +strategi +implement +processor +architectur +util +basi +studi +collect +benchmark +microbenchmark +implement +sever +interpret +languag +perl +java +mipsi +us +variou +instrument +trace +techniqu +evalu +perform +characterist +benchmark +order +gain +insight +similar +differ +languag +execut +environ +peoplefaculti +jean +loup +baer +baer +washington +brian +bershad +bershad +washington +henri +levi +levi +washington +student +denni +dlee +washington +romer +romer +washington +geoff +voelker +voelker +washington +alec +wolman +wolman +washington +wayn +wong +waynew +washington +papersrom +voelker +wolman +wong +baer +bershad +levi +structur +perform +interpret +asplo +appear +abstractpostscriptjava +measur +xjava +sourc +file +benchmarkstoolsto +collect +inform +perform +applic +vebeen +build +binari +rewrit +tool +call +etch +etch +yetpublicli +avail +read +etchhom +page +intern +documentationproject +intern +document +avail +peopl +last +updat +juli +romer +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..fa5d9009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,125 @@ +richard +roger +home +page +richard +rogersrrog +washington +educomput +scienc +engin +departmentunivers +washington +seattl +usaoffic +chateau +sieg +phone +intellig +system +laboratri +phone +research +develop +system +softwar +systol +cellular +arrai +machin +scam +massiv +parallel +imag +process +comput +softwar +includ +compil +basic +imag +process +morpholog +librari +simul +obtain +softwar +paper +scam +current +work +document +layout +extract +intellig +system +also +help +produc +document +groundtruth +databas +optic +charact +recognit +commun +scienc +camp +director +comput +facil +northwest +center +environment +educ +ncee +offer +summer +scienc +camp +student +ag +washington +beauti +juan +island +also +work +scienc +splash +program +seattl +univers +splash +year +long +nation +scienc +foundat +fund +scienc +program +grade +minor +girl +seattl +area +interest +corn +snake +jessica +squishi +order +increas +length +kuow +nation +public +radio +stationi +bake +best +pecan +seattlelast +modifi +februari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..f3006a7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,22 @@ +mike +home +page +mike +salisburysalisbur +washington +comput +scienc +engin +depart +univers +washington +seattl +usaoffic +chateau +sieg +lifehistori +school +home +friend +vitacool +stuff diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..935efcc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,307 @@ +stefan +savag +stefan +savag +savag +washington +work +comput +scienc +engin +depart +univers +washington +seattl +home +seattl +sampl +rich +post +industri +cultur +modern +pittsburghfor +year +caught +ride +bershad +migrat +mnow +gradual +student +first +rank +strongbackground +centuri +american +histori +provid +witha +firm +irrelev +platform +trash +talk +peer +fool +tocqeuvil +statement +american +find +tiresom +inconveni +exercisepolit +right +distract +industri +quit +similar +tocurr +microprocessor +architectur +trend +favor +need +ofappl +code +oper +system +work +brian +bershad +rest +merri +band +onan +oper +system +project +call +spin +projectsspinspin +extens +oper +system +omnifemtokernel +whichsupport +dynam +adapt +system +interfac +andimplement +direct +applic +control +stillmaintain +system +integr +inter +applic +isol +thing +writingspin +paper +extens +safeti +perform +spin +oper +system +proceed +symposium +oper +system +principl +sosp +copper +mountain +decemb +slide +talk +languag +support +extens +oper +system +proceed +first +workshop +compil +support +system +softwar +wcsss +tucson +write +oper +system +modula +proceed +first +workshop +compil +support +system +softwar +wcsss +tucson +protect +softwar +issu +proceed +fifth +workshop +topic +oper +system +hoto +orca +island +issu +design +extens +oper +system +proceed +first +usenix +symposium +oper +system +design +implement +osdi +monterei +novemb +panel +abstract +longer +unpublish +version +paper +spin +extens +microkernel +applic +specif +oper +system +servic +proceed +sixth +sigop +european +workshop +match +oper +system +applic +need +version +appear +oper +system +review +januari +spin +extens +microkernel +applic +specif +oper +system +servic +univers +washington +tech +report +march +afraid +paper +afraid +frequent +redund +arrai +independ +disk +proceed +winter +usenix +technic +confer +diego +januari +best +student +paper +slide +talk +reserv +paper +processor +capac +reserv +oper +system +support +multimedia +applic +proceed +first +ieee +intern +confer +multimedia +comput +system +boston +processor +capac +reserv +abstract +manag +processor +usag +proceed +fourth +workshop +workstat +oper +system +wwo +napa +octob +processor +capac +reserv +multimedia +oper +system +carnegi +mellon +tech +report +real +time +mach +paper +real +time +mach +timer +export +time +user +proceed +third +usenix +mach +symposium +santa +april +slide +talk +interest +music +hikingthi +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..fa5feed1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,12 @@ +sean +sandi +sean +david +sandi +washington +comput +scienc +washington +last +revis +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..7bec1b22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,35 @@ +richard +segal +home +page +richard +segaldepart +comput +scienc +engin +univers +washingtonbox +seattl +segal +washington +person +biographi +better +half +famili +pictur +research +overview +brute +internet +softbot +public +curriculum +vita +postscript +amus +archeri +bicycl +racquetbal +ski +softbal diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..a0fb8e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,179 @@ +stefan +bergstefan +berg +work +home +sieg +hall +univers +washington +seattl +univers +seattl +phone +phone +email +sgberg +washington +content +address +inform +past +project +activ +current +project +activ +interest +page +finger +inform +stefan +born +cologn +germani +spring +complet +mittler +reif +thgrade +schillergymnasium +cologn +come +unit +statesto +receiv +high +school +diploma +bloomington +high +school +north +indiana +stefan +receiv +bachelor +scienc +honor +distinctionin +field +comput +scienc +fromindiana +univers +momenth +work +toward +univers +washington +expect +complet +date +sometim +thiscenturi +past +project +activ +project +implement +reduct +machin +teach +assist +project +comparison +hardwar +softwar +solut +fals +share +teach +assist +project +studi +linear +time +sort +algorithm +teach +assist +current +project +activ +qual +project +comparison +hardwar +softwar +solut +fals +share +interest +page +pictur +peopl +around +raft +pictur +bookmark +excit +squar +univers +washington +moment +weather +seattl +print +yourselfsometh +crazi +didn +even +come +particularsolut +implement +done +sall +line +shouldn +contain +trail +carriag +return +byte +compil +without +warn +program +print +exact +sourc +code +itin +fewer +byte +like +putchar +char +els +els +char +main +char +putchar +char +els +els +char +main +char +printf +printf +printf +printf +stefan +resum +avail +inpostscript +andtex +format diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..0d895f78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,67 @@ +ward +shadegreet +salut +third +year +grad +student +hereat +dubcs +interact +renderingof +complex +scene +current +thing +thing +follow +project +link +walkthruproject +amonglot +pictur +anim +find +siggraph +paperdescrib +recent +work +shortcut +click +thepictur +island +lower +left +corner +page +lot +interest +work +go +mani +differ +aspectsof +comput +graphic +thegraph +imag +laboratori +get +done +contact +info +daili +schedul +travel +plan +project +public +pictur +page +look +scrunch +make +browser +least +pixel +wide diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..ddcff9a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,37 @@ +shun +leungshun +leung +student +depart +comput +scienc +andengin +univers +ofwashington +work +prof +johnzahorjan +pointer +research +research +summari +public +curriculum +vita +upon +request +shun +leung +depart +comput +scienc +engin +univers +washington +seattl +email +shuntak +washington +last +modifi +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..627f578f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,87 @@ +welcom +shuichi +home +page +shuichi +koga +skoga +washington +graduat +studentcomput +scienc +engin +departmentunivers +washington +name +shuichi +koga +haven +notic +bynow +start +graduat +studi +univers +ofwashington +haven +quit +figur +myqual +much +less +dissert +graduat +universityof +virginia +degre +mathemat +alsoheavili +involv +asian +studi +foreign +relat +andgovern +depart +origin +slate +also +degreein +asian +studi +also +heavili +involv +user +interfac +groupand +comput +sciencedepart +work +project +call +alic +sinc +anywai +take +look +pictur +smaller +shuichi +mean +finger +info +current +schedul +neat +hypertext +link +hunt +destroi +bug +shuichi +koga +skoga +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..75ac1b97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,59 @@ +soha +hassoun +home +page +soha +washington +soha +hassounit +year +graduat +school +univers +washington +comput +scienc +engin +dept +circuit +design +whoturn +develop +current +work +onarchitectur +retim +professor +carlebel +weekli +schedul +busi +current +previou +research +current +educ +experi +public +patent +chao +group +dept +profession +interest +vlsi +site +inform +littl +deede +photo +galleri +address +comput +scienc +engin +depart +univers +washington +seattl +phone diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..01722dcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,136 @@ +sujai +parekh +home +page +sujai +parekh +work +depart +comput +scienc +engin +sieg +hall +chateau +guggenheim +annex +univers +washington +seattl +home +seattl +quarter +class +seminar +system +seminar +french +french +research +simultaneousmultithread +particular +issu +relat +tomultithread +architectur +softbot +project +evalu +simon +softbot +emploi +procedur +search +controlsystem +control +action +report +construct +design +remov +patio +convent +workspac +interest +fund +project +pleas +contact +sport +spud +soccer +right +sort +bookmark +page +like +keep +track +interest +academ +cognit +scienc +distribut +parallel +system +psycholog +philosophi +tenni +soccer +sail +squash +volleybal +ballroom +danc +food +cornel +comput +scienc +oracl +corpor +stottler +henk +associ +done +resum +random +person +info +favorit +food +oondhiu +mango +phad +thai +kung +chicken +favorit +beverag +screwdriv +scotch +long +island +ic +favorit +danc +tango +swing +east +west +coast +salsa +favorit +rock +music +dire +strait +pink +floyd +phil +collin +genesi +peter +gabriel +petti +sparekh +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..9ccc221f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,68 @@ +erik +selberg +improv +home +page +name +erik +speed +washington +academ +speed +almost +live +fishcam +address +lara +lewi +memorialhappi +link +peopl +sport +food +drink +cultur +mari +kay +offic +friend +famili +fish +disc +drive +ultim +pasti +power +ur +machin +mountain +bike +spud +softbal +ski +utah +raquetbal +pro +colleg +wedgwood +hous +diet +pepper +salt +lake +roast +compani +bean +bagel +speed +racer +star +war +tini +toon +pinki +brain +phantom +babylon +comic +erik diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..e1bc7a16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,151 @@ +home +page +sung +choiwelcom +thehomepag +ofsung +eunchoi +myschoollifemi +primari +research +interest +compil +parallel +program +languag +involv +zplcompil +project +univers +washington +late +beenspend +time +think +optim +communicationgener +us +architechtur +independ +communicationlibrari +ironman +addit +experi +simul +data +parallel +programson +superscalar +processor +goal +work +improv +nodeperform +come +gener +parallel +machin +alsobeen +seen +hang +chaosrout +group +work +simul +includ +graphic +front +visual +thatexperi +current +implement +anoth +router +simul +inzpl +final +also +littl +astronomi +quarter +ta +enjoi +watch +movi +mostli +comfort +home +like +yeah +vegetarian +sinc +myjunior +year +colleg +drink +dinner +would +samewithout +good +wine +result +must +exercis +quit +plai +twosoccerteam +cousin +scrub +divis +cooper +recdivis +last +season +scrub +came +second +place +andcoop +divis +unfortun +recent +sacrifiedmi +left +knee +game +plai +soccer +take +usualstep +aerobicsclass +instead +find +try +swim +weight +trainingclass +like +good +peopl +world +read +book +take +abit +shakespear +watch +publictelevis +listen +classicalmus +myotherlif +sung +choi +sungeun +washington +depart +comput +scienc +engin +univers +washington +seattl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..295eb333 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,66 @@ +nguyen +nguyen +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +washington +work +world +research +interest +includ +oper +system +distribut +andparallel +system +network +secur +current +help +frommi +advisor +johnzahorjan +build +system +support +run +soft +real +timeappl +visual +partial +idl +workstat +innow +recent +complet +studi +runtim +measur +ofappl +characterist +us +runtim +system +tominim +applic +execut +time +uniprogram +multiprocessorsenviron +well +system +schedul +make +goodglob +schedul +decis +multiprogram +multiprocessorsenviron +cvpublic +worldvietnameseresourc +netcyclingplayground diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..1b51a436 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +tian +homepageyour +browser +rusti +yellow +turkei +frame +even +part +html +standard +click +frame +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..3cbc514d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,217 @@ +ashutosh +tiwari +ashutosh +tiwari +tiwari +washington +depart +comput +scienc +engin +univers +washington +seattl +mostli +full +time +fourth +year +graduat +student +work +area +singl +address +space +oper +system +opal +persist +object +system +object +orient +databas +applic +workload +measur +oper +system +support +databas +past +work +infrastructur +user +interfac +distribut +object +system +spare +time +work +comput +scienc +group +research +technolog +organ +boe +comput +servic +oopsla +workshop +build +larg +distribut +softwar +system +us +object +oopsla +workshop +object +larg +distribut +persist +softwar +system +projectsopali +work +distribut +opal +opal +oper +system +project +address +issu +opportun +involv +creat +singl +global +address +space +across +multipl +user +machin +jeff +chase +primari +architect +opal +hank +levi +advisor +work +close +opal +also +advisor +applic +workload +measur +also +work +measur +character +behavior +persist +object +applic +gener +techniqu +paper +work +area +distribut +object +system +work +sever +distribut +object +system +profession +career +thisexperi +basi +oopsla +workshop +build +larg +distribut +softwar +system +us +object +organ +follow +oopsla +workshop +object +larg +distrbut +persist +softwar +system +public +us +virtual +address +object +refer +chase +levi +tiwari +proc +intern +workshop +object +orient +oper +system +septemb +except +handl +parallel +distribut +environ +tiwari +levi +ecoop +workshop +except +handl +juli +build +larg +distribut +system +us +object +tiwari +bosch +addendum +proceed +oopsla +oop +messeng +octob +evalu +system +applic +benchmark +tiwari +narasayya +levi +oopsla +workshop +object +databas +behavior +benchmark +perform +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..530c3339 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,103 @@ +tessa +tessa +anoth +dead +inform +superhighwai +second +yeargradu +student +comput +scienc +univers +washington +research +interest +includ +sort +relatedgoodi +current +work +clio +system +search +andbrows +person +histori +avail +current +seek +gain +employ +myresum +onlin +curiou +kittyi +honor +share +apart +gambit +siames +cat +great +memor +inform +found +therear +pictur +page +tofind +appar +alsor +interest +scotland +classesi +still +work +qual +quarter +take +last +ofeight +class +fulfil +breadth +requir +digit +system +seminarlinux +gameseverybodi +plai +game +maintain +linux +gametom +commit +advanc +linux +pretti +coolgam +platform +also +first +attempt +java +program +simpl +maze +applet +also +java +linux +sleepingi +known +frequent +seattl +area +bookstor +also +knit +crochet +copyright +tlau +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..f03065fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,55 @@ +martin +tompa +martin +tompa +depart +comput +scienc +engin +univers +washington +seattl +phone +receptionist +lectur +note +articl +comput +trajectori +thelma +louis +recent +holidai +moon +pearl +among +wash +oyster +collabor +surrealist +electron +propheci +build +across +pierc +lane +carol +martin +photograph +photo +courtesi +health +scienc +center +educ +resourc +provid +mani +imag +univers +washington +martin +tompa +finger +tompa +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..e424c281 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,146 @@ +traci +kimbrel +held +prison +traci +kimbrel +held +prison +traci +kimbrel +held +prison +univers +washingtonsinc +without +charg +trial +move +year +toanoth +seattl +area +prison +inmat +forc +tomanufactur +airplan +escap +institut +wasrecaptur +return +univers +washington +help +hisplight +rescu +imprison +list +thing +done +curriculum +vita +imprison +detail +statement +ofwhat +promis +histori +goal +free +captor +depart +comput +scienc +engin +univers +washington +seattl +tracyk +washington +eduher +captor +forc +trace +driven +comparison +algorithm +parallel +prefetch +cachingtraci +kimbrel +andrew +tomkin +hugo +patterson +brian +bershad +edward +felten +garth +gibson +anna +karlin +appear +sigop +usenix +associ +symposium +onoper +system +design +implemen +near +optim +parallel +prefetch +cachingtraci +kimbrel +anna +karlin +appear +ieeesymposium +foundat +comput +scienc +longer +version +integr +parallel +prefetch +cach +page +extend +abstract +traci +kimbrel +edward +felten +anna +karlin +proceed +sigmetr +confer +measurementand +model +comput +system +probabilist +algorithm +verifi +matrix +product +usingo +squar +time +base +random +bit +traci +kimbrel +rakesh +kumar +sinha +inform +process +letter diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..d4347e74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,63 @@ +travi +craig +home +page +travi +craig +travi +washington +comput +scienc +engin +depart +univers +washington +seattl +research +interest +mechan +predict +real +time +system +cach +restor +queu +spin +lock +arctic +submarin +current +cours +take +quarter +dissert +work +real +time +system +time +consum +side +project +work +half +time +esca +corpor +help +keep +volvo +run +press +latest +motor +pool +statu +understand +comput +scienc +travi +craig +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..d159aac0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,16 @@ +dean +tullsen +home +page +dean +tullsen +biograph +inform +research +interest +bibliographi +home +page +download +resumemi +hobbi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..b7f99de9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,325 @@ +michael +vanhilstmichael +vanhilstvanhilst +washington +edumvh +harvard +eduperson +research +comput +scienc +engin +depart +univers +washington +seattl +usaclick +send +email +messag +mike +vanhilst +personalmik +start +year +graduat +student +univers +washington +hopefulli +littl +luck +finish +around +theend +winter +quarter +immedi +prior +come +udub +mike +work +contractor +atibm +research +wrote +motif +widget +user +unterfac +sdata +explor +mike +start +programm +fix +maintainingcomput +hardwar +smithsonian +astrophys +observatori +part +smithsonian +locat +within +harvard +learn +debug +softwar +mike +could +convinc +programm +hardwar +realli +work +correctli +mike +stai +smithsonian +year +time +wrote +program +call +saoimagewhich +us +lot +astronom +look +imag +saoimag +part +gnudistribut +mike +would +like +thank +bill +wyatt +eric +mandel +schwarz +doug +minkfor +start +guid +continu +project +mention +countless +other +contribut +mike +took +year +work +group +seismologistsin +pari +franc +data +acquisit +calibr +analysi +truli +wonder +time +pari +wife +angela +french +languag +class +theallianc +francais +angela +come +pari +year +nativ +colombiain +south +america +summer +mike +wrote +front +studentsbrows +univers +time +schedul +data +base +uwin +work +talent +staff +comput +commun +folk +brought +pine +special +thank +bill +shirei +design +traci +stenvik +wrote +uwin +screen +librari +machin +uwin +work +time +schedul +mike +also +taught +begin +program +extens +motiv +group +frommicrosoft +product +support +sacrif +summer +learn +recent +mike +present +paper +theintern +symposium +object +technolog +advanc +softwar +isota +confer +object +orient +program +system +languag +applic +oopsla +theacm +sigsoft +symposium +foundat +softwar +engin +also +present +poster +oopsla +made +present +subject +workshop +oopsla +oopsla +particip +doctor +symposium +oopsla +particip +demo +uist +thank +steve +earlier +life +mike +earn +degre +inarchitectur +wooden +kind +citi +planningfrom +mitand +work +commun +develop +director +forth +citi +grinnel +iowa +thing +work +differ +skill +visualdesign +problem +solv +continu +valu +still +get +talk +chri +alexand +seattl +mike +activ +student +chapter +washington +softwar +associ +improv +ti +student +larg +small +softwar +compani +area +enjoi +hike +cross +countri +ski +sail +andkayak +also +enjoi +swim +lake +bronson +recent +mike +free +time +taken +marco +harold +sebastien +hilst +born +mike +post +pictur +soon +locat +anoth +photo +scanner +visit +sinc +novemb +michael +vanhilst +last +modifi +fridai +novemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..2381e683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,122 @@ +hello +vassilylong +live +hello +start +realli +person +best +linki +come +thu +http +washington +home +vass +us +document +also +shortest +write +young +collect +quit +link +download +fewfil +expand +collect +sinc +thisstuff +select +link +quotesrussian +home +pagesvari +linksguid +html +formsoth +link +home +pageuw +home +pagencsa +mosaic +home +pagerec +join +cecilproject +cecil +cool +pure +object +orient +languag +andvortex +even +cooler +compil +infrastructur +befast +mayb +paper +written +member +staff +design +implement +themvi +system +assist +access +ourdepartment +room +visitor +databas +recent +beenupgrad +staff +peopl +qual +project +expand +thezpl +languageto +handl +irregular +data +structur +repres +graph +anddynam +repartit +data +graph +arrai +myqual +writeup +page +short +overview +check +theslidesfrom +present +page +slide +also +collect +link +toresourc +relat +project +vass +washington +eduobject +mirror +closer +appear +pastor +vybrasyvalsya +okna +pyatyi +deystvov diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..1543c597 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,77 @@ +virgil +bourassa +home +page +uwvirgil +evan +bourassavirgil +bourassa +virgil +washington +student +depart +comput +scienc +engin +theunivers +washington +seattl +washington +research +interestsinclud +comput +oper +system +architectur +join +boeingin +work +scientist +comput +scienceorgan +inform +support +servic +divis +bellevu +washington +receiv +electr +engin +arizonast +univers +temp +arizona +electricalengin +univers +washington +seattl +washington +comput +scienc +engin +theunivers +washington +accesswhat +work +interest +expertis +resum +patent +invent +public +present +profession +histori +educ +achiev +recommend +letter +statusoccasion +updat +last +modifi +virgil +bourassa +virgil +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..aa886126 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,300 @@ +vivek +ratan +home +page +vivek +ratan +particular +graduat +student +comput +scienc +depart +univers +washington +current +academ +leav +work +research +scientistat +bellcor +morristown +researchwork +bellcor +interest +distribut +comput +issu +distribut +system +applic +system +develop +high +avail +current +work +distribut +system +group +bellcor +high +avail +distributedsoftwar +system +simpli +highli +avail +system +continu +presenc +failur +group +develop +toolset +anatida +provid +high +avail +mechan +distribut +applic +adher +corba +standard +also +look +activ +replic +scheme +high +avail +indc +applic +recent +effort +focuss +enhanc +avail +fault +toler +cell +directori +servic +detail +project +foundher +also +interest +high +avail +issu +integrationof +distribut +technolog +server +univers +washington +work +murphi +project +softwar +safeti +methodolog +softwar +safeti +group +head +bydr +nanci +leveson +detail +safeti +research +found +safeti +page +much +work +involv +look +automat +fault +tree +gener +fromrequir +specif +written +rsml +well +work +rsml +languag +simul +public +list +recent +public +found +person +inform +born +brought +india +last +eight +year +undergradu +attend +angelo +state +univers +angelo +wesleyan +univers +middletown +receiv +physic +math +comput +scienc +right +came +univers +washington +seattl +purus +graduat +studi +comput +scienc +like +plai +tenni +whenev +squash +lesserext +racquetbal +suffic +ardent +folow +game +cricket +particip +ultra +cricket +plai +email +cricket +simul +game +mani +year +also +follow +exploit +superson +marin +cowboi +last +year +half +taken +keen +interest +learn +ballroom +danc +waltz +foxtrot +chacha +rhumba +tango +swing +west +coast +pleas +occasion +dabbl +mambo +area +best +place +learn +danc +seattl +center +us +band +session +everi +saturdai +involv +model +unit +nation +chapter +model +unit +nation +intern +educ +organ +simul +work +confer +held +throughout +year +current +topic +restructur +reform +part +like +secur +council +ecosoc +world +bank +rapidpopul +growth +nuclear +prolifer +home +page +chapter +interest +read +poetri +mirza +ghalib +centuryindian +poet +also +interest +english +literatur +especi +romant +victorian +period +link +obligatori +collect +sitesthat +tend +visit +often +depart +comput +scienc +engin +univers +washington +seattl +vivek +washington +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..263ab550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,37 @@ +geoff +skywhoi +graduat +student +univers +washington +inseattl +whati +master +thesi +wireless +mobil +comput +design +andbuild +system +call +mobisa +current +avoid +settl +thesi +topic +wherechateau +guggenheim +annex +univers +washingtonseattl +washington +look +emac +window +window +geoff +voelker +voelker +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..a5a8a009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,64 @@ +home +wayn +home +comput +scienc +engin +depart +univers +washington +seattl +waynew +washington +stuff +current +look +memori +system +perform +particular +differentmemori +system +organ +investig +work +beingdon +jean +loup +baer +also +look +interpret +other +denni +geoff +alec +rightnow +thing +littl +rocki +actual +earli +version +paper +thing +keep +jump +alpha +interest +place +fish +list +cool +site +howev +list +peoplewho +list +peopl +well +test +testwayn +wong +waynew +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..655d0a4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,16 @@ +william +chan +home +pagewilliam +chan +home +pagei +spend +time +hell +spare +time +hang +heaven +wchan +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..169b5327 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,179 @@ +daniel +weld +daniel +weld +associ +professor +comput +scienc +engineeringat +univers +ofwashington +receiv +bachelor +degre +comput +scienc +biochemistri +yale +univers +land +artifici +intellig +receiv +presidenti +young +investig +award +offic +naval +research +younginvestig +award +weld +theadvisori +board +journal +airesearch +guest +editor +comput +intellig +edit +aaai +report +role +ofintellig +system +nation +inform +infrastructur +isco +program +chair +aaai +weld +publish +book +scad +technic +paper +person +data +reach +offic +sieg +hall +phone +work +home +mail +dept +comput +scienc +engin +univers +washington +seattlewa +research +interest +weld +current +research +interest +artifici +intellig +specif +softwar +agent +plan +exampl +weld +group +support +ucpop +planner +us +almost +hundr +sitesworldwid +mani +weld +paper +avail +electron +arehi +current +favorit +repres +sens +action +middl +ground +revisit +plan +gather +inform +aaai +august +plan +base +control +softwar +agent +aip +ascal +comparison +shop +agent +world +wide +januari +softbot +base +interfac +internet +cacm +juli +anintroduct +least +commit +plan +magazin +winter +select +exhaustivelist +recreat +absent +offic +weld +foundat +cafe +allegro +stormymountain +climb +past +enjoi +travel +theworld +like +found +plai +twin +boi +adam +galen +invit +visit +galleri +pacif +northwest +desert +wilder +photograph +also +illustr +stori +morocco +weld +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..87e4d4c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,13 @@ +wendi +belluomini +wendi +belluomini +wendi +washington +graduat +master +current +work +univ +utah +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..65814fef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,67 @@ +wilson +hsiehwilson +hsiehi +postdoc +thedepart +comput +scienc +engin +theunivers +washington +inseattl +member +thespin +project +receiv +thedepart +electr +engin +comput +sciencein +theschool +engineeringatmit +work +thelaboratori +comput +scienc +advisor +werefran +kaashoekandbil +weihl +research +compil +parallel +system +myresearch +interest +interact +among +compil +programminglanguag +runtim +oper +system +architectur +select +publicationsselect +linksperson +interestswilson +hsieh +depart +comput +scienc +engin +univers +washington +seattl +offic +sieg +move +phone +numberha +chang +voic +whsieh +washington +public +keyoctob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..d50888a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,103 @@ +alec +wolman +home +page +alec +wolmanwolman +washington +eduworkcomput +scienc +engin +departmentunivers +washingtonbox +seattl +home +seattl +current +graduat +student +thecomput +scienc +departmentat +univers +washington +offic +isroom +thechateau +gradual +school +work +fordigit +equip +corp +cambridg +research +research +interest +includ +oper +system +network +architectur +current +recent +project +scalabl +network +perform +etch +binari +instrument +optim +executablesrocki +interpret +performanceon +line +paper +firewal +applic +relai +trees +wolman +summer +usenix +latenc +analysi +network +wolman +voelker +thekkath +winter +usenix +structur +perform +interpret +romer +voelker +wolman +wong +baer +bershad +levi +appear +asplo +hungri +otter +fixha +strang +idea +nervou +habit +realli +plai +guitar +wolman +link +wolman +hallwolman +diseasewolman +pressur +treat +lumber +wolman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..ffc01734 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,132 @@ +xiaohan +xiaohan +xqin +washington +comput +scienc +engin +depart +univers +washington +seattl +offic +sieg +phone +year +graduat +student +work +jean +loup +baer +research +interest +includ +comput +architectur +parallel +distribut +system +perform +evalu +method +model +simul +short +term +goal +school +soon +possibl +paper +perform +evalu +cluster +base +architectur +baer +submit +confer +perform +explicit +communicationprimit +cach +coher +multiprocessor +system +baer +appear +proceed +hpca +compar +studi +conserv +optimist +trace +driven +simul +baer +award +paper +simul +symposium +page +optimist +trace +driven +simul +baer +tech +report +dept +comput +scienc +engin +univ +washington +parallel +trace +driven +simul +implement +perform +baer +proceed +intern +confer +parallel +process +page +graph +toolfor +monitor +visual +basedmultiprocessor +perform +zhang +nalluri +journal +parallel +distribut +comput +june +page +perform +predict +evalu +parallel +processingon +numa +multiprocessor +zhang +ieee +tran +softwar +engin +page +interest +stuff +photo +chinaread +chinesesearch +engin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..653bc703 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,82 @@ +yasushi +saitoyasushi +saito +second +year +graduat +student +atdepart +comput +scienc +engin +univers +washington +seattl +current +workingwith +brian +bershad +thespin +project +address +andperson +info +meta +link +metacrawl +yahoo +desktop +refer +index +alta +vista +lyco +archi +tech +index +research +link +spin +intern +document +modula +info +time +schedul +survei +oper +system +transact +servic +spin +qual +project +sightse +japanes +link +random +info +javascript +apprentic +page +us +linux +connect +gatewai +japan +perl +patch +touch +type +trainer +dvorak +lesson +text +yasushi +washington +want +finger +talk +trycanva +washington +desktop diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..e19067a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,85 @@ +oren +zamir +home +pageoren +zamir +home +page +depart +comput +scienc +engineeringunivers +washingtonbox +seattl +offic +chateau +zamir +washington +home +seattl +zamir +washington +edui +isra +graduat +student +depart +computersci +engin +univers +washington +myundergradu +degre +physic +mathemat +hebrewunivers +jerusalem +israel +interest +field +artifici +intellig +softwareengin +current +work +line +cluster +algorithmsfor +internet +document +retriev +basic +idea +help +userwith +internet +search +result +hundr +document +worki +done +part +metacrawl +parallel +search +servic +along +orenetzioni +erik +selberg +resum +avail +pictur +thing +like +dive +sinai +jeeptour +ski +pictur +last +raft +trip +interest +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..e036f21e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,175 @@ +titl +jean +loup +baer +professorand +adjunct +professor +electr +engin +receiv +diplom +ingnieur +electricalengin +doctorat +cycl +comput +scienc +theuniversit +grenobl +franc +ucla +prior +join +univers +washington +research +engin +laboratoir +decalcul +universit +grenobl +member +digit +technologygroup +ucla +present +interest +parallel +anddistribut +process +comput +system +architectur +author +coauthor +paper +thesearea +author +textbook +comput +system +architectur +comput +scienc +press +professor +baer +serv +ieee +comput +scienc +distinguishedvisitor +nation +lectur +guggenheim +fellow +ieee +fellow +editor +journal +parallel +distribut +comput +journal +comput +languag +serv +asprogram +chairman +intern +confer +parallelprocess +program +chairman +internationalsymposium +comput +architectur +gener +chairman +ofth +internationalsymposium +comput +architectur +current +chair +sigarch +eighteen +student +complet +dissert +professorba +direct +twelv +work +industri +research +laboratoriesand +inacademia +although +year +baer +hashad +difficulti +retain +french +accent +cours +recent +research +project +look +comput +architectur +page +project +involv +cach +coher +protocol +cluster +architectur +improv +protocol +singl +system +perform +softwar +primit +cluster +appear +hpca +prefetch +uniprocessor +hardwar +also +ieee +comparisonwith +block +cach +also +asplo +prefetch +multiprocessor +isca +impact +specul +execut +cach +denni +home +page +andisca +parallel +trace +driven +simul +conserv +approach +also +icpp +optimisticapproach +comparison +also +distribut +simul diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..df7bd6e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,149 @@ +craig +chamber +craig +chamber +assist +professor +join +faculti +receiv +degre +comput +scienc +comput +scienc +stanford +chamber +research +interest +design +implementationof +advanc +program +system +incorpor +express +programminglanguag +effici +implement +support +programmingenviron +current +investig +object +orient +languagesand +lead +ceciland +vortex +project +cecil +pure +object +orient +languageserv +vehicl +investig +multi +method +static +type +modul +featur +vortex +optim +compilersystem +object +orient +languag +incorpor +intra +andinterprocedur +static +analys +profil +guid +optim +withfront +end +cecil +modula +java +previous +chamberswa +member +self +project +chamber +also +member +spinproject +spin +extens +oper +system +microkernel +whichsupport +dynam +adapt +system +interfac +implementationsund +direct +applic +control +still +maintain +systemintegr +isol +applic +spin +util +dialect +themodula +languag +pointer +safe +kernel +extens +languag +spinalso +reli +dynamiccompil +achiev +high +perform +despit +fine +grainedextens +click +herefor +inform +undergradu +graduat +level +researchproject +area +contact +informationprof +craig +chambersdepart +comput +scienc +engineeringunivers +washingtonbox +seattl +chamber +washington +mail +requir +street +address +sieg +hall +room +last +updat +april +chamber +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..49f0d450 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,138 @@ +carl +ebel +home +page +carl +ebelingdepart +comput +scienc +engin +univers +ofwashington +seattl +washington +voic +mail +ebel +washington +offic +sieg +hall +room +carl +ebel +associ +professor +physic +wheatoncolleg +comput +scienc +southern +illinoisunivers +comput +scienc +carnegi +mellonunivers +join +carl +ebel +research +interest +fall +categori +vlsiarchitectur +comput +aid +design +digit +system +haswork +number +vlsi +project +includ +hitech +chessmachin +apex +graphic +chip +draw +spline +curv +andsurfac +triptych +field +programm +gate +arrai +current +hei +involv +chao +project +build +multicomput +routingnetwork +interest +focu +method +optim +theperform +circuit +us +level +sensit +latch +placementand +rout +algorithm +fpga +particularli +triptych +teachingspr +advanc +logic +designoffic +hour +mondai +thursdai +travel +april +fccm +napamai +burlington +chicagojun +vegasresearch +project +northwest +laboratori +integr +system +chao +router +project +triptych +high +densiti +fpga +architectur +public +journal +articl +confer +workshop +paper +graduat +student +soha +hassoun +neil +mckenzi +darren +cronquist +paul +franklin +amara +galleryelan +galleryebel +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..70a4127d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,48 @@ +steve +hanksunivers +washingtondepart +comput +scienc +design +agent +architecturesai +magazin +seriou +link +home +page +spring +uncertainti +confer +inform +uncertainti +page +group +page +link +seattl +restaur +seattl +symphoni +schedul +seattl +wine +opera +schedul +server +edita +gruberova +page +photo +carlo +maria +giulini +discographi +sumac +inform +tenni +new +hank +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..17acd0ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,82 @@ +titl +alistair +holden +professor +professor +electr +engin +origin +highland +scotland +receivedhi +degre +univers +glasgow +spent +year +withth +british +broadcast +corpor +engin +divis +graduateapprentic +spent +research +divis +spent +year +yale +edison +fellowship +degre +phddegre +univers +washington +dissert +learningin +artifici +intellig +interest +began +take +coursefrom +colin +cherri +imperi +colleg +london +thebbc +initi +comput +scienc +program +theuw +time +group +faculti +mostli +math +departmentsform +group +within +graduat +school +current +work +applic +knowledg +base +system +verif +expert +system +integr +symbol +neural +netmethodolog +speech +understand +comput +aid +design diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..a12ee410 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,178 @@ +dick +karprichard +karpprofessor +ofcomputersci +engin +andadjunct +professor +ofmolecularbiotechnologyunivers +ofwashington +karp +washington +eduaward +membershipsn +medal +scienc +babbag +prize +berkelei +univers +professor +fellow +ture +award +member +nation +academi +sciencesmemb +nation +academi +engineeringfellow +american +academi +art +sciencesfellow +american +associ +advanc +sciencedistinguish +teach +award +berkelei +academ +senat +class +chair +berkeleylanchest +prize +oper +research +societi +america +institut +manag +scienc +fulkerson +prize +american +mathemat +societi +mathemat +program +societi +john +neumann +theori +prize +oper +research +societi +america +institut +manag +scienc +faculti +research +lectur +berkelei +hermann +weyl +lectur +institut +advanc +studi +john +neumann +lectur +societi +industri +appliedmathemat +miller +research +professor +berkelei +honorari +doctor +georgetown +univers +univers +massachusett +technion +univers +pennsylvania +member +nation +advisori +board +comput +profession +forsoci +respons +presentmemb +board +governor +weizmann +institut +scienc +presentmemb +board +truste +intern +comput +scienceinstitut +presentselect +public +combinator +complex +random +turingaward +lectur +commun +construct +perfect +match +random +upfal +wigderson +combinatorica +probabilist +analysi +partit +algorithm +travel +salesman +problem +plane +mathemat +ofoper +research +theoret +improv +algorithm +effici +fornetwork +flow +problem +edmond +journal +theacm +reduc +among +combinatori +problem +complex +comput +comput +plenum +press +travel +salesman +problem +minimum +spanningtre +part +held +mathemat +program +karp +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..21f22b22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,405 @@ +lazowska +ahalf +centuri +exponentialprogress +inform +technolog +univers +washington +annualfaculti +lectur +vicepresid +gore +speech +eniac +thanniversari +celebr +congress +talk +like +georgejetson +support +polici +appropri +forfr +flintston +universityof +california +berkelei +invent +chines +cook +nathanmyhrvold +joinsedlazowska +theuwcs +faculti +trip +memori +lane +lazowska +lazowska +professor +chair +thedepart +comput +scienc +lazowska +mostlywearsti +push +paper +rack +frequent +flier +mile +graduat +student +seem +pick +onthi +mbquicktim +healso +host +lot +visitor +surpris +number +havefunnynos +lazowska +member +board +director +comput +researchassoci +member +includ +essenti +allgradu +depart +industri +research +laboratoriesin +field +chair +ofcra +govern +affair +committe +serv +nation +scienc +foundat +advisorycommitte +comput +inform +scienc +andengin +nation +research +council +scomputersci +telecommun +board +member +person +technic +advisori +board +formicrosoft +research +personnationalsemiconductor +comput +scienc +system +academicadvisori +council +board +director +ofdata +corpor +scientif +advisori +board +forcabl +hows +ventur +cascadia +fund +belong +stand +advisori +committeesfor +thedepart +eecsat +berkelei +andth +depart +comput +scienc +atstanford +univers +universityof +virginia +hongkong +univers +scienc +technolog +member +ture +award +select +committe +complet +servic +person +nation +research +councilpanel +review +multi +agencyhigh +perform +computingand +commun +program +brook +sutherland +committe +andha +recent +serv +chair +committe +examinersfor +graduat +record +examin +board +comput +scienc +test +chair +sigmetr +associ +comput +machineri +sspecial +interest +group +concern +comput +system +perform +chair +softwar +system +award +committe +program +chairof +symposium +oper +system +principl +andeditor +ieee +transact +comput +univers +washington +addit +servinga +chair +thedepart +comput +scienc +engin +lazowska +chair +univers +advisori +committe +onacadem +technolog +recent +serv +member +thecommitte +deanship +colleg +artsand +scienc +chair +review +committe +forth +program +molecular +biotechnolog +amemb +perform +review +committe +deanof +engin +select +deliv +theunivers +washington +annual +faculti +lectur +lazowska +fellowof +associationfor +comput +machineri +theinstitut +electr +andelectron +engin +seventeenph +student +studentshav +complet +degre +work +miscellan +link +integratedoverview +univers +washington +thedepart +region +also +apersuas +player +version +intend +forloc +consumpt +theimpact +research +univers +inform +technolog +perspect +uwcs +profession +master +program +persuas +playertopten +reason +major +inform +csebuild +project +abbrevi +cvcomputingresearch +drive +informationtechnolog +inform +industri +forwardmassi +goldmanreport +alleg +cseph +product +issu +flaw +data +medianyear +confer +boardstudi +doctor +program +think +ahalf +centuri +exponentialprogress +inform +technolog +univers +washington +annualfaculti +lectur +driver +inform +highwai +univers +washington +saturdayseminar +novemb +testimonyto +houseappropri +committe +concern +april +testimonyto +hous +scienc +committe +concern +hpcc +octob +vicepresid +gore +speech +eniac +thanniversari +celebr +februari +interestinghom +page +sometim +demo +purpos +odeto +execut +vice +presid +tallman +trask +departsfor +duke +univers +universityof +california +berkelei +invent +chines +cook +nathanmyhrvold +joinsedlazowska +theuwcs +faculti +trip +memori +lanelazowska +down +famili +home +pagedirect +houseshilshol +aquat +club +home +pagerec +discoveredreview +grade +grade +poetryfing +lazowska +washington +scheduleinform +seem +offic +reflector +home +page +http +washington +home +lazowska +lazowska +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..1285e53d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,130 @@ +titl +alan +shaw +professor +graduat +bachelor +sdegre +engin +physic +univers +toronto +amast +mathemat +stanford +univers +incomput +scienc +stanford +addit +facultyappoint +univers +washington +start +hasbeen +assist +professor +comput +scienc +cornel +univers +visit +professor +fulbright +research +scholar +univers +pari +guest +professor +informat +zurich +research +associ +atth +stanford +linear +acceler +center +system +engin +theibm +corpor +current +research +interest +real +time +system +softwar +specif +method +professor +shaw +publicationsinclud +textbook +oper +system +book +softwareengin +introductori +comput +scienc +text +andan +edit +book +document +prepar +system +serv +memberof +editori +committe +member +comput +sciencescreen +committe +fulbright +award +associateeditor +journal +real +time +system +associ +editor +ieee +transact +softwar +engin +among +thing +supervis +mani +these +project +fifteen +dissert +includ +distinguish +dissert +half +former +student +academ +posit +half +work +live +professor +shaw +hobbi +includ +good +food +trumpet +hike +bike +hobbi +tenni diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..f0fb77a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,137 @@ +titl +lawrenc +snyder +professor +receiv +bachelor +sdegre +univers +iowa +mathemat +econom +andin +receiv +carnegi +mellon +univers +computersci +visit +scholar +univers +washington +join +faculti +perman +serv +onth +faculti +yale +purdu +visit +scholarat +harvard +professor +snyder +research +rang +proof +theundecid +properti +program +design +developmentof +singl +chip +cmo +microprocessor +quarter +hors +hecreat +configur +highli +parallel +chip +architectur +thepok +parallel +program +environ +inventor +chaoticrout +follow +complet +blue +chip +project +nowprincip +investig +orca +project +nwli +professor +snyder +associ +editor +journal +computerand +system +scienc +parallel +system +editor +journal +ofth +area +editor +ieee +transact +parallel +anddistribut +system +serv +nation +scienc +foundationadvisori +committe +divis +comput +research +particip +numer +nation +advisorycommitte +futur +research +direct +parallel +comput +andcomput +scienc +polici +serv +distinguish +doctoraldissert +award +select +committe +chair +program +chair +first +symposium +parallel +algorithmsand +architectur +addit +dozen +student +complet +doctor +degreesund +direct +professor +snyder +guid +numer +master +seniorproject diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..cbc74ad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,109 @@ +arun +somani +arun +somani +professor +professor +electr +engin +earn +msee +degre +electr +engin +mcgill +univers +montreal +canada +respect +prior +work +scientif +offic +govt +india +delhi +period +design +develop +anti +submarin +warfar +system +indian +navi +professor +somani +research +interest +area +offault +toler +comput +interconnect +network +comput +architectur +parallel +comput +system +parallel +algorithm +current +involv +three +major +project +high +integr +system +design +address +issu +relat +tocach +memori +design +redund +comput +system +evalu +tool +system +congest +control +fault +toler +broadband +network +develop +proteu +architectur +multiprocessor +system +autom +classif +object +base +generalizedenhanc +hypercub +reconfigur +interconnect +network +explor +coars +grain +parallel +like +cook +indian +food +hike +plai +bridg +tabl +tenni +tenni +inform +dpcnl +proteu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..634baed9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,269 @@ +steven +tanimoto +home +page +steven +tanimoto +professor +comput +scienc +engin +adjunct +professor +electricalengin +receiv +degre +fromharvard +princeton +join +theunivers +washington +faculti +year +teach +atth +univers +connecticut +visit +professorat +institut +programm +univers +pari +franc +anda +visit +scholar +linkp +univers +sweden +sinc +hasalso +serv +adjunct +member +depart +electricalengin +visit +scientist +atkob +univers +japan +think +machin +corpor +cambridg +massachusett +linkp +univers +sweden +june +june +visit +scientist +theinstitut +research +enseign +superieur +techniquesd +electroniqu +irest +univers +nant +franc +addit +assist +variou +confer +hasrec +home +page +forimag +confer +devot +imag +processingand +commun +confer +take +place +bordeaux +franc +professor +tanimoto +research +interest +includ +comput +analysi +ofimag +particularli +us +parallel +processor +educ +technolog +visual +program +artifici +intellig +currentlydirect +sponsor +project +mathemat +experi +throughimag +process +whose +object +develop +person +comput +softwarethat +motiv +grade +student +studi +mathemat +written +coauthor +paper +edit +thebook +structur +comput +vision +author +textbook +entitl +element +artifici +intellig +introductionus +lisp +publish +common +lisp +edit +accompanyingsoftwar +current +work +book +subject +ofparallel +comput +imag +process +tanimoto +organ +ieee +comput +societi +internationalworkshop +visual +languag +held +seattl +serveda +gener +chair +meet +bergen +norwai +also +serv +program +chair +intern +conferenceon +pattern +recognit +subconfer +parallel +comput +anda +program +chair +ieee +comput +societi +conferenceon +comput +vision +pattern +recognit +serv +programcommitte +numer +confer +imag +process +patternrecognit +gener +chairman +ieee +comput +societyworkshop +comput +architectur +pattern +analysi +machineintellig +current +serv +steer +committe +theieee +symposiaon +visual +languag +serv +editorialboard +journal +pattern +recognit +journal +visual +languag +comput +cvgip +imag +understand +serv +editor +chief +ieeetransact +pattern +analysi +machin +intellig +addit +research +relat +activ +tanimoto +serv +chair +colleg +engineeringeduc +polici +committe +vice +chair +colleg +council +chair +elect +fellow +ieee +outsid +comput +scienc +steve +tanimoto +enjoi +plai +jazz +andclass +piano +music diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..beaafbd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,212 @@ +titl +paul +young +professor +graduateof +antioch +colleg +receiv +hejoin +univers +washington +seventeen +year +atpurdu +univers +earli +faculti +member +inperhap +first +comput +scienc +depart +unit +state +also +nation +scienc +foundat +postdoctor +fellow +atstanford +serv +faculti +reed +colleg +serv +briefli +aschairman +comput +inform +scienc +depart +theunivers +mexico +twice +taught +visit +professorin +comput +scienc +divis +univers +california +berkelei +becam +associ +dean +research +facil +colleg +engin +research +interest +theoret +comput +scienc +emphasi +question +comput +complex +thegener +theori +algorithm +connect +mathematicallog +author +coauthor +paper +area +iscoauthor +graduat +textbook +gener +theori +algorithm +serv +three +time +program +committe +symposiumon +theori +comput +serv +executivecommitte +nomin +committe +special +interestgroup +theori +comput +sigact +also +chairmanof +program +committe +ieee +comput +societi +annualsymposium +foundat +comput +scienc +foc +hasserv +vice +chairman +chairman +comput +societi +stechnic +committe +mathemat +foundat +comput +also +serv +program +committe +chair +programcommitte +structur +complex +theori +confer +serv +nation +scienc +foundat +advisorysubcommitte +comput +scienc +serv +chairman +thiscommitte +serv +chairman +ofth +comput +research +associ +professor +young +serv +editori +board +special +issu +inform +control +annal +histori +ofcomput +current +serv +editori +board +theoret +comput +scienc +notr +dame +journal +formallog +journal +comput +system +scienc +eleven +student +complet +doctor +dissert +underprofessor +young +direct +sever +gone +dopostdoctor +work +cornel +univers +ofcalifornia +berkelei +eight +current +hold +faculti +posit +avarieti +univers +chosen +industri +employ +professor +young +leather +motorcycl +jacket +read +ratherthan diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..29fadae9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,86 @@ +titl +john +zahorjan +professor +graduat +frombrown +univers +receiv +univers +oftoronto +receiv +presidenti +young +investigatoraward +primari +research +interest +area +schedul +parallelsystem +runtim +support +parallel +comput +applic +supportfor +mobil +comput +current +focu +schedul +polici +support +continu +mediaappl +involv +real +time +audio +video +thegoal +provid +polici +system +interfac +allow +applic +torespond +easili +chang +system +load +activ +research +topic +includ +techniqu +runtim +parallelizationof +code +written +sequenti +languag +support +program +exhibit +bothcontrol +data +parallel +program +construct +develop +applic +intend +formobil +comput +platform +zahorjan +editori +board +ieee +transactionson +softwar +engin +comput +survei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..0b85a648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,265 @@ +mathemat +experi +imag +process +metip +mathemat +experi +imag +process +metip +project +director +steven +tanimoto +depart +comput +scienc +emphas +practic +us +mathemat +encourag +discuss +group +learn +encourag +exploratori +open +end +learn +goal +metip +project +digit +imag +process +help +meet +theseobject +particular +develop +seri +applicationsdesign +allow +student +manipul +digit +imag +choic +materi +intend +us +enrich +activ +rather +part +astandard +classroom +curriculum +teacher +plai +variou +role +withthes +activ +exampl +catalyz +student +learn +bylead +discuss +theconcept +student +explor +comput +metip +project +current +number +program +allow +student +toexplor +mathemat +imag +process +pixel +calcul +click +order +free +copi +todai +imag +warper +click +order +free +copi +todai +transform +programm +click +order +free +copi +todai +applic +develop +primarili +pentium +base +srun +microsoft +window +applic +pixel +calcul +alsoavail +appl +macintosh +volunt +mathematicsteach +particip +test +experiment +learn +materi +transcriptproject +current +design +record +keep +framework +willfacilit +storag +person +academicinform +hard +disk +floppi +own +student +list +peopl +work +themetip +project +close +relat +project +involv +studi +ofmultiplay +educ +activ +metip +project +work +tointegr +activ +idea +describ +prospect +forth +direct +distribut +imag +databas +educ +imag +process +current +project +collect +experi +user +itsxform +imag +transform +softwar +done +somethingfun +us +softwar +pleas +know +put +current +version +document +onlin +link +littl +demonstr +xform +beenput +togeth +graduat +student +took +seminar +winter +xform +program +environ +integr +witha +subset +common +lisp +offer +technic +essenti +newapproach +learn +teach +comput +program +fundamentalattract +us +approach +student +learn +program +thecomput +pursuit +creat +neat +visual +effect +digitalimag +portrai +peopl +thing +interest +successfulli +instal +softwar +would +like +discussteach +program +pleas +contact +link +relat +project +list +metip +support +part +nation +scienc +foundat +undergr +number +bricker +washington +tanimoto +washington +last +modifi +tuesdai +februari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..f433377b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,65 @@ +univers +washington +research +mobil +ubiquit +comput +mobil +comput +univers +washingtonher +overview +mobil +computingresearch +project +mobisa +inform +system +mobil +wireless +comput +environ +system +infrastructur +mobil +handheld +comput +task +graph +manag +applic +infrastructur +cope +resourc +variabl +paper +survei +paper +describ +fundament +challeng +field +program +methodolog +disconnect +operationdistribut +transact +mobilecomput +systemcontact +prof +brian +bershadprof +gaetano +borriellomarc +fiuczynskigeorg +formanprof +hank +levygeoff +voelkerterri +watsonprof +john +zahorjan +last +updat +forman +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..fd5d0149 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,215 @@ +cecil +vortex +projectuw +cecil +vortex +projectwelcom +home +page +cecil +vortex +project +conduct +research +program +languag +design +implement +emphasi +issu +object +orient +languag +cecil +pure +object +orient +languag +intend +support +rapidconstruct +high +qualiti +extens +softwar +cecil +incorporatesmulti +method +simpl +prototyp +base +object +model +mechan +tosupport +structur +form +comput +inherit +modul +basedencapsul +flexibl +static +type +system +allowsstat +dynam +type +code +freeli +vortex +optim +compil +infrastructur +forobject +orient +high +level +languag +target +pureobject +orient +languag +like +cecil +hybrid +object +orientedlanguag +like +modula +java +vortex +current +incorporateshigh +level +optim +static +class +analysi +class +hierachyanalysi +profil +guid +receiv +class +predict +profil +guidedselect +procedur +special +intraprocedur +messag +split +automat +inlin +static +closur +analys +also +includ +acollect +standard +intraprocedur +analys +commonsubexpress +elimin +dead +assign +elimin +vortexcompil +written +entir +cecil +initi +beta +releas +cecil +vortex +system +currentlyavail +sparc +run +either +suno +suno +solari +send +mail +cecil +interest +request +messag +bodi +subscrib +like +subscrib +list +ofinterest +parti +inform +obtain +thebeta +releas +recent +finish +technic +report +describ +much +implement +research +inform +project +overview +detail +overview +project +goal +direct +postscript +version +also +avail +project +member +current +past +project +member +paper +paper +written +sampl +project +list +sampl +research +project +avail +peopl +uwcs +intern +document +project +intern +document +avail +peopl +support +page +list +sourc +support +project +relat +project +pointer +object +orient +languag +implement +projectslast +updat +august +cecil +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..216f37ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,45 @@ +grail +graphic +imag +laboratorywelcom +home +page +grail +graphic +imag +laboratori +theunivers +washington +depart +comput +scienc +engin +inform +peopl +cours +research +project +public +these +softwar +data +cool +imag +neighborhood +depart +comput +scienc +engin +univers +washington +seattl +local +interest +grail +disk +usag +polici +comment +mtwong +washington +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..aebcba96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,118 @@ +chaotic +rout +project +chaotic +rout +project +comput +scienc +engin +depart +univers +washington +seattl +chao +friend +mine +dylan +chaotic +peopleal +sort +peopl +work +chaotic +rout +project +research +paper +technic +report +repositori +research +papersand +technic +report +chaoticrout +project +avail +chao +router +chip +chao +router +chip +implement +chao +rout +algorithm +hardwar +built +test +micron +cmo +redesign +micron +process +better +perform +simul +chao +router +simul +power +simul +simul +allsort +network +rout +algorithm +includ +nice +graphicalfront +standard +present +result +pcrcw +discuss +presentationof +simul +result +rout +algorithm +abl +come +upwith +guidelin +present +result +research +group +build +list +research +group +thathav +web +describ +research +rout +interconnect +network +parallel +comput +rout +commun +workshop +pcrcw +pcrcw +held +univeristi +washington +seattl +proceed +avail +univers +washington +home +page +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..ab1562c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,273 @@ +chinook +project +chinook +project +shinook +chinook +salmon +larg +salmon +oncorhynchu +tshawytscha +pacif +amer +name +tribe +warm +wind +blow +east +rocki +mountain +warm +southerli +wind +west +rocki +mountain +rare +american +sled +doga +hardwar +softwar +synthesi +toolfor +real +time +embed +system +chinook +hardwar +softwar +synthesi +cadtool +embed +system +design +control +domin +reactivesystem +time +constraint +chinook +map +behavior +descriptionto +user +target +architectur +fill +detail +neededto +build +complet +system +enabl +design +make +inform +designdecis +high +level +earli +design +cycl +rather +reiterateaft +work +level +detail +retarget +willnot +make +design +maintain +also +enabl +designerto +take +advantag +technolog +instead +ti +legacycod +chinook +current +activ +develop +gener +currentlyw +activ +work +softwar +architectur +synthesi +interprocessorcommun +synthesi +effici +accur +simul +moredetail +becom +avail +shortli +chinook +character +follow +meet +time +constraint +ratherthan +try +maxim +averag +perform +util +assumesmanu +partit +believ +issu +intricateand +sometim +even +technic +want +design +evalu +differentarchitectur +rather +forc +singl +fix +processor +asicarchitectur +synthes +softwar +architectur +rather +reli +onoff +shelf +real +time +kernel +discourag +retarget +first +version +chinook +synthesi +tool +oper +innovemb +version +shownat +design +autom +confer +diego +june +mainfeatur +includ +automat +connect +processor +peripheraldevic +gener +sequenti +code +concurr +descript +andsynthes +devic +driver +input +verilog +output +hardwarenetlist +need +connect +hardwar +compon +togeth +softwareprogram +processor +main +topic +includ +interfacingproblem +hardwar +softwar +compon +schedul +timingconstraint +partit +function +improv +version +demonstr +nato +summer +school +swcodedesign +tremezzo +itali +june +incorpor +severalmor +interfac +synthesi +techniqu +includ +memori +map +moreeffici +code +gener +simul +chinookersfacultygaetano +borriellogradu +student +chou +ross +ortegaken +hinesian +macduff +recent +selizabeth +walkupscott +hauck +henrik +hulgaardstafflarri +mcmurchielist +paperschinook +sponsorsarpa +contract +nation +scienc +foundat +grant +nation +scienc +graduat +fellowship +walkup +patricia +robert +harri +fellowship +ortega +graduat +fellowship +chou +embed +link +depart +comput +scienc +engin +universityof +washington +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..9498ea23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,189 @@ +emerald +emerald +projectid +develop +phase +fpga +architectur +would +makeus +reliabl +map +tool +produc +accur +performanceevalu +propos +design +unfortun +given +quickproduct +time +frame +face +develop +tool +construct +isoften +postpon +mani +architectur +featur +beenfrozen +satisfi +need +fast +tool +prototyp +havedesign +emerald +power +architectur +driven +system +quickdevelop +fpga +tool +heart +emerald +provid +basicfeatur +need +fpga +system +logic +block +analysi +synthesisand +technolog +map +global +placement +partit +anddetail +placement +rout +environ +provid +aneffici +thoroughli +specifi +fpga +rout +logic +blockarchitectur +well +architectur +specif +metric +tailorplac +rout +moreov +emerald +parameter +schematicspecif +allow +architectur +variat +quickli +capturedand +evalu +emerald +public +document +contain +page +includ +byth +contribut +author +mean +ensur +time +dissemin +ofscholarli +technic +work +commerci +basi +copyright +andal +right +therein +maintain +author +copyrighthold +notwithstand +offer +work +hereelectron +understood +person +copi +thisinform +adher +term +constraint +invok +eachauthor +copyright +work +repost +without +theexplicit +permiss +copyright +holder +definit +emerald +paper +darren +cronquist +larri +mcmurchi +emerald +architectur +driven +tool +compil +fpga +appear +proceed +sigda +fourth +intern +symposium +field +programm +gate +arrai +februari +router +us +emeraldlarri +mcmurchi +carl +ebel +pathfind +negoti +basedperform +driven +router +fpga +proceed +third +intern +symposium +field +programm +gate +arraysaid +design +februari +research +darren +cronquist +carl +ebel +larri +mcmurchi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..7deb65da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,220 @@ +northwest +laboratori +integr +system +northwest +laboratori +integr +system +depart +comput +scienc +engin +univers +washington +seattl +usath +depart +comput +scienc +engin +univers +washington +engag +larg +scale +integr +vlsi +comput +aid +design +research +develop +educ +sinc +late +todai +northwest +laboratori +integr +system +focu +wide +varieti +vlsi +architectur +embed +sytem +research +current +research +project +asynchron +circuit +verificationtim +separ +event +specif +synthesi +verif +time +asynchron +circuit +asynchron +circuit +survei +current +asynchron +design +methodolog +well +first +fpga +asynchron +circuit +fpga +rapid +prototypingtriptych +montag +fpga +architectur +develop +triptych +montag +fpga +architectur +architectur +improv +densiti +current +commerci +fpga +multi +fpga +system +rapid +prototyp +develop +springbok +rapid +prototyp +system +board +level +design +well +partit +assign +rout +topolog +work +gener +multi +fpga +system +emerald +architectur +adapt +toolset +fpga +complet +map +placement +rout +toolscan +gener +automat +descript +fpgaarchitectur +architectur +specif +metric +incorporatedinto +variou +tool +improv +result +embed +systemsth +chinook +project +hardwar +softwar +design +synthesi +simul +system +embed +applic +perform +optim +synchron +circuitsretim +level +clock +circuit +effici +algorithm +retim +circuit +uselevel +sensit +latch +improv +perform +reduc +cost +andincreas +toler +clock +skew +architectur +retim +method +improv +perform +synchronouscircuit +latenc +feedback +contraint +network +routerth +chaoticrout +project +self +tune +systemsself +tune +system +direct +kehlprevi +research +project +gemini +valid +layout +compar +specif +circuit +implement +circuit +mactest +cost +digit +function +tester +chip +circuit +cmo +voltag +level +arpa +reportsarpa +bluebook +paragraph +overview +accomplish +embed +system +high +perform diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..3daf0859 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,94 @@ +softwar +safeti +univers +washington +softwar +plai +increasingli +import +role +system +nuclear +reactor +aircraft +defenc +space +system +chemic +plant +medic +equip +consequ +malfunct +safeti +critic +system +must +pass +rigor +test +review +us +although +system +safeti +engin +techniqu +exist +decad +appli +system +contain +digit +comput +softwar +goal +univers +washington +safeti +project +develop +theoret +foundat +safeti +methodolog +build +safeti +critic +system +built +upon +foundat +safewar +system +safeti +comput +nanci +leveson +summar +issu +involv +lai +foundat +methodolog +work +safeti +analysi +techniqu +support +methodolog +prototyp +tool +us +valid +specif +analysi +techniqu +univers +washington +comput +scienc +softwar +engin +safeti +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..94ec28fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,239 @@ +internet +softbotinternet +softbotth +softbot +finalist +discoveraward +technolog +innov +comput +softwar +build +autonom +agent +interact +real +world +softwareenviron +oper +system +databas +pragmaticallyconveni +intellectu +challeng +substrat +research +support +claim +util +plan +machin +learningtechniqu +develop +internet +softbot +softwar +robot +acustomiz +moder +intellig +assist +internetaccess +softbot +accept +goal +high +level +languag +generatesand +execut +plan +achiev +goal +learn +itsexperi +softbot +enabl +human +user +state +want +accomplish +softbot +disambigu +requestand +dynam +determin +satisfyit +softbot +us +unix +shell +world +wide +interactwith +wide +rang +internet +resourc +take +tour +softbot +sgraphic +user +interfac +princip +investig +oren +etzioni +daniel +weld +also +check +metacrawl +softbot +field +servic +enabl +tosearch +multipl +indic +parallel +provid +sophisticatedprun +option +inform +contact +oren +etzioni +etzioni +washington +access +introduct +softbot +project +found +softbot +base +interfac +internet +cacm +juli +methodolog +motiv +project +found +intellig +without +robot +repli +brook +magazin +decemb +technic +softbot +paper +found +cartoonrepresent +internet +softbot +taken +blanchard +articl +appear +decemb +issu +ofcolumn +univers +washington +alumni +magazin +softbot +research +group +current +develop +graphic +user +interfac +toth +softbot +allow +user +easili +specifi +high +level +goal +extend +maintain +xiiplann +keith +golden +work +graphic +specif +search +control +keith +golden +implement +advanc +plan +space +browser +debug +planner +control +dave +christianson +compar +rule +base +versu +procedur +search +control +sujai +parekh +ilalearn +inform +resourc +design +protocol +multi +softbot +collabor +negoti +ying +experi +reactiv +system +softwar +domain +kwok +goan +build +optim +agent +ingram +inform +gather +reactiv +system +internet +kwok +softbot +hacker +info +local +access +back +home +page +back +home +page +mike +perkowitz +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..adde1ffb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,385 @@ +spin +spin +washington +spin +oper +systemspin +extens +oper +system +kernel +thatsupport +dynam +adapt +system +interfac +andimplement +direct +applic +control +stillmaintain +system +integr +inter +applic +isol +spin +allow +applic +load +code +kernel +atruntim +run +kernel +extens +code +accesshardwar +oper +system +servic +almost +nooverhead +basic +procedur +call +data +pass +byrefer +rather +copi +spin +kernel +providesa +core +interfac +capabl +basic +resourc +thesear +us +collect +extens +implement +gener +systemservic +address +space +thread +network +allextens +code +kernel +written +typesaf +languag +modula +properti +oftypesafeti +prevent +extens +crash +system +attemptingto +manipul +arbitrari +piec +code +data +spin +run +alpha +workstat +us +writeboth +special +applic +network +video +system +wella +support +gener +purpos +unix +program +connect +machinerun +spin +kernel +spin +server +quick +result +structur +allow +program +extend +oper +system +servic +withlow +overhead +exampl +spin +run +alpha +applic +handl +recov +page +fault +microsecond +take +microsecond +creat +thread +control +executeit +termin +synchron +termin +fork +join +protectedprocedur +call +anoth +extens +kernel +function +take +microsecond +cross +machin +applic +applic +overethernet +take +microsecond +realli +oldadapt +take +less +microsecond +operationsund +mach +unix +take +time +longer +samehardwar +time +benchmark +page +saveyourself +effort +recent +report +paper +dynam +bind +extens +oper +system +invoc +mechan +provid +flexibl +effici +andsimpl +integr +extens +execut +system +appear +osdi +extens +safeti +perform +spin +oper +system +design +implement +perform +paper +appear +sosp +extens +protocol +architectur +forappl +specif +network +design +implement +perform +paper +appear +usenix +winter +confer +write +oper +system +us +modula +describ +experi +us +modula +build +high +perform +extens +system +make +clear +distinct +languag +implement +languag +support +extens +oper +system +pretti +happi +deal +shortcom +order +languag +safe +extens +oper +system +paper +describ +address +shortcom +safe +dynam +link +extens +oper +system +describ +dynam +linker +load +code +kernel +point +abil +creat +manag +linkabl +namespac +describ +interfac +collect +interfac +languag +runtim +support +dynam +interposit +system +code +describ +kernel +intern +commun +extens +facil +show +dynam +code +gener +improv +perform +critic +kernel +servic +inform +dynam +compil +wait +time +compil +code +paper +trail +project +report +talk +paper +project +member +benchmark +interest +bottom +line +modula +inform +modula +arpa +project +overview +execut +summari +regular +report +friend +gotten +assist +academia +industri +project +page +sai +involv +relat +project +pointer +extens +system +project +peopl +sai +barb +arrow +intern +document +latest +statu +project +member +avail +project +project +could +result +qualif +credit +master +degre +fund +raship +posit +undergradu +project +credit +mascot +encourag +mani +peopl +decid +adopt +ourmascot +page +maintain +brian +bershad +bershad +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..b6610ae5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,167 @@ +dynam +compil +projectth +dynam +compil +projectmor +inform +dynam +compil +project +member +paper +relat +projectsuw +section +student +project +group +webdynam +compil +enabl +optim +base +valu +ofinvari +data +comput +time +us +valu +theserun +time +constant +dynam +compil +elimin +memoryload +perform +constant +propag +fold +remov +branch +theydetermin +fulli +unrol +loop +bound +howev +performancebenefit +effici +dynam +compil +code +offsetbi +time +cost +dynam +compil +approach +dynamiccompil +strive +fast +dynam +compil +high +qualitydynam +compil +code +programm +annot +region +theprogram +compil +dynam +static +optimizingcompil +automat +produc +optim +machin +code +templat +us +pair +dataflow +analys +identifi +variabl +willb +constant +time +simpl +dynam +compil +copi +thetempl +patch +comput +valu +time +constant +produc +optim +execut +code +work +target +gener +purpos +imper +program +languag +initi +initialexperi +appli +dynam +compil +program +producedspeedup +rang +part +spinproject +eventu +system +us +dynamicallycompil +code +spin +kernel +exampl +spinev +dispatch +howev +also +activ +explor +otherposs +applic +dynam +compil +invirtu +machin +interpret +prototyp +dynam +compil +systemi +describ +pldi +paper +arenow +start +design +build +second +gener +system +wewil +releas +detail +soon +last +updat +august +grant +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..96c255a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +move +permanentlymov +permanentlyth +document +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..065c8197 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,133 @@ +project +project +depart +comput +scienc +engin +univers +washington +seattl +info +washington +eduzpl +arrai +base +program +languag +suitabl +comput +would +previous +written +fortran +program +fast +sequenti +parallel +comput +without +modif +without +special +direct +machin +independ +recompil +necessari +program +machin +higher +level +concept +like +arrai +border +elimin +error +prone +index +tediou +loop +typic +program +shorter +easili +written +easili +understood +modifi +comput +scientist +find +concept +region +direct +border +shatter +control +flow +conclus +ideal +engin +scientif +program +walk +small +program +write +compil +program +yourmachin +scientif +programm +area +shouldconsid +enrol +zpthi +autumn +program +check +recent +chang +languag +project +overview +high +level +overview +program +walk +minut +introduct +languag +base +compil +compil +program +browser +right +paper +paper +manual +relat +detail +line +inform +sampl +program +peopl +project +member +horizon +descript +group +direct +futur +project +acknowledg +list +help +support +work +info +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..9978ad96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,255 @@ +simultan +multithread +home +pagesimultan +multithread +projectoverviewpeoplepubl +overviewth +crucial +problem +face +todai +high +speed +microprocessor +maintain +high +processor +util +face +long +instruct +memori +latenc +allevi +problem +modern +processor +issu +multipl +instruct +cycl +superscalar +interleav +execut +differentthread +differ +cycl +multithread +processor +ultim +though +techniqu +limit +amount +parallel +avail +within +singl +thread +singl +cycl +simultan +multithread +techniqu +permit +multipl +independ +thread +issu +instruct +superscalar +function +unit +singl +cycl +combin +multipl +instruct +issuefeatur +wide +superscalar +processor +latenc +hide +abilityof +multithread +architectur +processor +hardwar +contextsar +activ +simultan +compet +cycl +avail +resourc +dynam +share +processor +resourc +enabl +exploitthread +level +instruct +level +parallel +interchang +formsof +parallel +effect +us +increas +processor +util +studi +havedemonstr +simultan +multithread +significantli +improvesprocessor +throughput +perform +multiprogram +parallelworkload +shown +perform +gain +achievedin +architectur +minim +extens +modern +ordersuperscalar +processor +current +futur +work +includ +investig +fast +synchronizationtechniqu +enabl +also +conduct +research +otherarchitectur +compil +issu +simultan +multithread +peoplefaculti +susan +egger +hank +levygradu +student +jack +dean +tullsenindustri +collabor +digit +equip +corpor +joel +emer +rebecca +stamm +public +convert +thread +level +parallel +instruct +level +parallel +simultan +multithread +abstract +postscript +egger +emer +levi +stamm +andd +tullsen +submit +public +juli +exploit +choic +instruct +fetch +issu +implement +simultan +multithread +processor +abstract +postscript +tullsen +egger +emer +levi +stamm +proceed +annual +intern +symposium +comput +architectur +philadelphia +compil +issu +simultan +multithread +processor +postscript +egger +levi +andd +tullsen +proceed +first +suif +compil +workshop +stanford +januari +simultan +multithread +maxim +chip +parallel +abstract +postscript +tullsen +egger +andh +levi +proceed +annual +intern +symposium +comput +architectur +santa +margherita +ligur +itali +june +student +check +list +research +project +still +doon +student +affair +page +page +maintain +jack +lojlo +washington diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..4ce6c8e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,102 @@ +home +pagecomput +scienc +departmentabout +comput +scienc +departmentour +depart +form +consist +rank +comput +scienc +depart +countri +faculti +member +receiv +fourteen +presidenti +young +investig +award +packard +fellowship +faculti +award +women +scientist +engin +incent +excel +award +three +doctor +dissert +award +three +faculti +develop +award +comput +scienc +depart +area +research +project +inform +peopl +comput +scienc +depart +cours +offer +fall +class +futur +timet +technic +report +comput +system +answer +frequent +ask +question +comput +scienc +alumni +inform +graduat +guidebook +undergradu +guidebook +depart +annual +report +onlin +util +madison +local +servic +relat +organ +colophon +statist +server +us +infocomput +scienc +departmentunivers +wisconsin +madisona +comput +scienc +statist +west +dayton +streetmadison +wisc +voic +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..e73e8f09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,171 @@ +object +explor +purpos +viewpoint +control +object +explor +purpos +viewpoint +control +kyro +kutulako +chuck +dyer +consider +interest +recent +emploi +simpleobserv +behavior +either +make +recoveri +scene +propertieseasi +fixat +combin +simpl +behavior +order +toperform +complex +task +navig +obstacl +avoid +ourwork +focus +abil +activ +observ +control +pointof +observ +perform +task +involv +explor +object +develop +behavior +provabl +correct +makesimpl +motion +decis +base +observ +local +geometryof +scene +requir +minim +process +imag +first +consid +task +recov +local +shape +thesurfac +select +point +approach +base +generalobserv +posit +provid +inform +objectthan +other +exist +special +viewpoint +beexploit +observ +mobil +effici +anddeterminist +strategi +reach +show +localshap +recoveri +task +achiev +us +simpl +qualitativestrategi +smoothli +control +point +observ +viewingdirect +align +princip +direct +selectedpoint +second +consid +task +deriv +global +descriptionof +object +formul +global +surfac +reconstruct +thequalit +task +smoothli +control +point +observationso +visibl +slide +maxim +connect +reconstruct +region +show +task +provabl +achiev +arbitrari +smooth +surfac +attempt +maintain +well +defin +geometr +relationship +point +observationand +view +surfac +approach +suggest +abil +smoothli +control +point +observ +lead +provabl +correct +behavior +achiev +local +global +task +scene +explor +navig +also +simplifi +frame +comput diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..40fd3773 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,142 @@ +deform +contour +model +extract +detect +classif +deform +contour +model +extract +detect +classif +fung +roland +chin +develop +integr +approach +model +extract +detect +classifi +deform +contour +directli +noisi +imag +conduct +case +studi +regular +formul +initi +ofact +contour +model +snake +us +minimax +principl +deriv +regular +criterion +wherebi +valu +automat +implicitli +determin +along +contour +furthermor +formul +energi +function +yield +snake +contain +hough +transform +special +case +subsequ +consid +problem +model +extract +arbitrari +deform +contour +noisi +imag +combin +stabl +invari +anduniqu +contour +model +markov +random +field +yield +priordistribut +exert +influenc +arbitrari +global +model +allow +deform +bayesian +framework +contour +extract +turn +posterior +estim +turn +equival +energi +minim +gener +activ +contour +model +final +integr +lower +level +visual +task +withpattern +recognit +process +detect +classif +base +nearman +pearson +lemma +deriv +optim +detect +classificationtest +summat +peak +practic +applic +small +region +need +consid +margin +distribut +valid +formul +confirm +extens +rigor +experiment +gsnake +softwar +avail diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..d7bfba5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,423 @@ +data +visual +base +lattic +data +visual +base +lattic +bill +hibbard +brian +paul +chuck +dyer +defin +foundat +data +visual +base +theidea +visual +process +function +ofdata +object +call +data +model +displai +calleda +displai +model +prototyp +system +call +adha +implement +base +upon +idea +guid +follow +principl +scientist +develop +mathemat +model +natur +data +objectsrepres +object +mathemat +model +mathemat +objectsfrequ +contain +infinit +precis +real +number +functionswith +infinit +domain +wherea +comput +data +object +containfinit +amount +inform +thu +approxim +themathemat +object +repres +comput +displai +contain +finit +amount +inform +contain +finit +number +pixel +color +chosenfrom +finit +palett +anim +sequenc +contain +finit +numbersof +frame +thu +approxim +ideal +displai +close +relationship +data +comput +data +model +appropri +data +object +computationalmodel +program +languag +purpos +data +visual +commun +informationcont +data +object +particular +defin +expressivenesscondit +map +data +object +displai +thatdisplai +encod +fact +data +object +onlythos +fact +visual +system +complet +sens +itimpl +function +data +object +displai +satisfyingth +expressivenss +condit +defin +order +relat +data +object +base +onhow +precis +approxim +mathemat +object +wecan +defin +order +relat +displai +base +howprecis +approxim +ideal +displai +base +voxelresolut +order +relat +defin +lattic +ofdata +object +displai +therefor +model +visualizationprocess +function +lattic +data +objectsto +lattic +displai +interpret +expressivenesscondit +context +show +satisfi +theexpress +condit +lattic +isomorph +defin +particular +lattic +appropri +scientificdata +object +data +object +built +primitivevari +call +scalar +time +latitud +radianc +temperatur +tupl +arrai +data +type +constructor +arrai +time +temperatur +data +type +appropriatefor +time +seri +temperatur +thu +lattic +containsth +data +object +scientif +program +languag +canalso +defin +particular +lattic +displai +displayi +voxel +voxel +specifi +graphicsprimit +call +displai +scalar +pixel +locationand +size +volum +color +place +animationsequ +show +function +satisfi +expressivenesscondit +class +function +defin +map +fromth +scalar +us +primit +variabl +data +object +thedisplai +scalar +specifi +graphic +primit +voxel +ofcours +design +scientif +displai +alreadi +assum +primit +variabl +map +graphic +primit +exampl +given +data +object +type +arrai +time +temperatur +isnatur +displai +graph +time +along +axi +andtemperatur +along +anoth +remark +thing +wedo +take +design +displai +assumpt +consequ +fundament +expressivenesscondit +develop +implement +system +calledvi +adthat +allow +scientist +experi +algorithm +steer +theircomput +visual +data +object +creat +theirprogram +lattic +defin +data +object +thevi +program +languag +system +implement +lattic +vvof +displai +data +displai +us +function +thatsatisfi +express +condit +howev +implementationi +quit +precis +data +flow +system +defin +auser +interfac +control +data +displai +base +abstractionof +render +pipelin +system +defin +user +interfacefor +control +data +displai +base +abstract +ofmap +scalar +displai +scalar +possibl +defin +data +lattic +recurs +defineddata +type +complex +link +type +tree +us +ingener +purpos +program +languag +abstract +datatyp +object +class +object +orient +program +languag +lattic +provid +rigor +foundat +visual +particular +help +develop +analyt +altern +usualapproach +defin +visual +process +construct +bywrit +special +purpos +program +comput +displai +fora +specif +data +object diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..26777388 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,68 @@ +condor +project +homepag +object +goal +condor +project +develop +implement +deploi +evaluatemechan +polici +support +high +throughput +comput +larg +collect +distribut +own +comput +resourc +guid +technologicaland +sociolog +challeng +comput +environ +condor +team +build +softwar +tool +enabl +scientist +engin +increas +comput +throughput +introduct +start +high +throughput +comput +research +condor +system +condor +pool +univers +wisconsin +madison +condor +help +page +project +home +page +condor +world +mail +list +comment +suggestionscondor +admin +wisc +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..51db6034 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,11 @@ +next +homepag +next +peopl +next +project +last +modifi +septemb +miron +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..aa2e5d10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,262 @@ +coral +databas +projectcor +databas +projectdocu +content +objectiveoverviewreleas +informationse +also +public +relat +coralpeopl +work +coraloth +research +madisonobject +object +coral +project +develop +robust +efficientdeduct +databas +system +investig +variou +applic +domain +sever +algorithm +underli +coralsystem +develop +member +group +durationof +project +sinc +overview +coral +deduct +system +support +rich +declar +languag +interfac +allow +combin +declaritiveand +imper +program +declar +queri +languag +supportsgener +horn +claus +augment +complex +term +group +aggreg +negat +relat +tupl +contain +univers +quantifi +variabl +coral +declar +program +canb +organ +collect +interact +modul +coralimplement +support +wide +rang +evalu +strategi +andautomat +choos +effici +evalu +strategi +modulein +program +addit +user +permit +guid +queri +optim +desir +select +among +wide +rang +control +choic +atth +level +modul +coral +system +provid +imper +construct +updat +insertand +delet +rule +coral +also +interfac +user +canprogram +combin +declar +coral +extend +withcor +primit +high +degre +extens +provid +allowingc +programm +class +structur +enhanc +coralimplemen +coral +provid +support +main +memori +disk +resid +data +disk +resid +data +support +us +theexodusstorag +manang +also +provid +transact +manag +aclient +server +environ +releas +inform +current +releas +coral +version +releas +octob +instal +coral +system +grab +file +want +nobin +version +contain +sourc +code +requiringy +compil +coral +version +includ +made +binari +forth +indic +machin +type +click +file +grab +readm +gener +inform +instal +manual +coral +instruct +instal +coral +nobin +binari +includ +coral +hpux +seri +binari +includ +coral +suno +binari +includ +coral +solari +binari +includ +coral +solari +binari +includ +coral +linux +linux +binari +includ +stai +inform +releas +coral +announcemnt +mail +listwhich +reciev +announc +releas +relev +inform +releas +also +announc +newsgroup +comp +lang +misc +also +submit +question +comment +report +coral +send +mail +coral +wisc +edulast +modifi +octob +shawn +flisakowski +flisakow +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..0f5d6b06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,410 @@ +complementar +problem +algorithm +applic +complementar +problem +neta +result +three +decad +research +subject +complementar +problem +divers +applic +engin +econom +scienc +becom +well +establish +fruitfuldisciplin +within +mathemat +program +sever +monograph +survei +document +basic +theori +algorithm +applic +complementar +problem +role +optim +theori +page +serv +center +inform +regard +research +incomplementar +problem +list +meetingsof +interest +commun +pointer +softwar +forcomplementar +problem +well +link +page +interest +list +research +applic +area +also +given +tabl +content +directori +research +complementar +softwar +complementar +problem +applic +complementar +relev +link +directori +complementar +researcherssoftwar +complementar +problem +mcplib +collect +nonlinear +mix +complementar +problem +problemdescript +access +gam +sourc +file +forthes +problem +complementar +toolbox +matlab +evolv +freeli +avail +toolbox +consist +sever +andm +file +allow +mcplib +problem +access +frommatlab +without +access +gam +file +give +functionevalu +spars +jacobian +evalu +machin +specificvers +download +path +solver +also +avail +toolbox +detail +solver +interfac +gam +describ +librari +routin +areavail +help +hook +solver +gam +model +languag +contact +steve +gam +rutherford +colorado +ferri +wisc +edufor +detail +mile +mile +extens +classicaljosephi +newton +method +solut +linearizedsubproblem +comput +lemk +almost +complementari +pivot +algorithm +newton +point +us +defineth +newton +direct +us +dampedlinesearch +merit +function +us +measur +violat +infeas +complementar +mile +also +emploi +restartprocedur +case +newton +point +cannot +comput +totermin +secondari +everi +linear +subproblem +rescal +equilibr +elementsappear +data +subproblem +path +run +gam +mcpor +directli +matlab +path +solver +appli +techniqu +similarto +us +newton +method +smooth +system +anonsmooth +reformul +algorithmconsist +sequenc +major +iter +consist +anapproxim +linear +step +similar +mile +construct +pathto +newton +point +solut +approxim +aposs +search +path +newton +point +exist +thepath +cannot +entir +construct +step +along +partiallycomput +path +taken +problem +relinear +anonmonoton +watchdog +strategi +emploi +appli +path +search +help +avoid +converg +local +minima +norm +function +forth +underli +nonsmooth +equat +keep +number +functionevalu +requir +small +possibl +list +solver +option +given +document +algorithm +base +uponreformul +system +nonsmooth +equat +algorithm +implement +gam +solver +robustnessimprov +us +proxim +perturb +strategi +give +qpcomp +algorithm +nonsmooth +equat +ishandl +us +direct +deriv +smooth +smooth +algorithm +base +uponreformul +system +nonsmooth +equat +thenapproxim +solv +sequenc +smooth +approxim +leadto +zero +nonsmooth +system +iter +smooth +approxim +origin +system +form +theaccuraci +approxim +determin +residu +thecurr +point +implement +gam +system +solver +implement +subsystem +gam +compar +paper +applic +complementar +engineeringand +econom +applic +complementar +problem +paper +list +mani +known +applic +complementar +problem +mpsge +preprocessor +gam +model +languag +thatallow +econom +equilibrium +problem +formul +easili +thegam +home +page +inform +nemsth +nation +energi +model +system +sever +paper +relat +algorithmsand +paper +give +overview +project +relev +link +look +michael +trick +oper +research +page +interest +link +look +interior +point +inform +interior +pointmethod +argonn +nation +laboratori +archiv +last +modifi +octob +michael +ferri +ferri +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..c98b2788 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,71 @@ +exodu +project +home +pageexodu +extens +object +orient +databas +system +toolkitnot +document +construct +exodu +project +succed +theshor +project +still +provid +minim +support +user +theexodu +storag +manag +compil +persistentprogram +languag +avail +wisc +licens +requir +inform +need +contact +exodu +wisc +eduprincip +investig +mike +carei +david +dewittse +also +public +relat +exodusshor +successor +exoduslatest +exodu +storag +manag +compilercontribut +softwar +storag +managera +mail +list +exodu +user +exodus_al +wisc +benchmark +benchmark +oodbsdat +prepar +april +michael +zwill +zwill +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..0034cc71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,326 @@ +paradis +projectparadis +parallel +databas +system +applic +document +content +object +client +server +paradis +sampl +paradis +frontend +contact +informationse +also +public +relat +paradis +peopl +work +paradis +shore +project +object +manag +us +paradis +extens +optim +paradis +report +examin +sequoia +benchmark +script +us +vldb +paper +inform +madison +databas +research +group +madison +depart +serverobject +object +paradis +project +design +implement +andevalu +scalabl +parallel +geograph +inform +system +iscap +store +manipul +massiv +data +set +applyingobject +orient +parallel +databas +technolog +problem +ofstor +manipul +geograph +inform +hope +tosignificantli +advanc +size +complex +data +set +thatcan +successfulli +store +brows +queri +client +server +paradis +paradis +databasesystem +aim +handl +type +applic +paradis +providesa +graphic +user +interfac +queri +brows +databas +andsupport +subset +issu +queri +paradiseprovid +extend +relat +data +model +model +gisappl +addit +support +base +data +type +asinteg +real +string +paradis +built +support +raster +polygon +polylin +point +circl +video +mpeg +imag +data +paradis +us +shore +underli +persist +object +manag +paradis +front +allow +displai +object +spatialattribut +provid +layer +displai +foroverlap +spatial +attribut +layer +correspond +queri +custom +order +displai +layer +selectingcolor +label +displai +attribut +queri +databas +queri +either +graphic +interfac +withad +queri +graphic +interfac +us +issueimplicit +spatial +queri +zoom +click +sketch +arubb +band +queri +paradis +provid +querycompos +menu +queri +compos +provid +access +databaseschema +assist +queri +composit +queri +result +either +beview +spatial +attribut +bedisplai +tabl +brows +tupl +result +front +also +provid +context +sensit +help +paradis +support +subset +issu +queri +sqlwe +ad +abil +invok +method +defin +extendedset +type +exampl +calcul +area +polygon +byus +method +polygon +area +paradis +also +support +standarddatabas +oper +includ +creat +drop +databas +creat +anddrop +extent +creat +drop +indic +insert +updat +current +version +paradis +emploi +client +server +architectur +front +ship +queri +syntax +paradiseserv +execut +execut +queri +server +ship +theresult +object +back +client +paradis +server +ismulti +thread +multipl +client +connect +sameserv +design +implement +paradis +sever +carefulattent +paid +insur +system +could +effici +processqueri +especi +involv +spatial +attribut +largevolum +data +sampl +paradis +frontendeurop +data +sampl +pressher +contact +inform +paradis +projectattn +prof +david +dewittunivers +wisconsin +madisoncomput +scienc +depart +west +dayton +streetmadison +email +paradis +wisc +edumor +come +biswadeep +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..111a8d89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,114 @@ +scout +servic +homepagego +text +versionnewslett +newand +newli +discov +internet +resourc +network +toolsinternet +announc +updat +daili +effectiveinternet +tool +availablea +report +student +studentssurf +smarter +longer +intern +scout +project +universityof +wisconsin +madison +show +best +canchoos +best +filter +hundr +internet +annoucementseach +week +look +valuabl +onlin +resourc +networktool +organ +summar +annot +best +vefound +offer +internet +commun +sever +us +format +goal +scout +support +effect +internet +byeduc +research +howev +everyon +welcom +useth +public +site +provid +scout +encouragefeedback +suggest +entir +internet +commun +three +primari +servic +provid +includ +scout +report +happen +thescout +toolkit +ournewest +project +know +report +student +student +scout +servic +locat +depart +comput +scienc +theunivers +wisconsin +madison +project +intern +comment +suggest +feedbackscout +intern +scout +servicesfor +inform +us +internet +intern +inform +educ +servic diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..420517f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,961 @@ +shore +project +home +pageshor +high +perform +scalabl +persist +object +repositorydocu +content +objectiveoverviewreleas +informationmail +listsse +also +shore +version +line +inform +sourc +binari +document +beta +releas +shore +version +public +relat +shorepeopl +work +shorelatest +research +summari +arpaparadis +project +built +shoreexodu +project +predecessor +shoreoo +benchmark +benchmark +oodbsshor +photo +albumuw +madison +databas +research +group +madison +depart +serverobject +object +shore +project +design +implement +andevalu +persist +object +system +serv +need +widevarieti +target +applic +includ +hardwar +softwar +cadsystem +persist +program +languag +geograph +informationsystem +satellit +data +repositori +multi +media +applic +shore +expand +basic +capabl +wide +usedexodusstorag +manag +develop +wisconsin +fund +arpa +number +ofwai +includ +support +type +object +multipl +programminglanguag +unix +like +hierarch +name +space +name +object +anda +unix +compat +interfac +object +text +field +thisinterfac +intend +eas +transit +applic +theunix +file +system +environ +shore +exist +unix +tool +viand +abl +store +data +shore +object +withoutmodif +basic +unix +file +becom +either +singl +shoreobject +text +field +complex +object +overview +shore +someth +hybrid +system +natur +inheritingcharacterist +object +orient +databas +system +fromfil +system +section +briefli +describ +basic +featur +ofshor +paper +shore +persist +applic +describ +shore +much +greater +detail +shore +three +major +goal +scalabilitysupport +hardwar +languag +heterogeneitysupport +exist +file +base +applicationswhen +shore +project +began +year +goal +uniqueamong +research +commerci +oodbm +commun +odmg +effort +also +concentr +provid +degre +support +languageheterogen +turn +facilit +hardwar +heterogen +shore +remain +distinguish +focu +scalabl +supportfor +applic +depend +unix +file +system +persistentstorag +furthermor +sinc +shore +data +model +basicallycompat +odmg +data +model +expect +much +technolog +develop +eventu +betransf +commerci +sector +scalabl +architectureshor +softwar +architectur +uniqu +sever +wai +first +shore +us +symmetr +peer +peer +distributedarchitectur +shore +everi +particip +processor +run +ashor +server +process +whether +processor +shore +data +disksattach +softwar +design +scalabl +singl +processor +network +workstat +larg +parallel +processor +intel +paragon +design +contrast +client +server +architectureus +exodu +oodbm +vendor +client +serverarchitectur +fine +design +environ +typicallyus +softwar +hardwar +effort +scalabl +second +uniqu +featur +shore +architectur +notionof +valu +ad +server +structur +softwar +runsin +server +extens +mind +rel +simpl +forus +build +applic +specif +server +exampl +theparadis +project +alreadi +us +shore +server +build +geograph +inform +system +nasa +seosdi +project +feel +uniqu +piec +technolog +plai +aimport +role +varieti +futur +research +commerci +endeavor +exampl +digit +librari +futur +almost +certainlydepend +avail +scalabl +persist +object +technolog +system +go +store +retriev +manipul +transmitobject +contain +video +pictur +well +text +whilecurr +oodbm +product +could +us +system +orientedtoward +deal +gigabyt +terabyt +data +customiz +equal +import +index +retriev +queri +process +mechan +need +digit +libraryar +differ +requir +geograph +informationsystem +languag +hardwar +heterogeneityobject +shore +type +shore +provid +singl +languag +neutraltyp +system +us +defin +type +shore +object +type +system +embodi +shore +data +languag +languag +shore +object +type +defin +enhanc +data +model +support +databasefeatur +bulk +type +set +list +persist +provis +type +persist +object +simplifi +task +ofsupport +heterogen +hardwar +environ +make +feasibleto +support +access +persist +object +multipl +programminglanguag +object +shore +project +mention +earlier +quit +close +relat +languag +neutral +object +type +definit +languag +wasrec +propos +standard +oodb +vendor +consortium +odmg +term +emphasi +howev +odmg +larg +concentr +onprovid +standard +interfac +exist +orient +oodb +focu +support +inter +languag +object +share +withina +larg +name +space +object +support +exist +file +base +applicationsa +major +goal +shore +enabl +applic +currentlyus +untyp +byte +orient +file +persist +data +flatten +flatten +data +time +access +stop +applic +abl +store +data +type +structuredobject +conveni +type +safe +intra +inter +program +data +share +ultim +hope +shore +displac +byte +orientedfil +system +unix +file +system +shore +provid +major +servic +file +system +standpoint +first +support +object +name +space +manag +world +manypersist +object +shore +provid +flexibl +tree +structur +unix +like +name +space +persist +object +reachabl +either +directli +indirectli +give +shore +usersa +familiar +framework +regist +individualpersist +object +term +regist +object +root +oflarg +persist +data +structur +bulk +set +unnam +object +term +anonym +object +realiz +framework +involvessever +differ +kind +shore +file +system +object +includingdirectori +pool +file +contain +anonym +object +symbol +link +cross +refer +shore +provid +mechan +eas +transit +legaci +unixappl +compil +editor +system +fromtradit +byte +stream +file +shore +first +applic +link +shore +provid +standardunix +compat +file +system +interfac +open +close +read +write +mkdir +chdir +order +make +access +shore +object +unix +file +system +callsposs +defin +shore +object +type +option +design +onevari +length +byte +string +charact +string +attribut +object +asb +object +unix +data +program +attempt +read +objectthrough +shore +counterpart +unix +file +system +callswil +portion +object +legaci +program +thatwish +without +link +possibl +mount +ashor +file +system +access +unix +datacontain +object +directli +make +feasibl +bothnew +applic +access +object +applic +access +unix +data +componentof +object +applic +defin +access +morestructur +attribut +object +releas +inform +latest +time +tabl +releas +shore +date +approxim +subject +chang +question +contact +shore_support +wisc +beta +releas +first +beta +releas +beta +releas +second +beta +rleas +shore +version +avail +sept +includ +improv +document +completeimplement +mani +featur +mani +fix +port +tosolari +linux +version +august +releas +shore +version +gzip +file +sourc +document +binari +releas +sparc +andpentium +solari +found +atftp +wisc +shore +mail +liststher +shore +relat +mail +list +shore_support +wisc +eduand +shore_al +wisc +shore_support +wisc +eduthi +mail +list +reach +shore +develop +team +usebi +shore +user +submit +question +comment +report +cannot +subscrib +mail +list +shore_al +wisc +mail +list +user +interest +shore +list +manag +listproc +softwar +madisonc +depart +current +unmoder +unlikelyev +get +clutter +junk +mail +moder +mail +messag +interest +list +mailbox +isalreadi +clutter +sign +weekli +digest +belowfor +inform +inform +list +sentwhen +subscrib +purpos +shore_al +notifi +interest +parti +releas +chang +shore +archiv +request +help +user +default +repli +sent +sender +rather +beingpost +entir +list +want +entir +list +yourrepli +copi +repli +shore_al +list +public +mail +list +thu +anyon +maysubscrib +subscrib +post +list +existenceof +list +shown +list +return +listproc +whenit +process +list +request +subscrib +yoursubscript +conceal +default +subscriberscannot +obtain +membership +list +listproc +system +subscrib +shore_al +subscrib +chang +subscript +must +mail +specialmessag +listproc +wisc +subscrib +content +messag +look +like +subscrib +shore_al +receiv +weekli +digest +rather +individu +messag +sendthi +along +subscript +send +separ +messag +shore_al +mail +digest +subscrib +content +messag +unsubscrib +shore_al +help +list +processor +content +messageshould +helplast +modifi +nanci +hall +nhall +wisc +footnot +compat +odlshor +odmg +concurr +decid +data +modelidl +start +point +data +model +henc +odlar +similar +anoth +stabilizesw +convert +compat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..db9c0734 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,40 @@ +abhinav +home +page +welcom +abhinav +gupta +agupta +wisc +gupta +wisc +page +construct +graduat +student +depart +comput +scienc +univers +wisconsin +madison +contact +residenceoffic +kendal +avenu +madison +depart +comput +scienc +dayton +street +madison +interest +link +indian +newspap +stuff +sport +finger +find +whereabout diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..4267d655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,15 @@ +alain +home +pagealain +click +larger +pictur +largest +carnivor +ever +live +last +modifi +alain +alain +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..c4371d8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,84 @@ +carolyn +allex +home +page +carolyn +allex +graduat +studentbiotechnolog +train +program +traineecomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +mail +allex +wisc +edutelephon +advisor +professor +jude +shavlikinterest +comput +biologi +sequenc +protein +fold +artifici +intellig +machin +learn +neural +networkseduc +comput +scienc +univers +wisconsin +madisonb +comput +scienc +purdu +universityb +educ +mankato +state +univers +relat +link +univers +wisconsin +depart +univers +wisconsin +group +univers +wisconsin +machin +learn +group +univers +wisconsin +comput +biologi +research +intellig +system +molecular +biologi +ismb +intellig +system +molecular +biologi +ismb +intellig +system +molecular +biologi +ismb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..e4ed9148 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,225 @@ +amir +home +page +amir +roth +delphi +maven +show +erin +madison +amir +wisc +occasionali +updat +copi +resum +cvte +truth +group +seminar +arch +group +arch +seminar +week +research +topic +implement +compil +us +preprocessor +deleg +work +project +partner +set +airport +metal +detector +out +existencei +graduat +student +depart +comput +scienc +univers +wisconsin +madison +advisor +guri +sohi +look +method +allevi +data +depend +multiscalar +program +us +distribut +regist +file +multiscalar +program +side +curli +fri +physic +yale +univers +advanc +degre +nail +design +vallei +beauti +school +physic +interest +beauti +degre +much +practic +live +girlfriend +marci +cat +charli +marci +also +went +yale +get +master +public +polici +lafollett +institut +go +presid +meantim +like +solv +linear +regress +problem +wacki +recip +find +magazin +watch +parti +five +like +eggplant +peopl +think +weird +anywai +promis +out +subba +officem +daddi +novemb +titanium +screw +desi +relaford +terri +mulholland +oxygen +carbon +dioxid +area +vagu +interest +program +languag +program +analysi +super +whack +compil +optim +parallel +algorithm +theori +good +soul +analysi +evalu +model +perform +enhanc +three +point +shot +thing +scaryarea +rabid +interestth +love +know +talk +better +leav +page +never +return +hmmm +interest +super +handyinformatik +index +comput +scienc +journal +author +madcat +architectur +resourc +minut +score +sportslin +philli +everybodi +favorit +engin +super +ickyth +new +friend +barb +write +articl +gui +go +read +want +kid +barb +friend +drew +home +page +cornel +david +home +page +think +wierd +page +featur +friend +friend +associ +kemin +last +modifi +amir +roth +amir +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..da8997bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..b24137cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,181 @@ +amo +home +page +word +search +engin +approxim +theori +spline +wavelet +boxsplin +radial +basi +function +shift +invari +space +approxim +toscatt +data +multiquadr +thin +plate +splinesthi +page +netscap +enhanc +homepag +amo +associ +professordepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +usa +mail +amo +wisc +telephon +amo +home +page +present +tabl +linksat +present +item +access +order +download +paperaffin +system +analysi +analysi +operatorof +zuowei +shen +choos +follow +version +us +unix +clickher +compress +version +otherwis +download +uncompress +version +fromher +none +work +server +copi +directlyfrom +accounther +handout +email +clickher +click +vita +want +vita +list +variou +public +includ +abstract +select +articlesof +mine +inform +anonym +site +carl +boor +maintain +site +wisc +site +containspostscript +compress +postscript +file +variou +articl +theapproxim +theori +group +also +found +technic +filesconcern +us +gener +public +recommend +click +read +file +provid +list +avail +file +click +wish +view +line +view +line +download +wish +student +includ +inform +research +andpubl +main +area +interest +togeth +short +summari +present +research +futur +goal +activ +approxim +theori +group +univeristi +ofwisconsin +madison +activ +numer +analysi +group +link +home +page +peopl +approxim +theori +commun +found +miscellan +topic +activ +final +offici +homepag +pleas +deposit +comment +mailbox diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..37089648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andi +therber +home +pageandi +therberoffic +sphone +email +andyt +wisc +eduzooresumebookmarksapplet diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..fccccea8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,22 @@ +ranga +arvind +ranganathan +erstwhil +workplac +present +workplac +indiaworld +fascin +world +escher +collect +classic +paper +comput +scienc +finger +log +arvind +ranganathan +arvind +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..ab364550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,37 @@ +ashish +home +page +ashish +thusoo +graduat +student +depart +comput +scienc +univers +wisconsin +madison +come +india +hadmi +undergradu +educ +indianinstitut +technolog +delhi +depart +iitd +fantast +place +worth +visit +like +contact +canfing +find +whereabout +altern +send +email +ashisht +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..bf382460 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,51 @@ +ashraf +aboulnaga +home +pageashraf +aboulnagacomput +scienc +depart +room +univers +wisconsin +madison +west +dayton +madison +usaphon +mail +ashraf +wisc +edueduc +comput +scienc +alexandria +univers +alexandria +egypt +juli +comput +scienc +alexandria +univers +alexandria +egypt +june +info +section +view +grade +section +view +grade +offic +hour +desautel +home +page +last +modifi +septemb +ashraf +aboulnaga +finger diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..95a011f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,29 @@ +phil +atkinson +home +page +phil +atkinson +home +pageucla +bannon +win +ncaa +basketbal +championship +seattl +gener +infooffic +phone +email +atkinson +wisc +educurr +researchsailinghors +back +ridingscuba +divingc +infooffic +hour +tuth +appoint diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..bc771092 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,162 @@ +home +page +eric +bach +eric +bach +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +bach +wisc +univers +california +berkelei +interest +theoret +comput +scienc +comput +number +theori +algebraicalgorithm +complex +theori +cryptographi +string +automata +research +summari +interest +us +comput +effici +solvealgebra +number +theoret +problem +exampl +onetel +digit +number +prime +without +examin +possiblefactor +problem +intrins +mathemat +interest +well +applic +random +number +gener +code +forreli +secur +inform +transmiss +comput +algebra +area +also +interest +appli +probabl +theori +designand +analysi +algorithm +exampl +larg +number +iscomposit +prove +simpl +test +us +auxiliarynumb +call +wit +practic +usual +find +witnessbi +direct +search +among +small +prime +lead +followingnatur +question +larg +least +wit +functionof +number +test +recent +work +given +accurateheurist +model +base +probabilist +assumpt +allowsthi +similar +question +answer +recent +public +improv +approxim +euler +product +proc +cnta +canadian +math +proceed +model +algorithm +complet +problem +condon +glaser +tanguai +proc +annual +conf +comput +complex +algorithm +number +theori +volum +effici +algorithm +shallit +press +info +click +curriculum +vita +page +creat +juli +email +bach +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..5985b197 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,70 @@ +bart +miller +home +page +barton +miller +bart +wisc +professorcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usath +follow +list +thing +research +project +paradyn +parallel +perform +tool +fuzz +random +softwar +testingteach +introduct +oper +system +spring +honor +internet +seminar +advanc +oper +system +fall +distribut +system +director +undergradu +project +graduatesprofession +symposium +parallel +distribut +tool +monona +terrac +frank +lloyd +wright +convent +center +technic +advisori +groupperson +offici +depart +home +page +famili +photosbart +wisc +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..7abedf34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,47 @@ +hyper +home +page +benjamin +teitelbaum +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +usaben +wisc +edursumquinc +internet +ultim +word +gamezillion +bookmarksspr +schedul +browser +support +tabl +look +like +garbag +click +someth +readabl +mondai +tuesdai +wednesdai +thursdai +fridai +offic +hour +offic +hour +dbseminar +osseminar +condormeet +miron +plseminar diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..946979d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,404 @@ +gareth +bestor +home +pagewelcom +gareth +bestor +home +pagegareth +bestor +dissert +teach +assist +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +telephon +mail +bestor +wisc +click +finger +world +wide +http +wisc +bestor +system +administr +data +program +librari +servic +observatori +drive +madison +telephon +mail +bestor +dpl +dacc +wisc +edueduc +comput +scienc +univers +wisconsin +madison +honor +comput +scienc +massei +univers +zealand +comput +scienc +massei +univers +zealand +curriculum +vita +postscript +resum +postscript +graduat +coursework +postscript +dissert +research +titl +structur +motion +invers +project +problem +abstract +import +problem +comput +vision +recov +structur +scene +posit +observ +within +project +imag +essenti +invers +project +problem +exist +structur +motion +techniqu +solv +problem +examin +multipl +imag +project +rigid +scene +howev +us +extens +practic +techniqu +sensit +nois +accur +model +optic +project +restrict +posit +observ +structur +scene +research +us +techniqu +solv +invers +project +problem +call +concurr +projector +model +make +assumpt +scene +rigid +assumpt +posit +observ +techniqu +us +projector +base +model +project +instead +camera +base +model +tradition +us +result +algorithm +defin +geometr +transform +dimens +perspect +project +given +transform +dimens +algorithm +identifi +invers +project +problem +constrain +specifi +minimum +number +point +imag +requir +solv +concurr +projector +model +also +examin +addit +point +imag +minim +type +project +error +occur +real +world +applic +allow +projector +approxim +intersect +techniqu +current +appli +problem +robot +navig +explor +determin +posit +robot +unknown +environ +time +environ +advisor +prof +charl +dyer +research +interest +comput +machin +vision +vision +base +robot +navig +explor +comput +graphic +virtual +realiti +artifici +intellig +group +comput +vision +group +machin +learn +research +group +robot +teach +duti +spring +introduct +comput +program +section +fortran +credit +cours +cover +basic +program +structur +need +prepar +student +elementari +engin +cours +prior +comput +program +experi +requir +basic +knowledg +comput +assum +materi +cover +enabl +student +write +simpl +comput +program +solv +engin +problem +elementari +cours +program +done +fortran +cours +intend +student +receiv +littl +program +instruct +high +school +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +section +home +page +algebra +languag +program +section +fortran +construct +algorithm +problem +solv +instruct +experi +least +procedur +orient +languag +pascal +fortran +survei +languag +advanc +program +techniqu +prereq +advanc +high +school +mathemat +prepar +colleg +work +mathemat +statist +logic +consent +instructor +open +section +taught +entir +fortran +program +languag +intend +primarili +engin +student +comput +scienc +major +section +home +page +pointer +interest +comput +scienc +depart +home +page +univers +wisconsin +madison +wiscinfo +home +page +inform +zealand +hoofer +out +club +nextstep +next +softwar +start +point +internet +explor +lyco +search +world +wide +keyword +copyright +copi +gareth +bestor +bestor +wisc +last +modifi +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..799e9b19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,38 @@ +kevin +beyer +home +pagekevin +beyerbey +wisc +caution +work +graduat +student +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +advisor +raghu +ramakrishnan +area +interest +databas +researchresearch +project +coral +local +cours +inform +project +graduat +cours +undergradu +coursesinstruct +beyer +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..15a7785a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,69 @@ +todd +bezenek +home +page +toddm +bezenek +back +introduc +actual +faster +cpu +_great +microprocessor +past +present_ +uregina +bayko +html +window +express +locomot +squeez +skateboard +size +packag +helen +custer +_insid +window +microsoft +press +current +cours +advanc +oper +system +bart +miller +pithi +pith +consist +abound +pith +take +yeah +point +skew +associ +cach +access +inform +comput +scienc +depart +univers +wisconsin +west +dayton +street +madison +offic +phone +home +phone +mail +bezenek +wisc +edubezenek +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..a878ef52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,67 @@ +nathan +bockrath +graduat +student +nathan +bockrath +teach +assist +graduat +student +averag +pictur +nate +jpeg +send +email +click +bockrath +wisc +section +section +held +section +section +grade +quiz +review +viru +info +word +macro +viru +make +page +offic +hour +anywai +mondai +wednesdai +schedul +distribut +system +simul +model +support +free +speech +onlin +info +site +anoth +dai +back +home +pageback +depart +home +pageoth +neat +stuff +condor +project +internet +oraclesend +comment +bockrath +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..fdf48b56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,392 @@ +bolobologreet +bolo +although +parent +christen +josef +thoma +burger +roll +wai +call +mebolo +everyon +includ +parent +quit +sure +bestexplan +develop +mani +wai +question +person +defin +bywhat +person +case +softwar +engin +develop +semi +real +timeoper +system +util +last +year +kernel +hacker +unix +system +administr +creat +thing +engin +engin +appli +scienc +design +construct +thing +realli +call +scientist +though +acomput +scienc +degre +scientist +pai +david +dewitt +scientist +shudder +methodolog +hand +right +time +work +design +architect +implement +newoper +system +type +thing +often +sameto +thing +thing +forth +system +woodwork +home +control +draw +brew +beer +complet +relax +sleep +work +wonder +sleep +enough +although +myroomm +disagre +pursuit +enjoi +fly +read +scienc +fiction +comic +book +railroad +prototyp +model +role +plai +game +notic +imag +sublim +stripe +creatur +thetig +appear +throughout +page +tiger +taken +william +blake +poemtyg +tyger +put +word +wonder +tiger +road +againin +tremend +leap +insan +purchas +ahous +address +place +isjosef +burger +east +gate +roadmonona +voic +number +bore +workwork +work +drive +banana +us +grung +either +matur +job +chang +year +perhapssom +seem +like +parallel +comput +everi +othermonth +beat +intosubmiss +everyth +els +moon +andstar +current +work +follow +project +fordav +dewitt +world +famou +databas +hacker +gamma +parallel +relat +databas +like +queri +interpret +object +store +paradis +geograph +inform +system +implement +shore +shore +object +orient +data +store +wiss +wisconsin +storag +system +whatev +els +need +done +whole +occur +thecomput +scienc +departmentof +themadison +campusof +univers +wisconsin +campu +locat +madison +peninsula +madison +five +lake +bore +workin +addit +work +also +consult +provid +solut +rather +advic +technicalexpertis +help +internet +provid +port +softwar +newsystem +reviv +comput +oddbal +tasksar +kind +thing +tell +tovisit +serverbut +haven +time +anyth +mostlyempti +except +home +page +friend +activitiesuwvaxi +oper +uwvax +usenet +new +uucp +site +free +time +new +that +print +someth +along +line +uwvax +depart +comput +part +usenet +uucp +internet +longer +work +along +line +also +comput +scienc +depart +svolunt +new +master +much +goe +hand +hand +run +uwvax +howev +try +take +care +new +reader +across +differentarchitectur +try +task +much +time +take +care +softwar +organizationsi +member +follow +organ +alwai +agre +oftenhav +good +benefit +member +usersof +commun +aopa +aircraft +owner +pilot +associ +experiment +aircraft +associ +usenix +associ +blitz +drinkingwhen +school +hord +friendsand +visit +local +everi +thursdai +night +place +essen +hau +import +beer +world +slowli +work +entir +select +year +develop +acquaint +mani +becam +part +loftili +labelledblitz +drink +societi +rather +divers +member +drink +meet +year +essen +hau +time +ofoctoberfest +weekend +chud +accumulateda +short +histori +whatnotof +charad +bolo +home +pagelast +modifi +bolo +josef +burger +bolo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..7506b271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,68 @@ +brad +home +page +welcom +brad +thayer +homepag +much +mail +link +link +comput +scienc +home +page +intro +network +home +page +possibl +advanc +oper +system +page +would +foolish +neglect +comput +system +model +page +interest +thec +us +comput +home +page +probabl +bore +check +oper +system +seminaranywai +aim +beaucoup +boir +pepper +badger +packer +pagesom +link +search +altavista +search +enginefind +email +adress +world +wideth +jazz +page +duan +mclaughlin +home +pageuw +athlet +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..829cdf00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,84 @@ +scott +breach +home +pagescott +breach +breach +wisc +addresseseducationresearch +interest +public +recreat +associatesaddressesscott +breachdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usatel +educationph +comput +scienc +univers +wisconsin +madison +comput +engin +carnegi +mellon +univers +advisorguri +sohiresearch +interestscomput +architecturemultiscalarpublicationsmultiscalar +processorsgurindar +sohi +scott +breach +vijaykumarnd +intern +symposium +comput +architectur +anatomi +regist +file +multiscalar +processorscott +breach +vijaykumar +gurindar +sohith +intern +symposium +microarchitectur +effici +detect +pointer +arrai +access +errorstodd +austin +scott +breach +gurindar +sohiconfer +program +languag +design +implement +recreationwingsbeersquidtvassociatestodd +austindoug +burgerbabak +falsafialain +kagit +vijaykumarlast +updat +septemb +scott +breach +breach +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..0570b69a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,91 @@ +bleed +nontrivi +waysher +temporarili +underst +page +unadorn +page +provid +section +pizza +pool +page +brief +hobbi +page +schedul +spring +stinkin +bookmark +suni +albani +fall +poor +unfortun +name +myclass +hypersensit +rockjock +cretin +brood +glare +clenchesfist +crack +knuckl +tragic +flightyfemm +get +razz +asskick +sinc +thirdgrad +perhap +smooth +skin +hardbodi +leatherboi +leer +atm +whenev +call +roll +differ +make +sinc +todayi +giggl +said +name +becam +aprostitut +societi +bigotri +pedagodi +isaac +theblack +goat +refus +stai +claw +hand +sssuuuhhh +mmuuuhhhh +dddduuuuuhhhhh +mmmmuuuhhhh +maaaahhhjaaaaaahhhhh +fffuuuhhhhh +yyyyyyyuuuuuhhhhh +mmmmmuuuuuhhhhhmmmmuuuhhhhh +uuuhhh +uuummmm +uuuhhhh +wwwwwhhhhuuuuuhhhhh +suni +albani +fall +zhang +wouldn +notic +eggleston +smile diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..b4c56342 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,284 @@ +home +page +wisc +assist +professor +comput +sciencedepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usacao +wisc +eduphon +department +offic +educ +research +interest +cours +recent +paper +recent +talk +summari +collect +link +educ +princeton +univers +princeton +univers +tsinghua +univers +beij +china +research +interest +oper +system +high +perform +file +system +memori +resourc +alloc +high +perform +system +parallel +comput +research +project +optim +parallel +prefetch +cachingacf +applic +control +file +cach +prefetch +cours +research +topic +distribut +system +oper +system +fall +advanc +oper +system +spring +trace +simul +file +access +tracesrec +papersintegr +parallel +prefetch +cachingtraci +kimbrel +anna +karlin +felten +princeton +depart +tech +report +novemb +shorter +version +proceed +sigmetr +confer +applic +control +file +cach +prefetch +thesi +also +princeton +depart +tech +report +implement +perform +integr +applic +control +cach +prefetch +disk +schedulingpei +edward +felten +anna +karlin +princetonunivers +appear +toc +studi +integr +prefetch +cach +strategiespei +edward +felten +anna +karlin +princetonunivers +proceed +sigmetr +peform +implement +perform +applic +control +file +cach +edward +felten +princeton +univers +proceed +first +osdi +symposium +slide +present +osdi +applic +control +file +cach +polici +edward +felten +proceed +usenix +summer +technic +confer +tickertaip +parallel +raid +architectur +swee +boon +shivakumar +venkataraman +john +wilk +proceed +isca +recent +talksslid +applic +control +file +cach +prefetch +postscript +page +andpostscript +page +research +summarymi +research +focus +storag +manag +uniprocessor +andparallel +system +particular +investig +techniqu +improvefil +system +perform +applic +specif +replac +polici +filecach +aggress +prefetch +file +data +disk +havedevelop +system +kernel +alloc +physic +page +individualappl +applic +respons +decid +useit +physic +page +cach +prefetch +system +us +fairglob +alloc +polici +kernel +carefulli +integr +cachereplac +prefetch +disk +schedul +prototyp +implementationon +uniprocessor +system +demonstratedthat +good +applic +chosen +replac +strategi +prefetch +informationcan +significantli +improv +perform +mani +applic +current +extend +techniqu +parallel +system +amdevelop +integr +cach +prefetch +algorithm +parallel +diskarrai +addit +investig +global +resourc +managementproblem +oper +system +last +modifi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..ffecdf04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,433 @@ +mike +careymichael +careyprofessor +leav +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +research +staff +member +almaden +research +center +harri +road +jose +phone +primari +altern +mail +carei +almaden +carei +wisc +eduresearch +interestsdatabas +manag +system +parallel +distribut +comput +appli +perform +evalu +research +interest +main +area +databas +system +performanceand +next +gener +databas +system +perform +area +topicsof +current +interest +includ +perform +tradeoff +techniqu +forobject +orient +databas +system +design +evalu +algorithmsrel +transact +process +schedul +complex +multi +userdatabas +workload +base +user +specifi +perform +goal +next +gener +databas +system +area +involv +theexodu +extens +dbm +project +involv +shore +project +aimedat +develop +scalabl +repositori +storag +share +persistentobject +heterogen +environ +goal +shore +effort +whichi +build +upon +experi +exodu +project +meet +objectmanag +need +replac +unix +file +applicationssuch +case +recent +move +academia +industri +twelv +greatyear +part +becom +best +academ +databas +system +researchgroup +known +univers +time +come +tackl +anddiffer +challeng +work +almaden +research +center +thesourc +number +paper +teach +student +forth +past +twelv +year +work +relat +object +databas +signific +fraction +time +spent +rel +projectther +call +garlic +garlic +effort +build +heterogen +multimediainform +system +allow +data +live +varieti +repositori +tobe +queri +manipul +though +resid +homogen +objectdatabas +spent +sabbat +work +garlic +continuedto +work +madison +focus +graduat +student +aqueri +browser +front +tool +call +pesto +work +thegarl +project +locat +almaden +recent +public +extend +oodb +access +design +implement +experi +kiernan +proc +conf +object +orientedprogram +system +languag +applic +oopsla +austin +octob +appear +queri +multimedia +data +multipl +repositori +content +garlic +project +codi +haa +niblack +arya +fagin +flickner +petkov +schwarz +thoma +tork +roth +william +wimmer +proc +ifip +work +confer +visualdatabas +system +lausann +switzerland +march +toward +heterogen +multimedia +inform +system +garlicapproach +haa +schwarz +arya +codi +fagin +flickner +luniewski +niblack +petkov +thoma +william +and +wimmer +proc +ieee +workshop +research +issu +dataengin +ride +taipei +taiwan +march +statu +report +oodbm +benchmark +effort +withd +dewitt +kant +naughton +proc +conf +onobject +orient +program +system +languag +applic +portland +octob +toward +autom +perform +tune +complex +workload +brown +mehta +livni +proc +thint +conf +larg +data +base +santiago +chile +septemb +make +real +data +persist +initi +experi +smrc +withb +reinwald +desslock +lehman +pirahesh +srinivasan +proc +persist +object +system +workshop +tarascon +provenc +franc +septemb +shore +persist +applic +dewitt +franklin +hall +mcauliff +naughton +schuh +solomon +tsatalo +white +zwill +proc +sigmodint +conf +manag +data +minneapoli +fine +grain +share +page +server +oodbm +franklin +andm +zaharioudaki +proc +sigmod +conf +managementof +data +minneapoli +manag +memori +real +time +queri +pang +livni +proc +sigmod +conf +manag +data +minneapoli +accur +model +hybrid +hash +join +algorithm +patel +andm +vernon +proc +sigmetr +conf +measur +modelingof +comput +system +nashvil +index +altern +multivers +lock +bober +proc +conf +extend +databas +technolog +cambridg +england +march +client +server +cach +revisit +franklin +indistribut +object +manag +oszu +dayal +andp +valduriez +morgan +kaufmann +publish diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..97018a2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,42 @@ +chin +chin +tang +home +pagechin +chin +tanggradu +student +univers +wisconsin +depart +west +dayton +streetmadison +wisconsin +offic +bldg +mail +cchin +wisc +edutelephon +current +assign +introduct +data +structur +offic +hour +mondai +tuesdai +fridai +ameduc +biochemistri +univers +wisconsin +madison +biochemistri +univers +wisconsin +madison +cchin +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..9f608b0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,43 @@ +satish +chandra +home +page +satish +chandra +chandra +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaphon +research +research +interest +summari +public +come +soon +real +soon +stuff +wodehous +book +internet +movi +databas +nostalgia +york +time +altavista +italian +languag +cultur +miscellan +linksclick +log diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..3cf0f690 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,127 @@ +trishul +chilimbi +home +page +trishul +chilimbi +chilimbi +wisc +click +real +megradu +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaadvisor +laru +research +interest +program +languag +compil +architectur +parallel +comput +compil +integr +share +memori +messag +pass +parallel +program +perform +analysi +enhanc +visual +share +memori +parallel +comput +designresearch +project +wisconsin +wind +tunneleduc +univers +wisconsin +madison +tech +indian +institut +technolog +bombai +research +summari +publicationscachi +tool +automat +insert +cico +annot +trishul +chilimbi +jame +laru +intern +confer +parallel +process +icpp +august +stormwatch +tool +visual +memori +system +protocolstrishul +chilimbi +thoma +ball +stephen +eick +jame +laru +supercomput +appear +decemb +award +honor +certif +merit +state +mathemat +olympiadpresid +gold +medal +indian +nation +physic +examinationcertif +merit +state +examin +chemistrycertif +merit +state +examin +electron +miscellan +click +movi +dream +curriculum +vita +last +updat +mail +suggest +page +chilimbi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..4c186fc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,34 @@ +chandrasekaran +sashikanth +home +page +chandrasekaran +sashikanth +csashi +wisc +graduat +studentdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaadvisor +mark +hill +project +educ +btech +indian +institut +technolog +madra +june +univeristi +wisconsin +depart +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..8e427c21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,145 @@ +curt +ellmann +curt +ellmann +paradis +databas +project +depart +comput +scienc +univers +wisconsin +madison +curt +wisc +item +focu +java +relat +item +paradis +home +page +paradis +develop +page +webgnat +defect +track +paradis +prototyp +index +shore +page +previou +life +doit +eosdi +relat +opengi +consortium +global +posit +system +calmit +univers +nebraska +lincoln +feder +approach +eosdi +databas +relat +object +databas +manag +group +home +page +free +databas +list +standard +home +page +transact +process +perform +council +illustra +white +papersmiscellan +sitescampu +site +wyrm +hoard +comput +scienc +wiscinfo +wiscinfo +gopher +wiscinfo +site +madison +librari +wiscnet +netcorpor +appl +microsoft +research +land +paww +commerc +metrowerk +taligentsearch +savvi +search +webcrawl +open +text +worm +network +inform +site +intern +intern +organ +standard +internet +draft +site +dilbert +world +onlin +winsock +applic +current +weather +map +dienst +dienst +implement +geolog +survei +govern +inform +locat +gil +oakridg +nation +center +comput +scienc +stock +market +datacurt +ellmanncurt +wisc +eduparadis +databas +projectdepart +comput +sciencesunivers +wisconsin +madison diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..18e1ec33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,25 @@ +chee +yong +home +pagechan +chee +yong +graduat +student +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +email +cychan +wisc +offic +phone +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..b65d1c33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,63 @@ +sara +bauman +home +page +sara +dailei +baumandailei +wisc +edugradu +program +mathemat +comput +engin +mace +univers +wisconsin +madison +comput +scienc +depart +engin +mechan +astronaut +depart +nuclear +engin +engin +physic +educ +physic +math +comput +scienc +lewi +clark +colleg +research +work +public +current +schedul +page +link +friend +home +pagessend +mail +offic +address +univers +wisconsin +madison +comput +scienc +statist +west +dayton +street +madison +last +modifi +sara +daileytu diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..9b6e391b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,633 @@ +david +wood +home +page +david +wood +david +wisc +associ +professor +comput +scienceand +electr +comput +engineeringdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usadavid +wisc +eduphon +secretari +juli +fingerson +thea +sklenar +department +offic +research +interest +comput +architectur +especi +memori +system +design +uniprocessor +multiprocessor +design +implement +program +parallel +comput +oper +system +parallel +comput +perform +evalu +tool +techniqu +especi +memori +system +analysi +vlsi +design +includ +power +design +portabl +comput +research +project +wisconsin +wind +tunnel +memori +system +perform +tool +wart +educ +univers +california +berkelei +univers +california +berkelei +current +graduat +student +babak +falsafi +steve +reinhardt +brian +toonenrec +graduat +student +rahmat +hyder +intel +alvi +lebeck +duke +univers +pfile +microsystem +mark +callaghan +informix +cours +teach +fall +introduct +comput +architecturec +machin +organ +programmingc +introduct +comput +architecturec +advanc +comput +architectur +advanc +comput +architectur +select +recent +paper +decoupl +hardwar +support +distribut +share +memorysteven +reinhardt +robert +pfile +anddavid +wood +ieee +intern +symposium +comput +architectur +isca +coher +network +interfac +fine +grain +communicationshubhendu +mukherje +babak +falsafi +mark +hill +anddavid +wood +ieee +intern +symposium +comput +architectur +isca +synchron +hardwar +network +workstat +perform +costrahmat +hyder +david +wood +ieee +intern +confer +supercomput +dynam +self +invalid +reduc +coher +overhead +share +memori +multiprocessorsalvin +lebeck +anddavid +wood +ieee +intern +symposium +comput +architectur +isca +june +activ +memori +abstract +memori +system +simulationalvin +lebeck +anddavid +wood +sigmetricsmai +accuraci +perform +parallel +simul +interconnect +network +dougla +burger +david +wood +proceed +intern +parallel +process +symposium +april +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +mark +hill +jame +laru +ann +roger +david +wood +proceed +supercomput +fine +grain +access +control +distribut +share +memori +ioanni +schoina +babak +falsafi +alvin +lebeck +steven +reinhardt +jame +laru +david +wood +proceed +asplo +tempest +typhoon +user +level +share +memori +steven +reinhardt +jame +laru +david +wood +proceed +symposium +comput +architectur +cach +profil +spec +benchmark +case +studi +alvin +lebeck +anddavid +wood +page +ieee +comput +octob +cooper +share +memori +softwar +hardwar +scalabl +multiprocessor +mark +hill +jame +laru +steven +reinhardt +david +wood +transact +comput +system +toc +novemb +wisconsin +wind +tunnel +project +annot +bibliographi +mark +hill +jame +laru +david +wood +comput +architectur +new +decemb +line +version +revis +frequent +wisconsin +architectur +research +tool +wart +mark +hill +jame +laru +alvin +lebeck +madhusudhan +talluri +david +wood +comput +architectur +new +august +research +summari +main +research +goal +develop +cost +effect +computerarchitectur +take +advantag +rapidli +chang +technolog +myresearch +program +major +thrust +evalu +perform +feasibl +correct +architectur +develop +tool +techniqu +facilit +evalu +current +research +focuss +follow +three +area +multi +paradigm +multiprocessor +effici +integr +share +memori +messag +pass +hybridprogram +paradigm +virtual +prototyp +system +exploit +similaritesof +exist +parallel +machin +simul +hypothet +parallel +machin +techniqu +understand +tune +program +perform +recent +result +includ +develop +interfac +calledtempest +user +level +protocol +handler +system +suppliedmechan +tempest +provid +mechan +allow +programm +compil +program +librari +implement +messag +pass +transpar +share +memori +hybrid +combin +tempestmechan +overhead +messag +bulk +data +transfer +virtualmemori +manag +fine +grain +access +control +novelmechan +fine +grain +access +control +allow +user +softwar +tagblock +byte +read +write +read +invalid +theloc +memori +us +transpar +cach +remot +data +explor +altern +wai +support +interfac +first +call +typhoon +propos +hardwareplatform +implement +tempest +mechan +fulli +programm +user +level +processor +network +interfac +revers +translationt +rtlb +invok +network +processor +detect +fine +grainaccess +fault +simul +typhoon +wisconsin +wind +tunnel +found +thata +transpar +share +memori +protocol +run +typhoon +performscompar +anal +hardwar +cach +coher +protocol +five +share +memoryprogram +also +develop +memori +system +simul +method +thatoptim +common +case +cach +hit +significantli +reducingsimul +time +fast +cach +tightli +integr +refer +gener +simul +byprovid +abstract +tag +memori +block +referenceinvok +user +specifi +function +depend +upon +refer +type +andmemori +block +state +simul +control +refer +processedbi +manipul +memori +block +state +specifi +special +null +functionfor +action +case +fast +cach +implement +abstract +usingbinari +rewrit +perform +tabl +lookup +memoryrefer +sparcstat +fast +cach +simul +time +tothre +time +faster +convent +trace +driven +simul +thatcal +procedur +memori +refer +simul +time +onlythre +time +slower +origin +instrument +program +also +investig +us +fast +cach +binari +rewrit +techniquesto +support +tempest +interfac +exist +hardwar +platform +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..f97fe77e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,34 @@ +doug +burger +home +page +doug +burger +home +pageprofession +inform +research +summaryresum +cvtranscriptcours +projectsadvisoraffili +project +galileo +sciwisconsin +wind +tunnelpag +maintain +comput +architectureuw +comput +architecturesimplescalar +tool +setgenericasacmperson +stuff +meus +linksphoto +galleryrid +demonhunt +damn +catsbewar +grad +school diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..551612d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,228 @@ +carl +boor +home +page +last +chang +carl +boor +professor +comput +scienc +mathematicsdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +usaoffic +hour +fall +town +schoenberg +work +madison +death +email +deboor +wisc +telephon +schedul +fall +teach +look +former +present +student +select +recent +articl +approxim +theori +written +areavail +anonym +wisc +approx +read +file +provid +access +individu +file +theclick +button +clickabl +version +read +file +small +subset +author +clickabl +click +list +errata +third +edit +cont +boor +elementari +numer +analysi +algorithm +approach +list +errata +variou +print +carl +boor +practic +guid +spline +check +latest +version +thevari +program +driver +latter +book +click +journal +ofapproxim +theori +publish +academ +press +inform +journal +includ +recent +accept +publishedpap +well +email +postal +address +mani +approxim +andmuch +much +ditto +forconstruct +approxim +publish +springer +verlag +ditto +foreast +journal +approxim +search +theirtabl +content +singli +combin +thank +paul +nevai +thishandi +tool +alsoapproxim +amo +slist +homepag +approxim +spline +bibliographi +avail +link +variou +publish +journal +peopl +resourc +ila +inform +center +seek +shall +find +organ +introduct +joi +seeviva_vi +alsoon +screen +tutori +click +great +pictur +hermit +place +also +contain +us +inform +html +thehtml +primermight +even +better +unusu +ever_chang +home +page +david +griffeath +sprimordi +soup +kitchen +variou +interest +inform +seeodd +end +thank +allan +pinku +pinku +techunix +technion +paul +nevaiif +find +us +also +check +paul +nevai +makehi +mathemat +outputavail +cours +check +inform +math +click +inform +numer +analysi +hous +next +door +occupi +taki +souganid +andthaleia +zariphopoul +szego +bust +stand +look +inscript diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..09caa7cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,249 @@ +devis +home +pagedevis +environ +data +explor +visualizationt +content +featuresexamplesin +depthpublicationsrel +workreleasecontactsfeaturesthes +featur +distinguish +devis +visual +environ +visual +queri +interfac +visual +construct +oneset +data +save +appli +input +data +data +larger +memori +effici +handl +map +data +graphic +record +level +cancontrol +color +shape +individu +record +abil +queri +data +record +us +repres +graphic +flexibl +layout +mechan +within +window +help +user +group +data +togeth +comparison +asid +need +link +ax +cursor +help +compar +relationship +differ +viewsof +data +record +base +input +data +direct +input +ascii +file +integ +float +date +string +data +type +examplescheck +follow +exampl +cool +pictur +quick +introduct +devis +tree +validationmolecular +biologi +cell +imag +soil +scienc +birch +clusteringfinanci +data +explorationfamili +medicin +nation +climatedata +centergeograph +inform +systemsoil +sciencefil +serverprogram +tracesclin +scienc +mani +moreexampl +data +viewer +famili +medicin +depthfor +detail +descript +devis +model +visualizationvisu +queri +interfaceperform +issuespublicationsmiron +livni +raghu +ramakrishnan +jussi +myllymaki +visual +explor +larg +dataset +proceed +spie +confer +visual +dataexplor +analysi +januari +michael +cheng +miron +livni +raghu +ramakrishnan +visual +analysi +stream +data +inproceed +spie +confer +visual +data +explor +andanalysi +februari +raghu +ramakrishnan +michael +cheng +miron +livni +praveenseshadri +next +sequencequeri +proceed +intern +confer +themanag +data +comad +decemb +relat +workth +seqproject +complementari +devis +design +queryrecord +base +sequenc +data +output +queri +bevisu +devis +releas +informationw +current +releas +version +devis +executablesfor +solari +platform +dynam +link +need +ld_library_path +environ +variabl +appropri +rundevis +support +architectur +execut +arestat +link +requir +shareabl +librari +time +download +devis +click +contactsfor +inform +research +project +contactmiron +livni +raghu +ramakrishnan +jussi +myllymaki +guangshun +chen +kent +wenger +user +support +hotlin +send +mail +devis +usersupport +hotlin +page +access +time +sinc +octob diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..c5787c80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,300 @@ +home +page +david +dewitt +david +dewitt +professor +romn +fellow +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +dewitt +wisc +univers +michigan +interest +object +orient +databas +system +parallel +databas +system +databasebenchmark +geograph +inform +system +research +summari +main +research +project +shore +paradis +objectiveof +shore +design +implement +evalu +persist +objectsystem +serv +need +wide +varieti +target +applicationsinclud +hardwar +softwar +system +persist +programminglanguag +geograph +inform +system +satellit +data +repositori +multimedia +applic +shore +expand +basic +capabilitiesof +wide +us +exodu +storag +manag +develop +wisconsin +fund +arpa +number +wai +includ +support +typedobject +multipl +program +languag +unix +like +hierarchicalnam +space +name +object +unix +compat +interfaceto +object +text +field +interfac +intend +toeas +transit +applic +unix +file +systemenviron +shore +exist +unix +tool +ccwill +abl +store +data +shore +object +without +modif +basic +unix +file +becom +either +singl +shore +object +orth +text +field +complex +object +shore +target +wide +rang +hardwar +environ +scale +fromindividu +workstat +heterogen +client +server +networksto +larg +multiprocessor +intel +paragon +shore +ajoint +project +prof +carei +naughton +solomon +paradis +project +attempt +appli +technolog +developeda +part +shore +gamma +project +gamma +parallel +relationaldatabas +system +develop +univers +wisconsin +thetask +store +manipul +geograph +data +set +current +mani +geograph +inform +system +relat +databasesystem +hold +data +system +excel +formanag +busi +data +poor +match +modelingne +must +capabl +store +manipulatingmuch +complex +object +polygon +polylin +instead +paradis +emploi +object +orient +data +model +provid +muchbett +match +type +need +anoth +signific +differencefrom +current +system +paradis +emploi +parallelismto +facilit +execut +process +larg +data +set +assatellit +imag +target +hardwar +platform +projecti +cluster +sparc +connect +sampl +recent +public +benchmark +withm +carei +naughton +proceed +sigmod +confer +washington +shore +persistentappl +dewitt +franklin +hall +mcauliff +naughton +chuh +tsatalo +white +zwill +proceed +sigmod +intern +conferenceon +manag +data +minneapoli +client +server +paradis +kabra +patel +proceedingsof +larg +data +base +confer +santiego +chile +august +recent +talk +vldb +invit +talk +object +relat +summit +present +page +automat +creat +januari +email +pub +wisc +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..1dfa8306 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,625 @@ +charl +dyer +home +pagecharl +dyerprofessordepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +usa +mail +dyer +wisc +edutelephon +finger +infoph +univers +maryland +curriculum +vita +area +interest +comput +vision +three +dimension +shape +represent +appear +model +view +synthesi +activ +vision +visualizationgroup +leader +wisconsin +comput +vision +groupprogram +chair +cvpr +research +interest +view +synthesisth +goal +work +develop +basic +tool +controllingin +real +time +either +autonom +interact +virtual +cameraof +real +environ +input +imag +videostream +acquir +fix +mobil +camera +around +site +output +panoram +visual +scene +whicha +virtual +user +control +camera +move +environ +technolog +user +could +interact +navig +througha +real +environ +control +custom +path +view +thesit +predetermin +input +imag +main +researchquest +adapt +combin +basi +imag +synthesizenew +view +scene +without +model +scene +reconstructiona +intermedi +step +recent +develop +innovativetechniqu +callview +morph +take +basisimag +interpol +continu +rang +imag +correspond +view +linear +path +connect +camera +center +visual +explorationcomput +vision +research +recent +start +investig +howto +activ +control +imag +acquisit +process +controllingcamera +paramet +studi +purposefulli +control +theposit +orient +camera +order +dynam +adjustviewpoint +base +appear +three +dimension +scene +theus +real +time +viewpoint +control +behavior +import +forsolv +task +explor +unknown +object +order +findspecif +surfac +mark +build +global +model +unknownshap +recogn +object +coordin +simpl +observ +behavior +chang +appearanceof +surfac +well +defin +simplifi +imag +computationsrequir +make +precis +global +progress +algorithm +andelimin +need +accur +differenti +measur +thecamera +motion +believ +activ +approach +move +towardsviewpoint +close +relat +geometri +viewedobject +import +gener +us +thisapproach +develop +provabl +correct +algorithm +move +asid +view +surfac +revolut +order +recov +shape +reconstruct +global +surfac +unknown +smooth +arbitrarili +shape +object +visualizationin +area +visual +develop +map +techniquescap +gener +displai +possibl +data +object +defin +user +algorithm +without +need +user +defin +type +specificgraph +displai +procedur +capabl +displayingarbitrari +combin +algorithm +data +object +commonfram +refer +coupl +interact +control +algorithmexecut +provid +power +understand +algorithm +behavior +especi +interact +visual +experi +scientif +dataanalysi +algorithm +implement +system +call +forexperi +techniqu +us +visualizingintermedi +final +result +data +analysi +algorithm +forproblem +discrimin +cloud +satellit +imag +recent +public +seitz +dyer +cyclic +motion +analysi +us +period +trace +motion +base +recognit +shah +jain +kluwer +boston +appear +seitz +dyer +view +invari +analysi +cyclic +motion +comput +vision +appear +seitz +dyer +view +morph +proc +siggraph +seitz +dyer +toward +imag +base +scene +represent +us +view +morph +proc +conf +pattern +recognit +track +comput +vision +dyer +shape +recoveri +stationari +surfac +contour +control +observ +motion +advanc +imag +understand +festschrift +azriel +rosenfeld +ieee +comput +societi +press +alamito +kutulako +dyer +global +surfac +reconstruct +purpos +control +observ +motion +artifici +intellig +seitz +dyer +complet +scene +reconstruct +four +point +correspond +proc +conf +comput +vision +seitz +dyer +physic +valid +view +synthesi +imag +interpol +proc +workshop +represent +visual +scene +kutulako +dyer +recov +shape +purpos +viewpoint +adjust +comput +vision +kutulako +seal +dyer +build +global +object +model +purpos +viewpoint +control +proc +base +vision +workshop +kutulako +dyer +lumelski +provabl +strategi +vision +guid +explor +three +dimens +proc +ieee +conf +robot +autom +kutulako +dyer +occlud +contour +detect +us +affin +invari +purpos +viewpoint +control +proc +comput +vision +pattern +recognit +conf +seitz +dyer +affin +invari +detect +period +motion +proc +comput +vision +pattern +recognit +conf +seitz +dyer +detect +irregular +cyclic +motion +proc +workshop +motion +rigid +articul +object +hibbard +paul +battaiola +santek +voidrot +martinez +dyer +interact +visual +earth +space +scienc +comput +comput +juli +hibbard +dyer +paul +lattic +model +data +displai +proc +visual +recent +public +includ +abstract +wisconsin +comput +vision +groupcours +taught +introduct +artifici +intellig +spring +fall +comput +vision +fall +spring +current +student +gareth +bestor +brian +morgan +steve +seitz +liangyin +yuph +graduat +bill +hibbard +whibbard +macc +wisc +visual +scientif +comput +system +base +onlattic +structur +data +displai +model +kiriako +kutulako +kyro +rochest +explor +three +dimension +object +control +point +ofobserv +mark +allmen +allmen +iutech +imag +sequenc +descript +us +spatiotempor +flow +curv +toward +motion +base +recognit +brent +seal +seal +appear +model +three +dimensionalshap +machin +vision +graphic +harri +plantinga +wheaton +continu +viewer +center +object +representationfor +comput +vision +charl +stewart +stewart +connectionist +model +stereo +vision +bradlei +kjell +kjell +ccsua +ctstateu +orient +edg +separ +textur +measureslink +interestmi +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..55e5c771 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,24 @@ +welcom +friend +welcom +machinew +appreci +patienc +long +arduou +task +bring +better +home +page +check +educ +curriculum +vitaecheck +class +teach +home +page +section +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..dbe649ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,212 @@ +eric +hazen +home +pageer +hazennon +professorroom +comput +scienc +west +dayton +madison +current +work +fornet +scout +servic +project +intern +wait +wait +part +registr +servic +couldn +help +domain +name +problem +even +want +intern +neat +thing +monei +scout +thing +locat +comput +scienc +depart +univers +ofwisconsin +madison +see +could +never +explain +scout +elegantli +fearless +leader +susan +calcari +check +scout +page +offici +explan +design +mainten +site +email +list +scout +report +also +assist +pete +devri +withtech +support +mac +unix +machin +come +scout +spent +half +year +laboratori +molecular +biologi +time +full +time +student +well +molecular +biologi +call +technic +support +assist +digit +video +specialist +meant +around +fix +peopl +broken +mice +answer +email +question +time +lucki +make +cool +video +interest +us +digit +video +instruct +purpos +check +multimedia +servic +page +graduat +univers +wisconsin +philosophi +program +make +philosoph +real +commod +capitalist +societi +also +complet +requir +curriculum +make +shameless +pragmatist +comput +scienc +depart +email +talk +sinc +good +metaphys +discuss +link +serv +practic +purpos +makethi +page +look +standard +resum +date +multimedia +page +molecular +biologi +explain +made +anim +shown +world +among +drosophila +geneticist +told +wonder +girlfriend +page +salon +magazin +entertain +inform +ezin +creat +complet +mac +lauri +anderson +green +room +shockwav +check +kudon +page +link +know +wit +quicktimevr +documentari +plight +bosnia +uproot +popul +billi +holidai +homepag +nation +secur +archiv +check +nixon +preslei +meetingsejhazen +facstaff +wisc +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..ddb85c34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,16 @@ +tina +eliassi +home +pagetina +eliassi +univers +illinoi +urbana +champaign +univers +wisconsin +madison +offic +bldgphone +eliassi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..aa4d4a9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,118 @@ +eric +home +page +passsth +anoth +cold +budweisth +address +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +depart +electr +comput +engin +univers +wisconsin +madison +johnson +drive +madison +offic +phone +mail +ericro +wisc +research +area +comput +architectur +advisor +professor +smithresearch +topic +kestrel +multiscalar +project +instruct +level +parallel +high +bandwidth +instruct +fetch +mechan +branch +predict +confid +branch +mispredict +tolerancepubl +trace +cach +latenc +approach +high +bandwidth +instruct +fetch +eric +rotenberg +steve +bennett +jame +smith +appear +proceed +annual +intern +symposium +microarchitectur +decemb +assign +confid +condit +branch +predict +erik +jacobsen +eric +rotenberg +jame +smith +appear +proceed +annual +intern +symposium +microarchitectur +decemb +trace +cach +latenc +approach +high +bandwidth +instruct +fetch +eric +rotenberg +steve +bennett +jame +smith +univers +wisconsin +madison +technic +report +april +resum diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..ab1194a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,95 @@ +babak +falsafi +home +page +babak +falsafi +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usatel +email +falsafi +wisc +work +peopl +mentorcultresearch +interest +comput +architectur +perform +evalu +measur +comput +system +parallel +program +modelseduc +comput +scienc +univers +wisconsin +madison +decemb +comput +scienc +suni +buffalo +june +electr +comput +engin +suni +buffalo +june +miscellan +public +would +rather +drink +would +rather +would +rather +read +would +rather +listen +us +high +school +idea +like +fail +morf +shubu +dionosi +hillari +profan +phone +convers +check +american +french +queen +comput +architect +look +like +hack +partner +crime +next +gener +parallel +comput +last +updat +babak +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..2e138d05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,150 @@ +home +page +michael +ferri +michael +ferri +associ +professor +comput +scienc +industri +engineeringand +member +center +mathemat +scienc +comput +scienc +depart +univers +wisconsin +west +dayton +street +madison +telephon +email +ferri +wisc +univers +cambridg +interest +theori +algorithm +applic +mathemat +program +research +summari +look +robust +method +solv +larg +scale +variationalinequ +nonlinear +program +problem +applic +toproblem +econom +engin +pivot +path +followingtechniqu +investig +base +success +linear +emphasi +numer +properti +larg +scale +problem +andinterfac +model +languag +particular +applic +beingconsid +includ +econom +equilibria +effect +taxat +oncarbon +emiss +traffic +congest +effect +toll +structur +optim +contact +problem +chemic +process +design +consid +parallel +architectur +solvingproblem +nonlinear +optim +graph +partitioningtechniqu +determin +underli +structur +investig +tool +gener +purpos +parallel +optim +techniqu +forexploit +parallel +machin +directli +within +model +system +arealso +consider +prototyp +us +condor +system +extens +complementar +framework +also +beinginvestig +emphasi +identifi +exploit +underlyingmodel +structur +public +complet +list +paper +mostli +electron +avail +relev +link +cpnet +complementar +problem +mathemat +prgram +home +pagec +page +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..2fc1ff40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,335 @@ +david +finton +home +page +david +finton +finton +wisc +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +welcom +page +grad +student +research +nerdin +artifici +intelligenceher +univers +wisconsin +madison +grew +grand +rapid +michigan +late +show +home +offic +earn +degre +math +michigan +state +master +comput +scienceher +dissert +institut +take +littl +year +develop +traffic +measur +softwarefor +first +thesi +advisor +left +wisconsin +trusti +nextstationor +librari +enjoyplai +trumpet +piano +listen +longhair +music +plai +volleybal +intervarsityfolk +contribut +supersoak +arm +race +comment +page +feel +free +comment +form +send +mail +finger +accountto +current +plan +whether +system +gain +employ +introduct +artifici +intellig +current +project +comput +smart +understand +make +machin +intelligenti +goal +artifici +intellig +essenc +intelligencei +abil +learn +adapt +learn +actappropri +order +reach +goal +reinforc +learn +treat +problem +gener +case +system +output +control +action +chang +environ +input +sens +environ +also +input +reinforc +weak +kind +feedback +express +posit +neg +number +instead +teacher +present +thesystem +input +output +pair +system +instead +receiv +thumb +thumb +irregular +interv +work +focuss +need +todistinguish +good +action +on +direct +process +build +agood +represent +environ +term +relev +orimport +featur +note +import +basedfeatur +extract +current +appli +notion +import +problem +learn +balanc +need +explor +world +need +perform +optim +explor +exploit +also +investig +wai +us +import +make +learningprocess +effici +allow +system +specifi +start +point +learn +experi +activ +learn +goal +develop +better +understand +intelligentadapt +hope +provid +basi +intellig +action +whichwil +also +benefit +knowledg +base +task +base +work +realli +date +sorri +reinforc +learn +pagefor +inform +hotlistthi +browser +independ +hotlist +keep +copi +access +browser +platform +combin +actual +bookmark +file +omniweb +eleg +function +browser +netscap +opinion +omniwebi +current +avail +nextstep +avail +foral +openstep +variant +openstep +releas +editori +page +responseto +jehovah +wit +deiti +christwisconsin +site +intervars +graduat +fellowship +univers +wisconsin +madison +check +weatherin +citi +madison +wisc +star +trek +page +program +inform +link +page +isthmu +daili +pagesom +favorit +place +visit +nebula +nasa +pictur +world +wide +studi +bibl +crosssearch +minor +glenn +gould +homepag +farsid +daili +star +trek +star +trek +voyagerent +dilbert +zoneroam +world +virtual +tourist +stereogram +tell +head +blow +true +next +head +show +think +bill +gate +word +sponsor +last +modifi +octob +finton +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..b06df88e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,412 @@ +home +page +charl +fischer +nbsp +nbsp +nbsp +nbsp +nbsp +nbspcharl +fischer +nbsp +nbsp +nbsp +nbsp +nbsp +nbspprofessor +comput +scienc +nbsp +nbsp +nbsp +nbsp +nbsp +nbspunivers +wisconsin +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +messag +email +fischer +wisc +teach +semest +teachingc +graduat +cours +compil +spring +teachingc +undergradu +cours +compil +research +research +interest +focu +compil +design +implement +recent +interest +best +exploit +enormouscap +provid +modern +comput +architectur +issu +student +investig +includ +code +schedul +import +avoid +unnecessari +pipelin +delai +haveinvestig +issu +optim +schedul +arithmet +express +domin +model +global +procedur +level +regist +alloc +graph +color +best +model +mayb +model +explicitli +quantifi +likelihood +benefit +registerresid +attract +care +regist +alloc +vital +gener +code +unnecessari +loadsand +store +must +avoid +ultim +regist +alloc +interprocedur +regist +alloc +theprocedur +program +analyz +studiedinterprocedur +alloc +modelsthat +optimallyalloc +regist +among +procedur +polynomi +time +approach +seem +effect +practic +anyon +code +know +easi +make +mistak +error +involv +pointer +arrai +indic +especi +common +studi +wai +toautomat +check +pointer +arrai +oper +time +share +memori +multiprocessor +workstat +us +processor +program +anoth +monitor +program +possibl +routin +check +program +execut +littl +orno +appar +slowdown +recent +public +minimum +cost +interprocedur +regist +alloc +steven +kurland +charl +fischer +popl +sigplan +sigact +symposium +principl +programminglanguag +januari +cost +concurr +check +pointer +arrai +access +program +harish +patil +charl +fischer +appear +insoftwar +practic +experi +demand +driven +regist +alloc +todd +proebst +charl +fischer +appear +inacm +transact +program +languag +system +effici +instruct +schedul +delai +load +architectur +steven +kurland +todd +proebst +charl +fischer +transact +program +languag +system +zero +cost +rang +split +steven +kurland +charl +fischer +sigplan +confer +program +languag +design +implement +june +activitiesa +complet +revis +second +edit +craft +compil +author +cytronand +richard +leblanc +almost +complet +publish +benjamin +cum +look +soon +better +bookstor +everywher +short +commun +editor +transact +program +languag +system +topla +educationph +cornel +univers +pars +context +free +languag +parallel +environ +supervis +john +william +studentsdonn +milton +syntact +specif +analysi +attribut +grammar +juli +bruce +rowland +semant +attribut +evalu +syntact +analysi +juli +stephen +skedzielewski +definit +attribut +reevalu +attribut +grammar +septemb +bernard +dion +local +least +cost +error +corrector +context +free +context +sensitivepars +decemb +mahadevan +ganapathi +retarget +code +gener +optim +us +attribut +grammar +novemb +vimal +begwami +approach +attribut +evalu +error +correct +compil +august +maunei +least +cost +syntact +error +correct +us +extend +right +context +januari +gregori +johnson +context +sensit +attribut +flow +august +anil +gener +execut +facil +integr +program +environ +decemb +william +winsborough +automat +transpar +parallel +logic +program +compil +time +august +venkatesh +framework +specif +implement +program +analysi +algorithm +august +todd +proebst +code +gener +techniqu +august +steve +kurland +approach +interprocedur +regist +alloc +januari +harish +patil +effici +program +monitor +techniqu +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..8c2adf0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,547 @@ +galileo +home +page +galileo +project +wisconsint +contentsgalileoproject +descriptionpublicationsrel +projectssci +wisconsinproject +descriptionpublicationsproject +membersgalileo +wisconsingalileo +project +conduct +comput +architectur +groupat +univers +wisconsin +madison +project +focus +medium +long +term +evolut +processor +system +architectur +emphasison +memori +system +specif +studi +therelationship +processor +main +memori +futuresystem +complet +separ +todai +integr +extent +process +capabl +storag +merg +least +wai +increas +chip +penalti +issuabl +instruct +orlimit +chip +bandwidth +design +place +capacityon +processor +chip +modul +eventu +sizabl +fractionof +main +memori +resid +chip +repres +arrow +label +mopin +diagram +differ +possibl +migrat +ofprocessor +capabl +onto +dram +chip +eventuallyobvi +central +processor +iram +arrow +specif +research +current +focus +follow +area +examin +perform +impact +larg +memori +latenc +andlimit +memori +bandwidth +current +futur +microprocessor +base +systemsperform +model +variou +design +point +along +theprocessor +memori +integr +spectrumcach +hierarchi +design +integr +systemsdesign +main +memori +bank +integr +systemprogram +execut +system +multipl +integr +chip +datascalar +architectur +galileo +specif +public +exploit +optic +interconnect +elimin +serial +bottlenecksdoug +burger +jame +goodman +appear +intern +confer +massiv +parallel +process +us +optic +interconnect +octob +datascalar +architectur +spsd +execut +modeldoug +burger +stefano +kaxira +jame +goodman +univers +wisconsin +madison +comput +scienc +depart +technic +report +juli +quantifi +memori +bandwidth +limit +current +futur +microprocessorsdoug +burger +jame +goodman +alain +appear +intern +symposium +comput +architectur +declin +effect +dynam +cach +gener +purpos +microprocessorsdougla +burger +jame +goodman +alain +univers +wisconsin +madison +comput +scienc +depart +technic +report +januari +relat +project +iram +berkeleyppram +kyushu +univeristi +japansci +wisconsinour +group +also +close +involv +research +relat +cach +coherentshar +memori +multiprocessor +design +specif +studi +scalabl +coherentinterfac +coher +transport +layer +us +ieee +standard +base +platform +explor +idea +standard +specifi +link +list +base +hardwar +coher +protocol +includ +support +effici +synchron +primit +queue +lock +qolb +aswel +optim +differ +share +pattern +pairwis +share +fresh +read +share +standard +also +includ +definitionfor +extrem +high +bandwidth +latenc +transport +layer +betweenprocess +element +individu +cluster +current +perform +relat +research +follow +topic +extend +logarithm +grow +share +structureseffici +hardwar +synchron +share +memori +multiprocessorsa +scalabl +studi +base +protocol +includ +standard +extensionsaggress +consist +model +share +memori +multiprocessorswisconsin +public +mechan +minim +synchron +overhead +share +memori +applic +appear +best +architectur +paper +proceed +intern +confer +supercomput +juli +also +univers +wisconsin +madison +comput +scienc +depart +simul +transport +layer +wisconsin +wind +tunneldougla +burger +jame +goodman +proceed +second +intern +workshop +base +high +perform +cost +comput +march +also +univers +wisconsin +madison +comput +scienc +depart +technic +report +march +hierarch +extens +scijam +goodman +stefano +kaxira +univers +wisconsin +madison +comput +scienc +depart +technic +report +juli +extend +scalabl +coher +interfac +larg +scale +share +memoryross +evan +johnson +univers +wisconsin +madison +comput +scienc +depart +technic +report +februari +hardwar +support +synchron +scalabl +coher +interfac +nagi +aboulenein +stein +gjess +jame +goodman +philip +woest +univers +wisconsin +madison +comput +scienc +depart +technic +report +novemb +interconnect +topolog +point +point +ringsross +johnson +jame +goodman +univers +wisconsin +madison +comput +scienc +depart +technic +report +decemb +analysi +ringsteven +scott +jame +goodman +mari +vernon +univers +wisconsin +madison +comput +scienc +depart +technic +report +novemb +lower +bound +latenc +scalabl +link +list +cach +coherenceross +johnson +univers +wisconsin +madison +comput +scienc +depart +technic +report +june +analysi +synchron +mechan +share +memori +multiprocessorsphilip +woest +jame +goodman +univers +wisconsin +madison +comput +scienc +depart +technic +report +februari +effici +synchron +primit +larg +scale +share +memori +multiprocessorjam +goodman +mari +vernon +philip +woest +proceed +third +intern +confer +architectur +support +program +languag +oper +system +april +also +univers +wisconsin +madison +comput +scienc +depart +technic +report +project +particip +faculti +goodman +graduat +student +doug +burger +alain +stefano +kaxira +project +alumni +nagi +abouleneinross +johnsonstev +scottlast +modifi +doug +burger +dburger +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..156c7118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,38 @@ +andrew +geeri +home +page +andrew +geeri +geeri +wisc +west +dayton +street +madison +regent +madison +madisonin +comput +scienc +current +work +compsci +grade +schedul +pontif +peopl +interest +jacqu +derrida +post +structur +martin +heidegg +albert +camu +jean +paul +sartr +friedrich +nietzsch diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..0cacf901 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,167 @@ +gideon +glass +homepag +continu +tweak +homepag +thank +stop +collect +imag +find +deposit +directori +sampl +sampl +pictur +gui +claim +cooler +accord +toonion +seethi +movi +usual +suspect +walk +nearest +blockbust +note +recent +move +none +roommat +dutch +cheap +either +given +predica +dismal +prospect +improv +withno +outsid +influenc +pleas +consid +make +small +donationto +help +defrai +cost +purchas +check +monei +orderscan +sent +follow +address +pleas +send +cash +gideon +glass +monro +floor +madison +usathank +support +grad +student +sometim +find +read +paper +eventhough +shelf +feet +unread +book +wait +read +anyhow +look +someth +christian +achil +huge +index +might +also +unifi +cstechreport +index +class +project +report +otherstuff +avail +follow +program +machin +load +averagewil +grow +fast +main +fork +doofu +actual +time +share +machin +back +calvin +great +time +killer +zippi +pinheadha +reload +sever +time +justtri +last +fall +kill +time +tweak +netscap +noth +think +work +mozilla +higher +well +dabbl +object +orient +programmingin +mostli +exercis +suppos +netscap +buttonher +thing +right +suffic +case +told +somethingin +bookmark +denni +ritchi +creator +unix +wrote +anti +forward +unix +hater +handbook +send +mailand +mayb +mayb +pleas diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..1d83c8bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,608 @@ +andi +krazi +glew +univers +wisconsin +home +pageandi +krazi +glew +univers +wisconsin +home +pagethi +andi +first +wisconsin +home +page +largelyform +snippet +intel +home +page +stylishor +organ +summarycontact +touch +ship +thing +addressescalendar +arrang +meet +filesystem +access +filesystem +glew +public +html +http +wisc +glew +approach +anyon +read +file +unless +specif +arrang +open +default +scribe +minut +taker +sohi +research +group +weekli +meet +research +interest +rather +gener +form +adapt +applic +dynam +versu +static +comput +architectur +wannab +author +comput +architectur +high +level +edit +thought +higher +educ +suggest +effect +work +patent +claim +fame +miscellan +info +stuff +self +imag +configur +manag +resum +favorit +quot +sai +self +imag +beef +montreal +peopl +care +pope +priest +parson +king +william +boyn +want +coin +trust +summer +time +keep +harm +see +first +frost +snow +poor +trust +almighti +dollar +good +mapl +wood +keep +bellow +warm +church +chapel +ranter +preacher +beecher +stuff +montreal +alreadi +enough +beef +keeper +montreal +harbour +deplor +churchmen +notori +atheist +less +well +known +chariti +strand +sailor +knew +could +alwai +beef +exchang +chop +chord +firewood +meal +warm +place +sleep +print +manifesto +handbil +advertis +comput +architect +hackeralthough +aspir +comput +architect +someth +formerlyhad +fake +motorola +busi +card +ever +sinc +start +work +comput +first +thing +learn +assembl +redesign +chip +start +unix +kernel +hacker +gould +real +time +unix +andstil +think +wistfulli +hacker +beard +frequent +wear +suspend +bald +comput +architectureonc +futur +involv +microarchitectur +intel +pentium +adopt +intel +resum +wannab +author +comput +architecturei +constantli +verg +write +book +entitl +comput +architectur +grabbag +trick +techniqu +sort +antidot +hennessi +patterson +cannot +afford +diskspac +internet +servic +provideror +charg +alwai +connect +system +suggest +appreci +interest +piec +comput +architectureon +best +wai +learn +comput +architectur +read +textbook +datasheet +instruct +refer +miscellan +info +wander +netscapebookmarksstockscod +standardsroi +wilkinson +code +standardsi +disagre +mani +configur +manag +although +perhap +quit +first +real +quickli +defunct +startup +compani +call +enfopris +build +busi +workstat +first +chang +assign +unix +driver +writingto +system +integr +longstand +love +hate +relationship +configurationmanag +tool +like +scc +first +publish +paper +box +link +parallel +tree +element +configur +manag +system +first +usenix +workshop +softwar +manag +describ +central +databas +multipl +view +hardlink +clone +save +space +time +us +gould +comput +system +divis +unix +team +brian +berlin +deprec +approach +paper +mainli +advoc +optimist +concurr +control +approach +wherea +thought +advoc +lock +actual +advoc +optimist +concurr +control +also +advoc +lock +case +optimist +version +get +livelock +usual +insist +singl +identifi +serial +schedul +sourc +code +checkinsso +test +proce +linear +manner +requir +programm +test +code +work +system +previou +fix +appli +although +recogn +even +requir +relax +often +strip +version +approachin +thing +like +apolog +never +creat +truli +portabl +tool +accomplish +us +similar +approach +although +mike +fetterman +mark +aitken +deserv +credit +enhanc +sever +featur +went +notabl +version +number +becam +overal +suffici +everyth +includ +cshrc +login +even +page +calendar +wisconsinhow +arrang +meet +sinc +seem +ubiquit +calendar +schedul +programat +univers +wisconsin +depart +variou +peopl +cmtool +public +domain +ical +plan +critic +mass +anyof +calendar +isol +us +pilot +associ +softwar +least +mean +meet +get +pilot +meet +commit +least +commit +possibl +also +record +meet +us +voic +organ +therefor +arrang +meet +must +touch +prefer +email +possibl +phone +person +manuallyadd +meet +calendar +creat +download +calendar +pilot +softwareto +page +creat +download +calendar +pilot +softwareto +microsoft +watch +intelat +intel +devout +user +group +schedul +program +last +time +intel +synchron +unix +also +ontim +past +weak +disconnect +oper +email +oper +least +allow +major +peopl +schedul +meetingswith +without +manual +intervent +intel +schedul +algorithm +access +andi +calendar +us +synchron +also +tell +andi +meet +email +person +reserveth +right +meet +blindli +invit +cannot +synchron +urgent +make +meet +andi +admin +teresa +lock +synchron +possibl +check +andi +calendar +page +proposeif +meet +urgent +week +futur +avoid +bother +teresa +send +andi +email +realiz +andi +miss +meet +sent +email +enough +advanc +overallschedul +calendar +like +topic +someth +fascin +bring +effici +advantag +person +secretariesand +aid +camp +comput +user +header +wisc +glew +public +html +glew +html +glew diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..9c96b883 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,28 @@ +goodman +home +page +jame +goodman +goodman +wisc +professor +comput +sciencesdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaresearch +interest +lot +good +stuff +current +project +galileo +wisconsin +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..81e25fd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,540 @@ +greg +sharp +home +pagegreg +sharp +home +pagenam +greg +sharpemail +greg +wisc +eduoffic +offic +phone +offic +hour +appt +tgif +everi +fridai +dept +section +instructor +fall +lectur +note +spring +lectur +notesclass +fall +topic +databas +manag +system +find +aboutsearch +engin +altavista +dejanew +excit +infoseek +lyco +metacrawl +yahoo +usenet +mirror +html +format +ohioc +program +learn +todai +librari +draft +standard +april +mirror +stanford +cygnu +info +refer +mumit +newbi +guideplatform +independ +librari +portabl +develop +kit +amulet +dclap +string +attach +requir +motif +suit +string +attach +wxwindow +yacl +class +spring +introduct +comput +geometri +comput +architectur +comput +architectur +project +machin +learn +machin +learn +projectclass +fall +numer +linear +algebra +introduct +theoret +comput +scienc +comput +vision +imag +homework +comput +vision +projectmisc +freewar +sharewar +info +cygwin +softwar +internet +directori +gimp +harmonai +harmoni +unix +client +hyper +hyper +browser +imag +databas +vasc +imag +databas +altern +link +video +pic +vision +research +rang +imag +databas +shape +shade +pic +rang +imag +databas +shape +shade +pic +give +link +imag +databas +imag +databas +pretti +cool +idea +specifi +section +night +imag +japan +imag +databas +jaida +year +worth +atmospher +data +imag +multiresolut +seamless +imag +databas +cool +click +zoom +resolut +view +solar +system +nice +pic +moon +planet +comet +meteor +wyom +imag +databas +version +pic +wyom +also +lot +misc +stuff +like +pic +planet +overhead +shot +antarctica +satelit +imag +catalogu +niae +satelit +pic +japan +gothic +imag +databas +electr +postcard +card +rack +nice +select +thank +link +todd +vistex +textur +databaseartifici +gener +imag +primoridi +soup +kitchen +math +depart +awesom +site +medic +imag +databas +line +imag +histori +medicin +document +pictur +diagon +line +dermatolog +onlin +imag +atla +base +erlang +imag +databas +germani +mirror +japan +pretti +cool +idea +enter +diagnosi +back +imag +orthopaed +imag +databas +nice +databas +ecvnet +imag +data +base +list +link +optic +charact +recognit +handwrit +recognit +home +page +nici +handwrit +recognit +groupimag +process +home +page +imag +process +home +page +washington +state +univers +imag +librari +softwar +comput +graphic +softwar +raytrac +home +page +rayshad +utah +raster +toolkit +radianc +tracer +radios +packag +avalon +archiv +object +stuff +tracer +mirror +grimstead +massiv +list +trace +dsite +refer +site +comput +graphic +hardwar +graphic +board +intergraph +lockhe +glint +chipset +nvidia +chipsetcomput +geometeri +geometri +center +applic +challeng +comput +geometrylispuseless +pagescomput +architectur +comput +architectur +home +page +hennessi +patterson +resourc +text +superdlx +parallel +comput +simul +parl +mexico +state +univ +includ +databas +trace +architectur +link +univ +washingt +architectur +group +index +simul +georgia +tech +architectur +groupjapanes +comput +guid +japanes +comput +unvers +washington +monash +archiv +index +japanes +comput +stuff +infowav +edict +window +english +japanes +japanes +english +dictionari +shodouka +asiasoftinform +retrev +peregrin +robot +travers +index +written +perl +trec +text +retriev +home +page +infomin +manag +gigabyt +freewar +search +engin +text +imag +textual +imag +provid +info +internet +experi +thoma +thoma +feedback +linguist +util +repositori +inform +retriev +tool +survei +natur +languag +process +inform +retriev +nist +comput +languag +index +softwar +other_sw +info_retriev +world +wide +robot +wander +spider +jedi +project +might +strictli +relat +hartlib +paper +project +latin +stemmer +inform +retriev +multimedia +retriev +group +academ +group +inform +retriev +system +relat +cours +inform +storag +retriev +relat +cours +includ +histori +inform +retriev +relat +cours +new +pointcast +check +custom +portfolio +automat +updat +tool +literatur +mark +twainhumor +apolog +citizen +offens +link +site +threw +link +garbag +belong +invest +investorweb +networth +fundscap +brill +editori +servic +stockmastermutu +fund +brokerag +hous +fidel +invest +vanguard +row +price +jack +white +compani +schwab +charl +schwab +gabelli +fund +mutualsmisc +page +psnuplast +modifi +greg +sharpgreg +wisc +http +wisc +greg +greg +html diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..64572acb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar +gopalsridhar +gopalgsri +wisc +edubon +marrow +pageresumest +wisconsin +pagecalvin +hobbesbookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..c6cf7e66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,82 @@ +guangshun +home +page +welcom +guangshun +home +page +graduat +student +depart +comput +scienc +dayton +univers +wisconsin +madison +madison +phone +home +offic +educ +univers +wisconsin +madison +california +state +univers +angel +peke +univers +physic +depart +grade +research +interest +databas +manag +system +advis +raghu +ramakrishnan +miron +livni +project +data +analysi +project +famili +medicin +devis +data +explor +visual +environ +class +interest +link +stuff +comput +relat +career +plan +chines +relat +miscellani +send +email +send +email +around +weather +forecast +madison +visitor +sinc +june +visitor +number +sinc +visitor +number +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..e4bd21a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,53 @@ +guhan +viswanathan +home +page +guhan +viswanathan +gviswana +wisc +graduat +studentdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaadvisor +laru +thesi +research +thesi +focus +design +implement +data +parallellanguag +involv +design +data +parallellanguag +base +develop +local +implement +compil +target +investig +data +parallelappl +execut +effici +hand +code +parallelprogram +amor +detail +research +summari +list +public +us +link diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..b9a209ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,97 @@ +harit +home +page +harit +graduat +student +univers +wisconsin +madison +depart +comput +scienc +would +like +list +classmat +cours +take +fall +databas +manag +system +prof +raghu +ramakrishnan +advanc +comput +architectur +prof +mark +hill +cours +take +spring +advanc +comput +architectur +prof +jame +goodman +undergradu +student +world +famou +mvsr +engin +colleg +osmania +univers +hyderabad +india +meet +draw +line +thing +interest +indian +newspap +stuff +sport +sastri +link +roommat +home +page +saeed +mirza +murthi +link +zubber +dust +photo +photograph +univers +warn +click +year +folk +page +access +time +sinc +sept +counter +courtesi +counter +page +access +time +sinc +sept +electron +mail +mail +address +harit +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..7eca51cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,55 @@ +rebecca +hasti +home +page +rebecca +hasti +graduat +student +research +assistantcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +mail +hasti +wisc +edutelephon +telephon +dept +first +java +applet +click +fall +schedul +engr +java +noland +seminar +comput +scienc +univers +wisconsin +madison +mathemat +univers +wisconsin +madison +mathemat +carleton +colleg +interest +program +languag +basketbal +volleybal +softbal +linkag +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..fc101a2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,140 @@ +home +page +chad +lane +academ +person +info +neat +stuff +dept +comput +scienc +univers +wisconsin +madison +west +dayton +madison +dept +phone +mail +offic +address +offic +hour +offic +phone +home +phone +wisc +welcomethank +stop +page +hope +enjoi +bestbet +link +stuff +section +biggest +new +life +right +get +marri +onmai +nichol +final +want +tell +good +luck +count +academ +fall +cours +inform +retriev +technolog +seek +inform +databas +manag +system +ling +audit +advanc +semant +research +interest +comput +linguist +discours +process +us +advic +research +barwis +epigram +program +alan +perli +educ +mathemat +comput +scienc +minor +philosophi +laud +truman +state +univers +formerli +northeast +missouri +state +univers +comput +scienc +expect +univers +wisconsin +madison +person +inform +stand +neat +stuff +accord +truli +click +imag +cyber +poop +creation +unabash +brother +bart +arthur +lane +download +claud +claud +psychot +program +talk +rais +plant +internet +deep +thought +jack +handi +reload +differ +on +last +modifi +chad +lane diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..dad5c60c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,196 @@ +susan +hert +home +page +susan +hert +hert +wisc +research +assist +depart +comput +scienc +univers +wisconsin +madison +dayton +madison +telephon +curriculum +vita +postscript +research +interest +public +softwar +interest +link +research +interest +appli +experiment +comput +geometri +analysi +algorithm +design +motion +plan +algorithm +comput +graphic +geometr +algorithm +advisor +vladimir +lumelski +current +work +robot +develop +motion +plan +alogirthm +multipl +robot +common +environ +select +public +susan +hert +vladimir +lumelski +deform +curv +plane +tether +robot +motion +plan +extend +abstract +paper +appear +proceed +canadian +confer +comput +geometri +august +susan +hert +vladimir +lumelski +planar +curv +rout +tether +robot +motion +plan +appear +intern +journal +comput +geometri +applic +susan +hert +vladimir +lumelski +ti +bind +motion +plan +multipl +tether +robot +robot +autonom +system +version +paper +publish +proc +ieee +intern +confer +robot +autom +susan +hert +sanjai +tiwari +vladimir +lumelski +terrain +cover +algorithm +appear +journal +autonom +robot +special +issu +autonom +underwat +robot +susan +hert +vladimir +lumelski +move +multipl +tether +robot +arbitrari +configur +proc +intern +confer +intellig +robot +system +august +susan +hert +reznik +simul +librari +basi +anim +program +version +technic +report +univers +wisconsin +madison +robot +laboratori +juli +interest +link +comput +geometri +page +comput +scienc +educ +link +book +refer +shelf +librari +congress +line +book +page +travel +samantha +cook +epicuri +veggi +unit diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..00be66d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,64 @@ +kirk +hogenson +kirk +hogenson +graduat +studentcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +mail +wisc +edutelephon +kirk +hogenson +graduat +student +madison +madison +wisconsin +depart +section +offic +hour +tue +also +look +myschedul +none +offic +hour +workout +mail +tryto +appoint +time +finger +send +mail +visit +ghana +countri +serv +peac +corp +usernam +check +pnhp +student +group +page +maintain +wife +eilun +experi +counter +sai +accessedtim +sinc +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..b1e5c1b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,15 @@ +jeffrei +horn +jeffrei +horn +swanton +road +madison +wisconsin +phone +email +horn +wisc +wise +linear +familyemploymenteducationresearchgenealog diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..456fb2b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,244 @@ +susan +horwitzsusan +horwitzprofessorcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usa +mail +horwitz +wisc +telephon +secretari +depart +cornel +univers +research +interest +languag +base +program +environmentsprogram +slice +differenc +mergingstat +analysi +programsinterprocedur +dataflow +analysisresearch +summarymi +work +mainli +involv +design +implementationof +languag +base +program +tool +help +programm +problem +like +understand +exist +program +work +would +affectedbi +propos +modif +understand +textual +structur +semant +differ +betweentwo +version +program +retest +program +chang +combin +piec +program +produc +program +certainsemant +guarante +work +involv +program +represent +call +theprogram +depend +graph +oper +call +slice +also +work +algorithm +precis +interproceduraldataflow +analysi +previou +work +interprocedur +dataflow +analysi +mainli +concentratedeith +effici +algorithm +specif +individu +problem +necessarili +effici +algorithm +gener +class +problem +thoma +rep +mooli +sagiv +develop +implement +newalgorithm +effici +appli +larg +class +problem +recent +publicationsm +shapiro +horwitz +fast +accur +flow +insensit +point +analysi +appear +confer +record +twenti +fourth +symposium +onprincipl +program +languag +pari +franc +januari +horwitz +rep +sagiv +demand +interprocedur +dataflow +analysi +proceed +sigsoft +symposium +foundat +softwareengin +washington +octob +sagiv +rep +horwitz +precis +interprocedur +dataflow +analysi +applic +constantpropag +proceed +sixth +intern +joint +confer +theoryand +practic +softwar +develop +aarhu +denmark +rep +sagiv +horwitz +precis +interprocedur +dataflow +analysi +graph +reachabl +confer +record +twenti +second +symposium +principlesof +program +languag +francisco +januari +bate +horwitz +increment +program +test +us +program +depend +graph +confer +record +twentieth +symposium +principlesof +program +languag +charleston +januari +horwitz +rep +program +depend +graph +softwar +engin +proceed +fourteenth +intern +conferenceon +softwar +engin +melbourn +australia +horwitz +identifi +semant +textual +differ +version +aprogram +proceed +sigplan +confer +program +languagedesign +implement +white +plain +june +teach diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..0c29ed6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid +pagenam +sidnei +hummertoffic +phone +offic +email +hummert +wisc +edua +postscript +version +resum +pictur +click +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..3a830729 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,54 @@ +igor +ivanisev +home +pageigorivanisev +work +newest +project +disclaim +alien +speak +alien +particular +needless +page +construct +ever +feel +like +actual +construct +alreadi +link +mail +research +interest +robot +vision +stuff +generalgradu +slave +univers +wisconsin +departmentwa +undergrad +drake +univers +math +departmentaddress +comput +scienc +departmentunivers +wisconsin +west +dayton +streetmadison +offic +phone +home +phone +mail +iigor +wisc +eduiigor +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..27ce8d3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..a3def617 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,47 @@ +sharenow +home +page +sharenow +wisc +teach +assist +peterson +comput +scienc +depart +univers +wisconsin +madisonmadison +offic +comput +sciencestelephon +offic +hour +section +tuesdai +thursdai +pmsection +meet +section +meet +pmboth +class +meet +room +comput +sciencesc +announcementshandoutsmoth +jone +profil +sharenow +recreat +site +pleas +send +email +comment +last +modifi +tuesdai +septemb +sharenow diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..136d4a78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,49 @@ +basneyjim +basneygradu +student +research +assistantcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +email +jbasnei +wisc +eduoffic +comput +scienc +statisticsoffic +phone +research +interest +area +oper +system +andnetwork +current +work +condor +directionof +prof +miron +livni +receiv +fromoberlin +colleg +comput +scienc +english +webpag +oberlin +resum +codefrom +previou +project +avail +onlin +last +modifi +basnei diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..f570b2bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,96 @@ +jerel +mackai +home +pagejerel +mackai +assist +research +comput +scienc +special +databas +oper +system +work +fulltim +comput +system +univers +wisconsin +madison +comput +scienc +depart +respons +includ +develop +support +sybas +ingr +databas +instal +backup +softwar +train +student +hourli +plai +electr +guitar +thrash +metal +specialti +also +violin +classic +baroqu +mainli +seen +error +evil +wai +click +shock +case +didn +believ +ey +first +time +work +like +record +mostli +funni +cover +stuff +abba +metallica +also +origin +soon +abl +sampl +hit +watch +favorit +show +plai +raquetbal +golf +shoot +pool +stand +around +towel +yeah +know +much +work +finger +jerellast +modifi +jerel +mackai +jerel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..daf1eee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home +page +johan +larson +homepag +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..fe795cfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,173 @@ +home +page +relief +happi +java +good +censorship +realli +mail +jherro +wisc +note +class +made +home +page +relax +let +would +cool +thing +home +page +apictur +pictur +girlfriend +half +year +afraid +date +though +almost +year +mani +year +tortur +disembody +becam +float +head +death +directori +anim +frame +seri +pictur +jpeg +format +default +name +number +start +frame +number +frame +millisecond +paus +imag +default +overriden +paus +repeat +sequenc +explicit +order +frame +stuff +vital +inform +aquir +nicknam +would +like +take +time +apolog +lame +page +pleas +bear +pretti +pictur +jack +skellington +kermit +frog +interest +someth +els +neat +link +friend +homepag +write +haiku +said +thath +go +click +stuff +roomat +mine +grad +notr +dame +memori +forgotten +time +roomat +cult +hippothi +exploratori +intervent +chaotic +exist +realiti +follow +link +enjoi +benefit +matriarch +societi +join +todai +exclus +club +hierarchi +rule +friend +homepag +semi +cool +link +notr +dame +home +page +yahooooooooooooo +work +link +contain +free +softwar +shack +bazillion +search +engin +search +engin +mpeg +movi +archiv +realli +cool +link +great +muppet +page +sound +imag +link +cool +rachel +want +select +cool +cano +trip +pictur +look +bout +cano +pictur diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..cbcf6723 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,188 @@ +bing +home +page +bing +index +gener +inform +educ +advisor +research +interest +research +project +public +pointer +hobbi +gener +informationresearch +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +jieb +wisc +edueduc +comput +scienc +univers +wisconsin +madison +advisor +prof +david +dewittresearch +interest +tertiari +storag +support +dbm +parallel +databas +manag +system +object +orient +databas +manag +system +geograph +inform +systemsresearch +project +paradis +shorepublicationsbuild +scaleabl +spatial +dbm +technolog +implment +evalu +patel +kabra +tuft +burger +hall +ramasami +lueder +ellman +kupsch +dewitt +naughton +submit +public +octob +queri +execut +batch +paradis +prong +approach +effici +process +queri +tape +resid +data +set +dewitt +submit +public +octob +process +satellit +imag +tertiari +storag +studi +impact +tile +size +perform +dewitt +appear +nasa +goddard +conferenceon +mass +storag +system +technolog +septemb +us +constraint +queri +tree +goldstein +ramakrishnan +shaft +shorter +version +appear +workshop +constraint +databas +februari +client +server +paradis +dewitt +kabra +patel +proceed +larg +data +base +confer +santiago +chile +septemb +storag +reclam +reorgan +client +serverpersist +object +store +yong +naughton +proceed +ieee +data +engin +confer +houston +februari +pointer +eosdi +sigmod +madison +dbm +research +grouphobbi +tenni +tenni +server +volleybal +volleyballweb +white +water +raft +whitewat +page +find +pictur +click +full +size +pictur +last +updat +juli +bing +jieb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..0ad8a829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,74 @@ +jignesh +home +page +jignesh +patel +jignesh +wisc +welcom +research +assist +depart +comput +scienc +univers +wisconsin +madison +west +dayton +street +madison +telephon +advisor +david +dewitt +research +interest +system +parallel +databas +system +object +relat +databas +current +work +paradis +project +public +relat +paradis +client +server +paradis +paper +publish +vldb +partit +base +spatial +merg +join +publish +sigmod +public +accur +model +hybrid +hash +join +algorithm +paper +publish +sigmetr +miscellan +stuff +virtual +tourist +inlin +skate +home +page +madhuri +kashmir +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..faaaf1b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +georg +varghes +peopl +download +netscap +page +click +warn +page +pretti +lame diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..7678ab80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,106 @@ +johann +gehrk +homepagejohann +gehrkewelcom +intern +graduat +studentat +comput +sciencesdepart +univers +ofwisconsin +madison +area +interest +databasemanag +system +work +area +data +mine +underprofessor +raghuramakrishnan +page +construct +contact +inform +public +interest +linkscontact +inform +email +johann +utexa +offic +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +room +madison +wisconsin +home +eagl +height +madison +public +stoica +abdel +wahab +jeffai +baruah +gehrk +plaxton +proport +share +resourc +alloc +algorithmfor +real +time +time +share +system +proceed +ieee +real +time +system +symposium +washington +decemb +appear +anexpand +version +baruah +gehrk +plaxton +fastschedul +period +task +multipl +resourc +inproceed +ieee +intern +parallel +processingsymposium +april +expand +version +avail +technicalreport +depart +comput +scienc +universityof +texa +austin +februari +johann +gehrk diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..66175672 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,84 @@ +home +pagewelcom +home +page +first +year +graduat +student +univers +wisconsin +madison +studi +comput +scienc +us +comput +also +maintain +frequent +ask +question +list +latest +powerbook +model +releas +appl +thing +look +student +section +click +herei +amass +list +good +site +number +catagori +check +page +madison +depart +madison +alma +mater +site +visit +often +appl +home +page +needsth +nando +time +great +new +coverageth +spot +mind +numb +soap +operaish +drivelziffnet +comput +industri +newsc +databas +manag +system +construct +compil +keep +classworktodai +dilbert +chucklejon +bodner +jonb +wisc +mound +madison +last +modifi +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..a74743cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,107 @@ +welcom +home +page +first +year +graduat +student +depart +came +frombeij +chines +academi +scienc +china +hometown +nanj +capitol +jiangsu +provinc +degre +student +depart +univers +wisconsin +madison +institut +autom +chines +academi +scienc +beij +china +specil +pattern +recognit +imag +process +biomed +engin +depart +southeast +univers +nanj +chinacurr +activ +cours +advanc +comput +network +topic +databas +manag +advanc +oper +system +teach +assist +data +structur +current +address +home +spring +madison +work +comput +scienc +depart +west +dayton +street +madison +tele +offic +home +could +finger +wisc +refer +comput +scienc +depart +inform +class +technic +stuffjava +placeshor +tutorialchina +affairchina +democracybeij +spring +place +interest +stanford +network +groupstanford +medic +informaticsmit +commun +control +signal +processingjob +site +newsyou +visitor +number +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..f27140ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,111 @@ +jeff +shabel +home +pagech +welcom +jeff +shabel +home +page +wisconsinch +theme +song +offic +hour +tue +thur +appoint +offic +person +informationmajor +comput +scienc +architectur +emphasi +statu +second +year +graduat +student +view +fall +schedul +academ +background +receiv +comput +engin +diego +electr +comput +engin +depart +home +town +cupertino +jose +high +school +monta +vista +high +school +cupertino +plan +graduat +comput +scienc +favorit +sport +team +golden +state +warrior +basketbal +jose +mercuri +new +andnando +jose +shark +hockei +jose +mercuri +new +andnando +francisco +footbal +jose +mercuri +new +andnando +oakland +favorit +link +new +jose +mercuri +newsmus +columbia +hous +find +join +columbia +hous +deal +also +tip +info +join +music +club +miscellan +view +print +postscript +document +window +send +mail +jshabel +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..637957be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,311 @@ +jussi +myllymakijussi +myllymaki +research +assist +comput +scienc +depart +univers +wisconsin +west +dayton +street +madison +telephon +email +jussi +wisc +eduresearch +summaryi +interest +perform +analysi +dbm +oper +onadvanc +tape +disk +technolog +includ +disk +tape +arrai +mcurrent +studi +buffer +larg +dataset +tertiari +storageto +disk +memori +data +us +data +explor +andvisu +deviseproject +advisor +prof +mironlivni +recent +work +includ +improv +perform +relat +joinsof +larg +volum +disk +tape +resid +data +public +listbelow +appli +structur +organ +tertiari +storageto +solv +problem +associ +divers +characterist +andfunct +limit +tertiari +media +recent +paper +datavisu +explor +discuss +data +metadata +managementissu +larg +complex +data +set +involv +refere +publicationseffici +buffer +concurr +disk +andtap +miron +livni +proceed +perform +intern +confer +perform +theori +measur +evalu +comput +commun +system +octob +integr +visual +parallel +programperform +data +karen +karavan +miron +livni +bartonp +miller +proceed +third +workshop +environ +andtool +parallel +scientif +comput +august +structur +organ +tertiarystorag +daniel +ford +proceed +theintern +confer +data +engin +februari +alsoavail +almaden +research +report +visual +explor +larg +data +set +withmiron +livni +raghu +ramakrishnan +proceed +spie +theintern +societi +optic +engin +januari +disk +tape +join +synchron +disk +andtap +access +miron +livni +proceed +acmsigmetr +confer +submit +publicationdevis +integr +queri +visual +larg +dataset +livni +ramakrishnan +beyer +chen +donjerkov +lawand +myllymaki +wenger +submit +sigmod +confer +relat +join +data +tertiari +storag +jussi +myllymaki +andmiron +livni +submit +intern +confer +dataengin +publicationsdisk +tape +join +synchron +disk +tapeaccess +miron +livni +univers +wisconsin +depart +technic +report +join +tape +project +report +master +degreeproject +report +univers +wisconsin +depart +appli +client +server +model +comput +networkarchitectur +master +thesi +helsinki +univers +technolog +depart +industri +manag +finnish +documentsimplement +perform +analysi +treealgorithm +jeff +schwarz +yoav +weiss +class +report +experi +implement +structur +filesystem +trishul +chilimbi +yoav +weiss +class +report +overview +current +tape +technolog +productsoverview +raid +technolog +supplier +productssom +frequent +need +link +unifi +technic +report +search +adaptec +scsi +adapt +home +digit +alpha +workstationsandpcsandtechn +journaland +whitepap +home +technolog +researchandcyberjourn +quantum +digit +linear +tapeanddlt +faqandwhitepap +home +solarisandsparcstationsandtechn +report +home +scsi +faqandstorag +faqand +otherusenet +faqsmani +link +found +jussi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..000bcf40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepag +jyothithi +page +construct +info +student +cours +grade +other +sorri +dissappoint +email +jyothi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..49f70225 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,108 @@ +karen +karavaniceveryth +need +know +learn +public +school +karen +karavanicresearch +assist +paradyn +parallel +perform +tool +project +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +madison +karavan +wisc +current +pursu +comput +scienc +research +interest +includ +parallel +comput +environ +autom +perform +tune +process +oper +system +databasesask +madison +women +comput +scienc +wic +frontier +scienc +cool +program +dane +counti +high +school +studentstrio +student +support +servic +free +tutor +support +madison +undergradu +miss +site +page +could +save +life +safer +pagefor +chocol +lover +onlystuyves +high +school +alumni +associationstuyves +high +school +class +thoma +legisl +inform +internetth +constitut +cure +anyth +salt +water +sweat +tear +isak +dinesen +ship +port +safe +ship +sail +thing +admir +grace +hopper +comput +pioneer diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..3f8a12ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,178 @@ +stefano +kaxira +home +page +stefano +kaxira +kaxira +wisc +editor +ieee +kiloprocessor +extens +sciresearch +assist +univers +wisconsin +research +interest +summari +recent +publicationsresearch +interest +share +memori +multiprocess +scalabl +coher +interfac +cach +design +aspect +parallel +processor +memori +memori +processor +architectur +galileo +research +summari +introduc +glow +kiloprocessor +extens +hierarch +extens +collaborationwith +goodman +work +examin +depth +design +option +develop +upcom +standard +incolabor +goodman +david +jame +stein +gjess +recent +public +glow +cach +coher +protocol +extens +wide +share +data +stefano +kaxira +jame +goodmanto +appear +proceed +intern +confer +supercomput +also +technic +report +kiloprocessor +extens +stefano +kaxirasto +appear +proceed +intern +parallel +process +symposium +april +implement +perform +glow +kiloprocessor +extens +wisconsin +wind +tunnel +stefano +kaxira +jame +goodmannd +intern +workshop +base +high +perform +cost +comput +march +hierarch +extens +cach +coher +stefano +kaxira +jame +goodmanst +intern +workshop +base +high +perform +cost +comput +august +hierarch +extens +jame +goodman +stefano +kaxirasunivers +wisconsin +comput +scienc +dept +juli +softwar +tool +simul +prototyp +monitor +multiprocessor +system +stafylopati +papakonstantin +kaxirasinform +softwar +technolog +autom +synthesi +parallel +dedic +architectur +us +prolog +specif +tsanaka +papakonstantin +pekmestzi +kaxirasp +greec +hardwar +synthesi +methodolog +us +prolog +tsanaka +papakonstantin +kaxirasmicroprocess +microprogram +north +holland diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..b76667aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,323 @@ +keeper +home +page +steven +foughtthei +heaven +like +perfect +littl +world +doesn +realli +need +everyth +made +light +lauri +anderson +strang +angel +note +possibl +page +make +structur +markup +indic +piec +inform +rather +look +made +inform +page +access +peopl +bitmap +displai +includ +theblind +us +standard +markup +tag +allow +whateverbrows +wish +also +note +page +make +literari +convent +ironi +satir +sarcasm +butnoth +contain +herein +meant +offens +areoffend +probabl +stupid +start +third +year +graduat +student +comput +scienc +depart +firsttwo +week +sinc +support +take +care +varieti +machin +vari +degre +success +graduat +student +side +never +publish +ever +done +anyth +impress +miracl +pass +prelim +research +addup +hill +bean +like +fizzl +result +areobtain +probabl +leav +junior +level +programm +creatingkiosk +front +end +visual +primit +system +leav +perman +skill +free +surpris +year +discov +especi +surpris +wasn +convinc +aliv +thosewho +know +well +would +probabl +argu +proof +mybe +aliv +anywai +might +want +read +thoughtson +hedgehog +contact +electron +mail +pager +reliabl +specif +locat +often +work +home +ifyou +would +like +person +write +someth +address +keeper +wisc +becom +clear +wantto +contact +short +notic +give +pager +number +variou +creation +thought +hedgehog +todo +list +updat +sporad +list +associ +rsum +postscript +document +html +section +long +outof +date +unfortun +danenet +dilhr +jobnet +databas +soon +defunct +caltech +project +caltech +institut +archiv +photonet +databas +caltech +personnel +directori +look +much +better +insid +caltech +caltech +databas +insid +caltech +anyon +enter +page +databaseus +form +interfac +slide +slide +talk +gave +slide +talk +gave +distribut +object +slide +talk +gave +call +java +danger +love +come +hell +freez +rate +inform +page +break +hierarchi +consult +inform +somewher +steven +fought +unifi +attribut +index +sfuai +informationag +intellectu +properti +everyth +index +allow +assigna +uniqu +serial +number +refer +quot +atth +page +provid +quot +sourc +contextu +inform +pointer +relev +quot +index +bui +adob +distil +translat +rsuminto +world +user +chanc +ofread +suppos +print +pinch +want +write +us +comput +certaintruth +psycholog +us +comput +softwar +peopl +eventuallypick +aren +ever +taught +explicitli +think +possibleto +would +make +start +comput +easier +cheap +shot +thing +hate +project +idea +mull +probabl +accessibleto +small +subset +user +tough +world diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..3e3bdb0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +krishna +kunchithapadamkrishna +kunchithapadamgreet +welcom +page +interest +read +languag +indian +classic +music +miscellaneouspubl +data +distribut +perform +steer +perform +toolsresum +gzip +postscript +contact +search +last +modifi +bykk diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..283429dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,42 @@ +kristin +home +pagekristin +tuft +research +assist +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +tuft +wisc +eduadvisor +david +dewitt +miscellani +inform +serveruw +madison +dbm +research +groupacm +sigmod +inform +server +home +pageeo +project +scienc +officelast +modifi +kristin +tuftekristin +tuft +tuft +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..f1db3bf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,61 @@ +krung +homepageupd +novemb +krung +homepag +underconstructioni +keep +page +short +inform +good +serf +year +cometh +follow +relat +topic +research +mathemat +program +project +pursu +cours +work +cours +work +comput +scienc +depart +comput +compani +favorit +hobbi +person +inform +person +opinion +life +madison +wisconsin +linkedth +follow +page +import +link +univers +madison +wisconsin +whole +comput +scienc +depart +uniqu +entiti +electron +librari +system +krung +sinapiromsaran +emailkrung +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..4453013c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,225 @@ +kunen +home +page +kenneth +kunen +professormath +comput +scienc +univers +wisconsin +dayton +madison +mail +kunen +wisc +edutelephon +stanford +univers +interest +autom +deduct +logic +program +theori +topolog +research +summari +research +work +involv +logic +applic +typic +applic +autom +deduct +logic +program +autom +deduct +tool +like +resolutionto +prove +mathemat +theorem +logic +program +studi +semant +languag +likeprolog +specif +topic +consid +prologus +negat +failur +semant +incompat +betweenleast +fix +point +comput +prolog +style +backtrack +comput +mathemat +logic +work +axiomat +theori +besid +interest +right +thissubject +relat +variou +abstract +area +mathemat +theoret +topolog +measur +theori +mani +basic +question +turn +independ +usualaxiom +theori +select +recent +public +follow +postscript +file +kunen +shortest +singl +axiom +group +expon +comput +mathemat +applic +hart +kunen +singl +axiom +expon +group +autom +reason +kunen +ramsei +theorem +boyer +moor +logic +appear +autom +reason +kunen +mill +measur +corson +compact +space +fundamenta +mathematica +hart +kunen +local +constant +function +fundamenta +mathematica +kunen +semant +answer +liter +technic +report +univers +wisconsin +appear +autom +reason +kunen +construct +comput +mathemat +technic +report +univers +wisconsin +appear +autom +reason +kunen +moufang +quasigroup +algebra +kunen +quasigroup +loop +associ +law +preprint +appear +algebra +kunen +structur +conjugaci +close +loop +preprint +kunen +complet +result +link +resolut +appear +press +hart +kunen +weak +measur +extens +axiom +rough +draft +book +review +hart +kunen +review +note +theori +moschovaki +american +mathemat +monthli +cours +taught +fall +math +geometr +infer +reason +math +foundat +mathemat +spring +comp +artifici +intellig +last +chang +octob +kunen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..00fcd239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,520 @@ +jame +laru +home +page +jame +laru +laru +wisc +associ +professor +comput +sciencedepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usalaru +wisc +eduphon +secretari +juli +fingerson +thea +sklenar +department +offic +educ +research +interest +research +project +upcom +cours +softwar +recent +paper +graduat +summari +educ +univers +california +berkelei +univers +california +berkelei +harvard +colleg +research +interest +program +languagesand +compil +particular +languag +compil +parallel +machin +design +program +share +memori +parallel +comput +compil +symbol +languag +program +profil +trace +program +execut +edit +research +project +wisconsin +wind +tunnel +larg +grain +data +parallel +program +languag +execut +edit +librari +cours +data +structuresc +java +softwar +spim +wartsrec +paperseffici +path +profil +thoma +ball +jame +laru +appear +micro +decemb +parallel +program +larg +grain +data +parallel +programminglanguag +jame +laru +brad +richard +guhan +viswanathan +gregori +wilson +parallel +program +us +press +teapot +languag +support +write +memori +coher +protocol +satish +chandra +brad +richard +jame +laru +sigplan +program +languag +design +implement +pldi +instruct +schedul +execut +edit +eric +schnarr +andjam +laru +appear +workshop +compil +support +system +softwar +wcsss +februari +effici +support +irregular +applic +distribut +memori +machin +shubhendu +mukherje +shamik +sharma +mark +hill +jame +laru +annerog +joel +saltz +fifth +sigplan +symposium +principl +practiceof +parallel +program +ppopp +juli +machin +independ +execut +edit +jame +laru +eric +schnarr +sigplan +confer +program +languagesdesign +implement +pldi +june +tempest +substrat +portabl +parallel +program +mark +hill +jame +laru +david +wood +compcon +spring +march +static +branch +frequenc +program +profil +analysi +youfeng +jame +laru +annual +ieee +intern +symposium +microarchitectur +micro +novemb +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +markhil +jame +laru +ann +roger +david +wood +supercomput +novemb +time +spent +messag +pass +share +memori +program +satish +chandra +jame +laru +ann +roger +sixth +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +system +support +languag +implement +jame +laru +brad +richard +guhan +viswanathan +sixth +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +fine +grain +access +control +distribut +share +memori +ioanni +schoina +babak +falsafi +alvin +lebeck +steven +reinhardt +jameslaru +david +wood +sixth +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +cachier +tool +automat +insert +cico +annot +trishul +chilimbi +jame +laru +intern +confer +parallel +program +icpp +august +wisconsin +wind +tunnel +project +annot +bibliographi +mark +hill +jame +laru +david +wood +unpublish +manuscript +revis +frequent +cooper +share +memori +softwar +hardwar +scalabl +multiprocessor +mark +hill +jame +laru +steven +reinhardt +david +wood +transact +comput +system +toc +novemb +wisconsin +architectur +research +tool +wart +mark +hill +jame +laru +alvin +lebeck +madhusudhan +talluri +david +wood +comput +architectur +new +august +graduatesbrad +richard +august +memori +system +parallel +program +first +employ +vassar +colleg +guhan +viswanathan +septemb +techniqu +compil +data +parallel +languagesfirst +employ +oracl +lorenz +huelsbergen +august +dynam +languag +parallel +first +employ +bell +lab +lorenz +research +thoma +ball +august +control +flow +control +depend +softwar +tool +first +employ +bell +lab +tball +research +research +summarymi +research +focus +problem +program +comput +part +thewisconsin +wind +tunnel +project +havehelp +develop +hybrid +softwar +hardwar +share +memori +computerarchitectur +facilit +program +compil +parallelmachin +current +student +develop +languag +compil +tool +demonstr +exploit +power +user +level +coherencepolici +also +interest +perform +evalu +tool +help +programmersunderstand +improv +program +perform +recent +ball +andi +develop +effici +path +profil +algorithm +provid +moredetail +understand +control +flow +within +routin +hasidentifi +possibl +better +compil +last +modifi +jame +laru +laru +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..72572875 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick +pagenick +pageoffic +phone +email +leavi +wisc +eduoffic +hour +tuesdai +wednessdai diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..88435d57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,107 @@ +steven +huss +lederman +home +page +steven +huss +lederman +home +page +research +interest +includ +research +univ +wisconsin +madison +relat +thewisconsin +wind +tunnel +project +research +area +parallel +linear +algebra +iscov +prismproject +also +heavili +invol +mpistandard +sever +other +recent +publish +book +origin +mpistandard +inform +order +book +press +isbn +also +look +complet +refer +editor +current +draft +forum +pleas +keep +mind +work +forum +ongo +andit +document +intend +interest +ongoingwork +forum +committe +member +compress +postscript +complet +sourc +compressedtar +file +individu +sourc +file +avail +inform +would +finger +steven +huss +lederman +comput +scienc +dept +univ +wisconsin +madison +dayton +madison +phone +messag +desper +mail +lederman +wisc +http +wisc +lederman +lederman +html +offic +comput +scienc +statist +build diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..4fa0d0a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,118 @@ +home +page +lawrenc +landweb +lawrenc +landweb +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +wisc +purdu +univers +interest +comput +network +protocol +high +speed +network +electronicmail +research +summari +research +program +focus +high +speed +network +participatingin +gigabit +project +darpa +nation +project +involvesth +design +implement +network +testb +oper +atgigabit +second +data +rate +wisconsin +work +onissu +protocol +design +congest +admiss +control +visualizationof +atmospher +phenomena +virtual +conferenc +sampl +recent +public +design +implement +fast +virtual +circuit +establishmentmethod +network +olsen +proceed +theieee +infocom +confer +francisco +april +dynam +time +window +packet +admiss +control +feedback +witht +faber +mukherje +proceed +sigcommconfer +baltimor +august +dynam +time +window +gener +virtual +clock +combin +close +loop +open +loop +congest +control +mukherje +faber +proceed +ieee +infocom +confer +florenc +network +coursesconnect +tabl diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..8ca180e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,71 @@ +shannon +lloyd +home +page +shannon +lloyd +work +address +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +madison +lloyd +wisc +respons +us +comput +lectur +section +comp +offic +hour +comp +wednesdai +thursdai +appoint +fall +cours +construct +compil +comput +linguist +variou +link +women +comput +scienc +univers +utah +depart +chemistri +univers +utah +depart +comput +scienc +person +engin +career +servic +comput +languag +archiv +comput +linguist +natur +languag +process +artifici +intellig +cognit +linguist +scienc +xsoft +lexdemo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..0dc93aa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,223 @@ +christoph +luka +home +pagechristoph +lukasrelev +inform +offic +phone +email +luka +wisc +edui +appar +coordin +mspl +fall +workshipi +also +defeat +rival +quest +becom +semest +sunivers +wisconsin +program +languag +seminar +czar +cours +go +take +fall +festiv +java +taught +advisor +tuft +univers +mail +list +click +site +entertain +save +tiger +number +free +prisonerthi +stock +quoteserv +maintain +fabul +wealth +todd +amus +page +check +friend +page +cool +thing +also +pagebet +polit +candid +legal +iowa +electron +market +identitycaptain +kirk +sing +lone +gui +lone +troubl +meet +women +throughamaz +technolog +longer +need +concern +withtri +interact +real +women +virtual +girlfriend +traci +teri +wait +meet +wife +incred +jump +catthi +realli +cool +fill +anywai +like +know +well +realli +entertain +pleas +fill +send +option +case +feel +someth +current +list +name +mail +address +favorit +thing +feel +check +appli +killer +buttmunchextrem +dudemichael +nesmith +fanfoolmyth +figurewick +good +basketbal +playervalu +studentment +defectivea +wkrp +cincinatti +tragic +like +figuregeek +tradesgonzo +admirernetscap +junki +search +child +pornpersonifi +organ +condom +stretch +much +readi +blowflam +testicl +outer +space +tast +goodpoetri +guruhogwildthi +kick +assman +manbig +dudeuh +ohprofession +muff +diverregress +higher +lifeformherald +alien +invas +forcechri +know +html +formsalienherpetophiletodd +turnidg +hatth +mancreepi +laugh +headsmal +planetdr +companioneast +bunnycyberweenietcl +hellbeast +option +checkbox +simpli +fill +click +submit +reload +page +mayb +figur +automat +option +cool +interact +checkbox +option +keyword +includ +includ +page +interest +search +superhighwai +drug +cosmo +irrit +gross +nake +scatolog +pervert +offspr +food +etymolog +phat +gnarli +bogu +wierd +cybermuffin +pictur +erotica +chees diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..62de1570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,123 @@ +welcom +ling +zheng +home +page +depart +comput +scienc +sheboygan +dayton +madison +madison +offic +phone +mail +lzheng +wisc +shameless +self +promot +resum +text +version +want +know +side +click +research +interest +research +assist +paradyn +group +current +hack +paradyn +onto +hpux +port +boss +barton +miller +also +charg +home +page +wuhan +univers +chinaand +want +take +look +girlfriend +pictur +temporarili +comput +architectur +educ +prese +dept +comput +scienc +univers +winsconsin +dept +comput +scienc +univers +iowa +dept +comput +scienc +wuhan +univers +officem +marcelo +goncalv +alumni +click +ignor +wuhan +univers +alumni +home +page +china +home +page +place +surf +compani +univers +hereif +want +know +best +graduat +school +comput +scienc +sthe +infom +could +take +look +america +best +graduat +schoolssend +ling +zheng +mail +suggest +homepag +bother +thank +last +updat +march +visitor +number +sinc +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..5b49647d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,50 @@ +manuvir +home +pagemanuvir +dasnow +know +name +andwhat +look +like +hello +feelfre +look +around +need +inform +somethingsend +email +like +passion +golf +anact +photo +later +manuvirwhat +know +know +gener +start +advisor +better +thisto +keep +monei +come +turn +theorigin +america +team +cours +leagu +plai +dai +sundai +round +golf +final +consin +said +manuvir +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..6c760e29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,913 @@ +mark +hill +home +pagemark +hill +markhil +wisc +associ +professor +comput +scienc +andelectr +comput +engineeringat +univers +wisconsint +content +address +offic +hour +current +teach +catalog +inform +educ +research +interest +andsummari +wisconsin +wind +tunnel +project +sampler +recent +paper +graduateslink +us +inform +world +wide +comput +architectur +inform +wisconsin +comput +architectur +group +wisconsin +architectur +research +tool +wart +stuff +like +oralpresent +advic +includ +david +patterson +show +give +talk +onlin +data +forcach +perform +spec +benchmark +suit +proof +hardwar +wisconsin +sound +address +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usamarkhil +wisc +eduphon +secretari +juli +fingerson +thea +sklenar +department +offic +offic +hour +fall +mondai +wednesdai +appoint +markhil +wisc +educurr +teachingfal +advanc +comput +architectur +ifal +topic +comput +java +languag +implement +icatalog +inform +cours +teachc +machin +organ +programmingc +introduct +comput +architecturec +advanc +comput +architectur +advanc +comput +architectur +iieduc +comput +scienc +univers +california +berkelei +comput +scienc +univers +california +berkelei +comput +engin +univers +michigan +research +interest +comput +architectur +parallel +comput +memori +system +perform +evaluationresearch +summarymi +research +target +memori +system +share +memori +multiprocessorsand +high +perform +uniprocessor +memori +system +design +import +larg +determin +comput +sustain +perform +mywork +emphas +quantit +analysi +often +requir +evaluationtechniqu +system +level +hardwar +perform +much +recent +work +part +wisconsin +windtunnel +projectwith +prof +laru +wood +manystud +project +expect +futur +massiv +parallel +computerswil +built +workstat +like +node +program +high +levelparallel +languag +like +support +share +address +space +inwhich +process +uniformli +refer +data +research +seek +todevelop +consensu +middl +level +interfac +languagesand +compil +system +softwar +hardwar +recentlypropos +tempest +interfac +enabl +programm +compil +program +librari +implement +messag +pass +transpar +share +memori +hybrid +combin +aredevelop +tempest +implement +think +machin +aclust +workstat +hypothet +hardwar +platform +wisconsin +wind +tunnel +project +name +toolsto +cull +design +space +parallel +supercomput +manner +similarto +aeronaut +engin +convent +wind +tunnel +designairplan +recent +work +madhu +talluritarget +improv +translat +lookasid +buffer +page +tabl +perform +cluster +align +group +base +page +option +requir +chang +hardwar +complet +subblock +tlb +oper +system +cluster +page +tabl +superpagesand +partial +subblock +tlb +asplosandsosppap +sampler +recent +papersth +wisconsin +wind +tunnel +project +annot +bibliographi +mark +hill +jame +laru +david +wood +unpublish +manuscript +revis +frequent +parallel +comput +research +wisconsin +wind +tunnel +project +mark +hill +jame +laru +david +wood +confer +experiment +research +comput +system +june +bidirect +technolog +transfer +sabbat +industri +mark +hill +confer +experiment +research +comput +system +june +coher +network +interfac +fine +grain +commun +shubhendu +mukherje +babak +falsafi +mark +hill +anddavid +wood +intern +symposium +comput +architectur +isca +optimist +simul +parallel +architectur +us +program +execut +sashikanth +chandrasekaran +mark +hill +workshop +parallel +distribut +simul +pad +page +tabl +address +space +madhusudhan +talluri +mark +hill +yousef +khalidi +symposium +oper +system +princip +sosp +decemb +presidenti +young +investig +award +final +report +mark +hill +juli +effici +support +irregular +applic +distribut +memori +machin +shubhendu +mukherje +shamik +sharma +mark +hill +jame +laru +ann +roger +joel +saltz +ppopp +juli +cost +effect +parallel +comput +david +wood +mark +hill +ieee +comput +februari +solv +microstructur +electrostat +propos +parallel +comput +frank +traenkl +mark +hill +sangta +comput +chemic +engin +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +mark +hill +jame +laru +ann +roger +david +wood +supercomput +surpass +perform +superpag +less +oper +system +support +madhusudhan +talluri +mark +hill +intern +confer +architectur +support +forprogram +languag +oper +system +asplo +octob +evalu +directori +protocol +medium +scale +share +memorymultiprocessor +shubhendu +mukherje +mark +hill +intern +confer +supercomput +juli +comparison +trace +sampl +techniqu +multi +megabyt +cach +kessler +mark +hill +david +wood +ieee +transact +comput +june +cooper +share +memori +softwar +hardwar +scalabl +multiprocessor +mark +hill +jame +laru +steven +reinhardt +david +wood +transact +comput +system +toc +novemb +wisconsin +architectur +research +tool +wart +mark +hill +jame +laru +alvin +lebeck +madhusudhan +talluri +david +wood +comput +architectur +new +august +cach +perform +spec +benchmark +suit +jeffrei +mark +hill +dionisio +pnevmatikato +alan +smith +ieee +micro +august +unifi +formal +four +share +memori +model +sarita +adv +mark +hill +ieee +transact +parallel +distribut +system +tpd +june +perform +implic +toler +cach +fault +andrea +farid +pour +mark +hill +ieee +transact +comput +march +mechan +cooper +share +memori +david +wood +satish +chandra +babak +falsafi +mark +hill +jame +laru +alvin +lebeck +jame +lewi +shubhendu +mukherje +subbarao +palacharla +steven +reinhardt +intern +symposium +comput +architectur +isca +wisconsin +wind +tunnel +virtual +prototyp +parallel +comput +steven +reinhardt +mark +hill +jame +laru +alvin +lebeck +jame +lewi +david +wood +sigmetr +page +placement +algorithm +larg +real +index +cach +kessler +mark +hill +transact +comput +system +novemb +program +differ +memori +consist +model +kourosh +gharachorloo +sarita +adv +anoop +gupta +john +hennessi +mark +hill +journal +parallel +distribut +comput +august +tradeoff +support +page +size +madhusudhan +talluri +shing +kong +mark +hill +david +patterson +intern +symposium +comput +architectur +isca +detect +data +race +weak +memori +system +sarita +adv +mark +hill +barton +miller +robert +netzer +intern +symposium +comput +architectur +isca +june +comparison +hardwar +softwar +cach +coher +scheme +sarita +adv +vikram +adv +mark +hill +mari +vernon +intern +symposium +comput +architectur +isca +june +model +estim +trace +sampl +miss +ratio +david +wood +mark +hill +kessleracm +sigmetr +implement +stack +simul +highli +associ +memori +extend +abstract +mark +hill +david +wood +sigmetr +implement +sequenti +consist +cach +base +system +sarita +adv +mark +hill +intern +confer +parallel +process +august +weak +order +definit +sarita +adv +mark +hill +intern +symposium +comput +architectur +isca +june +graduatesmadhusudhan +talluri +expect +august +superpag +subblock +address +translat +hierarchi +first +employ +microsystem +current +email +madhu +sarita +adv +novemb +design +memori +consist +model +share +memori +multiprocessor +first +employ +assist +professor +rice +univers +current +email +sarita +rice +richard +kessler +juli +analysi +multi +megabyt +secondari +cach +memori +click +tabl +content +first +employ +crai +research +current +email +richard +kessler +crai +last +updatedw +keyword +help +search +engin +rank +page +higher +page +mark +hill +home +page +comput +scienc +wisconsin +mark +hill +home +page +comput +scienc +wisconsin +mark +hill +home +page +comput +scienc +wisconsin +mark +hill +home +page +comput +scienc +wisconsin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..10a8554b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,22 @@ +home +page +marko +zaharioudaki +marko +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaphon +mail +marko +wisc +note +page +construct diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..bb4c44c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,22 @@ +michael +birk +home +page +michael +birk +home +page +section +section +project +list +home +page +program +languag +link +alltraxx +home +page +mbirk +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..e363721e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,70 @@ +mark +mcauliff +mark +mcauliff +comput +scienc +depart +univers +wisconsin +madison +dayton +madison +mcauliff +wisc +research +interest +design +implement +object +orient +databas +system +public +carei +dewitt +franklin +hall +mcauliff +naughton +schuh +solomon +tsatalo +white +zwill +shoringup +persist +applic +proc +sigmod +mark +mcauliff +marvin +solomon +atrac +base +simul +pointer +swizzl +techniqu +proceed +ieee +data +engin +march +mark +mcauliff +michael +carei +marvin +solomon +towardseffect +effici +free +space +manag +appear +proceed +sigmod +confer +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..fc0a95c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,141 @@ +marc +shapiro +page +marc +shapiro +believ +tautolog +tautolog +tautolog +current +obsess +fond +disappear +fear +repuls +ponder +fast +pointer +analys +watch +lot +jacki +chan +movi +think +program +languag +design +read +much +try +teach +elementari +school +student +think +term +recurs +hope +interrupt +hoar +wrote +pointer +introduct +high +level +languag +step +backward +never +recov +home +page +schedul +todd +automat +accid +gener +elain +dimasi +twisti +littl +page +amanda +peet +retreather +hyper +mode +emac +thepul +menu +doesn +cool +html +tag +submiss +softwarei +cobbl +togeth +pldi +abl +work +nowinclud +previous +mostli +miss +file +submit +html +popl +paper +fast +accur +flow +insensit +point +analysi +marc +shapiroand +susan +horwitz +appear +symposium +principl +program +languag +variou +address +marc +shapiro +dept +dayton +madison +mail +wisc +talk +hous +wisc +finger +hous +wisc +marion +madison +list +peopl +know +realli +marc +shapiro +meet +jonathan +goldstein +paul +ferguson +lawrenc +brown +last +modifi +marc +shapiro +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..04df81f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,27 @@ +home +pagerob +minimalist +home +page +last +modifi +august +mellencamp +taship +introduct +oper +system +email +mellen +wisc +offic +comput +scienc +build +offic +phone +offic +hour +appoint +mellen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..11f57785 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,96 @@ +david +melski +person +info +pagedavid +melskicurr +depart +mill +comput +scienc +statisticsmadison +dayton +madison +permen +west +ivesmarshfield +michel +awesom +person +page +current +construct +sister +kasei +great +home +page +brother +eric +semest +teach +coupl +section +also +work +rep +program +languag +myexact +schedul +still +need +determin +undergrad +major +comput +scienc +russian +studiesher +univers +wisconsin +even +spent +fall +semesterof +russia +chanc +russian +often +miss +somedai +somedai +make +back +interest +includ +chess +soccer +recent +beenbik +also +distract +work +numerousbook +hasti +rewrit +page +also +want +link +tomapquest +plan +steal +alot +map +second +give +direct +madison +marshfield +last +modifi +david +melski +melski +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..0481eebb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,297 @@ +milo +martin +home +pagemilo +martin +milo +wisc +graduat +student +teach +assistantcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usaemail +milo +wisc +eduoffic +offic +phone +offic +hour +tuesdai +thursdai +byappointmentba +comput +scienc +gustavu +adolphu +colleg +class +compil +construct +charl +fischer +advanc +comput +architectur +mark +hill +java +sit +mark +hill +jame +larusteach +algebra +languag +program +section +research +interestsi +first +year +student +interest +programminglanguag +architectur +system +specif +interest +compil +optim +technolog +beinfluenc +hardwar +oper +system +advanc +mobil +program +java +addit +challeng +present +compil +architectur +oper +system +design +mani +mani +thing +mani +even +know +interest +publicationsresearch +perform +summer +argonn +nation +laboratori +technolog +develop +divis +advis +charl +fink +fink +humm +martin +micklich +evalu +view +reconstruct +paramet +illicitsubst +detect +us +fast +neutron +transmiss +spectroscopi +ieee +nuclear +scienc +symposium +medic +imag +confer +fink +micklich +yule +humm +sagalovski +martin +evalu +neutron +techniqu +illicitsubst +detect +nucl +inst +meth +publicationsresearch +perform +school +year +gustavu +adolphu +colleg +advis +hailperin +milo +martin +hailperin +program +languageflex +determinist +dynam +parallel +comput +senior +honor +thesi +mathemat +comput +scienc +depart +gustavu +adolphu +colleg +postscript +comput +interest +java +java +resourc +next +softwar +next +comput +softwar +compani +found +anintern +scientif +educ +organ +dedic +toadvanc +scienc +engin +applic +informationtechnolog +serv +profession +public +interest +fosteringth +open +interchang +inform +promot +highestprofession +ethic +standard +direct +quot +page +person +interest +footbal +bignfl +footbal +sinc +live +minnesota +year +myfavorit +team +minnesota +vike +eventhough +live +land +chees +head +colon +conquer +multi +player +plai +mail +space +explor +combat +game +wrote +babylon +best +show +imho +atlanti +atlanti +plai +email +game +mythic +world +atlanti +player +build +armi +engaug +trade +explor +land +fight +wonder +monster +train +wizard +discov +underworld +right +player +rule +current +list +player +ultim +frisbe +ultim +player +associ +ultim +combin +element +ofsocc +footbal +basketbal +fast +pace +game +plai +afrisbe +everyon +quarterback +everyon +receiv +direct +quot +home +page +ultimatein +simpl +rule +milo +martin +milo +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..df3c3596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,169 @@ +mino +home +page +mino +garofalakismino +wisc +eduphd +candid +research +assist +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaoffic +stat +phone +home +workresearch +interest +effect +resourc +manag +parallel +multimedia +databas +system +complex +queri +process +optim +parallel +algorithm +databas +theoryeduc +comput +scienc +univers +wisconsin +madison +dept +comput +scienc +decemb +comput +scienc +univers +patra +dept +comput +engin +informat +june +refere +public +multi +dimension +resourc +schedul +parallel +queri +mino +garofalaki +yanni +ioannidi +proceed +sigmod +confer +montreal +canada +june +abstract +paper +postscript +schedul +issu +multimedia +queri +optim +mino +garofalaki +yanni +ioannidi +comput +survei +decemb +paper +postscript +technic +report +resourc +schedul +enhanc +view +continu +media +databas +mino +garofalaki +banu +ozden +silberschatz +submit +public +octob +model +check +sequenti +probabilist +real +time +system +mino +garofalaki +technic +report +comput +technolog +institut +patra +februari +advisor +yanni +ioannidismor +feel +free +peek +resum +pointer +interest +stuff +madison +dbm +reasearch +home +page +madison +hellen +societi +home +page +sigmod +home +page +vldb +home +page +almaden +research +center +watson +research +centerdr +michael +bibliograpi +server +databas +logic +program +page +perpetu +construct +last +updat +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..49ee2025 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,28 @@ +marcelo +gonalv +marcelo +gonalv +mjrg +wisc +associ +research +paradyn +project +addresswork +home +comput +scienc +depart +sheboygan +west +dayton +street +madison +madison +phone +phone +depart +comput +sciencesunivers +wisconsin +madison diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..47fb6c67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,39 @@ +brian +morgan +home +page +brian +morgangradu +studentcomput +scienc +depart +univers +wisconsin +madison +west +dayton +streetmadison +mail +morgan +wisc +telephon +advisor +chuck +dyerresearch +interestsvirtu +conferenc +system +imag +compress +video +conferenc +high +bandwidth +network +relat +link +interest +wisconsin +comput +vision +group diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..c0e49e14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,149 @@ +andrea +moshovo +home +page +andrea +moshovosresearch +assist +depart +comput +sciencesunivers +wisconsin +madisonadvisor +guri +sohigroup +multiscalar +wisconsin +kestrel +comput +architectur +address +leav +notese +aroundw +peek +futur +clickheremi +brother +write +poetri +click +herefor +sampl +work +current +work +data +depend +specul +processor +download +technic +report +compress +postscript +uncompress +postscript +download +talk +slide +load +balanc +multiscalar +processor +data +specul +processor +gener +interest +comput +architectur +instruct +level +parallel +compil +support +explot +vlsi +fall +spring +graduat +student +thecour +instituteof +york +univers +earn +degre +sinc +transfer +wisconsin +howev +theopportun +work +excel +peopl +meet +wife +comput +scienc +univers +crete +greec +implement +numer +algorithm +access +decoupl +architecturethat +support +softwar +pipelin +advisor +kateveni +short +descript +found +comput +scienc +univers +crete +greec +viha +like +editor +support +edit +greek +mani +mani +interest +link +hellen +resouc +network +sure +visit +obtain +instal +greek +font +local +copi +page +resid +atwww +hyper +devil +dictionari +bookmark +mess +nation +fraud +inform +centerusenet +chang +want +send +afax +free diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..5bd705db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,47 @@ +toni +home +pagewhat +newoctob +back +inmadison +updat +sever +page +chang +background +black +better +contrast +updat +contact +inform +minor +chang +variou +page +list +older +updat +prefer +keep +main +page +brief +herear +link +second +level +page +navig +index +friend +favorit +page +interest +contact +informationlast +modifi +octob +wisc +educopyright +toni diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..31288a0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,190 @@ +martin +ream +home +page +martin +ream +graduat +student +teach +assist +also +coke +poobah +finger +coke +machin +comput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +telephon +telephon +dept +email +mream +wisc +edufal +scheduleresearch +interest +databas +particular +digit +terrain +model +tin +program +languag +compil +design +logic +logicprogram +qualifi +exam +databas +spring +previou +year +exam +interest +softwar +design +develop +product +orientedenviron +exploit +comput +scienc +educ +interestsin +databas +compil +design +resum +postscriptand +html +distribut +wisconsin +affili +mathemat +comput +scienc +wesleyan +univers +faint +heart +section +page +alink +senior +honorsthesi +gener +unif +coke +poobahlook +work +mighti +afraid +dear +gone +coke +poobah +tomi +head +usual +gone +realli +want +talk +tosomeon +better +adjust +crucial +role +dept +life +youshould +probabl +elton +doesn +even +mention +poobah +page +imaginethat +besid +aforement +coke +poobahship +mental +ill +afew +thing +might +want +know +third +yeargradu +student +depart +wisconsin +concentr +indatabas +current +studi +qual +sometim +inearli +februari +exercis +relax +plai +squash +reason +well +year +round +ultim +frisbe +summer +basketbal +poorli +andinfrequ +notic +thing +might +rapidlyrid +mountain +bike +around +campu +even +chilliest +weather +alwai +helmet +wish +learn +interest +feel +free +examin +mynot +often +updat +hierarchi +stuff +ilik +enjoi +wisconsin +line +librarylast +modifi +martin +ream +mream +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..8d40b0ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,45 @@ +wisconsin +multiscalar +project +home +pagewisconsin +multiscalar +project +technic +paper +talk +given +multiscalar +peopl +contributor +fund +sourc +relat +project +avail +softwar +wisconsin +comput +architectur +group +comput +scienc +departmentat +univers +wisconsin +world +wide +comput +architectur +inform +inform +interest +local +user +last +updat +februari +guri +sohi +sohi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..b56ee27d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,169 @@ +mike +steel +homepagemik +steel +homepagemsteel +wisc +eduoffic +comp +stat +build +sit +univ +maryland +depart +comput +struggl +undergradu +oper +system +class +sometimearound +april +note +time +stamp +lower +right +corner +sai +folk +graduat +student +comput +scienc +depart +univers +wisconsin +madison +school +motto +come +freezein +land +chees +research +studi +interest +center +around +artificialintellig +comput +vision +oper +system +hope +narrow +come +year +semest +graduateinstructor +section +algebra +languag +program +receiv +bachelor +degre +theunivers +maryland +scomput +scienc +depart +publicationsgrindston +test +suit +parallel +perform +tool +jefferyk +hollingsworth +michael +steel +comput +scienc +technic +reportc +univers +maryland +gzip +postscriptfil +class +semesterc +introduct +oper +system +mari +vernonc +introduct +artifici +intellig +chuck +dyermi +pagesinform +gettingin +touch +friend +back +marylandwhom +forgot +mail +address +favorit +linksmi +favorit +sport +teamssom +friend +usenet +dave +barri +frequent +ask +question +listth +usenet +billi +joel +frequent +ask +question +listi +also +mail +list +administr +thefruit +mail +list +still +host +univers +ofmaryland +start +insidejok +around +offic +hand +sometim +andnow +member +world +wide +predat +come +kill +someinfrar +photo +know +looklik +infrar +pictur +memik +steelemsteel +wisc +eduunivers +wisconsin +madisoncomput +scienc +depart diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..e61999c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,19 @@ +maria +home +pagemaria +home +pagehow +visit +univers +maryland +colleg +park +mayb +univers +wisconsin +madison +section +might +want +visit +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..fda41c84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,91 @@ +oscar +naim +home +page +bienvenido +staff +member +work +paradyn +project +finish +myph +univers +southampton +england +undergrad +universidad +simon +bolivar +caraca +venezuela +born +beauti +citi +barquisimeto +venezuela +barquisimetoi +locat +central +western +part +venezuela +popul +ofabout +million +peopl +barquisimeto +also +known +music +capit +venezuela +main +research +area +perform +analysi +visual +parallel +program +howev +apart +research +like +playclass +guitar +fact +studi +year +excellentmaestro +rodrigo +riera +antonio +lauro +like +spend +time +plai +guitar +read +good +book +sherlock +holm +stori +cook +watch +basebal +work +like +beati +pictur +venezuela +pleaseclick +finger +naim +wisc +check +around +mundo +barquisimeto diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..2780ad66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,45 @@ +anastassia +ailamaki +home +welcom +home +anastassia +ailamaki +graduat +student +comput +scienc +depart +univers +wisconsin +madison +dayton +street +madison +phone +realli +realli +want +pictur +import +notic +find +time +make +decent +home +page +nice +link +georg +rochest +alex +guid +greek +islandsar +worth +visit +send +mail +natassa +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..f91e9643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,134 @@ +jeffrei +naughtonjeffrei +naughtonnaughton +wisc +eduresearch +interestsolap +multi +dimension +data +analysi +parallel +object +relationaldbm +overal +goal +research +develop +ofdatabas +system +surpass +current +databas +system +inperform +eas +current +three +main +area +ofinterest +techniqu +improv +perform +ofmulti +dimension +data +analysi +includ +arrai +base +storag +andprocess +algorithm +indic +algorithm +computingth +cube +perform +object +relat +databas +system +includ +benchmark +dbm +algorithm +valuedattribut +techniqu +parallel +workload +parallel +spatial +inform +system +recent +public +comput +multidimension +aggreg +withsameet +agarw +rakesh +agraw +prasad +deshpand +ashish +gupta +raghu +ramakrishnan +sunita +sarawagi +proceed +thend +intern +confer +larg +databas +mumbai +bombai +storag +estim +multidimension +aggregatesin +presenc +hierarchi +amit +shukla +prasad +deshpand +karthikeyan +ramasami +intern +confer +larg +databas +mumbai +bombai +bucki +object +relat +databas +benchmark +michael +carei +david +dewitt +johann +gerhk +dhaval +shah +moham +asgarian +prepar +toward +molap +object +relat +dbm +withyihong +zhao +kristin +tuft +submit +public diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..581d9315 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +navin +kabranavin +kabragradu +student +depart +comput +scienc +univers +wisconsin +madisonadvisor +david +dewittresearch +area +databas +research +interest +customiz +queri +optim +paradis +project +plan +address +noth +better +explor +bookmark +could +look +indian +stuff +includ +among +thing +archiv +hindi +song +navin +wisc +public diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..10796fb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,31 @@ +newhalltia +newhal +newhal +wisc +graduat +student +dayton +madison +telephon +research +interest +parallel +distribut +system +perform +tool +scalabl +analysi +perform +predict +java +research +group +paradynadvisor +bart +millermummi +pictur +guanajuato +last +chang +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..581f1f49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +nanci +hallcomput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +project +shore +scalabl +heterogen +object +repositori diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..5b139f0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,472 @@ +olvi +mangasarian +home +page +olvi +mangasarian +john +neumann +professor +mathemat +comput +scienc +member +center +mathemat +scienc +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +olvi +wisc +harvard +univers +interest +mathemat +program +machin +learn +parallel +comput +research +summari +optim +theori +rich +mathemat +effectivecomputation +solv +mani +real +life +problem +interestsin +topic +rang +broad +spectrum +encompassestheoret +aspect +error +bound +mathemat +programsand +variat +inequ +converg +proof +parallelgradi +variabl +distribut +algorithm +optim +smooth +techniqu +solv +constrain +optim +problemsa +differenti +nonlinear +equat +well +applicationsto +machin +learn +gener +specif +context +animport +aspect +research +mathemat +programmingtechniqu +diagnos +breast +cancer +result +ahighli +accur +computer +diagnost +system +current +useat +univers +wisconsin +hospit +current +student +paul +bradlei +recent +public +mangasarian +solodova +linearli +converg +descent +method +strongli +monotonecomplementar +problem +mathemat +program +technic +report +octob +mangasarian +jong +pangexact +penalti +function +mathemat +programswith +linear +complementar +constraint +mathemat +program +technic +report +august +mangasarianmathemat +program +data +miningmathemat +program +technic +report +august +mangasarianerror +bound +nondifferenti +convex +inequ +strong +slater +constraint +qualif +mathemat +program +technic +report +juli +bradlei +mangasarian +street +cluster +concav +minim +mathemat +program +technic +report +submit +neural +inform +process +system +street +mangasarian +wolberg +individu +collect +prognost +predict +mathemat +program +technic +report +januari +bradlei +mangasarian +street +featur +select +mathemat +program +mathemat +program +technic +report +decemb +submit +inform +journal +comput +mangasarian +machin +learn +polyhedr +concav +minim +mathemat +program +technic +report +novemb +appli +mathemat +parallel +comput +festschrift +klau +ritter +fischer +riedmuel +schaeffler +editor +physica +verlag +germani +mangasarian +pose +linear +complementar +problem +mathemat +program +technic +report +august +submit +siam +proceed +internationalsymposium +complementar +problem +baltimor +novemb +revis +novemb +nick +street +mangasarian +improv +gener +toler +train +mathemat +program +technic +report +juli +mangasarian +mathemat +program +machin +learn +mathemat +program +technic +report +april +revis +juli +appear +proceed +nonlinear +optim +applic +workshop +eric +june +plenum +press +chunhui +chen +mangasarian +hybrid +misclassif +minim +mathemat +program +technic +report +februari +revis +juli +august +appear +advanc +comput +mathemat +mangasarian +optim +machin +learn +mathemat +program +technic +report +januari +siag +view +new +chunhui +chen +mangasarian +class +smooth +function +nonlinear +mix +complementar +problem +mathemat +program +technic +report +august +revis +octob +februari +septemb +comput +optim +applic +mangasarian +nick +street +wolberg +breast +cancer +diagnosi +prognosi +linear +program +mathemat +program +technic +report +august +revis +decemb +oper +research +juli +august +mangasarian +linear +complementar +problem +separ +bilinear +program +mathemat +program +technic +report +juli +journal +global +optim +mangasarian +solodov +backpropag +converg +determinist +nonmonoton +perturb +minim +mathemat +program +technic +report +june +advanc +neural +inform +process +system +cowan +tesauro +alspector +editor +morgan +kaufmann +publish +francisco +california +chunhui +chen +mangasarian +smooth +method +convex +inequalitiesand +linear +complementar +problem +comput +scienc +technic +report +novemb +revis +novemb +mathemat +program +mangasarian +misclassif +minim +comput +scienc +technic +report +octob +revis +septemb +journal +global +optim +decemb +mangasarian +solodov +serial +parallel +backpropag +neural +net +vianonmonoton +perturb +minimn +comput +scienc +technic +report +april +revis +decemb +optim +method +softwar +chronolog +cancer +bibliographi +page +public +group +wisconsin +paper +report +view +download +paper +report +group +view +home +page +group +period +updat +olvi +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..5d4cfd3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,806 @@ +machin +learn +cancer +diagnosi +prognosismachin +learn +cancer +diagnosi +prognosisthi +page +describ +variou +linear +program +base +machin +learningapproach +appli +diagnosi +prognosi +ofbreast +cancer +work +result +collabor +theunivers +wisconsin +madison +betweenprof +olvi +mangasarian +comput +scienc +depart +anddr +william +wolbergof +depart +surgeri +human +oncolog +copi +thepress +releas +distribut +american +cancer +societi +scienc +writer +seminar +inmarch +provid +good +overview +research +tabl +content +diagnosi +prognosi +bibliographi +citat +popular +press +local +relat +link +relat +linksdiagnosisthi +work +grew +desir +wolberg +accur +diagnosebreast +mass +base +sole +fine +needl +aspir +heidentifi +nine +visual +assess +characterist +sampl +consideredrelev +diagnosi +collabor +prof +mangasarian +andtwo +graduat +student +rudi +setiono +kristin +bennett +aclassifi +construct +us +multisurfac +method +pattern +separ +nine +featur +thatsuccessfulli +diagnos +case +result +data +iswel +known +wisconsin +breast +cancer +data +imag +analysi +work +began +addit +nick +streetto +research +team +goal +diagnos +sampl +base +adigit +imag +small +section +slide +result +ofthi +research +consolid +softwar +system +known +xcyt +current +us +wolberg +clinicalpractic +diagnosi +process +perform +follow +taken +breast +mass +materi +thenmount +microscop +slide +stain +highlight +cellularnuclei +portion +slide +cell +arewel +differenti +scan +us +digit +camera +afram +grabber +board +user +isol +individu +nuclei +us +xcyt +us +mous +pointer +user +draw +approxim +boundari +ofeach +nucleu +us +comput +vision +approach +known +snake +approxim +converg +exact +nuclear +boundari +interact +process +take +five +minut +slide +imag +showingxcyt +nuclei +isol +thisfas +program +comput +valu +characterist +ofeach +nuclei +measur +size +shape +textur +mean +standarderror +extrem +valu +featur +comput +result +total +nuclear +featur +sampl +base +train +case +linear +classifi +wasconstruct +differenti +benign +malign +sampl +thisclassifi +consist +singl +separ +plane +space +threeof +featur +extrem +valu +area +extrem +valu +smooth +mean +valu +textur +project +case +onto +thenorm +separ +plane +approxim +probabl +densiti +ofth +benign +malign +point +construct +allow +simpl +bayesiancomput +probabl +malign +patient +thesedens +shown +patient +allow +judg +confid +diagnosi +comparison +hundr +previou +sampl +date +system +correctli +diagnos +consecut +newpati +benign +malign +eight +case +didxcyt +return +suspici +diagnosi +estimatedprob +malign +small +subset +sourc +imag +us +research +found +goodtest +case +imag +segment +object +recognit +algorithm +petsegment +algorithm +automat +identifi +nuclei +inthes +imag +pleas +email +street +wisc +work +togeth +prognosisth +second +problem +consid +research +prognosi +predict +long +term +behavior +diseas +haveapproach +prognosi +function +approxim +problem +us +inputfeatur +includ +comput +xcyt +predict +atim +recurr +malign +patient +us +right +censor +data +solut +term +recurr +surfac +approxim +method +util +linearprogram +construct +surfac +predict +time +recurr +fornew +patient +examin +actual +recurr +train +caseswith +similar +predict +recurr +time +plot +probabl +ofdiseas +free +surviv +variou +time +year +anindividu +patient +capabl +incorpor +intoxcyt +exampl +shown +surviv +curv +plot +probabl +diseas +free +surviv +versu +time +year +black +diseas +free +surviv +curv +repres +patient +ourorigin +studi +curv +repres +probabl +ofdiseas +free +surviv +sampl +case +particular +case +thereforeha +averag +prognosi +probabl +diseas +freeafter +year +equal +procedur +also +us +compar +predict +power +ofvari +prognost +factor +result +indic +precis +detail +cytolog +inform +type +provid +xcytgiv +better +prognost +accuraci +tradit +factor +tumors +lymph +node +statu +corrobor +research +result +could +remov +need +often +pain +axillari +lymph +node +surgeri +chronolog +bibliographylink +paper +provid +postscript +format +postscript +viewer +download +file +shift +click +netscap +print +abstract +ascii +text +obtain +paper +notlink +pleas +contact +first +author +mangasarian +setiono +wolberg +pattern +recognit +linear +program +theori +applic +medic +diagnosi +proceed +workshop +larg +scale +numer +optim +page +philadelphia +siam +mangasarian +wolberg +cancer +diagnosi +linear +program +siam +new +page +abstract +wolberg +street +mangasarian +imag +analysi +machin +learn +appli +breast +cancer +diagnosi +prognosi +analyt +quantit +cytolog +histolog +page +april +abstract +wolberg +street +heisei +mangasarian +comput +deriv +nuclear +featur +distinguish +malign +benign +breast +cytolog +human +patholog +page +abstract +wolberg +street +heisei +mangasarian +comput +deriv +nuclear +grade +breast +cancer +prognosi +analyt +quantit +cytolog +histolog +page +august +abstract +mangasarian +street +wolberg +breast +cancer +diagnosi +prognosi +linear +program +oper +research +page +juli +august +avail +mathemat +program +technic +report +abstract +street +mangasarian +wolberg +induct +learn +approach +prognost +predict +proceed +twelfth +intern +confer +machin +learn +priediti +russel +page +morgan +kaufmann +abstract +teagu +wolberg +street +mangasarian +call +page +indetermin +fine +needl +aspir +breast +imag +analysi +aid +diagnosi +cancer +submit +abstract +street +mangasarian +wolberg +individu +collect +prognost +predict +technic +report +comput +scienc +depart +univers +wisconsin +madison +januari +submit +icml +aaai +confer +abstract +citat +medic +popular +press +new +medicin +segment +prime +new +march +breast +biopsi +without +surgeri +friend +todai +march +cancer +detect +imit +prospect +man +milwauke +sentinel +march +analyz +breast +cancer +detroit +new +march +high +tech +cancer +hunt +marilynn +marchion +milwauke +journal +march +computer +interpret +breast +biopsi +progress +report +oncolog +time +april +comput +program +hunt +breast +cancer +ruth +sorel +houston +chronicl +april +comput +program +improv +interpret +aspir +oncolog +new +intern +data +suggest +needl +biopsi +could +replac +surgic +biopsi +diagnos +breast +cancer +journal +american +medic +associ +medic +new +perspect +column +june +diagnosi +imag +analysi +machin +learn +cope +septemb +octob +comput +seek +breast +cancer +madison +capit +time +januari +comput +aid +cancer +predict +angel +time +januari +local +relat +link +mathemat +program +group +machin +learn +group +medic +schooloth +relat +link +nation +librari +medicin +univers +nevada +center +biomed +model +research +oncolink +washington +univers +institut +biomed +comput +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..602a7ab2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,328 @@ +pattern +separ +mathemat +programmingpattern +separ +mathemat +programmingthi +page +describ +work +pattern +separ +linear +program +mathemat +program +section +univers +wisconsin +madison +comput +scienc +depart +brief +histori +method +outlinemathemat +optim +approach +particular +linear +program +long +us +problem +pattern +separ +linear +program +us +construct +plane +separ +linearli +separ +point +set +separ +nonlinear +surfac +us +linear +program +also +describ +whenev +surfac +paramet +appear +linearli +quadrat +polynomi +surfac +formul +howev +could +failon +set +separ +surfac +linear +paramet +multisurfac +method +avoid +difficulti +separ +disjoint +finit +point +set +dimension +euclidean +space +follow +choos +parallel +plane +dimension +euclidean +space +close +togeth +region +plane +contain +point +set +region +parallel +plane +contain +point +point +discard +point +region +parallel +plane +repeat +process +point +parallel +plane +region +parallel +plane +contain +point +point +multisurfac +method +tree +variant +multisurfac +method +develop +finit +disjoint +point +set +dimension +euclideanspac +goal +todetermin +sequenc +plane +dimension +euclideanspac +separ +set +follow +determin +plane +dimension +euclidean +space +minim +averag +distanc +misclassifi +point +point +misclassifi +li +side +separ +plane +assign +similarli +point +misclassifi +li +side +separ +plane +assign +region +assign +contain +mostli +point +stop +otherwis +gener +anoth +error +minim +plane +region +sequenc +plane +gener +view +decis +tree +eachnod +tree +best +split +point +reach +node +found +solv +node +split +branch +thesam +procedur +appli +mostli +point +oneset +node +linear +program +approach +also +view +astrain +neural +network +hidden +layer +shown +learn +concept +well +better +traditionallearn +method +cart +also +advantag +artifici +neural +network +method +backpropag +inthat +train +proce +much +faster +implement +implement +us +mino +numer +optim +packag +nick +street +kristin +bennett +also +implement +matlab +optim +packag +paul +bradlei +follow +descript +matlab +implement +togeth +file +requir +chronolog +bibliographi +mangasarian +linear +nonlinear +separ +pattern +linear +program +oper +research +june +page +mangasarian +multisurfac +method +pattern +separ +ieee +transact +inform +theori +novemb +page +bennett +decis +tree +construct +linear +program +proceed +midwest +artifici +intellig +cognit +scienc +societi +confer +page +bennett +mangasarian +robust +linear +program +discrimin +linearli +insepar +set +optim +method +softwar +page +mangasarian +mathemat +program +neural +network +orsa +journal +comput +fall +page +last +modifi +paul +bradlei +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..ca640da3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,124 @@ +paradyn +project +home +page +paradyn +parallel +perform +tool +releas +informationthi +page +contain +describ +copi +ofreleas +paradyn +tool +project +goalsth +paradyn +parallel +program +perform +tool +project +explor +newapproach +build +scalabl +tool +parallel +program +perform +technic +paper +paradyn +manualsstatu +reporta +recent +statu +report +blizzard +paradyn +project +arpa +meet +panel +presentationthi +present +made +arpa +csto +meet +insan +antonio +arpa +csto +super +symbol +tabl +presentationthi +present +made +arpa +csto +meet +inflorida +project +effort +develop +common +access +routin +tocompil +gener +inform +us +tool +high +level +parallellanguag +project +staff +super +comput +postera +hypertext +version +poster +super +comput +relat +project +elsewher +spdt +sigmetr +symposium +parallel +distribut +toolsyou +also +restaur +includ +paradyn +page +temporari +placehold +contact +informationparadyn +projectdepart +comput +sciencesunivers +wisconsin +west +dayton +streetmadison +email +paradyn +wisc +edufax +last +modifi +bart +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..7661acc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,42 @@ +steven +parker +home +page +steven +parker +graduat +student +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +wisconsin +offic +parker +wisc +depth +area +numer +analysi +employ +prism +projectfal +schedul +math +prism +relat +link +home +page +home +page +send +mail +last +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..f1ce9a60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,138 @@ +home +page +paul +bradleygradu +student +comput +scienc +depart +univers +wisconsin +madison +paulb +wisc +eduoffic +csphone +advisor +mangasarianinterestsmathemat +programmingmachin +learningfli +fish +interest +us +mathemat +program +techniqu +specif +nonlinear +linear +program +induct +learn +summari +work +currentlyb +done +area +univers +wisconsin +pleas +madisonmathemat +program +page +thiswork +guid +professor +olvimangasarian +nick +street +publicationsal +paper +store +postscript +format +abstract +ascii +text +postscript +viewer +download +file +shift +click +us +netscap +print +bradlei +mangasarian +street +featur +select +mathemat +program +mathemat +program +technic +report +comput +scienc +depart +univers +wisconsin +madison +wisconsin +decemb +revis +march +submit +inform +journal +comput +abstract +bradlei +mangasarian +street +cluster +concav +minim +mathemat +program +technic +report +comput +scienc +depart +univers +wisconsin +madison +wisconsin +accept +present +neural +inform +process +system +abstract +nick +street +work +paul +picksthes +site +backcountri +page +grate +dead +nasa +wisconsin +fish +frog +espnet +timesfax +uroullett +last +modifi +paul +bradlei +paulb +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..6abeb8d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,147 @@ +pete +devri +home +page +peter +devri +internet +tool +specialist +room +comput +scienc +westdayton +madison +pdevri +wisc +internet +tool +specialist +intern +scout +mean +isthat +read +everyth +internet +technolog +make +sens +andthen +write +overview +articl +scout +toolkit +great +thing +iread +everyth +think +anywai +rather +foolish +topai +tell +eric +hazen +alsoprovid +technic +webmast +servic +scout +group +although +excel +help +comput +system +folksat +intern +recent +join +scout +team +work +laboratori +molecularbiolog +integr +microscopi +resourc +biomed +resourc +nearli +eight +year +molecularbiolog +fortun +develop +prof +seancarrol +techniqu +creat +multipl +label +confoc +imag +basic +cool +look +imag +develop +embryo +lotof +journal +book +cover +also +develop +molecular +biologi +site +fortun +work +prof +johnwhit +rest +imrstaff +develop +site +receiv +star +inth +magellan +internet +guid +last +major +project +involv +develop +dimension +microscop +studi +embryo +develop +microscop +isdescrib +articl +appear +augustnd +issu +journal +scienc +photo +guest +scout +lab +standard +info +resum +relat +experi +public +present +updat +tuesdai +decemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..7a072482 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,131 @@ +yumpe +home +page +manoj +plakal +graduat +slave +dept +comput +scienc +universityofwisconsin +madison +blah +home +countri +india +though +origin +state +kerala +stai +life +calcutta +studi +bosco +school +calcutta +salesian +bosco +undergrad +kanpur +major +comput +scienc +engin +current +first +year +graduat +student +support +teach +assistantship +depart +comput +scienc +univers +wisconsin +madison +stare +barrel +either +comput +architectur +program +languag +interest +music +rock +metal +altern +blue +movi +book +stuff +acad +hack +industri +geeki +nerdi +stuff +featur +home +page +kanpur +chat +gatewai +kanpur +class +seealso +iitk +class +link +friend +snap +friend +pinup +galleri +everi +nerd +need +check +bookmark +link +page +access +log +visit +page +contact +north +randal +avenu +madison +comput +scienc +univers +wisconsin +madison +dayton +street +madison +plakal +wisc +plakal +wisc +acknowledg +suresh +venkat +nifti +tabl +igor +ivanisev +graduat +slave +wisecrack +icon +variou +corner +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..7eccb42b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,90 @@ +prasad +home +page +page +constuct +meanwhil +prasad +deshpand +graduat +student +comput +scienc +depar +univers +wisconsin +madison +home +address +princeton +madison +offic +address +comput +scienc +build +univers +wisconsin +madison +dayton +madison +academ +interest +databas +system +theori +research +area +databas +current +schedul +theori +invest +manageri +econom +meet +prof +jeff +naughton +music +introduct +music +theori +public +comput +multidimension +aggreg +vldb +storag +estim +multidimensionalaggreg +presenc +hierarchi +vldb +interest +cours +project +packag +java +download +want +spend +time +timex +world +find +india +dilbert +comix +explor +bookmark +random +link +finger +time +sinc +hakuna +matata +info +creat +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..4e7f1ca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,45 @@ +vishi +home +page +viswanath +poosala +research +assist +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +poosala +wisc +research +inform +reseach +summari +resum +html +postscript +inform +relat +databas +advisor +prof +yanni +ioannidi +inform +asha +voluntari +organ +help +improv +basic +educ +india +interestsuw +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..cb92f567 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,770 @@ +home +pageth +project +queri +sequenc +data +document +construct +time +order +databas +order +time +databas +time +databas +order +document +content +project +objectivescurr +statusmotiv +exampleseq +data +model +sequin +queri +languageoptim +techniquesseq +system +developmentpublicationsrel +workcontact +informationproject +object +number +import +databas +applic +requir +processingof +larg +amount +order +sequenc +data +domain +theseappl +includ +financi +manag +histor +analysi +econom +social +scienc +metereolog +medic +scienc +andbiolog +scienc +exist +relat +databas +inadequ +regard +data +collect +treat +set +sequenc +consequ +express +sequenc +queri +tediou +evalu +ineffici +databas +model +data +us +abstract +sequenc +allow +data +sequenc +queri +declar +manner +util +order +semanticstak +advantag +uniqu +opportun +avail +queri +optim +evaluationintegr +sequenc +data +relat +data +user +canstor +queri +combin +relat +sequencesthes +requir +serv +goal +project +variou +kind +sequenc +need +support +tempor +sequenc +themost +import +kind +queri +express +us +notion +like +next +previou +natur +consid +sequenc +queri +optim +evalu +effici +issu +need +studi +theori +databas +system +need +built +demonstr +feasibl +theoret +idea +project +statusth +current +statu +project +defin +data +model +support +import +kind +sequenc +data +also +defin +algebraicqueri +oper +compos +form +sequenc +queri +analogousto +composit +relat +algebra +oper +form +relat +queri +describ +sequenc +queri +effici +process +identifi +variou +optim +techniqu +sequenc +queri +languag +sequin +candeclar +express +queri +sequenc +sequin +queri +includ +embed +express +relat +queri +languag +likesql +vice +versa +build +disk +base +databas +system +demonstr +feasibl +propos +system +implement +model +us +nest +complex +object +architectur +built +shore +storag +manag +process +sever +megabyt +data +relat +sequenc +support +integr +extens +manner +motiv +exampl +sequenc +querya +weather +monitor +system +record +inform +variou +meteorolog +phenomena +sequenti +occurr +phenomena +variou +meteorolog +event +sequenc +time +record +scientist +ask +queri +volcano +erupt +didth +recent +earthquak +strength +greater +richter +scale +queri +express +relat +queri +languag +like +complex +featur +like +groupbi +claus +correl +subqueri +aggregatefunct +requir +convent +relat +queri +optim +would +find +effici +queri +execut +plan +even +given +knowledg +earthquak +volcano +relat +sort +time +howev +effici +plan +exist +model +data +sequencesord +time +sequenc +scan +lock +step +similar +sort +merg +join +recent +earthquak +record +scan +store +temporari +buffer +whenev +volcano +record +process +valu +recent +earthquak +record +store +buffer +check +strength +greater +possibl +gener +answer +queri +therefor +process +singl +scan +sequenc +us +littl +memori +optim +sequenti +data +queri +data +modelth +detail +data +model +aredescrib +publish +paper +click +postscript +version +present +gist +basic +model +sequenc +record +map +ordereddomain +posit +mani +mani +relationship +record +andposit +view +dual +distinct +wai +recordsmap +posit +posit +map +record +view +call +posit +record +orient +respect +give +rise +queri +oper +base +view +queri +sequenc +could +requir +oper +either +flavor +record +orient +oper +similar +relationaloper +includ +variou +kind +join +overlap +contain +andaggreg +oper +extens +explor +researchersin +tempor +databas +commun +posit +oper +includ +next +previou +offset +movingaggreg +oper +allow +zoom +oper +sequenc +mean +collaps +expand +order +domain +associ +sequenc +instanc +daili +sequenc +could +zoom +collaps +weekli +sequenc +zoom +expand +hourli +sequenc +last +part +model +deal +oper +group +set +sequenc +advantag +make +easi +model +queri +involv +sequenc +collect +case +mani +real +worldsitu +sequenc +oper +extend +work +group +similar +sequenc +instead +singl +sequenc +extensionof +model +indic +practic +implement +ofseq +would +probabl +involv +nest +complex +object +system +sequin +queri +languagew +devis +queri +languag +call +sequin +usingwhich +declar +sequenc +queri +specifi +languagei +similar +flavor +except +input +queriesa +well +result +queri +sequenc +click +descript +sequin +languag +exampl +optim +techniquesw +propos +optim +techniqu +sequenc +queri +involv +posit +oper +exist +techniqu +thathav +propos +queri +record +orient +oper +optim +queri +transform +meta +data +cach +intermedi +result +effici +evalu +queri +optim +queri +evalu +plan +gener +us +algorithm +reli +cost +estim +import +observ +access +sequenc +data +singl +stream +probabl +effici +evalu +strategi +take +account +detail +optim +techniqu +aredescrib +publish +paper +click +postscript +version +system +developmentth +databas +system +client +serverarchitectur +support +multipl +client +viaa +multi +thread +server +server +built +ontop +shore +storag +manag +sequin +subset +support +queri +languageswhich +embed +insid +data +mode +nest +complex +object +model +allow +arbitrarylevel +nest +relat +insid +sequenc +viceversa +system +also +extens +provid +supportfor +data +type +order +domain +user +defin +function +storag +implement +queri +languag +detailson +system +click +publicationssequ +queri +process +praveen +seshadri +miron +livni +raghu +ramakrishnan +proceed +sigmod +confer +data +manag +framework +sequenc +datapraveen +seshadri +miron +livni +raghu +ramakrishnan +proceed +ieee +confer +data +engin +march +design +implement +sequenc +databas +systempraveen +seshadri +miron +livni +raghu +ramakrishnan +submit +vldb +next +sequenc +queriesraghu +ramakrishnan +michael +cheng +miron +livni +praveen +seshadri +proceed +intern +confer +manag +data +comad +decemb +relat +workthedevis +project +complementari +provid +visualizationenviron +us +explor +sequenc +data +devis +front +queri +pose +databas +server +answer +examin +graphic +also +shore +project +storag +manag +us +peopl +work +relat +project +madison +databas +research +group +madison +depart +servercontact +informationfor +inform +contact +praveen +seshadri +praveen +wisc +eduraghu +ramakrishnan +raghu +wisc +edumiron +livni +miron +wisc +educomput +scienc +depart +univers +wisconsin +dayton +street +madison +last +modifi +praveen +seshadripraveen +seshadri +praveen +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..ae6dca7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,26 @@ +andrew +prock +home +page +andrew +clemen +hockert +prockoffic +hour +person +histori +school +class +madison +class +school +bookmark +resum +doonesburi +trot +alta +vista +alta +vista +prock +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..a00a0b63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,174 @@ +home +page +ann +condon +ann +condon +associ +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +condon +wisc +univers +washington +interest +complex +theori +interact +proof +system +random +complexityclass +theori +parallel +comput +research +summari +interest +model +comput +interactiveproof +system +combin +nondetermin +random +suchmodel +recent +proven +surprisingli +us +solv +classicproblem +complex +theori +exampl +although +theoryof +complet +long +us +identifi +hard +computationalproblem +much +progress +understand +whichhard +problem +solut +easi +approxim +recentresult +interact +proof +system +result +novel +modelsof +turn +us +prove +approximabilityresult +sever +hard +problem +work +developingboth +posit +neg +result +approxim +hardcombinatori +problem +aris +game +theori +graph +theoryand +automata +theori +also +interest +design +analysi +parallel +algorithm +current +work +develop +parallel +algorithm +forsort +graph +problem +minimum +span +tree +goal +develop +algorithm +work +well +practic +parallel +model +commun +synchron +costscan +expens +sampl +recent +public +interact +proof +system +polynomi +bound +strategi +ladner +journal +comput +system +scienc +finit +state +automata +nondeterminist +probabilisticst +hellerstein +pottl +wigderson +proceedingsof +annual +symposium +theori +comput +pspace +provabl +prover +round +caiand +lipton +journal +comput +system +scienc +februari +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..099485ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,192 @@ +home +page +deborah +joseph +deborah +joseph +associ +professor +comput +scienc +mathemat +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +joseph +wisc +purdu +univers +interest +structur +appli +complex +theori +comput +biologi +comput +geometri +mathemat +logic +research +summari +research +concern +area +theoret +comput +scienc +studi +structur +properti +complex +class +design +analysi +algorithm +biolog +problem +last +twenti +year +great +deal +work +gone +studyingth +properti +set +decid +determinist +andnondeterminist +polynomi +time +despit +effort +stillknow +littl +class +recent +fact +computerscientist +question +adequaci +known +proof +techniquesfor +resolv +question +whether +research +investigatesth +structur +properti +set +class +exploresin +formal +type +proof +techniqu +necessari +resolveproblem +concern +complex +class +research +interest +comput +biologi +primarili +inth +area +comput +method +genom +sequenc +theseinclud +develop +dynam +data +structur +algorithmsfor +fragment +assembl +larg +scale +genom +sequenc +project +develop +specif +algorithm +techniqu +handlingrepetit +sequenc +addit +research +util +graphtheoret +method +rapid +homolog +detect +analysisof +anonym +sequenc +sampl +recent +public +collaps +degre +subexponenti +time +pruim +young +proceed +ninth +structur +complex +theoryconfer +spars +spanner +weight +graph +althof +dobkin +soar +discret +comput +geometri +obtain +global +similar +local +similar +meidanisand +tiwari +proceed +fourth +scandinavianworkshop +algorithm +springer +verlag +lectur +note +incomput +scienc +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..3fe4a0fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,143 @@ +home +page +miron +livni +miron +livni +professor +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +miron +wisc +weizmann +institut +scienc +rehovot +israel +interest +resourc +manag +algorithm +perform +model +analysi +discret +event +simul +research +summari +major +emphasi +research +design +evaluationof +resourc +manag +polici +involv +developmentof +resourc +manag +polici +process +data +managementsystem +type +system +gener +purpos +well +asreal +time +schedul +algorithm +consid +researchinvolv +perform +studi +differ +polici +specialemphasi +interplai +properti +systemand +perform +polici +sinc +performancestudi +emploi +simul +interest +discret +event +modelingand +simul +techniqu +current +process +implementinga +simul +laboratori +base +novel +simul +languag +laboratori +includ +data +manag +util +visualizationtool +graphic +interfac +sampl +recent +public +disk +tape +join +synchron +disk +tape +access +myllymaki +proceed +sigmetr +confer +sequenc +queri +process +sashadri +ramakrishnan +proceed +sigmod +confer +foundat +visual +metaphor +schema +displai +haberand +ioannidi +journal +intellig +inform +system +juli +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..4414a99d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,145 @@ +home +page +seymour +parter +seymour +parter +professor +comput +scienc +mathemat +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +parter +wisc +york +univers +interest +numer +method +partial +differenti +equat +research +summari +time +major +emphasi +work +solutionof +indefinit +discret +ellipt +system +equat +classicalit +method +multigrid +method +work +effectivelywhen +system +posit +definit +method +also +bemad +effect +real +symmetr +part +operatori +posit +definit +hand +indefinit +casedirect +method +attempt +preserv +spars +thesystem +encount +small +pivot +thu +challengingproblem +effect +mix +concept +procedur +linearalgebra +ellipt +partial +differenti +equat +nowinvolv +sever +project +attack +class +problem +includ +precondit +studi +research +specialmultigrid +method +sampl +recent +public +precondit +chebyshev +collact +discret +ellipticparti +differenti +equat +appear +siam +journalon +numer +analysi +precondit +boundari +condit +without +estim +condit +number +distribut +singular +valu +siam +journal +numer +analysi +precondit +second +order +ellipt +oper +condit +numbersand +distribut +singular +valu +journal +scientificcomput +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..ed6bb239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,180 @@ +home +page +mari +vernon +mari +vernon +professor +comput +scienc +industri +engin +comput +scienc +depart +univers +wisconsin +dayton +madison +telephon +email +vernon +wisc +univers +california +angel +interest +techniqu +applic +comput +system +perform +analysi +perform +parallel +system +parallel +architectur +operatingsystem +research +summari +interest +analyt +model +techniqu +applicationto +comput +system +perform +issu +emphasi +paralleland +distribut +system +design +issu +model +techniquesi +develop +togeth +graduat +student +colleaguesinclud +gener +time +petri +customizedmean +valu +analysi +gtpn +repres +parallel +systemfeatur +synchron +prioriti +servic +precis +custom +techniqu +yield +intuit +equationsthat +repres +parallel +system +featur +approxim +butcan +solv +effici +also +recent +proposedth +techniqu +call +interpol +approximationsfor +analysi +parallel +processor +alloc +polici +techniquemai +also +broader +applic +parallel +system +performanceanalysi +current +research +project +includ +character +high +performanceparallel +workload +analysi +parallel +processor +schedulingpolici +schedul +issu +multimedia +server +memorymanag +polici +network +workstat +sampl +recent +public +fair +dqdb +network +slot +reus +brewster +proceed +ieee +infocom +confer +august +accur +model +hybrid +hash +join +algorithm +pateland +carei +proceed +sigmetr +confer +june +applic +characterist +limit +preemption +forrun +complet +parallel +processor +schedul +polici +with +chiang +mansharamani +proceed +sigmetricsconfer +june +page +automat +creat +octob +email +pub +wisc +eduto +report +error diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..df988728 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,12 @@ +qinqin +wang +home +page +welcom +qinqin +wang +home +pageqw +wisc +edulast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..12abd19a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,364 @@ +raghu +ramakrishnan +home +page +raghu +ramakrishnan +associ +professor +comput +scienc +raghu +wisc +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +street +madison +usaphon +depart +educ +teach +activ +research +interest +research +project +graduat +educ +univers +texa +austin +tech +indian +institut +technolog +madra +teach +activ +cours +text +databas +manag +system +softwar +educ +minibaseand +coralth +text +databas +manag +system +publish +mcgraw +hill +aimedat +first +second +cours +databas +system +undergraduateand +graduat +level +minibas +relat +dbm +develop +inconjunct +text +coral +system +also +us +coursesthat +deal +logic +databas +sever +school +research +interest +integr +heterogen +data +sourc +content +base +queri +index +sequenc +imag +data +exploratori +analysi +larg +data +set +data +mine +extend +databas +queri +languag +us +constraint +deductiona +databas +grow +diversifi +increasinglyimport +abl +access +data +dispers +heterogen +independ +develop +sourc +easili +rodin +project +successor +project +investig +severalissu +formal +techniqu +practic +toolkit +forsemant +integr +support +multipl +level +serviceand +access +databas +databas +access +networkedclust +machin +joint +work +prof +ioannidi +livni +recent +work +result +visual +data +explorationfrom +next +project +joint +work +prof +livni +appli +data +integr +queri +second +area +interest +content +base +queri +complex +data +assequ +imag +set +seqsystem +deal +queri +sequenc +data +focus +dbm +design +optimizationissu +relat +sequenc +data +part +next +project +joint +work +prof +livni +import +aspect +work +identifyingtrend +data +gener +identifi +us +pattern +ofinform +project +goal +support +content +base +retriev +fromlarg +set +imag +focu +develop +implementingan +express +data +definit +languag +us +customizea +gener +imag +databas +system +take +advantag +specializedinform +given +collect +imag +indexedand +queri +interest +queri +analysi +data +cover +data +explor +andmin +develop +power +cluster +algorithm +call +birchfor +larg +dataset +visual +data +explor +tool +call +devisea +part +next +project +long +stand +research +interest +extens +relat +databasequeri +languag +logic +program +featuressuch +structur +term +recurs +ofarithmet +constraint +specifi +data +queri +morecompactli +effici +ongo +project +involv +continu +develop +coraldeduct +system +evalu +base +upon +bottom +fixpointevalu +techniqu +sever +optim +appli +make +efficientacross +broad +rang +program +research +project +coral +next +graduat +sudarshan +august +time +optim +bottom +evalu +logic +program +first +employ +bell +lab +murrai +hill +sudarsha +research +srivastava +august +deduct +object +orient +languag +first +employ +bell +lab +murrai +hill +divesh +research +august +design +evalu +transit +closur +algorithm +first +employ +bell +lab +murrai +hill +research +seshadri +august +sequenc +data +managementfirst +employ +comput +scienc +depart +cornel +univers +praveen +cornel diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..034757d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,195 @@ +rahul +home +page +rahul +kapoorhello +internet +surfer +welcom +cyber +home +hope +stai +long +enough +know +littl +offici +third +final +semest +master +student +comput +scienc +depart +univers +wisconsin +madison +came +fall +get +bachelor +degre +indianinstitut +technolog +kanpur +interest +employ +pleas +check +resum +cours +schedulemydepartmentmyuniversityiitkanpuriitkclass +india +relatedlink +menow +gone +offici +want +person +well +normal +kind +born +andrais +small +love +famili +compris +parent +elder +sister +nice +town +india +call +kanpur +came +state +fall +good +fortun +live +madison +moneymagazin +rate +livabl +citi +year +editormust +come +greenland +think +winter +livabl +guess +shouldn +complain +fall +spring +madison +isawesom +summer +jose +california +work +almaden +research +centr +cannot +much +madison +summer +suppos +great +anywai +regret +great +time +area +whatev +monei +magazin +sai +think +northern +california +place +littl +like +love +music +take +look +collect +know +kind +movi +almost +genr +though +prefer +romanc +comedi +comedi +show +file +star +trek +read +stuff +novel +philosophi +comput +surf +sport +concern +love +watch +cricket +india +plai +win +tenni +figur +skate +gymnast +semest +try +learn +swim +final +bridg +natur +lover +enjoi +go +long +walk +hike +cloudi +slightli +breezi +wish +could +job +televis +travel +show +youget +interest +place +world +paid +musicstuffmovi +televisioninternettravelotherbookmark +contact +meget +know +form +rest +contact +guestbookrahul +wisc +eduh diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..350981fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,98 @@ +rajesh +raman +home +page +old +homm +page +off +rajesh +raman +rajesh +raman +comput +scienc +depart +west +dayton +street +madison +email +raman +wisc +telephon +ohio +wesleyan +univers +major +comput +scienc +mathemat +minor +music +current +first +year +graduat +student +univers +winsonsin +madison +depart +comput +scienc +person +curriculum +vita +postscript +specif +current +cours +comput +architectur +saluja +system +perform +evalu +model +livni +distribut +system +inform +current +work +team +member +condor +project +integr +part +committe +bookmark +chimera +novelti +monster +chao +subject +contradict +prodigi +judg +thing +feebleworm +earth +depositari +truth +cloaca +uncertainti +error +theglori +shame +univers +blais +pascal +last +modifi +rajesh +raman +raman +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..27c3c047 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,88 @@ +karthik +pagekarthikeyan +ramasamyabouti +student +comput +scienc +depart +univers +wisconsin +research +interest +mostli +databas +oper +system +work +jeffrei +naughton +paradis +project +projectshack +david +dewitt +jeffrei +naughton +connectivityparadis +parallel +databas +wisconsin +pthread +wrapperspublicationsstorag +estim +multidimension +aggreg +presenc +hierarchi +amit +shukla +prasad +deshpand +jeffrei +naughton +karthikeyan +ramasami +intern +confer +larg +databas +mumbai +bombai +avail +presentationsweb +picturearchitectur +altern +scalabl +serversphoto +albumencount +leafperson +inforesum +financemonei +wall +street +journal +person +interestshack +photographycontact +informationstreet +address +dayton +comput +scienc +depart +madison +electron +mail +addresskarthik +wisc +eduoffic +phone +number +comment +suggestionspleas +tell +think +home +page +might +improv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..9f48370f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,24 @@ +kelli +home +page +kelli +ratliffoffic +phone +email +kelli +wisc +edulast +login +offic +hour +inform +student +genealog +page +interest +place +visit +space +construct +stai +tune diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..3b74f7e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,64 @@ +viresh +ratnakar +page +viresh +ratnakar +research +assist +comput +scienc +depart +univers +wisconsin +madison +research +area +digit +imag +video +compress +advisor +miron +livni +main +interest +base +compress +vector +quantiz +fractal +compress +qualiti +control +lossi +compress +product +mode +compress +public +home +page +invok +qclicauthor +avail +qclic +imag +invok +qclicbrows +avail +thing +rever +reveal +click +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +ratnakar +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..662cb242 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,97 @@ +monasteriu +omin +doominu +welcom +monasteriu +omin +doominu +brother +richard +without +beard +person +haven +address +offic +haven +address +offic +haven +address +univers +wisconsin +madison +comput +scienc +depart +west +dayton +street +madison +offic +number +rcarl +wisc +offic +hour +thur +home +page +current +cours +load +distribut +oper +system +mondai +wednesdai +underwat +fire +prevent +saturdai +advanc +comput +architectur +tuesdai +thursdai +math +introduct +whole +number +emphasi +number +sundai +subsurfac +depositori +engin +grave +dig +mondai +wednesdai +fridai +mani +shade +brother +richard +profession +omin +doom +polit +goofi +solitari +innebri +vampir +seriou +nostalg +funki +monk +fellow +order +ever +need +graphic +artist +desktop +publish +check +best diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..f7cbaaa9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,1961 @@ +thoma +rep +home +page +thoma +repsprofessorcomput +scienc +departmentunivers +wisconsin +madison +west +dayton +streetmadison +usa +mail +rep +wisc +telephon +secretari +depart +cornel +univers +curriculum +vita +research +interest +program +slice +differenc +merg +interprocedur +dataflow +analysi +alia +analysi +pointer +analysi +shape +analysi +languag +base +program +develop +environ +increment +comput +attribut +grammar +also +thehom +page +wisconsin +program +slice +project +content +research +summari +categor +index +public +list +public +visitor +post +doctor +associ +student +research +summarymi +research +aim +creat +tool +support +thedevelop +complex +softwar +system +object +createtool +provid +power +languag +specif +program +manipulationoper +particular +work +explor +program +slicingcan +serv +basi +program +manipul +oper +slice +program +respect +program +elementss +project +program +includ +program +element +thatmight +affect +either +directli +transit +valu +thevari +us +member +slice +allow +findsemant +meaning +decomposit +program +thedecomposit +consist +element +textual +contigu +program +slice +fundament +oper +solvingmani +softwar +engin +problem +instanc +applicationsin +program +understand +mainten +debug +test +differenc +special +reus +merg +project +worker +carri +aim +atimprov +underli +technolog +program +slice +relatedoper +implement +program +slicer +develop +method +us +slice +softwar +engin +tool +andbuild +slice +base +program +manipul +tool +clickherefor +home +page +wisconsin +program +slice +project +recent +establish +unexpect +connect +betweeninterprocedur +dataflow +analysi +previou +work +oninterprocedur +program +slice +particular +show +larg +class +interproceduraldataflow +analysi +problem +solv +transformingthem +special +kind +graph +reachabl +problem +graph +reachabl +problem +solv +precis +polynomi +timebi +algorithm +origin +develop +interprocedur +slice +also +interest +subject +increment +algorithm +increment +algorithm +mean +algorithm +make +solut +probleminst +find +solut +nearbi +problem +instanc +categor +index +publicationsprogram +slice +differenc +merg +overview +ics +slice +dagstuhl +slicing_pat +david +binklei +thesi +acta +topla +also +pldi +chop +fseb +differenc +differenc +yang +thesismerg +tosem +tosem +also +sigsoft +david +binklei +thesiswuu +yang +thesi +iwscm +topla +also +popla +esop +iwsvcc +algebra +slice +applic +program +merg +also +esop +ccpsd +amast +npfo_submiss +semant +slice +ccipl +poplb +applic +slice +dagstuhl +pepma +implement +slice +system +prog_integration_system +prog_integration_manu +note +system +describ +paper +handl +small +subsetof +pascal +system +distribut +licens +obtain +clickingher +current +retarget +implement +handl +program +andexpect +distribut +system +support +slice +chop +anddifferenc +probabl +integr +program +miscellan +thesesdavid +binklei +thesiswuu +yang +thesisphil +pfeiffer +thesisinterprocedur +dataflow +analysisdemand +idfa +bottom +logic +program +magic +set +transform +also +exhaust +demand +idfa +graph +reachabl +also +fsea +popl +diku +idfa +us +graph +reachabl +tcs_ide_pap +also +fase +ptime +complet +idfa +acta_pap +alia +analysi +pointer +analysi +shape +analysi +dagstuhl +also +popl +pepmb +pfeiffer_thesi +sigplan +languag +base +program +develop +environ +dagstuhl +synthes +gener +book +synthes +gener +manual +comput +also +lape +popl +rep +thesi +psde +topla +also +popl +popl +compcon +cacm +also +also +increment +comput +ramalingam +thesi +jalg_pap +dagstuhl +popl +popl +popl_not +synthes +gener +book +acta +comput +also +lape +popl +rep +thesi +topla +also +popl +popl +attribut +grammar +spaa +synthes +gener +book +synthes +gener +manual +acta +topla +popl +rep +thesi +psde +popl +topla +also +popl +popl +compcon +list +publicationsbooksrep +teitelbaum +synthes +gener +system +constructinglanguag +base +editor +springer +verlag +york +rep +teitelbaum +synthes +gener +refer +manual +third +edit +springer +verlag +york +chines +reprint +publish +world +publish +corpor +beij +china +rep +gener +languag +base +environ +press +cambridg +journal +publicationssagiv +rep +horwitz +precis +interprocedur +dataflow +analysi +applic +toconst +propag +appear +theoret +comput +scienc +rep +sequenti +natur +interprocedur +program +analysi +problem +appear +acta +informatica +acta +ramalingam +rep +increment +algorithm +gener +shortest +path +problem +appear +journal +algorithm +j_alg +ramalingam +rep +comput +complex +dynam +graph +problem +theoret +comput +scienc +binklei +horwitz +rep +program +integr +languag +procedur +call +transact +softwar +engin +methodolog +januari +tosem +ramalingam +rep +competit +line +algorithm +dynam +prioriti +order +problem +inform +process +letter +yang +horwitz +rep +program +integr +algorithm +accommod +semant +preservingtransform +transact +softwar +engin +methodolog +juli +rep +algebra +properti +program +integr +scienc +comput +program +horwitz +rep +effici +comparison +program +slice +acta +informatica +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +transact +program +languag +system +januari +topla +horwitz +prin +rep +integr +interf +version +program +transact +program +languag +system +juli +topla +rep +increment +evalu +attribut +grammarswith +unrestrict +movement +tree +modif +acta +informatica +rep +teitelbaum +languag +process +program +editor +ieee +comput +novemb +rep +demer +sublinear +space +evalu +algorithm +attribut +grammar +transact +program +languag +system +juli +rep +teitelbaum +demer +increment +context +depend +analysi +languag +base +editor +transact +program +languag +system +juli +teitelbaum +rep +cornel +program +synthes +syntax +direct +program +environ +commun +septemb +invit +papershorwitz +rep +program +depend +graph +softwar +engin +proceed +fourteenth +intern +conferenceon +softwar +engin +melbourn +australia +york +ics +rep +horwitz +semant +base +program +integr +proceed +second +european +symposium +program +nanci +franc +march +lectur +note +comput +scienc +ganzing +springer +verlag +york +book +chaptersrep +demand +interprocedur +program +analysi +us +logic +databas +applic +logic +databas +ramakrishnan +kluwer +academ +publish +boston +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +softwar +chang +impact +analysi +bohner +arnold +ieee +comput +societi +alamito +appear +reprint +fromacm +transact +program +languag +system +januari +topla +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +softwar +merg +slice +berzin +ieee +comput +societi +alamito +reprint +fromacm +transact +program +languag +system +januari +topla +horwitz +prin +rep +integr +interf +version +program +softwar +merg +slice +berzin +ieee +comput +societi +alamito +reprint +fromacm +transact +program +languag +system +juli +topla +ramalingam +rep +theori +program +modif +softwar +merg +slice +berzin +ieee +comput +societi +alamito +reprint +fromproceed +colloquium +combin +paradigmsfor +softwar +develop +brighton +april +lectur +note +comput +scienc +abramski +maibaum +springer +verlag +york +rep +teitelbaum +languag +process +program +editor +languag +architectur +program +environ +ichikawa +tsubotani +world +scientif +publish +compani +singapor +reprint +ieee +comput +novemb +teitelbaum +rep +cornel +program +synthes +syntax +direct +program +environ +interact +program +environ +barstow +sandewal +shrobe +mcgraw +hill +reprint +commun +septemb +teitelbaum +rep +horwitz +wherefor +cornel +program +synthes +softwar +develop +environ +wasserman +ieee +comput +societi +washington +reprint +proceed +sigplan +sigoa +symposiumon +text +manipul +portland +june +sigplan +notic +june +confer +publicationssiff +rep +program +gener +softwar +reus +appear +sigsoft +proceed +fourth +sigsoftsymposium +foundat +softwar +engin +francisco +octob +sigsoft +softwar +engin +note +rep +turnidg +program +special +program +slice +proceed +dagstuhl +seminar +partial +evalu +schloss +dagstuhl +wadern +germani +lectur +note +comput +scienc +danvi +glueck +thiemann +springer +verlag +york +dagstuhl +sagiv +rep +wilhelm +solv +shape +analysi +problem +languag +destruct +updat +confer +record +twenti +third +symposiumon +principl +program +languag +petersburg +york +popl +horwitz +rep +sagiv +demand +interprocedur +dataflow +analysi +sigsoft +proceed +third +sigsoftsymposium +foundat +softwar +engin +washington +octob +sigsoft +softwar +engin +note +fsea +rep +rosai +precis +interprocedur +chop +sigsoft +proceed +third +sigsoftsymposium +foundat +softwar +engin +washington +octob +sigsoft +softwar +engin +note +fseb +rep +hentenryck +semant +foundat +bind +time +analysi +imper +program +pepm +proceed +sigplan +symposium +onparti +evalu +semant +base +program +manipul +jolla +california +june +york +pepma +rep +shape +analysi +gener +path +problem +pepm +proceed +sigplan +symposium +onparti +evalu +semant +base +program +manipul +jolla +california +june +york +pepmb +sagiv +rep +horwitz +precis +interprocedur +dataflow +analysi +applic +toconst +propag +proceed +fase +colloquium +formalapproach +softwar +engin +aarhu +denmark +lectur +note +comput +scienc +moss +nielsen +schwartzbach +springer +verlag +york +tapsoft +rep +horwitz +sagiv +precis +interprocedur +dataflow +analysi +graph +reachabl +confer +record +twenti +second +symposiumon +principl +program +languag +francisco +popl +rep +horwitz +sagiv +rosai +speed +slice +sigsoft +proceed +second +sigsoft +symposium +onth +foundat +softwar +engin +orlean +decemb +sigsoft +softwar +engin +note +decemb +rep +solv +demand +version +interprocedur +analysi +problem +proceed +fifth +intern +confer +compilerconstruct +edinburgh +scotland +april +lectur +note +comput +scienc +fritzson +springer +verlag +york +ramalingam +rep +increment +algorithm +maintain +domin +tree +reducibleflowgraph +confer +record +twenti +first +symposiumon +principl +program +languag +portland +popl +rep +scan +grammar +parallel +attribut +evalu +data +parallel +proceed +fifth +symposium +parallel +algorithm +andarchitectur +velen +germani +june +juli +spaa +ramalingam +rep +modif +algebra +proceed +second +intern +confer +onalgebra +methodolog +softwar +technolog +amast +iowa +citi +iowa +ramalingam +rep +theori +program +modif +proceed +colloquium +combin +paradigmsfor +softwar +develop +brighton +april +lectur +note +comput +scienc +abramski +maibaum +springer +verlag +york +yang +horwitz +rep +program +integr +algorithm +accommod +semant +preserv +transform +sigsoft +proceed +fourth +sigsoft +symposiumon +softwar +develop +environ +irvin +decemb +softwar +engin +note +decemb +rep +algebra +properti +program +integr +proceed +european +symposium +program +copenhagen +denmark +lectur +note +comput +scienc +jone +springer +verlag +york +rep +bricker +illustr +interfer +interf +version +program +proceed +second +intern +workshop +softwareconfigur +manag +princeton +octob +softwar +engin +note +novemb +horwitz +pfeiffer +rep +depend +analysi +pointer +variabl +proceed +sigplan +confer +program +languagedesign +implement +portland +june +sigplan +notic +juli +rep +yang +semant +program +slice +program +integr +proceed +colloquium +current +issuesin +program +languag +barcelona +spain +march +lectur +note +comput +scienc +diaz +oreja +springer +verlag +york +horwitz +rep +binklei +interprocedur +slice +us +depend +graph +proceed +sigplan +confer +programminglanguag +design +implement +atlanta +june +sigplan +notic +juli +rep +horwitz +prin +support +integr +program +variant +environ +forprogram +larg +proceed +intern +workshop +softwar +versionand +configur +control +grassau +germani +bericht +german +chapter +winkler +teubner +stuttgart +germani +horwitz +prin +rep +integr +interf +version +program +confer +record +fifteenth +symposium +principl +ofprogram +languag +diego +januari +york +horwitz +prin +rep +adequaci +program +depend +graph +repres +program +confer +record +fifteenth +symposium +principl +ofprogram +languag +diego +januari +york +poplb +rep +marceau +teitelbaum +remot +attribut +updat +languag +base +editor +confer +record +thirteenth +symposium +principl +ofprogram +languag +petersburg +januari +york +rep +teitelbaum +synthes +gener +proceed +sigsoft +sigplan +softwar +engineeringsymposium +practic +softwar +develop +environ +pittsburgh +april +sigplan +notic +rep +alpern +interact +proof +check +confer +record +eleventh +symposium +onprincipl +program +languag +salt +lake +citi +utah +januari +york +rep +static +semant +analysi +languag +base +editor +digest +paper +ieee +spring +compcon +francisco +march +ieee +comput +societi +washington +rep +optim +time +increment +semant +analysi +syntax +direct +editor +confer +record +ninth +symposium +principlesof +program +languag +albuquerqu +januari +york +teitelbaum +rep +horwitz +wherefor +cornel +program +synthes +proceed +sigplan +sigoa +symposium +text +manipul +portland +june +sigplan +notic +june +demer +rep +teitelbaum +increment +evalu +attribut +grammar +applic +tosyntax +direct +editor +confer +record +eighth +symposium +principlesof +program +languag +williamsburg +januari +york +softwarerep +bricker +rosai +wisconsin +program +integr +system +releas +april +releas +april +releas +juli +licens +site +click +herefor +licens +inform +patentsrep +horwitz +binklei +interprocedur +slice +comput +program +us +depend +graph +patent +number +novemb +pend +submissionsrep +sagiv +wilhelm +solv +shape +analysi +problem +languag +destruct +updat +comput +scienc +depart +univers +wisconsin +madison +august +submit +journal +public +horwitz +rep +sagiv +demand +interprocedur +dataflow +analysi +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper +click +access +latest +version +submit +journal +public +ramalingam +rep +program +comput +scienc +depart +univers +wisconsin +madison +novemb +click +access +latest +version +submit +journal +public +public +reportsrep +sagiv +wilhelm +shape +abstract +shape +analys +comput +scienc +depart +univers +wisconsin +madison +juli +rep +sagiv +wilhelm +solv +shape +analysi +problem +languag +destruct +updat +comput +scienc +depart +univers +wisconsin +madison +juli +leeuwen +mehlhorn +rep +increment +comput +dynam +algorithm +dagstuhl +seminar +report +intern +confer +research +center +comput +scienc +ibfi +schloss +dagstuhl +wadern +germani +rep +sagiv +horwitz +interprocedur +dataflow +analysi +graph +reachabl +datalogisk +institut +univers +copenhagen +copenhagen +denmark +april +diku +rep +wisconsin +program +integr +system +refer +manual +releas +comput +scienc +depart +univers +wisconsin +madison +juli +manual +psramalingam +rep +categor +bibliographi +increment +comput +confer +record +twentieth +symposiumon +principl +program +languag +charleston +york +tutori +paper +rep +increment +comput +unpublish +tutori +note +present +twentieth +symposium +principl +program +languag +charleston +klint +rep +snelt +program +environ +dagstuhl +seminar +report +intern +confer +research +center +comput +scienc +ibfi +schloss +dagstuhl +wadern +germani +binklei +horwitz +rep +identifi +semant +differ +program +procedur +extendedabstract +comput +scienc +depart +univers +wisconsin +madison +septemb +ball +horwitz +rep +correct +algorithm +reconstitut +program +depend +graph +comput +scienc +depart +univers +wisconsin +madison +juli +click +access +paper +ramalingam +rep +semant +program +represent +graph +comput +scienc +depart +univers +wisconsin +madison +decemb +click +access +paper +binklei +horwitz +rep +multi +procedur +equival +theorem +comput +scienc +depart +univers +wisconsin +madison +novemb +click +access +paper +rep +demonstr +prototyp +tool +program +integr +comput +scienc +depart +univers +wisconsin +madison +januari +click +access +paper +visitor +post +doc +studentsvisitor +mooli +sagiv +israel +scientif +center +jiazhen +robert +paig +post +doctor +associ +yang +univ +wisconsin +current +associ +professor +depart +comput +inform +scienc +nation +chiao +tung +univers +taiwan +prin +cornel +univ +current +associ +professor +depart +comput +scienc +univers +north +carolina +chapel +hill +studentsramalingam +bound +increment +comput +lectur +note +comput +scienc +springer +verlag +york +binklei +multi +procedur +program +integr +dissert +tech +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper +pfeiffer +depend +base +represent +program +refer +variabl +dissert +tech +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper +yang +algorithm +semant +base +programintegr +dissert +tech +comput +scienc +depart +univers +wisconsin +madison +august +click +access +paper diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..312bf022 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,94 @@ +saeed +home +pagespe +function +statusclock +window +statu +date +window +settimeout +statusclock +speed +function +clearid +window +cleartimeout +saeed +mirza +tech +depart +comput +scienc +engin +indian +institut +technolog +kanpur +graduat +student +univ +wisconsin +madison +comput +scienc +depart +home +lucknow +india +like +spend +time +listn +indian +film +song +netsurf +read +comic +hero +calvin +love +peopl +beauti +seem +beauti +love +contact +home +offic +randal +apart +madison +comp +depart +univ +wisconsin +madison +dayton +street +madison +contact +best +email +saeed +wisc +friend +contact +right +pictur +wismad +suggest +send +check +guestbook +page +access +time +sinc +last +updat +saeed +copi +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..74613f0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,38 @@ +salli +peterson +home +page +salli +goodwin +peterson +lecturercomput +scienc +univers +wisconsin +dayton +madison +mail +salli +wisc +edutelephon +interest +desktop +comput +real +time +oper +system +program +languag +cours +taught +fall +comp +lectur +us +comput +last +chang +septemb +salli +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..365323cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,179 @@ +amit +home +page +warn +reach +protocol +offic +home +email +amit +wisc +snail +mail +comput +scienc +dept +univers +wisconsin +madison +princeton +madison +phone +educ +work +toward +guidanc +jeff +naughton +master +scienc +comput +scienc +univers +wisconsin +madison +bachelor +technolog +comput +scienc +engin +indian +institut +technolog +madra +research +interest +onlin +analyt +process +queri +process +perform +evalu +public +storag +estim +multidimension +aggreg +presenc +hierarchi +amit +shukla +prasad +deshpand +jeffrei +naughton +karthikeyan +ramasami +intern +confer +larg +databas +mumbai +bombai +paper +postscript +slide +present +vldb +interest +link +madison +run +boston +marathon +chicago +marathon +madison +marathon +york +marathon +seattl +marathon +georg +marathon +utah +databas +relat +pointer +research +network +activ +databas +bibliographi +server +databas +logic +program +competit +profil +spec +idea +intern +databas +confer +sort +name +univers +trier +databas +confer +sort +date +sigmod +server +larg +data +base +vldb +endow +databas +articl +archiv +massiv +digit +data +system +mdd +initi +multimedia +inform +sourc +nation +industri +inform +infrastructur +protocol +niiip +consortium +transact +process +council +transcoop +transact +manag +support +cooper +applic +olap +page +olap +introduct +pilot +softwar +interest +help +educ +needi +children +look +asha +home +page +person +pageand +bookmarksar +also +onlin +garfield diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..2b3515b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,41 @@ +ashwin +home +page +page +construct +name +ashwin +graduat +student +depart +comput +scienc +univers +wisconsin +madison +come +india +hadmi +undergradu +educ +indianinstitut +technolog +bombai +depart +iitb +fantast +place +worth +visit +like +contact +canfing +meto +find +whereabout +altern +send +email +sashwin +wisc +bookmark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..61770da4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,228 @@ +subramanya +sastri +home +pagei +subramanya +sastri +mugshot +mine +come +hospet +town +karnataka +india +year +school +near +hospet +awai +hampi +ruin +vijayanagara +empir +also +awai +tungabhadra +built +across +tungabhadra +river +place +beauti +unfortun +dont +photograph +place +would +scan +photo +album +long +undergradu +indian +institut +technolog +kanpur +depart +comput +scienc +engin +year +wonder +photo +album +fewphotograph +time +gokul +also +maintain +photo +album +contain +mani +photo +iitk +iitk +class +homepag +inform +classmatesat +iitk +presentcurr +graduat +student +comput +scienc +depart +univers +wisconsin +madison +plan +cours +registeredfor +spring +interestsmi +academ +interest +field +architectur +program +languag +compil +hope +graduat +field +cricket +favourit +sport +us +playphatta +iitk +tenni +ball +version +cricket +thati +champ +anyth +provid +entertainmentin +compani +friend +bookmark +link +cricket +site +enjoi +listen +music +anyth +pleasant +must +dont +consid +hard +rock +metal +someth +realli +pleasant +donot +watch +much +whatev +watch +like +seinfeld +sshow +much +read +goe +voraci +reader +rather +whati +like +better +like +like +horror +fantasi +neither +like +scienc +fiction +unsuccesfulli +tri +come +grip +like +someth +earth +know +mean +someth +hint +romanc +ifposs +jeffrei +archer +favourit +author +date +also +like +jane +austen +pride +prejudic +much +talk +read +ramesh +mahadeven +sarticl +make +interest +read +also +like +plai +bridg +back +kanpur +pick +wonderfulgam +compani +friend +long +time +back +itagain +also +like +solv +crossword +cryptic +kind +inth +past +year +devot +much +time +hobbi +mine +last +updat +januari +send +comment +suggest +sastri +wisc +eduunivers +wisconsin +madison +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..aeb0fa6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,347 @@ +susan +calcari +home +page +susan +calcarimanag +scout +servicescomput +scienc +departmentunivers +wisconsin +madisonsc +wisc +edumi +titl +internet +scout +manag +scout +servic +comput +scienc +depart +univers +wisconsin +madison +scoutservic +project +internicand +support +nation +sciencefound +scout +servic +support +internet +user +thehigh +educ +commun +provid +time +inform +bestresourc +tool +internet +goal +help +research +andeduc +internet +effect +work +week +filter +hundr +item +internet +edit +organ +import +present +inform +multipl +usabl +format +servic +includ +scout +report +scout +toolkit +happen +come +soonth +scout +sprout +report +newslett +written +kid +kid +peopl +receiv +scout +report +week +email +andthousand +read +scout +report +annotatedlist +best +newli +discov +internet +resourc +tool +public +kind +devot +research +andeduc +commun +scout +report +select +itemsinclud +issu +peopl +receiv +happeningspost +everi +weekdai +thousand +read +orth +newsgroup +scout +servic +page +moreinform +servic +profession +background +involv +nation +wide +internet +project +sinc +wheni +join +merit +arbor +organ +manag +thensfnet +backbon +project +work +informationservic +divis +project +spent +three +year +speak +tonat +intern +higher +educ +audienc +internetand +resourc +also +develop +produc +merit +network +seminarseri +first +nation +seminar +seri +focus +need +internetend +user +later +becam +director +network +inform +servic +forcerfnet +respect +internet +provid +base +diego +wrote +propos +result +award +part +internicproject +year +cooper +agreement +nation +sciencefound +third +year +agreement +termin +theport +intern +base +diego +chose +continu +workof +internet +scout +time +elect +reloc +wisconsin +andrequest +approv +reloc +scout +project +thecomput +scienc +depart +madison +heartilyagre +futur +plan +scout +servicesat +time +scout +servic +staff +includ +jack +solock +speciallibrarian +project +expand +theaddit +includ +open +systemadministr +posit +open +speciallibrarian +posit +june +matthew +livesei +join +staff +aproject +assist +goal +staff +includ +expans +thescout +toolkit +addit +disciplin +specif +servic +asscout +report +page +devot +specif +area +studi +scout +servic +also +branch +researcharea +collabor +intern +support +servic +group +madison +campus +potenti +research +topic +includenetwork +inform +discoveri +retriev +nidr +anddisciplin +orient +inform +gather +public +depend +onth +natur +research +addit +staff +hire +willincludecomput +scienc +research +graduat +undergradu +level +inform +visit +site +find +scout +servic +join +ofour +mail +list +ifyou +interest +appli +open +posit +theonlin +descript +systemadministr +special +librarian +send +aresum +write +sampl +address +feel +free +contactm +telephon +email +susan +calcariinternet +scout +comput +scienc +departmentunivers +wisconsin +madison +dayton +street +madison +scal +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..17ab7562 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,64 @@ +chandrasekar +home +page +welcom +chandrasekar +homepag +worri +happi +present +graduat +student +depart +comput +scienc +past +born +june +coimbator +southern +state +tamilnadu +inindia +high +school +educ +higher +secondari +school +coimbator +undergradu +educ +theindian +institut +technolog +kharagpur +major +dept +comput +scienc +engin +person +stuff +resid +kendal +avenu +madison +officedept +comput +scienc +dayton +madison +sivasankaran +chandrasekar +schandra +wisc +last +updat +finger +find +whereabout +page +access +time +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..1e03e7fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,36 @@ +eric +schnarr +home +pageer +schnarr +schnarr +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usaphon +advisor +larusresearch +interest +architectur +descript +languagesfunct +languag +designinterest +link +wisconsin +wind +tunnel +sacm +hockei +club +dragon +byte +schnarr +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..42a2fe44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,91 @@ +yanni +schoina +home +page +yanni +schoina +schoina +wisc +research +assistantdepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +note +page +construct +advisor +mark +hill +research +interest +parallel +systemspubl +fine +grain +access +control +distribut +share +memori +ioanni +schoina +babak +falsafi +alvin +lebeck +steven +reinhardt +jame +laru +david +wood +sixth +intern +confer +architectur +support +programminglanguag +oper +system +asplo +applic +specif +protocol +user +level +share +memori +babak +falsafi +alvin +lebeck +steven +reinhardt +ioanni +schoina +mark +hill +jame +laru +ann +roger +david +wood +supercomput +educ +univers +crete +iraklio +univers +crete +iraklio +last +updat +juli +cretan +cook diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..de276481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,47 @@ +beverli +seavei +home +page +beverli +seavei +current +regist +grad +student +comput +scienc +switch +special +degre +comput +biologi +comput +interest +mine +includ +asian +classic +danc +differ +version +ramayana +india +southeast +asia +danc +drama +ramayana +ramakien +india +southeast +asia +wish +could +finger +give +account +hairbal +keyboard +finger +keeper +instead diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..8da4b1f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,79 @@ +scott +colvil +home +pagescott +colvil +home +page +mail +wisc +eduoffic +address +comput +scienc +offic +dayton +madison +home +franc +madison +welcom +welcom +home +page +well +school +back +univers +wisconsin +madison +seen +largest +ball +chees +want +school +link +uwisc +home +page +uwisc +home +pagein +addit +list +page +find +interest +hopefulli +also +enjoi +beer +world +drink +game +absolut +add +caffein +rate +soda +guid +lock +pickingand +educ +artsi +page +world +fact +book +constitut +english +dictionari +roget +thesauru +poetri +databas +wisc +last +updat +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..c705d419 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,74 @@ +steve +seitz +anim +writeup +imag +motion +analysi +charact +anim +control +steve +seitz +chuck +dyerour +research +motiv +problem +teachinga +graphic +model +perform +realist +motion +problem +hasit +root +cartoon +anim +modern +applic +tocomput +anim +virtual +realiti +teleconferenc +robot +task +endow +graphic +model +knowledg +performa +repertoir +interest +motion +learn +motion +beinvok +directli +high +level +cue +smile +walk +infer +anabstract +goal +store +cu +levelev +virtual +input +devic +imag +sequenc +project +includ +analysi +period +motion +track +rigid +nonrigid +object diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..4627dae4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,132 @@ +steve +seitz +view +interpol +view +synthesi +imag +interpol +investig +steve +seitz +chuck +dyerw +devis +provabl +correct +autom +techniqu +creat +view +scene +basi +view +scene +techniqu +reli +geometr +imag +interpol +known +morph +graphicscommun +produc +intermedi +imag +although +morph +techniquescurr +enjoi +widespread +theoret +validityha +establish +particular +interpol +viewsof +scene +produc +sequenc +physic +valid +view +ofthat +scene +surprisingli +answer +provid +imag +first +undergo +simplerectif +procedur +certain +assumpt +visibl +theproject +process +satisfi +view +synthesi +work +describ +us +autom +stereo +techniqu +todetermin +imag +correspond +recent +work +view +morph +consid +user +interact +us +guid +interpol +comput +interpol +three +differ +imag +pair +therectifi +origin +imag +shown +left +right +click +theinterpol +imag +center +mpeg +movi +show +computedinterpol +view +interpol +origin +interpol +origin +work +describ +physic +valid +view +synthesi +imag +interpol +seitz +dyer +proc +workshop +represent +visual +scene +last +chang +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..1eacae64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,158 @@ +view +morph +steve +seitz +view +morph +investig +steve +seitz +chuck +dyer +relat +public +view +morph +appear +siggraph +toward +imag +base +scene +represent +us +view +morph +appear +icpr +imag +morph +techniqu +gener +compel +transit +betweenimag +howev +differ +object +pose +viewpoint +often +causeunnatur +distort +imag +morph +difficult +correct +manual +us +basic +principl +projectivegeometri +paper +introduc +simpl +extens +imag +morphingthat +correctli +handl +project +camera +scene +transform +techniqu +call +view +morph +work +prewarp +imagesprior +comput +morph +postwarp +interpol +imag +knowledg +shape +requir +techniqu +appliedto +photograph +draw +well +render +scene +abil +synthes +chang +viewpoint +imag +structureafford +wide +varieti +interest +effect +simpl +imagetransform +view +morph +imag +object +taken +differ +viewpoint +produc +illus +physic +move +virtual +camera +click +mpeg +movi +interpol +morph +view +differ +face +produc +simultan +interpol +facial +shape +color +pose +click +mpeg +movi +interpol +mpeg +movi +view +morph +resolut +mona +lisa +mona +lisa +reflect +high +resolut +mpeg +movi +frame +resolut +mpeg +movi +frame +click +mpeg +movi +jude +shavlik +chuck +dyer +last +chang +septemb diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..16c08604 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,196 @@ +period +motion +inform +period +cyclic +motion +analysi +steve +seitz +chuck +dyermani +real +life +motion +period +frame +refer +instanc +human +locomotori +motion +walk +run +skip +shuffl +areperiod +frame +refer +move +person +havedevelop +approach +determin +imag +sequenc +could +beenproduc +object +whose +motion +period +refer +frame +unlik +previou +attempt +determin +period +inform +ourapproach +allow +camera +move +film +inform +poscript +paper +click +period +tracethi +imag +show +period +trace +line +recov +imagesequ +phonograph +turntabl +ramp +correspond +moment +timewher +turntabl +momentarili +slow +period +trace +shownsuperimpos +error +surfac +recov +real +repeat +motion +tend +perfectli +even +period +variesslightli +cycl +next +physic +import +changesin +scene +gener +period +defin +cyclic +motionsthat +make +period +variat +explicit +represent +call +period +trace +compact +pure +tempor +describ +evolutionof +object +scene +without +refer +spatial +quantiti +asposit +veloc +delimit +cycl +identifi +correspondencesacross +cycl +period +trace +provid +mean +tempor +parsinga +cyclic +motion +addit +sever +pure +tempor +motion +featur +canb +deriv +relat +natur +locat +irregular +period +tracecan +also +us +medic +imag +enhanc +composit +imag +fromdiffer +cycl +furthermor +period +trace +reliabl +recoveredfrom +imag +sequenc +view +invari +fashion +us +theori +affin +invari +inform +poscript +paper +clickher +heart +imag +enhanc +correspond +angiograph +imag +determin +period +trace +bottom +composit +correspond +imag +note +additionalstructur +visibl +composit +appar +singl +imag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..455cc72b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,62 @@ +steve +seitz +home +page +steve +seitz +seitz +wisc +graduat +student +berkelei +math +area +interest +imag +motion +analysi +imag +base +render +machin +vision +comput +graphic +research +project +view +morph +view +synthesi +mpeg +movi +show +comput +interpol +imag +left +click +exampl +mpeg +movi +analysi +cyclic +motion +recent +public +stuff +frequent +link +wisconsin +comput +vision +group +surreal +cach +click +closer +look +seitz +last +chang +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..b590c7e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,199 @@ +home +page +madison +machin +learn +research +group +home +page +contain +relev +inform +thememb +machin +learn +research +group +mlrg +univers +wisconsin +madison +tabl +content +group +member +mlrg +archiv +recent +paper +mlrg +archiv +dataset +domain +theori +mlrg +paper +read +schedul +seminar +machin +learn +graduat +cours +relev +local +link +us +extern +link +group +member +carolyn +allex +jonathon +bodner +kevin +cherkauer +mark +craven +tina +eliassi +richard +maclin +graduat +august +david +opitz +graduat +august +jude +shavlik +mlrg +archiv +recent +papersvisit +page +describ +recent +public +ascii +file +contain +list +recentabstractsi +also +avail +mlrg +archiv +dataset +domain +theoriesy +access +directori +contain +severalml +testb +also +access +wisconsin +breast +cancer +databas +prof +olvi +mangasarian +sgroup +mlrg +paper +read +schedul +mlrg +current +schedul +paper +read +schedul +also +line +seminar +current +schedul +local +seminar +relev +local +link +machin +learn +graduat +cours +madison +machin +learn +math +program +group +comput +biologi +madison +dept +madison +comput +neurosci +madison +group +madison +comput +vision +group +madison +robot +group +madison +dept +home +page +madison +home +page +doit +madison +center +mathemat +scienc +gopher +madison +graduat +school +madison +librari +recent +tabl +content +abstract +select +journal +mostli +wendt +librari +readabl +wisc +proc +workshop +agent +learn +agent +held +intern +machin +learn +confer +local +link +last +modifi +jude +shavlik +shavlik +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..b5f36f8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,130 @@ +shubu +mukherje +home +page +shubu +mukherje +shubu +wisc +fiance +mimi +nephew +avirup +month +graduat +research +assist +comput +scienc +departmentunivers +wisconsin +madison +west +dayton +street +madison +usaphon +shubhendu +mukherje +click +button +jump +correspond +home +page +articl +advisor +mark +hill +research +project +wisconsin +wind +tunnel +public +wisconsin +comput +architect +world +wide +comput +architect +badger +ballroom +danc +team +person +interest +hobbi +morph +dionisio +courtesi +steve +seitz +random +interest +linkseducationph +univers +wisconsin +madison +spring +expect +univers +wisconsin +madison +tech +indian +institut +technolog +kanpur +india +research +summari +coher +network +interfac +dissert +cachabl +queue +design +space +isca +progress +distribut +share +memori +mechan +cooper +share +memori +isca +commod +workstat +submit +public +cach +coher +protocol +custom +protocol +irregular +applic +ppopp +grai +softwar +dirsw +isca +ppopp +parallel +simul +wind +tunnel +tutori +cach +simul +copyright +copi +shubu +mukherje +right +reserv diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..130274be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,27 @@ +michael +siff +home +page +michael +siff +philosophi +research +academ +interest +run +club +fall +midwest +seminar +wonder +wai +wast +time +new +inform +resourc +book +movi +televis +sport +humor diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..6f425770 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,31 @@ +skrentni +home +page +skrentni +lecturerc +coordinatorgradu +studentcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +comput +sciencesemail +skrentni +wisc +edutelephon +relat +link +univers +wisconsin +depart +univers +wisconsin +groupskrentni +wisc +last +updat +februari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..ae4e6bf8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,79 @@ +bryan +home +page +bryan +graduat +studentcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +offic +mail +wisc +edutelephon +comput +scienc +univers +wisconsin +madison +comput +scienc +purdu +univers +interest +intellig +help +system +human +comput +interact +knowledg +represent +oper +system +activ +select +recent +public +travi +step +toward +intellig +unix +help +system +knowledg +represent +unix +util +technic +report +univers +wisconsin +madison +april +miller +fredriksen +empir +studi +reliabl +unix +util +commun +relat +link +univers +wisconsin +depart +univers +wisconsin +group +professor +larri +travi +advisor +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..4b3b430f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,176 @@ +larri +travi +home +page +larri +travisprofessorcomput +scienc +departmentunivers +wisconsin +dayton +madison +mail +travi +wisc +edutelephon +univers +californa +angel +interest +expert +system +procedur +control +automat +deduct +comput +support +understand +complex +data +philosoph +foundat +ofartifici +intellig +comput +manag +social +implic +comput +research +summari +research +center +around +us +logic +basi +knowledg +formal +expert +system +deduct +augment +databasesystem +recent +work +focus +procedur +control +automaticdeduct +design +system +support +contruct +displai +test +high +level +abstract +pattern +form +informationcontain +larg +heterogen +databas +special +attent +beingdevot +represent +geograph +inform +waysthat +enhanc +data +integr +data +visualiz +map +activ +involv +sever +expert +system +develop +project +andwith +larg +intellig +databas +project +incorpor +databas +model +visual +aid +singl +integr +system +organiz +social +issu +associ +introduct +inform +technolog +analysi +suppositionsunderli +altern +approach +artifici +intellig +current +student +chuck +ohar +bryan +scott +swanson +andi +whitsitt +derek +zahn +recent +public +could +failur +expert +system +develop +implement +oravec +appear +journal +system +softwar +comput +metaphor +artifici +intellig +reflex +examin +falsework +west +artifici +intellig +magazin +societi +landscap +altern +metaphor +artifici +intellig +west +artifici +intellig +magazin +interest +link +wisc +dept +wisc +group +last +chang +june +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..15672c56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,86 @@ +avinash +sodani +home +page +avinash +sodani +sodani +wisc +graduat +student +depart +comput +scienc +univers +wisconsin +madison +west +dayton +street +madison +educ +comput +scienc +univers +wisconsin +madison +tech +hon +comput +scienc +indian +institut +technolog +kharagpur +india +juli +academ +interest +comput +architectur +multiscalar +kestrel +project +program +languag +compil +interest +cours +project +packag +java +download +packag +meet +batch +mate +relat +link +info +center +depart +rank +depart +india +relat +link +india +new +hindu +onlin +edit +random +link +look +kgpite +follow +toll +free +directori +will +world +cricket +page +access +time +sinc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..df200481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,522 @@ +guri +sohi +home +page +gurindar +sohi +sohi +wisc +associ +professor +comput +scienc +andelectr +comput +engin +address +educ +research +interest +summari +current +graduat +student +recent +talk +recent +public +recent +graduatesaddress +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usasohi +wisc +eduphon +department +offic +educ +comput +scienc +univers +illinoi +urbana +elect +engin +univers +illinoi +urbana +electr +electron +engin +birla +institut +technolog +scienc +pilani +india +research +interest +instruct +level +parallel +process +compil +architectur +share +memori +multiprocessor +memori +system +research +summari +current +research +focus +design +thehighest +perform +uniprocessor +current +gener +current +investig +architectur +circa +processor +plenti +transistor +availableon +chip +challeng +resourc +getth +highest +possibl +perform +execut +sequenti +program +target +sustain +execut +ofov +instruct +cycl +ordinari +numer +applic +program +research +group +investig +sever +issu +needto +resolv +goal +achiev +studi +character +thenatur +instruct +level +parallel +numericappl +program +order +understand +avail +parallel +andhow +could +exploit +bulk +group +research +effort +expend +continu +thedevelop +multiscalar +process +model +novel +paradigm +exploit +current +develop +multiscalar +compil +andcarri +detail +simul +studi +assessth +potenti +multiscalar +concept +current +graduat +student +todd +austin +scott +breach +andrea +moshovo +vijaykumarrec +talkswil +instruct +set +import +futur +processor +given +risc +symposium +held +watson +researchcent +yorktown +height +novemb +file +compress +postscript +gener +framemak +multiscalar +processor +gener +multiscalar +talk +given +sever +place +file +compress +postscript +gener +framemak +recent +publicationshigh +bandwidth +address +translat +multipl +issu +processor +austin +sohi +appear +inrd +annual +intern +symposium +comput +architectur +appendix +ofdetail +resultsi +also +avail +zero +cycl +load +microarchitectur +support +reduc +load +latencyt +austin +sohi +annual +intern +symposium +microarchitectur +micro +microarchitectur +superscalar +processorsj +smith +sohi +proceed +ieee +decemb +hardwar +mechan +dynam +reorder +memori +referencesm +franklin +sohi +appear +ieee +transact +comput +multiscalar +processor +sohi +breach +vijaykumar +intern +symposium +comput +architectur +streamlin +data +cach +access +fast +address +calcul +austin +pnevmatikato +sohi +intern +symposium +comput +architectur +anatomi +regist +file +multiscalar +processor +breach +vijaykumar +sohi +annual +intern +symposium +microarchitectur +micro +request +combin +multiprocessor +arbitrari +interconnect +network +lebeck +sohi +ieee +transact +parallel +distribut +system +effici +detect +pointer +arrai +access +error +austin +breach +sohi +sigplan +confer +program +languag +design +implement +guard +execut +branch +predict +dynam +processor +pnevmatikato +sohi +intern +symposium +comput +architectur +memori +system +goodman +sohi +handbook +electr +engin +press +control +flow +predict +dynam +processor +pnevmatikato +franklin +sohi +annual +intern +symposium +microarchitectur +micro +regist +traffic +analysi +streamlin +inter +oper +communicationin +fine +grain +parallel +processor +franklin +sohi +annual +intern +symposium +microarchitectur +micro +expand +split +window +paradigm +exploit +fine +grain +parallel +franklin +sohi +intern +symposium +comput +architectur +dynam +depend +analysi +ordinari +program +austin +sohi +intern +symposium +comput +architectur +effici +detect +pointer +arrai +access +errorst +austin +breach +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +decemb +guard +execut +branch +predict +dynam +processorsd +pnevmatikato +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +novemb +knapsack +zero +cycl +memori +hierarchi +componentt +austin +vijaykumar +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +novemb +tetra +evalu +serial +program +perform +fine +grain +parallel +processorst +austin +sohi +technic +report +comput +scienc +depart +univers +wisconsin +madison +juli +recent +gradstodd +austin +april +hardwar +softwar +mechan +reduc +load +latencydionisio +pnevmatikato +decemb +incorpor +guard +execut +exist +instruct +setsmanoj +franklin +decemb +multiscalar +architecturemark +friedman +januari +architectur +character +prolog +executionsriram +vajapeyam +decemb +instruct +level +character +crai +processormen +chow +chiang +septemb +memori +system +design +base +multiprocessor +last +updat +april diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..cb8489bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,183 @@ +solomon +home +page +marvin +solomon +professor +former +chair +chair +goodman +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +phone +solomon +wisc +research +interest +object +orient +databas +system +softwar +develop +support +environ +distribut +oper +system +comput +network +design +implement +program +languag +program +languag +theori +recent +publicationstoward +effect +effici +free +space +manag +proc +sigmod +conf +manag +data +june +mark +mcauliff +michael +carei +andmarvin +solomon +abstractpostscriptth +gmap +versatil +tool +physic +data +independ +proc +conf +larg +databas +septemb +odyssea +tsatalo +marvin +solomon +andyanni +ioannidi +abstractpostscriptexpand +version +appear +inth +vldb +journalv +april +abstractpostscriptshor +persist +applic +proc +sigmod +conf +manag +data +june +michael +carei +david +dewitt +michael +franklin +nanci +hall +mark +mcauliff +jeffrei +naughton +daniel +schuh +marvin +solomon +odyssea +tsatalo +seth +white +andmichael +zwillingavail +astech +report +overview +capitl +softwar +develop +environ +fourth +intern +workshop +softwar +configur +manag +paul +adam +andmarvin +solomon +avail +astech +report +updat +version +appear +lectur +note +comput +scienc +persist +object +logic +paul +adam +andmarvin +solomon +avail +astech +report +point +interest +graphic +interfac +room +program +built +us +java +home +page +spring +univ +wisconsin +comput +scienc +depart +shore +project +home +page +shore +project +photoalbum +todai +dilbert diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..3c22ba34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,10 @@ +sowmya +home +page +welcom +home +page +sowmya +subramanian +sowmya +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..06a6a8a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,91 @@ +shilpa +lawand +home +page +welcom +shilpa +lawand +home +pagei +graduat +student +depart +comput +scienc +univers +wisconsin +madison +person +stuffa +link +pastfor +info +schoolher +resum +html +ascii +comput +scienc +second +love +us +resours +stuff +want +place +syster +women +comput +scienc +stuff +relat +madisonsurf +madisonst +wisconsin +inform +serverth +hoofer +sail +clubowl +music +book +movi +java +shilpa +signatur +meet +first +lovesnowi +homepag +cool +linksher +iswher +finger +three +judg +panel +philadelphia +vote +constitut +follow +link +read +decis +page +access +time +sinc +june +send +comment +suggest +email +tossl +wisc +shilpal +wisc +thru +guest +formlast +modifi +juli diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..40f03605 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,49 @@ +jeremi +stenglein +home +page +jeremi +stenglein +graduat +student +comput +scienc +depart +univers +wisconsin +madison +west +dayton +street +madison +offic +comput +scienc +phone +mail +stenglei +wisc +teach +comput +scienc +section +gener +home +page +section +home +page +take +construct +compil +link +comput +scienc +depart +home +pageth +simpson +home +pageespn +sport +hotwir diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..f3771503 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,61 @@ +steve +reinhardt +home +page +steven +reinhardt +graduat +student +computerarchitectur +work +wisconsin +wind +tunnelgroup +advisor +david +wood +although +project +mark +hill +andjim +laru +often +feel +free +tell +mewhat +well +plan +finish +fall +join +faculti +ofth +univers +michigan +eec +depart +januari +interest +find +page +publicationsresearch +summari +email +stever +wisc +click +finger +phone +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +usalast +updat +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..fd89c6e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,74 @@ +john +strikwerda +home +page +john +strikwerda +professor +comput +scienc +john +strikwerdadepart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +wisconsin +email +strik +wisc +telephon +fall +teach +problem +solv +us +comput +begin +januari +assign +nation +scienc +foundat +year +click +inform +numer +analysi +qualifi +exam +research +interest +numer +analysi +comput +fluid +dynamicsmyoffici +depart +home +pageoth +stuff +field +museum +point +search +rate +home +page +inform +chicago +best +chicago +tribun +talk +radio +show +car +footballmi +kid +nathan +nathan +drew diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..c6cea69d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +subba +home +page +thing +enjoi +calvin +hobb +late +show +david +letterman +seinfeld +interest +prooocessor +histor +interest +paper +evalu +stream +buffer +secondari +cach +replac +decoupl +integ +execut +superscalar +processor +subbarao +cambridg +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..72ca79f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,93 @@ +chiang +home +page +chiang +depart +univers +wisconsin +madisonoffic +stelephon +mail +suhui +wisc +educlick +send +emailoffic +hour +thur +page +still +construct +ta +fall +public +applic +characterist +limit +preemption +complet +parallel +processor +schedul +polici +rajesh +mansharamani +mari +vernon +proc +sigmetr +conf +measur +model +comput +system +nashvil +dynam +static +quantum +base +parallel +processor +alloc +mari +vernon +workshop +schedul +strategi +parallel +process +conjunct +ipp +april +search +engin +yahoo +sourc +resourc +bibliographi +world +wide +virtual +librari +subject +catalogu +link +relat +taiwan +taiwan +network +servic +sinanet +shop +magzin +new +job +calendar +seednet +vistor +guid +taiwan +academia +sinica +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..8cc1ff73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,253 @@ +david +sundaram +stukel +homepag +david +sundaram +stukel +page +upon +effronteri +push +hand +sever +patient +femor +arteri +blood +spurt +blind +anesthetist +hall +scream +browbeck +tri +knee +groin +manag +hamstr +scalpel +crawl +floor +stab +feet +leg +voilet +baboon +assist +woman +ever +care +damn +realli +wig +climb +tabl +pois +jump +browbeck +feet +stomp +cop +rush +william +burrough +nake +lunch +construct +page +catapult +reader +page +choos +page +index +brief +class +take +class +link +relat +comput +scienc +site +dedic +smart +cloth +also +steve +mann +page +link +view +current +see +wearabl +camera +site +link +inform +artifici +life +santa +institut +specif +link +project +call +tierra +thoma +recent +dilbert +strip +technic +comput +scienc +math +joke +somewher +link +philosoph +scientif +artist +natur +physic +conscious +surviv +research +laboratori +site +info +variou +destruct +show +organ +arcosanti +arcolog +site +outsid +phoenix +krishnamurti +foundat +site +tell +centuri +philosoph +link +variou +beat +writer +includ +pictur +site +fill +info +wait +link +variou +new +sourc +packer +new +scientist +onlin +regist +harass +mail +reward +dozen +interest +factoid +astound +friend +scientif +american +onlin +take +advantag +hypertext +addit +provid +select +current +articl +print +edit +scienc +new +publish +weekli +contain +smaller +current +articl +hindu +nation +newspap +india +onion +link +local +interest +madison +astronomi +depart +page +specif +washburn +observatori +public +view +univers +len +insignific +piec +histori +astronomi +madison +madison +weather +info +obtain +follow +site +webweath +nation +weather +servic +madison +channel +new +weather +channel +new +weather +link +interest +home +page +late +timothi +leari +link +numer +articl +written +note +optimist +noam +chomski +disinform +great +list +conspiraci +theori +buri +within +ultra +trendi +movi +review +back +madison +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..d2067ec1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,11 @@ +brian +swander +home +pagebrian +swander +think +offic +offic +hour +bookmark +mark diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..c546173b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,139 @@ +ariel +tamchesari +tamch +research +assistantemail +tamch +wisc +ariel +tamch +comput +scienc +depart +west +dayton +street +madison +typic +pose +angri +posei +organ +fall +oper +system +comput +scienc +colleg +park +offic +sresearch +paradyn +parallel +perform +toolsstatu +search +thesi +topic +els +interest +parallel +perform +toolsparallel +distribut +oper +systemsbluesth +simpsonsseinfeldskiingskinetkeyston +favorit +area +snowboard +joke +differ +snowboard +vacum +cleaner +dirt +attach +snowboard +greet +peopl +whoa +sorri +dude +differ +municip +bond +snowboard +municip +bond +eventu +matur +gener +incom +hate +countri +music +fortran +cool +link +yahooespncpu +infoskinetoth +stuff +talk +exokernel +oper +system +architectur +applic +level +resourc +manag +octob +paper +techniqu +tool +distribut +share +memori +perform +improv +spring +callaghan +talk +supercomput +interconnect +network +april +talk +zebra +stripe +network +file +system +need +structur +file +system +raid +paper +perform +block +wait +free +highli +concurr +object +asynchron +share +memori +multiprocessor +version +postscript +version +spring +paper +analysi +risc +instruct +enhanc +fall diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..93da0266 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,127 @@ +jeff +lampert +home +pagejeff +lampert +home +page +ricardo +montalban +voic +welcom +home +page +know +nota +pictur +least +good +still +look +foron +incrimin +doesn +make +look +like +aconvict +babi +pictur +high +school +yearbook +lasttim +show +someon +never +heard +cute +babi +someon +think +well +found +coupl +pictur +tick +threaten +turn +intoa +human +dispens +took +pictur +henc +pictur +befound +separ +page +click +anautograph +copi +sign +name +monitor +pictureappear +choos +link +weasel +seek +take +pace +turn +click +basic +factswho +person +last +night +academ +work +relatedwhat +class +take +work +dept +resum +entertainmentbook +movi +music +program +newsgroup +import +subjectsfriendsno +show +show +sick +theme +song +hobbi +club +organizationsgroup +plu +wish +inmi +favorit +linksugh +sound +like +song +sound +music +servo +look +juli +andrew +fire +crow +good +mstk +eclect +paraphenaliai +would +miscellan +straight +forward +tick +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..2b829d36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +todd +homepagetodd +homepagein +fall +teach +section +sinc +area +mathemat +program +plug +mathematicalprogram +page +contain +wealth +inform +mathematicalprogram +tmunson +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..e975541a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian +home +pagebrian +toonen +comput +scienc +departmentunivers +wisconsin +dayton +streetmadison +offic +cswhatev +chief +seattleth +ground +tipi +medit +life +itsmean +accept +kinship +creatur +acknowledgingun +univers +thing +infus +thetru +essenc +civil +luther +stand +bear +oglala +siouxlast +modifi diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..aa75f430 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,19 @@ +thano +tsioli +home +page +site +netscap +enhanc +read +shouldconsid +upgrad +browser +latest +version +netscap +ifthat +option +page +home +page diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..8c282281 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,66 @@ +todd +turnidg +todd +dougla +turnidgeschoolcomput +scienc +departmentunivers +wisconsin +madison +dayton +madison +homemuppet +babylon +milton +madison +eyesightright +axi +left +axi +graduat +student +depart +comput +sciencesat +univers +wisconsin +madison +year +work +professorthoma +rep +studyingprogram +languag +teach +section +hold +mathematicsand +computersci +case +western +reserveunivers +locat +cleveland +ohio +origin +kent +ohio +myfamili +live +judg +compani +keep +click +enough +evid +awai +long +time +amus +shortcut +last +modifi +todd +turnidg +turnidg +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..bd221275 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,54 @@ +taxiao +wang +home +page +welcom +taxiao +wang +home +page +page +heavi +construct +click +finger +contact +inform +taxiao +wang +graduat +student +teach +assist +comput +scienc +depart +univers +wisconsin +madison +offic +bldg +dayton +street +madison +offic +phone +home +phone +mail +twang +wisc +visitor +number +sinc +home +page +visit +time +sinc +visitor +number +sinc +last +updat +june diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..b24cc52f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,16 @@ +shaft +home +pageuri +shaft +home +pageemail +wisc +eduinterest +diversionsstart +trek +meet +microsoft +start +trek +meet +window diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..1ff9c943 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,76 @@ +venkatesh +ganti +home +pagevenkatesh +ganti +vganti +wisc +graduat +studentoffic +comput +scienc +depart +dayton +madison +usaphon +note +page +construct +past +present +graduat +student +univers +wisconsin +madison +fall +earlier +undergradu +student +madra +india +nativ +kakinada +andhra +pradesh +info +asha +asha +basic +educ +asha +madison +india +click +know +india +godav +homepag +hostel +madra +godav +yearbook +hope +onlin +sometim +research +interest +databas +work +till +btech +project +real +time +databas +want +look +real +time +genesi +madison +group +homepag +last +updat +januari diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..3587ea90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,113 @@ +vijai +home +page +vijaykumar +vijai +wisc +profession +affili +comput +scienc +depart +univers +wisconsin +madison +contact +address +comput +scienc +depart +dayton +street +madison +phone +email +vijai +wisc +advisor +guri +sohi +project +multiscalar +project +educ +doctor +univers +wisconsin +madison +august +undergradu +birla +institut +technolog +scienc +pilani +india +research +compil +multiscalar +architectur +dissert +distribut +regist +file +design +anatomi +regist +file +multiscalar +processor +breach +vijaykumar +sohi +annual +intern +symposium +microarchitectur +micro +compil +regist +commun +regist +commun +strategi +multiscalar +architectur +breach +vijaykumar +sohi +submit +annual +intern +symposium +microarchitectur +micro +multiscalar +processor +multiscalar +processor +sohi +breach +vijaykumar +intern +symposium +comput +architectur +schedul +regist +commun +compil +regist +commun +multiscalar +architecturet +vijaykumar +sohi +go +work +memori +data +depend +predict +person +side diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..e9344bf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,101 @@ +john +watrou +home +pagejohn +watrou +watrou +wisc +comput +scienc +departmentunivers +wisconsin +madison +dayton +streetmadison +telephon +public +john +watrou +dimension +quantum +cellular +automata +proc +symp +foundat +comput +scienc +john +watrou +polynomi +time +algorithm +artin +whapl +approxim +theorem +number +theori +fourth +confer +canadiannumb +theori +associ +assort +link +quantum +comput +link +quantum +comput +archiv +stanford +quantum +inform +home +page +oxford +particl +beam +physic +laboratori +quantum +inform +page +ucla +laboratori +theoret +quantum +comput +montreal +lanl +preprint +bibliographi +comput +scienc +bibliographi +hypertext +bibliographi +project +hypatia +gener +refer +element +stylehypertext +webster +interfaceroget +thesauru +random +link +parasol +recordsplayst +linksweath +forecast +madisonth +isthmu +daili +pagemathemat +quotat +servermathematician +biographiesgeek +site diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..5e8d5506 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,132 @@ +weiru +home +page +eiru +home +page +send +email +ppppleas +find +around +sometim +think +english +speaker +commit +asylum +verbal +insan +languag +peopl +recit +plai +plai +recit +ship +truck +send +cargo +ship +havenos +feet +smell +richard +leder +three +possibl +part +date +least +must +beoffer +entertain +food +affect +customari +begina +seri +date +great +deal +entertain +moder +amountof +food +merest +suggest +affect +amount +ofaffect +increas +entertain +reduc +proportion +affect +entertain +longer +call +date +circumst +food +omit +miss +manner +guid +excruciatingli +correct +behaviour +univers +peke +univers +friend +physic +depart +alumni +associ +atmadison +univers +world +littl +grei +cell +pictur +univers +game +entertain +late +show +david +letterman +show +univers +studio +wish +send +postcard +someon +movi +world +movi +review +favorit +hockei +player +steve +francai +dictionnairefrancai +anglai +dictionnair +softwar +relatif +lafrancophoni +test +degrammair +francais +french +lesson +weather +forecast +madison +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..310d8db4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,107 @@ +welcom +zhang +home +page +first +year +graduat +student +depart +hometown +shanghai +peopl +republ +china +educ +student +depart +comput +scienc +univers +wisconsin +madison +comput +scienc +jose +state +univers +jose +california +depart +comput +scienc +technolog +tsinghua +univers +beij +peopl +republ +chinaemail +weiz +wisc +eduwork +experiencecontractor +develop +variou +inform +manag +system +differ +platform +platform +includ +windowsnt +solari +technolog +us +includ +tuxedo +pathwai +softwar +design +tandem +comput +corpor +softwar +engin +sherpa +corpor +system +oper +nasa +am +research +center +hobbiesma +jiangbridg +card +game +tabl +tenni +pingpong +joggingth +ultim +challengesolv +mine +sweeper +expert +level +puzzl +within +second +without +cheat +quot +dayth +best +memori +manag +memori +manag +ackowledgementthi +home +page +written +us +framework +provid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..8f92cac5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,79 @@ +kent +wenger +home +page +welcom +kent +wenger +home +page +note +page +definit +still +construct +preparedfor +pothol +need +pictur +scan +kent +wengerassoci +researchercomput +scienc +departmentunivers +wisconsin +west +dayton +streetmadison +telephon +email +wenger +wisc +edufing +workth +main +project +work +arecod +cluster +data +provid +anddevis +data +explor +andvisu +come +good +acronym +importantpart +project +wouldn +agre +visualizationproduc +devis +softwar +peopl +work +yanni +ioannidi +miron +livnyraghu +ramakrishnanmor +inform +univers +wisconsin +madison +dbm +research +groupuw +madison +comput +scienc +home +pagewiscinfo +madison +home +page +personallinksimageslast +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..5fb2580c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,155 @@ +wisconsin +wind +tunnel +project +home +page +wisconsin +wind +tunnel +projectmost +futur +massiv +parallel +comput +built +fromworkst +like +node +program +high +level +parallellanguag +like +support +share +address +space +whichprocess +uniformli +refer +data +wisconsin +wind +tunnel +project +seek +develop +consensu +aboutth +middl +level +interfac +languag +compil +abovesystem +softwar +hardwar +first +propos +interfac +wascoop +share +memori +evolutionari +extens +toconvent +share +memori +softwar +hardwar +recent +havebeen +work +revolutionari +interfac +call +tempest +tempest +provid +mechan +allow +programm +compil +andprogram +librari +implement +messag +pass +transparentshar +memori +hybrid +combin +developingimplement +tempest +think +machin +cluster +ofworkst +wisconsincow +hypothet +hardwar +platform +approach +cowus +snoop +logic +implement +fpga +sram +collaboratingwith +wisconsin +paradyn +project +adapt +perform +tool +tempest +overviewand +annot +bibliographi +slide +overview +talk +novemb +slide +pageor +four +slide +page +complet +technic +paper +contributor +fund +sourc +origin +project +name +wisconsin +week +articl +paradyn +relat +project +wisconsin +comput +architectur +group +comput +scienc +departmentat +univers +wisconsin +world +wide +comput +architectur +inform +last +updat +juli +mark +hill +markhil +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..fded9407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,94 @@ +xuelin +home +page +felix +charact +creat +otto +messmer +first +base +anim +human +person +first +featur +felix +save +whichwa +shown +famou +star +rival +chaplin +keaton +princ +wale +pick +polo +team +mascot +pictur +accompani +charl +lindbergh +across +theatlant +statu +first +imag +successfulli +transmit +develop +televis +star +televis +seri +somehow +obtain +magic +trick +didn +oneev +seem +agre +whether +teeth +whisker +like +spend +time +make +film +televis +program +appear +newspap +comic +strip +advertis +hundr +product +make +thing +trick +remov +tail +ear +put +back +wish +could +finger +give +account +hairbal +keyboard +finger +keeper +instead +sui +vritabl +chat +pass +partout diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..db2ff6e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,608 @@ +yanni +ioannidisyanni +ioannidi +yanni +wisc +eduresearch +interestsdatabas +manag +system +scientif +databas +user +interfac +andinform +visual +complex +queri +optim +heterogen +databas +research +primarili +focus +area +databas +system +optim +complex +queri +databas +support +scientificdata +futur +databas +applic +pose +sever +challeng +toqueri +optim +complex +queri +ask +significantli +higher +thanin +tradit +system +number +altern +evalu +algorithm +much +highera +well +especi +parallel +attempt +tooptim +sever +valu +time +paramet +parametr +queryoptim +thu +number +altern +access +plan +process +querywil +extrem +larg +current +us +algorithmsfor +find +optimum +among +inadequ +research +investig +random +optim +algorithmsa +viabl +solut +problem +primarili +interest +simul +anneal +genet +algorithm +well +altern +take +advantag +special +propertiesof +queri +optim +also +look +complex +queri +schedul +problem +especiallythos +aris +parallel +multimedia +environ +error +propag +size +cost +estim +complex +queri +alsopart +studi +try +identifi +appropriateinform +must +maintain +databas +system +limit +thepropag +error +primarili +focus +identifi +properti +ofoptim +histogram +approxim +distribut +valu +inrel +attribut +comput +mode +investig +expect +part +manyexperi +variou +scientif +disciplin +futur +databas +gener +need +special +support +mani +aspectsthat +current +technolog +readi +provid +involv +develop +desktop +experi +managementenviron +help +scientist +throughout +life +cycl +theirexperiment +studi +primari +compon +system +databas +system +major +issu +work +address +visual +user +interfac +andsemant +heterogen +former +concentr +identifi +right +metaphor +arefor +repres +complex +databas +schema +queri +object +scientistsso +natur +also +investig +power +dynam +visual +queri +latter +concentr +develop +visual +tool +facilitatetransl +integr +differ +data +format +schema +although +issu +gener +aris +experimentalscientif +disciplin +effort +guid +need +specificproject +associ +particular +simul +basedperform +studi +comput +system +simul +base +model +plantgrowth +spectroscopi +sequenc +microscop +imag +recent +publicationsi +ioannidi +queri +optim +comput +survei +symposium +issueon +anniversari +march +garofalaki +ioannidi +schedul +issu +multimedia +queryoptim +comput +survei +symposium +issu +multimediasystem +decemb +ioannidi +ramakrishnan +contain +conjunct +queri +beyondrel +set +transact +databas +system +tod +septemb +haber +ioannidi +livni +foundat +visual +metaphor +forschema +displai +journal +intellig +inform +system +juli +special +issu +visual +inform +system +ioannidi +tsangari +design +implement +performanceevalu +bermuda +ieee +transact +knowledg +data +engin +tkde +februari +miller +ioannidi +ramakrishnan +translat +integr +ofheterogen +schema +bridg +theori +practic +inform +system +januari +ioannidi +christodoulaki +optim +histogram +limitingworst +case +error +propag +size +join +result +transact +databas +system +tod +decemb +ioannidi +ramakrishnan +winger +transit +closur +algorithmsbas +graph +travers +transact +ondatabas +system +tod +septemb +ioannidi +dynam +inform +visual +sigmod +record +decemb +ioannidi +poosala +histogram +base +solut +divers +databaseestim +problem +ieee +data +engin +septemb +ioannidi +livni +gupta +ponnekanti +desktop +experimentmanag +environ +proc +intern +vldb +confer +bombai +india +septemb +poosala +ioannidi +estim +queri +result +distribut +itsappl +parallel +join +load +balanc +proc +intern +vldbconfer +bombai +india +septemb +anjur +ioannidi +livni +frog +turtl +visual +bridgesbetween +file +object +orient +data +proc +intern +conferenceon +scientif +statist +databas +manag +stockholm +sweden +june +garofalaki +ioannidi +multi +dimension +resourc +schedul +forparallel +queri +proc +intern +sigmod +confer +montreal +canada +poosala +ioannidi +haa +shekita +improv +histogram +forselect +estim +rang +predic +proc +internationalacm +sigmod +confer +montreal +canada +ioannidi +livni +haber +user +orient +visual +layoutat +multipl +granular +proc +intern +workshop +advancedvisu +interfac +gubbio +itali +haber +ioannidi +livni +opossum +desk +schema +managementthrough +customiz +visual +proc +intern +vldbconfer +zurich +switzerland +septemb +ioannidi +poosala +balanc +histogram +optim +practicalityfor +queri +result +size +estim +proc +intern +sigmodconfer +jose +tsatalo +solomon +ioannidi +gmap +versatil +tool +forphys +data +independ +proc +intern +vldbconfer +santiago +chile +septemb +tsatalo +ioannidi +unifi +framework +index +databasesystem +proc +intern +dexa +confer +athen +greec +septemb +ioannidi +lashkari +incomplet +path +express +theirdisambigu +proc +intern +sigmod +confer +minneapoli +haber +ioannidi +livni +opossum +flexibl +schemavisu +edit +tool +proc +confer +boston +april +miller +ioannidi +ramakrishnan +translat +integr +ofheterogen +schema +bridg +theori +practic +proc +intern +edbt +confer +cambridg +england +march +ioannidi +univers +serial +histogram +proc +internationalvldb +confer +dublin +ireland +august +miller +ioannidi +ramakrishnan +inform +capacityin +schema +integr +translat +proc +intern +vldbconfer +dublin +ireland +august +wiener +ioannidi +moos +scientist +withdata +manag +problem +proc +intern +workshop +ondatabas +program +languag +york +august diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..2f283936 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin +zhongbin +homepag diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..98736a68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,85 @@ +yinng +home +pageindexofyinongwei +spagehi +welcom +homepag +pleas +good +look +person +info +especi +employ +give +alsolink +classmat +cours +take +good +time +telephon +work +home +address +offic +comp +stat +bldg +madison +home +univ +person +inforesumehobbiestravel +usathi +collect +pictur +took +travel +articl +wrote +trip +chicago +seattl +cours +pointersr +time +computingmacin +learningpattern +recognitioncomputatin +geometrydatabasevisionacadem +diarythi +diari +everi +month +sometim +amaz +mani +littl +read +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel +linksmi +beida +classmatespek +univers +alumni +home +page +oversea +chines +organ +madison +ciumi +bookmarkcom +press +client +support +send +comment +visitor +number +last +access +last +modifi +yinong diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..acbc20a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,52 @@ +matt +home +pageuntil +around +updat +basic +inform +offic +matthew +zeidenbergcent +wisconsin +strategi +observatori +drive +room +madison +voic +home +gilson +madison +email +zeiden +wisc +eduzeidenb +wisc +eduwhen +california +parent +hous +coho +huntington +beach +beauti +convuls +breton +nadja +beaut +sera +convuls +sera +give +food +poor +call +saint +whyth +poor +food +call +communist +helder +camara diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..b45a5712 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,308 @@ +tian +zhang +home +page +tian +zhang +gener +inform +student +research +assistantadvisor +prof +raghu +ramakrishnan +prof +miron +livni +joint +major +concentr +databas +artifici +intellig +compilerminor +financi +invest +bankingoffic +room +comput +scienc +dept +univ +wisconsin +madison +madison +wisconsin +mail +zhang +wisc +eduoffic +telephon +home +telephon +depart +research +intereststher +grow +need +exploratori +analysi +larg +dataset +discov +us +pattern +data +mine +territori +develop +purpos +interest +design +effici +data +mine +algorithm +ortool +larg +databas +integr +techniqu +databas +artifici +intellig +statist +thesi +topic +cluster +densityanalysi +larg +dataset +given +larg +multi +dimension +dataset +limit +amount +resourc +run +time +memori +design +implement +algorithm +effici +accur +identifi +spars +crowd +region +cluster +analysi +estim +densiti +function +overal +data +distribut +densiti +analysi +import +practic +branch +data +mine +appli +mani +domain +dataclassif +imag +compress +pattern +recognit +recent +research +project +birch +effici +data +cluster +densiti +analysi +system +larg +databas +select +public +data +cluster +system +birch +applic +tian +zhang +raghu +ramakrishnan +miron +livni +submit +data +mine +knowledg +discoveri +journal +june +birch +effici +data +cluster +method +larg +databas +tian +zhang +raghu +ramakrishnan +miron +livni +proc +sigmod +conf +data +manag +june +canada +interact +classif +larg +dataset +birch +tian +zhang +raghu +ramakrishnan +miron +livni +proc +workshop +research +issu +data +mine +knowledgediscoveri +cooper +sigmod +june +canada +fast +densiti +probabl +estim +us +kernel +method +larg +databas +miron +livni +raghu +ramakrishnan +tian +zhang +technic +report +juli +motion +plan +multi +joint +robot +topolog +dimensionreduct +method +zhang +ling +zhang +tian +zhang +proc +joint +confer +artifici +intellig +ijcai +findpath +algorithm +manipul +finit +divis +configur +space +zhang +jianwei +zhang +ling +zhang +tian +zhang +robot +manufactur +recent +trend +research +educ +applic +proc +symposium +robot +andmanufactur +research +educ +applic +motion +plan +robot +topolog +dimens +reduct +method +zhang +tian +zhang +jianwei +zhang +ling +zhang +journal +comput +scienc +technolog +find +collis +free +path +mobil +robot +tian +zhang +zhang +proc +symposium +young +comput +profession +beij +relev +link +technic +document +journal +confer +organ +beij +china +interest +last +updat diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..8abc041b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,90 @@ +yihong +home +page +zhao +yihong +zhao +wisc +research +assist +depart +comput +sciencesunivers +wisconsin +madison +west +dayton +streetmadison +advis +prof +jeff +naughton +research +interest +parallel +object +relat +dbm +line +analyt +process +olap +data +mine +financi +data +dbm +benchmark +educationb +univers +north +carolina +chapel +hillm +madison +fall +research +relat +site +wiscosin +group +sigmod +data +mine +maryland +datamin +microstrategi +rolap +arbor +molap +stock +financi +site +lombard +graph +server +pathfind +server +kiwi +club +server +daili +new +site +pathfind +todai +monei +daili +chines +taiwan +new +search +engin +lyco +excit +yahoo +surf +ters +detail +comment +pgmo diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..22befd72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,27 @@ +home +page +still +construct +wang +homepag +offer +inform +home +address +johnson +madison +home +phone +offic +address +dayton +street +madison +offic +phone +email +address +wisc +zhewang +student +wisc diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..0bf30ed1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,139 @@ +zhichen +home +page +zhichen +depart +comput +scienc +dayton +madison +offic +phone +research +assist +advisor +professor +jame +larusprofessor +barton +millerawardbest +paper +award +intern +confer +supercomput +press +juli +research +interest +area +program +languag +perform +issu +parallel +anddistribut +system +recent +studi +techniqu +detect +eliminateperform +bottleneck +distribut +share +memori +system +combin +paradyn +perform +toolwith +blizzard +wisconsinwind +tunnel +think +machin +andth +cluster +workstat +recent +public +field +interestprogram +languag +environ +tool +parallel +distribut +comput +network +comput +parallel +distribut +oper +system +comput +architectur +perform +evalu +benchmark +place +studi +work +high +perform +comput +softwar +laboratori +univers +texa +antonio +studi +publish +area +ofparallel +perform +predict +model +simul +comput +scienc +departmentat +fudan +univers +particip +sever +nation +project +china +area +softwar +develop +environ +high +levelprogram +languag +object +orient +technolog +andimcrement +compil +techniqu +click +postscript +version +html +version +interest +link +asplo +programjourn +confer +compil +program +languag +researchchines +novel +friend +fudan +java diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..9afd5f7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,29 @@ +zhang +home +page +hello +name +zhang +pictur +taken +invit +supper +theth +restaur +tsinghua +univers +chen +weihai +wang +tong +univers +wisconsin +madison +depart +comput +scienc +west +dayton +street +madison +wisconsin diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..54ce4c39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +krzysztof +zmudzinskikrzysztof +zmudzinskispin +inform +student +inc +pictur +inform +poland +pole +thank +stop +visitor +number diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..acfc3630 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,188 @@ +saluja +kewal +colleg +engineeringunivers +wisconsin +madison +kewal +salujaprofessor +engin +hall +engin +drivemadison +mail +saluja +engr +wisc +eduportrait +jpgdepartmentselectr +comput +engineeringcomput +scienc +educ +univers +iowa +research +interestsdesign +testabl +comput +architectur +data +compress +integr +circuit +vlsi +fault +toler +comput +gener +area +research +interest +test +gener +testableand +reliabl +design +digit +system +carri +research +thisarea +make +extens +vlsi +analysi +tool +theresearch +involv +model +fault +design +digit +circuit +testgener +design +modif +enhanc +testabl +built +inself +test +design +fabric +circuit +test +applic +investig +techniqu +make +test +gener +andfault +simul +process +effici +combin +andsequenti +circuit +data +compress +compact +methodsapplic +design +testabl +built +self +testenviron +investig +area +built +self +test +concentr +regularstructur +programm +logic +arrai +ram +areinvestig +self +test +algorithm +implement +inhardwar +littl +perform +area +penalti +anoth +projectw +investig +wai +built +self +test +hardwar +test +asystem +perform +normal +oper +goal +thatth +system +test +continu +oper +littl +noimpact +system +perform +much +work +perform +us +facil +vlsi +digitalsystem +laboratori +laboratori +hous +number +station +withcolor +monitor +termin +program +design +dept +center +consortia +servic +fountain +index +search +mail +credit +help +last +modifi +fridai +cdtthi +page +best +view +browser +support +tabl +photograph +colleg +engin +address +comment +webmast +engr +wisc +eduupd +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..6bdd6292 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,213 @@ +duffi +neil +colleg +engineeringunivers +wisconsin +madison +neil +duffi +professor +engin +research +build +engin +drivemadison +mail +duffi +engr +wisc +eduportrait +departmentsmechan +engineeringeducationb +univers +wisconsin +madisonm +univers +wisconsin +madisonphd +univers +wisconsin +madisonresearch +interestsrobot +comput +control +manufactur +system +precis +engr +comput +integr +manufactur +micromechanismscent +consortiamanufactur +system +engin +programwisconsin +center +space +autom +roboticsprofessor +duffi +research +manufactur +system +involv +integr +sensor +actuat +comput +data +base +advanc +autom +product +system +develop +control +self +guid +inspect +machin +weld +robot +high +perform +materi +handl +system +autom +finish +system +mold +product +rework +studi +highli +distribut +hierarch +system +control +architectur +hope +reduc +cost +complex +larg +scale +comput +control +manufactur +system +increas +flexibl +fault +toler +duffi +construct +sever +experiment +manufactur +system +incorpor +real +time +fulli +distribut +schedul +optim +control +system +develop +theori +explain +properti +perform +system +duffi +associ +director +wisconsin +center +space +autom +robot +research +nasa +fund +center +emphas +autom +agricultur +system +sensor +tactil +feedback +human +oper +telerobot +system +method +perform +evalu +well +human +factor +research +sensori +feedback +fatigu +develop +telerobot +test +experiment +work +carri +duffi +work +close +manufactur +aerospac +industri +teach +cours +manufactur +system +automat +control +comput +control +author +comput +control +machin +process +dept +center +consortia +servic +fountain +index +search +mail +credit +help +last +modifi +tuesdai +cdtthi +page +best +view +browser +support +tabl +photograph +colleg +engin +address +comment +webmast +engr +wisc +eduupd +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..e12db07f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,318 @@ +shapiro +vadim +colleg +engineeringunivers +wisconsin +madison +vadim +shapiro +assist +professor +mechan +engin +univers +avenuemadison +mail +vshapiro +engr +wisc +eduportrait +jpgurl +http +wisc +departmentscomput +sciencemechan +engineeringeducationba +york +universitym +univers +california +angelesm +cornel +universityphd +cornel +univeristyresearch +interestscomput +aid +design +manufactur +appli +comput +geometri +geometr +solid +model +physic +model +analysi +simul +design +product +automationcent +consortiamathemat +comput +engin +graduat +programmanufactur +system +engin +programspati +autom +laboratoryselect +award +honorsn +scienc +foundat +career +award +gener +motor +fellow +select +public +mainten +geometr +represent +space +decomposit +intern +journal +comput +geometri +applic +chain +model +physic +behavior +engin +analysi +design +research +engin +design +april +palmer +real +function +represent +rigid +solid +comput +aid +geometr +design +april +separ +boundari +convers +transact +graphic +januari +vossler +professor +shapiro +research +interest +center +relationship +betweengeometri +physic +phenomena +mechan +artifact +bemodel +repres +analyz +manipul +design +manufacturedbas +comput +represent +algorithm +specif +ongo +project +includ +geometr +model +abil +creat +convert +maintain +consist +ofdistinct +represent +mechan +part +major +technologicalbarri +undermin +us +reliabl +commercialgeometr +model +system +current +research +effort +focu +eliminatingambigu +commun +engin +specif +formal +model +ofparametr +famili +mechan +part +investig +novel +methodsand +comput +techniqu +support +design +manufactur +mechan +design +todai +mechan +form +function +fabric +process +cannot +bedescrib +combinatori +term +discret +simpl +interactingprimit +appar +lack +combinatori +structur +amajor +roadblock +competit +design +manufactur +mechan +system +collabor +industri +present +research +deal +withtheoret +practic +comput +aspect +mechan +designand +seek +establish +formal +basi +make +mechan +design +andmanufactur +part +systemat +competit +smoothintegr +mechan +form +model +engin +activ +physic +model +geometr +model +contain +part +inform +need +captur +thedesir +physic +behavior +artifact +process +us +tomanufactur +recent +studi +algebra +topolog +model +call +chain +model +physic +behavior +suggest +possibl +tounifi +physic +geometr +model +thu +facilit +develop +ofnew +comput +aid +engin +tool +current +investig +theseand +model +physic +behavior +develop +engin +languagesand +comput +algorithm +systemat +specif +model +simul +analysi +physicalobject +system +dept +center +consortia +servic +fountain +index +search +mail +credit +help +last +modifi +thursdai +cdtthi +page +best +view +browser +support +tabl +photograph +colleg +engin +address +comment +webmast +engr +wisc +eduupd +profil diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..a1ba50e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,144 @@ +autom +theorem +prove +groupautom +theorem +prove +groupth +autom +theorem +prove +group +part +comput +scienc +mathemat +depart +univers +texa +ataustin +produc +method +system +intend +prove +theorem +first +higher +order +logic +intent +appli +systemsand +method +problem +primarili +mathemat +also +computersci +technolog +herei +index +electron +avail +tech +report +site +tech +report +seri +continu +current +techreport +ad +tech +reportseri +present +grouplarri +hinesmarti +mayberrybenjamin +shultsalumniprevi +student +woodi +bledso +previou +student +robert +boyer +incomplet +list +other +relat +groupth +late +woodi +bledso +comput +scienc +faculti +profil +robertboyerj +strother +moorethi +incomplet +list +past +visitor +collaboratorswhat +done +implyth +natur +deduct +proverstrivelarri +hine +first +order +logic +inequ +prover +struvelarri +hine +theori +prover +chou +geometri +proverand +variou +improv +theretoinclud +mcphee +feng +prover +theoryimplement +descript +proof +hein +borel +theoremprecondit +proverbledso +prover +analog +proof +hein +borel +theoremnqthmboy +andmoor +prover +develop +clinc +incomplet +list +iprshult +knowledg +us +prover +mathemat +incomplet +list +relatedlinksdo +feedback +want +inform +contact +benjamin +shult diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..e834f132 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +document +moveddocu +movedthi +document +perman +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..e834f132 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/ps/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +document +moveddocu +movedthi +document +perman +move diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..81344ce2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +solut 1 +homework 0 +upson 0 +hall 0 +phone 0 +mail 0 +cornel 0 +offic 0 +hour 0 +oper 0 +assign 0 +prelim 0 +wednesdai 0 +thursdai 0 +budiu 0 +home 0 +pagec 0 +system 0 +program 0 +systemsc 0 +practicum 0 +systemkenneth 0 +birmanc 0 +new 0 +groupcours 0 +syllabuslectur 0 +note 0 +unix 0 +filesystem 0 +structur 0 +link 0 +static 0 +dynam 0 +taslili 0 +lili 0 +fridai 0 +cheng 0 +huang 0 +ychuang 0 +tuesdai 0 +mihai 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..37368957 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,45 @@ +oper 1 +system 0 +chip 0 +last 0 +chang 0 +handout 0 +format 0 +consol 0 +hoca 0 +home 0 +pagec 0 +program 0 +systemsc 0 +practicum 0 +systemsselect 0 +displai 0 +symbol 0 +correspond 0 +postcriptdocu 0 +hand 0 +phase 0 +hocacours 0 +inform 0 +cours 0 +schedul 0 +group 0 +postcript 0 +penn 0 +broccoli 0 +question 0 +answer 0 +comput 0 +window 0 +exampl 0 +us 0 +tutori 0 +principl 0 +configur 0 +fileth 0 +systemth 0 +specif 0 +page 0 +maintain 0 +lorenzo 0 +alvisi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..c4475db1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,240 @@ +homework 1 +chapter 0 +read 0 +databas 0 +tuesdai 0 +relat 0 +februari 0 +april 0 +inform 0 +cours 0 +thursdai 0 +grade 0 +model 0 +cover 0 +retriev 0 +page 0 +march 0 +system 0 +cornel 0 +availablethursdai 0 +relationship 0 +design 0 +class 0 +group 0 +name 0 +duetuesdai 0 +home 0 +entiti 0 +index 0 +queri 0 +crash 0 +recoveri 0 +concurr 0 +control 0 +part 0 +vector 0 +instructor 0 +worth 0 +final 0 +member 0 +januari 0 +fundament 0 +follow 0 +file 0 +space 0 +term 0 +time 0 +structur 0 +solut 0 +korth 0 +silberschatz 0 +second 0 +edit 0 +requir 0 +reserv 0 +upson 0 +offic 0 +hour 0 +appoint 0 +send 0 +mail 0 +aguilera 0 +amith 0 +work 0 +thegroup 0 +avail 0 +week 0 +last 0 +return 0 +first 0 +regrad 0 +introduct 0 +calculu 0 +optim 0 +prelim 0 +transact 0 +process 0 +retrievalthursdai 0 +advanc 0 +pagec 0 +retrievaldepart 0 +computersci 0 +universityspr 0 +gradeshav 0 +nice 0 +summer 0 +introductionthi 0 +three 0 +credit 0 +databasesystem 0 +roughli 0 +twothird 0 +third 0 +topic 0 +systemsinclud 0 +data 0 +physic 0 +organ 0 +hash 0 +languag 0 +queryoptim 0 +transactionprocess 0 +deal 0 +find 0 +usefulinform 0 +larg 0 +textual 0 +willcov 0 +invert 0 +smartsystem 0 +similar 0 +weight 0 +rank 0 +relevancefeedback 0 +phrase 0 +gener 0 +thesaurusconstruct 0 +evalu 0 +permit 0 +automatictext 0 +summar 0 +link 0 +materi 0 +note 0 +placetuesdai 0 +minut 0 +thurston 0 +prerequisitesc 0 +recommend 0 +booksdatabas 0 +concept 0 +mcgrawhil 0 +elmasri 0 +andnavath 0 +benjamin 0 +cum 0 +principl 0 +knowledg 0 +base 0 +byullman 0 +comput 0 +scienc 0 +press 0 +photocopiedmateri 0 +salton 0 +book 0 +research 0 +paper 0 +amitsingh 0 +singhal 0 +teach 0 +assist 0 +sophia 0 +georgiakaki 0 +wednesdai 0 +marco 0 +forc 0 +yamasani 0 +officehour 0 +gradingexam 0 +midterm 0 +ofyour 0 +exam 0 +yourfin 0 +five 0 +semest 0 +policiesy 0 +peopl 0 +clearli 0 +indic 0 +entir 0 +receiv 0 +samegrad 0 +tuesdayand 0 +along 0 +guid 0 +ofcours 0 +date 0 +throughth 0 +late 0 +accept 0 +illeg 0 +hard 0 +even 0 +though 0 +iti 0 +encourag 0 +type 0 +latexif 0 +possibl 0 +alreadi 0 +know 0 +goodopportun 0 +learn 0 +latex 0 +submissionpleas 0 +attach 0 +sort 0 +alphabet 0 +also 0 +write 0 +exampl 0 +bill 0 +clinton 0 +dole 0 +ross 0 +perot 0 +homeworksgrad 0 +sortedalphabet 0 +thecov 0 +list 0 +pagefollow 0 +want 0 +pleas 0 +sendmail 0 +policyal 0 +request 0 +submit 0 +inwrit 0 +within 0 +back 0 +schedulethi 0 +tent 0 +schedul 0 +referto 0 +algebra 0 +tupl 0 +domain 0 +integr 0 +constraint 0 +spring 0 +break 0 +modelhomework 0 +weightingthursdai 0 +indexinghomework 0 +evaluationtuesdai 0 +relev 0 +feedbackthursdai 0 +document 0 +clusteringhomework 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..7026292a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,64 @@ +file 1 +page 0 +inform 0 +postscript 0 +enscript 0 +pfile 0 +home 0 +offic 0 +hour 0 +recit 0 +stuff 0 +convert 0 +text 0 +print 0 +look 0 +admin 0 +handout 0 +incl 0 +lectur 0 +note 0 +assign 0 +ethic 0 +profession 0 +social 0 +respons 0 +mayb 0 +electron 0 +submiss 0 +procedur 0 +group 0 +perform 0 +evalu 0 +resourc 0 +quot 0 +collect 0 +joke 0 +start 0 +submit 0 +sumedh 0 +break 0 +new 0 +misc 0 +peopl 0 +ask 0 +simpl 0 +unix 0 +program 0 +suggest 0 +wai 0 +first 0 +good 0 +sourc 0 +code 0 +second 0 +give 0 +nice 0 +header 0 +gener 0 +leav 0 +send 0 +printer 0 +come 0 +last 0 +modif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..1d05c0be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,59 @@ +project 1 +distribut 0 +cours 0 +system 0 +work 0 +inform 0 +practicum 0 +practic 0 +design 0 +student 0 +rang 0 +systemsor 0 +hand 0 +dirti 0 +real 0 +aspect 0 +studi 0 +implement 0 +signific 0 +also 0 +take 0 +offersa 0 +varieti 0 +simpl 0 +internetworkingto 0 +complex 0 +teamsof 0 +person 0 +choos 0 +interest 0 +theywil 0 +trough 0 +semest 0 +credit 0 +hour 0 +earn 0 +depend 0 +size 0 +complexityof 0 +develop 0 +us 0 +offcial 0 +interact 0 +pageslink 0 +page 0 +find 0 +basic 0 +instruct 0 +descript 0 +plan 0 +progress 0 +report 0 +final 0 +present 0 +tabl 0 +contentspag 0 +comment 0 +werner 0 +vogel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..710e58a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,94 @@ +avail 1 +problem 1 +solut 1 +cours 0 +page 0 +fall 0 +professor 0 +saluja 0 +sorin 0 +part 0 +mentor 0 +contain 0 +link 0 +engin 0 +kewal 0 +outlin 0 +project 0 +help 0 +tool 0 +refer 0 +email 0 +home 0 +intro 0 +comput 0 +architectur 0 +note 0 +inform 0 +electr 0 +andcomput 0 +depart 0 +univers 0 +wisconsin 0 +madison 0 +materi 0 +intend 0 +sole 0 +studentsenrol 0 +semest 0 +prof 0 +generalinform 0 +conduct 0 +midtermsyllabu 0 +midtermi 0 +specif 0 +homework 0 +assign 0 +valid 0 +theproject 0 +need 0 +graphic 0 +caeworkst 0 +pleas 0 +duedat 0 +follow 0 +literatur 0 +assist 0 +anyon 0 +whomai 0 +manual 0 +onlin 0 +throughbold_brows 0 +check 0 +gettingstart 0 +design 0 +architect 0 +train 0 +workbook 0 +get 0 +start 0 +quicksim 0 +trainingworkbook 0 +exersis 0 +addition 0 +thesedocu 0 +document 0 +click 0 +send 0 +surf 0 +homepag 0 +announc 0 +new 0 +group 0 +wiscinfo 0 +gopher 0 +site 0 +uwengin 0 +server 0 +file 0 +last 0 +modifi 0 +septemb 0 +pmcst 0 +question 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..e55d9034 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,43 @@ +info 1 +homework 0 +page 0 +cours 0 +handout 0 +syllabu 0 +exam 0 +grade 0 +tabl 0 +homepag 0 +fall 0 +run 0 +netscap 0 +click 0 +herelink 0 +individu 0 +frame 0 +gener 0 +stuff 0 +section 0 +offic 0 +hour 0 +motw 0 +download 0 +class 0 +requir 0 +criteria 0 +schedul 0 +link 0 +refer 0 +goofi 0 +stuffnot 0 +preced 0 +contain 0 +browser 0 +abl 0 +handl 0 +pleas 0 +email 0 +cornel 0 +edupag 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..0638e96f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,43 @@ +interpret 1 +cours 1 +materi 1 +comput 0 +access 0 +dylan 0 +window 0 +chang 0 +home 0 +pagec 0 +structur 0 +program 0 +scienc 0 +depart 0 +cornel 0 +univers 0 +fall 0 +requir 0 +user 0 +password 0 +request 0 +attempt 0 +info 0 +note 0 +browser 0 +includ 0 +netscap 0 +correctli 0 +check 0 +java 0 +class 0 +file 0 +thu 0 +noodll 0 +inconsist 0 +behavior 0 +work 0 +parter 0 +link 0 +directori 0 +partnerjoin 0 +util 0 +announc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..65e08cce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,359 @@ +cours 1 +assign 0 +problem 0 +comput 0 +program 0 +object 0 +set 0 +dylan 0 +site 0 +orient 0 +time 0 +exam 0 +data 0 +function 0 +languag 0 +evalu 0 +provid 0 +consult 0 +tuesdai 0 +work 0 +gener 0 +java 0 +us 0 +question 0 +lectur 0 +hour 0 +public 0 +avail 0 +late 0 +scienc 0 +cover 0 +broad 0 +abstract 0 +recurs 0 +topic 0 +student 0 +think 0 +reach 0 +best 0 +staff 0 +materi 0 +order 0 +ugrad 0 +schedul 0 +browser 0 +machin 0 +make 0 +final 0 +accept 0 +togeth 0 +date 0 +list 0 +introductori 0 +techniqu 0 +includ 0 +correct 0 +well 0 +rangeof 0 +take 0 +skill 0 +post 0 +cornel 0 +current 0 +conot 0 +handout 0 +need 0 +request 0 +user 0 +idand 0 +password 0 +system 0 +toth 0 +mondai 0 +also 0 +huttenloch 0 +upson 0 +tobia 0 +mayr 0 +hamblin 0 +szewczyk 0 +voskuhl 0 +thursdai 0 +wednesdai 0 +help 0 +even 0 +thu 0 +justin 0 +compil 0 +standalon 0 +version 0 +sure 0 +grade 0 +person 0 +someon 0 +must 0 +submit 0 +name 0 +facil 0 +exampl 0 +prelim 0 +model 0 +procedur 0 +process 0 +oper 0 +state 0 +mutabl 0 +queue 0 +stream 0 +infinit 0 +fall 0 +informationaugust 0 +rang 0 +computersci 0 +concept 0 +pattern 0 +match 0 +useth 0 +dynam 0 +developedat 0 +appl 0 +suit 0 +courseabout 0 +happen 0 +notationthat 0 +chosen 0 +write 0 +major 0 +goal 0 +ofth 0 +teach 0 +clearli 0 +programsand 0 +toolbox 0 +modern 0 +programmingtechniqu 0 +applic 0 +often 0 +wonder 0 +whether 0 +takec 0 +focus 0 +orientedlanguag 0 +wherea 0 +exposur 0 +number 0 +programmingparadigm 0 +imperativeprogram 0 +good 0 +background 0 +goodform 0 +mathemat 0 +physic 0 +probablytak 0 +transfer 0 +either 0 +direct 0 +encourag 0 +first 0 +week 0 +questionsor 0 +comment 0 +http 0 +info 0 +contain 0 +run 0 +serverwhich 0 +allow 0 +answersa 0 +annot 0 +access 0 +simpli 0 +home 0 +page 0 +follow 0 +instruct 0 +creat 0 +thisweek 0 +send 0 +email 0 +edubut 0 +ask 0 +aboutproblem 0 +professor 0 +upsonjam 0 +tarobert 0 +tajustin 0 +taandra 0 +ferencz 0 +melissa 0 +consultantwhen 0 +meetlectur 0 +kimbal 0 +andrecit 0 +recitationsexpand 0 +opportunityto 0 +held 0 +midnight 0 +eachproblem 0 +setsdu 0 +sundai 0 +mondayeven 0 +consultinghour 0 +inth 0 +offic 0 +jame 0 +robert 0 +voskuhltba 0 +materialsther 0 +textbook 0 +handoutsand 0 +note 0 +hardcopi 0 +andon 0 +interpret 0 +free 0 +develop 0 +class 0 +implementedin 0 +capabl 0 +chang 0 +almost 0 +weekli 0 +netscap 0 +window 0 +borland 0 +downloadonto 0 +want 0 +word 0 +warn 0 +download 0 +ontoyour 0 +recentvers 0 +check 0 +requirementsstud 0 +respons 0 +read 0 +recit 0 +therewil 0 +preliminari 0 +given 0 +combinationof 0 +written 0 +exercis 0 +gradeswil 0 +base 0 +combin 0 +score 0 +account 0 +approxim 0 +half 0 +thetot 0 +willgener 0 +immedi 0 +return 0 +followingclass 0 +period 0 +complet 0 +earli 0 +andth 0 +sittingdown 0 +matter 0 +mani 0 +long 0 +sink 0 +beforesit 0 +polici 0 +joint 0 +workmuch 0 +learn 0 +come 0 +programmingproblem 0 +jointli 0 +peopl 0 +howev 0 +youwork 0 +singl 0 +jointassign 0 +circumstancesmai 0 +hand 0 +done 0 +els 0 +yourown 0 +doubt 0 +credit 0 +yougot 0 +would 0 +amaz 0 +easi 0 +tell 0 +whenpeopl 0 +pleas 0 +lifeunpleas 0 +break 0 +rule 0 +facilitiescit 0 +variou 0 +colleg 0 +campu 0 +macintosh 0 +andpc 0 +on 0 +depart 0 +thiscours 0 +upsonmac 0 +datesal 0 +electron 0 +server 0 +mondaynight 0 +submityour 0 +solut 0 +outlin 0 +studi 0 +introduct 0 +substitut 0 +iter 0 +induct 0 +higher 0 +argument 0 +valu 0 +analysi 0 +algorithm 0 +growth 0 +structur 0 +contract 0 +implement 0 +hierarch 0 +tree 0 +quotat 0 +reason 0 +symbol 0 +differenti 0 +extend 0 +type 0 +dispatch 0 +polynomi 0 +arithmet 0 +environ 0 +local 0 +variabl 0 +inherit 0 +multimethod 0 +stack 0 +heap 0 +heapsort 0 +prioriti 0 +metacircular 0 +variat 0 +express 0 +optim 0 +nonloc 0 +exit 0 +catch 0 +throw 0 +garbag 0 +collect 0 +illus 0 +memori 0 +random 0 +quicksort 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..adb62bee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,88 @@ +cours 1 +materi 0 +inform 0 +homework 0 +conot 0 +page 0 +includ 0 +lectur 0 +project 0 +help 0 +annot 0 +allow 0 +send 0 +email 0 +note 0 +section 0 +date 0 +pleas 0 +process 0 +registr 0 +home 0 +introduct 0 +digit 0 +system 0 +comput 0 +organizationthorsten 0 +eickenfal 0 +kimbal 0 +btopic 0 +represent 0 +machin 0 +assembl 0 +languag 0 +processor 0 +organ 0 +interrupt 0 +memori 0 +hierarchi 0 +combinatori 0 +sequenti 0 +circuit 0 +data 0 +path 0 +control 0 +unit 0 +design 0 +microprogram 0 +helpif 0 +problem 0 +relat 0 +best 0 +appropri 0 +point 0 +staff 0 +also 0 +class 0 +mate 0 +otherwis 0 +cornel 0 +talk 0 +consult 0 +materialsal 0 +separ 0 +server 0 +want 0 +bookmark 0 +us 0 +document 0 +file 0 +check 0 +account 0 +request 0 +post 0 +saturdai 0 +encount 0 +difficulti 0 +read 0 +follow 0 +hidden 0 +instruct 0 +sign 0 +start 0 +mondai 0 +listlist 0 +made 0 +maintain 0 +thorsten 0 +eicken 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..73a6a4f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,69 @@ +file 1 +page 0 +inform 0 +postscript 0 +enscript 0 +pfile 0 +home 0 +offic 0 +hour 0 +recit 0 +stuff 0 +convert 0 +text 0 +print 0 +look 0 +admin 0 +handout 0 +incl 0 +lectur 0 +note 0 +assign 0 +ethic 0 +profession 0 +social 0 +respons 0 +mayb 0 +electron 0 +submiss 0 +procedur 0 +group 0 +perform 0 +evalu 0 +resourc 0 +quot 0 +collect 0 +joke 0 +start 0 +submit 0 +sumedh 0 +break 0 +new 0 +mondai 0 +held 0 +csuglab 0 +floor 0 +upson 0 +misc 0 +peopl 0 +ask 0 +simpl 0 +unix 0 +program 0 +suggest 0 +wai 0 +first 0 +good 0 +sourc 0 +code 0 +second 0 +give 0 +nice 0 +header 0 +gener 0 +leav 0 +send 0 +printer 0 +come 0 +last 0 +modif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..597b93bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,42 @@ +solut 1 +assign 0 +upson 0 +hall 0 +phone 0 +mail 0 +cornel 0 +offic 0 +hour 0 +oper 0 +prelim 0 +wednesdai 0 +thursdai 0 +budiu 0 +home 0 +pagec 0 +system 0 +program 0 +systemsc 0 +practicum 0 +systemkenneth 0 +birmanc 0 +new 0 +groupcours 0 +syllabuslectur 0 +note 0 +unix 0 +filesystem 0 +structur 0 +link 0 +static 0 +dynam 0 +taslili 0 +lili 0 +fridai 0 +cheng 0 +huang 0 +ychuang 0 +tuesdai 0 +mihai 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..916c5686 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,135 @@ +home 1 +clair 1 +program 1 +solut 1 +homework 1 +kevin 1 +assign 1 +page 1 +cours 0 +avail 0 +code 0 +scott 0 +file 0 +depart 0 +inform 0 +materi 0 +pleas 0 +mondai 0 +class 0 +oper 0 +still 0 +us 0 +find 0 +bind 0 +problem 0 +offer 0 +time 0 +fact 0 +dodg 0 +postscript 0 +pagecsfound 0 +artifici 0 +intellig 0 +comput 0 +scienc 0 +cornel 0 +univers 0 +fall 0 +welcom 0 +academ 0 +integr 0 +read 0 +announc 0 +date 0 +move 0 +atth 0 +begin 0 +notethat 0 +rubix 0 +need 0 +load 0 +thefunct 0 +rearrang 0 +slightli 0 +also 0 +variabl 0 +previous 0 +defin 0 +constant 0 +chang 0 +paramet 0 +appeas 0 +compil 0 +var 0 +bracket 0 +youus 0 +machinesshould 0 +netscap 0 +sbin 0 +found 0 +ksaunder 0 +sbinfor 0 +account 0 +seriou 0 +gremlin 0 +codefor 0 +abl 0 +look 0 +andget 0 +start 0 +planner 0 +success 0 +uponcomplet 0 +appli 0 +schema 0 +meanwhil 0 +hunt 0 +thoseus 0 +machin 0 +shouldn 0 +special 0 +limit 0 +concern 0 +get 0 +zeroon 0 +third 0 +worri 0 +longer 0 +complet 0 +asspecifi 0 +posit 0 +grade 0 +result 0 +guarante 0 +novemb 0 +oneassign 0 +group 0 +clarif 0 +unless 0 +otherwis 0 +specifi 0 +assum 0 +either 0 +system 0 +add 0 +vanto 0 +queri 0 +alreadi 0 +exist 0 +document 0 +modifi 0 +includ 0 +thisclarif 0 +newhomework 0 +coursemateri 0 +midterm 0 +portion 0 +statu 0 +report 0 +tuesdai 0 +thec 0 +section 0 +remind 0 +right 0 +pagesc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..64c7799d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,34 @@ +homework 1 +cours 0 +note 0 +fall 0 +offic 0 +prelim 0 +home 0 +pagec 0 +automata 0 +comput 0 +theorywelcom 0 +click 0 +inform 0 +lectur 0 +exam 0 +studi 0 +guideannounc 0 +avail 0 +hardcopi 0 +set 0 +hour 0 +incorrect 0 +date 0 +revis 0 +erratum 0 +chang 0 +room 0 +nikolai 0 +hourscod 0 +academ 0 +integr 0 +pleas 0 +read 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..d5354014 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,403 @@ +project 1 +databas 0 +system 0 +exam 0 +refer 0 +paper 0 +cours 0 +develop 0 +survei 0 +prelim 0 +list 0 +materi 0 +dbm 0 +work 0 +us 0 +code 0 +minibas 0 +final 0 +softwar 0 +also 0 +amount 0 +research 0 +term 0 +detail 0 +compon 0 +predat 0 +upson 0 +propos 0 +note 0 +textbook 0 +student 0 +background 0 +program 0 +data 0 +concept 0 +introductori 0 +topic 0 +involv 0 +write 0 +part 0 +process 0 +level 0 +follow 0 +transact 0 +offic 0 +advanc 0 +complet 0 +lectur 0 +mail 0 +inform 0 +grade 0 +possibl 0 +mani 0 +becom 0 +fundament 0 +class 0 +click 0 +herefor 0 +tent 0 +specif 0 +assign 0 +import 0 +base 0 +second 0 +provid 0 +queri 0 +need 0 +area 0 +text 0 +edit 0 +hour 0 +time 0 +tuesdai 0 +thursdai 0 +place 0 +result 0 +stat 0 +outsidefirewal 0 +descript 0 +fall 0 +design 0 +piec 0 +engin 0 +access 0 +form 0 +want 0 +larg 0 +number 0 +cover 0 +consequ 0 +discuss 0 +basic 0 +taught 0 +midterm 0 +test 0 +librari 0 +take 0 +turn 0 +depend 0 +person 0 +hopefulli 0 +choos 0 +whether 0 +goal 0 +comfort 0 +modular 0 +prototyp 0 +associ 0 +manag 0 +avail 0 +interfac 0 +could 0 +function 0 +neither 0 +lower 0 +convent 0 +page 0 +stonebrak 0 +morgan 0 +kaufmann 0 +phone 0 +weitsang 0 +evalu 0 +content 0 +schedul 0 +samplequest 0 +answer 0 +info 0 +predatordbm 0 +currentproject 0 +handout 0 +archiv 0 +prerequisit 0 +professor 0 +teach 0 +assist 0 +coursedescript 0 +offer 0 +first 0 +intendedto 0 +give 0 +solid 0 +databasemanag 0 +world 0 +slargest 0 +certainli 0 +among 0 +valuabl 0 +piecesof 0 +sens 0 +giant 0 +applic 0 +surprisingli 0 +principl 0 +behind 0 +industri 0 +grow 0 +thrive 0 +demand 0 +knowledgeabledatabas 0 +much 0 +greater 0 +suppli 0 +researchcommun 0 +activ 0 +alwai 0 +problem 0 +addressedbecaus 0 +explos 0 +peopl 0 +wish 0 +thiscours 0 +essenti 0 +anyon 0 +asystem 0 +compani 0 +informedus 0 +domain 0 +manipul 0 +find 0 +teller 0 +machin 0 +realli 0 +although 0 +intend 0 +newcours 0 +differ 0 +variou 0 +begin 0 +quickreview 0 +workload 0 +examin 0 +abreadth 0 +advancedtop 0 +thepurpos 0 +awar 0 +coursei 0 +fraction 0 +three 0 +weeksaft 0 +requireread 0 +journal 0 +confer 0 +proceed 0 +engineeringlibrari 0 +suggest 0 +initi 0 +pursueaddit 0 +forinform 0 +look 0 +written 0 +homework 0 +enrol 0 +mean 0 +twice 0 +semest 0 +addit 0 +around 0 +examtim 0 +developmentproject 0 +involvea 0 +signific 0 +wishto 0 +alon 0 +team 0 +howev 0 +willinvolv 0 +proportion 0 +fold 0 +hand 0 +experi 0 +build 0 +exist 0 +andmodifi 0 +manner 0 +thefirst 0 +huge 0 +andrar 0 +luxuri 0 +start 0 +scratch 0 +forc 0 +youto 0 +understand 0 +interact 0 +thediffer 0 +inevit 0 +bug 0 +appear 0 +simpl 0 +singl 0 +user 0 +fromth 0 +parser 0 +disk 0 +abl 0 +compil 0 +comput 0 +environ 0 +varioussystem 0 +actual 0 +come 0 +like 0 +buffermanag 0 +enginethat 0 +possibleproject 0 +lead 0 +builton 0 +focu 0 +high 0 +likecomplex 0 +type 0 +familiar 0 +recommend 0 +becauseth 0 +minim 0 +think 0 +interestedin 0 +ifyou 0 +categori 0 +decid 0 +aproject 0 +storag 0 +buffer 0 +thehigh 0 +optim 0 +betweenminibas 0 +higher 0 +somegener 0 +ideaon 0 +suitabl 0 +talk 0 +well 0 +advanceof 0 +date 0 +certain 0 +step 0 +submitan 0 +order 0 +produc 0 +review 0 +meet 0 +discussth 0 +progress 0 +made 0 +toward 0 +must 0 +particularsystem 0 +documentwil 0 +close 0 +someth 0 +thati 0 +picki 0 +contribut 0 +geton 0 +submiss 0 +includ 0 +demo 0 +reason 0 +oftest 0 +home 0 +homepag 0 +coursetextbook 0 +primari 0 +beta 0 +book 0 +raghu 0 +ramakrishnan 0 +bookcontain 0 +databasebook 0 +free 0 +instruct 0 +databasesystem 0 +might 0 +thecampu 0 +store 0 +korth 0 +silberschatz 0 +mcgraw 0 +hill 0 +secondedit 0 +standard 0 +lack 0 +tobe 0 +graduat 0 +michael 0 +read 0 +collect 0 +rel 0 +recent 0 +collectedand 0 +introduc 0 +ingr 0 +postgr 0 +andillustra 0 +corearea 0 +elmasri 0 +navath 0 +benjamin 0 +cum 0 +altern 0 +grai 0 +reuter 0 +techniqu 0 +bibl 0 +long 0 +tellsyou 0 +know 0 +wonderfulrefer 0 +clear 0 +confus 0 +aspect 0 +concurr 0 +control 0 +recoveri 0 +semant 0 +resourc 0 +tutori 0 +languag 0 +construct 0 +debuggingwith 0 +make 0 +gradingpolici 0 +percentag 0 +even 0 +finish 0 +anextra 0 +half 0 +likewis 0 +thefin 0 +period 0 +thur 0 +confirm 0 +willfocu 0 +coveredin 0 +earlier 0 +question 0 +professorpraveen 0 +seshadri 0 +praveen 0 +teachingassist 0 +hall 0 +noon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..c5afd176 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,183 @@ +program 1 +languag 1 +semant 0 +cours 0 +comput 0 +notat 0 +student 0 +descript 0 +upson 0 +studi 0 +inform 0 +properti 0 +logic 0 +cornel 0 +lectur 0 +link 0 +like 0 +implement 0 +mechan 0 +describ 0 +tool 0 +prove 0 +specif 0 +well 0 +work 0 +side 0 +assum 0 +experi 0 +knowledg 0 +function 0 +basic 0 +undergradu 0 +instructor 0 +class 0 +offic 0 +hour 0 +refer 0 +environ 0 +document 0 +avail 0 +content 0 +text 0 +prerequisiteshandoutsscrib 0 +noteshomework 0 +assignmentscontact 0 +informationrelev 0 +though 0 +call 0 +advanc 0 +book 0 +better 0 +entitl 0 +goalof 0 +conduct 0 +broad 0 +survei 0 +tech 0 +programminglanguag 0 +java 0 +directli 0 +compress 0 +dispatch 0 +tabl 0 +multipleinherit 0 +rather 0 +goal 0 +principlesof 0 +formal 0 +analyz 0 +concern 0 +subsum 0 +thestudi 0 +henc 0 +lead 0 +deeper 0 +understand 0 +mathemat 0 +proof 0 +theori 0 +exampl 0 +abstractli 0 +specifi 0 +howprogram 0 +oper 0 +asnot 0 +denot 0 +turn 0 +abstract 0 +preciser 0 +allow 0 +techniqu 0 +induct 0 +relat 0 +forform 0 +interest 0 +relev 0 +type 0 +safeti 0 +compil 0 +correct 0 +ideal 0 +come 0 +learn 0 +somethingabout 0 +make 0 +concept 0 +precis 0 +tomanipul 0 +demonstr 0 +us 0 +textbook 0 +carl 0 +gunter 0 +programm 0 +second 0 +edit 0 +larri 0 +paulson 0 +prerequisit 0 +least 0 +pascal 0 +prefer 0 +witha 0 +scheme 0 +haskel 0 +theoret 0 +profici 0 +undergraduatemathemat 0 +scienc 0 +ture 0 +machin 0 +recurs 0 +andlog 0 +predic 0 +calculu 0 +mathematicalmatur 0 +requir 0 +design 0 +math 0 +meng 0 +anmeng 0 +must 0 +talk 0 +find 0 +ifth 0 +suitabl 0 +contact 0 +newsgroup 0 +greg 0 +morrisett 0 +appoint 0 +admin 0 +assist 0 +linda 0 +competillo 0 +lfar 0 +erlingsson 0 +ulfar 0 +pmrelev 0 +mark 0 +leon 0 +resourc 0 +research 0 +emac 0 +mode 0 +comint 0 +need 0 +project 0 +line 0 +standard 0 +postscript 0 +user 0 +guid 0 +base 0 +system 0 +librari 0 +indexdocument 0 +toolsa 0 +gentl 0 +introduct 0 +andrew 0 +cum 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..3a913aa0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,29 @@ +cours 1 +page 1 +cornel 0 +project 0 +newsgroup 0 +home 0 +pagecsmultimedia 0 +systemscomput 0 +scienc 0 +depart 0 +univers 0 +fall 0 +final 0 +present 0 +schedul 0 +staff 0 +info 0 +materi 0 +student 0 +us 0 +link 0 +anounc 0 +access 0 +rivl 0 +bugcom 0 +question 0 +send 0 +mail 0 +janosi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..114da926 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,26 @@ +cours 1 +page 1 +cornel 0 +newsgroup 0 +home 0 +pagecsmultimedia 0 +systemscomput 0 +scienc 0 +depart 0 +univers 0 +fall 0 +staff 0 +info 0 +materi 0 +student 0 +project 0 +us 0 +link 0 +anounc 0 +access 0 +rivl 0 +bugcom 0 +question 0 +send 0 +mail 0 +janosi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..fae8ff20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,44 @@ +homework 1 +last 0 +modifi 0 +solut 0 +addendum 0 +design 0 +analysi 0 +algorithm 0 +evan 0 +time 0 +text 0 +kozen 0 +announc 0 +note 0 +homepag 0 +instructor 0 +ronitt 0 +rubinfeld 0 +moran 0 +locat 0 +upson 0 +springer 0 +verlag 0 +handout 0 +cours 0 +syllabu 0 +copi 0 +exam 0 +thursdai 0 +inupson 0 +talk 0 +tome 0 +reschedul 0 +cannot 0 +make 0 +refer 0 +cheat 0 +sheet 0 +class 0 +rajeev 0 +motwani 0 +lectur 0 +approxim 0 +paper 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..022d9f41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,146 @@ +parallel 1 +system 1 +machin 0 +cours 0 +program 0 +languag 0 +architectur 0 +support 0 +manufactur 0 +offer 0 +debat 0 +consider 0 +perform 0 +price 0 +processor 0 +workstat 0 +level 0 +larg 0 +number 0 +issu 0 +topic 0 +first 0 +algorithm 0 +focu 0 +oper 0 +requir 0 +memori 0 +part 0 +studi 0 +model 0 +home 0 +pagefronti 0 +thorsten 0 +eickenfal 0 +locat 0 +upson 0 +pmoffic 0 +hour 0 +pmcours 0 +descriptionparallel 0 +stai 0 +underscor 0 +fact 0 +multiprocessor 0 +product 0 +line 0 +howev 0 +futur 0 +look 0 +like 0 +heat 0 +past 0 +month 0 +feder 0 +spend 0 +cut 0 +erad 0 +massiv 0 +competitor 0 +glorifi 0 +farm 0 +smile 0 +cannot 0 +eas 0 +busi 0 +suffer 0 +much 0 +competit 0 +leverag 0 +latest 0 +microprocessor 0 +develop 0 +quickli 0 +core 0 +technolog 0 +integr 0 +shelf 0 +cost 0 +effect 0 +easili 0 +high 0 +host 0 +vari 0 +applic 0 +workload 0 +although 0 +week 0 +matur 0 +last 0 +year 0 +point 0 +almost 0 +usabl 0 +adequ 0 +allow 0 +gener 0 +purpos 0 +aspect 0 +featur 0 +taken 0 +grant 0 +sequenti 0 +comput 0 +portabl 0 +power 0 +debugg 0 +multi 0 +user 0 +access 0 +virtual 0 +fast 0 +examin 0 +complet 0 +split 0 +network 0 +us 0 +vertic 0 +approach 0 +interact 0 +associ 0 +execut 0 +hardwar 0 +implement 0 +focuss 0 +layer 0 +second 0 +specif 0 +slice 0 +horizont 0 +across 0 +select 0 +analysi 0 +design 0 +altern 0 +depth 0 +dash 0 +provid 0 +share 0 +contrast 0 +materialscours 0 +formatlectur 0 +note 0 +problem 0 +set 0 +term 0 +project 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..7a045306 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,116 @@ +cours 1 +us 1 +lectur 0 +annot 0 +tutori 0 +page 0 +organ 0 +thorsten 0 +languag 0 +materi 0 +allow 0 +also 0 +note 0 +conot 0 +document 0 +start 0 +introduct 0 +marshal 0 +program 0 +exampl 0 +onlin 0 +question 0 +home 0 +pageintroduct 0 +digit 0 +system 0 +comput 0 +eickenfal 0 +kimbal 0 +btopic 0 +includ 0 +representationof 0 +inform 0 +machin 0 +assembl 0 +processor 0 +interrupt 0 +memori 0 +hierarchi 0 +combinatori 0 +sequentialcircuit 0 +data 0 +path 0 +control 0 +unit 0 +design 0 +andmicroprogram 0 +helpif 0 +problem 0 +relat 0 +homework 0 +aproject 0 +best 0 +help 0 +theappropri 0 +point 0 +gethelp 0 +staff 0 +class 0 +mate 0 +otherwis 0 +send 0 +email 0 +cornel 0 +talk 0 +toon 0 +consult 0 +informationcoursemateri 0 +announcementsannounc 0 +video 0 +assign 0 +part 0 +small 0 +get 0 +avail 0 +case 0 +never 0 +onlinean 0 +brain 0 +great 0 +forpeopl 0 +know 0 +procedur 0 +like 0 +pascal 0 +fortran 0 +david 0 +cclass 0 +cardiff 0 +univers 0 +lot 0 +learnc 0 +todai 0 +guid 0 +book 0 +programsand 0 +refer 0 +well 0 +theyahoo 0 +wish 0 +surf 0 +search 0 +ofmor 0 +place 0 +frequent 0 +ask 0 +inansw 0 +common 0 +come 0 +learn 0 +contain 0 +link 0 +sever 0 +newsgroup 0 +maintain 0 +voneicken 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..bdd61596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,36 @@ +home 1 +cours 1 +depart 0 +clair 0 +final 0 +mail 0 +upson 0 +pagecsfound 0 +artifici 0 +intellig 0 +comput 0 +scienc 0 +cornel 0 +univers 0 +fall 0 +welcom 0 +inform 0 +materi 0 +code 0 +academ 0 +integr 0 +pleas 0 +read 0 +announc 0 +grade 0 +avail 0 +sometim 0 +saturdai 0 +send 0 +yourgrad 0 +request 0 +exam 0 +altern 0 +date 0 +pagesc 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..f7633872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,43 @@ +cornel 1 +cours 0 +offic 0 +hour 0 +upson 0 +samuel 0 +weber 0 +buch 0 +overview 0 +note 0 +home 0 +pagec 0 +softwar 0 +engin 0 +technolog 0 +techniquescomput 0 +scienc 0 +depart 0 +univers 0 +fall 0 +staff 0 +professor 0 +vineet 0 +thursdai 0 +yaron 0 +minski 0 +none 0 +materi 0 +handout 0 +lectur 0 +recit 0 +line 0 +resourc 0 +assign 0 +grade 0 +remark 0 +stuff 0 +frequent 0 +ask 0 +question 0 +borland 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..4b189cfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,31 @@ +assign 1 +project 0 +home 0 +page 0 +introduct 0 +brian 0 +smith 0 +tour 0 +guid 0 +cours 0 +inform 0 +homework 0 +spec 0 +lectur 0 +tabl 0 +content 0 +postscript 0 +slide 0 +comput 0 +system 0 +organ 0 +program 0 +procedur 0 +recurs 0 +stack 0 +assembl 0 +linker 0 +loader 0 +interrupt 0 +logic 0 +design 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..8aed0a1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,43 @@ +program 1 +prelim 0 +april 0 +home 0 +page 0 +spring 0 +cours 0 +inform 0 +exam 0 +tue 0 +messag 0 +welcom 0 +rememb 0 +check 0 +frequentlyfor 0 +import 0 +regard 0 +tuesdai 0 +review 0 +session 0 +held 0 +onsundai 0 +baker 0 +instructor 0 +teach 0 +assist 0 +offic 0 +hour 0 +get 0 +materi 0 +theworld 0 +wide 0 +codewarrior 0 +personalmac 0 +lectur 0 +februari 0 +thur 0 +march 0 +final 0 +last 0 +updat 0 +pierc 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..dcf16938 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,260 @@ +lectur 1 +class 0 +upson 0 +program 0 +gofer 0 +assign 0 +handout 0 +link 0 +cornel 0 +correct 0 +list 0 +offic 0 +hour 0 +algorithm 0 +loop 0 +spring 0 +email 0 +jeff 0 +cours 0 +server 0 +structur 0 +none 0 +avail 0 +comput 0 +foster 0 +final 0 +consult 0 +introduct 0 +recurs 0 +data 0 +analysi 0 +david 0 +file 0 +text 0 +preliminari 0 +macgof 0 +page 0 +scienc 0 +depart 0 +jfoster 0 +prelim 0 +thursdai 0 +topic 0 +note 0 +hollist 0 +format 0 +binhqx 0 +date 0 +basic 0 +pointer 0 +arrai 0 +dynam 0 +storag 0 +alloc 0 +dynamicdata 0 +fine 0 +point 0 +deriv 0 +notat 0 +tripl 0 +condit 0 +prove 0 +function 0 +type 0 +curri 0 +filter 0 +iter 0 +applic 0 +architectur 0 +framework 0 +binari 0 +tree 0 +industri 0 +strength 0 +java 0 +home 0 +view 0 +cover 0 +tuesdai 0 +exam 0 +mondai 0 +olin 0 +last 0 +jose 0 +thank 0 +descript 0 +staff 0 +code 0 +sampl 0 +credit 0 +languag 0 +kwan 0 +walker 0 +section 0 +ahal 0 +walkerwednesdai 0 +kaykylesteveericvasantha 0 +danerickaychrisdan 0 +microsoft 0 +word 0 +rich 0 +plain 0 +stuffit 0 +expand 0 +system 0 +find 0 +codewarrior 0 +site 0 +univers 0 +question 0 +problem 0 +troubl 0 +tabl 0 +us 0 +earlyvers 0 +netscap 0 +contest 0 +first 0 +march 0 +announcetim 0 +place 0 +soon 0 +theprelim 0 +second 0 +april 0 +room 0 +wereannounc 0 +thesecond 0 +noon 0 +lastnam 0 +name 0 +covereveryth 0 +addit 0 +topicsconv 0 +daywhenwherewhomondai 0 +davetuesdai 0 +jeffwednesdai 0 +davethursdai 0 +halfridai 0 +halsaturdai 0 +breview 0 +session 0 +chri 0 +chrisand 0 +take 0 +time 0 +studi 0 +enhanc 0 +also 0 +engrd 0 +fall 0 +summer 0 +grant 0 +bothcom 0 +prerequisit 0 +equival 0 +programmingexperi 0 +intermedi 0 +high 0 +level 0 +tocomput 0 +includ 0 +develop 0 +proof 0 +ofprogram 0 +abstract 0 +datatyp 0 +object 0 +orient 0 +ofalgorithm 0 +princip 0 +instructor 0 +perkin 0 +appoint 0 +teach 0 +assist 0 +held 0 +alan 0 +sectionsdaytimeroominstructortuesdai 0 +perkinstuesdai 0 +perkinswednesdai 0 +walkerthursdai 0 +fosterfridai 0 +regular 0 +schedul 0 +effect 0 +ofclass 0 +sundai 0 +fridai 0 +consultingsundaymondaytuesdaywednesdaythursdayfridai 0 +steveerickylechrisjpkyl 0 +steveerickylechrisjpvasantha 0 +josejosekayjosejpvasantha 0 +josejosekayjosejp 0 +three 0 +macbinari 0 +contain 0 +parseabl 0 +other 0 +next 0 +waspost 0 +given 0 +macintosh 0 +process 0 +http 0 +address 0 +foraladdin 0 +window 0 +version 0 +armandonunez 0 +postscript 0 +print 0 +almost 0 +anylas 0 +printer 0 +want 0 +need 0 +applicationlik 0 +ghostview 0 +intro 0 +cell 0 +complex 0 +set 0 +charact 0 +simpl 0 +dictionari 0 +flavor 0 +inth 0 +public 0 +lab 0 +unix 0 +ishaskel 0 +systemsz 0 +yale 0 +haskel 0 +pleas 0 +help 0 +piec 0 +ofgof 0 +think 0 +gener 0 +interest 0 +know 0 +make 0 +itavail 0 +sourc 0 +manual 0 +onth 0 +project 0 +line 0 +csdepart 0 +enhance_assign 0 +cuinfo 0 +metrowerk 0 +homepag 0 +aladdin 0 +maker 0 +comment 0 +suggest 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..eaf60c78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,32 @@ +cours 1 +announc 0 +home 0 +pagec 0 +structur 0 +interpretationof 0 +comput 0 +programscomput 0 +scienc 0 +departmentcornel 0 +universityspr 0 +staff 0 +info 0 +materi 0 +emac 0 +macmarlai 0 +demo 0 +section 0 +room 0 +chang 0 +prelim 0 +time 0 +place 0 +make 0 +grader 0 +happi 0 +gener 0 +exam 0 +extens 0 +date 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..4245d5f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,92 @@ +languag 1 +offic 0 +implement 0 +descript 0 +upson 0 +email 0 +advanc 0 +link 0 +modern 0 +program 0 +featur 0 +object 0 +class 0 +handout 0 +cornel 0 +phone 0 +hour 0 +evan 0 +inform 0 +refer 0 +environ 0 +document 0 +avail 0 +content 0 +descriptionhandoutsadministriviaweb 0 +java 0 +haskel 0 +dylan 0 +provid 0 +high 0 +level 0 +closur 0 +polymorph 0 +abstract 0 +data 0 +type 0 +garbag 0 +collect 0 +except 0 +continu 0 +thread 0 +synchron 0 +construct 0 +survei 0 +techniqu 0 +effici 0 +implementationof 0 +focu 0 +ofmodern 0 +function 0 +make 0 +connectionsto 0 +kind 0 +notabl 0 +orient 0 +separ 0 +page 0 +administrivia 0 +instructor 0 +greg 0 +morrisett 0 +admin 0 +assist 0 +linda 0 +competillo 0 +send 0 +appoint 0 +moran 0 +tuesdai 0 +thursdai 0 +pmweb 0 +mark 0 +leon 0 +resourc 0 +research 0 +project 0 +line 0 +standard 0 +postscript 0 +user 0 +guid 0 +base 0 +system 0 +librari 0 +tool 0 +indexdocument 0 +toolsa 0 +gentl 0 +introduct 0 +andrew 0 +cum 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..dc5586ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,24 @@ +postscript 1 +distribut 0 +homework 0 +examin 0 +annot 0 +practic 0 +systemspract 0 +system 0 +registr 0 +necessari 0 +student 0 +takingc 0 +gener 0 +informationcours 0 +overview 0 +logist 0 +read 0 +homeworkshomework 0 +amexaminationsmidterm 0 +final 0 +bibliographiesselect 0 +bibliographi 0 +prepar 0 +class 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..b019bcd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,166 @@ +poster 1 +split 0 +upson 0 +pleas 0 +instruct 0 +board 0 +well 0 +final 0 +project 0 +homework 0 +thorsten 0 +session 0 +present 0 +arriv 0 +minut 0 +problem 0 +judg 0 +report 0 +page 0 +import 0 +find 0 +start 0 +futur 0 +work 0 +check 0 +home 0 +pagehigh 0 +perform 0 +comput 0 +system 0 +eickenspr 0 +sessionthu 0 +tbdpleas 0 +sign 0 +outsid 0 +willdetermin 0 +order 0 +begin 0 +postersess 0 +late 0 +pick 0 +cindywilliam 0 +limit 0 +hold 0 +ithorizont 0 +hang 0 +corridor 0 +presentyour 0 +plu 0 +question 0 +give 0 +everyon 0 +asens 0 +attack 0 +solut 0 +contempl 0 +andth 0 +result 0 +gotten 0 +learn 0 +presentationswil 0 +messag 0 +across 0 +everi 0 +memberof 0 +group 0 +particip 0 +nativespeak 0 +difficulti 0 +taken 0 +consider 0 +info 0 +cours 0 +current 0 +mondai 0 +noon 0 +absolut 0 +must 0 +subdirectori 0 +willb 0 +contribut 0 +thelongest 0 +last 0 +remain 0 +server 0 +year 0 +tocom 0 +mani 0 +peopl 0 +search 0 +engin 0 +finalreport 0 +usual 0 +introduct 0 +aretri 0 +solv 0 +follow 0 +thorough 0 +discuss 0 +trade 0 +off 0 +part 0 +need 0 +explain 0 +chose 0 +thesolut 0 +option 0 +consid 0 +youreject 0 +webread 0 +convic 0 +bestsolut 0 +showcas 0 +us 0 +ampl 0 +experiment 0 +data 0 +goodexplan 0 +exactli 0 +measur 0 +know 0 +whatyou 0 +think 0 +shown 0 +left 0 +open 0 +projectsproject 0 +reportsproject 0 +proposalsiniti 0 +ideascours 0 +materialshomework 0 +cuc 0 +pagebefor 0 +machin 0 +might 0 +also 0 +sampl 0 +program 0 +introc 0 +casec 0 +technologyc 0 +cachesc 0 +netsc 0 +spc 0 +cyou 0 +inform 0 +paper 0 +parallel 0 +programmingin 0 +emdc 0 +sortingc 0 +spamc 0 +msgpassc 0 +mpic 0 +cachecohc 0 +locksc 0 +threadsc 0 +atmc 0 +netc 0 +scoreboardc 0 +tomasuloc 0 +predc 0 +superscalarc 0 +busesc 0 +pentiummaintain 0 +eicken 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..1da591e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,74 @@ +april 1 +februari 0 +march 0 +vision 0 +motion 0 +cont 0 +lectur 0 +transform 0 +machin 0 +cours 0 +problem 0 +note 0 +januari 0 +regular 0 +comput 0 +stereo 0 +correl 0 +parametr 0 +guest 0 +track 0 +staff 0 +instructor 0 +ramin 0 +zabihteach 0 +assist 0 +justin 0 +millerclass 0 +time 0 +place 0 +phillip 0 +project 0 +suggestionsproblem 0 +set 0 +class 0 +scribe 0 +week 0 +simul 0 +anneal 0 +calculu 0 +variat 0 +maximum 0 +likelihood 0 +estim 0 +markov 0 +random 0 +field 0 +snake 0 +introduct 0 +mestim 0 +method 0 +censu 0 +geometri 0 +geometr 0 +segment 0 +edg 0 +detect 0 +continu 0 +model 0 +base 0 +hausdorff 0 +distanc 0 +eigenhausdorff 0 +face 0 +recognitionsect 0 +optic 0 +flow 0 +constraint 0 +equationoth 0 +sourc 0 +home 0 +page 0 +histori 0 +object 0 +recognit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..453cbde5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,73 @@ +project 1 +home 0 +cours 0 +inform 0 +directori 0 +part 0 +speech 0 +corpu 0 +depart 0 +cornel 0 +code 0 +brill 0 +annot 0 +text 0 +wordnet 0 +contain 0 +pagecsintroduct 0 +natur 0 +languag 0 +understandingcomput 0 +scienc 0 +univers 0 +spring 0 +welcom 0 +materi 0 +academ 0 +integr 0 +pleas 0 +read 0 +announcementsher 0 +list 0 +resourc 0 +avail 0 +taggerbrown 0 +brown 0 +tag 0 +small 0 +withpart 0 +execut 0 +sure 0 +environ 0 +variabl 0 +wnsearchdir 0 +archiv 0 +dict 0 +final 0 +site 0 +descript 0 +ofth 0 +content 0 +penn 0 +treebank 0 +iicollect 0 +canus 0 +like 0 +talk 0 +francisabout 0 +access 0 +us 0 +databas 0 +recent 0 +paper 0 +computationallinguist 0 +repositori 0 +pointer 0 +variou 0 +system 0 +compon 0 +present 0 +schedulewhat 0 +turn 0 +pagesc 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..893af668 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,36 @@ +class 1 +cours 0 +lab 0 +upson 0 +assign 0 +tuesdai 0 +wednesdai 0 +section 0 +prelim 0 +first 0 +home 0 +page 0 +info 0 +syllabu 0 +link 0 +announcementsroom 0 +updat 0 +unforseen 0 +circumst 0 +still 0 +unableto 0 +therefor 0 +follow 0 +room 0 +philip 0 +thursdai 0 +meet 0 +maclab 0 +usual 0 +close 0 +book 0 +exam 0 +cover 0 +materialcov 0 +need 0 +comput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..b0b44c92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,38 @@ +lectur 1 +maxflow 0 +algorithm 0 +heap 0 +union 0 +find 0 +fall 0 +matroid 0 +binomi 0 +tree 0 +preflow 0 +push 0 +professor 0 +monika 0 +rauch 0 +henzingeremail 0 +cornel 0 +cours 0 +informationhomework 0 +solut 0 +graph 0 +explor 0 +greedi 0 +dijkstra 0 +bellman 0 +ford 0 +matrix 0 +closur 0 +fibonacci 0 +treap 0 +randomizedsearch 0 +mincut 0 +theorem 0 +edmond 0 +karp 0 +dinitz 0 +dynam 0 +implement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..33c9f509 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,212 @@ +juli 1 +assign 0 +class 0 +comput 0 +matlab 0 +upson 0 +grade 0 +final 0 +introduct 0 +cours 0 +june 0 +scientif 0 +sourc 0 +code 0 +problem 0 +linear 0 +offic 0 +deadlin 0 +interpol 0 +hall 0 +time 0 +work 0 +midterm 0 +exam 0 +system 0 +scmv 0 +file 0 +syllabu 0 +handout 0 +numer 0 +includ 0 +quadratur 0 +equat 0 +solv 0 +least 0 +squar 0 +vector 0 +niko 0 +cornel 0 +hour 0 +appoint 0 +ozan 0 +math 0 +student 0 +lab 0 +siblei 0 +martha 0 +hand 0 +done 0 +credit 0 +alon 0 +name 0 +chang 0 +total 0 +score 0 +newton 0 +cubic 0 +review 0 +classroom 0 +minim 0 +function 0 +euler 0 +method 0 +exampl 0 +unix 0 +zcat 0 +computationsumm 0 +inform 0 +setsan 0 +elementari 0 +analysi 0 +scientificcomput 0 +topic 0 +andnonlinear 0 +fit 0 +ordinarydifferenti 0 +environ 0 +us 0 +effici 0 +reliabl 0 +stabil 0 +stress 0 +informationstaff 0 +pitsiani 0 +instructor 0 +hafizogullari 0 +teach 0 +assist 0 +lecturesclass 0 +meet 0 +everi 0 +administrationlauri 0 +buck 0 +question 0 +concern 0 +record 0 +account 0 +addressedto 0 +administr 0 +prerequisitesc 0 +corequisit 0 +materialstext 0 +matrix 0 +approachus 0 +charl 0 +loan 0 +distribut 0 +softwar 0 +purchas 0 +eitherth 0 +macintosh 0 +version 0 +though 0 +labsthi 0 +design 0 +three 0 +renssela 0 +setsther 0 +lectur 0 +orfrom 0 +page 0 +extra 0 +avail 0 +rack 0 +outsid 0 +collect 0 +computingproblem 0 +return 0 +behandl 0 +begin 0 +duedat 0 +late 0 +accept 0 +worst 0 +gradefrom 0 +ignor 0 +partner 0 +printyour 0 +copi 0 +pair 0 +firstpag 0 +addit 0 +partnernam 0 +examsther 0 +dai 0 +list 0 +gradingyour 0 +follow 0 +best 0 +beassign 0 +accord 0 +rel 0 +rank 0 +base 0 +onyour 0 +calendar 0 +program 0 +error 0 +float 0 +point 0 +number 0 +registr 0 +polynomi 0 +vandermond 0 +piecewis 0 +hermit 0 +spline 0 +integr 0 +cote 0 +composit 0 +rule 0 +adapt 0 +drop 0 +matric 0 +oper 0 +given 0 +choleski 0 +find 0 +root 0 +variabl 0 +multivari 0 +initi 0 +valu 0 +backward 0 +rung 0 +kutta 0 +adam 0 +computingat 0 +rennselaerhal 0 +locat 0 +folder 0 +applic 0 +chapter 0 +plan 0 +stand 0 +otherthan 0 +on 0 +uncompress 0 +untar 0 +command 0 +highli 0 +recommend 0 +brows 0 +session 0 +need 0 +postscript 0 +viewer 0 +instal 0 +order 0 +randperm 0 +length 0 +set 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..51180326 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,182 @@ +system 1 +class 0 +cours 0 +materi 0 +oper 0 +quizz 0 +program 0 +also 0 +final 0 +nawaaz 0 +subject 0 +memori 0 +file 0 +cover 0 +issu 0 +thursdai 0 +summer 0 +home 0 +page 0 +instructor 0 +indupraka 0 +kodukula 0 +cornel 0 +ahm 0 +motd 0 +descript 0 +schedul 0 +polici 0 +statement 0 +collabor 0 +offic 0 +hour 0 +send 0 +comment 0 +topic 0 +deadlock 0 +method 0 +comput 0 +discuss 0 +requir 0 +roughli 0 +follow 0 +particular 0 +start 0 +lldiscuss 0 +next 0 +virtual 0 +look 0 +book 0 +week 0 +first 0 +worth 0 +group 0 +prereq 0 +permiss 0 +praka 0 +teach 0 +assist 0 +archiv 0 +prerequsit 0 +outlin 0 +textbook 0 +grade 0 +anintroduct 0 +logic 0 +design 0 +emphasison 0 +multiprogram 0 +includ 0 +processsynchron 0 +manag 0 +input 0 +output 0 +inform 0 +share 0 +protect 0 +secur 0 +theimpact 0 +network 0 +distribut 0 +environ 0 +operatingsystem 0 +fast 0 +pace 0 +requiringconst 0 +attent 0 +prerequsitescomplet 0 +familiar 0 +assum 0 +inparticular 0 +knowledg 0 +architectur 0 +assembl 0 +programminglanguag 0 +structur 0 +theintroductori 0 +purpos 0 +thatwil 0 +remind 0 +audienc 0 +outlineth 0 +organ 0 +depend 0 +feedback 0 +chang 0 +theorder 0 +content 0 +section 0 +overview 0 +concurr 0 +synchron 0 +ensur 0 +mutualexclus 0 +detect 0 +prevent 0 +algorithm 0 +multiprocessor 0 +well 0 +memorymanag 0 +variou 0 +usedto 0 +implement 0 +segment 0 +evolut 0 +thetradit 0 +micro 0 +kernel 0 +timepermit 0 +lectur 0 +advanc 0 +multithread 0 +serverless 0 +textbooksth 0 +princip 0 +text 0 +conceptsbook 0 +abraham 0 +silberschatz 0 +peter 0 +galvin 0 +distributeclass 0 +note 0 +complet 0 +noteswil 0 +avail 0 +world 0 +wide 0 +pageat 0 +meet 0 +mondaythru 0 +second 0 +addit 0 +weekli 0 +assign 0 +thesewil 0 +hand 0 +thursdayat 0 +gradingeach 0 +homework 0 +carri 0 +weightag 0 +combinedweightag 0 +twomidterm 0 +surpris 0 +todetermin 0 +understand 0 +collaborationat 0 +peopl 0 +form 0 +eachhomework 0 +need 0 +submit 0 +copi 0 +thehomework 0 +close 0 +closednot 0 +mondai 0 +tuesdai 0 +wednesdai 0 +upson 0 +maintain 0 +induprakaskodukula 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..9e458976 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,41 @@ +lectur 1 +septemb 0 +note 0 +octob 0 +novemb 0 +solut 0 +decemb 0 +homework 0 +cornel 0 +offic 0 +hour 0 +appoint 0 +csc 0 +advanc 0 +program 0 +languagesfal 0 +upson 0 +instructor 0 +henzingerupson 0 +class 0 +teach 0 +assist 0 +neal 0 +glewupson 0 +glew 0 +handoutshandout 0 +cours 0 +informationhandout 0 +get 0 +start 0 +mlhandout 0 +meta 0 +lambdahomeworkshomework 0 +grieshomework 0 +notesraw 0 +scribe 0 +noteslectur 0 +introduct 0 +mllectur 0 +midterm 0 +grieslectur 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..0f1e8aba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,29 @@ +fall 1 +note 0 +upson 0 +offic 0 +hour 0 +ravi 0 +advanc 0 +program 0 +languag 0 +faculti 0 +prof 0 +robert 0 +constabl 0 +mondai 0 +teach 0 +assist 0 +kumar 0 +thur 0 +assign 0 +nuprl 0 +classic 0 +comment 0 +question 0 +suggest 0 +page 0 +pleas 0 +mail 0 +pavel 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..91a43807 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,14 @@ +cours 1 +addit 1 +inform 1 +coursesc 0 +fall 0 +spring 0 +maintain 0 +individualfaculti 0 +member 0 +consult 0 +class 0 +page 0 +contactgloria 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..df53f438 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,33 @@ +almstrum 1 +utexa 0 +analysi 0 +program 0 +instructor 0 +vicki 0 +linyuan 0 +fall 0 +yang 0 +syllabu 0 +announc 0 +homework 0 +assign 0 +handout 0 +interest 0 +tutori 0 +new 0 +class 0 +homepag 0 +last 0 +updat 0 +page 0 +prepar 0 +suggest 0 +comment 0 +welcom 0 +click 0 +send 0 +mail 0 +depart 0 +comput 0 +scienc 0 +austin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..22fb8143 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,114 @@ +formal 1 +system 0 +april 0 +reason 0 +tool 0 +present 0 +method 0 +examin 0 +logic 0 +otter 0 +foundat 0 +mani 0 +program 0 +number 0 +order 0 +theori 0 +upon 0 +version 0 +page 0 +final 0 +spring 0 +ofmathemat 0 +taylor 0 +cours 0 +blurb 0 +approach 0 +theobject 0 +specifi 0 +comput 0 +includ 0 +formalizationof 0 +world 0 +interact 0 +creationof 0 +numer 0 +systemsfor 0 +mechan 0 +formalmethod 0 +support 0 +differ 0 +exampl 0 +suchsystem 0 +pair 0 +primit 0 +recurs 0 +arithmet 0 +boyer 0 +moor 0 +prover 0 +first 0 +nelson 0 +higher 0 +imp 0 +equat 0 +mizar 0 +quaif 0 +type 0 +nuprl 0 +lego 0 +coqstud 0 +choos 0 +help 0 +instructor 0 +ortool 0 +grade 0 +base 0 +aboutthes 0 +projecthtml 0 +theqe 0 +manifestoplain 0 +text 0 +qedmanifestobowen 0 +backup 0 +copi 0 +chief 0 +assign 0 +select 0 +bowen 0 +report 0 +class 0 +oral 0 +either 0 +good 0 +freeli 0 +avail 0 +implement 0 +consult 0 +make 0 +choic 0 +test 0 +hope 0 +guest 0 +localform 0 +commun 0 +tent 0 +schedul 0 +rick 0 +tannei 0 +continu 0 +trevor 0 +hick 0 +ruben 0 +gamboa 0 +squar 0 +root 0 +samuel 0 +guyer 0 +circal 0 +process 0 +algebra 0 +sawada 0 +russel 0 +turpin 0 +galoi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..c6caaa62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,151 @@ +project 1 +comput 0 +memori 0 +cours 0 +system 0 +design 0 +pipelin 0 +midterm 0 +advanc 0 +architectur 0 +quantit 0 +analysi 0 +major 0 +parallel 0 +network 0 +hour 0 +hennessi 0 +edit 0 +stedit 0 +research 0 +work 0 +pair 0 +tech 0 +hazard 0 +branch 0 +predictionch 0 +revieww 0 +spring 0 +architecturethi 0 +focus 0 +techniqu 0 +evaluationof 0 +modern 0 +select 0 +appropri 0 +benchmarksto 0 +reveal 0 +compar 0 +perform 0 +altern 0 +choic 0 +insystem 0 +emphasi 0 +compon 0 +subsystem 0 +highperform 0 +instruct 0 +level 0 +memoryhierarchi 0 +input 0 +output 0 +orient 0 +interconnect 0 +studentswil 0 +undertak 0 +oftheir 0 +choos 0 +administr 0 +informationuniqu 0 +number 0 +meet 0 +place 0 +instructor 0 +mikedahlinoffic 0 +appoint 0 +tbdtaoffic 0 +tbdreadingstextbook 0 +patteson 0 +computerarchitectur 0 +approach 0 +second 0 +note 0 +significantli 0 +differ 0 +recommend 0 +attempt 0 +textbook 0 +errata 0 +sheetfor 0 +pattersonin 0 +addit 0 +read 0 +current 0 +paper 0 +variou 0 +aspect 0 +currentcomput 0 +watch 0 +space 0 +pointer 0 +readinglist 0 +grade 0 +class 0 +particip 0 +homework 0 +exam 0 +scheduleweekdatetopicreadingduejan 0 +intro 0 +admin 0 +review 0 +perf 0 +cost 0 +amdahl 0 +trendsch 0 +cach 0 +isa 0 +mlkholidayf 0 +static 0 +proposalfeb 0 +scoreboard 0 +tomasulu 0 +speculationch 0 +dynam 0 +predict 0 +limit 0 +vector 0 +processorsch 0 +dfeb 0 +hierarchych 0 +surveyfeb 0 +dram 0 +banksf 0 +breakm 0 +breakmar 0 +metric 0 +queu 0 +buss 0 +disk 0 +raidch 0 +tertiari 0 +networksf 0 +networksch 0 +checkpointapr 0 +architecturesf 0 +mppsch 0 +mpp 0 +preseantationsm 0 +presentationsfri 0 +last 0 +classesm 0 +written 0 +reportaddit 0 +resourcescours 0 +page 0 +product 0 +confer 0 +bibliographi 0 +reportsyahoo 0 +businessand 0 +economi 0 +compani 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..32a7b4ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,71 @@ +oper 1 +class 1 +system 0 +protocol 0 +address 0 +gener 0 +internet 0 +provid 0 +also 0 +paper 0 +project 0 +list 0 +systemsuniqu 0 +recent 0 +explos 0 +interest 0 +world 0 +wide 0 +resultedin 0 +evolv 0 +us 0 +thetradit 0 +concern 0 +interprocess 0 +commun 0 +resourc 0 +alloc 0 +secur 0 +contextof 0 +goal 0 +understandingof 0 +current 0 +state 0 +addressproblem 0 +must 0 +solv 0 +matur 0 +purpos 0 +operatingsystem 0 +hypothesi 0 +behind 0 +design 0 +mani 0 +theissu 0 +context 0 +addressedin 0 +tradit 0 +area 0 +occasionallyread 0 +relat 0 +bear 0 +understandingcurr 0 +problem 0 +reportspoint 0 +research 0 +refer 0 +inform 0 +syllabu 0 +read 0 +schedul 0 +longer 0 +less 0 +organ 0 +rosterhandout 0 +verif 0 +sslprotocol 0 +proofsketch 0 +guidelin 0 +final 0 +talk 0 +report 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..0f508bb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,217 @@ +handout 1 +program 0 +exam 0 +solut 0 +date 0 +class 0 +homework 0 +avail 0 +content 0 +back 0 +last 0 +object 0 +pick 0 +lectur 0 +part 0 +utexa 0 +exampl 0 +fantasm 0 +discuss 0 +offic 0 +start 0 +test 0 +earli 0 +late 0 +problem 0 +cours 0 +hour 0 +p_global 0 +noon 0 +session 0 +drop 0 +mondai 0 +wednesdai 0 +practic 0 +note 0 +page 0 +final 0 +info 0 +regist 0 +electron 0 +section 0 +check 0 +place 0 +pass 0 +bit 0 +also 0 +import 0 +tue 0 +meet 0 +time 0 +thursdai 0 +us 0 +academ 0 +chang 0 +bonu 0 +slide 0 +turn 0 +electronc 0 +user 0 +manual 0 +disk 0 +paramet 0 +macsbug 0 +electoron 0 +sourc 0 +output 0 +fall 0 +comput 0 +organ 0 +updat 0 +professor 0 +yurkanan 0 +dragon 0 +version 0 +oper 0 +email 0 +yoonsuck 0 +choe 0 +yschoe 0 +edum 0 +pleas 0 +holidai 0 +period 0 +withdraw 0 +typo 0 +remov 0 +rightmost 0 +make 0 +fridai 0 +saturdai 0 +announc 0 +door 0 +procudur 0 +thur 0 +assign 0 +front 0 +system 0 +document 0 +folder 0 +powermac 0 +quadra 0 +mac 0 +instruct 0 +compil 0 +call 0 +model 0 +newsgroup 0 +pascal 0 +constantli 0 +construct 0 +onmon 0 +titl 0 +prerequisit 0 +grade 0 +least 0 +chri 0 +edmondson 0 +new 0 +post 0 +extra 0 +visit 0 +syllabu 0 +locat 0 +attend 0 +cynthia 0 +deepa 0 +ramani 0 +dparam 0 +eduw 0 +zhang 0 +gzhang 0 +eduf 0 +conduct 0 +calendar 0 +labor 0 +refund 0 +rare 0 +extenu 0 +circumst 0 +automat 0 +begin 0 +penalti 0 +reason 0 +univers 0 +registr 0 +fail 0 +deadlin 0 +appli 0 +graduat 0 +thanksgiv 0 +appeal 0 +schedul 0 +glanc 0 +correct 0 +night 0 +boxin 0 +prof 0 +letter 0 +student 0 +overview 0 +descript 0 +topic 0 +singl 0 +ascii 0 +code 0 +chart 0 +home 0 +work 0 +endia 0 +memori 0 +hierarchi 0 +submit 0 +stack 0 +function 0 +requir 0 +comment 0 +real 0 +proc 0 +func 0 +might 0 +risc 0 +architectur 0 +includ 0 +valu 0 +practiv 0 +obsolet 0 +spec 0 +turnin 0 +procedur 0 +gener 0 +interfac 0 +charact 0 +orient 0 +cheat 0 +polici 0 +questionair 0 +must 0 +offici 0 +except 0 +go 0 +held 0 +free 0 +toward 0 +next 0 +thank 0 +brett 0 +jame 0 +data 0 +subroutine_fil 0 +exception_fil 0 +avali 0 +resourc 0 +studi 0 +guid 0 +maintain 0 +austin 0 +utc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..52b25d61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,206 @@ +pascal 1 +porter 0 +final 0 +review 0 +question 0 +link 0 +class 0 +version 0 +program 0 +new 0 +lang 0 +week 0 +offic 0 +exam 0 +page 0 +import 0 +avail 0 +room 0 +post 0 +assign 0 +utexa 0 +discuss 0 +midterm 0 +turbo 0 +us 0 +topic 0 +next 0 +note 0 +dwip 0 +lectur 0 +html 0 +postscript 0 +welch 0 +solut 0 +test 0 +newsgroup 0 +right 0 +semest 0 +inform 0 +announc 0 +take 0 +home 0 +last 0 +howev 0 +relat 0 +good 0 +special 0 +cover 0 +time 0 +need 0 +help 0 +bruce 0 +decemb 0 +click 0 +also 0 +addendum 0 +tutori 0 +session 0 +studi 0 +guid 0 +ansi 0 +articl 0 +homepag 0 +warn 0 +construct 0 +becom 0 +activ 0 +progress 0 +andther 0 +relev 0 +regard 0 +todai 0 +unabl 0 +maintain 0 +forthes 0 +coupl 0 +dai 0 +put 0 +follow 0 +luckfor 0 +held 0 +painter 0 +hall 0 +exact 0 +locat 0 +dependon 0 +availib 0 +door 0 +therewil 0 +someon 0 +moreov 0 +almost 0 +total 0 +coverag 0 +uptoth 0 +feel 0 +free 0 +come 0 +glad 0 +luck 0 +resolutio 0 +complex 0 +theori 0 +nimar 0 +arora 0 +parallel 0 +process 0 +banerje 0 +boolean 0 +circuit 0 +porterquest 0 +sheet 0 +rotat 0 +bit 0 +disregard 0 +somewhat 0 +beyond 0 +scope 0 +slide 0 +present 0 +summar 0 +thecont 0 +whole 0 +reserv 0 +desk 0 +atugl 0 +hope 0 +webpag 0 +soon 0 +experienc 0 +technic 0 +difficulti 0 +caus 0 +length 0 +file 0 +schedul 0 +pleas 0 +check 0 +ad 0 +sostai 0 +tune 0 +download 0 +prolog 0 +comput 0 +scienc 0 +iinstructorbruc 0 +mondai 0 +email 0 +tasoffic 0 +hourslab 0 +section 0 +schedulec 0 +thursdai 0 +uniqu 0 +number 0 +cours 0 +descriptionclass 0 +scheduleclass 0 +includ 0 +articlesclass 0 +newsgroupprogram 0 +assignmentsprogram 0 +pascaltutori 0 +text 0 +format 0 +faqyou 0 +sampl 0 +programm 0 +gener 0 +languag 0 +materi 0 +frequent 0 +ask 0 +zipe 0 +concept 0 +structur 0 +base 0 +newgroup 0 +might 0 +interest 0 +comp 0 +isocomp 0 +maccomp 0 +borlandcomp 0 +misccomp 0 +delphi 0 +miscfj 0 +rememb 0 +access 0 +dell 0 +serverto 0 +mail 0 +prefer 0 +item 0 +option 0 +menu 0 +look 0 +usual 0 +lead 0 +importantstuff 0 +descript 0 +send 0 +comment 0 +critic 0 +suggest 0 +addit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..9a809864 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,70 @@ +assign 1 +mesa 0 +cours 0 +librari 0 +exampl 0 +graphic 0 +opengl 0 +page 0 +updat 0 +comput 0 +gener 0 +inform 0 +us 0 +utc 0 +book 0 +second 0 +requir 0 +student 0 +code 0 +spring 0 +cscomput 0 +graphicsspr 0 +instructor 0 +donald 0 +fussel 0 +descript 0 +syllabu 0 +year 0 +provid 0 +anopengl 0 +like 0 +platform 0 +hasbeen 0 +instal 0 +public 0 +workstat 0 +sciencesdepart 0 +instruct 0 +sampl 0 +makefil 0 +machin 0 +specif 0 +home 0 +center 0 +refer 0 +manual 0 +ousterhout 0 +welch 0 +turn 0 +note 0 +option 0 +exam 0 +oneor 0 +show 0 +examwil 0 +submit 0 +wish 0 +bothmai 0 +higher 0 +score 0 +curv 0 +willcount 0 +xlib 0 +driver 0 +billthecat 0 +copi 0 +file 0 +directori 0 +contain 0 +slate 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..1c94c0e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,67 @@ +graphic 1 +mesa 1 +exampl 1 +assign 1 +cours 0 +librari 0 +inform 0 +machin 0 +opengl 0 +page 0 +book 0 +comput 0 +gener 0 +us 0 +utc 0 +welch 0 +code 0 +reinstal 0 +walker 0 +fall 0 +gcomput 0 +graphicsfal 0 +instructor 0 +donald 0 +fussel 0 +descript 0 +syllabu 0 +year 0 +provid 0 +anopengl 0 +like 0 +platform 0 +hasbeen 0 +instal 0 +public 0 +workstat 0 +sciencesdepart 0 +instruct 0 +sampl 0 +makefil 0 +specif 0 +home 0 +center 0 +refer 0 +manual 0 +ousterhout 0 +program 0 +turn 0 +xlib 0 +driver 0 +billthecat 0 +copi 0 +file 0 +directori 0 +contain 0 +slate 0 +new 0 +turnin 0 +work 0 +libtcl 0 +libtk 0 +tclsh 0 +wish 0 +compil 0 +demo 0 +repair 0 +sourc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..c037f09c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,45 @@ +email 1 +utexa 1 +french 1 +offic 0 +hour 0 +taylor 0 +hqliu 0 +homework 0 +problem 0 +last 0 +appear 0 +cryptographi 0 +professor 0 +david 0 +zuckerman 0 +huiqun 0 +station 0 +hall 0 +basement 0 +syllabu 0 +mathemat 0 +background 0 +textbook 0 +ciphertext 0 +notic 0 +answer 0 +abl 0 +recogn 0 +word 0 +canada 0 +frequenc 0 +common 0 +letter 0 +chang 0 +drastic 0 +english 0 +howev 0 +digram 0 +like 0 +page 0 +modifi 0 +septemb 0 +comment 0 +welcom 0 +send 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..b0462f1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,314 @@ +problem 1 +class 1 +distribut 0 +protocol 0 +solut 0 +comput 0 +requir 0 +paper 0 +system 0 +final 0 +prove 0 +assign 0 +rajeev 0 +exam 0 +global 0 +topic 0 +show 0 +snapshot 0 +point 0 +lorenzo 0 +offic 0 +hour 0 +textbook 0 +suggest 0 +joshi 0 +mondai 0 +cover 0 +time 0 +us 0 +state 0 +detect 0 +messag 0 +file 0 +discuss 0 +give 0 +homework 0 +page 0 +proof 0 +deriv 0 +link 0 +cours 0 +set 0 +midterm 0 +newsgroup 0 +utexa 0 +hall 0 +phone 0 +expect 0 +sourc 0 +refer 0 +wednesdai 0 +second 0 +design 0 +distributedsystem 0 +includ 0 +clock 0 +manag 0 +agreement 0 +byzantin 0 +group 0 +program 0 +techniqu 0 +secur 0 +integr 0 +gener 0 +depend 0 +present 0 +allow 0 +consist 0 +written 0 +collabor 0 +student 0 +encourag 0 +take 0 +survei 0 +ofth 0 +last 0 +colleagu 0 +improv 0 +commun 0 +someth 0 +solv 0 +algorithm 0 +assum 0 +fifo 0 +asynchron 0 +predic 0 +number 0 +deadlock 0 +would 0 +cost 0 +postscript 0 +send 0 +ispr 0 +instructor 0 +alvisiteach 0 +assist 0 +joshicont 0 +locat 0 +mechan 0 +content 0 +grade 0 +inform 0 +pertain 0 +instruct 0 +stafflorenzo 0 +alvisi 0 +taylor 0 +tuesdai 0 +thursdai 0 +meet 0 +arrang 0 +appoint 0 +mechanicsi 0 +materi 0 +requiredtextbook 0 +remaind 0 +come 0 +given 0 +classat 0 +appropri 0 +lectur 0 +robert 0 +moor 0 +isutexa 0 +edit 0 +mullend 0 +editor 0 +acmpress 0 +addison 0 +weslei 0 +publish 0 +compani 0 +read 0 +contentc 0 +abstract 0 +tobe 0 +build 0 +tomorrow 0 +cut 0 +logic 0 +vector 0 +causal 0 +messagedeliveri 0 +properti 0 +log 0 +checkpoint 0 +replic 0 +machin 0 +approach 0 +primari 0 +backupapproach 0 +order 0 +multicast 0 +applic 0 +cach 0 +disconnect 0 +oper 0 +servic 0 +synchron 0 +encrypt 0 +authent 0 +principl 0 +thepresent 0 +case 0 +studi 0 +exemplifi 0 +principleshav 0 +implement 0 +real 0 +interest 0 +meor 0 +size 0 +apresent 0 +share 0 +memori 0 +object 0 +kernel 0 +support 0 +weak 0 +replica 0 +electron 0 +commerc 0 +wide 0 +area 0 +networksgradingther 0 +begrad 0 +demonstr 0 +credibl 0 +effort 0 +onbehalf 0 +author 0 +whether 0 +right 0 +wrong 0 +willrec 0 +better 0 +three 0 +ispermit 0 +acollabor 0 +singl 0 +submit 0 +forgrad 0 +name 0 +collaborationswil 0 +consid 0 +violat 0 +academ 0 +home 0 +examin 0 +nocollabor 0 +howev 0 +towrit 0 +issuesthat 0 +list 0 +bedistribut 0 +start 0 +henc 0 +week 0 +tocomplet 0 +also 0 +team 0 +prepar 0 +twolectur 0 +previous 0 +choosethi 0 +option 0 +write 0 +asingl 0 +warmli 0 +toconsid 0 +volunt 0 +excellentopportun 0 +skill 0 +setsin 0 +subsequ 0 +shouldconform 0 +follow 0 +guidelin 0 +synonym 0 +precis 0 +isrequir 0 +ask 0 +imposs 0 +thatmak 0 +clear 0 +cannot 0 +matter 0 +algorithmi 0 +insuffici 0 +particular 0 +work 0 +develop 0 +must 0 +accompani 0 +ofcorrect 0 +unless 0 +explicitli 0 +told 0 +otherwis 0 +thetextbook 0 +channel 0 +asnapshot 0 +onth 0 +assumpt 0 +correct 0 +theprotocol 0 +produc 0 +atmost 0 +note 0 +book 0 +contain 0 +mattern 0 +thatcontain 0 +urg 0 +resist 0 +thetempt 0 +visit 0 +librari 0 +agener 0 +stabl 0 +moreeffici 0 +specif 0 +often 0 +conceptu 0 +simpler 0 +effici 0 +term 0 +exchang 0 +base 0 +special 0 +ideal 0 +need 0 +central 0 +monitorprocess 0 +process 0 +monitor 0 +basedsnapshot 0 +nowonlin 0 +filedescrib 0 +examth 0 +constitut 0 +fridaymai 0 +thepostscript 0 +describ 0 +question 0 +feel 0 +freeto 0 +email 0 +idea 0 +pleas 0 +yoursuggest 0 +edurajeev 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..245df5ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,57 @@ +class 1 +fall 0 +jacob 0 +kornerup 0 +note 0 +page 0 +taught 0 +avail 0 +read 0 +midterm 0 +reflect 0 +current 0 +version 0 +differ 0 +content 0 +scope 0 +welcom 0 +homepag 0 +austin 0 +bywil 0 +adam 0 +practic 0 +informationabout 0 +cours 0 +look 0 +syllabu 0 +linea 0 +technic 0 +compil 0 +program 0 +turn 0 +inhomework 0 +electron 0 +homework 0 +solut 0 +time 0 +crude 0 +interfac 0 +newsgrouputexa 0 +correspond 0 +takesplac 0 +exampl 0 +textbook 0 +pascalprogramm 0 +organ 0 +chapter 0 +link 0 +home 0 +requir 0 +overhead 0 +viewinginform 0 +projecthow 0 +find 0 +offic 0 +studi 0 +examand 0 +answer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..446a347d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,198 @@ +java 1 +cours 0 +comp 0 +lang 0 +program 0 +addison 0 +weslei 0 +object 0 +orient 0 +newsgroup 0 +librari 0 +design 0 +sourc 0 +utexa 0 +inform 0 +class 0 +manual 0 +code 0 +student 0 +follow 0 +relat 0 +stroustrup 0 +faq 0 +standard 0 +applet 0 +lavend 0 +eduoffic 0 +hour 0 +gokul 0 +templat 0 +opportun 0 +solut 0 +problem 0 +us 0 +text 0 +avail 0 +style 0 +draft 0 +setup 0 +link 0 +site 0 +postscript 0 +home 0 +compil 0 +libg 0 +server 0 +archiv 0 +note 0 +descriptionc 0 +programminglast 0 +updat 0 +professor 0 +greg 0 +appt 0 +rajaram 0 +mondai 0 +wednesdai 0 +station 0 +lavendercours 0 +infocours 0 +syllabusannouncementslectur 0 +noteshomework 0 +solutionsprogram 0 +assignmentsgnu 0 +manualsstandard 0 +codesocket 0 +manualdescript 0 +intend 0 +alreadi 0 +anintroductori 0 +offer 0 +introduct 0 +give 0 +think 0 +comput 0 +manner 0 +captur 0 +reusablepattern 0 +construct 0 +polymorph 0 +typehierarchi 0 +write 0 +profici 0 +professionallyus 0 +challeng 0 +coop 0 +bookstor 0 +horstmann 0 +master 0 +john 0 +wilei 0 +associ 0 +materiali 0 +drawn 0 +lectur 0 +materi 0 +languag 0 +edit 0 +elli 0 +annot 0 +refer 0 +evolut 0 +cargil 0 +cline 0 +lomow 0 +coplien 0 +advanc 0 +idiom 0 +plauger 0 +prentic 0 +hall 0 +gamma 0 +helm 0 +johnson 0 +vlissid 0 +pattern 0 +element 0 +reusabl 0 +softwar 0 +forum 0 +open 0 +discuss 0 +announcementsabout 0 +strongli 0 +encourag 0 +particip 0 +linediscuss 0 +fellow 0 +classmat 0 +lavendery 0 +also 0 +interest 0 +usenet 0 +helpjava 0 +hotjava 0 +advocaci 0 +misc 0 +programm 0 +secur 0 +tech 0 +javascript 0 +internet 0 +oopth 0 +date 0 +pleas 0 +know 0 +dead 0 +ansi 0 +page 0 +lab 0 +renssela 0 +polytechn 0 +institut 0 +product 0 +info 0 +objectspac 0 +libstdc 0 +mitgnu 0 +cygnusgnu 0 +document 0 +doug 0 +pagec 0 +mirror 0 +list 0 +ftpobject 0 +system 0 +developmentindex 0 +librariesth 0 +virtual 0 +libraryindex 0 +sourcesth 0 +talig 0 +frameworkjava 0 +javasoft 0 +gamelan 0 +huge 0 +registri 0 +digit 0 +espresso 0 +good 0 +summari 0 +current 0 +centr 0 +new 0 +event 0 +jar 0 +rate 0 +denni 0 +kafura 0 +virginia 0 +techdoug 0 +schmidt 0 +irvin 0 +washington 0 +universitydoug 0 +sunyintroductori 0 +univers 0 +groningen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..cdb590d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,56 @@ +program 1 +parallel 0 +languag 0 +assign 0 +tuesdai 0 +thursdai 0 +calvin 0 +offic 0 +utexa 0 +exampl 0 +commun 0 +manual 0 +compilerscst 0 +compilersfal 0 +lectur 0 +instructor 0 +taylor 0 +phone 0 +email 0 +hour 0 +handout 0 +gener 0 +inform 0 +case 0 +tera 0 +comput 0 +copyright 0 +posix 0 +thread 0 +skeleton 0 +code 0 +tutori 0 +hello 0 +world 0 +ironman 0 +interfac 0 +onlin 0 +postscript 0 +logp 0 +paper 0 +time 0 +spent 0 +messag 0 +pass 0 +share 0 +memori 0 +foundat 0 +practic 0 +partit 0 +dynam 0 +adapt 0 +grid 0 +hierarchieslast 0 +modifi 0 +decemb 0 +linlin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..ce81ce6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,16 @@ +inform 1 +send 1 +mail 1 +home 0 +page 0 +csintroduct 0 +oper 0 +system 0 +class 0 +handout 0 +assign 0 +read 0 +project 0 +group 0 +prof 0 +newsgroup 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..5d8171f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,16 @@ +distribut 1 +read 0 +list 0 +fall 0 +thot 0 +topic 0 +systemsfil 0 +systemstopolog 0 +systemselectron 0 +commenrcefailur 0 +detectorsdistribut 0 +objectsconsistencysecuregroup 0 +communicationlanguag 0 +system 0 +dsmmobil 0 +comput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..3aaf04a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,13 @@ +mine 1 +monitor 1 +databas 0 +databasesprof 0 +daniel 0 +mirankernew 0 +seminarschedul 0 +term 0 +project 0 +materi 0 +overviewtentativeread 0 +list 0 +homeworkproject 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..2499f564 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,25 @@ +cours 1 +code 0 +introduct 0 +artifici 0 +intellig 0 +instructor 0 +raymond 0 +mooneytim 0 +placespr 0 +tuth 0 +taylor 0 +hall 0 +informationclick 0 +inform 0 +sheetand 0 +syllabu 0 +last 0 +year 0 +updat 0 +file 0 +moonei 0 +depart 0 +networkfor 0 +trace 0 +assign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..4d2663e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,35 @@ +homework 1 +cours 0 +test 0 +lisp 0 +program 0 +code 0 +symbol 0 +instructor 0 +raymond 0 +mooneyteach 0 +assist 0 +sowmya 0 +ramachandrantim 0 +placetu 0 +informationclick 0 +inform 0 +sheet 0 +syllabu 0 +informationon 0 +alsout 0 +allegro 0 +info 0 +page 0 +textparadigm 0 +artifici 0 +intellig 0 +case 0 +studi 0 +common 0 +lispassignmentsse 0 +file 0 +moonei 0 +depart 0 +networkfor 0 +trace 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..5b4fa2e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,52 @@ +learn 1 +homework 0 +cours 0 +machin 0 +base 0 +code 0 +project 0 +instructor 0 +raymond 0 +mooneytim 0 +placetu 0 +informationclick 0 +inform 0 +sheetand 0 +syllabu 0 +textmachinelearninglectur 0 +slide 0 +introduct 0 +concept 0 +gener 0 +order 0 +decis 0 +tree 0 +experiment 0 +evalu 0 +comput 0 +theori 0 +rule 0 +induct 0 +logic 0 +program 0 +neural 0 +network 0 +cluster 0 +unsupervis 0 +bayesian 0 +instanc 0 +explan 0 +learningassignmentsse 0 +file 0 +moonei 0 +depart 0 +networkfor 0 +trace 0 +final 0 +suggest 0 +spring 0 +paper 0 +format 0 +outlin 0 +talk 0 +version 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..09229283 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,100 @@ +program 1 +exam 0 +languag 0 +guid 0 +comput 0 +scheme 0 +foundat 0 +sciencec 0 +section 0 +cours 0 +treesassign 0 +assign 0 +studi 0 +introduct 0 +scienc 0 +intend 0 +major 0 +atleast 0 +semest 0 +high 0 +school 0 +number 0 +willb 0 +chang 0 +next 0 +year 0 +never 0 +taken 0 +programmingcours 0 +take 0 +porter 0 +instead 0 +strong 0 +math 0 +background 0 +least 0 +precalculu 0 +requir 0 +dialect 0 +lisp 0 +theschem 0 +implement 0 +call 0 +gambit 0 +run 0 +macintoshcomput 0 +move 0 +faster 0 +previou 0 +coursesand 0 +emphas 0 +concept 0 +syntax 0 +work 0 +hard 0 +hopefulli 0 +learninga 0 +syllabu 0 +directori 0 +softwar 0 +tutorcopi 0 +pcassign 0 +machin 0 +simulationassign 0 +surf 0 +webassign 0 +basic 0 +schemeassign 0 +plai 0 +peano 0 +gamblingassign 0 +turtl 0 +graphicsassign 0 +snow 0 +list 0 +manipulationstudi 0 +vocabulari 0 +plot 0 +thickensassign 0 +treasur 0 +huntassign 0 +symbol 0 +algebraassign 0 +data 0 +abstract 0 +matricesstudi 0 +draw 0 +express 0 +unparsingassign 0 +translationstudi 0 +final 0 +thur 0 +gordon 0 +novak 0 +assignmentsprogram 0 +file 0 +descriptionsprogram 0 +submiss 0 +gradingmidterm 0 +guidefin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..4478c2ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,41 @@ +cours 1 +program 1 +compilersc 0 +compil 0 +student 0 +powerpc 0 +summer 0 +file 0 +studi 0 +cover 0 +design 0 +construct 0 +programminglanguag 0 +write 0 +pascal 0 +codei 0 +gener 0 +processor 0 +server 0 +incorpor 0 +chip 0 +heavi 0 +workload 0 +especi 0 +plan 0 +take 0 +expect 0 +dedicatetheir 0 +live 0 +five 0 +week 0 +syllabusprogram 0 +assignmentsprogram 0 +descript 0 +directori 0 +submiss 0 +gradingmidterm 0 +guidefin 0 +exam 0 +guidegordon 0 +novak 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..513dfb1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,53 @@ +intellig 1 +studi 1 +artifici 0 +comput 0 +stori 0 +intelligencec 0 +intelligenceartifici 0 +defin 0 +thecomput 0 +requir 0 +behavior 0 +attempt 0 +todupl 0 +us 0 +connectspercept 0 +environ 0 +action 0 +appropri 0 +achiev 0 +thegoal 0 +actor 0 +cours 0 +survei 0 +major 0 +topic 0 +includ 0 +search 0 +logic 0 +andknowledg 0 +represent 0 +natur 0 +languag 0 +process 0 +withbrief 0 +coverag 0 +brain 0 +machin 0 +vision 0 +syllabusprogram 0 +assignmentsprogram 0 +file 0 +descriptionsmidterm 0 +guidefin 0 +exam 0 +guidepred 0 +calculu 0 +problemssolut 0 +select 0 +problemsnot 0 +bibliographi 0 +human 0 +braingordon 0 +novak 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..8fb5182c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,52 @@ +program 1 +automat 0 +lectur 0 +semest 0 +programmingc 0 +programmingautomat 0 +gener 0 +execut 0 +programsfrom 0 +specif 0 +higher 0 +level 0 +ordinari 0 +languag 0 +cours 0 +consist 0 +first 0 +third 0 +homework 0 +problem 0 +assign 0 +given 0 +illustrateth 0 +materi 0 +long 0 +requirelearn 0 +sever 0 +kind 0 +system 0 +latter 0 +partof 0 +cover 0 +read 0 +research 0 +literatur 0 +student 0 +expect 0 +present 0 +paper 0 +class 0 +syllabusbibliographyassign 0 +compil 0 +optim 0 +done 0 +handpattern 0 +matchingobject 0 +orient 0 +programmingintroduct 0 +glispview 0 +graphic 0 +programminggordon 0 +novak 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..b0295edd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,675 @@ +languag 1 +system 1 +program 1 +comput 0 +data 0 +parallel 0 +level 0 +algorithm 0 +network 0 +fault 0 +cilk 0 +toler 0 +logic 0 +compil 0 +softwar 0 +us 0 +problem 0 +adapt 0 +special 0 +design 0 +librari 0 +present 0 +approach 0 +describ 0 +implement 0 +learn 0 +cours 0 +robot 0 +natur 0 +applic 0 +user 0 +call 0 +file 0 +allow 0 +machin 0 +provid 0 +represent 0 +model 0 +interfac 0 +effici 0 +queri 0 +graduat 0 +task 0 +evolut 0 +neural 0 +open 0 +process 0 +reus 0 +high 0 +cooper 0 +featur 0 +failur 0 +talk 0 +well 0 +avail 0 +wide 0 +area 0 +abstract 0 +mathemat 0 +state 0 +result 0 +alamo 0 +sourc 0 +analysi 0 +idea 0 +introduct 0 +robert 0 +scienc 0 +semant 0 +hierarchi 0 +cognit 0 +sequenti 0 +decis 0 +cluster 0 +evalu 0 +inform 0 +current 0 +enabl 0 +distribut 0 +environ 0 +advantag 0 +explor 0 +perform 0 +execut 0 +gener 0 +issu 0 +differ 0 +formal 0 +method 0 +build 0 +make 0 +produc 0 +optim 0 +symbiot 0 +search 0 +develop 0 +databas 0 +engin 0 +chill 0 +also 0 +procedur 0 +conceptu 0 +notat 0 +pram 0 +lectur 0 +offic 0 +utexa 0 +free 0 +time 0 +student 0 +undergradu 0 +spatial 0 +principl 0 +programmingoctob 0 +wilsonextens 0 +ramachandranth 0 +beyond 0 +simpl 0 +technolog 0 +trend 0 +highli 0 +server 0 +power 0 +requir 0 +hand 0 +translat 0 +reliabl 0 +access 0 +solut 0 +lightweight 0 +goal 0 +scale 0 +depend 0 +sever 0 +number 0 +need 0 +integr 0 +theapplic 0 +support 0 +address 0 +effect 0 +overview 0 +runtim 0 +workstat 0 +within 0 +continu 0 +even 0 +automat 0 +includ 0 +fast 0 +close 0 +among 0 +singl 0 +consist 0 +space 0 +structur 0 +discret 0 +action 0 +place 0 +path 0 +built 0 +base 0 +view 0 +simpli 0 +prolog 0 +known 0 +concern 0 +experi 0 +appli 0 +reinforc 0 +sane 0 +popul 0 +genet 0 +form 0 +abl 0 +broad 0 +interest 0 +datasourc 0 +architectur 0 +theabstract 0 +embodi 0 +higher 0 +code 0 +advanc 0 +facil 0 +serv 0 +resolv 0 +exampl 0 +offer 0 +determinist 0 +corpu 0 +difficult 0 +easi 0 +asymptot 0 +main 0 +underli 0 +theform 0 +extens 0 +portabl 0 +discuss 0 +sciencecst 0 +sciencefal 0 +mondai 0 +instructor 0 +blumof 0 +taylor 0 +phone 0 +email 0 +hour 0 +thursdai 0 +feel 0 +stop 0 +semest 0 +seminar 0 +taken 0 +apass 0 +fail 0 +basi 0 +topic 0 +honor 0 +receiv 0 +credit 0 +must 0 +beregist 0 +attend 0 +least 0 +schedulespeakertitleseptemb 0 +mirankeralamo 0 +warehouseseptemb 0 +kuipersth 0 +humanand 0 +mapsseptemb 0 +blumofecilk 0 +reliableparallel 0 +workstationsseptemb 0 +risto 0 +miikkulainenlearn 0 +throughsymbiot 0 +networksoctob 0 +vladimir 0 +lifschitzmathemat 0 +paul 0 +reflectionoctob 0 +mooneylearn 0 +usinginduct 0 +mike 0 +dahlindistribut 0 +internetsnovemb 0 +gordon 0 +novaksoftwar 0 +genericprocedur 0 +viewsnovemb 0 +vijaya 0 +parallelalgorithmsnovemb 0 +lorenzo 0 +alvisilighweight 0 +tolerancenovemb 0 +calvin 0 +linadapt 0 +optimizationdecemb 0 +greg 0 +plaxtonanalysi 0 +algorithmslighweight 0 +tolerancelorenzo 0 +alvisidistribut 0 +move 0 +confin 0 +academia 0 +andresearch 0 +lab 0 +revolution 0 +busi 0 +government 0 +organ 0 +citizen 0 +andcollect 0 +promis 0 +todramat 0 +increas 0 +pace 0 +revolut 0 +thedesign 0 +beyondth 0 +client 0 +paradigm 0 +har 0 +ofdistribut 0 +scope 0 +emphasi 0 +toleranttechniqu 0 +undergo 0 +dramat 0 +chang 0 +willceas 0 +expens 0 +applicationsto 0 +exot 0 +distributedinform 0 +infrastructur 0 +acompetit 0 +guarante 0 +criticalinform 0 +engineerfault 0 +dedic 0 +resourc 0 +negligibleimpact 0 +cost 0 +offailur 0 +transpar 0 +programm 0 +emerg 0 +communicatethrough 0 +messag 0 +onnetwork 0 +workstationsrobert 0 +blumofethi 0 +pronouncedsilk 0 +multithread 0 +andcilk 0 +functionalsubset 0 +providesadapt 0 +tranpar 0 +touser 0 +mean 0 +ofworkst 0 +run 0 +grow 0 +shrinkdynam 0 +idl 0 +onth 0 +amount 0 +addit 0 +cilkprogram 0 +workstationscrash 0 +detect 0 +andrecov 0 +livedemonstr 0 +internetsmik 0 +dahlinthi 0 +give 0 +indistribut 0 +applicationsmotiv 0 +aggress 0 +inclust 0 +servicei 0 +request 0 +nodesto 0 +better 0 +centralserv 0 +challeng 0 +goodperform 0 +despit 0 +limit 0 +networkperform 0 +node 0 +projectwil 0 +human 0 +mapsbenjamin 0 +kuipershuman 0 +map 0 +reli 0 +forlarg 0 +ontolog 0 +similarli 0 +varietyof 0 +propos 0 +andmap 0 +unknown 0 +cast 0 +diverserepresent 0 +spatialsemant 0 +object 0 +relat 0 +andassumpt 0 +foundat 0 +thecontrol 0 +dynam 0 +whose 0 +stabl 0 +equilibrium 0 +point 0 +beabstract 0 +distinct 0 +trajectori 0 +link 0 +givinga 0 +causal 0 +graph 0 +causalgraph 0 +turn 0 +topologicalnetwork 0 +local 0 +metric 0 +occupancygrid 0 +neighborhood 0 +theframework 0 +topolog 0 +without 0 +usual 0 +ofglob 0 +programmingvladimir 0 +lifschitzlog 0 +sister 0 +functionalprogram 0 +notne 0 +contain 0 +explicit 0 +oper 0 +instruct 0 +instead 0 +itcan 0 +fact 0 +sufficientto 0 +solv 0 +declar 0 +executedus 0 +autom 0 +reason 0 +best 0 +logicprogram 0 +theori 0 +withdefin 0 +thereason 0 +investig 0 +thesound 0 +optimizationcalvin 0 +linthi 0 +andtheir 0 +differenthardwar 0 +platform 0 +efficientand 0 +usabl 0 +framework 0 +suchlibrari 0 +three 0 +plan 0 +thesetechniqu 0 +scientif 0 +weexplain 0 +facilit 0 +ofneur 0 +networksristo 0 +miikkulainena 0 +novel 0 +neuro 0 +evolv 0 +neuronsthrough 0 +given 0 +promot 0 +inth 0 +anddiscourag 0 +converg 0 +suboptim 0 +toextract 0 +domain 0 +specif 0 +spars 0 +rang 0 +sequentialdecis 0 +control 0 +game 0 +plai 0 +resourcemanag 0 +warehousedan 0 +mirankerth 0 +effort 0 +direct 0 +intra 0 +andint 0 +enumer 0 +site 0 +theuser 0 +illus 0 +virtual 0 +follow 0 +byqueri 0 +tool 0 +central 0 +corba 0 +compliant 0 +interfacethat 0 +uniform 0 +heterogen 0 +ofabstract 0 +clever 0 +algorithmsand 0 +separ 0 +isol 0 +buffer 0 +anddata 0 +prefetch 0 +claim 0 +often 0 +anobject 0 +orient 0 +deduct 0 +infer 0 +activedatabas 0 +mine 0 +constructedus 0 +common 0 +final 0 +sinc 0 +output 0 +databasefacil 0 +compon 0 +thealamo 0 +compos 0 +dataintegr 0 +particular 0 +anticip 0 +elementsof 0 +repres 0 +meta 0 +andsemant 0 +conflict 0 +ultim 0 +furthercomposit 0 +complex 0 +knowledg 0 +answerhigh 0 +induct 0 +logicprogrammingraymond 0 +mooneyinduct 0 +learningprolog 0 +offirst 0 +order 0 +standard 0 +learningmethod 0 +constrain 0 +fix 0 +length 0 +vector 0 +areappli 0 +believethi 0 +richer 0 +import 0 +havedevelop 0 +parsersfrom 0 +pars 0 +sentenc 0 +obtain 0 +superior 0 +onsever 0 +artifici 0 +corpora 0 +previous 0 +test 0 +networkmethod 0 +encourag 0 +realist 0 +ati 0 +ofairlin 0 +automaticallydevelop 0 +complet 0 +englishdatabas 0 +moreaccur 0 +parser 0 +smallgeograph 0 +foidl 0 +past 0 +tens 0 +english 0 +surpass 0 +previou 0 +treemethod 0 +throughviewsgordon 0 +novak 0 +clearli 0 +good 0 +toachiev 0 +practic 0 +assumpt 0 +thesoftwar 0 +type 0 +typesus 0 +agener 0 +version 0 +custom 0 +graphic 0 +specifyview 0 +theworld 0 +write 0 +adesir 0 +algorithmsgreg 0 +plaxtona 0 +major 0 +focu 0 +theoret 0 +andanalysi 0 +random 0 +forspecif 0 +research 0 +notuncommon 0 +come 0 +across 0 +written 0 +paper 0 +straightforward 0 +surprisingli 0 +lengthi 0 +deal 0 +minor 0 +side 0 +case 0 +havelittl 0 +noth 0 +suchpap 0 +seem 0 +signific 0 +andform 0 +difficulti 0 +gapsinher 0 +convent 0 +inadequatefor 0 +succinctli 0 +certain 0 +straightforwardalgorithm 0 +significantli 0 +reduc 0 +theconceptu 0 +associ 0 +trivialclass 0 +concret 0 +consid 0 +analysisof 0 +linear 0 +select 0 +blum 0 +floyd 0 +pratt 0 +rivest 0 +tarjan 0 +algorithmsvijaya 0 +forcombinatori 0 +studi 0 +recentyear 0 +larg 0 +willdescrib 0 +work 0 +parallelalgorithm 0 +thesealgorithm 0 +massiv 0 +maspar 0 +thendescrib 0 +queu 0 +variant 0 +wepropos 0 +appropri 0 +parallelshar 0 +memori 0 +tradit 0 +reflectionpaul 0 +ad 0 +fairli 0 +modif 0 +addnew 0 +analys 0 +reflect 0 +examin 0 +ofinterest 0 +part 0 +affect 0 +structureaccordingli 0 +thing 0 +modular 0 +adapat 0 +recent 0 +workon 0 +rscheme 0 +extensiblelanguag 0 +last 0 +modifi 0 +novemb 0 +blumoferdb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..d4562d7e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,68 @@ +problem 1 +chapter 0 +except 0 +solut 0 +offic 0 +program 0 +assign 0 +septemb 0 +octob 0 +utexa 0 +exam 0 +novemb 0 +introduct 0 +oper 0 +lectur 0 +robert 0 +phone 0 +email 0 +hour 0 +thursdai 0 +gooti 0 +exampl 0 +found 0 +crypt 0 +topic 0 +cover 0 +midterm 0 +decemb 0 +systemsc 0 +systemsfal 0 +mondai 0 +wednesdai 0 +instructor 0 +blumof 0 +taylor 0 +feel 0 +free 0 +stop 0 +time 0 +teach 0 +assist 0 +subramanyam 0 +tuesdai 0 +station 0 +solari 0 +canb 0 +implement 0 +support 0 +multiplemap 0 +assum 0 +map 0 +file 0 +least 0 +long 0 +themap 0 +test 0 +encrypt 0 +decrypt 0 +handout 0 +gener 0 +inform 0 +final 0 +solutionsread 0 +book 0 +date 0 +last 0 +modifi 0 +blumoferdb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..8008aeac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,45 @@ +neural 1 +network 1 +utexa 1 +risto 0 +offic 0 +class 0 +fall 0 +networksfal 0 +uniqu 0 +number 0 +instructor 0 +miikkulainen 0 +bednar 0 +jbednar 0 +station 0 +text 0 +lauren 0 +fausett 0 +fundament 0 +ofneur 0 +architectur 0 +algorithm 0 +applic 0 +englewood 0 +cliff 0 +prenticehal 0 +select 0 +paper 0 +note 0 +copi 0 +slide 0 +us 0 +lectur 0 +grade 0 +homework 0 +midterm 0 +final 0 +detail 0 +schedulehomework 0 +assignmentsexamsclass 0 +resourcesa 0 +postscript 0 +versionof 0 +syllabusristo 0 +edusun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..1c67e2b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,76 @@ +cognit 1 +scienc 0 +utexa 0 +risto 0 +read 0 +discuss 0 +introduct 0 +dept 0 +hall 0 +offic 0 +hour 0 +appt 0 +student 0 +note 0 +short 0 +page 0 +fall 0 +sciencefal 0 +instructor 0 +nichola 0 +asher 0 +philosophi 0 +waggen 0 +nasher 0 +berti 0 +miikkulainen 0 +comput 0 +taylor 0 +text 0 +posner 0 +foundat 0 +mitpress 0 +packet 0 +requir 0 +regular 0 +interv 0 +submit 0 +critic 0 +commentari 0 +collabor 0 +withanoth 0 +also 0 +write 0 +paper 0 +approxim 0 +signific 0 +research 0 +topic 0 +find 0 +ofinterest 0 +count 0 +toward 0 +final 0 +grade 0 +thepap 0 +class 0 +attend 0 +particip 0 +alsorequir 0 +detail 0 +cours 0 +descriptioncours 0 +schedulediscuss 0 +notesperson 0 +adscollabor 0 +paperclass 0 +resourcesstud 0 +questionnaireus 0 +link 0 +center 0 +list 0 +sciencefaculti 0 +pointer 0 +resourc 0 +gener 0 +edusun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..4a3130e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,190 @@ +program 1 +respons 0 +pascal 0 +page 0 +cours 0 +assign 0 +student 0 +take 0 +grade 0 +hour 0 +syllabu 0 +detail 0 +exam 0 +semest 0 +class 0 +wait 0 +deadlin 0 +quizz 0 +room 0 +suzi 0 +need 0 +requir 0 +schedul 0 +thenewsgroup 0 +utexa 0 +updat 0 +work 0 +prepar 0 +becom 0 +quiz 0 +long 0 +warn 0 +mani 0 +group 0 +note 0 +limit 0 +thetest 0 +programmingcsp 0 +pascalintroductori 0 +comput 0 +programminginstructor 0 +gallagherwelcom 0 +excit 0 +intellectu 0 +challeng 0 +cspi 0 +design 0 +give 0 +firm 0 +foundat 0 +andso 0 +effort 0 +read 0 +thecours 0 +carefulli 0 +summari 0 +contain 0 +wella 0 +import 0 +polici 0 +date 0 +otherdeadlin 0 +everyth 0 +thesyllabu 0 +without 0 +delai 0 +avail 0 +jenn 0 +copi 0 +guadalup 0 +hundr 0 +takethi 0 +courseeach 0 +highli 0 +structur 0 +foral 0 +monitor 0 +frequent 0 +expect 0 +depend 0 +howwel 0 +event 0 +extrem 0 +difficult 0 +behind 0 +procedur 0 +riski 0 +near 0 +deadlineto 0 +turn 0 +late 0 +get 0 +half 0 +credit 0 +line 0 +unfortun 0 +construct 0 +link 0 +nowher 0 +apolog 0 +everyon 0 +attend 0 +lectur 0 +gallagh 0 +everi 0 +thursdayeven 0 +welch 0 +mondai 0 +wednesdai 0 +break 0 +intosmal 0 +section 0 +discuss 0 +ofth 0 +materi 0 +ateach 0 +assist 0 +nine 0 +written 0 +debug 0 +theprogram 0 +laboratori 0 +thatlaboratori 0 +thatgrad 0 +even 0 +less 0 +andyou 0 +within 0 +eight 0 +thattest 0 +limitedand 0 +often 0 +foravail 0 +proctor 0 +sever 0 +hoursbefor 0 +submit 0 +andquizz 0 +earli 0 +enough 0 +three 0 +must 0 +betaken 0 +prescrib 0 +time 0 +make 0 +soon 0 +possibl 0 +begin 0 +file 0 +openedfor 0 +uniqu 0 +identifi 0 +yourstud 0 +access 0 +orsak 0 +requiredtextbook 0 +dale 0 +weem 0 +wewil 0 +cover 0 +chapter 0 +individu 0 +background 0 +vari 0 +consider 0 +thiscours 0 +partial 0 +self 0 +pace 0 +feel 0 +well 0 +click 0 +howev 0 +still 0 +liabl 0 +commun 0 +exampl 0 +could 0 +form 0 +studi 0 +also 0 +gripe 0 +thought 0 +articl 0 +gener 0 +interest 0 +elicit 0 +repli 0 +staff 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..f53e2162 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,59 @@ +output 1 +data 1 +sampl 0 +project 0 +file 0 +recoveri 0 +problem 0 +solut 0 +test 0 +databas 0 +system 0 +implement 0 +tong 0 +wang 0 +pleas 0 +dept 0 +student 0 +contest 0 +benchmark 0 +script 0 +time 0 +order 0 +tupl 0 +differ 0 +program 0 +pass 0 +fall 0 +professor 0 +batori 0 +syllabu 0 +homework 0 +read 0 +first 0 +retriev 0 +ret_into 0 +replac 0 +append 0 +delet 0 +mdb 0 +us 0 +measur 0 +run 0 +sinc 0 +attribut 0 +anoth 0 +wrote 0 +perl 0 +transform 0 +compar 0 +diff 0 +turn 0 +without 0 +error 0 +fail 0 +reason 0 +email 0 +suggest 0 +comment 0 +medec 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..6ece683d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,302 @@ +system 1 +page 0 +oper 0 +comput 0 +distribut 0 +proceed 0 +time 0 +cours 0 +file 0 +decemb 0 +review 0 +real 0 +commun 0 +process 0 +ieee 0 +transact 0 +design 0 +implement 0 +advanc 0 +support 0 +concept 0 +issu 0 +schedul 0 +sosp 0 +mach 0 +kernel 0 +harrick 0 +read 0 +fall 0 +wireless 0 +mobil 0 +environ 0 +share 0 +confer 0 +april 0 +unix 0 +levi 0 +usenix 0 +symposium 0 +cheriton 0 +cach 0 +januari 0 +instructor 0 +descript 0 +gener 0 +inform 0 +textbook 0 +requir 0 +list 0 +earli 0 +idea 0 +topic 0 +research 0 +avail 0 +paper 0 +project 0 +dalei 0 +juli 0 +andrew 0 +operatingsystem 0 +survei 0 +thread 0 +formultiprogram 0 +memori 0 +multiprocessor 0 +anderson 0 +lazowska 0 +network 0 +novemb 0 +remot 0 +procedur 0 +call 0 +februari 0 +onoper 0 +ousterhout 0 +germani 0 +fault 0 +intern 0 +workshop 0 +beyond 0 +karshmer 0 +nehmer 0 +springer 0 +verlag 0 +summer 0 +june 0 +august 0 +schroeder 0 +needham 0 +protect 0 +princeton 0 +march 0 +trigger 0 +tabl 0 +content 0 +prerequisit 0 +synopsi 0 +titl 0 +professor 0 +last 0 +offer 0 +prerequisitegradu 0 +stand 0 +undergradu 0 +systemssuch 0 +student 0 +expect 0 +familiar 0 +materialin 0 +chapter 0 +peterson 0 +andsilberschatz 0 +synopsisc 0 +breadth 0 +coveringboth 0 +theoret 0 +practic 0 +systemdesign 0 +cover 0 +includ 0 +ofdistribut 0 +formobil 0 +case 0 +studi 0 +anemphasi 0 +place 0 +current 0 +collect 0 +articl 0 +made 0 +theinstructor 0 +requirementsstud 0 +number 0 +area 0 +anddiscuss 0 +grade 0 +determin 0 +examin 0 +aterm 0 +present 0 +systemsfernando 0 +corbato 0 +marjori 0 +merwin 0 +daggett 0 +robert 0 +anexperiment 0 +afip 0 +spring 0 +joint 0 +brinch 0 +hansen 0 +nucleu 0 +multiprogram 0 +bensoussan 0 +clingen 0 +multic 0 +virtualmemori 0 +denni 0 +ritchi 0 +thompson 0 +overview 0 +tannenbaum 0 +robbert 0 +reness 0 +silberschatz 0 +andexampl 0 +managementa 0 +tucker 0 +gupta 0 +control 0 +theth 0 +thoma 0 +edward 0 +henri 0 +theperform 0 +implic 0 +manag 0 +altern 0 +forshar 0 +schedulingr 0 +bunt 0 +techniqu 0 +octob 0 +black 0 +concurr 0 +parallel 0 +inth 0 +inter 0 +communicationj 0 +barrera 0 +fast 0 +inproceed 0 +group 0 +acmtransact 0 +birel 0 +bruce 0 +nelson 0 +rpc 0 +oncomput 0 +bershad 0 +lightweightremot 0 +principl 0 +migrationf 0 +dougli 0 +migrat 0 +spriteoper 0 +internationalconfer 0 +berlin 0 +septemb 0 +theimer 0 +lantz 0 +preemptabl 0 +execut 0 +tolerancef 0 +cristian 0 +basic 0 +toler 0 +distributedsystem 0 +sand 0 +birman 0 +joseph 0 +reliabl 0 +presenc 0 +offailur 0 +systemsr 0 +sandberg 0 +goldberg 0 +kleiman 0 +ofsun 0 +mckusick 0 +leffler 0 +fabri 0 +fastfil 0 +rosenblum 0 +alog 0 +structur 0 +systemsm 0 +gifford 0 +fora 0 +programm 0 +workstat 0 +terri 0 +hint 0 +ieeetransact 0 +softwar 0 +engin 0 +securityr 0 +us 0 +encrypt 0 +authent 0 +inlarg 0 +butler 0 +lampson 0 +origin 0 +proc 0 +oninform 0 +scienc 0 +accetta 0 +baron 0 +boloski 0 +golub 0 +rashid 0 +tevanian 0 +young 0 +foundat 0 +develop 0 +systemsh 0 +kopetz 0 +event 0 +versu 0 +timesystem 0 +layland 0 +algorithm 0 +hard 0 +journal 0 +theacm 0 +zhao 0 +ramamritham 0 +stankov 0 +preemptiv 0 +schedulingund 0 +resourc 0 +constraint 0 +tokuda 0 +mercer 0 +art 0 +computingb 0 +badrinath 0 +acharya 0 +imielinski 0 +impact 0 +ondistribut 0 +satyanarayanan 0 +kistler 0 +kumar 0 +okasaki 0 +siegel 0 +steer 0 +coda 0 +highli 0 +distributedworkst 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..fc420d2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,403 @@ +multimedia 1 +page 0 +proceed 0 +system 0 +video 0 +comput 0 +server 0 +design 0 +commun 0 +network 0 +oper 0 +octob 0 +septemb 0 +novemb 0 +goyal 0 +cours 0 +compress 0 +algorithm 0 +issu 0 +ieee 0 +support 0 +techniqu 0 +sigcomm 0 +list 0 +time 0 +schedul 0 +digit 0 +shenoi 0 +april 0 +august 0 +instructor 0 +harrick 0 +storag 0 +applic 0 +protocol 0 +audio 0 +research 0 +disk 0 +journal 0 +area 0 +offic 0 +hour 0 +media 0 +architectur 0 +mpeg 0 +placement 0 +project 0 +real 0 +packet 0 +analysi 0 +transport 0 +survei 0 +kandlur 0 +effici 0 +control 0 +scheme 0 +manag 0 +intern 0 +select 0 +zhang 0 +review 0 +descript 0 +teach 0 +read 0 +overview 0 +technolog 0 +trend 0 +problem 0 +class 0 +format 0 +fundament 0 +jpeg 0 +retriev 0 +cach 0 +batch 0 +introduct 0 +channel 0 +decemb 0 +fall 0 +number 0 +place 0 +basic 0 +concept 0 +cover 0 +multimediasystem 0 +topic 0 +student 0 +expect 0 +implement 0 +determin 0 +appoint 0 +phone 0 +mail 0 +utexa 0 +copi 0 +steinmetz 0 +principl 0 +standard 0 +hierarch 0 +magazin 0 +venkat 0 +rangan 0 +optim 0 +ofmultimedia 0 +arrai 0 +icmc 0 +washington 0 +symposium 0 +june 0 +anaheim 0 +chen 0 +workshop 0 +diego 0 +francisco 0 +scale 0 +buffer 0 +sitaram 0 +dynam 0 +polici 0 +boston 0 +keshav 0 +rate 0 +base 0 +campbel 0 +coulson 0 +peterson 0 +jacobson 0 +mccann 0 +framework 0 +tabl 0 +content 0 +gener 0 +inform 0 +prerequisit 0 +synopsi 0 +textbook 0 +requir 0 +assist 0 +spring 0 +databas 0 +handout 0 +note 0 +postscript 0 +scalabl 0 +possibl 0 +integr 0 +servic 0 +establish 0 +processor 0 +descriptiongener 0 +informationcours 0 +titl 0 +professor 0 +detail 0 +offer 0 +uniqu 0 +taylor 0 +hall 0 +prerequisitesgradu 0 +stand 0 +familiar 0 +incomput 0 +synopsisc 0 +advanc 0 +boththeoret 0 +practic 0 +includ 0 +systemsupport 0 +well 0 +transportprotocol 0 +emphasi 0 +current 0 +designissu 0 +textbooka 0 +collect 0 +recent 0 +articl 0 +madeavail 0 +requirementsth 0 +introduc 0 +thetop 0 +follow 0 +discuss 0 +relatedpap 0 +question 0 +answer 0 +tounderstand 0 +describ 0 +critiqu 0 +contribut 0 +ofpap 0 +addition 0 +carri 0 +asemest 0 +long 0 +grade 0 +examin 0 +andclass 0 +particip 0 +vintuesdai 0 +assistantmr 0 +prashant 0 +eduread 0 +cntain 0 +paper 0 +theread 0 +avail 0 +mondai 0 +speedwai 0 +locat 0 +dobi 0 +mall 0 +guadalup 0 +austin 0 +pleas 0 +callthem 0 +make 0 +sure 0 +packag 0 +readi 0 +compressionr 0 +data 0 +wallac 0 +still 0 +pictur 0 +gall 0 +multimediaappl 0 +chiang 0 +anastassi 0 +code 0 +digitaltelevis 0 +serversoverview 0 +serverdesign 0 +gemmel 0 +row 0 +tutori 0 +object 0 +ieeeintern 0 +confer 0 +failur 0 +recoveri 0 +inmulti 0 +annualintern 0 +fault 0 +toler 0 +ftc 0 +pasadena 0 +california 0 +chiueh 0 +katz 0 +multi 0 +resolut 0 +represent 0 +forparallel 0 +admiss 0 +groupedsweep 0 +ofthird 0 +supportfor 0 +narasimha 0 +reddi 0 +wylli 0 +multimediai 0 +statist 0 +admissioncontrol 0 +acmmultimedia 0 +designinglarg 0 +march 0 +scan 0 +inmultimedia 0 +sanfrancisco 0 +stream 0 +convers 0 +interactivevideo 0 +playout 0 +summer 0 +space 0 +shahabuddin 0 +foran 0 +demand 0 +demandvideo 0 +report 0 +papadimitri 0 +ramanathan 0 +informationcach 0 +deliveri 0 +person 0 +program 0 +homeentertain 0 +internationalconfer 0 +multimedianetwork 0 +layer 0 +shenker 0 +futur 0 +internet 0 +ferrari 0 +verma 0 +channelestablish 0 +wide 0 +areasin 0 +comparison 0 +servicedisciplin 0 +delaybound 0 +heterogen 0 +toappear 0 +also 0 +workshopon 0 +nossdav 0 +durham 0 +hampshir 0 +chow 0 +losslesssmooth 0 +london 0 +salehi 0 +kuros 0 +towslei 0 +storedvideo 0 +reduc 0 +variabl 0 +resourc 0 +requirementsthrough 0 +smooth 0 +sigmetr 0 +philadelphia 0 +grossglaus 0 +rcbr 0 +simpl 0 +efficientservic 0 +multipl 0 +traffic 0 +acmsigcomm 0 +kanakia 0 +misra 0 +reibman 0 +adapt 0 +congestioncontrol 0 +proceedingsof 0 +clark 0 +tennenhous 0 +consider 0 +newgener 0 +hutchison 0 +qualiti 0 +servicearchitectur 0 +turner 0 +imag 0 +transfer 0 +floyd 0 +reliablemulticast 0 +light 0 +weight 0 +session 0 +levelfram 0 +buss 0 +deffner 0 +schulzrinn 0 +januari 0 +blakowski 0 +synchron 0 +refer 0 +model 0 +specif 0 +case 0 +studi 0 +onselect 0 +januaryoper 0 +multimediag 0 +robin 0 +blair 0 +papathoma 0 +andd 0 +shepherd 0 +basedcommun 0 +choru 0 +incommun 0 +druschel 0 +abbott 0 +pagel 0 +subsystem 0 +workstat 0 +ofth 0 +third 0 +systemssupport 0 +govindan 0 +anderson 0 +mechan 0 +forcontinu 0 +operatingsystem 0 +pacif 0 +grove 0 +formultimedia 0 +second 0 +symposiumon 0 +osdi 0 +seattl 0 +conferencingh 0 +zellweg 0 +swinehart 0 +venkatrangan 0 +conferenc 0 +etherphon 0 +environ 0 +flexibl 0 +packetvideo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..f1d2a57c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,296 @@ +proceed 1 +multimedia 0 +multicast 0 +cours 0 +rout 0 +system 0 +sigcomm 0 +databas 0 +design 0 +boston 0 +real 0 +share 0 +conferenc 0 +schedul 0 +page 0 +commun 0 +harrick 0 +inform 0 +requir 0 +read 0 +internet 0 +time 0 +oper 0 +paper 0 +acmmultimedia 0 +francisco 0 +novemb 0 +base 0 +technic 0 +report 0 +queri 0 +pictur 0 +fall 0 +instructor 0 +descript 0 +gener 0 +textbook 0 +offic 0 +list 0 +processor 0 +support 0 +topic 0 +protocol 0 +present 0 +class 0 +grade 0 +project 0 +mccann 0 +jacobson 0 +packet 0 +video 0 +crowcroft 0 +confer 0 +scalabl 0 +applic 0 +deer 0 +cheriton 0 +transact 0 +tree 0 +reliabl 0 +interact 0 +gupta 0 +resourc 0 +infocom 0 +warldersburg 0 +weihl 0 +symposium 0 +jeffai 0 +paradigm 0 +imag 0 +jain 0 +tabl 0 +content 0 +prerequisit 0 +synopsi 0 +hour 0 +servic 0 +titl 0 +professor 0 +semest 0 +meet 0 +fridai 0 +prerequisitesgradu 0 +stand 0 +familiar 0 +basic 0 +concept 0 +networkprotocol 0 +multimediasystem 0 +synopsisthi 0 +advanc 0 +bediscuss 0 +includ 0 +transport 0 +formultimedia 0 +mobil 0 +network 0 +andmultimedia 0 +multimediadatabas 0 +emphasi 0 +place 0 +current 0 +issu 0 +andresearch 0 +collect 0 +research 0 +articl 0 +made 0 +avail 0 +theinstructor 0 +requirementsstud 0 +number 0 +area 0 +aswel 0 +discuss 0 +determinedbas 0 +particip 0 +studentsenrol 0 +letter 0 +submit 0 +orcarri 0 +hoursfridai 0 +appoint 0 +phone 0 +mail 0 +utexa 0 +flexibleframework 0 +handlei 0 +wakeman 0 +controlchannel 0 +cccp 0 +build 0 +conferencecontrol 0 +gajewska 0 +kistler 0 +manass 0 +redel 0 +argo 0 +systemfor 0 +distribut 0 +collabor 0 +gong 0 +multipoint 0 +audio 0 +control 0 +basedmultimedia 0 +zellweg 0 +swinehart 0 +venkat 0 +rangan 0 +etherphon 0 +environ 0 +ieeecomput 0 +octob 0 +datagraminternetwork 0 +extend 0 +lan 0 +computersystem 0 +ballardi 0 +franci 0 +core 0 +architectur 0 +inter 0 +domain 0 +thyagarajan 0 +hierarch 0 +distanc 0 +vector 0 +mbone 0 +widyono 0 +andevalu 0 +algorithm 0 +channel 0 +msthesi 0 +berkelei 0 +kompella 0 +pasqual 0 +polyzo 0 +multimediacommun 0 +univers 0 +california 0 +diego 0 +floyd 0 +zhang 0 +framework 0 +light 0 +weightsess 0 +level 0 +frame 0 +ofacm 0 +holbrook 0 +singhal 0 +receiv 0 +fordistribut 0 +simul 0 +theacm 0 +herzog 0 +estrin 0 +shenker 0 +cost 0 +axiomat 0 +analysi 0 +how 0 +moran 0 +nguyen 0 +multi 0 +parti 0 +timecommun 0 +servicesj 0 +guyton 0 +schwartz 0 +locat 0 +nearbi 0 +copi 0 +replic 0 +server 0 +mogul 0 +case 0 +forpersist 0 +connect 0 +http 0 +acmsigcomm 0 +supportc 0 +lotteri 0 +effici 0 +flexibleproport 0 +mangement 0 +ofoper 0 +implement 0 +osdi 0 +strideschedul 0 +determinist 0 +proport 0 +resourcemanag 0 +golestani 0 +self 0 +clock 0 +fair 0 +queue 0 +scheme 0 +high 0 +speedappl 0 +govindan 0 +anderson 0 +mechan 0 +forcontinu 0 +media 0 +onoper 0 +principl 0 +sosp 0 +monterei 0 +timeproduc 0 +consum 0 +construct 0 +ofeffici 0 +predict 0 +ofth 0 +sigapp 0 +appli 0 +comput 0 +latenc 0 +manag 0 +intim 0 +workshop 0 +timeoper 0 +softwar 0 +seattl 0 +databasesw 0 +niblack 0 +qbic 0 +contentus 0 +color 0 +textur 0 +shape 0 +februari 0 +cawkel 0 +journal 0 +ofinform 0 +scienc 0 +bach 0 +paul 0 +managementsystem 0 +face 0 +retriev 0 +ieee 0 +knowledgeand 0 +data 0 +engin 0 +august 0 +weymouth 0 +semant 0 +vimsi 0 +model 0 +intern 0 +onveri 0 +larg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..4bebcccd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,86 @@ +home 1 +work 1 +question 1 +time 1 +answer 0 +final 0 +exam 0 +problem 0 +true 0 +cours 0 +updat 0 +sentenc 0 +omega 0 +submatrix 0 +algorithm 0 +techniqu 0 +theori 0 +fall 0 +handout 0 +receiv 0 +last 0 +total 0 +vertic 0 +cycl 0 +label 0 +right 0 +size 0 +largest 0 +decemb 0 +instructor 0 +vijaya 0 +ramachandranuniqu 0 +number 0 +descript 0 +instruct 0 +respons 0 +pose 0 +quot 0 +take 0 +sigma 0 +onsigma 0 +word 0 +refer 0 +amort 0 +oroth 0 +well 0 +known 0 +face 0 +data 0 +structur 0 +disjoint 0 +set 0 +requir 0 +inth 0 +worst 0 +case 0 +text 0 +book 0 +isther 0 +differ 0 +meant 0 +first 0 +second 0 +paragraphof 0 +chapter 0 +note 0 +containdistinct 0 +cancontain 0 +think 0 +littl 0 +unclear 0 +denot 0 +equal 0 +somek 0 +entri 0 +fridai 0 +mondai 0 +bepost 0 +either 0 +youhav 0 +sent 0 +pleas 0 +address 0 +us 0 +yourbest 0 +judgment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..0a9a9014 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,227 @@ +scheme 1 +rscheme 0 +version 0 +repositori 0 +system 0 +run 0 +class 0 +implement 0 +object 0 +machin 0 +note 0 +us 0 +program 0 +page 0 +cours 0 +answer 0 +sure 0 +code 0 +languag 0 +home 0 +thing 0 +make 0 +read 0 +later 0 +solari 0 +unix 0 +free 0 +recommend 0 +feelei 0 +meroon 0 +comp 0 +lang 0 +html 0 +first 0 +document 0 +standard 0 +practic 0 +question 0 +also 0 +assign 0 +simpl 0 +take 0 +wilson 0 +construct 0 +chang 0 +brows 0 +section 0 +browser 0 +especi 0 +text 0 +error 0 +chapter 0 +describ 0 +quiz 0 +homework 0 +includ 0 +pictur 0 +illustr 0 +base 0 +inherit 0 +explan 0 +test 0 +main 0 +default 0 +whichi 0 +instal 0 +public 0 +sparc 0 +command 0 +runschem 0 +linux 0 +orani 0 +sever 0 +andinstal 0 +find 0 +itfrom 0 +donovan 0 +kolbl 0 +qing 0 +patch 0 +friendlier 0 +fornewbi 0 +gettinggambit 0 +marc 0 +youcan 0 +window 0 +bestschem 0 +bunch 0 +avail 0 +guil 0 +might 0 +gambit 0 +mark 0 +mzscheme 0 +rice 0 +someth 0 +besid 0 +get 0 +start 0 +doingobject 0 +orient 0 +tous 0 +advantag 0 +abl 0 +univers 0 +indiana 0 +lot 0 +freeimplement 0 +variou 0 +getinterest 0 +learn 0 +cover 0 +place 0 +look 0 +internet 0 +newsgroup 0 +devot 0 +pagec 0 +pagethi 0 +paulwilson 0 +subject 0 +reload 0 +button 0 +yourbrows 0 +come 0 +see 0 +mostrec 0 +onlin 0 +refer 0 +materi 0 +syllabu 0 +lectur 0 +ondeclar 0 +arereason 0 +well 0 +index 0 +willchang 0 +goe 0 +along 0 +ahead 0 +islik 0 +adventur 0 +suggest 0 +usinga 0 +interact 0 +work 0 +throughchapt 0 +tutori 0 +allow 0 +outof 0 +past 0 +sanoth 0 +reason 0 +rather 0 +thanprint 0 +hardcopi 0 +correct 0 +weget 0 +definit 0 +format 0 +onlinebrows 0 +list 0 +featur 0 +ters 0 +stuff 0 +coursenot 0 +help 0 +want 0 +know 0 +miscellanousfunct 0 +exactli 0 +author 0 +second 0 +third 0 +name 0 +convent 0 +indent 0 +shouldconsult 0 +grade 0 +itsens 0 +andnot 0 +draw 0 +data 0 +structur 0 +write 0 +merg 0 +sort 0 +solut 0 +three 0 +problem 0 +comment 0 +reader 0 +regular 0 +express 0 +grammar 0 +actual 0 +backward 0 +chain 0 +proposit 0 +calculu 0 +theoremprov 0 +essenti 0 +littl 0 +subset 0 +prolog 0 +setofrul 0 +classifi 0 +anim 0 +logic 0 +kind 0 +ofanim 0 +plai 0 +theorem 0 +prover 0 +instanc 0 +simpleobject 0 +anoth 0 +show 0 +metaclass 0 +circular 0 +latter 0 +self 0 +onclass 0 +gener 0 +procedur 0 +type 0 +subtyp 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..1c4e7f38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,97 @@ +homework 1 +solut 0 +file 0 +postscript 0 +session 0 +kumar 0 +utexa 0 +class 0 +note 0 +review 0 +cours 0 +slide 0 +section 0 +model 0 +format 0 +fall 0 +comput 0 +austin 0 +ajit 0 +contact 0 +test 0 +print 0 +weekli 0 +tuesdai 0 +thursdai 0 +station 0 +feng 0 +xfeng 0 +new 0 +midterm 0 +decimalinteg 0 +hexinteg 0 +octalinteg 0 +program 0 +welcom 0 +homepag 0 +taught 0 +adam 0 +georg 0 +announc 0 +final 0 +surpris 0 +xunnow 0 +make 0 +like 0 +homeworksreview 0 +slidesth 0 +second 0 +half 0 +semest 0 +pleas 0 +view 0 +onlineif 0 +possibl 0 +realli 0 +need 0 +found 0 +updatedhomework 0 +sourc 0 +filemidterm 0 +webta 0 +inform 0 +timetableta 0 +offic 0 +hour 0 +locat 0 +guana 0 +natarajan 0 +eduxun 0 +detail 0 +time 0 +tabl 0 +guid 0 +group 0 +also 0 +tip 0 +fridai 0 +download 0 +score 0 +requir 0 +wordlist 0 +linux 0 +provid 0 +warren 0 +wang 0 +wwang 0 +answer 0 +exercis 0 +made 0 +modif 0 +mondai 0 +afternoon 0 +assign 0 +maintain 0 +edudepart 0 +sciencesunivers 0 +texa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..693fa920 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,46 @@ +solut 1 +yang 0 +statist 0 +offic 0 +hour 0 +utexa 0 +assign 0 +program 0 +schwetman 0 +mesquit 0 +station 0 +contact 0 +yangyang 0 +file 0 +print 0 +email 0 +comput 0 +system 0 +architectur 0 +fall 0 +instructor 0 +herb 0 +softwar 0 +class 0 +appointmentcontact 0 +syllabu 0 +statisticsassign 0 +asga 0 +statisticsyour 0 +final 0 +gradesect 0 +section 0 +microsparc 0 +datasheetonlin 0 +resourc 0 +classmat 0 +ruiliu 0 +postmessag 0 +new 0 +group 0 +pagei 0 +creat 0 +august 0 +comment 0 +welcom 0 +send 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..a5b6102b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,70 @@ +handout 1 +homework 0 +project 0 +protocol 0 +class 0 +group 0 +csnet 0 +offic 0 +hour 0 +read 0 +newsgroup 0 +utexa 0 +draft 0 +http 0 +schedul 0 +network 0 +implement 0 +gener 0 +inform 0 +professor 0 +tuesdai 0 +thursdai 0 +teach 0 +assist 0 +mondai 0 +wensdai 0 +station 0 +descript 0 +text 0 +background 0 +prerequisit 0 +grade 0 +refer 0 +multicast 0 +rout 0 +texa 0 +internet 0 +platform 0 +netsim 0 +corejava 0 +mobil 0 +support 0 +specif 0 +address 0 +alloc 0 +manag 0 +tutori 0 +digest 0 +access 0 +authent 0 +comp 0 +java 0 +present 0 +individu 0 +paper 0 +turn 0 +sampl 0 +solut 0 +info 0 +configur 0 +file 0 +pleas 0 +note 0 +first 0 +fengyufeng 0 +edufing 0 +public 0 +ring 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..06d00acb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,93 @@ +comput 1 +inform 0 +home 0 +scienc 0 +engin 0 +help 0 +page 0 +document 0 +class 0 +read 0 +assign 0 +homework 0 +degre 0 +program 0 +offer 0 +colleg 0 +mosaic 0 +engr 0 +pageclick 0 +techniqu 0 +spring 0 +quarterwelcom 0 +world 0 +wide 0 +hypermedia 0 +whichcontain 0 +bounti 0 +keep 0 +mind 0 +thatthi 0 +static 0 +addedfrequ 0 +problem 0 +send 0 +mail 0 +weld 0 +click 0 +highlight 0 +item 0 +personnel 0 +professor 0 +cours 0 +syllabu 0 +polici 0 +announc 0 +check 0 +regularli 0 +last 0 +chang 0 +handout 0 +lectur 0 +note 0 +gradesoth 0 +us 0 +link 0 +offici 0 +mathematica 0 +mvi 0 +visitor 0 +room 0 +schedul 0 +depart 0 +art 0 +avail 0 +follow 0 +topic 0 +basic 0 +hypertext 0 +markup 0 +languag 0 +html 0 +uniform 0 +resourc 0 +locat 0 +usinglynx 0 +charact 0 +base 0 +browserport 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quotedand 0 +duli 0 +credit 0 +copyright 0 +departmentof 0 +univers 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..57d1c2cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,139 @@ +comput 1 +engr 0 +help 0 +exam 0 +autumn 0 +washington 0 +scienc 0 +engin 0 +home 0 +page 0 +program 0 +inform 0 +class 0 +messag 0 +webmast 0 +cours 0 +netscap 0 +document 0 +frequent 0 +problem 0 +send 0 +mail 0 +click 0 +last 0 +updat 0 +studi 0 +guid 0 +solut 0 +final 0 +tip 0 +think 0 +raini 0 +activ 0 +quarter 0 +like 0 +univers 0 +degre 0 +offer 0 +colleg 0 +dugan 0 +martin 0 +tompa 0 +welcom 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +bounti 0 +theclass 0 +keep 0 +mind 0 +static 0 +newinform 0 +especi 0 +ad 0 +highlight 0 +item 0 +check 0 +syllabu 0 +offic 0 +hour 0 +staff 0 +lectur 0 +slide 0 +homework 0 +midterm 0 +mac 0 +debugg 0 +bulletin 0 +board 0 +textbook 0 +code 0 +refer 0 +regularli 0 +schedul 0 +time 0 +place 0 +special 0 +demo 0 +web 0 +earlier 0 +intact 0 +less 0 +notic 0 +instanc 0 +link 0 +work 0 +pleas 0 +might 0 +assign 0 +test 0 +us 0 +previous 0 +winter 0 +spring 0 +summer 0 +search 0 +previou 0 +miscellan 0 +info 0 +case 0 +insensit 0 +match 0 +whole 0 +word 0 +avail 0 +depart 0 +art 0 +relat 0 +major 0 +nonmajor 0 +consid 0 +take 0 +preview 0 +run 0 +find 0 +itemsund 0 +balloon 0 +menu 0 +particular 0 +onlin 0 +handbook 0 +portion 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quotedand 0 +duli 0 +credit 0 +copyright 0 +departmentof 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..93b94f6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,112 @@ +comput 1 +engr 0 +home 0 +autumn 0 +program 0 +cours 0 +hour 0 +scienc 0 +engin 0 +page 0 +class 0 +week 0 +lectur 0 +slide 0 +summer 0 +place 0 +test 0 +webmast 0 +like 0 +washington 0 +degre 0 +colleg 0 +martin 0 +dickei 0 +richard 0 +ladner 0 +welcom 0 +world 0 +wide 0 +short 0 +hypermediadocu 0 +contain 0 +bounti 0 +informationabout 0 +click 0 +highlight 0 +item 0 +moreinform 0 +messag 0 +check 0 +frequent 0 +syllabu 0 +sort 0 +schedulesth 0 +glanceweek 0 +activ 0 +schedulecomput 0 +lab 0 +includinglab 0 +watch 0 +chang 0 +staff 0 +includ 0 +instructor 0 +andta 0 +offic 0 +audiofrom 0 +homework 0 +examin 0 +midtermand 0 +final 0 +originallyschedul 0 +time 0 +studi 0 +guid 0 +andtim 0 +kind 0 +tip 0 +usingth 0 +compil 0 +macintosh 0 +user 0 +textbook 0 +code 0 +refer 0 +tutori 0 +special 0 +demo 0 +web 0 +earlier 0 +quarter 0 +less 0 +intactand 0 +invit 0 +brows 0 +notic 0 +problem 0 +forinst 0 +link 0 +work 0 +pleas 0 +send 0 +mail 0 +might 0 +look 0 +assign 0 +andth 0 +us 0 +previous 0 +winter 0 +spring 0 +inform 0 +avail 0 +univers 0 +depart 0 +art 0 +andrel 0 +major 0 +nonmajor 0 +comment 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..7bffad83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,58 @@ +assign 1 +beam 0 +washington 0 +offic 0 +sieg 0 +thursdai 0 +autumn 0 +hour 0 +nowitz 0 +wednesdai 0 +acrobat 0 +midterm 0 +spring 0 +discret 0 +structur 0 +instructorpaul 0 +edulectur 0 +phone 0 +appoint 0 +teach 0 +assistantjonathan 0 +edusect 0 +johnson 0 +section 0 +loew 0 +tuesdai 0 +handout 0 +syllabu 0 +induct 0 +recurs 0 +defin 0 +set 0 +postscript 0 +reader 0 +part 0 +ofyour 0 +browser 0 +novemb 0 +class 0 +sampl 0 +question 0 +homework 0 +previou 0 +cours 0 +web 0 +fall 0 +karp 0 +ruzzo 0 +winter 0 +leveson 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..ae8a19f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,96 @@ +latex 1 +solut 0 +homework 0 +text 0 +handout 0 +format 0 +exam 0 +html 0 +winter 0 +state 0 +diagram 0 +document 0 +autumn 0 +last 0 +updat 0 +cours 0 +regular 0 +grammar 0 +midterm 0 +final 0 +provid 0 +washington 0 +intro 0 +formal 0 +model 0 +richard 0 +ladnerclass 0 +messag 0 +check 0 +email 0 +frequent 0 +syllabu 0 +construct 0 +express 0 +extra 0 +rambl 0 +regard 0 +question 0 +construc 0 +pars 0 +review 0 +proof 0 +halt 0 +problem 0 +undecidableexam 0 +comment 0 +notat 0 +file 0 +materi 0 +three 0 +hypertext 0 +markup 0 +languag 0 +handl 0 +browser 0 +fact 0 +current 0 +look 0 +view 0 +mani 0 +origin 0 +convert 0 +us 0 +latexhtml 0 +strang 0 +plain 0 +ascii 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +legibl 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +postscript 0 +ghostscript 0 +home 0 +page 0 +free 0 +viewer 0 +window 0 +linux 0 +web 0 +previou 0 +quarter 0 +ladner 0 +edufix 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..53f7467a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,46 @@ +lectur 1 +messag 1 +mail 1 +list 1 +autumn 1 +formal 0 +model 0 +fall 0 +condon 0 +page 0 +check 0 +upcom 0 +class 0 +subscrib 0 +last 0 +updat 0 +previou 0 +winter 0 +washington 0 +introduct 0 +intro 0 +ann 0 +welcom 0 +home 0 +regularli 0 +findhomework 0 +solut 0 +set 0 +pointer 0 +exam 0 +sent 0 +willb 0 +log 0 +send 0 +majordomo 0 +includ 0 +userid 0 +email 0 +frequent 0 +homework 0 +handout 0 +content 0 +web 0 +quarter 0 +edukaye 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..27e3568c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,26 @@ +autumn 1 +last 0 +updat 0 +winter 0 +spring 0 +data 0 +structur 0 +martin 0 +tompaclass 0 +messag 0 +check 0 +mail 0 +frequent 0 +cours 0 +informationlab 0 +technot 0 +unix 0 +questionnaireloc 0 +cdeletemin 0 +algorithm 0 +treeshomework 0 +web 0 +previou 0 +quarter 0 +request 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..c3286449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,62 @@ +spring 1 +ladner 0 +class 0 +messag 0 +frequent 0 +washington 0 +home 0 +pagecs 0 +data 0 +structuresrichard 0 +instructordan 0 +fasulo 0 +teach 0 +assistantthi 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +inform 0 +theclass 0 +taught 0 +keep 0 +mind 0 +document 0 +static 0 +newinform 0 +especi 0 +ad 0 +click 0 +help 0 +check 0 +offic 0 +hour 0 +suggest 0 +read 0 +project 0 +homework 0 +exam 0 +lectur 0 +overheadsport 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quotedand 0 +duli 0 +credit 0 +copyright 0 +departmentof 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..585dc16a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,37 @@ +home 1 +page 1 +offer 0 +ofcs 0 +program 0 +languag 0 +autumn 0 +spring 0 +winter 0 +depart 0 +washington 0 +pagecs 0 +languagesfal 0 +quarter 0 +current 0 +informationth 0 +listinfo 0 +everi 0 +research 0 +pagehom 0 +computersci 0 +engineeringport 0 +reprint 0 +adapt 0 +academicnonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +dulycredit 0 +copyright 0 +comput 0 +scienceand 0 +engin 0 +univers 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..dae1c7dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,114 @@ +note 1 +last 0 +updat 0 +code 0 +april 0 +assign 0 +smalltalk 0 +transcript 0 +suggest 0 +read 0 +html 0 +postscript 0 +sieg 0 +lisp 0 +home 0 +us 0 +prolog 0 +page 0 +washington 0 +solut 0 +transcipt 0 +method 0 +offer 0 +ofcs 0 +program 0 +june 0 +grove 0 +emac 0 +sampl 0 +quiz 0 +languag 0 +section 0 +final 0 +exam 0 +mail 0 +eduoffic 0 +offic 0 +hour 0 +dave 0 +cours 0 +figur 0 +htmlpostscript 0 +march 0 +winter 0 +depart 0 +pagecs 0 +languagesspr 0 +quarter 0 +lectur 0 +review 0 +session 0 +mondai 0 +tuesdai 0 +thursdai 0 +instructor 0 +steve 0 +hanks 0 +hank 0 +administr 0 +syllabu 0 +overviewcours 0 +newsgroup 0 +help 0 +documentsgeneralintroduct 0 +new 0 +netscap 0 +unix 0 +turnin 0 +electron 0 +submiss 0 +homework 0 +clip 0 +save 0 +relatedrun 0 +reason 0 +thing 0 +relat 0 +done 0 +hand 0 +miss 0 +includ 0 +partial 0 +test 0 +daili 0 +class 0 +full 0 +interfac 0 +build 0 +databas 0 +employe 0 +informationth 0 +listinfo 0 +everi 0 +research 0 +pagehom 0 +autumn 0 +spring 0 +computersci 0 +engineeringport 0 +reprint 0 +adapt 0 +academicnonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +dulycredit 0 +copyright 0 +comput 0 +scienceand 0 +engin 0 +univers 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..a280f303 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,51 @@ +washington 1 +comput 1 +scienc 1 +engin 1 +index 0 +quarter 0 +webmast 0 +univers 0 +depart 0 +degre 0 +program 0 +offer 0 +colleg 0 +page 0 +pagecurr 0 +quarterth 0 +current 0 +previou 0 +quarterscours 0 +web 0 +earlier 0 +intact 0 +less 0 +younotic 0 +problem 0 +instanc 0 +link 0 +work 0 +pleas 0 +send 0 +mail 0 +spring 0 +inform 0 +avail 0 +art 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 +copyright 0 +comment 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..9cf5bf7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,124 @@ +class 1 +design 0 +mail 0 +instructor 0 +lectur 0 +home 0 +page 0 +gaetano 0 +announc 0 +last 0 +updat 0 +washington 0 +autumn 0 +quarter 0 +borriello 0 +corei 0 +us 0 +inform 0 +document 0 +send 0 +webmast 0 +administr 0 +final 0 +exam 0 +tool 0 +topic 0 +offic 0 +hour 0 +sieg 0 +logic 0 +katz 0 +benjamin 0 +cum 0 +addison 0 +weslei 0 +maintain 0 +comput 0 +introduct 0 +digit 0 +andersonwelcom 0 +contain 0 +whole 0 +bunch 0 +keep 0 +mind 0 +static 0 +especi 0 +messag 0 +ad 0 +frequent 0 +problem 0 +gener 0 +tocs 0 +notic 0 +system 0 +archiv 0 +messagess 0 +everyon 0 +cours 0 +goal 0 +syllabu 0 +meet 0 +time 0 +mondai 0 +decemb 0 +workload 0 +grade 0 +expect 0 +laboratori 0 +softwar 0 +polici 0 +collabor 0 +cheat 0 +address 0 +overal 0 +schedul 0 +anderson 0 +corin 0 +aweekli 0 +assign 0 +weekli 0 +quizz 0 +onlin 0 +version 0 +slide 0 +textbook 0 +contemporari 0 +author 0 +publish 0 +note 0 +interest 0 +evolut 0 +implement 0 +technolog 0 +aid 0 +synario 0 +feedback 0 +tell 0 +think 0 +thing 0 +go 0 +even 0 +anonym 0 +desir 0 +link 0 +previou 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 +copyright 0 +depart 0 +scienc 0 +engin 0 +univers 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..b40f3df5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,77 @@ +assign 1 +inform 0 +data 0 +structur 0 +tanimoto 0 +hall 0 +offic 0 +hour 0 +assist 0 +option 0 +grade 0 +midterm 0 +project 0 +final 0 +late 0 +cours 0 +pagecs 0 +algorithmsautumn 0 +basic 0 +instructor 0 +steve 0 +washington 0 +sieg 0 +room 0 +appoint 0 +teach 0 +anhai 0 +doan 0 +announc 0 +place 0 +dai 0 +time 0 +smith 0 +comput 0 +facil 0 +unix 0 +account 0 +mscc 0 +student 0 +languag 0 +requir 0 +lisp 0 +textbook 0 +shaffer 0 +practic 0 +introduct 0 +algorithm 0 +analysi 0 +publish 0 +summer 0 +prentic 0 +breakdown 0 +tent 0 +polici 0 +keep 0 +manag 0 +encourag 0 +punctual 0 +work 0 +point 0 +deduct 0 +penalti 0 +schedul 0 +updat 0 +aboutth 0 +topic 0 +studi 0 +examinform 0 +exambas 0 +us 0 +compilerassignmentssolut 0 +assignmentsteach 0 +informationscheduleweb 0 +previou 0 +offer 0 +winter 0 +autumn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..d8b76d31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,40 @@ +comput 1 +washington 0 +scienc 0 +engin 0 +holden 0 +nowitz 0 +cours 0 +degre 0 +program 0 +offer 0 +colleg 0 +major 0 +home 0 +pagecs 0 +data 0 +structur 0 +algorithmsspr 0 +instructor 0 +alistair 0 +jonathan 0 +class 0 +messag 0 +last 0 +updat 0 +mondai 0 +materi 0 +syllabu 0 +homework 0 +demo 0 +exam 0 +inform 0 +depart 0 +art 0 +relat 0 +mosaic 0 +help 0 +interest 0 +page 0 +raini 0 +funnowitz 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..92c2a5cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,64 @@ +test 1 +file 1 +assign 0 +homework 0 +program 0 +indic 0 +enclos 0 +data 0 +meet 0 +siegtelephon 0 +email 0 +washington 0 +eduoffic 0 +hour 0 +next 0 +quot 0 +search 0 +cours 0 +pagecs 0 +structur 0 +algorithmswint 0 +time 0 +place 0 +sieg 0 +instructor 0 +linda 0 +shapirooffic 0 +shapiro 0 +denis 0 +pinneloffic 0 +denisep 0 +syllabustransparencieshomework 0 +assignmentshomework 0 +answer 0 +assignmentsprogram 0 +note 0 +set 0 +follow 0 +line 0 +begin 0 +insert 0 +tree 0 +inquot 0 +state 0 +charact 0 +long 0 +come 0 +integ 0 +length 0 +associatedvalu 0 +final 0 +string 0 +valu 0 +also 0 +linebegin 0 +find 0 +isfollow 0 +object 0 +model 0 +graphimag 0 +graphreview 0 +listsfin 0 +studi 0 +sheet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..5ba87396 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,25 @@ +inform 1 +class 1 +compil 0 +classhomethi 0 +world 0 +wide 0 +hypermedia 0 +documentfor 0 +contain 0 +keep 0 +inmind 0 +document 0 +static 0 +willb 0 +ad 0 +frequent 0 +urgent 0 +announc 0 +assign 0 +onlin 0 +meet 0 +admin 0 +washington 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..8f2eb05d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,234 @@ +project 1 +softwar 0 +manag 0 +respons 0 +engin 0 +design 0 +includ 0 +plan 0 +primari 0 +document 0 +system 0 +class 0 +duti 0 +work 0 +experi 0 +student 0 +review 0 +provid 0 +group 0 +organ 0 +learn 0 +program 0 +role 0 +qualiti 0 +configur 0 +product 0 +user 0 +boe 0 +teach 0 +portfolio 0 +specif 0 +analysi 0 +quarter 0 +particip 0 +develop 0 +meet 0 +requir 0 +mainten 0 +control 0 +secur 0 +specialist 0 +experiment 0 +cours 0 +leveson 0 +concept 0 +team 0 +real 0 +industri 0 +written 0 +commun 0 +tool 0 +test 0 +also 0 +technic 0 +topic 0 +interact 0 +exampl 0 +first 0 +enough 0 +instructor 0 +addit 0 +set 0 +done 0 +leadership 0 +howev 0 +everyon 0 +activ 0 +write 0 +make 0 +creation 0 +overal 0 +issu 0 +evalu 0 +chang 0 +ensur 0 +human 0 +interfac 0 +assur 0 +conduct 0 +deliver 0 +mockup 0 +prototyp 0 +expertis 0 +reliabl 0 +home 0 +pagecs 0 +professor 0 +nanci 0 +offic 0 +sieg 0 +phone 0 +hour 0 +appoint 0 +mail 0 +washington 0 +educours 0 +descriptioninstruct 0 +object 0 +terminolog 0 +fundament 0 +oral 0 +skill 0 +produc 0 +studi 0 +method 0 +construct 0 +larg 0 +softwaresystem 0 +essenti 0 +tocreat 0 +complex 0 +successfulli 0 +effectiveor 0 +latter 0 +topicsar 0 +feedback 0 +sai 0 +import 0 +employersand 0 +often 0 +lack 0 +graduat 0 +version 0 +last 0 +realbo 0 +largegroup 0 +sever 0 +reason 0 +try 0 +approach 0 +isthat 0 +short 0 +realist 0 +cannotlearn 0 +session 0 +devotedto 0 +discuss 0 +regular 0 +thegroup 0 +usual 0 +hard 0 +isto 0 +effectivelytogeth 0 +head 0 +disast 0 +beavoid 0 +correct 0 +worktogeth 0 +requirementsanalysi 0 +possibl 0 +thenorm 0 +areal 0 +search 0 +engineeringinstitut 0 +master 0 +providedat 0 +assign 0 +playthat 0 +allow 0 +posit 0 +attach 0 +listof 0 +phase 0 +projectso 0 +part 0 +theproject 0 +outlin 0 +natur 0 +principl 0 +process 0 +model 0 +risk 0 +assess 0 +cost 0 +estim 0 +metric 0 +verif 0 +valid 0 +evolut 0 +reus 0 +ethic 0 +profession 0 +embed 0 +safeti 0 +take 0 +ofth 0 +aspect 0 +softwaredevelop 0 +member 0 +responsiblefor 0 +present 0 +administr 0 +assist 0 +updat 0 +track 0 +statu 0 +sure 0 +proper 0 +held 0 +get 0 +time 0 +princip 0 +architect 0 +consist 0 +hardwar 0 +platform 0 +transit 0 +exist 0 +augment 0 +necessari 0 +current 0 +factor 0 +respect 0 +survei 0 +interview 0 +employe 0 +releas 0 +duri 0 +characterist 0 +normal 0 +languag 0 +implement 0 +handl 0 +appear 0 +clariti 0 +manual 0 +determin 0 +us 0 +readabl 0 +understand 0 +support 0 +creat 0 +guid 0 +deliv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..103ca214 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,95 @@ +assign 1 +softwar 0 +note 0 +mail 0 +washington 0 +cours 0 +system 0 +time 0 +leveson 0 +offic 0 +sieg 0 +hour 0 +appoint 0 +carlson 0 +specif 0 +includ 0 +engin 0 +requir 0 +winter 0 +home 0 +pagecs 0 +engineeringmeet 0 +locat 0 +loew 0 +mondai 0 +wednesdai 0 +fridai 0 +professor 0 +nanci 0 +phone 0 +eduta 0 +adam 0 +educours 0 +descriptionthi 0 +studi 0 +concept 0 +method 0 +tool 0 +design 0 +construct 0 +test 0 +analysi 0 +document 0 +larg 0 +also 0 +technic 0 +topic 0 +essenti 0 +creat 0 +complex 0 +successfulli 0 +project 0 +manag 0 +textbookghezzi 0 +jazayeri 0 +mandrioli 0 +fundament 0 +prentic 0 +hall 0 +sampl 0 +interview 0 +question 0 +produc 0 +consum 0 +petri 0 +axiomat 0 +coupl 0 +cohes 0 +link 0 +interest 0 +syllabu 0 +updat 0 +pleas 0 +read 0 +newsgroup 0 +access 0 +machin 0 +send 0 +class 0 +mailinglist 0 +new 0 +comp 0 +risk 0 +militari 0 +standard 0 +defens 0 +develop 0 +inform 0 +avail 0 +spring 0 +comput 0 +scienc 0 +departmentsuggest 0 +feedback 0 +request 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..676a107a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,346 @@ +lisp 1 +januari 0 +program 0 +languag 0 +copi 0 +common 0 +assign 0 +februari 0 +tuesdai 0 +sieg 0 +cours 0 +exam 0 +march 0 +thursdai 0 +project 0 +onlin 0 +part 0 +macintosh 0 +mscc 0 +final 0 +read 0 +book 0 +system 0 +student 0 +implement 0 +ruth 0 +hall 0 +room 0 +schedul 0 +mail 0 +list 0 +materi 0 +file 0 +facil 0 +email 0 +archiv 0 +examin 0 +info 0 +tutori 0 +mac 0 +window 0 +pleas 0 +review 0 +session 0 +solut 0 +comput 0 +thompson 0 +offic 0 +hour 0 +mondai 0 +topic 0 +also 0 +midterm 0 +class 0 +mani 0 +artifici 0 +intellig 0 +sever 0 +includ 0 +purchas 0 +center 0 +us 0 +edit 0 +refer 0 +provid 0 +support 0 +java 0 +call 0 +compil 0 +cover 0 +issu 0 +unix 0 +work 0 +note 0 +help 0 +turn 0 +page 0 +writeup 0 +winter 0 +steve 0 +tanimoto 0 +depart 0 +scienc 0 +washington 0 +meet 0 +wednesdai 0 +post 0 +number 0 +homework 0 +text 0 +get 0 +element 0 +question 0 +standard 0 +access 0 +download 0 +html 0 +world 0 +wide 0 +techniqu 0 +offer 0 +host 0 +extens 0 +inthompson 0 +stat 0 +free 0 +given 0 +close 0 +messag 0 +click 0 +token 0 +bring 0 +check 0 +koch 0 +descript 0 +fridai 0 +mileston 0 +home 0 +pagecs 0 +instructor 0 +engin 0 +univers 0 +seattl 0 +andersonmeet 0 +except 0 +time 0 +tent 0 +transpar 0 +past 0 +lectur 0 +credit 0 +grade 0 +particip 0 +hardcopi 0 +select 0 +section 0 +first 0 +ofread 0 +recent 0 +throughout 0 +logic 0 +prolog 0 +grammar 0 +andpars 0 +understand 0 +shell 0 +expert 0 +either 0 +whole 0 +bookstor 0 +approxim 0 +order 0 +combin 0 +fromth 0 +librari 0 +entitl 0 +freeman 0 +chapter 0 +introduct 0 +incommon 0 +glossari 0 +purchasedsepar 0 +basement 0 +commun 0 +build 0 +answer 0 +andit 0 +referenceon 0 +seem 0 +best 0 +tabl 0 +contentspag 0 +rather 0 +try 0 +entir 0 +orpostscript 0 +sourc 0 +code 0 +usingcommon 0 +site 0 +fordigitool 0 +compani 0 +thatmaintain 0 +currentinform 0 +interest 0 +link 0 +applic 0 +introductionto 0 +anoth 0 +websit 0 +thatdoesn 0 +alwai 0 +respond 0 +promptli 0 +trail 0 +announc 0 +welcom 0 +although 0 +catalog 0 +theirimplement 0 +interpret 0 +buildingprogram 0 +attent 0 +focu 0 +mainli 0 +particular 0 +programminglanguag 0 +explor 0 +relev 0 +tointepret 0 +investig 0 +addit 0 +tradit 0 +alsolook 0 +current 0 +visual 0 +programmingfacil 0 +quarter 0 +run 0 +themathemat 0 +locat 0 +thebas 0 +varieti 0 +mathematica 0 +yacc 0 +allegrocommon 0 +softwar 0 +particularli 0 +powerfulenviron 0 +full 0 +integr 0 +editor 0 +fred 0 +graphicsand 0 +user 0 +interfac 0 +construct 0 +network 0 +easili 0 +transfer 0 +machinesof 0 +difficulti 0 +supplement 0 +theirown 0 +packag 0 +xlisp 0 +micro 0 +emac 0 +howev 0 +limit 0 +abil 0 +altern 0 +wish 0 +need 0 +theseresourc 0 +internet 0 +thatxlisp 0 +bare 0 +bone 0 +nothav 0 +develop 0 +fortun 0 +power 0 +disadvantag 0 +must 0 +labunless 0 +fromdigitool 0 +normal 0 +cost 0 +special 0 +dealallow 0 +point 0 +version 0 +allegro 0 +lispfor 0 +franz 0 +attract 0 +tool 0 +test 0 +introduc 0 +thelaboratori 0 +thistim 0 +instead 0 +go 0 +regular 0 +classroom 0 +week 0 +move 0 +beginn 0 +guid 0 +might 0 +sent 0 +new 0 +group 0 +regardingread 0 +path 0 +remind 0 +todai 0 +even 0 +hard 0 +printout 0 +exampl 0 +onthursdai 0 +option 0 +held 0 +insieg 0 +becov 0 +convert 0 +string 0 +symbol 0 +import 0 +found 0 +send 0 +receiv 0 +modif 0 +deadlin 0 +announcedearli 0 +plu 0 +postscript 0 +viewer 0 +avail 0 +pictur 0 +snowflak 0 +detail 0 +complet 0 +projectgener 0 +find 0 +aboutdemonstr 0 +onmondai 0 +multipl 0 +choic 0 +format 0 +mark 0 +sens 0 +form 0 +pencil 0 +exercisestokenizerassign 0 +andpart 0 +parsertokenizerpart 0 +snowflakeassign 0 +local 0 +gener 0 +ondemonstr 0 +show 0 +displai 0 +demonstr 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..95962480 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,66 @@ +assign 1 +program 0 +introduct 0 +inform 0 +april 0 +artifici 0 +hour 0 +solut 0 +second 0 +postscript 0 +intellig 0 +holden 0 +offic 0 +redston 0 +mondai 0 +instruct 0 +project 0 +first 0 +homework 0 +third 0 +final 0 +allegro 0 +emac 0 +refcard 0 +intelligencecs 0 +spring 0 +professor 0 +alistair 0 +csoffic 0 +noon 0 +noonta 0 +joshua 0 +msoffic 0 +thompson 0 +thursdai 0 +text 0 +rich 0 +knight 0 +secondedit 0 +touretzki 0 +common 0 +lisp 0 +gentl 0 +symboliccomput 0 +gener 0 +basic 0 +comput 0 +cours 0 +outlin 0 +turnin 0 +sampl 0 +june 0 +exam 0 +us 0 +emacsinterfac 0 +interfac 0 +download 0 +standalonelisp 0 +macintosh 0 +note 0 +check 0 +grade 0 +record 0 +type 0 +gradesredston 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..cd911ccd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,76 @@ +acrobat 1 +format 0 +winter 0 +latex 0 +syllabu 0 +midtem 0 +solut 0 +postscript 0 +ruzzo 0 +last 0 +updat 0 +text 0 +file 0 +legibl 0 +adob 0 +free 0 +viewer 0 +page 0 +intro 0 +algorithm 0 +larri 0 +martin 0 +tompaclass 0 +messag 0 +check 0 +email 0 +frequent 0 +book 0 +errata 0 +list 0 +handout 0 +homework 0 +web 0 +previou 0 +quarter 0 +karlin 0 +thecours 0 +materi 0 +provid 0 +three 0 +plain 0 +ascii 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +latest 0 +greatest 0 +ghostscript 0 +home 0 +window 0 +linux 0 +time 0 +support 0 +fewer 0 +system 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +print 0 +ghostscriptcan 0 +exampl 0 +tompa 0 +aberman 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..df39b707 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,88 @@ +midterm 1 +final 1 +spring 0 +everyth 0 +acrobat 0 +format 0 +jayram 0 +page 0 +washington 0 +syllabu 0 +theori 0 +comput 0 +larri 0 +ruzzo 0 +gener 0 +instructor 0 +time 0 +offic 0 +hour 0 +sieg 0 +home 0 +problem 0 +class 0 +mail 0 +latex 0 +postscript 0 +file 0 +legibl 0 +adob 0 +free 0 +viewer 0 +intro 0 +introduct 0 +inform 0 +thathachar 0 +meet 0 +tent 0 +welcom 0 +document 0 +sendmail 0 +last 0 +updat 0 +messag 0 +sent 0 +list 0 +textbook 0 +errata 0 +handout 0 +admin 0 +solut 0 +sourc 0 +thecours 0 +materi 0 +provid 0 +three 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +quit 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +latest 0 +greatest 0 +ghostscript 0 +window 0 +linux 0 +support 0 +fewer 0 +system 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +print 0 +ghostscriptcan 0 +exampl 0 +cours 0 +web 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..1cd5fdbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,48 @@ +databas 1 +offic 1 +link 1 +introduct 0 +shapiro 0 +sieg 0 +hour 0 +page 0 +imag 0 +systemscs 0 +systemsfal 0 +quarter 0 +instructor 0 +prof 0 +linda 0 +telephon 0 +patrick 0 +crowlei 0 +pcrowlei 0 +announc 0 +syllabu 0 +assign 0 +homework 0 +word 0 +shift 0 +left 0 +click 0 +save 0 +potenti 0 +us 0 +unisql 0 +home 0 +qbic 0 +queri 0 +content 0 +manag 0 +system 0 +dbm 0 +probabl 0 +interest 0 +want 0 +know 0 +window 0 +back 0 +cours 0 +webcs 0 +request 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..3cc2b295 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,77 @@ +project 1 +page 0 +lectur 0 +section 0 +instructor 0 +washington 0 +cours 0 +solut 0 +feedback 0 +home 0 +brian 0 +bershad 0 +offic 0 +hour 0 +midterm 0 +materi 0 +avail 0 +person 0 +introduct 0 +oper 0 +system 0 +autumn 0 +sung 0 +choi 0 +sieg 0 +appoint 0 +intro 0 +adminth 0 +class 0 +outlin 0 +administr 0 +info 0 +textbook 0 +grade 0 +andoth 0 +word 0 +wisdom 0 +messag 0 +mail 0 +sent 0 +archiv 0 +scale 0 +first 0 +schedulewhat 0 +cover 0 +schedul 0 +aggress 0 +andwil 0 +updat 0 +regularli 0 +reflect 0 +actual 0 +pace 0 +note 0 +handout 0 +slide 0 +projectsdescript 0 +relat 0 +solutionsto 0 +notesnot 0 +watchthi 0 +space 0 +carefulli 0 +inform 0 +vital 0 +surviv 0 +andgrad 0 +hint 0 +appear 0 +receiv 0 +onproject 0 +send 0 +anonym 0 +wish 0 +lost 0 +click 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..40cc427c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,87 @@ +home 1 +page 0 +quarter 0 +comput 0 +autumn 0 +document 0 +help 0 +cours 0 +spring 0 +scienc 0 +engin 0 +degre 0 +netscap 0 +quarterwelcom 0 +world 0 +wide 0 +hypermedia 0 +contain 0 +bounti 0 +inform 0 +class 0 +keepin 0 +mind 0 +mean 0 +static 0 +informationwil 0 +ad 0 +frequent 0 +problem 0 +thisdocu 0 +send 0 +mail 0 +pighin 0 +click 0 +classpersonnel 0 +professor 0 +student 0 +syllabuscours 0 +calendarta 0 +offic 0 +hourshandout 0 +assignmentslectur 0 +notesread 0 +assignmentshomework 0 +assignmentsprojectsproject 0 +handoutsproject 0 +artifactsproject 0 +sessionsproject 0 +grade 0 +policyproject 0 +write 0 +upslibui 0 +documentationoth 0 +relat 0 +informationget 0 +classhearn 0 +baker 0 +erratath 0 +instruct 0 +labus 0 +indi 0 +guid 0 +opengl 0 +exampl 0 +program 0 +winter 0 +pagegraph 0 +linkssgi 0 +silicon 0 +surfgrafica 0 +obscurasiggraphgrailgraph 0 +site 0 +indexoth 0 +us 0 +linksmvi 0 +visitor 0 +room 0 +schedul 0 +departmentth 0 +programth 0 +programweb 0 +helpbas 0 +helpmosa 0 +lynxus 0 +indyspighin 0 +washington 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..58c0eb3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,106 @@ +offic 1 +format 1 +hour 0 +disabl 0 +html 0 +somani 0 +jari 0 +accommod 0 +document 0 +class 0 +student 0 +servic 0 +letter 0 +provid 0 +latex 0 +autumn 0 +intro 0 +comput 0 +commun 0 +networksautumn 0 +instructor 0 +arun 0 +cslectur 0 +sieg 0 +eebphon 0 +kristensen 0 +csoffic 0 +chang 0 +tomatch 0 +prof 0 +thu 0 +cover 0 +everi 0 +week 0 +andprovid 0 +larger 0 +timewindow 0 +consult 0 +messag 0 +check 0 +email 0 +frequent 0 +lectur 0 +overheadshomeworksprojectsinterest 0 +stuffattentionif 0 +would 0 +like 0 +request 0 +academ 0 +pleasecontact 0 +schmitz 0 +havea 0 +indic 0 +requiresacadem 0 +pleas 0 +present 0 +discuss 0 +might 0 +need 0 +file 0 +cours 0 +materi 0 +three 0 +hypertext 0 +markup 0 +languag 0 +handl 0 +browser 0 +fact 0 +current 0 +look 0 +view 0 +mani 0 +handout 0 +origin 0 +convert 0 +us 0 +latexhtml 0 +strang 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +legibl 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +postscript 0 +ghostscript 0 +home 0 +page 0 +free 0 +viewer 0 +window 0 +linux 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..34f954de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,78 @@ +format 1 +sieg 0 +html 0 +ladner 0 +document 0 +offic 0 +hour 0 +provid 0 +latex 0 +washington 0 +spring 0 +intro 0 +comput 0 +commun 0 +networksspr 0 +instructor 0 +richard 0 +cslectur 0 +phone 0 +noonta 0 +william 0 +chan 0 +wchan 0 +csoffic 0 +class 0 +messag 0 +check 0 +email 0 +frequent 0 +lectur 0 +overheadshomeworksprojectsabout 0 +file 0 +cours 0 +materi 0 +three 0 +hypertext 0 +markup 0 +languag 0 +handl 0 +browser 0 +fact 0 +current 0 +look 0 +view 0 +mani 0 +handout 0 +origin 0 +convert 0 +us 0 +latexhtml 0 +strang 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +legibl 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +postscript 0 +ghostscript 0 +home 0 +page 0 +free 0 +viewer 0 +window 0 +linux 0 +eduwchan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..5733a077 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,10 @@ +fall 1 +home 0 +pagecs 0 +advanc 0 +digit 0 +designt 0 +kehl 0 +page 0 +found 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..f27ae8b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,124 @@ +logic 1 +data 1 +home 0 +page 0 +march 0 +homework 0 +kehl 0 +inform 0 +hour 0 +offic 0 +final 0 +fridai 0 +februari 0 +midnight 0 +sampl 0 +option 0 +optionlab 0 +combin 0 +sequenti 0 +fpga 0 +depart 0 +comput 0 +scienc 0 +engin 0 +book 0 +server 0 +washington 0 +pagecs 0 +advanc 0 +digit 0 +designt 0 +fall 0 +welcom 0 +cours 0 +time 0 +place 0 +johnson 0 +import 0 +announc 0 +last 0 +updat 0 +summari 0 +syllabusschedul 0 +polici 0 +staff 0 +instructor 0 +mark 0 +savoi 0 +tue 0 +savac 0 +richard 0 +chinn 0 +thur 0 +richin 0 +howard 0 +chang 0 +gener 0 +shchang 0 +csjason 0 +aaron 0 +scott 0 +stephen 0 +hardwar 0 +laboratori 0 +manag 0 +student 0 +work 0 +group 0 +exam 0 +review 0 +topic 0 +cover 0 +quarterhomework 0 +assignmentsweb 0 +duehomework 0 +writeup 0 +written 0 +assign 0 +project 0 +abel 0 +state 0 +machin 0 +test 0 +fixtur 0 +handout 0 +memori 0 +communicationoth 0 +murphi 0 +recent 0 +dilbert 0 +comic 0 +collect 0 +resourc 0 +care 0 +gaetano 0 +borriello 0 +list 0 +vlsi 0 +link 0 +comprehensivelist 0 +icmanufactur 0 +nation 0 +semiconductor 0 +sheet 0 +motorola 0 +philip 0 +semiconduct 0 +micron 0 +technolog 0 +sheetsth 0 +copyright 0 +univers 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..fbcaed9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,102 @@ +postscript 1 +mondai 0 +wednesdai 0 +read 0 +fridai 0 +design 0 +homework 0 +verilog 0 +pipelin 0 +program 0 +comput 0 +address 0 +sieg 0 +review 0 +answer 0 +sampl 0 +assembl 0 +organ 0 +snyder 0 +offic 0 +hour 0 +mail 0 +instruct 0 +simul 0 +sheet 0 +skim 0 +holidai 0 +mip 0 +segment 0 +binari 0 +data 0 +fall 0 +gener 0 +inform 0 +meet 0 +loew 0 +instructor 0 +larri 0 +appoint 0 +assist 0 +judi 0 +watson 0 +jwatson 0 +robert 0 +chenoffic 0 +tuesdai 0 +thursdays 0 +chensg 0 +catalog 0 +descript 0 +model 0 +structur 0 +function 0 +arithmet 0 +logic 0 +unit 0 +regist 0 +transfer 0 +level 0 +hardwar 0 +microprogram 0 +control 0 +memori 0 +hierarchi 0 +andorgan 0 +system 0 +compon 0 +interconnect 0 +laboratoryproject 0 +involv 0 +setprocessor 0 +prerequisit 0 +class 0 +note 0 +html 0 +appendix 0 +color 0 +chap 0 +revis 0 +midterm 0 +fast 0 +materi 0 +follow 0 +file 0 +avail 0 +addit 0 +modul 0 +common 0 +sourc 0 +form 0 +test 0 +prog 0 +simpl 0 +languag 0 +page 0 +previou 0 +quarter 0 +referencesthi 0 +free 0 +refer 0 +card 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..20931c99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,232 @@ +lisp 1 +class 0 +april 0 +cours 0 +newsgroup 0 +assign 0 +sieg 0 +final 0 +mondai 0 +fridai 0 +exam 0 +wednesdai 0 +us 0 +project 0 +common 0 +artifici 0 +intellig 0 +tuesdai 0 +post 0 +mail 0 +interfac 0 +schedul 0 +book 0 +program 0 +creat 0 +access 0 +topic 0 +midterm 0 +edit 0 +separ 0 +examin 0 +test 0 +march 0 +system 0 +review 0 +spring 0 +home 0 +steve 0 +tanimoto 0 +comput 0 +washington 0 +jeremi 0 +hall 0 +room 0 +offic 0 +hour 0 +machin 0 +messag 0 +send 0 +csor 0 +new 0 +pnew 0 +list 0 +implement 0 +read 0 +element 0 +purchas 0 +mathemat 0 +theori 0 +languag 0 +link 0 +allegro 0 +window 0 +bring 0 +mark 0 +sens 0 +form 0 +represent 0 +search 0 +reason 0 +neural 0 +net 0 +expert 0 +page 0 +term 0 +session 0 +part 0 +lectur 0 +preliminari 0 +demo 0 +peer 0 +pagecs 0 +introduct 0 +instructor 0 +depart 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +baermeet 0 +thursdai 0 +archiv 0 +tent 0 +number 0 +credit 0 +grade 0 +homework 0 +particip 0 +hardcopi 0 +requir 0 +text 0 +freeman 0 +work 0 +hard 0 +textbook 0 +self 0 +contain 0 +exampl 0 +youdon 0 +onlin 0 +refer 0 +materi 0 +provid 0 +answer 0 +mani 0 +question 0 +andit 0 +standard 0 +referenceon 0 +seem 0 +best 0 +tabl 0 +contentspag 0 +rather 0 +try 0 +download 0 +entir 0 +html 0 +file 0 +orpostscript 0 +sourc 0 +code 0 +usingcommon 0 +interest 0 +info 0 +forprogram 0 +world 0 +wide 0 +applic 0 +facil 0 +order 0 +take 0 +advantag 0 +windowsimplement 0 +excel 0 +featur 0 +programdevelop 0 +construct 0 +theintel 0 +pentium 0 +laboratori 0 +free 0 +version 0 +isfor 0 +bedownload 0 +franz 0 +kind 0 +least 0 +attract 0 +option 0 +givenaccord 0 +close 0 +multipl 0 +choic 0 +alist 0 +know 0 +announc 0 +welcom 0 +coversboth 0 +implementationof 0 +techniqu 0 +includ 0 +programmingtechniqu 0 +knowledg 0 +logicalreason 0 +probabilist 0 +case 0 +base 0 +plan 0 +learn 0 +understand 0 +vision 0 +clo 0 +staff 0 +given 0 +held 0 +inour 0 +regular 0 +meet 0 +note 0 +continu 0 +programmingpart 0 +turn 0 +paper 0 +follow 0 +solut 0 +exercis 0 +state 0 +screenshot 0 +user 0 +descript 0 +ofhow 0 +go 0 +gener 0 +move 0 +current 0 +statu 0 +workload 0 +reduct 0 +propos 0 +circul 0 +email 0 +approv 0 +rest 0 +ofproject 0 +either 0 +give 0 +right 0 +orturn 0 +progress 0 +report 0 +memori 0 +holidai 0 +insieg 0 +explan 0 +evalu 0 +wrap 0 +demonstr 0 +evaluationof 0 +june 0 +assignmentsassign 0 +portion 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..2b7d5702 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,89 @@ +home 1 +burn 1 +data 1 +page 0 +inform 0 +hour 0 +societi 0 +robot 0 +depart 0 +comput 0 +scienc 0 +engin 0 +sourc 0 +book 0 +washington 0 +pagecs 0 +digit 0 +system 0 +designstev 0 +spring 0 +welcom 0 +cours 0 +time 0 +place 0 +loew 0 +import 0 +announc 0 +last 0 +updat 0 +summari 0 +syllabu 0 +schedul 0 +polici 0 +staff 0 +steve 0 +instructor 0 +kent 0 +smith 0 +casei 0 +anderson 0 +stephen 0 +hardwar 0 +laboratori 0 +manag 0 +offic 0 +studentslab 0 +assign 0 +mchc 0 +info 0 +nice 0 +introduct 0 +fred 0 +martinrobot 0 +seattl 0 +portland 0 +area 0 +societyoth 0 +murphi 0 +recent 0 +dilbert 0 +comic 0 +collect 0 +resourc 0 +care 0 +gaetano 0 +borriello 0 +comprehens 0 +list 0 +nation 0 +semiconductor 0 +sheet 0 +motorola 0 +server 0 +philip 0 +semiconduct 0 +serverth 0 +copyright 0 +univers 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +accur 0 +quot 0 +duli 0 +credit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..eef16262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,34 @@ +cours 1 +inform 1 +project 1 +home 0 +pagewelcom 0 +contain 0 +bevi 0 +relatingto 0 +usual 0 +document 0 +frequentlychang 0 +send 0 +mail 0 +bswest 0 +csif 0 +encount 0 +problem 0 +classpersonnelsyllabuslectur 0 +scheduleguest 0 +lectur 0 +scheduleoffic 0 +hoursproject 0 +handout 0 +schedul 0 +help 0 +session 0 +final 0 +projectoth 0 +bug 0 +erratarefer 0 +pagesmidterm 0 +questionnairebswest 0 +washington 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..0a2481b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,94 @@ +avail 1 +cecil 0 +year 0 +exam 0 +vortex 0 +languag 0 +compil 0 +postscript 0 +assign 0 +list 0 +answer 0 +refer 0 +version 0 +program 0 +quarter 0 +chamber 0 +offic 0 +hour 0 +sieg 0 +vass 0 +read 0 +last 0 +midterm 0 +final 0 +travers 0 +manual 0 +page 0 +research 0 +home 0 +pagecs 0 +implement 0 +languageswint 0 +import 0 +cours 0 +informationmeet 0 +time 0 +instructor 0 +craigchamb 0 +start 0 +second 0 +week 0 +litvinov 0 +cubicl 0 +floor 0 +archivesslid 0 +lectur 0 +handout 0 +full 0 +class 0 +homework 0 +messag 0 +sent 0 +mail 0 +archivedher 0 +note 0 +test 0 +closedbook 0 +affect 0 +kind 0 +question 0 +wereask 0 +sampl 0 +solut 0 +informationhandout 0 +tutorialsth 0 +tutorialth 0 +tutorialhow 0 +front 0 +enda 0 +file 0 +interestdead 0 +elim 0 +simpl 0 +exampl 0 +idfacfg 0 +interfac 0 +frameworkvortex 0 +textual 0 +descript 0 +grammarcecil 0 +documentationdocument 0 +html 0 +format 0 +standard 0 +librari 0 +resourcesth 0 +previou 0 +includ 0 +slide 0 +inform 0 +found 0 +onmark 0 +leon 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..6ed63291 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,62 @@ +avail 1 +languag 0 +program 0 +cecil 0 +vortex 0 +compil 0 +found 0 +home 0 +implement 0 +cours 0 +sieg 0 +chamber 0 +offic 0 +hour 0 +jdean 0 +grove 0 +chateau 0 +read 0 +list 0 +inform 0 +page 0 +research 0 +pagecs 0 +languagesimport 0 +informationmeet 0 +time 0 +instructor 0 +craig 0 +jeff 0 +dean 0 +dave 0 +come 0 +find 0 +confer 0 +room 0 +archivesslid 0 +lectur 0 +slide 0 +turori 0 +handout 0 +assign 0 +full 0 +class 0 +messag 0 +sent 0 +mail 0 +archiv 0 +projectth 0 +project 0 +sort 0 +analysi 0 +andtransform 0 +optimizingcompil 0 +object 0 +orient 0 +written 0 +cecilproject 0 +manual 0 +resourcesmor 0 +onmark 0 +leon 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..0df548a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,13 @@ +assign 1 +home 0 +pagecs 0 +softwar 0 +engineeringdavid 0 +notkin 0 +spring 0 +introductori 0 +handout 0 +kwic 0 +sampl 0 +projectsnotkin 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..9beb8393 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,86 @@ +languag 1 +list 0 +page 0 +program 0 +mail 0 +send 0 +washington 0 +home 0 +notkin 0 +offic 0 +hour 0 +handout 0 +thread 0 +messag 0 +subscrib 0 +research 0 +resourc 0 +lambda 0 +calculu 0 +univers 0 +orient 0 +pagecs 0 +concept 0 +languagesautumn 0 +loew 0 +instructor 0 +david 0 +sieg 0 +byappoint 0 +kurt 0 +partridg 0 +kepart 0 +floor 0 +cubiclescours 0 +introductori 0 +html 0 +postscript 0 +assign 0 +readingsmail 0 +archivesw 0 +us 0 +administr 0 +instructionalpurpos 0 +wish 0 +refer 0 +previous 0 +sent 0 +archiv 0 +emailto 0 +majordomo 0 +singl 0 +line 0 +bodi 0 +subject 0 +csegener 0 +yahoo 0 +mark 0 +leon 0 +excel 0 +pagesprogram 0 +critiquesgari 0 +leaven 0 +self 0 +studi 0 +pagefunct 0 +resourcesmit 0 +scheme 0 +pagecmu 0 +standard 0 +pagea 0 +gentl 0 +introduct 0 +mlhaskel 0 +monash 0 +universityobject 0 +geneva 0 +object 0 +info 0 +cecil 0 +project 0 +dylan 0 +carnegi 0 +mellon 0 +appl 0 +comput 0 +question 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..13911669 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,118 @@ +homework 1 +solut 0 +exam 0 +anderson 0 +washington 0 +hour 0 +class 0 +mondai 0 +project 0 +gilligan 0 +design 0 +analysi 0 +algorithm 0 +lectur 0 +offic 0 +time 0 +confer 0 +room 0 +cours 0 +set 0 +discuss 0 +write 0 +island 0 +algorithmscs 0 +winter 0 +instructor 0 +richard 0 +seig 0 +appoint 0 +teach 0 +assist 0 +william 0 +chan 0 +wchan 0 +wednesdai 0 +chateau 0 +sieg 0 +floor 0 +cubicl 0 +somebodi 0 +els 0 +us 0 +inform 0 +prerequisit 0 +go 0 +assum 0 +alreadi 0 +undergradu 0 +wrong 0 +know 0 +soon 0 +possibl 0 +suggest 0 +readingtextbook 0 +errata 0 +list 0 +realli 0 +preview 0 +check 0 +outer 0 +sapplet 0 +assign 0 +handout 0 +written 0 +gener 0 +tuesdai 0 +background 0 +quiz 0 +post 0 +script 0 +midterm 0 +cancel 0 +lack 0 +interest 0 +final 0 +told 0 +march 0 +probabl 0 +verifi 0 +close 0 +book 0 +cover 0 +materi 0 +willconsist 0 +short 0 +answer 0 +problem 0 +solv 0 +question 0 +bureaucrat 0 +stuffgrad 0 +base 0 +upon 0 +particip 0 +work 0 +togeth 0 +okai 0 +homeworkproblem 0 +classmat 0 +must 0 +upindepend 0 +rule 0 +could 0 +invok 0 +betweenani 0 +mustwatch 0 +least 0 +half 0 +theori 0 +thatan 0 +episod 0 +equival 0 +reboot 0 +anyth 0 +thatsurv 0 +learn 0 +understood 0 +eduwchan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..2e6df567 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,218 @@ +cours 1 +parallel 0 +topic 0 +homework 0 +algorithm 0 +memori 0 +anderson 0 +problem 0 +thursdai 0 +paper 0 +theori 0 +comput 0 +could 0 +descript 0 +effici 0 +note 0 +april 0 +lectur 0 +share 0 +machin 0 +book 0 +gener 0 +sieg 0 +instructor 0 +offic 0 +syllabu 0 +list 0 +connect 0 +pointer 0 +although 0 +go 0 +model 0 +real 0 +year 0 +consid 0 +expect 0 +howev 0 +nice 0 +teach 0 +algorithmscs 0 +spring 0 +inform 0 +meet 0 +richard 0 +hour 0 +appointment 0 +mail 0 +address 0 +exam 0 +catalog 0 +design 0 +analysi 0 +fundament 0 +algorithmsfor 0 +sort 0 +arithmet 0 +matrix 0 +graph 0 +addit 0 +select 0 +emphasi 0 +techniqu 0 +approach 0 +us 0 +developingfast 0 +limit 0 +theirefficaci 0 +prerequisit 0 +equival 0 +major 0 +assign 0 +plu 0 +rambl 0 +commentsabout 0 +transpar 0 +code 0 +analysisfor 0 +rank 0 +compon 0 +algorithmi 0 +simpler 0 +correct 0 +section 0 +latex 0 +version 0 +referencesfor 0 +erew 0 +crew 0 +ullman 0 +yannakaki 0 +tuesdai 0 +union 0 +find 0 +certifi 0 +write 0 +impli 0 +exist 0 +consensu 0 +base 0 +upon 0 +swap 0 +likelysometh 0 +insid 0 +next 0 +supercomput 0 +asynchron 0 +refer 0 +martel 0 +foc 0 +buss 0 +manuscript 0 +special 0 +content 0 +whim 0 +titl 0 +would 0 +mayb 0 +smpc 0 +start 0 +collect 0 +basic 0 +spend 0 +time 0 +give 0 +cover 0 +term 0 +indic 0 +lookingat 0 +pertain 0 +specif 0 +interconnect 0 +topolog 0 +wewil 0 +situat 0 +cost 0 +access 0 +isnon 0 +uniform 0 +sens 0 +notconsid 0 +particular 0 +prove 0 +theorem 0 +andyou 0 +motiv 0 +practic 0 +consider 0 +goal 0 +indevelop 0 +come 0 +algorithmswhich 0 +conceiv 0 +three 0 +four 0 +set 0 +contain 0 +routin 0 +challeng 0 +goingto 0 +requir 0 +project 0 +happi 0 +student 0 +outsidework 0 +relat 0 +text 0 +introduct 0 +parallelalgorithm 0 +befollow 0 +close 0 +feel 0 +exception 0 +cheap 0 +youcould 0 +probabl 0 +without 0 +purchas 0 +copi 0 +origin 0 +plan 0 +volunt 0 +textwould 0 +progress 0 +fast 0 +volum 0 +artof 0 +program 0 +chose 0 +instead 0 +quit 0 +flexibl 0 +taught 0 +mychoic 0 +influenc 0 +interestingor 0 +uninterest 0 +also 0 +choic 0 +aseith 0 +tradit 0 +work 0 +researchcont 0 +number 0 +open 0 +mind 0 +turninto 0 +research 0 +result 0 +present 0 +half 0 +bake 0 +ideason 0 +provid 0 +other 0 +interest 0 +andenergi 0 +think 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..a0f140e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,31 @@ +page 1 +comput 1 +autumn 1 +home 0 +automata 0 +complex 0 +move 0 +current 0 +quarter 0 +portion 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accuratelyquot 0 +duli 0 +credit 0 +copyright 0 +depart 0 +scienc 0 +engin 0 +univers 0 +ofwashington 0 +comment 0 +webmast 0 +washington 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..01ed79cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,19 @@ +postscript 1 +home 0 +page 0 +beam 0 +quiz 0 +fall 0 +automataautumn 0 +instructor 0 +paul 0 +welcom 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +exam 0 +final 0 +latex 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..f726316e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,105 @@ +midterm 1 +cours 1 +acrobat 1 +format 1 +ruzzo 0 +sieg 0 +organ 0 +syllabu 0 +collabor 0 +fall 0 +comput 0 +complex 0 +larri 0 +nitin 0 +class 0 +mail 0 +last 0 +updat 0 +washington 0 +latex 0 +sourc 0 +postscript 0 +file 0 +provid 0 +legibl 0 +adob 0 +free 0 +viewer 0 +depart 0 +system 0 +page 0 +autumn 0 +automata 0 +tuth 0 +staffnameemailphoneoffic 0 +hour 0 +instructor 0 +sharma 0 +csmw 0 +messag 0 +sent 0 +list 0 +textbook 0 +errata 0 +handout 0 +administrivia 0 +homework 0 +thecours 0 +materi 0 +three 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +figur 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +latest 0 +greatest 0 +avail 0 +unix 0 +acroread 0 +perhap 0 +aavail 0 +ghostview 0 +ghostscript 0 +home 0 +window 0 +linux 0 +time 0 +support 0 +fewer 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +print 0 +ghostscriptcan 0 +exampl 0 +web 0 +portion 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +accuratelyquot 0 +duli 0 +credit 0 +copyright 0 +scienc 0 +engin 0 +univers 0 +ofwashington 0 +comment 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..624154a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,94 @@ +complex 1 +proposit 1 +proof 0 +logic 0 +theorem 0 +satisfi 0 +prove 0 +test 0 +order 0 +issu 0 +well 0 +system 0 +algorithm 0 +first 0 +higher 0 +cours 0 +concentr 0 +search 0 +strategi 0 +theoret 0 +practic 0 +paper 0 +instal 0 +prover 0 +topic 0 +autumn 0 +gener 0 +inform 0 +instructor 0 +paul 0 +beam 0 +meet 0 +time 0 +tuesdai 0 +thursdai 0 +loew 0 +autom 0 +comput 0 +aid 0 +verif 0 +vlsi 0 +andsoftwar 0 +engin 0 +give 0 +attempt 0 +decid 0 +truthof 0 +statement 0 +casea 0 +flip 0 +side 0 +even 0 +us 0 +oftheorem 0 +often 0 +involv 0 +finitedomain 0 +interpret 0 +anywai 0 +consid 0 +varieti 0 +theoremprov 0 +within 0 +good 0 +choic 0 +consider 0 +work 0 +thesequest 0 +complexityand 0 +rel 0 +also 0 +examin 0 +anumb 0 +implement 0 +compar 0 +theoryand 0 +thing 0 +urquhart 0 +survei 0 +talk 0 +slide 0 +softwar 0 +amus 0 +sato 0 +andboy 0 +moor 0 +tester 0 +gsat 0 +june 0 +thedirectori 0 +proversther 0 +scatter 0 +process 0 +ofinstal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..2c6ca03a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,38 @@ +comput 1 +home 0 +pagecs 0 +systemperform 0 +modelingspr 0 +host 0 +lazowskaandmaryvernonwelcom 0 +page 0 +system 0 +performancemodel 0 +meet 0 +mondai 0 +wednesdai 0 +fridai 0 +loew 0 +hall 0 +offic 0 +hourstent 0 +topic 0 +schedulecom 0 +goingsassignmentsproject 0 +informationmap 0 +queue 0 +network 0 +solut 0 +packag 0 +emailoth 0 +inform 0 +avail 0 +sigmetr 0 +confer 0 +measur 0 +model 0 +computersystemsuw 0 +depart 0 +scienc 0 +engineeringlazowska 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..c153ea07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,76 @@ +postscript 1 +simul 1 +alpha 0 +sieg 0 +page 0 +system 0 +egger 0 +washington 0 +offic 0 +hour 0 +redston 0 +cours 0 +inform 0 +architectur 0 +tool 0 +sparc 0 +pentium 0 +uniprocessor 0 +info 0 +home 0 +comput 0 +architecturewint 0 +instructorsusan 0 +tuth 0 +tajoshua 0 +overview 0 +schedul 0 +continu 0 +updat 0 +lectur 0 +note 0 +problem 0 +set 0 +previou 0 +test 0 +histori 0 +specmark 0 +rate 0 +shade 0 +instuct 0 +atom 0 +build 0 +analysi 0 +tullsen 0 +execut 0 +driven 0 +instruct 0 +level 0 +superscalar 0 +close 0 +etch 0 +binari 0 +rewrit 0 +analyz 0 +code 0 +hardwar 0 +monitor 0 +multiflow 0 +compil 0 +pixi 0 +user 0 +manual 0 +dinero 0 +cach 0 +local 0 +machin 0 +powerpc 0 +applic 0 +multiprocessor 0 +spec 0 +benchmark 0 +neat 0 +center 0 +current 0 +futur 0 +processor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..c9ad2d96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,50 @@ +inform 1 +home 0 +instructor 0 +pighin 0 +offic 0 +hour 0 +thisdocu 0 +mail 0 +april 0 +assign 0 +cours 0 +pagecs 0 +oper 0 +system 0 +hank 0 +levi 0 +spring 0 +freder 0 +meet 0 +time 0 +chateau 0 +confer 0 +room 0 +number 0 +unit 0 +welcom 0 +page 0 +world 0 +wide 0 +short 0 +hypermedia 0 +document 0 +forcs 0 +contain 0 +class 0 +keep 0 +mind 0 +static 0 +especi 0 +classmessag 0 +ad 0 +frequent 0 +problem 0 +send 0 +announc 0 +first 0 +readi 0 +iti 0 +projectlevi 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..f76af8ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,78 @@ +inform 1 +home 0 +comput 0 +document 0 +help 0 +scienc 0 +engin 0 +page 0 +class 0 +avail 0 +last 0 +year 0 +project 0 +degre 0 +program 0 +offer 0 +colleg 0 +mosaic 0 +pagecs 0 +graphicsautumn 0 +quarter 0 +welcom 0 +world 0 +wide 0 +hypermedia 0 +whichcontain 0 +wealth 0 +keep 0 +mind 0 +thatthi 0 +static 0 +addedfrequ 0 +problem 0 +send 0 +mail 0 +deros 0 +click 0 +professor 0 +cours 0 +syllabu 0 +lectur 0 +note 0 +written 0 +homework 0 +assign 0 +solut 0 +handout 0 +grade 0 +polici 0 +test 0 +cool 0 +imag 0 +addit 0 +get 0 +instruct 0 +us 0 +indi 0 +mvi 0 +visitor 0 +room 0 +schedul 0 +depart 0 +art 0 +follow 0 +topic 0 +basic 0 +hypertext 0 +markup 0 +languag 0 +html 0 +uniform 0 +resourc 0 +locat 0 +read 0 +usinglynx 0 +charact 0 +base 0 +browser 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..f73766e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,110 @@ +homework 1 +class 0 +begin 0 +fridai 0 +ebel 0 +logic 0 +data 0 +home 0 +page 0 +book 0 +offic 0 +hour 0 +wednesdai 0 +sieg 0 +mondai 0 +design 0 +carl 0 +inform 0 +staff 0 +paul 0 +larri 0 +combin 0 +sequenti 0 +fpga 0 +depart 0 +comput 0 +scienc 0 +engin 0 +sheet 0 +server 0 +washington 0 +principl 0 +digit 0 +system 0 +fall 0 +welcom 0 +cours 0 +time 0 +place 0 +loew 0 +import 0 +announc 0 +summari 0 +syllabu 0 +text 0 +instructor 0 +franklin 0 +thursdai 0 +hine 0 +hineskj 0 +tuesdai 0 +mcmurchi 0 +research 0 +tool 0 +guru 0 +document 0 +simul 0 +synthesi 0 +pamett 0 +board 0 +mostli 0 +complet 0 +still 0 +construct 0 +student 0 +work 0 +groupsfin 0 +exam 0 +review 0 +topic 0 +cover 0 +quarter 0 +assign 0 +note 0 +hand 0 +handout 0 +memori 0 +commun 0 +mother 0 +site 0 +list 0 +vlsi 0 +link 0 +comprehensivelist 0 +icmanufactur 0 +murphi 0 +recent 0 +dilbert 0 +comic 0 +nation 0 +semiconductor 0 +motorola 0 +philip 0 +semiconduct 0 +micron 0 +technolog 0 +copyright 0 +univers 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..ab095893 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,50 @@ +sieg 1 +hour 1 +artifici 0 +question 0 +index 0 +pagecs 0 +intelligencefal 0 +quarter 0 +intellig 0 +pose 0 +fundament 0 +andchalleng 0 +comput 0 +scienc 0 +build 0 +intelligentmachin 0 +cours 0 +address 0 +provid 0 +anin 0 +depth 0 +introduct 0 +select 0 +topic 0 +includ 0 +agentarchitectur 0 +knowledg 0 +represent 0 +search 0 +plan 0 +machinelearn 0 +reason 0 +uncertainti 0 +methodolog 0 +staff 0 +weldweld 0 +marc 0 +friedmanfriedman 0 +nick 0 +kushmericknick 0 +outlin 0 +topicsread 0 +assignmentsassign 0 +examsgradingresourcesth 0 +class 0 +mailinglist 0 +also 0 +archiv 0 +past 0 +messag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..3947a90e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,102 @@ +read 1 +theori 0 +uncertainti 0 +decis 0 +hank 0 +washington 0 +pearl 0 +reason 0 +probabl 0 +make 0 +intellig 0 +offic 0 +mail 0 +class 0 +list 0 +probabilist 0 +sever 0 +nice 0 +refer 0 +book 0 +paper 0 +avail 0 +librari 0 +histor 0 +look 0 +math 0 +summari 0 +artifici 0 +winter 0 +professor 0 +steve 0 +sieg 0 +hour 0 +whenev 0 +around 0 +appoint 0 +email 0 +address 0 +goe 0 +member 0 +send 0 +request 0 +materi 0 +systemsthi 0 +requir 0 +text 0 +chapter 0 +without 0 +bui 0 +strappedfor 0 +cash 0 +though 0 +shafer 0 +uncertain 0 +reasoningthi 0 +collect 0 +foundat 0 +select 0 +copi 0 +grail 0 +jayn 0 +logic 0 +scienc 0 +fragmentari 0 +edit 0 +juli 0 +extrem 0 +interest 0 +technic 0 +foundationsof 0 +statist 0 +definit 0 +worth 0 +perspect 0 +alon 0 +heavi 0 +go 0 +place 0 +beautifulli 0 +written 0 +neapolitan 0 +expert 0 +system 0 +algorithmsa 0 +signific 0 +overlap 0 +good 0 +secondari 0 +sourc 0 +inform 0 +graphic 0 +model 0 +propagationalgorithm 0 +research 0 +arrang 0 +cours 0 +topic 0 +cover 0 +html 0 +postscript 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..c506cf80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,271 @@ +cours 1 +khoro 0 +mondai 0 +assign 0 +fridai 0 +home 0 +imag 0 +page 0 +inform 0 +april 0 +class 0 +read 0 +wednesdai 0 +cantata 0 +comput 0 +part 0 +meet 0 +instal 0 +note 0 +next 0 +sun 0 +local 0 +environ 0 +copi 0 +engin 0 +week 0 +march 0 +onlin 0 +help 0 +contain 0 +document 0 +copyright 0 +materi 0 +washington 0 +first 0 +chapter 0 +run 0 +login 0 +file 0 +follow 0 +setenv 0 +khoros_hom 0 +path 0 +also 0 +tutori 0 +outlin 0 +experi 0 +take 0 +articl 0 +make 0 +process 0 +softwar 0 +msvc 0 +final 0 +june 0 +exam 0 +midterm 0 +plan 0 +select 0 +avail 0 +lectur 0 +student 0 +sign 0 +start 0 +applic 0 +account 0 +rene 0 +reed 0 +arrang 0 +sieg 0 +scienc 0 +version 0 +undergradu 0 +menu 0 +pagecs 0 +understandingwelcom 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +theclass 0 +keep 0 +mind 0 +static 0 +newinform 0 +especi 0 +messag 0 +ad 0 +frequent 0 +problem 0 +send 0 +mail 0 +mock 0 +notic 0 +subjectto 0 +view 0 +public 0 +site 0 +otherthan 0 +univers 0 +doexercis 0 +second 0 +exercis 0 +determin 0 +conveni 0 +torun 0 +aslillith 0 +edit 0 +workstat 0 +containxhost 0 +lilliththen 0 +cshrc 0 +manpath 0 +rlogin 0 +onto 0 +lillith 0 +rhost 0 +assignmentsand 0 +displai 0 +variabl 0 +appropri 0 +typecantata 0 +unix 0 +prompt 0 +machin 0 +georg 0 +haskhoro 0 +wwwhttp 0 +educ 0 +index 0 +htmland 0 +link 0 +itscours 0 +least 0 +twotop 0 +spatial 0 +resolut 0 +ideal 0 +pagesand 0 +anoth 0 +window 0 +noth 0 +turn 0 +third 0 +huerta 0 +andnevatia 0 +cvpr 0 +proceed 0 +tolook 0 +wolff 0 +fourth 0 +comparison 0 +three 0 +fast 0 +oper 0 +level 0 +learn 0 +effort 0 +requir 0 +announc 0 +examin 0 +pmin 0 +regular 0 +room 0 +cover 0 +combinationof 0 +post 0 +list 0 +topic 0 +studi 0 +remind 0 +approv 0 +sundai 0 +time 0 +review 0 +insieg 0 +period 0 +overhead 0 +transpar 0 +onneur 0 +net 0 +librari 0 +center 0 +floor 0 +packet 0 +number 0 +trainabl 0 +classifi 0 +permit 0 +temporari 0 +ofmatlab 0 +requirethat 0 +fill 0 +form 0 +contract 0 +know 0 +interest 0 +term 0 +project 0 +import 0 +ofth 0 +correct 0 +introduc 0 +pentium 0 +laboratori 0 +includingth 0 +develop 0 +evan 0 +mclain 0 +documentexplain 0 +transform 0 +current 0 +statu 0 +recent 0 +get 0 +withkhoro 0 +accompani 0 +pleas 0 +alreadi 0 +accesskhoro 0 +contact 0 +onthursdai 0 +pick 0 +youraccount 0 +name 0 +password 0 +itov 0 +weekend 0 +earli 0 +hour 0 +arelimit 0 +ahead 0 +email 0 +address 0 +andsh 0 +back 0 +offic 0 +kept 0 +lock 0 +either 0 +need 0 +knock 0 +orhav 0 +prior 0 +mani 0 +card 0 +willhav 0 +care 0 +slide 0 +resourc 0 +understand 0 +intro 0 +delft 0 +univ 0 +pattern 0 +recognit 0 +vision 0 +store 0 +thedepart 0 +brochur 0 +brochuremosa 0 +mosaic 0 +find 0 +itemsund 0 +balloon 0 +macmosa 0 +itemund 0 +navig 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..8d15865f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,174 @@ +novemb 1 +class 0 +parallel 0 +octob 0 +topic 0 +algorithm 0 +schedul 0 +begin 0 +tuesdai 0 +term 0 +project 0 +comput 0 +document 0 +machin 0 +neural 0 +network 0 +intel 0 +cours 0 +decemb 0 +home 0 +imag 0 +inform 0 +time 0 +normal 0 +start 0 +meet 0 +guest 0 +speaker 0 +univers 0 +complet 0 +pyramid 0 +embed 0 +overview 0 +architectur 0 +thursdai 0 +week 0 +implement 0 +languag 0 +paragon 0 +supercomput 0 +maspar 0 +copyright 0 +final 0 +exam 0 +pagecs 0 +processingwelcom 0 +page 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +theclass 0 +keep 0 +mind 0 +static 0 +newinform 0 +ad 0 +informationon 0 +half 0 +hourearli 0 +dai 0 +prof 0 +nian 0 +simon 0 +fraser 0 +burnabi 0 +canada 0 +bharath 0 +modayur 0 +titl 0 +present 0 +effici 0 +object 0 +recognit 0 +simd 0 +mimd 0 +discuss 0 +scale 0 +invariantoper 0 +segment 0 +hierarchicalrelax 0 +us 0 +isodata 0 +approach 0 +burt 0 +hong 0 +rosenfeld 0 +introduct 0 +virtual 0 +process 0 +mesh 0 +brief 0 +treatment 0 +icon 0 +symbol 0 +analysi 0 +digit 0 +librari 0 +demo 0 +find 0 +topicsdur 0 +student 0 +activelyexplor 0 +written 0 +descript 0 +hand 0 +inon 0 +templat 0 +writeupsi 0 +avail 0 +resourcespvm 0 +virtualmachin 0 +softwar 0 +layear 0 +permit 0 +user 0 +program 0 +aviru 0 +made 0 +heterogen 0 +collect 0 +moreworkst 0 +conveni 0 +studydistribut 0 +technicalpubl 0 +includ 0 +paragonparallel 0 +system 0 +good 0 +arrai 0 +orient 0 +variousvendor 0 +info 0 +onth 0 +nation 0 +center 0 +sweden 0 +onlin 0 +theunivers 0 +tennesse 0 +resourc 0 +found 0 +neal 0 +friedman 0 +report 0 +also 0 +error 0 +correctionsto 0 +note 0 +notic 0 +materi 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +washington 0 +import 0 +part 0 +ofth 0 +review 0 +session 0 +fridai 0 +sieg 0 +hall 0 +wednesdai 0 +room 0 +close 0 +book 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..2f9727d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,65 @@ +linear 1 +method 0 +april 0 +intro 0 +optim 0 +program 0 +graphic 0 +matrix 0 +comput 0 +brad 0 +equat 0 +quadrat 0 +exampl 0 +paper 0 +finit 0 +seminarc 0 +rspring 0 +numer 0 +definit 0 +properti 0 +invers 0 +solv 0 +system 0 +eric 0 +eigenvalu 0 +eigenvector 0 +singular 0 +valu 0 +decomposit 0 +joel 0 +root 0 +find 0 +nonlinear 0 +corei 0 +shuichi 0 +unconstrain 0 +kari 0 +constrain 0 +global 0 +kevin 0 +chuck 0 +ronen 0 +daniel 0 +data 0 +fit 0 +conclus 0 +mike 0 +regress 0 +calibr 0 +ordinari 0 +differenti 0 +adam 0 +joanna 0 +discret 0 +element 0 +radios 0 +fred 0 +pde 0 +differ 0 +interv 0 +arithmet 0 +troi 0 +jonathan 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..c8bf58a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,45 @@ +tanimoto 1 +autumn 1 +educ 1 +mathemat 1 +experi 1 +imag 1 +process 1 +copyright 1 +washington 1 +topic 1 +quarter 1 +quarterscs 0 +special 0 +topicssteven 0 +instructorcs 0 +transcript 0 +base 0 +winter 0 +spring 0 +technolog 0 +collabor 0 +learn 0 +notic 0 +materi 0 +cours 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +univers 0 +graduat 0 +seminar 0 +explor 0 +varieti 0 +relat 0 +useof 0 +comput 0 +specif 0 +activ 0 +varyfrom 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..aee00007 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,160 @@ +paper 1 +read 0 +novemb 0 +presentor 0 +educ 0 +octob 0 +proceed 0 +html 0 +present 0 +meet 0 +either 0 +home 0 +page 0 +autumn 0 +world 0 +wide 0 +inform 0 +time 0 +copyright 0 +materi 0 +student 0 +discuss 0 +labord 0 +promot 0 +concept 0 +map 0 +first 0 +degre 0 +beyond 0 +brows 0 +possibl 0 +toolkit 0 +layer 0 +ward 0 +transcript 0 +base 0 +wwwwelcom 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +theclass 0 +keep 0 +mind 0 +document 0 +static 0 +newinform 0 +ad 0 +notic 0 +cours 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +univers 0 +washington 0 +mccalla 0 +central 0 +importanceof 0 +model 0 +intellig 0 +tutor 0 +sandi 0 +youngquist 0 +paul 0 +barton 0 +davi 0 +aboutinternet 0 +servic 0 +problem 0 +solv 0 +geometri 0 +microworld 0 +tointellig 0 +comput 0 +environ 0 +tessa 0 +bartel 0 +mathematicsconnect 0 +plu 0 +gari 0 +anderson 0 +onlin 0 +combin 0 +vision 0 +littl 0 +technolog 0 +noth 0 +particularli 0 +ambiti 0 +descript 0 +state 0 +second 0 +technic 0 +piec 0 +thethem 0 +learner 0 +take 0 +respons 0 +someth 0 +increasingli 0 +import 0 +futur 0 +choic 0 +third 0 +moresophist 0 +elabor 0 +group 0 +annot 0 +ofwww 0 +describ 0 +intechn 0 +term 0 +internet 0 +infrastructur 0 +couldmak 0 +smart 0 +distribut 0 +tutori 0 +applicationsthat 0 +mosaic 0 +netscap 0 +achiev 0 +pleas 0 +option 0 +advanc 0 +us 0 +webhttp 0 +jeremi 0 +baer 0 +empow 0 +agehttp 0 +ncsa 0 +uiuc 0 +marla 0 +baker 0 +share 0 +comment 0 +soap 0 +trail 0 +line 0 +communitieshttp 0 +john 0 +dietz 0 +enhanc 0 +protocol 0 +lower 0 +serviceshttp 0 +dcewebkit 0 +adam 0 +carlson 0 +hong 0 +zhumeet 0 +michael 0 +aboutcurriculum 0 +navig 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..f65872cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,78 @@ +autumn 1 +technolog 1 +learn 1 +paper 1 +schedul 1 +home 0 +page 0 +copyright 0 +washington 0 +current 0 +explor 0 +middl 0 +collabor 0 +learningwelcom 0 +notic 0 +materi 0 +cours 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +univers 0 +gener 0 +descript 0 +comput 0 +internet 0 +methodologiesfor 0 +teach 0 +come 0 +togeth 0 +innew 0 +wai 0 +seminar 0 +read 0 +number 0 +forcollabor 0 +particip 0 +student 0 +willtak 0 +respons 0 +make 0 +present 0 +group 0 +ofthes 0 +cover 0 +subset 0 +also 0 +possibl 0 +applic 0 +ofai 0 +visual 0 +techniqu 0 +analysi 0 +evid 0 +ofstud 0 +onlin 0 +context 0 +meet 0 +tuesdai 0 +howev 0 +decid 0 +move 0 +time 0 +better 0 +intopeopl 0 +visit 0 +meani 0 +school 0 +schoolmai 0 +depend 0 +interest 0 +participatingstud 0 +last 0 +updat 0 +septemb 0 +tanimoto 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..3b92b75c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,191 @@ +schedul 1 +system 0 +proc 0 +parallel 0 +processor 0 +alloc 0 +distribut 0 +page 0 +memori 0 +multiprocessor 0 +barbara 0 +santa 0 +conf 0 +multiprogram 0 +sigmetr 0 +share 0 +measur 0 +ipp 0 +workshop 0 +strategi 0 +base 0 +workload 0 +polici 0 +model 0 +demand 0 +mari 0 +arpaci 0 +dusseau 0 +dynam 0 +nguyen 0 +gupta 0 +process 0 +symp 0 +oper 0 +vaswani 0 +zahorjan 0 +us 0 +runtim 0 +tutori 0 +supercomput 0 +ofcomput 0 +eric 0 +parson 0 +kenneth 0 +sevcik 0 +feitelson 0 +mccann 0 +memorymultiprocessor 0 +coschedul 0 +migrat 0 +coordin 0 +mvmv 0 +global 0 +resourc 0 +manag 0 +systemsprofessor 0 +vernontim 0 +pmlocat 0 +gang 0 +now 0 +ousterhout 0 +techniqu 0 +concurr 0 +inrd 0 +vahdat 0 +anderson 0 +patterson 0 +interact 0 +andsequenti 0 +network 0 +workstat 0 +ofth 0 +equi 0 +partit 0 +tucker 0 +control 0 +issuesfor 0 +principl 0 +workloadcharacterist 0 +univ 0 +ofwashington 0 +technic 0 +report 0 +applic 0 +shun 0 +leung 0 +evangelo 0 +markato 0 +thoma 0 +leblanc 0 +affin 0 +loopschedul 0 +expand 0 +version 0 +iniee 0 +tran 0 +han 0 +zima 0 +chapman 0 +compil 0 +ieee 0 +edjlali 0 +agraw 0 +sussman 0 +saltz 0 +data 0 +parallelprogram 0 +adapt 0 +environ 0 +april 0 +comparisonsshikharesh 0 +majumdar 0 +derek 0 +eager 0 +richard 0 +bunt 0 +confer 0 +high 0 +variabilityservic 0 +time 0 +dror 0 +bill 0 +nitzberg 0 +characterist 0 +product 0 +scientif 0 +thenasa 0 +am 0 +ipsc 0 +follow 0 +also 0 +cover 0 +requir 0 +read 0 +leutenegg 0 +vernon 0 +perform 0 +transact 0 +comput 0 +patrick 0 +sobalvarro 0 +william 0 +weihl 0 +ofparallel 0 +job 0 +impact 0 +burger 0 +hyder 0 +miller 0 +wood 0 +tradeoff 0 +rohit 0 +chandra 0 +scott 0 +devin 0 +verghes 0 +anoop 0 +mendel 0 +rosenblum 0 +multiprocessorcomput 0 +server 0 +architectur 0 +support 0 +programminglanguag 0 +asplo 0 +jose 0 +alverson 0 +kahan 0 +korri 0 +smith 0 +tera 0 +octob 0 +discuss 0 +open 0 +problem 0 +culler 0 +effectivedistribut 0 +computersystem 0 +philadelphia 0 +june 0 +appear 0 +rudolph 0 +identif 0 +activ 0 +work 0 +set 0 +program 0 +theoret 0 +result 0 +karlin 0 +paper 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..7b66b039 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,67 @@ +acrobat 1 +format 1 +syllabu 0 +schedul 0 +winter 0 +ruzzo 0 +last 0 +updat 0 +html 0 +titl 0 +postscript 0 +file 0 +usual 0 +translat 0 +adob 0 +free 0 +viewer 0 +page 0 +algorithm 0 +molecular 0 +biologi 0 +richard 0 +karp 0 +larri 0 +martin 0 +tompaclass 0 +bboard 0 +handout 0 +administr 0 +lectur 0 +note 0 +draft 0 +homework 0 +slide 0 +cours 0 +materi 0 +provid 0 +sever 0 +load 0 +fast 0 +readabl 0 +mani 0 +part 0 +gener 0 +automat 0 +latex 0 +faith 0 +origin 0 +latest 0 +greatest 0 +ghostscript 0 +home 0 +window 0 +linux 0 +time 0 +support 0 +fewer 0 +system 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +legibl 0 +print 0 +ghostscriptcan 0 +exampl 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..4233f704 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,172 @@ +paper 1 +tuesdai 0 +read 0 +architectur 0 +quarter 0 +week 0 +present 0 +posit 0 +comput 0 +anderson 0 +asplo 0 +line 0 +memori 0 +mail 0 +organ 0 +cseg 0 +lunch 0 +format 0 +previou 0 +ofth 0 +discuss 0 +lead 0 +credit 0 +meet 0 +valu 0 +follow 0 +processor 0 +burger 0 +machin 0 +avail 0 +subscrib 0 +list 0 +lunchcs 0 +lunchcours 0 +jean 0 +loupbaermeet 0 +time 0 +continu 0 +withalmost 0 +year 0 +select 0 +discussedat 0 +begin 0 +distribut 0 +tobe 0 +might 0 +formal 0 +work 0 +progress 0 +byesteem 0 +member 0 +mostlyw 0 +hopefulli 0 +heat 0 +discussionson 0 +literatur 0 +differ 0 +quartersi 0 +start 0 +fromparticip 0 +recent 0 +workshop 0 +oncrit 0 +issu 0 +research 0 +copi 0 +hereread 0 +morethem 0 +mani 0 +thank 0 +ruth 0 +molli 0 +brown 0 +gershoni 0 +matthai 0 +philipos 0 +tabular 0 +summari 0 +guru 0 +found 0 +herefor 0 +usual 0 +thestud 0 +either 0 +informallyor 0 +slide 0 +cours 0 +variabl 0 +ifyou 0 +first 0 +octob 0 +local 0 +load 0 +predict 0 +lipasti 0 +wilkerson 0 +shen 0 +link 0 +advanceprogrami 0 +short 0 +bibliographi 0 +appreci 0 +volunt 0 +thesaulsburi 0 +readashlei 0 +saulsburi 0 +fong 0 +pong 0 +andrea 0 +nowatzyk 0 +miss 0 +wall 0 +case 0 +integr 0 +isca 0 +readm 0 +fillo 0 +keckler 0 +dalli 0 +multicomput 0 +micro 0 +machinelink 0 +readdoug 0 +stefano 0 +kaxira 0 +jame 0 +goodman 0 +datascalar 0 +spsd 0 +execut 0 +model 0 +univers 0 +wisconsin 0 +madison 0 +scienc 0 +depart 0 +technic 0 +report 0 +juli 0 +neton 0 +intellig 0 +iram 0 +chip 0 +rememb 0 +patterson 0 +cardwel 0 +fromm 0 +keeton 0 +kozyraki 0 +thomasand 0 +yelick 0 +availableher 0 +fortun 0 +author 0 +prof 0 +send 0 +email 0 +themajordomo 0 +majordomo 0 +content 0 +shouldinclud 0 +leav 0 +subject 0 +lineblank 0 +shortli 0 +receiv 0 +messag 0 +back 0 +sai 0 +welcom 0 +baer 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..369847a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,13 @@ +home 1 +pagecs 0 +page 0 +spring 0 +offer 0 +experiment 0 +graduat 0 +cours 0 +human 0 +comput 0 +interact 0 +born 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..c2f4a1a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,127 @@ +week 1 +compil 0 +time 0 +memori 0 +gupta 0 +anderson 0 +gener 0 +special 0 +charl 0 +data 0 +optim 0 +roger 0 +regist 0 +mail 0 +line 0 +seminarcs 0 +seminarcours 0 +organ 0 +susan 0 +eggersand 0 +craigchambersmeet 0 +wednesdai 0 +offici 0 +loew 0 +butreal 0 +meet 0 +second 0 +floor 0 +atrium 0 +scheduleweek 0 +memspi 0 +analyz 0 +system 0 +bottleneck 0 +program 0 +margaretmartonosi 0 +anoop 0 +thoma 0 +approach 0 +applic 0 +consel 0 +francoi 0 +noel 0 +practic 0 +flow 0 +framework 0 +arrai 0 +refer 0 +analysi 0 +itsus 0 +evelyn 0 +duesterwald 0 +rajiv 0 +maryl 0 +soffa 0 +valu 0 +depend 0 +graph 0 +represent 0 +without 0 +taxat 0 +danielweis 0 +crew 0 +michael 0 +ernst 0 +bjarn 0 +steensgaard 0 +litvinov 0 +iter 0 +coalesc 0 +georg 0 +andrew 0 +appel 0 +garrett 0 +machin 0 +specif 0 +hooverand 0 +kenneth 0 +zadeck 0 +dean 0 +grant 0 +paradigm 0 +distribut 0 +multicomput 0 +byprivthviraj 0 +banerje 0 +lewi 0 +minimum 0 +cost 0 +interprocedur 0 +alloc 0 +stevenkurland 0 +fischer 0 +secoski 0 +todd 0 +knoblock 0 +erik 0 +grove 0 +lazi 0 +strength 0 +reduct 0 +jen 0 +knoop 0 +oliv 0 +ruth 0 +andbernhard 0 +steffen 0 +mock 0 +tullsen 0 +subscrib 0 +list 0 +send 0 +email 0 +majordomo 0 +content 0 +includ 0 +subscribecsek 0 +leav 0 +subject 0 +blank 0 +shortlyrec 0 +messag 0 +back 0 +sai 0 +welcom 0 +melodi 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..8831de00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,178 @@ +parallel 1 +program 0 +data 0 +compil 0 +analysi 0 +ppopp 0 +optim 0 +applic 0 +commun 0 +ipp 0 +icpp 0 +lcpc 0 +kennedi 0 +distribut 0 +model 0 +quarter 0 +matlab 0 +choi 0 +comput 0 +mail 0 +messag 0 +scalapack 0 +exploit 0 +task 0 +ramaswami 0 +hodg 0 +banerje 0 +sung 0 +cross 0 +loop 0 +reus 0 +cach 0 +cooper 0 +mcintosh 0 +global 0 +chakarabarti 0 +gupta 0 +pldi 0 +integer 0 +perform 0 +environ 0 +adv 0 +input 0 +output 0 +characterist 0 +scalabl 0 +crandal 0 +aydt 0 +chien 0 +reed 0 +strategi 0 +core 0 +bordawekar 0 +choudahari 0 +koelbel 0 +paleczni 0 +local 0 +iter 0 +block 0 +cyclic 0 +midkiff 0 +util 0 +thread 0 +fahring 0 +hain 0 +mehrotra 0 +gener 0 +environmentslarri 0 +snyderautumn 0 +mondai 0 +loew 0 +welcom 0 +home 0 +page 0 +read 0 +select 0 +paper 0 +recent 0 +supercomput 0 +ten 0 +schedul 0 +atmospher 0 +casual 0 +andwil 0 +hopefulli 0 +ignit 0 +live 0 +discuss 0 +everyon 0 +attend 0 +seminar 0 +expect 0 +present 0 +thepap 0 +still 0 +spot 0 +open 0 +hurri 0 +sign 0 +pleas 0 +send 0 +majordomo 0 +subscrib 0 +cseo 0 +bodi 0 +subscribeto 0 +class 0 +list 0 +datepaperpresentor 0 +falcon 0 +interact 0 +restructur 0 +deros 0 +gallivan 0 +gallopoulo 0 +marsolf 0 +padua 0 +portabl 0 +driven 0 +ramkumar 0 +forb 0 +kale 0 +ruth 0 +sean 0 +jason 0 +holidai 0 +stream 0 +librari 0 +complex 0 +structur 0 +gotwal 0 +sriniva 0 +gannon 0 +brad 0 +eric 0 +cilk 0 +effici 0 +multithread 0 +runtim 0 +system 0 +blumof 0 +joerg 0 +kuszmaul 0 +leiserson 0 +randal 0 +zhou 0 +code 0 +object 0 +orient 0 +mathemat 0 +andersson 0 +fritzson 0 +realign 0 +base 0 +kamachi 0 +kusano 0 +suehiro 0 +tamura 0 +sakon 0 +us 0 +access 0 +inform 0 +rinard 0 +tool 0 +rel 0 +debug 0 +develop 0 +larg 0 +numer 0 +abramson 0 +foster 0 +michalak 0 +sosic 0 +potpourri 0 +last 0 +modifi 0 +tuesdai 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..4a9dcdb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,27 @@ +system 1 +seminar 0 +mail 0 +list 0 +autumn 0 +preliminariesif 0 +alreadi 0 +need 0 +variou 0 +crucial 0 +bit 0 +ofinform 0 +week 0 +cancel 0 +besent 0 +send 0 +request 0 +line 0 +subscrib 0 +systemsin 0 +messag 0 +bodi 0 +quarterli 0 +web 0 +spring 0 +summer 0 +winter 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..d38f14b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,70 @@ +present 1 +system 0 +file 0 +meet 0 +quarter 0 +read 0 +paper 0 +oper 0 +memori 0 +perform 0 +summer 0 +quarterw 0 +fridai 0 +loew 0 +wewil 0 +final 0 +appear 0 +upcomingacm 0 +symposium 0 +principl 0 +sosp 0 +pleas 0 +havean 0 +interact 0 +discuss 0 +scheduleoct 0 +implement 0 +global 0 +manag 0 +workstat 0 +cluster 0 +feelei 0 +log 0 +virtual 0 +savag 0 +autoraid 0 +hierarch 0 +storag 0 +wilk 0 +serverless 0 +network 0 +franklin 0 +montgomeri 0 +tiwari 0 +hypervisor 0 +base 0 +fault 0 +toler 0 +chan 0 +philipos 0 +wolman 0 +exploit 0 +weak 0 +connect 0 +mobil 0 +access 0 +voelker 0 +litvinov 0 +cach 0 +coher 0 +stackabl 0 +sriram 0 +fiuczynski 0 +impact 0 +architectur 0 +trend 0 +anderson 0 +romer 0 +return 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..f6d432d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,146 @@ +comput 1 +program 0 +scientif 0 +perform 0 +mail 0 +class 0 +list 0 +high 0 +student 0 +us 0 +languag 0 +machin 0 +parallel 0 +supercomput 0 +develop 0 +wednesdai 0 +page 0 +send 0 +majordomo 0 +subscrib 0 +bodi 0 +messag 0 +compil 0 +releas 0 +suitabl 0 +loop 0 +fast 0 +modern 0 +includ 0 +workstat 0 +scientist 0 +follow 0 +write 0 +zphigh 0 +zpllarri 0 +snyder 0 +teamautumn 0 +quarter 0 +sieg 0 +loew 0 +dai 0 +welcom 0 +home 0 +pleas 0 +csezpl 0 +subscribeto 0 +also 0 +interest 0 +join 0 +usersmail 0 +distribut 0 +informationabout 0 +librarai 0 +relatedinform 0 +ad 0 +user 0 +descriptionzpl 0 +scientificprogram 0 +previous 0 +written 0 +infortran 0 +arrai 0 +dramaticallysimplifi 0 +elimin 0 +nuisanc 0 +index 0 +run 0 +allow 0 +programm 0 +code 0 +andtrivi 0 +migrat 0 +largest 0 +simpli 0 +byrecompil 0 +toth 0 +commun 0 +design 0 +engin 0 +want 0 +learn 0 +effect 0 +cover 0 +topic 0 +state 0 +syntax 0 +semant 0 +algorithm 0 +exploit 0 +wysiwyg 0 +easili 0 +well 0 +scienc 0 +faster 0 +prototyp 0 +matlab 0 +text 0 +booknon 0 +reli 0 +materi 0 +document 0 +found 0 +onin 0 +specif 0 +close 0 +zplprogram 0 +guid 0 +version 0 +prerequisitesfamiliar 0 +fortran 0 +ormatlab 0 +unix 0 +platform 0 +assum 0 +variabl 0 +credit 0 +audit 0 +debug 0 +select 0 +technic 0 +disciplin 0 +rang 0 +whole 0 +applic 0 +kernel 0 +inner 0 +informationcours 0 +syllabu 0 +lectur 0 +note 0 +appli 0 +ncsa 0 +block 0 +grant 0 +account 0 +faculti 0 +staff 0 +remotezpl 0 +compileroth 0 +import 0 +link 0 +sung 0 +choi 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..09e09205 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,127 @@ +class 1 +design 0 +mail 0 +instructor 0 +last 0 +lectur 0 +home 0 +page 0 +gaetano 0 +announc 0 +updat 0 +washington 0 +autumn 0 +quarter 0 +borriello 0 +corei 0 +us 0 +inform 0 +document 0 +send 0 +webmast 0 +administr 0 +cours 0 +final 0 +exam 0 +tool 0 +topic 0 +offic 0 +hour 0 +sieg 0 +logic 0 +katz 0 +benjamin 0 +cum 0 +addison 0 +weslei 0 +maintain 0 +comput 0 +introduct 0 +digit 0 +andersonwelcom 0 +contain 0 +whole 0 +bunch 0 +keep 0 +mind 0 +static 0 +especi 0 +messag 0 +ad 0 +frequent 0 +problem 0 +gener 0 +tocs 0 +notic 0 +system 0 +archiv 0 +messagess 0 +everyon 0 +goal 0 +syllabu 0 +meet 0 +time 0 +mondai 0 +decemb 0 +workload 0 +grade 0 +expect 0 +laboratori 0 +softwar 0 +polici 0 +collabor 0 +cheat 0 +address 0 +overal 0 +schedul 0 +anderson 0 +corin 0 +aweekli 0 +assign 0 +weekli 0 +quizz 0 +onlin 0 +version 0 +slide 0 +textbook 0 +contemporari 0 +author 0 +publish 0 +note 0 +interest 0 +evolut 0 +implement 0 +technolog 0 +aid 0 +synario 0 +feedback 0 +tell 0 +think 0 +thing 0 +go 0 +even 0 +anonym 0 +desir 0 +question 0 +evalu 0 +complet 0 +link 0 +previou 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 +copyright 0 +depart 0 +scienc 0 +engin 0 +univers 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..0f3c0ff0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,36 @@ +comput 1 +societi 0 +cours 0 +societycs 0 +societywelcom 0 +home 0 +page 0 +wintercs 0 +focu 0 +social 0 +econom 0 +ethic 0 +legal 0 +implic 0 +present 0 +internet 0 +futur 0 +nation 0 +andglob 0 +inform 0 +highwai 0 +instructor 0 +alan 0 +born 0 +class 0 +time 0 +tue 0 +thur 0 +sieg 0 +syllabusclass 0 +schedulelink 0 +relev 0 +sitesbook 0 +journal 0 +avail 0 +referenceassignmentsassign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..06da3b2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,50 @@ +sieg 1 +hour 1 +artifici 0 +question 0 +index 0 +pagecs 0 +intelligencefal 0 +quarter 0 +intellig 0 +pose 0 +fundament 0 +andchalleng 0 +comput 0 +scienc 0 +build 0 +intelligentmachin 0 +cours 0 +address 0 +provid 0 +anin 0 +depth 0 +introduct 0 +select 0 +topic 0 +includ 0 +agentarchitectur 0 +knowledg 0 +represent 0 +search 0 +plan 0 +machinelearn 0 +reason 0 +uncertainti 0 +methodolog 0 +staff 0 +weldweld 0 +marc 0 +friedmanfriedman 0 +nick 0 +kushmericknick 0 +outlin 0 +topicsprojectread 0 +assignmentsassign 0 +examsgradingresourcesth 0 +class 0 +mailinglist 0 +also 0 +archiv 0 +past 0 +messag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..b5024b15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,132 @@ +check 1 +inform 0 +index 0 +page 0 +server 0 +mail 0 +html 0 +glimps 0 +zephyr 0 +us 0 +show 0 +regist 0 +comment 0 +intellig 0 +read 0 +program 0 +link 0 +list 0 +paul 0 +file 0 +rememb 0 +want 0 +first 0 +back 0 +interfac 0 +user 0 +make 0 +anoth 0 +class 0 +provid 0 +filter 0 +ics 0 +internet 0 +meet 0 +tuesdai 0 +siegcreat 0 +side 0 +scriptspleas 0 +guidelin 0 +towrit 0 +execut 0 +someon 0 +follow 0 +tothem 0 +peopl 0 +place 0 +collect 0 +gener 0 +futur 0 +begun 0 +updat 0 +ad 0 +phoenix 0 +impress 0 +older 0 +topic 0 +offici 0 +releas 0 +instal 0 +manual 0 +well 0 +develop 0 +home 0 +interest 0 +detail 0 +work 0 +winter 0 +usenix 0 +paper 0 +design 0 +implement 0 +wide 0 +area 0 +wai 0 +zwhere 0 +mosiac 0 +locat 0 +databas 0 +current 0 +guess 0 +room 0 +version 0 +znol 0 +zwatch 0 +zlocat 0 +extra 0 +info 0 +except 0 +statu 0 +on 0 +anyon 0 +note 0 +lectur 0 +discuss 0 +sent 0 +displai 0 +belief 0 +short 0 +mike 0 +releg 0 +review 0 +site 0 +good 0 +miscellan 0 +rather 0 +rambl 0 +kurt 0 +grumbl 0 +problem 0 +improv 0 +mosaic 0 +bring 0 +luddit 0 +perspect 0 +idea 0 +network 0 +sourc 0 +nick 0 +vagu 0 +relat 0 +decemb 0 +cacm 0 +summari 0 +articl 0 +chang 0 +document 0 +itout 0 +withci 0 +send 0 +theentir 0 +address 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..b11de321 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,35 @@ +system 1 +open 1 +home 0 +page 0 +machin 0 +organ 0 +program 0 +credit 0 +introduct 0 +current 0 +structur 0 +control 0 +commun 0 +memori 0 +processor 0 +devic 0 +project 0 +involv 0 +detail 0 +studi 0 +specif 0 +small 0 +computerhardwar 0 +softwar 0 +prerequisit 0 +consent 0 +instructor 0 +student 0 +taken 0 +freshmen 0 +semesterli 0 +cours 0 +inform 0 +info 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..1f22c2d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,26 @@ +design 1 +comput 0 +structur 0 +memori 0 +page 0 +introduct 0 +architectur 0 +credit 0 +system 0 +compon 0 +processor 0 +instruct 0 +address 0 +control 0 +microprogram 0 +manag 0 +cach 0 +hierarchi 0 +interrupt 0 +prerequisit 0 +andc 0 +semesterli 0 +cours 0 +inform 0 +info 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..491e906d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,26 @@ +comput 1 +advanc 0 +architectur 0 +home 0 +page 0 +credit 0 +techniqu 0 +design 0 +parallel 0 +process 0 +andpipelin 0 +multiprocessor 0 +multi 0 +network 0 +high 0 +performancemachin 0 +special 0 +purpos 0 +processor 0 +data 0 +flow 0 +prerequisit 0 +semesterli 0 +cours 0 +inform 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..488603d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,31 @@ +parallel 1 +machin 1 +home 0 +page 0 +advanc 0 +comput 0 +architectur 0 +credit 0 +algorithm 0 +principl 0 +detect 0 +vectorizingcompil 0 +interconnect 0 +network 0 +simd 0 +mimd 0 +processorsynchron 0 +data 0 +coher 0 +multi 0 +dataflow 0 +special 0 +purposeprocessor 0 +prerequisit 0 +consent 0 +instructor 0 +semesterli 0 +cours 0 +inform 0 +info 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..f823fe59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,243 @@ +assign 1 +class 0 +problem 0 +week 0 +februari 0 +avail 0 +section 0 +final 0 +quizz 0 +us 0 +program 0 +lectur 0 +oper 0 +system 0 +set 0 +wednesdai 0 +schedul 0 +late 0 +april 0 +spring 0 +grade 0 +read 0 +cours 0 +discuss 0 +learn 0 +dai 0 +march 0 +introduct 0 +bart 0 +answer 0 +note 0 +wisc 0 +eduoffic 0 +csphone 0 +offic 0 +hour 0 +page 0 +comput 0 +exam 0 +quiz 0 +last 0 +process 0 +unix 0 +goal 0 +memori 0 +three 0 +work 0 +take 0 +fridai 0 +mondai 0 +need 0 +orient 0 +book 0 +first 0 +tuesdai 0 +thursdai 0 +follow 0 +past 0 +semaphor 0 +messag 0 +simul 0 +semest 0 +hand 0 +group 0 +probabl 0 +solari 0 +workstat 0 +januari 0 +alloc 0 +advanc 0 +topic 0 +univers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +departmentc 0 +millerc 0 +systemsnew 0 +stufffin 0 +post 0 +readi 0 +print 0 +staffinstructor 0 +milleremail 0 +noonor 0 +appoint 0 +karuna 0 +muthiahemail 0 +muthiah 0 +jonathan 0 +weyersemail 0 +weyer 0 +materialsth 0 +organ 0 +around 0 +thelectur 0 +notesar 0 +textbookmodern 0 +tanenbaum 0 +programmingassign 0 +purchas 0 +copi 0 +ofobject 0 +pohl 0 +whatev 0 +favorit 0 +come 0 +modern 0 +systemsandobject 0 +store 0 +sectionslectur 0 +time 0 +sciencesdiscuss 0 +nolandnot 0 +extra 0 +mainli 0 +recit 0 +materialcov 0 +weekli 0 +occas 0 +import 0 +detail 0 +ofth 0 +homework 0 +make 0 +sure 0 +leav 0 +room 0 +attend 0 +quizzesther 0 +midterm 0 +option 0 +start 0 +second 0 +thediscuss 0 +minut 0 +concurr 0 +monitor 0 +usetrac 0 +activ 0 +real 0 +drive 0 +algorithm 0 +trace 0 +driven 0 +experi 0 +quantit 0 +analyz 0 +written 0 +setsdur 0 +severalwritten 0 +base 0 +turn 0 +though 0 +find 0 +poorli 0 +youdon 0 +problemssolut 0 +theproblem 0 +happi 0 +question 0 +andlook 0 +solut 0 +variou 0 +synchronizationprimit 0 +solv 0 +manag 0 +hardwar 0 +softwar 0 +workassign 0 +date 0 +list 0 +handout 0 +entir 0 +havethre 0 +daysof 0 +credit 0 +differ 0 +eachof 0 +absolut 0 +accept 0 +cannot 0 +assignmentthat 0 +weekof 0 +cheatingprogram 0 +done 0 +partner 0 +independ 0 +cheater 0 +receiv 0 +maximum 0 +penalti 0 +includ 0 +receivingan 0 +mark 0 +transcript 0 +facilitiesw 0 +run 0 +window 0 +student 0 +regist 0 +account 0 +policyif 0 +lowest 0 +drop 0 +averag 0 +beno 0 +break 0 +count 0 +taught 0 +inth 0 +rang 0 +scheduleth 0 +tent 0 +could 0 +chang 0 +overview 0 +processesweek 0 +dispatch 0 +creationweek 0 +cooper 0 +synchronizationweek 0 +semaphoresweek 0 +monitorsweek 0 +deadlocksweek 0 +debug 0 +strategi 0 +dynam 0 +breakweek 0 +relocationweek 0 +segment 0 +tlbsweek 0 +virtual 0 +replac 0 +thrash 0 +devic 0 +filesweek 0 +disk 0 +directoriesweek 0 +protectionweek 0 +secur 0 +modifi 0 +bybart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..7853a9a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,82 @@ +bart 1 +week 0 +april 0 +internet 0 +februari 0 +march 0 +miller 0 +honor 0 +spring 0 +januari 0 +seminarunivers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +departmentc 0 +millerc 0 +seminarinstructor 0 +milleremail 0 +wisc 0 +eduoffic 0 +csphone 0 +offic 0 +hour 0 +wednesdai 0 +fridai 0 +noonor 0 +appoint 0 +lectureslectur 0 +time 0 +mondai 0 +comput 0 +sciencesclass 0 +schedulether 0 +written 0 +assign 0 +class 0 +requir 0 +attendal 0 +lectur 0 +particip 0 +discuss 0 +follow 0 +schedul 0 +mostli 0 +right 0 +could 0 +chang 0 +introduct 0 +overviewweek 0 +larri 0 +landweb 0 +architectur 0 +protocolsweek 0 +client 0 +server 0 +remot 0 +procedur 0 +callsweek 0 +system 0 +securityweek 0 +eric 0 +bach 0 +secur 0 +encryptionweek 0 +breakweek 0 +miron 0 +livni 0 +imag 0 +pictur 0 +netweek 0 +high 0 +perform 0 +file 0 +systemsweek 0 +david 0 +wood 0 +supercomputerweek 0 +laru 0 +javaweek 0 +discussionslast 0 +modifi 0 +bybart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..21698801 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,168 @@ +read 1 +paper 0 +class 0 +discuss 0 +system 0 +topic 0 +oper 0 +write 0 +project 0 +cours 0 +detail 0 +import 0 +first 0 +advanc 0 +fall 0 +give 0 +current 0 +lectur 0 +review 0 +form 0 +group 0 +twice 0 +week 0 +assign 0 +post 0 +particip 0 +comment 0 +design 0 +idea 0 +well 0 +second 0 +final 0 +grade 0 +availbl 0 +tuesdai 0 +thursdai 0 +univers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +departmentc 0 +bart 0 +millerc 0 +systemssummarythi 0 +intend 0 +broad 0 +exposur 0 +advancedoper 0 +protect 0 +secur 0 +memori 0 +manag 0 +kernel 0 +file 0 +synchron 0 +name 0 +distribut 0 +pleas 0 +rest 0 +inform 0 +sheet 0 +carefulli 0 +textther 0 +realli 0 +satisfactori 0 +textbook 0 +graduat 0 +level 0 +operatingsystemsclass 0 +literatur 0 +text 0 +structur 0 +around 0 +journal 0 +articl 0 +andconfer 0 +proceed 0 +abl 0 +purchas 0 +doit 0 +handout 0 +relev 0 +willinstead 0 +adiscuss 0 +major 0 +theme 0 +us 0 +focal 0 +point 0 +classmat 0 +meetonc 0 +especi 0 +part 0 +listaccord 0 +schedul 0 +formula 0 +success 0 +papersindepend 0 +try 0 +identifyth 0 +issu 0 +thepap 0 +discussionsclass 0 +meet 0 +talk 0 +besupport 0 +opinion 0 +will 0 +activ 0 +daili 0 +geta 0 +expect 0 +quietli 0 +listen 0 +beveri 0 +unhappi 0 +papersdur 0 +short 0 +page 0 +andon 0 +longer 0 +paperwil 0 +base 0 +work 0 +understood 0 +facilityand 0 +extens 0 +area 0 +involv 0 +summaryof 0 +aselect 0 +topicsfrom 0 +choos 0 +good 0 +least 0 +refere 0 +fellowstud 0 +writer 0 +critic 0 +anoth 0 +person 0 +giveth 0 +reader 0 +look 0 +someon 0 +els 0 +revis 0 +pass 0 +examsther 0 +exam 0 +keep 0 +busi 0 +gradesscor 0 +assignmenti 0 +summari 0 +score 0 +fromth 0 +proposalsi 0 +also 0 +gradesar 0 +avail 0 +detailstim 0 +place 0 +csoffic 0 +hour 0 +noonlast 0 +modifi 0 +bybart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..1b1a6e2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,146 @@ +comput 1 +fortran 0 +program 0 +solut 0 +click 0 +week 0 +question 0 +home 0 +cours 0 +microsoft 0 +scienc 0 +lectur 0 +bestor 0 +section 0 +cover 0 +student 0 +engin 0 +write 0 +solv 0 +problem 0 +exercis 0 +gareth 0 +us 0 +page 0 +basic 0 +structur 0 +elementari 0 +intend 0 +languag 0 +primarili 0 +instructor 0 +assign 0 +pointer 0 +interest 0 +pleas 0 +class 0 +vectra 0 +window 0 +also 0 +howev 0 +copi 0 +pagec 0 +introduct 0 +programmingsect 0 +credit 0 +need 0 +prepar 0 +prior 0 +experi 0 +requir 0 +knowledg 0 +assum 0 +materi 0 +enabl 0 +simpl 0 +done 0 +receiv 0 +littl 0 +instruct 0 +high 0 +school 0 +taught 0 +entir 0 +major 0 +descript 0 +menu 0 +import 0 +announc 0 +read 0 +grade 0 +polici 0 +syllabu 0 +text 0 +note 0 +psycholog 0 +march 0 +punctual 0 +avoid 0 +disturb 0 +offic 0 +overal 0 +gener 0 +code 0 +though 0 +want 0 +time 0 +algorithm 0 +even 0 +depend 0 +particular 0 +follow 0 +mondai 0 +subroutin 0 +function 0 +labyou 0 +statist 0 +contain 0 +hewlett 0 +packard 0 +run 0 +open 0 +seven 0 +dai 0 +except 0 +certain 0 +holidai 0 +printer 0 +room 0 +locat 0 +across 0 +hall 0 +dorm 0 +probabl 0 +purchas 0 +lahei 0 +person 0 +insid 0 +textbook 0 +work 0 +lab 0 +campu 0 +compil 0 +first 0 +softwar 0 +includ 0 +mail 0 +netscap 0 +depart 0 +start 0 +point 0 +internet 0 +explor 0 +lyco 0 +search 0 +world 0 +wide 0 +keyword 0 +dilbert 0 +comic 0 +relief 0 +long 0 +night 0 +copyright 0 +wisc 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..23843e24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,265 @@ +week 1 +program 0 +exam 0 +question 0 +click 0 +solut 0 +comput 0 +assign 0 +problem 0 +grade 0 +fortran 0 +class 0 +solv 0 +lectur 0 +home 0 +final 0 +section 0 +mail 0 +languag 0 +scienc 0 +note 0 +offic 0 +term 0 +microsoft 0 +cours 0 +import 0 +text 0 +exercis 0 +pleas 0 +bestor 0 +complet 0 +hand 0 +cover 0 +hour 0 +us 0 +fridai 0 +consult 0 +skill 0 +read 0 +instructor 0 +polici 0 +gareth 0 +must 0 +receiv 0 +approxim 0 +copi 0 +includ 0 +good 0 +long 0 +even 0 +compil 0 +mondai 0 +help 0 +send 0 +write 0 +page 0 +primarili 0 +engin 0 +student 0 +syllabu 0 +pointer 0 +interest 0 +contribut 0 +elig 0 +pass 0 +curv 0 +april 0 +come 0 +abl 0 +work 0 +understand 0 +line 0 +well 0 +though 0 +need 0 +time 0 +seven 0 +error 0 +wednesdai 0 +answer 0 +login 0 +printer 0 +netscap 0 +gener 0 +except 0 +want 0 +first 0 +learn 0 +particular 0 +vectra 0 +window 0 +also 0 +howev 0 +pagec 0 +algebra 0 +programmingsect 0 +taught 0 +entir 0 +intend 0 +major 0 +descript 0 +menu 0 +announc 0 +psycholog 0 +punctual 0 +avoid 0 +disturb 0 +lowest 0 +score 0 +mean 0 +rang 0 +thur 0 +februari 0 +amclick 0 +list 0 +current 0 +identif 0 +tent 0 +follow 0 +topic 0 +semest 0 +relev 0 +anyth 0 +unsur 0 +instead 0 +wait 0 +try 0 +discov 0 +didn 0 +realli 0 +someth 0 +notestext 0 +applic 0 +edit 0 +koffman 0 +friedman 0 +avail 0 +substitut 0 +show 0 +overhead 0 +projector 0 +exampl 0 +addit 0 +board 0 +respons 0 +materi 0 +assignmentsther 0 +three 0 +constitut 0 +ensur 0 +regardless 0 +perform 0 +stai 0 +longer 0 +extra 0 +close 0 +book 0 +bring 0 +pencil 0 +calcul 0 +necessari 0 +attempt 0 +everi 0 +without 0 +automat 0 +zero 0 +risk 0 +fail 0 +gradesheet 0 +handin 0 +directori 0 +onlin 0 +late 0 +academ 0 +misconduct 0 +cheat 0 +specif 0 +pmhow 0 +wear 0 +name 0 +tag 0 +duti 0 +short 0 +messag 0 +syntax 0 +inform 0 +requir 0 +explan 0 +best 0 +normal 0 +dissert 0 +research 0 +modem 0 +therefor 0 +outsid 0 +make 0 +appoint 0 +easili 0 +contact 0 +regularli 0 +exerciseson 0 +distinguish 0 +programm 0 +doesn 0 +matter 0 +familiar 0 +techniqu 0 +weekli 0 +small 0 +trivial 0 +give 0 +look 0 +think 0 +right 0 +step 0 +would 0 +overal 0 +structur 0 +code 0 +algorithm 0 +depend 0 +subroutin 0 +function 0 +labyou 0 +statist 0 +contain 0 +hewlett 0 +packard 0 +run 0 +open 0 +dai 0 +certain 0 +holidai 0 +room 0 +locat 0 +across 0 +hall 0 +dorm 0 +probabl 0 +purchas 0 +lahei 0 +person 0 +insid 0 +textbook 0 +lab 0 +campu 0 +softwar 0 +depart 0 +start 0 +point 0 +internet 0 +explor 0 +lyco 0 +search 0 +world 0 +wide 0 +keyword 0 +dilbert 0 +comic 0 +relief 0 +night 0 +copyright 0 +wisc 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..45982287 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,73 @@ +program 1 +page 0 +fall 0 +section 0 +dave 0 +burnett 0 +wisc 0 +offic 0 +updat 0 +home 0 +algebra 0 +languag 0 +name 0 +egglestonemail 0 +eduoffic 0 +phone 0 +hour 0 +announc 0 +note 0 +origin 0 +output 0 +prog 0 +error 0 +dai 0 +week 0 +correct 0 +valu 0 +inform 0 +exam 0 +question 0 +ask 0 +hourlywork 0 +classread 0 +scan 0 +thursdai 0 +class 0 +avail 0 +solut 0 +quiz 0 +grade 0 +gener 0 +cours 0 +informationc 0 +pagecours 0 +objectivesvectra 0 +labc 0 +consultantssyllabuswork 0 +homeclass 0 +handout 0 +gradeshomeworkexam 0 +quizzesmiscellan 0 +archivepolici 0 +informationemail 0 +policygrad 0 +policyl 0 +policyacadem 0 +misconduct 0 +policytextproblem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +list 0 +known 0 +erratalast 0 +modifi 0 +eggleston 0 +base 0 +greg 0 +sharp 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..cfecd9e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,117 @@ +project 1 +system 0 +paper 0 +discuss 0 +assign 0 +implement 0 +lectur 0 +oper 0 +cours 0 +topic 0 +read 0 +research 0 +us 0 +first 0 +count 0 +spring 0 +involv 0 +propos 0 +class 0 +particip 0 +tuesdai 0 +text 0 +semest 0 +grade 0 +schedul 0 +slide 0 +advanc 0 +summari 0 +intend 0 +give 0 +broad 0 +exposur 0 +advancedoper 0 +import 0 +compon 0 +ofvari 0 +anexperiment 0 +cover 0 +topicsinclud 0 +synchron 0 +commun 0 +memori 0 +manag 0 +file 0 +protect 0 +secur 0 +distribut 0 +requir 0 +tochoos 0 +problem 0 +solut 0 +prototyp 0 +info 0 +relev 0 +current 0 +detail 0 +review 0 +rathera 0 +major 0 +theme 0 +focal 0 +point 0 +activ 0 +strongli 0 +encourag 0 +thursdai 0 +engin 0 +halloffic 0 +hour 0 +appoint 0 +comput 0 +scienc 0 +select 0 +classic 0 +design 0 +purchas 0 +doit 0 +formerli 0 +macc 0 +document 0 +deskfor 0 +differ 0 +previou 0 +pleas 0 +copi 0 +exam 0 +instead 0 +benchmark 0 +suit 0 +measur 0 +performanceof 0 +variou 0 +suno 0 +solari 0 +linux 0 +window 0 +manya 0 +hand 0 +second 0 +final 0 +report 0 +present 0 +total 0 +tent 0 +list 0 +suggest 0 +make 0 +well 0 +either 0 +case 0 +need 0 +come 0 +choos 0 +team 0 +peopl 0 +allow 0 +assig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..51560614 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,45 @@ +lectur 1 +silva 0 +toni 0 +sidnei 0 +hummert 0 +cours 0 +comput 0 +program 0 +fall 0 +cover 0 +student 0 +materi 0 +fortran 0 +jeff 0 +lampert 0 +michael 0 +birk 0 +russel 0 +man 0 +introduct 0 +scienc 0 +credit 0 +design 0 +basic 0 +programmingstructur 0 +need 0 +prepar 0 +elementaryengin 0 +suffici 0 +enableth 0 +write 0 +simpl 0 +solv 0 +engin 0 +problem 0 +inelementari 0 +essenti 0 +first 0 +half 0 +list 0 +section 0 +martin 0 +reameslast 0 +modifi 0 +anthoni 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..921958b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,96 @@ +offic 1 +hour 0 +phone 0 +wisc 0 +name 0 +email 0 +section 0 +grade 0 +site 0 +page 0 +kelli 0 +tsioli 0 +link 0 +home 0 +ratliff 0 +bockrath 0 +rehnuma 0 +ashraf 0 +geeri 0 +jyothi 0 +thano 0 +world 0 +list 0 +maintain 0 +fall 0 +midterm 0 +exam 0 +answer 0 +keyinstructorprofessor 0 +desautelsoffic 0 +comput 0 +sciencesoffic 0 +mondai 0 +wednesdai 0 +appoint 0 +dept 0 +mail 0 +teach 0 +assistantsfollow 0 +nathan 0 +rahman 0 +jaim 0 +fink 0 +jfink 0 +aboulnaga 0 +andrew 0 +jame 0 +herro 0 +jherro 0 +abhinav 0 +gupta 0 +agupta 0 +krothap 0 +chiang 0 +suhui 0 +gradesexplor 0 +compani 0 +whose 0 +softwar 0 +hardwar 0 +borland 0 +hewlett 0 +packard 0 +intel 0 +microsoft 0 +novel 0 +us 0 +explor 0 +lyco 0 +enorm 0 +databas 0 +yahoo 0 +internet 0 +resourc 0 +classifi 0 +categori 0 +lookup 0 +search 0 +virtual 0 +tourist 0 +find 0 +around 0 +click 0 +mother 0 +larg 0 +alphabet 0 +cool 0 +especi 0 +excel 0 +univers 0 +wisconsin 0 +madison 0 +origin 0 +creat 0 +teitelbaum 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..921958b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,96 @@ +offic 1 +hour 0 +phone 0 +wisc 0 +name 0 +email 0 +section 0 +grade 0 +site 0 +page 0 +kelli 0 +tsioli 0 +link 0 +home 0 +ratliff 0 +bockrath 0 +rehnuma 0 +ashraf 0 +geeri 0 +jyothi 0 +thano 0 +world 0 +list 0 +maintain 0 +fall 0 +midterm 0 +exam 0 +answer 0 +keyinstructorprofessor 0 +desautelsoffic 0 +comput 0 +sciencesoffic 0 +mondai 0 +wednesdai 0 +appoint 0 +dept 0 +mail 0 +teach 0 +assistantsfollow 0 +nathan 0 +rahman 0 +jaim 0 +fink 0 +jfink 0 +aboulnaga 0 +andrew 0 +jame 0 +herro 0 +jherro 0 +abhinav 0 +gupta 0 +agupta 0 +krothap 0 +chiang 0 +suhui 0 +gradesexplor 0 +compani 0 +whose 0 +softwar 0 +hardwar 0 +borland 0 +hewlett 0 +packard 0 +intel 0 +microsoft 0 +novel 0 +us 0 +explor 0 +lyco 0 +enorm 0 +databas 0 +yahoo 0 +internet 0 +resourc 0 +classifi 0 +categori 0 +lookup 0 +search 0 +virtual 0 +tourist 0 +find 0 +around 0 +click 0 +mother 0 +larg 0 +alphabet 0 +cool 0 +especi 0 +excel 0 +univers 0 +wisconsin 0 +madison 0 +origin 0 +creat 0 +teitelbaum 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..08a60f12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,169 @@ +comput 1 +lectur 1 +us 0 +section 0 +system 0 +class 0 +macintosh 0 +part 0 +discuss 0 +program 0 +word 0 +assign 0 +info 0 +salli 0 +mail 0 +wisc 0 +text 0 +click 0 +avail 0 +cours 0 +gener 0 +topic 0 +includ 0 +follow 0 +spreadsheet 0 +databas 0 +oper 0 +aldu 0 +background 0 +quizz 0 +exam 0 +syllabu 0 +computersinstructor 0 +instructor 0 +petersonoffic 0 +sciencephon 0 +slpeter 0 +facstaff 0 +eduoffic 0 +hour 0 +tuesdai 0 +thursdai 0 +appointmentvit 0 +time 0 +place 0 +held 0 +engin 0 +halllectur 0 +inform 0 +technolog 0 +societi 0 +laudon 0 +traver 0 +laudonlab 0 +point 0 +drag 0 +petersoncours 0 +introduct 0 +design 0 +take 0 +zero 0 +knowledg 0 +computersto 0 +crack 0 +shot 0 +user 0 +skill 0 +throughcolleg 0 +arena 0 +taught 0 +macintoshcomput 0 +csuse 0 +compon 0 +term 0 +scienc 0 +work 0 +necessarili 0 +order 0 +applic 0 +processor 0 +graphic 0 +hardwar 0 +input 0 +output 0 +storag 0 +devic 0 +languag 0 +network 0 +telecommun 0 +artifici 0 +intellig 0 +expert 0 +relat 0 +social 0 +issu 0 +laboratori 0 +hand 0 +experienceon 0 +iici 0 +process 0 +electron 0 +newsgroup 0 +world 0 +wide 0 +eudora 0 +netscap 0 +paint 0 +draw 0 +superpaint 0 +chart 0 +excel 0 +filemak 0 +present 0 +manag 0 +hypercard 0 +desktop 0 +publish 0 +pagemak 0 +integr 0 +learn 0 +well 0 +addit 0 +special 0 +tool 0 +scanner 0 +teach 0 +thegoal 0 +provid 0 +high 0 +qualiti 0 +instruct 0 +rich 0 +educationalexperi 0 +namesectiontimedai 0 +bodner 0 +mwnick 0 +leavi 0 +mwtrshannon 0 +lloyd 0 +trtrjeff 0 +reminga 0 +mwfmwira 0 +sharenow 0 +trtrbrian 0 +swander 0 +mwfmwfbrad 0 +thayer 0 +mwfmwfjoe 0 +varghes 0 +trtrgeoff 0 +weinberg 0 +mwftrmaria 0 +yuin 0 +mwfmwrecommend 0 +necessari 0 +grade 0 +base 0 +regular 0 +assignmentsand 0 +glanc 0 +contain 0 +nitti 0 +gritti 0 +detail 0 +superpaintassign 0 +excellast 0 +modifi 0 +octob 0 +jonbodn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..08a60f12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,169 @@ +comput 1 +lectur 1 +us 0 +section 0 +system 0 +class 0 +macintosh 0 +part 0 +discuss 0 +program 0 +word 0 +assign 0 +info 0 +salli 0 +mail 0 +wisc 0 +text 0 +click 0 +avail 0 +cours 0 +gener 0 +topic 0 +includ 0 +follow 0 +spreadsheet 0 +databas 0 +oper 0 +aldu 0 +background 0 +quizz 0 +exam 0 +syllabu 0 +computersinstructor 0 +instructor 0 +petersonoffic 0 +sciencephon 0 +slpeter 0 +facstaff 0 +eduoffic 0 +hour 0 +tuesdai 0 +thursdai 0 +appointmentvit 0 +time 0 +place 0 +held 0 +engin 0 +halllectur 0 +inform 0 +technolog 0 +societi 0 +laudon 0 +traver 0 +laudonlab 0 +point 0 +drag 0 +petersoncours 0 +introduct 0 +design 0 +take 0 +zero 0 +knowledg 0 +computersto 0 +crack 0 +shot 0 +user 0 +skill 0 +throughcolleg 0 +arena 0 +taught 0 +macintoshcomput 0 +csuse 0 +compon 0 +term 0 +scienc 0 +work 0 +necessarili 0 +order 0 +applic 0 +processor 0 +graphic 0 +hardwar 0 +input 0 +output 0 +storag 0 +devic 0 +languag 0 +network 0 +telecommun 0 +artifici 0 +intellig 0 +expert 0 +relat 0 +social 0 +issu 0 +laboratori 0 +hand 0 +experienceon 0 +iici 0 +process 0 +electron 0 +newsgroup 0 +world 0 +wide 0 +eudora 0 +netscap 0 +paint 0 +draw 0 +superpaint 0 +chart 0 +excel 0 +filemak 0 +present 0 +manag 0 +hypercard 0 +desktop 0 +publish 0 +pagemak 0 +integr 0 +learn 0 +well 0 +addit 0 +special 0 +tool 0 +scanner 0 +teach 0 +thegoal 0 +provid 0 +high 0 +qualiti 0 +instruct 0 +rich 0 +educationalexperi 0 +namesectiontimedai 0 +bodner 0 +mwnick 0 +leavi 0 +mwtrshannon 0 +lloyd 0 +trtrjeff 0 +reminga 0 +mwfmwira 0 +sharenow 0 +trtrbrian 0 +swander 0 +mwfmwfbrad 0 +thayer 0 +mwfmwfjoe 0 +varghes 0 +trtrgeoff 0 +weinberg 0 +mwftrmaria 0 +yuin 0 +mwfmwrecommend 0 +necessari 0 +grade 0 +base 0 +regular 0 +assignmentsand 0 +glanc 0 +contain 0 +nitti 0 +gritti 0 +detail 0 +superpaintassign 0 +excellast 0 +modifi 0 +octob 0 +jonbodn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..ce00ce84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,64 @@ +section 1 +skrentni 1 +window 0 +home 0 +languag 0 +coordin 0 +email 0 +cours 0 +consult 0 +introduct 0 +inform 0 +borland 0 +pagecomput 0 +scienc 0 +algebra 0 +program 0 +instructorsw 0 +would 0 +like 0 +comment 0 +suggest 0 +complaint 0 +feedback 0 +provid 0 +click 0 +offic 0 +csinform 0 +frequent 0 +ask 0 +question 0 +overview 0 +microcomput 0 +laboratori 0 +fall 0 +schedul 0 +tutor 0 +mainli 0 +polici 0 +academ 0 +misconduct 0 +offer 0 +depart 0 +softwar 0 +microsoft 0 +hint 0 +compil 0 +oper 0 +system 0 +netscap 0 +creat 0 +us 0 +subdirectoriesc 0 +savitch 0 +text 0 +book 0 +integr 0 +develop 0 +environmentfortran 0 +jeff 0 +lampert 0 +page 0 +last 0 +updat 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..0ab5fa36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,38 @@ +languag 1 +advanc 1 +mathemat 1 +cours 0 +infocours 0 +inform 0 +cscours 0 +descriptionfrom 0 +guidebook 0 +undergradu 0 +student 0 +construct 0 +algorithm 0 +problem 0 +solv 0 +instruct 0 +experi 0 +least 0 +procedur 0 +orient 0 +pascal 0 +fortran 0 +survei 0 +program 0 +techniqu 0 +prereq 0 +high 0 +school 0 +prepar 0 +colleg 0 +work 0 +statist 0 +logic 0 +consent 0 +instructor 0 +open 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..ce00ce84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,64 @@ +section 1 +skrentni 1 +window 0 +home 0 +languag 0 +coordin 0 +email 0 +cours 0 +consult 0 +introduct 0 +inform 0 +borland 0 +pagecomput 0 +scienc 0 +algebra 0 +program 0 +instructorsw 0 +would 0 +like 0 +comment 0 +suggest 0 +complaint 0 +feedback 0 +provid 0 +click 0 +offic 0 +csinform 0 +frequent 0 +ask 0 +question 0 +overview 0 +microcomput 0 +laboratori 0 +fall 0 +schedul 0 +tutor 0 +mainli 0 +polici 0 +academ 0 +misconduct 0 +offer 0 +depart 0 +softwar 0 +microsoft 0 +hint 0 +compil 0 +oper 0 +system 0 +netscap 0 +creat 0 +us 0 +subdirectoriesc 0 +savitch 0 +text 0 +book 0 +integr 0 +develop 0 +environmentfortran 0 +jeff 0 +lampert 0 +page 0 +last 0 +updat 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..aa9226d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,47 @@ +page 1 +inform 0 +includ 0 +check 0 +assign 0 +copi 0 +home 0 +comput 0 +document 0 +postscript 0 +local 0 +servic 0 +problem 0 +solv 0 +us 0 +fall 0 +scienc 0 +follow 0 +instructor 0 +teach 0 +assist 0 +offic 0 +hour 0 +suggest 0 +explan 0 +grade 0 +polici 0 +work 0 +examin 0 +past 0 +exam 0 +lab 0 +handout 0 +syllabu 0 +mani 0 +need 0 +viewer 0 +obtain 0 +site 0 +section 0 +depart 0 +ghost 0 +directori 0 +read 0 +readm 0 +file 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..aa9226d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,47 @@ +page 1 +inform 0 +includ 0 +check 0 +assign 0 +copi 0 +home 0 +comput 0 +document 0 +postscript 0 +local 0 +servic 0 +problem 0 +solv 0 +us 0 +fall 0 +scienc 0 +follow 0 +instructor 0 +teach 0 +assist 0 +offic 0 +hour 0 +suggest 0 +explan 0 +grade 0 +polici 0 +work 0 +examin 0 +past 0 +exam 0 +lab 0 +handout 0 +syllabu 0 +mani 0 +need 0 +viewer 0 +obtain 0 +site 0 +section 0 +depart 0 +ghost 0 +directori 0 +read 0 +readm 0 +file 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..13654122 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,104 @@ +section 1 +chapter 0 +class 0 +assign 0 +exam 0 +quiz 0 +html 0 +fall 0 +program 0 +offic 0 +hour 0 +fridai 0 +jerri 0 +phone 0 +mail 0 +wisc 0 +karen 0 +homework 0 +cancel 0 +grade 0 +septemb 0 +novemb 0 +octob 0 +spring 0 +cours 0 +instructor 0 +handout 0 +solut 0 +simul 0 +help 0 +lectur 0 +note 0 +miller 0 +tuth 0 +wednesdai 0 +postscript 0 +syllabu 0 +answer 0 +data 0 +arithmet 0 +home 0 +page 0 +inform 0 +frequent 0 +ask 0 +question 0 +tusch 0 +tutsch 0 +execpc 0 +nolandsect 0 +smoler 0 +time 0 +psycholog 0 +sunlung 0 +suen 0 +ssuen 0 +edusridevi 0 +bhamidipati 0 +bsri 0 +edumohammad 0 +asgarian 0 +mondai 0 +schedul 0 +revis 0 +overview 0 +programs 0 +programm 0 +examsal 0 +quizz 0 +open 0 +book 0 +calcul 0 +probabl 0 +decemb 0 +last 0 +previou 0 +format 0 +summer 0 +midterm 0 +final 0 +lookup 0 +graphic 0 +interfac 0 +manual 0 +noteskaren 0 +number 0 +system 0 +represent 0 +integ 0 +float 0 +point 0 +structur 0 +regist 0 +procedur 0 +updat 0 +assembl 0 +updatedmondai 0 +except 0 +process 0 +featur 0 +perform 0 +architecur 0 +case 0 +studi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..e800ed96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,115 @@ +section 1 +chapter 0 +assign 0 +quiz 0 +class 0 +exam 0 +html 0 +program 0 +fall 0 +offic 0 +hour 0 +fridai 0 +jerri 0 +phone 0 +mail 0 +wisc 0 +homework 0 +grade 0 +karen 0 +cancel 0 +septemb 0 +novemb 0 +octob 0 +decemb 0 +final 0 +spring 0 +cours 0 +instructor 0 +handout 0 +solut 0 +simul 0 +help 0 +lectur 0 +note 0 +miller 0 +tuth 0 +wednesdai 0 +postscript 0 +syllabu 0 +answer 0 +data 0 +arithmet 0 +home 0 +page 0 +inform 0 +frequent 0 +ask 0 +question 0 +tusch 0 +tutsch 0 +execpc 0 +nolandsect 0 +smoler 0 +time 0 +psycholog 0 +sunlung 0 +suen 0 +ssuen 0 +edusridevi 0 +bhamidipati 0 +bsri 0 +edumohammad 0 +asgarian 0 +mondai 0 +schedul 0 +revis 0 +overview 0 +programs 0 +programm 0 +programa 0 +programb 0 +examsal 0 +quizz 0 +open 0 +book 0 +calcul 0 +probabl 0 +last 0 +option 0 +thursdai 0 +difficult 0 +cumul 0 +offer 0 +desperateto 0 +rais 0 +sign 0 +advanc 0 +previou 0 +format 0 +summer 0 +midterm 0 +lookup 0 +graphic 0 +interfac 0 +manual 0 +noteskaren 0 +number 0 +system 0 +represent 0 +integ 0 +float 0 +point 0 +structur 0 +regist 0 +procedur 0 +updat 0 +assembl 0 +updatedmondai 0 +except 0 +process 0 +featur 0 +perform 0 +architecur 0 +case 0 +studi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..1708b965 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,126 @@ +page 1 +lectur 0 +chapter 0 +sampl 0 +code 0 +comput 0 +onlin 0 +offic 0 +announc 0 +assign 0 +exam 0 +search 0 +list 0 +skrentni 0 +wisc 0 +inform 0 +get 0 +program 0 +place 0 +tree 0 +copi 0 +error 0 +basic 0 +scienc 0 +hour 0 +data 0 +read 0 +lab 0 +queue 0 +stack 0 +stale 0 +attend 0 +thur 0 +futur 0 +tabl 0 +skip 0 +sort 0 +algorithm 0 +structur 0 +lec 0 +introduct 0 +structureslectur 0 +psychologylectur 0 +psychologycours 0 +start 0 +help 0 +cours 0 +materi 0 +home 0 +gener 0 +recent 0 +first 0 +problem 0 +found 0 +locat 0 +binari 0 +last 0 +makeup 0 +done 0 +solut 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +topic 0 +handin 0 +directori 0 +creat 0 +common 0 +suggest 0 +addit 0 +welcom 0 +either 0 +vega 0 +work 0 +line 0 +mondai 0 +wednesdai 0 +magic 0 +number 0 +sourc 0 +file 0 +must 0 +abl 0 +compil 0 +otherwis 0 +unusu 0 +look 0 +forget 0 +reload 0 +updat 0 +browser 0 +cach 0 +becom 0 +outdat 0 +unix 0 +tutori 0 +need 0 +time 0 +balanc 0 +discuss 0 +comparison 0 +implement 0 +simul 0 +overload 0 +oper 0 +hash 0 +link 0 +pointer 0 +dynam 0 +memori 0 +alloc 0 +analysi 0 +recurs 0 +focu 0 +appendix 0 +teach 0 +assist 0 +baicheng 0 +billi 0 +liao 0 +bail 0 +cheng 0 +jiacheng 0 +pmcopyright 0 +jame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..0ce4f638 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,548 @@ +program 1 +assign 0 +cours 0 +lectur 0 +exam 0 +note 0 +comput 0 +inform 0 +data 0 +home 0 +help 0 +document 0 +scienc 0 +time 0 +comment 0 +wisc 0 +offic 0 +hour 0 +text 0 +unix 0 +avail 0 +compil 0 +must 0 +email 0 +yanni 0 +women 0 +also 0 +take 0 +problem 0 +includ 0 +follow 0 +dewitt 0 +semest 0 +read 0 +machin 0 +possibl 0 +code 0 +name 0 +address 0 +grade 0 +late 0 +style 0 +want 0 +student 0 +materi 0 +often 0 +account 0 +final 0 +gener 0 +class 0 +algorithm 0 +clariti 0 +outputfil 0 +tuesdai 0 +schedul 0 +intern 0 +us 0 +develop 0 +cycl 0 +section 0 +print 0 +group 0 +encourag 0 +major 0 +like 0 +extra 0 +mail 0 +true 0 +wall 0 +well 0 +cover 0 +first 0 +debug 0 +function 0 +done 0 +copi 0 +turn 0 +make 0 +chang 0 +avoid 0 +need 0 +understand 0 +work 0 +effici 0 +paramet 0 +limit 0 +identifi 0 +statement 0 +line 0 +continu 0 +variabl 0 +introduct 0 +page 0 +http 0 +languag 0 +cheat 0 +extern 0 +readi 0 +notat 0 +binari 0 +graduat 0 +call 0 +would 0 +suzan 0 +write 0 +thiscours 0 +pascal 0 +requir 0 +book 0 +mirror 0 +complet 0 +still 0 +addit 0 +doit 0 +desk 0 +near 0 +dayton 0 +build 0 +experi 0 +edit 0 +file 0 +find 0 +list 0 +topic 0 +provid 0 +later 0 +design 0 +room 0 +thec 0 +univers 0 +electron 0 +sure 0 +assum 0 +simpli 0 +start 0 +know 0 +earli 0 +explain 0 +correct 0 +behav 0 +situat 0 +test 0 +case 0 +modular 0 +necessari 0 +consist 0 +suggest 0 +valu 0 +indent 0 +long 0 +begin 0 +user 0 +give 0 +descript 0 +tell 0 +assumpt 0 +describ 0 +neg 0 +main 0 +header 0 +declar 0 +segment 0 +error 0 +inputfil 0 +structuresfal 0 +htmlinstructor 0 +ioannidi 0 +sciencesoffic 0 +thursdai 0 +amoffic 0 +phone 0 +html 0 +content 0 +new 0 +teach 0 +assist 0 +polici 0 +newsassign 0 +midterm 0 +statisticssom 0 +interest 0 +statist 0 +median 0 +mean 0 +midterma 0 +sampl 0 +oldmidterm 0 +prepar 0 +ownmidterm 0 +searchth 0 +search 0 +either 0 +open 0 +filemenu 0 +ghostview 0 +window 0 +show 0 +andchoos 0 +menu 0 +item 0 +sciencesom 0 +femal 0 +faculti 0 +undergradu 0 +haveform 0 +wic 0 +oneof 0 +goal 0 +becomecomput 0 +thisclass 0 +talk 0 +someon 0 +incomput 0 +studi 0 +withtheir 0 +classwork 0 +computersci 0 +grad 0 +tomak 0 +appoint 0 +stodder 0 +eduand 0 +grow 0 +tremend 0 +field 0 +theodd 0 +ever 0 +anoth 0 +end 0 +abl 0 +statementi 0 +wide 0 +aniniti 0 +startup 0 +period 0 +product 0 +exceptionsy 0 +textth 0 +isdata 0 +abstract 0 +solv 0 +frank 0 +carrano 0 +isbn 0 +written 0 +separ 0 +notnecessari 0 +alwai 0 +fall 0 +david 0 +actual 0 +consider 0 +simpl 0 +lecturenot 0 +short 0 +isveri 0 +littl 0 +narr 0 +exercis 0 +recommend 0 +sourc 0 +purchas 0 +whichar 0 +street 0 +entranceof 0 +needsom 0 +activ 0 +log 0 +creat 0 +manipul 0 +run 0 +handoutc 0 +notesar 0 +contain 0 +invalu 0 +mention 0 +althoughi 0 +supplement 0 +handout 0 +courseof 0 +nonetheless 0 +respons 0 +base 0 +onth 0 +andth 0 +gradingther 0 +even 0 +five 0 +determin 0 +approxim 0 +equal 0 +weight 0 +programmingassign 0 +count 0 +octob 0 +chemistri 0 +wednesdai 0 +decemb 0 +place 0 +detail 0 +administr 0 +familiar 0 +basic 0 +stuff 0 +apoint 0 +record 0 +equival 0 +madison 0 +prerequisitecours 0 +thesear 0 +floor 0 +prefer 0 +certainrestrict 0 +emailand 0 +thatyou 0 +youwork 0 +provis 0 +download 0 +toyour 0 +runwith 0 +sparcstat 0 +notifi 0 +inassign 0 +hint 0 +allelectron 0 +send 0 +policyno 0 +accept 0 +exactli 0 +order 0 +caus 0 +load 0 +coincid 0 +duedat 0 +sever 0 +right 0 +awai 0 +oneach 0 +thing 0 +certain 0 +wrong 0 +wait 0 +thelast 0 +minut 0 +except 0 +approv 0 +good 0 +excus 0 +troubl 0 +soon 0 +cheatingth 0 +depart 0 +hard 0 +linest 0 +welcom 0 +tocommun 0 +datastructur 0 +butther 0 +share 0 +expect 0 +learn 0 +obei 0 +thecomput 0 +system 0 +policiesgovern 0 +helpif 0 +pleas 0 +policiesif 0 +best 0 +tovisit 0 +along 0 +currenthard 0 +intend 0 +conceptsthat 0 +present 0 +confus 0 +answer 0 +specif 0 +question 0 +reliabl 0 +contact 0 +respond 0 +emailsever 0 +daili 0 +almost 0 +everi 0 +week 0 +gradingprogram 0 +criteria 0 +correctli 0 +normal 0 +typicalinput 0 +state 0 +projectspecif 0 +easi 0 +informationabout 0 +robust 0 +behavior 0 +extrem 0 +unusu 0 +handl 0 +reason 0 +andlog 0 +manner 0 +blow 0 +qualiti 0 +shoulddemonstr 0 +facet 0 +capabl 0 +includingunusu 0 +unnecessarili 0 +ineffici 0 +construct 0 +howev 0 +never 0 +pursu 0 +expens 0 +effect 0 +useof 0 +incorpor 0 +sort 0 +paper 0 +subject 0 +considerationof 0 +arbitrari 0 +bound 0 +size 0 +orcomplex 0 +input 0 +whenev 0 +express 0 +definedconst 0 +easili 0 +numer 0 +liter 0 +appear 0 +thosevalu 0 +styleus 0 +meaning 0 +scheme 0 +convent 0 +variable_nam 0 +function_nam 0 +argument 0 +const 0 +defined_const 0 +enum 0 +enumtyp 0 +classnam 0 +multipl 0 +singl 0 +skip 0 +clear 0 +notesfor 0 +loop 0 +label 0 +meaningfulli 0 +documentationthi 0 +yourprogram 0 +typic 0 +someonewho 0 +superfici 0 +full 0 +format 0 +bug 0 +special 0 +featur 0 +made 0 +posit 0 +aspect 0 +unawar 0 +descriptionne 0 +repeat 0 +briefli 0 +summar 0 +point 0 +refer 0 +thensuffici 0 +appli 0 +documentationther 0 +four 0 +type 0 +structuresshould 0 +purpos 0 +outlin 0 +next 0 +membershould 0 +convei 0 +sname 0 +much 0 +withoutmak 0 +suppli 0 +exampl 0 +index 0 +last 0 +element 0 +ad 0 +stackyou 0 +local 0 +within 0 +tricki 0 +opaqu 0 +beavoid 0 +sometim 0 +commentcan 0 +reader 0 +go 0 +clarifi 0 +level 0 +outlineof 0 +vimani 0 +peopl 0 +thefirst 0 +becomecomfort 0 +particularli 0 +youronli 0 +previou 0 +macpasc 0 +macintosh 0 +strongli 0 +urg 0 +inth 0 +becom 0 +comfort 0 +withunix 0 +pain 0 +wellspent 0 +wish 0 +attend 0 +tutori 0 +held 0 +comp 0 +session 0 +thefollow 0 +dai 0 +tbayou 0 +pick 0 +environ 0 +look 0 +output 0 +break 0 +tire 0 +goto 0 +quit 0 +result 0 +submiss 0 +instruct 0 +given 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..f0196ce0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,284 @@ +assign 1 +program 1 +lectur 0 +data 0 +cours 0 +exam 0 +structur 0 +note 0 +text 0 +wisc 0 +cover 0 +also 0 +inform 0 +us 0 +materi 0 +languag 0 +comput 0 +tuesdai 0 +abstract 0 +write 0 +chin 0 +section 0 +grade 0 +follow 0 +even 0 +http 0 +html 0 +fall 0 +offic 0 +hour 0 +teach 0 +cchin 0 +page 0 +book 0 +often 0 +dewitt 0 +electron 0 +final 0 +line 0 +introduct 0 +jame 0 +scienc 0 +laru 0 +fridai 0 +present 0 +concept 0 +detail 0 +build 0 +necessari 0 +determin 0 +requir 0 +type 0 +principl 0 +maintain 0 +place 0 +exercis 0 +tang 0 +sciencesoffic 0 +wednesdai 0 +thursdai 0 +phone 0 +email 0 +address 0 +written 0 +littl 0 +simpl 0 +contain 0 +addit 0 +avail 0 +doit 0 +desk 0 +dayton 0 +first 0 +experi 0 +regularli 0 +read 0 +mail 0 +schedul 0 +topic 0 +second 0 +revis 0 +larusinstructor 0 +laruslaru 0 +amcontentsteach 0 +assistantstextlectur 0 +informationelectron 0 +mailth 0 +languagegradingexamscours 0 +scheduleassign 0 +assignmentscours 0 +objectivesc 0 +object 0 +gener 0 +wide 0 +fundament 0 +block 0 +abl 0 +identifi 0 +situat 0 +select 0 +appropri 0 +reiter 0 +modular 0 +introduc 0 +essenti 0 +clear 0 +correct 0 +softwar 0 +close 0 +connect 0 +strong 0 +emphasi 0 +appli 0 +assistantswei 0 +zhang 0 +assist 0 +forthi 0 +homework 0 +assignmentsand 0 +happi 0 +answer 0 +question 0 +theassign 0 +aspect 0 +give 0 +troubl 0 +zhangoffic 0 +compuer 0 +sundai 0 +weiz 0 +mondai 0 +amoffic 0 +home 0 +textth 0 +problem 0 +solv 0 +wall 0 +mirror 0 +frank 0 +carrano 0 +isbn 0 +well 0 +long 0 +wind 0 +includ 0 +background 0 +separ 0 +alwai 0 +david 0 +complet 0 +short 0 +true 0 +narr 0 +basi 0 +feel 0 +free 0 +skip 0 +portion 0 +want 0 +purchas 0 +document 0 +street 0 +entranc 0 +unix 0 +need 0 +activ 0 +account 0 +log 0 +creat 0 +edit 0 +manipul 0 +file 0 +compil 0 +run 0 +debug 0 +handout 0 +crucial 0 +help 0 +psycholog 0 +mention 0 +attend 0 +strongli 0 +recommend 0 +appear 0 +textbook 0 +needless 0 +respons 0 +base 0 +maili 0 +notifi 0 +student 0 +chang 0 +hint 0 +assum 0 +gradingther 0 +semest 0 +five 0 +approxim 0 +equal 0 +weight 0 +count 0 +taught 0 +must 0 +know 0 +skrentni 0 +larg 0 +complex 0 +unless 0 +difficult 0 +learn 0 +anoth 0 +gdbthere 0 +describ 0 +debugg 0 +chemistri 0 +decemb 0 +rough 0 +outlin 0 +provid 0 +later 0 +administrationbas 0 +stuff 0 +function 0 +pointer 0 +record 0 +dynam 0 +storagelectur 0 +list 0 +binari 0 +search 0 +notat 0 +advanc 0 +listslectur 0 +stackslectur 0 +queueslectur 0 +hashinglectur 0 +recursionlectur 0 +treesbinari 0 +tree 0 +sort 0 +searchlectur 0 +treesgraphslectur 0 +sortinglectur 0 +tbaassign 0 +absolut 0 +turn 0 +index 0 +card 0 +name 0 +login 0 +nameyear 0 +school 0 +freshman 0 +sophomor 0 +previou 0 +coursesprevi 0 +experiencerec 0 +photograph 0 +pictur 0 +birthdai 0 +girl 0 +scout 0 +trip 0 +summer 0 +color 0 +black 0 +white 0 +size 0 +given 0 +without 0 +photo 0 +byte 0 +fora 0 +bound 0 +integ 0 +sequenc 0 +databaseof 0 +score 0 +tenni 0 +tournament 0 +produc 0 +aconcord 0 +hash 0 +tabl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..227c4b55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,121 @@ +page 1 +lectur 0 +chapter 0 +sampl 0 +comput 0 +offic 0 +announc 0 +assign 0 +exam 0 +code 0 +onlin 0 +list 0 +skrentni 0 +wisc 0 +inform 0 +get 0 +program 0 +place 0 +copi 0 +error 0 +basic 0 +scienc 0 +hour 0 +data 0 +read 0 +lab 0 +queue 0 +stack 0 +stale 0 +attend 0 +thur 0 +skip 0 +sort 0 +search 0 +algorithm 0 +structur 0 +lec 0 +introduct 0 +structureslectur 0 +psychologylectur 0 +psychologycours 0 +start 0 +help 0 +cours 0 +materi 0 +home 0 +gener 0 +recent 0 +first 0 +problem 0 +found 0 +locat 0 +last 0 +makeup 0 +done 0 +solut 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +topic 0 +handin 0 +directori 0 +creat 0 +common 0 +suggest 0 +addit 0 +welcom 0 +either 0 +vega 0 +work 0 +line 0 +mondai 0 +wednesdai 0 +magic 0 +number 0 +sourc 0 +file 0 +must 0 +abl 0 +compil 0 +otherwis 0 +unusu 0 +look 0 +forget 0 +reload 0 +updat 0 +browser 0 +cach 0 +becom 0 +outdat 0 +unix 0 +tutori 0 +need 0 +time 0 +futur 0 +tree 0 +simul 0 +overload 0 +oper 0 +hash 0 +tabl 0 +link 0 +pointer 0 +dynam 0 +memori 0 +alloc 0 +analysi 0 +recurs 0 +focu 0 +appendix 0 +teach 0 +assist 0 +baicheng 0 +billi 0 +liao 0 +bail 0 +cheng 0 +jiacheng 0 +pmcopyright 0 +jame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..11d3d475 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,55 @@ +comput 1 +offic 1 +page 1 +introduct 0 +scienc 0 +lectur 0 +home 0 +theoret 0 +brian 0 +cole 0 +email 0 +wisc 0 +hour 0 +sundaram 0 +assign 0 +examin 0 +madison 0 +fall 0 +room 0 +mondai 0 +fridai 0 +teach 0 +assist 0 +david 0 +stukel 0 +tuesdai 0 +wednesdai 0 +thursdai 0 +text 0 +languag 0 +theori 0 +john 0 +martin 0 +north 0 +dakota 0 +state 0 +univers 0 +mcgraw 0 +hill 0 +isbn 0 +tent 0 +schedul 0 +includ 0 +exam 0 +inform 0 +clarif 0 +grade 0 +polici 0 +written 0 +term 0 +final 0 +archiv 0 +mail 0 +list 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..38e0cca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,72 @@ +homework 1 +exam 0 +midterm 0 +octob 0 +stat 0 +final 0 +offic 0 +cours 0 +septemb 0 +march 0 +solut 0 +book 0 +decemb 0 +program 0 +novemb 0 +sampl 0 +also 0 +math 0 +fall 0 +linear 0 +mechan 0 +engin 0 +open 0 +time 0 +date 0 +locat 0 +wednesdai 0 +olvi 0 +mangasarian 0 +comp 0 +mail 0 +wisc 0 +hour 0 +semest 0 +matlab 0 +inform 0 +page 0 +site 0 +programmingfal 0 +schedul 0 +lectur 0 +thursdai 0 +instructor 0 +pphone 0 +teach 0 +assist 0 +telephon 0 +textbook 0 +ferri 0 +preliminari 0 +version 0 +doit 0 +madison 0 +syllabu 0 +overview 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +setup 0 +project 0 +mathemat 0 +home 0 +relev 0 +searchabl 0 +bibliograph 0 +databas 0 +item 0 +link 0 +variou 0 +updat 0 +period 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..1f4b1b20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,68 @@ +offic 1 +program 0 +lectur 0 +horwitz 0 +telephon 0 +mail 0 +wisc 0 +hour 0 +appoint 0 +rahul 0 +compil 0 +tool 0 +cours 0 +inform 0 +assign 0 +grade 0 +fall 0 +introduct 0 +languag 0 +compilersspr 0 +stori 0 +month 0 +octob 0 +schedul 0 +tuth 0 +comp 0 +stat 0 +recit 0 +psycholog 0 +instructor 0 +susan 0 +tuesdai 0 +fridai 0 +teach 0 +assist 0 +kapoor 0 +mondai 0 +wednesdai 0 +text 0 +reserv 0 +wendt 0 +librari 0 +principl 0 +techniqu 0 +sethi 0 +ullman 0 +craft 0 +fischer 0 +leblanc 0 +check 0 +regularli 0 +gener 0 +overview 0 +date 0 +exam 0 +includ 0 +late 0 +polici 0 +get 0 +start 0 +read 0 +homework 0 +examin 0 +note 0 +us 0 +email 0 +link 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..5d4c697f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,526 @@ +java 1 +project 0 +sept 0 +avail 0 +file 0 +program 0 +schedul 0 +languag 0 +introduct 0 +system 0 +comput 0 +specif 0 +implement 0 +process 0 +midterm 0 +exam 0 +deadlock 0 +section 0 +assign 0 +lectur 0 +manag 0 +memori 0 +oper 0 +grade 0 +note 0 +class 0 +page 0 +first 0 +public 0 +cours 0 +synchron 0 +solomon 0 +room 0 +wisc 0 +summari 0 +correct 0 +take 0 +time 0 +differ 0 +semest 0 +discuss 0 +thank 0 +point 0 +threadschedul 0 +start 0 +directori 0 +fork 0 +string 0 +messag 0 +final 0 +disk 0 +refer 0 +us 0 +chapter 0 +updat 0 +detail 0 +also 0 +forproject 0 +avaiabl 0 +avoid 0 +call 0 +place 0 +text 0 +topic 0 +quit 0 +hand 0 +algorithm 0 +theproject 0 +creat 0 +three 0 +graph 0 +graphcontain 0 +number 0 +onlin 0 +student 0 +receiv 0 +makefil 0 +help 0 +seminar 0 +unix 0 +follow 0 +processor 0 +wednesdai 0 +collect 0 +late 0 +dai 0 +monitor 0 +count 0 +fall 0 +offic 0 +sciencesoffic 0 +hour 0 +phone 0 +email 0 +address 0 +new 0 +answer 0 +thursdai 0 +error 0 +other 0 +popular 0 +cover 0 +comp 0 +octob 0 +issu 0 +contain 0 +fix 0 +import 0 +caus 0 +second 0 +sampl 0 +code 0 +sched 0 +later 0 +data 0 +read 0 +peterson 0 +initi 0 +placement 0 +star 0 +philosoph 0 +todd 0 +thejava 0 +tutori 0 +argument 0 +substr 0 +work 0 +sourc 0 +type 0 +make 0 +run 0 +solari 0 +finish 0 +structur 0 +meet 0 +mondai 0 +book 0 +bookstor 0 +begin 0 +depart 0 +workstat 0 +must 0 +cshrc 0 +local 0 +path 0 +back 0 +thur 0 +inform 0 +relat 0 +softwar 0 +prevent 0 +recoveri 0 +main 0 +virtual 0 +swap 0 +devic 0 +protect 0 +secur 0 +tuesdai 0 +includ 0 +thejavaprogram 0 +requir 0 +strongli 0 +languagebi 0 +arnold 0 +gosl 0 +addit 0 +access 0 +scienc 0 +becom 0 +familiar 0 +environ 0 +pair 0 +anyon 0 +caught 0 +garbag 0 +histori 0 +resourc 0 +get 0 +view 0 +semaphor 0 +systemssect 0 +instructormarvin 0 +troffic 0 +tarob 0 +mellencamp 0 +mwfoffic 0 +mellen 0 +watch 0 +space 0 +latest 0 +score 0 +breakdown 0 +distributioni 0 +date 0 +move 0 +typograph 0 +importantli 0 +arraywa 0 +demand 0 +isavail 0 +look 0 +warn 0 +exampl 0 +larg 0 +grain 0 +salt 0 +long 0 +courseus 0 +order 0 +likelyb 0 +determin 0 +presentedin 0 +electron 0 +direct 0 +forprogram 0 +post 0 +procedur 0 +givefork 0 +specificationshould 0 +notifi 0 +show 0 +bug 0 +minor 0 +paragraph 0 +slightli 0 +garbl 0 +jake 0 +dawlei 0 +carr 0 +line 0 +omit 0 +theprogram 0 +detailssect 0 +specifi 0 +correctli 0 +lipe 0 +zhang 0 +test 0 +srccontain 0 +javacontain 0 +definit 0 +classgraphdescrib 0 +petersoncycl 0 +shown 0 +mention 0 +notacycl 0 +petersonacycl 0 +acycl 0 +topolog 0 +central 0 +sharingfork 0 +nine 0 +jenner 0 +typo 0 +maxthink 0 +replac 0 +maxeat 0 +versionha 0 +mistak 0 +version 0 +argumenti 0 +offset 0 +charactersin 0 +franco 0 +tung 0 +chan 0 +occasion 0 +send 0 +urgent 0 +directli 0 +mail 0 +listof 0 +regist 0 +archiv 0 +sent 0 +list 0 +think 0 +sendmail 0 +request 0 +copi 0 +rememb 0 +separ 0 +edit 0 +describ 0 +comment 0 +maketo 0 +compil 0 +compilewithout 0 +simul 0 +preemptiv 0 +multitask 0 +computershav 0 +ad 0 +tutoriali 0 +go 0 +onthread 0 +find 0 +hint 0 +awar 0 +weekli 0 +systemsand 0 +network 0 +checkth 0 +colloquia 0 +theunivers 0 +ajava 0 +afil 0 +name 0 +home 0 +onelin 0 +chang 0 +effect 0 +either 0 +localor 0 +simpli 0 +readi 0 +orient 0 +session 0 +user 0 +forth 0 +tue 0 +csmon 0 +cslast 0 +content 0 +intend 0 +gener 0 +techniqu 0 +usedto 0 +kind 0 +among 0 +beprocess 0 +creation 0 +commun 0 +segment 0 +replacementalgorithm 0 +control 0 +input 0 +output 0 +sciencesand 0 +statisticsdiscuss 0 +psychologyth 0 +option 0 +least 0 +primari 0 +focu 0 +anyquest 0 +regard 0 +rais 0 +thetext 0 +modern 0 +systemsbi 0 +andrew 0 +tanenbaum 0 +prentic 0 +hall 0 +recommend 0 +jame 0 +addison 0 +weslei 0 +lot 0 +materi 0 +fast 0 +tutorialth 0 +specificationjava 0 +documentationwatch 0 +spot 0 +link 0 +five 0 +sparcstat 0 +dialect 0 +unixoper 0 +provid 0 +anycomput 0 +programminglanguag 0 +howev 0 +respons 0 +transfer 0 +requireddata 0 +set 0 +packag 0 +easi 0 +acquaint 0 +exercis 0 +designedto 0 +subsequ 0 +involveprocess 0 +member 0 +feel 0 +free 0 +butyou 0 +share 0 +partner 0 +cheat 0 +vigor 0 +punish 0 +enough 0 +said 0 +dateind 0 +entir 0 +havethre 0 +daysof 0 +credit 0 +eachof 0 +last 0 +choos 0 +sever 0 +favor 0 +congeni 0 +runtim 0 +subscript 0 +null 0 +pointer 0 +uniniti 0 +variabl 0 +except 0 +runtimerath 0 +mysteri 0 +crash 0 +random 0 +behavior 0 +much 0 +easier 0 +char 0 +arrai 0 +storag 0 +extrem 0 +handi 0 +trendi 0 +faster 0 +mani 0 +reason 0 +grow 0 +littl 0 +withth 0 +byproduct 0 +coursewil 0 +knowledg 0 +market 0 +commod 0 +featur 0 +built 0 +particular 0 +wide 0 +withlanguag 0 +level 0 +support 0 +concurr 0 +thread 0 +switch 0 +alwaysa 0 +disloc 0 +fortun 0 +excel 0 +eas 0 +thetransit 0 +amazingli 0 +good 0 +neither 0 +introductori 0 +primer 0 +author 0 +assum 0 +youalreadi 0 +know 0 +manual 0 +although 0 +arefer 0 +manuali 0 +readabl 0 +wayfrom 0 +everyth 0 +need 0 +write 0 +sophisticatedprogram 0 +univers 0 +encourag 0 +gather 0 +varieti 0 +ofoth 0 +togeth 0 +niceonlin 0 +tutorialabout 0 +anda 0 +manualfor 0 +standard 0 +librari 0 +even 0 +statist 0 +timet 0 +decemb 0 +yourgrad 0 +remain 0 +four 0 +tent 0 +check 0 +frequent 0 +learn 0 +javaoct 0 +synchronizationoct 0 +schedulingoct 0 +schedulingdec 0 +systemsdec 0 +bottom 0 +outlin 0 +programm 0 +state 0 +race 0 +condit 0 +bound 0 +buffer 0 +problem 0 +dine 0 +terminolog 0 +detect 0 +critic 0 +short 0 +term 0 +alloc 0 +compact 0 +come 0 +eduthu 0 +copyright 0 +marvin 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..b17e92e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,88 @@ +system 1 +thread 0 +home 0 +introduct 0 +thursdai 0 +discuss 0 +fridai 0 +page 0 +concurr 0 +address 0 +manag 0 +memori 0 +protect 0 +file 0 +java 0 +distribut 0 +fall 0 +pagec 0 +oper 0 +systemsfal 0 +tuesdai 0 +host 0 +maryvernon 0 +instructor 0 +andkarunamuthiah 0 +welcom 0 +note 0 +lectur 0 +beinterchang 0 +follow 0 +date 0 +solut 0 +quiz 0 +assign 0 +offic 0 +hour 0 +email 0 +textbook 0 +read 0 +grade 0 +project 0 +quizz 0 +mail 0 +archiveapproxim 0 +schedul 0 +topicsweek 0 +oftopicsreadingsep 0 +space 0 +processeschapt 0 +cooper 0 +threadschapt 0 +synchron 0 +implement 0 +mutual 0 +exclusioncont 0 +semaphorescont 0 +monitor 0 +summarycont 0 +doct 0 +deadlock 0 +process 0 +schedulingchapt 0 +translat 0 +cach 0 +tlbschapter 0 +demand 0 +virtual 0 +memorycont 0 +review 0 +survei 0 +systemschapt 0 +name 0 +directorieschapt 0 +object 0 +core 0 +methodstbanov 0 +secur 0 +thanksgiv 0 +class 0 +network 0 +remot 0 +procedur 0 +call 0 +chapter 0 +global 0 +reviewchapt 0 +vernon 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..cde1581c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,60 @@ +assign 1 +midterm 0 +spring 0 +project 0 +fall 0 +offic 0 +hour 0 +exam 0 +tuesdai 0 +answer 0 +cours 0 +inform 0 +select 0 +solut 0 +instructor 0 +handout 0 +david 0 +wednesdai 0 +appoint 0 +mail 0 +wisc 0 +phone 0 +time 0 +thursdai 0 +atkinson 0 +get 0 +start 0 +descript 0 +vhdl 0 +room 0 +lectur 0 +home 0 +page 0 +grade 0 +wood 0 +class 0 +locat 0 +phil 0 +help 0 +mentor 0 +error 0 +check 0 +correct 0 +sampl 0 +code 0 +compil 0 +simul 0 +mentorassign 0 +question 0 +projectthi 0 +section 0 +includ 0 +deadlin 0 +report 0 +demonstr 0 +decemb 0 +examsth 0 +final 0 +previou 0 +endterm 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..a838160d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,80 @@ +assign 1 +postscript 0 +offic 0 +last 0 +handout 0 +hour 0 +info 0 +updat 0 +mail 0 +databas 0 +manag 0 +system 0 +design 0 +inform 0 +class 0 +fridai 0 +time 0 +solut 0 +chapter 0 +exercis 0 +grade 0 +import 0 +html 0 +year 0 +midterm 0 +sybas 0 +tree 0 +raghu 0 +phone 0 +lectur 0 +xbao 0 +implementationc 0 +implementationcours 0 +version 0 +instead 0 +chang 0 +text 0 +list 0 +pleas 0 +dont 0 +print 0 +first 0 +overview 0 +prerequisit 0 +topic 0 +cover 0 +date 0 +polici 0 +issu 0 +minibas 0 +home 0 +page 0 +check 0 +detail 0 +sampl 0 +us 0 +help 0 +yahoo 0 +entri 0 +resourc 0 +tutori 0 +debugg 0 +languag 0 +construct 0 +experi 0 +assignmentoth 0 +code 0 +convent 0 +instructor 0 +ramakrishnan 0 +discuss 0 +place 0 +ingraham 0 +teach 0 +assist 0 +xuemei 0 +tue 0 +thur 0 +modifi 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..6a7a8e10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,47 @@ +lectur 1 +page 0 +discuss 0 +home 0 +know 0 +russel 0 +option 0 +psycholog 0 +time 0 +place 0 +inform 0 +assign 0 +welcom 0 +obvious 0 +construct 0 +semest 0 +progress 0 +addinginform 0 +need 0 +import 0 +thing 0 +class 0 +meetingroom 0 +chang 0 +current 0 +meet 0 +labsfor 0 +fridai 0 +beenmov 0 +still 0 +instructor 0 +jeff 0 +naughton 0 +offic 0 +wednesdai 0 +lab 0 +taught 0 +close 0 +cooper 0 +fact 0 +probabl 0 +exam 0 +gener 0 +minibas 0 +particular 0 +pleas 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..fc9a5eda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,33 @@ +homework 1 +cours 0 +mail 0 +offic 0 +phone 0 +wisc 0 +hour 0 +introduct 0 +algorithm 0 +bach 0 +teach 0 +assist 0 +raji 0 +inform 0 +instructor 0 +eric 0 +appt 0 +bill 0 +donaldson 0 +gopalakrishnan 0 +midterm 0 +exam 0 +handout 0 +descript 0 +syllabu 0 +book 0 +reserv 0 +organ 0 +solut 0 +graph 0 +fractal 0 +behaviour 0 +archiv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..70e28ebf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,112 @@ +network 1 +comput 0 +cours 0 +email 0 +wisc 0 +fall 0 +project 0 +assign 0 +program 0 +postscript 0 +teach 0 +assist 0 +mail 0 +archiv 0 +refer 0 +eduoffic 0 +hour 0 +slide 0 +midterm 0 +advanc 0 +syllabu 0 +version 0 +comp 0 +phone 0 +time 0 +conveni 0 +feel 0 +free 0 +appoint 0 +layer 0 +powerpoint 0 +document 0 +form 0 +exam 0 +socket 0 +interfac 0 +lectur 0 +professor 0 +landweb 0 +introduct 0 +networksintroduct 0 +tabl 0 +content 0 +intern 0 +connect 0 +offer 0 +inform 0 +instructor 0 +select 0 +readingsclick 0 +hereto 0 +latest 0 +text 0 +networkingcours 0 +madisoncours 0 +informationlecturetim 0 +mwfplace 0 +statclass 0 +listinstructor 0 +lawrenc 0 +landweberoffic 0 +statphon 0 +srinivasa 0 +narayananoffic 0 +mondai 0 +wednesdai 0 +teitelbaumoffic 0 +naemail 0 +tuesdai 0 +thursdai 0 +moder 0 +complet 0 +error 0 +warn 0 +code 0 +class 0 +implement 0 +reliabl 0 +adapt 0 +handout 0 +overview 0 +softwar 0 +engin 0 +design 0 +evalu 0 +html 0 +pictur 0 +grade 0 +criteria 0 +gradingmidterm 0 +final 0 +term 0 +prior 0 +option 0 +book 0 +unix 0 +steven 0 +richard 0 +prentic 0 +hall 0 +isbn 0 +garbler 0 +packag 0 +annot 0 +bibliographyread 0 +partial 0 +icmp 0 +ospf 0 +ipng 0 +schedul 0 +spring 0 +review 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..e3bf7c64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,21 @@ +homework 1 +fall 0 +solut 0 +note 0 +comput 0 +project 0 +part 0 +home 0 +page 0 +instructor 0 +robert 0 +meyer 0 +wisc 0 +time 0 +place 0 +comp 0 +offic 0 +hour 0 +cours 0 +descript 0 +option 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..87a76852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,92 @@ +program 1 +homework 1 +nonlinear 0 +cours 0 +wisc 0 +offic 0 +convex 0 +optim 0 +differenti 0 +assign 0 +grade 0 +mail 0 +function 0 +criteria 0 +theori 0 +ferri 0 +telephon 0 +hour 0 +wednesdai 0 +text 0 +second 0 +gener 0 +concav 0 +order 0 +examin 0 +page 0 +fall 0 +also 0 +math 0 +stat 0 +applicationsfal 0 +schedul 0 +lectur 0 +list 0 +http 0 +instructor 0 +michael 0 +mondai 0 +tuesdai 0 +teach 0 +assist 0 +thursdai 0 +class 0 +olvi 0 +mangasarian 0 +siam 0 +publish 0 +philadelphia 0 +us 0 +algorithm 0 +bazaraa 0 +sherali 0 +shetti 0 +edit 0 +wilei 0 +york 0 +bertseka 0 +athena 0 +scientif 0 +inform 0 +overview 0 +introduct 0 +linear 0 +inequ 0 +theorem 0 +altern 0 +set 0 +saddlepoint 0 +without 0 +first 0 +dualiti 0 +condit 0 +exact 0 +penalti 0 +augment 0 +lagrangian 0 +gradient 0 +project 0 +book 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +week 0 +midterm 0 +novemb 0 +final 0 +mathemat 0 +home 0 +updat 0 +period 0 +semest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..87a76852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,92 @@ +program 1 +homework 1 +nonlinear 0 +cours 0 +wisc 0 +offic 0 +convex 0 +optim 0 +differenti 0 +assign 0 +grade 0 +mail 0 +function 0 +criteria 0 +theori 0 +ferri 0 +telephon 0 +hour 0 +wednesdai 0 +text 0 +second 0 +gener 0 +concav 0 +order 0 +examin 0 +page 0 +fall 0 +also 0 +math 0 +stat 0 +applicationsfal 0 +schedul 0 +lectur 0 +list 0 +http 0 +instructor 0 +michael 0 +mondai 0 +tuesdai 0 +teach 0 +assist 0 +thursdai 0 +class 0 +olvi 0 +mangasarian 0 +siam 0 +publish 0 +philadelphia 0 +us 0 +algorithm 0 +bazaraa 0 +sherali 0 +shetti 0 +edit 0 +wilei 0 +york 0 +bertseka 0 +athena 0 +scientif 0 +inform 0 +overview 0 +introduct 0 +linear 0 +inequ 0 +theorem 0 +altern 0 +set 0 +saddlepoint 0 +without 0 +first 0 +dualiti 0 +condit 0 +exact 0 +penalti 0 +augment 0 +lagrangian 0 +gradient 0 +project 0 +book 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +week 0 +midterm 0 +novemb 0 +final 0 +mathemat 0 +home 0 +updat 0 +period 0 +semest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..968f97a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,59 @@ +comput 1 +html 1 +postscript 0 +mimic 0 +scienc 0 +hour 0 +wisc 0 +home 0 +page 0 +sept 0 +text 0 +softwar 0 +pleas 0 +miron 0 +offic 0 +phone 0 +mail 0 +cychan 0 +system 0 +perform 0 +evalu 0 +model 0 +new 0 +assign 0 +librari 0 +avail 0 +public 0 +cours 0 +inform 0 +lectur 0 +devis 0 +user 0 +manual 0 +print 0 +file 0 +contain 0 +mani 0 +imag 0 +take 0 +least 0 +half 0 +initi 0 +instruct 0 +tutori 0 +onlin 0 +help 0 +qnet 0 +exampl 0 +devc 0 +professor 0 +livni 0 +teach 0 +assist 0 +chee 0 +yong 0 +chan 0 +suggest 0 +comment 0 +send 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..24fd53d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,78 @@ +student 1 +postscript 1 +find 0 +inform 0 +retriev 0 +cours 0 +resourc 0 +overview 0 +part 0 +class 0 +email 0 +uwisc 0 +technolog 0 +seek 0 +knowledgerichard 0 +belewvisit 0 +professorc 0 +lectur 0 +univ 0 +wisconsin 0 +comput 0 +scienc 0 +departmentfal 0 +thur 0 +acal 0 +room 0 +engrthi 0 +design 0 +interest 0 +understand 0 +aboutth 0 +knowledg 0 +represent 0 +machinelearn 0 +techniqu 0 +underli 0 +much 0 +excit 0 +activ 0 +occur 0 +onth 0 +world 0 +wide 0 +complet 0 +descript 0 +coures 0 +canse 0 +abstract 0 +asyllabu 0 +major 0 +topic 0 +consid 0 +graphic 0 +mapof 0 +thesear 0 +relat 0 +anda 0 +tent 0 +schedul 0 +semesterwil 0 +proce 0 +read 0 +polit 0 +infidel 0 +imag 0 +assign 0 +digest 0 +hypermai 0 +suggest 0 +compos 0 +classrel 0 +minut 0 +taken 0 +last 0 +modifi 0 +belew 0 +wisc 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..8247805b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,132 @@ +chang 1 +matlab 0 +assign 0 +diari 0 +class 0 +introduct 0 +page 0 +materi 0 +well 0 +list 0 +question 0 +comput 0 +also 0 +book 0 +student 0 +numer 0 +last 0 +cours 0 +tent 0 +syllabu 0 +order 0 +score 0 +addit 0 +note 0 +problem 0 +email 0 +concern 0 +mention 0 +name 0 +begin 0 +session 0 +edit 0 +avail 0 +look 0 +access 0 +telnet 0 +unix 0 +method 0 +methodsthi 0 +contain 0 +inform 0 +fall 0 +smile 0 +orderli 0 +mind 0 +cooper 0 +assignmentson 0 +total 0 +midterm 0 +date 0 +slightli 0 +point 0 +residu 0 +error 0 +condit 0 +rick 0 +carl 0 +offic 0 +hour 0 +errata 0 +text 0 +us 0 +updat 0 +sinc 0 +complex 0 +numericalanalysi 0 +algorithm 0 +post 0 +preprint 0 +foremostmathematician 0 +todai 0 +subject 0 +interest 0 +trickytop 0 +least 0 +squar 0 +solut 0 +approxim 0 +time 0 +place 0 +textmai 0 +supplement 0 +byaddit 0 +file 0 +areavail 0 +organ 0 +chapter 0 +awar 0 +though 0 +site 0 +capitallett 0 +sometim 0 +lower 0 +caselett 0 +present 0 +plan 0 +rather 0 +fortran 0 +kermit 0 +sigmon 0 +primer 0 +doit 0 +handout 0 +reaction 0 +winor 0 +machin 0 +overviewcours 0 +answer 0 +word 0 +grade 0 +four 0 +digit 0 +current 0 +conduct 0 +orient 0 +user 0 +andp 0 +relat 0 +linksyou 0 +might 0 +wish 0 +explor 0 +csdepart 0 +home 0 +system 0 +frequent 0 +ask 0 +simpl 0 +tutori 0 +advanc 0 +referenceviva 0 +good 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..c7fba341 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,55 @@ +numer 1 +function 1 +page 1 +class 1 +email 1 +relat 1 +analysi 0 +last 0 +chang 0 +analysisthi 0 +contain 0 +inform 0 +fall 0 +version 0 +math 0 +current 0 +note 0 +avail 0 +follow 0 +directori 0 +well 0 +hard 0 +copi 0 +doit 0 +recent 0 +announc 0 +post 0 +grade 0 +time 0 +locat 0 +statlectur 0 +carl 0 +boor 0 +deboor 0 +wisc 0 +offic 0 +hour 0 +stat 0 +line 0 +classnot 0 +viii 0 +index 0 +assign 0 +none 0 +concern 0 +homework 0 +cours 0 +question 0 +linksyou 0 +might 0 +wish 0 +explor 0 +depart 0 +home 0 +courseoff 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..eff8261d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,13 @@ +approxim 1 +cours 1 +theori 0 +last 0 +chang 0 +theorythi 0 +page 0 +contain 0 +inform 0 +spring 0 +version 0 +math 0 +note 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..70100dee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,65 @@ +program 1 +silva 0 +comput 0 +inform 0 +tuesdai 0 +solut 0 +toni 0 +dsilva 0 +wisc 0 +offic 0 +scienc 0 +statist 0 +savitch 0 +sept 0 +handout 0 +introduct 0 +window 0 +thursdai 0 +quiz 0 +home 0 +page 0 +sectioncsm 0 +instructor 0 +contact 0 +email 0 +phone 0 +hour 0 +appoint 0 +textbookproblem 0 +solv 0 +object 0 +walter 0 +section 0 +comp 0 +stat 0 +firstdai 0 +noland 0 +specifi 0 +timet 0 +chamberlin 0 +cours 0 +tent 0 +syllabu 0 +semest 0 +late 0 +polici 0 +grade 0 +criteria 0 +academ 0 +misconduct 0 +import 0 +softwar 0 +microsoft 0 +windowshint 0 +compilersth 0 +oper 0 +systememailmosaicnetscap 0 +borland 0 +languageth 0 +text 0 +assign 0 +quizz 0 +last 0 +modifi 0 +anthoni 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..934b22da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,432 @@ +imag 1 +vision 0 +exam 0 +comput 0 +paper 0 +us 0 +method 0 +homework 0 +process 0 +project 0 +read 0 +doit 0 +chapter 0 +tool 0 +vista 0 +cours 0 +student 0 +test 0 +algorithm 0 +email 0 +format 0 +assign 0 +avail 0 +thin 0 +mosaic 0 +khoro 0 +univers 0 +thursdai 0 +includ 0 +class 0 +result 0 +directori 0 +send 0 +page 0 +program 0 +code 0 +account 0 +printer 0 +print 0 +environ 0 +execut 0 +spring 0 +dyer 0 +wisc 0 +detect 0 +activ 0 +contour 0 +base 0 +novemb 0 +document 0 +handout 0 +window 0 +interact 0 +also 0 +start 0 +softwar 0 +least 0 +shape 0 +larg 0 +hand 0 +might 0 +line 0 +exampl 0 +note 0 +fast 0 +digit 0 +comm 0 +spline 0 +adelson 0 +graphic 0 +laser 0 +locat 0 +system 0 +instructor 0 +chuck 0 +csstelephon 0 +eduoffic 0 +hour 0 +gener 0 +introduct 0 +analysi 0 +segment 0 +modul 0 +three 0 +dimension 0 +scene 0 +inform 0 +shade 0 +depth 0 +stereo 0 +focu 0 +model 0 +grade 0 +requir 0 +select 0 +machin 0 +collect 0 +sold 0 +onlin 0 +enhanc 0 +modif 0 +public 0 +face 0 +rotat 0 +color 0 +free 0 +origin 0 +appli 0 +home 0 +well 0 +skeleton 0 +octob 0 +manual 0 +condit 0 +type 0 +disappear 0 +matrix 0 +block 0 +distanc 0 +four 0 +want 0 +follow 0 +addit 0 +output 0 +ubyt 0 +file 0 +point 0 +applic 0 +delet 0 +parallel 0 +pattern 0 +differ 0 +pyramid 0 +engin 0 +burt 0 +tran 0 +hint 0 +faq 0 +snake 0 +topic 0 +stanford 0 +room 0 +space 0 +sure 0 +list 0 +queue 0 +displai 0 +varieti 0 +imgstar 0 +basic 0 +convers 0 +matlab 0 +numer 0 +databas 0 +examin 0 +link 0 +interest 0 +scienc 0 +hdtv 0 +atsc 0 +demo 0 +visionc 0 +visionfal 0 +dyeroffic 0 +mondai 0 +appointmentteach 0 +assist 0 +bryan 0 +sooffic 0 +wednesdai 0 +fridai 0 +appointmentstud 0 +informationfundament 0 +first 0 +level 0 +edg 0 +featuredetect 0 +principl 0 +defin 0 +forreconstruct 0 +usingtechniqu 0 +asshap 0 +recoveri 0 +andocclud 0 +viewpoint 0 +control 0 +motion 0 +track 0 +object 0 +recognit 0 +schedul 0 +tuesdai 0 +prerequisit 0 +fundament 0 +calculu 0 +probabl 0 +theori 0 +linear 0 +algebra 0 +midterm 0 +particip 0 +syllabu 0 +part 0 +jain 0 +kasturi 0 +schunck 0 +mcgraw 0 +hill 0 +york 0 +readingsfrom 0 +journal 0 +confer 0 +proceed 0 +small 0 +batchessupplementari 0 +sourcesonlin 0 +informationmost 0 +urlhttp 0 +html 0 +date 0 +except 0 +primarili 0 +score 0 +assignmentshomework 0 +histogram 0 +option 0 +make 0 +copi 0 +portrait 0 +contrast 0 +byfirst 0 +crop 0 +around 0 +head 0 +shoulder 0 +final 0 +adjust 0 +theintens 0 +function 0 +editor 0 +thewindow 0 +button 0 +modifi 0 +thing 0 +colorif 0 +wish 0 +found 0 +good 0 +grayscal 0 +transformationsav 0 +andput 0 +whereth 0 +tell 0 +qualit 0 +whatintens 0 +transform 0 +improv 0 +qualityof 0 +overal 0 +photo 0 +board 0 +feel 0 +ownweb 0 +learn 0 +get 0 +correct 0 +chang 0 +instead 0 +prevent 0 +altogeth 0 +count 0 +transit 0 +case 0 +citi 0 +infin 0 +constant 0 +corner 0 +chessboard 0 +center 0 +posit 0 +evalu 0 +experi 0 +convert 0 +vconvert 0 +edit 0 +need 0 +emac 0 +clean 0 +header 0 +contain 0 +right 0 +repn 0 +component_interp 0 +gradient 0 +low_threshold 0 +high_threshold 0 +vlink 0 +vsegedg 0 +approach 0 +determin 0 +direct 0 +index 0 +finger 0 +fact 0 +entir 0 +surround 0 +zhang 0 +suen 0 +wang 0 +comment 0 +comparison 0 +version 0 +laplacian 0 +compact 0 +ieee 0 +multiresolut 0 +produc 0 +kass 0 +witkin 0 +terzopoulo 0 +william 0 +shah 0 +curvatur 0 +estim 0 +understand 0 +decemb 0 +titl 0 +abstract 0 +supplementari 0 +help 0 +done 0 +sparcstat 0 +call 0 +disk 0 +quota 0 +store 0 +compress 0 +other 0 +gzip 0 +howev 0 +order 0 +save 0 +sent 0 +goe 0 +everyon 0 +laserprint 0 +altern 0 +name 0 +shortest 0 +caution 0 +check 0 +job 0 +manner 0 +take 0 +long 0 +consider 0 +oper 0 +invok 0 +unix 0 +like 0 +command 0 +develop 0 +provid 0 +languag 0 +interfac 0 +rapid 0 +prototyp 0 +simpl 0 +cantata 0 +netpbm 0 +toolkit 0 +pbmplu 0 +packag 0 +visual 0 +signal 0 +toolbox 0 +especi 0 +relev 0 +although 0 +access 0 +solut 0 +held 0 +regular 0 +classroom 0 +earli 0 +time 0 +cover 0 +textbook 0 +bring 0 +sheet 0 +side 0 +main 0 +idea 0 +proof 0 +question 0 +ask 0 +highli 0 +recommend 0 +wandel 0 +number 0 +grand 0 +allianc 0 +specif 0 +advanc 0 +televis 0 +committe 0 +postscript 0 +spie 0 +optic 0 +librari 0 +appl 0 +quicktim 0 +product 0 +panoramix 0 +panoram 0 +decfac 0 +talk 0 +synthet 0 +video 0 +rate 0 +virtual 0 +realiti 0 +qbic 0 +miscellan 0 +relat 0 +boston 0 +cardiff 0 +royal 0 +institut 0 +sweden 0 +virginia 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..9fb37338 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,67 @@ +program 1 +fall 0 +section 0 +dave 0 +dzimm 0 +wisc 0 +offic 0 +home 0 +sharp 0 +algebra 0 +languag 0 +instructor 0 +zimmermannemail 0 +educlass 0 +meet 0 +time 0 +place 0 +nolandoffic 0 +phone 0 +hour 0 +announcementsprogram 0 +wednesdai 0 +octob 0 +readi 0 +fridai 0 +novemb 0 +gener 0 +cours 0 +informationc 0 +pagecours 0 +objectivesvectra 0 +labc 0 +consultantssyllabuswork 0 +homeclass 0 +handoutsprogramsexam 0 +quizzeslectur 0 +notesgreg 0 +style 0 +guidegrad 0 +referenc 0 +last 0 +digit 0 +number 0 +quizzesprogramsexam 0 +polici 0 +informationemail 0 +policygrad 0 +policyl 0 +policyacadem 0 +misconduct 0 +policytext 0 +problem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +list 0 +known 0 +erratalast 0 +modifi 0 +zimmermann 0 +base 0 +greg 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..71ce448d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,31 @@ +program 1 +cours 0 +linear 0 +method 0 +fall 0 +spring 0 +nonlinear 0 +gener 0 +inform 0 +offer 0 +semest 0 +page 0 +variou 0 +instructor 0 +michael 0 +ferri 0 +mangasarian 0 +graduat 0 +wisconsin 0 +network 0 +flow 0 +integ 0 +theori 0 +algorithm 0 +comput 0 +larg 0 +spars 0 +system 0 +last 0 +modifi 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..96a4e9f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,115 @@ +homework 1 +fridai 0 +mondai 0 +cours 0 +unix 0 +network 0 +class 0 +offic 0 +flow 0 +grade 0 +march 0 +mail 0 +wisc 0 +thursdai 0 +linear 0 +program 0 +path 0 +assign 0 +april 0 +cshrc 0 +local 0 +ferri 0 +telephon 0 +hour 0 +wednesdai 0 +text 0 +us 0 +gener 0 +inform 0 +orient 0 +session 0 +first 0 +user 0 +week 0 +room 0 +gam 0 +machin 0 +page 0 +spring 0 +flowsspr 0 +schedul 0 +lectur 0 +list 0 +februari 0 +instructor 0 +michael 0 +teach 0 +assist 0 +leei 0 +tuesdai 0 +requir 0 +ravindra 0 +ahuja 0 +thoma 0 +magnanti 0 +jame 0 +orlin 0 +prentic 0 +hall 0 +chvatal 0 +freeman 0 +optim 0 +bertseka 0 +press 0 +overview 0 +tree 0 +cycl 0 +data 0 +structur 0 +shortest 0 +cost 0 +simplex 0 +method 0 +convex 0 +equilibria 0 +lagrangian 0 +relax 0 +multicommod 0 +applic 0 +prerequisit 0 +knowledg 0 +project 0 +final 0 +examin 0 +close 0 +book 0 +except 0 +sheet 0 +paper 0 +allow 0 +repres 0 +question 0 +comput 0 +time 0 +novic 0 +previous 0 +workstat 0 +held 0 +second 0 +last 0 +minut 0 +introduct 0 +login 0 +access 0 +public 0 +sourc 0 +alter 0 +set 0 +directori 0 +appropri 0 +solari 0 +mathemat 0 +home 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..58503001 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,128 @@ +homework 1 +fridai 0 +mondai 0 +spars 0 +cours 0 +unix 0 +class 0 +offic 0 +grade 0 +march 0 +comput 0 +mail 0 +wisc 0 +thursdai 0 +assign 0 +april 0 +handout 0 +math 0 +method 0 +februari 0 +instructor 0 +ferri 0 +telephon 0 +hour 0 +wednesdai 0 +matrix 0 +second 0 +inform 0 +introduct 0 +orient 0 +session 0 +first 0 +user 0 +week 0 +room 0 +page 0 +spring 0 +also 0 +larg 0 +systemsspr 0 +schedul 0 +lectur 0 +list 0 +michael 0 +teach 0 +assist 0 +leei 0 +tuesdai 0 +recommend 0 +textbook 0 +golub 0 +loan 0 +john 0 +hopkinsunivers 0 +press 0 +edit 0 +direct 0 +matric 0 +duff 0 +erisman 0 +reid 0 +oxford 0 +scienc 0 +public 0 +finit 0 +dimension 0 +vector 0 +space 0 +halmo 0 +springer 0 +verlag 0 +gener 0 +overview 0 +storag 0 +scheme 0 +gaussian 0 +elimin 0 +dens 0 +error 0 +analysi 0 +local 0 +pivot 0 +strategi 0 +modif 0 +iter 0 +linear 0 +solver 0 +least 0 +squar 0 +nonlinear 0 +equat 0 +optim 0 +applic 0 +parallel 0 +techniqu 0 +eigenvalu 0 +eigenvector 0 +prerequisit 0 +consent 0 +project 0 +final 0 +examin 0 +close 0 +book 0 +except 0 +sheet 0 +paper 0 +allow 0 +repres 0 +question 0 +ieee 0 +arithmet 0 +machin 0 +time 0 +novic 0 +previous 0 +us 0 +workstat 0 +held 0 +last 0 +minut 0 +instruct 0 +matlab 0 +mathemat 0 +program 0 +home 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..1d52985b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,56 @@ +offic 1 +fischer 0 +telephon 0 +mail 0 +wisc 0 +hour 0 +lectur 0 +charl 0 +mondai 0 +wednesdai 0 +fridai 0 +appoint 0 +teach 0 +assist 0 +program 0 +weyer 0 +cours 0 +fall 0 +construct 0 +compilersfal 0 +schedul 0 +tuth 0 +csst 0 +instructor 0 +krishna 0 +kunchithapadam 0 +krisna 0 +tuesdai 0 +thursdai 0 +assign 0 +homework 0 +read 0 +class 0 +text 0 +craft 0 +compil 0 +richard 0 +leblanc 0 +benjamin 0 +cum 0 +check 0 +regularli 0 +gener 0 +inform 0 +overview 0 +date 0 +grade 0 +examin 0 +get 0 +start 0 +handout 0 +note 0 +us 0 +tool 0 +link 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..559d5787 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,57 @@ +greg 1 +wisc 0 +fall 0 +section 0 +offic 0 +algebra 0 +languag 0 +program 0 +instructor 0 +sharpemail 0 +eduoffic 0 +phone 0 +hour 0 +appt 0 +grader 0 +krishna 0 +kunchithapadamemail 0 +krisna 0 +edugener 0 +cours 0 +informationc 0 +home 0 +pagecours 0 +objectivesvectra 0 +labc 0 +consultantssyllabuscours 0 +difficultywork 0 +homenewsstartup 0 +informationclass 0 +noteshomeworkexam 0 +quizzesstyl 0 +guideemail 0 +archivepolici 0 +informationemail 0 +policygrad 0 +policyl 0 +policyacadem 0 +misconduct 0 +polici 0 +must 0 +read 0 +textproblem 0 +solv 0 +object 0 +porgrammingwalt 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +pleas 0 +list 0 +known 0 +erratalast 0 +modifi 0 +sharpgreg 0 +http 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..94870db9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,159 @@ +program 1 +class 0 +prog 0 +name 0 +polici 0 +section 0 +line 0 +need 0 +inform 0 +home 0 +version 0 +also 0 +chang 0 +assign 0 +grade 0 +mean 0 +page 0 +fall 0 +chad 0 +lane 0 +import 0 +tribbl 0 +compil 0 +problem 0 +enumer 0 +type 0 +public 0 +privat 0 +note 0 +call 0 +work 0 +file 0 +everyth 0 +copi 0 +want 0 +us 0 +link 0 +project 0 +directori 0 +consist 0 +sampl 0 +read 0 +question 0 +quizz 0 +test 0 +text 0 +introduct 0 +sharp 0 +algebra 0 +languag 0 +wisc 0 +announc 0 +tue 0 +recogn 0 +outsid 0 +must 0 +declar 0 +insid 0 +result 0 +bump 0 +bottom 0 +shown 0 +definit 0 +randomintinrang 0 +defin 0 +correspond 0 +function 0 +bodi 0 +within 0 +forgot 0 +valu 0 +uppercas 0 +overwrit 0 +obsolet 0 +includ 0 +might 0 +check 0 +help 0 +someth 0 +ad 0 +comment 0 +suggest 0 +throughout 0 +part 0 +updat 0 +descript 0 +pleas 0 +make 0 +sure 0 +discrep 0 +chri 0 +weaver 0 +shouldn 0 +matter 0 +readi 0 +crucial 0 +entir 0 +understand 0 +basic 0 +attempt 0 +earli 0 +start 0 +hard 0 +requir 0 +time 0 +piec 0 +togeth 0 +bring 0 +tuesdai 0 +midterm 0 +freshmen 0 +either 0 +fine 0 +great 0 +thumb 0 +noth 0 +freshman 0 +disregard 0 +stuff 0 +tent 0 +semest 0 +syllabu 0 +handout 0 +prepar 0 +solutionscours 0 +solv 0 +object 0 +walter 0 +savitch 0 +addison 0 +weslei 0 +publish 0 +compani 0 +meet 0 +vleck 0 +administr 0 +late 0 +mail 0 +attend 0 +academ 0 +misconduct 0 +microsoft 0 +window 0 +first 0 +borland 0 +second 0 +vectra 0 +sourc 0 +code 0 +consult 0 +extra 0 +refer 0 +materi 0 +mani 0 +answer 0 +lectur 0 +style 0 +guidelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..1f4b1b20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,68 @@ +offic 1 +program 0 +lectur 0 +horwitz 0 +telephon 0 +mail 0 +wisc 0 +hour 0 +appoint 0 +rahul 0 +compil 0 +tool 0 +cours 0 +inform 0 +assign 0 +grade 0 +fall 0 +introduct 0 +languag 0 +compilersspr 0 +stori 0 +month 0 +octob 0 +schedul 0 +tuth 0 +comp 0 +stat 0 +recit 0 +psycholog 0 +instructor 0 +susan 0 +tuesdai 0 +fridai 0 +teach 0 +assist 0 +kapoor 0 +mondai 0 +wednesdai 0 +text 0 +reserv 0 +wendt 0 +librari 0 +principl 0 +techniqu 0 +sethi 0 +ullman 0 +craft 0 +fischer 0 +leblanc 0 +check 0 +regularli 0 +gener 0 +overview 0 +date 0 +exam 0 +includ 0 +late 0 +polici 0 +get 0 +start 0 +read 0 +homework 0 +examin 0 +note 0 +us 0 +email 0 +link 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..068e0749 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,51 @@ +hummert 1 +offic 1 +comput 1 +program 1 +inform 1 +home 0 +scienc 0 +statist 0 +phone 0 +savitch 0 +psych 0 +grade 0 +introduct 0 +window 0 +page 0 +sectionsc 0 +instructor 0 +contact 0 +email 0 +wisc 0 +hour 0 +mondai 0 +thursdai 0 +announc 0 +textbookproblem 0 +solv 0 +object 0 +walter 0 +section 0 +cours 0 +handout 0 +tent 0 +syllabu 0 +semest 0 +late 0 +polici 0 +criteria 0 +academ 0 +misconduct 0 +viewgraph 0 +import 0 +softwar 0 +microsoft 0 +windowshint 0 +compilersth 0 +oper 0 +systememailmosaicnetscap 0 +borland 0 +languageth 0 +text 0 +assign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..7fb78a04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,43 @@ +section 1 +offic 1 +click 1 +hour 0 +mail 0 +jonb 0 +wisc 0 +infoc 0 +info 0 +name 0 +bodnersect 0 +mondai 0 +thursdai 0 +number 0 +comput 0 +scienc 0 +statist 0 +hall 0 +doit 0 +phone 0 +eduher 0 +thing 0 +keep 0 +mind 0 +need 0 +copi 0 +guid 0 +choos 0 +print 0 +file 0 +menu 0 +question 0 +pleas 0 +stop 0 +send 0 +grade 0 +avail 0 +bodner 0 +mound 0 +madison 0 +last 0 +modifi 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..007b683b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,139 @@ +program 1 +lisp 1 +exam 0 +common 0 +line 0 +cours 0 +kunen 0 +section 0 +inform 0 +avail 0 +directori 0 +introduct 0 +assign 0 +time 0 +topic 0 +us 0 +click 0 +artifici 0 +intellig 0 +coursewil 0 +email 0 +wisc 0 +count 0 +final 0 +late 0 +logic 0 +discuss 0 +search 0 +languag 0 +sinc 0 +probabl 0 +refer 0 +book 0 +recit 0 +last 0 +problem 0 +previou 0 +intro 0 +notic 0 +spring 0 +detail 0 +appear 0 +later 0 +instructor 0 +kunenoffic 0 +stat 0 +buildingtelephon 0 +eduoffic 0 +hour 0 +appoint 0 +grade 0 +four 0 +three 0 +thirdexam 0 +schedul 0 +place 0 +turn 0 +midnight 0 +thedai 0 +loos 0 +cover 0 +follow 0 +order 0 +entir 0 +buti 0 +design 0 +assignmenti 0 +game 0 +plai 0 +prolog 0 +natur 0 +understand 0 +learn 0 +neural 0 +network 0 +deduct 0 +plan 0 +reason 0 +uncertain 0 +knowledg 0 +begin 0 +would 0 +usefulto 0 +supplement 0 +lecturesand 0 +help 0 +within 0 +manypaperback 0 +like 0 +lispcraft 0 +wilenski 0 +anoth 0 +possibl 0 +ansi 0 +graham 0 +code 0 +ultim 0 +steel 0 +edit 0 +page 0 +also 0 +sun 0 +addit 0 +textbook 0 +modern 0 +approach 0 +russel 0 +norvig 0 +class 0 +session 0 +engr 0 +psych 0 +essentiallli 0 +materi 0 +present 0 +answer 0 +question 0 +give 0 +hint 0 +review 0 +usual 0 +minut 0 +teach 0 +attend 0 +differ 0 +lectur 0 +public 0 +alpha 0 +beta 0 +alpha_beta 0 +best 0 +first 0 +astar 0 +fall 0 +postscript 0 +still 0 +older 0 +chang 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..19921f19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,86 @@ +chapter 1 +homework 0 +assign 0 +part 0 +architectur 0 +tabl 0 +content 0 +reader 0 +comput 0 +full 0 +paper 0 +doit 0 +solut 0 +note 0 +instruct 0 +parallel 0 +spring 0 +inform 0 +comp 0 +wisc 0 +eduoffic 0 +hour 0 +appoint 0 +lectur 0 +project 0 +give 0 +basic 0 +pipelin 0 +level 0 +class 0 +talk 0 +decemb 0 +fall 0 +advanc 0 +ifal 0 +offer 0 +cours 0 +instructor 0 +mark 0 +hilloffic 0 +statemail 0 +markhil 0 +tuesdai 0 +fridai 0 +shenoffic 0 +statphon 0 +email 0 +mshen 0 +mondai 0 +thursdai 0 +miscellaneawhat 0 +talksread 0 +introduct 0 +perform 0 +cost 0 +set 0 +cach 0 +memori 0 +talluri 0 +hill 0 +input 0 +output 0 +interconnect 0 +process 0 +solutionproject 0 +propos 0 +novemb 0 +report 0 +noonmiscellanea 0 +final 0 +midterm 0 +us 0 +first 0 +edit 0 +hennessi 0 +patterson 0 +qualifi 0 +exam 0 +sourc 0 +hard 0 +question 0 +seminar 0 +wisconsin 0 +group 0 +world 0 +wide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..8581bf0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,72 @@ +us 1 +exampl 0 +class 0 +offic 0 +home 0 +program 0 +section 0 +mbirk 0 +wisc 0 +comp 0 +assign 0 +grade 0 +dynam 0 +alloc 0 +oper 0 +overload 0 +complex 0 +page 0 +introduct 0 +instructor 0 +michael 0 +birk 0 +email 0 +phone 0 +hour 0 +appoint 0 +administr 0 +inform 0 +text 0 +problem 0 +solv 0 +object 0 +walter 0 +savitch 0 +room 0 +time 0 +psycholog 0 +tuesdai 0 +thursdai 0 +vectra 0 +syllabu 0 +comput 0 +standard 0 +late 0 +polici 0 +handin 0 +procedur 0 +cheat 0 +academ 0 +misconduct 0 +consult 0 +string 0 +ration 0 +repres 0 +float 0 +point 0 +number 0 +anoth 0 +intstack 0 +simpl 0 +unlimit 0 +size 0 +data 0 +structur 0 +classinfo 0 +struct 0 +link 0 +microsoft 0 +window 0 +borland 0 +tutori 0 +debugg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..3f9cd00f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,107 @@ +program 1 +class 0 +us 0 +comp 0 +exampl 0 +offic 0 +assign 0 +room 0 +section 0 +mbirk 0 +wisc 0 +home 0 +avail 0 +number 0 +exam 0 +spring 0 +instruct 0 +syllabu 0 +eight 0 +week 0 +comput 0 +late 0 +polici 0 +grade 0 +dynam 0 +alloc 0 +oper 0 +overload 0 +complex 0 +introduct 0 +instructor 0 +michael 0 +birk 0 +email 0 +phone 0 +hour 0 +appoint 0 +announc 0 +test 0 +case 0 +dice 0 +code 0 +discuss 0 +rank 0 +last 0 +four 0 +digit 0 +student 0 +past 0 +onlin 0 +fall 0 +note 0 +hangman 0 +mondai 0 +octob 0 +lectur 0 +format 0 +first 0 +second 0 +come 0 +soon 0 +print 0 +output 0 +outsid 0 +final 0 +chang 0 +meet 0 +administr 0 +inform 0 +text 0 +problem 0 +solv 0 +object 0 +walter 0 +savitch 0 +time 0 +tuesdai 0 +thursdai 0 +vectra 0 +standard 0 +handin 0 +procedur 0 +cheat 0 +academ 0 +misconduct 0 +consult 0 +string 0 +ration 0 +repres 0 +float 0 +point 0 +anoth 0 +intstack 0 +simpl 0 +unlimit 0 +size 0 +data 0 +structur 0 +classinfo 0 +struct 0 +link 0 +page 0 +microsoft 0 +window 0 +borland 0 +tutori 0 +debugg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..489cb52d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,80 @@ +section 1 +melski 0 +email 0 +page 0 +info 0 +stuff 0 +assign 0 +comput 0 +help 0 +link 0 +david 0 +offic 0 +wisc 0 +meet 0 +us 0 +archiv 0 +dave 0 +melskithes 0 +chang 0 +frequent 0 +press 0 +reload 0 +button 0 +daili 0 +get 0 +start 0 +alreadi 0 +uncomfort 0 +andth 0 +softwar 0 +instructor 0 +scienc 0 +statist 0 +floor 0 +phone 0 +hour 0 +click 0 +attach 0 +pleas 0 +noland 0 +psycholog 0 +text 0 +problem 0 +solv 0 +object 0 +program 0 +walter 0 +savitch 0 +addison 0 +wesleypublish 0 +gener 0 +window 0 +usingborland 0 +refer 0 +materi 0 +rough 0 +syllabu 0 +polici 0 +academ 0 +misconduct 0 +must 0 +read 0 +rule 0 +thumb 0 +share 0 +code 0 +consult 0 +grade 0 +late 0 +work 0 +check 0 +often 0 +essenti 0 +solut 0 +handout 0 +list 0 +tutor 0 +avail 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..c0671b65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,142 @@ +program 1 +novemb 0 +wednesdai 0 +quiz 0 +octob 0 +class 0 +page 0 +code 0 +bankaccount 0 +solut 0 +score 0 +milo 0 +mondai 0 +home 0 +html 0 +exam 0 +septemb 0 +guid 0 +refer 0 +section 0 +languag 0 +martin 0 +wisc 0 +take 0 +file 0 +us 0 +document 0 +postscript 0 +function 0 +call 0 +fall 0 +time 0 +offic 0 +tuesdai 0 +check 0 +homepag 0 +ad 0 +current 0 +grade 0 +gener 0 +inform 0 +consult 0 +exampl 0 +list 0 +user 0 +input 0 +withprompt 0 +version 0 +standard 0 +fridai 0 +decemb 0 +algebra 0 +instructor 0 +locat 0 +psychologyinstructor 0 +email 0 +eduoffic 0 +hour 0 +thursdai 0 +appoint 0 +phone 0 +announcementsthi 0 +chang 0 +frequent 0 +respons 0 +often 0 +given 0 +todai 0 +place 0 +onth 0 +project 0 +room 0 +test 0 +scheduledfor 0 +updat 0 +haseveryth 0 +pleas 0 +make 0 +sure 0 +isaccur 0 +link 0 +coupl 0 +withinform 0 +linksar 0 +titl 0 +avail 0 +onfridai 0 +remind 0 +sheet 0 +turn 0 +assign 0 +syllabu 0 +style 0 +vectra 0 +schedul 0 +academ 0 +misconduct 0 +policyclass 0 +final 0 +main 0 +struct 0 +minmax 0 +findth 0 +minimum 0 +maximum 0 +number 0 +case 0 +enteredfrom 0 +stdin 0 +form 0 +creat 0 +formlett 0 +data 0 +specifi 0 +theopen_fil 0 +introduc 0 +valu 0 +beginn 0 +introduct 0 +toth 0 +latest 0 +releas 0 +good 0 +viru 0 +hoax 0 +ethic 0 +andprofession 0 +conductassign 0 +survei 0 +questionar 0 +requir 0 +textbook 0 +problem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +known 0 +errata 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..3a085a28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,89 @@ +program 1 +tuesdai 0 +section 0 +thursdai 0 +april 0 +martin 0 +ream 0 +class 0 +inform 0 +februari 0 +march 0 +exam 0 +list 0 +semest 0 +email 0 +mream 0 +wisc 0 +offic 0 +comput 0 +noland 0 +home 0 +page 0 +classc 0 +algebra 0 +languag 0 +programmingspr 0 +teach 0 +assist 0 +absolut 0 +nothingeverydai 0 +informationc 0 +pagescommon 0 +programmingmistakesarch 0 +mail 0 +messag 0 +sent 0 +calendar 0 +januari 0 +final 0 +placeto 0 +announcedcours 0 +detail 0 +contact 0 +scienc 0 +statist 0 +dayton 0 +phone 0 +hour 0 +appoint 0 +talk 0 +send 0 +textbookproblem 0 +solv 0 +object 0 +walter 0 +savitch 0 +csst 0 +contain 0 +vectra 0 +run 0 +window 0 +andborland 0 +addit 0 +cours 0 +tent 0 +syllabu 0 +extra 0 +materi 0 +late 0 +polici 0 +grade 0 +criteria 0 +academicmisconduct 0 +rule 0 +thumb 0 +share 0 +code 0 +assign 0 +work 0 +anyform 0 +former 0 +student 0 +made 0 +bigtodd 0 +thielwendi 0 +staatsabout 0 +instructor 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..f7617b6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,107 @@ +program 1 +exampl 0 +fall 0 +section 0 +mike 0 +msteel 0 +wisc 0 +comp 0 +stat 0 +deadlin 0 +check 0 +mail 0 +read 0 +tuesdai 0 +novemb 0 +grade 0 +note 0 +polici 0 +algebra 0 +languag 0 +nolandinstructor 0 +steeleemail 0 +eduoffic 0 +buildingoffic 0 +hour 0 +time 0 +appoint 0 +soffic 0 +phone 0 +import 0 +announcementsi 0 +extend 0 +pleas 0 +thenew 0 +informationmidterm 0 +current 0 +line 0 +includ 0 +everyth 0 +hand 0 +sampl 0 +taken 0 +pastfew 0 +week 0 +class 0 +fill 0 +stuff 0 +gloss 0 +makefulli 0 +function 0 +find 0 +us 0 +ifyou 0 +miss 0 +even 0 +didn 0 +understand 0 +page 0 +near 0 +bottom 0 +rememb 0 +clarif 0 +programmingassign 0 +gener 0 +cours 0 +informationc 0 +home 0 +pagecours 0 +objectivesabout 0 +vectra 0 +labc 0 +consultantscours 0 +syllabu 0 +assignmentsnot 0 +work 0 +homeclass 0 +handoutsprogram 0 +assignmentsexam 0 +quizzessom 0 +examplespolici 0 +informationemail 0 +policygrad 0 +late 0 +academ 0 +misconduct 0 +policyus 0 +refer 0 +pagesintroduct 0 +microsoft 0 +windowsintroduct 0 +borland 0 +greg 0 +sharp 0 +styleguid 0 +codetextproblem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +list 0 +known 0 +erratalast 0 +modifi 0 +steel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..49558f6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,131 @@ +grade 1 +section 1 +assign 0 +polici 0 +offic 0 +onlin 0 +test 0 +wisc 0 +comput 0 +scienc 0 +statist 0 +hour 0 +check 0 +email 0 +fall 0 +prock 0 +eduoffic 0 +phone 0 +consult 0 +link 0 +crazi 0 +todai 0 +quiz 0 +gener 0 +question 0 +final 0 +page 0 +inform 0 +class 0 +text 0 +materi 0 +info 0 +archiv 0 +introduct 0 +session 0 +infoc 0 +sessionalgebra 0 +languag 0 +program 0 +instructor 0 +andrew 0 +prockemail 0 +thgrader 0 +haihong 0 +wangemail 0 +mtwrannounc 0 +pleas 0 +verifi 0 +score 0 +modifi 0 +made 0 +minor 0 +modif 0 +file 0 +copi 0 +alreadi 0 +gotton 0 +five 0 +exam 0 +result 0 +rang 0 +ad 0 +sampl 0 +perus 0 +anoth 0 +think 0 +give 0 +good 0 +idea 0 +level 0 +knowledg 0 +need 0 +rememb 0 +topic 0 +webpag 0 +seem 0 +done 0 +notic 0 +error 0 +make 0 +sure 0 +assing 0 +well 0 +like 0 +work 0 +ahead 0 +tuesdai 0 +decemb 0 +mark 0 +calendar 0 +everyon 0 +requir 0 +take 0 +feel 0 +thing 0 +locat 0 +import 0 +carefulli 0 +read 0 +administr 0 +welcom 0 +problem 0 +solv 0 +walter 0 +savitch 0 +room 0 +time 0 +tent 0 +syllabu 0 +late 0 +mail 0 +academ 0 +misconductcours 0 +cours 0 +style 0 +guid 0 +lectur 0 +note 0 +microsoft 0 +window 0 +first 0 +borland 0 +second 0 +home 0 +vectra 0 +sourc 0 +code 0 +extra 0 +refer 0 +mani 0 +answer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..779e3ba5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,88 @@ +info 1 +site 0 +softwar 0 +archiv 0 +simtel 0 +kelli 0 +page 0 +mail 0 +messag 0 +function 0 +link 0 +inform 0 +interest 0 +world 0 +usenet 0 +need 0 +ratliff 0 +current 0 +grade 0 +keyword 0 +search 0 +exampl 0 +enter 0 +without 0 +quot 0 +everi 0 +paragraph 0 +us 0 +word 0 +also 0 +wildcard 0 +sent 0 +semest 0 +addit 0 +backup 0 +copi 0 +disk 0 +filesviru 0 +wide 0 +faqfun 0 +stuff 0 +oracl 0 +resourc 0 +index 0 +virtual 0 +tourist 0 +mapth 0 +space 0 +shuttl 0 +clickabl 0 +badger 0 +herald 0 +comicshumor 0 +abort 0 +retri 0 +ignor 0 +nine 0 +type 0 +usersfin 0 +weeklab 0 +jokesget 0 +comput 0 +home 0 +might 0 +tryingsom 0 +sharewar 0 +freewar 0 +avail 0 +internet 0 +program 0 +usual 0 +compress 0 +somecompress 0 +unpack 0 +reviewsom 0 +command 0 +try 0 +biggest 0 +best 0 +maintain 0 +minclud 0 +file 0 +post 0 +infocompress 0 +infofavorit 0 +clickher 0 +visit 0 +desautel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..2a40e0d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,75 @@ +program 1 +comput 0 +introduct 0 +inform 0 +home 0 +room 0 +savitch 0 +novemb 0 +window 0 +page 0 +russ 0 +man 0 +csc 0 +instructor 0 +russel 0 +manningemail 0 +rman 0 +wisc 0 +eduoffic 0 +scienceoffic 0 +hour 0 +find 0 +basement 0 +saturdai 0 +except 0 +footbal 0 +game 0 +sundai 0 +come 0 +keep 0 +compani 0 +work 0 +like 0 +vectra 0 +although 0 +student 0 +prioriti 0 +grade 0 +lectur 0 +final 0 +click 0 +textbook 0 +problem 0 +solv 0 +object 0 +walter 0 +section 0 +semest 0 +univers 0 +rotc 0 +build 0 +scienc 0 +statist 0 +assign 0 +mondai 0 +wednesdai 0 +cours 0 +handout 0 +syllabu 0 +late 0 +polici 0 +academ 0 +misconduct 0 +import 0 +softwar 0 +microsoft 0 +windowshint 0 +compilersth 0 +oper 0 +systememailmosaicnetscap 0 +borland 0 +languageth 0 +textold 0 +quizz 0 +none 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..a2a590a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,95 @@ +wisc 1 +spring 0 +fall 0 +group 0 +cours 0 +knowledg 0 +search 0 +vision 0 +learn 0 +robot 0 +shavlik 0 +comput 0 +introduct 0 +artifici 0 +intellig 0 +network 0 +deduct 0 +problem 0 +solv 0 +plan 0 +languag 0 +includ 0 +jude 0 +relat 0 +link 0 +machin 0 +gener 0 +inform 0 +offer 0 +semest 0 +academ 0 +year 0 +section 0 +thefal 0 +topic 0 +cover 0 +principl 0 +base 0 +techniqu 0 +best 0 +first 0 +alpha 0 +beta 0 +represent 0 +us 0 +predic 0 +logic 0 +semant 0 +connectionist 0 +frame 0 +rule 0 +autom 0 +applic 0 +expert 0 +system 0 +game 0 +plai 0 +natur 0 +understand 0 +program 0 +lisp 0 +possibl 0 +prolog 0 +previou 0 +assumedprerequisit 0 +page 0 +variou 0 +instructor 0 +chuck 0 +dyer 0 +kunen 0 +sabbat 0 +bryan 0 +local 0 +madison 0 +seminar 0 +qualifi 0 +exam 0 +recent 0 +tabl 0 +content 0 +abstract 0 +journal 0 +mostli 0 +wendt 0 +librari 0 +readabl 0 +biologi 0 +dept 0 +graduat 0 +wisconsin 0 +motion 0 +extern 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..e9d8fd96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,216 @@ +assign 1 +learn 0 +homework 0 +read 0 +wisc 0 +spring 0 +april 0 +postscript 0 +chapter 0 +textbook 0 +feedback 0 +author 0 +neural 0 +journal 0 +mondai 0 +mitchel 0 +network 0 +shavlik 0 +februari 0 +lisp 0 +machin 0 +cours 0 +wednesdai 0 +articl 0 +march 0 +towel 0 +januari 0 +late 0 +us 0 +link 0 +page 0 +offic 0 +class 0 +theori 0 +relat 0 +knowledg 0 +algorithm 0 +reinforc 0 +concept 0 +space 0 +paper 0 +server 0 +group 0 +jude 0 +home 0 +archiv 0 +readabl 0 +comput 0 +base 0 +fisher 0 +rumelhart 0 +moonei 0 +empir 0 +compar 0 +backprop 0 +experiment 0 +cobweb 0 +polici 0 +train 0 +induc 0 +decis 0 +tree 0 +free 0 +semest 0 +noon 0 +librari 0 +resourc 0 +bibliographi 0 +austrian 0 +institut 0 +refer 0 +help 0 +program 0 +akcl 0 +common 0 +tip 0 +frequent 0 +ask 0 +question 0 +print 0 +local 0 +gener 0 +inform 0 +instructor 0 +stat 0 +teach 0 +assist 0 +geoff 0 +weinberg 0 +highwai 0 +lab 0 +basement 0 +build 0 +geoffrei 0 +phone 0 +overview 0 +syllabu 0 +email 0 +suggest 0 +project 0 +refin 0 +logic 0 +definit 0 +quinlan 0 +artifici 0 +chunk 0 +soar 0 +laird 0 +rosenbloom 0 +newel 0 +level 0 +dietterich 0 +analyt 0 +unsupervis 0 +zipser 0 +cogsci 0 +lenat 0 +genet 0 +backpropag 0 +basic 0 +also 0 +scienc 0 +kibler 0 +introduct 0 +kbann 0 +sure 0 +answer 0 +sheet 0 +best 0 +idea 0 +next 0 +summar 0 +sentenc 0 +summari 0 +lead 0 +instead 0 +analyz 0 +brr 0 +hand 0 +materi 0 +cover 0 +lectur 0 +methodolog 0 +creat 0 +person 0 +start 0 +student 0 +five 0 +dai 0 +exhaust 0 +penalti 0 +measur 0 +weekend 0 +make 0 +tractabl 0 +accept 0 +week 0 +previous 0 +migrat 0 +progress 0 +heurist 0 +search 0 +version 0 +explan 0 +previou 0 +exam 0 +ineedagoodicon 0 +line 0 +nip 0 +premier 0 +confer 0 +recent 0 +tabl 0 +content 0 +abstract 0 +select 0 +mostli 0 +wendt 0 +irvin 0 +dataset 0 +pointer 0 +discoveri 0 +databas 0 +stuff 0 +benchmark 0 +ieee 0 +council 0 +sever 0 +connect 0 +intern 0 +societi 0 +adapt 0 +behavior 0 +canadian 0 +peopl 0 +extern 0 +department 0 +workstat 0 +emac 0 +code 0 +write 0 +debugg 0 +novic 0 +steel 0 +languag 0 +edit 0 +manual 0 +printer 0 +math 0 +comp 0 +biologi 0 +includ 0 +dept 0 +last 0 +modifi 0 +shavlikshavlik 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..2aac08e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,247 @@ +project 1 +paper 0 +comput 0 +present 0 +exam 0 +cours 0 +topic 0 +class 0 +read 0 +schedul 0 +lectur 0 +system 0 +final 0 +scienc 0 +room 0 +midterm 0 +suggest 0 +grade 0 +semest 0 +detail 0 +make 0 +wang 0 +chen 0 +solomon 0 +wisc 0 +new 0 +updat 0 +list 0 +mondai 0 +statist 0 +time 0 +memori 0 +manag 0 +avail 0 +content 0 +summari 0 +inform 0 +text 0 +us 0 +copi 0 +discuss 0 +activ 0 +particip 0 +strongli 0 +encourag 0 +design 0 +term 0 +experiment 0 +research 0 +literatur 0 +must 0 +group 0 +write 0 +fall 0 +advanc 0 +oper 0 +systemsfal 0 +marvin 0 +offic 0 +sciencesoffic 0 +hour 0 +troffic 0 +phone 0 +email 0 +address 0 +watch 0 +space 0 +latest 0 +last 0 +inroom 0 +build 0 +fridai 0 +noon 0 +exampl 0 +past 0 +examtogeth 0 +sampl 0 +answer 0 +wednesdai 0 +octob 0 +pmin 0 +decemb 0 +exact 0 +place 0 +bedetermin 0 +informationabout 0 +avaiabl 0 +readabl 0 +version 0 +figur 0 +multic 0 +intend 0 +give 0 +broad 0 +exposur 0 +advancedoper 0 +assum 0 +student 0 +good 0 +onoper 0 +equival 0 +cover 0 +normal 0 +inconsider 0 +synchron 0 +interprocess 0 +commun 0 +file 0 +protect 0 +secur 0 +distribut 0 +tuesdai 0 +thursdai 0 +sciencestextther 0 +realli 0 +satisfactori 0 +textbook 0 +graduat 0 +level 0 +operatingsystem 0 +usea 0 +select 0 +classic 0 +papersa 0 +structur 0 +around 0 +journal 0 +articl 0 +andconfer 0 +proceed 0 +purchas 0 +doit 0 +formerli 0 +macc 0 +document 0 +deskfor 0 +similar 0 +ident 0 +thoseof 0 +previou 0 +individu 0 +youto 0 +relev 0 +current 0 +click 0 +herefor 0 +tent 0 +review 0 +willinstead 0 +adiscuss 0 +major 0 +theme 0 +focal 0 +point 0 +will 0 +daili 0 +geta 0 +expect 0 +quietli 0 +listen 0 +week 0 +much 0 +lessout 0 +gradingther 0 +worth 0 +total 0 +verifi 0 +carefulli 0 +thoroughli 0 +projecty 0 +requir 0 +complet 0 +provid 0 +involv 0 +implement 0 +tool 0 +implementationsof 0 +algorithm 0 +measur 0 +studi 0 +simul 0 +compon 0 +survei 0 +unvalid 0 +suffici 0 +done 0 +person 0 +larger 0 +smaller 0 +approv 0 +case 0 +basi 0 +summar 0 +result 0 +meet 0 +standard 0 +public 0 +qualiti 0 +well 0 +also 0 +ashort 0 +presentationabout 0 +presentationsher 0 +presen 0 +approxim 0 +manyan 0 +stubb 0 +andrew 0 +bigg 0 +franci 0 +salmon 0 +gunawan 0 +agu 0 +qingmin 0 +chien 0 +pang 0 +jame 0 +eric 0 +larsen 0 +conroi 0 +fritz 0 +craig 0 +jordan 0 +prasad 0 +deshpand 0 +avinash 0 +sodani 0 +basnei 0 +rajesh 0 +raman 0 +biswadeep 0 +taxiao 0 +yanm 0 +xinyu 0 +richard 0 +zhang 0 +todd 0 +munson 0 +wenjun 0 +xinyi 0 +yufei 0 +zeyu 0 +sridhar 0 +gopal 0 +michael 0 +leesolomon 0 +eduthu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..223bbd2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,4 @@ +homepag 1 +chiang 0 +time 0 +gradesgo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..52420e22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,123 @@ +fortran 1 +comput 0 +microsoft 0 +page 0 +home 0 +program 0 +us 0 +vectra 0 +room 0 +quota 0 +mail 0 +exercis 0 +write 0 +solut 0 +particular 0 +scienc 0 +window 0 +machin 0 +also 0 +howev 0 +copi 0 +modifi 0 +bestor 0 +section 0 +overal 0 +structur 0 +primarili 0 +gener 0 +problem 0 +solv 0 +code 0 +though 0 +want 0 +time 0 +algorithm 0 +even 0 +depend 0 +languag 0 +class 0 +follow 0 +mondai 0 +labyou 0 +statist 0 +contain 0 +hewlett 0 +packard 0 +run 0 +open 0 +seven 0 +dai 0 +week 0 +except 0 +certain 0 +holidai 0 +printer 0 +locat 0 +across 0 +hall 0 +print 0 +exce 0 +must 0 +contact 0 +either 0 +go 0 +offic 0 +hour 0 +prefer 0 +increas 0 +bewar 0 +aren 0 +configur 0 +correctli 0 +along 0 +wall 0 +closest 0 +outsidehallwai 0 +toward 0 +left 0 +hand 0 +part 0 +avoid 0 +dorm 0 +probabl 0 +purchas 0 +lahei 0 +person 0 +insid 0 +cover 0 +textbook 0 +work 0 +lab 0 +campu 0 +compil 0 +pleas 0 +first 0 +softwar 0 +includ 0 +netscap 0 +pointer 0 +interest 0 +jeff 0 +lampert 0 +depart 0 +start 0 +point 0 +internet 0 +explor 0 +lyco 0 +search 0 +world 0 +wide 0 +keyword 0 +dilbert 0 +comic 0 +relief 0 +long 0 +night 0 +assign 0 +copyright 0 +gareth 0 +wisc 0 +last 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..dba21e7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,52 @@ +homepag 1 +page 1 +wisc 1 +homepagec 0 +homepagewelcom 0 +purpos 0 +provid 0 +student 0 +inform 0 +pertain 0 +section 0 +sinc 0 +chang 0 +frequent 0 +responsibilityto 0 +check 0 +often 0 +gener 0 +informationinstructor 0 +todd 0 +munsonemail 0 +tmunson 0 +eduoffic 0 +comput 0 +scienc 0 +statisticsoffic 0 +phone 0 +offic 0 +hour 0 +appointmentsect 0 +textbook 0 +problem 0 +solv 0 +walter 0 +savitchclass 0 +informationexpectationssyllabusexam 0 +schedule 0 +mailgradingl 0 +assignmentsextra 0 +creditpoliciesconsult 0 +responsibilitiesacadem 0 +misconductoth 0 +informationdaili 0 +note 0 +assignmentshomework 0 +assignmentsprogram 0 +document 0 +us 0 +classoth 0 +program 0 +resourcesc 0 +homepagetmunson 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..66ba50c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,64 @@ +inform 1 +section 0 +handout 0 +class 0 +cours 0 +todd 0 +hour 0 +avail 0 +program 0 +get 0 +start 0 +read 0 +gener 0 +text 0 +syllabu 0 +assign 0 +home 0 +page 0 +turnidg 0 +comput 0 +scienc 0 +time 0 +place 0 +nolandinstructor 0 +turnidgeoffic 0 +tbalab 0 +tbaannouncementsclass 0 +note 0 +struct 0 +us 0 +facil 0 +grade 0 +polici 0 +tent 0 +solut 0 +collect 0 +date 0 +mail 0 +list 0 +send 0 +messag 0 +classa 0 +whole 0 +muchinform 0 +interest 0 +includ 0 +tutor 0 +consult 0 +window 0 +oper 0 +system 0 +email 0 +netscap 0 +find 0 +provid 0 +byother 0 +instructor 0 +help 0 +exampl 0 +gregorysharp 0 +difficulti 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..af1b6048 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,80 @@ +offic 1 +comput 0 +section 0 +program 0 +weaver 0 +scienc 0 +chri 0 +email 0 +polici 0 +statist 0 +hour 0 +exam 0 +wisc 0 +phone 0 +week 0 +noland 0 +text 0 +grade 0 +handout 0 +assign 0 +algebra 0 +languag 0 +announc 0 +thursdai 0 +psych 0 +reload 0 +page 0 +everi 0 +time 0 +login 0 +instructor 0 +appoint 0 +first 0 +grader 0 +zhang 0 +locat 0 +vectra 0 +seven 0 +dai 0 +staf 0 +consult 0 +gener 0 +cours 0 +info 0 +syllabu 0 +problem 0 +solv 0 +object 0 +walter 0 +savitch 0 +addison 0 +weslei 0 +isbn 0 +includ 0 +errata 0 +sourc 0 +code 0 +misconduct 0 +policyassign 0 +homework 0 +read 0 +lectur 0 +note 0 +exampl 0 +quiz 0 +kei 0 +late 0 +style 0 +guidelin 0 +still 0 +rough 0 +print 0 +paper 0 +statement 0 +depart 0 +univers 0 +wisconsin 0 +madison 0 +last 0 +chang 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..2c9aaae4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,84 @@ +homework 1 +offic 0 +part 0 +assign 0 +advanc 0 +tabl 0 +content 0 +read 0 +project 0 +solut 0 +lectur 0 +full 0 +paper 0 +doit 0 +pipelin 0 +cach 0 +network 0 +multiprocessor 0 +hour 0 +note 0 +exam 0 +memori 0 +spring 0 +comput 0 +architectur 0 +cours 0 +inform 0 +engin 0 +hall 0 +phone 0 +wisc 0 +final 0 +main 0 +disk 0 +arrai 0 +interconnect 0 +technolog 0 +midterm 0 +rout 0 +offer 0 +instructor 0 +prof 0 +jame 0 +smith 0 +tue 0 +thur 0 +email 0 +princeoffic 0 +mail 0 +address 0 +princ 0 +new 0 +miscellaneousnew 0 +soln 0 +special 0 +biochemistri 0 +pmread 0 +overview 0 +introduct 0 +perform 0 +cost 0 +instruct 0 +set 0 +vector 0 +vliw 0 +limit 0 +softwar 0 +system 0 +trace 0 +list 0 +miscellan 0 +us 0 +tool 0 +review 0 +specmark 0 +consid 0 +harm 0 +analysi 0 +clock 0 +detail 0 +design 0 +reserv 0 +station 0 +summari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..7e164502 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,244 @@ +surfac 1 +homework 0 +cours 0 +graphic 0 +cornel 0 +comput 0 +project 0 +object 0 +math 0 +field 0 +univers 0 +scientif 0 +visual 0 +schedul 0 +anim 0 +color 0 +upson 0 +group 0 +theori 0 +center 0 +topic 0 +model 0 +transform 0 +volum 0 +data 0 +march 0 +prelim 0 +final 0 +page 0 +atth 0 +alan 0 +watt 0 +also 0 +content 0 +summari 0 +administrivia 0 +textbook 0 +error 0 +staff 0 +mathemat 0 +program 0 +skill 0 +polygon 0 +list 0 +parametr 0 +oper 0 +quadric 0 +hierarch 0 +geometr 0 +kinemat 0 +dynam 0 +system 0 +view 0 +camera 0 +clip 0 +vision 0 +render 0 +shade 0 +light 0 +human 0 +space 0 +method 0 +textur 0 +map 0 +dimension 0 +scalar 0 +vector 0 +line 0 +april 0 +test 0 +spring 0 +religi 0 +holidai 0 +student 0 +exam 0 +last 0 +bruce 0 +land 0 +huang 0 +comment 0 +main 0 +pagecomput 0 +scienc 0 +evolv 0 +incomplet 0 +hopefulli 0 +us 0 +begin 0 +home 0 +forc 0 +deal 0 +level 0 +principl 0 +practic 0 +folei 0 +computergraph 0 +current 0 +taught 0 +bruceland 0 +leader 0 +relev 0 +requir 0 +artist 0 +among 0 +other 0 +fundament 0 +focus 0 +associ 0 +designedto 0 +help 0 +illumin 0 +cover 0 +follow 0 +year 0 +construct 0 +explicit 0 +figur 0 +rotat 0 +swept 0 +tensor 0 +product 0 +viewer 0 +implicit 0 +blobbi 0 +tessel 0 +normal 0 +simpl 0 +form 0 +complex 0 +scene 0 +composit 0 +introduct 0 +homogen 0 +coordin 0 +build 0 +combin 0 +prototyp 0 +mimic 0 +connect 0 +rigid 0 +part 0 +invers 0 +differenti 0 +equat 0 +cellular 0 +automata 0 +onto 0 +screen 0 +parallel 0 +perspect 0 +simul 0 +stereo 0 +devic 0 +limit 0 +optic 0 +wave 0 +gourand 0 +phong 0 +hidden 0 +remov 0 +buffer 0 +transpar 0 +shadow 0 +scan 0 +convers 0 +anti 0 +alias 0 +pixel 0 +vernier 0 +hyper 0 +acuiti 0 +imag 0 +properti 0 +modif 0 +bump 0 +aspect 0 +wall 0 +channel 0 +contour 0 +mispercept 0 +difficulti 0 +arrow 0 +particl 0 +advect 0 +multiparamet 0 +high 0 +assign 0 +first 0 +serv 0 +gener 0 +guid 0 +style 0 +break 0 +educationlaw 0 +mandat 0 +faculti 0 +make 0 +avail 0 +opportun 0 +tomak 0 +examin 0 +miss 0 +belief 0 +inord 0 +facilit 0 +prepar 0 +makeup 0 +intendingto 0 +absent 0 +order 0 +observ 0 +requestedto 0 +notifi 0 +instructor 0 +lectur 0 +period 0 +tuesdai 0 +mean 0 +standard 0 +deviat 0 +rhode 0 +jing 0 +justin 0 +mccune 0 +jmccune 0 +csrelev 0 +california 0 +davi 0 +waterloo 0 +wale 0 +colleg 0 +cardiff 0 +manchest 0 +oregon 0 +state 0 +universityrel 0 +onlin 0 +document 0 +welcom 0 +sent 0 +todoc 0 +modifi 0 +copyright 0 +statement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..bad3422a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,96 @@ +cornel 1 +visual 1 +exercis 1 +graphic 0 +current 0 +project 0 +theori 0 +center 0 +comput 0 +laboratori 0 +page 0 +materi 0 +procedur 0 +student 0 +computergraph 0 +scientif 0 +surfac 0 +transform 0 +model 0 +camera 0 +textur 0 +anim 0 +mark 0 +relat 0 +topic 0 +comment 0 +main 0 +pagecomput 0 +scienc 0 +exercisesthi 0 +site 0 +universityundergradu 0 +contain 0 +includ 0 +softwar 0 +result 0 +section 0 +deal 0 +level 0 +principl 0 +practic 0 +folei 0 +dcomput 0 +watt 0 +taught 0 +bruceland 0 +also 0 +leader 0 +group 0 +atth 0 +first 0 +place 0 +sigucc 0 +basededuc 0 +train 0 +competit 0 +get 0 +start 0 +build 0 +polygon 0 +object 0 +parametr 0 +us 0 +virtual 0 +perspect 0 +light 0 +bump 0 +map 0 +design 0 +physic 0 +base 0 +implicit 0 +done 0 +order 0 +note 0 +areinclud 0 +refer 0 +chat 0 +facil 0 +commun 0 +aboutc 0 +spring 0 +semesteraccess 0 +restrict 0 +enrol 0 +final 0 +onlin 0 +document 0 +welcom 0 +sent 0 +todoc 0 +last 0 +modifi 0 +land 0 +copyright 0 +statement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..f637b9f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,65 @@ +topic 1 +fall 0 +group 0 +student 0 +model 0 +imag 0 +system 0 +us 0 +cornel 0 +comput 0 +graphic 0 +semestereach 0 +chose 0 +current 0 +research 0 +computergraph 0 +read 0 +appropri 0 +paper 0 +implement 0 +code 0 +deliv 0 +lectur 0 +chosen 0 +anddocu 0 +work 0 +document 0 +metabal 0 +window 0 +michael 0 +arcuri 0 +alex 0 +benton 0 +human 0 +facial 0 +express 0 +huang 0 +hung 0 +content 0 +base 0 +retriev 0 +interior 0 +design 0 +sean 0 +landi 0 +interdepend 0 +particl 0 +justin 0 +mccune 0 +visual 0 +diffus 0 +distribut 0 +pollut 0 +spatial 0 +explicit 0 +landscap 0 +modelsfu 0 +tsai 0 +antialias 0 +video 0 +stochast 0 +sampl 0 +arun 0 +vermach 0 +hsun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..517bee27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,65 @@ +sandia 1 +livermor 1 +thesi 1 +vavasi 1 +research 1 +scientif 1 +comput 1 +least 1 +squar 1 +patti 0 +houghpatti 0 +hough 0 +nation 0 +laboratori 0 +cornel 0 +student 0 +center 0 +appli 0 +mathemat 0 +whichi 0 +hous 0 +frankh 0 +rhode 0 +hall 0 +cornellunivers 0 +advisor 0 +steve 0 +member 0 +committe 0 +nicktrefethen 0 +schatz 0 +interest 0 +fall 0 +area 0 +numer 0 +linearalgebra 0 +optimizationi 0 +current 0 +work 0 +postdoc 0 +juan 0 +meza 0 +depart 0 +nationallaboratori 0 +resum 0 +statement 0 +goal 0 +tech 0 +report 0 +complet 0 +orthogon 0 +decomposit 0 +weight 0 +appear 0 +siam 0 +matrix 0 +anal 0 +stabl 0 +effici 0 +solut 0 +ofweight 0 +problem 0 +applic 0 +interior 0 +pointmethod 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..e6c62bdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,85 @@ +postscript 1 +trefethen 0 +research 0 +page 0 +fluid 0 +baggett 0 +mathemat 0 +cornel 0 +univers 0 +year 0 +thesi 0 +dynam 0 +like 0 +interest 0 +model 0 +transit 0 +turbul 0 +physic 0 +submit 0 +jeff 0 +baggettjeff 0 +center 0 +appli 0 +frank 0 +rhode 0 +hall 0 +ithaca 0 +sixth 0 +graduat 0 +student 0 +depart 0 +atcornel 0 +supervis 0 +nick 0 +expect 0 +finish 0 +titl 0 +normal 0 +applic 0 +hydrodynam 0 +stabil 0 +summer 0 +would 0 +continu 0 +seek 0 +posit 0 +detail 0 +outlin 0 +curriculum 0 +vita 0 +activ 0 +background 0 +unusu 0 +blend 0 +scientif 0 +comput 0 +system 0 +mechan 0 +propos 0 +work 0 +iwould 0 +next 0 +coupl 0 +paper 0 +mostli 0 +linear 0 +driscol 0 +april 0 +exponenti 0 +type 0 +versu 0 +spectral 0 +abscissa 0 +hill 0 +andphillip 0 +exampl 0 +integr 0 +equat 0 +oper 0 +theori 0 +dimension 0 +subcrit 0 +misc 0 +link 0 +satish 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..8f5df6d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,71 @@ +ncstrl 1 +report 0 +search 0 +comput 0 +scienc 0 +technic 0 +collect 0 +network 0 +field 0 +sever 0 +institut 0 +word 0 +list 0 +document 0 +brows 0 +cornel 0 +librari 0 +libraryncstrl 0 +pronounc 0 +ancestr 0 +internationalcollect 0 +departmentsand 0 +industri 0 +govern 0 +research 0 +laboratori 0 +made 0 +availablefor 0 +commerci 0 +eduat 0 +ncstrlcollect 0 +distribut 0 +among 0 +interoper 0 +serversoper 0 +participatinginstitut 0 +read 0 +offici 0 +ncstrlpress 0 +packag 0 +descript 0 +background 0 +goal 0 +andorgan 0 +form 0 +allow 0 +perform 0 +bibliograph 0 +data 0 +limit 0 +specif 0 +enter 0 +whose 0 +author 0 +titl 0 +abstract 0 +contain 0 +theparticip 0 +want 0 +join 0 +tell 0 +moreread 0 +forinstitut 0 +interest 0 +particip 0 +informationfind 0 +snew 0 +relat 0 +send 0 +email 0 +totech 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..dc7e2e56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^dri.cornell.edu @@ -0,0 +1,33 @@ +design 1 +research 1 +institut 1 +technic 1 +instituteabout 0 +researchersat 0 +brows 0 +searchal 0 +public 0 +file 0 +reportssearch 0 +report 0 +ipic 0 +home 0 +page 0 +intern 0 +work 0 +confer 0 +integr 0 +enterpris 0 +informationand 0 +process 0 +anoth 0 +site 0 +inform 0 +itisingapor 0 +altavistaforum 0 +send 0 +question 0 +comment 0 +server 0 +mike 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..49eecd4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,177 @@ +us 1 +question 0 +resourc 0 +design 0 +gener 0 +interest 0 +access 0 +work 0 +project 0 +comput 0 +technic 0 +knowledg 0 +also 0 +cours 0 +student 0 +whether 0 +davi 0 +cornel 0 +system 0 +futur 0 +increas 0 +structur 0 +inform 0 +report 0 +corpor 0 +group 0 +memori 0 +mean 0 +institut 0 +product 0 +document 0 +share 0 +staff 0 +answer 0 +remot 0 +machin 0 +seem 0 +research 0 +institutejim 0 +davisxerox 0 +corporationphd 0 +media 0 +edumi 0 +goal 0 +build 0 +softwar 0 +improvecommun 0 +among 0 +peopl 0 +believ 0 +commun 0 +medium 0 +ofth 0 +understand 0 +andcont 0 +messag 0 +transmit 0 +manipul 0 +reformat 0 +even 0 +content 0 +inhypertext 0 +network 0 +collabor 0 +thecstr 0 +anarpa 0 +sponsor 0 +effort 0 +make 0 +scienc 0 +moreeasili 0 +part 0 +distribut 0 +server 0 +whichi 0 +run 0 +mani 0 +univers 0 +electronicsystem 0 +captur 0 +produc 0 +worker 0 +order 0 +qualiti 0 +ofor 0 +reduc 0 +time 0 +requir 0 +memoryinclud 0 +intellectu 0 +engin 0 +lawyer 0 +contract 0 +author 0 +sscreenplai 0 +process 0 +producedth 0 +dead 0 +end 0 +explor 0 +tool 0 +andjustif 0 +support 0 +final 0 +decis 0 +begun 0 +huttenloch 0 +developingcorpor 0 +sharedannot 0 +investig 0 +howpeopl 0 +read 0 +write 0 +annot 0 +inelectron 0 +initi 0 +prototypeimplement 0 +class 0 +shareddocu 0 +problem 0 +set 0 +note 0 +nnotat 0 +might 0 +berequest 0 +clarif 0 +orcorrect 0 +made 0 +aus 0 +obtain 0 +willfind 0 +sourc 0 +learn 0 +whetherstud 0 +often 0 +abl 0 +correctli 0 +find 0 +usefulmean 0 +feedback 0 +improv 0 +evid 0 +isthat 0 +natur 0 +languag 0 +designof 0 +proxi 0 +agent 0 +safe 0 +reliablycarri 0 +foreign 0 +without 0 +risk 0 +toeither 0 +owner 0 +alsopap 0 +onlin 0 +copi 0 +publicatiion 0 +list 0 +especi 0 +thedrimi 0 +collect 0 +meprofession 0 +historythi 0 +narr 0 +resum 0 +contact 0 +improvisationi 0 +sport 0 +resumeno 0 +market 0 +thank 0 +ask 0 +likeit 0 +fine 0 +xerox 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..c89c67e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,28 @@ +search 1 +metacrawl 0 +erik 0 +selberg 0 +greg 0 +oren 0 +home 0 +searchingmetacrawlerbi 0 +lauckhartand 0 +prof 0 +etzioniif 0 +person 0 +page 0 +ahoi 0 +phrase 0 +word 0 +wordssort 0 +result 0 +relev 0 +locationcontrol 0 +java 0 +configur 0 +problemswebmast 0 +comcopyright 0 +etzioni 0 +lauckhart 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..f0e27561 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,99 @@ +robot 1 +madison 0 +sensor 0 +plan 0 +research 0 +base 0 +motion 0 +ieee 0 +univers 0 +page 0 +engin 0 +relat 0 +project 0 +kinemat 0 +group 0 +system 0 +comput 0 +link 0 +wisconsin 0 +home 0 +mechan 0 +avenuemadison 0 +vladimir 0 +lumelski 0 +director 0 +brief 0 +review 0 +activ 0 +peopl 0 +technic 0 +report 0 +compress 0 +postscript 0 +avail 0 +simul 0 +librari 0 +cours 0 +seminar 0 +recent 0 +select 0 +public 0 +abstract 0 +maze 0 +search 0 +algorithm 0 +effect 0 +dynam 0 +control 0 +jogger 0 +model 0 +sens 0 +decentr 0 +intellig 0 +special 0 +topic 0 +tether 0 +underwat 0 +redund 0 +sensit 0 +skin 0 +human 0 +center 0 +geometri 0 +neil 0 +duffi 0 +manufactur 0 +chuck 0 +dyer 0 +vision 0 +robert 0 +lorenz 0 +actuat 0 +jude 0 +shavlik 0 +machin 0 +learn 0 +societi 0 +autom 0 +tech 0 +committe 0 +path 0 +internet 0 +resourc 0 +server 0 +nasa 0 +telerobot 0 +program 0 +frequent 0 +ask 0 +question 0 +list 0 +local 0 +dept 0 +colleg 0 +comment 0 +suggest 0 +errata 0 +hert 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..bfeef105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,40 @@ +engin 1 +comput 1 +robot 0 +dept 0 +ieee 0 +vladimir 0 +home 0 +page 0 +scienc 0 +link 0 +mathemat 0 +lumelski 0 +lumelskyprofessormechan 0 +underwat 0 +kinemat 0 +redund 0 +sensit 0 +skin 0 +project 0 +human 0 +center 0 +system 0 +geometri 0 +global 0 +societi 0 +autom 0 +tech 0 +committe 0 +motion 0 +path 0 +plan 0 +wisconsin 0 +colleg 0 +mechan 0 +electr 0 +graduat 0 +program 0 +mace 0 +grant 0 +institut 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..2b9aed42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,196 @@ +system 1 +vogel 0 +high 0 +werner 0 +commun 0 +network 0 +distribut 0 +speed 0 +proceed 0 +comput 0 +cornel 0 +protocol 0 +design 0 +cluster 0 +horu 0 +need 0 +mechan 0 +level 0 +messag 0 +failur 0 +group 0 +robbert 0 +reness 0 +eicken 0 +noth 0 +left 0 +take 0 +project 0 +think 0 +latenc 0 +support 0 +highli 0 +real 0 +time 0 +issu 0 +environ 0 +oper 0 +guarante 0 +applic 0 +abl 0 +detect 0 +birman 0 +appear 0 +sigop 0 +workshopconnamoran 0 +ierland 0 +septemb 0 +thorsten 0 +perform 0 +researchera 0 +upson 0 +halldept 0 +sciencecornel 0 +univers 0 +ithaca 0 +phone 0 +email 0 +perfect 0 +reach 0 +awai 0 +involv 0 +major 0 +scienc 0 +depart 0 +thehorusand 0 +myresearch 0 +interest 0 +best 0 +describ 0 +bandwith 0 +reliabl 0 +requir 0 +focu 0 +engin 0 +thing 0 +amwork 0 +predict 0 +execut 0 +integr 0 +horuswith 0 +lead 0 +situat 0 +reason 0 +advanc 0 +effici 0 +data 0 +transfer 0 +devic 0 +bandwidth 0 +small 0 +fallen 0 +behind 0 +softwar 0 +adapt 0 +achiev 0 +desir 0 +latencyfor 0 +pass 0 +protocolsar 0 +exploit 0 +structureand 0 +interact 0 +pattern 0 +method 0 +deal 0 +miss 0 +deadlin 0 +meet 0 +guarant 0 +gave 0 +tell 0 +anyon 0 +acur 0 +want 0 +aglob 0 +scope 0 +find 0 +gener 0 +supportfailur 0 +suspis 0 +manag 0 +process 0 +node 0 +experi 0 +extract 0 +workwith 0 +middlewar 0 +packag 0 +regardless 0 +function 0 +brainchild 0 +andken 0 +work 0 +done 0 +cooper 0 +withthorsten 0 +multimedia 0 +video 0 +demand 0 +horusexperi 0 +concert 0 +brian 0 +smith 0 +respons 0 +practicum 0 +teach 0 +number 0 +lectureson 0 +technolog 0 +practic 0 +recent 0 +public 0 +world 0 +wide 0 +structur 0 +virtual 0 +synchroni 0 +explor 0 +bound 0 +virtuallysynchron 0 +katherin 0 +user 0 +interfacefor 0 +parallel 0 +anindya 0 +basu 0 +vineet 0 +buch 0 +symposium 0 +princpl 0 +copper 0 +mountain 0 +decemb 0 +deliv 0 +third 0 +ieee 0 +workshop 0 +architectur 0 +implementationof 0 +subsystem 0 +hpc 0 +august 0 +flexibl 0 +kenneth 0 +brad 0 +glade 0 +kati 0 +mark 0 +hayden 0 +takako 0 +hickei 0 +dalia 0 +malki 0 +alex 0 +vaysburd 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..91f883aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,265 @@ +horu 1 +system 0 +osiri 0 +project 0 +develop 0 +distribut 0 +isi 0 +commun 0 +applic 0 +egyptian 0 +softwar 0 +ensembl 0 +compani 0 +god 0 +rejoic 0 +whose 0 +groupcommun 0 +requir 0 +life 0 +oppos 0 +death 0 +framework 0 +comput 0 +toler 0 +data 0 +replic 0 +design 0 +effort 0 +gener 0 +purpos 0 +us 0 +version 0 +activ 0 +environ 0 +high 0 +network 0 +found 0 +dead 0 +bodi 0 +made 0 +introduct 0 +kenneth 0 +birman 0 +robbert 0 +reness 0 +projectth 0 +ofth 0 +come 0 +heart 0 +firm 0 +triumphant 0 +ofisi 0 +heir 0 +modular 0 +extens 0 +process 0 +address 0 +wide 0 +varieti 0 +robust 0 +attribut 0 +appealedstrongli 0 +egypt 0 +becausein 0 +everi 0 +woman 0 +type 0 +wish 0 +possess 0 +renew 0 +movementa 0 +inact 0 +provid 0 +applicationsbas 0 +group 0 +style 0 +aris 0 +infault 0 +manag 0 +thatexploit 0 +coher 0 +cach 0 +groupwar 0 +within 0 +theoveral 0 +larg 0 +collect 0 +applicationprotocol 0 +allow 0 +toconstruct 0 +modul 0 +exactli 0 +meet 0 +applicationrequir 0 +minim 0 +cost 0 +origin 0 +launch 0 +redesign 0 +theisi 0 +evolv 0 +communicationarchitectur 0 +advanc 0 +support 0 +robustdistribut 0 +set 0 +unsuit 0 +asappl 0 +special 0 +secur 0 +real 0 +time 0 +besidesth 0 +practic 0 +contribut 0 +toward 0 +theori 0 +virtual 0 +synchroni 0 +runtim 0 +model 0 +usedfor 0 +implement 0 +fault 0 +sametim 0 +much 0 +faster 0 +lighter 0 +weight 0 +exist 0 +initi 0 +code 0 +beus 0 +research 0 +restrict 0 +commericalright 0 +call 0 +written 0 +usabl 0 +manyoth 0 +languag 0 +avail 0 +class 0 +user 0 +nofe 0 +seri 0 +ofreleas 0 +fall 0 +spring 0 +earli 0 +ensemblewil 0 +outstand 0 +build 0 +java 0 +base 0 +groupwareappl 0 +multimedia 0 +conferenc 0 +platform 0 +independ 0 +areavail 0 +differentclass 0 +workstat 0 +person 0 +parallel 0 +processor 0 +onnext 0 +cluster 0 +standard 0 +speedcommun 0 +collabor 0 +close 0 +mani 0 +systemsproject 0 +includ 0 +transi 0 +navtech 0 +stormcast 0 +tacomaproject 0 +link 0 +elsewher 0 +thesepag 0 +circumst 0 +begotten 0 +gain 0 +good 0 +idea 0 +froma 0 +sorrow 0 +mother 0 +herhusband 0 +describ 0 +goddess 0 +greatli 0 +distress 0 +equippedwith 0 +mighti 0 +word 0 +power 0 +knew 0 +utter 0 +mighthav 0 +greatest 0 +effect 0 +search 0 +never 0 +rest 0 +hair 0 +light 0 +wing 0 +stir 0 +lament 0 +brother 0 +length 0 +brought 0 +state 0 +unit 0 +thu 0 +becam 0 +child 0 +born 0 +secret 0 +place 0 +suckl 0 +rear 0 +horusvisit 0 +papersand 0 +abstractpag 0 +overview 0 +public 0 +report 0 +relatedto 0 +follow 0 +recent 0 +articl 0 +present 0 +level 0 +reliabl 0 +scientif 0 +american 0 +silvano 0 +maffei 0 +flexiblegroup 0 +april 0 +final 0 +sentenc 0 +hyme 0 +osirisfrom 0 +papyru 0 +better 0 +know 0 +book 0 +walli 0 +budg 0 +studiesin 0 +mytholog 0 +volum 0 +page 0 +open 0 +court 0 +publish 0 +london 0 +comment 0 +werner 0 +vogel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..de723d33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,87 @@ +depart 1 +cornel 0 +report 0 +research 0 +page 0 +gener 0 +faculti 0 +annual 0 +home 0 +comput 0 +scienc 0 +info 0 +project 0 +know 0 +inform 0 +also 0 +find 0 +check 0 +go 0 +cours 0 +peopl 0 +server 0 +academ 0 +tech 0 +welcom 0 +universitydepart 0 +site 0 +feel 0 +free 0 +brows 0 +around 0 +infoget 0 +locat 0 +size 0 +contactswithin 0 +standard 0 +disclaim 0 +facultyfind 0 +list 0 +offici 0 +ortheir 0 +person 0 +researchcheck 0 +aboutour 0 +collabor 0 +publicationsfind 0 +link 0 +public 0 +researcherseith 0 +technic 0 +projector 0 +degreeslook 0 +degre 0 +program 0 +doctor 0 +master 0 +engin 0 +orundergradu 0 +academicsrefer 0 +taught 0 +webfor 0 +semest 0 +read 0 +generalcoursedescript 0 +appear 0 +studi 0 +peopleget 0 +outstand 0 +keep 0 +includ 0 +staff 0 +student 0 +directorylist 0 +activitiesfind 0 +activ 0 +theassoci 0 +undergradu 0 +excellenthockei 0 +team 0 +serverscheck 0 +gopherserv 0 +anonym 0 +ftpserver 0 +sitesquest 0 +comment 0 +informationpres 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..3b0da087 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,242 @@ +text 1 +retriev 0 +inform 0 +automat 0 +hypertext 0 +univers 0 +research 0 +larg 0 +us 0 +analysi 0 +system 0 +cornel 0 +process 0 +nation 0 +novemb 0 +allan 0 +develop 0 +avail 0 +document 0 +search 0 +gener 0 +purpos 0 +base 0 +databas 0 +link 0 +encyclopedia 0 +full 0 +smart 0 +librari 0 +ohio 0 +util 0 +comput 0 +proceed 0 +annual 0 +rapidli 0 +exist 0 +high 0 +sophist 0 +user 0 +queri 0 +file 0 +oper 0 +subject 0 +corpu 0 +determin 0 +similar 0 +main 0 +structur 0 +collect 0 +capabl 0 +consist 0 +differ 0 +version 0 +sigir 0 +confer 0 +germani 0 +genom 0 +medicin 0 +bethesda 0 +maryland 0 +june 0 +columbu 0 +octob 0 +seattl 0 +washington 0 +associ 0 +machineri 0 +york 0 +bucklei 0 +nevada 0 +home 0 +denis 0 +gerard 0 +saltongerard 0 +saltonprofessorg 0 +eduph 0 +harvard 0 +natur 0 +languag 0 +expand 0 +field 0 +mass 0 +machin 0 +readabl 0 +cheapli 0 +store 0 +densiti 0 +optic 0 +storag 0 +media 0 +demand 0 +furthermor 0 +method 0 +analyz 0 +formul 0 +appropri 0 +conduct 0 +rapid 0 +rank 0 +item 0 +decreas 0 +order 0 +import 0 +design 0 +environ 0 +handl 0 +without 0 +restrict 0 +size 0 +matter 0 +absenc 0 +knowledg 0 +would 0 +unrestrict 0 +mean 0 +word 0 +express 0 +refin 0 +context 0 +statist 0 +probabilist 0 +criteria 0 +approach 0 +abl 0 +degre 0 +accuraci 0 +applic 0 +semant 0 +piec 0 +represent 0 +provid 0 +flexibl 0 +brows 0 +access 0 +interest 0 +excerpt 0 +respons 0 +done 0 +extens 0 +work 0 +autom 0 +articl 0 +funk 0 +wagnal 0 +addit 0 +also 0 +trec 0 +cover 0 +number 0 +area 0 +gigabyt 0 +servic 0 +well 0 +relat 0 +section 0 +paragraph 0 +sentenc 0 +test 0 +vehicl 0 +continu 0 +current 0 +unix 0 +sparc 0 +station 0 +termin 0 +equip 0 +activitiesmemb 0 +engin 0 +colleg 0 +committeeprofession 0 +activitiesassoci 0 +editor 0 +transact 0 +systemsprogram 0 +committe 0 +seventeenth 0 +dublin 0 +ireland 0 +electron 0 +publish 0 +darmstadt 0 +multimedia 0 +hypermedia 0 +virtual 0 +realiti 0 +moscow 0 +septemb 0 +lecturesautomat 0 +construct 0 +feder 0 +institut 0 +technolog 0 +zurich 0 +switzerland 0 +progress 0 +konstanz 0 +asi 0 +meet 0 +scienc 0 +colloquium 0 +state 0 +lectur 0 +cours 0 +microsoft 0 +corpor 0 +workshop 0 +publicationsapproach 0 +passag 0 +select 0 +travers 0 +commun 0 +februari 0 +vector 0 +model 0 +third 0 +symposium 0 +vega 0 +april 0 +softwareth 0 +made 0 +free 0 +charg 0 +sever 0 +hundr 0 +copi 0 +distribut 0 +around 0 +world 0 +return 0 +list 0 +faculti 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..4893d499 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,87 @@ +minim 1 +problem 0 +trust 0 +region 0 +method 0 +optim 0 +cornel 0 +gener 0 +interest 0 +scientif 0 +comput 0 +engin 0 +nonlinearli 0 +constrain 0 +nonlinear 0 +research 0 +techniqu 0 +solv 0 +includ 0 +converg 0 +us 0 +subject 0 +bound 0 +confer 0 +chines 0 +young 0 +scientist 0 +home 0 +denis 0 +yui 0 +liyui 0 +liresearch 0 +associateyui 0 +eduph 0 +univers 0 +waterloo 0 +numer 0 +also 0 +appli 0 +real 0 +world 0 +current 0 +theori 0 +unconstrain 0 +particular 0 +exist 0 +accept 0 +condit 0 +investig 0 +affin 0 +scale 0 +function 0 +analysi 0 +exact 0 +penalti 0 +approach 0 +applic 0 +consid 0 +imag 0 +enhanc 0 +lecturesan 0 +interior 0 +beij 0 +china 0 +august 0 +publicationsa 0 +global 0 +siam 0 +journal 0 +center 0 +reflect 0 +proceed 0 +return 0 +list 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..50d58958 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,178 @@ +comput 1 +mathemat 0 +depart 0 +object 0 +york 0 +us 0 +architectur 0 +system 0 +algebra 0 +polynomi 0 +present 0 +israel 0 +research 0 +cornel 0 +symbol 0 +scientif 0 +toolkit 0 +engin 0 +program 0 +workshop 0 +algorithm 0 +intern 0 +american 0 +societi 0 +region 0 +meet 0 +syracus 0 +decemb 0 +dawson 0 +dean 0 +haifa 0 +januari 0 +focus 0 +softwar 0 +techniqu 0 +gener 0 +fluid 0 +dynam 0 +differenti 0 +equat 0 +studi 0 +code 0 +weyl 0 +function 0 +languag 0 +provid 0 +number 0 +journal 0 +symposium 0 +septemb 0 +dexter 0 +kozen 0 +susan 0 +landau 0 +vista 0 +microstorag 0 +orient 0 +joint 0 +electr 0 +technion 0 +scienc 0 +ronitt 0 +rubinfeld 0 +page 0 +databas 0 +home 0 +denis 0 +richard 0 +zippelrichard 0 +zippelsenior 0 +associaterz 0 +eduph 0 +modern 0 +autom 0 +current 0 +work 0 +allow 0 +scientist 0 +specifi 0 +perform 0 +numer 0 +machin 0 +calcul 0 +done 0 +convert 0 +suppli 0 +method 0 +special 0 +colleagu 0 +mechan 0 +aerospac 0 +aris 0 +boundari 0 +layer 0 +compon 0 +substrat 0 +call 0 +extend 0 +data 0 +structur 0 +avail 0 +common 0 +lisp 0 +includ 0 +like 0 +matric 0 +ration 0 +ring 0 +vector 0 +space 0 +ideal 0 +introduct 0 +challeng 0 +type 0 +opportun 0 +deduct 0 +reason 0 +pursu 0 +profession 0 +activitieseditori 0 +board 0 +transact 0 +softwareprogram 0 +committe 0 +principl 0 +practic 0 +constraint 0 +refere 0 +review 0 +error 0 +correct 0 +computationlecturesalgebra 0 +decomposit 0 +effect 0 +irreduc 0 +test 0 +oper 0 +durham 0 +north 0 +carolina 0 +center 0 +synthes 0 +weizmann 0 +institut 0 +rehovot 0 +suni 0 +albani 0 +april 0 +modular 0 +interpol 0 +factor 0 +multivari 0 +theori 0 +ithaca 0 +publicationseffect 0 +kluwer 0 +academ 0 +publish 0 +boston 0 +massachusett 0 +june 0 +implement 0 +file 0 +proceed 0 +return 0 +list 0 +annual 0 +report 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..632e4162 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,217 @@ +comput 1 +horu 1 +distribut 0 +system 0 +develop 0 +group 0 +work 0 +isi 0 +us 0 +program 0 +commun 0 +secur 0 +fault 0 +toler 0 +model 0 +virtual 0 +process 0 +effort 0 +side 0 +softwar 0 +tool 0 +also 0 +depart 0 +committe 0 +member 0 +cornel 0 +reliabl 0 +california 0 +problem 0 +reconfigur 0 +practic 0 +toolkit 0 +support 0 +synchroni 0 +real 0 +time 0 +featur 0 +extens 0 +layer 0 +avail 0 +technolog 0 +techniqu 0 +parallel 0 +graduat 0 +student 0 +privaci 0 +high 0 +prove 0 +implement 0 +studi 0 +notabl 0 +friedman 0 +collabor 0 +head 0 +reness 0 +engin 0 +scienc 0 +chief 0 +consist 0 +ieee 0 +home 0 +denis 0 +kenneth 0 +birmankenneth 0 +birmanprofessorphd 0 +univ 0 +berkelei 0 +research 0 +concern 0 +oper 0 +focu 0 +base 0 +upon 0 +synchron 0 +solv 0 +manag 0 +replic 0 +data 0 +coordin 0 +action 0 +set 0 +perform 0 +dynam 0 +done 0 +provid 0 +although 0 +limit 0 +certain 0 +class 0 +reason 0 +benign 0 +failur 0 +theoret 0 +start 0 +call 0 +becam 0 +wide 0 +popular 0 +intend 0 +flexibl 0 +address 0 +issu 0 +import 0 +permit 0 +special 0 +purpos 0 +basic 0 +idea 0 +user 0 +actual 0 +broad 0 +collect 0 +option 0 +seek 0 +leverag 0 +emerg 0 +network 0 +activ 0 +messag 0 +origin 0 +supercomput 0 +embodi 0 +advanc 0 +mike 0 +reiter 0 +david 0 +cooper 0 +unusu 0 +combin 0 +singl 0 +packag 0 +fundament 0 +look 0 +specifi 0 +properti 0 +structur 0 +languag 0 +execut 0 +refer 0 +major 0 +goal 0 +constabl 0 +nuprl 0 +latter 0 +correctli 0 +former 0 +ad 0 +guarante 0 +mark 0 +hayden 0 +probabilist 0 +broadcast 0 +primit 0 +much 0 +architectur 0 +robbert 0 +werner 0 +vogel 0 +aspect 0 +includ 0 +object 0 +orient 0 +multimedia 0 +applic 0 +speed 0 +protocol 0 +exploit 0 +within 0 +thorsten 0 +eicken 0 +brian 0 +smith 0 +univers 0 +activitieschair 0 +polici 0 +act 0 +master 0 +faculti 0 +recruit 0 +academ 0 +leadership 0 +profession 0 +activitieseditor 0 +transact 0 +scientist 0 +isat 0 +robust 0 +critic 0 +element 0 +nation 0 +inform 0 +infrastructur 0 +publicationsth 0 +approach 0 +decemb 0 +integr 0 +runtim 0 +journal 0 +birman 0 +societi 0 +press 0 +alamito 0 +glade 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..ddd46803 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,150 @@ +graphic 1 +comput 0 +scienc 0 +model 0 +univers 0 +technolog 0 +research 0 +scientif 0 +surfac 0 +imag 0 +center 0 +visual 0 +cornel 0 +displai 0 +techniqu 0 +nation 0 +director 0 +involv 0 +develop 0 +input 0 +method 0 +topic 0 +algorithm 0 +alias 0 +complex 0 +environ 0 +realist 0 +gener 0 +conduct 0 +reflect 0 +strategi 0 +medic 0 +program 0 +foundat 0 +institut 0 +engin 0 +fellow 0 +home 0 +denis 0 +donald 0 +greenbergdonald 0 +greenberg 0 +jacob 0 +gould 0 +schurman 0 +prof 0 +past 0 +decad 0 +activ 0 +wide 0 +rang 0 +number 0 +implement 0 +progress 0 +made 0 +larg 0 +varieti 0 +routin 0 +previous 0 +investig 0 +includ 0 +polygon 0 +clip 0 +hidden 0 +textur 0 +spatial 0 +tempor 0 +problem 0 +geometr 0 +parametr 0 +descript 0 +color 0 +current 0 +focu 0 +three 0 +dimension 0 +synthesi 0 +modular 0 +testb 0 +suffici 0 +flexibl 0 +evalu 0 +differ 0 +creat 0 +laboratori 0 +light 0 +determin 0 +interact 0 +improv 0 +effici 0 +trace 0 +parallel 0 +process 0 +perceptu 0 +studi 0 +micro 0 +geometri 0 +motion 0 +control 0 +dynam 0 +constraint 0 +anti 0 +host 0 +relat 0 +applic 0 +start 0 +volum 0 +render 0 +digit 0 +photographi 0 +anim 0 +tool 0 +well 0 +core 0 +multi 0 +media 0 +within 0 +facil 0 +member 0 +particip 0 +brown 0 +california 0 +north 0 +carolina 0 +chapel 0 +hill 0 +utah 0 +activitiesdirector 0 +visualizationprofession 0 +activitieseditori 0 +board 0 +journal 0 +academi 0 +found 0 +american 0 +biolog 0 +acmreturn 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..7635fdc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,200 @@ +comput 1 +scienc 0 +complex 0 +univers 0 +journal 0 +engin 0 +academi 0 +board 0 +problem 0 +resourc 0 +class 0 +council 0 +fellow 0 +lectur 0 +committe 0 +foundat 0 +research 0 +solv 0 +intern 0 +time 0 +member 0 +nation 0 +system 0 +eatc 0 +cornel 0 +technolog 0 +studi 0 +need 0 +variou 0 +random 0 +ture 0 +award 0 +american 0 +associ 0 +springer 0 +verlag 0 +advisori 0 +theoret 0 +chicago 0 +electron 0 +novemb 0 +natur 0 +distinguish 0 +seri 0 +februari 0 +home 0 +denis 0 +juri 0 +hartmanisjuri 0 +hartmani 0 +walter 0 +read 0 +professor 0 +california 0 +institut 0 +strateg 0 +goal 0 +contribut 0 +develop 0 +comprehens 0 +theori 0 +quantit 0 +law 0 +govern 0 +essenti 0 +part 0 +base 0 +guid 0 +har 0 +exploit 0 +explos 0 +grow 0 +classifi 0 +amount 0 +classif 0 +yield 0 +consist 0 +within 0 +given 0 +bound 0 +gain 0 +deeper 0 +understand 0 +make 0 +hard 0 +explor 0 +relat 0 +structur 0 +also 0 +trade 0 +off 0 +differ 0 +particular 0 +attent 0 +sequenti 0 +parallel 0 +nondeterminist 0 +memori 0 +requir 0 +interact 0 +activitiesmemb 0 +faculti 0 +representativeschair 0 +depart 0 +recruit 0 +committeehonorsacm 0 +stearn 0 +foreign 0 +latvian 0 +art 0 +york 0 +state 0 +advanc 0 +aaa 0 +charter 0 +profession 0 +activitieseditor 0 +note 0 +siam 0 +monograph 0 +director 0 +ifip 0 +technic 0 +georg 0 +brown 0 +school 0 +rice 0 +houston 0 +texa 0 +peer 0 +visit 0 +physic 0 +divis 0 +advisor 0 +world 0 +scientif 0 +presseditori 0 +press 0 +editor 0 +sciencegoedel 0 +prize 0 +telecommun 0 +awardshonorari 0 +doctor 0 +degre 0 +dortmund 0 +germani 0 +lecturessom 0 +observ 0 +banquet 0 +speech 0 +logic 0 +program 0 +symposium 0 +scope 0 +futur 0 +virginia 0 +tennesse 0 +april 0 +publicationson 0 +commun 0 +octob 0 +oracl 0 +hypothesi 0 +fals 0 +august 0 +richard 0 +chang 0 +benni 0 +chor 0 +od 0 +goldreich 0 +johan 0 +hastad 0 +desh 0 +ranjan 0 +pankaj 0 +rohatgi 0 +hausdorff 0 +topolog 0 +dimens 0 +kolmogorov 0 +real 0 +line 0 +decemb 0 +weight 0 +bulletin 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..07d47bb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,130 @@ +comput 1 +research 1 +scienc 1 +inform 0 +technolog 0 +engin 0 +institut 0 +nation 0 +board 0 +journal 0 +dean 0 +univers 0 +colleg 0 +depart 0 +unit 0 +captur 0 +access 0 +collabor 0 +design 0 +scientif 0 +advisori 0 +academi 0 +american 0 +associ 0 +fellow 0 +intern 0 +home 0 +cornel 0 +denis 0 +john 0 +hopcroftjohn 0 +hopcroftjoseph 0 +silbert 0 +engineeringprofessor 0 +sciencephd 0 +stanford 0 +januari 0 +appoint 0 +overse 0 +academ 0 +compris 0 +well 0 +variou 0 +involv 0 +continu 0 +robust 0 +geometr 0 +algorithm 0 +model 0 +simul 0 +develop 0 +facilit 0 +within 0 +environ 0 +among 0 +distribut 0 +databas 0 +persist 0 +object 0 +storag 0 +document 0 +imag 0 +process 0 +manag 0 +multimedia 0 +user 0 +interfac 0 +heterogen 0 +data 0 +knowledg 0 +represent 0 +organ 0 +remot 0 +profession 0 +activitiesmemb 0 +council 0 +commiss 0 +physic 0 +mathemat 0 +applicationsmemb 0 +boardmemb 0 +state 0 +forcememb 0 +engineeringfellow 0 +art 0 +sciencesfellow 0 +advanc 0 +aaa 0 +electr 0 +electron 0 +ieee 0 +machinerychairman 0 +siam 0 +trusteesmemb 0 +committe 0 +david 0 +lucil 0 +packard 0 +foundationmemb 0 +sloan 0 +fellowship 0 +committeeadvisori 0 +supercomput 0 +center 0 +defens 0 +analysiseditor 0 +oxford 0 +press 0 +seri 0 +algorithmica 0 +discret 0 +geometryassoci 0 +editor 0 +geometri 0 +applic 0 +system 0 +sciencesreturn 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..7fb25c56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,257 @@ +program 1 +compil 0 +parallel 0 +develop 0 +comput 0 +work 0 +code 0 +local 0 +algorithm 0 +depend 0 +research 0 +spars 0 +techniqu 0 +matrix 0 +restructur 0 +memori 0 +data 0 +optim 0 +control 0 +scienc 0 +languag 0 +applic 0 +matric 0 +us 0 +produc 0 +approach 0 +problem 0 +linear 0 +distribut 0 +access 0 +must 0 +transfer 0 +best 0 +loop 0 +paper 0 +result 0 +hewlett 0 +packard 0 +journal 0 +area 0 +architectur 0 +tool 0 +gener 0 +larg 0 +involv 0 +equat 0 +almost 0 +system 0 +algebra 0 +technolog 0 +dens 0 +inform 0 +structur 0 +librari 0 +like 0 +earlier 0 +refer 0 +block 0 +recent 0 +align 0 +incorpor 0 +transform 0 +implement 0 +prize 0 +asplo 0 +foundat 0 +question 0 +microsoft 0 +member 0 +young 0 +investig 0 +award 0 +lab 0 +corpor 0 +ieee 0 +univers 0 +june 0 +annual 0 +home 0 +cornel 0 +denis 0 +keshav 0 +pingalikeshav 0 +pingaliassoci 0 +professorphd 0 +group 0 +goal 0 +deal 0 +scientif 0 +numer 0 +solut 0 +partial 0 +differenti 0 +alwai 0 +unfortun 0 +exist 0 +poor 0 +take 0 +radic 0 +differ 0 +sequenti 0 +user 0 +sparsiti 0 +enabl 0 +preliminari 0 +experi 0 +krylov 0 +space 0 +solver 0 +show 0 +competit 0 +hand 0 +argonn 0 +petsc 0 +extend 0 +direct 0 +method 0 +solv 0 +requir 0 +adapt 0 +mesh 0 +refin 0 +project 0 +build 0 +uniform 0 +numa 0 +processor 0 +faster 0 +good 0 +perform 0 +also 0 +ensur 0 +match 0 +made 0 +prefer 0 +mani 0 +small 0 +messag 0 +known 0 +automat 0 +test 0 +novel 0 +call 0 +normal 0 +nest 0 +increas 0 +potenti 0 +lambda 0 +toolkit 0 +summar 0 +fortran 0 +product 0 +line 0 +uniprocessor 0 +multiprocessor 0 +framework 0 +analysi 0 +base 0 +flow 0 +graph 0 +knit 0 +togeth 0 +permit 0 +better 0 +possibl 0 +compet 0 +independ 0 +interest 0 +exampl 0 +answer 0 +open 0 +decad 0 +time 0 +static 0 +singl 0 +assign 0 +form 0 +number 0 +includ 0 +flavor 0 +profession 0 +activitiespanel 0 +organ 0 +symposium 0 +principl 0 +practic 0 +nation 0 +panel 0 +consult 0 +intel 0 +armi 0 +ballist 0 +odyssei 0 +math 0 +institut 0 +refere 0 +review 0 +topla 0 +transact 0 +supercomput 0 +computereditori 0 +board 0 +intern 0 +awardsn 0 +presidenti 0 +faculti 0 +lecturesfast 0 +chelmsford 0 +massachusett 0 +januari 0 +depart 0 +wayn 0 +state 0 +detroit 0 +michigan 0 +februari 0 +rutger 0 +brunswick 0 +jersei 0 +laboratori 0 +redmond 0 +washington 0 +publicationssolv 0 +elementari 0 +proceed 0 +seventh 0 +workshop 0 +lcpc 0 +lectur 0 +note 0 +ithaca 0 +august 0 +david 0 +indupraka 0 +kodukula 0 +vladimir 0 +kotlyar 0 +paul 0 +stodghil 0 +sigplan 0 +confer 0 +design 0 +pldi 0 +gianfranco 0 +bilardi 0 +return 0 +report 0 +page 0 +department 0 +pageif 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..f605e2c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,366 @@ +system 1 +comput 0 +program 0 +fault 0 +toler 0 +real 0 +time 0 +committe 0 +intern 0 +univers 0 +scienc 0 +germani 0 +techniqu 0 +implement 0 +member 0 +research 0 +hypervisor 0 +trom 0 +agent 0 +workshop 0 +distribut 0 +applic 0 +gri 0 +logic 0 +coordin 0 +inform 0 +softwar 0 +symposium 0 +formal 0 +exploit 0 +environ 0 +sept 0 +march 0 +critic 0 +virtual 0 +replica 0 +school 0 +center 0 +outlin 0 +lectur 0 +juli 0 +washington 0 +proof 0 +north 0 +carolina 0 +teach 0 +state 0 +concurr 0 +becom 0 +reason 0 +complet 0 +past 0 +base 0 +architectur 0 +protocol 0 +instruct 0 +execut 0 +oper 0 +norwai 0 +cornel 0 +support 0 +process 0 +data 0 +algorithm 0 +springer 0 +systemsprogram 0 +confer 0 +work 0 +hybrid 0 +group 0 +summer 0 +marktoberdorf 0 +lubeck 0 +specif 0 +technic 0 +sigcs 0 +proceed 0 +note 0 +mission 0 +develop 0 +design 0 +investig 0 +order 0 +equat 0 +term 0 +partial 0 +build 0 +manag 0 +machin 0 +issu 0 +realiz 0 +given 0 +final 0 +johansen 0 +robbert 0 +reness 0 +tacoma 0 +project 0 +mobil 0 +network 0 +stoller 0 +detect 0 +asynchron 0 +pass 0 +ieee 0 +engineeringeditor 0 +high 0 +integr 0 +mathemat 0 +fifth 0 +ifip 0 +depend 0 +verif 0 +isat 0 +defens 0 +warfar 0 +studi 0 +advanc 0 +associ 0 +meet 0 +verifi 0 +synchron 0 +moder 0 +refin 0 +polici 0 +munich 0 +chapel 0 +hill 0 +ad 0 +tool 0 +educ 0 +nashvil 0 +tennesse 0 +israel 0 +april 0 +june 0 +verlag 0 +york 0 +forc 0 +home 0 +denis 0 +fred 0 +schneiderfr 0 +schneider 0 +professorphd 0 +univ 0 +stoni 0 +brook 0 +understand 0 +increasingli 0 +import 0 +widespread 0 +focus 0 +heavili 0 +involv 0 +appli 0 +assert 0 +textbook 0 +subject 0 +along 0 +david 0 +continu 0 +concern 0 +first 0 +year 0 +streamlin 0 +infer 0 +rule 0 +evalu 0 +number 0 +handl 0 +undefin 0 +function 0 +thoma 0 +bressoud 0 +analyz 0 +replic 0 +risc 0 +ensur 0 +sequenc 0 +run 0 +differ 0 +physic 0 +processor 0 +ident 0 +also 0 +attract 0 +least 0 +theori 0 +instantli 0 +avail 0 +hardwar 0 +includ 0 +exist 0 +written 0 +second 0 +singl 0 +suffic 0 +everi 0 +programm 0 +freed 0 +task 0 +jointli 0 +start 0 +move 0 +structur 0 +construct 0 +commun 0 +bandwidth 0 +conserv 0 +access 0 +site 0 +resid 0 +typic 0 +filter 0 +otherwis 0 +reduc 0 +read 0 +carri 0 +relev 0 +roam 0 +prototyp 0 +third 0 +experi 0 +scott 0 +whether 0 +particular 0 +could 0 +global 0 +satisfi 0 +predic 0 +allow 0 +effici 0 +possibl 0 +previou 0 +activitiessabbat 0 +leav 0 +profession 0 +activitieseditor 0 +chief 0 +computingeditor 0 +letterseditor 0 +transact 0 +systemseditor 0 +annal 0 +surveysco 0 +editor 0 +text 0 +monograph 0 +verlagprogram 0 +constructionprogram 0 +respons 0 +compos 0 +resili 0 +applicationsprogram 0 +sixteenth 0 +symposiumprogram 0 +dimac 0 +control 0 +systemsst 0 +assur 0 +chissa 0 +nation 0 +institut 0 +standard 0 +technologymemb 0 +agencyreview 0 +leibniz 0 +hebrew 0 +universitymemb 0 +methodolog 0 +awardsfellow 0 +american 0 +sciencefellow 0 +machinerylecturesproof 0 +origin 0 +tradit 0 +banquet 0 +speech 0 +afosr 0 +grante 0 +contractor 0 +panelist 0 +compar 0 +merit 0 +model 0 +safeti 0 +write 0 +reactiv 0 +dagstuhl 0 +merg 0 +analysi 0 +georg 0 +mason 0 +virginia 0 +avoid 0 +mistak 0 +invit 0 +speaker 0 +traffic 0 +nasa 0 +am 0 +distinguish 0 +seri 0 +panel 0 +organ 0 +technion 0 +haifa 0 +place 0 +airplan 0 +view 0 +successor 0 +arpa 0 +publicationsreason 0 +colloquium 0 +icalp 0 +jerusalem 0 +materi 0 +summari 0 +boll 0 +offic 0 +scientif 0 +septemb 0 +volum 0 +limor 0 +proposit 0 +letter 0 +februari 0 +aircraft 0 +hand 0 +foundat 0 +ultradepend 0 +parallel 0 +paradigm 0 +kluwer 0 +academ 0 +publish 0 +marzullo 0 +dehn 0 +bulletin 0 +topic 0 +hoto 0 +orca 0 +island 0 +causal 0 +messag 0 +art 0 +newslett 0 +spring 0 +approach 0 +discret 0 +primu 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..07c28988 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,115 @@ +univers 1 +product 0 +scienc 0 +kroneck 0 +comput 0 +committe 0 +siam 0 +prize 0 +member 0 +sweden 0 +januari 0 +develop 0 +method 0 +variou 0 +solv 0 +applic 0 +process 0 +descript 0 +perform 0 +undergradu 0 +last 0 +art 0 +journal 0 +household 0 +umea 0 +build 0 +intuit 0 +ohio 0 +state 0 +april 0 +optic 0 +home 0 +cornel 0 +denis 0 +charl 0 +loancharl 0 +loanprofessorphd 0 +michigan 0 +continu 0 +particular 0 +nearest 0 +problem 0 +factor 0 +matric 0 +subject 0 +inhomogen 0 +constraint 0 +signal 0 +markov 0 +result 0 +fast 0 +wavelet 0 +transform 0 +also 0 +deriv 0 +proce 0 +analog 0 +plai 0 +import 0 +role 0 +high 0 +algorithm 0 +text 0 +work 0 +four 0 +year 0 +current 0 +translat 0 +anticip 0 +fall 0 +semest 0 +activitiescomput 0 +depart 0 +curriculum 0 +committeedepart 0 +repres 0 +chair 0 +meetingfreshman 0 +admiss 0 +reader 0 +profession 0 +activitieseditor 0 +matrix 0 +analysismemb 0 +wilkinson 0 +diprima 0 +organ 0 +confer 0 +lecturesappl 0 +linkop 0 +publicationsoptim 0 +close 0 +loop 0 +adapt 0 +multipl 0 +control 0 +bandwidth 0 +societi 0 +america 0 +ellerbroek 0 +pitsiani 0 +plemmon 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..2d091b6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,57 @@ +cornel 1 +cours 1 +univers 0 +comput 0 +scienc 0 +depart 0 +curriculum 0 +associ 0 +home 0 +denis 0 +catherin 0 +wagnercatherin 0 +wagnersenior 0 +lecturerphd 0 +primari 0 +respons 0 +teach 0 +work 0 +other 0 +revis 0 +lower 0 +level 0 +specif 0 +interest 0 +develop 0 +student 0 +prepar 0 +introductori 0 +program 0 +activitiescomput 0 +undergradu 0 +committe 0 +profession 0 +activitiesassoci 0 +symbol 0 +logic 0 +machineri 0 +institut 0 +electr 0 +electron 0 +engin 0 +women 0 +mathemat 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..428a2436 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,350 @@ +video 1 +research 0 +process 0 +data 0 +brian 0 +smith 0 +network 0 +server 0 +client 0 +multimedia 0 +system 0 +protocol 0 +talk 0 +avail 0 +proc 0 +lawrenc 0 +row 0 +environ 0 +continu 0 +media 0 +us 0 +comput 0 +first 0 +zeno 0 +softwar 0 +workstat 0 +effort 0 +cyclic 0 +compress 0 +onlin 0 +also 0 +languag 0 +intern 0 +cornel 0 +xerox 0 +univers 0 +offic 0 +build 0 +technolog 0 +commun 0 +approach 0 +connect 0 +distribut 0 +best 0 +develop 0 +audio 0 +paper 0 +describ 0 +algorithm 0 +imag 0 +method 0 +format 0 +independ 0 +review 0 +cours 0 +third 0 +confer 0 +workshop 0 +scienc 0 +california 0 +berkelei 0 +make 0 +class 0 +type 0 +support 0 +storag 0 +contrast 0 +hardwar 0 +current 0 +file 0 +architectur 0 +ofworkst 0 +gener 0 +local 0 +area 0 +common 0 +laboratori 0 +fileserv 0 +servic 0 +design 0 +ad 0 +load 0 +across 0 +initi 0 +invest 0 +exist 0 +center 0 +resourc 0 +well 0 +suit 0 +call 0 +playback 0 +applic 0 +decompress 0 +perform 0 +must 0 +idea 0 +jpeg 0 +transcod 0 +oper 0 +specifi 0 +graphic 0 +provid 0 +qualiti 0 +select 0 +jonathan 0 +resolut 0 +francisco 0 +novemb 0 +html 0 +version 0 +ketan 0 +patel 0 +mpeg 0 +transmiss 0 +electron 0 +june 0 +faculti 0 +interest 0 +bsmith 0 +professor 0 +upson 0 +hall 0 +phone 0 +hour 0 +semest 0 +tue 0 +thur 0 +intereststeachingselect 0 +publicationsresearch 0 +talksmisc 0 +linksresearch 0 +interestsmi 0 +goal 0 +ourcomput 0 +group 0 +project 0 +andprocess 0 +commercialand 0 +requir 0 +special 0 +operatingsystem 0 +usabl 0 +aredesign 0 +premis 0 +isthat 0 +infrastructurei 0 +suffici 0 +andappl 0 +verifi 0 +hypothesi 0 +workingsystem 0 +direct 0 +toward 0 +zenodistribut 0 +anethernet 0 +simultan 0 +serverof 0 +plai 0 +videostor 0 +receiv 0 +store 0 +severalserv 0 +sever 0 +compar 0 +withlarg 0 +central 0 +advantag 0 +scalabl 0 +automat 0 +balanc 0 +serv 0 +machin 0 +case 0 +locat 0 +differ 0 +util 0 +infrastructur 0 +promot 0 +earli 0 +adopt 0 +almost 0 +around 0 +effortdeliveri 0 +built 0 +networkprotocol 0 +mani 0 +needto 0 +reserv 0 +establish 0 +resourcereserv 0 +nation 0 +communicationinfrastructur 0 +user 0 +charg 0 +basi 0 +forbandwidth 0 +poorli 0 +networkenviron 0 +share 0 +equal 0 +accessiblebi 0 +appropri 0 +latterenviron 0 +commonli 0 +found 0 +thecommun 0 +builton 0 +datagram 0 +transport 0 +audioand 0 +metropolitan 0 +andwid 0 +todeliv 0 +availableonlin 0 +slide 0 +fold 0 +aredevelop 0 +compressedrepresent 0 +without 0 +lead 0 +todramat 0 +speed 0 +sinc 0 +remov 0 +thetim 0 +consum 0 +reducesth 0 +amount 0 +experi 0 +animplement 0 +indicatesthat 0 +order 0 +magnitud 0 +fasterthan 0 +possibl 0 +previou 0 +currentlyextend 0 +parallel 0 +insoftwar 0 +translat 0 +onecompress 0 +anoth 0 +heterogen 0 +compresseddomain 0 +explor 0 +simplifyexperiment 0 +programminglanguag 0 +calledrivl 0 +pronounc 0 +rival 0 +allowsvideo 0 +effect 0 +resolutionand 0 +sourc 0 +materi 0 +whatpostscript 0 +text 0 +resolutionindepend 0 +thu 0 +sameprogram 0 +quicktim 0 +quickli 0 +whileedit 0 +decis 0 +made 0 +high 0 +qualityfinish 0 +product 0 +line 0 +much 0 +postscript 0 +bepreview 0 +sent 0 +dpiprint 0 +camera 0 +readi 0 +copi 0 +onvideo 0 +domain 0 +rivl 0 +theme 0 +tool 0 +videous 0 +teachingat 0 +teach 0 +undergradu 0 +computerarchitectur 0 +graduat 0 +public 0 +swartz 0 +asif 0 +ghia 0 +logan 0 +david 0 +chamberlin 0 +queri 0 +hum 0 +larg 0 +music 0 +inform 0 +retriev 0 +databas 0 +peter 0 +name 0 +toronto 0 +ontario 0 +canada 0 +juli 0 +prioriti 0 +driven 0 +fast 0 +motion 0 +second 0 +sanfrancisco 0 +octob 0 +represent 0 +spie 0 +symposium 0 +jose 0 +februari 0 +stephen 0 +program 0 +decod 0 +internationalconfer 0 +anaheim 0 +august 0 +famili 0 +manipul 0 +ieee 0 +septemb 0 +player 0 +supportfor 0 +digit 0 +diego 0 +recent 0 +webster 0 +site 0 +minnesota 0 +colloquium 0 +misc 0 +link 0 +work 0 +annett 0 +hanna 0 +manual 0 +mmcn 0 +proceed 0 +documentationth 0 +priceweb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..ce561511 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,279 @@ +system 1 +nuprl 0 +comput 0 +us 0 +scienc 0 +theori 0 +logic 0 +program 0 +univers 0 +softwar 0 +mathemat 0 +type 0 +formal 0 +theorem 0 +design 0 +prove 0 +studi 0 +involv 0 +make 0 +line 0 +implement 0 +build 0 +method 0 +languag 0 +allen 0 +richard 0 +develop 0 +leeser 0 +weyl 0 +circuit 0 +theoret 0 +zippel 0 +connect 0 +committe 0 +israel 0 +chair 0 +cornel 0 +engag 0 +especi 0 +year 0 +major 0 +construct 0 +wide 0 +varieti 0 +version 0 +stuart 0 +eaton 0 +also 0 +how 0 +joint 0 +ventur 0 +polya 0 +algorithm 0 +gri 0 +hope 0 +explor 0 +futur 0 +journal 0 +symbol 0 +computationeditor 0 +fundament 0 +betweencomput 0 +decemb 0 +januari 0 +state 0 +notr 0 +dame 0 +march 0 +york 0 +pennsylvania 0 +springer 0 +verlag 0 +robert 0 +constablerobert 0 +constabledepart 0 +professorrc 0 +eduph 0 +wisconsin 0 +madison 0 +researchw 0 +providemechan 0 +assist 0 +problem 0 +solv 0 +programmingand 0 +long 0 +term 0 +wai 0 +theform 0 +feasibl 0 +implementedthre 0 +past 0 +experiment 0 +lispprogram 0 +sucha 0 +canexpress 0 +proof 0 +asmetalevel 0 +provid 0 +consider 0 +theoremprov 0 +power 0 +moreov 0 +canevalu 0 +content 0 +principl 0 +nuprli 0 +fomal 0 +continu 0 +improv 0 +current 0 +iscal 0 +differ 0 +predecessor 0 +termeditor 0 +itsintern 0 +structur 0 +modular 0 +suitabl 0 +hedefinit 0 +beyond 0 +built 0 +inconstruct 0 +entir 0 +mechanismha 0 +rebuilt 0 +stream 0 +paul 0 +jackson 0 +thework 0 +dougla 0 +contribut 0 +gener 0 +natur 0 +final 0 +refer 0 +isan 0 +intern 0 +descript 0 +builtprincip 0 +william 0 +aitken 0 +link 0 +internaldescript 0 +possibleto 0 +process 0 +three 0 +excit 0 +withmiriam 0 +electr 0 +engin 0 +incomput 0 +davidgri 0 +richardzippel 0 +withless 0 +hardwar 0 +synthesi 0 +verif 0 +student 0 +mark 0 +aagard 0 +thecorrect 0 +boolean 0 +minim 0 +packag 0 +compon 0 +bedrocsystem 0 +weak 0 +divis 0 +widelyus 0 +efforttaught 0 +great 0 +deal 0 +effect 0 +technolog 0 +inth 0 +hand 0 +expert 0 +user 0 +applic 0 +domain 0 +second 0 +model 0 +aprogram 0 +refin 0 +mechan 0 +david 0 +enabl 0 +write 0 +handbook 0 +themann 0 +devis 0 +programmingprocess 0 +givn 0 +definit 0 +ofpolya 0 +expect 0 +experi 0 +soon 0 +transform 0 +tryingto 0 +captur 0 +style 0 +want 0 +recent 0 +begun 0 +collabor 0 +relat 0 +thepolya 0 +effort 0 +conal 0 +mannion 0 +possibl 0 +ofus 0 +discussingproblem 0 +ssymbol 0 +algebra 0 +near 0 +thiswil 0 +scientif 0 +computingsoftwar 0 +togeth 0 +tool 0 +isbuild 0 +profession 0 +activitieseditor 0 +academ 0 +presseditor 0 +oxford 0 +pressgener 0 +licsprogram 0 +north 0 +american 0 +jumelageprogram 0 +aspect 0 +softwarerefere 0 +review 0 +nserc 0 +canada 0 +scienceunivers 0 +activitieschair 0 +recruit 0 +committeecomput 0 +facil 0 +committeeprovost 0 +mathematicslecturesform 0 +inria 0 +anniversari 0 +celebr 0 +pari 0 +franc 0 +colloquium 0 +bengurion 0 +sheva 0 +symposium 0 +aviv 0 +associ 0 +annual 0 +meet 0 +indiana 0 +metaprogram 0 +buffalo 0 +explan 0 +engineeringworkshop 0 +philadelphia 0 +publicationsform 0 +tendenc 0 +control 0 +appli 0 +bensoussan 0 +verju 0 +lectur 0 +note 0 +metalevel 0 +andmathemat 0 +manfr 0 +broi 0 +nato 0 +seri 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..c861de36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,31 @@ +adam 1 +florenc 1 +cornel 1 +univers 1 +upson 0 +hall 0 +ithaca 0 +aflorenc 0 +first 0 +year 0 +student 0 +incomput 0 +scienc 0 +atcornel 0 +professionalinterest 0 +includ 0 +simul 0 +numer 0 +analysi 0 +follow 0 +link 0 +find 0 +academicsresearchworkinterest 0 +athlet 0 +last 0 +updat 0 +sept 0 +mail 0 +mewith 0 +comment 0 +correct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..d52957c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,30 @@ +aguilera 1 +kawazo 0 +cornel 0 +marco 0 +page 0 +univers 0 +home 0 +depart 0 +comput 0 +scienc 0 +ithaca 0 +second 0 +year 0 +student 0 +distribut 0 +system 0 +algorithmsrandom 0 +failur 0 +detect 0 +hybrid 0 +approach 0 +solv 0 +consensusgo 0 +tour 0 +brazil 0 +check 0 +suggest 0 +warn 0 +perman 0 +constructionmarco 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..93a27d07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,33 @@ +alfr 1 +hong 1 +cornel 1 +new 1 +chines 1 +home 0 +page 0 +get 0 +dizzi 0 +offic 0 +upson 0 +hall 0 +univers 0 +ithaca 0 +usaoffic 0 +phone 0 +worthwhil 0 +site 0 +check 0 +sinanet 0 +taiwan 0 +requir 0 +nandonet 0 +sunworld 0 +javaworldcours 0 +stuff 0 +corba 0 +essenti 0 +annot 0 +bibliographyc 0 +project 0 +reportalfr 0 +ahong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..de0ef983 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,58 @@ +comput 1 +cornel 0 +system 0 +high 0 +depart 0 +architectur 0 +network 0 +perform 0 +final 0 +project 0 +postscript 0 +vineet 0 +home 0 +pagevineet 0 +ahujam 0 +engg 0 +sciencecornel 0 +universityithaca 0 +address 0 +hasbrouck 0 +apt 0 +ithaca 0 +mail 0 +ahuja 0 +academ 0 +student 0 +scienc 0 +univers 0 +main 0 +area 0 +interest 0 +parallel 0 +process 0 +advanc 0 +oper 0 +object 0 +orient 0 +program 0 +coursework 0 +fall 0 +automata 0 +theori 0 +engin 0 +spring 0 +report 0 +softwar 0 +design 0 +reportfal 0 +capac 0 +inform 0 +multimedia 0 +resum 0 +recent 0 +html 0 +page 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..bb070fb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,21 @@ +ching 1 +chinglan 0 +cornel 0 +edumast 0 +engin 0 +student 0 +comput 0 +scienc 0 +depart 0 +beau 0 +pair 0 +address 0 +seneca 0 +ithaca 0 +telephon 0 +page 0 +still 0 +construct 0 +java 0 +examplegraph 0 +project 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..352ef79e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,82 @@ +cornel 1 +univers 0 +technic 0 +report 0 +alexei 0 +home 0 +comput 0 +distribut 0 +system 0 +group 0 +new 0 +weather 0 +pagealexei 0 +vaysburdalexei 0 +program 0 +scienc 0 +research 0 +interest 0 +includ 0 +commun 0 +larg 0 +scale 0 +wide 0 +area 0 +andobject 0 +orient 0 +tool 0 +work 0 +within 0 +horu 0 +lead 0 +birman 0 +androbbert 0 +reness 0 +paper 0 +implement 0 +replic 0 +state 0 +machin 0 +partition 0 +network 0 +friedman 0 +vaysburd 0 +link 0 +search 0 +engin 0 +lyco 0 +broadcast 0 +seri 0 +ecol 0 +polytechniqu 0 +federal 0 +lausann 0 +hebrew 0 +transi 0 +page 0 +cuinfo 0 +gopher 0 +direct 0 +cornellcornel 0 +mail 0 +directorycornel 0 +staff 0 +directori 0 +student 0 +directorycours 0 +class 0 +examscornel 0 +calendarcornel 0 +art 0 +musicbailei 0 +hall 0 +concertscornel 0 +music 0 +event 0 +ithaca 0 +current 0 +condit 0 +ithacaworld 0 +brief 0 +odessa 0 +odessaweb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..715f6f26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,170 @@ +simul 1 +compress 1 +chip 1 +graphic 0 +yama 0 +ride 0 +train 0 +us 0 +imag 0 +data 0 +hell 0 +heaven 0 +better 0 +comput 0 +microsystem 0 +avenu 0 +cupertino 0 +javasoft 0 +coursesvlsi 0 +engin 0 +system 0 +project 0 +includ 0 +landscap 0 +environ 0 +cours 0 +memori 0 +pattern 0 +softwar 0 +pyramania 0 +game 0 +accord 0 +hindu 0 +mytholog 0 +death 0 +come 0 +downto 0 +earth 0 +give 0 +though 0 +kind 0 +imparti 0 +mess 0 +amithyamasanim 0 +engg 0 +depart 0 +scienc 0 +cornel 0 +univers 0 +ithaca 0 +yorki 0 +make 0 +monei 0 +garcia 0 +mountain 0 +view 0 +mailstop 0 +ucup 0 +watch 0 +warburton 0 +santa 0 +clara 0 +mail 0 +amith 0 +yamasani 0 +comi 0 +currentlyemploi 0 +california 0 +workingin 0 +javamedia 0 +groupeducationfal 0 +multimedia 0 +cssoftwar 0 +csspring 0 +high 0 +perform 0 +cscomput 0 +csproject 0 +final 0 +mpeg 0 +descript 0 +parallel 0 +trace 0 +propos 0 +virtual 0 +realiti 0 +railroad 0 +master 0 +involv 0 +railroadsystem 0 +track 0 +switch 0 +station 0 +user 0 +interact 0 +wall 0 +cave 0 +stereo 0 +vision 0 +done 0 +processor 0 +onyx 0 +softwarei 0 +written 0 +openinventord 0 +librari 0 +silicon 0 +gener 0 +dynam 0 +ofth 0 +through 0 +documentimag 0 +chipdevelop 0 +vlsi 0 +basicallycompress 0 +stream 0 +algorithm 0 +thisalgorithm 0 +suit 0 +control 0 +extern 0 +cachecam 0 +content 0 +address 0 +store 0 +inputstream 0 +hope 0 +might 0 +occur 0 +capableof 0 +rate 0 +byte 0 +nano 0 +second 0 +nowai 0 +routin 0 +speed 0 +snap 0 +video 0 +transit 0 +rivlproposalpresentationand 0 +sampl 0 +dthi 0 +develop 0 +itswritten 0 +interfac 0 +xlib 0 +thed 0 +render 0 +take 0 +look 0 +snapshot 0 +screen 0 +parallelomania 0 +resumehtmlpostscript 0 +past 0 +present 0 +futur 0 +pal 0 +home 0 +page 0 +satyaprasad 0 +avinashgupta 0 +kartikh 0 +kapadia 0 +hrishikeshdixit 0 +joselui 0 +fernandez 0 +vineetahuja 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..14a71b05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,89 @@ +ankit 1 +patel 1 +cornel 1 +comput 1 +cours 1 +multimedia 1 +project 1 +virtual 1 +realiti 1 +homepag 0 +east 0 +state 0 +street 0 +ithaca 0 +apatel 0 +person 0 +photo 0 +galleria 0 +chronologia 0 +curriculum 0 +vita 0 +universityresumedepart 0 +scienc 0 +enrolledgradu 0 +research 0 +assist 0 +prof 0 +brian 0 +smith 0 +group 0 +zeno 0 +canvasd 0 +toolkit 0 +interact 0 +platform 0 +independ 0 +develop 0 +high 0 +perform 0 +graphic 0 +applic 0 +video 0 +conferencingmultimedia 0 +system 0 +read 0 +assignmentsproject 0 +kernel 0 +endpoint 0 +netan 0 +annot 0 +bibliographi 0 +common 0 +object 0 +request 0 +broker 0 +architectur 0 +corba 0 +critiqu 0 +understand 0 +limit 0 +causal 0 +total 0 +order 0 +commun 0 +david 0 +cheriton 0 +dale 0 +skeen 0 +carnegi 0 +mellon 0 +summer 0 +school 0 +scienceworld 0 +wide 0 +technolog 0 +spring 0 +link 0 +real 0 +time 0 +support 0 +multimediamaharaja 0 +sayajirao 0 +univers 0 +academicsfriend 0 +techoreli 0 +industri 0 +limitedjob 0 +profilelif 0 +relianc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..8bdb648a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,15 @@ +comput 1 +ashish 0 +jhaveriashish 0 +jhaveridepart 0 +sciencemast 0 +engineeringresumehtmlpost 0 +scriptcourseworkadvanc 0 +databas 0 +systemscsmultimedia 0 +systemscsengin 0 +networkscsprogram 0 +languag 0 +softwareengin 0 +csashish 0 +jhaveri 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..e1e0623e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,39 @@ +aswin 1 +swing 1 +cornel 0 +danc 0 +berg 0 +graduat 0 +student 0 +phone 0 +program 0 +transform 0 +person 0 +page 0 +eduoffic 0 +upson 0 +hall 0 +home 0 +skyacr 0 +drive 0 +ithaca 0 +interest 0 +languag 0 +systemmi 0 +life 0 +famili 0 +album 0 +annek 0 +server 0 +jean 0 +deejay 0 +guid 0 +isdn 0 +record 0 +hop 0 +pictur 0 +nederlands 0 +club 0 +dutch 0 +clubi 0 +atcornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..768665e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,122 @@ +system 1 +piec 0 +game 0 +five 0 +point 0 +home 0 +oper 0 +user 0 +trap 0 +comput 0 +multimedia 0 +distribut 0 +hoca 0 +chip 0 +like 0 +network 0 +requir 0 +player 0 +ipng 0 +space 0 +next 0 +gener 0 +interfac 0 +page 0 +subject 0 +welcomeavinash 0 +guptam 0 +engg 0 +depart 0 +sciencecornel 0 +universityithaca 0 +address 0 +magazin 0 +streetcambridg 0 +mail 0 +avinash 0 +thecia 0 +resumemi 0 +main 0 +area 0 +interest 0 +graphic 0 +cours 0 +softwar 0 +engin 0 +high 0 +perform 0 +project 0 +pronounc 0 +hodja 0 +full 0 +fledg 0 +cornel 0 +hypothet 0 +instruct 0 +processor 0 +support 0 +featur 0 +multipl 0 +multitask 0 +virtual 0 +memori 0 +scene 0 +transit 0 +effect 0 +rivl 0 +presentationpent 0 +skillpent 0 +mean 0 +board 0 +skill 0 +object 0 +horizont 0 +vertic 0 +diagon 0 +earn 0 +oppon 0 +even 0 +number 0 +either 0 +pair 0 +reach 0 +first 0 +win 0 +window 0 +screenshot 0 +gamedownload 0 +implement 0 +ipvimpl 0 +internet 0 +protocol 0 +us 0 +make 0 +abl 0 +stream 0 +proposalprogress 0 +reportsam 0 +caveat 0 +appli 0 +almost 0 +everi 0 +pageon 0 +still 0 +construct 0 +brows 0 +internethytelnetth 0 +librari 0 +catalogeinet 0 +galaxyplanet 0 +earth 0 +pagejoel 0 +hierarch 0 +indexyahoo 0 +guid 0 +wwwwebcrawlerlycosmi 0 +friend 0 +link 0 +sign 0 +guestbook 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..ae0c2f42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,257 @@ +peopl 1 +also 0 +comput 0 +page 0 +cornel 0 +high 0 +live 0 +work 0 +home 0 +jonathan 0 +barber 0 +cours 0 +research 0 +futur 0 +call 0 +list 0 +finish 0 +program 0 +summer 0 +project 0 +implement 0 +good 0 +favorit 0 +bulli 0 +upson 0 +interest 0 +ponch 0 +goe 0 +student 0 +master 0 +engin 0 +time 0 +develop 0 +protocol 0 +parallel 0 +past 0 +semest 0 +video 0 +product 0 +taken 0 +perform 0 +friend 0 +sugata 0 +paper 0 +encrypt 0 +oregon 0 +like 0 +write 0 +locat 0 +hous 0 +inde 0 +georg 0 +bush 0 +keep 0 +graphic 0 +need 0 +fleshpooooooooooooooch 0 +hall 0 +cool 0 +dude 0 +felt 0 +inclin 0 +chang 0 +first 0 +sentenc 0 +birth 0 +name 0 +usual 0 +callm 0 +orpooch 0 +us 0 +lake 0 +fork 0 +guppi 0 +skin 0 +mama 0 +phin 0 +degre 0 +graduat 0 +computersci 0 +cornellunivers 0 +undergrad 0 +scienc 0 +well 0 +grew 0 +coupl 0 +town 0 +jersei 0 +attendedmontgomeri 0 +school 0 +current 0 +collegetown 0 +adjac 0 +campu 0 +cheap 0 +sublet 0 +oncolleg 0 +ultra 0 +conveni 0 +sinc 0 +plai 0 +half 0 +mile 0 +radiu 0 +went 0 +greek 0 +spent 0 +epsilon 0 +fratern 0 +border 0 +cayuga 0 +height 0 +kind 0 +hard 0 +believ 0 +dai 0 +hand 0 +prof 0 +thorsten 0 +eicken 0 +packet 0 +filter 0 +thefilt 0 +bandwidth 0 +network 0 +bring 0 +person 0 +taught 0 +week 0 +session 0 +ofc 0 +fundament 0 +concept 0 +introductori 0 +year 0 +teach 0 +assist 0 +forc 0 +introduct 0 +digit 0 +system 0 +organ 0 +fall 0 +spring 0 +intereststhi 0 +rivl 0 +resolut 0 +independ 0 +languag 0 +multimedia 0 +group 0 +final 0 +labor 0 +report 0 +continu 0 +media 0 +rivlan 0 +improv 0 +object 0 +tracker 0 +rivli 0 +compil 0 +mukhopadhyai 0 +still 0 +class 0 +smpd 0 +code 0 +generatorfor 0 +fortran 0 +base 0 +linear 0 +algebra 0 +framework 0 +come 0 +soon 0 +previou 0 +greg 0 +weber 0 +webar 0 +smart 0 +frame 0 +depend 0 +stream 0 +mpeg 0 +unfortun 0 +electron 0 +form 0 +trust 0 +look 0 +bright 0 +pack 0 +gear 0 +take 0 +perman 0 +road 0 +trip 0 +hopefulli 0 +buddi 0 +surfer 0 +inform 0 +share 0 +pleas 0 +intel 0 +corp 0 +ahead 0 +pictur 0 +portland 0 +clarif 0 +actual 0 +thought 0 +would 0 +pretti 0 +resourceful 0 +camera 0 +room 0 +window 0 +pipe 0 +meanth 0 +gener 0 +line 0 +comrad 0 +realli 0 +nice 0 +ofhi 0 +found 0 +lucki 0 +consid 0 +least 0 +know 0 +decid 0 +otherwis 0 +captain 0 +swirl 0 +nefari 0 +toilet 0 +mukhopadyai 0 +bonei 0 +magoo 0 +fletop 0 +bigro 0 +dave 0 +koster 0 +bot 0 +offici 0 +tffl 0 +pageuuencod 0 +pagetar 0 +zip 0 +pageif 0 +brows 0 +feel 0 +urg 0 +send 0 +mail 0 +downloadsgraphicsbarb 0 +gifponch 0 +htmlres_htmlres_curemmittemmitt 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..b57b18c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,202 @@ +anindya 1 +comput 0 +cornel 0 +commun 0 +messag 0 +work 0 +cluster 0 +thorsten 0 +eicken 0 +perform 0 +activ 0 +like 0 +basu 0 +also 0 +parallel 0 +homepag 0 +realli 0 +photograph 0 +scienc 0 +univers 0 +ithaca 0 +phone 0 +graduat 0 +student 0 +complet 0 +love 0 +would 0 +stuff 0 +us 0 +layer 0 +program 0 +develop 0 +fast 0 +implement 0 +latenc 0 +proceed 0 +network 0 +vineet 0 +buch 0 +appear 0 +look 0 +click 0 +recent 0 +mugshot 0 +locat 0 +home 0 +upson 0 +hall 0 +delawar 0 +avenu 0 +depart 0 +offic 0 +mail 0 +eduwhat 0 +scienceat 0 +hope 0 +turn 0 +centuri 0 +listen 0 +rock 0 +musicor 0 +goof 0 +thecornel 0 +projectwith 0 +advisor 0 +think 0 +coollik 0 +indiawho 0 +went 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +hardpink 0 +floydfanat 0 +final 0 +realiz 0 +childhood 0 +dream 0 +see 0 +livelast 0 +summer 0 +philadelphia 0 +plum 0 +known 0 +thegreat 0 +unwash 0 +pelham 0 +grenvil 0 +wodehous 0 +attendedwoodstock 0 +truli 0 +motiv 0 +onlinewoodstock 0 +review 0 +woodstock 0 +experienc 0 +metallica 0 +live 0 +first 0 +time 0 +thesocc 0 +worldcup 0 +last 0 +year 0 +butunfortun 0 +happen 0 +cook 0 +tri 0 +putsomerecip 0 +onlin 0 +fanci 0 +connoisseurof 0 +good 0 +whiski 0 +especi 0 +singl 0 +malt 0 +link 0 +cool 0 +calvin 0 +hobb 0 +archiv 0 +south 0 +asian 0 +writer 0 +cheer 0 +monti 0 +python 0 +beavi 0 +buttheadoth 0 +cuinfo 0 +gopherand 0 +someth 0 +differ 0 +projectwhich 0 +aim 0 +provid 0 +high 0 +ofworkst 0 +compar 0 +state 0 +mpp 0 +acheiv 0 +passinglay 0 +call 0 +split 0 +thatshow 0 +run 0 +meiko 0 +addit 0 +show 0 +saturateth 0 +fibr 0 +segment 0 +size 0 +collabor 0 +withth 0 +berkelei 0 +project 0 +team 0 +specificationfor 0 +enabl 0 +processesboth 0 +trust 0 +untrust 0 +environ 0 +public 0 +mechan 0 +integr 0 +david 0 +culler 0 +seth 0 +goldstein 0 +klau 0 +schauser 0 +symp 0 +architectur 0 +gold 0 +coast 0 +australia 0 +veena 0 +avula 0 +present 0 +interconnect 0 +palo 0 +alto 0 +abridgedvers 0 +paper 0 +ieee 0 +micro 0 +februari 0 +user 0 +level 0 +interfac 0 +distribut 0 +werner 0 +vogel 0 +sosp 0 +back 0 +homepagelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..b1f36362 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,105 @@ +cornel 1 +check 0 +weather 0 +jpop 0 +internet 0 +comput 0 +scienc 0 +area 0 +librari 0 +interact 0 +music 0 +navig 0 +info 0 +simpl 0 +homepageben 0 +haogradu 0 +studentbhao 0 +educornel 0 +universitydept 0 +upson 0 +hallithaca 0 +insid 0 +flea 0 +forth 0 +year 0 +student 0 +incomput 0 +work 0 +code 0 +synthesi 0 +massiv 0 +parallel 0 +processor 0 +advisor 0 +richard 0 +zippel 0 +first 0 +heard 0 +georg 0 +taylorwhen 0 +microsystem 0 +famili 0 +francisco 0 +find 0 +inform 0 +aboutth 0 +read 0 +local 0 +newspap 0 +forget 0 +itsgorg 0 +life 0 +cornellwhat 0 +ithaca 0 +movi 0 +studi 0 +dept 0 +home 0 +page 0 +gener 0 +neat 0 +stuff 0 +stock 0 +map 0 +frog 0 +dissectionmagazin 0 +intertext 0 +magazin 0 +wire 0 +magazinea 0 +peek 0 +galaxi 0 +view 0 +solar 0 +system 0 +shoemak 0 +levi 0 +weblouvr 0 +xmorphia 0 +galleri 0 +line 0 +geometri 0 +kaleidospac 0 +overview 0 +bonsai 0 +seiyuu 0 +underground 0 +archivenetwork 0 +global 0 +network 0 +wander 0 +spider 0 +edgelibrari 0 +congress 0 +martial 0 +scientif 0 +tutori 0 +infonih 0 +courseth 0 +intern 0 +guidecern 0 +seminar 0 +last 0 +modifi 0 +bhao 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..45ce2df3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,23 @@ +aastha 1 +home 1 +pageaastha 1 +bhardwajdepart 1 +comput 1 +sciencemast 1 +ofengineeeringresumehtmlpost 1 +scriptcourseworkadvanceddatabas 1 +system 1 +csmultimediasystem 1 +csengineeringcomput 1 +network 1 +cssoftwareengin 1 +program 1 +languag 1 +cscontact 1 +inform 1 +hasbrouck 1 +apart 1 +ithaca 1 +york 1 +bhardwaj 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..039acbdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,283 @@ +robot 1 +cornel 0 +intern 0 +vision 0 +algorithm 0 +ieee 0 +paper 0 +macdonald 0 +arrai 0 +plan 0 +tommi 0 +lili 0 +donald 0 +manipul 0 +inform 0 +part 0 +mobil 0 +bhringer 0 +confer 0 +invari 0 +pictur 0 +click 0 +laboratori 0 +actuat 0 +us 0 +programm 0 +micro 0 +workshop 0 +distribut 0 +jen 0 +massiv 0 +parallel 0 +feeder 0 +group 0 +sensor 0 +system 0 +bound 0 +vector 0 +field 0 +vibratori 0 +foundat 0 +franc 0 +mem 0 +diego 0 +autom 0 +kinodynam 0 +xavier 0 +journal 0 +proc 0 +hereto 0 +peopl 0 +randal 0 +home 0 +page 0 +huttenloch 0 +thecornel 0 +noel 0 +chip 0 +small 0 +student 0 +work 0 +develop 0 +team 0 +demo 0 +fabric 0 +video 0 +chase 0 +ourlab 0 +recent 0 +toulous 0 +brigg 0 +task 0 +california 0 +cannot 0 +icra 0 +minneapoli 0 +minnesota 0 +april 0 +provablygood 0 +approxim 0 +optim 0 +algorithmica 0 +experiment 0 +intellig 0 +book 0 +artificialintellig 0 +ree 0 +couch 0 +plai 0 +bruce 0 +donaldbruc 0 +donaldassoci 0 +professorbrd 0 +eduph 0 +weather 0 +palo 0 +alto 0 +offici 0 +department 0 +laboratorydan 0 +found 0 +researchmi 0 +interest 0 +includ 0 +professor 0 +build 0 +microactu 0 +nation 0 +nanofabr 0 +arrayi 0 +scream 0 +contain 0 +squarecentemet 0 +orient 0 +without 0 +sensoryfeedback 0 +microfabr 0 +could 0 +toconstruct 0 +scale 0 +buildself 0 +propel 0 +walk 0 +vlsi 0 +graduat 0 +amybrigg 0 +surveil 0 +ofmobil 0 +board 0 +detect 0 +andintercept 0 +target 0 +mpeg 0 +built 0 +developedbi 0 +track 0 +follow 0 +visual 0 +informationalon 0 +show 0 +view 0 +face 0 +morph 0 +select 0 +public 0 +upper 0 +andlow 0 +applic 0 +memsand 0 +thealgorithm 0 +robustgeometr 0 +internationalworkshop 0 +singl 0 +crystal 0 +silicon 0 +electro 0 +mechan 0 +februari 0 +classif 0 +lower 0 +andimprov 0 +partsfeed 0 +partii 0 +robotswith 0 +decoupl 0 +dynam 0 +forcartesian 0 +open 0 +chain 0 +motion 0 +canni 0 +reif 0 +research 0 +inpress 0 +minim 0 +supermodular 0 +andtheoret 0 +artifici 0 +jetai 0 +press 0 +write 0 +entitl 0 +draft 0 +firstquart 0 +appear 0 +revis 0 +base 0 +experi 0 +inminim 0 +symposium 0 +iser 0 +stanford 0 +move 0 +furnitur 0 +automon 0 +societi 0 +ofjapan 0 +iro 0 +pittsburgh 0 +sensorlessmanipul 0 +actuatorarrai 0 +mihailovich 0 +andautom 0 +detail 0 +explan 0 +program 0 +scheme 0 +automationnic 0 +first 0 +ofrobot 0 +peter 0 +boston 0 +wilson 0 +andj 0 +latomb 0 +automat 0 +configur 0 +direct 0 +proceed 0 +otherpubl 0 +these 0 +post 0 +doc 0 +train 0 +daniela 0 +jonathan 0 +dinesh 0 +aval 0 +server 0 +list 0 +version 0 +onlin 0 +tech 0 +report 0 +librari 0 +catalogc 0 +indexobtain 0 +copi 0 +paperscopi 0 +avail 0 +anonym 0 +teamof 0 +autonom 0 +movefurnitur 0 +around 0 +portrait 0 +mobot 0 +push 0 +rotat 0 +apictur 0 +drawn 0 +loretta 0 +pompilio 0 +discoverychannel 0 +beyond 0 +find 0 +funa 0 +poem 0 +alfr 0 +mail 0 +agent 0 +famili 0 +harm 0 +swallow 0 +ithaca 0 +sometim 0 +moreoth 0 +depart 0 +herefor 0 +search 0 +tool 0 +access 0 +stuff 0 +return 0 +level 0 +clickher 0 +tallest 0 +darkest 0 +lead 0 +hollywood 0 +merian 0 +cooper 0 +wrai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..2cb29c3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,242 @@ +cardi 1 +learn 1 +languag 0 +intellig 0 +natur 0 +proceed 0 +artifici 0 +machin 0 +press 0 +confer 0 +research 0 +base 0 +system 0 +us 0 +knowledg 0 +acquisit 0 +approach 0 +aaai 0 +annual 0 +case 0 +techniqu 0 +work 0 +project 0 +linguist 0 +sentenc 0 +analysi 0 +domain 0 +process 0 +univers 0 +thesi 0 +cognit 0 +cornel 0 +understand 0 +inform 0 +kenmor 0 +featur 0 +rel 0 +pronoun 0 +note 0 +specif 0 +massachusett 0 +amherst 0 +nation 0 +link 0 +report 0 +focu 0 +area 0 +naturallanguag 0 +group 0 +interest 0 +guid 0 +develop 0 +extract 0 +part 0 +technic 0 +text 0 +knowledgeacquisit 0 +corpora 0 +tag 0 +larger 0 +select 0 +riloff 0 +workshop 0 +intern 0 +chapter 0 +contain 0 +page 0 +tenth 0 +disambigu 0 +associ 0 +comput 0 +jose 0 +scienc 0 +societi 0 +lawrenc 0 +erlbaumassoci 0 +lehnert 0 +paper 0 +clair 0 +cardieclair 0 +assist 0 +professor 0 +upson 0 +hallphon 0 +email 0 +educlick 0 +interestscours 0 +teachselect 0 +publicationsnlp 0 +entri 0 +depart 0 +amalgam 0 +softbal 0 +stat 0 +finger 0 +lake 0 +tandem 0 +tour 0 +westi 0 +interestsalthough 0 +span 0 +number 0 +subfield 0 +within 0 +includ 0 +reason 0 +cognitivemodel 0 +primarili 0 +investig 0 +machinelearn 0 +tool 0 +forexplor 0 +mechan 0 +underli 0 +focus 0 +tworel 0 +design 0 +user 0 +train 0 +effici 0 +reliablyextract 0 +import 0 +document 0 +cstr 0 +tosupport 0 +content 0 +brows 0 +autom 0 +task 0 +compris 0 +build 0 +gener 0 +kenmoreacquir 0 +combin 0 +symbol 0 +learningtechniqu 0 +robust 0 +tworeal 0 +world 0 +perform 0 +speech 0 +semant 0 +andconcept 0 +activ 0 +find 0 +anteced 0 +current 0 +extend 0 +handl 0 +addit 0 +disambiguationtask 0 +evalu 0 +learningcompon 0 +context 0 +applic 0 +isembed 0 +goal 0 +determin 0 +condit 0 +expect 0 +offer 0 +cost 0 +effect 0 +teachingc 0 +spring 0 +foundat 0 +practicum 0 +inartifici 0 +fall 0 +seminar 0 +understandingselect 0 +publicationsautom 0 +conferenceon 0 +empir 0 +method 0 +pennsylvania 0 +embed 0 +agener 0 +framework 0 +wermter 0 +scheler 0 +gabriel 0 +connectionist 0 +statist 0 +andsymbol 0 +lectur 0 +springer 0 +origin 0 +present 0 +tolearn 0 +jointconfer 0 +ijcai 0 +introduct 0 +conceptu 0 +file 0 +introductori 0 +conceptualsent 0 +avail 0 +cmpsci 0 +eleventh 0 +washington 0 +decis 0 +tree 0 +improv 0 +morgan 0 +kaufmann 0 +corpu 0 +heurist 0 +newark 0 +bias 0 +fourteenth 0 +bloomington 0 +onconstrain 0 +prior 0 +plausibl 0 +complic 0 +syntax 0 +ninth 0 +anaheim 0 +analyz 0 +citat 0 +twelfth 0 +cambridg 0 +linkscomput 0 +linguistics 0 +print 0 +archiv 0 +databas 0 +recent 0 +aclspeci 0 +learningmachin 0 +digestmachinelearn 0 +resourc 0 +researchersmachin 0 +home 0 +penn 0 +treebank 0 +repositori 0 +pointer 0 +code 0 +variou 0 +compon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..8f15f5b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,88 @@ +chew 1 +geometr 1 +gener 1 +mathemat 1 +paul 0 +research 0 +univers 0 +cornel 0 +work 0 +delaunai 0 +creat 0 +applic 0 +mesh 0 +level 0 +softwar 0 +us 0 +senior 0 +associatephd 0 +purdu 0 +eduappletsy 0 +need 0 +java 0 +compat 0 +beta 0 +version 0 +browser 0 +asnetscap 0 +make 0 +voronoi 0 +applet 0 +avoronoi 0 +diagram 0 +triangul 0 +click 0 +point 0 +agenda 0 +primari 0 +interest 0 +algorithm 0 +emphasi 0 +onpract 0 +practic 0 +includedplac 0 +motion 0 +plan 0 +shape 0 +comparison 0 +vision 0 +sens 0 +exampl 0 +issu 0 +thataris 0 +part 0 +problem 0 +automat 0 +scientificsoftwar 0 +goal 0 +rais 0 +isspecifi 0 +develop 0 +environ 0 +scientif 0 +canb 0 +natur 0 +high 0 +concept 0 +ofphys 0 +engin 0 +thu 0 +program 0 +specifi 0 +implicitli 0 +acollect 0 +equat 0 +symbol 0 +techniquesar 0 +transform 0 +express 0 +effectiveprogram 0 +myonlin 0 +tech 0 +reportscornel 0 +depart 0 +computerscienceth 0 +simlabprojectaddress 0 +rhode 0 +hall 0 +ithaca 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..471690e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,76 @@ +cornel 1 +soccer 0 +chao 0 +chang 0 +page 0 +chichao 0 +student 0 +comput 0 +network 0 +interest 0 +system 0 +activ 0 +home 0 +edui 0 +thedepart 0 +scienceat 0 +univers 0 +faculti 0 +advisor 0 +isthorsten 0 +eicken 0 +summer 0 +microsoft 0 +group 0 +click 0 +addressesand 0 +phone 0 +number 0 +research 0 +interact 0 +compil 0 +runtim 0 +andoper 0 +toward 0 +effici 0 +concurr 0 +program 0 +overheterogen 0 +tham 0 +composit 0 +messageslow 0 +latenc 0 +commun 0 +risc 0 +multimatlab 0 +matlab 0 +multipl 0 +processor 0 +design 0 +perform 0 +messag 0 +anyon 0 +sport 0 +server 0 +latest 0 +newsbraziliansocc 0 +portugues 0 +world 0 +result 0 +andhomepagesoliv 0 +guid 0 +cool 0 +stuff 0 +lubrasa 0 +luso 0 +brazilian 0 +associationu 0 +chess 0 +centerjorn 0 +brasilmi 0 +carstockmasterjayhawk 0 +basketballwww 0 +tennisserverback 0 +scienc 0 +homepagelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..2fe0d958 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,58 @@ +chee 1 +keong 1 +graduat 1 +engin 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +ithaca 1 +inform 1 +tokyo 1 +singapor 1 +liau 0 +liauwelcom 0 +home 0 +page 0 +student 0 +master 0 +programm 0 +apolog 0 +forth 0 +construct 0 +work 0 +hopefulli 0 +thing 0 +improv 0 +soon 0 +school 0 +univers 0 +fall 0 +class 0 +foundat 0 +artifici 0 +intelligencec 0 +program 0 +languag 0 +softwar 0 +engineeringc 0 +networksc 0 +advanc 0 +databas 0 +systemsbaccalaur 0 +cours 0 +institut 0 +technolog 0 +japanhomei 0 +come 0 +small 0 +countri 0 +call 0 +know 0 +find 0 +avail 0 +homepag 0 +mapl 0 +avenu 0 +edulast 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..92c9fab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,83 @@ +research 1 +cornel 0 +comput 0 +optim 0 +thoma 0 +center 0 +mathemat 0 +group 0 +project 0 +interest 0 +continu 0 +program 0 +problem 0 +larg 0 +scale 0 +imag 0 +linear 0 +student 0 +postdoc 0 +coleman 0 +colemanthoma 0 +colemancornel 0 +universityi 0 +professcp 0 +match 0 +http 0 +scienc 0 +depart 0 +appli 0 +also 0 +strong 0 +affili 0 +theori 0 +director 0 +applic 0 +advanc 0 +institut 0 +final 0 +member 0 +ccop 0 +faculti 0 +broadfield 0 +discret 0 +programmi 0 +concern 0 +design 0 +understand 0 +practic 0 +effici 0 +numer 0 +algorithm 0 +primari 0 +develop 0 +computationalmethod 0 +tool 0 +automat 0 +differenti 0 +reconstruct 0 +biomed 0 +parallel 0 +minim 0 +inequ 0 +nonlinear 0 +equal 0 +constraint 0 +profession 0 +activ 0 +recent 0 +paper 0 +book 0 +current 0 +former 0 +associ 0 +softwar 0 +link 0 +curriculum 0 +vita 0 +best 0 +rhode 0 +hall 0 +univers 0 +ithaca 0 +york 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..76397094 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,166 @@ +type 1 +program 0 +languag 0 +theori 0 +home 0 +cornel 0 +theoret 0 +also 0 +compil 0 +interest 0 +semant 0 +formul 0 +intermedi 0 +research 0 +implement 0 +atyp 0 +view 0 +tractabl 0 +approxim 0 +mathemat 0 +often 0 +programminglanguag 0 +translat 0 +calculi 0 +work 0 +project 0 +system 0 +comput 0 +scienc 0 +import 0 +lord 0 +love 0 +karl 0 +crari 0 +pagekarl 0 +crarycrari 0 +eduoffic 0 +address 0 +upson 0 +halloffic 0 +phone 0 +researchbroadli 0 +speak 0 +primari 0 +design 0 +implementationand 0 +recent 0 +emphasi 0 +area 0 +subtyp 0 +object 0 +orient 0 +modular 0 +practic 0 +kmlwhich 0 +combin 0 +featur 0 +function 0 +intract 0 +richworld 0 +foundat 0 +whole 0 +perform 0 +newprogram 0 +develop 0 +result 0 +map 0 +construct 0 +anapproxim 0 +set 0 +convers 0 +paradigm 0 +well 0 +understood 0 +aminterest 0 +deepen 0 +understand 0 +relationship 0 +particularli 0 +issu 0 +mitig 0 +model 0 +seri 0 +lower 0 +calculu 0 +embed 0 +andth 0 +correspond 0 +interpret 0 +invari 0 +modelallow 0 +relat 0 +stage 0 +origin 0 +allowsth 0 +standard 0 +techniqu 0 +optim 0 +guarante 0 +safeti 0 +andcorrect 0 +care 0 +make 0 +possibl 0 +additionaloptim 0 +unavail 0 +strategi 0 +form 0 +part 0 +thenuprl 0 +hereat 0 +name 0 +come 0 +nuprl 0 +formal 0 +logic 0 +base 0 +martin 0 +automatedreason 0 +committe 0 +consist 0 +ofrobert 0 +constabl 0 +greg 0 +morrisett 0 +dexter 0 +kozen 0 +close 0 +jasonhickei 0 +select 0 +papersoth 0 +linksmark 0 +leon 0 +maintain 0 +collect 0 +ofprogram 0 +resourc 0 +cansearch 0 +technic 0 +report 0 +onlin 0 +grad 0 +life 0 +biblestudi 0 +pageth 0 +lurker 0 +guid 0 +babylon 0 +command 0 +answer 0 +jesu 0 +hear 0 +israel 0 +thelord 0 +heart 0 +andwith 0 +soul 0 +mind 0 +strength 0 +thesecond 0 +neighbor 0 +commandmentgreat 0 +mark 0 +univers 0 +pagedepart 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..b1d66ef1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,130 @@ +spars 1 +parallel 0 +comput 0 +siam 0 +linear 0 +solut 0 +least 0 +squar 0 +problem 0 +cornel 0 +scientif 0 +memori 0 +multiprocessor 0 +distribut 0 +research 0 +theori 0 +center 0 +confer 0 +factor 0 +philadelphia 0 +coleman 0 +matrix 0 +multifront 0 +proceed 0 +pothen 0 +chunguang 0 +univers 0 +advanc 0 +institut 0 +optim 0 +project 0 +algorithm 0 +numer 0 +softwar 0 +dens 0 +row 0 +seventh 0 +orthogon 0 +journal 0 +technic 0 +report 0 +ctctr 0 +larg 0 +processingfor 0 +cliqu 0 +tree 0 +sunchunguang 0 +sunphd 0 +pennsylvania 0 +state 0 +welcom 0 +home 0 +page 0 +associ 0 +also 0 +affili 0 +thecornel 0 +work 0 +close 0 +professorthoma 0 +interest 0 +algebra 0 +mathemat 0 +current 0 +ppcx 0 +program 0 +rank 0 +defici 0 +bound 0 +packag 0 +pssl 0 +psspd 0 +symmetr 0 +posit 0 +definit 0 +systemsrec 0 +lecturesparallel 0 +contain 0 +second 0 +matric 0 +coeur 0 +alen 0 +idaho 0 +octob 0 +ondistribut 0 +parallelprocess 0 +francisco 0 +februari 0 +select 0 +public 0 +deal 0 +solutionof 0 +decemb 0 +cornellunivers 0 +conferenceon 0 +process 0 +bailei 0 +bjorstad 0 +gilbert 0 +mascagni 0 +schreiber 0 +simon 0 +torczon 0 +watson 0 +map 0 +choleskyfactor 0 +septemb 0 +matriceson 0 +sixth 0 +sinovec 0 +key 0 +leuz 0 +petzold 0 +reed 0 +us 0 +fifth 0 +dongarra 0 +kennedi 0 +messina 0 +sorensen 0 +voigt 0 +compact 0 +data 0 +structuresin 0 +scale 0 +univeristi 0 +ithaca 0 +mail 0 +csun 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..d6188466 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,182 @@ +group 1 +process 0 +architectur 0 +david 0 +cooper 0 +implement 0 +horu 0 +privaci 0 +origin 0 +secur 0 +messag 0 +network 0 +research 0 +servic 0 +user 0 +current 0 +design 0 +synchroni 0 +us 0 +allow 0 +join 0 +trust 0 +member 0 +client 0 +server 0 +would 0 +scheme 0 +howev 0 +mobil 0 +privat 0 +inform 0 +devic 0 +birman 0 +page 0 +cornel 0 +involv 0 +provid 0 +authent 0 +order 0 +set 0 +virtual 0 +maintain 0 +within 0 +semant 0 +desir 0 +permit 0 +untrust 0 +relationshipsamong 0 +thesi 0 +propos 0 +solut 0 +type 0 +wish 0 +first 0 +send 0 +also 0 +prevent 0 +commun 0 +kenneth 0 +comput 0 +home 0 +postdoctor 0 +associ 0 +upson 0 +hall 0 +phone 0 +email 0 +dcooper 0 +securityarchitectur 0 +goal 0 +work 0 +layer 0 +horuswhich 0 +interact 0 +kerberosnetwork 0 +cryptograph 0 +tool 0 +toprovid 0 +mike 0 +reiter 0 +fault 0 +toler 0 +system 0 +originalimplement 0 +support 0 +model 0 +ofcomput 0 +crash 0 +failuremodel 0 +necessari 0 +honest 0 +result 0 +make 0 +assumpt 0 +anyprocess 0 +version 0 +isposs 0 +whose 0 +weaker 0 +ofvirtu 0 +untrustedprocess 0 +exampl 0 +might 0 +clientsto 0 +communicatewith 0 +accept 0 +limit 0 +command 0 +fromth 0 +respons 0 +screen 0 +horussecur 0 +arbitrari 0 +accomplish 0 +keymanag 0 +impersonateanoth 0 +trivial 0 +achieveth 0 +witha 0 +slightli 0 +higher 0 +overhead 0 +unlik 0 +enabl 0 +asclient 0 +mani 0 +complic 0 +problem 0 +inherentin 0 +static 0 +basic 0 +ofinform 0 +keep 0 +contentsof 0 +hiddenwith 0 +proper 0 +encrypt 0 +outsidersfrom 0 +determin 0 +maintainingth 0 +unlink 0 +sender 0 +recipi 0 +chaum 0 +februari 0 +sinc 0 +severaloth 0 +made 0 +improv 0 +addit 0 +staticnetwork 0 +locat 0 +carri 0 +mobilecommun 0 +gener 0 +themessag 0 +receiv 0 +reveal 0 +informationabout 0 +owner 0 +develop 0 +along 0 +advisorken 0 +protocol 0 +attack 0 +internaland 0 +extern 0 +adversari 0 +public 0 +preserv 0 +ofmobil 0 +proceed 0 +ieee 0 +symposium 0 +securityand 0 +apriv 0 +wireless 0 +anthoni 0 +mobilecomput 0 +dissert 0 +univers 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..35a93e74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,131 @@ +project 1 +love 1 +ever 1 +hope 1 +cyber 0 +mine 0 +home 0 +instead 0 +deidr 0 +model 0 +faith 0 +dread 0 +burn 0 +pandora 0 +abodedan 0 +abodegreet 0 +travel 0 +digit 0 +space 0 +welcom 0 +humbl 0 +prithe 0 +gentl 0 +surf 0 +shore 0 +blink 0 +teari 0 +ey 0 +rest 0 +weari 0 +kei 0 +mice 0 +born 0 +hand 0 +make 0 +thyselv 0 +abod 0 +brief 0 +autobiographi 0 +resum 0 +facial 0 +express 0 +univers 0 +emot 0 +simnet 0 +builder 0 +type 0 +game 0 +faiththei 0 +hardli 0 +prize 0 +ferro 0 +concret 0 +sai 0 +wise 0 +scorn 0 +bend 0 +ear 0 +lawyer 0 +tone 0 +scientist 0 +word 0 +need 0 +unseen 0 +unheard 0 +untouch 0 +silenc 0 +night 0 +unknown 0 +question 0 +uncertain 0 +yearn 0 +true 0 +direct 0 +field 0 +lordlovewarm 0 +friendship 0 +mindless 0 +infatu 0 +sensual 0 +romanc 0 +passion 0 +soft 0 +sigh 0 +belov 0 +poetri 0 +hopemyth 0 +favor 0 +beauteou 0 +place 0 +fault 0 +human 0 +role 0 +releas 0 +demon 0 +mani 0 +astrai 0 +pretti 0 +glimmer 0 +fals 0 +tread 0 +tortur 0 +broken 0 +road 0 +amidst 0 +thorn 0 +dark 0 +filthi 0 +soul 0 +diseas 0 +pain 0 +horror 0 +suffer 0 +reach 0 +fear 0 +tear 0 +cannot 0 +blame 0 +deed 0 +told 0 +heart 0 +take 0 +hold 0 +world 0 +would 0 +never 0 +frozen 0 +miseri 0 +cold 0 +spring 0 +etern 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..3afa006f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,78 @@ +server 1 +page 1 +librari 0 +digit 0 +search 0 +cornel 0 +inform 0 +tool 0 +home 0 +engin 0 +gopher 0 +network 0 +refer 0 +weather 0 +dean 0 +krafft 0 +select 0 +urlsdean 0 +point 0 +interestcornel 0 +cuinfo 0 +legal 0 +institut 0 +directori 0 +project 0 +public 0 +dimund 0 +document 0 +imag 0 +understand 0 +charact 0 +recognit 0 +siam 0 +captur 0 +access 0 +cours 0 +illinoi 0 +stanford 0 +michigan 0 +berkelei 0 +librarysearch 0 +lyco 0 +veronica 0 +archi 0 +gatewai 0 +anonym 0 +site 0 +depart 0 +togeth 0 +excel 0 +collect 0 +scout 0 +report 0 +intern 0 +planet 0 +earth 0 +whole 0 +internet 0 +catalog 0 +part 0 +global 0 +navig 0 +cern 0 +faqsvari 0 +stuff 0 +head 0 +mail 0 +list 0 +audio 0 +gear 0 +folkbook 0 +folk 0 +music 0 +ithaca 0 +forecast 0 +elsewher 0 +secur 0 +index 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..15654b8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,98 @@ +comput 1 +research 0 +cornel 0 +project 0 +dean 0 +report 0 +krafft 0 +page 0 +facil 0 +consortium 0 +avail 0 +technic 0 +system 0 +dienst 0 +home 0 +director 0 +facilitiesaddress 0 +upson 0 +halldepart 0 +sciencecornel 0 +universityithaca 0 +phone 0 +current 0 +serv 0 +administr 0 +inth 0 +scienc 0 +depart 0 +guis 0 +anadministr 0 +manag 0 +support 0 +group 0 +andworri 0 +number 0 +issu 0 +includ 0 +secur 0 +network 0 +build 0 +servic 0 +side 0 +princip 0 +investig 0 +spart 0 +arpa 0 +fund 0 +five 0 +sciencedepart 0 +thecorpor 0 +nation 0 +initi 0 +cnri 0 +researchi 0 +intend 0 +rapid 0 +dissemin 0 +break 0 +technicalresearch 0 +internet 0 +well 0 +make 0 +line 0 +theexist 0 +librari 0 +member 0 +part 0 +work 0 +davi 0 +xerox 0 +employe 0 +thedesign 0 +institut 0 +carl 0 +lagoz 0 +emploi 0 +develop 0 +implement 0 +protocol 0 +disseminationov 0 +similar 0 +materi 0 +call 0 +eight 0 +univers 0 +site 0 +atechn 0 +inform 0 +ondienst 0 +pleas 0 +send 0 +email 0 +togethera 0 +select 0 +url 0 +relat 0 +thing 0 +interestedin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..8097025d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,24 @@ +cornel 1 +home 0 +analysi 0 +divakar 0 +pagedivakar 0 +viswanathdivakar 0 +address 0 +upson 0 +hall 0 +univers 0 +ithaca 0 +graduat 0 +student 0 +comput 0 +scienc 0 +area 0 +interest 0 +isnumer 0 +advis 0 +page 0 +good 0 +place 0 +find 0 +numer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..6d5a9259 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,90 @@ +comput 1 +cornel 1 +diyu 0 +home 0 +databas 0 +compil 0 +engin 0 +ithaca 0 +receiv 0 +physic 0 +jersei 0 +china 0 +pagediyu 0 +daisi 0 +welcom 0 +spring 0 +cours 0 +practic 0 +distribut 0 +system 0 +translat 0 +practicum 0 +translatorsfal 0 +coursesc 0 +oper 0 +systemc 0 +softwar 0 +engineeringc 0 +advanc 0 +systemsel 0 +network 0 +telecommunicationsm 0 +projectorigin 0 +virtual 0 +realiti 0 +railroad 0 +projectsinc 0 +mayb 0 +like 0 +know 0 +littl 0 +doubt 0 +anywai 0 +current 0 +master 0 +student 0 +depart 0 +scienc 0 +univers 0 +locat 0 +central 0 +york 0 +gorgeou 0 +place 0 +live 0 +except 0 +winter 0 +last 0 +year 0 +appli 0 +institut 0 +technolog 0 +newark 0 +brought 0 +beauti 0 +campu 0 +tsinghua 0 +unviers 0 +beij 0 +also 0 +want 0 +friend 0 +miss 0 +us 0 +linksjava 0 +html 0 +tkfavorit 0 +sitestimecnnlondon 0 +timeswashington 0 +postchines 0 +digestchina 0 +new 0 +digestfeng 0 +yuanxin 0 +siart 0 +chinaloc 0 +connectionsctc 0 +sunlabweathermovi 0 +miller 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..2e776251 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,113 @@ +document 1 +recognit 0 +comput 0 +area 0 +visual 0 +match 0 +us 0 +collabor 0 +imag 0 +huttenloch 0 +cornel 0 +research 0 +work 0 +algorithm 0 +system 0 +remot 0 +video 0 +monitor 0 +also 0 +interest 0 +electron 0 +hausdorff 0 +method 0 +implement 0 +avail 0 +teach 0 +cours 0 +program 0 +vision 0 +activ 0 +home 0 +page 0 +daniel 0 +associ 0 +professordph 0 +main 0 +rang 0 +theoret 0 +techniqu 0 +geometri 0 +applic 0 +view 0 +wide 0 +network 0 +target 0 +type 0 +commun 0 +educ 0 +compar 0 +geometr 0 +structur 0 +base 0 +fast 0 +index 0 +eigenspac 0 +approxim 0 +fraction 0 +matlab 0 +perform 0 +evalu 0 +model 0 +object 0 +track 0 +identif 0 +digipap 0 +highli 0 +compact 0 +univers 0 +viewabl 0 +format 0 +conot 0 +support 0 +share 0 +brian 0 +smith 0 +develop 0 +author 0 +offer 0 +first 0 +time 0 +spring 0 +introduct 0 +profession 0 +xerox 0 +parc 0 +process 0 +start 0 +small 0 +group 0 +investig 0 +problem 0 +chair 0 +cvpr 0 +ieee 0 +confer 0 +pattern 0 +held 0 +juan 0 +june 0 +favorit 0 +geek 0 +snowboard 0 +mountain 0 +bike 0 +without 0 +extrem 0 +sport 0 +cool 0 +stupid 0 +attitud 0 +last 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..1bd1231e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,94 @@ +process 1 +algebra 0 +dsouza 0 +work 0 +tool 0 +ashvin 0 +specif 0 +design 0 +express 0 +postscript 0 +lnc 0 +cornel 0 +edui 0 +graduat 0 +student 0 +bard 0 +bloom 0 +focu 0 +thesi 0 +research 0 +develop 0 +oftool 0 +support 0 +method 0 +andverif 0 +concurr 0 +system 0 +withrespect 0 +metatheori 0 +becom 0 +immediatelyavail 0 +wide 0 +class 0 +allevi 0 +theproblem 0 +duplic 0 +effort 0 +inher 0 +custom 0 +exampl 0 +prototyp 0 +base 0 +calculu 0 +model 0 +checker 0 +simpl 0 +gso 0 +semant 0 +form 0 +part 0 +input 0 +make 0 +applic 0 +mani 0 +commonli 0 +us 0 +includ 0 +basic 0 +loto 0 +addit 0 +investig 0 +power 0 +order 0 +better 0 +understand 0 +compar 0 +final 0 +exploringappl 0 +techniqu 0 +gener 0 +bdd 0 +algebraterm 0 +full 0 +postscipt 0 +lite 0 +version 0 +also 0 +written 0 +result 0 +presentedth 0 +former 0 +comput 0 +aid 0 +verif 0 +latter 0 +foundat 0 +softwar 0 +technolog 0 +theoret 0 +computersci 0 +june 0 +present 0 +verifi 0 +compass 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..da16a50e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,53 @@ +page 1 +funda 0 +ergun 0 +cornel 0 +turkei 0 +ever 0 +improv 0 +pagefunda 0 +ergn 0 +mail 0 +eduhi 0 +welcom 0 +home 0 +name 0 +studentin 0 +comput 0 +scienc 0 +dept 0 +work 0 +programcheck 0 +prof 0 +ronitt 0 +rubinfeld 0 +researchpag 0 +also 0 +minor 0 +paint 0 +depart 0 +fine 0 +art 0 +origin 0 +come 0 +izmir 0 +undergrad 0 +bilkentunivers 0 +ankara 0 +research 0 +relat 0 +stuff 0 +warn 0 +might 0 +encounterpag 0 +written 0 +turkish 0 +angri 0 +dog 0 +risk 0 +person 0 +visit 0 +sinc 0 +alwai 0 +heavi 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..7671cc91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,183 @@ +tardo 1 +algorithm 0 +comput 0 +symposium 0 +ori 0 +appear 0 +proceed 0 +theori 0 +annual 0 +problem 0 +approxim 0 +version 0 +flow 0 +research 0 +shmoi 0 +plotkin 0 +preliminari 0 +siam 0 +scienc 0 +multicommod 0 +fast 0 +approximationalgorithm 0 +network 0 +current 0 +combinatori 0 +paper 0 +januari 0 +combinator 0 +lovasz 0 +activ 0 +cornel 0 +school 0 +oper 0 +public 0 +complex 0 +optim 0 +discret 0 +improv 0 +klein 0 +stein 0 +journal 0 +goldberg 0 +hopp 0 +polynomi 0 +kleinberg 0 +disjoint 0 +planar 0 +path 0 +graph 0 +springer 0 +verlag 0 +handbook 0 +graham 0 +grotschel 0 +north 0 +holland 0 +theorem 0 +tardosassoci 0 +professor 0 +depart 0 +upson 0 +hallcornel 0 +universityithaca 0 +phone 0 +email 0 +industri 0 +engineeringphon 0 +click 0 +daughter 0 +rebecca 0 +julia 0 +researchrec 0 +broadli 0 +speak 0 +interest 0 +includ 0 +mani 0 +aspect 0 +mostlywork 0 +particular 0 +networkproblem 0 +linear 0 +integ 0 +programmingproblem 0 +recent 0 +paperssurvei 0 +thegener 0 +assign 0 +mathemat 0 +program 0 +bound 0 +cutratio 0 +combinatorica 0 +fasterapproxim 0 +unit 0 +capac 0 +concurr 0 +problemwith 0 +applic 0 +rout 0 +find 0 +spars 0 +cut 0 +oncomput 0 +appearedin 0 +leighton 0 +makedon 0 +tragouda 0 +flowproblem 0 +system 0 +stoc 0 +special 0 +issu 0 +annualacm 0 +fraction 0 +pack 0 +cover 0 +inmathemat 0 +hasappear 0 +ieee 0 +thefound 0 +goeman 0 +williamson 0 +designproblem 0 +discretealgorithm 0 +time 0 +someevacu 0 +ondiscret 0 +quickest 0 +transship 0 +theproceed 0 +steiner 0 +direct 0 +multicut 0 +pathsproblem 0 +high 0 +diamet 0 +proceedingsof 0 +dens 0 +embed 0 +annualiee 0 +foundat 0 +rabani 0 +distribut 0 +packet 0 +switch 0 +arbitrari 0 +fleischer 0 +separ 0 +maxim 0 +violat 0 +comb 0 +inequ 0 +ipco 0 +june 0 +survei 0 +tarjan 0 +sept 0 +vlsi 0 +design 0 +kort 0 +lovaszand 0 +schrijver 0 +strongli 0 +inoptim 0 +intern 0 +congress 0 +ofmathematician 0 +kyoto 0 +tokyo 0 +computersci 0 +annot 0 +bibliographi 0 +inproc 0 +summer 0 +maastricht 0 +netherland 0 +proc 0 +networkoptim 0 +practic 0 +netflow 0 +miniato 0 +itali 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..5a72b230 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,14 @@ +franci 1 +scienc 1 +home 0 +page 0 +graduat 0 +student 0 +univers 0 +california 0 +berkeleymathemat 0 +departmentcomput 0 +departmentcornel 0 +universitycomput 0 +departmenthumorfcc 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..57859a0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,2 @@ +felix 1 +world 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..55208ede --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,57 @@ +smith 1 +frederick 0 +cornel 0 +dexter 0 +kozen 0 +kleen 0 +algebra 0 +test 0 +technic 0 +report 0 +univers 0 +link 0 +homepag 0 +program 0 +languag 0 +homepagefrederick 0 +grad 0 +student 0 +upson 0 +halldepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +paper 0 +erni 0 +cohen 0 +complex 0 +juli 0 +complet 0 +decid 0 +april 0 +person 0 +us 0 +greg 0 +morrisett 0 +research 0 +page 0 +class 0 +take 0 +system 0 +multimedia 0 +semant 0 +math 0 +introduct 0 +analysi 0 +epicuri 0 +food 0 +zine 0 +cartalk 0 +home 0 +click 0 +clack 0 +catch 0 +sundai 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..b9be4c9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,25 @@ +frank 1 +page 0 +cornel 0 +planet 0 +either 0 +matter 0 +adelstein 0 +post 0 +doctor 0 +associ 0 +xerox 0 +design 0 +research 0 +institut 0 +offic 0 +phone 0 +electron 0 +mail 0 +actual 0 +inform 0 +checkout 0 +improv 0 +happi 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..2be96567 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,65 @@ +fred 1 +cornel 0 +languag 0 +comput 0 +apollo 0 +yuan 0 +softwar 0 +engin 0 +current 0 +soon 0 +http 0 +visual 0 +affili 0 +theori 0 +center 0 +univers 0 +chelmsford 0 +version 0 +feet 0 +shown 0 +resum 0 +project 0 +other 0 +appear 0 +distribut 0 +server 0 +scramo 0 +midi 0 +choreograph 0 +anim 0 +model 0 +postscript 0 +vpla 0 +program 0 +animationlink 0 +massachusett 0 +hewlett 0 +packardlink 0 +previou 0 +scienc 0 +group 0 +binghamton 0 +lawrenc 0 +berkelei 0 +laboratoryinterest 0 +hobbi 0 +section 0 +develop 0 +mayb 0 +next 0 +year 0 +photographi 0 +cello 0 +guitar 0 +aquarium 0 +sciencecornel 0 +home 0 +burl 0 +work 0 +email 0 +fredhsu 0 +snail 0 +drive 0 +peopl 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..b7409aef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,149 @@ +comput 1 +scienc 0 +deepak 0 +back 0 +main 0 +page 0 +engin 0 +multimedia 0 +prof 0 +univers 0 +cornel 0 +major 0 +cours 0 +system 0 +well 0 +long 0 +world 0 +born 0 +school 0 +balakrishna 0 +undergradu 0 +colleg 0 +india 0 +graduat 0 +pursu 0 +master 0 +degre 0 +actual 0 +four 0 +lucki 0 +part 0 +balakrishnamast 0 +engineeringdepart 0 +sciencecornel 0 +resumeeducationcoursesperson 0 +resum 0 +html 0 +postscript 0 +educ 0 +complet 0 +june 0 +karnataka 0 +region 0 +surathk 0 +interest 0 +relat 0 +oper 0 +artifici 0 +intellig 0 +compil 0 +construct 0 +data 0 +commun 0 +graphic 0 +present 0 +specialis 0 +want 0 +involv 0 +project 0 +deal 0 +server 0 +program 0 +follow 0 +list 0 +taken 0 +fall 0 +semest 0 +brian 0 +smith 0 +advanc 0 +databas 0 +praveen 0 +seshadri 0 +network 0 +srinivasan 0 +keshav 0 +softwar 0 +michael 0 +godfrei 0 +person 0 +start 0 +goe 0 +upon 0 +time 0 +novemb 0 +land 0 +call 0 +bharat 0 +outsid 0 +precis 0 +cute 0 +chubbi 0 +littl 0 +babi 0 +weigh 0 +approxim 0 +pound 0 +took 0 +name 0 +mean 0 +light 0 +went 0 +still 0 +process 0 +chang 0 +incident 0 +probabl 0 +divin 0 +interfer 0 +aishwarya 0 +miss 0 +leav 0 +miniscul 0 +detail 0 +earlier 0 +life 0 +dive 0 +straight 0 +high 0 +nation 0 +public 0 +bangalor 0 +greater 0 +place 0 +someon 0 +noth 0 +better 0 +krec 0 +that 0 +wonder 0 +anoth 0 +year 0 +holidai 0 +conquer 0 +class 0 +never 0 +match 0 +hope 0 +get 0 +somewher 0 +final 0 +here 0 +link 0 +friend 0 +ashish 0 +aastha 0 +indira 0 +ankit 0 +vineet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..b04ee490 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,58 @@ +system 1 +uddin 0 +ghia 0 +interest 0 +engin 0 +cornel 0 +comput 0 +work 0 +program 0 +asif 0 +ghiasasif 0 +welcom 0 +mywww 0 +home 0 +page 0 +warn 0 +constructioni 0 +student 0 +scienc 0 +area 0 +distribut 0 +multimedia 0 +bachelor 0 +degre 0 +univers 0 +technolog 0 +karachi 0 +pakistan 0 +sinc 0 +global 0 +inform 0 +solut 0 +present 0 +studi 0 +leav 0 +master 0 +respons 0 +includ 0 +applic 0 +unix 0 +administr 0 +support 0 +educ 0 +network 0 +manag 0 +installationso 0 +number 0 +project 0 +plan 0 +onlin 0 +good 0 +hopefulli 0 +year 0 +publicationsth 0 +follow 0 +music 0 +cricket 0 +astronomyasif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..d02b875c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,9 @@ +home 1 +page 0 +move 0 +http 0 +berkelei 0 +dglaser 0 +htmlpleas 0 +visit 0 +million 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..ab2fb949 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,333 @@ +comput 1 +scienc 0 +award 0 +program 0 +cornel 0 +receiv 0 +educ 0 +gri 0 +logic 0 +us 0 +paper 0 +munich 0 +teach 0 +topic 0 +colleg 0 +depart 0 +born 0 +year 0 +stanford 0 +twin 0 +tabl 0 +system 0 +work 0 +survei 0 +david 0 +home 0 +professor 0 +institut 0 +technolog 0 +interest 0 +area 0 +languag 0 +formal 0 +tool 0 +inform 0 +biographi 0 +text 0 +symposium 0 +went 0 +wife 0 +elain 0 +later 0 +illinoi 0 +math 0 +help 0 +paul 0 +sinc 0 +made 0 +weather 0 +move 0 +chair 0 +return 0 +research 0 +proud 0 +raman 0 +best 0 +document 0 +serv 0 +associ 0 +also 0 +period 0 +editor 0 +know 0 +time 0 +laugh 0 +pagedavid 0 +grieswilliam 0 +lewi 0 +engineeringdr 0 +methodolog 0 +particular 0 +formaldevelop 0 +relat 0 +programminglanguag 0 +semant 0 +asinterest 0 +researchin 0 +fact 0 +understand 0 +taughta 0 +freshman 0 +sophomor 0 +level 0 +anoverrid 0 +concern 0 +mine 0 +click 0 +follow 0 +item 0 +curriculum 0 +vita 0 +short 0 +written 0 +polya 0 +announc 0 +dimac 0 +link 0 +pagecomput 0 +upson 0 +hallcornel 0 +universityithaca 0 +edushort 0 +griesi 0 +flush 0 +york 0 +spent 0 +iescap 0 +queen 0 +workfor 0 +naval 0 +weapon 0 +laboratori 0 +civilian 0 +amathematician 0 +programm 0 +fewmonth 0 +marri 0 +novemb 0 +master 0 +degreein 0 +assistantship 0 +twogerman 0 +manfr 0 +ruedig 0 +wiehl 0 +write 0 +full 0 +algol 0 +compilerfor 0 +figur 0 +implementrecurs 0 +effici 0 +mani 0 +end 0 +go 0 +almost 0 +three 0 +doctor 0 +bauer 0 +joseph 0 +stoer 0 +germani 0 +june 0 +wasin 0 +numer 0 +analysi 0 +these 0 +notyet 0 +kosher 0 +assist 0 +susan 0 +excit 0 +usual 0 +thebirthdai 0 +april 0 +intown 0 +make 0 +four 0 +birthdai 0 +cake 0 +left 0 +whichha 0 +snow 0 +ever 0 +wasdepart 0 +becam 0 +william 0 +lewisprofessor 0 +engin 0 +guggenheim 0 +fellowship 0 +contentsi 0 +better 0 +known 0 +mytext 0 +writingand 0 +contribut 0 +thewond 0 +good 0 +bloom 0 +wherey 0 +plant 0 +number 0 +contributionsto 0 +ieee 0 +taylor 0 +booth 0 +sigcseaward 0 +outstand 0 +clarkaward 0 +art 0 +theamerican 0 +feder 0 +process 0 +societi 0 +afip 0 +advise 0 +stand 0 +susanowicki 0 +thesi 0 +laid 0 +foundat 0 +proof 0 +correct 0 +ofparallel 0 +notion 0 +interfer 0 +freeness 0 +author 0 +bestpap 0 +langaug 0 +andt 0 +sthesi 0 +dissert 0 +designedand 0 +implement 0 +speak 0 +latex 0 +includ 0 +technic 0 +articl 0 +book 0 +printedor 0 +spoken 0 +abl 0 +speakmathemat 0 +effect 0 +manner 0 +import 0 +goal 0 +read 0 +blind 0 +alreadi 0 +produc 0 +audiocassett 0 +thecomput 0 +board 0 +late 0 +open 0 +officein 0 +washington 0 +began 0 +serious 0 +repres 0 +researchinterest 0 +conduct 0 +taulbe 0 +obtain 0 +essenti 0 +complet 0 +responsesfrom 0 +grant 0 +noother 0 +compar 0 +respons 0 +rate 0 +itrequir 0 +telephon 0 +call 0 +sendin 0 +questionnair 0 +researchassoci 0 +servic 0 +forchair 0 +toward 0 +respect 0 +andrespons 0 +current 0 +acta 0 +informatica 0 +aspect 0 +softwar 0 +concept 0 +andtool 0 +edit 0 +keep 0 +busi 0 +enjoi 0 +takean 0 +individu 0 +willsuggest 0 +substanti 0 +rewrit 0 +believ 0 +servewher 0 +fredb 0 +schneider 0 +springer 0 +verlag 0 +andmonograph 0 +spare 0 +sport 0 +like 0 +golf 0 +softbal 0 +volleybal 0 +swim 0 +tenni 0 +china 0 +isplit 0 +pant 0 +plai 0 +ping 0 +pong 0 +hour 0 +give 0 +alectur 0 +mention 0 +audienc 0 +turnedaround 0 +explain 0 +interpret 0 +spoke 0 +everyonelaugh 0 +howev 0 +whether 0 +told 0 +truth 0 +justsaid 0 +joke 0 +sing 0 +barbershop 0 +andgilbert 0 +sullivan 0 +around 0 +hous 0 +carpentri 0 +wire 0 +remodel 0 +taken 0 +yield 0 +considerablesatisfact 0 +content 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..2986cd6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,23 @@ +grinzayd 1 +cornel 1 +alex 0 +homepagealex 0 +grinzaydm 0 +student 0 +comput 0 +sciencecornel 0 +universitytel 0 +email 0 +first 0 +week 0 +link 0 +necx 0 +directinternet 0 +shop 0 +networkcomput 0 +express 0 +damarkwarn 0 +page 0 +bore 0 +learn 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..351940b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,46 @@ +comput 1 +cornel 1 +grzegorz 0 +grze 0 +czajkowski 0 +homepag 0 +czajkowskidepart 0 +sciencecornel 0 +universityithaca 0 +offic 0 +second 0 +year 0 +student 0 +program 0 +depart 0 +scienceat 0 +univers 0 +ithaca 0 +york 0 +complet 0 +master 0 +degre 0 +scienc 0 +krakow 0 +poland 0 +current 0 +involv 0 +sever 0 +project 0 +also 0 +charg 0 +administ 0 +cuc 0 +advisor 0 +thorsten 0 +eicken 0 +link 0 +relat 0 +research 0 +architectur 0 +activ 0 +messag 0 +split 0 +last 0 +modifi 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..4a9c5da7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,77 @@ +halpern 1 +reason 0 +comput 0 +depart 0 +knowledg 0 +uncertainti 0 +distribut 0 +also 0 +work 0 +talk 0 +give 0 +detail 0 +paper 0 +fall 0 +teach 0 +home 0 +pagejoseph 0 +professorcornel 0 +universitycomput 0 +scienc 0 +upson 0 +hallithaca 0 +cornel 0 +research 0 +focus 0 +applic 0 +game 0 +theori 0 +although 0 +done 0 +continu 0 +interest 0 +topic 0 +fault 0 +toler 0 +program 0 +languag 0 +semant 0 +li 0 +boundari 0 +number 0 +field 0 +recent 0 +gave 0 +econom 0 +princeton 0 +describ 0 +someon 0 +mathemat 0 +call 0 +scientist 0 +economist 0 +abouta 0 +subject 0 +mainli 0 +studi 0 +philosoph 0 +probabl 0 +best 0 +sentenc 0 +descript 0 +like 0 +check 0 +list 0 +public 0 +pointer 0 +abstract 0 +mani 0 +case 0 +avail 0 +activ 0 +resum 0 +cours 0 +sequel 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..813e1298 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,10 @@ +tsuneshi 1 +hashimoto 1 +hashimototsuneshi 0 +hashimotothi 0 +home 0 +page 0 +construct 0 +cstsuneshi 0 +hashi 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..068978f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,24 @@ +hayden 1 +system 1 +mark 0 +cornel 0 +distribut 0 +commun 0 +offic 0 +upson 0 +univers 0 +ithaca 0 +fall 0 +teach 0 +tast 0 +unix 0 +interest 0 +horu 0 +ensembl 0 +nuprl 0 +proof 0 +develop 0 +hockei 0 +last 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..34f17551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,6 @@ +heji 1 +improv 0 +home 0 +page 0 +cyber 0 +pond 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..ebb741f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,27 @@ +cornel 1 +deyu 0 +student 0 +comput 0 +scienc 0 +home 0 +page 0 +graduat 0 +universitydept 0 +upson 0 +hallithaca 0 +third 0 +year 0 +born 0 +shanghai 0 +china 0 +receiv 0 +undergradu 0 +degre 0 +berkelei 0 +faculti 0 +advisor 0 +thorsten 0 +eicken 0 +come 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..754b2252 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,59 @@ +cornel 1 +comput 0 +univers 0 +huang 0 +chines 0 +christian 0 +jing 0 +depart 0 +scienc 0 +vision 0 +fellowship 0 +home 0 +page 0 +upson 0 +hall 0 +ithaca 0 +student 0 +thedepart 0 +scienceat 0 +receiv 0 +bachelorand 0 +master 0 +degre 0 +appli 0 +mathemat 0 +tsinghua 0 +beij 0 +chinami 0 +academ 0 +interest 0 +multimedia 0 +system 0 +work 0 +professor 0 +ramin 0 +zabih 0 +imag 0 +retriev 0 +video 0 +process 0 +motion 0 +track 0 +us 0 +link 0 +annot 0 +bibliographi 0 +pattern 0 +recognit 0 +relat 0 +machin 0 +learn 0 +optim 0 +check 0 +evangel 0 +resourc 0 +center 0 +mission 0 +back 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..4382d628 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,35 @@ +page 1 +chin 0 +chen 0 +home 0 +address 0 +practic 0 +distribut 0 +system 0 +welcom 0 +current 0 +mapl 0 +ithaca 0 +mail 0 +icchen 0 +cornel 0 +perman 0 +sung 0 +taipei 0 +taiwan 0 +class 0 +spring 0 +comput 0 +graphic 0 +practicum 0 +databas 0 +manag 0 +album 0 +resum 0 +new 0 +china 0 +time 0 +nctu 0 +construct 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..c3c25a55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,27 @@ +engin 1 +system 1 +page 0 +comput 0 +home 0 +indira 0 +malik 0 +depart 0 +scienc 0 +master 0 +imalik 0 +cornel 0 +resum 0 +post 0 +script 0 +cours 0 +program 0 +softwar 0 +advanc 0 +databas 0 +network 0 +multimedia 0 +visit 0 +high 0 +school 0 +tap 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..138d86e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,80 @@ +comput 1 +system 0 +project 0 +indira 0 +scienc 0 +oper 0 +graphic 0 +java 0 +cornel 0 +engin 0 +inform 0 +camera 0 +applet 0 +vidyaprakash 0 +vidyaprakashmast 0 +engineeringclass 0 +dept 0 +sciencecornel 0 +universitywelcom 0 +homepag 0 +current 0 +student 0 +depart 0 +univers 0 +ithaca 0 +degre 0 +colleg 0 +technolog 0 +coimbator 0 +india 0 +cours 0 +taken 0 +cornelluniversityfal 0 +practicum 0 +specif 0 +hoca 0 +softwar 0 +multimedia 0 +audio 0 +process 0 +toolkit 0 +manag 0 +polici 0 +spring 0 +cspracticum 0 +anim 0 +magic 0 +carpet 0 +colloqium 0 +manageri 0 +financesumm 0 +independ 0 +research 0 +tracingin 0 +perspectivetransform 0 +click 0 +postscript 0 +version 0 +myresumeclick 0 +perspect 0 +transformssom 0 +interest 0 +site 0 +cool 0 +sgamelan 0 +directori 0 +calvinand 0 +hobb 0 +galleri 0 +gif 0 +indian 0 +recip 0 +chicker 0 +wood 0 +drive 0 +nashvil 0 +tennesse 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..f91e8d01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,90 @@ +us 1 +system 1 +manual 1 +write 1 +pageioi 0 +home 0 +cornel 0 +softwar 0 +base 0 +engin 0 +test 0 +packag 0 +file 0 +short 0 +detail 0 +come 0 +homeless 0 +lamioi 0 +current 0 +research 0 +assist 0 +prof 0 +brian 0 +smith 0 +comput 0 +scienc 0 +depart 0 +univers 0 +interest 0 +multi 0 +media 0 +parallel 0 +program 0 +instruct 0 +set 0 +environ 0 +knowledg 0 +guidelin 0 +good 0 +extens 0 +code 0 +doesn 0 +cover 0 +much 0 +script 0 +section 0 +suit 0 +valuabl 0 +programm 0 +postscript 0 +version 0 +complet 0 +includ 0 +templat 0 +sourc 0 +document 0 +introduct 0 +descript 0 +user 0 +remot 0 +machin 0 +index 0 +inform 0 +tutori 0 +right 0 +week 0 +put 0 +togeth 0 +knowledgebas 0 +repositori 0 +try 0 +spam 0 +site 0 +multim 0 +directori 0 +get 0 +start 0 +virtual 0 +realiti 0 +conferenc 0 +work 0 +prototyp 0 +mpeg 0 +video 0 +server 0 +http 0 +protocol 0 +spring 0 +homework 0 +solut 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..74d47bfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,135 @@ +jackson 1 +nuprl 0 +paul 0 +abstract 0 +cornel 0 +develop 0 +design 0 +avail 0 +format 0 +theori 0 +comput 0 +algebra 0 +thesi 0 +proof 0 +full 0 +text 0 +postscript 0 +list 0 +home 0 +univers 0 +depart 0 +research 0 +softwar 0 +editor 0 +confer 0 +circuit 0 +proceed 0 +hardwar 0 +page 0 +access 0 +next 0 +includ 0 +pagepaul 0 +post 0 +doctor 0 +associatecornel 0 +mail 0 +eduwww 0 +http 0 +info 0 +peopl 0 +htmladdress 0 +scienc 0 +upson 0 +hall 0 +ithaca 0 +usaphon 0 +intereststheorem 0 +prove 0 +environ 0 +formal 0 +method 0 +andhardwar 0 +synthesi 0 +scientif 0 +program 0 +linkag 0 +tool 0 +engin 0 +informationmi 0 +entitl 0 +enhanc 0 +developmentsystem 0 +appli 0 +paper 0 +explor 0 +construct 0 +type 0 +bundi 0 +intern 0 +automateddeduct 0 +lectur 0 +note 0 +artif 0 +intellig 0 +springer 0 +verlag 0 +june 0 +bout 0 +stavrid 0 +melham 0 +inter 0 +theorem 0 +prover 0 +ifip 0 +transactionsa 0 +north 0 +holland 0 +toolkit 0 +float 0 +point 0 +thenuprl 0 +system 0 +theadvanc 0 +workshop 0 +correct 0 +methodolog 0 +elsevi 0 +nuprlth 0 +project 0 +world 0 +wide 0 +document 0 +commun 0 +live 0 +session 0 +basic 0 +load 0 +collect 0 +still 0 +need 0 +workon 0 +make 0 +someon 0 +els 0 +getround 0 +pai 0 +attent 0 +sometim 0 +month 0 +hypertext 0 +thetheori 0 +foreach 0 +introduct 0 +summari 0 +definit 0 +andtheorem 0 +thepolynomi 0 +relat 0 +moment 0 +shouldb 0 +coupl 0 +dai 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..c33fb575 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,3 @@ +hani 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..c21e7d90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,21 @@ +cornel 1 +tibor 0 +jnositibor 0 +jnosiwelcom 0 +mywww 0 +home 0 +page 0 +perman 0 +constructionoffic 0 +upson 0 +hall 0 +univers 0 +ithaca 0 +usaoffic 0 +phone 0 +interest 0 +site 0 +project 0 +zenotibor 0 +jnosi 0 +janosi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..d71218a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,13 @@ +janwun 1 +cornel 0 +master 0 +engin 0 +student 0 +comput 0 +scienc 0 +depart 0 +address 0 +mapl 0 +avenu 0 +ithaca 0 +telephon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..79bf6358 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,183 @@ +languag 1 +morrisett 0 +program 0 +research 0 +greg 0 +compil 0 +harper 0 +standard 0 +system 0 +type 0 +also 0 +robert 0 +technic 0 +version 0 +gregori 0 +interest 0 +cornel 0 +high 0 +level 0 +softwar 0 +implement 0 +us 0 +memori 0 +report 0 +appear 0 +publish 0 +sigplan 0 +symposium 0 +principl 0 +comput 0 +ithaca 0 +offic 0 +phone 0 +time 0 +construct 0 +code 0 +support 0 +semant 0 +direct 0 +abstract 0 +design 0 +manag 0 +polymorph 0 +decemb 0 +tarditi 0 +cheng 0 +stone 0 +workshop 0 +extend 0 +proc 0 +proceed 0 +june 0 +portabl 0 +multiprocess 0 +jersei 0 +andrew 0 +tolmach 0 +project 0 +home 0 +page 0 +faculti 0 +assist 0 +professor 0 +scienc 0 +univers 0 +upson 0 +hall 0 +tabl 0 +content 0 +intereststeachingselect 0 +papersrel 0 +linksperson 0 +informationresearch 0 +interestsmi 0 +primari 0 +develop 0 +ofadvanc 0 +particularli 0 +interestedin 0 +forbuild 0 +includ 0 +operatingsystem 0 +distribut 0 +late 0 +focus 0 +onth 0 +issu 0 +kept 0 +safelanguag 0 +concentr 0 +produc 0 +faster 0 +consum 0 +less 0 +hack 0 +bit 0 +bring 0 +power 0 +base 0 +toolsfrom 0 +theori 0 +partial 0 +evalu 0 +interpret 0 +gener 0 +specif 0 +real 0 +systemssoftwar 0 +teachingc 0 +fall 0 +advanc 0 +spring 0 +select 0 +paperssemant 0 +septemb 0 +gzip 0 +postscript 0 +thesi 0 +optim 0 +confer 0 +perform 0 +safetythrough 0 +closur 0 +convers 0 +yasuhiko 0 +minamid 0 +juli 0 +model 0 +matthia 0 +felleisen 0 +conf 0 +function 0 +andcomput 0 +architectur 0 +reportcmu 0 +notecmu 0 +intensionaltyp 0 +analysi 0 +annual 0 +francisco 0 +januari 0 +optimist 0 +parallelizationgreg 0 +mauric 0 +herlihi 0 +octob 0 +refin 0 +first 0 +class 0 +store 0 +state 0 +copenhagen 0 +denmark 0 +lock 0 +platform 0 +fourth 0 +practic 0 +parallel 0 +diego 0 +interfac 0 +princeton 0 +ad 0 +thread 0 +eric 0 +cooper 0 +relat 0 +link 0 +mark 0 +leon 0 +resourc 0 +member 0 +carnegi 0 +mellon 0 +line 0 +inform 0 +orient 0 +bibliographi 0 +depart 0 +scienceperson 0 +informationhom 0 +address 0 +warren 0 +road 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..12f3fe85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,15 @@ +jiun 1 +address 1 +resum 0 +java 0 +current 0 +mapl 0 +avenu 0 +ithaca 0 +email 0 +jhlin 0 +cornel 0 +perman 0 +shing 0 +taipei 0 +taiwan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..796f1910 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,5 @@ +jerri 1 +cornel 0 +edujerri 0 +project 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..89c533cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,115 @@ +china 1 +chines 0 +univers 0 +scienc 0 +comput 0 +cornel 0 +graduat 0 +student 0 +depart 0 +binghamton 0 +taiwan 0 +new 0 +rank 0 +home 0 +page 0 +twin 0 +sister 0 +phone 0 +suni 0 +mathemat 0 +nation 0 +fellowship 0 +site 0 +internet 0 +magazin 0 +music 0 +digest 0 +homepag 0 +servic 0 +wang 0 +wangphd 0 +upson 0 +hallithaca 0 +offic 0 +email 0 +jiawang 0 +first 0 +year 0 +state 0 +york 0 +degre 0 +transfer 0 +nankai 0 +tianjin 0 +honor 0 +award 0 +barri 0 +goldwat 0 +scholar 0 +engin 0 +sciencefound 0 +research 0 +famili 0 +cool 0 +link 0 +hongkong 0 +beij 0 +review 0 +chinaand 0 +relat 0 +daili 0 +cbnet 0 +forum 0 +chinanet 0 +stamp 0 +time 0 +window 0 +chinesecalendar 0 +directori 0 +mediainform 0 +educ 0 +histori 0 +cultur 0 +hongkonglaserdisccent 0 +internetdistribut 0 +multilingu 0 +softwar 0 +ryan 0 +smovieplex 0 +sceneri 0 +pictur 0 +tour 0 +entertain 0 +sheng 0 +tian 0 +diwww 0 +futur 0 +interest 0 +america 0 +best 0 +school 0 +liber 0 +art 0 +film 0 +ieee 0 +societi 0 +monei 0 +foundat 0 +peterson 0 +guid 0 +postcard 0 +program 0 +incomput 0 +thesenior 0 +virtual 0 +tourist 0 +worldmap 0 +yahoo 0 +christian 0 +mandarin 0 +cssa 0 +weather 0 +stoni 0 +brook 0 +ucla 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..7feb0058 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,45 @@ +vision 1 +justin 0 +miller 0 +home 0 +page 0 +mani 0 +research 0 +navi 0 +uniform 0 +current 0 +master 0 +engin 0 +student 0 +comput 0 +scienc 0 +colleg 0 +ofengin 0 +cornel 0 +univers 0 +semest 0 +teach 0 +assist 0 +com 0 +machin 0 +long 0 +night 0 +found 0 +robot 0 +csrvl 0 +assistantwork 0 +prof 0 +ramin 0 +zabih 0 +primari 0 +interest 0 +ismachin 0 +particularli 0 +level 0 +imag 0 +process 0 +gener 0 +informationsom 0 +rant 0 +project 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..712d8e7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,137 @@ +comput 1 +new 0 +scienc 0 +high 0 +softwar 0 +cornel 0 +work 0 +engin 0 +school 0 +hillsboro 0 +cool 0 +distribut 0 +system 0 +game 0 +parallel 0 +mpeg 0 +encod 0 +york 0 +press 0 +jeff 0 +moorejeff 0 +moorewel 0 +current 0 +graduat 0 +student 0 +mastersof 0 +receiv 0 +purdu 0 +univers 0 +west 0 +lafayett 0 +indiana 0 +went 0 +oregon 0 +suburb 0 +portland 0 +employmentmi 0 +resum 0 +anyon 0 +interest 0 +intel 0 +internet 0 +product 0 +divis 0 +creat 0 +spring 0 +classesnba 0 +thrive 0 +inform 0 +revolut 0 +entertain 0 +sectorc 0 +practic 0 +systemsc 0 +practicum 0 +cornellopoli 0 +perform 0 +architectur 0 +network 0 +optim 0 +researchfal 0 +classesc 0 +technolog 0 +techniquec 0 +formal 0 +methodsc 0 +multimedia 0 +research 0 +paperc 0 +colloquiumc 0 +tool 0 +seminar 0 +present 0 +opendoc 0 +mfcoptim 0 +researchsoftwar 0 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 0 +companiesintelsilicon 0 +graphicsibmsunapplemagazinespc 0 +magazinepc 0 +weekpc 0 +computingcomput 0 +shopperwindow 0 +sourcescomput 0 +lifemacusermacweekinteract 0 +weekfamili 0 +pccomput 0 +worldelectron 0 +newspapersusa 0 +todaywal 0 +street 0 +journalnew 0 +timesphiladelphia 0 +onlineth 0 +daili 0 +worldwideth 0 +dalla 0 +morn 0 +opinionsth 0 +detroit 0 +free 0 +gopherth 0 +knoxvil 0 +sentinelth 0 +leader 0 +onlinelat 0 +serviceth 0 +nugget 0 +newspap 0 +sister 0 +oregonrworld 0 +orang 0 +counti 0 +registerth 0 +francisco 0 +chronicl 0 +examinersan 0 +jose 0 +mercuryth 0 +seattl 0 +timesnando 0 +netusa 0 +todayboston 0 +globeportland 0 +herald 0 +main 0 +sundai 0 +telegramvisitor 0 +sinc 0 +januari 0 +campu 0 +address 0 +mapl 0 +fdithaca 0 +last 0 +updat 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..e070e675 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,81 @@ +project 1 +lui 0 +java 0 +enabl 0 +browser 0 +system 0 +home 0 +comput 0 +us 0 +would 0 +anim 0 +interest 0 +current 0 +distribut 0 +video 0 +page 0 +view 0 +clock 0 +jose 0 +fernandez 0 +pagejos 0 +fernandezjos 0 +fernandezmast 0 +engin 0 +depart 0 +sciencecornel 0 +universityithaca 0 +address 0 +mapl 0 +avenu 0 +ebithaca 0 +mail 0 +joselui 0 +cornel 0 +scroll 0 +text 0 +sign 0 +resum 0 +curriculum 0 +vita 0 +multimedia 0 +graphic 0 +imag 0 +transit 0 +rivl 0 +exampl 0 +presentationc 0 +pyramania 0 +game 0 +spaceship 0 +battl 0 +report 0 +hoca 0 +design 0 +code 0 +oper 0 +implement 0 +multitask 0 +virtual 0 +memori 0 +meng 0 +autonom 0 +vehicl 0 +simul 0 +hobbi 0 +photographi 0 +click 0 +picturesmusiccomputerswrit 0 +direct 0 +crazi 0 +movi 0 +actor 0 +recruit 0 +friend 0 +time 0 +courtesi 0 +bill 0 +giel 0 +visitor 0 +number 0 +better 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..f7b18353 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,30 @@ +hurtado 1 +cornel 1 +engin 1 +project 1 +julin 0 +home 0 +pagejulin 0 +click 0 +curriculum 0 +vita 0 +universitymast 0 +busi 0 +administr 0 +johnson 0 +graduat 0 +school 0 +managementmast 0 +depart 0 +comput 0 +science 0 +mail 0 +colombia 0 +linda 0 +er 0 +master 0 +distribut 0 +system 0 +autonom 0 +vehicl 0 +simul 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..bc7617e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,28 @@ +page 1 +current 1 +cornel 1 +texa 1 +janeen 0 +homepagejaneen 0 +reich 0 +welcom 0 +home 0 +univers 0 +complet 0 +comput 0 +scienc 0 +august 0 +septemb 0 +join 0 +system 0 +group 0 +instrument 0 +dalla 0 +send 0 +email 0 +jreich 0 +edumi 0 +resum 0 +ad 0 +favorit 0 +thing 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..636f03b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,120 @@ +comput 1 +home 0 +list 0 +engin 0 +bodi 0 +mail 0 +camaro 0 +page 0 +chevi 0 +system 0 +cours 0 +master 0 +high 0 +facial 0 +car 0 +stock 0 +ford 0 +septemb 0 +come 0 +jodi 0 +shapirojodi 0 +shapiroeduc 0 +receiv 0 +univers 0 +massachusett 0 +amherst 0 +current 0 +cornel 0 +get 0 +meng 0 +scienc 0 +graduat 0 +resum 0 +fall 0 +multimedia 0 +systemsc 0 +softwar 0 +engineeringe 0 +network 0 +telecommunicationc 0 +researchspr 0 +machin 0 +visionc 0 +perform 0 +systemse 0 +capac 0 +networksnba 0 +thrive 0 +inform 0 +revolutionc 0 +researchma 0 +automot 0 +engineeringinterest 0 +project 0 +design 0 +implement 0 +dynam 0 +gener 0 +synchron 0 +speech 0 +animationlow 0 +cost 0 +portabl 0 +desktop 0 +videoconferenc 0 +window 0 +parallel 0 +object 0 +recognit 0 +applic 0 +recognitioninterest 0 +main 0 +interest 0 +obvious 0 +memberof 0 +firebird 0 +yourselfelectron 0 +fuel 0 +inject 0 +although 0 +alwayshav 0 +time 0 +particip 0 +also 0 +designingan 0 +ground 0 +pageefi 0 +pagethes 0 +pictur 0 +sold 0 +speed 0 +gearsmodif 0 +hypertech 0 +stage 0 +chip 0 +flowmast 0 +exhaust 0 +hurst 0 +shifter 0 +grant 0 +steer 0 +wheel 0 +filter 0 +mustang 0 +bought 0 +still 0 +speedmodif 0 +gear 0 +accel 0 +plug 0 +motorsport 0 +wiresbest 0 +mile 0 +mphbest 0 +januari 0 +pagenumb 0 +visit 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..b954d3a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,8 @@ +julia 1 +home 1 +pagejulia 1 +komissarchik 1 +juliak 1 +cornel 1 +eduto 1 +continu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..52df329e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,104 @@ +cornel 1 +network 1 +julian 0 +engin 0 +comput 0 +scienc 0 +theori 0 +video 0 +full 0 +pelenur 0 +avail 0 +univers 0 +offic 0 +center 0 +current 0 +workstat 0 +develop 0 +grow 0 +fault 0 +toler 0 +design 0 +implement 0 +fast 0 +techniqu 0 +emilio 0 +better 0 +pictur 0 +master 0 +graduat 0 +campu 0 +adress 0 +summit 0 +ithaca 0 +centerithaca 0 +upson 0 +hallcornel 0 +occup 0 +fulltim 0 +student 0 +teach 0 +assist 0 +databas 0 +administr 0 +recent 0 +project 0 +global 0 +pointer 0 +complet 0 +toolkit 0 +write 0 +parallel 0 +program 0 +independ 0 +platform 0 +topolog 0 +compil 0 +sparcstat 0 +ethernet 0 +wfinger 0 +system 0 +search 0 +home 0 +page 0 +document 0 +world 0 +wide 0 +cyberserv 0 +need 0 +faster 0 +httpserver 0 +fulfil 0 +increas 0 +demand 0 +servic 0 +addit 0 +commerci 0 +high 0 +becom 0 +critic 0 +paper 0 +describ 0 +distribut 0 +http 0 +server 0 +us 0 +horu 0 +prvf 0 +poss 0 +realli 0 +thegoal 0 +achiev 0 +screenmot 0 +cluster 0 +showthat 0 +innov 0 +snarf 0 +blast 0 +capit 0 +hardwar 0 +produc 0 +transferwith 0 +compress 0 +color 0 +screen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..d413b52a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,92 @@ +cornel 1 +home 0 +page 0 +theori 0 +program 0 +languag 0 +resourc 0 +type 0 +paper 0 +art 0 +czar 0 +comput 0 +scienc 0 +depart 0 +interest 0 +includ 0 +research 0 +work 0 +nuprl 0 +slide 0 +fine 0 +hockei 0 +theatr 0 +jason 0 +hickei 0 +graduat 0 +student 0 +supervis 0 +robertconst 0 +summari 0 +current 0 +statu 0 +practic 0 +great 0 +thefox 0 +project 0 +especi 0 +markleon 0 +mainli 0 +softwar 0 +verif 0 +tool 0 +specif 0 +formalsystem 0 +develop 0 +universitydepart 0 +make 0 +higher 0 +levelmodul 0 +abstract 0 +data 0 +relat 0 +recent 0 +publish 0 +cornella 0 +bibliographi 0 +publishedat 0 +bellcor 0 +also 0 +talk 0 +havegiven 0 +seminar 0 +pretti 0 +technic 0 +theygiv 0 +overview 0 +done 0 +want 0 +sequenc 0 +identif 0 +galleryof 0 +mine 0 +tryth 0 +orth 0 +fineart 0 +forum 0 +cucshockei 0 +backcountri 0 +take 0 +look 0 +thebackcountri 0 +perform 0 +servic 0 +publicli 0 +maintainedsoftwar 0 +equip 0 +back 0 +hockeyfor 0 +info 0 +schedul 0 +forth 0 +center 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..33b657e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,36 @@ +kamijo 1 +koichi 0 +juli 0 +english 0 +japanes 0 +back 0 +japan 0 +kamijokoichi 0 +welcom 0 +home 0 +page 0 +construct 0 +reach 0 +us 0 +thing 0 +sell 0 +sold 0 +click 0 +like 0 +sale 0 +class 0 +papershometownseduc 0 +work 0 +experienceskoichi 0 +muriel 0 +ithaca 0 +cornel 0 +kkamijoh 0 +vnet 0 +go 0 +accept 0 +access 0 +time 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..55a5a497 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,115 @@ +cornel 1 +univers 0 +karl 0 +robot 0 +micro 0 +manipul 0 +friedrich 0 +bhringer 0 +karlsruh 0 +comput 0 +scienc 0 +stanford 0 +laboratori 0 +build 0 +interest 0 +andassembl 0 +microfabr 0 +strategi 0 +work 0 +page 0 +level 0 +dipl 0 +inform 0 +graduat 0 +student 0 +dept 0 +upson 0 +hall 0 +ithaca 0 +email 0 +educurr 0 +address 0 +gate 0 +current 0 +research 0 +nanofabr 0 +facil 0 +actuat 0 +arrai 0 +implementmicro 0 +gener 0 +innew 0 +devic 0 +handl 0 +part 0 +withprogramm 0 +forc 0 +vector 0 +field 0 +also 0 +investig 0 +design 0 +autom 0 +structur 0 +earlier 0 +germani 0 +includ 0 +develop 0 +better 0 +graph 0 +layout 0 +algorithm 0 +thesi 0 +advisor 0 +professorbruc 0 +donald 0 +founder 0 +director 0 +vision 0 +project 0 +close 0 +collabor 0 +professor 0 +noel 0 +macdonaldand 0 +hisresearch 0 +group 0 +public 0 +document 0 +confer 0 +announc 0 +call 0 +paper 0 +anim 0 +video 0 +sculptur 0 +invis 0 +cantilev 0 +model 0 +frank 0 +lloyd 0 +wright 0 +fallingwat 0 +articl 0 +york 0 +time 0 +magazin 0 +march 0 +wire 0 +octob 0 +offic 0 +nano 0 +outin 0 +kwon 0 +club 0 +find 0 +lindseth 0 +climb 0 +wall 0 +navig 0 +previou 0 +higher 0 +deeper 0 +next 0 +pagekarl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..f750f5e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,232 @@ +layer 1 +stack 0 +interest 0 +protocol 0 +horu 0 +properti 0 +cornel 0 +distribut 0 +comput 0 +problem 0 +specifi 0 +applic 0 +provid 0 +karr 0 +work 0 +project 0 +concentr 0 +research 0 +engin 0 +commun 0 +weak 0 +consist 0 +java 0 +develop 0 +wide 0 +david 0 +univers 0 +system 0 +mathemat 0 +statist 0 +html 0 +includ 0 +verifi 0 +verif 0 +us 0 +guarante 0 +techniqu 0 +desir 0 +order 0 +part 0 +applet 0 +messag 0 +whose 0 +prone 0 +area 0 +network 0 +gener 0 +disconnect 0 +site 0 +time 0 +karrdavid 0 +karrphd 0 +studentdepart 0 +sciencecornel 0 +upson 0 +hall 0 +ithaca 0 +mail 0 +edui 0 +student 0 +depart 0 +scienceat 0 +thehoru 0 +architectur 0 +reliabl 0 +withprofessor 0 +kenneth 0 +birmananddr 0 +robbert 0 +reness 0 +minor 0 +field 0 +perform 0 +protocolsmi 0 +implement 0 +dissert 0 +formalspecif 0 +ofhoru 0 +tempor 0 +logic 0 +action 0 +variousinterest 0 +fundament 0 +usedin 0 +furthermor 0 +write 0 +formula 0 +assum 0 +stylefor 0 +might 0 +itsinterfac 0 +depend 0 +andbelow 0 +emploi 0 +straightforward 0 +agiven 0 +certain 0 +thetop 0 +condit 0 +even 0 +unusualcombin 0 +atyp 0 +ultim 0 +user 0 +systemsshould 0 +abl 0 +call 0 +help 0 +constructcustom 0 +omit 0 +unnecessari 0 +avoid 0 +theirassoci 0 +cost 0 +confid 0 +sufficientto 0 +intend 0 +basi 0 +thesecur 0 +harden 0 +give 0 +rough 0 +demonstr 0 +propos 0 +method 0 +ofverifi 0 +initi 0 +stem 0 +thepromis 0 +suit 0 +variousguarante 0 +programm 0 +passingenviron 0 +host 0 +crash 0 +delayedor 0 +lost 0 +softwar 0 +haswork 0 +componentswer 0 +failur 0 +feel 0 +featur 0 +offer 0 +considerablepromis 0 +consistencywhil 0 +becom 0 +ofdistribut 0 +look 0 +revis 0 +control 0 +filesin 0 +environ 0 +distributedenviron 0 +partitionedinto 0 +portion 0 +notion 0 +wouldallow 0 +multipl 0 +temporarili 0 +make 0 +progress 0 +concurr 0 +performancemi 0 +correct 0 +measur 0 +high 0 +avail 0 +respons 0 +andeffici 0 +resourc 0 +clearli 0 +equal 0 +import 0 +larg 0 +appar 0 +random 0 +ofsystem 0 +load 0 +activ 0 +notabl 0 +except 0 +dedic 0 +parallelmachin 0 +behavior 0 +also 0 +suscept 0 +analysi 0 +though 0 +differ 0 +kind 0 +encourag 0 +javath 0 +world 0 +applicationwith 0 +mani 0 +possibl 0 +explor 0 +experi 0 +simpl 0 +wai 0 +hypertext 0 +tonavig 0 +inform 0 +appear 0 +myweb 0 +lego 0 +toi 0 +hack 0 +execut 0 +code 0 +anetscap 0 +browser 0 +download 0 +exampl 0 +abirthdai 0 +puzzl 0 +calcul 0 +tool 0 +forverifi 0 +profession 0 +affiliationsi 0 +member 0 +ieee 0 +andmaa 0 +informationseemi 0 +linksfor 0 +topic 0 +find 0 +last 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..043123e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,137 @@ +system 1 +click 0 +comput 0 +work 0 +graphic 0 +home 0 +engin 0 +oper 0 +cornel 0 +hogman 0 +qualcomm 0 +softwar 0 +distribut 0 +network 0 +cours 0 +took 0 +multimedia 0 +virtual 0 +game 0 +break 0 +code 0 +interfac 0 +screenshot 0 +view 0 +postscript 0 +scene 0 +transit 0 +effect 0 +languag 0 +rivl 0 +develop 0 +project 0 +favorit 0 +cool 0 +lot 0 +welcom 0 +pagekartik 0 +kapadiamast 0 +engineeringclass 0 +depart 0 +sciencecornel 0 +universityithaca 0 +address 0 +dabnei 0 +drive 0 +diego 0 +california 0 +phone 0 +mail 0 +kkapadia 0 +comcurr 0 +incorporatedmi 0 +main 0 +area 0 +interest 0 +spring 0 +architectur 0 +high 0 +capac 0 +inform 0 +fall 0 +projectshoca 0 +chiphoca 0 +pronounc 0 +hodja 0 +full 0 +fledg 0 +chip 0 +hypothet 0 +instruct 0 +processor 0 +support 0 +featur 0 +like 0 +multitask 0 +memori 0 +enjoy 0 +quick 0 +singl 0 +player 0 +window 0 +platform 0 +good 0 +sourc 0 +entertain 0 +take 0 +gameboard 0 +help 0 +screen 0 +design 0 +document 0 +rivlrivl 0 +stand 0 +resolut 0 +independ 0 +video 0 +univers 0 +jonathan 0 +swartz 0 +brian 0 +smith 0 +excel 0 +applic 0 +enhanc 0 +incorpor 0 +primit 0 +implement 0 +present 0 +simul 0 +railroad 0 +master 0 +visual 0 +captur 0 +scientif 0 +aspect 0 +lai 0 +track 0 +vehicl 0 +model 0 +dynam 0 +motion 0 +us 0 +combin 0 +open 0 +inventor 0 +opengl 0 +realiti 0 +facil 0 +resumesom 0 +site 0 +star 0 +mpeg 0 +clip 0 +music 0 +page 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..2d943de8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,63 @@ +publish 1 +letter 1 +thing 0 +write 0 +wrote 0 +suspect 0 +true 0 +writingsi 0 +much 0 +written 0 +proudof 0 +flame 0 +mine 0 +other 0 +morethought 0 +conscienti 0 +objector 0 +arm 0 +conflict 0 +myfirst 0 +grand 0 +rapid 0 +press 0 +fewyear 0 +back 0 +gulf 0 +sinc 0 +time 0 +howev 0 +vestart 0 +keep 0 +work 0 +onlin 0 +lest 0 +wonder 0 +also 0 +poetri 0 +dprobabl 0 +rather 0 +famou 0 +essayist 0 +anyhow 0 +like 0 +argu 0 +dread 0 +mess 0 +go 0 +byron 0 +center 0 +asuburb 0 +mile 0 +went 0 +high 0 +school 0 +unabomb 0 +mathematician 0 +mathematiciansar 0 +terrorist 0 +think 0 +editor 0 +newspap 0 +wide 0 +read 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..6c0cb20b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,268 @@ +cornel 1 +katherin 0 +comput 0 +info 0 +html 0 +page 0 +scienc 0 +ithaca 0 +system 0 +robbert 0 +reness 0 +austin 0 +home 0 +distribut 0 +birman 0 +werner 0 +vogel 0 +group 0 +hall 0 +work 0 +reliabl 0 +horu 0 +commun 0 +appear 0 +proceed 0 +sigop 0 +brad 0 +glade 0 +ieee 0 +report 0 +depart 0 +univers 0 +relat 0 +infodistribut 0 +industri 0 +infocompani 0 +place 0 +lisboa 0 +colorado 0 +lectur 0 +httpd 0 +ncsa 0 +imag 0 +sourc 0 +xmosaic 0 +other 0 +journal 0 +school 0 +databas 0 +women 0 +cook 0 +electron 0 +fashion 0 +music 0 +wall 0 +network 0 +guokguo 0 +educornel 0 +universitydept 0 +upson 0 +student 0 +interest 0 +scalabl 0 +multicastprotocol 0 +project 0 +direct 0 +recent 0 +publicationskatherin 0 +structur 0 +virtual 0 +synchroni 0 +explor 0 +bound 0 +ofvirtu 0 +synchron 0 +european 0 +workshop 0 +connemara 0 +ireland 0 +septemb 0 +lui 0 +rodrigu 0 +antonio 0 +sargento 0 +paulo 0 +verisimo 0 +transpar 0 +light 0 +weight 0 +servic 0 +symposiumon 0 +niagara 0 +lake 0 +canada 0 +octob 0 +also 0 +avail 0 +technic 0 +kenneth 0 +mark 0 +hayden 0 +takako 0 +hickei 0 +dalia 0 +malki 0 +alex 0 +vaysburd 0 +flexibl 0 +march 0 +research 0 +systemscomput 0 +networkscool 0 +toolsbibliographyconferencesjournalsacademia 0 +infoschool 0 +infojob 0 +searchinterest 0 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 0 +inforesearch 0 +systempointershoru 0 +commerci 0 +productspringtotemtransisx 0 +kernel 0 +arizona 0 +microsystemslab 0 +networksmulticast 0 +protocolsn 0 +fromlblgun 0 +cool 0 +tool 0 +consortium 0 +gener 0 +theproject 0 +simpl 0 +beginn 0 +sguid 0 +quickrefer 0 +htmldocument 0 +tabl 0 +content 0 +common 0 +gatewai 0 +interfac 0 +overview 0 +find 0 +file 0 +finder 0 +mosaic 0 +binari 0 +document 0 +uiuc 0 +cern 0 +java 0 +hotjava 0 +bibliographybibliographi 0 +oldindex 0 +index 0 +confer 0 +pointer 0 +hpdc 0 +ftc 0 +sosp 0 +srd 0 +icdc 0 +jsac 0 +elsevi 0 +scienceacademia 0 +motorola 0 +openingsibmdelltandemtiapplebel 0 +atlant 0 +texa 0 +dept 0 +ucsd 0 +gradschool 0 +advic 0 +gradjob 0 +search 0 +ukinterest 0 +life 0 +weather 0 +moviesbailei 0 +concertslibrari 0 +hightechin 0 +institut 0 +inesc 0 +copper 0 +mountain 0 +resort 0 +summit 0 +counti 0 +coloradooth 0 +infoart 0 +weblouvreth 0 +world 0 +linebook 0 +amazon 0 +book 0 +calvinhobb 0 +archivecardsmagicchinaart 0 +china 0 +gourmetl 0 +cordonbleu 0 +itali 0 +dessert 0 +fashional 0 +linksa 0 +cjlutz 0 +wwweb 0 +pagewith 0 +heart 0 +tmexpressfirst 0 +view 0 +wireirc 0 +faqfashion 0 +nethair 0 +crew 0 +diesel 0 +jean 0 +guessfriend 0 +alan 0 +cheng 0 +david 0 +deng 0 +shiji 0 +insur 0 +plan 0 +email 0 +grove 0 +edulibrari 0 +librari 0 +congressmagazin 0 +intertext 0 +wire 0 +timegeorg 0 +gilder 0 +discoveri 0 +mail 0 +postcard 0 +map 0 +internet 0 +underground 0 +archivesinanet 0 +newsworld 0 +new 0 +brief 0 +sport 0 +open 0 +olymp 0 +stock 0 +streetheadlin 0 +street 0 +weatherhunt 0 +infoth 0 +lyco 0 +hunt 0 +informationglob 0 +navigatorhom 0 +global 0 +navig 0 +scout 0 +wanderersand 0 +spider 0 +edg 0 +yahoo 0 +refer 0 +netscap 0 +last 0 +modifi 0 +kguo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..4035bed2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,43 @@ +java 1 +program 0 +wirefram 0 +rotat 0 +creat 0 +introductionthi 0 +project 0 +desgin 0 +learn 0 +tool 0 +comput 0 +graphic 0 +provid 0 +understand 0 +polygon 0 +form 0 +list 0 +vertic 0 +written 0 +simpl 0 +power 0 +languag 0 +safe 0 +portabl 0 +interact 0 +object 0 +orient 0 +multi 0 +threader 0 +environ 0 +platform 0 +speific 0 +applet 0 +react 0 +user 0 +input 0 +dynam 0 +chang 0 +cone 0 +cube 0 +cylind 0 +tetra 0 +toru 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..33ad0500 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,242 @@ +comput 1 +kleinberg 0 +proc 0 +scienc 0 +symposium 0 +algorithm 0 +theori 0 +foundat 0 +cornel 0 +problem 0 +optim 0 +network 0 +ieee 0 +research 0 +geometri 0 +geometr 0 +appear 0 +siam 0 +discret 0 +line 0 +univers 0 +combinatori 0 +approxim 0 +distribut 0 +biologi 0 +path 0 +program 0 +inform 0 +resourc 0 +huttenloch 0 +project 0 +bibliographi 0 +confer 0 +kleinber 0 +molecular 0 +rout 0 +adversari 0 +queue 0 +protocol 0 +semi 0 +definit 0 +academ 0 +graph 0 +tardo 0 +disjointpath 0 +williamson 0 +trade 0 +goeman 0 +minimum 0 +process 0 +letter 0 +server 0 +leighton 0 +point 0 +technic 0 +report 0 +page 0 +group 0 +associ 0 +internet 0 +secur 0 +homepag 0 +assist 0 +professor 0 +ithaca 0 +interest 0 +emphasi 0 +recent 0 +work 0 +includ 0 +anddisjoint 0 +approach 0 +analyz 0 +stabilityof 0 +without 0 +probabilist 0 +assumpt 0 +method 0 +particularlyth 0 +posit 0 +studi 0 +conform 0 +spend 0 +year 0 +visit 0 +almaden 0 +center 0 +click 0 +seeselect 0 +publicationsmiscellan 0 +linkspapersapproxim 0 +singl 0 +sourc 0 +unsplitt 0 +flow 0 +rubinfeld 0 +short 0 +expand 0 +dens 0 +embed 0 +disjoint 0 +high 0 +diamet 0 +planar 0 +aggarw 0 +node 0 +mesh 0 +vlsi 0 +layout 0 +improvedapproxim 0 +ratio 0 +latenc 0 +lovasz 0 +thetafunct 0 +relax 0 +vertex 0 +cover 0 +math 0 +local 0 +formobil 0 +robot 0 +computersci 0 +search 0 +simplepolygon 0 +lower 0 +bound 0 +serverbalanc 0 +yaniv 0 +serveralgorithm 0 +robotnavig 0 +master 0 +thesi 0 +parallel 0 +andrew 0 +awerbuch 0 +fernandez 0 +stabil 0 +result 0 +greedi 0 +content 0 +resolut 0 +borodin 0 +raghavan 0 +sudan 0 +attiya 0 +lynch 0 +offsbetween 0 +messag 0 +deliveri 0 +quiesc 0 +time 0 +connect 0 +managementprotocol 0 +israel 0 +system 0 +mullainathan 0 +boundsand 0 +combin 0 +consensu 0 +object 0 +onprincipl 0 +berger 0 +reconstruct 0 +athre 0 +dimension 0 +model 0 +arbitrari 0 +error 0 +compar 0 +set 0 +kedem 0 +dynam 0 +voronoi 0 +diagram 0 +hausdorff 0 +distanc 0 +pointset 0 +euclidean 0 +motion 0 +plane 0 +symposiumon 0 +invariantsof 0 +segment 0 +universitycomput 0 +juli 0 +linkssearch 0 +tool 0 +bibliographiesaltavista 0 +infoseek 0 +excit 0 +yahoo 0 +nynex 0 +yellow 0 +glimps 0 +ncstrl 0 +librari 0 +david 0 +jone 0 +hypertext 0 +sitescornel 0 +oper 0 +stanford 0 +berkelei 0 +nation 0 +computingtc 0 +virtual 0 +address 0 +book 0 +crescenzi 0 +kann 0 +compendium 0 +foc 0 +soda 0 +stoc 0 +biologycomput 0 +carb 0 +biocomput 0 +sdsc 0 +list 0 +geometrydavid 0 +eppstein 0 +junkyard 0 +jeff 0 +erickson 0 +securitymitr 0 +corp 0 +princeton 0 +safe 0 +rivest 0 +cryptographi 0 +link 0 +miscellaneousnetscap 0 +intellicast 0 +interact 0 +tenni 0 +chess 0 +onlin 0 +talk 0 +kleinbergdepart 0 +scienceupson 0 +hallcornel 0 +universityithaca 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..75ee2635 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,52 @@ +homepag 1 +cornel 1 +comput 0 +scienc 0 +japan 0 +page 0 +kazushi 0 +otakota 0 +edukazushi 0 +melco 0 +current 0 +master 0 +engin 0 +student 0 +cornellunivers 0 +receiv 0 +univers 0 +tokyo 0 +back 0 +march 0 +work 0 +mitusbishi 0 +electr 0 +corpor 0 +return 0 +degre 0 +worth 0 +isund 0 +construct 0 +start 0 +assign 0 +acquaint 0 +html 0 +forc 0 +depart 0 +inform 0 +superhighwai 0 +cours 0 +interest 0 +pictur 0 +music 0 +move 0 +sale 0 +come 0 +take 0 +februari 0 +thing 0 +want 0 +sell 0 +think 0 +advert 0 +peopl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..9f750325 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,54 @@ +research 1 +cornel 1 +comput 1 +interest 0 +algorithm 0 +complex 0 +logic 0 +algebra 0 +faculti 0 +dexter 0 +kozendext 0 +kozenjoseph 0 +newton 0 +professor 0 +engineeringphd 0 +univers 0 +interestsmi 0 +includ 0 +especiallycomplex 0 +decis 0 +problem 0 +andsemant 0 +program 0 +languag 0 +paper 0 +avail 0 +onlinekleen 0 +constraint 0 +type 0 +infer 0 +algebraautomata 0 +theori 0 +logicbibliographylist 0 +public 0 +technic 0 +reportscours 0 +notesc 0 +structur 0 +interpret 0 +programsc 0 +automata 0 +theoryfun 0 +stufffamili 0 +pictur 0 +rugbi 0 +effectcomput 0 +scienc 0 +departmentupson 0 +hallcornel 0 +universityithaca 0 +york 0 +usakozen 0 +work 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..b067d172 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,42 @@ +kreitz 1 +christoph 0 +cornel 0 +home 0 +page 0 +soon 0 +research 0 +comput 0 +theori 0 +pictur 0 +associ 0 +depart 0 +scienc 0 +univers 0 +ithaca 0 +offic 0 +phone 0 +email 0 +upson 0 +hall 0 +topic 0 +program 0 +synthesi 0 +autom 0 +deduct 0 +type 0 +teach 0 +learn 0 +german 0 +lehr 0 +lernen 0 +vorlesungsskript 0 +medienunterst 0 +uumltzt 0 +lehren 0 +person 0 +inform 0 +avail 0 +last 0 +modifi 0 +novemb 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..2a7ad674 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,34 @@ +kuen 1 +heng 0 +cornel 0 +isi 0 +master 0 +engin 0 +comput 0 +scienc 0 +depart 0 +univers 0 +address 0 +grove 0 +street 0 +newton 0 +telephon 0 +welcom 0 +visit 0 +place 0 +myproject 0 +multimedia 0 +system 0 +cours 0 +would 0 +like 0 +read 0 +daili 0 +new 0 +taiwan 0 +home 0 +countri 0 +enjoi 0 +page 0 +still 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..8e4ce8dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,182 @@ +digit 1 +librari 1 +comput 0 +scienc 0 +technic 0 +report 0 +research 0 +cornel 0 +dienst 0 +author 0 +implement 0 +object 0 +work 0 +carl 0 +lagoz 0 +person 0 +number 0 +server 0 +world 0 +distribut 0 +part 0 +also 0 +workshop 0 +page 0 +project 0 +group 0 +develop 0 +ncstrl 0 +issu 0 +protocol 0 +refer 0 +technolog 0 +paper 0 +wide 0 +confer 0 +architectur 0 +servic 0 +interoper 0 +area 0 +extend 0 +framework 0 +design 0 +secur 0 +repositori 0 +final 0 +meet 0 +substitut 0 +life 0 +time 0 +never 0 +quiet 0 +home 0 +leader 0 +depart 0 +upson 0 +hall 0 +universityithaca 0 +phone 0 +internet 0 +edui 0 +lead 0 +groupin 0 +departmentat 0 +univers 0 +ourgroup 0 +manag 0 +oper 0 +network 0 +intern 0 +consortium 0 +maintain 0 +adistribut 0 +collaborateson 0 +davi 0 +thedienstsoftwar 0 +providesdistribut 0 +access 0 +worldwid 0 +current 0 +enabl 0 +drop 0 +publish 0 +document 0 +commun 0 +april 0 +manual 0 +build 0 +product 0 +chapter 0 +advanc 0 +springer 0 +verlag 0 +primari 0 +involv 0 +defin 0 +protocolsfor 0 +infrastructur 0 +collabor 0 +corpor 0 +nation 0 +initiativesto 0 +developeda 0 +darpa 0 +fund 0 +open 0 +store 0 +iso 0 +dlib 0 +magazin 0 +decemb 0 +member 0 +dlibwork 0 +interfacesand 0 +releas 0 +metadata 0 +iiin 0 +warwick 0 +amveri 0 +interest 0 +us 0 +distributedobject 0 +read 0 +posit 0 +paperfor 0 +joint 0 +mobil 0 +codeworkshop 0 +know 0 +meetm 0 +find 0 +poor 0 +contact 0 +littl 0 +moreabout 0 +charact 0 +pictur 0 +pagei 0 +luci 0 +daughter 0 +rule 0 +major 0 +outsideof 0 +toddler 0 +constant 0 +challeng 0 +lucyg 0 +mean 0 +provid 0 +avid 0 +outdoor 0 +site 0 +fast 0 +movingwat 0 +lakeand 0 +itch 0 +cano 0 +give 0 +beauti 0 +think 0 +bike 0 +ridingalong 0 +road 0 +backwood 0 +trail 0 +tell 0 +sparehour 0 +run 0 +shoe 0 +breath 0 +deeplyth 0 +fresh 0 +spend 0 +much 0 +joi 0 +physicalnor 0 +ever 0 +interfer 0 +desir 0 +fight 0 +itspreserv 0 +hope 0 +sometim 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..9f7d8a44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,101 @@ +homepag 1 +career 0 +comput 0 +system 0 +cornel 0 +page 0 +guid 0 +lidong 0 +zhou 0 +depart 0 +research 0 +materi 0 +distribut 0 +project 0 +secur 0 +internet 0 +network 0 +servic 0 +friend 0 +fudan 0 +buyer 0 +auto 0 +welcom 0 +sciencecornel 0 +universityithaca 0 +fall 0 +cours 0 +concept 0 +local 0 +access 0 +multimedia 0 +relat 0 +sigop 0 +paper 0 +oasi 0 +cambridg 0 +report 0 +adag 0 +author 0 +applic 0 +group 0 +level 0 +java 0 +safe 0 +program 0 +legion 0 +sirac 0 +kerbero 0 +authent 0 +massiv 0 +ocaml 0 +advanc 0 +standard 0 +robot 0 +exclus 0 +document 0 +opportun 0 +jobtrak 0 +colleg 0 +grad 0 +hunter 0 +open 0 +center 0 +onlin 0 +careermosa 0 +jobweb 0 +home 0 +xjob 0 +yingjun 0 +classmat 0 +inform 0 +resours 0 +tutori 0 +languag 0 +tool 0 +yellow 0 +book 0 +isso 0 +sunris 0 +chines 0 +soccer 0 +world 0 +edmund 0 +automobil 0 +autosit 0 +ultim 0 +insur 0 +basic 0 +legal 0 +surviv 0 +link 0 +travel 0 +agenc 0 +rank 0 +succe 0 +graduat 0 +school 0 +back 0 +indexlast 0 +updat 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..86943958 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,163 @@ +comput 1 +project 0 +link 0 +system 0 +distribut 0 +home 0 +anim 0 +hsian 0 +page 0 +scienceat 0 +cornel 0 +univers 0 +taiwan 0 +classesc 0 +final 0 +orwel 0 +remov 0 +object 0 +site 0 +swartz 0 +cool 0 +totoro 0 +wangthi 0 +major 0 +constructionlin 0 +wang 0 +master 0 +student 0 +degre 0 +inform 0 +ohio 0 +state 0 +born 0 +fangliao 0 +small 0 +villag 0 +southern 0 +coast 0 +still 0 +construct 0 +fall 0 +multimedia 0 +track 0 +digit 0 +videoe 0 +networkse 0 +vision 0 +moment 0 +base 0 +edg 0 +oper 0 +amidonc 0 +autom 0 +video 0 +transcrib 0 +annot 0 +research 0 +advisor 0 +prof 0 +ramin 0 +zabihspr 0 +natur 0 +languag 0 +processingc 0 +practic 0 +computingc 0 +practicum 0 +network 0 +visual 0 +managementc 0 +machin 0 +visionc 0 +scienc 0 +colloquimc 0 +move 0 +scene 0 +high 0 +perform 0 +audit 0 +program 0 +java 0 +webspac 0 +interest 0 +us 0 +stuffscornel 0 +info 0 +depart 0 +annual 0 +reportiee 0 +societytaiwan 0 +headlin 0 +new 0 +sinanet 0 +comth 0 +musicmovi 0 +connect 0 +movieweb 0 +movi 0 +moviemania 0 +also 0 +click 0 +collect 0 +think 0 +picturesth 0 +list 0 +best 0 +sell 0 +book 0 +releas 0 +publish 0 +world 0 +journal 0 +bookstor 0 +quot 0 +chines 0 +classic 0 +linux 0 +linkstcl 0 +line 0 +resourc 0 +softwar 0 +engin 0 +galleri 0 +hacksth 0 +earth 0 +pagemiscellan 0 +hongkong 0 +bridg 0 +hong 0 +kong 0 +linksfor 0 +like 0 +japanes 0 +take 0 +look 0 +carlo 0 +jump 0 +cja 0 +calanimag 0 +alpha 0 +chapter 0 +berkelei 0 +pagelaputa 0 +castl 0 +nausicaa 0 +vallei 0 +wind 0 +conan 0 +slump 0 +kiki 0 +legend 0 +galact 0 +hero 0 +ming 0 +pagecampu 0 +address 0 +uptown 0 +eithaca 0 +york 0 +linhsian 0 +edulast 0 +updat 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..b972a49c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,33 @@ +page 1 +thought 1 +libbi 0 +home 0 +essai 0 +show 0 +take 0 +look 0 +pagewelcom 0 +collect 0 +last 0 +updat 0 +sept 0 +download 0 +theme 0 +song 0 +check 0 +video 0 +clip 0 +read 0 +lista 0 +littl 0 +thing 0 +septemb 0 +june 0 +april 0 +interest 0 +projectemail 0 +mehit 0 +counter 0 +courtesi 0 +http 0 +digit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..e521a3b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,12 @@ +offic 1 +name 0 +upson 0 +hall 0 +hour 0 +mail 0 +lili 0 +cornel 0 +oper 0 +system 0 +take 0 +cours 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..0ea82d34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,100 @@ +numer 1 +cornel 0 +theori 0 +linear 0 +recent 0 +lloyd 0 +trefethen 0 +depart 0 +mathemat 0 +algebra 0 +matric 0 +oper 0 +textbook 0 +student 0 +trefethenprofessorlnt 0 +edumi 0 +appoint 0 +thecomput 0 +scienc 0 +also 0 +affili 0 +thecent 0 +appli 0 +thecornel 0 +center 0 +field 0 +analysi 0 +scientif 0 +comput 0 +havea 0 +person 0 +view 0 +mean 0 +specif 0 +interest 0 +includ 0 +numericalsolut 0 +conform 0 +map 0 +approxim 0 +fluid 0 +mechan 0 +year 0 +much 0 +work 0 +hasbeen 0 +relat 0 +normal 0 +whose 0 +eigenvector 0 +notorthogon 0 +applic 0 +textbooksfinit 0 +differ 0 +spectral 0 +method 0 +siam 0 +papersmultimatlab 0 +matlab 0 +multipl 0 +processorsmatrix 0 +iter 0 +gap 0 +betweenpotenti 0 +convergencepseudospectra 0 +operatorssom 0 +papersoth 0 +itemsclass 0 +paper 0 +analysiscurriculum 0 +vitaepseudospectra 0 +bibliographi 0 +peter 0 +alfeldcurr 0 +vicki 0 +howlegubjrn 0 +jnsson 0 +yohan 0 +kimdivakar 0 +viswanathprevi 0 +jeff 0 +baggetttobi 0 +driscollalan 0 +edelman 0 +loui 0 +howel 0 +walter 0 +mascarenhasnoel 0 +nachtigalsatish 0 +reddi 0 +chuan 0 +tohsom 0 +colleaguesjim 0 +demmelann 0 +greenbaummartin 0 +gutknechtd 0 +nick 0 +highamann 0 +trefethenandr 0 +weideman 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..4b529969 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,91 @@ +system 1 +network 0 +home 0 +page 0 +comput 0 +cornel 0 +distribut 0 +internet 0 +databas 0 +luci 0 +travel 0 +music 0 +spring 0 +class 0 +manag 0 +oper 0 +softwar 0 +site 0 +stuff 0 +languag 0 +china 0 +chines 0 +welcom 0 +student 0 +scienc 0 +depart 0 +univers 0 +ithaca 0 +interest 0 +topic 0 +programminglanguag 0 +applic 0 +hobbi 0 +ping 0 +pong 0 +badminton 0 +swim 0 +photograph 0 +read 0 +resum 0 +whiz 0 +stock 0 +search 0 +analysi 0 +tool 0 +degre 0 +project 0 +practic 0 +practicum 0 +fall 0 +engin 0 +multimedia 0 +systemscontact 0 +yuwu 0 +favorit 0 +java 0 +corba 0 +silvano 0 +tkcgi 0 +html 0 +vrml 0 +object 0 +orient 0 +product 0 +server 0 +securitypc 0 +lube 0 +tune 0 +ipngip_atmcomput 0 +compani 0 +netscap 0 +busi 0 +cube 0 +sapient 0 +microsoft 0 +novel 0 +relat 0 +misc 0 +jobtrack 0 +new 0 +artvark 0 +galleri 0 +underground 0 +archiv 0 +person 0 +connect 0 +librari 0 +catalog 0 +mail 0 +sunlab 0 +caltech 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..d892a96b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,50 @@ +cornel 1 +engin 0 +comput 0 +system 0 +network 0 +linda 0 +home 0 +page 0 +universitylinda 0 +lxwu 0 +master 0 +student 0 +scienc 0 +depart 0 +univsers 0 +receiv 0 +univers 0 +massachusett 0 +lowel 0 +sinc 0 +work 0 +digit 0 +equip 0 +corp 0 +banyan 0 +main 0 +research 0 +interest 0 +mulitimedia 0 +click 0 +resum 0 +project 0 +nativ 0 +protocol 0 +stack 0 +window 0 +us 0 +multicast 0 +group 0 +layer 0 +video 0 +electron 0 +commerc 0 +kramer 0 +mart 0 +coursesfal 0 +oper 0 +multimedia 0 +photoesus 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..18348b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,13 @@ +nikolai 1 +mateevnikolai 1 +mateevgradu 1 +studentmateev 1 +cornel 1 +upson 1 +halldepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..664de2cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,54 @@ +project 1 +morgenstern 0 +cornel 0 +comput 0 +databas 0 +arpa 0 +matthew 0 +home 0 +pagematthew 0 +morgensternresearch 0 +leaderaddress 0 +engin 0 +theori 0 +centerxerox 0 +design 0 +research 0 +institutecornel 0 +universityithaca 0 +phone 0 +email 0 +edustatu 0 +visit 0 +fellow 0 +scienc 0 +princip 0 +scientist 0 +xerox 0 +laboratori 0 +scienceproject 0 +distribut 0 +heterogen 0 +system 0 +fund 0 +metadata 0 +manag 0 +multimedia 0 +document 0 +supervis 0 +select 0 +student 0 +relat 0 +area 0 +fundedresearch 0 +work 0 +academ 0 +year 0 +summer 0 +avail 0 +stop 0 +chat 0 +inform 0 +come 0 +page 0 +soon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..406b6478 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,40 @@ +graph 1 +algorithm 0 +dynam 0 +data 0 +structur 0 +monika 0 +henzing 0 +homepagemonika 0 +rauch 0 +henzingerassist 0 +professorcomput 0 +scienc 0 +departmentcornel 0 +universityithaca 0 +email 0 +cornel 0 +eduphon 0 +current 0 +leav 0 +digit 0 +equip 0 +corpor 0 +system 0 +research 0 +centerhomepageresearch 0 +interestscombinatori 0 +especi 0 +random 0 +theori 0 +lower 0 +bound 0 +recent 0 +public 0 +project 0 +pageprogram 0 +committe 0 +stoc 0 +soda 0 +homepag 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..4c610d8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,76 @@ +stanlei 1 +cornel 1 +univers 1 +comput 1 +scienc 1 +interest 1 +project 1 +paper 1 +huang 0 +engin 0 +kentucki 0 +area 0 +user 0 +distribut 0 +plan 0 +share 0 +home 0 +page 0 +huangmast 0 +studentmhuang 0 +sheldon 0 +courtcornel 0 +ithaca 0 +master 0 +class 0 +bachelor 0 +oper 0 +systemsdistribut 0 +systemsdatabas 0 +system 0 +inform 0 +retrievalgraph 0 +interfacesoth 0 +movi 0 +tenni 0 +horse_back 0 +ride 0 +travel 0 +read 0 +work 0 +myadvisor 0 +werner 0 +vogel 0 +robbertvan 0 +reness 0 +object 0 +integr 0 +horu 0 +link 0 +relat 0 +planplan 0 +distributionplan 0 +updateplan 0 +faqhorusc 0 +final 0 +exam 0 +collect 0 +memorydistribut 0 +memorysom 0 +technic 0 +group 0 +communicationsnapshotu 0 +level 0 +network 0 +interfac 0 +architecturejobscar 0 +pathbai 0 +jobscyberezumescar 0 +opportunitiesus 0 +stufftechn 0 +field 0 +searchbel 0 +labsspbsd 0 +sourcesjavarfclast 0 +modifi 0 +mhuang 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..5dc7afca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,62 @@ +millett 1 +lynett 0 +homepag 0 +cornel 0 +person 0 +last 0 +list 0 +millettdepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +participatoryform 0 +mass 0 +speech 0 +develop 0 +internetdeserv 0 +highest 0 +protect 0 +government 0 +intrus 0 +decis 0 +aclu 0 +reno 0 +challeng 0 +script 0 +second 0 +year 0 +skit 0 +cuc 0 +holidai 0 +parti 0 +inform 0 +pictur 0 +cat 0 +updat 0 +link 0 +never 0 +abl 0 +find 0 +precis 0 +femin 0 +know 0 +peopl 0 +call 0 +feminist 0 +whenver 0 +express 0 +sentiment 0 +differenti 0 +doormat 0 +prostitut 0 +rebecca 0 +west 0 +modifi 0 +octob 0 +comment 0 +welcom 0 +copi 0 +public 0 +pleas 0 +look 0 +copyright 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..f68a7150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,94 @@ +home 1 +link 0 +cornel 0 +newgroupc 0 +mishaal 0 +page 0 +univers 0 +engin 0 +interest 0 +system 0 +high 0 +pagemisha 0 +kuwaiti 0 +student 0 +master 0 +computersci 0 +program 0 +mengc 0 +graduat 0 +doubl 0 +major 0 +electr 0 +andcomput 0 +scienc 0 +worcest 0 +polytechn 0 +institut 0 +inworcest 0 +coolest 0 +place 0 +earth 0 +list 0 +stuff 0 +involv 0 +temporari 0 +servic 0 +bearaccess 0 +menu 0 +cours 0 +take 0 +machin 0 +vision 0 +practic 0 +distribut 0 +practicum 0 +perform 0 +comput 0 +advanc 0 +languag 0 +implement 0 +newgroup 0 +capac 0 +network 0 +newgroupnba 0 +databas 0 +manag 0 +newgroupoptim 0 +video 0 +transmiss 0 +meng 0 +project 0 +extens 0 +kuwait 0 +pagemi 0 +resum 0 +check 0 +stock 0 +quotescool 0 +public 0 +server 0 +hope 0 +offer 0 +conot 0 +soon 0 +weather 0 +ithaca 0 +latest 0 +new 0 +cann 0 +intern 0 +film 0 +festiv 0 +everyth 0 +wrong 0 +reason 0 +want 0 +ever 0 +accus 0 +nerd 0 +well 0 +sure 0 +cool 0 +almashanmisha 0 +educornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..a8bacdba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,131 @@ +page 1 +cool 0 +link 0 +pleas 0 +stuff 0 +make 0 +look 0 +pretti 0 +realli 0 +thing 0 +mike 0 +date 0 +would 0 +like 0 +coupl 0 +know 0 +beavi 0 +korbi 0 +last 0 +name 0 +complet 0 +golf 0 +click 0 +list 0 +kid 0 +pagethi 0 +yeah 0 +version 0 +recent 0 +browser 0 +quit 0 +sign 0 +myguestbook 0 +chanc 0 +opinion 0 +count 0 +dark 0 +stockholm 0 +right 0 +take 0 +vote 0 +poll 0 +result 0 +peopl 0 +guess 0 +ryan 0 +call 0 +vitya 0 +better 0 +construct 0 +danc 0 +frog 0 +maria 0 +mark 0 +andrew 0 +corbett 0 +suck 0 +card 0 +nicknam 0 +kevin 0 +donnel 0 +love 0 +pictur 0 +eryn 0 +crave 0 +attent 0 +want 0 +movi 0 +graphic 0 +class 0 +amaz 0 +anim 0 +plai 0 +mpeg 0 +place 0 +univers 0 +stop 0 +expand 0 +find 0 +world 0 +need 0 +person 0 +pick 0 +site 0 +made 0 +onlin 0 +student 0 +homepag 0 +lame 0 +guttermouth 0 +brought 0 +byjust 0 +cours 0 +member 0 +internet 0 +exchang 0 +peic 0 +connect 0 +whole 0 +bunch 0 +other 0 +line 0 +come 0 +join 0 +hand 0 +sing 0 +togeth 0 +spirit 0 +harmoni 0 +someth 0 +visitor 0 +number 0 +keep 0 +reset 0 +check 0 +statist 0 +accuar 0 +send 0 +mail 0 +atmak 0 +cornel 0 +edubas 0 +much 0 +random 0 +imag 0 +relat 0 +anyth 0 +thank 0 +everybodi 0 +idea 0 +us 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..fdd32a37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,22 @@ +nobuhiko 1 +mukai 1 +semest 1 +mukainobuhiko 0 +home 0 +page 0 +construct 0 +research 0 +effect 0 +jpeg 0 +compressionon 0 +multimedia 0 +system 0 +last 0 +fall 0 +made 0 +anim 0 +titl 0 +magicon 0 +comput 0 +graphic 0 +spring 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..6a2b0e65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,22 @@ +nichola 1 +how 1 +homepag 0 +click 0 +photo 0 +imag 0 +graduat 0 +studentdepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +email 0 +nihow 0 +cornel 0 +eduoffic 0 +upson 0 +hall 0 +phone 0 +resum 0 +refer 0 +person 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..9c371d47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,23 @@ +niko 1 +home 0 +cornel 0 +page 0 +research 0 +pitsiani 0 +upson 0 +hall 0 +dept 0 +comput 0 +scienc 0 +univers 0 +ithaca 0 +work 0 +public 0 +lectur 0 +teach 0 +java 0 +vita 0 +pointer 0 +sinc 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..009044da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,186 @@ +parallel 1 +chrisochoid 0 +comput 0 +environ 0 +project 0 +proceed 0 +algorithm 0 +data 0 +softwar 0 +dynam 0 +grid 0 +gener 0 +cornel 0 +confer 0 +numer 0 +map 0 +housti 0 +rice 0 +niko 0 +scienc 0 +distribut 0 +load 0 +balanc 0 +simul 0 +ellpack 0 +student 0 +appear 0 +intern 0 +journal 0 +method 0 +page 0 +work 0 +mani 0 +research 0 +runtim 0 +multicomput 0 +adapt 0 +implement 0 +black 0 +hole 0 +master 0 +sukup 0 +fluid 0 +field 0 +mathemat 0 +tool 0 +program 0 +iter 0 +solver 0 +center 0 +mississippi 0 +advanc 0 +partial 0 +domain 0 +papachi 0 +public 0 +depart 0 +purdu 0 +univers 0 +touch 0 +facet 0 +includ 0 +support 0 +back 0 +system 0 +compil 0 +problem 0 +solv 0 +schedul 0 +design 0 +compon 0 +well 0 +known 0 +current 0 +workshop 0 +bernoulli 0 +prema 0 +portabl 0 +architecur 0 +sensit 0 +messag 0 +binari 0 +grand 0 +challeng 0 +engin 0 +florian 0 +kodukula 0 +indupraka 0 +pingali 0 +vineet 0 +ahuja 0 +reza 0 +behforooz 0 +undergradu 0 +former 0 +animesh 0 +chatterje 0 +rajani 0 +vaidyanathan 0 +select 0 +paper 0 +task 0 +bowyer 0 +watson 0 +offifth 0 +incomput 0 +relat 0 +multithread 0 +model 0 +ctctr 0 +octob 0 +appli 0 +object 0 +orient 0 +kale 0 +kohl 0 +yellick 0 +scientif 0 +menu 0 +unstructur 0 +structur 0 +thompson 0 +contemporari 0 +key 0 +special 0 +issu 0 +april 0 +toolkit 0 +collid 0 +haupt 0 +aiaa 0 +colorado 0 +spring 0 +june 0 +altern 0 +scalabl 0 +scalableparallel 0 +librari 0 +nation 0 +foundat 0 +engineeringresearch 0 +state 0 +partit 0 +heurist 0 +base 0 +parallelhardwar 0 +geometri 0 +characterist 0 +differentialequ 0 +vichnevetski 0 +knight 0 +richter 0 +imac 0 +brunswick 0 +decompos 0 +architectur 0 +kortesi 0 +decomposit 0 +differenti 0 +equat 0 +symposium 0 +domaindecomposit 0 +moscow 0 +ussr 0 +glowinski 0 +siam 0 +programmingenviron 0 +mimd 0 +machin 0 +karathanas 0 +samartzi 0 +vavali 0 +yang 0 +wang 0 +and 0 +weerawarana 0 +onsupercomput 0 +nikosc 0 +institut 0 +theori 0 +univeristi 0 +rhode 0 +hall 0 +room 0 +ithaca 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..8b53e105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,68 @@ +prema 1 +runtim 1 +parallel 0 +cornel 0 +system 0 +portabl 0 +environ 0 +model 0 +support 0 +multicomput 0 +architectur 0 +comput 0 +research 0 +compil 0 +port 0 +dynam 0 +load 0 +balanc 0 +niko 0 +chrisochoid 0 +welcom 0 +advanc 0 +institut 0 +theori 0 +center 0 +univers 0 +overview 0 +andproblem 0 +solv 0 +target 0 +scientif 0 +computingappl 0 +build 0 +implement 0 +design 0 +varieti 0 +suppot 0 +global 0 +address 0 +space 0 +memori 0 +data 0 +task 0 +program 0 +multi 0 +thread 0 +style 0 +execut 0 +automat 0 +work 0 +share 0 +mechan 0 +paper 0 +multithread 0 +adapt 0 +pdecomput 0 +ctctr 0 +journal 0 +appli 0 +numer 0 +mathemat 0 +relat 0 +group 0 +pcrc 0 +consortium 0 +copyright 0 +copi 0 +nikosc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..2400458b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,30 @@ +look 1 +pavel 0 +naumov 0 +welcom 0 +home 0 +page 0 +invit 0 +learn 0 +locat 0 +cyberspac 0 +real 0 +world 0 +work 0 +nuprl 0 +project 0 +tire 0 +take 0 +rest 0 +galleri 0 +visit 0 +cinema 0 +photo 0 +orplai 0 +game 0 +java 0 +written 0 +sundai 0 +send 0 +mail 0 +place 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..33e38ba1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,194 @@ +pearson 1 +comput 0 +algorithm 0 +parallel 0 +scienc 0 +design 0 +time 0 +architectur 0 +program 0 +languag 0 +work 0 +problem 0 +machin 0 +implement 0 +ieee 0 +publish 0 +region 0 +cornel 0 +david 0 +research 0 +processor 0 +connect 0 +mesh 0 +vision 0 +realiz 0 +current 0 +cost 0 +theoret 0 +effici 0 +oper 0 +gener 0 +purpos 0 +believ 0 +realli 0 +instruct 0 +neumann 0 +network 0 +intern 0 +confer 0 +vazirani 0 +fast 0 +find 0 +maxim 0 +bipartit 0 +technolog 0 +lectur 0 +note 0 +journal 0 +johnson 0 +pingali 0 +singl 0 +control 0 +linear 0 +tech 0 +report 0 +structur 0 +sigplan 0 +global 0 +regist 0 +alloc 0 +simd 0 +multiprocessor 0 +interest 0 +thesi 0 +investig 0 +highli 0 +scalabl 0 +consistingof 0 +simpl 0 +dimension 0 +guid 0 +perhap 0 +year 0 +henc 0 +materi 0 +taken 0 +place 0 +crystal 0 +molecul 0 +lattic 0 +long 0 +goal 0 +prepar 0 +theubiquit 0 +offer 0 +must 0 +heed 0 +lawsof 0 +physic 0 +attent 0 +chip 0 +spatial 0 +layoutand 0 +hidden 0 +commun 0 +accomplishedbi 0 +pursu 0 +requir 0 +practic 0 +could 0 +character 0 +feasibl 0 +studi 0 +ihav 0 +produc 0 +cellular 0 +couldb 0 +hardwar 0 +simul 0 +thisarchitectur 0 +system 0 +designfor 0 +like 0 +proteinstructur 0 +grand 0 +challeng 0 +parallelcomput 0 +power 0 +succe 0 +becom 0 +commodityand 0 +sold 0 +desktop 0 +video 0 +game 0 +direct 0 +futur 0 +includ 0 +vlsi 0 +architectureand 0 +wide 0 +us 0 +hideth 0 +detail 0 +reflect 0 +underlyingvon 0 +architectureha 0 +good 0 +thing 0 +exploit 0 +parallelmachin 0 +need 0 +easyto 0 +estim 0 +public 0 +dunten 0 +arm 0 +kiewit 0 +high 0 +speed 0 +campu 0 +societi 0 +compcon 0 +fall 0 +pillai 0 +near 0 +optim 0 +placement 0 +sensor 0 +element 0 +transact 0 +inform 0 +theori 0 +foundat 0 +softwar 0 +sequenti 0 +set 0 +entri 0 +exit 0 +tree 0 +proceed 0 +pldi 0 +notic 0 +polynomi 0 +chang 0 +make 0 +schedul 0 +workshop 0 +irregularli 0 +irregular 0 +sept 0 +zippel 0 +allerton 0 +press 0 +select 0 +area 0 +cryptographi 0 +appear 0 +depart 0 +upson 0 +hallcornel 0 +universityithaca 0 +york 0 +usaemail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..e1fe256d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,126 @@ +page 1 +cornel 1 +work 0 +year 0 +program 0 +engin 0 +comput 0 +chapter 0 +mail 0 +todd 0 +peskin 0 +student 0 +client 0 +java 0 +applet 0 +content 0 +favorit 0 +site 0 +meng 0 +colleg 0 +receiv 0 +master 0 +follow 0 +also 0 +acacia 0 +contact 0 +sinc 0 +semest 0 +intern 0 +experi 0 +develop 0 +server 0 +softwar 0 +creat 0 +construct 0 +pictur 0 +resum 0 +cours 0 +taken 0 +univers 0 +joint 0 +degre 0 +offer 0 +jointli 0 +johnson 0 +graduat 0 +school 0 +manag 0 +scienc 0 +busi 0 +administr 0 +current 0 +presid 0 +fratern 0 +brother 0 +would 0 +like 0 +becom 0 +part 0 +list 0 +pleas 0 +eduand 0 +soon 0 +suppli 0 +andyour 0 +roll 0 +number 0 +abl 0 +complet 0 +request 0 +quickli 0 +well 0 +best 0 +reach 0 +check 0 +round 0 +alwai 0 +found 0 +mani 0 +time 0 +log 0 +depart 0 +fall 0 +junior 0 +workeda 0 +throughth 0 +cooper 0 +enabl 0 +cornellundergradu 0 +supplement 0 +classroom 0 +knowledg 0 +practic 0 +compani 0 +theirfield 0 +enhanc 0 +nation 0 +level 0 +databas 0 +system 0 +isrun 0 +microsystem 0 +run 0 +window 0 +espn 0 +stock 0 +quot 0 +onlin 0 +final 0 +project 0 +larg 0 +mpeg 0 +file 0 +visitor 0 +februari 0 +still 0 +hope 0 +possibl 0 +includ 0 +us 0 +case 0 +wonder 0 +ticker 0 +tape 0 +borrow 0 +permiss 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..4c99012e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,54 @@ +pierc 1 +home 1 +stuff 1 +cornel 0 +page 0 +david 0 +student 0 +comput 0 +scienc 0 +ithaca 0 +pittsburgh 0 +pennsylvania 0 +read 0 +dave 0 +univers 0 +address 0 +offic 0 +upson 0 +hall 0 +valentin 0 +person 0 +second 0 +year 0 +recent 0 +although 0 +familycurr 0 +resid 0 +citi 0 +approxim 0 +halfwai 0 +philadelphia 0 +andharrisburg 0 +famou 0 +shop 0 +outlet 0 +otherwis 0 +younev 0 +want 0 +howev 0 +great 0 +place 0 +mani 0 +alreadi 0 +know 0 +sinceit 0 +imposs 0 +without 0 +go 0 +throughpittsburgh 0 +month 0 +favorit 0 +quot 0 +list 0 +work 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..0ed7775c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,123 @@ +cornel 1 +comput 1 +talk 1 +center 0 +group 0 +compil 0 +system 0 +present 0 +indupraka 0 +kodukula 0 +home 0 +page 0 +theori 0 +univers 0 +scienc 0 +work 0 +research 0 +applic 0 +architectur 0 +vliw 0 +public 0 +imperfectli 0 +nest 0 +abl 0 +loop 0 +transform 0 +summer 0 +support 0 +packag 0 +engin 0 +ithaca 0 +praka 0 +student 0 +depart 0 +prior 0 +tothat 0 +undergradu 0 +madra 0 +bernoulli 0 +prof 0 +keshav 0 +pingali 0 +member 0 +nawaaz 0 +ahm 0 +vladimir 0 +kotlyar 0 +vijai 0 +menon 0 +paul 0 +stodghil 0 +also 0 +affili 0 +advanc 0 +institut 0 +interplai 0 +runtim 0 +tradit 0 +andmultiprocessor 0 +deriv 0 +fromscientif 0 +imag 0 +process 0 +multimedia 0 +withibm 0 +hasinterest 0 +well 0 +given 0 +seri 0 +dens 0 +technolog 0 +first 0 +athp 0 +chelmsford 0 +wasabout 0 +necess 0 +deal 0 +looptransform 0 +handl 0 +trivial 0 +code 0 +framework 0 +perform 0 +loopparallel 0 +seminar 0 +schloss 0 +dagstuhl 0 +april 0 +watson 0 +regard 0 +useof 0 +octob 0 +lab 0 +palo 0 +alto 0 +regardingdata 0 +centric 0 +multi 0 +level 0 +block 0 +teach 0 +taught 0 +program 0 +andoper 0 +project 0 +czar 0 +instal 0 +maintain 0 +availableund 0 +gener 0 +licens 0 +departmentmachin 0 +check 0 +andfind 0 +handi 0 +tip 0 +alsofind 0 +extens 0 +info 0 +random 0 +link 0 +person 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..eb3e146e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,49 @@ +praveen 1 +seshadri 1 +page 1 +ithaca 1 +databas 1 +project 1 +data 1 +home 0 +assist 0 +professor 0 +comput 0 +scienc 0 +depart 0 +cornel 0 +univers 0 +upson 0 +hall 0 +offic 0 +advanc 0 +system 0 +fall 0 +predat 0 +dbm 0 +adt 0 +know 0 +case 0 +enhanc 0 +abstract 0 +type 0 +sigmod 0 +submiss 0 +profession 0 +public 0 +time 0 +order 0 +manag 0 +sequenc 0 +postscript 0 +thesi 0 +tree 0 +save 0 +format 0 +person 0 +warren 0 +road 0 +ranjani 0 +ramamurthi 0 +green 0 +packer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..c84cf0d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,353 @@ +sequenc 1 +queri 0 +data 0 +databas 0 +relat 0 +oper 0 +model 0 +record 0 +system 0 +optim 0 +project 0 +order 0 +sequin 0 +us 0 +time 0 +manag 0 +effici 0 +languag 0 +posit 0 +praveen 0 +seshadri 0 +express 0 +evalu 0 +variou 0 +support 0 +process 0 +miron 0 +livni 0 +ramakrishnan 0 +object 0 +import 0 +requir 0 +includ 0 +scienc 0 +kind 0 +techniqu 0 +implement 0 +nest 0 +complex 0 +storag 0 +earthquak 0 +similar 0 +click 0 +raghu 0 +wisc 0 +domain 0 +exist 0 +allow 0 +need 0 +like 0 +next 0 +built 0 +defin 0 +also 0 +propos 0 +shore 0 +extens 0 +volcano 0 +recent 0 +plan 0 +scan 0 +singl 0 +mani 0 +view 0 +orient 0 +zoom 0 +group 0 +involv 0 +server 0 +proceed 0 +confer 0 +madison 0 +document 0 +collect 0 +set 0 +declar 0 +manner 0 +advantag 0 +user 0 +tempor 0 +previou 0 +demonstr 0 +feasibl 0 +form 0 +embed 0 +base 0 +exampl 0 +inform 0 +meteorolog 0 +phenomena 0 +sequenti 0 +strength 0 +greater 0 +would 0 +sort 0 +join 0 +store 0 +buffer 0 +gener 0 +answer 0 +detail 0 +aredescrib 0 +publish 0 +paper 0 +postscript 0 +version 0 +map 0 +call 0 +could 0 +flavor 0 +explor 0 +collaps 0 +expand 0 +last 0 +work 0 +probabl 0 +devis 0 +result 0 +client 0 +insid 0 +provid 0 +depart 0 +home 0 +pageth 0 +construct 0 +content 0 +objectivescurr 0 +statusmotiv 0 +exampleseq 0 +languageoptim 0 +techniquesseq 0 +developmentpublicationsrel 0 +workcontact 0 +informationproject 0 +number 0 +applic 0 +processingof 0 +larg 0 +amount 0 +theseappl 0 +financi 0 +histor 0 +analysi 0 +econom 0 +social 0 +metereolog 0 +medic 0 +andbiolog 0 +inadequ 0 +regard 0 +treat 0 +consequ 0 +tediou 0 +ineffici 0 +abstract 0 +util 0 +semanticstak 0 +uniqu 0 +opportun 0 +avail 0 +evaluationintegr 0 +canstor 0 +combin 0 +sequencesthes 0 +serv 0 +goal 0 +themost 0 +notion 0 +natur 0 +consid 0 +issu 0 +studi 0 +theori 0 +theoret 0 +idea 0 +statusth 0 +current 0 +statu 0 +algebraicqueri 0 +compos 0 +analogousto 0 +composit 0 +algebra 0 +describ 0 +identifi 0 +candeclar 0 +likesql 0 +vice 0 +versa 0 +build 0 +disk 0 +architectur 0 +sever 0 +megabyt 0 +integr 0 +motiv 0 +querya 0 +weather 0 +monitor 0 +occurr 0 +event 0 +scientist 0 +ask 0 +erupt 0 +didth 0 +richter 0 +scale 0 +featur 0 +groupbi 0 +claus 0 +correl 0 +subqueri 0 +aggregatefunct 0 +convent 0 +find 0 +execut 0 +even 0 +given 0 +knowledg 0 +howev 0 +sequencesord 0 +lock 0 +step 0 +merg 0 +temporari 0 +whenev 0 +valu 0 +check 0 +possibl 0 +therefor 0 +littl 0 +memori 0 +modelth 0 +present 0 +gist 0 +basic 0 +ordereddomain 0 +relationship 0 +andposit 0 +dual 0 +distinct 0 +wai 0 +recordsmap 0 +respect 0 +give 0 +rise 0 +either 0 +relationaloper 0 +overlap 0 +contain 0 +andaggreg 0 +researchersin 0 +commun 0 +offset 0 +movingaggreg 0 +mean 0 +associ 0 +instanc 0 +daili 0 +weekli 0 +hourli 0 +part 0 +deal 0 +make 0 +easi 0 +case 0 +real 0 +worldsitu 0 +extend 0 +instead 0 +extensionof 0 +indic 0 +practic 0 +ofseq 0 +languagew 0 +usingwhich 0 +specifi 0 +languagei 0 +except 0 +input 0 +queriesa 0 +well 0 +descript 0 +techniquesw 0 +thathav 0 +transform 0 +meta 0 +cach 0 +intermedi 0 +algorithm 0 +reli 0 +cost 0 +estim 0 +observ 0 +access 0 +stream 0 +strategi 0 +take 0 +account 0 +developmentth 0 +serverarchitectur 0 +multipl 0 +viaa 0 +multi 0 +thread 0 +ontop 0 +subset 0 +languageswhich 0 +mode 0 +arbitrarylevel 0 +viceversa 0 +supportfor 0 +type 0 +function 0 +detailson 0 +publicationssequ 0 +sigmod 0 +framework 0 +datapraveen 0 +ieee 0 +engin 0 +march 0 +design 0 +systempraveen 0 +submit 0 +vldb 0 +queriesraghu 0 +michael 0 +cheng 0 +intern 0 +comad 0 +decemb 0 +workthedevis 0 +complementari 0 +visualizationenviron 0 +front 0 +pose 0 +examin 0 +graphic 0 +peopl 0 +research 0 +servercontact 0 +informationfor 0 +contact 0 +eduraghu 0 +edumiron 0 +educomput 0 +univers 0 +wisconsin 0 +dayton 0 +street 0 +modifi 0 +seshadripraveen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..1ab4fc92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,50 @@ +ralph 1 +student 1 +comput 1 +scienc 1 +fellow 1 +cornel 1 +benzingerralph 0 +benzingerw 0 +sich 0 +seinen 0 +lorbeeren 0 +ausruht 0 +trgt 0 +derfalschen 0 +stell 0 +stori 0 +exchang 0 +univers 0 +karlsruh 0 +germani 0 +german 0 +august 0 +studienstiftung 0 +deutschen 0 +volk 0 +fulbright 0 +member 0 +siemen 0 +international 0 +studentenkrei 0 +alumnusat 0 +graduat 0 +depart 0 +cours 0 +taken 0 +advanc 0 +program 0 +languag 0 +design 0 +analysi 0 +algorithm 0 +reason 0 +knowledg 0 +contact 0 +inform 0 +mail 0 +offic 0 +upson 0 +hall 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..5597afd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,291 @@ +audio 1 +latex 0 +postscript 0 +section 0 +aster 0 +render 0 +express 0 +exampl 0 +produc 0 +us 0 +notic 0 +convei 0 +formula 0 +structur 0 +second 0 +first 0 +mathemat 0 +gener 0 +document 0 +format 0 +book 0 +demonstr 0 +fraction 0 +bruno 0 +nest 0 +integr 0 +listen 0 +reader 0 +comput 0 +spoken 0 +system 0 +version 0 +talk 0 +demo 0 +output 0 +effect 0 +along 0 +dimens 0 +superscript 0 +allow 0 +knuth 0 +percept 0 +recogn 0 +speak 0 +base 0 +interpret 0 +refer 0 +present 0 +level 0 +complex 0 +technic 0 +made 0 +avail 0 +three 0 +stereo 0 +visual 0 +progress 0 +continu 0 +file 0 +contain 0 +simpl 0 +succinctli 0 +vari 0 +space 0 +subscript 0 +independ 0 +taken 0 +power 0 +unambigu 0 +move 0 +monoton 0 +algebra 0 +follow 0 +written 0 +hard 0 +logarithm 0 +context 0 +specif 0 +user 0 +variabl 0 +tripl 0 +cross 0 +meaning 0 +name 0 +inton 0 +text 0 +intermix 0 +quantifi 0 +calcul 0 +sinc 0 +hear 0 +substitut 0 +process 0 +raman 0 +titl 0 +demonstrationi 0 +dedic 0 +guid 0 +read 0 +forrend 0 +develop 0 +myphd 0 +page 0 +thesi 0 +approxim 0 +hour 0 +record 0 +blind 0 +abstract 0 +print 0 +hypertext 0 +enhanc 0 +inlin 0 +imag 0 +compon 0 +origin 0 +input 0 +speech 0 +dectalk 0 +digit 0 +mulaw 0 +tabl 0 +mono 0 +encod 0 +dvip 0 +difficult 0 +suggest 0 +initi 0 +sequenti 0 +short 0 +typic 0 +show 0 +peopl 0 +andround 0 +quick 0 +overview 0 +faad 0 +casey 0 +want 0 +look 0 +place 0 +singl 0 +examplessinc 0 +would 0 +voic 0 +inflect 0 +paus 0 +toconvei 0 +group 0 +state 0 +renderingsub 0 +attribut 0 +audiost 0 +orthogon 0 +dimensionus 0 +mutual 0 +concept 0 +expon 0 +verbatim 0 +donald 0 +layoutoper 0 +compris 0 +symbol 0 +verydiffer 0 +defin 0 +monotonicchang 0 +notion 0 +vital 0 +school 0 +squar 0 +root 0 +choic 0 +trigonometr 0 +ident 0 +notat 0 +ambigu 0 +complet 0 +absenc 0 +parenthesi 0 +sever 0 +heurist 0 +construct 0 +correct 0 +tree 0 +forthes 0 +chosen 0 +reduc 0 +cognit 0 +load 0 +oppos 0 +seri 0 +rule 0 +asexpon 0 +wire 0 +isfulli 0 +customiz 0 +probabl 0 +innocu 0 +also 0 +mostdifficult 0 +imposs 0 +determin 0 +ofintegr 0 +applic 0 +theintegr 0 +oper 0 +brows 0 +piec 0 +shown 0 +trick 0 +experienc 0 +ofhuman 0 +error 0 +summat 0 +limit 0 +referenc 0 +equat 0 +meant 0 +illustr 0 +ofcross 0 +interact 0 +enabl 0 +give 0 +referenceableobject 0 +object 0 +latercross 0 +distanc 0 +good 0 +thati 0 +interest 0 +challeng 0 +exponenti 0 +followingdeepli 0 +emac 0 +full 0 +fledgedsymbol 0 +interfac 0 +directli 0 +justa 0 +well 0 +matrix 0 +dimension 0 +thematrix 0 +commenc 0 +left 0 +right 0 +aseach 0 +element 0 +ofcomput 0 +program 0 +heard 0 +took 0 +secondsto 0 +util 0 +featur 0 +spacenot 0 +human 0 +still 0 +changeth 0 +size 0 +shape 0 +head 0 +take 0 +soon 0 +even 0 +long 0 +forget 0 +begin 0 +thetim 0 +later 0 +techniquefor 0 +proper 0 +glori 0 +like 0 +upon 0 +request 0 +replac 0 +identifi 0 +renderingsconvei 0 +thesub 0 +separ 0 +perform 0 +orpostscript 0 +equival 0 +case 0 +lower 0 +constraint 0 +numer 0 +denomin 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..388e2a78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,63 @@ +comput 1 +ravi 0 +scienc 0 +foundat 0 +self 0 +learn 0 +confer 0 +kumar 0 +cornel 0 +program 0 +check 0 +approxim 0 +ieee 0 +octob 0 +funda 0 +uumln 0 +ronitt 0 +rubinfeld 0 +test 0 +sivakumar 0 +lnc 0 +theori 0 +combinator 0 +parallel 0 +process 0 +depart 0 +univers 0 +ithaca 0 +polynomi 0 +function 0 +equat 0 +effici 0 +correct 0 +linear 0 +recurr 0 +without 0 +gener 0 +bottleneck 0 +softwar 0 +technolog 0 +theoret 0 +decemb 0 +bound 0 +width 0 +branch 0 +juli 0 +latin 0 +squar 0 +extens 0 +june 0 +alexand 0 +russel 0 +sundaram 0 +scalabl 0 +studi 0 +intern 0 +august 0 +jeyakumar 0 +muthukumarasami 0 +umakishor 0 +ramachandran 0 +gautam 0 +shah 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..9e874511 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,147 @@ +comput 1 +ramin 0 +zabih 0 +vision 0 +imag 0 +avail 0 +justin 0 +confer 0 +interest 0 +multimedia 0 +also 0 +greg 0 +pass 0 +miller 0 +base 0 +ieee 0 +novemb 0 +program 0 +current 0 +us 0 +undergradu 0 +content 0 +workshop 0 +third 0 +teach 0 +introduct 0 +cours 0 +page 0 +cvpr 0 +held 0 +home 0 +pageramin 0 +zabihassist 0 +professorrdz 0 +cornel 0 +researchmi 0 +research 0 +construct 0 +search 0 +engin 0 +method 0 +develop 0 +recent 0 +think 0 +econom 0 +impact 0 +freeli 0 +price 0 +inform 0 +essai 0 +subject 0 +appear 0 +phil 0 +agr 0 +electron 0 +newslett 0 +network 0 +observ 0 +march 0 +studentsi 0 +work 0 +student 0 +jing 0 +huang 0 +vera 0 +kettnak 0 +olga 0 +veksler 0 +spend 0 +fair 0 +amount 0 +time 0 +variou 0 +princip 0 +voskuhl 0 +includ 0 +scott 0 +cytacki 0 +szewczyk 0 +publicationsmost 0 +public 0 +postscript 0 +acrobat 0 +format 0 +free 0 +reader 0 +varieti 0 +differ 0 +architectur 0 +adob 0 +histogram 0 +refin 0 +retriev 0 +applic 0 +sarasota 0 +florida 0 +decemb 0 +compar 0 +color 0 +coher 0 +vector 0 +fourth 0 +boston 0 +massachusett 0 +featur 0 +algorithm 0 +detect 0 +classifi 0 +scene 0 +break 0 +kevin 0 +francisco 0 +california 0 +parametr 0 +local 0 +transform 0 +visual 0 +correspond 0 +john 0 +woodfil 0 +european 0 +stockholm 0 +sweden 0 +teachingi 0 +spring 0 +scribe 0 +note 0 +lectur 0 +taught 0 +profession 0 +activitiesi 0 +comitte 0 +pattern 0 +recognit 0 +juan 0 +june 0 +organ 0 +committe 0 +access 0 +video 0 +librari 0 +conjunct 0 +acknowledgementsthi 0 +design 0 +courtesi 0 +huttenlocherlast 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..723aa152 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,4 @@ +roderick 1 +moten 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..dd0de518 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,45 @@ +ronitt 1 +comput 1 +scienc 1 +rubinfeld 0 +cornel 0 +depart 0 +fall 0 +homepageronitt 0 +rubinfeldi 0 +assist 0 +professor 0 +recent 0 +paper 0 +talk 0 +cours 0 +random 0 +spring 0 +engin 0 +graduat 0 +student 0 +funda 0 +ergun 0 +ravi 0 +kumar 0 +fair 0 +homepag 0 +wasserman 0 +page 0 +describ 0 +work 0 +research 0 +area 0 +result 0 +check 0 +address 0 +rubinfeldcomput 0 +upson 0 +hallcornel 0 +universityithaca 0 +york 0 +telephon 0 +email 0 +edupictur 0 +nephew 0 +eitan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..993bd0b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,73 @@ +comput 1 +scienc 0 +depart 0 +birman 0 +distribut 0 +technolog 0 +friedman 0 +doctor 0 +associ 0 +cornel 0 +work 0 +system 0 +project 0 +thedepart 0 +thetechnion 0 +israel 0 +institut 0 +technic 0 +report 0 +cornellunivers 0 +implement 0 +friedmanroi 0 +friedmanpost 0 +universityroi 0 +edui 0 +post 0 +scienceatcornel 0 +univers 0 +withken 0 +androbbert 0 +rennessein 0 +area 0 +mainli 0 +thehoru 0 +receiv 0 +advisor 0 +washagit 0 +attiya 0 +thesi 0 +titl 0 +wasconsist 0 +condit 0 +share 0 +memori 0 +current 0 +also 0 +involv 0 +themilliped 0 +withassaf 0 +schuster 0 +recent 0 +papersr 0 +trade 0 +consist 0 +avail 0 +us 0 +group 0 +commun 0 +reliabl 0 +scalabledistribut 0 +coprocessor 0 +appear 0 +tina 0 +vaysburd 0 +replic 0 +state 0 +machin 0 +partition 0 +network 0 +full 0 +list 0 +public 0 +clickher 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..ee8911a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,28 @@ +cornel 1 +daniela 0 +home 0 +page 0 +research 0 +comput 0 +scienc 0 +associ 0 +photograph 0 +address 0 +upson 0 +hall 0 +depart 0 +univers 0 +ithaca 0 +model 0 +simul 0 +recent 0 +paper 0 +version 0 +onlin 0 +tech 0 +report 0 +librari 0 +catalogc 0 +dept 0 +infodesign 0 +institut 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..b1b67e0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,92 @@ +system 1 +horu 1 +ithaca 0 +jazz 0 +dutch 0 +group 0 +commun 0 +research 0 +brand 0 +network 0 +design 0 +perform 0 +lightweight 0 +version 0 +protocol 0 +composit 0 +support 0 +robbert 0 +renesserobbert 0 +renessesenior 0 +associatecornel 0 +universityrvr 0 +cornel 0 +edui 0 +senior 0 +associ 0 +depart 0 +comput 0 +scienceatcornel 0 +universityinithaca 0 +work 0 +withken 0 +birmanin 0 +area 0 +distribut 0 +advisor 0 +wasandi 0 +tanenbaum 0 +interestsmi 0 +babi 0 +girl 0 +hous 0 +tacoma 0 +project 0 +caml 0 +applet 0 +nynetth 0 +ageless 0 +band 0 +swing 0 +danc 0 +guitar 0 +accordion 0 +sharewar 0 +stuffcornel 0 +club 0 +contain 0 +mani 0 +link 0 +netherland 0 +ithacaithacanet 0 +spinner 0 +market 0 +place 0 +paperssoftwar 0 +reliabl 0 +scientif 0 +american 0 +html 0 +framework 0 +incorpor 0 +resourc 0 +inform 0 +flow 0 +control 0 +strong 0 +weak 0 +virtual 0 +synchroni 0 +flexibl 0 +secur 0 +architectur 0 +fault 0 +toler 0 +complex 0 +multi 0 +media 0 +applic 0 +us 0 +oper 0 +mobil 0 +agent 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..619d13bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,100 @@ +sabel 1 +laura 0 +system 0 +comput 0 +cornel 0 +marzullo 0 +univers 0 +failur 0 +distribut 0 +page 0 +scienc 0 +detect 0 +detector 0 +keith 0 +technic 0 +report 0 +postscript 0 +copi 0 +click 0 +proceed 0 +inform 0 +research 0 +asynchron 0 +approxim 0 +perfect 0 +asynchronousdistribut 0 +version 0 +appear 0 +symposium 0 +octob 0 +jelli 0 +bingo 0 +profession 0 +doctor 0 +professorkeith 0 +california 0 +diego 0 +formor 0 +tushar 0 +chandra 0 +sfailur 0 +final 0 +finish 0 +thesi 0 +public 0 +elect 0 +consensu 0 +februari 0 +submit 0 +process 0 +letter 0 +annual 0 +principl 0 +distributedcomput 0 +august 0 +reliabl 0 +march 0 +revis 0 +june 0 +us 0 +consist 0 +subcut 0 +stabl 0 +properti 0 +intern 0 +workshop 0 +algorithm 0 +wdag 0 +publish 0 +springer 0 +verlag 0 +lecturenot 0 +seri 0 +expand 0 +horizon 0 +cow 0 +interest 0 +strawberri 0 +tart 0 +blow 0 +torch 0 +experi 0 +alpacanet 0 +electron 0 +gourmet 0 +guid 0 +thebobbi 0 +award 0 +especi 0 +spam 0 +belli 0 +bean 0 +free 0 +sampl 0 +answer 0 +survei 0 +zone 0 +canplai 0 +cash 0 +prize 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..bf6f9bfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,168 @@ +system 1 +toueg 0 +distribut 0 +wait 0 +free 0 +object 0 +consensu 0 +comput 0 +solv 0 +failur 0 +fault 0 +algorithm 0 +toler 0 +asynchron 0 +process 0 +hierarchi 0 +journal 0 +research 0 +work 0 +jayanti 0 +failuredetector 0 +share 0 +explor 0 +implement 0 +level 0 +chandra 0 +proceed 0 +symposium 0 +interest 0 +messag 0 +pass 0 +memori 0 +result 0 +prasad 0 +scienc 0 +determin 0 +crash 0 +unreli 0 +detector 0 +correct 0 +weakest 0 +type 0 +clock 0 +synchron 0 +databas 0 +neiger 0 +principl 0 +august 0 +canada 0 +faculti 0 +professorph 0 +princeton 0 +univers 0 +interestsmi 0 +includ 0 +toleranceand 0 +real 0 +time 0 +methodolog 0 +paradigm 0 +forfault 0 +andshar 0 +long 0 +term 0 +goal 0 +bridg 0 +gapbetween 0 +theoret 0 +need 0 +effici 0 +practicalsolut 0 +collabor 0 +withtushar 0 +chandraand 0 +student 0 +continu 0 +onunreli 0 +fundament 0 +computingst 0 +problem 0 +cannot 0 +adeterminist 0 +impossibilityresult 0 +inher 0 +difficulti 0 +whether 0 +aprocess 0 +mere 0 +slow 0 +inour 0 +abl 0 +exactli 0 +much 0 +informationabout 0 +necessari 0 +suffici 0 +wefirst 0 +show 0 +canmak 0 +infinit 0 +number 0 +mistak 0 +systemswith 0 +major 0 +prove 0 +solveconsensu 0 +provid 0 +least 0 +muchinform 0 +thu 0 +amajor 0 +practicalityof 0 +applic 0 +reli 0 +theircorrect 0 +concurr 0 +consist 0 +commun 0 +sharedobject 0 +accessesthi 0 +guarante 0 +respons 0 +even 0 +otherprocess 0 +ofobject 0 +assign 0 +thatcorrespond 0 +abil 0 +particular 0 +shown 0 +well 0 +known 0 +herlihi 0 +robust 0 +inform 0 +anobject 0 +us 0 +atani 0 +question 0 +whetherrobust 0 +exist 0 +select 0 +public 0 +bracha 0 +broadcast 0 +protocol 0 +srikanth 0 +optim 0 +abbadi 0 +maintain 0 +avail 0 +partit 0 +replic 0 +transact 0 +automat 0 +increas 0 +montreal 0 +hadzilaco 0 +detectorfor 0 +vancouv 0 +ieee 0 +foundat 0 +octob 0 +pittsburgh 0 +pennsylvania 0 +simul 0 +common 0 +knowledg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..8975400d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,100 @@ +weber 1 +compil 0 +samuel 0 +cornel 0 +univers 0 +program 0 +comput 0 +semant 0 +bloom 0 +silicon 0 +page 0 +assist 0 +design 0 +public 0 +confer 0 +brown 0 +act 0 +professor 0 +master 0 +engin 0 +scienc 0 +research 0 +softwar 0 +languag 0 +distribut 0 +system 0 +technic 0 +report 0 +submit 0 +verifi 0 +algebra 0 +thesi 0 +editor 0 +proceed 0 +messag 0 +complex 0 +byzantin 0 +agreement 0 +upson 0 +hallphon 0 +email 0 +educurr 0 +director 0 +interest 0 +specif 0 +verif 0 +cours 0 +technolog 0 +techniqu 0 +fall 0 +introduct 0 +spring 0 +metatheori 0 +calculu 0 +formal 0 +delai 0 +insensit 0 +circuit 0 +cornellunivers 0 +journal 0 +process 0 +meta 0 +theori 0 +practic 0 +august 0 +exercis 0 +appli 0 +structur 0 +oper 0 +workshop 0 +foundat 0 +applic 0 +bakker 0 +roever 0 +rozenberg 0 +lectur 0 +note 0 +springer 0 +verlag 0 +scheme 0 +knight 0 +savag 0 +advanc 0 +vlsi 0 +parallel 0 +amdur 0 +hadzilaco 0 +binari 0 +crash 0 +failur 0 +bound 0 +toronto 0 +septemb 0 +seshadri 0 +wortman 0 +small 0 +analysi 0 +concurr 0 +sigplan 0 +implement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..858d4ca0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,81 @@ +comput 1 +project 0 +system 0 +graphic 0 +isi 0 +master 0 +engin 0 +cornel 0 +window 0 +interest 0 +object 0 +orbix 0 +sean 0 +landi 0 +current 0 +cours 0 +render 0 +draw 0 +base 0 +design 0 +orient 0 +work 0 +team 0 +basebal 0 +landissean 0 +sciencewelcom 0 +home 0 +page 0 +resum 0 +inform 0 +advanc 0 +databas 0 +systemsc 0 +past 0 +machin 0 +percept 0 +final 0 +analyz 0 +color 0 +book 0 +clickherefor 0 +postscript 0 +version 0 +sampl 0 +weanalyz 0 +topic 0 +content 0 +imag 0 +retriev 0 +interior 0 +educ 0 +program 0 +patternsprofession 0 +distribut 0 +divis 0 +stratu 0 +lead 0 +develop 0 +product 0 +combin 0 +acorba 0 +compliant 0 +request 0 +broker 0 +iona 0 +technolog 0 +releas 0 +person 0 +favorit 0 +alpin 0 +ski 0 +golf 0 +plai 0 +softbal 0 +card 0 +collectingi 0 +reach 0 +comeduc 0 +sheet 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..ce264091 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,62 @@ +comput 1 +system 0 +project 0 +seena 0 +scienc 0 +engin 0 +oper 0 +graphic 0 +univers 0 +cornel 0 +ithaca 0 +cherangara 0 +cherangaramast 0 +engineeringclass 0 +dept 0 +sciencecornel 0 +welcom 0 +homepagecurr 0 +student 0 +depart 0 +tech 0 +degre 0 +colleg 0 +trivandrum 0 +kerala 0 +india 0 +inform 0 +cours 0 +taken 0 +cornelluniversityfal 0 +practicum 0 +specif 0 +hoca 0 +softwar 0 +multimedia 0 +post 0 +processingalgorithm 0 +jpeg 0 +artifact 0 +reduct 0 +spring 0 +cspracticum 0 +anim 0 +magic 0 +carpet 0 +distribut 0 +colloqium 0 +summer 0 +model 0 +java 0 +parametr 0 +equat 0 +viewer 0 +click 0 +postscript 0 +version 0 +myresum 0 +mapl 0 +york 0 +last 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..78a3516b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,238 @@ +imag 1 +rosen 0 +sharma 0 +video 0 +us 0 +like 0 +languag 0 +name 0 +internet 0 +file 0 +sharmila 0 +stanford 0 +multicast 0 +protocol 0 +develop 0 +system 0 +script 0 +widget 0 +call 0 +mean 0 +cornel 0 +student 0 +stream 0 +media 0 +part 0 +link 0 +softwar 0 +class 0 +also 0 +text 0 +vxtreme 0 +applic 0 +paper 0 +oper 0 +support 0 +nativ 0 +mode 0 +releas 0 +written 0 +indian 0 +stuff 0 +cool 0 +manipul 0 +data 0 +make 0 +implement 0 +give 0 +imagefram 0 +interpret 0 +shell 0 +gener 0 +record 0 +postscript 0 +write 0 +peopl 0 +claim 0 +hors 0 +offic 0 +upson 0 +hall 0 +email 0 +came 0 +spent 0 +coupl 0 +year 0 +work 0 +research 0 +live 0 +audio 0 +modifiedigmp 0 +unicast 0 +layer 0 +virtual 0 +classroom 0 +initi 0 +prototyp 0 +deploi 0 +spring 0 +fall 0 +quarter 0 +altern 0 +instruct 0 +televis 0 +network 0 +sitn 0 +program 0 +microwav 0 +remot 0 +site 0 +asynchron 0 +access 0 +lectur 0 +note 0 +stumbl 0 +upon 0 +fact 0 +slide 0 +portion 0 +enhanc 0 +greatli 0 +leadto 0 +format 0 +compani 0 +palo 0 +alto 0 +silicon 0 +vallei 0 +start 0 +client 0 +sever 0 +multimedia 0 +deliveri 0 +signal 0 +keshav 0 +sigcomm 0 +segment 0 +mix 0 +navin 0 +chaddha 0 +avneesh 0 +agarw 0 +anoop 0 +gupta 0 +asilomar 0 +igmp 0 +group 0 +membership 0 +design 0 +steve 0 +deer 0 +internetdraft 0 +bill 0 +fenner 0 +optic 0 +charact 0 +recognit 0 +statist 0 +structur 0 +method 0 +niten 0 +malhan 0 +bachelor 0 +thesi 0 +dept 0 +comput 0 +scienc 0 +institut 0 +technolog 0 +delhiunpublish 0 +character 0 +variabl 0 +rate 0 +sourc 0 +term 0 +preform 0 +conferenc 0 +intern 0 +report 0 +first 0 +type 0 +allow 0 +sequenc 0 +blur 0 +speckl 0 +transform 0 +affin 0 +subband 0 +motion 0 +estmat 0 +fast 0 +effici 0 +writen 0 +current 0 +test 0 +machin 0 +displai 0 +flavour 0 +look 0 +width 0 +height 0 +ifram 0 +nodisplai 0 +filenam 0 +putimageincanva 0 +predecessor 0 +hate 0 +motif 0 +yacc 0 +limit 0 +given 0 +compil 0 +dummi 0 +event 0 +snooper 0 +player 0 +consid 0 +coolest 0 +thing 0 +ever 0 +someth 0 +similar 0 +microsoft 0 +window 0 +doesnt 0 +replai 0 +lot 0 +kludg 0 +fool 0 +server 0 +fractal 0 +creat 0 +directori 0 +hole 0 +viewer 0 +fix 0 +dissalow 0 +semant 0 +question 0 +often 0 +ask 0 +gaveth 0 +chines 0 +friend 0 +wonder 0 +small 0 +smart 0 +hindi 0 +tongu 0 +sharm 0 +shyness 0 +actress 0 +tagor 0 +nicknam 0 +frozen 0 +stupid 0 +ealri 0 +jewish 0 +leader 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..afdd122d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,114 @@ +page 1 +comput 0 +eric 0 +home 0 +univers 0 +cornel 0 +korean 0 +project 0 +music 0 +friend 0 +shim 0 +view 0 +version 0 +scienc 0 +meng 0 +acoust 0 +movi 0 +jazz 0 +kwan 0 +pagewelcom 0 +young 0 +sang 0 +shimmast 0 +engin 0 +sciencecornel 0 +address 0 +dryden 0 +citi 0 +ithaca 0 +phone 0 +click 0 +onthi 0 +receiv 0 +degre 0 +california 0 +irvinestudi 0 +inform 0 +system 0 +camera 0 +transform 0 +abstract 0 +final 0 +graphic 0 +classi 0 +love 0 +plai 0 +follow 0 +instrument 0 +guitar 0 +piano 0 +keyboard 0 +listen 0 +stan 0 +getz 0 +antonio 0 +carlo 0 +jobim 0 +john 0 +coltran 0 +mile 0 +davi 0 +earl 0 +klugh 0 +metheni 0 +archemi 0 +chopin 0 +watch 0 +cinema 0 +paradiso 0 +french 0 +kiss 0 +miser 0 +miss 0 +saigon 0 +favorit 0 +korea 0 +newswant 0 +know 0 +graduat 0 +student 0 +associ 0 +anybodi 0 +like 0 +check 0 +interest 0 +java 0 +cyberspac 0 +hana 0 +work 0 +melco 0 +last 0 +time 0 +went 0 +world 0 +jung 0 +hwan 0 +middl 0 +school 0 +back 0 +victor 0 +hong 0 +jiyang 0 +kang 0 +homepag 0 +access 0 +timessinc 0 +still 0 +construct 0 +resum 0 +avail 0 +near 0 +futur 0 +also 0 +soon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..f72ad866 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,169 @@ +text 1 +amit 0 +singhal 0 +gerard 0 +salton 0 +chri 0 +bucklei 0 +retriev 0 +automat 0 +smart 0 +us 0 +paper 0 +trec 0 +structur 0 +research 0 +inform 0 +group 0 +document 0 +cornel 0 +normal 0 +system 0 +confer 0 +mandar 0 +jame 0 +allan 0 +student 0 +home 0 +scienc 0 +process 0 +thesi 0 +prof 0 +gerardsalton 0 +current 0 +field 0 +length 0 +size 0 +lengthnorm 0 +propos 0 +pivot 0 +techniqu 0 +nist 0 +provid 0 +select 0 +mandarmitra 0 +decomposit 0 +mitra 0 +appear 0 +theme 0 +gener 0 +analysi 0 +pageamit 0 +singhaldepart 0 +comput 0 +universitysingh 0 +eduphon 0 +interest 0 +area 0 +andtext 0 +advisor 0 +late 0 +supervisor 0 +clairecardieher 0 +postscript 0 +copi 0 +resum 0 +depart 0 +beenon 0 +foremost 0 +informationretriev 0 +last 0 +thirti 0 +year 0 +involv 0 +fairli 0 +vari 0 +commonli 0 +term 0 +weight 0 +show 0 +thateffect 0 +chancessimilar 0 +likelihood 0 +relev 0 +modifi 0 +exist 0 +normalizationfunct 0 +yield 0 +substanti 0 +improv 0 +retrievaleffect 0 +also 0 +effect 0 +normalizationtechniqu 0 +trecparticipationtext 0 +arpa 0 +sponsoredeffort 0 +object 0 +evalu 0 +variou 0 +retrievaltechniqu 0 +independ 0 +testb 0 +hasconsist 0 +best 0 +somepap 0 +summarizationnon 0 +expositori 0 +usual 0 +read 0 +cover 0 +tocov 0 +reader 0 +help 0 +circumst 0 +selectiveaccess 0 +excerpt 0 +need 0 +develop 0 +toanalyz 0 +tool 0 +texttravers 0 +papersnorm 0 +documentlength 0 +mitraand 0 +degrad 0 +collect 0 +come 0 +soon 0 +approach 0 +usingsmart 0 +queri 0 +expans 0 +proceedingsof 0 +third 0 +special 0 +public 0 +segment 0 +textthem 0 +hypertext 0 +andmanag 0 +brows 0 +vectorspac 0 +model 0 +proceed 0 +ofth 0 +dual 0 +technolog 0 +applic 0 +travers 0 +summar 0 +machineread 0 +amitsingh 0 +june 0 +groupmemb 0 +senior 0 +associ 0 +david 0 +master 0 +engin 0 +other 0 +slowli 0 +fluctuat 0 +thank 0 +visit 0 +page 0 +visitor 0 +sinc 0 +iinstal 0 +counter 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..93c64211 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,153 @@ +name 1 +cornel 0 +version 0 +work 0 +home 0 +build 0 +keshav 0 +skeshav 0 +depart 0 +comput 0 +scienc 0 +univers 0 +ithaca 0 +sinc 0 +network 0 +idlinet 0 +base 0 +avail 0 +nativ 0 +mode 0 +also 0 +site 0 +paper 0 +real 0 +simul 0 +peopl 0 +last 0 +keshavemail 0 +upson 0 +hall 0 +christoph 0 +lane 0 +edui 0 +current 0 +associ 0 +professor 0 +spentfiv 0 +year 0 +xunet 0 +wide 0 +area 0 +built 0 +scratch 0 +router 0 +switch 0 +softwar 0 +oper 0 +incollabor 0 +delhi 0 +equip 0 +fore 0 +systemsand 0 +zeitnet 0 +idlinetsourc 0 +code 0 +public 0 +domain 0 +page 0 +featur 0 +protocol 0 +stack 0 +applicationget 0 +write 0 +directli 0 +virtual 0 +circuit 0 +support 0 +independ 0 +signal 0 +span 0 +compliant 0 +final 0 +goof 0 +talk 0 +head 0 +facial 0 +anim 0 +snoop 0 +send 0 +video 0 +format 0 +mbone 0 +canb 0 +driven 0 +remot 0 +internet 0 +linkspapersher 0 +linkto 0 +postscript 0 +reali 0 +packet 0 +level 0 +still 0 +maintain 0 +instal 0 +idea 0 +mani 0 +actual 0 +link 0 +latest 0 +releas 0 +fall 0 +includ 0 +beout 0 +goe 0 +well 0 +native_mod 0 +pagemi 0 +namein 0 +part 0 +world 0 +come 0 +south 0 +india 0 +thanjavur 0 +district 0 +beprecis 0 +prefix 0 +father 0 +sonli 0 +sometim 0 +villag 0 +surnam 0 +thu 0 +myfath 0 +srinivasan 0 +unfortun 0 +round 0 +intoth 0 +squar 0 +hole 0 +custom 0 +first 0 +beconfus 0 +quotabl 0 +quot 0 +ought 0 +everi 0 +least 0 +hear 0 +littl 0 +song 0 +read 0 +good 0 +poem 0 +possibl 0 +speak 0 +reason 0 +word 0 +johann 0 +wolfgang 0 +goeth 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..27a69d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,23 @@ +cornel 1 +kenneth 0 +page 0 +engin 0 +depart 0 +home 0 +road 0 +success 0 +alwai 0 +construct 0 +meng 0 +electr 0 +sinc 0 +work 0 +prof 0 +zabih 0 +place 0 +student 0 +came 0 +univers 0 +wisconsin 0 +madison 0 +sunlab 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..3fcce922 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,329 @@ +seem 1 +clear 0 +much 0 +sound 0 +start 0 +quit 0 +also 0 +like 0 +us 0 +work 0 +pictur 0 +around 0 +school 0 +well 0 +look 0 +talk 0 +wear 0 +ring 0 +visit 0 +friend 0 +last 0 +page 0 +come 0 +tree 0 +especi 0 +figur 0 +snow 0 +insid 0 +leav 0 +label 0 +sai 0 +small 0 +presum 0 +place 0 +glass 0 +book 0 +vagu 0 +topic 0 +read 0 +probabl 0 +somewhat 0 +hair 0 +link 0 +next 0 +obviou 0 +heha 0 +beaver 0 +finger 0 +left 0 +someon 0 +peopl 0 +time 0 +internet 0 +half 0 +year 0 +summer 0 +busi 0 +better 0 +perri 0 +world 0 +cornel 0 +step 0 +forest 0 +clearinglook 0 +realiz 0 +must 0 +walk 0 +thanyou 0 +plan 0 +wide 0 +varieti 0 +surround 0 +onal 0 +side 0 +theweath 0 +fairli 0 +overcast 0 +somehow 0 +ifit 0 +go 0 +rain 0 +perhap 0 +distanc 0 +larg 0 +mountain 0 +quiteclear 0 +question 0 +hear 0 +bird 0 +chirp 0 +near 0 +cours 0 +theymai 0 +respond 0 +louder 0 +nearbywaterfal 0 +gotta 0 +could 0 +follow 0 +want 0 +apath 0 +direct 0 +path 0 +asign 0 +hillschool 0 +sign 0 +hell 0 +wormhol 0 +connect 0 +nearbyhous 0 +traffic 0 +clearinginsid 0 +coupl 0 +structur 0 +shack 0 +door 0 +fall 0 +offand 0 +complet 0 +modern 0 +hous 0 +withno 0 +stone 0 +front 0 +pile 0 +score 0 +magazin 0 +random 0 +paper 0 +scatter 0 +throughout 0 +theclear 0 +rhyme 0 +reasonto 0 +glanc 0 +sortsof 0 +recent 0 +betteridea 0 +make 0 +person 0 +tick 0 +resum 0 +itseem 0 +corner 0 +importantth 0 +worri 0 +kind 0 +strang 0 +clearingh 0 +smile 0 +hello 0 +oftendescrib 0 +thin 0 +mother 0 +mostdistinct 0 +featur 0 +bright 0 +golden 0 +quitelong 0 +elfin 0 +seen 0 +peoplebefor 0 +warn 0 +paragraph 0 +written 0 +theresoon 0 +alwai 0 +thing 0 +startstel 0 +stori 0 +stop 0 +hum 0 +tune 0 +tell 0 +whynichola 0 +negropont 0 +moron 0 +ifyou 0 +never 0 +heard 0 +polit 0 +late 0 +twentieth 0 +centuri 0 +america 0 +thenh 0 +paus 0 +obscur 0 +theorem 0 +theoret 0 +comput 0 +scienc 0 +rather 0 +listen 0 +hetend 0 +appearanceinstead 0 +mostli 0 +color 0 +purpl 0 +dark 0 +turquois 0 +everyth 0 +either 0 +silk 0 +linen 0 +contrast 0 +nice 0 +gold 0 +imageof 0 +right 0 +point 0 +awai 0 +fromhim 0 +silver 0 +ocean 0 +wave 0 +pattern 0 +pewter 0 +pentacl 0 +neck 0 +andlook 0 +altogeth 0 +hippi 0 +asclass 0 +intellectu 0 +clearingdan 0 +occasion 0 +spent 0 +briani 0 +live 0 +anundergrad 0 +nowadai 0 +pointcast 0 +newsprovid 0 +ancamosoiu 0 +pronounc 0 +schwa 0 +best 0 +backwhen 0 +mani 0 +week 0 +twoand 0 +went 0 +europ 0 +togeth 0 +wegradu 0 +inner 0 +child 0 +shejust 0 +onewav 0 +blame 0 +dread 0 +name 0 +actual 0 +usedto 0 +object 0 +power 0 +daniel 0 +issomeon 0 +gotten 0 +know 0 +severalmonth 0 +think 0 +sometim 0 +novemb 0 +becamemuch 0 +switch 0 +eedepart 0 +wise 0 +multimediastud 0 +dept 0 +commun 0 +reason 0 +inth 0 +own 0 +friendof 0 +finlei 0 +notanymor 0 +instead 0 +brian 0 +steelcas 0 +isth 0 +largest 0 +manufactur 0 +offic 0 +furnitur 0 +dserver 0 +kinda 0 +cheesi 0 +haveth 0 +pyramid 0 +still 0 +neat 0 +thebuild 0 +ius 0 +sing 0 +japan 0 +programcal 0 +hire 0 +teach 0 +english 0 +high 0 +student 0 +ideal 0 +winter 0 +hani 0 +graduatedfrom 0 +june 0 +couldn 0 +happen 0 +neededto 0 +great 0 +even 0 +nick 0 +agood 0 +cuter 0 +thanth 0 +blurri 0 +would 0 +indic 0 +music 0 +maker 0 +dreamer 0 +ofdream 0 +aphex 0 +twindan 0 +brown 0 +snowman 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..39b6ac77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,25 @@ +lookin 1 +home 1 +page 1 +autobiographi 0 +upkeep 0 +lot 0 +inform 0 +ultra 0 +cool 0 +soon 0 +keep 0 +take 0 +long 0 +setuup 0 +doingajaymanishanujmom 0 +daddepart 0 +comput 0 +sciencesearch 0 +netentertain 0 +weeklycricket 0 +rate 0 +ashish 0 +soni 0 +sonia 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..c63be7dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,11 @@ +scott 1 +dawson 1 +padif 1 +us 1 +form 1 +capabl 1 +browser 1 +would 1 +better 1 +scottdawson 1 +shomebas 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..3429ba77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,25 @@ +cornel 1 +comput 1 +paul 0 +stodghil 0 +home 0 +pagepaul 0 +stodghillstodghil 0 +rhode 0 +hall 0 +affili 0 +depart 0 +scienc 0 +atcornel 0 +univers 0 +advanc 0 +research 0 +institut 0 +acri 0 +theori 0 +center 0 +bernoulli 0 +projectinterest 0 +ultim 0 +hockei 0 +scheme 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..c5ff6c31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,13 @@ +stoller 1 +home 1 +former 0 +page 0 +scott 0 +pagescott 0 +move 0 +http 0 +indiana 0 +hyplan 0 +htmllast 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..8468a373 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,49 @@ +sugata 1 +cornel 1 +work 1 +mukhopadhyai 0 +home 0 +page 0 +system 0 +take 0 +high 0 +perform 0 +phone 0 +welcom 0 +graduat 0 +student 0 +depart 0 +ofcomput 0 +scienc 0 +univers 0 +multimedia 0 +prof 0 +brian 0 +smith 0 +marri 0 +wonder 0 +person 0 +earth 0 +ritu 0 +spring 0 +comput 0 +compil 0 +design 0 +architectur 0 +also 0 +advanc 0 +option 0 +price 0 +theori 0 +czar 0 +progress 0 +seminar 0 +previou 0 +semest 0 +contact 0 +mehom 0 +mailsugata 0 +eduaddress 0 +hichori 0 +estat 0 +owego 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..26dcbd66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,28 @@ +sukhpal 1 +paul 0 +sanghera 0 +univers 0 +cornel 0 +home 0 +page 0 +physic 0 +carleton 0 +present 0 +student 0 +comput 0 +scienc 0 +ithaca 0 +background 0 +project 0 +philosophi 0 +life 0 +resum 0 +rout 0 +clock 0 +tick 0 +need 0 +java 0 +capabl 0 +browser 0 +view 0 +anim 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..f91f4bd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,132 @@ +gater 1 +episod 0 +lord 0 +strip 0 +employe 0 +first 0 +microsoft 0 +parti 0 +part 0 +bilth 0 +galact 0 +weekli 0 +drew 0 +read 0 +work 0 +empir 0 +empirewritten 0 +illustr 0 +sumedh 0 +kanetkaremail 0 +kanetkar 0 +cornel 0 +eduthi 0 +seri 0 +comic 0 +intern 0 +atmicrosoft 0 +summer 0 +post 0 +theintern 0 +social 0 +alia 0 +regularli 0 +peopl 0 +thesumm 0 +progress 0 +notic 0 +artwork 0 +begun 0 +leak 0 +theful 0 +time 0 +well 0 +whether 0 +high 0 +never 0 +found 0 +stripi 0 +within 0 +week 0 +arriv 0 +redmond 0 +tri 0 +persuad 0 +themicrosoft 0 +newslett 0 +print 0 +perceiv 0 +problemand 0 +declin 0 +didn 0 +want 0 +portrai 0 +evilempir 0 +understand 0 +viewpoint 0 +told 0 +comicstrip 0 +attempt 0 +show 0 +compani 0 +view 0 +mani 0 +theoutsid 0 +world 0 +person 0 +bitter 0 +feel 0 +toward 0 +eitherbil 0 +gate 0 +corpor 0 +heck 0 +realli 0 +enjoi 0 +summersof 0 +strongli 0 +recommend 0 +internship 0 +program 0 +anyoneinterest 0 +industri 0 +make 0 +orient 0 +thateveri 0 +suffer 0 +long 0 +session 0 +theyshow 0 +video 0 +fill 0 +kind 0 +trivia 0 +also 0 +makey 0 +sign 0 +disclosur 0 +agreeement 0 +would 0 +fit 0 +theymad 0 +everyon 0 +stand 0 +place 0 +hand 0 +theirheart 0 +pledg 0 +alleig 0 +comput 0 +everydesk 0 +everi 0 +home 0 +run 0 +softwar 0 +anywai 0 +space 0 +roosterepisod 0 +rebel 0 +threatepisod 0 +flame 0 +imperi 0 +insigniaepisod 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..04f4d3ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,167 @@ +document 1 +structur 0 +electron 0 +logic 0 +summer 0 +cornel 0 +inform 0 +brows 0 +piec 0 +issu 0 +flexibl 0 +comput 0 +proceed 0 +kristen 0 +student 0 +univers 0 +research 0 +interest 0 +work 0 +goal 0 +support 0 +index 0 +primari 0 +technic 0 +report 0 +divid 0 +type 0 +retriev 0 +number 0 +paper 0 +us 0 +scienc 0 +autom 0 +upson 0 +hall 0 +captur 0 +accessresearch 0 +group 0 +analysi 0 +mylong 0 +term 0 +provid 0 +forsophist 0 +manipulationtool 0 +link 0 +discov 0 +logicalstructur 0 +arbitrari 0 +take 0 +documentrepresent 0 +input 0 +return 0 +hierarchyof 0 +output 0 +exampl 0 +given 0 +scan 0 +postscriptvers 0 +would 0 +like 0 +tobe 0 +abl 0 +section 0 +paragraph 0 +similarli 0 +busi 0 +letter 0 +address 0 +head 0 +bodi 0 +close 0 +identifi 0 +problem 0 +compon 0 +segment 0 +andclassif 0 +categor 0 +also 0 +rais 0 +question 0 +evalu 0 +previou 0 +differ 0 +descript 0 +correct 0 +hierarchi 0 +theoret 0 +limit 0 +task 0 +relev 0 +bruce 0 +croft 0 +stop 0 +novemb 0 +magazin 0 +interfac 0 +effici 0 +determininglog 0 +enabl 0 +hierarch 0 +soin 0 +gener 0 +system 0 +handl 0 +ofmultipl 0 +textual 0 +cue 0 +browsingco 0 +author 0 +daniela 0 +digit 0 +librari 0 +current 0 +nabil 0 +adam 0 +bharat 0 +bhargava 0 +yelena 0 +yesha 0 +editor 0 +chapter 0 +lectur 0 +note 0 +seri 0 +springer 0 +verlag 0 +version 0 +geometr 0 +algorithm 0 +experi 0 +mathemat 0 +model 0 +forthcom 0 +white 0 +space 0 +workshop 0 +principl 0 +process 0 +seeheim 0 +podp 0 +toward 0 +taxonomi 0 +structureselectron 0 +publish 0 +superhighwai 0 +dartmouth 0 +institut 0 +advanc 0 +graduat 0 +studi 0 +boston 0 +donald 0 +johnson 0 +memori 0 +dag 0 +scholaraward 0 +best 0 +recipi 0 +near 0 +wordless 0 +classif 0 +intern 0 +confer 0 +analysisand 0 +recognit 0 +montral 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..e3cf643c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,55 @@ +system 1 +project 0 +oper 0 +comput 0 +masafumi 0 +java 0 +research 0 +engin 0 +spring 0 +introduct 0 +graphic 0 +databas 0 +suzukither 0 +would 0 +applet 0 +browser 0 +suppot 0 +suzukisuzuki 0 +cornel 0 +educlassesfal 0 +case 0 +studi 0 +optim 0 +probabl 0 +statist 0 +design 0 +analysi 0 +simul 0 +stochast 0 +model 0 +summer 0 +data 0 +structur 0 +fall 0 +softwar 0 +technolog 0 +techniqu 0 +program 0 +multimedia 0 +report 0 +network 0 +telecommun 0 +polici 0 +thrive 0 +inform 0 +revolut 0 +sector 0 +site 0 +manag 0 +independ 0 +polygon 0 +displai 0 +us 0 +prototyp 0 +resum 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..603e49e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,28 @@ +swartz 1 +jonathan 0 +cornel 0 +home 0 +page 0 +edui 0 +student 0 +departmentof 0 +comput 0 +scienc 0 +univers 0 +spend 0 +time 0 +heredevelopingrivl 0 +languag 0 +multimedia 0 +process 0 +myaddress 0 +phone 0 +number 0 +littl 0 +humor 0 +brighten 0 +dayjon 0 +movi 0 +connectioncool 0 +siteslast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..5893725c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,23 @@ +sunil 1 +home 1 +page 1 +class 1 +srivastava 0 +pagewelcom 0 +srivastavamast 0 +engin 0 +studentcomput 0 +scienc 0 +departmentcornel 0 +univers 0 +academ 0 +project 0 +person 0 +inform 0 +us 0 +linkscom 0 +question 0 +send 0 +mail 0 +sxsriva 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..f8469a9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,218 @@ +year 1 +model 1 +quit 0 +cours 0 +program 0 +huang 0 +steven 0 +seven 0 +home 0 +thu 0 +love 0 +read 0 +life 0 +comput 0 +univers 0 +like 0 +career 0 +found 0 +ofcomput 0 +want 0 +mani 0 +network 0 +build 0 +watch 0 +also 0 +project 0 +involv 0 +huangszu 0 +defend 0 +truth 0 +champion 0 +justic 0 +around 0 +nice 0 +nevermind 0 +long 0 +exactli 0 +iarriv 0 +second 0 +proud 0 +parent 0 +soundslik 0 +mobi 0 +dick 0 +assur 0 +nointent 0 +find 0 +ship 0 +hunt 0 +whale 0 +digress 0 +brought 0 +taiwan 0 +tender 0 +wholefamili 0 +migrat 0 +south 0 +tropic 0 +island 0 +philippin 0 +made 0 +live 0 +fifteen 0 +pictur 0 +aroundsix 0 +becam 0 +fluentli 0 +bilingu 0 +thepoetri 0 +tang 0 +dynasti 0 +arabian 0 +night 0 +natur 0 +children 0 +version 0 +host 0 +stori 0 +somewhat 0 +fulfil 0 +name 0 +mean 0 +literatur 0 +class 0 +grade 0 +doveright 0 +marvel 0 +four 0 +later 0 +ienter 0 +philippineswith 0 +major 0 +talent 0 +draw 0 +scienc 0 +unabashedli 0 +knew 0 +good 0 +alsoin 0 +colleg 0 +whirlwind 0 +happi 0 +peac 0 +three 0 +run 0 +awoman 0 +becom 0 +import 0 +part 0 +effortlessli 0 +defeat 0 +hobbi 0 +eek 0 +ideal 0 +higher 0 +pai 0 +blunt 0 +ienrol 0 +cornel 0 +graduat 0 +reward 0 +almost 0 +everyth 0 +ever 0 +andwork 0 +lucki 0 +septemb 0 +welcom 0 +page 0 +segreg 0 +everydaygeek 0 +think 0 +interest 0 +asid 0 +fromactu 0 +write 0 +happili 0 +myspar 0 +time 0 +anyth 0 +calvin 0 +hobb 0 +unix 0 +internet 0 +relai 0 +chat 0 +gener 0 +linuxnet 0 +lego 0 +thing 0 +practic 0 +object 0 +orient 0 +design 0 +plastic 0 +weapon 0 +suspens 0 +thriller 0 +film 0 +listen 0 +music 0 +sting 0 +other 0 +sesam 0 +street 0 +discoveri 0 +channel 0 +railroad 0 +rich 0 +enough 0 +field 0 +rather 0 +broad 0 +spectrum 0 +ofinterest 0 +though 0 +studi 0 +concentr 0 +area 0 +graphic 0 +wish 0 +offend 0 +bysom 0 +blatant 0 +self 0 +promot 0 +check 0 +myresum 0 +highlyinterest 0 +linux 0 +freeli 0 +avail 0 +oper 0 +system 0 +intel 0 +compatiblecomput 0 +master 0 +engin 0 +blobbi 0 +metaballsund 0 +supervis 0 +bruce 0 +land 0 +current 0 +anoth 0 +survei 0 +techniquesin 0 +human 0 +face 0 +resolut 0 +independ 0 +andport 0 +audio 0 +effect 0 +editor 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..b00230b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,6 @@ +henzing 1 +hytechhytech 1 +hybrid 1 +technolog 1 +toolw 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..95f25bdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,55 @@ +time 1 +analysi 0 +formal 0 +methodolog 0 +real 0 +system 0 +cornel 0 +concurr 0 +embed 0 +automata 0 +hybrid 0 +henzingerthoma 0 +henzing 0 +movedassist 0 +professorcomput 0 +scienc 0 +departmentcornel 0 +universityithaca 0 +email 0 +eduphon 0 +researchform 0 +support 0 +develop 0 +relat 0 +researchat 0 +cornelland 0 +worldwid 0 +resumepublicationsreact 0 +modul 0 +systemsr 0 +logic 0 +transit 0 +systemsclock 0 +systemshybrid 0 +systemsbibliographi 0 +bibtex 0 +list 0 +publicationstoolshytech 0 +symbol 0 +model 0 +checker 0 +linear 0 +systemscoursesc 0 +fall 0 +advanc 0 +program 0 +languagesconferenceshybrid 0 +verif 0 +control 0 +systemscav 0 +comput 0 +aid 0 +verificationlast 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..e7590204 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,38 @@ +takako 1 +hickei 0 +interest 0 +homepag 0 +email 0 +cornel 0 +eduoffic 0 +upson 0 +hallphon 0 +student 0 +depart 0 +comput 0 +scienc 0 +atcornel 0 +univers 0 +advis 0 +byrobbert 0 +reness 0 +andfr 0 +schneider 0 +research 0 +distribut 0 +system 0 +program 0 +environ 0 +resourc 0 +manag 0 +horu 0 +project 0 +previou 0 +life 0 +social 0 +psycholog 0 +backcountri 0 +hockei 0 +quot 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..718fac84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,23 @@ +comput 1 +cornel 1 +program 1 +tim_teitelbaum 0 +teitelbaumassoci 0 +professor 0 +depart 0 +scienc 0 +univers 0 +eduresearch 0 +interest 0 +increment 0 +transform 0 +environ 0 +languag 0 +base 0 +editor 0 +compil 0 +attribut 0 +grammar 0 +adavita 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..707fd8d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,207 @@ +eicken 1 +parallel 0 +comput 0 +culler 0 +architectur 0 +schauser 0 +activ 0 +messag 0 +program 0 +goldstein 0 +proceed 0 +commun 0 +fall 0 +symp 0 +compil 0 +proc 0 +thorsten 0 +user 0 +level 0 +network 0 +high 0 +split 0 +system 0 +talk 0 +report 0 +annual 0 +paper 0 +mechan 0 +machin 0 +fine 0 +grain 0 +languag 0 +cluster 0 +workstat 0 +us 0 +interconnect 0 +sever 0 +port 0 +departement 0 +entri 0 +forum 0 +slide 0 +basu 0 +buch 0 +appear 0 +june 0 +version 0 +gold 0 +coast 0 +australia 0 +novemb 0 +berkelei 0 +abstract 0 +diego 0 +conf 0 +control 0 +multithread 0 +support 0 +eickenassist 0 +professor 0 +upson 0 +hallphon 0 +email 0 +cornel 0 +eduprojectsth 0 +architectureprovid 0 +interfacefor 0 +offer 0 +latencyand 0 +bandwidth 0 +speed 0 +lan 0 +currentimplement 0 +project 0 +platform 0 +includingth 0 +extend 0 +model 0 +tonon 0 +spmd 0 +simpl 0 +extensionto 0 +newplatform 0 +includ 0 +share 0 +memori 0 +multprocessor 0 +run 0 +coursesc 0 +introduct 0 +digit 0 +computerorgan 0 +perform 0 +spring 0 +frontier 0 +guest 0 +lectur 0 +maynd 0 +department 0 +person 0 +pagestv 0 +pond 0 +real 0 +water 0 +fish 0 +plant 0 +tire 0 +firewal 0 +macpppwhich 0 +gener 0 +time 0 +password 0 +automat 0 +without 0 +everhav 0 +think 0 +well 0 +month 0 +passwordssuddenli 0 +installationinstruct 0 +select 0 +publicationsu 0 +interfac 0 +distributedcomput 0 +anindya 0 +vineet 0 +werner 0 +vogel 0 +latenc 0 +atmnetwork 0 +avula 0 +present 0 +palo 0 +alto 0 +abridg 0 +ieee 0 +micro 0 +magazin 0 +integr 0 +andcomput 0 +effici 0 +communicationarchitectur 0 +multiprocessor 0 +thesi 0 +univers 0 +california 0 +publish 0 +link 0 +lead 0 +postscript 0 +dusseau 0 +krishnamurthi 0 +lumetta 0 +yelick 0 +supercomput 0 +controlledthread 0 +journal 0 +distribut 0 +special 0 +issu 0 +dataflow 0 +evalu 0 +spertu 0 +dalli 0 +logp 0 +toward 0 +realist 0 +modelof 0 +karp 0 +patterson 0 +sahai 0 +santo 0 +subramonian 0 +fourth 0 +sigplan 0 +principl 0 +practic 0 +fundament 0 +limit 0 +dataflowmultiprocess 0 +ifip 0 +work 0 +techniqu 0 +medium 0 +orlando 0 +forintegr 0 +symposium 0 +forleni 0 +confer 0 +function 0 +cambridg 0 +august 0 +minimalhardwar 0 +thread 0 +wawrzynek 0 +oper 0 +santa 0 +clara 0 +april 0 +analysi 0 +architecturesfor 0 +saavedra 0 +barrera 0 +algorithm 0 +crete 0 +greec 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..87cb00f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,43 @@ +erlingsson 1 +lfar 0 +pagelfar 0 +specificationi 0 +student 0 +incomput 0 +scienc 0 +cornel 0 +univers 0 +apart 0 +enjoi 0 +somewhat 0 +incongruousiceland 0 +link 0 +inform 0 +implementationbackgroundwher 0 +come 0 +current 0 +activitieswhat 0 +moment 0 +schedulewher 0 +time 0 +researchwhat 0 +real 0 +work 0 +done 0 +interestswhat 0 +actual 0 +like 0 +acquaintancesthos 0 +know 0 +contact 0 +infohow 0 +touch 0 +pleas 0 +note 0 +page 0 +often 0 +date 0 +gener 0 +assum 0 +disclaim 0 +appli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..b5425e35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,49 @@ +gener 1 +mesh 0 +softwar 0 +packag 0 +element 0 +releas 0 +page 0 +vavasi 0 +finit 0 +geometr 0 +home 0 +univers 0 +cornel 0 +project 0 +relat 0 +threedimens 0 +includ 0 +model 0 +themesh 0 +solver 0 +free 0 +softwaredownload 0 +run 0 +unix 0 +window 0 +andqmg 0 +novemb 0 +us 0 +websit 0 +robert 0 +schneider 0 +mcphedran 0 +offinit 0 +resourc 0 +minnesota 0 +geometri 0 +center 0 +list 0 +ofsoftwar 0 +computationalgeometri 0 +jonathan 0 +shewchuk 0 +triangl 0 +back 0 +stephen 0 +comput 0 +scienc 0 +depart 0 +ithaca 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..8e62e2ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,147 @@ +vavasi 1 +mesh 0 +code 0 +numer 0 +click 0 +gener 0 +method 0 +boundari 0 +algorithm 0 +stephen 0 +cornel 0 +email 0 +phone 0 +argonn 0 +interest 0 +analysi 0 +problem 0 +recent 0 +avail 0 +line 0 +complet 0 +mitchel 0 +ratio 0 +triangul 0 +softwar 0 +packag 0 +sourc 0 +level 0 +anonym 0 +matlab 0 +well 0 +page 0 +associ 0 +professor 0 +depart 0 +comput 0 +scienc 0 +rhode 0 +hall 0 +univers 0 +ithaca 0 +period 0 +onsabbat 0 +divis 0 +bldg 0 +nation 0 +laboratori 0 +cass 0 +note 0 +chang 0 +area 0 +effect 0 +research 0 +aren 0 +tsure 0 +pleas 0 +essaybi 0 +colleagu 0 +trefethen 0 +specif 0 +optim 0 +complex 0 +issuesnumer 0 +valu 0 +problemsgeometr 0 +aris 0 +scientif 0 +computingspars 0 +matrix 0 +computationsi 0 +manuscript 0 +primal 0 +dual 0 +acceler 0 +interiorpoint 0 +whose 0 +run 0 +time 0 +depend 0 +hough 0 +orthogon 0 +decompositionfor 0 +weight 0 +least 0 +squar 0 +aspect 0 +bound 0 +gridcut 0 +hyperplan 0 +driscol 0 +conform 0 +map 0 +us 0 +cross 0 +delaunai 0 +packagei 0 +project 0 +forth 0 +finit 0 +element 0 +three 0 +dimens 0 +call 0 +construct 0 +polyhedr 0 +geometr 0 +object 0 +verycompl 0 +topolog 0 +hole 0 +intern 0 +andautomat 0 +creat 0 +unstructuredtetrahedr 0 +base 0 +work 0 +scott 0 +also 0 +solv 0 +ellipt 0 +boundaryvalu 0 +grad 0 +domain 0 +iswritten 0 +distributedfor 0 +free 0 +distributionbegan 0 +releas 0 +novemb 0 +featur 0 +manyimprov 0 +includ 0 +faster 0 +vrml 0 +graphic 0 +much 0 +cleaner 0 +compat 0 +microsoft 0 +window 0 +unix 0 +compatibilitywith 0 +pleasese 0 +document 0 +annual 0 +reportback 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..befeca06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,10 @@ +arun 1 +verma 1 +homepag 1 +need 1 +browser 1 +support 1 +frame 1 +netscap 1 +higher 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..eb3d6123 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,10 @@ +vinc 1 +browser 1 +us 1 +suck 1 +download 1 +date 1 +netscap 1 +read 1 +page 1 +thank 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..faa7f18c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,15 @@ +vitrano 1 +pagec 0 +home 0 +pagehei 0 +start 0 +thing 0 +give 0 +break 0 +internet 0 +engin 0 +advanc 0 +databas 0 +multimedia 0 +pageer 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..8f1ff8ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,101 @@ +seed 1 +page 0 +time 0 +browser 0 +home 0 +us 0 +java 0 +kolla 0 +specif 0 +scrollit 0 +reach 0 +view 0 +wonder 0 +dont 0 +know 0 +anim 0 +ground 0 +copyright 0 +right 0 +reserv 0 +held 0 +respons 0 +unwant 0 +effect 0 +usag 0 +applet 0 +deriv 0 +warrante 0 +usabl 0 +applic 0 +given 0 +impli 0 +function 0 +vivek 0 +million 0 +zillion 0 +call 0 +send 0 +mail 0 +cornel 0 +expect 0 +back 0 +timertwo 0 +window 0 +settimeout 0 +els 0 +visitor 0 +number 0 +happi 0 +contact 0 +info 0 +transmit 0 +thoughtsfriend 0 +foeslinksa 0 +small 0 +collect 0 +relev 0 +life 0 +maintain 0 +larg 0 +list 0 +favorit 0 +link 0 +think 0 +wast 0 +search 0 +someth 0 +might 0 +tryalta 0 +vista 0 +yahoo 0 +theinktomiresumein 0 +htmlin 0 +postscriptin 0 +word 0 +perfectin 0 +asciith 0 +current 0 +good 0 +clock 0 +wanna 0 +around 0 +world 0 +need 0 +capabl 0 +site 0 +construct 0 +mani 0 +imag 0 +heavi 0 +file 0 +like 0 +note 0 +promis 0 +made 0 +regard 0 +qualiti 0 +visit 0 +fulli 0 +support 0 +technolog 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..ce0143cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,81 @@ +work 1 +lawyer 1 +professor 1 +difficulti 0 +like 0 +said 0 +friend 0 +find 0 +everi 0 +number 0 +vlad 0 +home 0 +pagevladimir 0 +kotlyarvladimir 0 +cornel 0 +look 0 +fall 0 +david 0 +wereteach 0 +might 0 +guess 0 +graduat 0 +student 0 +depart 0 +comput 0 +scienc 0 +cornellunivers 0 +prof 0 +keshav 0 +pingali 0 +research 0 +interest 0 +compil 0 +high 0 +perform 0 +architectur 0 +particular 0 +parallel 0 +spars 0 +matrix 0 +code 0 +part 0 +bernoulli 0 +project 0 +member 0 +group 0 +paul 0 +stodghil 0 +andindu 0 +kodukulapubl 0 +henri 0 +kissing 0 +legal 0 +profess 0 +remind 0 +comment 0 +abritish 0 +judg 0 +differ 0 +sveri 0 +simpl 0 +lord 0 +den 0 +function 0 +asolut 0 +present 0 +wherea 0 +functionof 0 +solut 0 +todayth 0 +seem 0 +outpac 0 +ofsolut 0 +either 0 +hardenough 0 +mani 0 +govern 0 +support 0 +privaci 0 +strong 0 +encrypt 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..702bdde2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,17 @@ +cornel 1 +address 1 +ithaca 1 +vijai 0 +menonvijai 0 +menon 0 +graduat 0 +student 0 +offic 0 +home 0 +rhode 0 +hall 0 +mapl 0 +univers 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..2a9a7b20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,49 @@ +depart 1 +chen 0 +comput 0 +cornel 0 +univers 0 +interest 0 +spare 0 +time 0 +home 0 +pagewei 0 +upson 0 +hall 0 +sciencecornel 0 +universityithaca 0 +weichen 0 +current 0 +third 0 +year 0 +student 0 +computersci 0 +receiv 0 +bachelorand 0 +master 0 +degre 0 +scienc 0 +tsinghua 0 +beij 0 +chinami 0 +academ 0 +distributedsystem 0 +fault 0 +toler 0 +algorithm 0 +work 0 +professor 0 +toueg 0 +failur 0 +detect 0 +group 0 +membership 0 +inpartition 0 +network 0 +system 0 +soccer 0 +resum 0 +bookmark 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..821f647b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,21 @@ +page 1 +weitsang 0 +homepag 0 +lwhere 0 +fromwhat 0 +watchwhat 0 +movi 0 +likec 0 +wrotepictur 0 +drawa 0 +window 0 +motifcomput 0 +theoryhom 0 +vimi 0 +tsearch 0 +webcoolest 0 +sitessharewar 0 +archivem 0 +newspap 0 +onlineunivers 0 +site 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..66a0dc4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,82 @@ +comput 1 +scienc 0 +hung 0 +glavin 0 +graduat 0 +univers 0 +cornel 0 +tenni 0 +favorit 0 +system 0 +multimedia 0 +project 0 +address 0 +mapl 0 +avenu 0 +ithaca 0 +telephon 0 +photo 0 +academ 0 +background 0 +nation 0 +taiwan 0 +plan 0 +habit 0 +sport 0 +basketbal 0 +billiard 0 +tabl 0 +bowl 0 +swim 0 +volleybal 0 +other 0 +sing 0 +drive 0 +danc 0 +except 0 +studi 0 +team 0 +orlando 0 +magic 0 +atlanta 0 +brave 0 +player 0 +anferne 0 +hardawai 0 +technic 0 +skill 0 +understand 0 +distribut 0 +oper 0 +graphic 0 +network 0 +databas 0 +vision 0 +financi 0 +calcul 0 +extens 0 +window 0 +java 0 +program 0 +final 0 +paper 0 +warp 0 +morph 0 +rivl 0 +partial 0 +result 0 +meng 0 +webpaint 0 +job 0 +interest 0 +market 0 +relat 0 +field 0 +softwar 0 +develop 0 +resum 0 +page 0 +still 0 +construct 0 +email 0 +whkao 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..234a6ade --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,150 @@ +comput 1 +research 0 +system 0 +florida 0 +school 0 +optim 0 +parallel 0 +mpeg 0 +encod 0 +concerto 0 +william 0 +cornel 0 +univers 0 +project 0 +engin 0 +cornellopoli 0 +network 0 +databas 0 +tenni 0 +south 0 +spring 0 +collect 0 +piano 0 +probabl 0 +classesc 0 +softwar 0 +distribut 0 +visitor 0 +jersei 0 +exit 0 +minut 0 +awai 0 +princeton 0 +master 0 +student 0 +scienceat 0 +degre 0 +engineeringand 0 +mathemat 0 +sciencefrom 0 +carneig 0 +mellon 0 +didresearch 0 +design 0 +center 0 +robot 0 +institu 0 +spent 0 +year 0 +write 0 +oper 0 +xsro 0 +hpux 0 +motorola 0 +atft 0 +lauderdal 0 +besid 0 +sleep 0 +work 0 +like 0 +sector 0 +analysi 0 +partner 0 +compet 0 +usta 0 +tournment 0 +could 0 +never 0 +somehow 0 +parti 0 +enjoi 0 +weekli 0 +match 0 +mani 0 +beauti 0 +place 0 +plai 0 +faviorit 0 +on 0 +boca 0 +ratonkei 0 +west 0 +get 0 +coral 0 +live 0 +beethoven 0 +chopin 0 +gershwin 0 +liszt 0 +mendelssohn 0 +mozart 0 +rachmaninoff 0 +ravel 0 +tchaikovski 0 +also 0 +violinconcerto 0 +guess 0 +type 0 +even 0 +wrote 0 +graduat 0 +applic 0 +essai 0 +base 0 +reason 0 +reject 0 +fall 0 +technolog 0 +techniquec 0 +formal 0 +methodsc 0 +multimedia 0 +systemsc 0 +scienc 0 +colloquiumc 0 +cool 0 +tool 0 +seminar 0 +purifi 0 +quantifi 0 +wart 0 +present 0 +practic 0 +computingc 0 +practicum 0 +high 0 +perform 0 +computerc 0 +thrive 0 +inform 0 +revolut 0 +sectorcool 0 +links_leap 0 +copi 0 +frogski 0 +serverident 0 +crisi 0 +testweath 0 +undergroundinktomi 0 +search 0 +enginequest 0 +week 0 +archiveslast 0 +updat 0 +campu 0 +address 0 +mapl 0 +ecithaca 0 +york 0 +wwlee 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..a9537597 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,54 @@ +comput 1 +xichun 0 +zhejiang 0 +welcom 0 +jennif 0 +home 0 +depart 0 +current 0 +master 0 +scienc 0 +univers 0 +shade 0 +cours 0 +network 0 +page 0 +upson 0 +hall 0 +sciencecornel 0 +universityithaca 0 +offic 0 +cornel 0 +edui 0 +engin 0 +student 0 +atcornel 0 +receiv 0 +bachelor 0 +degre 0 +hangzhou 0 +china 0 +site 0 +javaworldsunhigh 0 +school 0 +alumni 0 +alumnimeng 0 +project 0 +phong 0 +gouraud 0 +spring 0 +graphic 0 +high 0 +capac 0 +inform 0 +databas 0 +manag 0 +taken 0 +fall 0 +multimedia 0 +systemsc 0 +softwar 0 +engineeringc 0 +oper 0 +systeme 0 +communicationby 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..cdbbcf17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,2 @@ +topic 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..a879eb3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,80 @@ +program 1 +increment 0 +transform 0 +comput 0 +base 0 +cachet 0 +effici 0 +improv 0 +analysi 0 +teitelbaum 0 +proceed 0 +systemat 0 +attribut 0 +interact 0 +system 0 +languag 0 +deriv 0 +sigplan 0 +symposium 0 +principl 0 +anni 0 +relat 0 +project 0 +computationderiv 0 +programsa 0 +gener 0 +approach 0 +themeprogram 0 +usessystemat 0 +techniqu 0 +deriveincrement 0 +written 0 +function 0 +select 0 +public 0 +scienc 0 +februari 0 +cach 0 +intermedi 0 +result 0 +partial 0 +evalu 0 +semant 0 +manipul 0 +page 0 +jolla 0 +california 0 +june 0 +stoller 0 +discov 0 +auxiliari 0 +inform 0 +annual 0 +sigact 0 +petersburg 0 +beach 0 +florida 0 +januari 0 +knowledg 0 +softwar 0 +engin 0 +confer 0 +boston 0 +massachusett 0 +novemb 0 +ieee 0 +societi 0 +press 0 +strength 0 +reduct 0 +juli 0 +peoplei 0 +liutim 0 +teitelbaumkeyword 0 +optim 0 +cacheti 0 +yanhong 0 +cornel 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..8aea26c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,191 @@ +program 1 +comput 0 +increment 0 +deriv 0 +scienc 0 +univers 0 +report 0 +teitelbaum 0 +cornel 0 +confer 0 +systemat 0 +base 0 +york 0 +technic 0 +proceed 0 +system 0 +intern 0 +transform 0 +softwar 0 +beij 0 +juli 0 +depart 0 +research 0 +improv 0 +effici 0 +ithaca 0 +symposium 0 +cach 0 +intermedi 0 +result 0 +young 0 +china 0 +august 0 +techniqu 0 +select 0 +page 0 +scientist 0 +webster 0 +yanhong 0 +home 0 +gener 0 +approach 0 +languag 0 +algorithm 0 +develop 0 +public 0 +semant 0 +januari 0 +sigplan 0 +principl 0 +cachet 0 +knowledg 0 +novemb 0 +press 0 +evalu 0 +california 0 +zhang 0 +wang 0 +combin 0 +center 0 +anni 0 +associ 0 +analysi 0 +optim 0 +octob 0 +discov 0 +auxiliari 0 +inform 0 +annual 0 +sigact 0 +petersburg 0 +beach 0 +florida 0 +interact 0 +attribut 0 +engin 0 +boston 0 +massachusett 0 +partial 0 +manipul 0 +jolla 0 +june 0 +reason 0 +qualit 0 +quantit 0 +multi 0 +factor 0 +problem 0 +march 0 +xerox 0 +institut 0 +dagstuhl 0 +expert 0 +offic 0 +indiana 0 +pageyanhong 0 +post 0 +doctor 0 +work 0 +professor 0 +interest 0 +ofcomput 0 +forincrement 0 +parallel 0 +concurr 0 +applic 0 +compil 0 +interactivesystem 0 +design 0 +systemorgan 0 +mainten 0 +talksph 0 +dissert 0 +basedsystemat 0 +also 0 +appear 0 +abstractjourn 0 +februari 0 +refere 0 +stoller 0 +ieee 0 +societi 0 +peke 0 +tsinghua 0 +formal 0 +uncertainti 0 +model 0 +partit 0 +descript 0 +fuzzi 0 +world 0 +congress 0 +seattl 0 +washington 0 +inexact 0 +profession 0 +publish 0 +hous 0 +survei 0 +map 0 +septemb 0 +revis 0 +wakayama 0 +line 0 +break 0 +talk 0 +kestrel 0 +palo 0 +alto 0 +seminar 0 +dynam 0 +schloss 0 +germani 0 +automat 0 +laboratori 0 +document 0 +us 0 +obtain 0 +present 0 +oggeb 0 +basin 0 +implement 0 +test 0 +usag 0 +manual 0 +summari 0 +explor 0 +ri 0 +tshinghua 0 +author 0 +song 0 +huang 0 +current 0 +project 0 +compos 0 +build 0 +compon 0 +upson 0 +hallcornel 0 +universityithaca 0 +last 0 +updat 0 +move 0 +tocomput 0 +department 0 +lindlei 0 +hallindiana 0 +universitybloomington 0 +eduhttp 0 +peopl 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..540f53b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,24 @@ +cornel 1 +cheng 0 +huang 0 +depart 0 +comput 0 +scienc 0 +univers 0 +ychuang 0 +home 0 +page 0 +huangyi 0 +upson 0 +hall 0 +ithaca 0 +email 0 +edui 0 +graduat 0 +student 0 +favorit 0 +link 0 +onlin 0 +documentscoursesprojectaccess 0 +byvisitorslast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..97343ebd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,120 @@ +school 1 +like 0 +comput 0 +korea 0 +high 0 +music 0 +cornel 0 +came 0 +java 0 +ilbo 0 +april 0 +studi 0 +scienc 0 +master 0 +degre 0 +engin 0 +america 0 +myoung 0 +junior 0 +virginia 0 +husband 0 +chang 0 +work 0 +would 0 +plai 0 +keyboard 0 +piano 0 +korean 0 +us 0 +search 0 +resum 0 +email 0 +chung 0 +home 0 +pagewelcom 0 +everybodi 0 +name 0 +chungyou 0 +thvisitor 0 +sinc 0 +univers 0 +pleas 0 +check 0 +sciencecornel 0 +universitywher 0 +origin 0 +seoul 0 +graduat 0 +went 0 +kindergarten 0 +elementari 0 +sang 0 +kang 0 +sung 0 +women 0 +georg 0 +mason 0 +universityin 0 +happiest 0 +thing 0 +life 0 +marri 0 +wonder 0 +forsaic 0 +programm 0 +pictur 0 +beauti 0 +moment 0 +avail 0 +browser 0 +click 0 +free 0 +actual 0 +better 0 +listen 0 +kind 0 +love 0 +shin 0 +seung 0 +hoon 0 +moon 0 +classic 0 +forth 0 +brows 0 +world 0 +wide 0 +link 0 +interest 0 +want 0 +onlin 0 +newpap 0 +hangook 0 +chosun 0 +joongang 0 +hire 0 +word 0 +perfect 0 +version 0 +meng 0 +project 0 +still 0 +titl 0 +imag 0 +process 0 +appletyoosun 0 +person 0 +infom 0 +triphamm 0 +sbithaca 0 +phone 0 +emerg 0 +ychung 0 +forward 0 +yooschung 0 +automat 0 +page 0 +construct 0 +last 0 +modifi 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..d0af9865 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,221 @@ +page 1 +great 1 +good 0 +cornel 0 +time 0 +live 0 +plai 0 +much 0 +know 0 +take 0 +movi 0 +public 0 +yaron 0 +minski 0 +home 0 +graduat 0 +student 0 +ithaca 0 +place 0 +syracus 0 +toler 0 +comput 0 +flapdragon 0 +longer 0 +game 0 +better 0 +make 0 +easi 0 +medic 0 +school 0 +love 0 +everi 0 +rate 0 +tri 0 +yellow 0 +linux 0 +block 0 +yminski 0 +edudepart 0 +computersci 0 +upson 0 +hall 0 +univers 0 +phone 0 +comstock 0 +current 0 +focus 0 +onfault 0 +distribut 0 +particular 0 +work 0 +thetacoma 0 +project 0 +attempt 0 +build 0 +oper 0 +system 0 +support 0 +forfault 0 +agent 0 +base 0 +year 0 +livether 0 +still 0 +veggi 0 +coop 0 +crash 0 +often 0 +nowadai 0 +slightli 0 +outof 0 +date 0 +webpag 0 +howev 0 +notic 0 +anopen 0 +start 0 +need 0 +highli 0 +recommendit 0 +though 0 +ancientchines 0 +extremlysimpl 0 +rule 0 +complic 0 +satisfi 0 +strategi 0 +like 0 +learn 0 +intro 0 +also 0 +want 0 +internet 0 +nota 0 +real 0 +person 0 +front 0 +thannoth 0 +unix 0 +machin 0 +look 0 +cgoban 0 +nicest 0 +goboard 0 +program 0 +seen 0 +trivial 0 +thenet 0 +newli 0 +marri 0 +wife 0 +lisa 0 +go 0 +suni 0 +health 0 +scienc 0 +center 0 +uniqu 0 +qualifi 0 +within 0 +hour 0 +twenti 0 +minutesof 0 +favoritepoem 0 +lafiglia 0 +piang 0 +theidea 0 +order 0 +west 0 +advicefor 0 +resumesom 0 +interestinglink 0 +critic 0 +impress 0 +exampl 0 +fairli 0 +simpl 0 +technolog 0 +us 0 +effect 0 +site 0 +engin 0 +compar 0 +other 0 +come 0 +recommend 0 +found 0 +almost 0 +alarmingli 0 +contrast 0 +firefli 0 +thing 0 +fail 0 +miser 0 +yahoo 0 +address 0 +give 0 +direct 0 +seven 0 +closest 0 +bakeri 0 +perfect 0 +bigbook 0 +bigyellow 0 +advertis 0 +york 0 +read 0 +free 0 +plu 0 +save 0 +paper 0 +note 0 +download 0 +text 0 +slate 0 +magazin 0 +hate 0 +admit 0 +microsoft 0 +someth 0 +right 0 +thought 0 +well 0 +execut 0 +expect 0 +michael 0 +kinslei 0 +run 0 +compani 0 +instal 0 +maintain 0 +list 0 +amazon 0 +book 0 +solid 0 +discount 0 +virtual 0 +bookstor 0 +hope 0 +local 0 +booksel 0 +town 0 +brother 0 +follow 0 +begin 0 +version 0 +comment 0 +process 0 +mailcrypt 0 +emac 0 +interfacemqbtazgjohoaaaedalfhlgjmdg 0 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 0 +rbylf 0 +zwqujcioczoecv 0 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 0 +gkgarsokrinnoazihja 0 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 0 +wumjgzsnvispwkrvzgdrojswmc 0 +eigsqsb 0 +bsbpw 0 +jcwz 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..4ad392bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,43 @@ +yuichi 1 +tsuchimoto 0 +cornel 0 +home 0 +current 0 +program 0 +introduct 0 +compil 0 +translatorsc 0 +practicum 0 +artifici 0 +intelligencec 0 +format 0 +info 0 +peopl 0 +pageyuichi 0 +pagecours 0 +workfal 0 +semest 0 +system 0 +oper 0 +systemsc 0 +languag 0 +softwar 0 +engineeringspr 0 +machin 0 +visionfal 0 +foundat 0 +theori 0 +computingi 0 +look 0 +unit 0 +state 0 +resum 0 +postscript 0 +address 0 +eduhttp 0 +last 0 +modif 0 +novemb 0 +http 0 +welcom 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..3ee3933a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,326 @@ +messag 1 +activ 0 +commun 0 +latenc 0 +implement 0 +eicken 0 +network 0 +paper 0 +machin 0 +us 0 +version 0 +overhead 0 +layer 0 +split 0 +perform 0 +design 0 +releas 0 +chang 0 +thorsten 0 +processor 0 +high 0 +describ 0 +show 0 +cluster 0 +architectur 0 +multiprocessor 0 +cost 0 +cornel 0 +part 0 +pleas 0 +read 0 +chao 0 +grzegorz 0 +czajkowski 0 +abstract 0 +power 0 +bandwidth 0 +softwar 0 +activemessag 0 +compar 0 +standard 0 +workstat 0 +allow 0 +mechan 0 +conform 0 +spec 0 +project 0 +avail 0 +inform 0 +includ 0 +know 0 +select 0 +appear 0 +novemb 0 +mpp 0 +pass 0 +order 0 +reduc 0 +first 0 +round 0 +trip 0 +secondpart 0 +demonstr 0 +benchmark 0 +technic 0 +report 0 +characterist 0 +present 0 +detail 0 +gener 0 +specif 0 +interfac 0 +interconnect 0 +oper 0 +without 0 +evalu 0 +driven 0 +berkelei 0 +messagescornel 0 +implementationsact 0 +neta 0 +sourc 0 +code 0 +thegener 0 +moreinform 0 +page 0 +object 0 +codereleas 0 +thegam 0 +readm 0 +instal 0 +file 0 +distribut 0 +instructionson 0 +contact 0 +releasenot 0 +fileto 0 +find 0 +previou 0 +currentvers 0 +also 0 +major 0 +differencebetween 0 +modifi 0 +libmpci 0 +thedistribut 0 +document 0 +packag 0 +fordetail 0 +interest 0 +current 0 +pleaseclick 0 +send 0 +briefnot 0 +let 0 +someth 0 +organ 0 +theus 0 +intend 0 +public 0 +messageslow 0 +ibmrisc 0 +system 0 +chri 0 +hawblitzel 0 +ieeesupercomput 0 +pittsburgh 0 +commerci 0 +spiteof 0 +fast 0 +scommun 0 +inferior 0 +older 0 +tmccm 0 +meiko 0 +investig 0 +primit 0 +altern 0 +standardmessag 0 +tooffer 0 +good 0 +build 0 +block 0 +higher 0 +directli 0 +networkadapt 0 +yieldsa 0 +lower 0 +communicationsubstr 0 +well 0 +cbenchmark 0 +lowmessag 0 +throughput 0 +compens 0 +networklat 0 +base 0 +freeli 0 +availablempich 0 +achiev 0 +equival 0 +onth 0 +februari 0 +andevalu 0 +implementationbenchmark 0 +adapt 0 +firmwar 0 +butdo 0 +assumefamiliar 0 +concept 0 +underli 0 +mainperform 0 +word 0 +timeof 0 +asymptot 0 +focuseson 0 +analysi 0 +comparison 0 +smessag 0 +defin 0 +portabl 0 +across 0 +varieti 0 +parallel 0 +theu 0 +themeiko 0 +thehpam 0 +fddi 0 +ring 0 +theparagon 0 +thesp 0 +networksus 0 +veena 0 +avula 0 +anyndia 0 +basu 0 +vineet 0 +buch 0 +palo 0 +alto 0 +abridg 0 +ieee 0 +micro 0 +magazin 0 +slide 0 +talk 0 +recent 0 +develop 0 +forparallel 0 +made 0 +signific 0 +progress 0 +thecommun 0 +magnitud 0 +ascompar 0 +earlier 0 +propos 0 +examin 0 +whether 0 +thesetechniqu 0 +carri 0 +connect 0 +anatm 0 +even 0 +though 0 +systemsoftwar 0 +equip 0 +optim 0 +streamcommun 0 +direct 0 +protect 0 +user 0 +level 0 +access 0 +thenetwork 0 +reliabl 0 +transmiss 0 +flowcontrol 0 +differ 0 +incommun 0 +builtfrom 0 +hardwar 0 +compon 0 +state 0 +artmultiprocessor 0 +lack 0 +flow 0 +control 0 +systemcoordin 0 +affect 0 +significantli 0 +andrequir 0 +larger 0 +buffer 0 +prototyp 0 +model 0 +clusterinterconnect 0 +measur 0 +showappl 0 +applic 0 +microsecond 0 +smallmessag 0 +roughli 0 +messagesimplement 0 +think 0 +integr 0 +andcomput 0 +culler 0 +goldstein 0 +schauser 0 +proceed 0 +symp 0 +comput 0 +gold 0 +coast 0 +australia 0 +abstractth 0 +challeng 0 +larg 0 +scale 0 +tominim 0 +overlapcomput 0 +coordin 0 +sacrificingprocessor 0 +exist 0 +passingmultiprocessor 0 +unnecessarili 0 +researchprototyp 0 +communicationoverhead 0 +poor 0 +introduc 0 +simplecommun 0 +isintrins 0 +effect 0 +thehardwar 0 +offer 0 +tremend 0 +flexibl 0 +ncube 0 +phase 0 +share 0 +memoryextens 0 +messagesar 0 +suffici 0 +dynam 0 +schedul 0 +languag 0 +forwhich 0 +toler 0 +becom 0 +program 0 +compil 0 +concern 0 +hardwaresupport 0 +desir 0 +outlin 0 +rang 0 +ofenhanc 0 +mainstream 0 +efficientcommun 0 +thesi 0 +univers 0 +california 0 +sitesact 0 +messagesin 0 +projectfor 0 +contactthorsten 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..30ca4dfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,202 @@ +system 1 +horu 0 +applic 0 +isi 0 +cornel 0 +technolog 0 +comput 0 +demand 0 +militari 0 +toler 0 +control 0 +distribut 0 +effort 0 +develop 0 +program 0 +fault 0 +high 0 +perform 0 +work 0 +user 0 +plan 0 +environ 0 +year 0 +us 0 +featur 0 +virtual 0 +prior 0 +success 0 +also 0 +commun 0 +media 0 +remot 0 +base 0 +wide 0 +rang 0 +space 0 +branch 0 +project 0 +futur 0 +chang 0 +thu 0 +environmenthoru 0 +kenneth 0 +birman 0 +robbert 0 +reness 0 +shoru 0 +reliabledistribut 0 +last 0 +demonstrategroupwar 0 +network 0 +foundto 0 +offer 0 +higher 0 +similar 0 +novel 0 +ofhoru 0 +flexibl 0 +softwar 0 +architectur 0 +support 0 +synchronousprocess 0 +group 0 +toolkit 0 +becom 0 +signific 0 +commerci 0 +offersa 0 +securityand 0 +privaci 0 +view 0 +importantresearch 0 +advanc 0 +extend 0 +provid 0 +extrem 0 +latenc 0 +performancer 0 +time 0 +capabl 0 +approach 0 +combin 0 +element 0 +calledact 0 +messageswith 0 +multi 0 +playbacksystem 0 +calledcontinu 0 +expect 0 +demonstr 0 +speed 0 +interact 0 +multimediaserv 0 +might 0 +telemedicin 0 +videoon 0 +retain 0 +exist 0 +andsecur 0 +option 0 +synchroni 0 +model 0 +creat 0 +substanti 0 +expectrapid 0 +uptak 0 +within 0 +matur 0 +spana 0 +industri 0 +includ 0 +telecommun 0 +financialtrad 0 +stock 0 +market 0 +autom 0 +factori 0 +floor 0 +process 0 +fordiscret 0 +electron 0 +compon 0 +manufactur 0 +traffic 0 +basedcommun 0 +manag 0 +beingexplor 0 +sever 0 +well 0 +othernon 0 +govern 0 +among 0 +visibl 0 +isth 0 +naval 0 +hiper 0 +explor 0 +systemthat 0 +prototyp 0 +enhanc 0 +aegi 0 +battleradar 0 +would 0 +benefitfrom 0 +access 0 +initi 0 +make 0 +possibl 0 +migrateisi 0 +benefit 0 +communityin 0 +direct 0 +transit 0 +occur 0 +licens 0 +agreementswith 0 +subsidiari 0 +stratu 0 +howev 0 +avail 0 +research 0 +isdescrib 0 +detail 0 +public 0 +manual 0 +look 0 +hope 0 +mixtur 0 +technologieswil 0 +permit 0 +beseen 0 +next 0 +gener 0 +groupwar 0 +illustr 0 +belowshow 0 +mission 0 +integr 0 +data 0 +varieti 0 +ground 0 +resourc 0 +andus 0 +coordin 0 +action 0 +variou 0 +theatr 0 +asset 0 +thissort 0 +utmost 0 +reliabl 0 +secur 0 +whilealso 0 +failur 0 +rapidli 0 +reconfigur 0 +respond 0 +impact 0 +civilianand 0 +dept 0 +scienc 0 +univers 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..faf28fe5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,77 @@ +multimedia 1 +applic 1 +medianet 0 +perform 0 +network 0 +commun 0 +group 0 +cornel 0 +high 0 +process 0 +combin 0 +develop 0 +user 0 +level 0 +improv 0 +facilit 0 +horu 0 +reliabl 0 +toolkit 0 +video 0 +inform 0 +projectmedianet 0 +platform 0 +media 0 +technolog 0 +research 0 +todevelop 0 +flexibl 0 +testb 0 +store 0 +transport 0 +us 0 +data 0 +architectur 0 +access 0 +dramat 0 +protocolsth 0 +order 0 +magnitud 0 +communicationmak 0 +parallel 0 +comput 0 +workstat 0 +cluster 0 +practic 0 +communicationprimit 0 +adapt 0 +industri 0 +strength 0 +tool 0 +secur 0 +primit 0 +critic 0 +foradvanc 0 +militari 0 +commerci 0 +approach 0 +distribut 0 +audio 0 +portabl 0 +build 0 +includeaudio 0 +rapid 0 +prototyp 0 +multimediaappl 0 +fund 0 +project 0 +provid 0 +contract 0 +fromth 0 +darpa 0 +technologyofficefor 0 +contact 0 +thorstenvon 0 +eicken 0 +brian 0 +smith 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..70234241 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,31 @@ +nuprl 1 +project 0 +theori 0 +cornel 0 +browser 0 +articl 0 +link 0 +autom 0 +reason 0 +introduct 0 +theorem 0 +design 0 +written 0 +vaughn 0 +user 0 +document 0 +relat 0 +public 0 +class 0 +note 0 +linux 0 +announc 0 +suggest 0 +feedback 0 +help 0 +main 0 +index 0 +curiou 0 +mani 0 +page 0 +askaltavista 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..869184cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,217 @@ +split 1 +program 0 +node 0 +file 0 +messag 0 +granita 0 +includ 0 +debug 0 +activ 0 +comput 0 +exampl 0 +parallel 0 +bench 0 +sourc 0 +setenv 0 +compil 0 +locat 0 +am_run 0 +cuc 0 +us 0 +machin 0 +shell 0 +instal 0 +inform 0 +type 0 +read 0 +local 0 +commun 0 +makefil 0 +directori 0 +librari 0 +also 0 +login 0 +tcsh 0 +bash 0 +experi 0 +first 0 +command 0 +remot 0 +info 0 +softwar 0 +statement 0 +found 0 +execut 0 +look 0 +gmake 0 +follow 0 +return 0 +perform 0 +spam 0 +avail 0 +header 0 +script 0 +ampicc 0 +pleas 0 +scienc 0 +call 0 +eight 0 +granitathrough 0 +design 0 +asinteract 0 +problemsdur 0 +remov 0 +oper 0 +system 0 +specif 0 +stufffrom 0 +configur 0 +haveth 0 +arch 0 +unam 0 +instead 0 +readm 0 +contain 0 +informationabout 0 +releas 0 +addit 0 +manyou 0 +infoexplor 0 +commandsand 0 +usag 0 +displai 0 +properli 0 +job 0 +neither 0 +activemassag 0 +peor 0 +messagesor 0 +hardwar 0 +cornel 0 +theori 0 +center 0 +homegrown 0 +softwarein 0 +gener 0 +besur 0 +path 0 +csplit 0 +simpl 0 +extens 0 +forparallel 0 +provid 0 +global 0 +address 0 +space 0 +though 0 +globalpoint 0 +dereferenc 0 +like 0 +regular 0 +pointer 0 +phase 0 +assign 0 +allow 0 +programm 0 +hide 0 +latencyof 0 +access 0 +overlap 0 +work 0 +user 0 +shellsshould 0 +creat 0 +sampl 0 +variou 0 +must 0 +make 0 +asact 0 +scriptsloc 0 +programfoo 0 +processor 0 +foodebug 0 +step 0 +need 0 +done 0 +insert 0 +splitc_debug 0 +aftersplitc_main 0 +describ 0 +previou 0 +section 0 +commonli 0 +ongranita 0 +enter 0 +continu 0 +hit 0 +onto 0 +want 0 +youwant 0 +master 0 +open 0 +insid 0 +thenattach 0 +theth 0 +proc 0 +process 0 +proce 0 +attach 0 +stop 0 +andyou 0 +breakpoint 0 +stack 0 +frame 0 +messagesact 0 +overhead 0 +layerthat 0 +offer 0 +high 0 +mani 0 +nativ 0 +layer 0 +main 0 +characterist 0 +word 0 +round 0 +triplat 0 +asymptot 0 +network 0 +bandwidth 0 +libspgam 0 +aand 0 +beforerun 0 +runningprgm 0 +mpimpi 0 +popularmessag 0 +pass 0 +interfac 0 +portabl 0 +animplement 0 +base 0 +mpich 0 +run 0 +overact 0 +easiest 0 +link 0 +whichi 0 +built 0 +fooyou 0 +lookat 0 +examplesin 0 +ampi 0 +exactli 0 +likeordinari 0 +sure 0 +softwaresoftwar 0 +fortran 0 +xpdbx 0 +matlab 0 +emac 0 +bison 0 +replic 0 +problemsif 0 +difficulti 0 +contact 0 +czar 0 +grzegorz 0 +czajkowski 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..01958ce6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,105 @@ +model 1 +project 1 +research 1 +languag 1 +cornel 0 +simul 0 +creat 0 +system 0 +simlab 0 +select 0 +effort 0 +comput 0 +gener 0 +scientif 0 +softwar 0 +collabor 0 +version 0 +program 0 +present 0 +chew 0 +home 0 +page 0 +enorm 0 +current 0 +expend 0 +scientificsoftwar 0 +particularli 0 +physic 0 +defin 0 +oncomplex 0 +geometri 0 +us 0 +advanc 0 +hardwar 0 +thegoal 0 +reduc 0 +bringingtogeth 0 +technolog 0 +geometr 0 +symbolicmathemat 0 +numer 0 +analysi 0 +compil 0 +code 0 +andform 0 +method 0 +tool 0 +rais 0 +semant 0 +levelat 0 +possibl 0 +overview 0 +softwarepackag 0 +activ 0 +mathemat 0 +environ 0 +propos 0 +postscript 0 +guarante 0 +qualiti 0 +mesh 0 +microstoragearchitectur 0 +weyl 0 +computeralgebra 0 +substrat 0 +high 0 +levelprogram 0 +synthes 0 +thechain 0 +algebra 0 +topolog 0 +compon 0 +thearpa 0 +nist 0 +madefast 0 +design 0 +manufactur 0 +exercis 0 +longer 0 +direct 0 +insystem 0 +richard 0 +zippel 0 +ideason 0 +proce 0 +includ 0 +brief 0 +discuss 0 +ofnon 0 +contemporan 0 +commun 0 +microstorag 0 +architectur 0 +theus 0 +transform 0 +chainsprogram 0 +complextopolog 0 +engin 0 +numericalalgorithm 0 +rick 0 +palmer 0 +peopl 0 +public 0 +report 0 +paul 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..d15f92ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,51 @@ +split 1 +inform 0 +eicken 0 +sourc 0 +code 0 +releas 0 +cornel 0 +prepar 0 +page 0 +ccornel 0 +implementationssplit 0 +neta 0 +isimpl 0 +activ 0 +messagesfor 0 +contact 0 +thorsten 0 +ofsplit 0 +distr 0 +implementedon 0 +spam 0 +contactchi 0 +chao 0 +chang 0 +grzegorz 0 +czajkowski 0 +thorstenvon 0 +share 0 +memori 0 +multiprocessorsa 0 +multiprocessor 0 +runningsolari 0 +mattwelsh 0 +select 0 +public 0 +cparallel 0 +program 0 +culler 0 +dusseau 0 +goldstein 0 +krishnamurthi 0 +lumetta 0 +yelick 0 +proceed 0 +supercomput 0 +novemb 0 +abstractproject 0 +sitessplit 0 +chome 0 +berkelei 0 +contactthorsten 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..32c1466f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,9 @@ +page 1 +move 1 +browser 1 +redirect 1 +second 1 +http 1 +cornel 1 +default 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..7c0f1eb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,220 @@ +robot 1 +cornel 0 +donald 0 +comput 0 +vision 0 +paper 0 +list 0 +ieee 0 +manipul 0 +laboratori 0 +csrvl 0 +prof 0 +daniel 0 +applic 0 +zabih 0 +video 0 +base 0 +parallel 0 +justin 0 +avail 0 +report 0 +server 0 +intern 0 +confer 0 +proceed 0 +bhringer 0 +professor 0 +match 0 +direct 0 +scienc 0 +huttenloch 0 +ramin 0 +includ 0 +distribut 0 +micro 0 +electro 0 +mechan 0 +system 0 +bruce 0 +follow 0 +project 0 +automat 0 +break 0 +miller 0 +done 0 +move 0 +select 0 +mani 0 +proc 0 +revis 0 +symposium 0 +foundat 0 +inform 0 +invari 0 +workshop 0 +autom 0 +diego 0 +microfabr 0 +mihailovich 0 +macdonald 0 +technic 0 +associ 0 +greg 0 +csrvlcornel 0 +laboratorywelcom 0 +nich 0 +rrentli 0 +develop 0 +pleas 0 +hard 0 +question 0 +comment 0 +thank 0 +locat 0 +univers 0 +ithaca 0 +three 0 +main 0 +area 0 +ofresearch 0 +multimedia 0 +mem 0 +pictor 0 +tour 0 +current 0 +projectsth 0 +activ 0 +supervis 0 +byramin 0 +detect 0 +andclassif 0 +scene 0 +digit 0 +mpeg 0 +browser 0 +allowingscen 0 +global 0 +motion 0 +queri 0 +real 0 +time 0 +sourc 0 +transmiss 0 +full 0 +frame 0 +onplatform 0 +nynet 0 +cluster 0 +number 0 +involv 0 +high 0 +perform 0 +imag 0 +implement 0 +split 0 +foru 0 +symmetr 0 +multiprocessor 0 +potenti 0 +master 0 +sproject 0 +maintain 0 +work 0 +unix 0 +currentlyconsid 0 +windowsnt 0 +discuss 0 +theissuesher 0 +hope 0 +support 0 +microsoft 0 +publicationsth 0 +research 0 +thecsrvl 0 +anonym 0 +public 0 +tech 0 +serverar 0 +program 0 +mobil 0 +scheme 0 +ree 0 +automationnic 0 +franc 0 +complex 0 +homolog 0 +type 0 +triangul 0 +chang 0 +juan 0 +octob 0 +jen 0 +first 0 +algorithm 0 +peter 0 +boston 0 +wilson 0 +andj 0 +latomb 0 +submit 0 +artifici 0 +intellig 0 +sensor 0 +configur 0 +task 0 +plan 0 +brigg 0 +sensorless 0 +us 0 +massiv 0 +actuatorarrai 0 +theori 0 +control 0 +actuat 0 +arrai 0 +oiso 0 +japan 0 +januari 0 +approach 0 +design 0 +micromechan 0 +hing 0 +structur 0 +extend 0 +abstract 0 +siggraph 0 +solid 0 +model 0 +montral 0 +quebc 0 +canada 0 +authorthes 0 +gener 0 +dynam 0 +index 0 +search 0 +author 0 +titl 0 +keyword 0 +scott 0 +cytacki 0 +pedro 0 +felzenszwalb 0 +ryan 0 +lilien 0 +michel 0 +maharbiz 0 +pass 0 +scharstein 0 +aaron 0 +stump 0 +szewczyk 0 +fernando 0 +viton 0 +voskuhl 0 +wayt 0 +matt 0 +welsh 0 +whelan 0 +assist 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..15cdcc1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,91 @@ +inform 1 +research 0 +captur 0 +access 0 +group 0 +collect 0 +work 0 +comput 0 +structur 0 +materi 0 +document 0 +construct 0 +report 0 +cornel 0 +projectinform 0 +accessth 0 +waysthat 0 +locat 0 +ever 0 +increas 0 +volum 0 +ofonlin 0 +data 0 +determin 0 +extract 0 +forhuman 0 +user 0 +found 0 +john 0 +hopcroft 0 +davisin 0 +current 0 +area 0 +researchextract 0 +onlin 0 +thestructur 0 +explicit 0 +extractinginform 0 +present 0 +tabular 0 +form 0 +relat 0 +databas 0 +summari 0 +overview 0 +collectionsof 0 +text 0 +nationwid 0 +librari 0 +sciencetechn 0 +begun 0 +digit 0 +computersci 0 +technic 0 +order 0 +make 0 +moreaccess 0 +internet 0 +avail 0 +server 0 +addit 0 +toit 0 +util 0 +gener 0 +commun 0 +thisdocu 0 +test 0 +consist 0 +dean 0 +krafft 0 +visitingscientist 0 +jimdavi 0 +well 0 +number 0 +graduat 0 +undergradu 0 +student 0 +fall 0 +project 0 +activ 0 +longer 0 +jrdpublicationsjam 0 +allan 0 +informationag 0 +build 0 +hyperlink 0 +proceed 0 +confer 0 +oninform 0 +knowledg 0 +manag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..005b96f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,15 @@ +research 1 +multimedia 1 +zeno 0 +groupzeno 0 +cornel 0 +group 0 +peopl 0 +mission 0 +project 0 +paper 0 +softwar 0 +curricula 0 +develop 0 +potpourri 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..9032d0a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,102 @@ +page 1 +work 0 +home 0 +degre 0 +come 0 +back 0 +current 0 +rain 0 +santa 0 +barbara 0 +cornel 0 +video 0 +pagewelcom 0 +depart 0 +issu 0 +pictur 0 +left 0 +see 0 +quit 0 +date 0 +frame 0 +challeng 0 +viewer 0 +syosset 0 +york 0 +town 0 +long 0 +island 0 +receiv 0 +bachelor 0 +scienc 0 +decemb 0 +month 0 +california 0 +decid 0 +fall 0 +master 0 +engin 0 +leav 0 +land 0 +ithaca 0 +mayb 0 +miss 0 +season 0 +wind 0 +snow 0 +actual 0 +enough 0 +anywai 0 +plan 0 +graduat 0 +meng 0 +project 0 +prof 0 +ramin 0 +zabih 0 +robot 0 +vision 0 +csrvl 0 +interest 0 +topic 0 +motion 0 +segment 0 +gener 0 +process 0 +paper 0 +relat 0 +research 0 +area 0 +link 0 +compani 0 +green 0 +hill 0 +softwar 0 +californialockhe 0 +martin 0 +control 0 +system 0 +binghamton 0 +yorkaltera 0 +corp 0 +jose 0 +californiafun 0 +stuff 0 +game 0 +domainvth 0 +babylon 0 +siteoth 0 +place 0 +univers 0 +worldcareermosaictop 0 +site 0 +student 0 +email 0 +kmai 0 +cours 0 +still 0 +construct 0 +last 0 +modifi 0 +januari 0 +access 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..5420262c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,42 @@ +utc 1 +inform 1 +faculti 1 +depart 1 +public 1 +peopl 1 +home 0 +pagegener 0 +recruit 0 +overview 0 +research 0 +group 0 +relat 0 +program 0 +academ 0 +admiss 0 +requir 0 +cours 0 +descript 0 +catalog 0 +comput 0 +facil 0 +upcom 0 +event 0 +calendar 0 +seminar 0 +talk 0 +visitor 0 +schedulespag 0 +class 0 +person 0 +page 0 +student 0 +organ 0 +alumni 0 +link 0 +find 0 +staff 0 +directoryth 0 +universitywww 0 +informationgrip 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..cf810f48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,67 @@ +intern 1 +mathemat 0 +award 0 +theorem 0 +prove 0 +intellig 0 +board 0 +artificialintellig 0 +woodrow 0 +chair 0 +emeritu 0 +univers 0 +servic 0 +autom 0 +artifici 0 +truste 0 +joint 0 +confer 0 +research 0 +analog 0 +profil 0 +bledso 0 +bledsoepet 0 +donnel 0 +centenni 0 +comput 0 +system 0 +professor 0 +utah 0 +salt 0 +lake 0 +citi 0 +california 0 +berkelei 0 +honor 0 +profession 0 +third 0 +mileston 0 +americanmathemat 0 +societi 0 +distinguish 0 +jointconfer 0 +presid 0 +american 0 +associ 0 +onartifici 0 +editor 0 +journal 0 +presentarea 0 +interestautomat 0 +summari 0 +researchmi 0 +focus 0 +automat 0 +theoremproof 0 +check 0 +involv 0 +heurist 0 +higher 0 +levelplan 0 +well 0 +exampl 0 +alsointerest 0 +learn 0 +previou 0 +index 0 +next 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..4365031e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,90 @@ +werth 1 +parallel 1 +comput 0 +educ 0 +softwar 0 +engin 0 +brown 0 +mathemat 0 +chair 0 +board 0 +scienc 0 +program 0 +proceed 0 +intern 0 +confer 0 +jain 0 +john 0 +lectur 0 +research 0 +univers 0 +associ 0 +ieee 0 +compil 0 +environ 0 +direct 0 +model 0 +process 0 +august 0 +schedul 0 +oper 0 +profil 0 +werthsenior 0 +scientist 0 +emori 0 +washington 0 +profession 0 +servic 0 +accredit 0 +vice 0 +technic 0 +committe 0 +softwareengin 0 +present 0 +area 0 +interestparallel 0 +computersci 0 +summari 0 +researchmi 0 +current 0 +interest 0 +parallelprogram 0 +andimplement 0 +issu 0 +also 0 +activ 0 +set 0 +incomput 0 +local 0 +nation 0 +level 0 +select 0 +recent 0 +publicationss 0 +hyder 0 +unifi 0 +concurr 0 +debug 0 +societi 0 +sobek 0 +newton 0 +interact 0 +formal 0 +practic 0 +develop 0 +code 0 +note 0 +york 0 +springer 0 +verlag 0 +multipl 0 +system 0 +journal 0 +distribut 0 +decemb 0 +gener 0 +applic 0 +thirteenth 0 +previou 0 +index 0 +next 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..ebf452ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,53 @@ +architectur 1 +alfr 0 +page 0 +area 0 +manag 0 +databas 0 +distribut 0 +faculti 0 +dale 0 +daleno 0 +person 0 +trammel 0 +crow 0 +regent 0 +professor 0 +emeritu 0 +comput 0 +scienc 0 +exet 0 +colleg 0 +oxford 0 +england 0 +univers 0 +texa 0 +austin 0 +interestdatabas 0 +system 0 +summari 0 +researchmi 0 +interest 0 +involv 0 +applic 0 +parallel 0 +multi 0 +stagei 0 +problem 0 +studiedinclud 0 +data 0 +strategi 0 +index 0 +andmap 0 +relat 0 +algebra 0 +oper 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..b23a88fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,92 @@ +comput 1 +system 1 +scienc 0 +brumfield 0 +distribut 0 +model 0 +queue 0 +jeffrei 0 +mathemat 0 +univers 0 +award 0 +databas 0 +symposium 0 +sigcs 0 +faculti 0 +brumfieldsenior 0 +lectur 0 +math 0 +georgia 0 +purdu 0 +honor 0 +colleg 0 +natur 0 +teach 0 +excel 0 +area 0 +interestperform 0 +analysi 0 +oper 0 +summari 0 +researchi 0 +interest 0 +role 0 +plai 0 +designersof 0 +studi 0 +performanceof 0 +exist 0 +propos 0 +network 0 +eachresourc 0 +repres 0 +tasksawait 0 +servic 0 +solut 0 +involv 0 +computationof 0 +respons 0 +time 0 +length 0 +throughput 0 +select 0 +recent 0 +publicationsj 0 +shen 0 +richter 0 +graf 0 +verdi 0 +visual 0 +environ 0 +design 0 +journal 0 +ofparallel 0 +miller 0 +chou 0 +perform 0 +modelingof 0 +object 0 +orient 0 +intern 0 +parallel 0 +distributedsystem 0 +austin 0 +texa 0 +decemb 0 +concurr 0 +program 0 +modula 0 +inproceed 0 +technic 0 +loui 0 +bulletin 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..4def6729 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,113 @@ +mathemat 1 +cline 0 +comput 0 +softwar 0 +siam 0 +journal 0 +scientif 0 +numer 0 +analysi 0 +alan 0 +professor 0 +appli 0 +profession 0 +editor 0 +transact 0 +director 0 +interest 0 +problem 0 +fit 0 +renka 0 +constrain 0 +triangul 0 +condit 0 +number 0 +estim 0 +faculti 0 +clinedavid 0 +bruton 0 +centenni 0 +scienc 0 +univers 0 +michigan 0 +servic 0 +algorithm 0 +commun 0 +associ 0 +editori 0 +board 0 +statisticalcomput 0 +special 0 +group 0 +southern 0 +region 0 +socialrespons 0 +area 0 +interestmathemat 0 +summari 0 +researchi 0 +transform 0 +tool 0 +whichcan 0 +involv 0 +constructionof 0 +explor 0 +methodolog 0 +formathemat 0 +particular 0 +major 0 +developmentha 0 +packag 0 +hundr 0 +subprogram 0 +curv 0 +andsurfac 0 +emploi 0 +tension 0 +spline 0 +select 0 +recent 0 +publicationsr 0 +scatter 0 +data 0 +us 0 +delaunai 0 +imac 0 +expert 0 +system 0 +symbol 0 +north 0 +holland 0 +king 0 +meyer 0 +rout 0 +schedul 0 +coast 0 +guard 0 +buoi 0 +tender 0 +interfac 0 +dimension 0 +solut 0 +closest 0 +node 0 +presenc 0 +barrier 0 +counter 0 +exampl 0 +three 0 +statist 0 +moler 0 +stewart 0 +wilkinson 0 +matrix 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..772afda2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,69 @@ +mathemat 1 +univers 1 +edsger 0 +wybe 0 +comput 0 +physic 0 +award 0 +member 0 +academi 0 +art 0 +interest 0 +faculti 0 +dijkstra 0 +dijkstraschlumberg 0 +centenni 0 +chair 0 +sciencesprofessor 0 +mathematicskandidaatsexamen 0 +doctora 0 +examen 0 +theoret 0 +leydenph 0 +amsterdamhonor 0 +awardsacm 0 +ture 0 +foreign 0 +honorari 0 +american 0 +sciencesmemb 0 +royal 0 +netherland 0 +sciencesdistinguish 0 +fellow 0 +british 0 +societyafip 0 +harri 0 +good 0 +memori 0 +doctor 0 +scienc 0 +honori 0 +causa 0 +queen 0 +belfastarea 0 +program 0 +correct 0 +methodolog 0 +algorithm 0 +systemssummari 0 +research 0 +area 0 +focus 0 +streamlin 0 +argumentso 0 +increas 0 +power 0 +reason 0 +particular 0 +ofform 0 +techniqu 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..2251e697 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,60 @@ +comput 1 +edmondson 0 +scienc 0 +chri 0 +yurkanan 0 +univers 0 +network 0 +mobil 0 +protocol 0 +faculti 0 +yurkananlectur 0 +mathemat 0 +texa 0 +austin 0 +profession 0 +servic 0 +secretari 0 +treasur 0 +sigcomm 0 +area 0 +interestcomput 0 +educ 0 +manag 0 +larg 0 +softwar 0 +project 0 +databas 0 +design 0 +summari 0 +researchmi 0 +research 0 +interest 0 +high 0 +speed 0 +commun 0 +specif 0 +internetwork 0 +select 0 +recent 0 +public 0 +cobb 0 +andm 0 +gouda 0 +address 0 +internet 0 +inproceed 0 +annual 0 +theori 0 +informaticsconfer 0 +press 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..9d1f6151 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,50 @@ +univers 1 +confer 1 +committe 1 +area 1 +suzi 0 +secondari 0 +educ 0 +comput 0 +scienc 0 +servic 0 +inform 0 +faculti 0 +gallagh 0 +gallagherlectur 0 +coordin 0 +academ 0 +program 0 +loyola 0 +southwestern 0 +louisiana 0 +profession 0 +sigcs 0 +necc 0 +interestcomput 0 +librari 0 +process 0 +summari 0 +researchmi 0 +interest 0 +student 0 +recruit 0 +andretent 0 +women 0 +minor 0 +improv 0 +scienceeduc 0 +school 0 +local 0 +system 0 +retriev 0 +techniqu 0 +addit 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..6343675a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,114 @@ +jenevein 1 +comput 1 +interconnect 0 +optic 0 +system 0 +wafer 0 +fault 0 +network 0 +perform 0 +scale 0 +toler 0 +processor 0 +measur 0 +chemistri 0 +parallel 0 +architectur 0 +recent 0 +work 0 +methodolog 0 +menez 0 +applic 0 +ieee 0 +transact 0 +malek 0 +nest 0 +proceed 0 +intern 0 +confer 0 +engin 0 +faculti 0 +senior 0 +lectur 0 +louisiana 0 +state 0 +univers 0 +orlean 0 +area 0 +interestinterconnect 0 +process 0 +summari 0 +researchmi 0 +research 0 +focus 0 +interconnectionnetwork 0 +success 0 +failur 0 +restsin 0 +abil 0 +devis 0 +appropri 0 +cost 0 +interconnectionstructur 0 +involv 0 +thedevelop 0 +special 0 +kindof 0 +laser 0 +wave 0 +guid 0 +design 0 +beinginvestig 0 +techniqu 0 +lead 0 +parallelsystem 0 +beingappli 0 +buss 0 +communicationswitch 0 +iscontinu 0 +performanceport 0 +across 0 +machin 0 +develop 0 +contrast 0 +tobenchmark 0 +repres 0 +true 0 +memorysystem 0 +select 0 +publicationsr 0 +kyklo 0 +multicomput 0 +strategi 0 +properti 0 +june 0 +laranjeira 0 +predic 0 +scheme 0 +press 0 +ullah 0 +metrix 0 +precis 0 +industri 0 +decemb 0 +johnson 0 +impact 0 +multiprocessor 0 +journal 0 +qualiti 0 +reliabl 0 +octob 0 +campbel 0 +prototyp 0 +integr 0 +januari 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..098b0875 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,66 @@ +logic 1 +comput 0 +norman 0 +emeritu 0 +philosophi 0 +univers 0 +architectur 0 +closur 0 +space 0 +faculti 0 +martin 0 +martinprofessor 0 +scienc 0 +professor 0 +ofphilosophi 0 +chicago 0 +california 0 +angel 0 +area 0 +interestmathemat 0 +summari 0 +researchmi 0 +current 0 +activ 0 +concentr 0 +abstract 0 +structur 0 +asinterpret 0 +theori 0 +center 0 +whichexploit 0 +notion 0 +deduct 0 +oper 0 +andon 0 +intension 0 +model 0 +classic 0 +mathemat 0 +significantearli 0 +research 0 +design 0 +especi 0 +missil 0 +vehicl 0 +applic 0 +trackingalgorithm 0 +track 0 +scan 0 +radar 0 +function 0 +complet 0 +inmani 0 +valu 0 +delai 0 +metatheori 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..8915488b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,140 @@ +time 1 +system 0 +real 0 +comput 0 +intern 0 +engin 0 +formal 0 +softwar 0 +proceed 0 +aloysiu 0 +present 0 +method 0 +committe 0 +ieee 0 +control 0 +design 0 +develop 0 +autom 0 +wang 0 +journal 0 +program 0 +symposium 0 +chair 0 +technic 0 +aid 0 +tool 0 +research 0 +robust 0 +respons 0 +specif 0 +confer 0 +decemb 0 +faculti 0 +mokassoci 0 +professorfaculti 0 +fellow 0 +scienc 0 +electr 0 +massachusett 0 +institut 0 +technolog 0 +profession 0 +servic 0 +associ 0 +editor 0 +critic 0 +editori 0 +board 0 +systemdesign 0 +taiwan 0 +vice 0 +work 0 +group 0 +federationof 0 +automat 0 +presentarea 0 +interestfault 0 +toler 0 +hard 0 +architectur 0 +summari 0 +researchi 0 +current 0 +conduct 0 +fundament 0 +area 0 +ofdistribut 0 +primari 0 +concern 0 +includespecif 0 +techniqu 0 +algorithm 0 +forguarante 0 +stringent 0 +constraint 0 +understand 0 +thetrad 0 +criticalsystem 0 +goal 0 +framework 0 +theanalysi 0 +synthesi 0 +applic 0 +areasinclud 0 +robot 0 +avion 0 +industrialprocess 0 +fund 0 +provid 0 +offic 0 +ofnav 0 +highli 0 +environ 0 +forreal 0 +select 0 +recent 0 +publicationsa 0 +toward 0 +mechan 0 +foundat 0 +tilborg 0 +kluwer 0 +academ 0 +publish 0 +heitmey 0 +labaw 0 +clement 0 +case 0 +support 0 +fifth 0 +workshop 0 +montreal 0 +juli 0 +emerson 0 +asynchron 0 +distribut 0 +aptl 0 +melbourn 0 +load 0 +adjust 0 +adapt 0 +antonio 0 +tsou 0 +brown 0 +analysi 0 +bound 0 +nasa 0 +expert 0 +sigsoft 0 +orlean 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..e87668ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,75 @@ +program 1 +function 0 +hamilton 0 +richard 0 +engin 0 +interest 0 +concurr 0 +process 0 +languag 0 +faculti 0 +senior 0 +lecturerb 0 +appli 0 +physic 0 +harvard 0 +collegem 0 +aero 0 +astronaut 0 +stanford 0 +universityph 0 +comput 0 +scienc 0 +iowa 0 +state 0 +universityprofession 0 +servicecoordin 0 +univers 0 +texa 0 +austin 0 +year 0 +seri 0 +editor 0 +vol 0 +addison 0 +weslei 0 +area 0 +object 0 +orient 0 +undergradu 0 +educationsummari 0 +research 0 +maintain 0 +long 0 +stand 0 +potentialfor 0 +suitabl 0 +formal 0 +reason 0 +infal 0 +us 0 +teach 0 +sectionof 0 +work 0 +time 0 +permit 0 +implementationof 0 +real 0 +microcomput 0 +applic 0 +longer 0 +term 0 +project 0 +book 0 +onfunct 0 +algorithm 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..0335b3fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,126 @@ +system 1 +databas 0 +silberschatz 0 +ieee 0 +confer 0 +intern 0 +comput 0 +knowledg 0 +area 0 +rastogi 0 +larg 0 +abraham 0 +award 0 +paper 0 +inform 0 +chair 0 +pod 0 +research 0 +parallel 0 +manag 0 +main 0 +process 0 +continu 0 +media 0 +storag 0 +high 0 +perform 0 +transact 0 +data 0 +septemb 0 +ozden 0 +faculti 0 +silberschatzprofessorship 0 +sciencesm 0 +stoni 0 +brookhonor 0 +profession 0 +serviceiee 0 +societi 0 +outstand 0 +journal 0 +advisori 0 +committe 0 +nation 0 +scienc 0 +foundat 0 +divis 0 +robot 0 +intellig 0 +gener 0 +seventh 0 +eighth 0 +sigact 0 +sigmod 0 +symposiumon 0 +principl 0 +organ 0 +ullman 0 +invit 0 +workshop 0 +futureof 0 +program 0 +symposium 0 +distributedsystem 0 +interest 0 +oper 0 +distribut 0 +basedsystemssummari 0 +special 0 +concurr 0 +recentresearch 0 +concentr 0 +multidatabas 0 +transactionmanag 0 +base 0 +real 0 +time 0 +databasesystem 0 +multiresolut 0 +server 0 +select 0 +recent 0 +publicationss 0 +ganguli 0 +tsur 0 +map 0 +datalog 0 +programexecut 0 +network 0 +processor 0 +knowledgeand 0 +engin 0 +june 0 +jagadish 0 +lieuwen 0 +sudarshan 0 +dali 0 +memori 0 +internationalconfer 0 +biliri 0 +cost 0 +storageserv 0 +movi 0 +demand 0 +onveri 0 +framework 0 +storageand 0 +retriev 0 +conferenceon 0 +multimedia 0 +read 0 +fussel 0 +multi 0 +resolut 0 +relationaldata 0 +model 0 +august 0 +addit 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..e63c6ef7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,17 @@ +robert 1 +simmon 1 +simmonsquinci 0 +centenni 0 +professor 0 +emeritu 0 +comput 0 +scienc 0 +professoremeritu 0 +psychologymai 0 +novemb 0 +bledso 0 +rememb 0 +back 0 +list 0 +faculti 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..d2882a52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,53 @@ +adam 1 +seligman 1 +home 1 +page 1 +gzip 1 +email 1 +pageadam 0 +click 0 +log 0 +gradual 0 +student 0 +austin 0 +program 0 +aweekli 0 +happi 0 +hour 0 +depart 0 +claim 0 +fame 0 +undergradu 0 +thesi 0 +specifiedth 0 +type 0 +rule 0 +oper 0 +semant 0 +core 0 +avail 0 +fileor 0 +postscript 0 +file 0 +knowwhat 0 +think 0 +touch 0 +utexa 0 +call 0 +pagemart 0 +graphic 0 +phone 0 +number 0 +read 0 +progress 0 +vrml 0 +paper 0 +new 0 +junki 0 +fromreut 0 +yahoo 0 +altern 0 +could 0 +check 0 +nando 0 +time 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..8862712d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,30 @@ +univers 1 +scienc 1 +texa 0 +comput 0 +agapito 0 +austin 0 +sustaita 0 +austincognit 0 +interest 0 +machin 0 +learn 0 +languag 0 +acquisit 0 +chill 0 +specif 0 +connection 0 +commonsens 0 +reasoningschoolingph 0 +hopefulli 0 +colleg 0 +station 0 +california 0 +santa 0 +barbara 0 +miscellaneouspost 0 +addressth 0 +depart 0 +mail 0 +utexa 0 +eduphon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..debb26a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,90 @@ +report 1 +artifici 0 +intellig 0 +faculti 0 +autom 0 +theorem 0 +prove 0 +novak 0 +technic 0 +program 0 +robert 0 +kuiper 0 +reason 0 +miikkulainen 0 +base 0 +moonei 0 +laboratoryut 0 +laboratoryth 0 +laboratori 0 +atth 0 +univers 0 +texa 0 +austinha 0 +distinguish 0 +histori 0 +larg 0 +number 0 +excel 0 +andgradu 0 +student 0 +new 0 +world 0 +rank 0 +nation 0 +close 0 +link 0 +comput 0 +scienc 0 +depart 0 +boyer 0 +causei 0 +logic 0 +philosoph 0 +foundat 0 +benjamin 0 +qualit 0 +vladimir 0 +lifschitz 0 +action 0 +risto 0 +neural 0 +network 0 +mirank 0 +rule 0 +system 0 +machin 0 +learn 0 +gordon 0 +automat 0 +physic 0 +problem 0 +solv 0 +bruce 0 +porter 0 +multi 0 +function 0 +knowledg 0 +emeritu 0 +woodi 0 +bledso 0 +deceas 0 +dream 0 +aaai 0 +presidenti 0 +address 0 +simmon 0 +memoriam 0 +postdoc 0 +peter 0 +clark 0 +souther 0 +softwar 0 +directori 0 +current 0 +avail 0 +porterpoint 0 +lab 0 +fund 0 +agenciescontact 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..baeda15c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,40 @@ +parallel 1 +program 1 +austin 1 +comput 0 +scienc 0 +work 0 +system 0 +offic 0 +ajita 0 +johnajita 0 +john 0 +candid 0 +group 0 +depart 0 +univers 0 +texa 0 +hello 0 +research 0 +automat 0 +programmingframework 0 +base 0 +constraint 0 +compil 0 +parallelprocedur 0 +advisor 0 +professor 0 +brownemi 0 +papersmi 0 +us 0 +translat 0 +routin 0 +code 0 +want 0 +contact 0 +postal 0 +usavoic 0 +main 0 +taylor 0 +ajohn 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..1dd9c317 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,112 @@ +comput 1 +scienc 0 +austin 0 +home 0 +interest 0 +page 0 +almstrum 0 +learn 0 +univers 0 +texa 0 +group 0 +vicki 0 +educ 0 +understand 0 +mathemat 0 +method 0 +teach 0 +uppsala 0 +includ 0 +technolog 0 +special 0 +offic 0 +utexa 0 +utc 0 +almstrumabout 0 +scientist 0 +peopl 0 +particularli 0 +interestedin 0 +logic 0 +formal 0 +doctoralresearch 0 +topic 0 +limit 0 +mathematicallog 0 +novic 0 +student 0 +lectur 0 +addit 0 +ispent 0 +fall 0 +semest 0 +sweden 0 +pagether 0 +link 0 +encourag 0 +other 0 +excel 0 +computersci 0 +garden 0 +travel 0 +craft 0 +sew 0 +woodwork 0 +pictur 0 +hubbi 0 +torgni 0 +stadler 0 +check 0 +site 0 +itics 0 +confer 0 +integr 0 +educationjun 0 +work 0 +june 0 +swedenoth 0 +maintain 0 +class 0 +camp 0 +field 0 +research 0 +evalu 0 +mentor 0 +issu 0 +jump 0 +point 0 +area 0 +suffer 0 +spurt 0 +construct 0 +frenzi 0 +organ 0 +belong 0 +sigcs 0 +educationsigsoft 0 +softwar 0 +engineeringacm 0 +associ 0 +machineryieeeth 0 +institut 0 +electr 0 +electron 0 +engineerscpsrcomput 0 +profession 0 +social 0 +responsibilityconnect 0 +elsewhereto 0 +contact 0 +depart 0 +main 0 +direct 0 +seldom 0 +alwai 0 +connect 0 +need 0 +forewarn 0 +leav 0 +plenti 0 +time 0 +email 0 +address 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..417b1122 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,15 @@ +anthoni 1 +hing 0 +pang 0 +offic 0 +hung 0 +home 0 +pagehung 0 +hour 0 +mondai 0 +wednesdai 0 +email 0 +utexa 0 +inform 0 +compil 0 +cours 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..8039c7d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,32 @@ +aruna 1 +comput 1 +austin 0 +depart 0 +fall 0 +homepag 0 +addalacurr 0 +graduat 0 +studentth 0 +univers 0 +texa 0 +scienc 0 +taylor 0 +hall 0 +educ 0 +bachelor 0 +engin 0 +sciencess 0 +colleg 0 +engineeringmysorework 0 +experi 0 +lectur 0 +studi 0 +sciencesunivers 0 +mysoreindiai 0 +come 0 +mysor 0 +cityindiato 0 +contact 0 +email 0 +utexa 0 +eduvoic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..9d2c5834 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,21 @@ +ashi 1 +austin 1 +offic 1 +home 0 +page 0 +tarafdarashi 0 +tarafdarabout 0 +get 0 +round 0 +let 0 +exist 0 +known 0 +patienc 0 +pleas 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +usavoic 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..3a80a33f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,64 @@ +scienc 1 +comput 0 +bayardo 0 +austin 0 +univers 0 +texa 0 +work 0 +research 0 +mail 0 +address 0 +engin 0 +roberto 0 +home 0 +pageroberto 0 +candid 0 +expect 0 +complet 0 +date 0 +fall 0 +depart 0 +current 0 +also 0 +within 0 +infosleuth 0 +project 0 +interest 0 +queri 0 +process 0 +activ 0 +expert 0 +databas 0 +system 0 +data 0 +mine 0 +constraint 0 +satisfactionmi 0 +thesi 0 +advisor 0 +prof 0 +daniel 0 +mirank 0 +paper 0 +line 0 +along 0 +toolkit 0 +generatingand 0 +solv 0 +exception 0 +hard 0 +instanc 0 +contact 0 +inform 0 +utexa 0 +campu 0 +dept 0 +taylor 0 +hall 0 +histori 0 +electr 0 +center 0 +coordin 0 +number 0 +sinc 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..98c46a22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,40 @@ +bert 1 +refin 0 +imprecis 0 +model 0 +month 0 +address 0 +austin 0 +kayresearch 0 +reason 0 +physic 0 +process 0 +overviewof 0 +research 0 +vitami 0 +network 0 +retriev 0 +paper 0 +dissert 0 +entitl 0 +behavior 0 +abstract 0 +stuffsonia 0 +andnina 0 +page 0 +drink 0 +ofth 0 +springbank 0 +scotchdrinksof 0 +past 0 +contact 0 +informationemail 0 +utexa 0 +offic 0 +taylor 0 +hall 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..b05c082b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,39 @@ +bhanu 1 +comput 1 +system 1 +home 0 +page 0 +welcom 0 +homepagethi 0 +akhil 0 +reddythank 0 +visit 0 +homepag 0 +visitor 0 +number 0 +school 0 +univers 0 +texa 0 +austinm 0 +scienc 0 +third 0 +semest 0 +coursesc 0 +multimedia 0 +harrick 0 +vinc 0 +introduct 0 +mathemat 0 +logic 0 +vladimir 0 +lifschitz 0 +datacommun 0 +network 0 +anitish 0 +barua 0 +architectur 0 +schwetmani 0 +term 0 +project 0 +databas 0 +manag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..b185971e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,228 @@ +peopl 1 +program 0 +probabl 0 +comput 0 +scienc 0 +risk 0 +would 0 +adopt 0 +make 0 +decis 0 +analog 0 +neuron 0 +process 0 +number 0 +updat 0 +bogon 0 +cogsci 0 +sometim 0 +feel 0 +austin 0 +work 0 +avers 0 +outcom 0 +coin 0 +toss 0 +reject 0 +well 0 +sound 0 +explain 0 +diseas 0 +problem 0 +save 0 +human 0 +brain 0 +wrong 0 +inform 0 +telephon 0 +bogo 0 +bogu 0 +michael 0 +bogomolnymichael 0 +bogomolni 0 +advert 0 +although 0 +pictur 0 +read 0 +articl 0 +current 0 +first 0 +semest 0 +univers 0 +texa 0 +physic 0 +amherst 0 +colleg 0 +research 0 +interestsnot 0 +intend 0 +jenef 0 +husman 0 +final 0 +project 0 +quarter 0 +accept 0 +fair 0 +bet 0 +econom 0 +theori 0 +involv 0 +maxim 0 +util 0 +diminish 0 +return 0 +howev 0 +ask 0 +question 0 +formul 0 +take 0 +prevent 0 +manner 0 +respond 0 +differ 0 +exampl 0 +belov 0 +tverski 0 +kahneman 0 +taken 0 +almost 0 +verbatimfrom 0 +frame 0 +psycholog 0 +choic 0 +imagin 0 +prepar 0 +outbreak 0 +unusu 0 +expect 0 +kill 0 +altern 0 +combat 0 +beenpropos 0 +assum 0 +exact 0 +scientif 0 +estim 0 +consequ 0 +programsar 0 +follow 0 +besav 0 +nobodi 0 +favor 0 +digitalif 0 +made 0 +fire 0 +depend 0 +level 0 +electrochem 0 +charg 0 +built 0 +axon 0 +biolog 0 +foundat 0 +shaki 0 +ahead 0 +scream 0 +hypothesi 0 +transfer 0 +particular 0 +wire 0 +either 0 +high 0 +voltag 0 +interpret 0 +accur 0 +inaccur 0 +simpli 0 +come 0 +answer 0 +mistak 0 +subtract 0 +balanc 0 +checkbook 0 +rememb 0 +invalid 0 +mayb 0 +gave 0 +wasn 0 +real 0 +anoth 0 +stori 0 +nevertheless 0 +hard 0 +press 0 +point 0 +misfir 0 +account 0 +error 0 +correct 0 +lead 0 +incorrect 0 +result 0 +cognit 0 +even 0 +complet 0 +remind 0 +quot 0 +italic 0 +class 0 +append 0 +introduct 0 +cours 0 +graduat 0 +researchcognit 0 +sciencearitifici 0 +intelligencemathemat 0 +logictopolog 0 +ghrist 0 +oper 0 +system 0 +paper 0 +symbol 0 +differenti 0 +puzzl 0 +theorem 0 +prover 0 +contact 0 +email 0 +utexa 0 +better 0 +send 0 +postcard 0 +phone 0 +postal 0 +address 0 +wilshir 0 +parkwai 0 +informationthi 0 +page 0 +written 0 +us 0 +text 0 +editor 0 +last 0 +insert 0 +empti 0 +promis 0 +construct 0 +soon 0 +suppos 0 +list 0 +hidden 0 +talentsdefinit 0 +quantum 0 +bogodynamicsdefinit 0 +sortwhil 0 +free 0 +look 0 +bogos 0 +bogomet 0 +filter 0 +flux 0 +bogotifi 0 +autobogotiphobia 0 +blinkenlight 0 +lasher 0 +pleas 0 +connect 0 +stupid 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..f0d686d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,186 @@ +scienc 1 +univers 1 +comput 0 +page 0 +boyer 0 +robert 0 +texa 0 +austin 0 +govern 0 +symbol 0 +mathemat 0 +time 0 +articl 0 +licens 0 +taken 0 +oftexa 0 +fundament 0 +home 0 +boyerhom 0 +stephen 0 +professor 0 +philosophydepart 0 +austinhow 0 +reach 0 +mepap 0 +mail 0 +dept 0 +univ 0 +usaemail 0 +utexa 0 +edufax 0 +physic 0 +locationsclassescurriculum 0 +vitaeperson 0 +dataeducationpublicationshonorsjobsgradu 0 +studentsth 0 +moor 0 +prover 0 +also 0 +knowna 0 +nqthm 0 +photo 0 +recommend 0 +read 0 +project 0 +vote 0 +smart 0 +webth 0 +projectmi 0 +view 0 +undergradu 0 +educ 0 +john 0 +mccarthi 0 +pageth 0 +moffett 0 +build 0 +controversyni 0 +mccune 0 +robbin 0 +algebra 0 +result 0 +andsom 0 +technic 0 +detail 0 +verif 0 +float 0 +point 0 +divis 0 +algorithm 0 +microprocessor 0 +wonder 0 +softwar 0 +polici 0 +permitsth 0 +public 0 +close 0 +zero 0 +administrativeoverhead 0 +short 0 +cours 0 +howthi 0 +work 0 +much 0 +intellectu 0 +properti 0 +thegreat 0 +book 0 +variou 0 +enumer 0 +thereof 0 +confess 0 +acanon 0 +thumper 0 +possibl 0 +end 0 +tenur 0 +universitiesstandard 0 +disclaim 0 +natur 0 +noth 0 +shouldb 0 +repres 0 +offici 0 +posit 0 +part 0 +state 0 +furthermor 0 +steal 0 +joke 0 +peter 0 +deutsch 0 +aweb 0 +own 0 +anind 0 +endors 0 +everyth 0 +formal 0 +method 0 +alwai 0 +riski 0 +peano 0 +first 0 +call 0 +logic 0 +introduc 0 +instanc 0 +mean 0 +habitu 0 +wrote 0 +hislectur 0 +note 0 +teach 0 +militaryacademi 0 +student 0 +incens 0 +hisformalist 0 +approach 0 +rebel 0 +despit 0 +hispromis 0 +pass 0 +fire 0 +subsequ 0 +found 0 +amor 0 +congeni 0 +set 0 +turin 0 +sincomplet 0 +theorem 0 +rudi 0 +rucker 0 +death 0 +verg 0 +extinct 0 +said 0 +harold 0 +kroto 0 +britain 0 +sussex 0 +share 0 +chemistrypr 0 +curl 0 +richard 0 +smallei 0 +rice 0 +inhouston 0 +discoveri 0 +carbon 0 +atom 0 +bound 0 +shape 0 +asocc 0 +ball 0 +scientist 0 +lament 0 +loss 0 +fund 0 +associ 0 +press 0 +decemb 0 +daili 0 +texan 0 +upup 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..4e81a20f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,156 @@ +parallel 1 +program 0 +comput 0 +brown 0 +code 0 +environ 0 +languag 0 +visual 0 +high 0 +level 0 +debug 0 +paper 0 +proceed 0 +confer 0 +graphic 0 +hyder 0 +newton 0 +werth 0 +august 0 +process 0 +intern 0 +jame 0 +scienc 0 +professor 0 +physic 0 +engin 0 +texa 0 +fellow 0 +societi 0 +specif 0 +integr 0 +work 0 +gener 0 +structur 0 +model 0 +develop 0 +system 0 +dongarra 0 +moor 0 +ieee 0 +compar 0 +henc 0 +version 0 +base 0 +brownereg 0 +chair 0 +electr 0 +hendrix 0 +collegeph 0 +univers 0 +austinhonor 0 +award 0 +british 0 +american 0 +societyarea 0 +interestparallel 0 +major 0 +focu 0 +sciencewith 0 +applic 0 +area 0 +summari 0 +researchi 0 +tenyear 0 +computation 0 +orient 0 +displai 0 +anabstract 0 +declar 0 +evolv 0 +three 0 +ongo 0 +research 0 +includesmethod 0 +optim 0 +highlevel 0 +abstract 0 +throughdata 0 +partit 0 +data 0 +flow 0 +compositionalapproach 0 +addit 0 +intelligenceprocess 0 +control 0 +fluiddynam 0 +also 0 +design 0 +narrow 0 +domaincompil 0 +includ 0 +logic 0 +basedlanguag 0 +robust 0 +method 0 +intellig 0 +real 0 +timedecis 0 +select 0 +recent 0 +publicationsj 0 +distribut 0 +technolog 0 +spring 0 +volum 0 +number 0 +technic 0 +report 0 +dept 0 +univ 0 +austin 0 +longer 0 +refer 0 +interact 0 +formal 0 +andpract 0 +fourthworkshop 0 +compil 0 +santacruz 0 +california 0 +jain 0 +experiment 0 +studi 0 +theeffect 0 +ofth 0 +siam 0 +mirank 0 +parallelizingcompil 0 +rule 0 +unifi 0 +concurr 0 +kleyn 0 +specifi 0 +graph 0 +softwar 0 +baltimor 0 +april 0 +postscript 0 +file 0 +extend 0 +proc 0 +conf 0 +supercomput 0 +juli 0 +describ 0 +prototyp 0 +implement 0 +notat 0 +chang 0 +idea 0 +remain 0 +good 0 +broad 0 +introduct 0 +brief 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..548c7813 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,70 @@ +group 1 +vlsi 0 +interest 0 +austin 0 +ping 0 +depart 0 +thakur 0 +area 0 +fpga 0 +architectur 0 +link 0 +inform 0 +comment 0 +utc 0 +home 0 +page 0 +research 0 +addressdepart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +peopl 0 +supervis 0 +prof 0 +martin 0 +wong 0 +member 0 +chang 0 +chung 0 +chenyao 0 +chen 0 +yung 0 +ming 0 +fang 0 +shashidhar 0 +zhou 0 +researchth 0 +current 0 +wide 0 +rang 0 +broadli 0 +classifi 0 +follow 0 +placement 0 +rout 0 +partit 0 +logic 0 +synthesi 0 +issu 0 +high 0 +perform 0 +abstract 0 +recent 0 +public 0 +groupcan 0 +found 0 +trace 0 +sigda 0 +special 0 +design 0 +autom 0 +ieee 0 +institut 0 +electr 0 +electron 0 +engin 0 +austinclick 0 +mail 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..844cbcba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,93 @@ +work 1 +distribut 0 +project 0 +beer 0 +humor 0 +utexa 0 +home 0 +page 0 +bill 0 +canfieldhom 0 +businessmi 0 +resum 0 +postscript 0 +spring 0 +give 0 +report 0 +softwar 0 +highli 0 +avail 0 +system 0 +class 0 +slide 0 +talk 0 +effort 0 +mach 0 +implement 0 +flaviu 0 +cristian 0 +algorithm 0 +done 0 +prof 0 +built 0 +guangtian 0 +current 0 +hardwar 0 +verif 0 +ther 0 +divis 0 +ti 0 +research 0 +professor 0 +allen 0 +emerson 0 +pleasuredomest 0 +bliss 0 +depart 0 +photo 0 +wife 0 +carla 0 +newborn 0 +daughter 0 +ruth 0 +clair 0 +parenthood 0 +struck 0 +travel 0 +high 0 +prioriti 0 +somewher 0 +li 0 +enjoy 0 +peel 0 +label 0 +bottl 0 +foreign 0 +land 0 +variou 0 +sourcesth 0 +sofaspher 0 +haiku 0 +olestra 0 +approv 0 +substitut 0 +speak 0 +poetri 0 +interest 0 +women 0 +disinform 0 +dole 0 +canfield 0 +last 0 +updat 0 +april 0 +thank 0 +todd 0 +peter 0 +peterst 0 +mail 0 +mani 0 +link 0 +andth 0 +pictur 0 +cool 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..8c87a753 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,61 @@ +carruth 1 +austin 0 +uniti 0 +mail 0 +utexa 0 +depart 0 +professor 0 +misra 0 +time 0 +home 0 +page 0 +offic 0 +address 0 +phone 0 +carruthpleas 0 +send 0 +question 0 +suggest 0 +introduct 0 +candid 0 +computersci 0 +supervis 0 +jayadev 0 +mydissert 0 +topic 0 +real 0 +member 0 +research 0 +group 0 +extend 0 +theori 0 +order 0 +express 0 +finit 0 +boundson 0 +usual 0 +oper 0 +progress 0 +safeti 0 +alsointerest 0 +function 0 +program 0 +languag 0 +partial 0 +ordersemant 0 +autom 0 +theorem 0 +prove 0 +contact 0 +inform 0 +person 0 +email 0 +comput 0 +scienc 0 +taylor 0 +hall 0 +univers 0 +texa 0 +link 0 +world 0 +wide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..43dcc6d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,43 @@ +ping 1 +chung 0 +chen 0 +schedul 0 +utexa 0 +student 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +austin 0 +fiance 0 +meng 0 +tsai 0 +current 0 +intel 0 +summer 0 +intern 0 +work 0 +bufferinsert 0 +problem 0 +syllabu 0 +grade 0 +polici 0 +exam 0 +homework 0 +exercis 0 +offic 0 +hour 0 +locat 0 +new 0 +class 0 +fall 0 +syllabustopicschung 0 +clen 0 +last 0 +updat 0 +idea 0 +improv 0 +page 0 +send 0 +suggest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..594c44de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,36 @@ +austin 1 +home 0 +deji 0 +anoth 0 +paragraph 0 +offic 0 +page 0 +chen 0 +chenabout 0 +mehello 0 +homepag 0 +student 0 +tongji 0 +univers 0 +shanghai 0 +chinaa 0 +bullet 0 +list 0 +easi 0 +includ 0 +well 0 +first 0 +item 0 +third 0 +forget 0 +break 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +usahom 0 +lake 0 +blvd 0 +usaphon 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..4a2a258a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,139 @@ +cliff 1 +comput 0 +univers 0 +austin 0 +write 0 +system 0 +scienc 0 +educ 0 +macintosh 0 +program 0 +symbol 0 +mstk 0 +plai 0 +chaputcliff 0 +chaputth 0 +texa 0 +austindepart 0 +sciencestaylor 0 +hall 0 +robotlab 0 +home 0 +dullchaput 0 +utexa 0 +studi 0 +northwestern 0 +gothimself 0 +emploi 0 +programm 0 +anywai 0 +spent 0 +year 0 +anemail 0 +client 0 +portabl 0 +visual 0 +object 0 +librari 0 +odesta 0 +corpor 0 +left 0 +institut 0 +thelearn 0 +hewrot 0 +trane 0 +softwar 0 +common 0 +lisp 0 +thenimpl 0 +simul 0 +environ 0 +high 0 +school 0 +studentscal 0 +gamesproject 0 +graduat 0 +student 0 +hang 0 +robot 0 +labannoi 0 +peopl 0 +hair 0 +brain 0 +scheme 0 +mean 0 +represent 0 +artifici 0 +life 0 +sleep 0 +dream 0 +read 0 +fiction 0 +listen 0 +farka 0 +tour 0 +medeski 0 +martin 0 +wood 0 +watch 0 +rerun 0 +korg 0 +ride 0 +bike 0 +turnon 0 +includ 0 +breakfast 0 +version 0 +raspi 0 +voic 0 +starfleet 0 +captain 0 +turnoff 0 +republican 0 +microsoft 0 +hangov 0 +fave 0 +site 0 +current 0 +eventsdaili 0 +new 0 +reutersintellicast 0 +weatheraustin 0 +txchicago 0 +ilperiodicalssucksalonmirski 0 +worst 0 +webth 0 +onionmacweekmacuserreferencehypertext 0 +webster 0 +interfaceyahooalta 0 +vistacardiff 0 +movi 0 +databaselyco 0 +road 0 +mapalt 0 +culturemacintosh 0 +dataappl 0 +computercyberdogquicktimequickdraw 0 +dappl 0 +supportmacintouchmacintosh 0 +resourcecyberdog 0 +poundinfo 0 +archiv 0 +rootcool 0 +weird 0 +stufffringewareth 0 +actlabpbsnprnow 0 +catch 0 +phrase 0 +catalogpap 0 +softwareth 0 +rsumsymbol 0 +emerg 0 +groundingrobotmap 0 +peopledav 0 +falooncharl 0 +lewisjeff 0 +lindjeff 0 +sherwoodbrian 0 +slatorsandi 0 +stone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..380de483 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,104 @@ +page 1 +chuanjun 0 +wang 0 +place 0 +china 0 +check 0 +texa 0 +austin 0 +view 0 +like 0 +find 0 +search 0 +diamond 0 +homepag 0 +welcom 0 +pictur 0 +captur 0 +gloriou 0 +moment 0 +came 0 +earth 0 +stun 0 +detail 0 +origin 0 +come 0 +orient 0 +countri 0 +call 0 +know 0 +hometown 0 +hubei 0 +provinc 0 +graduat 0 +student 0 +tsinghua 0 +univ 0 +decid 0 +time 0 +chang 0 +better 0 +thought 0 +end 0 +beautifulunivers 0 +current 0 +work 0 +comput 0 +scienc 0 +take 0 +break 0 +read 0 +enjoi 0 +tower 0 +opinion 0 +nifti 0 +thing 0 +televis 0 +surf 0 +mind 0 +numb 0 +faceless 0 +howev 0 +brilliant 0 +us 0 +materi 0 +inform 0 +miner 0 +among 0 +million 0 +rock 0 +unemploi 0 +internet 0 +philosoph 0 +well 0 +person 0 +look 0 +real 0 +unix 0 +program 0 +magazin 0 +jump 0 +dobb 0 +journal 0 +word 0 +need 0 +fresh 0 +world 0 +hard 0 +fine 0 +graphic 0 +design 0 +unusu 0 +prose 0 +cours 0 +list 0 +would 0 +complet 0 +without 0 +link 0 +pope 0 +porsch 0 +write 0 +return 0 +depart 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..2e764816 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,103 @@ +comput 1 +scienc 1 +inform 1 +taiwan 0 +univers 0 +texa 0 +austin 0 +research 0 +process 0 +system 0 +tag 0 +user 0 +welcom 0 +homepag 0 +chin 0 +tser 0 +huang 0 +last 0 +updat 0 +decemb 0 +educ 0 +june 0 +degre 0 +dept 0 +engin 0 +nation 0 +taipei 0 +current 0 +master 0 +student 0 +depart 0 +interest 0 +natur 0 +languag 0 +human 0 +interfac 0 +network 0 +distribut 0 +systemsexperiencei 0 +ever 0 +work 0 +chines 0 +knowledg 0 +group 0 +instituteof 0 +academia 0 +sinica 0 +assist 0 +major 0 +worki 0 +design 0 +capabl 0 +word 0 +segment 0 +categori 0 +usinghidden 0 +markov 0 +model 0 +improv 0 +friendli 0 +tool 0 +allow 0 +toexecut 0 +line 0 +proof 0 +read 0 +result 0 +automat 0 +automatictag 0 +reach 0 +accuraci 0 +improvedbecaus 0 +continu 0 +expans 0 +train 0 +data 0 +person 0 +interestsmovi 0 +book 0 +music 0 +literatur 0 +semiolog 0 +basebal 0 +basketbal 0 +tabl 0 +tenni 0 +pinbal 0 +favorit 0 +siteschina 0 +timesminsheng 0 +dailyth 0 +york 0 +timesusa 0 +todayth 0 +economistth 0 +atlant 0 +monthlymak 0 +contact 0 +chuang 0 +utexa 0 +edufing 0 +meyou 0 +visitor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..8ccdfb9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,23 @@ +page 1 +cilkcilkcilk 0 +pronounc 0 +silk 0 +parallel 0 +multithread 0 +base 0 +languageand 0 +runtim 0 +system 0 +find 0 +time 0 +us 0 +inform 0 +inthi 0 +check 0 +thecilk 0 +last 0 +modifi 0 +august 0 +robert 0 +blumoferdb 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..8eb93622 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,33 @@ +chung 1 +keung 1 +comput 1 +poon 0 +home 0 +page 0 +poondepart 0 +sciencesunivers 0 +texa 0 +austinaustin 0 +offic 0 +ckpoon 0 +utexa 0 +edumi 0 +plan 0 +hungri 0 +fish 0 +askvinc 0 +gogan 0 +pleas 0 +thesi 0 +complex 0 +connect 0 +problemsom 0 +interest 0 +site 0 +theoret 0 +scienc 0 +hong 0 +kong 0 +harmonica 0 +high 0 +school 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..2db33c49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,62 @@ +page 1 +comput 1 +austin 1 +secur 1 +home 0 +project 0 +student 0 +scienc 0 +research 0 +link 0 +utexa 0 +offic 0 +chung 0 +wongchung 0 +wonglast 0 +modifi 0 +graduat 0 +thedepart 0 +univers 0 +texa 0 +member 0 +thenetwork 0 +labwhich 0 +head 0 +byprof 0 +simon 0 +relat 0 +java 0 +nist 0 +divis 0 +resourc 0 +clearinghous 0 +role 0 +base 0 +access 0 +control 0 +rbac 0 +prof 0 +rivest 0 +cryptographi 0 +contact 0 +meemail 0 +ckwong 0 +edupost 0 +usavoic 0 +dept 0 +hyde 0 +park 0 +baptist 0 +church 0 +chines 0 +mission 0 +hong 0 +kong 0 +associ 0 +linux 0 +netbsd 0 +freebsd 0 +openbsd 0 +send 0 +email 0 +tockwong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..4ad94e67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,75 @@ +simul 1 +techniqu 0 +address 0 +clanci 0 +qualit 0 +knowledg 0 +comput 0 +behavior 0 +system 0 +larg 0 +abstract 0 +aggreg 0 +interest 0 +develop 0 +automat 0 +austin 0 +clancyresearch 0 +reason 0 +us 0 +incomplet 0 +descriptionof 0 +possibl 0 +dynam 0 +complex 0 +containinga 0 +number 0 +variabl 0 +constraint 0 +frequentlyi 0 +intract 0 +result 0 +incomprehens 0 +descript 0 +requir 0 +simulationto 0 +elimin 0 +irrelev 0 +detail 0 +focu 0 +distinctionsof 0 +whichaddress 0 +problem 0 +particular 0 +abstractiontechniqu 0 +appli 0 +thiswil 0 +facilit 0 +integr 0 +withlarg 0 +scale 0 +base 0 +model 0 +build 0 +followingtechniqu 0 +issu 0 +vita 0 +list 0 +network 0 +retriev 0 +real 0 +paper 0 +contact 0 +informationemail 0 +utexa 0 +offic 0 +taylor 0 +hall 0 +depart 0 +scienc 0 +univers 0 +texa 0 +finger 0 +inform 0 +hotlist 0 +netscap 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..55c659f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,11 @@ +page 1 +construct 1 +jimbo 1 +click 1 +three 1 +four 1 +five 1 +seven 1 +eight 1 +nine 1 +eleven 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..0c429e20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,38 @@ +austin 1 +chri 0 +home 0 +address 0 +depart 0 +comput 0 +scienc 0 +chines 0 +chuwelcom 0 +page 0 +myselfmi 0 +photo 0 +student 0 +phone 0 +number 0 +call 0 +offic 0 +mail 0 +univers 0 +texa 0 +taylor 0 +hall 0 +interest 0 +link 0 +campu 0 +christian 0 +fellowship 0 +church 0 +hong 0 +kong 0 +china 0 +author 0 +chuemail 0 +cnchu 0 +utexa 0 +edulast 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..bae1819f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,179 @@ +code 1 +program 0 +parallel 0 +mail 0 +user 0 +version 0 +system 0 +list 0 +emeri 0 +visual 0 +click 0 +tutori 0 +releas 0 +avail 0 +softwar 0 +interfac 0 +line 0 +document 0 +manual 0 +member 0 +scienc 0 +produc 0 +architectur 0 +model 0 +screen 0 +shot 0 +base 0 +announc 0 +download 0 +featur 0 +provid 0 +previou 0 +includ 0 +articl 0 +comput 0 +us 0 +name 0 +last 0 +address 0 +relat 0 +postscript 0 +public 0 +contact 0 +send 0 +utexa 0 +austin 0 +research 0 +john 0 +systemmast 0 +lawless 0 +codeless 0 +myriad 0 +preced 0 +wilder 0 +singl 0 +instanc 0 +alfr 0 +lord 0 +tennysoncod 0 +allow 0 +compos 0 +sequentialprogram 0 +direct 0 +graph 0 +wheredata 0 +flow 0 +arc 0 +connect 0 +node 0 +repres 0 +sequenti 0 +thesequenti 0 +written 0 +languag 0 +parallelprogram 0 +varieti 0 +independ 0 +network 0 +machin 0 +well 0 +sequent 0 +symmetri 0 +newest 0 +support 0 +crai 0 +smp 0 +free 0 +major 0 +revis 0 +sophist 0 +mani 0 +improv 0 +make 0 +easier 0 +pleasant 0 +like 0 +macdraw 0 +multipl 0 +window 0 +subgraph 0 +edit 0 +hierarchi 0 +browser 0 +hpcwire 0 +journal 0 +high 0 +perform 0 +recent 0 +publish 0 +entitl 0 +come 0 +kind 0 +enough 0 +reproduc 0 +introduct 0 +current 0 +prospect 0 +notifi 0 +backend 0 +join 0 +fill 0 +form 0 +also 0 +ad 0 +first 0 +xcodelib 0 +compon 0 +librari 0 +publicationscod 0 +construct 0 +directori 0 +compress 0 +file 0 +made 0 +lieu 0 +prepar 0 +stage 0 +despit 0 +chang 0 +still 0 +quit 0 +refer 0 +link 0 +informationfor 0 +specif 0 +comment 0 +regard 0 +berger 0 +snail 0 +group 0 +depart 0 +univers 0 +texa 0 +groupgroup 0 +leaderprofessor 0 +jame 0 +brown 0 +affili 0 +faculti 0 +werth 0 +project 0 +manag 0 +bergerstud 0 +dwip 0 +banerje 0 +incorpor 0 +dynam 0 +data 0 +partit 0 +ajita 0 +develop 0 +constraint 0 +automat 0 +alumni 0 +overview 0 +home 0 +page 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..5761d6b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,35 @@ +search 1 +austin 0 +inform 0 +address 0 +mail 0 +correl 0 +utexa 0 +texa 0 +home 0 +steve 0 +correlstev 0 +correlresearchph 0 +student 0 +work 0 +multifunct 0 +knowledg 0 +base 0 +group 0 +addit 0 +current 0 +construct 0 +hotlist 0 +site 0 +page 0 +email 0 +public 0 +tech 0 +reportcontact 0 +offic 0 +comput 0 +scienc 0 +depart 0 +univers 0 +taylor 0 +hall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..53995a77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,75 @@ +time 1 +system 1 +real 0 +program 0 +languag 0 +group 0 +robot 0 +esterel 0 +check 0 +home 0 +page 0 +driver 0 +offic 0 +carlo 0 +pucholcarlo 0 +pucholresearch 0 +interest 0 +respons 0 +reactiv 0 +gener 0 +formal 0 +method 0 +specif 0 +implement 0 +distribut 0 +control 0 +synchron 0 +mawl 0 +applic 0 +develop 0 +utc 0 +publicationsi 0 +list 0 +public 0 +avail 0 +forbrows 0 +softwareth 0 +tempest 0 +toolset 0 +packag 0 +verifyingsafeti 0 +properti 0 +written 0 +wrote 0 +half 0 +linux 0 +devic 0 +thequantavisionfram 0 +grabber 0 +part 0 +thejoystickdevic 0 +contact 0 +informationoffic 0 +dreal 0 +taylor 0 +hall 0 +univers 0 +texa 0 +austindepart 0 +comput 0 +sciencesaustin 0 +utexa 0 +austin 0 +lot 0 +phun 0 +interestsmemb 0 +theth 0 +latest 0 +interesti 0 +origin 0 +fromgandia 0 +inth 0 +provinc 0 +valencia 0 +spain 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..0e45adc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,79 @@ +time 1 +real 0 +modechart 0 +system 0 +properti 0 +wang 0 +research 0 +group 0 +work 0 +foundat 0 +utc 0 +groupth 0 +head 0 +byprof 0 +aloysiu 0 +past 0 +year 0 +toward 0 +lai 0 +groundworkfor 0 +establish 0 +firm 0 +theoret 0 +systemsand 0 +also 0 +build 0 +design 0 +tool 0 +base 0 +canb 0 +categor 0 +three 0 +area 0 +follow 0 +specif 0 +model 0 +precis 0 +formul 0 +analysi 0 +verif 0 +reason 0 +synthesi 0 +enforc 0 +stringent 0 +constraint 0 +project 0 +logic 0 +toolset 0 +editor 0 +verifi 0 +simul 0 +compil 0 +timetool 0 +scenario 0 +languagepublicationsabstract 0 +ofth 0 +paper 0 +availableonlin 0 +postscript 0 +current 0 +member 0 +deji 0 +chen 0 +carlo 0 +puchol 0 +doug 0 +stuart 0 +chung 0 +tsou 0 +guangtian 0 +yangalumni 0 +paul 0 +clement 0 +chih 0 +farn 0 +supoj 0 +suthandavibul 0 +farnam 0 +jahanian 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..a9f07219 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,118 @@ +comput 1 +page 0 +scienc 0 +learn 0 +research 0 +intellig 0 +texa 0 +artifici 0 +system 0 +inform 0 +univers 0 +austin 0 +resourc 0 +home 0 +women 0 +interest 0 +machin 0 +acquisit 0 +lexic 0 +also 0 +robot 0 +address 0 +internet 0 +collect 0 +associ 0 +cindi 0 +thompsoncindi 0 +thompsonmachin 0 +groupunivers 0 +austini 0 +particip 0 +candlelight 0 +vigil 0 +across 0 +internetto 0 +help 0 +increas 0 +awar 0 +violenc 0 +researchmi 0 +current 0 +primarilyin 0 +area 0 +specif 0 +interestedin 0 +natur 0 +languag 0 +produc 0 +deep 0 +semanticrepresent 0 +input 0 +sentenc 0 +would 0 +us 0 +mani 0 +task 0 +propos 0 +corpu 0 +base 0 +wrote 0 +master 0 +thesi 0 +rule 0 +suitabl 0 +diagnost 0 +expert 0 +mobil 0 +exhibit 0 +atrobofest 0 +spring 0 +semest 0 +build 0 +agent 0 +finger 0 +pictur 0 +vita 0 +list 0 +public 0 +group 0 +educ 0 +north 0 +carolina 0 +state 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +cthomp 0 +utexa 0 +postal 0 +depart 0 +hotlist 0 +start 0 +point 0 +explor 0 +misc 0 +consortium 0 +bibliographi 0 +project 0 +repositori 0 +knowledg 0 +laboratori 0 +georgia 0 +tech 0 +journal 0 +linguist 0 +folk 0 +cognit 0 +miscellan 0 +stuff 0 +wolv 0 +truth 0 +evalu 0 +counsel 0 +expand 0 +horizon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..dc9b9a2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,18 @@ +home 1 +page 1 +xingshan 1 +welcom 0 +browser 0 +doesn 0 +seem 0 +support 0 +frame 0 +want 0 +downloadth 0 +latest 0 +netscap 0 +school 0 +work 0 +famili 0 +friend 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..17627b44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,71 @@ +comput 1 +scienc 0 +univers 0 +mike 0 +dahlin 0 +texa 0 +berkelei 0 +oper 0 +trend 0 +price 0 +page 0 +taylor 0 +hall 0 +dahlingener 0 +informationassist 0 +professor 0 +austin 0 +electr 0 +engin 0 +rice 0 +teachingfal 0 +systemsspr 0 +advanc 0 +architectureeveryon 0 +read 0 +technic 0 +classic 0 +researchxf 0 +serverless 0 +network 0 +file 0 +systemweb 0 +systemsth 0 +experiment 0 +softwar 0 +system 0 +less 0 +public 0 +list 0 +informationtechnolog 0 +pagethi 0 +pagesummar 0 +recent 0 +technolog 0 +interest 0 +operatingsystem 0 +research 0 +compter 0 +architect 0 +includinghistor 0 +data 0 +gather 0 +capac 0 +disk 0 +memori 0 +person 0 +informationif 0 +seem 0 +bore 0 +probabl 0 +want 0 +work 0 +internet 0 +root 0 +link 0 +world 0 +email 0 +utexa 0 +offic 0 +postal 0 +austinaustin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..641536c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,50 @@ +damani 1 +distribut 1 +home 0 +page 0 +howdi 0 +pagal 0 +dekho 0 +student 0 +busi 0 +read 0 +lazi 0 +updateth 0 +homepag 0 +regularli 0 +suffic 0 +anyth 0 +crazi 0 +appeal 0 +phrase 0 +us 0 +probabl 0 +meant 0 +research 0 +activ 0 +work 0 +parallel 0 +sytem 0 +laboratori 0 +vijai 0 +garg 0 +interest 0 +system 0 +network 0 +public 0 +follow 0 +time 0 +honor 0 +tradit 0 +feel 0 +oblig 0 +providesometh 0 +servic 0 +contact 0 +mehom 0 +guadulp 0 +austin 0 +offic 0 +austinphon 0 +dept 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..3bc635a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,55 @@ +austin 1 +texa 0 +research 0 +address 0 +dane 0 +depart 0 +comput 0 +scienc 0 +univers 0 +system 0 +pinbal 0 +view 0 +marshalldan 0 +marshal 0 +student 0 +multiresolut 0 +render 0 +autom 0 +model 0 +tree 0 +real 0 +time 0 +global 0 +illumin 0 +electromechan 0 +machin 0 +mainten 0 +thelogist 0 +equat 0 +escap 0 +attractor 0 +complex 0 +plane 0 +main 0 +area 0 +make 0 +nice 0 +imag 0 +contact 0 +inform 0 +work 0 +appli 0 +laboratori 0 +burnet 0 +phone 0 +email 0 +utexa 0 +school 0 +unrel 0 +link 0 +pastur 0 +jupit 0 +probe 0 +happi 0 +station 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..d3ed0957 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,67 @@ +scienc 1 +stuart 0 +page 0 +sure 0 +well 0 +us 0 +link 0 +provid 0 +save 0 +doug 0 +welcom 0 +pagedoug 0 +home 0 +pagewelcom 0 +construct 0 +bear 0 +number 0 +oflinksto 0 +interest 0 +place 0 +inform 0 +aboutsport 0 +fiction 0 +booksin 0 +gener 0 +fewjok 0 +testof 0 +latexhtml 0 +aweath 0 +mapandcondit 0 +austinandnew 0 +orlean 0 +guess 0 +sort 0 +person 0 +archiv 0 +amgraci 0 +share 0 +perhap 0 +process 0 +index 0 +puttingit 0 +simpl 0 +keep 0 +webbrows 0 +databas 0 +browser 0 +know 0 +thisi 0 +good 0 +idea 0 +go 0 +someth 0 +justa 0 +easi 0 +access 0 +manner 0 +stuffmom 0 +click 0 +comput 0 +calendarlink 0 +video 0 +fictionbooksjokessportsfoodvideout 0 +libraryresumelast 0 +updat 0 +dasdastuart 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..9ee67009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,4 @@ +doug 1 +swhich 1 +annoi 1 +thisorthi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..6cc40bea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,57 @@ +austin 1 +comput 0 +scienc 0 +univers 0 +texa 0 +home 0 +page 0 +neural 0 +network 0 +genet 0 +algorithm 0 +depart 0 +research 0 +universityof 0 +address 0 +link 0 +utc 0 +dian 0 +lawdian 0 +student 0 +intereststh 0 +symbol 0 +ground 0 +problemnavig 0 +robot 0 +agent 0 +us 0 +evolv 0 +theus 0 +educ 0 +spanish 0 +literatur 0 +washingtonst 0 +fine 0 +art 0 +washington 0 +stateunivers 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +dianelaw 0 +utexa 0 +postal 0 +local 0 +homepag 0 +gann 0 +illig 0 +santa 0 +institut 0 +digest 0 +archiv 0 +michigan 0 +group 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..6c7c056b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,47 @@ +comput 1 +student 0 +depart 0 +dionisi 0 +scienc 0 +engin 0 +univers 0 +patra 0 +greec 0 +austin 0 +home 0 +page 0 +papadopoulosdionisi 0 +papadopoulosabout 0 +graduat 0 +undergradu 0 +informat 0 +also 0 +work 0 +technolog 0 +institut 0 +member 0 +softwar 0 +applic 0 +reasearch 0 +unit 0 +contact 0 +medionisi 0 +papadopoulo 0 +texa 0 +mail 0 +utexa 0 +link 0 +mine 0 +monitor 0 +databas 0 +homework 0 +panhellen 0 +associationpanathinaiko 0 +athlet 0 +clubgreek 0 +newshellen 0 +resourc 0 +networkeveryth 0 +alwai 0 +want 0 +know 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..9386c6ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,119 @@ +random 1 +version 0 +stoc 0 +preliminari 0 +comput 0 +appear 0 +texa 0 +austin 0 +scienc 0 +univers 0 +inform 0 +address 0 +complex 0 +expand 0 +applic 0 +graph 0 +report 0 +public 0 +construct 0 +structur 0 +algorithm 0 +simul 0 +weak 0 +sourc 0 +foc 0 +revis 0 +bound 0 +combinatorica 0 +sicomp 0 +complet 0 +david 0 +zuckermandavid 0 +zuckermanassist 0 +professor 0 +contact 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +depart 0 +finger 0 +cours 0 +fall 0 +cryptographyresearch 0 +intereststh 0 +role 0 +theori 0 +walk 0 +cryptographi 0 +paragraph 0 +descript 0 +well 0 +myprofil 0 +annual 0 +also 0 +look 0 +recent 0 +asymptot 0 +good 0 +code 0 +correct 0 +insert 0 +delet 0 +transposit 0 +soda 0 +optim 0 +sampl 0 +extractor 0 +leader 0 +elect 0 +multipl 0 +cover 0 +time 0 +linear 0 +space 0 +jcss 0 +call 0 +determinist 0 +logspac 0 +us 0 +gener 0 +algorithmica 0 +tight 0 +analys 0 +local 0 +load 0 +balanc 0 +derandom 0 +product 0 +beat 0 +eigenvalu 0 +explicit 0 +constructionand 0 +utc 0 +technic 0 +effici 0 +small 0 +hit 0 +setfor 0 +combinatori 0 +rectangl 0 +high 0 +dimens 0 +lower 0 +mutual 0 +exclus 0 +unapproxim 0 +problem 0 +list 0 +abstract 0 +visit 0 +page 0 +sinc 0 +april 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..aa38ef06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,70 @@ +multimedia 1 +laboratori 0 +research 0 +distribut 0 +comput 0 +univers 0 +texa 0 +austin 0 +multimediacomput 0 +scienc 0 +sponsor 0 +foundat 0 +paper 0 +home 0 +page 0 +welcom 0 +main 0 +object 0 +investig 0 +wide 0 +rangeof 0 +issu 0 +area 0 +system 0 +currentresearch 0 +focus 0 +design 0 +storag 0 +server 0 +network 0 +transport 0 +protocol 0 +digit 0 +audio 0 +video 0 +andmultiresolut 0 +databas 0 +dmcl 0 +part 0 +departmentof 0 +work 0 +carri 0 +variou 0 +industri 0 +federalinstitut 0 +includ 0 +intel 0 +nation 0 +nasa 0 +microsoft 0 +mitsubishi 0 +electr 0 +merl 0 +microsystemsinc 0 +tabl 0 +content 0 +agenda 0 +relev 0 +technic 0 +report 0 +list 0 +member 0 +call 0 +would 0 +like 0 +hear 0 +send 0 +yourcom 0 +suggest 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..5ccedd97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,85 @@ +domain 1 +softwar 1 +research 1 +gener 0 +extens 0 +languag 0 +program 0 +student 0 +specif 0 +design 0 +current 0 +build 0 +jakarta 0 +project 0 +univers 0 +texa 0 +offic 0 +address 0 +austin 0 +batorydon 0 +batorysoftwar 0 +improv 0 +programm 0 +product 0 +reduc 0 +mainten 0 +cost 0 +enhanc 0 +applic 0 +perform 0 +investig 0 +wai 0 +realiz 0 +practic 0 +compon 0 +base 0 +methodolog 0 +technolog 0 +larg 0 +scale 0 +system 0 +synthesi 0 +span 0 +topic 0 +architectur 0 +pattern 0 +subject 0 +model 0 +parameter 0 +object 0 +orient 0 +framework 0 +interest 0 +databas 0 +manag 0 +data 0 +structur 0 +avion 0 +support 0 +goal 0 +preprocessor 0 +java 0 +would 0 +encapsul 0 +pluggabl 0 +fund 0 +darpa 0 +microsoft 0 +appli 0 +laboratori 0 +schlumberg 0 +public 0 +contact 0 +inform 0 +taylor 0 +hall 0 +email 0 +batori 0 +utexa 0 +phone 0 +number 0 +postal 0 +depart 0 +comput 0 +scienc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..0543b188 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,41 @@ +dwip 1 +austin 1 +info 0 +parallel 0 +usavoic 0 +offic 0 +home 0 +page 0 +banerje 0 +photograph 0 +banerjeeabout 0 +methi 0 +work 0 +code 0 +programminggroup 0 +methodolog 0 +includ 0 +data 0 +partit 0 +graphicalparallel 0 +program 0 +system 0 +paper 0 +present 0 +theintern 0 +process 0 +symposium 0 +list 0 +favorit 0 +site 0 +insert 0 +know 0 +contact 0 +departmentpost 0 +comput 0 +scienc 0 +main 0 +homepost 0 +enfield 0 +road 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..dc9f6cd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,50 @@ +anoth 1 +emilio 0 +camahort 0 +gurrea 0 +promis 0 +home 0 +page 0 +paragraph 0 +austin 0 +offic 0 +set 0 +decent 0 +summer 0 +mmmmm 0 +multipl 0 +complaint 0 +meet 0 +previou 0 +deadlin 0 +come 0 +excus 0 +know 0 +siggraph 0 +paper 0 +finish 0 +januari 0 +thing 0 +think 0 +bout 0 +right 0 +make 0 +time 0 +els 0 +lose 0 +credibl 0 +left 0 +first 0 +item 0 +third 0 +forget 0 +break 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +taylor 0 +usavoic 0 +main 0 +ecamahor 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..7b50ab92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,32 @@ +posnak 1 +austin 0 +system 0 +research 0 +view 0 +graduat 0 +student 0 +comput 0 +scienc 0 +univers 0 +texa 0 +interest 0 +network 0 +oper 0 +support 0 +multimedia 0 +work 0 +distribut 0 +multimediacomput 0 +laboratori 0 +head 0 +harrick 0 +supervis 0 +greg 0 +lavend 0 +isod 0 +consortium 0 +base 0 +summari 0 +public 0 +utexa 0 +eduphon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..7150affc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,73 @@ +emerson 1 +comput 0 +tempor 0 +scienc 0 +logic 0 +real 0 +time 0 +allen 0 +texa 0 +austin 0 +automata 0 +srinivasan 0 +journal 0 +calculu 0 +emersonbruton 0 +centenni 0 +professor 0 +depart 0 +taylor 0 +hall 0 +univers 0 +mail 0 +utexa 0 +phone 0 +direct 0 +secretari 0 +area 0 +research 0 +interest 0 +formal 0 +method 0 +aid 0 +verif 0 +infinit 0 +object 0 +concurr 0 +distribut 0 +systemsselect 0 +recent 0 +publications 0 +sistla 0 +quantit 0 +reason 0 +system 0 +sadler 0 +effici 0 +satisfi 0 +theori 0 +practic 0 +bakker 0 +york 0 +springer 0 +verlag 0 +lectur 0 +note 0 +jutla 0 +tree 0 +determinaci 0 +annual 0 +ieee 0 +symposium 0 +foundat 0 +foc 0 +juan 0 +modal 0 +handbook 0 +theoret 0 +leeuwen 0 +elsevi 0 +press 0 +amsterdam 0 +cambridg 0 +mass 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..ced569de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,103 @@ +program 1 +code 0 +emeri 0 +work 0 +parallel 0 +berger 0 +home 0 +mail 0 +austin 0 +utexa 0 +system 0 +group 0 +page 0 +function 0 +info 0 +comput 0 +scienc 0 +analyst 0 +groupi 0 +visual 0 +inform 0 +ticam 0 +composit 0 +materi 0 +haskel 0 +doug 0 +evangelist 0 +pageemeri 0 +person 0 +contact 0 +address 0 +dept 0 +taylor 0 +hall 0 +univers 0 +texa 0 +phone 0 +research 0 +also 0 +affili 0 +project 0 +click 0 +name 0 +tool 0 +search 0 +lyco 0 +databas 0 +file 0 +randomli 0 +select 0 +mirror 0 +near 0 +view 0 +unix 0 +user 0 +academ 0 +uttr 0 +abstract 0 +languag 0 +add 0 +object 0 +orient 0 +us 0 +concept 0 +known 0 +type 0 +class 0 +pure 0 +framework 0 +paper 0 +describ 0 +extens 0 +analyz 0 +accomplish 0 +well 0 +problem 0 +compress 0 +postscript 0 +html 0 +othermi 0 +youngest 0 +brother 0 +aspir 0 +artist 0 +graphic 0 +handiwork 0 +linksth 0 +systemtexbook 0 +textbook 0 +exchangegrac 0 +graduat 0 +repres 0 +associ 0 +last 0 +updat 0 +octob 0 +believ 0 +macintosh 0 +check 0 +http 0 +macaddict 0 +join 0 +list 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..5142390f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,108 @@ +emma 1 +univers 1 +austin 1 +page 0 +comput 0 +scienc 0 +china 0 +home 0 +depart 0 +texa 0 +zhongshan 0 +librari 0 +wuabout 0 +myselfhi 0 +welcom 0 +chines 0 +girl 0 +come 0 +august 0 +studi 0 +interest 0 +immedi 0 +degre 0 +becam 0 +market 0 +repres 0 +inibm 0 +compani 0 +south 0 +branch 0 +try 0 +deliv 0 +solut 0 +small 0 +planet 0 +costom 0 +telecommun 0 +media 0 +industri 0 +would 0 +surpris 0 +find 0 +manyalumni 0 +enter 0 +alumni 0 +club 0 +nice 0 +thing 0 +attend 0 +graduat 0 +school 0 +lot 0 +intern 0 +opportun 0 +engin 0 +student 0 +semest 0 +work 0 +part 0 +time 0 +programm 0 +nation 0 +instrumentsinc 0 +cours 0 +schedul 0 +spring 0 +distribut 0 +computingmanag 0 +informationautomat 0 +program 0 +tool 0 +baseyahoogalaxi 0 +onlin 0 +universityyellow 0 +mini 0 +introduct 0 +us 0 +fortran 0 +tutori 0 +infoleisur 0 +timenewspagepeopl 0 +dailyartstim 0 +magazinechines 0 +magazinepc 0 +magazinec 0 +visit 0 +orlean 0 +houston 0 +antoniosan 0 +franciscomarina 0 +peac 0 +citysan 0 +jose 0 +capit 0 +silicon 0 +vallei 0 +love 0 +francisco 0 +contact 0 +pointemail 0 +emmawu 0 +utexa 0 +eduphon 0 +mail 0 +last 0 +date 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..0ebbe4bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,30 @@ +page 1 +emilio 1 +first 1 +anoth 1 +paragraph 1 +austin 1 +offic 1 +home 0 +remolinaemilio 0 +remolinaabout 0 +mehi 0 +bullet 0 +list 0 +easi 0 +includ 0 +well 0 +item 0 +third 0 +forget 0 +break 0 +curriculum 0 +vita 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +usavoic 0 +main 0 +eremolin 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..f8ee9e3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,26 @@ +levent 1 +home 1 +page 1 +sayfasi 0 +welcom 0 +erkok 0 +graduat 0 +student 0 +depart 0 +comput 0 +sciencesat 0 +universityof 0 +texa 0 +austin 0 +former 0 +locat 0 +inturkei 0 +person 0 +inform 0 +reach 0 +http 0 +ceng 0 +metu 0 +erkokto 0 +find 0 +thank 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..524b77a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,39 @@ +esra 1 +erdem 0 +comput 0 +scienc 0 +univers 0 +austin 0 +texa 0 +depart 0 +interest 0 +monoton 0 +reason 0 +utexa 0 +homepag 0 +student 0 +thedepart 0 +educ 0 +engin 0 +andinform 0 +bilkent 0 +turkei 0 +area 0 +machin 0 +learninginduct 0 +logic 0 +program 0 +topic 0 +cognit 0 +sciencelearningreason 0 +children 0 +theori 0 +mind 0 +commonsens 0 +reasoningknowledg 0 +representationemotionsphilosophi 0 +mindcontact 0 +inform 0 +postal 0 +voic 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..f1732cd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,69 @@ +carl 1 +student 1 +comput 1 +scienc 1 +univers 1 +rice 1 +austin 1 +offic 1 +home 0 +pagestephen 0 +carlpardon 0 +dust 0 +current 0 +work 0 +toward 0 +master 0 +art 0 +degre 0 +depart 0 +texa 0 +thesi 0 +describ 0 +system 0 +perform 0 +syntact 0 +extens 0 +scheme 0 +program 0 +languag 0 +wasn 0 +alwai 0 +life 0 +myresum 0 +believ 0 +item 0 +person 0 +interest 0 +planmi 0 +resum 0 +research 0 +interestsa 0 +psuedo 0 +random 0 +collect 0 +linksth 0 +household 0 +daili 0 +dose 0 +thing 0 +world 0 +wide 0 +snow 0 +pike 0 +peak 0 +houston 0 +chronicl 0 +interact 0 +sport 0 +worth 0 +athlet 0 +march 0 +bandget 0 +touchpost 0 +usavoic 0 +main 0 +know 0 +esteban 0 +utexa 0 +edureturn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..fea4f5c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,63 @@ +univers 1 +texa 0 +inform 0 +comput 0 +scienc 0 +austin 0 +learn 0 +research 0 +improv 0 +us 0 +address 0 +estlin 0 +utexa 0 +tara 0 +estlintara 0 +estlinmachin 0 +groupth 0 +austinresearchcontrol 0 +knowledg 0 +perform 0 +problem 0 +solver 0 +byguid 0 +effici 0 +accur 0 +solut 0 +researchinvolv 0 +combin 0 +analyt 0 +induct 0 +machinelearn 0 +techniqu 0 +acquir 0 +control 0 +amparticularli 0 +interest 0 +method 0 +theperform 0 +plan 0 +schedul 0 +system 0 +includ 0 +detail 0 +descript 0 +myresearch 0 +vita 0 +list 0 +public 0 +also 0 +check 0 +machin 0 +group 0 +page 0 +educ 0 +tulan 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +postal 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..8cd0993d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,85 @@ +francoi 1 +barbanson 0 +research 0 +interest 0 +central 0 +market 0 +page 0 +utc 0 +austin 0 +todai 0 +tank 0 +polic 0 +class 0 +utexa 0 +home 0 +versionhom 0 +versionthi 0 +locat 0 +directori 0 +spool 0 +user 0 +francoisabout 0 +mecurr 0 +black 0 +forest 0 +cake 0 +genuin 0 +find 0 +real 0 +pastri 0 +fruit 0 +mouss 0 +pack 0 +groceri 0 +well 0 +swim 0 +forthcom 0 +trip 0 +shed 0 +lighton 0 +issu 0 +stop 0 +shop 0 +food 0 +women 0 +current 0 +crawl 0 +join 0 +foreign 0 +legion 0 +chines 0 +wisdom 0 +suggest 0 +watch 0 +plai 0 +basketbal 0 +hyogo 0 +japan 0 +check 0 +action 0 +atdominion 0 +hqcheck 0 +dilberti 0 +knew 0 +databas 0 +would 0 +noth 0 +troubl 0 +mentionthat 0 +parallel 0 +comput 0 +contact 0 +mepost 0 +guadalup 0 +street 0 +suit 0 +texa 0 +voic 0 +theori 0 +number 0 +assum 0 +machin 0 +work 0 +mail 0 +edufrancoi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..46130858 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,58 @@ +comput 1 +fussel 0 +scienc 0 +research 0 +texa 0 +graphic 0 +donald 0 +depart 0 +technolog 0 +group 0 +appli 0 +engin 0 +mathemat 0 +univers 0 +austin 0 +utexa 0 +public 0 +trammel 0 +crow 0 +regent 0 +professor 0 +director 0 +advanc 0 +divis 0 +inform 0 +laboratori 0 +member 0 +center 0 +electr 0 +institut 0 +phone 0 +mail 0 +eduinform 0 +http 0 +user 0 +fussellb 0 +social 0 +dartmouth 0 +collegem 0 +dalla 0 +area 0 +interest 0 +architectur 0 +databas 0 +system 0 +design 0 +autom 0 +fault 0 +toler 0 +cours 0 +introduct 0 +journal 0 +confer 0 +work 0 +progress 0 +current 0 +former 0 +student 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..3132b88d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,33 @@ +ajit 1 +gener 1 +useless 1 +file 1 +georgemi 0 +pagethi 0 +page 0 +go 0 +youand 0 +construct 0 +someth 0 +odd 0 +goodthat 0 +find 0 +anyth 0 +start 0 +research 0 +address 0 +georg 0 +wickersham 0 +lane 0 +austin 0 +gajit 0 +utexa 0 +eduher 0 +softwar 0 +document 0 +foundus 0 +recent 0 +david 0 +last 0 +updat 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..53c9e230 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,27 @@ +geeta 1 +arora 0 +home 0 +page 0 +graduat 0 +student 0 +current 0 +year 0 +still 0 +try 0 +tofigur 0 +research 0 +undergrad 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +india 0 +contact 0 +mehom 0 +river 0 +oak 0 +medic 0 +art 0 +austin 0 +phone 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..15f96817 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,62 @@ +page 1 +home 0 +comment 0 +gokul 0 +flame 0 +critic 0 +send 0 +click 0 +final 0 +receiv 0 +countless 0 +gripe 0 +theexcess 0 +verbos 0 +decid 0 +thecollect 0 +wish 0 +mass 0 +democrat 0 +world 0 +putonli 0 +barest 0 +minimum 0 +adieu 0 +outpour 0 +head 0 +plakal 0 +hag 0 +hopey 0 +sleep 0 +easi 0 +untroubl 0 +conscienc 0 +perfectli 0 +good 0 +untim 0 +demis 0 +actual 0 +quit 0 +want 0 +kind 0 +could 0 +merit 0 +vitriol 0 +risk 0 +mayb 0 +help 0 +reinstat 0 +earlier 0 +signin 0 +lesscrit 0 +contact 0 +medic 0 +art 0 +austin 0 +visitor 0 +number 0 +suggest 0 +utexa 0 +last 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..98e81984 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,176 @@ +like 1 +austin 0 +gooti 0 +know 0 +peopl 0 +univers 0 +friend 0 +thing 0 +also 0 +love 0 +game 0 +book 0 +home 0 +page 0 +subramanyam 0 +intro 0 +present 0 +futur 0 +read 0 +well 0 +interest 0 +place 0 +came 0 +made 0 +alwai 0 +great 0 +time 0 +comput 0 +scienc 0 +texa 0 +life 0 +around 0 +plai 0 +tenni 0 +check 0 +past 0 +hideout 0 +visitor 0 +number 0 +welcom 0 +bold 0 +name 0 +start 0 +suggest 0 +continu 0 +hopefulli 0 +wont 0 +disappoint 0 +gold 0 +born 0 +sept 0 +somebodi 0 +hyderabad 0 +andhra 0 +pradesh 0 +geographi 0 +southern 0 +state 0 +india 0 +curiou 0 +famili 0 +school 0 +join 0 +osmania 0 +colleg 0 +technolog 0 +bachelor 0 +chemic 0 +engin 0 +contact 0 +vari 0 +background 0 +thought 0 +proud 0 +call 0 +batch 0 +nebraska 0 +lincoln 0 +gala 0 +becam 0 +addict 0 +american 0 +footbal 0 +except 0 +cold 0 +winter 0 +everi 0 +els 0 +wasjust 0 +studi 0 +year 0 +transfer 0 +enrol 0 +master 0 +program 0 +depart 0 +real 0 +cool 0 +hang 0 +especi 0 +weather 0 +usual 0 +acad 0 +care 0 +list 0 +alphabet 0 +order 0 +abraham 0 +gokul 0 +kumar 0 +mehul 0 +neeraj 0 +shantanu 0 +shailesh 0 +vipin 0 +best 0 +keep 0 +chat 0 +mani 0 +make 0 +travel 0 +anoth 0 +likechess 0 +question 0 +carrom 0 +board 0 +racquet 0 +ball 0 +tabl 0 +cricket 0 +soccer 0 +watch 0 +definetli 0 +text 0 +want 0 +horoscop 0 +todai 0 +compatabil 0 +sign 0 +listen 0 +hindi 0 +song 0 +write 0 +would 0 +bore 0 +narrow 0 +option 0 +golden 0 +goe 0 +without 0 +sai 0 +control 0 +destini 0 +ever 0 +success 0 +cours 0 +attribut 0 +hardwork 0 +power 0 +good 0 +happen 0 +propos 0 +dispos 0 +pleas 0 +spend 0 +fill 0 +valuabl 0 +comment 0 +guest 0 +hide 0 +medic 0 +art 0 +utexa 0 +finger 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..81b8bdbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,108 @@ +protocol 1 +comput 0 +permiss 0 +specif 0 +copi 0 +http 0 +survei 0 +citat 0 +exact 0 +pragmat 0 +utexa 0 +date 0 +statement 0 +decemb 0 +goudanetwork 0 +copyright 0 +page 0 +texa 0 +gouda 0 +implement 0 +network 0 +octob 0 +work 0 +posit 0 +goudaacm 0 +surveysa 0 +associ 0 +machineri 0 +fornetwork 0 +specificationsand 0 +implementationsmoham 0 +goudath 0 +univers 0 +austin 0 +depart 0 +sciencesaustin 0 +usagouda 0 +user 0 +utc 0 +report 0 +profil 0 +htmlabstract 0 +argu 0 +studi 0 +evolv 0 +bridgeth 0 +networkprotocol 0 +gener 0 +term 0 +formal 0 +implementationsaddit 0 +word 0 +phrase 0 +compil 0 +softwar 0 +tool 0 +develop 0 +methodologypubl 0 +inform 0 +submiss 0 +june 0 +revis 0 +accept 0 +public 0 +sourc 0 +html 0 +avail 0 +make 0 +digitalor 0 +hard 0 +part 0 +person 0 +classroomus 0 +grant 0 +without 0 +provid 0 +made 0 +ordistribut 0 +profit 0 +commerci 0 +advantag 0 +bearthi 0 +notic 0 +full 0 +first 0 +forcompon 0 +own 0 +other 0 +must 0 +honor 0 +abstract 0 +credit 0 +permit 0 +otherwis 0 +torepublish 0 +post 0 +server 0 +redistribut 0 +list 0 +requiresprior 0 +request 0 +frompubl 0 +dept 0 +orpermiss 0 +last 0 +modifi 0 +moham 0 +goudagouda 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..3d68fe98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,63 @@ +gunnel 1 +john 0 +transpos 0 +case 0 +assign 0 +report 0 +look 0 +home 0 +plan 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +austin 0 +utexa 0 +plapack 0 +minut 0 +connect 0 +imag 0 +except 0 +guess 0 +drank 0 +depict 0 +product 0 +latter 0 +author 0 +collect 0 +code 0 +spars 0 +matrix 0 +computationsif 0 +would 0 +like 0 +meet 0 +best 0 +friend 0 +take 0 +data 0 +pageam 0 +log 0 +check 0 +class 0 +also 0 +glimps 0 +mysteri 0 +land 0 +hail 0 +central 0 +oregon 0 +towni 0 +less 0 +redmond 0 +doesn 0 +much 0 +page 0 +talk 0 +visitor 0 +rememb 0 +test 0 +file 0 +long 0 +bore 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..31c24d3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,34 @@ +comput 1 +scienc 1 +frank 0 +utexa 0 +austin 0 +univers 0 +student 0 +depart 0 +tropschuhfrank 0 +tropschuh 0 +gunther 0 +schweiz 0 +clayton 0 +waldhofstrass 0 +rheinfelden 0 +curriculum 0 +vitaeenglishdeutschlinkscarnegi 0 +mellon 0 +undergradu 0 +school 0 +universitterlangen 0 +nrnberg 0 +junior 0 +year 0 +abroad 0 +institut 0 +mathematisch 0 +maschinen 0 +datenverarbeitung 0 +oper 0 +system 0 +texa 0 +graduat 0 +tropschuhgunth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..08aee5e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,41 @@ +yongxiang 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +home 0 +pagemerri 0 +christmashappi 0 +year 0 +welcom 0 +homepagegao 0 +yongxiangsever 0 +pointsto 0 +contact 0 +addresspictur 0 +mine 0 +ceremoni 0 +grant 0 +master 0 +degre 0 +chinadepart 0 +gener 0 +inform 0 +name 0 +male 0 +birthdai 0 +birth 0 +place 0 +huanan 0 +jiangsu 0 +china 0 +hobbi 0 +tabl 0 +tenniseduc 0 +background 0 +juli 0 +softwar 0 +directori 0 +servic 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..919eefd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,26 @@ +comput 1 +zhang 0 +manag 0 +home 0 +page 0 +schoolth 0 +univers 0 +texa 0 +austin 0 +scienc 0 +second 0 +semestercoursesc 0 +languag 0 +linc 0 +distribut 0 +alvis 0 +databas 0 +mirankerfil 0 +term 0 +project 0 +databs 0 +queri 0 +formthank 0 +stop 0 +gzhang 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..24bcd086 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,107 @@ +comput 1 +scienc 0 +zhou 0 +design 0 +wong 0 +institut 0 +univers 0 +depart 0 +texa 0 +austin 0 +vlsi 0 +algorithm 0 +optim 0 +ieee 0 +aid 0 +jose 0 +room 0 +refer 0 +campu 0 +home 0 +pagealan 0 +headlin 0 +new 0 +year 0 +ture 0 +award 0 +given 0 +amir 0 +pnueli 0 +aprofessor 0 +weizmann 0 +israel 0 +theoret 0 +compuer 0 +tsinghua 0 +prestig 0 +china 0 +incompletelist 0 +undergradu 0 +classmat 0 +kept 0 +alex 0 +zhao 0 +current 0 +student 0 +research 0 +interest 0 +focus 0 +find 0 +mani 0 +applic 0 +mathemat 0 +analysi 0 +combinatori 0 +complex 0 +even 0 +mathematicallog 0 +researchgroup 0 +head 0 +prof 0 +martin 0 +publicationshai 0 +forriv 0 +rout 0 +crosstalk 0 +constraint 0 +internationalconfer 0 +chen 0 +optimalnon 0 +uniform 0 +wire 0 +size 0 +elmor 0 +delai 0 +model 0 +acmintern 0 +confer 0 +studi 0 +academ 0 +techniqu 0 +industri 0 +directori 0 +bulletin 0 +live 0 +period 0 +chines 0 +staff 0 +movi 0 +search 0 +engin 0 +internet 0 +contact 0 +inform 0 +sciencesunivers 0 +austintaylor 0 +hall 0 +staustin 0 +voic 0 +mail 0 +haizhou 0 +utexa 0 +edulast 0 +modifi 0 +number 0 +visit 0 +homepag 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..3bc6f1cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,41 @@ +comput 1 +scienc 1 +texa 1 +austin 1 +univ 1 +wuhan 1 +china 1 +home 0 +page 0 +dept 0 +welcom 0 +construct 0 +myselfnow 0 +first 0 +year 0 +student 0 +departmentof 0 +universityof 0 +want 0 +know 0 +click 0 +hear 0 +educ 0 +pre 0 +nation 0 +softwar 0 +engin 0 +alumni 0 +pal 0 +univers 0 +alumnihom 0 +pagecontact 0 +wait 0 +email 0 +haosun 0 +utexa 0 +edunow 0 +call 0 +visitor 0 +sinc 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..fb644236 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,91 @@ +champion 1 +comput 0 +intramur 0 +volleybal 0 +divis 0 +scienc 0 +austin 0 +micheal 0 +universityof 0 +hewett 0 +utexa 0 +texa 0 +kansa 0 +mathemat 0 +honor 0 +first 0 +place 0 +bowl 0 +faculti 0 +grad 0 +fall 0 +open 0 +summer 0 +finish 0 +address 0 +hewetthewett 0 +educlick 0 +fingerm 0 +click 0 +email 0 +fourth 0 +year 0 +student 0 +departmentof 0 +educ 0 +stanfordunivers 0 +electr 0 +engin 0 +washburnunivers 0 +intern 0 +collegiateprogram 0 +contest 0 +nation 0 +competit 0 +utc 0 +sawada 0 +ioanni 0 +smaragdaki 0 +thoma 0 +wahlutc 0 +tower 0 +hanoi 0 +lanc 0 +tokudaut 0 +spring 0 +club 0 +motorola 0 +marathon 0 +hour 0 +minut 0 +might 0 +want 0 +visit 0 +myfavorit 0 +page 0 +locatem 0 +learnabout 0 +research 0 +interest 0 +view 0 +downloadmi 0 +public 0 +learnmor 0 +phone 0 +number 0 +call 0 +offic 0 +home 0 +central 0 +timefax 0 +mail 0 +univers 0 +depart 0 +taylor 0 +hall 0 +author 0 +hewettemail 0 +edulast 0 +updat 0 +wednesdai 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..9ddb3726 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,167 @@ +current 1 +work 1 +austin 0 +databas 0 +robot 0 +java 0 +softwar 0 +resum 0 +packag 0 +http 0 +contract 0 +us 0 +graphic 0 +also 0 +interest 0 +page 0 +hiep 0 +texa 0 +activ 0 +solut 0 +game 0 +receiv 0 +scienc 0 +utexa 0 +system 0 +xwindow 0 +languag 0 +gunu 0 +common 0 +lisp 0 +function 0 +written 0 +like 0 +program 0 +port 0 +netrek 0 +factoryx 0 +offic 0 +nguyenhiep 0 +nguyenabout 0 +meabout 0 +vietnames 0 +american 0 +born 0 +came 0 +unit 0 +state 0 +five 0 +resid 0 +texasfor 0 +life 0 +live 0 +programm 0 +seek 0 +client 0 +process 0 +start 0 +busi 0 +providinghigh 0 +internet 0 +product 0 +rang 0 +video 0 +detail 0 +link 0 +hypertextresum 0 +occup 0 +gordon 0 +novak 0 +compil 0 +class 0 +educ 0 +univers 0 +havedevelop 0 +year 0 +onlin 0 +natur 0 +placement 0 +center 0 +address 0 +con 0 +nsplace 0 +rexi 0 +real 0 +time 0 +emptiv 0 +oper 0 +board 0 +research 0 +gdraw 0 +object 0 +orient 0 +cross 0 +platform 0 +librari 0 +postscript 0 +legion 0 +data 0 +flow 0 +control 0 +flat 0 +simul 0 +realist 0 +specular 0 +reflect 0 +sonar 0 +xgcl 0 +xakcl 0 +interfac 0 +akcl 0 +standalon 0 +provid 0 +john 0 +ousterhout 0 +theunivers 0 +specif 0 +anonlin 0 +access 0 +student 0 +prototyp 0 +moredetail 0 +researchwith 0 +allow 0 +easi 0 +build 0 +andmaintain 0 +network 0 +explor 0 +methodolog 0 +larg 0 +currentlyact 0 +search 0 +expertis 0 +internetsoftwar 0 +might 0 +best 0 +leverag 0 +technic 0 +especi 0 +window 0 +fast 0 +textur 0 +mappingroutin 0 +anim 0 +processor 0 +assembl 0 +write 0 +poetri 0 +make 0 +potteri 0 +outdoor 0 +list 0 +spot 0 +group 0 +virtual 0 +realiti 0 +vrml 0 +sdsc 0 +vrmlto 0 +contact 0 +mepost 0 +comput 0 +usavoic 0 +main 0 +edulast 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..546e001b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,72 @@ +comput 1 +austin 1 +depart 0 +univers 0 +world 0 +huiqun 0 +home 0 +page 0 +scienc 0 +texa 0 +research 0 +inform 0 +internet 0 +career 0 +book 0 +java 0 +rosett 0 +hqliu 0 +utexa 0 +nice 0 +meet 0 +student 0 +member 0 +vlsi 0 +design 0 +group 0 +guid 0 +professor 0 +martin 0 +wong 0 +new 0 +virtual 0 +tour 0 +citi 0 +collect 0 +chines 0 +site 0 +sunris 0 +stuff 0 +societi 0 +ieee 0 +giant 0 +search 0 +tool 0 +yahoo 0 +infoseek 0 +directori 0 +onlin 0 +center 0 +mosaic 0 +bookmark 0 +entertain 0 +languag 0 +unix 0 +perl 0 +expect 0 +refer 0 +manual 0 +program 0 +exampl 0 +contact 0 +mail 0 +phone 0 +address 0 +campu 0 +taylor 0 +last 0 +modifi 0 +comment 0 +welcom 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..1ecdc51c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,58 @@ +austin 1 +scienc 0 +comput 0 +avail 0 +hudson 0 +onlin 0 +research 0 +offic 0 +home 0 +pagehudson 0 +turnerphd 0 +student 0 +colleg 0 +natur 0 +sciencesat 0 +univers 0 +texa 0 +advisor 0 +vladimir 0 +lifschitz 0 +expect 0 +thesi 0 +titl 0 +infer 0 +rule 0 +causal 0 +represent 0 +ofcommonsens 0 +knowledg 0 +action 0 +msc 0 +mli 0 +librari 0 +inform 0 +english 0 +philosophi 0 +vita 0 +postscript 0 +draft 0 +dissert 0 +also 0 +interestscommonsens 0 +reason 0 +actionlog 0 +program 0 +nonmonoton 0 +reasoningmi 0 +paper 0 +linkseuropean 0 +colloquium 0 +spatialand 0 +tempor 0 +reasoningto 0 +contact 0 +mepost 0 +usavoic 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..67132e94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,63 @@ +home 1 +austin 1 +comput 0 +page 0 +time 0 +yanbin 0 +zhang 0 +welcom 0 +graduat 0 +depart 0 +scienc 0 +univers 0 +texa 0 +full 0 +spring 0 +world 0 +compani 0 +hyanbin 0 +utexa 0 +address 0 +littl 0 +cutti 0 +allen 0 +student 0 +current 0 +seek 0 +part 0 +cours 0 +left 0 +resum 0 +click 0 +postscript 0 +format 0 +ieee 0 +onlin 0 +career 0 +center 0 +help 0 +languag 0 +internet 0 +librari 0 +webmuseum 0 +travel 0 +beauti 0 +homeland 0 +contact 0 +mail 0 +phone 0 +offic 0 +campu 0 +tarlor 0 +lake 0 +blvd 0 +number 0 +visit 0 +homepag 0 +sinc 0 +last 0 +modifi 0 +septemb 0 +comment 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..7b3de2c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,59 @@ +inform 1 +isaac 0 +sheldon 0 +isheldon 0 +utexa 0 +comput 0 +scienc 0 +austin 0 +contact 0 +phone 0 +mail 0 +http 0 +user 0 +profession 0 +current 0 +graduat 0 +student 0 +univeristi 0 +texa 0 +depart 0 +reciv 0 +master 0 +decemb 0 +coursework 0 +concentr 0 +graphic 0 +reciev 0 +undergradu 0 +degre 0 +unives 0 +mass 0 +lowel 0 +summer 0 +intern 0 +scientif 0 +engin 0 +softwar 0 +small 0 +compani 0 +creat 0 +schlaeor 0 +mellor 0 +case 0 +tool 0 +project 0 +construct 0 +solid 0 +geometri 0 +us 0 +bsptree 0 +modular 0 +trace 0 +framework 0 +butt 0 +person 0 +babi 0 +page 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..d7ffe5b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,43 @@ +john 1 +adair 1 +live 1 +taylor 1 +back 1 +page 1 +crinkum 0 +crankum 0 +homepag 0 +compound 0 +wife 0 +holli 0 +eileen 0 +evan 0 +jame 0 +rice 0 +alumni 0 +friend 0 +includ 0 +carl 0 +white 0 +also 0 +internet 0 +consult 0 +matthew 0 +mengerink 0 +fish 0 +fanat 0 +work 0 +dejanew 0 +steve 0 +traylen 0 +get 0 +doctor 0 +book 0 +email 0 +jadair 0 +utexa 0 +graduat 0 +student 0 +home 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..fe0089a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,22 @@ +oper 1 +univers 1 +texa 1 +research 1 +john 0 +chamber 0 +home 0 +pagejohn 0 +chamberssenior 0 +system 0 +specialistb 0 +physic 0 +paso 0 +comput 0 +scienc 0 +yale 0 +universityph 0 +austin 0 +paper 0 +vita 0 +link 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..eaafe71e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,164 @@ +austin 1 +texa 0 +research 0 +aftereffect 0 +visual 0 +comput 0 +univers 0 +model 0 +scienc 0 +process 0 +tilt 0 +self 0 +organ 0 +illus 0 +result 0 +brain 0 +thesi 0 +primari 0 +cortex 0 +later 0 +lissom 0 +interact 0 +realist 0 +us 0 +dramat 0 +advanc 0 +make 0 +avail 0 +simul 0 +function 0 +call 0 +human 0 +propos 0 +also 0 +postscript 0 +inform 0 +jbednar 0 +utexa 0 +bednarjim 0 +bednar 0 +candid 0 +dept 0 +philosophi 0 +electr 0 +engin 0 +decemb 0 +concentr 0 +biolog 0 +ofcognit 0 +artifici 0 +neural 0 +network 0 +seek 0 +useth 0 +technolog 0 +past 0 +fewdecad 0 +equal 0 +understand 0 +thehuman 0 +mind 0 +power 0 +soon 0 +beavail 0 +cortic 0 +becomingpract 0 +enabl 0 +necessari 0 +refut 0 +testabl 0 +hypothes 0 +overal 0 +goal 0 +makecognit 0 +empir 0 +rather 0 +purelyphilosoph 0 +domain 0 +centuri 0 +master 0 +ofth 0 +nearli 0 +complet 0 +abstract 0 +long 0 +studi 0 +psychologist 0 +vision 0 +appar 0 +failur 0 +might 0 +offer 0 +insight 0 +carri 0 +particular 0 +class 0 +thought 0 +aris 0 +thu 0 +serv 0 +test 0 +case 0 +theori 0 +area 0 +specif 0 +sever 0 +inhibit 0 +neuron 0 +receiv 0 +input 0 +examin 0 +sirosh 0 +miikkulainen 0 +incorpor 0 +demonstr 0 +principl 0 +drive 0 +qualit 0 +quantit 0 +similar 0 +measur 0 +basi 0 +explan 0 +indirect 0 +effect 0 +line 0 +differ 0 +orient 0 +appli 0 +figur 0 +spatial 0 +frequenc 0 +predict 0 +preliminari 0 +report 0 +file 0 +begin 0 +doctor 0 +includ 0 +detail 0 +level 0 +visualbehavior 0 +extens 0 +contact 0 +email 0 +mail 0 +address 0 +depart 0 +finger 0 +command 0 +machin 0 +log 0 +departmentmi 0 +resum 0 +ascii 0 +format 0 +link 0 +probabl 0 +outdat 0 +paper 0 +interest 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..e37afaf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,95 @@ +homepag 1 +test 1 +comput 1 +system 1 +visit 1 +java 1 +home 0 +univers 0 +librari 0 +cours 0 +topic 0 +info 0 +document 0 +page 0 +html 0 +help 0 +construct 0 +ofjunfanghi 0 +welcom 0 +frame 0 +graduat 0 +student 0 +scienc 0 +depart 0 +texa 0 +austin 0 +largest 0 +academ 0 +north 0 +america 0 +catalog 0 +resum 0 +professor 0 +novak 0 +assign 0 +excel 0 +sourc 0 +ethernet 0 +technolog 0 +special 0 +sysadm 0 +domain 0 +name 0 +inform 0 +free 0 +unixish 0 +oper 0 +linux 0 +project 0 +us 0 +debug 0 +transfer 0 +latex 0 +file 0 +unix 0 +email 0 +stuff 0 +utc 0 +kristina 0 +ross 0 +tutori 0 +learn 0 +take 0 +jeff 0 +network 0 +administr 0 +last 0 +summer 0 +florida 0 +state 0 +edmund 0 +automobil 0 +buyer 0 +guid 0 +want 0 +packag 0 +languag 0 +specif 0 +public 0 +ascii 0 +format 0 +look 0 +pretti 0 +good 0 +privaci 0 +like 0 +sceneri 0 +pictur 0 +jfang 0 +utexa 0 +start 0 +visitor 0 +number 0 +sinc 0 +applet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..0bfd0fe4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,37 @@ +good 1 +austin 0 +john 0 +beer 0 +jprior 0 +utexa 0 +address 0 +priorjohn 0 +priormi 0 +resum 0 +accumul 0 +knowledg 0 +year 0 +dog 0 +someth 0 +long 0 +enough 0 +start 0 +hurt 0 +probabl 0 +chew 0 +nacho 0 +chip 0 +swallow 0 +sleep 0 +contact 0 +inform 0 +email 0 +mail 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +home 0 +phone 0 +swisher 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..bfd14673 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,41 @@ +thoma 1 +comput 1 +jeff 0 +texa 0 +scienc 0 +homepagejeff 0 +homepagecontact 0 +informationpublicationssoftwar 0 +system 0 +gener 0 +research 0 +groupphoto 0 +albumfavorit 0 +internet 0 +sitesuniversityof 0 +departmentappliedresearch 0 +laboratori 0 +electricaland 0 +engin 0 +departmentedsfinanci 0 +trade 0 +technolog 0 +center 0 +fttc 0 +keyword 0 +search 0 +utacademiccalendarsut 0 +sportshook 0 +ultim 0 +longhorn 0 +site 0 +utfootbal 0 +scheduleaustintexa 0 +depart 0 +univers 0 +austin 0 +last 0 +modifi 0 +octob 0 +jthoma 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..0374cef5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,66 @@ +comput 1 +scienc 1 +univers 1 +austin 0 +peke 0 +jiani 0 +student 0 +texa 0 +china 0 +depart 0 +page 0 +homepagewelcom 0 +homepag 0 +first 0 +year 0 +indepart 0 +ataustin 0 +beij 0 +alumni 0 +ofpek 0 +chinesechines 0 +scenerychines 0 +novelschines 0 +classicschines 0 +magazineschines 0 +newspapersus 0 +link 0 +registrar 0 +gradaut 0 +studiesut 0 +libraryut 0 +campusutaccessabout 0 +weather 0 +todai 0 +citylimit 0 +lot 0 +excit 0 +stuff 0 +miscellaneousyahoojava 0 +sunjavascript 0 +netscapeth 0 +perl 0 +languag 0 +home 0 +pagecomput 0 +research 0 +associationcomput 0 +journal 0 +magzin 0 +webnetwork 0 +technicalreport 0 +libraryth 0 +collect 0 +bibliographiesintern 0 +contact 0 +street 0 +jyluo 0 +utexa 0 +finger 0 +meyour 0 +comment 0 +suggestionswould 0 +highli 0 +appreci 0 +visitorsinc 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..800dd633 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,54 @@ +home 1 +austin 0 +comput 0 +scienc 0 +interest 0 +concurr 0 +madra 0 +inform 0 +phone 0 +kedar 0 +namjoshiabout 0 +mehi 0 +thank 0 +check 0 +doctor 0 +student 0 +depart 0 +research 0 +advisor 0 +professor 0 +allen 0 +emerson 0 +tempor 0 +logic 0 +reason 0 +program 0 +semant 0 +distributedalgorithm 0 +automatatheori 0 +came 0 +fall 0 +receiv 0 +bachelor 0 +degre 0 +indian 0 +institut 0 +technolog 0 +wonder 0 +page 0 +lot 0 +stuff 0 +would 0 +like 0 +know 0 +person 0 +contact 0 +offic 0 +address 0 +west 0 +street 0 +todai 0 +amul 0 +adkedar 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..3a2336d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,47 @@ +univers 1 +texa 1 +austin 1 +kenneth 0 +harker 0 +depart 0 +comput 0 +scienc 0 +utexa 0 +kharker 0 +public 0 +view 0 +opinion 0 +taylor 0 +hall 0 +amateur 0 +radio 0 +babylon 0 +linux 0 +rocketri 0 +cyberspac 0 +academ 0 +work 0 +polit 0 +stuff 0 +resum 0 +last 0 +updat 0 +world 0 +wide 0 +facil 0 +provid 0 +servic 0 +faculti 0 +student 0 +staff 0 +guest 0 +express 0 +page 0 +sole 0 +respons 0 +author 0 +necessarili 0 +reflect 0 +system 0 +board 0 +regent 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..3669b49e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,92 @@ +kincaid 1 +linear 1 +comput 1 +numer 0 +algebra 0 +imac 0 +world 0 +mathemat 0 +system 0 +david 0 +analysi 0 +univers 0 +develop 0 +congress 0 +area 0 +softwar 0 +interest 0 +algorithm 0 +equat 0 +larg 0 +solut 0 +young 0 +senior 0 +lecturerassoci 0 +director 0 +center 0 +lamar 0 +texa 0 +austin 0 +honor 0 +award 0 +profession 0 +servic 0 +certif 0 +recognit 0 +creativ 0 +technicalinnov 0 +basic 0 +subprogram 0 +nasa 0 +technic 0 +committe 0 +session 0 +organ 0 +andappli 0 +interestmathemat 0 +high 0 +perform 0 +summari 0 +researchmi 0 +focus 0 +research 0 +us 0 +iter 0 +solv 0 +spars 0 +coeffici 0 +matric 0 +aris 0 +ellipt 0 +partial 0 +differenti 0 +implement 0 +parallel 0 +anoth 0 +select 0 +recent 0 +publicationsw 0 +chenei 0 +pacif 0 +grove 0 +brook 0 +cole 0 +hay 0 +itpack 0 +proceed 0 +coput 0 +atlanta 0 +stationari 0 +second 0 +degre 0 +method 0 +topic 0 +polynomi 0 +sever 0 +variabl 0 +applic 0 +rassia 0 +scientif 0 +river 0 +edg 0 +jersei 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..44f66389 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,67 @@ +comput 1 +univers 0 +kistler 0 +page 0 +scienc 0 +parallel 0 +mike 0 +home 0 +academ 0 +inform 0 +syracus 0 +york 0 +prof 0 +construct 0 +first 0 +year 0 +student 0 +texa 0 +ataustin 0 +thedepart 0 +also 0 +current 0 +emploi 0 +theperson 0 +softwar 0 +productsdivis 0 +backgroundba 0 +mathemat 0 +susquehanna 0 +selinsgrov 0 +master 0 +busi 0 +administr 0 +stern 0 +school 0 +businessnew 0 +interestsi 0 +interest 0 +algorithm 0 +particularli 0 +us 0 +commerci 0 +data 0 +process 0 +press 0 +random 0 +collect 0 +link 0 +coursesfal 0 +distribut 0 +iwith 0 +jayadev 0 +misra 0 +numer 0 +analysi 0 +linear 0 +algebrawith 0 +alan 0 +cline 0 +visitor 0 +number 0 +contact 0 +juli 0 +walk 0 +pflugervil 0 +email 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..5e061df3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,9 @@ +jacob 1 +kornerup 1 +kornerupjacob 0 +welcom 0 +home 0 +page 0 +time 0 +sinc 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..8d2ecdfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,78 @@ +knowledg 1 +research 0 +texa 0 +austin 0 +kuiper 0 +benjamin 0 +comput 0 +scienc 0 +univers 0 +mathemat 0 +represent 0 +commonsens 0 +incomplet 0 +reason 0 +model 0 +qualit 0 +simul 0 +spring 0 +build 0 +intellig 0 +agent 0 +kuipersbenjamin 0 +kuipersbruton 0 +centenni 0 +professor 0 +swarthmor 0 +colleg 0 +interest 0 +expert 0 +withparticular 0 +emphasi 0 +effect 0 +thequalit 0 +grouphom 0 +page 0 +describ 0 +topic 0 +paper 0 +student 0 +andavail 0 +softwar 0 +consider 0 +detail 0 +accomplish 0 +includ 0 +tour 0 +spatial 0 +cognit 0 +qsim 0 +algorithm 0 +access 0 +limit 0 +logic 0 +robot 0 +explor 0 +map 0 +strategi 0 +base 0 +recognit 0 +distinct 0 +place 0 +qualitativereason 0 +cambridg 0 +press 0 +teach 0 +plan 0 +fall 0 +physic 0 +world 0 +contact 0 +inform 0 +mail 0 +prof 0 +depart 0 +email 0 +utexa 0 +phone 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..66e4b525 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,48 @@ +comput 1 +simon 0 +texa 0 +austin 0 +email 0 +utexa 0 +eduphon 0 +network 0 +assist 0 +kata 0 +compress 0 +postscript 0 +professor 0 +sciencesdepart 0 +sciencesunivers 0 +offic 0 +taylor 0 +hall 0 +campu 0 +mail 0 +scienc 0 +photo 0 +profil 0 +research 0 +laboratori 0 +fall 0 +spring 0 +administr 0 +also 0 +editori 0 +ieee 0 +transact 0 +carbon 0 +inform 0 +electron 0 +submissionnew 0 +clip 0 +tune 0 +turn 0 +toss 0 +internet 0 +empt 0 +american 0 +statesman 0 +februari 0 +front 0 +page 0 +cont 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..633ec426 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,54 @@ +research 1 +network 0 +laboratori 0 +scienc 0 +protocol 0 +austin 0 +comput 0 +texa 0 +perform 0 +project 0 +foundat 0 +program 0 +depart 0 +univers 0 +activ 0 +span 0 +entir 0 +develop 0 +cycl 0 +design 0 +specif 0 +verif 0 +test 0 +analysi 0 +implement 0 +tune 0 +currentinterest 0 +architectur 0 +address 0 +chang 0 +underli 0 +commun 0 +technolog 0 +well 0 +applic 0 +supervis 0 +simon 0 +professor 0 +fund 0 +provid 0 +nation 0 +nsaunivers 0 +advanc 0 +lockhe 0 +current 0 +recent 0 +paper 0 +support 0 +videoservic 0 +secur 0 +theori 0 +workshop 0 +integr 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..cbc0c882 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,32 @@ +home 1 +page 0 +landrum 0 +christian 0 +robert 0 +stori 0 +mail 0 +viruspictur 0 +mountain 0 +empirepch 0 +retreattexa 0 +republican 0 +convent 0 +backbon 0 +rockrsumfamilyinterest 0 +council 0 +awai 0 +graham 0 +gordon 0 +pageth 0 +comput 0 +scienc 0 +depart 0 +ofth 0 +univers 0 +texa 0 +austin 0 +councillandrum 0 +utexa 0 +edulast 0 +updat 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..f63c0a4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,30 @@ +comput 1 +greg 0 +lavend 0 +univers 0 +texa 0 +scienc 0 +austinr 0 +lavenderadjunct 0 +assist 0 +professordepart 0 +anddepart 0 +electr 0 +engineeringth 0 +austin 0 +contact 0 +address 0 +research 0 +activ 0 +engin 0 +cours 0 +recommend 0 +read 0 +biograph 0 +informationsuggest 0 +improv 0 +page 0 +welcom 0 +last 0 +updat 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..479885cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,64 @@ +scienc 1 +home 0 +comput 0 +depart 0 +univers 0 +austin 0 +singapor 0 +jame 0 +welcom 0 +texa 0 +inform 0 +nation 0 +island 0 +come 0 +page 0 +pageyeap 0 +normal 0 +look 0 +student 0 +sciencesat 0 +bachelor 0 +master 0 +atth 0 +system 0 +disc 0 +research 0 +interest 0 +algorithm 0 +data 0 +structur 0 +vlsi 0 +designalgorithm 0 +small 0 +tropic 0 +call 0 +locat 0 +degre 0 +north 0 +equat 0 +internet 0 +communityi 0 +much 0 +aliv 0 +particip 0 +know 0 +aboutthi 0 +peopl 0 +wife 0 +hong 0 +kong 0 +month 0 +activ 0 +lovesto 0 +smile 0 +contact 0 +mail 0 +leekk 0 +utexa 0 +phone 0 +campu 0 +addr 0 +taylor 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..de644831 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,30 @@ +softwar 1 +less 1 +laboratori 0 +experiment 0 +system 0 +research 0 +main 0 +object 0 +investig 0 +wai 0 +buildreli 0 +high 0 +perform 0 +parallel 0 +distributedsystem 0 +apart 0 +depart 0 +computersci 0 +univers 0 +oftexa 0 +austin 0 +projectsmemb 0 +lablessss 0 +seminar 0 +seriessponsorslast 0 +modifi 0 +decemb 0 +robert 0 +blumoferdb 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..1ad6eea6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,81 @@ +comput 1 +parallel 1 +snyder 0 +languag 0 +program 0 +sciencesth 0 +portabl 0 +implement 0 +proceed 0 +intern 0 +address 0 +calvin 0 +lincalvin 0 +linassist 0 +professor 0 +import 0 +thing 0 +iswhat 0 +studi 0 +_study_ 0 +plai 0 +_play_ 0 +pete 0 +carrilresearch 0 +interestscompil 0 +performanceanalysi 0 +scientif 0 +project 0 +home 0 +page 0 +select 0 +publicationsth 0 +novel 0 +mathemat 0 +biologyalgorithm 0 +dikaiako 0 +manoussaki 0 +woodward 0 +conf 0 +supercomput 0 +accommod 0 +polymorph 0 +data 0 +decomposit 0 +explicitli 0 +parallelprogram 0 +internationalparallel 0 +process 0 +symposium 0 +april 0 +arrai 0 +sublanguag 0 +compilersfor 0 +banerje 0 +gelernt 0 +nicolau 0 +padua 0 +springer 0 +verlag 0 +simpl 0 +journal 0 +comparison 0 +model 0 +share 0 +memori 0 +multiprocessor 0 +withl 0 +confer 0 +parallelprocess 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +depart 0 +univers 0 +texa 0 +austinaustin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..0e0b02bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,62 @@ +page 1 +austin 0 +system 0 +guangtian 0 +home 0 +current 0 +time 0 +depart 0 +comput 0 +scienc 0 +texa 0 +real 0 +research 0 +last 0 +liugt 0 +utexa 0 +homepagehi 0 +welcom 0 +construct 0 +content 0 +permit 0 +apolog 0 +incomplet 0 +result 0 +inconveni 0 +graduat 0 +student 0 +theunivers 0 +researchi 0 +member 0 +professor 0 +group 0 +interest 0 +includ 0 +timeschedul 0 +algorithm 0 +oper 0 +network 0 +perform 0 +distribut 0 +also 0 +work 0 +data 0 +replic 0 +knowledg 0 +mine 0 +summer 0 +internship 0 +contact 0 +inform 0 +offic 0 +good 0 +view 0 +phone 0 +email 0 +mail 0 +address 0 +univers 0 +updat 0 +pleas 0 +send 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..d90dfa3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,45 @@ +lorenzo 1 +comput 1 +alvisi 0 +scienc 0 +distribut 0 +cornel 0 +utexa 0 +fall 0 +home 0 +page 0 +assist 0 +professor 0 +depart 0 +laurea 0 +physic 0 +universit 0 +agrav 0 +bologna 0 +itali 0 +offic 0 +taylorhal 0 +campusshow 0 +locat 0 +taylor 0 +hall 0 +phone 0 +mail 0 +research 0 +interestsi 0 +interest 0 +special 0 +emphasi 0 +fault 0 +toler 0 +cours 0 +spring 0 +oper 0 +system 0 +topic 0 +sytem 0 +public 0 +photo 0 +maria 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..d5e62a98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,62 @@ +comput 1 +current 0 +austin 0 +home 0 +page 0 +work 0 +scienc 0 +texa 0 +link 0 +microsoft 0 +luxu 0 +utexa 0 +graduat 0 +student 0 +networksoth 0 +class 0 +tsinghua 0 +univers 0 +undergradu 0 +institut 0 +china 0 +depart 0 +studyut 0 +universityaustin 0 +live 0 +academ 0 +internet 0 +area 0 +languag 0 +system 0 +ieee 0 +sigcomm 0 +sigmod 0 +siglink 0 +siggraph 0 +sigmm 0 +sigir 0 +compani 0 +onlin 0 +shop 0 +cool 0 +site 0 +chines 0 +music 0 +newsjob 0 +hunt 0 +weather 0 +forcast 0 +dictionari 0 +contact 0 +inform 0 +campu 0 +dept 0 +univ 0 +addr 0 +corpor 0 +mail 0 +xuelu 0 +thank 0 +come 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..9a2651fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,89 @@ +softwar 1 +engin 0 +werth 0 +educ 0 +comput 0 +chair 0 +honour 0 +fall 0 +offic 0 +link 0 +class 0 +ieee 0 +committe 0 +develop 0 +scienc 0 +tool 0 +direct 0 +object 0 +orient 0 +program 0 +lauri 0 +werthlauri 0 +werthlectur 0 +lwerth 0 +utexa 0 +educurr 0 +semest 0 +hour 0 +time 0 +taylor 0 +phone 0 +engineeringc 0 +contemporari 0 +issu 0 +scienceprofession 0 +servicevic 0 +technic 0 +presentco 0 +confer 0 +profession 0 +presentarea 0 +interestsoftwar 0 +cognit 0 +summari 0 +researchmi 0 +current 0 +work 0 +center 0 +andenviron 0 +area 0 +includ 0 +human 0 +interfac 0 +andsoftwar 0 +metric 0 +select 0 +recent 0 +publicationsl 0 +qualiti 0 +assur 0 +project 0 +transact 0 +januari 0 +lectur 0 +note 0 +process 0 +improv 0 +industri 0 +strength 0 +case 0 +tomayko 0 +springer 0 +verlag 0 +john 0 +proceed 0 +workshop 0 +ics 0 +macintosh 0 +journal 0 +us 0 +univers 0 +texa 0 +depart 0 +home 0 +pagefaculti 0 +profilesc 0 +classeslast 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..da130bda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,42 @@ +madhukar 1 +austin 0 +texa 0 +reddi 0 +home 0 +page 0 +offic 0 +utexa 0 +offici 0 +korupoluwelcom 0 +reach 0 +avenu 0 +taylor 0 +hall 0 +dept 0 +comp 0 +scienc 0 +univ 0 +ahom 0 +link 0 +madrashomepag 0 +ganga 0 +alumniclass 0 +madra 0 +utalgorithm 0 +comput 0 +theori 0 +group 0 +colloquium 0 +oncomput 0 +complex 0 +info 0 +cricket 0 +worldwid 0 +site 0 +espnet 0 +sportszon 0 +interact 0 +batchu 0 +india 0 +author 0 +korupoluemail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..d9f71ef6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,23 @@ +richard 1 +malloryrichard 1 +malloryresearchthesi 1 +research 1 +produc 1 +quasi 1 +natur 1 +languag 1 +explan 1 +qsimsimul 1 +current 1 +implement 1 +work 1 +simpl 1 +system 1 +contact 1 +email 1 +mallori 1 +utexa 1 +offic 1 +taylor 1 +austin 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..ef720f49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,120 @@ +stabil 1 +system 1 +comput 0 +self 0 +author 0 +moham 0 +gouda 0 +flow 0 +state 0 +scienc 0 +rout 0 +legitim 0 +illegitim 0 +proceed 0 +inform 0 +prepar 0 +marco 0 +depart 0 +univers 0 +texa 0 +network 0 +interest 0 +distribut 0 +fault 0 +step 0 +time 0 +respons 0 +appear 0 +third 0 +workshop 0 +maximum 0 +tree 0 +minimum 0 +utexa 0 +austin 0 +home 0 +page 0 +schneidermarco 0 +schneiderph 0 +candid 0 +austinresearchth 0 +titl 0 +dissert 0 +research 0 +area 0 +protocol 0 +toler 0 +particular 0 +implicit 0 +design 0 +label 0 +itsstat 0 +identifi 0 +occur 0 +correct 0 +intend 0 +execut 0 +consid 0 +said 0 +whenregardless 0 +initi 0 +guarante 0 +converg 0 +finit 0 +number 0 +systemwhich 0 +stai 0 +forev 0 +vita 0 +postscript 0 +public 0 +survei 0 +march 0 +real 0 +decis 0 +toward 0 +tolerantr 0 +kluwer 0 +academ 0 +publish 0 +earlier 0 +version 0 +intern 0 +octob 0 +invit 0 +talk 0 +annual 0 +joint 0 +confer 0 +novemb 0 +submit 0 +journal 0 +second 0 +depth 0 +anish 0 +arora 0 +memori 0 +requir 0 +silent 0 +fifteenth 0 +symposium 0 +principl 0 +shlomi 0 +dolev 0 +span 0 +implement 0 +internet 0 +person 0 +list 0 +link 0 +construct 0 +contact 0 +offic 0 +taylor 0 +hall 0 +email 0 +postal 0 +address 0 +ctaylor 0 +usamarco 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..c7e07b5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,92 @@ +comput 1 +scienc 1 +inform 0 +austin 0 +depart 0 +research 0 +page 0 +mark 0 +offic 0 +taylor 0 +hall 0 +univers 0 +texa 0 +taught 0 +addit 0 +appl 0 +pleas 0 +class 0 +routin 0 +avail 0 +home 0 +johnstonemark 0 +johnstonecontact 0 +postal 0 +address 0 +usual 0 +find 0 +best 0 +reach 0 +isvia 0 +email 0 +markj 0 +utexa 0 +johnston 0 +also 0 +look 0 +full 0 +finger 0 +semest 0 +oper 0 +system 0 +byrichard 0 +brice 0 +object 0 +orient 0 +design 0 +analysisclass 0 +glenn 0 +down 0 +somerset 0 +compani 0 +graduat 0 +spring 0 +work 0 +motorola 0 +somersetdesign 0 +centerresearch 0 +informationi 0 +member 0 +oop 0 +group 0 +part 0 +build 0 +real 0 +time 0 +garbagecollector 0 +perform 0 +number 0 +ofstudi 0 +memori 0 +alloc 0 +postscript 0 +copi 0 +dissertationpropos 0 +listof 0 +public 0 +along 0 +brief 0 +descript 0 +develop 0 +librari 0 +allow 0 +precis 0 +timingof 0 +intel 0 +pentium 0 +run 0 +linux 0 +code 0 +publicli 0 +stuff 0 +relat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..0a6d8931 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,13 @@ +home 1 +page 1 +mark 0 +point 0 +interest 0 +visit 0 +also 0 +friend 0 +click 0 +last 0 +modifi 0 +markng 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..f620afd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,85 @@ +page 1 +comput 0 +marku 0 +link 0 +uniti 0 +time 0 +prof 0 +taylor 0 +hall 0 +phone 0 +address 0 +utexa 0 +austin 0 +scienc 0 +place 0 +interest 0 +archiv 0 +kaltenbachmarku 0 +kaltenbachintroductionwelcom 0 +home 0 +current 0 +construct 0 +andwil 0 +entri 0 +permit 0 +iapolog 0 +incomplet 0 +result 0 +inconveni 0 +researchi 0 +member 0 +misra 0 +spsp 0 +research 0 +groupand 0 +emerson 0 +stempor 0 +reason 0 +group 0 +part 0 +work 0 +develop 0 +model 0 +checkerfor 0 +finit 0 +state 0 +program 0 +proposit 0 +logic 0 +verifi 0 +system 0 +recent 0 +version 0 +thesi 0 +isalso 0 +avail 0 +contact 0 +inform 0 +offic 0 +email 0 +postal 0 +univers 0 +texa 0 +depart 0 +find 0 +internet 0 +worth 0 +avisit 0 +theut 0 +departmenthom 0 +softwar 0 +archivefor 0 +macintosh 0 +appl 0 +sworld 0 +wide 0 +technic 0 +supporthom 0 +actansit 0 +comprehens 0 +network 0 +theatt 0 +distribut 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..025e6d1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,50 @@ +austin 1 +comput 0 +scienc 0 +univers 0 +texa 0 +home 0 +page 0 +depart 0 +address 0 +utc 0 +memarti 0 +mayberri 0 +student 0 +researchal 0 +kind 0 +stuff 0 +educ 0 +math 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +martym 0 +utexa 0 +postal 0 +click 0 +applet 0 +paus 0 +resum 0 +displai 0 +local 0 +link 0 +neural 0 +network 0 +homepag 0 +hotlist 0 +downtown 0 +anywher 0 +virtualc 0 +internetrestaur 0 +guid 0 +virtual 0 +tnstechnolog 0 +demonstr 0 +read 0 +daili 0 +texan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..7a1925dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,45 @@ +comput 1 +scienc 1 +austin 1 +univers 0 +norm 0 +mccain 0 +reason 0 +avail 0 +onlin 0 +offic 0 +home 0 +page 0 +mccainabout 0 +mephd 0 +student 0 +colleg 0 +natur 0 +sciencesat 0 +texa 0 +advisor 0 +vladimir 0 +lifschitz 0 +expect 0 +thesi 0 +titl 0 +causal 0 +commonsens 0 +action 0 +kansa 0 +philosophi 0 +baker 0 +vita 0 +postscript 0 +research 0 +interestscommonsens 0 +actionlog 0 +program 0 +nonmonoton 0 +reasoningmi 0 +paper 0 +contact 0 +mepost 0 +usavoic 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..2e98d1a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,42 @@ +univers 1 +baylor 0 +elain 0 +learn 0 +research 0 +texa 0 +comput 0 +scienc 0 +english 0 +address 0 +austin 0 +mari 0 +califfmari 0 +califfmachin 0 +groupunivers 0 +austinresearchmi 0 +current 0 +interest 0 +us 0 +machin 0 +especiallyinduct 0 +logic 0 +program 0 +natur 0 +languag 0 +acquisit 0 +formor 0 +info 0 +check 0 +vita 0 +educ 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +mecaliff 0 +utexa 0 +postal 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..afc7ecab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,209 @@ +knowledg 1 +base 0 +utexa 0 +system 0 +us 0 +domain 0 +research 0 +question 0 +model 0 +porter 0 +explan 0 +gener 0 +answer 0 +help 0 +group 0 +mallori 0 +souther 0 +prado 0 +correl 0 +comput 0 +develop 0 +larg 0 +contain 0 +task 0 +construct 0 +lester 0 +desk 0 +project 0 +bruce 0 +rich 0 +peter 0 +clark 0 +fred 0 +charl 0 +callawai 0 +carl 0 +andersen 0 +steve 0 +austin 0 +would 0 +requir 0 +perform 0 +multipl 0 +viewpoint 0 +built 0 +biologi 0 +method 0 +automat 0 +varieti 0 +concern 0 +distribut 0 +result 0 +recent 0 +jame 0 +test 0 +composit 0 +predict 0 +jeff 0 +languag 0 +text 0 +plan 0 +assist 0 +acker 0 +eilert 0 +bareiss 0 +karl 0 +murrai 0 +rickel 0 +groupknowledg 0 +shown 0 +overviewour 0 +part 0 +depart 0 +scienc 0 +atuniv 0 +texa 0 +long 0 +term 0 +goal 0 +technolog 0 +forconstruct 0 +multifunct 0 +oncomput 0 +significantli 0 +improv 0 +currentexpert 0 +tutor 0 +broadknowledg 0 +toexplain 0 +past 0 +eight 0 +year 0 +inon 0 +area 0 +answeringa 0 +fact 0 +concept 0 +ofth 0 +largest 0 +kind 0 +content 0 +structur 0 +formallyrepres 0 +addit 0 +expand 0 +arealso 0 +begin 0 +similar 0 +notabl 0 +especi 0 +encourag 0 +knowledgebas 0 +thebiolog 0 +express 0 +english 0 +biolog 0 +object 0 +event 0 +control 0 +experi 0 +expert 0 +found 0 +littl 0 +differ 0 +andthos 0 +written 0 +colleagu 0 +current 0 +extend 0 +type 0 +beanswer 0 +autom 0 +reason 0 +jeffrickel 0 +taskof 0 +appropri 0 +well 0 +build 0 +thesimplest 0 +adequ 0 +dauntingrequir 0 +sinc 0 +like 0 +implicitli 0 +manymodel 0 +numer 0 +level 0 +detail 0 +qualit 0 +process 0 +compilerand 0 +qsim 0 +simul 0 +program 0 +final 0 +bybuild 0 +anoth 0 +computingenviron 0 +focuss 0 +deskassist 0 +proport 0 +custom 0 +squestion 0 +otherwis 0 +phone 0 +normal 0 +projectsour 0 +complet 0 +ongo 0 +includ 0 +represent 0 +kned 0 +editor 0 +kastl 0 +retriev 0 +knight 0 +fare 0 +natur 0 +lex 0 +aid 0 +mainten 0 +lexicon 0 +tripel 0 +theorist 0 +searcher 0 +alumni 0 +alumna 0 +lian 0 +erik 0 +brad 0 +blumenth 0 +brant 0 +eolu 0 +uwyo 0 +clarkp 0 +redwood 0 +boe 0 +ncsu 0 +publicationsclick 0 +select 0 +public 0 +relat 0 +projectsclick 0 +herefor 0 +extens 0 +collect 0 +pointer 0 +aroundth 0 +world 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..0fc7dfe8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,99 @@ +mirank 1 +student 1 +rule 1 +current 0 +text 0 +bibtex 0 +daniel 0 +page 0 +match 0 +treat 0 +algorithm 0 +rete 0 +research 0 +candid 0 +past 0 +home 0 +welcom 0 +belat 0 +presenc 0 +construct 0 +someth 0 +like 0 +finish 0 +hold 0 +breath 0 +send 0 +note 0 +utexa 0 +fashionwai 0 +wouldn 0 +place 0 +either 0 +learn 0 +itscomparison 0 +warn 0 +render 0 +obsolet 0 +byth 0 +leap 0 +interest 0 +goal 0 +encompass 0 +venu 0 +languag 0 +usea 0 +basi 0 +activ 0 +distribut 0 +databas 0 +fundamentalcomput 0 +scienc 0 +problem 0 +corollari 0 +evolv 0 +thatgoal 0 +constraint 0 +satisfact 0 +search 0 +queri 0 +optim 0 +relat 0 +object 0 +orient 0 +parallel 0 +execut 0 +base 0 +program 0 +knowledg 0 +compil 0 +bibliographi 0 +sometim 0 +link 0 +paper 0 +come 0 +soon 0 +group 0 +roberto 0 +bayardo 0 +david 0 +gadboi 0 +lanc 0 +obermey 0 +vasili 0 +samoladi 0 +robert 0 +schrag 0 +master 0 +srinivasan 0 +vaidyaraman 0 +lane 0 +warshaw 0 +archi 0 +andrewsdavid 0 +brantchin 0 +ming 0 +kuoshiow 0 +yang 0 +salvator 0 +stolfo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..fa67a726 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,85 @@ +misra 1 +comput 0 +process 0 +jayadev 0 +chair 0 +john 0 +honor 0 +fellow 0 +program 0 +formal 0 +design 0 +synchron 0 +parallel 0 +inform 0 +research 0 +group 0 +misrareg 0 +scienc 0 +depart 0 +tech 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +hopkin 0 +univers 0 +award 0 +profession 0 +servic 0 +simon 0 +guggenheim 0 +ieee 0 +fellowarea 0 +interestparallel 0 +summari 0 +researchmi 0 +interest 0 +appli 0 +method 0 +practic 0 +particularli 0 +inth 0 +specif 0 +asynchronoussystem 0 +select 0 +recent 0 +publicationsj 0 +powerlist 0 +structur 0 +recurs 0 +classic 0 +mind 0 +essai 0 +hoar 0 +prentic 0 +hall 0 +januari 0 +loos 0 +coupl 0 +futur 0 +gener 0 +system 0 +north 0 +holland 0 +phase 0 +letter 0 +equat 0 +reason 0 +nondeterminist 0 +aspect 0 +chandi 0 +foundat 0 +addison 0 +weslei 0 +homepag 0 +work 0 +electron 0 +access 0 +otherpap 0 +current 0 +project 0 +seuss 0 +anoverview 0 +apostscript 0 +versionaccess 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..8ae1aa45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,175 @@ +learn 1 +group 0 +research 0 +system 0 +machin 0 +intellig 0 +univers 0 +induct 0 +languag 0 +utexa 0 +program 0 +artifici 0 +confer 0 +journal 0 +base 0 +natur 0 +plan 0 +comput 0 +logic 0 +acquisit 0 +estlin 0 +theori 0 +order 0 +associ 0 +texa 0 +student 0 +sowmya 0 +baff 0 +mahonei 0 +bradlei 0 +public 0 +abduct 0 +model 0 +speedup 0 +list 0 +softwar 0 +revis 0 +rule 0 +first 0 +data 0 +aaai 0 +linguist 0 +european 0 +special 0 +interest 0 +intern 0 +inform 0 +index 0 +illinoi 0 +utc 0 +supervis 0 +professor 0 +moonei 0 +focuseson 0 +combin 0 +empir 0 +knowledg 0 +techniqu 0 +includ 0 +applic 0 +knowledgerefin 0 +part 0 +scienc 0 +depart 0 +atth 0 +ataustin 0 +pictur 0 +click 0 +graduat 0 +mari 0 +elain 0 +califf 0 +mecaliff 0 +tara 0 +hermjakob 0 +ramachandran 0 +cindi 0 +thompson 0 +cthomp 0 +alumni 0 +paul 0 +scicomp 0 +jeff 0 +firstadvisor 0 +hwee 0 +nhweetou 0 +trantor 0 +dirk 0 +ourston 0 +dirk_ourston 0 +cpqm 0 +saic 0 +richard 0 +furtwangen 0 +siddarth 0 +subramanian 0 +georgetown 0 +john 0 +zell 0 +acad 0 +drake 0 +area 0 +qualit 0 +diagnosi 0 +tutor 0 +refin 0 +uncertain 0 +reasoningher 0 +complet 0 +accel 0 +reason 0 +neither 0 +proposit 0 +fort 0 +chillin 0 +predic 0 +invent 0 +foidl 0 +decis 0 +dolphin 0 +ad 0 +search 0 +control 0 +prolog 0 +standard 0 +classif 0 +algorithm 0 +autom 0 +experiment 0 +comparison 0 +repositori 0 +form 0 +relat 0 +site 0 +american 0 +ilpnet 0 +scientif 0 +network 0 +sigart 0 +signll 0 +joint 0 +aritfici 0 +ijcai 0 +nation 0 +icml 0 +fourth 0 +sourc 0 +subject 0 +biblio 0 +queri 0 +machinelearn 0 +home 0 +page 0 +servic 0 +paper 0 +archiv 0 +jair 0 +foil 0 +quinlan 0 +learner 0 +prodigi 0 +problem 0 +solv 0 +carnegi 0 +mellon 0 +ucpop 0 +partial 0 +planner 0 +washington 0 +explan 0 +oxford 0 +irvin 0 +austin 0 +wisconsin 0 +madison 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..b83f9352 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,110 @@ +univers 1 +learn 0 +texa 0 +austin 0 +comput 0 +illinoi 0 +champaign 0 +scienc 0 +inform 0 +urbana 0 +machin 0 +program 0 +base 0 +address 0 +moonei 0 +research 0 +artifici 0 +intellig 0 +acquisit 0 +exampl 0 +network 0 +symbol 0 +home 0 +depart 0 +raymond 0 +homepageraymond 0 +mooneyassoci 0 +professor 0 +computersci 0 +engin 0 +interestsmi 0 +current 0 +interest 0 +primarilyin 0 +area 0 +includ 0 +natur 0 +languag 0 +parser 0 +lexicon 0 +extract 0 +word 0 +sens 0 +disambigu 0 +induct 0 +logic 0 +prolog 0 +knowledg 0 +theori 0 +refin 0 +automat 0 +modifi 0 +rule 0 +bayesian 0 +empir 0 +data 0 +search 0 +control 0 +improv 0 +plan 0 +effici 0 +qualiti 0 +compar 0 +combin 0 +neural 0 +public 0 +page 0 +vita 0 +finger 0 +cours 0 +informationfal 0 +lisp 0 +learningspr 0 +iiperson 0 +historyi 0 +grew 0 +small 0 +town 0 +fallon 0 +wherestart 0 +attend 0 +fallontownship 0 +highschool 0 +start 0 +fall 0 +went 0 +urbanato 0 +obtain 0 +degre 0 +list 0 +decemb 0 +complet 0 +myph 0 +thesi 0 +explan 0 +learninggroup 0 +direct 0 +prof 0 +gerald 0 +dejong 0 +began 0 +posit 0 +contact 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +meadowfir 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..c8ada596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,100 @@ +austin 1 +link 1 +comput 0 +scienc 0 +decis 0 +task 0 +univers 0 +texa 0 +action 0 +home 0 +page 0 +moriarti 0 +depart 0 +control 0 +agent 0 +system 0 +select 0 +return 0 +research 0 +neural 0 +network 0 +inform 0 +address 0 +utc 0 +dave 0 +moriartydav 0 +student 0 +researchsequenti 0 +appear 0 +mani 0 +practic 0 +real 0 +world 0 +problemsinclud 0 +resourc 0 +alloc 0 +rout 0 +canb 0 +character 0 +follow 0 +scenario 0 +observ 0 +stateof 0 +dynam 0 +finit 0 +thesystem 0 +enter 0 +state 0 +upon 0 +must 0 +selectanoth 0 +payoff 0 +madeor 0 +sequenc 0 +object 0 +thesequ 0 +highest 0 +total 0 +cumulativepayoff 0 +evolv 0 +geneticalgorithm 0 +learn 0 +perform 0 +sequenti 0 +amparticularli 0 +interest 0 +problem 0 +specif 0 +knowledg 0 +iscurr 0 +unavail 0 +costli 0 +obtain 0 +domain 0 +havestudi 0 +includ 0 +game 0 +plai 0 +intellig 0 +constraintsatisfact 0 +list 0 +public 0 +educ 0 +universityof 0 +tulan 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +utexa 0 +postal 0 +local 0 +homepag 0 +us 0 +sport 0 +misc 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..f20b5c64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,38 @@ +univers 1 +wade 0 +utexa 0 +austin 0 +comput 0 +scienc 0 +texa 0 +mine 0 +engin 0 +utah 0 +mwbarn 0 +barnesm 0 +barnesmwbarn 0 +eduresearch 0 +workhelp 0 +pagestyp 0 +map 0 +literatureliteratur 0 +research 0 +notesclassesbackground 0 +informationph 0 +student 0 +depart 0 +educ 0 +reach 0 +mehom 0 +tanglebriar 0 +trail 0 +campu 0 +offic 0 +yete 0 +mail 0 +eduauthor 0 +barnesemail 0 +edulast 0 +updat 0 +mondai 0 +decemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..33c7ea2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,83 @@ +room 1 +dale 0 +nell 0 +seed 0 +comput 0 +scienc 0 +contain 0 +professor 0 +home 0 +retir 0 +austin 0 +teach 0 +summer 0 +pleas 0 +author 0 +websit 0 +scrollit_rl 0 +timertwo 0 +window 0 +settimeout 0 +els 0 +pagesunivers 0 +texa 0 +departmentwelcom 0 +page 0 +reach 0 +address 0 +senior 0 +lectur 0 +univers 0 +oftexa 0 +receiv 0 +utaustin 0 +faculti 0 +sinc 0 +fromful 0 +time 0 +full 0 +load 0 +falland 0 +spend 0 +spring 0 +write 0 +travel 0 +feel 0 +free 0 +brows 0 +resum 0 +curriculum 0 +vita 0 +bibliographi 0 +inform 0 +text 0 +book 0 +research 0 +abstract 0 +ofdissert 0 +chair 0 +recent 0 +person 0 +whichcontain 0 +memento 0 +nontechn 0 +interest 0 +direct 0 +anycorrespond 0 +mail 0 +account 0 +ndale 0 +utexa 0 +profession 0 +profilepublicationsresearch 0 +interestsperson 0 +interestsnel 0 +westlak 0 +offic 0 +document 0 +creat 0 +assist 0 +right 0 +reserv 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..53b43f9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,27 @@ +page 1 +view 0 +netscap 0 +browser 0 +frame 0 +color 0 +neeraj 0 +support 0 +home 0 +either 0 +download 0 +navig 0 +without 0 +note 0 +choos 0 +latter 0 +pleas 0 +keep 0 +mind 0 +design 0 +pretti 0 +background 0 +chosen 0 +work 0 +obnoxi 0 +chartreus 0 +blame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..5e938bf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,20 @@ +home 1 +comput 1 +texa 1 +gnan 0 +pagegnana 0 +kumar 0 +natarajan 0 +page 0 +depart 0 +sciencesunivers 0 +austini 0 +graduat 0 +student 0 +sciencedepart 0 +univers 0 +austin 0 +mail 0 +utexa 0 +edufind 0 +log 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..b6d58d2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,34 @@ +nimar 1 +arora 1 +home 1 +look 1 +singh 0 +page 0 +medic 0 +art 0 +austin 0 +typic 0 +first 0 +year 0 +student 0 +doesn 0 +quit 0 +knowwhat 0 +area 0 +interest 0 +resum 0 +know 0 +altern 0 +bookmarksto 0 +clearer 0 +pictur 0 +contact 0 +click 0 +queri 0 +hit 0 +term 0 +score 0 +ters 0 +output 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..9916ddc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,77 @@ +languag 1 +comput 0 +group 0 +utc 0 +natur 0 +paper 0 +learn 0 +hermjakob 0 +depart 0 +scienc 0 +univers 0 +texa 0 +discuss 0 +meet 0 +current 0 +prof 0 +research 0 +acquist 0 +groupnatur 0 +acquisit 0 +groupat 0 +austinw 0 +explor 0 +area 0 +acquisitionand 0 +fall 0 +usual 0 +everi 0 +wednesdai 0 +havedrawn 0 +close 0 +probabl 0 +resum 0 +second 0 +third 0 +week 0 +januari 0 +propos 0 +previous 0 +particip 0 +includ 0 +moonei 0 +risto 0 +miikkulainen 0 +bobbi 0 +bryant 0 +mari 0 +elain 0 +califf 0 +marti 0 +mayberri 0 +rupert 0 +tang 0 +poon 0 +cindi 0 +thompson 0 +inform 0 +pleas 0 +contact 0 +coordin 0 +utexa 0 +relat 0 +site 0 +associ 0 +linguist 0 +signll 0 +special 0 +interest 0 +print 0 +archiv 0 +machin 0 +neural 0 +network 0 +ofth 0 +ataustinlast 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..c5a66246 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,70 @@ +neural 1 +research 1 +utc 0 +net 0 +group 0 +artifici 0 +intellig 0 +scienc 0 +groupth 0 +supervis 0 +prof 0 +ristomiikkulainen 0 +part 0 +comput 0 +depart 0 +univers 0 +texa 0 +ataustin 0 +concentr 0 +andcognit 0 +includ 0 +natur 0 +languag 0 +process 0 +schema 0 +basedvis 0 +cortic 0 +self 0 +organ 0 +episod 0 +memori 0 +decis 0 +make 0 +evolv 0 +network 0 +genet 0 +algorithm 0 +click 0 +mapbelow 0 +detail 0 +check 0 +hypertext 0 +book 0 +later 0 +interact 0 +thecortex 0 +structur 0 +function 0 +risto 0 +miikkulainen 0 +graduat 0 +student 0 +alumni 0 +visitor 0 +public 0 +demo 0 +poster 0 +softwar 0 +home 0 +page 0 +confer 0 +newsgroup 0 +archiv 0 +inform 0 +sourc 0 +gener 0 +tool 0 +privat 0 +linkswusagemartym 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..21dc1fd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,77 @@ +lisp 1 +scienc 0 +demo 0 +comput 0 +program 0 +gordon 0 +novak 0 +texa 0 +automat 0 +physic 0 +unit 0 +interact 0 +univers 0 +austin 0 +artifici 0 +problem 0 +server 0 +graphic 0 +draw 0 +us 0 +offic 0 +support 0 +free 0 +speech 0 +internet 0 +associ 0 +professor 0 +ofcomput 0 +atth 0 +director 0 +intelligencelaboratori 0 +highest 0 +honor 0 +research 0 +reus 0 +genericalgorithmssolv 0 +specifiedinformallyartifici 0 +intelligencecurriculum 0 +vita 0 +publicationsemploymentgrantsprofession 0 +activ 0 +honorscurriculum 0 +vitaefre 0 +softwar 0 +tmycin 0 +emycin 0 +like 0 +expert 0 +system 0 +shell 0 +interfac 0 +common 0 +lispconvers 0 +measurementsoftwar 0 +construct 0 +creat 0 +write 0 +connect 0 +diagram 0 +convers 0 +convert 0 +measur 0 +isaac 0 +solv 0 +state 0 +english 0 +class 0 +schemec 0 +compilersc 0 +intelligencec 0 +programmingweb 0 +linksweatheraddress 0 +ctai 0 +univ 0 +austinaustintexa 0 +faxnovak 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..0e6f0b3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,51 @@ +page 1 +meghan 0 +insult 0 +panic 0 +brienhi 0 +welcom 0 +home 0 +pleas 0 +wipe 0 +feet 0 +enter 0 +mani 0 +complaint 0 +crappi 0 +laugh 0 +stock 0 +internet 0 +remov 0 +link 0 +chanc 0 +work 0 +thank 0 +sent 0 +still 0 +download 0 +gorgeou 0 +pictur 0 +blow 0 +poster 0 +size 0 +beauti 0 +queen 0 +date 0 +wait 0 +hear 0 +paul 0 +get 0 +marri 0 +august 0 +hope 0 +come 0 +except 0 +anyon 0 +want 0 +give 0 +feel 0 +free 0 +resum 0 +email 0 +obrien 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..17c54c55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,13 @@ +oguer 1 +gutierrezogu 0 +gutierrezth 0 +depart 0 +comput 0 +scienc 0 +theunivers 0 +texa 0 +austinprojectsomioswwhlinksconfer 0 +databas 0 +systemsth 0 +worldemail 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..56d95042 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,209 @@ +scheme 1 +memori 0 +paper 0 +time 0 +system 0 +garbag 0 +avail 0 +research 0 +paul 0 +wilson 0 +object 0 +compil 0 +collect 0 +also 0 +page 0 +group 0 +stephen 0 +carl 0 +especi 0 +persist 0 +store 0 +us 0 +alloc 0 +extens 0 +master 0 +thesi 0 +manag 0 +real 0 +collector 0 +file 0 +includ 0 +sourc 0 +code 0 +languag 0 +introduct 0 +interest 0 +site 0 +oop 0 +sheetal 0 +kakkad 0 +donovan 0 +kolbl 0 +neeli 0 +distribut 0 +virtual 0 +develop 0 +pointer 0 +larg 0 +effici 0 +standard 0 +program 0 +survei 0 +interfac 0 +orient 0 +implement 0 +cours 0 +interpret 0 +descript 0 +macro 0 +note 0 +draft 0 +good 0 +gener 0 +list 0 +whichcontain 0 +groupoop 0 +groupthi 0 +home 0 +supervis 0 +prof 0 +graduat 0 +studentsin 0 +ajit 0 +georg 0 +mark 0 +johnston 0 +scott 0 +kaplan 0 +michael 0 +qing 0 +dougla 0 +wieren 0 +area 0 +hierarchi 0 +cach 0 +simpl 0 +high 0 +perform 0 +call 0 +texa 0 +swizzl 0 +fault 0 +toimplement 0 +address 0 +space 0 +stock 0 +hardwar 0 +andoper 0 +basic 0 +studi 0 +behavior 0 +whichattempt 0 +repair 0 +damag 0 +done 0 +three 0 +decad 0 +mostli 0 +unsoundstudi 0 +mike 0 +automat 0 +storag 0 +generationaland 0 +small 0 +ongarbag 0 +local 0 +recent 0 +hard 0 +written 0 +smart 0 +adapt 0 +managementfor 0 +dynam 0 +andcompress 0 +structur 0 +checkpoint 0 +forfault 0 +toler 0 +travel 0 +debug 0 +highli 0 +portabl 0 +programmingsystem 0 +extend 0 +rscheme 0 +thread 0 +socket 0 +homepag 0 +info 0 +alpha 0 +releas 0 +noteson 0 +rawascii 0 +text 0 +andrschemear 0 +integr 0 +process 0 +algorithm 0 +tosupport 0 +open 0 +thesiscontain 0 +refer 0 +coupl 0 +write 0 +whicharen 0 +form 0 +anywai 0 +sometimesoon 0 +onlin 0 +book 0 +progress 0 +htmlformat 0 +brows 0 +contain 0 +materialfrom 0 +ascii 0 +much 0 +improv 0 +expandedpresent 0 +texinfo 0 +materiali 0 +work 0 +intro 0 +metaobject 0 +besid 0 +providesa 0 +thing 0 +like 0 +make 0 +backgroundread 0 +brief 0 +bibliographi 0 +heap 0 +fortexa 0 +anonym 0 +utexa 0 +readm 0 +materi 0 +subdirectori 0 +oopsla 0 +workshop 0 +peopl 0 +henri 0 +baker 0 +sftp 0 +although 0 +overload 0 +notb 0 +access 0 +keep 0 +try 0 +anoth 0 +great 0 +han 0 +boehm 0 +sever 0 +well 0 +free 0 +severalgarbag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..c2230842 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,11 @@ +depart 1 +robert 0 +otuomagieaddress 0 +austin 0 +phone 0 +emailotu 0 +utexa 0 +eduuniververs 0 +infouniversityth 0 +univers 0 +txa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..526fa658 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,74 @@ +file 1 +devic 0 +virtual 0 +specif 0 +avail 0 +compon 0 +languag 0 +interfac 0 +softwar 0 +construct 0 +view 0 +padgett 0 +work 0 +control 0 +domain 0 +class 0 +also 0 +follow 0 +physic 0 +implement 0 +export 0 +us 0 +austin 0 +padgettdon 0 +dissert 0 +researchi 0 +professor 0 +brown 0 +design 0 +andimplement 0 +driver 0 +investig 0 +creation 0 +program 0 +environ 0 +focu 0 +thu 0 +compil 0 +technolog 0 +devis 0 +prototyp 0 +specifi 0 +softar 0 +call 0 +contain 0 +variou 0 +featur 0 +reduc 0 +effort 0 +requir 0 +refer 0 +manualfor 0 +current 0 +postscript 0 +draft 0 +manual 0 +exampl 0 +counter 0 +multifunct 0 +transpar 0 +recent 0 +present 0 +creat 0 +microsoft 0 +powerpointvers 0 +window 0 +contact 0 +meemail 0 +utexa 0 +edupost 0 +depart 0 +comput 0 +scienc 0 +usafax 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..d722f350 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,90 @@ +page 1 +battlebal 1 +austin 1 +comput 1 +research 1 +plan 0 +softwar 0 +game 0 +binari 0 +system 0 +librari 0 +tree 0 +univers 0 +scienc 0 +interest 0 +geometr 0 +model 0 +utexa 0 +home 0 +philip 0 +hardinphilip 0 +hardinabout 0 +elimin 0 +bug 0 +everywher 0 +fail 0 +fallback 0 +write 0 +access 0 +execut 0 +file 0 +oper 0 +unfortun 0 +avail 0 +want 0 +port 0 +suno 0 +solari 0 +work 0 +go 0 +school 0 +time 0 +multiplay 0 +wrote 0 +runsund 0 +window 0 +line 0 +code 0 +us 0 +standard 0 +templat 0 +anda 0 +campbel 0 +space 0 +partit 0 +imag 0 +screenshot 0 +student 0 +texa 0 +guess 0 +depart 0 +area 0 +graphic 0 +realli 0 +cool 0 +main 0 +graphicssoftwar 0 +reus 0 +engin 0 +gener 0 +group 0 +automat 0 +programmingto 0 +contact 0 +meemail 0 +pahardin 0 +edupost 0 +usanetrek 0 +server 0 +pita 0 +curli 0 +handl 0 +digitaldisast 0 +look 0 +get 0 +plaster 0 +congradul 0 +smartest 0 +person 0 +inth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..f9de1846 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,82 @@ +comput 1 +evolut 1 +scienc 0 +univers 0 +texa 0 +austin 0 +learn 0 +research 0 +paul 0 +mcquestenpaul 0 +mcquestenphd 0 +studentdepart 0 +interest 0 +interact 0 +think 0 +mechan 0 +natur 0 +might 0 +bepract 0 +addit 0 +current 0 +techniqu 0 +exampl 0 +death 0 +usual 0 +studi 0 +explicitli 0 +email 0 +paulmcq 0 +utexa 0 +offic 0 +taylor 0 +hall 0 +phone 0 +postal 0 +address 0 +austindepart 0 +spring 0 +head 0 +forcsp 0 +introduct 0 +pascal 0 +programmingmor 0 +neuro 0 +inmoriarti 0 +link 0 +atcnr 0 +rome 0 +neural 0 +network 0 +group 0 +artifici 0 +intellig 0 +cours 0 +schedul 0 +depart 0 +handi 0 +access 0 +tout 0 +librari 0 +onlin 0 +seriou 0 +reflect 0 +dave 0 +winer 0 +websit 0 +need 0 +pointer 0 +wast 0 +hour 0 +surf 0 +check 0 +cynb 0 +humong 0 +hotlist 0 +mix 0 +knowledg 0 +knick 0 +knack 0 +nut 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..3b407bc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,32 @@ +multimedia 1 +austin 1 +pawang 0 +utexa 0 +system 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +pawan 0 +goyal 0 +home 0 +page 0 +research 0 +summari 0 +public 0 +network 0 +protocol 0 +file 0 +oper 0 +affili 0 +group 0 +get 0 +touch 0 +email 0 +inform 0 +finger 0 +also 0 +check 0 +log 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..bcd7e9d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,115 @@ +austin 1 +physic 0 +comput 0 +univers 0 +group 0 +texa 0 +current 0 +scienc 0 +depart 0 +theori 0 +interest 0 +also 0 +jose 0 +pecina 0 +obtain 0 +invari 0 +represent 0 +dissert 0 +advisor 0 +explor 0 +research 0 +numer 0 +parallel 0 +quantum 0 +scientif 0 +softwar 0 +home 0 +utexa 0 +orpecina 0 +pecinaabout 0 +previous 0 +complet 0 +master 0 +innuclear 0 +engin 0 +finish 0 +thesi 0 +graduat 0 +studi 0 +workedinvestig 0 +gaug 0 +graviti 0 +base 0 +quantiz 0 +gravit 0 +field 0 +calcul 0 +unitari 0 +irreduc 0 +publish 0 +three 0 +joint 0 +paper 0 +yuval 0 +eman 0 +georg 0 +sudarshan 0 +jurgen 0 +lemk 0 +fromcologn 0 +germani 0 +previou 0 +posit 0 +bureau 0 +econom 0 +geologi 0 +spent 0 +year 0 +half 0 +work 0 +seismic 0 +invers 0 +tomographi 0 +supervisor 0 +hardag 0 +editor 0 +geophys 0 +journal 0 +societi 0 +geophysicist 0 +algorithm 0 +analysi 0 +cryptographi 0 +visitor 0 +thephys 0 +carnegi 0 +mellon 0 +pittsburgh 0 +open 0 +compani 0 +develop 0 +fill 0 +comerci 0 +symmetri 0 +algebra 0 +sequenti 0 +solutionsin 0 +gener 0 +rel 0 +problem 0 +chromodynamicsmi 0 +curriculum 0 +vita 0 +click 0 +want 0 +print 0 +contact 0 +center 0 +particl 0 +usavoic 0 +main 0 +offic 0 +mail 0 +defo 0 +phy 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..e0633fc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,56 @@ +know 1 +page 1 +unix 0 +would 0 +woof 0 +find 0 +nettl 0 +welcom 0 +cornerinfolik 0 +anyon 0 +want 0 +make 0 +realli 0 +short 0 +blahblah 0 +system 0 +window 0 +blah 0 +stuff 0 +eeek 0 +staff 0 +escap 0 +floor 0 +mean 0 +ibm 0 +file 0 +afteri 0 +instal 0 +stori 0 +true 0 +name 0 +chang 0 +protect 0 +theinnoc 0 +experiment 0 +work 0 +pleas 0 +think 0 +us 0 +neat 0 +cool 0 +comic 0 +buena 0 +vista 0 +movieplex 0 +meyour 0 +chanc 0 +increas 0 +drastic 0 +could 0 +employan 0 +improb 0 +drive 0 +send 0 +mail 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..f14bd716 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,25 @@ +greg 1 +plaxtongreg 1 +utexa 1 +hall 1 +texa 1 +plaxtoncontact 0 +informationemail 0 +plaxton 0 +eduphon 0 +offic 0 +taylor 0 +postal 0 +depart 0 +comput 0 +sciencetaylor 0 +univers 0 +austinaustin 0 +inform 0 +annual 0 +report 0 +profilepubl 0 +last 0 +modifi 0 +decemb 0 +plaxtonplaxton 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..df4db327 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,85 @@ +learn 1 +search 1 +porter 0 +machin 0 +knowledg 0 +base 0 +system 0 +bruce 0 +comput 0 +scienc 0 +intellig 0 +research 0 +answer 0 +question 0 +select 0 +public 0 +aaai 0 +abstract 0 +porterassoci 0 +professor 0 +faculti 0 +fellow 0 +univers 0 +california 0 +irvin 0 +honor 0 +award 0 +profession 0 +servic 0 +presidenti 0 +young 0 +investig 0 +editor 0 +presentarea 0 +interestartifici 0 +summari 0 +researchhead 0 +group 0 +develop 0 +method 0 +build 0 +larg 0 +basesand 0 +us 0 +solv 0 +problem 0 +researchinterest 0 +case 0 +recent 0 +rickel 0 +autom 0 +model 0 +predict 0 +thetim 0 +scale 0 +boundari 0 +cambridg 0 +aait 0 +press 0 +andpostscript 0 +brant 0 +rule 0 +preced 0 +complementari 0 +warrant 0 +bareiss 0 +holt 0 +concept 0 +heurist 0 +classif 0 +weak 0 +theori 0 +domain 0 +artifici 0 +journal 0 +abstractand 0 +postscript 0 +hotlist 0 +site 0 +page 0 +email 0 +address 0 +tech 0 +reportport 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..e79deaa4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,197 @@ +uniti 1 +program 0 +misra 0 +powerlist 0 +seuss 0 +graduat 0 +present 0 +group 0 +jayadev 0 +notat 0 +parallel 0 +work 0 +book 0 +paper 0 +proof 0 +theori 0 +kornerup 0 +list 0 +comput 0 +area 0 +current 0 +adam 0 +carruth 0 +marku 0 +kaltenbach 0 +jacob 0 +avail 0 +logic 0 +design 0 +note 0 +ofpap 0 +written 0 +includ 0 +introduct 0 +specifi 0 +circuit 0 +structur 0 +mani 0 +thepowerlist 0 +correct 0 +network 0 +studi 0 +differ 0 +compil 0 +austinpsp 0 +austinthi 0 +home 0 +page 0 +depart 0 +scienc 0 +univers 0 +texa 0 +ataustin 0 +stand 0 +specif 0 +emphasi 0 +deriveparallel 0 +distribut 0 +rigor 0 +manner 0 +issupervis 0 +develop 0 +research 0 +former 0 +member 0 +groupinclud 0 +erni 0 +cohen 0 +rajeev 0 +joshi 0 +edgar 0 +knapp 0 +ingolf 0 +krger 0 +josyula 0 +mark 0 +staskauska 0 +publicationsbelow 0 +summar 0 +wherev 0 +possibl 0 +give 0 +link 0 +topap 0 +electron 0 +reason 0 +anddistribut 0 +chandi 0 +foundat 0 +addison 0 +weslei 0 +seri 0 +variou 0 +result 0 +applic 0 +thenot 0 +assum 0 +basic 0 +understand 0 +inchandi 0 +sinc 0 +public 0 +sever 0 +improv 0 +made 0 +inth 0 +reflect 0 +amanuscript 0 +newun 0 +tempor 0 +operatorco 0 +safeti 0 +refer 0 +forrefer 0 +implement 0 +write 0 +asymbol 0 +model 0 +checker 0 +forfinit 0 +state 0 +call 0 +unityverifi 0 +extend 0 +toinclud 0 +real 0 +time 0 +aspect 0 +hybrid 0 +system 0 +synchron 0 +data 0 +length 0 +equal 0 +power 0 +twodiffer 0 +oper 0 +balanc 0 +divis 0 +parallelalgorithm 0 +succinct 0 +simpl 0 +recurs 0 +givesnumer 0 +exampl 0 +algorithm 0 +fast 0 +fourier 0 +transform 0 +batcher 0 +sort 0 +arithmet 0 +asadd 0 +multipli 0 +prove 0 +verifi 0 +addercircuit 0 +us 0 +programscan 0 +map 0 +effici 0 +architectur 0 +speciallyhypercub 0 +detail 0 +offspr 0 +address 0 +issu 0 +ofprogram 0 +composit 0 +restrict 0 +compon 0 +caninterfer 0 +read 0 +overview 0 +chapter 0 +froma 0 +monograph 0 +adisciplin 0 +multiprogram 0 +alsoavail 0 +genrat 0 +code 0 +callsfor 0 +messag 0 +commun 0 +describ 0 +thesi 0 +anexperi 0 +concurr 0 +object 0 +basedprogram 0 +languag 0 +ingolfkrg 0 +site 0 +found 0 +thepsp 0 +sitejacob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..572cb7b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,71 @@ +qime 1 +comput 1 +guest 1 +texa 0 +austin 0 +scienc 0 +inform 0 +huang 0 +email 0 +utexa 0 +univers 0 +system 0 +center 0 +page 0 +comment 0 +depart 0 +sciencesunivers 0 +phone 0 +edureceiv 0 +nankai 0 +tianjin 0 +univeris 0 +hawaii 0 +manoa 0 +hawaiiwork 0 +austincours 0 +spring 0 +advanc 0 +telecommun 0 +client 0 +server 0 +develop 0 +appli 0 +data 0 +commun 0 +cours 0 +academ 0 +resourc 0 +teamweb 0 +utcssadaili 0 +texanstock 0 +room 0 +attract 0 +picturesimageschines 0 +popsend 0 +card 0 +electr 0 +postcard 0 +line 0 +job 0 +jobtrakut 0 +placement 0 +connect 0 +gopherftp 0 +csc 0 +newstelnet 0 +cschen 0 +junk 0 +staffyour 0 +person 0 +visit 0 +pleas 0 +sign 0 +book 0 +commentsguest 0 +name 0 +construct 0 +last 0 +modifi 0 +march 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..32a8d5b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,63 @@ +reason 1 +qualit 0 +research 0 +paper 0 +qsim 0 +index 0 +bibliographi 0 +ourresearch 0 +group 0 +world 0 +access 0 +dissert 0 +softwar 0 +directori 0 +utexasqualit 0 +utexasth 0 +sever 0 +area 0 +thephys 0 +user 0 +applic 0 +like 0 +system 0 +spatial 0 +intelligentrobot 0 +tour 0 +limit 0 +logic 0 +knowledgerepresent 0 +algernon 0 +supervis 0 +professor 0 +benjaminkuip 0 +kuiper 0 +utexa 0 +part 0 +artifici 0 +intellig 0 +comput 0 +scienc 0 +depart 0 +atth 0 +univers 0 +texa 0 +ataustin 0 +pointer 0 +book 0 +graduat 0 +student 0 +robot 0 +knowledg 0 +represent 0 +alumni 0 +includ 0 +visitor 0 +abstract 0 +yellow 0 +page 0 +easili 0 +areadescript 0 +also 0 +visit 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..46bd04af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,62 @@ +page 1 +qiang 0 +home 0 +pleas 0 +seed 0 +program 0 +comput 0 +john 0 +welcom 0 +thank 0 +window 0 +austin 0 +feel 0 +free 0 +brows 0 +around 0 +leav 0 +comment 0 +suggest 0 +joke 0 +visit 0 +come 0 +scrollit_rl 0 +timertwo 0 +settimeout 0 +els 0 +sinc 0 +com 0 +time 0 +self 0 +introduct 0 +current 0 +master 0 +scienc 0 +depart 0 +univers 0 +texa 0 +click 0 +inform 0 +seriousjunk 0 +cours 0 +languag 0 +unix 0 +graphic 0 +linux 0 +technic 0 +java 0 +realjunk 0 +sport 0 +game 0 +new 0 +struggleforliv 0 +institut 0 +qzuo 0 +utexa 0 +guestbook 0 +still 0 +underconstruct 0 +back 0 +last 0 +modif 0 +copyright 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..42fe19ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,57 @@ +scienc 1 +comput 1 +univers 0 +texa 0 +robert 0 +gener 0 +last 0 +massachusett 0 +institut 0 +technolog 0 +system 0 +paper 0 +utexa 0 +blumoferobert 0 +blumofei 0 +bobbi 0 +name 0 +pronounc 0 +bloom 0 +informationassist 0 +professor 0 +ofcomput 0 +austin 0 +brown 0 +work 0 +cilkmultithread 0 +languag 0 +runtim 0 +laboratori 0 +experiment 0 +softwar 0 +less 0 +compil 0 +list 0 +document 0 +also 0 +avail 0 +directori 0 +semest 0 +spring 0 +teach 0 +abstract 0 +data 0 +type 0 +contact 0 +informationemail 0 +eduphon 0 +offic 0 +taylor 0 +hallpost 0 +depart 0 +sciencestaylor 0 +hall 0 +austinaustin 0 +modifi 0 +decemb 0 +blumoferdb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..ca95dbb8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,41 @@ +time 1 +current 0 +page 0 +comput 0 +interest 0 +wang 0 +homepag 0 +wangwelcom 0 +construct 0 +content 0 +permit 0 +candid 0 +depart 0 +scienc 0 +theunivers 0 +texa 0 +austin 0 +research 0 +includ 0 +real 0 +system 0 +rule 0 +base 0 +program 0 +analysi 0 +softwar 0 +engin 0 +artifici 0 +intellig 0 +publicationsi 0 +list 0 +public 0 +avail 0 +brows 0 +last 0 +updat 0 +pleas 0 +send 0 +comment 0 +rhwang 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..1c9fcbe8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,77 @@ +neural 1 +network 0 +lectur 0 +cours 0 +artifici 0 +fall 0 +graduat 0 +univers 0 +austin 0 +scienc 0 +research 0 +intellig 0 +undergradu 0 +risto 0 +comput 0 +group 0 +model 0 +cognit 0 +work 0 +seminar 0 +spring 0 +address 0 +miikkulainenristo 0 +miikkulainenassoci 0 +professor 0 +computersci 0 +oftexa 0 +ucla 0 +appli 0 +mathemat 0 +helsinki 0 +technolog 0 +intereststh 0 +concentr 0 +processeswith 0 +current 0 +includ 0 +languageacquisit 0 +episod 0 +memori 0 +self 0 +organ 0 +visual 0 +cortex 0 +schema 0 +base 0 +vision 0 +also 0 +evolv 0 +networkswith 0 +genet 0 +algorithm 0 +goal 0 +automat 0 +discoversequenti 0 +decis 0 +strategi 0 +problem 0 +solv 0 +robot 0 +detail 0 +utc 0 +home 0 +page 0 +classessumm 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +phone 0 +postal 0 +depart 0 +texa 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..2cff8874 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,2 @@ +rong 1 +bigfoot 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..28d70448 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,52 @@ +comput 1 +algorithm 0 +austin 0 +rajaraman 0 +home 0 +depart 0 +scienc 0 +univers 0 +texa 0 +rraj 0 +utexa 0 +rajmohan 0 +pagerajmohan 0 +graduat 0 +student 0 +atth 0 +ataustin 0 +plan 0 +complet 0 +spring 0 +mydissert 0 +supervisor 0 +gregplaxton 0 +research 0 +member 0 +andcomput 0 +theori 0 +group 0 +particularli 0 +interest 0 +incombinator 0 +distribut 0 +network 0 +onlin 0 +parallel 0 +model 0 +random 0 +list 0 +mypubl 0 +curriculum 0 +vita 0 +us 0 +link 0 +relat 0 +sciencemiscellan 0 +linkscontact 0 +inform 0 +email 0 +phone 0 +offic 0 +ephon 0 +postal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..d38f6484 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,86 @@ +austin 1 +movi 1 +homepag 0 +comput 0 +rong 0 +capit 0 +univers 0 +depart 0 +graduat 0 +scienc 0 +utaccess 0 +introduct 0 +homepagea 0 +chinachina 0 +jinan 0 +myhometown 0 +shandong 0 +provinc 0 +tsinghua 0 +gotmi 0 +computersci 0 +technolog 0 +beij 0 +ofth 0 +peopl 0 +republ 0 +ofchina 0 +spent 0 +five 0 +colleg 0 +year 0 +zhai 0 +sinanet 0 +texasaustin 0 +citi 0 +live 0 +weather 0 +apart 0 +rent 0 +utcsth 0 +texa 0 +utnetcat 0 +browsabl 0 +onlin 0 +catalog 0 +librari 0 +austininform 0 +servic 0 +login 0 +utcat 0 +cours 0 +fall 0 +oper 0 +systemsdynam 0 +file 0 +replic 0 +final 0 +project 0 +graphicsc 0 +mathemat 0 +logicc 0 +moviesaustin 0 +chronicl 0 +film 0 +time 0 +yahoo 0 +entertain 0 +filmsmicrosoft 0 +cinemania 0 +onlineal 0 +guidehollywood 0 +onlineinternet 0 +databaserog 0 +ebert 0 +moviesvisit 0 +page 0 +contactmail 0 +address 0 +river 0 +aaustin 0 +telephon 0 +emailrtan 0 +utexa 0 +fingerclick 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..d7636d18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,66 @@ +rupert 1 +austin 1 +tang 0 +page 0 +comput 0 +scienc 0 +univers 0 +texa 0 +littl 0 +think 0 +life 0 +would 0 +alwai 0 +home 0 +student 0 +dept 0 +almost 0 +miracl 0 +modern 0 +teach 0 +method 0 +entir 0 +strangl 0 +holi 0 +curious 0 +inquiri 0 +delic 0 +plant 0 +need 0 +anyth 0 +besid 0 +stimul 0 +freedom 0 +realli 0 +empti 0 +depriv 0 +opportun 0 +choos 0 +altern 0 +distast 0 +deni 0 +actual 0 +wish 0 +aspir 0 +fear 0 +duress 0 +fate 0 +much 0 +differ 0 +truck 0 +wash 0 +machin 0 +nice 0 +meet 0 +complet 0 +cool 0 +servic 0 +know 0 +academ 0 +interest 0 +research 0 +messi 0 +area 0 +construct 0 +utexa 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..cde9937e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,16 @@ +frame 1 +ruwei 0 +homepag 0 +alert 0 +see 0 +messag 0 +us 0 +browser 0 +support 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..4f0c2b8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,87 @@ +comput 1 +parallel 0 +method 0 +scienc 0 +univers 0 +austin 0 +mathemat 0 +project 0 +public 0 +robert 0 +texa 0 +rvdg 0 +utexa 0 +appli 0 +numer 0 +develop 0 +case 0 +techniqu 0 +current 0 +geijn 0 +geijnassoci 0 +professor 0 +depart 0 +institut 0 +appliedmathemat 0 +phone 0 +mail 0 +http 0 +user 0 +wisconsin 0 +madison 0 +maryland 0 +colleg 0 +park 0 +area 0 +interestnumer 0 +analysi 0 +supercomput 0 +scientif 0 +summari 0 +researchth 0 +introduct 0 +forc 0 +evalu 0 +oftradit 0 +sequentialmachin 0 +continu 0 +us 0 +inoth 0 +prove 0 +perform 0 +better 0 +researchconcentr 0 +forimpl 0 +well 0 +environ 0 +allowssuch 0 +easili 0 +implement 0 +variou 0 +parallelprocessor 0 +inform 0 +graduat 0 +program 0 +workshop 0 +infrastructur 0 +applic 0 +april 0 +intercom 0 +plapack 0 +sl_librari 0 +book 0 +journal 0 +confer 0 +technic 0 +report 0 +tutori 0 +major 0 +softwar 0 +effort 0 +class 0 +fall 0 +schedul 0 +former 0 +student 0 +meet 0 +famili 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..9ff9dc2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,40 @@ +home 1 +seed 1 +india 1 +page 0 +time 0 +sundeep 0 +abraham 0 +scrollit_rl 0 +timertwo 0 +window 0 +settimeout 0 +els 0 +sundeepabraham 0 +master 0 +student 0 +computersci 0 +dept 0 +universityof 0 +texa 0 +austin 0 +undergradu 0 +studi 0 +comput 0 +sciencesand 0 +engin 0 +region 0 +engg 0 +colleg 0 +calicut 0 +countri 0 +hail 0 +state 0 +kerala 0 +know 0 +contact 0 +click 0 +construct 0 +tinkerwith 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..c9c8a02f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,135 @@ +comput 1 +texa 1 +austin 1 +page 0 +univers 0 +sammi 0 +depart 0 +scienc 0 +swim 0 +refer 0 +yellow 0 +utexa 0 +research 0 +prof 0 +project 0 +parallel 0 +world 0 +wide 0 +librari 0 +construct 0 +address 0 +altavista 0 +startingpoint 0 +yahoo 0 +lyco 0 +map 0 +weather 0 +white 0 +congress 0 +shall 0 +make 0 +respect 0 +establish 0 +religion 0 +orprohibit 0 +free 0 +exercis 0 +thereof 0 +abridg 0 +freedom 0 +ofspeech 0 +press 0 +right 0 +peopl 0 +peaceabl 0 +toassembl 0 +petit 0 +govern 0 +redress 0 +grievanc 0 +constitut 0 +unit 0 +state 0 +america 0 +calvin 0 +robert 0 +geijn 0 +linear 0 +algebra 0 +packag 0 +schedul 0 +artifici 0 +intellig 0 +topic 0 +distribut 0 +languag 0 +compil 0 +advanc 0 +oper 0 +system 0 +experi 0 +herbarium 0 +plapack 0 +sign 0 +hypertext 0 +code 0 +anagram 0 +server 0 +friend 0 +nil 0 +virtual 0 +reker 0 +yanni 0 +musician 0 +jeff 0 +hockei 0 +andrea 0 +hamilton 0 +technolog 0 +famili 0 +pop 0 +guyer 0 +public 0 +health 0 +northwestern 0 +anthropolog 0 +kate 0 +nate 0 +activ 0 +link 0 +entertain 0 +showbiz 0 +chronicl 0 +pollstar 0 +concert 0 +databas 0 +html 0 +quick 0 +guid 0 +dell 0 +fring 0 +ryder 0 +laptop 0 +info 0 +consortium 0 +miscellan 0 +boston 0 +hotlist 0 +list 0 +traveloc 0 +offic 0 +taylor 0 +hall 0 +home 0 +great 0 +hill 0 +eduth 0 +opinion 0 +express 0 +mine 0 +necessarili 0 +repres 0 +view 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..fcb53177 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,50 @@ +address 1 +austin 1 +inform 0 +mail 0 +depart 0 +comput 0 +propos 0 +file 0 +formal 0 +method 0 +page 0 +sawadajun 0 +sawadacontact 0 +email 0 +sawada 0 +utexa 0 +offic 0 +main 0 +univ 0 +texa 0 +scienc 0 +taylor 0 +hall 0 +home 0 +wooten 0 +dissert 0 +oral 0 +time 0 +place 0 +abstract 0 +paper 0 +supplementari 0 +technic 0 +report 0 +kbresourc 0 +common 0 +lisp 0 +languag 0 +edit 0 +bowen 0 +around 0 +world 0 +pvsother 0 +frequent 0 +access 0 +teacher 0 +fellow 0 +logic 0 +boyer 0 +class 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..8d9ca067 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,169 @@ +softwar 1 +system 0 +gener 0 +compon 0 +batori 0 +genvoca 0 +domain 0 +start 0 +mani 0 +applic 0 +get 0 +member 0 +design 0 +page 0 +research 0 +group 0 +complex 0 +result 0 +improv 0 +product 0 +perform 0 +look 0 +distribut 0 +decemb 0 +encapsul 0 +refin 0 +basic 0 +order 0 +architectur 0 +pattern 0 +utexa 0 +lectur 0 +note 0 +file 0 +ssgrg 0 +titl 0 +pagewelcom 0 +home 0 +tool 0 +assembl 0 +interchang 0 +reusabl 0 +develop 0 +independ 0 +model 0 +construct 0 +defin 0 +algebra 0 +equat 0 +term 0 +successfulli 0 +appli 0 +includ 0 +databas 0 +manag 0 +avion 0 +data 0 +structur 0 +demonstr 0 +substanti 0 +time 0 +first 0 +visit 0 +question 0 +best 0 +place 0 +take 0 +project 0 +index 0 +public 0 +relat 0 +utc 0 +professorangela 0 +dappert 0 +studentguillermo 0 +jimenez 0 +perezph 0 +studentjeff 0 +thomasph 0 +studentl 0 +tokuda 0 +studentyanni 0 +smaragdaki 0 +studentk 0 +shepherdresearch 0 +associateform 0 +graduat 0 +datesdinesh 0 +dasph 0 +milli 0 +villarrealph 0 +bart 0 +geracipostdoc 0 +marti 0 +sirkinph 0 +march 0 +sankar 0 +dasarim 0 +overview 0 +us 0 +build 0 +typic 0 +modul 0 +featur 0 +share 0 +possibl 0 +must 0 +differ 0 +part 0 +class 0 +requir 0 +manipul 0 +metadata 0 +reflect 0 +comput 0 +thu 0 +like 0 +approach 0 +goe 0 +beyond 0 +simpl 0 +object 0 +orient 0 +larg 0 +scale 0 +program 0 +transform 0 +feel 0 +issu 0 +involv 0 +breadth 0 +recommend 0 +follow 0 +paper 0 +starter 0 +read 0 +scalabl 0 +librari 0 +creat 0 +refer 0 +implement 0 +composit 0 +valid 0 +subject 0 +specif 0 +deliv 0 +relationship 0 +work 0 +check 0 +reengin 0 +lightweight 0 +dbm 0 +memori 0 +simul 0 +generatorsautom 0 +evolut 0 +inform 0 +pleas 0 +contact 0 +period 0 +releas 0 +tutori 0 +reus 0 +avail 0 +contain 0 +compress 0 +postscript 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..7589405e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,142 @@ +page 1 +home 0 +stuff 0 +austin 0 +inform 0 +work 0 +think 0 +research 0 +amherst 0 +want 0 +realli 0 +basic 0 +doesn 0 +look 0 +littl 0 +interest 0 +texa 0 +depart 0 +try 0 +group 0 +class 0 +orient 0 +colleg 0 +place 0 +time 0 +spent 0 +amaz 0 +like 0 +peopl 0 +check 0 +neat 0 +right 0 +scott 0 +pagescott 0 +pageokai 0 +long 0 +overdu 0 +slight 0 +updat 0 +mean 0 +go 0 +fanci 0 +expect 0 +least 0 +current 0 +univers 0 +finish 0 +year 0 +mani 0 +oop 0 +within 0 +utc 0 +system 0 +languag 0 +object 0 +design 0 +instructor 0 +glenn 0 +down 0 +great 0 +spend 0 +undergradu 0 +dai 0 +unpredict 0 +santa 0 +institut 0 +summer 0 +differ 0 +kind 0 +hobbi 0 +stuffit 0 +perfect 0 +linux 0 +invalu 0 +squash 0 +pageth 0 +mead 0 +wine 0 +beer 0 +psion 0 +maker 0 +cool 0 +palmtop 0 +without 0 +would 0 +forget 0 +name 0 +anastasi 0 +well 0 +sharp 0 +much 0 +free 0 +miscellan 0 +item 0 +particular 0 +order 0 +dine 0 +guid 0 +actual 0 +import 0 +part 0 +citizen 0 +poke 0 +good 0 +humor 0 +publish 0 +appl 0 +comput 0 +still 0 +thing 0 +iici 0 +last 0 +forev 0 +ala 0 +longer 0 +sure 0 +bright 0 +futur 0 +type 0 +machin 0 +bebox 0 +could 0 +simpl 0 +better 0 +noth 0 +probabl 0 +didn 0 +wait 0 +five 0 +minut 0 +load 0 +send 0 +email 0 +maintain 0 +sfkaplan 0 +utexa 0 +might 0 +grab 0 +includ 0 +link 0 +encrypt 0 +gener 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..ba5426ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,34 @@ +shenoi 1 +austin 0 +utexa 0 +prashant 0 +home 0 +page 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +main 0 +welcom 0 +get 0 +touch 0 +email 0 +river 0 +offic 0 +tower 0 +floor 0 +build 0 +inform 0 +finger 0 +also 0 +check 0 +log 0 +multimedia 0 +group 0 +list 0 +recent 0 +public 0 +avail 0 +onlin 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..706476af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,35 @@ +austin 1 +shaob 0 +cyberhom 0 +comput 0 +welcom 0 +current 0 +student 0 +depart 0 +scienc 0 +univers 0 +texa 0 +want 0 +know 0 +check 0 +ongo 0 +work 0 +hardvar 0 +verifc 0 +network 0 +vlsi 0 +final 0 +project 0 +fall 0 +bookshelf 0 +coffe 0 +tabl 0 +campu 0 +citi 0 +make 0 +contact 0 +pleasant 0 +vallei 0 +shma 0 +utexa 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..19eedbe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,93 @@ +austin 1 +shailesh 0 +univers 0 +texa 0 +comput 0 +learn 0 +interest 0 +internet 0 +neural 0 +network 0 +machin 0 +artifici 0 +genet 0 +algorithm 0 +kumar 0 +kumarshailesh 0 +kumarth 0 +austindepart 0 +sciencestaylor 0 +hall 0 +skumar 0 +utexa 0 +edumi 0 +resumeresearch 0 +publicationscontact 0 +mesrcm 0 +spiritu 0 +affili 0 +offersom 0 +linkscognit 0 +scienceutc 0 +researchutc 0 +research 0 +groupresearch 0 +intellig 0 +life 0 +neuroevolut 0 +applic 0 +cellular 0 +automata 0 +chao 0 +nonlinear 0 +dynam 0 +fuzzi 0 +logic 0 +massiv 0 +parallel 0 +processor 0 +publicationson 0 +line 0 +adapt 0 +signal 0 +predistort 0 +dual 0 +reinforc 0 +page 0 +patrick 0 +goetz 0 +risto 0 +miikkulainen 0 +appli 0 +mathemat 0 +depart 0 +scienc 0 +proceed 0 +annual 0 +confer 0 +bari 0 +itali 0 +object 0 +base 0 +evolut 0 +program 0 +bord 0 +singh 0 +symposium 0 +aprl 0 +india 0 +contact 0 +snail 0 +mail 0 +whiti 0 +avenu 0 +phone 0 +home 0 +offic 0 +offernet 0 +assistancesearch 0 +tool 0 +qualiti 0 +institutewww 0 +infoindia 0 +music 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..6878c8ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,47 @@ +smaragdaki 1 +yanni 1 +utc 1 +student 1 +interest 1 +project 1 +program 1 +comput 1 +scienc 1 +research 1 +austin 1 +posit 0 +graduat 0 +alchemi 0 +turn 0 +lead 0 +gold 0 +moder 0 +success 0 +plan 0 +scheme 0 +make 0 +arrang 0 +someth 0 +webster 0 +world 0 +dictionari 0 +serious 0 +depart 0 +main 0 +area 0 +meta 0 +system 0 +applic 0 +particularli 0 +softwar 0 +gener 0 +photo 0 +album 0 +favorit 0 +sitessmaragd 0 +utexa 0 +eduyanni 0 +smaragdakisunivers 0 +texa 0 +departmenttai 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..434bb0ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,58 @@ +link 1 +minion 1 +like 0 +email 0 +pleas 0 +mean 0 +homepag 0 +edward 0 +danger 0 +construct 0 +site 0 +fall 0 +asphalt 0 +bodi 0 +resum 0 +tell 0 +danc 0 +shadow 0 +moonlight 0 +click 0 +utexa 0 +person 0 +data 0 +strictli 0 +need 0 +know 0 +basi 0 +send 0 +effort 0 +duli 0 +note 0 +pictur 0 +interest 0 +greatest 0 +experi 0 +stimul 0 +nerv 0 +center 0 +wouldn 0 +absolut 0 +ever 0 +dy 0 +ignor 0 +previou 0 +recent 0 +addit 0 +field 0 +trip 0 +pania 0 +haiku 0 +leaf 0 +afloat 0 +wind 0 +stream 0 +eddi 0 +waterfal 0 +life 0 +visitor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..dc0e5f30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,29 @@ +search 1 +email 0 +utexa 0 +texa 0 +austin 0 +southerart 0 +southerresearchbuild 0 +member 0 +knowledg 0 +base 0 +system 0 +research 0 +group 0 +contact 0 +inform 0 +souther 0 +work 0 +mail 0 +comput 0 +scienc 0 +depart 0 +univers 0 +hotlist 0 +site 0 +page 0 +address 0 +public 0 +tech 0 +reportsouth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..8a1304eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,60 @@ +learn 1 +address 1 +comput 1 +scienc 1 +sowmya 0 +research 0 +texa 0 +interest 0 +network 0 +univers 0 +austin 0 +ramachandransowmya 0 +ramachandranmachin 0 +groupunivers 0 +austinresearchmi 0 +area 0 +machin 0 +field 0 +ofartif 0 +intellig 0 +problem 0 +learningbayesian 0 +exampl 0 +bayesian 0 +withhidden 0 +variabl 0 +challeng 0 +approach 0 +appli 0 +symbol 0 +connectionist 0 +theori 0 +revis 0 +techniqu 0 +thisproblem 0 +also 0 +design 0 +creat 0 +multimediaappl 0 +resum 0 +list 0 +paper 0 +educ 0 +rutger 0 +tech 0 +indian 0 +institut 0 +technolog 0 +madra 0 +india 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +utexa 0 +postal 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..194352ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,52 @@ +texa 1 +sriram 0 +austin 0 +comput 0 +univers 0 +click 0 +home 0 +page 0 +raocurr 0 +research 0 +involv 0 +design 0 +implement 0 +multimedia 0 +file 0 +systemoper 0 +system 0 +support 0 +multimediai 0 +work 0 +multimediagroup 0 +sciencesdepart 0 +advisor 0 +prof 0 +harrickvinpublicationsminegroupcontact 0 +informationofficetai 0 +email 0 +utexa 0 +edudepart 0 +scienc 0 +austinaustin 0 +miscellaneousotherinterest 0 +pagespicturesof 0 +toweraustin 0 +isth 0 +capit 0 +locat 0 +central 0 +hill 0 +countri 0 +herefor 0 +inform 0 +kannada 0 +koota 0 +informationabout 0 +tamil 0 +sangam 0 +comment 0 +pleas 0 +free 0 +send 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..1dbc16d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,15 @@ +frame 1 +tiger 0 +alert 0 +see 0 +messag 0 +us 0 +challeng 0 +browser 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..40b96d6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,50 @@ +home 1 +sunghe 0 +univers 0 +austin 0 +page 0 +depart 0 +comput 0 +texa 0 +engin 0 +seoul 0 +work 0 +system 0 +research 0 +utc 0 +click 0 +choisunghe 0 +choiwelcom 0 +construct 0 +master 0 +program 0 +scienc 0 +educ 0 +nation 0 +korea 0 +august 0 +experi 0 +present 0 +administr 0 +chemic 0 +graduat 0 +assist 0 +prof 0 +aloysiu 0 +real 0 +time 0 +group 0 +contact 0 +inform 0 +nuec 0 +list 0 +machin 0 +current 0 +log 0 +finger 0 +author 0 +choiemail 0 +utexa 0 +edulast 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..4b0719c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,70 @@ +comput 1 +scienc 1 +texa 0 +austin 0 +inform 0 +univers 0 +depart 0 +research 0 +page 0 +sheetal 0 +offic 0 +taylor 0 +hall 0 +address 0 +avail 0 +persist 0 +call 0 +home 0 +kakkadsheet 0 +kakkadcontact 0 +postal 0 +usual 0 +best 0 +reach 0 +isvia 0 +email 0 +full 0 +finger 0 +informationi 0 +member 0 +oop 0 +group 0 +inth 0 +part 0 +implement 0 +storagesystem 0 +provid 0 +easi 0 +us 0 +novel 0 +techniqu 0 +pointer 0 +swizzl 0 +faulttim 0 +effici 0 +support 0 +larg 0 +standard 0 +hardwar 0 +pleas 0 +list 0 +mypubl 0 +along 0 +brief 0 +descript 0 +plan 0 +graduat 0 +myresum 0 +postscript 0 +current 0 +work 0 +motorola 0 +somerset 0 +design 0 +center 0 +whilefinish 0 +januari 0 +kakkad 0 +svkakkad 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..00b6acbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,62 @@ +austin 1 +shengm 0 +student 0 +scienc 0 +univers 0 +texa 0 +chines 0 +home 0 +page 0 +welcom 0 +homepageabout 0 +depart 0 +comput 0 +want 0 +know 0 +check 0 +classmatesclass 0 +technolog 0 +china 0 +ustc 0 +class 0 +graduat 0 +school 0 +academi 0 +sciencesus 0 +linksut 0 +campu 0 +registrar 0 +gradaut 0 +studiesut 0 +libraryutaccesschines 0 +scholar 0 +associationchina 0 +chinesechinainternet 0 +distribut 0 +magazinestsinghua 0 +bbsncic 0 +bbschines 0 +novelschines 0 +classicsabout 0 +austinwhat 0 +weather 0 +todai 0 +citylimitsclassifi 0 +item 0 +sale 0 +austinto 0 +contact 0 +address 0 +medic 0 +art 0 +voic 0 +email 0 +utexa 0 +finger 0 +meyour 0 +comment 0 +suggest 0 +highli 0 +appreci 0 +visitorsinc 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..5f3f7697 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,15 @@ +frame 1 +wang 0 +alert 0 +see 0 +messag 0 +us 0 +browser 0 +support 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..01cb1ddb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,4 @@ +page 1 +welcom 0 +home 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..db2fb9db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,30 @@ +austin 1 +work 0 +comput 0 +renu 0 +tewarirenu 0 +tewariwhat 0 +addresshom 0 +home 0 +email 0 +tewari 0 +utexa 0 +multimedia 0 +dept 0 +scienc 0 +univers 0 +texa 0 +public 0 +done 0 +internship 0 +watson 0 +research 0 +center 0 +plai 0 +interest 0 +site 0 +bore 0 +send 0 +comment 0 +name 0 +option 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..05a3dbaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,85 @@ +tumlin 1 +research 0 +distribut 0 +system 0 +synthesi 0 +pierc 0 +interest 0 +formal 0 +develop 0 +work 0 +paper 0 +control 0 +project 0 +inform 0 +address 0 +utexa 0 +page 0 +novemb 0 +photo 0 +brenda 0 +ladd 0 +photographi 0 +random 0 +stuff 0 +come 0 +soon 0 +stai 0 +tune 0 +issu 0 +secur 0 +present 0 +studi 0 +logic 0 +analyz 0 +authenticationprotocol 0 +done 0 +method 0 +specif 0 +verif 0 +parallel 0 +amwork 0 +resourc 0 +communicatewith 0 +client 0 +mean 0 +queu 0 +messag 0 +draft 0 +addit 0 +student 0 +assist 0 +appli 0 +laboratori 0 +current 0 +investig 0 +us 0 +evolutionari 0 +comput 0 +techniqu 0 +genet 0 +algorithm 0 +finit 0 +state 0 +machin 0 +click 0 +resum 0 +avail 0 +html 0 +postscript 0 +format 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +postal 0 +metric 0 +blvd 0 +austin 0 +last 0 +updat 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..795234af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,173 @@ +prof 1 +page 0 +austin 0 +system 0 +tong 0 +java 0 +comput 0 +univers 0 +chines 0 +mpeg 0 +china 0 +cours 0 +fall 0 +project 0 +unix 0 +friend 0 +wang 0 +time 0 +shanghai 0 +technolog 0 +work 0 +distribut 0 +network 0 +databas 0 +netscap 0 +recommend 0 +visit 0 +viewer 0 +demo 0 +nanj 0 +program 0 +format 0 +jiao 0 +tsinghua 0 +compani 0 +lucent 0 +spring 0 +misra 0 +theori 0 +implement 0 +perform 0 +associ 0 +anim 0 +home 0 +view 0 +listen 0 +contain 0 +applet 0 +written 0 +pure 0 +nank 0 +peopl 0 +republ 0 +current 0 +scienc 0 +depart 0 +texa 0 +seek 0 +full 0 +resum 0 +html 0 +click 0 +postscript 0 +univeristi 0 +beij 0 +jersei 0 +summerluc 0 +form 0 +result 0 +plan 0 +restructur 0 +bell 0 +laboratori 0 +thissumm 0 +anoth 0 +homepag 0 +life 0 +commun 0 +gouda 0 +zuckerman 0 +batori 0 +algorithm 0 +techniqu 0 +plexton 0 +graphic 0 +fussel 0 +multimedia 0 +teach 0 +assist 0 +introduct 0 +oper 0 +present 0 +fault 0 +toler 0 +clock 0 +synchron 0 +real 0 +april 0 +mobil 0 +host 0 +protocol 0 +mini 0 +manag 0 +design 0 +tool 0 +standard 0 +compon 0 +libari 0 +robot 0 +opengl 0 +glut 0 +decod 0 +player 0 +plai 0 +semest 0 +know 0 +troubl 0 +made 0 +music 0 +favorit 0 +mariah 0 +boyz 0 +babyfac 0 +movi 0 +sound 0 +clip 0 +sampl 0 +misc 0 +zodiac 0 +person 0 +differ 0 +kind 0 +find 0 +high 0 +school 0 +attach 0 +normal 0 +haiq 0 +maintain 0 +shenfeng 0 +chen 0 +thank 0 +quit 0 +learn 0 +info 0 +perl 0 +tutori 0 +reach 0 +lake 0 +blvd 0 +twang 0 +utexa 0 +still 0 +underconstruct 0 +check 0 +like 0 +never 0 +leav 0 +eagl 0 +copyright 0 +creat 0 +last 0 +modifi 0 +background 0 +song 0 +deskmat 0 +lang 0 +visitor 0 +accord 0 +counter 0 +sinc 0 +trust 0 +book 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..ca422298 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,84 @@ +comput 1 +languag 1 +student 0 +machin 0 +utc 0 +natur 0 +learn 0 +group 0 +interest 0 +associ 0 +hermjakob 0 +home 0 +pageulf 0 +hermjakobhello 0 +welcom 0 +graduat 0 +thedept 0 +scienc 0 +univers 0 +texa 0 +austinand 0 +work 0 +dissert 0 +aboutexampl 0 +base 0 +decis 0 +make 0 +context 0 +orient 0 +pars 0 +translationund 0 +supervis 0 +prof 0 +raymond 0 +moonei 0 +activ 0 +acquisit 0 +groupand 0 +research 0 +place 0 +linguist 0 +signll 0 +special 0 +print 0 +archiv 0 +european 0 +search 0 +engin 0 +altavista 0 +einet 0 +galaxi 0 +infoseek 0 +lyco 0 +yahoo 0 +new 0 +thing 0 +consid 0 +dernir 0 +nouvel 0 +alsac 0 +deutsch 0 +well 0 +focu 0 +york 0 +time 0 +spiegel 0 +svenska 0 +dagbladet 0 +tagesspiegel 0 +vanguardia 0 +welt 0 +zeitplusacm 0 +austin 0 +weather 0 +resourc 0 +perman 0 +address 0 +moltkestr 0 +bnde 0 +germanyphon 0 +voic 0 +last 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..e84dec50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,38 @@ +texa 1 +austin 1 +sport 1 +lanc 0 +tokuda 0 +univers 0 +softwar 0 +research 0 +intramur 0 +schedul 0 +system 0 +gener 0 +group 0 +depart 0 +comput 0 +scienc 0 +unicron 0 +utexa 0 +twelv 0 +time 0 +champion 0 +directori 0 +entertain 0 +financ 0 +magic 0 +gather 0 +new 0 +organ 0 +peopl 0 +refer 0 +home 0 +offic 0 +taylor 0 +perman 0 +heeia 0 +street 0 +kaneoh 0 +hawaii 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..de530bc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,29 @@ +utexa 1 +hall 1 +austin 1 +home 0 +page 0 +balayoghanv 0 +balayoghancontact 0 +informationemail 0 +eduoffic 0 +painter 0 +telephon 0 +postal 0 +address 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +taylor 0 +click 0 +send 0 +email 0 +finger 0 +account 0 +find 0 +whether 0 +log 0 +ineosdi 0 +bookmarksvbb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..8cde9f39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,173 @@ +multimedia 1 +comput 0 +system 0 +award 0 +scienc 0 +univers 0 +distribut 0 +network 0 +research 0 +page 0 +goyal 0 +texa 0 +chair 0 +intern 0 +confer 0 +program 0 +committe 0 +databas 0 +server 0 +proceed 0 +austin 0 +laboratori 0 +california 0 +diego 0 +career 0 +develop 0 +initi 0 +ieee 0 +area 0 +andnetwork 0 +eurograph 0 +workshop 0 +applic 0 +symposium 0 +object 0 +algorithm 0 +effici 0 +audio 0 +video 0 +disk 0 +oper 0 +commun 0 +sponsor 0 +harrick 0 +vinharrick 0 +assist 0 +professor 0 +atth 0 +ataustin 0 +director 0 +multimediacomput 0 +educ 0 +tech 0 +engin 0 +indian 0 +institut 0 +technolog 0 +bombai 0 +colorado 0 +state 0 +honor 0 +profession 0 +servic 0 +faculti 0 +supercomput 0 +center 0 +creativ 0 +innov 0 +editori 0 +board 0 +vice 0 +icdc 0 +member 0 +electronicimag 0 +beij 0 +china 0 +novemb 0 +kaohsiung 0 +taiwan 0 +decemb 0 +rostock 0 +germani 0 +second 0 +internationalconfer 0 +third 0 +multimediasystem 0 +interestmultimedia 0 +high 0 +speed 0 +mobilecomput 0 +summari 0 +main 0 +design 0 +implement 0 +anend 0 +architectur 0 +enabl 0 +wide 0 +rang 0 +ofdistribut 0 +specif 0 +integr 0 +file 0 +protocolsfor 0 +transmiss 0 +digit 0 +larg 0 +scale 0 +select 0 +recent 0 +public 0 +shenoi 0 +failur 0 +recoveri 0 +inmulti 0 +annualintern 0 +fault 0 +toler 0 +ftc 0 +pasadena 0 +june 0 +gemmel 0 +kandlur 0 +venkat 0 +rangan 0 +row 0 +storag 0 +tutori 0 +optim 0 +placement 0 +ofmultimedia 0 +arrai 0 +ieeeintern 0 +icmc 0 +washington 0 +determin 0 +delaybound 0 +heterogen 0 +thintern 0 +support 0 +fordigit 0 +nossdav 0 +durham 0 +hampshir 0 +april 0 +designingmultimedia 0 +march 0 +work 0 +variou 0 +industri 0 +federalinstitut 0 +includ 0 +intel 0 +nation 0 +foundationresearch 0 +nasa 0 +mitsubishi 0 +electricresearch 0 +merl 0 +microsystem 0 +electrospacesystem 0 +cours 0 +advanc 0 +contact 0 +inform 0 +email 0 +utexa 0 +phone 0 +mail 0 +address 0 +depart 0 +taylor 0 +hall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..e12e1689 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,67 @@ +vipin 1 +home 1 +haven 1 +guestbook 1 +pleas 1 +austin 1 +number 1 +page 0 +found 0 +time 0 +thing 0 +shall 0 +updat 0 +soon 0 +yeah 0 +know 0 +color 0 +match 0 +pictur 0 +cours 0 +interest 0 +report 0 +activ 0 +resum 0 +give 0 +graduat 0 +student 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +undergraduatefrom 0 +delhi 0 +india 0 +academ 0 +interestscours 0 +work 0 +list 0 +less 0 +incomplet 0 +pass 0 +semest 0 +take 0 +risk 0 +put 0 +interestsreportsy 0 +visitor 0 +go 0 +reset 0 +increas 0 +never 0 +decreas 0 +contact 0 +medic 0 +art 0 +street 0 +log 0 +sure 0 +sign 0 +though 0 +mani 0 +guest 0 +comment 0 +suggest 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..68536b6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,155 @@ +mathemat 1 +paper 0 +vladimir 0 +feel 0 +human 0 +univers 0 +petersburg 0 +russia 0 +note 0 +research 0 +new 0 +germani 0 +race 0 +black 0 +offic 0 +lifschitzwhen 0 +burden 0 +downcast 0 +mind 0 +gladli 0 +turn 0 +therealm 0 +lucid 0 +precis 0 +grasp 0 +object 0 +isobtain 0 +insight 0 +gain 0 +pleasantli 0 +appropri 0 +conceptform 0 +spirit 0 +home 0 +paul 0 +bernai 0 +lifschitzgottesman 0 +famili 0 +centenni 0 +professor 0 +incomput 0 +sciencesat 0 +texasat 0 +austin 0 +fellow 0 +theamerican 0 +associ 0 +forartifici 0 +intelligenceb 0 +branchof 0 +steklov 0 +institut 0 +area 0 +interesttempor 0 +reasoningand 0 +reason 0 +aboutactionslog 0 +programmingand 0 +nonmonoton 0 +reasoningteachingoth 0 +profession 0 +activitiespap 0 +line 0 +lectur 0 +survei 0 +lifschitz 0 +dissert 0 +bylifschitz 0 +studentsrecommend 0 +read 0 +edsger 0 +dijkstra 0 +convoc 0 +speechgood 0 +madelein 0 +albright 0 +nomin 0 +secretari 0 +state 0 +becam 0 +better 0 +place 0 +live 0 +regain 0 +book 0 +taken 0 +soviet 0 +armi 0 +daniel 0 +ortega 0 +lost 0 +need 0 +recycl 0 +helm 0 +burton 0 +actbad 0 +wang 0 +sentenc 0 +year 0 +prison 0 +tortur 0 +us 0 +polic 0 +democrat 0 +countri 0 +sequest 0 +import 0 +archeolog 0 +evid 0 +world 0 +close 0 +societynot 0 +problem 0 +america 0 +elect 0 +recent 0 +redrawn 0 +district 0 +california 0 +civil 0 +right 0 +initi 0 +ratio 0 +white 0 +finish 0 +high 0 +school 0 +admit 0 +student 0 +neutral 0 +basisoth 0 +amnesti 0 +intern 0 +scientist 0 +scienc 0 +favorit 0 +stori 0 +three 0 +silli 0 +joke 0 +quot 0 +monthcontact 0 +inform 0 +taylor 0 +hall 0 +phone 0 +number 0 +postal 0 +address 0 +depart 0 +comput 0 +sciencesunivers 0 +texa 0 +austinaustin 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..577d6767 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,56 @@ +comput 1 +algorithm 1 +univers 0 +texa 0 +austin 0 +parallel 0 +design 0 +scienc 0 +research 0 +evalu 0 +address 0 +vijaya 0 +ramachandranvijaya 0 +ramachandranblakemor 0 +regent 0 +professor 0 +princeton 0 +interestsmi 0 +interest 0 +theori 0 +primarilyin 0 +area 0 +includ 0 +analysi 0 +effici 0 +sequenti 0 +model 0 +machin 0 +experiment 0 +access 0 +copi 0 +recent 0 +paper 0 +mine 0 +complet 0 +list 0 +public 0 +avail 0 +vita 0 +offici 0 +faculti 0 +profil 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +depart 0 +number 0 +visit 0 +page 0 +sinc 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..beffe8ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,203 @@ +comput 1 +algorithm 0 +theori 0 +utexa 0 +mail 0 +list 0 +warm 0 +scienc 0 +univers 0 +group 0 +research 0 +symposium 0 +design 0 +complex 0 +parallel 0 +graph 0 +random 0 +sigact 0 +theoret 0 +foundat 0 +interest 0 +analysi 0 +plaxton 0 +vijaya 0 +ramachandran 0 +announc 0 +messag 0 +texa 0 +faculti 0 +includ 0 +effici 0 +differ 0 +time 0 +number 0 +greg 0 +sanjoi 0 +kelsen 0 +madhukar 0 +ramgop 0 +suel 0 +yuke 0 +electron 0 +activ 0 +send 0 +remov 0 +stand 0 +state 0 +meet 0 +locat 0 +sever 0 +distinguish 0 +austin 0 +dalla 0 +louisiana 0 +sent 0 +also 0 +ieee 0 +bibliographi 0 +grouput 0 +groupth 0 +focus 0 +current 0 +probabilist 0 +method 0 +major 0 +focu 0 +provabl 0 +solv 0 +fundament 0 +problem 0 +measur 0 +term 0 +resourc 0 +space 0 +processor 0 +bit 0 +combinator 0 +lower 0 +bound 0 +machin 0 +model 0 +david 0 +zuckerman 0 +walk 0 +cryptographi 0 +affili 0 +folk 0 +postdoc 0 +student 0 +alumni 0 +baruah 0 +emba 0 +tsan 0 +sheng 0 +tshsu 0 +sinica 0 +pierr 0 +korupolu 0 +phil 0 +mackenzi 0 +philmac 0 +idbsu 0 +mettu 0 +poon 0 +ckpoon 0 +rajmohan 0 +rajaraman 0 +rraj 0 +santanu 0 +sinha 0 +ssinha 0 +torsten 0 +berkelei 0 +zhou 0 +relat 0 +seminar 0 +post 0 +lowvolum 0 +typic 0 +dozen 0 +semest 0 +express 0 +ad 0 +name 0 +request 0 +gripe 0 +workshop 0 +themidsouth 0 +midsouthwest 0 +forum 0 +surround 0 +twice 0 +year 0 +consist 0 +talk 0 +region 0 +recent 0 +result 0 +often 0 +keynot 0 +speaker 0 +first 0 +organ 0 +atut 0 +spring 0 +organizedanoth 0 +fall 0 +held 0 +southern 0 +methodist 0 +north 0 +southwestern 0 +oklahoma 0 +next 0 +schedul 0 +beheld 0 +novemb 0 +program 0 +algorithmsmail 0 +becom 0 +avail 0 +usuallytri 0 +pool 0 +attend 0 +take 0 +place 0 +outsid 0 +ofaustin 0 +regard 0 +arrang 0 +special 0 +algorithmsand 0 +thatinclud 0 +mani 0 +scientist 0 +sponsorsth 0 +stoc 0 +sponsor 0 +siam 0 +discret 0 +soda 0 +andarchitectur 0 +spaa 0 +import 0 +confer 0 +interestar 0 +foc 0 +serv 0 +elect 0 +member 0 +thesigact 0 +execut 0 +committe 0 +us 0 +pointer 0 +calendar 0 +eccc 0 +colloquium 0 +virtual 0 +rolodex 0 +hypertext 0 +project 0 +dept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..7c06a037 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,7 @@ +srinivasan 1 +vaidyaraman 1 +offic 1 +phone 1 +email 0 +utexa 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..1c5a8901 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,45 @@ +vurgun 1 +comput 1 +scienc 1 +interest 1 +austin 1 +sengul 0 +sengulvurgun 0 +background 0 +interestsi 0 +student 0 +depart 0 +ammainli 0 +artifici 0 +intellig 0 +neural 0 +network 0 +evolutionaryalgorithm 0 +term 0 +paper 0 +topic 0 +order 0 +ofprefer 0 +memori 0 +represent 0 +knowledg 0 +howto 0 +retriev 0 +learn 0 +theori 0 +problem 0 +solv 0 +cognit 0 +skillacquisit 0 +search 0 +understand 0 +visual 0 +attent 0 +connectionist 0 +approach 0 +architectur 0 +mindto 0 +contact 0 +mepost 0 +usavoic 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..f9715528 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,10 @@ +page 1 +walbourn 0 +home 0 +chuck 0 +walbournmi 0 +person 0 +locat 0 +charybdi 0 +enterpris 0 +server 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..daf3db60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,50 @@ +walker 1 +comput 0 +scienc 0 +depart 0 +colleg 0 +henri 0 +mackai 0 +visit 0 +univers 0 +texa 0 +austin 0 +professor 0 +mathemat 0 +grinnel 0 +math 0 +grin 0 +septemb 0 +senior 0 +lectur 0 +edua 0 +regular 0 +tenur 0 +member 0 +faculti 0 +professorwalk 0 +period 0 +teachand 0 +variou 0 +profession 0 +activ 0 +formal 0 +appoint 0 +follow 0 +academ 0 +year 0 +summer 0 +fall 0 +complet 0 +inform 0 +avail 0 +home 0 +page 0 +atgrinnel 0 +http 0 +creat 0 +last 0 +revis 0 +photograph 0 +jack 0 +robertson 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..090edd1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,61 @@ +warshaw 1 +austin 1 +rule 1 +research 0 +venu 0 +mirank 0 +home 0 +comput 0 +scienc 0 +graduat 0 +univers 0 +texa 0 +base 0 +appli 0 +case 0 +studi 0 +declar 0 +basi 0 +modul 0 +page 0 +lane 0 +warshawlan 0 +mike 0 +cool 0 +senior 0 +student 0 +recent 0 +accept 0 +school 0 +work 0 +area 0 +activ 0 +databas 0 +system 0 +current 0 +posit 0 +laboratoryinvolv 0 +maintain 0 +languag 0 +developedat 0 +andat 0 +laboratori 0 +lanc 0 +obermey 0 +first 0 +item 0 +anoth 0 +third 0 +follow 0 +list 0 +paper 0 +unpublish 0 +confer 0 +inform 0 +knowledg 0 +manag 0 +contact 0 +mepost 0 +usavoic 0 +arlut 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..5a23416e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,49 @@ +austin 1 +utexa 0 +chen 0 +comput 0 +scienc 0 +texa 0 +mathemat 0 +univers 0 +wchen 0 +java 0 +rosett 0 +program 0 +master 0 +student 0 +dept 0 +decemb 0 +august 0 +fudan 0 +china 0 +juli 0 +offic 0 +phone 0 +email 0 +math 0 +mail 0 +address 0 +center 0 +numer 0 +analysi 0 +us 0 +link 0 +unix 0 +book 0 +expect 0 +perl 0 +site 0 +demo 0 +refer 0 +manual 0 +exampl 0 +common 0 +gatewai 0 +interfac 0 +sampl 0 +pleas 0 +click 0 +load 0 +file 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..fb942ff7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,48 @@ +austin 1 +depart 1 +comput 1 +research 1 +program 1 +page 0 +paul 0 +wilson 0 +taylor 0 +hall 0 +univers 0 +texa 0 +scienc 0 +oop 0 +group 0 +languag 0 +home 0 +contact 0 +inform 0 +offic 0 +postal 0 +address 0 +best 0 +reach 0 +email 0 +ltwilson 0 +utexa 0 +usual 0 +headshot 0 +novelti 0 +thought 0 +cross 0 +section 0 +informationi 0 +lead 0 +object 0 +orient 0 +system 0 +workson 0 +memori 0 +manag 0 +design 0 +implement 0 +teachingin 0 +fall 0 +teach 0 +sciencesnot 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..aeace530 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,137 @@ +christian 1 +holi 0 +jesu 0 +also 0 +becom 0 +sin 0 +christ 0 +thought 0 +becam 0 +life 0 +though 0 +time 0 +good 0 +think 0 +wrong 0 +true 0 +howev 0 +abl 0 +faith 0 +strength 0 +know 0 +seek 0 +arthur 0 +homepag 0 +cent 0 +christiani 0 +made 0 +major 0 +decis 0 +believ 0 +alittl 0 +month 0 +come 0 +totallyunexpect 0 +religi 0 +studi 0 +compulsori 0 +class 0 +thechristian 0 +high 0 +school 0 +attend 0 +hong 0 +kong 0 +intent 0 +tobecom 0 +record 0 +slife 0 +bibl 0 +quit 0 +credibl 0 +teach 0 +take 0 +deepli 0 +relat 0 +mylif 0 +misconcept 0 +mean 0 +christianwa 0 +clear 0 +came 0 +past 0 +differ 0 +christianand 0 +lovedeveri 0 +matter 0 +whether 0 +decid 0 +achristian 0 +us 0 +shouldb 0 +virtuou 0 +enough 0 +listen 0 +friend 0 +church 0 +thefellowship 0 +realiz 0 +thing 0 +born 0 +theywil 0 +stumbl 0 +flesh 0 +hei 0 +sinless 0 +sympath 0 +weak 0 +weconfess 0 +forgiv 0 +andto 0 +cleans 0 +unright 0 +john 0 +differencebetween 0 +peopl 0 +trust 0 +astheir 0 +saviour 0 +gratefulli 0 +accept 0 +redempt 0 +fortheir 0 +therefor 0 +justifi 0 +without 0 +deed 0 +ofth 0 +roman 0 +doubt 0 +live 0 +wedo 0 +thecontrari 0 +reli 0 +givesu 0 +said 0 +whole 0 +need 0 +nota 0 +physician 0 +sick 0 +matthew 0 +count 0 +onour 0 +number 0 +dai 0 +ought 0 +thetruth 0 +earli 0 +hesit 0 +start 0 +thankgod 0 +lead 0 +give 0 +opportun 0 +realli 0 +wkmak 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..d2671160 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,148 @@ +robot 1 +research 0 +interest 0 +page 0 +work 0 +group 0 +austin 0 +badminton 0 +utexa 0 +topic 0 +comment 0 +explor 0 +learn 0 +intellig 0 +rang 0 +reason 0 +system 0 +home 0 +built 0 +onlin 0 +internet 0 +resourc 0 +pictur 0 +educ 0 +public 0 +hotlist 0 +send 0 +contact 0 +inform 0 +mobil 0 +navig 0 +control 0 +spatial 0 +semant 0 +hierarchi 0 +engin 0 +machin 0 +network 0 +qualit 0 +interfac 0 +world 0 +ring 0 +ultrason 0 +sensor 0 +rhino 0 +manipul 0 +robokreta 0 +car 0 +email 0 +wyle 0 +member 0 +guitar 0 +clarinet 0 +martial 0 +art 0 +phone 0 +texa 0 +hello 0 +visitor 0 +number 0 +person 0 +miscellan 0 +worki 0 +primarili 0 +approach 0 +mobilerobot 0 +softwar 0 +develop 0 +area 0 +artifici 0 +includ 0 +neural 0 +vision 0 +oper 0 +embed 0 +graphic 0 +user 0 +multimedia 0 +spot 0 +real 0 +rover 0 +tall 0 +commerci 0 +robocac 0 +worm 0 +specif 0 +robofest 0 +organ 0 +besar 0 +kicik 0 +autonom 0 +us 0 +chassi 0 +motor 0 +fast 0 +remot 0 +race 0 +paper 0 +andqualit 0 +avail 0 +doctor 0 +dissert 0 +titl 0 +fora 0 +physic 0 +also 0 +offici 0 +start 0 +point 0 +technolog 0 +wide 0 +catalog 0 +usenet 0 +frequent 0 +ask 0 +question 0 +meta 0 +index 0 +ncsa 0 +malaysia 0 +homepag 0 +yahoo 0 +note 0 +common 0 +pleas 0 +eduperson 0 +interestsavid 0 +player 0 +unit 0 +state 0 +associ 0 +usba 0 +love 0 +plai 0 +miscellaneousinterest 0 +well 0 +movi 0 +offic 0 +taylor 0 +hall 0 +mail 0 +comput 0 +scienc 0 +depart 0 +univers 0 +finger 0 +back 0 +list 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..c9fb9427 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,8 @@ +address 1 +phone 1 +contact 0 +xfeng 0 +utexa 0 +qaustin 0 +west 0 +austin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..06071e89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,92 @@ +page 1 +pictur 0 +search 0 +xingang 0 +home 0 +travel 0 +austin 0 +student 0 +comput 0 +work 0 +surpris 0 +well 0 +interest 0 +time 0 +creat 0 +photographi 0 +classic 0 +music 0 +audio 0 +sport 0 +visual 0 +welcom 0 +hang 0 +around 0 +univers 0 +texa 0 +depart 0 +scienc 0 +distribut 0 +multimedia 0 +head 0 +harrick 0 +year 0 +graduat 0 +program 0 +rank 0 +nation 0 +delight 0 +realli 0 +pretti 0 +hard 0 +take 0 +aswel 0 +stuff 0 +line 0 +soon 0 +right 0 +temporaryresort 0 +imagin 0 +hopefulli 0 +goe 0 +llgradual 0 +walk 0 +paper 0 +present 0 +clearer 0 +imag 0 +link 0 +find 0 +feel 0 +havesometh 0 +watch 0 +frequent 0 +access 0 +pointer 0 +serious 0 +foliag 0 +marvel 0 +engin 0 +alta 0 +vista 0 +string 0 +infoseek 0 +keyword 0 +miata 0 +club 0 +unit 0 +morn 0 +newspap 0 +american 0 +express 0 +financi 0 +card 0 +york 0 +atlant 0 +monthli 0 +china 0 +soccer 0 +major 0 +leagu 0 +group 0 +xguo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..8468f755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,4 @@ +geoffrei 1 +home 1 +pagemov 1 +address 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..e7ed6e1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,15 @@ +frame 1 +yang 0 +alert 0 +see 0 +messag 0 +us 0 +challeng 0 +browser 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..4ba3bf8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,87 @@ +austin 1 +utc 1 +guitar 1 +corner 0 +home 0 +classic 0 +david 0 +wwwdavid 0 +hello 0 +whatev 0 +took 0 +welcom 0 +make 0 +establish 0 +contact 0 +street 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +internet 0 +utexa 0 +get 0 +busi 0 +research 0 +outsid 0 +work 0 +still 0 +seriou 0 +favorit 0 +quot 0 +word 0 +hope 0 +daili 0 +medit 0 +stuff 0 +concert 0 +byth 0 +assad 0 +brothersin 0 +hong 0 +kong 0 +art 0 +festiv 0 +must 0 +first 0 +introduc 0 +beauti 0 +christoph 0 +parkeningi 0 +guitarist 0 +interest 0 +life 0 +stori 0 +tell 0 +grew 0 +tire 0 +ofconcert 0 +retir 0 +reconcili 0 +jesu 0 +christ 0 +rekindl 0 +passion 0 +also 0 +theamsterdam 0 +trio 0 +french 0 +rich 0 +artist 0 +flair 0 +like 0 +nation 0 +footbal 0 +team 0 +michel 0 +platini 0 +label 0 +franc 0 +magazinepublish 0 +minist 0 +align 0 +absmiddl 0 +sinc 0 +sept 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..b9247d7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,15 @@ +frame 1 +yanbin 0 +alert 0 +see 0 +messag 0 +us 0 +challeng 0 +browser 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..29ac7163 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,9 @@ +yuan 1 +home 1 +page 1 +oop 1 +us 1 +browser 1 +pleas 1 +click 1 +continu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..17ca5dc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,64 @@ +yong 1 +univers 0 +beij 0 +china 0 +program 0 +austin 0 +unit 0 +servic 0 +homepageto 0 +homepagey 0 +number 0 +visitor 0 +sinc 0 +current 0 +comput 0 +scienc 0 +texa 0 +settl 0 +stai 0 +mathemat 0 +graduat 0 +rutger 0 +brunswick 0 +year 0 +jersei 0 +beauti 0 +place 0 +wife 0 +tsinghua 0 +milanitalian 0 +soccerk 0 +soccernba 0 +sitefox 0 +sportschicago 0 +bullsmichael 0 +jordannflnhlc 0 +rankingmarri 0 +childrenseinfeldcomput 0 +sciencesutilitieshtml 0 +convertersimag 0 +collectionssystemshtmllatexcgitcl 0 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 0 +tmiscinternet 0 +travel 0 +network 0 +parcel 0 +state 0 +postal 0 +usp 0 +fedexus 0 +guidefun 0 +todayu 0 +newsstarwavesupermodel 0 +contact 0 +river 0 +street 0 +finger 0 +yonglu 0 +utexa 0 +page 0 +heavi 0 +construct 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..88755e85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,138 @@ +young 1 +linear 1 +iter 1 +method 1 +system 1 +comput 0 +larg 0 +parallel 0 +scienc 0 +mathemat 0 +associ 0 +research 0 +professor 0 +numer 0 +algebra 0 +applic 0 +equat 0 +solut 0 +kincaid 0 +spars 0 +david 0 +analysi 0 +univers 0 +award 0 +american 0 +journal 0 +appli 0 +matric 0 +solv 0 +sever 0 +search 0 +carei 0 +crai 0 +profil 0 +youngashbel 0 +smith 0 +ofmathemat 0 +director 0 +center 0 +webb 0 +institut 0 +naval 0 +architectur 0 +harvard 0 +honor 0 +profession 0 +servic 0 +fellow 0 +advanc 0 +outstand 0 +contribut 0 +special 0 +issueded 0 +chair 0 +committe 0 +mathematicalsocieti 0 +board 0 +truste 0 +argonn 0 +editor 0 +siam 0 +matrixappl 0 +area 0 +interestnumer 0 +partial 0 +differenti 0 +numericallinear 0 +summari 0 +researchmi 0 +activ 0 +focus 0 +partialdifferenti 0 +base 0 +finit 0 +differ 0 +methodsand 0 +oflinear 0 +involv 0 +andspars 0 +softwar 0 +packag 0 +develop 0 +basedon 0 +part 0 +itpack 0 +project 0 +beingextend 0 +includ 0 +suitabl 0 +share 0 +memori 0 +distributedmemori 0 +rapidli 0 +converg 0 +methodsbas 0 +multilevel 0 +procedur 0 +also 0 +beingdevelop 0 +select 0 +recent 0 +publicationsd 0 +stationari 0 +second 0 +degre 0 +topic 0 +polynomi 0 +variabl 0 +rassia 0 +srivasiava 0 +yanushauska 0 +world 0 +scientif 0 +publ 0 +compani 0 +singapor 0 +vona 0 +ration 0 +omega 0 +academ 0 +press 0 +sepehrnoori 0 +vector 0 +pde 0 +engin 0 +minneapoli 0 +high 0 +level 0 +solver 0 +supercomput 0 +algorithm 0 +graham 0 +john 0 +wilei 0 +son 0 +previou 0 +index 0 +next 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..55483a6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,125 @@ +yoonsuck 1 +choe 1 +austin 1 +comput 0 +scienc 0 +univers 0 +texa 0 +research 0 +organ 0 +miikkulainen 0 +group 0 +later 0 +self 0 +lissom 0 +joseph 0 +sirosh 0 +risto 0 +spike 0 +model 0 +structur 0 +neural 0 +digit 0 +recognit 0 +interconnect 0 +utc 0 +object 0 +public 0 +depart 0 +august 0 +network 0 +work 0 +hand 0 +written 0 +featur 0 +prof 0 +segment 0 +repres 0 +book 0 +relat 0 +interact 0 +cortex 0 +function 0 +page 0 +report 0 +inform 0 +yschoe 0 +utexa 0 +home 0 +photo 0 +ad 0 +student 0 +dept 0 +decemb 0 +yonsei 0 +seoul 0 +korea 0 +welcom 0 +homepag 0 +visitor 0 +sinc 0 +interest 0 +cortic 0 +current 0 +systembas 0 +laterali 0 +synerget 0 +develop 0 +recent 0 +includ 0 +extend 0 +actualspik 0 +event 0 +call 0 +slissom 0 +beself 0 +multipl 0 +retinabi 0 +synchron 0 +within 0 +desynchron 0 +differ 0 +outlin 0 +also 0 +check 0 +html 0 +edit 0 +comment 0 +connect 0 +neuron 0 +technic 0 +septemb 0 +electron 0 +isbn 0 +map 0 +appear 0 +touretzki 0 +mozer 0 +hasselmo 0 +editor 0 +advanc 0 +process 0 +system 0 +cambridg 0 +press 0 +handwritten 0 +techic 0 +master 0 +thesi 0 +bunch 0 +link 0 +total 0 +unord 0 +click 0 +find 0 +interestingcontact 0 +offic 0 +phone 0 +email 0 +mail 0 +address 0 +maintain 0 +last 0 +updat 0 +newsgroup 0 +summari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..12294900 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,76 @@ +china 1 +scienc 0 +austin 0 +famili 0 +texa 0 +manchest 0 +java 0 +yuanj 0 +xuwint 0 +break 0 +updat 0 +addresspictur 0 +depart 0 +comput 0 +univers 0 +spring 0 +time 0 +tabl 0 +select 0 +cours 0 +schedulec 0 +object 0 +orient 0 +softwar 0 +engr 0 +brown 0 +logic 0 +synthesi 0 +aziz 0 +client 0 +server 0 +system 0 +develop 0 +gang 0 +previou 0 +semest 0 +pagechines 0 +student 0 +associ 0 +alumni 0 +page 0 +work 0 +studi 0 +universityof 0 +technolog 0 +hefei 0 +institut 0 +mathemat 0 +chines 0 +academi 0 +beij 0 +chinaunivers 0 +munich 0 +atmunich 0 +germanyunivers 0 +prof 0 +nick 0 +higham 0 +wang 0 +lifan 0 +hong 0 +chen 0 +guizhongustc 0 +yuan 0 +hailiang 0 +yang 0 +yuhongfriend 0 +linsoftwar 0 +program 0 +perl 0 +common 0 +gatewai 0 +interfac 0 +link 0 +yahoo 0 +publish 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..1337b254 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,11 @@ +feng 1 +visitor 0 +number 0 +sinc 0 +yufeng 0 +utexa 0 +edufing 0 +public 0 +ring 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..518e5864 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,68 @@ +china 1 +univers 1 +austin 1 +zhii 0 +chen 0 +guangzhou 0 +comput 0 +resum 0 +zhongshan 0 +chines 0 +fall 0 +spring 0 +associ 0 +anim 0 +page 0 +home 0 +pagezhii 0 +chenabout 0 +mefrom 0 +canton 0 +peopl 0 +republ 0 +current 0 +master 0 +program 0 +scienc 0 +depart 0 +texa 0 +seek 0 +full 0 +time 0 +click 0 +postcript 0 +format 0 +pleas 0 +view 0 +life 0 +calculu 0 +architectur 0 +misc 0 +zodiac 0 +person 0 +differ 0 +kind 0 +find 0 +friend 0 +maintain 0 +john 0 +dong 0 +thank 0 +els 0 +world 0 +wide 0 +info 0 +contact 0 +burton 0 +zchen 0 +utexa 0 +still 0 +construct 0 +copyright 0 +creat 0 +last 0 +modifi 0 +visitor 0 +accord 0 +counter 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..4f73fbc7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,94 @@ +comput 1 +austin 0 +home 0 +page 0 +univers 0 +china 0 +campu 0 +world 0 +magazin 0 +zhouxiao 0 +utexa 0 +maggi 0 +xiao 0 +zhou 0 +depart 0 +texa 0 +work 0 +fall 0 +onlin 0 +internet 0 +offic 0 +educ 0 +assistantship 0 +graduat 0 +student 0 +teach 0 +assist 0 +databas 0 +manag 0 +sciencesat 0 +buaa 0 +beij 0 +life 0 +current 0 +multimedia 0 +system 0 +algorithm 0 +data 0 +commun 0 +network 0 +distribut 0 +process 0 +spring 0 +look 0 +around 0 +kaleidoscop 0 +land 0 +beauti 0 +visit 0 +peopl 0 +daili 0 +new 0 +digest 0 +zhai 0 +chines 0 +newspag 0 +time 0 +entertain 0 +movi 0 +stamp 0 +societi 0 +ieee 0 +giant 0 +career 0 +center 0 +compani 0 +search 0 +yahoo 0 +galaxi 0 +lyco 0 +directori 0 +guid 0 +html 0 +script 0 +librari 0 +contact 0 +inform 0 +mail 0 +http 0 +user 0 +main 0 +build 0 +room 0 +phone 0 +address 0 +scienc 0 +taylor 0 +last 0 +modifi 0 +sept 0 +comment 0 +welcom 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..290bd3d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,82 @@ +dynam 1 +document 1 +friend 0 +univers 0 +texa 0 +austin 0 +china 0 +site 0 +homepag 0 +know 0 +student 0 +depart 0 +comput 0 +peke 0 +former 0 +classmat 0 +netscap 0 +clike 0 +qing 0 +qinghi 0 +welcom 0 +graduat 0 +inth 0 +scinc 0 +born 0 +beij 0 +capit 0 +citi 0 +bachelor 0 +degre 0 +meet 0 +ofmi 0 +findmor 0 +peopl 0 +pekingunivers 0 +alumni 0 +home 0 +page 0 +oversea 0 +html 0 +enjoi 0 +live 0 +peek 0 +follow 0 +like 0 +well 0 +find 0 +lot 0 +valuabl 0 +informationand 0 +professionalinternetpc 0 +relatedmac 0 +relatedaft 0 +worknetscap 0 +testtwin 0 +eldertwin 0 +youngernetscap 0 +testanim 0 +danc 0 +titledanc 0 +titl 0 +testanoth 0 +testyet 0 +anoth 0 +testfriendsthi 0 +travel 0 +maintain 0 +xiaohai 0 +best 0 +shan 0 +shinan 0 +visitor 0 +number 0 +sinc 0 +octob 0 +construct 0 +last 0 +modifi 0 +qingunivers 0 +sciencesaustin 0 +zhuqe 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..5791a020 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,63 @@ +washington 1 +student 1 +faculti 0 +program 0 +inform 0 +region 0 +research 0 +univers 0 +univ 0 +comput 0 +scienc 0 +organizationsinclud 0 +staff 0 +visitor 0 +organ 0 +ouraffili 0 +graduat 0 +regioninclud 0 +local 0 +desktop 0 +refer 0 +link 0 +elsewher 0 +spotlightuwwin 0 +pacif 0 +intern 0 +programmingcontesttwovideo 0 +highlight 0 +educ 0 +initiativesourcolloquia 0 +live 0 +mbonemajordon 0 +intel 0 +corporationdickkarp 0 +receiv 0 +nation 0 +medal 0 +scienceprofessionalmast 0 +applic 0 +deadlin 0 +autumn 0 +departmentoverview 0 +theimpact 0 +perspect 0 +staffposit 0 +avail 0 +half 0 +centuri 0 +exponenti 0 +progress 0 +technolog 0 +page 0 +peopl 0 +cours 0 +laboratori 0 +newscan 0 +handl 0 +tabl 0 +click 0 +seattl 0 +voic 0 +comment 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..7e0ebbe2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,57 @@ +berman 1 +andrew 0 +postscript 0 +format 0 +debbi 0 +home 0 +pageandrew 0 +aberman 0 +washington 0 +educomput 0 +scienc 0 +bourassa 0 +virgil 0 +selberg 0 +erik 0 +tron 0 +process 0 +specif 0 +file 0 +protect 0 +unix 0 +oper 0 +system 0 +bothpostscript 0 +andhtml 0 +proceed 0 +winter 0 +usenix 0 +confer 0 +data 0 +structur 0 +fast 0 +approxim 0 +match 0 +shapiro 0 +linda 0 +effici 0 +imag 0 +retriev 0 +multipl 0 +distanc 0 +measur 0 +avail 0 +appear 0 +spie 0 +special 0 +link 0 +wife 0 +beauti 0 +daughter 0 +melani 0 +miscellan 0 +poison 0 +donut 0 +stupid 0 +stupidmi 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..406aa23a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,107 @@ +adam 1 +washington 1 +softwar 1 +imag 1 +finkelstein 0 +depart 0 +comput 0 +scienc 0 +univers 0 +seattl 0 +coffe 0 +student 0 +recent 0 +photocopi 0 +visit 0 +made 0 +start 0 +drink 0 +cup 0 +hair 0 +limp 0 +andlack 0 +bodi 0 +year 0 +live 0 +good 0 +life 0 +graduat 0 +final 0 +finish 0 +doctor 0 +graphic 0 +fall 0 +quarter 0 +post 0 +earli 0 +join 0 +thecomput 0 +atprinceton 0 +engin 0 +tibco 0 +formerli 0 +teknekron 0 +system 0 +palo 0 +alto 0 +wrote 0 +peopl 0 +trade 0 +stock 0 +undergradu 0 +swarthmor 0 +colleg 0 +class 0 +studi 0 +physic 0 +occasion 0 +research 0 +project 0 +find 0 +specif 0 +alarg 0 +databas 0 +sinc 0 +work 0 +someth 0 +call 0 +multiresolut 0 +video 0 +photo 0 +plai 0 +ultim 0 +frisbe 0 +team 0 +calledumatata 0 +address 0 +phone 0 +number 0 +look 0 +plan 0 +file 0 +across 0 +thehilari 0 +menu 0 +least 0 +hous 0 +caff 0 +lardo 0 +chilli 0 +night 0 +snoqualmi 0 +pass 0 +excel 0 +view 0 +comet 0 +hyakutak 0 +great 0 +pictur 0 +taken 0 +friend 0 +marcu 0 +cool 0 +glass 0 +sculptur 0 +dither 0 +mona 0 +gothic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..ea0bfd47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,146 @@ +data 1 +parallel 0 +load 0 +jame 0 +ahren 0 +balanc 0 +comput 0 +system 0 +render 0 +visual 0 +databas 0 +vision 0 +algorithm 0 +polygon 0 +scientif 0 +cost 0 +perform 0 +dataset 0 +redistribut 0 +paper 0 +model 0 +base 0 +univers 0 +washington 0 +manag 0 +charl 0 +hansen 0 +effect 0 +present 0 +experi 0 +fast 0 +home 0 +page 0 +address 0 +scienc 0 +engin 0 +depart 0 +research 0 +project 0 +laboratori 0 +process 0 +improv 0 +onunbalanc 0 +design 0 +describ 0 +method 0 +machin 0 +shade 0 +applic 0 +set 0 +seattl 0 +email 0 +phone 0 +interest 0 +distribut 0 +environ 0 +alamo 0 +nation 0 +public 0 +intern 0 +confer 0 +august 0 +typic 0 +program 0 +degrad 0 +unnecessari 0 +occur 0 +whichperform 0 +possibl 0 +save 0 +outweighth 0 +polygonrender 0 +show 0 +factor 0 +loss 0 +percent 0 +onbalanc 0 +us 0 +linda 0 +shapiro 0 +steven 0 +tanimoto 0 +brinklei 0 +jakobovit 0 +lara 0 +lewi 0 +proceed 0 +second 0 +workshop 0 +februari 0 +gener 0 +motiv 0 +intend 0 +provid 0 +unifi 0 +highli 0 +graphic 0 +user 0 +interfac 0 +advanc 0 +queri 0 +facil 0 +interact 0 +notebook 0 +aid 0 +experiment 0 +promot 0 +share 0 +commun 0 +frank 0 +ortega 0 +supercomput 0 +novemb 0 +massiv 0 +simpl 0 +target 0 +requir 0 +extrem 0 +larg 0 +found 0 +mani 0 +handl 0 +arbitrarili 0 +complex 0 +need 0 +mesh 0 +issu 0 +involv 0 +toolkit 0 +enabl 0 +scientist 0 +displai 0 +directli 0 +avoid 0 +transmiss 0 +huge 0 +amount 0 +post 0 +ofwashington 0 +april 0 +longer 0 +version 0 +icpp 0 +also 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..c1ea32ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,100 @@ +vision 1 +databas 0 +research 0 +visual 0 +environ 0 +linda 0 +shapiro 0 +steven 0 +tanimoto 0 +base 0 +comput 0 +imag 0 +jame 0 +ahren 0 +jakobovit 0 +lara 0 +lewi 0 +februari 0 +overview 0 +scientif 0 +system 0 +model 0 +graphic 0 +interfac 0 +data 0 +devr 0 +entiti 0 +relat 0 +multi 0 +level 0 +queri 0 +experi 0 +manag 0 +brinklei 0 +spie 0 +symposium 0 +electron 0 +technolog 0 +wasdesign 0 +gener 0 +motiv 0 +andintend 0 +provid 0 +unifieddata 0 +highli 0 +user 0 +advanc 0 +queryfacil 0 +interact 0 +laboratori 0 +notebook 0 +databaseenviron 0 +aid 0 +experiment 0 +andpromot 0 +share 0 +commun 0 +store 0 +hierarch 0 +datastructur 0 +schema 0 +contain 0 +name 0 +ofproperti 0 +part 0 +attribut 0 +among 0 +thepart 0 +definit 0 +describ 0 +buildinst 0 +specif 0 +studi 0 +mani 0 +differ 0 +topic 0 +includ 0 +peopl 0 +princip 0 +investig 0 +graduat 0 +student 0 +public 0 +inmodel 0 +proceed 0 +secondcad 0 +workshop 0 +present 0 +project 0 +flexibledata 0 +organ 0 +support 0 +databasesystem 0 +scienceand 0 +implement 0 +scienc 0 +email 0 +washington 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..abef84b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,77 @@ +comput 1 +link 0 +page 0 +scienc 0 +graduat 0 +washington 0 +school 0 +program 0 +outdoor 0 +home 0 +great 0 +women 0 +educ 0 +inform 0 +mentorship 0 +project 0 +bernheim 0 +start 0 +univers 0 +still 0 +construct 0 +william 0 +colleg 0 +math 0 +front 0 +plai 0 +ultim 0 +frisbe 0 +autumn 0 +quarter 0 +class 0 +programminglanguag 0 +automata 0 +principl 0 +ofdigit 0 +system 0 +design 0 +graphic 0 +seminar 0 +parallel 0 +environ 0 +relat 0 +nation 0 +park 0 +gorp 0 +guideto 0 +recreationfun 0 +christian 0 +scott 0 +interact 0 +list 0 +abig 0 +pile 0 +cool 0 +blast 0 +past 0 +scoobi 0 +dooeduc 0 +refer 0 +undergrad 0 +peterson 0 +center 0 +sourc 0 +opportun 0 +distribut 0 +allow 0 +undergradu 0 +spend 0 +summerwork 0 +research 0 +femal 0 +mentor 0 +experi 0 +highlyrecommend 0 +back 0 +pagelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..4978f3e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,128 @@ +comput 1 +proof 1 +algorithm 0 +softwar 0 +scientist 0 +amir 0 +design 0 +master 0 +wai 0 +teach 0 +mathematician 0 +motiv 0 +error 0 +like 0 +make 0 +defici 0 +michail 0 +michailgradu 0 +studenti 0 +second 0 +year 0 +graduat 0 +student 0 +depart 0 +scienceat 0 +univers 0 +washington 0 +research 0 +interest 0 +includ 0 +followingarea 0 +engin 0 +educ 0 +degre 0 +universityof 0 +toronto 0 +thesi 0 +optim 0 +broadcast 0 +summationfor 0 +hierarch 0 +ring 0 +architectur 0 +shift 0 +click 0 +hereto 0 +obtain 0 +compress 0 +postscript 0 +file 0 +recent 0 +experi 0 +particular 0 +built 0 +opsi 0 +java 0 +appletdesign 0 +balanc 0 +binari 0 +tree 0 +combinesprogram 0 +anim 0 +final 0 +lunar 0 +lander 0 +style 0 +gamethat 0 +wrote 0 +part 0 +undergradu 0 +graphic 0 +cours 0 +quotat 0 +tend 0 +conserv 0 +mani 0 +unwillingto 0 +consid 0 +might 0 +better 0 +write 0 +told 0 +embarrass 0 +learn 0 +publishedincorrect 0 +theorem 0 +avoid 0 +believ 0 +theywil 0 +structur 0 +persuad 0 +will 0 +explor 0 +unconvent 0 +proofstyl 0 +unfortun 0 +found 0 +care 0 +whether 0 +theyhav 0 +publish 0 +incorrect 0 +result 0 +often 0 +seem 0 +glad 0 +wasnot 0 +caught 0 +refere 0 +sinc 0 +would 0 +meant 0 +fewer 0 +public 0 +fear 0 +stylethat 0 +reveal 0 +mistak 0 +lesli 0 +lamport 0 +construct 0 +wayi 0 +simpl 0 +obvious 0 +theother 0 +complic 0 +obviou 0 +hoar 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..8f6f1cc6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,62 @@ +scienc 1 +richard 0 +anderson 0 +comput 0 +washington 0 +research 0 +institut 0 +project 0 +professor 0 +univers 0 +year 0 +visit 0 +algorithm 0 +work 0 +travel 0 +home 0 +page 0 +associ 0 +graduat 0 +inmathemat 0 +reed 0 +colleg 0 +stanfordin 0 +join 0 +aon 0 +postdoc 0 +mathemat 0 +inberkelei 0 +receiv 0 +presidenti 0 +younginvestig 0 +award 0 +spent 0 +academ 0 +yeara 0 +indian 0 +bangalor 0 +india 0 +main 0 +interest 0 +theori 0 +implementationof 0 +includ 0 +parallel 0 +geometri 0 +scientif 0 +applic 0 +engin 0 +depart 0 +seattl 0 +teach 0 +paper 0 +progress 0 +qualifi 0 +evalu 0 +note 0 +theindian 0 +resum 0 +tourist 0 +pictur 0 +recent 0 +talksanderson 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..b42bbf44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,128 @@ +interest 1 +research 0 +decis 0 +comput 0 +scienc 0 +univers 0 +name 0 +syllabl 0 +anhai 0 +doan 0 +hungari 0 +mostli 0 +plan 0 +travel 0 +music 0 +first 0 +ofmi 0 +birthplac 0 +librari 0 +homepageanhai 0 +page 0 +reconstruct 0 +pleas 0 +revisit 0 +soon 0 +born 0 +brought 0 +vietnam 0 +finish 0 +high 0 +school 0 +iwent 0 +studi 0 +graduat 0 +kossuth 0 +lajo 0 +art 0 +andscienc 0 +debrecen 0 +receiv 0 +also 0 +wisconsin 0 +milwauke 0 +start 0 +fall 0 +program 0 +depart 0 +andengin 0 +ofwashington 0 +seattl 0 +artifici 0 +intellig 0 +amcurr 0 +investig 0 +make 0 +underuncertainti 0 +theoret 0 +markov 0 +process 0 +qualit 0 +theori 0 +academ 0 +includ 0 +read 0 +listen 0 +jazz 0 +blue 0 +thing 0 +mean 0 +calm 0 +invietnames 0 +made 0 +combin 0 +last 0 +mother 0 +nghean 0 +father 0 +haiphong 0 +show 0 +creativ 0 +folkswer 0 +thought 0 +birth 0 +younger 0 +brother 0 +theysimpli 0 +switch 0 +gave 0 +namehaian 0 +content 0 +probabilist 0 +knowledg 0 +represent 0 +recent 0 +paper 0 +curriculum 0 +vita 0 +educ 0 +employ 0 +histori 0 +award 0 +honor 0 +public 0 +teach 0 +data 0 +structur 0 +algorithm 0 +take 0 +cours 0 +check 0 +inform 0 +offic 0 +hour 0 +locat 0 +person 0 +comtemporari 0 +vietnames 0 +affair 0 +literatur 0 +write 0 +paint 0 +foreign 0 +languag 0 +gener 0 +purpos 0 +life 0 +snapshotsanhai 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..125ec3b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,128 @@ +ausland 1 +figur 0 +joel 0 +comput 0 +washington 0 +anim 0 +compil 0 +paper 0 +motion 0 +univers 0 +last 0 +project 0 +seattl 0 +dynam 0 +philipos 0 +chamber 0 +egger 0 +automat 0 +system 0 +graphic 0 +page 0 +without 0 +hope 0 +depart 0 +scienc 0 +engineeringat 0 +pictur 0 +spring 0 +class 0 +click 0 +onit 0 +origin 0 +quarter 0 +complet 0 +qual 0 +time 0 +code 0 +gener 0 +multiflow 0 +offic 0 +sieg 0 +hall 0 +home 0 +univ 0 +resum 0 +written 0 +follow 0 +fast 0 +effect 0 +andb 0 +bershad 0 +pldi 0 +support 0 +event 0 +dispatch 0 +inextens 0 +mock 0 +andp 0 +pardyak 0 +workshop 0 +compilersupport 0 +softwar 0 +februari 0 +experi 0 +control 0 +base 0 +synthesisfor 0 +articul 0 +fukunaga 0 +partovi 0 +christensen 0 +reiss 0 +shuman 0 +mark 0 +acmtransact 0 +also 0 +site 0 +optim 0 +leapfrog 0 +benjamin 0 +wilkerson 0 +mathemat 0 +magazin 0 +lossili 0 +compress 0 +mpeg 0 +animationthat 0 +goe 0 +synthesi 0 +sequenc 0 +show 0 +mywork 0 +piec 0 +togeth 0 +cartwheel 0 +jump 0 +andshuffl 0 +fall 0 +andcollaps 0 +brown 0 +us 0 +algorithm 0 +orang 0 +isjust 0 +try 0 +switch 0 +consider 0 +tosmooth 0 +physic 0 +autumn 0 +took 0 +super 0 +short 0 +doubl 0 +speed 0 +small 0 +version 0 +final 0 +find 0 +better 0 +place 0 +slide 0 +thetalk 0 +singular 0 +valu 0 +decomposit 0 +gave 0 +seminar 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..34ed239d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,96 @@ +brian 1 +info 1 +boinge 0 +name 0 +offic 0 +current 0 +page 0 +favorit 0 +look 0 +girl 0 +michalowskidepart 0 +comput 0 +scienc 0 +engineeringmail 0 +stop 0 +univers 0 +washingtonseattl 0 +locat 0 +sieg 0 +phone 0 +take 0 +ling 0 +edit 0 +mossi 0 +bitsthank 0 +visit 0 +visitor 0 +number 0 +worst 0 +view 0 +us 0 +headscapewhenev 0 +second 0 +year 0 +gradstud 0 +actual 0 +liber 0 +artist 0 +interest 0 +inlinguist 0 +confus 0 +good 0 +get 0 +know 0 +alreadi 0 +ultrahotlist 0 +site 0 +ofal 0 +time 0 +search 0 +onlin 0 +refer 0 +forsometh 0 +glorifi 0 +hotlist 0 +doesn 0 +thave 0 +urouletteto 0 +random 0 +find 0 +past 0 +institut 0 +ofwhich 0 +mental 0 +person 0 +quot 0 +file 0 +songsand 0 +poem 0 +fictiti 0 +thrash 0 +band 0 +puriti 0 +test 0 +origin 0 +work 0 +tokeep 0 +touch 0 +finger 0 +mail 0 +guestbook 0 +pagesfrom 0 +friend 0 +idea 0 +includ 0 +aslfingerspel 0 +snapshot 0 +blatantli 0 +stolen 0 +brad 0 +chamberlain 0 +michalowski 0 +dept 0 +complet 0 +sanityerad 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..bd207866 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,52 @@ +paul 1 +comput 1 +washington 0 +research 0 +proof 0 +beamepaul 0 +beam 0 +associ 0 +scienc 0 +receiv 0 +univers 0 +computationalcomplex 0 +recent 0 +professor 0 +thedepart 0 +engineeringat 0 +theunivers 0 +mathemat 0 +sciencein 0 +toronto 0 +post 0 +doctor 0 +academicyear 0 +join 0 +autumn 0 +presidentialyoung 0 +investig 0 +award 0 +concern 0 +primarili 0 +theoret 0 +aspect 0 +paralleland 0 +distribut 0 +concentr 0 +connect 0 +theori 0 +particular 0 +complex 0 +inproposit 0 +system 0 +enjoi 0 +squash 0 +softbal 0 +sport 0 +enthusiasm 0 +cancompens 0 +lack 0 +talent 0 +paper 0 +qual 0 +project 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..b4a35d28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,84 @@ +code 1 +devic 1 +sport 1 +ultim 1 +frisbe 1 +confer 1 +champion 1 +interest 1 +david 0 +beckerdavid 0 +beckercontact 0 +info 0 +mark 0 +spot 0 +stuff 0 +spin 0 +much 0 +time 0 +goe 0 +makingspina 0 +real 0 +oper 0 +system 0 +respons 0 +level 0 +borrow 0 +model 0 +drvier 0 +support 0 +build 0 +environ 0 +manag 0 +keep 0 +develop 0 +platform 0 +function 0 +somedai 0 +getto 0 +perform 0 +measur 0 +optim 0 +tri 0 +bunch 0 +favorit 0 +tripl 0 +jump 0 +minnesota 0 +athlet 0 +bethel 0 +colleg 0 +volleybal 0 +men 0 +grad 0 +team 0 +plai 0 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 0 +handbal 0 +also 0 +playracquetballgolftenni 0 +done 0 +bridgecampingcanoeingdisc 0 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 0 +skiingweightliftingwhitewat 0 +raftinghorseback 0 +ridingmountain 0 +bikingin 0 +line 0 +skate 0 +librari 0 +econom 0 +topic 0 +particularli 0 +free 0 +bank 0 +anti 0 +trust 0 +currenc 0 +ssto 0 +rlv 0 +theologi 0 +centurai 0 +railroad 0 +boot 0 +locomot 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..c0760422 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,280 @@ +bershad 1 +system 0 +oper 0 +brian 0 +perform 0 +extens 0 +workshop 0 +usenix 0 +support 0 +sosp 0 +asplo 0 +paper 0 +project 0 +dynam 0 +us 0 +appear 0 +compil 0 +softwar 0 +comput 0 +network 0 +memori 0 +manag 0 +pardyak 0 +romer 0 +februari 0 +microkernel 0 +univers 0 +seattl 0 +distribut 0 +architectur 0 +spin 0 +mobil 0 +design 0 +octob 0 +przemyslaw 0 +osdi 0 +fast 0 +confer 0 +marc 0 +fiuczynski 0 +savag 0 +chen 0 +maeda 0 +inform 0 +washington 0 +ofwashington 0 +parallel 0 +work 0 +isca 0 +midwai 0 +winter 0 +master 0 +qual 0 +karlin 0 +sigop 0 +associ 0 +symposium 0 +onoper 0 +implemen 0 +structur 0 +voelker 0 +languag 0 +ausland 0 +philipos 0 +chamber 0 +egger 0 +protocol 0 +specif 0 +write 0 +emin 0 +sirer 0 +stefan 0 +zekauska 0 +sawdon 0 +cach 0 +hardwar 0 +larg 0 +servic 0 +kernel 0 +stock 0 +mach 0 +commun 0 +machnix 0 +drave 0 +forin 0 +wwo 0 +relat 0 +eduwork 0 +scienc 0 +engin 0 +depart 0 +home 0 +street 0 +assist 0 +professor 0 +sinc 0 +receiv 0 +took 0 +brief 0 +respit 0 +experi 0 +post 0 +industri 0 +cultur 0 +northeast 0 +return 0 +northwest 0 +coffe 0 +research 0 +hasappear 0 +toc 0 +although 0 +seem 0 +asigmetr 0 +publish 0 +save 0 +life 0 +besid 0 +run 0 +plai 0 +squash 0 +hang 0 +thestairmast 0 +includ 0 +extensibleoper 0 +carnegi 0 +mellon 0 +parallelnetwork 0 +scalabl 0 +rocki 0 +thesequel 0 +etch 0 +binari 0 +instrument 0 +optimizationcours 0 +look 0 +click 0 +list 0 +youmight 0 +degre 0 +recent 0 +trace 0 +driven 0 +comparison 0 +algorithm 0 +prefetch 0 +cachingtraci 0 +kimbrel 0 +andrew 0 +tomkin 0 +hugo 0 +patterson 0 +edward 0 +felten 0 +garth 0 +gibson 0 +anna 0 +bind 0 +extensiblesystem 0 +interpret 0 +theodor 0 +denni 0 +geoffrei 0 +alec 0 +wolman 0 +wayn 0 +wong 0 +jean 0 +loup 0 +baer 0 +henri 0 +levi 0 +effect 0 +dynamiccompil 0 +program 0 +implementationj 0 +applic 0 +modula 0 +greg 0 +defouw 0 +mari 0 +alapat 0 +wilson 0 +hsieh 0 +charl 0 +garrett 0 +david 0 +becker 0 +safe 0 +link 0 +automat 0 +event 0 +dispatch 0 +systemsc 0 +mock 0 +safeti 0 +reduc 0 +overhead 0 +onlinesuperpag 0 +promot 0 +ohlrich 0 +detect 0 +sharedmemori 0 +appearedin 0 +page 0 +map 0 +polici 0 +conflictresolut 0 +standard 0 +mobisa 0 +inth 0 +issu 0 +avoid 0 +conflict 0 +miss 0 +direct 0 +mappedcach 0 +forappl 0 +uwtechn 0 +report 0 +effici 0 +packet 0 +demultiplex 0 +multipl 0 +endpoint 0 +messag 0 +yuhara 0 +moss 0 +impact 0 +decomposit 0 +high 0 +practic 0 +consider 0 +block 0 +concurr 0 +object 0 +interrupt 0 +prioriti 0 +share 0 +ieee 0 +compcon 0 +local 0 +area 0 +andmostli 0 +watson 0 +moblic 0 +consist 0 +virtual 0 +index 0 +wheeler 0 +mutual 0 +exclus 0 +uniprocessor 0 +redel 0 +elli 0 +primit 0 +ginsburg 0 +baron 0 +microbenchmark 0 +evalu 0 +increas 0 +irrelev 0 +micro 0 +base 0 +golub 0 +continu 0 +implement 0 +thread 0 +inoper 0 +rashid 0 +dean 0 +arpa 0 +rain 0 +citi 0 +hash 0 +hous 0 +harrier 0 +rel 0 +abduct 0 +alien 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..87b24a5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,152 @@ +page 1 +search 0 +new 0 +home 0 +scienc 0 +seattl 0 +washington 0 +list 0 +american 0 +directori 0 +budget 0 +doorenbo 0 +current 0 +work 0 +todai 0 +project 0 +pointer 0 +link 0 +yahoo 0 +world 0 +yellow 0 +white 0 +sport 0 +pagebob 0 +depart 0 +comput 0 +engineeringunivers 0 +washingtonbox 0 +offic 0 +sieg 0 +hall 0 +bobd 0 +netbot 0 +union 0 +place 0 +suit 0 +voic 0 +daili 0 +comedi 0 +dilbert 0 +strip 0 +boffo 0 +david 0 +letterman 0 +geeki 0 +zdnet 0 +anchordesk 0 +magazin 0 +good 0 +stuff 0 +shortcut 0 +cool 0 +tool 0 +research 0 +postdoc 0 +oren 0 +etzioni 0 +steve 0 +hank 0 +weld 0 +softbot 0 +also 0 +particular 0 +shopbot 0 +internet 0 +shop 0 +agent 0 +previou 0 +soar 0 +thesi 0 +site 0 +collect 0 +repositori 0 +canada 0 +gopher 0 +scientif 0 +sigma 0 +scientist 0 +miscellan 0 +meta 0 +metacrawl 0 +savvysearch 0 +alta 0 +vista 0 +lyco 0 +inktomi 0 +open 0 +text 0 +infoseek 0 +excit 0 +crawler 0 +hotbot 0 +hierarch 0 +select 0 +magellan 0 +pointcom 0 +engin 0 +guid 0 +onlin 0 +telephon 0 +network 0 +switchboard 0 +cnnfn 0 +newshour 0 +post 0 +reuter 0 +headlin 0 +social 0 +cafe 0 +report 0 +boston 0 +globe 0 +span 0 +time 0 +view 0 +slate 0 +feed 0 +salon 0 +atlant 0 +monthli 0 +harper 0 +espn 0 +zone 0 +govern 0 +fedworld 0 +index 0 +hous 0 +congress 0 +arpa 0 +feder 0 +deficit 0 +nation 0 +debt 0 +clock 0 +concord 0 +coalit 0 +hand 0 +balanc 0 +bipartisan 0 +commiss 0 +entitl 0 +reform 0 +univers 0 +museum 0 +past 0 +life 0 +pittsburgh 0 +upcom 0 +birthdai 0 +person 0 +andfun 0 +pagebobd 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..8081e677 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,100 @@ +comput 1 +languag 0 +born 0 +scienc 0 +univers 0 +washington 0 +home 0 +depart 0 +constraint 0 +program 0 +research 0 +engin 0 +orient 0 +human 0 +societi 0 +page 0 +recent 0 +autumn 0 +graduat 0 +work 0 +receiv 0 +xerox 0 +spent 0 +alan 0 +pagealan 0 +pagei 0 +professor 0 +principalresearch 0 +interest 0 +base 0 +system 0 +object 0 +logic 0 +computerinteract 0 +current 0 +activitiesuwconstraint 0 +contain 0 +link 0 +paper 0 +public 0 +domainsourc 0 +code 0 +satisfact 0 +algorithm 0 +media 0 +technolog 0 +democraci 0 +groupuw 0 +student 0 +also 0 +idea 0 +qualsproject 0 +teachingher 0 +cours 0 +taught 0 +concept 0 +winter 0 +interact 0 +spring 0 +informationhistori 0 +grew 0 +idaho 0 +reed 0 +colleg 0 +mathemat 0 +atstanford 0 +degre 0 +dissert 0 +done 0 +associ 0 +paloalto 0 +center 0 +concern 0 +simulationlaboratori 0 +year 0 +post 0 +doctoralfellow 0 +artifici 0 +intellig 0 +ofedinburgh 0 +scotland 0 +mechan 0 +problem 0 +solv 0 +symbolicalgebra 0 +join 0 +andexcept 0 +sabbat 0 +europarc 0 +cambridg 0 +england 0 +havebeen 0 +sinc 0 +address 0 +dept 0 +seattl 0 +phone 0 +email 0 +eduwww 0 +http 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..e89ecb0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,20 @@ +thing 1 +brad 0 +home 0 +pagebrad 0 +chamberlainphoto 0 +credit 0 +mike 0 +perkowitzth 0 +probabl 0 +couldn 0 +care 0 +less 0 +offic 0 +address 0 +work 0 +like 0 +ad 0 +subset 0 +ofth 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..10b43d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,125 @@ +html 1 +comput 0 +mumei 0 +postscript 0 +present 0 +brendan 0 +washington 0 +fall 0 +work 0 +problem 0 +physic 0 +map 0 +us 0 +also 0 +confer 0 +galleri 0 +home 0 +pagebrendan 0 +mumeyi 0 +student 0 +depart 0 +scienceat 0 +theunivers 0 +enter 0 +expect 0 +graduat 0 +around 0 +contact 0 +information 0 +mail 0 +edufor 0 +address 0 +click 0 +curriculum 0 +vitaein 0 +htmlorpostscriptformat 0 +academ 0 +interestsi 0 +would 0 +call 0 +appli 0 +theoret 0 +scientist 0 +current 0 +biologi 0 +moment 0 +look 0 +build 0 +rough 0 +locat 0 +landmark 0 +genom 0 +gener 0 +speak 0 +interest 0 +theori 0 +math 0 +tosolv 0 +reason 0 +practic 0 +done 0 +incomput 0 +astrophys 0 +hpcc 0 +groupher 0 +onlin 0 +papersb 0 +power 0 +clone 0 +overlap 0 +test 0 +poster 0 +ismb 0 +aspect 0 +probe 0 +survei 0 +paper 0 +written 0 +fulfil 0 +candidaci 0 +requir 0 +find 0 +cluster 0 +quickli 0 +parallel 0 +dimac 0 +challeng 0 +klaw 0 +upper 0 +lower 0 +bound 0 +construct 0 +alphabet 0 +binari 0 +tree 0 +soda 0 +siam 0 +ofdiscret 0 +mathemat 0 +note 0 +version 0 +produc 0 +latexhtml 0 +containsom 0 +error 0 +readabl 0 +part 0 +recreationhik 0 +cycl 0 +ski 0 +climb 0 +drink 0 +coffeeto 0 +name 0 +sailingand 0 +hope 0 +sometim 0 +like 0 +plai 0 +bridg 0 +older 0 +photo 0 +first 0 +second 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..1bbf3d45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,88 @@ +comput 1 +project 1 +interest 0 +us 0 +cours 0 +bricker 0 +washington 0 +research 0 +graphic 0 +learn 0 +lauren 0 +brickerlauren 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +click 0 +need 0 +clue 0 +info 0 +primarli 0 +userinterfac 0 +although 0 +self 0 +proclaim 0 +groupi 0 +current 0 +work 0 +stevetanimoto 0 +mathematicsexperi 0 +imag 0 +process 0 +metip 0 +goal 0 +ofthi 0 +junior 0 +high 0 +school 0 +kid 0 +mathemat 0 +usingexploratori 0 +rather 0 +rote 0 +method 0 +particular 0 +minterest 0 +develop 0 +supportedcollabor 0 +cscl 0 +user 0 +interfac 0 +inthi 0 +well 0 +workin 0 +lawk 0 +dawg 0 +interfacea 0 +fairli 0 +extens 0 +resumeschool 0 +dazethi 0 +quarterdoth 0 +quartershuman 0 +interact 0 +spring 0 +quarter 0 +writeup 0 +final 0 +writeupwhat 0 +asystem 0 +insocieti 0 +excit 0 +hobbi 0 +enjoi 0 +busi 0 +lifesportscookingpotteri 0 +even 0 +studio 0 +garag 0 +year 0 +stuffbecaus 0 +ask 0 +itaddress 0 +last 0 +modifi 0 +mondai 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..362ee194 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,6 @@ +carlson 1 +washington 0 +adam 0 +carlsonadam 0 +comput 0 +scienc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..c65aff35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,44 @@ +certain 1 +andrew 0 +page 0 +washington 0 +download 0 +look 0 +model 0 +current 0 +work 0 +viewer 0 +modifi 0 +home 0 +server 0 +fix 0 +give 0 +note 0 +interest 0 +follow 0 +direct 0 +theview 0 +tonyderos 0 +david 0 +salesin 0 +werner 0 +stuetzl 0 +duchamp 0 +jovan 0 +popov 0 +scanningproject 0 +build 0 +requir 0 +sgigraph 0 +workstat 0 +paper 0 +netscap 0 +shouldalso 0 +browser 0 +similar 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..bec14392 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,91 @@ +chou 1 +washington 0 +school 0 +fall 0 +line 0 +comput 0 +java 0 +welcom 0 +homepag 0 +grad 0 +student 0 +computersci 0 +seattl 0 +pictur 0 +right 0 +first 0 +quarter 0 +ross 0 +think 0 +scari 0 +relat 0 +infoth 0 +chinook 0 +projectmi 0 +schedulemi 0 +list 0 +publicationscod 0 +workshop 0 +codesignpersonalperson 0 +info 0 +taiwan 0 +greec 0 +resumefoodi 0 +enjoi 0 +cook 0 +peopl 0 +open 0 +restaur 0 +ofpeopl 0 +favorit 0 +dish 0 +includ 0 +stir 0 +fri 0 +rice 0 +noodl 0 +beefskew 0 +recip 0 +toysb 0 +geek 0 +computersand 0 +cool 0 +toi 0 +taiwanesei 0 +also 0 +promot 0 +taiwaneselanguag 0 +current 0 +develop 0 +tool 0 +taiwanes 0 +sureto 0 +check 0 +experiment 0 +taiwanesedictionari 0 +though 0 +absolut 0 +requir 0 +page 0 +best 0 +viewedif 0 +instal 0 +chines 0 +charact 0 +font 0 +us 0 +enabl 0 +browser 0 +like 0 +netscap 0 +beabl 0 +applet 0 +yellow 0 +ball 0 +bouncingov 0 +barnei 0 +purpl 0 +dynosaur 0 +last 0 +updat 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..21d03dc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,115 @@ +math 1 +home 0 +page 0 +contest 0 +program 0 +problem 0 +html 0 +set 0 +color 0 +search 0 +linux 0 +result 0 +final 0 +version 0 +internet 0 +comput 0 +refer 0 +thing 0 +done 0 +washington 0 +depart 0 +engin 0 +spring 0 +meet 0 +corei 0 +andersoncorei 0 +andersonth 0 +interest 0 +late 0 +research 0 +percept 0 +automat 0 +correct 0 +induc 0 +compet 0 +programm 0 +month 0 +info 0 +localtalk 0 +adapt 0 +plai 0 +wavelet 0 +intern 0 +collegi 0 +pacif 0 +region 0 +previou 0 +year 0 +duke 0 +rsum 0 +onlin 0 +recent 0 +august 0 +review 0 +graphic 0 +text 0 +book 0 +graduat 0 +univser 0 +highlin 0 +commun 0 +colleg 0 +self 0 +tune 0 +fpga 0 +fall 0 +help 0 +polli 0 +organ 0 +contribut 0 +open 0 +hous 0 +april 0 +manag 0 +chapter 0 +treasuri 0 +develop 0 +read 0 +macintosh 0 +good 0 +servic 0 +providercool 0 +found 0 +usag 0 +statist 0 +lurker 0 +guid 0 +babylon 0 +sunsit 0 +archiv 0 +dilbert 0 +zone 0 +brother 0 +pageus 0 +link 0 +peek 0 +insid 0 +term 0 +lab 0 +featur 0 +netscap 0 +scienc 0 +univers 0 +washinton 0 +uwtv 0 +tech 0 +notesmi 0 +autumn 0 +schedul 0 +mondai 0 +tuesdai 0 +wednesdai 0 +thursdai 0 +fridai 0 +corin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..41813bef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,65 @@ +washington 1 +valedictorian 1 +page 0 +enjoy 0 +experi 0 +ball 0 +address 0 +found 0 +craig 0 +experiencecraig 0 +kaplancurr 0 +locat 0 +student 0 +depart 0 +comput 0 +scienc 0 +universityof 0 +seattl 0 +well 0 +copyof 0 +homepag 0 +univers 0 +waterloo 0 +time 0 +modifi 0 +appropri 0 +tomi 0 +current 0 +situat 0 +near 0 +undergraduatewa 0 +grad 0 +photo 0 +fromth 0 +second 0 +occur 0 +saturdai 0 +convoc 0 +cannot 0 +express 0 +honour 0 +felt 0 +wonder 0 +graduat 0 +class 0 +choos 0 +repres 0 +incident 0 +didn 0 +know 0 +parent 0 +minut 0 +start 0 +ceremoni 0 +sai 0 +never 0 +forgiv 0 +text 0 +anyon 0 +curiou 0 +visitor 0 +number 0 +last 0 +updat 0 +cskaplan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..7e9618eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,56 @@ +codi 1 +kwok 1 +welcom 0 +work 0 +sanctuari 0 +nausicaa 0 +vallei 0 +home 0 +page 0 +first 0 +thing 0 +thought 0 +peopl 0 +think 0 +mean 0 +aliv 0 +asami 0 +chiaki 0 +chung 0 +ctkwok 0 +washington 0 +edui 0 +graduat 0 +student 0 +weld 0 +andoren 0 +etzioni 0 +plan 0 +andsoftwar 0 +agent 0 +ingram 0 +softbot 0 +aiuw 0 +contact 0 +informationleisur 0 +windlaputa 0 +castl 0 +skyhyp 0 +futur 0 +vision 0 +gunnm 0 +wind 0 +arch 0 +vile 0 +java 0 +applet 0 +anim 0 +take 0 +load 0 +last 0 +modifi 0 +visitor 0 +sinc 0 +figur 0 +doom 0 +numer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..0769b162 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,30 @@ +darren 1 +cronquist 1 +washington 1 +inform 1 +current 1 +resum 1 +curriculum 1 +darrenc 0 +depart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +welcom 0 +home 0 +page 0 +last 0 +updat 0 +employ 0 +plan 0 +complet 0 +myph 0 +html 0 +postscript 0 +vita 0 +vitaperson 0 +rest 0 +homepag 0 +underconstruct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..7cac078d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,193 @@ +plan 1 +interfac 0 +david 0 +christianson 0 +user 0 +work 0 +assist 0 +automat 0 +also 0 +second 0 +till 0 +washington 0 +comput 0 +univers 0 +current 0 +graduat 0 +interest 0 +studi 0 +activ 0 +interact 0 +recent 0 +build 0 +shop 0 +simpl 0 +anderson 0 +weld 0 +salesin 0 +cohen 0 +develop 0 +camera 0 +local 0 +dave 0 +inform 0 +check 0 +midnight 0 +nowher 0 +babi 0 +christiansondbc 0 +scienc 0 +engin 0 +depart 0 +seattl 0 +workin 0 +spare 0 +time 0 +third 0 +year 0 +student 0 +atth 0 +inaiand 0 +gotten 0 +mayb 0 +even 0 +graphicsa 0 +well 0 +knowledg 0 +represent 0 +goal 0 +directedbehavior 0 +mix 0 +initi 0 +recognit 0 +buzzwordacquisit 0 +context 0 +human 0 +prototyp 0 +us 0 +intellig 0 +bobdoorenbo 0 +shopbot 0 +rather 0 +somehowintegr 0 +variou 0 +line 0 +store 0 +built 0 +applicationthat 0 +read 0 +pars 0 +basket 0 +order 0 +determinewhat 0 +product 0 +collabor 0 +sean 0 +michael 0 +moviethat 0 +demonstr 0 +appl 0 +intern 0 +russel 0 +technologyinto 0 +experi 0 +perpetr 0 +graphic 0 +debugg 0 +theucpop 0 +famili 0 +planner 0 +programm 0 +client 0 +number 0 +peopl 0 +whose 0 +live 0 +touch 0 +grow 0 +everi 0 +manual 0 +isher 0 +spent 0 +undergradu 0 +career 0 +theunivers 0 +chicago 0 +carboload 0 +harold 0 +chicken 0 +theanim 0 +agent 0 +publicationschristianson 0 +declar 0 +control 0 +cinematographi 0 +appear 0 +aaai 0 +firbi 0 +mcdougal 0 +fast 0 +map 0 +support 0 +navig 0 +object 0 +symposium 0 +sensor 0 +fusion 0 +boston 0 +novemb 0 +find 0 +thechateau 0 +cynic 0 +offic 0 +withfreder 0 +william 0 +darren 0 +adam 0 +gloriou 0 +leader 0 +juan 0 +import 0 +thing 0 +favorit 0 +practic 0 +judo 0 +compet 0 +senior 0 +nation 0 +sibl 0 +sisterjust 0 +school 0 +librari 0 +michigan 0 +surf 0 +cut 0 +edg 0 +research 0 +supercollid 0 +realli 0 +feel 0 +like 0 +slack 0 +mirski 0 +help 0 +watch 0 +hero 0 +youth 0 +duel 0 +death 0 +wwwf 0 +grudg 0 +match 0 +fame 0 +fortun 0 +respons 0 +week 0 +game 0 +domain 0 +straight 0 +doomgat 0 +sai 0 +evil 0 +book 0 +tick 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..3a995ae0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,39 @@ +page 1 +johnson 1 +washington 1 +take 1 +quiz 1 +home 0 +dave 0 +david 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +research 0 +interest 0 +navig 0 +assist 0 +hypertext 0 +readersproject 0 +activ 0 +racquetbal 0 +golf 0 +basketbal 0 +softbal 0 +tutori 0 +script 0 +fit 0 +togeth 0 +theracquetbal 0 +creat 0 +look 0 +thecreat 0 +assess 0 +form 0 +give 0 +last 0 +modifi 0 +mondai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..d0c7dfd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,161 @@ +spin 1 +seattl 1 +system 0 +page 0 +extens 0 +undergradu 0 +notr 0 +dame 0 +david 0 +dion 0 +washington 0 +current 0 +unix 0 +server 0 +level 0 +univers 0 +world 0 +person 0 +research 0 +work 0 +dynam 0 +kernel 0 +languag 0 +modula 0 +user 0 +mach 0 +first 0 +stuff 0 +time 0 +surf 0 +visit 0 +comput 0 +scienc 0 +condit 0 +major 0 +life 0 +stai 0 +touch 0 +movi 0 +transport 0 +traffic 0 +home 0 +ddion 0 +yeah 0 +right 0 +like 0 +anyth 0 +okai 0 +mayb 0 +littl 0 +academ 0 +past 0 +year 0 +brian 0 +bershad 0 +primari 0 +respons 0 +construct 0 +thespinoper 0 +oper 0 +applic 0 +achiev 0 +impress 0 +perform 0 +extend 0 +safeti 0 +protect 0 +maintain 0 +written 0 +slight 0 +variant 0 +think 0 +run 0 +link 0 +us 0 +intercept 0 +call 0 +emul 0 +environ 0 +previou 0 +havework 0 +studi 0 +help 0 +implement 0 +memori 0 +manag 0 +commun 0 +subsystem 0 +afraid 0 +around 0 +wouldn 0 +claim 0 +know 0 +cool 0 +ipromis 0 +soon 0 +netscap 0 +enhancedthi 0 +hold 0 +breath 0 +meanwhil 0 +site 0 +occasion 0 +distract 0 +engin 0 +reason 0 +homepag 0 +featur 0 +date 0 +view 0 +campu 0 +weather 0 +occupi 0 +vast 0 +program 0 +dai 0 +debug 0 +manual 0 +solv 0 +countless 0 +problem 0 +institut 0 +band 0 +trumpet 0 +section 0 +racquetbal 0 +ladder 0 +main 0 +outlet 0 +athlet 0 +espn 0 +sportzon 0 +sport 0 +todai 0 +rest 0 +dilbert 0 +learn 0 +real 0 +restaur 0 +fine 0 +eateri 0 +recommend 0 +other 0 +region 0 +list 0 +line 0 +guid 0 +excel 0 +public 0 +statu 0 +infam 0 +marin 0 +leagu 0 +basebal 0 +team 0 +bean 0 +shop 0 +last 0 +modifi 0 +mondai 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..0ae7e620 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,127 @@ +parallel 1 +derrick 0 +comput 0 +project 0 +data 0 +administr 0 +extens 0 +colleg 0 +advanc 0 +scienc 0 +univers 0 +washington 0 +offer 0 +graduat 0 +student 0 +fall 0 +experi 0 +commun 0 +develop 0 +design 0 +system 0 +tool 0 +research 0 +perform 0 +portabl 0 +collect 0 +topic 0 +engin 0 +teach 0 +assist 0 +index 0 +weathersbi 0 +bullssupersonicsi 0 +pursu 0 +phdin 0 +seattl 0 +beauti 0 +campu 0 +li 0 +heart 0 +ofseattl 0 +mani 0 +divers 0 +often 0 +prei 0 +therebyextend 0 +time 0 +averag 0 +career 0 +practic 0 +interestssignific 0 +skill 0 +sheet 0 +share 0 +server 0 +arrai 0 +languag 0 +compil 0 +host 0 +token 0 +ring 0 +protocol 0 +base 0 +network 0 +securityresearch 0 +interestsmi 0 +center 0 +around 0 +distribut 0 +challengespres 0 +field 0 +on 0 +conveni 0 +typic 0 +foremost 0 +goal 0 +run 0 +ordistribut 0 +environ 0 +howev 0 +suffer 0 +final 0 +theseenviron 0 +extra 0 +challeng 0 +asynchron 0 +independ 0 +event 0 +daunt 0 +task 0 +distributedenviron 0 +issu 0 +address 0 +group 0 +page 0 +spaa 0 +paper 0 +gener 0 +comm 0 +dissert 0 +integr 0 +softwar 0 +projectacadem 0 +achievementsinstructor 0 +summer 0 +curriculum 0 +cours 0 +certif 0 +program 0 +collegeinstructor 0 +start 0 +undergradu 0 +tutor 0 +women 0 +minoritystud 0 +depart 0 +engineeringoutstand 0 +award 0 +person 0 +interest 0 +interact 0 +cnnfinanciala 0 +newslet 0 +would 0 +javaw 0 +weathersbyderrick 0 +edutu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..2cde7bd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,123 @@ +game 1 +northern 1 +note 1 +postscript 1 +dewei 0 +draft 0 +read 0 +ireland 0 +page 0 +brian 0 +washington 0 +need 0 +like 0 +avail 0 +carri 0 +hopefulli 0 +work 0 +pleas 0 +statist 0 +terrorist 0 +relat 0 +link 0 +thorough 0 +china 0 +home 0 +pagebrian 0 +deweyabout 0 +first 0 +year 0 +student 0 +depart 0 +comput 0 +scienceand 0 +engin 0 +univers 0 +doyou 0 +know 0 +music 0 +book 0 +ilov 0 +plai 0 +ride 0 +bike 0 +write 0 +letter 0 +drink 0 +myfavorit 0 +color 0 +blue 0 +favorit 0 +ocean 0 +atlant 0 +oldroomm 0 +think 0 +anim 0 +public 0 +finger 0 +irelandi 0 +return 0 +trip 0 +belfast 0 +june 0 +sixti 0 +pagesof 0 +interview 0 +luggag 0 +getthos 0 +semi 0 +chaotic 0 +readabl 0 +enlighteningformat 0 +feel 0 +free 0 +shoulder 0 +period 0 +make 0 +html 0 +format 0 +goal 0 +encourag 0 +frequent 0 +feedback 0 +soon 0 +possibl 0 +mail 0 +well 0 +much 0 +time 0 +thisproject 0 +late 0 +progress 0 +near 0 +ofth 0 +summer 0 +imag 0 +thecurr 0 +sinn 0 +fein 0 +inform 0 +bibliographi 0 +death 0 +injuri 0 +alreadi 0 +addict 0 +recuri 0 +check 0 +fascin 0 +histori 0 +develop 0 +ancient 0 +imageek 0 +york 0 +cuni 0 +provid 0 +mani 0 +interest 0 +site 0 +jansteen 0 +seen 0 +edulast 0 +modifi 0 +tuesdai 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..de19e950 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,119 @@ +washington 1 +comput 1 +home 0 +dfasulo 0 +scienc 0 +fiction 0 +graduat 0 +student 0 +depart 0 +univers 0 +seattl 0 +work 0 +interest 0 +amber 0 +recommend 0 +random 0 +thing 0 +theori 0 +pagewelcom 0 +fasulo 0 +page 0 +third 0 +year 0 +williamscolleg 0 +computersci 0 +appli 0 +mathemat 0 +class 0 +note 0 +portrait 0 +contain 0 +slight 0 +inaccuraci 0 +find 0 +eastlak 0 +engin 0 +ofwashington 0 +offic 0 +chateau 0 +email 0 +academ 0 +graphic 0 +biologi 0 +person 0 +fantasi 0 +written 0 +otherwis 0 +fact 0 +probabl 0 +honest 0 +identifi 0 +illustr 0 +merlin 0 +corwin 0 +pictur 0 +favorit 0 +charact 0 +mine 0 +roger 0 +zelazni 0 +chronicl 0 +imag 0 +taken 0 +drpg 0 +publish 0 +phage 0 +press 0 +would 0 +anyon 0 +like 0 +book 0 +also 0 +seri 0 +babylon 0 +creativ 0 +write 0 +poetri 0 +absolut 0 +link 0 +athlet 0 +particular 0 +order 0 +tenni 0 +kwon 0 +distanc 0 +run 0 +role 0 +plai 0 +depend 0 +cat 0 +homepag 0 +friend 0 +fellow 0 +william 0 +alumnu 0 +sean 0 +sandi 0 +look 0 +woman 0 +former 0 +grad 0 +wendi 0 +belluomini 0 +dress 0 +dogbert 0 +peopl 0 +ask 0 +worthwhil 0 +area 0 +research 0 +whether 0 +abstract 0 +us 0 +better 0 +explan 0 +goal 0 +futur 0 +ever 0 +given 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..a0b1c9a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,30 @@ +martin 1 +dickei 1 +home 1 +page 1 +dickeycomput 0 +scienc 0 +engineeringunivers 0 +washingtonwelcom 0 +weekli 0 +schedulenarr 0 +resum 0 +blurbcs 0 +engr 0 +autumn 0 +favorit 0 +seattl 0 +coffe 0 +housesfavorit 0 +internet 0 +site 0 +sister 0 +bookspirograph 0 +java 0 +script 0 +garg 0 +plai 0 +washington 0 +eduupd 0 +tuesdai 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..606f9d5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,187 @@ +peopl 1 +washington 0 +comput 0 +bershad 0 +chen 0 +romer 0 +cach 0 +seattl 0 +vote 0 +commit 0 +denni 0 +high 0 +baer 0 +brad 0 +calder 0 +grunwald 0 +paper 0 +issu 0 +instruct 0 +polici 0 +dynam 0 +page 0 +conflict 0 +univers 0 +dlee 0 +home 0 +good 0 +alwai 0 +huberthumphrei 0 +begin 0 +occur 0 +would 0 +dream 0 +primari 0 +interest 0 +perform 0 +architectur 0 +researchwith 0 +smart 0 +likejean 0 +loup 0 +brian 0 +alan 0 +eustac 0 +dirk 0 +andt 0 +recent 0 +energi 0 +manag 0 +system 0 +gener 0 +fetch 0 +specul 0 +execut 0 +isca 0 +map 0 +resolutionon 0 +standard 0 +hardwar 0 +osdi 0 +avoid 0 +miss 0 +larg 0 +direct 0 +mappedcach 0 +asplo 0 +effect 0 +differ 0 +code 0 +reorder 0 +algorithm 0 +qualifi 0 +project 0 +report 0 +contact 0 +work 0 +scienc 0 +engin 0 +depart 0 +offic 0 +sieg 0 +index 0 +pointer 0 +hotlist 0 +entri 0 +point 0 +explor 0 +yahoo 0 +yellow 0 +internet 0 +lyco 0 +realli 0 +search 0 +enginefor 0 +guid 0 +click 0 +million 0 +sensibl 0 +mind 0 +conced 0 +thatpolit 0 +almost 0 +choic 0 +lesser 0 +evil 0 +tweedledumand 0 +tweedlede 0 +abstain 0 +theyar 0 +present 0 +presid 0 +appoint 0 +go 0 +torummag 0 +around 0 +live 0 +next 0 +four 0 +year 0 +consid 0 +allth 0 +stew 0 +rather 0 +show 0 +humphrei 0 +taught 0 +lesson 0 +still 0 +enjoi 0 +nixon 0 +suprem 0 +court 0 +whentricia 0 +juli 0 +find 0 +silver 0 +thread 0 +among 0 +gold 0 +theblack 0 +russel 0 +baker 0 +ford 0 +without 0 +flummeri 0 +hesit 0 +chanc 0 +draw 0 +back 0 +ineffect 0 +concern 0 +act 0 +initi 0 +element 0 +truth 0 +ignor 0 +kill 0 +countless 0 +idea 0 +splendid 0 +plan 0 +moment 0 +definit 0 +provid 0 +move 0 +sort 0 +thing 0 +help 0 +never 0 +otherwis 0 +whole 0 +stream 0 +event 0 +decis 0 +rais 0 +favor 0 +manner 0 +unforeseen 0 +incid 0 +meet 0 +materi 0 +assist 0 +magic 0 +could 0 +come 0 +whatev 0 +goeth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..a1c3af4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,119 @@ +home 1 +inform 0 +comput 0 +page 0 +doug 0 +zongker 0 +research 0 +contact 0 +todai 0 +wast 0 +time 0 +sarcasm 0 +actual 0 +scienc 0 +univers 0 +depart 0 +homepag 0 +anoth 0 +unnecessari 0 +pagececi 0 +well 0 +much 0 +anywai 0 +sure 0 +casual 0 +mention 0 +name 0 +buti 0 +feel 0 +strongli 0 +bold 0 +noless 0 +exhaust 0 +list 0 +usual 0 +public 0 +classeshow 0 +meth 0 +unusu 0 +toxic 0 +custard 0 +workshop 0 +filesth 0 +mento 0 +galleryvisit 0 +supercolliderth 0 +useless 0 +date 0 +cron 0 +player 0 +databas 0 +test 0 +ground 0 +caveat 0 +user 0 +lucki 0 +brows 0 +avirtu 0 +intend 0 +treasur 0 +trove 0 +whichmai 0 +us 0 +realli 0 +first 0 +year 0 +grad 0 +student 0 +engineeringdepart 0 +ofwashington 0 +graduat 0 +michigan 0 +state 0 +imajor 0 +andminor 0 +math 0 +dubiou 0 +honorsjunior 0 +apprentic 0 +keeper 0 +brotherhood 0 +crouton 0 +death 0 +cart 0 +pizzicato 0 +intern 0 +club 0 +member 0 +bryan 0 +worst 0 +execut 0 +vice 0 +presid 0 +charg 0 +emerg 0 +backup 0 +clicker 0 +cruis 0 +highwai 0 +inhigh 0 +gear 0 +sit 0 +buttstar 0 +screen 0 +tast 0 +background 0 +stolen 0 +labor 0 +wheremi 0 +sister 0 +work 0 +dougz 0 +washington 0 +class 0 +last 0 +edit 0 +thursdai 0 +novemb 0 +hit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..1e514067 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,92 @@ +system 1 +oper 0 +dylan 0 +interact 0 +implement 0 +schedul 0 +work 0 +mcname 0 +washington 0 +research 0 +applic 0 +perform 0 +activ 0 +project 0 +current 0 +oodb 0 +us 0 +java 0 +jame 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +person 0 +inform 0 +concentr 0 +applicationsand 0 +mechanismthat 0 +allow 0 +page 0 +replacementpolici 0 +kernel 0 +polici 0 +caus 0 +poorli 0 +help 0 +machoper 0 +mechan 0 +thathelp 0 +user 0 +level 0 +thread 0 +properli 0 +kernelthread 0 +experi 0 +lead 0 +spin 0 +folk 0 +build 0 +architecturethat 0 +gener 0 +idea 0 +tailor 0 +carri 0 +start 0 +opal 0 +thesi 0 +investig 0 +betweenobject 0 +orient 0 +databas 0 +virtual 0 +memori 0 +demonstr 0 +signific 0 +improv 0 +achiv 0 +commod 0 +differ 0 +done 0 +addit 0 +improvementscan 0 +come 0 +modifi 0 +slightli 0 +betterserv 0 +paperscv 0 +cours 0 +geoff 0 +voelker 0 +built 0 +winter 0 +quarter 0 +seminar 0 +dedic 0 +gave 0 +lectureintroduc 0 +languag 0 +environ 0 +slide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..1582647c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,32 @@ +christoph 1 +lewi 1 +graduat 0 +student 0 +washington 0 +home 0 +page 0 +dept 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +echri 0 +hello 0 +glad 0 +could 0 +make 0 +work 0 +program 0 +languag 0 +project 0 +offic 0 +hour 0 +tent 0 +mondai 0 +wednesdai 0 +sieg 0 +last 0 +modifi 0 +thur 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..a41a4c52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page 1 +blank 1 +ecrock 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..bd6837d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,118 @@ +comput 1 +home 0 +page 0 +know 0 +washington 0 +well 0 +peopl 0 +time 0 +includ 0 +current 0 +work 0 +compil 0 +want 0 +place 0 +mail 0 +find 0 +eddi 0 +hong 0 +second 0 +year 0 +graduat 0 +scienc 0 +studentat 0 +univers 0 +tosai 0 +busi 0 +type 0 +littl 0 +hand 0 +hadto 0 +resum 0 +link 0 +postcriptand 0 +plain 0 +text 0 +form 0 +offic 0 +room 0 +seig 0 +hall 0 +anna 0 +karlin 0 +craig 0 +chamber 0 +theoret 0 +model 0 +dynam 0 +specif 0 +workingon 0 +develop 0 +line 0 +algorithm 0 +fordynam 0 +plan 0 +qual 0 +project 0 +access 0 +sinc 0 +august 0 +histor 0 +fact 0 +free 0 +activit 0 +vine 0 +branchesmi 0 +sister 0 +help 0 +creat 0 +also 0 +list 0 +variou 0 +anoth 0 +interest 0 +keep 0 +industri 0 +site 0 +give 0 +insight 0 +commentari 0 +happen 0 +knowof 0 +pleas 0 +daveneti 0 +power 0 +macintosh 0 +guess 0 +make 0 +bias 0 +towardslik 0 +mac 0 +howev 0 +think 0 +better 0 +eveneasi 0 +come 0 +sometim 0 +visit 0 +appl 0 +check 0 +seattl 0 +freewai 0 +traffic 0 +look 0 +advic 0 +import 0 +book 0 +worldher 0 +us 0 +inform 0 +alwai 0 +found 0 +address 0 +domain 0 +name 0 +countri 0 +friend 0 +stand 0 +edhong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..7941d03f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,74 @@ +compil 1 +egger 0 +research 0 +project 0 +washington 0 +share 0 +asplo 0 +susan 0 +architectur 0 +current 0 +dynam 0 +schedul 0 +time 0 +http 0 +home 0 +depart 0 +computersci 0 +engin 0 +univers 0 +seattl 0 +voic 0 +email 0 +offic 0 +sieg 0 +hall 0 +interest 0 +comput 0 +back 0 +emphasi 0 +onexperiment 0 +perform 0 +analysi 0 +work 0 +issu 0 +incompil 0 +optim 0 +data 0 +optimizationsand 0 +instruct 0 +processor 0 +design 0 +multithreadedarchitectur 0 +algorithm 0 +reduc 0 +fals 0 +multithread 0 +spinprevi 0 +cach 0 +coher 0 +code 0 +prefetch 0 +memori 0 +machin 0 +miscellan 0 +tool 0 +workload 0 +new 0 +program 0 +committe 0 +call 0 +paper 0 +homepag 0 +inform 0 +look 0 +click 0 +list 0 +might 0 +qual 0 +amast 0 +degre 0 +begin 0 +thesi 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..0bc591bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,197 @@ +system 1 +spin 0 +oper 0 +extens 0 +washington 0 +univers 0 +safe 0 +dynam 0 +implement 0 +protect 0 +princeton 0 +work 0 +thread 0 +link 0 +kernel 0 +interfac 0 +support 0 +perform 0 +us 0 +describ 0 +paper 0 +wcsss 0 +softwar 0 +emin 0 +comput 0 +scienc 0 +current 0 +year 0 +summer 0 +jersei 0 +develop 0 +schedul 0 +domain 0 +also 0 +wrote 0 +call 0 +novel 0 +aspect 0 +provid 0 +high 0 +fine 0 +grain 0 +share 0 +time 0 +code 0 +data 0 +design 0 +simul 0 +mechan 0 +modula 0 +microkernel 0 +applic 0 +specif 0 +servic 0 +workshop 0 +sirer 0 +sirereg 0 +depart 0 +engin 0 +seattl 0 +backgroundi 0 +third 0 +graduat 0 +student 0 +grew 0 +istanbul 0 +turkei 0 +receiv 0 +toward 0 +spinproject 0 +prof 0 +brian 0 +bershad 0 +spent 0 +bell 0 +labswork 0 +theplan 0 +help 0 +build 0 +prototyp 0 +displai 0 +thesumm 0 +research 0 +center 0 +recent 0 +thevesta 0 +project 0 +projectsmi 0 +goal 0 +adapt 0 +synchron 0 +andprotect 0 +subsystem 0 +machin 0 +specificaspect 0 +interrupt 0 +path 0 +andsom 0 +miscellan 0 +alarm 0 +mach 0 +compat 0 +mean 0 +ofextend 0 +arbitrari 0 +schedulingpolici 0 +allowsu 0 +achiev 0 +strand 0 +isdesign 0 +fault 0 +isol 0 +allowsisol 0 +withconflict 0 +symbol 0 +simultan 0 +activ 0 +hide 0 +beassur 0 +possibl 0 +access 0 +clincher 0 +extensionsthat 0 +want 0 +protectionenforc 0 +overhead 0 +performanceweb 0 +server 0 +networkingstack 0 +main 0 +object 0 +reduc 0 +http 0 +latenc 0 +andminim 0 +load 0 +mip 0 +instruct 0 +coupl 0 +calledmipsi 0 +robust 0 +enough 0 +spec 0 +benchmark 0 +standard 0 +ofnew 0 +educ 0 +tool 0 +researchplatform 0 +page 0 +mipsi 0 +featuresand 0 +avail 0 +namespac 0 +manag 0 +write 0 +experi 0 +safeti 0 +sosp 0 +issu 0 +hoto 0 +posit 0 +compar 0 +hardwar 0 +sigop 0 +european 0 +review 0 +version 0 +technic 0 +report 0 +march 0 +measur 0 +limit 0 +parallel 0 +senior 0 +independ 0 +june 0 +talkslanguag 0 +slide 0 +present 0 +first 0 +compil 0 +tucson 0 +arizona 0 +interestswhenev 0 +find 0 +opportun 0 +follow 0 +sail 0 +windsurf 0 +dive 0 +ski 0 +bikingmak 0 +outdoor 0 +cloth 0 +andhik 0 +dylan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..1f045f17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,140 @@ +page 1 +project 0 +eric 0 +comput 0 +washington 0 +seattl 0 +prof 0 +method 0 +work 0 +home 0 +scienc 0 +univers 0 +longer 0 +black 0 +recent 0 +solut 0 +implicit 0 +solv 0 +signal 0 +like 0 +everybodi 0 +els 0 +anderson 0 +andersonwher 0 +find 0 +sieg 0 +hall 0 +depart 0 +engin 0 +street 0 +honor 0 +june 0 +decisionin 0 +aclu 0 +reno 0 +mind 0 +thedecis 0 +mere 0 +interim 0 +step 0 +could 0 +read 0 +take 0 +probabl 0 +still 0 +commit 0 +feloni 0 +care 0 +speech 0 +freedom 0 +themarketplac 0 +idea 0 +fact 0 +third 0 +year 0 +graduat 0 +student 0 +mean 0 +imostli 0 +panic 0 +qual 0 +also 0 +try 0 +write 0 +paper 0 +henri 0 +greensideof 0 +duke 0 +finish 0 +master 0 +thesi 0 +onsteadi 0 +state 0 +particular 0 +nonlinear 0 +biharmon 0 +stabil 0 +criterion 0 +explicit 0 +restrict 0 +fourth 0 +power 0 +spatial 0 +resolut 0 +timesteppingmethod 0 +backward 0 +euler 0 +necessari 0 +numer 0 +analysisissu 0 +involv 0 +newton 0 +nonlinearequ 0 +spars 0 +matrix 0 +newtonstep 0 +interplai 0 +pictur 0 +realli 0 +spiffi 0 +bodi 0 +code 0 +astrophys 0 +simul 0 +support 0 +data 0 +structuresbi 0 +richard 0 +andersoni 0 +process 0 +music 0 +aim 0 +automat 0 +transcript 0 +acoust 0 +anna 0 +karlin 0 +isth 0 +musician 0 +interest 0 +applet 0 +first 0 +link 0 +text 0 +small 0 +graphic 0 +section 0 +materi 0 +preparedfor 0 +last 0 +fall 0 +snapshot 0 +mostli 0 +famili 0 +prove 0 +brother 0 +final 0 +weather 0 +meander 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..8c1457e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,197 @@ +softbot 1 +learn 1 +inform 0 +internet 0 +servic 0 +master 0 +thesi 0 +machin 0 +etzioni 0 +metacrawl 0 +home 0 +univers 0 +washington 0 +search 0 +comparison 0 +agent 0 +program 0 +oren 0 +comput 0 +research 0 +enabl 0 +softwar 0 +page 0 +effici 0 +aaai 0 +world 0 +multi 0 +us 0 +juli 0 +intellig 0 +robot 0 +understand 0 +plan 0 +paper 0 +student 0 +advis 0 +hewlett 0 +packard 0 +databas 0 +repositori 0 +irvin 0 +knowledg 0 +discoveri 0 +contain 0 +relev 0 +pageoren 0 +pagedepart 0 +scienc 0 +engin 0 +ofwashington 0 +seattl 0 +voic 0 +mail 0 +offic 0 +sieg 0 +hall 0 +room 0 +brief 0 +bioand 0 +photo 0 +current 0 +human 0 +user 0 +state 0 +heor 0 +want 0 +accomplish 0 +disambigu 0 +request 0 +anddynam 0 +determin 0 +satisfi 0 +finalist 0 +discoveraward 0 +technolog 0 +innov 0 +field 0 +searchmultipl 0 +indic 0 +parallel 0 +provid 0 +sophist 0 +pruningopt 0 +netrecommend 0 +choic 0 +ahoi 0 +white 0 +locatesindividu 0 +high 0 +accuraci 0 +bruteforc 0 +brute 0 +analyz 0 +hypothes 0 +second 0 +whenrun 0 +sparc 0 +select 0 +public 0 +gather 0 +foc 0 +move 0 +food 0 +chain 0 +deploi 0 +theweb 0 +ascal 0 +shop 0 +wide 0 +autonom 0 +postscript 0 +html 0 +base 0 +interfac 0 +cacm 0 +fact 0 +fiction 0 +forecast 0 +ieee 0 +expert 0 +august 0 +without 0 +repli 0 +brook 0 +magazin 0 +decemb 0 +ijcai 0 +sound 0 +close 0 +reason 0 +toappear 0 +first 0 +addit 0 +richardseg 0 +bernard 0 +fileretriev 0 +neal 0 +lesh 0 +planner 0 +unix 0 +keith 0 +golden 0 +universalquantif 0 +incomplet 0 +terranc 0 +goan 0 +error 0 +mikeperkowitz 0 +erik 0 +selberg 0 +zamir 0 +jonathan 0 +shake 0 +undergradu 0 +stephen 0 +soderland 0 +umass 0 +amherst 0 +roomi 0 +bruce 0 +lesourd 0 +robert 0 +spiger 0 +lockhe 0 +center 0 +william 0 +alford 0 +wisconsin 0 +greg 0 +fitchenholtz 0 +guido 0 +hunt 0 +dymitr 0 +mozdyniewicz 0 +quark 0 +resourc 0 +minecontain 0 +neuroprosearch 0 +recent 0 +neural 0 +network 0 +illinoi 0 +induct 0 +group 0 +statlib 0 +data 0 +algorithm 0 +statist 0 +learningtoolbox 0 +bonn 0 +german 0 +list 0 +usenet 0 +faq 0 +access 0 +count 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..bd134523 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,35 @@ +feelei 1 +mike 0 +comput 0 +home 0 +pagemik 0 +scienc 0 +finish 0 +done 0 +soon 0 +thesi 0 +concern 0 +global 0 +memori 0 +manag 0 +workstationclust 0 +also 0 +work 0 +distribut 0 +opalproject 0 +join 0 +faculti 0 +univers 0 +british 0 +columbia 0 +injanuari 0 +inform 0 +avail 0 +us 0 +link 0 +papersmi 0 +research 0 +summarycvsoutheast 0 +idaholast 0 +modifi 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..d482920c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,20 @@ +fisher 1 +page 1 +washington 1 +chri 0 +home 0 +pictur 0 +fisherdepart 0 +comput 0 +scienc 0 +engineeringbox 0 +univers 0 +seattl 0 +voic 0 +mail 0 +sieg 0 +hall 0 +room 0 +current 0 +construct 0 +return 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..206acf26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,73 @@ +comput 1 +washington 0 +place 0 +might 0 +depart 0 +scienc 0 +engineeringunivers 0 +seattl 0 +sieg 0 +hall 0 +schedulethi 0 +quarter 0 +autumn 0 +ta 0 +cours 0 +rather 0 +work 0 +gener 0 +exam 0 +check 0 +scheduleto 0 +otherwis 0 +around 0 +probablyb 0 +found 0 +librari 0 +somewher 0 +nice 0 +read 0 +paper 0 +research 0 +activitiesmi 0 +main 0 +interest 0 +algorithm 0 +specif 0 +areasof 0 +parallel 0 +geometri 0 +public 0 +meander 0 +denni 0 +outta 0 +mind 0 +vista 0 +pea 0 +music 0 +site 0 +chateau 0 +galleri 0 +fund 0 +drive 0 +thing 0 +alec 0 +wolman 0 +server 0 +seven 0 +lost 0 +soul 0 +captur 0 +html 0 +listen 0 +phone 0 +booth 0 +mofo 0 +peopl 0 +luci 0 +paul 0 +peach 0 +ruel 0 +look 0 +like 0 +moment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..13f4851a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,84 @@ +comput 1 +research 1 +scienc 0 +interest 0 +univers 0 +modugno 0 +home 0 +washington 0 +human 0 +formal 0 +model 0 +softwar 0 +current 0 +activ 0 +chair 0 +carnegi 0 +mellon 0 +francesmari 0 +pagefrancesmari 0 +page 0 +depart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +offic 0 +sieg 0 +hall 0 +phone 0 +mail 0 +main 0 +interact 0 +also 0 +user 0 +program 0 +specif 0 +verif 0 +technolog 0 +polici 0 +welcom 0 +opportun 0 +collabor 0 +relat 0 +topic 0 +project 0 +safeti 0 +machin 0 +interfac 0 +previou 0 +public 0 +avail 0 +onlin 0 +summari 0 +ofmi 0 +thesi 0 +real 0 +time 0 +concurr 0 +distribut 0 +system 0 +parallel 0 +algorthim 0 +profession 0 +basic 0 +symposium 0 +uist 0 +demonstr 0 +educ 0 +march 0 +august 0 +mathemat 0 +cornel 0 +anyth 0 +recent 0 +includecycl 0 +ski 0 +languag 0 +cultur 0 +spanish 0 +previouslyitalian 0 +vegetarian 0 +cook 0 +elleri 0 +line 0 +greet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..082dcea9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,45 @@ +forman 1 +georg 0 +home 0 +comput 0 +pagegeorg 0 +pagei 0 +receiv 0 +scienc 0 +optim 0 +compil 0 +ariadn 0 +scalabl 0 +pattern 0 +match 0 +parallel 0 +trace 0 +debugg 0 +public 0 +mobil 0 +hyperlink 0 +librari 0 +someth 0 +interest 0 +free 0 +handi 0 +softwar 0 +script 0 +written 0 +word 0 +puzzl 0 +water 0 +song 0 +chang 0 +netscap 0 +anim 0 +gforman 0 +comhom 0 +page 0 +mail 0 +finger 0 +weather 0 +dept 0 +live 0 +pictur 0 +gener 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..112fc1f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,48 @@ +page 1 +friedman 1 +marc 0 +home 0 +english 0 +research 0 +keith 0 +cours 0 +watercolor 0 +applet 0 +camp 0 +checklist 0 +spanish 0 +collabor 0 +dictionari 0 +poetri 0 +favorit 0 +quot 0 +link 0 +elsewher 0 +occam 0 +inform 0 +gather 0 +agent 0 +golden 0 +wordbot 0 +bike 0 +trip 0 +artifici 0 +intellig 0 +codi 0 +kwok 0 +weld 0 +ucpop 0 +planner 0 +tool 0 +chang 0 +life 0 +work 0 +nietzschein 0 +netscap 0 +bookmark 0 +file 0 +everi 0 +refer 0 +visitor 0 +sinc 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..ceed8227 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,28 @@ +garrett 1 +charli 0 +genet 0 +home 0 +page 0 +address 0 +seattl 0 +research 0 +interest 0 +compil 0 +graphic 0 +neural 0 +network 0 +algorithm 0 +game 0 +plai 0 +algorithmspap 0 +line 0 +algorithmsformerli 0 +member 0 +cecil 0 +group 0 +univers 0 +ofwashington 0 +bookshelf 0 +audio 0 +file 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..43e3f951 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,49 @@ +comput 1 +georg 0 +scienc 0 +graphic 0 +render 0 +imag 0 +winkenbach 0 +winkenbachdepart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +mail 0 +georgew 0 +washington 0 +eduphon 0 +interest 0 +multimedia 0 +thesi 0 +work 0 +doneund 0 +supervis 0 +professor 0 +david 0 +salesin 0 +deal 0 +appli 0 +tradit 0 +illustr 0 +techniqu 0 +theautomat 0 +three 0 +dimension 0 +model 0 +imagescr 0 +prototyp 0 +system 0 +found 0 +link 0 +follow 0 +galleri 0 +grail 0 +laboratori 0 +depart 0 +engin 0 +wife 0 +home 0 +page 0 +taweewan 0 +siwadun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..92096a98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,211 @@ +home 1 +page 0 +washington 0 +scienc 0 +configur 0 +duke 0 +file 0 +badro 0 +seattl 0 +graduat 0 +mathemat 0 +work 0 +engin 0 +univers 0 +pagecs 0 +link 0 +comput 0 +greg 0 +welcom 0 +email 0 +recent 0 +spring 0 +part 0 +time 0 +cours 0 +first 0 +competit 0 +emac 0 +readm 0 +archiv 0 +zshell 0 +chronicl 0 +newspap 0 +articl 0 +especi 0 +jackson 0 +joel 0 +interest 0 +languag 0 +nation 0 +foundat 0 +pagegreg 0 +last 0 +updat 0 +eduaddress 0 +nesbit 0 +hello 0 +homepag 0 +pleas 0 +feel 0 +free 0 +send 0 +feedback 0 +address 0 +alwai 0 +isuppos 0 +construct 0 +keep 0 +check 0 +back 0 +excitingfeatur 0 +dukeunivers 0 +complet 0 +degre 0 +doubl 0 +major 0 +incomput 0 +amcurr 0 +emploi 0 +senior 0 +research 0 +scientist 0 +fortransworld 0 +numer 0 +small 0 +compani 0 +origin 0 +locat 0 +indurham 0 +headquart 0 +bermuda 0 +myapart 0 +also 0 +full 0 +student 0 +computersci 0 +depart 0 +ofwashington 0 +fall 0 +softwar 0 +seminarcs 0 +compil 0 +seminar 0 +newer 0 +stuff 0 +philosophi 0 +note 0 +misc 0 +patch 0 +fvwm 0 +place 0 +best 0 +show 0 +redhat 0 +desktop 0 +entri 0 +transworldnumer 0 +ieeenat 0 +program 0 +victori 0 +vertic 0 +winter 0 +issu 0 +magazin 0 +contain 0 +geneticalgorithm 0 +person 0 +rsum 0 +data 0 +date 0 +busi 0 +sampl 0 +drew 0 +bycomput 0 +simpl 0 +magic 0 +creat 0 +canterburi 0 +progress 0 +variou 0 +random 0 +pictur 0 +life 0 +definitelynot 0 +mani 0 +hobbi 0 +includ 0 +tenni 0 +ski 0 +hole 0 +volleybal 0 +juggl 0 +piano 0 +plai 0 +game 0 +rubik 0 +cube 0 +linux 0 +freewar 0 +unix 0 +music 0 +sarahmclachlan 0 +billi 0 +yahoo 0 +list 0 +parliamentari 0 +procedur 0 +ncaa 0 +basketbal 0 +lyco 0 +search 0 +commun 0 +daili 0 +univ 0 +unoffici 0 +microsoft 0 +corpor 0 +world 0 +wide 0 +server 0 +gatewai 0 +user 0 +group 0 +histor 0 +imag 0 +hotjava 0 +global 0 +network 0 +navig 0 +perl 0 +practic 0 +extract 0 +report 0 +virtual 0 +librari 0 +inter 0 +unif 0 +devic 0 +connect 0 +write 0 +html 0 +sgml 0 +seinfeld 0 +index 0 +friend 0 +sitcom 0 +materi 0 +base 0 +upon 0 +support 0 +fellowship 0 +opinion 0 +find 0 +conclus 0 +recommend 0 +express 0 +public 0 +author 0 +necessarili 0 +reflect 0 +view 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..1eb1d4a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,294 @@ +java 1 +applet 0 +linden 0 +gamelan 0 +page 0 +scienc 0 +webview 0 +cool 0 +graduat 0 +comput 0 +us 0 +tree 0 +model 0 +program 0 +univers 0 +softwar 0 +interest 0 +jar 0 +staff 0 +pick 0 +work 0 +movi 0 +qualiti 0 +greg 0 +third 0 +year 0 +polit 0 +doubl 0 +look 0 +posit 0 +develop 0 +link 0 +enter 0 +found 0 +travel 0 +elicit 0 +flight 0 +prefer 0 +wasrat 0 +andwa 0 +repositori 0 +also 0 +book 0 +sorri 0 +time 0 +code 0 +submit 0 +ucsd 0 +project 0 +life 0 +artifici 0 +neuralnetwork 0 +learn 0 +user 0 +iter 0 +prison 0 +imag 0 +headless 0 +horseman 0 +transpar 0 +reflect 0 +made 0 +avail 0 +washington 0 +home 0 +lindenmi 0 +love 0 +wifecorina 0 +current 0 +scienceher 0 +thecomput 0 +depart 0 +ofwashington 0 +slave 0 +awai 0 +toward 0 +lofti 0 +goal 0 +complet 0 +undergraduatedegre 0 +california 0 +diego 0 +anodd 0 +major 0 +go 0 +leav 0 +school 0 +decemb 0 +mactiv 0 +check 0 +resum 0 +allow 0 +orset 0 +addit 0 +famili 0 +altavistawebviewand 0 +metawebview 0 +instead 0 +keyword 0 +foraltavista 0 +metacrawl 0 +hit 0 +search 0 +servic 0 +return 0 +graph 0 +displai 0 +searchservic 0 +autom 0 +assist 0 +emul 0 +dialog 0 +travelag 0 +client 0 +gradual 0 +whileallow 0 +brows 0 +real 0 +data 0 +research 0 +prototyp 0 +quit 0 +function 0 +even 0 +earli 0 +stage 0 +highli 0 +rate 0 +andjar 0 +altavistawebview 0 +winner 0 +thejava 0 +contest 0 +publish 0 +walsh 0 +foundat 0 +meilleur 0 +best 0 +first 0 +linear 0 +ballet 0 +oop 0 +capabl 0 +browser 0 +sourc 0 +demonstr 0 +buffer 0 +avoid 0 +flicker 0 +thread 0 +give 0 +run 0 +certainli 0 +could 0 +cleaner 0 +though 0 +expect 0 +thought 0 +might 0 +enough 0 +standardsto 0 +impress 0 +mylgramm 0 +particl 0 +draw 0 +lgrammer 0 +much 0 +realist 0 +theparticletre 0 +recent 0 +start 0 +judg 0 +evalu 0 +thejar 0 +archiv 0 +summer 0 +dawn 0 +civil 0 +ademonstr 0 +applic 0 +show 0 +plan 0 +techniqu 0 +cansuccessfulli 0 +appli 0 +entertain 0 +myriadsoftwar 0 +professor 0 +belew 0 +filippo 0 +menzer 0 +latentenergi 0 +environ 0 +tool 0 +developingartifici 0 +experi 0 +evolutionari 0 +enviro 0 +paper 0 +hank 0 +lesh 0 +theautom 0 +assit 0 +majeski 0 +spitzer 0 +localizedinteract 0 +spatial 0 +constraint 0 +dilemma 0 +associ 0 +econom 0 +scientist 0 +krishnamoorthi 0 +paturi 0 +blume 0 +liden 0 +esen 0 +hardwaretradeoff 0 +boolean 0 +concept 0 +world 0 +congress 0 +recurr 0 +neural 0 +network 0 +sdilemma 0 +unpublish 0 +honor 0 +thesi 0 +adam 0 +carlson 0 +sujai 0 +parekh 0 +wrote 0 +funrai 0 +tracer 0 +ofth 0 +inc 0 +graphic 0 +closeup 0 +chess 0 +duel 0 +assembl 0 +requir 0 +sphere 0 +withreflect 0 +shadow 0 +distribut 0 +trace 0 +adaptivesampl 0 +mess 0 +thing 0 +pattern 0 +thespher 0 +causingth 0 +rai 0 +refract 0 +multipl 0 +surfaceand 0 +intern 0 +second 0 +anim 0 +call 0 +strike 0 +theanim 0 +written 0 +inventor 0 +manipul 0 +thed 0 +origin 0 +file 0 +alow 0 +quicktim 0 +mbquicktim 0 +doesn 0 +compress 0 +anyfurth 0 +least 0 +anyth 0 +resembl 0 +reason 0 +stuff 0 +dilbert 0 +cognit 0 +info 0 +occasion 0 +chateau 0 +guggenheim 0 +annex 0 +engin 0 +seattl 0 +glinden 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..88c2c28c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,27 @@ +grant 1 +brian 0 +kri 0 +home 0 +pagebrian 0 +awai 0 +homework 0 +relat 0 +infowork 0 +backgrounduwdynam 0 +compil 0 +groupuw 0 +depart 0 +computersci 0 +engineeringperson 0 +stuffperson 0 +backgroundmi 0 +daughter 0 +isismi 0 +trip 0 +singaporemi 0 +bookmarksmi 0 +public 0 +keylast 0 +updat 0 +octob 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..004af882 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,98 @@ +grove 1 +spend 1 +us 1 +dave 0 +washington 0 +offic 0 +plai 0 +cecil 0 +author 0 +trip 0 +depart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +chateau 0 +sieg 0 +worki 0 +time 0 +pure 0 +object 0 +orient 0 +langaug 0 +vehicl 0 +integr 0 +research 0 +area 0 +languag 0 +design 0 +program 0 +environ 0 +optim 0 +compil 0 +also 0 +hord 0 +consult 0 +hang 0 +aroundth 0 +fring 0 +spinproject 0 +actual 0 +attempt 0 +graduat 0 +sometim 0 +soonish 0 +much 0 +less 0 +frequent 0 +paper 0 +wouldn 0 +complet 0 +without 0 +dilbertfix 0 +strip 0 +thathit 0 +littl 0 +close 0 +home 0 +current 0 +manag 0 +underacheiv 0 +fantasi 0 +footbal 0 +team 0 +summer 0 +hampshir 0 +work 0 +gui 0 +scoutreserv 0 +greaterlowel 0 +council 0 +pictur 0 +casunset 0 +taken 0 +right 0 +cabin 0 +kick 0 +anoth 0 +everi 0 +boi 0 +someth 0 +silli 0 +white 0 +water 0 +raft 0 +especi 0 +cool 0 +month 0 +toronto 0 +drove 0 +back 0 +toseattl 0 +took 0 +number 0 +detour 0 +along 0 +somehihglight 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..6080142a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,61 @@ +fpga 1 +system 0 +design 0 +asynchron 0 +architectur 0 +hauck 0 +washington 0 +current 0 +multi 0 +rapid 0 +prototyp 0 +circuit 0 +scott 0 +univers 0 +graduat 0 +work 0 +board 0 +level 0 +well 0 +triptych 0 +montag 0 +develop 0 +comput 0 +scienc 0 +engin 0 +depart 0 +seattl 0 +year 0 +student 0 +thoughi 0 +also 0 +interest 0 +parallel 0 +june 0 +person 0 +biographi 0 +educ 0 +experi 0 +public 0 +curriculum 0 +vitaeresearch 0 +survei 0 +methodolog 0 +first 0 +improv 0 +densiti 0 +commerci 0 +springbok 0 +partit 0 +assign 0 +rout 0 +topolog 0 +gener 0 +chinook 0 +project 0 +hardwar 0 +softwar 0 +synthesi 0 +simul 0 +embed 0 +applic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..aae46406 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin 1 +hinshaw 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..ca46e190 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,66 @@ +macduff 1 +place 0 +week 0 +time 0 +also 0 +stuff 0 +ultrasound 0 +imag 0 +emma 0 +elspeth 0 +name 0 +subject 0 +chang 0 +without 0 +notic 0 +unborn 0 +daughter 0 +fromconcept 0 +ripe 0 +inmid 0 +decemb 0 +view 0 +profil 0 +ly 0 +back 0 +lookingup 0 +head 0 +right 0 +upper 0 +half 0 +torso 0 +theleft 0 +busi 0 +obsess 0 +impend 0 +fatherhood 0 +master 0 +thesi 0 +part 0 +chinook 0 +project 0 +pass 0 +silli 0 +possibl 0 +rememb 0 +myspam 0 +unfortun 0 +wast 0 +html 0 +brows 0 +around 0 +interest 0 +usingwebcrawl 0 +pointer 0 +neat 0 +frogstv 0 +nationpenn 0 +tellermus 0 +lyricsian 0 +washington 0 +dept 0 +comput 0 +scienc 0 +engin 0 +univ 0 +washingtonseattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..03e999ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,145 @@ +memori 1 +network 0 +page 0 +jamrozik 0 +system 0 +size 0 +washington 0 +global 0 +high 0 +speed 0 +reduc 0 +disk 0 +access 0 +transfer 0 +latenc 0 +research 0 +distribut 0 +object 0 +orient 0 +languag 0 +levi 0 +vernon 0 +karlin 0 +feelei 0 +voelker 0 +cach 0 +need 0 +node 0 +remot 0 +provid 0 +subpag 0 +environ 0 +pictur 0 +herv 0 +jamrozikherv 0 +postdoc 0 +univers 0 +sinc 0 +septemb 0 +depart 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +offic 0 +sieg 0 +hall 0 +phone 0 +mail 0 +main 0 +interest 0 +softwar 0 +engin 0 +current 0 +work 0 +memoi 0 +manag 0 +workstat 0 +cluster 0 +hank 0 +mari 0 +anna 0 +mike 0 +geoff 0 +greatli 0 +encourag 0 +virtual 0 +file 0 +therebi 0 +intens 0 +applic 0 +primari 0 +lightli 0 +load 0 +temporari 0 +back 0 +store 0 +introduc 0 +level 0 +hierarchi 0 +name 0 +li 0 +logic 0 +local 0 +fundament 0 +unit 0 +perform 0 +factor 0 +recent 0 +modern 0 +processor 0 +increas 0 +order 0 +coverag 0 +amort 0 +cost 0 +unfortun 0 +small 0 +trend 0 +thu 0 +odd 0 +studi 0 +mean 0 +us 0 +evan 0 +inproceed 0 +seventh 0 +confer 0 +architectur 0 +support 0 +program 0 +oper 0 +octob 0 +postscript 0 +thesi 0 +debug 0 +theuniversit 0 +joseph 0 +fourier 0 +grenobl 0 +involv 0 +guideproject 0 +laboratoir 0 +bull 0 +imag 0 +part 0 +imaginstitut 0 +extrem 0 +peopl 0 +area 0 +snot 0 +visit 0 +louvr 0 +galleri 0 +look 0 +map 0 +franc 0 +europ 0 +world 0 +somefamili 0 +somefriend 0 +eduv 0 +march 0 +decemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..7733282b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,25 @@ +jason 1 +secoski 0 +washington 0 +weather 0 +home 0 +pagejason 0 +eduaddress 0 +comput 0 +scienc 0 +engin 0 +depart 0 +sieg 0 +hall 0 +cunivers 0 +boxseattl 0 +offic 0 +frequent 0 +us 0 +page 0 +projectseattl 0 +forecast 0 +channel 0 +secoskylast 0 +modifi 0 +thursdai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..272dabd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,82 @@ +applet 1 +jeremi 0 +baer 0 +educ 0 +stuff 0 +current 0 +comput 0 +washington 0 +softwar 0 +project 0 +baerjeremi 0 +never 0 +school 0 +interfer 0 +mark 0 +twain 0 +dream 0 +made 0 +william 0 +shakespearei 0 +graduat 0 +student 0 +scienceat 0 +univers 0 +interest 0 +includ 0 +artifici 0 +intellig 0 +human 0 +computerinteract 0 +multimedia 0 +engineeringtool 0 +gener 0 +music 0 +person 0 +creativ 0 +cool 0 +place 0 +spend 0 +signific 0 +time 0 +pierian 0 +spring 0 +softwareoregon 0 +museum 0 +scienc 0 +industri 0 +omsi 0 +pomona 0 +collegeher 0 +look 0 +mine 0 +eight 0 +puzzl 0 +java 0 +work 0 +progress 0 +experiment 0 +virtual 0 +travel 0 +copi 0 +effect 0 +demo 0 +question 0 +static 0 +layer 0 +analysi 0 +program 0 +feel 0 +stress 0 +realli 0 +silli 0 +littl 0 +macintosh 0 +thati 0 +wrote 0 +year 0 +download 0 +like 0 +metacrawl 0 +searchcopyright 0 +jbaer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..45b01f02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,60 @@ +home 1 +washington 1 +buhler 0 +jeremi 0 +browser 0 +control 0 +offic 0 +phone 0 +jbuhler 0 +pagejeremi 0 +pagedo 0 +attempt 0 +adjust 0 +transmiss 0 +statu 0 +first 0 +year 0 +student 0 +institut 0 +univers 0 +depart 0 +comput 0 +scienc 0 +engin 0 +sieg 0 +hall 0 +mail 0 +address 0 +finger 0 +tako 0 +import 0 +stufflectur 0 +note 0 +suffix 0 +tree 0 +postscript 0 +latex 0 +research 0 +come 0 +soon 0 +cours 0 +schedulemi 0 +public 0 +keycyb 0 +activ 0 +electron 0 +frontier 0 +foundat 0 +grinsrecommend 0 +readingmi 0 +undergradu 0 +alma 0 +mater 0 +rice 0 +universityquot 0 +quotesmi 0 +page 0 +return 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..b5fbdc4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,270 @@ +optim 1 +compil 0 +languag 0 +research 0 +program 0 +pass 0 +techniqu 0 +seattl 0 +also 0 +cecil 0 +orient 0 +design 0 +develop 0 +vortex 0 +permit 0 +would 0 +work 0 +object 0 +implement 0 +system 0 +interest 0 +wife 0 +jeff 0 +dean 0 +depart 0 +washington 0 +offic 0 +view 0 +postscript 0 +integr 0 +larg 0 +line 0 +applic 0 +effici 0 +explor 0 +whole 0 +analysi 0 +affect 0 +guid 0 +appli 0 +exampl 0 +recent 0 +author 0 +spent 0 +biplan 0 +ride 0 +flight 0 +comput 0 +scienc 0 +engin 0 +univers 0 +chateau 0 +sieg 0 +dang 0 +build 0 +weren 0 +futur 0 +plansi 0 +plan 0 +graduat 0 +summer 0 +join 0 +western 0 +laboratori 0 +sunni 0 +palo 0 +alto 0 +bought 0 +hous 0 +nearbi 0 +menlo 0 +park 0 +curriculum 0 +vita 0 +summari 0 +teach 0 +experi 0 +projectsi 0 +primarili 0 +project 0 +pure 0 +langaug 0 +us 0 +avehicl 0 +area 0 +environ 0 +weintend 0 +scale 0 +real 0 +world 0 +andto 0 +keep 0 +true 0 +goal 0 +vortexcompil 0 +current 0 +codein 0 +much 0 +group 0 +involv 0 +track 0 +bug 0 +hang 0 +spinproject 0 +meet 0 +spin 0 +extens 0 +oper 0 +systemmicrokernel 0 +support 0 +dynam 0 +adapt 0 +interfacesand 0 +direct 0 +control 0 +stillmaintain 0 +inter 0 +isol 0 +spring 0 +quarter 0 +organ 0 +seminar 0 +concern 0 +ofobject 0 +particular 0 +usedto 0 +improv 0 +perform 0 +increment 0 +andhow 0 +interact 0 +especiallyprofil 0 +howwhol 0 +assumedthat 0 +access 0 +entir 0 +manycompromis 0 +made 0 +exist 0 +becom 0 +unnecessari 0 +lesson 0 +learn 0 +wholeprogram 0 +toward 0 +programminglanguag 0 +flexibl 0 +base 0 +underlyingimplement 0 +valid 0 +three 0 +principaldesign 0 +forobject 0 +defin 0 +independentintermedi 0 +ishigh 0 +enough 0 +level 0 +featur 0 +messagesend 0 +closur 0 +creation 0 +contain 0 +uniqu 0 +wayof 0 +compos 0 +parallel 0 +obtain 0 +better 0 +result 0 +repeatedli 0 +run 0 +passessepar 0 +intraprocedur 0 +classanalysi 0 +profil 0 +receiv 0 +class 0 +predict 0 +inlin 0 +aliasanalysi 0 +split 0 +singl 0 +combin 0 +part 0 +wai 0 +structuringoptim 0 +kind 0 +composit 0 +stillallow 0 +independ 0 +eachoth 0 +nice 0 +framework 0 +specifi 0 +iter 0 +data 0 +flowanalys 0 +client 0 +withrel 0 +littl 0 +effort 0 +dead 0 +assignmentelimin 0 +approxim 0 +code 0 +publicationssom 0 +paper 0 +personali 0 +love 0 +spici 0 +food 0 +mild 0 +four 0 +letter 0 +word 0 +coke 0 +probabl 0 +kick 0 +caffein 0 +habit 0 +enjoy 0 +moment 0 +heidi 0 +daughter 0 +victoria 0 +realli 0 +like 0 +somedai 0 +honeymoon 0 +kauai 0 +hurrican 0 +iniki 0 +galvin 0 +fly 0 +guess 0 +never 0 +anyth 0 +anymor 0 +took 0 +consist 0 +minut 0 +around 0 +downtown 0 +puget 0 +sound 0 +travel 0 +model 0 +feel 0 +dare 0 +sadli 0 +insur 0 +coverag 0 +doesn 0 +passeng 0 +walk 0 +wing 0 +back 0 +enjoi 0 +fantast 0 +even 0 +highli 0 +recommend 0 +look 0 +someth 0 +number 0 +rather 0 +lengthi 0 +hotlist 0 +jdean 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..d9123b41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,176 @@ +softwar 1 +analysi 0 +deviat 0 +requir 0 +rsml 0 +comput 0 +system 0 +rees 0 +safeti 0 +specif 0 +critic 0 +scienc 0 +control 0 +state 0 +develop 0 +languag 0 +tool 0 +semant 0 +damon 0 +doctor 0 +research 0 +jdrees 0 +washington 0 +place 0 +applic 0 +advantag 0 +public 0 +exampl 0 +oper 0 +project 0 +especi 0 +base 0 +us 0 +hazard 0 +procedur 0 +hazop 0 +studi 0 +dissert 0 +make 0 +avail 0 +kurt 0 +partridg 0 +univers 0 +postscript 0 +waxahachi 0 +leveson 0 +heimdahl 0 +hildreth 0 +process 0 +ieee 0 +transact 0 +industri 0 +home 0 +pagejon 0 +reesepost 0 +groupdepart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +offic 0 +chateau 0 +guggenheim 0 +annex 0 +phone 0 +mail 0 +interest 0 +problem 0 +awar 0 +devic 0 +structur 0 +caus 0 +catastroph 0 +fail 0 +hardwar 0 +becom 0 +less 0 +expens 0 +expect 0 +wider 0 +rang 0 +convent 0 +technolog 0 +flexibl 0 +come 0 +price 0 +behavior 0 +complex 0 +unpredict 0 +perhap 0 +best 0 +three 0 +mile 0 +island 0 +incid 0 +great 0 +difficulti 0 +diagnos 0 +emerg 0 +stage 0 +success 0 +respect 0 +reason 0 +colleagu 0 +concentr 0 +commun 0 +call 0 +machin 0 +valid 0 +specifi 0 +tca 0 +avion 0 +thesi 0 +invent 0 +signific 0 +concept 0 +borrow 0 +henc 0 +name 0 +link 0 +html 0 +transcript 0 +current 0 +write 0 +confer 0 +articl 0 +summar 0 +group 0 +possibl 0 +dynam 0 +displai 0 +search 0 +siang 0 +integr 0 +alpha 0 +version 0 +publicli 0 +sean 0 +sandi 0 +draft 0 +document 0 +includ 0 +discuss 0 +variant 0 +improv 0 +academ 0 +histori 0 +inform 0 +california 0 +irvin 0 +linguist 0 +rice 0 +high 0 +school 0 +nanci 0 +mat 0 +holli 0 +engin 0 +septemb 0 +steven 0 +dolin 0 +curv 0 +interpret 0 +diagnost 0 +techniqu 0 +januari 0 +februari 0 +ortega 0 +experi 0 +statechart 0 +sixth 0 +intern 0 +workshop 0 +design 0 +como 0 +itali 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..aca90a1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,104 @@ +jack 1 +compil 0 +multithread 0 +simultan 0 +page 0 +level 0 +parallel 0 +instruct 0 +postscript 0 +susan 0 +egger 0 +issu 0 +processor 0 +schedul 0 +washington 0 +abstract 0 +henri 0 +levi 0 +tullsen 0 +proceed 0 +architectur 0 +examin 0 +pictur 0 +home 0 +comput 0 +current 0 +research 0 +joel 0 +emer 0 +rebecca 0 +stamm 0 +anddean 0 +implement 0 +balanc 0 +optim 0 +static 0 +dynam 0 +superscalar 0 +written 0 +report 0 +lojack 0 +lojlo 0 +depart 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +construct 0 +paper 0 +convert 0 +thread 0 +submit 0 +public 0 +juli 0 +exploit 0 +choic 0 +fetch 0 +dean 0 +annual 0 +intern 0 +symposium 0 +philadelphia 0 +first 0 +suif 0 +workshop 0 +stanford 0 +januari 0 +improv 0 +increas 0 +sigplan 0 +confer 0 +program 0 +languag 0 +design 0 +jolla 0 +california 0 +june 0 +compar 0 +gener 0 +interact 0 +loph 0 +qualifi 0 +work 0 +support 0 +interest 0 +also 0 +includ 0 +vliw 0 +well 0 +particular 0 +investig 0 +person 0 +find 0 +franklin 0 +eseattl 0 +orsieg 0 +hall 0 +room 0 +phone 0 +coupl 0 +recent 0 +paintbal 0 +experi 0 +yahoojlo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..f609577b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,45 @@ +user 1 +interfac 1 +home 0 +page 0 +washington 0 +research 0 +project 0 +sherman 0 +shermanjoebob 0 +depart 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +usami 0 +interest 0 +designinform 0 +navig 0 +visual 0 +activ 0 +inform 0 +local 0 +survei 0 +useclass 0 +hcreat 0 +impress 0 +pagequ 0 +time 0 +sarahsoftballstuff 0 +might 0 +want 0 +automat 0 +suggest 0 +link 0 +relat 0 +topic 0 +directori 0 +us 0 +pagesif 0 +browser 0 +support 0 +send 0 +mail 0 +tojoebob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..179750cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,61 @@ +joshua 1 +seim 1 +josh 1 +home 0 +page 0 +current 0 +comput 0 +scienc 0 +sever 0 +abstract 0 +biolog 0 +base 0 +neural 0 +network 0 +system 0 +test 0 +theunivers 0 +washington 0 +depart 0 +begunin 0 +lockean 0 +blank 0 +slate 0 +learn 0 +emul 0 +observedbehavior 0 +successfulli 0 +accomplish 0 +task 0 +graduatingfrom 0 +colleg 0 +travel 0 +volit 0 +recent 0 +start 0 +demonstr 0 +potenti 0 +independ 0 +creativ 0 +thought 0 +taskw 0 +present 0 +earn 0 +expect 0 +take 0 +year 0 +document 0 +provid 0 +overviewof 0 +cognit 0 +ambulatori 0 +achiev 0 +organ 0 +person 0 +academichierarchi 0 +addition 0 +futur 0 +work 0 +discuss 0 +within 0 +context 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..0fa025ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,4 @@ +jovan 1 +home 1 +page 1 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..b52b5a69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,67 @@ +joanna 1 +page 1 +academ 0 +main 0 +interest 0 +comput 0 +graphic 0 +alma 0 +biologi 0 +powerjoanna 0 +pagehi 0 +cat 0 +interestsmi 0 +scienc 0 +grad 0 +school 0 +uwneat 0 +stuff 0 +matercool 0 +link 0 +jonathan 0 +shadegraph 0 +research 0 +uwduoton 0 +reproductionmi 0 +especi 0 +genet 0 +molecular 0 +matermost 0 +recent 0 +site 0 +gain 0 +employmentpubl 0 +power 0 +brad 0 +west 0 +eric 0 +stollnitz 0 +david 0 +salesin 0 +reproduc 0 +color 0 +imag 0 +duoton 0 +proceed 0 +siggraph 0 +york 0 +real 0 +lifepast 0 +homesdiversionsgend 0 +issuesstatu 0 +women 0 +sciencenow 0 +home 0 +pagefeminist 0 +major 0 +onlineultim 0 +frisbeefun 0 +stufffroggi 0 +sean 0 +quotesbrad 0 +comic 0 +musicevan 0 +jokes 0 +pagesmi 0 +herojpow 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..e08d1b6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,16 @@ +jonathan 1 +shake 1 +washington 1 +sieg 0 +hall 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +ahoi 0 +homepag 0 +finderresumlinkslast 0 +updat 0 +august 0 +jshake 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..125d5ff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan 1 +alemanyjuan 1 +alemani 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..ee820a40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,116 @@ +comput 1 +work 0 +scienc 0 +washington 0 +data 0 +surfac 0 +taught 0 +slide 0 +kari 0 +pulli 0 +home 0 +antero 0 +year 0 +univers 0 +graphic 0 +vision 0 +research 0 +depart 0 +project 0 +report 0 +technic 0 +engin 0 +subdivis 0 +find 0 +class 0 +theclass 0 +seattl 0 +pagekari 0 +pullii 0 +third 0 +graduat 0 +student 0 +engineeringdepart 0 +ofwashington 0 +interest 0 +andmathemat 0 +try 0 +combin 0 +aspect 0 +thesedisciplin 0 +professor 0 +closest 0 +tonyderos 0 +actual 0 +left 0 +uwfor 0 +pixar 0 +lindashapiro 0 +addition 0 +werner 0 +stuetzl 0 +andjohn 0 +mcdonald 0 +statist 0 +duchamp 0 +mathemat 0 +andhugu 0 +hopp 0 +rick 0 +szeliski 0 +microsoft 0 +qual 0 +tribor 0 +triplet 0 +base 0 +object 0 +recognitionsystem 0 +linda 0 +universityof 0 +current 0 +surfacereconstruct 0 +rang 0 +multipl 0 +baselin 0 +camerasystem 0 +obtain 0 +waveletanalysi 0 +geometri 0 +reflect 0 +function 0 +pass 0 +gener 0 +examin 0 +topic 0 +rigidregistr 0 +click 0 +architecturesystem 0 +susanegg 0 +distribut 0 +brianbershad 0 +imag 0 +understand 0 +steven 0 +tanimoto 0 +present 0 +sketch 0 +siggraph 0 +getto 0 +remov 0 +wavelet 0 +herear 0 +speaker 0 +note 0 +eacutesum 0 +eacut 0 +sieg 0 +hall 0 +email 0 +kapu 0 +union 0 +folk 0 +takavainionti 0 +oulu 0 +finland 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..d300c0d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,17 @@ +washington 1 +univers 0 +anna 0 +karlinanna 0 +rochel 0 +karlinassoci 0 +professor 0 +sincejuli 0 +work 0 +comput 0 +scienc 0 +engin 0 +depart 0 +seattl 0 +home 0 +page 0 +paperskarlin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..372507b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,10 @@ +page 1 +home 0 +yeunghom 0 +yeungperson 0 +infomi 0 +picturemi 0 +researchtelnet 0 +machinessend 0 +email 0 +back 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..bfacec21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,83 @@ +kurt 1 +partridg 0 +softwar 0 +comput 0 +washington 0 +specif 0 +safeti 0 +postscript 0 +graduat 0 +scienc 0 +univers 0 +interact 0 +environ 0 +poster 0 +academ 0 +inform 0 +student 0 +depart 0 +interest 0 +includ 0 +engin 0 +usabl 0 +readabl 0 +applic 0 +formal 0 +method 0 +page 0 +describ 0 +other 0 +work 0 +area 0 +also 0 +dabbl 0 +user 0 +interfac 0 +design 0 +human 0 +java 0 +program 0 +recent 0 +public 0 +bddtcl 0 +visual 0 +manipul 0 +binari 0 +decisiondiagram 0 +html 0 +preview 0 +nanci 0 +leveson 0 +bauer 0 +mat 0 +heimdahl 0 +wayn 0 +ohlrich 0 +vivek 0 +ratan 0 +rees 0 +critic 0 +nasa 0 +confer 0 +qualiti 0 +background 0 +start 0 +school 0 +complet 0 +berkelei 0 +live 0 +love 0 +suburban 0 +life 0 +thousand 0 +oak 0 +parent 0 +sister 0 +name 0 +oti 0 +right 0 +humor 0 +corner 0 +seattl 0 +voic 0 +kepart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..24b2958e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,50 @@ +keith 1 +golden 1 +washington 1 +home 0 +page 0 +questa 0 +pagina 0 +anch 0 +italiano 0 +researchsoftbotsplanningkrselect 0 +publicationscurriculum 0 +vita 0 +also 0 +inpostscriptrandom 0 +hackingwordbot 0 +collabor 0 +dictionari 0 +like 0 +bicycl 0 +tour 0 +languag 0 +paint 0 +photographi 0 +natur 0 +coffe 0 +godless 0 +pinko 0 +stuff 0 +dislik 0 +suit 0 +lawyer 0 +car 0 +friend 0 +ellenmarcruben 0 +laurennickrich 0 +joannavivek 0 +advisor 0 +oren 0 +etzioni 0 +weld 0 +keithgolden 0 +depart 0 +ofcomput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +kgolden 0 +complet 0 +list 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..44ce0829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,143 @@ +univers 1 +softwar 1 +kingsum 0 +hong 0 +chow 0 +kong 0 +washington 0 +evolut 0 +page 0 +engin 0 +david 0 +notkin 0 +asynchron 0 +pcct 0 +movi 0 +scienc 0 +seattl 0 +research 0 +confer 0 +resum 0 +line 0 +respons 0 +technic 0 +workshop 0 +program 0 +transform 0 +home 0 +onlin 0 +quot 0 +educomput 0 +depart 0 +usathi 0 +inform 0 +highwai 0 +alwai 0 +construct 0 +tabl 0 +content 0 +person 0 +upcom 0 +current 0 +schedul 0 +bridg 0 +glossari 0 +suggest 0 +feedbackresearchmi 0 +advisor 0 +develop 0 +toolspap 0 +qualiti 0 +manag 0 +driven 0 +readi 0 +semi 0 +automat 0 +updat 0 +applic 0 +librari 0 +chang 0 +report 0 +revis 0 +version 0 +appear 0 +icsm 0 +asia 0 +pacif 0 +march 0 +mainten 0 +proceed 0 +ics 0 +william 0 +griswold 0 +editor 0 +intern 0 +april 0 +us 0 +site 0 +sorcererpcct 0 +terrenc 0 +parr 0 +note 0 +newbiesresumepleasedrop 0 +mailto 0 +specifi 0 +text 0 +postscript 0 +format 0 +kongchines 0 +technolog 0 +polytechn 0 +citi 0 +kongsingapor 0 +sitessingapor 0 +world 0 +wide 0 +server 0 +alumnu 0 +websom 0 +campu 0 +friendstom 0 +liew 0 +fook 0 +wang 0 +jiang 0 +weidongu 0 +relatedunivers 0 +style 0 +polici 0 +manual 0 +these 0 +dissert 0 +graduat 0 +school 0 +webserv 0 +book 0 +storeinvestmentsfre 0 +minut 0 +delai 0 +watch 0 +market 0 +data 0 +experiment 0 +mutual 0 +fund 0 +chart 0 +invest 0 +center 0 +stock 0 +commod 0 +analysismisc 0 +read 0 +chines 0 +list 0 +thoma 0 +china 0 +new 0 +servic 0 +welcom 0 +visit 0 +sinc 0 +last 0 +modifi 0 +date 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..628a616c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,90 @@ +univers 1 +seattl 1 +home 0 +kevin 0 +washington 0 +comput 0 +engin 0 +work 0 +chaotic 0 +rout 0 +electr 0 +pacif 0 +time 0 +bold 0 +boldingkwb 0 +scienc 0 +depart 0 +juvenil 0 +squirt 0 +wander 0 +search 0 +suitabl 0 +rock 0 +hunk 0 +coral 0 +cling 0 +make 0 +life 0 +task 0 +rudimentari 0 +nervou 0 +system 0 +find 0 +spot 0 +take 0 +root 0 +doesn 0 +need 0 +brain 0 +eat 0 +rather 0 +like 0 +get 0 +tenur 0 +dennett 0 +conscious 0 +explain 0 +research 0 +current 0 +build 0 +high 0 +speed 0 +latencylan 0 +router 0 +previou 0 +researchha 0 +chaoticrout 0 +form 0 +minim 0 +adapt 0 +formass 0 +parallel 0 +multicomput 0 +profession 0 +assist 0 +professor 0 +also 0 +part 0 +researchassoci 0 +ofwashington 0 +signific 0 +paper 0 +written 0 +archiv 0 +ofth 0 +group 0 +spend 0 +teach 0 +engineeringat 0 +person 0 +photo 0 +took 0 +comethyakutak 0 +moustach 0 +real 0 +case 0 +want 0 +visit 0 +anoth 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..cc31b296 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,34 @@ +quarter 1 +comput 0 +washington 0 +scienc 0 +ladner 0 +introduct 0 +spring 0 +richard 0 +ladnerrichard 0 +ladnerprofessor 0 +depart 0 +engin 0 +univers 0 +seattl 0 +mail 0 +phone 0 +offic 0 +sieg 0 +hall 0 +room 0 +person 0 +short 0 +biographyresearch 0 +public 0 +studentsteachingcomput 0 +program 0 +fall 0 +commun 0 +network 0 +formal 0 +model 0 +winter 0 +data 0 +structur 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..7617e44e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,112 @@ +larri 1 +washington 0 +comput 0 +univers 0 +laboratori 0 +chemistri 0 +work 0 +integr 0 +system 0 +fpga 0 +mcmurchi 0 +home 0 +page 0 +scienc 0 +area 0 +function 0 +staff 0 +design 0 +develop 0 +mactest 0 +chip 0 +mcmurchiedepart 0 +engin 0 +ofwashington 0 +seattl 0 +voic 0 +mail 0 +offic 0 +sieg 0 +hall 0 +room 0 +current 0 +research 0 +director 0 +integratedsystem 0 +western 0 +hework 0 +quantum 0 +graduat 0 +studi 0 +primari 0 +focu 0 +number 0 +evalu 0 +class 0 +ofintegr 0 +gaussian 0 +later 0 +appli 0 +theconstruct 0 +larg 0 +spars 0 +hamiltonian 0 +matric 0 +coauthorof 0 +comprehens 0 +packag 0 +program 0 +meld 0 +us 0 +abinitio 0 +calcul 0 +small 0 +molecul 0 +sinc 0 +join 0 +depart 0 +andengin 0 +supervis 0 +technic 0 +ofth 0 +coauthor 0 +wirec 0 +aschemat 0 +captur 0 +allow 0 +code 0 +withschemat 0 +symbol 0 +creat 0 +concis 0 +parameteriz 0 +representationof 0 +also 0 +involv 0 +andcommerci 0 +softwar 0 +hardwareenviron 0 +test 0 +board 0 +andsubsystem 0 +recent 0 +andha 0 +gener 0 +purpos 0 +perform 0 +driven 0 +router 0 +northwest 0 +cost 0 +vlsi 0 +tester 0 +triptych 0 +high 0 +densiti 0 +architectur 0 +public 0 +journal 0 +articl 0 +upcom 0 +confer 0 +return 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..40174357 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,207 @@ +softwar 1 +comput 0 +system 0 +leveson 0 +safeti 0 +model 0 +professor 0 +research 0 +specif 0 +aircraft 0 +analysi 0 +design 0 +engin 0 +human 0 +nanci 0 +recent 0 +project 0 +member 0 +inform 0 +paper 0 +avail 0 +machin 0 +languag 0 +gener 0 +interfac 0 +scienc 0 +washington 0 +california 0 +year 0 +area 0 +build 0 +real 0 +life 0 +topic 0 +student 0 +requir 0 +tca 0 +collis 0 +avoid 0 +anyth 0 +current 0 +work 0 +specifi 0 +includ 0 +safe 0 +fault 0 +nation 0 +council 0 +space 0 +appli 0 +rsml 0 +deriv 0 +finger 0 +home 0 +page 0 +levesondepart 0 +engineeringunivers 0 +washingtonbox 0 +express 0 +mail 0 +sieg 0 +hall 0 +seattl 0 +join 0 +faculti 0 +come 0 +search 0 +rain 0 +receiv 0 +degre 0 +mathand 0 +ucla 0 +spent 0 +form 0 +univers 0 +irvin 0 +start 0 +concern 0 +problem 0 +time 0 +failur 0 +result 0 +loss 0 +properti 0 +advantag 0 +nobodi 0 +question 0 +goal 0 +except 0 +misanthrop 0 +matter 0 +anywai 0 +produc 0 +aform 0 +commerci 0 +airspac 0 +lesson 0 +learn 0 +never 0 +like 0 +seem 0 +pleas 0 +though 0 +adopt 0 +theiroffici 0 +behavior 0 +claim 0 +thatyou 0 +read 0 +fact 0 +take 0 +train 0 +late 0 +safetyresearch 0 +also 0 +autom 0 +highwai 0 +automobil 0 +variou 0 +aerospac 0 +subtop 0 +toler 0 +verif 0 +valid 0 +editor 0 +chief 0 +ieee 0 +transact 0 +softwareengin 0 +elect 0 +board 0 +director 0 +computingresearch 0 +associ 0 +commissionon 0 +technic 0 +committe 0 +public 0 +polici 0 +chair 0 +studi 0 +evalu 0 +shuttl 0 +process 0 +levesoni 0 +fellow 0 +award 0 +aiaa 0 +systemsaward 0 +contribut 0 +aeronaut 0 +technolog 0 +andscienc 0 +develop 0 +field 0 +promotingrespons 0 +practic 0 +propertyar 0 +stake 0 +book 0 +safewar 0 +addison 0 +weslei 0 +publish 0 +list 0 +isalso 0 +copi 0 +favorit 0 +actual 0 +keynoteaddress 0 +conf 0 +melbourn 0 +titl 0 +high 0 +pressur 0 +steam 0 +click 0 +qual 0 +follow 0 +hazardanalysi 0 +techniqu 0 +writtenin 0 +state 0 +style 0 +call 0 +determin 0 +wai 0 +tree 0 +analys 0 +newrequir 0 +principl 0 +hazard 0 +control 0 +cockpit 0 +analyz 0 +accid 0 +report 0 +involv 0 +mode 0 +awar 0 +problemsand 0 +issu 0 +interact 0 +citi 0 +airport 0 +perhap 0 +contact 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..fb933568 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,202 @@ +system 1 +levi 0 +oper 0 +comput 0 +architectur 0 +henri 0 +proc 0 +symposium 0 +hank 0 +distribut 0 +languag 0 +feelei 0 +support 0 +research 0 +multithread 0 +principl 0 +washington 0 +workstat 0 +student 0 +octob 0 +michael 0 +thekkath 0 +parallel 0 +simultan 0 +environ 0 +recent 0 +singl 0 +address 0 +space 0 +paper 0 +chair 0 +program 0 +dessert 0 +global 0 +memori 0 +karlin 0 +implement 0 +dean 0 +tullsen 0 +susan 0 +egger 0 +intern 0 +jeffrei 0 +chase 0 +lazowska 0 +novemb 0 +conf 0 +arch 0 +prog 0 +asplo 0 +home 0 +page 0 +professor 0 +join 0 +faculti 0 +current 0 +project 0 +focu 0 +particularli 0 +object 0 +base 0 +projectcal 0 +opal 0 +deal 0 +theetch 0 +projecti 0 +produc 0 +tool 0 +perform 0 +instrument 0 +optim 0 +binari 0 +execut 0 +author 0 +book 0 +numer 0 +includ 0 +outstand 0 +select 0 +four 0 +consecutiveacm 0 +symposia 0 +former 0 +sigop 0 +special 0 +interest 0 +group 0 +onoper 0 +theth 0 +tobe 0 +held 0 +hold 0 +carnegi 0 +mellon 0 +universityand 0 +univers 0 +come 0 +consult 0 +engin 0 +digit 0 +equip 0 +corpor 0 +work 0 +span 0 +rang 0 +fellow 0 +associ 0 +machineryand 0 +recipi 0 +fulbright 0 +scholar 0 +award 0 +eleven 0 +master 0 +nine 0 +survivedlevi 0 +supervis 0 +haveal 0 +escap 0 +academ 0 +posit 0 +major 0 +lab 0 +glu 0 +usual 0 +befound 0 +ski 0 +bike 0 +plai 0 +tenni 0 +help 0 +lead 0 +thedepart 0 +infam 0 +softbal 0 +team 0 +smile 0 +potato 0 +death 0 +sampl 0 +seattl 0 +mani 0 +parlor 0 +publicationsreduc 0 +network 0 +latenc 0 +us 0 +subpag 0 +jamrozik 0 +voelker 0 +evan 0 +vernon 0 +inproceed 0 +seventh 0 +confer 0 +postscript 0 +manag 0 +cluster 0 +william 0 +morgan 0 +freder 0 +pighin 0 +anna 0 +chandramohan 0 +appear 0 +decemb 0 +maxim 0 +chip 0 +annual 0 +june 0 +exploit 0 +choic 0 +instruct 0 +fetch 0 +issu 0 +implementablesimultan 0 +processor 0 +joen 0 +emer 0 +jack 0 +rebecca 0 +stamm 0 +share 0 +protect 0 +edwardd 0 +transact 0 +integr 0 +coher 0 +recover 0 +vivek 0 +narasayya 0 +first 0 +design 0 +hardwar 0 +softwar 0 +effici 0 +except 0 +handl 0 +separ 0 +data 0 +control 0 +transfer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..152ec157 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,114 @@ +opal 1 +address 0 +structur 0 +share 0 +singl 0 +space 0 +oper 0 +system 0 +protect 0 +need 0 +complex 0 +cooper 0 +program 0 +larg 0 +data 0 +time 0 +alpha 0 +page 0 +right 0 +access 0 +inform 0 +list 0 +relat 0 +projectop 0 +project 0 +explor 0 +tunedto 0 +applic 0 +numberof 0 +manipul 0 +persist 0 +databaseof 0 +object 0 +code 0 +exist 0 +huge 0 +enhanc 0 +andcooper 0 +uniqu 0 +interpret 0 +thu 0 +pointer 0 +base 0 +directlycommun 0 +bestor 0 +directli 0 +secondari 0 +storag 0 +without 0 +translat 0 +simplifi 0 +avail 0 +addressspac 0 +provid 0 +mip 0 +risc 0 +independ 0 +thread 0 +execut 0 +within 0 +domainthat 0 +defin 0 +virtual 0 +easili 0 +transmit 0 +oneprocess 0 +anoth 0 +result 0 +much 0 +flexibl 0 +protectionstructur 0 +permit 0 +differ 0 +dynam 0 +chang 0 +option 0 +depend 0 +trust 0 +relationshipbetween 0 +parti 0 +believ 0 +organ 0 +canimprov 0 +perform 0 +cooperatingappl 0 +prototyp 0 +built 0 +platform 0 +ofth 0 +mach 0 +sourc 0 +paper 0 +faculti 0 +member 0 +hank 0 +levi 0 +lazowska 0 +jeff 0 +chase 0 +duke 0 +univers 0 +current 0 +graduat 0 +student 0 +mike 0 +feelei 0 +ashutosh 0 +tiwari 0 +vivek 0 +narasayya 0 +dylan 0 +mcname 0 +mail 0 +archiv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..7b800e70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,139 @@ +tutori 1 +object 0 +oopsla 0 +constraint 0 +orient 0 +languag 0 +program 0 +propos 0 +washington 0 +engin 0 +imper 0 +lopez 0 +bjorn 0 +freeman 0 +benson 0 +alan 0 +born 0 +confer 0 +accept 0 +comput 0 +scienc 0 +univers 0 +implement 0 +kaleidoscop 0 +advanc 0 +also 0 +inproceed 0 +chair 0 +technolog 0 +topic 0 +encourag 0 +submiss 0 +electron 0 +lopezgu 0 +lopezlopez 0 +school 0 +sieg 0 +hall 0 +depart 0 +seattl 0 +home 0 +student 0 +dissertationresearch 0 +design 0 +curriculum 0 +vita 0 +publicationsgu 0 +brian 0 +mayoh 0 +tougu 0 +jann 0 +penjam 0 +editor 0 +constraintprogram 0 +springer 0 +verlag 0 +nato 0 +studi 0 +instituteseri 0 +seri 0 +system 0 +publisheda 0 +technic 0 +report 0 +ident 0 +european 0 +bologna 0 +itali 0 +juli 0 +virtual 0 +machin 0 +programmingsystem 0 +applic 0 +portland 0 +oregon 0 +octob 0 +tutorialsi 0 +upcom 0 +conferencein 0 +jose 0 +california 0 +peopl 0 +andsoftwar 0 +develop 0 +meet 0 +speak 0 +well 0 +known 0 +breadth 0 +depth 0 +high 0 +qualiti 0 +itsextens 0 +previou 0 +year 0 +tutorialshav 0 +cover 0 +aspect 0 +introductorysurvei 0 +industri 0 +softwar 0 +practic 0 +lead 0 +edg 0 +academicresearch 0 +respons 0 +request 0 +past 0 +attende 0 +weespeci 0 +issu 0 +anyon 0 +consid 0 +submit 0 +requestguidelin 0 +theoopsla 0 +hotlin 0 +mail 0 +enthusiast 0 +proposalswithout 0 +email 0 +address 0 +march 0 +notif 0 +withcamera 0 +readi 0 +note 0 +august 0 +interest 0 +link 0 +green 0 +direct 0 +jimi 0 +hendrix 0 +grave 0 +star 0 +war 0 +collector 0 +archiv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..b7a9642d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,40 @@ +omid 1 +home 1 +madani 1 +washington 1 +depart 1 +page 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +chateau 0 +suit 0 +bhello 0 +curiou 0 +browser 0 +welcom 0 +fourth 0 +year 0 +graduat 0 +student 0 +enjoytheori 0 +also 0 +like 0 +keep 0 +touch 0 +areasinclud 0 +graphic 0 +life 0 +work 0 +academ 0 +want 0 +look 0 +islamicarchitectur 0 +isfahan 0 +best 0 +nomine 0 +citi 0 +countri 0 +iran 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..64722c02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,18 @@ +mike 1 +perkowitz 0 +page 0 +perkowitznewsflash 0 +goe 0 +blond 0 +area 0 +research 0 +academia 0 +music 0 +creativ 0 +randomfavorit 0 +sheba 0 +voyeur 0 +written 0 +grooveneedl 0 +espressoresumemik 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..d428ec7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,64 @@ +washington 1 +langheinrich 0 +bielefeld 0 +univers 0 +depart 0 +comput 0 +marclang 0 +homepagemarc 0 +email 0 +scienc 0 +phone 0 +marc 0 +langheinrichuniversitt 0 +washingtontechnisch 0 +fakultt 0 +scienceemail 0 +imlangh 0 +techfak 0 +eduabout 0 +myselfi 0 +spent 0 +last 0 +year 0 +theunivers 0 +visit 0 +graduat 0 +student 0 +thefulbright 0 +program 0 +check 0 +follow 0 +link 0 +depthinform 0 +resum 0 +project 0 +short 0 +biopost 0 +addressa 0 +septemb 0 +back 0 +germani 0 +finish 0 +mastersat 0 +pleas 0 +contact 0 +german 0 +address 0 +homeschoolgermanyringstra 0 +maintalphon 0 +paulusplatz 0 +bielefeldphon 0 +woodlawn 0 +seattl 0 +sieg 0 +hall 0 +browser 0 +support 0 +tabl 0 +access 0 +data 0 +list 0 +formatmarc 0 +http 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..ab94abaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,109 @@ +visual 1 +marla 0 +baker 0 +comput 0 +softwar 0 +program 0 +languag 0 +work 0 +eick 0 +washington 0 +stephen 0 +system 0 +engin 0 +user 0 +interfac 0 +interact 0 +educ 0 +collabor 0 +current 0 +object 0 +wai 0 +journal 0 +burnett 0 +larg 0 +home 0 +chief 0 +editor 0 +depart 0 +scienc 0 +univers 0 +seattl 0 +partner 0 +crime 0 +bentlei 0 +academ 0 +interestsgraph 0 +human 0 +support 0 +learn 0 +cscl 0 +graphic 0 +techniqu 0 +stevetanimoto 0 +lauren 0 +bricker 0 +coimag 0 +project 0 +devleop 0 +activ 0 +order 0 +explor 0 +cooper 0 +contol 0 +goal 0 +investig 0 +differ 0 +multipl 0 +cansimultan 0 +share 0 +manipul 0 +given 0 +assess 0 +also 0 +part 0 +time 0 +packard 0 +bell 0 +compani 0 +resum 0 +publicationsbak 0 +space 0 +fill 0 +june 0 +bohu 0 +carlson 0 +yang 0 +scale 0 +ieee 0 +special 0 +issu 0 +march 0 +margaret 0 +classif 0 +septemb 0 +tool 0 +proceed 0 +intern 0 +confer 0 +sorento 0 +itali 0 +method 0 +apparatu 0 +displai 0 +hierarch 0 +inform 0 +patent 0 +applic 0 +submit 0 +octob 0 +tutori 0 +geometr 0 +transform 0 +imag 0 +metip 0 +environ 0 +check 0 +page 0 +offic 0 +sieg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..b3244670 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,87 @@ +compil 1 +philipos 0 +dynam 0 +runtim 0 +matthai 0 +work 0 +chamber 0 +home 0 +page 0 +code 0 +like 0 +washington 0 +seattl 0 +ausland 0 +egger 0 +support 0 +system 0 +stuff 0 +project 0 +compileri 0 +beast 0 +gener 0 +optim 0 +shortterm 0 +interest 0 +figur 0 +produc 0 +good 0 +modern 0 +processor 0 +architectur 0 +applic 0 +side 0 +think 0 +interpret 0 +basedsystem 0 +real 0 +time 0 +constraint 0 +java 0 +browser 0 +canbenefit 0 +select 0 +wire 0 +asystem 0 +goe 0 +withprofessor 0 +susan 0 +eggersand 0 +craig 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +phone 0 +public 0 +bershad 0 +fast 0 +effect 0 +confer 0 +onprogram 0 +languag 0 +design 0 +implement 0 +mock 0 +andp 0 +pardyak 0 +automaticdynam 0 +event 0 +dispatch 0 +extens 0 +workshop 0 +softwar 0 +februari 0 +bookmark 0 +plai 0 +frequentlymiscellan 0 +link 0 +local 0 +importancefrom 0 +past 0 +abuwhi 0 +black 0 +blue 0 +ribbon 0 +campaign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..182df286 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,174 @@ +mckenzi 1 +neil 0 +rout 0 +link 0 +page 0 +project 0 +current 0 +inform 0 +east 0 +seattl 0 +larri 0 +us 0 +contact 0 +past 0 +public 0 +person 0 +mail 0 +merl 0 +coast 0 +time 0 +carl 0 +chaotic 0 +design 0 +implement 0 +messag 0 +pass 0 +interfac 0 +network 0 +tool 0 +gemini 0 +washington 0 +comput 0 +last 0 +updat 0 +angel 0 +shot 0 +menu 0 +fine 0 +dine 0 +index 0 +futur 0 +list 0 +game 0 +mitsubishi 0 +electr 0 +research 0 +laboratori 0 +broadwai 0 +floor 0 +cambridg 0 +phone 0 +projectsgonna 0 +teenag 0 +lobotomi 0 +ramonesi 0 +live 0 +mile 0 +andwork 0 +note 0 +involv 0 +projectconcern 0 +real 0 +volum 0 +render 0 +medic 0 +data 0 +copiou 0 +free 0 +expatri 0 +graduat 0 +student 0 +work 0 +onchaot 0 +routingwith 0 +faculti 0 +advisor 0 +ebel 0 +snyder 0 +packet 0 +algorithm 0 +mesh 0 +torusnetwork 0 +dissert 0 +thecranium 0 +compatiblewith 0 +projectsi 0 +teach 0 +assist 0 +summer 0 +chip 0 +tester 0 +call 0 +mactest 0 +maintain 0 +netlist 0 +graph 0 +isomorph 0 +calledgemini 0 +industri 0 +speak 0 +knowna 0 +layout 0 +schemat 0 +avail 0 +interest 0 +pleas 0 +send 0 +mcmurchi 0 +cranium 0 +adapt 0 +packetrout 0 +proceed 0 +parallel 0 +andcommun 0 +workshop 0 +tomactest 0 +home 0 +user 0 +guid 0 +march 0 +marri 0 +pictur 0 +hous 0 +arlington 0 +massachusett 0 +head 0 +livein 0 +fashion 0 +neighborhood 0 +ofballard 0 +creativ 0 +contain 0 +exampl 0 +artworkcr 0 +adob 0 +photoshop 0 +ownedthi 0 +year 0 +onlyth 0 +memori 0 +remain 0 +shirt 0 +correctlyguess 0 +answer 0 +toriddl 0 +jour 0 +octob 0 +label 0 +place 0 +jar 0 +countri 0 +farm 0 +honei 0 +produc 0 +myuncl 0 +edmonton 0 +alberta 0 +canada 0 +amus 0 +linkschairman 0 +linksnorm 0 +gregori 0 +bookmark 0 +halcyon 0 +eugen 0 +spafford 0 +purdu 0 +randi 0 +pausch 0 +virginia 0 +wallach 0 +scool 0 +princeton 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..39aaf1a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,151 @@ +system 1 +oper 0 +extens 0 +washington 0 +spin 0 +paper 0 +protocol 0 +applic 0 +marc 0 +fiuczynski 0 +comput 0 +scienc 0 +engin 0 +univers 0 +work 0 +network 0 +interfac 0 +safe 0 +architectur 0 +compar 0 +perform 0 +describ 0 +proceed 0 +softwar 0 +home 0 +page 0 +depart 0 +spent 0 +creat 0 +telephon 0 +us 0 +forappl 0 +specif 0 +achiev 0 +run 0 +demonstr 0 +servic 0 +http 0 +request 0 +contacthttp 0 +design 0 +implement 0 +kernel 0 +appear 0 +languag 0 +shortcom 0 +dynam 0 +protect 0 +ieee 0 +analysi 0 +seattl 0 +backgroundi 0 +graduat 0 +student 0 +grewup 0 +germani 0 +near 0 +sseldorf 0 +year 0 +highschool 0 +princeton 0 +receiv 0 +fromrutg 0 +sever 0 +summer 0 +bell 0 +lab 0 +mitr 0 +corpor 0 +rang 0 +ofproject 0 +sole 0 +proprietor 0 +companythat 0 +decemb 0 +sell 0 +distribut 0 +fault 0 +toler 0 +base 0 +built 0 +scratch 0 +setof 0 +chasi 0 +processor 0 +univoic 0 +cardsand 0 +vxwork 0 +time 0 +spend 0 +hack 0 +adapt 0 +primari 0 +contribut 0 +compellingperform 0 +improv 0 +structur 0 +tosimilar 0 +commerci 0 +platform 0 +recent 0 +report 0 +anextens 0 +allow 0 +anyon 0 +custom 0 +anin 0 +graph 0 +enabl 0 +betterperform 0 +similar 0 +conventionaloper 0 +winter 0 +usenix 0 +technicalconfer 0 +safeti 0 +fifteenth 0 +symposium 0 +principl 0 +support 0 +pretti 0 +happi 0 +deal 0 +inord 0 +describeshow 0 +address 0 +link 0 +linker 0 +load 0 +code 0 +point 0 +isth 0 +abil 0 +manag 0 +linkabl 0 +namespac 0 +andcollect 0 +issu 0 +posit 0 +hardwar 0 +mechan 0 +fifth 0 +workshop 0 +topic 0 +region 0 +parallel 0 +elimin 0 +method 0 +data 0 +flow 0 +transact 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..dd5aa7ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,31 @@ +rout 1 +minim 0 +algorithm 0 +model 0 +deflect 0 +melani 0 +fulgham 0 +comput 0 +scienc 0 +versu 0 +method 0 +develop 0 +help 0 +predict 0 +compar 0 +perform 0 +router 0 +real 0 +parallel 0 +machin 0 +upper 0 +lower 0 +bound 0 +practic 0 +requir 0 +sort 0 +mesh 0 +topolog 0 +washington 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..14315481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,64 @@ +washington 1 +comput 1 +singapor 1 +meng 0 +univers 0 +imag 0 +work 0 +heng 0 +homepag 0 +homepagemenghe 0 +edubox 0 +depart 0 +scienc 0 +engineeringunivers 0 +washingtonseattl 0 +second 0 +year 0 +student 0 +scienceat 0 +undergrad 0 +pennsylvania 0 +research 0 +interestsi 0 +interest 0 +retriev 0 +problem 0 +try 0 +findimag 0 +huge 0 +databas 0 +virag 0 +andqbicar 0 +commerci 0 +exampl 0 +similar 0 +kind 0 +stuff 0 +snapshot 0 +done 0 +singaporesingapor 0 +infomap 0 +provid 0 +fact 0 +andstatist 0 +singaporeonlin 0 +guid 0 +plan 0 +take 0 +trip 0 +nation 0 +boardi 0 +charg 0 +transform 0 +anintellig 0 +island 0 +graduat 0 +strait 0 +time 0 +main 0 +english 0 +newspap 0 +visit 0 +sinc 0 +menghe 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..41edd95b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,70 @@ +comput 1 +includ 0 +program 0 +michael 0 +ernst 0 +graduat 0 +student 0 +washington 0 +scienc 0 +depart 0 +research 0 +page 0 +workshop 0 +maintain 0 +home 0 +pagemichael 0 +ernsti 0 +univers 0 +previous 0 +lectur 0 +riceunivers 0 +sciencedepart 0 +programanalysi 0 +group 0 +microsoft 0 +laboratori 0 +eec 0 +frequent 0 +updat 0 +technic 0 +interest 0 +compil 0 +static 0 +analysi 0 +slice 0 +debug 0 +optim 0 +code 0 +serial 0 +parallel 0 +chair 0 +intermedi 0 +represent 0 +coloc 0 +popl 0 +intellectu 0 +properti 0 +particularli 0 +area 0 +game 0 +theori 0 +cryptographi 0 +philosophi 0 +denot 0 +semanticsi 0 +list 0 +resourcesfor 0 +confer 0 +organ 0 +occasion 0 +manag 0 +slip 0 +awai 0 +work 0 +carri 0 +real 0 +life 0 +link 0 +possibleinterest 0 +mernst 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..bbbc27d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,124 @@ +page 1 +home 0 +link 0 +mock 0 +comput 0 +scienc 0 +interest 0 +chess 0 +marku 0 +washington 0 +come 0 +compil 0 +current 0 +time 0 +stuff 0 +live 0 +search 0 +rttemberg 0 +grew 0 +anotherpart 0 +state 0 +district 0 +biberach 0 +upper 0 +swabia 0 +oberschwaben 0 +studi 0 +univers 0 +karlsruh 0 +whichi 0 +obtain 0 +diplom 0 +also 0 +spent 0 +year 0 +umass 0 +fulbright 0 +grante 0 +main 0 +research 0 +parallel 0 +distribut 0 +object 0 +orient 0 +system 0 +work 0 +dynam 0 +includ 0 +spanish 0 +latin 0 +american 0 +cultur 0 +travel 0 +good 0 +book 0 +labyrinth 0 +solitud 0 +mainstream 0 +movi 0 +dieangst 0 +torwart 0 +beim 0 +elfmet 0 +know 0 +handk 0 +salsa 0 +merengu 0 +danc 0 +still 0 +left 0 +check 0 +els 0 +seattl 0 +publicationssepar 0 +list 0 +event 0 +olympiad 0 +yerewan 0 +colloquia 0 +oopsla 0 +volunt 0 +mossi 0 +bit 0 +grad 0 +journal 0 +csek 0 +csebi 0 +cse 0 +cours 0 +graduat 0 +studentsimag 0 +depart 0 +electr 0 +engineeringy 0 +wouldn 0 +expect 0 +squar 0 +view 0 +metacrawl 0 +altavista 0 +deutsch 0 +well 0 +realaudio 0 +cool 0 +linksand 0 +quot 0 +consid 0 +lili 0 +field 0 +grow 0 +toil 0 +neither 0 +spin 0 +unto 0 +even 0 +solomon 0 +glorywa 0 +arrai 0 +like 0 +matthew 0 +access 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..831ad677 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,15 @@ +vivek 1 +narasayya 1 +washington 1 +home 0 +page 0 +nara 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +person 0 +informationresearch 0 +interestspap 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..a767800b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,69 @@ +himanshu 1 +nautiy 1 +comput 0 +scienc 0 +engin 0 +washington 0 +offic 0 +name 0 +india 0 +internet 0 +home 0 +pagehimanshu 0 +nautiyalthi 0 +page 0 +heavi 0 +construct 0 +nautiyaldept 0 +mail 0 +stop 0 +univers 0 +seattl 0 +sieg 0 +phone 0 +cours 0 +take 0 +autumn 0 +quarter 0 +principl 0 +digit 0 +system 0 +design 0 +artifici 0 +intellig 0 +finger 0 +edugod 0 +gift 0 +personkind 0 +dougla 0 +adam 0 +terri 0 +pratchett 0 +pelham 0 +grenvil 0 +wodehouseth 0 +order 0 +alphabet 0 +last 0 +impli 0 +favorit 0 +link 0 +place 0 +radio 0 +search 0 +friend 0 +delhi 0 +finish 0 +tech 0 +astronomi 0 +skate 0 +aviat 0 +travel 0 +numismat 0 +sound 0 +much 0 +profound 0 +coin 0 +collect 0 +cook 0 +movi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..46542ace --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,90 @@ +avail 1 +need 1 +technolog 0 +labyrinth 0 +mediocr 0 +bemoan 0 +hype 0 +skeptic 0 +cynic 0 +sinc 0 +research 0 +current 0 +work 0 +automaticconstruct 0 +wrapper 0 +inform 0 +resourc 0 +alsointerest 0 +sever 0 +area 0 +artifici 0 +intellig 0 +andcognit 0 +scienc 0 +paper 0 +beeninvolv 0 +stuff 0 +anonym 0 +servic 0 +provid 0 +glbal 0 +infrmatin 0 +sperhighwai 0 +preliminari 0 +version 0 +divers 0 +meter 0 +pictur 0 +hand 0 +shortli 0 +surgeri 0 +random 0 +number 0 +alwai 0 +handi 0 +know 0 +date 0 +time 0 +week 0 +favorit 0 +color 0 +line 0 +lost 0 +easili 0 +return 0 +page 0 +ronald 0 +wilson 0 +reagan 0 +temperatur 0 +look 0 +javascript 0 +enabl 0 +browser 0 +automat 0 +send 0 +mail 0 +great 0 +republican 0 +tell 0 +like 0 +miscellani 0 +contact 0 +bookmark 0 +societi 0 +awar 0 +bitter 0 +ironi 0 +involv 0 +nonetheless 0 +madeavail 0 +wendel 0 +berri 0 +guidelin 0 +constitutesgood 0 +comment 0 +nichola 0 +kushmerick 0 +uwcs 0 +seattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..0f9d7287 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,75 @@ +page 1 +inform 0 +ohlrich 0 +wayn 0 +research 0 +memori 0 +home 0 +us 0 +washington 0 +univers 0 +seattl 0 +work 0 +leveson 0 +bershad 0 +karlin 0 +romer 0 +project 0 +isca 0 +contain 0 +invest 0 +depart 0 +comput 0 +scienc 0 +engin 0 +offic 0 +guggenheim 0 +annex 0 +chateau 0 +public 0 +current 0 +nanci 0 +model 0 +check 0 +automat 0 +test 0 +gener 0 +specif 0 +spare 0 +time 0 +brian 0 +anna 0 +perform 0 +analysi 0 +local 0 +known 0 +sever 0 +damag 0 +group 0 +paper 0 +make 0 +debut 0 +itali 0 +summer 0 +safeti 0 +sytem 0 +reduc 0 +overhead 0 +onlin 0 +superpag 0 +promot 0 +class 0 +cours 0 +person 0 +interest 0 +game 0 +world 0 +wonder 0 +sort 0 +link 0 +found 0 +creat 0 +octob 0 +last 0 +modifi 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..bc26fcc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,86 @@ +gershoni 1 +year 0 +washington 0 +graduat 0 +scienc 0 +sieg 0 +seattl 0 +univers 0 +live 0 +california 0 +berkelei 0 +israel 0 +class 0 +comput 0 +offic 0 +summer 0 +time 0 +studentcomput 0 +engin 0 +departmentunivers 0 +washingtonoffic 0 +home 0 +second 0 +student 0 +ofwashington 0 +move 0 +seven 0 +fouryear 0 +three 0 +lancast 0 +northeast 0 +angel 0 +origin 0 +haifa 0 +holon 0 +like 0 +practic 0 +kwon 0 +plai 0 +basketbal 0 +hike 0 +quarter 0 +take 0 +whole 0 +bunch 0 +seminar 0 +amta 0 +architectur 0 +usual 0 +find 0 +hour 0 +aremondai 0 +wednesdai 0 +potenti 0 +employ 0 +welcom 0 +look 0 +resum 0 +pictur 0 +took 0 +last 0 +click 0 +tose 0 +cool 0 +shirt 0 +design 0 +made 0 +graphicsprogram 0 +call 0 +virtual 0 +realiti 0 +interest 0 +link 0 +daili 0 +new 0 +summari 0 +york 0 +riderlink 0 +seattletransport 0 +option 0 +inform 0 +mathemat 0 +depart 0 +access 0 +sinc 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..116f5cf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,78 @@ +tequila 1 +ortega 0 +ross 0 +washington 0 +would 0 +work 0 +chinook 0 +project 0 +color 0 +wear 0 +jean 0 +depart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +knew 0 +call 0 +research 0 +albert 0 +einstein 0 +welcom 0 +home 0 +pageaft 0 +go 0 +school 0 +boston 0 +year 0 +decid 0 +head 0 +west 0 +realli 0 +northwest 0 +came 0 +fall 0 +leav 0 +sometim 0 +accord 0 +advisor 0 +gaetano 0 +borriello 0 +offici 0 +tool 0 +real 0 +time 0 +embed 0 +control 0 +system 0 +unoffici 0 +brew 0 +beer 0 +learn 0 +hack 0 +try 0 +teach 0 +german 0 +shepherd 0 +behav 0 +profession 0 +section 0 +myresum 0 +file 0 +educ 0 +experi 0 +public 0 +paper 0 +puppi 0 +pictur 0 +offic 0 +sieg 0 +check 0 +page 0 +link 0 +find 0 +interest 0 +last 0 +updatedthu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..315d3998 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,121 @@ +seattl 1 +weather 0 +comput 0 +washington 0 +show 0 +beth 0 +pardo 0 +sometim 0 +work 0 +item 0 +site 0 +think 0 +courtesei 0 +also 0 +untitl 0 +document 0 +flat 0 +morri 0 +minor 0 +pardodepart 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +washingtonusapardo 0 +edunot 0 +blue 0 +ribbon 0 +asimgsrc 0 +http 0 +graphic 0 +icon 0 +blueribbon 0 +rib_trn_plain_sm 0 +quiet 0 +opposit 0 +free 0 +speechprohibit 0 +nation 0 +telecommun 0 +bill 0 +likewis 0 +white 0 +letter 0 +black 0 +background 0 +everybodi 0 +need 0 +around 0 +share 0 +academicsom 0 +papersi 0 +find 0 +particularli 0 +interest 0 +runtim 0 +code 0 +gener 0 +rtcg 0 +instruct 0 +simul 0 +trace 0 +tool 0 +home 0 +page 0 +oncomput 0 +architectureandcompil 0 +quick 0 +link 0 +otherpeopl 0 +class 0 +thesi 0 +stylenon 0 +academicfeatur 0 +featur 0 +month 0 +weak 0 +week 0 +doesn 0 +blink 0 +anymor 0 +regular 0 +itemsbicyclesbusinessescomputersfoodhumori 0 +famou 0 +thing 0 +relat 0 +legal 0 +ethic 0 +weirdnesslinux 0 +journalmusicgoofi 0 +politicssci 0 +though 0 +unrel 0 +stuff 0 +transport 0 +movi 0 +list 0 +film 0 +festiv 0 +dant 0 +search 0 +truli 0 +gross 0 +stori 0 +trepan 0 +privaci 0 +log 0 +mail 0 +address 0 +wors 0 +take 0 +data 0 +disk 0 +everi 0 +time 0 +consid 0 +particular 0 +newhous 0 +newspap 0 +courtesi 0 +yesterdai 0 +stuffpardo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..3253b81a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,88 @@ +washington 1 +research 1 +system 1 +school 0 +seattl 0 +relat 0 +pardyak 0 +home 0 +pardi 0 +grad 0 +year 0 +comput 0 +scienc 0 +also 0 +languag 0 +drizzl 0 +interest 0 +group 0 +mechan 0 +object 0 +univers 0 +phone 0 +przemek 0 +page 0 +przemyslaw 0 +first 0 +week 0 +coupl 0 +later 0 +third 0 +graduat 0 +student 0 +theunivers 0 +current 0 +area 0 +ofoper 0 +interast 0 +distribut 0 +compil 0 +besid 0 +life 0 +fill 0 +withth 0 +hike 0 +outdoor 0 +activ 0 +notbusi 0 +enjoi 0 +book 0 +music 0 +find 0 +short 0 +descript 0 +resum 0 +list 0 +paper 0 +outdat 0 +happenswhen 0 +busi 0 +schedul 0 +projectsspinan 0 +extens 0 +oper 0 +built 0 +gloriou 0 +leadership 0 +brian 0 +bershad 0 +base 0 +systemsgroup 0 +commun 0 +emerald 0 +basedprogram 0 +time 0 +link 0 +polish 0 +connect 0 +variou 0 +resourc 0 +somehow 0 +poland 0 +project 0 +mine 0 +unrel 0 +miscellan 0 +work 0 +engin 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..9cf307b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,167 @@ +year 1 +bike 0 +davi 0 +student 0 +us 0 +school 0 +taught 0 +prolog 0 +chri 0 +html 0 +franklin 0 +home 0 +graduat 0 +depart 0 +comput 0 +scienc 0 +work 0 +first 0 +second 0 +seattl 0 +dai 0 +around 0 +high 0 +time 0 +engin 0 +univers 0 +paul 0 +pagepaul 0 +pagei 0 +current 0 +univ 0 +washington 0 +inth 0 +offici 0 +myoffic 0 +sieg 0 +rapid 0 +project 0 +thesumm 0 +someon 0 +express 0 +concern 0 +aboutthi 0 +character 0 +usual 0 +somewher 0 +north 0 +iliv 0 +anoth 0 +pictur 0 +best 0 +oneof 0 +scan 0 +better 0 +on 0 +norwegian 0 +poem 0 +likea 0 +collect 0 +fortun 0 +receiv 0 +friend 0 +localchines 0 +restaur 0 +mundan 0 +stuffi 0 +hope 0 +folk 0 +might 0 +find 0 +stuff 0 +hotlink 0 +pagesstuff 0 +maintainmi 0 +schedul 0 +rememb 0 +updat 0 +contact 0 +love 0 +travel 0 +necessarili 0 +tell 0 +everyon 0 +hire 0 +mewher 0 +come 0 +diploma 0 +live 0 +inmorgan 0 +hill 0 +fromuc 0 +andy 0 +glad 0 +ididn 0 +bergen 0 +ialso 0 +research 0 +professor 0 +electr 0 +andcomput 0 +stuffwhil 0 +partner 0 +variou 0 +relatedact 0 +tend 0 +matt 0 +jame 0 +evengot 0 +togeth 0 +recent 0 +marriag 0 +joann 0 +anexcus 0 +brother 0 +also 0 +made 0 +itin 0 +photo 0 +throughout 0 +undergradu 0 +kept 0 +never 0 +flat 0 +exchang 0 +ofbergen 0 +hillier 0 +longer 0 +rout 0 +returnedto 0 +took 0 +rollerblad 0 +sinc 0 +town 0 +wasnow 0 +easi 0 +drop 0 +hewlettpackard 0 +return 0 +vengeanc 0 +move 0 +toseattl 0 +done 0 +annual 0 +portland 0 +ride 0 +intwo 0 +inseason 0 +march 0 +april 0 +june 0 +rest 0 +justcommut 0 +lot 0 +danc 0 +particularli 0 +lindyhop 0 +know 0 +everi 0 +document 0 +header 0 +linethat 0 +look 0 +someth 0 +like 0 +doctyp 0 +public 0 +ietf 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..300fc0ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,98 @@ +pighin 1 +anna 1 +karlin 1 +might 1 +like 0 +rain 0 +live 0 +action 0 +pictur 0 +refresh 0 +everi 0 +minut 0 +note 0 +frdric 0 +washington 0 +comput 0 +scienc 0 +lcommun 0 +wonder 0 +citi 0 +tourist 0 +quarter 0 +ta 0 +taught 0 +gui 0 +found 0 +much 0 +often 0 +dani 0 +qual 0 +report 0 +rest 0 +british 0 +movi 0 +monti 0 +python 0 +swim 0 +nick 0 +cave 0 +corto 0 +maltes 0 +italian 0 +comic 0 +cat 0 +pari 0 +berlin 0 +venis 0 +simpson 0 +mpeg 0 +surpris 0 +traditionn 0 +french 0 +marin 0 +song 0 +collect 0 +otherwis 0 +work 0 +graphic 0 +supervis 0 +although 0 +formerli 0 +studi 0 +systemher 0 +name 0 +paper 0 +implement 0 +global 0 +memori 0 +manag 0 +workstat 0 +cluster 0 +michael 0 +feelei 0 +william 0 +morgan 0 +freder 0 +henri 0 +levi 0 +chandramohan 0 +thekkath 0 +proceed 0 +symposium 0 +oper 0 +system 0 +principl 0 +decemb 0 +postscript 0 +breath 0 +take 0 +door 0 +lucki 0 +even 0 +look 0 +darren 0 +juan 0 +dark 0 +squar 0 +five 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..724acb1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,14 @@ +ruth 1 +anderson 1 +washington 1 +home 0 +page 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +wxyc 0 +map 0 +brother 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..fa51ca93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,12 @@ +home 1 +washington 1 +redston 1 +josh 0 +page 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +joshua 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..bd4113ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,162 @@ +jakobovit 1 +scienc 0 +univers 0 +home 0 +washington 0 +comput 0 +seattl 0 +live 0 +imag 0 +camera 0 +research 0 +base 0 +manag 0 +program 0 +build 0 +databas 0 +inform 0 +professor 0 +psycholog 0 +hawaii 0 +onlin 0 +librari 0 +page 0 +engin 0 +depart 0 +usai 0 +get 0 +departmentof 0 +ofwashington 0 +wonder 0 +citi 0 +alwai 0 +rain 0 +thisup 0 +date 0 +weatherreport 0 +sneak 0 +peek 0 +video 0 +mount 0 +outsid 0 +point 0 +beauti 0 +drumhel 0 +fountain 0 +clear 0 +youcan 0 +catch 0 +glimps 0 +rainier 0 +glori 0 +probabl 0 +cleardai 0 +nice 0 +color 0 +pictur 0 +develop 0 +repositori 0 +toolkit 0 +multi 0 +media 0 +consol 0 +construct 0 +part 0 +astructur 0 +framework 0 +brain 0 +map 0 +knowledg 0 +support 0 +digit 0 +anatomist 0 +line 0 +interact 0 +atla 0 +human 0 +bodi 0 +implement 0 +databaseenviron 0 +vision 0 +local 0 +expert 0 +persistentprogram 0 +languag 0 +interest 0 +els 0 +proud 0 +creator 0 +internetracquetbal 0 +ladder 0 +taught 0 +advanc 0 +extens 0 +wrote 0 +perl 0 +script 0 +rotisseriebasebal 0 +leagu 0 +stand 0 +updat 0 +daili 0 +stat 0 +fromusa 0 +todai 0 +rais 0 +happi 0 +famili 0 +africancichlid 0 +visit 0 +town 0 +honolulu 0 +everi 0 +chanc 0 +camp 0 +magic 0 +kalalau 0 +vallei 0 +movi 0 +gambl 0 +stock 0 +market 0 +darn 0 +good 0 +fantasi 0 +footbal 0 +team 0 +newslet 0 +would 0 +javafamili 0 +link 0 +mydad 0 +leon 0 +jame 0 +whoi 0 +write 0 +book 0 +traffic 0 +foster 0 +polem 0 +emanuel 0 +swedenborg 0 +step 0 +dian 0 +nahl 0 +whoprovid 0 +great 0 +index 0 +judi 0 +realtor 0 +uncl 0 +eddi 0 +run 0 +site 0 +bioscienc 0 +profession 0 +bookmarksif 0 +java 0 +click 0 +drag 0 +word 0 +make 0 +poem 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..10a84c88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight 1 +welcom 1 +galleri 1 +twenti 1 +photograph 1 +five 1 +head 1 +robert 1 +grimm 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..faf7eb9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,142 @@ +romer 1 +bershad 0 +perform 0 +chen 0 +comput 0 +washington 0 +research 0 +memori 0 +karlin 0 +ohlrich 0 +wong 0 +paper 0 +isca 0 +dynam 0 +page 0 +conflict 0 +asplo 0 +friend 0 +scientist 0 +depart 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +home 0 +offic 0 +eduoffic 0 +chateau 0 +sieg 0 +interestsi 0 +oper 0 +system 0 +supportfor 0 +high 0 +systemswith 0 +realli 0 +smart 0 +peopl 0 +likebrian 0 +brad 0 +alan 0 +eustac 0 +anna 0 +denni 0 +wayn 0 +andwayn 0 +three 0 +recent 0 +subject 0 +reduc 0 +overhead 0 +us 0 +onlinesuperpag 0 +promot 0 +map 0 +polici 0 +cach 0 +resolutionon 0 +standard 0 +hardwar 0 +osdi 0 +avoid 0 +miss 0 +larg 0 +direct 0 +mappedcach 0 +addit 0 +studi 0 +ofinterpret 0 +learn 0 +theproject 0 +rockyhom 0 +also 0 +wrote 0 +togeth 0 +structur 0 +interpret 0 +voelker 0 +wolman 0 +baer 0 +levi 0 +appear 0 +abstract 0 +postscript 0 +bibliographi 0 +lobo 0 +read 0 +listrandom 0 +stuffa 0 +hous 0 +rai 0 +limb 0 +knee 0 +arthroscop 0 +surgeri 0 +mark 0 +hill 0 +wrist 0 +dylansaid 0 +hair 0 +couldn 0 +flowbe 0 +said 0 +could 0 +beingexperiment 0 +conduct 0 +experi 0 +judg 0 +result 0 +attend 0 +travel 0 +europ 0 +took 0 +somepictur 0 +eatsomeon 0 +els 0 +food 0 +accompani 0 +sincer 0 +ration 0 +forexampl 0 +lunch 0 +thought 0 +leftth 0 +countri 0 +would 0 +didn 0 +origin 0 +unknown 0 +edward 0 +tuft 0 +tip 0 +public 0 +speak 0 +father 0 +edit 0 +american 0 +journal 0 +physic 0 +place 0 +ticker 0 +symbol 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..742e3614 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,157 @@ +memori 1 +system 1 +washington 1 +page 0 +romer 0 +perform 0 +project 0 +superpag 0 +polici 0 +bershad 0 +univers 0 +monitor 0 +dynam 0 +overhead 0 +us 0 +cach 0 +miss 0 +research 0 +improv 0 +applic 0 +identifi 0 +size 0 +qual 0 +karlin 0 +ohlrich 0 +descript 0 +work 0 +hardwar 0 +support 0 +mechan 0 +small 0 +current 0 +cost 0 +intern 0 +fragment 0 +region 0 +construct 0 +paper 0 +reduc 0 +onlin 0 +promot 0 +isca 0 +appear 0 +algorithm 0 +wayn 0 +wong 0 +map 0 +conflict 0 +chen 0 +report 0 +washingtonmemori 0 +researchdepart 0 +comput 0 +scienc 0 +engin 0 +seattl 0 +welcom 0 +home 0 +group 0 +investig 0 +techniqu 0 +operatingsystem 0 +sharesth 0 +follow 0 +featur 0 +reli 0 +combin 0 +simpl 0 +oper 0 +modif 0 +behavior 0 +incur 0 +runtim 0 +inform 0 +collect 0 +sourc 0 +delai 0 +resolv 0 +bottleneck 0 +also 0 +significantli 0 +overal 0 +recent 0 +explor 0 +monitorappl 0 +refer 0 +pattern 0 +order 0 +resolvetlb 0 +problem 0 +poor 0 +result 0 +tlbi 0 +cover 0 +severalmodern 0 +architectur 0 +whose 0 +amultipl 0 +base 0 +tlbperform 0 +larger 0 +ofwast 0 +simul 0 +sever 0 +adapt 0 +todiffer 0 +address 0 +space 0 +constructingsuperpag 0 +copi 0 +compon 0 +contigu 0 +ofmemori 0 +develop 0 +balancesth 0 +potenti 0 +benefit 0 +reduct 0 +futur 0 +tlbmiss 0 +memorycopi 0 +misspattern 0 +warrant 0 +attain 0 +largepag 0 +without 0 +detail 0 +look 0 +someon 0 +implement 0 +would 0 +makea 0 +good 0 +master 0 +peoplefaculti 0 +brian 0 +anna 0 +student 0 +denni 0 +dlee 0 +waynew 0 +resolut 0 +standard 0 +osdi 0 +avoid 0 +larg 0 +direct 0 +asplo 0 +comparison 0 +mip 0 +alpha 0 +instruct 0 +effect 0 +differ 0 +code 0 +reorder 0 +bibliographi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..d6cb859c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,96 @@ +washington 1 +interpret 0 +perform 0 +languag 0 +project 0 +architectur 0 +sever 0 +baer 0 +bershad 0 +levi 0 +romer 0 +voelker 0 +wolman 0 +rocki 0 +last 0 +environ 0 +implement 0 +collect 0 +benchmark 0 +wong 0 +etch 0 +avail 0 +intern 0 +descript 0 +becom 0 +increasingli 0 +popular 0 +year 0 +part 0 +demand 0 +portabl 0 +safeti 0 +eas 0 +examin 0 +perspect 0 +strategi 0 +processor 0 +util 0 +basi 0 +studi 0 +microbenchmark 0 +perl 0 +java 0 +mipsi 0 +us 0 +variou 0 +instrument 0 +trace 0 +techniqu 0 +evalu 0 +characterist 0 +order 0 +gain 0 +insight 0 +similar 0 +differ 0 +execut 0 +peoplefaculti 0 +jean 0 +loup 0 +brian 0 +henri 0 +student 0 +denni 0 +dlee 0 +geoff 0 +alec 0 +wayn 0 +waynew 0 +papersrom 0 +structur 0 +asplo 0 +appear 0 +abstractpostscriptjava 0 +measur 0 +xjava 0 +sourc 0 +file 0 +benchmarkstoolsto 0 +inform 0 +applic 0 +vebeen 0 +build 0 +binari 0 +rewrit 0 +tool 0 +call 0 +yetpublicli 0 +read 0 +etchhom 0 +page 0 +documentationproject 0 +document 0 +peopl 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..fe3740d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,98 @@ +scienc 1 +washington 0 +seattl 0 +system 0 +softwar 0 +richard 0 +phone 0 +intellig 0 +scam 0 +imag 0 +process 0 +comput 0 +work 0 +document 0 +also 0 +camp 0 +splash 0 +program 0 +nation 0 +roger 0 +home 0 +page 0 +rogersrrog 0 +educomput 0 +engin 0 +departmentunivers 0 +usaoffic 0 +chateau 0 +sieg 0 +laboratri 0 +research 0 +develop 0 +systol 0 +cellular 0 +arrai 0 +machin 0 +massiv 0 +parallel 0 +includ 0 +compil 0 +basic 0 +morpholog 0 +librari 0 +simul 0 +obtain 0 +paper 0 +current 0 +layout 0 +extract 0 +help 0 +produc 0 +groundtruth 0 +databas 0 +optic 0 +charact 0 +recognit 0 +commun 0 +director 0 +facil 0 +northwest 0 +center 0 +environment 0 +educ 0 +ncee 0 +offer 0 +summer 0 +student 0 +ag 0 +beauti 0 +juan 0 +island 0 +univers 0 +year 0 +long 0 +foundat 0 +fund 0 +grade 0 +minor 0 +girl 0 +area 0 +interest 0 +corn 0 +snake 0 +jessica 0 +squishi 0 +order 0 +increas 0 +length 0 +kuow 0 +public 0 +radio 0 +stationi 0 +bake 0 +best 0 +pecan 0 +seattlelast 0 +modifi 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..c18014dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,19 @@ +mike 1 +home 1 +washington 1 +page 0 +salisburysalisbur 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +usaoffic 0 +chateau 0 +sieg 0 +lifehistori 0 +school 0 +friend 0 +vitacool 0 +stuff 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..6a402a79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,177 @@ +system 1 +oper 0 +proceed 0 +extens 0 +applic 0 +paper 0 +first 0 +workshop 0 +talk 0 +spin 0 +support 0 +reserv 0 +processor 0 +savag 0 +washington 0 +symposium 0 +slide 0 +softwar 0 +usenix 0 +capac 0 +multimedia 0 +time 0 +mach 0 +stefan 0 +work 0 +comput 0 +univers 0 +seattl 0 +industri 0 +bershad 0 +student 0 +american 0 +need 0 +compil 0 +wcsss 0 +tucson 0 +issu 0 +design 0 +abstract 0 +version 0 +microkernel 0 +specif 0 +servic 0 +januari 0 +tech 0 +report 0 +afraid 0 +confer 0 +real 0 +scienc 0 +engin 0 +depart 0 +home 0 +sampl 0 +rich 0 +post 0 +cultur 0 +modern 0 +pittsburghfor 0 +year 0 +caught 0 +ride 0 +migrat 0 +mnow 0 +gradual 0 +rank 0 +strongbackground 0 +centuri 0 +histori 0 +provid 0 +witha 0 +firm 0 +irrelev 0 +platform 0 +trash 0 +peer 0 +fool 0 +tocqeuvil 0 +statement 0 +find 0 +tiresom 0 +inconveni 0 +exercisepolit 0 +right 0 +distract 0 +quit 0 +similar 0 +tocurr 0 +microprocessor 0 +architectur 0 +trend 0 +favor 0 +ofappl 0 +code 0 +brian 0 +rest 0 +merri 0 +band 0 +onan 0 +project 0 +call 0 +projectsspinspin 0 +omnifemtokernel 0 +whichsupport 0 +dynam 0 +adapt 0 +interfac 0 +andimplement 0 +direct 0 +control 0 +stillmaintain 0 +integr 0 +inter 0 +isol 0 +thing 0 +writingspin 0 +safeti 0 +perform 0 +principl 0 +sosp 0 +copper 0 +mountain 0 +decemb 0 +languag 0 +write 0 +modula 0 +protect 0 +fifth 0 +topic 0 +hoto 0 +orca 0 +island 0 +implement 0 +osdi 0 +monterei 0 +novemb 0 +panel 0 +longer 0 +unpublish 0 +sixth 0 +sigop 0 +european 0 +match 0 +appear 0 +review 0 +march 0 +frequent 0 +redund 0 +arrai 0 +independ 0 +disk 0 +winter 0 +technic 0 +diego 0 +best 0 +ieee 0 +intern 0 +boston 0 +manag 0 +usag 0 +fourth 0 +workstat 0 +wwo 0 +napa 0 +octob 0 +carnegi 0 +mellon 0 +timer 0 +export 0 +user 0 +third 0 +santa 0 +april 0 +interest 0 +music 0 +hikingthi 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..90c4baed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,9 @@ +sean 1 +sandi 1 +washington 1 +david 0 +comput 0 +scienc 0 +last 0 +revis 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..bb0401bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,33 @@ +richard 1 +segal 1 +home 0 +page 0 +segaldepart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +washingtonbox 0 +seattl 0 +washington 0 +person 0 +biographi 0 +better 0 +half 0 +famili 0 +pictur 0 +research 0 +overview 0 +brute 0 +internet 0 +softbot 0 +public 0 +curriculum 0 +vita 0 +postscript 0 +amus 0 +archeri 0 +bicycl 0 +racquetbal 0 +ski 0 +softbal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..ccd6249a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,116 @@ +project 1 +char 0 +univers 0 +stefan 0 +washington 0 +activ 0 +els 0 +printf 0 +seattl 0 +teach 0 +assist 0 +work 0 +phone 0 +inform 0 +past 0 +current 0 +interest 0 +page 0 +cologn 0 +complet 0 +come 0 +receiv 0 +high 0 +school 0 +scienc 0 +implement 0 +comparison 0 +hardwar 0 +softwar 0 +solut 0 +fals 0 +share 0 +pictur 0 +print 0 +byte 0 +putchar 0 +main 0 +bergstefan 0 +berg 0 +home 0 +sieg 0 +hall 0 +email 0 +sgberg 0 +content 0 +address 0 +finger 0 +born 0 +germani 0 +spring 0 +mittler 0 +reif 0 +thgrade 0 +schillergymnasium 0 +unit 0 +statesto 0 +diploma 0 +bloomington 0 +north 0 +indiana 0 +bachelor 0 +honor 0 +distinctionin 0 +field 0 +comput 0 +fromindiana 0 +momenth 0 +toward 0 +expect 0 +date 0 +sometim 0 +thiscenturi 0 +reduct 0 +machin 0 +studi 0 +linear 0 +time 0 +sort 0 +algorithm 0 +qual 0 +peopl 0 +around 0 +raft 0 +bookmark 0 +excit 0 +squar 0 +moment 0 +weather 0 +yourselfsometh 0 +crazi 0 +didn 0 +even 0 +particularsolut 0 +done 0 +sall 0 +line 0 +shouldn 0 +contain 0 +trail 0 +carriag 0 +return 0 +compil 0 +without 0 +warn 0 +program 0 +exact 0 +sourc 0 +code 0 +itin 0 +fewer 0 +like 0 +resum 0 +avail 0 +inpostscript 0 +andtex 0 +format 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..46dc67c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,62 @@ +thing 1 +project 1 +pictur 1 +work 1 +page 1 +ward 0 +shadegreet 0 +salut 0 +third 0 +year 0 +grad 0 +student 0 +hereat 0 +dubcs 0 +interact 0 +renderingof 0 +complex 0 +scene 0 +current 0 +follow 0 +link 0 +walkthruproject 0 +amonglot 0 +anim 0 +find 0 +siggraph 0 +paperdescrib 0 +recent 0 +shortcut 0 +click 0 +thepictur 0 +island 0 +lower 0 +left 0 +corner 0 +lot 0 +interest 0 +go 0 +mani 0 +differ 0 +aspectsof 0 +comput 0 +graphic 0 +thegraph 0 +imag 0 +laboratori 0 +get 0 +done 0 +contact 0 +info 0 +daili 0 +schedul 0 +travel 0 +plan 0 +public 0 +look 0 +scrunch 0 +make 0 +browser 0 +least 0 +pixel 0 +wide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..0e7e0353 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,29 @@ +shun 1 +leung 1 +depart 1 +comput 1 +scienc 1 +univers 1 +research 1 +washington 1 +leungshun 0 +student 0 +andengin 0 +ofwashington 0 +work 0 +prof 0 +johnzahorjan 0 +pointer 0 +summari 0 +public 0 +curriculum 0 +vita 0 +upon 0 +request 0 +engin 0 +seattl 0 +email 0 +shuntak 0 +last 0 +modifi 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..ace8a9cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,70 @@ +shuichi 1 +koga 0 +washington 0 +graduat 0 +studi 0 +skoga 0 +haven 0 +involv 0 +asian 0 +also 0 +welcom 0 +home 0 +page 0 +studentcomput 0 +scienc 0 +engin 0 +departmentunivers 0 +name 0 +notic 0 +bynow 0 +start 0 +univers 0 +ofwashington 0 +quit 0 +figur 0 +myqual 0 +much 0 +less 0 +dissert 0 +universityof 0 +virginia 0 +degre 0 +mathemat 0 +alsoheavili 0 +foreign 0 +relat 0 +andgovern 0 +depart 0 +origin 0 +slate 0 +degreein 0 +heavili 0 +user 0 +interfac 0 +groupand 0 +comput 0 +sciencedepart 0 +work 0 +project 0 +call 0 +alic 0 +sinc 0 +anywai 0 +take 0 +look 0 +pictur 0 +smaller 0 +mean 0 +finger 0 +info 0 +current 0 +schedul 0 +neat 0 +hypertext 0 +link 0 +hunt 0 +destroi 0 +bug 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..020b1563 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,48 @@ +soha 1 +washington 1 +current 1 +univers 0 +comput 0 +scienc 0 +engin 0 +dept 0 +hassoun 0 +home 0 +page 0 +hassounit 0 +year 0 +graduat 0 +school 0 +circuit 0 +design 0 +whoturn 0 +develop 0 +work 0 +onarchitectur 0 +retim 0 +professor 0 +carlebel 0 +weekli 0 +schedul 0 +busi 0 +previou 0 +research 0 +educ 0 +experi 0 +public 0 +patent 0 +chao 0 +group 0 +profession 0 +interest 0 +vlsi 0 +site 0 +inform 0 +littl 0 +deede 0 +photo 0 +galleri 0 +address 0 +depart 0 +seattl 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..694715ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,115 @@ +favorit 1 +scienc 0 +sujai 0 +parekh 0 +home 0 +page 0 +comput 0 +washington 0 +seattl 0 +seminar 0 +system 0 +french 0 +softbot 0 +project 0 +interest 0 +soccer 0 +danc 0 +food 0 +work 0 +depart 0 +engin 0 +sieg 0 +hall 0 +chateau 0 +guggenheim 0 +annex 0 +univers 0 +quarter 0 +class 0 +research 0 +simultaneousmultithread 0 +particular 0 +issu 0 +relat 0 +tomultithread 0 +architectur 0 +evalu 0 +simon 0 +emploi 0 +procedur 0 +search 0 +controlsystem 0 +control 0 +action 0 +report 0 +construct 0 +design 0 +remov 0 +patio 0 +convent 0 +workspac 0 +fund 0 +pleas 0 +contact 0 +sport 0 +spud 0 +right 0 +sort 0 +bookmark 0 +like 0 +keep 0 +track 0 +academ 0 +cognit 0 +distribut 0 +parallel 0 +psycholog 0 +philosophi 0 +tenni 0 +sail 0 +squash 0 +volleybal 0 +ballroom 0 +cornel 0 +oracl 0 +corpor 0 +stottler 0 +henk 0 +associ 0 +done 0 +resum 0 +random 0 +person 0 +info 0 +oondhiu 0 +mango 0 +phad 0 +thai 0 +kung 0 +chicken 0 +beverag 0 +screwdriv 0 +scotch 0 +long 0 +island 0 +ic 0 +tango 0 +swing 0 +east 0 +west 0 +coast 0 +salsa 0 +rock 0 +music 0 +dire 0 +strait 0 +pink 0 +floyd 0 +phil 0 +collin 0 +genesi 0 +peter 0 +gabriel 0 +petti 0 +sparekh 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..58d9b9b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,64 @@ +erik 1 +speed 1 +selberg 0 +improv 0 +home 0 +page 0 +name 0 +washington 0 +academ 0 +almost 0 +live 0 +fishcam 0 +address 0 +lara 0 +lewi 0 +memorialhappi 0 +link 0 +peopl 0 +sport 0 +food 0 +drink 0 +cultur 0 +mari 0 +kay 0 +offic 0 +friend 0 +famili 0 +fish 0 +disc 0 +drive 0 +ultim 0 +pasti 0 +power 0 +ur 0 +machin 0 +mountain 0 +bike 0 +spud 0 +softbal 0 +ski 0 +utah 0 +raquetbal 0 +pro 0 +colleg 0 +wedgwood 0 +hous 0 +diet 0 +pepper 0 +salt 0 +lake 0 +roast 0 +compani 0 +bean 0 +bagel 0 +racer 0 +star 0 +war 0 +tini 0 +toon 0 +pinki 0 +brain 0 +phantom 0 +babylon 0 +comic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..4f4bbb03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,134 @@ +parallel 1 +washington 1 +simul 1 +home 0 +sung 0 +univers 0 +work 0 +watch 0 +like 0 +good 0 +plai 0 +scrub 0 +divis 0 +take 0 +page 0 +choiwelcom 0 +thehomepag 0 +ofsung 0 +eunchoi 0 +myschoollifemi 0 +primari 0 +research 0 +interest 0 +compil 0 +program 0 +languag 0 +involv 0 +zplcompil 0 +project 0 +late 0 +beenspend 0 +time 0 +think 0 +optim 0 +communicationgener 0 +us 0 +architechtur 0 +independ 0 +communicationlibrari 0 +ironman 0 +addit 0 +experi 0 +data 0 +programson 0 +superscalar 0 +processor 0 +goal 0 +improv 0 +nodeperform 0 +come 0 +gener 0 +machin 0 +alsobeen 0 +seen 0 +hang 0 +chaosrout 0 +group 0 +includ 0 +graphic 0 +front 0 +visual 0 +thatexperi 0 +current 0 +implement 0 +anoth 0 +router 0 +inzpl 0 +final 0 +also 0 +littl 0 +astronomi 0 +quarter 0 +ta 0 +enjoi 0 +movi 0 +mostli 0 +comfort 0 +yeah 0 +vegetarian 0 +sinc 0 +myjunior 0 +year 0 +colleg 0 +drink 0 +dinner 0 +would 0 +samewithout 0 +wine 0 +result 0 +must 0 +exercis 0 +quit 0 +twosoccerteam 0 +cousin 0 +cooper 0 +recdivis 0 +last 0 +season 0 +came 0 +second 0 +place 0 +andcoop 0 +unfortun 0 +recent 0 +sacrifiedmi 0 +left 0 +knee 0 +game 0 +soccer 0 +usualstep 0 +aerobicsclass 0 +instead 0 +find 0 +try 0 +swim 0 +weight 0 +trainingclass 0 +peopl 0 +world 0 +read 0 +book 0 +abit 0 +shakespear 0 +publictelevis 0 +listen 0 +classicalmus 0 +myotherlif 0 +choi 0 +sungeun 0 +depart 0 +comput 0 +scienc 0 +engin 0 +seattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..ed54a098 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,58 @@ +system 1 +nguyen 0 +runtim 0 +multiprocessorsenviron 0 +schedul 0 +depart 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +washington 0 +work 0 +world 0 +research 0 +interest 0 +includ 0 +oper 0 +distribut 0 +andparallel 0 +network 0 +secur 0 +current 0 +help 0 +frommi 0 +advisor 0 +johnzahorjan 0 +build 0 +support 0 +run 0 +soft 0 +real 0 +timeappl 0 +visual 0 +partial 0 +idl 0 +workstat 0 +innow 0 +recent 0 +complet 0 +studi 0 +measur 0 +ofappl 0 +characterist 0 +us 0 +tominim 0 +applic 0 +execut 0 +time 0 +uniprogram 0 +well 0 +make 0 +goodglob 0 +decis 0 +multiprogram 0 +cvpublic 0 +worldvietnameseresourc 0 +netcyclingplayground 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..bfb47b51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,13 @@ +frame 1 +tian 0 +homepageyour 0 +browser 0 +rusti 0 +yellow 0 +turkei 0 +even 0 +part 0 +html 0 +standard 0 +click 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..4572c312 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,103 @@ +system 1 +object 1 +distribut 0 +work 0 +tiwari 0 +workshop 0 +oopsla 0 +opal 0 +larg 0 +address 0 +oper 0 +persist 0 +applic 0 +softwar 0 +us 0 +levi 0 +comput 0 +databas 0 +measur 0 +build 0 +ashutosh 0 +washington 0 +scienc 0 +time 0 +area 0 +singl 0 +space 0 +orient 0 +workload 0 +user 0 +organ 0 +chase 0 +advisor 0 +also 0 +behavior 0 +except 0 +handl 0 +octob 0 +benchmark 0 +depart 0 +engin 0 +univers 0 +seattl 0 +mostli 0 +full 0 +fourth 0 +year 0 +graduat 0 +student 0 +support 0 +past 0 +infrastructur 0 +interfac 0 +spare 0 +group 0 +research 0 +technolog 0 +boe 0 +servic 0 +projectsopali 0 +project 0 +issu 0 +opportun 0 +involv 0 +creat 0 +global 0 +across 0 +multipl 0 +machin 0 +jeff 0 +primari 0 +architect 0 +hank 0 +close 0 +character 0 +gener 0 +techniqu 0 +paper 0 +sever 0 +profession 0 +career 0 +thisexperi 0 +basi 0 +follow 0 +distrbut 0 +public 0 +virtual 0 +refer 0 +proc 0 +intern 0 +septemb 0 +parallel 0 +environ 0 +ecoop 0 +juli 0 +bosch 0 +addendum 0 +proceed 0 +oop 0 +messeng 0 +evalu 0 +narasayya 0 +perform 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..0b4fcfad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,91 @@ +linux 1 +also 1 +tessa 0 +inform 0 +washington 0 +interest 0 +current 0 +work 0 +system 0 +java 0 +anoth 0 +dead 0 +superhighwai 0 +second 0 +yeargradu 0 +student 0 +comput 0 +scienc 0 +univers 0 +research 0 +includ 0 +sort 0 +relatedgoodi 0 +clio 0 +search 0 +andbrows 0 +person 0 +histori 0 +avail 0 +seek 0 +gain 0 +employ 0 +myresum 0 +onlin 0 +curiou 0 +kittyi 0 +honor 0 +share 0 +apart 0 +gambit 0 +siames 0 +cat 0 +great 0 +memor 0 +found 0 +therear 0 +pictur 0 +page 0 +tofind 0 +appar 0 +alsor 0 +scotland 0 +classesi 0 +still 0 +qual 0 +quarter 0 +take 0 +last 0 +ofeight 0 +class 0 +fulfil 0 +breadth 0 +requir 0 +digit 0 +seminarlinux 0 +gameseverybodi 0 +plai 0 +game 0 +maintain 0 +gametom 0 +commit 0 +advanc 0 +pretti 0 +coolgam 0 +platform 0 +first 0 +attempt 0 +program 0 +simpl 0 +maze 0 +applet 0 +sleepingi 0 +known 0 +frequent 0 +seattl 0 +area 0 +bookstor 0 +knit 0 +crochet 0 +copyright 0 +tlau 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..3883b100 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,44 @@ +martin 1 +tompa 1 +washington 0 +comput 0 +scienc 0 +univers 0 +depart 0 +engin 0 +seattl 0 +phone 0 +receptionist 0 +lectur 0 +note 0 +articl 0 +trajectori 0 +thelma 0 +louis 0 +recent 0 +holidai 0 +moon 0 +pearl 0 +among 0 +wash 0 +oyster 0 +collabor 0 +surrealist 0 +electron 0 +propheci 0 +build 0 +across 0 +pierc 0 +lane 0 +carol 0 +photograph 0 +photo 0 +courtesi 0 +health 0 +center 0 +educ 0 +resourc 0 +provid 0 +mani 0 +imag 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..678cf4be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,106 @@ +kimbrel 1 +traci 0 +prison 0 +held 0 +univers 0 +washington 0 +comput 0 +parallel 0 +prefetch 0 +anna 0 +karlin 0 +seattl 0 +forc 0 +imprison 0 +captor 0 +scienc 0 +algorithm 0 +cachingtraci 0 +edward 0 +felten 0 +appear 0 +system 0 +washingtonsinc 0 +without 0 +charg 0 +trial 0 +move 0 +year 0 +toanoth 0 +area 0 +inmat 0 +tomanufactur 0 +airplan 0 +escap 0 +institut 0 +wasrecaptur 0 +return 0 +help 0 +hisplight 0 +rescu 0 +list 0 +thing 0 +done 0 +curriculum 0 +vita 0 +detail 0 +statement 0 +ofwhat 0 +promis 0 +histori 0 +goal 0 +free 0 +depart 0 +engin 0 +tracyk 0 +eduher 0 +trace 0 +driven 0 +comparison 0 +andrew 0 +tomkin 0 +hugo 0 +patterson 0 +brian 0 +bershad 0 +garth 0 +gibson 0 +sigop 0 +usenix 0 +associ 0 +symposium 0 +onoper 0 +design 0 +implemen 0 +near 0 +optim 0 +ieeesymposium 0 +foundat 0 +longer 0 +version 0 +integr 0 +cach 0 +page 0 +extend 0 +abstract 0 +proceed 0 +sigmetr 0 +confer 0 +measurementand 0 +model 0 +probabilist 0 +verifi 0 +matrix 0 +product 0 +usingo 0 +squar 0 +time 0 +base 0 +random 0 +bit 0 +rakesh 0 +kumar 0 +sinha 0 +inform 0 +process 0 +letter 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..46927316 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,49 @@ +travi 1 +time 1 +craig 0 +washington 0 +comput 0 +scienc 0 +real 0 +system 0 +work 0 +home 0 +page 0 +engin 0 +depart 0 +univers 0 +seattl 0 +research 0 +interest 0 +mechan 0 +predict 0 +cach 0 +restor 0 +queu 0 +spin 0 +lock 0 +arctic 0 +submarin 0 +current 0 +cours 0 +take 0 +quarter 0 +dissert 0 +consum 0 +side 0 +project 0 +half 0 +esca 0 +corpor 0 +help 0 +keep 0 +volvo 0 +run 0 +press 0 +latest 0 +motor 0 +pool 0 +statu 0 +understand 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..e026418d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,12 @@ +dean 1 +tullsen 1 +home 1 +page 1 +biograph 0 +inform 0 +research 0 +interest 0 +bibliographi 0 +download 0 +resumemi 0 +hobbi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..d30f904b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,235 @@ +mike 1 +work 0 +softwar 0 +time 0 +oopsla 0 +washington 0 +year 0 +wrote 0 +univers 0 +start 0 +student 0 +smithsonian 0 +program 0 +thank 0 +pari 0 +uwin 0 +also 0 +present 0 +symposium 0 +michael 0 +harvard 0 +research 0 +comput 0 +engin 0 +seattl 0 +vanhilst 0 +come 0 +programm 0 +hardwar 0 +part 0 +locat 0 +learn 0 +bill 0 +continu 0 +group 0 +data 0 +angela 0 +languag 0 +summer 0 +schedul 0 +commun 0 +recent 0 +object 0 +particip 0 +citi 0 +enjoi 0 +novemb 0 +vanhilstmichael 0 +vanhilstvanhilst 0 +edumvh 0 +eduperson 0 +scienc 0 +depart 0 +usaclick 0 +send 0 +email 0 +messag 0 +personalmik 0 +graduat 0 +hopefulli 0 +littl 0 +luck 0 +finish 0 +around 0 +theend 0 +winter 0 +quarter 0 +immedi 0 +prior 0 +udub 0 +contractor 0 +atibm 0 +motif 0 +widget 0 +user 0 +unterfac 0 +sdata 0 +explor 0 +fix 0 +maintainingcomput 0 +astrophys 0 +observatori 0 +within 0 +debug 0 +could 0 +convinc 0 +realli 0 +correctli 0 +stai 0 +call 0 +saoimagewhich 0 +us 0 +lot 0 +astronom 0 +look 0 +imag 0 +saoimag 0 +gnudistribut 0 +would 0 +like 0 +wyatt 0 +eric 0 +mandel 0 +schwarz 0 +doug 0 +minkfor 0 +guid 0 +project 0 +mention 0 +countless 0 +other 0 +contribut 0 +took 0 +seismologistsin 0 +franc 0 +acquisit 0 +calibr 0 +analysi 0 +truli 0 +wonder 0 +wife 0 +french 0 +class 0 +theallianc 0 +francais 0 +nativ 0 +colombiain 0 +south 0 +america 0 +front 0 +studentsbrows 0 +base 0 +talent 0 +staff 0 +folk 0 +brought 0 +pine 0 +special 0 +shirei 0 +design 0 +traci 0 +stenvik 0 +screen 0 +librari 0 +machin 0 +taught 0 +begin 0 +extens 0 +motiv 0 +frommicrosoft 0 +product 0 +support 0 +sacrif 0 +paper 0 +theintern 0 +technolog 0 +advanc 0 +isota 0 +confer 0 +orient 0 +system 0 +applic 0 +theacm 0 +sigsoft 0 +foundat 0 +poster 0 +made 0 +subject 0 +workshop 0 +doctor 0 +demo 0 +uist 0 +steve 0 +earlier 0 +life 0 +earn 0 +degre 0 +inarchitectur 0 +wooden 0 +kind 0 +planningfrom 0 +mitand 0 +develop 0 +director 0 +forth 0 +grinnel 0 +iowa 0 +thing 0 +differ 0 +skill 0 +visualdesign 0 +problem 0 +solv 0 +valu 0 +still 0 +get 0 +talk 0 +chri 0 +alexand 0 +activ 0 +chapter 0 +associ 0 +improv 0 +ti 0 +larg 0 +small 0 +compani 0 +area 0 +hike 0 +cross 0 +countri 0 +ski 0 +sail 0 +andkayak 0 +swim 0 +lake 0 +bronson 0 +free 0 +taken 0 +marco 0 +harold 0 +sebastien 0 +hilst 0 +born 0 +post 0 +pictur 0 +soon 0 +anoth 0 +photo 0 +scanner 0 +visit 0 +sinc 0 +last 0 +modifi 0 +fridai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..49d808e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,103 @@ +home 1 +link 0 +collect 0 +hello 0 +washington 0 +vass 0 +also 0 +expand 0 +staff 0 +project 0 +data 0 +graph 0 +page 0 +vassilylong 0 +live 0 +start 0 +realli 0 +person 0 +best 0 +linki 0 +come 0 +thu 0 +http 0 +us 0 +document 0 +shortest 0 +write 0 +young 0 +quit 0 +download 0 +fewfil 0 +sinc 0 +thisstuff 0 +select 0 +quotesrussian 0 +pagesvari 0 +linksguid 0 +html 0 +formsoth 0 +pageuw 0 +pagencsa 0 +mosaic 0 +pagerec 0 +join 0 +cecilproject 0 +cecil 0 +cool 0 +pure 0 +object 0 +orient 0 +languag 0 +andvortex 0 +even 0 +cooler 0 +compil 0 +infrastructur 0 +befast 0 +mayb 0 +paper 0 +written 0 +member 0 +design 0 +implement 0 +themvi 0 +system 0 +assist 0 +access 0 +ourdepartment 0 +room 0 +visitor 0 +databas 0 +recent 0 +beenupgrad 0 +peopl 0 +qual 0 +thezpl 0 +languageto 0 +handl 0 +irregular 0 +structur 0 +repres 0 +anddynam 0 +repartit 0 +arrai 0 +myqual 0 +writeup 0 +short 0 +overview 0 +check 0 +theslidesfrom 0 +present 0 +slide 0 +toresourc 0 +relat 0 +eduobject 0 +mirror 0 +closer 0 +appear 0 +pastor 0 +vybrasyvalsya 0 +okna 0 +pyatyi 0 +deystvov 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..e1f9914e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,55 @@ +washington 1 +virgil 0 +comput 0 +bourassa 0 +engin 0 +scienc 0 +theunivers 0 +seattl 0 +work 0 +univers 0 +home 0 +page 0 +uwvirgil 0 +evan 0 +bourassavirgil 0 +student 0 +depart 0 +research 0 +interestsinclud 0 +oper 0 +system 0 +architectur 0 +join 0 +boeingin 0 +scientist 0 +scienceorgan 0 +inform 0 +support 0 +servic 0 +divis 0 +bellevu 0 +receiv 0 +electr 0 +arizonast 0 +temp 0 +arizona 0 +electricalengin 0 +accesswhat 0 +interest 0 +expertis 0 +resum 0 +patent 0 +invent 0 +public 0 +present 0 +profession 0 +histori 0 +educ 0 +achiev 0 +recommend 0 +letter 0 +statusoccasion 0 +updat 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..df6747cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,201 @@ +avail 1 +univers 0 +work 0 +comput 0 +washington 0 +interest 0 +distribut 0 +system 0 +high 0 +scienc 0 +also 0 +safeti 0 +year 0 +vivek 0 +page 0 +current 0 +bellcor 0 +applic 0 +group 0 +simul 0 +last 0 +seattl 0 +cricket 0 +ratan 0 +home 0 +graduat 0 +depart 0 +research 0 +issu 0 +develop 0 +look 0 +recent 0 +fault 0 +detail 0 +project 0 +softwar 0 +found 0 +involv 0 +rsml 0 +public 0 +angelo 0 +like 0 +plai 0 +game 0 +learn 0 +danc 0 +model 0 +unit 0 +nation 0 +chapter 0 +particular 0 +student 0 +academ 0 +leav 0 +scientistat 0 +morristown 0 +researchwork 0 +distributedsoftwar 0 +simpli 0 +highli 0 +continu 0 +presenc 0 +failur 0 +toolset 0 +anatida 0 +provid 0 +mechan 0 +adher 0 +corba 0 +standard 0 +activ 0 +replic 0 +scheme 0 +indc 0 +effort 0 +focuss 0 +enhanc 0 +toler 0 +cell 0 +directori 0 +servic 0 +foundher 0 +integrationof 0 +technolog 0 +server 0 +murphi 0 +methodolog 0 +head 0 +bydr 0 +nanci 0 +leveson 0 +much 0 +automat 0 +tree 0 +gener 0 +fromrequir 0 +specif 0 +written 0 +well 0 +languag 0 +list 0 +person 0 +inform 0 +born 0 +brought 0 +india 0 +eight 0 +undergradu 0 +attend 0 +state 0 +wesleyan 0 +middletown 0 +receiv 0 +physic 0 +math 0 +right 0 +came 0 +purus 0 +studi 0 +tenni 0 +whenev 0 +squash 0 +lesserext 0 +racquetbal 0 +suffic 0 +ardent 0 +folow 0 +particip 0 +ultra 0 +email 0 +mani 0 +follow 0 +exploit 0 +superson 0 +marin 0 +cowboi 0 +half 0 +taken 0 +keen 0 +ballroom 0 +waltz 0 +foxtrot 0 +chacha 0 +rhumba 0 +tango 0 +swing 0 +west 0 +coast 0 +pleas 0 +occasion 0 +dabbl 0 +mambo 0 +area 0 +best 0 +place 0 +center 0 +us 0 +band 0 +session 0 +everi 0 +saturdai 0 +intern 0 +educ 0 +organ 0 +confer 0 +held 0 +throughout 0 +topic 0 +restructur 0 +reform 0 +part 0 +secur 0 +council 0 +ecosoc 0 +world 0 +bank 0 +rapidpopul 0 +growth 0 +nuclear 0 +prolifer 0 +read 0 +poetri 0 +mirza 0 +ghalib 0 +centuryindian 0 +poet 0 +english 0 +literatur 0 +especi 0 +romant 0 +victorian 0 +period 0 +link 0 +obligatori 0 +collect 0 +sitesthat 0 +tend 0 +visit 0 +often 0 +engin 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..516364e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,30 @@ +washington 1 +geoff 0 +univers 0 +thesi 0 +window 0 +voelker 0 +skywhoi 0 +graduat 0 +student 0 +inseattl 0 +whati 0 +master 0 +wireless 0 +mobil 0 +comput 0 +design 0 +andbuild 0 +system 0 +call 0 +mobisa 0 +current 0 +avoid 0 +settl 0 +topic 0 +wherechateau 0 +guggenheim 0 +annex 0 +washingtonseattl 0 +look 0 +emac 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..5f47cb8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,55 @@ +washington 1 +list 1 +home 0 +waynew 0 +look 0 +system 0 +thing 0 +wayn 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +stuff 0 +current 0 +memori 0 +perform 0 +particular 0 +differentmemori 0 +organ 0 +investig 0 +work 0 +beingdon 0 +jean 0 +loup 0 +baer 0 +also 0 +interpret 0 +other 0 +denni 0 +geoff 0 +alec 0 +rightnow 0 +littl 0 +rocki 0 +actual 0 +earli 0 +version 0 +paper 0 +keep 0 +jump 0 +alpha 0 +interest 0 +place 0 +fish 0 +cool 0 +site 0 +howev 0 +peoplewho 0 +peopl 0 +well 0 +test 0 +testwayn 0 +wong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..4fc209dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,13 @@ +chan 1 +home 1 +time 1 +william 0 +pagewilliam 0 +pagei 0 +spend 0 +hell 0 +spare 0 +hang 0 +heaven 0 +wchan 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..1ac2357f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,140 @@ +weld 1 +comput 0 +plan 0 +scienc 0 +univers 0 +intellig 0 +offic 0 +research 0 +aaai 0 +agent 0 +daniel 0 +receiv 0 +artifici 0 +award 0 +inform 0 +paper 0 +washington 0 +interest 0 +current 0 +softwar 0 +base 0 +associ 0 +professor 0 +engineeringat 0 +ofwashington 0 +bachelor 0 +degre 0 +biochemistri 0 +yale 0 +land 0 +presidenti 0 +young 0 +investig 0 +naval 0 +younginvestig 0 +theadvisori 0 +board 0 +journal 0 +airesearch 0 +guest 0 +editor 0 +edit 0 +report 0 +role 0 +ofintellig 0 +system 0 +nation 0 +infrastructur 0 +isco 0 +program 0 +chair 0 +publish 0 +book 0 +scad 0 +technic 0 +person 0 +data 0 +reach 0 +sieg 0 +hall 0 +phone 0 +work 0 +home 0 +mail 0 +dept 0 +engin 0 +seattlewa 0 +specif 0 +exampl 0 +group 0 +support 0 +ucpop 0 +planner 0 +us 0 +almost 0 +hundr 0 +sitesworldwid 0 +mani 0 +avail 0 +electron 0 +arehi 0 +favorit 0 +repres 0 +sens 0 +action 0 +middl 0 +ground 0 +revisit 0 +gather 0 +august 0 +control 0 +aip 0 +ascal 0 +comparison 0 +shop 0 +world 0 +wide 0 +januari 0 +softbot 0 +interfac 0 +internet 0 +cacm 0 +juli 0 +anintroduct 0 +least 0 +commit 0 +magazin 0 +winter 0 +select 0 +exhaustivelist 0 +recreat 0 +absent 0 +foundat 0 +cafe 0 +allegro 0 +stormymountain 0 +climb 0 +past 0 +enjoi 0 +travel 0 +theworld 0 +like 0 +found 0 +plai 0 +twin 0 +boi 0 +adam 0 +galen 0 +invit 0 +visit 0 +galleri 0 +pacif 0 +northwest 0 +desert 0 +wilder 0 +photograph 0 +also 0 +illustr 0 +stori 0 +morocco 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..49b8a000 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,10 @@ +wendi 1 +belluomini 0 +washington 0 +graduat 0 +master 0 +current 0 +work 0 +univ 0 +utah 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..382b97b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,55 @@ +comput 1 +scienc 0 +engin 0 +washington 0 +thedepart 0 +compil 0 +system 0 +wilson 0 +hsiehwilson 0 +hsiehi 0 +postdoc 0 +theunivers 0 +inseattl 0 +member 0 +thespin 0 +project 0 +receiv 0 +electr 0 +sciencein 0 +theschool 0 +engineeringatmit 0 +work 0 +thelaboratori 0 +advisor 0 +werefran 0 +kaashoekandbil 0 +weihl 0 +research 0 +parallel 0 +myresearch 0 +interest 0 +interact 0 +among 0 +programminglanguag 0 +runtim 0 +oper 0 +architectur 0 +select 0 +publicationsselect 0 +linksperson 0 +interestswilson 0 +hsieh 0 +depart 0 +univers 0 +seattl 0 +offic 0 +sieg 0 +move 0 +phone 0 +numberha 0 +chang 0 +voic 0 +whsieh 0 +public 0 +keyoctob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..56d8f55e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,83 @@ +wolman 1 +washington 0 +network 0 +alec 0 +home 0 +scienc 0 +seattl 0 +current 0 +research 0 +perform 0 +interpret 0 +usenix 0 +voelker 0 +page 0 +wolmanwolman 0 +eduworkcomput 0 +engin 0 +departmentunivers 0 +washingtonbox 0 +graduat 0 +student 0 +thecomput 0 +departmentat 0 +univers 0 +offic 0 +isroom 0 +thechateau 0 +gradual 0 +school 0 +work 0 +fordigit 0 +equip 0 +corp 0 +cambridg 0 +interest 0 +includ 0 +oper 0 +system 0 +architectur 0 +recent 0 +project 0 +scalabl 0 +etch 0 +binari 0 +instrument 0 +optim 0 +executablesrocki 0 +performanceon 0 +line 0 +paper 0 +firewal 0 +applic 0 +relai 0 +trees 0 +summer 0 +latenc 0 +analysi 0 +thekkath 0 +winter 0 +structur 0 +romer 0 +wong 0 +baer 0 +bershad 0 +levi 0 +appear 0 +asplo 0 +hungri 0 +otter 0 +fixha 0 +strang 0 +idea 0 +nervou 0 +habit 0 +realli 0 +plai 0 +guitar 0 +link 0 +hallwolman 0 +diseasewolman 0 +pressur 0 +treat 0 +lumber 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..057be808 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,85 @@ +baer 1 +perform 1 +parallel 0 +simul 0 +comput 0 +engin 0 +page 0 +washington 0 +evalu 0 +trace 0 +driven 0 +xiaohan 0 +scienc 0 +interest 0 +architectur 0 +distribut 0 +system 0 +paper 0 +confer 0 +multiprocessor 0 +proceed 0 +optimist 0 +zhang 0 +xqin 0 +depart 0 +univers 0 +seattl 0 +offic 0 +sieg 0 +phone 0 +year 0 +graduat 0 +student 0 +work 0 +jean 0 +loup 0 +research 0 +includ 0 +method 0 +model 0 +short 0 +term 0 +goal 0 +school 0 +soon 0 +possibl 0 +cluster 0 +base 0 +submit 0 +explicit 0 +communicationprimit 0 +cach 0 +coher 0 +appear 0 +hpca 0 +compar 0 +studi 0 +conserv 0 +award 0 +symposium 0 +tech 0 +report 0 +dept 0 +univ 0 +implement 0 +intern 0 +process 0 +graph 0 +toolfor 0 +monitor 0 +visual 0 +basedmultiprocessor 0 +nalluri 0 +journal 0 +june 0 +predict 0 +processingon 0 +numa 0 +ieee 0 +tran 0 +softwar 0 +stuff 0 +photo 0 +chinaread 0 +chinesesearch 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..c339cd46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,71 @@ +washington 1 +info 1 +link 1 +yasushi 0 +project 0 +desktop 0 +index 0 +spin 0 +saitoyasushi 0 +saito 0 +second 0 +year 0 +graduat 0 +student 0 +atdepart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +current 0 +workingwith 0 +brian 0 +bershad 0 +thespin 0 +address 0 +andperson 0 +meta 0 +metacrawl 0 +yahoo 0 +refer 0 +alta 0 +vista 0 +lyco 0 +archi 0 +tech 0 +research 0 +intern 0 +document 0 +modula 0 +time 0 +schedul 0 +survei 0 +oper 0 +system 0 +transact 0 +servic 0 +qual 0 +sightse 0 +japanes 0 +random 0 +javascript 0 +apprentic 0 +page 0 +us 0 +linux 0 +connect 0 +gatewai 0 +japan 0 +perl 0 +patch 0 +touch 0 +type 0 +trainer 0 +dvorak 0 +lesson 0 +text 0 +want 0 +finger 0 +talk 0 +trycanva 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..2dbd6be6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,71 @@ +zamir 1 +home 0 +washington 0 +depart 0 +seattl 0 +interest 0 +internet 0 +document 0 +search 0 +pictur 0 +oren 0 +pageoren 0 +page 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +offic 0 +chateau 0 +edui 0 +isra 0 +graduat 0 +student 0 +computersci 0 +engin 0 +univers 0 +myundergradu 0 +degre 0 +physic 0 +mathemat 0 +hebrewunivers 0 +jerusalem 0 +israel 0 +field 0 +artifici 0 +intellig 0 +softwareengin 0 +current 0 +work 0 +line 0 +cluster 0 +algorithmsfor 0 +retriev 0 +basic 0 +idea 0 +help 0 +userwith 0 +result 0 +hundr 0 +worki 0 +done 0 +part 0 +metacrawl 0 +parallel 0 +servic 0 +along 0 +orenetzioni 0 +erik 0 +selberg 0 +resum 0 +avail 0 +thing 0 +like 0 +dive 0 +sinai 0 +jeeptour 0 +ski 0 +last 0 +raft 0 +trip 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..fb1c680f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,126 @@ +comput 1 +architectur 0 +also 0 +baer 0 +scienc 0 +research 0 +parallel 0 +system 0 +ieee 0 +chairman 0 +cach 0 +professor 0 +engin 0 +grenobl 0 +ucla 0 +author 0 +serv 0 +fellow 0 +journal 0 +distribut 0 +internationalsymposium 0 +project 0 +page 0 +protocol 0 +cluster 0 +prefetch 0 +simul 0 +titl 0 +jean 0 +loup 0 +professorand 0 +adjunct 0 +electr 0 +receiv 0 +diplom 0 +ingnieur 0 +electricalengin 0 +doctorat 0 +cycl 0 +theuniversit 0 +franc 0 +prior 0 +join 0 +univers 0 +washington 0 +laboratoir 0 +decalcul 0 +universit 0 +member 0 +digit 0 +technologygroup 0 +present 0 +interest 0 +anddistribut 0 +process 0 +coauthor 0 +paper 0 +thesearea 0 +textbook 0 +press 0 +distinguishedvisitor 0 +nation 0 +lectur 0 +guggenheim 0 +editor 0 +languag 0 +asprogram 0 +intern 0 +confer 0 +parallelprocess 0 +program 0 +gener 0 +ofth 0 +current 0 +chair 0 +sigarch 0 +eighteen 0 +student 0 +complet 0 +dissert 0 +professorba 0 +direct 0 +twelv 0 +work 0 +industri 0 +laboratoriesand 0 +inacademia 0 +although 0 +year 0 +hashad 0 +difficulti 0 +retain 0 +french 0 +accent 0 +cours 0 +recent 0 +look 0 +involv 0 +coher 0 +improv 0 +singl 0 +perform 0 +softwar 0 +primit 0 +appear 0 +hpca 0 +uniprocessor 0 +hardwar 0 +comparisonwith 0 +block 0 +asplo 0 +multiprocessor 0 +isca 0 +impact 0 +specul 0 +execut 0 +denni 0 +home 0 +andisca 0 +trace 0 +driven 0 +conserv 0 +approach 0 +icpp 0 +optimisticapproach 0 +comparison 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..f4f8fb9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,118 @@ +chamber 1 +craig 0 +comput 0 +scienc 0 +system 0 +object 0 +orient 0 +languag 0 +incorpor 0 +investig 0 +vortex 0 +project 0 +cecil 0 +static 0 +optim 0 +member 0 +spin 0 +extens 0 +applic 0 +washington 0 +assist 0 +professor 0 +join 0 +faculti 0 +receiv 0 +degre 0 +stanford 0 +research 0 +interest 0 +design 0 +implementationof 0 +advanc 0 +program 0 +express 0 +programminglanguag 0 +effici 0 +implement 0 +support 0 +programmingenviron 0 +current 0 +languagesand 0 +lead 0 +ceciland 0 +pure 0 +languageserv 0 +vehicl 0 +multi 0 +method 0 +type 0 +modul 0 +featur 0 +compilersystem 0 +intra 0 +andinterprocedur 0 +analys 0 +profil 0 +guid 0 +withfront 0 +end 0 +modula 0 +java 0 +previous 0 +chamberswa 0 +self 0 +also 0 +spinproject 0 +oper 0 +microkernel 0 +whichsupport 0 +dynam 0 +adapt 0 +interfac 0 +implementationsund 0 +direct 0 +control 0 +still 0 +maintain 0 +systemintegr 0 +isol 0 +util 0 +dialect 0 +themodula 0 +pointer 0 +safe 0 +kernel 0 +spinalso 0 +reli 0 +dynamiccompil 0 +achiev 0 +high 0 +perform 0 +despit 0 +fine 0 +grainedextens 0 +click 0 +herefor 0 +inform 0 +undergradu 0 +graduat 0 +level 0 +researchproject 0 +area 0 +contact 0 +informationprof 0 +chambersdepart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +mail 0 +requir 0 +street 0 +address 0 +sieg 0 +hall 0 +room 0 +last 0 +updat 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..7adba021 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,116 @@ +carl 1 +ebel 1 +comput 1 +project 1 +scienc 0 +washington 0 +triptych 0 +interest 0 +system 0 +chao 0 +fpga 0 +home 0 +page 0 +ebelingdepart 0 +engin 0 +univers 0 +ofwashington 0 +seattl 0 +voic 0 +mail 0 +offic 0 +sieg 0 +hall 0 +room 0 +associ 0 +professor 0 +physic 0 +wheatoncolleg 0 +southern 0 +illinoisunivers 0 +carnegi 0 +mellonunivers 0 +join 0 +research 0 +fall 0 +categori 0 +vlsiarchitectur 0 +aid 0 +design 0 +digit 0 +haswork 0 +number 0 +vlsi 0 +includ 0 +hitech 0 +chessmachin 0 +apex 0 +graphic 0 +chip 0 +draw 0 +spline 0 +curv 0 +andsurfac 0 +field 0 +programm 0 +gate 0 +arrai 0 +current 0 +hei 0 +involv 0 +build 0 +multicomput 0 +routingnetwork 0 +focu 0 +method 0 +optim 0 +theperform 0 +circuit 0 +us 0 +level 0 +sensit 0 +latch 0 +placementand 0 +rout 0 +algorithm 0 +particularli 0 +teachingspr 0 +advanc 0 +logic 0 +designoffic 0 +hour 0 +mondai 0 +thursdai 0 +travel 0 +april 0 +fccm 0 +napamai 0 +burlington 0 +chicagojun 0 +vegasresearch 0 +northwest 0 +laboratori 0 +integr 0 +router 0 +high 0 +densiti 0 +architectur 0 +public 0 +journal 0 +articl 0 +confer 0 +workshop 0 +paper 0 +graduat 0 +student 0 +soha 0 +hassoun 0 +neil 0 +mckenzi 0 +darren 0 +cronquist 0 +paul 0 +franklin 0 +amara 0 +galleryelan 0 +galleryebel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..059c222b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,39 @@ +page 1 +seattl 0 +link 0 +uncertainti 0 +inform 0 +schedul 0 +steve 0 +hanksunivers 0 +washingtondepart 0 +comput 0 +scienc 0 +design 0 +agent 0 +architecturesai 0 +magazin 0 +seriou 0 +home 0 +spring 0 +confer 0 +group 0 +restaur 0 +symphoni 0 +wine 0 +opera 0 +server 0 +edita 0 +gruberova 0 +photo 0 +carlo 0 +maria 0 +giulini 0 +discographi 0 +sumac 0 +tenni 0 +new 0 +hank 0 +washington 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..c04b2d85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,71 @@ +spent 1 +professor 0 +engin 0 +degre 0 +univers 0 +year 0 +divis 0 +comput 0 +group 0 +system 0 +titl 0 +alistair 0 +holden 0 +electr 0 +origin 0 +highland 0 +scotland 0 +receivedhi 0 +glasgow 0 +withth 0 +british 0 +broadcast 0 +corpor 0 +graduateapprentic 0 +research 0 +yale 0 +edison 0 +fellowship 0 +phddegre 0 +washington 0 +dissert 0 +learningin 0 +artifici 0 +intellig 0 +interest 0 +began 0 +take 0 +coursefrom 0 +colin 0 +cherri 0 +imperi 0 +colleg 0 +london 0 +thebbc 0 +initi 0 +scienc 0 +program 0 +theuw 0 +time 0 +faculti 0 +mostli 0 +math 0 +departmentsform 0 +within 0 +graduat 0 +school 0 +current 0 +work 0 +applic 0 +knowledg 0 +base 0 +verif 0 +expert 0 +integr 0 +symbol 0 +neural 0 +netmethodolog 0 +speech 0 +understand 0 +aid 0 +design 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..9956624e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,116 @@ +research 1 +societi 1 +scienc 0 +prize 0 +berkelei 0 +univers 0 +institut 0 +mathemat 0 +lectur 0 +comput 0 +problem 0 +professor 0 +nation 0 +academi 0 +american 0 +board 0 +karp 0 +washington 0 +award 0 +member 0 +advanc 0 +oper 0 +america 0 +manag 0 +program 0 +john 0 +neumann 0 +presentmemb 0 +complex 0 +random 0 +algorithm 0 +travel 0 +salesman 0 +dick 0 +karprichard 0 +karpprofessor 0 +ofcomputersci 0 +engin 0 +andadjunct 0 +ofmolecularbiotechnologyunivers 0 +ofwashington 0 +eduaward 0 +membershipsn 0 +medal 0 +babbag 0 +fellow 0 +ture 0 +sciencesmemb 0 +engineeringfellow 0 +art 0 +sciencesfellow 0 +associ 0 +sciencedistinguish 0 +teach 0 +academ 0 +senat 0 +class 0 +chair 0 +berkeleylanchest 0 +fulkerson 0 +theori 0 +faculti 0 +hermann 0 +weyl 0 +studi 0 +industri 0 +appliedmathemat 0 +miller 0 +honorari 0 +doctor 0 +georgetown 0 +massachusett 0 +technion 0 +pennsylvania 0 +advisori 0 +profession 0 +forsoci 0 +respons 0 +governor 0 +weizmann 0 +truste 0 +intern 0 +scienceinstitut 0 +presentselect 0 +public 0 +combinator 0 +turingaward 0 +commun 0 +construct 0 +perfect 0 +match 0 +upfal 0 +wigderson 0 +combinatorica 0 +probabilist 0 +analysi 0 +partit 0 +plane 0 +ofoper 0 +theoret 0 +improv 0 +effici 0 +fornetwork 0 +flow 0 +edmond 0 +journal 0 +theacm 0 +reduc 0 +among 0 +combinatori 0 +plenum 0 +press 0 +minimum 0 +spanningtre 0 +part 0 +held 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..b0914170 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,250 @@ +comput 1 +lazowska 0 +univers 0 +scienc 0 +committe 0 +washington 0 +chair 0 +inform 0 +board 0 +technolog 0 +member 0 +research 0 +program 0 +thedepart 0 +advisori 0 +system 0 +home 0 +lectur 0 +universityof 0 +berkelei 0 +faculti 0 +serv 0 +nation 0 +review 0 +perform 0 +concern 0 +engin 0 +ahalf 0 +centuri 0 +exponentialprogress 0 +annualfaculti 0 +vicepresid 0 +gore 0 +speech 0 +eniac 0 +thanniversari 0 +celebr 0 +california 0 +invent 0 +chines 0 +cook 0 +nathanmyhrvold 0 +joinsedlazowska 0 +theuwcs 0 +trip 0 +memori 0 +graduat 0 +student 0 +seem 0 +director 0 +depart 0 +industri 0 +council 0 +person 0 +award 0 +select 0 +complet 0 +recent 0 +machineri 0 +testimonyto 0 +page 0 +grade 0 +congress 0 +talk 0 +like 0 +georgejetson 0 +support 0 +polici 0 +appropri 0 +forfr 0 +flintston 0 +lane 0 +professor 0 +mostlywearsti 0 +push 0 +paper 0 +rack 0 +frequent 0 +flier 0 +mile 0 +pick 0 +onthi 0 +mbquicktim 0 +healso 0 +host 0 +lot 0 +visitor 0 +surpris 0 +number 0 +havefunnynos 0 +researchassoci 0 +includ 0 +essenti 0 +allgradu 0 +laboratoriesin 0 +field 0 +ofcra 0 +govern 0 +affair 0 +foundat 0 +advisorycommitte 0 +andengin 0 +scomputersci 0 +telecommun 0 +technic 0 +formicrosoft 0 +personnationalsemiconductor 0 +academicadvisori 0 +ofdata 0 +corpor 0 +scientif 0 +forcabl 0 +hows 0 +ventur 0 +cascadia 0 +fund 0 +belong 0 +stand 0 +committeesfor 0 +eecsat 0 +andth 0 +atstanford 0 +virginia 0 +hongkong 0 +ture 0 +servic 0 +councilpanel 0 +multi 0 +agencyhigh 0 +computingand 0 +commun 0 +brook 0 +sutherland 0 +andha 0 +examinersfor 0 +record 0 +examin 0 +test 0 +sigmetr 0 +associ 0 +sspecial 0 +interest 0 +group 0 +softwar 0 +chairof 0 +symposium 0 +oper 0 +principl 0 +andeditor 0 +ieee 0 +transact 0 +addit 0 +servinga 0 +onacadem 0 +thecommitte 0 +deanship 0 +colleg 0 +artsand 0 +forth 0 +molecular 0 +biotechnolog 0 +amemb 0 +deanof 0 +deliv 0 +theunivers 0 +annual 0 +fellowof 0 +associationfor 0 +theinstitut 0 +electr 0 +andelectron 0 +seventeenph 0 +studentshav 0 +degre 0 +work 0 +miscellan 0 +link 0 +integratedoverview 0 +region 0 +also 0 +apersuas 0 +player 0 +version 0 +intend 0 +forloc 0 +consumpt 0 +theimpact 0 +perspect 0 +uwcs 0 +profession 0 +master 0 +persuas 0 +playertopten 0 +reason 0 +major 0 +csebuild 0 +project 0 +abbrevi 0 +cvcomputingresearch 0 +drive 0 +informationtechnolog 0 +forwardmassi 0 +goldmanreport 0 +alleg 0 +cseph 0 +product 0 +issu 0 +flaw 0 +data 0 +medianyear 0 +confer 0 +boardstudi 0 +doctor 0 +think 0 +driver 0 +highwai 0 +saturdayseminar 0 +novemb 0 +houseappropri 0 +april 0 +hous 0 +hpcc 0 +octob 0 +februari 0 +interestinghom 0 +sometim 0 +demo 0 +purpos 0 +odeto 0 +execut 0 +vice 0 +presid 0 +tallman 0 +trask 0 +departsfor 0 +duke 0 +lanelazowska 0 +down 0 +famili 0 +pagedirect 0 +houseshilshol 0 +aquat 0 +club 0 +pagerec 0 +discoveredreview 0 +poetryfing 0 +scheduleinform 0 +offic 0 +reflector 0 +http 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..56adcb82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,94 @@ +professor 1 +univers 0 +system 0 +shaw 0 +engin 0 +stanford 0 +scienc 0 +comput 0 +research 0 +fulbright 0 +associ 0 +real 0 +time 0 +softwar 0 +book 0 +committe 0 +dissert 0 +includ 0 +half 0 +hobbi 0 +titl 0 +alan 0 +graduat 0 +bachelor 0 +sdegre 0 +physic 0 +toronto 0 +amast 0 +mathemat 0 +incomput 0 +addit 0 +facultyappoint 0 +washington 0 +start 0 +hasbeen 0 +assist 0 +cornel 0 +visit 0 +scholar 0 +pari 0 +guest 0 +informat 0 +zurich 0 +atth 0 +linear 0 +acceler 0 +center 0 +theibm 0 +corpor 0 +current 0 +interest 0 +specif 0 +method 0 +publicationsinclud 0 +textbook 0 +oper 0 +softwareengin 0 +introductori 0 +text 0 +andan 0 +edit 0 +document 0 +prepar 0 +serv 0 +memberof 0 +editori 0 +member 0 +sciencescreen 0 +award 0 +associateeditor 0 +journal 0 +editor 0 +ieee 0 +transact 0 +among 0 +thing 0 +supervis 0 +mani 0 +these 0 +project 0 +fifteen 0 +distinguish 0 +former 0 +student 0 +academ 0 +posit 0 +work 0 +live 0 +good 0 +food 0 +trumpet 0 +hike 0 +bike 0 +tenni 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..39be5b68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,97 @@ +parallel 1 +snyder 0 +professor 0 +univers 0 +serv 0 +research 0 +program 0 +chip 0 +editor 0 +system 0 +scienc 0 +receiv 0 +visit 0 +faculti 0 +architectur 0 +complet 0 +project 0 +journal 0 +nation 0 +committe 0 +comput 0 +numer 0 +direct 0 +chair 0 +titl 0 +lawrenc 0 +bachelor 0 +sdegre 0 +iowa 0 +mathemat 0 +econom 0 +andin 0 +carnegi 0 +mellon 0 +computersci 0 +scholar 0 +washington 0 +join 0 +perman 0 +onth 0 +yale 0 +purdu 0 +scholarat 0 +harvard 0 +rang 0 +proof 0 +theundecid 0 +properti 0 +design 0 +developmentof 0 +singl 0 +cmo 0 +microprocessor 0 +quarter 0 +hors 0 +hecreat 0 +configur 0 +highli 0 +thepok 0 +environ 0 +inventor 0 +chaoticrout 0 +follow 0 +blue 0 +nowprincip 0 +investig 0 +orca 0 +nwli 0 +associ 0 +computerand 0 +ofth 0 +area 0 +ieee 0 +transact 0 +anddistribut 0 +foundationadvisori 0 +divis 0 +particip 0 +advisorycommitte 0 +futur 0 +andcomput 0 +polici 0 +distinguish 0 +doctoraldissert 0 +award 0 +select 0 +first 0 +symposium 0 +algorithmsand 0 +addit 0 +dozen 0 +student 0 +doctor 0 +degreesund 0 +guid 0 +master 0 +seniorproject 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..6d2f881c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,81 @@ +system 1 +comput 0 +somani 0 +professor 0 +design 0 +network 0 +parallel 0 +arun 0 +electr 0 +engin 0 +develop 0 +indian 0 +toler 0 +interconnect 0 +architectur 0 +proteu 0 +tenni 0 +earn 0 +msee 0 +degre 0 +mcgill 0 +univers 0 +montreal 0 +canada 0 +respect 0 +prior 0 +work 0 +scientif 0 +offic 0 +govt 0 +india 0 +delhi 0 +period 0 +anti 0 +submarin 0 +warfar 0 +navi 0 +research 0 +interest 0 +area 0 +offault 0 +algorithm 0 +current 0 +involv 0 +three 0 +major 0 +project 0 +high 0 +integr 0 +address 0 +issu 0 +relat 0 +tocach 0 +memori 0 +redund 0 +evalu 0 +tool 0 +congest 0 +control 0 +fault 0 +broadband 0 +multiprocessor 0 +autom 0 +classif 0 +object 0 +base 0 +generalizedenhanc 0 +hypercub 0 +reconfigur 0 +explor 0 +coars 0 +grain 0 +like 0 +cook 0 +food 0 +hike 0 +plai 0 +bridg 0 +tabl 0 +inform 0 +dpcnl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..0857ca1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,173 @@ +comput 1 +serv 0 +tanimoto 0 +univers 0 +chair 0 +pattern 0 +visit 0 +confer 0 +imag 0 +visual 0 +ieee 0 +professor 0 +franc 0 +research 0 +analysi 0 +program 0 +intellig 0 +process 0 +languag 0 +recognit 0 +steven 0 +home 0 +page 0 +scienc 0 +adjunct 0 +electricalengin 0 +anda 0 +linkp 0 +sweden 0 +scientist 0 +machin 0 +june 0 +addit 0 +parallel 0 +artifici 0 +mathemat 0 +edit 0 +vision 0 +lisp 0 +current 0 +societi 0 +gener 0 +conferenceon 0 +committe 0 +journal 0 +colleg 0 +engin 0 +receiv 0 +degre 0 +fromharvard 0 +princeton 0 +join 0 +theunivers 0 +washington 0 +faculti 0 +year 0 +teach 0 +atth 0 +connecticut 0 +professorat 0 +institut 0 +programm 0 +pari 0 +scholar 0 +sinc 0 +hasalso 0 +member 0 +depart 0 +atkob 0 +japan 0 +think 0 +corpor 0 +cambridg 0 +massachusett 0 +theinstitut 0 +enseign 0 +superieur 0 +techniquesd 0 +electroniqu 0 +irest 0 +nant 0 +assist 0 +variou 0 +hasrec 0 +forimag 0 +devot 0 +processingand 0 +commun 0 +take 0 +place 0 +bordeaux 0 +interest 0 +includ 0 +ofimag 0 +particularli 0 +us 0 +processor 0 +educ 0 +technolog 0 +currentlydirect 0 +sponsor 0 +project 0 +experi 0 +throughimag 0 +whose 0 +object 0 +develop 0 +person 0 +softwarethat 0 +motiv 0 +grade 0 +student 0 +studi 0 +written 0 +coauthor 0 +paper 0 +thebook 0 +structur 0 +author 0 +textbook 0 +entitl 0 +element 0 +introductionus 0 +publish 0 +common 0 +accompanyingsoftwar 0 +work 0 +book 0 +subject 0 +ofparallel 0 +organ 0 +internationalworkshop 0 +held 0 +seattl 0 +serveda 0 +meet 0 +bergen 0 +norwai 0 +also 0 +intern 0 +subconfer 0 +programcommitte 0 +numer 0 +patternrecognit 0 +chairman 0 +societyworkshop 0 +architectur 0 +machineintellig 0 +steer 0 +theieee 0 +symposiaon 0 +editorialboard 0 +cvgip 0 +understand 0 +editor 0 +chief 0 +ieeetransact 0 +relat 0 +activ 0 +engineeringeduc 0 +polici 0 +vice 0 +council 0 +elect 0 +fellow 0 +outsid 0 +steve 0 +enjoi 0 +plai 0 +jazz 0 +andclass 0 +piano 0 +music 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..2ee7b0dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,133 @@ +comput 1 +scienc 0 +serv 0 +univers 0 +theori 0 +committe 0 +young 0 +foundat 0 +chairman 0 +professor 0 +colleg 0 +faculti 0 +also 0 +research 0 +program 0 +depart 0 +nation 0 +inform 0 +berkelei 0 +associ 0 +theoret 0 +complex 0 +algorithm 0 +special 0 +societi 0 +editori 0 +board 0 +current 0 +journal 0 +titl 0 +paul 0 +graduateof 0 +antioch 0 +receiv 0 +hejoin 0 +washington 0 +seventeen 0 +year 0 +atpurdu 0 +earli 0 +member 0 +inperhap 0 +first 0 +unit 0 +state 0 +postdoctor 0 +fellow 0 +atstanford 0 +reed 0 +briefli 0 +aschairman 0 +theunivers 0 +mexico 0 +twice 0 +taught 0 +visit 0 +professorin 0 +divis 0 +california 0 +becam 0 +dean 0 +facil 0 +engin 0 +interest 0 +emphasi 0 +question 0 +thegener 0 +connect 0 +mathematicallog 0 +author 0 +coauthor 0 +paper 0 +area 0 +iscoauthor 0 +graduat 0 +textbook 0 +gener 0 +three 0 +time 0 +symposiumon 0 +executivecommitte 0 +nomin 0 +interestgroup 0 +sigact 0 +chairmanof 0 +ieee 0 +annualsymposium 0 +foc 0 +hasserv 0 +vice 0 +stechnic 0 +mathemat 0 +chair 0 +programcommitte 0 +structur 0 +confer 0 +advisorysubcommitte 0 +thiscommitte 0 +ofth 0 +issu 0 +control 0 +annal 0 +histori 0 +ofcomput 0 +notr 0 +dame 0 +formallog 0 +system 0 +eleven 0 +student 0 +complet 0 +doctor 0 +dissert 0 +underprofessor 0 +direct 0 +sever 0 +gone 0 +dopostdoctor 0 +work 0 +cornel 0 +ofcalifornia 0 +eight 0 +hold 0 +posit 0 +avarieti 0 +chosen 0 +industri 0 +employ 0 +leather 0 +motorcycl 0 +jacket 0 +read 0 +ratherthan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..cc2bd7ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,69 @@ +comput 1 +support 0 +applic 0 +zahorjan 0 +univers 0 +receiv 0 +research 0 +schedul 0 +runtim 0 +parallel 0 +polici 0 +system 0 +program 0 +titl 0 +john 0 +professor 0 +graduat 0 +frombrown 0 +oftoronto 0 +presidenti 0 +young 0 +investigatoraward 0 +primari 0 +interest 0 +area 0 +parallelsystem 0 +supportfor 0 +mobil 0 +current 0 +focu 0 +continu 0 +mediaappl 0 +involv 0 +real 0 +time 0 +audio 0 +video 0 +thegoal 0 +provid 0 +interfac 0 +allow 0 +torespond 0 +easili 0 +chang 0 +load 0 +activ 0 +topic 0 +includ 0 +techniqu 0 +parallelizationof 0 +code 0 +written 0 +sequenti 0 +languag 0 +exhibit 0 +bothcontrol 0 +data 0 +construct 0 +develop 0 +intend 0 +formobil 0 +platform 0 +editori 0 +board 0 +ieee 0 +transactionson 0 +softwar 0 +engin 0 +survei 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..b42cfc14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,175 @@ +imag 1 +project 0 +student 0 +metip 0 +learn 0 +process 0 +program 0 +mathemat 0 +us 0 +activ 0 +current 0 +experi 0 +comput 0 +click 0 +order 0 +free 0 +copi 0 +todai 0 +softwar 0 +tanimoto 0 +scienc 0 +encourag 0 +discuss 0 +digit 0 +develop 0 +allow 0 +materi 0 +part 0 +number 0 +pixel 0 +calcul 0 +transform 0 +applic 0 +list 0 +peopl 0 +work 0 +relat 0 +educ 0 +pleas 0 +link 0 +xform 0 +washington 0 +director 0 +steven 0 +depart 0 +emphas 0 +practic 0 +group 0 +exploratori 0 +open 0 +end 0 +goal 0 +help 0 +meet 0 +theseobject 0 +particular 0 +seri 0 +applicationsdesign 0 +manipul 0 +choic 0 +intend 0 +enrich 0 +rather 0 +astandard 0 +classroom 0 +curriculum 0 +teacher 0 +plai 0 +variou 0 +role 0 +withthes 0 +exampl 0 +catalyz 0 +bylead 0 +theconcept 0 +explor 0 +toexplor 0 +warper 0 +programm 0 +primarili 0 +pentium 0 +base 0 +srun 0 +microsoft 0 +window 0 +alsoavail 0 +appl 0 +macintosh 0 +volunt 0 +mathematicsteach 0 +particip 0 +test 0 +experiment 0 +transcriptproject 0 +design 0 +record 0 +keep 0 +framework 0 +willfacilit 0 +storag 0 +person 0 +academicinform 0 +hard 0 +disk 0 +floppi 0 +own 0 +themetip 0 +close 0 +involv 0 +studi 0 +ofmultiplay 0 +tointegr 0 +idea 0 +describ 0 +prospect 0 +forth 0 +direct 0 +distribut 0 +databas 0 +collect 0 +user 0 +itsxform 0 +done 0 +somethingfun 0 +know 0 +put 0 +version 0 +document 0 +onlin 0 +littl 0 +demonstr 0 +beenput 0 +togeth 0 +graduat 0 +took 0 +seminar 0 +winter 0 +environ 0 +integr 0 +witha 0 +subset 0 +common 0 +lisp 0 +offer 0 +technic 0 +essenti 0 +newapproach 0 +teach 0 +fundamentalattract 0 +approach 0 +thecomput 0 +pursuit 0 +creat 0 +neat 0 +visual 0 +effect 0 +digitalimag 0 +portrai 0 +thing 0 +interest 0 +successfulli 0 +instal 0 +would 0 +like 0 +discussteach 0 +contact 0 +support 0 +nation 0 +foundat 0 +undergr 0 +bricker 0 +last 0 +modifi 0 +tuesdai 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..34669716 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,53 @@ +mobil 1 +comput 0 +univers 0 +washington 0 +system 0 +infrastructur 0 +paper 0 +research 0 +ubiquit 0 +washingtonher 0 +overview 0 +computingresearch 0 +project 0 +mobisa 0 +inform 0 +wireless 0 +environ 0 +handheld 0 +task 0 +graph 0 +manag 0 +applic 0 +cope 0 +resourc 0 +variabl 0 +survei 0 +describ 0 +fundament 0 +challeng 0 +field 0 +program 0 +methodolog 0 +disconnect 0 +operationdistribut 0 +transact 0 +mobilecomput 0 +systemcontact 0 +prof 0 +brian 0 +bershadprof 0 +gaetano 0 +borriellomarc 0 +fiuczynskigeorg 0 +formanprof 0 +hank 0 +levygeoff 0 +voelkerterri 0 +watsonprof 0 +john 0 +zahorjan 0 +last 0 +updat 0 +forman 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..b723377e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,138 @@ +cecil 1 +project 1 +vortex 0 +languag 0 +object 0 +orient 0 +research 0 +implement 0 +support 0 +static 0 +like 0 +class 0 +list 0 +avail 0 +page 0 +high 0 +type 0 +system 0 +optim 0 +level 0 +current 0 +profil 0 +intraprocedur 0 +messag 0 +analys 0 +also 0 +elimin 0 +written 0 +releas 0 +suno 0 +subscrib 0 +inform 0 +overview 0 +member 0 +paper 0 +sampl 0 +peopl 0 +intern 0 +document 0 +projectuw 0 +projectwelcom 0 +home 0 +conduct 0 +program 0 +design 0 +emphasi 0 +issu 0 +pure 0 +intend 0 +rapidconstruct 0 +qualiti 0 +extens 0 +softwar 0 +incorporatesmulti 0 +method 0 +simpl 0 +prototyp 0 +base 0 +model 0 +mechan 0 +tosupport 0 +structur 0 +form 0 +comput 0 +inherit 0 +modul 0 +basedencapsul 0 +flexibl 0 +allowsstat 0 +dynam 0 +code 0 +freeli 0 +compil 0 +infrastructur 0 +forobject 0 +target 0 +pureobject 0 +hybrid 0 +orientedlanguag 0 +modula 0 +java 0 +incorporateshigh 0 +analysi 0 +hierachyanalysi 0 +guid 0 +receiv 0 +predict 0 +guidedselect 0 +procedur 0 +special 0 +split 0 +automat 0 +inlin 0 +closur 0 +includ 0 +acollect 0 +standard 0 +commonsubexpress 0 +dead 0 +assign 0 +vortexcompil 0 +entir 0 +initi 0 +beta 0 +currentlyavail 0 +sparc 0 +run 0 +either 0 +solari 0 +send 0 +mail 0 +interest 0 +request 0 +bodi 0 +ofinterest 0 +parti 0 +obtain 0 +thebeta 0 +recent 0 +finish 0 +technic 0 +report 0 +describ 0 +much 0 +detail 0 +goal 0 +direct 0 +postscript 0 +version 0 +past 0 +uwcs 0 +sourc 0 +relat 0 +pointer 0 +projectslast 0 +updat 0 +august 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..d130a758 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,34 @@ +grail 1 +imag 1 +washington 1 +graphic 0 +depart 0 +comput 0 +scienc 0 +engin 0 +laboratorywelcom 0 +home 0 +page 0 +laboratori 0 +theunivers 0 +inform 0 +peopl 0 +cours 0 +research 0 +project 0 +public 0 +these 0 +softwar 0 +data 0 +cool 0 +neighborhood 0 +univers 0 +seattl 0 +local 0 +interest 0 +disk 0 +usag 0 +polici 0 +comment 0 +mtwong 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..02272eaa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,71 @@ +rout 1 +chao 0 +research 0 +simul 0 +chaotic 0 +project 0 +washington 0 +router 0 +algorithm 0 +result 0 +pcrcw 0 +comput 0 +univers 0 +seattl 0 +technic 0 +report 0 +avail 0 +chip 0 +micron 0 +network 0 +present 0 +group 0 +scienc 0 +engin 0 +depart 0 +friend 0 +mine 0 +dylan 0 +peopleal 0 +sort 0 +peopl 0 +work 0 +paper 0 +repositori 0 +papersand 0 +chaoticrout 0 +implement 0 +hardwar 0 +built 0 +test 0 +cmo 0 +redesign 0 +process 0 +better 0 +perform 0 +power 0 +allsort 0 +includ 0 +nice 0 +graphicalfront 0 +standard 0 +discuss 0 +presentationof 0 +abl 0 +come 0 +upwith 0 +guidelin 0 +build 0 +list 0 +thathav 0 +web 0 +describ 0 +interconnect 0 +parallel 0 +commun 0 +workshop 0 +held 0 +univeristi 0 +proceed 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..c7215bad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,201 @@ +chinook 1 +synthesi 0 +design 0 +softwar 0 +hardwar 0 +time 0 +embed 0 +system 0 +architectur 0 +rather 0 +gener 0 +processor 0 +version 0 +includ 0 +scienc 0 +fellowship 0 +project 0 +salmon 0 +warm 0 +wind 0 +rocki 0 +mountain 0 +real 0 +constraint 0 +map 0 +detail 0 +enabl 0 +make 0 +level 0 +work 0 +retarget 0 +activ 0 +simul 0 +partit 0 +june 0 +connect 0 +code 0 +compon 0 +chou 0 +nation 0 +graduat 0 +shinook 0 +larg 0 +oncorhynchu 0 +tshawytscha 0 +pacif 0 +amer 0 +name 0 +tribe 0 +blow 0 +east 0 +southerli 0 +west 0 +rare 0 +american 0 +sled 0 +doga 0 +toolfor 0 +cadtool 0 +control 0 +domin 0 +reactivesystem 0 +behavior 0 +descriptionto 0 +user 0 +target 0 +fill 0 +neededto 0 +build 0 +complet 0 +inform 0 +designdecis 0 +high 0 +earli 0 +cycl 0 +reiterateaft 0 +willnot 0 +maintain 0 +also 0 +designerto 0 +take 0 +advantag 0 +technolog 0 +instead 0 +ti 0 +legacycod 0 +current 0 +develop 0 +currentlyw 0 +interprocessorcommun 0 +effici 0 +accur 0 +moredetail 0 +becom 0 +avail 0 +shortli 0 +character 0 +follow 0 +meet 0 +ratherthan 0 +try 0 +maxim 0 +averag 0 +perform 0 +util 0 +assumesmanu 0 +believ 0 +issu 0 +intricateand 0 +sometim 0 +even 0 +technic 0 +want 0 +evalu 0 +differentarchitectur 0 +forc 0 +singl 0 +fix 0 +asicarchitectur 0 +synthes 0 +reli 0 +onoff 0 +shelf 0 +kernel 0 +discourag 0 +first 0 +tool 0 +oper 0 +innovemb 0 +shownat 0 +autom 0 +confer 0 +diego 0 +mainfeatur 0 +automat 0 +peripheraldevic 0 +sequenti 0 +concurr 0 +descript 0 +andsynthes 0 +devic 0 +driver 0 +input 0 +verilog 0 +output 0 +hardwarenetlist 0 +need 0 +togeth 0 +softwareprogram 0 +main 0 +topic 0 +interfacingproblem 0 +schedul 0 +timingconstraint 0 +function 0 +improv 0 +demonstr 0 +nato 0 +summer 0 +school 0 +swcodedesign 0 +tremezzo 0 +itali 0 +incorpor 0 +severalmor 0 +interfac 0 +techniqu 0 +memori 0 +moreeffici 0 +chinookersfacultygaetano 0 +borriellogradu 0 +student 0 +ross 0 +ortegaken 0 +hinesian 0 +macduff 0 +recent 0 +selizabeth 0 +walkupscott 0 +hauck 0 +henrik 0 +hulgaardstafflarri 0 +mcmurchielist 0 +paperschinook 0 +sponsorsarpa 0 +contract 0 +foundat 0 +grant 0 +walkup 0 +patricia 0 +robert 0 +harri 0 +ortega 0 +link 0 +depart 0 +comput 0 +engin 0 +universityof 0 +washington 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..16148295 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,135 @@ +emerald 1 +fpga 0 +architectur 0 +tool 0 +driven 0 +rout 0 +work 0 +copyright 0 +mcmurchi 0 +develop 0 +map 0 +design 0 +time 0 +need 0 +system 0 +provid 0 +logic 0 +placement 0 +author 0 +darren 0 +cronquist 0 +larri 0 +proceed 0 +intern 0 +symposium 0 +field 0 +programm 0 +gate 0 +februari 0 +router 0 +carl 0 +ebel 0 +projectid 0 +phase 0 +would 0 +makeus 0 +reliabl 0 +produc 0 +accur 0 +performanceevalu 0 +propos 0 +unfortun 0 +given 0 +quickproduct 0 +frame 0 +face 0 +construct 0 +isoften 0 +postpon 0 +mani 0 +featur 0 +beenfrozen 0 +satisfi 0 +fast 0 +prototyp 0 +havedesign 0 +power 0 +quickdevelop 0 +heart 0 +basicfeatur 0 +block 0 +analysi 0 +synthesisand 0 +technolog 0 +global 0 +partit 0 +anddetail 0 +environ 0 +aneffici 0 +thoroughli 0 +specifi 0 +blockarchitectur 0 +well 0 +specif 0 +metric 0 +tailorplac 0 +moreov 0 +parameter 0 +schematicspecif 0 +allow 0 +variat 0 +quickli 0 +capturedand 0 +evalu 0 +public 0 +document 0 +contain 0 +page 0 +includ 0 +byth 0 +contribut 0 +mean 0 +ensur 0 +dissemin 0 +ofscholarli 0 +technic 0 +commerci 0 +basi 0 +andal 0 +right 0 +therein 0 +maintain 0 +copyrighthold 0 +notwithstand 0 +offer 0 +hereelectron 0 +understood 0 +person 0 +copi 0 +thisinform 0 +adher 0 +term 0 +constraint 0 +invok 0 +eachauthor 0 +repost 0 +without 0 +theexplicit 0 +permiss 0 +holder 0 +definit 0 +paper 0 +compil 0 +appear 0 +sigda 0 +fourth 0 +arrai 0 +us 0 +emeraldlarri 0 +pathfind 0 +negoti 0 +basedperform 0 +third 0 +arraysaid 0 +research 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..0e56f008 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,133 @@ +system 1 +circuit 1 +fpga 0 +architectur 0 +asynchron 0 +integr 0 +design 0 +research 0 +embed 0 +project 0 +improv 0 +perform 0 +northwest 0 +laboratori 0 +comput 0 +develop 0 +current 0 +specif 0 +rapid 0 +level 0 +depart 0 +scienc 0 +engin 0 +univers 0 +washington 0 +vlsi 0 +synthesi 0 +well 0 +montag 0 +multi 0 +prototyp 0 +rout 0 +gener 0 +clock 0 +retim 0 +cost 0 +tune 0 +seattl 0 +usath 0 +engag 0 +larg 0 +scale 0 +aid 0 +educ 0 +sinc 0 +late 0 +todai 0 +focu 0 +wide 0 +varieti 0 +sytem 0 +verificationtim 0 +separ 0 +event 0 +verif 0 +time 0 +survei 0 +methodolog 0 +first 0 +prototypingtriptych 0 +triptych 0 +densiti 0 +commerci 0 +springbok 0 +board 0 +partit 0 +assign 0 +topolog 0 +work 0 +emerald 0 +adapt 0 +toolset 0 +complet 0 +map 0 +placement 0 +toolscan 0 +automat 0 +descript 0 +fpgaarchitectur 0 +metric 0 +incorporatedinto 0 +variou 0 +tool 0 +result 0 +systemsth 0 +chinook 0 +hardwar 0 +softwar 0 +simul 0 +applic 0 +optim 0 +synchron 0 +circuitsretim 0 +effici 0 +algorithm 0 +uselevel 0 +sensit 0 +latch 0 +reduc 0 +andincreas 0 +toler 0 +skew 0 +method 0 +synchronouscircuit 0 +latenc 0 +feedback 0 +contraint 0 +network 0 +routerth 0 +chaoticrout 0 +self 0 +systemsself 0 +direct 0 +kehlprevi 0 +gemini 0 +valid 0 +layout 0 +compar 0 +implement 0 +mactest 0 +digit 0 +function 0 +tester 0 +chip 0 +cmo 0 +voltag 0 +arpa 0 +reportsarpa 0 +bluebook 0 +paragraph 0 +overview 0 +accomplish 0 +high 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..117dd067 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,61 @@ +safeti 1 +system 0 +softwar 0 +univers 0 +washington 0 +techniqu 0 +comput 0 +foundat 0 +methodolog 0 +critic 0 +us 0 +engin 0 +analysi 0 +plai 0 +increasingli 0 +import 0 +role 0 +nuclear 0 +reactor 0 +aircraft 0 +defenc 0 +space 0 +chemic 0 +plant 0 +medic 0 +equip 0 +consequ 0 +malfunct 0 +must 0 +pass 0 +rigor 0 +test 0 +review 0 +although 0 +exist 0 +decad 0 +appli 0 +contain 0 +digit 0 +goal 0 +project 0 +develop 0 +theoret 0 +build 0 +built 0 +upon 0 +safewar 0 +nanci 0 +leveson 0 +summar 0 +issu 0 +involv 0 +lai 0 +work 0 +support 0 +prototyp 0 +tool 0 +valid 0 +specif 0 +scienc 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..9d4048f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,169 @@ +softbot 1 +internet 0 +user 0 +softwar 0 +system 0 +plan 0 +goal 0 +interfac 0 +etzioni 0 +inform 0 +washington 0 +found 0 +control 0 +build 0 +agent 0 +world 0 +research 0 +develop 0 +robot 0 +intellig 0 +high 0 +level 0 +enabl 0 +wide 0 +resourc 0 +oren 0 +access 0 +project 0 +base 0 +magazin 0 +decemb 0 +graphic 0 +keith 0 +golden 0 +search 0 +reactiv 0 +kwok 0 +back 0 +home 0 +page 0 +softbotinternet 0 +softbotth 0 +finalist 0 +discoveraward 0 +technolog 0 +innov 0 +comput 0 +autonom 0 +interact 0 +real 0 +softwareenviron 0 +oper 0 +databas 0 +pragmaticallyconveni 0 +intellectu 0 +challeng 0 +substrat 0 +support 0 +claim 0 +util 0 +machin 0 +learningtechniqu 0 +acustomiz 0 +moder 0 +assist 0 +internetaccess 0 +accept 0 +languag 0 +generatesand 0 +execut 0 +achiev 0 +learn 0 +itsexperi 0 +human 0 +state 0 +want 0 +accomplish 0 +disambigu 0 +requestand 0 +dynam 0 +determin 0 +satisfyit 0 +us 0 +unix 0 +shell 0 +interactwith 0 +rang 0 +take 0 +tour 0 +sgraphic 0 +princip 0 +investig 0 +daniel 0 +weld 0 +also 0 +check 0 +metacrawl 0 +field 0 +servic 0 +tosearch 0 +multipl 0 +indic 0 +parallel 0 +provid 0 +sophisticatedprun 0 +option 0 +contact 0 +introduct 0 +cacm 0 +juli 0 +methodolog 0 +motiv 0 +without 0 +repli 0 +brook 0 +technic 0 +paper 0 +cartoonrepresent 0 +taken 0 +blanchard 0 +articl 0 +appear 0 +issu 0 +ofcolumn 0 +univers 0 +alumni 0 +group 0 +current 0 +toth 0 +allow 0 +easili 0 +specifi 0 +extend 0 +maintain 0 +xiiplann 0 +work 0 +specif 0 +implement 0 +advanc 0 +space 0 +browser 0 +debug 0 +planner 0 +dave 0 +christianson 0 +compar 0 +rule 0 +versu 0 +procedur 0 +sujai 0 +parekh 0 +ilalearn 0 +design 0 +protocol 0 +multi 0 +collabor 0 +negoti 0 +ying 0 +experi 0 +domain 0 +goan 0 +optim 0 +ingram 0 +gather 0 +hacker 0 +info 0 +local 0 +mike 0 +perkowitz 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..adacc765 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,213 @@ +system 1 +extens 0 +spin 0 +oper 0 +kernel 0 +project 0 +code 0 +dynam 0 +applic 0 +paper 0 +languag 0 +modula 0 +microsecond 0 +take 0 +perform 0 +describ 0 +interfac 0 +us 0 +implement 0 +page 0 +run 0 +servic 0 +gener 0 +network 0 +support 0 +time 0 +report 0 +appear 0 +washington 0 +control 0 +integr 0 +allow 0 +load 0 +basic 0 +call 0 +data 0 +collect 0 +address 0 +thread 0 +alpha 0 +unix 0 +program 0 +result 0 +creat 0 +termin 0 +benchmark 0 +execut 0 +design 0 +shortcom 0 +safe 0 +intern 0 +inform 0 +compil 0 +member 0 +sai 0 +peopl 0 +credit 0 +bershad 0 +systemspin 0 +thatsupport 0 +adapt 0 +andimplement 0 +direct 0 +stillmaintain 0 +inter 0 +isol 0 +atruntim 0 +accesshardwar 0 +almost 0 +nooverhead 0 +procedur 0 +pass 0 +byrefer 0 +rather 0 +copi 0 +providesa 0 +core 0 +capabl 0 +resourc 0 +thesear 0 +systemservic 0 +space 0 +allextens 0 +written 0 +typesaf 0 +properti 0 +oftypesafeti 0 +prevent 0 +crash 0 +attemptingto 0 +manipul 0 +arbitrari 0 +piec 0 +workstat 0 +writeboth 0 +special 0 +video 0 +wella 0 +purpos 0 +connect 0 +machinerun 0 +server 0 +quick 0 +structur 0 +extend 0 +withlow 0 +overhead 0 +exampl 0 +handl 0 +recov 0 +fault 0 +executeit 0 +synchron 0 +fork 0 +join 0 +protectedprocedur 0 +anoth 0 +function 0 +cross 0 +machin 0 +overethernet 0 +realli 0 +oldadapt 0 +less 0 +operationsund 0 +mach 0 +longer 0 +samehardwar 0 +saveyourself 0 +effort 0 +recent 0 +bind 0 +invoc 0 +mechan 0 +provid 0 +flexibl 0 +effici 0 +andsimpl 0 +osdi 0 +safeti 0 +sosp 0 +protocol 0 +architectur 0 +forappl 0 +specif 0 +usenix 0 +winter 0 +confer 0 +write 0 +experi 0 +build 0 +high 0 +make 0 +clear 0 +distinct 0 +pretti 0 +happi 0 +deal 0 +order 0 +link 0 +linker 0 +point 0 +abil 0 +manag 0 +linkabl 0 +namespac 0 +runtim 0 +interposit 0 +commun 0 +facil 0 +show 0 +improv 0 +critic 0 +wait 0 +trail 0 +talk 0 +interest 0 +bottom 0 +line 0 +arpa 0 +overview 0 +summari 0 +regular 0 +friend 0 +gotten 0 +assist 0 +academia 0 +industri 0 +involv 0 +relat 0 +pointer 0 +barb 0 +arrow 0 +document 0 +latest 0 +statu 0 +avail 0 +could 0 +qualif 0 +master 0 +degre 0 +fund 0 +raship 0 +posit 0 +undergradu 0 +mascot 0 +encourag 0 +mani 0 +decid 0 +adopt 0 +ourmascot 0 +maintain 0 +brian 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..713ed503 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,117 @@ +compil 1 +dynam 0 +time 0 +code 0 +constant 0 +optim 0 +valu 0 +us 0 +project 0 +paper 0 +comput 0 +howev 0 +produc 0 +machin 0 +gener 0 +program 0 +system 0 +projectth 0 +projectmor 0 +inform 0 +member 0 +relat 0 +projectsuw 0 +section 0 +student 0 +group 0 +webdynam 0 +enabl 0 +base 0 +ofinvari 0 +data 0 +theserun 0 +elimin 0 +memoryload 0 +perform 0 +propag 0 +fold 0 +remov 0 +branch 0 +theydetermin 0 +fulli 0 +unrol 0 +loop 0 +bound 0 +performancebenefit 0 +effici 0 +offsetbi 0 +cost 0 +approach 0 +dynamiccompil 0 +strive 0 +fast 0 +high 0 +qualitydynam 0 +programm 0 +annot 0 +region 0 +theprogram 0 +static 0 +optimizingcompil 0 +automat 0 +templat 0 +pair 0 +dataflow 0 +analys 0 +identifi 0 +variabl 0 +willb 0 +simpl 0 +copi 0 +thetempl 0 +patch 0 +execut 0 +work 0 +target 0 +purpos 0 +imper 0 +languag 0 +initi 0 +initialexperi 0 +appli 0 +producedspeedup 0 +rang 0 +part 0 +spinproject 0 +eventu 0 +dynamicallycompil 0 +spin 0 +kernel 0 +exampl 0 +spinev 0 +dispatch 0 +also 0 +activ 0 +explor 0 +otherposs 0 +applic 0 +invirtu 0 +interpret 0 +prototyp 0 +systemi 0 +describ 0 +pldi 0 +arenow 0 +start 0 +design 0 +build 0 +second 0 +wewil 0 +releas 0 +detail 0 +soon 0 +last 0 +updat 0 +august 0 +grant 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..2cc2ccd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,93 @@ +program 1 +project 0 +comput 0 +washington 0 +languag 0 +direct 0 +compil 0 +engin 0 +info 0 +arrai 0 +base 0 +written 0 +without 0 +machin 0 +level 0 +concept 0 +border 0 +easili 0 +scientif 0 +walk 0 +overview 0 +paper 0 +depart 0 +scienc 0 +univers 0 +seattl 0 +eduzpl 0 +suitabl 0 +would 0 +previous 0 +fortran 0 +fast 0 +sequenti 0 +parallel 0 +modif 0 +special 0 +independ 0 +recompil 0 +necessari 0 +higher 0 +like 0 +elimin 0 +error 0 +prone 0 +index 0 +tediou 0 +loop 0 +typic 0 +shorter 0 +understood 0 +modifi 0 +scientist 0 +find 0 +region 0 +shatter 0 +control 0 +flow 0 +conclus 0 +ideal 0 +small 0 +write 0 +yourmachin 0 +programm 0 +area 0 +shouldconsid 0 +enrol 0 +zpthi 0 +autumn 0 +check 0 +recent 0 +chang 0 +high 0 +minut 0 +introduct 0 +browser 0 +right 0 +manual 0 +relat 0 +detail 0 +line 0 +inform 0 +sampl 0 +peopl 0 +member 0 +horizon 0 +descript 0 +group 0 +futur 0 +acknowledg 0 +list 0 +help 0 +support 0 +work 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..80fef124 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,149 @@ +multithread 1 +processor 0 +simultan 0 +instruct 0 +parallel 0 +issu 0 +cycl 0 +egger 0 +architectur 0 +level 0 +postscript 0 +levi 0 +tullsen 0 +multipl 0 +superscalar 0 +singl 0 +thread 0 +compil 0 +student 0 +emer 0 +stamm 0 +abstract 0 +proceed 0 +problem 0 +face 0 +high 0 +maintain 0 +util 0 +latenc 0 +modern 0 +techniqu 0 +avail 0 +resourc 0 +enabl 0 +perform 0 +research 0 +jack 0 +public 0 +andd 0 +annual 0 +intern 0 +symposium 0 +comput 0 +page 0 +home 0 +pagesimultan 0 +projectoverviewpeoplepubl 0 +overviewth 0 +crucial 0 +todai 0 +speed 0 +microprocessor 0 +long 0 +memori 0 +allevi 0 +interleav 0 +execut 0 +differentthread 0 +differ 0 +ultim 0 +though 0 +limit 0 +amount 0 +within 0 +permit 0 +independ 0 +function 0 +unit 0 +combin 0 +issuefeatur 0 +wide 0 +hide 0 +abilityof 0 +hardwar 0 +contextsar 0 +activ 0 +compet 0 +dynam 0 +share 0 +exploitthread 0 +interchang 0 +formsof 0 +effect 0 +us 0 +increas 0 +studi 0 +havedemonstr 0 +significantli 0 +improvesprocessor 0 +throughput 0 +multiprogram 0 +parallelworkload 0 +shown 0 +gain 0 +achievedin 0 +minim 0 +extens 0 +ordersuperscalar 0 +current 0 +futur 0 +work 0 +includ 0 +investig 0 +fast 0 +synchronizationtechniqu 0 +also 0 +conduct 0 +otherarchitectur 0 +peoplefaculti 0 +susan 0 +hank 0 +levygradu 0 +dean 0 +tullsenindustri 0 +collabor 0 +digit 0 +equip 0 +corpor 0 +joel 0 +rebecca 0 +convert 0 +submit 0 +juli 0 +exploit 0 +choic 0 +fetch 0 +implement 0 +philadelphia 0 +first 0 +suif 0 +workshop 0 +stanford 0 +januari 0 +maxim 0 +chip 0 +andh 0 +santa 0 +margherita 0 +ligur 0 +itali 0 +june 0 +check 0 +list 0 +project 0 +still 0 +doon 0 +affair 0 +lojlo 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..b9643d33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,73 @@ +scienc 1 +comput 0 +depart 0 +award 0 +faculti 0 +three 0 +inform 0 +report 0 +guidebook 0 +statist 0 +wisc 0 +home 0 +pagecomput 0 +departmentabout 0 +departmentour 0 +form 0 +consist 0 +rank 0 +countri 0 +member 0 +receiv 0 +fourteen 0 +presidenti 0 +young 0 +investig 0 +packard 0 +fellowship 0 +women 0 +scientist 0 +engin 0 +incent 0 +excel 0 +doctor 0 +dissert 0 +develop 0 +area 0 +research 0 +project 0 +peopl 0 +cours 0 +offer 0 +fall 0 +class 0 +futur 0 +timet 0 +technic 0 +system 0 +answer 0 +frequent 0 +ask 0 +question 0 +alumni 0 +graduat 0 +undergradu 0 +annual 0 +onlin 0 +util 0 +madison 0 +local 0 +servic 0 +relat 0 +organ 0 +colophon 0 +server 0 +us 0 +infocomput 0 +departmentunivers 0 +wisconsin 0 +madisona 0 +west 0 +dayton 0 +streetmadison 0 +voic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..cddcfa2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,114 @@ +task 1 +control 0 +observ 0 +point 0 +object 0 +explor 0 +behavior 0 +viewpoint 0 +scene 0 +provabl 0 +local 0 +achiev 0 +smoothli 0 +global 0 +surfac 0 +purpos 0 +recoveri 0 +simpl 0 +navig 0 +abil 0 +correct 0 +base 0 +consid 0 +approach 0 +show 0 +reconstruct 0 +kyro 0 +kutulako 0 +chuck 0 +dyer 0 +consider 0 +interest 0 +recent 0 +emploi 0 +simpleobserv 0 +either 0 +make 0 +propertieseasi 0 +fixat 0 +combin 0 +order 0 +toperform 0 +complex 0 +obstacl 0 +avoid 0 +ourwork 0 +focus 0 +activ 0 +pointof 0 +perform 0 +involv 0 +develop 0 +makesimpl 0 +motion 0 +decis 0 +geometryof 0 +requir 0 +minim 0 +process 0 +imag 0 +first 0 +recov 0 +shape 0 +thesurfac 0 +select 0 +generalobserv 0 +posit 0 +provid 0 +inform 0 +objectthan 0 +other 0 +exist 0 +special 0 +beexploit 0 +mobil 0 +effici 0 +anddeterminist 0 +strategi 0 +reach 0 +localshap 0 +us 0 +qualitativestrategi 0 +viewingdirect 0 +align 0 +princip 0 +direct 0 +selectedpoint 0 +second 0 +deriv 0 +descriptionof 0 +formul 0 +thequalit 0 +observationso 0 +visibl 0 +slide 0 +maxim 0 +connect 0 +region 0 +arbitrari 0 +smooth 0 +attempt 0 +maintain 0 +well 0 +defin 0 +geometr 0 +relationship 0 +observationand 0 +view 0 +suggest 0 +lead 0 +also 0 +simplifi 0 +frame 0 +comput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..0d86a003 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,99 @@ +contour 1 +model 0 +deform 0 +extract 0 +detect 0 +classif 0 +formul 0 +integr 0 +noisi 0 +imag 0 +case 0 +regular 0 +snake 0 +deriv 0 +energi 0 +yield 0 +consid 0 +arbitrari 0 +turn 0 +fung 0 +roland 0 +chin 0 +develop 0 +approach 0 +classifi 0 +directli 0 +conduct 0 +studi 0 +initi 0 +ofact 0 +us 0 +minimax 0 +principl 0 +criterion 0 +wherebi 0 +valu 0 +automat 0 +implicitli 0 +determin 0 +along 0 +furthermor 0 +function 0 +contain 0 +hough 0 +transform 0 +special 0 +subsequ 0 +problem 0 +combin 0 +stabl 0 +invari 0 +anduniqu 0 +markov 0 +random 0 +field 0 +priordistribut 0 +exert 0 +influenc 0 +global 0 +allow 0 +bayesian 0 +framework 0 +posterior 0 +estim 0 +equival 0 +minim 0 +gener 0 +activ 0 +final 0 +lower 0 +level 0 +visual 0 +task 0 +withpattern 0 +recognit 0 +process 0 +base 0 +nearman 0 +pearson 0 +lemma 0 +optim 0 +classificationtest 0 +summat 0 +peak 0 +practic 0 +applic 0 +small 0 +region 0 +need 0 +margin 0 +distribut 0 +valid 0 +confirm 0 +extens 0 +rigor 0 +experiment 0 +gsnake 0 +softwar 0 +avail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..4f27127e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,198 @@ +data 1 +object 0 +displai 0 +lattic 0 +defin 0 +visual 0 +base 0 +function 0 +model 0 +system 0 +program 0 +scalar 0 +languag 0 +time 0 +type 0 +call 0 +mathemat 0 +contain 0 +comput 0 +approxim 0 +finit 0 +particular 0 +expressivenesscondit 0 +temperatur 0 +primit 0 +implement 0 +develop 0 +precis 0 +thu 0 +purpos 0 +map 0 +condit 0 +order 0 +relat 0 +arrai 0 +voxel 0 +us 0 +foundat 0 +process 0 +ofdata 0 +scientist 0 +infinit 0 +number 0 +amount 0 +inform 0 +pixel 0 +color 0 +ideal 0 +appropri 0 +fact 0 +show 0 +satisfi 0 +scientif 0 +specifi 0 +class 0 +variabl 0 +graphic 0 +design 0 +along 0 +control 0 +abstract 0 +bill 0 +hibbard 0 +brian 0 +paul 0 +chuck 0 +dyer 0 +theidea 0 +calleda 0 +prototyp 0 +adha 0 +upon 0 +idea 0 +guid 0 +follow 0 +principl 0 +natur 0 +objectsrepres 0 +objectsfrequ 0 +real 0 +functionswith 0 +domain 0 +wherea 0 +containfinit 0 +themathemat 0 +repres 0 +chosenfrom 0 +palett 0 +anim 0 +sequenc 0 +numbersof 0 +frame 0 +close 0 +relationship 0 +computationalmodel 0 +commun 0 +informationcont 0 +thatdisplai 0 +encod 0 +onlythos 0 +complet 0 +sens 0 +itimpl 0 +satisfyingth 0 +expressivenss 0 +onhow 0 +wecan 0 +howprecis 0 +voxelresolut 0 +therefor 0 +visualizationprocess 0 +objectsto 0 +interpret 0 +context 0 +theexpress 0 +isomorph 0 +scientificdata 0 +built 0 +primitivevari 0 +latitud 0 +radianc 0 +tupl 0 +constructor 0 +appropriatefor 0 +seri 0 +containsth 0 +canalso 0 +displayi 0 +graphicsprimit 0 +locationand 0 +size 0 +volum 0 +place 0 +animationsequ 0 +fromth 0 +thedisplai 0 +ofcours 0 +alreadi 0 +assum 0 +exampl 0 +given 0 +isnatur 0 +graph 0 +axi 0 +andtemperatur 0 +anoth 0 +remark 0 +thing 0 +wedo 0 +take 0 +assumpt 0 +consequ 0 +fundament 0 +calledvi 0 +adthat 0 +allow 0 +experi 0 +algorithm 0 +steer 0 +theircomput 0 +creat 0 +theirprogram 0 +thevi 0 +vvof 0 +thatsatisfi 0 +express 0 +howev 0 +implementationi 0 +quit 0 +flow 0 +auser 0 +interfac 0 +abstractionof 0 +render 0 +pipelin 0 +user 0 +interfacefor 0 +ofmap 0 +possibl 0 +recurs 0 +defineddata 0 +complex 0 +link 0 +tree 0 +ingener 0 +datatyp 0 +orient 0 +provid 0 +rigor 0 +help 0 +analyt 0 +altern 0 +usualapproach 0 +construct 0 +bywrit 0 +special 0 +fora 0 +specif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..2baeb05b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,52 @@ +condor 1 +comput 0 +project 0 +throughput 0 +high 0 +page 0 +homepag 0 +object 0 +goal 0 +develop 0 +implement 0 +deploi 0 +evaluatemechan 0 +polici 0 +support 0 +larg 0 +collect 0 +distribut 0 +own 0 +resourc 0 +guid 0 +technologicaland 0 +sociolog 0 +challeng 0 +environ 0 +team 0 +build 0 +softwar 0 +tool 0 +enabl 0 +scientist 0 +engin 0 +increas 0 +introduct 0 +start 0 +research 0 +system 0 +pool 0 +univers 0 +wisconsin 0 +madison 0 +help 0 +home 0 +world 0 +mail 0 +list 0 +comment 0 +suggestionscondor 0 +admin 0 +wisc 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..f55193df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,9 @@ +next 1 +homepag 0 +peopl 0 +project 0 +last 0 +modifi 0 +septemb 0 +miron 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..7515ba1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,170 @@ +coral 1 +includ 0 +binari 0 +releas 0 +also 0 +system 0 +support 0 +declar 0 +provid 0 +inform 0 +databas 0 +program 0 +version 0 +instal 0 +relat 0 +project 0 +develop 0 +group 0 +languag 0 +interfac 0 +combin 0 +imper 0 +queri 0 +contain 0 +modul 0 +wide 0 +rang 0 +evalu 0 +strategi 0 +user 0 +disk 0 +resid 0 +data 0 +octob 0 +grab 0 +file 0 +nobin 0 +solari 0 +linux 0 +mail 0 +announc 0 +wisc 0 +projectcor 0 +projectdocu 0 +content 0 +objectiveoverviewreleas 0 +informationse 0 +public 0 +coralpeopl 0 +work 0 +coraloth 0 +research 0 +madisonobject 0 +object 0 +robust 0 +efficientdeduct 0 +investig 0 +variou 0 +applic 0 +domain 0 +sever 0 +algorithm 0 +underli 0 +coralsystem 0 +member 0 +durationof 0 +sinc 0 +overview 0 +deduct 0 +rich 0 +allow 0 +declaritiveand 0 +supportsgener 0 +horn 0 +claus 0 +augment 0 +complex 0 +term 0 +aggreg 0 +negat 0 +tupl 0 +univers 0 +quantifi 0 +variabl 0 +canb 0 +organ 0 +collect 0 +interact 0 +coralimplement 0 +andautomat 0 +choos 0 +effici 0 +modulein 0 +addit 0 +permit 0 +guid 0 +optim 0 +desir 0 +select 0 +among 0 +control 0 +choic 0 +atth 0 +level 0 +construct 0 +updat 0 +insertand 0 +delet 0 +rule 0 +canprogram 0 +extend 0 +withcor 0 +primit 0 +high 0 +degre 0 +extens 0 +allowingc 0 +programm 0 +class 0 +structur 0 +enhanc 0 +coralimplemen 0 +main 0 +memori 0 +us 0 +theexodusstorag 0 +manang 0 +transact 0 +manag 0 +aclient 0 +server 0 +environ 0 +current 0 +want 0 +sourc 0 +code 0 +requiringy 0 +compil 0 +made 0 +forth 0 +indic 0 +machin 0 +type 0 +click 0 +readm 0 +gener 0 +manual 0 +instruct 0 +hpux 0 +seri 0 +suno 0 +stai 0 +announcemnt 0 +listwhich 0 +reciev 0 +relev 0 +newsgroup 0 +comp 0 +lang 0 +misc 0 +submit 0 +question 0 +comment 0 +report 0 +send 0 +edulast 0 +modifi 0 +shawn 0 +flisakowski 0 +flisakow 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..a12994a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,232 @@ +complementar 1 +problem 1 +gam 0 +applic 0 +algorithm 0 +solver 0 +newton 0 +system 0 +path 0 +point 0 +us 0 +research 0 +smooth 0 +nonsmooth 0 +page 0 +list 0 +link 0 +mile 0 +approxim 0 +equat 0 +paper 0 +econom 0 +sever 0 +inform 0 +interest 0 +also 0 +access 0 +file 0 +toolbox 0 +give 0 +model 0 +ferri 0 +implement 0 +well 0 +document 0 +theori 0 +softwar 0 +given 0 +directori 0 +relev 0 +mcplib 0 +matlab 0 +avail 0 +consist 0 +functionevalu 0 +detail 0 +help 0 +languag 0 +wisc 0 +method 0 +solut 0 +comput 0 +direct 0 +function 0 +emploi 0 +cannot 0 +linear 0 +subproblem 0 +appli 0 +sequenc 0 +iter 0 +step 0 +construct 0 +search 0 +strategi 0 +base 0 +uponreformul 0 +nation 0 +look 0 +michael 0 +interior 0 +neta 0 +result 0 +three 0 +decad 0 +subject 0 +divers 0 +engin 0 +scienc 0 +becom 0 +establish 0 +fruitfuldisciplin 0 +within 0 +mathemat 0 +program 0 +monograph 0 +survei 0 +basic 0 +role 0 +optim 0 +serv 0 +center 0 +regard 0 +incomplementar 0 +meetingsof 0 +commun 0 +pointer 0 +forcomplementar 0 +area 0 +tabl 0 +content 0 +researcherssoftwar 0 +collect 0 +nonlinear 0 +mix 0 +problemdescript 0 +sourc 0 +forthes 0 +evolv 0 +freeli 0 +andm 0 +allow 0 +frommatlab 0 +without 0 +spars 0 +jacobian 0 +evalu 0 +machin 0 +specificvers 0 +download 0 +interfac 0 +describ 0 +librari 0 +routin 0 +areavail 0 +hook 0 +contact 0 +steve 0 +rutherford 0 +colorado 0 +edufor 0 +extens 0 +classicaljosephi 0 +linearizedsubproblem 0 +lemk 0 +almost 0 +complementari 0 +pivot 0 +defineth 0 +dampedlinesearch 0 +merit 0 +measur 0 +violat 0 +infeas 0 +restartprocedur 0 +case 0 +totermin 0 +secondari 0 +everi 0 +rescal 0 +equilibr 0 +elementsappear 0 +data 0 +run 0 +mcpor 0 +directli 0 +techniqu 0 +similarto 0 +anonsmooth 0 +reformul 0 +algorithmconsist 0 +major 0 +anapproxim 0 +similar 0 +pathto 0 +aposs 0 +exist 0 +thepath 0 +entir 0 +along 0 +partiallycomput 0 +taken 0 +relinear 0 +anonmonoton 0 +watchdog 0 +avoid 0 +converg 0 +local 0 +minima 0 +norm 0 +forth 0 +underli 0 +keep 0 +number 0 +requir 0 +small 0 +possibl 0 +option 0 +robustnessimprov 0 +proxim 0 +perturb 0 +qpcomp 0 +ishandl 0 +deriv 0 +thenapproxim 0 +solv 0 +leadto 0 +zero 0 +origin 0 +form 0 +theaccuraci 0 +determin 0 +residu 0 +thecurr 0 +subsystem 0 +compar 0 +engineeringand 0 +mani 0 +known 0 +mpsge 0 +preprocessor 0 +thatallow 0 +equilibrium 0 +formul 0 +easili 0 +thegam 0 +home 0 +nemsth 0 +energi 0 +relat 0 +algorithmsand 0 +overview 0 +project 0 +trick 0 +oper 0 +pointmethod 0 +argonn 0 +laboratori 0 +archiv 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..cf217a0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,56 @@ +exodu 1 +wisc 0 +project 0 +storag 0 +user 0 +manag 0 +benchmark 0 +zwill 0 +home 0 +pageexodu 0 +extens 0 +object 0 +orient 0 +databas 0 +system 0 +toolkitnot 0 +document 0 +construct 0 +succed 0 +theshor 0 +still 0 +provid 0 +minim 0 +support 0 +theexodu 0 +compil 0 +persistentprogram 0 +languag 0 +avail 0 +licens 0 +requir 0 +inform 0 +need 0 +contact 0 +eduprincip 0 +investig 0 +mike 0 +carei 0 +david 0 +dewittse 0 +also 0 +public 0 +relat 0 +exodusshor 0 +successor 0 +exoduslatest 0 +compilercontribut 0 +softwar 0 +managera 0 +mail 0 +list 0 +exodus_al 0 +oodbsdat 0 +prepar 0 +april 0 +michael 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..ea3abaac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,196 @@ +paradis 1 +queri 0 +data 0 +databas 0 +object 0 +client 0 +server 0 +us 0 +inform 0 +support 0 +displai 0 +provid 0 +spatial 0 +attribut 0 +parallel 0 +system 0 +sampl 0 +also 0 +brows 0 +type 0 +graphic 0 +interfac 0 +polygon 0 +front 0 +layer 0 +creat 0 +applic 0 +contact 0 +relat 0 +shore 0 +project 0 +manag 0 +madison 0 +depart 0 +design 0 +implement 0 +geograph 0 +store 0 +manipul 0 +set 0 +subset 0 +issu 0 +model 0 +either 0 +result 0 +method 0 +area 0 +drop 0 +ship 0 +execut 0 +wisc 0 +projectparadis 0 +document 0 +content 0 +frontend 0 +informationse 0 +public 0 +peopl 0 +work 0 +extens 0 +optim 0 +report 0 +examin 0 +sequoia 0 +benchmark 0 +script 0 +vldb 0 +paper 0 +research 0 +group 0 +serverobject 0 +andevalu 0 +scalabl 0 +iscap 0 +massiv 0 +applyingobject 0 +orient 0 +technolog 0 +problem 0 +ofstor 0 +hope 0 +tosignificantli 0 +advanc 0 +size 0 +complex 0 +thatcan 0 +successfulli 0 +databasesystem 0 +aim 0 +handl 0 +providesa 0 +user 0 +andsupport 0 +paradiseprovid 0 +extend 0 +gisappl 0 +addit 0 +base 0 +asinteg 0 +real 0 +string 0 +built 0 +raster 0 +polylin 0 +point 0 +circl 0 +video 0 +mpeg 0 +imag 0 +underli 0 +persist 0 +allow 0 +spatialattribut 0 +foroverlap 0 +correspond 0 +custom 0 +order 0 +selectingcolor 0 +label 0 +withad 0 +issueimplicit 0 +zoom 0 +click 0 +sketch 0 +arubb 0 +band 0 +querycompos 0 +menu 0 +compos 0 +access 0 +databaseschema 0 +assist 0 +composit 0 +beview 0 +bedisplai 0 +tabl 0 +tupl 0 +context 0 +sensit 0 +help 0 +sqlwe 0 +ad 0 +abil 0 +invok 0 +defin 0 +extendedset 0 +exampl 0 +calcul 0 +byus 0 +standarddatabas 0 +oper 0 +includ 0 +anddrop 0 +extent 0 +indic 0 +insert 0 +updat 0 +current 0 +version 0 +emploi 0 +architectur 0 +syntax 0 +paradiseserv 0 +theresult 0 +back 0 +ismulti 0 +thread 0 +multipl 0 +connect 0 +sameserv 0 +sever 0 +carefulattent 0 +paid 0 +insur 0 +could 0 +effici 0 +processqueri 0 +especi 0 +involv 0 +largevolum 0 +frontendeurop 0 +pressher 0 +projectattn 0 +prof 0 +david 0 +dewittunivers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +west 0 +dayton 0 +streetmadison 0 +email 0 +edumor 0 +come 0 +biswadeep 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..1db260da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,81 @@ +scout 1 +internet 0 +servic 0 +intern 0 +report 0 +student 0 +project 0 +best 0 +resourc 0 +wisconsin 0 +madison 0 +commun 0 +us 0 +provid 0 +suggest 0 +inform 0 +homepagego 0 +text 0 +versionnewslett 0 +newand 0 +newli 0 +discov 0 +network 0 +toolsinternet 0 +announc 0 +updat 0 +daili 0 +effectiveinternet 0 +tool 0 +availablea 0 +studentssurf 0 +smarter 0 +longer 0 +universityof 0 +show 0 +canchoos 0 +filter 0 +hundr 0 +annoucementseach 0 +week 0 +look 0 +valuabl 0 +onlin 0 +networktool 0 +organ 0 +summar 0 +annot 0 +vefound 0 +offer 0 +sever 0 +format 0 +goal 0 +support 0 +effect 0 +byeduc 0 +research 0 +howev 0 +everyon 0 +welcom 0 +useth 0 +public 0 +site 0 +encouragefeedback 0 +entir 0 +three 0 +primari 0 +includ 0 +happen 0 +thescout 0 +toolkit 0 +ournewest 0 +know 0 +locat 0 +depart 0 +comput 0 +scienc 0 +theunivers 0 +comment 0 +feedbackscout 0 +servicesfor 0 +educ 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..a1d239f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,481 @@ +shore 1 +object 0 +system 0 +list 0 +data 0 +file 0 +applic 0 +unix 0 +persist 0 +type 0 +mail 0 +releas 0 +languag 0 +subscrib 0 +project 0 +wisc 0 +shore_al 0 +support 0 +access 0 +scalabl 0 +hardwar 0 +softwar 0 +provid 0 +server 0 +version 0 +inform 0 +beta 0 +design 0 +name 0 +odmg 0 +processor 0 +messag 0 +content 0 +research 0 +program 0 +space 0 +compat 0 +exist 0 +model 0 +first 0 +us 0 +defin 0 +byte 0 +listproc 0 +document 0 +relat 0 +includ 0 +geograph 0 +basic 0 +manag 0 +develop 0 +like 0 +interfac 0 +text 0 +environ 0 +store 0 +singl 0 +orient 0 +featur 0 +major 0 +goal 0 +commerci 0 +oodbm 0 +technolog 0 +uniqu 0 +process 0 +larg 0 +structur 0 +make 0 +term 0 +link 0 +chang 0 +shore_support 0 +user 0 +interest 0 +digest 0 +also 0 +sourc 0 +binari 0 +public 0 +benchmark 0 +madison 0 +databas 0 +depart 0 +need 0 +informationsystem 0 +multipl 0 +programminglanguag 0 +field 0 +eas 0 +transit 0 +abl 0 +either 0 +describ 0 +much 0 +base 0 +effort 0 +concentr 0 +heterogen 0 +focu 0 +architectur 0 +peer 0 +ashor 0 +client 0 +vendor 0 +second 0 +build 0 +exampl 0 +futur 0 +digit 0 +avail 0 +retriev 0 +contain 0 +mechan 0 +differ 0 +bulk 0 +set 0 +close 0 +standard 0 +oodb 0 +inter 0 +share 0 +flatten 0 +time 0 +directli 0 +framework 0 +regist 0 +anonym 0 +legaci 0 +read 0 +string 0 +attribut 0 +question 0 +mani 0 +clutter 0 +weekli 0 +request 0 +help 0 +default 0 +repli 0 +rather 0 +entir 0 +subscript 0 +home 0 +pageshor 0 +high 0 +perform 0 +repositorydocu 0 +objectiveoverviewreleas 0 +informationmail 0 +listsse 0 +line 0 +shorepeopl 0 +work 0 +shorelatest 0 +summari 0 +arpaparadis 0 +built 0 +shoreexodu 0 +predecessor 0 +shoreoo 0 +oodbsshor 0 +photo 0 +albumuw 0 +group 0 +serverobject 0 +implement 0 +andevalu 0 +serv 0 +widevarieti 0 +target 0 +cadsystem 0 +satellit 0 +repositori 0 +multi 0 +media 0 +expand 0 +capabl 0 +wide 0 +usedexodusstorag 0 +wisconsin 0 +fund 0 +arpa 0 +number 0 +ofwai 0 +hierarch 0 +anda 0 +thisinterfac 0 +intend 0 +theunix 0 +tool 0 +viand 0 +withoutmodif 0 +becom 0 +shoreobject 0 +complex 0 +overview 0 +someth 0 +hybrid 0 +natur 0 +inheritingcharacterist 0 +fromfil 0 +section 0 +briefli 0 +ofshor 0 +paper 0 +greater 0 +detail 0 +three 0 +scalabilitysupport 0 +heterogeneitysupport 0 +applicationswhen 0 +began 0 +year 0 +uniqueamong 0 +commun 0 +degre 0 +languageheterogen 0 +turn 0 +facilit 0 +remain 0 +distinguish 0 +supportfor 0 +depend 0 +persistentstorag 0 +furthermor 0 +sinc 0 +basicallycompat 0 +expect 0 +eventu 0 +betransf 0 +sector 0 +architectureshor 0 +sever 0 +wai 0 +symmetr 0 +distributedarchitectur 0 +everi 0 +particip 0 +run 0 +whether 0 +disksattach 0 +network 0 +workstat 0 +parallel 0 +intel 0 +paragon 0 +contrast 0 +architectureus 0 +exodu 0 +serverarchitectur 0 +fine 0 +typicallyus 0 +notionof 0 +valu 0 +ad 0 +runsin 0 +extens 0 +mind 0 +rel 0 +simpl 0 +forus 0 +specif 0 +theparadis 0 +alreadi 0 +nasa 0 +seosdi 0 +feel 0 +piec 0 +plai 0 +aimport 0 +role 0 +varieti 0 +endeavor 0 +librari 0 +almost 0 +certainlydepend 0 +go 0 +manipul 0 +transmitobject 0 +video 0 +pictur 0 +well 0 +whilecurr 0 +product 0 +could 0 +orientedtoward 0 +deal 0 +gigabyt 0 +terabyt 0 +customiz 0 +equal 0 +import 0 +index 0 +queri 0 +libraryar 0 +requir 0 +heterogeneityobject 0 +neutraltyp 0 +embodi 0 +enhanc 0 +databasefeatur 0 +provis 0 +simplifi 0 +task 0 +ofsupport 0 +feasibleto 0 +mention 0 +earlier 0 +quit 0 +neutral 0 +definit 0 +wasrec 0 +propos 0 +consortium 0 +emphasi 0 +howev 0 +onprovid 0 +withina 0 +applicationsa 0 +enabl 0 +currentlyus 0 +untyp 0 +stop 0 +structuredobject 0 +conveni 0 +safe 0 +intra 0 +ultim 0 +hope 0 +displac 0 +orientedfil 0 +servic 0 +standpoint 0 +world 0 +manypersist 0 +flexibl 0 +tree 0 +reachabl 0 +indirectli 0 +give 0 +usersa 0 +familiar 0 +individualpersist 0 +root 0 +oflarg 0 +unnam 0 +realiz 0 +involvessever 0 +kind 0 +includingdirectori 0 +pool 0 +symbol 0 +cross 0 +refer 0 +unixappl 0 +compil 0 +editor 0 +fromtradit 0 +stream 0 +standardunix 0 +open 0 +write 0 +mkdir 0 +chdir 0 +order 0 +callsposs 0 +option 0 +onevari 0 +length 0 +charact 0 +asb 0 +attempt 0 +objectthrough 0 +counterpart 0 +callswil 0 +portion 0 +thatwish 0 +without 0 +possibl 0 +mount 0 +datacontain 0 +feasibl 0 +bothnew 0 +componentof 0 +morestructur 0 +latest 0 +tabl 0 +date 0 +approxim 0 +subject 0 +contact 0 +rleas 0 +sept 0 +improv 0 +completeimplement 0 +fix 0 +port 0 +tosolari 0 +linux 0 +august 0 +gzip 0 +sparc 0 +andpentium 0 +solari 0 +found 0 +atftp 0 +liststher 0 +eduand 0 +eduthi 0 +reach 0 +team 0 +usebi 0 +submit 0 +comment 0 +report 0 +cannot 0 +madisonc 0 +current 0 +unmoder 0 +unlikelyev 0 +get 0 +junk 0 +moder 0 +mailbox 0 +isalreadi 0 +sign 0 +belowfor 0 +sentwhen 0 +purpos 0 +notifi 0 +parti 0 +archiv 0 +sent 0 +sender 0 +beingpost 0 +want 0 +yourrepli 0 +copi 0 +thu 0 +anyon 0 +maysubscrib 0 +post 0 +existenceof 0 +shown 0 +return 0 +whenit 0 +yoursubscript 0 +conceal 0 +subscriberscannot 0 +obtain 0 +membership 0 +must 0 +specialmessag 0 +look 0 +receiv 0 +individu 0 +sendthi 0 +along 0 +send 0 +separ 0 +unsubscrib 0 +messageshould 0 +helplast 0 +modifi 0 +nanci 0 +hall 0 +nhall 0 +footnot 0 +odlshor 0 +concurr 0 +decid 0 +modelidl 0 +start 0 +point 0 +henc 0 +odlar 0 +similar 0 +anoth 0 +stabilizesw 0 +convert 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..98decb61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,31 @@ +madison 1 +abhinav 0 +page 0 +gupta 0 +wisc 0 +depart 0 +comput 0 +scienc 0 +home 0 +welcom 0 +agupta 0 +construct 0 +graduat 0 +student 0 +univers 0 +wisconsin 0 +contact 0 +residenceoffic 0 +kendal 0 +avenu 0 +dayton 0 +street 0 +interest 0 +link 0 +indian 0 +newspap 0 +stuff 0 +sport 0 +finger 0 +find 0 +whereabout 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..5cb120c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,13 @@ +alain 1 +home 0 +pagealain 0 +click 0 +larger 0 +pictur 0 +largest 0 +carnivor 0 +ever 0 +live 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..d356704f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,49 @@ +wisconsin 1 +univers 0 +biologi 0 +comput 0 +intellig 0 +allex 0 +scienc 0 +system 0 +molecular 0 +ismb 0 +carolyn 0 +machin 0 +learn 0 +group 0 +home 0 +page 0 +graduat 0 +studentbiotechnolog 0 +train 0 +program 0 +traineecomput 0 +departmentunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +mail 0 +wisc 0 +edutelephon 0 +advisor 0 +professor 0 +jude 0 +shavlikinterest 0 +sequenc 0 +protein 0 +fold 0 +artifici 0 +neural 0 +networkseduc 0 +madisonb 0 +purdu 0 +universityb 0 +educ 0 +mankato 0 +state 0 +relat 0 +link 0 +depart 0 +research 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..c503fcdd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,182 @@ +amir 1 +page 1 +program 0 +friend 0 +home 0 +interest 0 +super 0 +roth 0 +madison 0 +wisc 0 +group 0 +seminar 0 +arch 0 +compil 0 +us 0 +out 0 +comput 0 +scienc 0 +univers 0 +multiscalar 0 +physic 0 +yale 0 +degre 0 +beauti 0 +marci 0 +go 0 +like 0 +think 0 +analysi 0 +barb 0 +delphi 0 +maven 0 +show 0 +erin 0 +occasionali 0 +updat 0 +copi 0 +resum 0 +cvte 0 +truth 0 +week 0 +research 0 +topic 0 +implement 0 +preprocessor 0 +deleg 0 +work 0 +project 0 +partner 0 +set 0 +airport 0 +metal 0 +detector 0 +existencei 0 +graduat 0 +student 0 +depart 0 +wisconsin 0 +advisor 0 +guri 0 +sohi 0 +look 0 +method 0 +allevi 0 +data 0 +depend 0 +distribut 0 +regist 0 +file 0 +side 0 +curli 0 +fri 0 +advanc 0 +nail 0 +design 0 +vallei 0 +school 0 +much 0 +practic 0 +live 0 +girlfriend 0 +cat 0 +charli 0 +also 0 +went 0 +get 0 +master 0 +public 0 +polici 0 +lafollett 0 +institut 0 +presid 0 +meantim 0 +solv 0 +linear 0 +regress 0 +problem 0 +wacki 0 +recip 0 +find 0 +magazin 0 +watch 0 +parti 0 +five 0 +eggplant 0 +peopl 0 +weird 0 +anywai 0 +promis 0 +subba 0 +officem 0 +daddi 0 +novemb 0 +titanium 0 +screw 0 +desi 0 +relaford 0 +terri 0 +mulholland 0 +oxygen 0 +carbon 0 +dioxid 0 +area 0 +vagu 0 +languag 0 +whack 0 +optim 0 +parallel 0 +algorithm 0 +theori 0 +good 0 +soul 0 +evalu 0 +model 0 +perform 0 +enhanc 0 +three 0 +point 0 +shot 0 +thing 0 +scaryarea 0 +rabid 0 +interestth 0 +love 0 +know 0 +talk 0 +better 0 +leav 0 +never 0 +return 0 +hmmm 0 +handyinformatik 0 +index 0 +journal 0 +author 0 +madcat 0 +architectur 0 +resourc 0 +minut 0 +score 0 +sportslin 0 +philli 0 +everybodi 0 +favorit 0 +engin 0 +ickyth 0 +new 0 +write 0 +articl 0 +gui 0 +read 0 +want 0 +kid 0 +drew 0 +cornel 0 +david 0 +wierd 0 +featur 0 +associ 0 +kemin 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..237c8614 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..c00ef9bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,131 @@ +amo 1 +page 1 +approxim 1 +theori 1 +home 0 +present 0 +download 0 +analysi 0 +version 0 +click 0 +site 0 +file 0 +group 0 +activ 0 +homepag 0 +wisconsin 0 +madison 0 +wisc 0 +us 0 +clickher 0 +compress 0 +vita 0 +list 0 +variou 0 +public 0 +includ 0 +inform 0 +found 0 +wish 0 +view 0 +line 0 +research 0 +word 0 +search 0 +engin 0 +spline 0 +wavelet 0 +boxsplin 0 +radial 0 +basi 0 +function 0 +shift 0 +invari 0 +space 0 +toscatt 0 +data 0 +multiquadr 0 +thin 0 +plate 0 +splinesthi 0 +netscap 0 +enhanc 0 +associ 0 +professordepart 0 +comput 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +telephon 0 +tabl 0 +linksat 0 +item 0 +access 0 +order 0 +paperaffin 0 +system 0 +operatorof 0 +zuowei 0 +shen 0 +choos 0 +follow 0 +unix 0 +otherwis 0 +uncompress 0 +fromher 0 +none 0 +work 0 +server 0 +copi 0 +directlyfrom 0 +accounther 0 +handout 0 +email 0 +want 0 +abstract 0 +select 0 +articlesof 0 +mine 0 +anonym 0 +carl 0 +boor 0 +maintain 0 +containspostscript 0 +postscript 0 +articl 0 +theapproxim 0 +also 0 +technic 0 +filesconcern 0 +gener 0 +recommend 0 +read 0 +provid 0 +avail 0 +student 0 +andpubl 0 +main 0 +area 0 +interest 0 +togeth 0 +short 0 +summari 0 +futur 0 +goal 0 +univeristi 0 +ofwisconsin 0 +numer 0 +link 0 +peopl 0 +commun 0 +miscellan 0 +topic 0 +final 0 +offici 0 +pleas 0 +deposit 0 +comment 0 +mailbox 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..e587fddd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andi 1 +therber 1 +home 1 +pageandi 1 +therberoffic 1 +sphone 1 +email 1 +andyt 1 +wisc 1 +eduzooresumebookmarksapplet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..8980c983 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,18 @@ +arvind 1 +ranganathan 0 +workplac 0 +ranga 0 +erstwhil 0 +present 0 +indiaworld 0 +fascin 0 +world 0 +escher 0 +collect 0 +classic 0 +paper 0 +comput 0 +scienc 0 +finger 0 +log 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..244edda9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,35 @@ +ashish 1 +depart 1 +home 0 +page 0 +thusoo 0 +graduat 0 +student 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +madison 0 +come 0 +india 0 +hadmi 0 +undergradu 0 +educ 0 +indianinstitut 0 +technolog 0 +delhi 0 +iitd 0 +fantast 0 +place 0 +worth 0 +visit 0 +like 0 +contact 0 +canfing 0 +find 0 +whereabout 0 +altern 0 +send 0 +email 0 +ashisht 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..e2342ad8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,34 @@ +alexandria 1 +ashraf 0 +scienc 0 +univers 0 +aboulnaga 0 +home 0 +madison 0 +comput 0 +egypt 0 +section 0 +view 0 +grade 0 +pageashraf 0 +aboulnagacomput 0 +depart 0 +room 0 +wisconsin 0 +west 0 +dayton 0 +usaphon 0 +mail 0 +wisc 0 +edueduc 0 +juli 0 +june 0 +info 0 +offic 0 +hour 0 +desautel 0 +page 0 +last 0 +modifi 0 +septemb 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..e82e2681 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,24 @@ +atkinson 1 +phil 0 +home 0 +infooffic 0 +page 0 +pageucla 0 +bannon 0 +win 0 +ncaa 0 +basketbal 0 +championship 0 +seattl 0 +gener 0 +phone 0 +email 0 +wisc 0 +educurr 0 +researchsailinghors 0 +back 0 +ridingscuba 0 +divingc 0 +hour 0 +tuth 0 +appoint 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..48f73e5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,118 @@ +number 1 +comput 0 +bach 0 +interest 0 +theori 0 +algorithm 0 +problem 0 +page 0 +eric 0 +scienc 0 +univers 0 +email 0 +wisc 0 +theoret 0 +complex 0 +us 0 +effici 0 +exampl 0 +prime 0 +larg 0 +test 0 +wit 0 +question 0 +recent 0 +model 0 +proc 0 +home 0 +professor 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +california 0 +berkelei 0 +algebraicalgorithm 0 +cryptographi 0 +string 0 +automata 0 +research 0 +summari 0 +solvealgebra 0 +onetel 0 +digit 0 +without 0 +examin 0 +possiblefactor 0 +intrins 0 +mathemat 0 +well 0 +applic 0 +random 0 +gener 0 +code 0 +forreli 0 +secur 0 +inform 0 +transmiss 0 +algebra 0 +area 0 +also 0 +appli 0 +probabl 0 +designand 0 +analysi 0 +iscomposit 0 +prove 0 +simpl 0 +auxiliarynumb 0 +call 0 +practic 0 +usual 0 +find 0 +witnessbi 0 +direct 0 +search 0 +among 0 +small 0 +lead 0 +followingnatur 0 +least 0 +functionof 0 +work 0 +given 0 +accurateheurist 0 +base 0 +probabilist 0 +assumpt 0 +allowsthi 0 +similar 0 +answer 0 +public 0 +improv 0 +approxim 0 +euler 0 +product 0 +cnta 0 +canadian 0 +math 0 +proceed 0 +complet 0 +condon 0 +glaser 0 +tanguai 0 +annual 0 +conf 0 +volum 0 +shallit 0 +press 0 +info 0 +click 0 +curriculum 0 +vita 0 +creat 0 +juli 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..32e693aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,58 @@ +system 1 +bart 0 +miller 0 +home 0 +page 0 +wisc 0 +project 0 +parallel 0 +tool 0 +oper 0 +distribut 0 +barton 0 +professorcomput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usath 0 +follow 0 +list 0 +thing 0 +research 0 +paradyn 0 +perform 0 +fuzz 0 +random 0 +softwar 0 +testingteach 0 +introduct 0 +spring 0 +honor 0 +internet 0 +seminar 0 +advanc 0 +fall 0 +director 0 +undergradu 0 +graduatesprofession 0 +symposium 0 +monona 0 +terrac 0 +frank 0 +lloyd 0 +wright 0 +convent 0 +center 0 +technic 0 +advisori 0 +groupperson 0 +offici 0 +depart 0 +famili 0 +photosbart 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..41f2e056 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,44 @@ +madison 1 +offic 1 +hour 1 +hyper 0 +home 0 +page 0 +benjamin 0 +teitelbaum 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +usaben 0 +wisc 0 +edursumquinc 0 +internet 0 +ultim 0 +word 0 +gamezillion 0 +bookmarksspr 0 +schedul 0 +browser 0 +support 0 +tabl 0 +look 0 +like 0 +garbag 0 +click 0 +someth 0 +readabl 0 +mondai 0 +tuesdai 0 +wednesdai 0 +thursdai 0 +fridai 0 +dbseminar 0 +osseminar 0 +condormeet 0 +miron 0 +plseminar 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..a0200b3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,206 @@ +comput 1 +program 0 +project 0 +problem 0 +bestor 0 +scienc 0 +home 0 +techniqu 0 +section 0 +fortran 0 +univers 0 +madison 0 +structur 0 +solv 0 +model 0 +student 0 +languag 0 +wisc 0 +research 0 +invers 0 +vision 0 +scene 0 +posit 0 +imag 0 +us 0 +projector 0 +robot 0 +cours 0 +engin 0 +page 0 +gareth 0 +wisconsin 0 +world 0 +zealand 0 +postscript 0 +observ 0 +base 0 +algorithm 0 +point 0 +explor 0 +group 0 +intend 0 +dissert 0 +teach 0 +depart 0 +telephon 0 +mail 0 +wide 0 +massei 0 +motion 0 +examin 0 +rigid 0 +concurr 0 +assumpt 0 +transform 0 +dimens 0 +requir 0 +navig 0 +environ 0 +interest 0 +machin 0 +cover 0 +basic 0 +prepar 0 +elementari 0 +experi 0 +instruct 0 +high 0 +school 0 +taught 0 +entir 0 +primarili 0 +major 0 +advanc 0 +mathemat 0 +pagewelcom 0 +pagegareth 0 +assist 0 +west 0 +dayton 0 +street 0 +click 0 +finger 0 +http 0 +system 0 +administr 0 +data 0 +librari 0 +servic 0 +observatori 0 +drive 0 +dpl 0 +dacc 0 +edueduc 0 +honor 0 +curriculum 0 +vita 0 +resum 0 +graduat 0 +coursework 0 +titl 0 +abstract 0 +import 0 +recov 0 +within 0 +essenti 0 +exist 0 +multipl 0 +howev 0 +extens 0 +practic 0 +sensit 0 +nois 0 +accur 0 +optic 0 +restrict 0 +call 0 +make 0 +instead 0 +camera 0 +tradition 0 +result 0 +defin 0 +geometr 0 +perspect 0 +given 0 +identifi 0 +constrain 0 +specifi 0 +minimum 0 +number 0 +also 0 +addit 0 +minim 0 +type 0 +error 0 +occur 0 +real 0 +applic 0 +allow 0 +approxim 0 +intersect 0 +current 0 +appli 0 +determin 0 +unknown 0 +time 0 +advisor 0 +prof 0 +charl 0 +dyer 0 +graphic 0 +virtual 0 +realiti 0 +artifici 0 +intellig 0 +learn 0 +duti 0 +spring 0 +introduct 0 +credit 0 +need 0 +prior 0 +knowledg 0 +assum 0 +materi 0 +enabl 0 +write 0 +simpl 0 +done 0 +receiv 0 +littl 0 +algebra 0 +construct 0 +least 0 +procedur 0 +orient 0 +pascal 0 +survei 0 +prereq 0 +colleg 0 +work 0 +statist 0 +logic 0 +consent 0 +instructor 0 +open 0 +pointer 0 +wiscinfo 0 +inform 0 +hoofer 0 +out 0 +club 0 +nextstep 0 +next 0 +softwar 0 +start 0 +internet 0 +lyco 0 +search 0 +keyword 0 +copyright 0 +copi 0 +last 0 +modifi 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..eabe5f1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,33 @@ +beyer 1 +wisc 1 +graduat 1 +project 1 +cours 1 +kevin 0 +home 0 +pagekevin 0 +beyerbey 0 +caution 0 +work 0 +student 0 +research 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +advisor 0 +raghu 0 +ramakrishnan 0 +area 0 +interest 0 +databas 0 +researchresearch 0 +coral 0 +local 0 +inform 0 +undergradu 0 +coursesinstruct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..0b86fe7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,62 @@ +bezenek 1 +home 0 +window 0 +pith 0 +phone 0 +wisc 0 +todd 0 +page 0 +toddm 0 +back 0 +introduc 0 +actual 0 +faster 0 +cpu 0 +_great 0 +microprocessor 0 +past 0 +present_ 0 +uregina 0 +bayko 0 +html 0 +express 0 +locomot 0 +squeez 0 +skateboard 0 +size 0 +packag 0 +helen 0 +custer 0 +_insid 0 +microsoft 0 +press 0 +current 0 +cours 0 +advanc 0 +oper 0 +system 0 +bart 0 +miller 0 +pithi 0 +consist 0 +abound 0 +take 0 +yeah 0 +point 0 +skew 0 +associ 0 +cach 0 +access 0 +inform 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +madison 0 +offic 0 +mail 0 +edubezenek 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..d1492fb8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,54 @@ +bockrath 1 +section 1 +nathan 0 +graduat 0 +student 0 +wisc 0 +viru 0 +info 0 +home 0 +teach 0 +assist 0 +averag 0 +pictur 0 +nate 0 +jpeg 0 +send 0 +email 0 +click 0 +held 0 +grade 0 +quiz 0 +review 0 +word 0 +macro 0 +make 0 +page 0 +offic 0 +hour 0 +anywai 0 +mondai 0 +wednesdai 0 +schedul 0 +distribut 0 +system 0 +simul 0 +model 0 +support 0 +free 0 +speech 0 +onlin 0 +site 0 +anoth 0 +dai 0 +back 0 +pageback 0 +depart 0 +pageoth 0 +neat 0 +stuff 0 +condor 0 +project 0 +internet 0 +oraclesend 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..f1c372fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,288 @@ +work 1 +system 0 +thing 0 +scienc 0 +time 0 +bolo 0 +year 0 +comput 0 +new 0 +burger 0 +develop 0 +softwar 0 +engin 0 +scientist 0 +hand 0 +home 0 +uwvax 0 +member 0 +associ 0 +although 0 +parent 0 +josef 0 +wai 0 +call 0 +mani 0 +person 0 +hacker 0 +design 0 +dewitt 0 +implement 0 +beer 0 +sleep 0 +wonder 0 +page 0 +tiger 0 +place 0 +bore 0 +like 0 +parallel 0 +everi 0 +els 0 +follow 0 +world 0 +databas 0 +object 0 +store 0 +shore 0 +wisconsin 0 +madison 0 +also 0 +provid 0 +rather 0 +internet 0 +usenet 0 +uucp 0 +along 0 +line 0 +depart 0 +part 0 +much 0 +try 0 +take 0 +care 0 +aircraft 0 +essen 0 +hau 0 +drink 0 +bolobologreet 0 +christen 0 +thoma 0 +roll 0 +mebolo 0 +everyon 0 +includ 0 +quit 0 +sure 0 +bestexplan 0 +question 0 +defin 0 +bywhat 0 +case 0 +semi 0 +real 0 +timeoper 0 +util 0 +last 0 +kernel 0 +unix 0 +administr 0 +creat 0 +appli 0 +construct 0 +realli 0 +though 0 +acomput 0 +degre 0 +pai 0 +david 0 +shudder 0 +methodolog 0 +right 0 +architect 0 +newoper 0 +type 0 +often 0 +sameto 0 +forth 0 +woodwork 0 +control 0 +draw 0 +brew 0 +complet 0 +relax 0 +enough 0 +myroomm 0 +disagre 0 +pursuit 0 +enjoi 0 +fly 0 +read 0 +fiction 0 +comic 0 +book 0 +railroad 0 +prototyp 0 +model 0 +role 0 +plai 0 +game 0 +notic 0 +imag 0 +sublim 0 +stripe 0 +creatur 0 +thetig 0 +appear 0 +throughout 0 +taken 0 +william 0 +blake 0 +poemtyg 0 +tyger 0 +put 0 +word 0 +road 0 +againin 0 +tremend 0 +leap 0 +insan 0 +purchas 0 +ahous 0 +address 0 +isjosef 0 +east 0 +gate 0 +roadmonona 0 +voic 0 +number 0 +workwork 0 +drive 0 +banana 0 +us 0 +grung 0 +either 0 +matur 0 +job 0 +chang 0 +perhapssom 0 +seem 0 +othermonth 0 +beat 0 +intosubmiss 0 +everyth 0 +moon 0 +andstar 0 +current 0 +project 0 +fordav 0 +famou 0 +gamma 0 +relat 0 +queri 0 +interpret 0 +paradis 0 +geograph 0 +inform 0 +orient 0 +data 0 +wiss 0 +storag 0 +whatev 0 +need 0 +done 0 +whole 0 +occur 0 +thecomput 0 +departmentof 0 +themadison 0 +campusof 0 +univers 0 +campu 0 +locat 0 +peninsula 0 +five 0 +lake 0 +workin 0 +addit 0 +consult 0 +solut 0 +advic 0 +technicalexpertis 0 +help 0 +port 0 +newsystem 0 +reviv 0 +oddbal 0 +tasksar 0 +kind 0 +tell 0 +tovisit 0 +serverbut 0 +haven 0 +anyth 0 +mostlyempti 0 +except 0 +friend 0 +activitiesuwvaxi 0 +oper 0 +site 0 +free 0 +that 0 +print 0 +someth 0 +longer 0 +svolunt 0 +master 0 +goe 0 +run 0 +howev 0 +reader 0 +across 0 +differentarchitectur 0 +task 0 +organizationsi 0 +organ 0 +alwai 0 +agre 0 +oftenhav 0 +good 0 +benefit 0 +usersof 0 +commun 0 +aopa 0 +owner 0 +pilot 0 +experiment 0 +usenix 0 +blitz 0 +drinkingwhen 0 +school 0 +hord 0 +friendsand 0 +visit 0 +local 0 +thursdai 0 +night 0 +import 0 +slowli 0 +entir 0 +select 0 +acquaint 0 +becam 0 +loftili 0 +labelledblitz 0 +societi 0 +divers 0 +meet 0 +ofoctoberfest 0 +weekend 0 +chud 0 +accumulateda 0 +short 0 +histori 0 +whatnotof 0 +charad 0 +pagelast 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..89cc095a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,47 @@ +page 1 +home 0 +link 0 +comput 0 +system 0 +brad 0 +oper 0 +search 0 +welcom 0 +thayer 0 +homepag 0 +much 0 +mail 0 +scienc 0 +intro 0 +network 0 +possibl 0 +advanc 0 +would 0 +foolish 0 +neglect 0 +model 0 +interest 0 +thec 0 +us 0 +probabl 0 +bore 0 +check 0 +seminaranywai 0 +aim 0 +beaucoup 0 +boir 0 +pepper 0 +badger 0 +packer 0 +pagesom 0 +altavista 0 +enginefind 0 +email 0 +adress 0 +world 0 +wideth 0 +jazz 0 +duan 0 +mclaughlin 0 +pageuw 0 +athlet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..23a242f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,64 @@ +breach 1 +scott 0 +comput 0 +wisc 0 +wisconsin 0 +madison 0 +univers 0 +intern 0 +symposium 0 +gurindar 0 +home 0 +pagescott 0 +addresseseducationresearch 0 +interest 0 +public 0 +recreat 0 +associatesaddressesscott 0 +breachdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usatel 0 +educationph 0 +scienc 0 +engin 0 +carnegi 0 +mellon 0 +advisorguri 0 +sohiresearch 0 +interestscomput 0 +architecturemultiscalarpublicationsmultiscalar 0 +processorsgurindar 0 +sohi 0 +vijaykumarnd 0 +architectur 0 +anatomi 0 +regist 0 +file 0 +multiscalar 0 +processorscott 0 +vijaykumar 0 +sohith 0 +microarchitectur 0 +effici 0 +detect 0 +pointer 0 +arrai 0 +access 0 +errorstodd 0 +austin 0 +sohiconfer 0 +program 0 +languag 0 +design 0 +implement 0 +recreationwingsbeersquidtvassociatestodd 0 +austindoug 0 +burgerbabak 0 +falsafialain 0 +kagit 0 +vijaykumarlast 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..a8bc8cac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,83 @@ +page 1 +suni 0 +albani 0 +fall 0 +name 0 +sinc 0 +bleed 0 +nontrivi 0 +waysher 0 +temporarili 0 +underst 0 +unadorn 0 +provid 0 +section 0 +pizza 0 +pool 0 +brief 0 +hobbi 0 +schedul 0 +spring 0 +stinkin 0 +bookmark 0 +poor 0 +unfortun 0 +myclass 0 +hypersensit 0 +rockjock 0 +cretin 0 +brood 0 +glare 0 +clenchesfist 0 +crack 0 +knuckl 0 +tragic 0 +flightyfemm 0 +get 0 +razz 0 +asskick 0 +thirdgrad 0 +perhap 0 +smooth 0 +skin 0 +hardbodi 0 +leatherboi 0 +leer 0 +atm 0 +whenev 0 +call 0 +roll 0 +differ 0 +make 0 +todayi 0 +giggl 0 +said 0 +becam 0 +aprostitut 0 +societi 0 +bigotri 0 +pedagodi 0 +isaac 0 +theblack 0 +goat 0 +refus 0 +stai 0 +claw 0 +hand 0 +sssuuuhhh 0 +mmuuuhhhh 0 +dddduuuuuhhhhh 0 +mmmmuuuhhhh 0 +maaaahhhjaaaaaahhhhh 0 +fffuuuhhhhh 0 +yyyyyyyuuuuuhhhhh 0 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 0 +uuuhhh 0 +uuummmm 0 +uuuhhhh 0 +wwwwwhhhhuuuuuhhhhh 0 +zhang 0 +wouldn 0 +notic 0 +eggleston 0 +smile 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..c6476714 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,154 @@ +system 1 +prefetch 0 +applic 0 +cach 0 +file 0 +research 0 +perform 0 +parallel 0 +control 0 +page 0 +princeton 0 +felten 0 +proceed 0 +univers 0 +oper 0 +integr 0 +edward 0 +wisc 0 +comput 0 +recent 0 +alloc 0 +anna 0 +karlin 0 +disk 0 +polici 0 +educ 0 +interest 0 +cours 0 +high 0 +resourc 0 +depart 0 +tech 0 +report 0 +sigmetr 0 +confer 0 +implement 0 +princetonunivers 0 +osdi 0 +uniprocessor 0 +investig 0 +techniqu 0 +replac 0 +kernel 0 +physic 0 +home 0 +assist 0 +professor 0 +sciencedepart 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usacao 0 +eduphon 0 +department 0 +offic 0 +paper 0 +talk 0 +summari 0 +collect 0 +link 0 +tsinghua 0 +beij 0 +china 0 +memori 0 +project 0 +optim 0 +cachingacf 0 +topic 0 +distribut 0 +fall 0 +advanc 0 +spring 0 +trace 0 +simul 0 +access 0 +tracesrec 0 +papersintegr 0 +cachingtraci 0 +kimbrel 0 +novemb 0 +shorter 0 +version 0 +thesi 0 +also 0 +schedulingpei 0 +appear 0 +toc 0 +studi 0 +strategiespei 0 +peform 0 +first 0 +symposium 0 +slide 0 +present 0 +usenix 0 +summer 0 +technic 0 +tickertaip 0 +raid 0 +architectur 0 +swee 0 +boon 0 +shivakumar 0 +venkataraman 0 +john 0 +wilk 0 +isca 0 +talksslid 0 +postscript 0 +andpostscript 0 +summarymi 0 +focus 0 +storag 0 +manag 0 +andparallel 0 +particular 0 +improvefil 0 +specif 0 +filecach 0 +aggress 0 +data 0 +havedevelop 0 +individualappl 0 +respons 0 +decid 0 +useit 0 +us 0 +fairglob 0 +carefulli 0 +cachereplac 0 +schedul 0 +prototyp 0 +implementationon 0 +demonstratedthat 0 +good 0 +chosen 0 +strategi 0 +informationcan 0 +significantli 0 +improv 0 +mani 0 +current 0 +extend 0 +amdevelop 0 +algorithm 0 +diskarrai 0 +addit 0 +global 0 +managementproblem 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..1ab12180 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,284 @@ +system 1 +proc 0 +conf 0 +databas 0 +data 0 +work 0 +research 0 +manag 0 +perform 0 +project 0 +almaden 0 +object 0 +garlic 0 +comput 0 +madison 0 +area 0 +shore 0 +repositori 0 +heterogen 0 +effort 0 +experi 0 +time 0 +queri 0 +applic 0 +march 0 +persist 0 +franklin 0 +minneapoli 0 +univers 0 +center 0 +altern 0 +carei 0 +evalu 0 +interest 0 +next 0 +gener 0 +orient 0 +design 0 +complex 0 +workload 0 +base 0 +goal 0 +involv 0 +share 0 +build 0 +recent 0 +twelv 0 +student 0 +spent 0 +call 0 +extend 0 +languag 0 +octob 0 +multimedia 0 +codi 0 +haa 0 +niblack 0 +arya 0 +fagin 0 +flickner 0 +petkov 0 +schwarz 0 +thoma 0 +william 0 +wimmer 0 +toward 0 +workshop 0 +oodbm 0 +dewitt 0 +naughton 0 +livni 0 +septemb 0 +real 0 +server 0 +andm 0 +sigmod 0 +mike 0 +careymichael 0 +careyprofessor 0 +leav 0 +scienc 0 +depart 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +staff 0 +member 0 +harri 0 +road 0 +jose 0 +phone 0 +primari 0 +mail 0 +wisc 0 +eduresearch 0 +interestsdatabas 0 +parallel 0 +distribut 0 +appli 0 +main 0 +performanceand 0 +topicsof 0 +current 0 +includ 0 +tradeoff 0 +techniqu 0 +forobject 0 +algorithmsrel 0 +transact 0 +process 0 +schedul 0 +multi 0 +userdatabas 0 +user 0 +specifi 0 +theexodu 0 +extens 0 +dbm 0 +aimedat 0 +develop 0 +scalabl 0 +storag 0 +persistentobject 0 +environ 0 +whichi 0 +upon 0 +exodu 0 +meet 0 +objectmanag 0 +need 0 +replac 0 +unix 0 +file 0 +applicationssuch 0 +case 0 +move 0 +academia 0 +industri 0 +greatyear 0 +part 0 +becom 0 +best 0 +academ 0 +researchgroup 0 +known 0 +come 0 +tackl 0 +anddiffer 0 +challeng 0 +thesourc 0 +number 0 +paper 0 +teach 0 +forth 0 +past 0 +year 0 +relat 0 +signific 0 +fraction 0 +rel 0 +projectther 0 +multimediainform 0 +allow 0 +live 0 +varieti 0 +tobe 0 +manipul 0 +though 0 +resid 0 +homogen 0 +objectdatabas 0 +sabbat 0 +continuedto 0 +focus 0 +graduat 0 +aqueri 0 +browser 0 +front 0 +tool 0 +pesto 0 +thegarl 0 +locat 0 +public 0 +oodb 0 +access 0 +implement 0 +kiernan 0 +orientedprogram 0 +oopsla 0 +austin 0 +appear 0 +multipl 0 +content 0 +tork 0 +roth 0 +ifip 0 +confer 0 +visualdatabas 0 +lausann 0 +switzerland 0 +inform 0 +garlicapproach 0 +luniewski 0 +and 0 +ieee 0 +issu 0 +dataengin 0 +ride 0 +taipei 0 +taiwan 0 +statu 0 +report 0 +benchmark 0 +withd 0 +kant 0 +onobject 0 +program 0 +portland 0 +autom 0 +tune 0 +brown 0 +mehta 0 +thint 0 +larg 0 +santiago 0 +chile 0 +make 0 +initi 0 +smrc 0 +withb 0 +reinwald 0 +desslock 0 +lehman 0 +pirahesh 0 +srinivasan 0 +tarascon 0 +provenc 0 +franc 0 +hall 0 +mcauliff 0 +schuh 0 +solomon 0 +tsatalo 0 +white 0 +zwill 0 +sigmodint 0 +fine 0 +grain 0 +page 0 +zaharioudaki 0 +managementof 0 +memori 0 +pang 0 +accur 0 +model 0 +hybrid 0 +hash 0 +join 0 +algorithm 0 +patel 0 +vernon 0 +sigmetr 0 +measur 0 +modelingof 0 +nashvil 0 +index 0 +multivers 0 +lock 0 +bober 0 +technolog 0 +cambridg 0 +england 0 +client 0 +cach 0 +revisit 0 +indistribut 0 +oszu 0 +dayal 0 +andp 0 +valduriez 0 +morgan 0 +kaufmann 0 +publish 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..af9644be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,30 @@ +wisconsin 1 +chin 0 +univers 0 +offic 0 +cchin 0 +wisc 0 +biochemistri 0 +madison 0 +tang 0 +home 0 +pagechin 0 +tanggradu 0 +student 0 +depart 0 +west 0 +dayton 0 +streetmadison 0 +bldg 0 +mail 0 +edutelephon 0 +current 0 +assign 0 +introduct 0 +data 0 +structur 0 +hour 0 +mondai 0 +tuesdai 0 +fridai 0 +ameduc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..4608e46b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,37 @@ +chandra 1 +research 1 +satish 0 +soon 0 +home 0 +page 0 +wisc 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaphon 0 +interest 0 +summari 0 +public 0 +come 0 +real 0 +stuff 0 +wodehous 0 +book 0 +internet 0 +movi 0 +databas 0 +nostalgia 0 +york 0 +time 0 +altavista 0 +italian 0 +languag 0 +cultur 0 +miscellan 0 +linksclick 0 +log 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..624b8521 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,91 @@ +chilimbi 1 +parallel 0 +trishul 0 +research 0 +comput 0 +wisconsin 0 +laru 0 +memori 0 +merit 0 +state 0 +page 0 +wisc 0 +click 0 +madison 0 +program 0 +compil 0 +share 0 +visual 0 +indian 0 +tool 0 +jame 0 +examin 0 +home 0 +real 0 +megradu 0 +assistantdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usaadvisor 0 +interest 0 +languag 0 +architectur 0 +integr 0 +messag 0 +pass 0 +perform 0 +analysi 0 +enhanc 0 +designresearch 0 +project 0 +wind 0 +tunneleduc 0 +univers 0 +tech 0 +institut 0 +technolog 0 +bombai 0 +summari 0 +publicationscachi 0 +automat 0 +insert 0 +cico 0 +annot 0 +intern 0 +confer 0 +process 0 +icpp 0 +august 0 +stormwatch 0 +system 0 +protocolstrishul 0 +thoma 0 +ball 0 +stephen 0 +eick 0 +supercomput 0 +appear 0 +decemb 0 +award 0 +honor 0 +certif 0 +mathemat 0 +olympiadpresid 0 +gold 0 +medal 0 +nation 0 +physic 0 +examinationcertif 0 +chemistrycertif 0 +electron 0 +miscellan 0 +movi 0 +dream 0 +curriculum 0 +vita 0 +last 0 +updat 0 +mail 0 +suggest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..558d9eb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,31 @@ +chandrasekaran 1 +sashikanth 1 +wisconsin 1 +home 0 +page 0 +csashi 0 +wisc 0 +graduat 0 +studentdepart 0 +comput 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaadvisor 0 +mark 0 +hill 0 +project 0 +educ 0 +btech 0 +indian 0 +institut 0 +technolog 0 +madra 0 +june 0 +univeristi 0 +depart 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..c66c34ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,106 @@ +databas 1 +page 1 +paradis 0 +comput 0 +site 0 +curt 0 +scienc 0 +madison 0 +relat 0 +home 0 +wiscinfo 0 +ellmann 0 +univers 0 +wisconsin 0 +wisc 0 +item 0 +eosdi 0 +standard 0 +inform 0 +intern 0 +dienst 0 +project 0 +depart 0 +focu 0 +java 0 +develop 0 +webgnat 0 +defect 0 +track 0 +prototyp 0 +index 0 +shore 0 +previou 0 +life 0 +doit 0 +opengi 0 +consortium 0 +global 0 +posit 0 +system 0 +calmit 0 +nebraska 0 +lincoln 0 +feder 0 +approach 0 +object 0 +manag 0 +group 0 +free 0 +list 0 +transact 0 +process 0 +perform 0 +council 0 +illustra 0 +white 0 +papersmiscellan 0 +sitescampu 0 +wyrm 0 +hoard 0 +gopher 0 +librari 0 +wiscnet 0 +netcorpor 0 +appl 0 +microsoft 0 +research 0 +land 0 +paww 0 +commerc 0 +metrowerk 0 +taligentsearch 0 +savvi 0 +search 0 +webcrawl 0 +open 0 +text 0 +worm 0 +network 0 +organ 0 +internet 0 +draft 0 +dilbert 0 +world 0 +onlin 0 +winsock 0 +applic 0 +current 0 +weather 0 +map 0 +implement 0 +geolog 0 +survei 0 +govern 0 +locat 0 +gil 0 +oakridg 0 +nation 0 +center 0 +stock 0 +market 0 +datacurt 0 +ellmanncurt 0 +eduparadis 0 +projectdepart 0 +sciencesunivers 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..e74b352e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,22 @@ +chee 1 +yong 1 +madison 1 +home 0 +pagechan 0 +graduat 0 +student 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +email 0 +cychan 0 +wisc 0 +offic 0 +phone 0 +page 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..8473af5f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,45 @@ +comput 1 +engin 1 +sara 0 +madison 0 +scienc 0 +home 0 +page 0 +univers 0 +wisconsin 0 +depart 0 +physic 0 +bauman 0 +dailei 0 +baumandailei 0 +wisc 0 +edugradu 0 +program 0 +mathemat 0 +mace 0 +mechan 0 +astronaut 0 +nuclear 0 +educ 0 +math 0 +lewi 0 +clark 0 +colleg 0 +research 0 +work 0 +public 0 +current 0 +schedul 0 +link 0 +friend 0 +pagessend 0 +mail 0 +offic 0 +address 0 +statist 0 +west 0 +dayton 0 +street 0 +last 0 +modifi 0 +daileytu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..27f34196 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,313 @@ +comput 1 +memori 0 +wood 0 +david 0 +architectur 0 +share 0 +system 0 +simul 0 +cach 0 +perform 0 +lebeck 0 +research 0 +parallel 0 +reinhardt 0 +mark 0 +user 0 +jame 0 +laru 0 +wisconsin 0 +program 0 +hardwar 0 +anddavid 0 +ieee 0 +intern 0 +symposium 0 +network 0 +interfac 0 +fine 0 +hill 0 +time 0 +design 0 +implement 0 +tool 0 +babak 0 +falsafi 0 +grain 0 +proceed 0 +protocol 0 +level 0 +alvin 0 +steven 0 +control 0 +tempest 0 +typhoon 0 +develop 0 +fast 0 +refer 0 +multiprocessor 0 +evalu 0 +techniqu 0 +wind 0 +tunnel 0 +univers 0 +machin 0 +support 0 +isca 0 +coher 0 +abstract 0 +access 0 +case 0 +messag 0 +transpar 0 +block 0 +page 0 +wisc 0 +juli 0 +especi 0 +includ 0 +project 0 +wart 0 +california 0 +berkelei 0 +current 0 +graduat 0 +student 0 +hyder 0 +pfile 0 +introduct 0 +architecturec 0 +advanc 0 +recent 0 +distribut 0 +supercomput 0 +invalid 0 +overhead 0 +ioanni 0 +schoina 0 +softwar 0 +new 0 +paradigm 0 +integr 0 +pass 0 +exist 0 +mechan 0 +allow 0 +programm 0 +data 0 +read 0 +us 0 +processor 0 +also 0 +specifi 0 +state 0 +rewrit 0 +home 0 +associ 0 +professor 0 +scienceand 0 +electr 0 +engineeringdepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usadavid 0 +eduphon 0 +secretari 0 +fingerson 0 +thea 0 +sklenar 0 +department 0 +offic 0 +interest 0 +uniprocessor 0 +oper 0 +analysi 0 +vlsi 0 +power 0 +portabl 0 +educ 0 +steve 0 +brian 0 +toonenrec 0 +rahmat 0 +intel 0 +alvi 0 +duke 0 +microsystem 0 +callaghan 0 +informix 0 +cours 0 +teach 0 +fall 0 +organ 0 +programmingc 0 +select 0 +paper 0 +decoupl 0 +memorysteven 0 +robert 0 +communicationshubhendu 0 +mukherje 0 +synchron 0 +workstat 0 +costrahmat 0 +confer 0 +dynam 0 +self 0 +reduc 0 +multiprocessorsalvin 0 +june 0 +activ 0 +simulationalvin 0 +sigmetricsmai 0 +accuraci 0 +interconnect 0 +dougla 0 +burger 0 +process 0 +april 0 +applic 0 +specif 0 +ann 0 +roger 0 +asplo 0 +profil 0 +spec 0 +benchmark 0 +studi 0 +octob 0 +cooper 0 +scalabl 0 +transact 0 +toc 0 +novemb 0 +annot 0 +bibliographi 0 +decemb 0 +line 0 +version 0 +revis 0 +frequent 0 +madhusudhan 0 +talluri 0 +august 0 +summari 0 +main 0 +goal 0 +cost 0 +effect 0 +computerarchitectur 0 +take 0 +advantag 0 +rapidli 0 +chang 0 +technolog 0 +myresearch 0 +major 0 +thrust 0 +feasibl 0 +correct 0 +facilit 0 +focuss 0 +follow 0 +three 0 +area 0 +multi 0 +effici 0 +hybridprogram 0 +virtual 0 +prototyp 0 +exploit 0 +similaritesof 0 +hypothet 0 +understand 0 +tune 0 +result 0 +calledtempest 0 +handler 0 +suppliedmechan 0 +provid 0 +compil 0 +librari 0 +hybrid 0 +combin 0 +tempestmechan 0 +bulk 0 +transfer 0 +virtualmemori 0 +manag 0 +novelmechan 0 +tagblock 0 +byte 0 +write 0 +theloc 0 +remot 0 +explor 0 +altern 0 +wai 0 +first 0 +call 0 +propos 0 +hardwareplatform 0 +fulli 0 +revers 0 +translationt 0 +rtlb 0 +invok 0 +detect 0 +grainaccess 0 +fault 0 +found 0 +thata 0 +run 0 +performscompar 0 +anal 0 +five 0 +memoryprogram 0 +method 0 +thatoptim 0 +common 0 +hit 0 +significantli 0 +reducingsimul 0 +tightli 0 +gener 0 +byprovid 0 +tag 0 +referenceinvok 0 +function 0 +depend 0 +upon 0 +type 0 +andmemori 0 +processedbi 0 +manipul 0 +special 0 +null 0 +functionfor 0 +action 0 +usingbinari 0 +tabl 0 +lookup 0 +memoryrefer 0 +sparcstat 0 +tothre 0 +faster 0 +convent 0 +trace 0 +driven 0 +thatcal 0 +procedur 0 +onlythre 0 +slower 0 +origin 0 +instrument 0 +investig 0 +binari 0 +techniquesto 0 +platform 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..0cd9504f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,30 @@ +doug 1 +burger 1 +home 1 +comput 1 +page 0 +pageprofession 0 +inform 0 +research 0 +summaryresum 0 +cvtranscriptcours 0 +projectsadvisoraffili 0 +project 0 +galileo 0 +sciwisconsin 0 +wind 0 +tunnelpag 0 +maintain 0 +architectureuw 0 +architecturesimplescalar 0 +tool 0 +setgenericasacmperson 0 +stuff 0 +meus 0 +linksphoto 0 +galleryrid 0 +demonhunt 0 +damn 0 +catsbewar 0 +grad 0 +school 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..b5ed5d03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,172 @@ +inform 1 +approxim 0 +boor 0 +click 0 +journal 0 +carl 0 +file 0 +variou 0 +check 0 +publish 0 +paul 0 +home 0 +page 0 +comput 0 +wisconsin 0 +madison 0 +fall 0 +email 0 +wisc 0 +look 0 +recent 0 +theori 0 +read 0 +clickabl 0 +version 0 +list 0 +errata 0 +numer 0 +analysi 0 +spline 0 +ditto 0 +thank 0 +nevai 0 +find 0 +also 0 +us 0 +pinku 0 +last 0 +chang 0 +professor 0 +scienc 0 +mathematicsdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usaoffic 0 +hour 0 +town 0 +schoenberg 0 +work 0 +death 0 +deboor 0 +telephon 0 +schedul 0 +teach 0 +former 0 +present 0 +student 0 +select 0 +articl 0 +written 0 +areavail 0 +anonym 0 +approx 0 +provid 0 +access 0 +individu 0 +theclick 0 +button 0 +small 0 +subset 0 +author 0 +third 0 +edit 0 +cont 0 +elementari 0 +algorithm 0 +approach 0 +print 0 +practic 0 +guid 0 +latest 0 +thevari 0 +program 0 +driver 0 +latter 0 +book 0 +ofapproxim 0 +academ 0 +press 0 +includ 0 +accept 0 +publishedpap 0 +well 0 +postal 0 +address 0 +mani 0 +andmuch 0 +much 0 +forconstruct 0 +springer 0 +verlag 0 +foreast 0 +search 0 +theirtabl 0 +content 0 +singli 0 +combin 0 +thishandi 0 +tool 0 +alsoapproxim 0 +amo 0 +slist 0 +homepag 0 +bibliographi 0 +avail 0 +link 0 +peopl 0 +resourc 0 +ila 0 +center 0 +seek 0 +shall 0 +organ 0 +introduct 0 +joi 0 +seeviva_vi 0 +alsoon 0 +screen 0 +tutori 0 +great 0 +pictur 0 +hermit 0 +place 0 +contain 0 +html 0 +thehtml 0 +primermight 0 +even 0 +better 0 +unusu 0 +ever_chang 0 +david 0 +griffeath 0 +sprimordi 0 +soup 0 +kitchen 0 +interest 0 +seeodd 0 +end 0 +allan 0 +techunix 0 +technion 0 +nevaiif 0 +makehi 0 +mathemat 0 +outputavail 0 +cours 0 +math 0 +hous 0 +next 0 +door 0 +occupi 0 +taki 0 +souganid 0 +andthaleia 0 +zariphopoul 0 +szego 0 +bust 0 +stand 0 +inscript 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..ca993c6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,175 @@ +data 1 +devis 0 +visual 0 +queri 0 +record 0 +livni 0 +raghu 0 +ramakrishnan 0 +environ 0 +explor 0 +input 0 +link 0 +confer 0 +graphic 0 +help 0 +user 0 +need 0 +base 0 +scienc 0 +medicin 0 +inform 0 +jussi 0 +myllymaki 0 +proceed 0 +spie 0 +analysi 0 +michael 0 +cheng 0 +miron 0 +releas 0 +support 0 +time 0 +hotlin 0 +home 0 +pagedevis 0 +visualizationt 0 +content 0 +featuresexamplesin 0 +depthpublicationsrel 0 +workreleasecontactsfeaturesthes 0 +featur 0 +distinguish 0 +interfac 0 +construct 0 +oneset 0 +save 0 +appli 0 +larger 0 +memori 0 +effici 0 +handl 0 +map 0 +level 0 +cancontrol 0 +color 0 +shape 0 +individu 0 +abil 0 +us 0 +repres 0 +flexibl 0 +layout 0 +mechan 0 +within 0 +window 0 +group 0 +togeth 0 +comparison 0 +asid 0 +ax 0 +cursor 0 +compar 0 +relationship 0 +differ 0 +viewsof 0 +direct 0 +ascii 0 +file 0 +integ 0 +float 0 +date 0 +string 0 +type 0 +examplescheck 0 +follow 0 +exampl 0 +cool 0 +pictur 0 +quick 0 +introduct 0 +tree 0 +validationmolecular 0 +biologi 0 +cell 0 +imag 0 +soil 0 +birch 0 +clusteringfinanci 0 +explorationfamili 0 +nation 0 +climatedata 0 +centergeograph 0 +systemsoil 0 +sciencefil 0 +serverprogram 0 +tracesclin 0 +mani 0 +moreexampl 0 +viewer 0 +famili 0 +depthfor 0 +detail 0 +descript 0 +model 0 +visualizationvisu 0 +interfaceperform 0 +issuespublicationsmiron 0 +larg 0 +dataset 0 +dataexplor 0 +januari 0 +stream 0 +inproceed 0 +andanalysi 0 +februari 0 +praveenseshadri 0 +next 0 +sequencequeri 0 +intern 0 +themanag 0 +comad 0 +decemb 0 +relat 0 +workth 0 +seqproject 0 +complementari 0 +design 0 +queryrecord 0 +sequenc 0 +output 0 +bevisu 0 +informationw 0 +current 0 +version 0 +executablesfor 0 +solari 0 +platform 0 +dynam 0 +ld_library_path 0 +variabl 0 +appropri 0 +rundevis 0 +architectur 0 +execut 0 +arestat 0 +requir 0 +shareabl 0 +librari 0 +download 0 +click 0 +contactsfor 0 +research 0 +project 0 +contactmiron 0 +guangshun 0 +chen 0 +kent 0 +wenger 0 +send 0 +mail 0 +usersupport 0 +page 0 +access 0 +sinc 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..c43df565 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,200 @@ +shore 1 +object 0 +system 0 +data 0 +paradis 0 +unix 0 +dewitt 0 +geograph 0 +project 0 +univers 0 +wisconsin 0 +inform 0 +wide 0 +target 0 +hardwar 0 +store 0 +larg 0 +naughton 0 +page 0 +david 0 +email 0 +wisc 0 +orient 0 +databas 0 +parallel 0 +research 0 +persist 0 +need 0 +applic 0 +basic 0 +manag 0 +develop 0 +text 0 +field 0 +file 0 +complex 0 +client 0 +server 0 +carei 0 +gamma 0 +set 0 +current 0 +relat 0 +match 0 +emploi 0 +recent 0 +proceed 0 +sigmod 0 +confer 0 +talk 0 +home 0 +professor 0 +romn 0 +fellow 0 +comput 0 +scienc 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +michigan 0 +interest 0 +databasebenchmark 0 +summari 0 +main 0 +objectiveof 0 +design 0 +implement 0 +evalu 0 +objectsystem 0 +serv 0 +varieti 0 +applicationsinclud 0 +softwar 0 +programminglanguag 0 +satellit 0 +repositori 0 +multimedia 0 +expand 0 +capabilitiesof 0 +us 0 +exodu 0 +storag 0 +fund 0 +arpa 0 +number 0 +wai 0 +includ 0 +support 0 +typedobject 0 +multipl 0 +program 0 +languag 0 +like 0 +hierarchicalnam 0 +space 0 +name 0 +compat 0 +interfaceto 0 +interfac 0 +intend 0 +toeas 0 +transit 0 +systemenviron 0 +exist 0 +tool 0 +ccwill 0 +abl 0 +without 0 +modif 0 +becom 0 +either 0 +singl 0 +orth 0 +rang 0 +environ 0 +scale 0 +fromindividu 0 +workstat 0 +heterogen 0 +networksto 0 +multiprocessor 0 +intel 0 +paragon 0 +ajoint 0 +prof 0 +solomon 0 +attempt 0 +appli 0 +technolog 0 +developeda 0 +part 0 +relationaldatabas 0 +thetask 0 +manipul 0 +mani 0 +databasesystem 0 +hold 0 +excel 0 +formanag 0 +busi 0 +poor 0 +modelingne 0 +must 0 +capabl 0 +manipulatingmuch 0 +polygon 0 +polylin 0 +instead 0 +model 0 +provid 0 +muchbett 0 +type 0 +anoth 0 +signific 0 +differencefrom 0 +parallelismto 0 +facilit 0 +execut 0 +process 0 +assatellit 0 +imag 0 +platform 0 +projecti 0 +cluster 0 +sparc 0 +connect 0 +sampl 0 +public 0 +benchmark 0 +withm 0 +washington 0 +persistentappl 0 +franklin 0 +hall 0 +mcauliff 0 +chuh 0 +tsatalo 0 +white 0 +zwill 0 +intern 0 +conferenceon 0 +minneapoli 0 +kabra 0 +patel 0 +proceedingsof 0 +base 0 +santiego 0 +chile 0 +august 0 +vldb 0 +invit 0 +summit 0 +present 0 +automat 0 +creat 0 +januari 0 +pub 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..986bff3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,326 @@ +dyer 1 +vision 0 +comput 0 +imag 0 +control 0 +view 0 +motion 0 +proc 0 +object 0 +seitz 0 +model 0 +visual 0 +scene 0 +algorithm 0 +base 0 +surfac 0 +us 0 +kutulako 0 +three 0 +shape 0 +appear 0 +interact 0 +global 0 +data 0 +recognit 0 +conf 0 +wisconsin 0 +develop 0 +real 0 +camera 0 +user 0 +recent 0 +viewpoint 0 +displai 0 +purpos 0 +dimension 0 +represent 0 +activ 0 +environ 0 +move 0 +morph 0 +order 0 +behavior 0 +explor 0 +observ 0 +defin 0 +reconstruct 0 +analysi 0 +cyclic 0 +invari 0 +pattern 0 +workshop 0 +seal 0 +detect 0 +hibbard 0 +charl 0 +wisc 0 +area 0 +interest 0 +synthesi 0 +research 0 +time 0 +virtual 0 +input 0 +path 0 +combin 0 +without 0 +interpol 0 +continu 0 +correspond 0 +center 0 +orient 0 +import 0 +unknown 0 +mark 0 +build 0 +need 0 +gener 0 +provabl 0 +recov 0 +understand 0 +scientif 0 +system 0 +public 0 +period 0 +toward 0 +contour 0 +ieee 0 +artifici 0 +intellig 0 +point 0 +affin 0 +paul 0 +spring 0 +fall 0 +allmen 0 +stewart 0 +kjell 0 +home 0 +pagecharl 0 +dyerprofessordepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +edutelephon 0 +finger 0 +infoph 0 +univers 0 +maryland 0 +curriculum 0 +vita 0 +visualizationgroup 0 +leader 0 +groupprogram 0 +chair 0 +cvpr 0 +synthesisth 0 +goal 0 +work 0 +basic 0 +tool 0 +controllingin 0 +either 0 +autonom 0 +cameraof 0 +videostream 0 +acquir 0 +fix 0 +mobil 0 +around 0 +site 0 +output 0 +panoram 0 +whicha 0 +technolog 0 +could 0 +navig 0 +througha 0 +custom 0 +thesit 0 +predetermin 0 +main 0 +researchquest 0 +adapt 0 +basi 0 +synthesizenew 0 +reconstructiona 0 +intermedi 0 +step 0 +innovativetechniqu 0 +callview 0 +take 0 +basisimag 0 +rang 0 +linear 0 +connect 0 +explorationcomput 0 +start 0 +investig 0 +howto 0 +acquisit 0 +process 0 +controllingcamera 0 +paramet 0 +studi 0 +purposefulli 0 +theposit 0 +dynam 0 +adjustviewpoint 0 +theus 0 +forsolv 0 +task 0 +findspecif 0 +unknownshap 0 +recogn 0 +coordin 0 +simpl 0 +chang 0 +appearanceof 0 +well 0 +simplifi 0 +computationsrequir 0 +make 0 +precis 0 +progress 0 +andelimin 0 +accur 0 +differenti 0 +measur 0 +thecamera 0 +believ 0 +approach 0 +towardsviewpoint 0 +close 0 +relat 0 +geometri 0 +viewedobject 0 +thisapproach 0 +correct 0 +asid 0 +revolut 0 +smooth 0 +arbitrarili 0 +visualizationin 0 +map 0 +techniquescap 0 +possibl 0 +type 0 +specificgraph 0 +procedur 0 +capabl 0 +displayingarbitrari 0 +commonfram 0 +refer 0 +coupl 0 +algorithmexecut 0 +provid 0 +power 0 +especi 0 +experi 0 +dataanalysi 0 +implement 0 +call 0 +forexperi 0 +techniqu 0 +visualizingintermedi 0 +final 0 +result 0 +forproblem 0 +discrimin 0 +cloud 0 +satellit 0 +trace 0 +shah 0 +jain 0 +kluwer 0 +boston 0 +siggraph 0 +track 0 +recoveri 0 +stationari 0 +advanc 0 +festschrift 0 +azriel 0 +rosenfeld 0 +societi 0 +press 0 +alamito 0 +complet 0 +four 0 +physic 0 +valid 0 +adjust 0 +lumelski 0 +strategi 0 +guid 0 +dimens 0 +robot 0 +autom 0 +occlud 0 +irregular 0 +rigid 0 +articul 0 +battaiola 0 +santek 0 +voidrot 0 +martinez 0 +earth 0 +space 0 +scienc 0 +juli 0 +lattic 0 +includ 0 +abstract 0 +groupcours 0 +taught 0 +introduct 0 +current 0 +student 0 +gareth 0 +bestor 0 +brian 0 +morgan 0 +steve 0 +liangyin 0 +yuph 0 +graduat 0 +bill 0 +whibbard 0 +macc 0 +onlattic 0 +structur 0 +kiriako 0 +kyro 0 +rochest 0 +ofobserv 0 +iutech 0 +sequenc 0 +descript 0 +spatiotempor 0 +flow 0 +curv 0 +brent 0 +dimensionalshap 0 +machin 0 +graphic 0 +harri 0 +plantinga 0 +wheaton 0 +viewer 0 +representationfor 0 +connectionist 0 +stereo 0 +bradlei 0 +ccsua 0 +ctstateu 0 +edg 0 +separ 0 +textur 0 +measureslink 0 +interestmi 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..5629eb79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,19 @@ +home 1 +page 1 +welcom 0 +friend 0 +machinew 0 +appreci 0 +patienc 0 +long 0 +arduou 0 +task 0 +bring 0 +better 0 +check 0 +educ 0 +curriculum 0 +vitaecheck 0 +class 0 +teach 0 +section 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..c33e68e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,165 @@ +scout 1 +page 1 +check 0 +comput 0 +scienc 0 +servic 0 +email 0 +molecular 0 +biologi 0 +time 0 +video 0 +make 0 +madison 0 +intern 0 +wait 0 +thing 0 +depart 0 +univers 0 +explain 0 +also 0 +assist 0 +support 0 +mac 0 +digit 0 +purpos 0 +multimedia 0 +complet 0 +link 0 +eric 0 +hazen 0 +home 0 +pageer 0 +hazennon 0 +professorroom 0 +west 0 +dayton 0 +current 0 +work 0 +fornet 0 +project 0 +part 0 +registr 0 +couldn 0 +help 0 +domain 0 +name 0 +problem 0 +even 0 +want 0 +neat 0 +monei 0 +locat 0 +ofwisconsin 0 +see 0 +could 0 +never 0 +elegantli 0 +fearless 0 +leader 0 +susan 0 +calcari 0 +offici 0 +explan 0 +design 0 +mainten 0 +site 0 +list 0 +report 0 +pete 0 +devri 0 +withtech 0 +unix 0 +machin 0 +come 0 +spent 0 +half 0 +year 0 +laboratori 0 +full 0 +student 0 +well 0 +call 0 +technic 0 +specialist 0 +meant 0 +around 0 +fix 0 +peopl 0 +broken 0 +mice 0 +answer 0 +question 0 +lucki 0 +cool 0 +interest 0 +us 0 +instruct 0 +graduat 0 +wisconsin 0 +philosophi 0 +program 0 +philosoph 0 +real 0 +commod 0 +capitalist 0 +societi 0 +requir 0 +curriculum 0 +shameless 0 +pragmatist 0 +talk 0 +sinc 0 +good 0 +metaphys 0 +discuss 0 +serv 0 +practic 0 +makethi 0 +look 0 +standard 0 +resum 0 +date 0 +made 0 +anim 0 +shown 0 +world 0 +among 0 +drosophila 0 +geneticist 0 +told 0 +wonder 0 +girlfriend 0 +salon 0 +magazin 0 +entertain 0 +inform 0 +ezin 0 +creat 0 +lauri 0 +anderson 0 +green 0 +room 0 +shockwav 0 +kudon 0 +know 0 +wit 0 +quicktimevr 0 +documentari 0 +plight 0 +bosnia 0 +uproot 0 +popul 0 +billi 0 +holidai 0 +homepag 0 +nation 0 +secur 0 +archiv 0 +nixon 0 +preslei 0 +meetingsejhazen 0 +facstaff 0 +wisc 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..48de2445 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,13 @@ +eliassi 1 +univers 0 +tina 0 +home 0 +pagetina 0 +illinoi 0 +urbana 0 +champaign 0 +wisconsin 0 +madison 0 +offic 0 +bldgphone 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..0452c414 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,72 @@ +madison 1 +eric 0 +instruct 0 +comput 0 +univers 0 +wisconsin 0 +high 0 +bandwidth 0 +fetch 0 +branch 0 +rotenberg 0 +jame 0 +smith 0 +depart 0 +predict 0 +confid 0 +trace 0 +cach 0 +latenc 0 +approach 0 +steve 0 +bennett 0 +appear 0 +proceed 0 +annual 0 +intern 0 +symposium 0 +microarchitectur 0 +decemb 0 +home 0 +page 0 +passsth 0 +anoth 0 +cold 0 +budweisth 0 +address 0 +scienc 0 +west 0 +dayton 0 +street 0 +electr 0 +engin 0 +johnson 0 +drive 0 +offic 0 +phone 0 +mail 0 +ericro 0 +wisc 0 +research 0 +area 0 +architectur 0 +advisor 0 +professor 0 +smithresearch 0 +topic 0 +kestrel 0 +multiscalar 0 +project 0 +level 0 +parallel 0 +mechan 0 +mispredict 0 +tolerancepubl 0 +assign 0 +condit 0 +erik 0 +jacobsen 0 +technic 0 +report 0 +april 0 +resum 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..4b67c174 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,69 @@ +comput 1 +would 0 +rather 0 +babak 0 +falsafi 0 +wisconsin 0 +madison 0 +wisc 0 +parallel 0 +scienc 0 +suni 0 +buffalo 0 +june 0 +like 0 +home 0 +page 0 +research 0 +assistantdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usatel 0 +email 0 +work 0 +peopl 0 +mentorcultresearch 0 +interest 0 +architectur 0 +perform 0 +evalu 0 +measur 0 +system 0 +program 0 +modelseduc 0 +univers 0 +decemb 0 +electr 0 +engin 0 +miscellan 0 +public 0 +drink 0 +read 0 +listen 0 +us 0 +high 0 +school 0 +idea 0 +fail 0 +morf 0 +shubu 0 +dionosi 0 +hillari 0 +profan 0 +phone 0 +convers 0 +check 0 +american 0 +french 0 +queen 0 +architect 0 +look 0 +hack 0 +partner 0 +crime 0 +next 0 +gener 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..643cd1fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,117 @@ +problem 1 +ferri 0 +scienc 0 +mathemat 0 +applic 0 +structur 0 +optim 0 +parallel 0 +home 0 +page 0 +michael 0 +comput 0 +univers 0 +program 0 +larg 0 +scale 0 +nonlinear 0 +econom 0 +investig 0 +emphasi 0 +model 0 +effect 0 +system 0 +complementar 0 +associ 0 +professor 0 +industri 0 +engineeringand 0 +member 0 +center 0 +depart 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +madison 0 +telephon 0 +email 0 +wisc 0 +cambridg 0 +interest 0 +theori 0 +algorithm 0 +research 0 +summari 0 +look 0 +robust 0 +method 0 +solv 0 +variationalinequ 0 +toproblem 0 +engin 0 +pivot 0 +path 0 +followingtechniqu 0 +base 0 +success 0 +linear 0 +numer 0 +properti 0 +andinterfac 0 +languag 0 +particular 0 +beingconsid 0 +includ 0 +equilibria 0 +taxat 0 +oncarbon 0 +emiss 0 +traffic 0 +congest 0 +toll 0 +contact 0 +chemic 0 +process 0 +design 0 +consid 0 +architectur 0 +solvingproblem 0 +graph 0 +partitioningtechniqu 0 +determin 0 +underli 0 +tool 0 +gener 0 +purpos 0 +techniqu 0 +forexploit 0 +machin 0 +directli 0 +within 0 +arealso 0 +consider 0 +prototyp 0 +us 0 +condor 0 +extens 0 +framework 0 +also 0 +beinginvestig 0 +identifi 0 +exploit 0 +underlyingmodel 0 +public 0 +complet 0 +list 0 +paper 0 +mostli 0 +electron 0 +avail 0 +relev 0 +link 0 +cpnet 0 +prgram 0 +pagec 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..43cb1e68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,254 @@ +learn 1 +page 0 +madison 0 +finton 0 +wisconsin 0 +current 0 +system 0 +wisc 0 +comput 0 +univers 0 +artifici 0 +intellig 0 +goal 0 +reinforc 0 +action 0 +environ 0 +input 0 +also 0 +need 0 +import 0 +world 0 +browser 0 +star 0 +trek 0 +david 0 +home 0 +michigan 0 +show 0 +develop 0 +comment 0 +understand 0 +make 0 +problem 0 +output 0 +instead 0 +thumb 0 +work 0 +explor 0 +base 0 +inform 0 +avail 0 +openstep 0 +daili 0 +head 0 +scienc 0 +depart 0 +west 0 +dayton 0 +street 0 +welcom 0 +grad 0 +student 0 +research 0 +nerdin 0 +intelligenceher 0 +grew 0 +grand 0 +rapid 0 +late 0 +offic 0 +earn 0 +degre 0 +math 0 +state 0 +master 0 +scienceher 0 +dissert 0 +institut 0 +take 0 +littl 0 +year 0 +traffic 0 +measur 0 +softwarefor 0 +first 0 +thesi 0 +advisor 0 +left 0 +trusti 0 +nextstationor 0 +librari 0 +enjoyplai 0 +trumpet 0 +piano 0 +listen 0 +longhair 0 +music 0 +plai 0 +volleybal 0 +intervarsityfolk 0 +contribut 0 +supersoak 0 +arm 0 +race 0 +feel 0 +free 0 +form 0 +send 0 +mail 0 +finger 0 +accountto 0 +plan 0 +whether 0 +gain 0 +employ 0 +introduct 0 +project 0 +smart 0 +machin 0 +intelligenti 0 +essenc 0 +intelligencei 0 +abil 0 +adapt 0 +actappropri 0 +order 0 +reach 0 +treat 0 +gener 0 +case 0 +control 0 +chang 0 +sens 0 +weak 0 +kind 0 +feedback 0 +express 0 +posit 0 +neg 0 +number 0 +teacher 0 +present 0 +thesystem 0 +pair 0 +receiv 0 +irregular 0 +interv 0 +focuss 0 +todistinguish 0 +good 0 +on 0 +direct 0 +process 0 +build 0 +agood 0 +represent 0 +term 0 +relev 0 +orimport 0 +featur 0 +note 0 +basedfeatur 0 +extract 0 +appli 0 +notion 0 +balanc 0 +perform 0 +optim 0 +exploit 0 +investig 0 +wai 0 +us 0 +learningprocess 0 +effici 0 +allow 0 +specifi 0 +start 0 +point 0 +experi 0 +activ 0 +better 0 +intelligentadapt 0 +hope 0 +provid 0 +basi 0 +whichwil 0 +benefit 0 +knowledg 0 +task 0 +realli 0 +date 0 +sorri 0 +pagefor 0 +hotlistthi 0 +independ 0 +hotlist 0 +keep 0 +copi 0 +access 0 +platform 0 +combin 0 +actual 0 +bookmark 0 +file 0 +omniweb 0 +eleg 0 +function 0 +netscap 0 +opinion 0 +omniwebi 0 +nextstep 0 +foral 0 +variant 0 +releas 0 +editori 0 +responseto 0 +jehovah 0 +wit 0 +deiti 0 +christwisconsin 0 +site 0 +intervars 0 +graduat 0 +fellowship 0 +check 0 +weatherin 0 +citi 0 +program 0 +link 0 +isthmu 0 +pagesom 0 +favorit 0 +place 0 +visit 0 +nebula 0 +nasa 0 +pictur 0 +wide 0 +studi 0 +bibl 0 +crosssearch 0 +minor 0 +glenn 0 +gould 0 +homepag 0 +farsid 0 +voyagerent 0 +dilbert 0 +zoneroam 0 +virtual 0 +tourist 0 +stereogram 0 +tell 0 +blow 0 +true 0 +next 0 +think 0 +bill 0 +gate 0 +word 0 +sponsor 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..63736a17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,242 @@ +nbsp 1 +program 0 +fischer 0 +regist 0 +alloc 0 +attribut 0 +charl 0 +compil 0 +code 0 +cost 0 +languag 0 +context 0 +august 0 +gener 0 +error 0 +kurland 0 +comput 0 +implement 0 +schedul 0 +model 0 +interprocedur 0 +time 0 +approach 0 +pointer 0 +arrai 0 +check 0 +us 0 +steven 0 +januari 0 +todd 0 +proebst 0 +transact 0 +system 0 +syntact 0 +analysi 0 +grammar 0 +scienc 0 +wisconsin 0 +univers 0 +teachingc 0 +cours 0 +research 0 +interest 0 +design 0 +recent 0 +best 0 +architectur 0 +issu 0 +avoid 0 +unnecessari 0 +delai 0 +optim 0 +procedur 0 +practic 0 +monitor 0 +execut 0 +sigplan 0 +harish 0 +patil 0 +appear 0 +effici 0 +june 0 +complet 0 +free 0 +parallel 0 +environ 0 +william 0 +specif 0 +juli 0 +evalu 0 +least 0 +decemb 0 +correct 0 +techniqu 0 +home 0 +page 0 +nbspcharl 0 +nbspprofessor 0 +nbspunivers 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +messag 0 +email 0 +wisc 0 +teach 0 +semest 0 +graduat 0 +spring 0 +undergradu 0 +focu 0 +exploit 0 +enormouscap 0 +provid 0 +modern 0 +student 0 +investig 0 +includ 0 +import 0 +pipelin 0 +haveinvestig 0 +arithmet 0 +express 0 +domin 0 +global 0 +level 0 +graph 0 +color 0 +mayb 0 +explicitli 0 +quantifi 0 +likelihood 0 +benefit 0 +registerresid 0 +attract 0 +care 0 +vital 0 +loadsand 0 +store 0 +must 0 +ultim 0 +theprocedur 0 +analyz 0 +studiedinterprocedur 0 +modelsthat 0 +optimallyalloc 0 +among 0 +polynomi 0 +seem 0 +effect 0 +anyon 0 +know 0 +easi 0 +make 0 +mistak 0 +involv 0 +indic 0 +especi 0 +common 0 +studi 0 +wai 0 +toautomat 0 +oper 0 +share 0 +memori 0 +multiprocessor 0 +workstat 0 +processor 0 +anoth 0 +possibl 0 +routin 0 +littl 0 +orno 0 +appar 0 +slowdown 0 +public 0 +minimum 0 +popl 0 +sigact 0 +symposium 0 +principl 0 +programminglanguag 0 +concurr 0 +access 0 +insoftwar 0 +experi 0 +demand 0 +driven 0 +inacm 0 +instruct 0 +load 0 +zero 0 +rang 0 +split 0 +confer 0 +activitiesa 0 +revis 0 +second 0 +edit 0 +craft 0 +author 0 +cytronand 0 +richard 0 +leblanc 0 +almost 0 +publish 0 +benjamin 0 +cum 0 +look 0 +soon 0 +better 0 +bookstor 0 +everywher 0 +short 0 +commun 0 +editor 0 +topla 0 +educationph 0 +cornel 0 +pars 0 +supervis 0 +john 0 +studentsdonn 0 +milton 0 +bruce 0 +rowland 0 +semant 0 +stephen 0 +skedzielewski 0 +definit 0 +reevalu 0 +septemb 0 +bernard 0 +dion 0 +local 0 +corrector 0 +sensitivepars 0 +mahadevan 0 +ganapathi 0 +retarget 0 +novemb 0 +vimal 0 +begwami 0 +maunei 0 +extend 0 +right 0 +gregori 0 +johnson 0 +sensit 0 +flow 0 +anil 0 +facil 0 +integr 0 +winsborough 0 +automat 0 +transpar 0 +logic 0 +venkatesh 0 +framework 0 +algorithm 0 +steve 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..56dac8b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,259 @@ +comput 1 +wisconsin 0 +memori 0 +univers 0 +madison 0 +goodman 0 +scienc 0 +depart 0 +technic 0 +report 0 +share 0 +jame 0 +architectur 0 +burger 0 +project 0 +chip 0 +synchron 0 +integr 0 +base 0 +intern 0 +also 0 +scalabl 0 +processor 0 +system 0 +specif 0 +bandwidth 0 +design 0 +current 0 +coher 0 +standard 0 +galileo 0 +studi 0 +main 0 +research 0 +perform 0 +larg 0 +latenc 0 +point 0 +interconnect 0 +appear 0 +confer 0 +stefano 0 +kaxira 0 +juli 0 +alain 0 +cach 0 +relat 0 +transport 0 +layer 0 +hardwar 0 +includ 0 +support 0 +proceed 0 +johnson 0 +woest 0 +focus 0 +process 0 +capabl 0 +arrow 0 +differ 0 +iram 0 +follow 0 +futur 0 +model 0 +execut 0 +datascalar 0 +public 0 +optic 0 +us 0 +link 0 +list 0 +protocol 0 +effici 0 +primit 0 +high 0 +extend 0 +mechan 0 +march 0 +interfac 0 +scale 0 +februari 0 +nagi 0 +philip 0 +novemb 0 +analysi 0 +mari 0 +vernon 0 +doug 0 +home 0 +page 0 +wisconsint 0 +contentsgalileoproject 0 +descriptionpublicationsrel 0 +projectssci 0 +wisconsinproject 0 +descriptionpublicationsproject 0 +membersgalileo 0 +wisconsingalileo 0 +conduct 0 +groupat 0 +medium 0 +long 0 +term 0 +evolut 0 +emphasison 0 +therelationship 0 +futuresystem 0 +complet 0 +separ 0 +todai 0 +extent 0 +storag 0 +merg 0 +least 0 +wai 0 +increas 0 +penalti 0 +issuabl 0 +instruct 0 +orlimit 0 +place 0 +capacityon 0 +modul 0 +eventu 0 +sizabl 0 +fractionof 0 +resid 0 +repres 0 +label 0 +mopin 0 +diagram 0 +possibl 0 +migrat 0 +ofprocessor 0 +onto 0 +dram 0 +eventuallyobvi 0 +central 0 +area 0 +examin 0 +impact 0 +andlimit 0 +microprocessor 0 +systemsperform 0 +variou 0 +along 0 +theprocessor 0 +spectrumcach 0 +hierarchi 0 +systemsdesign 0 +bank 0 +systemprogram 0 +multipl 0 +exploit 0 +elimin 0 +serial 0 +bottlenecksdoug 0 +massiv 0 +parallel 0 +octob 0 +spsd 0 +modeldoug 0 +quantifi 0 +limit 0 +microprocessorsdoug 0 +symposium 0 +declin 0 +effect 0 +dynam 0 +gener 0 +purpos 0 +microprocessorsdougla 0 +januari 0 +berkeleyppram 0 +kyushu 0 +univeristi 0 +japansci 0 +wisconsinour 0 +group 0 +close 0 +involv 0 +coherentshar 0 +multiprocessor 0 +coherentinterfac 0 +ieee 0 +platform 0 +explor 0 +idea 0 +specifi 0 +queue 0 +lock 0 +qolb 0 +aswel 0 +optim 0 +pattern 0 +pairwis 0 +fresh 0 +read 0 +definitionfor 0 +extrem 0 +betweenprocess 0 +element 0 +individu 0 +cluster 0 +topic 0 +logarithm 0 +grow 0 +structureseffici 0 +multiprocessorsa 0 +extensionsaggress 0 +consist 0 +multiprocessorswisconsin 0 +minim 0 +overhead 0 +applic 0 +best 0 +paper 0 +supercomput 0 +simul 0 +wind 0 +tunneldougla 0 +second 0 +workshop 0 +cost 0 +hierarch 0 +extens 0 +scijam 0 +memoryross 0 +evan 0 +aboulenein 0 +stein 0 +gjess 0 +topolog 0 +ringsross 0 +decemb 0 +ringsteven 0 +scott 0 +lower 0 +bound 0 +coherenceross 0 +june 0 +multiprocessorsphilip 0 +multiprocessorjam 0 +third 0 +program 0 +languag 0 +oper 0 +april 0 +particip 0 +faculti 0 +graduat 0 +student 0 +alumni 0 +abouleneinross 0 +johnsonstev 0 +scottlast 0 +modifi 0 +dburger 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..6911b2fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,34 @@ +geeri 1 +andrew 0 +madison 0 +home 0 +page 0 +wisc 0 +west 0 +dayton 0 +street 0 +regent 0 +madisonin 0 +comput 0 +scienc 0 +current 0 +work 0 +compsci 0 +grade 0 +schedul 0 +pontif 0 +peopl 0 +interest 0 +jacqu 0 +derrida 0 +post 0 +structur 0 +martin 0 +heidegg 0 +albert 0 +camu 0 +jean 0 +paul 0 +sartr 0 +friedrich 0 +nietzsch 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..7d9012f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,148 @@ +time 1 +pleas 0 +gideon 0 +glass 0 +homepag 0 +tweak 0 +find 0 +sampl 0 +follow 0 +send 0 +read 0 +index 0 +machin 0 +netscap 0 +unix 0 +mayb 0 +continu 0 +thank 0 +stop 0 +collect 0 +imag 0 +deposit 0 +directori 0 +pictur 0 +gui 0 +claim 0 +cooler 0 +accord 0 +toonion 0 +seethi 0 +movi 0 +usual 0 +suspect 0 +walk 0 +nearest 0 +blockbust 0 +note 0 +recent 0 +move 0 +none 0 +roommat 0 +dutch 0 +cheap 0 +either 0 +given 0 +predica 0 +dismal 0 +prospect 0 +improv 0 +withno 0 +outsid 0 +influenc 0 +consid 0 +make 0 +small 0 +donationto 0 +help 0 +defrai 0 +cost 0 +purchas 0 +check 0 +monei 0 +orderscan 0 +sent 0 +address 0 +cash 0 +monro 0 +floor 0 +madison 0 +usathank 0 +support 0 +grad 0 +student 0 +sometim 0 +paper 0 +eventhough 0 +shelf 0 +feet 0 +unread 0 +book 0 +wait 0 +anyhow 0 +look 0 +someth 0 +christian 0 +achil 0 +huge 0 +might 0 +also 0 +unifi 0 +cstechreport 0 +class 0 +project 0 +report 0 +otherstuff 0 +avail 0 +program 0 +load 0 +averagewil 0 +grow 0 +fast 0 +main 0 +fork 0 +doofu 0 +actual 0 +share 0 +back 0 +calvin 0 +great 0 +killer 0 +zippi 0 +pinheadha 0 +reload 0 +sever 0 +justtri 0 +last 0 +fall 0 +kill 0 +noth 0 +think 0 +work 0 +mozilla 0 +higher 0 +well 0 +dabbl 0 +object 0 +orient 0 +programmingin 0 +mostli 0 +exercis 0 +suppos 0 +buttonher 0 +thing 0 +right 0 +suffic 0 +case 0 +told 0 +somethingin 0 +bookmark 0 +denni 0 +ritchi 0 +creator 0 +wrote 0 +anti 0 +forward 0 +hater 0 +handbook 0 +mailand 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..7887a515 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,406 @@ +meet 1 +comput 0 +calendar 0 +andi 0 +glew 0 +first 0 +intel 0 +page 0 +unix 0 +system 0 +us 0 +schedul 0 +email 0 +wisconsin 0 +home 0 +arrang 0 +approach 0 +architectur 0 +manag 0 +time 0 +advoc 0 +also 0 +pilot 0 +synchron 0 +univers 0 +thing 0 +public 0 +html 0 +work 0 +configur 0 +beef 0 +montreal 0 +peopl 0 +code 0 +although 0 +like 0 +optimist 0 +lock 0 +version 0 +creat 0 +least 0 +possibl 0 +person 0 +krazi 0 +organ 0 +touch 0 +filesystem 0 +access 0 +wisc 0 +read 0 +research 0 +group 0 +interest 0 +wannab 0 +author 0 +thought 0 +suggest 0 +miscellan 0 +info 0 +stuff 0 +self 0 +imag 0 +resum 0 +trust 0 +keep 0 +warm 0 +enough 0 +alwai 0 +architect 0 +someth 0 +busi 0 +sinc 0 +start 0 +learn 0 +hacker 0 +gould 0 +real 0 +futur 0 +cannot 0 +tool 0 +paper 0 +softwar 0 +concurr 0 +control 0 +get 0 +test 0 +requir 0 +even 0 +commit 0 +download 0 +softwareto 0 +user 0 +oper 0 +urgent 0 +teresa 0 +pageandi 0 +pagethi 0 +largelyform 0 +snippet 0 +stylishor 0 +summarycontact 0 +ship 0 +addressescalendar 0 +http 0 +anyon 0 +file 0 +unless 0 +specif 0 +open 0 +default 0 +scribe 0 +minut 0 +taker 0 +sohi 0 +weekli 0 +rather 0 +gener 0 +form 0 +adapt 0 +applic 0 +dynam 0 +versu 0 +static 0 +high 0 +level 0 +edit 0 +higher 0 +educ 0 +effect 0 +patent 0 +claim 0 +fame 0 +favorit 0 +quot 0 +sai 0 +care 0 +pope 0 +priest 0 +parson 0 +king 0 +william 0 +boyn 0 +want 0 +coin 0 +summer 0 +harm 0 +see 0 +frost 0 +snow 0 +poor 0 +almighti 0 +dollar 0 +good 0 +mapl 0 +wood 0 +bellow 0 +church 0 +chapel 0 +ranter 0 +preacher 0 +beecher 0 +alreadi 0 +keeper 0 +harbour 0 +deplor 0 +churchmen 0 +notori 0 +atheist 0 +less 0 +well 0 +known 0 +chariti 0 +strand 0 +sailor 0 +knew 0 +could 0 +exchang 0 +chop 0 +chord 0 +firewood 0 +meal 0 +place 0 +sleep 0 +print 0 +manifesto 0 +handbil 0 +advertis 0 +hackeralthough 0 +aspir 0 +formerlyhad 0 +fake 0 +motorola 0 +card 0 +ever 0 +assembl 0 +redesign 0 +chip 0 +kernel 0 +andstil 0 +think 0 +wistfulli 0 +beard 0 +frequent 0 +wear 0 +suspend 0 +bald 0 +architectureonc 0 +involv 0 +microarchitectur 0 +pentium 0 +adopt 0 +architecturei 0 +constantli 0 +verg 0 +write 0 +book 0 +entitl 0 +grabbag 0 +trick 0 +techniqu 0 +sort 0 +antidot 0 +hennessi 0 +patterson 0 +afford 0 +diskspac 0 +internet 0 +servic 0 +provideror 0 +charg 0 +connect 0 +appreci 0 +piec 0 +architectureon 0 +best 0 +wai 0 +textbook 0 +datasheet 0 +instruct 0 +refer 0 +wander 0 +netscapebookmarksstockscod 0 +standardsroi 0 +wilkinson 0 +standardsi 0 +disagre 0 +mani 0 +perhap 0 +quit 0 +quickli 0 +defunct 0 +startup 0 +compani 0 +call 0 +enfopris 0 +build 0 +workstat 0 +chang 0 +assign 0 +driver 0 +writingto 0 +integr 0 +longstand 0 +love 0 +hate 0 +relationship 0 +configurationmanag 0 +scc 0 +publish 0 +box 0 +link 0 +parallel 0 +tree 0 +element 0 +usenix 0 +workshop 0 +describ 0 +central 0 +databas 0 +multipl 0 +view 0 +hardlink 0 +clone 0 +save 0 +space 0 +divis 0 +team 0 +brian 0 +berlin 0 +deprec 0 +mainli 0 +wherea 0 +actual 0 +case 0 +livelock 0 +usual 0 +insist 0 +singl 0 +identifi 0 +serial 0 +sourc 0 +checkinsso 0 +proce 0 +linear 0 +manner 0 +programm 0 +previou 0 +fix 0 +appli 0 +recogn 0 +relax 0 +often 0 +strip 0 +approachin 0 +apolog 0 +never 0 +truli 0 +portabl 0 +accomplish 0 +similar 0 +mike 0 +fetterman 0 +mark 0 +aitken 0 +deserv 0 +credit 0 +enhanc 0 +sever 0 +featur 0 +went 0 +notabl 0 +number 0 +becam 0 +overal 0 +suffici 0 +everyth 0 +includ 0 +cshrc 0 +login 0 +wisconsinhow 0 +seem 0 +ubiquit 0 +programat 0 +depart 0 +variou 0 +cmtool 0 +domain 0 +ical 0 +plan 0 +critic 0 +mass 0 +anyof 0 +isol 0 +associ 0 +mean 0 +record 0 +voic 0 +therefor 0 +must 0 +prefer 0 +phone 0 +manuallyadd 0 +microsoft 0 +watch 0 +intelat 0 +devout 0 +program 0 +last 0 +ontim 0 +past 0 +weak 0 +disconnect 0 +allow 0 +major 0 +meetingswith 0 +without 0 +manual 0 +intervent 0 +algorithm 0 +tell 0 +reserveth 0 +right 0 +blindli 0 +invit 0 +make 0 +admin 0 +check 0 +proposeif 0 +week 0 +avoid 0 +bother 0 +send 0 +realiz 0 +miss 0 +sent 0 +advanc 0 +overallschedul 0 +topic 0 +fascin 0 +bring 0 +effici 0 +advantag 0 +secretariesand 0 +aid 0 +camp 0 +header 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..7eea0460 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,24 @@ +goodman 1 +comput 0 +wisconsin 0 +home 0 +page 0 +jame 0 +wisc 0 +professor 0 +sciencesdepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaresearch 0 +interest 0 +lot 0 +good 0 +stuff 0 +current 0 +project 0 +galileo 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..3123566c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,338 @@ +imag 1 +databas 0 +comput 0 +home 0 +retriev 0 +architectur 0 +link 0 +pic 0 +page 0 +greg 0 +inform 0 +mirror 0 +softwar 0 +index 0 +japanes 0 +relat 0 +wisc 0 +fall 0 +system 0 +learn 0 +librari 0 +info 0 +project 0 +vision 0 +cool 0 +japan 0 +nice 0 +stuff 0 +site 0 +recognit 0 +process 0 +graphic 0 +text 0 +group 0 +cours 0 +sharp 0 +offic 0 +section 0 +lectur 0 +spring 0 +manag 0 +engin 0 +html 0 +refer 0 +string 0 +attach 0 +introduct 0 +geometri 0 +machin 0 +freewar 0 +internet 0 +hyper 0 +rang 0 +shape 0 +shade 0 +pretti 0 +idea 0 +data 0 +planet 0 +wyom 0 +satelit 0 +line 0 +histori 0 +base 0 +list 0 +handwrit 0 +washington 0 +state 0 +tracer 0 +archiv 0 +trace 0 +simul 0 +univ 0 +includ 0 +english 0 +robot 0 +thoma 0 +tool 0 +languag 0 +invest 0 +fund 0 +schwab 0 +pagegreg 0 +pagenam 0 +sharpemail 0 +eduoffic 0 +phone 0 +hour 0 +appt 0 +tgif 0 +everi 0 +fridai 0 +dept 0 +instructor 0 +note 0 +notesclass 0 +topic 0 +find 0 +aboutsearch 0 +altavista 0 +dejanew 0 +excit 0 +infoseek 0 +lyco 0 +metacrawl 0 +yahoo 0 +usenet 0 +format 0 +ohioc 0 +program 0 +todai 0 +draft 0 +standard 0 +april 0 +stanford 0 +cygnu 0 +mumit 0 +newbi 0 +guideplatform 0 +independ 0 +portabl 0 +develop 0 +kit 0 +amulet 0 +dclap 0 +requir 0 +motif 0 +suit 0 +wxwindow 0 +yacl 0 +class 0 +projectclass 0 +numer 0 +linear 0 +algebra 0 +theoret 0 +scienc 0 +homework 0 +projectmisc 0 +sharewar 0 +cygwin 0 +directori 0 +gimp 0 +harmonai 0 +harmoni 0 +unix 0 +client 0 +browser 0 +vasc 0 +altern 0 +video 0 +research 0 +give 0 +specifi 0 +night 0 +jaida 0 +year 0 +worth 0 +atmospher 0 +multiresolut 0 +seamless 0 +click 0 +zoom 0 +resolut 0 +view 0 +solar 0 +moon 0 +comet 0 +meteor 0 +version 0 +also 0 +lot 0 +misc 0 +like 0 +overhead 0 +shot 0 +antarctica 0 +catalogu 0 +niae 0 +gothic 0 +electr 0 +postcard 0 +card 0 +rack 0 +select 0 +thank 0 +todd 0 +vistex 0 +textur 0 +databaseartifici 0 +gener 0 +primoridi 0 +soup 0 +kitchen 0 +math 0 +depart 0 +awesom 0 +medic 0 +medicin 0 +document 0 +pictur 0 +diagon 0 +dermatolog 0 +onlin 0 +atla 0 +erlang 0 +germani 0 +enter 0 +diagnosi 0 +back 0 +orthopaed 0 +ecvnet 0 +optic 0 +charact 0 +nici 0 +groupimag 0 +univers 0 +raytrac 0 +rayshad 0 +utah 0 +raster 0 +toolkit 0 +radianc 0 +radios 0 +packag 0 +avalon 0 +object 0 +grimstead 0 +massiv 0 +dsite 0 +hardwar 0 +board 0 +intergraph 0 +lockhe 0 +glint 0 +chipset 0 +nvidia 0 +chipsetcomput 0 +geometeri 0 +center 0 +applic 0 +challeng 0 +geometrylispuseless 0 +pagescomput 0 +hennessi 0 +patterson 0 +resourc 0 +superdlx 0 +parallel 0 +parl 0 +mexico 0 +washingt 0 +georgia 0 +tech 0 +groupjapanes 0 +guid 0 +unvers 0 +monash 0 +infowav 0 +edict 0 +window 0 +dictionari 0 +shodouka 0 +asiasoftinform 0 +retrev 0 +peregrin 0 +travers 0 +written 0 +perl 0 +trec 0 +infomin 0 +gigabyt 0 +search 0 +textual 0 +provid 0 +experi 0 +feedback 0 +linguist 0 +util 0 +repositori 0 +survei 0 +natur 0 +nist 0 +other_sw 0 +info_retriev 0 +world 0 +wide 0 +wander 0 +spider 0 +jedi 0 +might 0 +strictli 0 +hartlib 0 +paper 0 +latin 0 +stemmer 0 +multimedia 0 +academ 0 +storag 0 +new 0 +pointcast 0 +check 0 +custom 0 +portfolio 0 +automat 0 +updat 0 +literatur 0 +mark 0 +twainhumor 0 +apolog 0 +citizen 0 +offens 0 +threw 0 +garbag 0 +belong 0 +investorweb 0 +networth 0 +fundscap 0 +brill 0 +editori 0 +servic 0 +stockmastermutu 0 +brokerag 0 +hous 0 +fidel 0 +vanguard 0 +row 0 +price 0 +jack 0 +white 0 +compani 0 +charl 0 +gabelli 0 +mutualsmisc 0 +psnuplast 0 +modifi 0 +sharpgreg 0 +http 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..40ce5899 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar 1 +gopalsridhar 1 +gopalgsri 1 +wisc 1 +edubon 1 +marrow 1 +pageresumest 1 +wisconsin 1 +pagecalvin 1 +hobbesbookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..8041dfdd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,58 @@ +univers 1 +madison 1 +home 0 +visitor 0 +sinc 0 +guangshun 0 +page 0 +depart 0 +comput 0 +wisconsin 0 +interest 0 +project 0 +data 0 +relat 0 +send 0 +email 0 +number 0 +welcom 0 +graduat 0 +student 0 +scienc 0 +dayton 0 +phone 0 +offic 0 +educ 0 +california 0 +state 0 +angel 0 +peke 0 +physic 0 +grade 0 +research 0 +databas 0 +manag 0 +system 0 +advis 0 +raghu 0 +ramakrishnan 0 +miron 0 +livni 0 +analysi 0 +famili 0 +medicin 0 +devis 0 +explor 0 +visual 0 +environ 0 +class 0 +link 0 +stuff 0 +career 0 +plan 0 +chines 0 +miscellani 0 +around 0 +weather 0 +forecast 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..bfd28c9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,44 @@ +data 1 +guhan 0 +viswanathan 0 +thesi 0 +research 0 +design 0 +implement 0 +parallellanguag 0 +home 0 +page 0 +gviswana 0 +wisc 0 +graduat 0 +studentdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaadvisor 0 +laru 0 +focus 0 +involv 0 +base 0 +develop 0 +local 0 +compil 0 +target 0 +investig 0 +parallelappl 0 +execut 0 +effici 0 +hand 0 +code 0 +parallelprogram 0 +amor 0 +detail 0 +summari 0 +list 0 +public 0 +us 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..80c92117 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,73 @@ +page 1 +harit 0 +univers 0 +comput 0 +prof 0 +home 0 +student 0 +cours 0 +take 0 +advanc 0 +architectur 0 +link 0 +access 0 +time 0 +sinc 0 +sept 0 +counter 0 +mail 0 +graduat 0 +wisconsin 0 +madison 0 +depart 0 +scienc 0 +would 0 +like 0 +list 0 +classmat 0 +fall 0 +databas 0 +manag 0 +system 0 +raghu 0 +ramakrishnan 0 +mark 0 +hill 0 +spring 0 +jame 0 +goodman 0 +undergradu 0 +world 0 +famou 0 +mvsr 0 +engin 0 +colleg 0 +osmania 0 +hyderabad 0 +india 0 +meet 0 +draw 0 +line 0 +thing 0 +interest 0 +indian 0 +newspap 0 +stuff 0 +sport 0 +sastri 0 +roommat 0 +saeed 0 +mirza 0 +murthi 0 +zubber 0 +dust 0 +photo 0 +photograph 0 +warn 0 +click 0 +year 0 +folk 0 +courtesi 0 +electron 0 +address 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..3d520a4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,43 @@ +madison 1 +hasti 0 +wisconsin 0 +rebecca 0 +scienc 0 +java 0 +univers 0 +mathemat 0 +home 0 +page 0 +graduat 0 +student 0 +research 0 +assistantcomput 0 +departmentunivers 0 +dayton 0 +offic 0 +mail 0 +wisc 0 +edutelephon 0 +telephon 0 +dept 0 +first 0 +applet 0 +click 0 +fall 0 +schedul 0 +engr 0 +noland 0 +seminar 0 +comput 0 +carleton 0 +colleg 0 +interest 0 +program 0 +languag 0 +basketbal 0 +volleybal 0 +softbal 0 +linkag 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..47cebb2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,108 @@ +comput 1 +univers 1 +lane 0 +stuff 0 +scienc 0 +madison 0 +phone 0 +offic 0 +inform 0 +home 0 +page 0 +chad 0 +academ 0 +person 0 +neat 0 +dept 0 +wisconsin 0 +research 0 +program 0 +state 0 +claud 0 +info 0 +west 0 +dayton 0 +mail 0 +address 0 +hour 0 +wisc 0 +welcomethank 0 +stop 0 +hope 0 +enjoi 0 +bestbet 0 +link 0 +section 0 +biggest 0 +new 0 +life 0 +right 0 +get 0 +marri 0 +onmai 0 +nichol 0 +final 0 +want 0 +tell 0 +good 0 +luck 0 +count 0 +fall 0 +cours 0 +retriev 0 +technolog 0 +seek 0 +databas 0 +manag 0 +system 0 +ling 0 +audit 0 +advanc 0 +semant 0 +interest 0 +linguist 0 +discours 0 +process 0 +us 0 +advic 0 +barwis 0 +epigram 0 +alan 0 +perli 0 +educ 0 +mathemat 0 +minor 0 +philosophi 0 +laud 0 +truman 0 +formerli 0 +northeast 0 +missouri 0 +expect 0 +stand 0 +accord 0 +truli 0 +click 0 +imag 0 +cyber 0 +poop 0 +creation 0 +unabash 0 +brother 0 +bart 0 +arthur 0 +download 0 +psychot 0 +talk 0 +rais 0 +plant 0 +internet 0 +deep 0 +thought 0 +jack 0 +handi 0 +reload 0 +differ 0 +on 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..3990ffc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,103 @@ +robot 1 +hert 0 +susan 0 +comput 0 +vladimir 0 +lumelski 0 +motion 0 +plan 0 +interest 0 +geometri 0 +algorithm 0 +tether 0 +page 0 +research 0 +madison 0 +link 0 +multipl 0 +appear 0 +confer 0 +intern 0 +autonom 0 +scienc 0 +univers 0 +wisconsin 0 +public 0 +curv 0 +paper 0 +august 0 +journal 0 +system 0 +version 0 +proc 0 +librari 0 +book 0 +home 0 +wisc 0 +assist 0 +depart 0 +dayton 0 +telephon 0 +curriculum 0 +vita 0 +postscript 0 +softwar 0 +appli 0 +experiment 0 +analysi 0 +design 0 +graphic 0 +geometr 0 +advisor 0 +current 0 +work 0 +develop 0 +alogirthm 0 +common 0 +environ 0 +select 0 +deform 0 +plane 0 +extend 0 +abstract 0 +proceed 0 +canadian 0 +planar 0 +rout 0 +applic 0 +ti 0 +bind 0 +publish 0 +ieee 0 +autom 0 +sanjai 0 +tiwari 0 +terrain 0 +cover 0 +special 0 +issu 0 +underwat 0 +move 0 +arbitrari 0 +configur 0 +intellig 0 +reznik 0 +simul 0 +basi 0 +anim 0 +program 0 +technic 0 +report 0 +laboratori 0 +juli 0 +educ 0 +refer 0 +shelf 0 +congress 0 +line 0 +travel 0 +samantha 0 +cook 0 +epicuri 0 +veggi 0 +unit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..7aba39a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,49 @@ +madison 1 +kirk 0 +hogenson 0 +offic 0 +mail 0 +graduat 0 +wisconsin 0 +student 0 +hour 0 +studentcomput 0 +scienc 0 +departmentunivers 0 +dayton 0 +wisc 0 +edutelephon 0 +depart 0 +section 0 +tue 0 +also 0 +look 0 +myschedul 0 +none 0 +workout 0 +tryto 0 +appoint 0 +time 0 +finger 0 +send 0 +visit 0 +ghana 0 +countri 0 +serv 0 +peac 0 +corp 0 +usernam 0 +check 0 +pnhp 0 +group 0 +page 0 +maintain 0 +wife 0 +eilun 0 +experi 0 +counter 0 +sai 0 +accessedtim 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..1ee64648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,12 @@ +horn 1 +jeffrei 0 +swanton 0 +road 0 +madison 0 +wisconsin 0 +phone 0 +email 0 +wisc 0 +wise 0 +linear 0 +familyemploymenteducationresearchgenealog 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..1b8d38a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,148 @@ +program 1 +horwitz 0 +analysi 0 +languag 0 +dataflow 0 +work 0 +rep 0 +confer 0 +problem 0 +graph 0 +interprocedur 0 +sagiv 0 +symposium 0 +proceed 0 +depend 0 +algorithm 0 +precis 0 +effici 0 +record 0 +januari 0 +softwar 0 +base 0 +slice 0 +mainli 0 +involv 0 +understand 0 +textual 0 +semant 0 +differ 0 +version 0 +call 0 +class 0 +develop 0 +implement 0 +twenti 0 +intern 0 +principlesof 0 +engin 0 +susan 0 +horwitzsusan 0 +horwitzprofessorcomput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +wisc 0 +telephon 0 +secretari 0 +depart 0 +cornel 0 +univers 0 +research 0 +interest 0 +environmentsprogram 0 +differenc 0 +mergingstat 0 +programsinterprocedur 0 +analysisresearch 0 +summarymi 0 +design 0 +implementationof 0 +tool 0 +help 0 +programm 0 +like 0 +exist 0 +would 0 +affectedbi 0 +propos 0 +modif 0 +structur 0 +betweentwo 0 +retest 0 +chang 0 +combin 0 +piec 0 +produc 0 +certainsemant 0 +guarante 0 +represent 0 +theprogram 0 +oper 0 +also 0 +interproceduraldataflow 0 +previou 0 +concentratedeith 0 +specif 0 +individu 0 +necessarili 0 +gener 0 +thoma 0 +mooli 0 +newalgorithm 0 +appli 0 +larg 0 +recent 0 +publicationsm 0 +shapiro 0 +fast 0 +accur 0 +flow 0 +insensit 0 +point 0 +appear 0 +fourth 0 +onprincipl 0 +pari 0 +franc 0 +demand 0 +sigsoft 0 +foundat 0 +softwareengin 0 +washington 0 +octob 0 +applic 0 +constantpropag 0 +sixth 0 +joint 0 +theoryand 0 +practic 0 +aarhu 0 +denmark 0 +reachabl 0 +second 0 +francisco 0 +bate 0 +increment 0 +test 0 +us 0 +twentieth 0 +charleston 0 +fourteenth 0 +conferenceon 0 +melbourn 0 +australia 0 +identifi 0 +aprogram 0 +sigplan 0 +languagedesign 0 +white 0 +plain 0 +june 0 +teach 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..32008726 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid 1 +pagenam 1 +sidnei 1 +hummertoffic 1 +phone 1 +offic 1 +email 1 +hummert 1 +wisc 1 +edua 1 +postscript 1 +version 1 +resum 1 +pictur 1 +click 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..a87fd658 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,46 @@ +home 1 +alien 1 +construct 1 +mail 1 +univers 1 +wisconsin 1 +phone 1 +wisc 1 +igor 0 +ivanisev 0 +pageigorivanisev 0 +work 0 +newest 0 +project 0 +disclaim 0 +speak 0 +particular 0 +needless 0 +page 0 +ever 0 +feel 0 +like 0 +actual 0 +alreadi 0 +link 0 +research 0 +interest 0 +robot 0 +vision 0 +stuff 0 +generalgradu 0 +slave 0 +departmentwa 0 +undergrad 0 +drake 0 +math 0 +departmentaddress 0 +comput 0 +scienc 0 +departmentunivers 0 +west 0 +dayton 0 +streetmadison 0 +offic 0 +iigor 0 +eduiigor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..e9f8abef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..953d2b24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,37 @@ +sharenow 1 +comput 0 +meet 0 +offic 0 +section 0 +tuesdai 0 +home 0 +page 0 +wisc 0 +teach 0 +assist 0 +peterson 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +madisonmadison 0 +sciencestelephon 0 +hour 0 +thursdai 0 +pmsection 0 +pmboth 0 +class 0 +room 0 +sciencesc 0 +announcementshandoutsmoth 0 +jone 0 +profil 0 +recreat 0 +site 0 +pleas 0 +send 0 +email 0 +comment 0 +last 0 +modifi 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..4a1fc18a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,44 @@ +scienc 1 +research 0 +madison 0 +comput 0 +basneyjim 0 +basneygradu 0 +student 0 +assistantcomput 0 +departmentunivers 0 +wisconsin 0 +dayton 0 +email 0 +jbasnei 0 +wisc 0 +eduoffic 0 +statisticsoffic 0 +phone 0 +interest 0 +area 0 +oper 0 +system 0 +andnetwork 0 +current 0 +work 0 +condor 0 +directionof 0 +prof 0 +miron 0 +livni 0 +receiv 0 +fromoberlin 0 +colleg 0 +english 0 +webpag 0 +oberlin 0 +resum 0 +codefrom 0 +previou 0 +project 0 +avail 0 +onlin 0 +last 0 +modifi 0 +basnei 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..a4faa3d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,83 @@ +jerel 1 +mackai 1 +comput 1 +work 1 +scienc 0 +databas 0 +system 0 +plai 0 +also 0 +home 0 +pagejerel 0 +assist 0 +research 0 +special 0 +oper 0 +fulltim 0 +univers 0 +wisconsin 0 +madison 0 +depart 0 +respons 0 +includ 0 +develop 0 +support 0 +sybas 0 +ingr 0 +instal 0 +backup 0 +softwar 0 +train 0 +student 0 +hourli 0 +electr 0 +guitar 0 +thrash 0 +metal 0 +specialti 0 +violin 0 +classic 0 +baroqu 0 +mainli 0 +seen 0 +error 0 +evil 0 +wai 0 +click 0 +shock 0 +case 0 +didn 0 +believ 0 +ey 0 +first 0 +time 0 +like 0 +record 0 +mostli 0 +funni 0 +cover 0 +stuff 0 +abba 0 +metallica 0 +origin 0 +soon 0 +abl 0 +sampl 0 +hit 0 +watch 0 +favorit 0 +show 0 +raquetbal 0 +golf 0 +shoot 0 +pool 0 +stand 0 +around 0 +towel 0 +yeah 0 +know 0 +much 0 +finger 0 +jerellast 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..ceb03a0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home 1 +page 1 +johan 1 +larson 1 +homepag 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..f6971487 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,131 @@ +page 1 +link 1 +cool 0 +pictur 0 +home 0 +frame 0 +year 0 +realli 0 +would 0 +default 0 +number 0 +paus 0 +imag 0 +stuff 0 +time 0 +friend 0 +homepag 0 +roomat 0 +notr 0 +dame 0 +search 0 +engin 0 +cano 0 +relief 0 +happi 0 +java 0 +good 0 +censorship 0 +mail 0 +jherro 0 +wisc 0 +note 0 +class 0 +made 0 +relax 0 +let 0 +thing 0 +apictur 0 +girlfriend 0 +half 0 +afraid 0 +date 0 +though 0 +almost 0 +mani 0 +tortur 0 +disembody 0 +becam 0 +float 0 +head 0 +death 0 +directori 0 +anim 0 +seri 0 +jpeg 0 +format 0 +name 0 +start 0 +millisecond 0 +overriden 0 +repeat 0 +sequenc 0 +explicit 0 +order 0 +vital 0 +inform 0 +aquir 0 +nicknam 0 +like 0 +take 0 +apolog 0 +lame 0 +pleas 0 +bear 0 +pretti 0 +jack 0 +skellington 0 +kermit 0 +frog 0 +interest 0 +someth 0 +els 0 +neat 0 +write 0 +haiku 0 +said 0 +thath 0 +go 0 +click 0 +mine 0 +grad 0 +memori 0 +forgotten 0 +cult 0 +hippothi 0 +exploratori 0 +intervent 0 +chaotic 0 +exist 0 +realiti 0 +follow 0 +enjoi 0 +benefit 0 +matriarch 0 +societi 0 +join 0 +todai 0 +exclus 0 +club 0 +hierarchi 0 +rule 0 +semi 0 +yahooooooooooooo 0 +work 0 +contain 0 +free 0 +softwar 0 +shack 0 +bazillion 0 +mpeg 0 +movi 0 +archiv 0 +great 0 +muppet 0 +sound 0 +rachel 0 +want 0 +select 0 +trip 0 +look 0 +bout 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..fd34712f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,130 @@ +storag 1 +dewitt 1 +bing 0 +research 0 +public 0 +madison 0 +dbm 0 +databas 0 +system 0 +paradis 0 +queri 0 +data 0 +page 0 +gener 0 +inform 0 +advisor 0 +interest 0 +project 0 +pointer 0 +comput 0 +wisconsin 0 +jieb 0 +wisc 0 +tertiari 0 +manag 0 +object 0 +technolog 0 +patel 0 +kabra 0 +naughton 0 +submit 0 +octob 0 +process 0 +size 0 +appear 0 +septemb 0 +constraint 0 +februari 0 +client 0 +server 0 +proceed 0 +confer 0 +tenni 0 +pictur 0 +home 0 +index 0 +educ 0 +hobbi 0 +informationresearch 0 +assistantdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +edueduc 0 +scienc 0 +univers 0 +prof 0 +david 0 +dewittresearch 0 +support 0 +parallel 0 +orient 0 +geograph 0 +systemsresearch 0 +shorepublicationsbuild 0 +scaleabl 0 +spatial 0 +implment 0 +evalu 0 +tuft 0 +burger 0 +hall 0 +ramasami 0 +lueder 0 +ellman 0 +kupsch 0 +execut 0 +batch 0 +prong 0 +approach 0 +effici 0 +tape 0 +resid 0 +set 0 +satellit 0 +imag 0 +studi 0 +impact 0 +tile 0 +perform 0 +nasa 0 +goddard 0 +conferenceon 0 +mass 0 +us 0 +tree 0 +goldstein 0 +ramakrishnan 0 +shaft 0 +shorter 0 +version 0 +workshop 0 +larg 0 +base 0 +santiago 0 +chile 0 +reclam 0 +reorgan 0 +serverpersist 0 +store 0 +yong 0 +ieee 0 +engin 0 +houston 0 +eosdi 0 +sigmod 0 +grouphobbi 0 +volleybal 0 +volleyballweb 0 +white 0 +water 0 +raft 0 +whitewat 0 +find 0 +click 0 +full 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..40f610d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,58 @@ +jignesh 1 +paradis 1 +publish 1 +home 0 +page 0 +research 0 +madison 0 +system 0 +databas 0 +relat 0 +public 0 +paper 0 +join 0 +patel 0 +wisc 0 +welcom 0 +assist 0 +depart 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +telephon 0 +advisor 0 +david 0 +dewitt 0 +interest 0 +parallel 0 +object 0 +current 0 +work 0 +project 0 +client 0 +server 0 +vldb 0 +partit 0 +base 0 +spatial 0 +merg 0 +sigmod 0 +accur 0 +model 0 +hybrid 0 +hash 0 +algorithm 0 +sigmetr 0 +miscellan 0 +stuff 0 +virtual 0 +tourist 0 +inlin 0 +skate 0 +madhuri 0 +kashmir 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..30676e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,10 @@ +page 1 +georg 0 +varghes 0 +peopl 0 +download 0 +netscap 0 +click 0 +warn 0 +pretti 0 +lame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..9f74f9ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,76 @@ +gehrk 1 +madison 1 +johann 0 +comput 0 +system 0 +time 0 +intern 0 +univers 0 +area 0 +interest 0 +inform 0 +public 0 +wisconsin 0 +scienc 0 +depart 0 +baruah 0 +plaxton 0 +share 0 +resourc 0 +real 0 +ieee 0 +version 0 +homepagejohann 0 +gehrkewelcom 0 +graduat 0 +studentat 0 +sciencesdepart 0 +ofwisconsin 0 +databasemanag 0 +work 0 +data 0 +mine 0 +underprofessor 0 +raghuramakrishnan 0 +page 0 +construct 0 +contact 0 +linkscontact 0 +email 0 +utexa 0 +offic 0 +west 0 +dayton 0 +street 0 +room 0 +home 0 +eagl 0 +height 0 +stoica 0 +abdel 0 +wahab 0 +jeffai 0 +proport 0 +alloc 0 +algorithmfor 0 +proceed 0 +symposium 0 +washington 0 +decemb 0 +appear 0 +anexpand 0 +fastschedul 0 +period 0 +task 0 +multipl 0 +inproceed 0 +parallel 0 +processingsymposium 0 +april 0 +expand 0 +avail 0 +technicalreport 0 +universityof 0 +texa 0 +austin 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..86c4de16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,71 @@ +madison 1 +home 0 +page 0 +comput 0 +student 0 +list 0 +appl 0 +site 0 +pagewelcom 0 +first 0 +year 0 +graduat 0 +univers 0 +wisconsin 0 +studi 0 +scienc 0 +us 0 +also 0 +maintain 0 +frequent 0 +ask 0 +question 0 +latest 0 +powerbook 0 +model 0 +releas 0 +thing 0 +look 0 +section 0 +click 0 +herei 0 +amass 0 +good 0 +number 0 +catagori 0 +check 0 +depart 0 +alma 0 +mater 0 +visit 0 +often 0 +needsth 0 +nando 0 +time 0 +great 0 +new 0 +coverageth 0 +spot 0 +mind 0 +numb 0 +soap 0 +operaish 0 +drivelziffnet 0 +industri 0 +newsc 0 +databas 0 +manag 0 +system 0 +construct 0 +compil 0 +keep 0 +classworktodai 0 +dilbert 0 +chucklejon 0 +bodner 0 +jonb 0 +wisc 0 +mound 0 +last 0 +modifi 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..58e1b0f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,85 @@ +depart 1 +scienc 0 +home 0 +madison 0 +comput 0 +student 0 +chines 0 +academi 0 +china 0 +nanj 0 +univers 0 +advanc 0 +network 0 +spring 0 +welcom 0 +page 0 +first 0 +year 0 +graduat 0 +came 0 +frombeij 0 +hometown 0 +capitol 0 +jiangsu 0 +provinc 0 +degre 0 +wisconsin 0 +institut 0 +autom 0 +beij 0 +specil 0 +pattern 0 +recognit 0 +imag 0 +process 0 +biomed 0 +engin 0 +southeast 0 +chinacurr 0 +activ 0 +cours 0 +topic 0 +databas 0 +manag 0 +oper 0 +system 0 +teach 0 +assist 0 +data 0 +structur 0 +current 0 +address 0 +work 0 +west 0 +dayton 0 +street 0 +tele 0 +offic 0 +could 0 +finger 0 +wisc 0 +refer 0 +inform 0 +class 0 +technic 0 +stuffjava 0 +placeshor 0 +tutorialchina 0 +affairchina 0 +democracybeij 0 +place 0 +interest 0 +stanford 0 +groupstanford 0 +medic 0 +informaticsmit 0 +commun 0 +control 0 +signal 0 +processingjob 0 +site 0 +newsyou 0 +visitor 0 +number 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..d57958fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,79 @@ +jose 1 +comput 0 +mercuri 0 +new 0 +home 0 +andnando 0 +jeff 0 +shabel 0 +offic 0 +scienc 0 +graduat 0 +view 0 +engin 0 +cupertino 0 +high 0 +school 0 +favorit 0 +columbia 0 +hous 0 +join 0 +pagech 0 +welcom 0 +page 0 +wisconsinch 0 +theme 0 +song 0 +hour 0 +tue 0 +thur 0 +appoint 0 +person 0 +informationmajor 0 +architectur 0 +emphasi 0 +statu 0 +second 0 +year 0 +student 0 +fall 0 +schedul 0 +academ 0 +background 0 +receiv 0 +diego 0 +electr 0 +depart 0 +town 0 +monta 0 +vista 0 +plan 0 +sport 0 +team 0 +golden 0 +state 0 +warrior 0 +basketbal 0 +shark 0 +hockei 0 +francisco 0 +footbal 0 +oakland 0 +link 0 +newsmus 0 +find 0 +deal 0 +also 0 +tip 0 +info 0 +music 0 +club 0 +miscellan 0 +print 0 +postscript 0 +document 0 +window 0 +send 0 +mail 0 +jshabel 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..08915ef2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,193 @@ +data 1 +disk 0 +report 0 +tape 0 +livni 0 +perform 0 +technolog 0 +larg 0 +proceed 0 +confer 0 +jussi 0 +comput 0 +depart 0 +univers 0 +tertiari 0 +miron 0 +join 0 +home 0 +myllymaki 0 +wisconsin 0 +explor 0 +structur 0 +visual 0 +submit 0 +research 0 +wisc 0 +analysi 0 +includ 0 +buffer 0 +dataset 0 +storageto 0 +recent 0 +relat 0 +appli 0 +organ 0 +set 0 +andtap 0 +intern 0 +integr 0 +parallel 0 +theintern 0 +engin 0 +ramakrishnan 0 +synchron 0 +technic 0 +master 0 +yoav 0 +weiss 0 +class 0 +link 0 +scsi 0 +digit 0 +myllymakijussi 0 +assist 0 +scienc 0 +west 0 +dayton 0 +street 0 +madison 0 +telephon 0 +email 0 +eduresearch 0 +summaryi 0 +interest 0 +dbm 0 +oper 0 +onadvanc 0 +arrai 0 +mcurrent 0 +studi 0 +memori 0 +us 0 +andvisu 0 +deviseproject 0 +advisor 0 +prof 0 +mironlivni 0 +work 0 +improv 0 +joinsof 0 +volum 0 +resid 0 +public 0 +listbelow 0 +solv 0 +problem 0 +associ 0 +divers 0 +characterist 0 +andfunct 0 +limit 0 +media 0 +paper 0 +datavisu 0 +discuss 0 +metadata 0 +managementissu 0 +complex 0 +involv 0 +refere 0 +publicationseffici 0 +concurr 0 +theori 0 +measur 0 +evalu 0 +commun 0 +system 0 +octob 0 +programperform 0 +karen 0 +karavan 0 +bartonp 0 +miller 0 +third 0 +workshop 0 +environ 0 +andtool 0 +scientif 0 +august 0 +tertiarystorag 0 +daniel 0 +ford 0 +februari 0 +alsoavail 0 +almaden 0 +withmiron 0 +raghu 0 +spie 0 +societi 0 +optic 0 +januari 0 +access 0 +acmsigmetr 0 +publicationdevis 0 +queri 0 +beyer 0 +chen 0 +donjerkov 0 +lawand 0 +wenger 0 +sigmod 0 +storag 0 +andmiron 0 +dataengin 0 +publicationsdisk 0 +tapeaccess 0 +project 0 +degreeproject 0 +client 0 +server 0 +model 0 +networkarchitectur 0 +thesi 0 +helsinki 0 +industri 0 +manag 0 +finnish 0 +documentsimplement 0 +treealgorithm 0 +jeff 0 +schwarz 0 +experi 0 +implement 0 +filesystem 0 +trishul 0 +chilimbi 0 +overview 0 +current 0 +productsoverview 0 +raid 0 +supplier 0 +productssom 0 +frequent 0 +need 0 +unifi 0 +search 0 +adaptec 0 +adapt 0 +alpha 0 +workstationsandpcsandtechn 0 +journaland 0 +whitepap 0 +researchandcyberjourn 0 +quantum 0 +linear 0 +tapeanddlt 0 +faqandwhitepap 0 +solarisandsparcstationsandtechn 0 +faqandstorag 0 +faqand 0 +otherusenet 0 +faqsmani 0 +found 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..e8e7b2b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepag 1 +jyothithi 1 +page 1 +construct 1 +info 1 +student 1 +cours 1 +grade 1 +other 1 +sorri 1 +dissappoint 1 +email 1 +jyothi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..16947a90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,88 @@ +comput 1 +school 0 +madison 0 +scienc 0 +high 0 +karen 0 +parallel 0 +perform 0 +support 0 +ship 0 +karavaniceveryth 0 +need 0 +know 0 +learn 0 +public 0 +karavanicresearch 0 +assist 0 +paradyn 0 +tool 0 +project 0 +univers 0 +wisconsin 0 +depart 0 +west 0 +dayton 0 +street 0 +karavan 0 +wisc 0 +current 0 +pursu 0 +research 0 +interest 0 +includ 0 +environ 0 +autom 0 +tune 0 +process 0 +oper 0 +system 0 +databasesask 0 +women 0 +wic 0 +frontier 0 +cool 0 +program 0 +dane 0 +counti 0 +studentstrio 0 +student 0 +servic 0 +free 0 +tutor 0 +undergradu 0 +miss 0 +site 0 +page 0 +could 0 +save 0 +life 0 +safer 0 +pagefor 0 +chocol 0 +lover 0 +onlystuyves 0 +alumni 0 +associationstuyves 0 +class 0 +thoma 0 +legisl 0 +inform 0 +internetth 0 +constitut 0 +cure 0 +anyth 0 +salt 0 +water 0 +sweat 0 +tear 0 +isak 0 +dinesen 0 +port 0 +safe 0 +sail 0 +thing 0 +admir 0 +grace 0 +hopper 0 +pioneer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..94fe465d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,109 @@ +extens 1 +stefano 0 +kaxira 0 +jame 0 +kiloprocessor 0 +intern 0 +wisconsin 0 +memori 0 +coher 0 +cach 0 +parallel 0 +glow 0 +hierarch 0 +goodman 0 +perform 0 +comput 0 +papakonstantin 0 +research 0 +interest 0 +summari 0 +recent 0 +share 0 +design 0 +processor 0 +architectur 0 +appear 0 +proceed 0 +workshop 0 +base 0 +high 0 +cost 0 +softwar 0 +synthesi 0 +us 0 +prolog 0 +tsanaka 0 +home 0 +page 0 +wisc 0 +editor 0 +ieee 0 +sciresearch 0 +assist 0 +univers 0 +publicationsresearch 0 +multiprocess 0 +scalabl 0 +interfac 0 +aspect 0 +galileo 0 +introduc 0 +collaborationwith 0 +work 0 +examin 0 +depth 0 +option 0 +develop 0 +upcom 0 +standard 0 +incolabor 0 +david 0 +stein 0 +gjess 0 +public 0 +protocol 0 +wide 0 +data 0 +goodmanto 0 +confer 0 +supercomput 0 +also 0 +technic 0 +report 0 +kaxirasto 0 +process 0 +symposium 0 +april 0 +implement 0 +wind 0 +tunnel 0 +goodmannd 0 +march 0 +goodmanst 0 +august 0 +kaxirasunivers 0 +scienc 0 +dept 0 +juli 0 +tool 0 +simul 0 +prototyp 0 +monitor 0 +multiprocessor 0 +system 0 +stafylopati 0 +kaxirasinform 0 +technolog 0 +autom 0 +dedic 0 +specif 0 +pekmestzi 0 +kaxirasp 0 +greec 0 +hardwar 0 +methodolog 0 +kaxirasmicroprocess 0 +microprogram 0 +north 0 +holland 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..10a43184 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,251 @@ +page 1 +caltech 0 +inform 0 +probabl 0 +comput 0 +slide 0 +like 0 +world 0 +make 0 +us 0 +would 0 +databas 0 +talk 0 +gave 0 +index 0 +quot 0 +keeper 0 +home 0 +steven 0 +everyth 0 +made 0 +note 0 +markup 0 +look 0 +peopl 0 +allow 0 +start 0 +year 0 +graduat 0 +student 0 +ever 0 +leav 0 +surpris 0 +aliv 0 +want 0 +hedgehog 0 +contact 0 +pager 0 +write 0 +number 0 +list 0 +project 0 +insid 0 +user 0 +foughtthei 0 +heaven 0 +perfect 0 +littl 0 +doesn 0 +realli 0 +need 0 +light 0 +lauri 0 +anderson 0 +strang 0 +angel 0 +possibl 0 +structur 0 +indic 0 +piec 0 +rather 0 +access 0 +bitmap 0 +displai 0 +includ 0 +theblind 0 +standard 0 +tag 0 +whateverbrows 0 +wish 0 +also 0 +literari 0 +convent 0 +ironi 0 +satir 0 +sarcasm 0 +butnoth 0 +contain 0 +herein 0 +meant 0 +offens 0 +areoffend 0 +stupid 0 +third 0 +scienc 0 +depart 0 +firsttwo 0 +week 0 +sinc 0 +support 0 +take 0 +care 0 +varieti 0 +machin 0 +vari 0 +degre 0 +success 0 +side 0 +never 0 +publish 0 +done 0 +anyth 0 +impress 0 +miracl 0 +pass 0 +prelim 0 +research 0 +addup 0 +hill 0 +bean 0 +fizzl 0 +result 0 +areobtain 0 +junior 0 +level 0 +programm 0 +creatingkiosk 0 +front 0 +end 0 +visual 0 +primit 0 +system 0 +perman 0 +skill 0 +free 0 +discov 0 +especi 0 +wasn 0 +convinc 0 +thosewho 0 +know 0 +well 0 +argu 0 +proof 0 +mybe 0 +anywai 0 +might 0 +read 0 +thoughtson 0 +electron 0 +mail 0 +reliabl 0 +specif 0 +locat 0 +often 0 +work 0 +ifyou 0 +person 0 +someth 0 +address 0 +wisc 0 +becom 0 +clear 0 +wantto 0 +short 0 +notic 0 +give 0 +variou 0 +creation 0 +thought 0 +todo 0 +updat 0 +sporad 0 +associ 0 +rsum 0 +postscript 0 +document 0 +html 0 +section 0 +long 0 +outof 0 +date 0 +unfortun 0 +danenet 0 +dilhr 0 +jobnet 0 +soon 0 +defunct 0 +institut 0 +archiv 0 +photonet 0 +personnel 0 +directori 0 +much 0 +better 0 +anyon 0 +enter 0 +databaseus 0 +form 0 +interfac 0 +distribut 0 +object 0 +call 0 +java 0 +danger 0 +love 0 +come 0 +hell 0 +freez 0 +rate 0 +break 0 +hierarchi 0 +consult 0 +somewher 0 +fought 0 +unifi 0 +attribut 0 +sfuai 0 +informationag 0 +intellectu 0 +properti 0 +assigna 0 +uniqu 0 +serial 0 +refer 0 +atth 0 +provid 0 +sourc 0 +contextu 0 +pointer 0 +relev 0 +bui 0 +adob 0 +distil 0 +translat 0 +rsuminto 0 +chanc 0 +ofread 0 +suppos 0 +print 0 +pinch 0 +certaintruth 0 +psycholog 0 +softwar 0 +eventuallypick 0 +aren 0 +taught 0 +explicitli 0 +think 0 +possibleto 0 +easier 0 +cheap 0 +shot 0 +thing 0 +hate 0 +idea 0 +mull 0 +accessibleto 0 +small 0 +subset 0 +tough 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..86244ab8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,24 @@ +perform 1 +krishna 0 +kunchithapadamkrishna 0 +kunchithapadamgreet 0 +welcom 0 +page 0 +interest 0 +read 0 +languag 0 +indian 0 +classic 0 +music 0 +miscellaneouspubl 0 +data 0 +distribut 0 +steer 0 +toolsresum 0 +gzip 0 +postscript 0 +contact 0 +search 0 +last 0 +modifi 0 +bykk 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..e9046bcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,31 @@ +tuft 1 +madison 0 +kristin 0 +home 0 +research 0 +scienc 0 +wisc 0 +inform 0 +pagekristin 0 +assist 0 +comput 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +eduadvisor 0 +david 0 +dewitt 0 +miscellani 0 +serveruw 0 +dbm 0 +groupacm 0 +sigmod 0 +server 0 +pageeo 0 +project 0 +officelast 0 +modifi 0 +tuftekristin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..134b8eb6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,47 @@ +krung 1 +comput 1 +page 0 +inform 0 +follow 0 +cours 0 +work 0 +scienc 0 +depart 0 +person 0 +madison 0 +wisconsin 0 +homepageupd 0 +novemb 0 +homepag 0 +underconstructioni 0 +keep 0 +short 0 +good 0 +serf 0 +year 0 +cometh 0 +relat 0 +topic 0 +research 0 +mathemat 0 +program 0 +project 0 +pursu 0 +compani 0 +favorit 0 +hobbi 0 +opinion 0 +life 0 +linkedth 0 +import 0 +link 0 +univers 0 +whole 0 +uniqu 0 +entiti 0 +electron 0 +librari 0 +system 0 +sinapiromsaran 0 +emailkrung 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..f76cf4b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,137 @@ +kunen 1 +autom 0 +mathemat 0 +logic 0 +comput 0 +theori 0 +reason 0 +appear 0 +univers 0 +hart 0 +wisconsin 0 +deduct 0 +program 0 +applic 0 +semant 0 +measur 0 +axiom 0 +wisc 0 +interest 0 +topolog 0 +research 0 +work 0 +theorem 0 +singl 0 +group 0 +expon 0 +fundamenta 0 +mathematica 0 +technic 0 +report 0 +quasigroup 0 +algebra 0 +loop 0 +preprint 0 +review 0 +math 0 +home 0 +page 0 +kenneth 0 +professormath 0 +scienc 0 +dayton 0 +madison 0 +mail 0 +edutelephon 0 +stanford 0 +summari 0 +involv 0 +typic 0 +tool 0 +like 0 +resolutionto 0 +prove 0 +studi 0 +languag 0 +likeprolog 0 +specif 0 +topic 0 +consid 0 +prologus 0 +negat 0 +failur 0 +incompat 0 +betweenleast 0 +fix 0 +point 0 +prolog 0 +style 0 +backtrack 0 +axiomat 0 +besid 0 +right 0 +thissubject 0 +relat 0 +variou 0 +abstract 0 +area 0 +theoret 0 +mani 0 +basic 0 +question 0 +turn 0 +independ 0 +usualaxiom 0 +select 0 +recent 0 +public 0 +follow 0 +postscript 0 +file 0 +shortest 0 +ramsei 0 +boyer 0 +moor 0 +mill 0 +corson 0 +compact 0 +space 0 +local 0 +constant 0 +function 0 +answer 0 +liter 0 +construct 0 +moufang 0 +associ 0 +law 0 +structur 0 +conjugaci 0 +close 0 +complet 0 +result 0 +link 0 +resolut 0 +press 0 +weak 0 +extens 0 +rough 0 +draft 0 +book 0 +note 0 +moschovaki 0 +american 0 +monthli 0 +cours 0 +taught 0 +fall 0 +geometr 0 +infer 0 +foundat 0 +spring 0 +comp 0 +artifici 0 +intellig 0 +last 0 +chang 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..3e76dfed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,245 @@ +laru 1 +program 0 +jame 0 +parallel 0 +languag 0 +research 0 +memori 0 +compil 0 +support 0 +system 0 +comput 0 +softwar 0 +share 0 +david 0 +wood 0 +mark 0 +hill 0 +confer 0 +intern 0 +architectur 0 +tool 0 +august 0 +wisconsin 0 +project 0 +profil 0 +execut 0 +edit 0 +data 0 +richard 0 +control 0 +employ 0 +wisc 0 +interest 0 +machin 0 +wind 0 +tunnel 0 +grain 0 +ball 0 +brad 0 +guhan 0 +viswanathan 0 +sigplan 0 +implement 0 +novemb 0 +alvin 0 +lebeck 0 +steven 0 +reinhardt 0 +sixth 0 +forprogram 0 +oper 0 +asplo 0 +octob 0 +first 0 +develop 0 +juli 0 +educ 0 +cours 0 +recent 0 +univers 0 +california 0 +berkelei 0 +colleg 0 +design 0 +larg 0 +path 0 +thoma 0 +appear 0 +micro 0 +protocol 0 +satish 0 +chandra 0 +pldi 0 +eric 0 +schnarr 0 +effici 0 +applic 0 +distribut 0 +symposium 0 +user 0 +level 0 +babak 0 +falsafi 0 +ioanni 0 +schoina 0 +ann 0 +roger 0 +annot 0 +hardwar 0 +lorenz 0 +bell 0 +lab 0 +flow 0 +perform 0 +home 0 +page 0 +associ 0 +professor 0 +sciencedepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usalaru 0 +eduphon 0 +secretari 0 +fingerson 0 +thea 0 +sklenar 0 +department 0 +offic 0 +upcom 0 +paper 0 +graduat 0 +summari 0 +harvard 0 +languagesand 0 +particular 0 +symbol 0 +trace 0 +librari 0 +structuresc 0 +java 0 +spim 0 +wartsrec 0 +paperseffici 0 +decemb 0 +programminglanguag 0 +gregori 0 +wilson 0 +us 0 +press 0 +teapot 0 +write 0 +coher 0 +instruct 0 +schedul 0 +andjam 0 +workshop 0 +wcsss 0 +februari 0 +irregular 0 +shubhendu 0 +mukherje 0 +shamik 0 +sharma 0 +annerog 0 +joel 0 +saltz 0 +fifth 0 +principl 0 +practiceof 0 +ppopp 0 +independ 0 +languagesdesign 0 +june 0 +tempest 0 +substrat 0 +portabl 0 +compcon 0 +spring 0 +march 0 +static 0 +branch 0 +frequenc 0 +analysi 0 +youfeng 0 +annual 0 +ieee 0 +microarchitectur 0 +specif 0 +markhil 0 +supercomput 0 +time 0 +spent 0 +messag 0 +pass 0 +fine 0 +access 0 +jameslaru 0 +cachier 0 +automat 0 +insert 0 +cico 0 +trishul 0 +chilimbi 0 +icpp 0 +bibliographi 0 +unpublish 0 +manuscript 0 +revis 0 +frequent 0 +cooper 0 +scalabl 0 +multiprocessor 0 +transact 0 +toc 0 +wart 0 +madhusudhan 0 +talluri 0 +new 0 +graduatesbrad 0 +vassar 0 +septemb 0 +techniqu 0 +languagesfirst 0 +oracl 0 +huelsbergen 0 +dynam 0 +depend 0 +tball 0 +summarymi 0 +focus 0 +problem 0 +part 0 +thewisconsin 0 +havehelp 0 +hybrid 0 +computerarchitectur 0 +facilit 0 +parallelmachin 0 +current 0 +student 0 +demonstr 0 +exploit 0 +power 0 +coherencepolici 0 +also 0 +evalu 0 +help 0 +programmersunderstand 0 +improv 0 +andi 0 +algorithm 0 +provid 0 +moredetail 0 +understand 0 +within 0 +routin 0 +hasidentifi 0 +possibl 0 +better 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..68106d8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick 1 +pagenick 1 +pageoffic 1 +phone 1 +email 1 +leavi 1 +wisc 1 +eduoffic 1 +hour 1 +tuesdai 1 +wednessdai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..5b8b2966 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,77 @@ +lederman 1 +steven 0 +huss 0 +research 0 +madison 0 +forum 0 +home 0 +page 0 +interest 0 +univ 0 +wisconsin 0 +also 0 +mpistandard 0 +book 0 +inform 0 +complet 0 +sourc 0 +file 0 +comput 0 +scienc 0 +wisc 0 +includ 0 +relat 0 +thewisconsin 0 +wind 0 +tunnel 0 +project 0 +area 0 +parallel 0 +linear 0 +algebra 0 +iscov 0 +prismproject 0 +heavili 0 +invol 0 +sever 0 +other 0 +recent 0 +publish 0 +origin 0 +order 0 +press 0 +isbn 0 +look 0 +refer 0 +editor 0 +current 0 +draft 0 +pleas 0 +keep 0 +mind 0 +work 0 +ongo 0 +andit 0 +document 0 +intend 0 +ongoingwork 0 +committe 0 +member 0 +compress 0 +postscript 0 +compressedtar 0 +individu 0 +avail 0 +would 0 +finger 0 +dept 0 +dayton 0 +phone 0 +messag 0 +desper 0 +mail 0 +http 0 +html 0 +offic 0 +statist 0 +build 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..c33aaf74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,84 @@ +network 1 +design 0 +control 0 +virtual 0 +proceed 0 +lawrenc 0 +landweb 0 +comput 0 +univers 0 +wisconsin 0 +protocol 0 +high 0 +speed 0 +research 0 +project 0 +implement 0 +congest 0 +admiss 0 +infocom 0 +confer 0 +dynam 0 +time 0 +window 0 +faber 0 +mukherje 0 +loop 0 +home 0 +page 0 +professor 0 +scienc 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +email 0 +wisc 0 +purdu 0 +interest 0 +electronicmail 0 +summari 0 +program 0 +focus 0 +participatingin 0 +gigabit 0 +darpa 0 +nation 0 +involvesth 0 +testb 0 +oper 0 +atgigabit 0 +second 0 +data 0 +rate 0 +work 0 +onissu 0 +visualizationof 0 +atmospher 0 +phenomena 0 +conferenc 0 +sampl 0 +recent 0 +public 0 +fast 0 +circuit 0 +establishmentmethod 0 +olsen 0 +theieee 0 +francisco 0 +april 0 +packet 0 +feedback 0 +witht 0 +sigcommconfer 0 +baltimor 0 +august 0 +gener 0 +clock 0 +combin 0 +close 0 +open 0 +ieee 0 +florenc 0 +coursesconnect 0 +tabl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..b5e62be4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,49 @@ +comput 1 +scienc 0 +lloyd 0 +univers 0 +depart 0 +linguist 0 +shannon 0 +madison 0 +comp 0 +utah 0 +languag 0 +home 0 +page 0 +work 0 +address 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +wisc 0 +respons 0 +us 0 +lectur 0 +section 0 +offic 0 +hour 0 +wednesdai 0 +thursdai 0 +appoint 0 +fall 0 +cours 0 +construct 0 +compil 0 +variou 0 +link 0 +women 0 +chemistri 0 +person 0 +engin 0 +career 0 +servic 0 +archiv 0 +natur 0 +process 0 +artifici 0 +intellig 0 +cognit 0 +xsoft 0 +lexdemo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..c3db08f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,193 @@ +page 1 +option 1 +cool 0 +fill 0 +luka 0 +fall 0 +also 0 +mail 0 +list 0 +click 0 +entertain 0 +check 0 +thing 0 +lone 0 +meet 0 +women 0 +interact 0 +realli 0 +like 0 +know 0 +feel 0 +search 0 +checkbox 0 +includ 0 +christoph 0 +home 0 +pagechristoph 0 +lukasrelev 0 +inform 0 +offic 0 +phone 0 +email 0 +wisc 0 +edui 0 +appar 0 +coordin 0 +mspl 0 +workshipi 0 +defeat 0 +rival 0 +quest 0 +becom 0 +semest 0 +sunivers 0 +wisconsin 0 +program 0 +languag 0 +seminar 0 +czar 0 +cours 0 +go 0 +take 0 +festiv 0 +java 0 +taught 0 +advisor 0 +tuft 0 +univers 0 +site 0 +save 0 +tiger 0 +number 0 +free 0 +prisonerthi 0 +stock 0 +quoteserv 0 +maintain 0 +fabul 0 +wealth 0 +todd 0 +amus 0 +friend 0 +pagebet 0 +polit 0 +candid 0 +legal 0 +iowa 0 +electron 0 +market 0 +identitycaptain 0 +kirk 0 +sing 0 +gui 0 +troubl 0 +throughamaz 0 +technolog 0 +longer 0 +need 0 +concern 0 +withtri 0 +real 0 +virtual 0 +girlfriend 0 +traci 0 +teri 0 +wait 0 +wife 0 +incred 0 +jump 0 +catthi 0 +anywai 0 +well 0 +pleas 0 +send 0 +case 0 +someth 0 +current 0 +name 0 +address 0 +favorit 0 +appli 0 +killer 0 +buttmunchextrem 0 +dudemichael 0 +nesmith 0 +fanfoolmyth 0 +figurewick 0 +good 0 +basketbal 0 +playervalu 0 +studentment 0 +defectivea 0 +wkrp 0 +cincinatti 0 +tragic 0 +figuregeek 0 +tradesgonzo 0 +admirernetscap 0 +junki 0 +child 0 +pornpersonifi 0 +organ 0 +condom 0 +stretch 0 +much 0 +readi 0 +blowflam 0 +testicl 0 +outer 0 +space 0 +tast 0 +goodpoetri 0 +guruhogwildthi 0 +kick 0 +assman 0 +manbig 0 +dudeuh 0 +ohprofession 0 +muff 0 +diverregress 0 +higher 0 +lifeformherald 0 +alien 0 +invas 0 +forcechri 0 +html 0 +formsalienherpetophiletodd 0 +turnidg 0 +hatth 0 +mancreepi 0 +laugh 0 +headsmal 0 +planetdr 0 +companioneast 0 +bunnycyberweenietcl 0 +hellbeast 0 +simpli 0 +submit 0 +reload 0 +mayb 0 +figur 0 +automat 0 +keyword 0 +interest 0 +superhighwai 0 +drug 0 +cosmo 0 +irrit 0 +gross 0 +nake 0 +scatolog 0 +pervert 0 +offspr 0 +food 0 +etymolog 0 +phat 0 +gnarli 0 +bogu 0 +wierd 0 +cybermuffin 0 +pictur 0 +erotica 0 +chees 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..61d0b683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,84 @@ +comput 1 +univers 1 +scienc 0 +home 0 +page 0 +want 0 +wuhan 0 +dept 0 +ling 0 +zheng 0 +madison 0 +mail 0 +know 0 +click 0 +research 0 +paradyn 0 +take 0 +look 0 +alumni 0 +best 0 +graduat 0 +welcom 0 +depart 0 +sheboygan 0 +dayton 0 +offic 0 +phone 0 +lzheng 0 +wisc 0 +shameless 0 +self 0 +promot 0 +resum 0 +text 0 +version 0 +side 0 +interest 0 +assist 0 +group 0 +current 0 +hack 0 +onto 0 +hpux 0 +port 0 +boss 0 +barton 0 +miller 0 +also 0 +charg 0 +chinaand 0 +girlfriend 0 +pictur 0 +temporarili 0 +architectur 0 +educ 0 +prese 0 +winsconsin 0 +iowa 0 +officem 0 +marcelo 0 +goncalv 0 +ignor 0 +china 0 +place 0 +surf 0 +compani 0 +hereif 0 +school 0 +sthe 0 +infom 0 +could 0 +america 0 +schoolssend 0 +suggest 0 +homepag 0 +bother 0 +thank 0 +last 0 +updat 0 +march 0 +visitor 0 +number 0 +sinc 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..6b3c4619 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,44 @@ +know 1 +manuvir 0 +look 0 +like 0 +golf 0 +home 0 +pagemanuvir 0 +dasnow 0 +name 0 +andwhat 0 +hello 0 +feelfre 0 +around 0 +need 0 +inform 0 +somethingsend 0 +email 0 +passion 0 +anact 0 +photo 0 +later 0 +manuvirwhat 0 +gener 0 +start 0 +advisor 0 +better 0 +thisto 0 +keep 0 +monei 0 +come 0 +turn 0 +theorigin 0 +america 0 +team 0 +cours 0 +leagu 0 +plai 0 +dai 0 +sundai 0 +round 0 +final 0 +consin 0 +said 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..5110c5ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,394 @@ +comput 1 +hill 0 +mark 0 +architectur 0 +memori 0 +system 0 +wisconsin 0 +david 0 +wood 0 +parallel 0 +page 0 +research 0 +jame 0 +perform 0 +share 0 +laru 0 +intern 0 +sarita 0 +adv 0 +scienc 0 +hardwar 0 +june 0 +symposium 0 +cach 0 +wind 0 +tunnel 0 +isca 0 +home 0 +univers 0 +address 0 +project 0 +juli 0 +implement 0 +support 0 +confer 0 +talluri 0 +ieee 0 +transact 0 +august 0 +current 0 +inform 0 +recent 0 +engin 0 +program 0 +tabl 0 +oper 0 +shubhendu 0 +mukherje 0 +distribut 0 +madhusudhan 0 +alvin 0 +lebeck 0 +steven 0 +reinhardt 0 +kessler 0 +model 0 +wisc 0 +offic 0 +like 0 +data 0 +advanc 0 +languag 0 +machin 0 +design 0 +level 0 +space 0 +interfac 0 +softwar 0 +supercomput 0 +subblock 0 +babak 0 +falsafi 0 +simul 0 +novemb 0 +sigmetr 0 +consist 0 +first 0 +employ 0 +email 0 +markhil 0 +associ 0 +professor 0 +content 0 +hour 0 +interest 0 +sampler 0 +us 0 +group 0 +tool 0 +wart 0 +patterson 0 +spec 0 +benchmark 0 +suit 0 +california 0 +berkelei 0 +high 0 +larg 0 +analysi 0 +requir 0 +work 0 +expect 0 +workstat 0 +process 0 +compil 0 +tempest 0 +madhu 0 +translat 0 +cluster 0 +base 0 +tlb 0 +experiment 0 +coher 0 +applic 0 +ann 0 +roger 0 +protocol 0 +superpag 0 +comparison 0 +trace 0 +sampl 0 +multi 0 +megabyt 0 +cooper 0 +multiprocessor 0 +lewi 0 +weak 0 +rice 0 +richard 0 +crai 0 +pagemark 0 +andelectr 0 +engineeringat 0 +wisconsint 0 +teach 0 +catalog 0 +educ 0 +andsummari 0 +paper 0 +graduateslink 0 +world 0 +wide 0 +stuff 0 +oralpresent 0 +advic 0 +includ 0 +show 0 +give 0 +talk 0 +onlin 0 +forcach 0 +proof 0 +sound 0 +depart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usamarkhil 0 +eduphon 0 +secretari 0 +fingerson 0 +thea 0 +sklenar 0 +department 0 +fall 0 +mondai 0 +wednesdai 0 +appoint 0 +educurr 0 +teachingfal 0 +ifal 0 +topic 0 +java 0 +icatalog 0 +cours 0 +teachc 0 +organ 0 +programmingc 0 +introduct 0 +architecturec 0 +iieduc 0 +michigan 0 +evaluationresearch 0 +summarymi 0 +target 0 +multiprocessorsand 0 +uniprocessor 0 +import 0 +determin 0 +sustain 0 +mywork 0 +emphas 0 +quantit 0 +often 0 +evaluationtechniqu 0 +much 0 +part 0 +windtunnel 0 +projectwith 0 +prof 0 +manystud 0 +futur 0 +massiv 0 +computerswil 0 +built 0 +node 0 +levelparallel 0 +inwhich 0 +uniformli 0 +refer 0 +seek 0 +todevelop 0 +consensu 0 +middl 0 +languagesand 0 +recentlypropos 0 +enabl 0 +programm 0 +librari 0 +messag 0 +pass 0 +transpar 0 +hybrid 0 +combin 0 +aredevelop 0 +think 0 +aclust 0 +hypothet 0 +platform 0 +name 0 +toolsto 0 +cull 0 +manner 0 +similarto 0 +aeronaut 0 +convent 0 +designairplan 0 +talluritarget 0 +improv 0 +lookasid 0 +buffer 0 +align 0 +option 0 +chang 0 +complet 0 +superpagesand 0 +partial 0 +asplosandsosppap 0 +papersth 0 +annot 0 +bibliographi 0 +unpublish 0 +manuscript 0 +revis 0 +frequent 0 +bidirect 0 +technolog 0 +transfer 0 +sabbat 0 +industri 0 +network 0 +fine 0 +grain 0 +commun 0 +anddavid 0 +optimist 0 +execut 0 +sashikanth 0 +chandrasekaran 0 +workshop 0 +pad 0 +yousef 0 +khalidi 0 +princip 0 +sosp 0 +decemb 0 +presidenti 0 +young 0 +investig 0 +award 0 +final 0 +report 0 +effici 0 +irregular 0 +shamik 0 +sharma 0 +joel 0 +saltz 0 +ppopp 0 +cost 0 +effect 0 +februari 0 +solv 0 +microstructur 0 +electrostat 0 +propos 0 +frank 0 +traenkl 0 +sangta 0 +chemic 0 +specif 0 +user 0 +ioanni 0 +schoina 0 +surpass 0 +less 0 +forprogram 0 +asplo 0 +octob 0 +evalu 0 +directori 0 +medium 0 +scale 0 +memorymultiprocessor 0 +techniqu 0 +scalabl 0 +toc 0 +new 0 +jeffrei 0 +dionisio 0 +pnevmatikato 0 +alan 0 +smith 0 +micro 0 +unifi 0 +formal 0 +four 0 +tpd 0 +implic 0 +toler 0 +fault 0 +andrea 0 +farid 0 +pour 0 +march 0 +mechan 0 +satish 0 +chandra 0 +subbarao 0 +palacharla 0 +virtual 0 +prototyp 0 +placement 0 +algorithm 0 +real 0 +index 0 +differ 0 +kourosh 0 +gharachorloo 0 +anoop 0 +gupta 0 +john 0 +hennessi 0 +journal 0 +tradeoff 0 +size 0 +shing 0 +kong 0 +detect 0 +race 0 +barton 0 +miller 0 +robert 0 +netzer 0 +scheme 0 +vikram 0 +mari 0 +vernon 0 +estim 0 +miss 0 +ratio 0 +kessleracm 0 +stack 0 +highli 0 +extend 0 +abstract 0 +sequenti 0 +order 0 +definit 0 +graduatesmadhusudhan 0 +hierarchi 0 +microsystem 0 +assist 0 +secondari 0 +click 0 +last 0 +updatedw 0 +keyword 0 +help 0 +search 0 +rank 0 +higher 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..7bca8f2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,18 @@ +marko 1 +page 0 +wisc 0 +home 0 +zaharioudaki 0 +research 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaphon 0 +mail 0 +note 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..800ce5c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,13 @@ +home 1 +page 1 +michael 0 +birk 0 +section 0 +project 0 +list 0 +program 0 +languag 0 +link 0 +alltraxx 0 +mbirk 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..081ffca7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,55 @@ +mcauliff 1 +mark 0 +solomon 0 +madison 0 +carei 0 +sigmod 0 +marvin 0 +proceed 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +dayton 0 +wisc 0 +research 0 +interest 0 +design 0 +implement 0 +object 0 +orient 0 +databas 0 +system 0 +public 0 +dewitt 0 +franklin 0 +hall 0 +naughton 0 +schuh 0 +tsatalo 0 +white 0 +zwill 0 +shoringup 0 +persist 0 +applic 0 +proc 0 +atrac 0 +base 0 +simul 0 +pointer 0 +swizzl 0 +techniqu 0 +ieee 0 +data 0 +engin 0 +march 0 +michael 0 +towardseffect 0 +effici 0 +free 0 +space 0 +manag 0 +appear 0 +confer 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..47817b73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,116 @@ +marc 1 +shapiro 0 +wisc 0 +page 0 +tautolog 0 +languag 0 +fast 0 +pointer 0 +think 0 +program 0 +html 0 +madison 0 +hous 0 +believ 0 +current 0 +obsess 0 +fond 0 +disappear 0 +fear 0 +repuls 0 +ponder 0 +analys 0 +watch 0 +lot 0 +jacki 0 +chan 0 +movi 0 +design 0 +read 0 +much 0 +try 0 +teach 0 +elementari 0 +school 0 +student 0 +term 0 +recurs 0 +hope 0 +interrupt 0 +hoar 0 +wrote 0 +introduct 0 +high 0 +level 0 +step 0 +backward 0 +never 0 +recov 0 +home 0 +schedul 0 +todd 0 +automat 0 +accid 0 +gener 0 +elain 0 +dimasi 0 +twisti 0 +littl 0 +amanda 0 +peet 0 +retreather 0 +hyper 0 +mode 0 +emac 0 +thepul 0 +menu 0 +doesn 0 +cool 0 +tag 0 +submiss 0 +softwarei 0 +cobbl 0 +togeth 0 +pldi 0 +abl 0 +work 0 +nowinclud 0 +previous 0 +mostli 0 +miss 0 +file 0 +submit 0 +popl 0 +paper 0 +accur 0 +flow 0 +insensit 0 +point 0 +analysi 0 +shapiroand 0 +susan 0 +horwitz 0 +appear 0 +symposium 0 +principl 0 +variou 0 +address 0 +dept 0 +dayton 0 +mail 0 +talk 0 +finger 0 +marion 0 +list 0 +peopl 0 +know 0 +realli 0 +meet 0 +jonathan 0 +goldstein 0 +paul 0 +ferguson 0 +lawrenc 0 +brown 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..4312ffb3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,22 @@ +offic 1 +home 0 +mellen 0 +wisc 0 +pagerob 0 +minimalist 0 +page 0 +last 0 +modifi 0 +august 0 +mellencamp 0 +taship 0 +introduct 0 +oper 0 +system 0 +email 0 +comput 0 +scienc 0 +build 0 +phone 0 +hour 0 +appoint 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..18c838f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,82 @@ +melski 1 +page 1 +also 1 +david 0 +person 0 +comput 0 +scienc 0 +madison 0 +work 0 +russian 0 +somedai 0 +info 0 +pagedavid 0 +melskicurr 0 +depart 0 +mill 0 +statisticsmadison 0 +dayton 0 +permen 0 +west 0 +ivesmarshfield 0 +michel 0 +awesom 0 +current 0 +construct 0 +sister 0 +kasei 0 +great 0 +home 0 +brother 0 +eric 0 +semest 0 +teach 0 +coupl 0 +section 0 +rep 0 +program 0 +languag 0 +myexact 0 +schedul 0 +still 0 +need 0 +determin 0 +undergrad 0 +major 0 +studiesher 0 +univers 0 +wisconsin 0 +even 0 +spent 0 +fall 0 +semesterof 0 +russia 0 +chanc 0 +often 0 +miss 0 +make 0 +back 0 +interest 0 +includ 0 +chess 0 +soccer 0 +recent 0 +beenbik 0 +distract 0 +numerousbook 0 +hasti 0 +rewrit 0 +want 0 +link 0 +tomapquest 0 +plan 0 +steal 0 +alot 0 +map 0 +second 0 +give 0 +direct 0 +marshfield 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..8b9cf569 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,203 @@ +milo 1 +martin 1 +comput 1 +interest 1 +scienc 0 +player 0 +java 0 +wisc 0 +gustavu 0 +adolphu 0 +colleg 0 +compil 0 +architectur 0 +program 0 +year 0 +system 0 +mani 0 +fink 0 +footbal 0 +plai 0 +game 0 +atlanti 0 +ultim 0 +home 0 +student 0 +offic 0 +charl 0 +advanc 0 +mark 0 +hill 0 +technolog 0 +oper 0 +publicationsresearch 0 +perform 0 +advis 0 +humm 0 +micklich 0 +evalu 0 +illicitsubst 0 +detect 0 +fast 0 +neutron 0 +hailperin 0 +next 0 +softwar 0 +direct 0 +quot 0 +page 0 +live 0 +minnesota 0 +land 0 +explor 0 +rule 0 +everyon 0 +pagemilo 0 +graduat 0 +teach 0 +assistantcomput 0 +departmentunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaemail 0 +eduoffic 0 +phone 0 +hour 0 +tuesdai 0 +thursdai 0 +byappointmentba 0 +class 0 +construct 0 +fischer 0 +sit 0 +jame 0 +larusteach 0 +algebra 0 +languag 0 +section 0 +research 0 +interestsi 0 +first 0 +programminglanguag 0 +specif 0 +optim 0 +beinfluenc 0 +hardwar 0 +mobil 0 +addit 0 +challeng 0 +present 0 +design 0 +thing 0 +even 0 +know 0 +summer 0 +argonn 0 +nation 0 +laboratori 0 +develop 0 +divis 0 +view 0 +reconstruct 0 +paramet 0 +us 0 +transmiss 0 +spectroscopi 0 +ieee 0 +nuclear 0 +symposium 0 +medic 0 +imag 0 +confer 0 +yule 0 +sagalovski 0 +techniqu 0 +nucl 0 +inst 0 +meth 0 +school 0 +languageflex 0 +determinist 0 +dynam 0 +parallel 0 +senior 0 +honor 0 +thesi 0 +mathemat 0 +depart 0 +postscript 0 +resourc 0 +compani 0 +found 0 +anintern 0 +scientif 0 +educ 0 +organ 0 +dedic 0 +toadvanc 0 +engin 0 +applic 0 +informationtechnolog 0 +serv 0 +profession 0 +public 0 +fosteringth 0 +open 0 +interchang 0 +inform 0 +promot 0 +highestprofession 0 +ethic 0 +standard 0 +person 0 +bignfl 0 +sinc 0 +myfavorit 0 +team 0 +vike 0 +eventhough 0 +chees 0 +head 0 +colon 0 +conquer 0 +multi 0 +mail 0 +space 0 +combat 0 +wrote 0 +babylon 0 +best 0 +show 0 +imho 0 +email 0 +mythic 0 +world 0 +build 0 +armi 0 +engaug 0 +trade 0 +fight 0 +wonder 0 +monster 0 +train 0 +wizard 0 +discov 0 +underworld 0 +right 0 +current 0 +list 0 +frisbe 0 +associ 0 +combin 0 +element 0 +ofsocc 0 +basketbal 0 +pace 0 +afrisbe 0 +quarterback 0 +receiv 0 +ultimatein 0 +simpl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..bd7ea067 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,108 @@ +comput 1 +mino 0 +home 0 +page 0 +madison 0 +databas 0 +garofalaki 0 +research 0 +resourc 0 +parallel 0 +queri 0 +scienc 0 +schedul 0 +yanni 0 +wisconsin 0 +interest 0 +multimedia 0 +system 0 +optim 0 +univers 0 +dept 0 +decemb 0 +patra 0 +june 0 +public 0 +ioannidi 0 +sigmod 0 +paper 0 +postscript 0 +technic 0 +report 0 +garofalakismino 0 +wisc 0 +eduphd 0 +candid 0 +assist 0 +depart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usaoffic 0 +stat 0 +phone 0 +workresearch 0 +effect 0 +manag 0 +complex 0 +process 0 +algorithm 0 +theoryeduc 0 +engin 0 +informat 0 +refere 0 +multi 0 +dimension 0 +proceed 0 +confer 0 +montreal 0 +canada 0 +abstract 0 +issu 0 +survei 0 +enhanc 0 +view 0 +continu 0 +media 0 +banu 0 +ozden 0 +silberschatz 0 +submit 0 +octob 0 +model 0 +check 0 +sequenti 0 +probabilist 0 +real 0 +time 0 +technolog 0 +institut 0 +februari 0 +advisor 0 +ioannidismor 0 +feel 0 +free 0 +peek 0 +resum 0 +pointer 0 +stuff 0 +dbm 0 +reasearch 0 +hellen 0 +societi 0 +vldb 0 +almaden 0 +center 0 +watson 0 +centerdr 0 +michael 0 +bibliograpi 0 +server 0 +logic 0 +program 0 +perpetu 0 +construct 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..146c8dff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,21 @@ +madison 1 +marcelo 0 +gonalv 0 +comput 0 +depart 0 +phone 0 +mjrg 0 +wisc 0 +associ 0 +research 0 +paradyn 0 +project 0 +addresswork 0 +home 0 +scienc 0 +sheboygan 0 +west 0 +dayton 0 +street 0 +sciencesunivers 0 +wisconsin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..52ed981b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,35 @@ +brian 1 +morgan 1 +wisconsin 1 +conferenc 1 +home 0 +page 0 +morgangradu 0 +studentcomput 0 +scienc 0 +depart 0 +univers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +mail 0 +wisc 0 +telephon 0 +advisor 0 +chuck 0 +dyerresearch 0 +interestsvirtu 0 +system 0 +imag 0 +compress 0 +video 0 +high 0 +bandwidth 0 +network 0 +relat 0 +link 0 +interest 0 +comput 0 +vision 0 +group 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..9b9817f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,121 @@ +comput 1 +wisconsin 0 +work 0 +processor 0 +support 0 +univers 0 +andrea 0 +page 0 +multiscalar 0 +architectur 0 +data 0 +specul 0 +download 0 +postscript 0 +interest 0 +scienc 0 +crete 0 +greec 0 +greek 0 +mani 0 +moshovo 0 +home 0 +moshovosresearch 0 +assist 0 +depart 0 +sciencesunivers 0 +madisonadvisor 0 +guri 0 +sohigroup 0 +kestrel 0 +address 0 +leav 0 +notese 0 +aroundw 0 +peek 0 +futur 0 +clickheremi 0 +brother 0 +write 0 +poetri 0 +click 0 +herefor 0 +sampl 0 +current 0 +depend 0 +technic 0 +report 0 +compress 0 +uncompress 0 +talk 0 +slide 0 +load 0 +balanc 0 +gener 0 +instruct 0 +level 0 +parallel 0 +compil 0 +explot 0 +vlsi 0 +fall 0 +spring 0 +graduat 0 +student 0 +thecour 0 +instituteof 0 +york 0 +earn 0 +degre 0 +sinc 0 +transfer 0 +howev 0 +theopportun 0 +excel 0 +peopl 0 +meet 0 +wife 0 +implement 0 +numer 0 +algorithm 0 +access 0 +decoupl 0 +architecturethat 0 +softwar 0 +pipelin 0 +advisor 0 +kateveni 0 +short 0 +descript 0 +found 0 +viha 0 +like 0 +editor 0 +edit 0 +link 0 +hellen 0 +resouc 0 +network 0 +sure 0 +visit 0 +obtain 0 +instal 0 +font 0 +local 0 +copi 0 +resid 0 +atwww 0 +hyper 0 +devil 0 +dictionari 0 +bookmark 0 +mess 0 +nation 0 +fraud 0 +inform 0 +centerusenet 0 +chang 0 +want 0 +send 0 +afax 0 +free 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..0dc86389 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,38 @@ +page 1 +updat 0 +toni 0 +chang 0 +contact 0 +home 0 +pagewhat 0 +newoctob 0 +back 0 +inmadison 0 +sever 0 +background 0 +black 0 +better 0 +contrast 0 +inform 0 +minor 0 +variou 0 +list 0 +older 0 +prefer 0 +keep 0 +main 0 +brief 0 +herear 0 +link 0 +second 0 +level 0 +navig 0 +index 0 +friend 0 +favorit 0 +interest 0 +informationlast 0 +modifi 0 +octob 0 +wisc 0 +educopyright 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..b48c8733 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,151 @@ +coke 1 +wisconsin 0 +martin 0 +ream 0 +page 0 +poobah 0 +comput 0 +scienc 0 +interest 0 +databas 0 +design 0 +student 0 +madison 0 +telephon 0 +dept 0 +mream 0 +wisc 0 +compil 0 +exam 0 +year 0 +gone 0 +want 0 +even 0 +thing 0 +might 0 +home 0 +graduat 0 +teach 0 +assist 0 +also 0 +finger 0 +machin 0 +departmentunivers 0 +dayton 0 +offic 0 +email 0 +edufal 0 +scheduleresearch 0 +particular 0 +digit 0 +terrain 0 +model 0 +tin 0 +program 0 +languag 0 +logic 0 +logicprogram 0 +qualifi 0 +spring 0 +previou 0 +softwar 0 +develop 0 +product 0 +orientedenviron 0 +exploit 0 +educ 0 +interestsin 0 +resum 0 +postscriptand 0 +html 0 +distribut 0 +affili 0 +mathemat 0 +wesleyan 0 +univers 0 +faint 0 +heart 0 +section 0 +alink 0 +senior 0 +honorsthesi 0 +gener 0 +unif 0 +poobahlook 0 +work 0 +mighti 0 +afraid 0 +dear 0 +tomi 0 +head 0 +usual 0 +realli 0 +talk 0 +tosomeon 0 +better 0 +adjust 0 +crucial 0 +role 0 +life 0 +youshould 0 +probabl 0 +elton 0 +doesn 0 +mention 0 +imaginethat 0 +besid 0 +aforement 0 +poobahship 0 +mental 0 +ill 0 +afew 0 +know 0 +third 0 +yeargradu 0 +depart 0 +concentr 0 +indatabas 0 +current 0 +studi 0 +qual 0 +sometim 0 +inearli 0 +februari 0 +exercis 0 +relax 0 +plai 0 +squash 0 +reason 0 +well 0 +round 0 +ultim 0 +frisbe 0 +summer 0 +basketbal 0 +poorli 0 +andinfrequ 0 +notic 0 +rapidlyrid 0 +mountain 0 +bike 0 +around 0 +campu 0 +chilliest 0 +weather 0 +alwai 0 +helmet 0 +wish 0 +learn 0 +feel 0 +free 0 +examin 0 +mynot 0 +often 0 +updat 0 +hierarchi 0 +stuff 0 +ilik 0 +enjoi 0 +line 0 +librarylast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..111718a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,34 @@ +wisconsin 1 +multiscalar 1 +project 1 +comput 1 +architectur 0 +inform 0 +sohi 0 +home 0 +pagewisconsin 0 +technic 0 +paper 0 +talk 0 +given 0 +peopl 0 +contributor 0 +fund 0 +sourc 0 +relat 0 +avail 0 +softwar 0 +group 0 +scienc 0 +departmentat 0 +univers 0 +world 0 +wide 0 +interest 0 +local 0 +user 0 +last 0 +updat 0 +februari 0 +guri 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..c42d472d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,134 @@ +depart 1 +comput 1 +scienc 1 +steel 0 +maryland 0 +oper 0 +system 0 +univers 0 +come 0 +mail 0 +wisc 0 +class 0 +wisconsin 0 +around 0 +introduct 0 +friend 0 +favorit 0 +usenet 0 +frequent 0 +ask 0 +question 0 +list 0 +mike 0 +homepagemik 0 +homepagemsteel 0 +eduoffic 0 +comp 0 +stat 0 +build 0 +sit 0 +univ 0 +struggl 0 +undergradu 0 +sometimearound 0 +april 0 +note 0 +time 0 +stamp 0 +lower 0 +right 0 +corner 0 +sai 0 +folk 0 +graduat 0 +student 0 +madison 0 +school 0 +motto 0 +freezein 0 +land 0 +chees 0 +research 0 +studi 0 +interest 0 +center 0 +artificialintellig 0 +vision 0 +hope 0 +narrow 0 +year 0 +semest 0 +graduateinstructor 0 +section 0 +algebra 0 +languag 0 +program 0 +receiv 0 +bachelor 0 +degre 0 +theunivers 0 +scomput 0 +publicationsgrindston 0 +test 0 +suit 0 +parallel 0 +perform 0 +tool 0 +jefferyk 0 +hollingsworth 0 +michael 0 +technic 0 +reportc 0 +gzip 0 +postscriptfil 0 +semesterc 0 +mari 0 +vernonc 0 +artifici 0 +intellig 0 +chuck 0 +dyermi 0 +pagesinform 0 +gettingin 0 +touch 0 +back 0 +marylandwhom 0 +forgot 0 +address 0 +linksmi 0 +sport 0 +teamssom 0 +dave 0 +barri 0 +listth 0 +billi 0 +joel 0 +listi 0 +also 0 +administr 0 +thefruit 0 +still 0 +host 0 +ofmaryland 0 +start 0 +insidejok 0 +offic 0 +hand 0 +sometim 0 +andnow 0 +member 0 +world 0 +wide 0 +predat 0 +kill 0 +someinfrar 0 +photo 0 +know 0 +looklik 0 +infrar 0 +pictur 0 +memik 0 +steelemsteel 0 +eduunivers 0 +madisoncomput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..a927f8d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,16 @@ +home 1 +visit 1 +univers 1 +maria 0 +pagemaria 0 +pagehow 0 +maryland 0 +colleg 0 +park 0 +mayb 0 +wisconsin 0 +madison 0 +section 0 +might 0 +want 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..a0d152c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,79 @@ +venezuela 1 +barquisimeto 0 +like 0 +naim 0 +work 0 +research 0 +guitar 0 +oscar 0 +home 0 +page 0 +bienvenido 0 +staff 0 +member 0 +paradyn 0 +project 0 +finish 0 +myph 0 +univers 0 +southampton 0 +england 0 +undergrad 0 +universidad 0 +simon 0 +bolivar 0 +caraca 0 +born 0 +beauti 0 +citi 0 +barquisimetoi 0 +locat 0 +central 0 +western 0 +part 0 +popul 0 +ofabout 0 +million 0 +peopl 0 +also 0 +known 0 +music 0 +capit 0 +main 0 +area 0 +perform 0 +analysi 0 +visual 0 +parallel 0 +program 0 +howev 0 +apart 0 +playclass 0 +fact 0 +studi 0 +year 0 +excellentmaestro 0 +rodrigo 0 +riera 0 +antonio 0 +lauro 0 +spend 0 +time 0 +plai 0 +read 0 +good 0 +book 0 +sherlock 0 +holm 0 +stori 0 +cook 0 +watch 0 +basebal 0 +beati 0 +pictur 0 +pleaseclick 0 +finger 0 +wisc 0 +check 0 +around 0 +mundo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..ab76cb21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,39 @@ +home 1 +anastassia 0 +ailamaki 0 +madison 0 +realli 0 +welcom 0 +graduat 0 +student 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +dayton 0 +street 0 +phone 0 +want 0 +pictur 0 +import 0 +notic 0 +find 0 +time 0 +make 0 +decent 0 +page 0 +nice 0 +link 0 +georg 0 +rochest 0 +alex 0 +guid 0 +greek 0 +islandsar 0 +worth 0 +visit 0 +send 0 +mail 0 +natassa 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..8ebf8597 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,99 @@ +databas 1 +object 0 +system 0 +parallel 0 +algorithm 0 +relat 0 +dimension 0 +data 0 +analysi 0 +current 0 +techniqu 0 +perform 0 +includ 0 +storag 0 +benchmark 0 +dbm 0 +public 0 +multidimension 0 +prasad 0 +deshpand 0 +intern 0 +confer 0 +larg 0 +mumbai 0 +bombai 0 +jeffrei 0 +naughtonjeffrei 0 +naughtonnaughton 0 +wisc 0 +eduresearch 0 +interestsolap 0 +multi 0 +relationaldbm 0 +overal 0 +goal 0 +research 0 +develop 0 +ofdatabas 0 +surpass 0 +inperform 0 +eas 0 +three 0 +main 0 +area 0 +ofinterest 0 +improv 0 +ofmulti 0 +arrai 0 +base 0 +andprocess 0 +indic 0 +computingth 0 +cube 0 +valuedattribut 0 +workload 0 +spatial 0 +inform 0 +recent 0 +comput 0 +aggreg 0 +withsameet 0 +agarw 0 +rakesh 0 +agraw 0 +ashish 0 +gupta 0 +raghu 0 +ramakrishnan 0 +sunita 0 +sarawagi 0 +proceed 0 +thend 0 +estim 0 +aggregatesin 0 +presenc 0 +hierarchi 0 +amit 0 +shukla 0 +karthikeyan 0 +ramasami 0 +bucki 0 +michael 0 +carei 0 +david 0 +dewitt 0 +johann 0 +gerhk 0 +dhaval 0 +shah 0 +moham 0 +asgarian 0 +prepar 0 +toward 0 +molap 0 +withyihong 0 +zhao 0 +kristin 0 +tuft 0 +submit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..354ccb26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,39 @@ +navin 1 +kabranavin 0 +kabragradu 0 +student 0 +depart 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +madisonadvisor 0 +david 0 +dewittresearch 0 +area 0 +databas 0 +research 0 +interest 0 +customiz 0 +queri 0 +optim 0 +paradis 0 +project 0 +plan 0 +address 0 +noth 0 +better 0 +explor 0 +bookmark 0 +could 0 +look 0 +indian 0 +stuff 0 +includ 0 +among 0 +thing 0 +archiv 0 +hindi 0 +song 0 +wisc 0 +public 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..5451e89b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,28 @@ +newhal 1 +research 1 +perform 1 +newhalltia 0 +wisc 0 +graduat 0 +student 0 +dayton 0 +madison 0 +telephon 0 +interest 0 +parallel 0 +distribut 0 +system 0 +tool 0 +scalabl 0 +analysi 0 +predict 0 +java 0 +group 0 +paradynadvisor 0 +bart 0 +millermummi 0 +pictur 0 +guanajuato 0 +last 0 +chang 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..2821c9ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,16 @@ +madison 1 +nanci 0 +hallcomput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +project 0 +shore 0 +scalabl 0 +heterogen 0 +object 0 +repositori 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..30bb1cbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,207 @@ +mathemat 1 +program 0 +report 0 +mangasarian 0 +technic 0 +comput 0 +optim 0 +revis 0 +problem 0 +august 0 +scienc 0 +complementar 0 +juli 0 +machin 0 +learn 0 +linear 0 +street 0 +minim 0 +novemb 0 +olvi 0 +decemb 0 +page 0 +univers 0 +wisconsin 0 +parallel 0 +research 0 +converg 0 +smooth 0 +nonlinear 0 +cancer 0 +system 0 +bradlei 0 +method 0 +octob 0 +submit 0 +neural 0 +inform 0 +journal 0 +chunhui 0 +chen 0 +view 0 +group 0 +home 0 +wisc 0 +solv 0 +aspect 0 +bound 0 +inequ 0 +gener 0 +breast 0 +current 0 +public 0 +function 0 +constraint 0 +convex 0 +concav 0 +process 0 +wolberg 0 +januari 0 +editor 0 +proceed 0 +nick 0 +april 0 +appear 0 +applic 0 +june 0 +misclassif 0 +februari 0 +advanc 0 +septemb 0 +global 0 +solodov 0 +backpropag 0 +perturb 0 +paper 0 +john 0 +neumann 0 +professor 0 +member 0 +center 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +email 0 +harvard 0 +interest 0 +summari 0 +theori 0 +rich 0 +effectivecomputation 0 +mani 0 +real 0 +life 0 +interestsin 0 +topic 0 +rang 0 +broad 0 +spectrum 0 +encompassestheoret 0 +error 0 +programsand 0 +variat 0 +proof 0 +parallelgradi 0 +variabl 0 +distribut 0 +algorithm 0 +techniqu 0 +constrain 0 +problemsa 0 +differenti 0 +equat 0 +well 0 +applicationsto 0 +specif 0 +context 0 +animport 0 +programmingtechniqu 0 +diagnos 0 +result 0 +ahighli 0 +accur 0 +computer 0 +diagnost 0 +useat 0 +hospit 0 +student 0 +paul 0 +recent 0 +solodova 0 +linearli 0 +descent 0 +strongli 0 +monotonecomplementar 0 +jong 0 +pangexact 0 +penalti 0 +programswith 0 +mangasarianmathemat 0 +data 0 +miningmathemat 0 +mangasarianerror 0 +nondifferenti 0 +strong 0 +slater 0 +qualif 0 +cluster 0 +individu 0 +collect 0 +prognost 0 +predict 0 +featur 0 +select 0 +polyhedr 0 +appli 0 +festschrift 0 +klau 0 +ritter 0 +fischer 0 +riedmuel 0 +schaeffler 0 +physica 0 +verlag 0 +germani 0 +pose 0 +siam 0 +internationalsymposium 0 +baltimor 0 +improv 0 +toler 0 +train 0 +workshop 0 +eric 0 +plenum 0 +press 0 +hybrid 0 +siag 0 +new 0 +class 0 +mix 0 +diagnosi 0 +prognosi 0 +oper 0 +separ 0 +bilinear 0 +determinist 0 +nonmonoton 0 +cowan 0 +tesauro 0 +alspector 0 +morgan 0 +kaufmann 0 +publish 0 +francisco 0 +california 0 +inequalitiesand 0 +serial 0 +net 0 +vianonmonoton 0 +minimn 0 +softwar 0 +chronolog 0 +bibliographi 0 +download 0 +period 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..464c13f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,421 @@ +cancer 1 +diagnosi 0 +comput 0 +breast 0 +mangasarian 0 +wolberg 0 +us 0 +page 0 +program 0 +imag 0 +abstract 0 +prognosi 0 +research 0 +street 0 +predict 0 +learn 0 +case 0 +malign 0 +time 0 +machin 0 +result 0 +sampl 0 +probabl 0 +patient 0 +new 0 +linear 0 +featur 0 +approxim 0 +valu 0 +recurr 0 +surviv 0 +medic 0 +march 0 +base 0 +work 0 +relat 0 +diagnos 0 +data 0 +analysi 0 +slide 0 +nuclei 0 +nuclear 0 +benign 0 +diseas 0 +free 0 +prognost 0 +cytolog 0 +biopsi 0 +wisconsin 0 +madison 0 +scienc 0 +depart 0 +surgeri 0 +oncolog 0 +provid 0 +link 0 +needl 0 +aspir 0 +construct 0 +separ 0 +known 0 +xcyt 0 +extrem 0 +year 0 +curv 0 +april 0 +report 0 +univers 0 +januari 0 +variou 0 +appli 0 +collabor 0 +human 0 +american 0 +citat 0 +popular 0 +press 0 +local 0 +mass 0 +fine 0 +nine 0 +characterist 0 +setiono 0 +method 0 +pattern 0 +small 0 +system 0 +process 0 +differenti 0 +user 0 +isol 0 +individu 0 +boundari 0 +ofeach 0 +approach 0 +textur 0 +mean 0 +train 0 +plane 0 +allow 0 +shown 0 +segment 0 +recognit 0 +algorithm 0 +pleas 0 +wisc 0 +problem 0 +term 0 +surfac 0 +plot 0 +ofdiseas 0 +repres 0 +factor 0 +lymph 0 +node 0 +could 0 +paper 0 +postscript 0 +proceed 0 +siam 0 +analyt 0 +quantit 0 +histolog 0 +heisei 0 +deriv 0 +august 0 +mathemat 0 +technic 0 +intern 0 +confer 0 +aid 0 +submit 0 +medicin 0 +milwauke 0 +hunt 0 +journal 0 +interpret 0 +group 0 +biomed 0 +prognosismachin 0 +prognosisthi 0 +describ 0 +learningapproach 0 +ofbreast 0 +theunivers 0 +betweenprof 0 +olvi 0 +anddr 0 +william 0 +wolbergof 0 +copi 0 +thepress 0 +releas 0 +distribut 0 +societi 0 +writer 0 +seminar 0 +inmarch 0 +good 0 +overview 0 +tabl 0 +content 0 +bibliographi 0 +linksdiagnosisthi 0 +grew 0 +desir 0 +accur 0 +diagnosebreast 0 +sole 0 +heidentifi 0 +visual 0 +assess 0 +consideredrelev 0 +prof 0 +andtwo 0 +graduat 0 +student 0 +rudi 0 +kristin 0 +bennett 0 +aclassifi 0 +multisurfac 0 +thatsuccessfulli 0 +iswel 0 +began 0 +addit 0 +nick 0 +streetto 0 +team 0 +goal 0 +adigit 0 +section 0 +ofthi 0 +consolid 0 +softwar 0 +current 0 +clinicalpractic 0 +perform 0 +follow 0 +taken 0 +materi 0 +thenmount 0 +microscop 0 +stain 0 +highlight 0 +cellularnuclei 0 +portion 0 +cell 0 +arewel 0 +scan 0 +digit 0 +camera 0 +afram 0 +grabber 0 +board 0 +mous 0 +pointer 0 +draw 0 +nucleu 0 +vision 0 +snake 0 +converg 0 +exact 0 +interact 0 +take 0 +five 0 +minut 0 +showingxcyt 0 +thisfas 0 +measur 0 +size 0 +shape 0 +standarderror 0 +total 0 +classifi 0 +wasconstruct 0 +thisclassifi 0 +consist 0 +singl 0 +space 0 +threeof 0 +area 0 +smooth 0 +project 0 +onto 0 +thenorm 0 +densiti 0 +ofth 0 +point 0 +simpl 0 +bayesiancomput 0 +thesedens 0 +judg 0 +confid 0 +comparison 0 +hundr 0 +previou 0 +date 0 +correctli 0 +consecut 0 +newpati 0 +eight 0 +didxcyt 0 +return 0 +suspici 0 +estimatedprob 0 +subset 0 +sourc 0 +found 0 +goodtest 0 +object 0 +petsegment 0 +automat 0 +identifi 0 +inthes 0 +email 0 +togeth 0 +prognosisth 0 +second 0 +consid 0 +long 0 +behavior 0 +haveapproach 0 +function 0 +inputfeatur 0 +includ 0 +atim 0 +right 0 +censor 0 +solut 0 +util 0 +linearprogram 0 +fornew 0 +examin 0 +actual 0 +caseswith 0 +similar 0 +anindividu 0 +capabl 0 +incorpor 0 +intoxcyt 0 +exampl 0 +versu 0 +black 0 +ourorigin 0 +studi 0 +particular 0 +thereforeha 0 +averag 0 +freeafter 0 +equal 0 +procedur 0 +also 0 +compar 0 +power 0 +ofvari 0 +indic 0 +precis 0 +detail 0 +inform 0 +type 0 +xcytgiv 0 +better 0 +accuraci 0 +tradit 0 +tumors 0 +statu 0 +corrobor 0 +remov 0 +need 0 +often 0 +pain 0 +axillari 0 +chronolog 0 +bibliographylink 0 +format 0 +viewer 0 +download 0 +file 0 +shift 0 +click 0 +netscap 0 +print 0 +ascii 0 +text 0 +obtain 0 +notlink 0 +contact 0 +first 0 +author 0 +theori 0 +applic 0 +workshop 0 +larg 0 +scale 0 +numer 0 +optim 0 +philadelphia 0 +distinguish 0 +patholog 0 +grade 0 +oper 0 +juli 0 +avail 0 +induct 0 +twelfth 0 +priediti 0 +russel 0 +morgan 0 +kaufmann 0 +teagu 0 +call 0 +indetermin 0 +collect 0 +icml 0 +aaai 0 +prime 0 +without 0 +friend 0 +todai 0 +detect 0 +imit 0 +prospect 0 +man 0 +sentinel 0 +analyz 0 +detroit 0 +high 0 +tech 0 +marilynn 0 +marchion 0 +computer 0 +progress 0 +ruth 0 +sorel 0 +houston 0 +chronicl 0 +improv 0 +suggest 0 +replac 0 +surgic 0 +associ 0 +perspect 0 +column 0 +june 0 +cope 0 +septemb 0 +octob 0 +seek 0 +capit 0 +angel 0 +schooloth 0 +nation 0 +librari 0 +nevada 0 +center 0 +model 0 +oncolink 0 +washington 0 +institut 0 +paulb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..0b7eff3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,160 @@ +point 1 +separ 0 +plane 0 +linear 0 +program 0 +method 0 +set 0 +page 0 +region 0 +pattern 0 +dimension 0 +parallel 0 +mathemat 0 +optim 0 +us 0 +surfac 0 +also 0 +multisurfac 0 +contain 0 +tree 0 +implement 0 +mangasarian 0 +linearli 0 +euclidean 0 +space 0 +follow 0 +misclassifi 0 +assign 0 +node 0 +neural 0 +network 0 +bennett 0 +describ 0 +comput 0 +scienc 0 +approach 0 +construct 0 +nonlinear 0 +paramet 0 +disjoint 0 +finit 0 +togeth 0 +euclideanspac 0 +sequenc 0 +minim 0 +li 0 +side 0 +mostli 0 +gener 0 +view 0 +decis 0 +split 0 +artifici 0 +packag 0 +matlab 0 +paul 0 +bradlei 0 +programmingpattern 0 +programmingthi 0 +work 0 +section 0 +univers 0 +wisconsin 0 +madison 0 +depart 0 +brief 0 +histori 0 +outlinemathemat 0 +particular 0 +long 0 +problem 0 +whenev 0 +appear 0 +quadrat 0 +polynomi 0 +formul 0 +howev 0 +could 0 +failon 0 +avoid 0 +difficulti 0 +choos 0 +close 0 +discard 0 +repeat 0 +process 0 +variant 0 +develop 0 +goal 0 +todetermin 0 +determin 0 +averag 0 +distanc 0 +similarli 0 +stop 0 +otherwis 0 +anoth 0 +error 0 +eachnod 0 +best 0 +reach 0 +found 0 +solv 0 +branch 0 +thesam 0 +procedur 0 +appli 0 +oneset 0 +astrain 0 +hidden 0 +layer 0 +shown 0 +learn 0 +concept 0 +well 0 +better 0 +traditionallearn 0 +cart 0 +advantag 0 +backpropag 0 +inthat 0 +train 0 +proce 0 +much 0 +faster 0 +mino 0 +numer 0 +nick 0 +street 0 +kristin 0 +descript 0 +file 0 +requir 0 +chronolog 0 +bibliographi 0 +oper 0 +research 0 +june 0 +ieee 0 +transact 0 +inform 0 +theori 0 +novemb 0 +proceed 0 +midwest 0 +intellig 0 +cognit 0 +societi 0 +confer 0 +robust 0 +discrimin 0 +insepar 0 +softwar 0 +orsa 0 +journal 0 +fall 0 +last 0 +modifi 0 +paulb 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..d353c550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,84 @@ +paradyn 1 +project 0 +tool 0 +parallel 0 +arpa 0 +page 0 +perform 0 +meet 0 +csto 0 +super 0 +comput 0 +program 0 +presentationthi 0 +present 0 +made 0 +wisc 0 +home 0 +releas 0 +informationthi 0 +contain 0 +describ 0 +copi 0 +ofreleas 0 +goalsth 0 +explor 0 +newapproach 0 +build 0 +scalabl 0 +technic 0 +paper 0 +manualsstatu 0 +reporta 0 +recent 0 +statu 0 +report 0 +blizzard 0 +panel 0 +insan 0 +antonio 0 +symbol 0 +tabl 0 +inflorida 0 +effort 0 +develop 0 +common 0 +access 0 +routin 0 +tocompil 0 +gener 0 +inform 0 +us 0 +high 0 +level 0 +parallellanguag 0 +staff 0 +postera 0 +hypertext 0 +version 0 +poster 0 +relat 0 +elsewher 0 +spdt 0 +sigmetr 0 +symposium 0 +distribut 0 +toolsyou 0 +also 0 +restaur 0 +includ 0 +temporari 0 +placehold 0 +contact 0 +informationparadyn 0 +projectdepart 0 +sciencesunivers 0 +wisconsin 0 +west 0 +dayton 0 +streetmadison 0 +email 0 +edufax 0 +last 0 +modifi 0 +bart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..6f1126f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,32 @@ +parker 1 +home 1 +page 1 +steven 0 +wisconsin 0 +madison 0 +prism 0 +graduat 0 +student 0 +comput 0 +scienc 0 +depart 0 +univers 0 +west 0 +dayton 0 +street 0 +offic 0 +wisc 0 +depth 0 +area 0 +numer 0 +analysi 0 +employ 0 +projectfal 0 +schedul 0 +math 0 +relat 0 +link 0 +send 0 +mail 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..d11e8eaa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,90 @@ +wisconsin 1 +program 0 +comput 0 +univers 0 +mathemat 0 +street 0 +page 0 +paul 0 +scienc 0 +depart 0 +madison 0 +abstract 0 +bradlei 0 +paulb 0 +wisc 0 +fish 0 +us 0 +work 0 +nick 0 +postscript 0 +mangasarian 0 +technic 0 +report 0 +inform 0 +home 0 +bradleygradu 0 +student 0 +eduoffic 0 +csphone 0 +advisor 0 +mangasarianinterestsmathemat 0 +programmingmachin 0 +learningfli 0 +interest 0 +techniqu 0 +specif 0 +nonlinear 0 +linear 0 +induct 0 +learn 0 +summari 0 +currentlyb 0 +done 0 +area 0 +pleas 0 +madisonmathemat 0 +thiswork 0 +guid 0 +professor 0 +olvimangasarian 0 +publicationsal 0 +paper 0 +store 0 +format 0 +ascii 0 +text 0 +viewer 0 +download 0 +file 0 +shift 0 +click 0 +netscap 0 +print 0 +featur 0 +select 0 +decemb 0 +revis 0 +march 0 +submit 0 +journal 0 +cluster 0 +concav 0 +minim 0 +accept 0 +present 0 +neural 0 +process 0 +system 0 +picksthes 0 +site 0 +backcountri 0 +grate 0 +dead 0 +nasa 0 +frog 0 +espnet 0 +timesfax 0 +uroullett 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..4d66b786 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,117 @@ +develop 1 +scout 0 +internet 0 +devri 0 +tool 0 +specialist 0 +comput 0 +scienc 0 +intern 0 +everyth 0 +articl 0 +work 0 +molecularbiolog 0 +resourc 0 +fortun 0 +prof 0 +imag 0 +embryo 0 +journal 0 +site 0 +microscop 0 +pete 0 +home 0 +page 0 +peter 0 +room 0 +westdayton 0 +madison 0 +pdevri 0 +wisc 0 +mean 0 +isthat 0 +read 0 +technolog 0 +make 0 +sens 0 +andthen 0 +write 0 +overview 0 +toolkit 0 +great 0 +thing 0 +iread 0 +think 0 +anywai 0 +rather 0 +foolish 0 +topai 0 +tell 0 +eric 0 +hazen 0 +alsoprovid 0 +technic 0 +webmast 0 +servic 0 +group 0 +although 0 +excel 0 +help 0 +system 0 +folksat 0 +recent 0 +join 0 +team 0 +laboratori 0 +integr 0 +microscopi 0 +biomed 0 +nearli 0 +eight 0 +year 0 +seancarrol 0 +techniqu 0 +creat 0 +multipl 0 +label 0 +confoc 0 +basic 0 +cool 0 +look 0 +lotof 0 +book 0 +cover 0 +also 0 +molecular 0 +biologi 0 +johnwhit 0 +rest 0 +imrstaff 0 +receiv 0 +star 0 +inth 0 +magellan 0 +guid 0 +last 0 +major 0 +project 0 +involv 0 +dimension 0 +studi 0 +isdescrib 0 +appear 0 +augustnd 0 +issu 0 +photo 0 +guest 0 +lab 0 +standard 0 +info 0 +resum 0 +relat 0 +experi 0 +public 0 +present 0 +updat 0 +tuesdai 0 +decemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..1771a061 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,99 @@ +comput 1 +madison 1 +page 0 +scienc 0 +home 0 +plakal 0 +graduat 0 +kanpur 0 +slave 0 +calcutta 0 +bosco 0 +univers 0 +wisconsin 0 +stuff 0 +class 0 +link 0 +friend 0 +wisc 0 +yumpe 0 +manoj 0 +dept 0 +universityofwisconsin 0 +blah 0 +countri 0 +india 0 +though 0 +origin 0 +state 0 +kerala 0 +stai 0 +life 0 +studi 0 +school 0 +salesian 0 +undergrad 0 +major 0 +engin 0 +current 0 +first 0 +year 0 +student 0 +support 0 +teach 0 +assistantship 0 +depart 0 +stare 0 +barrel 0 +either 0 +architectur 0 +program 0 +languag 0 +interest 0 +music 0 +rock 0 +metal 0 +altern 0 +blue 0 +movi 0 +book 0 +acad 0 +hack 0 +industri 0 +geeki 0 +nerdi 0 +featur 0 +chat 0 +gatewai 0 +seealso 0 +iitk 0 +snap 0 +pinup 0 +galleri 0 +everi 0 +nerd 0 +need 0 +check 0 +bookmark 0 +access 0 +log 0 +visit 0 +contact 0 +north 0 +randal 0 +avenu 0 +dayton 0 +street 0 +acknowledg 0 +suresh 0 +venkat 0 +nifti 0 +tabl 0 +igor 0 +ivanisev 0 +wisecrack 0 +icon 0 +variou 0 +corner 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..b902965b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,70 @@ +madison 1 +page 0 +comput 0 +theori 0 +prasad 0 +home 0 +scienc 0 +univers 0 +wisconsin 0 +address 0 +interest 0 +databas 0 +music 0 +vldb 0 +time 0 +constuct 0 +meanwhil 0 +deshpand 0 +graduat 0 +student 0 +depar 0 +princeton 0 +offic 0 +build 0 +dayton 0 +academ 0 +system 0 +research 0 +area 0 +current 0 +schedul 0 +invest 0 +manageri 0 +econom 0 +meet 0 +prof 0 +jeff 0 +naughton 0 +introduct 0 +public 0 +multidimension 0 +aggreg 0 +storag 0 +estim 0 +multidimensionalaggreg 0 +presenc 0 +hierarchi 0 +cours 0 +project 0 +packag 0 +java 0 +download 0 +want 0 +spend 0 +timex 0 +world 0 +find 0 +india 0 +dilbert 0 +comix 0 +explor 0 +bookmark 0 +random 0 +link 0 +finger 0 +sinc 0 +hakuna 0 +matata 0 +info 0 +creat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..dcdc70e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,38 @@ +inform 1 +home 0 +page 0 +poosala 0 +research 0 +madison 0 +vishi 0 +viswanath 0 +assist 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +wisc 0 +reseach 0 +summari 0 +resum 0 +html 0 +postscript 0 +relat 0 +databas 0 +advisor 0 +prof 0 +yanni 0 +ioannidi 0 +asha 0 +voluntari 0 +organ 0 +help 0 +improv 0 +basic 0 +educ 0 +india 0 +interestsuw 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..c84cf0d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,353 @@ +sequenc 1 +queri 0 +data 0 +databas 0 +relat 0 +oper 0 +model 0 +record 0 +system 0 +optim 0 +project 0 +order 0 +sequin 0 +us 0 +time 0 +manag 0 +effici 0 +languag 0 +posit 0 +praveen 0 +seshadri 0 +express 0 +evalu 0 +variou 0 +support 0 +process 0 +miron 0 +livni 0 +ramakrishnan 0 +object 0 +import 0 +requir 0 +includ 0 +scienc 0 +kind 0 +techniqu 0 +implement 0 +nest 0 +complex 0 +storag 0 +earthquak 0 +similar 0 +click 0 +raghu 0 +wisc 0 +domain 0 +exist 0 +allow 0 +need 0 +like 0 +next 0 +built 0 +defin 0 +also 0 +propos 0 +shore 0 +extens 0 +volcano 0 +recent 0 +plan 0 +scan 0 +singl 0 +mani 0 +view 0 +orient 0 +zoom 0 +group 0 +involv 0 +server 0 +proceed 0 +confer 0 +madison 0 +document 0 +collect 0 +set 0 +declar 0 +manner 0 +advantag 0 +user 0 +tempor 0 +previou 0 +demonstr 0 +feasibl 0 +form 0 +embed 0 +base 0 +exampl 0 +inform 0 +meteorolog 0 +phenomena 0 +sequenti 0 +strength 0 +greater 0 +would 0 +sort 0 +join 0 +store 0 +buffer 0 +gener 0 +answer 0 +detail 0 +aredescrib 0 +publish 0 +paper 0 +postscript 0 +version 0 +map 0 +call 0 +could 0 +flavor 0 +explor 0 +collaps 0 +expand 0 +last 0 +work 0 +probabl 0 +devis 0 +result 0 +client 0 +insid 0 +provid 0 +depart 0 +home 0 +pageth 0 +construct 0 +content 0 +objectivescurr 0 +statusmotiv 0 +exampleseq 0 +languageoptim 0 +techniquesseq 0 +developmentpublicationsrel 0 +workcontact 0 +informationproject 0 +number 0 +applic 0 +processingof 0 +larg 0 +amount 0 +theseappl 0 +financi 0 +histor 0 +analysi 0 +econom 0 +social 0 +metereolog 0 +medic 0 +andbiolog 0 +inadequ 0 +regard 0 +treat 0 +consequ 0 +tediou 0 +ineffici 0 +abstract 0 +util 0 +semanticstak 0 +uniqu 0 +opportun 0 +avail 0 +evaluationintegr 0 +canstor 0 +combin 0 +sequencesthes 0 +serv 0 +goal 0 +themost 0 +notion 0 +natur 0 +consid 0 +issu 0 +studi 0 +theori 0 +theoret 0 +idea 0 +statusth 0 +current 0 +statu 0 +algebraicqueri 0 +compos 0 +analogousto 0 +composit 0 +algebra 0 +describ 0 +identifi 0 +candeclar 0 +likesql 0 +vice 0 +versa 0 +build 0 +disk 0 +architectur 0 +sever 0 +megabyt 0 +integr 0 +motiv 0 +querya 0 +weather 0 +monitor 0 +occurr 0 +event 0 +scientist 0 +ask 0 +erupt 0 +didth 0 +richter 0 +scale 0 +featur 0 +groupbi 0 +claus 0 +correl 0 +subqueri 0 +aggregatefunct 0 +convent 0 +find 0 +execut 0 +even 0 +given 0 +knowledg 0 +howev 0 +sequencesord 0 +lock 0 +step 0 +merg 0 +temporari 0 +whenev 0 +valu 0 +check 0 +possibl 0 +therefor 0 +littl 0 +memori 0 +modelth 0 +present 0 +gist 0 +basic 0 +ordereddomain 0 +relationship 0 +andposit 0 +dual 0 +distinct 0 +wai 0 +recordsmap 0 +respect 0 +give 0 +rise 0 +either 0 +relationaloper 0 +overlap 0 +contain 0 +andaggreg 0 +researchersin 0 +commun 0 +offset 0 +movingaggreg 0 +mean 0 +associ 0 +instanc 0 +daili 0 +weekli 0 +hourli 0 +part 0 +deal 0 +make 0 +easi 0 +case 0 +real 0 +worldsitu 0 +extend 0 +instead 0 +extensionof 0 +indic 0 +practic 0 +ofseq 0 +languagew 0 +usingwhich 0 +specifi 0 +languagei 0 +except 0 +input 0 +queriesa 0 +well 0 +descript 0 +techniquesw 0 +thathav 0 +transform 0 +meta 0 +cach 0 +intermedi 0 +algorithm 0 +reli 0 +cost 0 +estim 0 +observ 0 +access 0 +stream 0 +strategi 0 +take 0 +account 0 +developmentth 0 +serverarchitectur 0 +multipl 0 +viaa 0 +multi 0 +thread 0 +ontop 0 +subset 0 +languageswhich 0 +mode 0 +arbitrarylevel 0 +viceversa 0 +supportfor 0 +type 0 +function 0 +detailson 0 +publicationssequ 0 +sigmod 0 +framework 0 +datapraveen 0 +ieee 0 +engin 0 +march 0 +design 0 +systempraveen 0 +submit 0 +vldb 0 +queriesraghu 0 +michael 0 +cheng 0 +intern 0 +comad 0 +decemb 0 +workthedevis 0 +complementari 0 +visualizationenviron 0 +front 0 +pose 0 +examin 0 +graphic 0 +peopl 0 +research 0 +servercontact 0 +informationfor 0 +contact 0 +eduraghu 0 +edumiron 0 +educomput 0 +univers 0 +wisconsin 0 +dayton 0 +street 0 +modifi 0 +seshadripraveen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..17c058ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,20 @@ +andrew 1 +prock 1 +school 1 +class 1 +alta 1 +vista 1 +home 0 +page 0 +clemen 0 +hockert 0 +prockoffic 0 +hour 0 +person 0 +histori 0 +madison 0 +bookmark 0 +resum 0 +doonesburi 0 +trot 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..df1518ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,121 @@ +comput 1 +theori 1 +system 1 +parallel 0 +problem 0 +condon 0 +scienc 0 +interest 0 +interact 0 +proof 0 +us 0 +work 0 +algorithm 0 +page 0 +ann 0 +univers 0 +email 0 +wisc 0 +complex 0 +random 0 +model 0 +recent 0 +hard 0 +approxim 0 +result 0 +graph 0 +automata 0 +develop 0 +journal 0 +home 0 +associ 0 +professor 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +washington 0 +complexityclass 0 +research 0 +summari 0 +interactiveproof 0 +combin 0 +nondetermin 0 +suchmodel 0 +proven 0 +surprisingli 0 +solv 0 +classicproblem 0 +exampl 0 +although 0 +theoryof 0 +complet 0 +long 0 +identifi 0 +computationalproblem 0 +much 0 +progress 0 +understand 0 +whichhard 0 +solut 0 +easi 0 +recentresult 0 +novel 0 +modelsof 0 +turn 0 +prove 0 +approximabilityresult 0 +sever 0 +developingboth 0 +posit 0 +neg 0 +hardcombinatori 0 +aris 0 +game 0 +theoryand 0 +also 0 +design 0 +analysi 0 +current 0 +forsort 0 +minimum 0 +span 0 +tree 0 +goal 0 +well 0 +practic 0 +commun 0 +synchron 0 +costscan 0 +expens 0 +sampl 0 +public 0 +polynomi 0 +bound 0 +strategi 0 +ladner 0 +finit 0 +state 0 +nondeterminist 0 +probabilisticst 0 +hellerstein 0 +pottl 0 +wigderson 0 +proceedingsof 0 +annual 0 +symposium 0 +pspace 0 +provabl 0 +prover 0 +round 0 +caiand 0 +lipton 0 +februari 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..641f5a5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,137 @@ +comput 1 +structur 0 +research 0 +scienc 0 +complex 0 +class 0 +sequenc 0 +joseph 0 +properti 0 +algorithm 0 +page 0 +deborah 0 +mathemat 0 +univers 0 +email 0 +wisc 0 +interest 0 +biologi 0 +geometri 0 +concern 0 +area 0 +set 0 +time 0 +recent 0 +question 0 +proof 0 +techniqu 0 +method 0 +genom 0 +develop 0 +proceed 0 +similar 0 +home 0 +associ 0 +professor 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +purdu 0 +appli 0 +theori 0 +logic 0 +summari 0 +theoret 0 +studi 0 +design 0 +analysi 0 +biolog 0 +problem 0 +last 0 +twenti 0 +year 0 +great 0 +deal 0 +work 0 +gone 0 +studyingth 0 +decid 0 +determinist 0 +andnondeterminist 0 +polynomi 0 +despit 0 +effort 0 +stillknow 0 +littl 0 +fact 0 +computerscientist 0 +adequaci 0 +known 0 +techniquesfor 0 +resolv 0 +whether 0 +investigatesth 0 +exploresin 0 +formal 0 +type 0 +necessari 0 +resolveproblem 0 +primarili 0 +inth 0 +theseinclud 0 +dynam 0 +data 0 +algorithmsfor 0 +fragment 0 +assembl 0 +larg 0 +scale 0 +project 0 +specif 0 +handlingrepetit 0 +addit 0 +util 0 +graphtheoret 0 +rapid 0 +homolog 0 +detect 0 +analysisof 0 +anonym 0 +sampl 0 +public 0 +collaps 0 +degre 0 +subexponenti 0 +pruim 0 +young 0 +ninth 0 +theoryconfer 0 +spars 0 +spanner 0 +weight 0 +graph 0 +althof 0 +dobkin 0 +soar 0 +discret 0 +obtain 0 +global 0 +local 0 +meidanisand 0 +tiwari 0 +fourth 0 +scandinavianworkshop 0 +springer 0 +verlag 0 +lectur 0 +note 0 +incomput 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..ecb9fcd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,108 @@ +simul 1 +manag 0 +polici 0 +miron 0 +resourc 0 +perform 0 +process 0 +page 0 +livni 0 +scienc 0 +email 0 +wisc 0 +interest 0 +algorithm 0 +discret 0 +event 0 +research 0 +data 0 +system 0 +laboratori 0 +disk 0 +tape 0 +proceed 0 +confer 0 +home 0 +professor 0 +comput 0 +depart 0 +univers 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +weizmann 0 +institut 0 +rehovot 0 +israel 0 +model 0 +analysi 0 +summari 0 +major 0 +emphasi 0 +design 0 +evaluationof 0 +involv 0 +developmentof 0 +managementsystem 0 +type 0 +gener 0 +purpos 0 +well 0 +asreal 0 +time 0 +schedul 0 +consid 0 +researchinvolv 0 +studi 0 +differ 0 +specialemphasi 0 +interplai 0 +properti 0 +systemand 0 +sinc 0 +performancestudi 0 +emploi 0 +modelingand 0 +techniqu 0 +current 0 +implementinga 0 +base 0 +novel 0 +languag 0 +includ 0 +util 0 +visualizationtool 0 +graphic 0 +interfac 0 +sampl 0 +recent 0 +public 0 +join 0 +synchron 0 +access 0 +myllymaki 0 +sigmetr 0 +sequenc 0 +queri 0 +sashadri 0 +ramakrishnan 0 +sigmod 0 +foundat 0 +visual 0 +metaphor 0 +schema 0 +displai 0 +haberand 0 +ioannidi 0 +journal 0 +intellig 0 +inform 0 +juli 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..879d4e03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,102 @@ +method 1 +equat 0 +precondit 0 +parter 0 +numer 0 +differenti 0 +ellipt 0 +condit 0 +page 0 +seymour 0 +comput 0 +scienc 0 +univers 0 +email 0 +wisc 0 +partial 0 +research 0 +work 0 +indefinit 0 +discret 0 +system 0 +posit 0 +definit 0 +effect 0 +siam 0 +analysi 0 +distribut 0 +singular 0 +valu 0 +journal 0 +home 0 +professor 0 +mathemat 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +york 0 +interest 0 +summari 0 +time 0 +major 0 +emphasi 0 +solutionof 0 +classicalit 0 +multigrid 0 +effectivelywhen 0 +also 0 +bemad 0 +real 0 +symmetr 0 +part 0 +operatori 0 +hand 0 +casedirect 0 +attempt 0 +preserv 0 +spars 0 +thesystem 0 +encount 0 +small 0 +pivot 0 +thu 0 +challengingproblem 0 +mix 0 +concept 0 +procedur 0 +linearalgebra 0 +nowinvolv 0 +sever 0 +project 0 +attack 0 +class 0 +problem 0 +includ 0 +studi 0 +specialmultigrid 0 +sampl 0 +recent 0 +public 0 +chebyshev 0 +collact 0 +ellipticparti 0 +appear 0 +journalon 0 +boundari 0 +without 0 +estim 0 +number 0 +second 0 +order 0 +oper 0 +numbersand 0 +scientificcomput 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..31986854 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,128 @@ +parallel 1 +system 0 +comput 0 +techniqu 0 +analysi 0 +vernon 0 +applic 0 +perform 0 +model 0 +issu 0 +processor 0 +polici 0 +proceed 0 +page 0 +mari 0 +scienc 0 +univers 0 +email 0 +wisc 0 +interest 0 +research 0 +repres 0 +also 0 +recent 0 +schedul 0 +network 0 +confer 0 +june 0 +home 0 +professor 0 +industri 0 +engin 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +california 0 +angel 0 +architectur 0 +operatingsystem 0 +summari 0 +analyt 0 +applicationto 0 +emphasi 0 +paralleland 0 +distribut 0 +design 0 +techniquesi 0 +develop 0 +togeth 0 +graduat 0 +student 0 +colleaguesinclud 0 +gener 0 +time 0 +petri 0 +customizedmean 0 +valu 0 +gtpn 0 +systemfeatur 0 +synchron 0 +prioriti 0 +servic 0 +precis 0 +custom 0 +yield 0 +intuit 0 +equationsthat 0 +featur 0 +approxim 0 +butcan 0 +solv 0 +effici 0 +proposedth 0 +call 0 +interpol 0 +approximationsfor 0 +alloc 0 +techniquemai 0 +broader 0 +performanceanalysi 0 +current 0 +project 0 +includ 0 +character 0 +high 0 +performanceparallel 0 +workload 0 +schedulingpolici 0 +multimedia 0 +server 0 +memorymanag 0 +workstat 0 +sampl 0 +public 0 +fair 0 +dqdb 0 +slot 0 +reus 0 +brewster 0 +ieee 0 +infocom 0 +august 0 +accur 0 +hybrid 0 +hash 0 +join 0 +algorithm 0 +pateland 0 +carei 0 +sigmetr 0 +characterist 0 +limit 0 +preemption 0 +forrun 0 +complet 0 +with 0 +chiang 0 +mansharamani 0 +sigmetricsconfer 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..d822739b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,9 @@ +qinqin 1 +wang 1 +home 1 +page 0 +welcom 0 +pageqw 0 +wisc 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..89b6e946 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,198 @@ +data 1 +project 0 +databas 0 +research 0 +queri 0 +system 0 +interest 0 +develop 0 +imag 0 +work 0 +hill 0 +first 0 +us 0 +base 0 +sequenc 0 +languag 0 +next 0 +august 0 +employ 0 +raghu 0 +comput 0 +depart 0 +educ 0 +graduat 0 +text 0 +relat 0 +logic 0 +integr 0 +content 0 +set 0 +access 0 +joint 0 +prof 0 +livni 0 +program 0 +evalu 0 +bell 0 +lab 0 +murrai 0 +ramakrishnan 0 +scienc 0 +madison 0 +teach 0 +activ 0 +univers 0 +cours 0 +manag 0 +second 0 +level 0 +dbm 0 +coral 0 +deal 0 +sever 0 +heterogen 0 +sourc 0 +analysi 0 +larg 0 +constraint 0 +techniqu 0 +support 0 +visual 0 +appli 0 +design 0 +part 0 +gener 0 +explor 0 +algorithm 0 +call 0 +bottom 0 +optim 0 +cornel 0 +home 0 +page 0 +associ 0 +professor 0 +wisc 0 +sciencesunivers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +usaphon 0 +texa 0 +austin 0 +tech 0 +indian 0 +institut 0 +technolog 0 +madra 0 +softwar 0 +minibaseand 0 +coralth 0 +publish 0 +mcgraw 0 +aimedat 0 +undergraduateand 0 +minibas 0 +inconjunct 0 +also 0 +coursesthat 0 +school 0 +index 0 +exploratori 0 +mine 0 +extend 0 +deductiona 0 +grow 0 +diversifi 0 +increasinglyimport 0 +abl 0 +dispers 0 +independ 0 +easili 0 +rodin 0 +successor 0 +investig 0 +severalissu 0 +formal 0 +practic 0 +toolkit 0 +forsemant 0 +multipl 0 +serviceand 0 +networkedclust 0 +machin 0 +ioannidi 0 +recent 0 +result 0 +explorationfrom 0 +area 0 +complex 0 +assequ 0 +seqsystem 0 +focus 0 +optimizationissu 0 +import 0 +aspect 0 +identifyingtrend 0 +identifi 0 +pattern 0 +ofinform 0 +goal 0 +retriev 0 +fromlarg 0 +focu 0 +implementingan 0 +express 0 +definit 0 +customizea 0 +take 0 +advantag 0 +specializedinform 0 +given 0 +collect 0 +indexedand 0 +cover 0 +andmin 0 +power 0 +cluster 0 +birchfor 0 +dataset 0 +tool 0 +devisea 0 +long 0 +stand 0 +extens 0 +databasequeri 0 +featuressuch 0 +structur 0 +term 0 +recurs 0 +ofarithmet 0 +specifi 0 +morecompactli 0 +effici 0 +ongo 0 +involv 0 +continu 0 +coraldeduct 0 +upon 0 +fixpointevalu 0 +make 0 +efficientacross 0 +broad 0 +rang 0 +sudarshan 0 +time 0 +sudarsha 0 +srivastava 0 +deduct 0 +object 0 +orient 0 +divesh 0 +transit 0 +closur 0 +seshadri 0 +managementfirst 0 +praveen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..7e39af89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,163 @@ +madison 1 +know 0 +fall 0 +india 0 +love 0 +rahul 0 +home 0 +long 0 +littl 0 +offici 0 +final 0 +semest 0 +comput 0 +came 0 +kanpur 0 +interest 0 +kind 0 +livabl 0 +think 0 +summer 0 +california 0 +great 0 +place 0 +comedi 0 +show 0 +contact 0 +page 0 +kapoorhello 0 +internet 0 +surfer 0 +welcom 0 +cyber 0 +hope 0 +stai 0 +enough 0 +third 0 +master 0 +student 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +get 0 +bachelor 0 +degre 0 +indianinstitut 0 +technolog 0 +employ 0 +pleas 0 +check 0 +resum 0 +cours 0 +schedulemydepartmentmyuniversityiitkanpuriitkclass 0 +relatedlink 0 +menow 0 +gone 0 +want 0 +person 0 +well 0 +normal 0 +born 0 +andrais 0 +small 0 +famili 0 +compris 0 +parent 0 +elder 0 +sister 0 +nice 0 +town 0 +call 0 +state 0 +good 0 +fortun 0 +live 0 +moneymagazin 0 +rate 0 +citi 0 +year 0 +editormust 0 +come 0 +greenland 0 +winter 0 +guess 0 +shouldn 0 +complain 0 +spring 0 +isawesom 0 +jose 0 +work 0 +almaden 0 +research 0 +centr 0 +cannot 0 +much 0 +suppos 0 +anywai 0 +regret 0 +time 0 +area 0 +whatev 0 +monei 0 +magazin 0 +sai 0 +northern 0 +like 0 +music 0 +take 0 +look 0 +collect 0 +movi 0 +almost 0 +genr 0 +though 0 +prefer 0 +romanc 0 +file 0 +star 0 +trek 0 +read 0 +stuff 0 +novel 0 +philosophi 0 +surf 0 +sport 0 +concern 0 +watch 0 +cricket 0 +plai 0 +win 0 +tenni 0 +figur 0 +skate 0 +gymnast 0 +try 0 +learn 0 +swim 0 +bridg 0 +natur 0 +lover 0 +enjoi 0 +go 0 +walk 0 +hike 0 +cloudi 0 +slightli 0 +breezi 0 +wish 0 +could 0 +job 0 +televis 0 +travel 0 +youget 0 +world 0 +paid 0 +musicstuffmovi 0 +televisioninternettravelotherbookmark 0 +meget 0 +form 0 +rest 0 +guestbookrahul 0 +wisc 0 +eduh 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..8879afa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,76 @@ +raman 1 +rajesh 0 +comput 0 +scienc 0 +univers 0 +current 0 +page 0 +depart 0 +madison 0 +wisc 0 +system 0 +home 0 +old 0 +homm 0 +off 0 +west 0 +dayton 0 +street 0 +email 0 +telephon 0 +ohio 0 +wesleyan 0 +major 0 +mathemat 0 +minor 0 +music 0 +first 0 +year 0 +graduat 0 +student 0 +winsonsin 0 +person 0 +curriculum 0 +vita 0 +postscript 0 +specif 0 +cours 0 +architectur 0 +saluja 0 +perform 0 +evalu 0 +model 0 +livni 0 +distribut 0 +inform 0 +work 0 +team 0 +member 0 +condor 0 +project 0 +integr 0 +part 0 +committe 0 +bookmark 0 +chimera 0 +novelti 0 +monster 0 +chao 0 +subject 0 +contradict 0 +prodigi 0 +judg 0 +thing 0 +feebleworm 0 +earth 0 +depositari 0 +truth 0 +cloaca 0 +uncertainti 0 +error 0 +theglori 0 +shame 0 +blais 0 +pascal 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..d2141983 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,78 @@ +databas 1 +jeffrei 1 +naughton 1 +comput 0 +scienc 0 +depart 0 +wisconsin 0 +karthik 0 +pagekarthikeyan 0 +ramasamyabouti 0 +student 0 +univers 0 +research 0 +interest 0 +mostli 0 +oper 0 +system 0 +work 0 +paradis 0 +project 0 +projectshack 0 +david 0 +dewitt 0 +connectivityparadis 0 +parallel 0 +pthread 0 +wrapperspublicationsstorag 0 +estim 0 +multidimension 0 +aggreg 0 +presenc 0 +hierarchi 0 +amit 0 +shukla 0 +prasad 0 +deshpand 0 +karthikeyan 0 +ramasami 0 +intern 0 +confer 0 +larg 0 +mumbai 0 +bombai 0 +avail 0 +presentationsweb 0 +picturearchitectur 0 +altern 0 +scalabl 0 +serversphoto 0 +albumencount 0 +leafperson 0 +inforesum 0 +financemonei 0 +wall 0 +street 0 +journal 0 +person 0 +interestshack 0 +photographycontact 0 +informationstreet 0 +address 0 +dayton 0 +madison 0 +electron 0 +mail 0 +addresskarthik 0 +wisc 0 +eduoffic 0 +phone 0 +number 0 +comment 0 +suggestionspleas 0 +tell 0 +think 0 +home 0 +page 0 +might 0 +improv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..e1525863 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,21 @@ +kelli 1 +page 0 +home 0 +ratliffoffic 0 +phone 0 +email 0 +wisc 0 +edulast 0 +login 0 +offic 0 +hour 0 +inform 0 +student 0 +genealog 0 +interest 0 +place 0 +visit 0 +space 0 +construct 0 +stai 0 +tune 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..a745b5e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,45 @@ +compress 1 +ratnakar 0 +madison 0 +viresh 0 +page 0 +research 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +imag 0 +invok 0 +avail 0 +assist 0 +area 0 +digit 0 +video 0 +advisor 0 +miron 0 +livni 0 +main 0 +interest 0 +base 0 +vector 0 +quantiz 0 +fractal 0 +qualiti 0 +control 0 +lossi 0 +product 0 +mode 0 +public 0 +home 0 +qclicauthor 0 +qclic 0 +qclicbrows 0 +thing 0 +rever 0 +reveal 0 +click 0 +west 0 +dayton 0 +street 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..7c0c71b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,78 @@ +offic 1 +omin 0 +haven 0 +address 0 +number 0 +monasteriu 0 +doominu 0 +brother 0 +richard 0 +madison 0 +comput 0 +mondai 0 +wednesdai 0 +welcom 0 +without 0 +beard 0 +person 0 +univers 0 +wisconsin 0 +scienc 0 +depart 0 +west 0 +dayton 0 +street 0 +rcarl 0 +wisc 0 +hour 0 +thur 0 +home 0 +page 0 +current 0 +cours 0 +load 0 +distribut 0 +oper 0 +system 0 +underwat 0 +fire 0 +prevent 0 +saturdai 0 +advanc 0 +architectur 0 +tuesdai 0 +thursdai 0 +math 0 +introduct 0 +whole 0 +emphasi 0 +sundai 0 +subsurfac 0 +depositori 0 +engin 0 +grave 0 +dig 0 +fridai 0 +mani 0 +shade 0 +profession 0 +doom 0 +polit 0 +goofi 0 +solitari 0 +innebri 0 +vampir 0 +seriou 0 +nostalg 0 +funki 0 +monk 0 +fellow 0 +order 0 +ever 0 +need 0 +graphic 0 +artist 0 +desktop 0 +publish 0 +check 0 +best 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..fd48478c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,558 @@ +program 1 +rep 0 +comput 0 +languag 0 +scienc 0 +softwar 0 +slice 0 +analysi 0 +horwitz 0 +proceed 0 +york 0 +wisconsin 0 +univers 0 +depart 0 +interprocedur 0 +also 0 +engin 0 +note 0 +popl 0 +confer 0 +graph 0 +integr 0 +madison 0 +system 0 +algorithm 0 +increment 0 +problem 0 +symposium 0 +base 0 +sigsoft 0 +semant 0 +sigplan 0 +juli 0 +environ 0 +dagstuhl 0 +binklei 0 +synthes 0 +gener 0 +teitelbaum 0 +topla 0 +paper 0 +springer 0 +verlag 0 +januari 0 +depend 0 +sagiv 0 +click 0 +ramalingam 0 +record 0 +access 0 +dataflow 0 +develop 0 +us 0 +transact 0 +ieee 0 +lectur 0 +june 0 +shape 0 +yang 0 +editor 0 +reprint 0 +principl 0 +merg 0 +attribut 0 +acta 0 +version 0 +evalu 0 +intern 0 +april 0 +cornel 0 +research 0 +public 0 +manipul 0 +solv 0 +reachabl 0 +thesi 0 +applic 0 +prin 0 +novemb 0 +societi 0 +symposiumon 0 +germani 0 +differenc 0 +grammar 0 +implement 0 +precis 0 +demand 0 +manual 0 +journal 0 +appear 0 +second 0 +notic 0 +foundat 0 +decemb 0 +august 0 +project 0 +associ 0 +tool 0 +algebra 0 +current 0 +idfa 0 +book 0 +third 0 +publish 0 +procedur 0 +interf 0 +modif 0 +direct 0 +alamito 0 +colloquium 0 +washington 0 +portland 0 +octob 0 +wilhelm 0 +updat 0 +releas 0 +page 0 +pointer 0 +categor 0 +post 0 +support 0 +special 0 +chop 0 +tosem 0 +licens 0 +pfeiffer 0 +logic 0 +compcon 0 +refer 0 +informatica 0 +dynam 0 +methodolog 0 +inform 0 +process 0 +demer 0 +syntax 0 +septemb 0 +march 0 +fromacm 0 +berzin 0 +sigsoftsymposium 0 +francisco 0 +seminar 0 +schloss 0 +wadern 0 +destruct 0 +twenti 0 +rosai 0 +denmark 0 +parallel 0 +copenhagen 0 +ofprogram 0 +submit 0 +center 0 +dissert 0 +tech 0 +thoma 0 +home 0 +interest 0 +alia 0 +index 0 +list 0 +visitor 0 +doctor 0 +aim 0 +complex 0 +particular 0 +work 0 +oper 0 +element 0 +instanc 0 +reus 0 +technolog 0 +larg 0 +solut 0 +ics 0 +david 0 +fseb 0 +thesiswuu 0 +esop 0 +amast 0 +poplb 0 +pepma 0 +handl 0 +distribut 0 +transform 0 +fsea 0 +diku 0 +fase 0 +pepmb 0 +lape 0 +psde 0 +spaa 0 +world 0 +toconst 0 +propag 0 +theoret 0 +path 0 +accommod 0 +properti 0 +tree 0 +commun 0 +european 0 +databas 0 +theori 0 +combin 0 +paradigmsfor 0 +brighton 0 +abramski 0 +maibaum 0 +scientif 0 +interact 0 +hill 0 +wherefor 0 +sigoa 0 +text 0 +fourth 0 +petersburg 0 +time 0 +pepm 0 +onparti 0 +jolla 0 +california 0 +fifth 0 +iowa 0 +citi 0 +bricker 0 +workshop 0 +variabl 0 +fifteenth 0 +diego 0 +principlesof 0 +latest 0 +report 0 +ibfi 0 +twentieth 0 +charleston 0 +tutori 0 +represent 0 +multi 0 +univ 0 +professor 0 +repsprofessorcomput 0 +departmentunivers 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +wisc 0 +telephon 0 +secretari 0 +curriculum 0 +vita 0 +thehom 0 +content 0 +summari 0 +student 0 +summarymi 0 +creat 0 +thedevelop 0 +object 0 +createtool 0 +provid 0 +power 0 +specif 0 +manipulationoper 0 +explor 0 +slicingcan 0 +serv 0 +basi 0 +respect 0 +elementss 0 +includ 0 +thatmight 0 +affect 0 +either 0 +directli 0 +transit 0 +valu 0 +thevari 0 +member 0 +allow 0 +findsemant 0 +meaning 0 +decomposit 0 +thedecomposit 0 +consist 0 +textual 0 +contigu 0 +fundament 0 +solvingmani 0 +applicationsin 0 +understand 0 +mainten 0 +debug 0 +test 0 +worker 0 +carri 0 +atimprov 0 +underli 0 +relatedoper 0 +slicer 0 +method 0 +andbuild 0 +clickherefor 0 +recent 0 +establish 0 +unexpect 0 +connect 0 +betweeninterprocedur 0 +previou 0 +oninterprocedur 0 +show 0 +class 0 +interproceduraldataflow 0 +transformingthem 0 +kind 0 +polynomi 0 +timebi 0 +origin 0 +subject 0 +mean 0 +make 0 +probleminst 0 +find 0 +nearbi 0 +publicationsprogram 0 +overview 0 +slicing_pat 0 +pldi 0 +thesismerg 0 +iwscm 0 +popla 0 +iwsvcc 0 +ccpsd 0 +npfo_submiss 0 +ccipl 0 +prog_integration_system 0 +prog_integration_manu 0 +describ 0 +small 0 +subsetof 0 +pascal 0 +obtain 0 +clickingher 0 +retarget 0 +andexpect 0 +anddifferenc 0 +probabl 0 +miscellan 0 +thesesdavid 0 +thesisphil 0 +thesisinterprocedur 0 +analysisdemand 0 +bottom 0 +magic 0 +set 0 +exhaust 0 +tcs_ide_pap 0 +ptime 0 +complet 0 +acta_pap 0 +pfeiffer_thesi 0 +cacm 0 +jalg_pap 0 +popl_not 0 +publicationsbooksrep 0 +constructinglanguag 0 +edit 0 +chines 0 +corpor 0 +beij 0 +china 0 +press 0 +cambridg 0 +publicationssagiv 0 +sequenti 0 +natur 0 +shortest 0 +j_alg 0 +call 0 +competit 0 +line 0 +prioriti 0 +order 0 +letter 0 +preservingtransform 0 +effici 0 +comparison 0 +grammarswith 0 +unrestrict 0 +movement 0 +sublinear 0 +space 0 +context 0 +invit 0 +papershorwitz 0 +fourteenth 0 +conferenceon 0 +melbourn 0 +australia 0 +nanci 0 +franc 0 +ganzing 0 +chaptersrep 0 +ramakrishnan 0 +kluwer 0 +academ 0 +boston 0 +chang 0 +impact 0 +bohner 0 +arnold 0 +fromproceed 0 +architectur 0 +ichikawa 0 +tsubotani 0 +compani 0 +singapor 0 +barstow 0 +sandewal 0 +shrobe 0 +mcgraw 0 +wasserman 0 +publicationssiff 0 +turnidg 0 +partial 0 +danvi 0 +glueck 0 +thiemann 0 +hentenryck 0 +bind 0 +imper 0 +formalapproach 0 +aarhu 0 +moss 0 +nielsen 0 +schwartzbach 0 +tapsoft 0 +speed 0 +onth 0 +orlean 0 +compilerconstruct 0 +edinburgh 0 +scotland 0 +fritzson 0 +maintain 0 +domin 0 +reducibleflowgraph 0 +first 0 +scan 0 +data 0 +andarchitectur 0 +velen 0 +onalgebra 0 +preserv 0 +irvin 0 +jone 0 +illustr 0 +interfer 0 +softwareconfigur 0 +manag 0 +princeton 0 +languagedesign 0 +issuesin 0 +barcelona 0 +spain 0 +diaz 0 +oreja 0 +programminglanguag 0 +design 0 +atlanta 0 +variant 0 +forprogram 0 +versionand 0 +configur 0 +control 0 +grassau 0 +bericht 0 +german 0 +chapter 0 +winkler 0 +teubner 0 +stuttgart 0 +adequaci 0 +repres 0 +marceau 0 +remot 0 +thirteenth 0 +engineeringsymposium 0 +practic 0 +pittsburgh 0 +alpern 0 +proof 0 +check 0 +eleventh 0 +onprincipl 0 +salt 0 +lake 0 +utah 0 +static 0 +digest 0 +spring 0 +optim 0 +ninth 0 +albuquerqu 0 +tosyntax 0 +eighth 0 +williamsburg 0 +softwarerep 0 +site 0 +herefor 0 +patentsrep 0 +patent 0 +number 0 +pend 0 +submissionsrep 0 +reportsrep 0 +abstract 0 +analys 0 +leeuwen 0 +mehlhorn 0 +datalogisk 0 +institut 0 +psramalingam 0 +bibliographi 0 +unpublish 0 +present 0 +klint 0 +snelt 0 +identifi 0 +differ 0 +extendedabstract 0 +ball 0 +correct 0 +reconstitut 0 +equival 0 +theorem 0 +demonstr 0 +prototyp 0 +doc 0 +studentsvisitor 0 +mooli 0 +israel 0 +jiazhen 0 +robert 0 +paig 0 +nation 0 +chiao 0 +tung 0 +taiwan 0 +north 0 +carolina 0 +chapel 0 +studentsramalingam 0 +bound 0 +programintegr 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..bff5edf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,69 @@ +saeed 1 +madison 1 +home 0 +window 0 +depart 0 +contact 0 +function 0 +statusclock 0 +comput 0 +scienc 0 +indian 0 +univ 0 +wisconsin 0 +time 0 +love 0 +beauti 0 +right 0 +pagespe 0 +statu 0 +date 0 +settimeout 0 +speed 0 +clearid 0 +cleartimeout 0 +mirza 0 +tech 0 +engin 0 +institut 0 +technolog 0 +kanpur 0 +graduat 0 +student 0 +lucknow 0 +india 0 +like 0 +spend 0 +listn 0 +film 0 +song 0 +netsurf 0 +read 0 +comic 0 +hero 0 +calvin 0 +peopl 0 +seem 0 +offic 0 +randal 0 +apart 0 +comp 0 +dayton 0 +street 0 +best 0 +email 0 +wisc 0 +friend 0 +pictur 0 +wismad 0 +suggest 0 +send 0 +check 0 +guestbook 0 +page 0 +access 0 +sinc 0 +last 0 +updat 0 +copi 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..4ccb5f46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,32 @@ +salli 1 +peterson 0 +wisc 0 +comput 0 +home 0 +page 0 +goodwin 0 +lecturercomput 0 +scienc 0 +univers 0 +wisconsin 0 +dayton 0 +madison 0 +mail 0 +edutelephon 0 +interest 0 +desktop 0 +real 0 +time 0 +oper 0 +system 0 +program 0 +languag 0 +cours 0 +taught 0 +fall 0 +comp 0 +lectur 0 +us 0 +last 0 +chang 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..031a72bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,129 @@ +databas 1 +marathon 0 +madison 0 +scienc 0 +amit 0 +home 0 +page 0 +comput 0 +univers 0 +interest 0 +process 0 +confer 0 +protocol 0 +wisconsin 0 +educ 0 +naughton 0 +technolog 0 +research 0 +onlin 0 +intern 0 +larg 0 +vldb 0 +server 0 +sort 0 +data 0 +inform 0 +transact 0 +olap 0 +warn 0 +reach 0 +offic 0 +email 0 +wisc 0 +snail 0 +mail 0 +dept 0 +princeton 0 +phone 0 +work 0 +toward 0 +guidanc 0 +jeff 0 +master 0 +bachelor 0 +engin 0 +indian 0 +institut 0 +madra 0 +analyt 0 +queri 0 +perform 0 +evalu 0 +public 0 +storag 0 +estim 0 +multidimension 0 +aggreg 0 +presenc 0 +hierarchi 0 +shukla 0 +prasad 0 +deshpand 0 +jeffrei 0 +karthikeyan 0 +ramasami 0 +mumbai 0 +bombai 0 +paper 0 +postscript 0 +slide 0 +present 0 +link 0 +run 0 +boston 0 +chicago 0 +york 0 +seattl 0 +georg 0 +utah 0 +relat 0 +pointer 0 +network 0 +activ 0 +bibliographi 0 +logic 0 +program 0 +competit 0 +profil 0 +spec 0 +idea 0 +name 0 +trier 0 +date 0 +sigmod 0 +base 0 +endow 0 +articl 0 +archiv 0 +massiv 0 +digit 0 +system 0 +mdd 0 +initi 0 +multimedia 0 +sourc 0 +nation 0 +industri 0 +infrastructur 0 +niiip 0 +consortium 0 +council 0 +transcoop 0 +manag 0 +support 0 +cooper 0 +applic 0 +introduct 0 +pilot 0 +softwar 0 +help 0 +needi 0 +children 0 +look 0 +asha 0 +person 0 +pageand 0 +bookmarksar 0 +also 0 +garfield 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..47e9c8b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,38 @@ +ashwin 1 +page 1 +depart 1 +home 0 +construct 0 +name 0 +graduat 0 +student 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +madison 0 +come 0 +india 0 +hadmi 0 +undergradu 0 +educ 0 +indianinstitut 0 +technolog 0 +bombai 0 +iitb 0 +fantast 0 +place 0 +worth 0 +visit 0 +like 0 +contact 0 +canfing 0 +meto 0 +find 0 +whereabout 0 +altern 0 +send 0 +email 0 +sashwin 0 +wisc 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..ece45b53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,166 @@ +like 1 +also 0 +photo 0 +iitk 0 +much 0 +sastri 0 +year 0 +album 0 +scienc 0 +time 0 +cricket 0 +someth 0 +read 0 +subramanya 0 +home 0 +mine 0 +come 0 +hospet 0 +awai 0 +tungabhadra 0 +place 0 +dont 0 +long 0 +kanpur 0 +depart 0 +comput 0 +graduat 0 +wisconsin 0 +madison 0 +interest 0 +field 0 +favourit 0 +anyth 0 +compani 0 +friend 0 +pleasant 0 +watch 0 +back 0 +pagei 0 +mugshot 0 +town 0 +karnataka 0 +india 0 +school 0 +near 0 +hampi 0 +ruin 0 +vijayanagara 0 +empir 0 +built 0 +across 0 +river 0 +beauti 0 +unfortun 0 +photograph 0 +would 0 +scan 0 +undergradu 0 +indian 0 +institut 0 +technolog 0 +engin 0 +wonder 0 +fewphotograph 0 +gokul 0 +maintain 0 +contain 0 +mani 0 +class 0 +homepag 0 +inform 0 +classmatesat 0 +presentcurr 0 +student 0 +univers 0 +plan 0 +cours 0 +registeredfor 0 +spring 0 +interestsmi 0 +academ 0 +architectur 0 +program 0 +languag 0 +compil 0 +hope 0 +sport 0 +us 0 +playphatta 0 +tenni 0 +ball 0 +version 0 +thati 0 +champ 0 +provid 0 +entertainmentin 0 +bookmark 0 +link 0 +site 0 +enjoi 0 +listen 0 +music 0 +must 0 +consid 0 +hard 0 +rock 0 +metal 0 +realli 0 +donot 0 +whatev 0 +seinfeld 0 +sshow 0 +goe 0 +voraci 0 +reader 0 +rather 0 +whati 0 +better 0 +horror 0 +fantasi 0 +neither 0 +fiction 0 +unsuccesfulli 0 +tri 0 +grip 0 +earth 0 +know 0 +mean 0 +hint 0 +romanc 0 +ifposs 0 +jeffrei 0 +archer 0 +author 0 +date 0 +jane 0 +austen 0 +pride 0 +prejudic 0 +talk 0 +ramesh 0 +mahadeven 0 +sarticl 0 +make 0 +plai 0 +bridg 0 +pick 0 +wonderfulgam 0 +itagain 0 +solv 0 +crossword 0 +cryptic 0 +kind 0 +inth 0 +past 0 +devot 0 +hobbi 0 +last 0 +updat 0 +januari 0 +send 0 +comment 0 +suggest 0 +wisc 0 +eduunivers 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..e3a38f15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,219 @@ +scout 1 +servic 0 +internet 0 +project 0 +inform 0 +report 0 +scienc 0 +madison 0 +research 0 +wisconsin 0 +nation 0 +includ 0 +staff 0 +susan 0 +page 0 +support 0 +time 0 +join 0 +year 0 +intern 0 +open 0 +posit 0 +departmentunivers 0 +wisc 0 +manag 0 +comput 0 +depart 0 +sciencefound 0 +user 0 +educ 0 +commun 0 +provid 0 +tool 0 +goal 0 +andeduc 0 +work 0 +week 0 +organ 0 +toolkit 0 +kid 0 +peopl 0 +receiv 0 +email 0 +read 0 +resourc 0 +public 0 +devot 0 +merit 0 +also 0 +network 0 +base 0 +diego 0 +agreement 0 +reloc 0 +speciallibrarian 0 +systemadministr 0 +addit 0 +specif 0 +calcari 0 +home 0 +calcarimanag 0 +servicescomput 0 +madisonsc 0 +edumi 0 +titl 0 +univers 0 +scoutservic 0 +internicand 0 +thehigh 0 +bestresourc 0 +help 0 +effect 0 +filter 0 +hundr 0 +item 0 +edit 0 +import 0 +present 0 +multipl 0 +usabl 0 +format 0 +happen 0 +come 0 +soonth 0 +sprout 0 +newslett 0 +written 0 +andthousand 0 +annotatedlist 0 +best 0 +newli 0 +discov 0 +kind 0 +select 0 +itemsinclud 0 +issu 0 +happeningspost 0 +everi 0 +weekdai 0 +thousand 0 +orth 0 +newsgroup 0 +moreinform 0 +profession 0 +background 0 +involv 0 +wide 0 +sinc 0 +wheni 0 +arbor 0 +thensfnet 0 +backbon 0 +informationservic 0 +divis 0 +spent 0 +three 0 +speak 0 +tonat 0 +higher 0 +audienc 0 +internetand 0 +develop 0 +produc 0 +seminarseri 0 +first 0 +seminar 0 +seri 0 +focus 0 +need 0 +internetend 0 +later 0 +becam 0 +director 0 +forcerfnet 0 +respect 0 +wrote 0 +propos 0 +result 0 +award 0 +part 0 +internicproject 0 +cooper 0 +third 0 +termin 0 +theport 0 +chose 0 +continu 0 +workof 0 +elect 0 +andrequest 0 +approv 0 +thecomput 0 +heartilyagre 0 +futur 0 +plan 0 +servicesat 0 +jack 0 +solock 0 +expand 0 +theaddit 0 +june 0 +matthew 0 +livesei 0 +aproject 0 +assist 0 +expans 0 +thescout 0 +disciplin 0 +asscout 0 +area 0 +studi 0 +branch 0 +researcharea 0 +collabor 0 +group 0 +campus 0 +potenti 0 +topic 0 +includenetwork 0 +discoveri 0 +retriev 0 +nidr 0 +anddisciplin 0 +orient 0 +gather 0 +depend 0 +onth 0 +natur 0 +hire 0 +willincludecomput 0 +graduat 0 +undergradu 0 +level 0 +visit 0 +site 0 +find 0 +ofour 0 +mail 0 +list 0 +ifyou 0 +interest 0 +appli 0 +theonlin 0 +descript 0 +special 0 +librarian 0 +send 0 +aresum 0 +write 0 +sampl 0 +address 0 +feel 0 +free 0 +contactm 0 +telephon 0 +calcariinternet 0 +dayton 0 +street 0 +scal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..34dcf4a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,53 @@ +chandrasekar 1 +comput 1 +scienc 1 +page 0 +coimbator 0 +school 0 +educ 0 +madison 0 +home 0 +welcom 0 +homepag 0 +worri 0 +happi 0 +present 0 +graduat 0 +student 0 +depart 0 +past 0 +born 0 +june 0 +southern 0 +state 0 +tamilnadu 0 +inindia 0 +high 0 +higher 0 +secondari 0 +undergradu 0 +theindian 0 +institut 0 +technolog 0 +kharagpur 0 +major 0 +dept 0 +engin 0 +person 0 +stuff 0 +resid 0 +kendal 0 +avenu 0 +officedept 0 +dayton 0 +sivasankaran 0 +schandra 0 +wisc 0 +last 0 +updat 0 +finger 0 +find 0 +whereabout 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..88c6195f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,31 @@ +schnarr 1 +wisc 0 +wisconsin 0 +eric 0 +home 0 +pageer 0 +research 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaphon 0 +advisor 0 +larusresearch 0 +interest 0 +architectur 0 +descript 0 +languagesfunct 0 +languag 0 +designinterest 0 +link 0 +wind 0 +tunnel 0 +sacm 0 +hockei 0 +club 0 +dragon 0 +byte 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..d1c0747d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,66 @@ +schoina 1 +yanni 0 +page 0 +research 0 +mark 0 +hill 0 +share 0 +memori 0 +ioanni 0 +babak 0 +falsafi 0 +alvin 0 +lebeck 0 +steven 0 +reinhardt 0 +jame 0 +laru 0 +david 0 +wood 0 +univers 0 +crete 0 +iraklio 0 +home 0 +wisc 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +note 0 +construct 0 +advisor 0 +interest 0 +parallel 0 +systemspubl 0 +fine 0 +grain 0 +access 0 +control 0 +distribut 0 +sixth 0 +intern 0 +confer 0 +architectur 0 +support 0 +programminglanguag 0 +oper 0 +system 0 +asplo 0 +applic 0 +specif 0 +protocol 0 +user 0 +level 0 +ann 0 +roger 0 +supercomput 0 +educ 0 +last 0 +updat 0 +juli 0 +cretan 0 +cook 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..4ddcca2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,37 @@ +comput 1 +beverli 0 +seavei 0 +danc 0 +ramayana 0 +india 0 +southeast 0 +asia 0 +finger 0 +home 0 +page 0 +current 0 +regist 0 +grad 0 +student 0 +scienc 0 +switch 0 +special 0 +degre 0 +biologi 0 +interest 0 +mine 0 +includ 0 +asian 0 +classic 0 +differ 0 +version 0 +drama 0 +ramakien 0 +wish 0 +could 0 +give 0 +account 0 +hairbal 0 +keyboard 0 +keeper 0 +instead 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..ed3e60c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,62 @@ +home 1 +page 0 +madison 0 +colvil 0 +wisc 0 +welcom 0 +school 0 +uwisc 0 +world 0 +scott 0 +pagescott 0 +mail 0 +eduoffic 0 +address 0 +comput 0 +scienc 0 +offic 0 +dayton 0 +franc 0 +well 0 +back 0 +univers 0 +wisconsin 0 +seen 0 +largest 0 +ball 0 +chees 0 +want 0 +link 0 +pagein 0 +addit 0 +list 0 +find 0 +interest 0 +hopefulli 0 +also 0 +enjoi 0 +beer 0 +drink 0 +game 0 +absolut 0 +add 0 +caffein 0 +rate 0 +soda 0 +guid 0 +lock 0 +pickingand 0 +educ 0 +artsi 0 +fact 0 +book 0 +constitut 0 +english 0 +dictionari 0 +roget 0 +thesauru 0 +poetri 0 +databas 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..f6977dbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,59 @@ +motion 1 +anim 0 +steve 0 +seitz 0 +imag 0 +analysi 0 +problem 0 +graphic 0 +model 0 +virtual 0 +writeup 0 +charact 0 +control 0 +chuck 0 +dyerour 0 +research 0 +motiv 0 +teachinga 0 +perform 0 +realist 0 +hasit 0 +root 0 +cartoon 0 +modern 0 +applic 0 +tocomput 0 +realiti 0 +teleconferenc 0 +robot 0 +task 0 +endow 0 +knowledg 0 +performa 0 +repertoir 0 +interest 0 +learn 0 +beinvok 0 +directli 0 +high 0 +level 0 +cue 0 +smile 0 +walk 0 +infer 0 +anabstract 0 +goal 0 +store 0 +cu 0 +levelev 0 +input 0 +devic 0 +sequenc 0 +project 0 +includ 0 +period 0 +track 0 +rigid 0 +nonrigid 0 +object 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..76f86ed4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,85 @@ +view 1 +interpol 1 +imag 1 +scene 0 +seitz 0 +synthesi 0 +techniqu 0 +morph 0 +work 0 +origin 0 +steve 0 +autom 0 +produc 0 +physic 0 +valid 0 +describ 0 +us 0 +investig 0 +chuck 0 +dyerw 0 +devis 0 +provabl 0 +correct 0 +creat 0 +basi 0 +reli 0 +geometr 0 +known 0 +graphicscommun 0 +intermedi 0 +although 0 +techniquescurr 0 +enjoi 0 +widespread 0 +theoret 0 +validityha 0 +establish 0 +particular 0 +viewsof 0 +sequenc 0 +ofthat 0 +surprisingli 0 +answer 0 +provid 0 +first 0 +undergo 0 +simplerectif 0 +procedur 0 +certain 0 +assumpt 0 +visibl 0 +theproject 0 +process 0 +satisfi 0 +stereo 0 +todetermin 0 +correspond 0 +recent 0 +consid 0 +user 0 +interact 0 +guid 0 +comput 0 +three 0 +differ 0 +pair 0 +therectifi 0 +shown 0 +left 0 +right 0 +click 0 +theinterpol 0 +center 0 +mpeg 0 +movi 0 +show 0 +computedinterpol 0 +dyer 0 +proc 0 +workshop 0 +represent 0 +visual 0 +last 0 +chang 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..3a971583 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,94 @@ +morph 1 +view 0 +imag 0 +mpeg 0 +movi 0 +interpol 0 +scene 0 +techniqu 0 +differ 0 +viewpoint 0 +click 0 +resolut 0 +steve 0 +seitz 0 +chuck 0 +dyer 0 +appear 0 +us 0 +object 0 +pose 0 +simpl 0 +camera 0 +shape 0 +chang 0 +produc 0 +mona 0 +lisa 0 +frame 0 +investig 0 +relat 0 +public 0 +siggraph 0 +toward 0 +base 0 +represent 0 +icpr 0 +gener 0 +compel 0 +transit 0 +betweenimag 0 +howev 0 +often 0 +causeunnatur 0 +distort 0 +difficult 0 +correct 0 +manual 0 +basic 0 +principl 0 +projectivegeometri 0 +paper 0 +introduc 0 +extens 0 +morphingthat 0 +correctli 0 +handl 0 +project 0 +transform 0 +call 0 +work 0 +prewarp 0 +imagesprior 0 +comput 0 +postwarp 0 +knowledg 0 +requir 0 +appliedto 0 +photograph 0 +draw 0 +well 0 +render 0 +abil 0 +synthes 0 +structureafford 0 +wide 0 +varieti 0 +interest 0 +effect 0 +imagetransform 0 +taken 0 +illus 0 +physic 0 +move 0 +virtual 0 +face 0 +simultan 0 +facial 0 +color 0 +reflect 0 +high 0 +jude 0 +shavlik 0 +last 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..375faf6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,127 @@ +period 1 +imag 0 +motion 0 +trace 0 +inform 0 +refer 0 +cycl 0 +cyclic 0 +frame 0 +determin 0 +correspond 0 +tempor 0 +composit 0 +real 0 +move 0 +sequenc 0 +object 0 +poscript 0 +paper 0 +recov 0 +turntabl 0 +scene 0 +pure 0 +us 0 +enhanc 0 +invari 0 +analysi 0 +steve 0 +seitz 0 +chuck 0 +dyermani 0 +life 0 +instanc 0 +human 0 +locomotori 0 +walk 0 +run 0 +skip 0 +shuffl 0 +areperiod 0 +person 0 +havedevelop 0 +approach 0 +could 0 +beenproduc 0 +whose 0 +unlik 0 +previou 0 +attempt 0 +ourapproach 0 +allow 0 +camera 0 +film 0 +click 0 +tracethi 0 +show 0 +line 0 +imagesequ 0 +phonograph 0 +ramp 0 +moment 0 +timewher 0 +momentarili 0 +slow 0 +shownsuperimpos 0 +error 0 +surfac 0 +repeat 0 +tend 0 +perfectli 0 +even 0 +variesslightli 0 +next 0 +physic 0 +import 0 +changesin 0 +gener 0 +defin 0 +motionsthat 0 +make 0 +variat 0 +explicit 0 +represent 0 +call 0 +compact 0 +describ 0 +evolutionof 0 +without 0 +spatial 0 +quantiti 0 +asposit 0 +veloc 0 +delimit 0 +identifi 0 +correspondencesacross 0 +provid 0 +mean 0 +parsinga 0 +addit 0 +sever 0 +featur 0 +canb 0 +deriv 0 +relat 0 +natur 0 +locat 0 +irregular 0 +tracecan 0 +also 0 +medic 0 +fromdiffer 0 +furthermor 0 +reliabl 0 +recoveredfrom 0 +view 0 +fashion 0 +theori 0 +affin 0 +clickher 0 +heart 0 +angiograph 0 +bottom 0 +note 0 +additionalstructur 0 +visibl 0 +appar 0 +singl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..1a81a189 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,47 @@ +seitz 1 +imag 0 +comput 0 +steve 0 +motion 0 +analysi 0 +vision 0 +view 0 +mpeg 0 +movi 0 +click 0 +home 0 +page 0 +wisc 0 +graduat 0 +student 0 +berkelei 0 +math 0 +area 0 +interest 0 +base 0 +render 0 +machin 0 +graphic 0 +research 0 +project 0 +morph 0 +synthesi 0 +show 0 +interpol 0 +left 0 +exampl 0 +cyclic 0 +recent 0 +public 0 +stuff 0 +frequent 0 +link 0 +wisconsin 0 +group 0 +surreal 0 +cach 0 +closer 0 +look 0 +last 0 +chang 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..090c5818 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,102 @@ +madison 1 +group 0 +mlrg 0 +learn 0 +machin 0 +page 0 +schedul 0 +graduat 0 +home 0 +archiv 0 +recent 0 +paper 0 +local 0 +link 0 +contain 0 +relev 0 +read 0 +seminar 0 +shavlik 0 +also 0 +comput 0 +research 0 +wisconsin 0 +tabl 0 +content 0 +member 0 +dataset 0 +domain 0 +cours 0 +august 0 +jude 0 +access 0 +current 0 +dept 0 +librari 0 +wisc 0 +agent 0 +inform 0 +thememb 0 +univers 0 +theori 0 +us 0 +extern 0 +carolyn 0 +allex 0 +jonathon 0 +bodner 0 +kevin 0 +cherkauer 0 +mark 0 +craven 0 +tina 0 +eliassi 0 +richard 0 +maclin 0 +david 0 +opitz 0 +papersvisit 0 +describ 0 +public 0 +ascii 0 +file 0 +list 0 +recentabstractsi 0 +avail 0 +theoriesy 0 +directori 0 +severalml 0 +testb 0 +breast 0 +cancer 0 +databas 0 +prof 0 +olvi 0 +mangasarian 0 +sgroup 0 +line 0 +math 0 +program 0 +biologi 0 +neurosci 0 +vision 0 +robot 0 +doit 0 +center 0 +mathemat 0 +scienc 0 +gopher 0 +school 0 +abstract 0 +select 0 +journal 0 +mostli 0 +wendt 0 +readabl 0 +proc 0 +workshop 0 +held 0 +intern 0 +confer 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..4a08d31c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,96 @@ +wisconsin 1 +shubu 0 +mukherje 0 +madison 0 +research 0 +comput 0 +isca 0 +home 0 +page 0 +wind 0 +tunnel 0 +public 0 +architect 0 +interest 0 +univers 0 +coher 0 +share 0 +memori 0 +cach 0 +protocol 0 +ppopp 0 +simul 0 +wisc 0 +fiance 0 +mimi 0 +nephew 0 +avirup 0 +month 0 +graduat 0 +assist 0 +scienc 0 +departmentunivers 0 +west 0 +dayton 0 +street 0 +usaphon 0 +shubhendu 0 +click 0 +button 0 +jump 0 +correspond 0 +articl 0 +advisor 0 +mark 0 +hill 0 +project 0 +world 0 +wide 0 +badger 0 +ballroom 0 +danc 0 +team 0 +person 0 +hobbi 0 +morph 0 +dionisio 0 +courtesi 0 +steve 0 +seitz 0 +random 0 +linkseducationph 0 +spring 0 +expect 0 +tech 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +india 0 +summari 0 +network 0 +interfac 0 +dissert 0 +cachabl 0 +queue 0 +design 0 +space 0 +progress 0 +distribut 0 +mechan 0 +cooper 0 +commod 0 +workstat 0 +submit 0 +custom 0 +irregular 0 +applic 0 +grai 0 +softwar 0 +dirsw 0 +parallel 0 +tutori 0 +copyright 0 +copi 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..a2fc8b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,25 @@ +michael 1 +siff 1 +home 0 +page 0 +philosophi 0 +research 0 +academ 0 +interest 0 +run 0 +club 0 +fall 0 +midwest 0 +seminar 0 +wonder 0 +wai 0 +wast 0 +time 0 +new 0 +inform 0 +resourc 0 +book 0 +movi 0 +televis 0 +sport 0 +humor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..4f26c9be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,24 @@ +skrentni 1 +wisconsin 1 +madison 0 +wisc 0 +univers 0 +home 0 +page 0 +lecturerc 0 +coordinatorgradu 0 +studentcomput 0 +scienc 0 +departmentunivers 0 +dayton 0 +offic 0 +comput 0 +sciencesemail 0 +edutelephon 0 +relat 0 +link 0 +depart 0 +groupskrentni 0 +last 0 +updat 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..8e6b86ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,53 @@ +wisconsin 1 +univers 1 +madison 0 +scienc 0 +comput 0 +system 0 +unix 0 +bryan 0 +intellig 0 +help 0 +knowledg 0 +represent 0 +travi 0 +util 0 +home 0 +page 0 +graduat 0 +studentcomput 0 +departmentunivers 0 +dayton 0 +offic 0 +mail 0 +wisc 0 +edutelephon 0 +purdu 0 +interest 0 +human 0 +interact 0 +oper 0 +activ 0 +select 0 +recent 0 +public 0 +step 0 +toward 0 +technic 0 +report 0 +april 0 +miller 0 +fredriksen 0 +empir 0 +studi 0 +reliabl 0 +commun 0 +relat 0 +link 0 +depart 0 +group 0 +professor 0 +larri 0 +advisor 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..b76e6a4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,128 @@ +system 1 +intellig 1 +artifici 0 +wisc 0 +expert 0 +comput 0 +data 0 +databas 0 +larri 0 +travi 0 +interest 0 +procedur 0 +control 0 +deduct 0 +support 0 +social 0 +research 0 +recent 0 +larg 0 +inform 0 +integr 0 +develop 0 +project 0 +altern 0 +metaphor 0 +west 0 +magazin 0 +home 0 +page 0 +travisprofessorcomput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +dayton 0 +madison 0 +mail 0 +edutelephon 0 +univers 0 +californa 0 +angel 0 +automat 0 +understand 0 +complex 0 +philosoph 0 +foundat 0 +ofartifici 0 +manag 0 +implic 0 +summari 0 +center 0 +around 0 +us 0 +logic 0 +basi 0 +knowledg 0 +formal 0 +augment 0 +databasesystem 0 +work 0 +focus 0 +automaticdeduct 0 +design 0 +contruct 0 +displai 0 +test 0 +high 0 +level 0 +abstract 0 +pattern 0 +form 0 +informationcontain 0 +heterogen 0 +special 0 +attent 0 +beingdevot 0 +represent 0 +geograph 0 +waysthat 0 +enhanc 0 +visualiz 0 +map 0 +activ 0 +involv 0 +sever 0 +andwith 0 +incorpor 0 +model 0 +visual 0 +aid 0 +singl 0 +organiz 0 +issu 0 +associ 0 +introduct 0 +technolog 0 +analysi 0 +suppositionsunderli 0 +approach 0 +current 0 +student 0 +chuck 0 +ohar 0 +bryan 0 +scott 0 +swanson 0 +andi 0 +whitsitt 0 +derek 0 +zahn 0 +public 0 +could 0 +failur 0 +implement 0 +oravec 0 +appear 0 +journal 0 +softwar 0 +reflex 0 +examin 0 +falsework 0 +societi 0 +landscap 0 +link 0 +dept 0 +group 0 +last 0 +chang 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..b44b1251 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,63 @@ +comput 1 +sodani 0 +depart 0 +scienc 0 +madison 0 +india 0 +link 0 +avinash 0 +page 0 +univers 0 +wisconsin 0 +interest 0 +project 0 +packag 0 +relat 0 +home 0 +wisc 0 +graduat 0 +student 0 +west 0 +dayton 0 +street 0 +educ 0 +tech 0 +hon 0 +indian 0 +institut 0 +technolog 0 +kharagpur 0 +juli 0 +academ 0 +architectur 0 +multiscalar 0 +kestrel 0 +program 0 +languag 0 +compil 0 +cours 0 +java 0 +download 0 +meet 0 +batch 0 +mate 0 +info 0 +center 0 +rank 0 +new 0 +hindu 0 +onlin 0 +edit 0 +random 0 +look 0 +kgpite 0 +follow 0 +toll 0 +free 0 +directori 0 +will 0 +world 0 +cricket 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..857308db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,252 @@ +sohi 1 +comput 0 +symposium 0 +austin 0 +intern 0 +architectur 0 +processor 0 +multiscalar 0 +scienc 0 +parallel 0 +research 0 +current 0 +univers 0 +instruct 0 +memori 0 +program 0 +microarchitectur 0 +recent 0 +depart 0 +wisconsin 0 +madison 0 +execut 0 +breach 0 +annual 0 +decemb 0 +dynam 0 +franklin 0 +pnevmatikato 0 +engin 0 +system 0 +gener 0 +micro 0 +technic 0 +report 0 +address 0 +level 0 +multiprocessor 0 +design 0 +perform 0 +cycl 0 +character 0 +exploit 0 +novemb 0 +file 0 +load 0 +ieee 0 +vijaykumar 0 +access 0 +guard 0 +predict 0 +fine 0 +grain 0 +wisc 0 +educ 0 +interest 0 +summari 0 +graduat 0 +student 0 +talk 0 +illinoi 0 +urbana 0 +electr 0 +process 0 +compil 0 +investig 0 +ordinari 0 +group 0 +sever 0 +issu 0 +studi 0 +avail 0 +paradigm 0 +given 0 +compress 0 +postscript 0 +framemak 0 +appear 0 +zero 0 +reduc 0 +hardwar 0 +mechan 0 +transact 0 +streamlin 0 +regist 0 +effici 0 +detect 0 +pointer 0 +arrai 0 +branch 0 +analysi 0 +april 0 +guri 0 +home 0 +page 0 +gurindar 0 +associ 0 +professor 0 +andelectr 0 +public 0 +graduatesaddress 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usasohi 0 +eduphon 0 +department 0 +offic 0 +elect 0 +electron 0 +birla 0 +institut 0 +technolog 0 +pilani 0 +india 0 +share 0 +focus 0 +thehighest 0 +uniprocessor 0 +circa 0 +plenti 0 +transistor 0 +availableon 0 +chip 0 +challeng 0 +resourc 0 +getth 0 +highest 0 +possibl 0 +sequenti 0 +target 0 +sustain 0 +ofov 0 +numer 0 +applic 0 +needto 0 +resolv 0 +goal 0 +achiev 0 +thenatur 0 +numericappl 0 +order 0 +understand 0 +andhow 0 +could 0 +bulk 0 +effort 0 +expend 0 +continu 0 +thedevelop 0 +model 0 +novel 0 +develop 0 +andcarri 0 +detail 0 +simul 0 +assessth 0 +potenti 0 +concept 0 +todd 0 +scott 0 +andrea 0 +moshovo 0 +vijaykumarrec 0 +talkswil 0 +set 0 +import 0 +futur 0 +risc 0 +held 0 +watson 0 +researchcent 0 +yorktown 0 +height 0 +place 0 +publicationshigh 0 +bandwidth 0 +translat 0 +multipl 0 +inrd 0 +appendix 0 +ofdetail 0 +resultsi 0 +also 0 +support 0 +latencyt 0 +superscalar 0 +processorsj 0 +smith 0 +proceed 0 +reorder 0 +referencesm 0 +data 0 +cach 0 +fast 0 +calcul 0 +anatomi 0 +request 0 +combin 0 +arbitrari 0 +interconnect 0 +network 0 +lebeck 0 +distribut 0 +error 0 +sigplan 0 +confer 0 +languag 0 +implement 0 +goodman 0 +handbook 0 +press 0 +control 0 +flow 0 +traffic 0 +inter 0 +oper 0 +communicationin 0 +expand 0 +split 0 +window 0 +depend 0 +errorst 0 +processorsd 0 +knapsack 0 +hierarchi 0 +componentt 0 +tetra 0 +evalu 0 +serial 0 +processorst 0 +juli 0 +gradstodd 0 +softwar 0 +latencydionisio 0 +incorpor 0 +exist 0 +setsmanoj 0 +architecturemark 0 +friedman 0 +januari 0 +prolog 0 +executionsriram 0 +vajapeyam 0 +crai 0 +processormen 0 +chow 0 +chiang 0 +septemb 0 +base 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..17c6a28a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,118 @@ +solomon 1 +comput 0 +manag 0 +home 0 +page 0 +marvin 0 +scienc 0 +softwar 0 +program 0 +proc 0 +conf 0 +data 0 +michael 0 +andmarvin 0 +astech 0 +report 0 +chair 0 +depart 0 +wisconsin 0 +madison 0 +interest 0 +object 0 +databas 0 +system 0 +develop 0 +environ 0 +languag 0 +sigmod 0 +june 0 +mark 0 +mcauliff 0 +carei 0 +odyssea 0 +tsatalo 0 +version 0 +appear 0 +persist 0 +paul 0 +adam 0 +avail 0 +shore 0 +project 0 +professor 0 +former 0 +goodman 0 +univers 0 +west 0 +dayton 0 +street 0 +phone 0 +wisc 0 +research 0 +orient 0 +support 0 +distribut 0 +oper 0 +network 0 +design 0 +implement 0 +theori 0 +recent 0 +publicationstoward 0 +effect 0 +effici 0 +free 0 +space 0 +abstractpostscriptth 0 +gmap 0 +versatil 0 +tool 0 +physic 0 +independ 0 +larg 0 +septemb 0 +andyanni 0 +ioannidi 0 +abstractpostscriptexpand 0 +inth 0 +vldb 0 +journalv 0 +april 0 +abstractpostscriptshor 0 +applic 0 +david 0 +dewitt 0 +franklin 0 +nanci 0 +hall 0 +jeffrei 0 +naughton 0 +daniel 0 +schuh 0 +seth 0 +white 0 +andmichael 0 +zwillingavail 0 +overview 0 +capitl 0 +fourth 0 +intern 0 +workshop 0 +configur 0 +updat 0 +lectur 0 +note 0 +logic 0 +point 0 +graphic 0 +interfac 0 +room 0 +built 0 +us 0 +java 0 +spring 0 +univ 0 +photoalbum 0 +todai 0 +dilbert 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..88bc42fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,6 @@ +sowmya 1 +home 0 +page 0 +welcom 0 +subramanian 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..f8cb1d37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,78 @@ +shilpa 1 +comput 1 +scienc 1 +lawand 0 +home 0 +page 0 +wisconsin 0 +link 0 +stuff 0 +wisc 0 +welcom 0 +pagei 0 +graduat 0 +student 0 +depart 0 +univers 0 +madison 0 +person 0 +stuffa 0 +pastfor 0 +info 0 +schoolher 0 +resum 0 +html 0 +ascii 0 +second 0 +love 0 +us 0 +resours 0 +want 0 +place 0 +syster 0 +women 0 +relat 0 +madisonsurf 0 +madisonst 0 +inform 0 +serverth 0 +hoofer 0 +sail 0 +clubowl 0 +music 0 +book 0 +movi 0 +java 0 +signatur 0 +meet 0 +first 0 +lovesnowi 0 +homepag 0 +cool 0 +linksher 0 +iswher 0 +finger 0 +three 0 +judg 0 +panel 0 +philadelphia 0 +vote 0 +constitut 0 +follow 0 +read 0 +decis 0 +access 0 +time 0 +sinc 0 +june 0 +send 0 +comment 0 +suggest 0 +email 0 +tossl 0 +shilpal 0 +thru 0 +guest 0 +formlast 0 +modifi 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..7828782b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,32 @@ +home 1 +comput 0 +scienc 0 +page 0 +jeremi 0 +stenglein 0 +depart 0 +madison 0 +section 0 +graduat 0 +student 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +offic 0 +phone 0 +mail 0 +stenglei 0 +wisc 0 +teach 0 +gener 0 +take 0 +construct 0 +compil 0 +link 0 +pageth 0 +simpson 0 +pageespn 0 +sport 0 +hotwir 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..718363a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,57 @@ +reinhardt 1 +page 1 +wisconsin 1 +depart 1 +steve 0 +home 0 +steven 0 +graduat 0 +student 0 +computerarchitectur 0 +work 0 +wind 0 +tunnelgroup 0 +advisor 0 +david 0 +wood 0 +although 0 +project 0 +mark 0 +hill 0 +andjim 0 +laru 0 +often 0 +feel 0 +free 0 +tell 0 +mewhat 0 +well 0 +plan 0 +finish 0 +fall 0 +join 0 +faculti 0 +ofth 0 +univers 0 +michigan 0 +eec 0 +januari 0 +interest 0 +find 0 +publicationsresearch 0 +summari 0 +email 0 +stever 0 +wisc 0 +click 0 +finger 0 +phone 0 +comput 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usalast 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..87c08fee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,58 @@ +comput 1 +john 0 +home 0 +strikwerda 0 +page 0 +scienc 0 +wisconsin 0 +inform 0 +numer 0 +analysi 0 +chicago 0 +nathan 0 +professor 0 +strikwerdadepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +email 0 +strik 0 +wisc 0 +telephon 0 +fall 0 +teach 0 +problem 0 +solv 0 +us 0 +begin 0 +januari 0 +assign 0 +nation 0 +foundat 0 +year 0 +click 0 +qualifi 0 +exam 0 +research 0 +interest 0 +fluid 0 +dynamicsmyoffici 0 +depart 0 +pageoth 0 +stuff 0 +field 0 +museum 0 +point 0 +search 0 +rate 0 +best 0 +tribun 0 +talk 0 +radio 0 +show 0 +car 0 +footballmi 0 +kid 0 +drew 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..5cb4ef65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,30 @@ +interest 1 +subba 0 +home 0 +page 0 +thing 0 +enjoi 0 +calvin 0 +hobb 0 +late 0 +show 0 +david 0 +letterman 0 +seinfeld 0 +prooocessor 0 +histor 0 +paper 0 +evalu 0 +stream 0 +buffer 0 +secondari 0 +cach 0 +replac 0 +decoupl 0 +integ 0 +execut 0 +superscalar 0 +processor 0 +subbarao 0 +cambridg 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..1df680a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,83 @@ +parallel 1 +taiwan 1 +chiang 0 +page 0 +processor 0 +schedul 0 +mari 0 +vernon 0 +home 0 +depart 0 +univers 0 +wisconsin 0 +madisonoffic 0 +stelephon 0 +mail 0 +suhui 0 +wisc 0 +educlick 0 +send 0 +emailoffic 0 +hour 0 +thur 0 +still 0 +construct 0 +ta 0 +fall 0 +public 0 +applic 0 +characterist 0 +limit 0 +preemption 0 +complet 0 +polici 0 +rajesh 0 +mansharamani 0 +proc 0 +sigmetr 0 +conf 0 +measur 0 +model 0 +comput 0 +system 0 +nashvil 0 +dynam 0 +static 0 +quantum 0 +base 0 +alloc 0 +workshop 0 +strategi 0 +process 0 +conjunct 0 +ipp 0 +april 0 +search 0 +engin 0 +yahoo 0 +sourc 0 +resourc 0 +bibliographi 0 +world 0 +wide 0 +virtual 0 +librari 0 +subject 0 +catalogu 0 +link 0 +relat 0 +network 0 +servic 0 +sinanet 0 +shop 0 +magzin 0 +new 0 +job 0 +calendar 0 +seednet 0 +vistor 0 +guid 0 +academia 0 +sinica 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..652fe768 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,192 @@ +link 1 +page 0 +site 0 +new 0 +madison 0 +weather 0 +scienc 0 +current 0 +info 0 +variou 0 +interest 0 +articl 0 +david 0 +sundaram 0 +stukel 0 +browbeck 0 +feet 0 +class 0 +take 0 +comput 0 +view 0 +specif 0 +philosoph 0 +scientif 0 +onlin 0 +nation 0 +astronomi 0 +channel 0 +homepag 0 +upon 0 +effronteri 0 +push 0 +hand 0 +sever 0 +patient 0 +femor 0 +arteri 0 +blood 0 +spurt 0 +blind 0 +anesthetist 0 +hall 0 +scream 0 +tri 0 +knee 0 +groin 0 +manag 0 +hamstr 0 +scalpel 0 +crawl 0 +floor 0 +stab 0 +leg 0 +voilet 0 +baboon 0 +assist 0 +woman 0 +ever 0 +care 0 +damn 0 +realli 0 +wig 0 +climb 0 +tabl 0 +pois 0 +jump 0 +stomp 0 +cop 0 +rush 0 +william 0 +burrough 0 +nake 0 +lunch 0 +construct 0 +catapult 0 +reader 0 +choos 0 +index 0 +brief 0 +relat 0 +dedic 0 +smart 0 +cloth 0 +also 0 +steve 0 +mann 0 +see 0 +wearabl 0 +camera 0 +inform 0 +artifici 0 +life 0 +santa 0 +institut 0 +project 0 +call 0 +tierra 0 +thoma 0 +recent 0 +dilbert 0 +strip 0 +technic 0 +math 0 +joke 0 +somewher 0 +artist 0 +natur 0 +physic 0 +conscious 0 +surviv 0 +research 0 +laboratori 0 +destruct 0 +show 0 +organ 0 +arcosanti 0 +arcolog 0 +outsid 0 +phoenix 0 +krishnamurti 0 +foundat 0 +tell 0 +centuri 0 +beat 0 +writer 0 +includ 0 +pictur 0 +fill 0 +wait 0 +sourc 0 +packer 0 +scientist 0 +regist 0 +harass 0 +mail 0 +reward 0 +dozen 0 +factoid 0 +astound 0 +friend 0 +american 0 +advantag 0 +hypertext 0 +addit 0 +provid 0 +select 0 +print 0 +edit 0 +publish 0 +weekli 0 +contain 0 +smaller 0 +hindu 0 +newspap 0 +india 0 +onion 0 +local 0 +depart 0 +washburn 0 +observatori 0 +public 0 +univers 0 +len 0 +insignific 0 +piec 0 +histori 0 +obtain 0 +follow 0 +webweath 0 +servic 0 +home 0 +late 0 +timothi 0 +leari 0 +numer 0 +written 0 +note 0 +optimist 0 +noam 0 +chomski 0 +disinform 0 +great 0 +list 0 +conspiraci 0 +theori 0 +buri 0 +within 0 +ultra 0 +trendi 0 +movi 0 +review 0 +back 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..9668eebb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,9 @@ +swander 1 +offic 1 +brian 0 +home 0 +pagebrian 0 +think 0 +hour 0 +bookmark 0 +mark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..7a9ac597 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,107 @@ +system 1 +perform 1 +snowboard 1 +tamch 0 +oper 0 +talk 0 +paper 0 +ariel 0 +comput 0 +scienc 0 +fall 0 +parallel 0 +distribut 0 +differ 0 +municip 0 +bond 0 +share 0 +memori 0 +spring 0 +network 0 +file 0 +version 0 +tamchesari 0 +research 0 +assistantemail 0 +wisc 0 +depart 0 +west 0 +dayton 0 +street 0 +madison 0 +typic 0 +pose 0 +angri 0 +posei 0 +organ 0 +colleg 0 +park 0 +offic 0 +sresearch 0 +paradyn 0 +toolsstatu 0 +search 0 +thesi 0 +topic 0 +els 0 +interest 0 +toolsparallel 0 +systemsbluesth 0 +simpsonsseinfeldskiingskinetkeyston 0 +favorit 0 +area 0 +joke 0 +vacum 0 +cleaner 0 +dirt 0 +attach 0 +greet 0 +peopl 0 +whoa 0 +sorri 0 +dude 0 +eventu 0 +matur 0 +gener 0 +incom 0 +hate 0 +countri 0 +music 0 +fortran 0 +cool 0 +link 0 +yahooespncpu 0 +infoskinetoth 0 +stuff 0 +exokernel 0 +architectur 0 +applic 0 +level 0 +resourc 0 +manag 0 +octob 0 +techniqu 0 +tool 0 +improv 0 +callaghan 0 +supercomput 0 +interconnect 0 +april 0 +zebra 0 +stripe 0 +need 0 +structur 0 +raid 0 +block 0 +wait 0 +free 0 +highli 0 +concurr 0 +object 0 +asynchron 0 +multiprocessor 0 +postscript 0 +analysi 0 +risc 0 +instruct 0 +enhanc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..6bb9ce08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,102 @@ +pictur 1 +home 0 +page 0 +look 0 +show 0 +lampert 0 +good 0 +like 0 +babi 0 +someon 0 +tick 0 +turn 0 +click 0 +take 0 +work 0 +music 0 +song 0 +sound 0 +jeff 0 +pagejeff 0 +ricardo 0 +montalban 0 +voic 0 +welcom 0 +know 0 +nota 0 +least 0 +still 0 +foron 0 +incrimin 0 +doesn 0 +make 0 +aconvict 0 +high 0 +school 0 +yearbook 0 +lasttim 0 +never 0 +heard 0 +cute 0 +think 0 +well 0 +found 0 +coupl 0 +threaten 0 +intoa 0 +human 0 +dispens 0 +took 0 +henc 0 +befound 0 +separ 0 +anautograph 0 +copi 0 +sign 0 +name 0 +monitor 0 +pictureappear 0 +choos 0 +link 0 +weasel 0 +seek 0 +pace 0 +basic 0 +factswho 0 +person 0 +last 0 +night 0 +academ 0 +relatedwhat 0 +class 0 +dept 0 +resum 0 +entertainmentbook 0 +movi 0 +program 0 +newsgroup 0 +import 0 +subjectsfriendsno 0 +sick 0 +theme 0 +hobbi 0 +club 0 +organizationsgroup 0 +plu 0 +wish 0 +inmi 0 +favorit 0 +linksugh 0 +servo 0 +juli 0 +andrew 0 +fire 0 +crow 0 +mstk 0 +eclect 0 +paraphenaliai 0 +would 0 +miscellan 0 +straight 0 +forward 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..b9bf0cad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,18 @@ +mathematicalprogram 1 +todd 0 +homepagetodd 0 +homepagein 0 +fall 0 +teach 0 +section 0 +sinc 0 +area 0 +mathemat 0 +program 0 +plug 0 +page 0 +contain 0 +wealth 0 +inform 0 +tmunson 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..efcb0523 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian 1 +home 1 +pagebrian 1 +toonen 1 +comput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +dayton 1 +streetmadison 1 +offic 1 +cswhatev 1 +chief 1 +seattleth 1 +ground 1 +tipi 1 +medit 1 +life 1 +itsmean 1 +accept 1 +kinship 1 +creatur 1 +acknowledgingun 1 +univers 1 +thing 1 +infus 1 +thetru 1 +essenc 1 +civil 1 +luther 1 +stand 1 +bear 1 +oglala 1 +siouxlast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..30a0f2fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,15 @@ +page 1 +home 0 +netscap 0 +thano 0 +tsioli 0 +site 0 +enhanc 0 +read 0 +shouldconsid 0 +upgrad 0 +browser 0 +latest 0 +version 0 +ifthat 0 +option 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..ad6b18b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,56 @@ +madison 1 +todd 0 +turnidg 0 +wisconsin 0 +axi 0 +ohio 0 +dougla 0 +turnidgeschoolcomput 0 +scienc 0 +departmentunivers 0 +dayton 0 +homemuppet 0 +babylon 0 +milton 0 +eyesightright 0 +left 0 +graduat 0 +student 0 +depart 0 +comput 0 +sciencesat 0 +univers 0 +year 0 +work 0 +professorthoma 0 +rep 0 +studyingprogram 0 +languag 0 +teach 0 +section 0 +hold 0 +mathematicsand 0 +computersci 0 +case 0 +western 0 +reserveunivers 0 +locat 0 +cleveland 0 +origin 0 +kent 0 +myfamili 0 +live 0 +judg 0 +compani 0 +keep 0 +click 0 +enough 0 +evid 0 +awai 0 +long 0 +time 0 +amus 0 +shortcut 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..64284045 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,37 @@ +home 1 +page 1 +taxiao 0 +wang 0 +sinc 0 +madison 0 +offic 0 +phone 0 +visitor 0 +number 0 +welcom 0 +heavi 0 +construct 0 +click 0 +finger 0 +contact 0 +inform 0 +graduat 0 +student 0 +teach 0 +assist 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +bldg 0 +dayton 0 +street 0 +mail 0 +twang 0 +wisc 0 +visit 0 +time 0 +last 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..a31d2d95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,12 @@ +shaft 1 +home 1 +trek 1 +meet 1 +pageuri 0 +pageemail 0 +wisc 0 +eduinterest 0 +diversionsstart 0 +microsoft 0 +start 0 +window 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..e1c8cee7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,60 @@ +madison 1 +india 0 +asha 0 +ganti 0 +graduat 0 +student 0 +madra 0 +godav 0 +homepag 0 +databas 0 +real 0 +time 0 +venkatesh 0 +home 0 +pagevenkatesh 0 +vganti 0 +wisc 0 +studentoffic 0 +comput 0 +scienc 0 +depart 0 +dayton 0 +usaphon 0 +note 0 +page 0 +construct 0 +past 0 +present 0 +univers 0 +wisconsin 0 +fall 0 +earlier 0 +undergradu 0 +nativ 0 +kakinada 0 +andhra 0 +pradesh 0 +info 0 +basic 0 +educ 0 +click 0 +know 0 +hostel 0 +yearbook 0 +hope 0 +onlin 0 +sometim 0 +research 0 +interest 0 +work 0 +till 0 +btech 0 +project 0 +want 0 +look 0 +genesi 0 +group 0 +last 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..d8cbe45c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,62 @@ +multiscalar 1 +regist 0 +vijaykumar 0 +sohi 0 +commun 0 +vijai 0 +comput 0 +scienc 0 +madison 0 +compil 0 +architectur 0 +processor 0 +breach 0 +intern 0 +symposium 0 +wisc 0 +depart 0 +univers 0 +wisconsin 0 +project 0 +file 0 +annual 0 +microarchitectur 0 +micro 0 +home 0 +page 0 +profession 0 +affili 0 +contact 0 +address 0 +dayton 0 +street 0 +phone 0 +email 0 +advisor 0 +guri 0 +educ 0 +doctor 0 +august 0 +undergradu 0 +birla 0 +institut 0 +technolog 0 +pilani 0 +india 0 +research 0 +dissert 0 +distribut 0 +design 0 +anatomi 0 +strategi 0 +submit 0 +schedul 0 +architecturet 0 +go 0 +work 0 +memori 0 +data 0 +depend 0 +predict 0 +person 0 +side 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..fe848cd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,74 @@ +comput 1 +quantum 1 +watrou 0 +john 0 +scienc 0 +link 0 +bibliographi 0 +home 0 +theori 0 +inform 0 +page 0 +laboratori 0 +pagejohn 0 +wisc 0 +departmentunivers 0 +wisconsin 0 +madison 0 +dayton 0 +streetmadison 0 +telephon 0 +public 0 +dimension 0 +cellular 0 +automata 0 +proc 0 +symp 0 +foundat 0 +polynomi 0 +time 0 +algorithm 0 +artin 0 +whapl 0 +approxim 0 +theorem 0 +number 0 +fourth 0 +confer 0 +canadiannumb 0 +associ 0 +assort 0 +archiv 0 +stanford 0 +oxford 0 +particl 0 +beam 0 +physic 0 +ucla 0 +theoret 0 +montreal 0 +lanl 0 +preprint 0 +hypertext 0 +project 0 +hypatia 0 +gener 0 +refer 0 +element 0 +stylehypertext 0 +webster 0 +interfaceroget 0 +thesauru 0 +random 0 +parasol 0 +recordsplayst 0 +linksweath 0 +forecast 0 +madisonth 0 +isthmu 0 +daili 0 +pagemathemat 0 +quotat 0 +servermathematician 0 +biographiesgeek 0 +site 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..26afe9b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,108 @@ +entertain 1 +univers 1 +send 0 +date 0 +food 0 +affect 0 +home 0 +page 0 +recit 0 +plai 0 +ship 0 +world 0 +show 0 +movi 0 +weiru 0 +eiru 0 +email 0 +ppppleas 0 +find 0 +around 0 +sometim 0 +think 0 +english 0 +speaker 0 +commit 0 +asylum 0 +verbal 0 +insan 0 +languag 0 +peopl 0 +truck 0 +cargo 0 +havenos 0 +feet 0 +smell 0 +richard 0 +leder 0 +three 0 +possibl 0 +part 0 +least 0 +must 0 +beoffer 0 +customari 0 +begina 0 +seri 0 +great 0 +deal 0 +moder 0 +amountof 0 +merest 0 +suggest 0 +amount 0 +ofaffect 0 +increas 0 +reduc 0 +proportion 0 +longer 0 +call 0 +circumst 0 +omit 0 +miss 0 +manner 0 +guid 0 +excruciatingli 0 +correct 0 +behaviour 0 +peke 0 +friend 0 +physic 0 +depart 0 +alumni 0 +associ 0 +atmadison 0 +littl 0 +grei 0 +cell 0 +pictur 0 +game 0 +late 0 +david 0 +letterman 0 +studio 0 +wish 0 +postcard 0 +someon 0 +review 0 +favorit 0 +hockei 0 +player 0 +steve 0 +francai 0 +dictionnairefrancai 0 +anglai 0 +dictionnair 0 +softwar 0 +relatif 0 +lafrancophoni 0 +test 0 +degrammair 0 +francais 0 +french 0 +lesson 0 +weather 0 +forecast 0 +madison 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..ea672d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,82 @@ +comput 1 +depart 0 +scienc 0 +univers 0 +manag 0 +home 0 +page 0 +student 0 +peopl 0 +republ 0 +jose 0 +technolog 0 +system 0 +platform 0 +includ 0 +us 0 +softwar 0 +corpor 0 +memori 0 +welcom 0 +zhang 0 +first 0 +year 0 +graduat 0 +hometown 0 +shanghai 0 +china 0 +educ 0 +wisconsin 0 +madison 0 +state 0 +california 0 +tsinghua 0 +beij 0 +chinaemail 0 +weiz 0 +wisc 0 +eduwork 0 +experiencecontractor 0 +develop 0 +variou 0 +inform 0 +differ 0 +windowsnt 0 +solari 0 +tuxedo 0 +pathwai 0 +design 0 +tandem 0 +engin 0 +sherpa 0 +oper 0 +nasa 0 +am 0 +research 0 +center 0 +hobbiesma 0 +jiangbridg 0 +card 0 +game 0 +tabl 0 +tenni 0 +pingpong 0 +joggingth 0 +ultim 0 +challengesolv 0 +mine 0 +sweeper 0 +expert 0 +level 0 +puzzl 0 +within 0 +second 0 +without 0 +cheat 0 +quot 0 +dayth 0 +best 0 +ackowledgementthi 0 +written 0 +framework 0 +provid 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..37d5b2e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,62 @@ +home 1 +page 1 +kent 0 +wenger 0 +madison 0 +scienc 0 +wisconsin 0 +project 0 +work 0 +data 0 +welcom 0 +note 0 +definit 0 +still 0 +construct 0 +preparedfor 0 +pothol 0 +need 0 +pictur 0 +scan 0 +wengerassoci 0 +researchercomput 0 +departmentunivers 0 +west 0 +dayton 0 +streetmadison 0 +telephon 0 +email 0 +wisc 0 +edufing 0 +workth 0 +main 0 +arecod 0 +cluster 0 +provid 0 +anddevis 0 +explor 0 +andvisu 0 +come 0 +good 0 +acronym 0 +importantpart 0 +wouldn 0 +agre 0 +visualizationproduc 0 +devis 0 +softwar 0 +peopl 0 +yanni 0 +ioannidi 0 +miron 0 +livnyraghu 0 +ramakrishnanmor 0 +inform 0 +univers 0 +dbm 0 +research 0 +groupuw 0 +comput 0 +pagewiscinfo 0 +personallinksimageslast 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..38cd045a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,117 @@ +wisconsin 1 +project 0 +comput 0 +tempest 0 +wind 0 +tunnel 0 +share 0 +interfac 0 +hardwar 0 +memori 0 +slide 0 +page 0 +like 0 +level 0 +compil 0 +softwar 0 +implement 0 +paradyn 0 +architectur 0 +home 0 +projectmost 0 +futur 0 +massiv 0 +parallel 0 +built 0 +fromworkst 0 +node 0 +program 0 +high 0 +parallellanguag 0 +support 0 +address 0 +space 0 +whichprocess 0 +uniformli 0 +refer 0 +data 0 +seek 0 +develop 0 +consensu 0 +aboutth 0 +middl 0 +languag 0 +abovesystem 0 +first 0 +propos 0 +wascoop 0 +evolutionari 0 +extens 0 +toconvent 0 +recent 0 +havebeen 0 +work 0 +revolutionari 0 +call 0 +provid 0 +mechan 0 +allow 0 +programm 0 +andprogram 0 +librari 0 +messag 0 +pass 0 +transparentshar 0 +hybrid 0 +combin 0 +developingimplement 0 +think 0 +machin 0 +cluster 0 +ofworkst 0 +wisconsincow 0 +hypothet 0 +platform 0 +approach 0 +cowus 0 +snoop 0 +logic 0 +fpga 0 +sram 0 +collaboratingwith 0 +adapt 0 +perform 0 +tool 0 +overviewand 0 +annot 0 +bibliographi 0 +overview 0 +talk 0 +novemb 0 +pageor 0 +four 0 +complet 0 +technic 0 +paper 0 +contributor 0 +fund 0 +sourc 0 +origin 0 +name 0 +week 0 +articl 0 +relat 0 +group 0 +scienc 0 +departmentat 0 +univers 0 +world 0 +wide 0 +inform 0 +last 0 +updat 0 +juli 0 +mark 0 +hill 0 +markhil 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..7e614f20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,85 @@ +first 1 +televis 1 +felix 0 +star 0 +trick 0 +make 0 +finger 0 +xuelin 0 +home 0 +page 0 +charact 0 +creat 0 +otto 0 +messmer 0 +base 0 +anim 0 +human 0 +person 0 +featur 0 +save 0 +whichwa 0 +shown 0 +famou 0 +rival 0 +chaplin 0 +keaton 0 +princ 0 +wale 0 +pick 0 +polo 0 +team 0 +mascot 0 +pictur 0 +accompani 0 +charl 0 +lindbergh 0 +across 0 +theatlant 0 +statu 0 +imag 0 +successfulli 0 +transmit 0 +develop 0 +seri 0 +somehow 0 +obtain 0 +magic 0 +didn 0 +oneev 0 +seem 0 +agre 0 +whether 0 +teeth 0 +whisker 0 +like 0 +spend 0 +time 0 +film 0 +program 0 +appear 0 +newspap 0 +comic 0 +strip 0 +advertis 0 +hundr 0 +product 0 +thing 0 +remov 0 +tail 0 +ear 0 +put 0 +back 0 +wish 0 +could 0 +give 0 +account 0 +hairbal 0 +keyboard 0 +keeper 0 +instead 0 +sui 0 +vritabl 0 +chat 0 +pass 0 +partout 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..74069b2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,329 @@ +ioannidi 1 +proc 0 +system 0 +queri 0 +intern 0 +databas 0 +visual 0 +optim 0 +septemb 0 +confer 0 +complex 0 +histogram 0 +schema 0 +livni 0 +issu 0 +data 0 +ramakrishnan 0 +inform 0 +problem 0 +estim 0 +comput 0 +integr 0 +transact 0 +haber 0 +sigmod 0 +poosala 0 +vldbconfer 0 +manag 0 +scientif 0 +user 0 +interfac 0 +primarili 0 +altern 0 +parallel 0 +investig 0 +simul 0 +special 0 +schedul 0 +error 0 +size 0 +studi 0 +identifi 0 +tool 0 +decemb 0 +tod 0 +miller 0 +translat 0 +result 0 +august 0 +yanni 0 +heterogen 0 +research 0 +focus 0 +support 0 +futur 0 +sever 0 +number 0 +algorithm 0 +well 0 +valu 0 +queryoptim 0 +current 0 +solut 0 +also 0 +aris 0 +multimedia 0 +environ 0 +propag 0 +distribut 0 +disciplin 0 +gener 0 +need 0 +develop 0 +desktop 0 +scientist 0 +concentr 0 +metaphor 0 +object 0 +dynam 0 +base 0 +survei 0 +symposium 0 +march 0 +garofalaki 0 +ieee 0 +engin 0 +ofheterogen 0 +bridg 0 +theori 0 +practic 0 +join 0 +ondatabas 0 +bombai 0 +india 0 +balanc 0 +orient 0 +montreal 0 +canada 0 +workshop 0 +opossum 0 +tsatalo 0 +dublin 0 +ireland 0 +ioannidisyanni 0 +wisc 0 +eduresearch 0 +interestsdatabas 0 +andinform 0 +area 0 +scientificdata 0 +applic 0 +pose 0 +challeng 0 +toqueri 0 +ask 0 +significantli 0 +higher 0 +thanin 0 +tradit 0 +evalu 0 +much 0 +highera 0 +especi 0 +attempt 0 +tooptim 0 +time 0 +paramet 0 +parametr 0 +thu 0 +access 0 +plan 0 +process 0 +querywil 0 +extrem 0 +larg 0 +us 0 +algorithmsfor 0 +find 0 +optimum 0 +among 0 +inadequ 0 +random 0 +algorithmsa 0 +viabl 0 +interest 0 +anneal 0 +genet 0 +take 0 +advantag 0 +propertiesof 0 +look 0 +especiallythos 0 +cost 0 +alsopart 0 +try 0 +appropriateinform 0 +must 0 +maintain 0 +limit 0 +thepropag 0 +properti 0 +ofoptim 0 +approxim 0 +inrel 0 +attribut 0 +mode 0 +expect 0 +part 0 +manyexperi 0 +variou 0 +mani 0 +aspectsthat 0 +technolog 0 +readi 0 +provid 0 +involv 0 +experi 0 +managementenviron 0 +help 0 +throughout 0 +life 0 +cycl 0 +theirexperiment 0 +primari 0 +compon 0 +major 0 +work 0 +address 0 +andsemant 0 +former 0 +right 0 +arefor 0 +repres 0 +scientistsso 0 +natur 0 +power 0 +latter 0 +facilitatetransl 0 +differ 0 +format 0 +although 0 +experimentalscientif 0 +effort 0 +guid 0 +specificproject 0 +associ 0 +particular 0 +basedperform 0 +model 0 +plantgrowth 0 +spectroscopi 0 +sequenc 0 +microscop 0 +imag 0 +recent 0 +publicationsi 0 +issueon 0 +anniversari 0 +multimediasystem 0 +contain 0 +conjunct 0 +beyondrel 0 +set 0 +foundat 0 +forschema 0 +displai 0 +journal 0 +intellig 0 +juli 0 +tsangari 0 +design 0 +implement 0 +performanceevalu 0 +bermuda 0 +knowledg 0 +tkde 0 +februari 0 +januari 0 +christodoulaki 0 +limitingworst 0 +case 0 +winger 0 +transit 0 +closur 0 +algorithmsbas 0 +graph 0 +travers 0 +record 0 +divers 0 +databaseestim 0 +gupta 0 +ponnekanti 0 +experimentmanag 0 +vldb 0 +itsappl 0 +load 0 +anjur 0 +frog 0 +turtl 0 +bridgesbetween 0 +file 0 +conferenceon 0 +statist 0 +stockholm 0 +sweden 0 +june 0 +multi 0 +dimension 0 +resourc 0 +forparallel 0 +haa 0 +shekita 0 +improv 0 +forselect 0 +rang 0 +predic 0 +internationalacm 0 +layoutat 0 +multipl 0 +granular 0 +advancedvisu 0 +gubbio 0 +itali 0 +desk 0 +managementthrough 0 +customiz 0 +zurich 0 +switzerland 0 +practicalityfor 0 +sigmodconfer 0 +jose 0 +solomon 0 +gmap 0 +versatil 0 +forphys 0 +independ 0 +santiago 0 +chile 0 +unifi 0 +framework 0 +index 0 +databasesystem 0 +dexa 0 +athen 0 +greec 0 +lashkari 0 +incomplet 0 +path 0 +express 0 +theirdisambigu 0 +minneapoli 0 +flexibl 0 +schemavisu 0 +edit 0 +boston 0 +april 0 +edbt 0 +cambridg 0 +england 0 +univers 0 +serial 0 +internationalvldb 0 +capacityin 0 +wiener 0 +moos 0 +withdata 0 +program 0 +languag 0 +york 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..1f0ddd68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin 1 +zhongbin 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..ad074152 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,76 @@ +home 1 +good 0 +person 0 +cours 0 +time 0 +madison 0 +last 0 +yinng 0 +pageindexofyinongwei 0 +spagehi 0 +welcom 0 +homepag 0 +pleas 0 +look 0 +info 0 +especi 0 +employ 0 +give 0 +alsolink 0 +classmat 0 +take 0 +telephon 0 +work 0 +address 0 +offic 0 +comp 0 +stat 0 +bldg 0 +univ 0 +inforesumehobbiestravel 0 +usathi 0 +collect 0 +pictur 0 +took 0 +travel 0 +articl 0 +wrote 0 +trip 0 +chicago 0 +seattl 0 +pointersr 0 +computingmacin 0 +learningpattern 0 +recognitioncomputatin 0 +geometrydatabasevisionacadem 0 +diarythi 0 +diari 0 +everi 0 +month 0 +sometim 0 +amaz 0 +mani 0 +littl 0 +read 0 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 0 +linksmi 0 +beida 0 +classmatespek 0 +univers 0 +alumni 0 +page 0 +oversea 0 +chines 0 +organ 0 +ciumi 0 +bookmarkcom 0 +press 0 +client 0 +support 0 +send 0 +comment 0 +visitor 0 +number 0 +access 0 +modifi 0 +yinong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..0ffa589c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,44 @@ +home 1 +madison 1 +wisc 1 +convuls 1 +sera 1 +food 1 +poor 1 +call 1 +matt 0 +pageuntil 0 +around 0 +updat 0 +basic 0 +inform 0 +offic 0 +matthew 0 +zeidenbergcent 0 +wisconsin 0 +strategi 0 +observatori 0 +drive 0 +room 0 +voic 0 +gilson 0 +email 0 +zeiden 0 +eduzeidenb 0 +eduwhen 0 +california 0 +parent 0 +hous 0 +coho 0 +huntington 0 +beach 0 +beauti 0 +breton 0 +nadja 0 +beaut 0 +give 0 +saint 0 +whyth 0 +communist 0 +helder 0 +camara 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..f8b9f211 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,161 @@ +zhang 1 +tian 0 +data 0 +larg 0 +research 0 +databas 0 +raghu 0 +ramakrishnan 0 +miron 0 +livni 0 +mine 0 +cluster 0 +proc 0 +robot 0 +analysi 0 +dataset 0 +effici 0 +densiti 0 +birch 0 +method 0 +joint 0 +artifici 0 +intellig 0 +comput 0 +algorithm 0 +applic 0 +journal 0 +june 0 +ling 0 +home 0 +prof 0 +scienc 0 +wisconsin 0 +madison 0 +telephon 0 +us 0 +pattern 0 +interest 0 +design 0 +multi 0 +estim 0 +recent 0 +system 0 +sigmod 0 +canada 0 +technic 0 +motion 0 +plan 0 +topolog 0 +confer 0 +jianwei 0 +educ 0 +symposium 0 +beij 0 +page 0 +gener 0 +inform 0 +student 0 +assistantadvisor 0 +major 0 +concentr 0 +compilerminor 0 +financi 0 +invest 0 +bankingoffic 0 +room 0 +dept 0 +univ 0 +mail 0 +wisc 0 +eduoffic 0 +depart 0 +intereststher 0 +grow 0 +need 0 +exploratori 0 +discov 0 +territori 0 +develop 0 +purpos 0 +ortool 0 +integr 0 +techniqu 0 +statist 0 +thesi 0 +topic 0 +densityanalysi 0 +given 0 +dimension 0 +limit 0 +amount 0 +resourc 0 +run 0 +time 0 +memori 0 +implement 0 +accur 0 +identifi 0 +spars 0 +crowd 0 +region 0 +function 0 +overal 0 +distribut 0 +import 0 +practic 0 +branch 0 +appli 0 +mani 0 +domain 0 +dataclassif 0 +imag 0 +compress 0 +recognit 0 +project 0 +select 0 +public 0 +submit 0 +knowledg 0 +discoveri 0 +conf 0 +manag 0 +interact 0 +classif 0 +workshop 0 +issu 0 +knowledgediscoveri 0 +cooper 0 +fast 0 +probabl 0 +kernel 0 +report 0 +juli 0 +dimensionreduct 0 +ijcai 0 +findpath 0 +manipul 0 +finit 0 +divis 0 +configur 0 +space 0 +manufactur 0 +trend 0 +andmanufactur 0 +dimens 0 +reduct 0 +technolog 0 +find 0 +collis 0 +free 0 +path 0 +mobil 0 +young 0 +profession 0 +relev 0 +link 0 +document 0 +organ 0 +china 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..e808b0a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,72 @@ +research 1 +data 1 +site 1 +server 1 +yihong 0 +zhao 0 +madison 0 +relat 0 +dbm 0 +mine 0 +financi 0 +pathfind 0 +daili 0 +new 0 +home 0 +page 0 +wisc 0 +assist 0 +depart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +west 0 +dayton 0 +streetmadison 0 +advis 0 +prof 0 +jeff 0 +naughton 0 +interest 0 +parallel 0 +object 0 +line 0 +analyt 0 +process 0 +olap 0 +benchmark 0 +educationb 0 +univers 0 +north 0 +carolina 0 +chapel 0 +hillm 0 +fall 0 +wiscosin 0 +group 0 +sigmod 0 +maryland 0 +datamin 0 +microstrategi 0 +rolap 0 +arbor 0 +molap 0 +stock 0 +lombard 0 +graph 0 +kiwi 0 +club 0 +todai 0 +monei 0 +chines 0 +taiwan 0 +search 0 +engin 0 +lyco 0 +excit 0 +yahoo 0 +surf 0 +ters 0 +detail 0 +comment 0 +pgmo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..3234e976 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,19 @@ +home 1 +address 1 +madison 0 +phone 0 +offic 0 +wisc 0 +page 0 +still 0 +construct 0 +wang 0 +homepag 0 +offer 0 +inform 0 +johnson 0 +dayton 0 +street 0 +email 0 +zhewang 0 +student 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..675728c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,102 @@ +comput 1 +perform 0 +languag 0 +area 0 +parallel 0 +system 0 +studi 0 +distribut 0 +zhichen 0 +scienc 0 +research 0 +confer 0 +interest 0 +program 0 +recent 0 +techniqu 0 +environ 0 +high 0 +softwar 0 +univers 0 +fudan 0 +compil 0 +version 0 +home 0 +page 0 +depart 0 +dayton 0 +madison 0 +offic 0 +phone 0 +assist 0 +advisor 0 +professor 0 +jame 0 +larusprofessor 0 +barton 0 +millerawardbest 0 +paper 0 +award 0 +intern 0 +supercomput 0 +press 0 +juli 0 +issu 0 +anddistribut 0 +detect 0 +eliminateperform 0 +bottleneck 0 +share 0 +memori 0 +combin 0 +paradyn 0 +toolwith 0 +blizzard 0 +wisconsinwind 0 +tunnel 0 +think 0 +machin 0 +andth 0 +cluster 0 +workstat 0 +public 0 +field 0 +interestprogram 0 +tool 0 +network 0 +oper 0 +architectur 0 +evalu 0 +benchmark 0 +place 0 +work 0 +laboratori 0 +texa 0 +antonio 0 +publish 0 +ofparallel 0 +predict 0 +model 0 +simul 0 +departmentat 0 +particip 0 +sever 0 +nation 0 +project 0 +china 0 +develop 0 +levelprogram 0 +object 0 +orient 0 +technolog 0 +andimcrement 0 +click 0 +postscript 0 +html 0 +link 0 +asplo 0 +programjourn 0 +researchchines 0 +novel 0 +friend 0 +java 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..064bcb88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,25 @@ +zhang 1 +univers 1 +wisconsin 1 +madison 1 +home 0 +page 0 +hello 0 +name 0 +pictur 0 +taken 0 +invit 0 +supper 0 +theth 0 +restaur 0 +tsinghua 0 +chen 0 +weihai 0 +wang 0 +tong 0 +depart 0 +comput 0 +scienc 0 +west 0 +dayton 0 +street 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..1a407af6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,13 @@ +inform 1 +krzysztof 0 +zmudzinskikrzysztof 0 +zmudzinskispin 0 +student 0 +inc 0 +pictur 0 +poland 0 +pole 0 +thank 0 +stop 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..5fab9c5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,131 @@ +test 1 +design 0 +circuit 0 +built 0 +self 0 +perform 0 +engin 0 +comput 0 +research 0 +testabl 0 +vlsi 0 +gener 0 +area 0 +system 0 +investig 0 +saluja 0 +kewal 0 +colleg 0 +mail 0 +engr 0 +wisc 0 +data 0 +compress 0 +fault 0 +digit 0 +make 0 +littl 0 +oper 0 +laboratori 0 +engineeringunivers 0 +wisconsin 0 +madison 0 +salujaprofessor 0 +hall 0 +drivemadison 0 +eduportrait 0 +jpgdepartmentselectr 0 +engineeringcomput 0 +scienc 0 +educ 0 +univers 0 +iowa 0 +interestsdesign 0 +architectur 0 +integr 0 +toler 0 +interest 0 +testableand 0 +reliabl 0 +carri 0 +thisarea 0 +extens 0 +analysi 0 +tool 0 +theresearch 0 +involv 0 +model 0 +testgener 0 +modif 0 +enhanc 0 +inself 0 +fabric 0 +applic 0 +techniqu 0 +andfault 0 +simul 0 +process 0 +effici 0 +combin 0 +andsequenti 0 +compact 0 +methodsapplic 0 +testenviron 0 +concentr 0 +regularstructur 0 +programm 0 +logic 0 +arrai 0 +ram 0 +areinvestig 0 +algorithm 0 +implement 0 +inhardwar 0 +penalti 0 +anoth 0 +projectw 0 +wai 0 +hardwar 0 +asystem 0 +normal 0 +goal 0 +thatth 0 +continu 0 +noimpact 0 +much 0 +work 0 +us 0 +facil 0 +digitalsystem 0 +hous 0 +number 0 +station 0 +withcolor 0 +monitor 0 +termin 0 +program 0 +dept 0 +center 0 +consortia 0 +servic 0 +fountain 0 +index 0 +search 0 +credit 0 +help 0 +last 0 +modifi 0 +fridai 0 +cdtthi 0 +page 0 +best 0 +view 0 +browser 0 +support 0 +tabl 0 +photograph 0 +address 0 +comment 0 +webmast 0 +eduupd 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..ea65d723 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,135 @@ +system 1 +control 0 +duffi 0 +manufactur 0 +comput 0 +wisconsin 0 +autom 0 +engin 0 +research 0 +center 0 +engr 0 +univers 0 +develop 0 +perform 0 +neil 0 +colleg 0 +mail 0 +wisc 0 +integr 0 +space 0 +sensor 0 +product 0 +machin 0 +robot 0 +distribut 0 +experiment 0 +feedback 0 +human 0 +telerobot 0 +work 0 +engineeringunivers 0 +madison 0 +professor 0 +build 0 +drivemadison 0 +eduportrait 0 +departmentsmechan 0 +engineeringeducationb 0 +madisonm 0 +madisonphd 0 +madisonresearch 0 +interestsrobot 0 +precis 0 +micromechanismscent 0 +consortiamanufactur 0 +programwisconsin 0 +roboticsprofessor 0 +involv 0 +actuat 0 +data 0 +base 0 +advanc 0 +self 0 +guid 0 +inspect 0 +weld 0 +high 0 +materi 0 +handl 0 +finish 0 +mold 0 +rework 0 +studi 0 +highli 0 +hierarch 0 +architectur 0 +hope 0 +reduc 0 +cost 0 +complex 0 +larg 0 +scale 0 +increas 0 +flexibl 0 +fault 0 +toler 0 +construct 0 +sever 0 +incorpor 0 +real 0 +time 0 +fulli 0 +schedul 0 +optim 0 +theori 0 +explain 0 +properti 0 +associ 0 +director 0 +nasa 0 +fund 0 +emphas 0 +agricultur 0 +tactil 0 +oper 0 +method 0 +evalu 0 +well 0 +factor 0 +sensori 0 +fatigu 0 +test 0 +carri 0 +close 0 +aerospac 0 +industri 0 +teach 0 +cours 0 +automat 0 +author 0 +process 0 +dept 0 +consortia 0 +servic 0 +fountain 0 +index 0 +search 0 +credit 0 +help 0 +last 0 +modifi 0 +tuesdai 0 +cdtthi 0 +page 0 +best 0 +view 0 +browser 0 +support 0 +tabl 0 +photograph 0 +address 0 +comment 0 +webmast 0 +eduupd 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..b9788975 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,203 @@ +model 1 +mechan 0 +engin 0 +design 0 +comput 0 +physic 0 +geometr 0 +system 0 +represent 0 +behavior 0 +research 0 +part 0 +shapiro 0 +wisc 0 +aid 0 +manufactur 0 +analysi 0 +specif 0 +vadim 0 +colleg 0 +professor 0 +univers 0 +mail 0 +engr 0 +cornel 0 +geometri 0 +solid 0 +simul 0 +award 0 +chain 0 +april 0 +function 0 +center 0 +artifact 0 +algorithm 0 +us 0 +current 0 +formal 0 +investig 0 +support 0 +form 0 +process 0 +combinatori 0 +competit 0 +systemat 0 +develop 0 +engineeringunivers 0 +wisconsin 0 +madison 0 +assist 0 +avenuemadison 0 +vshapiro 0 +eduportrait 0 +jpgurl 0 +http 0 +departmentscomput 0 +sciencemechan 0 +engineeringeducationba 0 +york 0 +universitym 0 +california 0 +angelesm 0 +universityphd 0 +univeristyresearch 0 +interestscomput 0 +appli 0 +product 0 +automationcent 0 +consortiamathemat 0 +graduat 0 +programmanufactur 0 +programspati 0 +autom 0 +laboratoryselect 0 +honorsn 0 +scienc 0 +foundat 0 +career 0 +gener 0 +motor 0 +fellow 0 +select 0 +public 0 +mainten 0 +space 0 +decomposit 0 +intern 0 +journal 0 +applic 0 +palmer 0 +real 0 +rigid 0 +separ 0 +boundari 0 +convers 0 +transact 0 +graphic 0 +januari 0 +vossler 0 +interest 0 +relationship 0 +betweengeometri 0 +phenomena 0 +bemodel 0 +repres 0 +analyz 0 +manipul 0 +manufacturedbas 0 +ongo 0 +project 0 +includ 0 +abil 0 +creat 0 +convert 0 +maintain 0 +consist 0 +ofdistinct 0 +major 0 +technologicalbarri 0 +undermin 0 +reliabl 0 +commercialgeometr 0 +effort 0 +focu 0 +eliminatingambigu 0 +commun 0 +ofparametr 0 +famili 0 +novel 0 +methodsand 0 +techniqu 0 +todai 0 +fabric 0 +cannot 0 +bedescrib 0 +term 0 +discret 0 +simpl 0 +interactingprimit 0 +appar 0 +lack 0 +structur 0 +amajor 0 +roadblock 0 +collabor 0 +industri 0 +present 0 +deal 0 +withtheoret 0 +practic 0 +aspect 0 +designand 0 +seek 0 +establish 0 +basi 0 +make 0 +andmanufactur 0 +smoothintegr 0 +activ 0 +contain 0 +inform 0 +need 0 +captur 0 +thedesir 0 +tomanufactur 0 +recent 0 +studi 0 +algebra 0 +topolog 0 +call 0 +suggest 0 +possibl 0 +tounifi 0 +thu 0 +facilit 0 +ofnew 0 +tool 0 +theseand 0 +languagesand 0 +physicalobject 0 +dept 0 +consortia 0 +servic 0 +fountain 0 +index 0 +search 0 +credit 0 +help 0 +last 0 +modifi 0 +thursdai 0 +cdtthi 0 +page 0 +best 0 +view 0 +browser 0 +tabl 0 +photograph 0 +address 0 +comment 0 +webmast 0 +eduupd 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..99794aa4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,107 @@ +prover 1 +theorem 0 +prove 0 +incomplet 0 +list 0 +mathemat 0 +tech 0 +autom 0 +groupth 0 +comput 0 +scienc 0 +method 0 +first 0 +order 0 +logic 0 +report 0 +student 0 +woodi 0 +bledso 0 +hine 0 +proof 0 +hein 0 +borel 0 +groupautom 0 +group 0 +part 0 +depart 0 +univers 0 +texa 0 +ataustin 0 +produc 0 +system 0 +intend 0 +higher 0 +intent 0 +appli 0 +systemsand 0 +problem 0 +primarili 0 +also 0 +computersci 0 +technolog 0 +herei 0 +index 0 +electron 0 +avail 0 +site 0 +seri 0 +continu 0 +current 0 +techreport 0 +ad 0 +reportseri 0 +present 0 +grouplarri 0 +hinesmarti 0 +mayberrybenjamin 0 +shultsalumniprevi 0 +previou 0 +robert 0 +boyer 0 +other 0 +relat 0 +late 0 +faculti 0 +profil 0 +robertboyerj 0 +strother 0 +moorethi 0 +past 0 +visitor 0 +collaboratorswhat 0 +done 0 +implyth 0 +natur 0 +deduct 0 +proverstrivelarri 0 +inequ 0 +struvelarri 0 +theori 0 +chou 0 +geometri 0 +proverand 0 +variou 0 +improv 0 +theretoinclud 0 +mcphee 0 +feng 0 +theoryimplement 0 +descript 0 +theoremprecondit 0 +proverbledso 0 +analog 0 +theoremnqthmboy 0 +andmoor 0 +develop 0 +clinc 0 +iprshult 0 +knowledg 0 +us 0 +relatedlinksdo 0 +feedback 0 +want 0 +inform 0 +contact 0 +benjamin 0 +shult 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..a34d0ea4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,5 @@ +document 1 +moveddocu 0 +movedthi 0 +perman 0 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..a34d0ea4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_aug/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,5 @@ +document 1 +moveddocu 0 +movedthi 0 +perman 0 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..f5212ab7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +home 1 +pagec 1 +system 1 +program 1 +oper 1 +systemsc 1 +practicum 1 +systemkenneth 1 +birmanc 1 +new 1 +groupcours 1 +syllabuslectur 1 +note 1 +unix 1 +filesystem 1 +structur 1 +link 1 +static 1 +dynam 1 +assign 1 +homework 1 +solut 1 +prelim 1 +taslili 1 +upson 1 +hall 1 +phone 1 +mail 1 +lili 1 +cornel 1 +offic 1 +hour 1 +wednesdai 1 +fridai 1 +cheng 1 +huang 1 +ychuang 1 +tuesdai 1 +thursdai 1 +mihai 1 +budiu 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..63f731f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,45 @@ +home 1 +pagec 1 +system 1 +program 1 +oper 1 +systemsc 1 +practicum 1 +systemsselect 1 +displai 1 +symbol 1 +correspond 1 +postcriptdocu 1 +hand 1 +phase 1 +hocacours 1 +inform 1 +cours 1 +schedul 1 +last 1 +chang 1 +group 1 +handout 1 +format 1 +postcript 1 +penn 1 +broccoli 1 +question 1 +answer 1 +chip 1 +comput 1 +consol 1 +window 1 +exampl 1 +us 1 +tutori 1 +principl 1 +configur 1 +fileth 1 +hoca 1 +systemth 1 +specif 1 +page 1 +maintain 1 +lorenzo 1 +alvisi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..c4a1022e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,240 @@ +home 1 +pagec 1 +databas 1 +system 1 +inform 1 +retrievaldepart 1 +computersci 1 +cornel 1 +universityspr 1 +gradeshav 1 +nice 1 +summer 1 +introductionthi 1 +three 1 +credit 1 +cours 1 +cover 1 +fundament 1 +databasesystem 1 +retriev 1 +roughli 1 +twothird 1 +third 1 +topic 1 +systemsinclud 1 +follow 1 +data 1 +model 1 +entiti 1 +relationship 1 +relat 1 +physic 1 +organ 1 +index 1 +hash 1 +design 1 +queri 1 +languag 1 +queryoptim 1 +crash 1 +recoveri 1 +concurr 1 +control 1 +transactionprocess 1 +part 1 +deal 1 +find 1 +usefulinform 1 +larg 1 +textual 1 +willcov 1 +invert 1 +file 1 +vector 1 +space 1 +smartsystem 1 +similar 1 +weight 1 +rank 1 +relevancefeedback 1 +phrase 1 +gener 1 +term 1 +thesaurusconstruct 1 +evalu 1 +time 1 +permit 1 +automatictext 1 +structur 1 +summar 1 +link 1 +materi 1 +class 1 +note 1 +homework 1 +solut 1 +placetuesdai 1 +thursdai 1 +minut 1 +thurston 1 +prerequisitesc 1 +recommend 1 +booksdatabas 1 +concept 1 +korth 1 +silberschatz 1 +mcgrawhil 1 +second 1 +edit 1 +requir 1 +elmasri 1 +andnavath 1 +benjamin 1 +cum 1 +reserv 1 +principl 1 +knowledg 1 +base 1 +byullman 1 +comput 1 +scienc 1 +press 1 +photocopiedmateri 1 +salton 1 +book 1 +research 1 +paper 1 +instructor 1 +amitsingh 1 +singhal 1 +upson 1 +offic 1 +hour 1 +tuesdai 1 +teach 1 +assist 1 +sophia 1 +georgiakaki 1 +wednesdai 1 +appoint 1 +send 1 +mail 1 +marco 1 +aguilera 1 +forc 1 +amith 1 +yamasani 1 +officehour 1 +gradingexam 1 +midterm 1 +worth 1 +ofyour 1 +final 1 +grade 1 +exam 1 +yourfin 1 +five 1 +semest 1 +policiesy 1 +work 1 +group 1 +peopl 1 +clearli 1 +indic 1 +name 1 +thegroup 1 +member 1 +entir 1 +receiv 1 +samegrad 1 +avail 1 +page 1 +tuesdayand 1 +week 1 +along 1 +guid 1 +ofcours 1 +date 1 +throughth 1 +late 1 +accept 1 +illeg 1 +hard 1 +even 1 +though 1 +iti 1 +encourag 1 +type 1 +latexif 1 +possibl 1 +alreadi 1 +know 1 +goodopportun 1 +learn 1 +latex 1 +submissionpleas 1 +attach 1 +sort 1 +alphabet 1 +last 1 +also 1 +write 1 +exampl 1 +bill 1 +clinton 1 +dole 1 +ross 1 +perot 1 +homeworksgrad 1 +return 1 +sortedalphabet 1 +first 1 +thecov 1 +list 1 +pagefollow 1 +want 1 +pleas 1 +sendmail 1 +regrad 1 +policyal 1 +request 1 +submit 1 +inwrit 1 +within 1 +back 1 +schedulethi 1 +tent 1 +schedul 1 +chapter 1 +referto 1 +januari 1 +introduct 1 +read 1 +algebra 1 +availablethursdai 1 +februari 1 +tupl 1 +calculu 1 +domain 1 +integr 1 +constraint 1 +duetuesdai 1 +optim 1 +prelim 1 +march 1 +spring 1 +break 1 +transact 1 +process 1 +april 1 +retrievalthursdai 1 +modelhomework 1 +weightingthursdai 1 +indexinghomework 1 +evaluationtuesdai 1 +relev 1 +feedbackthursdai 1 +document 1 +clusteringhomework 1 +advanc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..3c9e84a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,64 @@ +home 1 +page 1 +look 1 +admin 1 +handout 1 +inform 1 +incl 1 +offic 1 +hour 1 +lectur 1 +note 1 +assign 1 +recit 1 +ethic 1 +profession 1 +social 1 +respons 1 +mayb 1 +electron 1 +submiss 1 +procedur 1 +group 1 +perform 1 +evalu 1 +resourc 1 +quot 1 +stuff 1 +collect 1 +joke 1 +start 1 +submit 1 +sumedh 1 +break 1 +new 1 +misc 1 +convert 1 +text 1 +postscript 1 +peopl 1 +ask 1 +simpl 1 +unix 1 +program 1 +enscript 1 +suggest 1 +wai 1 +pfile 1 +file 1 +first 1 +print 1 +good 1 +sourc 1 +code 1 +second 1 +give 1 +nice 1 +header 1 +gener 1 +leav 1 +send 1 +printer 1 +come 1 +last 1 +modif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..36d9c2c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,59 @@ +practicum 1 +distribut 1 +systemsor 1 +hand 1 +dirti 1 +real 1 +work 1 +cours 1 +practic 1 +aspect 1 +system 1 +studi 1 +design 1 +implement 1 +signific 1 +student 1 +also 1 +take 1 +offersa 1 +varieti 1 +project 1 +rang 1 +simpl 1 +internetworkingto 1 +complex 1 +teamsof 1 +person 1 +choos 1 +interest 1 +theywil 1 +trough 1 +semest 1 +credit 1 +hour 1 +earn 1 +depend 1 +size 1 +complexityof 1 +develop 1 +us 1 +offcial 1 +inform 1 +interact 1 +pageslink 1 +page 1 +find 1 +basic 1 +instruct 1 +descript 1 +plan 1 +progress 1 +report 1 +final 1 +present 1 +tabl 1 +contentspag 1 +comment 1 +werner 1 +vogel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..d94e4e25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,94 @@ +home 1 +page 1 +intro 1 +comput 1 +architectur 1 +fall 1 +professor 1 +saluja 1 +note 1 +contain 1 +link 1 +inform 1 +cours 1 +electr 1 +andcomput 1 +engin 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +materi 1 +intend 1 +sole 1 +studentsenrol 1 +semest 1 +prof 1 +kewal 1 +sorin 1 +generalinform 1 +outlin 1 +conduct 1 +midtermsyllabu 1 +avail 1 +midtermi 1 +project 1 +specif 1 +homework 1 +assign 1 +problem 1 +solut 1 +part 1 +valid 1 +mentor 1 +help 1 +theproject 1 +need 1 +graphic 1 +tool 1 +caeworkst 1 +pleas 1 +refer 1 +duedat 1 +follow 1 +literatur 1 +assist 1 +anyon 1 +whomai 1 +manual 1 +onlin 1 +throughbold_brows 1 +check 1 +gettingstart 1 +design 1 +architect 1 +train 1 +workbook 1 +get 1 +start 1 +quicksim 1 +trainingworkbook 1 +exersis 1 +addition 1 +thesedocu 1 +document 1 +click 1 +send 1 +email 1 +surf 1 +homepag 1 +announc 1 +new 1 +group 1 +wiscinfo 1 +gopher 1 +site 1 +uwengin 1 +server 1 +file 1 +last 1 +modifi 1 +septemb 1 +pmcst 1 +question 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..9e1cdcca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,43 @@ +homepag 1 +fall 1 +run 1 +netscap 1 +click 1 +herelink 1 +individu 1 +page 1 +frame 1 +cours 1 +info 1 +gener 1 +stuff 1 +section 1 +offic 1 +hour 1 +motw 1 +homework 1 +download 1 +handout 1 +class 1 +syllabu 1 +exam 1 +requir 1 +grade 1 +criteria 1 +schedul 1 +link 1 +refer 1 +goofi 1 +stuffnot 1 +preced 1 +contain 1 +tabl 1 +browser 1 +abl 1 +handl 1 +pleas 1 +email 1 +cornel 1 +edupag 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..8d54b813 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,43 @@ +home 1 +pagec 1 +structur 1 +interpret 1 +comput 1 +program 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +cours 1 +materi 1 +access 1 +requir 1 +user 1 +password 1 +request 1 +attempt 1 +info 1 +dylan 1 +window 1 +note 1 +browser 1 +includ 1 +netscap 1 +correctli 1 +check 1 +chang 1 +java 1 +class 1 +file 1 +thu 1 +noodll 1 +inconsist 1 +behavior 1 +work 1 +parter 1 +link 1 +directori 1 +partnerjoin 1 +util 1 +announc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..0f8c330e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,359 @@ +comput 1 +scienc 1 +fall 1 +cours 1 +informationaugust 1 +introductori 1 +cover 1 +broad 1 +rang 1 +computersci 1 +concept 1 +techniqu 1 +includ 1 +data 1 +abstract 1 +recurs 1 +program 1 +correct 1 +gener 1 +function 1 +object 1 +orient 1 +pattern 1 +match 1 +languag 1 +evalu 1 +useth 1 +dylan 1 +dynam 1 +developedat 1 +appl 1 +well 1 +suit 1 +rangeof 1 +topic 1 +courseabout 1 +happen 1 +notationthat 1 +chosen 1 +write 1 +major 1 +goal 1 +ofth 1 +teach 1 +student 1 +think 1 +clearli 1 +programsand 1 +provid 1 +toolbox 1 +modern 1 +programmingtechniqu 1 +applic 1 +take 1 +often 1 +wonder 1 +whether 1 +takec 1 +focus 1 +skill 1 +orientedlanguag 1 +java 1 +wherea 1 +exposur 1 +problem 1 +us 1 +number 1 +programmingparadigm 1 +imperativeprogram 1 +good 1 +background 1 +goodform 1 +mathemat 1 +physic 1 +probablytak 1 +transfer 1 +either 1 +direct 1 +encourag 1 +first 1 +week 1 +reach 1 +best 1 +staff 1 +post 1 +questionsor 1 +comment 1 +site 1 +http 1 +cornel 1 +info 1 +current 1 +contain 1 +materi 1 +run 1 +conot 1 +serverwhich 1 +allow 1 +question 1 +answersa 1 +annot 1 +handout 1 +set 1 +order 1 +access 1 +need 1 +request 1 +user 1 +idand 1 +password 1 +system 1 +simpli 1 +toth 1 +home 1 +page 1 +follow 1 +instruct 1 +creat 1 +mondai 1 +thisweek 1 +also 1 +send 1 +email 1 +edubut 1 +ask 1 +aboutproblem 1 +huttenloch 1 +professor 1 +upson 1 +tobia 1 +mayr 1 +upsonjam 1 +hamblin 1 +ugrad 1 +tarobert 1 +szewczyk 1 +tajustin 1 +voskuhl 1 +taandra 1 +ferencz 1 +consult 1 +melissa 1 +consultantwhen 1 +meetlectur 1 +tuesdai 1 +thursdai 1 +kimbal 1 +andrecit 1 +wednesdai 1 +recitationsexpand 1 +lectur 1 +opportunityto 1 +hour 1 +help 1 +held 1 +midnight 1 +even 1 +eachproblem 1 +schedul 1 +thu 1 +setsdu 1 +sundai 1 +mondayeven 1 +consultinghour 1 +inth 1 +public 1 +offic 1 +jame 1 +robert 1 +justin 1 +voskuhltba 1 +materialsther 1 +textbook 1 +handoutsand 1 +note 1 +avail 1 +hardcopi 1 +andon 1 +interpret 1 +free 1 +develop 1 +class 1 +implementedin 1 +capabl 1 +browser 1 +chang 1 +almost 1 +weekli 1 +netscap 1 +window 1 +borland 1 +time 1 +compil 1 +standalon 1 +version 1 +downloadonto 1 +want 1 +word 1 +warn 1 +download 1 +ontoyour 1 +machin 1 +make 1 +sure 1 +recentvers 1 +check 1 +requirementsstud 1 +respons 1 +assign 1 +read 1 +recit 1 +therewil 1 +preliminari 1 +exam 1 +final 1 +given 1 +combinationof 1 +written 1 +exercis 1 +gradeswil 1 +base 1 +combin 1 +score 1 +account 1 +approxim 1 +half 1 +thetot 1 +grade 1 +late 1 +accept 1 +willgener 1 +immedi 1 +return 1 +followingclass 1 +period 1 +complet 1 +earli 1 +work 1 +andth 1 +sittingdown 1 +matter 1 +mani 1 +long 1 +sink 1 +beforesit 1 +polici 1 +joint 1 +workmuch 1 +learn 1 +come 1 +programmingproblem 1 +jointli 1 +person 1 +peopl 1 +togeth 1 +howev 1 +youwork 1 +someon 1 +must 1 +submit 1 +singl 1 +jointassign 1 +name 1 +circumstancesmai 1 +hand 1 +done 1 +els 1 +yourown 1 +doubt 1 +credit 1 +yougot 1 +would 1 +amaz 1 +easi 1 +tell 1 +whenpeopl 1 +pleas 1 +lifeunpleas 1 +break 1 +rule 1 +facilitiescit 1 +variou 1 +colleg 1 +campu 1 +macintosh 1 +andpc 1 +facil 1 +on 1 +depart 1 +thiscours 1 +upsonmac 1 +date 1 +datesal 1 +exampl 1 +electron 1 +server 1 +mondaynight 1 +submityour 1 +solut 1 +prelim 1 +outlin 1 +studi 1 +introduct 1 +substitut 1 +model 1 +procedur 1 +process 1 +iter 1 +induct 1 +higher 1 +argument 1 +valu 1 +analysi 1 +algorithm 1 +growth 1 +structur 1 +contract 1 +implement 1 +hierarch 1 +list 1 +tree 1 +quotat 1 +reason 1 +symbol 1 +differenti 1 +extend 1 +oper 1 +type 1 +dispatch 1 +polynomi 1 +arithmet 1 +environ 1 +local 1 +state 1 +variabl 1 +inherit 1 +multimethod 1 +mutabl 1 +stack 1 +queue 1 +heap 1 +heapsort 1 +prioriti 1 +metacircular 1 +variat 1 +express 1 +optim 1 +stream 1 +infinit 1 +nonloc 1 +exit 1 +catch 1 +throw 1 +garbag 1 +collect 1 +illus 1 +memori 1 +random 1 +quicksort 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..10695e43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,88 @@ +home 1 +page 1 +introduct 1 +digit 1 +system 1 +comput 1 +organizationthorsten 1 +eickenfal 1 +kimbal 1 +btopic 1 +includ 1 +represent 1 +inform 1 +machin 1 +assembl 1 +languag 1 +processor 1 +organ 1 +interrupt 1 +memori 1 +hierarchi 1 +combinatori 1 +sequenti 1 +circuit 1 +data 1 +path 1 +control 1 +unit 1 +design 1 +microprogram 1 +helpif 1 +problem 1 +relat 1 +lectur 1 +homework 1 +project 1 +best 1 +help 1 +annot 1 +appropri 1 +point 1 +cours 1 +materi 1 +allow 1 +staff 1 +also 1 +class 1 +mate 1 +otherwis 1 +send 1 +email 1 +cornel 1 +talk 1 +consult 1 +materialsal 1 +separ 1 +server 1 +want 1 +bookmark 1 +us 1 +conot 1 +document 1 +note 1 +section 1 +file 1 +date 1 +pleas 1 +check 1 +account 1 +request 1 +post 1 +saturdai 1 +process 1 +encount 1 +difficulti 1 +read 1 +follow 1 +hidden 1 +instruct 1 +sign 1 +start 1 +mondai 1 +registr 1 +listlist 1 +made 1 +maintain 1 +thorsten 1 +eicken 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..d0a38177 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,69 @@ +home 1 +page 1 +look 1 +admin 1 +handout 1 +inform 1 +incl 1 +offic 1 +hour 1 +lectur 1 +note 1 +assign 1 +recit 1 +ethic 1 +profession 1 +social 1 +respons 1 +mayb 1 +electron 1 +submiss 1 +procedur 1 +group 1 +perform 1 +evalu 1 +resourc 1 +quot 1 +stuff 1 +collect 1 +joke 1 +start 1 +submit 1 +sumedh 1 +break 1 +new 1 +mondai 1 +held 1 +csuglab 1 +floor 1 +upson 1 +misc 1 +convert 1 +text 1 +postscript 1 +peopl 1 +ask 1 +simpl 1 +unix 1 +program 1 +enscript 1 +suggest 1 +wai 1 +pfile 1 +file 1 +first 1 +print 1 +good 1 +sourc 1 +code 1 +second 1 +give 1 +nice 1 +header 1 +gener 1 +leav 1 +send 1 +printer 1 +come 1 +last 1 +modif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..037806d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,42 @@ +home 1 +pagec 1 +system 1 +program 1 +oper 1 +systemsc 1 +practicum 1 +systemkenneth 1 +birmanc 1 +new 1 +groupcours 1 +syllabuslectur 1 +note 1 +unix 1 +filesystem 1 +structur 1 +link 1 +static 1 +dynam 1 +assign 1 +solut 1 +prelim 1 +taslili 1 +upson 1 +hall 1 +phone 1 +mail 1 +lili 1 +cornel 1 +offic 1 +hour 1 +wednesdai 1 +fridai 1 +cheng 1 +huang 1 +ychuang 1 +tuesdai 1 +thursdai 1 +mihai 1 +budiu 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..201ab3e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,135 @@ +home 1 +pagecsfound 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +welcom 1 +cours 1 +inform 1 +materi 1 +code 1 +academ 1 +integr 1 +pleas 1 +read 1 +announc 1 +clair 1 +date 1 +program 1 +move 1 +mondai 1 +atth 1 +begin 1 +class 1 +scott 1 +solut 1 +homework 1 +kevin 1 +assign 1 +notethat 1 +file 1 +rubix 1 +oper 1 +need 1 +load 1 +thefunct 1 +rearrang 1 +slightli 1 +also 1 +variabl 1 +previous 1 +defin 1 +constant 1 +chang 1 +paramet 1 +appeas 1 +compil 1 +var 1 +still 1 +bracket 1 +youus 1 +us 1 +machinesshould 1 +netscap 1 +find 1 +bind 1 +sbin 1 +found 1 +ksaunder 1 +sbinfor 1 +account 1 +seriou 1 +gremlin 1 +codefor 1 +abl 1 +look 1 +andget 1 +start 1 +planner 1 +success 1 +uponcomplet 1 +appli 1 +schema 1 +meanwhil 1 +hunt 1 +thoseus 1 +machin 1 +shouldn 1 +problem 1 +special 1 +offer 1 +limit 1 +time 1 +concern 1 +get 1 +zeroon 1 +third 1 +worri 1 +longer 1 +complet 1 +asspecifi 1 +page 1 +posit 1 +grade 1 +result 1 +guarante 1 +avail 1 +novemb 1 +oneassign 1 +group 1 +clarif 1 +unless 1 +otherwis 1 +specifi 1 +assum 1 +either 1 +system 1 +add 1 +fact 1 +dodg 1 +vanto 1 +queri 1 +alreadi 1 +exist 1 +postscript 1 +document 1 +modifi 1 +includ 1 +thisclarif 1 +newhomework 1 +coursemateri 1 +midterm 1 +portion 1 +statu 1 +report 1 +tuesdai 1 +thec 1 +section 1 +remind 1 +right 1 +pagesc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..2c3d578b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,34 @@ +fall 1 +home 1 +pagec 1 +automata 1 +comput 1 +theorywelcom 1 +click 1 +cours 1 +inform 1 +lectur 1 +note 1 +homework 1 +exam 1 +studi 1 +guideannounc 1 +avail 1 +hardcopi 1 +set 1 +offic 1 +hour 1 +incorrect 1 +date 1 +prelim 1 +revis 1 +erratum 1 +chang 1 +room 1 +nikolai 1 +hourscod 1 +academ 1 +integr 1 +pleas 1 +read 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..c35f5985 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,403 @@ +advanc 1 +databas 1 +system 1 +time 1 +tuesdai 1 +thursdai 1 +place 1 +upson 1 +survei 1 +propos 1 +project 1 +prelim 1 +exam 1 +paper 1 +evalu 1 +complet 1 +final 1 +result 1 +stat 1 +content 1 +lectur 1 +schedul 1 +samplequest 1 +answer 1 +outsidefirewal 1 +info 1 +predatordbm 1 +currentproject 1 +list 1 +refer 1 +materi 1 +handout 1 +note 1 +mail 1 +archiv 1 +cours 1 +descript 1 +prerequisit 1 +textbook 1 +inform 1 +grade 1 +professor 1 +teach 1 +assist 1 +coursedescript 1 +offer 1 +first 1 +fall 1 +intendedto 1 +give 1 +student 1 +solid 1 +background 1 +design 1 +develop 1 +databasemanag 1 +dbm 1 +possibl 1 +world 1 +slargest 1 +piec 1 +softwar 1 +certainli 1 +among 1 +valuabl 1 +piecesof 1 +sens 1 +giant 1 +applic 1 +program 1 +surprisingli 1 +mani 1 +principl 1 +behind 1 +industri 1 +grow 1 +thrive 1 +demand 1 +knowledgeabledatabas 1 +engin 1 +much 1 +greater 1 +suppli 1 +researchcommun 1 +also 1 +activ 1 +alwai 1 +problem 1 +addressedbecaus 1 +explos 1 +amount 1 +data 1 +peopl 1 +wish 1 +access 1 +thiscours 1 +form 1 +essenti 1 +anyon 1 +want 1 +becom 1 +asystem 1 +compani 1 +informedus 1 +research 1 +domain 1 +manipul 1 +larg 1 +find 1 +teller 1 +machin 1 +realli 1 +work 1 +number 1 +fundament 1 +concept 1 +cover 1 +although 1 +intend 1 +introductori 1 +newcours 1 +class 1 +differ 1 +consequ 1 +discuss 1 +variou 1 +topic 1 +begin 1 +quickreview 1 +basic 1 +taught 1 +click 1 +herefor 1 +tent 1 +term 1 +workload 1 +involv 1 +midterm 1 +examin 1 +test 1 +abreadth 1 +specif 1 +advancedtop 1 +thepurpos 1 +awar 1 +coursei 1 +fraction 1 +three 1 +weeksaft 1 +requireread 1 +journal 1 +confer 1 +proceed 1 +engineeringlibrari 1 +suggest 1 +initi 1 +pursueaddit 1 +forinform 1 +librari 1 +look 1 +written 1 +homework 1 +assign 1 +take 1 +turn 1 +write 1 +depend 1 +enrol 1 +mean 1 +person 1 +twice 1 +semest 1 +hopefulli 1 +addit 1 +us 1 +around 1 +examtim 1 +detail 1 +developmentproject 1 +import 1 +part 1 +involvea 1 +signific 1 +choos 1 +whether 1 +wishto 1 +alon 1 +team 1 +howev 1 +willinvolv 1 +proportion 1 +goal 1 +fold 1 +hand 1 +experi 1 +build 1 +compon 1 +comfort 1 +exist 1 +code 1 +base 1 +andmodifi 1 +modular 1 +manner 1 +second 1 +thefirst 1 +huge 1 +andrar 1 +luxuri 1 +start 1 +scratch 1 +forc 1 +youto 1 +understand 1 +interact 1 +thediffer 1 +inevit 1 +bug 1 +appear 1 +prototyp 1 +minibas 1 +associ 1 +simpl 1 +singl 1 +user 1 +provid 1 +fromth 1 +parser 1 +disk 1 +manag 1 +avail 1 +abl 1 +compil 1 +comput 1 +environ 1 +interfac 1 +varioussystem 1 +actual 1 +come 1 +like 1 +buffermanag 1 +predat 1 +queri 1 +process 1 +enginethat 1 +possibleproject 1 +could 1 +lead 1 +builton 1 +focu 1 +high 1 +function 1 +likecomplex 1 +type 1 +familiar 1 +recommend 1 +becauseth 1 +need 1 +minim 1 +think 1 +interestedin 1 +ifyou 1 +neither 1 +categori 1 +decid 1 +aproject 1 +lower 1 +level 1 +storag 1 +buffer 1 +area 1 +thehigh 1 +optim 1 +betweenminibas 1 +higher 1 +somegener 1 +ideaon 1 +suitabl 1 +talk 1 +well 1 +advanceof 1 +date 1 +certain 1 +step 1 +follow 1 +submitan 1 +order 1 +produc 1 +review 1 +meet 1 +discussth 1 +progress 1 +made 1 +toward 1 +must 1 +convent 1 +particularsystem 1 +documentwil 1 +close 1 +someth 1 +thati 1 +picki 1 +contribut 1 +geton 1 +submiss 1 +includ 1 +demo 1 +reason 1 +oftest 1 +home 1 +page 1 +homepag 1 +coursetextbook 1 +primari 1 +text 1 +beta 1 +edit 1 +book 1 +raghu 1 +ramakrishnan 1 +bookcontain 1 +databasebook 1 +free 1 +instruct 1 +databasesystem 1 +might 1 +thecampu 1 +store 1 +korth 1 +silberschatz 1 +mcgraw 1 +hill 1 +secondedit 1 +standard 1 +lack 1 +tobe 1 +graduat 1 +michael 1 +stonebrak 1 +read 1 +morgan 1 +kaufmann 1 +collect 1 +rel 1 +recent 1 +collectedand 1 +introduc 1 +ingr 1 +postgr 1 +andillustra 1 +corearea 1 +elmasri 1 +navath 1 +benjamin 1 +cum 1 +altern 1 +grai 1 +reuter 1 +transact 1 +techniqu 1 +bibl 1 +long 1 +tellsyou 1 +know 1 +wonderfulrefer 1 +clear 1 +confus 1 +aspect 1 +concurr 1 +control 1 +recoveri 1 +semant 1 +resourc 1 +tutori 1 +languag 1 +construct 1 +debuggingwith 1 +make 1 +gradingpolici 1 +percentag 1 +even 1 +finish 1 +anextra 1 +half 1 +hour 1 +likewis 1 +thefin 1 +period 1 +thur 1 +confirm 1 +willfocu 1 +coveredin 1 +earlier 1 +question 1 +professorpraveen 1 +seshadri 1 +offic 1 +phone 1 +praveen 1 +teachingassist 1 +weitsang 1 +hall 1 +noon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..0d223e5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,183 @@ +semant 1 +program 1 +languag 1 +content 1 +descript 1 +text 1 +prerequisiteshandoutsscrib 1 +lectur 1 +noteshomework 1 +assignmentscontact 1 +informationrelev 1 +link 1 +upson 1 +though 1 +call 1 +advanc 1 +cours 1 +book 1 +better 1 +entitl 1 +goalof 1 +conduct 1 +broad 1 +survei 1 +tech 1 +programminglanguag 1 +like 1 +java 1 +directli 1 +studi 1 +implement 1 +mechan 1 +compress 1 +dispatch 1 +tabl 1 +multipleinherit 1 +rather 1 +goal 1 +principlesof 1 +formal 1 +notat 1 +describ 1 +comput 1 +tool 1 +analyz 1 +prove 1 +properti 1 +concern 1 +subsum 1 +thestudi 1 +specif 1 +henc 1 +lead 1 +deeper 1 +understand 1 +logic 1 +mathemat 1 +proof 1 +theori 1 +exampl 1 +abstractli 1 +specifi 1 +howprogram 1 +oper 1 +well 1 +asnot 1 +denot 1 +turn 1 +abstract 1 +preciser 1 +allow 1 +techniqu 1 +induct 1 +relat 1 +forform 1 +interest 1 +relev 1 +type 1 +safeti 1 +compil 1 +correct 1 +ideal 1 +student 1 +come 1 +learn 1 +somethingabout 1 +make 1 +inform 1 +concept 1 +precis 1 +tomanipul 1 +demonstr 1 +us 1 +textbook 1 +carl 1 +gunter 1 +work 1 +programm 1 +second 1 +edit 1 +larri 1 +paulson 1 +prerequisit 1 +side 1 +assum 1 +experi 1 +least 1 +pascal 1 +prefer 1 +knowledg 1 +witha 1 +function 1 +scheme 1 +haskel 1 +theoret 1 +basic 1 +profici 1 +undergraduatemathemat 1 +scienc 1 +ture 1 +machin 1 +recurs 1 +andlog 1 +predic 1 +calculu 1 +mathematicalmatur 1 +requir 1 +design 1 +math 1 +meng 1 +undergradu 1 +anmeng 1 +must 1 +talk 1 +instructor 1 +find 1 +ifth 1 +suitabl 1 +contact 1 +newsgroup 1 +cornel 1 +class 1 +greg 1 +morrisett 1 +offic 1 +hour 1 +appoint 1 +admin 1 +assist 1 +linda 1 +competillo 1 +lfar 1 +erlingsson 1 +ulfar 1 +pmrelev 1 +mark 1 +leon 1 +resourc 1 +research 1 +emac 1 +mode 1 +comint 1 +need 1 +project 1 +line 1 +standard 1 +refer 1 +postscript 1 +user 1 +guid 1 +base 1 +environ 1 +system 1 +librari 1 +document 1 +avail 1 +indexdocument 1 +toolsa 1 +gentl 1 +introduct 1 +andrew 1 +cum 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..c867f5a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,29 @@ +home 1 +pagecsmultimedia 1 +systemscomput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +final 1 +project 1 +present 1 +schedul 1 +cours 1 +staff 1 +info 1 +materi 1 +student 1 +page 1 +us 1 +link 1 +newsgroup 1 +anounc 1 +access 1 +rivl 1 +bugcom 1 +question 1 +send 1 +mail 1 +janosi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..8456119b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,26 @@ +home 1 +pagecsmultimedia 1 +systemscomput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +cours 1 +staff 1 +info 1 +materi 1 +student 1 +page 1 +project 1 +us 1 +link 1 +newsgroup 1 +anounc 1 +access 1 +rivl 1 +bugcom 1 +question 1 +send 1 +mail 1 +janosi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..8e35a0b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,44 @@ +design 1 +analysi 1 +algorithm 1 +homepag 1 +instructor 1 +ronitt 1 +rubinfeld 1 +evan 1 +moran 1 +time 1 +locat 1 +upson 1 +text 1 +kozen 1 +springer 1 +verlag 1 +handout 1 +cours 1 +announc 1 +syllabu 1 +homework 1 +last 1 +modifi 1 +addendum 1 +copi 1 +solut 1 +exam 1 +thursdai 1 +inupson 1 +talk 1 +tome 1 +reschedul 1 +cannot 1 +make 1 +refer 1 +cheat 1 +sheet 1 +class 1 +note 1 +rajeev 1 +motwani 1 +lectur 1 +approxim 1 +paper 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..b163d663 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,146 @@ +home 1 +pagefronti 1 +parallel 1 +system 1 +thorsten 1 +eickenfal 1 +locat 1 +upson 1 +pmoffic 1 +hour 1 +pmcours 1 +descriptionparallel 1 +machin 1 +stai 1 +underscor 1 +fact 1 +manufactur 1 +offer 1 +multiprocessor 1 +product 1 +line 1 +howev 1 +debat 1 +futur 1 +look 1 +like 1 +heat 1 +consider 1 +past 1 +month 1 +feder 1 +spend 1 +cut 1 +erad 1 +perform 1 +price 1 +massiv 1 +processor 1 +competitor 1 +glorifi 1 +workstat 1 +farm 1 +smile 1 +cannot 1 +level 1 +eas 1 +busi 1 +suffer 1 +much 1 +competit 1 +leverag 1 +latest 1 +microprocessor 1 +develop 1 +quickli 1 +core 1 +technolog 1 +larg 1 +number 1 +issu 1 +integr 1 +shelf 1 +cost 1 +effect 1 +easili 1 +program 1 +high 1 +languag 1 +host 1 +vari 1 +applic 1 +workload 1 +cours 1 +although 1 +topic 1 +first 1 +week 1 +algorithm 1 +architectur 1 +matur 1 +last 1 +year 1 +point 1 +almost 1 +usabl 1 +support 1 +adequ 1 +allow 1 +gener 1 +purpos 1 +focu 1 +oper 1 +aspect 1 +requir 1 +featur 1 +taken 1 +grant 1 +sequenti 1 +comput 1 +portabl 1 +power 1 +debugg 1 +multi 1 +user 1 +access 1 +virtual 1 +memori 1 +fast 1 +part 1 +examin 1 +complet 1 +split 1 +network 1 +us 1 +vertic 1 +approach 1 +studi 1 +interact 1 +model 1 +associ 1 +execut 1 +hardwar 1 +implement 1 +focuss 1 +layer 1 +second 1 +specif 1 +slice 1 +horizont 1 +across 1 +select 1 +analysi 1 +design 1 +altern 1 +depth 1 +dash 1 +provid 1 +share 1 +contrast 1 +materialscours 1 +formatlectur 1 +note 1 +problem 1 +set 1 +term 1 +project 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..07042adf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,116 @@ +home 1 +pageintroduct 1 +digit 1 +system 1 +comput 1 +organ 1 +thorsten 1 +eickenfal 1 +kimbal 1 +btopic 1 +includ 1 +representationof 1 +inform 1 +machin 1 +assembl 1 +languag 1 +processor 1 +interrupt 1 +memori 1 +hierarchi 1 +combinatori 1 +sequentialcircuit 1 +data 1 +path 1 +control 1 +unit 1 +design 1 +andmicroprogram 1 +helpif 1 +problem 1 +relat 1 +lectur 1 +homework 1 +aproject 1 +best 1 +help 1 +annot 1 +theappropri 1 +point 1 +cours 1 +materi 1 +allow 1 +gethelp 1 +staff 1 +also 1 +class 1 +mate 1 +otherwis 1 +send 1 +email 1 +cornel 1 +talk 1 +toon 1 +consult 1 +informationcoursemateri 1 +announcementsannounc 1 +note 1 +video 1 +assign 1 +part 1 +us 1 +conot 1 +document 1 +small 1 +get 1 +start 1 +avail 1 +case 1 +never 1 +tutori 1 +onlinean 1 +introduct 1 +marshal 1 +brain 1 +great 1 +forpeopl 1 +know 1 +procedur 1 +like 1 +pascal 1 +fortran 1 +program 1 +david 1 +cclass 1 +cardiff 1 +univers 1 +lot 1 +exampl 1 +learnc 1 +todai 1 +guid 1 +book 1 +programsand 1 +onlin 1 +refer 1 +well 1 +theyahoo 1 +page 1 +wish 1 +surf 1 +search 1 +ofmor 1 +place 1 +frequent 1 +ask 1 +question 1 +inansw 1 +common 1 +come 1 +learn 1 +contain 1 +link 1 +sever 1 +newsgroup 1 +maintain 1 +voneicken 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..1a7274c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,36 @@ +home 1 +pagecsfound 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +welcom 1 +cours 1 +inform 1 +materi 1 +code 1 +academ 1 +integr 1 +pleas 1 +read 1 +announc 1 +clair 1 +final 1 +grade 1 +avail 1 +sometim 1 +saturdai 1 +send 1 +yourgrad 1 +mail 1 +request 1 +exam 1 +upson 1 +altern 1 +date 1 +pagesc 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..ce724d07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,43 @@ +home 1 +pagec 1 +softwar 1 +engin 1 +technolog 1 +techniquescomput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +cours 1 +staff 1 +samuel 1 +weber 1 +professor 1 +upson 1 +offic 1 +hour 1 +vineet 1 +buch 1 +thursdai 1 +yaron 1 +minski 1 +none 1 +materi 1 +overview 1 +handout 1 +lectur 1 +note 1 +recit 1 +line 1 +resourc 1 +assign 1 +grade 1 +remark 1 +stuff 1 +frequent 1 +ask 1 +question 1 +borland 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..31eae593 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,31 @@ +home 1 +page 1 +brian 1 +smith 1 +tour 1 +guid 1 +cours 1 +inform 1 +homework 1 +assign 1 +project 1 +spec 1 +lectur 1 +tabl 1 +content 1 +postscript 1 +slide 1 +introduct 1 +comput 1 +system 1 +organ 1 +program 1 +procedur 1 +recurs 1 +stack 1 +assembl 1 +linker 1 +loader 1 +interrupt 1 +logic 1 +design 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..6a3dea14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,43 @@ +home 1 +page 1 +spring 1 +messag 1 +welcom 1 +rememb 1 +check 1 +frequentlyfor 1 +import 1 +inform 1 +regard 1 +cours 1 +prelim 1 +tuesdai 1 +april 1 +review 1 +session 1 +held 1 +onsundai 1 +baker 1 +instructor 1 +teach 1 +assist 1 +offic 1 +hour 1 +get 1 +materi 1 +theworld 1 +wide 1 +codewarrior 1 +personalmac 1 +program 1 +lectur 1 +exam 1 +tue 1 +februari 1 +thur 1 +march 1 +final 1 +last 1 +updat 1 +pierc 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..ca654e91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,260 @@ +spring 1 +home 1 +page 1 +comput 1 +program 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +question 1 +problem 1 +email 1 +jeff 1 +foster 1 +jfoster 1 +troubl 1 +view 1 +tabl 1 +us 1 +earlyvers 1 +netscap 1 +contest 1 +first 1 +prelim 1 +thursdai 1 +march 1 +announcetim 1 +place 1 +soon 1 +topic 1 +cover 1 +theprelim 1 +second 1 +tuesdai 1 +april 1 +room 1 +wereannounc 1 +lectur 1 +thesecond 1 +final 1 +exam 1 +mondai 1 +noon 1 +olin 1 +lastnam 1 +last 1 +name 1 +covereveryth 1 +cours 1 +addit 1 +topicsconv 1 +offic 1 +hour 1 +daywhenwherewhomondai 1 +upson 1 +davetuesdai 1 +jeffwednesdai 1 +davethursdai 1 +halfridai 1 +halsaturdai 1 +breview 1 +session 1 +consult 1 +chri 1 +jose 1 +thank 1 +chrisand 1 +take 1 +time 1 +studi 1 +descript 1 +staff 1 +note 1 +handout 1 +code 1 +sampl 1 +gofer 1 +enhanc 1 +server 1 +also 1 +engrd 1 +fall 1 +summer 1 +credit 1 +grant 1 +bothcom 1 +prerequisit 1 +equival 1 +programmingexperi 1 +intermedi 1 +high 1 +level 1 +languag 1 +introduct 1 +tocomput 1 +includ 1 +develop 1 +proof 1 +ofprogram 1 +correct 1 +structur 1 +recurs 1 +abstract 1 +datatyp 1 +object 1 +orient 1 +data 1 +analysi 1 +ofalgorithm 1 +princip 1 +instructor 1 +perkin 1 +appoint 1 +teach 1 +assist 1 +held 1 +alan 1 +kwan 1 +david 1 +walker 1 +section 1 +sectionsdaytimeroominstructortuesdai 1 +ahal 1 +perkinstuesdai 1 +perkinswednesdai 1 +hollist 1 +walkerwednesdai 1 +walkerthursdai 1 +fosterfridai 1 +regular 1 +schedul 1 +effect 1 +ofclass 1 +sundai 1 +fridai 1 +consultingsundaymondaytuesdaywednesdaythursdayfridai 1 +steveerickylechrisjpkyl 1 +steveerickylechrisjpvasantha 1 +josejosekayjosejpvasantha 1 +josejosekayjosejp 1 +none 1 +kaykylesteveericvasantha 1 +danerickaychrisdan 1 +avail 1 +three 1 +format 1 +binhqx 1 +macbinari 1 +file 1 +contain 1 +microsoft 1 +word 1 +rich 1 +text 1 +parseabl 1 +other 1 +plain 1 +date 1 +list 1 +next 1 +waspost 1 +given 1 +macintosh 1 +preliminari 1 +basic 1 +class 1 +pointer 1 +arrai 1 +dynam 1 +storag 1 +alloc 1 +dynamicdata 1 +fine 1 +point 1 +deriv 1 +algorithm 1 +notat 1 +tripl 1 +assign 1 +condit 1 +loop 1 +prove 1 +function 1 +type 1 +curri 1 +filter 1 +iter 1 +applic 1 +architectur 1 +framework 1 +link 1 +binari 1 +tree 1 +industri 1 +strength 1 +java 1 +process 1 +stuffit 1 +expand 1 +http 1 +address 1 +foraladdin 1 +system 1 +find 1 +window 1 +version 1 +armandonunez 1 +postscript 1 +print 1 +almost 1 +anylas 1 +printer 1 +want 1 +need 1 +applicationlik 1 +ghostview 1 +codewarrior 1 +intro 1 +cell 1 +complex 1 +set 1 +charact 1 +simpl 1 +dictionari 1 +flavor 1 +macgof 1 +inth 1 +public 1 +lab 1 +unix 1 +site 1 +ishaskel 1 +systemsz 1 +yale 1 +haskel 1 +pleas 1 +help 1 +piec 1 +ofgof 1 +think 1 +gener 1 +interest 1 +know 1 +make 1 +itavail 1 +sourc 1 +manual 1 +onth 1 +project 1 +line 1 +csdepart 1 +enhance_assign 1 +cuinfo 1 +metrowerk 1 +homepag 1 +aladdin 1 +maker 1 +comment 1 +suggest 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..a815b3ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,32 @@ +home 1 +pagec 1 +structur 1 +interpretationof 1 +comput 1 +programscomput 1 +scienc 1 +departmentcornel 1 +universityspr 1 +cours 1 +staff 1 +info 1 +materi 1 +announc 1 +emac 1 +macmarlai 1 +demo 1 +section 1 +room 1 +chang 1 +prelim 1 +time 1 +place 1 +make 1 +grader 1 +happi 1 +gener 1 +exam 1 +extens 1 +date 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..372b08e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,92 @@ +advanc 1 +languag 1 +implement 1 +content 1 +descriptionhandoutsadministriviaweb 1 +link 1 +descript 1 +modern 1 +program 1 +java 1 +haskel 1 +dylan 1 +provid 1 +high 1 +level 1 +featur 1 +object 1 +closur 1 +polymorph 1 +abstract 1 +data 1 +type 1 +class 1 +garbag 1 +collect 1 +except 1 +continu 1 +thread 1 +synchron 1 +construct 1 +survei 1 +techniqu 1 +effici 1 +implementationof 1 +focu 1 +ofmodern 1 +function 1 +make 1 +connectionsto 1 +kind 1 +notabl 1 +orient 1 +handout 1 +separ 1 +page 1 +administrivia 1 +instructor 1 +greg 1 +morrisett 1 +offic 1 +upson 1 +email 1 +cornel 1 +phone 1 +admin 1 +assist 1 +linda 1 +competillo 1 +hour 1 +send 1 +appoint 1 +evan 1 +moran 1 +tuesdai 1 +thursdai 1 +pmweb 1 +mark 1 +leon 1 +resourc 1 +research 1 +project 1 +line 1 +inform 1 +standard 1 +refer 1 +postscript 1 +user 1 +guid 1 +base 1 +environ 1 +system 1 +librari 1 +document 1 +avail 1 +tool 1 +indexdocument 1 +toolsa 1 +gentl 1 +introduct 1 +andrew 1 +cum 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..c9360659 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,24 @@ +practic 1 +distribut 1 +systemspract 1 +system 1 +registr 1 +necessari 1 +student 1 +takingc 1 +gener 1 +informationcours 1 +overview 1 +logist 1 +read 1 +homeworkshomework 1 +homework 1 +postscript 1 +amexaminationsmidterm 1 +examin 1 +final 1 +annot 1 +bibliographiesselect 1 +bibliographi 1 +prepar 1 +class 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..42913e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,166 @@ +home 1 +pagehigh 1 +perform 1 +comput 1 +system 1 +thorsten 1 +eickenspr 1 +poster 1 +sessionthu 1 +upson 1 +tbdpleas 1 +sign 1 +session 1 +outsid 1 +willdetermin 1 +present 1 +order 1 +begin 1 +postersess 1 +pleas 1 +arriv 1 +late 1 +instruct 1 +pick 1 +board 1 +cindywilliam 1 +limit 1 +hold 1 +ithorizont 1 +hang 1 +corridor 1 +minut 1 +presentyour 1 +plu 1 +question 1 +give 1 +everyon 1 +asens 1 +problem 1 +attack 1 +solut 1 +contempl 1 +andth 1 +result 1 +gotten 1 +learn 1 +presentationswil 1 +judg 1 +well 1 +messag 1 +across 1 +everi 1 +memberof 1 +group 1 +particip 1 +nativespeak 1 +difficulti 1 +taken 1 +consider 1 +final 1 +report 1 +info 1 +cours 1 +current 1 +mondai 1 +noon 1 +absolut 1 +page 1 +must 1 +subdirectori 1 +willb 1 +import 1 +contribut 1 +project 1 +thelongest 1 +last 1 +remain 1 +server 1 +year 1 +tocom 1 +mani 1 +peopl 1 +find 1 +search 1 +engin 1 +finalreport 1 +start 1 +usual 1 +introduct 1 +aretri 1 +solv 1 +follow 1 +thorough 1 +discuss 1 +trade 1 +off 1 +part 1 +need 1 +explain 1 +chose 1 +thesolut 1 +option 1 +consid 1 +youreject 1 +futur 1 +webread 1 +convic 1 +bestsolut 1 +showcas 1 +work 1 +us 1 +ampl 1 +experiment 1 +data 1 +goodexplan 1 +exactli 1 +measur 1 +know 1 +whatyou 1 +think 1 +shown 1 +left 1 +open 1 +projectsproject 1 +reportsproject 1 +proposalsiniti 1 +ideascours 1 +materialshomework 1 +homework 1 +check 1 +cuc 1 +pagebefor 1 +split 1 +machin 1 +might 1 +also 1 +sampl 1 +program 1 +introc 1 +casec 1 +technologyc 1 +cachesc 1 +netsc 1 +spc 1 +cyou 1 +inform 1 +paper 1 +parallel 1 +programmingin 1 +emdc 1 +sortingc 1 +spamc 1 +msgpassc 1 +mpic 1 +cachecohc 1 +locksc 1 +threadsc 1 +atmc 1 +netc 1 +scoreboardc 1 +tomasuloc 1 +predc 1 +superscalarc 1 +busesc 1 +pentiummaintain 1 +eicken 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..e4079dd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,74 @@ +machin 1 +vision 1 +cours 1 +staff 1 +instructor 1 +ramin 1 +zabihteach 1 +assist 1 +justin 1 +millerclass 1 +time 1 +place 1 +phillip 1 +project 1 +suggestionsproblem 1 +set 1 +problem 1 +class 1 +note 1 +scribe 1 +week 1 +januari 1 +regular 1 +simul 1 +anneal 1 +februari 1 +comput 1 +motion 1 +calculu 1 +variat 1 +maximum 1 +likelihood 1 +estim 1 +markov 1 +random 1 +field 1 +snake 1 +stereo 1 +introduct 1 +correl 1 +mestim 1 +march 1 +parametr 1 +method 1 +guest 1 +lectur 1 +transform 1 +cont 1 +censu 1 +geometri 1 +april 1 +geometr 1 +segment 1 +track 1 +edg 1 +detect 1 +continu 1 +model 1 +base 1 +hausdorff 1 +distanc 1 +eigenhausdorff 1 +face 1 +recognitionsect 1 +optic 1 +flow 1 +constraint 1 +equationoth 1 +sourc 1 +home 1 +page 1 +histori 1 +object 1 +recognit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..5da5b64d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,73 @@ +home 1 +pagecsintroduct 1 +natur 1 +languag 1 +understandingcomput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +spring 1 +welcom 1 +cours 1 +inform 1 +materi 1 +code 1 +academ 1 +integr 1 +pleas 1 +read 1 +announcementsher 1 +list 1 +resourc 1 +avail 1 +project 1 +brill 1 +directori 1 +part 1 +speech 1 +taggerbrown 1 +brown 1 +corpu 1 +tag 1 +small 1 +annot 1 +withpart 1 +text 1 +execut 1 +wordnet 1 +sure 1 +environ 1 +variabl 1 +wnsearchdir 1 +archiv 1 +dict 1 +final 1 +site 1 +contain 1 +descript 1 +ofth 1 +content 1 +penn 1 +treebank 1 +iicollect 1 +canus 1 +like 1 +talk 1 +francisabout 1 +access 1 +us 1 +databas 1 +recent 1 +paper 1 +computationallinguist 1 +repositori 1 +pointer 1 +variou 1 +system 1 +compon 1 +present 1 +schedulewhat 1 +turn 1 +pagesc 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..7037b3cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,36 @@ +cours 1 +home 1 +page 1 +info 1 +syllabu 1 +lab 1 +link 1 +announcementsroom 1 +updat 1 +unforseen 1 +circumst 1 +still 1 +unableto 1 +upson 1 +class 1 +therefor 1 +follow 1 +room 1 +assign 1 +tuesdai 1 +wednesdai 1 +section 1 +philip 1 +thursdai 1 +meet 1 +maclab 1 +usual 1 +prelim 1 +first 1 +close 1 +book 1 +exam 1 +cover 1 +materialcov 1 +need 1 +comput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..5a9c8faa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,38 @@ +fall 1 +professor 1 +monika 1 +rauch 1 +henzingeremail 1 +cornel 1 +cours 1 +informationhomework 1 +solut 1 +lectur 1 +graph 1 +explor 1 +greedi 1 +algorithm 1 +matroid 1 +dijkstra 1 +bellman 1 +ford 1 +matrix 1 +closur 1 +binomi 1 +heap 1 +fibonacci 1 +treap 1 +randomizedsearch 1 +tree 1 +union 1 +find 1 +maxflow 1 +mincut 1 +theorem 1 +edmond 1 +karp 1 +dinitz 1 +preflow 1 +push 1 +dynam 1 +implement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..f3d8b497 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,212 @@ +introduct 1 +scientif 1 +comput 1 +computationsumm 1 +class 1 +inform 1 +syllabu 1 +sourc 1 +code 1 +handout 1 +problem 1 +setsan 1 +elementari 1 +numer 1 +analysi 1 +scientificcomput 1 +topic 1 +includ 1 +interpol 1 +quadratur 1 +linear 1 +andnonlinear 1 +equat 1 +solv 1 +least 1 +squar 1 +fit 1 +ordinarydifferenti 1 +matlab 1 +environ 1 +us 1 +vector 1 +effici 1 +reliabl 1 +stabil 1 +stress 1 +informationstaff 1 +niko 1 +pitsiani 1 +instructor 1 +offic 1 +upson 1 +hall 1 +cornel 1 +hour 1 +time 1 +appoint 1 +ozan 1 +hafizogullari 1 +teach 1 +assist 1 +lecturesclass 1 +meet 1 +everi 1 +cours 1 +administrationlauri 1 +buck 1 +question 1 +concern 1 +grade 1 +record 1 +account 1 +addressedto 1 +administr 1 +prerequisitesc 1 +corequisit 1 +math 1 +materialstext 1 +matrix 1 +approachus 1 +charl 1 +loan 1 +distribut 1 +softwar 1 +purchas 1 +student 1 +eitherth 1 +macintosh 1 +version 1 +though 1 +labsthi 1 +design 1 +three 1 +lab 1 +siblei 1 +martha 1 +renssela 1 +setsther 1 +assign 1 +hand 1 +lectur 1 +orfrom 1 +page 1 +extra 1 +avail 1 +rack 1 +outsid 1 +collect 1 +computingproblem 1 +done 1 +return 1 +work 1 +behandl 1 +begin 1 +duedat 1 +late 1 +accept 1 +credit 1 +worst 1 +gradefrom 1 +ignor 1 +final 1 +alon 1 +partner 1 +printyour 1 +name 1 +copi 1 +pair 1 +firstpag 1 +chang 1 +addit 1 +partnernam 1 +examsther 1 +midterm 1 +exam 1 +dai 1 +list 1 +gradingyour 1 +total 1 +score 1 +follow 1 +best 1 +beassign 1 +accord 1 +rel 1 +rank 1 +base 1 +onyour 1 +calendar 1 +june 1 +program 1 +error 1 +float 1 +point 1 +number 1 +registr 1 +deadlin 1 +polynomi 1 +juli 1 +vandermond 1 +newton 1 +piecewis 1 +cubic 1 +hermit 1 +spline 1 +integr 1 +cote 1 +composit 1 +rule 1 +adapt 1 +review 1 +drop 1 +classroom 1 +matric 1 +oper 1 +system 1 +given 1 +choleski 1 +find 1 +root 1 +minim 1 +function 1 +variabl 1 +multivari 1 +initi 1 +valu 1 +euler 1 +backward 1 +rung 1 +kutta 1 +method 1 +adam 1 +exampl 1 +computingat 1 +rennselaerhal 1 +locat 1 +folder 1 +applic 1 +chapter 1 +plan 1 +stand 1 +otherthan 1 +on 1 +scmv 1 +file 1 +unix 1 +uncompress 1 +untar 1 +command 1 +zcat 1 +highli 1 +recommend 1 +brows 1 +session 1 +need 1 +postscript 1 +viewer 1 +instal 1 +order 1 +randperm 1 +length 1 +set 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..e827388e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,182 @@ +summer 1 +home 1 +page 1 +system 1 +program 1 +oper 1 +prereq 1 +permiss 1 +instructor 1 +indupraka 1 +kodukula 1 +praka 1 +cornel 1 +teach 1 +assist 1 +nawaaz 1 +ahm 1 +motd 1 +archiv 1 +subject 1 +descript 1 +prerequsit 1 +cours 1 +outlin 1 +textbook 1 +schedul 1 +quizz 1 +grade 1 +polici 1 +statement 1 +collabor 1 +offic 1 +hour 1 +materi 1 +send 1 +comment 1 +anintroduct 1 +logic 1 +design 1 +emphasison 1 +multiprogram 1 +topic 1 +includ 1 +processsynchron 1 +deadlock 1 +memori 1 +manag 1 +input 1 +output 1 +method 1 +inform 1 +share 1 +protect 1 +secur 1 +file 1 +theimpact 1 +network 1 +distribut 1 +comput 1 +environ 1 +operatingsystem 1 +also 1 +discuss 1 +fast 1 +pace 1 +requiringconst 1 +attent 1 +prerequsitescomplet 1 +familiar 1 +assum 1 +inparticular 1 +knowledg 1 +architectur 1 +assembl 1 +programminglanguag 1 +structur 1 +requir 1 +cover 1 +theintroductori 1 +class 1 +purpos 1 +thatwil 1 +remind 1 +audienc 1 +outlineth 1 +organ 1 +roughli 1 +follow 1 +depend 1 +feedback 1 +chang 1 +theorder 1 +content 1 +particular 1 +section 1 +start 1 +overview 1 +concurr 1 +issu 1 +lldiscuss 1 +synchron 1 +ensur 1 +mutualexclus 1 +detect 1 +prevent 1 +algorithm 1 +multiprocessor 1 +well 1 +next 1 +memorymanag 1 +virtual 1 +variou 1 +usedto 1 +implement 1 +segment 1 +final 1 +look 1 +evolut 1 +thetradit 1 +micro 1 +kernel 1 +timepermit 1 +lectur 1 +advanc 1 +multithread 1 +serverless 1 +textbooksth 1 +princip 1 +text 1 +book 1 +conceptsbook 1 +abraham 1 +silberschatz 1 +peter 1 +galvin 1 +distributeclass 1 +note 1 +complet 1 +noteswil 1 +avail 1 +world 1 +wide 1 +pageat 1 +meet 1 +mondaythru 1 +thursdai 1 +week 1 +first 1 +second 1 +addit 1 +weekli 1 +assign 1 +thesewil 1 +hand 1 +thursdayat 1 +gradingeach 1 +homework 1 +carri 1 +weightag 1 +combinedweightag 1 +worth 1 +twomidterm 1 +surpris 1 +todetermin 1 +understand 1 +collaborationat 1 +peopl 1 +form 1 +group 1 +eachhomework 1 +need 1 +submit 1 +copi 1 +thehomework 1 +close 1 +closednot 1 +mondai 1 +tuesdai 1 +wednesdai 1 +upson 1 +maintain 1 +induprakaskodukula 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..860d18e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,41 @@ +csc 1 +advanc 1 +program 1 +languagesfal 1 +upson 1 +instructor 1 +henzingerupson 1 +cornel 1 +offic 1 +hour 1 +class 1 +appoint 1 +teach 1 +assist 1 +neal 1 +glewupson 1 +glew 1 +handoutshandout 1 +septemb 1 +cours 1 +informationhandout 1 +get 1 +start 1 +mlhandout 1 +octob 1 +meta 1 +lambdahomeworkshomework 1 +solut 1 +homework 1 +novemb 1 +grieshomework 1 +decemb 1 +notesraw 1 +note 1 +scribe 1 +noteslectur 1 +lectur 1 +introduct 1 +mllectur 1 +midterm 1 +grieslectur 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..0c99f208 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,29 @@ +fall 1 +advanc 1 +program 1 +languag 1 +faculti 1 +prof 1 +robert 1 +constabl 1 +upson 1 +offic 1 +hour 1 +mondai 1 +teach 1 +assist 1 +ravi 1 +kumar 1 +thur 1 +note 1 +assign 1 +nuprl 1 +classic 1 +comment 1 +question 1 +suggest 1 +page 1 +pleas 1 +mail 1 +pavel 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..9f6ac0b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,14 @@ +coursesc 1 +cours 1 +fall 1 +spring 1 +addit 1 +inform 1 +maintain 1 +individualfaculti 1 +member 1 +consult 1 +class 1 +page 1 +contactgloria 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..be927eae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,33 @@ +analysi 1 +program 1 +fall 1 +instructor 1 +vicki 1 +almstrum 1 +linyuan 1 +yang 1 +syllabu 1 +announc 1 +homework 1 +assign 1 +handout 1 +interest 1 +tutori 1 +new 1 +utexa 1 +class 1 +homepag 1 +last 1 +updat 1 +page 1 +prepar 1 +suggest 1 +comment 1 +welcom 1 +click 1 +send 1 +mail 1 +depart 1 +comput 1 +scienc 1 +austin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..257795bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,114 @@ +spring 1 +foundat 1 +ofmathemat 1 +taylor 1 +cours 1 +blurb 1 +mani 1 +approach 1 +formal 1 +reason 1 +theobject 1 +specifi 1 +comput 1 +program 1 +includ 1 +formalizationof 1 +world 1 +interact 1 +creationof 1 +numer 1 +tool 1 +examin 1 +systemsfor 1 +number 1 +mechan 1 +formalmethod 1 +support 1 +differ 1 +system 1 +exampl 1 +suchsystem 1 +pair 1 +primit 1 +recurs 1 +arithmet 1 +boyer 1 +moor 1 +prover 1 +first 1 +order 1 +logic 1 +otter 1 +nelson 1 +higher 1 +imp 1 +equat 1 +theori 1 +mizar 1 +quaif 1 +type 1 +nuprl 1 +lego 1 +coqstud 1 +choos 1 +help 1 +instructor 1 +ortool 1 +grade 1 +base 1 +upon 1 +present 1 +aboutthes 1 +projecthtml 1 +version 1 +theqe 1 +manifestoplain 1 +text 1 +qedmanifestobowen 1 +method 1 +page 1 +backup 1 +copi 1 +chief 1 +assign 1 +select 1 +bowen 1 +report 1 +class 1 +oral 1 +either 1 +good 1 +freeli 1 +avail 1 +implement 1 +consult 1 +make 1 +final 1 +choic 1 +test 1 +hope 1 +guest 1 +localform 1 +commun 1 +tent 1 +schedul 1 +april 1 +rick 1 +tannei 1 +continu 1 +trevor 1 +hick 1 +ruben 1 +gamboa 1 +squar 1 +root 1 +samuel 1 +guyer 1 +circal 1 +process 1 +algebra 1 +sawada 1 +russel 1 +turpin 1 +galoi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..e1306062 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,151 @@ +advanc 1 +comput 1 +architectur 1 +architecturethi 1 +cours 1 +focus 1 +techniqu 1 +quantit 1 +analysi 1 +evaluationof 1 +modern 1 +system 1 +select 1 +appropri 1 +benchmarksto 1 +reveal 1 +compar 1 +perform 1 +altern 1 +design 1 +choic 1 +insystem 1 +emphasi 1 +major 1 +compon 1 +subsystem 1 +highperform 1 +pipelin 1 +instruct 1 +level 1 +parallel 1 +memoryhierarchi 1 +input 1 +output 1 +network 1 +orient 1 +interconnect 1 +studentswil 1 +undertak 1 +project 1 +oftheir 1 +choos 1 +administr 1 +informationuniqu 1 +number 1 +meet 1 +place 1 +instructor 1 +mikedahlinoffic 1 +hour 1 +appoint 1 +tbdtaoffic 1 +tbdreadingstextbook 1 +hennessi 1 +patteson 1 +computerarchitectur 1 +approach 1 +second 1 +edit 1 +note 1 +significantli 1 +differ 1 +stedit 1 +recommend 1 +attempt 1 +textbook 1 +errata 1 +sheetfor 1 +pattersonin 1 +addit 1 +read 1 +current 1 +paper 1 +variou 1 +aspect 1 +currentcomput 1 +research 1 +watch 1 +space 1 +pointer 1 +readinglist 1 +grade 1 +class 1 +particip 1 +homework 1 +work 1 +pair 1 +exam 1 +midterm 1 +scheduleweekdatetopicreadingduejan 1 +intro 1 +admin 1 +review 1 +perf 1 +cost 1 +amdahl 1 +tech 1 +trendsch 1 +cach 1 +memori 1 +isa 1 +hazard 1 +branch 1 +predictionch 1 +mlkholidayf 1 +static 1 +proposalfeb 1 +scoreboard 1 +tomasulu 1 +speculationch 1 +dynam 1 +predict 1 +limit 1 +vector 1 +processorsch 1 +dfeb 1 +hierarchych 1 +surveyfeb 1 +dram 1 +banksf 1 +revieww 1 +spring 1 +breakm 1 +breakmar 1 +metric 1 +queu 1 +buss 1 +disk 1 +raidch 1 +tertiari 1 +networksf 1 +networksch 1 +checkpointapr 1 +architecturesf 1 +mppsch 1 +mpp 1 +preseantationsm 1 +presentationsfri 1 +last 1 +classesm 1 +written 1 +reportaddit 1 +resourcescours 1 +page 1 +product 1 +confer 1 +bibliographi 1 +reportsyahoo 1 +businessand 1 +economi 1 +compani 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..378f26df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,71 @@ +oper 1 +system 1 +systemsuniqu 1 +recent 1 +explos 1 +interest 1 +world 1 +wide 1 +resultedin 1 +evolv 1 +protocol 1 +us 1 +address 1 +thetradit 1 +concern 1 +interprocess 1 +commun 1 +resourc 1 +alloc 1 +secur 1 +gener 1 +contextof 1 +internet 1 +goal 1 +class 1 +provid 1 +understandingof 1 +current 1 +state 1 +also 1 +addressproblem 1 +must 1 +solv 1 +matur 1 +purpos 1 +operatingsystem 1 +hypothesi 1 +behind 1 +design 1 +mani 1 +theissu 1 +context 1 +addressedin 1 +tradit 1 +area 1 +occasionallyread 1 +relat 1 +paper 1 +bear 1 +understandingcurr 1 +problem 1 +project 1 +reportspoint 1 +research 1 +refer 1 +inform 1 +syllabu 1 +read 1 +list 1 +schedul 1 +longer 1 +less 1 +organ 1 +rosterhandout 1 +verif 1 +sslprotocol 1 +proofsketch 1 +guidelin 1 +final 1 +talk 1 +report 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..725f5a55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,217 @@ +fall 1 +comput 1 +organ 1 +program 1 +page 1 +constantli 1 +construct 1 +last 1 +updat 1 +onmon 1 +cours 1 +titl 1 +prerequisit 1 +grade 1 +least 1 +professor 1 +chri 1 +edmondson 1 +yurkanan 1 +dragon 1 +utexa 1 +import 1 +date 1 +final 1 +exam 1 +lectur 1 +noon 1 +tue 1 +handout 1 +new 1 +exampl 1 +solut 1 +post 1 +section 1 +object 1 +check 1 +extra 1 +offic 1 +hour 1 +fantasm 1 +visit 1 +version 1 +content 1 +class 1 +info 1 +meet 1 +time 1 +place 1 +discuss 1 +session 1 +oper 1 +syllabu 1 +email 1 +back 1 +locat 1 +thursdai 1 +attend 1 +regist 1 +yoonsuck 1 +choe 1 +yschoe 1 +edum 1 +cynthia 1 +deepa 1 +ramani 1 +dparam 1 +eduw 1 +zhang 1 +gzhang 1 +eduf 1 +conduct 1 +pleas 1 +calendar 1 +labor 1 +holidai 1 +us 1 +drop 1 +refund 1 +rare 1 +extenu 1 +circumst 1 +automat 1 +period 1 +begin 1 +academ 1 +penalti 1 +start 1 +reason 1 +withdraw 1 +univers 1 +chang 1 +registr 1 +pass 1 +fail 1 +deadlin 1 +appli 1 +graduat 1 +thanksgiv 1 +appeal 1 +homework 1 +test 1 +schedul 1 +glanc 1 +correct 1 +mondai 1 +typo 1 +bit 1 +remov 1 +rightmost 1 +make 1 +part 1 +wednesdai 1 +earli 1 +bonu 1 +fridai 1 +late 1 +saturdai 1 +practic 1 +problem 1 +night 1 +announc 1 +slide 1 +door 1 +turn 1 +procudur 1 +thur 1 +avail 1 +electronc 1 +note 1 +assign 1 +electron 1 +boxin 1 +front 1 +prof 1 +letter 1 +student 1 +overview 1 +descript 1 +topic 1 +singl 1 +ascii 1 +code 1 +chart 1 +home 1 +work 1 +endia 1 +memori 1 +hierarchi 1 +pick 1 +submit 1 +user 1 +manual 1 +also 1 +system 1 +disk 1 +document 1 +folder 1 +powermac 1 +quadra 1 +mac 1 +p_global 1 +paramet 1 +stack 1 +function 1 +macsbug 1 +instruct 1 +requir 1 +comment 1 +real 1 +compil 1 +proc 1 +func 1 +call 1 +might 1 +risc 1 +architectur 1 +includ 1 +valu 1 +practiv 1 +obsolet 1 +spec 1 +turnin 1 +procedur 1 +model 1 +gener 1 +interfac 1 +electoron 1 +charact 1 +orient 1 +cheat 1 +polici 1 +questionair 1 +must 1 +offici 1 +except 1 +go 1 +held 1 +newsgroup 1 +free 1 +toward 1 +next 1 +thank 1 +brett 1 +jame 1 +pascal 1 +sourc 1 +output 1 +data 1 +subroutine_fil 1 +exception_fil 1 +avali 1 +resourc 1 +studi 1 +guid 1 +maintain 1 +austin 1 +utc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..7f2207a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,206 @@ +porter 1 +homepag 1 +warn 1 +page 1 +construct 1 +link 1 +us 1 +right 1 +becom 1 +activ 1 +semest 1 +progress 1 +andther 1 +relev 1 +inform 1 +regard 1 +topic 1 +import 1 +announc 1 +take 1 +todai 1 +home 1 +unabl 1 +maintain 1 +forthes 1 +last 1 +coupl 1 +dai 1 +howev 1 +put 1 +class 1 +relat 1 +avail 1 +follow 1 +good 1 +luckfor 1 +final 1 +next 1 +week 1 +special 1 +review 1 +cover 1 +held 1 +painter 1 +hall 1 +exact 1 +locat 1 +dependon 1 +room 1 +availib 1 +note 1 +post 1 +door 1 +offic 1 +therewil 1 +someon 1 +time 1 +moreov 1 +almost 1 +total 1 +coverag 1 +uptoth 1 +exam 1 +need 1 +help 1 +feel 1 +free 1 +come 1 +glad 1 +luck 1 +resolutio 1 +bruce 1 +complex 1 +theori 1 +nimar 1 +arora 1 +parallel 1 +process 1 +dwip 1 +banerje 1 +boolean 1 +circuit 1 +porterquest 1 +sheet 1 +rotat 1 +bit 1 +disregard 1 +somewhat 1 +beyond 1 +scope 1 +slide 1 +present 1 +lectur 1 +decemb 1 +summar 1 +thecont 1 +whole 1 +reserv 1 +desk 1 +atugl 1 +hope 1 +webpag 1 +soon 1 +experienc 1 +technic 1 +difficulti 1 +caus 1 +length 1 +file 1 +click 1 +schedul 1 +also 1 +pleas 1 +check 1 +assign 1 +addendum 1 +question 1 +html 1 +version 1 +postscript 1 +ad 1 +sostai 1 +tune 1 +program 1 +download 1 +tutori 1 +prolog 1 +comput 1 +scienc 1 +iinstructorbruc 1 +utexa 1 +mondai 1 +email 1 +tasoffic 1 +hourslab 1 +discuss 1 +section 1 +schedulec 1 +thursdai 1 +uniqu 1 +number 1 +welch 1 +cours 1 +descriptionclass 1 +scheduleclass 1 +session 1 +includ 1 +new 1 +articlesclass 1 +newsgroupprogram 1 +assignmentsprogram 1 +solut 1 +studi 1 +guid 1 +midterm 1 +test 1 +pascal 1 +pascaltutori 1 +text 1 +format 1 +ansi 1 +faqyou 1 +sampl 1 +programm 1 +gener 1 +turbo 1 +languag 1 +materi 1 +frequent 1 +ask 1 +zipe 1 +concept 1 +structur 1 +base 1 +newsgroup 1 +newgroup 1 +might 1 +interest 1 +comp 1 +lang 1 +isocomp 1 +maccomp 1 +borlandcomp 1 +misccomp 1 +delphi 1 +miscfj 1 +rememb 1 +access 1 +dell 1 +serverto 1 +mail 1 +prefer 1 +item 1 +option 1 +menu 1 +look 1 +articl 1 +usual 1 +lead 1 +importantstuff 1 +descript 1 +send 1 +comment 1 +critic 1 +suggest 1 +addit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..7dfd6728 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,70 @@ +comput 1 +graphic 1 +spring 1 +cscomput 1 +graphicsspr 1 +instructor 1 +donald 1 +fussel 1 +gener 1 +cours 1 +inform 1 +descript 1 +syllabu 1 +mesa 1 +librari 1 +year 1 +us 1 +provid 1 +anopengl 1 +like 1 +platform 1 +hasbeen 1 +instal 1 +public 1 +workstat 1 +sciencesdepart 1 +instruct 1 +utc 1 +sampl 1 +makefil 1 +machin 1 +opengl 1 +page 1 +specif 1 +home 1 +center 1 +refer 1 +manual 1 +ousterhout 1 +book 1 +welch 1 +assign 1 +turn 1 +note 1 +option 1 +second 1 +exam 1 +oneor 1 +requir 1 +student 1 +show 1 +examwil 1 +submit 1 +wish 1 +bothmai 1 +higher 1 +score 1 +curv 1 +willcount 1 +exampl 1 +xlib 1 +code 1 +updat 1 +driver 1 +billthecat 1 +copi 1 +file 1 +directori 1 +contain 1 +slate 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..617cb285 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,67 @@ +comput 1 +graphic 1 +fall 1 +gcomput 1 +graphicsfal 1 +instructor 1 +donald 1 +fussel 1 +gener 1 +cours 1 +inform 1 +descript 1 +syllabu 1 +mesa 1 +librari 1 +year 1 +us 1 +provid 1 +anopengl 1 +like 1 +platform 1 +hasbeen 1 +instal 1 +public 1 +workstat 1 +sciencesdepart 1 +instruct 1 +utc 1 +sampl 1 +makefil 1 +machin 1 +opengl 1 +page 1 +specif 1 +home 1 +center 1 +refer 1 +manual 1 +ousterhout 1 +book 1 +welch 1 +program 1 +exampl 1 +assign 1 +turn 1 +xlib 1 +code 1 +driver 1 +billthecat 1 +copi 1 +file 1 +directori 1 +contain 1 +slate 1 +new 1 +turnin 1 +work 1 +libtcl 1 +libtk 1 +reinstal 1 +tclsh 1 +wish 1 +compil 1 +walker 1 +demo 1 +repair 1 +sourc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..a0171d7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,45 @@ +cryptographi 1 +professor 1 +david 1 +zuckerman 1 +offic 1 +hour 1 +taylor 1 +email 1 +utexa 1 +huiqun 1 +station 1 +hall 1 +basement 1 +hqliu 1 +syllabu 1 +mathemat 1 +background 1 +homework 1 +textbook 1 +ciphertext 1 +problem 1 +notic 1 +answer 1 +last 1 +french 1 +abl 1 +recogn 1 +word 1 +canada 1 +appear 1 +frequenc 1 +common 1 +letter 1 +chang 1 +drastic 1 +english 1 +howev 1 +digram 1 +like 1 +page 1 +modifi 1 +septemb 1 +comment 1 +welcom 1 +send 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..097e3a3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,314 @@ +distribut 1 +comput 1 +ispr 1 +instructor 1 +lorenzo 1 +alvisiteach 1 +assist 1 +rajeev 1 +joshicont 1 +offic 1 +hour 1 +locat 1 +mechan 1 +requir 1 +textbook 1 +cours 1 +content 1 +grade 1 +problem 1 +set 1 +inform 1 +pertain 1 +final 1 +exam 1 +suggest 1 +solut 1 +midterm 1 +newsgroup 1 +utexa 1 +class 1 +instruct 1 +stafflorenzo 1 +alvisi 1 +taylor 1 +hall 1 +phone 1 +tuesdai 1 +joshi 1 +mondai 1 +thursdai 1 +meet 1 +arrang 1 +appoint 1 +mechanicsi 1 +expect 1 +cover 1 +materi 1 +requiredtextbook 1 +remaind 1 +come 1 +sourc 1 +paper 1 +refer 1 +given 1 +classat 1 +appropri 1 +time 1 +lectur 1 +wednesdai 1 +robert 1 +moor 1 +isutexa 1 +system 1 +second 1 +edit 1 +mullend 1 +editor 1 +acmpress 1 +addison 1 +weslei 1 +publish 1 +compani 1 +read 1 +contentc 1 +abstract 1 +prove 1 +us 1 +tobe 1 +design 1 +build 1 +tomorrow 1 +distributedsystem 1 +includ 1 +global 1 +state 1 +cut 1 +logic 1 +vector 1 +clock 1 +causal 1 +messagedeliveri 1 +properti 1 +detect 1 +messag 1 +log 1 +checkpoint 1 +replic 1 +manag 1 +machin 1 +approach 1 +primari 1 +backupapproach 1 +agreement 1 +protocol 1 +byzantin 1 +order 1 +multicast 1 +group 1 +program 1 +techniqu 1 +applic 1 +file 1 +cach 1 +disconnect 1 +oper 1 +servic 1 +synchron 1 +secur 1 +encrypt 1 +authent 1 +integr 1 +discuss 1 +gener 1 +principl 1 +thepresent 1 +case 1 +studi 1 +exemplifi 1 +principleshav 1 +implement 1 +real 1 +topic 1 +depend 1 +interest 1 +present 1 +meor 1 +size 1 +allow 1 +give 1 +apresent 1 +share 1 +memori 1 +object 1 +kernel 1 +support 1 +weak 1 +consist 1 +replica 1 +electron 1 +commerc 1 +wide 1 +area 1 +networksgradingther 1 +written 1 +homework 1 +assign 1 +begrad 1 +demonstr 1 +credibl 1 +effort 1 +onbehalf 1 +author 1 +whether 1 +right 1 +wrong 1 +willrec 1 +better 1 +collabor 1 +three 1 +student 1 +ispermit 1 +encourag 1 +acollabor 1 +singl 1 +submit 1 +forgrad 1 +name 1 +collaborationswil 1 +consid 1 +violat 1 +academ 1 +take 1 +home 1 +examin 1 +nocollabor 1 +howev 1 +towrit 1 +page 1 +survei 1 +issuesthat 1 +list 1 +bedistribut 1 +start 1 +ofth 1 +last 1 +henc 1 +week 1 +tocomplet 1 +also 1 +team 1 +colleagu 1 +prepar 1 +twolectur 1 +previous 1 +choosethi 1 +option 1 +write 1 +asingl 1 +warmli 1 +toconsid 1 +volunt 1 +excellentopportun 1 +improv 1 +commun 1 +skill 1 +setsin 1 +subsequ 1 +shouldconform 1 +follow 1 +guidelin 1 +show 1 +synonym 1 +precis 1 +proof 1 +isrequir 1 +ask 1 +someth 1 +imposs 1 +thatmak 1 +clear 1 +cannot 1 +solv 1 +matter 1 +algorithmi 1 +insuffici 1 +particular 1 +algorithm 1 +work 1 +develop 1 +must 1 +accompani 1 +ofcorrect 1 +unless 1 +explicitli 1 +told 1 +otherwis 1 +snapshot 1 +thetextbook 1 +assum 1 +channel 1 +fifo 1 +deriv 1 +asnapshot 1 +asynchron 1 +onth 1 +assumpt 1 +correct 1 +theprotocol 1 +produc 1 +atmost 1 +point 1 +note 1 +book 1 +contain 1 +mattern 1 +thatcontain 1 +urg 1 +resist 1 +thetempt 1 +visit 1 +librari 1 +agener 1 +stabl 1 +predic 1 +moreeffici 1 +specif 1 +often 1 +conceptu 1 +simpler 1 +effici 1 +term 1 +number 1 +exchang 1 +base 1 +special 1 +deadlock 1 +ideal 1 +would 1 +need 1 +central 1 +monitorprocess 1 +cost 1 +process 1 +monitor 1 +basedsnapshot 1 +nowonlin 1 +link 1 +postscript 1 +filedescrib 1 +examth 1 +constitut 1 +fridaymai 1 +thepostscript 1 +describ 1 +question 1 +feel 1 +freeto 1 +send 1 +email 1 +idea 1 +pleas 1 +yoursuggest 1 +edurajeev 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..2a3ba53a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,57 @@ +fall 1 +note 1 +page 1 +reflect 1 +taught 1 +current 1 +version 1 +class 1 +differ 1 +content 1 +scope 1 +welcom 1 +homepag 1 +austin 1 +bywil 1 +adam 1 +jacob 1 +kornerup 1 +practic 1 +informationabout 1 +cours 1 +look 1 +syllabu 1 +avail 1 +linea 1 +technic 1 +compil 1 +program 1 +turn 1 +inhomework 1 +electron 1 +homework 1 +solut 1 +time 1 +crude 1 +interfac 1 +newsgrouputexa 1 +correspond 1 +takesplac 1 +exampl 1 +textbook 1 +pascalprogramm 1 +organ 1 +chapter 1 +read 1 +link 1 +home 1 +requir 1 +overhead 1 +viewinginform 1 +projecthow 1 +find 1 +offic 1 +studi 1 +midterm 1 +examand 1 +answer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..d73b661a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,198 @@ +cours 1 +descriptionc 1 +object 1 +orient 1 +design 1 +programminglast 1 +updat 1 +professor 1 +greg 1 +lavend 1 +utexa 1 +eduoffic 1 +hour 1 +class 1 +appt 1 +gokul 1 +rajaram 1 +mondai 1 +wednesdai 1 +station 1 +newsgroup 1 +lavendercours 1 +infocours 1 +syllabusannouncementslectur 1 +noteshomework 1 +solutionsprogram 1 +assignmentsgnu 1 +manualsstandard 1 +templat 1 +librari 1 +manual 1 +sourc 1 +codesocket 1 +code 1 +manualdescript 1 +intend 1 +student 1 +alreadi 1 +anintroductori 1 +program 1 +offer 1 +introduct 1 +give 1 +opportun 1 +think 1 +solut 1 +comput 1 +problem 1 +manner 1 +captur 1 +reusablepattern 1 +construct 1 +polymorph 1 +typehierarchi 1 +write 1 +profici 1 +professionallyus 1 +challeng 1 +us 1 +java 1 +text 1 +follow 1 +avail 1 +coop 1 +bookstor 1 +horstmann 1 +master 1 +john 1 +wilei 1 +associ 1 +relat 1 +materiali 1 +drawn 1 +lectur 1 +materi 1 +stroustrup 1 +languag 1 +edit 1 +addison 1 +weslei 1 +elli 1 +annot 1 +refer 1 +evolut 1 +cargil 1 +style 1 +cline 1 +lomow 1 +faq 1 +coplien 1 +advanc 1 +idiom 1 +plauger 1 +draft 1 +standard 1 +prentic 1 +hall 1 +gamma 1 +helm 1 +johnson 1 +vlissid 1 +pattern 1 +element 1 +reusabl 1 +softwar 1 +setup 1 +forum 1 +open 1 +discuss 1 +announcementsabout 1 +strongli 1 +encourag 1 +particip 1 +linediscuss 1 +fellow 1 +classmat 1 +lavendery 1 +also 1 +interest 1 +usenet 1 +comp 1 +lang 1 +helpjava 1 +hotjava 1 +advocaci 1 +misc 1 +programm 1 +secur 1 +tech 1 +javascript 1 +internet 1 +inform 1 +oopth 1 +link 1 +date 1 +pleas 1 +know 1 +dead 1 +site 1 +ansi 1 +postscript 1 +home 1 +page 1 +lab 1 +renssela 1 +polytechn 1 +institut 1 +product 1 +info 1 +objectspac 1 +compil 1 +libg 1 +libstdc 1 +server 1 +mitgnu 1 +cygnusgnu 1 +document 1 +doug 1 +pagec 1 +archiv 1 +mirror 1 +list 1 +ftpobject 1 +system 1 +developmentindex 1 +librariesth 1 +virtual 1 +libraryindex 1 +sourcesth 1 +talig 1 +frameworkjava 1 +javasoft 1 +gamelan 1 +huge 1 +registri 1 +applet 1 +digit 1 +espresso 1 +good 1 +summari 1 +current 1 +centr 1 +new 1 +event 1 +jar 1 +rate 1 +denni 1 +kafura 1 +virginia 1 +techdoug 1 +schmidt 1 +note 1 +irvin 1 +washington 1 +universitydoug 1 +sunyintroductori 1 +univers 1 +groningen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..a10157fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,56 @@ +parallel 1 +languag 1 +compilerscst 1 +compilersfal 1 +lectur 1 +tuesdai 1 +thursdai 1 +instructor 1 +calvin 1 +offic 1 +taylor 1 +phone 1 +email 1 +utexa 1 +hour 1 +handout 1 +gener 1 +inform 1 +case 1 +tera 1 +comput 1 +copyright 1 +program 1 +assign 1 +posix 1 +thread 1 +skeleton 1 +code 1 +tutori 1 +exampl 1 +hello 1 +world 1 +commun 1 +ironman 1 +interfac 1 +onlin 1 +manual 1 +postscript 1 +logp 1 +paper 1 +time 1 +spent 1 +messag 1 +pass 1 +share 1 +memori 1 +foundat 1 +practic 1 +partit 1 +dynam 1 +adapt 1 +grid 1 +hierarchieslast 1 +modifi 1 +decemb 1 +linlin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..26ec7e4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,16 @@ +home 1 +page 1 +csintroduct 1 +oper 1 +system 1 +class 1 +inform 1 +handout 1 +assign 1 +read 1 +project 1 +group 1 +send 1 +mail 1 +prof 1 +newsgroup 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..87292027 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,16 @@ +read 1 +list 1 +fall 1 +thot 1 +topic 1 +distribut 1 +systemsfil 1 +systemstopolog 1 +systemselectron 1 +commenrcefailur 1 +detectorsdistribut 1 +objectsconsistencysecuregroup 1 +communicationlanguag 1 +system 1 +dsmmobil 1 +comput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..108de73e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,13 @@ +mine 1 +monitor 1 +databas 1 +databasesprof 1 +daniel 1 +mirankernew 1 +seminarschedul 1 +term 1 +project 1 +materi 1 +overviewtentativeread 1 +list 1 +homeworkproject 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..8a4d2713 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,25 @@ +introduct 1 +artifici 1 +intellig 1 +instructor 1 +raymond 1 +mooneytim 1 +placespr 1 +tuth 1 +taylor 1 +hall 1 +cours 1 +informationclick 1 +inform 1 +sheetand 1 +syllabu 1 +last 1 +year 1 +updat 1 +file 1 +moonei 1 +code 1 +depart 1 +networkfor 1 +trace 1 +assign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..d1704dd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,35 @@ +lisp 1 +symbol 1 +program 1 +instructor 1 +raymond 1 +mooneyteach 1 +assist 1 +sowmya 1 +ramachandrantim 1 +placetu 1 +cours 1 +informationclick 1 +inform 1 +sheet 1 +syllabu 1 +informationon 1 +alsout 1 +allegro 1 +info 1 +page 1 +textparadigm 1 +artifici 1 +intellig 1 +case 1 +studi 1 +common 1 +lispassignmentsse 1 +file 1 +moonei 1 +code 1 +depart 1 +networkfor 1 +trace 1 +homework 1 +test 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..dcc7baf8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,52 @@ +machin 1 +learn 1 +instructor 1 +raymond 1 +mooneytim 1 +placetu 1 +cours 1 +informationclick 1 +inform 1 +sheetand 1 +syllabu 1 +textmachinelearninglectur 1 +slide 1 +introduct 1 +concept 1 +gener 1 +order 1 +decis 1 +tree 1 +experiment 1 +evalu 1 +comput 1 +theori 1 +rule 1 +induct 1 +logic 1 +program 1 +neural 1 +network 1 +cluster 1 +unsupervis 1 +bayesian 1 +instanc 1 +base 1 +explan 1 +learningassignmentsse 1 +file 1 +moonei 1 +code 1 +depart 1 +networkfor 1 +trace 1 +homework 1 +final 1 +project 1 +suggest 1 +spring 1 +paper 1 +format 1 +outlin 1 +talk 1 +version 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..245d7811 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,100 @@ +foundat 1 +comput 1 +sciencec 1 +introduct 1 +scienc 1 +program 1 +section 1 +intend 1 +major 1 +atleast 1 +semest 1 +high 1 +school 1 +cours 1 +number 1 +willb 1 +chang 1 +next 1 +year 1 +never 1 +taken 1 +programmingcours 1 +take 1 +porter 1 +instead 1 +strong 1 +math 1 +background 1 +least 1 +precalculu 1 +requir 1 +languag 1 +scheme 1 +dialect 1 +lisp 1 +theschem 1 +implement 1 +call 1 +gambit 1 +run 1 +macintoshcomput 1 +move 1 +faster 1 +previou 1 +coursesand 1 +emphas 1 +concept 1 +syntax 1 +work 1 +hard 1 +hopefulli 1 +learninga 1 +syllabu 1 +directori 1 +softwar 1 +tutorcopi 1 +pcassign 1 +machin 1 +simulationassign 1 +surf 1 +webassign 1 +basic 1 +schemeassign 1 +plai 1 +peano 1 +gamblingassign 1 +turtl 1 +graphicsassign 1 +snow 1 +treesassign 1 +list 1 +manipulationstudi 1 +guid 1 +exam 1 +vocabulari 1 +assign 1 +plot 1 +thickensassign 1 +treasur 1 +huntassign 1 +symbol 1 +algebraassign 1 +data 1 +abstract 1 +matricesstudi 1 +draw 1 +express 1 +unparsingassign 1 +translationstudi 1 +final 1 +thur 1 +gordon 1 +novak 1 +assignmentsprogram 1 +file 1 +descriptionsprogram 1 +submiss 1 +gradingmidterm 1 +studi 1 +guidefin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..7cd56d5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,41 @@ +compilersc 1 +cover 1 +design 1 +construct 1 +compil 1 +programminglanguag 1 +student 1 +write 1 +pascal 1 +codei 1 +gener 1 +powerpc 1 +processor 1 +server 1 +incorpor 1 +chip 1 +cours 1 +heavi 1 +program 1 +workload 1 +especi 1 +summer 1 +plan 1 +take 1 +expect 1 +dedicatetheir 1 +live 1 +five 1 +week 1 +syllabusprogram 1 +assignmentsprogram 1 +file 1 +descript 1 +directori 1 +submiss 1 +gradingmidterm 1 +studi 1 +guidefin 1 +exam 1 +guidegordon 1 +novak 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..48267462 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,53 @@ +artifici 1 +intelligencec 1 +intelligenceartifici 1 +intellig 1 +defin 1 +studi 1 +thecomput 1 +requir 1 +behavior 1 +attempt 1 +todupl 1 +comput 1 +us 1 +connectspercept 1 +environ 1 +action 1 +appropri 1 +achiev 1 +thegoal 1 +actor 1 +cours 1 +survei 1 +major 1 +topic 1 +includ 1 +search 1 +logic 1 +andknowledg 1 +represent 1 +natur 1 +languag 1 +process 1 +withbrief 1 +coverag 1 +brain 1 +machin 1 +vision 1 +syllabusprogram 1 +assignmentsprogram 1 +file 1 +descriptionsmidterm 1 +guidefin 1 +exam 1 +guidepred 1 +calculu 1 +stori 1 +problemssolut 1 +select 1 +problemsnot 1 +bibliographi 1 +human 1 +braingordon 1 +novak 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..326fc98f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,52 @@ +automat 1 +programmingc 1 +programmingautomat 1 +program 1 +gener 1 +execut 1 +programsfrom 1 +specif 1 +higher 1 +level 1 +ordinari 1 +languag 1 +cours 1 +consist 1 +lectur 1 +first 1 +third 1 +semest 1 +homework 1 +problem 1 +assign 1 +given 1 +illustrateth 1 +materi 1 +long 1 +requirelearn 1 +sever 1 +kind 1 +system 1 +latter 1 +partof 1 +cover 1 +read 1 +research 1 +literatur 1 +student 1 +expect 1 +present 1 +paper 1 +class 1 +syllabusbibliographyassign 1 +compil 1 +optim 1 +done 1 +handpattern 1 +matchingobject 1 +orient 1 +programmingintroduct 1 +glispview 1 +graphic 1 +programminggordon 1 +novak 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..be82a34d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,675 @@ +introduct 1 +graduat 1 +comput 1 +sciencecst 1 +sciencefal 1 +lectur 1 +mondai 1 +instructor 1 +robert 1 +blumof 1 +offic 1 +taylor 1 +phone 1 +email 1 +utexa 1 +hour 1 +thursdai 1 +feel 1 +free 1 +stop 1 +time 1 +semest 1 +seminar 1 +cours 1 +taken 1 +apass 1 +fail 1 +basi 1 +student 1 +scienc 1 +undergradu 1 +topic 1 +honor 1 +receiv 1 +credit 1 +must 1 +beregist 1 +attend 1 +least 1 +schedulespeakertitleseptemb 1 +mirankeralamo 1 +data 1 +warehouseseptemb 1 +kuipersth 1 +spatial 1 +semant 1 +hierarchi 1 +humanand 1 +robot 1 +cognit 1 +mapsseptemb 1 +blumofecilk 1 +cilk 1 +adapt 1 +reliableparallel 1 +network 1 +workstationsseptemb 1 +risto 1 +miikkulainenlearn 1 +sequenti 1 +decis 1 +task 1 +throughsymbiot 1 +evolut 1 +neural 1 +networksoctob 1 +vladimir 1 +lifschitzmathemat 1 +principl 1 +logic 1 +programmingoctob 1 +paul 1 +wilsonextens 1 +languag 1 +open 1 +compil 1 +reflectionoctob 1 +mooneylearn 1 +process 1 +natur 1 +usinginduct 1 +mike 1 +dahlindistribut 1 +cluster 1 +internetsnovemb 1 +gordon 1 +novaksoftwar 1 +reus 1 +special 1 +genericprocedur 1 +viewsnovemb 1 +vijaya 1 +ramachandranth 1 +design 1 +evalu 1 +parallelalgorithmsnovemb 1 +lorenzo 1 +alvisilighweight 1 +fault 1 +tolerancenovemb 1 +calvin 1 +linadapt 1 +librari 1 +high 1 +level 1 +optimizationdecemb 1 +greg 1 +plaxtonanalysi 1 +algorithmslighweight 1 +tolerancelorenzo 1 +alvisidistribut 1 +system 1 +move 1 +beyond 1 +confin 1 +academia 1 +andresearch 1 +lab 1 +revolution 1 +busi 1 +government 1 +organ 1 +simpl 1 +citizen 1 +andcollect 1 +inform 1 +current 1 +technolog 1 +trend 1 +promis 1 +todramat 1 +increas 1 +pace 1 +revolut 1 +enabl 1 +thedesign 1 +highli 1 +cooper 1 +distribut 1 +applic 1 +beyondth 1 +client 1 +server 1 +paradigm 1 +har 1 +power 1 +ofdistribut 1 +environ 1 +scope 1 +emphasi 1 +toleranttechniqu 1 +undergo 1 +dramat 1 +chang 1 +toler 1 +willceas 1 +expens 1 +featur 1 +requir 1 +hand 1 +applicationsto 1 +exot 1 +failur 1 +user 1 +distributedinform 1 +infrastructur 1 +translat 1 +acompetit 1 +advantag 1 +guarante 1 +reliabl 1 +access 1 +criticalinform 1 +talk 1 +explor 1 +engineerfault 1 +solut 1 +call 1 +lightweight 1 +goal 1 +dedic 1 +resourc 1 +negligibleimpact 1 +perform 1 +execut 1 +scale 1 +cost 1 +depend 1 +sever 1 +number 1 +offailur 1 +need 1 +integr 1 +transpar 1 +theapplic 1 +programm 1 +support 1 +emerg 1 +communicatethrough 1 +messag 1 +well 1 +file 1 +address 1 +softwar 1 +gener 1 +effect 1 +parallel 1 +onnetwork 1 +workstationsrobert 1 +blumofethi 1 +present 1 +overview 1 +pronouncedsilk 1 +algorithm 1 +multithread 1 +andcilk 1 +runtim 1 +functionalsubset 1 +workstat 1 +providesadapt 1 +tranpar 1 +touser 1 +program 1 +mean 1 +ofworkst 1 +run 1 +grow 1 +shrinkdynam 1 +avail 1 +idl 1 +onth 1 +amount 1 +within 1 +addit 1 +cilkprogram 1 +continu 1 +even 1 +workstationscrash 1 +automat 1 +detect 1 +andrecov 1 +includ 1 +livedemonstr 1 +internetsmik 1 +dahlinthi 1 +give 1 +issu 1 +indistribut 1 +applicationsmotiv 1 +aggress 1 +wide 1 +area 1 +inclust 1 +fast 1 +allow 1 +machin 1 +close 1 +servicei 1 +request 1 +us 1 +among 1 +nodesto 1 +provid 1 +better 1 +singl 1 +centralserv 1 +challeng 1 +goodperform 1 +consist 1 +despit 1 +limit 1 +networkperform 1 +node 1 +projectwil 1 +human 1 +mapsbenjamin 1 +kuipershuman 1 +map 1 +reli 1 +differ 1 +represent 1 +forlarg 1 +space 1 +ontolog 1 +similarli 1 +varietyof 1 +approach 1 +propos 1 +andmap 1 +unknown 1 +cast 1 +diverserepresent 1 +structur 1 +spatialsemant 1 +object 1 +relat 1 +andassumpt 1 +abstract 1 +mathemat 1 +foundat 1 +thecontrol 1 +formal 1 +dynam 1 +whose 1 +stabl 1 +equilibrium 1 +point 1 +beabstract 1 +discret 1 +distinct 1 +state 1 +trajectori 1 +link 1 +action 1 +givinga 1 +causal 1 +graph 1 +causalgraph 1 +turn 1 +topologicalnetwork 1 +place 1 +path 1 +local 1 +metric 1 +model 1 +occupancygrid 1 +neighborhood 1 +built 1 +theframework 1 +topolog 1 +without 1 +usual 1 +problem 1 +ofglob 1 +programmingvladimir 1 +lifschitzlog 1 +sister 1 +functionalprogram 1 +base 1 +view 1 +notne 1 +contain 1 +explicit 1 +oper 1 +instruct 1 +instead 1 +itcan 1 +simpli 1 +fact 1 +sufficientto 1 +solv 1 +declar 1 +executedus 1 +method 1 +autom 1 +reason 1 +prolog 1 +best 1 +known 1 +logicprogram 1 +theori 1 +concern 1 +withdefin 1 +describ 1 +thereason 1 +implement 1 +investig 1 +thesound 1 +optimizationcalvin 1 +linthi 1 +build 1 +make 1 +andtheir 1 +interfac 1 +differenthardwar 1 +platform 1 +produc 1 +efficientand 1 +usabl 1 +framework 1 +suchlibrari 1 +three 1 +plan 1 +experi 1 +appli 1 +thesetechniqu 1 +scientif 1 +weexplain 1 +facilit 1 +optim 1 +learn 1 +symbiot 1 +ofneur 1 +networksristo 1 +miikkulainena 1 +novel 1 +reinforc 1 +sane 1 +neuro 1 +evolv 1 +popul 1 +neuronsthrough 1 +genet 1 +form 1 +given 1 +promot 1 +inth 1 +result 1 +effici 1 +search 1 +anddiscourag 1 +converg 1 +suboptim 1 +abl 1 +toextract 1 +domain 1 +specif 1 +spars 1 +broad 1 +rang 1 +sequentialdecis 1 +control 1 +game 1 +plai 1 +resourcemanag 1 +alamo 1 +warehousedan 1 +mirankerth 1 +effort 1 +direct 1 +intra 1 +develop 1 +andint 1 +enumer 1 +interest 1 +site 1 +datasourc 1 +sourc 1 +theuser 1 +illus 1 +virtual 1 +databas 1 +follow 1 +byqueri 1 +analysi 1 +tool 1 +central 1 +architectur 1 +theabstract 1 +corba 1 +compliant 1 +interfacethat 1 +uniform 1 +heterogen 1 +embodi 1 +higher 1 +ofabstract 1 +code 1 +clever 1 +algorithmsand 1 +separ 1 +isol 1 +buffer 1 +anddata 1 +prefetch 1 +claim 1 +often 1 +advanc 1 +facil 1 +anobject 1 +orient 1 +queri 1 +engin 1 +deduct 1 +infer 1 +activedatabas 1 +mine 1 +constructedus 1 +common 1 +final 1 +sinc 1 +output 1 +databasefacil 1 +serv 1 +compon 1 +thealamo 1 +compos 1 +resolv 1 +dataintegr 1 +particular 1 +anticip 1 +elementsof 1 +repres 1 +meta 1 +andsemant 1 +conflict 1 +ultim 1 +furthercomposit 1 +complex 1 +knowledg 1 +answerhigh 1 +induct 1 +logicprogrammingraymond 1 +mooneyinduct 1 +learningprolog 1 +exampl 1 +offirst 1 +order 1 +offer 1 +standard 1 +learningmethod 1 +constrain 1 +fix 1 +length 1 +vector 1 +areappli 1 +believethi 1 +richer 1 +import 1 +havedevelop 1 +chill 1 +determinist 1 +parsersfrom 1 +corpu 1 +pars 1 +sentenc 1 +obtain 1 +superior 1 +onsever 1 +artifici 1 +corpora 1 +previous 1 +test 1 +networkmethod 1 +encourag 1 +realist 1 +ati 1 +ofairlin 1 +also 1 +automaticallydevelop 1 +complet 1 +englishdatabas 1 +moreaccur 1 +parser 1 +smallgeograph 1 +foidl 1 +past 1 +tens 1 +english 1 +surpass 1 +previou 1 +treemethod 1 +procedur 1 +throughviewsgordon 1 +novak 1 +clearli 1 +good 1 +idea 1 +difficult 1 +toachiev 1 +practic 1 +assumpt 1 +thesoftwar 1 +type 1 +typesus 1 +agener 1 +version 1 +custom 1 +graphic 1 +easi 1 +specifyview 1 +theworld 1 +write 1 +adesir 1 +algorithmsgreg 1 +plaxtona 1 +major 1 +focu 1 +theoret 1 +andanalysi 1 +asymptot 1 +random 1 +forspecif 1 +research 1 +notuncommon 1 +come 1 +across 1 +written 1 +paper 1 +main 1 +underli 1 +conceptu 1 +straightforward 1 +theform 1 +surprisingli 1 +lengthi 1 +deal 1 +minor 1 +side 1 +case 1 +havelittl 1 +noth 1 +suchpap 1 +seem 1 +signific 1 +andform 1 +difficulti 1 +gapsinher 1 +convent 1 +notat 1 +inadequatefor 1 +succinctli 1 +certain 1 +straightforwardalgorithm 1 +significantli 1 +reduc 1 +theconceptu 1 +associ 1 +trivialclass 1 +concret 1 +consid 1 +analysisof 1 +linear 1 +select 1 +blum 1 +floyd 1 +pratt 1 +rivest 1 +tarjan 1 +algorithmsvijaya 1 +forcombinatori 1 +extens 1 +studi 1 +recentyear 1 +larg 1 +pram 1 +willdescrib 1 +work 1 +parallelalgorithm 1 +thesealgorithm 1 +massiv 1 +maspar 1 +thendescrib 1 +queu 1 +variant 1 +wepropos 1 +appropri 1 +parallelshar 1 +memori 1 +tradit 1 +reflectionpaul 1 +ad 1 +portabl 1 +fairli 1 +modif 1 +addnew 1 +analys 1 +reflect 1 +examin 1 +ofinterest 1 +part 1 +affect 1 +structureaccordingli 1 +discuss 1 +thing 1 +modular 1 +adapat 1 +recent 1 +workon 1 +rscheme 1 +extensiblelanguag 1 +last 1 +modifi 1 +novemb 1 +blumoferdb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..34a0a80c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,68 @@ +introduct 1 +oper 1 +systemsc 1 +systemsfal 1 +lectur 1 +mondai 1 +wednesdai 1 +instructor 1 +robert 1 +blumof 1 +offic 1 +taylor 1 +phone 1 +email 1 +utexa 1 +hour 1 +thursdai 1 +feel 1 +free 1 +stop 1 +time 1 +teach 1 +assist 1 +subramanyam 1 +gooti 1 +tuesdai 1 +station 1 +exampl 1 +solut 1 +program 1 +assign 1 +solari 1 +canb 1 +found 1 +crypt 1 +implement 1 +support 1 +multiplemap 1 +assum 1 +map 1 +file 1 +least 1 +long 1 +themap 1 +test 1 +encrypt 1 +decrypt 1 +problem 1 +handout 1 +gener 1 +inform 1 +topic 1 +cover 1 +midterm 1 +exam 1 +final 1 +solutionsread 1 +book 1 +chapter 1 +date 1 +septemb 1 +except 1 +octob 1 +novemb 1 +decemb 1 +last 1 +modifi 1 +blumoferdb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..d9083e02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,45 @@ +neural 1 +network 1 +fall 1 +networksfal 1 +uniqu 1 +number 1 +instructor 1 +risto 1 +miikkulainen 1 +utexa 1 +offic 1 +bednar 1 +jbednar 1 +station 1 +text 1 +lauren 1 +fausett 1 +fundament 1 +ofneur 1 +architectur 1 +algorithm 1 +applic 1 +englewood 1 +cliff 1 +prenticehal 1 +select 1 +paper 1 +class 1 +note 1 +copi 1 +slide 1 +us 1 +lectur 1 +grade 1 +homework 1 +midterm 1 +final 1 +detail 1 +schedulehomework 1 +assignmentsexamsclass 1 +resourcesa 1 +postscript 1 +versionof 1 +syllabusristo 1 +edusun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..c7bd6d6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,76 @@ +introduct 1 +cognit 1 +scienc 1 +fall 1 +sciencefal 1 +instructor 1 +nichola 1 +asher 1 +philosophi 1 +dept 1 +waggen 1 +hall 1 +nasher 1 +berti 1 +utexa 1 +offic 1 +hour 1 +appt 1 +risto 1 +miikkulainen 1 +comput 1 +taylor 1 +text 1 +posner 1 +foundat 1 +mitpress 1 +packet 1 +read 1 +requir 1 +student 1 +regular 1 +interv 1 +submit 1 +discuss 1 +note 1 +short 1 +page 1 +critic 1 +commentari 1 +collabor 1 +withanoth 1 +also 1 +write 1 +paper 1 +approxim 1 +signific 1 +research 1 +topic 1 +find 1 +ofinterest 1 +count 1 +toward 1 +final 1 +grade 1 +thepap 1 +class 1 +attend 1 +particip 1 +alsorequir 1 +detail 1 +cours 1 +descriptioncours 1 +schedulediscuss 1 +notesperson 1 +adscollabor 1 +paperclass 1 +resourcesstud 1 +questionnaireus 1 +link 1 +center 1 +list 1 +sciencefaculti 1 +pointer 1 +resourc 1 +gener 1 +edusun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..81121339 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,190 @@ +pascal 1 +programmingcsp 1 +pascalintroductori 1 +comput 1 +programminginstructor 1 +suzi 1 +gallagherwelcom 1 +program 1 +excit 1 +intellectu 1 +challeng 1 +cspi 1 +design 1 +give 1 +firm 1 +foundat 1 +andso 1 +need 1 +effort 1 +read 1 +page 1 +thecours 1 +syllabu 1 +carefulli 1 +summari 1 +cours 1 +contain 1 +detail 1 +assign 1 +requir 1 +wella 1 +import 1 +polici 1 +schedul 1 +date 1 +exam 1 +otherdeadlin 1 +respons 1 +everyth 1 +thesyllabu 1 +without 1 +delai 1 +avail 1 +jenn 1 +copi 1 +guadalup 1 +hundr 1 +student 1 +takethi 1 +courseeach 1 +semest 1 +highli 1 +structur 1 +foral 1 +monitor 1 +thenewsgroup 1 +utexa 1 +class 1 +frequent 1 +updat 1 +take 1 +work 1 +expect 1 +depend 1 +howwel 1 +prepar 1 +event 1 +becom 1 +extrem 1 +difficult 1 +behind 1 +grade 1 +procedur 1 +riski 1 +wait 1 +near 1 +deadlineto 1 +quiz 1 +turn 1 +late 1 +get 1 +half 1 +credit 1 +line 1 +long 1 +hour 1 +deadlin 1 +warn 1 +unfortun 1 +construct 1 +mani 1 +link 1 +nowher 1 +apolog 1 +everyon 1 +attend 1 +lectur 1 +gallagh 1 +everi 1 +thursdayeven 1 +welch 1 +mondai 1 +wednesdai 1 +break 1 +intosmal 1 +group 1 +section 1 +discuss 1 +ofth 1 +materi 1 +ateach 1 +assist 1 +nine 1 +written 1 +debug 1 +theprogram 1 +laboratori 1 +note 1 +thatlaboratori 1 +limit 1 +thatgrad 1 +even 1 +less 1 +andyou 1 +within 1 +eight 1 +quizz 1 +thetest 1 +room 1 +thattest 1 +limitedand 1 +often 1 +foravail 1 +proctor 1 +sever 1 +hoursbefor 1 +submit 1 +andquizz 1 +earli 1 +enough 1 +three 1 +must 1 +betaken 1 +prescrib 1 +time 1 +make 1 +soon 1 +possibl 1 +begin 1 +file 1 +openedfor 1 +uniqu 1 +identifi 1 +yourstud 1 +access 1 +orsak 1 +requiredtextbook 1 +dale 1 +weem 1 +wewil 1 +cover 1 +chapter 1 +individu 1 +background 1 +vari 1 +consider 1 +thiscours 1 +partial 1 +self 1 +pace 1 +feel 1 +well 1 +click 1 +howev 1 +still 1 +liabl 1 +commun 1 +exampl 1 +could 1 +form 1 +studi 1 +also 1 +gripe 1 +thought 1 +articl 1 +gener 1 +interest 1 +elicit 1 +repli 1 +staff 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..cb02a384 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,59 @@ +databas 1 +system 1 +implement 1 +fall 1 +professor 1 +batori 1 +tong 1 +wang 1 +syllabu 1 +homework 1 +problem 1 +solut 1 +project 1 +pleas 1 +read 1 +first 1 +test 1 +file 1 +retriev 1 +sampl 1 +output 1 +ret_into 1 +replac 1 +append 1 +delet 1 +recoveri 1 +data 1 +dept 1 +student 1 +contest 1 +mdb 1 +benchmark 1 +script 1 +time 1 +us 1 +measur 1 +run 1 +sinc 1 +order 1 +tupl 1 +attribut 1 +differ 1 +anoth 1 +wrote 1 +perl 1 +transform 1 +compar 1 +diff 1 +turn 1 +program 1 +pass 1 +without 1 +error 1 +fail 1 +reason 1 +email 1 +suggest 1 +comment 1 +medec 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..30f25483 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,302 @@ +advanc 1 +oper 1 +system 1 +instructor 1 +harrick 1 +tabl 1 +content 1 +cours 1 +descript 1 +gener 1 +inform 1 +prerequisit 1 +synopsi 1 +textbook 1 +requir 1 +read 1 +list 1 +fall 1 +earli 1 +idea 1 +distribut 1 +file 1 +real 1 +time 1 +support 1 +wireless 1 +mobil 1 +comput 1 +titl 1 +professor 1 +last 1 +offer 1 +prerequisitegradu 1 +stand 1 +undergradu 1 +systemssuch 1 +student 1 +expect 1 +familiar 1 +materialin 1 +chapter 1 +concept 1 +peterson 1 +andsilberschatz 1 +synopsisc 1 +breadth 1 +coveringboth 1 +theoret 1 +practic 1 +issu 1 +systemdesign 1 +topic 1 +cover 1 +includ 1 +design 1 +implement 1 +ofdistribut 1 +formobil 1 +environ 1 +case 1 +studi 1 +anemphasi 1 +place 1 +current 1 +research 1 +collect 1 +articl 1 +made 1 +avail 1 +theinstructor 1 +requirementsstud 1 +number 1 +paper 1 +area 1 +anddiscuss 1 +grade 1 +determin 1 +examin 1 +aterm 1 +project 1 +present 1 +systemsfernando 1 +corbato 1 +marjori 1 +merwin 1 +daggett 1 +robert 1 +dalei 1 +anexperiment 1 +share 1 +afip 1 +proceed 1 +spring 1 +joint 1 +confer 1 +page 1 +brinch 1 +hansen 1 +nucleu 1 +multiprogram 1 +commun 1 +april 1 +bensoussan 1 +clingen 1 +multic 1 +virtualmemori 1 +denni 1 +ritchi 1 +thompson 1 +unix 1 +juli 1 +overview 1 +andrew 1 +tannenbaum 1 +robbert 1 +reness 1 +operatingsystem 1 +survei 1 +decemb 1 +levi 1 +silberschatz 1 +andexampl 1 +process 1 +thread 1 +managementa 1 +tucker 1 +gupta 1 +control 1 +schedul 1 +formultiprogram 1 +memori 1 +multiprocessor 1 +theth 1 +sosp 1 +review 1 +thoma 1 +anderson 1 +edward 1 +lazowska 1 +henri 1 +theperform 1 +implic 1 +manag 1 +altern 1 +forshar 1 +ieee 1 +transact 1 +schedulingr 1 +bunt 1 +techniqu 1 +octob 1 +black 1 +concurr 1 +parallel 1 +inth 1 +mach 1 +inter 1 +communicationj 1 +barrera 1 +fast 1 +network 1 +inproceed 1 +usenix 1 +symposium 1 +novemb 1 +cheriton 1 +group 1 +kernel 1 +acmtransact 1 +remot 1 +procedur 1 +call 1 +birel 1 +bruce 1 +nelson 1 +rpc 1 +oncomput 1 +februari 1 +bershad 1 +lightweightremot 1 +onoper 1 +principl 1 +migrationf 1 +dougli 1 +ousterhout 1 +migrat 1 +spriteoper 1 +internationalconfer 1 +berlin 1 +germani 1 +septemb 1 +theimer 1 +lantz 1 +preemptabl 1 +execut 1 +fault 1 +tolerancef 1 +cristian 1 +basic 1 +toler 1 +distributedsystem 1 +intern 1 +workshop 1 +sand 1 +beyond 1 +karshmer 1 +nehmer 1 +springer 1 +verlag 1 +birman 1 +joseph 1 +reliabl 1 +presenc 1 +offailur 1 +systemsr 1 +sandberg 1 +goldberg 1 +kleiman 1 +ofsun 1 +summer 1 +june 1 +mckusick 1 +leffler 1 +fabri 1 +fastfil 1 +august 1 +rosenblum 1 +alog 1 +structur 1 +cach 1 +systemsm 1 +schroeder 1 +gifford 1 +needham 1 +fora 1 +programm 1 +workstat 1 +terri 1 +hint 1 +ieeetransact 1 +softwar 1 +engin 1 +januari 1 +protect 1 +securityr 1 +us 1 +encrypt 1 +authent 1 +inlarg 1 +butler 1 +lampson 1 +origin 1 +proc 1 +princeton 1 +oninform 1 +scienc 1 +march 1 +accetta 1 +baron 1 +boloski 1 +golub 1 +rashid 1 +tevanian 1 +young 1 +foundat 1 +develop 1 +systemsh 1 +kopetz 1 +event 1 +trigger 1 +versu 1 +timesystem 1 +layland 1 +algorithm 1 +hard 1 +journal 1 +theacm 1 +zhao 1 +ramamritham 1 +stankov 1 +preemptiv 1 +schedulingund 1 +resourc 1 +constraint 1 +tokuda 1 +mercer 1 +art 1 +computingb 1 +badrinath 1 +acharya 1 +imielinski 1 +impact 1 +ondistribut 1 +satyanarayanan 1 +kistler 1 +kumar 1 +okasaki 1 +siegel 1 +steer 1 +coda 1 +highli 1 +distributedworkst 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..22aab400 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,403 @@ +multimedia 1 +system 1 +instructor 1 +harrick 1 +tabl 1 +content 1 +cours 1 +descript 1 +gener 1 +inform 1 +prerequisit 1 +synopsi 1 +textbook 1 +requir 1 +offic 1 +hour 1 +teach 1 +assist 1 +read 1 +list 1 +spring 1 +overview 1 +technolog 1 +trend 1 +problem 1 +media 1 +compress 1 +storag 1 +server 1 +network 1 +architectur 1 +applic 1 +protocol 1 +oper 1 +support 1 +databas 1 +class 1 +handout 1 +note 1 +postscript 1 +format 1 +septemb 1 +video 1 +fundament 1 +jpeg 1 +mpeg 1 +algorithm 1 +scalabl 1 +design 1 +issu 1 +placement 1 +retriev 1 +octob 1 +possibl 1 +project 1 +cach 1 +batch 1 +techniqu 1 +integr 1 +servic 1 +introduct 1 +novemb 1 +real 1 +time 1 +channel 1 +establish 1 +packet 1 +schedul 1 +analysi 1 +processor 1 +transport 1 +decemb 1 +descriptiongener 1 +informationcours 1 +titl 1 +professor 1 +detail 1 +offer 1 +fall 1 +uniqu 1 +number 1 +place 1 +taylor 1 +hall 1 +prerequisitesgradu 1 +stand 1 +familiar 1 +basic 1 +concept 1 +incomput 1 +synopsisc 1 +advanc 1 +cover 1 +boththeoret 1 +practic 1 +multimediasystem 1 +topic 1 +includ 1 +digit 1 +systemsupport 1 +audio 1 +well 1 +transportprotocol 1 +emphasi 1 +current 1 +designissu 1 +research 1 +textbooka 1 +collect 1 +recent 1 +articl 1 +madeavail 1 +requirementsth 1 +introduc 1 +thetop 1 +follow 1 +discuss 1 +relatedpap 1 +question 1 +answer 1 +student 1 +expect 1 +tounderstand 1 +describ 1 +critiqu 1 +contribut 1 +ofpap 1 +addition 1 +carri 1 +asemest 1 +long 1 +implement 1 +grade 1 +determin 1 +examin 1 +andclass 1 +particip 1 +vintuesdai 1 +appoint 1 +phone 1 +mail 1 +utexa 1 +assistantmr 1 +prashant 1 +shenoi 1 +eduread 1 +cntain 1 +copi 1 +paper 1 +theread 1 +avail 1 +mondai 1 +speedwai 1 +locat 1 +dobi 1 +mall 1 +guadalup 1 +austin 1 +pleas 1 +callthem 1 +make 1 +sure 1 +packag 1 +readi 1 +compressionr 1 +steinmetz 1 +data 1 +comput 1 +principl 1 +page 1 +wallac 1 +still 1 +pictur 1 +standard 1 +commun 1 +april 1 +gall 1 +multimediaappl 1 +chiang 1 +anastassi 1 +hierarch 1 +code 1 +digitaltelevis 1 +ieee 1 +magazin 1 +serversoverview 1 +goyal 1 +serverdesign 1 +survei 1 +gemmel 1 +kandlur 1 +venkat 1 +rangan 1 +row 1 +tutori 1 +effici 1 +optim 1 +ofmultimedia 1 +object 1 +disk 1 +arrai 1 +proceed 1 +ieeeintern 1 +confer 1 +icmc 1 +washington 1 +failur 1 +recoveri 1 +inmulti 1 +annualintern 1 +symposium 1 +fault 1 +toler 1 +ftc 1 +pasadena 1 +california 1 +june 1 +chiueh 1 +katz 1 +multi 1 +resolut 1 +represent 1 +forparallel 1 +anaheim 1 +august 1 +admiss 1 +control 1 +chen 1 +groupedsweep 1 +scheme 1 +manag 1 +ofthird 1 +intern 1 +workshop 1 +supportfor 1 +diego 1 +narasimha 1 +reddi 1 +wylli 1 +multimediai 1 +statist 1 +admissioncontrol 1 +acmmultimedia 1 +francisco 1 +designinglarg 1 +scale 1 +march 1 +scan 1 +inmultimedia 1 +sanfrancisco 1 +stream 1 +convers 1 +interactivevideo 1 +playout 1 +summer 1 +buffer 1 +space 1 +sitaram 1 +shahabuddin 1 +dynam 1 +polici 1 +foran 1 +demand 1 +demandvideo 1 +report 1 +papadimitri 1 +ramanathan 1 +informationcach 1 +deliveri 1 +person 1 +program 1 +homeentertain 1 +internationalconfer 1 +boston 1 +multimedianetwork 1 +layer 1 +shenker 1 +futur 1 +internet 1 +journal 1 +select 1 +area 1 +ferrari 1 +verma 1 +channelestablish 1 +wide 1 +areasin 1 +zhang 1 +keshav 1 +comparison 1 +rate 1 +base 1 +servicedisciplin 1 +sigcomm 1 +delaybound 1 +heterogen 1 +toappear 1 +also 1 +workshopon 1 +nossdav 1 +durham 1 +hampshir 1 +chow 1 +losslesssmooth 1 +london 1 +salehi 1 +kuros 1 +towslei 1 +storedvideo 1 +reduc 1 +variabl 1 +resourc 1 +requirementsthrough 1 +smooth 1 +sigmetr 1 +philadelphia 1 +grossglaus 1 +rcbr 1 +simpl 1 +efficientservic 1 +multipl 1 +traffic 1 +acmsigcomm 1 +kanakia 1 +misra 1 +reibman 1 +adapt 1 +congestioncontrol 1 +proceedingsof 1 +review 1 +clark 1 +tennenhous 1 +consider 1 +newgener 1 +campbel 1 +coulson 1 +hutchison 1 +qualiti 1 +servicearchitectur 1 +turner 1 +peterson 1 +imag 1 +transfer 1 +floyd 1 +jacobson 1 +mccann 1 +reliablemulticast 1 +framework 1 +light 1 +weight 1 +session 1 +levelfram 1 +buss 1 +deffner 1 +schulzrinn 1 +januari 1 +blakowski 1 +synchron 1 +refer 1 +model 1 +specif 1 +case 1 +studi 1 +onselect 1 +januaryoper 1 +multimediag 1 +robin 1 +blair 1 +papathoma 1 +andd 1 +shepherd 1 +basedcommun 1 +choru 1 +incommun 1 +druschel 1 +abbott 1 +pagel 1 +subsystem 1 +workstat 1 +ofth 1 +third 1 +systemssupport 1 +govindan 1 +anderson 1 +mechan 1 +forcontinu 1 +operatingsystem 1 +pacif 1 +grove 1 +formultimedia 1 +second 1 +symposiumon 1 +osdi 1 +seattl 1 +conferencingh 1 +zellweg 1 +swinehart 1 +venkatrangan 1 +conferenc 1 +etherphon 1 +environ 1 +flexibl 1 +packetvideo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..91a37667 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,296 @@ +multimedia 1 +commun 1 +databas 1 +fall 1 +instructor 1 +harrick 1 +tabl 1 +content 1 +cours 1 +descript 1 +gener 1 +inform 1 +prerequisit 1 +synopsi 1 +textbook 1 +requir 1 +offic 1 +hour 1 +read 1 +list 1 +conferenc 1 +rout 1 +multicast 1 +internet 1 +servic 1 +processor 1 +schedul 1 +support 1 +titl 1 +professor 1 +semest 1 +meet 1 +time 1 +fridai 1 +prerequisitesgradu 1 +stand 1 +familiar 1 +basic 1 +concept 1 +networkprotocol 1 +oper 1 +system 1 +design 1 +multimediasystem 1 +synopsisthi 1 +advanc 1 +topic 1 +bediscuss 1 +includ 1 +transport 1 +protocol 1 +formultimedia 1 +mobil 1 +network 1 +andmultimedia 1 +multimediadatabas 1 +emphasi 1 +place 1 +current 1 +issu 1 +andresearch 1 +collect 1 +research 1 +articl 1 +made 1 +avail 1 +theinstructor 1 +requirementsstud 1 +number 1 +paper 1 +area 1 +aswel 1 +present 1 +discuss 1 +class 1 +grade 1 +determinedbas 1 +particip 1 +studentsenrol 1 +letter 1 +submit 1 +orcarri 1 +project 1 +hoursfridai 1 +appoint 1 +phone 1 +mail 1 +utexa 1 +mccann 1 +jacobson 1 +flexibleframework 1 +packet 1 +video 1 +proceed 1 +acmmultimedia 1 +francisco 1 +novemb 1 +handlei 1 +wakeman 1 +crowcroft 1 +confer 1 +controlchannel 1 +cccp 1 +scalabl 1 +base 1 +build 1 +conferencecontrol 1 +applic 1 +sigcomm 1 +boston 1 +gajewska 1 +kistler 1 +manass 1 +redel 1 +argo 1 +systemfor 1 +distribut 1 +collabor 1 +gong 1 +multipoint 1 +audio 1 +control 1 +basedmultimedia 1 +zellweg 1 +swinehart 1 +venkat 1 +rangan 1 +etherphon 1 +environ 1 +ieeecomput 1 +octob 1 +deer 1 +cheriton 1 +datagraminternetwork 1 +extend 1 +lan 1 +transact 1 +computersystem 1 +page 1 +ballardi 1 +franci 1 +core 1 +tree 1 +architectur 1 +inter 1 +domain 1 +thyagarajan 1 +hierarch 1 +distanc 1 +vector 1 +mbone 1 +widyono 1 +andevalu 1 +algorithm 1 +real 1 +channel 1 +msthesi 1 +berkelei 1 +kompella 1 +pasqual 1 +polyzo 1 +multimediacommun 1 +technic 1 +report 1 +univers 1 +california 1 +diego 1 +floyd 1 +zhang 1 +reliabl 1 +framework 1 +light 1 +weightsess 1 +level 1 +frame 1 +ofacm 1 +holbrook 1 +singhal 1 +receiv 1 +fordistribut 1 +interact 1 +simul 1 +theacm 1 +herzog 1 +estrin 1 +shenker 1 +share 1 +cost 1 +axiomat 1 +analysi 1 +gupta 1 +how 1 +moran 1 +nguyen 1 +resourc 1 +multi 1 +parti 1 +timecommun 1 +infocom 1 +servicesj 1 +guyton 1 +schwartz 1 +locat 1 +nearbi 1 +copi 1 +replic 1 +server 1 +mogul 1 +case 1 +forpersist 1 +connect 1 +http 1 +acmsigcomm 1 +supportc 1 +warldersburg 1 +weihl 1 +lotteri 1 +effici 1 +flexibleproport 1 +mangement 1 +ofoper 1 +implement 1 +osdi 1 +strideschedul 1 +determinist 1 +proport 1 +resourcemanag 1 +golestani 1 +self 1 +clock 1 +fair 1 +queue 1 +scheme 1 +high 1 +speedappl 1 +govindan 1 +anderson 1 +mechan 1 +forcontinu 1 +media 1 +symposium 1 +onoper 1 +principl 1 +sosp 1 +monterei 1 +jeffai 1 +timeproduc 1 +consum 1 +paradigm 1 +construct 1 +ofeffici 1 +predict 1 +ofth 1 +sigapp 1 +appli 1 +comput 1 +latenc 1 +manag 1 +intim 1 +workshop 1 +timeoper 1 +softwar 1 +seattl 1 +databasesw 1 +niblack 1 +qbic 1 +queri 1 +imag 1 +contentus 1 +color 1 +textur 1 +shape 1 +februari 1 +cawkel 1 +pictur 1 +journal 1 +ofinform 1 +scienc 1 +bach 1 +paul 1 +jain 1 +managementsystem 1 +face 1 +retriev 1 +ieee 1 +knowledgeand 1 +data 1 +engin 1 +august 1 +weymouth 1 +semant 1 +vimsi 1 +model 1 +intern 1 +onveri 1 +larg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..970ea6b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,86 @@ +algorithm 1 +techniqu 1 +theori 1 +fall 1 +instructor 1 +vijaya 1 +ramachandranuniqu 1 +number 1 +cours 1 +descript 1 +handout 1 +home 1 +work 1 +final 1 +exam 1 +instruct 1 +updat 1 +respons 1 +question 1 +receiv 1 +pose 1 +quot 1 +problem 1 +last 1 +sentenc 1 +take 1 +time 1 +sigma 1 +onsigma 1 +word 1 +refer 1 +amort 1 +total 1 +oroth 1 +answer 1 +well 1 +known 1 +face 1 +data 1 +structur 1 +disjoint 1 +set 1 +requir 1 +omega 1 +inth 1 +worst 1 +case 1 +text 1 +book 1 +isther 1 +differ 1 +meant 1 +first 1 +second 1 +paragraphof 1 +chapter 1 +note 1 +vertic 1 +cycl 1 +containdistinct 1 +label 1 +cancontain 1 +right 1 +think 1 +littl 1 +unclear 1 +denot 1 +size 1 +largest 1 +true 1 +submatrix 1 +equal 1 +somek 1 +entri 1 +fridai 1 +decemb 1 +mondai 1 +bepost 1 +either 1 +youhav 1 +sent 1 +pleas 1 +address 1 +us 1 +yourbest 1 +judgment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..20b021e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,227 @@ +wilson 1 +class 1 +pagec 1 +program 1 +languag 1 +pagethi 1 +home 1 +page 1 +paulwilson 1 +construct 1 +thing 1 +subject 1 +chang 1 +reload 1 +button 1 +yourbrows 1 +come 1 +make 1 +sure 1 +see 1 +mostrec 1 +version 1 +onlin 1 +cours 1 +read 1 +refer 1 +materi 1 +syllabu 1 +lectur 1 +note 1 +ondeclar 1 +scheme 1 +implement 1 +html 1 +brows 1 +first 1 +section 1 +arereason 1 +well 1 +index 1 +later 1 +willchang 1 +goe 1 +along 1 +ahead 1 +islik 1 +adventur 1 +suggest 1 +browser 1 +usinga 1 +system 1 +interact 1 +especi 1 +work 1 +throughchapt 1 +tutori 1 +allow 1 +text 1 +outof 1 +document 1 +past 1 +run 1 +error 1 +chapter 1 +sanoth 1 +reason 1 +rather 1 +thanprint 1 +hardcopi 1 +correct 1 +weget 1 +definit 1 +format 1 +onlinebrows 1 +list 1 +standard 1 +featur 1 +ters 1 +describ 1 +stuff 1 +coursenot 1 +help 1 +want 1 +know 1 +miscellanousfunct 1 +exactli 1 +author 1 +practic 1 +question 1 +also 1 +answer 1 +second 1 +quiz 1 +third 1 +name 1 +convent 1 +indent 1 +shouldconsult 1 +homework 1 +code 1 +grade 1 +itsens 1 +andnot 1 +draw 1 +data 1 +structur 1 +assign 1 +write 1 +merg 1 +sort 1 +solut 1 +three 1 +problem 1 +comment 1 +simpl 1 +reader 1 +includ 1 +regular 1 +express 1 +grammar 1 +actual 1 +backward 1 +chain 1 +proposit 1 +calculu 1 +theoremprov 1 +essenti 1 +littl 1 +subset 1 +prolog 1 +setofrul 1 +classifi 1 +anim 1 +logic 1 +kind 1 +ofanim 1 +plai 1 +us 1 +theorem 1 +prover 1 +pictur 1 +object 1 +instanc 1 +illustr 1 +simpleobject 1 +anoth 1 +show 1 +metaclass 1 +circular 1 +latter 1 +self 1 +base 1 +onclass 1 +gener 1 +procedur 1 +inherit 1 +type 1 +subtyp 1 +take 1 +explan 1 +test 1 +main 1 +default 1 +rscheme 1 +whichi 1 +instal 1 +public 1 +sparc 1 +solari 1 +machin 1 +command 1 +runschem 1 +linux 1 +orani 1 +sever 1 +unix 1 +andinstal 1 +free 1 +find 1 +itfrom 1 +donovan 1 +kolbl 1 +qing 1 +patch 1 +friendlier 1 +fornewbi 1 +recommend 1 +gettinggambit 1 +marc 1 +feelei 1 +youcan 1 +repositori 1 +window 1 +bestschem 1 +bunch 1 +avail 1 +guil 1 +might 1 +gambit 1 +mark 1 +mzscheme 1 +rice 1 +someth 1 +besid 1 +get 1 +meroon 1 +start 1 +doingobject 1 +orient 1 +tous 1 +advantag 1 +abl 1 +univers 1 +indiana 1 +lot 1 +freeimplement 1 +variou 1 +getinterest 1 +learn 1 +cover 1 +place 1 +look 1 +comp 1 +lang 1 +internet 1 +newsgroup 1 +devot 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..91e5209e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,97 @@ +fall 1 +comput 1 +program 1 +welcom 1 +homepag 1 +austin 1 +class 1 +taught 1 +adam 1 +ajit 1 +georg 1 +cours 1 +announc 1 +final 1 +surpris 1 +contact 1 +xunnow 1 +make 1 +test 1 +like 1 +solut 1 +homeworksreview 1 +session 1 +slidesth 1 +slide 1 +second 1 +half 1 +semest 1 +pleas 1 +view 1 +onlineif 1 +possibl 1 +print 1 +realli 1 +need 1 +file 1 +found 1 +updatedhomework 1 +sourc 1 +filemidterm 1 +webta 1 +inform 1 +weekli 1 +timetableta 1 +section 1 +offic 1 +hour 1 +locat 1 +guana 1 +kumar 1 +natarajan 1 +tuesdai 1 +thursdai 1 +station 1 +utexa 1 +eduxun 1 +feng 1 +xfeng 1 +detail 1 +time 1 +tabl 1 +guid 1 +new 1 +group 1 +note 1 +homework 1 +also 1 +tip 1 +postscript 1 +fridai 1 +download 1 +score 1 +model 1 +requir 1 +wordlist 1 +linux 1 +provid 1 +warren 1 +wang 1 +wwang 1 +midterm 1 +review 1 +answer 1 +exercis 1 +made 1 +modif 1 +mondai 1 +afternoon 1 +decimalinteg 1 +hexinteg 1 +octalinteg 1 +format 1 +assign 1 +maintain 1 +edudepart 1 +sciencesunivers 1 +texa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..7bb3a04e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,46 @@ +comput 1 +system 1 +architectur 1 +fall 1 +instructor 1 +herb 1 +schwetman 1 +mesquit 1 +softwar 1 +offic 1 +hour 1 +class 1 +appointmentcontact 1 +station 1 +contact 1 +utexa 1 +yang 1 +yangyang 1 +syllabu 1 +assign 1 +solut 1 +file 1 +print 1 +statist 1 +program 1 +statisticsassign 1 +asga 1 +statisticsyour 1 +final 1 +gradesect 1 +section 1 +microsparc 1 +datasheetonlin 1 +resourc 1 +classmat 1 +email 1 +ruiliu 1 +postmessag 1 +new 1 +group 1 +pagei 1 +creat 1 +august 1 +comment 1 +welcom 1 +send 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..bd6bebb1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,70 @@ +csnet 1 +network 1 +protocol 1 +implement 1 +gener 1 +inform 1 +professor 1 +offic 1 +hour 1 +tuesdai 1 +thursdai 1 +teach 1 +assist 1 +mondai 1 +wensdai 1 +station 1 +class 1 +descript 1 +text 1 +background 1 +read 1 +newsgroup 1 +utexa 1 +prerequisit 1 +grade 1 +refer 1 +draft 1 +multicast 1 +rout 1 +texa 1 +internet 1 +platform 1 +netsim 1 +corejava 1 +mobil 1 +support 1 +specif 1 +address 1 +alloc 1 +manag 1 +tutori 1 +http 1 +digest 1 +access 1 +authent 1 +comp 1 +java 1 +schedul 1 +present 1 +individu 1 +paper 1 +handout 1 +homework 1 +turn 1 +sampl 1 +solut 1 +group 1 +project 1 +info 1 +configur 1 +file 1 +pleas 1 +note 1 +first 1 +fengyufeng 1 +edufing 1 +public 1 +ring 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..6d882400 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,93 @@ +home 1 +pageclick 1 +help 1 +comput 1 +techniqu 1 +spring 1 +quarterwelcom 1 +page 1 +world 1 +wide 1 +hypermedia 1 +document 1 +whichcontain 1 +bounti 1 +inform 1 +class 1 +keep 1 +mind 1 +thatthi 1 +static 1 +addedfrequ 1 +problem 1 +send 1 +mail 1 +weld 1 +click 1 +highlight 1 +item 1 +personnel 1 +professor 1 +cours 1 +syllabu 1 +read 1 +assign 1 +homework 1 +polici 1 +announc 1 +check 1 +regularli 1 +last 1 +chang 1 +handout 1 +lectur 1 +note 1 +gradesoth 1 +us 1 +link 1 +offici 1 +mathematica 1 +mvi 1 +visitor 1 +room 1 +schedul 1 +scienc 1 +engin 1 +depart 1 +degre 1 +program 1 +offer 1 +colleg 1 +art 1 +mosaic 1 +avail 1 +follow 1 +topic 1 +basic 1 +hypertext 1 +markup 1 +languag 1 +html 1 +uniform 1 +resourc 1 +locat 1 +usinglynx 1 +charact 1 +base 1 +browserport 1 +engr 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quotedand 1 +duli 1 +credit 1 +copyright 1 +departmentof 1 +univers 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..12097363 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,139 @@ +engr 1 +home 1 +page 1 +autumn 1 +comput 1 +program 1 +dugan 1 +martin 1 +tompa 1 +welcom 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +bounti 1 +inform 1 +theclass 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +especi 1 +class 1 +messag 1 +ad 1 +frequent 1 +problem 1 +send 1 +mail 1 +webmast 1 +click 1 +highlight 1 +item 1 +help 1 +check 1 +last 1 +updat 1 +cours 1 +syllabu 1 +offic 1 +hour 1 +staff 1 +lectur 1 +slide 1 +homework 1 +midterm 1 +exam 1 +studi 1 +guid 1 +solut 1 +final 1 +tip 1 +mac 1 +think 1 +debugg 1 +netscap 1 +bulletin 1 +board 1 +textbook 1 +code 1 +refer 1 +regularli 1 +schedul 1 +time 1 +place 1 +special 1 +demo 1 +raini 1 +activ 1 +web 1 +earlier 1 +quarter 1 +intact 1 +less 1 +notic 1 +instanc 1 +link 1 +work 1 +pleas 1 +washington 1 +might 1 +like 1 +assign 1 +test 1 +us 1 +previous 1 +winter 1 +spring 1 +summer 1 +search 1 +previou 1 +miscellan 1 +info 1 +case 1 +insensit 1 +match 1 +whole 1 +word 1 +avail 1 +univers 1 +scienc 1 +engin 1 +depart 1 +degre 1 +offer 1 +colleg 1 +art 1 +relat 1 +major 1 +nonmajor 1 +consid 1 +take 1 +preview 1 +run 1 +find 1 +itemsund 1 +balloon 1 +menu 1 +particular 1 +onlin 1 +handbook 1 +portion 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quotedand 1 +duli 1 +credit 1 +copyright 1 +departmentof 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..6d9cf243 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,112 @@ +engr 1 +home 1 +page 1 +autumn 1 +comput 1 +program 1 +martin 1 +dickei 1 +richard 1 +ladner 1 +welcom 1 +world 1 +wide 1 +short 1 +hypermediadocu 1 +contain 1 +bounti 1 +informationabout 1 +class 1 +click 1 +highlight 1 +item 1 +moreinform 1 +messag 1 +check 1 +frequent 1 +cours 1 +syllabu 1 +sort 1 +schedulesth 1 +week 1 +glanceweek 1 +activ 1 +schedulecomput 1 +lab 1 +includinglab 1 +hour 1 +watch 1 +chang 1 +staff 1 +includ 1 +instructor 1 +andta 1 +offic 1 +lectur 1 +slide 1 +audiofrom 1 +summer 1 +homework 1 +examin 1 +midtermand 1 +final 1 +originallyschedul 1 +place 1 +time 1 +studi 1 +guid 1 +test 1 +andtim 1 +kind 1 +tip 1 +usingth 1 +compil 1 +macintosh 1 +user 1 +textbook 1 +code 1 +refer 1 +tutori 1 +special 1 +demo 1 +web 1 +earlier 1 +quarter 1 +less 1 +intactand 1 +invit 1 +brows 1 +notic 1 +problem 1 +forinst 1 +link 1 +work 1 +pleas 1 +send 1 +mail 1 +webmast 1 +might 1 +like 1 +look 1 +assign 1 +andth 1 +us 1 +previous 1 +winter 1 +spring 1 +inform 1 +avail 1 +univers 1 +washington 1 +scienc 1 +engin 1 +depart 1 +degre 1 +colleg 1 +art 1 +andrel 1 +major 1 +nonmajor 1 +comment 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..96d1ebe9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,58 @@ +autumn 1 +discret 1 +structur 1 +instructorpaul 1 +beam 1 +washington 1 +edulectur 1 +offic 1 +sieg 1 +phone 1 +hour 1 +thursdai 1 +appoint 1 +teach 1 +assistantjonathan 1 +nowitz 1 +edusect 1 +johnson 1 +section 1 +loew 1 +tuesdai 1 +wednesdai 1 +handout 1 +syllabu 1 +induct 1 +recurs 1 +defin 1 +set 1 +postscript 1 +acrobat 1 +reader 1 +part 1 +ofyour 1 +browser 1 +midterm 1 +novemb 1 +class 1 +sampl 1 +question 1 +homework 1 +assign 1 +previou 1 +cours 1 +web 1 +fall 1 +karp 1 +spring 1 +ruzzo 1 +winter 1 +leveson 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..5ce5b10a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,96 @@ +winter 1 +intro 1 +formal 1 +model 1 +richard 1 +ladnerclass 1 +messag 1 +check 1 +email 1 +frequent 1 +last 1 +updat 1 +handout 1 +cours 1 +syllabu 1 +latex 1 +construct 1 +regular 1 +grammar 1 +express 1 +midterm 1 +extra 1 +rambl 1 +regard 1 +question 1 +exam 1 +construc 1 +pars 1 +review 1 +final 1 +proof 1 +halt 1 +problem 1 +undecidableexam 1 +solut 1 +homework 1 +comment 1 +notat 1 +text 1 +state 1 +diagram 1 +file 1 +format 1 +materi 1 +provid 1 +three 1 +html 1 +hypertext 1 +markup 1 +languag 1 +document 1 +handl 1 +browser 1 +fact 1 +current 1 +look 1 +view 1 +mani 1 +origin 1 +convert 1 +us 1 +latexhtml 1 +strang 1 +plain 1 +ascii 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +postscript 1 +ghostscript 1 +home 1 +page 1 +free 1 +viewer 1 +window 1 +linux 1 +web 1 +previou 1 +quarter 1 +autumn 1 +ladner 1 +washington 1 +edufix 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..ee7cd433 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,46 @@ +introduct 1 +formal 1 +model 1 +fall 1 +intro 1 +ann 1 +condon 1 +welcom 1 +home 1 +page 1 +check 1 +regularli 1 +findhomework 1 +solut 1 +set 1 +pointer 1 +upcom 1 +lectur 1 +exam 1 +class 1 +messag 1 +sent 1 +mail 1 +list 1 +willb 1 +log 1 +subscrib 1 +send 1 +majordomo 1 +includ 1 +userid 1 +email 1 +frequent 1 +last 1 +updat 1 +homework 1 +handout 1 +content 1 +previou 1 +web 1 +quarter 1 +winter 1 +autumn 1 +washington 1 +edukaye 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..757aa431 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,26 @@ +autumn 1 +data 1 +structur 1 +martin 1 +tompaclass 1 +messag 1 +check 1 +mail 1 +frequent 1 +last 1 +updat 1 +cours 1 +informationlab 1 +technot 1 +unix 1 +questionnaireloc 1 +cdeletemin 1 +algorithm 1 +treeshomework 1 +web 1 +previou 1 +quarter 1 +winter 1 +spring 1 +request 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..c82ace96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,62 @@ +spring 1 +home 1 +pagecs 1 +data 1 +structuresrichard 1 +ladner 1 +instructordan 1 +fasulo 1 +teach 1 +assistantthi 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +inform 1 +theclass 1 +taught 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +especi 1 +class 1 +messag 1 +ad 1 +frequent 1 +click 1 +help 1 +check 1 +offic 1 +hour 1 +suggest 1 +read 1 +project 1 +homework 1 +exam 1 +lectur 1 +overheadsport 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quotedand 1 +duli 1 +credit 1 +copyright 1 +departmentof 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..3bd0e65d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,37 @@ +home 1 +pagecs 1 +program 1 +languagesfal 1 +quarter 1 +current 1 +offer 1 +page 1 +autumn 1 +informationth 1 +languag 1 +listinfo 1 +everi 1 +research 1 +pagehom 1 +spring 1 +ofcs 1 +winter 1 +depart 1 +computersci 1 +engineeringport 1 +reprint 1 +adapt 1 +academicnonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +dulycredit 1 +copyright 1 +comput 1 +scienceand 1 +engin 1 +univers 1 +washington 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..d1abd4dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,114 @@ +home 1 +pagecs 1 +program 1 +languagesspr 1 +quarter 1 +lectur 1 +section 1 +sieg 1 +final 1 +exam 1 +review 1 +session 1 +mondai 1 +june 1 +tuesdai 1 +thursdai 1 +instructor 1 +steve 1 +hanks 1 +mail 1 +hank 1 +washington 1 +eduoffic 1 +offic 1 +hour 1 +dave 1 +grove 1 +administr 1 +cours 1 +syllabu 1 +overviewcours 1 +newsgroup 1 +help 1 +documentsgeneralintroduct 1 +new 1 +us 1 +netscap 1 +last 1 +updat 1 +unix 1 +turnin 1 +electron 1 +submiss 1 +homework 1 +assign 1 +emac 1 +clip 1 +save 1 +lisp 1 +relatedrun 1 +reason 1 +thing 1 +smalltalk 1 +relat 1 +html 1 +postscript 1 +sampl 1 +solut 1 +done 1 +prolog 1 +hand 1 +miss 1 +figur 1 +includ 1 +partial 1 +quiz 1 +test 1 +htmlpostscript 1 +daili 1 +class 1 +note 1 +code 1 +transcript 1 +march 1 +suggest 1 +read 1 +april 1 +transcipt 1 +full 1 +interfac 1 +build 1 +databas 1 +method 1 +employe 1 +informationth 1 +languag 1 +listinfo 1 +everi 1 +research 1 +pagehom 1 +page 1 +winter 1 +offer 1 +ofcs 1 +autumn 1 +spring 1 +depart 1 +computersci 1 +engineeringport 1 +reprint 1 +adapt 1 +academicnonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +dulycredit 1 +copyright 1 +comput 1 +scienceand 1 +engin 1 +univers 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..4153c277 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,51 @@ +index 1 +page 1 +pagecurr 1 +quarterth 1 +current 1 +quarter 1 +previou 1 +quarterscours 1 +web 1 +earlier 1 +intact 1 +less 1 +younotic 1 +problem 1 +instanc 1 +link 1 +work 1 +pleas 1 +send 1 +mail 1 +webmast 1 +washington 1 +spring 1 +inform 1 +avail 1 +univers 1 +depart 1 +comput 1 +scienc 1 +engin 1 +degre 1 +program 1 +offer 1 +colleg 1 +art 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 +copyright 1 +comment 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..34a741dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,124 @@ +home 1 +page 1 +autumn 1 +introduct 1 +digit 1 +design 1 +quarter 1 +gaetano 1 +borriello 1 +corei 1 +andersonwelcom 1 +contain 1 +whole 1 +bunch 1 +us 1 +inform 1 +class 1 +keep 1 +mind 1 +document 1 +static 1 +especi 1 +announc 1 +messag 1 +ad 1 +frequent 1 +problem 1 +gener 1 +send 1 +mail 1 +tocs 1 +webmast 1 +notic 1 +instructor 1 +system 1 +administr 1 +last 1 +updat 1 +archiv 1 +messagess 1 +washington 1 +everyon 1 +cours 1 +goal 1 +syllabu 1 +meet 1 +time 1 +lectur 1 +final 1 +exam 1 +mondai 1 +decemb 1 +workload 1 +grade 1 +expect 1 +laboratori 1 +softwar 1 +tool 1 +polici 1 +collabor 1 +cheat 1 +address 1 +overal 1 +schedul 1 +topic 1 +offic 1 +hour 1 +sieg 1 +anderson 1 +corin 1 +aweekli 1 +assign 1 +weekli 1 +quizz 1 +onlin 1 +version 1 +slide 1 +textbook 1 +contemporari 1 +logic 1 +katz 1 +benjamin 1 +cum 1 +addison 1 +weslei 1 +maintain 1 +author 1 +publish 1 +note 1 +interest 1 +evolut 1 +implement 1 +technolog 1 +comput 1 +aid 1 +synario 1 +feedback 1 +tell 1 +think 1 +thing 1 +go 1 +even 1 +anonym 1 +desir 1 +link 1 +previou 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 +copyright 1 +depart 1 +scienc 1 +engin 1 +univers 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..e56af3b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,77 @@ +cours 1 +pagecs 1 +data 1 +structur 1 +algorithmsautumn 1 +basic 1 +inform 1 +instructor 1 +steve 1 +tanimoto 1 +washington 1 +sieg 1 +hall 1 +room 1 +offic 1 +hour 1 +appoint 1 +teach 1 +assist 1 +anhai 1 +doan 1 +announc 1 +place 1 +dai 1 +time 1 +smith 1 +comput 1 +facil 1 +unix 1 +account 1 +mscc 1 +option 1 +student 1 +languag 1 +requir 1 +lisp 1 +textbook 1 +shaffer 1 +practic 1 +introduct 1 +algorithm 1 +analysi 1 +publish 1 +summer 1 +prentic 1 +grade 1 +breakdown 1 +tent 1 +assign 1 +midterm 1 +project 1 +final 1 +late 1 +polici 1 +keep 1 +manag 1 +encourag 1 +punctual 1 +work 1 +point 1 +deduct 1 +penalti 1 +schedul 1 +updat 1 +aboutth 1 +topic 1 +studi 1 +examinform 1 +exambas 1 +us 1 +compilerassignmentssolut 1 +assignmentsteach 1 +informationscheduleweb 1 +previou 1 +offer 1 +winter 1 +autumn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..0e5beff9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,40 @@ +home 1 +pagecs 1 +data 1 +structur 1 +algorithmsspr 1 +instructor 1 +alistair 1 +holden 1 +washington 1 +jonathan 1 +nowitz 1 +class 1 +messag 1 +last 1 +updat 1 +mondai 1 +cours 1 +materi 1 +comput 1 +syllabu 1 +homework 1 +demo 1 +exam 1 +inform 1 +depart 1 +scienc 1 +engin 1 +degre 1 +program 1 +offer 1 +colleg 1 +art 1 +relat 1 +major 1 +mosaic 1 +help 1 +interest 1 +page 1 +raini 1 +funnowitz 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..d37cf89a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,64 @@ +cours 1 +pagecs 1 +data 1 +structur 1 +algorithmswint 1 +meet 1 +time 1 +place 1 +sieg 1 +instructor 1 +linda 1 +shapirooffic 1 +siegtelephon 1 +email 1 +shapiro 1 +washington 1 +eduoffic 1 +hour 1 +denis 1 +pinneloffic 1 +denisep 1 +syllabustransparencieshomework 1 +assignmentshomework 1 +answer 1 +homework 1 +program 1 +assignmentsprogram 1 +assign 1 +test 1 +file 1 +note 1 +set 1 +follow 1 +line 1 +begin 1 +indic 1 +insert 1 +tree 1 +next 1 +enclos 1 +inquot 1 +state 1 +charact 1 +long 1 +come 1 +integ 1 +length 1 +associatedvalu 1 +final 1 +string 1 +valu 1 +also 1 +quot 1 +linebegin 1 +find 1 +search 1 +isfollow 1 +object 1 +model 1 +graphimag 1 +graphreview 1 +listsfin 1 +studi 1 +sheet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..8e5a904f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,25 @@ +compil 1 +classhomethi 1 +world 1 +wide 1 +hypermedia 1 +documentfor 1 +contain 1 +inform 1 +class 1 +keep 1 +inmind 1 +document 1 +static 1 +willb 1 +ad 1 +frequent 1 +urgent 1 +announc 1 +assign 1 +onlin 1 +meet 1 +admin 1 +washington 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..cf608cff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,234 @@ +home 1 +pagecs 1 +experiment 1 +boe 1 +softwar 1 +engin 1 +project 1 +cours 1 +professor 1 +nanci 1 +leveson 1 +offic 1 +sieg 1 +phone 1 +hour 1 +appoint 1 +mail 1 +washington 1 +educours 1 +descriptioninstruct 1 +object 1 +teach 1 +terminolog 1 +concept 1 +fundament 1 +manag 1 +work 1 +team 1 +provid 1 +experi 1 +real 1 +industri 1 +oral 1 +written 1 +commun 1 +skill 1 +produc 1 +portfolio 1 +studi 1 +method 1 +tool 1 +specif 1 +design 1 +construct 1 +test 1 +analysi 1 +document 1 +larg 1 +softwaresystem 1 +includ 1 +also 1 +technic 1 +topic 1 +essenti 1 +tocreat 1 +complex 1 +system 1 +successfulli 1 +effectiveor 1 +group 1 +interact 1 +latter 1 +topicsar 1 +feedback 1 +sai 1 +import 1 +employersand 1 +often 1 +lack 1 +graduat 1 +version 1 +last 1 +quarter 1 +realbo 1 +exampl 1 +particip 1 +largegroup 1 +sever 1 +reason 1 +try 1 +approach 1 +first 1 +isthat 1 +short 1 +realist 1 +student 1 +cannotlearn 1 +enough 1 +class 1 +session 1 +devotedto 1 +discuss 1 +organ 1 +regular 1 +thegroup 1 +usual 1 +learn 1 +hard 1 +isto 1 +effectivelytogeth 1 +head 1 +instructor 1 +disast 1 +beavoid 1 +correct 1 +worktogeth 1 +addit 1 +requirementsanalysi 1 +develop 1 +possibl 1 +thenorm 1 +set 1 +areal 1 +search 1 +done 1 +engineeringinstitut 1 +master 1 +program 1 +providedat 1 +meet 1 +assign 1 +role 1 +playthat 1 +allow 1 +leadership 1 +posit 1 +attach 1 +listof 1 +howev 1 +everyon 1 +phase 1 +projectso 1 +part 1 +theproject 1 +outlin 1 +natur 1 +qualiti 1 +principl 1 +process 1 +model 1 +plan 1 +risk 1 +assess 1 +cost 1 +estim 1 +metric 1 +requir 1 +verif 1 +valid 1 +configur 1 +review 1 +mainten 1 +evolut 1 +reus 1 +ethic 1 +profession 1 +embed 1 +safeti 1 +take 1 +activ 1 +ofth 1 +aspect 1 +softwaredevelop 1 +member 1 +responsiblefor 1 +present 1 +administr 1 +assist 1 +respons 1 +control 1 +primari 1 +duti 1 +write 1 +updat 1 +track 1 +statu 1 +make 1 +sure 1 +proper 1 +held 1 +get 1 +time 1 +princip 1 +architect 1 +creation 1 +product 1 +overal 1 +consist 1 +hardwar 1 +platform 1 +issu 1 +transit 1 +exist 1 +augment 1 +necessari 1 +evalu 1 +current 1 +secur 1 +chang 1 +ensur 1 +human 1 +factor 1 +specialist 1 +user 1 +interfac 1 +respect 1 +survei 1 +interview 1 +employe 1 +assur 1 +releas 1 +duri 1 +conduct 1 +characterist 1 +deliver 1 +normal 1 +mockup 1 +prototyp 1 +expertis 1 +languag 1 +implement 1 +handl 1 +appear 1 +clariti 1 +manual 1 +determin 1 +us 1 +readabl 1 +understand 1 +support 1 +reliabl 1 +creat 1 +guid 1 +deliv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..278967ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,95 @@ +home 1 +pagecs 1 +softwar 1 +engineeringmeet 1 +time 1 +locat 1 +loew 1 +mondai 1 +wednesdai 1 +fridai 1 +professor 1 +nanci 1 +leveson 1 +offic 1 +sieg 1 +phone 1 +hour 1 +appoint 1 +mail 1 +washington 1 +eduta 1 +adam 1 +carlson 1 +educours 1 +descriptionthi 1 +cours 1 +studi 1 +concept 1 +method 1 +tool 1 +specif 1 +design 1 +construct 1 +test 1 +analysi 1 +document 1 +larg 1 +system 1 +includ 1 +also 1 +technic 1 +topic 1 +essenti 1 +creat 1 +complex 1 +successfulli 1 +project 1 +manag 1 +textbookghezzi 1 +jazayeri 1 +mandrioli 1 +fundament 1 +engin 1 +prentic 1 +hall 1 +note 1 +requir 1 +sampl 1 +interview 1 +question 1 +produc 1 +consum 1 +petri 1 +axiomat 1 +coupl 1 +cohes 1 +assign 1 +link 1 +interest 1 +syllabu 1 +updat 1 +pleas 1 +read 1 +newsgroup 1 +access 1 +machin 1 +send 1 +class 1 +mailinglist 1 +new 1 +comp 1 +risk 1 +militari 1 +standard 1 +defens 1 +develop 1 +inform 1 +avail 1 +winter 1 +spring 1 +comput 1 +scienc 1 +departmentsuggest 1 +feedback 1 +request 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..1fb285e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,346 @@ +winter 1 +home 1 +pagecs 1 +program 1 +languag 1 +implement 1 +instructor 1 +steve 1 +tanimoto 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +ruth 1 +andersonmeet 1 +tuesdai 1 +thursdai 1 +sieg 1 +except 1 +meet 1 +thompson 1 +hall 1 +room 1 +time 1 +schedul 1 +offic 1 +hour 1 +mondai 1 +wednesdai 1 +cours 1 +mail 1 +list 1 +archiv 1 +tent 1 +topic 1 +examin 1 +transpar 1 +past 1 +lectur 1 +also 1 +post 1 +number 1 +credit 1 +grade 1 +homework 1 +midterm 1 +exam 1 +final 1 +project 1 +class 1 +particip 1 +hardcopi 1 +read 1 +text 1 +select 1 +section 1 +mani 1 +onlin 1 +first 1 +ofread 1 +part 1 +recent 1 +book 1 +lisp 1 +artifici 1 +intellig 1 +sever 1 +throughout 1 +includ 1 +logic 1 +prolog 1 +grammar 1 +andpars 1 +understand 1 +system 1 +shell 1 +expert 1 +either 1 +purchas 1 +whole 1 +bookstor 1 +approxim 1 +copi 1 +order 1 +combin 1 +get 1 +fromth 1 +center 1 +librari 1 +entitl 1 +element 1 +us 1 +common 1 +edit 1 +freeman 1 +chapter 1 +introduct 1 +incommon 1 +glossari 1 +purchasedsepar 1 +basement 1 +commun 1 +build 1 +refer 1 +materi 1 +provid 1 +answer 1 +question 1 +andit 1 +standard 1 +referenceon 1 +seem 1 +best 1 +access 1 +tabl 1 +contentspag 1 +rather 1 +try 1 +download 1 +entir 1 +html 1 +file 1 +orpostscript 1 +sourc 1 +code 1 +usingcommon 1 +site 1 +fordigitool 1 +compani 1 +thatmaintain 1 +support 1 +macintosh 1 +currentinform 1 +interest 1 +link 1 +info 1 +world 1 +wide 1 +applic 1 +introductionto 1 +tutori 1 +anoth 1 +websit 1 +thatdoesn 1 +alwai 1 +respond 1 +promptli 1 +java 1 +trail 1 +announc 1 +januari 1 +welcom 1 +call 1 +compil 1 +although 1 +catalog 1 +theirimplement 1 +cover 1 +interpret 1 +techniqu 1 +buildingprogram 1 +attent 1 +focu 1 +mainli 1 +particular 1 +programminglanguag 1 +explor 1 +issu 1 +relev 1 +tointepret 1 +investig 1 +addit 1 +tradit 1 +alsolook 1 +current 1 +visual 1 +programmingfacil 1 +facil 1 +quarter 1 +student 1 +mscc 1 +run 1 +unix 1 +themathemat 1 +mac 1 +locat 1 +thebas 1 +offer 1 +varieti 1 +mathematica 1 +host 1 +yacc 1 +allegrocommon 1 +softwar 1 +particularli 1 +powerfulenviron 1 +full 1 +integr 1 +editor 1 +fred 1 +extens 1 +graphicsand 1 +user 1 +interfac 1 +construct 1 +network 1 +easili 1 +transfer 1 +machinesof 1 +difficulti 1 +inthompson 1 +supplement 1 +work 1 +theirown 1 +packag 1 +xlisp 1 +stat 1 +window 1 +micro 1 +emac 1 +howev 1 +limit 1 +abil 1 +altern 1 +wish 1 +need 1 +theseresourc 1 +internet 1 +note 1 +thatxlisp 1 +bare 1 +bone 1 +nothav 1 +develop 1 +fortun 1 +power 1 +disadvantag 1 +must 1 +labunless 1 +fromdigitool 1 +normal 1 +cost 1 +special 1 +dealallow 1 +point 1 +free 1 +version 1 +allegro 1 +lispfor 1 +franz 1 +attract 1 +tool 1 +given 1 +march 1 +close 1 +test 1 +introduc 1 +thelaboratori 1 +thistim 1 +instead 1 +go 1 +regular 1 +classroom 1 +week 1 +move 1 +beginn 1 +guid 1 +might 1 +help 1 +assign 1 +messag 1 +sent 1 +new 1 +group 1 +regardingread 1 +path 1 +remind 1 +pleas 1 +email 1 +todai 1 +even 1 +turn 1 +hard 1 +click 1 +printout 1 +token 1 +exampl 1 +februari 1 +onthursdai 1 +option 1 +review 1 +session 1 +held 1 +insieg 1 +bring 1 +becov 1 +convert 1 +string 1 +symbol 1 +check 1 +import 1 +found 1 +page 1 +send 1 +receiv 1 +modif 1 +deadlin 1 +announcedearli 1 +plu 1 +postscript 1 +viewer 1 +avail 1 +pictur 1 +koch 1 +snowflak 1 +detail 1 +complet 1 +projectgener 1 +descript 1 +find 1 +aboutdemonstr 1 +writeup 1 +fridai 1 +onmondai 1 +multipl 1 +choic 1 +format 1 +mark 1 +sens 1 +form 1 +pencil 1 +solut 1 +exercisestokenizerassign 1 +andpart 1 +parsertokenizerpart 1 +snowflakeassign 1 +local 1 +gener 1 +ondemonstr 1 +mileston 1 +show 1 +displai 1 +demonstr 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..3a914785 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,66 @@ +introduct 1 +artifici 1 +intelligencecs 1 +intellig 1 +spring 1 +professor 1 +alistair 1 +holden 1 +csoffic 1 +offic 1 +hour 1 +noon 1 +noonta 1 +joshua 1 +redston 1 +msoffic 1 +thompson 1 +mondai 1 +thursdai 1 +text 1 +rich 1 +knight 1 +secondedit 1 +touretzki 1 +common 1 +lisp 1 +gentl 1 +symboliccomput 1 +gener 1 +inform 1 +basic 1 +comput 1 +cours 1 +outlin 1 +turnin 1 +instruct 1 +project 1 +first 1 +program 1 +assign 1 +april 1 +solut 1 +homework 1 +second 1 +third 1 +sampl 1 +final 1 +june 1 +exam 1 +us 1 +allegro 1 +emacsinterfac 1 +postscript 1 +emac 1 +refcard 1 +interfac 1 +download 1 +standalonelisp 1 +macintosh 1 +note 1 +check 1 +grade 1 +record 1 +type 1 +gradesredston 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..6ffb0138 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,76 @@ +winter 1 +intro 1 +algorithm 1 +larri 1 +ruzzo 1 +martin 1 +tompaclass 1 +messag 1 +check 1 +email 1 +frequent 1 +last 1 +updat 1 +text 1 +book 1 +errata 1 +list 1 +handout 1 +latex 1 +syllabu 1 +midtem 1 +solut 1 +acrobat 1 +postscript 1 +homework 1 +web 1 +previou 1 +quarter 1 +karlin 1 +file 1 +format 1 +thecours 1 +materi 1 +provid 1 +three 1 +plain 1 +ascii 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +adob 1 +latest 1 +greatest 1 +free 1 +viewer 1 +page 1 +ghostscript 1 +home 1 +window 1 +linux 1 +time 1 +support 1 +fewer 1 +system 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +print 1 +ghostscriptcan 1 +exampl 1 +tompa 1 +aberman 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..9a817668 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,88 @@ +intro 1 +theori 1 +comput 1 +spring 1 +introduct 1 +larri 1 +ruzzo 1 +gener 1 +inform 1 +instructor 1 +jayram 1 +thathachar 1 +meet 1 +time 1 +offic 1 +hour 1 +tent 1 +sieg 1 +welcom 1 +home 1 +page 1 +problem 1 +document 1 +sendmail 1 +washington 1 +class 1 +mail 1 +last 1 +updat 1 +messag 1 +sent 1 +list 1 +textbook 1 +errata 1 +handout 1 +admin 1 +solut 1 +everyth 1 +latex 1 +sourc 1 +syllabu 1 +midterm 1 +final 1 +acrobat 1 +postscript 1 +file 1 +format 1 +thecours 1 +materi 1 +provid 1 +three 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +adob 1 +latest 1 +greatest 1 +free 1 +viewer 1 +ghostscript 1 +window 1 +linux 1 +support 1 +fewer 1 +system 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +print 1 +ghostscriptcan 1 +exampl 1 +cours 1 +web 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..2e993f70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,48 @@ +introduct 1 +databas 1 +systemscs 1 +systemsfal 1 +quarter 1 +instructor 1 +prof 1 +linda 1 +shapiro 1 +offic 1 +sieg 1 +telephon 1 +hour 1 +patrick 1 +crowlei 1 +pcrowlei 1 +announc 1 +syllabu 1 +assign 1 +homework 1 +word 1 +shift 1 +left 1 +click 1 +link 1 +save 1 +potenti 1 +us 1 +unisql 1 +home 1 +page 1 +qbic 1 +queri 1 +imag 1 +content 1 +manag 1 +system 1 +dbm 1 +probabl 1 +interest 1 +want 1 +know 1 +window 1 +back 1 +cours 1 +webcs 1 +request 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..4938fed5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,77 @@ +home 1 +page 1 +introduct 1 +oper 1 +system 1 +autumn 1 +instructor 1 +brian 1 +bershad 1 +washington 1 +lectur 1 +offic 1 +hour 1 +sung 1 +choi 1 +section 1 +sieg 1 +appoint 1 +cours 1 +intro 1 +adminth 1 +class 1 +outlin 1 +administr 1 +info 1 +textbook 1 +grade 1 +andoth 1 +word 1 +wisdom 1 +messag 1 +mail 1 +sent 1 +archiv 1 +midterm 1 +solut 1 +scale 1 +first 1 +schedulewhat 1 +cover 1 +schedul 1 +aggress 1 +andwil 1 +updat 1 +regularli 1 +reflect 1 +actual 1 +pace 1 +note 1 +handout 1 +slide 1 +projectsdescript 1 +project 1 +relat 1 +materi 1 +solutionsto 1 +avail 1 +notesnot 1 +watchthi 1 +space 1 +carefulli 1 +inform 1 +vital 1 +surviv 1 +andgrad 1 +hint 1 +appear 1 +person 1 +receiv 1 +feedback 1 +onproject 1 +send 1 +anonym 1 +wish 1 +lost 1 +click 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..fde3d505 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,87 @@ +home 1 +page 1 +autumn 1 +quarterwelcom 1 +world 1 +wide 1 +hypermedia 1 +document 1 +contain 1 +bounti 1 +inform 1 +class 1 +keepin 1 +mind 1 +mean 1 +static 1 +informationwil 1 +ad 1 +frequent 1 +problem 1 +thisdocu 1 +send 1 +mail 1 +pighin 1 +click 1 +help 1 +classpersonnel 1 +professor 1 +student 1 +cours 1 +syllabuscours 1 +calendarta 1 +offic 1 +hourshandout 1 +assignmentslectur 1 +notesread 1 +assignmentshomework 1 +assignmentsprojectsproject 1 +handoutsproject 1 +artifactsproject 1 +sessionsproject 1 +grade 1 +policyproject 1 +write 1 +upslibui 1 +documentationoth 1 +relat 1 +informationget 1 +classhearn 1 +baker 1 +erratath 1 +instruct 1 +labus 1 +indi 1 +guid 1 +opengl 1 +exampl 1 +program 1 +spring 1 +quarter 1 +winter 1 +pagegraph 1 +linkssgi 1 +silicon 1 +surfgrafica 1 +obscurasiggraphgrailgraph 1 +site 1 +indexoth 1 +us 1 +linksmvi 1 +visitor 1 +room 1 +schedul 1 +comput 1 +scienc 1 +engin 1 +departmentth 1 +degre 1 +programth 1 +programweb 1 +helpbas 1 +helpmosa 1 +netscap 1 +lynxus 1 +indyspighin 1 +washington 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..0bbf9699 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,106 @@ +autumn 1 +intro 1 +comput 1 +commun 1 +networksautumn 1 +instructor 1 +arun 1 +somani 1 +cslectur 1 +offic 1 +sieg 1 +eebphon 1 +hour 1 +jari 1 +kristensen 1 +csoffic 1 +chang 1 +tomatch 1 +prof 1 +thu 1 +cover 1 +everi 1 +week 1 +andprovid 1 +larger 1 +timewindow 1 +consult 1 +class 1 +messag 1 +check 1 +email 1 +frequent 1 +lectur 1 +overheadshomeworksprojectsinterest 1 +stuffattentionif 1 +would 1 +like 1 +request 1 +academ 1 +accommod 1 +disabl 1 +pleasecontact 1 +student 1 +servic 1 +schmitz 1 +havea 1 +letter 1 +indic 1 +requiresacadem 1 +pleas 1 +present 1 +discuss 1 +might 1 +need 1 +file 1 +format 1 +cours 1 +materi 1 +provid 1 +three 1 +html 1 +hypertext 1 +markup 1 +languag 1 +document 1 +handl 1 +browser 1 +fact 1 +current 1 +look 1 +view 1 +mani 1 +handout 1 +origin 1 +latex 1 +convert 1 +us 1 +latexhtml 1 +strang 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +postscript 1 +ghostscript 1 +home 1 +page 1 +free 1 +viewer 1 +window 1 +linux 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..0be9becd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,78 @@ +spring 1 +intro 1 +comput 1 +commun 1 +networksspr 1 +instructor 1 +richard 1 +ladner 1 +cslectur 1 +sieg 1 +offic 1 +phone 1 +hour 1 +noonta 1 +william 1 +chan 1 +wchan 1 +csoffic 1 +class 1 +messag 1 +check 1 +email 1 +frequent 1 +lectur 1 +overheadshomeworksprojectsabout 1 +file 1 +format 1 +cours 1 +materi 1 +provid 1 +three 1 +html 1 +hypertext 1 +markup 1 +languag 1 +document 1 +handl 1 +browser 1 +fact 1 +current 1 +look 1 +view 1 +mani 1 +handout 1 +origin 1 +latex 1 +convert 1 +us 1 +latexhtml 1 +strang 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +postscript 1 +ghostscript 1 +home 1 +page 1 +free 1 +viewer 1 +window 1 +linux 1 +washington 1 +eduwchan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..8af20002 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,10 @@ +home 1 +pagecs 1 +advanc 1 +digit 1 +designt 1 +kehl 1 +fall 1 +page 1 +found 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..593e6d06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,124 @@ +home 1 +pagecs 1 +advanc 1 +digit 1 +designt 1 +kehl 1 +fall 1 +welcom 1 +page 1 +cours 1 +inform 1 +time 1 +place 1 +johnson 1 +import 1 +announc 1 +last 1 +updat 1 +summari 1 +syllabusschedul 1 +polici 1 +hour 1 +staff 1 +instructor 1 +offic 1 +mark 1 +savoi 1 +tue 1 +savac 1 +richard 1 +chinn 1 +thur 1 +richin 1 +howard 1 +chang 1 +gener 1 +shchang 1 +csjason 1 +aaron 1 +scott 1 +stephen 1 +hardwar 1 +laboratori 1 +manag 1 +student 1 +work 1 +group 1 +final 1 +exam 1 +review 1 +topic 1 +cover 1 +quarterhomework 1 +assignmentsweb 1 +march 1 +homework 1 +duehomework 1 +fridai 1 +writeup 1 +februari 1 +midnight 1 +written 1 +assign 1 +project 1 +sampl 1 +abel 1 +state 1 +machin 1 +test 1 +fixtur 1 +option 1 +optionlab 1 +handout 1 +combin 1 +logic 1 +sequenti 1 +fpga 1 +memori 1 +communicationoth 1 +depart 1 +comput 1 +scienc 1 +engin 1 +murphi 1 +recent 1 +dilbert 1 +comic 1 +collect 1 +resourc 1 +care 1 +gaetano 1 +borriello 1 +list 1 +vlsi 1 +link 1 +comprehensivelist 1 +icmanufactur 1 +nation 1 +semiconductor 1 +data 1 +sheet 1 +motorola 1 +book 1 +server 1 +philip 1 +semiconduct 1 +micron 1 +technolog 1 +sheetsth 1 +copyright 1 +univers 1 +washington 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..984c5b83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,102 @@ +comput 1 +design 1 +organ 1 +gener 1 +inform 1 +meet 1 +loew 1 +instructor 1 +larri 1 +snyder 1 +offic 1 +hour 1 +appoint 1 +mail 1 +address 1 +sieg 1 +assist 1 +judi 1 +watson 1 +jwatson 1 +robert 1 +chenoffic 1 +tuesdai 1 +thursdays 1 +chensg 1 +catalog 1 +descript 1 +instruct 1 +model 1 +structur 1 +function 1 +arithmet 1 +logic 1 +unit 1 +regist 1 +transfer 1 +level 1 +hardwar 1 +microprogram 1 +control 1 +memori 1 +hierarchi 1 +andorgan 1 +system 1 +compon 1 +interconnect 1 +laboratoryproject 1 +involv 1 +simul 1 +setprocessor 1 +prerequisit 1 +class 1 +note 1 +mondai 1 +postscript 1 +read 1 +wednesdai 1 +fridai 1 +review 1 +sheet 1 +answer 1 +homework 1 +html 1 +skim 1 +appendix 1 +color 1 +chap 1 +revis 1 +midterm 1 +fast 1 +holidai 1 +materi 1 +follow 1 +file 1 +avail 1 +verilog 1 +pipelin 1 +mip 1 +addit 1 +modul 1 +common 1 +sampl 1 +program 1 +sourc 1 +form 1 +test 1 +segment 1 +binari 1 +prog 1 +data 1 +simpl 1 +assembl 1 +languag 1 +page 1 +previou 1 +quarter 1 +fall 1 +referencesthi 1 +free 1 +refer 1 +card 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..e509bc4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,232 @@ +spring 1 +home 1 +pagecs 1 +introduct 1 +artifici 1 +intellig 1 +instructor 1 +steve 1 +tanimoto 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +jeremi 1 +baermeet 1 +mondai 1 +wednesdai 1 +fridai 1 +sieg 1 +hall 1 +room 1 +offic 1 +hour 1 +tuesdai 1 +thursdai 1 +cours 1 +newsgroup 1 +creat 1 +access 1 +machin 1 +messag 1 +post 1 +send 1 +mail 1 +csor 1 +us 1 +new 1 +interfac 1 +pnew 1 +list 1 +archiv 1 +implement 1 +schedul 1 +tent 1 +topic 1 +number 1 +credit 1 +grade 1 +homework 1 +midterm 1 +exam 1 +final 1 +project 1 +class 1 +particip 1 +hardcopi 1 +read 1 +requir 1 +text 1 +element 1 +common 1 +lisp 1 +edit 1 +freeman 1 +work 1 +hard 1 +textbook 1 +self 1 +contain 1 +purchas 1 +separ 1 +book 1 +exampl 1 +program 1 +youdon 1 +mathemat 1 +theori 1 +onlin 1 +refer 1 +materi 1 +provid 1 +answer 1 +mani 1 +question 1 +andit 1 +languag 1 +standard 1 +referenceon 1 +seem 1 +best 1 +tabl 1 +contentspag 1 +rather 1 +try 1 +download 1 +entir 1 +html 1 +file 1 +orpostscript 1 +sourc 1 +code 1 +usingcommon 1 +interest 1 +link 1 +info 1 +forprogram 1 +world 1 +wide 1 +applic 1 +facil 1 +order 1 +take 1 +advantag 1 +allegro 1 +windowsimplement 1 +excel 1 +featur 1 +programdevelop 1 +construct 1 +theintel 1 +pentium 1 +laboratori 1 +free 1 +version 1 +isfor 1 +window 1 +bedownload 1 +franz 1 +kind 1 +least 1 +attract 1 +option 1 +examin 1 +givenaccord 1 +close 1 +multipl 1 +choic 1 +test 1 +bring 1 +mark 1 +sens 1 +form 1 +alist 1 +know 1 +announc 1 +march 1 +welcom 1 +coversboth 1 +implementationof 1 +techniqu 1 +includ 1 +programmingtechniqu 1 +knowledg 1 +represent 1 +search 1 +logicalreason 1 +probabilist 1 +reason 1 +case 1 +base 1 +plan 1 +learn 1 +understand 1 +vision 1 +neural 1 +net 1 +expert 1 +system 1 +clo 1 +assign 1 +page 1 +staff 1 +april 1 +term 1 +given 1 +review 1 +session 1 +held 1 +inour 1 +regular 1 +meet 1 +note 1 +continu 1 +programmingpart 1 +turn 1 +paper 1 +follow 1 +solut 1 +part 1 +exercis 1 +state 1 +screenshot 1 +user 1 +descript 1 +ofhow 1 +go 1 +gener 1 +move 1 +current 1 +statu 1 +workload 1 +reduct 1 +propos 1 +circul 1 +email 1 +approv 1 +rest 1 +lectur 1 +preliminari 1 +demo 1 +ofproject 1 +either 1 +give 1 +right 1 +orturn 1 +progress 1 +report 1 +memori 1 +holidai 1 +insieg 1 +explan 1 +peer 1 +evalu 1 +wrap 1 +demonstr 1 +evaluationof 1 +june 1 +assignmentsassign 1 +portion 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..3ba62bb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,89 @@ +home 1 +pagecs 1 +digit 1 +system 1 +designstev 1 +burn 1 +spring 1 +welcom 1 +page 1 +cours 1 +inform 1 +time 1 +place 1 +loew 1 +import 1 +announc 1 +last 1 +updat 1 +summari 1 +syllabu 1 +schedul 1 +polici 1 +hour 1 +staff 1 +steve 1 +instructor 1 +kent 1 +smith 1 +casei 1 +anderson 1 +stephen 1 +hardwar 1 +laboratori 1 +manag 1 +offic 1 +studentslab 1 +assign 1 +mchc 1 +info 1 +nice 1 +introduct 1 +fred 1 +martinrobot 1 +societi 1 +seattl 1 +robot 1 +portland 1 +area 1 +societyoth 1 +depart 1 +comput 1 +scienc 1 +engin 1 +murphi 1 +recent 1 +dilbert 1 +comic 1 +collect 1 +resourc 1 +care 1 +gaetano 1 +borriello 1 +comprehens 1 +list 1 +sourc 1 +nation 1 +semiconductor 1 +data 1 +sheet 1 +motorola 1 +book 1 +server 1 +philip 1 +semiconduct 1 +serverth 1 +copyright 1 +univers 1 +washington 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +accur 1 +quot 1 +duli 1 +credit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..7e08ede7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,34 @@ +home 1 +pagewelcom 1 +cours 1 +contain 1 +bevi 1 +inform 1 +relatingto 1 +usual 1 +document 1 +frequentlychang 1 +send 1 +mail 1 +bswest 1 +csif 1 +encount 1 +problem 1 +classpersonnelsyllabuslectur 1 +scheduleguest 1 +lectur 1 +scheduleoffic 1 +hoursproject 1 +project 1 +handout 1 +schedul 1 +help 1 +session 1 +final 1 +projectoth 1 +bug 1 +erratarefer 1 +pagesmidterm 1 +questionnairebswest 1 +washington 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..263955eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,94 @@ +home 1 +pagecs 1 +implement 1 +program 1 +languageswint 1 +quarter 1 +import 1 +cours 1 +informationmeet 1 +time 1 +instructor 1 +craigchamb 1 +chamber 1 +offic 1 +hour 1 +start 1 +second 1 +week 1 +sieg 1 +vass 1 +litvinov 1 +cubicl 1 +floor 1 +archivesslid 1 +lectur 1 +avail 1 +handout 1 +read 1 +assign 1 +full 1 +list 1 +class 1 +homework 1 +messag 1 +sent 1 +mail 1 +archivedher 1 +last 1 +year 1 +midterm 1 +exam 1 +answer 1 +final 1 +note 1 +test 1 +closedbook 1 +affect 1 +kind 1 +question 1 +wereask 1 +sampl 1 +solut 1 +cecil 1 +vortex 1 +informationhandout 1 +tutorialsth 1 +languag 1 +tutorialth 1 +compil 1 +tutorialhow 1 +front 1 +enda 1 +file 1 +interestdead 1 +elim 1 +simpl 1 +exampl 1 +idfacfg 1 +travers 1 +interfac 1 +frameworkvortex 1 +textual 1 +descript 1 +grammarcecil 1 +refer 1 +documentationdocument 1 +html 1 +postscript 1 +format 1 +manual 1 +version 1 +standard 1 +librari 1 +resourcesth 1 +previou 1 +page 1 +includ 1 +slide 1 +inform 1 +research 1 +found 1 +onmark 1 +leon 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..c1911e20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,62 @@ +home 1 +pagecs 1 +implement 1 +program 1 +languagesimport 1 +cours 1 +informationmeet 1 +time 1 +sieg 1 +instructor 1 +craig 1 +chamber 1 +offic 1 +hour 1 +jeff 1 +dean 1 +jdean 1 +dave 1 +grove 1 +come 1 +find 1 +chateau 1 +confer 1 +room 1 +archivesslid 1 +lectur 1 +avail 1 +slide 1 +cecil 1 +languag 1 +turori 1 +handout 1 +read 1 +assign 1 +full 1 +list 1 +class 1 +messag 1 +sent 1 +mail 1 +archiv 1 +projectth 1 +project 1 +sort 1 +analysi 1 +andtransform 1 +vortex 1 +compil 1 +optimizingcompil 1 +object 1 +orient 1 +written 1 +inform 1 +found 1 +cecilproject 1 +page 1 +manual 1 +resourcesmor 1 +research 1 +onmark 1 +leon 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..0e2fd7b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,13 @@ +home 1 +pagecs 1 +softwar 1 +engineeringdavid 1 +notkin 1 +spring 1 +introductori 1 +handout 1 +kwic 1 +assign 1 +sampl 1 +projectsnotkin 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..4e049ec2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,86 @@ +home 1 +pagecs 1 +concept 1 +program 1 +languagesautumn 1 +loew 1 +instructor 1 +david 1 +notkin 1 +offic 1 +hour 1 +sieg 1 +byappoint 1 +kurt 1 +partridg 1 +kepart 1 +floor 1 +cubiclescours 1 +handout 1 +introductori 1 +html 1 +postscript 1 +assign 1 +readingsmail 1 +list 1 +thread 1 +archivesw 1 +us 1 +mail 1 +administr 1 +instructionalpurpos 1 +wish 1 +refer 1 +previous 1 +sent 1 +messag 1 +archiv 1 +send 1 +washington 1 +subscrib 1 +emailto 1 +majordomo 1 +singl 1 +line 1 +bodi 1 +subject 1 +csegener 1 +languag 1 +research 1 +resourc 1 +yahoo 1 +page 1 +mark 1 +leon 1 +excel 1 +pagesprogram 1 +critiquesgari 1 +leaven 1 +self 1 +studi 1 +pagefunct 1 +resourcesmit 1 +scheme 1 +pagecmu 1 +standard 1 +pagea 1 +gentl 1 +introduct 1 +mlhaskel 1 +lambda 1 +calculu 1 +univers 1 +monash 1 +universityobject 1 +orient 1 +geneva 1 +object 1 +info 1 +cecil 1 +project 1 +dylan 1 +carnegi 1 +mellon 1 +appl 1 +comput 1 +question 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..5a7e5b9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,118 @@ +design 1 +analysi 1 +algorithmscs 1 +algorithm 1 +winter 1 +instructor 1 +richard 1 +anderson 1 +washington 1 +lectur 1 +seig 1 +offic 1 +hour 1 +mondai 1 +time 1 +appoint 1 +teach 1 +assist 1 +william 1 +chan 1 +wchan 1 +wednesdai 1 +chateau 1 +confer 1 +room 1 +sieg 1 +floor 1 +cubicl 1 +somebodi 1 +els 1 +us 1 +cours 1 +inform 1 +prerequisit 1 +go 1 +assum 1 +alreadi 1 +undergradu 1 +wrong 1 +know 1 +soon 1 +possibl 1 +suggest 1 +readingtextbook 1 +errata 1 +list 1 +project 1 +realli 1 +preview 1 +check 1 +outer 1 +sapplet 1 +assign 1 +handout 1 +written 1 +homework 1 +set 1 +gener 1 +tuesdai 1 +class 1 +background 1 +quiz 1 +post 1 +script 1 +solut 1 +midterm 1 +exam 1 +cancel 1 +lack 1 +interest 1 +final 1 +told 1 +march 1 +probabl 1 +verifi 1 +close 1 +book 1 +cover 1 +materi 1 +willconsist 1 +short 1 +answer 1 +problem 1 +solv 1 +question 1 +bureaucrat 1 +stuffgrad 1 +base 1 +upon 1 +particip 1 +work 1 +togeth 1 +okai 1 +discuss 1 +homeworkproblem 1 +classmat 1 +must 1 +write 1 +upindepend 1 +gilligan 1 +island 1 +rule 1 +could 1 +invok 1 +betweenani 1 +mustwatch 1 +least 1 +half 1 +theori 1 +thatan 1 +episod 1 +equival 1 +reboot 1 +anyth 1 +thatsurv 1 +learn 1 +understood 1 +eduwchan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..3635fd29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,218 @@ +parallel 1 +algorithmscs 1 +algorithm 1 +spring 1 +gener 1 +inform 1 +meet 1 +sieg 1 +instructor 1 +richard 1 +anderson 1 +offic 1 +hour 1 +appointment 1 +mail 1 +address 1 +homework 1 +exam 1 +catalog 1 +descript 1 +design 1 +analysi 1 +fundament 1 +algorithmsfor 1 +sort 1 +arithmet 1 +matrix 1 +graph 1 +problem 1 +addit 1 +select 1 +topic 1 +emphasi 1 +techniqu 1 +approach 1 +us 1 +developingfast 1 +effici 1 +limit 1 +theirefficaci 1 +prerequisit 1 +equival 1 +major 1 +assign 1 +note 1 +syllabu 1 +thursdai 1 +april 1 +plu 1 +rambl 1 +commentsabout 1 +cours 1 +lectur 1 +transpar 1 +code 1 +analysisfor 1 +list 1 +rank 1 +connect 1 +compon 1 +algorithmi 1 +simpler 1 +correct 1 +section 1 +latex 1 +version 1 +pointer 1 +paper 1 +referencesfor 1 +erew 1 +crew 1 +ullman 1 +yannakaki 1 +tuesdai 1 +union 1 +find 1 +certifi 1 +write 1 +impli 1 +exist 1 +consensu 1 +base 1 +upon 1 +swap 1 +although 1 +likelysometh 1 +go 1 +insid 1 +next 1 +supercomput 1 +asynchron 1 +refer 1 +martel 1 +foc 1 +buss 1 +manuscript 1 +memori 1 +model 1 +real 1 +special 1 +content 1 +whim 1 +titl 1 +year 1 +would 1 +theori 1 +share 1 +comput 1 +mayb 1 +smpc 1 +start 1 +collect 1 +basic 1 +spend 1 +time 1 +give 1 +could 1 +cover 1 +term 1 +indic 1 +lookingat 1 +pertain 1 +specif 1 +interconnect 1 +topolog 1 +wewil 1 +consid 1 +situat 1 +cost 1 +access 1 +isnon 1 +uniform 1 +sens 1 +notconsid 1 +particular 1 +machin 1 +prove 1 +theorem 1 +andyou 1 +expect 1 +howev 1 +motiv 1 +practic 1 +consider 1 +goal 1 +indevelop 1 +come 1 +algorithmswhich 1 +conceiv 1 +three 1 +four 1 +set 1 +contain 1 +routin 1 +challeng 1 +goingto 1 +requir 1 +project 1 +happi 1 +student 1 +outsidework 1 +relat 1 +text 1 +introduct 1 +parallelalgorithm 1 +nice 1 +book 1 +befollow 1 +close 1 +feel 1 +exception 1 +cheap 1 +youcould 1 +probabl 1 +without 1 +purchas 1 +copi 1 +origin 1 +plan 1 +volunt 1 +teach 1 +textwould 1 +progress 1 +fast 1 +volum 1 +artof 1 +program 1 +chose 1 +instead 1 +quit 1 +flexibl 1 +taught 1 +mychoic 1 +influenc 1 +interestingor 1 +uninterest 1 +also 1 +choic 1 +aseith 1 +tradit 1 +work 1 +researchcont 1 +number 1 +open 1 +mind 1 +turninto 1 +research 1 +result 1 +present 1 +half 1 +bake 1 +ideason 1 +provid 1 +other 1 +interest 1 +andenergi 1 +think 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..353971f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,31 @@ +home 1 +page 1 +automata 1 +comput 1 +complex 1 +move 1 +current 1 +quarter 1 +autumn 1 +portion 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accuratelyquot 1 +duli 1 +credit 1 +copyright 1 +depart 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +comment 1 +webmast 1 +washington 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..b1a7037c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,19 @@ +home 1 +page 1 +fall 1 +automataautumn 1 +instructor 1 +paul 1 +beam 1 +welcom 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +exam 1 +quiz 1 +postscript 1 +final 1 +latex 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..54ce0c26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,105 @@ +fall 1 +automata 1 +comput 1 +complex 1 +larri 1 +ruzzo 1 +tuth 1 +sieg 1 +staffnameemailphoneoffic 1 +hour 1 +instructor 1 +nitin 1 +sharma 1 +csmw 1 +class 1 +mail 1 +last 1 +updat 1 +messag 1 +sent 1 +list 1 +washington 1 +textbook 1 +errata 1 +handout 1 +administrivia 1 +homework 1 +midterm 1 +latex 1 +sourc 1 +cours 1 +organ 1 +syllabu 1 +collabor 1 +acrobat 1 +postscript 1 +file 1 +format 1 +thecours 1 +materi 1 +provid 1 +three 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +adob 1 +latest 1 +greatest 1 +free 1 +viewer 1 +avail 1 +depart 1 +unix 1 +system 1 +acroread 1 +perhap 1 +aavail 1 +page 1 +ghostview 1 +ghostscript 1 +home 1 +window 1 +linux 1 +time 1 +support 1 +fewer 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +print 1 +ghostscriptcan 1 +exampl 1 +web 1 +autumn 1 +portion 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +accuratelyquot 1 +duli 1 +credit 1 +copyright 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +comment 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..65d1cebc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,94 @@ +topic 1 +complex 1 +autumn 1 +proposit 1 +theorem 1 +prove 1 +satisfi 1 +test 1 +proof 1 +gener 1 +inform 1 +instructor 1 +paul 1 +beam 1 +meet 1 +time 1 +tuesdai 1 +thursdai 1 +loew 1 +autom 1 +comput 1 +aid 1 +verif 1 +vlsi 1 +andsoftwar 1 +engin 1 +give 1 +algorithm 1 +attempt 1 +decid 1 +truthof 1 +logic 1 +statement 1 +first 1 +higher 1 +order 1 +cours 1 +concentr 1 +issu 1 +casea 1 +well 1 +flip 1 +side 1 +even 1 +us 1 +oftheorem 1 +often 1 +involv 1 +finitedomain 1 +interpret 1 +anywai 1 +consid 1 +varieti 1 +system 1 +theoremprov 1 +within 1 +good 1 +choic 1 +search 1 +strategi 1 +consider 1 +theoret 1 +practic 1 +work 1 +thesequest 1 +complexityand 1 +rel 1 +also 1 +examin 1 +anumb 1 +implement 1 +compar 1 +theoryand 1 +paper 1 +thing 1 +urquhart 1 +survei 1 +talk 1 +slide 1 +instal 1 +softwar 1 +amus 1 +prover 1 +sato 1 +andboy 1 +moor 1 +tester 1 +gsat 1 +june 1 +thedirectori 1 +proversther 1 +scatter 1 +process 1 +ofinstal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..0206dd6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,38 @@ +home 1 +pagecs 1 +comput 1 +systemperform 1 +modelingspr 1 +host 1 +lazowskaandmaryvernonwelcom 1 +page 1 +system 1 +performancemodel 1 +meet 1 +mondai 1 +wednesdai 1 +fridai 1 +loew 1 +hall 1 +offic 1 +hourstent 1 +topic 1 +schedulecom 1 +goingsassignmentsproject 1 +informationmap 1 +queue 1 +network 1 +solut 1 +packag 1 +emailoth 1 +inform 1 +avail 1 +sigmetr 1 +confer 1 +measur 1 +model 1 +computersystemsuw 1 +depart 1 +scienc 1 +engineeringlazowska 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..e9f24ae9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,76 @@ +home 1 +page 1 +comput 1 +system 1 +architecturewint 1 +instructorsusan 1 +egger 1 +washington 1 +sieg 1 +offic 1 +hour 1 +tuth 1 +tajoshua 1 +redston 1 +cours 1 +inform 1 +overview 1 +postscript 1 +schedul 1 +continu 1 +updat 1 +lectur 1 +note 1 +problem 1 +set 1 +previou 1 +test 1 +architectur 1 +histori 1 +specmark 1 +rate 1 +tool 1 +shade 1 +instuct 1 +simul 1 +sparc 1 +atom 1 +build 1 +analysi 1 +alpha 1 +tullsen 1 +execut 1 +driven 1 +instruct 1 +level 1 +superscalar 1 +close 1 +etch 1 +binari 1 +rewrit 1 +analyz 1 +pentium 1 +code 1 +hardwar 1 +monitor 1 +multiflow 1 +compil 1 +pixi 1 +user 1 +manual 1 +dinero 1 +uniprocessor 1 +cach 1 +local 1 +machin 1 +powerpc 1 +applic 1 +multiprocessor 1 +spec 1 +benchmark 1 +neat 1 +info 1 +center 1 +current 1 +futur 1 +processor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..1ee1e970 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,50 @@ +home 1 +pagecs 1 +oper 1 +system 1 +instructor 1 +hank 1 +levi 1 +spring 1 +freder 1 +pighin 1 +meet 1 +time 1 +offic 1 +hour 1 +chateau 1 +confer 1 +room 1 +number 1 +unit 1 +welcom 1 +page 1 +world 1 +wide 1 +short 1 +hypermedia 1 +document 1 +forcs 1 +contain 1 +inform 1 +class 1 +keep 1 +mind 1 +thisdocu 1 +static 1 +especi 1 +classmessag 1 +ad 1 +frequent 1 +problem 1 +send 1 +mail 1 +announc 1 +april 1 +first 1 +assign 1 +readi 1 +iti 1 +cours 1 +projectlevi 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..ea629662 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,78 @@ +home 1 +pagecs 1 +comput 1 +graphicsautumn 1 +quarter 1 +welcom 1 +page 1 +world 1 +wide 1 +hypermedia 1 +document 1 +whichcontain 1 +wealth 1 +inform 1 +class 1 +keep 1 +mind 1 +thatthi 1 +static 1 +addedfrequ 1 +problem 1 +send 1 +mail 1 +deros 1 +click 1 +help 1 +avail 1 +professor 1 +cours 1 +syllabu 1 +lectur 1 +note 1 +written 1 +homework 1 +assign 1 +solut 1 +last 1 +year 1 +project 1 +handout 1 +grade 1 +polici 1 +test 1 +cool 1 +imag 1 +addit 1 +get 1 +instruct 1 +us 1 +indi 1 +mvi 1 +visitor 1 +room 1 +schedul 1 +scienc 1 +engin 1 +depart 1 +degre 1 +program 1 +offer 1 +colleg 1 +art 1 +mosaic 1 +follow 1 +topic 1 +basic 1 +hypertext 1 +markup 1 +languag 1 +html 1 +uniform 1 +resourc 1 +locat 1 +read 1 +usinglynx 1 +charact 1 +base 1 +browser 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..75ae21ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,110 @@ +home 1 +page 1 +principl 1 +digit 1 +system 1 +design 1 +carl 1 +ebel 1 +fall 1 +welcom 1 +cours 1 +inform 1 +time 1 +place 1 +loew 1 +import 1 +announc 1 +summari 1 +syllabu 1 +text 1 +book 1 +staff 1 +instructor 1 +offic 1 +hour 1 +wednesdai 1 +fridai 1 +sieg 1 +paul 1 +franklin 1 +mondai 1 +thursdai 1 +hine 1 +hineskj 1 +tuesdai 1 +larri 1 +mcmurchi 1 +research 1 +tool 1 +guru 1 +document 1 +simul 1 +synthesi 1 +pamett 1 +board 1 +mostli 1 +complet 1 +still 1 +construct 1 +student 1 +work 1 +groupsfin 1 +exam 1 +review 1 +topic 1 +cover 1 +quarter 1 +homework 1 +assign 1 +note 1 +begin 1 +class 1 +hand 1 +handout 1 +combin 1 +logic 1 +sequenti 1 +fpga 1 +memori 1 +commun 1 +depart 1 +comput 1 +scienc 1 +engin 1 +mother 1 +site 1 +list 1 +vlsi 1 +link 1 +comprehensivelist 1 +icmanufactur 1 +murphi 1 +recent 1 +dilbert 1 +comic 1 +nation 1 +semiconductor 1 +data 1 +sheet 1 +motorola 1 +server 1 +philip 1 +semiconduct 1 +micron 1 +technolog 1 +copyright 1 +univers 1 +washington 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..190ad5ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,50 @@ +index 1 +pagecs 1 +artifici 1 +intelligencefal 1 +quarter 1 +intellig 1 +pose 1 +fundament 1 +andchalleng 1 +question 1 +comput 1 +scienc 1 +build 1 +intelligentmachin 1 +cours 1 +address 1 +provid 1 +anin 1 +depth 1 +introduct 1 +select 1 +topic 1 +includ 1 +agentarchitectur 1 +knowledg 1 +represent 1 +search 1 +plan 1 +machinelearn 1 +reason 1 +uncertainti 1 +methodolog 1 +staff 1 +weldweld 1 +sieg 1 +hour 1 +marc 1 +friedmanfriedman 1 +nick 1 +kushmericknick 1 +outlin 1 +topicsread 1 +assignmentsassign 1 +examsgradingresourcesth 1 +class 1 +mailinglist 1 +also 1 +archiv 1 +past 1 +messag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..e3d5a314 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,102 @@ +uncertainti 1 +decis 1 +make 1 +artifici 1 +intellig 1 +winter 1 +professor 1 +steve 1 +hank 1 +offic 1 +sieg 1 +hour 1 +whenev 1 +around 1 +appoint 1 +email 1 +address 1 +mail 1 +washington 1 +goe 1 +class 1 +member 1 +send 1 +request 1 +list 1 +read 1 +materi 1 +pearl 1 +probabilist 1 +reason 1 +systemsthi 1 +requir 1 +text 1 +sever 1 +chapter 1 +probabl 1 +without 1 +bui 1 +strappedfor 1 +cash 1 +though 1 +nice 1 +refer 1 +book 1 +shafer 1 +uncertain 1 +reasoningthi 1 +collect 1 +foundat 1 +paper 1 +select 1 +copi 1 +avail 1 +grail 1 +librari 1 +jayn 1 +theori 1 +logic 1 +scienc 1 +fragmentari 1 +edit 1 +juli 1 +extrem 1 +interest 1 +technic 1 +histor 1 +look 1 +foundationsof 1 +statist 1 +definit 1 +worth 1 +perspect 1 +alon 1 +math 1 +heavi 1 +go 1 +place 1 +beautifulli 1 +written 1 +neapolitan 1 +expert 1 +system 1 +algorithmsa 1 +signific 1 +overlap 1 +good 1 +secondari 1 +sourc 1 +inform 1 +graphic 1 +model 1 +propagationalgorithm 1 +research 1 +arrang 1 +cours 1 +summari 1 +topic 1 +cover 1 +html 1 +postscript 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..1c12ef01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,271 @@ +home 1 +pagecs 1 +imag 1 +understandingwelcom 1 +page 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +inform 1 +theclass 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +especi 1 +class 1 +messag 1 +ad 1 +frequent 1 +problem 1 +send 1 +mail 1 +mock 1 +copyright 1 +notic 1 +materi 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +washington 1 +assign 1 +first 1 +read 1 +chapter 1 +note 1 +doexercis 1 +second 1 +wednesdai 1 +april 1 +exercis 1 +next 1 +determin 1 +conveni 1 +torun 1 +khoro 1 +cantata 1 +run 1 +sun 1 +aslillith 1 +edit 1 +local 1 +workstat 1 +login 1 +file 1 +containxhost 1 +lilliththen 1 +cshrc 1 +follow 1 +setenv 1 +khoros_hom 1 +manpath 1 +path 1 +rlogin 1 +onto 1 +lillith 1 +rhost 1 +assignmentsand 1 +displai 1 +environ 1 +variabl 1 +appropri 1 +typecantata 1 +unix 1 +prompt 1 +machin 1 +georg 1 +haskhoro 1 +also 1 +wwwhttp 1 +educ 1 +index 1 +htmland 1 +link 1 +tutori 1 +itscours 1 +outlin 1 +experi 1 +least 1 +twotop 1 +spatial 1 +resolut 1 +ideal 1 +take 1 +pagesand 1 +anoth 1 +window 1 +noth 1 +turn 1 +part 1 +third 1 +articl 1 +huerta 1 +andnevatia 1 +cvpr 1 +proceed 1 +tolook 1 +wolff 1 +fourth 1 +mondai 1 +make 1 +comparison 1 +three 1 +process 1 +softwar 1 +msvc 1 +fast 1 +oper 1 +level 1 +learn 1 +effort 1 +requir 1 +announc 1 +final 1 +examin 1 +june 1 +pmin 1 +regular 1 +meet 1 +room 1 +exam 1 +cover 1 +combinationof 1 +midterm 1 +post 1 +list 1 +topic 1 +studi 1 +remind 1 +approv 1 +sundai 1 +time 1 +review 1 +plan 1 +insieg 1 +select 1 +period 1 +avail 1 +fridai 1 +copi 1 +overhead 1 +transpar 1 +lectur 1 +onneur 1 +net 1 +engin 1 +librari 1 +center 1 +floor 1 +packet 1 +number 1 +trainabl 1 +classifi 1 +student 1 +permit 1 +temporari 1 +ofmatlab 1 +requirethat 1 +fill 1 +form 1 +sign 1 +contract 1 +know 1 +interest 1 +term 1 +project 1 +import 1 +ofth 1 +start 1 +week 1 +correct 1 +introduc 1 +pentium 1 +laboratori 1 +includingth 1 +develop 1 +evan 1 +mclain 1 +documentexplain 1 +transform 1 +applic 1 +current 1 +statu 1 +recent 1 +get 1 +withkhoro 1 +accompani 1 +pleas 1 +march 1 +alreadi 1 +comput 1 +account 1 +accesskhoro 1 +contact 1 +rene 1 +reed 1 +onthursdai 1 +arrang 1 +pick 1 +youraccount 1 +name 1 +password 1 +itov 1 +weekend 1 +earli 1 +hour 1 +arelimit 1 +ahead 1 +email 1 +address 1 +andsh 1 +sieg 1 +back 1 +offic 1 +kept 1 +lock 1 +either 1 +need 1 +knock 1 +orhav 1 +prior 1 +mani 1 +card 1 +willhav 1 +care 1 +slide 1 +resourc 1 +understand 1 +onlin 1 +intro 1 +delft 1 +univ 1 +pattern 1 +recognit 1 +vision 1 +store 1 +thedepart 1 +scienc 1 +version 1 +undergradu 1 +brochur 1 +brochuremosa 1 +help 1 +mosaic 1 +find 1 +itemsund 1 +balloon 1 +menu 1 +macmosa 1 +itemund 1 +navig 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..178d6c14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,174 @@ +home 1 +pagecs 1 +parallel 1 +comput 1 +imag 1 +processingwelcom 1 +page 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +inform 1 +theclass 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +ad 1 +time 1 +schedul 1 +informationon 1 +octob 1 +novemb 1 +class 1 +begin 1 +half 1 +hourearli 1 +normal 1 +start 1 +dai 1 +meet 1 +guest 1 +speaker 1 +prof 1 +nian 1 +simon 1 +fraser 1 +univers 1 +burnabi 1 +canada 1 +tuesdai 1 +bharath 1 +modayur 1 +titl 1 +present 1 +effici 1 +object 1 +recognit 1 +simd 1 +mimd 1 +machin 1 +topic 1 +complet 1 +discuss 1 +pyramid 1 +algorithm 1 +scale 1 +invariantoper 1 +segment 1 +hierarchicalrelax 1 +us 1 +isodata 1 +approach 1 +burt 1 +hong 1 +rosenfeld 1 +introduct 1 +embed 1 +virtual 1 +process 1 +overview 1 +neural 1 +network 1 +architectur 1 +mesh 1 +brief 1 +treatment 1 +icon 1 +symbol 1 +thursdai 1 +analysi 1 +digit 1 +librari 1 +demo 1 +find 1 +term 1 +project 1 +topicsdur 1 +week 1 +student 1 +activelyexplor 1 +written 1 +descript 1 +hand 1 +inon 1 +templat 1 +writeupsi 1 +avail 1 +resourcespvm 1 +virtualmachin 1 +softwar 1 +layear 1 +permit 1 +user 1 +program 1 +aviru 1 +made 1 +heterogen 1 +collect 1 +moreworkst 1 +conveni 1 +implement 1 +studydistribut 1 +intel 1 +technicalpubl 1 +includ 1 +paragonparallel 1 +system 1 +languag 1 +good 1 +arrai 1 +orient 1 +paragon 1 +variousvendor 1 +supercomput 1 +info 1 +onth 1 +maspar 1 +nation 1 +center 1 +sweden 1 +onlin 1 +theunivers 1 +tennesse 1 +resourc 1 +found 1 +neal 1 +friedman 1 +report 1 +also 1 +error 1 +correctionsto 1 +cours 1 +note 1 +copyright 1 +notic 1 +materi 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +washington 1 +import 1 +part 1 +ofth 1 +review 1 +session 1 +final 1 +fridai 1 +decemb 1 +sieg 1 +hall 1 +exam 1 +wednesdai 1 +room 1 +close 1 +book 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..bb91b1e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,65 @@ +graphic 1 +seminarc 1 +rspring 1 +numer 1 +method 1 +april 1 +matrix 1 +comput 1 +intro 1 +definit 1 +properti 1 +invers 1 +brad 1 +solv 1 +linear 1 +system 1 +eric 1 +eigenvalu 1 +eigenvector 1 +singular 1 +valu 1 +decomposit 1 +joel 1 +root 1 +find 1 +nonlinear 1 +equat 1 +corei 1 +shuichi 1 +optim 1 +unconstrain 1 +kari 1 +constrain 1 +global 1 +kevin 1 +quadrat 1 +program 1 +chuck 1 +ronen 1 +exampl 1 +daniel 1 +data 1 +fit 1 +conclus 1 +mike 1 +regress 1 +calibr 1 +ordinari 1 +differenti 1 +paper 1 +adam 1 +joanna 1 +discret 1 +finit 1 +element 1 +radios 1 +fred 1 +pde 1 +differ 1 +interv 1 +arithmet 1 +troi 1 +jonathan 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..dfd5f4cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,45 @@ +quarterscs 1 +special 1 +topicssteven 1 +tanimoto 1 +instructorcs 1 +autumn 1 +transcript 1 +base 1 +educ 1 +winter 1 +mathemat 1 +experi 1 +imag 1 +process 1 +spring 1 +technolog 1 +collabor 1 +learn 1 +copyright 1 +notic 1 +materi 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +washington 1 +graduat 1 +seminar 1 +explor 1 +varieti 1 +topic 1 +relat 1 +useof 1 +comput 1 +specif 1 +activ 1 +varyfrom 1 +quarter 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..18d4b725 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,160 @@ +home 1 +page 1 +autumn 1 +transcript 1 +base 1 +educ 1 +wwwwelcom 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +inform 1 +theclass 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +ad 1 +time 1 +copyright 1 +notic 1 +materi 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +washington 1 +read 1 +octob 1 +mccalla 1 +central 1 +importanceof 1 +student 1 +model 1 +intellig 1 +tutor 1 +present 1 +sandi 1 +youngquist 1 +meet 1 +discuss 1 +paul 1 +barton 1 +davi 1 +aboutinternet 1 +servic 1 +labord 1 +problem 1 +solv 1 +geometri 1 +microworld 1 +tointellig 1 +comput 1 +environ 1 +tessa 1 +novemb 1 +bartel 1 +promot 1 +mathematicsconnect 1 +concept 1 +map 1 +plu 1 +gari 1 +anderson 1 +onlin 1 +first 1 +paper 1 +combin 1 +degre 1 +vision 1 +littl 1 +technolog 1 +noth 1 +particularli 1 +ambiti 1 +descript 1 +state 1 +second 1 +technic 1 +piec 1 +thethem 1 +learner 1 +take 1 +respons 1 +someth 1 +increasingli 1 +import 1 +futur 1 +choic 1 +third 1 +moresophist 1 +either 1 +beyond 1 +brows 1 +elabor 1 +possibl 1 +group 1 +annot 1 +ofwww 1 +toolkit 1 +describ 1 +intechn 1 +term 1 +layer 1 +internet 1 +infrastructur 1 +couldmak 1 +smart 1 +distribut 1 +tutori 1 +applicationsthat 1 +mosaic 1 +netscap 1 +achiev 1 +pleas 1 +option 1 +advanc 1 +us 1 +webhttp 1 +proceed 1 +html 1 +presentor 1 +jeremi 1 +baer 1 +empow 1 +agehttp 1 +ncsa 1 +uiuc 1 +ward 1 +marla 1 +baker 1 +share 1 +comment 1 +soap 1 +trail 1 +line 1 +communitieshttp 1 +john 1 +dietz 1 +enhanc 1 +protocol 1 +lower 1 +serviceshttp 1 +dcewebkit 1 +adam 1 +carlson 1 +hong 1 +zhumeet 1 +michael 1 +aboutcurriculum 1 +navig 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..6ba1c449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,78 @@ +home 1 +page 1 +autumn 1 +technolog 1 +collabor 1 +learningwelcom 1 +copyright 1 +notic 1 +materi 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +washington 1 +gener 1 +descript 1 +comput 1 +internet 1 +methodologiesfor 1 +teach 1 +learn 1 +current 1 +come 1 +togeth 1 +innew 1 +wai 1 +seminar 1 +explor 1 +read 1 +number 1 +paper 1 +forcollabor 1 +particip 1 +student 1 +willtak 1 +respons 1 +make 1 +present 1 +group 1 +ofthes 1 +cover 1 +subset 1 +also 1 +possibl 1 +applic 1 +ofai 1 +visual 1 +techniqu 1 +analysi 1 +evid 1 +ofstud 1 +onlin 1 +context 1 +meet 1 +schedul 1 +tuesdai 1 +howev 1 +decid 1 +move 1 +time 1 +better 1 +intopeopl 1 +visit 1 +meani 1 +middl 1 +school 1 +schoolmai 1 +depend 1 +interest 1 +participatingstud 1 +last 1 +updat 1 +septemb 1 +tanimoto 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..591c5682 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,191 @@ +mvmv 1 +global 1 +resourc 1 +manag 1 +distribut 1 +systemsprofessor 1 +mari 1 +vernontim 1 +pmlocat 1 +processor 1 +alloc 1 +gang 1 +schedul 1 +now 1 +ousterhout 1 +techniqu 1 +concurr 1 +system 1 +inrd 1 +conf 1 +page 1 +arpaci 1 +dusseau 1 +vahdat 1 +anderson 1 +patterson 1 +interact 1 +parallel 1 +andsequenti 1 +workload 1 +network 1 +workstat 1 +proc 1 +ofth 1 +sigmetr 1 +dynam 1 +equi 1 +partit 1 +nguyen 1 +tucker 1 +gupta 1 +process 1 +control 1 +issuesfor 1 +multiprogram 1 +share 1 +memori 1 +multiprocessor 1 +symp 1 +oper 1 +principl 1 +vaswani 1 +zahorjan 1 +us 1 +runtim 1 +measur 1 +workloadcharacterist 1 +univ 1 +ofwashington 1 +technic 1 +report 1 +tutori 1 +applic 1 +shun 1 +leung 1 +evangelo 1 +markato 1 +thoma 1 +leblanc 1 +affin 1 +loopschedul 1 +supercomput 1 +expand 1 +version 1 +iniee 1 +tran 1 +han 1 +zima 1 +barbara 1 +chapman 1 +compil 1 +ieee 1 +edjlali 1 +agraw 1 +sussman 1 +saltz 1 +data 1 +parallelprogram 1 +adapt 1 +environ 1 +santa 1 +april 1 +polici 1 +comparisonsshikharesh 1 +majumdar 1 +derek 1 +eager 1 +richard 1 +bunt 1 +confer 1 +model 1 +ofcomput 1 +eric 1 +parson 1 +kenneth 1 +sevcik 1 +high 1 +variabilityservic 1 +time 1 +ipp 1 +workshop 1 +strategi 1 +dror 1 +feitelson 1 +bill 1 +nitzberg 1 +characterist 1 +product 1 +scientif 1 +thenasa 1 +am 1 +ipsc 1 +follow 1 +also 1 +cover 1 +requir 1 +read 1 +leutenegg 1 +vernon 1 +perform 1 +mccann 1 +memorymultiprocessor 1 +transact 1 +comput 1 +demand 1 +base 1 +patrick 1 +sobalvarro 1 +william 1 +weihl 1 +coschedul 1 +ofparallel 1 +job 1 +impact 1 +migrat 1 +burger 1 +hyder 1 +miller 1 +wood 1 +tradeoff 1 +rohit 1 +chandra 1 +scott 1 +devin 1 +verghes 1 +anoop 1 +mendel 1 +rosenblum 1 +multiprocessorcomput 1 +server 1 +architectur 1 +support 1 +programminglanguag 1 +asplo 1 +jose 1 +coordin 1 +alverson 1 +kahan 1 +korri 1 +smith 1 +tera 1 +octob 1 +discuss 1 +open 1 +problem 1 +culler 1 +effectivedistribut 1 +computersystem 1 +philadelphia 1 +june 1 +appear 1 +rudolph 1 +identif 1 +activ 1 +work 1 +set 1 +program 1 +theoret 1 +result 1 +karlin 1 +paper 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..d1f4dbf5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,67 @@ +winter 1 +algorithm 1 +molecular 1 +biologi 1 +richard 1 +karp 1 +larri 1 +ruzzo 1 +martin 1 +tompaclass 1 +bboard 1 +last 1 +updat 1 +handout 1 +administr 1 +lectur 1 +note 1 +draft 1 +homework 1 +html 1 +syllabu 1 +schedul 1 +acrobat 1 +titl 1 +postscript 1 +slide 1 +file 1 +format 1 +cours 1 +materi 1 +provid 1 +sever 1 +usual 1 +load 1 +fast 1 +readabl 1 +mani 1 +part 1 +gener 1 +automat 1 +translat 1 +latex 1 +faith 1 +origin 1 +adob 1 +latest 1 +greatest 1 +free 1 +viewer 1 +page 1 +ghostscript 1 +home 1 +window 1 +linux 1 +time 1 +support 1 +fewer 1 +system 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +legibl 1 +print 1 +ghostscriptcan 1 +exampl 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..f4ac05ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,172 @@ +architectur 1 +lunchcs 1 +lunchcours 1 +organ 1 +jean 1 +loupbaermeet 1 +time 1 +tuesdai 1 +cseg 1 +lunch 1 +continu 1 +quarter 1 +withalmost 1 +format 1 +previou 1 +year 1 +select 1 +paper 1 +discussedat 1 +begin 1 +distribut 1 +week 1 +ofth 1 +tobe 1 +read 1 +discuss 1 +might 1 +formal 1 +present 1 +work 1 +progress 1 +byesteem 1 +member 1 +mostlyw 1 +hopefulli 1 +heat 1 +discussionson 1 +literatur 1 +differ 1 +quartersi 1 +start 1 +posit 1 +fromparticip 1 +recent 1 +workshop 1 +oncrit 1 +issu 1 +comput 1 +research 1 +copi 1 +hereread 1 +lead 1 +morethem 1 +mani 1 +thank 1 +ruth 1 +anderson 1 +molli 1 +brown 1 +gershoni 1 +matthai 1 +philipos 1 +tabular 1 +summari 1 +guru 1 +found 1 +herefor 1 +usual 1 +thestud 1 +either 1 +informallyor 1 +slide 1 +credit 1 +cours 1 +variabl 1 +ifyou 1 +first 1 +meet 1 +octob 1 +valu 1 +local 1 +load 1 +predict 1 +lipasti 1 +wilkerson 1 +shen 1 +asplo 1 +line 1 +follow 1 +link 1 +advanceprogrami 1 +short 1 +bibliographi 1 +processor 1 +memori 1 +appreci 1 +volunt 1 +thesaulsburi 1 +burger 1 +machin 1 +readashlei 1 +saulsburi 1 +fong 1 +pong 1 +andrea 1 +nowatzyk 1 +miss 1 +wall 1 +case 1 +integr 1 +isca 1 +readm 1 +fillo 1 +keckler 1 +dalli 1 +multicomput 1 +micro 1 +avail 1 +machinelink 1 +readdoug 1 +stefano 1 +kaxira 1 +jame 1 +goodman 1 +datascalar 1 +spsd 1 +execut 1 +model 1 +univers 1 +wisconsin 1 +madison 1 +scienc 1 +depart 1 +technic 1 +report 1 +juli 1 +neton 1 +intellig 1 +iram 1 +chip 1 +rememb 1 +patterson 1 +cardwel 1 +fromm 1 +keeton 1 +kozyraki 1 +thomasand 1 +yelick 1 +availableher 1 +fortun 1 +author 1 +prof 1 +subscrib 1 +mail 1 +list 1 +send 1 +email 1 +themajordomo 1 +majordomo 1 +content 1 +shouldinclud 1 +leav 1 +subject 1 +lineblank 1 +shortli 1 +receiv 1 +messag 1 +back 1 +sai 1 +welcom 1 +baer 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..191ba271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,13 @@ +home 1 +pagecs 1 +page 1 +spring 1 +offer 1 +experiment 1 +graduat 1 +cours 1 +human 1 +comput 1 +interact 1 +born 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..27b2bbec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,127 @@ +compil 1 +seminarcs 1 +seminarcours 1 +organ 1 +susan 1 +eggersand 1 +craigchambersmeet 1 +time 1 +wednesdai 1 +offici 1 +loew 1 +butreal 1 +meet 1 +second 1 +floor 1 +atrium 1 +scheduleweek 1 +memspi 1 +analyz 1 +memori 1 +system 1 +bottleneck 1 +program 1 +margaretmartonosi 1 +anoop 1 +gupta 1 +thoma 1 +anderson 1 +week 1 +gener 1 +approach 1 +special 1 +applic 1 +charl 1 +consel 1 +francoi 1 +noel 1 +practic 1 +data 1 +flow 1 +framework 1 +arrai 1 +refer 1 +analysi 1 +itsus 1 +optim 1 +evelyn 1 +duesterwald 1 +rajiv 1 +maryl 1 +soffa 1 +valu 1 +depend 1 +graph 1 +represent 1 +without 1 +taxat 1 +danielweis 1 +roger 1 +crew 1 +michael 1 +ernst 1 +bjarn 1 +steensgaard 1 +litvinov 1 +iter 1 +regist 1 +coalesc 1 +georg 1 +andrew 1 +appel 1 +garrett 1 +machin 1 +specif 1 +hooverand 1 +kenneth 1 +zadeck 1 +dean 1 +grant 1 +paradigm 1 +distribut 1 +multicomput 1 +byprivthviraj 1 +banerje 1 +lewi 1 +minimum 1 +cost 1 +interprocedur 1 +alloc 1 +stevenkurland 1 +fischer 1 +secoski 1 +todd 1 +knoblock 1 +erik 1 +grove 1 +lazi 1 +strength 1 +reduct 1 +jen 1 +knoop 1 +oliv 1 +ruth 1 +andbernhard 1 +steffen 1 +mock 1 +tullsen 1 +subscrib 1 +mail 1 +list 1 +send 1 +email 1 +majordomo 1 +content 1 +includ 1 +line 1 +subscribecsek 1 +leav 1 +subject 1 +blank 1 +shortlyrec 1 +messag 1 +back 1 +sai 1 +welcom 1 +melodi 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..a7a97737 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,178 @@ +parallel 1 +program 1 +environmentslarri 1 +snyderautumn 1 +quarter 1 +mondai 1 +loew 1 +welcom 1 +home 1 +page 1 +read 1 +select 1 +paper 1 +recent 1 +ipp 1 +ppopp 1 +supercomput 1 +icpp 1 +lcpc 1 +ten 1 +schedul 1 +atmospher 1 +casual 1 +andwil 1 +hopefulli 1 +ignit 1 +live 1 +discuss 1 +everyon 1 +attend 1 +seminar 1 +expect 1 +present 1 +thepap 1 +still 1 +spot 1 +open 1 +hurri 1 +sign 1 +pleas 1 +send 1 +mail 1 +majordomo 1 +subscrib 1 +cseo 1 +bodi 1 +messag 1 +subscribeto 1 +class 1 +list 1 +datepaperpresentor 1 +compil 1 +matlab 1 +scalapack 1 +exploit 1 +task 1 +data 1 +ramaswami 1 +hodg 1 +banerje 1 +falcon 1 +interact 1 +restructur 1 +deros 1 +gallivan 1 +gallopoulo 1 +marsolf 1 +padua 1 +portabl 1 +driven 1 +ramkumar 1 +forb 1 +kale 1 +sung 1 +cross 1 +loop 1 +reus 1 +analysi 1 +applic 1 +cach 1 +optim 1 +cooper 1 +kennedi 1 +mcintosh 1 +ruth 1 +global 1 +commun 1 +chakarabarti 1 +gupta 1 +choi 1 +pldi 1 +sean 1 +integer 1 +perform 1 +environ 1 +adv 1 +input 1 +output 1 +characterist 1 +scalabl 1 +crandal 1 +aydt 1 +chien 1 +reed 1 +jason 1 +holidai 1 +stream 1 +librari 1 +complex 1 +distribut 1 +structur 1 +gotwal 1 +sriniva 1 +gannon 1 +brad 1 +model 1 +strategi 1 +core 1 +bordawekar 1 +choudahari 1 +koelbel 1 +paleczni 1 +local 1 +iter 1 +comput 1 +block 1 +cyclic 1 +midkiff 1 +util 1 +thread 1 +fahring 1 +hain 1 +mehrotra 1 +eric 1 +cilk 1 +effici 1 +multithread 1 +runtim 1 +system 1 +blumof 1 +joerg 1 +kuszmaul 1 +leiserson 1 +randal 1 +zhou 1 +gener 1 +code 1 +object 1 +orient 1 +mathemat 1 +andersson 1 +fritzson 1 +realign 1 +base 1 +kamachi 1 +kusano 1 +suehiro 1 +tamura 1 +sakon 1 +us 1 +access 1 +inform 1 +rinard 1 +tool 1 +rel 1 +debug 1 +develop 1 +larg 1 +numer 1 +abramson 1 +foster 1 +michalak 1 +sosic 1 +potpourri 1 +last 1 +modifi 1 +tuesdai 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..76ffc26b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,27 @@ +system 1 +seminar 1 +preliminariesif 1 +alreadi 1 +mail 1 +list 1 +need 1 +variou 1 +crucial 1 +bit 1 +ofinform 1 +week 1 +cancel 1 +besent 1 +send 1 +request 1 +line 1 +subscrib 1 +systemsin 1 +messag 1 +bodi 1 +quarterli 1 +web 1 +spring 1 +summer 1 +autumn 1 +winter 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..11cb1d62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,70 @@ +summer 1 +quarterw 1 +meet 1 +fridai 1 +loew 1 +quarter 1 +wewil 1 +read 1 +final 1 +paper 1 +appear 1 +upcomingacm 1 +symposium 1 +oper 1 +system 1 +principl 1 +sosp 1 +pleas 1 +havean 1 +interact 1 +discuss 1 +scheduleoct 1 +implement 1 +global 1 +memori 1 +manag 1 +workstat 1 +cluster 1 +present 1 +feelei 1 +log 1 +virtual 1 +savag 1 +autoraid 1 +hierarch 1 +storag 1 +wilk 1 +serverless 1 +network 1 +file 1 +franklin 1 +montgomeri 1 +tiwari 1 +hypervisor 1 +base 1 +fault 1 +toler 1 +chan 1 +philipos 1 +wolman 1 +exploit 1 +weak 1 +connect 1 +mobil 1 +access 1 +voelker 1 +litvinov 1 +perform 1 +cach 1 +coher 1 +stackabl 1 +sriram 1 +fiuczynski 1 +impact 1 +architectur 1 +trend 1 +anderson 1 +romer 1 +return 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..e56027fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,146 @@ +high 1 +perform 1 +scientif 1 +comput 1 +zphigh 1 +zpllarri 1 +snyder 1 +teamautumn 1 +quarter 1 +wednesdai 1 +sieg 1 +loew 1 +dai 1 +welcom 1 +home 1 +page 1 +pleas 1 +send 1 +mail 1 +majordomo 1 +subscrib 1 +csezpl 1 +bodi 1 +messag 1 +subscribeto 1 +class 1 +list 1 +student 1 +also 1 +interest 1 +join 1 +usersmail 1 +us 1 +distribut 1 +informationabout 1 +compil 1 +librarai 1 +releas 1 +relatedinform 1 +ad 1 +user 1 +descriptionzpl 1 +scientificprogram 1 +languag 1 +suitabl 1 +previous 1 +written 1 +infortran 1 +arrai 1 +dramaticallysimplifi 1 +program 1 +elimin 1 +nuisanc 1 +loop 1 +index 1 +run 1 +fast 1 +modern 1 +machin 1 +includ 1 +parallel 1 +supercomput 1 +allow 1 +programm 1 +develop 1 +code 1 +workstat 1 +andtrivi 1 +migrat 1 +largest 1 +simpli 1 +byrecompil 1 +toth 1 +commun 1 +design 1 +scientist 1 +engin 1 +want 1 +learn 1 +effect 1 +cover 1 +follow 1 +topic 1 +state 1 +syntax 1 +semant 1 +algorithm 1 +exploit 1 +wysiwyg 1 +write 1 +easili 1 +well 1 +scienc 1 +faster 1 +prototyp 1 +matlab 1 +text 1 +booknon 1 +reli 1 +materi 1 +document 1 +found 1 +onin 1 +specif 1 +close 1 +zplprogram 1 +guid 1 +version 1 +prerequisitesfamiliar 1 +fortran 1 +ormatlab 1 +unix 1 +platform 1 +assum 1 +variabl 1 +credit 1 +audit 1 +debug 1 +select 1 +technic 1 +disciplin 1 +rang 1 +whole 1 +applic 1 +kernel 1 +inner 1 +informationcours 1 +syllabu 1 +lectur 1 +note 1 +appli 1 +ncsa 1 +block 1 +grant 1 +account 1 +faculti 1 +staff 1 +remotezpl 1 +compileroth 1 +import 1 +link 1 +sung 1 +choi 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..00e1ee10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,127 @@ +home 1 +page 1 +autumn 1 +introduct 1 +digit 1 +design 1 +quarter 1 +gaetano 1 +borriello 1 +corei 1 +andersonwelcom 1 +contain 1 +whole 1 +bunch 1 +us 1 +inform 1 +class 1 +keep 1 +mind 1 +document 1 +static 1 +especi 1 +announc 1 +messag 1 +ad 1 +frequent 1 +problem 1 +gener 1 +send 1 +mail 1 +tocs 1 +webmast 1 +notic 1 +instructor 1 +system 1 +administr 1 +last 1 +updat 1 +archiv 1 +messagess 1 +washington 1 +everyon 1 +cours 1 +goal 1 +syllabu 1 +meet 1 +time 1 +lectur 1 +final 1 +exam 1 +mondai 1 +decemb 1 +workload 1 +grade 1 +expect 1 +laboratori 1 +softwar 1 +tool 1 +polici 1 +collabor 1 +cheat 1 +address 1 +overal 1 +schedul 1 +topic 1 +offic 1 +hour 1 +sieg 1 +anderson 1 +corin 1 +aweekli 1 +assign 1 +weekli 1 +quizz 1 +onlin 1 +version 1 +slide 1 +textbook 1 +contemporari 1 +logic 1 +katz 1 +benjamin 1 +cum 1 +addison 1 +weslei 1 +maintain 1 +author 1 +publish 1 +note 1 +interest 1 +evolut 1 +implement 1 +technolog 1 +comput 1 +aid 1 +synario 1 +feedback 1 +tell 1 +think 1 +thing 1 +go 1 +even 1 +anonym 1 +desir 1 +question 1 +evalu 1 +complet 1 +link 1 +previou 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 +copyright 1 +depart 1 +scienc 1 +engin 1 +univers 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..b474ac8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,36 @@ +comput 1 +societycs 1 +societywelcom 1 +home 1 +page 1 +societi 1 +cours 1 +wintercs 1 +focu 1 +social 1 +econom 1 +ethic 1 +legal 1 +implic 1 +present 1 +internet 1 +futur 1 +nation 1 +andglob 1 +inform 1 +highwai 1 +instructor 1 +alan 1 +born 1 +class 1 +time 1 +tue 1 +thur 1 +sieg 1 +syllabusclass 1 +schedulelink 1 +relev 1 +sitesbook 1 +journal 1 +avail 1 +referenceassignmentsassign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..15aec279 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,50 @@ +index 1 +pagecs 1 +artifici 1 +intelligencefal 1 +quarter 1 +intellig 1 +pose 1 +fundament 1 +andchalleng 1 +question 1 +comput 1 +scienc 1 +build 1 +intelligentmachin 1 +cours 1 +address 1 +provid 1 +anin 1 +depth 1 +introduct 1 +select 1 +topic 1 +includ 1 +agentarchitectur 1 +knowledg 1 +represent 1 +search 1 +plan 1 +machinelearn 1 +reason 1 +uncertainti 1 +methodolog 1 +staff 1 +weldweld 1 +sieg 1 +hour 1 +marc 1 +friedmanfriedman 1 +nick 1 +kushmericknick 1 +outlin 1 +topicsprojectread 1 +assignmentsassign 1 +examsgradingresourcesth 1 +class 1 +mailinglist 1 +also 1 +archiv 1 +past 1 +messag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..19d8d250 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,132 @@ +ics 1 +intellig 1 +inform 1 +internet 1 +server 1 +meet 1 +tuesdai 1 +siegcreat 1 +side 1 +scriptspleas 1 +read 1 +guidelin 1 +towrit 1 +program 1 +execut 1 +someon 1 +follow 1 +link 1 +tothem 1 +peopl 1 +place 1 +collect 1 +mail 1 +list 1 +gener 1 +paul 1 +check 1 +futur 1 +begun 1 +updat 1 +file 1 +rememb 1 +want 1 +first 1 +us 1 +index 1 +html 1 +back 1 +ad 1 +phoenix 1 +impress 1 +older 1 +topic 1 +offici 1 +releas 1 +glimps 1 +instal 1 +manual 1 +page 1 +well 1 +develop 1 +home 1 +interest 1 +detail 1 +work 1 +winter 1 +usenix 1 +paper 1 +design 1 +implement 1 +wide 1 +area 1 +wai 1 +zwhere 1 +mosiac 1 +interfac 1 +zephyr 1 +locat 1 +databas 1 +show 1 +user 1 +current 1 +regist 1 +make 1 +guess 1 +room 1 +anoth 1 +version 1 +znol 1 +zwatch 1 +zlocat 1 +extra 1 +info 1 +except 1 +statu 1 +on 1 +anyon 1 +note 1 +lectur 1 +discuss 1 +sent 1 +displai 1 +belief 1 +short 1 +mike 1 +releg 1 +review 1 +site 1 +comment 1 +good 1 +miscellan 1 +rather 1 +rambl 1 +kurt 1 +grumbl 1 +problem 1 +improv 1 +mosaic 1 +bring 1 +class 1 +provid 1 +luddit 1 +perspect 1 +idea 1 +filter 1 +network 1 +sourc 1 +nick 1 +vagu 1 +relat 1 +decemb 1 +cacm 1 +summari 1 +articl 1 +chang 1 +document 1 +itout 1 +withci 1 +send 1 +theentir 1 +address 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..643dea6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,35 @@ +home 1 +page 1 +machin 1 +organ 1 +program 1 +credit 1 +introduct 1 +current 1 +system 1 +structur 1 +control 1 +commun 1 +memori 1 +processor 1 +devic 1 +project 1 +involv 1 +detail 1 +studi 1 +specif 1 +small 1 +computerhardwar 1 +softwar 1 +prerequisit 1 +consent 1 +instructor 1 +open 1 +student 1 +taken 1 +freshmen 1 +semesterli 1 +cours 1 +inform 1 +info 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..f32f9865 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,26 @@ +page 1 +introduct 1 +comput 1 +architectur 1 +credit 1 +design 1 +system 1 +compon 1 +processor 1 +instruct 1 +address 1 +control 1 +structur 1 +microprogram 1 +memori 1 +manag 1 +cach 1 +hierarchi 1 +interrupt 1 +prerequisit 1 +andc 1 +semesterli 1 +cours 1 +inform 1 +info 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..b5490d74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,26 @@ +home 1 +page 1 +advanc 1 +comput 1 +architectur 1 +credit 1 +techniqu 1 +design 1 +parallel 1 +process 1 +andpipelin 1 +multiprocessor 1 +multi 1 +network 1 +high 1 +performancemachin 1 +special 1 +purpos 1 +processor 1 +data 1 +flow 1 +prerequisit 1 +semesterli 1 +cours 1 +inform 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..01bd6c96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,31 @@ +home 1 +page 1 +advanc 1 +comput 1 +architectur 1 +credit 1 +parallel 1 +algorithm 1 +principl 1 +detect 1 +vectorizingcompil 1 +interconnect 1 +network 1 +simd 1 +mimd 1 +machin 1 +processorsynchron 1 +data 1 +coher 1 +multi 1 +dataflow 1 +special 1 +purposeprocessor 1 +prerequisit 1 +consent 1 +instructor 1 +semesterli 1 +cours 1 +inform 1 +info 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..aa592524 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,243 @@ +introduct 1 +oper 1 +system 1 +spring 1 +univers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +departmentc 1 +bart 1 +millerc 1 +systemsnew 1 +stufffin 1 +grade 1 +post 1 +problem 1 +set 1 +avail 1 +program 1 +assign 1 +quizz 1 +answer 1 +section 1 +lectur 1 +note 1 +readi 1 +read 1 +print 1 +class 1 +staffinstructor 1 +milleremail 1 +wisc 1 +eduoffic 1 +csphone 1 +offic 1 +hour 1 +wednesdai 1 +fridai 1 +noonor 1 +appoint 1 +karuna 1 +muthiahemail 1 +muthiah 1 +mondai 1 +jonathan 1 +weyersemail 1 +weyer 1 +cours 1 +materialsth 1 +organ 1 +around 1 +thelectur 1 +notesar 1 +page 1 +need 1 +textbookmodern 1 +tanenbaum 1 +programmingassign 1 +purchas 1 +copi 1 +ofobject 1 +orient 1 +us 1 +pohl 1 +whatev 1 +favorit 1 +book 1 +first 1 +come 1 +modern 1 +systemsandobject 1 +store 1 +discuss 1 +sectionslectur 1 +time 1 +tuesdai 1 +thursdai 1 +comput 1 +sciencesdiscuss 1 +nolandnot 1 +extra 1 +mainli 1 +recit 1 +materialcov 1 +weekli 1 +occas 1 +import 1 +detail 1 +ofth 1 +homework 1 +make 1 +sure 1 +leav 1 +room 1 +schedul 1 +attend 1 +exam 1 +quizzesther 1 +midterm 1 +final 1 +option 1 +week 1 +start 1 +second 1 +quiz 1 +thediscuss 1 +last 1 +minut 1 +follow 1 +past 1 +process 1 +concurr 1 +februari 1 +semaphor 1 +monitor 1 +messag 1 +usetrac 1 +activ 1 +real 1 +unix 1 +drive 1 +simul 1 +goal 1 +learn 1 +algorithm 1 +trace 1 +driven 1 +experi 1 +quantit 1 +analyz 1 +written 1 +setsdur 1 +semest 1 +hand 1 +severalwritten 1 +base 1 +turn 1 +though 1 +find 1 +poorli 1 +youdon 1 +problemssolut 1 +theproblem 1 +happi 1 +question 1 +andlook 1 +solut 1 +variou 1 +synchronizationprimit 1 +solv 1 +memori 1 +manag 1 +hardwar 1 +softwar 1 +late 1 +workassign 1 +date 1 +list 1 +handout 1 +entir 1 +havethre 1 +daysof 1 +credit 1 +dai 1 +differ 1 +eachof 1 +three 1 +absolut 1 +work 1 +accept 1 +cannot 1 +assignmentthat 1 +weekof 1 +cheatingprogram 1 +done 1 +partner 1 +group 1 +independ 1 +cheater 1 +receiv 1 +maximum 1 +penalti 1 +includ 1 +receivingan 1 +mark 1 +transcript 1 +facilitiesw 1 +probabl 1 +solari 1 +workstat 1 +run 1 +window 1 +student 1 +regist 1 +account 1 +policyif 1 +take 1 +lowest 1 +drop 1 +averag 1 +beno 1 +break 1 +count 1 +taught 1 +inth 1 +rang 1 +scheduleth 1 +tent 1 +could 1 +chang 1 +januari 1 +overview 1 +processesweek 1 +dispatch 1 +creationweek 1 +cooper 1 +synchronizationweek 1 +semaphoresweek 1 +monitorsweek 1 +deadlocksweek 1 +march 1 +debug 1 +strategi 1 +dynam 1 +alloc 1 +breakweek 1 +relocationweek 1 +segment 1 +tlbsweek 1 +april 1 +virtual 1 +replac 1 +thrash 1 +devic 1 +filesweek 1 +disk 1 +directoriesweek 1 +protectionweek 1 +secur 1 +advanc 1 +topic 1 +modifi 1 +bybart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..89412544 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,82 @@ +internet 1 +honor 1 +seminarunivers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +departmentc 1 +spring 1 +bart 1 +millerc 1 +seminarinstructor 1 +milleremail 1 +wisc 1 +eduoffic 1 +csphone 1 +offic 1 +hour 1 +wednesdai 1 +fridai 1 +noonor 1 +appoint 1 +lectureslectur 1 +time 1 +mondai 1 +comput 1 +sciencesclass 1 +schedulether 1 +written 1 +assign 1 +class 1 +requir 1 +attendal 1 +lectur 1 +particip 1 +discuss 1 +follow 1 +schedul 1 +mostli 1 +right 1 +could 1 +chang 1 +week 1 +januari 1 +miller 1 +introduct 1 +overviewweek 1 +larri 1 +landweb 1 +architectur 1 +protocolsweek 1 +februari 1 +client 1 +server 1 +remot 1 +procedur 1 +callsweek 1 +system 1 +securityweek 1 +eric 1 +bach 1 +secur 1 +encryptionweek 1 +march 1 +breakweek 1 +miron 1 +livni 1 +imag 1 +pictur 1 +netweek 1 +april 1 +high 1 +perform 1 +file 1 +systemsweek 1 +david 1 +wood 1 +supercomputerweek 1 +laru 1 +javaweek 1 +discussionslast 1 +modifi 1 +bybart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..2f51868c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,168 @@ +advanc 1 +oper 1 +system 1 +fall 1 +univers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +departmentc 1 +bart 1 +millerc 1 +systemssummarythi 1 +cours 1 +intend 1 +give 1 +broad 1 +exposur 1 +advancedoper 1 +topic 1 +read 1 +discuss 1 +protect 1 +secur 1 +memori 1 +manag 1 +kernel 1 +file 1 +synchron 1 +name 1 +distribut 1 +pleas 1 +rest 1 +inform 1 +sheet 1 +carefulli 1 +textther 1 +realli 1 +satisfactori 1 +textbook 1 +graduat 1 +level 1 +operatingsystemsclass 1 +current 1 +literatur 1 +text 1 +structur 1 +around 1 +journal 1 +articl 1 +andconfer 1 +proceed 1 +abl 1 +purchas 1 +doit 1 +handout 1 +class 1 +relev 1 +paper 1 +lectur 1 +detail 1 +review 1 +willinstead 1 +adiscuss 1 +major 1 +theme 1 +us 1 +focal 1 +point 1 +form 1 +group 1 +classmat 1 +meetonc 1 +twice 1 +week 1 +assign 1 +especi 1 +import 1 +part 1 +listaccord 1 +post 1 +schedul 1 +formula 1 +success 1 +papersindepend 1 +try 1 +identifyth 1 +issu 1 +particip 1 +thepap 1 +discussionsclass 1 +meet 1 +talk 1 +besupport 1 +comment 1 +opinion 1 +will 1 +activ 1 +daili 1 +geta 1 +expect 1 +quietli 1 +listen 1 +beveri 1 +unhappi 1 +papersdur 1 +write 1 +short 1 +page 1 +andon 1 +longer 1 +first 1 +paperwil 1 +design 1 +base 1 +idea 1 +work 1 +well 1 +understood 1 +facilityand 1 +extens 1 +area 1 +second 1 +involv 1 +project 1 +summaryof 1 +aselect 1 +topicsfrom 1 +choos 1 +good 1 +least 1 +refere 1 +fellowstud 1 +writer 1 +critic 1 +anoth 1 +person 1 +giveth 1 +reader 1 +look 1 +someon 1 +els 1 +revis 1 +pass 1 +examsther 1 +exam 1 +keep 1 +busi 1 +gradesscor 1 +final 1 +grade 1 +assignmenti 1 +availbl 1 +summari 1 +score 1 +fromth 1 +proposalsi 1 +also 1 +gradesar 1 +avail 1 +detailstim 1 +tuesdai 1 +thursdai 1 +place 1 +csoffic 1 +hour 1 +noonlast 1 +modifi 1 +bybart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..c98d033b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,146 @@ +section 1 +home 1 +pagec 1 +introduct 1 +comput 1 +programmingsect 1 +fortran 1 +credit 1 +cours 1 +cover 1 +basic 1 +program 1 +structur 1 +need 1 +prepar 1 +student 1 +elementari 1 +engin 1 +prior 1 +experi 1 +requir 1 +knowledg 1 +assum 1 +materi 1 +enabl 1 +write 1 +simpl 1 +solv 1 +problem 1 +done 1 +intend 1 +receiv 1 +littl 1 +instruct 1 +high 1 +school 1 +taught 1 +entir 1 +languag 1 +primarili 1 +scienc 1 +major 1 +click 1 +descript 1 +menu 1 +import 1 +announc 1 +read 1 +lectur 1 +instructor 1 +grade 1 +polici 1 +syllabu 1 +text 1 +note 1 +assign 1 +exercis 1 +pointer 1 +interest 1 +psycholog 1 +march 1 +pleas 1 +punctual 1 +avoid 1 +disturb 1 +class 1 +gareth 1 +bestor 1 +offic 1 +overal 1 +gener 1 +code 1 +though 1 +want 1 +time 1 +solut 1 +algorithm 1 +even 1 +depend 1 +particular 1 +follow 1 +mondai 1 +week 1 +question 1 +subroutin 1 +function 1 +labyou 1 +us 1 +vectra 1 +statist 1 +contain 1 +hewlett 1 +packard 1 +run 1 +microsoft 1 +window 1 +open 1 +seven 1 +dai 1 +except 1 +certain 1 +holidai 1 +printer 1 +room 1 +locat 1 +across 1 +hall 1 +also 1 +dorm 1 +howev 1 +probabl 1 +purchas 1 +copi 1 +lahei 1 +person 1 +insid 1 +textbook 1 +work 1 +lab 1 +campu 1 +compil 1 +first 1 +softwar 1 +includ 1 +mail 1 +netscap 1 +page 1 +depart 1 +start 1 +point 1 +internet 1 +explor 1 +lyco 1 +search 1 +world 1 +wide 1 +keyword 1 +dilbert 1 +comic 1 +relief 1 +long 1 +night 1 +copyright 1 +wisc 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..08cf3186 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,265 @@ +section 1 +home 1 +pagec 1 +algebra 1 +languag 1 +programmingsect 1 +fortran 1 +taught 1 +entir 1 +program 1 +intend 1 +primarili 1 +engin 1 +student 1 +comput 1 +scienc 1 +major 1 +click 1 +cours 1 +descript 1 +menu 1 +import 1 +announc 1 +read 1 +lectur 1 +instructor 1 +grade 1 +polici 1 +syllabu 1 +text 1 +note 1 +exam 1 +assign 1 +problem 1 +solv 1 +exercis 1 +pointer 1 +interest 1 +psycholog 1 +pleas 1 +punctual 1 +avoid 1 +disturb 1 +class 1 +gareth 1 +bestor 1 +offic 1 +lowest 1 +score 1 +contribut 1 +must 1 +complet 1 +hand 1 +elig 1 +receiv 1 +pass 1 +final 1 +curv 1 +mean 1 +rang 1 +thur 1 +februari 1 +april 1 +amclick 1 +list 1 +current 1 +identif 1 +tent 1 +follow 1 +topic 1 +approxim 1 +cover 1 +week 1 +semest 1 +relev 1 +come 1 +abl 1 +question 1 +anyth 1 +unsur 1 +instead 1 +wait 1 +try 1 +work 1 +discov 1 +didn 1 +realli 1 +understand 1 +someth 1 +term 1 +notestext 1 +applic 1 +edit 1 +koffman 1 +friedman 1 +copi 1 +avail 1 +line 1 +substitut 1 +show 1 +overhead 1 +projector 1 +includ 1 +exampl 1 +addit 1 +board 1 +respons 1 +materi 1 +assignmentsther 1 +three 1 +constitut 1 +well 1 +ensur 1 +good 1 +regardless 1 +perform 1 +hour 1 +long 1 +though 1 +stai 1 +longer 1 +need 1 +extra 1 +time 1 +close 1 +book 1 +bring 1 +pencil 1 +calcul 1 +necessari 1 +even 1 +us 1 +solut 1 +seven 1 +attempt 1 +everi 1 +compil 1 +without 1 +error 1 +automat 1 +zero 1 +risk 1 +fail 1 +gradesheet 1 +handin 1 +directori 1 +onlin 1 +late 1 +academ 1 +misconduct 1 +cheat 1 +specif 1 +mondai 1 +fridai 1 +wednesdai 1 +pmhow 1 +help 1 +consult 1 +wear 1 +name 1 +tag 1 +duti 1 +answer 1 +short 1 +messag 1 +syntax 1 +login 1 +printer 1 +send 1 +mail 1 +netscap 1 +inform 1 +gener 1 +requir 1 +explan 1 +best 1 +normal 1 +except 1 +dissert 1 +research 1 +modem 1 +therefor 1 +want 1 +outsid 1 +make 1 +appoint 1 +first 1 +easili 1 +contact 1 +regularli 1 +exerciseson 1 +skill 1 +learn 1 +distinguish 1 +programm 1 +doesn 1 +matter 1 +familiar 1 +particular 1 +write 1 +techniqu 1 +weekli 1 +small 1 +trivial 1 +give 1 +look 1 +think 1 +right 1 +step 1 +would 1 +overal 1 +structur 1 +code 1 +algorithm 1 +depend 1 +subroutin 1 +function 1 +labyou 1 +vectra 1 +statist 1 +contain 1 +hewlett 1 +packard 1 +run 1 +microsoft 1 +window 1 +open 1 +dai 1 +certain 1 +holidai 1 +room 1 +locat 1 +across 1 +hall 1 +also 1 +dorm 1 +howev 1 +probabl 1 +purchas 1 +lahei 1 +person 1 +insid 1 +textbook 1 +lab 1 +campu 1 +softwar 1 +page 1 +depart 1 +start 1 +point 1 +internet 1 +explor 1 +lyco 1 +search 1 +world 1 +wide 1 +keyword 1 +dilbert 1 +comic 1 +relief 1 +night 1 +copyright 1 +wisc 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..3d202ee3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,73 @@ +fall 1 +section 1 +algebra 1 +languag 1 +program 1 +name 1 +dave 1 +egglestonemail 1 +burnett 1 +wisc 1 +eduoffic 1 +offic 1 +phone 1 +hour 1 +announc 1 +updat 1 +note 1 +origin 1 +output 1 +prog 1 +page 1 +error 1 +dai 1 +week 1 +correct 1 +valu 1 +inform 1 +exam 1 +question 1 +ask 1 +hourlywork 1 +classread 1 +scan 1 +thursdai 1 +class 1 +avail 1 +solut 1 +quiz 1 +grade 1 +gener 1 +cours 1 +informationc 1 +home 1 +pagecours 1 +objectivesvectra 1 +labc 1 +consultantssyllabuswork 1 +homeclass 1 +handout 1 +gradeshomeworkexam 1 +quizzesmiscellan 1 +archivepolici 1 +informationemail 1 +policygrad 1 +policyl 1 +policyacadem 1 +misconduct 1 +policytextproblem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +list 1 +known 1 +erratalast 1 +modifi 1 +eggleston 1 +base 1 +greg 1 +sharp 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..1849f373 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,117 @@ +spring 1 +advanc 1 +oper 1 +system 1 +summari 1 +cours 1 +intend 1 +give 1 +broad 1 +exposur 1 +advancedoper 1 +topic 1 +import 1 +compon 1 +read 1 +discuss 1 +ofvari 1 +research 1 +paper 1 +project 1 +involv 1 +implement 1 +anexperiment 1 +cover 1 +topicsinclud 1 +synchron 1 +commun 1 +memori 1 +manag 1 +file 1 +protect 1 +secur 1 +distribut 1 +requir 1 +tochoos 1 +problem 1 +propos 1 +solut 1 +prototyp 1 +lectur 1 +info 1 +class 1 +relev 1 +current 1 +detail 1 +review 1 +rathera 1 +major 1 +theme 1 +us 1 +focal 1 +point 1 +activ 1 +particip 1 +strongli 1 +encourag 1 +tuesdai 1 +thursdai 1 +engin 1 +halloffic 1 +hour 1 +appoint 1 +comput 1 +scienc 1 +text 1 +select 1 +classic 1 +design 1 +purchas 1 +doit 1 +formerli 1 +macc 1 +document 1 +deskfor 1 +semest 1 +differ 1 +previou 1 +pleas 1 +copi 1 +grade 1 +exam 1 +instead 1 +assign 1 +first 1 +benchmark 1 +suit 1 +measur 1 +performanceof 1 +variou 1 +suno 1 +solari 1 +linux 1 +window 1 +manya 1 +hand 1 +second 1 +final 1 +report 1 +present 1 +total 1 +count 1 +schedul 1 +tent 1 +list 1 +suggest 1 +make 1 +well 1 +either 1 +case 1 +need 1 +come 1 +choos 1 +team 1 +peopl 1 +allow 1 +slide 1 +assig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..c3cb2e57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,45 @@ +introduct 1 +comput 1 +program 1 +scienc 1 +fall 1 +credit 1 +cours 1 +design 1 +cover 1 +basic 1 +programmingstructur 1 +need 1 +prepar 1 +student 1 +elementaryengin 1 +materi 1 +suffici 1 +enableth 1 +write 1 +simpl 1 +solv 1 +engin 1 +problem 1 +inelementari 1 +essenti 1 +first 1 +half 1 +list 1 +section 1 +lectur 1 +fortran 1 +jeff 1 +lampert 1 +toni 1 +silva 1 +sidnei 1 +hummert 1 +michael 1 +birk 1 +russel 1 +man 1 +martin 1 +reameslast 1 +modifi 1 +anthoni 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..7c145a96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,96 @@ +fall 1 +midterm 1 +exam 1 +answer 1 +keyinstructorprofessor 1 +desautelsoffic 1 +comput 1 +sciencesoffic 1 +hour 1 +mondai 1 +wednesdai 1 +appoint 1 +phone 1 +dept 1 +offic 1 +mail 1 +wisc 1 +teach 1 +assistantsfollow 1 +link 1 +home 1 +page 1 +name 1 +kelli 1 +ratliff 1 +email 1 +section 1 +grade 1 +nathan 1 +bockrath 1 +rehnuma 1 +rahman 1 +jaim 1 +fink 1 +jfink 1 +ashraf 1 +aboulnaga 1 +andrew 1 +geeri 1 +jame 1 +herro 1 +jherro 1 +abhinav 1 +gupta 1 +agupta 1 +jyothi 1 +krothap 1 +chiang 1 +suhui 1 +thano 1 +tsioli 1 +gradesexplor 1 +compani 1 +whose 1 +softwar 1 +hardwar 1 +borland 1 +hewlett 1 +packard 1 +intel 1 +microsoft 1 +novel 1 +us 1 +explor 1 +lyco 1 +enorm 1 +databas 1 +site 1 +yahoo 1 +internet 1 +resourc 1 +classifi 1 +categori 1 +lookup 1 +search 1 +virtual 1 +tourist 1 +find 1 +around 1 +world 1 +click 1 +mother 1 +larg 1 +alphabet 1 +list 1 +cool 1 +especi 1 +excel 1 +univers 1 +wisconsin 1 +madison 1 +origin 1 +creat 1 +maintain 1 +teitelbaum 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..7c145a96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,96 @@ +fall 1 +midterm 1 +exam 1 +answer 1 +keyinstructorprofessor 1 +desautelsoffic 1 +comput 1 +sciencesoffic 1 +hour 1 +mondai 1 +wednesdai 1 +appoint 1 +phone 1 +dept 1 +offic 1 +mail 1 +wisc 1 +teach 1 +assistantsfollow 1 +link 1 +home 1 +page 1 +name 1 +kelli 1 +ratliff 1 +email 1 +section 1 +grade 1 +nathan 1 +bockrath 1 +rehnuma 1 +rahman 1 +jaim 1 +fink 1 +jfink 1 +ashraf 1 +aboulnaga 1 +andrew 1 +geeri 1 +jame 1 +herro 1 +jherro 1 +abhinav 1 +gupta 1 +agupta 1 +jyothi 1 +krothap 1 +chiang 1 +suhui 1 +thano 1 +tsioli 1 +gradesexplor 1 +compani 1 +whose 1 +softwar 1 +hardwar 1 +borland 1 +hewlett 1 +packard 1 +intel 1 +microsoft 1 +novel 1 +us 1 +explor 1 +lyco 1 +enorm 1 +databas 1 +site 1 +yahoo 1 +internet 1 +resourc 1 +classifi 1 +categori 1 +lookup 1 +search 1 +virtual 1 +tourist 1 +find 1 +around 1 +world 1 +click 1 +mother 1 +larg 1 +alphabet 1 +list 1 +cool 1 +especi 1 +excel 1 +univers 1 +wisconsin 1 +madison 1 +origin 1 +creat 1 +maintain 1 +teitelbaum 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..e9cf17e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,169 @@ +us 1 +comput 1 +lectur 1 +computersinstructor 1 +info 1 +instructor 1 +salli 1 +petersonoffic 1 +sciencephon 1 +mail 1 +wisc 1 +slpeter 1 +facstaff 1 +eduoffic 1 +hour 1 +tuesdai 1 +thursdai 1 +appointmentvit 1 +class 1 +time 1 +place 1 +held 1 +engin 1 +halllectur 1 +text 1 +inform 1 +technolog 1 +societi 1 +laudon 1 +traver 1 +laudonlab 1 +point 1 +click 1 +drag 1 +macintosh 1 +petersoncours 1 +introduct 1 +design 1 +take 1 +zero 1 +knowledg 1 +computersto 1 +crack 1 +shot 1 +user 1 +skill 1 +throughcolleg 1 +arena 1 +section 1 +taught 1 +macintoshcomput 1 +avail 1 +csuse 1 +cours 1 +compon 1 +part 1 +discuss 1 +gener 1 +term 1 +scienc 1 +topic 1 +work 1 +includ 1 +follow 1 +necessarili 1 +order 1 +applic 1 +program 1 +word 1 +processor 1 +spreadsheet 1 +graphic 1 +databas 1 +hardwar 1 +input 1 +output 1 +storag 1 +devic 1 +oper 1 +system 1 +languag 1 +network 1 +telecommun 1 +artifici 1 +intellig 1 +expert 1 +relat 1 +social 1 +issu 1 +laboratori 1 +hand 1 +experienceon 1 +iici 1 +process 1 +electron 1 +newsgroup 1 +world 1 +wide 1 +eudora 1 +netscap 1 +paint 1 +draw 1 +aldu 1 +superpaint 1 +chart 1 +excel 1 +filemak 1 +present 1 +manag 1 +hypercard 1 +desktop 1 +publish 1 +pagemak 1 +integr 1 +learn 1 +well 1 +addit 1 +special 1 +tool 1 +scanner 1 +teach 1 +thegoal 1 +provid 1 +high 1 +qualiti 1 +instruct 1 +rich 1 +educationalexperi 1 +namesectiontimedai 1 +bodner 1 +mwnick 1 +leavi 1 +mwtrshannon 1 +lloyd 1 +trtrjeff 1 +reminga 1 +mwfmwira 1 +sharenow 1 +trtrbrian 1 +swander 1 +mwfmwfbrad 1 +thayer 1 +mwfmwfjoe 1 +varghes 1 +trtrgeoff 1 +weinberg 1 +mwftrmaria 1 +yuin 1 +mwfmwrecommend 1 +background 1 +necessari 1 +assign 1 +quizz 1 +exam 1 +grade 1 +base 1 +regular 1 +assignmentsand 1 +syllabu 1 +glanc 1 +contain 1 +nitti 1 +gritti 1 +detail 1 +superpaintassign 1 +excellast 1 +modifi 1 +octob 1 +jonbodn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..e9cf17e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,169 @@ +us 1 +comput 1 +lectur 1 +computersinstructor 1 +info 1 +instructor 1 +salli 1 +petersonoffic 1 +sciencephon 1 +mail 1 +wisc 1 +slpeter 1 +facstaff 1 +eduoffic 1 +hour 1 +tuesdai 1 +thursdai 1 +appointmentvit 1 +class 1 +time 1 +place 1 +held 1 +engin 1 +halllectur 1 +text 1 +inform 1 +technolog 1 +societi 1 +laudon 1 +traver 1 +laudonlab 1 +point 1 +click 1 +drag 1 +macintosh 1 +petersoncours 1 +introduct 1 +design 1 +take 1 +zero 1 +knowledg 1 +computersto 1 +crack 1 +shot 1 +user 1 +skill 1 +throughcolleg 1 +arena 1 +section 1 +taught 1 +macintoshcomput 1 +avail 1 +csuse 1 +cours 1 +compon 1 +part 1 +discuss 1 +gener 1 +term 1 +scienc 1 +topic 1 +work 1 +includ 1 +follow 1 +necessarili 1 +order 1 +applic 1 +program 1 +word 1 +processor 1 +spreadsheet 1 +graphic 1 +databas 1 +hardwar 1 +input 1 +output 1 +storag 1 +devic 1 +oper 1 +system 1 +languag 1 +network 1 +telecommun 1 +artifici 1 +intellig 1 +expert 1 +relat 1 +social 1 +issu 1 +laboratori 1 +hand 1 +experienceon 1 +iici 1 +process 1 +electron 1 +newsgroup 1 +world 1 +wide 1 +eudora 1 +netscap 1 +paint 1 +draw 1 +aldu 1 +superpaint 1 +chart 1 +excel 1 +filemak 1 +present 1 +manag 1 +hypercard 1 +desktop 1 +publish 1 +pagemak 1 +integr 1 +learn 1 +well 1 +addit 1 +special 1 +tool 1 +scanner 1 +teach 1 +thegoal 1 +provid 1 +high 1 +qualiti 1 +instruct 1 +rich 1 +educationalexperi 1 +namesectiontimedai 1 +bodner 1 +mwnick 1 +leavi 1 +mwtrshannon 1 +lloyd 1 +trtrjeff 1 +reminga 1 +mwfmwira 1 +sharenow 1 +trtrbrian 1 +swander 1 +mwfmwfbrad 1 +thayer 1 +mwfmwfjoe 1 +varghes 1 +trtrgeoff 1 +weinberg 1 +mwftrmaria 1 +yuin 1 +mwfmwrecommend 1 +background 1 +necessari 1 +assign 1 +quizz 1 +exam 1 +grade 1 +base 1 +regular 1 +assignmentsand 1 +syllabu 1 +glanc 1 +contain 1 +nitti 1 +gritti 1 +detail 1 +superpaintassign 1 +excellast 1 +modifi 1 +octob 1 +jonbodn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..35cbbf08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,64 @@ +home 1 +pagecomput 1 +scienc 1 +algebra 1 +languag 1 +program 1 +section 1 +instructorsw 1 +would 1 +like 1 +comment 1 +suggest 1 +complaint 1 +feedback 1 +provid 1 +click 1 +skrentni 1 +coordin 1 +offic 1 +email 1 +csinform 1 +frequent 1 +ask 1 +question 1 +cours 1 +overview 1 +microcomput 1 +laboratori 1 +consult 1 +fall 1 +schedul 1 +tutor 1 +mainli 1 +polici 1 +academ 1 +misconduct 1 +offer 1 +depart 1 +softwar 1 +introduct 1 +microsoft 1 +window 1 +hint 1 +compil 1 +oper 1 +system 1 +netscap 1 +creat 1 +us 1 +subdirectoriesc 1 +inform 1 +savitch 1 +text 1 +book 1 +borland 1 +integr 1 +develop 1 +environmentfortran 1 +jeff 1 +lampert 1 +page 1 +last 1 +updat 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..1917c3ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,38 @@ +cours 1 +infocours 1 +inform 1 +cscours 1 +descriptionfrom 1 +guidebook 1 +undergradu 1 +student 1 +construct 1 +algorithm 1 +problem 1 +solv 1 +instruct 1 +experi 1 +least 1 +procedur 1 +orient 1 +languag 1 +pascal 1 +fortran 1 +survei 1 +advanc 1 +program 1 +techniqu 1 +prereq 1 +high 1 +school 1 +mathemat 1 +prepar 1 +colleg 1 +work 1 +statist 1 +logic 1 +consent 1 +instructor 1 +open 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..35cbbf08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,64 @@ +home 1 +pagecomput 1 +scienc 1 +algebra 1 +languag 1 +program 1 +section 1 +instructorsw 1 +would 1 +like 1 +comment 1 +suggest 1 +complaint 1 +feedback 1 +provid 1 +click 1 +skrentni 1 +coordin 1 +offic 1 +email 1 +csinform 1 +frequent 1 +ask 1 +question 1 +cours 1 +overview 1 +microcomput 1 +laboratori 1 +consult 1 +fall 1 +schedul 1 +tutor 1 +mainli 1 +polici 1 +academ 1 +misconduct 1 +offer 1 +depart 1 +softwar 1 +introduct 1 +microsoft 1 +window 1 +hint 1 +compil 1 +oper 1 +system 1 +netscap 1 +creat 1 +us 1 +subdirectoriesc 1 +inform 1 +savitch 1 +text 1 +book 1 +borland 1 +integr 1 +develop 1 +environmentfortran 1 +jeff 1 +lampert 1 +page 1 +last 1 +updat 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..38b956ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,47 @@ +home 1 +page 1 +problem 1 +solv 1 +us 1 +comput 1 +fall 1 +scienc 1 +check 1 +follow 1 +inform 1 +instructor 1 +teach 1 +assist 1 +includ 1 +offic 1 +hour 1 +assign 1 +suggest 1 +copi 1 +explan 1 +grade 1 +polici 1 +work 1 +examin 1 +past 1 +exam 1 +lab 1 +handout 1 +document 1 +syllabu 1 +mani 1 +postscript 1 +need 1 +viewer 1 +obtain 1 +site 1 +local 1 +servic 1 +section 1 +depart 1 +ghost 1 +directori 1 +read 1 +readm 1 +file 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..38b956ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,47 @@ +home 1 +page 1 +problem 1 +solv 1 +us 1 +comput 1 +fall 1 +scienc 1 +check 1 +follow 1 +inform 1 +instructor 1 +teach 1 +assist 1 +includ 1 +offic 1 +hour 1 +assign 1 +suggest 1 +copi 1 +explan 1 +grade 1 +polici 1 +work 1 +examin 1 +past 1 +exam 1 +lab 1 +handout 1 +document 1 +syllabu 1 +mani 1 +postscript 1 +need 1 +viewer 1 +obtain 1 +site 1 +local 1 +servic 1 +section 1 +depart 1 +ghost 1 +directori 1 +read 1 +readm 1 +file 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..fa78232d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,104 @@ +home 1 +page 1 +fall 1 +cours 1 +inform 1 +instructor 1 +offic 1 +hour 1 +class 1 +cancel 1 +handout 1 +assign 1 +solut 1 +exam 1 +grade 1 +simul 1 +help 1 +lectur 1 +note 1 +frequent 1 +ask 1 +question 1 +section 1 +jerri 1 +tusch 1 +phone 1 +mail 1 +wisc 1 +tutsch 1 +execpc 1 +nolandsect 1 +karen 1 +miller 1 +smoler 1 +time 1 +psycholog 1 +sunlung 1 +suen 1 +tuth 1 +ssuen 1 +edusridevi 1 +bhamidipati 1 +bsri 1 +edumohammad 1 +asgarian 1 +mondai 1 +septemb 1 +wednesdai 1 +novemb 1 +schedul 1 +revis 1 +chapter 1 +postscript 1 +overview 1 +html 1 +program 1 +homework 1 +quiz 1 +programs 1 +programm 1 +examsal 1 +quizz 1 +open 1 +book 1 +calcul 1 +fridai 1 +octob 1 +probabl 1 +decemb 1 +last 1 +syllabu 1 +previou 1 +format 1 +spring 1 +summer 1 +midterm 1 +answer 1 +final 1 +lookup 1 +graphic 1 +interfac 1 +manual 1 +noteskaren 1 +number 1 +system 1 +data 1 +represent 1 +integ 1 +arithmet 1 +float 1 +point 1 +structur 1 +regist 1 +procedur 1 +updat 1 +assembl 1 +updatedmondai 1 +except 1 +process 1 +featur 1 +perform 1 +architecur 1 +case 1 +studi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..1fcef032 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,115 @@ +home 1 +page 1 +fall 1 +cours 1 +inform 1 +instructor 1 +offic 1 +hour 1 +class 1 +cancel 1 +handout 1 +assign 1 +solut 1 +exam 1 +grade 1 +simul 1 +help 1 +lectur 1 +note 1 +frequent 1 +ask 1 +question 1 +section 1 +jerri 1 +tusch 1 +phone 1 +mail 1 +wisc 1 +tutsch 1 +execpc 1 +nolandsect 1 +karen 1 +miller 1 +smoler 1 +time 1 +psycholog 1 +sunlung 1 +suen 1 +tuth 1 +ssuen 1 +edusridevi 1 +bhamidipati 1 +bsri 1 +edumohammad 1 +asgarian 1 +mondai 1 +septemb 1 +wednesdai 1 +novemb 1 +schedul 1 +revis 1 +chapter 1 +postscript 1 +overview 1 +html 1 +program 1 +homework 1 +quiz 1 +programs 1 +programm 1 +programa 1 +programb 1 +examsal 1 +quizz 1 +open 1 +book 1 +calcul 1 +fridai 1 +octob 1 +probabl 1 +decemb 1 +last 1 +option 1 +final 1 +thursdai 1 +difficult 1 +cumul 1 +offer 1 +desperateto 1 +rais 1 +sign 1 +advanc 1 +syllabu 1 +previou 1 +format 1 +spring 1 +summer 1 +midterm 1 +answer 1 +lookup 1 +graphic 1 +interfac 1 +manual 1 +noteskaren 1 +number 1 +system 1 +data 1 +represent 1 +integ 1 +arithmet 1 +float 1 +point 1 +structur 1 +regist 1 +procedur 1 +updat 1 +assembl 1 +updatedmondai 1 +except 1 +process 1 +featur 1 +perform 1 +architecur 1 +case 1 +studi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..7d883228 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,126 @@ +data 1 +structur 1 +lec 1 +introduct 1 +structureslectur 1 +psychologylectur 1 +psychologycours 1 +inform 1 +announc 1 +read 1 +assign 1 +get 1 +start 1 +help 1 +exam 1 +program 1 +sampl 1 +code 1 +lectur 1 +cours 1 +materi 1 +comput 1 +lab 1 +home 1 +gener 1 +place 1 +recent 1 +first 1 +problem 1 +found 1 +locat 1 +page 1 +binari 1 +search 1 +tree 1 +onlin 1 +last 1 +makeup 1 +done 1 +copi 1 +solut 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +queue 1 +stack 1 +topic 1 +list 1 +handin 1 +directori 1 +creat 1 +common 1 +error 1 +suggest 1 +addit 1 +welcom 1 +either 1 +vega 1 +work 1 +line 1 +mondai 1 +wednesdai 1 +magic 1 +number 1 +sourc 1 +file 1 +must 1 +abl 1 +compil 1 +otherwis 1 +unusu 1 +look 1 +stale 1 +forget 1 +reload 1 +updat 1 +browser 1 +cach 1 +becom 1 +outdat 1 +attend 1 +unix 1 +tutori 1 +need 1 +time 1 +thur 1 +futur 1 +balanc 1 +chapter 1 +tabl 1 +discuss 1 +comparison 1 +implement 1 +skip 1 +simul 1 +overload 1 +oper 1 +hash 1 +link 1 +pointer 1 +dynam 1 +memori 1 +alloc 1 +sort 1 +analysi 1 +algorithm 1 +basic 1 +recurs 1 +focu 1 +appendix 1 +skrentni 1 +wisc 1 +offic 1 +scienc 1 +hour 1 +teach 1 +assist 1 +baicheng 1 +billi 1 +liao 1 +bail 1 +cheng 1 +jiacheng 1 +pmcopyright 1 +jame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..0a00be0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,548 @@ +lectur 1 +introduct 1 +data 1 +structuresfal 1 +cours 1 +email 1 +address 1 +wisc 1 +home 1 +page 1 +http 1 +htmlinstructor 1 +yanni 1 +ioannidi 1 +offic 1 +comput 1 +sciencesoffic 1 +hour 1 +tuesdai 1 +thursdai 1 +amoffic 1 +phone 1 +html 1 +content 1 +new 1 +teach 1 +assist 1 +inform 1 +languag 1 +text 1 +grade 1 +exam 1 +schedul 1 +assign 1 +program 1 +late 1 +polici 1 +cheat 1 +help 1 +style 1 +extern 1 +document 1 +intern 1 +us 1 +unix 1 +develop 1 +cycl 1 +newsassign 1 +readi 1 +midterm 1 +statisticssom 1 +interest 1 +statist 1 +section 1 +median 1 +mean 1 +midterma 1 +sampl 1 +oldmidterm 1 +avail 1 +prepar 1 +ownmidterm 1 +note 1 +notat 1 +binari 1 +searchth 1 +search 1 +want 1 +print 1 +either 1 +open 1 +filemenu 1 +ghostview 1 +window 1 +show 1 +andchoos 1 +menu 1 +item 1 +women 1 +sciencesom 1 +femal 1 +faculti 1 +graduat 1 +student 1 +undergradu 1 +haveform 1 +group 1 +call 1 +wic 1 +scienc 1 +oneof 1 +goal 1 +encourag 1 +becomecomput 1 +major 1 +thisclass 1 +would 1 +like 1 +talk 1 +someon 1 +incomput 1 +studi 1 +extra 1 +withtheir 1 +classwork 1 +suzan 1 +computersci 1 +grad 1 +tomak 1 +appoint 1 +mail 1 +stodder 1 +eduand 1 +grow 1 +tremend 1 +field 1 +theodd 1 +ever 1 +write 1 +anoth 1 +thiscours 1 +end 1 +abl 1 +statementi 1 +true 1 +pascal 1 +also 1 +wide 1 +aniniti 1 +startup 1 +period 1 +product 1 +take 1 +exceptionsy 1 +requir 1 +textth 1 +book 1 +isdata 1 +abstract 1 +problem 1 +solv 1 +wall 1 +mirror 1 +frank 1 +carrano 1 +isbn 1 +well 1 +written 1 +cover 1 +materi 1 +includ 1 +separ 1 +notnecessari 1 +often 1 +alwai 1 +follow 1 +fall 1 +david 1 +dewitt 1 +actual 1 +consider 1 +complet 1 +simpl 1 +lecturenot 1 +still 1 +short 1 +isveri 1 +littl 1 +narr 1 +exercis 1 +recommend 1 +addit 1 +sourc 1 +purchas 1 +whichar 1 +doit 1 +desk 1 +near 1 +dayton 1 +street 1 +entranceof 1 +build 1 +first 1 +experi 1 +needsom 1 +activ 1 +account 1 +log 1 +creat 1 +edit 1 +manipul 1 +file 1 +compil 1 +run 1 +debug 1 +handoutc 1 +notesar 1 +contain 1 +find 1 +invalu 1 +mention 1 +althoughi 1 +supplement 1 +handout 1 +courseof 1 +semest 1 +nonetheless 1 +respons 1 +base 1 +onth 1 +read 1 +andth 1 +gradingther 1 +even 1 +final 1 +five 1 +determin 1 +approxim 1 +equal 1 +weight 1 +programmingassign 1 +count 1 +octob 1 +chemistri 1 +wednesdai 1 +decemb 1 +place 1 +list 1 +topic 1 +detail 1 +provid 1 +later 1 +administr 1 +gener 1 +familiar 1 +basic 1 +stuff 1 +function 1 +apoint 1 +record 1 +equival 1 +madison 1 +prerequisitecours 1 +must 1 +done 1 +design 1 +machin 1 +thesear 1 +room 1 +floor 1 +thec 1 +prefer 1 +certainrestrict 1 +univers 1 +emailand 1 +copi 1 +thatyou 1 +turn 1 +electron 1 +youwork 1 +make 1 +provis 1 +download 1 +toyour 1 +sure 1 +runwith 1 +sparcstat 1 +notifi 1 +chang 1 +inassign 1 +hint 1 +assum 1 +allelectron 1 +send 1 +policyno 1 +accept 1 +exactli 1 +order 1 +avoid 1 +caus 1 +load 1 +coincid 1 +duedat 1 +sever 1 +class 1 +simpli 1 +start 1 +right 1 +awai 1 +oneach 1 +thing 1 +certain 1 +wrong 1 +wait 1 +thelast 1 +minut 1 +except 1 +approv 1 +need 1 +good 1 +excus 1 +troubl 1 +soon 1 +possibl 1 +cheatingth 1 +depart 1 +hard 1 +linest 1 +welcom 1 +tocommun 1 +algorithm 1 +datastructur 1 +butther 1 +share 1 +code 1 +expect 1 +learn 1 +understand 1 +obei 1 +thecomput 1 +system 1 +policiesgovern 1 +helpif 1 +work 1 +pleas 1 +know 1 +earli 1 +policiesif 1 +best 1 +tovisit 1 +along 1 +currenthard 1 +intend 1 +time 1 +explain 1 +conceptsthat 1 +present 1 +confus 1 +answer 1 +specif 1 +question 1 +reliabl 1 +contact 1 +respond 1 +emailsever 1 +daili 1 +almost 1 +everi 1 +week 1 +gradingprogram 1 +criteria 1 +correct 1 +behav 1 +correctli 1 +normal 1 +typicalinput 1 +state 1 +projectspecif 1 +clariti 1 +easi 1 +informationabout 1 +robust 1 +behavior 1 +extrem 1 +unusu 1 +situat 1 +handl 1 +reason 1 +andlog 1 +manner 1 +blow 1 +qualiti 1 +test 1 +shoulddemonstr 1 +facet 1 +capabl 1 +includingunusu 1 +case 1 +effici 1 +unnecessarili 1 +ineffici 1 +construct 1 +howev 1 +never 1 +pursu 1 +expens 1 +modular 1 +effect 1 +useof 1 +paramet 1 +incorpor 1 +sort 1 +paper 1 +subject 1 +considerationof 1 +arbitrari 1 +limit 1 +bound 1 +size 1 +orcomplex 1 +input 1 +whenev 1 +necessari 1 +express 1 +definedconst 1 +easili 1 +numer 1 +liter 1 +appear 1 +thosevalu 1 +styleus 1 +meaning 1 +identifi 1 +name 1 +consist 1 +scheme 1 +suggest 1 +convent 1 +variable_nam 1 +function_nam 1 +argument 1 +const 1 +defined_const 1 +enum 1 +enumtyp 1 +valu 1 +classnam 1 +multipl 1 +statement 1 +singl 1 +line 1 +skip 1 +clear 1 +indent 1 +notesfor 1 +continu 1 +loop 1 +label 1 +meaningfulli 1 +documentationthi 1 +long 1 +comment 1 +begin 1 +yourprogram 1 +typic 1 +user 1 +someonewho 1 +superfici 1 +full 1 +give 1 +descript 1 +tell 1 +format 1 +bug 1 +special 1 +featur 1 +assumpt 1 +made 1 +describ 1 +neg 1 +posit 1 +aspect 1 +unawar 1 +descriptionne 1 +repeat 1 +briefli 1 +summar 1 +point 1 +refer 1 +thensuffici 1 +appli 1 +documentationther 1 +four 1 +main 1 +type 1 +header 1 +structuresshould 1 +purpos 1 +outlin 1 +declar 1 +next 1 +variabl 1 +membershould 1 +convei 1 +sname 1 +much 1 +withoutmak 1 +suppli 1 +exampl 1 +index 1 +last 1 +element 1 +ad 1 +stackyou 1 +local 1 +within 1 +segment 1 +tricki 1 +opaqu 1 +beavoid 1 +sometim 1 +commentcan 1 +reader 1 +go 1 +clarifi 1 +level 1 +outlineof 1 +vimani 1 +peopl 1 +thefirst 1 +becomecomfort 1 +particularli 1 +youronli 1 +previou 1 +macpasc 1 +macintosh 1 +strongli 1 +urg 1 +inth 1 +becom 1 +comfort 1 +withunix 1 +pain 1 +wellspent 1 +wish 1 +attend 1 +tutori 1 +held 1 +comp 1 +session 1 +thefollow 1 +dai 1 +tbayou 1 +pick 1 +environ 1 +error 1 +inputfil 1 +outputfil 1 +look 1 +output 1 +break 1 +tire 1 +goto 1 +quit 1 +result 1 +submiss 1 +instruct 1 +given 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..9eee41de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,284 @@ +introduct 1 +data 1 +structur 1 +http 1 +wisc 1 +html 1 +revis 1 +fall 1 +jame 1 +larusinstructor 1 +laruslaru 1 +comput 1 +scienc 1 +laru 1 +offic 1 +hour 1 +tuesdai 1 +fridai 1 +amcontentsteach 1 +assistantstextlectur 1 +informationelectron 1 +mailth 1 +languagegradingexamscours 1 +scheduleassign 1 +assign 1 +program 1 +assignmentscours 1 +objectivesc 1 +object 1 +present 1 +concept 1 +gener 1 +wide 1 +us 1 +detail 1 +fundament 1 +build 1 +block 1 +cours 1 +abl 1 +identifi 1 +situat 1 +necessari 1 +determin 1 +requir 1 +select 1 +appropri 1 +cover 1 +reiter 1 +abstract 1 +type 1 +modular 1 +principl 1 +introduc 1 +essenti 1 +write 1 +clear 1 +correct 1 +maintain 1 +softwar 1 +close 1 +connect 1 +place 1 +strong 1 +emphasi 1 +appli 1 +exercis 1 +teach 1 +assistantswei 1 +zhang 1 +chin 1 +tang 1 +assist 1 +forthi 1 +section 1 +grade 1 +homework 1 +assignmentsand 1 +happi 1 +answer 1 +question 1 +theassign 1 +aspect 1 +give 1 +troubl 1 +zhangoffic 1 +compuer 1 +sciencesoffic 1 +wednesdai 1 +thursdai 1 +sundai 1 +phone 1 +email 1 +address 1 +weiz 1 +mondai 1 +amoffic 1 +cchin 1 +home 1 +page 1 +textth 1 +text 1 +book 1 +problem 1 +solv 1 +wall 1 +mirror 1 +frank 1 +carrano 1 +isbn 1 +well 1 +written 1 +littl 1 +long 1 +wind 1 +materi 1 +also 1 +includ 1 +background 1 +separ 1 +languag 1 +lectur 1 +often 1 +alwai 1 +follow 1 +david 1 +dewitt 1 +note 1 +complet 1 +simpl 1 +short 1 +true 1 +contain 1 +narr 1 +basi 1 +feel 1 +free 1 +skip 1 +portion 1 +addit 1 +want 1 +purchas 1 +avail 1 +doit 1 +document 1 +desk 1 +dayton 1 +street 1 +entranc 1 +first 1 +experi 1 +unix 1 +need 1 +inform 1 +activ 1 +account 1 +log 1 +creat 1 +edit 1 +manipul 1 +file 1 +compil 1 +run 1 +debug 1 +handout 1 +crucial 1 +help 1 +psycholog 1 +mention 1 +attend 1 +strongli 1 +recommend 1 +regularli 1 +appear 1 +textbook 1 +exam 1 +needless 1 +respons 1 +base 1 +read 1 +electron 1 +maili 1 +mail 1 +notifi 1 +student 1 +chang 1 +hint 1 +assum 1 +gradingther 1 +even 1 +semest 1 +final 1 +five 1 +approxim 1 +equal 1 +weight 1 +count 1 +taught 1 +must 1 +know 1 +skrentni 1 +larg 1 +complex 1 +unless 1 +difficult 1 +learn 1 +anoth 1 +gdbthere 1 +describ 1 +debugg 1 +chemistri 1 +decemb 1 +schedul 1 +rough 1 +outlin 1 +topic 1 +provid 1 +later 1 +administrationbas 1 +stuff 1 +function 1 +pointer 1 +record 1 +dynam 1 +storagelectur 1 +list 1 +binari 1 +search 1 +notat 1 +advanc 1 +listslectur 1 +stackslectur 1 +queueslectur 1 +hashinglectur 1 +recursionlectur 1 +treesbinari 1 +tree 1 +sort 1 +searchlectur 1 +treesgraphslectur 1 +sortinglectur 1 +tbaassign 1 +absolut 1 +turn 1 +index 1 +card 1 +name 1 +login 1 +nameyear 1 +school 1 +freshman 1 +sophomor 1 +previou 1 +coursesprevi 1 +experiencerec 1 +photograph 1 +pictur 1 +birthdai 1 +girl 1 +scout 1 +trip 1 +summer 1 +color 1 +black 1 +white 1 +size 1 +given 1 +without 1 +photo 1 +byte 1 +fora 1 +bound 1 +integ 1 +sequenc 1 +line 1 +second 1 +databaseof 1 +score 1 +tenni 1 +tournament 1 +produc 1 +aconcord 1 +hash 1 +tabl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..997ed913 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,121 @@ +data 1 +structur 1 +lec 1 +introduct 1 +structureslectur 1 +psychologylectur 1 +psychologycours 1 +inform 1 +announc 1 +read 1 +assign 1 +get 1 +start 1 +help 1 +exam 1 +program 1 +sampl 1 +code 1 +lectur 1 +cours 1 +materi 1 +comput 1 +lab 1 +home 1 +gener 1 +place 1 +recent 1 +first 1 +problem 1 +found 1 +locat 1 +page 1 +last 1 +makeup 1 +done 1 +copi 1 +solut 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +queue 1 +onlin 1 +stack 1 +topic 1 +list 1 +handin 1 +directori 1 +creat 1 +common 1 +error 1 +suggest 1 +addit 1 +welcom 1 +either 1 +vega 1 +work 1 +line 1 +mondai 1 +wednesdai 1 +magic 1 +number 1 +sourc 1 +file 1 +must 1 +abl 1 +compil 1 +otherwis 1 +unusu 1 +look 1 +stale 1 +forget 1 +reload 1 +updat 1 +browser 1 +cach 1 +becom 1 +outdat 1 +attend 1 +unix 1 +tutori 1 +need 1 +time 1 +thur 1 +futur 1 +tree 1 +chapter 1 +skip 1 +simul 1 +overload 1 +oper 1 +hash 1 +tabl 1 +link 1 +pointer 1 +dynam 1 +memori 1 +alloc 1 +sort 1 +search 1 +analysi 1 +algorithm 1 +basic 1 +recurs 1 +focu 1 +appendix 1 +skrentni 1 +wisc 1 +offic 1 +scienc 1 +hour 1 +teach 1 +assist 1 +baicheng 1 +billi 1 +liao 1 +bail 1 +cheng 1 +jiacheng 1 +pmcopyright 1 +jame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..8b6b0369 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,55 @@ +introduct 1 +theoret 1 +comput 1 +scienc 1 +fall 1 +room 1 +lectur 1 +brian 1 +cole 1 +email 1 +wisc 1 +offic 1 +hour 1 +mondai 1 +fridai 1 +teach 1 +assist 1 +david 1 +sundaram 1 +stukel 1 +tuesdai 1 +wednesdai 1 +thursdai 1 +text 1 +languag 1 +theori 1 +john 1 +martin 1 +north 1 +dakota 1 +state 1 +univers 1 +mcgraw 1 +hill 1 +isbn 1 +tent 1 +schedul 1 +includ 1 +exam 1 +inform 1 +clarif 1 +assign 1 +page 1 +grade 1 +polici 1 +written 1 +term 1 +examin 1 +final 1 +archiv 1 +mail 1 +list 1 +home 1 +septemb 1 +madison 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..4e108d7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,72 @@ +also 1 +math 1 +stat 1 +fall 1 +linear 1 +programmingfal 1 +schedul 1 +lectur 1 +mechan 1 +engin 1 +open 1 +book 1 +midterm 1 +exam 1 +time 1 +date 1 +thursdai 1 +octob 1 +locat 1 +final 1 +wednesdai 1 +decemb 1 +instructor 1 +olvi 1 +mangasarian 1 +offic 1 +comp 1 +pphone 1 +mail 1 +wisc 1 +hour 1 +semest 1 +teach 1 +assist 1 +telephon 1 +textbook 1 +program 1 +matlab 1 +ferri 1 +preliminari 1 +version 1 +doit 1 +madison 1 +syllabu 1 +cours 1 +overview 1 +inform 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +setup 1 +homework 1 +septemb 1 +novemb 1 +project 1 +sampl 1 +march 1 +solut 1 +mathemat 1 +home 1 +page 1 +relev 1 +site 1 +searchabl 1 +bibliograph 1 +databas 1 +item 1 +link 1 +variou 1 +updat 1 +period 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..2b9f83cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,68 @@ +fall 1 +introduct 1 +program 1 +languag 1 +compilersspr 1 +stori 1 +month 1 +octob 1 +schedul 1 +lectur 1 +tuth 1 +comp 1 +stat 1 +recit 1 +psycholog 1 +instructor 1 +susan 1 +horwitz 1 +offic 1 +telephon 1 +mail 1 +wisc 1 +hour 1 +tuesdai 1 +fridai 1 +appoint 1 +teach 1 +assist 1 +rahul 1 +kapoor 1 +mondai 1 +wednesdai 1 +text 1 +reserv 1 +wendt 1 +librari 1 +compil 1 +principl 1 +techniqu 1 +tool 1 +sethi 1 +ullman 1 +craft 1 +fischer 1 +leblanc 1 +check 1 +regularli 1 +gener 1 +cours 1 +inform 1 +overview 1 +date 1 +assign 1 +exam 1 +grade 1 +includ 1 +late 1 +polici 1 +get 1 +start 1 +read 1 +homework 1 +examin 1 +note 1 +us 1 +email 1 +link 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..efc08eee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,526 @@ +introduct 1 +oper 1 +system 1 +fall 1 +systemssect 1 +instructormarvin 1 +solomon 1 +offic 1 +comput 1 +sciencesoffic 1 +hour 1 +troffic 1 +phone 1 +email 1 +address 1 +wisc 1 +tarob 1 +mellencamp 1 +mwfoffic 1 +mellen 1 +new 1 +watch 1 +space 1 +latest 1 +updat 1 +answer 1 +midterm 1 +exam 1 +summari 1 +score 1 +avail 1 +detail 1 +breakdown 1 +grade 1 +distributioni 1 +also 1 +specif 1 +forproject 1 +avaiabl 1 +date 1 +project 1 +move 1 +thursdai 1 +typograph 1 +error 1 +note 1 +deadlock 1 +avoid 1 +correct 1 +importantli 1 +arraywa 1 +call 1 +place 1 +other 1 +popular 1 +demand 1 +isavail 1 +look 1 +warn 1 +take 1 +exampl 1 +larg 1 +grain 1 +salt 1 +long 1 +time 1 +courseus 1 +differ 1 +text 1 +cover 1 +topic 1 +order 1 +semest 1 +likelyb 1 +quit 1 +determin 1 +room 1 +comp 1 +octob 1 +discuss 1 +issu 1 +presentedin 1 +class 1 +electron 1 +hand 1 +direct 1 +forprogram 1 +post 1 +procedur 1 +givefork 1 +algorithm 1 +theproject 1 +specificationshould 1 +contain 1 +notifi 1 +page 1 +show 1 +sept 1 +fix 1 +bug 1 +minor 1 +import 1 +first 1 +caus 1 +paragraph 1 +slightli 1 +garbl 1 +thank 1 +jake 1 +dawlei 1 +carr 1 +point 1 +second 1 +line 1 +omit 1 +sampl 1 +code 1 +theprogram 1 +detailssect 1 +creat 1 +threadschedul 1 +start 1 +sched 1 +specifi 1 +correctli 1 +later 1 +section 1 +lipe 1 +zhang 1 +test 1 +data 1 +file 1 +directori 1 +public 1 +srccontain 1 +three 1 +java 1 +read 1 +graph 1 +javacontain 1 +definit 1 +classgraphdescrib 1 +petersoncycl 1 +graphcontain 1 +peterson 1 +shown 1 +mention 1 +initi 1 +placement 1 +fork 1 +notacycl 1 +petersonacycl 1 +acycl 1 +star 1 +topolog 1 +central 1 +philosoph 1 +sharingfork 1 +nine 1 +todd 1 +jenner 1 +typo 1 +number 1 +maxthink 1 +replac 1 +maxeat 1 +onlin 1 +versionha 1 +mistak 1 +thejava 1 +tutori 1 +string 1 +argument 1 +version 1 +substr 1 +argumenti 1 +offset 1 +charactersin 1 +franco 1 +tung 1 +chan 1 +occasion 1 +send 1 +urgent 1 +messag 1 +directli 1 +mail 1 +listof 1 +student 1 +regist 1 +cours 1 +archiv 1 +sent 1 +list 1 +receiv 1 +think 1 +sendmail 1 +request 1 +makefil 1 +copi 1 +work 1 +sourc 1 +rememb 1 +separ 1 +edit 1 +describ 1 +comment 1 +type 1 +maketo 1 +compil 1 +program 1 +make 1 +compilewithout 1 +run 1 +assign 1 +simul 1 +preemptiv 1 +multitask 1 +solari 1 +computershav 1 +ad 1 +tutoriali 1 +finish 1 +go 1 +onthread 1 +find 1 +help 1 +hint 1 +structur 1 +awar 1 +weekli 1 +seminar 1 +systemsand 1 +network 1 +meet 1 +mondai 1 +checkth 1 +colloquia 1 +book 1 +final 1 +theunivers 1 +bookstor 1 +begin 1 +ajava 1 +depart 1 +unix 1 +workstat 1 +must 1 +afil 1 +name 1 +cshrc 1 +local 1 +home 1 +onelin 1 +path 1 +chang 1 +effect 1 +either 1 +localor 1 +simpli 1 +back 1 +readi 1 +orient 1 +session 1 +user 1 +schedul 1 +forth 1 +follow 1 +tue 1 +thur 1 +csmon 1 +cslast 1 +content 1 +lectur 1 +inform 1 +intend 1 +gener 1 +techniqu 1 +usedto 1 +implement 1 +relat 1 +kind 1 +softwar 1 +among 1 +beprocess 1 +manag 1 +creation 1 +synchron 1 +commun 1 +processor 1 +prevent 1 +recoveri 1 +main 1 +memori 1 +virtual 1 +swap 1 +segment 1 +replacementalgorithm 1 +control 1 +disk 1 +input 1 +output 1 +devic 1 +protect 1 +secur 1 +tuesdai 1 +sciencesand 1 +statisticsdiscuss 1 +wednesdai 1 +psychologyth 1 +option 1 +least 1 +primari 1 +focu 1 +includ 1 +thejavaprogram 1 +languag 1 +anyquest 1 +regard 1 +rais 1 +thetext 1 +requir 1 +modern 1 +systemsbi 1 +andrew 1 +tanenbaum 1 +prentic 1 +hall 1 +strongli 1 +recommend 1 +languagebi 1 +arnold 1 +jame 1 +gosl 1 +addison 1 +weslei 1 +refer 1 +lot 1 +addit 1 +materi 1 +collect 1 +fast 1 +access 1 +tutorialth 1 +specificationjava 1 +documentationwatch 1 +spot 1 +link 1 +five 1 +sparcstat 1 +dialect 1 +unixoper 1 +provid 1 +anycomput 1 +programminglanguag 1 +howev 1 +scienc 1 +respons 1 +transfer 1 +requireddata 1 +set 1 +packag 1 +easi 1 +acquaint 1 +exercis 1 +designedto 1 +becom 1 +familiar 1 +environ 1 +subsequ 1 +involveprocess 1 +pair 1 +member 1 +feel 1 +free 1 +anyon 1 +butyou 1 +share 1 +partner 1 +cheat 1 +vigor 1 +punish 1 +enough 1 +said 1 +dateind 1 +entir 1 +havethre 1 +late 1 +daysof 1 +credit 1 +dai 1 +eachof 1 +us 1 +last 1 +choos 1 +sever 1 +favor 1 +congeni 1 +runtim 1 +subscript 1 +null 1 +pointer 1 +uniniti 1 +variabl 1 +except 1 +caught 1 +runtimerath 1 +mysteri 1 +crash 1 +random 1 +behavior 1 +much 1 +easier 1 +char 1 +arrai 1 +garbag 1 +storag 1 +extrem 1 +handi 1 +trendi 1 +faster 1 +histori 1 +mani 1 +reason 1 +grow 1 +littl 1 +withth 1 +byproduct 1 +coursewil 1 +knowledg 1 +market 1 +commod 1 +featur 1 +built 1 +particular 1 +wide 1 +withlanguag 1 +level 1 +support 1 +concurr 1 +thread 1 +monitor 1 +switch 1 +alwaysa 1 +disloc 1 +fortun 1 +excel 1 +resourc 1 +eas 1 +thetransit 1 +amazingli 1 +good 1 +neither 1 +introductori 1 +primer 1 +author 1 +assum 1 +youalreadi 1 +know 1 +manual 1 +although 1 +arefer 1 +manuali 1 +readabl 1 +wayfrom 1 +get 1 +everyth 1 +need 1 +write 1 +sophisticatedprogram 1 +univers 1 +encourag 1 +gather 1 +varieti 1 +ofoth 1 +togeth 1 +niceonlin 1 +tutorialabout 1 +anda 1 +manualfor 1 +standard 1 +librari 1 +count 1 +even 1 +statist 1 +timet 1 +decemb 1 +yourgrad 1 +remain 1 +four 1 +tent 1 +check 1 +frequent 1 +chapter 1 +process 1 +learn 1 +javaoct 1 +synchronizationoct 1 +schedulingoct 1 +schedulingdec 1 +systemsdec 1 +bottom 1 +view 1 +outlin 1 +programm 1 +state 1 +race 1 +condit 1 +semaphor 1 +bound 1 +buffer 1 +problem 1 +dine 1 +terminolog 1 +detect 1 +critic 1 +short 1 +term 1 +alloc 1 +compact 1 +come 1 +eduthu 1 +copyright 1 +marvin 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..b02bfecb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,88 @@ +fall 1 +home 1 +pagec 1 +introduct 1 +oper 1 +systemsfal 1 +tuesdai 1 +thursdai 1 +discuss 1 +fridai 1 +host 1 +maryvernon 1 +instructor 1 +andkarunamuthiah 1 +welcom 1 +page 1 +note 1 +lectur 1 +beinterchang 1 +follow 1 +date 1 +solut 1 +quiz 1 +assign 1 +offic 1 +hour 1 +email 1 +textbook 1 +read 1 +grade 1 +project 1 +quizz 1 +mail 1 +archiveapproxim 1 +schedul 1 +topicsweek 1 +oftopicsreadingsep 1 +concurr 1 +thread 1 +address 1 +space 1 +processeschapt 1 +manag 1 +cooper 1 +threadschapt 1 +synchron 1 +implement 1 +mutual 1 +exclusioncont 1 +semaphorescont 1 +monitor 1 +summarycont 1 +doct 1 +deadlock 1 +process 1 +schedulingchapt 1 +memori 1 +protect 1 +translat 1 +cach 1 +tlbschapter 1 +demand 1 +virtual 1 +memorycont 1 +review 1 +survei 1 +systemschapt 1 +file 1 +system 1 +name 1 +directorieschapt 1 +java 1 +object 1 +core 1 +methodstbanov 1 +secur 1 +thanksgiv 1 +class 1 +network 1 +distribut 1 +remot 1 +procedur 1 +call 1 +chapter 1 +global 1 +reviewchapt 1 +vernon 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..c9d16544 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,60 @@ +lectur 1 +home 1 +page 1 +fall 1 +cours 1 +inform 1 +instructor 1 +offic 1 +hour 1 +handout 1 +assign 1 +exam 1 +grade 1 +david 1 +wood 1 +tuesdai 1 +wednesdai 1 +appoint 1 +mail 1 +wisc 1 +phone 1 +class 1 +time 1 +thursdai 1 +locat 1 +phil 1 +atkinson 1 +get 1 +start 1 +help 1 +descript 1 +mentor 1 +error 1 +check 1 +correct 1 +sampl 1 +vhdl 1 +code 1 +compil 1 +simul 1 +mentorassign 1 +answer 1 +question 1 +select 1 +projectthi 1 +section 1 +includ 1 +project 1 +deadlin 1 +report 1 +demonstr 1 +decemb 1 +examsth 1 +midterm 1 +room 1 +final 1 +previou 1 +spring 1 +solut 1 +endterm 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..3fd4541d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,80 @@ +databas 1 +manag 1 +system 1 +design 1 +implementationc 1 +implementationcours 1 +inform 1 +postscript 1 +version 1 +class 1 +fridai 1 +instead 1 +offic 1 +hour 1 +time 1 +assign 1 +chang 1 +text 1 +last 1 +updat 1 +handout 1 +mail 1 +list 1 +solut 1 +chapter 1 +exercis 1 +pleas 1 +dont 1 +print 1 +first 1 +overview 1 +prerequisit 1 +topic 1 +cover 1 +grade 1 +import 1 +date 1 +polici 1 +issu 1 +minibas 1 +home 1 +page 1 +check 1 +detail 1 +html 1 +year 1 +midterm 1 +sampl 1 +us 1 +sybas 1 +info 1 +help 1 +yahoo 1 +entri 1 +resourc 1 +tutori 1 +tree 1 +debugg 1 +languag 1 +construct 1 +experi 1 +assignmentoth 1 +code 1 +convent 1 +instructor 1 +raghu 1 +ramakrishnan 1 +phone 1 +lectur 1 +discuss 1 +place 1 +ingraham 1 +teach 1 +assist 1 +xuemei 1 +xbao 1 +tue 1 +thur 1 +modifi 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..1169898d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,47 @@ +lectur 1 +home 1 +page 1 +welcom 1 +obvious 1 +construct 1 +semest 1 +progress 1 +addinginform 1 +need 1 +know 1 +import 1 +thing 1 +class 1 +meetingroom 1 +chang 1 +current 1 +meet 1 +russel 1 +labsfor 1 +option 1 +discuss 1 +fridai 1 +beenmov 1 +still 1 +psycholog 1 +instructor 1 +jeff 1 +naughton 1 +offic 1 +wednesdai 1 +time 1 +place 1 +lab 1 +inform 1 +taught 1 +close 1 +cooper 1 +fact 1 +assign 1 +probabl 1 +exam 1 +gener 1 +minibas 1 +particular 1 +pleas 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..8cee53f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,33 @@ +introduct 1 +algorithm 1 +cours 1 +inform 1 +instructor 1 +eric 1 +bach 1 +offic 1 +phone 1 +mail 1 +wisc 1 +hour 1 +appt 1 +teach 1 +assist 1 +bill 1 +donaldson 1 +raji 1 +gopalakrishnan 1 +midterm 1 +exam 1 +handout 1 +descript 1 +syllabu 1 +book 1 +reserv 1 +organ 1 +homework 1 +solut 1 +graph 1 +fractal 1 +behaviour 1 +archiv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..7104be3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,112 @@ +comput 1 +network 1 +cours 1 +professor 1 +landweb 1 +introduct 1 +advanc 1 +networksintroduct 1 +tabl 1 +content 1 +intern 1 +connect 1 +offer 1 +inform 1 +instructor 1 +teach 1 +assist 1 +syllabu 1 +mail 1 +archiv 1 +assign 1 +program 1 +refer 1 +select 1 +readingsclick 1 +hereto 1 +latest 1 +text 1 +version 1 +networkingcours 1 +madisoncours 1 +informationlecturetim 1 +mwfplace 1 +comp 1 +statclass 1 +email 1 +listinstructor 1 +lawrenc 1 +landweberoffic 1 +statphon 1 +wisc 1 +eduoffic 1 +hour 1 +srinivasa 1 +narayananoffic 1 +phone 1 +mondai 1 +wednesdai 1 +time 1 +conveni 1 +feel 1 +free 1 +appoint 1 +teitelbaumoffic 1 +naemail 1 +tuesdai 1 +thursdai 1 +fall 1 +moder 1 +complet 1 +error 1 +warn 1 +code 1 +class 1 +project 1 +implement 1 +layer 1 +reliabl 1 +adapt 1 +handout 1 +postscript 1 +overview 1 +slide 1 +powerpoint 1 +softwar 1 +engin 1 +design 1 +document 1 +evalu 1 +form 1 +html 1 +pictur 1 +grade 1 +criteria 1 +gradingmidterm 1 +exam 1 +final 1 +term 1 +prior 1 +midterm 1 +option 1 +book 1 +unix 1 +steven 1 +richard 1 +prentic 1 +hall 1 +isbn 1 +socket 1 +interfac 1 +lectur 1 +garbler 1 +packag 1 +annot 1 +bibliographyread 1 +partial 1 +icmp 1 +ospf 1 +ipng 1 +schedul 1 +spring 1 +review 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..977833f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,21 @@ +home 1 +page 1 +fall 1 +instructor 1 +robert 1 +meyer 1 +wisc 1 +time 1 +place 1 +comp 1 +offic 1 +hour 1 +cours 1 +descript 1 +homework 1 +solut 1 +note 1 +comput 1 +project 1 +part 1 +option 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..4105ec9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,92 @@ +fall 1 +also 1 +math 1 +stat 1 +nonlinear 1 +program 1 +theori 1 +applicationsfal 1 +schedul 1 +lectur 1 +cours 1 +mail 1 +list 1 +wisc 1 +http 1 +instructor 1 +michael 1 +ferri 1 +offic 1 +telephon 1 +hour 1 +mondai 1 +tuesdai 1 +wednesdai 1 +teach 1 +assist 1 +thursdai 1 +class 1 +text 1 +olvi 1 +mangasarian 1 +siam 1 +publish 1 +philadelphia 1 +us 1 +algorithm 1 +bazaraa 1 +sherali 1 +shetti 1 +second 1 +edit 1 +wilei 1 +york 1 +bertseka 1 +athena 1 +scientif 1 +gener 1 +inform 1 +overview 1 +introduct 1 +linear 1 +inequ 1 +theorem 1 +altern 1 +convex 1 +set 1 +concav 1 +function 1 +saddlepoint 1 +optim 1 +criteria 1 +without 1 +differenti 1 +first 1 +order 1 +dualiti 1 +condit 1 +exact 1 +penalti 1 +augment 1 +lagrangian 1 +gradient 1 +project 1 +book 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +assign 1 +grade 1 +homework 1 +week 1 +midterm 1 +examin 1 +novemb 1 +final 1 +mathemat 1 +home 1 +page 1 +updat 1 +period 1 +semest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..4105ec9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,92 @@ +fall 1 +also 1 +math 1 +stat 1 +nonlinear 1 +program 1 +theori 1 +applicationsfal 1 +schedul 1 +lectur 1 +cours 1 +mail 1 +list 1 +wisc 1 +http 1 +instructor 1 +michael 1 +ferri 1 +offic 1 +telephon 1 +hour 1 +mondai 1 +tuesdai 1 +wednesdai 1 +teach 1 +assist 1 +thursdai 1 +class 1 +text 1 +olvi 1 +mangasarian 1 +siam 1 +publish 1 +philadelphia 1 +us 1 +algorithm 1 +bazaraa 1 +sherali 1 +shetti 1 +second 1 +edit 1 +wilei 1 +york 1 +bertseka 1 +athena 1 +scientif 1 +gener 1 +inform 1 +overview 1 +introduct 1 +linear 1 +inequ 1 +theorem 1 +altern 1 +convex 1 +set 1 +concav 1 +function 1 +saddlepoint 1 +optim 1 +criteria 1 +without 1 +differenti 1 +first 1 +order 1 +dualiti 1 +condit 1 +exact 1 +penalti 1 +augment 1 +lagrangian 1 +gradient 1 +project 1 +book 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +assign 1 +grade 1 +homework 1 +week 1 +midterm 1 +examin 1 +novemb 1 +final 1 +mathemat 1 +home 1 +page 1 +updat 1 +period 1 +semest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..023f0de2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,59 @@ +home 1 +page 1 +comput 1 +system 1 +perform 1 +evalu 1 +model 1 +new 1 +sept 1 +assign 1 +postscript 1 +text 1 +mimic 1 +librari 1 +avail 1 +public 1 +cours 1 +inform 1 +lectur 1 +scienc 1 +devis 1 +softwar 1 +html 1 +user 1 +manual 1 +pleas 1 +print 1 +file 1 +contain 1 +mani 1 +imag 1 +take 1 +least 1 +half 1 +hour 1 +initi 1 +instruct 1 +tutori 1 +onlin 1 +help 1 +qnet 1 +exampl 1 +devc 1 +professor 1 +miron 1 +livni 1 +offic 1 +phone 1 +mail 1 +wisc 1 +teach 1 +assist 1 +chee 1 +yong 1 +chan 1 +cychan 1 +suggest 1 +comment 1 +send 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..35bfb199 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,78 @@ +find 1 +uwisc 1 +inform 1 +retriev 1 +technolog 1 +seek 1 +knowledgerichard 1 +belewvisit 1 +professorc 1 +lectur 1 +univ 1 +wisconsin 1 +comput 1 +scienc 1 +departmentfal 1 +thur 1 +acal 1 +room 1 +engrthi 1 +cours 1 +design 1 +student 1 +interest 1 +understand 1 +aboutth 1 +knowledg 1 +represent 1 +machinelearn 1 +techniqu 1 +underli 1 +much 1 +excit 1 +activ 1 +occur 1 +onth 1 +world 1 +wide 1 +complet 1 +descript 1 +coures 1 +canse 1 +abstract 1 +asyllabu 1 +major 1 +topic 1 +consid 1 +graphic 1 +mapof 1 +thesear 1 +relat 1 +anda 1 +tent 1 +schedul 1 +semesterwil 1 +proce 1 +resourc 1 +read 1 +overview 1 +part 1 +postscript 1 +polit 1 +infidel 1 +imag 1 +assign 1 +class 1 +email 1 +digest 1 +hypermai 1 +suggest 1 +compos 1 +classrel 1 +minut 1 +taken 1 +last 1 +modifi 1 +belew 1 +wisc 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..1b5ad4f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,132 @@ +introduct 1 +numer 1 +method 1 +last 1 +chang 1 +methodsthi 1 +page 1 +contain 1 +inform 1 +fall 1 +cours 1 +smile 1 +tent 1 +syllabu 1 +order 1 +score 1 +orderli 1 +mind 1 +addit 1 +materi 1 +note 1 +cooper 1 +assignmentson 1 +assign 1 +total 1 +midterm 1 +date 1 +well 1 +slightli 1 +point 1 +problem 1 +diari 1 +class 1 +residu 1 +error 1 +condit 1 +rick 1 +carl 1 +offic 1 +hour 1 +list 1 +errata 1 +text 1 +us 1 +email 1 +concern 1 +updat 1 +sinc 1 +question 1 +comput 1 +complex 1 +numericalanalysi 1 +algorithm 1 +post 1 +preprint 1 +foremostmathematician 1 +todai 1 +subject 1 +interest 1 +trickytop 1 +least 1 +squar 1 +solut 1 +approxim 1 +time 1 +place 1 +also 1 +textmai 1 +supplement 1 +byaddit 1 +file 1 +mention 1 +book 1 +areavail 1 +organ 1 +chapter 1 +awar 1 +though 1 +site 1 +name 1 +begin 1 +capitallett 1 +sometim 1 +lower 1 +caselett 1 +matlab 1 +session 1 +present 1 +plan 1 +rather 1 +fortran 1 +kermit 1 +sigmon 1 +primer 1 +edit 1 +avail 1 +doit 1 +look 1 +handout 1 +student 1 +reaction 1 +access 1 +telnet 1 +winor 1 +machin 1 +overviewcours 1 +answer 1 +word 1 +grade 1 +four 1 +digit 1 +current 1 +conduct 1 +unix 1 +orient 1 +user 1 +andp 1 +relat 1 +linksyou 1 +might 1 +wish 1 +explor 1 +csdepart 1 +home 1 +system 1 +frequent 1 +ask 1 +simpl 1 +tutori 1 +advanc 1 +referenceviva 1 +good 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..54d355dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,55 @@ +numer 1 +function 1 +analysi 1 +last 1 +chang 1 +analysisthi 1 +page 1 +contain 1 +inform 1 +fall 1 +version 1 +math 1 +current 1 +class 1 +note 1 +avail 1 +follow 1 +directori 1 +well 1 +hard 1 +copi 1 +doit 1 +recent 1 +announc 1 +post 1 +grade 1 +time 1 +locat 1 +statlectur 1 +carl 1 +boor 1 +email 1 +deboor 1 +wisc 1 +offic 1 +hour 1 +stat 1 +line 1 +classnot 1 +viii 1 +index 1 +assign 1 +none 1 +concern 1 +homework 1 +cours 1 +relat 1 +question 1 +linksyou 1 +might 1 +wish 1 +explor 1 +depart 1 +home 1 +courseoff 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..21266501 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,13 @@ +approxim 1 +theori 1 +last 1 +chang 1 +theorythi 1 +page 1 +contain 1 +inform 1 +spring 1 +version 1 +math 1 +cours 1 +note 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..bca886d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,65 @@ +home 1 +page 1 +toni 1 +silva 1 +sectioncsm 1 +instructor 1 +contact 1 +email 1 +dsilva 1 +wisc 1 +offic 1 +comput 1 +scienc 1 +statist 1 +phone 1 +hour 1 +appoint 1 +textbookproblem 1 +solv 1 +object 1 +program 1 +walter 1 +savitch 1 +section 1 +inform 1 +sept 1 +comp 1 +stat 1 +firstdai 1 +noland 1 +specifi 1 +timet 1 +chamberlin 1 +cours 1 +handout 1 +tent 1 +syllabu 1 +semest 1 +late 1 +polici 1 +grade 1 +criteria 1 +academ 1 +misconduct 1 +import 1 +softwar 1 +introduct 1 +microsoft 1 +windowshint 1 +window 1 +compilersth 1 +oper 1 +systememailmosaicnetscap 1 +borland 1 +languageth 1 +text 1 +assign 1 +tuesdai 1 +thursdai 1 +solut 1 +quizz 1 +quiz 1 +last 1 +modifi 1 +anthoni 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..c046ea57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,432 @@ +comput 1 +visionc 1 +visionfal 1 +instructor 1 +chuck 1 +dyeroffic 1 +csstelephon 1 +email 1 +dyer 1 +wisc 1 +eduoffic 1 +hour 1 +mondai 1 +thursdai 1 +appointmentteach 1 +assist 1 +bryan 1 +sooffic 1 +wednesdai 1 +fridai 1 +appointmentstud 1 +gener 1 +cours 1 +informationfundament 1 +vision 1 +first 1 +introduct 1 +level 1 +imag 1 +analysi 1 +method 1 +includ 1 +format 1 +edg 1 +detect 1 +featuredetect 1 +segment 1 +principl 1 +defin 1 +modul 1 +forreconstruct 1 +three 1 +dimension 1 +scene 1 +inform 1 +usingtechniqu 1 +asshap 1 +shade 1 +depth 1 +stereo 1 +activ 1 +recoveri 1 +focu 1 +andocclud 1 +contour 1 +viewpoint 1 +control 1 +motion 1 +track 1 +model 1 +base 1 +object 1 +recognit 1 +schedul 1 +tuesdai 1 +prerequisit 1 +fundament 1 +calculu 1 +probabl 1 +theori 1 +linear 1 +algebra 1 +grade 1 +midterm 1 +exam 1 +novemb 1 +homework 1 +assign 1 +project 1 +class 1 +particip 1 +syllabu 1 +requir 1 +read 1 +select 1 +part 1 +machin 1 +jain 1 +kasturi 1 +schunck 1 +mcgraw 1 +hill 1 +york 1 +collect 1 +readingsfrom 1 +journal 1 +confer 1 +proceed 1 +sold 1 +doit 1 +document 1 +small 1 +batchessupplementari 1 +sourcesonlin 1 +informationmost 1 +avail 1 +onlin 1 +urlhttp 1 +html 1 +date 1 +chapter 1 +paper 1 +handout 1 +except 1 +primarili 1 +student 1 +score 1 +assignmentshomework 1 +enhanc 1 +histogram 1 +modif 1 +option 1 +make 1 +copi 1 +portrait 1 +public 1 +contrast 1 +face 1 +byfirst 1 +rotat 1 +crop 1 +window 1 +around 1 +head 1 +shoulder 1 +final 1 +interact 1 +adjust 1 +theintens 1 +function 1 +color 1 +editor 1 +thewindow 1 +button 1 +also 1 +free 1 +modifi 1 +thing 1 +colorif 1 +wish 1 +found 1 +good 1 +grayscal 1 +transformationsav 1 +result 1 +andput 1 +directori 1 +whereth 1 +origin 1 +send 1 +tell 1 +qualit 1 +whatintens 1 +transform 1 +appli 1 +improv 1 +qualityof 1 +overal 1 +photo 1 +board 1 +feel 1 +ownweb 1 +home 1 +page 1 +well 1 +skeleton 1 +octob 1 +learn 1 +get 1 +start 1 +vista 1 +softwar 1 +program 1 +manual 1 +correct 1 +chang 1 +condit 1 +least 1 +instead 1 +prevent 1 +type 1 +shape 1 +disappear 1 +altogeth 1 +count 1 +transit 1 +case 1 +matrix 1 +citi 1 +block 1 +distanc 1 +infin 1 +larg 1 +constant 1 +four 1 +corner 1 +chessboard 1 +center 1 +posit 1 +test 1 +us 1 +hand 1 +evalu 1 +thin 1 +might 1 +want 1 +follow 1 +addit 1 +experi 1 +output 1 +convert 1 +ubyt 1 +vconvert 1 +edit 1 +need 1 +emac 1 +clean 1 +header 1 +file 1 +contain 1 +line 1 +right 1 +repn 1 +component_interp 1 +gradient 1 +low_threshold 1 +high_threshold 1 +vlink 1 +vsegedg 1 +exampl 1 +approach 1 +determin 1 +direct 1 +index 1 +finger 1 +point 1 +applic 1 +note 1 +fact 1 +delet 1 +entir 1 +surround 1 +algorithm 1 +zhang 1 +suen 1 +fast 1 +parallel 1 +digit 1 +pattern 1 +comm 1 +wang 1 +comment 1 +comparison 1 +differ 1 +version 1 +spline 1 +mosaic 1 +adelson 1 +pyramid 1 +process 1 +engin 1 +burt 1 +laplacian 1 +compact 1 +code 1 +ieee 1 +tran 1 +multiresolut 1 +graphic 1 +hint 1 +faq 1 +produc 1 +snake 1 +kass 1 +witkin 1 +terzopoulo 1 +william 1 +shah 1 +curvatur 1 +estim 1 +understand 1 +decemb 1 +titl 1 +abstract 1 +supplementari 1 +help 1 +topic 1 +done 1 +stanford 1 +account 1 +sparcstat 1 +call 1 +room 1 +disk 1 +space 1 +quota 1 +store 1 +sure 1 +compress 1 +other 1 +gzip 1 +howev 1 +order 1 +save 1 +sent 1 +list 1 +goe 1 +everyon 1 +printer 1 +print 1 +laserprint 1 +laser 1 +locat 1 +altern 1 +name 1 +shortest 1 +queue 1 +caution 1 +check 1 +job 1 +manner 1 +take 1 +long 1 +consider 1 +environ 1 +tool 1 +execut 1 +displai 1 +system 1 +varieti 1 +imgstar 1 +basic 1 +oper 1 +invok 1 +unix 1 +like 1 +command 1 +khoro 1 +develop 1 +provid 1 +languag 1 +interfac 1 +rapid 1 +prototyp 1 +simpl 1 +cantata 1 +netpbm 1 +toolkit 1 +convers 1 +pbmplu 1 +packag 1 +matlab 1 +numer 1 +visual 1 +signal 1 +toolbox 1 +especi 1 +relev 1 +although 1 +databas 1 +access 1 +examin 1 +solut 1 +held 1 +regular 1 +classroom 1 +earli 1 +time 1 +cover 1 +textbook 1 +bring 1 +sheet 1 +side 1 +main 1 +idea 1 +proof 1 +question 1 +ask 1 +spring 1 +link 1 +interest 1 +highli 1 +recommend 1 +wandel 1 +number 1 +scienc 1 +hdtv 1 +grand 1 +allianc 1 +specif 1 +advanc 1 +televis 1 +committe 1 +atsc 1 +postscript 1 +spie 1 +optic 1 +librari 1 +demo 1 +appl 1 +quicktim 1 +product 1 +panoramix 1 +panoram 1 +decfac 1 +talk 1 +synthet 1 +video 1 +rate 1 +virtual 1 +realiti 1 +qbic 1 +miscellan 1 +relat 1 +boston 1 +univers 1 +cardiff 1 +royal 1 +institut 1 +sweden 1 +virginia 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..a7163db5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,67 @@ +fall 1 +section 1 +algebra 1 +languag 1 +program 1 +instructor 1 +dave 1 +zimmermannemail 1 +dzimm 1 +wisc 1 +educlass 1 +meet 1 +time 1 +place 1 +nolandoffic 1 +offic 1 +phone 1 +hour 1 +announcementsprogram 1 +wednesdai 1 +octob 1 +readi 1 +fridai 1 +novemb 1 +gener 1 +cours 1 +informationc 1 +home 1 +pagecours 1 +objectivesvectra 1 +labc 1 +consultantssyllabuswork 1 +homeclass 1 +handoutsprogramsexam 1 +quizzeslectur 1 +notesgreg 1 +sharp 1 +style 1 +guidegrad 1 +referenc 1 +last 1 +digit 1 +number 1 +quizzesprogramsexam 1 +polici 1 +informationemail 1 +policygrad 1 +policyl 1 +policyacadem 1 +misconduct 1 +policytext 1 +problem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +list 1 +known 1 +erratalast 1 +modifi 1 +zimmermann 1 +base 1 +greg 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..7041a5db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,31 @@ +linear 1 +program 1 +method 1 +gener 1 +cours 1 +inform 1 +offer 1 +fall 1 +spring 1 +semest 1 +page 1 +variou 1 +instructor 1 +michael 1 +ferri 1 +mangasarian 1 +graduat 1 +wisconsin 1 +network 1 +flow 1 +integ 1 +nonlinear 1 +theori 1 +algorithm 1 +comput 1 +larg 1 +spars 1 +system 1 +last 1 +modifi 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..94f4e7b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,115 @@ +spring 1 +network 1 +flowsspr 1 +schedul 1 +lectur 1 +cours 1 +mail 1 +list 1 +wisc 1 +class 1 +fridai 1 +februari 1 +instructor 1 +michael 1 +ferri 1 +offic 1 +telephon 1 +hour 1 +mondai 1 +wednesdai 1 +teach 1 +assist 1 +leei 1 +tuesdai 1 +thursdai 1 +requir 1 +text 1 +flow 1 +ravindra 1 +ahuja 1 +thoma 1 +magnanti 1 +jame 1 +orlin 1 +prentic 1 +hall 1 +us 1 +linear 1 +program 1 +chvatal 1 +freeman 1 +optim 1 +bertseka 1 +press 1 +gener 1 +inform 1 +overview 1 +path 1 +tree 1 +cycl 1 +data 1 +structur 1 +shortest 1 +cost 1 +simplex 1 +method 1 +convex 1 +equilibria 1 +lagrangian 1 +relax 1 +multicommod 1 +applic 1 +prerequisit 1 +knowledg 1 +grade 1 +homework 1 +assign 1 +project 1 +final 1 +examin 1 +close 1 +book 1 +except 1 +sheet 1 +paper 1 +allow 1 +repres 1 +question 1 +march 1 +april 1 +comput 1 +unix 1 +orient 1 +session 1 +first 1 +time 1 +user 1 +novic 1 +previous 1 +workstat 1 +held 1 +week 1 +room 1 +second 1 +last 1 +minut 1 +introduct 1 +login 1 +access 1 +gam 1 +public 1 +cshrc 1 +local 1 +sourc 1 +alter 1 +set 1 +directori 1 +appropri 1 +solari 1 +machin 1 +mathemat 1 +home 1 +page 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..ff416359 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,128 @@ +spring 1 +also 1 +math 1 +comput 1 +method 1 +larg 1 +spars 1 +systemsspr 1 +schedul 1 +lectur 1 +cours 1 +mail 1 +list 1 +wisc 1 +class 1 +fridai 1 +februari 1 +instructor 1 +michael 1 +ferri 1 +offic 1 +telephon 1 +hour 1 +mondai 1 +wednesdai 1 +teach 1 +assist 1 +leei 1 +tuesdai 1 +thursdai 1 +recommend 1 +textbook 1 +matrix 1 +golub 1 +loan 1 +john 1 +hopkinsunivers 1 +press 1 +second 1 +edit 1 +direct 1 +matric 1 +duff 1 +erisman 1 +reid 1 +oxford 1 +scienc 1 +public 1 +finit 1 +dimension 1 +vector 1 +space 1 +halmo 1 +springer 1 +verlag 1 +gener 1 +inform 1 +overview 1 +introduct 1 +storag 1 +scheme 1 +gaussian 1 +elimin 1 +dens 1 +error 1 +analysi 1 +local 1 +pivot 1 +strategi 1 +modif 1 +iter 1 +linear 1 +solver 1 +least 1 +squar 1 +nonlinear 1 +equat 1 +optim 1 +applic 1 +parallel 1 +techniqu 1 +eigenvalu 1 +eigenvector 1 +prerequisit 1 +consent 1 +grade 1 +homework 1 +assign 1 +project 1 +final 1 +examin 1 +close 1 +book 1 +except 1 +sheet 1 +paper 1 +allow 1 +repres 1 +question 1 +march 1 +april 1 +handout 1 +ieee 1 +arithmet 1 +machin 1 +unix 1 +orient 1 +session 1 +first 1 +time 1 +user 1 +novic 1 +previous 1 +us 1 +workstat 1 +held 1 +week 1 +room 1 +last 1 +minut 1 +instruct 1 +matlab 1 +mathemat 1 +program 1 +home 1 +page 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..d0612cd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,56 @@ +fall 1 +construct 1 +compilersfal 1 +schedul 1 +lectur 1 +tuth 1 +csst 1 +instructor 1 +charl 1 +fischer 1 +offic 1 +telephon 1 +mail 1 +wisc 1 +hour 1 +mondai 1 +wednesdai 1 +fridai 1 +appoint 1 +teach 1 +assist 1 +krishna 1 +kunchithapadam 1 +krisna 1 +tuesdai 1 +thursdai 1 +program 1 +assign 1 +homework 1 +read 1 +weyer 1 +class 1 +text 1 +craft 1 +compil 1 +richard 1 +leblanc 1 +benjamin 1 +cum 1 +check 1 +regularli 1 +gener 1 +cours 1 +inform 1 +overview 1 +date 1 +grade 1 +examin 1 +get 1 +start 1 +handout 1 +note 1 +us 1 +tool 1 +link 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..e515cfcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,57 @@ +fall 1 +section 1 +algebra 1 +languag 1 +program 1 +instructor 1 +greg 1 +sharpemail 1 +wisc 1 +eduoffic 1 +offic 1 +phone 1 +hour 1 +appt 1 +grader 1 +krishna 1 +kunchithapadamemail 1 +krisna 1 +edugener 1 +cours 1 +informationc 1 +home 1 +pagecours 1 +objectivesvectra 1 +labc 1 +consultantssyllabuscours 1 +difficultywork 1 +homenewsstartup 1 +informationclass 1 +noteshomeworkexam 1 +quizzesstyl 1 +guideemail 1 +archivepolici 1 +informationemail 1 +policygrad 1 +policyl 1 +policyacadem 1 +misconduct 1 +polici 1 +must 1 +read 1 +textproblem 1 +solv 1 +object 1 +porgrammingwalt 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +pleas 1 +list 1 +known 1 +erratalast 1 +modifi 1 +sharpgreg 1 +http 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..1dd47f2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,159 @@ +class 1 +home 1 +page 1 +fall 1 +algebra 1 +languag 1 +program 1 +section 1 +chad 1 +lane 1 +wisc 1 +announc 1 +line 1 +tue 1 +import 1 +need 1 +version 1 +tribbl 1 +compil 1 +problem 1 +enumer 1 +type 1 +recogn 1 +outsid 1 +must 1 +declar 1 +insid 1 +public 1 +result 1 +privat 1 +bump 1 +bottom 1 +shown 1 +also 1 +note 1 +definit 1 +randomintinrang 1 +defin 1 +correspond 1 +function 1 +bodi 1 +call 1 +within 1 +work 1 +file 1 +prog 1 +forgot 1 +chang 1 +valu 1 +uppercas 1 +everyth 1 +copi 1 +want 1 +overwrit 1 +obsolet 1 +us 1 +includ 1 +might 1 +check 1 +help 1 +link 1 +someth 1 +ad 1 +comment 1 +suggest 1 +name 1 +throughout 1 +project 1 +part 1 +directori 1 +updat 1 +descript 1 +consist 1 +pleas 1 +make 1 +sure 1 +discrep 1 +sampl 1 +chri 1 +weaver 1 +shouldn 1 +matter 1 +readi 1 +crucial 1 +read 1 +entir 1 +assign 1 +understand 1 +basic 1 +attempt 1 +earli 1 +start 1 +hard 1 +requir 1 +time 1 +piec 1 +togeth 1 +bring 1 +question 1 +tuesdai 1 +midterm 1 +grade 1 +freshmen 1 +either 1 +mean 1 +fine 1 +great 1 +thumb 1 +noth 1 +freshman 1 +disregard 1 +stuff 1 +tent 1 +semest 1 +syllabu 1 +handout 1 +prepar 1 +quizz 1 +test 1 +solutionscours 1 +inform 1 +polici 1 +text 1 +solv 1 +object 1 +walter 1 +savitch 1 +addison 1 +weslei 1 +publish 1 +compani 1 +meet 1 +vleck 1 +administr 1 +late 1 +mail 1 +attend 1 +academ 1 +misconduct 1 +introduct 1 +microsoft 1 +window 1 +first 1 +borland 1 +second 1 +vectra 1 +sourc 1 +code 1 +consult 1 +extra 1 +refer 1 +materi 1 +mani 1 +answer 1 +sharp 1 +lectur 1 +style 1 +guidelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..2b9f83cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,68 @@ +fall 1 +introduct 1 +program 1 +languag 1 +compilersspr 1 +stori 1 +month 1 +octob 1 +schedul 1 +lectur 1 +tuth 1 +comp 1 +stat 1 +recit 1 +psycholog 1 +instructor 1 +susan 1 +horwitz 1 +offic 1 +telephon 1 +mail 1 +wisc 1 +hour 1 +tuesdai 1 +fridai 1 +appoint 1 +teach 1 +assist 1 +rahul 1 +kapoor 1 +mondai 1 +wednesdai 1 +text 1 +reserv 1 +wendt 1 +librari 1 +compil 1 +principl 1 +techniqu 1 +tool 1 +sethi 1 +ullman 1 +craft 1 +fischer 1 +leblanc 1 +check 1 +regularli 1 +gener 1 +cours 1 +inform 1 +overview 1 +date 1 +assign 1 +exam 1 +grade 1 +includ 1 +late 1 +polici 1 +get 1 +start 1 +read 1 +homework 1 +examin 1 +note 1 +us 1 +email 1 +link 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..ecefb261 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,51 @@ +home 1 +page 1 +hummert 1 +sectionsc 1 +instructor 1 +contact 1 +email 1 +wisc 1 +offic 1 +comput 1 +scienc 1 +statist 1 +phone 1 +hour 1 +mondai 1 +thursdai 1 +announc 1 +textbookproblem 1 +solv 1 +object 1 +program 1 +walter 1 +savitch 1 +section 1 +inform 1 +psych 1 +grade 1 +cours 1 +handout 1 +tent 1 +syllabu 1 +semest 1 +late 1 +polici 1 +criteria 1 +academ 1 +misconduct 1 +viewgraph 1 +import 1 +softwar 1 +introduct 1 +microsoft 1 +windowshint 1 +window 1 +compilersth 1 +oper 1 +systememailmosaicnetscap 1 +borland 1 +languageth 1 +text 1 +assign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..cbd7d05a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,43 @@ +infoc 1 +info 1 +section 1 +name 1 +bodnersect 1 +offic 1 +hour 1 +mondai 1 +thursdai 1 +number 1 +comput 1 +scienc 1 +statist 1 +hall 1 +doit 1 +phone 1 +mail 1 +jonb 1 +wisc 1 +eduher 1 +thing 1 +keep 1 +mind 1 +need 1 +copi 1 +guid 1 +click 1 +choos 1 +print 1 +file 1 +menu 1 +question 1 +pleas 1 +stop 1 +send 1 +grade 1 +avail 1 +bodner 1 +mound 1 +madison 1 +last 1 +modifi 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..a3dd5428 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,139 @@ +intro 1 +kunen 1 +section 1 +introduct 1 +artifici 1 +intellig 1 +notic 1 +inform 1 +spring 1 +detail 1 +coursewil 1 +appear 1 +later 1 +instructor 1 +kunenoffic 1 +stat 1 +buildingtelephon 1 +email 1 +wisc 1 +eduoffic 1 +hour 1 +appoint 1 +grade 1 +four 1 +program 1 +assign 1 +count 1 +three 1 +exam 1 +thirdexam 1 +schedul 1 +time 1 +place 1 +final 1 +turn 1 +midnight 1 +thedai 1 +late 1 +loos 1 +topic 1 +cover 1 +follow 1 +order 1 +entir 1 +logic 1 +buti 1 +design 1 +assignmenti 1 +discuss 1 +lisp 1 +search 1 +game 1 +plai 1 +prolog 1 +natur 1 +languag 1 +understand 1 +learn 1 +neural 1 +network 1 +deduct 1 +plan 1 +reason 1 +uncertain 1 +knowledg 1 +sinc 1 +us 1 +begin 1 +common 1 +would 1 +probabl 1 +usefulto 1 +refer 1 +avail 1 +supplement 1 +lecturesand 1 +line 1 +help 1 +within 1 +manypaperback 1 +like 1 +lispcraft 1 +wilenski 1 +anoth 1 +possibl 1 +ansi 1 +book 1 +graham 1 +code 1 +ultim 1 +steel 1 +edit 1 +page 1 +also 1 +click 1 +sun 1 +addit 1 +textbook 1 +modern 1 +approach 1 +russel 1 +norvig 1 +class 1 +recit 1 +session 1 +engr 1 +psych 1 +essentiallli 1 +materi 1 +present 1 +answer 1 +question 1 +give 1 +hint 1 +review 1 +usual 1 +last 1 +minut 1 +teach 1 +attend 1 +differ 1 +lectur 1 +cours 1 +directori 1 +public 1 +alpha 1 +beta 1 +problem 1 +previou 1 +alpha_beta 1 +best 1 +first 1 +astar 1 +fall 1 +postscript 1 +still 1 +older 1 +chang 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..6bfb61e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,86 @@ +fall 1 +advanc 1 +comput 1 +architectur 1 +ifal 1 +offer 1 +cours 1 +inform 1 +instructor 1 +mark 1 +hilloffic 1 +comp 1 +statemail 1 +markhil 1 +wisc 1 +eduoffic 1 +hour 1 +tuesdai 1 +fridai 1 +appoint 1 +shenoffic 1 +statphon 1 +email 1 +mshen 1 +mondai 1 +thursdai 1 +tabl 1 +content 1 +reader 1 +lectur 1 +note 1 +homework 1 +project 1 +miscellaneawhat 1 +give 1 +talksread 1 +full 1 +paper 1 +doit 1 +introduct 1 +chapter 1 +perform 1 +cost 1 +instruct 1 +set 1 +cach 1 +part 1 +memori 1 +talluri 1 +hill 1 +basic 1 +pipelin 1 +level 1 +parallel 1 +input 1 +output 1 +interconnect 1 +process 1 +assign 1 +solut 1 +solutionproject 1 +propos 1 +novemb 1 +class 1 +talk 1 +decemb 1 +report 1 +noonmiscellanea 1 +spring 1 +final 1 +midterm 1 +us 1 +first 1 +edit 1 +hennessi 1 +patterson 1 +qualifi 1 +exam 1 +sourc 1 +hard 1 +question 1 +seminar 1 +wisconsin 1 +group 1 +world 1 +wide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..30732682 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,72 @@ +section 1 +instructor 1 +michael 1 +birk 1 +email 1 +mbirk 1 +wisc 1 +offic 1 +comp 1 +phone 1 +home 1 +hour 1 +appoint 1 +assign 1 +program 1 +administr 1 +inform 1 +text 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +room 1 +time 1 +psycholog 1 +tuesdai 1 +thursdai 1 +vectra 1 +syllabu 1 +comput 1 +grade 1 +standard 1 +late 1 +polici 1 +handin 1 +procedur 1 +cheat 1 +academ 1 +misconduct 1 +consult 1 +exampl 1 +string 1 +class 1 +us 1 +dynam 1 +alloc 1 +ration 1 +oper 1 +overload 1 +complex 1 +repres 1 +float 1 +point 1 +number 1 +anoth 1 +intstack 1 +simpl 1 +unlimit 1 +size 1 +data 1 +structur 1 +classinfo 1 +struct 1 +link 1 +page 1 +introduct 1 +microsoft 1 +window 1 +borland 1 +tutori 1 +debugg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..07c6371c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,107 @@ +section 1 +instructor 1 +michael 1 +birk 1 +email 1 +mbirk 1 +wisc 1 +offic 1 +comp 1 +phone 1 +home 1 +hour 1 +appoint 1 +announc 1 +test 1 +case 1 +program 1 +avail 1 +dice 1 +code 1 +discuss 1 +class 1 +rank 1 +last 1 +four 1 +digit 1 +student 1 +number 1 +past 1 +exam 1 +onlin 1 +spring 1 +fall 1 +note 1 +hangman 1 +assign 1 +mondai 1 +octob 1 +room 1 +lectur 1 +instruct 1 +format 1 +syllabu 1 +first 1 +eight 1 +week 1 +second 1 +come 1 +soon 1 +print 1 +output 1 +comput 1 +outsid 1 +late 1 +polici 1 +final 1 +chang 1 +meet 1 +administr 1 +inform 1 +text 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +time 1 +tuesdai 1 +thursdai 1 +vectra 1 +grade 1 +standard 1 +handin 1 +procedur 1 +cheat 1 +academ 1 +misconduct 1 +consult 1 +exampl 1 +string 1 +us 1 +dynam 1 +alloc 1 +ration 1 +oper 1 +overload 1 +complex 1 +repres 1 +float 1 +point 1 +anoth 1 +intstack 1 +simpl 1 +unlimit 1 +size 1 +data 1 +structur 1 +classinfo 1 +struct 1 +link 1 +page 1 +introduct 1 +microsoft 1 +window 1 +borland 1 +tutori 1 +debugg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..55e10050 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,80 @@ +section 1 +dave 1 +melskithes 1 +page 1 +chang 1 +frequent 1 +press 1 +reload 1 +button 1 +daili 1 +get 1 +start 1 +alreadi 1 +stuff 1 +assign 1 +uncomfort 1 +comput 1 +andth 1 +softwar 1 +help 1 +link 1 +info 1 +instructor 1 +david 1 +melski 1 +offic 1 +scienc 1 +statist 1 +floor 1 +phone 1 +hour 1 +email 1 +wisc 1 +click 1 +attach 1 +pleas 1 +meet 1 +noland 1 +psycholog 1 +text 1 +problem 1 +solv 1 +object 1 +program 1 +walter 1 +savitch 1 +addison 1 +wesleypublish 1 +gener 1 +us 1 +window 1 +usingborland 1 +refer 1 +materi 1 +rough 1 +syllabu 1 +archiv 1 +polici 1 +academ 1 +misconduct 1 +must 1 +read 1 +rule 1 +thumb 1 +share 1 +code 1 +consult 1 +grade 1 +late 1 +work 1 +check 1 +often 1 +essenti 1 +solut 1 +handout 1 +list 1 +tutor 1 +avail 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..2e715de1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,142 @@ +home 1 +page 1 +section 1 +fall 1 +algebra 1 +languag 1 +program 1 +instructor 1 +milo 1 +martin 1 +wisc 1 +time 1 +locat 1 +psychologyinstructor 1 +email 1 +eduoffic 1 +offic 1 +hour 1 +tuesdai 1 +thursdai 1 +appoint 1 +phone 1 +announcementsthi 1 +chang 1 +frequent 1 +respons 1 +check 1 +often 1 +novemb 1 +quiz 1 +take 1 +given 1 +class 1 +todai 1 +place 1 +onth 1 +homepag 1 +ad 1 +file 1 +us 1 +project 1 +room 1 +test 1 +scheduledfor 1 +wednesdai 1 +octob 1 +updat 1 +current 1 +grade 1 +haseveryth 1 +pleas 1 +make 1 +sure 1 +isaccur 1 +link 1 +coupl 1 +withinform 1 +html 1 +linksar 1 +titl 1 +document 1 +avail 1 +onfridai 1 +mondai 1 +remind 1 +exam 1 +gener 1 +inform 1 +sheet 1 +turn 1 +assign 1 +syllabu 1 +code 1 +style 1 +guid 1 +vectra 1 +consult 1 +schedul 1 +academ 1 +misconduct 1 +policyclass 1 +final 1 +bankaccount 1 +main 1 +postscript 1 +struct 1 +minmax 1 +exampl 1 +findth 1 +minimum 1 +maximum 1 +list 1 +number 1 +case 1 +enteredfrom 1 +stdin 1 +form 1 +creat 1 +formlett 1 +data 1 +specifi 1 +theopen_fil 1 +function 1 +introduc 1 +call 1 +refer 1 +user 1 +input 1 +withprompt 1 +version 1 +valu 1 +beginn 1 +standard 1 +introduct 1 +toth 1 +latest 1 +releas 1 +good 1 +viru 1 +hoax 1 +ethic 1 +andprofession 1 +conductassign 1 +survei 1 +questionar 1 +requir 1 +septemb 1 +fridai 1 +decemb 1 +solut 1 +score 1 +textbook 1 +problem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +known 1 +errata 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..3932182c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,89 @@ +home 1 +page 1 +martin 1 +ream 1 +classc 1 +section 1 +algebra 1 +languag 1 +programmingspr 1 +teach 1 +assist 1 +absolut 1 +nothingeverydai 1 +informationc 1 +class 1 +inform 1 +pagescommon 1 +programmingmistakesarch 1 +mail 1 +list 1 +messag 1 +sent 1 +semest 1 +calendar 1 +program 1 +tuesdai 1 +januari 1 +februari 1 +thursdai 1 +march 1 +exam 1 +april 1 +final 1 +placeto 1 +announcedcours 1 +detail 1 +contact 1 +email 1 +mream 1 +wisc 1 +offic 1 +comput 1 +scienc 1 +statist 1 +dayton 1 +phone 1 +hour 1 +appoint 1 +talk 1 +send 1 +textbookproblem 1 +solv 1 +object 1 +walter 1 +savitch 1 +noland 1 +csst 1 +contain 1 +vectra 1 +run 1 +window 1 +andborland 1 +addit 1 +cours 1 +tent 1 +syllabu 1 +extra 1 +materi 1 +late 1 +polici 1 +grade 1 +criteria 1 +academicmisconduct 1 +rule 1 +thumb 1 +share 1 +code 1 +assign 1 +work 1 +anyform 1 +former 1 +student 1 +made 1 +bigtodd 1 +thielwendi 1 +staatsabout 1 +instructor 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..004caa4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,107 @@ +fall 1 +section 1 +algebra 1 +languag 1 +program 1 +nolandinstructor 1 +mike 1 +steeleemail 1 +msteel 1 +wisc 1 +eduoffic 1 +comp 1 +stat 1 +buildingoffic 1 +hour 1 +time 1 +appoint 1 +soffic 1 +phone 1 +import 1 +announcementsi 1 +extend 1 +deadlin 1 +pleas 1 +check 1 +mail 1 +read 1 +thenew 1 +informationmidterm 1 +tuesdai 1 +novemb 1 +current 1 +grade 1 +line 1 +includ 1 +everyth 1 +hand 1 +sampl 1 +taken 1 +exampl 1 +pastfew 1 +week 1 +class 1 +fill 1 +stuff 1 +gloss 1 +makefulli 1 +function 1 +find 1 +us 1 +ifyou 1 +miss 1 +even 1 +didn 1 +understand 1 +note 1 +page 1 +near 1 +bottom 1 +rememb 1 +clarif 1 +programmingassign 1 +gener 1 +cours 1 +informationc 1 +home 1 +pagecours 1 +objectivesabout 1 +vectra 1 +labc 1 +consultantscours 1 +syllabu 1 +assignmentsnot 1 +work 1 +homeclass 1 +handoutsprogram 1 +assignmentsexam 1 +quizzessom 1 +examplespolici 1 +informationemail 1 +policygrad 1 +polici 1 +late 1 +academ 1 +misconduct 1 +policyus 1 +refer 1 +pagesintroduct 1 +microsoft 1 +windowsintroduct 1 +borland 1 +greg 1 +sharp 1 +styleguid 1 +codetextproblem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +list 1 +known 1 +erratalast 1 +modifi 1 +steel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..eae79d11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,131 @@ +fall 1 +session 1 +infoc 1 +sessionalgebra 1 +languag 1 +program 1 +instructor 1 +andrew 1 +prockemail 1 +prock 1 +wisc 1 +eduoffic 1 +comput 1 +scienc 1 +statist 1 +offic 1 +phone 1 +hour 1 +thgrader 1 +haihong 1 +wangemail 1 +consult 1 +mtwrannounc 1 +grade 1 +link 1 +pleas 1 +check 1 +verifi 1 +score 1 +modifi 1 +crazi 1 +todai 1 +made 1 +minor 1 +modif 1 +file 1 +assign 1 +copi 1 +alreadi 1 +gotton 1 +five 1 +onlin 1 +exam 1 +result 1 +rang 1 +ad 1 +polici 1 +sampl 1 +quiz 1 +test 1 +gener 1 +perus 1 +anoth 1 +think 1 +give 1 +good 1 +idea 1 +level 1 +knowledg 1 +need 1 +rememb 1 +topic 1 +webpag 1 +seem 1 +done 1 +email 1 +notic 1 +error 1 +question 1 +make 1 +sure 1 +assing 1 +well 1 +like 1 +work 1 +ahead 1 +final 1 +tuesdai 1 +decemb 1 +mark 1 +calendar 1 +everyon 1 +requir 1 +take 1 +page 1 +feel 1 +thing 1 +locat 1 +import 1 +carefulli 1 +read 1 +administr 1 +inform 1 +welcom 1 +class 1 +text 1 +problem 1 +solv 1 +walter 1 +savitch 1 +room 1 +time 1 +section 1 +tent 1 +syllabu 1 +late 1 +mail 1 +academ 1 +misconductcours 1 +materi 1 +cours 1 +info 1 +style 1 +guid 1 +lectur 1 +note 1 +archiv 1 +introduct 1 +microsoft 1 +window 1 +first 1 +borland 1 +second 1 +home 1 +vectra 1 +sourc 1 +code 1 +extra 1 +refer 1 +mani 1 +answer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..fae0ea06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,88 @@ +kelli 1 +page 1 +ratliff 1 +current 1 +grade 1 +keyword 1 +search 1 +mail 1 +messag 1 +exampl 1 +enter 1 +function 1 +without 1 +quot 1 +everi 1 +paragraph 1 +us 1 +word 1 +also 1 +wildcard 1 +link 1 +sent 1 +semest 1 +info 1 +addit 1 +inform 1 +interest 1 +backup 1 +copi 1 +disk 1 +filesviru 1 +world 1 +wide 1 +faqfun 1 +stuff 1 +usenet 1 +oracl 1 +resourc 1 +index 1 +virtual 1 +tourist 1 +mapth 1 +space 1 +shuttl 1 +clickabl 1 +badger 1 +herald 1 +site 1 +comicshumor 1 +abort 1 +retri 1 +ignor 1 +nine 1 +type 1 +usersfin 1 +weeklab 1 +jokesget 1 +softwar 1 +comput 1 +home 1 +might 1 +tryingsom 1 +sharewar 1 +freewar 1 +avail 1 +internet 1 +archiv 1 +program 1 +usual 1 +compress 1 +need 1 +somecompress 1 +unpack 1 +reviewsom 1 +command 1 +try 1 +biggest 1 +best 1 +maintain 1 +simtel 1 +minclud 1 +file 1 +post 1 +infocompress 1 +infofavorit 1 +clickher 1 +visit 1 +desautel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..1eb79c2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,75 @@ +home 1 +page 1 +russ 1 +man 1 +csc 1 +introduct 1 +comput 1 +program 1 +instructor 1 +russel 1 +manningemail 1 +rman 1 +wisc 1 +eduoffic 1 +room 1 +scienceoffic 1 +hour 1 +find 1 +basement 1 +saturdai 1 +except 1 +footbal 1 +game 1 +sundai 1 +come 1 +keep 1 +compani 1 +work 1 +like 1 +vectra 1 +although 1 +student 1 +prioriti 1 +grade 1 +lectur 1 +final 1 +click 1 +textbook 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +section 1 +inform 1 +semest 1 +univers 1 +rotc 1 +build 1 +scienc 1 +statist 1 +assign 1 +mondai 1 +novemb 1 +wednesdai 1 +cours 1 +handout 1 +syllabu 1 +late 1 +polici 1 +academ 1 +misconduct 1 +import 1 +softwar 1 +microsoft 1 +windowshint 1 +window 1 +compilersth 1 +oper 1 +systememailmosaicnetscap 1 +borland 1 +languageth 1 +textold 1 +quizz 1 +none 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..a248ddc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,95 @@ +introduct 1 +artifici 1 +intellig 1 +gener 1 +cours 1 +inform 1 +offer 1 +fall 1 +spring 1 +semest 1 +academ 1 +year 1 +section 1 +thefal 1 +topic 1 +cover 1 +principl 1 +knowledg 1 +base 1 +search 1 +techniqu 1 +best 1 +first 1 +alpha 1 +beta 1 +represent 1 +us 1 +predic 1 +logic 1 +semant 1 +network 1 +connectionist 1 +frame 1 +rule 1 +autom 1 +deduct 1 +applic 1 +problem 1 +solv 1 +plan 1 +expert 1 +system 1 +game 1 +plai 1 +vision 1 +natur 1 +languag 1 +understand 1 +learn 1 +robot 1 +program 1 +includ 1 +lisp 1 +possibl 1 +prolog 1 +previou 1 +assumedprerequisit 1 +page 1 +variou 1 +instructor 1 +chuck 1 +dyer 1 +kunen 1 +jude 1 +shavlik 1 +sabbat 1 +bryan 1 +local 1 +relat 1 +link 1 +madison 1 +seminar 1 +qualifi 1 +exam 1 +recent 1 +tabl 1 +content 1 +abstract 1 +journal 1 +mostli 1 +wendt 1 +librari 1 +readabl 1 +wisc 1 +group 1 +comput 1 +machin 1 +biologi 1 +dept 1 +graduat 1 +wisconsin 1 +motion 1 +extern 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..342d39e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,216 @@ +machin 1 +learn 1 +spring 1 +gener 1 +cours 1 +inform 1 +instructor 1 +jude 1 +shavlik 1 +stat 1 +wisc 1 +offic 1 +mondai 1 +wednesdai 1 +teach 1 +assist 1 +geoff 1 +weinberg 1 +highwai 1 +lab 1 +basement 1 +build 1 +geoffrei 1 +phone 1 +home 1 +overview 1 +postscript 1 +syllabu 1 +archiv 1 +class 1 +email 1 +readabl 1 +suggest 1 +project 1 +read 1 +assign 1 +chapter 1 +theori 1 +refin 1 +comput 1 +mitchel 1 +textbook 1 +feedback 1 +author 1 +april 1 +logic 1 +definit 1 +relat 1 +quinlan 1 +knowledg 1 +base 1 +artifici 1 +neural 1 +network 1 +towel 1 +chunk 1 +soar 1 +laird 1 +rosenbloom 1 +newel 1 +level 1 +dietterich 1 +analyt 1 +journal 1 +articl 1 +unsupervis 1 +fisher 1 +rumelhart 1 +zipser 1 +cogsci 1 +lenat 1 +genet 1 +algorithm 1 +march 1 +reinforc 1 +backpropag 1 +basic 1 +februari 1 +also 1 +moonei 1 +empir 1 +compar 1 +backprop 1 +concept 1 +space 1 +januari 1 +experiment 1 +scienc 1 +kibler 1 +introduct 1 +kbann 1 +cobweb 1 +sure 1 +answer 1 +sheet 1 +paper 1 +best 1 +idea 1 +next 1 +summar 1 +sentenc 1 +summari 1 +lead 1 +instead 1 +analyz 1 +late 1 +polici 1 +brr 1 +hand 1 +materi 1 +cover 1 +lectur 1 +homework 1 +train 1 +methodolog 1 +induc 1 +decis 1 +tree 1 +creat 1 +person 1 +start 1 +student 1 +five 1 +free 1 +dai 1 +semest 1 +exhaust 1 +penalti 1 +measur 1 +noon 1 +weekend 1 +make 1 +tractabl 1 +accept 1 +week 1 +previous 1 +us 1 +migrat 1 +progress 1 +heurist 1 +search 1 +version 1 +explan 1 +previou 1 +exam 1 +ineedagoodicon 1 +link 1 +line 1 +page 1 +nip 1 +premier 1 +confer 1 +recent 1 +tabl 1 +content 1 +abstract 1 +select 1 +mostli 1 +wendt 1 +librari 1 +irvin 1 +dataset 1 +pointer 1 +discoveri 1 +databas 1 +resourc 1 +stuff 1 +benchmark 1 +ieee 1 +council 1 +sever 1 +connect 1 +intern 1 +societi 1 +adapt 1 +behavior 1 +bibliographi 1 +server 1 +austrian 1 +institut 1 +canadian 1 +peopl 1 +extern 1 +refer 1 +help 1 +program 1 +akcl 1 +common 1 +lisp 1 +department 1 +workstat 1 +tip 1 +emac 1 +code 1 +write 1 +frequent 1 +ask 1 +question 1 +debugg 1 +novic 1 +steel 1 +languag 1 +edit 1 +manual 1 +print 1 +printer 1 +local 1 +group 1 +math 1 +comp 1 +biologi 1 +includ 1 +dept 1 +last 1 +modifi 1 +shavlikshavlik 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..1645b969 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,247 @@ +fall 1 +advanc 1 +oper 1 +systemsfal 1 +marvin 1 +solomon 1 +offic 1 +comput 1 +sciencesoffic 1 +hour 1 +troffic 1 +phone 1 +email 1 +address 1 +wisc 1 +new 1 +watch 1 +space 1 +latest 1 +updat 1 +last 1 +schedul 1 +project 1 +present 1 +list 1 +final 1 +exam 1 +mondai 1 +inroom 1 +scienc 1 +statist 1 +build 1 +room 1 +fridai 1 +noon 1 +exampl 1 +past 1 +midterm 1 +examtogeth 1 +sampl 1 +answer 1 +wednesdai 1 +octob 1 +pmin 1 +decemb 1 +exact 1 +time 1 +place 1 +bedetermin 1 +suggest 1 +informationabout 1 +avaiabl 1 +readabl 1 +version 1 +figur 1 +multic 1 +memori 1 +manag 1 +paper 1 +avail 1 +content 1 +summari 1 +lectur 1 +inform 1 +text 1 +cours 1 +grade 1 +intend 1 +give 1 +broad 1 +exposur 1 +advancedoper 1 +system 1 +topic 1 +assum 1 +student 1 +good 1 +semest 1 +onoper 1 +equival 1 +cover 1 +normal 1 +inconsider 1 +detail 1 +synchron 1 +interprocess 1 +commun 1 +file 1 +protect 1 +secur 1 +distribut 1 +tuesdai 1 +thursdai 1 +sciencestextther 1 +realli 1 +satisfactori 1 +textbook 1 +graduat 1 +level 1 +operatingsystem 1 +class 1 +usea 1 +select 1 +classic 1 +papersa 1 +structur 1 +around 1 +read 1 +journal 1 +articl 1 +andconfer 1 +proceed 1 +purchas 1 +doit 1 +formerli 1 +macc 1 +document 1 +deskfor 1 +similar 1 +ident 1 +thoseof 1 +previou 1 +us 1 +copi 1 +make 1 +individu 1 +youto 1 +discuss 1 +relev 1 +current 1 +click 1 +herefor 1 +tent 1 +review 1 +willinstead 1 +adiscuss 1 +major 1 +theme 1 +focal 1 +point 1 +activ 1 +particip 1 +strongli 1 +encourag 1 +will 1 +daili 1 +geta 1 +expect 1 +quietli 1 +listen 1 +week 1 +much 1 +lessout 1 +gradingther 1 +worth 1 +total 1 +design 1 +verifi 1 +carefulli 1 +thoroughli 1 +projecty 1 +requir 1 +complet 1 +term 1 +provid 1 +involv 1 +implement 1 +tool 1 +experiment 1 +implementationsof 1 +algorithm 1 +research 1 +literatur 1 +measur 1 +studi 1 +simul 1 +must 1 +compon 1 +survei 1 +unvalid 1 +suffici 1 +done 1 +person 1 +group 1 +larger 1 +smaller 1 +approv 1 +case 1 +basi 1 +write 1 +summar 1 +result 1 +meet 1 +standard 1 +public 1 +qualiti 1 +well 1 +also 1 +ashort 1 +presentationabout 1 +presentationsher 1 +presen 1 +approxim 1 +manyan 1 +stubb 1 +andrew 1 +bigg 1 +franci 1 +salmon 1 +gunawan 1 +agu 1 +qingmin 1 +wang 1 +chien 1 +pang 1 +jame 1 +chen 1 +eric 1 +larsen 1 +conroi 1 +fritz 1 +craig 1 +jordan 1 +prasad 1 +deshpand 1 +avinash 1 +sodani 1 +basnei 1 +rajesh 1 +raman 1 +biswadeep 1 +taxiao 1 +yanm 1 +xinyu 1 +richard 1 +zhang 1 +todd 1 +munson 1 +wenjun 1 +xinyi 1 +yufei 1 +zeyu 1 +sridhar 1 +gopal 1 +michael 1 +leesolomon 1 +eduthu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..8ebe0e0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,4 @@ +chiang 1 +time 1 +gradesgo 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..5c3ff7a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,123 @@ +section 1 +overal 1 +structur 1 +program 1 +primarili 1 +exercis 1 +gener 1 +problem 1 +solv 1 +write 1 +fortran 1 +code 1 +though 1 +want 1 +time 1 +solut 1 +algorithm 1 +even 1 +depend 1 +particular 1 +languag 1 +class 1 +follow 1 +mondai 1 +comput 1 +labyou 1 +us 1 +vectra 1 +scienc 1 +statist 1 +contain 1 +hewlett 1 +packard 1 +run 1 +microsoft 1 +window 1 +open 1 +seven 1 +dai 1 +week 1 +except 1 +certain 1 +holidai 1 +printer 1 +room 1 +locat 1 +across 1 +hall 1 +quota 1 +page 1 +print 1 +exce 1 +must 1 +contact 1 +either 1 +mail 1 +go 1 +offic 1 +hour 1 +prefer 1 +increas 1 +bewar 1 +machin 1 +aren 1 +configur 1 +correctli 1 +along 1 +wall 1 +closest 1 +outsidehallwai 1 +toward 1 +left 1 +hand 1 +part 1 +avoid 1 +also 1 +home 1 +dorm 1 +howev 1 +probabl 1 +purchas 1 +copi 1 +lahei 1 +person 1 +insid 1 +cover 1 +textbook 1 +work 1 +lab 1 +campu 1 +compil 1 +pleas 1 +first 1 +softwar 1 +includ 1 +netscap 1 +pointer 1 +interest 1 +jeff 1 +lampert 1 +depart 1 +start 1 +point 1 +internet 1 +explor 1 +lyco 1 +search 1 +world 1 +wide 1 +keyword 1 +dilbert 1 +comic 1 +relief 1 +long 1 +night 1 +assign 1 +copyright 1 +modifi 1 +gareth 1 +bestor 1 +wisc 1 +last 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..465305bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,52 @@ +homepagec 1 +homepagewelcom 1 +homepag 1 +purpos 1 +provid 1 +student 1 +inform 1 +pertain 1 +section 1 +sinc 1 +page 1 +chang 1 +frequent 1 +responsibilityto 1 +check 1 +often 1 +gener 1 +informationinstructor 1 +todd 1 +munsonemail 1 +tmunson 1 +wisc 1 +eduoffic 1 +comput 1 +scienc 1 +statisticsoffic 1 +phone 1 +offic 1 +hour 1 +appointmentsect 1 +textbook 1 +problem 1 +solv 1 +walter 1 +savitchclass 1 +informationexpectationssyllabusexam 1 +schedule 1 +mailgradingl 1 +assignmentsextra 1 +creditpoliciesconsult 1 +responsibilitiesacadem 1 +misconductoth 1 +informationdaili 1 +note 1 +assignmentshomework 1 +assignmentsprogram 1 +document 1 +us 1 +classoth 1 +program 1 +resourcesc 1 +homepagetmunson 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..bd1210a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,64 @@ +section 1 +comput 1 +scienc 1 +time 1 +place 1 +nolandinstructor 1 +todd 1 +turnidgeoffic 1 +hour 1 +tbalab 1 +tbaannouncementsclass 1 +note 1 +class 1 +handout 1 +struct 1 +avail 1 +program 1 +get 1 +start 1 +us 1 +inform 1 +read 1 +gener 1 +text 1 +facil 1 +grade 1 +polici 1 +syllabu 1 +tent 1 +cours 1 +assign 1 +solut 1 +collect 1 +date 1 +mail 1 +list 1 +send 1 +messag 1 +classa 1 +whole 1 +home 1 +page 1 +muchinform 1 +interest 1 +includ 1 +tutor 1 +consult 1 +window 1 +oper 1 +system 1 +email 1 +netscap 1 +find 1 +provid 1 +byother 1 +instructor 1 +help 1 +exampl 1 +gregorysharp 1 +difficulti 1 +last 1 +modifi 1 +turnidg 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..d111bb20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,80 @@ +section 1 +algebra 1 +languag 1 +program 1 +announc 1 +exam 1 +thursdai 1 +psych 1 +reload 1 +page 1 +everi 1 +time 1 +login 1 +instructor 1 +chri 1 +weaver 1 +email 1 +wisc 1 +polici 1 +offic 1 +comput 1 +scienc 1 +statist 1 +phone 1 +hour 1 +appoint 1 +first 1 +week 1 +grader 1 +zhang 1 +locat 1 +noland 1 +vectra 1 +seven 1 +dai 1 +staf 1 +consult 1 +gener 1 +cours 1 +info 1 +syllabu 1 +text 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +addison 1 +weslei 1 +isbn 1 +includ 1 +errata 1 +sourc 1 +code 1 +misconduct 1 +policyassign 1 +grade 1 +handout 1 +assign 1 +homework 1 +read 1 +lectur 1 +note 1 +exampl 1 +quiz 1 +kei 1 +late 1 +style 1 +guidelin 1 +still 1 +rough 1 +print 1 +paper 1 +statement 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +last 1 +chang 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..33662c45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,84 @@ +spring 1 +advanc 1 +comput 1 +architectur 1 +offer 1 +cours 1 +inform 1 +instructor 1 +prof 1 +jame 1 +smith 1 +offic 1 +engin 1 +hall 1 +hour 1 +tue 1 +thur 1 +phone 1 +email 1 +wisc 1 +princeoffic 1 +mail 1 +address 1 +princ 1 +tabl 1 +content 1 +new 1 +read 1 +lectur 1 +note 1 +homework 1 +project 1 +miscellaneousnew 1 +soln 1 +special 1 +final 1 +exam 1 +biochemistri 1 +pmread 1 +full 1 +paper 1 +doit 1 +overview 1 +introduct 1 +perform 1 +cost 1 +instruct 1 +set 1 +pipelin 1 +part 1 +vector 1 +vliw 1 +limit 1 +softwar 1 +cach 1 +memori 1 +main 1 +system 1 +disk 1 +arrai 1 +interconnect 1 +technolog 1 +network 1 +multiprocessor 1 +assign 1 +solut 1 +trace 1 +list 1 +miscellan 1 +us 1 +tool 1 +review 1 +midterm 1 +specmark 1 +consid 1 +harm 1 +analysi 1 +clock 1 +detail 1 +design 1 +reserv 1 +station 1 +rout 1 +summari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..b6f4f157 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,244 @@ +main 1 +pagecomput 1 +scienc 1 +comput 1 +graphic 1 +page 1 +evolv 1 +incomplet 1 +hopefulli 1 +us 1 +begin 1 +home 1 +forc 1 +deal 1 +scientif 1 +visual 1 +atth 1 +level 1 +principl 1 +practic 1 +folei 1 +computergraph 1 +alan 1 +watt 1 +current 1 +taught 1 +bruceland 1 +also 1 +project 1 +leader 1 +group 1 +cornel 1 +theori 1 +center 1 +content 1 +cours 1 +summari 1 +administrivia 1 +textbook 1 +error 1 +homework 1 +staff 1 +schedul 1 +relev 1 +math 1 +topic 1 +requir 1 +mathemat 1 +program 1 +artist 1 +skill 1 +among 1 +other 1 +fundament 1 +focus 1 +associ 1 +designedto 1 +help 1 +illumin 1 +cover 1 +follow 1 +year 1 +construct 1 +surfac 1 +explicit 1 +polygon 1 +list 1 +parametr 1 +oper 1 +quadric 1 +figur 1 +rotat 1 +swept 1 +tensor 1 +product 1 +viewer 1 +implicit 1 +blobbi 1 +model 1 +tessel 1 +normal 1 +hierarch 1 +simpl 1 +object 1 +form 1 +complex 1 +scene 1 +composit 1 +anim 1 +introduct 1 +homogen 1 +coordin 1 +geometr 1 +transform 1 +build 1 +kinemat 1 +combin 1 +prototyp 1 +mimic 1 +connect 1 +rigid 1 +part 1 +invers 1 +dynam 1 +system 1 +differenti 1 +equat 1 +cellular 1 +automata 1 +view 1 +camera 1 +clip 1 +volum 1 +onto 1 +screen 1 +parallel 1 +perspect 1 +simul 1 +stereo 1 +vision 1 +render 1 +shade 1 +light 1 +human 1 +color 1 +devic 1 +limit 1 +optic 1 +wave 1 +gourand 1 +phong 1 +hidden 1 +remov 1 +buffer 1 +transpar 1 +shadow 1 +scan 1 +convers 1 +anti 1 +alias 1 +pixel 1 +vernier 1 +hyper 1 +acuiti 1 +imag 1 +space 1 +method 1 +properti 1 +modif 1 +textur 1 +map 1 +bump 1 +data 1 +aspect 1 +dimension 1 +field 1 +scalar 1 +vector 1 +wall 1 +channel 1 +contour 1 +line 1 +mispercept 1 +difficulti 1 +arrow 1 +particl 1 +advect 1 +multiparamet 1 +high 1 +assign 1 +march 1 +april 1 +prelim 1 +first 1 +test 1 +spring 1 +serv 1 +gener 1 +guid 1 +style 1 +break 1 +religi 1 +holidai 1 +student 1 +educationlaw 1 +mandat 1 +faculti 1 +make 1 +avail 1 +opportun 1 +tomak 1 +examin 1 +miss 1 +belief 1 +inord 1 +facilit 1 +prepar 1 +makeup 1 +exam 1 +intendingto 1 +absent 1 +order 1 +observ 1 +requestedto 1 +notifi 1 +instructor 1 +last 1 +lectur 1 +final 1 +period 1 +tuesdai 1 +upson 1 +mean 1 +standard 1 +deviat 1 +bruce 1 +land 1 +rhode 1 +jing 1 +huang 1 +justin 1 +mccune 1 +jmccune 1 +csrelev 1 +univers 1 +california 1 +davi 1 +waterloo 1 +wale 1 +colleg 1 +cardiff 1 +manchest 1 +oregon 1 +state 1 +universityrel 1 +comment 1 +onlin 1 +document 1 +welcom 1 +sent 1 +todoc 1 +modifi 1 +copyright 1 +statement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..be8a7527 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,96 @@ +main 1 +pagecomput 1 +scienc 1 +comput 1 +graphic 1 +laboratori 1 +exercisesthi 1 +site 1 +cornel 1 +universityundergradu 1 +page 1 +contain 1 +materi 1 +includ 1 +procedur 1 +softwar 1 +student 1 +result 1 +section 1 +deal 1 +computergraph 1 +scientif 1 +visual 1 +level 1 +principl 1 +practic 1 +folei 1 +dcomput 1 +watt 1 +current 1 +taught 1 +bruceland 1 +also 1 +project 1 +leader 1 +group 1 +atth 1 +theori 1 +center 1 +first 1 +place 1 +sigucc 1 +basededuc 1 +train 1 +competit 1 +exercis 1 +get 1 +start 1 +build 1 +polygon 1 +object 1 +parametr 1 +surfac 1 +transform 1 +model 1 +us 1 +virtual 1 +camera 1 +perspect 1 +light 1 +textur 1 +bump 1 +map 1 +design 1 +physic 1 +base 1 +anim 1 +implicit 1 +done 1 +order 1 +note 1 +mark 1 +areinclud 1 +refer 1 +chat 1 +facil 1 +commun 1 +aboutc 1 +relat 1 +topic 1 +spring 1 +semesteraccess 1 +restrict 1 +enrol 1 +final 1 +comment 1 +onlin 1 +document 1 +welcom 1 +sent 1 +todoc 1 +last 1 +modifi 1 +land 1 +copyright 1 +statement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..e08c2615 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,65 @@ +cornel 1 +fall 1 +topic 1 +comput 1 +graphic 1 +semestereach 1 +group 1 +student 1 +chose 1 +current 1 +research 1 +computergraph 1 +read 1 +appropri 1 +paper 1 +implement 1 +code 1 +deliv 1 +lectur 1 +chosen 1 +anddocu 1 +work 1 +document 1 +metabal 1 +model 1 +window 1 +michael 1 +arcuri 1 +alex 1 +benton 1 +human 1 +facial 1 +express 1 +huang 1 +hung 1 +content 1 +base 1 +imag 1 +retriev 1 +system 1 +interior 1 +design 1 +sean 1 +landi 1 +interdepend 1 +particl 1 +justin 1 +mccune 1 +visual 1 +diffus 1 +distribut 1 +pollut 1 +us 1 +spatial 1 +explicit 1 +landscap 1 +modelsfu 1 +tsai 1 +antialias 1 +video 1 +stochast 1 +sampl 1 +arun 1 +vermach 1 +hsun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..1fbd94d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,65 @@ +patti 1 +houghpatti 1 +hough 1 +sandia 1 +nation 1 +laboratori 1 +livermor 1 +cornel 1 +student 1 +center 1 +appli 1 +mathemat 1 +whichi 1 +hous 1 +frankh 1 +rhode 1 +hall 1 +cornellunivers 1 +thesi 1 +advisor 1 +steve 1 +vavasi 1 +member 1 +committe 1 +nicktrefethen 1 +schatz 1 +research 1 +interest 1 +fall 1 +area 1 +numer 1 +linearalgebra 1 +scientif 1 +comput 1 +optimizationi 1 +current 1 +work 1 +postdoc 1 +juan 1 +meza 1 +depart 1 +nationallaboratori 1 +resum 1 +statement 1 +goal 1 +tech 1 +report 1 +complet 1 +orthogon 1 +decomposit 1 +weight 1 +least 1 +squar 1 +appear 1 +siam 1 +matrix 1 +anal 1 +stabl 1 +effici 1 +solut 1 +ofweight 1 +problem 1 +applic 1 +interior 1 +pointmethod 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..7a3e5a7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,85 @@ +jeff 1 +baggettjeff 1 +baggett 1 +center 1 +appli 1 +mathemat 1 +frank 1 +rhode 1 +hall 1 +cornel 1 +univers 1 +ithaca 1 +sixth 1 +year 1 +graduat 1 +student 1 +depart 1 +atcornel 1 +supervis 1 +nick 1 +trefethen 1 +expect 1 +finish 1 +thesi 1 +titl 1 +normal 1 +dynam 1 +applic 1 +hydrodynam 1 +stabil 1 +summer 1 +would 1 +like 1 +continu 1 +research 1 +seek 1 +posit 1 +detail 1 +outlin 1 +postscript 1 +page 1 +curriculum 1 +vita 1 +interest 1 +activ 1 +background 1 +unusu 1 +blend 1 +scientif 1 +comput 1 +system 1 +fluid 1 +mechan 1 +propos 1 +work 1 +iwould 1 +next 1 +coupl 1 +paper 1 +mostli 1 +linear 1 +model 1 +transit 1 +turbul 1 +driscol 1 +physic 1 +april 1 +exponenti 1 +type 1 +versu 1 +spectral 1 +abscissa 1 +hill 1 +andphillip 1 +exampl 1 +submit 1 +integr 1 +equat 1 +oper 1 +theori 1 +dimension 1 +subcrit 1 +misc 1 +link 1 +satish 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..cb66a64c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,71 @@ +network 1 +comput 1 +scienc 1 +technic 1 +report 1 +librari 1 +libraryncstrl 1 +pronounc 1 +ancestr 1 +internationalcollect 1 +departmentsand 1 +industri 1 +govern 1 +research 1 +laboratori 1 +made 1 +availablefor 1 +commerci 1 +eduat 1 +ncstrlcollect 1 +distribut 1 +among 1 +interoper 1 +serversoper 1 +participatinginstitut 1 +read 1 +offici 1 +ncstrlpress 1 +packag 1 +descript 1 +background 1 +goal 1 +andorgan 1 +ncstrl 1 +search 1 +collect 1 +field 1 +form 1 +allow 1 +perform 1 +sever 1 +bibliograph 1 +data 1 +limit 1 +specif 1 +institut 1 +enter 1 +word 1 +list 1 +document 1 +whose 1 +author 1 +titl 1 +abstract 1 +contain 1 +brows 1 +theparticip 1 +want 1 +join 1 +tell 1 +moreread 1 +forinstitut 1 +interest 1 +particip 1 +informationfind 1 +snew 1 +relat 1 +cornel 1 +send 1 +email 1 +totech 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..65d51add --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^dri.cornell.edu @@ -0,0 +1,33 @@ +design 1 +research 1 +institut 1 +instituteabout 1 +researchersat 1 +brows 1 +searchal 1 +public 1 +file 1 +technic 1 +reportssearch 1 +report 1 +ipic 1 +home 1 +page 1 +intern 1 +work 1 +confer 1 +integr 1 +enterpris 1 +informationand 1 +process 1 +anoth 1 +site 1 +inform 1 +itisingapor 1 +altavistaforum 1 +send 1 +question 1 +comment 1 +server 1 +mike 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..7ac677e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,177 @@ +davi 1 +design 1 +research 1 +institutejim 1 +davisxerox 1 +corporationphd 1 +media 1 +cornel 1 +edumi 1 +goal 1 +gener 1 +build 1 +softwar 1 +system 1 +improvecommun 1 +among 1 +peopl 1 +believ 1 +commun 1 +medium 1 +ofth 1 +futur 1 +increas 1 +understand 1 +structur 1 +andcont 1 +messag 1 +transmit 1 +manipul 1 +reformat 1 +even 1 +content 1 +interest 1 +inhypertext 1 +network 1 +inform 1 +access 1 +collabor 1 +work 1 +thecstr 1 +project 1 +anarpa 1 +sponsor 1 +effort 1 +make 1 +comput 1 +scienc 1 +technic 1 +report 1 +moreeasili 1 +part 1 +distribut 1 +server 1 +whichi 1 +run 1 +mani 1 +univers 1 +corpor 1 +group 1 +memori 1 +mean 1 +electronicsystem 1 +captur 1 +knowledg 1 +us 1 +produc 1 +worker 1 +institut 1 +order 1 +qualiti 1 +ofor 1 +reduc 1 +time 1 +requir 1 +memoryinclud 1 +intellectu 1 +product 1 +engin 1 +lawyer 1 +contract 1 +author 1 +sscreenplai 1 +also 1 +process 1 +producedth 1 +dead 1 +end 1 +explor 1 +tool 1 +andjustif 1 +support 1 +final 1 +decis 1 +begun 1 +huttenloch 1 +developingcorpor 1 +sharedannot 1 +document 1 +investig 1 +howpeopl 1 +share 1 +read 1 +write 1 +annot 1 +inelectron 1 +initi 1 +prototypeimplement 1 +class 1 +shareddocu 1 +problem 1 +set 1 +cours 1 +note 1 +nnotat 1 +might 1 +berequest 1 +clarif 1 +student 1 +question 1 +orcorrect 1 +made 1 +staff 1 +whether 1 +aus 1 +obtain 1 +answer 1 +willfind 1 +sourc 1 +learn 1 +whetherstud 1 +often 1 +abl 1 +correctli 1 +find 1 +usefulmean 1 +feedback 1 +improv 1 +evid 1 +isthat 1 +natur 1 +languag 1 +designof 1 +proxi 1 +agent 1 +safe 1 +reliablycarri 1 +remot 1 +foreign 1 +machin 1 +without 1 +risk 1 +toeither 1 +owner 1 +alsopap 1 +onlin 1 +copi 1 +publicatiion 1 +resourc 1 +list 1 +seem 1 +especi 1 +thedrimi 1 +collect 1 +meprofession 1 +historythi 1 +narr 1 +resum 1 +contact 1 +improvisationi 1 +sport 1 +resumeno 1 +market 1 +thank 1 +ask 1 +likeit 1 +fine 1 +xerox 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..50b3fbbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,28 @@ +metacrawl 1 +searchingmetacrawlerbi 1 +erik 1 +selberg 1 +greg 1 +lauckhartand 1 +prof 1 +oren 1 +etzioniif 1 +search 1 +person 1 +home 1 +page 1 +ahoi 1 +phrase 1 +word 1 +wordssort 1 +result 1 +relev 1 +locationcontrol 1 +java 1 +configur 1 +problemswebmast 1 +comcopyright 1 +etzioni 1 +lauckhart 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..0739c1df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,99 @@ +univers 1 +wisconsin 1 +robot 1 +home 1 +page 1 +madison 1 +mechan 1 +engin 1 +avenuemadison 1 +vladimir 1 +lumelski 1 +director 1 +brief 1 +review 1 +research 1 +activ 1 +peopl 1 +technic 1 +report 1 +compress 1 +postscript 1 +avail 1 +simul 1 +librari 1 +relat 1 +cours 1 +seminar 1 +recent 1 +project 1 +select 1 +public 1 +abstract 1 +maze 1 +search 1 +algorithm 1 +effect 1 +kinemat 1 +sensor 1 +base 1 +motion 1 +plan 1 +dynam 1 +control 1 +jogger 1 +model 1 +sens 1 +decentr 1 +intellig 1 +group 1 +special 1 +topic 1 +tether 1 +underwat 1 +redund 1 +sensit 1 +skin 1 +human 1 +center 1 +system 1 +comput 1 +geometri 1 +neil 1 +duffi 1 +manufactur 1 +chuck 1 +dyer 1 +vision 1 +robert 1 +lorenz 1 +actuat 1 +jude 1 +shavlik 1 +machin 1 +learn 1 +link 1 +ieee 1 +societi 1 +autom 1 +tech 1 +committe 1 +path 1 +internet 1 +resourc 1 +server 1 +nasa 1 +telerobot 1 +program 1 +frequent 1 +ask 1 +question 1 +list 1 +local 1 +dept 1 +colleg 1 +comment 1 +suggest 1 +errata 1 +hert 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..7d779b2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,40 @@ +vladimir 1 +lumelski 1 +home 1 +page 1 +lumelskyprofessormechan 1 +engin 1 +comput 1 +scienc 1 +underwat 1 +robot 1 +kinemat 1 +redund 1 +sensit 1 +skin 1 +project 1 +human 1 +center 1 +system 1 +geometri 1 +global 1 +link 1 +ieee 1 +societi 1 +autom 1 +tech 1 +committe 1 +motion 1 +path 1 +plan 1 +wisconsin 1 +colleg 1 +mechan 1 +dept 1 +electr 1 +mathemat 1 +graduat 1 +program 1 +mace 1 +grant 1 +institut 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..9eafa9d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,196 @@ +werner 1 +vogel 1 +researchera 1 +upson 1 +halldept 1 +comput 1 +sciencecornel 1 +univers 1 +ithaca 1 +phone 1 +email 1 +cornel 1 +protocol 1 +design 1 +perfect 1 +reach 1 +noth 1 +left 1 +take 1 +awai 1 +involv 1 +major 1 +system 1 +project 1 +scienc 1 +depart 1 +thehorusand 1 +cluster 1 +think 1 +myresearch 1 +interest 1 +best 1 +describ 1 +latenc 1 +high 1 +bandwith 1 +commun 1 +support 1 +highli 1 +reliabl 1 +distribut 1 +real 1 +time 1 +requir 1 +focu 1 +engin 1 +issu 1 +thing 1 +amwork 1 +predict 1 +execut 1 +environ 1 +horu 1 +integr 1 +horuswith 1 +need 1 +lead 1 +situat 1 +reason 1 +advanc 1 +oper 1 +guarante 1 +mechan 1 +effici 1 +data 1 +transfer 1 +speed 1 +network 1 +devic 1 +applic 1 +level 1 +messag 1 +bandwidth 1 +small 1 +fallen 1 +behind 1 +softwar 1 +adapt 1 +achiev 1 +desir 1 +latencyfor 1 +pass 1 +protocolsar 1 +abl 1 +exploit 1 +structureand 1 +interact 1 +pattern 1 +method 1 +deal 1 +failur 1 +miss 1 +deadlin 1 +meet 1 +guarant 1 +gave 1 +tell 1 +anyon 1 +acur 1 +detect 1 +want 1 +aglob 1 +scope 1 +find 1 +gener 1 +supportfailur 1 +suspis 1 +manag 1 +process 1 +node 1 +experi 1 +group 1 +extract 1 +workwith 1 +middlewar 1 +packag 1 +regardless 1 +function 1 +brainchild 1 +robbert 1 +reness 1 +andken 1 +birman 1 +work 1 +done 1 +cooper 1 +withthorsten 1 +eicken 1 +multimedia 1 +video 1 +demand 1 +horusexperi 1 +concert 1 +brian 1 +smith 1 +respons 1 +practicum 1 +teach 1 +number 1 +lectureson 1 +technolog 1 +practic 1 +recent 1 +public 1 +world 1 +wide 1 +appear 1 +proceed 1 +sigop 1 +workshopconnamoran 1 +ierland 1 +septemb 1 +structur 1 +virtual 1 +synchroni 1 +explor 1 +bound 1 +virtuallysynchron 1 +katherin 1 +user 1 +interfacefor 1 +parallel 1 +anindya 1 +basu 1 +vineet 1 +buch 1 +thorsten 1 +symposium 1 +princpl 1 +copper 1 +mountain 1 +decemb 1 +deliv 1 +perform 1 +third 1 +ieee 1 +workshop 1 +architectur 1 +implementationof 1 +subsystem 1 +hpc 1 +august 1 +flexibl 1 +kenneth 1 +brad 1 +glade 1 +kati 1 +mark 1 +hayden 1 +takako 1 +hickei 1 +dalia 1 +malki 1 +alex 1 +vaysburd 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..51c52602 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,265 @@ +horu 1 +projectth 1 +compani 1 +ofth 1 +god 1 +rejoic 1 +come 1 +osiri 1 +whose 1 +heart 1 +firm 1 +triumphant 1 +ofisi 1 +heir 1 +project 1 +develop 1 +modular 1 +extens 1 +process 1 +groupcommun 1 +system 1 +address 1 +requir 1 +wide 1 +varieti 1 +robust 1 +distribut 1 +applic 1 +isi 1 +attribut 1 +appealedstrongli 1 +egyptian 1 +egypt 1 +becausein 1 +everi 1 +woman 1 +type 1 +wish 1 +possess 1 +renew 1 +life 1 +oppos 1 +death 1 +movementa 1 +inact 1 +provid 1 +framework 1 +applicationsbas 1 +group 1 +commun 1 +style 1 +comput 1 +aris 1 +infault 1 +toler 1 +manag 1 +thatexploit 1 +data 1 +replic 1 +coher 1 +cach 1 +groupwar 1 +within 1 +theoveral 1 +larg 1 +collect 1 +applicationprotocol 1 +allow 1 +design 1 +toconstruct 1 +modul 1 +exactli 1 +meet 1 +applicationrequir 1 +minim 1 +cost 1 +origin 1 +launch 1 +effort 1 +redesign 1 +theisi 1 +evolv 1 +gener 1 +purpos 1 +communicationarchitectur 1 +advanc 1 +support 1 +robustdistribut 1 +set 1 +unsuit 1 +asappl 1 +special 1 +secur 1 +real 1 +time 1 +besidesth 1 +practic 1 +us 1 +softwar 1 +contribut 1 +toward 1 +theori 1 +virtual 1 +synchroni 1 +runtim 1 +model 1 +usedfor 1 +implement 1 +fault 1 +sametim 1 +much 1 +faster 1 +lighter 1 +weight 1 +exist 1 +initi 1 +version 1 +code 1 +beus 1 +research 1 +restrict 1 +commericalright 1 +call 1 +ensembl 1 +written 1 +usabl 1 +manyoth 1 +languag 1 +avail 1 +class 1 +user 1 +nofe 1 +activ 1 +seri 1 +ofreleas 1 +fall 1 +spring 1 +earli 1 +ensemblewil 1 +outstand 1 +environ 1 +build 1 +java 1 +base 1 +groupwareappl 1 +multimedia 1 +conferenc 1 +platform 1 +independ 1 +areavail 1 +differentclass 1 +workstat 1 +person 1 +parallel 1 +processor 1 +onnext 1 +cluster 1 +standard 1 +high 1 +speedcommun 1 +network 1 +collabor 1 +close 1 +mani 1 +systemsproject 1 +includ 1 +transi 1 +navtech 1 +stormcast 1 +tacomaproject 1 +link 1 +found 1 +elsewher 1 +thesepag 1 +circumst 1 +begotten 1 +gain 1 +good 1 +idea 1 +froma 1 +sorrow 1 +mother 1 +herhusband 1 +describ 1 +goddess 1 +greatli 1 +distress 1 +equippedwith 1 +mighti 1 +word 1 +power 1 +knew 1 +utter 1 +mighthav 1 +greatest 1 +effect 1 +search 1 +dead 1 +bodi 1 +never 1 +rest 1 +hair 1 +made 1 +light 1 +wing 1 +stir 1 +lament 1 +brother 1 +length 1 +brought 1 +state 1 +unit 1 +thu 1 +becam 1 +child 1 +born 1 +secret 1 +place 1 +suckl 1 +rear 1 +introduct 1 +horusvisit 1 +papersand 1 +abstractpag 1 +overview 1 +public 1 +report 1 +relatedto 1 +follow 1 +recent 1 +articl 1 +present 1 +level 1 +kenneth 1 +birman 1 +robbert 1 +reness 1 +reliabl 1 +scientif 1 +american 1 +silvano 1 +maffei 1 +flexiblegroup 1 +april 1 +final 1 +sentenc 1 +hyme 1 +osirisfrom 1 +papyru 1 +better 1 +know 1 +book 1 +walli 1 +budg 1 +studiesin 1 +mytholog 1 +volum 1 +page 1 +open 1 +court 1 +publish 1 +london 1 +comment 1 +werner 1 +vogel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..490cfb8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,87 @@ +cornel 1 +depart 1 +comput 1 +scienc 1 +gener 1 +info 1 +academ 1 +faculti 1 +research 1 +project 1 +tech 1 +report 1 +annual 1 +welcom 1 +universitydepart 1 +site 1 +feel 1 +free 1 +brows 1 +around 1 +know 1 +infoget 1 +inform 1 +locat 1 +size 1 +also 1 +find 1 +contactswithin 1 +standard 1 +disclaim 1 +facultyfind 1 +list 1 +check 1 +offici 1 +home 1 +page 1 +ortheir 1 +person 1 +researchcheck 1 +go 1 +aboutour 1 +collabor 1 +publicationsfind 1 +link 1 +public 1 +researcherseith 1 +technic 1 +projector 1 +degreeslook 1 +degre 1 +program 1 +doctor 1 +master 1 +engin 1 +orundergradu 1 +academicsrefer 1 +cours 1 +taught 1 +webfor 1 +semest 1 +read 1 +generalcoursedescript 1 +appear 1 +studi 1 +peopleget 1 +outstand 1 +peopl 1 +keep 1 +includ 1 +staff 1 +student 1 +directorylist 1 +activitiesfind 1 +activ 1 +theassoci 1 +undergradu 1 +excellenthockei 1 +team 1 +serverscheck 1 +server 1 +gopherserv 1 +anonym 1 +ftpserver 1 +sitesquest 1 +comment 1 +informationpres 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..d95fc9dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,242 @@ +gerard 1 +saltongerard 1 +saltonprofessorg 1 +cornel 1 +eduph 1 +harvard 1 +univers 1 +natur 1 +languag 1 +text 1 +process 1 +rapidli 1 +expand 1 +field 1 +research 1 +develop 1 +larg 1 +mass 1 +machin 1 +readabl 1 +exist 1 +cheapli 1 +store 1 +high 1 +densiti 1 +optic 1 +storag 1 +media 1 +retriev 1 +demand 1 +furthermor 1 +sophist 1 +method 1 +avail 1 +analyz 1 +document 1 +formul 1 +appropri 1 +user 1 +queri 1 +conduct 1 +rapid 1 +file 1 +search 1 +rank 1 +item 1 +decreas 1 +order 1 +import 1 +design 1 +oper 1 +gener 1 +purpos 1 +environ 1 +handl 1 +without 1 +restrict 1 +size 1 +subject 1 +matter 1 +absenc 1 +knowledg 1 +base 1 +would 1 +us 1 +unrestrict 1 +databas 1 +corpu 1 +analysi 1 +system 1 +determin 1 +mean 1 +word 1 +express 1 +refin 1 +context 1 +statist 1 +probabilist 1 +criteria 1 +approach 1 +abl 1 +similar 1 +degre 1 +accuraci 1 +main 1 +applic 1 +automat 1 +structur 1 +collect 1 +hypertext 1 +semant 1 +piec 1 +link 1 +represent 1 +provid 1 +flexibl 1 +brows 1 +capabl 1 +access 1 +interest 1 +excerpt 1 +respons 1 +done 1 +extens 1 +work 1 +autom 1 +encyclopedia 1 +consist 1 +articl 1 +funk 1 +wagnal 1 +addit 1 +also 1 +trec 1 +full 1 +cover 1 +number 1 +differ 1 +area 1 +gigabyt 1 +servic 1 +well 1 +relat 1 +section 1 +paragraph 1 +sentenc 1 +test 1 +vehicl 1 +continu 1 +current 1 +version 1 +smart 1 +unix 1 +sparc 1 +station 1 +termin 1 +equip 1 +activitiesmemb 1 +engin 1 +colleg 1 +librari 1 +committeeprofession 1 +activitiesassoci 1 +editor 1 +transact 1 +inform 1 +systemsprogram 1 +committe 1 +sigir 1 +seventeenth 1 +confer 1 +dublin 1 +ireland 1 +electron 1 +publish 1 +darmstadt 1 +germani 1 +genom 1 +nation 1 +medicin 1 +bethesda 1 +maryland 1 +multimedia 1 +hypermedia 1 +virtual 1 +realiti 1 +moscow 1 +septemb 1 +lecturesautomat 1 +construct 1 +feder 1 +institut 1 +technolog 1 +zurich 1 +switzerland 1 +june 1 +progress 1 +konstanz 1 +asi 1 +meet 1 +columbu 1 +ohio 1 +octob 1 +util 1 +comput 1 +scienc 1 +colloquium 1 +state 1 +lectur 1 +cours 1 +seattl 1 +washington 1 +novemb 1 +microsoft 1 +corpor 1 +workshop 1 +publicationsapproach 1 +passag 1 +proceed 1 +annual 1 +associ 1 +machineri 1 +york 1 +allan 1 +bucklei 1 +select 1 +travers 1 +commun 1 +februari 1 +vector 1 +model 1 +third 1 +symposium 1 +nevada 1 +vega 1 +april 1 +softwareth 1 +made 1 +free 1 +charg 1 +sever 1 +hundr 1 +copi 1 +distribut 1 +around 1 +world 1 +return 1 +list 1 +faculti 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..bf1554a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,87 @@ +yui 1 +liyui 1 +liresearch 1 +associateyui 1 +cornel 1 +eduph 1 +univers 1 +waterloo 1 +gener 1 +research 1 +interest 1 +numer 1 +optim 1 +scientif 1 +comput 1 +also 1 +appli 1 +techniqu 1 +solv 1 +real 1 +world 1 +engin 1 +problem 1 +current 1 +includ 1 +trust 1 +region 1 +theori 1 +unconstrain 1 +minim 1 +nonlinearli 1 +constrain 1 +particular 1 +exist 1 +accept 1 +condit 1 +investig 1 +affin 1 +scale 1 +method 1 +nonlinear 1 +function 1 +converg 1 +analysi 1 +us 1 +exact 1 +penalti 1 +approach 1 +applic 1 +consid 1 +imag 1 +enhanc 1 +lecturesan 1 +interior 1 +subject 1 +bound 1 +confer 1 +chines 1 +young 1 +scientist 1 +beij 1 +china 1 +august 1 +publicationsa 1 +global 1 +siam 1 +journal 1 +center 1 +reflect 1 +proceed 1 +return 1 +list 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..46c2855a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,178 @@ +richard 1 +zippelrichard 1 +zippelsenior 1 +research 1 +associaterz 1 +cornel 1 +eduph 1 +focus 1 +us 1 +symbol 1 +mathemat 1 +modern 1 +softwar 1 +techniqu 1 +autom 1 +gener 1 +scientif 1 +current 1 +comput 1 +fluid 1 +dynam 1 +work 1 +allow 1 +scientist 1 +specifi 1 +differenti 1 +equat 1 +studi 1 +perform 1 +numer 1 +architectur 1 +machin 1 +calcul 1 +done 1 +toolkit 1 +convert 1 +suppli 1 +method 1 +special 1 +code 1 +colleagu 1 +depart 1 +mechan 1 +aerospac 1 +engin 1 +system 1 +aris 1 +boundari 1 +layer 1 +compon 1 +algebra 1 +substrat 1 +call 1 +weyl 1 +extend 1 +data 1 +structur 1 +avail 1 +common 1 +lisp 1 +includ 1 +object 1 +like 1 +polynomi 1 +matric 1 +ration 1 +function 1 +ring 1 +vector 1 +space 1 +ideal 1 +introduct 1 +program 1 +languag 1 +provid 1 +number 1 +challeng 1 +type 1 +opportun 1 +deduct 1 +reason 1 +pursu 1 +profession 1 +activitieseditori 1 +board 1 +journal 1 +transact 1 +softwareprogram 1 +committe 1 +principl 1 +practic 1 +constraint 1 +workshop 1 +refere 1 +review 1 +algorithm 1 +error 1 +correct 1 +intern 1 +symposium 1 +computationlecturesalgebra 1 +decomposit 1 +american 1 +societi 1 +region 1 +meet 1 +syracus 1 +york 1 +septemb 1 +dexter 1 +kozen 1 +susan 1 +landau 1 +present 1 +effect 1 +irreduc 1 +test 1 +vista 1 +microstorag 1 +orient 1 +oper 1 +durham 1 +north 1 +carolina 1 +decemb 1 +joint 1 +dawson 1 +dean 1 +center 1 +haifa 1 +israel 1 +januari 1 +electr 1 +technion 1 +synthes 1 +scienc 1 +weizmann 1 +institut 1 +rehovot 1 +suni 1 +albani 1 +april 1 +modular 1 +interpol 1 +factor 1 +multivari 1 +theori 1 +ithaca 1 +ronitt 1 +rubinfeld 1 +publicationseffect 1 +kluwer 1 +academ 1 +publish 1 +boston 1 +massachusett 1 +june 1 +page 1 +implement 1 +file 1 +databas 1 +proceed 1 +return 1 +list 1 +annual 1 +report 1 +home 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..abd81531 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,217 @@ +kenneth 1 +birmankenneth 1 +birmanprofessorphd 1 +univ 1 +california 1 +berkelei 1 +research 1 +concern 1 +fault 1 +toler 1 +distribut 1 +comput 1 +oper 1 +system 1 +focu 1 +us 1 +program 1 +model 1 +base 1 +upon 1 +virtual 1 +synchron 1 +process 1 +group 1 +solv 1 +problem 1 +manag 1 +replic 1 +data 1 +coordin 1 +action 1 +set 1 +perform 1 +dynam 1 +reconfigur 1 +done 1 +provid 1 +although 1 +limit 1 +certain 1 +class 1 +reason 1 +benign 1 +failur 1 +effort 1 +theoret 1 +practic 1 +side 1 +work 1 +start 1 +develop 1 +call 1 +isi 1 +toolkit 1 +softwar 1 +tool 1 +support 1 +synchroni 1 +becam 1 +wide 1 +popular 1 +horu 1 +intend 1 +flexibl 1 +address 1 +issu 1 +real 1 +time 1 +commun 1 +secur 1 +import 1 +featur 1 +extens 1 +layer 1 +permit 1 +special 1 +purpos 1 +basic 1 +idea 1 +user 1 +actual 1 +avail 1 +broad 1 +collect 1 +option 1 +also 1 +seek 1 +leverag 1 +emerg 1 +network 1 +technolog 1 +techniqu 1 +activ 1 +messag 1 +origin 1 +parallel 1 +supercomput 1 +embodi 1 +advanc 1 +graduat 1 +student 1 +mike 1 +reiter 1 +david 1 +cooper 1 +unusu 1 +combin 1 +privaci 1 +high 1 +singl 1 +packag 1 +fundament 1 +look 1 +specifi 1 +prove 1 +properti 1 +structur 1 +languag 1 +execut 1 +refer 1 +implement 1 +major 1 +goal 1 +constabl 1 +nuprl 1 +latter 1 +correctli 1 +former 1 +studi 1 +notabl 1 +friedman 1 +ad 1 +guarante 1 +mark 1 +hayden 1 +probabilist 1 +broadcast 1 +primit 1 +much 1 +collabor 1 +architectur 1 +head 1 +robbert 1 +reness 1 +werner 1 +vogel 1 +aspect 1 +includ 1 +object 1 +orient 1 +multimedia 1 +applic 1 +speed 1 +protocol 1 +exploit 1 +within 1 +depart 1 +thorsten 1 +eicken 1 +brian 1 +smith 1 +univers 1 +activitieschair 1 +engin 1 +polici 1 +committe 1 +act 1 +scienc 1 +master 1 +member 1 +faculti 1 +recruit 1 +cornel 1 +academ 1 +leadership 1 +profession 1 +activitieseditor 1 +chief 1 +transact 1 +scientist 1 +isat 1 +robust 1 +critic 1 +element 1 +nation 1 +inform 1 +infrastructur 1 +publicationsth 1 +approach 1 +reliabl 1 +decemb 1 +integr 1 +runtim 1 +consist 1 +journal 1 +birman 1 +ieee 1 +societi 1 +press 1 +alamito 1 +glade 1 +return 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..ba8ba15b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,150 @@ +donald 1 +greenbergdonald 1 +greenberg 1 +jacob 1 +gould 1 +schurman 1 +prof 1 +comput 1 +scienc 1 +director 1 +technolog 1 +center 1 +graphic 1 +scientif 1 +visual 1 +cornel 1 +univers 1 +past 1 +decad 1 +activ 1 +involv 1 +develop 1 +wide 1 +rang 1 +input 1 +displai 1 +techniqu 1 +number 1 +method 1 +implement 1 +progress 1 +made 1 +larg 1 +varieti 1 +routin 1 +research 1 +topic 1 +previous 1 +investig 1 +includ 1 +polygon 1 +clip 1 +hidden 1 +surfac 1 +algorithm 1 +textur 1 +spatial 1 +tempor 1 +alias 1 +problem 1 +geometr 1 +model 1 +parametr 1 +descript 1 +color 1 +current 1 +focu 1 +three 1 +dimension 1 +complex 1 +environ 1 +realist 1 +imag 1 +synthesi 1 +modular 1 +testb 1 +suffici 1 +flexibl 1 +evalu 1 +differ 1 +gener 1 +creat 1 +laboratori 1 +conduct 1 +light 1 +reflect 1 +determin 1 +interact 1 +improv 1 +effici 1 +trace 1 +parallel 1 +process 1 +strategi 1 +perceptu 1 +studi 1 +micro 1 +geometri 1 +motion 1 +control 1 +dynam 1 +constraint 1 +anti 1 +host 1 +relat 1 +applic 1 +start 1 +volum 1 +render 1 +medic 1 +digit 1 +photographi 1 +anim 1 +tool 1 +well 1 +core 1 +multi 1 +media 1 +within 1 +facil 1 +program 1 +member 1 +nation 1 +foundat 1 +particip 1 +brown 1 +california 1 +institut 1 +north 1 +carolina 1 +chapel 1 +hill 1 +utah 1 +activitiesdirector 1 +visualizationprofession 1 +activitieseditori 1 +board 1 +journal 1 +academi 1 +engin 1 +found 1 +fellow 1 +american 1 +biolog 1 +acmreturn 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..e69a03c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,200 @@ +juri 1 +hartmanisjuri 1 +hartmani 1 +walter 1 +read 1 +professor 1 +engin 1 +california 1 +institut 1 +technolog 1 +strateg 1 +goal 1 +research 1 +contribut 1 +develop 1 +comprehens 1 +theori 1 +comput 1 +complex 1 +studi 1 +quantit 1 +law 1 +govern 1 +essenti 1 +part 1 +scienc 1 +base 1 +need 1 +guid 1 +har 1 +exploit 1 +explos 1 +grow 1 +classifi 1 +problem 1 +amount 1 +variou 1 +resourc 1 +solv 1 +classif 1 +yield 1 +class 1 +consist 1 +within 1 +given 1 +bound 1 +gain 1 +deeper 1 +understand 1 +make 1 +hard 1 +explor 1 +relat 1 +intern 1 +structur 1 +also 1 +trade 1 +off 1 +differ 1 +particular 1 +attent 1 +sequenti 1 +time 1 +parallel 1 +nondeterminist 1 +memori 1 +requir 1 +random 1 +interact 1 +univers 1 +activitiesmemb 1 +faculti 1 +council 1 +representativeschair 1 +depart 1 +recruit 1 +committeehonorsacm 1 +ture 1 +award 1 +stearn 1 +member 1 +nation 1 +academi 1 +foreign 1 +latvian 1 +fellow 1 +american 1 +art 1 +york 1 +state 1 +associ 1 +advanc 1 +aaa 1 +charter 1 +profession 1 +activitieseditor 1 +springer 1 +verlag 1 +lectur 1 +note 1 +siam 1 +journal 1 +system 1 +advisori 1 +board 1 +eatc 1 +monograph 1 +theoret 1 +director 1 +ifip 1 +technic 1 +committe 1 +foundat 1 +georg 1 +brown 1 +school 1 +rice 1 +houston 1 +texa 1 +peer 1 +visit 1 +physic 1 +divis 1 +chicago 1 +advisor 1 +world 1 +scientif 1 +presseditori 1 +electron 1 +press 1 +editor 1 +sciencegoedel 1 +prize 1 +telecommun 1 +awardshonorari 1 +doctor 1 +degre 1 +dortmund 1 +germani 1 +lecturessom 1 +observ 1 +banquet 1 +speech 1 +logic 1 +program 1 +symposium 1 +cornel 1 +novemb 1 +scope 1 +natur 1 +futur 1 +distinguish 1 +seri 1 +virginia 1 +februari 1 +tennesse 1 +april 1 +publicationson 1 +commun 1 +octob 1 +oracl 1 +hypothesi 1 +fals 1 +august 1 +richard 1 +chang 1 +benni 1 +chor 1 +od 1 +goldreich 1 +johan 1 +hastad 1 +desh 1 +ranjan 1 +pankaj 1 +rohatgi 1 +hausdorff 1 +topolog 1 +dimens 1 +kolmogorov 1 +real 1 +line 1 +decemb 1 +weight 1 +bulletin 1 +return 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..b31a2902 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,130 @@ +john 1 +hopcroftjohn 1 +hopcroftjoseph 1 +silbert 1 +dean 1 +engineeringprofessor 1 +comput 1 +sciencephd 1 +stanford 1 +univers 1 +januari 1 +appoint 1 +colleg 1 +engin 1 +overse 1 +academ 1 +depart 1 +compris 1 +well 1 +variou 1 +research 1 +unit 1 +involv 1 +scienc 1 +continu 1 +robust 1 +geometr 1 +algorithm 1 +model 1 +simul 1 +inform 1 +captur 1 +access 1 +collabor 1 +design 1 +institut 1 +develop 1 +technolog 1 +facilit 1 +within 1 +environ 1 +among 1 +distribut 1 +databas 1 +persist 1 +object 1 +storag 1 +document 1 +imag 1 +process 1 +manag 1 +multimedia 1 +user 1 +interfac 1 +heterogen 1 +data 1 +knowledg 1 +represent 1 +organ 1 +remot 1 +profession 1 +activitiesmemb 1 +nation 1 +council 1 +commiss 1 +physic 1 +mathemat 1 +applicationsmemb 1 +boardmemb 1 +scientif 1 +advisori 1 +board 1 +state 1 +forcememb 1 +academi 1 +engineeringfellow 1 +american 1 +art 1 +sciencesfellow 1 +associ 1 +advanc 1 +aaa 1 +fellow 1 +electr 1 +electron 1 +ieee 1 +machinerychairman 1 +siam 1 +trusteesmemb 1 +committe 1 +david 1 +lucil 1 +packard 1 +foundationmemb 1 +sloan 1 +fellowship 1 +committeeadvisori 1 +supercomput 1 +center 1 +defens 1 +analysiseditor 1 +oxford 1 +press 1 +intern 1 +seri 1 +algorithmica 1 +discret 1 +geometryassoci 1 +editor 1 +journal 1 +geometri 1 +applic 1 +system 1 +sciencesreturn 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +cornel 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..76b1d349 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,257 @@ +keshav 1 +pingalikeshav 1 +pingaliassoci 1 +professorphd 1 +research 1 +group 1 +work 1 +area 1 +program 1 +languag 1 +compil 1 +parallel 1 +architectur 1 +goal 1 +develop 1 +tool 1 +gener 1 +code 1 +applic 1 +deal 1 +larg 1 +spars 1 +matric 1 +scientif 1 +involv 1 +numer 1 +solut 1 +partial 1 +differenti 1 +equat 1 +techniqu 1 +us 1 +almost 1 +alwai 1 +produc 1 +system 1 +algebra 1 +unfortun 1 +exist 1 +technolog 1 +poor 1 +matrix 1 +take 1 +radic 1 +differ 1 +approach 1 +problem 1 +sequenti 1 +dens 1 +inform 1 +user 1 +sparsiti 1 +structur 1 +enabl 1 +restructur 1 +preliminari 1 +experi 1 +krylov 1 +space 1 +solver 1 +show 1 +competit 1 +hand 1 +librari 1 +like 1 +argonn 1 +petsc 1 +extend 1 +direct 1 +method 1 +solv 1 +linear 1 +requir 1 +adapt 1 +mesh 1 +refin 1 +project 1 +build 1 +earlier 1 +distribut 1 +memori 1 +uniform 1 +access 1 +numa 1 +processor 1 +local 1 +faster 1 +good 1 +perform 1 +must 1 +also 1 +ensur 1 +refer 1 +match 1 +data 1 +made 1 +block 1 +transfer 1 +prefer 1 +mani 1 +small 1 +messag 1 +recent 1 +best 1 +algorithm 1 +known 1 +automat 1 +align 1 +comput 1 +incorpor 1 +test 1 +novel 1 +loop 1 +call 1 +normal 1 +transform 1 +nest 1 +increas 1 +potenti 1 +implement 1 +lambda 1 +toolkit 1 +paper 1 +summar 1 +result 1 +prize 1 +asplo 1 +hewlett 1 +packard 1 +fortran 1 +product 1 +line 1 +uniprocessor 1 +multiprocessor 1 +framework 1 +analysi 1 +optim 1 +base 1 +depend 1 +flow 1 +graph 1 +knit 1 +togeth 1 +control 1 +permit 1 +better 1 +possibl 1 +compet 1 +independ 1 +interest 1 +exampl 1 +answer 1 +foundat 1 +question 1 +open 1 +decad 1 +time 1 +static 1 +singl 1 +assign 1 +form 1 +number 1 +includ 1 +microsoft 1 +flavor 1 +profession 1 +activitiespanel 1 +member 1 +organ 1 +symposium 1 +principl 1 +practic 1 +nation 1 +young 1 +investig 1 +award 1 +panel 1 +consult 1 +lab 1 +intel 1 +corpor 1 +armi 1 +ballist 1 +odyssei 1 +math 1 +scienc 1 +institut 1 +refere 1 +review 1 +topla 1 +ieee 1 +transact 1 +journal 1 +supercomput 1 +computereditori 1 +board 1 +intern 1 +awardsn 1 +presidenti 1 +faculti 1 +lecturesfast 1 +chelmsford 1 +massachusett 1 +januari 1 +depart 1 +wayn 1 +state 1 +univers 1 +detroit 1 +michigan 1 +februari 1 +rutger 1 +brunswick 1 +jersei 1 +laboratori 1 +redmond 1 +washington 1 +june 1 +publicationssolv 1 +elementari 1 +proceed 1 +seventh 1 +annual 1 +workshop 1 +lcpc 1 +lectur 1 +note 1 +ithaca 1 +august 1 +david 1 +indupraka 1 +kodukula 1 +vladimir 1 +kotlyar 1 +paul 1 +stodghil 1 +sigplan 1 +confer 1 +design 1 +pldi 1 +gianfranco 1 +bilardi 1 +return 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +comment 1 +pleas 1 +contact 1 +cornel 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..cf0bd9eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,366 @@ +fred 1 +schneiderfr 1 +schneider 1 +professorphd 1 +state 1 +univ 1 +stoni 1 +brook 1 +techniqu 1 +understand 1 +concurr 1 +program 1 +becom 1 +increasingli 1 +import 1 +distribut 1 +comput 1 +system 1 +widespread 1 +mission 1 +critic 1 +applic 1 +research 1 +focus 1 +develop 1 +heavili 1 +involv 1 +appli 1 +assert 1 +reason 1 +design 1 +fault 1 +toler 1 +real 1 +time 1 +complet 1 +textbook 1 +subject 1 +along 1 +david 1 +gri 1 +continu 1 +investig 1 +concern 1 +first 1 +order 1 +equat 1 +logic 1 +past 1 +year 1 +streamlin 1 +infer 1 +rule 1 +evalu 1 +number 1 +handl 1 +undefin 1 +term 1 +partial 1 +function 1 +thoma 1 +bressoud 1 +build 1 +analyz 1 +hypervisor 1 +base 1 +implement 1 +replic 1 +manag 1 +risc 1 +architectur 1 +protocol 1 +ensur 1 +sequenc 1 +instruct 1 +execut 1 +virtual 1 +machin 1 +run 1 +differ 1 +physic 1 +processor 1 +ident 1 +also 1 +coordin 1 +issu 1 +replica 1 +attract 1 +least 1 +theori 1 +instantli 1 +avail 1 +hardwar 1 +realiz 1 +given 1 +includ 1 +exist 1 +written 1 +second 1 +singl 1 +suffic 1 +everi 1 +oper 1 +final 1 +programm 1 +freed 1 +task 1 +jointli 1 +johansen 1 +univers 1 +trom 1 +norwai 1 +robbert 1 +reness 1 +start 1 +tacoma 1 +project 1 +cornel 1 +move 1 +agent 1 +support 1 +mobil 1 +process 1 +structur 1 +construct 1 +commun 1 +network 1 +bandwidth 1 +conserv 1 +data 1 +access 1 +site 1 +resid 1 +typic 1 +filter 1 +otherwis 1 +reduc 1 +read 1 +carri 1 +relev 1 +inform 1 +roam 1 +prototyp 1 +third 1 +experi 1 +scott 1 +stoller 1 +algorithm 1 +detect 1 +whether 1 +particular 1 +asynchron 1 +could 1 +pass 1 +global 1 +satisfi 1 +predic 1 +allow 1 +effici 1 +possibl 1 +previou 1 +activitiessabbat 1 +leav 1 +profession 1 +activitieseditor 1 +chief 1 +computingeditor 1 +letterseditor 1 +ieee 1 +transact 1 +softwar 1 +engineeringeditor 1 +high 1 +integr 1 +systemseditor 1 +annal 1 +surveysco 1 +editor 1 +text 1 +monograph 1 +scienc 1 +springer 1 +verlagprogram 1 +committe 1 +member 1 +intern 1 +school 1 +symposium 1 +formal 1 +systemsprogram 1 +confer 1 +mathemat 1 +constructionprogram 1 +workshop 1 +respons 1 +compos 1 +resili 1 +fifth 1 +ifip 1 +work 1 +depend 1 +applicationsprogram 1 +sixteenth 1 +symposiumprogram 1 +dimac 1 +verif 1 +control 1 +hybrid 1 +systemsst 1 +center 1 +assur 1 +chissa 1 +nation 1 +institut 1 +standard 1 +technologymemb 1 +isat 1 +defens 1 +warfar 1 +studi 1 +group 1 +advanc 1 +agencyreview 1 +leibniz 1 +hebrew 1 +universitymemb 1 +methodolog 1 +awardsfellow 1 +american 1 +associ 1 +sciencefellow 1 +machinerylecturesproof 1 +outlin 1 +lectur 1 +summer 1 +marktoberdorf 1 +germani 1 +juli 1 +origin 1 +tradit 1 +banquet 1 +speech 1 +exploit 1 +environ 1 +afosr 1 +grante 1 +contractor 1 +meet 1 +washington 1 +sept 1 +verifi 1 +lubeck 1 +panelist 1 +compar 1 +merit 1 +synchron 1 +model 1 +safeti 1 +moder 1 +write 1 +specif 1 +refin 1 +reactiv 1 +dagstuhl 1 +merg 1 +polici 1 +analysi 1 +georg 1 +mason 1 +virginia 1 +avoid 1 +mistak 1 +invit 1 +speaker 1 +traffic 1 +nasa 1 +am 1 +technic 1 +munich 1 +proof 1 +north 1 +carolina 1 +chapel 1 +hill 1 +march 1 +ad 1 +distinguish 1 +seri 1 +panel 1 +organ 1 +teach 1 +tool 1 +sigcs 1 +educ 1 +nashvil 1 +tennesse 1 +technion 1 +haifa 1 +israel 1 +april 1 +place 1 +airplan 1 +view 1 +successor 1 +arpa 1 +june 1 +publicationsreason 1 +proceed 1 +colloquium 1 +icalp 1 +jerusalem 1 +note 1 +verlag 1 +york 1 +materi 1 +summari 1 +boll 1 +forc 1 +offic 1 +scientif 1 +septemb 1 +volum 1 +limor 1 +proposit 1 +letter 1 +februari 1 +aircraft 1 +hand 1 +foundat 1 +ultradepend 1 +parallel 1 +paradigm 1 +kluwer 1 +academ 1 +publish 1 +marzullo 1 +dehn 1 +bulletin 1 +topic 1 +hoto 1 +orca 1 +island 1 +causal 1 +messag 1 +art 1 +newslett 1 +spring 1 +approach 1 +discret 1 +primu 1 +return 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..193df85a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,115 @@ +charl 1 +loancharl 1 +loanprofessorphd 1 +univers 1 +michigan 1 +continu 1 +develop 1 +method 1 +variou 1 +kroneck 1 +product 1 +comput 1 +particular 1 +solv 1 +nearest 1 +problem 1 +factor 1 +matric 1 +subject 1 +inhomogen 1 +constraint 1 +applic 1 +signal 1 +process 1 +markov 1 +result 1 +descript 1 +fast 1 +wavelet 1 +transform 1 +also 1 +deriv 1 +proce 1 +analog 1 +plai 1 +import 1 +role 1 +high 1 +perform 1 +algorithm 1 +undergradu 1 +text 1 +scienc 1 +work 1 +last 1 +four 1 +year 1 +current 1 +translat 1 +anticip 1 +fall 1 +semest 1 +activitiescomput 1 +depart 1 +curriculum 1 +committeedepart 1 +repres 1 +art 1 +chair 1 +meetingfreshman 1 +admiss 1 +reader 1 +profession 1 +activitieseditor 1 +siam 1 +journal 1 +matrix 1 +analysismemb 1 +wilkinson 1 +prize 1 +committe 1 +member 1 +diprima 1 +organ 1 +household 1 +confer 1 +lecturesappl 1 +linkop 1 +sweden 1 +januari 1 +umea 1 +build 1 +intuit 1 +ohio 1 +state 1 +april 1 +publicationsoptim 1 +close 1 +loop 1 +adapt 1 +optic 1 +multipl 1 +control 1 +bandwidth 1 +societi 1 +america 1 +ellerbroek 1 +pitsiani 1 +plemmon 1 +return 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +cornel 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..3b4db2f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,57 @@ +catherin 1 +wagnercatherin 1 +wagnersenior 1 +lecturerphd 1 +cornel 1 +univers 1 +primari 1 +respons 1 +comput 1 +scienc 1 +depart 1 +teach 1 +work 1 +other 1 +revis 1 +curriculum 1 +lower 1 +level 1 +cours 1 +specif 1 +interest 1 +develop 1 +student 1 +prepar 1 +introductori 1 +program 1 +activitiescomput 1 +undergradu 1 +committe 1 +profession 1 +activitiesassoci 1 +symbol 1 +logic 1 +associ 1 +machineri 1 +institut 1 +electr 1 +electron 1 +engin 1 +women 1 +mathemat 1 +return 1 +annual 1 +report 1 +home 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +denis 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..22aef4c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,350 @@ +faculti 1 +research 1 +interest 1 +brian 1 +smith 1 +bsmith 1 +cornel 1 +xerox 1 +professor 1 +comput 1 +scienc 1 +univers 1 +california 1 +berkelei 1 +offic 1 +upson 1 +hall 1 +phone 1 +hour 1 +semest 1 +tue 1 +thur 1 +intereststeachingselect 1 +publicationsresearch 1 +talksmisc 1 +linksresearch 1 +interestsmi 1 +goal 1 +make 1 +video 1 +first 1 +class 1 +data 1 +type 1 +ourcomput 1 +environ 1 +group 1 +project 1 +zeno 1 +build 1 +technolog 1 +support 1 +storag 1 +commun 1 +andprocess 1 +continu 1 +media 1 +contrast 1 +commercialand 1 +approach 1 +requir 1 +special 1 +hardwar 1 +operatingsystem 1 +network 1 +usabl 1 +aredesign 1 +current 1 +premis 1 +isthat 1 +softwar 1 +infrastructurei 1 +suffici 1 +system 1 +andappl 1 +verifi 1 +hypothesi 1 +workingsystem 1 +direct 1 +toward 1 +zenodistribut 1 +file 1 +server 1 +architectur 1 +us 1 +ofworkst 1 +connect 1 +gener 1 +local 1 +area 1 +anethernet 1 +common 1 +laboratori 1 +workstat 1 +simultan 1 +client 1 +serverof 1 +plai 1 +videostor 1 +fileserv 1 +receiv 1 +store 1 +severalserv 1 +servic 1 +sever 1 +compar 1 +withlarg 1 +central 1 +advantag 1 +design 1 +scalabl 1 +ad 1 +automat 1 +load 1 +balanc 1 +serv 1 +distribut 1 +across 1 +machin 1 +case 1 +locat 1 +differ 1 +initi 1 +invest 1 +util 1 +exist 1 +infrastructur 1 +promot 1 +earli 1 +adopt 1 +almost 1 +center 1 +around 1 +best 1 +effortdeliveri 1 +protocol 1 +built 1 +networkprotocol 1 +mani 1 +effort 1 +needto 1 +reserv 1 +resourc 1 +establish 1 +resourcereserv 1 +well 1 +suit 1 +nation 1 +communicationinfrastructur 1 +user 1 +charg 1 +call 1 +basi 1 +forbandwidth 1 +poorli 1 +networkenviron 1 +share 1 +equal 1 +accessiblebi 1 +appropri 1 +latterenviron 1 +commonli 1 +found 1 +thecommun 1 +develop 1 +cyclic 1 +builton 1 +datagram 1 +transport 1 +audioand 1 +playback 1 +applic 1 +metropolitan 1 +andwid 1 +todeliv 1 +audio 1 +paper 1 +describ 1 +availableonlin 1 +slide 1 +talk 1 +process 1 +fold 1 +aredevelop 1 +algorithm 1 +compressedrepresent 1 +without 1 +decompress 1 +lead 1 +todramat 1 +speed 1 +perform 1 +sinc 1 +remov 1 +thetim 1 +consum 1 +compress 1 +reducesth 1 +amount 1 +must 1 +experi 1 +animplement 1 +idea 1 +jpeg 1 +imag 1 +indicatesthat 1 +order 1 +magnitud 1 +fasterthan 1 +possibl 1 +previou 1 +currentlyextend 1 +parallel 1 +method 1 +transcod 1 +insoftwar 1 +translat 1 +onecompress 1 +format 1 +anoth 1 +oper 1 +heterogen 1 +compresseddomain 1 +avail 1 +onlin 1 +also 1 +explor 1 +simplifyexperiment 1 +programminglanguag 1 +languag 1 +calledrivl 1 +pronounc 1 +rival 1 +allowsvideo 1 +effect 1 +specifi 1 +independ 1 +resolutionand 1 +sourc 1 +materi 1 +whatpostscript 1 +text 1 +graphic 1 +provid 1 +resolutionindepend 1 +thu 1 +sameprogram 1 +qualiti 1 +quicktim 1 +quickli 1 +whileedit 1 +decis 1 +made 1 +high 1 +qualityfinish 1 +product 1 +line 1 +much 1 +postscript 1 +bepreview 1 +sent 1 +dpiprint 1 +camera 1 +readi 1 +copi 1 +review 1 +onvideo 1 +domain 1 +rivl 1 +theme 1 +tool 1 +videous 1 +teachingat 1 +teach 1 +undergradu 1 +cours 1 +computerarchitectur 1 +graduat 1 +multimedia 1 +select 1 +public 1 +jonathan 1 +swartz 1 +resolut 1 +proc 1 +third 1 +intern 1 +confer 1 +francisco 1 +novemb 1 +html 1 +version 1 +asif 1 +ghia 1 +logan 1 +david 1 +chamberlin 1 +queri 1 +hum 1 +larg 1 +music 1 +inform 1 +retriev 1 +databas 1 +peter 1 +lawrenc 1 +row 1 +name 1 +workshop 1 +toronto 1 +ontario 1 +canada 1 +juli 1 +prioriti 1 +driven 1 +fast 1 +motion 1 +second 1 +sanfrancisco 1 +octob 1 +ketan 1 +patel 1 +mpeg 1 +represent 1 +transmiss 1 +spie 1 +symposium 1 +electron 1 +jose 1 +februari 1 +stephen 1 +program 1 +june 1 +decod 1 +internationalconfer 1 +anaheim 1 +august 1 +famili 1 +manipul 1 +ieee 1 +septemb 1 +player 1 +supportfor 1 +digit 1 +diego 1 +recent 1 +webster 1 +site 1 +minnesota 1 +colloquium 1 +misc 1 +link 1 +work 1 +annett 1 +hanna 1 +manual 1 +mmcn 1 +proceed 1 +documentationth 1 +priceweb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..5a8f94c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,279 @@ +robert 1 +constablerobert 1 +constabledepart 1 +chair 1 +professorrc 1 +cornel 1 +eduph 1 +univers 1 +wisconsin 1 +madison 1 +researchw 1 +engag 1 +studi 1 +comput 1 +system 1 +providemechan 1 +assist 1 +problem 1 +solv 1 +especi 1 +programmingand 1 +mathemat 1 +involv 1 +long 1 +term 1 +wai 1 +make 1 +theform 1 +feasibl 1 +us 1 +implementedthre 1 +past 1 +year 1 +nuprl 1 +major 1 +experiment 1 +line 1 +lispprogram 1 +implement 1 +construct 1 +theori 1 +type 1 +sucha 1 +formal 1 +canexpress 1 +wide 1 +varieti 1 +proof 1 +program 1 +build 1 +method 1 +asmetalevel 1 +provid 1 +consider 1 +theoremprov 1 +power 1 +moreov 1 +canevalu 1 +content 1 +theorem 1 +principl 1 +nuprli 1 +fomal 1 +languag 1 +continu 1 +improv 1 +current 1 +version 1 +iscal 1 +differ 1 +predecessor 1 +termeditor 1 +design 1 +stuart 1 +allen 1 +richard 1 +eaton 1 +itsintern 1 +structur 1 +modular 1 +suitabl 1 +hedefinit 1 +logic 1 +beyond 1 +built 1 +inconstruct 1 +also 1 +entir 1 +prove 1 +mechanismha 1 +rebuilt 1 +stream 1 +paul 1 +jackson 1 +thework 1 +dougla 1 +how 1 +contribut 1 +gener 1 +natur 1 +final 1 +refer 1 +isan 1 +intern 1 +descript 1 +builtprincip 1 +william 1 +aitken 1 +develop 1 +link 1 +internaldescript 1 +possibleto 1 +process 1 +three 1 +excit 1 +joint 1 +ventur 1 +withmiriam 1 +leeser 1 +electr 1 +engin 1 +incomput 1 +scienc 1 +davidgri 1 +polya 1 +richardzippel 1 +weyl 1 +withless 1 +hardwar 1 +synthesi 1 +verif 1 +student 1 +mark 1 +aagard 1 +thecorrect 1 +boolean 1 +circuit 1 +minim 1 +packag 1 +compon 1 +bedrocsystem 1 +weak 1 +divis 1 +algorithm 1 +widelyus 1 +efforttaught 1 +great 1 +deal 1 +effect 1 +technolog 1 +inth 1 +hand 1 +expert 1 +user 1 +applic 1 +domain 1 +second 1 +model 1 +aprogram 1 +refin 1 +mechan 1 +david 1 +gri 1 +enabl 1 +write 1 +handbook 1 +themann 1 +devis 1 +programmingprocess 1 +givn 1 +theoret 1 +definit 1 +ofpolya 1 +expect 1 +experi 1 +soon 1 +transform 1 +tryingto 1 +captur 1 +style 1 +want 1 +recent 1 +begun 1 +collabor 1 +hope 1 +relat 1 +thepolya 1 +effort 1 +conal 1 +mannion 1 +explor 1 +possibl 1 +ofus 1 +discussingproblem 1 +zippel 1 +connect 1 +ssymbol 1 +algebra 1 +near 1 +futur 1 +thiswil 1 +scientif 1 +computingsoftwar 1 +togeth 1 +tool 1 +isbuild 1 +profession 1 +activitieseditor 1 +journal 1 +symbol 1 +computationeditor 1 +academ 1 +presseditor 1 +oxford 1 +pressgener 1 +licsprogram 1 +committe 1 +north 1 +american 1 +jumelageprogram 1 +aspect 1 +softwarerefere 1 +review 1 +nserc 1 +canada 1 +scienceunivers 1 +activitieschair 1 +recruit 1 +committeecomput 1 +facil 1 +committeeprovost 1 +mathematicslecturesform 1 +softwar 1 +fundament 1 +betweencomput 1 +inria 1 +anniversari 1 +celebr 1 +pari 1 +franc 1 +decemb 1 +colloquium 1 +bengurion 1 +sheva 1 +israel 1 +januari 1 +state 1 +symposium 1 +aviv 1 +associ 1 +annual 1 +meet 1 +notr 1 +dame 1 +indiana 1 +march 1 +metaprogram 1 +york 1 +buffalo 1 +explan 1 +engineeringworkshop 1 +pennsylvania 1 +philadelphia 1 +publicationsform 1 +tendenc 1 +control 1 +appli 1 +bensoussan 1 +verju 1 +lectur 1 +note 1 +springer 1 +verlag 1 +metalevel 1 +andmathemat 1 +manfr 1 +broi 1 +nato 1 +seri 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..1dbef469 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,31 @@ +adam 1 +florenc 1 +upson 1 +hall 1 +cornel 1 +univers 1 +ithaca 1 +aflorenc 1 +first 1 +year 1 +student 1 +incomput 1 +scienc 1 +atcornel 1 +professionalinterest 1 +includ 1 +simul 1 +numer 1 +analysi 1 +follow 1 +link 1 +find 1 +academicsresearchworkinterest 1 +athlet 1 +last 1 +updat 1 +sept 1 +mail 1 +mewith 1 +comment 1 +correct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..405df4ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,30 @@ +marco 1 +kawazo 1 +aguilera 1 +home 1 +page 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +second 1 +year 1 +student 1 +distribut 1 +system 1 +algorithmsrandom 1 +failur 1 +detect 1 +hybrid 1 +approach 1 +solv 1 +consensusgo 1 +tour 1 +brazil 1 +check 1 +suggest 1 +warn 1 +perman 1 +constructionmarco 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..08553b18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,33 @@ +alfr 1 +home 1 +page 1 +hong 1 +get 1 +dizzi 1 +offic 1 +upson 1 +hall 1 +cornel 1 +univers 1 +ithaca 1 +usaoffic 1 +phone 1 +worthwhil 1 +site 1 +check 1 +new 1 +sinanet 1 +taiwan 1 +chines 1 +requir 1 +nandonet 1 +sunworld 1 +javaworldcours 1 +stuff 1 +corba 1 +essenti 1 +annot 1 +bibliographyc 1 +project 1 +reportalfr 1 +ahong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..e13b031a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,58 @@ +vineet 1 +home 1 +pagevineet 1 +ahujam 1 +engg 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +address 1 +hasbrouck 1 +apt 1 +ithaca 1 +mail 1 +ahuja 1 +cornel 1 +academ 1 +student 1 +scienc 1 +univers 1 +main 1 +area 1 +interest 1 +parallel 1 +process 1 +advanc 1 +architectur 1 +oper 1 +system 1 +object 1 +orient 1 +program 1 +coursework 1 +fall 1 +automata 1 +theori 1 +engin 1 +network 1 +spring 1 +high 1 +perform 1 +final 1 +project 1 +report 1 +softwar 1 +design 1 +reportfal 1 +capac 1 +inform 1 +multimedia 1 +resum 1 +postscript 1 +recent 1 +html 1 +page 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..7f4339e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,21 @@ +ching 1 +chinglan 1 +cornel 1 +edumast 1 +engin 1 +student 1 +comput 1 +scienc 1 +depart 1 +beau 1 +pair 1 +address 1 +seneca 1 +ithaca 1 +telephon 1 +page 1 +still 1 +construct 1 +java 1 +examplegraph 1 +project 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..c7ea752d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,82 @@ +alexei 1 +home 1 +pagealexei 1 +vaysburdalexei 1 +cornel 1 +program 1 +comput 1 +scienc 1 +univers 1 +research 1 +interest 1 +includ 1 +distribut 1 +system 1 +group 1 +commun 1 +larg 1 +scale 1 +wide 1 +area 1 +andobject 1 +orient 1 +tool 1 +work 1 +within 1 +horu 1 +lead 1 +birman 1 +androbbert 1 +reness 1 +paper 1 +implement 1 +replic 1 +state 1 +machin 1 +partition 1 +network 1 +friedman 1 +vaysburd 1 +link 1 +search 1 +engin 1 +lyco 1 +technic 1 +report 1 +broadcast 1 +seri 1 +ecol 1 +polytechniqu 1 +federal 1 +lausann 1 +hebrew 1 +transi 1 +page 1 +cuinfo 1 +gopher 1 +direct 1 +cornellcornel 1 +mail 1 +directorycornel 1 +staff 1 +directori 1 +student 1 +directorycours 1 +class 1 +examscornel 1 +calendarcornel 1 +art 1 +musicbailei 1 +hall 1 +concertscornel 1 +music 1 +event 1 +new 1 +weather 1 +ithaca 1 +current 1 +condit 1 +ithacaworld 1 +brief 1 +odessa 1 +odessaweb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..fbe2c8e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,170 @@ +yama 1 +hell 1 +accord 1 +hindu 1 +mytholog 1 +death 1 +come 1 +downto 1 +earth 1 +give 1 +ride 1 +heaven 1 +though 1 +kind 1 +imparti 1 +better 1 +mess 1 +amithyamasanim 1 +engg 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +yorki 1 +make 1 +monei 1 +microsystem 1 +garcia 1 +avenu 1 +mountain 1 +view 1 +mailstop 1 +ucup 1 +cupertino 1 +javasoft 1 +watch 1 +warburton 1 +santa 1 +clara 1 +mail 1 +amith 1 +yamasani 1 +comi 1 +currentlyemploi 1 +california 1 +workingin 1 +javamedia 1 +groupeducationfal 1 +coursesvlsi 1 +multimedia 1 +cssoftwar 1 +engin 1 +csspring 1 +high 1 +perform 1 +system 1 +cscomput 1 +graphic 1 +csproject 1 +train 1 +simul 1 +final 1 +project 1 +mpeg 1 +descript 1 +parallel 1 +trace 1 +propos 1 +virtual 1 +realiti 1 +railroad 1 +master 1 +involv 1 +railroadsystem 1 +includ 1 +track 1 +switch 1 +station 1 +landscap 1 +user 1 +interact 1 +wall 1 +cave 1 +environ 1 +stereo 1 +vision 1 +done 1 +processor 1 +onyx 1 +softwarei 1 +written 1 +us 1 +openinventord 1 +librari 1 +silicon 1 +gener 1 +dynam 1 +ofth 1 +through 1 +documentimag 1 +compress 1 +chipdevelop 1 +imag 1 +chip 1 +vlsi 1 +cours 1 +basicallycompress 1 +data 1 +stream 1 +algorithm 1 +thisalgorithm 1 +suit 1 +control 1 +extern 1 +cachecam 1 +content 1 +address 1 +memori 1 +store 1 +pattern 1 +inputstream 1 +hope 1 +might 1 +occur 1 +capableof 1 +rate 1 +byte 1 +nano 1 +second 1 +nowai 1 +softwar 1 +routin 1 +speed 1 +snap 1 +video 1 +transit 1 +rivlproposalpresentationand 1 +sampl 1 +pyramania 1 +dthi 1 +game 1 +develop 1 +itswritten 1 +interfac 1 +xlib 1 +thed 1 +render 1 +take 1 +look 1 +snapshot 1 +screen 1 +parallelomania 1 +resumehtmlpostscript 1 +past 1 +present 1 +futur 1 +pal 1 +home 1 +page 1 +satyaprasad 1 +avinashgupta 1 +kartikh 1 +kapadia 1 +hrishikeshdixit 1 +joselui 1 +fernandez 1 +vineetahuja 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..fe4f5122 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,89 @@ +ankit 1 +patel 1 +homepag 1 +east 1 +state 1 +street 1 +ithaca 1 +apatel 1 +cornel 1 +person 1 +photo 1 +galleria 1 +chronologia 1 +curriculum 1 +vita 1 +universityresumedepart 1 +comput 1 +scienc 1 +cours 1 +enrolledgradu 1 +research 1 +assist 1 +prof 1 +brian 1 +smith 1 +multimedia 1 +group 1 +project 1 +zeno 1 +canvasd 1 +toolkit 1 +interact 1 +platform 1 +independ 1 +develop 1 +high 1 +perform 1 +graphic 1 +virtual 1 +realiti 1 +applic 1 +video 1 +conferencingmultimedia 1 +system 1 +read 1 +assignmentsproject 1 +kernel 1 +endpoint 1 +netan 1 +annot 1 +bibliographi 1 +common 1 +object 1 +request 1 +broker 1 +architectur 1 +corba 1 +critiqu 1 +understand 1 +limit 1 +causal 1 +total 1 +order 1 +commun 1 +david 1 +cheriton 1 +dale 1 +skeen 1 +carnegi 1 +mellon 1 +summer 1 +school 1 +scienceworld 1 +wide 1 +technolog 1 +spring 1 +link 1 +real 1 +time 1 +support 1 +multimediamaharaja 1 +sayajirao 1 +univers 1 +academicsfriend 1 +techoreli 1 +industri 1 +limitedjob 1 +profilelif 1 +relianc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..fd6b3e07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,15 @@ +ashish 1 +jhaveriashish 1 +jhaveridepart 1 +comput 1 +sciencemast 1 +engineeringresumehtmlpost 1 +scriptcourseworkadvanc 1 +databas 1 +systemscsmultimedia 1 +systemscsengin 1 +networkscsprogram 1 +languag 1 +softwareengin 1 +csashish 1 +jhaveri 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..a6991c57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,39 @@ +aswin 1 +berg 1 +graduat 1 +student 1 +cornel 1 +eduoffic 1 +upson 1 +hall 1 +phone 1 +home 1 +skyacr 1 +drive 1 +ithaca 1 +interest 1 +program 1 +languag 1 +transform 1 +systemmi 1 +person 1 +life 1 +famili 1 +album 1 +annek 1 +page 1 +swing 1 +danc 1 +server 1 +jean 1 +deejay 1 +guid 1 +isdn 1 +record 1 +hop 1 +pictur 1 +nederlands 1 +club 1 +dutch 1 +clubi 1 +atcornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..9da0527c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,122 @@ +welcomeavinash 1 +guptam 1 +engg 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +home 1 +address 1 +magazin 1 +streetcambridg 1 +mail 1 +avinash 1 +thecia 1 +resumemi 1 +main 1 +area 1 +interest 1 +graphic 1 +multimedia 1 +distribut 1 +system 1 +cours 1 +oper 1 +softwar 1 +engin 1 +high 1 +perform 1 +project 1 +hoca 1 +chip 1 +pronounc 1 +hodja 1 +full 1 +fledg 1 +cornel 1 +hypothet 1 +instruct 1 +processor 1 +support 1 +featur 1 +like 1 +multipl 1 +user 1 +multitask 1 +virtual 1 +memori 1 +scene 1 +transit 1 +effect 1 +rivl 1 +presentationpent 1 +network 1 +game 1 +skillpent 1 +mean 1 +five 1 +board 1 +skill 1 +requir 1 +player 1 +object 1 +point 1 +piec 1 +horizont 1 +vertic 1 +diagon 1 +earn 1 +trap 1 +oppon 1 +even 1 +number 1 +either 1 +pair 1 +reach 1 +first 1 +win 1 +window 1 +screenshot 1 +gamedownload 1 +ipng 1 +space 1 +implement 1 +next 1 +gener 1 +ipvimpl 1 +internet 1 +protocol 1 +us 1 +interfac 1 +make 1 +abl 1 +stream 1 +proposalprogress 1 +reportsam 1 +caveat 1 +appli 1 +page 1 +almost 1 +everi 1 +pageon 1 +still 1 +construct 1 +brows 1 +internethytelnetth 1 +librari 1 +subject 1 +catalogeinet 1 +galaxyplanet 1 +earth 1 +pagejoel 1 +hierarch 1 +indexyahoo 1 +guid 1 +wwwwebcrawlerlycosmi 1 +friend 1 +link 1 +sign 1 +guestbook 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..19be4d9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,257 @@ +jonathan 1 +barber 1 +fleshpooooooooooooooch 1 +upson 1 +hall 1 +cours 1 +research 1 +interest 1 +futur 1 +cool 1 +dude 1 +page 1 +felt 1 +inclin 1 +chang 1 +first 1 +sentenc 1 +birth 1 +name 1 +peopl 1 +usual 1 +callm 1 +also 1 +call 1 +ponch 1 +orpooch 1 +us 1 +lake 1 +fork 1 +guppi 1 +skin 1 +mama 1 +phin 1 +list 1 +goe 1 +finish 1 +degre 1 +graduat 1 +student 1 +master 1 +engin 1 +program 1 +computersci 1 +cornellunivers 1 +undergrad 1 +cornel 1 +comput 1 +scienc 1 +well 1 +grew 1 +coupl 1 +town 1 +jersei 1 +attendedmontgomeri 1 +high 1 +school 1 +current 1 +live 1 +collegetown 1 +adjac 1 +campu 1 +cheap 1 +summer 1 +sublet 1 +oncolleg 1 +ultra 1 +conveni 1 +sinc 1 +work 1 +plai 1 +half 1 +mile 1 +radiu 1 +went 1 +greek 1 +spent 1 +time 1 +epsilon 1 +fratern 1 +border 1 +cayuga 1 +height 1 +kind 1 +hard 1 +believ 1 +dai 1 +hand 1 +project 1 +prof 1 +thorsten 1 +eicken 1 +develop 1 +packet 1 +filter 1 +thefilt 1 +bandwidth 1 +network 1 +protocol 1 +bring 1 +parallel 1 +home 1 +person 1 +taught 1 +week 1 +session 1 +ofc 1 +fundament 1 +concept 1 +introductori 1 +past 1 +year 1 +teach 1 +assist 1 +forc 1 +introduct 1 +digit 1 +system 1 +organ 1 +fall 1 +spring 1 +intereststhi 1 +semest 1 +rivl 1 +resolut 1 +independ 1 +video 1 +languag 1 +product 1 +multimedia 1 +group 1 +final 1 +labor 1 +report 1 +implement 1 +continu 1 +media 1 +rivlan 1 +improv 1 +object 1 +tracker 1 +rivli 1 +taken 1 +perform 1 +compil 1 +good 1 +friend 1 +sugata 1 +mukhopadhyai 1 +still 1 +class 1 +smpd 1 +code 1 +generatorfor 1 +fortran 1 +base 1 +linear 1 +algebra 1 +framework 1 +paper 1 +come 1 +soon 1 +previou 1 +greg 1 +weber 1 +webar 1 +encrypt 1 +smart 1 +frame 1 +depend 1 +stream 1 +mpeg 1 +unfortun 1 +electron 1 +form 1 +trust 1 +look 1 +bright 1 +pack 1 +gear 1 +take 1 +perman 1 +road 1 +trip 1 +oregon 1 +hopefulli 1 +buddi 1 +surfer 1 +inform 1 +like 1 +share 1 +pleas 1 +write 1 +intel 1 +corp 1 +ahead 1 +pictur 1 +portland 1 +clarif 1 +actual 1 +locat 1 +hous 1 +thought 1 +would 1 +inde 1 +pretti 1 +resourceful 1 +camera 1 +room 1 +window 1 +pipe 1 +meanth 1 +gener 1 +favorit 1 +line 1 +comrad 1 +georg 1 +bush 1 +realli 1 +nice 1 +keep 1 +ofhi 1 +found 1 +lucki 1 +consid 1 +least 1 +know 1 +decid 1 +otherwis 1 +captain 1 +swirl 1 +nefari 1 +toilet 1 +mukhopadyai 1 +bonei 1 +magoo 1 +fletop 1 +bigro 1 +dave 1 +koster 1 +bot 1 +offici 1 +tffl 1 +bulli 1 +pageuuencod 1 +graphic 1 +need 1 +pagetar 1 +zip 1 +pageif 1 +brows 1 +feel 1 +urg 1 +send 1 +mail 1 +downloadsgraphicsbarb 1 +gifponch 1 +htmlres_htmlres_curemmittemmitt 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..6b7097e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,202 @@ +anindya 1 +homepag 1 +realli 1 +look 1 +like 1 +click 1 +photograph 1 +recent 1 +mugshot 1 +locat 1 +work 1 +home 1 +upson 1 +hall 1 +delawar 1 +avenu 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +phone 1 +offic 1 +mail 1 +basu 1 +eduwhat 1 +graduat 1 +student 1 +scienceat 1 +hope 1 +complet 1 +turn 1 +centuri 1 +listen 1 +rock 1 +musicor 1 +goof 1 +thecornel 1 +cluster 1 +projectwith 1 +advisor 1 +thorsten 1 +eicken 1 +think 1 +coollik 1 +indiawho 1 +went 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +hardpink 1 +floydfanat 1 +final 1 +realiz 1 +childhood 1 +dream 1 +see 1 +perform 1 +livelast 1 +summer 1 +philadelphia 1 +love 1 +plum 1 +known 1 +thegreat 1 +unwash 1 +pelham 1 +grenvil 1 +wodehous 1 +also 1 +attendedwoodstock 1 +truli 1 +motiv 1 +onlinewoodstock 1 +review 1 +woodstock 1 +experienc 1 +metallica 1 +live 1 +first 1 +time 1 +would 1 +thesocc 1 +worldcup 1 +last 1 +year 1 +butunfortun 1 +happen 1 +cook 1 +tri 1 +putsomerecip 1 +onlin 1 +fanci 1 +connoisseurof 1 +good 1 +whiski 1 +especi 1 +singl 1 +malt 1 +link 1 +cool 1 +stuff 1 +calvin 1 +hobb 1 +archiv 1 +south 1 +asian 1 +writer 1 +cheer 1 +monti 1 +python 1 +beavi 1 +buttheadoth 1 +us 1 +cuinfo 1 +gopherand 1 +someth 1 +differ 1 +projectwhich 1 +aim 1 +provid 1 +high 1 +commun 1 +layer 1 +ofworkst 1 +parallel 1 +program 1 +compar 1 +state 1 +mpp 1 +acheiv 1 +develop 1 +fast 1 +messag 1 +passinglay 1 +call 1 +implement 1 +activ 1 +split 1 +thatshow 1 +run 1 +meiko 1 +addit 1 +show 1 +latenc 1 +saturateth 1 +fibr 1 +segment 1 +size 1 +collabor 1 +withth 1 +berkelei 1 +project 1 +team 1 +specificationfor 1 +enabl 1 +processesboth 1 +trust 1 +untrust 1 +environ 1 +public 1 +mechan 1 +integr 1 +david 1 +culler 1 +seth 1 +goldstein 1 +klau 1 +schauser 1 +proceed 1 +symp 1 +architectur 1 +gold 1 +coast 1 +australia 1 +network 1 +veena 1 +avula 1 +vineet 1 +buch 1 +present 1 +interconnect 1 +palo 1 +alto 1 +abridgedvers 1 +paper 1 +appear 1 +ieee 1 +micro 1 +februari 1 +user 1 +level 1 +interfac 1 +distribut 1 +werner 1 +vogel 1 +sosp 1 +back 1 +homepagelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..cada2b3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,105 @@ +simpl 1 +homepageben 1 +haogradu 1 +studentbhao 1 +cornel 1 +educornel 1 +universitydept 1 +comput 1 +scienc 1 +upson 1 +hallithaca 1 +insid 1 +flea 1 +forth 1 +year 1 +student 1 +incomput 1 +work 1 +code 1 +synthesi 1 +massiv 1 +parallel 1 +processor 1 +advisor 1 +richard 1 +zippel 1 +first 1 +heard 1 +georg 1 +taylorwhen 1 +microsystem 1 +famili 1 +francisco 1 +area 1 +find 1 +inform 1 +aboutth 1 +read 1 +local 1 +newspap 1 +forget 1 +check 1 +itsgorg 1 +weather 1 +life 1 +cornellwhat 1 +ithaca 1 +movi 1 +studi 1 +librari 1 +dept 1 +home 1 +page 1 +gener 1 +neat 1 +stuff 1 +stock 1 +map 1 +interact 1 +frog 1 +dissectionmagazin 1 +intertext 1 +magazin 1 +wire 1 +magazinea 1 +peek 1 +galaxi 1 +view 1 +solar 1 +system 1 +shoemak 1 +levi 1 +music 1 +weblouvr 1 +xmorphia 1 +galleri 1 +line 1 +geometri 1 +kaleidospac 1 +jpop 1 +overview 1 +bonsai 1 +seiyuu 1 +internet 1 +underground 1 +archivenetwork 1 +navig 1 +global 1 +network 1 +wander 1 +spider 1 +edgelibrari 1 +congress 1 +martial 1 +scientif 1 +info 1 +tutori 1 +infonih 1 +courseth 1 +intern 1 +guidecern 1 +seminar 1 +last 1 +modifi 1 +bhao 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..45ce2df3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,23 @@ +aastha 1 +home 1 +pageaastha 1 +bhardwajdepart 1 +comput 1 +sciencemast 1 +ofengineeeringresumehtmlpost 1 +scriptcourseworkadvanceddatabas 1 +system 1 +csmultimediasystem 1 +csengineeringcomput 1 +network 1 +cssoftwareengin 1 +program 1 +languag 1 +cscontact 1 +inform 1 +hasbrouck 1 +apart 1 +ithaca 1 +york 1 +bhardwaj 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..94c79ae7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,283 @@ +bruce 1 +randal 1 +donaldbruc 1 +donaldassoci 1 +professorbrd 1 +cornel 1 +eduph 1 +weather 1 +palo 1 +alto 1 +offici 1 +department 1 +home 1 +page 1 +robot 1 +vision 1 +laboratorydan 1 +huttenloch 1 +found 1 +thecornel 1 +laboratori 1 +researchmi 1 +interest 1 +includ 1 +professor 1 +noel 1 +macdonald 1 +build 1 +massiv 1 +parallel 1 +arrai 1 +microactu 1 +nation 1 +nanofabr 1 +arrayi 1 +scream 1 +chip 1 +contain 1 +actuat 1 +squarecentemet 1 +orient 1 +small 1 +part 1 +without 1 +sensoryfeedback 1 +microfabr 1 +could 1 +us 1 +toconstruct 1 +programm 1 +feeder 1 +scale 1 +buildself 1 +propel 1 +walk 1 +vlsi 1 +graduat 1 +student 1 +amybrigg 1 +work 1 +group 1 +develop 1 +sensor 1 +plan 1 +surveil 1 +system 1 +team 1 +ofmobil 1 +board 1 +detect 1 +andintercept 1 +target 1 +demo 1 +micro 1 +fabric 1 +mpeg 1 +video 1 +tommi 1 +chase 1 +lili 1 +mobil 1 +built 1 +algorithm 1 +developedbi 1 +ourlab 1 +track 1 +follow 1 +visual 1 +informationalon 1 +show 1 +view 1 +face 1 +morph 1 +select 1 +recent 1 +public 1 +bhringer 1 +donald 1 +upper 1 +andlow 1 +bound 1 +vector 1 +field 1 +applic 1 +memsand 1 +vibratori 1 +intern 1 +workshop 1 +thealgorithm 1 +foundat 1 +toulous 1 +franc 1 +brigg 1 +robustgeometr 1 +internationalworkshop 1 +singl 1 +crystal 1 +silicon 1 +manipul 1 +task 1 +ieee 1 +electro 1 +mechan 1 +mem 1 +diego 1 +california 1 +februari 1 +classif 1 +lower 1 +cannot 1 +confer 1 +autom 1 +icra 1 +minneapoli 1 +minnesota 1 +april 1 +andimprov 1 +partsfeed 1 +partii 1 +provablygood 1 +approxim 1 +optim 1 +kinodynam 1 +robotswith 1 +decoupl 1 +dynam 1 +xavier 1 +algorithmica 1 +forcartesian 1 +open 1 +chain 1 +motion 1 +canni 1 +reif 1 +journal 1 +inform 1 +invari 1 +distribut 1 +jen 1 +research 1 +inpress 1 +minim 1 +supermodular 1 +experiment 1 +andtheoret 1 +artifici 1 +intellig 1 +jetai 1 +press 1 +write 1 +book 1 +entitl 1 +draft 1 +firstquart 1 +appear 1 +paper 1 +artificialintellig 1 +revis 1 +base 1 +experi 1 +inminim 1 +symposium 1 +iser 1 +stanford 1 +move 1 +furnitur 1 +automon 1 +proc 1 +societi 1 +ofjapan 1 +iro 1 +pittsburgh 1 +sensorlessmanipul 1 +actuatorarrai 1 +mihailovich 1 +andautom 1 +detail 1 +explan 1 +program 1 +scheme 1 +ree 1 +automationnic 1 +first 1 +ofrobot 1 +peter 1 +boston 1 +wilson 1 +andj 1 +latomb 1 +automat 1 +configur 1 +direct 1 +proceed 1 +otherpubl 1 +these 1 +post 1 +doc 1 +train 1 +daniela 1 +jonathan 1 +dinesh 1 +aval 1 +server 1 +list 1 +version 1 +onlin 1 +tech 1 +report 1 +librari 1 +catalogc 1 +indexobtain 1 +copi 1 +paperscopi 1 +avail 1 +anonym 1 +pictur 1 +teamof 1 +autonom 1 +movefurnitur 1 +around 1 +portrait 1 +click 1 +hereto 1 +mobot 1 +push 1 +couch 1 +rotat 1 +apictur 1 +drawn 1 +loretta 1 +pompilio 1 +peopl 1 +discoverychannel 1 +beyond 1 +find 1 +funa 1 +poem 1 +alfr 1 +mail 1 +agent 1 +famili 1 +plai 1 +harm 1 +swallow 1 +ithaca 1 +sometim 1 +moreoth 1 +depart 1 +herefor 1 +search 1 +tool 1 +access 1 +stuff 1 +return 1 +level 1 +clickher 1 +tallest 1 +darkest 1 +lead 1 +hollywood 1 +merian 1 +cooper 1 +wrai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..637f3669 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,242 @@ +clair 1 +cardieclair 1 +cardi 1 +assist 1 +professor 1 +upson 1 +hallphon 1 +email 1 +cornel 1 +educlick 1 +research 1 +interestscours 1 +teachselect 1 +publicationsnlp 1 +link 1 +entri 1 +depart 1 +annual 1 +report 1 +amalgam 1 +softbal 1 +stat 1 +finger 1 +lake 1 +tandem 1 +tour 1 +westi 1 +interestsalthough 1 +span 1 +number 1 +subfield 1 +within 1 +artifici 1 +intellig 1 +includ 1 +machin 1 +learn 1 +case 1 +base 1 +reason 1 +cognitivemodel 1 +focu 1 +area 1 +naturallanguag 1 +understand 1 +group 1 +primarili 1 +interest 1 +investig 1 +machinelearn 1 +techniqu 1 +tool 1 +guid 1 +natur 1 +languag 1 +system 1 +develop 1 +forexplor 1 +mechan 1 +underli 1 +acquisit 1 +work 1 +focus 1 +tworel 1 +design 1 +user 1 +train 1 +effici 1 +reliablyextract 1 +import 1 +inform 1 +document 1 +extract 1 +part 1 +cstr 1 +project 1 +us 1 +tosupport 1 +content 1 +brows 1 +technic 1 +text 1 +kenmor 1 +autom 1 +knowledgeacquisit 1 +task 1 +compris 1 +build 1 +gener 1 +kenmoreacquir 1 +linguist 1 +knowledg 1 +combin 1 +symbol 1 +learningtechniqu 1 +robust 1 +sentenc 1 +analysi 1 +corpora 1 +tworeal 1 +world 1 +domain 1 +perform 1 +speech 1 +tag 1 +semant 1 +featur 1 +andconcept 1 +activ 1 +find 1 +anteced 1 +rel 1 +pronoun 1 +current 1 +extend 1 +handl 1 +larger 1 +addit 1 +disambiguationtask 1 +evalu 1 +learningcompon 1 +context 1 +applic 1 +isembed 1 +goal 1 +determin 1 +condit 1 +expect 1 +offer 1 +cost 1 +effect 1 +approach 1 +teachingc 1 +spring 1 +foundat 1 +practicum 1 +inartifici 1 +fall 1 +seminar 1 +understandingselect 1 +publicationsautom 1 +select 1 +proceed 1 +conferenceon 1 +empir 1 +method 1 +process 1 +univers 1 +pennsylvania 1 +embed 1 +agener 1 +framework 1 +wermter 1 +riloff 1 +scheler 1 +gabriel 1 +connectionist 1 +statist 1 +andsymbol 1 +lectur 1 +note 1 +springer 1 +origin 1 +present 1 +workshop 1 +tolearn 1 +intern 1 +jointconfer 1 +ijcai 1 +aaai 1 +press 1 +chapter 1 +introduct 1 +thesi 1 +specif 1 +conceptu 1 +massachusett 1 +amherst 1 +file 1 +contain 1 +introductori 1 +conceptualsent 1 +avail 1 +cmpsci 1 +page 1 +eleventh 1 +nation 1 +confer 1 +washington 1 +decis 1 +tree 1 +improv 1 +tenth 1 +morgan 1 +kaufmann 1 +corpu 1 +disambigu 1 +heurist 1 +associ 1 +comput 1 +newark 1 +jose 1 +cognit 1 +bias 1 +fourteenth 1 +scienc 1 +societi 1 +bloomington 1 +lawrenc 1 +erlbaumassoci 1 +onconstrain 1 +prior 1 +plausibl 1 +complic 1 +syntax 1 +lehnert 1 +ninth 1 +anaheim 1 +analyz 1 +paper 1 +citat 1 +twelfth 1 +cambridg 1 +linkscomput 1 +linguistics 1 +print 1 +archiv 1 +databas 1 +recent 1 +aclspeci 1 +learningmachin 1 +digestmachinelearn 1 +resourc 1 +researchersmachin 1 +home 1 +penn 1 +treebank 1 +repositori 1 +pointer 1 +code 1 +variou 1 +compon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..696832e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,88 @@ +paul 1 +chew 1 +senior 1 +research 1 +associatephd 1 +purdu 1 +univers 1 +cornel 1 +eduappletsy 1 +need 1 +java 1 +compat 1 +beta 1 +version 1 +browser 1 +asnetscap 1 +make 1 +work 1 +voronoi 1 +delaunai 1 +applet 1 +creat 1 +avoronoi 1 +diagram 1 +triangul 1 +click 1 +point 1 +agenda 1 +primari 1 +interest 1 +geometr 1 +algorithm 1 +emphasi 1 +onpract 1 +applic 1 +practic 1 +includedplac 1 +motion 1 +plan 1 +shape 1 +comparison 1 +vision 1 +sens 1 +mesh 1 +gener 1 +exampl 1 +issu 1 +thataris 1 +part 1 +problem 1 +automat 1 +scientificsoftwar 1 +goal 1 +rais 1 +level 1 +softwar 1 +isspecifi 1 +develop 1 +environ 1 +scientif 1 +canb 1 +us 1 +natur 1 +high 1 +mathemat 1 +concept 1 +ofphys 1 +engin 1 +thu 1 +program 1 +specifi 1 +implicitli 1 +acollect 1 +equat 1 +symbol 1 +techniquesar 1 +transform 1 +express 1 +effectiveprogram 1 +myonlin 1 +tech 1 +reportscornel 1 +depart 1 +computerscienceth 1 +simlabprojectaddress 1 +rhode 1 +hall 1 +ithaca 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..8a92a544 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,76 @@ +chao 1 +chang 1 +home 1 +page 1 +chichao 1 +cornel 1 +edui 1 +student 1 +thedepart 1 +comput 1 +scienceat 1 +univers 1 +faculti 1 +advisor 1 +isthorsten 1 +eicken 1 +summer 1 +microsoft 1 +network 1 +group 1 +click 1 +addressesand 1 +phone 1 +number 1 +research 1 +interest 1 +interact 1 +compil 1 +runtim 1 +andoper 1 +system 1 +toward 1 +effici 1 +concurr 1 +program 1 +overheterogen 1 +tham 1 +composit 1 +activ 1 +messageslow 1 +latenc 1 +commun 1 +risc 1 +multimatlab 1 +matlab 1 +multipl 1 +processor 1 +design 1 +perform 1 +messag 1 +soccer 1 +anyon 1 +sport 1 +server 1 +latest 1 +newsbraziliansocc 1 +portugues 1 +world 1 +result 1 +andhomepagesoliv 1 +guid 1 +cool 1 +stuff 1 +lubrasa 1 +luso 1 +brazilian 1 +associationu 1 +chess 1 +centerjorn 1 +brasilmi 1 +carstockmasterjayhawk 1 +basketballwww 1 +tennisserverback 1 +scienc 1 +homepagelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..42f77649 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,58 @@ +chee 1 +keong 1 +liau 1 +liauwelcom 1 +home 1 +page 1 +graduat 1 +student 1 +master 1 +engin 1 +programm 1 +apolog 1 +forth 1 +construct 1 +work 1 +hopefulli 1 +thing 1 +improv 1 +soon 1 +school 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +ithaca 1 +fall 1 +class 1 +foundat 1 +artifici 1 +intelligencec 1 +program 1 +languag 1 +softwar 1 +engineeringc 1 +networksc 1 +advanc 1 +databas 1 +systemsbaccalaur 1 +cours 1 +inform 1 +tokyo 1 +institut 1 +technolog 1 +japanhomei 1 +come 1 +small 1 +countri 1 +call 1 +singapor 1 +know 1 +find 1 +avail 1 +homepag 1 +mapl 1 +avenu 1 +edulast 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..78ed9a75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,83 @@ +thoma 1 +colemanthoma 1 +colemancornel 1 +universityi 1 +professcp 1 +match 1 +http 1 +cornel 1 +comput 1 +scienc 1 +depart 1 +center 1 +appli 1 +mathemat 1 +also 1 +strong 1 +affili 1 +theori 1 +director 1 +research 1 +applic 1 +group 1 +advanc 1 +institut 1 +final 1 +member 1 +optim 1 +project 1 +ccop 1 +faculti 1 +interest 1 +broadfield 1 +discret 1 +continu 1 +programmi 1 +program 1 +concern 1 +design 1 +understand 1 +practic 1 +effici 1 +numer 1 +algorithm 1 +problem 1 +primari 1 +develop 1 +computationalmethod 1 +tool 1 +larg 1 +scale 1 +automat 1 +differenti 1 +imag 1 +reconstruct 1 +biomed 1 +parallel 1 +linear 1 +minim 1 +inequ 1 +nonlinear 1 +equal 1 +constraint 1 +student 1 +postdoc 1 +profession 1 +activ 1 +recent 1 +paper 1 +book 1 +current 1 +former 1 +associ 1 +softwar 1 +link 1 +curriculum 1 +vita 1 +best 1 +coleman 1 +rhode 1 +hall 1 +univers 1 +ithaca 1 +york 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..65edf1f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,166 @@ +karl 1 +crari 1 +home 1 +pagekarl 1 +crarycrari 1 +cornel 1 +eduoffic 1 +address 1 +upson 1 +halloffic 1 +phone 1 +researchbroadli 1 +speak 1 +primari 1 +research 1 +interest 1 +type 1 +theori 1 +design 1 +implementationand 1 +semant 1 +program 1 +languag 1 +recent 1 +emphasi 1 +area 1 +subtyp 1 +object 1 +orient 1 +modular 1 +implement 1 +practic 1 +kmlwhich 1 +combin 1 +featur 1 +function 1 +formul 1 +atyp 1 +theoret 1 +view 1 +tractabl 1 +approxim 1 +intract 1 +richworld 1 +foundat 1 +whole 1 +mathemat 1 +perform 1 +newprogram 1 +develop 1 +often 1 +result 1 +map 1 +construct 1 +anapproxim 1 +set 1 +convers 1 +paradigm 1 +programminglanguag 1 +well 1 +understood 1 +aminterest 1 +deepen 1 +understand 1 +relationship 1 +particularli 1 +issu 1 +mitig 1 +also 1 +model 1 +compil 1 +seri 1 +translat 1 +lower 1 +intermedi 1 +calculi 1 +calculu 1 +embed 1 +andth 1 +correspond 1 +interpret 1 +invari 1 +modelallow 1 +relat 1 +stage 1 +origin 1 +allowsth 1 +standard 1 +techniqu 1 +optim 1 +guarante 1 +safeti 1 +andcorrect 1 +care 1 +make 1 +possibl 1 +additionaloptim 1 +unavail 1 +strategi 1 +work 1 +form 1 +part 1 +thenuprl 1 +project 1 +hereat 1 +name 1 +come 1 +nuprl 1 +system 1 +formal 1 +logic 1 +base 1 +martin 1 +automatedreason 1 +committe 1 +consist 1 +ofrobert 1 +constabl 1 +greg 1 +morrisett 1 +dexter 1 +kozen 1 +close 1 +jasonhickei 1 +select 1 +papersoth 1 +linksmark 1 +leon 1 +maintain 1 +collect 1 +ofprogram 1 +resourc 1 +cansearch 1 +comput 1 +scienc 1 +technic 1 +report 1 +onlin 1 +grad 1 +life 1 +biblestudi 1 +pageth 1 +lurker 1 +guid 1 +babylon 1 +command 1 +import 1 +answer 1 +jesu 1 +hear 1 +israel 1 +thelord 1 +lord 1 +love 1 +heart 1 +andwith 1 +soul 1 +mind 1 +strength 1 +thesecond 1 +neighbor 1 +commandmentgreat 1 +mark 1 +univers 1 +pagedepart 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..ccdf866f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,130 @@ +chunguang 1 +sunchunguang 1 +sunphd 1 +pennsylvania 1 +state 1 +univers 1 +welcom 1 +home 1 +page 1 +research 1 +associ 1 +advanc 1 +comput 1 +institut 1 +cornel 1 +theori 1 +center 1 +also 1 +affili 1 +thecornel 1 +optim 1 +project 1 +work 1 +close 1 +professorthoma 1 +coleman 1 +interest 1 +parallel 1 +scientif 1 +spars 1 +matrix 1 +algorithm 1 +numer 1 +linear 1 +algebra 1 +mathemat 1 +softwar 1 +current 1 +ppcx 1 +program 1 +solut 1 +rank 1 +defici 1 +least 1 +squar 1 +problem 1 +bound 1 +packag 1 +pssl 1 +psspd 1 +symmetr 1 +posit 1 +definit 1 +systemsrec 1 +lecturesparallel 1 +contain 1 +dens 1 +row 1 +second 1 +siam 1 +confer 1 +matric 1 +coeur 1 +alen 1 +idaho 1 +octob 1 +multifront 1 +ondistribut 1 +memori 1 +multiprocessor 1 +seventh 1 +parallelprocess 1 +francisco 1 +februari 1 +select 1 +public 1 +orthogon 1 +factor 1 +distribut 1 +journal 1 +deal 1 +solutionof 1 +technic 1 +report 1 +ctctr 1 +decemb 1 +cornellunivers 1 +proceed 1 +conferenceon 1 +process 1 +bailei 1 +bjorstad 1 +gilbert 1 +mascagni 1 +schreiber 1 +simon 1 +torczon 1 +watson 1 +philadelphia 1 +map 1 +choleskyfactor 1 +pothen 1 +septemb 1 +larg 1 +matriceson 1 +sixth 1 +processingfor 1 +sinovec 1 +key 1 +leuz 1 +petzold 1 +reed 1 +us 1 +cliqu 1 +tree 1 +fifth 1 +dongarra 1 +kennedi 1 +messina 1 +sorensen 1 +voigt 1 +compact 1 +data 1 +structuresin 1 +scale 1 +univeristi 1 +ithaca 1 +mail 1 +csun 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..4eae52f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,182 @@ +david 1 +cooper 1 +home 1 +page 1 +postdoctor 1 +associ 1 +upson 1 +hall 1 +phone 1 +email 1 +dcooper 1 +cornel 1 +current 1 +research 1 +involv 1 +design 1 +implement 1 +securityarchitectur 1 +horu 1 +goal 1 +work 1 +provid 1 +layer 1 +horuswhich 1 +interact 1 +kerberosnetwork 1 +authent 1 +servic 1 +cryptograph 1 +tool 1 +order 1 +toprovid 1 +privaci 1 +process 1 +group 1 +set 1 +origin 1 +secur 1 +architectur 1 +mike 1 +reiter 1 +fault 1 +toler 1 +system 1 +originalimplement 1 +support 1 +virtual 1 +synchroni 1 +model 1 +ofcomput 1 +maintain 1 +crash 1 +failuremodel 1 +us 1 +necessari 1 +within 1 +honest 1 +result 1 +make 1 +assumpt 1 +anyprocess 1 +allow 1 +join 1 +trust 1 +member 1 +version 1 +isposs 1 +whose 1 +semant 1 +weaker 1 +ofvirtu 1 +desir 1 +permit 1 +untrustedprocess 1 +exampl 1 +might 1 +untrust 1 +clientsto 1 +client 1 +server 1 +would 1 +communicatewith 1 +accept 1 +limit 1 +command 1 +fromth 1 +respons 1 +screen 1 +messag 1 +horussecur 1 +arbitrari 1 +relationshipsamong 1 +accomplish 1 +keymanag 1 +scheme 1 +impersonateanoth 1 +trivial 1 +achieveth 1 +howev 1 +witha 1 +slightli 1 +higher 1 +overhead 1 +unlik 1 +enabl 1 +asclient 1 +mani 1 +complic 1 +thesi 1 +propos 1 +solut 1 +problem 1 +inherentin 1 +mobil 1 +network 1 +static 1 +basic 1 +type 1 +ofinform 1 +user 1 +wish 1 +keep 1 +privat 1 +first 1 +contentsof 1 +send 1 +inform 1 +hiddenwith 1 +proper 1 +encrypt 1 +also 1 +prevent 1 +outsidersfrom 1 +determin 1 +commun 1 +maintainingth 1 +unlink 1 +sender 1 +recipi 1 +chaum 1 +februari 1 +sinc 1 +severaloth 1 +made 1 +improv 1 +addit 1 +staticnetwork 1 +locat 1 +carri 1 +mobilecommun 1 +devic 1 +gener 1 +themessag 1 +receiv 1 +reveal 1 +informationabout 1 +owner 1 +develop 1 +along 1 +advisorken 1 +birman 1 +protocol 1 +attack 1 +internaland 1 +extern 1 +adversari 1 +public 1 +kenneth 1 +preserv 1 +ofmobil 1 +comput 1 +proceed 1 +ieee 1 +symposium 1 +securityand 1 +apriv 1 +wireless 1 +anthoni 1 +mobilecomput 1 +dissert 1 +univers 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..face4679 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,131 @@ +cyber 1 +abodedan 1 +abodegreet 1 +travel 1 +digit 1 +space 1 +welcom 1 +mine 1 +humbl 1 +home 1 +prithe 1 +gentl 1 +surf 1 +shore 1 +instead 1 +blink 1 +teari 1 +ey 1 +rest 1 +weari 1 +kei 1 +mice 1 +born 1 +hand 1 +make 1 +thyselv 1 +abod 1 +brief 1 +autobiographi 1 +resum 1 +project 1 +deidr 1 +model 1 +facial 1 +express 1 +univers 1 +emot 1 +simnet 1 +builder 1 +type 1 +game 1 +faiththei 1 +hardli 1 +faith 1 +prize 1 +ferro 1 +concret 1 +sai 1 +wise 1 +scorn 1 +bend 1 +ear 1 +lawyer 1 +tone 1 +scientist 1 +word 1 +need 1 +unseen 1 +unheard 1 +untouch 1 +silenc 1 +night 1 +dread 1 +unknown 1 +question 1 +uncertain 1 +yearn 1 +true 1 +direct 1 +field 1 +lordlovewarm 1 +friendship 1 +mindless 1 +infatu 1 +sensual 1 +romanc 1 +burn 1 +passion 1 +love 1 +soft 1 +sigh 1 +belov 1 +poetri 1 +hopemyth 1 +favor 1 +beauteou 1 +pandora 1 +ever 1 +place 1 +fault 1 +human 1 +role 1 +releas 1 +demon 1 +hope 1 +mani 1 +astrai 1 +pretti 1 +glimmer 1 +fals 1 +tread 1 +tortur 1 +broken 1 +road 1 +amidst 1 +thorn 1 +dark 1 +filthi 1 +soul 1 +diseas 1 +pain 1 +horror 1 +suffer 1 +reach 1 +fear 1 +tear 1 +cannot 1 +blame 1 +deed 1 +told 1 +heart 1 +take 1 +hold 1 +world 1 +would 1 +never 1 +frozen 1 +miseri 1 +cold 1 +spring 1 +etern 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..5427864e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,78 @@ +dean 1 +krafft 1 +select 1 +urlsdean 1 +point 1 +interestcornel 1 +server 1 +cornel 1 +home 1 +page 1 +cuinfo 1 +engin 1 +librari 1 +legal 1 +inform 1 +institut 1 +directori 1 +project 1 +public 1 +dimund 1 +document 1 +imag 1 +understand 1 +charact 1 +recognit 1 +siam 1 +gopher 1 +captur 1 +access 1 +cours 1 +illinoi 1 +digit 1 +stanford 1 +michigan 1 +berkelei 1 +librarysearch 1 +tool 1 +lyco 1 +search 1 +veronica 1 +archi 1 +gatewai 1 +anonym 1 +site 1 +depart 1 +togeth 1 +excel 1 +collect 1 +network 1 +scout 1 +report 1 +intern 1 +planet 1 +earth 1 +whole 1 +internet 1 +catalog 1 +part 1 +global 1 +navig 1 +cern 1 +refer 1 +faqsvari 1 +stuff 1 +head 1 +mail 1 +list 1 +audio 1 +gear 1 +folkbook 1 +folk 1 +music 1 +ithaca 1 +weather 1 +forecast 1 +elsewher 1 +secur 1 +index 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..d58a9c24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,98 @@ +dean 1 +krafft 1 +home 1 +page 1 +cornel 1 +director 1 +comput 1 +facilitiesaddress 1 +upson 1 +halldepart 1 +sciencecornel 1 +universityithaca 1 +phone 1 +current 1 +serv 1 +research 1 +administr 1 +inth 1 +scienc 1 +depart 1 +guis 1 +anadministr 1 +manag 1 +facil 1 +support 1 +group 1 +andworri 1 +number 1 +issu 1 +includ 1 +secur 1 +network 1 +build 1 +servic 1 +side 1 +princip 1 +investig 1 +spart 1 +project 1 +arpa 1 +fund 1 +consortium 1 +five 1 +sciencedepart 1 +thecorpor 1 +nation 1 +initi 1 +cnri 1 +researchi 1 +intend 1 +rapid 1 +dissemin 1 +break 1 +technicalresearch 1 +internet 1 +well 1 +make 1 +avail 1 +line 1 +theexist 1 +librari 1 +technic 1 +report 1 +member 1 +part 1 +work 1 +davi 1 +xerox 1 +employe 1 +thedesign 1 +institut 1 +carl 1 +lagoz 1 +emploi 1 +develop 1 +implement 1 +protocol 1 +system 1 +disseminationov 1 +similar 1 +materi 1 +call 1 +dienst 1 +eight 1 +univers 1 +site 1 +atechn 1 +inform 1 +ondienst 1 +pleas 1 +send 1 +email 1 +togethera 1 +select 1 +url 1 +relat 1 +thing 1 +interestedin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..45bc6070 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,24 @@ +divakar 1 +home 1 +pagedivakar 1 +viswanathdivakar 1 +cornel 1 +address 1 +upson 1 +hall 1 +univers 1 +ithaca 1 +graduat 1 +student 1 +comput 1 +scienc 1 +area 1 +interest 1 +isnumer 1 +analysi 1 +advis 1 +page 1 +good 1 +place 1 +find 1 +numer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..f7213167 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,90 @@ +diyu 1 +home 1 +pagediyu 1 +daisi 1 +welcom 1 +spring 1 +cours 1 +practic 1 +distribut 1 +comput 1 +databas 1 +system 1 +compil 1 +translat 1 +practicum 1 +translatorsfal 1 +coursesc 1 +oper 1 +systemc 1 +softwar 1 +engineeringc 1 +advanc 1 +systemsel 1 +network 1 +telecommunicationsm 1 +projectorigin 1 +virtual 1 +realiti 1 +railroad 1 +projectsinc 1 +mayb 1 +like 1 +know 1 +littl 1 +doubt 1 +anywai 1 +current 1 +master 1 +engin 1 +student 1 +depart 1 +scienc 1 +cornel 1 +univers 1 +locat 1 +ithaca 1 +central 1 +york 1 +gorgeou 1 +place 1 +live 1 +except 1 +winter 1 +last 1 +year 1 +receiv 1 +appli 1 +physic 1 +jersei 1 +institut 1 +technolog 1 +newark 1 +brought 1 +beauti 1 +campu 1 +tsinghua 1 +unviers 1 +beij 1 +china 1 +also 1 +want 1 +friend 1 +miss 1 +us 1 +linksjava 1 +html 1 +tkfavorit 1 +sitestimecnnlondon 1 +timeswashington 1 +postchines 1 +digestchina 1 +new 1 +digestfeng 1 +yuanxin 1 +siart 1 +chinaloc 1 +connectionsctc 1 +sunlabweathermovi 1 +miller 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..7a5f7d2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,113 @@ +huttenloch 1 +home 1 +page 1 +daniel 1 +associ 1 +professordph 1 +cornel 1 +research 1 +main 1 +area 1 +visual 1 +match 1 +recognit 1 +work 1 +rang 1 +theoret 1 +algorithm 1 +us 1 +techniqu 1 +comput 1 +geometri 1 +applic 1 +system 1 +remot 1 +collabor 1 +view 1 +document 1 +imag 1 +wide 1 +network 1 +video 1 +monitor 1 +target 1 +also 1 +interest 1 +type 1 +electron 1 +commun 1 +educ 1 +compar 1 +geometr 1 +structur 1 +hausdorff 1 +base 1 +method 1 +implement 1 +avail 1 +fast 1 +index 1 +eigenspac 1 +approxim 1 +fraction 1 +matlab 1 +perform 1 +evalu 1 +model 1 +object 1 +track 1 +identif 1 +digipap 1 +highli 1 +compact 1 +univers 1 +viewabl 1 +format 1 +conot 1 +support 1 +share 1 +teach 1 +brian 1 +smith 1 +develop 1 +cours 1 +author 1 +offer 1 +first 1 +time 1 +spring 1 +introduct 1 +program 1 +vision 1 +profession 1 +activ 1 +xerox 1 +parc 1 +process 1 +start 1 +small 1 +group 1 +investig 1 +problem 1 +chair 1 +cvpr 1 +ieee 1 +confer 1 +pattern 1 +held 1 +juan 1 +june 1 +favorit 1 +geek 1 +snowboard 1 +mountain 1 +bike 1 +without 1 +extrem 1 +sport 1 +cool 1 +stupid 1 +attitud 1 +last 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..aa15f2b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,94 @@ +ashvin 1 +dsouza 1 +cornel 1 +edui 1 +graduat 1 +student 1 +work 1 +bard 1 +bloom 1 +focu 1 +thesi 1 +research 1 +develop 1 +oftool 1 +support 1 +process 1 +algebra 1 +method 1 +specif 1 +andverif 1 +concurr 1 +system 1 +design 1 +tool 1 +withrespect 1 +metatheori 1 +becom 1 +immediatelyavail 1 +wide 1 +class 1 +allevi 1 +theproblem 1 +duplic 1 +effort 1 +inher 1 +custom 1 +exampl 1 +prototyp 1 +base 1 +calculu 1 +model 1 +checker 1 +simpl 1 +gso 1 +semant 1 +form 1 +part 1 +input 1 +make 1 +applic 1 +mani 1 +commonli 1 +us 1 +includ 1 +basic 1 +loto 1 +addit 1 +investig 1 +express 1 +power 1 +order 1 +better 1 +understand 1 +compar 1 +final 1 +exploringappl 1 +techniqu 1 +gener 1 +bdd 1 +algebraterm 1 +full 1 +postscipt 1 +lite 1 +postscript 1 +version 1 +also 1 +written 1 +result 1 +presentedth 1 +former 1 +comput 1 +aid 1 +verif 1 +lnc 1 +latter 1 +foundat 1 +softwar 1 +technolog 1 +theoret 1 +computersci 1 +june 1 +present 1 +verifi 1 +compass 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..e606f429 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,53 @@ +funda 1 +ever 1 +improv 1 +pagefunda 1 +ergn 1 +mail 1 +ergun 1 +cornel 1 +eduhi 1 +welcom 1 +home 1 +page 1 +name 1 +studentin 1 +comput 1 +scienc 1 +dept 1 +work 1 +programcheck 1 +prof 1 +ronitt 1 +rubinfeld 1 +researchpag 1 +also 1 +minor 1 +paint 1 +depart 1 +fine 1 +art 1 +origin 1 +come 1 +izmir 1 +turkei 1 +undergrad 1 +bilkentunivers 1 +ankara 1 +research 1 +relat 1 +stuff 1 +warn 1 +might 1 +encounterpag 1 +written 1 +turkish 1 +angri 1 +dog 1 +risk 1 +person 1 +visit 1 +sinc 1 +alwai 1 +heavi 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..5fda547b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,183 @@ +activ 1 +tardo 1 +tardosassoci 1 +professor 1 +depart 1 +comput 1 +scienc 1 +upson 1 +hallcornel 1 +universityithaca 1 +phone 1 +email 1 +cornel 1 +school 1 +oper 1 +research 1 +industri 1 +engineeringphon 1 +ori 1 +click 1 +daughter 1 +rebecca 1 +julia 1 +shmoi 1 +current 1 +researchrec 1 +public 1 +broadli 1 +speak 1 +interest 1 +theori 1 +algorithm 1 +includ 1 +mani 1 +aspect 1 +complex 1 +mostlywork 1 +combinatori 1 +optim 1 +problem 1 +particular 1 +networkproblem 1 +approxim 1 +linear 1 +integ 1 +programmingproblem 1 +recent 1 +paperssurvei 1 +paper 1 +thegener 1 +assign 1 +mathemat 1 +program 1 +preliminari 1 +version 1 +appear 1 +proceed 1 +annual 1 +siam 1 +symposium 1 +discret 1 +januari 1 +plotkin 1 +improv 1 +bound 1 +flow 1 +cutratio 1 +multicommod 1 +combinatorica 1 +klein 1 +stein 1 +fasterapproxim 1 +unit 1 +capac 1 +concurr 1 +problemwith 1 +applic 1 +rout 1 +find 1 +spars 1 +cut 1 +journal 1 +oncomput 1 +appearedin 1 +leighton 1 +makedon 1 +tragouda 1 +fast 1 +flowproblem 1 +system 1 +stoc 1 +special 1 +issu 1 +annualacm 1 +approximationalgorithm 1 +fraction 1 +pack 1 +cover 1 +inmathemat 1 +hasappear 1 +ieee 1 +thefound 1 +goeman 1 +goldberg 1 +williamson 1 +network 1 +designproblem 1 +discretealgorithm 1 +hopp 1 +polynomi 1 +time 1 +someevacu 1 +ondiscret 1 +quickest 1 +transship 1 +theproceed 1 +steiner 1 +direct 1 +multicut 1 +kleinberg 1 +disjoint 1 +pathsproblem 1 +high 1 +diamet 1 +planar 1 +proceedingsof 1 +path 1 +dens 1 +embed 1 +graph 1 +annualiee 1 +foundat 1 +rabani 1 +distribut 1 +packet 1 +switch 1 +arbitrari 1 +fleischer 1 +separ 1 +maxim 1 +violat 1 +comb 1 +inequ 1 +ipco 1 +june 1 +survei 1 +tarjan 1 +sept 1 +vlsi 1 +design 1 +kort 1 +lovaszand 1 +schrijver 1 +springer 1 +verlag 1 +strongli 1 +inoptim 1 +intern 1 +congress 1 +ofmathematician 1 +kyoto 1 +tokyo 1 +handbook 1 +combinator 1 +graham 1 +grotschel 1 +lovasz 1 +north 1 +holland 1 +computersci 1 +theorem 1 +annot 1 +bibliographi 1 +inproc 1 +summer 1 +maastricht 1 +netherland 1 +proc 1 +networkoptim 1 +practic 1 +netflow 1 +miniato 1 +itali 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..d0107456 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,14 @@ +franci 1 +home 1 +page 1 +graduat 1 +student 1 +univers 1 +california 1 +berkeleymathemat 1 +departmentcomput 1 +scienc 1 +departmentcornel 1 +universitycomput 1 +departmenthumorfcc 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..57859a0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,2 @@ +felix 1 +world 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..e2e95655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,57 @@ +frederick 1 +smith 1 +homepagefrederick 1 +grad 1 +student 1 +cornel 1 +upson 1 +halldepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +paper 1 +erni 1 +cohen 1 +dexter 1 +kozen 1 +complex 1 +kleen 1 +algebra 1 +test 1 +technic 1 +report 1 +univers 1 +juli 1 +complet 1 +decid 1 +april 1 +person 1 +us 1 +link 1 +homepag 1 +greg 1 +morrisett 1 +program 1 +languag 1 +research 1 +page 1 +class 1 +take 1 +system 1 +multimedia 1 +semant 1 +math 1 +introduct 1 +analysi 1 +epicuri 1 +food 1 +zine 1 +cartalk 1 +home 1 +click 1 +clack 1 +catch 1 +sundai 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..a2f40bd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,25 @@ +frank 1 +page 1 +planet 1 +either 1 +matter 1 +adelstein 1 +post 1 +doctor 1 +associ 1 +cornel 1 +xerox 1 +design 1 +research 1 +institut 1 +offic 1 +phone 1 +electron 1 +mail 1 +actual 1 +inform 1 +checkout 1 +improv 1 +happi 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..8ad69c0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,65 @@ +yuan 1 +fred 1 +softwar 1 +engin 1 +current 1 +version 1 +feet 1 +shown 1 +resum 1 +project 1 +other 1 +appear 1 +soon 1 +distribut 1 +http 1 +server 1 +scramo 1 +midi 1 +choreograph 1 +anim 1 +model 1 +postscript 1 +vpla 1 +visual 1 +program 1 +languag 1 +animationlink 1 +affili 1 +massachusett 1 +comput 1 +hewlett 1 +packardlink 1 +previou 1 +cornel 1 +scienc 1 +theori 1 +center 1 +group 1 +univers 1 +binghamton 1 +lawrenc 1 +berkelei 1 +laboratoryinterest 1 +hobbi 1 +section 1 +develop 1 +mayb 1 +next 1 +year 1 +photographi 1 +cello 1 +guitar 1 +aquarium 1 +sciencecornel 1 +home 1 +burl 1 +work 1 +chelmsford 1 +email 1 +fredhsu 1 +apollo 1 +snail 1 +drive 1 +peopl 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..6ddd000e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,149 @@ +deepak 1 +balakrishna 1 +balakrishnamast 1 +engineeringdepart 1 +comput 1 +sciencecornel 1 +univers 1 +resumeeducationcoursesperson 1 +cornel 1 +resum 1 +html 1 +postscript 1 +back 1 +main 1 +page 1 +educ 1 +undergradu 1 +complet 1 +june 1 +karnataka 1 +region 1 +engin 1 +colleg 1 +surathk 1 +india 1 +major 1 +scienc 1 +interest 1 +multimedia 1 +cours 1 +relat 1 +oper 1 +system 1 +artifici 1 +intellig 1 +compil 1 +construct 1 +data 1 +commun 1 +graphic 1 +graduat 1 +present 1 +pursu 1 +master 1 +degre 1 +specialis 1 +want 1 +involv 1 +project 1 +deal 1 +server 1 +program 1 +follow 1 +list 1 +taken 1 +fall 1 +semest 1 +prof 1 +brian 1 +smith 1 +advanc 1 +databas 1 +praveen 1 +seshadri 1 +network 1 +srinivasan 1 +keshav 1 +softwar 1 +michael 1 +godfrei 1 +person 1 +well 1 +start 1 +goe 1 +upon 1 +time 1 +long 1 +actual 1 +novemb 1 +land 1 +call 1 +bharat 1 +outsid 1 +world 1 +precis 1 +born 1 +cute 1 +chubbi 1 +littl 1 +babi 1 +weigh 1 +approxim 1 +four 1 +pound 1 +took 1 +name 1 +mean 1 +light 1 +went 1 +still 1 +process 1 +chang 1 +incident 1 +probabl 1 +divin 1 +interfer 1 +aishwarya 1 +miss 1 +lucki 1 +leav 1 +miniscul 1 +detail 1 +earlier 1 +life 1 +dive 1 +straight 1 +high 1 +school 1 +nation 1 +public 1 +bangalor 1 +greater 1 +part 1 +place 1 +someon 1 +noth 1 +better 1 +krec 1 +that 1 +wonder 1 +anoth 1 +year 1 +holidai 1 +conquer 1 +class 1 +never 1 +match 1 +hope 1 +get 1 +somewher 1 +final 1 +here 1 +link 1 +friend 1 +ashish 1 +aastha 1 +indira 1 +ankit 1 +vineet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..84faf4c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,58 @@ +asif 1 +uddin 1 +ghiasasif 1 +ghia 1 +welcom 1 +mywww 1 +home 1 +page 1 +warn 1 +constructioni 1 +student 1 +comput 1 +scienc 1 +area 1 +interest 1 +distribut 1 +system 1 +multimedia 1 +bachelor 1 +degre 1 +engin 1 +univers 1 +technolog 1 +karachi 1 +pakistan 1 +sinc 1 +work 1 +global 1 +inform 1 +solut 1 +present 1 +studi 1 +leav 1 +master 1 +program 1 +cornel 1 +respons 1 +includ 1 +applic 1 +unix 1 +administr 1 +support 1 +educ 1 +network 1 +manag 1 +installationso 1 +number 1 +project 1 +plan 1 +onlin 1 +good 1 +hopefulli 1 +year 1 +publicationsth 1 +follow 1 +music 1 +cricket 1 +astronomyasif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..0651646b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,9 @@ +home 1 +page 1 +move 1 +http 1 +berkelei 1 +dglaser 1 +htmlpleas 1 +visit 1 +million 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..ec63374f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,333 @@ +david 1 +gri 1 +home 1 +pagedavid 1 +grieswilliam 1 +lewi 1 +professor 1 +engineeringdr 1 +munich 1 +institut 1 +technolog 1 +interest 1 +program 1 +methodolog 1 +particular 1 +formaldevelop 1 +relat 1 +area 1 +programminglanguag 1 +languag 1 +semant 1 +logic 1 +asinterest 1 +teach 1 +topic 1 +researchin 1 +fact 1 +understand 1 +formal 1 +taughta 1 +us 1 +tool 1 +freshman 1 +sophomor 1 +colleg 1 +level 1 +anoverrid 1 +concern 1 +mine 1 +click 1 +follow 1 +item 1 +inform 1 +curriculum 1 +vita 1 +short 1 +biographi 1 +text 1 +written 1 +polya 1 +announc 1 +dimac 1 +symposium 1 +link 1 +paper 1 +cornel 1 +depart 1 +pagecomput 1 +scienc 1 +upson 1 +hallcornel 1 +universityithaca 1 +edushort 1 +griesi 1 +born 1 +flush 1 +york 1 +spent 1 +year 1 +iescap 1 +receiv 1 +queen 1 +went 1 +workfor 1 +naval 1 +weapon 1 +laboratori 1 +civilian 1 +amathematician 1 +programm 1 +wife 1 +elain 1 +fewmonth 1 +later 1 +marri 1 +novemb 1 +illinoi 1 +educ 1 +master 1 +degreein 1 +math 1 +assistantship 1 +help 1 +twogerman 1 +manfr 1 +paul 1 +ruedig 1 +wiehl 1 +write 1 +full 1 +algol 1 +compilerfor 1 +comput 1 +figur 1 +implementrecurs 1 +effici 1 +mani 1 +end 1 +go 1 +almost 1 +three 1 +doctor 1 +bauer 1 +joseph 1 +stoer 1 +germani 1 +june 1 +wasin 1 +numer 1 +analysi 1 +sinc 1 +these 1 +notyet 1 +kosher 1 +assist 1 +stanford 1 +twin 1 +susan 1 +made 1 +excit 1 +usual 1 +thebirthdai 1 +april 1 +intown 1 +make 1 +four 1 +birthdai 1 +cake 1 +left 1 +weather 1 +move 1 +whichha 1 +snow 1 +ever 1 +wasdepart 1 +chair 1 +becam 1 +william 1 +lewisprofessor 1 +engin 1 +guggenheim 1 +fellowship 1 +return 1 +tabl 1 +contentsi 1 +better 1 +known 1 +mytext 1 +writingand 1 +contribut 1 +thewond 1 +research 1 +good 1 +bloom 1 +wherey 1 +plant 1 +number 1 +award 1 +contributionsto 1 +ieee 1 +taylor 1 +booth 1 +sigcseaward 1 +outstand 1 +clarkaward 1 +art 1 +theamerican 1 +feder 1 +process 1 +societi 1 +afip 1 +proud 1 +advise 1 +stand 1 +susanowicki 1 +thesi 1 +laid 1 +foundat 1 +proof 1 +correct 1 +ofparallel 1 +notion 1 +interfer 1 +freeness 1 +author 1 +bestpap 1 +langaug 1 +system 1 +andt 1 +raman 1 +sthesi 1 +best 1 +dissert 1 +designedand 1 +implement 1 +speak 1 +latex 1 +document 1 +includ 1 +technic 1 +articl 1 +book 1 +printedor 1 +spoken 1 +abl 1 +speakmathemat 1 +effect 1 +manner 1 +import 1 +goal 1 +work 1 +read 1 +blind 1 +alreadi 1 +produc 1 +audiocassett 1 +serv 1 +associ 1 +thecomput 1 +board 1 +late 1 +open 1 +officein 1 +washington 1 +began 1 +serious 1 +repres 1 +researchinterest 1 +also 1 +conduct 1 +taulbe 1 +survei 1 +period 1 +obtain 1 +essenti 1 +complet 1 +responsesfrom 1 +grant 1 +noother 1 +compar 1 +respons 1 +rate 1 +itrequir 1 +telephon 1 +call 1 +sendin 1 +questionnair 1 +researchassoci 1 +servic 1 +forchair 1 +toward 1 +respect 1 +andrespons 1 +current 1 +editor 1 +acta 1 +informatica 1 +aspect 1 +softwar 1 +concept 1 +andtool 1 +edit 1 +keep 1 +busi 1 +enjoi 1 +takean 1 +individu 1 +know 1 +willsuggest 1 +substanti 1 +rewrit 1 +believ 1 +servewher 1 +fredb 1 +schneider 1 +springer 1 +verlag 1 +andmonograph 1 +spare 1 +time 1 +sport 1 +like 1 +golf 1 +softbal 1 +volleybal 1 +swim 1 +tenni 1 +china 1 +isplit 1 +pant 1 +plai 1 +ping 1 +pong 1 +hour 1 +give 1 +alectur 1 +mention 1 +audienc 1 +laugh 1 +turnedaround 1 +explain 1 +interpret 1 +spoke 1 +everyonelaugh 1 +howev 1 +whether 1 +told 1 +truth 1 +justsaid 1 +joke 1 +sing 1 +barbershop 1 +andgilbert 1 +sullivan 1 +around 1 +hous 1 +carpentri 1 +wire 1 +remodel 1 +taken 1 +yield 1 +considerablesatisfact 1 +content 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..b6c61185 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,23 @@ +alex 1 +grinzayd 1 +homepagealex 1 +grinzaydm 1 +student 1 +comput 1 +sciencecornel 1 +universitytel 1 +email 1 +cornel 1 +first 1 +week 1 +link 1 +necx 1 +directinternet 1 +shop 1 +networkcomput 1 +express 1 +damarkwarn 1 +page 1 +bore 1 +learn 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..e44f140a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,46 @@ +grzegorz 1 +czajkowski 1 +homepag 1 +czajkowskidepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +offic 1 +grze 1 +cornel 1 +second 1 +year 1 +student 1 +program 1 +depart 1 +scienceat 1 +univers 1 +ithaca 1 +york 1 +complet 1 +master 1 +degre 1 +scienc 1 +krakow 1 +poland 1 +current 1 +involv 1 +sever 1 +project 1 +also 1 +charg 1 +administ 1 +cuc 1 +advisor 1 +thorsten 1 +eicken 1 +link 1 +relat 1 +research 1 +architectur 1 +activ 1 +messag 1 +split 1 +last 1 +modifi 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..a87c891e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,77 @@ +halpern 1 +home 1 +pagejoseph 1 +professorcornel 1 +universitycomput 1 +scienc 1 +depart 1 +upson 1 +hallithaca 1 +cornel 1 +research 1 +focus 1 +reason 1 +knowledg 1 +uncertainti 1 +applic 1 +distribut 1 +comput 1 +game 1 +theori 1 +although 1 +also 1 +done 1 +work 1 +continu 1 +interest 1 +topic 1 +fault 1 +toler 1 +program 1 +languag 1 +semant 1 +li 1 +boundari 1 +number 1 +field 1 +recent 1 +gave 1 +talk 1 +econom 1 +princeton 1 +describ 1 +someon 1 +mathemat 1 +call 1 +scientist 1 +give 1 +economist 1 +abouta 1 +subject 1 +mainli 1 +studi 1 +philosoph 1 +probabl 1 +best 1 +sentenc 1 +descript 1 +like 1 +detail 1 +check 1 +list 1 +public 1 +pointer 1 +abstract 1 +paper 1 +mani 1 +case 1 +avail 1 +activ 1 +resum 1 +fall 1 +teach 1 +cours 1 +sequel 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..3823efc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,10 @@ +tsuneshi 1 +hashimototsuneshi 1 +hashimotothi 1 +home 1 +page 1 +hashimoto 1 +construct 1 +cstsuneshi 1 +hashi 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..c55b7e8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,24 @@ +mark 1 +hayden 1 +cornel 1 +offic 1 +upson 1 +univers 1 +ithaca 1 +fall 1 +teach 1 +tast 1 +unix 1 +interest 1 +horu 1 +distribut 1 +commun 1 +system 1 +ensembl 1 +nuprl 1 +proof 1 +develop 1 +hockei 1 +last 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..76ee2fc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,6 @@ +heji 1 +improv 1 +home 1 +page 1 +cyber 1 +pond 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..dd2bcb4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,27 @@ +deyu 1 +home 1 +page 1 +graduat 1 +student 1 +cornel 1 +universitydept 1 +comput 1 +scienc 1 +upson 1 +hallithaca 1 +third 1 +year 1 +born 1 +shanghai 1 +china 1 +receiv 1 +undergradu 1 +degre 1 +berkelei 1 +faculti 1 +advisor 1 +thorsten 1 +eicken 1 +come 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..f72d1535 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,59 @@ +jing 1 +huang 1 +home 1 +page 1 +upson 1 +hall 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +student 1 +thedepart 1 +scienceat 1 +receiv 1 +bachelorand 1 +master 1 +degre 1 +appli 1 +mathemat 1 +tsinghua 1 +beij 1 +chinami 1 +academ 1 +interest 1 +vision 1 +multimedia 1 +system 1 +work 1 +professor 1 +ramin 1 +zabih 1 +imag 1 +retriev 1 +video 1 +process 1 +motion 1 +track 1 +us 1 +link 1 +annot 1 +bibliographi 1 +pattern 1 +recognit 1 +relat 1 +machin 1 +learn 1 +optim 1 +check 1 +chines 1 +christian 1 +fellowship 1 +evangel 1 +resourc 1 +center 1 +mission 1 +back 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..4c98e70f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,35 @@ +chin 1 +chen 1 +home 1 +page 1 +welcom 1 +current 1 +address 1 +mapl 1 +ithaca 1 +mail 1 +icchen 1 +cornel 1 +perman 1 +sung 1 +taipei 1 +taiwan 1 +class 1 +spring 1 +comput 1 +graphic 1 +practic 1 +distribut 1 +system 1 +practicum 1 +databas 1 +manag 1 +album 1 +resum 1 +new 1 +china 1 +time 1 +nctu 1 +construct 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..84ba6d83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,27 @@ +home 1 +page 1 +indira 1 +malik 1 +depart 1 +comput 1 +scienc 1 +master 1 +engin 1 +imalik 1 +cornel 1 +resum 1 +post 1 +script 1 +cours 1 +program 1 +system 1 +softwar 1 +advanc 1 +databas 1 +network 1 +multimedia 1 +visit 1 +high 1 +school 1 +tap 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..ce0a0f38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,80 @@ +indira 1 +vidyaprakash 1 +vidyaprakashmast 1 +engineeringclass 1 +dept 1 +comput 1 +sciencecornel 1 +universitywelcom 1 +homepag 1 +current 1 +student 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +ithaca 1 +degre 1 +engin 1 +colleg 1 +technolog 1 +coimbator 1 +india 1 +inform 1 +cours 1 +taken 1 +cornelluniversityfal 1 +oper 1 +system 1 +practicum 1 +project 1 +specif 1 +hoca 1 +softwar 1 +multimedia 1 +audio 1 +process 1 +toolkit 1 +manag 1 +polici 1 +spring 1 +graphic 1 +cspracticum 1 +anim 1 +magic 1 +carpet 1 +colloqium 1 +manageri 1 +financesumm 1 +independ 1 +research 1 +tracingin 1 +camera 1 +perspectivetransform 1 +java 1 +click 1 +postscript 1 +version 1 +myresumeclick 1 +applet 1 +perspect 1 +transformssom 1 +interest 1 +site 1 +cool 1 +sgamelan 1 +directori 1 +calvinand 1 +hobb 1 +galleri 1 +gif 1 +indian 1 +recip 1 +chicker 1 +wood 1 +drive 1 +nashvil 1 +tennesse 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..358fdc45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,90 @@ +homeless 1 +pageioi 1 +home 1 +lamioi 1 +cornel 1 +current 1 +research 1 +assist 1 +prof 1 +brian 1 +smith 1 +comput 1 +scienc 1 +depart 1 +univers 1 +interest 1 +multi 1 +media 1 +parallel 1 +program 1 +us 1 +softwar 1 +system 1 +instruct 1 +set 1 +environ 1 +knowledg 1 +base 1 +engin 1 +manual 1 +guidelin 1 +write 1 +good 1 +extens 1 +code 1 +doesn 1 +cover 1 +much 1 +script 1 +section 1 +test 1 +suit 1 +valuabl 1 +programm 1 +postscript 1 +version 1 +complet 1 +packag 1 +includ 1 +templat 1 +sourc 1 +file 1 +document 1 +short 1 +introduct 1 +descript 1 +user 1 +remot 1 +machin 1 +index 1 +inform 1 +tutori 1 +right 1 +week 1 +put 1 +togeth 1 +knowledgebas 1 +repositori 1 +try 1 +spam 1 +site 1 +multim 1 +directori 1 +get 1 +start 1 +virtual 1 +realiti 1 +conferenc 1 +detail 1 +come 1 +work 1 +prototyp 1 +mpeg 1 +video 1 +server 1 +http 1 +protocol 1 +spring 1 +homework 1 +solut 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..585ab6a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,135 @@ +paul 1 +jackson 1 +home 1 +pagepaul 1 +post 1 +doctor 1 +associatecornel 1 +univers 1 +mail 1 +cornel 1 +eduwww 1 +http 1 +info 1 +peopl 1 +htmladdress 1 +depart 1 +comput 1 +scienc 1 +upson 1 +hall 1 +ithaca 1 +usaphon 1 +research 1 +intereststheorem 1 +prove 1 +environ 1 +formal 1 +method 1 +softwar 1 +andhardwar 1 +develop 1 +algebra 1 +synthesi 1 +scientif 1 +program 1 +linkag 1 +tool 1 +engin 1 +design 1 +thesi 1 +informationmi 1 +entitl 1 +enhanc 1 +nuprl 1 +proof 1 +developmentsystem 1 +appli 1 +abstract 1 +avail 1 +full 1 +text 1 +postscript 1 +format 1 +paper 1 +explor 1 +construct 1 +type 1 +theori 1 +bundi 1 +editor 1 +intern 1 +confer 1 +automateddeduct 1 +lectur 1 +note 1 +artif 1 +intellig 1 +springer 1 +verlag 1 +june 1 +circuit 1 +bout 1 +stavrid 1 +melham 1 +proceed 1 +inter 1 +theorem 1 +prover 1 +ifip 1 +transactionsa 1 +north 1 +holland 1 +toolkit 1 +float 1 +point 1 +hardwar 1 +thenuprl 1 +system 1 +theadvanc 1 +workshop 1 +correct 1 +methodolog 1 +elsevi 1 +nuprlth 1 +project 1 +world 1 +wide 1 +page 1 +access 1 +document 1 +commun 1 +live 1 +session 1 +basic 1 +load 1 +collect 1 +still 1 +need 1 +workon 1 +make 1 +someon 1 +els 1 +getround 1 +pai 1 +attent 1 +sometim 1 +next 1 +month 1 +hypertext 1 +list 1 +thetheori 1 +foreach 1 +includ 1 +introduct 1 +summari 1 +definit 1 +andtheorem 1 +thepolynomi 1 +relat 1 +moment 1 +shouldb 1 +coupl 1 +dai 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..c33fb575 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,3 @@ +hani 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..a52e3298 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,21 @@ +tibor 1 +jnositibor 1 +jnosiwelcom 1 +mywww 1 +home 1 +page 1 +perman 1 +constructionoffic 1 +upson 1 +hall 1 +cornel 1 +univers 1 +ithaca 1 +usaoffic 1 +phone 1 +interest 1 +site 1 +project 1 +zenotibor 1 +jnosi 1 +janosi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..a929bf77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,13 @@ +janwun 1 +cornel 1 +master 1 +engin 1 +student 1 +comput 1 +scienc 1 +depart 1 +address 1 +mapl 1 +avenu 1 +ithaca 1 +telephon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..7bb24d7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,183 @@ +faculti 1 +research 1 +interest 1 +greg 1 +morrisett 1 +cornel 1 +assist 1 +professor 1 +comput 1 +scienc 1 +univers 1 +ithaca 1 +offic 1 +upson 1 +hall 1 +phone 1 +tabl 1 +content 1 +intereststeachingselect 1 +papersrel 1 +linksperson 1 +informationresearch 1 +interestsmi 1 +primari 1 +develop 1 +ofadvanc 1 +program 1 +languag 1 +particularli 1 +interestedin 1 +high 1 +level 1 +standard 1 +forbuild 1 +system 1 +softwar 1 +includ 1 +time 1 +operatingsystem 1 +distribut 1 +late 1 +focus 1 +onth 1 +implement 1 +issu 1 +kept 1 +safelanguag 1 +us 1 +construct 1 +concentr 1 +produc 1 +code 1 +faster 1 +consum 1 +less 1 +memori 1 +support 1 +hack 1 +bit 1 +also 1 +bring 1 +power 1 +semant 1 +base 1 +toolsfrom 1 +theori 1 +type 1 +direct 1 +compil 1 +partial 1 +evalu 1 +abstract 1 +interpret 1 +gener 1 +design 1 +specif 1 +real 1 +systemssoftwar 1 +teachingc 1 +fall 1 +advanc 1 +spring 1 +select 1 +paperssemant 1 +manag 1 +polymorph 1 +robert 1 +harper 1 +technic 1 +report 1 +appear 1 +septemb 1 +gzip 1 +postscript 1 +thesi 1 +publish 1 +decemb 1 +optim 1 +tarditi 1 +cheng 1 +stone 1 +sigplan 1 +confer 1 +perform 1 +safetythrough 1 +workshop 1 +closur 1 +convers 1 +yasuhiko 1 +minamid 1 +symposium 1 +principl 1 +extend 1 +version 1 +juli 1 +model 1 +matthia 1 +felleisen 1 +conf 1 +function 1 +andcomput 1 +architectur 1 +reportcmu 1 +notecmu 1 +intensionaltyp 1 +analysi 1 +proc 1 +annual 1 +francisco 1 +januari 1 +optimist 1 +parallelizationgreg 1 +mauric 1 +herlihi 1 +octob 1 +refin 1 +first 1 +class 1 +store 1 +gregori 1 +proceed 1 +state 1 +copenhagen 1 +denmark 1 +june 1 +lock 1 +portabl 1 +multiprocess 1 +platform 1 +jersei 1 +andrew 1 +tolmach 1 +fourth 1 +practic 1 +parallel 1 +diego 1 +interfac 1 +princeton 1 +ad 1 +thread 1 +eric 1 +cooper 1 +relat 1 +link 1 +mark 1 +leon 1 +resourc 1 +member 1 +project 1 +carnegi 1 +mellon 1 +line 1 +inform 1 +home 1 +page 1 +orient 1 +bibliographi 1 +depart 1 +scienceperson 1 +informationhom 1 +address 1 +warren 1 +road 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..21647519 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,15 @@ +jiun 1 +resum 1 +java 1 +current 1 +address 1 +mapl 1 +avenu 1 +ithaca 1 +email 1 +jhlin 1 +cornel 1 +perman 1 +shing 1 +taipei 1 +taiwan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..befaf646 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,5 @@ +jerri 1 +cornel 1 +edujerri 1 +project 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..94fc2f61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,115 @@ +wang 1 +home 1 +page 1 +twin 1 +sister 1 +wangphd 1 +student 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +upson 1 +hallithaca 1 +offic 1 +phone 1 +email 1 +jiawang 1 +first 1 +year 1 +graduat 1 +state 1 +york 1 +binghamton 1 +degre 1 +transfer 1 +suni 1 +mathemat 1 +nankai 1 +tianjin 1 +china 1 +honor 1 +award 1 +barri 1 +goldwat 1 +scholar 1 +engin 1 +nation 1 +sciencefound 1 +research 1 +fellowship 1 +famili 1 +cool 1 +link 1 +hongkong 1 +taiwan 1 +beij 1 +review 1 +chinaand 1 +chines 1 +relat 1 +site 1 +daili 1 +cbnet 1 +internet 1 +forum 1 +chinanet 1 +stamp 1 +time 1 +window 1 +chinesecalendar 1 +directori 1 +magazin 1 +mediainform 1 +music 1 +new 1 +digest 1 +homepag 1 +servic 1 +educ 1 +histori 1 +cultur 1 +hongkonglaserdisccent 1 +internetdistribut 1 +multilingu 1 +softwar 1 +ryan 1 +smovieplex 1 +sceneri 1 +pictur 1 +tour 1 +entertain 1 +sheng 1 +tian 1 +diwww 1 +futur 1 +interest 1 +america 1 +best 1 +school 1 +rank 1 +liber 1 +art 1 +film 1 +ieee 1 +societi 1 +monei 1 +foundat 1 +peterson 1 +guid 1 +postcard 1 +program 1 +incomput 1 +thesenior 1 +virtual 1 +tourist 1 +worldmap 1 +yahoo 1 +christian 1 +mandarin 1 +cssa 1 +weather 1 +stoni 1 +brook 1 +ucla 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..cc16d080 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,45 @@ +justin 1 +miller 1 +home 1 +page 1 +navi 1 +uniform 1 +current 1 +master 1 +engin 1 +student 1 +comput 1 +scienc 1 +colleg 1 +ofengin 1 +cornel 1 +univers 1 +semest 1 +teach 1 +assist 1 +com 1 +machin 1 +vision 1 +mani 1 +long 1 +night 1 +found 1 +robot 1 +csrvl 1 +research 1 +assistantwork 1 +prof 1 +ramin 1 +zabih 1 +primari 1 +interest 1 +ismachin 1 +particularli 1 +level 1 +imag 1 +process 1 +gener 1 +informationsom 1 +rant 1 +project 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..02eab0ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,137 @@ +jeff 1 +moorejeff 1 +moorewel 1 +current 1 +graduat 1 +student 1 +cornel 1 +work 1 +mastersof 1 +engin 1 +comput 1 +scienc 1 +receiv 1 +purdu 1 +univers 1 +west 1 +lafayett 1 +indiana 1 +went 1 +high 1 +school 1 +hillsboro 1 +oregon 1 +suburb 1 +portland 1 +employmentmi 1 +resum 1 +anyon 1 +interest 1 +intel 1 +internet 1 +product 1 +divis 1 +creat 1 +cool 1 +softwar 1 +spring 1 +classesnba 1 +thrive 1 +inform 1 +revolut 1 +entertain 1 +sectorc 1 +practic 1 +distribut 1 +systemsc 1 +practicum 1 +system 1 +cornellopoli 1 +game 1 +perform 1 +architectur 1 +network 1 +optim 1 +parallel 1 +mpeg 1 +encod 1 +researchfal 1 +classesc 1 +technolog 1 +techniquec 1 +formal 1 +methodsc 1 +multimedia 1 +research 1 +paperc 1 +colloquiumc 1 +tool 1 +seminar 1 +present 1 +opendoc 1 +mfcoptim 1 +researchsoftwar 1 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 1 +companiesintelsilicon 1 +graphicsibmsunapplemagazinespc 1 +magazinepc 1 +weekpc 1 +computingcomput 1 +shopperwindow 1 +sourcescomput 1 +lifemacusermacweekinteract 1 +weekfamili 1 +pccomput 1 +worldelectron 1 +newspapersusa 1 +todaywal 1 +street 1 +journalnew 1 +york 1 +timesphiladelphia 1 +onlineth 1 +daili 1 +new 1 +worldwideth 1 +dalla 1 +morn 1 +opinionsth 1 +detroit 1 +free 1 +press 1 +gopherth 1 +knoxvil 1 +sentinelth 1 +leader 1 +onlinelat 1 +serviceth 1 +nugget 1 +newspap 1 +sister 1 +oregonrworld 1 +orang 1 +counti 1 +registerth 1 +francisco 1 +chronicl 1 +examinersan 1 +jose 1 +mercuryth 1 +seattl 1 +timesnando 1 +netusa 1 +todayboston 1 +globeportland 1 +herald 1 +main 1 +sundai 1 +telegramvisitor 1 +sinc 1 +januari 1 +campu 1 +address 1 +mapl 1 +fdithaca 1 +last 1 +updat 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..d97ee9b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,81 @@ +jose 1 +lui 1 +fernandez 1 +home 1 +pagejos 1 +fernandezjos 1 +fernandezmast 1 +engin 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +address 1 +mapl 1 +avenu 1 +ebithaca 1 +mail 1 +joselui 1 +cornel 1 +us 1 +java 1 +enabl 1 +browser 1 +would 1 +anim 1 +scroll 1 +text 1 +sign 1 +resum 1 +curriculum 1 +vita 1 +interest 1 +current 1 +distribut 1 +system 1 +multimedia 1 +graphic 1 +project 1 +imag 1 +video 1 +transit 1 +rivl 1 +exampl 1 +page 1 +presentationc 1 +pyramania 1 +game 1 +spaceship 1 +battl 1 +report 1 +hoca 1 +design 1 +code 1 +oper 1 +implement 1 +multitask 1 +virtual 1 +memori 1 +meng 1 +autonom 1 +vehicl 1 +simul 1 +hobbi 1 +photographi 1 +click 1 +view 1 +picturesmusiccomputerswrit 1 +direct 1 +crazi 1 +movi 1 +actor 1 +recruit 1 +friend 1 +time 1 +clock 1 +courtesi 1 +bill 1 +giel 1 +visitor 1 +number 1 +better 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..4a98120f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,30 @@ +julin 1 +hurtado 1 +home 1 +pagejulin 1 +click 1 +curriculum 1 +vita 1 +cornel 1 +universitymast 1 +busi 1 +administr 1 +johnson 1 +graduat 1 +school 1 +managementmast 1 +engin 1 +depart 1 +comput 1 +science 1 +mail 1 +colombia 1 +linda 1 +er 1 +master 1 +project 1 +distribut 1 +system 1 +autonom 1 +vehicl 1 +simul 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..7ffa1d0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,28 @@ +janeen 1 +homepagejaneen 1 +reich 1 +welcom 1 +home 1 +page 1 +current 1 +cornel 1 +univers 1 +complet 1 +comput 1 +scienc 1 +august 1 +septemb 1 +join 1 +system 1 +group 1 +texa 1 +instrument 1 +dalla 1 +send 1 +email 1 +jreich 1 +edumi 1 +resum 1 +ad 1 +favorit 1 +thing 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..62c71256 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,120 @@ +jodi 1 +shapirojodi 1 +shapiroeduc 1 +receiv 1 +comput 1 +system 1 +engin 1 +univers 1 +massachusett 1 +amherst 1 +current 1 +cornel 1 +get 1 +meng 1 +scienc 1 +graduat 1 +resum 1 +fall 1 +cours 1 +multimedia 1 +systemsc 1 +softwar 1 +engineeringe 1 +network 1 +telecommunicationc 1 +master 1 +researchspr 1 +machin 1 +visionc 1 +high 1 +perform 1 +systemse 1 +capac 1 +networksnba 1 +thrive 1 +inform 1 +revolutionc 1 +researchma 1 +automot 1 +engineeringinterest 1 +project 1 +design 1 +implement 1 +dynam 1 +gener 1 +synchron 1 +speech 1 +facial 1 +animationlow 1 +cost 1 +portabl 1 +desktop 1 +videoconferenc 1 +window 1 +parallel 1 +object 1 +recognit 1 +applic 1 +recognitioninterest 1 +main 1 +interest 1 +obvious 1 +car 1 +memberof 1 +bodi 1 +mail 1 +list 1 +camaro 1 +firebird 1 +yourselfelectron 1 +fuel 1 +inject 1 +although 1 +alwayshav 1 +time 1 +particip 1 +also 1 +designingan 1 +ground 1 +page 1 +home 1 +pageefi 1 +pagethes 1 +pictur 1 +chevi 1 +sold 1 +stock 1 +speed 1 +gearsmodif 1 +hypertech 1 +stage 1 +chip 1 +flowmast 1 +exhaust 1 +hurst 1 +shifter 1 +grant 1 +steer 1 +wheel 1 +filter 1 +ford 1 +mustang 1 +bought 1 +septemb 1 +still 1 +speedmodif 1 +gear 1 +accel 1 +plug 1 +motorsport 1 +wiresbest 1 +mile 1 +mphbest 1 +come 1 +januari 1 +pagenumb 1 +visit 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..b954d3a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,8 @@ +julia 1 +home 1 +pagejulia 1 +komissarchik 1 +juliak 1 +cornel 1 +eduto 1 +continu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..3636d696 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,104 @@ +julian 1 +pelenur 1 +emilio 1 +cornel 1 +better 1 +pictur 1 +avail 1 +master 1 +engin 1 +univers 1 +comput 1 +scienc 1 +graduat 1 +campu 1 +adress 1 +summit 1 +ithaca 1 +offic 1 +theori 1 +center 1 +centerithaca 1 +upson 1 +hallcornel 1 +current 1 +occup 1 +fulltim 1 +student 1 +teach 1 +assist 1 +databas 1 +administr 1 +recent 1 +project 1 +global 1 +pointer 1 +complet 1 +toolkit 1 +write 1 +parallel 1 +program 1 +network 1 +workstat 1 +independ 1 +platform 1 +topolog 1 +compil 1 +develop 1 +sparcstat 1 +ethernet 1 +wfinger 1 +system 1 +search 1 +home 1 +page 1 +document 1 +world 1 +wide 1 +cyberserv 1 +grow 1 +need 1 +faster 1 +httpserver 1 +fulfil 1 +increas 1 +demand 1 +servic 1 +addit 1 +commerci 1 +fault 1 +toler 1 +high 1 +becom 1 +critic 1 +paper 1 +describ 1 +design 1 +implement 1 +distribut 1 +http 1 +server 1 +us 1 +horu 1 +prvf 1 +poss 1 +realli 1 +fast 1 +video 1 +thegoal 1 +techniqu 1 +achiev 1 +full 1 +screenmot 1 +cluster 1 +showthat 1 +innov 1 +snarf 1 +blast 1 +capit 1 +hardwar 1 +produc 1 +transferwith 1 +compress 1 +color 1 +screen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..423d7743 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,92 @@ +jason 1 +hickei 1 +home 1 +page 1 +graduat 1 +student 1 +cornel 1 +comput 1 +scienc 1 +depart 1 +supervis 1 +robertconst 1 +summari 1 +current 1 +statu 1 +interest 1 +includ 1 +theori 1 +practic 1 +program 1 +languag 1 +great 1 +resourc 1 +thefox 1 +project 1 +especi 1 +markleon 1 +research 1 +work 1 +mainli 1 +softwar 1 +verif 1 +tool 1 +specif 1 +type 1 +formalsystem 1 +nuprl 1 +develop 1 +universitydepart 1 +make 1 +higher 1 +levelmodul 1 +abstract 1 +data 1 +relat 1 +recent 1 +paper 1 +publish 1 +cornella 1 +bibliographi 1 +publishedat 1 +bellcor 1 +also 1 +slide 1 +talk 1 +havegiven 1 +seminar 1 +pretti 1 +technic 1 +theygiv 1 +overview 1 +done 1 +want 1 +sequenc 1 +identif 1 +fine 1 +art 1 +galleryof 1 +mine 1 +tryth 1 +orth 1 +fineart 1 +forum 1 +cucshockei 1 +backcountri 1 +take 1 +look 1 +thebackcountri 1 +perform 1 +servic 1 +publicli 1 +maintainedsoftwar 1 +czar 1 +hockei 1 +equip 1 +back 1 +hockeyfor 1 +info 1 +theatr 1 +schedul 1 +forth 1 +center 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..eacbb074 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,36 @@ +koichi 1 +kamijokoichi 1 +kamijo 1 +welcom 1 +home 1 +page 1 +construct 1 +reach 1 +juli 1 +english 1 +japanes 1 +us 1 +thing 1 +sell 1 +back 1 +japan 1 +sold 1 +click 1 +like 1 +sale 1 +class 1 +papershometownseduc 1 +work 1 +experienceskoichi 1 +muriel 1 +ithaca 1 +cornel 1 +kkamijoh 1 +vnet 1 +go 1 +accept 1 +access 1 +time 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..66eb5d06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,115 @@ +karl 1 +friedrich 1 +bhringer 1 +cornel 1 +univers 1 +dipl 1 +inform 1 +karlsruh 1 +graduat 1 +student 1 +dept 1 +comput 1 +scienc 1 +upson 1 +hall 1 +ithaca 1 +email 1 +educurr 1 +address 1 +stanford 1 +robot 1 +laboratori 1 +gate 1 +build 1 +current 1 +research 1 +interest 1 +micro 1 +manipul 1 +andassembl 1 +nanofabr 1 +facil 1 +microfabr 1 +actuat 1 +arrai 1 +implementmicro 1 +strategi 1 +gener 1 +innew 1 +devic 1 +handl 1 +part 1 +withprogramm 1 +forc 1 +vector 1 +field 1 +also 1 +investig 1 +design 1 +autom 1 +structur 1 +earlier 1 +work 1 +germani 1 +includ 1 +develop 1 +better 1 +graph 1 +layout 1 +algorithm 1 +thesi 1 +advisor 1 +professorbruc 1 +donald 1 +founder 1 +director 1 +vision 1 +project 1 +close 1 +collabor 1 +professor 1 +noel 1 +macdonaldand 1 +hisresearch 1 +group 1 +public 1 +document 1 +confer 1 +announc 1 +call 1 +paper 1 +anim 1 +video 1 +sculptur 1 +invis 1 +cantilev 1 +model 1 +frank 1 +lloyd 1 +wright 1 +fallingwat 1 +articl 1 +york 1 +time 1 +magazin 1 +march 1 +wire 1 +octob 1 +offic 1 +nano 1 +outin 1 +kwon 1 +club 1 +find 1 +lindseth 1 +climb 1 +wall 1 +navig 1 +page 1 +previou 1 +higher 1 +level 1 +deeper 1 +next 1 +pagekarl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..4311a869 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,232 @@ +david 1 +karrdavid 1 +karrphd 1 +studentdepart 1 +comput 1 +sciencecornel 1 +univers 1 +upson 1 +hall 1 +ithaca 1 +mail 1 +karr 1 +cornel 1 +edui 1 +student 1 +depart 1 +scienceat 1 +work 1 +thehoru 1 +project 1 +layer 1 +architectur 1 +reliabl 1 +distribut 1 +system 1 +withprofessor 1 +kenneth 1 +birmananddr 1 +robbert 1 +reness 1 +minor 1 +field 1 +mathemat 1 +concentr 1 +statist 1 +research 1 +interest 1 +engin 1 +commun 1 +protocol 1 +weak 1 +consist 1 +perform 1 +html 1 +java 1 +protocolsmi 1 +includ 1 +problem 1 +specifi 1 +implement 1 +verifi 1 +applic 1 +dissert 1 +formalspecif 1 +verif 1 +properti 1 +ofhoru 1 +us 1 +tempor 1 +logic 1 +action 1 +variousinterest 1 +fundament 1 +usedin 1 +horu 1 +stack 1 +furthermor 1 +write 1 +formula 1 +assum 1 +guarante 1 +stylefor 1 +might 1 +provid 1 +itsinterfac 1 +depend 1 +andbelow 1 +emploi 1 +straightforward 1 +techniqu 1 +agiven 1 +certain 1 +desir 1 +thetop 1 +condit 1 +even 1 +unusualcombin 1 +atyp 1 +order 1 +ultim 1 +user 1 +systemsshould 1 +abl 1 +call 1 +help 1 +constructcustom 1 +omit 1 +unnecessari 1 +avoid 1 +theirassoci 1 +cost 1 +confid 1 +sufficientto 1 +intend 1 +part 1 +basi 1 +thesecur 1 +harden 1 +develop 1 +applet 1 +give 1 +rough 1 +demonstr 1 +propos 1 +method 1 +ofverifi 1 +initi 1 +stem 1 +thepromis 1 +suit 1 +variousguarante 1 +programm 1 +messag 1 +passingenviron 1 +host 1 +crash 1 +delayedor 1 +lost 1 +softwar 1 +haswork 1 +whose 1 +componentswer 1 +prone 1 +failur 1 +feel 1 +featur 1 +offer 1 +considerablepromis 1 +consistencywhil 1 +becom 1 +ofdistribut 1 +wide 1 +area 1 +network 1 +look 1 +revis 1 +control 1 +filesin 1 +environ 1 +gener 1 +distributedenviron 1 +partitionedinto 1 +disconnect 1 +portion 1 +notion 1 +wouldallow 1 +multipl 1 +temporarili 1 +site 1 +make 1 +progress 1 +concurr 1 +performancemi 1 +correct 1 +measur 1 +high 1 +avail 1 +respons 1 +time 1 +andeffici 1 +resourc 1 +clearli 1 +equal 1 +import 1 +larg 1 +appar 1 +random 1 +ofsystem 1 +load 1 +activ 1 +notabl 1 +except 1 +dedic 1 +parallelmachin 1 +behavior 1 +also 1 +suscept 1 +analysi 1 +though 1 +differ 1 +kind 1 +encourag 1 +javath 1 +world 1 +applicationwith 1 +mani 1 +possibl 1 +explor 1 +experi 1 +simpl 1 +wai 1 +hypertext 1 +tonavig 1 +inform 1 +appear 1 +myweb 1 +lego 1 +toi 1 +hack 1 +execut 1 +code 1 +anetscap 1 +browser 1 +download 1 +exampl 1 +abirthdai 1 +puzzl 1 +calcul 1 +tool 1 +forverifi 1 +profession 1 +affiliationsi 1 +member 1 +ieee 1 +andmaa 1 +informationseemi 1 +linksfor 1 +topic 1 +find 1 +last 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..aabba7c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,137 @@ +welcom 1 +home 1 +pagekartik 1 +kapadiamast 1 +engineeringclass 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +address 1 +dabnei 1 +drive 1 +diego 1 +california 1 +phone 1 +work 1 +mail 1 +kkapadia 1 +qualcomm 1 +comcurr 1 +softwar 1 +engin 1 +incorporatedmi 1 +main 1 +area 1 +interest 1 +graphic 1 +distribut 1 +system 1 +network 1 +cours 1 +took 1 +spring 1 +architectur 1 +high 1 +capac 1 +inform 1 +fall 1 +oper 1 +multimedia 1 +projectshoca 1 +chiphoca 1 +pronounc 1 +hodja 1 +full 1 +fledg 1 +chip 1 +cornel 1 +hypothet 1 +instruct 1 +processor 1 +support 1 +featur 1 +like 1 +multitask 1 +virtual 1 +memori 1 +hogman 1 +enjoy 1 +game 1 +quick 1 +break 1 +singl 1 +player 1 +window 1 +platform 1 +code 1 +interfac 1 +good 1 +sourc 1 +entertain 1 +take 1 +click 1 +screenshot 1 +gameboard 1 +help 1 +screen 1 +view 1 +postscript 1 +design 1 +document 1 +scene 1 +transit 1 +effect 1 +rivlrivl 1 +stand 1 +resolut 1 +independ 1 +video 1 +languag 1 +rivl 1 +develop 1 +univers 1 +jonathan 1 +swartz 1 +brian 1 +smith 1 +excel 1 +applic 1 +project 1 +enhanc 1 +incorpor 1 +primit 1 +implement 1 +present 1 +simul 1 +railroad 1 +master 1 +visual 1 +captur 1 +scientif 1 +aspect 1 +lai 1 +track 1 +vehicl 1 +model 1 +dynam 1 +motion 1 +us 1 +combin 1 +open 1 +inventor 1 +opengl 1 +realiti 1 +facil 1 +resumesom 1 +favorit 1 +site 1 +star 1 +cool 1 +mpeg 1 +clip 1 +lot 1 +music 1 +page 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..ea64243f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,63 @@ +publish 1 +true 1 +writingsi 1 +much 1 +written 1 +thing 1 +proudof 1 +flame 1 +mine 1 +other 1 +morethought 1 +conscienti 1 +objector 1 +arm 1 +conflict 1 +myfirst 1 +letter 1 +grand 1 +rapid 1 +press 1 +fewyear 1 +back 1 +gulf 1 +sinc 1 +time 1 +howev 1 +vestart 1 +keep 1 +work 1 +onlin 1 +lest 1 +wonder 1 +also 1 +write 1 +poetri 1 +dprobabl 1 +rather 1 +famou 1 +essayist 1 +anyhow 1 +like 1 +argu 1 +dread 1 +mess 1 +go 1 +byron 1 +center 1 +asuburb 1 +mile 1 +went 1 +high 1 +school 1 +wrote 1 +unabomb 1 +suspect 1 +mathematician 1 +mathematiciansar 1 +terrorist 1 +think 1 +editor 1 +newspap 1 +wide 1 +read 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..2f71f06f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,268 @@ +katherin 1 +home 1 +page 1 +guokguo 1 +cornel 1 +educornel 1 +universitydept 1 +comput 1 +scienc 1 +upson 1 +hall 1 +ithaca 1 +student 1 +work 1 +distribut 1 +system 1 +interest 1 +scalabl 1 +reliabl 1 +multicastprotocol 1 +horu 1 +project 1 +direct 1 +birman 1 +robbert 1 +reness 1 +werner 1 +vogel 1 +recent 1 +publicationskatherin 1 +structur 1 +virtual 1 +synchroni 1 +explor 1 +bound 1 +ofvirtu 1 +synchron 1 +group 1 +commun 1 +appear 1 +proceed 1 +sigop 1 +european 1 +workshop 1 +connemara 1 +ireland 1 +septemb 1 +lui 1 +rodrigu 1 +antonio 1 +sargento 1 +brad 1 +glade 1 +paulo 1 +verisimo 1 +transpar 1 +light 1 +weight 1 +servic 1 +ieee 1 +symposiumon 1 +niagara 1 +lake 1 +canada 1 +octob 1 +also 1 +avail 1 +technic 1 +report 1 +depart 1 +univers 1 +kenneth 1 +mark 1 +hayden 1 +takako 1 +hickei 1 +dalia 1 +malki 1 +alex 1 +vaysburd 1 +flexibl 1 +march 1 +research 1 +relat 1 +infodistribut 1 +systemscomput 1 +networkscool 1 +toolsbibliographyconferencesjournalsacademia 1 +industri 1 +infocompani 1 +infoschool 1 +infojob 1 +searchinterest 1 +place 1 +austin 1 +lisboa 1 +colorado 1 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 1 +inforesearch 1 +systempointershoru 1 +commerci 1 +productspringtotemtransisx 1 +kernel 1 +arizona 1 +microsystemslab 1 +networksmulticast 1 +protocolsn 1 +fromlblgun 1 +cool 1 +tool 1 +consortium 1 +lectur 1 +gener 1 +info 1 +theproject 1 +html 1 +simpl 1 +beginn 1 +sguid 1 +quickrefer 1 +htmldocument 1 +tabl 1 +content 1 +common 1 +gatewai 1 +interfac 1 +httpd 1 +ncsa 1 +overview 1 +find 1 +imag 1 +file 1 +finder 1 +mosaic 1 +sourc 1 +binari 1 +document 1 +xmosaic 1 +uiuc 1 +other 1 +cern 1 +java 1 +hotjava 1 +bibliographybibliographi 1 +oldindex 1 +index 1 +confer 1 +pointer 1 +hpdc 1 +ftc 1 +sosp 1 +srd 1 +icdc 1 +jsac 1 +journal 1 +elsevi 1 +scienceacademia 1 +motorola 1 +openingsibmdelltandemtiapplebel 1 +atlant 1 +school 1 +texa 1 +dept 1 +ucsd 1 +gradschool 1 +advic 1 +gradjob 1 +search 1 +databas 1 +ukinterest 1 +life 1 +weather 1 +moviesbailei 1 +concertslibrari 1 +hightechin 1 +institut 1 +inesc 1 +copper 1 +mountain 1 +resort 1 +summit 1 +counti 1 +coloradooth 1 +infoart 1 +weblouvreth 1 +world 1 +women 1 +linebook 1 +amazon 1 +book 1 +calvinhobb 1 +archivecardsmagicchinaart 1 +china 1 +cook 1 +electron 1 +gourmetl 1 +cordonbleu 1 +itali 1 +dessert 1 +fashional 1 +linksa 1 +cjlutz 1 +fashion 1 +wwweb 1 +pagewith 1 +heart 1 +tmexpressfirst 1 +view 1 +wireirc 1 +faqfashion 1 +nethair 1 +crew 1 +diesel 1 +jean 1 +guessfriend 1 +alan 1 +cheng 1 +david 1 +deng 1 +shiji 1 +insur 1 +plan 1 +email 1 +grove 1 +edulibrari 1 +librari 1 +congressmagazin 1 +intertext 1 +wire 1 +timegeorg 1 +gilder 1 +discoveri 1 +mail 1 +postcard 1 +map 1 +music 1 +internet 1 +underground 1 +archivesinanet 1 +newsworld 1 +new 1 +brief 1 +sport 1 +open 1 +olymp 1 +stock 1 +wall 1 +streetheadlin 1 +street 1 +weatherhunt 1 +infoth 1 +lyco 1 +hunt 1 +informationglob 1 +network 1 +navigatorhom 1 +global 1 +navig 1 +scout 1 +wanderersand 1 +spider 1 +edg 1 +yahoo 1 +refer 1 +netscap 1 +last 1 +modifi 1 +kguo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..65a5e0c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,43 @@ +wirefram 1 +rotat 1 +introductionthi 1 +project 1 +desgin 1 +learn 1 +tool 1 +comput 1 +graphic 1 +provid 1 +understand 1 +polygon 1 +form 1 +list 1 +vertic 1 +written 1 +java 1 +simpl 1 +power 1 +languag 1 +creat 1 +safe 1 +portabl 1 +interact 1 +object 1 +orient 1 +multi 1 +threader 1 +program 1 +environ 1 +platform 1 +speific 1 +applet 1 +react 1 +user 1 +input 1 +dynam 1 +chang 1 +cone 1 +cube 1 +cylind 1 +tetra 1 +toru 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..3766e9cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,242 @@ +kleinberg 1 +homepag 1 +kleinber 1 +cornel 1 +assist 1 +professor 1 +comput 1 +scienc 1 +univers 1 +ithaca 1 +research 1 +interest 1 +algorithm 1 +combinatori 1 +optim 1 +emphasi 1 +approxim 1 +geometri 1 +network 1 +distribut 1 +molecular 1 +biologi 1 +recent 1 +work 1 +includ 1 +rout 1 +anddisjoint 1 +path 1 +problem 1 +adversari 1 +queue 1 +theori 1 +approach 1 +analyz 1 +stabilityof 1 +protocol 1 +without 1 +probabilist 1 +assumpt 1 +geometr 1 +method 1 +particularlyth 1 +posit 1 +semi 1 +definit 1 +program 1 +studi 1 +conform 1 +spend 1 +academ 1 +year 1 +visit 1 +almaden 1 +center 1 +click 1 +seeselect 1 +publicationsmiscellan 1 +linkspapersapproxim 1 +singl 1 +sourc 1 +unsplitt 1 +flow 1 +proc 1 +ieee 1 +symposium 1 +foundat 1 +appear 1 +rubinfeld 1 +short 1 +expand 1 +graph 1 +tardo 1 +disjointpath 1 +dens 1 +embed 1 +disjoint 1 +high 1 +diamet 1 +planar 1 +aggarw 1 +williamson 1 +node 1 +mesh 1 +trade 1 +vlsi 1 +layout 1 +goeman 1 +improvedapproxim 1 +ratio 1 +minimum 1 +latenc 1 +siam 1 +discret 1 +lovasz 1 +thetafunct 1 +relax 1 +vertex 1 +cover 1 +math 1 +line 1 +local 1 +formobil 1 +robot 1 +computersci 1 +search 1 +simplepolygon 1 +lower 1 +bound 1 +serverbalanc 1 +inform 1 +process 1 +letter 1 +yaniv 1 +serveralgorithm 1 +robotnavig 1 +server 1 +master 1 +thesi 1 +parallel 1 +andrew 1 +awerbuch 1 +fernandez 1 +leighton 1 +stabil 1 +result 1 +greedi 1 +content 1 +resolut 1 +borodin 1 +raghavan 1 +sudan 1 +attiya 1 +lynch 1 +offsbetween 1 +messag 1 +deliveri 1 +quiesc 1 +time 1 +connect 1 +managementprotocol 1 +israel 1 +system 1 +mullainathan 1 +resourc 1 +boundsand 1 +combin 1 +consensu 1 +object 1 +onprincipl 1 +berger 1 +reconstruct 1 +athre 1 +dimension 1 +model 1 +arbitrari 1 +error 1 +huttenloch 1 +compar 1 +point 1 +set 1 +project 1 +kedem 1 +dynam 1 +voronoi 1 +diagram 1 +hausdorff 1 +distanc 1 +pointset 1 +euclidean 1 +motion 1 +plane 1 +symposiumon 1 +invariantsof 1 +segment 1 +universitycomput 1 +technic 1 +report 1 +juli 1 +linkssearch 1 +tool 1 +bibliographiesaltavista 1 +infoseek 1 +excit 1 +yahoo 1 +nynex 1 +yellow 1 +page 1 +glimps 1 +bibliographi 1 +ncstrl 1 +librari 1 +david 1 +jone 1 +hypertext 1 +sitescornel 1 +oper 1 +group 1 +stanford 1 +berkelei 1 +associ 1 +nation 1 +computingtc 1 +virtual 1 +address 1 +book 1 +crescenzi 1 +kann 1 +compendium 1 +foc 1 +confer 1 +soda 1 +stoc 1 +biologycomput 1 +carb 1 +biocomput 1 +sdsc 1 +list 1 +geometrydavid 1 +eppstein 1 +junkyard 1 +jeff 1 +erickson 1 +internet 1 +securitymitr 1 +corp 1 +secur 1 +princeton 1 +safe 1 +rivest 1 +cryptographi 1 +link 1 +miscellaneousnetscap 1 +intellicast 1 +interact 1 +tenni 1 +chess 1 +onlin 1 +talk 1 +kleinbergdepart 1 +scienceupson 1 +hallcornel 1 +universityithaca 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..4e8df1d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,52 @@ +homepag 1 +kazushi 1 +otakota 1 +cornel 1 +edukazushi 1 +melco 1 +current 1 +master 1 +engin 1 +student 1 +comput 1 +scienc 1 +cornellunivers 1 +receiv 1 +univers 1 +tokyo 1 +back 1 +march 1 +work 1 +mitusbishi 1 +electr 1 +corpor 1 +japan 1 +return 1 +degre 1 +worth 1 +page 1 +isund 1 +construct 1 +start 1 +assign 1 +acquaint 1 +html 1 +forc 1 +depart 1 +inform 1 +superhighwai 1 +cours 1 +interest 1 +pictur 1 +music 1 +move 1 +sale 1 +come 1 +take 1 +februari 1 +thing 1 +want 1 +sell 1 +think 1 +advert 1 +peopl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..8d527031 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,54 @@ +faculti 1 +research 1 +interest 1 +dexter 1 +kozendext 1 +kozenjoseph 1 +newton 1 +professor 1 +engineeringphd 1 +cornel 1 +univers 1 +interestsmi 1 +includ 1 +algorithm 1 +complex 1 +especiallycomplex 1 +decis 1 +problem 1 +logic 1 +algebra 1 +andsemant 1 +program 1 +languag 1 +paper 1 +avail 1 +onlinekleen 1 +constraint 1 +type 1 +infer 1 +comput 1 +algebraautomata 1 +theori 1 +logicbibliographylist 1 +public 1 +technic 1 +reportscours 1 +notesc 1 +structur 1 +interpret 1 +programsc 1 +automata 1 +theoryfun 1 +stufffamili 1 +pictur 1 +rugbi 1 +effectcomput 1 +scienc 1 +departmentupson 1 +hallcornel 1 +universityithaca 1 +york 1 +usakozen 1 +work 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..2534f3e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,42 @@ +home 1 +page 1 +christoph 1 +kreitz 1 +pictur 1 +soon 1 +research 1 +associ 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +offic 1 +phone 1 +email 1 +upson 1 +hall 1 +topic 1 +program 1 +synthesi 1 +autom 1 +deduct 1 +type 1 +theori 1 +teach 1 +learn 1 +german 1 +lehr 1 +lernen 1 +vorlesungsskript 1 +medienunterst 1 +uumltzt 1 +lehren 1 +person 1 +inform 1 +avail 1 +last 1 +modifi 1 +novemb 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..6846ecd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,34 @@ +kuen 1 +heng 1 +cornel 1 +isi 1 +master 1 +engin 1 +comput 1 +scienc 1 +depart 1 +univers 1 +address 1 +grove 1 +street 1 +newton 1 +telephon 1 +welcom 1 +visit 1 +place 1 +myproject 1 +multimedia 1 +system 1 +cours 1 +would 1 +like 1 +read 1 +daili 1 +new 1 +taiwan 1 +home 1 +countri 1 +enjoi 1 +page 1 +still 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..e51fb9c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,182 @@ +carl 1 +lagoz 1 +person 1 +home 1 +page 1 +project 1 +leader 1 +digit 1 +librari 1 +research 1 +group 1 +depart 1 +comput 1 +scienc 1 +upson 1 +hall 1 +cornel 1 +universityithaca 1 +phone 1 +internet 1 +edui 1 +lead 1 +groupin 1 +departmentat 1 +univers 1 +ourgroup 1 +manag 1 +oper 1 +technic 1 +develop 1 +network 1 +report 1 +ncstrl 1 +intern 1 +consortium 1 +maintain 1 +adistribut 1 +collaborateson 1 +number 1 +issu 1 +davi 1 +thedienstsoftwar 1 +protocol 1 +refer 1 +implement 1 +providesdistribut 1 +server 1 +access 1 +worldwid 1 +dienst 1 +current 1 +enabl 1 +technolog 1 +author 1 +paper 1 +drop 1 +publish 1 +world 1 +wide 1 +confer 1 +architectur 1 +distribut 1 +document 1 +commun 1 +april 1 +manual 1 +build 1 +product 1 +chapter 1 +advanc 1 +springer 1 +verlag 1 +primari 1 +involv 1 +defin 1 +servic 1 +protocolsfor 1 +interoper 1 +infrastructur 1 +area 1 +collabor 1 +corpor 1 +nation 1 +initiativesto 1 +extend 1 +object 1 +framework 1 +developeda 1 +part 1 +darpa 1 +fund 1 +open 1 +design 1 +secur 1 +store 1 +iso 1 +repositori 1 +dlib 1 +magazin 1 +decemb 1 +work 1 +also 1 +member 1 +dlibwork 1 +interfacesand 1 +releas 1 +final 1 +metadata 1 +workshop 1 +iiin 1 +warwick 1 +amveri 1 +interest 1 +us 1 +distributedobject 1 +read 1 +posit 1 +paperfor 1 +joint 1 +mobil 1 +codeworkshop 1 +know 1 +meetm 1 +meet 1 +find 1 +poor 1 +substitut 1 +contact 1 +littl 1 +moreabout 1 +charact 1 +pictur 1 +pagei 1 +luci 1 +daughter 1 +rule 1 +major 1 +life 1 +outsideof 1 +time 1 +toddler 1 +constant 1 +challeng 1 +lucyg 1 +mean 1 +never 1 +provid 1 +avid 1 +outdoor 1 +site 1 +fast 1 +movingwat 1 +quiet 1 +lakeand 1 +itch 1 +cano 1 +give 1 +beauti 1 +think 1 +bike 1 +ridingalong 1 +road 1 +backwood 1 +trail 1 +tell 1 +sparehour 1 +run 1 +shoe 1 +breath 1 +deeplyth 1 +fresh 1 +spend 1 +much 1 +joi 1 +physicalnor 1 +ever 1 +interfer 1 +desir 1 +fight 1 +itspreserv 1 +hope 1 +sometim 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..7b32ee10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,101 @@ +lidong 1 +zhou 1 +homepag 1 +welcom 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +fall 1 +cours 1 +system 1 +concept 1 +local 1 +access 1 +multimedia 1 +research 1 +relat 1 +materi 1 +sigop 1 +paper 1 +oasi 1 +cambridg 1 +report 1 +adag 1 +author 1 +distribut 1 +applic 1 +group 1 +project 1 +level 1 +secur 1 +java 1 +safe 1 +internet 1 +program 1 +legion 1 +sirac 1 +kerbero 1 +network 1 +authent 1 +servic 1 +massiv 1 +ocaml 1 +advanc 1 +standard 1 +robot 1 +exclus 1 +career 1 +document 1 +cornel 1 +opportun 1 +jobtrak 1 +colleg 1 +grad 1 +hunter 1 +open 1 +center 1 +onlin 1 +careermosa 1 +page 1 +jobweb 1 +home 1 +xjob 1 +friend 1 +yingjun 1 +fudan 1 +classmat 1 +inform 1 +resours 1 +tutori 1 +languag 1 +tool 1 +yellow 1 +book 1 +isso 1 +sunris 1 +chines 1 +soccer 1 +world 1 +edmund 1 +automobil 1 +buyer 1 +guid 1 +autosit 1 +ultim 1 +auto 1 +insur 1 +basic 1 +legal 1 +surviv 1 +link 1 +travel 1 +agenc 1 +rank 1 +succe 1 +graduat 1 +school 1 +back 1 +indexlast 1 +updat 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..559eca66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,163 @@ +hsian 1 +wangthi 1 +page 1 +major 1 +constructionlin 1 +wang 1 +master 1 +student 1 +comput 1 +scienceat 1 +cornel 1 +univers 1 +degre 1 +inform 1 +ohio 1 +state 1 +born 1 +fangliao 1 +small 1 +villag 1 +southern 1 +coast 1 +taiwan 1 +still 1 +construct 1 +fall 1 +classesc 1 +multimedia 1 +system 1 +final 1 +project 1 +orwel 1 +remov 1 +track 1 +object 1 +digit 1 +videoe 1 +networkse 1 +vision 1 +moment 1 +base 1 +edg 1 +oper 1 +amidonc 1 +autom 1 +video 1 +transcrib 1 +annot 1 +research 1 +advisor 1 +prof 1 +ramin 1 +zabihspr 1 +natur 1 +languag 1 +processingc 1 +practic 1 +distribut 1 +computingc 1 +practicum 1 +network 1 +visual 1 +managementc 1 +machin 1 +visionc 1 +scienc 1 +colloquimc 1 +move 1 +scene 1 +high 1 +perform 1 +audit 1 +program 1 +java 1 +webspac 1 +interest 1 +site 1 +link 1 +us 1 +stuffscornel 1 +info 1 +depart 1 +annual 1 +reportiee 1 +societytaiwan 1 +headlin 1 +new 1 +sinanet 1 +comth 1 +musicmovi 1 +connect 1 +swartz 1 +movieweb 1 +cool 1 +movi 1 +moviemania 1 +also 1 +click 1 +collect 1 +think 1 +picturesth 1 +list 1 +best 1 +sell 1 +book 1 +releas 1 +publish 1 +world 1 +journal 1 +bookstor 1 +quot 1 +chines 1 +classic 1 +linux 1 +linkstcl 1 +line 1 +resourc 1 +softwar 1 +engin 1 +galleri 1 +hacksth 1 +earth 1 +home 1 +pagemiscellan 1 +hongkong 1 +bridg 1 +hong 1 +kong 1 +linksfor 1 +like 1 +japanes 1 +anim 1 +take 1 +look 1 +carlo 1 +jump 1 +cja 1 +calanimag 1 +alpha 1 +chapter 1 +berkelei 1 +totoro 1 +pagelaputa 1 +castl 1 +nausicaa 1 +vallei 1 +wind 1 +conan 1 +slump 1 +kiki 1 +legend 1 +galact 1 +hero 1 +ming 1 +pagecampu 1 +address 1 +uptown 1 +eithaca 1 +york 1 +linhsian 1 +edulast 1 +updat 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..18322faf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,33 @@ +libbi 1 +home 1 +pagewelcom 1 +page 1 +collect 1 +thought 1 +essai 1 +last 1 +updat 1 +sept 1 +show 1 +download 1 +theme 1 +song 1 +check 1 +video 1 +clip 1 +take 1 +look 1 +read 1 +lista 1 +littl 1 +thing 1 +septemb 1 +june 1 +april 1 +interest 1 +projectemail 1 +mehit 1 +counter 1 +courtesi 1 +http 1 +digit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..45bfd64b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,12 @@ +name 1 +offic 1 +upson 1 +hall 1 +hour 1 +mail 1 +lili 1 +cornel 1 +oper 1 +system 1 +take 1 +cours 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..5d6c532e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,100 @@ +lloyd 1 +trefethen 1 +trefethenprofessorlnt 1 +cornel 1 +edumi 1 +appoint 1 +thecomput 1 +scienc 1 +depart 1 +also 1 +affili 1 +thecent 1 +appli 1 +mathemat 1 +thecornel 1 +theori 1 +center 1 +field 1 +numer 1 +analysi 1 +scientif 1 +comput 1 +havea 1 +person 1 +view 1 +mean 1 +specif 1 +interest 1 +includ 1 +linear 1 +algebra 1 +numericalsolut 1 +conform 1 +map 1 +approxim 1 +fluid 1 +mechan 1 +recent 1 +year 1 +much 1 +work 1 +hasbeen 1 +relat 1 +normal 1 +matric 1 +oper 1 +whose 1 +eigenvector 1 +notorthogon 1 +applic 1 +textbooksfinit 1 +differ 1 +spectral 1 +method 1 +textbook 1 +siam 1 +papersmultimatlab 1 +matlab 1 +multipl 1 +processorsmatrix 1 +iter 1 +gap 1 +betweenpotenti 1 +convergencepseudospectra 1 +operatorssom 1 +papersoth 1 +itemsclass 1 +paper 1 +analysiscurriculum 1 +vitaepseudospectra 1 +bibliographi 1 +peter 1 +alfeldcurr 1 +student 1 +vicki 1 +howlegubjrn 1 +jnsson 1 +yohan 1 +kimdivakar 1 +viswanathprevi 1 +jeff 1 +baggetttobi 1 +driscollalan 1 +edelman 1 +loui 1 +howel 1 +walter 1 +mascarenhasnoel 1 +nachtigalsatish 1 +reddi 1 +chuan 1 +tohsom 1 +colleaguesjim 1 +demmelann 1 +greenbaummartin 1 +gutknechtd 1 +nick 1 +highamann 1 +trefethenandr 1 +weideman 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..26b24f5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,91 @@ +luci 1 +home 1 +page 1 +welcom 1 +student 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +ithaca 1 +interest 1 +topic 1 +network 1 +distribut 1 +system 1 +programminglanguag 1 +internet 1 +applic 1 +hobbi 1 +ping 1 +pong 1 +badminton 1 +swim 1 +travel 1 +photograph 1 +read 1 +music 1 +resum 1 +whiz 1 +stock 1 +search 1 +analysi 1 +tool 1 +degre 1 +project 1 +spring 1 +class 1 +practic 1 +practicum 1 +databas 1 +manag 1 +fall 1 +oper 1 +softwar 1 +engin 1 +multimedia 1 +systemscontact 1 +yuwu 1 +favorit 1 +site 1 +stuff 1 +java 1 +corba 1 +silvano 1 +tkcgi 1 +html 1 +vrml 1 +object 1 +orient 1 +languag 1 +product 1 +server 1 +securitypc 1 +lube 1 +tune 1 +ipngip_atmcomput 1 +compani 1 +netscap 1 +busi 1 +cube 1 +sapient 1 +microsoft 1 +novel 1 +china 1 +chines 1 +relat 1 +misc 1 +jobtrack 1 +new 1 +artvark 1 +galleri 1 +underground 1 +archiv 1 +person 1 +connect 1 +librari 1 +catalog 1 +mail 1 +sunlab 1 +caltech 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..c043569f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,50 @@ +linda 1 +home 1 +page 1 +cornel 1 +universitylinda 1 +lxwu 1 +master 1 +engin 1 +student 1 +comput 1 +scienc 1 +depart 1 +univsers 1 +receiv 1 +univers 1 +massachusett 1 +lowel 1 +sinc 1 +work 1 +digit 1 +equip 1 +corp 1 +banyan 1 +system 1 +main 1 +research 1 +interest 1 +network 1 +mulitimedia 1 +click 1 +resum 1 +project 1 +nativ 1 +protocol 1 +stack 1 +window 1 +us 1 +multicast 1 +group 1 +layer 1 +video 1 +electron 1 +commerc 1 +kramer 1 +mart 1 +coursesfal 1 +oper 1 +multimedia 1 +photoesus 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..18348b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,13 @@ +nikolai 1 +mateevnikolai 1 +mateevgradu 1 +studentmateev 1 +cornel 1 +upson 1 +halldepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..c7e5d4f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,54 @@ +matthew 1 +morgenstern 1 +home 1 +pagematthew 1 +morgensternresearch 1 +project 1 +leaderaddress 1 +engin 1 +theori 1 +centerxerox 1 +design 1 +research 1 +institutecornel 1 +universityithaca 1 +phone 1 +email 1 +cornel 1 +edustatu 1 +visit 1 +fellow 1 +comput 1 +scienc 1 +princip 1 +scientist 1 +xerox 1 +laboratori 1 +scienceproject 1 +distribut 1 +heterogen 1 +databas 1 +system 1 +arpa 1 +fund 1 +metadata 1 +manag 1 +multimedia 1 +document 1 +supervis 1 +select 1 +student 1 +relat 1 +area 1 +fundedresearch 1 +work 1 +academ 1 +year 1 +summer 1 +avail 1 +stop 1 +chat 1 +inform 1 +come 1 +page 1 +soon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..7a8eaf49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,40 @@ +monika 1 +henzing 1 +homepagemonika 1 +rauch 1 +henzingerassist 1 +professorcomput 1 +scienc 1 +departmentcornel 1 +universityithaca 1 +email 1 +cornel 1 +eduphon 1 +current 1 +leav 1 +digit 1 +equip 1 +corpor 1 +system 1 +research 1 +centerhomepageresearch 1 +interestscombinatori 1 +graph 1 +algorithm 1 +especi 1 +dynam 1 +random 1 +data 1 +structur 1 +theori 1 +lower 1 +bound 1 +recent 1 +public 1 +project 1 +pageprogram 1 +committe 1 +stoc 1 +soda 1 +homepag 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..f50014d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,76 @@ +stanlei 1 +huang 1 +home 1 +page 1 +huangmast 1 +engin 1 +studentmhuang 1 +cornel 1 +sheldon 1 +courtcornel 1 +univers 1 +ithaca 1 +master 1 +comput 1 +scienc 1 +class 1 +bachelor 1 +kentucki 1 +area 1 +interest 1 +oper 1 +systemsdistribut 1 +systemsdatabas 1 +system 1 +inform 1 +retrievalgraph 1 +user 1 +interfacesoth 1 +movi 1 +tenni 1 +horse_back 1 +ride 1 +travel 1 +read 1 +work 1 +project 1 +distribut 1 +plan 1 +myadvisor 1 +werner 1 +vogel 1 +robbertvan 1 +reness 1 +object 1 +integr 1 +horu 1 +link 1 +relat 1 +planplan 1 +distributionplan 1 +updateplan 1 +faqhorusc 1 +final 1 +exam 1 +paper 1 +collect 1 +share 1 +memorydistribut 1 +memorysom 1 +technic 1 +group 1 +communicationsnapshotu 1 +level 1 +network 1 +interfac 1 +architecturejobscar 1 +pathbai 1 +jobscyberezumescar 1 +opportunitiesus 1 +stufftechn 1 +field 1 +searchbel 1 +labsspbsd 1 +sourcesjavarfclast 1 +modifi 1 +mhuang 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..6d422339 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,62 @@ +lynett 1 +millett 1 +homepag 1 +millettdepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +cornel 1 +participatoryform 1 +mass 1 +speech 1 +develop 1 +internetdeserv 1 +highest 1 +protect 1 +government 1 +intrus 1 +decis 1 +aclu 1 +reno 1 +challeng 1 +script 1 +second 1 +year 1 +skit 1 +cuc 1 +holidai 1 +parti 1 +person 1 +inform 1 +pictur 1 +cat 1 +last 1 +updat 1 +list 1 +link 1 +never 1 +abl 1 +find 1 +precis 1 +femin 1 +know 1 +peopl 1 +call 1 +feminist 1 +whenver 1 +express 1 +sentiment 1 +differenti 1 +doormat 1 +prostitut 1 +rebecca 1 +west 1 +modifi 1 +octob 1 +comment 1 +welcom 1 +copi 1 +public 1 +pleas 1 +look 1 +copyright 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..917dcbba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,94 @@ +mishaal 1 +home 1 +pagemisha 1 +page 1 +kuwaiti 1 +student 1 +cornel 1 +univers 1 +master 1 +engin 1 +computersci 1 +program 1 +mengc 1 +graduat 1 +doubl 1 +major 1 +electr 1 +andcomput 1 +scienc 1 +worcest 1 +polytechn 1 +institut 1 +inworcest 1 +coolest 1 +place 1 +earth 1 +list 1 +stuff 1 +involv 1 +interest 1 +temporari 1 +link 1 +servic 1 +bearaccess 1 +menu 1 +cours 1 +take 1 +machin 1 +vision 1 +newgroupc 1 +practic 1 +distribut 1 +system 1 +practicum 1 +high 1 +perform 1 +comput 1 +advanc 1 +languag 1 +implement 1 +newgroup 1 +capac 1 +network 1 +newgroupnba 1 +databas 1 +manag 1 +newgroupoptim 1 +video 1 +transmiss 1 +meng 1 +project 1 +extens 1 +kuwait 1 +pagemi 1 +resum 1 +check 1 +stock 1 +quotescool 1 +public 1 +server 1 +hope 1 +offer 1 +conot 1 +soon 1 +weather 1 +ithaca 1 +latest 1 +new 1 +cann 1 +intern 1 +film 1 +festiv 1 +everyth 1 +wrong 1 +reason 1 +want 1 +ever 1 +accus 1 +nerd 1 +well 1 +sure 1 +cool 1 +almashanmisha 1 +educornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..63fc4dc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,131 @@ +mike 1 +pagethi 1 +page 1 +yeah 1 +date 1 +version 1 +pleas 1 +recent 1 +browser 1 +stuff 1 +quit 1 +would 1 +like 1 +sign 1 +myguestbook 1 +cool 1 +chanc 1 +make 1 +opinion 1 +count 1 +dark 1 +stockholm 1 +right 1 +take 1 +look 1 +vote 1 +poll 1 +result 1 +coupl 1 +link 1 +peopl 1 +know 1 +guess 1 +pretti 1 +ryan 1 +call 1 +beavi 1 +vitya 1 +better 1 +construct 1 +danc 1 +frog 1 +maria 1 +mark 1 +korbi 1 +realli 1 +last 1 +name 1 +andrew 1 +corbett 1 +suck 1 +card 1 +nicknam 1 +kevin 1 +donnel 1 +complet 1 +love 1 +pictur 1 +eryn 1 +crave 1 +attent 1 +want 1 +movi 1 +graphic 1 +class 1 +golf 1 +amaz 1 +anim 1 +plai 1 +mpeg 1 +place 1 +univers 1 +stop 1 +expand 1 +click 1 +find 1 +world 1 +need 1 +person 1 +pick 1 +site 1 +made 1 +onlin 1 +student 1 +homepag 1 +list 1 +lame 1 +guttermouth 1 +brought 1 +byjust 1 +kid 1 +cours 1 +member 1 +internet 1 +exchang 1 +peic 1 +connect 1 +whole 1 +bunch 1 +other 1 +line 1 +thing 1 +come 1 +join 1 +hand 1 +sing 1 +togeth 1 +spirit 1 +harmoni 1 +someth 1 +visitor 1 +number 1 +keep 1 +reset 1 +check 1 +statist 1 +accuar 1 +send 1 +mail 1 +atmak 1 +cornel 1 +edubas 1 +much 1 +random 1 +imag 1 +relat 1 +anyth 1 +thank 1 +everybodi 1 +idea 1 +us 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..3dcb799e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,22 @@ +nobuhiko 1 +mukainobuhiko 1 +mukai 1 +home 1 +page 1 +construct 1 +research 1 +effect 1 +jpeg 1 +compressionon 1 +multimedia 1 +system 1 +last 1 +fall 1 +semest 1 +made 1 +anim 1 +titl 1 +magicon 1 +comput 1 +graphic 1 +spring 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..15d936dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,22 @@ +homepag 1 +nichola 1 +how 1 +click 1 +photo 1 +imag 1 +graduat 1 +studentdepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +email 1 +nihow 1 +cornel 1 +eduoffic 1 +upson 1 +hall 1 +phone 1 +resum 1 +refer 1 +person 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..82a763f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,23 @@ +niko 1 +home 1 +page 1 +research 1 +pitsiani 1 +upson 1 +hall 1 +dept 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +work 1 +public 1 +lectur 1 +teach 1 +java 1 +vita 1 +pointer 1 +sinc 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..18a9f28b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,186 @@ +niko 1 +chrisochoid 1 +comput 1 +scienc 1 +depart 1 +purdu 1 +univers 1 +work 1 +touch 1 +mani 1 +facet 1 +parallel 1 +distribut 1 +includ 1 +research 1 +runtim 1 +support 1 +back 1 +system 1 +compil 1 +problem 1 +solv 1 +environ 1 +multicomput 1 +algorithm 1 +schedul 1 +load 1 +balanc 1 +data 1 +adapt 1 +design 1 +implement 1 +softwar 1 +compon 1 +well 1 +known 1 +simul 1 +ellpack 1 +current 1 +project 1 +workshop 1 +bernoulli 1 +prema 1 +portabl 1 +architecur 1 +sensit 1 +messag 1 +dynam 1 +grid 1 +gener 1 +cornel 1 +binari 1 +black 1 +hole 1 +grand 1 +challeng 1 +student 1 +master 1 +engin 1 +florian 1 +sukup 1 +kodukula 1 +indupraka 1 +pingali 1 +vineet 1 +ahuja 1 +reza 1 +behforooz 1 +undergradu 1 +former 1 +animesh 1 +chatterje 1 +rajani 1 +vaidyanathan 1 +select 1 +paper 1 +task 1 +bowyer 1 +watson 1 +appear 1 +proceed 1 +offifth 1 +intern 1 +confer 1 +numer 1 +incomput 1 +fluid 1 +relat 1 +field 1 +multithread 1 +model 1 +ctctr 1 +octob 1 +appli 1 +mathemat 1 +journal 1 +object 1 +orient 1 +tool 1 +kale 1 +kohl 1 +yellick 1 +scientif 1 +program 1 +menu 1 +map 1 +unstructur 1 +structur 1 +thompson 1 +contemporari 1 +key 1 +iter 1 +solver 1 +housti 1 +rice 1 +special 1 +issu 1 +april 1 +toolkit 1 +collid 1 +haupt 1 +aiaa 1 +colorado 1 +spring 1 +june 1 +altern 1 +scalabl 1 +scalableparallel 1 +librari 1 +nation 1 +foundat 1 +engineeringresearch 1 +center 1 +mississippi 1 +state 1 +partit 1 +heurist 1 +base 1 +parallelhardwar 1 +geometri 1 +characterist 1 +advanc 1 +method 1 +partial 1 +differentialequ 1 +vichnevetski 1 +knight 1 +richter 1 +imac 1 +brunswick 1 +page 1 +domain 1 +decompos 1 +architectur 1 +papachi 1 +kortesi 1 +decomposit 1 +differenti 1 +equat 1 +symposium 1 +domaindecomposit 1 +moscow 1 +ussr 1 +glowinski 1 +siam 1 +public 1 +programmingenviron 1 +mimd 1 +machin 1 +karathanas 1 +samartzi 1 +vavali 1 +yang 1 +wang 1 +and 1 +weerawarana 1 +onsupercomput 1 +nikosc 1 +institut 1 +theori 1 +univeristi 1 +rhode 1 +hall 1 +room 1 +ithaca 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..3ec38b84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,68 @@ +welcom 1 +prema 1 +parallel 1 +runtim 1 +support 1 +system 1 +portabl 1 +environ 1 +multicomput 1 +architectur 1 +advanc 1 +comput 1 +research 1 +institut 1 +cornel 1 +theori 1 +center 1 +univers 1 +overview 1 +compil 1 +andproblem 1 +solv 1 +target 1 +scientif 1 +computingappl 1 +build 1 +port 1 +implement 1 +design 1 +varieti 1 +suppot 1 +global 1 +address 1 +space 1 +memori 1 +model 1 +data 1 +task 1 +program 1 +multi 1 +thread 1 +style 1 +execut 1 +automat 1 +work 1 +share 1 +mechan 1 +dynam 1 +load 1 +balanc 1 +paper 1 +multithread 1 +adapt 1 +pdecomput 1 +niko 1 +chrisochoid 1 +ctctr 1 +journal 1 +appli 1 +numer 1 +mathemat 1 +relat 1 +group 1 +pcrc 1 +consortium 1 +copyright 1 +copi 1 +nikosc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..a185b3b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,30 @@ +pavel 1 +naumov 1 +welcom 1 +home 1 +page 1 +invit 1 +learn 1 +locat 1 +cyberspac 1 +real 1 +world 1 +look 1 +work 1 +nuprl 1 +project 1 +tire 1 +take 1 +rest 1 +galleri 1 +visit 1 +cinema 1 +photo 1 +orplai 1 +game 1 +java 1 +written 1 +sundai 1 +send 1 +mail 1 +place 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..88008485 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,194 @@ +david 1 +pearson 1 +research 1 +interest 1 +thesi 1 +investig 1 +highli 1 +scalabl 1 +parallel 1 +comput 1 +consistingof 1 +simpl 1 +processor 1 +connect 1 +dimension 1 +mesh 1 +guid 1 +vision 1 +work 1 +time 1 +perhap 1 +year 1 +henc 1 +materi 1 +scienc 1 +taken 1 +place 1 +architectur 1 +crystal 1 +molecul 1 +lattic 1 +long 1 +goal 1 +realiz 1 +prepar 1 +theubiquit 1 +offer 1 +algorithm 1 +must 1 +heed 1 +lawsof 1 +physic 1 +attent 1 +chip 1 +design 1 +spatial 1 +layoutand 1 +current 1 +hidden 1 +cost 1 +commun 1 +accomplishedbi 1 +pursu 1 +requir 1 +theoret 1 +practic 1 +could 1 +character 1 +feasibl 1 +studi 1 +ihav 1 +produc 1 +cellular 1 +couldb 1 +effici 1 +hardwar 1 +simul 1 +thisarchitectur 1 +program 1 +oper 1 +system 1 +designfor 1 +gener 1 +purpos 1 +believ 1 +problem 1 +like 1 +proteinstructur 1 +grand 1 +challeng 1 +parallelcomput 1 +power 1 +realli 1 +succe 1 +becom 1 +commodityand 1 +sold 1 +desktop 1 +machin 1 +video 1 +game 1 +direct 1 +futur 1 +includ 1 +vlsi 1 +implement 1 +architectureand 1 +languag 1 +wide 1 +us 1 +hideth 1 +detail 1 +instruct 1 +reflect 1 +underlyingvon 1 +neumann 1 +architectureha 1 +good 1 +thing 1 +exploit 1 +parallelmachin 1 +need 1 +easyto 1 +estim 1 +public 1 +dunten 1 +arm 1 +kiewit 1 +network 1 +high 1 +speed 1 +campu 1 +ieee 1 +societi 1 +intern 1 +confer 1 +compcon 1 +fall 1 +pillai 1 +near 1 +optim 1 +placement 1 +sensor 1 +element 1 +transact 1 +inform 1 +theori 1 +vazirani 1 +fast 1 +find 1 +maxim 1 +bipartit 1 +foundat 1 +softwar 1 +technolog 1 +publish 1 +lectur 1 +note 1 +sequenti 1 +set 1 +journal 1 +johnson 1 +pingali 1 +region 1 +singl 1 +entri 1 +exit 1 +control 1 +linear 1 +cornel 1 +tech 1 +report 1 +structur 1 +tree 1 +proceed 1 +sigplan 1 +pldi 1 +notic 1 +polynomi 1 +chang 1 +make 1 +schedul 1 +global 1 +regist 1 +alloc 1 +simd 1 +multiprocessor 1 +workshop 1 +irregularli 1 +irregular 1 +sept 1 +zippel 1 +allerton 1 +press 1 +select 1 +area 1 +cryptographi 1 +appear 1 +depart 1 +upson 1 +hallcornel 1 +universityithaca 1 +york 1 +usaemail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..043329d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,126 @@ +todd 1 +peskin 1 +page 1 +pictur 1 +content 1 +work 1 +favorit 1 +site 1 +resum 1 +cours 1 +taken 1 +student 1 +year 1 +meng 1 +program 1 +cornel 1 +univers 1 +joint 1 +degre 1 +offer 1 +jointli 1 +colleg 1 +engin 1 +johnson 1 +graduat 1 +school 1 +manag 1 +receiv 1 +master 1 +comput 1 +scienc 1 +follow 1 +busi 1 +administr 1 +also 1 +current 1 +presid 1 +chapter 1 +acacia 1 +fratern 1 +brother 1 +would 1 +like 1 +becom 1 +part 1 +mail 1 +list 1 +pleas 1 +contact 1 +eduand 1 +soon 1 +suppli 1 +andyour 1 +roll 1 +number 1 +abl 1 +complet 1 +request 1 +quickli 1 +well 1 +best 1 +reach 1 +check 1 +round 1 +alwai 1 +sinc 1 +found 1 +mani 1 +time 1 +log 1 +depart 1 +fall 1 +semest 1 +junior 1 +workeda 1 +intern 1 +throughth 1 +cooper 1 +enabl 1 +cornellundergradu 1 +supplement 1 +classroom 1 +knowledg 1 +practic 1 +experi 1 +compani 1 +theirfield 1 +develop 1 +enhanc 1 +nation 1 +level 1 +client 1 +server 1 +databas 1 +system 1 +softwar 1 +isrun 1 +microsystem 1 +run 1 +window 1 +espn 1 +stock 1 +quot 1 +onlin 1 +java 1 +final 1 +project 1 +larg 1 +mpeg 1 +file 1 +creat 1 +visitor 1 +februari 1 +still 1 +construct 1 +hope 1 +possibl 1 +includ 1 +us 1 +applet 1 +case 1 +wonder 1 +ticker 1 +tape 1 +borrow 1 +permiss 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..89472992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,54 @@ +dave 1 +pierc 1 +home 1 +page 1 +david 1 +student 1 +cornel 1 +univers 1 +comput 1 +scienc 1 +address 1 +stuff 1 +offic 1 +upson 1 +hall 1 +valentin 1 +ithaca 1 +person 1 +second 1 +year 1 +recent 1 +pittsburgh 1 +pennsylvania 1 +although 1 +familycurr 1 +resid 1 +read 1 +citi 1 +approxim 1 +halfwai 1 +philadelphia 1 +andharrisburg 1 +famou 1 +shop 1 +outlet 1 +otherwis 1 +younev 1 +want 1 +howev 1 +great 1 +place 1 +mani 1 +alreadi 1 +know 1 +sinceit 1 +imposs 1 +without 1 +go 1 +throughpittsburgh 1 +month 1 +favorit 1 +quot 1 +list 1 +work 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..c532c5d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,123 @@ +indupraka 1 +kodukula 1 +home 1 +page 1 +engin 1 +theori 1 +center 1 +cornel 1 +univers 1 +ithaca 1 +praka 1 +student 1 +depart 1 +comput 1 +scienc 1 +prior 1 +tothat 1 +undergradu 1 +madra 1 +work 1 +bernoulli 1 +group 1 +prof 1 +keshav 1 +pingali 1 +member 1 +nawaaz 1 +ahm 1 +vladimir 1 +kotlyar 1 +vijai 1 +menon 1 +paul 1 +stodghil 1 +also 1 +affili 1 +advanc 1 +research 1 +institut 1 +interplai 1 +applic 1 +compil 1 +runtim 1 +system 1 +tradit 1 +andmultiprocessor 1 +architectur 1 +deriv 1 +fromscientif 1 +imag 1 +process 1 +multimedia 1 +withibm 1 +vliw 1 +hasinterest 1 +well 1 +public 1 +talk 1 +given 1 +seri 1 +dens 1 +technolog 1 +first 1 +athp 1 +chelmsford 1 +wasabout 1 +necess 1 +deal 1 +imperfectli 1 +nest 1 +looptransform 1 +abl 1 +handl 1 +trivial 1 +code 1 +present 1 +framework 1 +perform 1 +loop 1 +transform 1 +loopparallel 1 +seminar 1 +schloss 1 +dagstuhl 1 +april 1 +summer 1 +watson 1 +regard 1 +useof 1 +octob 1 +lab 1 +palo 1 +alto 1 +regardingdata 1 +centric 1 +multi 1 +level 1 +block 1 +teach 1 +taught 1 +program 1 +andoper 1 +project 1 +czar 1 +instal 1 +maintain 1 +support 1 +packag 1 +availableund 1 +gener 1 +licens 1 +departmentmachin 1 +check 1 +andfind 1 +handi 1 +tip 1 +alsofind 1 +extens 1 +info 1 +random 1 +link 1 +person 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..8ff00e3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,49 @@ +praveen 1 +seshadri 1 +home 1 +page 1 +assist 1 +professor 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +upson 1 +hall 1 +ithaca 1 +offic 1 +advanc 1 +databas 1 +system 1 +fall 1 +predat 1 +dbm 1 +project 1 +adt 1 +know 1 +case 1 +enhanc 1 +abstract 1 +data 1 +type 1 +sigmod 1 +submiss 1 +profession 1 +public 1 +time 1 +order 1 +manag 1 +sequenc 1 +postscript 1 +thesi 1 +tree 1 +save 1 +format 1 +person 1 +warren 1 +road 1 +ranjani 1 +ramamurthi 1 +green 1 +packer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..d0c2ecee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,353 @@ +home 1 +pageth 1 +project 1 +queri 1 +sequenc 1 +data 1 +document 1 +construct 1 +time 1 +order 1 +databas 1 +content 1 +objectivescurr 1 +statusmotiv 1 +exampleseq 1 +model 1 +sequin 1 +languageoptim 1 +techniquesseq 1 +system 1 +developmentpublicationsrel 1 +workcontact 1 +informationproject 1 +object 1 +number 1 +import 1 +applic 1 +requir 1 +processingof 1 +larg 1 +amount 1 +domain 1 +theseappl 1 +includ 1 +financi 1 +manag 1 +histor 1 +analysi 1 +econom 1 +social 1 +scienc 1 +metereolog 1 +medic 1 +andbiolog 1 +exist 1 +relat 1 +inadequ 1 +regard 1 +collect 1 +treat 1 +set 1 +consequ 1 +express 1 +tediou 1 +evalu 1 +ineffici 1 +us 1 +abstract 1 +allow 1 +declar 1 +manner 1 +util 1 +semanticstak 1 +advantag 1 +uniqu 1 +opportun 1 +avail 1 +optim 1 +evaluationintegr 1 +user 1 +canstor 1 +combin 1 +sequencesthes 1 +serv 1 +goal 1 +variou 1 +kind 1 +need 1 +support 1 +tempor 1 +themost 1 +notion 1 +like 1 +next 1 +previou 1 +natur 1 +consid 1 +effici 1 +issu 1 +studi 1 +theori 1 +built 1 +demonstr 1 +feasibl 1 +theoret 1 +idea 1 +statusth 1 +current 1 +statu 1 +defin 1 +also 1 +algebraicqueri 1 +oper 1 +compos 1 +form 1 +analogousto 1 +composit 1 +algebra 1 +describ 1 +process 1 +identifi 1 +techniqu 1 +languag 1 +candeclar 1 +embed 1 +likesql 1 +vice 1 +versa 1 +build 1 +disk 1 +base 1 +propos 1 +implement 1 +nest 1 +complex 1 +architectur 1 +shore 1 +storag 1 +sever 1 +megabyt 1 +integr 1 +extens 1 +motiv 1 +exampl 1 +querya 1 +weather 1 +monitor 1 +record 1 +inform 1 +meteorolog 1 +phenomena 1 +sequenti 1 +occurr 1 +event 1 +scientist 1 +ask 1 +volcano 1 +erupt 1 +didth 1 +recent 1 +earthquak 1 +strength 1 +greater 1 +richter 1 +scale 1 +featur 1 +groupbi 1 +claus 1 +correl 1 +subqueri 1 +aggregatefunct 1 +convent 1 +would 1 +find 1 +execut 1 +plan 1 +even 1 +given 1 +knowledg 1 +sort 1 +howev 1 +sequencesord 1 +scan 1 +lock 1 +step 1 +similar 1 +merg 1 +join 1 +store 1 +temporari 1 +buffer 1 +whenev 1 +valu 1 +check 1 +possibl 1 +gener 1 +answer 1 +therefor 1 +singl 1 +littl 1 +memori 1 +modelth 1 +detail 1 +aredescrib 1 +publish 1 +paper 1 +click 1 +postscript 1 +version 1 +present 1 +gist 1 +basic 1 +map 1 +ordereddomain 1 +posit 1 +mani 1 +relationship 1 +andposit 1 +view 1 +dual 1 +distinct 1 +wai 1 +recordsmap 1 +call 1 +orient 1 +respect 1 +give 1 +rise 1 +could 1 +either 1 +flavor 1 +relationaloper 1 +overlap 1 +contain 1 +andaggreg 1 +explor 1 +researchersin 1 +commun 1 +offset 1 +movingaggreg 1 +zoom 1 +mean 1 +collaps 1 +expand 1 +associ 1 +instanc 1 +daili 1 +weekli 1 +hourli 1 +last 1 +part 1 +deal 1 +group 1 +make 1 +easi 1 +involv 1 +case 1 +real 1 +worldsitu 1 +extend 1 +work 1 +instead 1 +extensionof 1 +indic 1 +practic 1 +ofseq 1 +probabl 1 +languagew 1 +devis 1 +usingwhich 1 +specifi 1 +languagei 1 +except 1 +input 1 +queriesa 1 +well 1 +result 1 +descript 1 +techniquesw 1 +thathav 1 +transform 1 +meta 1 +cach 1 +intermedi 1 +algorithm 1 +reli 1 +cost 1 +estim 1 +observ 1 +access 1 +stream 1 +strategi 1 +take 1 +account 1 +developmentth 1 +client 1 +serverarchitectur 1 +multipl 1 +viaa 1 +multi 1 +thread 1 +server 1 +ontop 1 +subset 1 +languageswhich 1 +insid 1 +mode 1 +arbitrarylevel 1 +viceversa 1 +provid 1 +supportfor 1 +type 1 +function 1 +detailson 1 +publicationssequ 1 +praveen 1 +seshadri 1 +miron 1 +livni 1 +raghu 1 +ramakrishnan 1 +proceed 1 +sigmod 1 +confer 1 +framework 1 +datapraveen 1 +ieee 1 +engin 1 +march 1 +design 1 +systempraveen 1 +submit 1 +vldb 1 +queriesraghu 1 +michael 1 +cheng 1 +intern 1 +comad 1 +decemb 1 +workthedevis 1 +complementari 1 +visualizationenviron 1 +front 1 +pose 1 +examin 1 +graphic 1 +peopl 1 +madison 1 +research 1 +depart 1 +servercontact 1 +informationfor 1 +contact 1 +wisc 1 +eduraghu 1 +edumiron 1 +educomput 1 +univers 1 +wisconsin 1 +dayton 1 +street 1 +modifi 1 +seshadripraveen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..a2100011 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,50 @@ +ralph 1 +benzingerralph 1 +benzingerw 1 +sich 1 +seinen 1 +lorbeeren 1 +ausruht 1 +trgt 1 +derfalschen 1 +stell 1 +stori 1 +exchang 1 +student 1 +univers 1 +karlsruh 1 +germani 1 +german 1 +comput 1 +scienc 1 +august 1 +fellow 1 +studienstiftung 1 +deutschen 1 +volk 1 +fulbright 1 +member 1 +siemen 1 +international 1 +studentenkrei 1 +alumnusat 1 +cornel 1 +graduat 1 +depart 1 +cours 1 +taken 1 +advanc 1 +program 1 +languag 1 +design 1 +analysi 1 +algorithm 1 +reason 1 +knowledg 1 +contact 1 +inform 1 +mail 1 +offic 1 +upson 1 +hall 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..c0a6ad56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,291 @@ +mathemat 1 +comput 1 +gener 1 +spoken 1 +document 1 +titl 1 +audio 1 +aster 1 +demonstrationi 1 +dedic 1 +guid 1 +system 1 +technic 1 +read 1 +forrend 1 +develop 1 +myphd 1 +page 1 +format 1 +version 1 +thesi 1 +approxim 1 +hour 1 +produc 1 +made 1 +avail 1 +record 1 +blind 1 +first 1 +talk 1 +book 1 +abstract 1 +print 1 +hypertext 1 +demonstr 1 +render 1 +enhanc 1 +demo 1 +us 1 +inlin 1 +imag 1 +exampl 1 +three 1 +compon 1 +origin 1 +latex 1 +input 1 +output 1 +speech 1 +dectalk 1 +digit 1 +mulaw 1 +stereo 1 +tabl 1 +effect 1 +convei 1 +mono 1 +encod 1 +visual 1 +dvip 1 +progress 1 +difficult 1 +suggest 1 +initi 1 +section 1 +sequenti 1 +short 1 +typic 1 +show 1 +peopl 1 +andround 1 +continu 1 +fraction 1 +quick 1 +overview 1 +faad 1 +bruno 1 +formula 1 +postscript 1 +file 1 +contain 1 +casey 1 +want 1 +look 1 +place 1 +singl 1 +examplessinc 1 +would 1 +simpl 1 +express 1 +voic 1 +inflect 1 +paus 1 +toconvei 1 +group 1 +succinctli 1 +state 1 +vari 1 +along 1 +dimens 1 +space 1 +renderingsub 1 +superscript 1 +subscript 1 +attribut 1 +audiost 1 +orthogon 1 +independ 1 +dimensionus 1 +allow 1 +nest 1 +mutual 1 +concept 1 +knuth 1 +expon 1 +taken 1 +verbatim 1 +donald 1 +power 1 +layoutoper 1 +notic 1 +compris 1 +symbol 1 +verydiffer 1 +unambigu 1 +move 1 +defin 1 +percept 1 +monotonicchang 1 +notion 1 +monoton 1 +vital 1 +school 1 +algebra 1 +squar 1 +root 1 +choic 1 +follow 1 +trigonometr 1 +ident 1 +written 1 +notat 1 +ambigu 1 +hard 1 +recogn 1 +complet 1 +absenc 1 +parenthesi 1 +sever 1 +heurist 1 +construct 1 +correct 1 +tree 1 +structur 1 +forthes 1 +logarithm 1 +context 1 +specif 1 +speak 1 +base 1 +chosen 1 +reduc 1 +cognit 1 +load 1 +oppos 1 +seri 1 +rule 1 +interpret 1 +asexpon 1 +wire 1 +isfulli 1 +customiz 1 +user 1 +integr 1 +probabl 1 +innocu 1 +also 1 +mostdifficult 1 +imposs 1 +determin 1 +variabl 1 +ofintegr 1 +tripl 1 +applic 1 +theintegr 1 +oper 1 +brows 1 +listen 1 +piec 1 +shown 1 +trick 1 +experienc 1 +ofhuman 1 +reader 1 +error 1 +summat 1 +limit 1 +cross 1 +referenc 1 +equat 1 +meant 1 +illustr 1 +ofcross 1 +refer 1 +interact 1 +enabl 1 +give 1 +meaning 1 +name 1 +referenceableobject 1 +object 1 +latercross 1 +distanc 1 +good 1 +inton 1 +text 1 +thati 1 +intermix 1 +quantifi 1 +present 1 +interest 1 +challeng 1 +exponenti 1 +followingdeepli 1 +emac 1 +calcul 1 +full 1 +fledgedsymbol 1 +interfac 1 +directli 1 +justa 1 +well 1 +matrix 1 +dimension 1 +thematrix 1 +commenc 1 +left 1 +right 1 +aseach 1 +element 1 +ofcomput 1 +program 1 +heard 1 +took 1 +secondsto 1 +sinc 1 +util 1 +featur 1 +spacenot 1 +human 1 +still 1 +changeth 1 +size 1 +shape 1 +head 1 +take 1 +second 1 +hear 1 +soon 1 +even 1 +long 1 +forget 1 +begin 1 +thetim 1 +later 1 +substitut 1 +techniquefor 1 +level 1 +complex 1 +proper 1 +glori 1 +process 1 +like 1 +upon 1 +request 1 +replac 1 +identifi 1 +renderingsconvei 1 +thesub 1 +separ 1 +perform 1 +orpostscript 1 +equival 1 +case 1 +lower 1 +constraint 1 +numer 1 +denomin 1 +raman 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..553e4ecd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,63 @@ +ravi 1 +kumar 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +program 1 +check 1 +approxim 1 +polynomi 1 +function 1 +equat 1 +ieee 1 +foundat 1 +octob 1 +funda 1 +uumln 1 +ronitt 1 +rubinfeld 1 +effici 1 +self 1 +test 1 +correct 1 +linear 1 +recurr 1 +sivakumar 1 +without 1 +gener 1 +bottleneck 1 +softwar 1 +technolog 1 +theoret 1 +lnc 1 +decemb 1 +learn 1 +theori 1 +bound 1 +width 1 +branch 1 +confer 1 +juli 1 +combinator 1 +latin 1 +squar 1 +extens 1 +june 1 +alexand 1 +russel 1 +sundaram 1 +parallel 1 +process 1 +scalabl 1 +studi 1 +intern 1 +august 1 +jeyakumar 1 +muthukumarasami 1 +umakishor 1 +ramachandran 1 +gautam 1 +shah 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..cf279bfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,147 @@ +ramin 1 +zabih 1 +home 1 +pageramin 1 +zabihassist 1 +professorrdz 1 +cornel 1 +researchmi 1 +research 1 +interest 1 +comput 1 +vision 1 +multimedia 1 +current 1 +construct 1 +search 1 +engin 1 +imag 1 +us 1 +method 1 +develop 1 +recent 1 +think 1 +econom 1 +impact 1 +freeli 1 +avail 1 +price 1 +inform 1 +essai 1 +subject 1 +appear 1 +phil 1 +agr 1 +electron 1 +newslett 1 +network 1 +observ 1 +march 1 +studentsi 1 +work 1 +student 1 +jing 1 +huang 1 +vera 1 +kettnak 1 +olga 1 +veksler 1 +also 1 +spend 1 +fair 1 +amount 1 +time 1 +variou 1 +undergradu 1 +princip 1 +greg 1 +pass 1 +justin 1 +voskuhl 1 +includ 1 +scott 1 +cytacki 1 +miller 1 +szewczyk 1 +publicationsmost 1 +public 1 +postscript 1 +acrobat 1 +format 1 +free 1 +reader 1 +varieti 1 +differ 1 +architectur 1 +adob 1 +histogram 1 +refin 1 +content 1 +base 1 +retriev 1 +ieee 1 +workshop 1 +applic 1 +sarasota 1 +florida 1 +decemb 1 +compar 1 +color 1 +coher 1 +vector 1 +fourth 1 +confer 1 +boston 1 +massachusett 1 +novemb 1 +featur 1 +algorithm 1 +detect 1 +classifi 1 +scene 1 +break 1 +kevin 1 +third 1 +francisco 1 +california 1 +parametr 1 +local 1 +transform 1 +visual 1 +correspond 1 +john 1 +woodfil 1 +european 1 +stockholm 1 +sweden 1 +teachingi 1 +teach 1 +introduct 1 +program 1 +spring 1 +cours 1 +scribe 1 +note 1 +lectur 1 +page 1 +taught 1 +profession 1 +activitiesi 1 +comitte 1 +cvpr 1 +pattern 1 +recognit 1 +held 1 +juan 1 +june 1 +organ 1 +committe 1 +access 1 +video 1 +librari 1 +conjunct 1 +acknowledgementsthi 1 +design 1 +courtesi 1 +huttenlocherlast 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..723aa152 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,4 @@ +roderick 1 +moten 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..fb4f152f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,45 @@ +ronitt 1 +rubinfeld 1 +homepageronitt 1 +rubinfeldi 1 +assist 1 +professor 1 +cornel 1 +depart 1 +comput 1 +scienc 1 +recent 1 +paper 1 +talk 1 +cours 1 +random 1 +spring 1 +fall 1 +engin 1 +graduat 1 +student 1 +funda 1 +ergun 1 +ravi 1 +kumar 1 +fair 1 +homepag 1 +wasserman 1 +page 1 +describ 1 +work 1 +research 1 +area 1 +result 1 +check 1 +address 1 +rubinfeldcomput 1 +upson 1 +hallcornel 1 +universityithaca 1 +york 1 +telephon 1 +email 1 +edupictur 1 +nephew 1 +eitan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..fd25424f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,73 @@ +friedmanroi 1 +friedmanpost 1 +doctor 1 +associ 1 +cornel 1 +universityroi 1 +edui 1 +post 1 +depart 1 +comput 1 +scienceatcornel 1 +univers 1 +work 1 +withken 1 +birman 1 +androbbert 1 +rennessein 1 +area 1 +distribut 1 +system 1 +mainli 1 +thehoru 1 +project 1 +receiv 1 +thedepart 1 +scienc 1 +thetechnion 1 +israel 1 +institut 1 +technolog 1 +advisor 1 +washagit 1 +attiya 1 +thesi 1 +titl 1 +wasconsist 1 +condit 1 +share 1 +memori 1 +current 1 +also 1 +involv 1 +themilliped 1 +withassaf 1 +schuster 1 +recent 1 +papersr 1 +friedman 1 +trade 1 +consist 1 +avail 1 +technic 1 +report 1 +cornellunivers 1 +us 1 +group 1 +commun 1 +implement 1 +reliabl 1 +scalabledistribut 1 +coprocessor 1 +appear 1 +tina 1 +vaysburd 1 +replic 1 +state 1 +machin 1 +partition 1 +network 1 +full 1 +list 1 +public 1 +clickher 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..e15a894c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,28 @@ +daniela 1 +home 1 +page 1 +research 1 +associ 1 +comput 1 +scienc 1 +cornel 1 +photograph 1 +address 1 +upson 1 +hall 1 +depart 1 +univers 1 +ithaca 1 +model 1 +simul 1 +recent 1 +paper 1 +version 1 +onlin 1 +tech 1 +report 1 +librari 1 +catalogc 1 +dept 1 +infodesign 1 +institut 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..a353054e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,92 @@ +robbert 1 +renesserobbert 1 +renessesenior 1 +research 1 +associatecornel 1 +universityrvr 1 +cornel 1 +edui 1 +senior 1 +associ 1 +depart 1 +comput 1 +scienceatcornel 1 +universityinithaca 1 +work 1 +withken 1 +birmanin 1 +area 1 +distribut 1 +system 1 +advisor 1 +wasandi 1 +tanenbaum 1 +interestsmi 1 +brand 1 +babi 1 +girl 1 +hous 1 +horu 1 +tacoma 1 +project 1 +caml 1 +applet 1 +nynetth 1 +ithaca 1 +ageless 1 +jazz 1 +band 1 +swing 1 +danc 1 +network 1 +guitar 1 +accordion 1 +sharewar 1 +dutch 1 +stuffcornel 1 +club 1 +contain 1 +mani 1 +link 1 +netherland 1 +ithacaithacanet 1 +spinner 1 +market 1 +place 1 +paperssoftwar 1 +reliabl 1 +scientif 1 +american 1 +design 1 +perform 1 +lightweight 1 +group 1 +commun 1 +html 1 +version 1 +framework 1 +protocol 1 +composit 1 +incorpor 1 +resourc 1 +inform 1 +flow 1 +control 1 +strong 1 +weak 1 +virtual 1 +synchroni 1 +flexibl 1 +secur 1 +architectur 1 +fault 1 +toler 1 +support 1 +complex 1 +multi 1 +media 1 +applic 1 +us 1 +oper 1 +mobil 1 +agent 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..4cfce835 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,100 @@ +laura 1 +sabel 1 +cornel 1 +profession 1 +inform 1 +doctor 1 +research 1 +professorkeith 1 +marzullo 1 +univers 1 +california 1 +diego 1 +failur 1 +detect 1 +asynchron 1 +distribut 1 +system 1 +formor 1 +detector 1 +tushar 1 +chandra 1 +sfailur 1 +page 1 +final 1 +finish 1 +thesi 1 +approxim 1 +perfect 1 +asynchronousdistribut 1 +public 1 +elect 1 +consensu 1 +keith 1 +comput 1 +scienc 1 +technic 1 +report 1 +februari 1 +submit 1 +process 1 +letter 1 +postscript 1 +copi 1 +click 1 +version 1 +appear 1 +proceed 1 +annual 1 +symposium 1 +principl 1 +distributedcomput 1 +august 1 +reliabl 1 +octob 1 +march 1 +revis 1 +june 1 +us 1 +consist 1 +subcut 1 +stabl 1 +properti 1 +intern 1 +workshop 1 +algorithm 1 +wdag 1 +publish 1 +springer 1 +verlag 1 +lecturenot 1 +seri 1 +expand 1 +horizon 1 +cow 1 +interest 1 +strawberri 1 +tart 1 +blow 1 +torch 1 +experi 1 +alpacanet 1 +electron 1 +gourmet 1 +guid 1 +thebobbi 1 +award 1 +especi 1 +spam 1 +jelli 1 +belli 1 +bean 1 +free 1 +sampl 1 +answer 1 +survei 1 +bingo 1 +zone 1 +canplai 1 +cash 1 +prize 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..889ec8c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,168 @@ +faculti 1 +research 1 +interest 1 +toueg 1 +professorph 1 +princeton 1 +univers 1 +interestsmi 1 +includ 1 +distribut 1 +comput 1 +fault 1 +toleranceand 1 +real 1 +time 1 +work 1 +methodolog 1 +paradigm 1 +algorithm 1 +forfault 1 +toler 1 +system 1 +messag 1 +pass 1 +andshar 1 +memori 1 +long 1 +term 1 +goal 1 +bridg 1 +gapbetween 1 +theoret 1 +result 1 +need 1 +effici 1 +practicalsolut 1 +collabor 1 +withtushar 1 +chandraand 1 +prasad 1 +jayanti 1 +scienc 1 +student 1 +continu 1 +onunreli 1 +failuredetector 1 +wait 1 +free 1 +object 1 +share 1 +fundament 1 +computingst 1 +consensu 1 +problem 1 +cannot 1 +solv 1 +adeterminist 1 +asynchron 1 +impossibilityresult 1 +inher 1 +difficulti 1 +determin 1 +whether 1 +aprocess 1 +crash 1 +mere 1 +slow 1 +inour 1 +abl 1 +exactli 1 +much 1 +informationabout 1 +failur 1 +necessari 1 +suffici 1 +wefirst 1 +show 1 +unreli 1 +detector 1 +canmak 1 +infinit 1 +number 1 +mistak 1 +systemswith 1 +major 1 +correct 1 +process 1 +prove 1 +solveconsensu 1 +provid 1 +least 1 +muchinform 1 +thu 1 +weakest 1 +amajor 1 +explor 1 +practicalityof 1 +implement 1 +applic 1 +reli 1 +theircorrect 1 +concurr 1 +consist 1 +commun 1 +sharedobject 1 +accessesthi 1 +guarante 1 +respons 1 +even 1 +otherprocess 1 +hierarchi 1 +ofobject 1 +type 1 +assign 1 +level 1 +thatcorrespond 1 +abil 1 +particular 1 +shown 1 +well 1 +known 1 +herlihi 1 +robust 1 +inform 1 +anobject 1 +us 1 +atani 1 +question 1 +whetherrobust 1 +exist 1 +select 1 +public 1 +bracha 1 +broadcast 1 +protocol 1 +journal 1 +srikanth 1 +optim 1 +clock 1 +synchron 1 +abbadi 1 +maintain 1 +avail 1 +partit 1 +replic 1 +databas 1 +transact 1 +neiger 1 +automat 1 +increas 1 +chandra 1 +proceed 1 +symposium 1 +principl 1 +august 1 +montreal 1 +canada 1 +hadzilaco 1 +detectorfor 1 +vancouv 1 +ieee 1 +foundat 1 +octob 1 +pittsburgh 1 +pennsylvania 1 +simul 1 +common 1 +knowledg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..d41cf8a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,100 @@ +samuel 1 +weber 1 +act 1 +assist 1 +professor 1 +upson 1 +hallphon 1 +email 1 +cornel 1 +educurr 1 +univers 1 +director 1 +master 1 +engin 1 +program 1 +comput 1 +scienc 1 +research 1 +interest 1 +softwar 1 +design 1 +specif 1 +verif 1 +languag 1 +semant 1 +distribut 1 +system 1 +cours 1 +technolog 1 +techniqu 1 +fall 1 +introduct 1 +spring 1 +public 1 +bloom 1 +metatheori 1 +calculu 1 +technic 1 +report 1 +submit 1 +confer 1 +brown 1 +compil 1 +silicon 1 +formal 1 +verifi 1 +delai 1 +insensit 1 +circuit 1 +cornellunivers 1 +journal 1 +process 1 +algebra 1 +meta 1 +theori 1 +practic 1 +thesi 1 +august 1 +exercis 1 +appli 1 +structur 1 +oper 1 +workshop 1 +foundat 1 +applic 1 +bakker 1 +roever 1 +rozenberg 1 +editor 1 +lectur 1 +note 1 +springer 1 +verlag 1 +page 1 +scheme 1 +knight 1 +savag 1 +proceed 1 +advanc 1 +vlsi 1 +parallel 1 +amdur 1 +hadzilaco 1 +messag 1 +complex 1 +binari 1 +byzantin 1 +agreement 1 +crash 1 +failur 1 +bound 1 +toronto 1 +septemb 1 +seshadri 1 +wortman 1 +small 1 +analysi 1 +concurr 1 +sigplan 1 +implement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..452c050e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,81 @@ +sean 1 +landissean 1 +landi 1 +master 1 +engin 1 +comput 1 +sciencewelcom 1 +cornel 1 +home 1 +page 1 +resum 1 +inform 1 +current 1 +cours 1 +advanc 1 +databas 1 +systemsc 1 +project 1 +render 1 +system 1 +window 1 +past 1 +machin 1 +percept 1 +final 1 +analyz 1 +color 1 +book 1 +draw 1 +clickherefor 1 +postscript 1 +version 1 +sampl 1 +weanalyz 1 +topic 1 +graphic 1 +content 1 +base 1 +imag 1 +retriev 1 +interior 1 +design 1 +educ 1 +interest 1 +object 1 +orient 1 +program 1 +patternsprofession 1 +work 1 +isi 1 +distribut 1 +divis 1 +stratu 1 +lead 1 +orbix 1 +develop 1 +team 1 +product 1 +combin 1 +acorba 1 +compliant 1 +request 1 +broker 1 +iona 1 +technolog 1 +releas 1 +person 1 +basebal 1 +favorit 1 +alpin 1 +ski 1 +golf 1 +plai 1 +softbal 1 +card 1 +collectingi 1 +reach 1 +comeduc 1 +sheet 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..08eef873 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,62 @@ +seena 1 +cherangara 1 +cherangaramast 1 +engineeringclass 1 +dept 1 +comput 1 +sciencecornel 1 +univers 1 +welcom 1 +homepagecurr 1 +student 1 +scienc 1 +depart 1 +cornel 1 +ithaca 1 +tech 1 +degre 1 +engin 1 +colleg 1 +trivandrum 1 +kerala 1 +india 1 +inform 1 +cours 1 +taken 1 +cornelluniversityfal 1 +oper 1 +system 1 +practicum 1 +project 1 +specif 1 +hoca 1 +softwar 1 +multimedia 1 +post 1 +processingalgorithm 1 +jpeg 1 +artifact 1 +reduct 1 +spring 1 +graphic 1 +cspracticum 1 +anim 1 +magic 1 +carpet 1 +distribut 1 +colloqium 1 +summer 1 +model 1 +java 1 +parametr 1 +equat 1 +viewer 1 +click 1 +postscript 1 +version 1 +myresum 1 +mapl 1 +york 1 +last 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..a2613700 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,238 @@ +rosen 1 +sharma 1 +offic 1 +upson 1 +hall 1 +email 1 +cornel 1 +came 1 +stanford 1 +spent 1 +coupl 1 +year 1 +student 1 +work 1 +research 1 +stream 1 +live 1 +media 1 +like 1 +audio 1 +video 1 +internet 1 +part 1 +modifiedigmp 1 +multicast 1 +protocol 1 +develop 1 +unicast 1 +layer 1 +virtual 1 +classroom 1 +system 1 +initi 1 +prototyp 1 +deploi 1 +spring 1 +fall 1 +quarter 1 +us 1 +altern 1 +instruct 1 +televis 1 +network 1 +sitn 1 +program 1 +microwav 1 +link 1 +remot 1 +site 1 +softwar 1 +asynchron 1 +access 1 +class 1 +lectur 1 +note 1 +also 1 +stumbl 1 +upon 1 +fact 1 +text 1 +slide 1 +portion 1 +enhanc 1 +greatli 1 +leadto 1 +format 1 +compani 1 +vxtreme 1 +palo 1 +alto 1 +silicon 1 +vallei 1 +start 1 +client 1 +sever 1 +applic 1 +multimedia 1 +deliveri 1 +paper 1 +signal 1 +oper 1 +support 1 +nativ 1 +mode 1 +keshav 1 +sigcomm 1 +segment 1 +mix 1 +imag 1 +navin 1 +chaddha 1 +avneesh 1 +agarw 1 +anoop 1 +gupta 1 +asilomar 1 +igmp 1 +group 1 +membership 1 +design 1 +steve 1 +deer 1 +releas 1 +internetdraft 1 +written 1 +bill 1 +fenner 1 +optic 1 +charact 1 +recognit 1 +statist 1 +structur 1 +method 1 +niten 1 +malhan 1 +bachelor 1 +thesi 1 +dept 1 +comput 1 +scienc 1 +indian 1 +institut 1 +technolog 1 +delhiunpublish 1 +stuff 1 +character 1 +variabl 1 +rate 1 +sourc 1 +term 1 +preform 1 +conferenc 1 +intern 1 +report 1 +cool 1 +manipul 1 +languag 1 +first 1 +data 1 +type 1 +allow 1 +sequenc 1 +blur 1 +speckl 1 +transform 1 +affin 1 +subband 1 +motion 1 +estmat 1 +make 1 +fast 1 +effici 1 +implement 1 +writen 1 +current 1 +test 1 +machin 1 +displai 1 +give 1 +flavour 1 +look 1 +imagefram 1 +width 1 +height 1 +ifram 1 +nodisplai 1 +file 1 +filenam 1 +putimageincanva 1 +predecessor 1 +hate 1 +motif 1 +interpret 1 +yacc 1 +limit 1 +shell 1 +script 1 +gener 1 +widget 1 +given 1 +name 1 +compil 1 +dummi 1 +event 1 +snooper 1 +record 1 +player 1 +consid 1 +coolest 1 +thing 1 +ever 1 +someth 1 +similar 1 +microsoft 1 +window 1 +doesnt 1 +replai 1 +lot 1 +kludg 1 +fool 1 +server 1 +postscript 1 +fractal 1 +creat 1 +call 1 +directori 1 +hole 1 +viewer 1 +fix 1 +dissalow 1 +write 1 +semant 1 +question 1 +often 1 +ask 1 +peopl 1 +gaveth 1 +chines 1 +friend 1 +wonder 1 +claim 1 +mean 1 +small 1 +hors 1 +smart 1 +hindi 1 +tongu 1 +sharm 1 +shyness 1 +sharmila 1 +actress 1 +tagor 1 +nicknam 1 +frozen 1 +stupid 1 +ealri 1 +jewish 1 +leader 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..d974ca78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,114 @@ +eric 1 +shim 1 +home 1 +pagewelcom 1 +page 1 +young 1 +sang 1 +shimmast 1 +engin 1 +comput 1 +sciencecornel 1 +univers 1 +view 1 +cornel 1 +address 1 +dryden 1 +citi 1 +ithaca 1 +phone 1 +korean 1 +version 1 +click 1 +onthi 1 +receiv 1 +scienc 1 +degre 1 +california 1 +irvinestudi 1 +inform 1 +meng 1 +project 1 +system 1 +camera 1 +transform 1 +abstract 1 +final 1 +graphic 1 +classi 1 +love 1 +plai 1 +follow 1 +music 1 +instrument 1 +acoust 1 +guitar 1 +piano 1 +keyboard 1 +listen 1 +stan 1 +getz 1 +antonio 1 +carlo 1 +jobim 1 +john 1 +coltran 1 +mile 1 +davi 1 +earl 1 +klugh 1 +metheni 1 +archemi 1 +chopin 1 +watch 1 +movi 1 +cinema 1 +paradiso 1 +french 1 +kiss 1 +miser 1 +miss 1 +saigon 1 +favorit 1 +korea 1 +newswant 1 +know 1 +graduat 1 +student 1 +associ 1 +anybodi 1 +like 1 +jazz 1 +check 1 +interest 1 +java 1 +cyberspac 1 +friend 1 +hana 1 +work 1 +melco 1 +last 1 +time 1 +went 1 +world 1 +jung 1 +hwan 1 +middl 1 +school 1 +back 1 +victor 1 +kwan 1 +hong 1 +jiyang 1 +kang 1 +homepag 1 +access 1 +timessinc 1 +still 1 +construct 1 +resum 1 +avail 1 +near 1 +futur 1 +also 1 +soon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..8ee4a5e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,169 @@ +amit 1 +singhal 1 +home 1 +pageamit 1 +singhaldepart 1 +comput 1 +scienc 1 +cornel 1 +universitysingh 1 +eduphon 1 +research 1 +interest 1 +area 1 +inform 1 +retriev 1 +andtext 1 +process 1 +thesi 1 +advisor 1 +late 1 +prof 1 +gerardsalton 1 +current 1 +supervisor 1 +chri 1 +bucklei 1 +clairecardieher 1 +postscript 1 +copi 1 +resum 1 +smart 1 +group 1 +depart 1 +beenon 1 +foremost 1 +field 1 +informationretriev 1 +last 1 +thirti 1 +year 1 +involv 1 +document 1 +length 1 +normal 1 +fairli 1 +text 1 +vari 1 +size 1 +lengthnorm 1 +commonli 1 +us 1 +term 1 +weight 1 +show 1 +thateffect 1 +system 1 +chancessimilar 1 +likelihood 1 +relev 1 +propos 1 +pivot 1 +techniqu 1 +modifi 1 +exist 1 +normalizationfunct 1 +yield 1 +substanti 1 +improv 1 +retrievaleffect 1 +also 1 +effect 1 +normalizationtechniqu 1 +paper 1 +trecparticipationtext 1 +confer 1 +trec 1 +nist 1 +arpa 1 +sponsoredeffort 1 +object 1 +evalu 1 +variou 1 +retrievaltechniqu 1 +independ 1 +testb 1 +hasconsist 1 +best 1 +somepap 1 +automat 1 +structur 1 +summarizationnon 1 +expositori 1 +usual 1 +read 1 +cover 1 +tocov 1 +reader 1 +help 1 +circumst 1 +provid 1 +selectiveaccess 1 +excerpt 1 +need 1 +develop 1 +toanalyz 1 +tool 1 +select 1 +texttravers 1 +papersnorm 1 +mandarmitra 1 +gerard 1 +salton 1 +documentlength 1 +mandar 1 +mitraand 1 +degrad 1 +collect 1 +come 1 +soon 1 +approach 1 +usingsmart 1 +queri 1 +expans 1 +jame 1 +allan 1 +proceedingsof 1 +third 1 +special 1 +public 1 +decomposit 1 +segment 1 +textthem 1 +mitra 1 +hypertext 1 +appear 1 +andmanag 1 +brows 1 +vectorspac 1 +model 1 +proceed 1 +ofth 1 +dual 1 +technolog 1 +applic 1 +travers 1 +theme 1 +gener 1 +analysi 1 +summar 1 +machineread 1 +amitsingh 1 +june 1 +groupmemb 1 +senior 1 +associ 1 +student 1 +david 1 +master 1 +engin 1 +other 1 +slowli 1 +fluctuat 1 +thank 1 +visit 1 +page 1 +visitor 1 +sinc 1 +iinstal 1 +counter 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..a37204f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,153 @@ +keshav 1 +keshavemail 1 +skeshav 1 +cornel 1 +work 1 +upson 1 +hall 1 +depart 1 +comput 1 +scienc 1 +univers 1 +ithaca 1 +home 1 +christoph 1 +lane 1 +edui 1 +current 1 +associ 1 +professor 1 +spentfiv 1 +year 1 +sinc 1 +build 1 +network 1 +xunet 1 +wide 1 +area 1 +built 1 +scratch 1 +router 1 +switch 1 +softwar 1 +oper 1 +idlinet 1 +incollabor 1 +delhi 1 +base 1 +name 1 +equip 1 +fore 1 +systemsand 1 +zeitnet 1 +idlinetsourc 1 +code 1 +public 1 +domain 1 +avail 1 +nativ 1 +mode 1 +page 1 +featur 1 +protocol 1 +stack 1 +applicationget 1 +write 1 +directli 1 +virtual 1 +circuit 1 +also 1 +support 1 +independ 1 +signal 1 +span 1 +compliant 1 +final 1 +goof 1 +talk 1 +head 1 +facial 1 +anim 1 +snoop 1 +send 1 +video 1 +format 1 +mbone 1 +canb 1 +driven 1 +remot 1 +site 1 +internet 1 +linkspapersher 1 +linkto 1 +paper 1 +postscript 1 +reali 1 +real 1 +packet 1 +level 1 +simul 1 +still 1 +maintain 1 +instal 1 +idea 1 +mani 1 +peopl 1 +actual 1 +link 1 +version 1 +latest 1 +releas 1 +fall 1 +includ 1 +beout 1 +goe 1 +well 1 +native_mod 1 +pagemi 1 +namein 1 +part 1 +world 1 +come 1 +south 1 +india 1 +thanjavur 1 +district 1 +beprecis 1 +prefix 1 +father 1 +sonli 1 +sometim 1 +villag 1 +surnam 1 +thu 1 +myfath 1 +srinivasan 1 +unfortun 1 +round 1 +intoth 1 +squar 1 +hole 1 +custom 1 +first 1 +last 1 +beconfus 1 +quotabl 1 +quot 1 +ought 1 +everi 1 +least 1 +hear 1 +littl 1 +song 1 +read 1 +good 1 +poem 1 +possibl 1 +speak 1 +reason 1 +word 1 +johann 1 +wolfgang 1 +goeth 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..68f0097f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,23 @@ +kenneth 1 +home 1 +page 1 +road 1 +success 1 +alwai 1 +construct 1 +meng 1 +electr 1 +engin 1 +depart 1 +sinc 1 +work 1 +prof 1 +zabih 1 +place 1 +student 1 +came 1 +univers 1 +wisconsin 1 +madison 1 +cornel 1 +sunlab 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..7df3774e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,329 @@ +come 1 +clear 1 +step 1 +forest 1 +around 1 +clearinglook 1 +realiz 1 +must 1 +walk 1 +much 1 +thanyou 1 +plan 1 +wide 1 +varieti 1 +tree 1 +surround 1 +onal 1 +side 1 +especi 1 +theweath 1 +seem 1 +fairli 1 +overcast 1 +somehow 1 +figur 1 +ifit 1 +go 1 +rain 1 +snow 1 +perhap 1 +distanc 1 +larg 1 +mountain 1 +quiteclear 1 +question 1 +hear 1 +bird 1 +chirp 1 +quit 1 +near 1 +cours 1 +theymai 1 +respond 1 +sound 1 +insid 1 +louder 1 +nearbywaterfal 1 +gotta 1 +could 1 +follow 1 +want 1 +apath 1 +leav 1 +direct 1 +path 1 +label 1 +asign 1 +sai 1 +hillschool 1 +sign 1 +hell 1 +school 1 +also 1 +small 1 +wormhol 1 +connect 1 +nearbyhous 1 +presum 1 +traffic 1 +place 1 +clearinginsid 1 +coupl 1 +structur 1 +shack 1 +door 1 +fall 1 +offand 1 +complet 1 +modern 1 +glass 1 +hous 1 +withno 1 +stone 1 +front 1 +pile 1 +book 1 +score 1 +magazin 1 +random 1 +paper 1 +scatter 1 +throughout 1 +theclear 1 +well 1 +like 1 +vagu 1 +rhyme 1 +reasonto 1 +glanc 1 +topic 1 +sortsof 1 +look 1 +recent 1 +read 1 +probabl 1 +betteridea 1 +make 1 +person 1 +tick 1 +resum 1 +itseem 1 +somewhat 1 +corner 1 +importantth 1 +worri 1 +kind 1 +strang 1 +clearingh 1 +smile 1 +hello 1 +oftendescrib 1 +thin 1 +mother 1 +mostdistinct 1 +featur 1 +hair 1 +bright 1 +golden 1 +quitelong 1 +elfin 1 +seen 1 +peoplebefor 1 +warn 1 +link 1 +next 1 +paragraph 1 +written 1 +theresoon 1 +obviou 1 +alwai 1 +thing 1 +startstel 1 +stori 1 +stop 1 +hum 1 +tune 1 +start 1 +tell 1 +whynichola 1 +negropont 1 +moron 1 +ifyou 1 +never 1 +heard 1 +talk 1 +polit 1 +late 1 +twentieth 1 +centuri 1 +america 1 +thenh 1 +paus 1 +obscur 1 +theorem 1 +theoret 1 +comput 1 +scienc 1 +rather 1 +listen 1 +hetend 1 +appearanceinstead 1 +wear 1 +mostli 1 +color 1 +purpl 1 +dark 1 +turquois 1 +everyth 1 +either 1 +silk 1 +linen 1 +contrast 1 +nice 1 +heha 1 +gold 1 +ring 1 +imageof 1 +beaver 1 +right 1 +finger 1 +point 1 +awai 1 +fromhim 1 +left 1 +silver 1 +ocean 1 +wave 1 +pattern 1 +pewter 1 +pentacl 1 +neck 1 +andlook 1 +altogeth 1 +hippi 1 +someon 1 +asclass 1 +intellectu 1 +peopl 1 +visit 1 +clearingdan 1 +occasion 1 +spent 1 +time 1 +briani 1 +friend 1 +us 1 +live 1 +anundergrad 1 +nowadai 1 +work 1 +pointcast 1 +internet 1 +newsprovid 1 +ancamosoiu 1 +pronounc 1 +schwa 1 +best 1 +backwhen 1 +mani 1 +week 1 +twoand 1 +half 1 +year 1 +went 1 +europ 1 +togeth 1 +summer 1 +wegradu 1 +inner 1 +child 1 +shejust 1 +onewav 1 +blame 1 +dread 1 +name 1 +actual 1 +usedto 1 +busi 1 +object 1 +power 1 +daniel 1 +issomeon 1 +gotten 1 +know 1 +better 1 +last 1 +severalmonth 1 +think 1 +sometim 1 +novemb 1 +becamemuch 1 +switch 1 +eedepart 1 +wise 1 +multimediastud 1 +dept 1 +commun 1 +reason 1 +inth 1 +own 1 +friendof 1 +perri 1 +finlei 1 +page 1 +notanymor 1 +instead 1 +brian 1 +pictur 1 +steelcas 1 +isth 1 +largest 1 +manufactur 1 +offic 1 +furnitur 1 +world 1 +dserver 1 +kinda 1 +cheesi 1 +haveth 1 +pyramid 1 +still 1 +neat 1 +thebuild 1 +ius 1 +sing 1 +cornel 1 +japan 1 +programcal 1 +hire 1 +teach 1 +english 1 +high 1 +student 1 +ideal 1 +winter 1 +hani 1 +graduatedfrom 1 +june 1 +couldn 1 +happen 1 +neededto 1 +great 1 +even 1 +nick 1 +agood 1 +cuter 1 +thanth 1 +blurri 1 +would 1 +indic 1 +music 1 +maker 1 +dreamer 1 +ofdream 1 +aphex 1 +twindan 1 +brown 1 +snowman 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..690c5502 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,25 @@ +autobiographi 1 +upkeep 1 +lookin 1 +lot 1 +inform 1 +ultra 1 +cool 1 +home 1 +page 1 +soon 1 +keep 1 +take 1 +long 1 +setuup 1 +doingajaymanishanujmom 1 +daddepart 1 +comput 1 +sciencesearch 1 +netentertain 1 +weeklycricket 1 +rate 1 +ashish 1 +soni 1 +sonia 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..c63be7dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,11 @@ +scott 1 +dawson 1 +padif 1 +us 1 +form 1 +capabl 1 +browser 1 +would 1 +better 1 +scottdawson 1 +shomebas 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..87338e03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,25 @@ +paul 1 +stodghil 1 +home 1 +pagepaul 1 +stodghillstodghil 1 +cornel 1 +rhode 1 +hall 1 +affili 1 +depart 1 +comput 1 +scienc 1 +atcornel 1 +univers 1 +advanc 1 +research 1 +institut 1 +acri 1 +theori 1 +center 1 +bernoulli 1 +projectinterest 1 +ultim 1 +hockei 1 +scheme 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..8d73ad35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,13 @@ +scott 1 +stoller 1 +former 1 +home 1 +pagescott 1 +page 1 +move 1 +http 1 +indiana 1 +hyplan 1 +htmllast 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..f2341128 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,49 @@ +sugata 1 +mukhopadhyai 1 +home 1 +page 1 +welcom 1 +graduat 1 +student 1 +depart 1 +ofcomput 1 +scienc 1 +cornel 1 +univers 1 +work 1 +multimedia 1 +system 1 +prof 1 +brian 1 +smith 1 +marri 1 +wonder 1 +person 1 +earth 1 +ritu 1 +spring 1 +take 1 +high 1 +perform 1 +comput 1 +compil 1 +design 1 +architectur 1 +also 1 +advanc 1 +option 1 +price 1 +theori 1 +czar 1 +progress 1 +seminar 1 +previou 1 +semest 1 +contact 1 +mehom 1 +phone 1 +mailsugata 1 +eduaddress 1 +hichori 1 +estat 1 +owego 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..2d6a3d6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,28 @@ +home 1 +page 1 +paul 1 +sukhpal 1 +sanghera 1 +physic 1 +carleton 1 +univers 1 +present 1 +student 1 +comput 1 +scienc 1 +cornel 1 +ithaca 1 +background 1 +project 1 +philosophi 1 +life 1 +resum 1 +rout 1 +clock 1 +tick 1 +need 1 +java 1 +capabl 1 +browser 1 +view 1 +anim 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..873a7e51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,132 @@ +bilth 1 +gater 1 +galact 1 +empir 1 +empirewritten 1 +illustr 1 +sumedh 1 +kanetkaremail 1 +kanetkar 1 +cornel 1 +eduthi 1 +seri 1 +weekli 1 +comic 1 +strip 1 +drew 1 +intern 1 +atmicrosoft 1 +summer 1 +post 1 +theintern 1 +social 1 +alia 1 +read 1 +regularli 1 +peopl 1 +thesumm 1 +progress 1 +notic 1 +artwork 1 +begun 1 +leak 1 +theful 1 +time 1 +employe 1 +well 1 +whether 1 +high 1 +lord 1 +never 1 +found 1 +episod 1 +first 1 +stripi 1 +within 1 +week 1 +arriv 1 +redmond 1 +tri 1 +persuad 1 +themicrosoft 1 +newslett 1 +print 1 +perceiv 1 +problemand 1 +declin 1 +didn 1 +want 1 +microsoft 1 +portrai 1 +evilempir 1 +understand 1 +viewpoint 1 +told 1 +comicstrip 1 +attempt 1 +show 1 +compani 1 +view 1 +mani 1 +theoutsid 1 +world 1 +person 1 +bitter 1 +feel 1 +toward 1 +eitherbil 1 +gate 1 +corpor 1 +heck 1 +realli 1 +enjoi 1 +summersof 1 +work 1 +strongli 1 +recommend 1 +internship 1 +program 1 +anyoneinterest 1 +industri 1 +make 1 +orient 1 +thateveri 1 +suffer 1 +long 1 +session 1 +theyshow 1 +video 1 +fill 1 +kind 1 +trivia 1 +also 1 +makey 1 +sign 1 +disclosur 1 +agreeement 1 +would 1 +fit 1 +theymad 1 +everyon 1 +stand 1 +place 1 +hand 1 +theirheart 1 +pledg 1 +alleig 1 +comput 1 +everydesk 1 +everi 1 +home 1 +run 1 +softwar 1 +anywai 1 +space 1 +roosterepisod 1 +rebel 1 +threatepisod 1 +flame 1 +parti 1 +part 1 +imperi 1 +insigniaepisod 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..413d6c1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,167 @@ +kristen 1 +summer 1 +student 1 +cornel 1 +univers 1 +upson 1 +hall 1 +research 1 +interest 1 +work 1 +inform 1 +captur 1 +accessresearch 1 +group 1 +document 1 +analysi 1 +mylong 1 +term 1 +goal 1 +provid 1 +support 1 +forsophist 1 +electron 1 +manipulationtool 1 +index 1 +brows 1 +link 1 +primari 1 +discov 1 +logicalstructur 1 +arbitrari 1 +take 1 +documentrepresent 1 +input 1 +return 1 +hierarchyof 1 +logic 1 +piec 1 +output 1 +exampl 1 +given 1 +scan 1 +postscriptvers 1 +technic 1 +report 1 +would 1 +like 1 +tobe 1 +abl 1 +divid 1 +section 1 +paragraph 1 +similarli 1 +busi 1 +letter 1 +address 1 +head 1 +bodi 1 +close 1 +identifi 1 +problem 1 +compon 1 +segment 1 +andclassif 1 +categor 1 +also 1 +rais 1 +question 1 +evalu 1 +previou 1 +differ 1 +descript 1 +correct 1 +hierarchi 1 +type 1 +structur 1 +theoret 1 +limit 1 +task 1 +relev 1 +bruce 1 +croft 1 +stop 1 +issu 1 +retriev 1 +novemb 1 +magazin 1 +number 1 +interfac 1 +effici 1 +flexibl 1 +determininglog 1 +enabl 1 +hierarch 1 +soin 1 +gener 1 +system 1 +handl 1 +ofmultipl 1 +paper 1 +us 1 +textual 1 +cue 1 +browsingco 1 +author 1 +daniela 1 +digit 1 +librari 1 +current 1 +nabil 1 +adam 1 +bharat 1 +bhargava 1 +yelena 1 +yesha 1 +editor 1 +chapter 1 +lectur 1 +note 1 +comput 1 +scienc 1 +seri 1 +springer 1 +verlag 1 +version 1 +geometr 1 +algorithm 1 +experi 1 +autom 1 +mathemat 1 +model 1 +forthcom 1 +white 1 +space 1 +proceed 1 +workshop 1 +principl 1 +process 1 +seeheim 1 +podp 1 +toward 1 +taxonomi 1 +structureselectron 1 +publish 1 +superhighwai 1 +dartmouth 1 +institut 1 +advanc 1 +graduat 1 +studi 1 +boston 1 +donald 1 +johnson 1 +memori 1 +dag 1 +scholaraward 1 +best 1 +recipi 1 +near 1 +wordless 1 +classif 1 +intern 1 +confer 1 +analysisand 1 +recognit 1 +montral 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..30f8b01b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,55 @@ +masafumi 1 +suzukither 1 +would 1 +applet 1 +browser 1 +suppot 1 +java 1 +suzukisuzuki 1 +cornel 1 +educlassesfal 1 +case 1 +studi 1 +oper 1 +research 1 +optim 1 +engin 1 +probabl 1 +statist 1 +design 1 +analysi 1 +simul 1 +system 1 +project 1 +spring 1 +introduct 1 +stochast 1 +model 1 +comput 1 +graphic 1 +summer 1 +data 1 +structur 1 +fall 1 +softwar 1 +technolog 1 +techniqu 1 +program 1 +multimedia 1 +report 1 +network 1 +telecommun 1 +polici 1 +databas 1 +thrive 1 +inform 1 +revolut 1 +sector 1 +site 1 +manag 1 +independ 1 +polygon 1 +displai 1 +us 1 +prototyp 1 +resum 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..b28a3cb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,28 @@ +jonathan 1 +swartz 1 +home 1 +page 1 +cornel 1 +edui 1 +student 1 +departmentof 1 +comput 1 +scienc 1 +univers 1 +spend 1 +time 1 +heredevelopingrivl 1 +languag 1 +multimedia 1 +process 1 +myaddress 1 +phone 1 +number 1 +littl 1 +humor 1 +brighten 1 +dayjon 1 +movi 1 +connectioncool 1 +siteslast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..b375fd46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,23 @@ +sunil 1 +srivastava 1 +home 1 +pagewelcom 1 +page 1 +srivastavamast 1 +engin 1 +studentcomput 1 +scienc 1 +departmentcornel 1 +univers 1 +academ 1 +class 1 +project 1 +person 1 +inform 1 +us 1 +linkscom 1 +question 1 +send 1 +mail 1 +sxsriva 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..3ca5c7db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,218 @@ +huangszu 1 +huang 1 +defend 1 +truth 1 +champion 1 +justic 1 +around 1 +nice 1 +steven 1 +year 1 +nevermind 1 +long 1 +exactli 1 +iarriv 1 +second 1 +proud 1 +parent 1 +soundslik 1 +mobi 1 +dick 1 +assur 1 +nointent 1 +find 1 +ship 1 +hunt 1 +whale 1 +digress 1 +brought 1 +taiwan 1 +tender 1 +seven 1 +wholefamili 1 +migrat 1 +south 1 +tropic 1 +island 1 +philippin 1 +made 1 +home 1 +live 1 +fifteen 1 +pictur 1 +aroundsix 1 +thu 1 +becam 1 +quit 1 +fluentli 1 +bilingu 1 +love 1 +read 1 +thepoetri 1 +tang 1 +dynasti 1 +arabian 1 +night 1 +natur 1 +children 1 +version 1 +host 1 +stori 1 +somewhat 1 +fulfil 1 +name 1 +mean 1 +literatur 1 +class 1 +grade 1 +cours 1 +life 1 +doveright 1 +marvel 1 +comput 1 +four 1 +later 1 +ienter 1 +univers 1 +philippineswith 1 +major 1 +like 1 +career 1 +found 1 +talent 1 +draw 1 +scienc 1 +ofcomput 1 +unabashedli 1 +knew 1 +good 1 +alsoin 1 +colleg 1 +whirlwind 1 +happi 1 +peac 1 +three 1 +run 1 +awoman 1 +becom 1 +import 1 +part 1 +effortlessli 1 +defeat 1 +hobbi 1 +eek 1 +ideal 1 +higher 1 +pai 1 +blunt 1 +ienrol 1 +cornel 1 +graduat 1 +program 1 +reward 1 +almost 1 +everyth 1 +ever 1 +want 1 +andwork 1 +lucki 1 +septemb 1 +welcom 1 +page 1 +segreg 1 +everydaygeek 1 +think 1 +mani 1 +interest 1 +asid 1 +fromactu 1 +write 1 +happili 1 +myspar 1 +time 1 +anyth 1 +calvin 1 +hobb 1 +unix 1 +network 1 +internet 1 +relai 1 +chat 1 +gener 1 +linuxnet 1 +build 1 +lego 1 +thing 1 +practic 1 +object 1 +orient 1 +design 1 +plastic 1 +model 1 +weapon 1 +watch 1 +suspens 1 +thriller 1 +film 1 +listen 1 +music 1 +sting 1 +other 1 +sesam 1 +street 1 +discoveri 1 +channel 1 +railroad 1 +rich 1 +enough 1 +field 1 +also 1 +rather 1 +broad 1 +spectrum 1 +ofinterest 1 +though 1 +studi 1 +concentr 1 +area 1 +graphic 1 +wish 1 +offend 1 +bysom 1 +blatant 1 +self 1 +promot 1 +check 1 +myresum 1 +highlyinterest 1 +linux 1 +freeli 1 +avail 1 +oper 1 +system 1 +intel 1 +compatiblecomput 1 +master 1 +engin 1 +project 1 +blobbi 1 +metaballsund 1 +supervis 1 +bruce 1 +land 1 +current 1 +involv 1 +anoth 1 +survei 1 +techniquesin 1 +human 1 +face 1 +resolut 1 +independ 1 +andport 1 +audio 1 +effect 1 +editor 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..b00230b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,6 @@ +henzing 1 +hytechhytech 1 +hybrid 1 +technolog 1 +toolw 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..a6f48337 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,55 @@ +henzingerthoma 1 +henzing 1 +movedassist 1 +professorcomput 1 +scienc 1 +departmentcornel 1 +universityithaca 1 +email 1 +cornel 1 +eduphon 1 +researchform 1 +support 1 +develop 1 +analysi 1 +concurr 1 +real 1 +time 1 +embed 1 +system 1 +relat 1 +researchat 1 +cornelland 1 +worldwid 1 +resumepublicationsreact 1 +modul 1 +formal 1 +methodolog 1 +systemsr 1 +logic 1 +transit 1 +systemsclock 1 +automata 1 +systemshybrid 1 +systemsbibliographi 1 +bibtex 1 +list 1 +publicationstoolshytech 1 +symbol 1 +model 1 +checker 1 +linear 1 +hybrid 1 +systemscoursesc 1 +fall 1 +advanc 1 +program 1 +languagesconferenceshybrid 1 +verif 1 +control 1 +systemscav 1 +comput 1 +aid 1 +verificationlast 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..096f8569 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,38 @@ +takako 1 +hickei 1 +homepag 1 +email 1 +cornel 1 +eduoffic 1 +upson 1 +hallphon 1 +student 1 +depart 1 +comput 1 +scienc 1 +atcornel 1 +univers 1 +advis 1 +byrobbert 1 +reness 1 +andfr 1 +schneider 1 +research 1 +interest 1 +distribut 1 +system 1 +program 1 +environ 1 +resourc 1 +manag 1 +horu 1 +project 1 +previou 1 +life 1 +social 1 +psycholog 1 +backcountri 1 +hockei 1 +quot 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..850f8dc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,23 @@ +tim_teitelbaum 1 +teitelbaumassoci 1 +professor 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +eduresearch 1 +interest 1 +increment 1 +transform 1 +program 1 +environ 1 +languag 1 +base 1 +editor 1 +compil 1 +attribut 1 +grammar 1 +adavita 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..80172912 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,207 @@ +thorsten 1 +eicken 1 +eickenassist 1 +professor 1 +upson 1 +hallphon 1 +email 1 +cornel 1 +eduprojectsth 1 +architectureprovid 1 +user 1 +level 1 +network 1 +interfacefor 1 +cluster 1 +workstat 1 +offer 1 +latencyand 1 +high 1 +bandwidth 1 +commun 1 +speed 1 +lan 1 +currentimplement 1 +us 1 +interconnect 1 +activ 1 +messag 1 +sever 1 +project 1 +port 1 +platform 1 +includingth 1 +extend 1 +model 1 +tonon 1 +spmd 1 +program 1 +split 1 +simpl 1 +extensionto 1 +parallel 1 +comput 1 +newplatform 1 +includ 1 +share 1 +memori 1 +multprocessor 1 +run 1 +coursesc 1 +introduct 1 +digit 1 +system 1 +computerorgan 1 +fall 1 +perform 1 +architectur 1 +spring 1 +frontier 1 +guest 1 +lectur 1 +maynd 1 +department 1 +talk 1 +report 1 +departement 1 +annual 1 +entri 1 +forum 1 +slide 1 +person 1 +pagestv 1 +pond 1 +real 1 +water 1 +fish 1 +plant 1 +tire 1 +firewal 1 +macpppwhich 1 +gener 1 +time 1 +password 1 +automat 1 +without 1 +everhav 1 +think 1 +well 1 +month 1 +passwordssuddenli 1 +installationinstruct 1 +select 1 +publicationsu 1 +interfac 1 +distributedcomput 1 +anindya 1 +basu 1 +vineet 1 +buch 1 +werner 1 +vogel 1 +appear 1 +june 1 +latenc 1 +atmnetwork 1 +avula 1 +present 1 +palo 1 +alto 1 +abridg 1 +version 1 +paper 1 +ieee 1 +micro 1 +magazin 1 +mechan 1 +integr 1 +andcomput 1 +culler 1 +goldstein 1 +schauser 1 +proceed 1 +symp 1 +gold 1 +coast 1 +australia 1 +effici 1 +communicationarchitectur 1 +multiprocessor 1 +thesi 1 +novemb 1 +univers 1 +california 1 +berkelei 1 +publish 1 +link 1 +lead 1 +postscript 1 +dusseau 1 +krishnamurthi 1 +lumetta 1 +yelick 1 +supercomput 1 +compil 1 +controlledthread 1 +abstract 1 +machin 1 +journal 1 +distribut 1 +special 1 +issu 1 +dataflow 1 +evalu 1 +fine 1 +grain 1 +spertu 1 +dalli 1 +proc 1 +diego 1 +logp 1 +toward 1 +realist 1 +modelof 1 +karp 1 +patterson 1 +sahai 1 +santo 1 +subramonian 1 +fourth 1 +sigplan 1 +principl 1 +practic 1 +fundament 1 +limit 1 +dataflowmultiprocess 1 +ifip 1 +work 1 +conf 1 +techniqu 1 +medium 1 +orlando 1 +forintegr 1 +symposium 1 +control 1 +multithread 1 +forleni 1 +languag 1 +confer 1 +function 1 +cambridg 1 +august 1 +minimalhardwar 1 +support 1 +thread 1 +wawrzynek 1 +oper 1 +santa 1 +clara 1 +april 1 +analysi 1 +architecturesfor 1 +saavedra 1 +barrera 1 +algorithm 1 +crete 1 +greec 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..f5f1bebe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,43 @@ +lfar 1 +erlingsson 1 +pagelfar 1 +specificationi 1 +student 1 +incomput 1 +scienc 1 +cornel 1 +univers 1 +apart 1 +enjoi 1 +somewhat 1 +incongruousiceland 1 +link 1 +inform 1 +implementationbackgroundwher 1 +come 1 +current 1 +activitieswhat 1 +moment 1 +schedulewher 1 +time 1 +researchwhat 1 +real 1 +work 1 +done 1 +interestswhat 1 +actual 1 +like 1 +acquaintancesthos 1 +know 1 +contact 1 +infohow 1 +touch 1 +pleas 1 +note 1 +page 1 +often 1 +date 1 +gener 1 +assum 1 +disclaim 1 +appli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..9364b035 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,49 @@ +project 1 +mesh 1 +gener 1 +relat 1 +softwar 1 +packag 1 +finit 1 +element 1 +threedimens 1 +includ 1 +geometr 1 +model 1 +themesh 1 +solver 1 +free 1 +softwaredownload 1 +run 1 +unix 1 +window 1 +releas 1 +andqmg 1 +novemb 1 +us 1 +websit 1 +robert 1 +schneider 1 +home 1 +page 1 +mcphedran 1 +offinit 1 +resourc 1 +univers 1 +minnesota 1 +geometri 1 +center 1 +list 1 +ofsoftwar 1 +computationalgeometri 1 +jonathan 1 +shewchuk 1 +triangl 1 +back 1 +vavasi 1 +stephen 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +ithaca 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..39a2c176 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,147 @@ +stephen 1 +vavasi 1 +associ 1 +professor 1 +depart 1 +comput 1 +scienc 1 +rhode 1 +hall 1 +cornel 1 +univers 1 +ithaca 1 +email 1 +phone 1 +period 1 +onsabbat 1 +divis 1 +bldg 1 +argonn 1 +nation 1 +laboratori 1 +cass 1 +note 1 +chang 1 +area 1 +code 1 +effect 1 +research 1 +interest 1 +numer 1 +analysi 1 +aren 1 +tsure 1 +pleas 1 +essaybi 1 +colleagu 1 +trefethen 1 +specif 1 +optim 1 +complex 1 +issuesnumer 1 +method 1 +boundari 1 +valu 1 +problemsgeometr 1 +problem 1 +aris 1 +scientif 1 +computingspars 1 +matrix 1 +computationsi 1 +recent 1 +manuscript 1 +avail 1 +line 1 +primal 1 +dual 1 +acceler 1 +interiorpoint 1 +whose 1 +run 1 +time 1 +depend 1 +click 1 +hough 1 +complet 1 +orthogon 1 +decompositionfor 1 +weight 1 +least 1 +squar 1 +mitchel 1 +aspect 1 +ratio 1 +bound 1 +triangul 1 +gridcut 1 +hyperplan 1 +driscol 1 +conform 1 +map 1 +us 1 +cross 1 +delaunai 1 +packagei 1 +softwar 1 +project 1 +mesh 1 +gener 1 +forth 1 +finit 1 +element 1 +three 1 +dimens 1 +packag 1 +call 1 +sourc 1 +level 1 +anonym 1 +construct 1 +polyhedr 1 +geometr 1 +object 1 +verycompl 1 +topolog 1 +hole 1 +intern 1 +andautomat 1 +creat 1 +unstructuredtetrahedr 1 +base 1 +algorithm 1 +work 1 +scott 1 +also 1 +solv 1 +ellipt 1 +boundaryvalu 1 +grad 1 +domain 1 +iswritten 1 +matlab 1 +distributedfor 1 +free 1 +distributionbegan 1 +releas 1 +novemb 1 +featur 1 +manyimprov 1 +includ 1 +faster 1 +vrml 1 +graphic 1 +much 1 +cleaner 1 +compat 1 +microsoft 1 +window 1 +well 1 +unix 1 +compatibilitywith 1 +pleasese 1 +document 1 +page 1 +annual 1 +reportback 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..befeca06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,10 @@ +arun 1 +verma 1 +homepag 1 +need 1 +browser 1 +support 1 +frame 1 +netscap 1 +higher 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..eb3d6123 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,10 @@ +vinc 1 +browser 1 +us 1 +suck 1 +download 1 +date 1 +netscap 1 +read 1 +page 1 +thank 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..8dcd8408 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,15 @@ +vitrano 1 +home 1 +pagehei 1 +start 1 +thing 1 +give 1 +break 1 +internet 1 +engin 1 +pagec 1 +advanc 1 +databas 1 +multimedia 1 +pageer 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..18dca5a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,101 @@ +kolla 1 +home 1 +ground 1 +copyright 1 +right 1 +reserv 1 +held 1 +respons 1 +unwant 1 +effect 1 +usag 1 +applet 1 +deriv 1 +warrante 1 +usabl 1 +specif 1 +applic 1 +given 1 +impli 1 +function 1 +scrollit 1 +seed 1 +reach 1 +page 1 +vivek 1 +view 1 +million 1 +zillion 1 +wonder 1 +call 1 +send 1 +mail 1 +cornel 1 +dont 1 +expect 1 +back 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +visitor 1 +number 1 +happi 1 +contact 1 +info 1 +transmit 1 +thoughtsfriend 1 +foeslinksa 1 +small 1 +collect 1 +relev 1 +life 1 +us 1 +maintain 1 +larg 1 +list 1 +favorit 1 +link 1 +think 1 +wast 1 +time 1 +search 1 +someth 1 +might 1 +tryalta 1 +vista 1 +yahoo 1 +theinktomiresumein 1 +htmlin 1 +postscriptin 1 +word 1 +perfectin 1 +asciith 1 +current 1 +good 1 +browser 1 +know 1 +java 1 +clock 1 +wanna 1 +around 1 +world 1 +need 1 +capabl 1 +anim 1 +site 1 +construct 1 +mani 1 +imag 1 +heavi 1 +file 1 +like 1 +note 1 +promis 1 +made 1 +regard 1 +qualiti 1 +visit 1 +fulli 1 +support 1 +technolog 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..a7cabbbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,81 @@ +vlad 1 +home 1 +pagevladimir 1 +kotlyarvladimir 1 +cornel 1 +look 1 +like 1 +fall 1 +david 1 +wereteach 1 +might 1 +guess 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +cornellunivers 1 +work 1 +prof 1 +keshav 1 +pingali 1 +research 1 +interest 1 +compil 1 +high 1 +perform 1 +architectur 1 +particular 1 +parallel 1 +spars 1 +matrix 1 +code 1 +part 1 +bernoulli 1 +project 1 +member 1 +group 1 +paul 1 +stodghil 1 +andindu 1 +kodukulapubl 1 +henri 1 +kissing 1 +said 1 +lawyer 1 +professor 1 +friend 1 +legal 1 +profess 1 +remind 1 +comment 1 +abritish 1 +judg 1 +differ 1 +sveri 1 +simpl 1 +lord 1 +den 1 +function 1 +find 1 +asolut 1 +everi 1 +difficulti 1 +present 1 +wherea 1 +functionof 1 +solut 1 +todayth 1 +number 1 +seem 1 +outpac 1 +ofsolut 1 +either 1 +hardenough 1 +mani 1 +govern 1 +support 1 +privaci 1 +strong 1 +encrypt 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..09633dbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,17 @@ +vijai 1 +menonvijai 1 +menon 1 +graduat 1 +student 1 +cornel 1 +offic 1 +address 1 +home 1 +rhode 1 +hall 1 +mapl 1 +univers 1 +ithaca 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..4f87fc23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,49 @@ +chen 1 +home 1 +pagewei 1 +upson 1 +hall 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +weichen 1 +cornel 1 +current 1 +third 1 +year 1 +student 1 +computersci 1 +univers 1 +receiv 1 +bachelorand 1 +master 1 +degre 1 +scienc 1 +tsinghua 1 +beij 1 +chinami 1 +interest 1 +academ 1 +distributedsystem 1 +fault 1 +toler 1 +algorithm 1 +work 1 +professor 1 +toueg 1 +failur 1 +detect 1 +group 1 +membership 1 +inpartition 1 +network 1 +system 1 +spare 1 +time 1 +soccer 1 +resum 1 +bookmark 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..4edcdefc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,21 @@ +weitsang 1 +homepag 1 +lwhere 1 +fromwhat 1 +watchwhat 1 +movi 1 +likec 1 +page 1 +wrotepictur 1 +drawa 1 +window 1 +motifcomput 1 +theoryhom 1 +vimi 1 +tsearch 1 +webcoolest 1 +sitessharewar 1 +archivem 1 +newspap 1 +onlineunivers 1 +site 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..fedc1f34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,82 @@ +hung 1 +glavin 1 +address 1 +mapl 1 +avenu 1 +ithaca 1 +telephon 1 +photo 1 +academ 1 +background 1 +graduat 1 +nation 1 +taiwan 1 +univers 1 +comput 1 +scienc 1 +plan 1 +cornel 1 +habit 1 +sport 1 +basketbal 1 +billiard 1 +tabl 1 +tenni 1 +bowl 1 +swim 1 +volleybal 1 +other 1 +sing 1 +drive 1 +danc 1 +except 1 +studi 1 +favorit 1 +team 1 +orlando 1 +magic 1 +atlanta 1 +brave 1 +player 1 +anferne 1 +hardawai 1 +technic 1 +skill 1 +understand 1 +distribut 1 +oper 1 +system 1 +graphic 1 +multimedia 1 +network 1 +databas 1 +vision 1 +financi 1 +calcul 1 +extens 1 +window 1 +java 1 +program 1 +final 1 +project 1 +paper 1 +warp 1 +morph 1 +rivl 1 +partial 1 +result 1 +meng 1 +webpaint 1 +job 1 +interest 1 +market 1 +relat 1 +field 1 +softwar 1 +develop 1 +resum 1 +page 1 +still 1 +construct 1 +email 1 +whkao 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..1ed1a812 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,150 @@ +william 1 +visitor 1 +jersei 1 +exit 1 +minut 1 +awai 1 +princeton 1 +master 1 +student 1 +comput 1 +scienceat 1 +cornel 1 +univers 1 +degre 1 +engineeringand 1 +mathemat 1 +sciencefrom 1 +carneig 1 +mellon 1 +didresearch 1 +project 1 +engin 1 +design 1 +research 1 +center 1 +robot 1 +institu 1 +spent 1 +year 1 +write 1 +oper 1 +system 1 +xsro 1 +hpux 1 +motorola 1 +atft 1 +lauderdal 1 +florida 1 +besid 1 +sleep 1 +school 1 +work 1 +like 1 +optim 1 +parallel 1 +mpeg 1 +encod 1 +cornellopoli 1 +network 1 +databas 1 +sector 1 +analysi 1 +partner 1 +compet 1 +usta 1 +tenni 1 +tournment 1 +south 1 +could 1 +never 1 +somehow 1 +parti 1 +enjoi 1 +weekli 1 +match 1 +mani 1 +beauti 1 +place 1 +plai 1 +faviorit 1 +on 1 +boca 1 +ratonkei 1 +west 1 +get 1 +coral 1 +spring 1 +live 1 +collect 1 +piano 1 +concerto 1 +beethoven 1 +chopin 1 +gershwin 1 +liszt 1 +mendelssohn 1 +mozart 1 +rachmaninoff 1 +ravel 1 +tchaikovski 1 +also 1 +violinconcerto 1 +probabl 1 +guess 1 +type 1 +even 1 +wrote 1 +graduat 1 +applic 1 +essai 1 +base 1 +reason 1 +reject 1 +fall 1 +classesc 1 +softwar 1 +technolog 1 +techniquec 1 +formal 1 +methodsc 1 +multimedia 1 +systemsc 1 +scienc 1 +colloquiumc 1 +cool 1 +tool 1 +seminar 1 +purifi 1 +quantifi 1 +wart 1 +present 1 +practic 1 +distribut 1 +computingc 1 +practicum 1 +high 1 +perform 1 +computerc 1 +thrive 1 +inform 1 +revolut 1 +sectorcool 1 +links_leap 1 +copi 1 +frogski 1 +serverident 1 +crisi 1 +testweath 1 +undergroundinktomi 1 +search 1 +enginequest 1 +week 1 +archiveslast 1 +updat 1 +campu 1 +address 1 +mapl 1 +ecithaca 1 +york 1 +wwlee 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..afd1e9b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,54 @@ +welcom 1 +xichun 1 +jennif 1 +home 1 +page 1 +upson 1 +hall 1 +depart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +offic 1 +cornel 1 +edui 1 +current 1 +master 1 +engin 1 +student 1 +scienc 1 +atcornel 1 +receiv 1 +bachelor 1 +degre 1 +zhejiang 1 +univers 1 +hangzhou 1 +china 1 +site 1 +javaworldsunhigh 1 +school 1 +alumni 1 +alumnimeng 1 +project 1 +phong 1 +shade 1 +gouraud 1 +cours 1 +spring 1 +graphic 1 +high 1 +capac 1 +inform 1 +network 1 +databas 1 +manag 1 +taken 1 +fall 1 +multimedia 1 +systemsc 1 +softwar 1 +engineeringc 1 +oper 1 +systeme 1 +communicationby 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..cdbbcf17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,2 @@ +topic 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..80e9e7c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,80 @@ +cachet 1 +relat 1 +project 1 +effici 1 +increment 1 +computationderiv 1 +programsa 1 +gener 1 +systemat 1 +transform 1 +approach 1 +improv 1 +comput 1 +themeprogram 1 +analysi 1 +attribut 1 +base 1 +interact 1 +system 1 +usessystemat 1 +program 1 +techniqu 1 +deriveincrement 1 +written 1 +function 1 +languag 1 +select 1 +public 1 +teitelbaum 1 +deriv 1 +scienc 1 +februari 1 +cach 1 +intermedi 1 +result 1 +proceed 1 +sigplan 1 +symposium 1 +partial 1 +evalu 1 +semant 1 +manipul 1 +page 1 +jolla 1 +california 1 +june 1 +stoller 1 +discov 1 +auxiliari 1 +inform 1 +annual 1 +sigact 1 +principl 1 +petersburg 1 +beach 1 +florida 1 +januari 1 +knowledg 1 +softwar 1 +engin 1 +confer 1 +boston 1 +massachusett 1 +novemb 1 +ieee 1 +societi 1 +press 1 +strength 1 +reduct 1 +juli 1 +peoplei 1 +anni 1 +liutim 1 +teitelbaumkeyword 1 +optim 1 +cacheti 1 +yanhong 1 +cornel 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..3229a886 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,191 @@ +yanhong 1 +anni 1 +home 1 +pageyanhong 1 +post 1 +doctor 1 +associ 1 +work 1 +professor 1 +teitelbaum 1 +research 1 +interest 1 +gener 1 +systemat 1 +approach 1 +improv 1 +effici 1 +ofcomput 1 +program 1 +analysi 1 +transform 1 +techniqu 1 +forincrement 1 +comput 1 +parallel 1 +concurr 1 +applic 1 +optim 1 +compil 1 +languag 1 +base 1 +interactivesystem 1 +algorithm 1 +design 1 +develop 1 +softwar 1 +systemorgan 1 +mainten 1 +select 1 +public 1 +talksph 1 +dissert 1 +increment 1 +semant 1 +basedsystemat 1 +cornel 1 +univers 1 +ithaca 1 +york 1 +januari 1 +also 1 +appear 1 +technic 1 +report 1 +octob 1 +abstractjourn 1 +deriv 1 +scienc 1 +februari 1 +refere 1 +confer 1 +stoller 1 +discov 1 +auxiliari 1 +inform 1 +proceed 1 +annual 1 +sigplan 1 +sigact 1 +symposium 1 +principl 1 +page 1 +petersburg 1 +beach 1 +florida 1 +cachet 1 +interact 1 +attribut 1 +system 1 +knowledg 1 +engin 1 +boston 1 +massachusett 1 +novemb 1 +ieee 1 +societi 1 +press 1 +cach 1 +intermedi 1 +result 1 +intern 1 +young 1 +scientist 1 +beij 1 +china 1 +juli 1 +peke 1 +partial 1 +evalu 1 +manipul 1 +jolla 1 +california 1 +june 1 +tsinghua 1 +zhang 1 +wang 1 +formal 1 +uncertainti 1 +reason 1 +model 1 +combin 1 +qualit 1 +partit 1 +quantit 1 +descript 1 +multi 1 +factor 1 +problem 1 +fuzzi 1 +world 1 +congress 1 +seattl 1 +washington 1 +august 1 +inexact 1 +profession 1 +publish 1 +hous 1 +survei 1 +map 1 +depart 1 +march 1 +septemb 1 +revis 1 +wakayama 1 +line 1 +break 1 +xerox 1 +webster 1 +center 1 +talk 1 +kestrel 1 +institut 1 +palo 1 +alto 1 +dagstuhl 1 +seminar 1 +dynam 1 +schloss 1 +germani 1 +automat 1 +laboratori 1 +document 1 +us 1 +obtain 1 +present 1 +oggeb 1 +expert 1 +basin 1 +implement 1 +test 1 +usag 1 +manual 1 +summari 1 +explor 1 +ri 1 +tshinghua 1 +author 1 +song 1 +huang 1 +current 1 +project 1 +compos 1 +build 1 +compon 1 +upson 1 +hallcornel 1 +universityithaca 1 +offic 1 +last 1 +updat 1 +move 1 +tocomput 1 +department 1 +lindlei 1 +hallindiana 1 +universitybloomington 1 +indiana 1 +eduhttp 1 +peopl 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..857c9fa6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,24 @@ +home 1 +page 1 +cheng 1 +huangyi 1 +huang 1 +upson 1 +hall 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +email 1 +ychuang 1 +edui 1 +graduat 1 +student 1 +favorit 1 +link 1 +onlin 1 +documentscoursesprojectaccess 1 +byvisitorslast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..6956620b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,120 @@ +chung 1 +home 1 +pagewelcom 1 +everybodi 1 +name 1 +chungyou 1 +thvisitor 1 +sinc 1 +april 1 +studi 1 +comput 1 +scienc 1 +master 1 +degre 1 +cornel 1 +univers 1 +pleas 1 +check 1 +school 1 +engin 1 +sciencecornel 1 +universitywher 1 +origin 1 +came 1 +seoul 1 +korea 1 +graduat 1 +high 1 +america 1 +went 1 +kindergarten 1 +elementari 1 +sang 1 +myoung 1 +junior 1 +kang 1 +sung 1 +women 1 +georg 1 +mason 1 +universityin 1 +virginia 1 +happiest 1 +thing 1 +life 1 +marri 1 +wonder 1 +husband 1 +chang 1 +work 1 +forsaic 1 +programm 1 +pictur 1 +would 1 +like 1 +beauti 1 +moment 1 +java 1 +avail 1 +browser 1 +click 1 +free 1 +plai 1 +keyboard 1 +actual 1 +piano 1 +better 1 +listen 1 +music 1 +kind 1 +korean 1 +love 1 +shin 1 +seung 1 +hoon 1 +moon 1 +classic 1 +forth 1 +brows 1 +world 1 +wide 1 +us 1 +link 1 +interest 1 +search 1 +want 1 +onlin 1 +newpap 1 +hangook 1 +ilbo 1 +chosun 1 +joongang 1 +hire 1 +resum 1 +word 1 +perfect 1 +version 1 +meng 1 +project 1 +still 1 +titl 1 +imag 1 +process 1 +appletyoosun 1 +person 1 +infom 1 +triphamm 1 +sbithaca 1 +phone 1 +emerg 1 +email 1 +ychung 1 +forward 1 +yooschung 1 +automat 1 +page 1 +construct 1 +last 1 +modifi 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..e5bd2a7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,221 @@ +yaron 1 +minski 1 +home 1 +page 1 +graduat 1 +student 1 +yminski 1 +cornel 1 +edudepart 1 +computersci 1 +upson 1 +hall 1 +ithaca 1 +univers 1 +phone 1 +comstock 1 +place 1 +syracus 1 +current 1 +focus 1 +onfault 1 +toler 1 +distribut 1 +comput 1 +particular 1 +work 1 +thetacoma 1 +project 1 +attempt 1 +build 1 +oper 1 +system 1 +support 1 +forfault 1 +agent 1 +base 1 +flapdragon 1 +longer 1 +year 1 +livether 1 +still 1 +great 1 +veggi 1 +coop 1 +crash 1 +often 1 +nowadai 1 +slightli 1 +outof 1 +date 1 +webpag 1 +howev 1 +time 1 +notic 1 +anopen 1 +start 1 +need 1 +live 1 +highli 1 +recommendit 1 +though 1 +plai 1 +much 1 +ancientchines 1 +game 1 +extremlysimpl 1 +rule 1 +complic 1 +satisfi 1 +strategi 1 +like 1 +learn 1 +intro 1 +also 1 +want 1 +internet 1 +know 1 +nota 1 +good 1 +real 1 +person 1 +front 1 +better 1 +thannoth 1 +unix 1 +machin 1 +take 1 +look 1 +cgoban 1 +nicest 1 +goboard 1 +program 1 +seen 1 +make 1 +trivial 1 +easi 1 +thenet 1 +newli 1 +marri 1 +wife 1 +lisa 1 +go 1 +medic 1 +school 1 +suni 1 +health 1 +scienc 1 +center 1 +uniqu 1 +qualifi 1 +within 1 +hour 1 +twenti 1 +minutesof 1 +love 1 +everi 1 +favoritepoem 1 +lafiglia 1 +piang 1 +theidea 1 +order 1 +west 1 +advicefor 1 +resumesom 1 +interestinglink 1 +movi 1 +critic 1 +impress 1 +exampl 1 +fairli 1 +simpl 1 +technolog 1 +us 1 +effect 1 +site 1 +engin 1 +rate 1 +compar 1 +other 1 +come 1 +recommend 1 +found 1 +almost 1 +alarmingli 1 +contrast 1 +firefli 1 +tri 1 +thing 1 +fail 1 +miser 1 +yahoo 1 +yellow 1 +address 1 +give 1 +direct 1 +seven 1 +closest 1 +bakeri 1 +perfect 1 +bigbook 1 +bigyellow 1 +advertis 1 +york 1 +read 1 +free 1 +plu 1 +save 1 +paper 1 +note 1 +download 1 +text 1 +slate 1 +magazin 1 +hate 1 +admit 1 +microsoft 1 +someth 1 +right 1 +thought 1 +well 1 +execut 1 +expect 1 +michael 1 +kinslei 1 +run 1 +linux 1 +compani 1 +instal 1 +maintain 1 +list 1 +amazon 1 +book 1 +solid 1 +discount 1 +virtual 1 +bookstor 1 +hope 1 +local 1 +booksel 1 +town 1 +brother 1 +follow 1 +public 1 +begin 1 +block 1 +version 1 +comment 1 +process 1 +mailcrypt 1 +emac 1 +interfacemqbtazgjohoaaaedalfhlgjmdg 1 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 1 +rbylf 1 +zwqujcioczoecv 1 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 1 +gkgarsokrinnoazihja 1 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 1 +wumjgzsnvispwkrvzgdrojswmc 1 +eigsqsb 1 +bsbpw 1 +jcwz 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..a0b43bb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,43 @@ +yuichi 1 +tsuchimoto 1 +home 1 +pageyuichi 1 +pagecours 1 +workfal 1 +current 1 +semest 1 +system 1 +program 1 +oper 1 +systemsc 1 +languag 1 +softwar 1 +engineeringspr 1 +introduct 1 +compil 1 +translatorsc 1 +practicum 1 +machin 1 +visionfal 1 +foundat 1 +artifici 1 +intelligencec 1 +theori 1 +computingi 1 +look 1 +unit 1 +state 1 +resum 1 +format 1 +postscript 1 +address 1 +cornel 1 +eduhttp 1 +info 1 +peopl 1 +last 1 +modif 1 +novemb 1 +http 1 +welcom 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..a94b6ebe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,326 @@ +cornel 1 +activ 1 +messagescornel 1 +messag 1 +implementationsact 1 +neta 1 +sourc 1 +code 1 +releas 1 +part 1 +thegener 1 +conform 1 +spec 1 +moreinform 1 +project 1 +page 1 +object 1 +codereleas 1 +avail 1 +thegam 1 +pleas 1 +read 1 +readm 1 +instal 1 +file 1 +distribut 1 +instructionson 1 +us 1 +inform 1 +contact 1 +chao 1 +chang 1 +grzegorz 1 +czajkowski 1 +thorsten 1 +eicken 1 +releasenot 1 +fileto 1 +find 1 +previou 1 +version 1 +currentvers 1 +also 1 +major 1 +differencebetween 1 +modifi 1 +libmpci 1 +includ 1 +thedistribut 1 +document 1 +packag 1 +fordetail 1 +interest 1 +know 1 +current 1 +pleaseclick 1 +send 1 +briefnot 1 +let 1 +someth 1 +organ 1 +theus 1 +intend 1 +select 1 +public 1 +messageslow 1 +latenc 1 +commun 1 +ibmrisc 1 +system 1 +chri 1 +hawblitzel 1 +appear 1 +ieeesupercomput 1 +pittsburgh 1 +novemb 1 +abstract 1 +power 1 +commerci 1 +mpp 1 +spiteof 1 +fast 1 +processor 1 +high 1 +network 1 +bandwidth 1 +scommun 1 +inferior 1 +older 1 +machin 1 +tmccm 1 +meiko 1 +paper 1 +investig 1 +primit 1 +altern 1 +standardmessag 1 +pass 1 +order 1 +reduc 1 +overhead 1 +tooffer 1 +good 1 +build 1 +block 1 +higher 1 +layer 1 +softwar 1 +first 1 +describ 1 +implement 1 +activemessag 1 +directli 1 +networkadapt 1 +compar 1 +yieldsa 1 +round 1 +trip 1 +lower 1 +secondpart 1 +demonstr 1 +communicationsubstr 1 +split 1 +well 1 +cbenchmark 1 +show 1 +lowmessag 1 +throughput 1 +compens 1 +networklat 1 +base 1 +freeli 1 +availablempich 1 +achiev 1 +perform 1 +equival 1 +onth 1 +benchmark 1 +design 1 +technic 1 +report 1 +februari 1 +andevalu 1 +implementationbenchmark 1 +standard 1 +adapt 1 +firmwar 1 +butdo 1 +assumefamiliar 1 +concept 1 +underli 1 +mainperform 1 +characterist 1 +word 1 +timeof 1 +asymptot 1 +present 1 +detail 1 +focuseson 1 +analysi 1 +comparison 1 +smessag 1 +gener 1 +specif 1 +defin 1 +interfac 1 +portabl 1 +across 1 +varieti 1 +parallel 1 +theu 1 +cluster 1 +themeiko 1 +thehpam 1 +fddi 1 +ring 1 +theparagon 1 +thesp 1 +networksus 1 +veena 1 +avula 1 +anyndia 1 +basu 1 +vineet 1 +buch 1 +interconnect 1 +palo 1 +alto 1 +abridg 1 +ieee 1 +micro 1 +magazin 1 +slide 1 +talk 1 +recent 1 +develop 1 +architectur 1 +forparallel 1 +made 1 +signific 1 +progress 1 +thecommun 1 +magnitud 1 +ascompar 1 +earlier 1 +propos 1 +examin 1 +whether 1 +thesetechniqu 1 +carri 1 +workstat 1 +connect 1 +anatm 1 +even 1 +though 1 +oper 1 +systemsoftwar 1 +equip 1 +optim 1 +streamcommun 1 +allow 1 +direct 1 +protect 1 +user 1 +level 1 +access 1 +thenetwork 1 +without 1 +reliabl 1 +transmiss 1 +flowcontrol 1 +differ 1 +incommun 1 +builtfrom 1 +hardwar 1 +compon 1 +state 1 +artmultiprocessor 1 +lack 1 +flow 1 +control 1 +systemcoordin 1 +affect 1 +significantli 1 +andrequir 1 +larger 1 +buffer 1 +multiprocessor 1 +evalu 1 +prototyp 1 +model 1 +clusterinterconnect 1 +measur 1 +showappl 1 +applic 1 +microsecond 1 +smallmessag 1 +roughli 1 +messagesimplement 1 +think 1 +mechan 1 +integr 1 +andcomput 1 +culler 1 +goldstein 1 +schauser 1 +proceed 1 +symp 1 +comput 1 +gold 1 +coast 1 +australia 1 +abstractth 1 +challeng 1 +larg 1 +scale 1 +tominim 1 +overlapcomput 1 +coordin 1 +sacrificingprocessor 1 +cost 1 +exist 1 +passingmultiprocessor 1 +unnecessarili 1 +researchprototyp 1 +driven 1 +communicationoverhead 1 +poor 1 +introduc 1 +simplecommun 1 +isintrins 1 +effect 1 +thehardwar 1 +offer 1 +tremend 1 +flexibl 1 +ncube 1 +phase 1 +share 1 +memoryextens 1 +messagesar 1 +suffici 1 +dynam 1 +schedul 1 +languag 1 +forwhich 1 +toler 1 +becom 1 +program 1 +compil 1 +concern 1 +hardwaresupport 1 +desir 1 +outlin 1 +rang 1 +ofenhanc 1 +mainstream 1 +efficientcommun 1 +thesi 1 +univers 1 +california 1 +berkelei 1 +sitesact 1 +messagesin 1 +projectfor 1 +contactthorsten 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..62ddb18c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,202 @@ +horu 1 +distribut 1 +comput 1 +environmenthoru 1 +environ 1 +kenneth 1 +birman 1 +cornel 1 +robbert 1 +reness 1 +shoru 1 +effort 1 +develop 1 +program 1 +reliabledistribut 1 +last 1 +year 1 +us 1 +demonstrategroupwar 1 +fault 1 +toler 1 +high 1 +perform 1 +network 1 +foundto 1 +offer 1 +higher 1 +similar 1 +system 1 +novel 1 +featur 1 +ofhoru 1 +flexibl 1 +softwar 1 +architectur 1 +applic 1 +support 1 +virtual 1 +synchronousprocess 1 +group 1 +technolog 1 +prior 1 +work 1 +isi 1 +toolkit 1 +becom 1 +signific 1 +commerci 1 +success 1 +also 1 +offersa 1 +securityand 1 +privaci 1 +view 1 +importantresearch 1 +advanc 1 +extend 1 +provid 1 +extrem 1 +latenc 1 +performancer 1 +time 1 +capabl 1 +approach 1 +combin 1 +element 1 +commun 1 +calledact 1 +messageswith 1 +multi 1 +media 1 +playbacksystem 1 +calledcontinu 1 +expect 1 +demonstr 1 +speed 1 +interact 1 +remot 1 +multimediaserv 1 +might 1 +telemedicin 1 +videoon 1 +demand 1 +retain 1 +exist 1 +andsecur 1 +option 1 +synchroni 1 +model 1 +creat 1 +substanti 1 +user 1 +base 1 +expectrapid 1 +uptak 1 +within 1 +matur 1 +spana 1 +wide 1 +rang 1 +industri 1 +includ 1 +telecommun 1 +financialtrad 1 +stock 1 +market 1 +autom 1 +factori 1 +floor 1 +process 1 +control 1 +fordiscret 1 +electron 1 +compon 1 +manufactur 1 +traffic 1 +space 1 +basedcommun 1 +manag 1 +beingexplor 1 +sever 1 +branch 1 +militari 1 +well 1 +othernon 1 +govern 1 +among 1 +visibl 1 +isth 1 +naval 1 +hiper 1 +project 1 +explor 1 +systemthat 1 +prototyp 1 +futur 1 +enhanc 1 +aegi 1 +battleradar 1 +would 1 +benefitfrom 1 +access 1 +initi 1 +plan 1 +make 1 +possibl 1 +migrateisi 1 +chang 1 +thu 1 +benefit 1 +communityin 1 +direct 1 +transit 1 +occur 1 +licens 1 +agreementswith 1 +subsidiari 1 +stratu 1 +howev 1 +avail 1 +research 1 +isdescrib 1 +detail 1 +public 1 +manual 1 +look 1 +hope 1 +mixtur 1 +technologieswil 1 +permit 1 +beseen 1 +next 1 +gener 1 +groupwar 1 +illustr 1 +belowshow 1 +mission 1 +integr 1 +data 1 +varieti 1 +ground 1 +resourc 1 +andus 1 +coordin 1 +action 1 +variou 1 +theatr 1 +asset 1 +thissort 1 +utmost 1 +reliabl 1 +secur 1 +whilealso 1 +failur 1 +rapidli 1 +reconfigur 1 +respond 1 +impact 1 +civilianand 1 +dept 1 +scienc 1 +univers 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..5763b464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,77 @@ +cornel 1 +medianet 1 +projectmedianet 1 +high 1 +perform 1 +platform 1 +network 1 +media 1 +process 1 +combin 1 +technolog 1 +develop 1 +research 1 +todevelop 1 +flexibl 1 +testb 1 +store 1 +transport 1 +us 1 +multimedia 1 +data 1 +user 1 +level 1 +architectur 1 +access 1 +dramat 1 +improv 1 +facilit 1 +commun 1 +protocolsth 1 +order 1 +magnitud 1 +communicationmak 1 +parallel 1 +comput 1 +workstat 1 +cluster 1 +practic 1 +horu 1 +group 1 +communicationprimit 1 +adapt 1 +industri 1 +strength 1 +tool 1 +applic 1 +secur 1 +reliabl 1 +primit 1 +critic 1 +foradvanc 1 +militari 1 +commerci 1 +toolkit 1 +approach 1 +distribut 1 +audio 1 +video 1 +portabl 1 +build 1 +includeaudio 1 +rapid 1 +prototyp 1 +multimediaappl 1 +fund 1 +project 1 +provid 1 +contract 1 +fromth 1 +darpa 1 +inform 1 +technologyofficefor 1 +contact 1 +thorstenvon 1 +eicken 1 +brian 1 +smith 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..4c7cffc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,31 @@ +cornel 1 +nuprl 1 +autom 1 +reason 1 +project 1 +introduct 1 +theori 1 +theorem 1 +browser 1 +design 1 +written 1 +vaughn 1 +articl 1 +user 1 +document 1 +relat 1 +public 1 +link 1 +class 1 +note 1 +linux 1 +announc 1 +suggest 1 +feedback 1 +help 1 +main 1 +index 1 +curiou 1 +mani 1 +page 1 +askaltavista 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..295edd2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,217 @@ +cuc 1 +comput 1 +scienc 1 +us 1 +machin 1 +call 1 +granita 1 +eight 1 +node 1 +granitathrough 1 +login 1 +design 1 +asinteract 1 +shell 1 +instal 1 +tcsh 1 +bash 1 +experi 1 +problemsdur 1 +first 1 +remov 1 +oper 1 +system 1 +specif 1 +stufffrom 1 +configur 1 +file 1 +exampl 1 +haveth 1 +arch 1 +command 1 +unam 1 +instead 1 +readm 1 +contain 1 +informationabout 1 +releas 1 +addit 1 +manyou 1 +infoexplor 1 +inform 1 +commandsand 1 +usag 1 +program 1 +remot 1 +displai 1 +properli 1 +type 1 +info 1 +parallel 1 +job 1 +neither 1 +activemassag 1 +split 1 +peor 1 +read 1 +activ 1 +messagesor 1 +hardwar 1 +cornel 1 +theori 1 +center 1 +homegrown 1 +softwarein 1 +gener 1 +local 1 +softwar 1 +besur 1 +path 1 +csplit 1 +simpl 1 +extens 1 +forparallel 1 +provid 1 +global 1 +address 1 +space 1 +though 1 +globalpoint 1 +dereferenc 1 +like 1 +regular 1 +pointer 1 +phase 1 +assign 1 +statement 1 +allow 1 +programm 1 +hide 1 +latencyof 1 +access 1 +overlap 1 +commun 1 +makefil 1 +found 1 +bench 1 +work 1 +sourc 1 +setenv 1 +user 1 +shellsshould 1 +execut 1 +compil 1 +creat 1 +look 1 +sampl 1 +variou 1 +directori 1 +gmake 1 +must 1 +includ 1 +make 1 +asact 1 +messag 1 +scriptsloc 1 +programfoo 1 +processor 1 +foodebug 1 +debug 1 +follow 1 +step 1 +need 1 +done 1 +insert 1 +splitc_debug 1 +aftersplitc_main 1 +describ 1 +previou 1 +section 1 +commonli 1 +ongranita 1 +enter 1 +continu 1 +hit 1 +return 1 +onto 1 +want 1 +youwant 1 +master 1 +open 1 +locat 1 +insid 1 +am_run 1 +thenattach 1 +theth 1 +proc 1 +process 1 +proce 1 +attach 1 +stop 1 +andyou 1 +breakpoint 1 +stack 1 +frame 1 +messagesact 1 +overhead 1 +layerthat 1 +offer 1 +high 1 +perform 1 +mani 1 +nativ 1 +layer 1 +spam 1 +avail 1 +main 1 +characterist 1 +word 1 +round 1 +triplat 1 +asymptot 1 +network 1 +bandwidth 1 +librari 1 +libspgam 1 +aand 1 +header 1 +beforerun 1 +runningprgm 1 +script 1 +also 1 +mpimpi 1 +popularmessag 1 +pass 1 +interfac 1 +portabl 1 +animplement 1 +base 1 +mpich 1 +run 1 +overact 1 +easiest 1 +link 1 +ampicc 1 +whichi 1 +built 1 +fooyou 1 +pleas 1 +lookat 1 +examplesin 1 +ampi 1 +exactli 1 +likeordinari 1 +sure 1 +softwaresoftwar 1 +fortran 1 +xpdbx 1 +matlab 1 +emac 1 +bison 1 +replic 1 +problemsif 1 +difficulti 1 +contact 1 +czar 1 +grzegorz 1 +czajkowski 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..6db0f5d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,105 @@ +cornel 1 +model 1 +simul 1 +project 1 +home 1 +page 1 +enorm 1 +effort 1 +current 1 +expend 1 +creat 1 +scientificsoftwar 1 +particularli 1 +physic 1 +system 1 +defin 1 +oncomplex 1 +geometri 1 +us 1 +advanc 1 +comput 1 +hardwar 1 +thegoal 1 +simlab 1 +reduc 1 +bringingtogeth 1 +technolog 1 +geometr 1 +symbolicmathemat 1 +numer 1 +analysi 1 +compil 1 +code 1 +gener 1 +andform 1 +method 1 +tool 1 +rais 1 +semant 1 +levelat 1 +possibl 1 +scientif 1 +softwar 1 +overview 1 +softwarepackag 1 +select 1 +research 1 +activ 1 +collabor 1 +mathemat 1 +environ 1 +propos 1 +postscript 1 +version 1 +guarante 1 +qualiti 1 +mesh 1 +microstoragearchitectur 1 +weyl 1 +computeralgebra 1 +substrat 1 +high 1 +levelprogram 1 +languag 1 +synthes 1 +thechain 1 +algebra 1 +topolog 1 +program 1 +present 1 +compon 1 +thearpa 1 +nist 1 +madefast 1 +design 1 +manufactur 1 +exercis 1 +longer 1 +direct 1 +insystem 1 +richard 1 +zippel 1 +ideason 1 +proce 1 +includ 1 +brief 1 +discuss 1 +ofnon 1 +contemporan 1 +commun 1 +microstorag 1 +architectur 1 +theus 1 +transform 1 +chainsprogram 1 +complextopolog 1 +engin 1 +numericalalgorithm 1 +rick 1 +palmer 1 +peopl 1 +public 1 +report 1 +paul 1 +chew 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..fd967056 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,51 @@ +cornel 1 +split 1 +ccornel 1 +implementationssplit 1 +neta 1 +sourc 1 +code 1 +releas 1 +prepar 1 +isimpl 1 +activ 1 +messagesfor 1 +inform 1 +contact 1 +thorsten 1 +eicken 1 +ofsplit 1 +distr 1 +implementedon 1 +spam 1 +contactchi 1 +chao 1 +chang 1 +grzegorz 1 +czajkowski 1 +thorstenvon 1 +share 1 +memori 1 +multiprocessorsa 1 +multiprocessor 1 +runningsolari 1 +mattwelsh 1 +page 1 +select 1 +public 1 +cparallel 1 +program 1 +culler 1 +dusseau 1 +goldstein 1 +krishnamurthi 1 +lumetta 1 +yelick 1 +proceed 1 +supercomput 1 +novemb 1 +abstractproject 1 +sitessplit 1 +chome 1 +berkelei 1 +contactthorsten 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..32c1466f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,9 @@ +page 1 +move 1 +browser 1 +redirect 1 +second 1 +http 1 +cornel 1 +default 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..89a1dd26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,220 @@ +cornel 1 +csrvlcornel 1 +robot 1 +vision 1 +laboratorywelcom 1 +nich 1 +laboratori 1 +match 1 +rrentli 1 +develop 1 +pleas 1 +hard 1 +question 1 +comment 1 +direct 1 +thank 1 +csrvl 1 +comput 1 +scienc 1 +locat 1 +univers 1 +ithaca 1 +three 1 +main 1 +area 1 +ofresearch 1 +prof 1 +daniel 1 +huttenloch 1 +multimedia 1 +applic 1 +ramin 1 +zabih 1 +includ 1 +distribut 1 +manipul 1 +micro 1 +electro 1 +mechan 1 +system 1 +mem 1 +bruce 1 +donald 1 +pictor 1 +tour 1 +current 1 +projectsth 1 +follow 1 +project 1 +activ 1 +supervis 1 +byramin 1 +automat 1 +detect 1 +andclassif 1 +scene 1 +break 1 +digit 1 +video 1 +mpeg 1 +browser 1 +allowingscen 1 +global 1 +motion 1 +base 1 +queri 1 +real 1 +time 1 +sourc 1 +transmiss 1 +full 1 +frame 1 +parallel 1 +onplatform 1 +nynet 1 +cluster 1 +number 1 +involv 1 +high 1 +perform 1 +imag 1 +implement 1 +split 1 +foru 1 +symmetr 1 +multiprocessor 1 +list 1 +potenti 1 +master 1 +sproject 1 +maintain 1 +justin 1 +miller 1 +work 1 +done 1 +unix 1 +currentlyconsid 1 +move 1 +windowsnt 1 +discuss 1 +theissuesher 1 +hope 1 +support 1 +microsoft 1 +select 1 +publicationsth 1 +paper 1 +research 1 +thecsrvl 1 +mani 1 +avail 1 +anonym 1 +public 1 +tech 1 +report 1 +server 1 +serverar 1 +program 1 +mobil 1 +scheme 1 +ree 1 +proc 1 +ieee 1 +intern 1 +confer 1 +automationnic 1 +franc 1 +complex 1 +homolog 1 +type 1 +triangul 1 +chang 1 +revis 1 +symposium 1 +foundat 1 +juan 1 +octob 1 +inform 1 +invari 1 +jen 1 +first 1 +workshop 1 +algorithm 1 +peter 1 +boston 1 +wilson 1 +andj 1 +latomb 1 +submit 1 +artifici 1 +intellig 1 +sensor 1 +configur 1 +task 1 +plan 1 +brigg 1 +proceed 1 +autom 1 +diego 1 +sensorless 1 +us 1 +massiv 1 +microfabr 1 +actuatorarrai 1 +bhringer 1 +mihailovich 1 +macdonald 1 +theori 1 +control 1 +actuat 1 +arrai 1 +oiso 1 +japan 1 +januari 1 +approach 1 +design 1 +micromechan 1 +hing 1 +structur 1 +extend 1 +abstract 1 +siggraph 1 +solid 1 +model 1 +montral 1 +quebc 1 +canada 1 +technic 1 +authorthes 1 +gener 1 +dynam 1 +index 1 +search 1 +author 1 +titl 1 +keyword 1 +scott 1 +cytacki 1 +associ 1 +professor 1 +pedro 1 +felzenszwalb 1 +ryan 1 +lilien 1 +michel 1 +maharbiz 1 +greg 1 +pass 1 +scharstein 1 +aaron 1 +stump 1 +szewczyk 1 +fernando 1 +viton 1 +voskuhl 1 +wayt 1 +matt 1 +welsh 1 +whelan 1 +assist 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..7530570e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,91 @@ +inform 1 +captur 1 +access 1 +projectinform 1 +accessth 1 +research 1 +group 1 +work 1 +waysthat 1 +comput 1 +locat 1 +ever 1 +increas 1 +volum 1 +ofonlin 1 +data 1 +determin 1 +structur 1 +extract 1 +forhuman 1 +user 1 +found 1 +john 1 +hopcroft 1 +davisin 1 +current 1 +area 1 +researchextract 1 +materi 1 +onlin 1 +document 1 +thestructur 1 +explicit 1 +extractinginform 1 +present 1 +tabular 1 +form 1 +relat 1 +databas 1 +construct 1 +summari 1 +overview 1 +collectionsof 1 +text 1 +nationwid 1 +librari 1 +sciencetechn 1 +report 1 +begun 1 +digit 1 +cornel 1 +computersci 1 +technic 1 +collect 1 +order 1 +make 1 +moreaccess 1 +internet 1 +avail 1 +server 1 +addit 1 +toit 1 +util 1 +gener 1 +commun 1 +thisdocu 1 +test 1 +consist 1 +dean 1 +krafft 1 +visitingscientist 1 +jimdavi 1 +well 1 +number 1 +graduat 1 +undergradu 1 +student 1 +fall 1 +project 1 +activ 1 +longer 1 +jrdpublicationsjam 1 +allan 1 +informationag 1 +build 1 +hyperlink 1 +proceed 1 +confer 1 +oninform 1 +knowledg 1 +manag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..3f3d768f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,15 @@ +zeno 1 +research 1 +groupzeno 1 +cornel 1 +multimedia 1 +group 1 +peopl 1 +mission 1 +project 1 +paper 1 +softwar 1 +curricula 1 +develop 1 +potpourri 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..51d519b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,102 @@ +home 1 +page 1 +pagewelcom 1 +depart 1 +issu 1 +pictur 1 +left 1 +see 1 +quit 1 +date 1 +frame 1 +challeng 1 +viewer 1 +syosset 1 +york 1 +town 1 +long 1 +island 1 +receiv 1 +bachelor 1 +scienc 1 +degre 1 +decemb 1 +work 1 +month 1 +california 1 +decid 1 +come 1 +back 1 +fall 1 +current 1 +master 1 +engin 1 +leav 1 +land 1 +ithaca 1 +mayb 1 +miss 1 +season 1 +rain 1 +wind 1 +snow 1 +actual 1 +enough 1 +santa 1 +barbara 1 +anywai 1 +plan 1 +graduat 1 +meng 1 +project 1 +prof 1 +ramin 1 +zabih 1 +cornel 1 +robot 1 +vision 1 +csrvl 1 +interest 1 +topic 1 +motion 1 +video 1 +segment 1 +gener 1 +process 1 +paper 1 +relat 1 +research 1 +area 1 +link 1 +compani 1 +green 1 +hill 1 +softwar 1 +californialockhe 1 +martin 1 +control 1 +system 1 +binghamton 1 +yorkaltera 1 +corp 1 +jose 1 +californiafun 1 +stuff 1 +game 1 +domainvth 1 +babylon 1 +siteoth 1 +place 1 +univers 1 +worldcareermosaictop 1 +site 1 +student 1 +email 1 +kmai 1 +cours 1 +still 1 +construct 1 +last 1 +modifi 1 +januari 1 +access 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..6b7ee29b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,42 @@ +utc 1 +home 1 +pagegener 1 +inform 1 +faculti 1 +recruit 1 +depart 1 +overview 1 +research 1 +group 1 +relat 1 +program 1 +academ 1 +admiss 1 +requir 1 +cours 1 +descript 1 +catalog 1 +public 1 +comput 1 +facil 1 +upcom 1 +event 1 +calendar 1 +seminar 1 +talk 1 +visitor 1 +schedulespag 1 +peopl 1 +class 1 +person 1 +page 1 +student 1 +organ 1 +alumni 1 +link 1 +find 1 +staff 1 +directoryth 1 +universitywww 1 +informationgrip 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..9f982fb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,67 @@ +woodrow 1 +bledso 1 +bledsoepet 1 +donnel 1 +centenni 1 +chair 1 +emeritu 1 +comput 1 +system 1 +professor 1 +mathemat 1 +univers 1 +utah 1 +salt 1 +lake 1 +citi 1 +california 1 +berkelei 1 +honor 1 +award 1 +profession 1 +servic 1 +third 1 +mileston 1 +autom 1 +theorem 1 +prove 1 +americanmathemat 1 +societi 1 +distinguish 1 +intern 1 +jointconfer 1 +presid 1 +american 1 +associ 1 +artifici 1 +intellig 1 +board 1 +truste 1 +joint 1 +confer 1 +artificialintellig 1 +onartifici 1 +editor 1 +journal 1 +presentarea 1 +interestautomat 1 +summari 1 +researchmi 1 +research 1 +focus 1 +automat 1 +theoremproof 1 +check 1 +involv 1 +heurist 1 +higher 1 +levelplan 1 +well 1 +exampl 1 +analog 1 +alsointerest 1 +learn 1 +previou 1 +profil 1 +index 1 +next 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..40d7c336 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,90 @@ +john 1 +werth 1 +werthsenior 1 +lectur 1 +research 1 +scientist 1 +mathemat 1 +emori 1 +univers 1 +washington 1 +profession 1 +servic 1 +chair 1 +educ 1 +board 1 +comput 1 +associ 1 +scienc 1 +accredit 1 +vice 1 +technic 1 +committe 1 +softwareengin 1 +ieee 1 +present 1 +area 1 +interestparallel 1 +program 1 +softwar 1 +engin 1 +compil 1 +computersci 1 +summari 1 +researchmi 1 +current 1 +interest 1 +environ 1 +parallelprogram 1 +andimplement 1 +issu 1 +also 1 +activ 1 +set 1 +direct 1 +incomput 1 +local 1 +nation 1 +level 1 +select 1 +recent 1 +publicationss 1 +hyder 1 +brown 1 +unifi 1 +model 1 +concurr 1 +debug 1 +proceed 1 +intern 1 +confer 1 +parallel 1 +process 1 +societi 1 +august 1 +sobek 1 +newton 1 +jain 1 +interact 1 +formal 1 +practic 1 +develop 1 +code 1 +note 1 +york 1 +springer 1 +verlag 1 +schedul 1 +oper 1 +multipl 1 +system 1 +journal 1 +distribut 1 +decemb 1 +gener 1 +applic 1 +thirteenth 1 +previou 1 +profil 1 +index 1 +next 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..6f19b8f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,53 @@ +alfr 1 +dale 1 +daleno 1 +person 1 +page 1 +trammel 1 +crow 1 +regent 1 +professor 1 +emeritu 1 +comput 1 +scienc 1 +exet 1 +colleg 1 +oxford 1 +england 1 +univers 1 +texa 1 +austin 1 +area 1 +interestdatabas 1 +manag 1 +system 1 +databas 1 +architectur 1 +summari 1 +researchmi 1 +interest 1 +involv 1 +applic 1 +parallel 1 +multi 1 +stagei 1 +problem 1 +studiedinclud 1 +data 1 +distribut 1 +strategi 1 +index 1 +andmap 1 +relat 1 +algebra 1 +oper 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..ae7a7c07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,92 @@ +jeffrei 1 +brumfield 1 +brumfieldsenior 1 +lectur 1 +math 1 +comput 1 +scienc 1 +mathemat 1 +univers 1 +georgia 1 +purdu 1 +honor 1 +award 1 +colleg 1 +natur 1 +teach 1 +excel 1 +area 1 +interestperform 1 +analysi 1 +distribut 1 +system 1 +oper 1 +summari 1 +researchi 1 +interest 1 +role 1 +model 1 +plai 1 +designersof 1 +studi 1 +performanceof 1 +exist 1 +propos 1 +queue 1 +network 1 +eachresourc 1 +repres 1 +tasksawait 1 +servic 1 +solut 1 +involv 1 +computationof 1 +respons 1 +time 1 +length 1 +throughput 1 +select 1 +recent 1 +publicationsj 1 +shen 1 +richter 1 +graf 1 +verdi 1 +visual 1 +environ 1 +design 1 +journal 1 +ofparallel 1 +miller 1 +chou 1 +perform 1 +modelingof 1 +object 1 +orient 1 +databas 1 +intern 1 +symposium 1 +parallel 1 +distributedsystem 1 +austin 1 +texa 1 +decemb 1 +concurr 1 +program 1 +modula 1 +inproceed 1 +sigcs 1 +technic 1 +loui 1 +bulletin 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..a535909c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,113 @@ +alan 1 +cline 1 +clinedavid 1 +bruton 1 +centenni 1 +professor 1 +comput 1 +scienc 1 +mathemat 1 +appli 1 +univers 1 +michigan 1 +profession 1 +servic 1 +editor 1 +algorithm 1 +commun 1 +associ 1 +transact 1 +softwar 1 +editori 1 +board 1 +siam 1 +journal 1 +scientif 1 +statisticalcomput 1 +director 1 +special 1 +interest 1 +group 1 +numer 1 +southern 1 +region 1 +socialrespons 1 +area 1 +interestmathemat 1 +analysi 1 +summari 1 +researchi 1 +transform 1 +tool 1 +whichcan 1 +problem 1 +involv 1 +constructionof 1 +explor 1 +methodolog 1 +formathemat 1 +particular 1 +major 1 +developmentha 1 +packag 1 +hundr 1 +subprogram 1 +curv 1 +andsurfac 1 +fit 1 +emploi 1 +tension 1 +spline 1 +select 1 +recent 1 +publicationsr 1 +renka 1 +scatter 1 +data 1 +us 1 +constrain 1 +delaunai 1 +triangul 1 +imac 1 +expert 1 +system 1 +symbol 1 +north 1 +holland 1 +king 1 +meyer 1 +rout 1 +schedul 1 +coast 1 +guard 1 +buoi 1 +tender 1 +interfac 1 +dimension 1 +solut 1 +closest 1 +node 1 +presenc 1 +barrier 1 +counter 1 +exampl 1 +three 1 +condit 1 +number 1 +estim 1 +statist 1 +moler 1 +stewart 1 +wilkinson 1 +matrix 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..c4b28dd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,69 @@ +edsger 1 +wybe 1 +dijkstra 1 +dijkstraschlumberg 1 +centenni 1 +chair 1 +comput 1 +sciencesprofessor 1 +mathematicskandidaatsexamen 1 +mathemat 1 +physic 1 +doctora 1 +examen 1 +theoret 1 +univers 1 +leydenph 1 +amsterdamhonor 1 +awardsacm 1 +ture 1 +award 1 +foreign 1 +honorari 1 +member 1 +american 1 +academi 1 +art 1 +sciencesmemb 1 +royal 1 +netherland 1 +sciencesdistinguish 1 +fellow 1 +british 1 +societyafip 1 +harri 1 +good 1 +memori 1 +doctor 1 +scienc 1 +honori 1 +causa 1 +queen 1 +belfastarea 1 +interest 1 +program 1 +correct 1 +methodolog 1 +algorithm 1 +systemssummari 1 +research 1 +area 1 +focus 1 +streamlin 1 +argumentso 1 +increas 1 +power 1 +reason 1 +particular 1 +ofform 1 +techniqu 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..12182abb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,60 @@ +chri 1 +edmondson 1 +yurkanan 1 +yurkananlectur 1 +mathemat 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +profession 1 +servic 1 +secretari 1 +treasur 1 +sigcomm 1 +area 1 +interestcomput 1 +network 1 +educ 1 +manag 1 +larg 1 +softwar 1 +project 1 +mobil 1 +databas 1 +design 1 +summari 1 +researchmi 1 +research 1 +interest 1 +protocol 1 +high 1 +speed 1 +commun 1 +specif 1 +internetwork 1 +select 1 +recent 1 +public 1 +cobb 1 +andm 1 +gouda 1 +address 1 +internet 1 +inproceed 1 +annual 1 +theori 1 +informaticsconfer 1 +press 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..f328f55c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,50 @@ +suzi 1 +gallagh 1 +gallagherlectur 1 +coordin 1 +academ 1 +program 1 +secondari 1 +educ 1 +loyola 1 +univers 1 +comput 1 +scienc 1 +southwestern 1 +louisiana 1 +profession 1 +servic 1 +sigcs 1 +confer 1 +committe 1 +necc 1 +area 1 +interestcomput 1 +librari 1 +inform 1 +process 1 +summari 1 +researchmi 1 +interest 1 +student 1 +recruit 1 +andretent 1 +women 1 +minor 1 +improv 1 +scienceeduc 1 +school 1 +local 1 +system 1 +retriev 1 +techniqu 1 +addit 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..6086657f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,114 @@ +jenevein 1 +senior 1 +lectur 1 +chemistri 1 +louisiana 1 +state 1 +univers 1 +orlean 1 +area 1 +interestinterconnect 1 +network 1 +parallel 1 +process 1 +comput 1 +architectur 1 +summari 1 +researchmi 1 +research 1 +focus 1 +interconnectionnetwork 1 +success 1 +failur 1 +system 1 +restsin 1 +abil 1 +devis 1 +appropri 1 +cost 1 +perform 1 +interconnectionstructur 1 +recent 1 +work 1 +interconnect 1 +involv 1 +thedevelop 1 +wafer 1 +scale 1 +optic 1 +special 1 +kindof 1 +laser 1 +wave 1 +guid 1 +design 1 +beinginvestig 1 +techniqu 1 +lead 1 +fault 1 +toler 1 +parallelsystem 1 +beingappli 1 +buss 1 +communicationswitch 1 +processor 1 +iscontinu 1 +methodolog 1 +measur 1 +performanceport 1 +across 1 +machin 1 +develop 1 +contrast 1 +tobenchmark 1 +repres 1 +true 1 +memorysystem 1 +select 1 +publicationsr 1 +menez 1 +kyklo 1 +multicomput 1 +strategi 1 +properti 1 +applic 1 +ieee 1 +transact 1 +june 1 +laranjeira 1 +malek 1 +nest 1 +predic 1 +scheme 1 +press 1 +ullah 1 +metrix 1 +precis 1 +proceed 1 +intern 1 +confer 1 +industri 1 +engin 1 +decemb 1 +johnson 1 +impact 1 +multiprocessor 1 +journal 1 +qualiti 1 +reliabl 1 +octob 1 +campbel 1 +prototyp 1 +integr 1 +januari 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..8591b2df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,66 @@ +norman 1 +martin 1 +martinprofessor 1 +emeritu 1 +comput 1 +scienc 1 +professor 1 +ofphilosophi 1 +philosophi 1 +univers 1 +chicago 1 +california 1 +angel 1 +area 1 +interestmathemat 1 +logic 1 +architectur 1 +summari 1 +researchmi 1 +current 1 +activ 1 +concentr 1 +abstract 1 +structur 1 +asinterpret 1 +theori 1 +center 1 +closur 1 +space 1 +whichexploit 1 +notion 1 +deduct 1 +oper 1 +andon 1 +intension 1 +model 1 +classic 1 +mathemat 1 +significantearli 1 +research 1 +design 1 +especi 1 +missil 1 +vehicl 1 +applic 1 +trackingalgorithm 1 +track 1 +scan 1 +radar 1 +function 1 +complet 1 +inmani 1 +valu 1 +delai 1 +metatheori 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..6b71c2d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,140 @@ +aloysiu 1 +mokassoci 1 +professorfaculti 1 +fellow 1 +comput 1 +scienc 1 +electr 1 +engin 1 +massachusett 1 +institut 1 +technolog 1 +profession 1 +servic 1 +associ 1 +editor 1 +real 1 +time 1 +system 1 +intern 1 +journal 1 +critic 1 +present 1 +editori 1 +board 1 +formal 1 +method 1 +systemdesign 1 +program 1 +committe 1 +symposium 1 +taiwan 1 +vice 1 +chair 1 +ieee 1 +technic 1 +work 1 +group 1 +federationof 1 +automat 1 +control 1 +presentarea 1 +interestfault 1 +toler 1 +hard 1 +architectur 1 +aid 1 +design 1 +tool 1 +softwar 1 +summari 1 +researchi 1 +current 1 +conduct 1 +fundament 1 +research 1 +area 1 +ofdistribut 1 +primari 1 +concern 1 +includespecif 1 +techniqu 1 +algorithm 1 +forguarante 1 +stringent 1 +constraint 1 +understand 1 +thetrad 1 +robust 1 +respons 1 +criticalsystem 1 +goal 1 +develop 1 +framework 1 +autom 1 +theanalysi 1 +synthesi 1 +applic 1 +areasinclud 1 +robot 1 +avion 1 +industrialprocess 1 +fund 1 +provid 1 +offic 1 +ofnav 1 +highli 1 +environ 1 +forreal 1 +select 1 +recent 1 +publicationsa 1 +toward 1 +mechan 1 +foundat 1 +specif 1 +tilborg 1 +kluwer 1 +academ 1 +publish 1 +heitmey 1 +labaw 1 +clement 1 +case 1 +support 1 +proceed 1 +fifth 1 +workshop 1 +montreal 1 +juli 1 +wang 1 +emerson 1 +asynchron 1 +distribut 1 +aptl 1 +confer 1 +melbourn 1 +load 1 +adjust 1 +adapt 1 +antonio 1 +decemb 1 +tsou 1 +brown 1 +analysi 1 +bound 1 +nasa 1 +expert 1 +sigsoft 1 +orlean 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..fc83aca1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,75 @@ +hamilton 1 +richard 1 +senior 1 +lecturerb 1 +engin 1 +appli 1 +physic 1 +harvard 1 +collegem 1 +aero 1 +astronaut 1 +stanford 1 +universityph 1 +comput 1 +scienc 1 +iowa 1 +state 1 +universityprofession 1 +servicecoordin 1 +univers 1 +texa 1 +austin 1 +year 1 +program 1 +seri 1 +editor 1 +vol 1 +addison 1 +weslei 1 +area 1 +interest 1 +function 1 +concurr 1 +process 1 +object 1 +orient 1 +undergradu 1 +educationsummari 1 +research 1 +maintain 1 +long 1 +stand 1 +potentialfor 1 +suitabl 1 +formal 1 +reason 1 +infal 1 +us 1 +languag 1 +teach 1 +sectionof 1 +work 1 +time 1 +permit 1 +implementationof 1 +real 1 +microcomput 1 +applic 1 +longer 1 +term 1 +project 1 +book 1 +onfunct 1 +algorithm 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..651b18e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,126 @@ +abraham 1 +silberschatz 1 +silberschatzprofessorship 1 +comput 1 +sciencesm 1 +stoni 1 +brookhonor 1 +award 1 +profession 1 +serviceiee 1 +societi 1 +outstand 1 +paper 1 +ieee 1 +journal 1 +advisori 1 +committe 1 +nation 1 +scienc 1 +foundat 1 +divis 1 +inform 1 +robot 1 +intellig 1 +system 1 +gener 1 +confer 1 +chair 1 +seventh 1 +eighth 1 +sigact 1 +sigmod 1 +symposiumon 1 +principl 1 +databas 1 +pod 1 +organ 1 +ullman 1 +invit 1 +workshop 1 +futureof 1 +research 1 +program 1 +symposium 1 +parallel 1 +distributedsystem 1 +intern 1 +knowledg 1 +manag 1 +area 1 +interest 1 +oper 1 +distribut 1 +basedsystemssummari 1 +main 1 +special 1 +concurr 1 +process 1 +recentresearch 1 +concentr 1 +multidatabas 1 +transactionmanag 1 +base 1 +real 1 +time 1 +databasesystem 1 +multiresolut 1 +continu 1 +media 1 +storag 1 +server 1 +high 1 +perform 1 +transact 1 +select 1 +recent 1 +publicationss 1 +ganguli 1 +tsur 1 +map 1 +datalog 1 +programexecut 1 +network 1 +processor 1 +knowledgeand 1 +data 1 +engin 1 +june 1 +jagadish 1 +lieuwen 1 +rastogi 1 +sudarshan 1 +dali 1 +memori 1 +internationalconfer 1 +larg 1 +septemb 1 +ozden 1 +biliri 1 +cost 1 +storageserv 1 +movi 1 +demand 1 +onveri 1 +framework 1 +storageand 1 +retriev 1 +conferenceon 1 +multimedia 1 +read 1 +fussel 1 +multi 1 +resolut 1 +relationaldata 1 +model 1 +august 1 +addit 1 +obtain 1 +fromindividu 1 +faculti 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..b7aaab8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,17 @@ +robert 1 +simmon 1 +simmonsquinci 1 +centenni 1 +professor 1 +emeritu 1 +comput 1 +scienc 1 +professoremeritu 1 +psychologymai 1 +novemb 1 +bledso 1 +rememb 1 +back 1 +list 1 +faculti 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..07df886f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,53 @@ +adam 1 +seligman 1 +home 1 +pageadam 1 +page 1 +click 1 +log 1 +gradual 1 +student 1 +austin 1 +program 1 +aweekli 1 +happi 1 +hour 1 +depart 1 +claim 1 +fame 1 +undergradu 1 +thesi 1 +specifiedth 1 +type 1 +rule 1 +oper 1 +semant 1 +core 1 +avail 1 +gzip 1 +fileor 1 +postscript 1 +file 1 +knowwhat 1 +think 1 +touch 1 +email 1 +utexa 1 +call 1 +pagemart 1 +graphic 1 +phone 1 +number 1 +read 1 +progress 1 +vrml 1 +paper 1 +new 1 +junki 1 +fromreut 1 +yahoo 1 +altern 1 +could 1 +check 1 +nando 1 +time 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..d73a5278 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,30 @@ +agapito 1 +sustaita 1 +univers 1 +texa 1 +austincognit 1 +scienc 1 +interest 1 +machin 1 +learn 1 +languag 1 +acquisit 1 +chill 1 +specif 1 +connection 1 +commonsens 1 +reasoningschoolingph 1 +comput 1 +austin 1 +hopefulli 1 +colleg 1 +station 1 +california 1 +santa 1 +barbara 1 +miscellaneouspost 1 +addressth 1 +depart 1 +mail 1 +utexa 1 +eduphon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..8bdf1073 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,90 @@ +artifici 1 +intellig 1 +laboratoryut 1 +laboratoryth 1 +laboratori 1 +atth 1 +univers 1 +texa 1 +austinha 1 +distinguish 1 +histori 1 +larg 1 +number 1 +excel 1 +faculti 1 +andgradu 1 +student 1 +new 1 +world 1 +report 1 +rank 1 +program 1 +nation 1 +close 1 +link 1 +comput 1 +scienc 1 +depart 1 +boyer 1 +autom 1 +theorem 1 +prove 1 +robert 1 +causei 1 +logic 1 +philosoph 1 +foundat 1 +benjamin 1 +kuiper 1 +qualit 1 +reason 1 +vladimir 1 +lifschitz 1 +action 1 +risto 1 +miikkulainen 1 +neural 1 +network 1 +mirank 1 +rule 1 +base 1 +system 1 +moonei 1 +machin 1 +learn 1 +gordon 1 +novak 1 +automat 1 +physic 1 +problem 1 +solv 1 +bruce 1 +porter 1 +multi 1 +function 1 +knowledg 1 +emeritu 1 +woodi 1 +bledso 1 +deceas 1 +dream 1 +aaai 1 +presidenti 1 +address 1 +simmon 1 +memoriam 1 +postdoc 1 +peter 1 +clark 1 +souther 1 +technic 1 +softwar 1 +directori 1 +current 1 +avail 1 +porterpoint 1 +lab 1 +fund 1 +agenciescontact 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..8d92d368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,40 @@ +ajita 1 +johnajita 1 +john 1 +candid 1 +parallel 1 +program 1 +group 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +hello 1 +research 1 +work 1 +system 1 +automat 1 +programmingframework 1 +base 1 +constraint 1 +compil 1 +parallelprocedur 1 +advisor 1 +professor 1 +brownemi 1 +papersmi 1 +us 1 +translat 1 +routin 1 +code 1 +want 1 +contact 1 +postal 1 +usavoic 1 +main 1 +offic 1 +taylor 1 +ajohn 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..d19fbbd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,112 @@ +vicki 1 +almstrum 1 +utc 1 +home 1 +page 1 +almstrumabout 1 +educ 1 +comput 1 +scientist 1 +interest 1 +understand 1 +peopl 1 +learn 1 +particularli 1 +interestedin 1 +mathemat 1 +logic 1 +formal 1 +method 1 +doctoralresearch 1 +topic 1 +limit 1 +mathematicallog 1 +novic 1 +scienc 1 +student 1 +lectur 1 +univers 1 +texa 1 +austin 1 +addit 1 +ispent 1 +fall 1 +semest 1 +teach 1 +uppsala 1 +sweden 1 +pagether 1 +link 1 +includ 1 +encourag 1 +other 1 +excel 1 +computersci 1 +garden 1 +travel 1 +craft 1 +sew 1 +woodwork 1 +pictur 1 +hubbi 1 +torgni 1 +stadler 1 +check 1 +site 1 +itics 1 +confer 1 +integr 1 +technolog 1 +educationjun 1 +work 1 +group 1 +june 1 +swedenoth 1 +maintain 1 +class 1 +camp 1 +field 1 +research 1 +evalu 1 +mentor 1 +issu 1 +jump 1 +point 1 +area 1 +suffer 1 +spurt 1 +construct 1 +frenzi 1 +organ 1 +belong 1 +sigcs 1 +special 1 +educationsigsoft 1 +softwar 1 +engineeringacm 1 +associ 1 +machineryieeeth 1 +institut 1 +electr 1 +electron 1 +engineerscpsrcomput 1 +profession 1 +social 1 +responsibilityconnect 1 +elsewhereto 1 +contact 1 +offic 1 +depart 1 +main 1 +direct 1 +seldom 1 +alwai 1 +connect 1 +need 1 +forewarn 1 +leav 1 +plenti 1 +time 1 +email 1 +address 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..755b199a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,15 @@ +hung 1 +hing 1 +anthoni 1 +pang 1 +home 1 +pagehung 1 +offic 1 +hour 1 +mondai 1 +wednesdai 1 +email 1 +utexa 1 +inform 1 +compil 1 +cours 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..fc986126 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,32 @@ +aruna 1 +homepag 1 +addalacurr 1 +graduat 1 +studentth 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +educ 1 +bachelor 1 +engin 1 +sciencess 1 +colleg 1 +engineeringmysorework 1 +experi 1 +lectur 1 +fall 1 +studi 1 +sciencesunivers 1 +mysoreindiai 1 +come 1 +mysor 1 +cityindiato 1 +contact 1 +email 1 +utexa 1 +eduvoic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..14cd756d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,21 @@ +home 1 +page 1 +ashi 1 +tarafdarashi 1 +tarafdarabout 1 +get 1 +round 1 +let 1 +exist 1 +known 1 +patienc 1 +pleas 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +austin 1 +usavoic 1 +main 1 +offic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..847927f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,64 @@ +roberto 1 +bayardo 1 +home 1 +pageroberto 1 +candid 1 +expect 1 +complet 1 +date 1 +fall 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +current 1 +also 1 +work 1 +within 1 +infosleuth 1 +project 1 +research 1 +interest 1 +queri 1 +process 1 +activ 1 +expert 1 +databas 1 +system 1 +data 1 +mine 1 +constraint 1 +satisfactionmi 1 +thesi 1 +advisor 1 +prof 1 +daniel 1 +mirank 1 +paper 1 +line 1 +along 1 +toolkit 1 +generatingand 1 +solv 1 +exception 1 +hard 1 +instanc 1 +contact 1 +inform 1 +mail 1 +address 1 +utexa 1 +campu 1 +dept 1 +taylor 1 +hall 1 +histori 1 +engin 1 +electr 1 +center 1 +coordin 1 +number 1 +sinc 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..0973a040 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,40 @@ +bert 1 +kayresearch 1 +reason 1 +refin 1 +imprecis 1 +model 1 +physic 1 +process 1 +overviewof 1 +research 1 +vitami 1 +network 1 +retriev 1 +paper 1 +dissert 1 +entitl 1 +behavior 1 +abstract 1 +stuffsonia 1 +andnina 1 +page 1 +drink 1 +ofth 1 +month 1 +springbank 1 +scotchdrinksof 1 +past 1 +contact 1 +informationemail 1 +address 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..d7a68001 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,39 @@ +home 1 +page 1 +bhanu 1 +welcom 1 +homepagethi 1 +akhil 1 +reddythank 1 +visit 1 +homepag 1 +visitor 1 +number 1 +school 1 +univers 1 +texa 1 +austinm 1 +comput 1 +scienc 1 +third 1 +semest 1 +coursesc 1 +multimedia 1 +system 1 +harrick 1 +vinc 1 +introduct 1 +mathemat 1 +logic 1 +vladimir 1 +lifschitz 1 +datacommun 1 +network 1 +anitish 1 +barua 1 +architectur 1 +schwetmani 1 +term 1 +project 1 +databas 1 +manag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..a6be078a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,228 @@ +michael 1 +bogomolnymichael 1 +bogomolni 1 +cogsci 1 +advert 1 +although 1 +pictur 1 +sometim 1 +feel 1 +read 1 +articl 1 +current 1 +first 1 +semest 1 +comput 1 +scienc 1 +program 1 +univers 1 +texa 1 +austin 1 +physic 1 +amherst 1 +colleg 1 +research 1 +interestsnot 1 +intend 1 +work 1 +jenef 1 +husman 1 +risk 1 +avers 1 +decis 1 +final 1 +project 1 +quarter 1 +outcom 1 +coin 1 +toss 1 +would 1 +probabl 1 +accept 1 +reject 1 +peopl 1 +fair 1 +bet 1 +well 1 +sound 1 +econom 1 +theori 1 +involv 1 +maxim 1 +util 1 +diminish 1 +return 1 +explain 1 +howev 1 +ask 1 +question 1 +formul 1 +take 1 +prevent 1 +manner 1 +respond 1 +differ 1 +exampl 1 +belov 1 +tverski 1 +kahneman 1 +taken 1 +almost 1 +verbatimfrom 1 +frame 1 +psycholog 1 +choic 1 +imagin 1 +prepar 1 +outbreak 1 +unusu 1 +diseas 1 +expect 1 +kill 1 +altern 1 +combat 1 +beenpropos 1 +assum 1 +exact 1 +scientif 1 +estim 1 +consequ 1 +programsar 1 +follow 1 +problem 1 +adopt 1 +save 1 +besav 1 +nobodi 1 +favor 1 +analog 1 +digitalif 1 +human 1 +brain 1 +made 1 +neuron 1 +fire 1 +depend 1 +level 1 +electrochem 1 +charg 1 +built 1 +axon 1 +make 1 +biolog 1 +foundat 1 +shaki 1 +ahead 1 +scream 1 +hypothesi 1 +wrong 1 +transfer 1 +inform 1 +particular 1 +wire 1 +either 1 +high 1 +voltag 1 +interpret 1 +accur 1 +process 1 +inaccur 1 +simpli 1 +come 1 +answer 1 +mistak 1 +subtract 1 +balanc 1 +checkbook 1 +rememb 1 +invalid 1 +telephon 1 +number 1 +mayb 1 +gave 1 +wasn 1 +real 1 +anoth 1 +stori 1 +nevertheless 1 +hard 1 +press 1 +point 1 +misfir 1 +account 1 +error 1 +correct 1 +lead 1 +incorrect 1 +result 1 +cognit 1 +even 1 +complet 1 +remind 1 +quot 1 +italic 1 +class 1 +append 1 +introduct 1 +cours 1 +graduat 1 +researchcognit 1 +sciencearitifici 1 +intelligencemathemat 1 +logictopolog 1 +ghrist 1 +oper 1 +system 1 +paper 1 +symbol 1 +differenti 1 +puzzl 1 +theorem 1 +prover 1 +contact 1 +email 1 +bogo 1 +utexa 1 +better 1 +send 1 +postcard 1 +phone 1 +postal 1 +address 1 +wilshir 1 +parkwai 1 +updat 1 +informationthi 1 +page 1 +written 1 +us 1 +text 1 +editor 1 +last 1 +insert 1 +empti 1 +promis 1 +construct 1 +soon 1 +suppos 1 +list 1 +hidden 1 +talentsdefinit 1 +quantum 1 +bogodynamicsdefinit 1 +sortwhil 1 +free 1 +look 1 +bogos 1 +bogomet 1 +bogu 1 +bogon 1 +filter 1 +flux 1 +bogotifi 1 +autobogotiphobia 1 +blinkenlight 1 +lasher 1 +pleas 1 +connect 1 +stupid 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..2c72b697 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,186 @@ +home 1 +page 1 +robert 1 +boyerhom 1 +stephen 1 +boyer 1 +professor 1 +comput 1 +scienc 1 +mathemat 1 +philosophydepart 1 +univers 1 +texa 1 +austinhow 1 +reach 1 +mepap 1 +mail 1 +dept 1 +univ 1 +austin 1 +usaemail 1 +utexa 1 +edufax 1 +physic 1 +locationsclassescurriculum 1 +vitaeperson 1 +dataeducationpublicationshonorsjobsgradu 1 +studentsth 1 +moor 1 +prover 1 +also 1 +knowna 1 +nqthm 1 +photo 1 +recommend 1 +read 1 +project 1 +vote 1 +smart 1 +webth 1 +projectmi 1 +view 1 +undergradu 1 +educ 1 +john 1 +mccarthi 1 +pageth 1 +moffett 1 +build 1 +controversyni 1 +time 1 +articl 1 +mccune 1 +robbin 1 +algebra 1 +result 1 +andsom 1 +technic 1 +detail 1 +verif 1 +float 1 +point 1 +divis 1 +algorithm 1 +microprocessor 1 +wonder 1 +softwar 1 +licens 1 +polici 1 +permitsth 1 +public 1 +close 1 +zero 1 +administrativeoverhead 1 +short 1 +cours 1 +howthi 1 +work 1 +much 1 +intellectu 1 +properti 1 +thegreat 1 +book 1 +variou 1 +enumer 1 +thereof 1 +confess 1 +acanon 1 +thumper 1 +possibl 1 +end 1 +tenur 1 +universitiesstandard 1 +disclaim 1 +natur 1 +noth 1 +shouldb 1 +taken 1 +repres 1 +offici 1 +posit 1 +oftexa 1 +part 1 +govern 1 +state 1 +furthermor 1 +steal 1 +joke 1 +peter 1 +deutsch 1 +aweb 1 +own 1 +anind 1 +endors 1 +everyth 1 +formal 1 +method 1 +alwai 1 +riski 1 +peano 1 +first 1 +call 1 +symbol 1 +logic 1 +introduc 1 +instanc 1 +mean 1 +habitu 1 +wrote 1 +hislectur 1 +note 1 +teach 1 +militaryacademi 1 +student 1 +incens 1 +hisformalist 1 +approach 1 +rebel 1 +despit 1 +hispromis 1 +pass 1 +fire 1 +subsequ 1 +found 1 +amor 1 +congeni 1 +set 1 +turin 1 +sincomplet 1 +theorem 1 +rudi 1 +rucker 1 +death 1 +fundament 1 +verg 1 +extinct 1 +said 1 +harold 1 +kroto 1 +britain 1 +sussex 1 +share 1 +chemistrypr 1 +curl 1 +richard 1 +smallei 1 +rice 1 +inhouston 1 +discoveri 1 +carbon 1 +atom 1 +bound 1 +shape 1 +asocc 1 +ball 1 +scientist 1 +lament 1 +loss 1 +fund 1 +associ 1 +press 1 +decemb 1 +daili 1 +texan 1 +upup 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..ecdf5713 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,156 @@ +jame 1 +brown 1 +brownereg 1 +chair 1 +comput 1 +scienc 1 +professor 1 +physic 1 +electr 1 +engin 1 +hendrix 1 +collegeph 1 +univers 1 +texa 1 +austinhonor 1 +award 1 +fellow 1 +british 1 +societi 1 +american 1 +societyarea 1 +interestparallel 1 +major 1 +focu 1 +parallel 1 +program 1 +high 1 +level 1 +specif 1 +languag 1 +integr 1 +sciencewith 1 +applic 1 +area 1 +summari 1 +researchi 1 +work 1 +tenyear 1 +computation 1 +orient 1 +displai 1 +environ 1 +code 1 +anabstract 1 +declar 1 +graphic 1 +evolv 1 +three 1 +gener 1 +ongo 1 +research 1 +includesmethod 1 +optim 1 +structur 1 +highlevel 1 +abstract 1 +throughdata 1 +partit 1 +data 1 +flow 1 +model 1 +debug 1 +visual 1 +compositionalapproach 1 +addit 1 +intelligenceprocess 1 +control 1 +fluiddynam 1 +also 1 +design 1 +develop 1 +narrow 1 +domaincompil 1 +includ 1 +logic 1 +basedlanguag 1 +robust 1 +method 1 +intellig 1 +real 1 +timedecis 1 +system 1 +select 1 +recent 1 +publicationsj 1 +hyder 1 +dongarra 1 +moor 1 +newton 1 +ieee 1 +distribut 1 +technolog 1 +spring 1 +volum 1 +number 1 +compar 1 +henc 1 +technic 1 +report 1 +dept 1 +univ 1 +austin 1 +longer 1 +version 1 +paper 1 +refer 1 +werth 1 +interact 1 +formal 1 +andpract 1 +proceed 1 +fourthworkshop 1 +compil 1 +santacruz 1 +california 1 +august 1 +jain 1 +experiment 1 +studi 1 +theeffect 1 +ofth 1 +siam 1 +confer 1 +process 1 +mirank 1 +parallelizingcompil 1 +rule 1 +base 1 +intern 1 +unifi 1 +concurr 1 +kleyn 1 +specifi 1 +graph 1 +softwar 1 +baltimor 1 +april 1 +postscript 1 +file 1 +extend 1 +proc 1 +conf 1 +supercomput 1 +juli 1 +describ 1 +prototyp 1 +implement 1 +notat 1 +chang 1 +idea 1 +remain 1 +good 1 +broad 1 +introduct 1 +brief 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..6b41dc05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,70 @@ +utc 1 +home 1 +page 1 +vlsi 1 +research 1 +group 1 +addressdepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +peopl 1 +supervis 1 +prof 1 +martin 1 +wong 1 +member 1 +chang 1 +chung 1 +ping 1 +chenyao 1 +chen 1 +yung 1 +ming 1 +fang 1 +depart 1 +shashidhar 1 +thakur 1 +zhou 1 +researchth 1 +current 1 +interest 1 +wide 1 +rang 1 +area 1 +broadli 1 +classifi 1 +follow 1 +fpga 1 +placement 1 +rout 1 +architectur 1 +partit 1 +logic 1 +synthesi 1 +issu 1 +high 1 +perform 1 +abstract 1 +recent 1 +public 1 +groupcan 1 +found 1 +trace 1 +link 1 +sigda 1 +special 1 +design 1 +autom 1 +ieee 1 +institut 1 +electr 1 +electron 1 +engin 1 +inform 1 +comment 1 +austinclick 1 +mail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..fe9431da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,93 @@ +bill 1 +canfieldhom 1 +businessmi 1 +resum 1 +postscript 1 +spring 1 +give 1 +report 1 +softwar 1 +highli 1 +avail 1 +distribut 1 +system 1 +class 1 +slide 1 +talk 1 +effort 1 +mach 1 +implement 1 +flaviu 1 +cristian 1 +algorithm 1 +work 1 +done 1 +prof 1 +built 1 +guangtian 1 +current 1 +hardwar 1 +verif 1 +project 1 +ther 1 +divis 1 +ti 1 +research 1 +professor 1 +allen 1 +emerson 1 +pleasuredomest 1 +bliss 1 +depart 1 +photo 1 +wife 1 +carla 1 +newborn 1 +daughter 1 +ruth 1 +clair 1 +parenthood 1 +struck 1 +travel 1 +beer 1 +high 1 +prioriti 1 +somewher 1 +li 1 +enjoy 1 +peel 1 +label 1 +bottl 1 +foreign 1 +land 1 +humor 1 +variou 1 +sourcesth 1 +sofaspher 1 +haiku 1 +olestra 1 +approv 1 +substitut 1 +speak 1 +poetri 1 +interest 1 +women 1 +disinform 1 +dole 1 +canfield 1 +utexa 1 +last 1 +updat 1 +april 1 +thank 1 +todd 1 +peter 1 +peterst 1 +mail 1 +mani 1 +link 1 +andth 1 +home 1 +pictur 1 +cool 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..dac25ec9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,61 @@ +carruth 1 +carruthpleas 1 +send 1 +mail 1 +utexa 1 +question 1 +suggest 1 +introduct 1 +candid 1 +austin 1 +depart 1 +computersci 1 +supervis 1 +professor 1 +jayadev 1 +misra 1 +mydissert 1 +topic 1 +real 1 +time 1 +uniti 1 +member 1 +research 1 +group 1 +extend 1 +theori 1 +order 1 +express 1 +finit 1 +boundson 1 +usual 1 +oper 1 +progress 1 +safeti 1 +alsointerest 1 +function 1 +program 1 +languag 1 +partial 1 +ordersemant 1 +autom 1 +theorem 1 +prove 1 +contact 1 +inform 1 +person 1 +home 1 +page 1 +offic 1 +address 1 +phone 1 +email 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +univers 1 +texa 1 +link 1 +world 1 +wide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..ac894988 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,43 @@ +chung 1 +ping 1 +chen 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +fiance 1 +meng 1 +tsai 1 +current 1 +intel 1 +summer 1 +intern 1 +work 1 +bufferinsert 1 +problem 1 +syllabu 1 +grade 1 +polici 1 +exam 1 +schedul 1 +homework 1 +exercis 1 +offic 1 +hour 1 +locat 1 +new 1 +utexa 1 +class 1 +fall 1 +syllabustopicschung 1 +clen 1 +last 1 +updat 1 +idea 1 +improv 1 +page 1 +send 1 +suggest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..6c77e86c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,36 @@ +home 1 +page 1 +deji 1 +chen 1 +chenabout 1 +mehello 1 +homepag 1 +student 1 +tongji 1 +univers 1 +shanghai 1 +chinaa 1 +bullet 1 +list 1 +easi 1 +includ 1 +well 1 +first 1 +item 1 +anoth 1 +third 1 +paragraph 1 +forget 1 +break 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +austin 1 +usahom 1 +lake 1 +blvd 1 +usaphon 1 +main 1 +offic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..c045a635 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,139 @@ +cliff 1 +chaputcliff 1 +chaputth 1 +univers 1 +texa 1 +austindepart 1 +comput 1 +sciencestaylor 1 +hall 1 +austin 1 +robotlab 1 +home 1 +dullchaput 1 +utexa 1 +studi 1 +northwestern 1 +gothimself 1 +emploi 1 +programm 1 +anywai 1 +spent 1 +year 1 +write 1 +anemail 1 +client 1 +portabl 1 +visual 1 +object 1 +librari 1 +odesta 1 +system 1 +corpor 1 +left 1 +institut 1 +thelearn 1 +scienc 1 +hewrot 1 +educ 1 +trane 1 +softwar 1 +macintosh 1 +common 1 +lisp 1 +thenimpl 1 +simul 1 +environ 1 +high 1 +school 1 +studentscal 1 +gamesproject 1 +graduat 1 +student 1 +program 1 +hang 1 +robot 1 +labannoi 1 +peopl 1 +hair 1 +brain 1 +scheme 1 +mean 1 +symbol 1 +represent 1 +artifici 1 +life 1 +sleep 1 +dream 1 +read 1 +fiction 1 +listen 1 +farka 1 +tour 1 +medeski 1 +martin 1 +wood 1 +watch 1 +mstk 1 +rerun 1 +plai 1 +korg 1 +ride 1 +bike 1 +turnon 1 +includ 1 +breakfast 1 +version 1 +raspi 1 +voic 1 +starfleet 1 +captain 1 +turnoff 1 +republican 1 +microsoft 1 +hangov 1 +fave 1 +site 1 +current 1 +eventsdaili 1 +new 1 +reutersintellicast 1 +weatheraustin 1 +txchicago 1 +ilperiodicalssucksalonmirski 1 +worst 1 +webth 1 +onionmacweekmacuserreferencehypertext 1 +webster 1 +interfaceyahooalta 1 +vistacardiff 1 +movi 1 +databaselyco 1 +road 1 +mapalt 1 +culturemacintosh 1 +dataappl 1 +computercyberdogquicktimequickdraw 1 +dappl 1 +supportmacintouchmacintosh 1 +resourcecyberdog 1 +poundinfo 1 +archiv 1 +rootcool 1 +weird 1 +stufffringewareth 1 +actlabpbsnprnow 1 +catch 1 +phrase 1 +catalogpap 1 +softwareth 1 +rsumsymbol 1 +emerg 1 +groundingrobotmap 1 +peopledav 1 +falooncharl 1 +lewisjeff 1 +lindjeff 1 +sherwoodbrian 1 +slatorsandi 1 +stone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..2d9cc29c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,104 @@ +chuanjun 1 +wang 1 +homepag 1 +welcom 1 +page 1 +pictur 1 +captur 1 +gloriou 1 +moment 1 +came 1 +earth 1 +stun 1 +detail 1 +place 1 +origin 1 +come 1 +orient 1 +countri 1 +call 1 +china 1 +check 1 +know 1 +hometown 1 +hubei 1 +provinc 1 +graduat 1 +student 1 +tsinghua 1 +univ 1 +decid 1 +time 1 +chang 1 +better 1 +thought 1 +texa 1 +end 1 +beautifulunivers 1 +austin 1 +current 1 +work 1 +comput 1 +scienc 1 +take 1 +break 1 +read 1 +enjoi 1 +view 1 +tower 1 +opinion 1 +nifti 1 +thing 1 +like 1 +televis 1 +surf 1 +mind 1 +numb 1 +faceless 1 +howev 1 +find 1 +brilliant 1 +us 1 +materi 1 +inform 1 +search 1 +miner 1 +diamond 1 +among 1 +million 1 +rock 1 +unemploi 1 +internet 1 +philosoph 1 +well 1 +person 1 +look 1 +real 1 +unix 1 +program 1 +magazin 1 +jump 1 +dobb 1 +journal 1 +word 1 +need 1 +fresh 1 +world 1 +hard 1 +fine 1 +graphic 1 +design 1 +unusu 1 +prose 1 +cours 1 +list 1 +would 1 +complet 1 +without 1 +link 1 +pope 1 +porsch 1 +write 1 +return 1 +depart 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..751c20d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,103 @@ +welcom 1 +homepag 1 +chin 1 +tser 1 +huang 1 +last 1 +updat 1 +decemb 1 +educ 1 +june 1 +degre 1 +dept 1 +comput 1 +scienc 1 +inform 1 +engin 1 +nation 1 +taiwan 1 +univers 1 +taipei 1 +current 1 +master 1 +student 1 +depart 1 +texa 1 +austin 1 +research 1 +interest 1 +natur 1 +languag 1 +process 1 +human 1 +interfac 1 +network 1 +distribut 1 +systemsexperiencei 1 +ever 1 +work 1 +chines 1 +knowledg 1 +group 1 +instituteof 1 +academia 1 +sinica 1 +assist 1 +major 1 +worki 1 +design 1 +system 1 +capabl 1 +word 1 +segment 1 +categori 1 +tag 1 +usinghidden 1 +markov 1 +model 1 +improv 1 +user 1 +friendli 1 +tool 1 +allow 1 +toexecut 1 +line 1 +proof 1 +read 1 +result 1 +automat 1 +automatictag 1 +reach 1 +accuraci 1 +improvedbecaus 1 +continu 1 +expans 1 +train 1 +data 1 +person 1 +interestsmovi 1 +book 1 +music 1 +literatur 1 +semiolog 1 +basebal 1 +basketbal 1 +tabl 1 +tenni 1 +pinbal 1 +favorit 1 +siteschina 1 +timesminsheng 1 +dailyth 1 +york 1 +timesusa 1 +todayth 1 +economistth 1 +atlant 1 +monthlymak 1 +contact 1 +chuang 1 +utexa 1 +edufing 1 +meyou 1 +visitor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..29ff1ab9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,23 @@ +cilkcilkcilk 1 +pronounc 1 +silk 1 +parallel 1 +multithread 1 +base 1 +languageand 1 +runtim 1 +system 1 +find 1 +time 1 +us 1 +inform 1 +inthi 1 +page 1 +check 1 +thecilk 1 +last 1 +modifi 1 +august 1 +robert 1 +blumoferdb 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..21651df5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,33 @@ +chung 1 +keung 1 +poon 1 +home 1 +page 1 +poondepart 1 +comput 1 +sciencesunivers 1 +texa 1 +austinaustin 1 +offic 1 +ckpoon 1 +utexa 1 +edumi 1 +plan 1 +hungri 1 +fish 1 +askvinc 1 +gogan 1 +pleas 1 +thesi 1 +complex 1 +connect 1 +problemsom 1 +interest 1 +site 1 +theoret 1 +scienc 1 +hong 1 +kong 1 +harmonica 1 +high 1 +school 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..14536d4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,62 @@ +home 1 +page 1 +chung 1 +wongchung 1 +wonglast 1 +modifi 1 +graduat 1 +student 1 +thedepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +member 1 +thenetwork 1 +research 1 +labwhich 1 +head 1 +byprof 1 +simon 1 +relat 1 +link 1 +java 1 +secur 1 +project 1 +nist 1 +divis 1 +resourc 1 +clearinghous 1 +role 1 +base 1 +access 1 +control 1 +rbac 1 +prof 1 +rivest 1 +cryptographi 1 +contact 1 +meemail 1 +ckwong 1 +utexa 1 +edupost 1 +usavoic 1 +offic 1 +dept 1 +hyde 1 +park 1 +baptist 1 +church 1 +chines 1 +mission 1 +hong 1 +kong 1 +associ 1 +linux 1 +netbsd 1 +freebsd 1 +openbsd 1 +send 1 +email 1 +tockwong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..20da06c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,75 @@ +clanci 1 +clancyresearch 1 +qualit 1 +reason 1 +us 1 +incomplet 1 +knowledg 1 +comput 1 +descriptionof 1 +possibl 1 +behavior 1 +dynam 1 +system 1 +complex 1 +containinga 1 +larg 1 +number 1 +variabl 1 +constraint 1 +simul 1 +frequentlyi 1 +intract 1 +result 1 +incomprehens 1 +descript 1 +abstract 1 +aggreg 1 +techniqu 1 +requir 1 +simulationto 1 +elimin 1 +irrelev 1 +detail 1 +focu 1 +distinctionsof 1 +interest 1 +develop 1 +whichaddress 1 +problem 1 +particular 1 +abstractiontechniqu 1 +automat 1 +appli 1 +thiswil 1 +facilit 1 +integr 1 +withlarg 1 +scale 1 +base 1 +model 1 +build 1 +followingtechniqu 1 +address 1 +issu 1 +vita 1 +list 1 +network 1 +retriev 1 +real 1 +paper 1 +contact 1 +informationemail 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +depart 1 +scienc 1 +univers 1 +texa 1 +austin 1 +finger 1 +inform 1 +hotlist 1 +netscap 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..55c659f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,11 @@ +page 1 +construct 1 +jimbo 1 +click 1 +three 1 +four 1 +five 1 +seven 1 +eight 1 +nine 1 +eleven 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..da2fd91d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,38 @@ +chri 1 +chuwelcom 1 +home 1 +page 1 +myselfmi 1 +photo 1 +student 1 +phone 1 +number 1 +address 1 +call 1 +offic 1 +mail 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +interest 1 +link 1 +chines 1 +campu 1 +christian 1 +fellowship 1 +church 1 +hong 1 +kong 1 +china 1 +author 1 +chuemail 1 +cnchu 1 +utexa 1 +edulast 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..be1a393f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,179 @@ +code 1 +visual 1 +parallel 1 +program 1 +systemmast 1 +lawless 1 +scienc 1 +codeless 1 +myriad 1 +preced 1 +wilder 1 +singl 1 +instanc 1 +alfr 1 +lord 1 +tennysoncod 1 +system 1 +allow 1 +user 1 +compos 1 +sequentialprogram 1 +direct 1 +graph 1 +wheredata 1 +flow 1 +arc 1 +connect 1 +node 1 +repres 1 +sequenti 1 +thesequenti 1 +written 1 +languag 1 +produc 1 +parallelprogram 1 +varieti 1 +architectur 1 +model 1 +independ 1 +click 1 +screen 1 +shot 1 +tutori 1 +base 1 +network 1 +machin 1 +well 1 +sequent 1 +symmetri 1 +newest 1 +version 1 +releas 1 +avail 1 +support 1 +crai 1 +smp 1 +announc 1 +free 1 +download 1 +softwar 1 +major 1 +revis 1 +featur 1 +sophist 1 +interfac 1 +provid 1 +mani 1 +improv 1 +previou 1 +make 1 +easier 1 +pleasant 1 +includ 1 +like 1 +macdraw 1 +multipl 1 +window 1 +subgraph 1 +edit 1 +hierarchi 1 +browser 1 +articl 1 +hpcwire 1 +line 1 +journal 1 +high 1 +perform 1 +comput 1 +recent 1 +publish 1 +entitl 1 +come 1 +kind 1 +enough 1 +reproduc 1 +introduct 1 +us 1 +mail 1 +list 1 +current 1 +prospect 1 +notifi 1 +backend 1 +join 1 +fill 1 +form 1 +also 1 +ad 1 +first 1 +name 1 +last 1 +address 1 +relat 1 +xcodelib 1 +compon 1 +librari 1 +document 1 +publicationscod 1 +construct 1 +directori 1 +compress 1 +postscript 1 +file 1 +made 1 +lieu 1 +prepar 1 +stage 1 +despit 1 +chang 1 +manual 1 +still 1 +quit 1 +refer 1 +public 1 +link 1 +contact 1 +informationfor 1 +specif 1 +comment 1 +regard 1 +send 1 +emeri 1 +berger 1 +utexa 1 +snail 1 +group 1 +member 1 +depart 1 +univers 1 +texa 1 +austin 1 +research 1 +groupgroup 1 +leaderprofessor 1 +jame 1 +brown 1 +affili 1 +faculti 1 +john 1 +werth 1 +project 1 +manag 1 +bergerstud 1 +dwip 1 +banerje 1 +incorpor 1 +dynam 1 +data 1 +partit 1 +ajita 1 +develop 1 +constraint 1 +automat 1 +alumni 1 +overview 1 +home 1 +page 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..3b3dcf16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,35 @@ +steve 1 +correlstev 1 +correlresearchph 1 +student 1 +work 1 +multifunct 1 +knowledg 1 +base 1 +group 1 +addit 1 +inform 1 +current 1 +construct 1 +hotlist 1 +search 1 +site 1 +page 1 +email 1 +address 1 +public 1 +tech 1 +reportcontact 1 +mail 1 +correl 1 +utexa 1 +offic 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +austin 1 +taylor 1 +hall 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..2c277ed7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,75 @@ +carlo 1 +pucholcarlo 1 +pucholresearch 1 +interest 1 +respons 1 +real 1 +time 1 +reactiv 1 +system 1 +gener 1 +formal 1 +method 1 +specif 1 +implement 1 +distribut 1 +control 1 +robot 1 +esterel 1 +synchron 1 +program 1 +languag 1 +mawl 1 +applic 1 +develop 1 +check 1 +utc 1 +group 1 +home 1 +page 1 +publicationsi 1 +list 1 +public 1 +avail 1 +forbrows 1 +softwareth 1 +tempest 1 +toolset 1 +packag 1 +verifyingsafeti 1 +properti 1 +written 1 +wrote 1 +half 1 +linux 1 +devic 1 +driver 1 +thequantavisionfram 1 +grabber 1 +part 1 +thejoystickdevic 1 +contact 1 +informationoffic 1 +dreal 1 +taylor 1 +hall 1 +offic 1 +univers 1 +texa 1 +austindepart 1 +comput 1 +sciencesaustin 1 +utexa 1 +austin 1 +lot 1 +phun 1 +interestsmemb 1 +theth 1 +latest 1 +interesti 1 +origin 1 +fromgandia 1 +inth 1 +provinc 1 +valencia 1 +spain 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..b506bdd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,79 @@ +utc 1 +real 1 +time 1 +system 1 +research 1 +groupth 1 +group 1 +head 1 +byprof 1 +aloysiu 1 +past 1 +year 1 +work 1 +toward 1 +lai 1 +groundworkfor 1 +establish 1 +firm 1 +theoret 1 +foundat 1 +systemsand 1 +also 1 +build 1 +design 1 +tool 1 +base 1 +canb 1 +categor 1 +three 1 +area 1 +follow 1 +specif 1 +model 1 +precis 1 +formul 1 +properti 1 +analysi 1 +verif 1 +reason 1 +synthesi 1 +enforc 1 +stringent 1 +constraint 1 +project 1 +logic 1 +modechart 1 +toolset 1 +editor 1 +verifi 1 +simul 1 +compil 1 +timetool 1 +scenario 1 +languagepublicationsabstract 1 +ofth 1 +paper 1 +availableonlin 1 +postscript 1 +current 1 +member 1 +deji 1 +chen 1 +carlo 1 +puchol 1 +doug 1 +stuart 1 +chung 1 +tsou 1 +guangtian 1 +wang 1 +yangalumni 1 +paul 1 +clement 1 +chih 1 +farn 1 +supoj 1 +suthandavibul 1 +farnam 1 +jahanian 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..62f48dec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,118 @@ +cindi 1 +thompsoncindi 1 +thompsonmachin 1 +learn 1 +research 1 +groupunivers 1 +texa 1 +austini 1 +particip 1 +candlelight 1 +vigil 1 +across 1 +internetto 1 +help 1 +increas 1 +awar 1 +violenc 1 +women 1 +researchmi 1 +current 1 +interest 1 +artifici 1 +intellig 1 +primarilyin 1 +area 1 +machin 1 +specif 1 +interestedin 1 +natur 1 +languag 1 +acquisit 1 +produc 1 +deep 1 +semanticrepresent 1 +input 1 +sentenc 1 +would 1 +us 1 +mani 1 +task 1 +propos 1 +corpu 1 +base 1 +lexic 1 +wrote 1 +master 1 +thesi 1 +system 1 +rule 1 +suitabl 1 +diagnost 1 +expert 1 +also 1 +mobil 1 +robot 1 +exhibit 1 +atrobofest 1 +spring 1 +semest 1 +build 1 +agent 1 +finger 1 +inform 1 +pictur 1 +vita 1 +list 1 +public 1 +page 1 +group 1 +educ 1 +comput 1 +scienc 1 +univers 1 +austin 1 +north 1 +carolina 1 +state 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +cthomp 1 +utexa 1 +postal 1 +depart 1 +hotlist 1 +start 1 +point 1 +internet 1 +explor 1 +misc 1 +consortium 1 +collect 1 +bibliographi 1 +project 1 +resourc 1 +associ 1 +repositori 1 +knowledg 1 +laboratori 1 +home 1 +georgia 1 +tech 1 +journal 1 +linguist 1 +folk 1 +cognit 1 +miscellan 1 +stuff 1 +wolv 1 +truth 1 +evalu 1 +counsel 1 +expand 1 +horizon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..e78237fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,18 @@ +home 1 +page 1 +xingshan 1 +welcom 1 +browser 1 +doesn 1 +seem 1 +support 1 +frame 1 +want 1 +downloadth 1 +latest 1 +netscap 1 +school 1 +work 1 +famili 1 +friend 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..630efe86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,71 @@ +mike 1 +dahlin 1 +dahlingener 1 +informationassist 1 +professor 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +berkelei 1 +electr 1 +engin 1 +rice 1 +teachingfal 1 +oper 1 +systemsspr 1 +advanc 1 +architectureeveryon 1 +read 1 +technic 1 +classic 1 +researchxf 1 +serverless 1 +network 1 +file 1 +systemweb 1 +systemsth 1 +experiment 1 +softwar 1 +system 1 +less 1 +public 1 +list 1 +informationtechnolog 1 +trend 1 +pagethi 1 +pagesummar 1 +recent 1 +technolog 1 +interest 1 +operatingsystem 1 +research 1 +compter 1 +architect 1 +includinghistor 1 +data 1 +gather 1 +price 1 +capac 1 +disk 1 +memori 1 +person 1 +informationif 1 +page 1 +seem 1 +bore 1 +probabl 1 +want 1 +work 1 +internet 1 +root 1 +link 1 +world 1 +email 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +postal 1 +austinaustin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..9c141323 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,50 @@ +home 1 +page 1 +damani 1 +howdi 1 +pagal 1 +dekho 1 +student 1 +busi 1 +read 1 +lazi 1 +updateth 1 +homepag 1 +regularli 1 +suffic 1 +anyth 1 +crazi 1 +appeal 1 +phrase 1 +us 1 +probabl 1 +meant 1 +research 1 +activ 1 +work 1 +parallel 1 +distribut 1 +sytem 1 +laboratori 1 +vijai 1 +garg 1 +interest 1 +system 1 +network 1 +public 1 +follow 1 +time 1 +honor 1 +tradit 1 +feel 1 +oblig 1 +providesometh 1 +servic 1 +contact 1 +mehom 1 +guadulp 1 +austin 1 +offic 1 +austinphon 1 +dept 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..da89ef1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,55 @@ +dane 1 +marshalldan 1 +marshal 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +research 1 +multiresolut 1 +render 1 +system 1 +autom 1 +model 1 +tree 1 +real 1 +time 1 +global 1 +illumin 1 +electromechan 1 +pinbal 1 +machin 1 +mainten 1 +view 1 +thelogist 1 +equat 1 +escap 1 +attractor 1 +complex 1 +plane 1 +main 1 +area 1 +make 1 +nice 1 +imag 1 +contact 1 +inform 1 +work 1 +address 1 +appli 1 +laboratori 1 +burnet 1 +phone 1 +email 1 +utexa 1 +school 1 +unrel 1 +link 1 +pastur 1 +jupit 1 +probe 1 +happi 1 +station 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..1994ee85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,67 @@ +doug 1 +stuart 1 +welcom 1 +pagedoug 1 +home 1 +pagewelcom 1 +page 1 +construct 1 +bear 1 +sure 1 +number 1 +oflinksto 1 +interest 1 +place 1 +well 1 +inform 1 +aboutsport 1 +scienc 1 +fiction 1 +booksin 1 +gener 1 +fewjok 1 +testof 1 +latexhtml 1 +aweath 1 +mapandcondit 1 +austinandnew 1 +orlean 1 +guess 1 +sort 1 +us 1 +person 1 +archiv 1 +amgraci 1 +share 1 +perhap 1 +link 1 +process 1 +provid 1 +index 1 +puttingit 1 +simpl 1 +keep 1 +webbrows 1 +databas 1 +browser 1 +know 1 +thisi 1 +good 1 +idea 1 +go 1 +save 1 +someth 1 +justa 1 +easi 1 +access 1 +manner 1 +stuffmom 1 +click 1 +comput 1 +calendarlink 1 +video 1 +fictionbooksjokessportsfoodvideout 1 +libraryresumelast 1 +updat 1 +dasdastuart 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..9ee67009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,4 @@ +doug 1 +swhich 1 +annoi 1 +thisorthi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..317ffdd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,57 @@ +dian 1 +lawdian 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +research 1 +intereststh 1 +symbol 1 +ground 1 +problemnavig 1 +robot 1 +agent 1 +us 1 +neural 1 +network 1 +evolv 1 +theus 1 +genet 1 +algorithm 1 +educ 1 +universityof 1 +spanish 1 +literatur 1 +washingtonst 1 +fine 1 +art 1 +washington 1 +stateunivers 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +dianelaw 1 +utexa 1 +postal 1 +local 1 +link 1 +utc 1 +homepag 1 +home 1 +page 1 +gann 1 +illig 1 +santa 1 +institut 1 +digest 1 +archiv 1 +michigan 1 +group 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..1307d73c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,47 @@ +home 1 +page 1 +dionisi 1 +papadopoulosdionisi 1 +papadopoulosabout 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +undergradu 1 +engin 1 +informat 1 +univers 1 +patra 1 +greec 1 +also 1 +work 1 +technolog 1 +institut 1 +member 1 +softwar 1 +applic 1 +reasearch 1 +unit 1 +contact 1 +medionisi 1 +papadopoulo 1 +texa 1 +austin 1 +mail 1 +utexa 1 +link 1 +mine 1 +monitor 1 +databas 1 +homework 1 +panhellen 1 +associationpanathinaiko 1 +athlet 1 +clubgreek 1 +newshellen 1 +resourc 1 +networkeveryth 1 +alwai 1 +want 1 +know 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..f5d4c8e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,119 @@ +david 1 +zuckermandavid 1 +zuckermanassist 1 +professor 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +address 1 +utexa 1 +postal 1 +depart 1 +finger 1 +cours 1 +fall 1 +cryptographyresearch 1 +intereststh 1 +role 1 +random 1 +complex 1 +theori 1 +expand 1 +applic 1 +walk 1 +graph 1 +cryptographi 1 +paragraph 1 +descript 1 +well 1 +myprofil 1 +annual 1 +report 1 +also 1 +look 1 +recent 1 +public 1 +asymptot 1 +good 1 +code 1 +correct 1 +insert 1 +delet 1 +transposit 1 +soda 1 +optim 1 +sampl 1 +extractor 1 +construct 1 +leader 1 +elect 1 +stoc 1 +multipl 1 +cover 1 +time 1 +structur 1 +algorithm 1 +appear 1 +linear 1 +space 1 +jcss 1 +preliminari 1 +version 1 +call 1 +determinist 1 +simul 1 +logspac 1 +us 1 +gener 1 +weak 1 +sourc 1 +algorithmica 1 +foc 1 +tight 1 +analys 1 +local 1 +load 1 +balanc 1 +derandom 1 +product 1 +revis 1 +beat 1 +eigenvalu 1 +bound 1 +explicit 1 +constructionand 1 +combinatorica 1 +utc 1 +technic 1 +effici 1 +small 1 +hit 1 +setfor 1 +combinatori 1 +rectangl 1 +high 1 +dimens 1 +lower 1 +mutual 1 +exclus 1 +sicomp 1 +unapproxim 1 +complet 1 +problem 1 +list 1 +abstract 1 +visit 1 +page 1 +sinc 1 +april 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..1135eda4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,70 @@ +multimedia 1 +home 1 +page 1 +distribut 1 +comput 1 +laboratori 1 +univers 1 +texa 1 +austin 1 +welcom 1 +main 1 +object 1 +research 1 +investig 1 +wide 1 +rangeof 1 +issu 1 +area 1 +system 1 +currentresearch 1 +focus 1 +design 1 +storag 1 +server 1 +network 1 +transport 1 +protocol 1 +digit 1 +audio 1 +video 1 +andmultiresolut 1 +databas 1 +multimediacomput 1 +dmcl 1 +part 1 +departmentof 1 +scienc 1 +sponsor 1 +work 1 +carri 1 +variou 1 +industri 1 +federalinstitut 1 +includ 1 +foundat 1 +intel 1 +nation 1 +nasa 1 +microsoft 1 +mitsubishi 1 +electr 1 +merl 1 +microsystemsinc 1 +tabl 1 +content 1 +agenda 1 +paper 1 +relev 1 +technic 1 +report 1 +list 1 +member 1 +call 1 +would 1 +like 1 +hear 1 +send 1 +yourcom 1 +suggest 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..0a20b4b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,85 @@ +batorydon 1 +batorysoftwar 1 +gener 1 +improv 1 +programm 1 +product 1 +reduc 1 +mainten 1 +cost 1 +enhanc 1 +applic 1 +perform 1 +student 1 +investig 1 +wai 1 +realiz 1 +practic 1 +domain 1 +specif 1 +compon 1 +base 1 +design 1 +methodolog 1 +technolog 1 +larg 1 +scale 1 +softwar 1 +system 1 +synthesi 1 +span 1 +topic 1 +architectur 1 +pattern 1 +extens 1 +languag 1 +subject 1 +model 1 +parameter 1 +program 1 +object 1 +orient 1 +framework 1 +current 1 +interest 1 +databas 1 +manag 1 +data 1 +structur 1 +avion 1 +research 1 +build 1 +support 1 +goal 1 +jakarta 1 +project 1 +preprocessor 1 +java 1 +would 1 +encapsul 1 +pluggabl 1 +fund 1 +darpa 1 +microsoft 1 +univers 1 +texa 1 +appli 1 +laboratori 1 +schlumberg 1 +public 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +address 1 +batori 1 +utexa 1 +phone 1 +number 1 +postal 1 +austin 1 +depart 1 +comput 1 +scienc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..d57db074 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,41 @@ +home 1 +page 1 +dwip 1 +banerje 1 +photograph 1 +banerjeeabout 1 +methi 1 +info 1 +work 1 +code 1 +parallel 1 +programminggroup 1 +methodolog 1 +includ 1 +data 1 +partit 1 +graphicalparallel 1 +program 1 +system 1 +paper 1 +present 1 +theintern 1 +process 1 +symposium 1 +list 1 +favorit 1 +site 1 +insert 1 +know 1 +contact 1 +departmentpost 1 +comput 1 +scienc 1 +austin 1 +usavoic 1 +main 1 +offic 1 +homepost 1 +enfield 1 +road 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..d88d2c05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,50 @@ +emilio 1 +camahort 1 +gurrea 1 +promis 1 +set 1 +decent 1 +home 1 +page 1 +summer 1 +mmmmm 1 +multipl 1 +complaint 1 +meet 1 +previou 1 +deadlin 1 +come 1 +anoth 1 +excus 1 +know 1 +siggraph 1 +paper 1 +finish 1 +januari 1 +thing 1 +think 1 +bout 1 +right 1 +make 1 +time 1 +els 1 +lose 1 +credibl 1 +left 1 +first 1 +item 1 +third 1 +paragraph 1 +forget 1 +break 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +taylor 1 +austin 1 +usavoic 1 +main 1 +offic 1 +ecamahor 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..86e575f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,32 @@ +posnak 1 +graduat 1 +student 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +interest 1 +network 1 +oper 1 +system 1 +support 1 +multimedia 1 +work 1 +distribut 1 +multimediacomput 1 +laboratori 1 +head 1 +harrick 1 +research 1 +supervis 1 +greg 1 +lavend 1 +isod 1 +consortium 1 +base 1 +view 1 +summari 1 +public 1 +utexa 1 +eduphon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..a3bd9284 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,73 @@ +allen 1 +emerson 1 +emersonbruton 1 +centenni 1 +professor 1 +comput 1 +scienc 1 +depart 1 +taylor 1 +hall 1 +univers 1 +texa 1 +austin 1 +mail 1 +utexa 1 +phone 1 +direct 1 +secretari 1 +area 1 +research 1 +interest 1 +formal 1 +method 1 +aid 1 +verif 1 +tempor 1 +logic 1 +automata 1 +infinit 1 +object 1 +concurr 1 +distribut 1 +systemsselect 1 +recent 1 +publications 1 +sistla 1 +srinivasan 1 +quantit 1 +reason 1 +journal 1 +real 1 +time 1 +system 1 +sadler 1 +effici 1 +satisfi 1 +calculu 1 +theori 1 +practic 1 +bakker 1 +york 1 +springer 1 +verlag 1 +lectur 1 +note 1 +jutla 1 +tree 1 +determinaci 1 +annual 1 +ieee 1 +symposium 1 +foundat 1 +foc 1 +juan 1 +modal 1 +handbook 1 +theoret 1 +leeuwen 1 +elsevi 1 +press 1 +amsterdam 1 +cambridg 1 +mass 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..5a07424d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,103 @@ +emeri 1 +berger 1 +home 1 +pageemeri 1 +person 1 +contact 1 +info 1 +mail 1 +address 1 +dept 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +univers 1 +texa 1 +austin 1 +phone 1 +work 1 +utexa 1 +system 1 +analyst 1 +parallel 1 +program 1 +groupi 1 +research 1 +group 1 +code 1 +visual 1 +inform 1 +page 1 +ticam 1 +composit 1 +materi 1 +also 1 +affili 1 +project 1 +click 1 +name 1 +tool 1 +search 1 +lyco 1 +databas 1 +file 1 +randomli 1 +select 1 +mirror 1 +near 1 +view 1 +unix 1 +user 1 +academ 1 +function 1 +haskel 1 +uttr 1 +abstract 1 +languag 1 +add 1 +object 1 +orient 1 +us 1 +concept 1 +known 1 +type 1 +class 1 +pure 1 +framework 1 +paper 1 +describ 1 +extens 1 +analyz 1 +accomplish 1 +well 1 +problem 1 +compress 1 +postscript 1 +html 1 +othermi 1 +youngest 1 +brother 1 +doug 1 +aspir 1 +artist 1 +graphic 1 +handiwork 1 +linksth 1 +systemtexbook 1 +textbook 1 +exchangegrac 1 +graduat 1 +repres 1 +associ 1 +last 1 +updat 1 +octob 1 +believ 1 +macintosh 1 +check 1 +http 1 +evangelist 1 +macaddict 1 +join 1 +list 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..93f043a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,108 @@ +emma 1 +home 1 +page 1 +wuabout 1 +myselfhi 1 +welcom 1 +chines 1 +girl 1 +come 1 +august 1 +studi 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +interest 1 +china 1 +immedi 1 +degre 1 +zhongshan 1 +becam 1 +market 1 +repres 1 +inibm 1 +compani 1 +south 1 +branch 1 +try 1 +deliv 1 +solut 1 +small 1 +planet 1 +costom 1 +telecommun 1 +media 1 +industri 1 +would 1 +surpris 1 +find 1 +manyalumni 1 +enter 1 +alumni 1 +club 1 +nice 1 +thing 1 +attend 1 +graduat 1 +school 1 +lot 1 +intern 1 +opportun 1 +engin 1 +student 1 +semest 1 +work 1 +part 1 +time 1 +programm 1 +nation 1 +instrumentsinc 1 +cours 1 +schedul 1 +spring 1 +distribut 1 +computingmanag 1 +informationautomat 1 +program 1 +tool 1 +baseyahoogalaxi 1 +librari 1 +onlin 1 +universityyellow 1 +mini 1 +introduct 1 +us 1 +fortran 1 +tutori 1 +infoleisur 1 +timenewspagepeopl 1 +dailyartstim 1 +magazinechines 1 +magazinepc 1 +magazinec 1 +visit 1 +orlean 1 +houston 1 +antoniosan 1 +franciscomarina 1 +peac 1 +citysan 1 +jose 1 +capit 1 +silicon 1 +vallei 1 +love 1 +francisco 1 +contact 1 +pointemail 1 +emmawu 1 +utexa 1 +eduphon 1 +mail 1 +last 1 +date 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..3447f8fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,30 @@ +home 1 +page 1 +emilio 1 +remolinaemilio 1 +remolinaabout 1 +mehi 1 +first 1 +bullet 1 +list 1 +easi 1 +includ 1 +well 1 +item 1 +anoth 1 +third 1 +paragraph 1 +forget 1 +break 1 +curriculum 1 +vita 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +austin 1 +usavoic 1 +main 1 +offic 1 +eremolin 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..21b8278d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,26 @@ +levent 1 +sayfasi 1 +welcom 1 +home 1 +page 1 +erkok 1 +graduat 1 +student 1 +depart 1 +comput 1 +sciencesat 1 +universityof 1 +texa 1 +austin 1 +former 1 +locat 1 +inturkei 1 +person 1 +inform 1 +reach 1 +http 1 +ceng 1 +metu 1 +erkokto 1 +find 1 +thank 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..d090fc4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,39 @@ +esra 1 +erdem 1 +homepag 1 +student 1 +thedepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +educ 1 +depart 1 +engin 1 +andinform 1 +bilkent 1 +turkei 1 +area 1 +interest 1 +machin 1 +learninginduct 1 +logic 1 +program 1 +monoton 1 +reason 1 +topic 1 +cognit 1 +sciencelearningreason 1 +children 1 +theori 1 +mind 1 +commonsens 1 +reasoningknowledg 1 +representationemotionsphilosophi 1 +mindcontact 1 +inform 1 +postal 1 +voic 1 +mail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..b0d4e01f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,69 @@ +carl 1 +home 1 +pagestephen 1 +carlpardon 1 +dust 1 +current 1 +student 1 +work 1 +toward 1 +master 1 +art 1 +degre 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +thesi 1 +describ 1 +system 1 +perform 1 +syntact 1 +extens 1 +scheme 1 +program 1 +languag 1 +wasn 1 +alwai 1 +life 1 +myresum 1 +believ 1 +item 1 +person 1 +interest 1 +planmi 1 +resum 1 +research 1 +interestsa 1 +psuedo 1 +random 1 +collect 1 +linksth 1 +household 1 +daili 1 +dose 1 +thing 1 +world 1 +wide 1 +snow 1 +pike 1 +peak 1 +houston 1 +chronicl 1 +interact 1 +sport 1 +worth 1 +rice 1 +athlet 1 +march 1 +bandget 1 +touchpost 1 +austin 1 +usavoic 1 +main 1 +offic 1 +know 1 +esteban 1 +utexa 1 +edureturn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..7800f7e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,63 @@ +tara 1 +estlintara 1 +estlinmachin 1 +learn 1 +research 1 +groupth 1 +univers 1 +texa 1 +austinresearchcontrol 1 +knowledg 1 +improv 1 +perform 1 +problem 1 +solver 1 +byguid 1 +effici 1 +accur 1 +solut 1 +researchinvolv 1 +us 1 +combin 1 +analyt 1 +induct 1 +machinelearn 1 +techniqu 1 +acquir 1 +control 1 +inform 1 +amparticularli 1 +interest 1 +method 1 +theperform 1 +plan 1 +schedul 1 +system 1 +includ 1 +detail 1 +descript 1 +myresearch 1 +vita 1 +list 1 +public 1 +also 1 +check 1 +machin 1 +group 1 +page 1 +educ 1 +comput 1 +scienc 1 +austin 1 +tulan 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +estlin 1 +utexa 1 +postal 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..5295728f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,85 @@ +home 1 +page 1 +francoi 1 +barbanson 1 +utc 1 +versionhom 1 +versionthi 1 +locat 1 +directori 1 +spool 1 +user 1 +francoisabout 1 +mecurr 1 +research 1 +interest 1 +black 1 +forest 1 +cake 1 +central 1 +market 1 +genuin 1 +find 1 +real 1 +pastri 1 +fruit 1 +mouss 1 +austin 1 +pack 1 +groceri 1 +well 1 +swim 1 +forthcom 1 +trip 1 +shed 1 +lighton 1 +issu 1 +stop 1 +shop 1 +food 1 +women 1 +current 1 +crawl 1 +join 1 +foreign 1 +legion 1 +todai 1 +chines 1 +wisdom 1 +suggest 1 +watch 1 +plai 1 +basketbal 1 +hyogo 1 +japan 1 +check 1 +tank 1 +polic 1 +action 1 +atdominion 1 +hqcheck 1 +dilberti 1 +knew 1 +databas 1 +class 1 +would 1 +noth 1 +troubl 1 +mentionthat 1 +parallel 1 +comput 1 +contact 1 +mepost 1 +guadalup 1 +street 1 +suit 1 +texa 1 +voic 1 +theori 1 +number 1 +assum 1 +machin 1 +work 1 +mail 1 +utexa 1 +edufrancoi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..e7e4934d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,58 @@ +donald 1 +fussel 1 +trammel 1 +crow 1 +regent 1 +professor 1 +depart 1 +comput 1 +scienc 1 +director 1 +advanc 1 +technolog 1 +divis 1 +inform 1 +group 1 +appli 1 +research 1 +laboratori 1 +member 1 +engin 1 +center 1 +electr 1 +texa 1 +institut 1 +mathemat 1 +univers 1 +austin 1 +phone 1 +mail 1 +utexa 1 +eduinform 1 +http 1 +user 1 +fussellb 1 +social 1 +dartmouth 1 +collegem 1 +dalla 1 +area 1 +interest 1 +architectur 1 +graphic 1 +databas 1 +system 1 +design 1 +autom 1 +fault 1 +toler 1 +cours 1 +introduct 1 +journal 1 +public 1 +confer 1 +work 1 +progress 1 +current 1 +former 1 +student 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..475ff271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,33 @@ +ajit 1 +georgemi 1 +gener 1 +useless 1 +pagethi 1 +page 1 +go 1 +youand 1 +construct 1 +someth 1 +odd 1 +goodthat 1 +find 1 +anyth 1 +start 1 +research 1 +address 1 +georg 1 +wickersham 1 +lane 1 +austin 1 +gajit 1 +utexa 1 +eduher 1 +file 1 +softwar 1 +document 1 +foundus 1 +recent 1 +david 1 +last 1 +updat 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..dac4445f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,27 @@ +geeta 1 +arora 1 +home 1 +page 1 +graduat 1 +student 1 +current 1 +year 1 +still 1 +try 1 +tofigur 1 +research 1 +undergrad 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +india 1 +contact 1 +mehom 1 +river 1 +oak 1 +medic 1 +art 1 +austin 1 +phone 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..f305a8e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,62 @@ +gokul 1 +home 1 +page 1 +final 1 +receiv 1 +countless 1 +flame 1 +gripe 1 +theexcess 1 +verbos 1 +decid 1 +thecollect 1 +wish 1 +mass 1 +democrat 1 +world 1 +putonli 1 +barest 1 +minimum 1 +adieu 1 +outpour 1 +critic 1 +head 1 +plakal 1 +hag 1 +hopey 1 +sleep 1 +easi 1 +untroubl 1 +conscienc 1 +send 1 +perfectli 1 +good 1 +untim 1 +demis 1 +actual 1 +quit 1 +want 1 +kind 1 +could 1 +merit 1 +vitriol 1 +click 1 +risk 1 +mayb 1 +comment 1 +help 1 +reinstat 1 +earlier 1 +signin 1 +lesscrit 1 +contact 1 +medic 1 +art 1 +austin 1 +visitor 1 +number 1 +suggest 1 +utexa 1 +last 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..94bd45ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,176 @@ +gooti 1 +home 1 +page 1 +subramanyam 1 +intro 1 +past 1 +present 1 +like 1 +futur 1 +hideout 1 +visitor 1 +number 1 +welcom 1 +bold 1 +name 1 +start 1 +suggest 1 +continu 1 +read 1 +know 1 +hopefulli 1 +wont 1 +disappoint 1 +gold 1 +well 1 +born 1 +sept 1 +somebodi 1 +interest 1 +hyderabad 1 +place 1 +andhra 1 +pradesh 1 +peopl 1 +geographi 1 +southern 1 +state 1 +india 1 +curiou 1 +famili 1 +school 1 +join 1 +osmania 1 +univers 1 +colleg 1 +technolog 1 +bachelor 1 +chemic 1 +engin 1 +came 1 +contact 1 +vari 1 +background 1 +thought 1 +made 1 +friend 1 +alwai 1 +proud 1 +call 1 +batch 1 +nebraska 1 +lincoln 1 +great 1 +gala 1 +time 1 +becam 1 +addict 1 +american 1 +footbal 1 +except 1 +cold 1 +winter 1 +everi 1 +thing 1 +els 1 +wasjust 1 +studi 1 +year 1 +comput 1 +scienc 1 +transfer 1 +texa 1 +austin 1 +life 1 +enrol 1 +master 1 +program 1 +depart 1 +real 1 +cool 1 +hang 1 +around 1 +especi 1 +weather 1 +also 1 +usual 1 +love 1 +acad 1 +care 1 +list 1 +alphabet 1 +order 1 +abraham 1 +gokul 1 +kumar 1 +mehul 1 +neeraj 1 +shantanu 1 +shailesh 1 +vipin 1 +best 1 +keep 1 +chat 1 +mani 1 +make 1 +travel 1 +plai 1 +game 1 +anoth 1 +likechess 1 +question 1 +carrom 1 +board 1 +racquet 1 +ball 1 +tenni 1 +tabl 1 +cricket 1 +soccer 1 +watch 1 +book 1 +definetli 1 +text 1 +want 1 +check 1 +horoscop 1 +todai 1 +compatabil 1 +sign 1 +listen 1 +hindi 1 +song 1 +write 1 +would 1 +bore 1 +narrow 1 +option 1 +golden 1 +goe 1 +without 1 +sai 1 +control 1 +destini 1 +ever 1 +success 1 +cours 1 +attribut 1 +hardwork 1 +power 1 +good 1 +happen 1 +propos 1 +dispos 1 +pleas 1 +spend 1 +fill 1 +valuabl 1 +comment 1 +guest 1 +hide 1 +medic 1 +art 1 +utexa 1 +finger 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..a474651f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,108 @@ +posit 1 +statement 1 +goudaacm 1 +comput 1 +surveysa 1 +decemb 1 +http 1 +survei 1 +goudanetwork 1 +copyright 1 +associ 1 +machineri 1 +permiss 1 +citat 1 +page 1 +fornetwork 1 +protocol 1 +exact 1 +specificationsand 1 +pragmat 1 +implementationsmoham 1 +goudath 1 +univers 1 +texa 1 +austin 1 +depart 1 +sciencesaustin 1 +usagouda 1 +utexa 1 +user 1 +utc 1 +report 1 +profil 1 +gouda 1 +htmlabstract 1 +argu 1 +studi 1 +evolv 1 +bridgeth 1 +specif 1 +implement 1 +networkprotocol 1 +gener 1 +term 1 +network 1 +formal 1 +implementationsaddit 1 +word 1 +phrase 1 +compil 1 +softwar 1 +tool 1 +develop 1 +methodologypubl 1 +inform 1 +submiss 1 +date 1 +june 1 +revis 1 +octob 1 +accept 1 +public 1 +sourc 1 +html 1 +avail 1 +make 1 +digitalor 1 +hard 1 +copi 1 +part 1 +work 1 +person 1 +classroomus 1 +grant 1 +without 1 +provid 1 +made 1 +ordistribut 1 +profit 1 +commerci 1 +advantag 1 +bearthi 1 +notic 1 +full 1 +first 1 +forcompon 1 +own 1 +other 1 +must 1 +honor 1 +abstract 1 +credit 1 +permit 1 +otherwis 1 +torepublish 1 +post 1 +server 1 +redistribut 1 +list 1 +requiresprior 1 +request 1 +frompubl 1 +dept 1 +orpermiss 1 +last 1 +modifi 1 +moham 1 +goudagouda 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..4ad76bdd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,63 @@ +john 1 +gunnel 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +utexa 1 +plapack 1 +minut 1 +transpos 1 +case 1 +assign 1 +connect 1 +imag 1 +report 1 +except 1 +guess 1 +drank 1 +depict 1 +product 1 +latter 1 +author 1 +collect 1 +code 1 +spars 1 +matrix 1 +computationsif 1 +would 1 +like 1 +meet 1 +best 1 +friend 1 +take 1 +look 1 +data 1 +pageam 1 +log 1 +check 1 +class 1 +also 1 +glimps 1 +mysteri 1 +land 1 +hail 1 +central 1 +oregon 1 +home 1 +towni 1 +less 1 +redmond 1 +doesn 1 +much 1 +page 1 +talk 1 +visitor 1 +rememb 1 +test 1 +plan 1 +file 1 +long 1 +bore 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..43216b5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,34 @@ +frank 1 +tropschuhfrank 1 +tropschuh 1 +gunther 1 +utexa 1 +schweiz 1 +clayton 1 +austin 1 +waldhofstrass 1 +rheinfelden 1 +curriculum 1 +vitaeenglishdeutschlinkscarnegi 1 +mellon 1 +univers 1 +undergradu 1 +student 1 +school 1 +comput 1 +scienc 1 +universitterlangen 1 +nrnberg 1 +junior 1 +year 1 +abroad 1 +institut 1 +mathematisch 1 +maschinen 1 +datenverarbeitung 1 +depart 1 +oper 1 +system 1 +texa 1 +graduat 1 +tropschuhgunth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..b77dde29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,41 @@ +yongxiang 1 +home 1 +pagemerri 1 +christmashappi 1 +year 1 +welcom 1 +homepagegao 1 +yongxiangsever 1 +pointsto 1 +contact 1 +addresspictur 1 +mine 1 +ceremoni 1 +grant 1 +master 1 +degre 1 +chinadepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +gener 1 +inform 1 +name 1 +male 1 +birthdai 1 +birth 1 +place 1 +huanan 1 +jiangsu 1 +china 1 +hobbi 1 +tabl 1 +tenniseduc 1 +background 1 +juli 1 +softwar 1 +directori 1 +servic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..50b59cfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,26 @@ +home 1 +page 1 +zhang 1 +schoolth 1 +univers 1 +texa 1 +austin 1 +comput 1 +scienc 1 +second 1 +semestercoursesc 1 +languag 1 +linc 1 +distribut 1 +alvis 1 +databas 1 +manag 1 +mirankerfil 1 +term 1 +project 1 +databs 1 +queri 1 +formthank 1 +stop 1 +gzhang 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..8b2d51c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,107 @@ +zhou 1 +home 1 +pagealan 1 +headlin 1 +new 1 +year 1 +ture 1 +award 1 +given 1 +amir 1 +pnueli 1 +aprofessor 1 +weizmann 1 +institut 1 +scienc 1 +israel 1 +comput 1 +theoret 1 +compuer 1 +tsinghua 1 +univers 1 +prestig 1 +china 1 +incompletelist 1 +undergradu 1 +classmat 1 +kept 1 +alex 1 +zhao 1 +current 1 +student 1 +depart 1 +texa 1 +austin 1 +research 1 +interest 1 +focus 1 +vlsi 1 +find 1 +mani 1 +applic 1 +mathemat 1 +algorithm 1 +design 1 +analysi 1 +combinatori 1 +optim 1 +complex 1 +even 1 +mathematicallog 1 +researchgroup 1 +head 1 +prof 1 +martin 1 +wong 1 +publicationshai 1 +forriv 1 +rout 1 +crosstalk 1 +constraint 1 +ieee 1 +internationalconfer 1 +aid 1 +jose 1 +chen 1 +optimalnon 1 +uniform 1 +wire 1 +size 1 +elmor 1 +delai 1 +model 1 +acmintern 1 +confer 1 +studi 1 +room 1 +academ 1 +refer 1 +techniqu 1 +industri 1 +directori 1 +bulletin 1 +live 1 +period 1 +chines 1 +staff 1 +movi 1 +search 1 +engin 1 +internet 1 +contact 1 +inform 1 +campu 1 +sciencesunivers 1 +austintaylor 1 +hall 1 +staustin 1 +voic 1 +mail 1 +haizhou 1 +utexa 1 +edulast 1 +modifi 1 +number 1 +visit 1 +homepag 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..cf808f2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,41 @@ +welcom 1 +home 1 +page 1 +construct 1 +myselfnow 1 +first 1 +year 1 +student 1 +departmentof 1 +comput 1 +scienc 1 +universityof 1 +texa 1 +austin 1 +want 1 +know 1 +click 1 +hear 1 +educ 1 +pre 1 +dept 1 +univ 1 +nation 1 +softwar 1 +engin 1 +wuhan 1 +china 1 +alumni 1 +pal 1 +univers 1 +alumnihom 1 +pagecontact 1 +wait 1 +email 1 +haosun 1 +utexa 1 +edunow 1 +call 1 +visitor 1 +sinc 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..46a799c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,91 @@ +micheal 1 +hewett 1 +hewetthewett 1 +utexa 1 +educlick 1 +fingerm 1 +click 1 +email 1 +fourth 1 +year 1 +student 1 +departmentof 1 +comput 1 +scienc 1 +universityof 1 +texa 1 +austin 1 +educ 1 +stanfordunivers 1 +kansa 1 +electr 1 +engin 1 +mathemat 1 +honor 1 +washburnunivers 1 +first 1 +place 1 +intern 1 +collegiateprogram 1 +contest 1 +nation 1 +competit 1 +utc 1 +bowl 1 +champion 1 +sawada 1 +ioanni 1 +smaragdaki 1 +thoma 1 +wahlutc 1 +tower 1 +hanoi 1 +lanc 1 +tokudaut 1 +intramur 1 +volleybal 1 +faculti 1 +grad 1 +divis 1 +fall 1 +open 1 +summer 1 +spring 1 +club 1 +finish 1 +motorola 1 +marathon 1 +hour 1 +minut 1 +might 1 +want 1 +visit 1 +myfavorit 1 +page 1 +locatem 1 +learnabout 1 +research 1 +interest 1 +view 1 +downloadmi 1 +public 1 +learnmor 1 +address 1 +phone 1 +number 1 +call 1 +offic 1 +home 1 +central 1 +timefax 1 +mail 1 +univers 1 +depart 1 +taylor 1 +hall 1 +author 1 +hewettemail 1 +edulast 1 +updat 1 +wednesdai 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..4d16e2c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,167 @@ +hiep 1 +nguyenhiep 1 +nguyenabout 1 +meabout 1 +vietnames 1 +american 1 +born 1 +came 1 +unit 1 +state 1 +five 1 +resid 1 +texasfor 1 +life 1 +current 1 +live 1 +austin 1 +texa 1 +work 1 +contract 1 +programm 1 +activ 1 +seek 1 +client 1 +process 1 +start 1 +busi 1 +providinghigh 1 +internet 1 +softwar 1 +solut 1 +product 1 +rang 1 +video 1 +game 1 +databas 1 +detail 1 +resum 1 +link 1 +hypertextresum 1 +occup 1 +gordon 1 +novak 1 +compil 1 +class 1 +educ 1 +receiv 1 +univers 1 +packag 1 +havedevelop 1 +year 1 +onlin 1 +natur 1 +scienc 1 +placement 1 +center 1 +address 1 +http 1 +utexa 1 +con 1 +nsplace 1 +rexi 1 +real 1 +time 1 +emptiv 1 +oper 1 +system 1 +board 1 +us 1 +robot 1 +research 1 +gdraw 1 +object 1 +orient 1 +cross 1 +platform 1 +graphic 1 +librari 1 +xwindow 1 +postscript 1 +legion 1 +data 1 +flow 1 +languag 1 +control 1 +flat 1 +simul 1 +realist 1 +specular 1 +reflect 1 +sonar 1 +xgcl 1 +xakcl 1 +gunu 1 +common 1 +lisp 1 +function 1 +interfac 1 +akcl 1 +standalon 1 +written 1 +provid 1 +john 1 +ousterhout 1 +theunivers 1 +specif 1 +anonlin 1 +access 1 +student 1 +prototyp 1 +moredetail 1 +researchwith 1 +java 1 +like 1 +allow 1 +easi 1 +build 1 +andmaintain 1 +network 1 +program 1 +port 1 +netrek 1 +explor 1 +methodolog 1 +larg 1 +also 1 +currentlyact 1 +search 1 +expertis 1 +internetsoftwar 1 +might 1 +best 1 +leverag 1 +technic 1 +interest 1 +especi 1 +window 1 +fast 1 +textur 1 +mappingroutin 1 +anim 1 +processor 1 +assembl 1 +write 1 +poetri 1 +make 1 +potteri 1 +outdoor 1 +list 1 +spot 1 +group 1 +page 1 +factoryx 1 +virtual 1 +realiti 1 +vrml 1 +sdsc 1 +vrmlto 1 +contact 1 +mepost 1 +comput 1 +usavoic 1 +main 1 +offic 1 +edulast 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..1a8f7696 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,72 @@ +huiqun 1 +home 1 +page 1 +nice 1 +meet 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +member 1 +vlsi 1 +design 1 +research 1 +group 1 +guid 1 +professor 1 +martin 1 +wong 1 +inform 1 +world 1 +new 1 +virtual 1 +tour 1 +citi 1 +collect 1 +chines 1 +site 1 +sunris 1 +stuff 1 +internet 1 +societi 1 +ieee 1 +giant 1 +search 1 +tool 1 +yahoo 1 +infoseek 1 +directori 1 +onlin 1 +career 1 +center 1 +mosaic 1 +bookmark 1 +entertain 1 +languag 1 +unix 1 +book 1 +java 1 +perl 1 +expect 1 +rosett 1 +refer 1 +manual 1 +program 1 +exampl 1 +contact 1 +mail 1 +hqliu 1 +utexa 1 +phone 1 +address 1 +campu 1 +taylor 1 +last 1 +modifi 1 +comment 1 +welcom 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..b7c51f12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,58 @@ +hudson 1 +home 1 +pagehudson 1 +turnerphd 1 +student 1 +comput 1 +scienc 1 +colleg 1 +natur 1 +sciencesat 1 +univers 1 +texa 1 +austin 1 +advisor 1 +vladimir 1 +lifschitz 1 +expect 1 +thesi 1 +titl 1 +infer 1 +rule 1 +causal 1 +represent 1 +ofcommonsens 1 +knowledg 1 +action 1 +msc 1 +mli 1 +librari 1 +inform 1 +english 1 +philosophi 1 +vita 1 +postscript 1 +avail 1 +onlin 1 +draft 1 +dissert 1 +also 1 +research 1 +interestscommonsens 1 +reason 1 +actionlog 1 +program 1 +nonmonoton 1 +reasoningmi 1 +paper 1 +linkseuropean 1 +colloquium 1 +spatialand 1 +tempor 1 +reasoningto 1 +contact 1 +mepost 1 +usavoic 1 +main 1 +offic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..3743ccc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,63 @@ +yanbin 1 +zhang 1 +welcom 1 +home 1 +page 1 +littl 1 +cutti 1 +allen 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +current 1 +seek 1 +part 1 +time 1 +full 1 +spring 1 +cours 1 +left 1 +resum 1 +click 1 +postscript 1 +format 1 +world 1 +ieee 1 +onlin 1 +career 1 +center 1 +compani 1 +help 1 +languag 1 +internet 1 +librari 1 +webmuseum 1 +travel 1 +beauti 1 +homeland 1 +contact 1 +mail 1 +hyanbin 1 +utexa 1 +phone 1 +offic 1 +address 1 +campu 1 +tarlor 1 +lake 1 +blvd 1 +number 1 +visit 1 +homepag 1 +sinc 1 +last 1 +modifi 1 +septemb 1 +comment 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..d1c79747 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,59 @@ +isaac 1 +sheldon 1 +contact 1 +inform 1 +phone 1 +mail 1 +isheldon 1 +utexa 1 +http 1 +user 1 +profession 1 +current 1 +graduat 1 +student 1 +univeristi 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +reciv 1 +master 1 +decemb 1 +coursework 1 +concentr 1 +graphic 1 +reciev 1 +undergradu 1 +degre 1 +unives 1 +mass 1 +lowel 1 +summer 1 +intern 1 +scientif 1 +engin 1 +softwar 1 +small 1 +compani 1 +creat 1 +schlaeor 1 +mellor 1 +case 1 +tool 1 +project 1 +construct 1 +solid 1 +geometri 1 +us 1 +bsptree 1 +modular 1 +trace 1 +framework 1 +butt 1 +person 1 +babi 1 +page 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..bc90d5ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,43 @@ +john 1 +adair 1 +crinkum 1 +crankum 1 +homepag 1 +live 1 +compound 1 +wife 1 +holli 1 +eileen 1 +taylor 1 +evan 1 +jame 1 +rice 1 +alumni 1 +friend 1 +includ 1 +carl 1 +white 1 +also 1 +internet 1 +consult 1 +matthew 1 +mengerink 1 +fish 1 +fanat 1 +work 1 +dejanew 1 +steve 1 +traylen 1 +get 1 +doctor 1 +book 1 +email 1 +jadair 1 +utexa 1 +back 1 +graduat 1 +student 1 +page 1 +home 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..08711b58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,22 @@ +john 1 +chamber 1 +home 1 +pagejohn 1 +chamberssenior 1 +oper 1 +system 1 +specialistb 1 +physic 1 +univers 1 +texa 1 +paso 1 +comput 1 +scienc 1 +yale 1 +universityph 1 +research 1 +austin 1 +paper 1 +vita 1 +link 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..41f4c443 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,164 @@ +bednarjim 1 +bednar 1 +candid 1 +dept 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +philosophi 1 +electr 1 +engin 1 +decemb 1 +research 1 +concentr 1 +biolog 1 +realist 1 +model 1 +ofcognit 1 +process 1 +us 1 +artifici 1 +neural 1 +network 1 +seek 1 +useth 1 +dramat 1 +advanc 1 +technolog 1 +past 1 +fewdecad 1 +make 1 +equal 1 +understand 1 +thehuman 1 +mind 1 +power 1 +avail 1 +soon 1 +beavail 1 +simul 1 +cortic 1 +becomingpract 1 +enabl 1 +necessari 1 +refut 1 +testabl 1 +hypothes 1 +brain 1 +function 1 +overal 1 +goal 1 +makecognit 1 +empir 1 +rather 1 +purelyphilosoph 1 +domain 1 +centuri 1 +master 1 +thesi 1 +tilt 1 +aftereffect 1 +self 1 +organ 1 +ofth 1 +primari 1 +visual 1 +cortex 1 +nearli 1 +complet 1 +abstract 1 +illus 1 +long 1 +studi 1 +psychologist 1 +vision 1 +appar 1 +failur 1 +might 1 +offer 1 +insight 1 +carri 1 +particular 1 +class 1 +call 1 +thought 1 +aris 1 +human 1 +thu 1 +serv 1 +test 1 +case 1 +theori 1 +area 1 +specif 1 +sever 1 +propos 1 +result 1 +later 1 +inhibit 1 +neuron 1 +receiv 1 +input 1 +examin 1 +lissom 1 +sirosh 1 +miikkulainen 1 +incorpor 1 +interact 1 +demonstr 1 +principl 1 +drive 1 +qualit 1 +quantit 1 +similar 1 +measur 1 +basi 1 +explan 1 +indirect 1 +effect 1 +line 1 +differ 1 +orient 1 +also 1 +appli 1 +figur 1 +spatial 1 +frequenc 1 +predict 1 +preliminari 1 +report 1 +postscript 1 +file 1 +begin 1 +doctor 1 +includ 1 +detail 1 +level 1 +visualbehavior 1 +extens 1 +contact 1 +inform 1 +email 1 +jbednar 1 +utexa 1 +mail 1 +address 1 +depart 1 +finger 1 +command 1 +machin 1 +log 1 +departmentmi 1 +resum 1 +ascii 1 +format 1 +link 1 +probabl 1 +outdat 1 +paper 1 +interest 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..00dca218 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,95 @@ +homepag 1 +home 1 +ofjunfanghi 1 +welcom 1 +test 1 +frame 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +austin 1 +librari 1 +largest 1 +academ 1 +north 1 +america 1 +catalog 1 +resum 1 +cours 1 +professor 1 +novak 1 +assign 1 +topic 1 +excel 1 +sourc 1 +ethernet 1 +info 1 +technolog 1 +special 1 +sysadm 1 +domain 1 +name 1 +system 1 +inform 1 +free 1 +unixish 1 +oper 1 +linux 1 +document 1 +project 1 +page 1 +us 1 +debug 1 +transfer 1 +latex 1 +file 1 +html 1 +help 1 +unix 1 +email 1 +stuff 1 +visit 1 +utc 1 +kristina 1 +ross 1 +tutori 1 +learn 1 +construct 1 +take 1 +jeff 1 +network 1 +administr 1 +last 1 +summer 1 +florida 1 +state 1 +edmund 1 +automobil 1 +buyer 1 +guid 1 +want 1 +java 1 +packag 1 +languag 1 +specif 1 +public 1 +ascii 1 +format 1 +look 1 +pretti 1 +good 1 +privaci 1 +like 1 +sceneri 1 +pictur 1 +jfang 1 +utexa 1 +start 1 +visitor 1 +number 1 +sinc 1 +applet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..3e32656f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,37 @@ +john 1 +priorjohn 1 +priormi 1 +resum 1 +accumul 1 +knowledg 1 +year 1 +dog 1 +good 1 +someth 1 +long 1 +enough 1 +start 1 +hurt 1 +probabl 1 +chew 1 +nacho 1 +chip 1 +swallow 1 +beer 1 +sleep 1 +contact 1 +inform 1 +email 1 +jprior 1 +utexa 1 +mail 1 +address 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +home 1 +phone 1 +swisher 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..092eeee9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,41 @@ +jeff 1 +thoma 1 +homepagejeff 1 +homepagecontact 1 +informationpublicationssoftwar 1 +system 1 +gener 1 +research 1 +groupphoto 1 +albumfavorit 1 +internet 1 +sitesuniversityof 1 +texa 1 +comput 1 +scienc 1 +departmentappliedresearch 1 +laboratori 1 +electricaland 1 +engin 1 +departmentedsfinanci 1 +trade 1 +technolog 1 +center 1 +fttc 1 +keyword 1 +search 1 +utacademiccalendarsut 1 +sportshook 1 +ultim 1 +longhorn 1 +site 1 +utfootbal 1 +scheduleaustintexa 1 +depart 1 +univers 1 +austin 1 +last 1 +modifi 1 +octob 1 +jthoma 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..c5651e8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,66 @@ +jiani 1 +homepagewelcom 1 +homepag 1 +first 1 +year 1 +student 1 +indepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +ataustin 1 +peke 1 +beij 1 +china 1 +alumni 1 +depart 1 +ofpek 1 +chinesechines 1 +scenerychines 1 +novelschines 1 +classicschines 1 +magazineschines 1 +newspapersus 1 +link 1 +registrar 1 +gradaut 1 +studiesut 1 +libraryut 1 +campusutaccessabout 1 +austin 1 +weather 1 +todai 1 +citylimit 1 +lot 1 +excit 1 +stuff 1 +miscellaneousyahoojava 1 +page 1 +sunjavascript 1 +netscapeth 1 +perl 1 +languag 1 +home 1 +pagecomput 1 +research 1 +associationcomput 1 +journal 1 +magzin 1 +webnetwork 1 +technicalreport 1 +libraryth 1 +collect 1 +bibliographiesintern 1 +contact 1 +street 1 +jyluo 1 +utexa 1 +finger 1 +meyour 1 +comment 1 +suggestionswould 1 +highli 1 +appreci 1 +visitorsinc 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..c4dab67f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,54 @@ +kedar 1 +namjoshiabout 1 +mehi 1 +thank 1 +check 1 +doctor 1 +student 1 +austin 1 +depart 1 +comput 1 +scienc 1 +research 1 +advisor 1 +professor 1 +allen 1 +emerson 1 +interest 1 +tempor 1 +logic 1 +reason 1 +concurr 1 +program 1 +semant 1 +distributedalgorithm 1 +automatatheori 1 +came 1 +fall 1 +receiv 1 +bachelor 1 +degre 1 +indian 1 +institut 1 +technolog 1 +madra 1 +wonder 1 +home 1 +page 1 +lot 1 +stuff 1 +would 1 +like 1 +know 1 +person 1 +inform 1 +contact 1 +offic 1 +phone 1 +address 1 +west 1 +street 1 +todai 1 +amul 1 +adkedar 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..c7691f09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,47 @@ +kenneth 1 +harker 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +kharker 1 +utexa 1 +amateur 1 +radio 1 +babylon 1 +linux 1 +rocketri 1 +cyberspac 1 +public 1 +academ 1 +work 1 +polit 1 +stuff 1 +resum 1 +last 1 +updat 1 +world 1 +wide 1 +facil 1 +provid 1 +servic 1 +faculti 1 +student 1 +staff 1 +guest 1 +view 1 +opinion 1 +express 1 +page 1 +sole 1 +respons 1 +author 1 +necessarili 1 +reflect 1 +system 1 +board 1 +regent 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..6704776d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,92 @@ +david 1 +kincaid 1 +senior 1 +lecturerassoci 1 +director 1 +center 1 +numer 1 +analysi 1 +lamar 1 +univers 1 +texa 1 +austin 1 +honor 1 +award 1 +profession 1 +servic 1 +certif 1 +recognit 1 +creativ 1 +develop 1 +technicalinnov 1 +basic 1 +linear 1 +algebra 1 +subprogram 1 +nasa 1 +technic 1 +committe 1 +comput 1 +imac 1 +session 1 +organ 1 +world 1 +congress 1 +andappli 1 +mathemat 1 +area 1 +interestmathemat 1 +softwar 1 +high 1 +perform 1 +summari 1 +researchmi 1 +interest 1 +focus 1 +research 1 +us 1 +iter 1 +algorithm 1 +solv 1 +system 1 +equat 1 +larg 1 +spars 1 +coeffici 1 +matric 1 +aris 1 +solut 1 +ellipt 1 +partial 1 +differenti 1 +implement 1 +parallel 1 +anoth 1 +select 1 +recent 1 +publicationsw 1 +chenei 1 +pacif 1 +grove 1 +brook 1 +cole 1 +hay 1 +young 1 +itpack 1 +proceed 1 +coput 1 +atlanta 1 +stationari 1 +second 1 +degre 1 +method 1 +topic 1 +polynomi 1 +sever 1 +variabl 1 +applic 1 +rassia 1 +scientif 1 +river 1 +edg 1 +jersei 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..330efc34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,67 @@ +mike 1 +kistler 1 +home 1 +page 1 +construct 1 +first 1 +year 1 +student 1 +univers 1 +texa 1 +ataustin 1 +thedepart 1 +comput 1 +scienc 1 +also 1 +current 1 +emploi 1 +theperson 1 +softwar 1 +productsdivis 1 +academ 1 +backgroundba 1 +mathemat 1 +susquehanna 1 +selinsgrov 1 +inform 1 +syracus 1 +master 1 +busi 1 +administr 1 +stern 1 +school 1 +businessnew 1 +york 1 +interestsi 1 +interest 1 +parallel 1 +algorithm 1 +particularli 1 +us 1 +commerci 1 +data 1 +process 1 +press 1 +random 1 +collect 1 +link 1 +coursesfal 1 +distribut 1 +iwith 1 +prof 1 +jayadev 1 +misra 1 +numer 1 +analysi 1 +linear 1 +algebrawith 1 +alan 1 +cline 1 +visitor 1 +number 1 +contact 1 +juli 1 +walk 1 +pflugervil 1 +email 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..e7877b97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,9 @@ +jacob 1 +kornerupjacob 1 +kornerup 1 +welcom 1 +home 1 +page 1 +time 1 +sinc 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..9aebd98a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,78 @@ +benjamin 1 +kuipersbenjamin 1 +kuipersbruton 1 +centenni 1 +professor 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +mathemat 1 +swarthmor 1 +colleg 1 +research 1 +interest 1 +represent 1 +commonsens 1 +expert 1 +knowledg 1 +withparticular 1 +emphasi 1 +effect 1 +incomplet 1 +thequalit 1 +reason 1 +grouphom 1 +page 1 +describ 1 +topic 1 +paper 1 +student 1 +andavail 1 +softwar 1 +consider 1 +detail 1 +accomplish 1 +includ 1 +tour 1 +model 1 +spatial 1 +cognit 1 +qsim 1 +algorithm 1 +qualit 1 +simul 1 +access 1 +limit 1 +logic 1 +robot 1 +explor 1 +map 1 +strategi 1 +base 1 +recognit 1 +distinct 1 +place 1 +kuiper 1 +qualitativereason 1 +cambridg 1 +press 1 +teach 1 +plan 1 +spring 1 +build 1 +intellig 1 +agent 1 +fall 1 +physic 1 +world 1 +contact 1 +inform 1 +mail 1 +prof 1 +depart 1 +email 1 +utexa 1 +phone 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..6ca5dd6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,48 @@ +simon 1 +professor 1 +comput 1 +sciencesdepart 1 +sciencesunivers 1 +texa 1 +austin 1 +email 1 +utexa 1 +eduphon 1 +offic 1 +taylor 1 +hall 1 +campu 1 +mail 1 +scienc 1 +photo 1 +profil 1 +network 1 +research 1 +laboratori 1 +fall 1 +spring 1 +administr 1 +assist 1 +also 1 +editori 1 +ieee 1 +transact 1 +kata 1 +carbon 1 +inform 1 +electron 1 +submissionnew 1 +clip 1 +tune 1 +turn 1 +toss 1 +internet 1 +empt 1 +american 1 +statesman 1 +februari 1 +front 1 +page 1 +compress 1 +postscript 1 +cont 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..ed58f888 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,54 @@ +network 1 +research 1 +laboratori 1 +austin 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +activ 1 +span 1 +entir 1 +develop 1 +cycl 1 +protocol 1 +design 1 +specif 1 +verif 1 +test 1 +perform 1 +analysi 1 +implement 1 +tune 1 +currentinterest 1 +architectur 1 +address 1 +chang 1 +underli 1 +commun 1 +technolog 1 +well 1 +applic 1 +project 1 +supervis 1 +simon 1 +professor 1 +fund 1 +provid 1 +nation 1 +foundat 1 +nsaunivers 1 +program 1 +advanc 1 +lockhe 1 +current 1 +recent 1 +paper 1 +support 1 +videoservic 1 +secur 1 +theori 1 +workshop 1 +integr 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..48d02aa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,32 @@ +home 1 +page 1 +robert 1 +landrum 1 +stori 1 +mail 1 +viruspictur 1 +mountain 1 +empirepch 1 +retreattexa 1 +republican 1 +convent 1 +backbon 1 +rockrsumfamilyinterest 1 +christian 1 +council 1 +awai 1 +graham 1 +gordon 1 +pageth 1 +comput 1 +scienc 1 +depart 1 +ofth 1 +univers 1 +texa 1 +austin 1 +councillandrum 1 +utexa 1 +edulast 1 +updat 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..2b55360a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,30 @@ +greg 1 +lavend 1 +univers 1 +texa 1 +austinr 1 +lavenderadjunct 1 +assist 1 +professordepart 1 +comput 1 +scienc 1 +anddepart 1 +electr 1 +engineeringth 1 +austin 1 +contact 1 +address 1 +research 1 +activ 1 +engin 1 +cours 1 +recommend 1 +read 1 +biograph 1 +informationsuggest 1 +improv 1 +page 1 +welcom 1 +last 1 +updat 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..54ba6360 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,64 @@ +home 1 +page 1 +jame 1 +welcom 1 +pageyeap 1 +normal 1 +look 1 +student 1 +depart 1 +comput 1 +sciencesat 1 +univers 1 +texa 1 +austin 1 +bachelor 1 +scienc 1 +master 1 +atth 1 +inform 1 +system 1 +disc 1 +nation 1 +singapor 1 +research 1 +interest 1 +algorithm 1 +data 1 +structur 1 +vlsi 1 +designalgorithm 1 +small 1 +tropic 1 +island 1 +call 1 +locat 1 +degre 1 +north 1 +equat 1 +internet 1 +communityi 1 +much 1 +aliv 1 +particip 1 +know 1 +aboutthi 1 +peopl 1 +wife 1 +come 1 +hong 1 +kong 1 +month 1 +activ 1 +lovesto 1 +smile 1 +contact 1 +mail 1 +leekk 1 +utexa 1 +phone 1 +campu 1 +addr 1 +taylor 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..5f882eb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,30 @@ +laboratori 1 +experiment 1 +softwar 1 +system 1 +less 1 +main 1 +object 1 +research 1 +investig 1 +wai 1 +buildreli 1 +high 1 +perform 1 +parallel 1 +distributedsystem 1 +apart 1 +depart 1 +computersci 1 +univers 1 +oftexa 1 +austin 1 +projectsmemb 1 +lablessss 1 +seminar 1 +seriessponsorslast 1 +modifi 1 +decemb 1 +robert 1 +blumoferdb 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..f7fc0d33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,81 @@ +calvin 1 +lincalvin 1 +linassist 1 +professor 1 +comput 1 +sciencesth 1 +import 1 +thing 1 +iswhat 1 +studi 1 +_study_ 1 +plai 1 +_play_ 1 +pete 1 +carrilresearch 1 +interestscompil 1 +languag 1 +parallel 1 +performanceanalysi 1 +scientif 1 +program 1 +project 1 +home 1 +page 1 +select 1 +publicationsth 1 +portabl 1 +implement 1 +novel 1 +mathemat 1 +biologyalgorithm 1 +dikaiako 1 +manoussaki 1 +woodward 1 +conf 1 +supercomput 1 +accommod 1 +polymorph 1 +data 1 +decomposit 1 +explicitli 1 +parallelprogram 1 +snyder 1 +proceed 1 +internationalparallel 1 +process 1 +symposium 1 +april 1 +arrai 1 +sublanguag 1 +compilersfor 1 +banerje 1 +gelernt 1 +nicolau 1 +padua 1 +springer 1 +verlag 1 +simpl 1 +intern 1 +journal 1 +comparison 1 +model 1 +share 1 +memori 1 +multiprocessor 1 +withl 1 +confer 1 +parallelprocess 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +address 1 +utexa 1 +postal 1 +depart 1 +univers 1 +texa 1 +austinaustin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..e1978318 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,62 @@ +guangtian 1 +home 1 +page 1 +homepagehi 1 +welcom 1 +current 1 +construct 1 +content 1 +time 1 +permit 1 +apolog 1 +incomplet 1 +result 1 +inconveni 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +theunivers 1 +texa 1 +austin 1 +researchi 1 +member 1 +professor 1 +real 1 +system 1 +research 1 +group 1 +interest 1 +includ 1 +timeschedul 1 +algorithm 1 +oper 1 +network 1 +perform 1 +distribut 1 +also 1 +work 1 +data 1 +replic 1 +knowledg 1 +mine 1 +last 1 +summer 1 +internship 1 +contact 1 +inform 1 +offic 1 +good 1 +view 1 +phone 1 +email 1 +liugt 1 +utexa 1 +mail 1 +address 1 +univers 1 +updat 1 +pleas 1 +send 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..7bd91b1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,45 @@ +lorenzo 1 +alvisi 1 +home 1 +page 1 +assist 1 +professor 1 +depart 1 +comput 1 +scienc 1 +cornel 1 +laurea 1 +physic 1 +universit 1 +agrav 1 +bologna 1 +itali 1 +offic 1 +taylorhal 1 +campusshow 1 +locat 1 +taylor 1 +hall 1 +phone 1 +mail 1 +utexa 1 +research 1 +interestsi 1 +interest 1 +distribut 1 +special 1 +emphasi 1 +fault 1 +toler 1 +cours 1 +spring 1 +oper 1 +system 1 +fall 1 +topic 1 +sytem 1 +public 1 +photo 1 +maria 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..6181dcff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,62 @@ +home 1 +page 1 +graduat 1 +student 1 +current 1 +work 1 +comput 1 +networksoth 1 +class 1 +tsinghua 1 +univers 1 +undergradu 1 +institut 1 +china 1 +depart 1 +scienc 1 +studyut 1 +austin 1 +universityaustin 1 +texa 1 +live 1 +academ 1 +link 1 +internet 1 +area 1 +languag 1 +system 1 +ieee 1 +sigcomm 1 +sigmod 1 +siglink 1 +siggraph 1 +sigmm 1 +sigir 1 +compani 1 +onlin 1 +shop 1 +cool 1 +site 1 +chines 1 +music 1 +newsjob 1 +hunt 1 +weather 1 +forcast 1 +dictionari 1 +contact 1 +inform 1 +campu 1 +dept 1 +univ 1 +addr 1 +microsoft 1 +corpor 1 +mail 1 +luxu 1 +utexa 1 +xuelu 1 +thank 1 +come 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..9054e68b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,89 @@ +lauri 1 +honour 1 +werthlauri 1 +werthlectur 1 +lwerth 1 +utexa 1 +educurr 1 +semest 1 +fall 1 +offic 1 +hour 1 +time 1 +taylor 1 +phone 1 +link 1 +class 1 +softwar 1 +engineeringc 1 +contemporari 1 +issu 1 +comput 1 +scienceprofession 1 +servicevic 1 +chair 1 +educ 1 +ieee 1 +technic 1 +committe 1 +engin 1 +presentco 1 +confer 1 +profession 1 +develop 1 +presentarea 1 +interestsoftwar 1 +cognit 1 +scienc 1 +summari 1 +researchmi 1 +current 1 +work 1 +center 1 +tool 1 +andenviron 1 +area 1 +includ 1 +human 1 +interfac 1 +andsoftwar 1 +metric 1 +select 1 +recent 1 +publicationsl 1 +werth 1 +qualiti 1 +assur 1 +project 1 +transact 1 +januari 1 +lectur 1 +note 1 +process 1 +improv 1 +industri 1 +strength 1 +case 1 +tomayko 1 +springer 1 +verlag 1 +john 1 +direct 1 +proceed 1 +workshop 1 +ics 1 +object 1 +orient 1 +program 1 +macintosh 1 +journal 1 +us 1 +univers 1 +texa 1 +depart 1 +home 1 +pagefaculti 1 +profilesc 1 +classeslast 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..2310e422 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,42 @@ +madhukar 1 +reddi 1 +korupoluwelcom 1 +home 1 +page 1 +reach 1 +avenu 1 +austin 1 +texa 1 +offic 1 +taylor 1 +hall 1 +dept 1 +comp 1 +scienc 1 +univ 1 +ahom 1 +utexa 1 +link 1 +offici 1 +madrashomepag 1 +ganga 1 +alumniclass 1 +madra 1 +utalgorithm 1 +comput 1 +theori 1 +group 1 +colloquium 1 +oncomput 1 +complex 1 +info 1 +cricket 1 +worldwid 1 +site 1 +espnet 1 +sportszon 1 +interact 1 +batchu 1 +india 1 +author 1 +korupoluemail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..d9f71ef6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,23 @@ +richard 1 +malloryrichard 1 +malloryresearchthesi 1 +research 1 +produc 1 +quasi 1 +natur 1 +languag 1 +explan 1 +qsimsimul 1 +current 1 +implement 1 +work 1 +simpl 1 +system 1 +contact 1 +email 1 +mallori 1 +utexa 1 +offic 1 +taylor 1 +austin 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..2761df40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,120 @@ +home 1 +page 1 +marco 1 +schneidermarco 1 +schneiderph 1 +candid 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austinresearchth 1 +titl 1 +dissert 1 +flow 1 +rout 1 +network 1 +research 1 +interest 1 +area 1 +protocol 1 +distribut 1 +fault 1 +toler 1 +particular 1 +self 1 +stabil 1 +system 1 +implicit 1 +design 1 +label 1 +itsstat 1 +legitim 1 +illegitim 1 +identifi 1 +state 1 +occur 1 +correct 1 +intend 1 +execut 1 +consid 1 +said 1 +whenregardless 1 +initi 1 +guarante 1 +converg 1 +finit 1 +number 1 +step 1 +systemwhich 1 +stai 1 +forev 1 +vita 1 +postscript 1 +public 1 +survei 1 +march 1 +real 1 +time 1 +decis 1 +respons 1 +toward 1 +tolerantr 1 +kluwer 1 +academ 1 +publish 1 +earlier 1 +version 1 +appear 1 +proceed 1 +third 1 +intern 1 +workshop 1 +octob 1 +maximum 1 +tree 1 +invit 1 +talk 1 +annual 1 +joint 1 +confer 1 +inform 1 +novemb 1 +submit 1 +journal 1 +author 1 +moham 1 +gouda 1 +second 1 +minimum 1 +depth 1 +prepar 1 +anish 1 +arora 1 +memori 1 +requir 1 +silent 1 +fifteenth 1 +symposium 1 +principl 1 +shlomi 1 +dolev 1 +span 1 +implement 1 +internet 1 +person 1 +list 1 +link 1 +construct 1 +contact 1 +offic 1 +taylor 1 +hall 1 +email 1 +utexa 1 +postal 1 +address 1 +austin 1 +ctaylor 1 +usamarco 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..88c62e3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,92 @@ +home 1 +page 1 +mark 1 +johnstonemark 1 +johnstonecontact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +postal 1 +address 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +usual 1 +find 1 +best 1 +reach 1 +isvia 1 +email 1 +markj 1 +utexa 1 +johnston 1 +also 1 +look 1 +full 1 +finger 1 +semest 1 +oper 1 +system 1 +taught 1 +byrichard 1 +brice 1 +addit 1 +object 1 +orient 1 +design 1 +analysisclass 1 +glenn 1 +down 1 +appl 1 +somerset 1 +compani 1 +pleas 1 +class 1 +graduat 1 +spring 1 +work 1 +motorola 1 +somersetdesign 1 +centerresearch 1 +informationi 1 +member 1 +oop 1 +research 1 +group 1 +part 1 +build 1 +real 1 +time 1 +garbagecollector 1 +perform 1 +number 1 +ofstudi 1 +memori 1 +alloc 1 +routin 1 +postscript 1 +copi 1 +dissertationpropos 1 +avail 1 +listof 1 +public 1 +along 1 +brief 1 +descript 1 +develop 1 +librari 1 +allow 1 +precis 1 +timingof 1 +intel 1 +pentium 1 +run 1 +linux 1 +code 1 +publicli 1 +stuff 1 +relat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..d5116415 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,13 @@ +mark 1 +home 1 +page 1 +point 1 +interest 1 +visit 1 +also 1 +friend 1 +click 1 +last 1 +modifi 1 +markng 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..b85d7631 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,85 @@ +marku 1 +kaltenbachmarku 1 +kaltenbachintroductionwelcom 1 +home 1 +page 1 +current 1 +construct 1 +andwil 1 +entri 1 +link 1 +time 1 +permit 1 +iapolog 1 +incomplet 1 +result 1 +inconveni 1 +researchi 1 +member 1 +prof 1 +misra 1 +spsp 1 +research 1 +groupand 1 +emerson 1 +stempor 1 +reason 1 +group 1 +part 1 +work 1 +develop 1 +model 1 +checkerfor 1 +finit 1 +state 1 +uniti 1 +program 1 +proposit 1 +logic 1 +verifi 1 +system 1 +recent 1 +version 1 +thesi 1 +isalso 1 +avail 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +utexa 1 +postal 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +place 1 +interest 1 +find 1 +internet 1 +worth 1 +avisit 1 +theut 1 +departmenthom 1 +softwar 1 +archivefor 1 +macintosh 1 +appl 1 +sworld 1 +wide 1 +technic 1 +supporthom 1 +actansit 1 +comprehens 1 +archiv 1 +network 1 +theatt 1 +distribut 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..cf09672e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,50 @@ +memarti 1 +mayberri 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +researchal 1 +kind 1 +stuff 1 +educ 1 +math 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +martym 1 +utexa 1 +postal 1 +click 1 +applet 1 +paus 1 +resum 1 +displai 1 +local 1 +link 1 +utc 1 +neural 1 +network 1 +homepag 1 +home 1 +page 1 +hotlist 1 +downtown 1 +anywher 1 +virtualc 1 +internetrestaur 1 +guid 1 +virtual 1 +tnstechnolog 1 +demonstr 1 +read 1 +daili 1 +texan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..dfdfd8a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,45 @@ +home 1 +page 1 +norm 1 +mccain 1 +mccainabout 1 +mephd 1 +student 1 +comput 1 +scienc 1 +colleg 1 +natur 1 +sciencesat 1 +univers 1 +texa 1 +austin 1 +advisor 1 +vladimir 1 +lifschitz 1 +expect 1 +thesi 1 +titl 1 +causal 1 +commonsens 1 +reason 1 +action 1 +kansa 1 +philosophi 1 +baker 1 +vita 1 +postscript 1 +avail 1 +onlin 1 +research 1 +interestscommonsens 1 +actionlog 1 +program 1 +nonmonoton 1 +reasoningmi 1 +paper 1 +contact 1 +mepost 1 +usavoic 1 +main 1 +offic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..dafe29e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,42 @@ +mari 1 +elain 1 +califfmari 1 +califfmachin 1 +learn 1 +research 1 +groupunivers 1 +texa 1 +austinresearchmi 1 +current 1 +interest 1 +us 1 +machin 1 +especiallyinduct 1 +logic 1 +program 1 +natur 1 +languag 1 +acquisit 1 +formor 1 +info 1 +check 1 +vita 1 +educ 1 +comput 1 +scienc 1 +baylor 1 +univers 1 +english 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +mecaliff 1 +utexa 1 +postal 1 +austin 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..976fcd74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,209 @@ +knowledg 1 +base 1 +system 1 +groupknowledg 1 +group 1 +bruce 1 +porter 1 +rich 1 +mallori 1 +peter 1 +clark 1 +souther 1 +fred 1 +prado 1 +charl 1 +callawai 1 +shown 1 +carl 1 +andersen 1 +steve 1 +correl 1 +overviewour 1 +part 1 +depart 1 +comput 1 +scienc 1 +atuniv 1 +texa 1 +austin 1 +long 1 +term 1 +goal 1 +research 1 +develop 1 +technolog 1 +forconstruct 1 +us 1 +larg 1 +multifunct 1 +oncomput 1 +would 1 +significantli 1 +improv 1 +currentexpert 1 +tutor 1 +contain 1 +broadknowledg 1 +domain 1 +requir 1 +perform 1 +multipl 1 +task 1 +toexplain 1 +viewpoint 1 +past 1 +eight 1 +year 1 +built 1 +inon 1 +area 1 +biologi 1 +method 1 +automat 1 +answeringa 1 +varieti 1 +question 1 +fact 1 +concern 1 +concept 1 +ofth 1 +largest 1 +kind 1 +content 1 +structur 1 +formallyrepres 1 +addit 1 +expand 1 +arealso 1 +begin 1 +construct 1 +similar 1 +notabl 1 +distribut 1 +especi 1 +encourag 1 +result 1 +knowledgebas 1 +recent 1 +jame 1 +lester 1 +thebiolog 1 +test 1 +explan 1 +gener 1 +express 1 +english 1 +biolog 1 +object 1 +event 1 +control 1 +experi 1 +expert 1 +found 1 +littl 1 +differ 1 +andthos 1 +written 1 +colleagu 1 +current 1 +extend 1 +type 1 +beanswer 1 +autom 1 +reason 1 +jeffrickel 1 +composit 1 +model 1 +taskof 1 +appropri 1 +answer 1 +predict 1 +well 1 +build 1 +thesimplest 1 +adequ 1 +dauntingrequir 1 +sinc 1 +like 1 +implicitli 1 +manymodel 1 +numer 1 +level 1 +detail 1 +qualit 1 +process 1 +compilerand 1 +qsim 1 +simul 1 +jeff 1 +program 1 +final 1 +bybuild 1 +anoth 1 +computingenviron 1 +focuss 1 +help 1 +deskassist 1 +proport 1 +custom 1 +squestion 1 +otherwis 1 +phone 1 +normal 1 +desk 1 +projectsour 1 +complet 1 +ongo 1 +project 1 +includ 1 +represent 1 +languag 1 +kned 1 +editor 1 +kastl 1 +retriev 1 +knight 1 +text 1 +plan 1 +fare 1 +natur 1 +lex 1 +aid 1 +mainten 1 +lexicon 1 +tripel 1 +assist 1 +utexa 1 +theorist 1 +searcher 1 +alumni 1 +alumna 1 +lian 1 +acker 1 +erik 1 +eilert 1 +bareiss 1 +brad 1 +blumenth 1 +karl 1 +brant 1 +eolu 1 +uwyo 1 +clarkp 1 +redwood 1 +boe 1 +ncsu 1 +murrai 1 +rickel 1 +publicationsclick 1 +select 1 +public 1 +relat 1 +projectsclick 1 +herefor 1 +extens 1 +collect 1 +pointer 1 +aroundth 1 +world 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..f4382b67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,99 @@ +daniel 1 +mirank 1 +home 1 +page 1 +welcom 1 +belat 1 +presenc 1 +construct 1 +someth 1 +like 1 +finish 1 +hold 1 +breath 1 +send 1 +note 1 +utexa 1 +fashionwai 1 +student 1 +wouldn 1 +place 1 +either 1 +rule 1 +match 1 +learn 1 +treat 1 +algorithm 1 +itscomparison 1 +rete 1 +warn 1 +render 1 +obsolet 1 +byth 1 +leap 1 +current 1 +research 1 +interest 1 +goal 1 +encompass 1 +venu 1 +languag 1 +usea 1 +basi 1 +activ 1 +distribut 1 +databas 1 +fundamentalcomput 1 +scienc 1 +problem 1 +corollari 1 +evolv 1 +thatgoal 1 +constraint 1 +satisfact 1 +search 1 +queri 1 +optim 1 +relat 1 +object 1 +orient 1 +parallel 1 +execut 1 +base 1 +program 1 +knowledg 1 +compil 1 +bibliographi 1 +sometim 1 +link 1 +paper 1 +come 1 +soon 1 +group 1 +text 1 +bibtex 1 +candid 1 +roberto 1 +bayardo 1 +david 1 +gadboi 1 +lanc 1 +obermey 1 +vasili 1 +samoladi 1 +robert 1 +schrag 1 +master 1 +srinivasan 1 +vaidyaraman 1 +lane 1 +warshaw 1 +past 1 +archi 1 +andrewsdavid 1 +brantchin 1 +ming 1 +kuoshiow 1 +yang 1 +salvator 1 +stolfo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..720efad0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,85 @@ +jayadev 1 +misra 1 +misrareg 1 +chair 1 +comput 1 +scienc 1 +depart 1 +tech 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +john 1 +hopkin 1 +univers 1 +honor 1 +award 1 +profession 1 +servic 1 +simon 1 +guggenheim 1 +fellow 1 +ieee 1 +fellowarea 1 +interestparallel 1 +program 1 +summari 1 +researchmi 1 +interest 1 +appli 1 +formal 1 +method 1 +practic 1 +particularli 1 +inth 1 +specif 1 +design 1 +synchron 1 +asynchronoussystem 1 +select 1 +recent 1 +publicationsj 1 +powerlist 1 +structur 1 +parallel 1 +recurs 1 +classic 1 +mind 1 +essai 1 +hoar 1 +prentic 1 +hall 1 +januari 1 +loos 1 +coupl 1 +process 1 +futur 1 +gener 1 +system 1 +north 1 +holland 1 +phase 1 +inform 1 +letter 1 +equat 1 +reason 1 +nondeterminist 1 +aspect 1 +chandi 1 +foundat 1 +addison 1 +weslei 1 +research 1 +group 1 +homepag 1 +work 1 +electron 1 +access 1 +otherpap 1 +current 1 +project 1 +seuss 1 +anoverview 1 +apostscript 1 +versionaccess 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..a45af205 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,175 @@ +utc 1 +machin 1 +learn 1 +research 1 +group 1 +supervis 1 +professor 1 +moonei 1 +focuseson 1 +combin 1 +empir 1 +knowledg 1 +base 1 +techniqu 1 +includ 1 +applic 1 +natur 1 +languag 1 +acquisit 1 +knowledgerefin 1 +plan 1 +part 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +depart 1 +atth 1 +univers 1 +texa 1 +ataustin 1 +pictur 1 +click 1 +graduat 1 +student 1 +mari 1 +elain 1 +califf 1 +mecaliff 1 +utexa 1 +tara 1 +estlin 1 +hermjakob 1 +sowmya 1 +ramachandran 1 +cindi 1 +thompson 1 +cthomp 1 +alumni 1 +paul 1 +baff 1 +scicomp 1 +jeff 1 +mahonei 1 +firstadvisor 1 +hwee 1 +nhweetou 1 +trantor 1 +dirk 1 +ourston 1 +dirk_ourston 1 +cpqm 1 +saic 1 +bradlei 1 +richard 1 +furtwangen 1 +siddarth 1 +subramanian 1 +georgetown 1 +john 1 +zell 1 +acad 1 +drake 1 +area 1 +public 1 +abduct 1 +induct 1 +logic 1 +program 1 +qualit 1 +model 1 +diagnosi 1 +speedup 1 +tutor 1 +system 1 +theori 1 +refin 1 +uncertain 1 +reasoningher 1 +complet 1 +list 1 +softwar 1 +accel 1 +reason 1 +neither 1 +revis 1 +proposit 1 +rule 1 +fort 1 +first 1 +order 1 +chillin 1 +predic 1 +invent 1 +foidl 1 +decis 1 +dolphin 1 +ad 1 +search 1 +control 1 +prolog 1 +standard 1 +classif 1 +algorithm 1 +autom 1 +experiment 1 +comparison 1 +data 1 +repositori 1 +form 1 +relat 1 +site 1 +associ 1 +aaai 1 +american 1 +linguist 1 +ilpnet 1 +european 1 +scientif 1 +network 1 +sigart 1 +special 1 +interest 1 +signll 1 +confer 1 +intern 1 +joint 1 +aritfici 1 +ijcai 1 +nation 1 +icml 1 +fourth 1 +inform 1 +sourc 1 +subject 1 +index 1 +biblio 1 +queri 1 +machinelearn 1 +home 1 +page 1 +servic 1 +paper 1 +archiv 1 +journal 1 +jair 1 +foil 1 +quinlan 1 +learner 1 +prodigi 1 +problem 1 +solv 1 +carnegi 1 +mellon 1 +ucpop 1 +partial 1 +planner 1 +washington 1 +explan 1 +illinoi 1 +oxford 1 +irvin 1 +austin 1 +wisconsin 1 +madison 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..a1db0fae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,110 @@ +raymond 1 +moonei 1 +homepageraymond 1 +mooneyassoci 1 +professor 1 +computersci 1 +univers 1 +texa 1 +austin 1 +comput 1 +engin 1 +illinoi 1 +urbana 1 +champaign 1 +scienc 1 +research 1 +interestsmi 1 +current 1 +interest 1 +artifici 1 +intellig 1 +primarilyin 1 +area 1 +machin 1 +learn 1 +includ 1 +natur 1 +languag 1 +acquisit 1 +parser 1 +lexicon 1 +inform 1 +extract 1 +word 1 +sens 1 +disambigu 1 +exampl 1 +induct 1 +logic 1 +program 1 +prolog 1 +knowledg 1 +base 1 +theori 1 +refin 1 +automat 1 +modifi 1 +rule 1 +bayesian 1 +network 1 +empir 1 +data 1 +search 1 +control 1 +improv 1 +plan 1 +effici 1 +qualiti 1 +compar 1 +combin 1 +neural 1 +symbol 1 +public 1 +home 1 +page 1 +vita 1 +finger 1 +cours 1 +informationfal 1 +lisp 1 +learningspr 1 +iiperson 1 +historyi 1 +grew 1 +small 1 +town 1 +fallon 1 +wherestart 1 +attend 1 +fallontownship 1 +highschool 1 +start 1 +fall 1 +went 1 +urbanato 1 +obtain 1 +degre 1 +list 1 +decemb 1 +complet 1 +myph 1 +thesi 1 +explan 1 +learninggroup 1 +direct 1 +prof 1 +gerald 1 +dejong 1 +began 1 +posit 1 +depart 1 +contact 1 +offic 1 +taylor 1 +hall 1 +email 1 +address 1 +utexa 1 +postal 1 +meadowfir 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..985b3f46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,100 @@ +dave 1 +moriartydav 1 +moriarti 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +researchsequenti 1 +decis 1 +task 1 +appear 1 +mani 1 +practic 1 +real 1 +world 1 +problemsinclud 1 +control 1 +resourc 1 +alloc 1 +rout 1 +canb 1 +character 1 +follow 1 +scenario 1 +agent 1 +observ 1 +stateof 1 +dynam 1 +system 1 +select 1 +finit 1 +action 1 +thesystem 1 +enter 1 +state 1 +upon 1 +must 1 +selectanoth 1 +return 1 +payoff 1 +madeor 1 +sequenc 1 +object 1 +thesequ 1 +highest 1 +total 1 +cumulativepayoff 1 +research 1 +evolv 1 +neural 1 +network 1 +geneticalgorithm 1 +learn 1 +perform 1 +sequenti 1 +amparticularli 1 +interest 1 +problem 1 +specif 1 +knowledg 1 +iscurr 1 +unavail 1 +costli 1 +obtain 1 +domain 1 +havestudi 1 +includ 1 +game 1 +plai 1 +intellig 1 +constraintsatisfact 1 +inform 1 +list 1 +public 1 +educ 1 +universityof 1 +tulan 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +utexa 1 +postal 1 +local 1 +link 1 +utc 1 +homepag 1 +home 1 +page 1 +us 1 +sport 1 +misc 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..b3a9e5a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,38 @@ +wade 1 +barnesm 1 +barnesmwbarn 1 +utexa 1 +eduresearch 1 +workhelp 1 +pagestyp 1 +map 1 +literatureliteratur 1 +research 1 +notesclassesbackground 1 +informationph 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +educ 1 +mine 1 +engin 1 +utah 1 +reach 1 +mehom 1 +tanglebriar 1 +trail 1 +campu 1 +offic 1 +yete 1 +mail 1 +mwbarn 1 +eduauthor 1 +barnesemail 1 +edulast 1 +updat 1 +mondai 1 +decemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..a113e7be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,83 @@ +professor 1 +nell 1 +dale 1 +websit 1 +seed 1 +scrollit_rl 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +home 1 +pagesunivers 1 +texa 1 +comput 1 +scienc 1 +departmentwelcom 1 +page 1 +reach 1 +address 1 +retir 1 +senior 1 +lectur 1 +univers 1 +oftexa 1 +austin 1 +receiv 1 +utaustin 1 +faculti 1 +sinc 1 +fromful 1 +time 1 +teach 1 +summer 1 +full 1 +load 1 +falland 1 +spend 1 +spring 1 +write 1 +travel 1 +pleas 1 +feel 1 +free 1 +brows 1 +room 1 +resum 1 +contain 1 +curriculum 1 +vita 1 +bibliographi 1 +inform 1 +text 1 +book 1 +author 1 +research 1 +abstract 1 +ofdissert 1 +chair 1 +recent 1 +person 1 +whichcontain 1 +memento 1 +nontechn 1 +interest 1 +direct 1 +anycorrespond 1 +mail 1 +account 1 +ndale 1 +utexa 1 +profession 1 +profilepublicationsresearch 1 +interestsperson 1 +interestsnel 1 +westlak 1 +offic 1 +document 1 +creat 1 +assist 1 +right 1 +reserv 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..9de14583 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,27 @@ +neeraj 1 +page 1 +browser 1 +support 1 +frame 1 +view 1 +home 1 +either 1 +download 1 +netscap 1 +navig 1 +without 1 +note 1 +choos 1 +latter 1 +pleas 1 +keep 1 +mind 1 +design 1 +pretti 1 +background 1 +color 1 +chosen 1 +work 1 +obnoxi 1 +chartreus 1 +blame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..181cd282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,20 @@ +gnan 1 +home 1 +pagegnana 1 +kumar 1 +natarajan 1 +page 1 +depart 1 +comput 1 +sciencesunivers 1 +texa 1 +austini 1 +graduat 1 +student 1 +sciencedepart 1 +univers 1 +austin 1 +mail 1 +utexa 1 +edufind 1 +log 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..203dbc0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,34 @@ +nimar 1 +singh 1 +arora 1 +home 1 +page 1 +medic 1 +art 1 +austin 1 +typic 1 +first 1 +year 1 +student 1 +doesn 1 +quit 1 +knowwhat 1 +area 1 +interest 1 +look 1 +resum 1 +know 1 +altern 1 +bookmarksto 1 +clearer 1 +pictur 1 +contact 1 +click 1 +queri 1 +hit 1 +term 1 +score 1 +ters 1 +output 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..617a205c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,77 @@ +utc 1 +natur 1 +languag 1 +acquist 1 +groupnatur 1 +acquisit 1 +groupat 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austinw 1 +explor 1 +discuss 1 +paper 1 +area 1 +acquisitionand 1 +learn 1 +fall 1 +meet 1 +usual 1 +everi 1 +wednesdai 1 +havedrawn 1 +close 1 +probabl 1 +resum 1 +second 1 +third 1 +week 1 +januari 1 +current 1 +propos 1 +previous 1 +particip 1 +includ 1 +prof 1 +moonei 1 +risto 1 +miikkulainen 1 +bobbi 1 +bryant 1 +mari 1 +elain 1 +califf 1 +hermjakob 1 +marti 1 +mayberri 1 +rupert 1 +tang 1 +poon 1 +cindi 1 +thompson 1 +inform 1 +pleas 1 +contact 1 +group 1 +coordin 1 +utexa 1 +relat 1 +site 1 +associ 1 +linguist 1 +signll 1 +special 1 +interest 1 +print 1 +archiv 1 +machin 1 +research 1 +neural 1 +network 1 +ofth 1 +ataustinlast 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..63e97ecc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,70 @@ +utc 1 +neural 1 +net 1 +research 1 +groupth 1 +group 1 +supervis 1 +prof 1 +ristomiikkulainen 1 +part 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +ataustin 1 +concentr 1 +andcognit 1 +includ 1 +natur 1 +languag 1 +process 1 +schema 1 +basedvis 1 +cortic 1 +self 1 +organ 1 +episod 1 +memori 1 +decis 1 +make 1 +evolv 1 +network 1 +genet 1 +algorithm 1 +click 1 +mapbelow 1 +detail 1 +check 1 +hypertext 1 +book 1 +later 1 +interact 1 +thecortex 1 +structur 1 +function 1 +risto 1 +miikkulainen 1 +graduat 1 +student 1 +alumni 1 +visitor 1 +public 1 +demo 1 +poster 1 +softwar 1 +home 1 +page 1 +confer 1 +newsgroup 1 +archiv 1 +inform 1 +sourc 1 +gener 1 +tool 1 +privat 1 +linkswusagemartym 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..f4726fcf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,77 @@ +gordon 1 +novak 1 +support 1 +free 1 +speech 1 +internet 1 +associ 1 +professor 1 +ofcomput 1 +scienc 1 +atth 1 +univers 1 +texa 1 +austin 1 +director 1 +artifici 1 +intelligencelaboratori 1 +highest 1 +honor 1 +comput 1 +research 1 +automat 1 +program 1 +reus 1 +genericalgorithmssolv 1 +physic 1 +problem 1 +specifiedinformallyartifici 1 +intelligencecurriculum 1 +vita 1 +publicationsemploymentgrantsprofession 1 +activ 1 +honorscurriculum 1 +vitaefre 1 +softwar 1 +tmycin 1 +emycin 1 +like 1 +expert 1 +system 1 +shell 1 +interfac 1 +common 1 +lispconvers 1 +unit 1 +measurementsoftwar 1 +demo 1 +construct 1 +server 1 +creat 1 +lisp 1 +write 1 +connect 1 +diagram 1 +convers 1 +convert 1 +measur 1 +isaac 1 +solv 1 +state 1 +english 1 +interact 1 +graphic 1 +draw 1 +us 1 +class 1 +schemec 1 +compilersc 1 +intelligencec 1 +programmingweb 1 +linksweatheraddress 1 +ctai 1 +univ 1 +austinaustintexa 1 +offic 1 +faxnovak 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..04478f85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,51 @@ +meghan 1 +brienhi 1 +welcom 1 +home 1 +page 1 +pleas 1 +wipe 1 +feet 1 +enter 1 +mani 1 +complaint 1 +crappi 1 +laugh 1 +stock 1 +internet 1 +remov 1 +link 1 +chanc 1 +work 1 +thank 1 +sent 1 +insult 1 +panic 1 +still 1 +download 1 +gorgeou 1 +pictur 1 +blow 1 +poster 1 +size 1 +beauti 1 +queen 1 +date 1 +wait 1 +hear 1 +paul 1 +get 1 +marri 1 +august 1 +hope 1 +come 1 +except 1 +anyon 1 +want 1 +give 1 +feel 1 +free 1 +resum 1 +email 1 +obrien 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..6842ef6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,13 @@ +oguer 1 +gutierrezogu 1 +gutierrezth 1 +depart 1 +comput 1 +scienc 1 +theunivers 1 +texa 1 +austinprojectsomioswwhlinksconfer 1 +databas 1 +systemsth 1 +worldemail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..ca06f3c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,209 @@ +oop 1 +research 1 +groupoop 1 +groupthi 1 +home 1 +page 1 +group 1 +supervis 1 +prof 1 +paul 1 +wilson 1 +graduat 1 +studentsin 1 +stephen 1 +carl 1 +ajit 1 +georg 1 +mark 1 +johnston 1 +sheetal 1 +kakkad 1 +scott 1 +kaplan 1 +donovan 1 +kolbl 1 +michael 1 +neeli 1 +qing 1 +dougla 1 +wieren 1 +area 1 +memori 1 +hierarchi 1 +especi 1 +persist 1 +object 1 +store 1 +distribut 1 +virtual 1 +cach 1 +develop 1 +simpl 1 +high 1 +perform 1 +call 1 +texa 1 +us 1 +pointer 1 +swizzl 1 +fault 1 +time 1 +toimplement 1 +larg 1 +address 1 +space 1 +effici 1 +stock 1 +hardwar 1 +andoper 1 +system 1 +standard 1 +compil 1 +basic 1 +studi 1 +program 1 +behavior 1 +alloc 1 +whichattempt 1 +repair 1 +damag 1 +done 1 +three 1 +decad 1 +mostli 1 +unsoundstudi 1 +extens 1 +survei 1 +mike 1 +master 1 +thesi 1 +automat 1 +storag 1 +manag 1 +real 1 +generationaland 1 +small 1 +ongarbag 1 +collect 1 +paper 1 +local 1 +garbag 1 +collector 1 +recent 1 +hard 1 +written 1 +smart 1 +interfac 1 +orient 1 +scheme 1 +adapt 1 +managementfor 1 +file 1 +dynam 1 +andcompress 1 +structur 1 +checkpoint 1 +forfault 1 +toler 1 +travel 1 +debug 1 +implement 1 +highli 1 +portabl 1 +programmingsystem 1 +includ 1 +extend 1 +rscheme 1 +thread 1 +socket 1 +homepag 1 +info 1 +alpha 1 +releas 1 +sourc 1 +code 1 +cours 1 +noteson 1 +interpret 1 +rawascii 1 +text 1 +andrschemear 1 +also 1 +avail 1 +descript 1 +integr 1 +macro 1 +process 1 +algorithm 1 +tosupport 1 +languag 1 +open 1 +note 1 +thesiscontain 1 +refer 1 +coupl 1 +write 1 +whicharen 1 +draft 1 +form 1 +anywai 1 +sometimesoon 1 +onlin 1 +book 1 +progress 1 +introduct 1 +htmlformat 1 +brows 1 +contain 1 +materialfrom 1 +ascii 1 +much 1 +improv 1 +expandedpresent 1 +texinfo 1 +materiali 1 +work 1 +intro 1 +metaobject 1 +besid 1 +good 1 +gener 1 +providesa 1 +thing 1 +like 1 +make 1 +backgroundread 1 +list 1 +brief 1 +bibliographi 1 +heap 1 +fortexa 1 +anonym 1 +utexa 1 +readm 1 +materi 1 +subdirectori 1 +whichcontain 1 +oopsla 1 +workshop 1 +peopl 1 +interest 1 +henri 1 +baker 1 +sftp 1 +site 1 +although 1 +overload 1 +notb 1 +access 1 +keep 1 +try 1 +anoth 1 +great 1 +han 1 +boehm 1 +sever 1 +well 1 +free 1 +severalgarbag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..3d5d90ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,11 @@ +robert 1 +otuomagieaddress 1 +austin 1 +phone 1 +emailotu 1 +utexa 1 +eduuniververs 1 +depart 1 +infouniversityth 1 +univers 1 +txa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..d40715c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,74 @@ +padgettdon 1 +padgett 1 +dissert 1 +researchi 1 +work 1 +professor 1 +brown 1 +design 1 +andimplement 1 +devic 1 +control 1 +softwar 1 +driver 1 +investig 1 +creation 1 +domain 1 +specif 1 +program 1 +environ 1 +construct 1 +class 1 +focu 1 +thu 1 +languag 1 +compil 1 +technolog 1 +devis 1 +prototyp 1 +specifi 1 +softar 1 +call 1 +virtual 1 +contain 1 +variou 1 +featur 1 +reduc 1 +effort 1 +requir 1 +refer 1 +manualfor 1 +current 1 +postscript 1 +draft 1 +manual 1 +avail 1 +view 1 +exampl 1 +also 1 +counter 1 +compon 1 +follow 1 +file 1 +physic 1 +interfac 1 +implement 1 +export 1 +multifunct 1 +transpar 1 +us 1 +recent 1 +present 1 +creat 1 +microsoft 1 +powerpointvers 1 +window 1 +contact 1 +meemail 1 +utexa 1 +edupost 1 +depart 1 +comput 1 +scienc 1 +austin 1 +usafax 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..f29498be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,90 @@ +home 1 +page 1 +philip 1 +hardinphilip 1 +hardinabout 1 +plan 1 +elimin 1 +bug 1 +softwar 1 +everywher 1 +fail 1 +fallback 1 +write 1 +game 1 +battlebal 1 +access 1 +binari 1 +execut 1 +file 1 +oper 1 +system 1 +unfortun 1 +avail 1 +want 1 +port 1 +suno 1 +solari 1 +work 1 +go 1 +school 1 +time 1 +multiplay 1 +wrote 1 +runsund 1 +window 1 +line 1 +code 1 +us 1 +standard 1 +templat 1 +librari 1 +anda 1 +campbel 1 +space 1 +partit 1 +tree 1 +imag 1 +screenshot 1 +student 1 +univers 1 +texa 1 +austin 1 +guess 1 +comput 1 +scienc 1 +depart 1 +interest 1 +research 1 +area 1 +geometr 1 +model 1 +graphic 1 +realli 1 +cool 1 +main 1 +graphicssoftwar 1 +reus 1 +engin 1 +gener 1 +group 1 +automat 1 +programmingto 1 +contact 1 +meemail 1 +pahardin 1 +utexa 1 +edupost 1 +usanetrek 1 +server 1 +pita 1 +curli 1 +handl 1 +digitaldisast 1 +look 1 +get 1 +plaster 1 +congradul 1 +smartest 1 +person 1 +inth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..95ade896 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,82 @@ +paul 1 +mcquestenpaul 1 +mcquestenphd 1 +studentdepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +interest 1 +interact 1 +learn 1 +evolut 1 +think 1 +mechan 1 +natur 1 +might 1 +bepract 1 +addit 1 +current 1 +techniqu 1 +exampl 1 +death 1 +usual 1 +studi 1 +explicitli 1 +email 1 +paulmcq 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +phone 1 +postal 1 +address 1 +austindepart 1 +spring 1 +head 1 +forcsp 1 +introduct 1 +pascal 1 +programmingmor 1 +neuro 1 +inmoriarti 1 +research 1 +link 1 +atcnr 1 +rome 1 +neural 1 +network 1 +group 1 +artifici 1 +intellig 1 +cours 1 +schedul 1 +depart 1 +handi 1 +access 1 +tout 1 +librari 1 +onlin 1 +seriou 1 +reflect 1 +dave 1 +winer 1 +websit 1 +need 1 +pointer 1 +wast 1 +hour 1 +surf 1 +check 1 +cynb 1 +humong 1 +hotlist 1 +mix 1 +knowledg 1 +knick 1 +knack 1 +nut 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..de575291 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,32 @@ +pawan 1 +goyal 1 +home 1 +page 1 +research 1 +summari 1 +public 1 +multimedia 1 +network 1 +protocol 1 +file 1 +system 1 +oper 1 +affili 1 +group 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +get 1 +touch 1 +email 1 +pawang 1 +utexa 1 +inform 1 +finger 1 +also 1 +check 1 +log 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..ed03f8f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,115 @@ +jose 1 +pecina 1 +pecinaabout 1 +obtain 1 +physic 1 +univers 1 +texa 1 +austin 1 +previous 1 +complet 1 +master 1 +innuclear 1 +engin 1 +current 1 +finish 1 +thesi 1 +comput 1 +scienc 1 +depart 1 +graduat 1 +studi 1 +workedinvestig 1 +gaug 1 +theori 1 +graviti 1 +base 1 +group 1 +quantiz 1 +gravit 1 +field 1 +calcul 1 +invari 1 +unitari 1 +irreduc 1 +represent 1 +publish 1 +three 1 +joint 1 +paper 1 +dissert 1 +advisor 1 +yuval 1 +eman 1 +georg 1 +sudarshan 1 +jurgen 1 +lemk 1 +fromcologn 1 +germani 1 +previou 1 +posit 1 +bureau 1 +econom 1 +geologi 1 +spent 1 +year 1 +half 1 +work 1 +seismic 1 +invers 1 +tomographi 1 +supervisor 1 +hardag 1 +editor 1 +geophys 1 +journal 1 +societi 1 +explor 1 +geophysicist 1 +research 1 +interest 1 +algorithm 1 +numer 1 +analysi 1 +parallel 1 +cryptographi 1 +quantum 1 +also 1 +visitor 1 +thephys 1 +carnegi 1 +mellon 1 +pittsburgh 1 +open 1 +compani 1 +scientif 1 +softwar 1 +develop 1 +fill 1 +comerci 1 +symmetri 1 +algebra 1 +sequenti 1 +solutionsin 1 +gener 1 +rel 1 +problem 1 +chromodynamicsmi 1 +curriculum 1 +vita 1 +click 1 +want 1 +print 1 +contact 1 +center 1 +particl 1 +usavoic 1 +main 1 +offic 1 +home 1 +mail 1 +utexa 1 +orpecina 1 +defo 1 +phy 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..1111f960 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,56 @@ +nettl 1 +welcom 1 +unix 1 +cornerinfolik 1 +anyon 1 +would 1 +want 1 +know 1 +make 1 +realli 1 +short 1 +blahblah 1 +system 1 +woof 1 +window 1 +blah 1 +stuff 1 +eeek 1 +staff 1 +escap 1 +floor 1 +mean 1 +ibm 1 +file 1 +afteri 1 +instal 1 +stori 1 +true 1 +name 1 +chang 1 +protect 1 +theinnoc 1 +experiment 1 +work 1 +pleas 1 +think 1 +us 1 +neat 1 +page 1 +cool 1 +comic 1 +buena 1 +vista 1 +movieplex 1 +find 1 +meyour 1 +chanc 1 +increas 1 +drastic 1 +could 1 +employan 1 +improb 1 +drive 1 +send 1 +mail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..71e05f36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,25 @@ +greg 1 +plaxtongreg 1 +plaxtoncontact 1 +informationemail 1 +plaxton 1 +utexa 1 +eduphon 1 +offic 1 +taylor 1 +hall 1 +postal 1 +depart 1 +comput 1 +sciencetaylor 1 +univers 1 +texa 1 +austinaustin 1 +inform 1 +annual 1 +report 1 +profilepubl 1 +last 1 +modifi 1 +decemb 1 +plaxtonplaxton 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..9e6ef6c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,85 @@ +bruce 1 +porter 1 +porterassoci 1 +professor 1 +faculti 1 +fellow 1 +comput 1 +scienc 1 +univers 1 +california 1 +irvin 1 +honor 1 +award 1 +profession 1 +servic 1 +presidenti 1 +young 1 +investig 1 +editor 1 +machin 1 +learn 1 +presentarea 1 +interestartifici 1 +intellig 1 +knowledg 1 +base 1 +system 1 +summari 1 +researchhead 1 +research 1 +group 1 +develop 1 +method 1 +build 1 +larg 1 +basesand 1 +us 1 +solv 1 +problem 1 +answer 1 +question 1 +researchinterest 1 +case 1 +select 1 +recent 1 +public 1 +rickel 1 +autom 1 +model 1 +predict 1 +thetim 1 +scale 1 +boundari 1 +aaai 1 +cambridg 1 +aait 1 +press 1 +abstract 1 +andpostscript 1 +brant 1 +rule 1 +preced 1 +complementari 1 +warrant 1 +bareiss 1 +holt 1 +concept 1 +heurist 1 +classif 1 +weak 1 +theori 1 +domain 1 +artifici 1 +journal 1 +abstractand 1 +postscript 1 +hotlist 1 +search 1 +site 1 +page 1 +email 1 +address 1 +tech 1 +reportport 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..e1ad7ba9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,197 @@ +group 1 +austinpsp 1 +austinthi 1 +home 1 +page 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +ataustin 1 +stand 1 +program 1 +specif 1 +proof 1 +emphasi 1 +work 1 +deriveparallel 1 +distribut 1 +rigor 1 +manner 1 +issupervis 1 +jayadev 1 +misra 1 +develop 1 +theori 1 +research 1 +area 1 +uniti 1 +powerlist 1 +seuss 1 +current 1 +former 1 +member 1 +groupinclud 1 +adam 1 +carruth 1 +erni 1 +cohen 1 +graduat 1 +rajeev 1 +joshi 1 +marku 1 +kaltenbach 1 +edgar 1 +knapp 1 +jacob 1 +kornerup 1 +ingolf 1 +krger 1 +josyula 1 +mark 1 +staskauska 1 +publicationsbelow 1 +summar 1 +wherev 1 +possibl 1 +give 1 +link 1 +topap 1 +avail 1 +electron 1 +notat 1 +logic 1 +reason 1 +parallel 1 +anddistribut 1 +present 1 +book 1 +chandi 1 +design 1 +foundat 1 +addison 1 +weslei 1 +note 1 +seri 1 +ofpap 1 +variou 1 +result 1 +applic 1 +thenot 1 +assum 1 +basic 1 +understand 1 +inchandi 1 +sinc 1 +public 1 +sever 1 +improv 1 +made 1 +inth 1 +reflect 1 +written 1 +amanuscript 1 +newun 1 +includ 1 +introduct 1 +tempor 1 +operatorco 1 +specifi 1 +safeti 1 +refer 1 +forrefer 1 +paper 1 +implement 1 +write 1 +asymbol 1 +model 1 +checker 1 +forfinit 1 +state 1 +call 1 +unityverifi 1 +extend 1 +toinclud 1 +real 1 +time 1 +aspect 1 +hybrid 1 +system 1 +synchron 1 +circuit 1 +data 1 +structur 1 +list 1 +length 1 +equal 1 +power 1 +twodiffer 1 +oper 1 +balanc 1 +divis 1 +mani 1 +parallelalgorithm 1 +succinct 1 +simpl 1 +thepowerlist 1 +recurs 1 +givesnumer 1 +exampl 1 +algorithm 1 +correct 1 +fast 1 +fourier 1 +transform 1 +batcher 1 +sort 1 +network 1 +studi 1 +differ 1 +arithmet 1 +asadd 1 +multipli 1 +prove 1 +verifi 1 +addercircuit 1 +us 1 +programscan 1 +map 1 +effici 1 +architectur 1 +speciallyhypercub 1 +detail 1 +offspr 1 +address 1 +issu 1 +ofprogram 1 +composit 1 +restrict 1 +compon 1 +caninterfer 1 +read 1 +overview 1 +chapter 1 +froma 1 +monograph 1 +adisciplin 1 +multiprogram 1 +alsoavail 1 +compil 1 +genrat 1 +code 1 +callsfor 1 +messag 1 +commun 1 +describ 1 +thesi 1 +anexperi 1 +concurr 1 +object 1 +basedprogram 1 +languag 1 +ingolfkrg 1 +site 1 +found 1 +thepsp 1 +sitejacob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..74785440 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,71 @@ +qime 1 +huang 1 +depart 1 +comput 1 +sciencesunivers 1 +texa 1 +austin 1 +phone 1 +email 1 +utexa 1 +edureceiv 1 +scienc 1 +nankai 1 +univers 1 +tianjin 1 +inform 1 +univeris 1 +hawaii 1 +manoa 1 +hawaiiwork 1 +austincours 1 +spring 1 +advanc 1 +telecommun 1 +client 1 +server 1 +system 1 +develop 1 +appli 1 +data 1 +commun 1 +cours 1 +academ 1 +resourc 1 +teamweb 1 +center 1 +utcssadaili 1 +texanstock 1 +room 1 +attract 1 +picturesimageschines 1 +popsend 1 +card 1 +electr 1 +postcard 1 +line 1 +job 1 +jobtrakut 1 +placement 1 +connect 1 +gopherftp 1 +csc 1 +newstelnet 1 +cschen 1 +junk 1 +staffyour 1 +person 1 +visit 1 +page 1 +pleas 1 +sign 1 +guest 1 +book 1 +commentsguest 1 +name 1 +comment 1 +construct 1 +last 1 +modifi 1 +march 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..d0df055c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,63 @@ +qualit 1 +reason 1 +research 1 +utexasqualit 1 +utexasth 1 +group 1 +sever 1 +area 1 +thephys 1 +world 1 +qsim 1 +index 1 +bibliographi 1 +ourresearch 1 +paper 1 +user 1 +applic 1 +like 1 +system 1 +spatial 1 +intelligentrobot 1 +tour 1 +access 1 +limit 1 +logic 1 +knowledgerepresent 1 +algernon 1 +supervis 1 +professor 1 +benjaminkuip 1 +kuiper 1 +utexa 1 +part 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +depart 1 +atth 1 +univers 1 +texa 1 +ataustin 1 +pointer 1 +book 1 +graduat 1 +student 1 +robot 1 +knowledg 1 +represent 1 +alumni 1 +includ 1 +dissert 1 +visitor 1 +abstract 1 +yellow 1 +page 1 +softwar 1 +easili 1 +areadescript 1 +also 1 +visit 1 +directori 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..a79f90d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,62 @@ +qiang 1 +john 1 +home 1 +page 1 +welcom 1 +pleas 1 +feel 1 +free 1 +brows 1 +around 1 +leav 1 +comment 1 +suggest 1 +joke 1 +thank 1 +visit 1 +come 1 +seed 1 +scrollit_rl 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +sinc 1 +com 1 +austin 1 +time 1 +self 1 +introduct 1 +current 1 +master 1 +program 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +click 1 +inform 1 +seriousjunk 1 +cours 1 +languag 1 +unix 1 +graphic 1 +linux 1 +technic 1 +java 1 +realjunk 1 +sport 1 +game 1 +new 1 +struggleforliv 1 +institut 1 +qzuo 1 +utexa 1 +guestbook 1 +still 1 +underconstruct 1 +back 1 +last 1 +modif 1 +copyright 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..37a1eac6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,57 @@ +robert 1 +blumoferobert 1 +blumofei 1 +gener 1 +bobbi 1 +last 1 +name 1 +pronounc 1 +bloom 1 +informationassist 1 +professor 1 +ofcomput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +comput 1 +massachusett 1 +institut 1 +technolog 1 +brown 1 +work 1 +cilkmultithread 1 +languag 1 +runtim 1 +system 1 +laboratori 1 +experiment 1 +softwar 1 +less 1 +compil 1 +list 1 +paper 1 +document 1 +also 1 +avail 1 +directori 1 +semest 1 +spring 1 +teach 1 +abstract 1 +data 1 +type 1 +contact 1 +informationemail 1 +utexa 1 +eduphon 1 +offic 1 +taylor 1 +hallpost 1 +depart 1 +sciencestaylor 1 +hall 1 +austinaustin 1 +modifi 1 +decemb 1 +blumoferdb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..e6fcbbdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,41 @@ +wang 1 +homepag 1 +wangwelcom 1 +current 1 +construct 1 +page 1 +content 1 +time 1 +permit 1 +candid 1 +depart 1 +comput 1 +scienc 1 +theunivers 1 +texa 1 +austin 1 +research 1 +interest 1 +includ 1 +real 1 +system 1 +rule 1 +base 1 +program 1 +analysi 1 +softwar 1 +engin 1 +artifici 1 +intellig 1 +publicationsi 1 +list 1 +public 1 +avail 1 +brows 1 +last 1 +updat 1 +pleas 1 +send 1 +comment 1 +rhwang 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..f08fd6c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,77 @@ +risto 1 +miikkulainenristo 1 +miikkulainenassoci 1 +professor 1 +computersci 1 +univers 1 +oftexa 1 +austin 1 +comput 1 +scienc 1 +ucla 1 +appli 1 +mathemat 1 +helsinki 1 +technolog 1 +research 1 +intereststh 1 +group 1 +concentr 1 +model 1 +cognit 1 +processeswith 1 +artifici 1 +neural 1 +network 1 +current 1 +work 1 +includ 1 +languageacquisit 1 +episod 1 +memori 1 +self 1 +organ 1 +visual 1 +cortex 1 +schema 1 +base 1 +vision 1 +also 1 +evolv 1 +networkswith 1 +genet 1 +algorithm 1 +goal 1 +automat 1 +discoversequenti 1 +decis 1 +strategi 1 +problem 1 +solv 1 +robot 1 +detail 1 +utc 1 +home 1 +page 1 +classessumm 1 +intellig 1 +undergradu 1 +lectur 1 +cours 1 +fall 1 +graduat 1 +seminar 1 +spring 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +address 1 +utexa 1 +phone 1 +postal 1 +depart 1 +texa 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..2cff8874 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,2 @@ +rong 1 +bigfoot 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..068ece1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,52 @@ +rajmohan 1 +rajaraman 1 +home 1 +pagerajmohan 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +atth 1 +univers 1 +texa 1 +ataustin 1 +plan 1 +complet 1 +spring 1 +mydissert 1 +supervisor 1 +gregplaxton 1 +research 1 +member 1 +algorithm 1 +andcomput 1 +theori 1 +group 1 +particularli 1 +interest 1 +incombinator 1 +distribut 1 +network 1 +onlin 1 +parallel 1 +model 1 +random 1 +list 1 +mypubl 1 +curriculum 1 +vita 1 +us 1 +link 1 +relat 1 +sciencemiscellan 1 +linkscontact 1 +inform 1 +email 1 +rraj 1 +utexa 1 +austin 1 +phone 1 +offic 1 +ephon 1 +postal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..3dc760ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,86 @@ +rong 1 +homepagea 1 +homepag 1 +chinachina 1 +jinan 1 +myhometown 1 +capit 1 +shandong 1 +provinc 1 +tsinghua 1 +univers 1 +gotmi 1 +depart 1 +computersci 1 +technolog 1 +beij 1 +ofth 1 +peopl 1 +republ 1 +ofchina 1 +spent 1 +five 1 +colleg 1 +year 1 +zhai 1 +sinanet 1 +austin 1 +texasaustin 1 +citi 1 +live 1 +weather 1 +apart 1 +rent 1 +graduat 1 +utcsth 1 +texa 1 +comput 1 +scienc 1 +utnetcat 1 +browsabl 1 +onlin 1 +catalog 1 +librari 1 +utaccess 1 +austininform 1 +servic 1 +login 1 +utcat 1 +cours 1 +fall 1 +oper 1 +systemsdynam 1 +file 1 +replic 1 +final 1 +project 1 +graphicsc 1 +introduct 1 +mathemat 1 +logicc 1 +moviesaustin 1 +chronicl 1 +film 1 +time 1 +yahoo 1 +entertain 1 +movi 1 +filmsmicrosoft 1 +cinemania 1 +onlineal 1 +guidehollywood 1 +onlineinternet 1 +databaserog 1 +ebert 1 +moviesvisit 1 +page 1 +contactmail 1 +address 1 +river 1 +aaustin 1 +telephon 1 +emailrtan 1 +utexa 1 +fingerclick 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..7be5e770 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,66 @@ +rupert 1 +tang 1 +home 1 +page 1 +student 1 +dept 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +almost 1 +miracl 1 +modern 1 +teach 1 +method 1 +entir 1 +strangl 1 +holi 1 +curious 1 +inquiri 1 +delic 1 +littl 1 +plant 1 +need 1 +anyth 1 +besid 1 +stimul 1 +freedom 1 +think 1 +life 1 +would 1 +realli 1 +empti 1 +depriv 1 +opportun 1 +alwai 1 +choos 1 +altern 1 +distast 1 +deni 1 +actual 1 +wish 1 +aspir 1 +fear 1 +duress 1 +fate 1 +much 1 +differ 1 +truck 1 +wash 1 +machin 1 +nice 1 +meet 1 +complet 1 +cool 1 +servic 1 +know 1 +academ 1 +interest 1 +research 1 +messi 1 +area 1 +construct 1 +utexa 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..82d0b0ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,16 @@ +ruwei 1 +homepag 1 +frame 1 +alert 1 +see 1 +messag 1 +us 1 +browser 1 +support 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..c1c63b7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,87 @@ +robert 1 +geijn 1 +geijnassoci 1 +professor 1 +depart 1 +comput 1 +scienc 1 +texa 1 +institut 1 +appliedmathemat 1 +univers 1 +austin 1 +phone 1 +mail 1 +rvdg 1 +utexa 1 +http 1 +user 1 +mathemat 1 +wisconsin 1 +madison 1 +appli 1 +maryland 1 +colleg 1 +park 1 +area 1 +interestnumer 1 +analysi 1 +parallel 1 +supercomput 1 +scientif 1 +summari 1 +researchth 1 +introduct 1 +forc 1 +evalu 1 +oftradit 1 +numer 1 +method 1 +develop 1 +sequentialmachin 1 +case 1 +techniqu 1 +continu 1 +us 1 +inoth 1 +prove 1 +perform 1 +better 1 +researchconcentr 1 +forimpl 1 +well 1 +environ 1 +allowssuch 1 +easili 1 +implement 1 +variou 1 +parallelprocessor 1 +inform 1 +graduat 1 +program 1 +workshop 1 +infrastructur 1 +applic 1 +april 1 +current 1 +project 1 +intercom 1 +plapack 1 +sl_librari 1 +public 1 +book 1 +journal 1 +confer 1 +technic 1 +report 1 +tutori 1 +major 1 +softwar 1 +effort 1 +class 1 +fall 1 +schedul 1 +former 1 +student 1 +meet 1 +famili 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..e568dcd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,40 @@ +sundeep 1 +abraham 1 +home 1 +page 1 +seed 1 +scrollit_rl 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +sundeepabraham 1 +master 1 +student 1 +computersci 1 +dept 1 +universityof 1 +texa 1 +austin 1 +undergradu 1 +studi 1 +comput 1 +sciencesand 1 +engin 1 +region 1 +engg 1 +colleg 1 +calicut 1 +india 1 +countri 1 +hail 1 +state 1 +kerala 1 +know 1 +contact 1 +click 1 +construct 1 +tinkerwith 1 +time 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..1692b6bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,135 @@ +sammi 1 +altavista 1 +startingpoint 1 +yahoo 1 +lyco 1 +map 1 +refer 1 +weather 1 +white 1 +page 1 +yellow 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +utexa 1 +congress 1 +shall 1 +make 1 +respect 1 +establish 1 +religion 1 +orprohibit 1 +free 1 +exercis 1 +thereof 1 +abridg 1 +freedom 1 +ofspeech 1 +press 1 +right 1 +peopl 1 +peaceabl 1 +toassembl 1 +petit 1 +govern 1 +redress 1 +grievanc 1 +constitut 1 +unit 1 +state 1 +america 1 +research 1 +prof 1 +calvin 1 +robert 1 +geijn 1 +project 1 +parallel 1 +linear 1 +algebra 1 +packag 1 +schedul 1 +artifici 1 +intellig 1 +topic 1 +distribut 1 +languag 1 +compil 1 +advanc 1 +oper 1 +system 1 +experi 1 +world 1 +wide 1 +herbarium 1 +plapack 1 +sign 1 +hypertext 1 +code 1 +anagram 1 +server 1 +librari 1 +friend 1 +nil 1 +virtual 1 +reker 1 +yanni 1 +musician 1 +jeff 1 +hockei 1 +andrea 1 +hamilton 1 +technolog 1 +famili 1 +pop 1 +guyer 1 +public 1 +health 1 +northwestern 1 +anthropolog 1 +kate 1 +nate 1 +activ 1 +swim 1 +link 1 +entertain 1 +showbiz 1 +chronicl 1 +pollstar 1 +concert 1 +databas 1 +html 1 +quick 1 +guid 1 +dell 1 +fring 1 +ryder 1 +laptop 1 +info 1 +consortium 1 +miscellan 1 +boston 1 +hotlist 1 +list 1 +traveloc 1 +construct 1 +offic 1 +address 1 +taylor 1 +hall 1 +home 1 +great 1 +hill 1 +eduth 1 +opinion 1 +express 1 +mine 1 +necessarili 1 +repres 1 +view 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..7b1ac140 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,50 @@ +sawadajun 1 +sawadacontact 1 +inform 1 +email 1 +address 1 +sawada 1 +utexa 1 +offic 1 +main 1 +mail 1 +univ 1 +texa 1 +depart 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +austin 1 +home 1 +wooten 1 +dissert 1 +oral 1 +propos 1 +time 1 +place 1 +abstract 1 +paper 1 +file 1 +supplementari 1 +technic 1 +report 1 +kbresourc 1 +common 1 +lisp 1 +languag 1 +edit 1 +bowen 1 +formal 1 +method 1 +page 1 +around 1 +world 1 +pvsother 1 +frequent 1 +access 1 +teacher 1 +fellow 1 +logic 1 +boyer 1 +class 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..3d8c7277 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,169 @@ +ssgrg 1 +titl 1 +pagewelcom 1 +home 1 +page 1 +softwar 1 +system 1 +gener 1 +research 1 +group 1 +tool 1 +assembl 1 +complex 1 +interchang 1 +reusabl 1 +compon 1 +develop 1 +genvoca 1 +domain 1 +independ 1 +model 1 +construct 1 +defin 1 +algebra 1 +equat 1 +term 1 +successfulli 1 +appli 1 +mani 1 +includ 1 +databas 1 +manag 1 +avion 1 +data 1 +structur 1 +result 1 +demonstr 1 +substanti 1 +improv 1 +product 1 +applic 1 +time 1 +perform 1 +first 1 +visit 1 +question 1 +best 1 +place 1 +start 1 +take 1 +look 1 +get 1 +member 1 +project 1 +index 1 +public 1 +distribut 1 +relat 1 +utc 1 +batori 1 +professorangela 1 +dappert 1 +studentguillermo 1 +jimenez 1 +perezph 1 +studentjeff 1 +thomasph 1 +studentl 1 +tokuda 1 +studentyanni 1 +smaragdaki 1 +studentk 1 +shepherdresearch 1 +associateform 1 +graduat 1 +datesdinesh 1 +dasph 1 +milli 1 +villarrealph 1 +decemb 1 +bart 1 +geracipostdoc 1 +marti 1 +sirkinph 1 +march 1 +sankar 1 +dasarim 1 +overview 1 +us 1 +build 1 +typic 1 +modul 1 +encapsul 1 +featur 1 +share 1 +possibl 1 +must 1 +refin 1 +differ 1 +part 1 +class 1 +requir 1 +manipul 1 +metadata 1 +reflect 1 +comput 1 +thu 1 +like 1 +basic 1 +approach 1 +goe 1 +beyond 1 +simpl 1 +object 1 +orient 1 +larg 1 +scale 1 +program 1 +transform 1 +feel 1 +issu 1 +involv 1 +breadth 1 +recommend 1 +follow 1 +paper 1 +starter 1 +read 1 +order 1 +scalabl 1 +librari 1 +creat 1 +refer 1 +architectur 1 +design 1 +implement 1 +composit 1 +valid 1 +subject 1 +specif 1 +deliv 1 +relationship 1 +work 1 +pattern 1 +check 1 +reengin 1 +lightweight 1 +dbm 1 +memori 1 +simul 1 +generatorsautom 1 +evolut 1 +inform 1 +pleas 1 +contact 1 +utexa 1 +period 1 +releas 1 +lectur 1 +note 1 +tutori 1 +reus 1 +avail 1 +file 1 +contain 1 +compress 1 +postscript 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..80e3530d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,142 @@ +scott 1 +realli 1 +basic 1 +home 1 +pagescott 1 +pageokai 1 +long 1 +overdu 1 +slight 1 +updat 1 +page 1 +doesn 1 +mean 1 +go 1 +look 1 +fanci 1 +expect 1 +littl 1 +least 1 +current 1 +interest 1 +inform 1 +work 1 +stuff 1 +univers 1 +texa 1 +austin 1 +depart 1 +finish 1 +year 1 +try 1 +think 1 +mani 1 +oop 1 +group 1 +research 1 +within 1 +utc 1 +system 1 +languag 1 +class 1 +object 1 +orient 1 +design 1 +instructor 1 +glenn 1 +down 1 +amherst 1 +colleg 1 +great 1 +place 1 +spend 1 +undergradu 1 +dai 1 +time 1 +spent 1 +unpredict 1 +santa 1 +institut 1 +amaz 1 +summer 1 +differ 1 +kind 1 +hobbi 1 +stuffit 1 +perfect 1 +linux 1 +invalu 1 +squash 1 +pageth 1 +mead 1 +like 1 +wine 1 +beer 1 +psion 1 +maker 1 +cool 1 +palmtop 1 +without 1 +would 1 +forget 1 +name 1 +peopl 1 +anastasi 1 +want 1 +check 1 +well 1 +sharp 1 +much 1 +free 1 +neat 1 +miscellan 1 +item 1 +particular 1 +order 1 +dine 1 +guid 1 +actual 1 +import 1 +part 1 +citizen 1 +poke 1 +good 1 +humor 1 +publish 1 +appl 1 +comput 1 +still 1 +thing 1 +right 1 +iici 1 +last 1 +forev 1 +ala 1 +longer 1 +sure 1 +bright 1 +futur 1 +type 1 +machin 1 +bebox 1 +could 1 +simpl 1 +better 1 +noth 1 +probabl 1 +didn 1 +wait 1 +five 1 +minut 1 +load 1 +send 1 +email 1 +maintain 1 +sfkaplan 1 +utexa 1 +might 1 +grab 1 +includ 1 +link 1 +encrypt 1 +gener 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..9c94df7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,34 @@ +prashant 1 +shenoi 1 +home 1 +page 1 +welcom 1 +get 1 +touch 1 +email 1 +utexa 1 +river 1 +austin 1 +univers 1 +texa 1 +depart 1 +comput 1 +scienc 1 +offic 1 +main 1 +tower 1 +floor 1 +build 1 +inform 1 +finger 1 +also 1 +check 1 +log 1 +multimedia 1 +group 1 +list 1 +recent 1 +public 1 +avail 1 +onlin 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..bc2eb7e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,35 @@ +shaob 1 +cyberhom 1 +welcom 1 +current 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +want 1 +know 1 +check 1 +ongo 1 +work 1 +hardvar 1 +verifc 1 +network 1 +vlsi 1 +final 1 +project 1 +fall 1 +bookshelf 1 +coffe 1 +tabl 1 +campu 1 +citi 1 +make 1 +contact 1 +pleasant 1 +vallei 1 +shma 1 +utexa 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..8ea91868 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,93 @@ +shailesh 1 +kumarshailesh 1 +kumarth 1 +univers 1 +texa 1 +austindepart 1 +comput 1 +sciencestaylor 1 +hall 1 +austin 1 +skumar 1 +utexa 1 +edumi 1 +resumeresearch 1 +interest 1 +publicationscontact 1 +mesrcm 1 +spiritu 1 +affili 1 +internet 1 +offersom 1 +linkscognit 1 +scienceutc 1 +neural 1 +network 1 +researchutc 1 +machin 1 +learn 1 +research 1 +groupresearch 1 +artifici 1 +intellig 1 +life 1 +neuroevolut 1 +applic 1 +genet 1 +algorithm 1 +cellular 1 +automata 1 +chao 1 +nonlinear 1 +dynam 1 +fuzzi 1 +logic 1 +massiv 1 +parallel 1 +processor 1 +publicationson 1 +line 1 +adapt 1 +signal 1 +predistort 1 +dual 1 +reinforc 1 +page 1 +patrick 1 +goetz 1 +kumar 1 +risto 1 +miikkulainen 1 +appli 1 +mathemat 1 +depart 1 +scienc 1 +proceed 1 +annual 1 +confer 1 +bari 1 +itali 1 +object 1 +base 1 +evolut 1 +program 1 +bord 1 +singh 1 +symposium 1 +aprl 1 +india 1 +contact 1 +snail 1 +mail 1 +whiti 1 +avenu 1 +phone 1 +home 1 +offic 1 +offernet 1 +assistancesearch 1 +tool 1 +qualiti 1 +institutewww 1 +infoindia 1 +music 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..747ff082 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,47 @@ +smaragdaki 1 +yanni 1 +utc 1 +posit 1 +graduat 1 +student 1 +interest 1 +alchemi 1 +project 1 +turn 1 +lead 1 +gold 1 +moder 1 +success 1 +plan 1 +scheme 1 +make 1 +arrang 1 +someth 1 +program 1 +webster 1 +world 1 +dictionari 1 +serious 1 +depart 1 +comput 1 +scienc 1 +main 1 +research 1 +area 1 +meta 1 +system 1 +applic 1 +particularli 1 +softwar 1 +gener 1 +photo 1 +album 1 +favorit 1 +sitessmaragd 1 +utexa 1 +eduyanni 1 +smaragdakisunivers 1 +texa 1 +austin 1 +departmenttai 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..0e9cd60e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,58 @@ +homepag 1 +edward 1 +danger 1 +construct 1 +site 1 +fall 1 +asphalt 1 +bodi 1 +resum 1 +tell 1 +like 1 +danc 1 +shadow 1 +moonlight 1 +click 1 +email 1 +utexa 1 +person 1 +data 1 +strictli 1 +need 1 +know 1 +basi 1 +pleas 1 +send 1 +effort 1 +duli 1 +note 1 +pictur 1 +interest 1 +link 1 +greatest 1 +experi 1 +stimul 1 +nerv 1 +center 1 +wouldn 1 +mean 1 +absolut 1 +ever 1 +dy 1 +ignor 1 +previou 1 +minion 1 +recent 1 +addit 1 +field 1 +trip 1 +pania 1 +haiku 1 +leaf 1 +afloat 1 +wind 1 +stream 1 +eddi 1 +waterfal 1 +life 1 +visitor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..75ac8ac8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,29 @@ +southerart 1 +southerresearchbuild 1 +member 1 +knowledg 1 +base 1 +system 1 +research 1 +group 1 +contact 1 +inform 1 +email 1 +souther 1 +utexa 1 +work 1 +mail 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +austin 1 +hotlist 1 +search 1 +site 1 +page 1 +address 1 +public 1 +tech 1 +reportsouth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..2ca478de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,60 @@ +sowmya 1 +ramachandransowmya 1 +ramachandranmachin 1 +learn 1 +research 1 +groupunivers 1 +texa 1 +austinresearchmi 1 +area 1 +machin 1 +field 1 +ofartif 1 +intellig 1 +interest 1 +problem 1 +learningbayesian 1 +network 1 +exampl 1 +bayesian 1 +withhidden 1 +variabl 1 +challeng 1 +approach 1 +appli 1 +symbol 1 +connectionist 1 +theori 1 +revis 1 +techniqu 1 +address 1 +thisproblem 1 +also 1 +design 1 +creat 1 +multimediaappl 1 +resum 1 +list 1 +paper 1 +educ 1 +comput 1 +scienc 1 +rutger 1 +univers 1 +tech 1 +indian 1 +institut 1 +technolog 1 +madra 1 +india 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +utexa 1 +postal 1 +austin 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..18207bdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,52 @@ +sriram 1 +home 1 +page 1 +raocurr 1 +research 1 +involv 1 +design 1 +implement 1 +multimedia 1 +file 1 +systemoper 1 +system 1 +support 1 +multimediai 1 +work 1 +multimediagroup 1 +comput 1 +sciencesdepart 1 +univers 1 +texa 1 +austin 1 +advisor 1 +prof 1 +harrickvinpublicationsminegroupcontact 1 +informationofficetai 1 +email 1 +utexa 1 +edudepart 1 +scienc 1 +austinaustin 1 +miscellaneousotherinterest 1 +pagespicturesof 1 +toweraustin 1 +isth 1 +capit 1 +locat 1 +central 1 +hill 1 +countri 1 +click 1 +herefor 1 +inform 1 +kannada 1 +koota 1 +informationabout 1 +tamil 1 +sangam 1 +comment 1 +pleas 1 +free 1 +send 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..5416459d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,15 @@ +tiger 1 +frame 1 +alert 1 +see 1 +messag 1 +us 1 +challeng 1 +browser 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..0da49b57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,50 @@ +sunghe 1 +choisunghe 1 +choiwelcom 1 +home 1 +page 1 +construct 1 +master 1 +program 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +educ 1 +engin 1 +seoul 1 +nation 1 +korea 1 +august 1 +work 1 +experi 1 +present 1 +system 1 +administr 1 +chemic 1 +graduat 1 +research 1 +assist 1 +prof 1 +aloysiu 1 +utc 1 +real 1 +time 1 +group 1 +contact 1 +inform 1 +nuec 1 +click 1 +list 1 +machin 1 +current 1 +log 1 +finger 1 +author 1 +choiemail 1 +utexa 1 +edulast 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..30e8be52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,70 @@ +home 1 +page 1 +sheetal 1 +kakkadsheet 1 +kakkadcontact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +postal 1 +address 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +usual 1 +avail 1 +best 1 +reach 1 +isvia 1 +email 1 +full 1 +finger 1 +research 1 +informationi 1 +member 1 +oop 1 +group 1 +inth 1 +part 1 +implement 1 +persist 1 +storagesystem 1 +call 1 +provid 1 +easi 1 +us 1 +novel 1 +techniqu 1 +pointer 1 +swizzl 1 +faulttim 1 +effici 1 +support 1 +larg 1 +standard 1 +hardwar 1 +pleas 1 +list 1 +mypubl 1 +along 1 +brief 1 +descript 1 +plan 1 +graduat 1 +myresum 1 +postscript 1 +current 1 +work 1 +motorola 1 +somerset 1 +design 1 +center 1 +whilefinish 1 +januari 1 +kakkad 1 +svkakkad 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..3f7164ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,62 @@ +home 1 +page 1 +shengm 1 +welcom 1 +homepageabout 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +want 1 +know 1 +check 1 +classmatesclass 1 +technolog 1 +china 1 +ustc 1 +class 1 +graduat 1 +school 1 +chines 1 +academi 1 +sciencesus 1 +linksut 1 +campu 1 +registrar 1 +gradaut 1 +studiesut 1 +libraryutaccesschines 1 +scholar 1 +associationchina 1 +chinesechinainternet 1 +distribut 1 +magazinestsinghua 1 +bbsncic 1 +bbschines 1 +novelschines 1 +classicsabout 1 +austinwhat 1 +weather 1 +todai 1 +citylimitsclassifi 1 +item 1 +sale 1 +austinto 1 +contact 1 +address 1 +medic 1 +art 1 +voic 1 +email 1 +utexa 1 +finger 1 +meyour 1 +comment 1 +suggest 1 +highli 1 +appreci 1 +visitorsinc 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..aa569537 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,15 @@ +wang 1 +frame 1 +alert 1 +see 1 +messag 1 +us 1 +browser 1 +support 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..6d5a6234 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,4 @@ +welcom 1 +home 1 +page 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..5c2f4d2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,30 @@ +renu 1 +tewarirenu 1 +tewariwhat 1 +addresshom 1 +austin 1 +home 1 +email 1 +tewari 1 +utexa 1 +work 1 +multimedia 1 +comput 1 +dept 1 +scienc 1 +univers 1 +texa 1 +public 1 +done 1 +internship 1 +watson 1 +research 1 +center 1 +plai 1 +interest 1 +site 1 +bore 1 +send 1 +comment 1 +name 1 +option 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..f48a8461 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,85 @@ +tumlin 1 +pierc 1 +photo 1 +brenda 1 +ladd 1 +photographi 1 +interest 1 +random 1 +stuff 1 +come 1 +soon 1 +stai 1 +tune 1 +research 1 +issu 1 +secur 1 +distribut 1 +system 1 +present 1 +studi 1 +formal 1 +logic 1 +develop 1 +analyz 1 +authenticationprotocol 1 +done 1 +work 1 +method 1 +synthesi 1 +specif 1 +verif 1 +parallel 1 +amwork 1 +paper 1 +resourc 1 +control 1 +communicatewith 1 +client 1 +mean 1 +queu 1 +messag 1 +draft 1 +addit 1 +student 1 +assist 1 +appli 1 +laboratori 1 +current 1 +project 1 +investig 1 +us 1 +evolutionari 1 +comput 1 +techniqu 1 +genet 1 +algorithm 1 +finit 1 +state 1 +machin 1 +inform 1 +click 1 +resum 1 +avail 1 +html 1 +postscript 1 +format 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +address 1 +utexa 1 +postal 1 +metric 1 +blvd 1 +austin 1 +page 1 +last 1 +updat 1 +novemb 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..12757cbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,173 @@ +tong 1 +wang 1 +home 1 +page 1 +netscap 1 +recommend 1 +view 1 +chines 1 +listen 1 +contain 1 +java 1 +applet 1 +visit 1 +mpeg 1 +viewer 1 +demo 1 +written 1 +pure 1 +nanj 1 +nank 1 +peopl 1 +republ 1 +china 1 +current 1 +program 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +austin 1 +seek 1 +full 1 +time 1 +resum 1 +html 1 +format 1 +click 1 +postscript 1 +shanghai 1 +jiao 1 +univeristi 1 +tsinghua 1 +beij 1 +jersei 1 +summerluc 1 +technolog 1 +system 1 +compani 1 +form 1 +result 1 +plan 1 +restructur 1 +bell 1 +laboratori 1 +work 1 +thissumm 1 +anoth 1 +homepag 1 +lucent 1 +life 1 +cours 1 +spring 1 +fall 1 +distribut 1 +prof 1 +misra 1 +commun 1 +network 1 +gouda 1 +theori 1 +zuckerman 1 +databas 1 +implement 1 +batori 1 +algorithm 1 +techniqu 1 +plexton 1 +graphic 1 +fussel 1 +perform 1 +multimedia 1 +teach 1 +assist 1 +introduct 1 +oper 1 +present 1 +fault 1 +toler 1 +clock 1 +synchron 1 +real 1 +april 1 +mobil 1 +host 1 +protocol 1 +project 1 +mini 1 +manag 1 +unix 1 +design 1 +tool 1 +standard 1 +compon 1 +libari 1 +robot 1 +opengl 1 +glut 1 +decod 1 +player 1 +plai 1 +semest 1 +know 1 +troubl 1 +made 1 +music 1 +favorit 1 +mariah 1 +boyz 1 +babyfac 1 +movi 1 +sound 1 +clip 1 +sampl 1 +misc 1 +zodiac 1 +person 1 +associ 1 +differ 1 +anim 1 +kind 1 +find 1 +friend 1 +high 1 +school 1 +attach 1 +normal 1 +haiq 1 +maintain 1 +shenfeng 1 +chen 1 +thank 1 +quit 1 +learn 1 +info 1 +perl 1 +tutori 1 +reach 1 +lake 1 +blvd 1 +twang 1 +utexa 1 +still 1 +underconstruct 1 +check 1 +like 1 +never 1 +leav 1 +eagl 1 +copyright 1 +creat 1 +last 1 +modifi 1 +background 1 +song 1 +deskmat 1 +lang 1 +visitor 1 +accord 1 +counter 1 +sinc 1 +trust 1 +book 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..fdcbe69b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,84 @@ +hermjakob 1 +home 1 +pageulf 1 +hermjakobhello 1 +welcom 1 +graduat 1 +student 1 +thedept 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austinand 1 +work 1 +dissert 1 +aboutexampl 1 +base 1 +decis 1 +make 1 +context 1 +orient 1 +pars 1 +machin 1 +translationund 1 +supervis 1 +prof 1 +raymond 1 +moonei 1 +activ 1 +utc 1 +natur 1 +languag 1 +acquisit 1 +groupand 1 +learn 1 +research 1 +group 1 +place 1 +interest 1 +associ 1 +linguist 1 +signll 1 +special 1 +print 1 +archiv 1 +european 1 +search 1 +engin 1 +altavista 1 +einet 1 +galaxi 1 +infoseek 1 +lyco 1 +yahoo 1 +new 1 +thing 1 +consid 1 +dernir 1 +nouvel 1 +alsac 1 +deutsch 1 +well 1 +focu 1 +york 1 +time 1 +spiegel 1 +svenska 1 +dagbladet 1 +tagesspiegel 1 +vanguardia 1 +welt 1 +zeitplusacm 1 +austin 1 +weather 1 +resourc 1 +perman 1 +address 1 +moltkestr 1 +bnde 1 +germanyphon 1 +voic 1 +last 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..766f0d52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,38 @@ +lanc 1 +tokuda 1 +univers 1 +texa 1 +austin 1 +softwar 1 +system 1 +gener 1 +research 1 +group 1 +depart 1 +comput 1 +scienc 1 +unicron 1 +utexa 1 +twelv 1 +time 1 +intramur 1 +sport 1 +champion 1 +directori 1 +entertain 1 +financ 1 +magic 1 +gather 1 +new 1 +organ 1 +peopl 1 +refer 1 +schedul 1 +home 1 +offic 1 +taylor 1 +perman 1 +heeia 1 +street 1 +kaneoh 1 +hawaii 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..e5014296 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,29 @@ +home 1 +page 1 +balayoghanv 1 +balayoghancontact 1 +informationemail 1 +utexa 1 +eduoffic 1 +painter 1 +hall 1 +telephon 1 +postal 1 +address 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +taylor 1 +click 1 +send 1 +email 1 +finger 1 +account 1 +find 1 +whether 1 +log 1 +ineosdi 1 +bookmarksvbb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..b5e7b6f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,173 @@ +harrick 1 +vinharrick 1 +assist 1 +professor 1 +comput 1 +scienc 1 +atth 1 +univers 1 +texa 1 +ataustin 1 +director 1 +distribut 1 +multimediacomput 1 +laboratori 1 +educ 1 +tech 1 +engin 1 +indian 1 +institut 1 +technolog 1 +bombai 1 +colorado 1 +state 1 +california 1 +diego 1 +honor 1 +award 1 +profession 1 +servic 1 +career 1 +faculti 1 +develop 1 +research 1 +initi 1 +supercomput 1 +center 1 +creativ 1 +innov 1 +editori 1 +board 1 +ieee 1 +multimedia 1 +vice 1 +chair 1 +area 1 +system 1 +intern 1 +confer 1 +icdc 1 +program 1 +committe 1 +andnetwork 1 +member 1 +electronicimag 1 +beij 1 +china 1 +novemb 1 +network 1 +kaohsiung 1 +taiwan 1 +decemb 1 +eurograph 1 +workshop 1 +rostock 1 +germani 1 +second 1 +internationalconfer 1 +applic 1 +third 1 +symposium 1 +multimediasystem 1 +interestmultimedia 1 +high 1 +speed 1 +databas 1 +mobilecomput 1 +summari 1 +main 1 +object 1 +design 1 +implement 1 +anend 1 +architectur 1 +enabl 1 +wide 1 +rang 1 +ofdistribut 1 +specif 1 +integr 1 +file 1 +algorithm 1 +protocolsfor 1 +effici 1 +transmiss 1 +digit 1 +audio 1 +video 1 +larg 1 +scale 1 +select 1 +recent 1 +public 1 +shenoi 1 +failur 1 +recoveri 1 +inmulti 1 +disk 1 +server 1 +proceed 1 +annualintern 1 +fault 1 +toler 1 +ftc 1 +pasadena 1 +page 1 +june 1 +gemmel 1 +kandlur 1 +venkat 1 +rangan 1 +row 1 +storag 1 +tutori 1 +goyal 1 +optim 1 +placement 1 +ofmultimedia 1 +arrai 1 +ieeeintern 1 +icmc 1 +washington 1 +determin 1 +delaybound 1 +heterogen 1 +thintern 1 +oper 1 +support 1 +fordigit 1 +nossdav 1 +durham 1 +hampshir 1 +april 1 +designingmultimedia 1 +commun 1 +march 1 +sponsor 1 +work 1 +variou 1 +industri 1 +federalinstitut 1 +includ 1 +intel 1 +nation 1 +foundationresearch 1 +nasa 1 +mitsubishi 1 +electricresearch 1 +merl 1 +microsystem 1 +electrospacesystem 1 +austin 1 +cours 1 +advanc 1 +contact 1 +inform 1 +email 1 +utexa 1 +phone 1 +mail 1 +address 1 +depart 1 +taylor 1 +hall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..11156733 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,67 @@ +vipin 1 +home 1 +page 1 +haven 1 +found 1 +time 1 +thing 1 +shall 1 +updat 1 +soon 1 +yeah 1 +know 1 +color 1 +match 1 +pictur 1 +cours 1 +interest 1 +report 1 +activ 1 +guestbook 1 +resum 1 +pleas 1 +give 1 +graduat 1 +student 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +undergraduatefrom 1 +delhi 1 +india 1 +academ 1 +interestscours 1 +work 1 +list 1 +less 1 +incomplet 1 +pass 1 +semest 1 +take 1 +risk 1 +put 1 +interestsreportsy 1 +visitor 1 +number 1 +go 1 +reset 1 +increas 1 +never 1 +decreas 1 +contact 1 +medic 1 +art 1 +street 1 +log 1 +sure 1 +sign 1 +though 1 +mani 1 +guest 1 +comment 1 +suggest 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..ff24b9e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,155 @@ +vladimir 1 +lifschitzwhen 1 +feel 1 +burden 1 +downcast 1 +human 1 +mind 1 +gladli 1 +turn 1 +therealm 1 +mathemat 1 +lucid 1 +precis 1 +grasp 1 +object 1 +isobtain 1 +insight 1 +gain 1 +pleasantli 1 +appropri 1 +conceptform 1 +spirit 1 +home 1 +paul 1 +bernai 1 +lifschitzgottesman 1 +famili 1 +centenni 1 +professor 1 +incomput 1 +sciencesat 1 +univers 1 +texasat 1 +austin 1 +fellow 1 +theamerican 1 +associ 1 +forartifici 1 +intelligenceb 1 +petersburg 1 +russia 1 +branchof 1 +steklov 1 +institut 1 +area 1 +interesttempor 1 +reasoningand 1 +reason 1 +aboutactionslog 1 +programmingand 1 +nonmonoton 1 +reasoningteachingoth 1 +profession 1 +activitiespap 1 +line 1 +lectur 1 +note 1 +survei 1 +research 1 +paper 1 +lifschitz 1 +dissert 1 +bylifschitz 1 +studentsrecommend 1 +read 1 +edsger 1 +dijkstra 1 +convoc 1 +speechgood 1 +new 1 +madelein 1 +albright 1 +nomin 1 +secretari 1 +state 1 +germani 1 +becam 1 +better 1 +place 1 +live 1 +regain 1 +book 1 +taken 1 +soviet 1 +armi 1 +daniel 1 +ortega 1 +lost 1 +need 1 +recycl 1 +helm 1 +burton 1 +actbad 1 +wang 1 +sentenc 1 +year 1 +prison 1 +tortur 1 +us 1 +polic 1 +democrat 1 +countri 1 +sequest 1 +import 1 +archeolog 1 +evid 1 +world 1 +close 1 +societynot 1 +race 1 +problem 1 +america 1 +black 1 +elect 1 +recent 1 +redrawn 1 +district 1 +california 1 +civil 1 +right 1 +initi 1 +ratio 1 +white 1 +finish 1 +high 1 +school 1 +admit 1 +student 1 +neutral 1 +basisoth 1 +amnesti 1 +intern 1 +scientist 1 +scienc 1 +favorit 1 +stori 1 +three 1 +silli 1 +joke 1 +quot 1 +monthcontact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +number 1 +postal 1 +address 1 +depart 1 +comput 1 +sciencesunivers 1 +texa 1 +austinaustin 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..e122041b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,56 @@ +vijaya 1 +ramachandranvijaya 1 +ramachandranblakemor 1 +regent 1 +professor 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +princeton 1 +research 1 +interestsmi 1 +interest 1 +algorithm 1 +theori 1 +primarilyin 1 +area 1 +parallel 1 +design 1 +includ 1 +analysi 1 +effici 1 +sequenti 1 +evalu 1 +model 1 +machin 1 +experiment 1 +access 1 +copi 1 +recent 1 +paper 1 +mine 1 +complet 1 +list 1 +public 1 +avail 1 +vita 1 +offici 1 +faculti 1 +profil 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +address 1 +utexa 1 +postal 1 +depart 1 +number 1 +visit 1 +page 1 +sinc 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..5e22dfd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,203 @@ +algorithm 1 +comput 1 +theori 1 +grouput 1 +groupth 1 +group 1 +focus 1 +theoret 1 +foundat 1 +scienc 1 +current 1 +research 1 +interest 1 +faculti 1 +includ 1 +design 1 +complex 1 +parallel 1 +graph 1 +probabilist 1 +method 1 +major 1 +focu 1 +analysi 1 +provabl 1 +effici 1 +solv 1 +fundament 1 +problem 1 +measur 1 +term 1 +differ 1 +resourc 1 +time 1 +space 1 +number 1 +processor 1 +random 1 +bit 1 +greg 1 +plaxton 1 +utexa 1 +combinator 1 +lower 1 +bound 1 +vijaya 1 +ramachandran 1 +machin 1 +model 1 +david 1 +zuckerman 1 +walk 1 +cryptographi 1 +affili 1 +folk 1 +postdoc 1 +student 1 +alumni 1 +sanjoi 1 +baruah 1 +emba 1 +tsan 1 +sheng 1 +tshsu 1 +sinica 1 +pierr 1 +kelsen 1 +madhukar 1 +korupolu 1 +phil 1 +mackenzi 1 +philmac 1 +idbsu 1 +ramgop 1 +mettu 1 +poon 1 +ckpoon 1 +rajmohan 1 +rajaraman 1 +rraj 1 +santanu 1 +sinha 1 +ssinha 1 +torsten 1 +suel 1 +berkelei 1 +yuke 1 +zhou 1 +mail 1 +list 1 +electron 1 +announc 1 +relat 1 +seminar 1 +activ 1 +post 1 +lowvolum 1 +typic 1 +dozen 1 +messag 1 +semest 1 +send 1 +express 1 +ad 1 +remov 1 +name 1 +request 1 +gripe 1 +warm 1 +stand 1 +workshop 1 +themidsouth 1 +midsouthwest 1 +forum 1 +texa 1 +surround 1 +state 1 +meet 1 +twice 1 +year 1 +locat 1 +consist 1 +sever 1 +talk 1 +region 1 +recent 1 +result 1 +often 1 +distinguish 1 +keynot 1 +speaker 1 +first 1 +organ 1 +atut 1 +austin 1 +spring 1 +organizedanoth 1 +fall 1 +held 1 +dalla 1 +southern 1 +methodist 1 +univers 1 +north 1 +louisiana 1 +southwestern 1 +oklahoma 1 +next 1 +schedul 1 +beheld 1 +novemb 1 +program 1 +sent 1 +algorithmsmail 1 +becom 1 +avail 1 +also 1 +usuallytri 1 +pool 1 +attend 1 +take 1 +place 1 +outsid 1 +ofaustin 1 +regard 1 +arrang 1 +sigact 1 +special 1 +algorithmsand 1 +thatinclud 1 +mani 1 +scientist 1 +sponsorsth 1 +symposium 1 +stoc 1 +sponsor 1 +siam 1 +discret 1 +soda 1 +andarchitectur 1 +spaa 1 +import 1 +confer 1 +interestar 1 +ieee 1 +foc 1 +serv 1 +elect 1 +member 1 +thesigact 1 +execut 1 +committe 1 +us 1 +pointer 1 +calendar 1 +eccc 1 +colloquium 1 +virtual 1 +rolodex 1 +bibliographi 1 +hypertext 1 +project 1 +dept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..21057847 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,7 @@ +srinivasan 1 +vaidyaraman 1 +email 1 +utexa 1 +offic 1 +phone 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..abce50f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,45 @@ +sengul 1 +vurgun 1 +sengulvurgun 1 +background 1 +interestsi 1 +student 1 +comput 1 +scienc 1 +depart 1 +ammainli 1 +interest 1 +artifici 1 +intellig 1 +neural 1 +network 1 +evolutionaryalgorithm 1 +term 1 +paper 1 +topic 1 +order 1 +ofprefer 1 +memori 1 +represent 1 +knowledg 1 +howto 1 +retriev 1 +learn 1 +theori 1 +problem 1 +solv 1 +cognit 1 +skillacquisit 1 +search 1 +understand 1 +visual 1 +attent 1 +connectionist 1 +approach 1 +architectur 1 +mindto 1 +contact 1 +mepost 1 +austin 1 +usavoic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..b759708a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,10 @@ +walbourn 1 +home 1 +page 1 +chuck 1 +walbournmi 1 +person 1 +locat 1 +charybdi 1 +enterpris 1 +server 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..2d9f8135 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,50 @@ +henri 1 +mackai 1 +walker 1 +visit 1 +senior 1 +lectur 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +professor 1 +mathemat 1 +grinnel 1 +colleg 1 +math 1 +grin 1 +edua 1 +regular 1 +tenur 1 +member 1 +faculti 1 +professorwalk 1 +period 1 +teachand 1 +variou 1 +profession 1 +activ 1 +formal 1 +appoint 1 +follow 1 +academ 1 +year 1 +summer 1 +fall 1 +complet 1 +inform 1 +avail 1 +home 1 +page 1 +atgrinnel 1 +http 1 +creat 1 +septemb 1 +last 1 +revis 1 +photograph 1 +jack 1 +robertson 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..959b67a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,61 @@ +home 1 +page 1 +lane 1 +warshawlan 1 +warshaw 1 +mike 1 +cool 1 +senior 1 +comput 1 +scienc 1 +student 1 +recent 1 +accept 1 +graduat 1 +school 1 +univers 1 +texa 1 +austin 1 +work 1 +area 1 +activ 1 +databas 1 +rule 1 +base 1 +system 1 +current 1 +posit 1 +appli 1 +research 1 +laboratoryinvolv 1 +maintain 1 +venu 1 +languag 1 +developedat 1 +mirank 1 +andat 1 +laboratori 1 +lanc 1 +obermey 1 +first 1 +item 1 +anoth 1 +third 1 +follow 1 +list 1 +paper 1 +case 1 +studi 1 +declar 1 +basi 1 +modul 1 +unpublish 1 +confer 1 +inform 1 +knowledg 1 +manag 1 +contact 1 +mepost 1 +usavoic 1 +arlut 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..70fad95e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,49 @@ +chen 1 +master 1 +student 1 +comput 1 +scienc 1 +dept 1 +austin 1 +texa 1 +decemb 1 +mathemat 1 +august 1 +fudan 1 +univers 1 +china 1 +juli 1 +offic 1 +phone 1 +email 1 +math 1 +utexa 1 +wchen 1 +mail 1 +address 1 +center 1 +numer 1 +analysi 1 +us 1 +link 1 +unix 1 +book 1 +expect 1 +perl 1 +site 1 +java 1 +demo 1 +rosett 1 +refer 1 +manual 1 +program 1 +exampl 1 +common 1 +gatewai 1 +interfac 1 +sampl 1 +pleas 1 +click 1 +load 1 +file 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..3f0d6960 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,48 @@ +home 1 +page 1 +paul 1 +wilson 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +postal 1 +address 1 +univers 1 +texa 1 +austin 1 +depart 1 +comput 1 +scienc 1 +best 1 +reach 1 +email 1 +ltwilson 1 +utexa 1 +usual 1 +headshot 1 +novelti 1 +thought 1 +cross 1 +section 1 +research 1 +informationi 1 +lead 1 +oop 1 +group 1 +object 1 +orient 1 +program 1 +system 1 +workson 1 +memori 1 +manag 1 +languag 1 +design 1 +implement 1 +teachingin 1 +fall 1 +teach 1 +sciencesnot 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..487d1379 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,137 @@ +arthur 1 +homepag 1 +cent 1 +becam 1 +christiani 1 +made 1 +major 1 +decis 1 +life 1 +believ 1 +jesu 1 +christ 1 +alittl 1 +month 1 +come 1 +totallyunexpect 1 +though 1 +religi 1 +studi 1 +compulsori 1 +class 1 +thechristian 1 +high 1 +school 1 +attend 1 +hong 1 +kong 1 +intent 1 +tobecom 1 +christian 1 +time 1 +thought 1 +record 1 +slife 1 +bibl 1 +quit 1 +credibl 1 +teach 1 +good 1 +take 1 +think 1 +deepli 1 +relat 1 +mylif 1 +also 1 +misconcept 1 +mean 1 +becom 1 +christianwa 1 +clear 1 +came 1 +past 1 +differ 1 +christianand 1 +wrong 1 +lovedeveri 1 +matter 1 +whether 1 +decid 1 +achristian 1 +us 1 +true 1 +shouldb 1 +virtuou 1 +holi 1 +enough 1 +howev 1 +listen 1 +friend 1 +church 1 +thefellowship 1 +realiz 1 +thing 1 +sin 1 +born 1 +theywil 1 +stumbl 1 +flesh 1 +hei 1 +sinless 1 +abl 1 +sympath 1 +weak 1 +weconfess 1 +faith 1 +forgiv 1 +andto 1 +cleans 1 +unright 1 +john 1 +differencebetween 1 +peopl 1 +trust 1 +astheir 1 +saviour 1 +gratefulli 1 +accept 1 +redempt 1 +fortheir 1 +therefor 1 +justifi 1 +without 1 +deed 1 +ofth 1 +roman 1 +doubt 1 +live 1 +wedo 1 +thecontrari 1 +reli 1 +strength 1 +givesu 1 +said 1 +whole 1 +need 1 +nota 1 +physician 1 +sick 1 +matthew 1 +count 1 +onour 1 +know 1 +number 1 +dai 1 +ought 1 +seek 1 +thetruth 1 +earli 1 +hesit 1 +start 1 +thankgod 1 +lead 1 +give 1 +opportun 1 +realli 1 +wkmak 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..a7e8c003 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,148 @@ +hello 1 +visitor 1 +number 1 +topic 1 +research 1 +interest 1 +pictur 1 +robot 1 +work 1 +educ 1 +public 1 +hotlist 1 +person 1 +miscellan 1 +send 1 +comment 1 +contact 1 +inform 1 +mobil 1 +explor 1 +learn 1 +navig 1 +intellig 1 +control 1 +worki 1 +primarili 1 +spatial 1 +semant 1 +hierarchi 1 +approach 1 +mobilerobot 1 +softwar 1 +engin 1 +develop 1 +area 1 +rang 1 +artifici 1 +includ 1 +machin 1 +neural 1 +network 1 +qualit 1 +reason 1 +vision 1 +oper 1 +system 1 +embed 1 +graphic 1 +user 1 +interfac 1 +multimedia 1 +spot 1 +real 1 +world 1 +ring 1 +ultrason 1 +sensor 1 +rover 1 +home 1 +built 1 +tall 1 +rhino 1 +manipul 1 +commerci 1 +robocac 1 +worm 1 +specif 1 +robofest 1 +organ 1 +group 1 +austin 1 +robokreta 1 +besar 1 +kicik 1 +autonom 1 +car 1 +us 1 +chassi 1 +motor 1 +fast 1 +remot 1 +race 1 +paper 1 +andqualit 1 +avail 1 +onlin 1 +doctor 1 +dissert 1 +titl 1 +fora 1 +physic 1 +also 1 +page 1 +offici 1 +start 1 +point 1 +internet 1 +technolog 1 +resourc 1 +wide 1 +catalog 1 +usenet 1 +frequent 1 +ask 1 +question 1 +meta 1 +index 1 +ncsa 1 +malaysia 1 +badminton 1 +homepag 1 +yahoo 1 +note 1 +common 1 +pleas 1 +email 1 +wyle 1 +utexa 1 +eduperson 1 +interestsavid 1 +player 1 +member 1 +unit 1 +state 1 +associ 1 +usba 1 +love 1 +plai 1 +guitar 1 +clarinet 1 +miscellaneousinterest 1 +martial 1 +art 1 +well 1 +movi 1 +offic 1 +taylor 1 +hall 1 +phone 1 +mail 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +finger 1 +back 1 +list 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..c6914a7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,8 @@ +contact 1 +xfeng 1 +utexa 1 +address 1 +qaustin 1 +phone 1 +west 1 +austin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..fc4bed13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,92 @@ +xingang 1 +home 1 +page 1 +photographi 1 +classic 1 +music 1 +audio 1 +sport 1 +travel 1 +visual 1 +welcom 1 +hang 1 +around 1 +univers 1 +texa 1 +austin 1 +student 1 +depart 1 +comput 1 +scienc 1 +work 1 +distribut 1 +multimedia 1 +head 1 +harrick 1 +year 1 +graduat 1 +program 1 +rank 1 +nation 1 +delight 1 +surpris 1 +realli 1 +pretti 1 +well 1 +pictur 1 +hard 1 +take 1 +aswel 1 +interest 1 +stuff 1 +line 1 +soon 1 +right 1 +temporaryresort 1 +imagin 1 +hopefulli 1 +time 1 +goe 1 +llgradual 1 +walk 1 +paper 1 +present 1 +clearer 1 +imag 1 +creat 1 +link 1 +find 1 +feel 1 +havesometh 1 +watch 1 +frequent 1 +access 1 +pointer 1 +serious 1 +foliag 1 +marvel 1 +search 1 +engin 1 +alta 1 +vista 1 +string 1 +infoseek 1 +keyword 1 +miata 1 +club 1 +unit 1 +morn 1 +newspap 1 +american 1 +express 1 +financi 1 +card 1 +york 1 +atlant 1 +monthli 1 +china 1 +soccer 1 +major 1 +leagu 1 +group 1 +xguo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..8468f755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,4 @@ +geoffrei 1 +home 1 +pagemov 1 +address 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..617bfbfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,15 @@ +yang 1 +frame 1 +alert 1 +see 1 +messag 1 +us 1 +challeng 1 +browser 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..78928a87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,87 @@ +david 1 +corner 1 +wwwdavid 1 +hello 1 +whatev 1 +took 1 +welcom 1 +make 1 +home 1 +establish 1 +contact 1 +street 1 +austin 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +internet 1 +utexa 1 +utc 1 +get 1 +busi 1 +research 1 +outsid 1 +work 1 +still 1 +seriou 1 +favorit 1 +quot 1 +word 1 +hope 1 +daili 1 +medit 1 +stuff 1 +classic 1 +guitar 1 +concert 1 +byth 1 +assad 1 +brothersin 1 +hong 1 +kong 1 +art 1 +festiv 1 +must 1 +first 1 +introduc 1 +beauti 1 +christoph 1 +parkeningi 1 +guitarist 1 +interest 1 +life 1 +stori 1 +tell 1 +grew 1 +tire 1 +ofconcert 1 +retir 1 +reconcili 1 +jesu 1 +christ 1 +rekindl 1 +passion 1 +also 1 +theamsterdam 1 +trio 1 +french 1 +rich 1 +artist 1 +flair 1 +like 1 +nation 1 +footbal 1 +team 1 +michel 1 +platini 1 +label 1 +franc 1 +magazinepublish 1 +minist 1 +align 1 +absmiddl 1 +sinc 1 +sept 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..15babda7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,15 @@ +yanbin 1 +frame 1 +alert 1 +see 1 +messag 1 +us 1 +challeng 1 +browser 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..29ac7163 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,9 @@ +yuan 1 +home 1 +page 1 +oop 1 +us 1 +browser 1 +pleas 1 +click 1 +continu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..0e718b98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,64 @@ +yong 1 +homepageto 1 +homepagey 1 +number 1 +visitor 1 +sinc 1 +beij 1 +china 1 +current 1 +program 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +settl 1 +stai 1 +mathemat 1 +graduat 1 +rutger 1 +brunswick 1 +year 1 +jersei 1 +beauti 1 +place 1 +wife 1 +tsinghua 1 +milanitalian 1 +soccerk 1 +soccernba 1 +sitefox 1 +sportschicago 1 +bullsmichael 1 +jordannflnhlc 1 +rankingmarri 1 +childrenseinfeldcomput 1 +sciencesutilitieshtml 1 +convertersimag 1 +collectionssystemshtmllatexcgitcl 1 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 1 +tmiscinternet 1 +travel 1 +network 1 +unit 1 +parcel 1 +servic 1 +state 1 +postal 1 +usp 1 +fedexus 1 +guidefun 1 +todayu 1 +newsstarwavesupermodel 1 +contact 1 +river 1 +street 1 +finger 1 +yonglu 1 +utexa 1 +page 1 +heavi 1 +construct 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..aed360ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,138 @@ +david 1 +young 1 +youngashbel 1 +smith 1 +professor 1 +comput 1 +scienc 1 +ofmathemat 1 +director 1 +center 1 +numer 1 +analysi 1 +webb 1 +institut 1 +naval 1 +architectur 1 +mathemat 1 +harvard 1 +univers 1 +honor 1 +award 1 +profession 1 +servic 1 +fellow 1 +american 1 +associ 1 +advanc 1 +outstand 1 +contribut 1 +journal 1 +linear 1 +algebra 1 +applic 1 +special 1 +issueded 1 +chair 1 +appli 1 +committe 1 +mathematicalsocieti 1 +board 1 +truste 1 +argonn 1 +editor 1 +siam 1 +matric 1 +matrixappl 1 +area 1 +interestnumer 1 +partial 1 +differenti 1 +equat 1 +numericallinear 1 +summari 1 +researchmi 1 +research 1 +activ 1 +focus 1 +solut 1 +partialdifferenti 1 +base 1 +finit 1 +differ 1 +methodsand 1 +iter 1 +method 1 +solv 1 +system 1 +oflinear 1 +involv 1 +larg 1 +andspars 1 +sever 1 +softwar 1 +packag 1 +develop 1 +basedon 1 +part 1 +itpack 1 +project 1 +beingextend 1 +includ 1 +suitabl 1 +share 1 +memori 1 +distributedmemori 1 +parallel 1 +rapidli 1 +converg 1 +methodsbas 1 +multilevel 1 +procedur 1 +also 1 +beingdevelop 1 +select 1 +recent 1 +publicationsd 1 +kincaid 1 +stationari 1 +second 1 +degre 1 +topic 1 +polynomi 1 +variabl 1 +rassia 1 +srivasiava 1 +yanushauska 1 +world 1 +scientif 1 +publ 1 +compani 1 +singapor 1 +vona 1 +ration 1 +spars 1 +search 1 +omega 1 +academ 1 +press 1 +carei 1 +sepehrnoori 1 +vector 1 +pde 1 +engin 1 +crai 1 +minneapoli 1 +high 1 +level 1 +solver 1 +supercomput 1 +algorithm 1 +graham 1 +john 1 +wilei 1 +son 1 +previou 1 +profil 1 +index 1 +next 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..ef56ea77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,125 @@ +yoonsuck 1 +choe 1 +photo 1 +ad 1 +student 1 +dept 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +decemb 1 +yonsei 1 +seoul 1 +korea 1 +august 1 +welcom 1 +homepag 1 +visitor 1 +sinc 1 +research 1 +interest 1 +model 1 +cortic 1 +structur 1 +neural 1 +network 1 +current 1 +work 1 +hand 1 +written 1 +digit 1 +recognit 1 +systembas 1 +laterali 1 +interconnect 1 +synerget 1 +self 1 +organ 1 +featur 1 +lissom 1 +develop 1 +joseph 1 +sirosh 1 +prof 1 +risto 1 +miikkulainen 1 +utc 1 +group 1 +recent 1 +includ 1 +extend 1 +actualspik 1 +event 1 +call 1 +spike 1 +slissom 1 +beself 1 +segment 1 +multipl 1 +object 1 +retinabi 1 +synchron 1 +within 1 +repres 1 +desynchron 1 +differ 1 +outlin 1 +also 1 +check 1 +html 1 +book 1 +relat 1 +edit 1 +later 1 +interact 1 +cortex 1 +function 1 +comment 1 +public 1 +page 1 +connect 1 +neuron 1 +technic 1 +report 1 +depart 1 +septemb 1 +electron 1 +isbn 1 +map 1 +appear 1 +touretzki 1 +mozer 1 +hasselmo 1 +editor 1 +advanc 1 +inform 1 +process 1 +system 1 +cambridg 1 +press 1 +handwritten 1 +techic 1 +master 1 +thesi 1 +bunch 1 +link 1 +total 1 +unord 1 +click 1 +find 1 +interestingcontact 1 +offic 1 +phone 1 +email 1 +yschoe 1 +utexa 1 +mail 1 +address 1 +maintain 1 +last 1 +updat 1 +home 1 +newsgroup 1 +summari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..44924f43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,76 @@ +yuanj 1 +xuwint 1 +break 1 +updat 1 +addresspictur 1 +famili 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +spring 1 +time 1 +tabl 1 +select 1 +cours 1 +schedulec 1 +object 1 +orient 1 +softwar 1 +engr 1 +brown 1 +logic 1 +synthesi 1 +aziz 1 +client 1 +server 1 +system 1 +develop 1 +gang 1 +previou 1 +semest 1 +china 1 +pagechines 1 +student 1 +associ 1 +alumni 1 +page 1 +work 1 +studi 1 +universityof 1 +technolog 1 +hefei 1 +institut 1 +mathemat 1 +chines 1 +academi 1 +beij 1 +chinaunivers 1 +munich 1 +atmunich 1 +germanyunivers 1 +manchest 1 +prof 1 +nick 1 +higham 1 +wang 1 +lifan 1 +hong 1 +chen 1 +guizhongustc 1 +yuan 1 +hailiang 1 +yang 1 +yuhongfriend 1 +linsoftwar 1 +program 1 +java 1 +perl 1 +common 1 +gatewai 1 +interfac 1 +link 1 +yahoo 1 +publish 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..2705b740 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,11 @@ +feng 1 +visitor 1 +number 1 +sinc 1 +yufeng 1 +utexa 1 +edufing 1 +public 1 +ring 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..f01aea37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,68 @@ +zhii 1 +chen 1 +home 1 +pagezhii 1 +chenabout 1 +mefrom 1 +guangzhou 1 +canton 1 +peopl 1 +republ 1 +china 1 +current 1 +master 1 +program 1 +comput 1 +scienc 1 +depart 1 +univers 1 +texa 1 +austin 1 +seek 1 +full 1 +time 1 +resum 1 +click 1 +postcript 1 +format 1 +zhongshan 1 +pleas 1 +view 1 +chines 1 +life 1 +fall 1 +spring 1 +calculu 1 +architectur 1 +misc 1 +zodiac 1 +person 1 +associ 1 +differ 1 +anim 1 +kind 1 +find 1 +page 1 +friend 1 +maintain 1 +john 1 +dong 1 +thank 1 +els 1 +world 1 +wide 1 +info 1 +contact 1 +burton 1 +zchen 1 +utexa 1 +still 1 +construct 1 +copyright 1 +creat 1 +last 1 +modifi 1 +visitor 1 +accord 1 +counter 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..8cb0f418 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,94 @@ +maggi 1 +xiao 1 +zhou 1 +home 1 +page 1 +educ 1 +assistantship 1 +graduat 1 +student 1 +teach 1 +assist 1 +databas 1 +manag 1 +depart 1 +comput 1 +sciencesat 1 +univers 1 +texa 1 +austin 1 +buaa 1 +beij 1 +china 1 +life 1 +current 1 +work 1 +fall 1 +multimedia 1 +system 1 +algorithm 1 +data 1 +commun 1 +network 1 +distribut 1 +process 1 +spring 1 +look 1 +around 1 +campu 1 +kaleidoscop 1 +land 1 +beauti 1 +visit 1 +world 1 +peopl 1 +daili 1 +new 1 +digest 1 +zhai 1 +chines 1 +magazin 1 +newspag 1 +time 1 +entertain 1 +movi 1 +stamp 1 +societi 1 +ieee 1 +giant 1 +onlin 1 +career 1 +center 1 +compani 1 +internet 1 +search 1 +yahoo 1 +galaxi 1 +lyco 1 +directori 1 +guid 1 +html 1 +script 1 +librari 1 +contact 1 +inform 1 +mail 1 +zhouxiao 1 +utexa 1 +http 1 +user 1 +offic 1 +main 1 +build 1 +room 1 +phone 1 +address 1 +scienc 1 +taylor 1 +last 1 +modifi 1 +sept 1 +comment 1 +welcom 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..07802829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,82 @@ +qing 1 +homepag 1 +qinghi 1 +friend 1 +welcom 1 +know 1 +graduat 1 +student 1 +inth 1 +depart 1 +comput 1 +scinc 1 +univers 1 +texa 1 +austin 1 +born 1 +beij 1 +capit 1 +citi 1 +china 1 +bachelor 1 +degre 1 +peke 1 +meet 1 +ofmi 1 +former 1 +classmat 1 +findmor 1 +peopl 1 +pekingunivers 1 +alumni 1 +home 1 +page 1 +oversea 1 +html 1 +enjoi 1 +live 1 +peek 1 +follow 1 +site 1 +like 1 +well 1 +find 1 +lot 1 +valuabl 1 +informationand 1 +professionalinternetpc 1 +relatedmac 1 +relatedaft 1 +worknetscap 1 +dynam 1 +document 1 +testtwin 1 +eldertwin 1 +youngernetscap 1 +testanim 1 +danc 1 +titledanc 1 +titl 1 +testanoth 1 +netscap 1 +testyet 1 +anoth 1 +testfriendsthi 1 +travel 1 +maintain 1 +xiaohai 1 +best 1 +shan 1 +shinan 1 +clike 1 +visitor 1 +number 1 +sinc 1 +octob 1 +construct 1 +last 1 +modifi 1 +qingunivers 1 +sciencesaustin 1 +zhuqe 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..64f12586 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,63 @@ +univ 1 +washington 1 +comput 1 +scienc 1 +organizationsinclud 1 +faculti 1 +staff 1 +student 1 +visitor 1 +organ 1 +ouraffili 1 +program 1 +graduat 1 +regioninclud 1 +local 1 +inform 1 +desktop 1 +refer 1 +link 1 +elsewher 1 +spotlightuwwin 1 +pacif 1 +region 1 +intern 1 +programmingcontesttwovideo 1 +highlight 1 +educ 1 +initiativesourcolloquia 1 +live 1 +mbonemajordon 1 +intel 1 +corporationdickkarp 1 +receiv 1 +nation 1 +medal 1 +scienceprofessionalmast 1 +applic 1 +deadlin 1 +autumn 1 +departmentoverview 1 +theimpact 1 +research 1 +univers 1 +perspect 1 +staffposit 1 +avail 1 +half 1 +centuri 1 +exponenti 1 +progress 1 +technolog 1 +page 1 +peopl 1 +cours 1 +laboratori 1 +newscan 1 +handl 1 +tabl 1 +click 1 +seattl 1 +voic 1 +comment 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..670382dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,57 @@ +andrew 1 +berman 1 +home 1 +pageandrew 1 +aberman 1 +washington 1 +educomput 1 +scienc 1 +bourassa 1 +virgil 1 +selberg 1 +erik 1 +tron 1 +process 1 +specif 1 +file 1 +protect 1 +unix 1 +oper 1 +system 1 +bothpostscript 1 +andhtml 1 +proceed 1 +winter 1 +usenix 1 +confer 1 +data 1 +structur 1 +fast 1 +approxim 1 +match 1 +postscript 1 +format 1 +shapiro 1 +linda 1 +effici 1 +imag 1 +retriev 1 +multipl 1 +distanc 1 +measur 1 +avail 1 +appear 1 +spie 1 +special 1 +link 1 +wife 1 +debbi 1 +beauti 1 +daughter 1 +melani 1 +miscellan 1 +poison 1 +donut 1 +stupid 1 +stupidmi 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..ee36afba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,107 @@ +adam 1 +finkelstein 1 +washington 1 +depart 1 +comput 1 +scienc 1 +univers 1 +seattl 1 +start 1 +drink 1 +cup 1 +coffe 1 +hair 1 +limp 1 +andlack 1 +bodi 1 +year 1 +live 1 +good 1 +life 1 +graduat 1 +student 1 +final 1 +finish 1 +doctor 1 +graphic 1 +fall 1 +quarter 1 +post 1 +earli 1 +join 1 +thecomput 1 +atprinceton 1 +softwar 1 +engin 1 +tibco 1 +formerli 1 +teknekron 1 +system 1 +palo 1 +alto 1 +wrote 1 +peopl 1 +trade 1 +stock 1 +undergradu 1 +swarthmor 1 +colleg 1 +class 1 +studi 1 +physic 1 +occasion 1 +recent 1 +research 1 +project 1 +find 1 +specif 1 +imag 1 +alarg 1 +databas 1 +sinc 1 +work 1 +someth 1 +call 1 +multiresolut 1 +video 1 +photo 1 +plai 1 +ultim 1 +frisbe 1 +team 1 +calledumatata 1 +address 1 +phone 1 +number 1 +look 1 +plan 1 +file 1 +across 1 +photocopi 1 +thehilari 1 +menu 1 +least 1 +visit 1 +hous 1 +caff 1 +lardo 1 +chilli 1 +night 1 +snoqualmi 1 +pass 1 +made 1 +excel 1 +view 1 +comet 1 +hyakutak 1 +great 1 +pictur 1 +taken 1 +friend 1 +marcu 1 +cool 1 +glass 1 +sculptur 1 +dither 1 +mona 1 +gothic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..79a7182f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,146 @@ +jame 1 +ahren 1 +home 1 +page 1 +address 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +washington 1 +seattl 1 +email 1 +phone 1 +research 1 +interest 1 +visual 1 +parallel 1 +distribut 1 +scientif 1 +databas 1 +manag 1 +project 1 +environ 1 +vision 1 +alamo 1 +nation 1 +laboratori 1 +public 1 +charl 1 +hansen 1 +cost 1 +effect 1 +data 1 +load 1 +balanc 1 +intern 1 +confer 1 +process 1 +august 1 +algorithm 1 +typic 1 +improv 1 +program 1 +perform 1 +onunbalanc 1 +dataset 1 +degrad 1 +unnecessari 1 +redistribut 1 +occur 1 +paper 1 +present 1 +whichperform 1 +possibl 1 +save 1 +outweighth 1 +experi 1 +polygonrender 1 +show 1 +factor 1 +loss 1 +percent 1 +onbalanc 1 +us 1 +linda 1 +shapiro 1 +steven 1 +tanimoto 1 +brinklei 1 +jakobovit 1 +lara 1 +lewi 1 +system 1 +model 1 +base 1 +proceed 1 +second 1 +workshop 1 +februari 1 +design 1 +gener 1 +motiv 1 +intend 1 +provid 1 +unifi 1 +highli 1 +graphic 1 +user 1 +interfac 1 +advanc 1 +queri 1 +facil 1 +interact 1 +notebook 1 +aid 1 +experiment 1 +promot 1 +share 1 +commun 1 +frank 1 +ortega 1 +fast 1 +polygon 1 +render 1 +supercomput 1 +novemb 1 +describ 1 +method 1 +massiv 1 +machin 1 +simpl 1 +shade 1 +target 1 +applic 1 +requir 1 +extrem 1 +larg 1 +set 1 +found 1 +mani 1 +handl 1 +arbitrarili 1 +complex 1 +need 1 +mesh 1 +issu 1 +involv 1 +toolkit 1 +enabl 1 +scientist 1 +displai 1 +directli 1 +avoid 1 +transmiss 1 +huge 1 +amount 1 +post 1 +ofwashington 1 +april 1 +longer 1 +version 1 +icpp 1 +also 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..59694272 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,100 @@ +databas 1 +environ 1 +vision 1 +research 1 +overview 1 +wasdesign 1 +gener 1 +scientif 1 +system 1 +motiv 1 +andintend 1 +model 1 +base 1 +comput 1 +provid 1 +unifieddata 1 +highli 1 +graphic 1 +user 1 +interfac 1 +advanc 1 +queryfacil 1 +interact 1 +laboratori 1 +notebook 1 +databaseenviron 1 +aid 1 +experiment 1 +andpromot 1 +data 1 +share 1 +commun 1 +devr 1 +entiti 1 +store 1 +hierarch 1 +relat 1 +datastructur 1 +schema 1 +contain 1 +name 1 +ofproperti 1 +part 1 +attribut 1 +among 1 +thepart 1 +definit 1 +describ 1 +buildinst 1 +specif 1 +visual 1 +studi 1 +mani 1 +differ 1 +imag 1 +topic 1 +includ 1 +multi 1 +level 1 +queri 1 +experi 1 +manag 1 +peopl 1 +princip 1 +investig 1 +linda 1 +shapiro 1 +steven 1 +tanimoto 1 +brinklei 1 +graduat 1 +student 1 +jame 1 +ahren 1 +jakobovit 1 +lara 1 +lewi 1 +public 1 +inmodel 1 +proceed 1 +secondcad 1 +workshop 1 +februari 1 +present 1 +project 1 +flexibledata 1 +organ 1 +support 1 +databasesystem 1 +spie 1 +symposium 1 +electron 1 +scienceand 1 +technolog 1 +implement 1 +scienc 1 +email 1 +washington 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..90b8ad05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,77 @@ +page 1 +bernheim 1 +washington 1 +start 1 +comput 1 +scienc 1 +graduat 1 +school 1 +univers 1 +still 1 +construct 1 +william 1 +colleg 1 +math 1 +front 1 +plai 1 +ultim 1 +frisbe 1 +autumn 1 +quarter 1 +class 1 +programminglanguag 1 +automata 1 +principl 1 +ofdigit 1 +system 1 +design 1 +graphic 1 +seminar 1 +parallel 1 +program 1 +environ 1 +outdoor 1 +relat 1 +link 1 +nation 1 +park 1 +home 1 +gorp 1 +guideto 1 +recreationfun 1 +christian 1 +scott 1 +interact 1 +list 1 +abig 1 +pile 1 +cool 1 +blast 1 +past 1 +scoobi 1 +dooeduc 1 +great 1 +refer 1 +women 1 +undergrad 1 +peterson 1 +educ 1 +center 1 +sourc 1 +inform 1 +opportun 1 +distribut 1 +mentorship 1 +project 1 +allow 1 +undergradu 1 +spend 1 +summerwork 1 +research 1 +femal 1 +mentor 1 +experi 1 +highlyrecommend 1 +back 1 +pagelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..2dc8af88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,128 @@ +amir 1 +michail 1 +michailgradu 1 +studenti 1 +second 1 +year 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienceat 1 +univers 1 +washington 1 +research 1 +interest 1 +includ 1 +followingarea 1 +algorithm 1 +design 1 +softwar 1 +engin 1 +educ 1 +master 1 +degre 1 +universityof 1 +toronto 1 +thesi 1 +optim 1 +broadcast 1 +summationfor 1 +hierarch 1 +ring 1 +architectur 1 +shift 1 +click 1 +hereto 1 +obtain 1 +compress 1 +postscript 1 +file 1 +recent 1 +experi 1 +wai 1 +teach 1 +particular 1 +built 1 +opsi 1 +java 1 +appletdesign 1 +balanc 1 +binari 1 +tree 1 +combinesprogram 1 +proof 1 +anim 1 +final 1 +lunar 1 +lander 1 +style 1 +gamethat 1 +wrote 1 +part 1 +undergradu 1 +graphic 1 +cours 1 +quotat 1 +scientist 1 +mathematician 1 +tend 1 +conserv 1 +mani 1 +unwillingto 1 +consid 1 +might 1 +better 1 +write 1 +told 1 +embarrass 1 +learn 1 +publishedincorrect 1 +theorem 1 +motiv 1 +avoid 1 +error 1 +believ 1 +theywil 1 +like 1 +structur 1 +persuad 1 +will 1 +explor 1 +unconvent 1 +proofstyl 1 +unfortun 1 +found 1 +care 1 +whether 1 +theyhav 1 +publish 1 +incorrect 1 +result 1 +often 1 +seem 1 +glad 1 +wasnot 1 +caught 1 +refere 1 +sinc 1 +would 1 +meant 1 +fewer 1 +public 1 +fear 1 +stylethat 1 +reveal 1 +mistak 1 +lesli 1 +lamport 1 +construct 1 +wayi 1 +make 1 +simpl 1 +obvious 1 +defici 1 +theother 1 +complic 1 +obviou 1 +hoar 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..5a3d81b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,62 @@ +richard 1 +anderson 1 +home 1 +page 1 +associ 1 +professor 1 +graduat 1 +inmathemat 1 +reed 1 +colleg 1 +comput 1 +scienc 1 +stanfordin 1 +join 1 +univers 1 +washington 1 +aon 1 +year 1 +postdoc 1 +mathemat 1 +research 1 +institut 1 +inberkelei 1 +receiv 1 +presidenti 1 +younginvestig 1 +award 1 +spent 1 +academ 1 +yeara 1 +visit 1 +indian 1 +bangalor 1 +india 1 +main 1 +interest 1 +theori 1 +implementationof 1 +algorithm 1 +includ 1 +parallel 1 +geometri 1 +scientif 1 +applic 1 +work 1 +engin 1 +depart 1 +seattl 1 +teach 1 +paper 1 +progress 1 +project 1 +qualifi 1 +evalu 1 +travel 1 +note 1 +theindian 1 +resum 1 +tourist 1 +pictur 1 +recent 1 +talksanderson 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..c30e891c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,128 @@ +anhai 1 +doan 1 +homepageanhai 1 +page 1 +reconstruct 1 +pleas 1 +revisit 1 +soon 1 +born 1 +brought 1 +vietnam 1 +finish 1 +high 1 +school 1 +iwent 1 +hungari 1 +studi 1 +comput 1 +scienc 1 +graduat 1 +kossuth 1 +lajo 1 +univers 1 +art 1 +andscienc 1 +debrecen 1 +receiv 1 +also 1 +wisconsin 1 +milwauke 1 +start 1 +fall 1 +program 1 +depart 1 +andengin 1 +ofwashington 1 +seattl 1 +research 1 +interest 1 +mostli 1 +artifici 1 +intellig 1 +amcurr 1 +investig 1 +decis 1 +make 1 +underuncertainti 1 +theoret 1 +plan 1 +markov 1 +process 1 +qualit 1 +theori 1 +academ 1 +includ 1 +read 1 +travel 1 +listen 1 +music 1 +jazz 1 +blue 1 +thing 1 +first 1 +name 1 +mean 1 +calm 1 +invietnames 1 +made 1 +combin 1 +last 1 +syllabl 1 +ofmi 1 +mother 1 +birthplac 1 +nghean 1 +father 1 +haiphong 1 +show 1 +creativ 1 +folkswer 1 +thought 1 +birth 1 +younger 1 +brother 1 +theysimpli 1 +switch 1 +gave 1 +namehaian 1 +content 1 +probabilist 1 +knowledg 1 +represent 1 +recent 1 +paper 1 +librari 1 +curriculum 1 +vita 1 +educ 1 +employ 1 +histori 1 +award 1 +honor 1 +public 1 +teach 1 +data 1 +structur 1 +algorithm 1 +take 1 +cours 1 +check 1 +inform 1 +offic 1 +hour 1 +locat 1 +person 1 +comtemporari 1 +vietnames 1 +affair 1 +literatur 1 +write 1 +paint 1 +foreign 1 +languag 1 +gener 1 +purpos 1 +life 1 +snapshotsanhai 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..c5989653 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,128 @@ +joel 1 +ausland 1 +hope 1 +depart 1 +comput 1 +scienc 1 +engineeringat 1 +univers 1 +washington 1 +pictur 1 +spring 1 +anim 1 +class 1 +click 1 +onit 1 +origin 1 +last 1 +quarter 1 +complet 1 +qual 1 +project 1 +time 1 +code 1 +gener 1 +multiflow 1 +compil 1 +offic 1 +sieg 1 +hall 1 +home 1 +univ 1 +seattl 1 +resum 1 +written 1 +follow 1 +paper 1 +fast 1 +effect 1 +dynam 1 +philipos 1 +chamber 1 +egger 1 +andb 1 +bershad 1 +pldi 1 +automat 1 +support 1 +event 1 +dispatch 1 +inextens 1 +system 1 +mock 1 +andp 1 +pardyak 1 +workshop 1 +compilersupport 1 +softwar 1 +februari 1 +experi 1 +control 1 +base 1 +motion 1 +synthesisfor 1 +articul 1 +figur 1 +fukunaga 1 +partovi 1 +christensen 1 +reiss 1 +shuman 1 +mark 1 +acmtransact 1 +graphic 1 +page 1 +also 1 +site 1 +optim 1 +leapfrog 1 +benjamin 1 +wilkerson 1 +mathemat 1 +magazin 1 +lossili 1 +compress 1 +mpeg 1 +animationthat 1 +goe 1 +synthesi 1 +sequenc 1 +show 1 +mywork 1 +piec 1 +togeth 1 +cartwheel 1 +jump 1 +andshuffl 1 +without 1 +fall 1 +andcollaps 1 +brown 1 +us 1 +algorithm 1 +orang 1 +isjust 1 +try 1 +switch 1 +consider 1 +tosmooth 1 +physic 1 +autumn 1 +took 1 +super 1 +short 1 +doubl 1 +speed 1 +small 1 +version 1 +final 1 +find 1 +better 1 +place 1 +slide 1 +thetalk 1 +singular 1 +valu 1 +decomposit 1 +gave 1 +seminar 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..d6e36ae0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,96 @@ +girl 1 +name 1 +brian 1 +boinge 1 +michalowskidepart 1 +comput 1 +scienc 1 +engineeringmail 1 +stop 1 +univers 1 +washingtonseattl 1 +offic 1 +locat 1 +sieg 1 +phone 1 +current 1 +take 1 +ling 1 +edit 1 +mossi 1 +bitsthank 1 +visit 1 +page 1 +visitor 1 +number 1 +worst 1 +view 1 +us 1 +headscapewhenev 1 +second 1 +year 1 +gradstud 1 +actual 1 +liber 1 +artist 1 +interest 1 +inlinguist 1 +confus 1 +good 1 +get 1 +know 1 +alreadi 1 +ultrahotlist 1 +favorit 1 +site 1 +ofal 1 +time 1 +search 1 +look 1 +onlin 1 +refer 1 +forsometh 1 +glorifi 1 +hotlist 1 +doesn 1 +thave 1 +urouletteto 1 +random 1 +find 1 +past 1 +institut 1 +ofwhich 1 +mental 1 +person 1 +info 1 +quot 1 +file 1 +songsand 1 +poem 1 +fictiti 1 +thrash 1 +band 1 +puriti 1 +test 1 +origin 1 +work 1 +tokeep 1 +touch 1 +finger 1 +mail 1 +guestbook 1 +pagesfrom 1 +friend 1 +idea 1 +includ 1 +aslfingerspel 1 +snapshot 1 +blatantli 1 +stolen 1 +brad 1 +chamberlain 1 +michalowski 1 +dept 1 +complet 1 +sanityerad 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..c202c351 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,52 @@ +paul 1 +beamepaul 1 +beam 1 +associ 1 +professor 1 +thedepart 1 +comput 1 +scienc 1 +engineeringat 1 +theunivers 1 +washington 1 +receiv 1 +mathemat 1 +sciencein 1 +univers 1 +toronto 1 +post 1 +doctor 1 +research 1 +academicyear 1 +join 1 +autumn 1 +presidentialyoung 1 +investig 1 +award 1 +concern 1 +primarili 1 +computationalcomplex 1 +theoret 1 +aspect 1 +paralleland 1 +distribut 1 +recent 1 +concentr 1 +connect 1 +proof 1 +theori 1 +particular 1 +complex 1 +inproposit 1 +system 1 +enjoi 1 +squash 1 +softbal 1 +sport 1 +enthusiasm 1 +cancompens 1 +lack 1 +talent 1 +paper 1 +qual 1 +project 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..8da96124 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,84 @@ +david 1 +beckerdavid 1 +beckercontact 1 +info 1 +mark 1 +spot 1 +stuff 1 +spin 1 +much 1 +time 1 +goe 1 +makingspina 1 +real 1 +oper 1 +system 1 +respons 1 +level 1 +code 1 +borrow 1 +devic 1 +model 1 +drvier 1 +support 1 +build 1 +environ 1 +manag 1 +keep 1 +develop 1 +platform 1 +function 1 +somedai 1 +getto 1 +perform 1 +measur 1 +optim 1 +sport 1 +tri 1 +bunch 1 +ultim 1 +frisbe 1 +favorit 1 +confer 1 +tripl 1 +jump 1 +minnesota 1 +athlet 1 +bethel 1 +colleg 1 +champion 1 +volleybal 1 +men 1 +grad 1 +team 1 +plai 1 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 1 +handbal 1 +also 1 +playracquetballgolftenni 1 +done 1 +bridgecampingcanoeingdisc 1 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 1 +skiingweightliftingwhitewat 1 +raftinghorseback 1 +ridingmountain 1 +bikingin 1 +line 1 +skate 1 +interest 1 +librari 1 +econom 1 +topic 1 +particularli 1 +free 1 +bank 1 +anti 1 +trust 1 +currenc 1 +ssto 1 +rlv 1 +theologi 1 +centurai 1 +railroad 1 +boot 1 +locomot 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..6bd35b88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,280 @@ +brian 1 +bershad 1 +washington 1 +eduwork 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +home 1 +street 1 +assist 1 +professor 1 +ofwashington 1 +sinc 1 +receiv 1 +took 1 +brief 1 +respit 1 +experi 1 +post 1 +industri 1 +cultur 1 +northeast 1 +return 1 +northwest 1 +coffe 1 +research 1 +oper 1 +system 1 +distribut 1 +network 1 +parallel 1 +architectur 1 +work 1 +hasappear 1 +toc 1 +sosp 1 +asplo 1 +isca 1 +although 1 +seem 1 +asigmetr 1 +paper 1 +publish 1 +save 1 +life 1 +besid 1 +run 1 +plai 1 +squash 1 +hang 1 +thestairmast 1 +project 1 +includ 1 +spin 1 +extensibleoper 1 +mobil 1 +memori 1 +manag 1 +midwai 1 +carnegi 1 +mellon 1 +parallelnetwork 1 +scalabl 1 +rocki 1 +thesequel 1 +etch 1 +binari 1 +instrument 1 +optimizationcours 1 +winter 1 +look 1 +master 1 +qual 1 +click 1 +list 1 +youmight 1 +degre 1 +recent 1 +trace 1 +driven 1 +comparison 1 +algorithm 1 +prefetch 1 +cachingtraci 1 +kimbrel 1 +andrew 1 +tomkin 1 +hugo 1 +patterson 1 +edward 1 +felten 1 +garth 1 +gibson 1 +anna 1 +karlin 1 +sigop 1 +usenix 1 +associ 1 +symposium 1 +onoper 1 +design 1 +implemen 1 +octob 1 +dynam 1 +bind 1 +extensiblesystem 1 +przemyslaw 1 +pardyak 1 +osdi 1 +structur 1 +perform 1 +interpret 1 +theodor 1 +romer 1 +denni 1 +geoffrei 1 +voelker 1 +alec 1 +wolman 1 +wayn 1 +wong 1 +jean 1 +loup 1 +baer 1 +henri 1 +levi 1 +fast 1 +effect 1 +dynamiccompil 1 +confer 1 +program 1 +languag 1 +implementationj 1 +ausland 1 +philipos 1 +chamber 1 +egger 1 +extens 1 +protocol 1 +applic 1 +specif 1 +marc 1 +fiuczynski 1 +write 1 +us 1 +modula 1 +emin 1 +sirer 1 +stefan 1 +savag 1 +greg 1 +defouw 1 +mari 1 +alapat 1 +appear 1 +workshop 1 +compil 1 +support 1 +softwar 1 +februari 1 +wilson 1 +hsieh 1 +charl 1 +garrett 1 +david 1 +becker 1 +safe 1 +link 1 +automat 1 +event 1 +dispatch 1 +systemsc 1 +mock 1 +safeti 1 +reduc 1 +overhead 1 +onlinesuperpag 1 +promot 1 +ohlrich 1 +detect 1 +sharedmemori 1 +zekauska 1 +sawdon 1 +appearedin 1 +page 1 +map 1 +polici 1 +cach 1 +conflictresolut 1 +standard 1 +hardwar 1 +chen 1 +mobisa 1 +inth 1 +issu 1 +avoid 1 +conflict 1 +miss 1 +larg 1 +direct 1 +mappedcach 1 +microkernel 1 +forappl 1 +servic 1 +uwtechn 1 +report 1 +effici 1 +packet 1 +demultiplex 1 +multipl 1 +endpoint 1 +messag 1 +yuhara 1 +maeda 1 +moss 1 +impact 1 +decomposit 1 +high 1 +practic 1 +consider 1 +block 1 +concurr 1 +object 1 +interrupt 1 +prioriti 1 +kernel 1 +share 1 +ieee 1 +compcon 1 +local 1 +area 1 +stock 1 +andmostli 1 +watson 1 +moblic 1 +consist 1 +virtual 1 +index 1 +wheeler 1 +mutual 1 +exclus 1 +uniprocessor 1 +redel 1 +elli 1 +mach 1 +commun 1 +primit 1 +ginsburg 1 +baron 1 +machnix 1 +microbenchmark 1 +evalu 1 +drave 1 +forin 1 +wwo 1 +increas 1 +irrelev 1 +micro 1 +base 1 +golub 1 +continu 1 +implement 1 +thread 1 +inoper 1 +rashid 1 +dean 1 +inform 1 +arpa 1 +relat 1 +rain 1 +citi 1 +hash 1 +hous 1 +harrier 1 +rel 1 +abduct 1 +alien 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..5d9e96a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,152 @@ +doorenbo 1 +home 1 +pagebob 1 +page 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +sieg 1 +hall 1 +bobd 1 +washington 1 +current 1 +work 1 +netbot 1 +union 1 +place 1 +suit 1 +voic 1 +daili 1 +comedi 1 +dilbert 1 +strip 1 +boffo 1 +david 1 +letterman 1 +list 1 +geeki 1 +new 1 +zdnet 1 +anchordesk 1 +magazin 1 +good 1 +stuff 1 +shortcut 1 +todai 1 +cool 1 +tool 1 +research 1 +postdoc 1 +oren 1 +etzioni 1 +steve 1 +hank 1 +weld 1 +softbot 1 +project 1 +also 1 +particular 1 +shopbot 1 +internet 1 +shop 1 +agent 1 +previou 1 +soar 1 +thesi 1 +site 1 +collect 1 +pointer 1 +repositori 1 +canada 1 +gopher 1 +scientif 1 +american 1 +sigma 1 +scientist 1 +miscellan 1 +link 1 +meta 1 +search 1 +metacrawl 1 +savvysearch 1 +alta 1 +vista 1 +lyco 1 +inktomi 1 +open 1 +text 1 +infoseek 1 +excit 1 +crawler 1 +hotbot 1 +directori 1 +hierarch 1 +select 1 +yahoo 1 +magellan 1 +pointcom 1 +engin 1 +guid 1 +onlin 1 +telephon 1 +world 1 +yellow 1 +network 1 +switchboard 1 +white 1 +cnnfn 1 +newshour 1 +post 1 +reuter 1 +headlin 1 +social 1 +cafe 1 +report 1 +boston 1 +globe 1 +span 1 +time 1 +view 1 +slate 1 +feed 1 +salon 1 +atlant 1 +monthli 1 +harper 1 +sport 1 +espn 1 +zone 1 +govern 1 +fedworld 1 +index 1 +hous 1 +congress 1 +arpa 1 +feder 1 +budget 1 +deficit 1 +nation 1 +debt 1 +clock 1 +concord 1 +coalit 1 +hand 1 +balanc 1 +bipartisan 1 +commiss 1 +entitl 1 +reform 1 +univers 1 +museum 1 +past 1 +life 1 +pittsburgh 1 +upcom 1 +birthdai 1 +person 1 +andfun 1 +pagebobd 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..aca90f31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,100 @@ +alan 1 +born 1 +home 1 +pagealan 1 +pagei 1 +professor 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +principalresearch 1 +interest 1 +constraint 1 +base 1 +languag 1 +system 1 +object 1 +orient 1 +logic 1 +program 1 +human 1 +computerinteract 1 +societi 1 +current 1 +research 1 +activitiesuwconstraint 1 +page 1 +contain 1 +link 1 +recent 1 +paper 1 +public 1 +domainsourc 1 +code 1 +satisfact 1 +algorithm 1 +media 1 +technolog 1 +democraci 1 +groupuw 1 +student 1 +also 1 +idea 1 +qualsproject 1 +teachingher 1 +cours 1 +taught 1 +autumn 1 +concept 1 +winter 1 +interact 1 +spring 1 +informationhistori 1 +grew 1 +idaho 1 +graduat 1 +reed 1 +colleg 1 +mathemat 1 +work 1 +atstanford 1 +receiv 1 +degre 1 +dissert 1 +done 1 +associ 1 +xerox 1 +paloalto 1 +center 1 +concern 1 +simulationlaboratori 1 +spent 1 +year 1 +post 1 +doctoralfellow 1 +artifici 1 +intellig 1 +ofedinburgh 1 +scotland 1 +mechan 1 +problem 1 +solv 1 +symbolicalgebra 1 +join 1 +andexcept 1 +sabbat 1 +europarc 1 +cambridg 1 +england 1 +havebeen 1 +sinc 1 +address 1 +dept 1 +seattl 1 +phone 1 +email 1 +eduwww 1 +http 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..3570dd24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,20 @@ +brad 1 +home 1 +pagebrad 1 +chamberlainphoto 1 +credit 1 +mike 1 +perkowitzth 1 +probabl 1 +couldn 1 +care 1 +less 1 +offic 1 +address 1 +thing 1 +work 1 +like 1 +ad 1 +subset 1 +ofth 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..49032cae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,125 @@ +brendan 1 +home 1 +pagebrendan 1 +mumeyi 1 +student 1 +depart 1 +comput 1 +scienceat 1 +theunivers 1 +washington 1 +enter 1 +fall 1 +expect 1 +graduat 1 +around 1 +contact 1 +information 1 +mail 1 +edufor 1 +address 1 +click 1 +curriculum 1 +vitaein 1 +htmlorpostscriptformat 1 +academ 1 +interestsi 1 +would 1 +call 1 +appli 1 +theoret 1 +scientist 1 +current 1 +work 1 +biologi 1 +moment 1 +look 1 +problem 1 +physic 1 +map 1 +build 1 +rough 1 +locat 1 +landmark 1 +genom 1 +gener 1 +speak 1 +interest 1 +us 1 +theori 1 +math 1 +tosolv 1 +reason 1 +practic 1 +also 1 +done 1 +incomput 1 +astrophys 1 +hpcc 1 +groupher 1 +onlin 1 +papersb 1 +mumei 1 +power 1 +clone 1 +overlap 1 +test 1 +html 1 +postscript 1 +poster 1 +present 1 +ismb 1 +confer 1 +aspect 1 +probe 1 +survei 1 +paper 1 +written 1 +fulfil 1 +candidaci 1 +requir 1 +find 1 +cluster 1 +quickli 1 +parallel 1 +dimac 1 +challeng 1 +klaw 1 +upper 1 +lower 1 +bound 1 +construct 1 +alphabet 1 +binari 1 +tree 1 +soda 1 +siam 1 +ofdiscret 1 +mathemat 1 +note 1 +version 1 +produc 1 +latexhtml 1 +containsom 1 +error 1 +readabl 1 +part 1 +recreationhik 1 +cycl 1 +ski 1 +climb 1 +drink 1 +coffeeto 1 +name 1 +sailingand 1 +hope 1 +sometim 1 +like 1 +plai 1 +bridg 1 +older 1 +photo 1 +first 1 +galleri 1 +second 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..a2c9f0aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,88 @@ +lauren 1 +brickerlauren 1 +bricker 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +washington 1 +seattl 1 +click 1 +need 1 +clue 1 +research 1 +info 1 +interest 1 +primarli 1 +graphic 1 +userinterfac 1 +although 1 +self 1 +proclaim 1 +groupi 1 +current 1 +work 1 +stevetanimoto 1 +mathematicsexperi 1 +imag 1 +process 1 +metip 1 +project 1 +goal 1 +ofthi 1 +junior 1 +high 1 +school 1 +kid 1 +mathemat 1 +usingexploratori 1 +rather 1 +rote 1 +learn 1 +method 1 +particular 1 +minterest 1 +develop 1 +supportedcollabor 1 +cscl 1 +user 1 +interfac 1 +us 1 +inthi 1 +well 1 +workin 1 +lawk 1 +dawg 1 +interfacea 1 +fairli 1 +extens 1 +resumeschool 1 +dazethi 1 +quarterdoth 1 +quartershuman 1 +interact 1 +cours 1 +spring 1 +quarter 1 +writeup 1 +final 1 +writeupwhat 1 +asystem 1 +insocieti 1 +excit 1 +hobbi 1 +enjoi 1 +busi 1 +lifesportscookingpotteri 1 +even 1 +studio 1 +garag 1 +year 1 +stuffbecaus 1 +ask 1 +itaddress 1 +last 1 +modifi 1 +mondai 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..358115e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,6 @@ +adam 1 +carlsonadam 1 +carlson 1 +washington 1 +comput 1 +scienc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..475c6424 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,44 @@ +andrew 1 +certain 1 +home 1 +page 1 +washington 1 +server 1 +fix 1 +give 1 +note 1 +interest 1 +follow 1 +direct 1 +download 1 +theview 1 +look 1 +model 1 +current 1 +work 1 +tonyderos 1 +david 1 +salesin 1 +werner 1 +stuetzl 1 +duchamp 1 +jovan 1 +popov 1 +scanningproject 1 +build 1 +viewer 1 +requir 1 +sgigraph 1 +workstat 1 +paper 1 +modifi 1 +netscap 1 +shouldalso 1 +browser 1 +similar 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..9349de58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,91 @@ +chou 1 +welcom 1 +homepag 1 +grad 1 +student 1 +computersci 1 +washington 1 +seattl 1 +pictur 1 +right 1 +first 1 +school 1 +fall 1 +quarter 1 +ross 1 +think 1 +scari 1 +relat 1 +infoth 1 +chinook 1 +projectmi 1 +schedulemi 1 +list 1 +publicationscod 1 +workshop 1 +codesignpersonalperson 1 +info 1 +taiwan 1 +greec 1 +resumefoodi 1 +enjoi 1 +cook 1 +peopl 1 +open 1 +restaur 1 +ofpeopl 1 +favorit 1 +dish 1 +includ 1 +stir 1 +fri 1 +rice 1 +noodl 1 +beefskew 1 +recip 1 +line 1 +toysb 1 +comput 1 +geek 1 +computersand 1 +cool 1 +toi 1 +taiwanesei 1 +also 1 +promot 1 +taiwaneselanguag 1 +current 1 +develop 1 +tool 1 +taiwanes 1 +sureto 1 +check 1 +experiment 1 +taiwanesedictionari 1 +though 1 +absolut 1 +requir 1 +page 1 +best 1 +viewedif 1 +instal 1 +chines 1 +charact 1 +font 1 +us 1 +java 1 +enabl 1 +browser 1 +like 1 +netscap 1 +beabl 1 +applet 1 +yellow 1 +ball 1 +bouncingov 1 +barnei 1 +purpl 1 +dynosaur 1 +last 1 +updat 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..ecc8e385 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,115 @@ +corei 1 +andersoncorei 1 +andersonth 1 +interest 1 +late 1 +set 1 +home 1 +page 1 +research 1 +color 1 +percept 1 +automat 1 +correct 1 +induc 1 +compet 1 +programm 1 +month 1 +contest 1 +search 1 +info 1 +localtalk 1 +adapt 1 +linux 1 +plai 1 +wavelet 1 +intern 1 +collegi 1 +program 1 +pacif 1 +region 1 +problem 1 +result 1 +final 1 +html 1 +version 1 +previou 1 +year 1 +duke 1 +internet 1 +rsum 1 +onlin 1 +recent 1 +august 1 +review 1 +comput 1 +graphic 1 +text 1 +refer 1 +book 1 +thing 1 +done 1 +graduat 1 +univser 1 +washington 1 +highlin 1 +commun 1 +colleg 1 +self 1 +tune 1 +fpga 1 +fall 1 +help 1 +polli 1 +organ 1 +depart 1 +contribut 1 +engin 1 +open 1 +hous 1 +april 1 +manag 1 +chapter 1 +treasuri 1 +spring 1 +develop 1 +read 1 +macintosh 1 +good 1 +servic 1 +providercool 1 +found 1 +usag 1 +statist 1 +lurker 1 +guid 1 +babylon 1 +sunsit 1 +archiv 1 +dilbert 1 +zone 1 +brother 1 +pageus 1 +link 1 +peek 1 +insid 1 +term 1 +lab 1 +featur 1 +netscap 1 +scienc 1 +univers 1 +washinton 1 +uwtv 1 +tech 1 +notesmi 1 +autumn 1 +schedul 1 +mondai 1 +tuesdai 1 +wednesdai 1 +thursdai 1 +fridai 1 +meet 1 +math 1 +corin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..be17db7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,65 @@ +craig 1 +experiencecraig 1 +kaplancurr 1 +locat 1 +student 1 +depart 1 +comput 1 +scienc 1 +universityof 1 +washington 1 +seattl 1 +well 1 +page 1 +copyof 1 +homepag 1 +univers 1 +waterloo 1 +time 1 +modifi 1 +appropri 1 +tomi 1 +current 1 +situat 1 +enjoy 1 +experi 1 +near 1 +undergraduatewa 1 +grad 1 +ball 1 +photo 1 +fromth 1 +second 1 +occur 1 +saturdai 1 +address 1 +convoc 1 +valedictorian 1 +cannot 1 +express 1 +honour 1 +felt 1 +wonder 1 +graduat 1 +class 1 +choos 1 +repres 1 +incident 1 +didn 1 +know 1 +parent 1 +found 1 +minut 1 +start 1 +ceremoni 1 +sai 1 +never 1 +forgiv 1 +text 1 +anyon 1 +curiou 1 +visitor 1 +number 1 +last 1 +updat 1 +cskaplan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..e0e4a016 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,56 @@ +codi 1 +kwok 1 +home 1 +page 1 +first 1 +thing 1 +thought 1 +peopl 1 +think 1 +mean 1 +aliv 1 +asami 1 +chiaki 1 +welcom 1 +chung 1 +ctkwok 1 +washington 1 +edui 1 +graduat 1 +student 1 +work 1 +weld 1 +andoren 1 +etzioni 1 +plan 1 +andsoftwar 1 +agent 1 +sanctuari 1 +ingram 1 +softbot 1 +aiuw 1 +contact 1 +informationleisur 1 +nausicaa 1 +vallei 1 +windlaputa 1 +castl 1 +skyhyp 1 +futur 1 +vision 1 +gunnm 1 +wind 1 +arch 1 +vile 1 +java 1 +applet 1 +anim 1 +take 1 +load 1 +last 1 +modifi 1 +visitor 1 +sinc 1 +figur 1 +doom 1 +numer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..376355d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,30 @@ +darren 1 +cronquist 1 +darrenc 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +welcom 1 +home 1 +page 1 +last 1 +updat 1 +employ 1 +inform 1 +current 1 +plan 1 +complet 1 +myph 1 +html 1 +postscript 1 +resum 1 +curriculum 1 +vita 1 +vitaperson 1 +rest 1 +homepag 1 +underconstruct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..c4f3ab22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,193 @@ +david 1 +christianson 1 +second 1 +till 1 +midnight 1 +nowher 1 +babi 1 +christiansondbc 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +current 1 +workin 1 +spare 1 +time 1 +third 1 +year 1 +graduat 1 +student 1 +atth 1 +interest 1 +inaiand 1 +plan 1 +gotten 1 +user 1 +interfac 1 +mayb 1 +even 1 +graphicsa 1 +well 1 +studi 1 +knowledg 1 +represent 1 +goal 1 +directedbehavior 1 +mix 1 +initi 1 +activ 1 +recognit 1 +buzzwordacquisit 1 +context 1 +human 1 +interact 1 +recent 1 +work 1 +prototyp 1 +us 1 +intellig 1 +bobdoorenbo 1 +shopbot 1 +rather 1 +build 1 +somehowintegr 1 +variou 1 +line 1 +store 1 +shop 1 +assist 1 +simpl 1 +built 1 +applicationthat 1 +automat 1 +read 1 +pars 1 +basket 1 +order 1 +determinewhat 1 +product 1 +also 1 +collabor 1 +sean 1 +anderson 1 +weld 1 +salesin 1 +michael 1 +cohen 1 +develop 1 +moviethat 1 +demonstr 1 +camera 1 +appl 1 +intern 1 +russel 1 +technologyinto 1 +experi 1 +perpetr 1 +graphic 1 +debugg 1 +theucpop 1 +famili 1 +planner 1 +programm 1 +client 1 +number 1 +peopl 1 +whose 1 +live 1 +touch 1 +grow 1 +everi 1 +manual 1 +isher 1 +spent 1 +undergradu 1 +career 1 +theunivers 1 +chicago 1 +carboload 1 +harold 1 +chicken 1 +theanim 1 +agent 1 +publicationschristianson 1 +declar 1 +control 1 +cinematographi 1 +appear 1 +aaai 1 +firbi 1 +mcdougal 1 +fast 1 +local 1 +map 1 +support 1 +navig 1 +object 1 +symposium 1 +sensor 1 +fusion 1 +boston 1 +novemb 1 +find 1 +dave 1 +thechateau 1 +cynic 1 +offic 1 +withfreder 1 +william 1 +darren 1 +adam 1 +gloriou 1 +leader 1 +juan 1 +import 1 +thing 1 +favorit 1 +practic 1 +judo 1 +compet 1 +senior 1 +nation 1 +sibl 1 +sisterjust 1 +school 1 +inform 1 +librari 1 +michigan 1 +surf 1 +cut 1 +edg 1 +research 1 +supercollid 1 +realli 1 +feel 1 +like 1 +slack 1 +mirski 1 +help 1 +watch 1 +hero 1 +youth 1 +duel 1 +death 1 +wwwf 1 +grudg 1 +match 1 +fame 1 +fortun 1 +check 1 +respons 1 +week 1 +game 1 +domain 1 +straight 1 +doomgat 1 +sai 1 +evil 1 +book 1 +tick 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..22e9c9ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,39 @@ +home 1 +page 1 +dave 1 +johnson 1 +david 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +research 1 +interest 1 +navig 1 +assist 1 +hypertext 1 +readersproject 1 +activ 1 +racquetbal 1 +golf 1 +basketbal 1 +softbal 1 +tutori 1 +script 1 +fit 1 +togeth 1 +take 1 +theracquetbal 1 +quiz 1 +creat 1 +look 1 +thecreat 1 +assess 1 +form 1 +give 1 +last 1 +modifi 1 +mondai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..a779d2a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,161 @@ +home 1 +page 1 +david 1 +dion 1 +ddion 1 +washington 1 +person 1 +yeah 1 +right 1 +like 1 +anyth 1 +okai 1 +mayb 1 +littl 1 +academ 1 +current 1 +research 1 +past 1 +year 1 +work 1 +brian 1 +bershad 1 +primari 1 +respons 1 +construct 1 +unix 1 +server 1 +thespinoper 1 +system 1 +spin 1 +oper 1 +applic 1 +achiev 1 +impress 1 +perform 1 +dynam 1 +extend 1 +kernel 1 +safeti 1 +protect 1 +maintain 1 +languag 1 +level 1 +extens 1 +written 1 +modula 1 +user 1 +slight 1 +variant 1 +think 1 +run 1 +mach 1 +link 1 +us 1 +intercept 1 +call 1 +emul 1 +environ 1 +previou 1 +first 1 +havework 1 +undergradu 1 +studi 1 +notr 1 +dame 1 +help 1 +implement 1 +memori 1 +manag 1 +commun 1 +subsystem 1 +stuff 1 +afraid 1 +time 1 +around 1 +surf 1 +wouldn 1 +claim 1 +know 1 +cool 1 +ipromis 1 +soon 1 +netscap 1 +enhancedthi 1 +hold 1 +breath 1 +meanwhil 1 +site 1 +visit 1 +occasion 1 +distract 1 +univers 1 +comput 1 +scienc 1 +engin 1 +reason 1 +seattl 1 +homepag 1 +featur 1 +date 1 +view 1 +campu 1 +weather 1 +condit 1 +occupi 1 +vast 1 +major 1 +program 1 +dai 1 +debug 1 +manual 1 +solv 1 +countless 1 +problem 1 +institut 1 +band 1 +life 1 +trumpet 1 +section 1 +racquetbal 1 +ladder 1 +main 1 +outlet 1 +athlet 1 +espn 1 +sportzon 1 +stai 1 +touch 1 +sport 1 +world 1 +todai 1 +rest 1 +dilbert 1 +learn 1 +real 1 +restaur 1 +fine 1 +eateri 1 +recommend 1 +other 1 +movi 1 +region 1 +list 1 +transport 1 +line 1 +guid 1 +excel 1 +public 1 +traffic 1 +statu 1 +infam 1 +marin 1 +leagu 1 +basebal 1 +team 1 +bean 1 +shop 1 +last 1 +modifi 1 +mondai 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..1a01dd2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,127 @@ +derrick 1 +index 1 +weathersbi 1 +bullssupersonicsi 1 +pursu 1 +phdin 1 +comput 1 +scienc 1 +univers 1 +washington 1 +seattl 1 +beauti 1 +campu 1 +li 1 +heart 1 +ofseattl 1 +offer 1 +mani 1 +divers 1 +graduat 1 +student 1 +often 1 +fall 1 +prei 1 +therebyextend 1 +time 1 +averag 1 +career 1 +practic 1 +experi 1 +interestssignific 1 +project 1 +skill 1 +data 1 +sheet 1 +share 1 +server 1 +parallel 1 +arrai 1 +languag 1 +compil 1 +host 1 +token 1 +ring 1 +commun 1 +protocol 1 +develop 1 +base 1 +design 1 +administr 1 +system 1 +network 1 +tool 1 +securityresearch 1 +interestsmi 1 +research 1 +center 1 +around 1 +distribut 1 +challengespres 1 +field 1 +on 1 +perform 1 +portabl 1 +conveni 1 +typic 1 +foremost 1 +goal 1 +run 1 +ordistribut 1 +environ 1 +howev 1 +suffer 1 +final 1 +theseenviron 1 +extra 1 +challeng 1 +asynchron 1 +independ 1 +event 1 +daunt 1 +task 1 +distributedenviron 1 +issu 1 +address 1 +group 1 +page 1 +spaa 1 +paper 1 +collect 1 +gener 1 +topic 1 +comm 1 +dissert 1 +integr 1 +softwar 1 +engin 1 +projectacadem 1 +achievementsinstructor 1 +extens 1 +colleg 1 +advanc 1 +summer 1 +curriculum 1 +cours 1 +certif 1 +program 1 +collegeinstructor 1 +teach 1 +assist 1 +start 1 +undergradu 1 +tutor 1 +women 1 +minoritystud 1 +depart 1 +engineeringoutstand 1 +award 1 +person 1 +interest 1 +interact 1 +cnnfinanciala 1 +newslet 1 +would 1 +javaw 1 +weathersbyderrick 1 +edutu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..68009462 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,123 @@ +brian 1 +dewei 1 +home 1 +pagebrian 1 +deweyabout 1 +first 1 +year 1 +student 1 +depart 1 +comput 1 +scienceand 1 +engin 1 +univers 1 +washington 1 +doyou 1 +need 1 +know 1 +like 1 +music 1 +book 1 +ilov 1 +plai 1 +game 1 +ride 1 +bike 1 +write 1 +letter 1 +drink 1 +myfavorit 1 +color 1 +blue 1 +favorit 1 +ocean 1 +atlant 1 +oldroomm 1 +think 1 +anim 1 +public 1 +avail 1 +finger 1 +northern 1 +irelandi 1 +return 1 +trip 1 +belfast 1 +june 1 +carri 1 +sixti 1 +pagesof 1 +note 1 +interview 1 +luggag 1 +getthos 1 +semi 1 +chaotic 1 +readabl 1 +hopefulli 1 +enlighteningformat 1 +work 1 +draft 1 +feel 1 +free 1 +read 1 +shoulder 1 +period 1 +make 1 +html 1 +postscript 1 +format 1 +goal 1 +encourag 1 +frequent 1 +feedback 1 +soon 1 +possibl 1 +pleas 1 +mail 1 +well 1 +much 1 +time 1 +thisproject 1 +late 1 +progress 1 +near 1 +ofth 1 +summer 1 +imag 1 +ireland 1 +thecurr 1 +sinn 1 +fein 1 +inform 1 +bibliographi 1 +statist 1 +terrorist 1 +relat 1 +death 1 +injuri 1 +alreadi 1 +addict 1 +recuri 1 +check 1 +link 1 +fascin 1 +thorough 1 +histori 1 +develop 1 +china 1 +ancient 1 +page 1 +imageek 1 +york 1 +cuni 1 +provid 1 +mani 1 +interest 1 +site 1 +jansteen 1 +seen 1 +edulast 1 +modifi 1 +tuesdai 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..d0e967dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,119 @@ +home 1 +pagewelcom 1 +fasulo 1 +page 1 +dfasulo 1 +washington 1 +third 1 +year 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +williamscolleg 1 +computersci 1 +appli 1 +mathemat 1 +class 1 +note 1 +portrait 1 +contain 1 +slight 1 +inaccuraci 1 +find 1 +eastlak 1 +seattl 1 +work 1 +engin 1 +ofwashington 1 +offic 1 +chateau 1 +email 1 +academ 1 +interest 1 +graphic 1 +biologi 1 +person 1 +fiction 1 +fantasi 1 +written 1 +otherwis 1 +fact 1 +probabl 1 +honest 1 +identifi 1 +illustr 1 +merlin 1 +corwin 1 +pictur 1 +favorit 1 +charact 1 +mine 1 +roger 1 +zelazni 1 +chronicl 1 +amber 1 +imag 1 +taken 1 +drpg 1 +publish 1 +phage 1 +press 1 +would 1 +recommend 1 +anyon 1 +like 1 +book 1 +also 1 +seri 1 +babylon 1 +creativ 1 +write 1 +poetri 1 +absolut 1 +link 1 +athlet 1 +particular 1 +order 1 +tenni 1 +kwon 1 +distanc 1 +run 1 +role 1 +plai 1 +random 1 +thing 1 +depend 1 +cat 1 +homepag 1 +friend 1 +fellow 1 +william 1 +alumnu 1 +sean 1 +sandi 1 +look 1 +woman 1 +former 1 +grad 1 +wendi 1 +belluomini 1 +dress 1 +dogbert 1 +peopl 1 +ask 1 +theori 1 +worthwhil 1 +area 1 +research 1 +whether 1 +abstract 1 +us 1 +better 1 +explan 1 +goal 1 +futur 1 +ever 1 +given 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..b22b5548 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,30 @@ +martin 1 +dickei 1 +home 1 +page 1 +dickeycomput 1 +scienc 1 +engineeringunivers 1 +washingtonwelcom 1 +weekli 1 +schedulenarr 1 +resum 1 +blurbcs 1 +engr 1 +autumn 1 +favorit 1 +seattl 1 +coffe 1 +housesfavorit 1 +internet 1 +site 1 +sister 1 +bookspirograph 1 +java 1 +script 1 +garg 1 +plai 1 +washington 1 +eduupd 1 +tuesdai 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..2d743e74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,187 @@ +denni 1 +primari 1 +interest 1 +high 1 +perform 1 +comput 1 +architectur 1 +researchwith 1 +smart 1 +peopl 1 +likejean 1 +loup 1 +baer 1 +brian 1 +bershad 1 +brad 1 +calder 1 +chen 1 +alan 1 +eustac 1 +dirk 1 +grunwald 1 +andt 1 +romer 1 +recent 1 +paper 1 +energi 1 +manag 1 +issu 1 +system 1 +gener 1 +instruct 1 +cach 1 +fetch 1 +polici 1 +specul 1 +execut 1 +isca 1 +dynam 1 +page 1 +map 1 +conflict 1 +resolutionon 1 +standard 1 +hardwar 1 +osdi 1 +avoid 1 +miss 1 +larg 1 +direct 1 +mappedcach 1 +asplo 1 +effect 1 +differ 1 +code 1 +reorder 1 +algorithm 1 +qualifi 1 +project 1 +report 1 +univers 1 +washington 1 +contact 1 +work 1 +scienc 1 +engin 1 +depart 1 +seattl 1 +offic 1 +sieg 1 +dlee 1 +home 1 +index 1 +pointer 1 +hotlist 1 +good 1 +entri 1 +point 1 +explor 1 +yahoo 1 +yellow 1 +internet 1 +lyco 1 +realli 1 +search 1 +enginefor 1 +guid 1 +click 1 +vote 1 +million 1 +sensibl 1 +mind 1 +conced 1 +thatpolit 1 +almost 1 +alwai 1 +choic 1 +lesser 1 +evil 1 +tweedledumand 1 +tweedlede 1 +abstain 1 +theyar 1 +present 1 +presid 1 +appoint 1 +go 1 +torummag 1 +around 1 +live 1 +next 1 +four 1 +year 1 +consid 1 +allth 1 +stew 1 +rather 1 +huberthumphrei 1 +show 1 +humphrei 1 +taught 1 +lesson 1 +still 1 +enjoi 1 +nixon 1 +suprem 1 +court 1 +whentricia 1 +juli 1 +begin 1 +find 1 +silver 1 +thread 1 +among 1 +gold 1 +theblack 1 +russel 1 +baker 1 +ford 1 +without 1 +flummeri 1 +commit 1 +hesit 1 +chanc 1 +draw 1 +back 1 +ineffect 1 +concern 1 +act 1 +initi 1 +element 1 +truth 1 +ignor 1 +kill 1 +countless 1 +idea 1 +splendid 1 +plan 1 +moment 1 +definit 1 +provid 1 +move 1 +sort 1 +thing 1 +occur 1 +help 1 +would 1 +never 1 +otherwis 1 +whole 1 +stream 1 +event 1 +decis 1 +rais 1 +favor 1 +manner 1 +unforeseen 1 +incid 1 +meet 1 +materi 1 +assist 1 +magic 1 +could 1 +dream 1 +come 1 +whatev 1 +goeth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..c111e487 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,119 @@ +anoth 1 +unnecessari 1 +home 1 +pagececi 1 +page 1 +well 1 +much 1 +anywai 1 +sure 1 +casual 1 +mention 1 +name 1 +buti 1 +feel 1 +strongli 1 +bold 1 +noless 1 +doug 1 +zongker 1 +exhaust 1 +list 1 +usual 1 +research 1 +public 1 +classeshow 1 +contact 1 +meth 1 +unusu 1 +toxic 1 +custard 1 +workshop 1 +filesth 1 +mento 1 +galleryvisit 1 +inform 1 +supercolliderth 1 +useless 1 +todai 1 +date 1 +wast 1 +time 1 +cron 1 +player 1 +databas 1 +test 1 +ground 1 +caveat 1 +user 1 +sarcasm 1 +lucki 1 +brows 1 +avirtu 1 +intend 1 +treasur 1 +trove 1 +whichmai 1 +actual 1 +us 1 +realli 1 +first 1 +year 1 +grad 1 +student 1 +comput 1 +scienc 1 +engineeringdepart 1 +univers 1 +ofwashington 1 +graduat 1 +michigan 1 +state 1 +imajor 1 +andminor 1 +math 1 +dubiou 1 +honorsjunior 1 +apprentic 1 +keeper 1 +brotherhood 1 +crouton 1 +death 1 +cart 1 +pizzicato 1 +intern 1 +club 1 +member 1 +bryan 1 +worst 1 +execut 1 +vice 1 +presid 1 +charg 1 +emerg 1 +backup 1 +clicker 1 +cruis 1 +highwai 1 +inhigh 1 +gear 1 +sit 1 +buttstar 1 +screen 1 +tast 1 +background 1 +stolen 1 +depart 1 +labor 1 +homepag 1 +wheremi 1 +sister 1 +work 1 +dougz 1 +washington 1 +class 1 +last 1 +edit 1 +thursdai 1 +novemb 1 +hit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..bdc4327f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,92 @@ +dylan 1 +mcname 1 +jame 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +person 1 +inform 1 +research 1 +concentr 1 +interact 1 +applicationsand 1 +oper 1 +system 1 +implement 1 +mechanismthat 1 +allow 1 +applic 1 +page 1 +replacementpolici 1 +kernel 1 +polici 1 +caus 1 +perform 1 +poorli 1 +help 1 +schedul 1 +activ 1 +machoper 1 +mechan 1 +thathelp 1 +user 1 +level 1 +thread 1 +properli 1 +kernelthread 1 +experi 1 +lead 1 +work 1 +spin 1 +project 1 +folk 1 +build 1 +architecturethat 1 +gener 1 +idea 1 +tailor 1 +current 1 +carri 1 +start 1 +opal 1 +thesi 1 +investig 1 +betweenobject 1 +orient 1 +databas 1 +oodb 1 +virtual 1 +memori 1 +demonstr 1 +signific 1 +improv 1 +achiv 1 +us 1 +commod 1 +differ 1 +done 1 +addit 1 +improvementscan 1 +come 1 +modifi 1 +slightli 1 +betterserv 1 +paperscv 1 +cours 1 +geoff 1 +voelker 1 +built 1 +winter 1 +quarter 1 +seminar 1 +dedic 1 +java 1 +gave 1 +lectureintroduc 1 +languag 1 +environ 1 +slide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..0e1ac513 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,32 @@ +christoph 1 +lewi 1 +home 1 +page 1 +graduat 1 +student 1 +dept 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +echri 1 +hello 1 +glad 1 +could 1 +make 1 +work 1 +program 1 +languag 1 +project 1 +offic 1 +hour 1 +tent 1 +mondai 1 +wednesdai 1 +sieg 1 +last 1 +modifi 1 +thur 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..a41a4c52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page 1 +blank 1 +ecrock 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..c18f21c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,118 @@ +eddi 1 +hong 1 +home 1 +page 1 +know 1 +second 1 +year 1 +graduat 1 +comput 1 +scienc 1 +studentat 1 +univers 1 +washington 1 +well 1 +tosai 1 +busi 1 +type 1 +peopl 1 +littl 1 +time 1 +hand 1 +hadto 1 +includ 1 +resum 1 +link 1 +postcriptand 1 +plain 1 +text 1 +form 1 +offic 1 +room 1 +seig 1 +hall 1 +current 1 +work 1 +anna 1 +karlin 1 +craig 1 +chamber 1 +theoret 1 +model 1 +dynam 1 +compil 1 +specif 1 +workingon 1 +develop 1 +line 1 +algorithm 1 +fordynam 1 +plan 1 +qual 1 +project 1 +access 1 +sinc 1 +august 1 +histor 1 +fact 1 +free 1 +activit 1 +vine 1 +branchesmi 1 +sister 1 +want 1 +help 1 +creat 1 +also 1 +list 1 +variou 1 +anoth 1 +interest 1 +keep 1 +industri 1 +site 1 +give 1 +insight 1 +commentari 1 +happen 1 +knowof 1 +place 1 +pleas 1 +mail 1 +daveneti 1 +power 1 +macintosh 1 +guess 1 +make 1 +bias 1 +towardslik 1 +mac 1 +howev 1 +think 1 +better 1 +eveneasi 1 +come 1 +sometim 1 +visit 1 +appl 1 +check 1 +seattl 1 +freewai 1 +traffic 1 +look 1 +advic 1 +import 1 +book 1 +worldher 1 +us 1 +inform 1 +alwai 1 +found 1 +find 1 +address 1 +domain 1 +name 1 +countri 1 +friend 1 +stand 1 +edhong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..d6517931 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,74 @@ +susan 1 +egger 1 +http 1 +washington 1 +home 1 +depart 1 +computersci 1 +engin 1 +univers 1 +seattl 1 +voic 1 +email 1 +offic 1 +sieg 1 +hall 1 +research 1 +interest 1 +comput 1 +architectur 1 +back 1 +compil 1 +emphasi 1 +onexperiment 1 +perform 1 +analysi 1 +current 1 +work 1 +issu 1 +incompil 1 +optim 1 +dynam 1 +share 1 +data 1 +optimizationsand 1 +instruct 1 +schedul 1 +processor 1 +design 1 +multithreadedarchitectur 1 +project 1 +time 1 +algorithm 1 +reduc 1 +fals 1 +multithread 1 +spinprevi 1 +cach 1 +coher 1 +code 1 +prefetch 1 +memori 1 +machin 1 +miscellan 1 +tool 1 +workload 1 +new 1 +asplo 1 +program 1 +committe 1 +call 1 +paper 1 +homepag 1 +inform 1 +look 1 +click 1 +list 1 +might 1 +qual 1 +amast 1 +degre 1 +begin 1 +thesi 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..2731cf4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,197 @@ +emin 1 +sirer 1 +sirereg 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +backgroundi 1 +current 1 +third 1 +year 1 +graduat 1 +student 1 +grew 1 +istanbul 1 +turkei 1 +receiv 1 +princeton 1 +work 1 +toward 1 +spinproject 1 +prof 1 +brian 1 +bershad 1 +spent 1 +summer 1 +bell 1 +labswork 1 +theplan 1 +oper 1 +system 1 +help 1 +build 1 +prototyp 1 +displai 1 +thesumm 1 +research 1 +center 1 +jersei 1 +recent 1 +thevesta 1 +project 1 +projectsmi 1 +goal 1 +develop 1 +safe 1 +adapt 1 +extens 1 +thread 1 +schedul 1 +synchron 1 +dynam 1 +link 1 +andprotect 1 +domain 1 +subsystem 1 +spin 1 +also 1 +wrote 1 +machin 1 +specificaspect 1 +kernel 1 +call 1 +interrupt 1 +path 1 +andsom 1 +miscellan 1 +interfac 1 +alarm 1 +mach 1 +compat 1 +support 1 +novel 1 +aspect 1 +provid 1 +mean 1 +ofextend 1 +arbitrari 1 +implement 1 +schedulingpolici 1 +allowsu 1 +achiev 1 +high 1 +perform 1 +strand 1 +isdesign 1 +fault 1 +isol 1 +protect 1 +allowsisol 1 +fine 1 +grain 1 +share 1 +time 1 +withconflict 1 +symbol 1 +simultan 1 +activ 1 +hide 1 +code 1 +data 1 +beassur 1 +possibl 1 +access 1 +clincher 1 +extensionsthat 1 +want 1 +protectionenforc 1 +overhead 1 +performanceweb 1 +server 1 +networkingstack 1 +main 1 +object 1 +design 1 +reduc 1 +http 1 +latenc 1 +andminim 1 +load 1 +mip 1 +instruct 1 +simul 1 +coupl 1 +calledmipsi 1 +robust 1 +enough 1 +spec 1 +benchmark 1 +standard 1 +ofnew 1 +us 1 +educ 1 +tool 1 +researchplatform 1 +page 1 +describ 1 +mipsi 1 +featuresand 1 +avail 1 +paper 1 +wcsss 1 +namespac 1 +manag 1 +mechan 1 +write 1 +modula 1 +experi 1 +safeti 1 +sosp 1 +softwar 1 +issu 1 +hoto 1 +posit 1 +compar 1 +hardwar 1 +microkernel 1 +applic 1 +specif 1 +servic 1 +sigop 1 +european 1 +workshop 1 +review 1 +version 1 +technic 1 +report 1 +march 1 +measur 1 +limit 1 +parallel 1 +senior 1 +independ 1 +june 1 +talkslanguag 1 +slide 1 +present 1 +first 1 +compil 1 +tucson 1 +arizona 1 +interestswhenev 1 +find 1 +opportun 1 +follow 1 +sail 1 +windsurf 1 +dive 1 +ski 1 +bikingmak 1 +outdoor 1 +cloth 1 +andhik 1 +dylan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..3ede1f7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,140 @@ +eric 1 +anderson 1 +home 1 +page 1 +andersonwher 1 +find 1 +sieg 1 +hall 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +street 1 +longer 1 +black 1 +honor 1 +recent 1 +june 1 +decisionin 1 +aclu 1 +reno 1 +mind 1 +thedecis 1 +mere 1 +interim 1 +step 1 +could 1 +read 1 +take 1 +probabl 1 +still 1 +commit 1 +feloni 1 +care 1 +speech 1 +freedom 1 +themarketplac 1 +idea 1 +fact 1 +third 1 +year 1 +graduat 1 +student 1 +mean 1 +imostli 1 +panic 1 +qual 1 +project 1 +also 1 +try 1 +write 1 +paper 1 +prof 1 +henri 1 +greensideof 1 +duke 1 +finish 1 +master 1 +thesi 1 +onsteadi 1 +state 1 +solut 1 +particular 1 +nonlinear 1 +biharmon 1 +stabil 1 +criterion 1 +explicit 1 +method 1 +restrict 1 +fourth 1 +power 1 +spatial 1 +resolut 1 +implicit 1 +timesteppingmethod 1 +backward 1 +euler 1 +necessari 1 +numer 1 +analysisissu 1 +involv 1 +newton 1 +solv 1 +nonlinearequ 1 +spars 1 +matrix 1 +newtonstep 1 +interplai 1 +pictur 1 +realli 1 +spiffi 1 +work 1 +bodi 1 +code 1 +astrophys 1 +simul 1 +support 1 +data 1 +structuresbi 1 +richard 1 +andersoni 1 +signal 1 +process 1 +music 1 +aim 1 +automat 1 +transcript 1 +acoust 1 +anna 1 +karlin 1 +isth 1 +musician 1 +interest 1 +like 1 +everybodi 1 +els 1 +applet 1 +first 1 +link 1 +text 1 +small 1 +graphic 1 +section 1 +materi 1 +preparedfor 1 +last 1 +fall 1 +snapshot 1 +mostli 1 +famili 1 +prove 1 +brother 1 +final 1 +weather 1 +meander 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..ace34e7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,197 @@ +oren 1 +etzioni 1 +home 1 +pageoren 1 +pagedepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +seattl 1 +washington 1 +voic 1 +mail 1 +offic 1 +sieg 1 +hall 1 +room 1 +brief 1 +bioand 1 +photo 1 +current 1 +research 1 +internet 1 +softbot 1 +enabl 1 +human 1 +user 1 +state 1 +heor 1 +want 1 +accomplish 1 +disambigu 1 +request 1 +anddynam 1 +determin 1 +satisfi 1 +finalist 1 +discoveraward 1 +technolog 1 +innov 1 +softwar 1 +metacrawl 1 +field 1 +servic 1 +searchmultipl 1 +indic 1 +parallel 1 +provid 1 +sophist 1 +pruningopt 1 +netrecommend 1 +search 1 +choic 1 +ahoi 1 +white 1 +page 1 +locatesindividu 1 +high 1 +accuraci 1 +bruteforc 1 +learn 1 +brute 1 +analyz 1 +hypothes 1 +second 1 +whenrun 1 +sparc 1 +select 1 +public 1 +effici 1 +inform 1 +gather 1 +foc 1 +move 1 +food 1 +chain 1 +deploi 1 +theweb 1 +aaai 1 +ascal 1 +comparison 1 +shop 1 +agent 1 +world 1 +wide 1 +autonom 1 +multi 1 +us 1 +postscript 1 +html 1 +base 1 +interfac 1 +cacm 1 +juli 1 +intellig 1 +fact 1 +fiction 1 +forecast 1 +ieee 1 +expert 1 +august 1 +without 1 +robot 1 +repli 1 +brook 1 +magazin 1 +decemb 1 +understand 1 +ijcai 1 +sound 1 +close 1 +reason 1 +plan 1 +toappear 1 +first 1 +addit 1 +paper 1 +student 1 +advis 1 +richardseg 1 +master 1 +thesi 1 +bernard 1 +fileretriev 1 +neal 1 +lesh 1 +planner 1 +unix 1 +keith 1 +golden 1 +universalquantif 1 +incomplet 1 +terranc 1 +goan 1 +error 1 +mikeperkowitz 1 +erik 1 +selberg 1 +zamir 1 +jonathan 1 +shake 1 +undergradu 1 +stephen 1 +soderland 1 +program 1 +umass 1 +amherst 1 +roomi 1 +hewlett 1 +packard 1 +bruce 1 +lesourd 1 +robert 1 +spiger 1 +lockhe 1 +center 1 +william 1 +alford 1 +wisconsin 1 +greg 1 +fitchenholtz 1 +guido 1 +hunt 1 +dymitr 1 +mozdyniewicz 1 +quark 1 +machin 1 +resourc 1 +databas 1 +repositori 1 +irvin 1 +knowledg 1 +discoveri 1 +minecontain 1 +neuroprosearch 1 +contain 1 +recent 1 +relev 1 +neural 1 +network 1 +illinoi 1 +induct 1 +group 1 +statlib 1 +data 1 +algorithm 1 +statist 1 +learningtoolbox 1 +bonn 1 +german 1 +list 1 +usenet 1 +faq 1 +access 1 +count 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..753ef815 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,35 @@ +mike 1 +feelei 1 +home 1 +pagemik 1 +comput 1 +scienc 1 +finish 1 +done 1 +soon 1 +thesi 1 +concern 1 +global 1 +memori 1 +manag 1 +workstationclust 1 +also 1 +work 1 +distribut 1 +opalproject 1 +join 1 +faculti 1 +univers 1 +british 1 +columbia 1 +injanuari 1 +inform 1 +avail 1 +us 1 +link 1 +papersmi 1 +research 1 +summarycvsoutheast 1 +idaholast 1 +modifi 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..f1d33228 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,20 @@ +chri 1 +fisher 1 +home 1 +page 1 +pictur 1 +fisherdepart 1 +comput 1 +scienc 1 +engineeringbox 1 +univers 1 +washington 1 +seattl 1 +voic 1 +mail 1 +sieg 1 +hall 1 +room 1 +current 1 +construct 1 +return 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..5724a0ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,73 @@ +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washington 1 +seattl 1 +sieg 1 +hall 1 +schedulethi 1 +quarter 1 +autumn 1 +ta 1 +cours 1 +rather 1 +work 1 +gener 1 +exam 1 +check 1 +scheduleto 1 +otherwis 1 +around 1 +probablyb 1 +found 1 +librari 1 +somewher 1 +nice 1 +read 1 +paper 1 +research 1 +activitiesmi 1 +main 1 +interest 1 +algorithm 1 +specif 1 +areasof 1 +parallel 1 +geometri 1 +public 1 +meander 1 +place 1 +denni 1 +outta 1 +mind 1 +vista 1 +pea 1 +music 1 +site 1 +chateau 1 +galleri 1 +fund 1 +drive 1 +thing 1 +alec 1 +wolman 1 +might 1 +server 1 +seven 1 +lost 1 +soul 1 +captur 1 +html 1 +listen 1 +phone 1 +booth 1 +mofo 1 +peopl 1 +luci 1 +paul 1 +peach 1 +ruel 1 +look 1 +like 1 +moment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..e1c9c20c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,84 @@ +francesmari 1 +modugno 1 +home 1 +pagefrancesmari 1 +page 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +sieg 1 +hall 1 +phone 1 +mail 1 +washington 1 +research 1 +interest 1 +main 1 +human 1 +interact 1 +also 1 +user 1 +program 1 +formal 1 +model 1 +softwar 1 +specif 1 +verif 1 +technolog 1 +polici 1 +welcom 1 +opportun 1 +collabor 1 +relat 1 +topic 1 +current 1 +project 1 +safeti 1 +machin 1 +interfac 1 +previou 1 +public 1 +avail 1 +onlin 1 +summari 1 +ofmi 1 +thesi 1 +real 1 +time 1 +concurr 1 +distribut 1 +system 1 +parallel 1 +algorthim 1 +profession 1 +activ 1 +basic 1 +symposium 1 +chair 1 +uist 1 +demonstr 1 +educ 1 +carnegi 1 +mellon 1 +univers 1 +march 1 +august 1 +mathemat 1 +cornel 1 +anyth 1 +recent 1 +includecycl 1 +ski 1 +languag 1 +cultur 1 +spanish 1 +previouslyitalian 1 +vegetarian 1 +cook 1 +elleri 1 +line 1 +greet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..8ee95616 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,45 @@ +georg 1 +forman 1 +home 1 +pagegeorg 1 +pagei 1 +receiv 1 +comput 1 +scienc 1 +optim 1 +compil 1 +ariadn 1 +scalabl 1 +pattern 1 +match 1 +parallel 1 +trace 1 +debugg 1 +public 1 +mobil 1 +hyperlink 1 +librari 1 +someth 1 +interest 1 +free 1 +handi 1 +softwar 1 +script 1 +written 1 +word 1 +puzzl 1 +water 1 +song 1 +chang 1 +netscap 1 +anim 1 +gforman 1 +comhom 1 +page 1 +mail 1 +finger 1 +weather 1 +dept 1 +live 1 +pictur 1 +gener 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..5ddb6f75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,48 @@ +home 1 +page 1 +marc 1 +friedman 1 +cours 1 +watercolor 1 +applet 1 +camp 1 +checklist 1 +spanish 1 +english 1 +collabor 1 +dictionari 1 +poetri 1 +research 1 +favorit 1 +quot 1 +link 1 +elsewher 1 +occam 1 +inform 1 +gather 1 +agent 1 +keith 1 +golden 1 +wordbot 1 +bike 1 +trip 1 +artifici 1 +intellig 1 +codi 1 +kwok 1 +weld 1 +ucpop 1 +planner 1 +tool 1 +chang 1 +life 1 +work 1 +nietzschein 1 +netscap 1 +bookmark 1 +file 1 +everi 1 +refer 1 +visitor 1 +sinc 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..81c5430c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,28 @@ +charli 1 +garrett 1 +home 1 +page 1 +address 1 +seattl 1 +research 1 +interest 1 +compil 1 +graphic 1 +neural 1 +network 1 +genet 1 +algorithm 1 +game 1 +plai 1 +algorithmspap 1 +line 1 +algorithmsformerli 1 +member 1 +cecil 1 +group 1 +univers 1 +ofwashington 1 +bookshelf 1 +audio 1 +file 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..e2cfbf57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,49 @@ +georg 1 +winkenbach 1 +winkenbachdepart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +mail 1 +georgew 1 +washington 1 +eduphon 1 +interest 1 +graphic 1 +multimedia 1 +thesi 1 +work 1 +doneund 1 +supervis 1 +professor 1 +david 1 +salesin 1 +deal 1 +appli 1 +tradit 1 +illustr 1 +techniqu 1 +theautomat 1 +render 1 +three 1 +dimension 1 +model 1 +imagescr 1 +prototyp 1 +system 1 +found 1 +link 1 +follow 1 +imag 1 +galleri 1 +grail 1 +laboratori 1 +depart 1 +engin 1 +wife 1 +home 1 +page 1 +taweewan 1 +siwadun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..9df6e7ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,211 @@ +greg 1 +badro 1 +home 1 +pagegreg 1 +welcom 1 +page 1 +last 1 +updat 1 +email 1 +washington 1 +eduaddress 1 +nesbit 1 +seattl 1 +hello 1 +homepag 1 +pleas 1 +feel 1 +free 1 +send 1 +feedback 1 +address 1 +alwai 1 +isuppos 1 +construct 1 +keep 1 +check 1 +back 1 +excitingfeatur 1 +recent 1 +graduat 1 +dukeunivers 1 +complet 1 +degre 1 +doubl 1 +major 1 +incomput 1 +scienc 1 +mathemat 1 +spring 1 +amcurr 1 +emploi 1 +part 1 +time 1 +senior 1 +research 1 +scientist 1 +fortransworld 1 +numer 1 +small 1 +compani 1 +origin 1 +locat 1 +indurham 1 +headquart 1 +bermuda 1 +work 1 +myapart 1 +also 1 +full 1 +student 1 +computersci 1 +engin 1 +depart 1 +univers 1 +ofwashington 1 +fall 1 +cours 1 +pagecs 1 +softwar 1 +seminarcs 1 +compil 1 +seminar 1 +newer 1 +stuff 1 +philosophi 1 +note 1 +misc 1 +patch 1 +fvwm 1 +first 1 +place 1 +best 1 +show 1 +redhat 1 +desktop 1 +configur 1 +competit 1 +entri 1 +emac 1 +readm 1 +file 1 +archiv 1 +zshell 1 +chronicl 1 +duke 1 +newspap 1 +articl 1 +transworldnumer 1 +ieeenat 1 +program 1 +victori 1 +vertic 1 +winter 1 +issu 1 +magazin 1 +contain 1 +geneticalgorithm 1 +person 1 +link 1 +rsum 1 +data 1 +date 1 +busi 1 +sampl 1 +drew 1 +bycomput 1 +simpl 1 +magic 1 +creat 1 +canterburi 1 +progress 1 +variou 1 +random 1 +pictur 1 +life 1 +definitelynot 1 +mani 1 +hobbi 1 +includ 1 +tenni 1 +ski 1 +especi 1 +jackson 1 +hole 1 +volleybal 1 +juggl 1 +piano 1 +plai 1 +game 1 +rubik 1 +cube 1 +linux 1 +freewar 1 +unix 1 +music 1 +sarahmclachlan 1 +billi 1 +joel 1 +yahoo 1 +list 1 +parliamentari 1 +procedur 1 +ncaa 1 +basketbal 1 +interest 1 +lyco 1 +search 1 +comput 1 +commun 1 +daili 1 +univ 1 +unoffici 1 +microsoft 1 +corpor 1 +world 1 +wide 1 +server 1 +gatewai 1 +user 1 +group 1 +histor 1 +imag 1 +hotjava 1 +global 1 +network 1 +navig 1 +perl 1 +practic 1 +extract 1 +report 1 +languag 1 +virtual 1 +librari 1 +inter 1 +unif 1 +devic 1 +connect 1 +write 1 +html 1 +sgml 1 +seinfeld 1 +index 1 +friend 1 +sitcom 1 +materi 1 +base 1 +upon 1 +support 1 +nation 1 +foundat 1 +fellowship 1 +opinion 1 +find 1 +conclus 1 +recommend 1 +express 1 +public 1 +author 1 +necessarili 1 +reflect 1 +view 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..0b416aa0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,294 @@ +greg 1 +linden 1 +home 1 +page 1 +lindenmi 1 +love 1 +wifecorina 1 +current 1 +third 1 +year 1 +graduat 1 +program 1 +polit 1 +scienceher 1 +thecomput 1 +scienc 1 +depart 1 +univers 1 +ofwashington 1 +slave 1 +awai 1 +toward 1 +lofti 1 +goal 1 +complet 1 +undergraduatedegre 1 +california 1 +diego 1 +anodd 1 +doubl 1 +major 1 +comput 1 +go 1 +leav 1 +school 1 +decemb 1 +mactiv 1 +look 1 +posit 1 +softwar 1 +develop 1 +interest 1 +check 1 +resum 1 +java 1 +applet 1 +webview 1 +allow 1 +link 1 +orset 1 +addit 1 +famili 1 +altavistawebviewand 1 +metawebview 1 +instead 1 +enter 1 +keyword 1 +foraltavista 1 +metacrawl 1 +hit 1 +search 1 +servic 1 +return 1 +graph 1 +displai 1 +found 1 +searchservic 1 +autom 1 +travel 1 +assist 1 +emul 1 +dialog 1 +travelag 1 +client 1 +gradual 1 +elicit 1 +flight 1 +prefer 1 +whileallow 1 +brows 1 +real 1 +data 1 +research 1 +prototyp 1 +quit 1 +function 1 +even 1 +earli 1 +stage 1 +highli 1 +rate 1 +gamelan 1 +andjar 1 +wasrat 1 +jar 1 +cool 1 +andwa 1 +staff 1 +pick 1 +altavistawebview 1 +winner 1 +thejava 1 +repositori 1 +contest 1 +also 1 +publish 1 +book 1 +walsh 1 +foundat 1 +meilleur 1 +best 1 +first 1 +linear 1 +ballet 1 +oop 1 +us 1 +capabl 1 +browser 1 +sorri 1 +sourc 1 +demonstr 1 +buffer 1 +avoid 1 +flicker 1 +thread 1 +give 1 +time 1 +run 1 +code 1 +certainli 1 +could 1 +cleaner 1 +though 1 +expect 1 +work 1 +thought 1 +might 1 +enough 1 +standardsto 1 +impress 1 +mylgramm 1 +particl 1 +tree 1 +draw 1 +lgrammer 1 +much 1 +realist 1 +theparticletre 1 +recent 1 +start 1 +judg 1 +evalu 1 +submit 1 +thejar 1 +archiv 1 +summer 1 +dawn 1 +civil 1 +ademonstr 1 +applic 1 +show 1 +plan 1 +techniqu 1 +cansuccessfulli 1 +appli 1 +entertain 1 +myriadsoftwar 1 +ucsd 1 +professor 1 +belew 1 +filippo 1 +menzer 1 +latentenergi 1 +environ 1 +project 1 +tool 1 +developingartifici 1 +life 1 +model 1 +experi 1 +artifici 1 +neuralnetwork 1 +evolutionari 1 +learn 1 +enviro 1 +paper 1 +hank 1 +lesh 1 +user 1 +theautom 1 +assit 1 +majeski 1 +spitzer 1 +localizedinteract 1 +spatial 1 +constraint 1 +iter 1 +prison 1 +dilemma 1 +associ 1 +econom 1 +scientist 1 +krishnamoorthi 1 +paturi 1 +blume 1 +liden 1 +esen 1 +hardwaretradeoff 1 +boolean 1 +concept 1 +world 1 +congress 1 +recurr 1 +neural 1 +network 1 +sdilemma 1 +unpublish 1 +honor 1 +thesi 1 +adam 1 +carlson 1 +sujai 1 +parekh 1 +wrote 1 +funrai 1 +tracer 1 +ofth 1 +inc 1 +graphic 1 +imag 1 +headless 1 +horseman 1 +closeup 1 +chess 1 +duel 1 +assembl 1 +requir 1 +sphere 1 +withreflect 1 +transpar 1 +shadow 1 +distribut 1 +trace 1 +adaptivesampl 1 +mess 1 +thing 1 +pattern 1 +thespher 1 +reflect 1 +causingth 1 +rai 1 +refract 1 +multipl 1 +surfaceand 1 +intern 1 +made 1 +second 1 +anim 1 +call 1 +strike 1 +theanim 1 +written 1 +inventor 1 +manipul 1 +thed 1 +origin 1 +movi 1 +file 1 +alow 1 +qualiti 1 +quicktim 1 +avail 1 +mbquicktim 1 +doesn 1 +compress 1 +anyfurth 1 +least 1 +anyth 1 +resembl 1 +reason 1 +stuff 1 +dilbert 1 +cognit 1 +info 1 +occasion 1 +chateau 1 +guggenheim 1 +annex 1 +engin 1 +washington 1 +seattl 1 +glinden 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..8d105640 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,27 @@ +brian 1 +kri 1 +grant 1 +home 1 +pagebrian 1 +awai 1 +homework 1 +relat 1 +infowork 1 +backgrounduwdynam 1 +compil 1 +groupuw 1 +depart 1 +computersci 1 +engineeringperson 1 +stuffperson 1 +backgroundmi 1 +daughter 1 +isismi 1 +trip 1 +singaporemi 1 +bookmarksmi 1 +public 1 +keylast 1 +updat 1 +octob 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..b7de02e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,98 @@ +dave 1 +grove 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +offic 1 +chateau 1 +sieg 1 +worki 1 +spend 1 +time 1 +plai 1 +cecil 1 +pure 1 +object 1 +orient 1 +langaug 1 +us 1 +vehicl 1 +integr 1 +research 1 +area 1 +languag 1 +design 1 +program 1 +environ 1 +optim 1 +compil 1 +also 1 +hord 1 +consult 1 +hang 1 +aroundth 1 +fring 1 +spinproject 1 +actual 1 +attempt 1 +graduat 1 +sometim 1 +soonish 1 +much 1 +less 1 +frequent 1 +paper 1 +author 1 +wouldn 1 +complet 1 +without 1 +dilbertfix 1 +strip 1 +thathit 1 +littl 1 +close 1 +home 1 +current 1 +manag 1 +underacheiv 1 +fantasi 1 +footbal 1 +team 1 +summer 1 +hampshir 1 +work 1 +gui 1 +scoutreserv 1 +greaterlowel 1 +council 1 +pictur 1 +casunset 1 +taken 1 +right 1 +cabin 1 +kick 1 +anoth 1 +everi 1 +boi 1 +someth 1 +silli 1 +white 1 +water 1 +raft 1 +trip 1 +especi 1 +cool 1 +month 1 +toronto 1 +drove 1 +back 1 +toseattl 1 +took 1 +number 1 +detour 1 +along 1 +somehihglight 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..5b904a84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,61 @@ +scott 1 +hauck 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +year 1 +graduat 1 +student 1 +current 1 +work 1 +multi 1 +fpga 1 +system 1 +rapid 1 +prototyp 1 +board 1 +level 1 +design 1 +thoughi 1 +also 1 +interest 1 +asynchron 1 +circuit 1 +architectur 1 +parallel 1 +june 1 +person 1 +biographi 1 +educ 1 +experi 1 +public 1 +curriculum 1 +vitaeresearch 1 +survei 1 +methodolog 1 +well 1 +first 1 +triptych 1 +montag 1 +develop 1 +improv 1 +densiti 1 +commerci 1 +springbok 1 +partit 1 +assign 1 +rout 1 +topolog 1 +gener 1 +chinook 1 +project 1 +hardwar 1 +softwar 1 +synthesi 1 +simul 1 +embed 1 +applic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..aae46406 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin 1 +hinshaw 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..d41785ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,66 @@ +place 1 +macduff 1 +ultrasound 1 +imag 1 +emma 1 +elspeth 1 +name 1 +subject 1 +chang 1 +without 1 +notic 1 +unborn 1 +daughter 1 +week 1 +fromconcept 1 +ripe 1 +inmid 1 +decemb 1 +view 1 +profil 1 +ly 1 +back 1 +lookingup 1 +head 1 +right 1 +upper 1 +half 1 +torso 1 +theleft 1 +busi 1 +obsess 1 +impend 1 +fatherhood 1 +master 1 +thesi 1 +part 1 +chinook 1 +project 1 +pass 1 +time 1 +silli 1 +possibl 1 +also 1 +rememb 1 +myspam 1 +unfortun 1 +wast 1 +html 1 +brows 1 +around 1 +interest 1 +stuff 1 +usingwebcrawl 1 +pointer 1 +neat 1 +frogstv 1 +nationpenn 1 +tellermus 1 +lyricsian 1 +washington 1 +dept 1 +comput 1 +scienc 1 +engin 1 +univ 1 +washingtonseattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..7cf537d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,145 @@ +herv 1 +jamrozikherv 1 +jamrozik 1 +postdoc 1 +univers 1 +washington 1 +sinc 1 +septemb 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +sieg 1 +hall 1 +phone 1 +mail 1 +research 1 +main 1 +interest 1 +distribut 1 +system 1 +object 1 +orient 1 +languag 1 +softwar 1 +engin 1 +current 1 +work 1 +global 1 +memoi 1 +manag 1 +workstat 1 +cluster 1 +hank 1 +levi 1 +mari 1 +vernon 1 +anna 1 +karlin 1 +mike 1 +feelei 1 +geoff 1 +voelker 1 +high 1 +speed 1 +network 1 +greatli 1 +encourag 1 +memori 1 +cach 1 +virtual 1 +file 1 +page 1 +therebi 1 +reduc 1 +need 1 +disk 1 +access 1 +node 1 +intens 1 +applic 1 +primari 1 +lightli 1 +load 1 +temporari 1 +back 1 +store 1 +introduc 1 +level 1 +hierarchi 1 +name 1 +li 1 +logic 1 +local 1 +fundament 1 +transfer 1 +unit 1 +remot 1 +size 1 +perform 1 +factor 1 +recent 1 +modern 1 +processor 1 +increas 1 +order 1 +provid 1 +coverag 1 +amort 1 +cost 1 +unfortun 1 +small 1 +latenc 1 +trend 1 +thu 1 +odd 1 +studi 1 +subpag 1 +mean 1 +environ 1 +us 1 +evan 1 +inproceed 1 +seventh 1 +confer 1 +architectur 1 +support 1 +program 1 +oper 1 +octob 1 +postscript 1 +thesi 1 +debug 1 +theuniversit 1 +joseph 1 +fourier 1 +grenobl 1 +involv 1 +guideproject 1 +laboratoir 1 +bull 1 +imag 1 +part 1 +imaginstitut 1 +extrem 1 +peopl 1 +area 1 +snot 1 +visit 1 +louvr 1 +galleri 1 +look 1 +map 1 +franc 1 +europ 1 +world 1 +somefamili 1 +pictur 1 +somefriend 1 +eduv 1 +march 1 +decemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..f35a6eae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,25 @@ +jason 1 +secoski 1 +home 1 +pagejason 1 +washington 1 +eduaddress 1 +comput 1 +scienc 1 +engin 1 +depart 1 +sieg 1 +hall 1 +cunivers 1 +boxseattl 1 +offic 1 +frequent 1 +us 1 +page 1 +projectseattl 1 +weather 1 +forecast 1 +channel 1 +secoskylast 1 +modifi 1 +thursdai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..9b7df386 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,82 @@ +jeremi 1 +baerjeremi 1 +baer 1 +never 1 +school 1 +interfer 1 +educ 1 +mark 1 +twain 1 +stuff 1 +dream 1 +made 1 +william 1 +shakespearei 1 +current 1 +graduat 1 +student 1 +comput 1 +scienceat 1 +univers 1 +washington 1 +interest 1 +includ 1 +artifici 1 +intellig 1 +human 1 +computerinteract 1 +multimedia 1 +softwar 1 +engineeringtool 1 +gener 1 +music 1 +person 1 +creativ 1 +cool 1 +place 1 +spend 1 +signific 1 +time 1 +pierian 1 +spring 1 +softwareoregon 1 +museum 1 +scienc 1 +industri 1 +omsi 1 +pomona 1 +collegeher 1 +look 1 +project 1 +mine 1 +eight 1 +puzzl 1 +java 1 +applet 1 +work 1 +progress 1 +experiment 1 +virtual 1 +travel 1 +copi 1 +effect 1 +demo 1 +question 1 +static 1 +layer 1 +analysi 1 +program 1 +feel 1 +stress 1 +realli 1 +silli 1 +littl 1 +macintosh 1 +thati 1 +wrote 1 +year 1 +download 1 +like 1 +metacrawl 1 +searchcopyright 1 +jbaer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..376b1b86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,60 @@ +jeremi 1 +buhler 1 +home 1 +pagejeremi 1 +pagedo 1 +attempt 1 +adjust 1 +browser 1 +control 1 +transmiss 1 +statu 1 +first 1 +year 1 +student 1 +institut 1 +univers 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +offic 1 +sieg 1 +hall 1 +phone 1 +mail 1 +address 1 +jbuhler 1 +finger 1 +tako 1 +import 1 +stufflectur 1 +note 1 +suffix 1 +tree 1 +postscript 1 +latex 1 +research 1 +come 1 +soon 1 +cours 1 +schedulemi 1 +public 1 +keycyb 1 +activ 1 +electron 1 +frontier 1 +foundat 1 +grinsrecommend 1 +readingmi 1 +undergradu 1 +alma 1 +mater 1 +rice 1 +universityquot 1 +quotesmi 1 +page 1 +return 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..fbb4c7ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,270 @@ +jeff 1 +dean 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +offic 1 +chateau 1 +sieg 1 +view 1 +would 1 +dang 1 +build 1 +weren 1 +futur 1 +plansi 1 +plan 1 +graduat 1 +summer 1 +join 1 +western 1 +research 1 +laboratori 1 +sunni 1 +palo 1 +alto 1 +bought 1 +hous 1 +nearbi 1 +menlo 1 +park 1 +curriculum 1 +vita 1 +postscript 1 +also 1 +summari 1 +teach 1 +experi 1 +projectsi 1 +work 1 +primarili 1 +cecil 1 +project 1 +pure 1 +object 1 +orient 1 +langaug 1 +us 1 +avehicl 1 +integr 1 +area 1 +languag 1 +design 1 +program 1 +environ 1 +optim 1 +compil 1 +techniqu 1 +weintend 1 +scale 1 +larg 1 +real 1 +world 1 +andto 1 +keep 1 +true 1 +goal 1 +implement 1 +vortexcompil 1 +current 1 +line 1 +codein 1 +much 1 +group 1 +involv 1 +track 1 +bug 1 +hang 1 +spinproject 1 +meet 1 +spin 1 +extens 1 +oper 1 +systemmicrokernel 1 +support 1 +dynam 1 +adapt 1 +system 1 +interfacesand 1 +direct 1 +applic 1 +control 1 +stillmaintain 1 +inter 1 +isol 1 +spring 1 +quarter 1 +organ 1 +seminar 1 +interest 1 +concern 1 +effici 1 +ofobject 1 +particular 1 +explor 1 +whole 1 +analysi 1 +usedto 1 +improv 1 +perform 1 +affect 1 +increment 1 +andhow 1 +interact 1 +especiallyprofil 1 +guid 1 +howwhol 1 +assumedthat 1 +access 1 +entir 1 +manycompromis 1 +made 1 +exist 1 +becom 1 +unnecessari 1 +appli 1 +lesson 1 +learn 1 +wholeprogram 1 +toward 1 +programminglanguag 1 +flexibl 1 +base 1 +underlyingimplement 1 +valid 1 +three 1 +principaldesign 1 +develop 1 +vortex 1 +forobject 1 +defin 1 +independentintermedi 1 +ishigh 1 +enough 1 +level 1 +permit 1 +featur 1 +messagesend 1 +closur 1 +creation 1 +contain 1 +uniqu 1 +wayof 1 +compos 1 +pass 1 +parallel 1 +obtain 1 +better 1 +result 1 +repeatedli 1 +run 1 +passessepar 1 +exampl 1 +intraprocedur 1 +classanalysi 1 +profil 1 +receiv 1 +class 1 +predict 1 +inlin 1 +aliasanalysi 1 +split 1 +singl 1 +combin 1 +part 1 +wai 1 +structuringoptim 1 +kind 1 +composit 1 +stillallow 1 +independ 1 +eachoth 1 +nice 1 +framework 1 +specifi 1 +iter 1 +data 1 +flowanalys 1 +client 1 +withrel 1 +littl 1 +effort 1 +dead 1 +assignmentelimin 1 +approxim 1 +code 1 +publicationssom 1 +recent 1 +paper 1 +author 1 +personali 1 +love 1 +spici 1 +food 1 +mild 1 +four 1 +letter 1 +word 1 +coke 1 +probabl 1 +kick 1 +caffein 1 +habit 1 +enjoy 1 +moment 1 +spent 1 +wife 1 +heidi 1 +daughter 1 +victoria 1 +realli 1 +like 1 +somedai 1 +honeymoon 1 +kauai 1 +hurrican 1 +iniki 1 +biplan 1 +ride 1 +galvin 1 +fly 1 +guess 1 +never 1 +anyth 1 +anymor 1 +took 1 +flight 1 +consist 1 +minut 1 +around 1 +downtown 1 +puget 1 +sound 1 +travel 1 +model 1 +feel 1 +dare 1 +sadli 1 +insur 1 +coverag 1 +doesn 1 +passeng 1 +walk 1 +wing 1 +back 1 +enjoi 1 +fantast 1 +even 1 +highli 1 +recommend 1 +look 1 +someth 1 +number 1 +rather 1 +lengthi 1 +hotlist 1 +jdean 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..1127a78f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,176 @@ +damon 1 +rees 1 +home 1 +pagejon 1 +reesepost 1 +doctor 1 +research 1 +safeti 1 +critic 1 +softwar 1 +groupdepart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +chateau 1 +guggenheim 1 +annex 1 +phone 1 +mail 1 +jdrees 1 +washington 1 +interest 1 +problem 1 +awar 1 +devic 1 +system 1 +structur 1 +caus 1 +catastroph 1 +fail 1 +hardwar 1 +becom 1 +less 1 +expens 1 +expect 1 +place 1 +control 1 +wider 1 +rang 1 +applic 1 +advantag 1 +convent 1 +technolog 1 +flexibl 1 +come 1 +price 1 +behavior 1 +complex 1 +unpredict 1 +perhap 1 +best 1 +public 1 +exampl 1 +three 1 +mile 1 +island 1 +incid 1 +oper 1 +great 1 +difficulti 1 +diagnos 1 +state 1 +emerg 1 +requir 1 +stage 1 +develop 1 +project 1 +success 1 +especi 1 +respect 1 +reason 1 +colleagu 1 +concentr 1 +commun 1 +specif 1 +base 1 +languag 1 +call 1 +machin 1 +rsml 1 +valid 1 +us 1 +specifi 1 +tca 1 +avion 1 +thesi 1 +invent 1 +hazard 1 +analysi 1 +procedur 1 +hazop 1 +studi 1 +signific 1 +concept 1 +borrow 1 +deviat 1 +henc 1 +name 1 +link 1 +html 1 +transcript 1 +dissert 1 +current 1 +write 1 +confer 1 +articl 1 +summar 1 +make 1 +avail 1 +group 1 +possibl 1 +dynam 1 +displai 1 +search 1 +siang 1 +tool 1 +integr 1 +kurt 1 +partridg 1 +alpha 1 +version 1 +publicli 1 +sean 1 +sandi 1 +semant 1 +draft 1 +document 1 +includ 1 +discuss 1 +variant 1 +improv 1 +academ 1 +histori 1 +inform 1 +univers 1 +california 1 +irvin 1 +postscript 1 +linguist 1 +rice 1 +waxahachi 1 +high 1 +school 1 +nanci 1 +leveson 1 +mat 1 +heimdahl 1 +holli 1 +hildreth 1 +process 1 +ieee 1 +transact 1 +engin 1 +septemb 1 +steven 1 +dolin 1 +curv 1 +interpret 1 +diagnost 1 +techniqu 1 +industri 1 +januari 1 +februari 1 +ortega 1 +experi 1 +statechart 1 +sixth 1 +intern 1 +workshop 1 +design 1 +como 1 +itali 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..c79355d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,104 @@ +home 1 +page 1 +jack 1 +lojack 1 +lojlo 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +current 1 +construct 1 +research 1 +paper 1 +convert 1 +thread 1 +level 1 +parallel 1 +instruct 1 +simultan 1 +multithread 1 +abstract 1 +postscript 1 +susan 1 +egger 1 +joel 1 +emer 1 +henri 1 +levi 1 +rebecca 1 +stamm 1 +anddean 1 +tullsen 1 +submit 1 +public 1 +juli 1 +exploit 1 +choic 1 +fetch 1 +issu 1 +implement 1 +processor 1 +dean 1 +proceed 1 +annual 1 +intern 1 +symposium 1 +architectur 1 +philadelphia 1 +compil 1 +first 1 +suif 1 +workshop 1 +stanford 1 +januari 1 +improv 1 +balanc 1 +schedul 1 +optim 1 +increas 1 +sigplan 1 +confer 1 +program 1 +languag 1 +design 1 +jolla 1 +california 1 +june 1 +compar 1 +static 1 +dynam 1 +superscalar 1 +gener 1 +examin 1 +written 1 +report 1 +interact 1 +loph 1 +qualifi 1 +work 1 +support 1 +interest 1 +also 1 +includ 1 +vliw 1 +well 1 +particular 1 +investig 1 +person 1 +find 1 +franklin 1 +eseattl 1 +orsieg 1 +hall 1 +room 1 +phone 1 +coupl 1 +pictur 1 +recent 1 +paintbal 1 +experi 1 +yahoojlo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..52046d5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,45 @@ +sherman 1 +home 1 +page 1 +shermanjoebob 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +usami 1 +research 1 +interest 1 +user 1 +interfac 1 +designinform 1 +navig 1 +visual 1 +project 1 +activ 1 +inform 1 +local 1 +survei 1 +useclass 1 +hcreat 1 +impress 1 +pagequ 1 +time 1 +sarahsoftballstuff 1 +might 1 +want 1 +automat 1 +suggest 1 +link 1 +relat 1 +topic 1 +directori 1 +us 1 +pagesif 1 +browser 1 +support 1 +send 1 +mail 1 +tojoebob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..6642c2c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,61 @@ +home 1 +page 1 +joshua 1 +seim 1 +abstract 1 +biolog 1 +base 1 +neural 1 +network 1 +system 1 +current 1 +test 1 +theunivers 1 +washington 1 +depart 1 +comput 1 +scienc 1 +begunin 1 +lockean 1 +blank 1 +slate 1 +josh 1 +learn 1 +emul 1 +observedbehavior 1 +successfulli 1 +accomplish 1 +sever 1 +task 1 +graduatingfrom 1 +colleg 1 +travel 1 +volit 1 +recent 1 +start 1 +demonstr 1 +potenti 1 +independ 1 +creativ 1 +thought 1 +taskw 1 +present 1 +earn 1 +expect 1 +take 1 +year 1 +document 1 +provid 1 +overviewof 1 +cognit 1 +ambulatori 1 +achiev 1 +organ 1 +person 1 +academichierarchi 1 +addition 1 +futur 1 +work 1 +discuss 1 +within 1 +context 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..08e8aab0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,4 @@ +jovan 1 +home 1 +page 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..42224b9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,67 @@ +joanna 1 +powerjoanna 1 +pagehi 1 +cat 1 +academ 1 +interestsmi 1 +main 1 +interest 1 +comput 1 +scienc 1 +graphic 1 +grad 1 +school 1 +uwneat 1 +stuff 1 +alma 1 +matercool 1 +link 1 +jonathan 1 +shadegraph 1 +research 1 +uwduoton 1 +reproductionmi 1 +biologi 1 +especi 1 +genet 1 +molecular 1 +matermost 1 +recent 1 +site 1 +gain 1 +employmentpubl 1 +power 1 +brad 1 +west 1 +eric 1 +stollnitz 1 +david 1 +salesin 1 +reproduc 1 +color 1 +imag 1 +duoton 1 +proceed 1 +siggraph 1 +page 1 +york 1 +real 1 +lifepast 1 +homesdiversionsgend 1 +issuesstatu 1 +women 1 +sciencenow 1 +home 1 +pagefeminist 1 +major 1 +onlineultim 1 +frisbeefun 1 +stufffroggi 1 +sean 1 +quotesbrad 1 +comic 1 +musicevan 1 +jokes 1 +pagesmi 1 +herojpow 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..700e6aa4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,16 @@ +jonathan 1 +shake 1 +sieg 1 +hall 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +ahoi 1 +homepag 1 +finderresumlinkslast 1 +updat 1 +august 1 +jshake 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..125d5ff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan 1 +alemanyjuan 1 +alemani 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..dd8a72c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,116 @@ +kari 1 +pulli 1 +home 1 +pagekari 1 +antero 1 +pullii 1 +third 1 +year 1 +graduat 1 +student 1 +comput 1 +scienc 1 +engineeringdepart 1 +univers 1 +ofwashington 1 +work 1 +interest 1 +graphic 1 +vision 1 +andmathemat 1 +try 1 +combin 1 +aspect 1 +thesedisciplin 1 +research 1 +professor 1 +depart 1 +closest 1 +tonyderos 1 +actual 1 +left 1 +uwfor 1 +pixar 1 +lindashapiro 1 +addition 1 +werner 1 +stuetzl 1 +andjohn 1 +mcdonald 1 +statist 1 +duchamp 1 +mathemat 1 +andhugu 1 +hopp 1 +rick 1 +szeliski 1 +microsoft 1 +qual 1 +project 1 +tribor 1 +triplet 1 +base 1 +object 1 +recognitionsystem 1 +linda 1 +report 1 +technic 1 +engin 1 +universityof 1 +washington 1 +current 1 +surfacereconstruct 1 +rang 1 +data 1 +multipl 1 +baselin 1 +camerasystem 1 +obtain 1 +subdivis 1 +surfac 1 +waveletanalysi 1 +geometri 1 +reflect 1 +function 1 +pass 1 +gener 1 +examin 1 +topic 1 +rigidregistr 1 +click 1 +find 1 +class 1 +architecturesystem 1 +taught 1 +susanegg 1 +distribut 1 +theclass 1 +brianbershad 1 +imag 1 +understand 1 +steven 1 +tanimoto 1 +present 1 +sketch 1 +siggraph 1 +getto 1 +slide 1 +remov 1 +wavelet 1 +herear 1 +speaker 1 +note 1 +eacutesum 1 +eacut 1 +sieg 1 +hall 1 +seattl 1 +email 1 +kapu 1 +union 1 +folk 1 +takavainionti 1 +oulu 1 +finland 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..6a5e2e60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,17 @@ +anna 1 +karlinanna 1 +rochel 1 +karlinassoci 1 +professor 1 +univers 1 +washington 1 +sincejuli 1 +work 1 +comput 1 +scienc 1 +engin 1 +depart 1 +seattl 1 +home 1 +page 1 +paperskarlin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..a8b07998 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,10 @@ +home 1 +page 1 +yeunghom 1 +yeungperson 1 +infomi 1 +picturemi 1 +researchtelnet 1 +machinessend 1 +email 1 +back 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..c00256fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,83 @@ +kurt 1 +partridg 1 +academ 1 +inform 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +washington 1 +interest 1 +includ 1 +softwar 1 +engin 1 +specif 1 +usabl 1 +readabl 1 +applic 1 +formal 1 +method 1 +safeti 1 +page 1 +describ 1 +other 1 +work 1 +area 1 +also 1 +dabbl 1 +user 1 +interfac 1 +design 1 +human 1 +interact 1 +java 1 +program 1 +recent 1 +public 1 +bddtcl 1 +environ 1 +visual 1 +manipul 1 +binari 1 +decisiondiagram 1 +poster 1 +html 1 +postscript 1 +preview 1 +nanci 1 +leveson 1 +bauer 1 +mat 1 +heimdahl 1 +wayn 1 +ohlrich 1 +vivek 1 +ratan 1 +rees 1 +critic 1 +nasa 1 +confer 1 +qualiti 1 +background 1 +start 1 +school 1 +complet 1 +berkelei 1 +live 1 +love 1 +suburban 1 +life 1 +thousand 1 +oak 1 +parent 1 +sister 1 +name 1 +oti 1 +right 1 +humor 1 +corner 1 +seattl 1 +voic 1 +kepart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..926979db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,50 @@ +keith 1 +golden 1 +home 1 +page 1 +questa 1 +pagina 1 +anch 1 +italiano 1 +researchsoftbotsplanningkrselect 1 +publicationscurriculum 1 +vita 1 +also 1 +inpostscriptrandom 1 +hackingwordbot 1 +collabor 1 +dictionari 1 +like 1 +bicycl 1 +tour 1 +languag 1 +paint 1 +photographi 1 +natur 1 +coffe 1 +godless 1 +pinko 1 +stuff 1 +dislik 1 +suit 1 +lawyer 1 +car 1 +friend 1 +ellenmarcruben 1 +laurennickrich 1 +joannavivek 1 +advisor 1 +oren 1 +etzioni 1 +weld 1 +keithgolden 1 +depart 1 +ofcomput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +kgolden 1 +complet 1 +list 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..981a4747 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,143 @@ +kingsum 1 +chow 1 +washington 1 +educomput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +usathi 1 +inform 1 +highwai 1 +alwai 1 +construct 1 +tabl 1 +content 1 +person 1 +research 1 +upcom 1 +confer 1 +resum 1 +current 1 +schedul 1 +bridg 1 +glossari 1 +hong 1 +kong 1 +suggest 1 +feedbackresearchmi 1 +advisor 1 +david 1 +notkin 1 +asynchron 1 +softwar 1 +evolut 1 +develop 1 +toolspap 1 +line 1 +qualiti 1 +manag 1 +respons 1 +driven 1 +readi 1 +semi 1 +automat 1 +updat 1 +applic 1 +librari 1 +chang 1 +technic 1 +report 1 +revis 1 +version 1 +appear 1 +icsm 1 +asia 1 +pacif 1 +workshop 1 +march 1 +program 1 +transform 1 +mainten 1 +proceed 1 +ics 1 +william 1 +griswold 1 +editor 1 +intern 1 +april 1 +us 1 +site 1 +pcct 1 +sorcererpcct 1 +home 1 +page 1 +terrenc 1 +parr 1 +note 1 +newbiesresumepleasedrop 1 +mailto 1 +specifi 1 +text 1 +postscript 1 +format 1 +kongchines 1 +technolog 1 +polytechn 1 +citi 1 +kongsingapor 1 +sitessingapor 1 +onlin 1 +world 1 +wide 1 +server 1 +alumnu 1 +websom 1 +campu 1 +friendstom 1 +liew 1 +fook 1 +wang 1 +jiang 1 +weidongu 1 +relatedunivers 1 +style 1 +polici 1 +manual 1 +these 1 +dissert 1 +graduat 1 +school 1 +webserv 1 +book 1 +storeinvestmentsfre 1 +minut 1 +delai 1 +quot 1 +watch 1 +market 1 +data 1 +experiment 1 +mutual 1 +fund 1 +chart 1 +invest 1 +center 1 +stock 1 +commod 1 +analysismisc 1 +read 1 +chines 1 +list 1 +thoma 1 +china 1 +new 1 +servic 1 +welcom 1 +movi 1 +visit 1 +sinc 1 +last 1 +modifi 1 +date 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..ad0daec5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,90 @@ +kevin 1 +bold 1 +boldingkwb 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +juvenil 1 +squirt 1 +wander 1 +search 1 +suitabl 1 +rock 1 +hunk 1 +coral 1 +cling 1 +make 1 +home 1 +life 1 +task 1 +rudimentari 1 +nervou 1 +system 1 +find 1 +spot 1 +take 1 +root 1 +doesn 1 +need 1 +brain 1 +eat 1 +rather 1 +like 1 +get 1 +tenur 1 +dennett 1 +conscious 1 +explain 1 +research 1 +current 1 +work 1 +build 1 +high 1 +speed 1 +latencylan 1 +chaotic 1 +router 1 +previou 1 +researchha 1 +chaoticrout 1 +form 1 +minim 1 +adapt 1 +rout 1 +formass 1 +parallel 1 +multicomput 1 +profession 1 +assist 1 +professor 1 +electr 1 +pacif 1 +also 1 +part 1 +time 1 +researchassoci 1 +ofwashington 1 +signific 1 +paper 1 +written 1 +archiv 1 +ofth 1 +group 1 +spend 1 +teach 1 +engineeringat 1 +person 1 +photo 1 +took 1 +comethyakutak 1 +moustach 1 +real 1 +case 1 +want 1 +visit 1 +anoth 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..f841a2e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,34 @@ +richard 1 +ladnerrichard 1 +ladnerprofessor 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +mail 1 +ladner 1 +phone 1 +offic 1 +sieg 1 +hall 1 +room 1 +person 1 +short 1 +biographyresearch 1 +public 1 +studentsteachingcomput 1 +program 1 +fall 1 +quarter 1 +introduct 1 +commun 1 +network 1 +spring 1 +formal 1 +model 1 +winter 1 +data 1 +structur 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..9c276e66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,112 @@ +larri 1 +mcmurchi 1 +home 1 +page 1 +mcmurchiedepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +seattl 1 +washington 1 +voic 1 +mail 1 +offic 1 +sieg 1 +hall 1 +room 1 +current 1 +research 1 +director 1 +laboratori 1 +integratedsystem 1 +chemistri 1 +western 1 +hework 1 +area 1 +quantum 1 +graduat 1 +studi 1 +primari 1 +focu 1 +number 1 +evalu 1 +class 1 +ofintegr 1 +gaussian 1 +function 1 +later 1 +appli 1 +work 1 +theconstruct 1 +larg 1 +spars 1 +hamiltonian 1 +matric 1 +coauthorof 1 +comprehens 1 +packag 1 +program 1 +meld 1 +us 1 +abinitio 1 +calcul 1 +small 1 +molecul 1 +sinc 1 +join 1 +staff 1 +depart 1 +andengin 1 +supervis 1 +technic 1 +ofth 1 +integr 1 +system 1 +coauthor 1 +wirec 1 +aschemat 1 +captur 1 +allow 1 +design 1 +code 1 +withschemat 1 +symbol 1 +creat 1 +concis 1 +parameteriz 1 +representationof 1 +also 1 +involv 1 +develop 1 +andcommerci 1 +mactest 1 +softwar 1 +hardwareenviron 1 +test 1 +chip 1 +board 1 +andsubsystem 1 +recent 1 +fpga 1 +andha 1 +gener 1 +purpos 1 +perform 1 +driven 1 +router 1 +northwest 1 +cost 1 +vlsi 1 +tester 1 +triptych 1 +high 1 +densiti 1 +architectur 1 +public 1 +journal 1 +articl 1 +upcom 1 +confer 1 +return 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..1621db36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,207 @@ +nanci 1 +leveson 1 +home 1 +page 1 +levesondepart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +express 1 +mail 1 +sieg 1 +hall 1 +seattl 1 +washington 1 +professor 1 +join 1 +faculti 1 +come 1 +california 1 +search 1 +rain 1 +receiv 1 +degre 1 +mathand 1 +ucla 1 +spent 1 +form 1 +year 1 +univers 1 +irvin 1 +start 1 +area 1 +research 1 +softwar 1 +safeti 1 +concern 1 +problem 1 +build 1 +real 1 +time 1 +system 1 +failur 1 +result 1 +loss 1 +life 1 +properti 1 +advantag 1 +topic 1 +nobodi 1 +question 1 +goal 1 +except 1 +misanthrop 1 +matter 1 +anywai 1 +student 1 +recent 1 +produc 1 +aform 1 +requir 1 +specif 1 +tca 1 +collis 1 +avoid 1 +commerci 1 +aircraft 1 +airspac 1 +lesson 1 +learn 1 +project 1 +never 1 +anyth 1 +like 1 +seem 1 +pleas 1 +though 1 +adopt 1 +theiroffici 1 +current 1 +work 1 +analysi 1 +specifi 1 +behavior 1 +claim 1 +thatyou 1 +read 1 +fact 1 +take 1 +train 1 +late 1 +safetyresearch 1 +also 1 +model 1 +autom 1 +highwai 1 +automobil 1 +variou 1 +aerospac 1 +subtop 1 +includ 1 +safe 1 +design 1 +fault 1 +toler 1 +verif 1 +valid 1 +editor 1 +chief 1 +ieee 1 +transact 1 +softwareengin 1 +elect 1 +member 1 +board 1 +director 1 +computingresearch 1 +associ 1 +nation 1 +council 1 +commissionon 1 +engin 1 +technic 1 +committe 1 +public 1 +polici 1 +chair 1 +studi 1 +evalu 1 +space 1 +shuttl 1 +process 1 +levesoni 1 +fellow 1 +award 1 +aiaa 1 +inform 1 +systemsaward 1 +contribut 1 +aeronaut 1 +technolog 1 +andscienc 1 +develop 1 +field 1 +promotingrespons 1 +practic 1 +propertyar 1 +stake 1 +book 1 +safewar 1 +addison 1 +weslei 1 +publish 1 +paper 1 +avail 1 +list 1 +isalso 1 +copi 1 +favorit 1 +actual 1 +keynoteaddress 1 +conf 1 +melbourn 1 +titl 1 +high 1 +pressur 1 +steam 1 +click 1 +qual 1 +follow 1 +appli 1 +hazardanalysi 1 +techniqu 1 +writtenin 1 +state 1 +machin 1 +style 1 +languag 1 +call 1 +rsml 1 +determin 1 +wai 1 +tree 1 +analys 1 +gener 1 +newrequir 1 +human 1 +interfac 1 +deriv 1 +principl 1 +hazard 1 +control 1 +cockpit 1 +analyz 1 +accid 1 +report 1 +involv 1 +mode 1 +awar 1 +problemsand 1 +issu 1 +interact 1 +finger 1 +citi 1 +airport 1 +perhap 1 +contact 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..76102d72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,202 @@ +hank 1 +levi 1 +home 1 +page 1 +henri 1 +professor 1 +join 1 +faculti 1 +current 1 +research 1 +project 1 +focu 1 +oper 1 +system 1 +parallel 1 +distribut 1 +comput 1 +architectur 1 +particularli 1 +simultan 1 +multithread 1 +object 1 +base 1 +languag 1 +environ 1 +recent 1 +projectcal 1 +opal 1 +deal 1 +singl 1 +address 1 +space 1 +theetch 1 +projecti 1 +produc 1 +tool 1 +perform 1 +instrument 1 +optim 1 +binari 1 +execut 1 +author 1 +book 1 +numer 1 +paper 1 +includ 1 +outstand 1 +select 1 +four 1 +consecutiveacm 1 +symposia 1 +principl 1 +former 1 +chair 1 +sigop 1 +special 1 +interest 1 +group 1 +onoper 1 +program 1 +theth 1 +symposium 1 +tobe 1 +held 1 +hold 1 +carnegi 1 +mellon 1 +universityand 1 +univers 1 +washington 1 +come 1 +consult 1 +engin 1 +digit 1 +equip 1 +corpor 1 +work 1 +span 1 +rang 1 +workstat 1 +fellow 1 +associ 1 +machineryand 1 +recipi 1 +fulbright 1 +scholar 1 +award 1 +eleven 1 +master 1 +student 1 +nine 1 +survivedlevi 1 +supervis 1 +haveal 1 +escap 1 +academ 1 +posit 1 +major 1 +lab 1 +glu 1 +usual 1 +befound 1 +ski 1 +bike 1 +plai 1 +tenni 1 +help 1 +lead 1 +thedepart 1 +infam 1 +softbal 1 +team 1 +smile 1 +potato 1 +death 1 +sampl 1 +dessert 1 +seattl 1 +mani 1 +parlor 1 +publicationsreduc 1 +network 1 +latenc 1 +us 1 +subpag 1 +global 1 +memori 1 +jamrozik 1 +feelei 1 +voelker 1 +evan 1 +karlin 1 +vernon 1 +inproceed 1 +seventh 1 +confer 1 +support 1 +octob 1 +postscript 1 +implement 1 +manag 1 +cluster 1 +michael 1 +william 1 +morgan 1 +freder 1 +pighin 1 +anna 1 +chandramohan 1 +thekkath 1 +appear 1 +proc 1 +decemb 1 +maxim 1 +chip 1 +dean 1 +tullsen 1 +susan 1 +egger 1 +annual 1 +intern 1 +june 1 +exploit 1 +choic 1 +instruct 1 +fetch 1 +issu 1 +implementablesimultan 1 +processor 1 +joen 1 +emer 1 +jack 1 +rebecca 1 +stamm 1 +share 1 +protect 1 +jeffrei 1 +chase 1 +edwardd 1 +lazowska 1 +transact 1 +novemb 1 +integr 1 +coher 1 +recover 1 +vivek 1 +narasayya 1 +first 1 +design 1 +hardwar 1 +softwar 1 +effici 1 +except 1 +handl 1 +conf 1 +arch 1 +prog 1 +asplo 1 +separ 1 +data 1 +control 1 +transfer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..ba8e9706 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,114 @@ +opal 1 +oper 1 +system 1 +projectop 1 +project 1 +explor 1 +structur 1 +tunedto 1 +need 1 +complex 1 +applic 1 +numberof 1 +cooper 1 +program 1 +manipul 1 +larg 1 +share 1 +persist 1 +databaseof 1 +object 1 +code 1 +data 1 +exist 1 +singl 1 +huge 1 +address 1 +space 1 +enhanc 1 +andcooper 1 +uniqu 1 +time 1 +interpret 1 +thu 1 +pointer 1 +base 1 +directlycommun 1 +bestor 1 +directli 1 +secondari 1 +storag 1 +without 1 +translat 1 +simplifi 1 +avail 1 +addressspac 1 +provid 1 +alpha 1 +mip 1 +risc 1 +protect 1 +independ 1 +thread 1 +execut 1 +within 1 +domainthat 1 +defin 1 +virtual 1 +page 1 +right 1 +access 1 +easili 1 +transmit 1 +oneprocess 1 +anoth 1 +result 1 +much 1 +flexibl 1 +protectionstructur 1 +permit 1 +differ 1 +dynam 1 +chang 1 +option 1 +depend 1 +trust 1 +relationshipbetween 1 +parti 1 +believ 1 +organ 1 +canimprov 1 +perform 1 +cooperatingappl 1 +prototyp 1 +built 1 +platform 1 +ofth 1 +mach 1 +inform 1 +sourc 1 +list 1 +relat 1 +paper 1 +faculti 1 +member 1 +hank 1 +levi 1 +lazowska 1 +jeff 1 +chase 1 +duke 1 +univers 1 +current 1 +graduat 1 +student 1 +mike 1 +feelei 1 +ashutosh 1 +tiwari 1 +vivek 1 +narasayya 1 +dylan 1 +mcname 1 +mail 1 +archiv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..f5191e8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,139 @@ +lopezgu 1 +lopezlopez 1 +washington 1 +school 1 +sieg 1 +hall 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +home 1 +student 1 +dissertationresearch 1 +design 1 +implement 1 +constraint 1 +imper 1 +object 1 +orient 1 +languag 1 +curriculum 1 +vita 1 +publicationsgu 1 +lopez 1 +bjorn 1 +freeman 1 +benson 1 +alan 1 +born 1 +kaleidoscop 1 +program 1 +brian 1 +mayoh 1 +tougu 1 +jann 1 +penjam 1 +editor 1 +constraintprogram 1 +springer 1 +verlag 1 +nato 1 +advanc 1 +studi 1 +instituteseri 1 +seri 1 +system 1 +also 1 +publisheda 1 +technic 1 +report 1 +ident 1 +inproceed 1 +european 1 +confer 1 +bologna 1 +itali 1 +juli 1 +virtual 1 +machin 1 +programmingsystem 1 +applic 1 +portland 1 +oregon 1 +octob 1 +oopsla 1 +tutorialsi 1 +tutori 1 +chair 1 +upcom 1 +conferencein 1 +jose 1 +california 1 +peopl 1 +technolog 1 +andsoftwar 1 +develop 1 +meet 1 +speak 1 +well 1 +known 1 +breadth 1 +depth 1 +high 1 +qualiti 1 +itsextens 1 +previou 1 +year 1 +tutorialshav 1 +cover 1 +aspect 1 +introductorysurvei 1 +industri 1 +softwar 1 +practic 1 +lead 1 +edg 1 +academicresearch 1 +topic 1 +respons 1 +request 1 +past 1 +attende 1 +weespeci 1 +encourag 1 +propos 1 +issu 1 +anyon 1 +consid 1 +submit 1 +requestguidelin 1 +submiss 1 +theoopsla 1 +electron 1 +hotlin 1 +mail 1 +enthusiast 1 +accept 1 +proposalswithout 1 +email 1 +address 1 +march 1 +notif 1 +withcamera 1 +readi 1 +note 1 +august 1 +interest 1 +link 1 +green 1 +direct 1 +jimi 1 +hendrix 1 +grave 1 +star 1 +war 1 +collector 1 +archiv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..be1f752d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,40 @@ +omid 1 +home 1 +page 1 +madani 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +chateau 1 +suit 1 +bhello 1 +curiou 1 +browser 1 +welcom 1 +fourth 1 +year 1 +graduat 1 +student 1 +enjoytheori 1 +also 1 +like 1 +keep 1 +touch 1 +areasinclud 1 +graphic 1 +life 1 +work 1 +academ 1 +want 1 +look 1 +islamicarchitectur 1 +isfahan 1 +best 1 +nomine 1 +citi 1 +countri 1 +iran 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..6dce003c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,18 @@ +mike 1 +perkowitz 1 +page 1 +perkowitznewsflash 1 +goe 1 +blond 1 +area 1 +research 1 +academia 1 +music 1 +creativ 1 +randomfavorit 1 +sheba 1 +voyeur 1 +written 1 +grooveneedl 1 +espressoresumemik 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..041ae8ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,64 @@ +marc 1 +langheinrich 1 +homepagemarc 1 +langheinrichuniversitt 1 +bielefeld 1 +univers 1 +washingtontechnisch 1 +fakultt 1 +depart 1 +comput 1 +scienceemail 1 +imlangh 1 +techfak 1 +email 1 +marclang 1 +washington 1 +eduabout 1 +myselfi 1 +spent 1 +last 1 +year 1 +scienc 1 +theunivers 1 +visit 1 +graduat 1 +student 1 +thefulbright 1 +program 1 +check 1 +follow 1 +link 1 +depthinform 1 +resum 1 +project 1 +short 1 +biopost 1 +addressa 1 +septemb 1 +back 1 +germani 1 +finish 1 +mastersat 1 +pleas 1 +contact 1 +german 1 +address 1 +homeschoolgermanyringstra 1 +maintalphon 1 +paulusplatz 1 +bielefeldphon 1 +woodlawn 1 +seattl 1 +phone 1 +sieg 1 +hall 1 +browser 1 +support 1 +tabl 1 +access 1 +data 1 +list 1 +formatmarc 1 +http 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..2f621886 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,109 @@ +home 1 +marla 1 +baker 1 +washington 1 +chief 1 +editor 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +partner 1 +crime 1 +bentlei 1 +academ 1 +interestsgraph 1 +user 1 +interfac 1 +human 1 +interact 1 +educ 1 +softwar 1 +support 1 +collabor 1 +learn 1 +cscl 1 +graphic 1 +visual 1 +techniqu 1 +program 1 +languag 1 +current 1 +work 1 +stevetanimoto 1 +lauren 1 +bricker 1 +coimag 1 +project 1 +devleop 1 +activ 1 +order 1 +explor 1 +cooper 1 +contol 1 +object 1 +goal 1 +investig 1 +differ 1 +wai 1 +multipl 1 +cansimultan 1 +share 1 +manipul 1 +given 1 +assess 1 +also 1 +part 1 +time 1 +packard 1 +bell 1 +compani 1 +resum 1 +publicationsbak 1 +stephen 1 +eick 1 +space 1 +fill 1 +journal 1 +june 1 +burnett 1 +bohu 1 +carlson 1 +yang 1 +scale 1 +ieee 1 +special 1 +issu 1 +march 1 +margaret 1 +classif 1 +system 1 +septemb 1 +tool 1 +larg 1 +proceed 1 +intern 1 +confer 1 +sorento 1 +itali 1 +method 1 +apparatu 1 +displai 1 +hierarch 1 +inform 1 +patent 1 +applic 1 +submit 1 +octob 1 +tutori 1 +geometr 1 +transform 1 +imag 1 +metip 1 +environ 1 +check 1 +page 1 +offic 1 +sieg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..455c27d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,87 @@ +matthai 1 +philipos 1 +home 1 +page 1 +work 1 +dynam 1 +compil 1 +project 1 +compileri 1 +beast 1 +gener 1 +optim 1 +code 1 +runtim 1 +shortterm 1 +interest 1 +figur 1 +produc 1 +good 1 +modern 1 +processor 1 +architectur 1 +applic 1 +side 1 +think 1 +interpret 1 +basedsystem 1 +real 1 +time 1 +constraint 1 +like 1 +java 1 +browser 1 +canbenefit 1 +select 1 +wire 1 +asystem 1 +goe 1 +withprofessor 1 +susan 1 +eggersand 1 +craig 1 +chamber 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +washington 1 +seattl 1 +phone 1 +public 1 +ausland 1 +egger 1 +bershad 1 +fast 1 +effect 1 +confer 1 +onprogram 1 +languag 1 +design 1 +implement 1 +mock 1 +andp 1 +pardyak 1 +automaticdynam 1 +support 1 +event 1 +dispatch 1 +extens 1 +system 1 +workshop 1 +softwar 1 +februari 1 +bookmark 1 +stuff 1 +plai 1 +frequentlymiscellan 1 +link 1 +local 1 +importancefrom 1 +past 1 +abuwhi 1 +black 1 +blue 1 +ribbon 1 +campaign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..c99db83b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,174 @@ +neil 1 +mckenzi 1 +menu 1 +fine 1 +dine 1 +index 1 +page 1 +contact 1 +futur 1 +project 1 +current 1 +past 1 +public 1 +list 1 +person 1 +inform 1 +game 1 +mitsubishi 1 +electr 1 +research 1 +laboratori 1 +broadwai 1 +floor 1 +cambridg 1 +phone 1 +mail 1 +merl 1 +projectsgonna 1 +teenag 1 +lobotomi 1 +ramonesi 1 +live 1 +east 1 +coast 1 +mile 1 +seattl 1 +andwork 1 +note 1 +involv 1 +projectconcern 1 +real 1 +time 1 +volum 1 +render 1 +medic 1 +data 1 +copiou 1 +free 1 +expatri 1 +graduat 1 +student 1 +work 1 +onchaot 1 +routingwith 1 +faculti 1 +advisor 1 +carl 1 +ebel 1 +larri 1 +snyder 1 +chaotic 1 +rout 1 +packet 1 +algorithm 1 +mesh 1 +torusnetwork 1 +dissert 1 +design 1 +implement 1 +thecranium 1 +messag 1 +pass 1 +interfac 1 +compatiblewith 1 +network 1 +us 1 +projectsi 1 +teach 1 +assist 1 +summer 1 +chip 1 +tester 1 +call 1 +mactest 1 +maintain 1 +netlist 1 +graph 1 +isomorph 1 +tool 1 +calledgemini 1 +industri 1 +speak 1 +knowna 1 +layout 1 +schemat 1 +gemini 1 +avail 1 +interest 1 +pleas 1 +send 1 +mcmurchi 1 +washington 1 +cranium 1 +adapt 1 +packetrout 1 +proceed 1 +parallel 1 +comput 1 +andcommun 1 +workshop 1 +link 1 +tomactest 1 +home 1 +user 1 +guid 1 +last 1 +updat 1 +march 1 +angel 1 +marri 1 +pictur 1 +hous 1 +arlington 1 +massachusett 1 +head 1 +livein 1 +fashion 1 +neighborhood 1 +ofballard 1 +creativ 1 +contain 1 +exampl 1 +artworkcr 1 +adob 1 +photoshop 1 +ownedthi 1 +year 1 +onlyth 1 +memori 1 +remain 1 +shirt 1 +correctlyguess 1 +answer 1 +toriddl 1 +jour 1 +octob 1 +label 1 +place 1 +jar 1 +countri 1 +farm 1 +honei 1 +produc 1 +myuncl 1 +edmonton 1 +alberta 1 +canada 1 +amus 1 +linkschairman 1 +shot 1 +linksnorm 1 +gregori 1 +bookmark 1 +halcyon 1 +eugen 1 +spafford 1 +purdu 1 +randi 1 +pausch 1 +virginia 1 +wallach 1 +scool 1 +princeton 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..b3a6f15f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,151 @@ +marc 1 +fiuczynski 1 +home 1 +page 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +backgroundi 1 +graduat 1 +student 1 +grewup 1 +germani 1 +near 1 +sseldorf 1 +spent 1 +year 1 +highschool 1 +princeton 1 +receiv 1 +fromrutg 1 +sever 1 +summer 1 +bell 1 +lab 1 +mitr 1 +corpor 1 +work 1 +rang 1 +ofproject 1 +sole 1 +proprietor 1 +system 1 +companythat 1 +creat 1 +decemb 1 +sell 1 +distribut 1 +fault 1 +toler 1 +network 1 +base 1 +telephon 1 +built 1 +scratch 1 +setof 1 +chasi 1 +processor 1 +us 1 +univoic 1 +interfac 1 +cardsand 1 +vxwork 1 +oper 1 +time 1 +spend 1 +hack 1 +spin 1 +safe 1 +adapt 1 +extens 1 +primari 1 +contribut 1 +protocol 1 +architectur 1 +forappl 1 +specif 1 +applic 1 +achiev 1 +compellingperform 1 +improv 1 +structur 1 +compar 1 +tosimilar 1 +run 1 +commerci 1 +platform 1 +demonstr 1 +servic 1 +http 1 +request 1 +contacthttp 1 +recent 1 +report 1 +paper 1 +design 1 +implement 1 +perform 1 +describ 1 +anextens 1 +allow 1 +anyon 1 +custom 1 +anin 1 +kernel 1 +graph 1 +enabl 1 +betterperform 1 +similar 1 +conventionaloper 1 +appear 1 +proceed 1 +winter 1 +usenix 1 +technicalconfer 1 +safeti 1 +fifteenth 1 +symposium 1 +principl 1 +languag 1 +support 1 +pretti 1 +happi 1 +deal 1 +shortcom 1 +inord 1 +describeshow 1 +address 1 +dynam 1 +link 1 +linker 1 +load 1 +code 1 +point 1 +isth 1 +abil 1 +manag 1 +linkabl 1 +namespac 1 +andcollect 1 +protect 1 +softwar 1 +issu 1 +posit 1 +hardwar 1 +mechan 1 +fifth 1 +ieee 1 +workshop 1 +topic 1 +region 1 +analysi 1 +parallel 1 +elimin 1 +method 1 +data 1 +flow 1 +transact 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..7e2900ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,31 @@ +melani 1 +fulgham 1 +comput 1 +scienc 1 +minim 1 +versu 1 +rout 1 +algorithm 1 +method 1 +model 1 +develop 1 +help 1 +predict 1 +compar 1 +perform 1 +router 1 +real 1 +parallel 1 +machin 1 +deflect 1 +upper 1 +lower 1 +bound 1 +practic 1 +requir 1 +sort 1 +mesh 1 +topolog 1 +washington 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..49f726c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,64 @@ +meng 1 +heng 1 +homepag 1 +homepagemenghe 1 +washington 1 +edubox 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonseattl 1 +second 1 +year 1 +student 1 +scienceat 1 +univers 1 +undergrad 1 +pennsylvania 1 +research 1 +interestsi 1 +interest 1 +imag 1 +retriev 1 +problem 1 +try 1 +findimag 1 +huge 1 +databas 1 +virag 1 +andqbicar 1 +commerci 1 +exampl 1 +similar 1 +kind 1 +stuff 1 +work 1 +snapshot 1 +done 1 +singaporesingapor 1 +infomap 1 +provid 1 +fact 1 +andstatist 1 +singapor 1 +singaporeonlin 1 +guid 1 +plan 1 +take 1 +trip 1 +nation 1 +boardi 1 +charg 1 +transform 1 +anintellig 1 +island 1 +graduat 1 +strait 1 +time 1 +main 1 +english 1 +newspap 1 +visit 1 +sinc 1 +menghe 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..cc4116ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,70 @@ +michael 1 +ernst 1 +home 1 +pagemichael 1 +ernsti 1 +graduat 1 +student 1 +univers 1 +washington 1 +comput 1 +scienc 1 +depart 1 +previous 1 +lectur 1 +riceunivers 1 +sciencedepart 1 +research 1 +programanalysi 1 +group 1 +microsoft 1 +laboratori 1 +eec 1 +page 1 +frequent 1 +updat 1 +technic 1 +interest 1 +includ 1 +compil 1 +static 1 +analysi 1 +slice 1 +debug 1 +optim 1 +code 1 +serial 1 +parallel 1 +program 1 +chair 1 +intermedi 1 +represent 1 +workshop 1 +coloc 1 +popl 1 +intellectu 1 +properti 1 +particularli 1 +area 1 +game 1 +theori 1 +cryptographi 1 +philosophi 1 +denot 1 +semanticsi 1 +maintain 1 +list 1 +resourcesfor 1 +confer 1 +organ 1 +occasion 1 +manag 1 +slip 1 +awai 1 +work 1 +carri 1 +real 1 +life 1 +link 1 +possibleinterest 1 +mernst 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..9504926f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,124 @@ +marku 1 +mock 1 +home 1 +page 1 +washington 1 +comput 1 +scienc 1 +rttemberg 1 +grew 1 +anotherpart 1 +state 1 +district 1 +biberach 1 +upper 1 +swabia 1 +oberschwaben 1 +come 1 +studi 1 +univers 1 +karlsruh 1 +whichi 1 +obtain 1 +diplom 1 +also 1 +spent 1 +year 1 +umass 1 +fulbright 1 +grante 1 +main 1 +research 1 +interest 1 +parallel 1 +distribut 1 +object 1 +orient 1 +system 1 +compil 1 +current 1 +work 1 +dynam 1 +includ 1 +spanish 1 +latin 1 +american 1 +cultur 1 +travel 1 +good 1 +book 1 +labyrinth 1 +solitud 1 +chess 1 +mainstream 1 +movi 1 +dieangst 1 +torwart 1 +beim 1 +elfmet 1 +know 1 +handk 1 +salsa 1 +merengu 1 +danc 1 +still 1 +time 1 +left 1 +check 1 +els 1 +seattl 1 +publicationssepar 1 +list 1 +link 1 +stuff 1 +event 1 +olympiad 1 +yerewan 1 +colloquia 1 +oopsla 1 +volunt 1 +mossi 1 +bit 1 +grad 1 +journal 1 +csek 1 +csebi 1 +cse 1 +cours 1 +graduat 1 +studentsimag 1 +depart 1 +electr 1 +engineeringy 1 +wouldn 1 +expect 1 +squar 1 +live 1 +view 1 +metacrawl 1 +search 1 +altavista 1 +deutsch 1 +well 1 +realaudio 1 +cool 1 +linksand 1 +quot 1 +consid 1 +lili 1 +field 1 +grow 1 +toil 1 +neither 1 +spin 1 +unto 1 +even 1 +solomon 1 +glorywa 1 +arrai 1 +like 1 +matthew 1 +access 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..f38da7c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,15 @@ +vivek 1 +narasayya 1 +home 1 +page 1 +nara 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +person 1 +informationresearch 1 +interestspap 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..fa868cd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,69 @@ +himanshu 1 +nautiy 1 +home 1 +pagehimanshu 1 +nautiyalthi 1 +page 1 +heavi 1 +construct 1 +nautiyaldept 1 +comput 1 +scienc 1 +engin 1 +mail 1 +stop 1 +univers 1 +washington 1 +seattl 1 +offic 1 +sieg 1 +phone 1 +cours 1 +take 1 +autumn 1 +quarter 1 +principl 1 +digit 1 +system 1 +design 1 +artifici 1 +intellig 1 +finger 1 +edugod 1 +gift 1 +personkind 1 +dougla 1 +adam 1 +terri 1 +pratchett 1 +pelham 1 +grenvil 1 +wodehouseth 1 +order 1 +alphabet 1 +last 1 +name 1 +impli 1 +favorit 1 +link 1 +place 1 +india 1 +internet 1 +radio 1 +search 1 +friend 1 +delhi 1 +finish 1 +tech 1 +astronomi 1 +skate 1 +aviat 1 +travel 1 +numismat 1 +sound 1 +much 1 +profound 1 +coin 1 +collect 1 +cook 1 +movi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..9e23c814 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,90 @@ +labyrinth 1 +mediocr 1 +bemoan 1 +hype 1 +skeptic 1 +cynic 1 +sinc 1 +research 1 +current 1 +work 1 +automaticconstruct 1 +wrapper 1 +inform 1 +resourc 1 +alsointerest 1 +sever 1 +area 1 +artifici 1 +intellig 1 +andcognit 1 +scienc 1 +paper 1 +beeninvolv 1 +stuff 1 +avail 1 +anonym 1 +servic 1 +provid 1 +glbal 1 +infrmatin 1 +sperhighwai 1 +preliminari 1 +version 1 +divers 1 +meter 1 +pictur 1 +hand 1 +shortli 1 +surgeri 1 +need 1 +random 1 +number 1 +alwai 1 +handi 1 +know 1 +date 1 +time 1 +week 1 +favorit 1 +color 1 +line 1 +lost 1 +easili 1 +return 1 +page 1 +ronald 1 +wilson 1 +reagan 1 +temperatur 1 +look 1 +javascript 1 +enabl 1 +browser 1 +automat 1 +send 1 +mail 1 +great 1 +republican 1 +tell 1 +like 1 +miscellani 1 +contact 1 +bookmark 1 +technolog 1 +societi 1 +awar 1 +bitter 1 +ironi 1 +involv 1 +nonetheless 1 +madeavail 1 +wendel 1 +berri 1 +guidelin 1 +constitutesgood 1 +comment 1 +nichola 1 +kushmerick 1 +uwcs 1 +seattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..f8500e33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,75 @@ +wayn 1 +ohlrich 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +offic 1 +guggenheim 1 +annex 1 +chateau 1 +public 1 +research 1 +current 1 +work 1 +nanci 1 +leveson 1 +model 1 +check 1 +automat 1 +test 1 +gener 1 +specif 1 +spare 1 +time 1 +brian 1 +bershad 1 +anna 1 +karlin 1 +romer 1 +memori 1 +perform 1 +analysi 1 +project 1 +local 1 +known 1 +sever 1 +damag 1 +group 1 +paper 1 +make 1 +debut 1 +isca 1 +itali 1 +summer 1 +safeti 1 +home 1 +page 1 +sytem 1 +reduc 1 +overhead 1 +us 1 +onlin 1 +superpag 1 +promot 1 +class 1 +inform 1 +cours 1 +person 1 +interest 1 +game 1 +world 1 +wonder 1 +contain 1 +sort 1 +link 1 +invest 1 +found 1 +creat 1 +octob 1 +last 1 +modifi 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..31005782 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,86 @@ +gershoni 1 +washington 1 +graduat 1 +studentcomput 1 +scienc 1 +engin 1 +departmentunivers 1 +washingtonoffic 1 +sieg 1 +home 1 +seattl 1 +second 1 +year 1 +student 1 +univers 1 +ofwashington 1 +move 1 +live 1 +california 1 +seven 1 +fouryear 1 +berkelei 1 +three 1 +lancast 1 +northeast 1 +angel 1 +origin 1 +israel 1 +haifa 1 +holon 1 +like 1 +practic 1 +kwon 1 +plai 1 +basketbal 1 +hike 1 +quarter 1 +take 1 +class 1 +whole 1 +bunch 1 +seminar 1 +amta 1 +comput 1 +architectur 1 +usual 1 +find 1 +offic 1 +hour 1 +aremondai 1 +wednesdai 1 +potenti 1 +employ 1 +welcom 1 +look 1 +resum 1 +pictur 1 +took 1 +last 1 +summer 1 +click 1 +tose 1 +cool 1 +shirt 1 +design 1 +made 1 +graphicsprogram 1 +call 1 +virtual 1 +realiti 1 +interest 1 +link 1 +time 1 +daili 1 +new 1 +summari 1 +york 1 +riderlink 1 +seattletransport 1 +option 1 +inform 1 +mathemat 1 +depart 1 +access 1 +sinc 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..5574279c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,78 @@ +ross 1 +ortega 1 +wear 1 +jean 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +knew 1 +would 1 +call 1 +research 1 +albert 1 +einstein 1 +welcom 1 +home 1 +pageaft 1 +go 1 +school 1 +work 1 +boston 1 +year 1 +decid 1 +head 1 +west 1 +realli 1 +northwest 1 +came 1 +fall 1 +leav 1 +sometim 1 +accord 1 +advisor 1 +gaetano 1 +borriello 1 +offici 1 +chinook 1 +project 1 +tool 1 +real 1 +time 1 +embed 1 +control 1 +system 1 +unoffici 1 +brew 1 +beer 1 +learn 1 +hack 1 +try 1 +teach 1 +german 1 +shepherd 1 +tequila 1 +behav 1 +profession 1 +section 1 +myresum 1 +file 1 +educ 1 +experi 1 +public 1 +paper 1 +puppi 1 +pictur 1 +color 1 +offic 1 +sieg 1 +check 1 +page 1 +link 1 +find 1 +interest 1 +last 1 +updatedthu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..3fdca0f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,121 @@ +untitl 1 +document 1 +flat 1 +morri 1 +minor 1 +pardodepart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +washingtonusapardo 1 +washington 1 +edunot 1 +show 1 +blue 1 +ribbon 1 +asimgsrc 1 +http 1 +graphic 1 +icon 1 +blueribbon 1 +rib_trn_plain_sm 1 +quiet 1 +opposit 1 +free 1 +speechprohibit 1 +nation 1 +telecommun 1 +bill 1 +likewis 1 +white 1 +letter 1 +black 1 +background 1 +beth 1 +everybodi 1 +need 1 +pardo 1 +around 1 +sometim 1 +share 1 +academicsom 1 +papersi 1 +work 1 +find 1 +particularli 1 +interest 1 +runtim 1 +code 1 +gener 1 +rtcg 1 +instruct 1 +simul 1 +trace 1 +tool 1 +home 1 +page 1 +oncomput 1 +architectureandcompil 1 +quick 1 +link 1 +otherpeopl 1 +class 1 +thesi 1 +stylenon 1 +academicfeatur 1 +item 1 +featur 1 +month 1 +weak 1 +site 1 +week 1 +doesn 1 +blink 1 +anymor 1 +regular 1 +itemsbicyclesbusinessescomputersfoodhumori 1 +famou 1 +thing 1 +relat 1 +legal 1 +ethic 1 +weirdnesslinux 1 +journalmusicgoofi 1 +politicssci 1 +think 1 +though 1 +unrel 1 +stuff 1 +transport 1 +movi 1 +list 1 +film 1 +festiv 1 +dant 1 +search 1 +truli 1 +gross 1 +stori 1 +trepan 1 +privaci 1 +log 1 +mail 1 +address 1 +wors 1 +take 1 +data 1 +disk 1 +everi 1 +time 1 +consid 1 +weather 1 +courtesei 1 +particular 1 +also 1 +newhous 1 +newspap 1 +courtesi 1 +yesterdai 1 +stuffpardo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..1dd2f87d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,88 @@ +przemek 1 +pardyak 1 +home 1 +page 1 +przemyslaw 1 +pardi 1 +washington 1 +first 1 +week 1 +grad 1 +school 1 +coupl 1 +year 1 +later 1 +third 1 +graduat 1 +student 1 +comput 1 +scienc 1 +theunivers 1 +current 1 +research 1 +area 1 +ofoper 1 +system 1 +also 1 +interast 1 +distribut 1 +languag 1 +compil 1 +besid 1 +life 1 +fill 1 +withth 1 +seattl 1 +drizzl 1 +hike 1 +outdoor 1 +activ 1 +notbusi 1 +enjoi 1 +book 1 +music 1 +find 1 +short 1 +descript 1 +interest 1 +resum 1 +list 1 +paper 1 +outdat 1 +happenswhen 1 +busi 1 +schedul 1 +projectsspinan 1 +extens 1 +oper 1 +built 1 +gloriou 1 +leadership 1 +brian 1 +bershad 1 +group 1 +mechan 1 +object 1 +base 1 +systemsgroup 1 +commun 1 +emerald 1 +basedprogram 1 +time 1 +link 1 +polish 1 +connect 1 +variou 1 +resourc 1 +somehow 1 +relat 1 +poland 1 +project 1 +mine 1 +univers 1 +unrel 1 +miscellan 1 +work 1 +engin 1 +depart 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..b5fd7ff7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,167 @@ +paul 1 +franklin 1 +home 1 +pagepaul 1 +pagei 1 +current 1 +graduat 1 +student 1 +univ 1 +washington 1 +inth 1 +depart 1 +comput 1 +scienc 1 +offici 1 +myoffic 1 +sieg 1 +work 1 +rapid 1 +project 1 +us 1 +first 1 +year 1 +thesumm 1 +second 1 +someon 1 +express 1 +concern 1 +aboutthi 1 +character 1 +usual 1 +somewher 1 +north 1 +seattl 1 +iliv 1 +school 1 +anoth 1 +pictur 1 +best 1 +oneof 1 +dai 1 +around 1 +scan 1 +better 1 +on 1 +norwegian 1 +poem 1 +likea 1 +collect 1 +fortun 1 +receiv 1 +friend 1 +localchines 1 +restaur 1 +mundan 1 +stuffi 1 +hope 1 +folk 1 +might 1 +find 1 +stuff 1 +hotlink 1 +pagesstuff 1 +maintainmi 1 +schedul 1 +rememb 1 +updat 1 +contact 1 +love 1 +travel 1 +necessarili 1 +tell 1 +everyon 1 +hire 1 +mewher 1 +come 1 +high 1 +diploma 1 +live 1 +inmorgan 1 +hill 1 +taught 1 +prolog 1 +time 1 +engin 1 +fromuc 1 +davi 1 +andy 1 +glad 1 +ididn 1 +univers 1 +bergen 1 +ialso 1 +research 1 +professor 1 +electr 1 +andcomput 1 +stuffwhil 1 +partner 1 +variou 1 +relatedact 1 +tend 1 +matt 1 +chri 1 +jame 1 +evengot 1 +togeth 1 +recent 1 +marriag 1 +joann 1 +anexcus 1 +brother 1 +also 1 +made 1 +itin 1 +photo 1 +throughout 1 +undergradu 1 +kept 1 +bike 1 +never 1 +flat 1 +exchang 1 +ofbergen 1 +hillier 1 +longer 1 +rout 1 +returnedto 1 +took 1 +rollerblad 1 +sinc 1 +town 1 +wasnow 1 +easi 1 +drop 1 +hewlettpackard 1 +return 1 +vengeanc 1 +move 1 +toseattl 1 +done 1 +annual 1 +portland 1 +ride 1 +intwo 1 +inseason 1 +march 1 +april 1 +june 1 +rest 1 +justcommut 1 +lot 1 +danc 1 +particularli 1 +lindyhop 1 +know 1 +everi 1 +html 1 +document 1 +header 1 +linethat 1 +look 1 +someth 1 +like 1 +doctyp 1 +public 1 +ietf 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..ae1c0f1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,98 @@ +frdric 1 +pighin 1 +washington 1 +comput 1 +scienc 1 +lcommun 1 +wonder 1 +citi 1 +tourist 1 +quarter 1 +ta 1 +taught 1 +anna 1 +karlin 1 +gui 1 +found 1 +much 1 +often 1 +dani 1 +qual 1 +report 1 +rest 1 +like 1 +british 1 +movi 1 +monti 1 +python 1 +swim 1 +nick 1 +cave 1 +corto 1 +maltes 1 +italian 1 +comic 1 +cat 1 +pari 1 +berlin 1 +venis 1 +simpson 1 +mpeg 1 +rain 1 +surpris 1 +traditionn 1 +french 1 +marin 1 +song 1 +collect 1 +otherwis 1 +work 1 +graphic 1 +supervis 1 +although 1 +formerli 1 +studi 1 +systemher 1 +name 1 +paper 1 +implement 1 +global 1 +memori 1 +manag 1 +workstat 1 +cluster 1 +michael 1 +feelei 1 +william 1 +morgan 1 +freder 1 +henri 1 +levi 1 +chandramohan 1 +thekkath 1 +proceed 1 +symposium 1 +oper 1 +system 1 +principl 1 +decemb 1 +postscript 1 +live 1 +action 1 +breath 1 +take 1 +pictur 1 +door 1 +refresh 1 +everi 1 +minut 1 +lucki 1 +might 1 +even 1 +look 1 +darren 1 +juan 1 +note 1 +dark 1 +squar 1 +five 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..927692f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,14 @@ +ruth 1 +anderson 1 +home 1 +page 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +wxyc 1 +map 1 +brother 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..56e04b7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,12 @@ +home 1 +josh 1 +page 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +washington 1 +seattl 1 +redston 1 +joshua 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..7cb9eeee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,162 @@ +jakobovit 1 +home 1 +page 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +usai 1 +get 1 +departmentof 1 +ofwashington 1 +wonder 1 +citi 1 +alwai 1 +rain 1 +thisup 1 +date 1 +weatherreport 1 +sneak 1 +peek 1 +live 1 +imag 1 +video 1 +camera 1 +mount 1 +outsid 1 +point 1 +beauti 1 +drumhel 1 +fountain 1 +clear 1 +youcan 1 +catch 1 +glimps 1 +rainier 1 +glori 1 +probabl 1 +cleardai 1 +nice 1 +color 1 +pictur 1 +research 1 +develop 1 +base 1 +repositori 1 +manag 1 +program 1 +toolkit 1 +build 1 +multi 1 +media 1 +consol 1 +construct 1 +databas 1 +part 1 +astructur 1 +inform 1 +framework 1 +brain 1 +map 1 +knowledg 1 +support 1 +digit 1 +anatomist 1 +line 1 +interact 1 +atla 1 +human 1 +bodi 1 +implement 1 +databaseenviron 1 +vision 1 +local 1 +expert 1 +persistentprogram 1 +languag 1 +interest 1 +els 1 +proud 1 +creator 1 +internetracquetbal 1 +ladder 1 +taught 1 +advanc 1 +extens 1 +wrote 1 +perl 1 +script 1 +rotisseriebasebal 1 +leagu 1 +stand 1 +updat 1 +daili 1 +stat 1 +fromusa 1 +todai 1 +rais 1 +happi 1 +famili 1 +africancichlid 1 +visit 1 +town 1 +honolulu 1 +everi 1 +chanc 1 +camp 1 +magic 1 +kalalau 1 +vallei 1 +movi 1 +gambl 1 +stock 1 +market 1 +darn 1 +good 1 +fantasi 1 +footbal 1 +team 1 +newslet 1 +would 1 +javafamili 1 +link 1 +mydad 1 +leon 1 +jame 1 +professor 1 +psycholog 1 +hawaii 1 +whoi 1 +write 1 +book 1 +traffic 1 +foster 1 +onlin 1 +polem 1 +emanuel 1 +swedenborg 1 +step 1 +dian 1 +nahl 1 +librari 1 +whoprovid 1 +great 1 +index 1 +judi 1 +realtor 1 +uncl 1 +eddi 1 +run 1 +site 1 +bioscienc 1 +profession 1 +bookmarksif 1 +java 1 +click 1 +drag 1 +word 1 +make 1 +poem 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..10a84c88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight 1 +welcom 1 +galleri 1 +twenti 1 +photograph 1 +five 1 +head 1 +robert 1 +grimm 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..68293894 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,142 @@ +romer 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +home 1 +offic 1 +eduoffic 1 +chateau 1 +sieg 1 +research 1 +interestsi 1 +oper 1 +system 1 +supportfor 1 +high 1 +perform 1 +memori 1 +systemswith 1 +realli 1 +smart 1 +peopl 1 +likebrian 1 +bershad 1 +brad 1 +chen 1 +alan 1 +eustac 1 +anna 1 +karlin 1 +denni 1 +wayn 1 +ohlrich 1 +andwayn 1 +wong 1 +three 1 +recent 1 +paper 1 +subject 1 +reduc 1 +overhead 1 +us 1 +onlinesuperpag 1 +promot 1 +isca 1 +dynam 1 +page 1 +map 1 +polici 1 +cach 1 +conflict 1 +resolutionon 1 +standard 1 +hardwar 1 +osdi 1 +avoid 1 +miss 1 +larg 1 +direct 1 +mappedcach 1 +asplo 1 +addit 1 +friend 1 +studi 1 +ofinterpret 1 +learn 1 +theproject 1 +rockyhom 1 +also 1 +wrote 1 +togeth 1 +structur 1 +interpret 1 +voelker 1 +wolman 1 +baer 1 +levi 1 +appear 1 +abstract 1 +postscript 1 +bibliographi 1 +lobo 1 +read 1 +listrandom 1 +stuffa 1 +hous 1 +scientist 1 +rai 1 +limb 1 +knee 1 +arthroscop 1 +surgeri 1 +mark 1 +hill 1 +wrist 1 +dylansaid 1 +hair 1 +couldn 1 +flowbe 1 +said 1 +could 1 +beingexperiment 1 +conduct 1 +experi 1 +judg 1 +result 1 +attend 1 +travel 1 +europ 1 +took 1 +somepictur 1 +eatsomeon 1 +els 1 +food 1 +accompani 1 +sincer 1 +ration 1 +forexampl 1 +lunch 1 +thought 1 +leftth 1 +countri 1 +would 1 +didn 1 +origin 1 +unknown 1 +edward 1 +tuft 1 +tip 1 +public 1 +speak 1 +father 1 +edit 1 +american 1 +journal 1 +physic 1 +place 1 +ticker 1 +symbol 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..dc75f909 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,157 @@ +memori 1 +system 1 +research 1 +univers 1 +washingtonmemori 1 +researchdepart 1 +comput 1 +scienc 1 +engin 1 +washington 1 +seattl 1 +welcom 1 +home 1 +page 1 +descript 1 +group 1 +investig 1 +techniqu 1 +operatingsystem 1 +improv 1 +perform 1 +work 1 +sharesth 1 +follow 1 +featur 1 +reli 1 +combin 1 +simpl 1 +hardwar 1 +support 1 +oper 1 +modif 1 +monitor 1 +dynam 1 +behavior 1 +applic 1 +mechan 1 +incur 1 +small 1 +overhead 1 +runtim 1 +inform 1 +collect 1 +us 1 +identifi 1 +sourc 1 +delai 1 +cach 1 +miss 1 +resolv 1 +bottleneck 1 +also 1 +significantli 1 +overal 1 +recent 1 +project 1 +explor 1 +polici 1 +monitorappl 1 +refer 1 +pattern 1 +order 1 +resolvetlb 1 +problem 1 +poor 1 +result 1 +tlbi 1 +cover 1 +current 1 +severalmodern 1 +architectur 1 +superpag 1 +whose 1 +size 1 +amultipl 1 +base 1 +tlbperform 1 +larger 1 +cost 1 +ofwast 1 +intern 1 +fragment 1 +simul 1 +sever 1 +adapt 1 +todiffer 1 +region 1 +address 1 +space 1 +constructingsuperpag 1 +copi 1 +compon 1 +contigu 1 +ofmemori 1 +develop 1 +balancesth 1 +potenti 1 +benefit 1 +reduct 1 +futur 1 +tlbmiss 1 +construct 1 +memorycopi 1 +misspattern 1 +warrant 1 +attain 1 +largepag 1 +without 1 +detail 1 +paper 1 +reduc 1 +onlin 1 +promot 1 +isca 1 +appear 1 +look 1 +someon 1 +implement 1 +algorithm 1 +would 1 +makea 1 +good 1 +qual 1 +master 1 +peoplefaculti 1 +brian 1 +bershad 1 +anna 1 +karlin 1 +student 1 +denni 1 +dlee 1 +wayn 1 +ohlrich 1 +romer 1 +wong 1 +waynew 1 +map 1 +conflict 1 +resolut 1 +standard 1 +chen 1 +osdi 1 +avoid 1 +larg 1 +direct 1 +asplo 1 +comparison 1 +mip 1 +alpha 1 +report 1 +instruct 1 +effect 1 +differ 1 +code 1 +reorder 1 +bibliographi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..31844f53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,96 @@ +project 1 +rocki 1 +architectur 1 +perform 1 +interpret 1 +languag 1 +descript 1 +becom 1 +increasingli 1 +popular 1 +last 1 +sever 1 +year 1 +part 1 +demand 1 +portabl 1 +safeti 1 +eas 1 +examin 1 +environ 1 +perspect 1 +strategi 1 +implement 1 +processor 1 +util 1 +basi 1 +studi 1 +collect 1 +benchmark 1 +microbenchmark 1 +perl 1 +java 1 +mipsi 1 +us 1 +variou 1 +instrument 1 +trace 1 +techniqu 1 +evalu 1 +characterist 1 +order 1 +gain 1 +insight 1 +similar 1 +differ 1 +execut 1 +peoplefaculti 1 +jean 1 +loup 1 +baer 1 +washington 1 +brian 1 +bershad 1 +henri 1 +levi 1 +student 1 +denni 1 +dlee 1 +romer 1 +geoff 1 +voelker 1 +alec 1 +wolman 1 +wayn 1 +wong 1 +waynew 1 +papersrom 1 +structur 1 +asplo 1 +appear 1 +abstractpostscriptjava 1 +measur 1 +xjava 1 +sourc 1 +file 1 +benchmarkstoolsto 1 +inform 1 +applic 1 +vebeen 1 +build 1 +binari 1 +rewrit 1 +tool 1 +call 1 +etch 1 +yetpublicli 1 +avail 1 +read 1 +etchhom 1 +page 1 +intern 1 +documentationproject 1 +document 1 +peopl 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..675d0d78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,98 @@ +richard 1 +roger 1 +home 1 +page 1 +rogersrrog 1 +washington 1 +educomput 1 +scienc 1 +engin 1 +departmentunivers 1 +seattl 1 +usaoffic 1 +chateau 1 +sieg 1 +phone 1 +intellig 1 +system 1 +laboratri 1 +research 1 +develop 1 +softwar 1 +systol 1 +cellular 1 +arrai 1 +machin 1 +scam 1 +massiv 1 +parallel 1 +imag 1 +process 1 +comput 1 +includ 1 +compil 1 +basic 1 +morpholog 1 +librari 1 +simul 1 +obtain 1 +paper 1 +current 1 +work 1 +document 1 +layout 1 +extract 1 +also 1 +help 1 +produc 1 +groundtruth 1 +databas 1 +optic 1 +charact 1 +recognit 1 +commun 1 +camp 1 +director 1 +facil 1 +northwest 1 +center 1 +environment 1 +educ 1 +ncee 1 +offer 1 +summer 1 +student 1 +ag 1 +beauti 1 +juan 1 +island 1 +splash 1 +program 1 +univers 1 +year 1 +long 1 +nation 1 +foundat 1 +fund 1 +grade 1 +minor 1 +girl 1 +area 1 +interest 1 +corn 1 +snake 1 +jessica 1 +squishi 1 +order 1 +increas 1 +length 1 +kuow 1 +public 1 +radio 1 +stationi 1 +bake 1 +best 1 +pecan 1 +seattlelast 1 +modifi 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..fb9e416b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,19 @@ +mike 1 +home 1 +page 1 +salisburysalisbur 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +usaoffic 1 +chateau 1 +sieg 1 +lifehistori 1 +school 1 +friend 1 +vitacool 1 +stuff 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..f909328d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,177 @@ +stefan 1 +savag 1 +washington 1 +work 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +home 1 +sampl 1 +rich 1 +post 1 +industri 1 +cultur 1 +modern 1 +pittsburghfor 1 +year 1 +caught 1 +ride 1 +bershad 1 +migrat 1 +mnow 1 +gradual 1 +student 1 +first 1 +rank 1 +strongbackground 1 +centuri 1 +american 1 +histori 1 +provid 1 +witha 1 +firm 1 +irrelev 1 +platform 1 +trash 1 +talk 1 +peer 1 +fool 1 +tocqeuvil 1 +statement 1 +find 1 +tiresom 1 +inconveni 1 +exercisepolit 1 +right 1 +distract 1 +quit 1 +similar 1 +tocurr 1 +microprocessor 1 +architectur 1 +trend 1 +favor 1 +need 1 +ofappl 1 +code 1 +oper 1 +system 1 +brian 1 +rest 1 +merri 1 +band 1 +onan 1 +project 1 +call 1 +spin 1 +projectsspinspin 1 +extens 1 +omnifemtokernel 1 +whichsupport 1 +dynam 1 +adapt 1 +interfac 1 +andimplement 1 +direct 1 +applic 1 +control 1 +stillmaintain 1 +integr 1 +inter 1 +isol 1 +thing 1 +writingspin 1 +paper 1 +safeti 1 +perform 1 +proceed 1 +symposium 1 +principl 1 +sosp 1 +copper 1 +mountain 1 +decemb 1 +slide 1 +languag 1 +support 1 +workshop 1 +compil 1 +softwar 1 +wcsss 1 +tucson 1 +write 1 +modula 1 +protect 1 +issu 1 +fifth 1 +topic 1 +hoto 1 +orca 1 +island 1 +design 1 +usenix 1 +implement 1 +osdi 1 +monterei 1 +novemb 1 +panel 1 +abstract 1 +longer 1 +unpublish 1 +version 1 +microkernel 1 +specif 1 +servic 1 +sixth 1 +sigop 1 +european 1 +match 1 +appear 1 +review 1 +januari 1 +tech 1 +report 1 +march 1 +afraid 1 +frequent 1 +redund 1 +arrai 1 +independ 1 +disk 1 +winter 1 +technic 1 +confer 1 +diego 1 +best 1 +reserv 1 +processor 1 +capac 1 +multimedia 1 +ieee 1 +intern 1 +boston 1 +manag 1 +usag 1 +fourth 1 +workstat 1 +wwo 1 +napa 1 +octob 1 +carnegi 1 +mellon 1 +real 1 +time 1 +mach 1 +timer 1 +export 1 +user 1 +third 1 +santa 1 +april 1 +interest 1 +music 1 +hikingthi 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..1aeb7466 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,9 @@ +sean 1 +sandi 1 +david 1 +washington 1 +comput 1 +scienc 1 +last 1 +revis 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..32c9c23b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,33 @@ +richard 1 +segal 1 +home 1 +page 1 +segaldepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washingtonbox 1 +seattl 1 +washington 1 +person 1 +biographi 1 +better 1 +half 1 +famili 1 +pictur 1 +research 1 +overview 1 +brute 1 +internet 1 +softbot 1 +public 1 +curriculum 1 +vita 1 +postscript 1 +amus 1 +archeri 1 +bicycl 1 +racquetbal 1 +ski 1 +softbal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..e4513d82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,116 @@ +stefan 1 +bergstefan 1 +berg 1 +work 1 +home 1 +sieg 1 +hall 1 +univers 1 +washington 1 +seattl 1 +phone 1 +email 1 +sgberg 1 +content 1 +address 1 +inform 1 +past 1 +project 1 +activ 1 +current 1 +interest 1 +page 1 +finger 1 +born 1 +cologn 1 +germani 1 +spring 1 +complet 1 +mittler 1 +reif 1 +thgrade 1 +schillergymnasium 1 +come 1 +unit 1 +statesto 1 +receiv 1 +high 1 +school 1 +diploma 1 +bloomington 1 +north 1 +indiana 1 +bachelor 1 +scienc 1 +honor 1 +distinctionin 1 +field 1 +comput 1 +fromindiana 1 +momenth 1 +toward 1 +expect 1 +date 1 +sometim 1 +thiscenturi 1 +implement 1 +reduct 1 +machin 1 +teach 1 +assist 1 +comparison 1 +hardwar 1 +softwar 1 +solut 1 +fals 1 +share 1 +studi 1 +linear 1 +time 1 +sort 1 +algorithm 1 +qual 1 +pictur 1 +peopl 1 +around 1 +raft 1 +bookmark 1 +excit 1 +squar 1 +moment 1 +weather 1 +print 1 +yourselfsometh 1 +crazi 1 +didn 1 +even 1 +particularsolut 1 +done 1 +sall 1 +line 1 +shouldn 1 +contain 1 +trail 1 +carriag 1 +return 1 +byte 1 +compil 1 +without 1 +warn 1 +program 1 +exact 1 +sourc 1 +code 1 +itin 1 +fewer 1 +like 1 +putchar 1 +char 1 +els 1 +main 1 +printf 1 +resum 1 +avail 1 +inpostscript 1 +andtex 1 +format 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..9f829a0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,62 @@ +ward 1 +shadegreet 1 +salut 1 +third 1 +year 1 +grad 1 +student 1 +hereat 1 +dubcs 1 +interact 1 +renderingof 1 +complex 1 +scene 1 +current 1 +thing 1 +follow 1 +project 1 +link 1 +walkthruproject 1 +amonglot 1 +pictur 1 +anim 1 +find 1 +siggraph 1 +paperdescrib 1 +recent 1 +work 1 +shortcut 1 +click 1 +thepictur 1 +island 1 +lower 1 +left 1 +corner 1 +page 1 +lot 1 +interest 1 +go 1 +mani 1 +differ 1 +aspectsof 1 +comput 1 +graphic 1 +thegraph 1 +imag 1 +laboratori 1 +get 1 +done 1 +contact 1 +info 1 +daili 1 +schedul 1 +travel 1 +plan 1 +public 1 +look 1 +scrunch 1 +make 1 +browser 1 +least 1 +pixel 1 +wide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..db0b651c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,29 @@ +shun 1 +leungshun 1 +leung 1 +student 1 +depart 1 +comput 1 +scienc 1 +andengin 1 +univers 1 +ofwashington 1 +work 1 +prof 1 +johnzahorjan 1 +pointer 1 +research 1 +summari 1 +public 1 +curriculum 1 +vita 1 +upon 1 +request 1 +engin 1 +washington 1 +seattl 1 +email 1 +shuntak 1 +last 1 +modifi 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..e83d20d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,70 @@ +welcom 1 +shuichi 1 +home 1 +page 1 +koga 1 +skoga 1 +washington 1 +graduat 1 +studentcomput 1 +scienc 1 +engin 1 +departmentunivers 1 +name 1 +haven 1 +notic 1 +bynow 1 +start 1 +studi 1 +univers 1 +ofwashington 1 +quit 1 +figur 1 +myqual 1 +much 1 +less 1 +dissert 1 +universityof 1 +virginia 1 +degre 1 +mathemat 1 +alsoheavili 1 +involv 1 +asian 1 +foreign 1 +relat 1 +andgovern 1 +depart 1 +origin 1 +slate 1 +also 1 +degreein 1 +heavili 1 +user 1 +interfac 1 +groupand 1 +comput 1 +sciencedepart 1 +work 1 +project 1 +call 1 +alic 1 +sinc 1 +anywai 1 +take 1 +look 1 +pictur 1 +smaller 1 +mean 1 +finger 1 +info 1 +current 1 +schedul 1 +neat 1 +hypertext 1 +link 1 +hunt 1 +destroi 1 +bug 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..cde56a12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,48 @@ +soha 1 +hassoun 1 +home 1 +page 1 +washington 1 +hassounit 1 +year 1 +graduat 1 +school 1 +univers 1 +comput 1 +scienc 1 +engin 1 +dept 1 +circuit 1 +design 1 +whoturn 1 +develop 1 +current 1 +work 1 +onarchitectur 1 +retim 1 +professor 1 +carlebel 1 +weekli 1 +schedul 1 +busi 1 +previou 1 +research 1 +educ 1 +experi 1 +public 1 +patent 1 +chao 1 +group 1 +profession 1 +interest 1 +vlsi 1 +site 1 +inform 1 +littl 1 +deede 1 +photo 1 +galleri 1 +address 1 +depart 1 +seattl 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..f59b1cc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,115 @@ +sujai 1 +parekh 1 +home 1 +page 1 +work 1 +depart 1 +comput 1 +scienc 1 +engin 1 +sieg 1 +hall 1 +chateau 1 +guggenheim 1 +annex 1 +univers 1 +washington 1 +seattl 1 +quarter 1 +class 1 +seminar 1 +system 1 +french 1 +research 1 +simultaneousmultithread 1 +particular 1 +issu 1 +relat 1 +tomultithread 1 +architectur 1 +softbot 1 +project 1 +evalu 1 +simon 1 +emploi 1 +procedur 1 +search 1 +controlsystem 1 +control 1 +action 1 +report 1 +construct 1 +design 1 +remov 1 +patio 1 +convent 1 +workspac 1 +interest 1 +fund 1 +pleas 1 +contact 1 +sport 1 +spud 1 +soccer 1 +right 1 +sort 1 +bookmark 1 +like 1 +keep 1 +track 1 +academ 1 +cognit 1 +distribut 1 +parallel 1 +psycholog 1 +philosophi 1 +tenni 1 +sail 1 +squash 1 +volleybal 1 +ballroom 1 +danc 1 +food 1 +cornel 1 +oracl 1 +corpor 1 +stottler 1 +henk 1 +associ 1 +done 1 +resum 1 +random 1 +person 1 +info 1 +favorit 1 +oondhiu 1 +mango 1 +phad 1 +thai 1 +kung 1 +chicken 1 +beverag 1 +screwdriv 1 +scotch 1 +long 1 +island 1 +ic 1 +tango 1 +swing 1 +east 1 +west 1 +coast 1 +salsa 1 +rock 1 +music 1 +dire 1 +strait 1 +pink 1 +floyd 1 +phil 1 +collin 1 +genesi 1 +peter 1 +gabriel 1 +petti 1 +sparekh 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..5df5839a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,64 @@ +erik 1 +selberg 1 +improv 1 +home 1 +page 1 +name 1 +speed 1 +washington 1 +academ 1 +almost 1 +live 1 +fishcam 1 +address 1 +lara 1 +lewi 1 +memorialhappi 1 +link 1 +peopl 1 +sport 1 +food 1 +drink 1 +cultur 1 +mari 1 +kay 1 +offic 1 +friend 1 +famili 1 +fish 1 +disc 1 +drive 1 +ultim 1 +pasti 1 +power 1 +ur 1 +machin 1 +mountain 1 +bike 1 +spud 1 +softbal 1 +ski 1 +utah 1 +raquetbal 1 +pro 1 +colleg 1 +wedgwood 1 +hous 1 +diet 1 +pepper 1 +salt 1 +lake 1 +roast 1 +compani 1 +bean 1 +bagel 1 +racer 1 +star 1 +war 1 +tini 1 +toon 1 +pinki 1 +brain 1 +phantom 1 +babylon 1 +comic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..64ec3c26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,134 @@ +home 1 +page 1 +sung 1 +choiwelcom 1 +thehomepag 1 +ofsung 1 +eunchoi 1 +myschoollifemi 1 +primari 1 +research 1 +interest 1 +compil 1 +parallel 1 +program 1 +languag 1 +involv 1 +zplcompil 1 +project 1 +univers 1 +washington 1 +late 1 +beenspend 1 +time 1 +think 1 +optim 1 +communicationgener 1 +us 1 +architechtur 1 +independ 1 +communicationlibrari 1 +ironman 1 +addit 1 +experi 1 +simul 1 +data 1 +programson 1 +superscalar 1 +processor 1 +goal 1 +work 1 +improv 1 +nodeperform 1 +come 1 +gener 1 +machin 1 +alsobeen 1 +seen 1 +hang 1 +chaosrout 1 +group 1 +includ 1 +graphic 1 +front 1 +visual 1 +thatexperi 1 +current 1 +implement 1 +anoth 1 +router 1 +inzpl 1 +final 1 +also 1 +littl 1 +astronomi 1 +quarter 1 +ta 1 +enjoi 1 +watch 1 +movi 1 +mostli 1 +comfort 1 +like 1 +yeah 1 +vegetarian 1 +sinc 1 +myjunior 1 +year 1 +colleg 1 +drink 1 +dinner 1 +would 1 +samewithout 1 +good 1 +wine 1 +result 1 +must 1 +exercis 1 +quit 1 +plai 1 +twosoccerteam 1 +cousin 1 +scrub 1 +divis 1 +cooper 1 +recdivis 1 +last 1 +season 1 +came 1 +second 1 +place 1 +andcoop 1 +unfortun 1 +recent 1 +sacrifiedmi 1 +left 1 +knee 1 +game 1 +soccer 1 +take 1 +usualstep 1 +aerobicsclass 1 +instead 1 +find 1 +try 1 +swim 1 +weight 1 +trainingclass 1 +peopl 1 +world 1 +read 1 +book 1 +abit 1 +shakespear 1 +publictelevis 1 +listen 1 +classicalmus 1 +myotherlif 1 +choi 1 +sungeun 1 +depart 1 +comput 1 +scienc 1 +engin 1 +seattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..d69dc202 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,58 @@ +nguyen 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +washington 1 +work 1 +world 1 +research 1 +interest 1 +includ 1 +oper 1 +system 1 +distribut 1 +andparallel 1 +network 1 +secur 1 +current 1 +help 1 +frommi 1 +advisor 1 +johnzahorjan 1 +build 1 +support 1 +run 1 +soft 1 +real 1 +timeappl 1 +visual 1 +partial 1 +idl 1 +workstat 1 +innow 1 +recent 1 +complet 1 +studi 1 +runtim 1 +measur 1 +ofappl 1 +characterist 1 +us 1 +tominim 1 +applic 1 +execut 1 +time 1 +uniprogram 1 +multiprocessorsenviron 1 +well 1 +schedul 1 +make 1 +goodglob 1 +decis 1 +multiprogram 1 +cvpublic 1 +worldvietnameseresourc 1 +netcyclingplayground 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..3dab0e33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,13 @@ +tian 1 +homepageyour 1 +browser 1 +rusti 1 +yellow 1 +turkei 1 +frame 1 +even 1 +part 1 +html 1 +standard 1 +click 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..a8f2afa7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,103 @@ +ashutosh 1 +tiwari 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +mostli 1 +full 1 +time 1 +fourth 1 +year 1 +graduat 1 +student 1 +work 1 +area 1 +singl 1 +address 1 +space 1 +oper 1 +system 1 +opal 1 +persist 1 +object 1 +orient 1 +databas 1 +applic 1 +workload 1 +measur 1 +support 1 +past 1 +infrastructur 1 +user 1 +interfac 1 +distribut 1 +spare 1 +group 1 +research 1 +technolog 1 +organ 1 +boe 1 +servic 1 +oopsla 1 +workshop 1 +build 1 +larg 1 +softwar 1 +us 1 +projectsopali 1 +project 1 +issu 1 +opportun 1 +involv 1 +creat 1 +global 1 +across 1 +multipl 1 +machin 1 +jeff 1 +chase 1 +primari 1 +architect 1 +hank 1 +levi 1 +advisor 1 +close 1 +also 1 +character 1 +behavior 1 +gener 1 +techniqu 1 +paper 1 +sever 1 +profession 1 +career 1 +thisexperi 1 +basi 1 +follow 1 +distrbut 1 +public 1 +virtual 1 +refer 1 +proc 1 +intern 1 +septemb 1 +except 1 +handl 1 +parallel 1 +environ 1 +ecoop 1 +juli 1 +bosch 1 +addendum 1 +proceed 1 +oop 1 +messeng 1 +octob 1 +evalu 1 +benchmark 1 +narasayya 1 +perform 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..3f642db6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,91 @@ +tessa 1 +anoth 1 +dead 1 +inform 1 +superhighwai 1 +second 1 +yeargradu 1 +student 1 +comput 1 +scienc 1 +univers 1 +washington 1 +research 1 +interest 1 +includ 1 +sort 1 +relatedgoodi 1 +current 1 +work 1 +clio 1 +system 1 +search 1 +andbrows 1 +person 1 +histori 1 +avail 1 +seek 1 +gain 1 +employ 1 +myresum 1 +onlin 1 +curiou 1 +kittyi 1 +honor 1 +share 1 +apart 1 +gambit 1 +siames 1 +cat 1 +great 1 +memor 1 +found 1 +therear 1 +pictur 1 +page 1 +tofind 1 +appar 1 +alsor 1 +scotland 1 +classesi 1 +still 1 +qual 1 +quarter 1 +take 1 +last 1 +ofeight 1 +class 1 +fulfil 1 +breadth 1 +requir 1 +digit 1 +seminarlinux 1 +gameseverybodi 1 +plai 1 +game 1 +maintain 1 +linux 1 +gametom 1 +commit 1 +advanc 1 +pretti 1 +coolgam 1 +platform 1 +also 1 +first 1 +attempt 1 +java 1 +program 1 +simpl 1 +maze 1 +applet 1 +sleepingi 1 +known 1 +frequent 1 +seattl 1 +area 1 +bookstor 1 +knit 1 +crochet 1 +copyright 1 +tlau 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..9f6ccf81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,44 @@ +martin 1 +tompa 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +phone 1 +receptionist 1 +lectur 1 +note 1 +articl 1 +trajectori 1 +thelma 1 +louis 1 +recent 1 +holidai 1 +moon 1 +pearl 1 +among 1 +wash 1 +oyster 1 +collabor 1 +surrealist 1 +electron 1 +propheci 1 +build 1 +across 1 +pierc 1 +lane 1 +carol 1 +photograph 1 +photo 1 +courtesi 1 +health 1 +center 1 +educ 1 +resourc 1 +provid 1 +mani 1 +imag 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..bb5f7722 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,106 @@ +traci 1 +kimbrel 1 +held 1 +prison 1 +univers 1 +washingtonsinc 1 +without 1 +charg 1 +trial 1 +move 1 +year 1 +toanoth 1 +seattl 1 +area 1 +inmat 1 +forc 1 +tomanufactur 1 +airplan 1 +escap 1 +institut 1 +wasrecaptur 1 +return 1 +washington 1 +help 1 +hisplight 1 +rescu 1 +imprison 1 +list 1 +thing 1 +done 1 +curriculum 1 +vita 1 +detail 1 +statement 1 +ofwhat 1 +promis 1 +histori 1 +goal 1 +free 1 +captor 1 +depart 1 +comput 1 +scienc 1 +engin 1 +tracyk 1 +eduher 1 +trace 1 +driven 1 +comparison 1 +algorithm 1 +parallel 1 +prefetch 1 +cachingtraci 1 +andrew 1 +tomkin 1 +hugo 1 +patterson 1 +brian 1 +bershad 1 +edward 1 +felten 1 +garth 1 +gibson 1 +anna 1 +karlin 1 +appear 1 +sigop 1 +usenix 1 +associ 1 +symposium 1 +onoper 1 +system 1 +design 1 +implemen 1 +near 1 +optim 1 +ieeesymposium 1 +foundat 1 +longer 1 +version 1 +integr 1 +cach 1 +page 1 +extend 1 +abstract 1 +proceed 1 +sigmetr 1 +confer 1 +measurementand 1 +model 1 +probabilist 1 +verifi 1 +matrix 1 +product 1 +usingo 1 +squar 1 +time 1 +base 1 +random 1 +bit 1 +rakesh 1 +kumar 1 +sinha 1 +inform 1 +process 1 +letter 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..4b276524 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,49 @@ +travi 1 +craig 1 +home 1 +page 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +research 1 +interest 1 +mechan 1 +predict 1 +real 1 +time 1 +system 1 +cach 1 +restor 1 +queu 1 +spin 1 +lock 1 +arctic 1 +submarin 1 +current 1 +cours 1 +take 1 +quarter 1 +dissert 1 +work 1 +consum 1 +side 1 +project 1 +half 1 +esca 1 +corpor 1 +help 1 +keep 1 +volvo 1 +run 1 +press 1 +latest 1 +motor 1 +pool 1 +statu 1 +understand 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..58c625e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,12 @@ +dean 1 +tullsen 1 +home 1 +page 1 +biograph 1 +inform 1 +research 1 +interest 1 +bibliographi 1 +download 1 +resumemi 1 +hobbi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..1a11be3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,235 @@ +michael 1 +vanhilstmichael 1 +vanhilstvanhilst 1 +washington 1 +edumvh 1 +harvard 1 +eduperson 1 +research 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +usaclick 1 +send 1 +email 1 +messag 1 +mike 1 +vanhilst 1 +personalmik 1 +start 1 +year 1 +graduat 1 +student 1 +hopefulli 1 +littl 1 +luck 1 +finish 1 +around 1 +theend 1 +winter 1 +quarter 1 +immedi 1 +prior 1 +come 1 +udub 1 +work 1 +contractor 1 +atibm 1 +wrote 1 +motif 1 +widget 1 +user 1 +unterfac 1 +sdata 1 +explor 1 +programm 1 +fix 1 +maintainingcomput 1 +hardwar 1 +smithsonian 1 +astrophys 1 +observatori 1 +part 1 +locat 1 +within 1 +learn 1 +debug 1 +softwar 1 +could 1 +convinc 1 +realli 1 +correctli 1 +stai 1 +time 1 +program 1 +call 1 +saoimagewhich 1 +us 1 +lot 1 +astronom 1 +look 1 +imag 1 +saoimag 1 +gnudistribut 1 +would 1 +like 1 +thank 1 +bill 1 +wyatt 1 +eric 1 +mandel 1 +schwarz 1 +doug 1 +minkfor 1 +guid 1 +continu 1 +project 1 +mention 1 +countless 1 +other 1 +contribut 1 +took 1 +group 1 +seismologistsin 1 +pari 1 +franc 1 +data 1 +acquisit 1 +calibr 1 +analysi 1 +truli 1 +wonder 1 +wife 1 +angela 1 +french 1 +languag 1 +class 1 +theallianc 1 +francais 1 +nativ 1 +colombiain 1 +south 1 +america 1 +summer 1 +front 1 +studentsbrows 1 +schedul 1 +base 1 +uwin 1 +talent 1 +staff 1 +commun 1 +folk 1 +brought 1 +pine 1 +special 1 +shirei 1 +design 1 +traci 1 +stenvik 1 +screen 1 +librari 1 +machin 1 +also 1 +taught 1 +begin 1 +extens 1 +motiv 1 +frommicrosoft 1 +product 1 +support 1 +sacrif 1 +recent 1 +present 1 +paper 1 +theintern 1 +symposium 1 +object 1 +technolog 1 +advanc 1 +isota 1 +confer 1 +orient 1 +system 1 +applic 1 +oopsla 1 +theacm 1 +sigsoft 1 +foundat 1 +poster 1 +made 1 +subject 1 +workshop 1 +particip 1 +doctor 1 +demo 1 +uist 1 +steve 1 +earlier 1 +life 1 +earn 1 +degre 1 +inarchitectur 1 +wooden 1 +kind 1 +citi 1 +planningfrom 1 +mitand 1 +develop 1 +director 1 +forth 1 +grinnel 1 +iowa 1 +thing 1 +differ 1 +skill 1 +visualdesign 1 +problem 1 +solv 1 +valu 1 +still 1 +get 1 +talk 1 +chri 1 +alexand 1 +activ 1 +chapter 1 +associ 1 +improv 1 +ti 1 +larg 1 +small 1 +compani 1 +area 1 +enjoi 1 +hike 1 +cross 1 +countri 1 +ski 1 +sail 1 +andkayak 1 +swim 1 +lake 1 +bronson 1 +free 1 +taken 1 +marco 1 +harold 1 +sebastien 1 +hilst 1 +born 1 +post 1 +pictur 1 +soon 1 +anoth 1 +photo 1 +scanner 1 +visit 1 +sinc 1 +novemb 1 +last 1 +modifi 1 +fridai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..93b018b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,103 @@ +hello 1 +vassilylong 1 +live 1 +start 1 +realli 1 +person 1 +best 1 +linki 1 +come 1 +thu 1 +http 1 +washington 1 +home 1 +vass 1 +us 1 +document 1 +also 1 +shortest 1 +write 1 +young 1 +collect 1 +quit 1 +link 1 +download 1 +fewfil 1 +expand 1 +sinc 1 +thisstuff 1 +select 1 +quotesrussian 1 +pagesvari 1 +linksguid 1 +html 1 +formsoth 1 +pageuw 1 +pagencsa 1 +mosaic 1 +pagerec 1 +join 1 +cecilproject 1 +cecil 1 +cool 1 +pure 1 +object 1 +orient 1 +languag 1 +andvortex 1 +even 1 +cooler 1 +compil 1 +infrastructur 1 +befast 1 +mayb 1 +paper 1 +written 1 +member 1 +staff 1 +design 1 +implement 1 +themvi 1 +system 1 +assist 1 +access 1 +ourdepartment 1 +room 1 +visitor 1 +databas 1 +recent 1 +beenupgrad 1 +peopl 1 +qual 1 +project 1 +thezpl 1 +languageto 1 +handl 1 +irregular 1 +data 1 +structur 1 +repres 1 +graph 1 +anddynam 1 +repartit 1 +arrai 1 +myqual 1 +writeup 1 +page 1 +short 1 +overview 1 +check 1 +theslidesfrom 1 +present 1 +slide 1 +toresourc 1 +relat 1 +eduobject 1 +mirror 1 +closer 1 +appear 1 +pastor 1 +vybrasyvalsya 1 +okna 1 +pyatyi 1 +deystvov 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..214e3b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,55 @@ +virgil 1 +bourassa 1 +home 1 +page 1 +uwvirgil 1 +evan 1 +bourassavirgil 1 +washington 1 +student 1 +depart 1 +comput 1 +scienc 1 +engin 1 +theunivers 1 +seattl 1 +research 1 +interestsinclud 1 +oper 1 +system 1 +architectur 1 +join 1 +boeingin 1 +work 1 +scientist 1 +scienceorgan 1 +inform 1 +support 1 +servic 1 +divis 1 +bellevu 1 +receiv 1 +electr 1 +arizonast 1 +univers 1 +temp 1 +arizona 1 +electricalengin 1 +accesswhat 1 +interest 1 +expertis 1 +resum 1 +patent 1 +invent 1 +public 1 +present 1 +profession 1 +histori 1 +educ 1 +achiev 1 +recommend 1 +letter 1 +statusoccasion 1 +updat 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..eb8ab424 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,201 @@ +vivek 1 +ratan 1 +home 1 +page 1 +particular 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +washington 1 +current 1 +academ 1 +leav 1 +work 1 +research 1 +scientistat 1 +bellcor 1 +morristown 1 +researchwork 1 +interest 1 +distribut 1 +issu 1 +system 1 +applic 1 +develop 1 +high 1 +avail 1 +group 1 +distributedsoftwar 1 +simpli 1 +highli 1 +continu 1 +presenc 1 +failur 1 +toolset 1 +anatida 1 +provid 1 +mechan 1 +adher 1 +corba 1 +standard 1 +also 1 +look 1 +activ 1 +replic 1 +scheme 1 +indc 1 +recent 1 +effort 1 +focuss 1 +enhanc 1 +fault 1 +toler 1 +cell 1 +directori 1 +servic 1 +detail 1 +project 1 +foundher 1 +integrationof 1 +technolog 1 +server 1 +murphi 1 +softwar 1 +safeti 1 +methodolog 1 +head 1 +bydr 1 +nanci 1 +leveson 1 +found 1 +much 1 +involv 1 +automat 1 +tree 1 +gener 1 +fromrequir 1 +specif 1 +written 1 +rsml 1 +well 1 +languag 1 +simul 1 +public 1 +list 1 +person 1 +inform 1 +born 1 +brought 1 +india 1 +last 1 +eight 1 +year 1 +undergradu 1 +attend 1 +angelo 1 +state 1 +wesleyan 1 +middletown 1 +receiv 1 +physic 1 +math 1 +right 1 +came 1 +seattl 1 +purus 1 +studi 1 +like 1 +plai 1 +tenni 1 +whenev 1 +squash 1 +lesserext 1 +racquetbal 1 +suffic 1 +ardent 1 +folow 1 +game 1 +cricket 1 +particip 1 +ultra 1 +email 1 +mani 1 +follow 1 +exploit 1 +superson 1 +marin 1 +cowboi 1 +half 1 +taken 1 +keen 1 +learn 1 +ballroom 1 +danc 1 +waltz 1 +foxtrot 1 +chacha 1 +rhumba 1 +tango 1 +swing 1 +west 1 +coast 1 +pleas 1 +occasion 1 +dabbl 1 +mambo 1 +area 1 +best 1 +place 1 +center 1 +us 1 +band 1 +session 1 +everi 1 +saturdai 1 +model 1 +unit 1 +nation 1 +chapter 1 +intern 1 +educ 1 +organ 1 +confer 1 +held 1 +throughout 1 +topic 1 +restructur 1 +reform 1 +part 1 +secur 1 +council 1 +ecosoc 1 +world 1 +bank 1 +rapidpopul 1 +growth 1 +nuclear 1 +prolifer 1 +read 1 +poetri 1 +mirza 1 +ghalib 1 +centuryindian 1 +poet 1 +english 1 +literatur 1 +especi 1 +romant 1 +victorian 1 +period 1 +link 1 +obligatori 1 +collect 1 +sitesthat 1 +tend 1 +visit 1 +often 1 +engin 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..0f7e98bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,30 @@ +geoff 1 +skywhoi 1 +graduat 1 +student 1 +univers 1 +washington 1 +inseattl 1 +whati 1 +master 1 +thesi 1 +wireless 1 +mobil 1 +comput 1 +design 1 +andbuild 1 +system 1 +call 1 +mobisa 1 +current 1 +avoid 1 +settl 1 +topic 1 +wherechateau 1 +guggenheim 1 +annex 1 +washingtonseattl 1 +look 1 +emac 1 +window 1 +voelker 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..833149e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,55 @@ +home 1 +wayn 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +washington 1 +seattl 1 +waynew 1 +stuff 1 +current 1 +look 1 +memori 1 +system 1 +perform 1 +particular 1 +differentmemori 1 +organ 1 +investig 1 +work 1 +beingdon 1 +jean 1 +loup 1 +baer 1 +also 1 +interpret 1 +other 1 +denni 1 +geoff 1 +alec 1 +rightnow 1 +thing 1 +littl 1 +rocki 1 +actual 1 +earli 1 +version 1 +paper 1 +keep 1 +jump 1 +alpha 1 +interest 1 +place 1 +fish 1 +list 1 +cool 1 +site 1 +howev 1 +peoplewho 1 +peopl 1 +well 1 +test 1 +testwayn 1 +wong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..81f288e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,13 @@ +william 1 +chan 1 +home 1 +pagewilliam 1 +pagei 1 +spend 1 +time 1 +hell 1 +spare 1 +hang 1 +heaven 1 +wchan 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..97b4df7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,140 @@ +daniel 1 +weld 1 +associ 1 +professor 1 +comput 1 +scienc 1 +engineeringat 1 +univers 1 +ofwashington 1 +receiv 1 +bachelor 1 +degre 1 +biochemistri 1 +yale 1 +land 1 +artifici 1 +intellig 1 +presidenti 1 +young 1 +investig 1 +award 1 +offic 1 +naval 1 +research 1 +younginvestig 1 +theadvisori 1 +board 1 +journal 1 +airesearch 1 +guest 1 +editor 1 +edit 1 +aaai 1 +report 1 +role 1 +ofintellig 1 +system 1 +nation 1 +inform 1 +infrastructur 1 +isco 1 +program 1 +chair 1 +publish 1 +book 1 +scad 1 +technic 1 +paper 1 +person 1 +data 1 +reach 1 +sieg 1 +hall 1 +phone 1 +work 1 +home 1 +mail 1 +dept 1 +engin 1 +washington 1 +seattlewa 1 +interest 1 +current 1 +specif 1 +softwar 1 +agent 1 +plan 1 +exampl 1 +group 1 +support 1 +ucpop 1 +planner 1 +us 1 +almost 1 +hundr 1 +sitesworldwid 1 +mani 1 +avail 1 +electron 1 +arehi 1 +favorit 1 +repres 1 +sens 1 +action 1 +middl 1 +ground 1 +revisit 1 +gather 1 +august 1 +base 1 +control 1 +aip 1 +ascal 1 +comparison 1 +shop 1 +world 1 +wide 1 +januari 1 +softbot 1 +interfac 1 +internet 1 +cacm 1 +juli 1 +anintroduct 1 +least 1 +commit 1 +magazin 1 +winter 1 +select 1 +exhaustivelist 1 +recreat 1 +absent 1 +foundat 1 +cafe 1 +allegro 1 +stormymountain 1 +climb 1 +past 1 +enjoi 1 +travel 1 +theworld 1 +like 1 +found 1 +plai 1 +twin 1 +boi 1 +adam 1 +galen 1 +invit 1 +visit 1 +galleri 1 +pacif 1 +northwest 1 +desert 1 +wilder 1 +photograph 1 +also 1 +illustr 1 +stori 1 +morocco 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..d4ce53f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,10 @@ +wendi 1 +belluomini 1 +washington 1 +graduat 1 +master 1 +current 1 +work 1 +univ 1 +utah 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..2a45463a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,55 @@ +wilson 1 +hsiehwilson 1 +hsiehi 1 +postdoc 1 +thedepart 1 +comput 1 +scienc 1 +engin 1 +theunivers 1 +washington 1 +inseattl 1 +member 1 +thespin 1 +project 1 +receiv 1 +electr 1 +sciencein 1 +theschool 1 +engineeringatmit 1 +work 1 +thelaboratori 1 +advisor 1 +werefran 1 +kaashoekandbil 1 +weihl 1 +research 1 +compil 1 +parallel 1 +system 1 +myresearch 1 +interest 1 +interact 1 +among 1 +programminglanguag 1 +runtim 1 +oper 1 +architectur 1 +select 1 +publicationsselect 1 +linksperson 1 +interestswilson 1 +hsieh 1 +depart 1 +univers 1 +seattl 1 +offic 1 +sieg 1 +move 1 +phone 1 +numberha 1 +chang 1 +voic 1 +whsieh 1 +public 1 +keyoctob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..e94586e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,83 @@ +alec 1 +wolman 1 +home 1 +page 1 +wolmanwolman 1 +washington 1 +eduworkcomput 1 +scienc 1 +engin 1 +departmentunivers 1 +washingtonbox 1 +seattl 1 +current 1 +graduat 1 +student 1 +thecomput 1 +departmentat 1 +univers 1 +offic 1 +isroom 1 +thechateau 1 +gradual 1 +school 1 +work 1 +fordigit 1 +equip 1 +corp 1 +cambridg 1 +research 1 +interest 1 +includ 1 +oper 1 +system 1 +network 1 +architectur 1 +recent 1 +project 1 +scalabl 1 +perform 1 +etch 1 +binari 1 +instrument 1 +optim 1 +executablesrocki 1 +interpret 1 +performanceon 1 +line 1 +paper 1 +firewal 1 +applic 1 +relai 1 +trees 1 +summer 1 +usenix 1 +latenc 1 +analysi 1 +voelker 1 +thekkath 1 +winter 1 +structur 1 +romer 1 +wong 1 +baer 1 +bershad 1 +levi 1 +appear 1 +asplo 1 +hungri 1 +otter 1 +fixha 1 +strang 1 +idea 1 +nervou 1 +habit 1 +realli 1 +plai 1 +guitar 1 +link 1 +hallwolman 1 +diseasewolman 1 +pressur 1 +treat 1 +lumber 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..f12a44e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,85 @@ +xiaohan 1 +xqin 1 +washington 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +offic 1 +sieg 1 +phone 1 +year 1 +graduat 1 +student 1 +work 1 +jean 1 +loup 1 +baer 1 +research 1 +interest 1 +includ 1 +architectur 1 +parallel 1 +distribut 1 +system 1 +perform 1 +evalu 1 +method 1 +model 1 +simul 1 +short 1 +term 1 +goal 1 +school 1 +soon 1 +possibl 1 +paper 1 +cluster 1 +base 1 +submit 1 +confer 1 +explicit 1 +communicationprimit 1 +cach 1 +coher 1 +multiprocessor 1 +appear 1 +proceed 1 +hpca 1 +compar 1 +studi 1 +conserv 1 +optimist 1 +trace 1 +driven 1 +award 1 +symposium 1 +page 1 +tech 1 +report 1 +dept 1 +univ 1 +implement 1 +intern 1 +process 1 +graph 1 +toolfor 1 +monitor 1 +visual 1 +basedmultiprocessor 1 +zhang 1 +nalluri 1 +journal 1 +june 1 +predict 1 +processingon 1 +numa 1 +ieee 1 +tran 1 +softwar 1 +stuff 1 +photo 1 +chinaread 1 +chinesesearch 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..60823f98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,71 @@ +yasushi 1 +saitoyasushi 1 +saito 1 +second 1 +year 1 +graduat 1 +student 1 +atdepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +current 1 +workingwith 1 +brian 1 +bershad 1 +thespin 1 +project 1 +address 1 +andperson 1 +info 1 +meta 1 +link 1 +metacrawl 1 +yahoo 1 +desktop 1 +refer 1 +index 1 +alta 1 +vista 1 +lyco 1 +archi 1 +tech 1 +research 1 +spin 1 +intern 1 +document 1 +modula 1 +time 1 +schedul 1 +survei 1 +oper 1 +system 1 +transact 1 +servic 1 +qual 1 +sightse 1 +japanes 1 +random 1 +javascript 1 +apprentic 1 +page 1 +us 1 +linux 1 +connect 1 +gatewai 1 +japan 1 +perl 1 +patch 1 +touch 1 +type 1 +trainer 1 +dvorak 1 +lesson 1 +text 1 +want 1 +finger 1 +talk 1 +trycanva 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..e7b29f36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,71 @@ +oren 1 +zamir 1 +home 1 +pageoren 1 +page 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +chateau 1 +washington 1 +edui 1 +isra 1 +graduat 1 +student 1 +computersci 1 +engin 1 +univers 1 +myundergradu 1 +degre 1 +physic 1 +mathemat 1 +hebrewunivers 1 +jerusalem 1 +israel 1 +interest 1 +field 1 +artifici 1 +intellig 1 +softwareengin 1 +current 1 +work 1 +line 1 +cluster 1 +algorithmsfor 1 +internet 1 +document 1 +retriev 1 +basic 1 +idea 1 +help 1 +userwith 1 +search 1 +result 1 +hundr 1 +worki 1 +done 1 +part 1 +metacrawl 1 +parallel 1 +servic 1 +along 1 +orenetzioni 1 +erik 1 +selberg 1 +resum 1 +avail 1 +pictur 1 +thing 1 +like 1 +dive 1 +sinai 1 +jeeptour 1 +ski 1 +last 1 +raft 1 +trip 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..84504b56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,126 @@ +titl 1 +jean 1 +loup 1 +baer 1 +professorand 1 +adjunct 1 +professor 1 +electr 1 +engin 1 +receiv 1 +diplom 1 +ingnieur 1 +electricalengin 1 +doctorat 1 +cycl 1 +comput 1 +scienc 1 +theuniversit 1 +grenobl 1 +franc 1 +ucla 1 +prior 1 +join 1 +univers 1 +washington 1 +research 1 +laboratoir 1 +decalcul 1 +universit 1 +member 1 +digit 1 +technologygroup 1 +present 1 +interest 1 +parallel 1 +anddistribut 1 +process 1 +system 1 +architectur 1 +author 1 +coauthor 1 +paper 1 +thesearea 1 +textbook 1 +press 1 +serv 1 +ieee 1 +distinguishedvisitor 1 +nation 1 +lectur 1 +guggenheim 1 +fellow 1 +editor 1 +journal 1 +distribut 1 +languag 1 +asprogram 1 +chairman 1 +intern 1 +confer 1 +parallelprocess 1 +program 1 +internationalsymposium 1 +gener 1 +ofth 1 +current 1 +chair 1 +sigarch 1 +eighteen 1 +student 1 +complet 1 +dissert 1 +professorba 1 +direct 1 +twelv 1 +work 1 +industri 1 +laboratoriesand 1 +inacademia 1 +although 1 +year 1 +hashad 1 +difficulti 1 +retain 1 +french 1 +accent 1 +cours 1 +recent 1 +project 1 +look 1 +page 1 +involv 1 +cach 1 +coher 1 +protocol 1 +cluster 1 +improv 1 +singl 1 +perform 1 +softwar 1 +primit 1 +appear 1 +hpca 1 +prefetch 1 +uniprocessor 1 +hardwar 1 +also 1 +comparisonwith 1 +block 1 +asplo 1 +multiprocessor 1 +isca 1 +impact 1 +specul 1 +execut 1 +denni 1 +home 1 +andisca 1 +trace 1 +driven 1 +simul 1 +conserv 1 +approach 1 +icpp 1 +optimisticapproach 1 +comparison 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..460b9bad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,118 @@ +craig 1 +chamber 1 +assist 1 +professor 1 +join 1 +faculti 1 +receiv 1 +degre 1 +comput 1 +scienc 1 +stanford 1 +research 1 +interest 1 +design 1 +implementationof 1 +advanc 1 +program 1 +system 1 +incorpor 1 +express 1 +programminglanguag 1 +effici 1 +implement 1 +support 1 +programmingenviron 1 +current 1 +investig 1 +object 1 +orient 1 +languagesand 1 +lead 1 +ceciland 1 +vortex 1 +project 1 +cecil 1 +pure 1 +languageserv 1 +vehicl 1 +multi 1 +method 1 +static 1 +type 1 +modul 1 +featur 1 +optim 1 +compilersystem 1 +languag 1 +intra 1 +andinterprocedur 1 +analys 1 +profil 1 +guid 1 +withfront 1 +end 1 +modula 1 +java 1 +previous 1 +chamberswa 1 +member 1 +self 1 +also 1 +spinproject 1 +spin 1 +extens 1 +oper 1 +microkernel 1 +whichsupport 1 +dynam 1 +adapt 1 +interfac 1 +implementationsund 1 +direct 1 +applic 1 +control 1 +still 1 +maintain 1 +systemintegr 1 +isol 1 +util 1 +dialect 1 +themodula 1 +pointer 1 +safe 1 +kernel 1 +spinalso 1 +reli 1 +dynamiccompil 1 +achiev 1 +high 1 +perform 1 +despit 1 +fine 1 +grainedextens 1 +click 1 +herefor 1 +inform 1 +undergradu 1 +graduat 1 +level 1 +researchproject 1 +area 1 +contact 1 +informationprof 1 +chambersdepart 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +washington 1 +mail 1 +requir 1 +street 1 +address 1 +sieg 1 +hall 1 +room 1 +last 1 +updat 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..703c69be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,116 @@ +carl 1 +ebel 1 +home 1 +page 1 +ebelingdepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +seattl 1 +washington 1 +voic 1 +mail 1 +offic 1 +sieg 1 +hall 1 +room 1 +associ 1 +professor 1 +physic 1 +wheatoncolleg 1 +southern 1 +illinoisunivers 1 +carnegi 1 +mellonunivers 1 +join 1 +research 1 +interest 1 +fall 1 +categori 1 +vlsiarchitectur 1 +aid 1 +design 1 +digit 1 +system 1 +haswork 1 +number 1 +vlsi 1 +project 1 +includ 1 +hitech 1 +chessmachin 1 +apex 1 +graphic 1 +chip 1 +draw 1 +spline 1 +curv 1 +andsurfac 1 +triptych 1 +field 1 +programm 1 +gate 1 +arrai 1 +current 1 +hei 1 +involv 1 +chao 1 +build 1 +multicomput 1 +routingnetwork 1 +focu 1 +method 1 +optim 1 +theperform 1 +circuit 1 +us 1 +level 1 +sensit 1 +latch 1 +placementand 1 +rout 1 +algorithm 1 +fpga 1 +particularli 1 +teachingspr 1 +advanc 1 +logic 1 +designoffic 1 +hour 1 +mondai 1 +thursdai 1 +travel 1 +april 1 +fccm 1 +napamai 1 +burlington 1 +chicagojun 1 +vegasresearch 1 +northwest 1 +laboratori 1 +integr 1 +router 1 +high 1 +densiti 1 +architectur 1 +public 1 +journal 1 +articl 1 +confer 1 +workshop 1 +paper 1 +graduat 1 +student 1 +soha 1 +hassoun 1 +neil 1 +mckenzi 1 +darren 1 +cronquist 1 +paul 1 +franklin 1 +amara 1 +galleryelan 1 +galleryebel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..0e40b772 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,39 @@ +steve 1 +hanksunivers 1 +washingtondepart 1 +comput 1 +scienc 1 +design 1 +agent 1 +architecturesai 1 +magazin 1 +seriou 1 +link 1 +home 1 +page 1 +spring 1 +uncertainti 1 +confer 1 +inform 1 +group 1 +seattl 1 +restaur 1 +symphoni 1 +schedul 1 +wine 1 +opera 1 +server 1 +edita 1 +gruberova 1 +photo 1 +carlo 1 +maria 1 +giulini 1 +discographi 1 +sumac 1 +tenni 1 +new 1 +hank 1 +washington 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..74ae7e47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,71 @@ +titl 1 +alistair 1 +holden 1 +professor 1 +electr 1 +engin 1 +origin 1 +highland 1 +scotland 1 +receivedhi 1 +degre 1 +univers 1 +glasgow 1 +spent 1 +year 1 +withth 1 +british 1 +broadcast 1 +corpor 1 +divis 1 +graduateapprentic 1 +research 1 +yale 1 +edison 1 +fellowship 1 +phddegre 1 +washington 1 +dissert 1 +learningin 1 +artifici 1 +intellig 1 +interest 1 +began 1 +take 1 +coursefrom 1 +colin 1 +cherri 1 +imperi 1 +colleg 1 +london 1 +thebbc 1 +initi 1 +comput 1 +scienc 1 +program 1 +theuw 1 +time 1 +group 1 +faculti 1 +mostli 1 +math 1 +departmentsform 1 +within 1 +graduat 1 +school 1 +current 1 +work 1 +applic 1 +knowledg 1 +base 1 +system 1 +verif 1 +expert 1 +integr 1 +symbol 1 +neural 1 +netmethodolog 1 +speech 1 +understand 1 +aid 1 +design 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..18427c21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,116 @@ +dick 1 +karprichard 1 +karpprofessor 1 +ofcomputersci 1 +engin 1 +andadjunct 1 +professor 1 +ofmolecularbiotechnologyunivers 1 +ofwashington 1 +karp 1 +washington 1 +eduaward 1 +membershipsn 1 +medal 1 +scienc 1 +babbag 1 +prize 1 +berkelei 1 +univers 1 +fellow 1 +ture 1 +award 1 +member 1 +nation 1 +academi 1 +sciencesmemb 1 +engineeringfellow 1 +american 1 +art 1 +sciencesfellow 1 +associ 1 +advanc 1 +sciencedistinguish 1 +teach 1 +academ 1 +senat 1 +class 1 +chair 1 +berkeleylanchest 1 +oper 1 +research 1 +societi 1 +america 1 +institut 1 +manag 1 +fulkerson 1 +mathemat 1 +program 1 +john 1 +neumann 1 +theori 1 +faculti 1 +lectur 1 +hermann 1 +weyl 1 +studi 1 +industri 1 +appliedmathemat 1 +miller 1 +honorari 1 +doctor 1 +georgetown 1 +massachusett 1 +technion 1 +pennsylvania 1 +advisori 1 +board 1 +comput 1 +profession 1 +forsoci 1 +respons 1 +presentmemb 1 +governor 1 +weizmann 1 +truste 1 +intern 1 +scienceinstitut 1 +presentselect 1 +public 1 +combinator 1 +complex 1 +random 1 +turingaward 1 +commun 1 +construct 1 +perfect 1 +match 1 +upfal 1 +wigderson 1 +combinatorica 1 +probabilist 1 +analysi 1 +partit 1 +algorithm 1 +travel 1 +salesman 1 +problem 1 +plane 1 +ofoper 1 +theoret 1 +improv 1 +effici 1 +fornetwork 1 +flow 1 +edmond 1 +journal 1 +theacm 1 +reduc 1 +among 1 +combinatori 1 +plenum 1 +press 1 +minimum 1 +spanningtre 1 +part 1 +held 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..ce96ee93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,250 @@ +lazowska 1 +ahalf 1 +centuri 1 +exponentialprogress 1 +inform 1 +technolog 1 +univers 1 +washington 1 +annualfaculti 1 +lectur 1 +vicepresid 1 +gore 1 +speech 1 +eniac 1 +thanniversari 1 +celebr 1 +congress 1 +talk 1 +like 1 +georgejetson 1 +support 1 +polici 1 +appropri 1 +forfr 1 +flintston 1 +universityof 1 +california 1 +berkelei 1 +invent 1 +chines 1 +cook 1 +nathanmyhrvold 1 +joinsedlazowska 1 +theuwcs 1 +faculti 1 +trip 1 +memori 1 +lane 1 +professor 1 +chair 1 +thedepart 1 +comput 1 +scienc 1 +mostlywearsti 1 +push 1 +paper 1 +rack 1 +frequent 1 +flier 1 +mile 1 +graduat 1 +student 1 +seem 1 +pick 1 +onthi 1 +mbquicktim 1 +healso 1 +host 1 +lot 1 +visitor 1 +surpris 1 +number 1 +havefunnynos 1 +member 1 +board 1 +director 1 +researchassoci 1 +includ 1 +essenti 1 +allgradu 1 +depart 1 +industri 1 +research 1 +laboratoriesin 1 +field 1 +ofcra 1 +govern 1 +affair 1 +committe 1 +serv 1 +nation 1 +foundat 1 +advisorycommitte 1 +andengin 1 +council 1 +scomputersci 1 +telecommun 1 +person 1 +technic 1 +advisori 1 +formicrosoft 1 +personnationalsemiconductor 1 +system 1 +academicadvisori 1 +ofdata 1 +corpor 1 +scientif 1 +forcabl 1 +hows 1 +ventur 1 +cascadia 1 +fund 1 +belong 1 +stand 1 +committeesfor 1 +eecsat 1 +andth 1 +atstanford 1 +virginia 1 +hongkong 1 +ture 1 +award 1 +select 1 +complet 1 +servic 1 +councilpanel 1 +review 1 +multi 1 +agencyhigh 1 +perform 1 +computingand 1 +commun 1 +program 1 +brook 1 +sutherland 1 +andha 1 +recent 1 +examinersfor 1 +record 1 +examin 1 +test 1 +sigmetr 1 +associ 1 +machineri 1 +sspecial 1 +interest 1 +group 1 +concern 1 +softwar 1 +chairof 1 +symposium 1 +oper 1 +principl 1 +andeditor 1 +ieee 1 +transact 1 +addit 1 +servinga 1 +engin 1 +onacadem 1 +thecommitte 1 +deanship 1 +colleg 1 +artsand 1 +forth 1 +molecular 1 +biotechnolog 1 +amemb 1 +deanof 1 +deliv 1 +theunivers 1 +annual 1 +fellowof 1 +associationfor 1 +theinstitut 1 +electr 1 +andelectron 1 +seventeenph 1 +studentshav 1 +degre 1 +work 1 +miscellan 1 +link 1 +integratedoverview 1 +region 1 +also 1 +apersuas 1 +player 1 +version 1 +intend 1 +forloc 1 +consumpt 1 +theimpact 1 +perspect 1 +uwcs 1 +profession 1 +master 1 +persuas 1 +playertopten 1 +reason 1 +major 1 +csebuild 1 +project 1 +abbrevi 1 +cvcomputingresearch 1 +drive 1 +informationtechnolog 1 +forwardmassi 1 +goldmanreport 1 +alleg 1 +cseph 1 +product 1 +issu 1 +flaw 1 +data 1 +medianyear 1 +confer 1 +boardstudi 1 +doctor 1 +think 1 +driver 1 +highwai 1 +saturdayseminar 1 +novemb 1 +testimonyto 1 +houseappropri 1 +april 1 +hous 1 +hpcc 1 +octob 1 +februari 1 +interestinghom 1 +page 1 +sometim 1 +demo 1 +purpos 1 +odeto 1 +execut 1 +vice 1 +presid 1 +tallman 1 +trask 1 +departsfor 1 +duke 1 +lanelazowska 1 +down 1 +famili 1 +home 1 +pagedirect 1 +houseshilshol 1 +aquat 1 +club 1 +pagerec 1 +discoveredreview 1 +grade 1 +poetryfing 1 +scheduleinform 1 +offic 1 +reflector 1 +http 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..2eabf430 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,94 @@ +titl 1 +alan 1 +shaw 1 +professor 1 +graduat 1 +bachelor 1 +sdegre 1 +engin 1 +physic 1 +univers 1 +toronto 1 +amast 1 +mathemat 1 +stanford 1 +incomput 1 +scienc 1 +addit 1 +facultyappoint 1 +washington 1 +start 1 +hasbeen 1 +assist 1 +comput 1 +cornel 1 +visit 1 +fulbright 1 +research 1 +scholar 1 +pari 1 +guest 1 +informat 1 +zurich 1 +associ 1 +atth 1 +linear 1 +acceler 1 +center 1 +system 1 +theibm 1 +corpor 1 +current 1 +interest 1 +real 1 +time 1 +softwar 1 +specif 1 +method 1 +publicationsinclud 1 +textbook 1 +oper 1 +book 1 +softwareengin 1 +introductori 1 +text 1 +andan 1 +edit 1 +document 1 +prepar 1 +serv 1 +memberof 1 +editori 1 +committe 1 +member 1 +sciencescreen 1 +award 1 +associateeditor 1 +journal 1 +editor 1 +ieee 1 +transact 1 +among 1 +thing 1 +supervis 1 +mani 1 +these 1 +project 1 +fifteen 1 +dissert 1 +includ 1 +distinguish 1 +half 1 +former 1 +student 1 +academ 1 +posit 1 +work 1 +live 1 +hobbi 1 +good 1 +food 1 +trumpet 1 +hike 1 +bike 1 +tenni 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..1162247c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,97 @@ +titl 1 +lawrenc 1 +snyder 1 +professor 1 +receiv 1 +bachelor 1 +sdegre 1 +univers 1 +iowa 1 +mathemat 1 +econom 1 +andin 1 +carnegi 1 +mellon 1 +computersci 1 +visit 1 +scholar 1 +washington 1 +join 1 +faculti 1 +perman 1 +serv 1 +onth 1 +yale 1 +purdu 1 +scholarat 1 +harvard 1 +research 1 +rang 1 +proof 1 +theundecid 1 +properti 1 +program 1 +design 1 +developmentof 1 +singl 1 +chip 1 +cmo 1 +microprocessor 1 +quarter 1 +hors 1 +hecreat 1 +configur 1 +highli 1 +parallel 1 +architectur 1 +thepok 1 +environ 1 +inventor 1 +chaoticrout 1 +follow 1 +complet 1 +blue 1 +project 1 +nowprincip 1 +investig 1 +orca 1 +nwli 1 +associ 1 +editor 1 +journal 1 +computerand 1 +system 1 +scienc 1 +ofth 1 +area 1 +ieee 1 +transact 1 +anddistribut 1 +nation 1 +foundationadvisori 1 +committe 1 +divis 1 +comput 1 +particip 1 +numer 1 +advisorycommitte 1 +futur 1 +direct 1 +andcomput 1 +polici 1 +distinguish 1 +doctoraldissert 1 +award 1 +select 1 +chair 1 +first 1 +symposium 1 +algorithmsand 1 +addit 1 +dozen 1 +student 1 +doctor 1 +degreesund 1 +guid 1 +master 1 +seniorproject 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..79a55410 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,81 @@ +arun 1 +somani 1 +professor 1 +electr 1 +engin 1 +earn 1 +msee 1 +degre 1 +mcgill 1 +univers 1 +montreal 1 +canada 1 +respect 1 +prior 1 +work 1 +scientif 1 +offic 1 +govt 1 +india 1 +delhi 1 +period 1 +design 1 +develop 1 +anti 1 +submarin 1 +warfar 1 +system 1 +indian 1 +navi 1 +research 1 +interest 1 +area 1 +offault 1 +toler 1 +comput 1 +interconnect 1 +network 1 +architectur 1 +parallel 1 +algorithm 1 +current 1 +involv 1 +three 1 +major 1 +project 1 +high 1 +integr 1 +address 1 +issu 1 +relat 1 +tocach 1 +memori 1 +redund 1 +evalu 1 +tool 1 +congest 1 +control 1 +fault 1 +broadband 1 +proteu 1 +multiprocessor 1 +autom 1 +classif 1 +object 1 +base 1 +generalizedenhanc 1 +hypercub 1 +reconfigur 1 +explor 1 +coars 1 +grain 1 +like 1 +cook 1 +food 1 +hike 1 +plai 1 +bridg 1 +tabl 1 +tenni 1 +inform 1 +dpcnl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..390fbe06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,173 @@ +steven 1 +tanimoto 1 +home 1 +page 1 +professor 1 +comput 1 +scienc 1 +engin 1 +adjunct 1 +electricalengin 1 +receiv 1 +degre 1 +fromharvard 1 +princeton 1 +join 1 +theunivers 1 +washington 1 +faculti 1 +year 1 +teach 1 +atth 1 +univers 1 +connecticut 1 +visit 1 +professorat 1 +institut 1 +programm 1 +pari 1 +franc 1 +anda 1 +scholar 1 +linkp 1 +sweden 1 +sinc 1 +hasalso 1 +serv 1 +member 1 +depart 1 +scientist 1 +atkob 1 +japan 1 +think 1 +machin 1 +corpor 1 +cambridg 1 +massachusett 1 +june 1 +theinstitut 1 +research 1 +enseign 1 +superieur 1 +techniquesd 1 +electroniqu 1 +irest 1 +nant 1 +addit 1 +assist 1 +variou 1 +confer 1 +hasrec 1 +forimag 1 +devot 1 +imag 1 +processingand 1 +commun 1 +take 1 +place 1 +bordeaux 1 +interest 1 +includ 1 +analysi 1 +ofimag 1 +particularli 1 +us 1 +parallel 1 +processor 1 +educ 1 +technolog 1 +visual 1 +program 1 +artifici 1 +intellig 1 +currentlydirect 1 +sponsor 1 +project 1 +mathemat 1 +experi 1 +throughimag 1 +process 1 +whose 1 +object 1 +develop 1 +person 1 +softwarethat 1 +motiv 1 +grade 1 +student 1 +studi 1 +written 1 +coauthor 1 +paper 1 +edit 1 +thebook 1 +structur 1 +vision 1 +author 1 +textbook 1 +entitl 1 +element 1 +introductionus 1 +lisp 1 +publish 1 +common 1 +accompanyingsoftwar 1 +current 1 +work 1 +book 1 +subject 1 +ofparallel 1 +organ 1 +ieee 1 +societi 1 +internationalworkshop 1 +languag 1 +held 1 +seattl 1 +serveda 1 +gener 1 +chair 1 +meet 1 +bergen 1 +norwai 1 +also 1 +intern 1 +conferenceon 1 +pattern 1 +recognit 1 +subconfer 1 +programcommitte 1 +numer 1 +patternrecognit 1 +chairman 1 +societyworkshop 1 +architectur 1 +machineintellig 1 +steer 1 +committe 1 +theieee 1 +symposiaon 1 +editorialboard 1 +journal 1 +cvgip 1 +understand 1 +editor 1 +chief 1 +ieeetransact 1 +relat 1 +activ 1 +colleg 1 +engineeringeduc 1 +polici 1 +vice 1 +council 1 +elect 1 +fellow 1 +outsid 1 +steve 1 +enjoi 1 +plai 1 +jazz 1 +andclass 1 +piano 1 +music 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..7162292b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,133 @@ +titl 1 +paul 1 +young 1 +professor 1 +graduateof 1 +antioch 1 +colleg 1 +receiv 1 +hejoin 1 +univers 1 +washington 1 +seventeen 1 +year 1 +atpurdu 1 +earli 1 +faculti 1 +member 1 +inperhap 1 +first 1 +comput 1 +scienc 1 +depart 1 +unit 1 +state 1 +also 1 +nation 1 +foundat 1 +postdoctor 1 +fellow 1 +atstanford 1 +serv 1 +reed 1 +briefli 1 +aschairman 1 +inform 1 +theunivers 1 +mexico 1 +twice 1 +taught 1 +visit 1 +professorin 1 +divis 1 +california 1 +berkelei 1 +becam 1 +associ 1 +dean 1 +research 1 +facil 1 +engin 1 +interest 1 +theoret 1 +emphasi 1 +question 1 +complex 1 +thegener 1 +theori 1 +algorithm 1 +connect 1 +mathematicallog 1 +author 1 +coauthor 1 +paper 1 +area 1 +iscoauthor 1 +graduat 1 +textbook 1 +gener 1 +three 1 +time 1 +program 1 +committe 1 +symposiumon 1 +executivecommitte 1 +nomin 1 +special 1 +interestgroup 1 +sigact 1 +chairmanof 1 +ieee 1 +societi 1 +annualsymposium 1 +foc 1 +hasserv 1 +vice 1 +chairman 1 +stechnic 1 +mathemat 1 +chair 1 +programcommitte 1 +structur 1 +confer 1 +advisorysubcommitte 1 +thiscommitte 1 +ofth 1 +editori 1 +board 1 +issu 1 +control 1 +annal 1 +histori 1 +ofcomput 1 +current 1 +notr 1 +dame 1 +journal 1 +formallog 1 +system 1 +eleven 1 +student 1 +complet 1 +doctor 1 +dissert 1 +underprofessor 1 +direct 1 +sever 1 +gone 1 +dopostdoctor 1 +work 1 +cornel 1 +ofcalifornia 1 +eight 1 +hold 1 +posit 1 +avarieti 1 +chosen 1 +industri 1 +employ 1 +leather 1 +motorcycl 1 +jacket 1 +read 1 +ratherthan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..20d23c33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,69 @@ +titl 1 +john 1 +zahorjan 1 +professor 1 +graduat 1 +frombrown 1 +univers 1 +receiv 1 +oftoronto 1 +presidenti 1 +young 1 +investigatoraward 1 +primari 1 +research 1 +interest 1 +area 1 +schedul 1 +parallelsystem 1 +runtim 1 +support 1 +parallel 1 +comput 1 +applic 1 +supportfor 1 +mobil 1 +current 1 +focu 1 +polici 1 +continu 1 +mediaappl 1 +involv 1 +real 1 +time 1 +audio 1 +video 1 +thegoal 1 +provid 1 +system 1 +interfac 1 +allow 1 +torespond 1 +easili 1 +chang 1 +load 1 +activ 1 +topic 1 +includ 1 +techniqu 1 +parallelizationof 1 +code 1 +written 1 +sequenti 1 +languag 1 +program 1 +exhibit 1 +bothcontrol 1 +data 1 +construct 1 +develop 1 +intend 1 +formobil 1 +platform 1 +editori 1 +board 1 +ieee 1 +transactionson 1 +softwar 1 +engin 1 +survei 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..1b3567a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,175 @@ +mathemat 1 +experi 1 +imag 1 +process 1 +metip 1 +project 1 +director 1 +steven 1 +tanimoto 1 +depart 1 +comput 1 +scienc 1 +emphas 1 +practic 1 +us 1 +encourag 1 +discuss 1 +group 1 +learn 1 +exploratori 1 +open 1 +end 1 +goal 1 +digit 1 +help 1 +meet 1 +theseobject 1 +particular 1 +develop 1 +seri 1 +applicationsdesign 1 +allow 1 +student 1 +manipul 1 +choic 1 +materi 1 +intend 1 +enrich 1 +activ 1 +rather 1 +part 1 +astandard 1 +classroom 1 +curriculum 1 +teacher 1 +plai 1 +variou 1 +role 1 +withthes 1 +exampl 1 +catalyz 1 +bylead 1 +theconcept 1 +explor 1 +current 1 +number 1 +program 1 +toexplor 1 +pixel 1 +calcul 1 +click 1 +order 1 +free 1 +copi 1 +todai 1 +warper 1 +transform 1 +programm 1 +applic 1 +primarili 1 +pentium 1 +base 1 +srun 1 +microsoft 1 +window 1 +alsoavail 1 +appl 1 +macintosh 1 +volunt 1 +mathematicsteach 1 +particip 1 +test 1 +experiment 1 +transcriptproject 1 +design 1 +record 1 +keep 1 +framework 1 +willfacilit 1 +storag 1 +person 1 +academicinform 1 +hard 1 +disk 1 +floppi 1 +own 1 +list 1 +peopl 1 +work 1 +themetip 1 +close 1 +relat 1 +involv 1 +studi 1 +ofmultiplay 1 +educ 1 +tointegr 1 +idea 1 +describ 1 +prospect 1 +forth 1 +direct 1 +distribut 1 +databas 1 +collect 1 +user 1 +itsxform 1 +softwar 1 +done 1 +somethingfun 1 +pleas 1 +know 1 +put 1 +version 1 +document 1 +onlin 1 +link 1 +littl 1 +demonstr 1 +xform 1 +beenput 1 +togeth 1 +graduat 1 +took 1 +seminar 1 +winter 1 +environ 1 +integr 1 +witha 1 +subset 1 +common 1 +lisp 1 +offer 1 +technic 1 +essenti 1 +newapproach 1 +teach 1 +fundamentalattract 1 +approach 1 +thecomput 1 +pursuit 1 +creat 1 +neat 1 +visual 1 +effect 1 +digitalimag 1 +portrai 1 +thing 1 +interest 1 +successfulli 1 +instal 1 +would 1 +like 1 +discussteach 1 +contact 1 +support 1 +nation 1 +foundat 1 +undergr 1 +bricker 1 +washington 1 +last 1 +modifi 1 +tuesdai 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..fa72313a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,53 @@ +univers 1 +washington 1 +research 1 +mobil 1 +ubiquit 1 +comput 1 +washingtonher 1 +overview 1 +computingresearch 1 +project 1 +mobisa 1 +inform 1 +system 1 +wireless 1 +environ 1 +infrastructur 1 +handheld 1 +task 1 +graph 1 +manag 1 +applic 1 +cope 1 +resourc 1 +variabl 1 +paper 1 +survei 1 +describ 1 +fundament 1 +challeng 1 +field 1 +program 1 +methodolog 1 +disconnect 1 +operationdistribut 1 +transact 1 +mobilecomput 1 +systemcontact 1 +prof 1 +brian 1 +bershadprof 1 +gaetano 1 +borriellomarc 1 +fiuczynskigeorg 1 +formanprof 1 +hank 1 +levygeoff 1 +voelkerterri 1 +watsonprof 1 +john 1 +zahorjan 1 +last 1 +updat 1 +forman 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..39532608 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,138 @@ +cecil 1 +vortex 1 +projectuw 1 +projectwelcom 1 +home 1 +page 1 +project 1 +conduct 1 +research 1 +program 1 +languag 1 +design 1 +implement 1 +emphasi 1 +issu 1 +object 1 +orient 1 +pure 1 +intend 1 +support 1 +rapidconstruct 1 +high 1 +qualiti 1 +extens 1 +softwar 1 +incorporatesmulti 1 +method 1 +simpl 1 +prototyp 1 +base 1 +model 1 +mechan 1 +tosupport 1 +structur 1 +form 1 +comput 1 +inherit 1 +modul 1 +basedencapsul 1 +flexibl 1 +static 1 +type 1 +system 1 +allowsstat 1 +dynam 1 +code 1 +freeli 1 +optim 1 +compil 1 +infrastructur 1 +forobject 1 +level 1 +target 1 +pureobject 1 +like 1 +hybrid 1 +orientedlanguag 1 +modula 1 +java 1 +current 1 +incorporateshigh 1 +class 1 +analysi 1 +hierachyanalysi 1 +profil 1 +guid 1 +receiv 1 +predict 1 +guidedselect 1 +procedur 1 +special 1 +intraprocedur 1 +messag 1 +split 1 +automat 1 +inlin 1 +closur 1 +analys 1 +also 1 +includ 1 +acollect 1 +standard 1 +commonsubexpress 1 +elimin 1 +dead 1 +assign 1 +vortexcompil 1 +written 1 +entir 1 +initi 1 +beta 1 +releas 1 +currentlyavail 1 +sparc 1 +run 1 +either 1 +suno 1 +solari 1 +send 1 +mail 1 +interest 1 +request 1 +bodi 1 +subscrib 1 +list 1 +ofinterest 1 +parti 1 +inform 1 +obtain 1 +thebeta 1 +recent 1 +finish 1 +technic 1 +report 1 +describ 1 +much 1 +overview 1 +detail 1 +goal 1 +direct 1 +postscript 1 +version 1 +avail 1 +member 1 +past 1 +paper 1 +sampl 1 +peopl 1 +uwcs 1 +intern 1 +document 1 +sourc 1 +relat 1 +pointer 1 +projectslast 1 +updat 1 +august 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..685aa420 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,34 @@ +grail 1 +graphic 1 +imag 1 +laboratorywelcom 1 +home 1 +page 1 +laboratori 1 +theunivers 1 +washington 1 +depart 1 +comput 1 +scienc 1 +engin 1 +inform 1 +peopl 1 +cours 1 +research 1 +project 1 +public 1 +these 1 +softwar 1 +data 1 +cool 1 +neighborhood 1 +univers 1 +seattl 1 +local 1 +interest 1 +disk 1 +usag 1 +polici 1 +comment 1 +mtwong 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..f25b2a0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,71 @@ +chaotic 1 +rout 1 +project 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +washington 1 +seattl 1 +chao 1 +friend 1 +mine 1 +dylan 1 +peopleal 1 +sort 1 +peopl 1 +work 1 +research 1 +paper 1 +technic 1 +report 1 +repositori 1 +papersand 1 +chaoticrout 1 +avail 1 +router 1 +chip 1 +implement 1 +algorithm 1 +hardwar 1 +built 1 +test 1 +micron 1 +cmo 1 +redesign 1 +process 1 +better 1 +perform 1 +simul 1 +power 1 +allsort 1 +network 1 +includ 1 +nice 1 +graphicalfront 1 +standard 1 +present 1 +result 1 +pcrcw 1 +discuss 1 +presentationof 1 +abl 1 +come 1 +upwith 1 +guidelin 1 +group 1 +build 1 +list 1 +thathav 1 +web 1 +describ 1 +interconnect 1 +parallel 1 +commun 1 +workshop 1 +held 1 +univeristi 1 +proceed 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..2f67fa14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,201 @@ +chinook 1 +project 1 +shinook 1 +salmon 1 +larg 1 +oncorhynchu 1 +tshawytscha 1 +pacif 1 +amer 1 +name 1 +tribe 1 +warm 1 +wind 1 +blow 1 +east 1 +rocki 1 +mountain 1 +southerli 1 +west 1 +rare 1 +american 1 +sled 1 +doga 1 +hardwar 1 +softwar 1 +synthesi 1 +toolfor 1 +real 1 +time 1 +embed 1 +system 1 +cadtool 1 +design 1 +control 1 +domin 1 +reactivesystem 1 +constraint 1 +map 1 +behavior 1 +descriptionto 1 +user 1 +target 1 +architectur 1 +fill 1 +detail 1 +neededto 1 +build 1 +complet 1 +enabl 1 +make 1 +inform 1 +designdecis 1 +high 1 +level 1 +earli 1 +cycl 1 +rather 1 +reiterateaft 1 +work 1 +retarget 1 +willnot 1 +maintain 1 +also 1 +designerto 1 +take 1 +advantag 1 +technolog 1 +instead 1 +ti 1 +legacycod 1 +current 1 +activ 1 +develop 1 +gener 1 +currentlyw 1 +interprocessorcommun 1 +effici 1 +accur 1 +simul 1 +moredetail 1 +becom 1 +avail 1 +shortli 1 +character 1 +follow 1 +meet 1 +ratherthan 1 +try 1 +maxim 1 +averag 1 +perform 1 +util 1 +assumesmanu 1 +partit 1 +believ 1 +issu 1 +intricateand 1 +sometim 1 +even 1 +technic 1 +want 1 +evalu 1 +differentarchitectur 1 +forc 1 +singl 1 +fix 1 +processor 1 +asicarchitectur 1 +synthes 1 +reli 1 +onoff 1 +shelf 1 +kernel 1 +discourag 1 +first 1 +version 1 +tool 1 +oper 1 +innovemb 1 +shownat 1 +autom 1 +confer 1 +diego 1 +june 1 +mainfeatur 1 +includ 1 +automat 1 +connect 1 +peripheraldevic 1 +sequenti 1 +code 1 +concurr 1 +descript 1 +andsynthes 1 +devic 1 +driver 1 +input 1 +verilog 1 +output 1 +hardwarenetlist 1 +need 1 +compon 1 +togeth 1 +softwareprogram 1 +main 1 +topic 1 +interfacingproblem 1 +schedul 1 +timingconstraint 1 +function 1 +improv 1 +demonstr 1 +nato 1 +summer 1 +school 1 +swcodedesign 1 +tremezzo 1 +itali 1 +incorpor 1 +severalmor 1 +interfac 1 +techniqu 1 +memori 1 +moreeffici 1 +chinookersfacultygaetano 1 +borriellogradu 1 +student 1 +chou 1 +ross 1 +ortegaken 1 +hinesian 1 +macduff 1 +recent 1 +selizabeth 1 +walkupscott 1 +hauck 1 +henrik 1 +hulgaardstafflarri 1 +mcmurchielist 1 +paperschinook 1 +sponsorsarpa 1 +contract 1 +nation 1 +scienc 1 +foundat 1 +grant 1 +graduat 1 +fellowship 1 +walkup 1 +patricia 1 +robert 1 +harri 1 +ortega 1 +link 1 +depart 1 +comput 1 +engin 1 +universityof 1 +washington 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..c1fb1b12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,135 @@ +emerald 1 +projectid 1 +develop 1 +phase 1 +fpga 1 +architectur 1 +would 1 +makeus 1 +reliabl 1 +map 1 +tool 1 +produc 1 +accur 1 +performanceevalu 1 +propos 1 +design 1 +unfortun 1 +given 1 +quickproduct 1 +time 1 +frame 1 +face 1 +construct 1 +isoften 1 +postpon 1 +mani 1 +featur 1 +beenfrozen 1 +satisfi 1 +need 1 +fast 1 +prototyp 1 +havedesign 1 +power 1 +driven 1 +system 1 +quickdevelop 1 +heart 1 +provid 1 +basicfeatur 1 +logic 1 +block 1 +analysi 1 +synthesisand 1 +technolog 1 +global 1 +placement 1 +partit 1 +anddetail 1 +rout 1 +environ 1 +aneffici 1 +thoroughli 1 +specifi 1 +blockarchitectur 1 +well 1 +specif 1 +metric 1 +tailorplac 1 +moreov 1 +parameter 1 +schematicspecif 1 +allow 1 +variat 1 +quickli 1 +capturedand 1 +evalu 1 +public 1 +document 1 +contain 1 +page 1 +includ 1 +byth 1 +contribut 1 +author 1 +mean 1 +ensur 1 +dissemin 1 +ofscholarli 1 +technic 1 +work 1 +commerci 1 +basi 1 +copyright 1 +andal 1 +right 1 +therein 1 +maintain 1 +copyrighthold 1 +notwithstand 1 +offer 1 +hereelectron 1 +understood 1 +person 1 +copi 1 +thisinform 1 +adher 1 +term 1 +constraint 1 +invok 1 +eachauthor 1 +repost 1 +without 1 +theexplicit 1 +permiss 1 +holder 1 +definit 1 +paper 1 +darren 1 +cronquist 1 +larri 1 +mcmurchi 1 +compil 1 +appear 1 +proceed 1 +sigda 1 +fourth 1 +intern 1 +symposium 1 +field 1 +programm 1 +gate 1 +arrai 1 +februari 1 +router 1 +us 1 +emeraldlarri 1 +carl 1 +ebel 1 +pathfind 1 +negoti 1 +basedperform 1 +third 1 +arraysaid 1 +research 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..e3dedb3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,133 @@ +northwest 1 +laboratori 1 +integr 1 +system 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +usath 1 +engag 1 +larg 1 +scale 1 +vlsi 1 +aid 1 +design 1 +research 1 +develop 1 +educ 1 +sinc 1 +late 1 +todai 1 +focu 1 +wide 1 +varieti 1 +architectur 1 +embed 1 +sytem 1 +current 1 +project 1 +asynchron 1 +circuit 1 +verificationtim 1 +separ 1 +event 1 +specif 1 +synthesi 1 +verif 1 +time 1 +survei 1 +methodolog 1 +well 1 +first 1 +fpga 1 +rapid 1 +prototypingtriptych 1 +montag 1 +triptych 1 +improv 1 +densiti 1 +commerci 1 +multi 1 +prototyp 1 +springbok 1 +board 1 +level 1 +partit 1 +assign 1 +rout 1 +topolog 1 +work 1 +gener 1 +emerald 1 +adapt 1 +toolset 1 +complet 1 +map 1 +placement 1 +toolscan 1 +automat 1 +descript 1 +fpgaarchitectur 1 +metric 1 +incorporatedinto 1 +variou 1 +tool 1 +result 1 +systemsth 1 +chinook 1 +hardwar 1 +softwar 1 +simul 1 +applic 1 +perform 1 +optim 1 +synchron 1 +circuitsretim 1 +clock 1 +effici 1 +algorithm 1 +retim 1 +uselevel 1 +sensit 1 +latch 1 +reduc 1 +cost 1 +andincreas 1 +toler 1 +skew 1 +method 1 +synchronouscircuit 1 +latenc 1 +feedback 1 +contraint 1 +network 1 +routerth 1 +chaoticrout 1 +self 1 +tune 1 +systemsself 1 +direct 1 +kehlprevi 1 +gemini 1 +valid 1 +layout 1 +compar 1 +implement 1 +mactest 1 +digit 1 +function 1 +tester 1 +chip 1 +cmo 1 +voltag 1 +arpa 1 +reportsarpa 1 +bluebook 1 +paragraph 1 +overview 1 +accomplish 1 +high 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..bd295cb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,61 @@ +softwar 1 +safeti 1 +univers 1 +washington 1 +plai 1 +increasingli 1 +import 1 +role 1 +system 1 +nuclear 1 +reactor 1 +aircraft 1 +defenc 1 +space 1 +chemic 1 +plant 1 +medic 1 +equip 1 +consequ 1 +malfunct 1 +critic 1 +must 1 +pass 1 +rigor 1 +test 1 +review 1 +us 1 +although 1 +engin 1 +techniqu 1 +exist 1 +decad 1 +appli 1 +contain 1 +digit 1 +comput 1 +goal 1 +project 1 +develop 1 +theoret 1 +foundat 1 +methodolog 1 +build 1 +built 1 +upon 1 +safewar 1 +nanci 1 +leveson 1 +summar 1 +issu 1 +involv 1 +lai 1 +work 1 +analysi 1 +support 1 +prototyp 1 +tool 1 +valid 1 +specif 1 +scienc 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..37998daf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,169 @@ +internet 1 +softbotinternet 1 +softbotth 1 +softbot 1 +finalist 1 +discoveraward 1 +technolog 1 +innov 1 +comput 1 +softwar 1 +build 1 +autonom 1 +agent 1 +interact 1 +real 1 +world 1 +softwareenviron 1 +oper 1 +system 1 +databas 1 +pragmaticallyconveni 1 +intellectu 1 +challeng 1 +substrat 1 +research 1 +support 1 +claim 1 +util 1 +plan 1 +machin 1 +learningtechniqu 1 +develop 1 +robot 1 +acustomiz 1 +moder 1 +intellig 1 +assist 1 +internetaccess 1 +accept 1 +goal 1 +high 1 +level 1 +languag 1 +generatesand 1 +execut 1 +achiev 1 +learn 1 +itsexperi 1 +enabl 1 +human 1 +user 1 +state 1 +want 1 +accomplish 1 +disambigu 1 +requestand 1 +dynam 1 +determin 1 +satisfyit 1 +us 1 +unix 1 +shell 1 +wide 1 +interactwith 1 +rang 1 +resourc 1 +take 1 +tour 1 +sgraphic 1 +interfac 1 +princip 1 +investig 1 +oren 1 +etzioni 1 +daniel 1 +weld 1 +also 1 +check 1 +metacrawl 1 +field 1 +servic 1 +tosearch 1 +multipl 1 +indic 1 +parallel 1 +provid 1 +sophisticatedprun 1 +option 1 +inform 1 +contact 1 +washington 1 +access 1 +introduct 1 +project 1 +found 1 +base 1 +cacm 1 +juli 1 +methodolog 1 +motiv 1 +without 1 +repli 1 +brook 1 +magazin 1 +decemb 1 +technic 1 +paper 1 +cartoonrepresent 1 +taken 1 +blanchard 1 +articl 1 +appear 1 +issu 1 +ofcolumn 1 +univers 1 +alumni 1 +group 1 +current 1 +graphic 1 +toth 1 +allow 1 +easili 1 +specifi 1 +extend 1 +maintain 1 +xiiplann 1 +keith 1 +golden 1 +work 1 +specif 1 +search 1 +control 1 +implement 1 +advanc 1 +space 1 +browser 1 +debug 1 +planner 1 +dave 1 +christianson 1 +compar 1 +rule 1 +versu 1 +procedur 1 +sujai 1 +parekh 1 +ilalearn 1 +design 1 +protocol 1 +multi 1 +collabor 1 +negoti 1 +ying 1 +experi 1 +reactiv 1 +domain 1 +kwok 1 +goan 1 +optim 1 +ingram 1 +gather 1 +hacker 1 +info 1 +local 1 +back 1 +home 1 +page 1 +mike 1 +perkowitz 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..9427d636 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,213 @@ +spin 1 +washington 1 +oper 1 +systemspin 1 +extens 1 +system 1 +kernel 1 +thatsupport 1 +dynam 1 +adapt 1 +interfac 1 +andimplement 1 +direct 1 +applic 1 +control 1 +stillmaintain 1 +integr 1 +inter 1 +isol 1 +allow 1 +load 1 +code 1 +atruntim 1 +run 1 +accesshardwar 1 +servic 1 +almost 1 +nooverhead 1 +basic 1 +procedur 1 +call 1 +data 1 +pass 1 +byrefer 1 +rather 1 +copi 1 +providesa 1 +core 1 +capabl 1 +resourc 1 +thesear 1 +us 1 +collect 1 +implement 1 +gener 1 +systemservic 1 +address 1 +space 1 +thread 1 +network 1 +allextens 1 +written 1 +typesaf 1 +languag 1 +modula 1 +properti 1 +oftypesafeti 1 +prevent 1 +crash 1 +attemptingto 1 +manipul 1 +arbitrari 1 +piec 1 +alpha 1 +workstat 1 +writeboth 1 +special 1 +video 1 +wella 1 +support 1 +purpos 1 +unix 1 +program 1 +connect 1 +machinerun 1 +server 1 +quick 1 +result 1 +structur 1 +extend 1 +withlow 1 +overhead 1 +exampl 1 +handl 1 +recov 1 +page 1 +fault 1 +microsecond 1 +take 1 +creat 1 +executeit 1 +termin 1 +synchron 1 +fork 1 +join 1 +protectedprocedur 1 +anoth 1 +function 1 +cross 1 +machin 1 +overethernet 1 +realli 1 +oldadapt 1 +less 1 +operationsund 1 +mach 1 +time 1 +longer 1 +samehardwar 1 +benchmark 1 +saveyourself 1 +effort 1 +recent 1 +report 1 +paper 1 +bind 1 +invoc 1 +mechan 1 +provid 1 +flexibl 1 +effici 1 +andsimpl 1 +execut 1 +appear 1 +osdi 1 +safeti 1 +perform 1 +design 1 +sosp 1 +protocol 1 +architectur 1 +forappl 1 +specif 1 +usenix 1 +winter 1 +confer 1 +write 1 +describ 1 +experi 1 +build 1 +high 1 +make 1 +clear 1 +distinct 1 +pretti 1 +happi 1 +deal 1 +shortcom 1 +order 1 +safe 1 +link 1 +linker 1 +point 1 +abil 1 +manag 1 +linkabl 1 +namespac 1 +runtim 1 +interposit 1 +intern 1 +commun 1 +facil 1 +show 1 +improv 1 +critic 1 +inform 1 +compil 1 +wait 1 +trail 1 +project 1 +talk 1 +member 1 +interest 1 +bottom 1 +line 1 +arpa 1 +overview 1 +summari 1 +regular 1 +friend 1 +gotten 1 +assist 1 +academia 1 +industri 1 +sai 1 +involv 1 +relat 1 +pointer 1 +peopl 1 +barb 1 +arrow 1 +document 1 +latest 1 +statu 1 +avail 1 +could 1 +qualif 1 +credit 1 +master 1 +degre 1 +fund 1 +raship 1 +posit 1 +undergradu 1 +mascot 1 +encourag 1 +mani 1 +decid 1 +adopt 1 +ourmascot 1 +maintain 1 +brian 1 +bershad 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..a51d95c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,117 @@ +dynam 1 +compil 1 +projectth 1 +projectmor 1 +inform 1 +project 1 +member 1 +paper 1 +relat 1 +projectsuw 1 +section 1 +student 1 +group 1 +webdynam 1 +enabl 1 +optim 1 +base 1 +valu 1 +ofinvari 1 +data 1 +comput 1 +time 1 +us 1 +theserun 1 +constant 1 +elimin 1 +memoryload 1 +perform 1 +propag 1 +fold 1 +remov 1 +branch 1 +theydetermin 1 +fulli 1 +unrol 1 +loop 1 +bound 1 +howev 1 +performancebenefit 1 +effici 1 +code 1 +offsetbi 1 +cost 1 +approach 1 +dynamiccompil 1 +strive 1 +fast 1 +high 1 +qualitydynam 1 +programm 1 +annot 1 +region 1 +theprogram 1 +static 1 +optimizingcompil 1 +automat 1 +produc 1 +machin 1 +templat 1 +pair 1 +dataflow 1 +analys 1 +identifi 1 +variabl 1 +willb 1 +simpl 1 +copi 1 +thetempl 1 +patch 1 +execut 1 +work 1 +target 1 +gener 1 +purpos 1 +imper 1 +program 1 +languag 1 +initi 1 +initialexperi 1 +appli 1 +producedspeedup 1 +rang 1 +part 1 +spinproject 1 +eventu 1 +system 1 +dynamicallycompil 1 +spin 1 +kernel 1 +exampl 1 +spinev 1 +dispatch 1 +also 1 +activ 1 +explor 1 +otherposs 1 +applic 1 +invirtu 1 +interpret 1 +prototyp 1 +systemi 1 +describ 1 +pldi 1 +arenow 1 +start 1 +design 1 +build 1 +second 1 +wewil 1 +releas 1 +detail 1 +soon 1 +last 1 +updat 1 +august 1 +grant 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..10e54d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,4 @@ +move 1 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..eca52595 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,93 @@ +project 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washington 1 +seattl 1 +info 1 +eduzpl 1 +arrai 1 +base 1 +program 1 +languag 1 +suitabl 1 +would 1 +previous 1 +written 1 +fortran 1 +fast 1 +sequenti 1 +parallel 1 +without 1 +modif 1 +special 1 +direct 1 +machin 1 +independ 1 +recompil 1 +necessari 1 +higher 1 +level 1 +concept 1 +like 1 +border 1 +elimin 1 +error 1 +prone 1 +index 1 +tediou 1 +loop 1 +typic 1 +shorter 1 +easili 1 +understood 1 +modifi 1 +scientist 1 +find 1 +region 1 +shatter 1 +control 1 +flow 1 +conclus 1 +ideal 1 +scientif 1 +walk 1 +small 1 +write 1 +compil 1 +yourmachin 1 +programm 1 +area 1 +shouldconsid 1 +enrol 1 +zpthi 1 +autumn 1 +check 1 +recent 1 +chang 1 +overview 1 +high 1 +minut 1 +introduct 1 +browser 1 +right 1 +paper 1 +manual 1 +relat 1 +detail 1 +line 1 +inform 1 +sampl 1 +peopl 1 +member 1 +horizon 1 +descript 1 +group 1 +futur 1 +acknowledg 1 +list 1 +help 1 +support 1 +work 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..8aa2ff3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,149 @@ +simultan 1 +multithread 1 +home 1 +pagesimultan 1 +projectoverviewpeoplepubl 1 +overviewth 1 +crucial 1 +problem 1 +face 1 +todai 1 +high 1 +speed 1 +microprocessor 1 +maintain 1 +processor 1 +util 1 +long 1 +instruct 1 +memori 1 +latenc 1 +allevi 1 +modern 1 +issu 1 +multipl 1 +cycl 1 +superscalar 1 +interleav 1 +execut 1 +differentthread 1 +differ 1 +ultim 1 +though 1 +techniqu 1 +limit 1 +amount 1 +parallel 1 +avail 1 +within 1 +singl 1 +thread 1 +permit 1 +independ 1 +function 1 +unit 1 +combin 1 +issuefeatur 1 +wide 1 +hide 1 +abilityof 1 +architectur 1 +hardwar 1 +contextsar 1 +activ 1 +compet 1 +resourc 1 +dynam 1 +share 1 +enabl 1 +exploitthread 1 +level 1 +interchang 1 +formsof 1 +effect 1 +us 1 +increas 1 +studi 1 +havedemonstr 1 +significantli 1 +improvesprocessor 1 +throughput 1 +perform 1 +multiprogram 1 +parallelworkload 1 +shown 1 +gain 1 +achievedin 1 +minim 1 +extens 1 +ordersuperscalar 1 +current 1 +futur 1 +work 1 +includ 1 +investig 1 +fast 1 +synchronizationtechniqu 1 +also 1 +conduct 1 +research 1 +otherarchitectur 1 +compil 1 +peoplefaculti 1 +susan 1 +egger 1 +hank 1 +levygradu 1 +student 1 +jack 1 +dean 1 +tullsenindustri 1 +collabor 1 +digit 1 +equip 1 +corpor 1 +joel 1 +emer 1 +rebecca 1 +stamm 1 +public 1 +convert 1 +abstract 1 +postscript 1 +levi 1 +andd 1 +tullsen 1 +submit 1 +juli 1 +exploit 1 +choic 1 +fetch 1 +implement 1 +proceed 1 +annual 1 +intern 1 +symposium 1 +comput 1 +philadelphia 1 +first 1 +suif 1 +workshop 1 +stanford 1 +januari 1 +maxim 1 +chip 1 +andh 1 +santa 1 +margherita 1 +ligur 1 +itali 1 +june 1 +check 1 +list 1 +project 1 +still 1 +doon 1 +affair 1 +page 1 +lojlo 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..cec36258 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,73 @@ +home 1 +pagecomput 1 +scienc 1 +departmentabout 1 +comput 1 +departmentour 1 +depart 1 +form 1 +consist 1 +rank 1 +countri 1 +faculti 1 +member 1 +receiv 1 +fourteen 1 +presidenti 1 +young 1 +investig 1 +award 1 +packard 1 +fellowship 1 +women 1 +scientist 1 +engin 1 +incent 1 +excel 1 +three 1 +doctor 1 +dissert 1 +develop 1 +area 1 +research 1 +project 1 +inform 1 +peopl 1 +cours 1 +offer 1 +fall 1 +class 1 +futur 1 +timet 1 +technic 1 +report 1 +system 1 +answer 1 +frequent 1 +ask 1 +question 1 +alumni 1 +graduat 1 +guidebook 1 +undergradu 1 +annual 1 +onlin 1 +util 1 +madison 1 +local 1 +servic 1 +relat 1 +organ 1 +colophon 1 +statist 1 +server 1 +us 1 +infocomput 1 +departmentunivers 1 +wisconsin 1 +madisona 1 +west 1 +dayton 1 +streetmadison 1 +wisc 1 +voic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..81d1e47a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,114 @@ +object 1 +explor 1 +purpos 1 +viewpoint 1 +control 1 +kyro 1 +kutulako 1 +chuck 1 +dyer 1 +consider 1 +interest 1 +recent 1 +emploi 1 +simpleobserv 1 +behavior 1 +either 1 +make 1 +recoveri 1 +scene 1 +propertieseasi 1 +fixat 1 +combin 1 +simpl 1 +order 1 +toperform 1 +complex 1 +task 1 +navig 1 +obstacl 1 +avoid 1 +ourwork 1 +focus 1 +abil 1 +activ 1 +observ 1 +pointof 1 +perform 1 +involv 1 +develop 1 +provabl 1 +correct 1 +makesimpl 1 +motion 1 +decis 1 +base 1 +local 1 +geometryof 1 +requir 1 +minim 1 +process 1 +imag 1 +first 1 +consid 1 +recov 1 +shape 1 +thesurfac 1 +select 1 +point 1 +approach 1 +generalobserv 1 +posit 1 +provid 1 +inform 1 +objectthan 1 +other 1 +exist 1 +special 1 +beexploit 1 +mobil 1 +effici 1 +anddeterminist 1 +strategi 1 +reach 1 +show 1 +localshap 1 +achiev 1 +us 1 +qualitativestrategi 1 +smoothli 1 +viewingdirect 1 +align 1 +princip 1 +direct 1 +selectedpoint 1 +second 1 +deriv 1 +global 1 +descriptionof 1 +formul 1 +surfac 1 +reconstruct 1 +thequalit 1 +observationso 1 +visibl 1 +slide 1 +maxim 1 +connect 1 +region 1 +arbitrari 1 +smooth 1 +attempt 1 +maintain 1 +well 1 +defin 1 +geometr 1 +relationship 1 +observationand 1 +view 1 +suggest 1 +lead 1 +also 1 +simplifi 1 +frame 1 +comput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..6acb595c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,99 @@ +deform 1 +contour 1 +model 1 +extract 1 +detect 1 +classif 1 +fung 1 +roland 1 +chin 1 +develop 1 +integr 1 +approach 1 +classifi 1 +directli 1 +noisi 1 +imag 1 +conduct 1 +case 1 +studi 1 +regular 1 +formul 1 +initi 1 +ofact 1 +snake 1 +us 1 +minimax 1 +principl 1 +deriv 1 +criterion 1 +wherebi 1 +valu 1 +automat 1 +implicitli 1 +determin 1 +along 1 +furthermor 1 +energi 1 +function 1 +yield 1 +contain 1 +hough 1 +transform 1 +special 1 +subsequ 1 +consid 1 +problem 1 +arbitrari 1 +combin 1 +stabl 1 +invari 1 +anduniqu 1 +markov 1 +random 1 +field 1 +priordistribut 1 +exert 1 +influenc 1 +global 1 +allow 1 +bayesian 1 +framework 1 +turn 1 +posterior 1 +estim 1 +equival 1 +minim 1 +gener 1 +activ 1 +final 1 +lower 1 +level 1 +visual 1 +task 1 +withpattern 1 +recognit 1 +process 1 +base 1 +nearman 1 +pearson 1 +lemma 1 +optim 1 +classificationtest 1 +summat 1 +peak 1 +practic 1 +applic 1 +small 1 +region 1 +need 1 +margin 1 +distribut 1 +valid 1 +confirm 1 +extens 1 +rigor 1 +experiment 1 +gsnake 1 +softwar 1 +avail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..81390c44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,198 @@ +data 1 +visual 1 +base 1 +lattic 1 +bill 1 +hibbard 1 +brian 1 +paul 1 +chuck 1 +dyer 1 +defin 1 +foundat 1 +theidea 1 +process 1 +function 1 +ofdata 1 +object 1 +call 1 +model 1 +displai 1 +calleda 1 +prototyp 1 +system 1 +adha 1 +implement 1 +upon 1 +idea 1 +guid 1 +follow 1 +principl 1 +scientist 1 +develop 1 +mathemat 1 +natur 1 +objectsrepres 1 +objectsfrequ 1 +contain 1 +infinit 1 +precis 1 +real 1 +number 1 +functionswith 1 +domain 1 +wherea 1 +comput 1 +containfinit 1 +amount 1 +inform 1 +thu 1 +approxim 1 +themathemat 1 +repres 1 +finit 1 +pixel 1 +color 1 +chosenfrom 1 +palett 1 +anim 1 +sequenc 1 +numbersof 1 +frame 1 +ideal 1 +close 1 +relationship 1 +appropri 1 +computationalmodel 1 +program 1 +languag 1 +purpos 1 +commun 1 +informationcont 1 +particular 1 +expressivenesscondit 1 +map 1 +thatdisplai 1 +encod 1 +fact 1 +onlythos 1 +complet 1 +sens 1 +itimpl 1 +satisfyingth 1 +expressivenss 1 +condit 1 +order 1 +relat 1 +onhow 1 +wecan 1 +howprecis 1 +voxelresolut 1 +therefor 1 +visualizationprocess 1 +objectsto 1 +interpret 1 +context 1 +show 1 +satisfi 1 +theexpress 1 +isomorph 1 +scientificdata 1 +built 1 +primitivevari 1 +scalar 1 +time 1 +latitud 1 +radianc 1 +temperatur 1 +tupl 1 +arrai 1 +type 1 +constructor 1 +appropriatefor 1 +seri 1 +containsth 1 +scientif 1 +canalso 1 +displayi 1 +voxel 1 +specifi 1 +graphicsprimit 1 +locationand 1 +size 1 +volum 1 +place 1 +animationsequ 1 +class 1 +fromth 1 +us 1 +primit 1 +variabl 1 +thedisplai 1 +graphic 1 +ofcours 1 +design 1 +alreadi 1 +assum 1 +exampl 1 +given 1 +isnatur 1 +graph 1 +along 1 +axi 1 +andtemperatur 1 +anoth 1 +remark 1 +thing 1 +wedo 1 +take 1 +assumpt 1 +consequ 1 +fundament 1 +calledvi 1 +adthat 1 +allow 1 +experi 1 +algorithm 1 +steer 1 +theircomput 1 +creat 1 +theirprogram 1 +thevi 1 +vvof 1 +thatsatisfi 1 +express 1 +howev 1 +implementationi 1 +quit 1 +flow 1 +auser 1 +interfac 1 +control 1 +abstractionof 1 +render 1 +pipelin 1 +user 1 +interfacefor 1 +abstract 1 +ofmap 1 +possibl 1 +recurs 1 +defineddata 1 +complex 1 +link 1 +tree 1 +ingener 1 +datatyp 1 +orient 1 +provid 1 +rigor 1 +help 1 +analyt 1 +altern 1 +usualapproach 1 +construct 1 +bywrit 1 +special 1 +fora 1 +specif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..a7944235 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,52 @@ +condor 1 +project 1 +homepag 1 +object 1 +goal 1 +develop 1 +implement 1 +deploi 1 +evaluatemechan 1 +polici 1 +support 1 +high 1 +throughput 1 +comput 1 +larg 1 +collect 1 +distribut 1 +own 1 +resourc 1 +guid 1 +technologicaland 1 +sociolog 1 +challeng 1 +environ 1 +team 1 +build 1 +softwar 1 +tool 1 +enabl 1 +scientist 1 +engin 1 +increas 1 +introduct 1 +start 1 +research 1 +system 1 +pool 1 +univers 1 +wisconsin 1 +madison 1 +help 1 +page 1 +home 1 +world 1 +mail 1 +list 1 +comment 1 +suggestionscondor 1 +admin 1 +wisc 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..72c448f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,9 @@ +next 1 +homepag 1 +peopl 1 +project 1 +last 1 +modifi 1 +septemb 1 +miron 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..848eae46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,170 @@ +coral 1 +databas 1 +projectcor 1 +projectdocu 1 +content 1 +objectiveoverviewreleas 1 +informationse 1 +also 1 +public 1 +relat 1 +coralpeopl 1 +work 1 +coraloth 1 +research 1 +madisonobject 1 +object 1 +project 1 +develop 1 +robust 1 +efficientdeduct 1 +system 1 +investig 1 +variou 1 +applic 1 +domain 1 +sever 1 +algorithm 1 +underli 1 +coralsystem 1 +member 1 +group 1 +durationof 1 +sinc 1 +overview 1 +deduct 1 +support 1 +rich 1 +declar 1 +languag 1 +interfac 1 +allow 1 +combin 1 +declaritiveand 1 +imper 1 +program 1 +queri 1 +supportsgener 1 +horn 1 +claus 1 +augment 1 +complex 1 +term 1 +aggreg 1 +negat 1 +tupl 1 +contain 1 +univers 1 +quantifi 1 +variabl 1 +canb 1 +organ 1 +collect 1 +interact 1 +modul 1 +coralimplement 1 +wide 1 +rang 1 +evalu 1 +strategi 1 +andautomat 1 +choos 1 +effici 1 +modulein 1 +addit 1 +user 1 +permit 1 +guid 1 +optim 1 +desir 1 +select 1 +among 1 +control 1 +choic 1 +atth 1 +level 1 +provid 1 +construct 1 +updat 1 +insertand 1 +delet 1 +rule 1 +canprogram 1 +extend 1 +withcor 1 +primit 1 +high 1 +degre 1 +extens 1 +allowingc 1 +programm 1 +class 1 +structur 1 +enhanc 1 +coralimplemen 1 +main 1 +memori 1 +disk 1 +resid 1 +data 1 +us 1 +theexodusstorag 1 +manang 1 +transact 1 +manag 1 +aclient 1 +server 1 +environ 1 +releas 1 +inform 1 +current 1 +version 1 +octob 1 +instal 1 +grab 1 +file 1 +want 1 +nobin 1 +sourc 1 +code 1 +requiringy 1 +compil 1 +includ 1 +made 1 +binari 1 +forth 1 +indic 1 +machin 1 +type 1 +click 1 +readm 1 +gener 1 +manual 1 +instruct 1 +hpux 1 +seri 1 +suno 1 +solari 1 +linux 1 +stai 1 +announcemnt 1 +mail 1 +listwhich 1 +reciev 1 +announc 1 +relev 1 +newsgroup 1 +comp 1 +lang 1 +misc 1 +submit 1 +question 1 +comment 1 +report 1 +send 1 +wisc 1 +edulast 1 +modifi 1 +shawn 1 +flisakowski 1 +flisakow 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..f0e7c066 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,232 @@ +complementar 1 +problem 1 +algorithm 1 +applic 1 +neta 1 +result 1 +three 1 +decad 1 +research 1 +subject 1 +divers 1 +engin 1 +econom 1 +scienc 1 +becom 1 +well 1 +establish 1 +fruitfuldisciplin 1 +within 1 +mathemat 1 +program 1 +sever 1 +monograph 1 +survei 1 +document 1 +basic 1 +theori 1 +role 1 +optim 1 +page 1 +serv 1 +center 1 +inform 1 +regard 1 +incomplementar 1 +list 1 +meetingsof 1 +interest 1 +commun 1 +pointer 1 +softwar 1 +forcomplementar 1 +link 1 +area 1 +also 1 +given 1 +tabl 1 +content 1 +directori 1 +relev 1 +researcherssoftwar 1 +mcplib 1 +collect 1 +nonlinear 1 +mix 1 +problemdescript 1 +access 1 +gam 1 +sourc 1 +file 1 +forthes 1 +toolbox 1 +matlab 1 +evolv 1 +freeli 1 +avail 1 +consist 1 +andm 1 +allow 1 +frommatlab 1 +without 1 +give 1 +functionevalu 1 +spars 1 +jacobian 1 +evalu 1 +machin 1 +specificvers 1 +download 1 +path 1 +solver 1 +detail 1 +interfac 1 +describ 1 +librari 1 +routin 1 +areavail 1 +help 1 +hook 1 +model 1 +languag 1 +contact 1 +steve 1 +rutherford 1 +colorado 1 +ferri 1 +wisc 1 +edufor 1 +mile 1 +extens 1 +classicaljosephi 1 +newton 1 +method 1 +solut 1 +linearizedsubproblem 1 +comput 1 +lemk 1 +almost 1 +complementari 1 +pivot 1 +point 1 +us 1 +defineth 1 +direct 1 +dampedlinesearch 1 +merit 1 +function 1 +measur 1 +violat 1 +infeas 1 +emploi 1 +restartprocedur 1 +case 1 +cannot 1 +totermin 1 +secondari 1 +everi 1 +linear 1 +subproblem 1 +rescal 1 +equilibr 1 +elementsappear 1 +data 1 +run 1 +mcpor 1 +directli 1 +appli 1 +techniqu 1 +similarto 1 +smooth 1 +system 1 +anonsmooth 1 +reformul 1 +algorithmconsist 1 +sequenc 1 +major 1 +iter 1 +anapproxim 1 +step 1 +similar 1 +construct 1 +pathto 1 +approxim 1 +aposs 1 +search 1 +exist 1 +thepath 1 +entir 1 +along 1 +partiallycomput 1 +taken 1 +relinear 1 +anonmonoton 1 +watchdog 1 +strategi 1 +avoid 1 +converg 1 +local 1 +minima 1 +norm 1 +forth 1 +underli 1 +nonsmooth 1 +equat 1 +keep 1 +number 1 +requir 1 +small 1 +possibl 1 +option 1 +base 1 +uponreformul 1 +implement 1 +robustnessimprov 1 +proxim 1 +perturb 1 +qpcomp 1 +ishandl 1 +deriv 1 +thenapproxim 1 +solv 1 +leadto 1 +zero 1 +origin 1 +form 1 +theaccuraci 1 +determin 1 +residu 1 +thecurr 1 +subsystem 1 +compar 1 +paper 1 +engineeringand 1 +mani 1 +known 1 +mpsge 1 +preprocessor 1 +thatallow 1 +equilibrium 1 +formul 1 +easili 1 +thegam 1 +home 1 +nemsth 1 +nation 1 +energi 1 +relat 1 +algorithmsand 1 +overview 1 +project 1 +look 1 +michael 1 +trick 1 +oper 1 +interior 1 +pointmethod 1 +argonn 1 +laboratori 1 +archiv 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..2c58a9cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,56 @@ +exodu 1 +project 1 +home 1 +pageexodu 1 +extens 1 +object 1 +orient 1 +databas 1 +system 1 +toolkitnot 1 +document 1 +construct 1 +succed 1 +theshor 1 +still 1 +provid 1 +minim 1 +support 1 +user 1 +theexodu 1 +storag 1 +manag 1 +compil 1 +persistentprogram 1 +languag 1 +avail 1 +wisc 1 +licens 1 +requir 1 +inform 1 +need 1 +contact 1 +eduprincip 1 +investig 1 +mike 1 +carei 1 +david 1 +dewittse 1 +also 1 +public 1 +relat 1 +exodusshor 1 +successor 1 +exoduslatest 1 +compilercontribut 1 +softwar 1 +managera 1 +mail 1 +list 1 +exodus_al 1 +benchmark 1 +oodbsdat 1 +prepar 1 +april 1 +michael 1 +zwill 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..ad303a78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,196 @@ +paradis 1 +projectparadis 1 +parallel 1 +databas 1 +system 1 +applic 1 +document 1 +content 1 +object 1 +client 1 +server 1 +sampl 1 +frontend 1 +contact 1 +informationse 1 +also 1 +public 1 +relat 1 +peopl 1 +work 1 +shore 1 +project 1 +manag 1 +us 1 +extens 1 +optim 1 +report 1 +examin 1 +sequoia 1 +benchmark 1 +script 1 +vldb 1 +paper 1 +inform 1 +madison 1 +research 1 +group 1 +depart 1 +serverobject 1 +design 1 +implement 1 +andevalu 1 +scalabl 1 +geograph 1 +iscap 1 +store 1 +manipul 1 +massiv 1 +data 1 +set 1 +applyingobject 1 +orient 1 +technolog 1 +problem 1 +ofstor 1 +hope 1 +tosignificantli 1 +advanc 1 +size 1 +complex 1 +thatcan 1 +successfulli 1 +brows 1 +queri 1 +databasesystem 1 +aim 1 +handl 1 +type 1 +providesa 1 +graphic 1 +user 1 +interfac 1 +andsupport 1 +subset 1 +issu 1 +paradiseprovid 1 +extend 1 +model 1 +gisappl 1 +addit 1 +support 1 +base 1 +asinteg 1 +real 1 +string 1 +built 1 +raster 1 +polygon 1 +polylin 1 +point 1 +circl 1 +video 1 +mpeg 1 +imag 1 +underli 1 +persist 1 +front 1 +allow 1 +displai 1 +spatialattribut 1 +provid 1 +layer 1 +foroverlap 1 +spatial 1 +attribut 1 +correspond 1 +custom 1 +order 1 +selectingcolor 1 +label 1 +either 1 +withad 1 +issueimplicit 1 +zoom 1 +click 1 +sketch 1 +arubb 1 +band 1 +querycompos 1 +menu 1 +compos 1 +access 1 +databaseschema 1 +assist 1 +composit 1 +result 1 +beview 1 +bedisplai 1 +tabl 1 +tupl 1 +context 1 +sensit 1 +help 1 +sqlwe 1 +ad 1 +abil 1 +invok 1 +method 1 +defin 1 +extendedset 1 +exampl 1 +calcul 1 +area 1 +byus 1 +standarddatabas 1 +oper 1 +includ 1 +creat 1 +drop 1 +anddrop 1 +extent 1 +indic 1 +insert 1 +updat 1 +current 1 +version 1 +emploi 1 +architectur 1 +ship 1 +syntax 1 +paradiseserv 1 +execut 1 +theresult 1 +back 1 +ismulti 1 +thread 1 +multipl 1 +connect 1 +sameserv 1 +sever 1 +carefulattent 1 +paid 1 +insur 1 +could 1 +effici 1 +processqueri 1 +especi 1 +involv 1 +largevolum 1 +frontendeurop 1 +pressher 1 +projectattn 1 +prof 1 +david 1 +dewittunivers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +west 1 +dayton 1 +streetmadison 1 +email 1 +wisc 1 +edumor 1 +come 1 +biswadeep 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..8d9dc3ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,81 @@ +scout 1 +servic 1 +homepagego 1 +text 1 +versionnewslett 1 +newand 1 +newli 1 +discov 1 +internet 1 +resourc 1 +network 1 +toolsinternet 1 +announc 1 +updat 1 +daili 1 +effectiveinternet 1 +tool 1 +availablea 1 +report 1 +student 1 +studentssurf 1 +smarter 1 +longer 1 +intern 1 +project 1 +universityof 1 +wisconsin 1 +madison 1 +show 1 +best 1 +canchoos 1 +filter 1 +hundr 1 +annoucementseach 1 +week 1 +look 1 +valuabl 1 +onlin 1 +networktool 1 +organ 1 +summar 1 +annot 1 +vefound 1 +offer 1 +commun 1 +sever 1 +us 1 +format 1 +goal 1 +support 1 +effect 1 +byeduc 1 +research 1 +howev 1 +everyon 1 +welcom 1 +useth 1 +public 1 +site 1 +provid 1 +encouragefeedback 1 +suggest 1 +entir 1 +three 1 +primari 1 +includ 1 +happen 1 +thescout 1 +toolkit 1 +ournewest 1 +know 1 +locat 1 +depart 1 +comput 1 +scienc 1 +theunivers 1 +comment 1 +feedbackscout 1 +servicesfor 1 +inform 1 +educ 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..1e090001 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,481 @@ +shore 1 +project 1 +home 1 +pageshor 1 +high 1 +perform 1 +scalabl 1 +persist 1 +object 1 +repositorydocu 1 +content 1 +objectiveoverviewreleas 1 +informationmail 1 +listsse 1 +also 1 +version 1 +line 1 +inform 1 +sourc 1 +binari 1 +document 1 +beta 1 +releas 1 +public 1 +relat 1 +shorepeopl 1 +work 1 +shorelatest 1 +research 1 +summari 1 +arpaparadis 1 +built 1 +shoreexodu 1 +predecessor 1 +shoreoo 1 +benchmark 1 +oodbsshor 1 +photo 1 +albumuw 1 +madison 1 +databas 1 +group 1 +depart 1 +serverobject 1 +design 1 +implement 1 +andevalu 1 +system 1 +serv 1 +need 1 +widevarieti 1 +target 1 +applic 1 +includ 1 +hardwar 1 +softwar 1 +cadsystem 1 +program 1 +languag 1 +geograph 1 +informationsystem 1 +satellit 1 +data 1 +repositori 1 +multi 1 +media 1 +expand 1 +basic 1 +capabl 1 +wide 1 +usedexodusstorag 1 +manag 1 +develop 1 +wisconsin 1 +fund 1 +arpa 1 +number 1 +ofwai 1 +support 1 +type 1 +multipl 1 +programminglanguag 1 +unix 1 +like 1 +hierarch 1 +name 1 +space 1 +anda 1 +compat 1 +interfac 1 +text 1 +field 1 +thisinterfac 1 +intend 1 +eas 1 +transit 1 +theunix 1 +file 1 +environ 1 +exist 1 +tool 1 +viand 1 +abl 1 +store 1 +withoutmodif 1 +becom 1 +either 1 +singl 1 +shoreobject 1 +complex 1 +overview 1 +someth 1 +hybrid 1 +natur 1 +inheritingcharacterist 1 +orient 1 +fromfil 1 +section 1 +briefli 1 +describ 1 +featur 1 +ofshor 1 +paper 1 +much 1 +greater 1 +detail 1 +three 1 +major 1 +goal 1 +scalabilitysupport 1 +heterogeneitysupport 1 +base 1 +applicationswhen 1 +began 1 +year 1 +uniqueamong 1 +commerci 1 +oodbm 1 +commun 1 +odmg 1 +effort 1 +concentr 1 +provid 1 +degre 1 +languageheterogen 1 +turn 1 +facilit 1 +heterogen 1 +remain 1 +distinguish 1 +focu 1 +supportfor 1 +depend 1 +persistentstorag 1 +furthermor 1 +sinc 1 +model 1 +basicallycompat 1 +expect 1 +technolog 1 +eventu 1 +betransf 1 +sector 1 +architectureshor 1 +architectur 1 +uniqu 1 +sever 1 +wai 1 +first 1 +us 1 +symmetr 1 +peer 1 +distributedarchitectur 1 +everi 1 +particip 1 +processor 1 +run 1 +ashor 1 +server 1 +process 1 +whether 1 +disksattach 1 +network 1 +workstat 1 +larg 1 +parallel 1 +intel 1 +paragon 1 +contrast 1 +client 1 +architectureus 1 +exodu 1 +vendor 1 +serverarchitectur 1 +fine 1 +typicallyus 1 +second 1 +notionof 1 +valu 1 +ad 1 +structur 1 +runsin 1 +extens 1 +mind 1 +rel 1 +simpl 1 +forus 1 +build 1 +specif 1 +exampl 1 +theparadis 1 +alreadi 1 +nasa 1 +seosdi 1 +feel 1 +piec 1 +plai 1 +aimport 1 +role 1 +varieti 1 +futur 1 +endeavor 1 +digit 1 +librari 1 +almost 1 +certainlydepend 1 +avail 1 +go 1 +retriev 1 +manipul 1 +transmitobject 1 +contain 1 +video 1 +pictur 1 +well 1 +whilecurr 1 +product 1 +could 1 +orientedtoward 1 +deal 1 +gigabyt 1 +terabyt 1 +customiz 1 +equal 1 +import 1 +index 1 +queri 1 +mechan 1 +libraryar 1 +differ 1 +requir 1 +heterogeneityobject 1 +neutraltyp 1 +defin 1 +embodi 1 +enhanc 1 +databasefeatur 1 +bulk 1 +set 1 +list 1 +provis 1 +simplifi 1 +task 1 +ofsupport 1 +make 1 +feasibleto 1 +access 1 +mention 1 +earlier 1 +quit 1 +close 1 +neutral 1 +definit 1 +wasrec 1 +propos 1 +standard 1 +oodb 1 +consortium 1 +term 1 +emphasi 1 +howev 1 +onprovid 1 +inter 1 +share 1 +withina 1 +applicationsa 1 +enabl 1 +currentlyus 1 +untyp 1 +byte 1 +flatten 1 +time 1 +stop 1 +structuredobject 1 +conveni 1 +safe 1 +intra 1 +ultim 1 +hope 1 +displac 1 +orientedfil 1 +servic 1 +standpoint 1 +world 1 +manypersist 1 +flexibl 1 +tree 1 +reachabl 1 +directli 1 +indirectli 1 +give 1 +usersa 1 +familiar 1 +framework 1 +regist 1 +individualpersist 1 +root 1 +oflarg 1 +unnam 1 +anonym 1 +realiz 1 +involvessever 1 +kind 1 +includingdirectori 1 +pool 1 +symbol 1 +link 1 +cross 1 +refer 1 +legaci 1 +unixappl 1 +compil 1 +editor 1 +fromtradit 1 +stream 1 +standardunix 1 +open 1 +read 1 +write 1 +mkdir 1 +chdir 1 +order 1 +callsposs 1 +option 1 +onevari 1 +length 1 +string 1 +charact 1 +attribut 1 +asb 1 +attempt 1 +objectthrough 1 +counterpart 1 +callswil 1 +portion 1 +thatwish 1 +without 1 +possibl 1 +mount 1 +datacontain 1 +feasibl 1 +bothnew 1 +componentof 1 +morestructur 1 +latest 1 +tabl 1 +date 1 +approxim 1 +subject 1 +chang 1 +question 1 +contact 1 +shore_support 1 +wisc 1 +rleas 1 +sept 1 +improv 1 +completeimplement 1 +mani 1 +fix 1 +port 1 +tosolari 1 +linux 1 +august 1 +gzip 1 +sparc 1 +andpentium 1 +solari 1 +found 1 +atftp 1 +mail 1 +liststher 1 +eduand 1 +shore_al 1 +eduthi 1 +reach 1 +team 1 +usebi 1 +user 1 +submit 1 +comment 1 +report 1 +cannot 1 +subscrib 1 +interest 1 +listproc 1 +madisonc 1 +current 1 +unmoder 1 +unlikelyev 1 +get 1 +clutter 1 +junk 1 +moder 1 +messag 1 +mailbox 1 +isalreadi 1 +sign 1 +weekli 1 +digest 1 +belowfor 1 +sentwhen 1 +purpos 1 +notifi 1 +parti 1 +archiv 1 +request 1 +help 1 +default 1 +repli 1 +sent 1 +sender 1 +rather 1 +beingpost 1 +entir 1 +want 1 +yourrepli 1 +copi 1 +thu 1 +anyon 1 +maysubscrib 1 +post 1 +existenceof 1 +shown 1 +return 1 +whenit 1 +yoursubscript 1 +conceal 1 +subscriberscannot 1 +obtain 1 +membership 1 +subscript 1 +must 1 +specialmessag 1 +look 1 +receiv 1 +individu 1 +sendthi 1 +along 1 +send 1 +separ 1 +unsubscrib 1 +messageshould 1 +helplast 1 +modifi 1 +nanci 1 +hall 1 +nhall 1 +footnot 1 +odlshor 1 +concurr 1 +decid 1 +modelidl 1 +start 1 +point 1 +henc 1 +odlar 1 +similar 1 +anoth 1 +stabilizesw 1 +convert 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..d7e3e867 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,31 @@ +abhinav 1 +home 1 +page 1 +welcom 1 +gupta 1 +agupta 1 +wisc 1 +construct 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +contact 1 +residenceoffic 1 +kendal 1 +avenu 1 +dayton 1 +street 1 +interest 1 +link 1 +indian 1 +newspap 1 +stuff 1 +sport 1 +finger 1 +find 1 +whereabout 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..676e8476 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,13 @@ +alain 1 +home 1 +pagealain 1 +click 1 +larger 1 +pictur 1 +largest 1 +carnivor 1 +ever 1 +live 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..8f1e7ff6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,49 @@ +carolyn 1 +allex 1 +home 1 +page 1 +graduat 1 +studentbiotechnolog 1 +train 1 +program 1 +traineecomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +mail 1 +wisc 1 +edutelephon 1 +advisor 1 +professor 1 +jude 1 +shavlikinterest 1 +comput 1 +biologi 1 +sequenc 1 +protein 1 +fold 1 +artifici 1 +intellig 1 +machin 1 +learn 1 +neural 1 +networkseduc 1 +univers 1 +madisonb 1 +purdu 1 +universityb 1 +educ 1 +mankato 1 +state 1 +relat 1 +link 1 +depart 1 +group 1 +research 1 +system 1 +molecular 1 +ismb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..045d9853 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,182 @@ +amir 1 +home 1 +page 1 +roth 1 +delphi 1 +maven 1 +show 1 +erin 1 +madison 1 +wisc 1 +occasionali 1 +updat 1 +copi 1 +resum 1 +cvte 1 +truth 1 +group 1 +seminar 1 +arch 1 +week 1 +research 1 +topic 1 +implement 1 +compil 1 +us 1 +preprocessor 1 +deleg 1 +work 1 +project 1 +partner 1 +set 1 +airport 1 +metal 1 +detector 1 +out 1 +existencei 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +advisor 1 +guri 1 +sohi 1 +look 1 +method 1 +allevi 1 +data 1 +depend 1 +multiscalar 1 +program 1 +distribut 1 +regist 1 +file 1 +side 1 +curli 1 +fri 1 +physic 1 +yale 1 +advanc 1 +degre 1 +nail 1 +design 1 +vallei 1 +beauti 1 +school 1 +interest 1 +much 1 +practic 1 +live 1 +girlfriend 1 +marci 1 +cat 1 +charli 1 +also 1 +went 1 +get 1 +master 1 +public 1 +polici 1 +lafollett 1 +institut 1 +go 1 +presid 1 +meantim 1 +like 1 +solv 1 +linear 1 +regress 1 +problem 1 +wacki 1 +recip 1 +find 1 +magazin 1 +watch 1 +parti 1 +five 1 +eggplant 1 +peopl 1 +think 1 +weird 1 +anywai 1 +promis 1 +subba 1 +officem 1 +daddi 1 +novemb 1 +titanium 1 +screw 1 +desi 1 +relaford 1 +terri 1 +mulholland 1 +oxygen 1 +carbon 1 +dioxid 1 +area 1 +vagu 1 +languag 1 +analysi 1 +super 1 +whack 1 +optim 1 +parallel 1 +algorithm 1 +theori 1 +good 1 +soul 1 +evalu 1 +model 1 +perform 1 +enhanc 1 +three 1 +point 1 +shot 1 +thing 1 +scaryarea 1 +rabid 1 +interestth 1 +love 1 +know 1 +talk 1 +better 1 +leav 1 +never 1 +return 1 +hmmm 1 +handyinformatik 1 +index 1 +journal 1 +author 1 +madcat 1 +architectur 1 +resourc 1 +minut 1 +score 1 +sportslin 1 +philli 1 +everybodi 1 +favorit 1 +engin 1 +ickyth 1 +new 1 +friend 1 +barb 1 +write 1 +articl 1 +gui 1 +read 1 +want 1 +kid 1 +drew 1 +cornel 1 +david 1 +wierd 1 +featur 1 +associ 1 +kemin 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..237c8614 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..246b3551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,131 @@ +amo 1 +home 1 +page 1 +word 1 +search 1 +engin 1 +approxim 1 +theori 1 +spline 1 +wavelet 1 +boxsplin 1 +radial 1 +basi 1 +function 1 +shift 1 +invari 1 +space 1 +toscatt 1 +data 1 +multiquadr 1 +thin 1 +plate 1 +splinesthi 1 +netscap 1 +enhanc 1 +homepag 1 +associ 1 +professordepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +wisc 1 +telephon 1 +present 1 +tabl 1 +linksat 1 +item 1 +access 1 +order 1 +download 1 +paperaffin 1 +system 1 +analysi 1 +operatorof 1 +zuowei 1 +shen 1 +choos 1 +follow 1 +version 1 +us 1 +unix 1 +clickher 1 +compress 1 +otherwis 1 +uncompress 1 +fromher 1 +none 1 +work 1 +server 1 +copi 1 +directlyfrom 1 +accounther 1 +handout 1 +email 1 +click 1 +vita 1 +want 1 +list 1 +variou 1 +public 1 +includ 1 +abstract 1 +select 1 +articlesof 1 +mine 1 +inform 1 +anonym 1 +site 1 +carl 1 +boor 1 +maintain 1 +containspostscript 1 +postscript 1 +file 1 +articl 1 +theapproxim 1 +group 1 +also 1 +found 1 +technic 1 +filesconcern 1 +gener 1 +recommend 1 +read 1 +provid 1 +avail 1 +wish 1 +view 1 +line 1 +student 1 +research 1 +andpubl 1 +main 1 +area 1 +interest 1 +togeth 1 +short 1 +summari 1 +futur 1 +goal 1 +activ 1 +univeristi 1 +ofwisconsin 1 +numer 1 +link 1 +peopl 1 +commun 1 +miscellan 1 +topic 1 +final 1 +offici 1 +pleas 1 +deposit 1 +comment 1 +mailbox 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..e587fddd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andi 1 +therber 1 +home 1 +pageandi 1 +therberoffic 1 +sphone 1 +email 1 +andyt 1 +wisc 1 +eduzooresumebookmarksapplet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..6714ce4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,18 @@ +ranga 1 +arvind 1 +ranganathan 1 +erstwhil 1 +workplac 1 +present 1 +indiaworld 1 +fascin 1 +world 1 +escher 1 +collect 1 +classic 1 +paper 1 +comput 1 +scienc 1 +finger 1 +log 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..d1711361 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,35 @@ +ashish 1 +home 1 +page 1 +thusoo 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +come 1 +india 1 +hadmi 1 +undergradu 1 +educ 1 +indianinstitut 1 +technolog 1 +delhi 1 +iitd 1 +fantast 1 +place 1 +worth 1 +visit 1 +like 1 +contact 1 +canfing 1 +find 1 +whereabout 1 +altern 1 +send 1 +email 1 +ashisht 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..7f31fb08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,34 @@ +ashraf 1 +aboulnaga 1 +home 1 +pageashraf 1 +aboulnagacomput 1 +scienc 1 +depart 1 +room 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +usaphon 1 +mail 1 +wisc 1 +edueduc 1 +comput 1 +alexandria 1 +egypt 1 +juli 1 +june 1 +info 1 +section 1 +view 1 +grade 1 +offic 1 +hour 1 +desautel 1 +page 1 +last 1 +modifi 1 +septemb 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..157f3ec5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,24 @@ +phil 1 +atkinson 1 +home 1 +page 1 +pageucla 1 +bannon 1 +win 1 +ncaa 1 +basketbal 1 +championship 1 +seattl 1 +gener 1 +infooffic 1 +phone 1 +email 1 +wisc 1 +educurr 1 +researchsailinghors 1 +back 1 +ridingscuba 1 +divingc 1 +hour 1 +tuth 1 +appoint 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..ec599150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,118 @@ +home 1 +page 1 +eric 1 +bach 1 +professor 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +california 1 +berkelei 1 +interest 1 +theoret 1 +number 1 +theori 1 +algebraicalgorithm 1 +complex 1 +cryptographi 1 +string 1 +automata 1 +research 1 +summari 1 +us 1 +effici 1 +solvealgebra 1 +problem 1 +exampl 1 +onetel 1 +digit 1 +prime 1 +without 1 +examin 1 +possiblefactor 1 +intrins 1 +mathemat 1 +well 1 +applic 1 +random 1 +gener 1 +code 1 +forreli 1 +secur 1 +inform 1 +transmiss 1 +algebra 1 +area 1 +also 1 +appli 1 +probabl 1 +designand 1 +analysi 1 +algorithm 1 +larg 1 +iscomposit 1 +prove 1 +simpl 1 +test 1 +auxiliarynumb 1 +call 1 +wit 1 +practic 1 +usual 1 +find 1 +witnessbi 1 +direct 1 +search 1 +among 1 +small 1 +lead 1 +followingnatur 1 +question 1 +least 1 +functionof 1 +recent 1 +work 1 +given 1 +accurateheurist 1 +model 1 +base 1 +probabilist 1 +assumpt 1 +allowsthi 1 +similar 1 +answer 1 +public 1 +improv 1 +approxim 1 +euler 1 +product 1 +proc 1 +cnta 1 +canadian 1 +math 1 +proceed 1 +complet 1 +condon 1 +glaser 1 +tanguai 1 +annual 1 +conf 1 +volum 1 +shallit 1 +press 1 +info 1 +click 1 +curriculum 1 +vita 1 +creat 1 +juli 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..70f164d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,58 @@ +bart 1 +miller 1 +home 1 +page 1 +barton 1 +wisc 1 +professorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usath 1 +follow 1 +list 1 +thing 1 +research 1 +project 1 +paradyn 1 +parallel 1 +perform 1 +tool 1 +fuzz 1 +random 1 +softwar 1 +testingteach 1 +introduct 1 +oper 1 +system 1 +spring 1 +honor 1 +internet 1 +seminar 1 +advanc 1 +fall 1 +distribut 1 +director 1 +undergradu 1 +graduatesprofession 1 +symposium 1 +monona 1 +terrac 1 +frank 1 +lloyd 1 +wright 1 +convent 1 +center 1 +technic 1 +advisori 1 +groupperson 1 +offici 1 +depart 1 +famili 1 +photosbart 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..4be27108 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,44 @@ +hyper 1 +home 1 +page 1 +benjamin 1 +teitelbaum 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +usaben 1 +wisc 1 +edursumquinc 1 +internet 1 +ultim 1 +word 1 +gamezillion 1 +bookmarksspr 1 +schedul 1 +browser 1 +support 1 +tabl 1 +look 1 +like 1 +garbag 1 +click 1 +someth 1 +readabl 1 +mondai 1 +tuesdai 1 +wednesdai 1 +thursdai 1 +fridai 1 +offic 1 +hour 1 +dbseminar 1 +osseminar 1 +condormeet 1 +miron 1 +plseminar 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..05ecf167 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,206 @@ +gareth 1 +bestor 1 +home 1 +pagewelcom 1 +pagegareth 1 +dissert 1 +teach 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +telephon 1 +mail 1 +wisc 1 +click 1 +finger 1 +world 1 +wide 1 +http 1 +system 1 +administr 1 +data 1 +program 1 +librari 1 +servic 1 +observatori 1 +drive 1 +dpl 1 +dacc 1 +edueduc 1 +honor 1 +massei 1 +zealand 1 +curriculum 1 +vita 1 +postscript 1 +resum 1 +graduat 1 +coursework 1 +research 1 +titl 1 +structur 1 +motion 1 +invers 1 +project 1 +problem 1 +abstract 1 +import 1 +vision 1 +recov 1 +scene 1 +posit 1 +observ 1 +within 1 +imag 1 +essenti 1 +exist 1 +techniqu 1 +solv 1 +examin 1 +multipl 1 +rigid 1 +howev 1 +us 1 +extens 1 +practic 1 +sensit 1 +nois 1 +accur 1 +model 1 +optic 1 +restrict 1 +call 1 +concurr 1 +projector 1 +make 1 +assumpt 1 +base 1 +instead 1 +camera 1 +tradition 1 +result 1 +algorithm 1 +defin 1 +geometr 1 +transform 1 +dimens 1 +perspect 1 +given 1 +identifi 1 +constrain 1 +specifi 1 +minimum 1 +number 1 +point 1 +requir 1 +also 1 +addit 1 +minim 1 +type 1 +error 1 +occur 1 +real 1 +applic 1 +allow 1 +approxim 1 +intersect 1 +current 1 +appli 1 +robot 1 +navig 1 +explor 1 +determin 1 +unknown 1 +environ 1 +time 1 +advisor 1 +prof 1 +charl 1 +dyer 1 +interest 1 +machin 1 +graphic 1 +virtual 1 +realiti 1 +artifici 1 +intellig 1 +group 1 +learn 1 +duti 1 +spring 1 +introduct 1 +section 1 +fortran 1 +credit 1 +cours 1 +cover 1 +basic 1 +need 1 +prepar 1 +student 1 +elementari 1 +engin 1 +prior 1 +experi 1 +knowledg 1 +assum 1 +materi 1 +enabl 1 +write 1 +simpl 1 +done 1 +intend 1 +receiv 1 +littl 1 +instruct 1 +high 1 +school 1 +taught 1 +entir 1 +languag 1 +primarili 1 +major 1 +page 1 +algebra 1 +construct 1 +least 1 +procedur 1 +orient 1 +pascal 1 +survei 1 +advanc 1 +prereq 1 +mathemat 1 +colleg 1 +work 1 +statist 1 +logic 1 +consent 1 +instructor 1 +open 1 +pointer 1 +wiscinfo 1 +inform 1 +hoofer 1 +out 1 +club 1 +nextstep 1 +next 1 +softwar 1 +start 1 +internet 1 +lyco 1 +search 1 +keyword 1 +copyright 1 +copi 1 +last 1 +modifi 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..31e5b32b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,33 @@ +kevin 1 +beyer 1 +home 1 +pagekevin 1 +beyerbey 1 +wisc 1 +caution 1 +work 1 +graduat 1 +student 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +advisor 1 +raghu 1 +ramakrishnan 1 +area 1 +interest 1 +databas 1 +researchresearch 1 +project 1 +coral 1 +local 1 +cours 1 +inform 1 +undergradu 1 +coursesinstruct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..af6e426b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,62 @@ +todd 1 +bezenek 1 +home 1 +page 1 +toddm 1 +back 1 +introduc 1 +actual 1 +faster 1 +cpu 1 +_great 1 +microprocessor 1 +past 1 +present_ 1 +uregina 1 +bayko 1 +html 1 +window 1 +express 1 +locomot 1 +squeez 1 +skateboard 1 +size 1 +packag 1 +helen 1 +custer 1 +_insid 1 +microsoft 1 +press 1 +current 1 +cours 1 +advanc 1 +oper 1 +system 1 +bart 1 +miller 1 +pithi 1 +pith 1 +consist 1 +abound 1 +take 1 +yeah 1 +point 1 +skew 1 +associ 1 +cach 1 +access 1 +inform 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +madison 1 +offic 1 +phone 1 +mail 1 +wisc 1 +edubezenek 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..b7f3dd39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,54 @@ +nathan 1 +bockrath 1 +graduat 1 +student 1 +teach 1 +assist 1 +averag 1 +pictur 1 +nate 1 +jpeg 1 +send 1 +email 1 +click 1 +wisc 1 +section 1 +held 1 +grade 1 +quiz 1 +review 1 +viru 1 +info 1 +word 1 +macro 1 +make 1 +page 1 +offic 1 +hour 1 +anywai 1 +mondai 1 +wednesdai 1 +schedul 1 +distribut 1 +system 1 +simul 1 +model 1 +support 1 +free 1 +speech 1 +onlin 1 +site 1 +anoth 1 +dai 1 +back 1 +home 1 +pageback 1 +depart 1 +pageoth 1 +neat 1 +stuff 1 +condor 1 +project 1 +internet 1 +oraclesend 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..cb641ccb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,288 @@ +bolobologreet 1 +bolo 1 +although 1 +parent 1 +christen 1 +josef 1 +thoma 1 +burger 1 +roll 1 +wai 1 +call 1 +mebolo 1 +everyon 1 +includ 1 +quit 1 +sure 1 +bestexplan 1 +develop 1 +mani 1 +question 1 +person 1 +defin 1 +bywhat 1 +case 1 +softwar 1 +engin 1 +semi 1 +real 1 +timeoper 1 +system 1 +util 1 +last 1 +year 1 +kernel 1 +hacker 1 +unix 1 +administr 1 +creat 1 +thing 1 +appli 1 +scienc 1 +design 1 +construct 1 +realli 1 +scientist 1 +though 1 +acomput 1 +degre 1 +pai 1 +david 1 +dewitt 1 +shudder 1 +methodolog 1 +hand 1 +right 1 +time 1 +work 1 +architect 1 +implement 1 +newoper 1 +type 1 +often 1 +sameto 1 +forth 1 +woodwork 1 +home 1 +control 1 +draw 1 +brew 1 +beer 1 +complet 1 +relax 1 +sleep 1 +wonder 1 +enough 1 +myroomm 1 +disagre 1 +pursuit 1 +enjoi 1 +fly 1 +read 1 +fiction 1 +comic 1 +book 1 +railroad 1 +prototyp 1 +model 1 +role 1 +plai 1 +game 1 +notic 1 +imag 1 +sublim 1 +stripe 1 +creatur 1 +thetig 1 +appear 1 +throughout 1 +page 1 +tiger 1 +taken 1 +william 1 +blake 1 +poemtyg 1 +tyger 1 +put 1 +word 1 +road 1 +againin 1 +tremend 1 +leap 1 +insan 1 +purchas 1 +ahous 1 +address 1 +place 1 +isjosef 1 +east 1 +gate 1 +roadmonona 1 +voic 1 +number 1 +bore 1 +workwork 1 +drive 1 +banana 1 +us 1 +grung 1 +either 1 +matur 1 +job 1 +chang 1 +perhapssom 1 +seem 1 +like 1 +parallel 1 +comput 1 +everi 1 +othermonth 1 +beat 1 +intosubmiss 1 +everyth 1 +els 1 +moon 1 +andstar 1 +current 1 +follow 1 +project 1 +fordav 1 +world 1 +famou 1 +databas 1 +gamma 1 +relat 1 +queri 1 +interpret 1 +object 1 +store 1 +paradis 1 +geograph 1 +inform 1 +shore 1 +orient 1 +data 1 +wiss 1 +wisconsin 1 +storag 1 +whatev 1 +need 1 +done 1 +whole 1 +occur 1 +thecomput 1 +departmentof 1 +themadison 1 +campusof 1 +univers 1 +campu 1 +locat 1 +madison 1 +peninsula 1 +five 1 +lake 1 +workin 1 +addit 1 +also 1 +consult 1 +provid 1 +solut 1 +rather 1 +advic 1 +technicalexpertis 1 +help 1 +internet 1 +port 1 +newsystem 1 +reviv 1 +oddbal 1 +tasksar 1 +kind 1 +tell 1 +tovisit 1 +serverbut 1 +haven 1 +anyth 1 +mostlyempti 1 +except 1 +friend 1 +activitiesuwvaxi 1 +oper 1 +uwvax 1 +usenet 1 +new 1 +uucp 1 +site 1 +free 1 +that 1 +print 1 +someth 1 +along 1 +line 1 +depart 1 +part 1 +longer 1 +svolunt 1 +master 1 +much 1 +goe 1 +run 1 +howev 1 +try 1 +take 1 +care 1 +reader 1 +across 1 +differentarchitectur 1 +task 1 +organizationsi 1 +member 1 +organ 1 +alwai 1 +agre 1 +oftenhav 1 +good 1 +benefit 1 +usersof 1 +commun 1 +aopa 1 +aircraft 1 +owner 1 +pilot 1 +associ 1 +experiment 1 +usenix 1 +blitz 1 +drinkingwhen 1 +school 1 +hord 1 +friendsand 1 +visit 1 +local 1 +thursdai 1 +night 1 +essen 1 +hau 1 +import 1 +slowli 1 +entir 1 +select 1 +acquaint 1 +becam 1 +loftili 1 +labelledblitz 1 +drink 1 +societi 1 +divers 1 +meet 1 +ofoctoberfest 1 +weekend 1 +chud 1 +accumulateda 1 +short 1 +histori 1 +whatnotof 1 +charad 1 +pagelast 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..65edab27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,47 @@ +brad 1 +home 1 +page 1 +welcom 1 +thayer 1 +homepag 1 +much 1 +mail 1 +link 1 +comput 1 +scienc 1 +intro 1 +network 1 +possibl 1 +advanc 1 +oper 1 +system 1 +would 1 +foolish 1 +neglect 1 +model 1 +interest 1 +thec 1 +us 1 +probabl 1 +bore 1 +check 1 +seminaranywai 1 +aim 1 +beaucoup 1 +boir 1 +pepper 1 +badger 1 +packer 1 +pagesom 1 +search 1 +altavista 1 +enginefind 1 +email 1 +adress 1 +world 1 +wideth 1 +jazz 1 +duan 1 +mclaughlin 1 +pageuw 1 +athlet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..3eee3dd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,64 @@ +scott 1 +breach 1 +home 1 +pagescott 1 +wisc 1 +addresseseducationresearch 1 +interest 1 +public 1 +recreat 1 +associatesaddressesscott 1 +breachdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usatel 1 +educationph 1 +scienc 1 +univers 1 +engin 1 +carnegi 1 +mellon 1 +advisorguri 1 +sohiresearch 1 +interestscomput 1 +architecturemultiscalarpublicationsmultiscalar 1 +processorsgurindar 1 +sohi 1 +vijaykumarnd 1 +intern 1 +symposium 1 +architectur 1 +anatomi 1 +regist 1 +file 1 +multiscalar 1 +processorscott 1 +vijaykumar 1 +gurindar 1 +sohith 1 +microarchitectur 1 +effici 1 +detect 1 +pointer 1 +arrai 1 +access 1 +errorstodd 1 +austin 1 +sohiconfer 1 +program 1 +languag 1 +design 1 +implement 1 +recreationwingsbeersquidtvassociatestodd 1 +austindoug 1 +burgerbabak 1 +falsafialain 1 +kagit 1 +vijaykumarlast 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..d2565fe7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,83 @@ +bleed 1 +nontrivi 1 +waysher 1 +temporarili 1 +underst 1 +page 1 +unadorn 1 +provid 1 +section 1 +pizza 1 +pool 1 +brief 1 +hobbi 1 +schedul 1 +spring 1 +stinkin 1 +bookmark 1 +suni 1 +albani 1 +fall 1 +poor 1 +unfortun 1 +name 1 +myclass 1 +hypersensit 1 +rockjock 1 +cretin 1 +brood 1 +glare 1 +clenchesfist 1 +crack 1 +knuckl 1 +tragic 1 +flightyfemm 1 +get 1 +razz 1 +asskick 1 +sinc 1 +thirdgrad 1 +perhap 1 +smooth 1 +skin 1 +hardbodi 1 +leatherboi 1 +leer 1 +atm 1 +whenev 1 +call 1 +roll 1 +differ 1 +make 1 +todayi 1 +giggl 1 +said 1 +becam 1 +aprostitut 1 +societi 1 +bigotri 1 +pedagodi 1 +isaac 1 +theblack 1 +goat 1 +refus 1 +stai 1 +claw 1 +hand 1 +sssuuuhhh 1 +mmuuuhhhh 1 +dddduuuuuhhhhh 1 +mmmmuuuhhhh 1 +maaaahhhjaaaaaahhhhh 1 +fffuuuhhhhh 1 +yyyyyyyuuuuuhhhhh 1 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 1 +uuuhhh 1 +uuummmm 1 +uuuhhhh 1 +wwwwwhhhhuuuuuhhhhh 1 +zhang 1 +wouldn 1 +notic 1 +eggleston 1 +smile 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..6cccba93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,154 @@ +home 1 +page 1 +wisc 1 +assist 1 +professor 1 +comput 1 +sciencedepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usacao 1 +eduphon 1 +department 1 +offic 1 +educ 1 +research 1 +interest 1 +cours 1 +recent 1 +paper 1 +talk 1 +summari 1 +collect 1 +link 1 +princeton 1 +univers 1 +tsinghua 1 +beij 1 +china 1 +oper 1 +system 1 +high 1 +perform 1 +file 1 +memori 1 +resourc 1 +alloc 1 +parallel 1 +project 1 +optim 1 +prefetch 1 +cachingacf 1 +applic 1 +control 1 +cach 1 +topic 1 +distribut 1 +fall 1 +advanc 1 +spring 1 +trace 1 +simul 1 +access 1 +tracesrec 1 +papersintegr 1 +cachingtraci 1 +kimbrel 1 +anna 1 +karlin 1 +felten 1 +depart 1 +tech 1 +report 1 +novemb 1 +shorter 1 +version 1 +proceed 1 +sigmetr 1 +confer 1 +thesi 1 +also 1 +implement 1 +integr 1 +disk 1 +schedulingpei 1 +edward 1 +princetonunivers 1 +appear 1 +toc 1 +studi 1 +strategiespei 1 +peform 1 +first 1 +osdi 1 +symposium 1 +slide 1 +present 1 +polici 1 +usenix 1 +summer 1 +technic 1 +tickertaip 1 +raid 1 +architectur 1 +swee 1 +boon 1 +shivakumar 1 +venkataraman 1 +john 1 +wilk 1 +isca 1 +talksslid 1 +postscript 1 +andpostscript 1 +summarymi 1 +focus 1 +storag 1 +manag 1 +uniprocessor 1 +andparallel 1 +particular 1 +investig 1 +techniqu 1 +improvefil 1 +specif 1 +replac 1 +filecach 1 +aggress 1 +data 1 +havedevelop 1 +kernel 1 +physic 1 +individualappl 1 +respons 1 +decid 1 +useit 1 +us 1 +fairglob 1 +carefulli 1 +cachereplac 1 +schedul 1 +prototyp 1 +implementationon 1 +demonstratedthat 1 +good 1 +chosen 1 +strategi 1 +informationcan 1 +significantli 1 +improv 1 +mani 1 +current 1 +extend 1 +amdevelop 1 +algorithm 1 +diskarrai 1 +addit 1 +global 1 +managementproblem 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..399a6cbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,284 @@ +mike 1 +careymichael 1 +careyprofessor 1 +leav 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +research 1 +staff 1 +member 1 +almaden 1 +center 1 +harri 1 +road 1 +jose 1 +phone 1 +primari 1 +altern 1 +mail 1 +carei 1 +wisc 1 +eduresearch 1 +interestsdatabas 1 +manag 1 +system 1 +parallel 1 +distribut 1 +appli 1 +perform 1 +evalu 1 +interest 1 +main 1 +area 1 +databas 1 +performanceand 1 +next 1 +gener 1 +topicsof 1 +current 1 +includ 1 +tradeoff 1 +techniqu 1 +forobject 1 +orient 1 +design 1 +algorithmsrel 1 +transact 1 +process 1 +schedul 1 +complex 1 +multi 1 +userdatabas 1 +workload 1 +base 1 +user 1 +specifi 1 +goal 1 +involv 1 +theexodu 1 +extens 1 +dbm 1 +project 1 +shore 1 +aimedat 1 +develop 1 +scalabl 1 +repositori 1 +storag 1 +share 1 +persistentobject 1 +heterogen 1 +environ 1 +effort 1 +whichi 1 +build 1 +upon 1 +experi 1 +exodu 1 +meet 1 +objectmanag 1 +need 1 +replac 1 +unix 1 +file 1 +applicationssuch 1 +case 1 +recent 1 +move 1 +academia 1 +industri 1 +twelv 1 +greatyear 1 +part 1 +becom 1 +best 1 +academ 1 +researchgroup 1 +known 1 +time 1 +come 1 +tackl 1 +anddiffer 1 +challeng 1 +work 1 +thesourc 1 +number 1 +paper 1 +teach 1 +student 1 +forth 1 +past 1 +year 1 +relat 1 +object 1 +signific 1 +fraction 1 +spent 1 +rel 1 +projectther 1 +call 1 +garlic 1 +multimediainform 1 +allow 1 +data 1 +live 1 +varieti 1 +tobe 1 +queri 1 +manipul 1 +though 1 +resid 1 +homogen 1 +objectdatabas 1 +sabbat 1 +continuedto 1 +focus 1 +graduat 1 +aqueri 1 +browser 1 +front 1 +tool 1 +pesto 1 +thegarl 1 +locat 1 +public 1 +extend 1 +oodb 1 +access 1 +implement 1 +kiernan 1 +proc 1 +conf 1 +orientedprogram 1 +languag 1 +applic 1 +oopsla 1 +austin 1 +octob 1 +appear 1 +multimedia 1 +multipl 1 +content 1 +codi 1 +haa 1 +niblack 1 +arya 1 +fagin 1 +flickner 1 +petkov 1 +schwarz 1 +thoma 1 +tork 1 +roth 1 +william 1 +wimmer 1 +ifip 1 +confer 1 +visualdatabas 1 +lausann 1 +switzerland 1 +march 1 +toward 1 +inform 1 +garlicapproach 1 +luniewski 1 +and 1 +ieee 1 +workshop 1 +issu 1 +dataengin 1 +ride 1 +taipei 1 +taiwan 1 +statu 1 +report 1 +oodbm 1 +benchmark 1 +withd 1 +dewitt 1 +kant 1 +naughton 1 +onobject 1 +program 1 +portland 1 +autom 1 +tune 1 +brown 1 +mehta 1 +livni 1 +thint 1 +larg 1 +santiago 1 +chile 1 +septemb 1 +make 1 +real 1 +persist 1 +initi 1 +smrc 1 +withb 1 +reinwald 1 +desslock 1 +lehman 1 +pirahesh 1 +srinivasan 1 +tarascon 1 +provenc 1 +franc 1 +franklin 1 +hall 1 +mcauliff 1 +schuh 1 +solomon 1 +tsatalo 1 +white 1 +zwill 1 +sigmodint 1 +minneapoli 1 +fine 1 +grain 1 +page 1 +server 1 +andm 1 +zaharioudaki 1 +sigmod 1 +managementof 1 +memori 1 +pang 1 +accur 1 +model 1 +hybrid 1 +hash 1 +join 1 +algorithm 1 +patel 1 +vernon 1 +sigmetr 1 +measur 1 +modelingof 1 +nashvil 1 +index 1 +multivers 1 +lock 1 +bober 1 +technolog 1 +cambridg 1 +england 1 +client 1 +cach 1 +revisit 1 +indistribut 1 +oszu 1 +dayal 1 +andp 1 +valduriez 1 +morgan 1 +kaufmann 1 +publish 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..ea279d3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,30 @@ +chin 1 +tang 1 +home 1 +pagechin 1 +tanggradu 1 +student 1 +univers 1 +wisconsin 1 +depart 1 +west 1 +dayton 1 +streetmadison 1 +offic 1 +bldg 1 +mail 1 +cchin 1 +wisc 1 +edutelephon 1 +current 1 +assign 1 +introduct 1 +data 1 +structur 1 +hour 1 +mondai 1 +tuesdai 1 +fridai 1 +ameduc 1 +biochemistri 1 +madison 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..d3b9ae63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,37 @@ +satish 1 +chandra 1 +home 1 +page 1 +wisc 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaphon 1 +interest 1 +summari 1 +public 1 +come 1 +soon 1 +real 1 +stuff 1 +wodehous 1 +book 1 +internet 1 +movi 1 +databas 1 +nostalgia 1 +york 1 +time 1 +altavista 1 +italian 1 +languag 1 +cultur 1 +miscellan 1 +linksclick 1 +log 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..e8670013 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,91 @@ +trishul 1 +chilimbi 1 +home 1 +page 1 +wisc 1 +click 1 +real 1 +megradu 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaadvisor 1 +laru 1 +interest 1 +program 1 +languag 1 +compil 1 +architectur 1 +parallel 1 +integr 1 +share 1 +memori 1 +messag 1 +pass 1 +perform 1 +analysi 1 +enhanc 1 +visual 1 +designresearch 1 +project 1 +wind 1 +tunneleduc 1 +univers 1 +tech 1 +indian 1 +institut 1 +technolog 1 +bombai 1 +summari 1 +publicationscachi 1 +tool 1 +automat 1 +insert 1 +cico 1 +annot 1 +jame 1 +intern 1 +confer 1 +process 1 +icpp 1 +august 1 +stormwatch 1 +system 1 +protocolstrishul 1 +thoma 1 +ball 1 +stephen 1 +eick 1 +supercomput 1 +appear 1 +decemb 1 +award 1 +honor 1 +certif 1 +merit 1 +state 1 +mathemat 1 +olympiadpresid 1 +gold 1 +medal 1 +nation 1 +physic 1 +examinationcertif 1 +examin 1 +chemistrycertif 1 +electron 1 +miscellan 1 +movi 1 +dream 1 +curriculum 1 +vita 1 +last 1 +updat 1 +mail 1 +suggest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..8d87d497 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,31 @@ +chandrasekaran 1 +sashikanth 1 +home 1 +page 1 +csashi 1 +wisc 1 +graduat 1 +studentdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaadvisor 1 +mark 1 +hill 1 +project 1 +educ 1 +btech 1 +indian 1 +institut 1 +technolog 1 +madra 1 +june 1 +univeristi 1 +depart 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..8543a6ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,106 @@ +curt 1 +ellmann 1 +paradis 1 +databas 1 +project 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +wisc 1 +item 1 +focu 1 +java 1 +relat 1 +home 1 +page 1 +develop 1 +webgnat 1 +defect 1 +track 1 +prototyp 1 +index 1 +shore 1 +previou 1 +life 1 +doit 1 +eosdi 1 +opengi 1 +consortium 1 +global 1 +posit 1 +system 1 +calmit 1 +nebraska 1 +lincoln 1 +feder 1 +approach 1 +object 1 +manag 1 +group 1 +free 1 +list 1 +standard 1 +transact 1 +process 1 +perform 1 +council 1 +illustra 1 +white 1 +papersmiscellan 1 +sitescampu 1 +site 1 +wyrm 1 +hoard 1 +wiscinfo 1 +gopher 1 +librari 1 +wiscnet 1 +netcorpor 1 +appl 1 +microsoft 1 +research 1 +land 1 +paww 1 +commerc 1 +metrowerk 1 +taligentsearch 1 +savvi 1 +search 1 +webcrawl 1 +open 1 +text 1 +worm 1 +network 1 +inform 1 +intern 1 +organ 1 +internet 1 +draft 1 +dilbert 1 +world 1 +onlin 1 +winsock 1 +applic 1 +current 1 +weather 1 +map 1 +dienst 1 +implement 1 +geolog 1 +survei 1 +govern 1 +locat 1 +gil 1 +oakridg 1 +nation 1 +center 1 +stock 1 +market 1 +datacurt 1 +ellmanncurt 1 +eduparadis 1 +projectdepart 1 +sciencesunivers 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..08cf288e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,22 @@ +chee 1 +yong 1 +home 1 +pagechan 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +email 1 +cychan 1 +wisc 1 +offic 1 +phone 1 +page 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..79128fdb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,45 @@ +sara 1 +bauman 1 +home 1 +page 1 +dailei 1 +baumandailei 1 +wisc 1 +edugradu 1 +program 1 +mathemat 1 +comput 1 +engin 1 +mace 1 +univers 1 +wisconsin 1 +madison 1 +scienc 1 +depart 1 +mechan 1 +astronaut 1 +nuclear 1 +physic 1 +educ 1 +math 1 +lewi 1 +clark 1 +colleg 1 +research 1 +work 1 +public 1 +current 1 +schedul 1 +link 1 +friend 1 +pagessend 1 +mail 1 +offic 1 +address 1 +statist 1 +west 1 +dayton 1 +street 1 +last 1 +modifi 1 +daileytu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..6c902116 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,313 @@ +david 1 +wood 1 +home 1 +page 1 +wisc 1 +associ 1 +professor 1 +comput 1 +scienceand 1 +electr 1 +engineeringdepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usadavid 1 +eduphon 1 +secretari 1 +juli 1 +fingerson 1 +thea 1 +sklenar 1 +department 1 +offic 1 +research 1 +interest 1 +architectur 1 +especi 1 +memori 1 +system 1 +design 1 +uniprocessor 1 +multiprocessor 1 +implement 1 +program 1 +parallel 1 +oper 1 +perform 1 +evalu 1 +tool 1 +techniqu 1 +analysi 1 +vlsi 1 +includ 1 +power 1 +portabl 1 +project 1 +wind 1 +tunnel 1 +wart 1 +educ 1 +univers 1 +california 1 +berkelei 1 +current 1 +graduat 1 +student 1 +babak 1 +falsafi 1 +steve 1 +reinhardt 1 +brian 1 +toonenrec 1 +rahmat 1 +hyder 1 +intel 1 +alvi 1 +lebeck 1 +duke 1 +pfile 1 +microsystem 1 +mark 1 +callaghan 1 +informix 1 +cours 1 +teach 1 +fall 1 +introduct 1 +architecturec 1 +machin 1 +organ 1 +programmingc 1 +advanc 1 +select 1 +recent 1 +paper 1 +decoupl 1 +hardwar 1 +support 1 +distribut 1 +share 1 +memorysteven 1 +robert 1 +anddavid 1 +ieee 1 +intern 1 +symposium 1 +isca 1 +coher 1 +network 1 +interfac 1 +fine 1 +grain 1 +communicationshubhendu 1 +mukherje 1 +hill 1 +synchron 1 +workstat 1 +costrahmat 1 +confer 1 +supercomput 1 +dynam 1 +self 1 +invalid 1 +reduc 1 +overhead 1 +multiprocessorsalvin 1 +june 1 +activ 1 +abstract 1 +simulationalvin 1 +sigmetricsmai 1 +accuraci 1 +simul 1 +interconnect 1 +dougla 1 +burger 1 +proceed 1 +process 1 +april 1 +applic 1 +specif 1 +protocol 1 +user 1 +level 1 +alvin 1 +steven 1 +ioanni 1 +schoina 1 +jame 1 +laru 1 +ann 1 +roger 1 +access 1 +control 1 +asplo 1 +tempest 1 +typhoon 1 +cach 1 +profil 1 +spec 1 +benchmark 1 +case 1 +studi 1 +octob 1 +cooper 1 +softwar 1 +scalabl 1 +transact 1 +toc 1 +novemb 1 +annot 1 +bibliographi 1 +new 1 +decemb 1 +line 1 +version 1 +revis 1 +frequent 1 +madhusudhan 1 +talluri 1 +august 1 +summari 1 +main 1 +goal 1 +develop 1 +cost 1 +effect 1 +computerarchitectur 1 +take 1 +advantag 1 +rapidli 1 +chang 1 +technolog 1 +myresearch 1 +major 1 +thrust 1 +feasibl 1 +correct 1 +facilit 1 +focuss 1 +follow 1 +three 1 +area 1 +multi 1 +paradigm 1 +effici 1 +integr 1 +messag 1 +pass 1 +hybridprogram 1 +virtual 1 +prototyp 1 +exploit 1 +similaritesof 1 +exist 1 +hypothet 1 +understand 1 +tune 1 +result 1 +calledtempest 1 +handler 1 +suppliedmechan 1 +provid 1 +mechan 1 +allow 1 +programm 1 +compil 1 +librari 1 +transpar 1 +hybrid 1 +combin 1 +tempestmechan 1 +bulk 1 +data 1 +transfer 1 +virtualmemori 1 +manag 1 +novelmechan 1 +tagblock 1 +byte 1 +read 1 +write 1 +theloc 1 +us 1 +remot 1 +explor 1 +altern 1 +wai 1 +first 1 +call 1 +propos 1 +hardwareplatform 1 +fulli 1 +processor 1 +revers 1 +translationt 1 +rtlb 1 +invok 1 +detect 1 +grainaccess 1 +fault 1 +found 1 +thata 1 +run 1 +performscompar 1 +anal 1 +five 1 +memoryprogram 1 +also 1 +method 1 +thatoptim 1 +common 1 +hit 1 +significantli 1 +reducingsimul 1 +time 1 +fast 1 +tightli 1 +refer 1 +gener 1 +byprovid 1 +tag 1 +block 1 +referenceinvok 1 +specifi 1 +function 1 +depend 1 +upon 1 +type 1 +andmemori 1 +state 1 +processedbi 1 +manipul 1 +special 1 +null 1 +functionfor 1 +action 1 +usingbinari 1 +rewrit 1 +tabl 1 +lookup 1 +memoryrefer 1 +sparcstat 1 +tothre 1 +faster 1 +convent 1 +trace 1 +driven 1 +thatcal 1 +procedur 1 +onlythre 1 +slower 1 +origin 1 +instrument 1 +investig 1 +binari 1 +techniquesto 1 +platform 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..7e1e56ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,30 @@ +doug 1 +burger 1 +home 1 +page 1 +pageprofession 1 +inform 1 +research 1 +summaryresum 1 +cvtranscriptcours 1 +projectsadvisoraffili 1 +project 1 +galileo 1 +sciwisconsin 1 +wind 1 +tunnelpag 1 +maintain 1 +comput 1 +architectureuw 1 +architecturesimplescalar 1 +tool 1 +setgenericasacmperson 1 +stuff 1 +meus 1 +linksphoto 1 +galleryrid 1 +demonhunt 1 +damn 1 +catsbewar 1 +grad 1 +school 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..2d65ced8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,172 @@ +carl 1 +boor 1 +home 1 +page 1 +last 1 +chang 1 +professor 1 +comput 1 +scienc 1 +mathematicsdepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaoffic 1 +hour 1 +fall 1 +town 1 +schoenberg 1 +work 1 +death 1 +email 1 +deboor 1 +wisc 1 +telephon 1 +schedul 1 +teach 1 +look 1 +former 1 +present 1 +student 1 +select 1 +recent 1 +articl 1 +approxim 1 +theori 1 +written 1 +areavail 1 +anonym 1 +approx 1 +read 1 +file 1 +provid 1 +access 1 +individu 1 +theclick 1 +button 1 +clickabl 1 +version 1 +small 1 +subset 1 +author 1 +click 1 +list 1 +errata 1 +third 1 +edit 1 +cont 1 +elementari 1 +numer 1 +analysi 1 +algorithm 1 +approach 1 +variou 1 +print 1 +practic 1 +guid 1 +spline 1 +check 1 +latest 1 +thevari 1 +program 1 +driver 1 +latter 1 +book 1 +journal 1 +ofapproxim 1 +publish 1 +academ 1 +press 1 +inform 1 +includ 1 +accept 1 +publishedpap 1 +well 1 +postal 1 +address 1 +mani 1 +andmuch 1 +much 1 +ditto 1 +forconstruct 1 +springer 1 +verlag 1 +foreast 1 +search 1 +theirtabl 1 +content 1 +singli 1 +combin 1 +thank 1 +paul 1 +nevai 1 +thishandi 1 +tool 1 +alsoapproxim 1 +amo 1 +slist 1 +homepag 1 +bibliographi 1 +avail 1 +link 1 +peopl 1 +resourc 1 +ila 1 +center 1 +seek 1 +shall 1 +find 1 +organ 1 +introduct 1 +joi 1 +seeviva_vi 1 +alsoon 1 +screen 1 +tutori 1 +great 1 +pictur 1 +hermit 1 +place 1 +also 1 +contain 1 +us 1 +html 1 +thehtml 1 +primermight 1 +even 1 +better 1 +unusu 1 +ever_chang 1 +david 1 +griffeath 1 +sprimordi 1 +soup 1 +kitchen 1 +interest 1 +seeodd 1 +end 1 +allan 1 +pinku 1 +techunix 1 +technion 1 +nevaiif 1 +makehi 1 +mathemat 1 +outputavail 1 +cours 1 +math 1 +hous 1 +next 1 +door 1 +occupi 1 +taki 1 +souganid 1 +andthaleia 1 +zariphopoul 1 +szego 1 +bust 1 +stand 1 +inscript 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..e18abbc7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,175 @@ +devis 1 +home 1 +pagedevis 1 +environ 1 +data 1 +explor 1 +visualizationt 1 +content 1 +featuresexamplesin 1 +depthpublicationsrel 1 +workreleasecontactsfeaturesthes 1 +featur 1 +distinguish 1 +visual 1 +queri 1 +interfac 1 +construct 1 +oneset 1 +save 1 +appli 1 +input 1 +larger 1 +memori 1 +effici 1 +handl 1 +map 1 +graphic 1 +record 1 +level 1 +cancontrol 1 +color 1 +shape 1 +individu 1 +abil 1 +us 1 +repres 1 +flexibl 1 +layout 1 +mechan 1 +within 1 +window 1 +help 1 +user 1 +group 1 +togeth 1 +comparison 1 +asid 1 +need 1 +link 1 +ax 1 +cursor 1 +compar 1 +relationship 1 +differ 1 +viewsof 1 +base 1 +direct 1 +ascii 1 +file 1 +integ 1 +float 1 +date 1 +string 1 +type 1 +examplescheck 1 +follow 1 +exampl 1 +cool 1 +pictur 1 +quick 1 +introduct 1 +tree 1 +validationmolecular 1 +biologi 1 +cell 1 +imag 1 +soil 1 +scienc 1 +birch 1 +clusteringfinanci 1 +explorationfamili 1 +medicin 1 +nation 1 +climatedata 1 +centergeograph 1 +inform 1 +systemsoil 1 +sciencefil 1 +serverprogram 1 +tracesclin 1 +mani 1 +moreexampl 1 +viewer 1 +famili 1 +depthfor 1 +detail 1 +descript 1 +model 1 +visualizationvisu 1 +interfaceperform 1 +issuespublicationsmiron 1 +livni 1 +raghu 1 +ramakrishnan 1 +jussi 1 +myllymaki 1 +larg 1 +dataset 1 +proceed 1 +spie 1 +confer 1 +dataexplor 1 +analysi 1 +januari 1 +michael 1 +cheng 1 +miron 1 +stream 1 +inproceed 1 +andanalysi 1 +februari 1 +praveenseshadri 1 +next 1 +sequencequeri 1 +intern 1 +themanag 1 +comad 1 +decemb 1 +relat 1 +workth 1 +seqproject 1 +complementari 1 +design 1 +queryrecord 1 +sequenc 1 +output 1 +bevisu 1 +releas 1 +informationw 1 +current 1 +version 1 +executablesfor 1 +solari 1 +platform 1 +dynam 1 +ld_library_path 1 +variabl 1 +appropri 1 +rundevis 1 +support 1 +architectur 1 +execut 1 +arestat 1 +requir 1 +shareabl 1 +librari 1 +time 1 +download 1 +click 1 +contactsfor 1 +research 1 +project 1 +contactmiron 1 +guangshun 1 +chen 1 +kent 1 +wenger 1 +hotlin 1 +send 1 +mail 1 +usersupport 1 +page 1 +access 1 +sinc 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..a8b39b18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,200 @@ +home 1 +page 1 +david 1 +dewitt 1 +professor 1 +romn 1 +fellow 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +michigan 1 +interest 1 +object 1 +orient 1 +databas 1 +system 1 +parallel 1 +databasebenchmark 1 +geograph 1 +inform 1 +research 1 +summari 1 +main 1 +project 1 +shore 1 +paradis 1 +objectiveof 1 +design 1 +implement 1 +evalu 1 +persist 1 +objectsystem 1 +serv 1 +need 1 +wide 1 +varieti 1 +target 1 +applicationsinclud 1 +hardwar 1 +softwar 1 +programminglanguag 1 +satellit 1 +data 1 +repositori 1 +multimedia 1 +applic 1 +expand 1 +basic 1 +capabilitiesof 1 +us 1 +exodu 1 +storag 1 +manag 1 +develop 1 +fund 1 +arpa 1 +number 1 +wai 1 +includ 1 +support 1 +typedobject 1 +multipl 1 +program 1 +languag 1 +unix 1 +like 1 +hierarchicalnam 1 +space 1 +name 1 +compat 1 +interfaceto 1 +text 1 +field 1 +interfac 1 +intend 1 +toeas 1 +transit 1 +file 1 +systemenviron 1 +exist 1 +tool 1 +ccwill 1 +abl 1 +store 1 +without 1 +modif 1 +becom 1 +either 1 +singl 1 +orth 1 +complex 1 +rang 1 +environ 1 +scale 1 +fromindividu 1 +workstat 1 +heterogen 1 +client 1 +server 1 +networksto 1 +larg 1 +multiprocessor 1 +intel 1 +paragon 1 +ajoint 1 +prof 1 +carei 1 +naughton 1 +solomon 1 +attempt 1 +appli 1 +technolog 1 +developeda 1 +part 1 +gamma 1 +relationaldatabas 1 +thetask 1 +manipul 1 +set 1 +current 1 +mani 1 +relat 1 +databasesystem 1 +hold 1 +excel 1 +formanag 1 +busi 1 +poor 1 +match 1 +modelingne 1 +must 1 +capabl 1 +manipulatingmuch 1 +polygon 1 +polylin 1 +instead 1 +emploi 1 +model 1 +provid 1 +muchbett 1 +type 1 +anoth 1 +signific 1 +differencefrom 1 +parallelismto 1 +facilit 1 +execut 1 +process 1 +assatellit 1 +imag 1 +platform 1 +projecti 1 +cluster 1 +sparc 1 +connect 1 +sampl 1 +recent 1 +public 1 +benchmark 1 +withm 1 +proceed 1 +sigmod 1 +confer 1 +washington 1 +persistentappl 1 +franklin 1 +hall 1 +mcauliff 1 +chuh 1 +tsatalo 1 +white 1 +zwill 1 +intern 1 +conferenceon 1 +minneapoli 1 +kabra 1 +patel 1 +proceedingsof 1 +base 1 +santiego 1 +chile 1 +august 1 +talk 1 +vldb 1 +invit 1 +summit 1 +present 1 +automat 1 +creat 1 +januari 1 +pub 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..6b8a2815 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,326 @@ +charl 1 +dyer 1 +home 1 +pagecharl 1 +dyerprofessordepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +wisc 1 +edutelephon 1 +finger 1 +infoph 1 +univers 1 +maryland 1 +curriculum 1 +vita 1 +area 1 +interest 1 +vision 1 +three 1 +dimension 1 +shape 1 +represent 1 +appear 1 +model 1 +view 1 +synthesi 1 +activ 1 +visualizationgroup 1 +leader 1 +groupprogram 1 +chair 1 +cvpr 1 +research 1 +synthesisth 1 +goal 1 +work 1 +develop 1 +basic 1 +tool 1 +controllingin 1 +real 1 +time 1 +either 1 +autonom 1 +interact 1 +virtual 1 +cameraof 1 +environ 1 +input 1 +imag 1 +videostream 1 +acquir 1 +fix 1 +mobil 1 +camera 1 +around 1 +site 1 +output 1 +panoram 1 +visual 1 +scene 1 +whicha 1 +user 1 +control 1 +move 1 +technolog 1 +could 1 +navig 1 +througha 1 +custom 1 +path 1 +thesit 1 +predetermin 1 +main 1 +researchquest 1 +adapt 1 +combin 1 +basi 1 +synthesizenew 1 +without 1 +reconstructiona 1 +intermedi 1 +step 1 +recent 1 +innovativetechniqu 1 +callview 1 +morph 1 +take 1 +basisimag 1 +interpol 1 +continu 1 +rang 1 +correspond 1 +linear 1 +connect 1 +center 1 +explorationcomput 1 +start 1 +investig 1 +howto 1 +acquisit 1 +process 1 +controllingcamera 1 +paramet 1 +studi 1 +purposefulli 1 +theposit 1 +orient 1 +order 1 +dynam 1 +adjustviewpoint 1 +base 1 +theus 1 +viewpoint 1 +behavior 1 +import 1 +forsolv 1 +task 1 +explor 1 +unknown 1 +object 1 +findspecif 1 +surfac 1 +mark 1 +build 1 +global 1 +unknownshap 1 +recogn 1 +coordin 1 +simpl 1 +observ 1 +chang 1 +appearanceof 1 +well 1 +defin 1 +simplifi 1 +computationsrequir 1 +make 1 +precis 1 +progress 1 +algorithm 1 +andelimin 1 +need 1 +accur 1 +differenti 1 +measur 1 +thecamera 1 +motion 1 +believ 1 +approach 1 +towardsviewpoint 1 +close 1 +relat 1 +geometri 1 +viewedobject 1 +gener 1 +us 1 +thisapproach 1 +provabl 1 +correct 1 +asid 1 +revolut 1 +recov 1 +reconstruct 1 +smooth 1 +arbitrarili 1 +visualizationin 1 +map 1 +techniquescap 1 +displai 1 +possibl 1 +data 1 +type 1 +specificgraph 1 +procedur 1 +capabl 1 +displayingarbitrari 1 +commonfram 1 +refer 1 +coupl 1 +algorithmexecut 1 +provid 1 +power 1 +understand 1 +especi 1 +experi 1 +scientif 1 +dataanalysi 1 +implement 1 +system 1 +call 1 +forexperi 1 +techniqu 1 +visualizingintermedi 1 +final 1 +result 1 +analysi 1 +forproblem 1 +discrimin 1 +cloud 1 +satellit 1 +public 1 +seitz 1 +cyclic 1 +period 1 +trace 1 +recognit 1 +shah 1 +jain 1 +kluwer 1 +boston 1 +invari 1 +proc 1 +siggraph 1 +toward 1 +conf 1 +pattern 1 +track 1 +recoveri 1 +stationari 1 +contour 1 +advanc 1 +festschrift 1 +azriel 1 +rosenfeld 1 +ieee 1 +societi 1 +press 1 +alamito 1 +kutulako 1 +purpos 1 +artifici 1 +intellig 1 +complet 1 +four 1 +point 1 +physic 1 +valid 1 +workshop 1 +adjust 1 +seal 1 +lumelski 1 +strategi 1 +guid 1 +dimens 1 +robot 1 +autom 1 +occlud 1 +detect 1 +affin 1 +irregular 1 +rigid 1 +articul 1 +hibbard 1 +paul 1 +battaiola 1 +santek 1 +voidrot 1 +martinez 1 +earth 1 +space 1 +scienc 1 +juli 1 +lattic 1 +includ 1 +abstract 1 +groupcours 1 +taught 1 +introduct 1 +spring 1 +fall 1 +current 1 +student 1 +gareth 1 +bestor 1 +brian 1 +morgan 1 +steve 1 +liangyin 1 +yuph 1 +graduat 1 +bill 1 +whibbard 1 +macc 1 +onlattic 1 +structur 1 +kiriako 1 +kyro 1 +rochest 1 +ofobserv 1 +allmen 1 +iutech 1 +sequenc 1 +descript 1 +spatiotempor 1 +flow 1 +curv 1 +brent 1 +dimensionalshap 1 +machin 1 +graphic 1 +harri 1 +plantinga 1 +wheaton 1 +viewer 1 +representationfor 1 +stewart 1 +connectionist 1 +stereo 1 +bradlei 1 +kjell 1 +ccsua 1 +ctstateu 1 +edg 1 +separ 1 +textur 1 +measureslink 1 +interestmi 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..ee954bcf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,19 @@ +welcom 1 +friend 1 +machinew 1 +appreci 1 +patienc 1 +long 1 +arduou 1 +task 1 +bring 1 +better 1 +home 1 +page 1 +check 1 +educ 1 +curriculum 1 +vitaecheck 1 +class 1 +teach 1 +section 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..dcdf54aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,165 @@ +eric 1 +hazen 1 +home 1 +pageer 1 +hazennon 1 +professorroom 1 +comput 1 +scienc 1 +west 1 +dayton 1 +madison 1 +current 1 +work 1 +fornet 1 +scout 1 +servic 1 +project 1 +intern 1 +wait 1 +part 1 +registr 1 +couldn 1 +help 1 +domain 1 +name 1 +problem 1 +even 1 +want 1 +neat 1 +thing 1 +monei 1 +locat 1 +depart 1 +univers 1 +ofwisconsin 1 +see 1 +could 1 +never 1 +explain 1 +elegantli 1 +fearless 1 +leader 1 +susan 1 +calcari 1 +check 1 +page 1 +offici 1 +explan 1 +design 1 +mainten 1 +site 1 +email 1 +list 1 +report 1 +also 1 +assist 1 +pete 1 +devri 1 +withtech 1 +support 1 +mac 1 +unix 1 +machin 1 +come 1 +spent 1 +half 1 +year 1 +laboratori 1 +molecular 1 +biologi 1 +time 1 +full 1 +student 1 +well 1 +call 1 +technic 1 +digit 1 +video 1 +specialist 1 +meant 1 +around 1 +fix 1 +peopl 1 +broken 1 +mice 1 +answer 1 +question 1 +lucki 1 +make 1 +cool 1 +interest 1 +us 1 +instruct 1 +purpos 1 +multimedia 1 +graduat 1 +wisconsin 1 +philosophi 1 +program 1 +philosoph 1 +real 1 +commod 1 +capitalist 1 +societi 1 +complet 1 +requir 1 +curriculum 1 +shameless 1 +pragmatist 1 +talk 1 +sinc 1 +good 1 +metaphys 1 +discuss 1 +link 1 +serv 1 +practic 1 +makethi 1 +look 1 +standard 1 +resum 1 +date 1 +made 1 +anim 1 +shown 1 +world 1 +among 1 +drosophila 1 +geneticist 1 +told 1 +wonder 1 +girlfriend 1 +salon 1 +magazin 1 +entertain 1 +inform 1 +ezin 1 +creat 1 +lauri 1 +anderson 1 +green 1 +room 1 +shockwav 1 +kudon 1 +know 1 +wit 1 +quicktimevr 1 +documentari 1 +plight 1 +bosnia 1 +uproot 1 +popul 1 +billi 1 +holidai 1 +homepag 1 +nation 1 +secur 1 +archiv 1 +nixon 1 +preslei 1 +meetingsejhazen 1 +facstaff 1 +wisc 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..0dcb841c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,13 @@ +tina 1 +eliassi 1 +home 1 +pagetina 1 +univers 1 +illinoi 1 +urbana 1 +champaign 1 +wisconsin 1 +madison 1 +offic 1 +bldgphone 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..b57c2f85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,72 @@ +eric 1 +home 1 +page 1 +passsth 1 +anoth 1 +cold 1 +budweisth 1 +address 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +electr 1 +engin 1 +johnson 1 +drive 1 +offic 1 +phone 1 +mail 1 +ericro 1 +wisc 1 +research 1 +area 1 +architectur 1 +advisor 1 +professor 1 +smithresearch 1 +topic 1 +kestrel 1 +multiscalar 1 +project 1 +instruct 1 +level 1 +parallel 1 +high 1 +bandwidth 1 +fetch 1 +mechan 1 +branch 1 +predict 1 +confid 1 +mispredict 1 +tolerancepubl 1 +trace 1 +cach 1 +latenc 1 +approach 1 +rotenberg 1 +steve 1 +bennett 1 +jame 1 +smith 1 +appear 1 +proceed 1 +annual 1 +intern 1 +symposium 1 +microarchitectur 1 +decemb 1 +assign 1 +condit 1 +erik 1 +jacobsen 1 +technic 1 +report 1 +april 1 +resum 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..6d677f5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,69 @@ +babak 1 +falsafi 1 +home 1 +page 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usatel 1 +email 1 +wisc 1 +work 1 +peopl 1 +mentorcultresearch 1 +interest 1 +architectur 1 +perform 1 +evalu 1 +measur 1 +system 1 +parallel 1 +program 1 +modelseduc 1 +scienc 1 +univers 1 +decemb 1 +suni 1 +buffalo 1 +june 1 +electr 1 +engin 1 +miscellan 1 +public 1 +would 1 +rather 1 +drink 1 +read 1 +listen 1 +us 1 +high 1 +school 1 +idea 1 +like 1 +fail 1 +morf 1 +shubu 1 +dionosi 1 +hillari 1 +profan 1 +phone 1 +convers 1 +check 1 +american 1 +french 1 +queen 1 +architect 1 +look 1 +hack 1 +partner 1 +crime 1 +next 1 +gener 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..d40e9961 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,117 @@ +home 1 +page 1 +michael 1 +ferri 1 +associ 1 +professor 1 +comput 1 +scienc 1 +industri 1 +engineeringand 1 +member 1 +center 1 +mathemat 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +madison 1 +telephon 1 +email 1 +wisc 1 +cambridg 1 +interest 1 +theori 1 +algorithm 1 +applic 1 +program 1 +research 1 +summari 1 +look 1 +robust 1 +method 1 +solv 1 +larg 1 +scale 1 +variationalinequ 1 +nonlinear 1 +problem 1 +toproblem 1 +econom 1 +engin 1 +pivot 1 +path 1 +followingtechniqu 1 +investig 1 +base 1 +success 1 +linear 1 +emphasi 1 +numer 1 +properti 1 +andinterfac 1 +model 1 +languag 1 +particular 1 +beingconsid 1 +includ 1 +equilibria 1 +effect 1 +taxat 1 +oncarbon 1 +emiss 1 +traffic 1 +congest 1 +toll 1 +structur 1 +optim 1 +contact 1 +chemic 1 +process 1 +design 1 +consid 1 +parallel 1 +architectur 1 +solvingproblem 1 +graph 1 +partitioningtechniqu 1 +determin 1 +underli 1 +tool 1 +gener 1 +purpos 1 +techniqu 1 +forexploit 1 +machin 1 +directli 1 +within 1 +system 1 +arealso 1 +consider 1 +prototyp 1 +us 1 +condor 1 +extens 1 +complementar 1 +framework 1 +also 1 +beinginvestig 1 +identifi 1 +exploit 1 +underlyingmodel 1 +public 1 +complet 1 +list 1 +paper 1 +mostli 1 +electron 1 +avail 1 +relev 1 +link 1 +cpnet 1 +prgram 1 +pagec 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..0dac257a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,254 @@ +david 1 +finton 1 +home 1 +page 1 +wisc 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +welcom 1 +grad 1 +student 1 +research 1 +nerdin 1 +artifici 1 +intelligenceher 1 +grew 1 +grand 1 +rapid 1 +michigan 1 +late 1 +show 1 +offic 1 +earn 1 +degre 1 +math 1 +state 1 +master 1 +scienceher 1 +dissert 1 +institut 1 +take 1 +littl 1 +year 1 +develop 1 +traffic 1 +measur 1 +softwarefor 1 +first 1 +thesi 1 +advisor 1 +left 1 +trusti 1 +nextstationor 1 +librari 1 +enjoyplai 1 +trumpet 1 +piano 1 +listen 1 +longhair 1 +music 1 +plai 1 +volleybal 1 +intervarsityfolk 1 +contribut 1 +supersoak 1 +arm 1 +race 1 +comment 1 +feel 1 +free 1 +form 1 +send 1 +mail 1 +finger 1 +accountto 1 +current 1 +plan 1 +whether 1 +system 1 +gain 1 +employ 1 +introduct 1 +intellig 1 +project 1 +smart 1 +understand 1 +make 1 +machin 1 +intelligenti 1 +goal 1 +essenc 1 +intelligencei 1 +abil 1 +learn 1 +adapt 1 +actappropri 1 +order 1 +reach 1 +reinforc 1 +treat 1 +problem 1 +gener 1 +case 1 +output 1 +control 1 +action 1 +chang 1 +environ 1 +input 1 +sens 1 +also 1 +weak 1 +kind 1 +feedback 1 +express 1 +posit 1 +neg 1 +number 1 +instead 1 +teacher 1 +present 1 +thesystem 1 +pair 1 +receiv 1 +thumb 1 +irregular 1 +interv 1 +work 1 +focuss 1 +need 1 +todistinguish 1 +good 1 +on 1 +direct 1 +process 1 +build 1 +agood 1 +represent 1 +term 1 +relev 1 +orimport 1 +featur 1 +note 1 +import 1 +basedfeatur 1 +extract 1 +appli 1 +notion 1 +balanc 1 +explor 1 +world 1 +perform 1 +optim 1 +exploit 1 +investig 1 +wai 1 +us 1 +learningprocess 1 +effici 1 +allow 1 +specifi 1 +start 1 +point 1 +experi 1 +activ 1 +better 1 +intelligentadapt 1 +hope 1 +provid 1 +basi 1 +whichwil 1 +benefit 1 +knowledg 1 +base 1 +task 1 +realli 1 +date 1 +sorri 1 +pagefor 1 +inform 1 +hotlistthi 1 +browser 1 +independ 1 +hotlist 1 +keep 1 +copi 1 +access 1 +platform 1 +combin 1 +actual 1 +bookmark 1 +file 1 +omniweb 1 +eleg 1 +function 1 +netscap 1 +opinion 1 +omniwebi 1 +avail 1 +nextstep 1 +foral 1 +openstep 1 +variant 1 +releas 1 +editori 1 +responseto 1 +jehovah 1 +wit 1 +deiti 1 +christwisconsin 1 +site 1 +intervars 1 +graduat 1 +fellowship 1 +check 1 +weatherin 1 +citi 1 +star 1 +trek 1 +program 1 +link 1 +isthmu 1 +daili 1 +pagesom 1 +favorit 1 +place 1 +visit 1 +nebula 1 +nasa 1 +pictur 1 +wide 1 +studi 1 +bibl 1 +crosssearch 1 +minor 1 +glenn 1 +gould 1 +homepag 1 +farsid 1 +voyagerent 1 +dilbert 1 +zoneroam 1 +virtual 1 +tourist 1 +stereogram 1 +tell 1 +head 1 +blow 1 +true 1 +next 1 +think 1 +bill 1 +gate 1 +word 1 +sponsor 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..bd426d87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,242 @@ +home 1 +page 1 +charl 1 +fischer 1 +nbsp 1 +nbspcharl 1 +nbspprofessor 1 +comput 1 +scienc 1 +nbspunivers 1 +wisconsin 1 +depart 1 +univers 1 +dayton 1 +madison 1 +telephon 1 +messag 1 +email 1 +wisc 1 +teach 1 +semest 1 +teachingc 1 +graduat 1 +cours 1 +compil 1 +spring 1 +undergradu 1 +research 1 +interest 1 +focu 1 +design 1 +implement 1 +recent 1 +best 1 +exploit 1 +enormouscap 1 +provid 1 +modern 1 +architectur 1 +issu 1 +student 1 +investig 1 +includ 1 +code 1 +schedul 1 +import 1 +avoid 1 +unnecessari 1 +pipelin 1 +delai 1 +haveinvestig 1 +optim 1 +arithmet 1 +express 1 +domin 1 +model 1 +global 1 +procedur 1 +level 1 +regist 1 +alloc 1 +graph 1 +color 1 +mayb 1 +explicitli 1 +quantifi 1 +likelihood 1 +benefit 1 +registerresid 1 +attract 1 +care 1 +vital 1 +gener 1 +loadsand 1 +store 1 +must 1 +ultim 1 +interprocedur 1 +theprocedur 1 +program 1 +analyz 1 +studiedinterprocedur 1 +modelsthat 1 +optimallyalloc 1 +among 1 +polynomi 1 +time 1 +approach 1 +seem 1 +effect 1 +practic 1 +anyon 1 +know 1 +easi 1 +make 1 +mistak 1 +error 1 +involv 1 +pointer 1 +arrai 1 +indic 1 +especi 1 +common 1 +studi 1 +wai 1 +toautomat 1 +check 1 +oper 1 +share 1 +memori 1 +multiprocessor 1 +workstat 1 +us 1 +processor 1 +anoth 1 +monitor 1 +possibl 1 +routin 1 +execut 1 +littl 1 +orno 1 +appar 1 +slowdown 1 +public 1 +minimum 1 +cost 1 +steven 1 +kurland 1 +popl 1 +sigplan 1 +sigact 1 +symposium 1 +principl 1 +programminglanguag 1 +januari 1 +concurr 1 +access 1 +harish 1 +patil 1 +appear 1 +insoftwar 1 +experi 1 +demand 1 +driven 1 +todd 1 +proebst 1 +inacm 1 +transact 1 +languag 1 +system 1 +effici 1 +instruct 1 +load 1 +zero 1 +rang 1 +split 1 +confer 1 +june 1 +activitiesa 1 +complet 1 +revis 1 +second 1 +edit 1 +craft 1 +author 1 +cytronand 1 +richard 1 +leblanc 1 +almost 1 +publish 1 +benjamin 1 +cum 1 +look 1 +soon 1 +better 1 +bookstor 1 +everywher 1 +short 1 +commun 1 +editor 1 +topla 1 +educationph 1 +cornel 1 +pars 1 +context 1 +free 1 +parallel 1 +environ 1 +supervis 1 +john 1 +william 1 +studentsdonn 1 +milton 1 +syntact 1 +specif 1 +analysi 1 +attribut 1 +grammar 1 +juli 1 +bruce 1 +rowland 1 +semant 1 +evalu 1 +stephen 1 +skedzielewski 1 +definit 1 +reevalu 1 +septemb 1 +bernard 1 +dion 1 +local 1 +least 1 +corrector 1 +sensitivepars 1 +decemb 1 +mahadevan 1 +ganapathi 1 +retarget 1 +novemb 1 +vimal 1 +begwami 1 +correct 1 +august 1 +maunei 1 +extend 1 +right 1 +gregori 1 +johnson 1 +sensit 1 +flow 1 +anil 1 +facil 1 +integr 1 +winsborough 1 +automat 1 +transpar 1 +logic 1 +venkatesh 1 +framework 1 +algorithm 1 +techniqu 1 +steve 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..1a1415e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,259 @@ +galileo 1 +home 1 +page 1 +project 1 +wisconsint 1 +contentsgalileoproject 1 +descriptionpublicationsrel 1 +projectssci 1 +wisconsinproject 1 +descriptionpublicationsproject 1 +membersgalileo 1 +wisconsingalileo 1 +conduct 1 +comput 1 +architectur 1 +groupat 1 +univers 1 +wisconsin 1 +madison 1 +focus 1 +medium 1 +long 1 +term 1 +evolut 1 +processor 1 +system 1 +emphasison 1 +memori 1 +specif 1 +studi 1 +therelationship 1 +main 1 +futuresystem 1 +complet 1 +separ 1 +todai 1 +integr 1 +extent 1 +process 1 +capabl 1 +storag 1 +merg 1 +least 1 +wai 1 +increas 1 +chip 1 +penalti 1 +issuabl 1 +instruct 1 +orlimit 1 +bandwidth 1 +design 1 +place 1 +capacityon 1 +modul 1 +eventu 1 +sizabl 1 +fractionof 1 +resid 1 +repres 1 +arrow 1 +label 1 +mopin 1 +diagram 1 +differ 1 +possibl 1 +migrat 1 +ofprocessor 1 +onto 1 +dram 1 +eventuallyobvi 1 +central 1 +iram 1 +research 1 +current 1 +follow 1 +area 1 +examin 1 +perform 1 +impact 1 +larg 1 +latenc 1 +andlimit 1 +futur 1 +microprocessor 1 +base 1 +systemsperform 1 +model 1 +variou 1 +point 1 +along 1 +theprocessor 1 +spectrumcach 1 +hierarchi 1 +systemsdesign 1 +bank 1 +systemprogram 1 +execut 1 +multipl 1 +datascalar 1 +public 1 +exploit 1 +optic 1 +interconnect 1 +elimin 1 +serial 1 +bottlenecksdoug 1 +burger 1 +jame 1 +goodman 1 +appear 1 +intern 1 +confer 1 +massiv 1 +parallel 1 +us 1 +octob 1 +spsd 1 +modeldoug 1 +stefano 1 +kaxira 1 +scienc 1 +depart 1 +technic 1 +report 1 +juli 1 +quantifi 1 +limit 1 +microprocessorsdoug 1 +alain 1 +symposium 1 +declin 1 +effect 1 +dynam 1 +cach 1 +gener 1 +purpos 1 +microprocessorsdougla 1 +januari 1 +relat 1 +berkeleyppram 1 +kyushu 1 +univeristi 1 +japansci 1 +wisconsinour 1 +group 1 +also 1 +close 1 +involv 1 +coherentshar 1 +multiprocessor 1 +scalabl 1 +coherentinterfac 1 +coher 1 +transport 1 +layer 1 +ieee 1 +standard 1 +platform 1 +explor 1 +idea 1 +specifi 1 +link 1 +list 1 +hardwar 1 +protocol 1 +includ 1 +support 1 +effici 1 +synchron 1 +primit 1 +queue 1 +lock 1 +qolb 1 +aswel 1 +optim 1 +share 1 +pattern 1 +pairwis 1 +fresh 1 +read 1 +definitionfor 1 +extrem 1 +high 1 +betweenprocess 1 +element 1 +individu 1 +cluster 1 +topic 1 +extend 1 +logarithm 1 +grow 1 +structureseffici 1 +multiprocessorsa 1 +extensionsaggress 1 +consist 1 +multiprocessorswisconsin 1 +mechan 1 +minim 1 +overhead 1 +applic 1 +best 1 +paper 1 +proceed 1 +supercomput 1 +simul 1 +wind 1 +tunneldougla 1 +second 1 +workshop 1 +cost 1 +march 1 +hierarch 1 +extens 1 +scijam 1 +interfac 1 +scale 1 +memoryross 1 +evan 1 +johnson 1 +februari 1 +nagi 1 +aboulenein 1 +stein 1 +gjess 1 +philip 1 +woest 1 +novemb 1 +topolog 1 +ringsross 1 +decemb 1 +analysi 1 +ringsteven 1 +scott 1 +mari 1 +vernon 1 +lower 1 +bound 1 +coherenceross 1 +june 1 +multiprocessorsphilip 1 +multiprocessorjam 1 +third 1 +program 1 +languag 1 +oper 1 +april 1 +particip 1 +faculti 1 +graduat 1 +student 1 +doug 1 +alumni 1 +abouleneinross 1 +johnsonstev 1 +scottlast 1 +modifi 1 +dburger 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..2002f307 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,34 @@ +andrew 1 +geeri 1 +home 1 +page 1 +wisc 1 +west 1 +dayton 1 +street 1 +madison 1 +regent 1 +madisonin 1 +comput 1 +scienc 1 +current 1 +work 1 +compsci 1 +grade 1 +schedul 1 +pontif 1 +peopl 1 +interest 1 +jacqu 1 +derrida 1 +post 1 +structur 1 +martin 1 +heidegg 1 +albert 1 +camu 1 +jean 1 +paul 1 +sartr 1 +friedrich 1 +nietzsch 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..1c7b4327 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,148 @@ +gideon 1 +glass 1 +homepag 1 +continu 1 +tweak 1 +thank 1 +stop 1 +collect 1 +imag 1 +find 1 +deposit 1 +directori 1 +sampl 1 +pictur 1 +gui 1 +claim 1 +cooler 1 +accord 1 +toonion 1 +seethi 1 +movi 1 +usual 1 +suspect 1 +walk 1 +nearest 1 +blockbust 1 +note 1 +recent 1 +move 1 +none 1 +roommat 1 +dutch 1 +cheap 1 +either 1 +given 1 +predica 1 +dismal 1 +prospect 1 +improv 1 +withno 1 +outsid 1 +influenc 1 +pleas 1 +consid 1 +make 1 +small 1 +donationto 1 +help 1 +defrai 1 +cost 1 +purchas 1 +check 1 +monei 1 +orderscan 1 +sent 1 +follow 1 +address 1 +send 1 +cash 1 +monro 1 +floor 1 +madison 1 +usathank 1 +support 1 +grad 1 +student 1 +sometim 1 +read 1 +paper 1 +eventhough 1 +shelf 1 +feet 1 +unread 1 +book 1 +wait 1 +anyhow 1 +look 1 +someth 1 +christian 1 +achil 1 +huge 1 +index 1 +might 1 +also 1 +unifi 1 +cstechreport 1 +class 1 +project 1 +report 1 +otherstuff 1 +avail 1 +program 1 +machin 1 +load 1 +averagewil 1 +grow 1 +fast 1 +main 1 +fork 1 +doofu 1 +actual 1 +time 1 +share 1 +back 1 +calvin 1 +great 1 +killer 1 +zippi 1 +pinheadha 1 +reload 1 +sever 1 +justtri 1 +last 1 +fall 1 +kill 1 +netscap 1 +noth 1 +think 1 +work 1 +mozilla 1 +higher 1 +well 1 +dabbl 1 +object 1 +orient 1 +programmingin 1 +mostli 1 +exercis 1 +suppos 1 +buttonher 1 +thing 1 +right 1 +suffic 1 +case 1 +told 1 +somethingin 1 +bookmark 1 +denni 1 +ritchi 1 +creator 1 +unix 1 +wrote 1 +anti 1 +forward 1 +hater 1 +handbook 1 +mailand 1 +mayb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..db565643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,406 @@ +andi 1 +krazi 1 +glew 1 +univers 1 +wisconsin 1 +home 1 +pageandi 1 +pagethi 1 +first 1 +page 1 +largelyform 1 +snippet 1 +intel 1 +stylishor 1 +organ 1 +summarycontact 1 +touch 1 +ship 1 +thing 1 +addressescalendar 1 +arrang 1 +meet 1 +filesystem 1 +access 1 +public 1 +html 1 +http 1 +wisc 1 +approach 1 +anyon 1 +read 1 +file 1 +unless 1 +specif 1 +open 1 +default 1 +scribe 1 +minut 1 +taker 1 +sohi 1 +research 1 +group 1 +weekli 1 +interest 1 +rather 1 +gener 1 +form 1 +adapt 1 +applic 1 +dynam 1 +versu 1 +static 1 +comput 1 +architectur 1 +wannab 1 +author 1 +high 1 +level 1 +edit 1 +thought 1 +higher 1 +educ 1 +suggest 1 +effect 1 +work 1 +patent 1 +claim 1 +fame 1 +miscellan 1 +info 1 +stuff 1 +self 1 +imag 1 +configur 1 +manag 1 +resum 1 +favorit 1 +quot 1 +sai 1 +beef 1 +montreal 1 +peopl 1 +care 1 +pope 1 +priest 1 +parson 1 +king 1 +william 1 +boyn 1 +want 1 +coin 1 +trust 1 +summer 1 +time 1 +keep 1 +harm 1 +see 1 +frost 1 +snow 1 +poor 1 +almighti 1 +dollar 1 +good 1 +mapl 1 +wood 1 +bellow 1 +warm 1 +church 1 +chapel 1 +ranter 1 +preacher 1 +beecher 1 +alreadi 1 +enough 1 +keeper 1 +harbour 1 +deplor 1 +churchmen 1 +notori 1 +atheist 1 +less 1 +well 1 +known 1 +chariti 1 +strand 1 +sailor 1 +knew 1 +could 1 +alwai 1 +exchang 1 +chop 1 +chord 1 +firewood 1 +meal 1 +place 1 +sleep 1 +print 1 +manifesto 1 +handbil 1 +advertis 1 +architect 1 +hackeralthough 1 +aspir 1 +someth 1 +formerlyhad 1 +fake 1 +motorola 1 +busi 1 +card 1 +ever 1 +sinc 1 +start 1 +learn 1 +assembl 1 +redesign 1 +chip 1 +unix 1 +kernel 1 +hacker 1 +gould 1 +real 1 +andstil 1 +think 1 +wistfulli 1 +beard 1 +frequent 1 +wear 1 +suspend 1 +bald 1 +architectureonc 1 +futur 1 +involv 1 +microarchitectur 1 +pentium 1 +adopt 1 +architecturei 1 +constantli 1 +verg 1 +write 1 +book 1 +entitl 1 +grabbag 1 +trick 1 +techniqu 1 +sort 1 +antidot 1 +hennessi 1 +patterson 1 +cannot 1 +afford 1 +diskspac 1 +internet 1 +servic 1 +provideror 1 +charg 1 +connect 1 +system 1 +appreci 1 +piec 1 +architectureon 1 +best 1 +wai 1 +textbook 1 +datasheet 1 +instruct 1 +refer 1 +wander 1 +netscapebookmarksstockscod 1 +standardsroi 1 +wilkinson 1 +code 1 +standardsi 1 +disagre 1 +mani 1 +although 1 +perhap 1 +quit 1 +quickli 1 +defunct 1 +startup 1 +compani 1 +call 1 +enfopris 1 +build 1 +workstat 1 +chang 1 +assign 1 +driver 1 +writingto 1 +integr 1 +longstand 1 +love 1 +hate 1 +relationship 1 +configurationmanag 1 +tool 1 +like 1 +scc 1 +publish 1 +paper 1 +box 1 +link 1 +parallel 1 +tree 1 +element 1 +usenix 1 +workshop 1 +softwar 1 +describ 1 +central 1 +databas 1 +multipl 1 +view 1 +hardlink 1 +clone 1 +save 1 +space 1 +us 1 +divis 1 +team 1 +brian 1 +berlin 1 +deprec 1 +mainli 1 +advoc 1 +optimist 1 +concurr 1 +control 1 +wherea 1 +lock 1 +actual 1 +also 1 +case 1 +version 1 +get 1 +livelock 1 +usual 1 +insist 1 +singl 1 +identifi 1 +serial 1 +schedul 1 +sourc 1 +checkinsso 1 +test 1 +proce 1 +linear 1 +manner 1 +requir 1 +programm 1 +previou 1 +fix 1 +appli 1 +recogn 1 +even 1 +relax 1 +often 1 +strip 1 +approachin 1 +apolog 1 +never 1 +creat 1 +truli 1 +portabl 1 +accomplish 1 +similar 1 +mike 1 +fetterman 1 +mark 1 +aitken 1 +deserv 1 +credit 1 +enhanc 1 +sever 1 +featur 1 +went 1 +notabl 1 +number 1 +becam 1 +overal 1 +suffici 1 +everyth 1 +includ 1 +cshrc 1 +login 1 +calendar 1 +wisconsinhow 1 +seem 1 +ubiquit 1 +programat 1 +depart 1 +variou 1 +cmtool 1 +domain 1 +ical 1 +plan 1 +critic 1 +mass 1 +anyof 1 +isol 1 +pilot 1 +associ 1 +least 1 +mean 1 +commit 1 +possibl 1 +record 1 +voic 1 +therefor 1 +must 1 +prefer 1 +email 1 +phone 1 +person 1 +manuallyadd 1 +download 1 +softwareto 1 +microsoft 1 +watch 1 +intelat 1 +devout 1 +user 1 +program 1 +last 1 +synchron 1 +ontim 1 +past 1 +weak 1 +disconnect 1 +oper 1 +allow 1 +major 1 +meetingswith 1 +without 1 +manual 1 +intervent 1 +algorithm 1 +tell 1 +reserveth 1 +right 1 +blindli 1 +invit 1 +urgent 1 +make 1 +admin 1 +teresa 1 +check 1 +proposeif 1 +week 1 +avoid 1 +bother 1 +send 1 +realiz 1 +miss 1 +sent 1 +advanc 1 +overallschedul 1 +topic 1 +fascin 1 +bring 1 +effici 1 +advantag 1 +secretariesand 1 +aid 1 +camp 1 +header 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..f43a9cbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,24 @@ +goodman 1 +home 1 +page 1 +jame 1 +wisc 1 +professor 1 +comput 1 +sciencesdepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaresearch 1 +interest 1 +lot 1 +good 1 +stuff 1 +current 1 +project 1 +galileo 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..65a5de8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,338 @@ +greg 1 +sharp 1 +home 1 +pagegreg 1 +pagenam 1 +sharpemail 1 +wisc 1 +eduoffic 1 +offic 1 +phone 1 +hour 1 +appt 1 +tgif 1 +everi 1 +fridai 1 +dept 1 +section 1 +instructor 1 +fall 1 +lectur 1 +note 1 +spring 1 +notesclass 1 +topic 1 +databas 1 +manag 1 +system 1 +find 1 +aboutsearch 1 +engin 1 +altavista 1 +dejanew 1 +excit 1 +infoseek 1 +lyco 1 +metacrawl 1 +yahoo 1 +usenet 1 +mirror 1 +html 1 +format 1 +ohioc 1 +program 1 +learn 1 +todai 1 +librari 1 +draft 1 +standard 1 +april 1 +stanford 1 +cygnu 1 +info 1 +refer 1 +mumit 1 +newbi 1 +guideplatform 1 +independ 1 +portabl 1 +develop 1 +kit 1 +amulet 1 +dclap 1 +string 1 +attach 1 +requir 1 +motif 1 +suit 1 +wxwindow 1 +yacl 1 +class 1 +introduct 1 +comput 1 +geometri 1 +architectur 1 +project 1 +machin 1 +projectclass 1 +numer 1 +linear 1 +algebra 1 +theoret 1 +scienc 1 +vision 1 +imag 1 +homework 1 +projectmisc 1 +freewar 1 +sharewar 1 +cygwin 1 +softwar 1 +internet 1 +directori 1 +gimp 1 +harmonai 1 +harmoni 1 +unix 1 +client 1 +hyper 1 +browser 1 +vasc 1 +altern 1 +link 1 +video 1 +pic 1 +research 1 +rang 1 +shape 1 +shade 1 +give 1 +pretti 1 +cool 1 +idea 1 +specifi 1 +night 1 +japan 1 +jaida 1 +year 1 +worth 1 +atmospher 1 +data 1 +multiresolut 1 +seamless 1 +click 1 +zoom 1 +resolut 1 +view 1 +solar 1 +nice 1 +moon 1 +planet 1 +comet 1 +meteor 1 +wyom 1 +version 1 +also 1 +lot 1 +misc 1 +stuff 1 +like 1 +overhead 1 +shot 1 +antarctica 1 +satelit 1 +catalogu 1 +niae 1 +gothic 1 +electr 1 +postcard 1 +card 1 +rack 1 +select 1 +thank 1 +todd 1 +vistex 1 +textur 1 +databaseartifici 1 +gener 1 +primoridi 1 +soup 1 +kitchen 1 +math 1 +depart 1 +awesom 1 +site 1 +medic 1 +line 1 +histori 1 +medicin 1 +document 1 +pictur 1 +diagon 1 +dermatolog 1 +onlin 1 +atla 1 +base 1 +erlang 1 +germani 1 +enter 1 +diagnosi 1 +back 1 +orthopaed 1 +ecvnet 1 +list 1 +optic 1 +charact 1 +recognit 1 +handwrit 1 +page 1 +nici 1 +groupimag 1 +process 1 +washington 1 +state 1 +univers 1 +graphic 1 +raytrac 1 +rayshad 1 +utah 1 +raster 1 +toolkit 1 +radianc 1 +tracer 1 +radios 1 +packag 1 +avalon 1 +archiv 1 +object 1 +grimstead 1 +massiv 1 +trace 1 +dsite 1 +hardwar 1 +board 1 +intergraph 1 +lockhe 1 +glint 1 +chipset 1 +nvidia 1 +chipsetcomput 1 +geometeri 1 +center 1 +applic 1 +challeng 1 +geometrylispuseless 1 +pagescomput 1 +hennessi 1 +patterson 1 +resourc 1 +text 1 +superdlx 1 +parallel 1 +simul 1 +parl 1 +mexico 1 +univ 1 +includ 1 +washingt 1 +group 1 +index 1 +georgia 1 +tech 1 +groupjapanes 1 +guid 1 +japanes 1 +unvers 1 +monash 1 +infowav 1 +edict 1 +window 1 +english 1 +dictionari 1 +shodouka 1 +asiasoftinform 1 +retrev 1 +peregrin 1 +robot 1 +travers 1 +written 1 +perl 1 +trec 1 +retriev 1 +infomin 1 +gigabyt 1 +search 1 +textual 1 +provid 1 +experi 1 +thoma 1 +feedback 1 +linguist 1 +util 1 +repositori 1 +inform 1 +tool 1 +survei 1 +natur 1 +languag 1 +nist 1 +other_sw 1 +info_retriev 1 +world 1 +wide 1 +wander 1 +spider 1 +jedi 1 +might 1 +strictli 1 +relat 1 +hartlib 1 +paper 1 +latin 1 +stemmer 1 +multimedia 1 +academ 1 +cours 1 +storag 1 +new 1 +pointcast 1 +check 1 +custom 1 +portfolio 1 +automat 1 +updat 1 +literatur 1 +mark 1 +twainhumor 1 +apolog 1 +citizen 1 +offens 1 +threw 1 +garbag 1 +belong 1 +invest 1 +investorweb 1 +networth 1 +fundscap 1 +brill 1 +editori 1 +servic 1 +stockmastermutu 1 +fund 1 +brokerag 1 +hous 1 +fidel 1 +vanguard 1 +row 1 +price 1 +jack 1 +white 1 +compani 1 +schwab 1 +charl 1 +gabelli 1 +mutualsmisc 1 +psnuplast 1 +modifi 1 +sharpgreg 1 +http 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..40ce5899 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar 1 +gopalsridhar 1 +gopalgsri 1 +wisc 1 +edubon 1 +marrow 1 +pageresumest 1 +wisconsin 1 +pagecalvin 1 +hobbesbookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..80e70c0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,58 @@ +guangshun 1 +home 1 +page 1 +welcom 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +dayton 1 +univers 1 +wisconsin 1 +madison 1 +phone 1 +offic 1 +educ 1 +california 1 +state 1 +angel 1 +peke 1 +physic 1 +grade 1 +research 1 +interest 1 +databas 1 +manag 1 +system 1 +advis 1 +raghu 1 +ramakrishnan 1 +miron 1 +livni 1 +project 1 +data 1 +analysi 1 +famili 1 +medicin 1 +devis 1 +explor 1 +visual 1 +environ 1 +class 1 +link 1 +stuff 1 +relat 1 +career 1 +plan 1 +chines 1 +miscellani 1 +send 1 +email 1 +around 1 +weather 1 +forecast 1 +visitor 1 +sinc 1 +june 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..4bcf09e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,44 @@ +guhan 1 +viswanathan 1 +home 1 +page 1 +gviswana 1 +wisc 1 +graduat 1 +studentdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaadvisor 1 +laru 1 +thesi 1 +research 1 +focus 1 +design 1 +implement 1 +data 1 +parallellanguag 1 +involv 1 +base 1 +develop 1 +local 1 +compil 1 +target 1 +investig 1 +parallelappl 1 +execut 1 +effici 1 +hand 1 +code 1 +parallelprogram 1 +amor 1 +detail 1 +summari 1 +list 1 +public 1 +us 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..074c5653 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,73 @@ +harit 1 +home 1 +page 1 +graduat 1 +student 1 +univers 1 +wisconsin 1 +madison 1 +depart 1 +comput 1 +scienc 1 +would 1 +like 1 +list 1 +classmat 1 +cours 1 +take 1 +fall 1 +databas 1 +manag 1 +system 1 +prof 1 +raghu 1 +ramakrishnan 1 +advanc 1 +architectur 1 +mark 1 +hill 1 +spring 1 +jame 1 +goodman 1 +undergradu 1 +world 1 +famou 1 +mvsr 1 +engin 1 +colleg 1 +osmania 1 +hyderabad 1 +india 1 +meet 1 +draw 1 +line 1 +thing 1 +interest 1 +indian 1 +newspap 1 +stuff 1 +sport 1 +sastri 1 +link 1 +roommat 1 +saeed 1 +mirza 1 +murthi 1 +zubber 1 +dust 1 +photo 1 +photograph 1 +warn 1 +click 1 +year 1 +folk 1 +access 1 +time 1 +sinc 1 +sept 1 +counter 1 +courtesi 1 +electron 1 +mail 1 +address 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..164b7dfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,43 @@ +rebecca 1 +hasti 1 +home 1 +page 1 +graduat 1 +student 1 +research 1 +assistantcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +offic 1 +mail 1 +wisc 1 +edutelephon 1 +telephon 1 +dept 1 +first 1 +java 1 +applet 1 +click 1 +fall 1 +schedul 1 +engr 1 +noland 1 +seminar 1 +comput 1 +univers 1 +mathemat 1 +carleton 1 +colleg 1 +interest 1 +program 1 +languag 1 +basketbal 1 +volleybal 1 +softbal 1 +linkag 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..6cd5c7e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,108 @@ +home 1 +page 1 +chad 1 +lane 1 +academ 1 +person 1 +info 1 +neat 1 +stuff 1 +dept 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +phone 1 +mail 1 +offic 1 +address 1 +hour 1 +wisc 1 +welcomethank 1 +stop 1 +hope 1 +enjoi 1 +bestbet 1 +link 1 +section 1 +biggest 1 +new 1 +life 1 +right 1 +get 1 +marri 1 +onmai 1 +nichol 1 +final 1 +want 1 +tell 1 +good 1 +luck 1 +count 1 +fall 1 +cours 1 +inform 1 +retriev 1 +technolog 1 +seek 1 +databas 1 +manag 1 +system 1 +ling 1 +audit 1 +advanc 1 +semant 1 +research 1 +interest 1 +linguist 1 +discours 1 +process 1 +us 1 +advic 1 +barwis 1 +epigram 1 +program 1 +alan 1 +perli 1 +educ 1 +mathemat 1 +minor 1 +philosophi 1 +laud 1 +truman 1 +state 1 +formerli 1 +northeast 1 +missouri 1 +expect 1 +stand 1 +accord 1 +truli 1 +click 1 +imag 1 +cyber 1 +poop 1 +creation 1 +unabash 1 +brother 1 +bart 1 +arthur 1 +download 1 +claud 1 +psychot 1 +talk 1 +rais 1 +plant 1 +internet 1 +deep 1 +thought 1 +jack 1 +handi 1 +reload 1 +differ 1 +on 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..69f3b5f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,103 @@ +susan 1 +hert 1 +home 1 +page 1 +wisc 1 +research 1 +assist 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +dayton 1 +telephon 1 +curriculum 1 +vita 1 +postscript 1 +interest 1 +public 1 +softwar 1 +link 1 +appli 1 +experiment 1 +geometri 1 +analysi 1 +algorithm 1 +design 1 +motion 1 +plan 1 +graphic 1 +geometr 1 +advisor 1 +vladimir 1 +lumelski 1 +current 1 +work 1 +robot 1 +develop 1 +alogirthm 1 +multipl 1 +common 1 +environ 1 +select 1 +deform 1 +curv 1 +plane 1 +tether 1 +extend 1 +abstract 1 +paper 1 +appear 1 +proceed 1 +canadian 1 +confer 1 +august 1 +planar 1 +rout 1 +intern 1 +journal 1 +applic 1 +ti 1 +bind 1 +autonom 1 +system 1 +version 1 +publish 1 +proc 1 +ieee 1 +autom 1 +sanjai 1 +tiwari 1 +terrain 1 +cover 1 +special 1 +issu 1 +underwat 1 +move 1 +arbitrari 1 +configur 1 +intellig 1 +reznik 1 +simul 1 +librari 1 +basi 1 +anim 1 +program 1 +technic 1 +report 1 +laboratori 1 +juli 1 +educ 1 +book 1 +refer 1 +shelf 1 +congress 1 +line 1 +travel 1 +samantha 1 +cook 1 +epicuri 1 +veggi 1 +unit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..ccf357a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,49 @@ +kirk 1 +hogenson 1 +graduat 1 +studentcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +offic 1 +mail 1 +wisc 1 +edutelephon 1 +student 1 +depart 1 +section 1 +hour 1 +tue 1 +also 1 +look 1 +myschedul 1 +none 1 +workout 1 +tryto 1 +appoint 1 +time 1 +finger 1 +send 1 +visit 1 +ghana 1 +countri 1 +serv 1 +peac 1 +corp 1 +usernam 1 +check 1 +pnhp 1 +group 1 +page 1 +maintain 1 +wife 1 +eilun 1 +experi 1 +counter 1 +sai 1 +accessedtim 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..700265a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,12 @@ +jeffrei 1 +horn 1 +swanton 1 +road 1 +madison 1 +wisconsin 1 +phone 1 +email 1 +wisc 1 +wise 1 +linear 1 +familyemploymenteducationresearchgenealog 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..110905e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,148 @@ +susan 1 +horwitzsusan 1 +horwitzprofessorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +horwitz 1 +wisc 1 +telephon 1 +secretari 1 +depart 1 +cornel 1 +univers 1 +research 1 +interest 1 +languag 1 +base 1 +program 1 +environmentsprogram 1 +slice 1 +differenc 1 +mergingstat 1 +analysi 1 +programsinterprocedur 1 +dataflow 1 +analysisresearch 1 +summarymi 1 +work 1 +mainli 1 +involv 1 +design 1 +implementationof 1 +tool 1 +help 1 +programm 1 +problem 1 +like 1 +understand 1 +exist 1 +would 1 +affectedbi 1 +propos 1 +modif 1 +textual 1 +structur 1 +semant 1 +differ 1 +betweentwo 1 +version 1 +retest 1 +chang 1 +combin 1 +piec 1 +produc 1 +certainsemant 1 +guarante 1 +represent 1 +call 1 +theprogram 1 +depend 1 +graph 1 +oper 1 +also 1 +algorithm 1 +precis 1 +interproceduraldataflow 1 +previou 1 +interprocedur 1 +concentratedeith 1 +effici 1 +specif 1 +individu 1 +necessarili 1 +gener 1 +class 1 +thoma 1 +rep 1 +mooli 1 +sagiv 1 +develop 1 +implement 1 +newalgorithm 1 +appli 1 +larg 1 +recent 1 +publicationsm 1 +shapiro 1 +fast 1 +accur 1 +flow 1 +insensit 1 +point 1 +appear 1 +confer 1 +record 1 +twenti 1 +fourth 1 +symposium 1 +onprincipl 1 +pari 1 +franc 1 +januari 1 +demand 1 +proceed 1 +sigsoft 1 +foundat 1 +softwareengin 1 +washington 1 +octob 1 +applic 1 +constantpropag 1 +sixth 1 +intern 1 +joint 1 +theoryand 1 +practic 1 +softwar 1 +aarhu 1 +denmark 1 +reachabl 1 +second 1 +principlesof 1 +francisco 1 +bate 1 +increment 1 +test 1 +us 1 +twentieth 1 +charleston 1 +engin 1 +fourteenth 1 +conferenceon 1 +melbourn 1 +australia 1 +identifi 1 +aprogram 1 +sigplan 1 +languagedesign 1 +white 1 +plain 1 +june 1 +teach 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..32008726 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid 1 +pagenam 1 +sidnei 1 +hummertoffic 1 +phone 1 +offic 1 +email 1 +hummert 1 +wisc 1 +edua 1 +postscript 1 +version 1 +resum 1 +pictur 1 +click 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..999676d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,46 @@ +igor 1 +ivanisev 1 +home 1 +pageigorivanisev 1 +work 1 +newest 1 +project 1 +disclaim 1 +alien 1 +speak 1 +particular 1 +needless 1 +page 1 +construct 1 +ever 1 +feel 1 +like 1 +actual 1 +alreadi 1 +link 1 +mail 1 +research 1 +interest 1 +robot 1 +vision 1 +stuff 1 +generalgradu 1 +slave 1 +univers 1 +wisconsin 1 +departmentwa 1 +undergrad 1 +drake 1 +math 1 +departmentaddress 1 +comput 1 +scienc 1 +departmentunivers 1 +west 1 +dayton 1 +streetmadison 1 +offic 1 +phone 1 +iigor 1 +wisc 1 +eduiigor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..e9f8abef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..6e175f1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,37 @@ +sharenow 1 +home 1 +page 1 +wisc 1 +teach 1 +assist 1 +peterson 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madisonmadison 1 +offic 1 +sciencestelephon 1 +hour 1 +section 1 +tuesdai 1 +thursdai 1 +pmsection 1 +meet 1 +pmboth 1 +class 1 +room 1 +sciencesc 1 +announcementshandoutsmoth 1 +jone 1 +profil 1 +recreat 1 +site 1 +pleas 1 +send 1 +email 1 +comment 1 +last 1 +modifi 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..de542236 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,44 @@ +basneyjim 1 +basneygradu 1 +student 1 +research 1 +assistantcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +email 1 +jbasnei 1 +wisc 1 +eduoffic 1 +comput 1 +statisticsoffic 1 +phone 1 +interest 1 +area 1 +oper 1 +system 1 +andnetwork 1 +current 1 +work 1 +condor 1 +directionof 1 +prof 1 +miron 1 +livni 1 +receiv 1 +fromoberlin 1 +colleg 1 +english 1 +webpag 1 +oberlin 1 +resum 1 +codefrom 1 +previou 1 +project 1 +avail 1 +onlin 1 +last 1 +modifi 1 +basnei 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..9289d55f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,83 @@ +jerel 1 +mackai 1 +home 1 +pagejerel 1 +assist 1 +research 1 +comput 1 +scienc 1 +special 1 +databas 1 +oper 1 +system 1 +work 1 +fulltim 1 +univers 1 +wisconsin 1 +madison 1 +depart 1 +respons 1 +includ 1 +develop 1 +support 1 +sybas 1 +ingr 1 +instal 1 +backup 1 +softwar 1 +train 1 +student 1 +hourli 1 +plai 1 +electr 1 +guitar 1 +thrash 1 +metal 1 +specialti 1 +also 1 +violin 1 +classic 1 +baroqu 1 +mainli 1 +seen 1 +error 1 +evil 1 +wai 1 +click 1 +shock 1 +case 1 +didn 1 +believ 1 +ey 1 +first 1 +time 1 +like 1 +record 1 +mostli 1 +funni 1 +cover 1 +stuff 1 +abba 1 +metallica 1 +origin 1 +soon 1 +abl 1 +sampl 1 +hit 1 +watch 1 +favorit 1 +show 1 +raquetbal 1 +golf 1 +shoot 1 +pool 1 +stand 1 +around 1 +towel 1 +yeah 1 +know 1 +much 1 +finger 1 +jerellast 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..ceb03a0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home 1 +page 1 +johan 1 +larson 1 +homepag 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..81e71714 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,131 @@ +home 1 +page 1 +relief 1 +happi 1 +java 1 +good 1 +censorship 1 +realli 1 +mail 1 +jherro 1 +wisc 1 +note 1 +class 1 +made 1 +relax 1 +let 1 +would 1 +cool 1 +thing 1 +apictur 1 +pictur 1 +girlfriend 1 +half 1 +year 1 +afraid 1 +date 1 +though 1 +almost 1 +mani 1 +tortur 1 +disembody 1 +becam 1 +float 1 +head 1 +death 1 +directori 1 +anim 1 +frame 1 +seri 1 +jpeg 1 +format 1 +default 1 +name 1 +number 1 +start 1 +millisecond 1 +paus 1 +imag 1 +overriden 1 +repeat 1 +sequenc 1 +explicit 1 +order 1 +stuff 1 +vital 1 +inform 1 +aquir 1 +nicknam 1 +like 1 +take 1 +time 1 +apolog 1 +lame 1 +pleas 1 +bear 1 +pretti 1 +jack 1 +skellington 1 +kermit 1 +frog 1 +interest 1 +someth 1 +els 1 +neat 1 +link 1 +friend 1 +homepag 1 +write 1 +haiku 1 +said 1 +thath 1 +go 1 +click 1 +roomat 1 +mine 1 +grad 1 +notr 1 +dame 1 +memori 1 +forgotten 1 +cult 1 +hippothi 1 +exploratori 1 +intervent 1 +chaotic 1 +exist 1 +realiti 1 +follow 1 +enjoi 1 +benefit 1 +matriarch 1 +societi 1 +join 1 +todai 1 +exclus 1 +club 1 +hierarchi 1 +rule 1 +semi 1 +yahooooooooooooo 1 +work 1 +contain 1 +free 1 +softwar 1 +shack 1 +bazillion 1 +search 1 +engin 1 +mpeg 1 +movi 1 +archiv 1 +great 1 +muppet 1 +sound 1 +rachel 1 +want 1 +select 1 +cano 1 +trip 1 +look 1 +bout 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..81b23675 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,130 @@ +bing 1 +home 1 +page 1 +index 1 +gener 1 +inform 1 +educ 1 +advisor 1 +research 1 +interest 1 +project 1 +public 1 +pointer 1 +hobbi 1 +informationresearch 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +jieb 1 +wisc 1 +edueduc 1 +scienc 1 +univers 1 +prof 1 +david 1 +dewittresearch 1 +tertiari 1 +storag 1 +support 1 +dbm 1 +parallel 1 +databas 1 +manag 1 +system 1 +object 1 +orient 1 +geograph 1 +systemsresearch 1 +paradis 1 +shorepublicationsbuild 1 +scaleabl 1 +spatial 1 +technolog 1 +implment 1 +evalu 1 +patel 1 +kabra 1 +tuft 1 +burger 1 +hall 1 +ramasami 1 +lueder 1 +ellman 1 +kupsch 1 +dewitt 1 +naughton 1 +submit 1 +octob 1 +queri 1 +execut 1 +batch 1 +prong 1 +approach 1 +effici 1 +process 1 +tape 1 +resid 1 +data 1 +set 1 +satellit 1 +imag 1 +studi 1 +impact 1 +tile 1 +size 1 +perform 1 +appear 1 +nasa 1 +goddard 1 +conferenceon 1 +mass 1 +septemb 1 +us 1 +constraint 1 +tree 1 +goldstein 1 +ramakrishnan 1 +shaft 1 +shorter 1 +version 1 +workshop 1 +februari 1 +client 1 +server 1 +proceed 1 +larg 1 +base 1 +confer 1 +santiago 1 +chile 1 +reclam 1 +reorgan 1 +serverpersist 1 +store 1 +yong 1 +ieee 1 +engin 1 +houston 1 +eosdi 1 +sigmod 1 +grouphobbi 1 +tenni 1 +volleybal 1 +volleyballweb 1 +white 1 +water 1 +raft 1 +whitewat 1 +find 1 +pictur 1 +click 1 +full 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..b223abd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,58 @@ +jignesh 1 +home 1 +page 1 +patel 1 +wisc 1 +welcom 1 +research 1 +assist 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +telephon 1 +advisor 1 +david 1 +dewitt 1 +interest 1 +system 1 +parallel 1 +databas 1 +object 1 +relat 1 +current 1 +work 1 +paradis 1 +project 1 +public 1 +client 1 +server 1 +paper 1 +publish 1 +vldb 1 +partit 1 +base 1 +spatial 1 +merg 1 +join 1 +sigmod 1 +accur 1 +model 1 +hybrid 1 +hash 1 +algorithm 1 +sigmetr 1 +miscellan 1 +stuff 1 +virtual 1 +tourist 1 +inlin 1 +skate 1 +madhuri 1 +kashmir 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..49adfdbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,10 @@ +georg 1 +varghes 1 +peopl 1 +download 1 +netscap 1 +page 1 +click 1 +warn 1 +pretti 1 +lame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..15b13e99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,76 @@ +johann 1 +gehrk 1 +homepagejohann 1 +gehrkewelcom 1 +intern 1 +graduat 1 +studentat 1 +comput 1 +sciencesdepart 1 +univers 1 +ofwisconsin 1 +madison 1 +area 1 +interest 1 +databasemanag 1 +system 1 +work 1 +data 1 +mine 1 +underprofessor 1 +raghuramakrishnan 1 +page 1 +construct 1 +contact 1 +inform 1 +public 1 +linkscontact 1 +email 1 +utexa 1 +offic 1 +wisconsin 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +room 1 +home 1 +eagl 1 +height 1 +stoica 1 +abdel 1 +wahab 1 +jeffai 1 +baruah 1 +plaxton 1 +proport 1 +share 1 +resourc 1 +alloc 1 +algorithmfor 1 +real 1 +time 1 +proceed 1 +ieee 1 +symposium 1 +washington 1 +decemb 1 +appear 1 +anexpand 1 +version 1 +fastschedul 1 +period 1 +task 1 +multipl 1 +inproceed 1 +parallel 1 +processingsymposium 1 +april 1 +expand 1 +avail 1 +technicalreport 1 +universityof 1 +texa 1 +austin 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..fc4692c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,71 @@ +home 1 +pagewelcom 1 +page 1 +first 1 +year 1 +graduat 1 +student 1 +univers 1 +wisconsin 1 +madison 1 +studi 1 +comput 1 +scienc 1 +us 1 +also 1 +maintain 1 +frequent 1 +ask 1 +question 1 +list 1 +latest 1 +powerbook 1 +model 1 +releas 1 +appl 1 +thing 1 +look 1 +section 1 +click 1 +herei 1 +amass 1 +good 1 +site 1 +number 1 +catagori 1 +check 1 +depart 1 +alma 1 +mater 1 +visit 1 +often 1 +needsth 1 +nando 1 +time 1 +great 1 +new 1 +coverageth 1 +spot 1 +mind 1 +numb 1 +soap 1 +operaish 1 +drivelziffnet 1 +industri 1 +newsc 1 +databas 1 +manag 1 +system 1 +construct 1 +compil 1 +keep 1 +classworktodai 1 +dilbert 1 +chucklejon 1 +bodner 1 +jonb 1 +wisc 1 +mound 1 +last 1 +modifi 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..c34adf90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,85 @@ +welcom 1 +home 1 +page 1 +first 1 +year 1 +graduat 1 +student 1 +depart 1 +came 1 +frombeij 1 +chines 1 +academi 1 +scienc 1 +china 1 +hometown 1 +nanj 1 +capitol 1 +jiangsu 1 +provinc 1 +degre 1 +univers 1 +wisconsin 1 +madison 1 +institut 1 +autom 1 +beij 1 +specil 1 +pattern 1 +recognit 1 +imag 1 +process 1 +biomed 1 +engin 1 +southeast 1 +chinacurr 1 +activ 1 +cours 1 +advanc 1 +comput 1 +network 1 +topic 1 +databas 1 +manag 1 +oper 1 +system 1 +teach 1 +assist 1 +data 1 +structur 1 +current 1 +address 1 +spring 1 +work 1 +west 1 +dayton 1 +street 1 +tele 1 +offic 1 +could 1 +finger 1 +wisc 1 +refer 1 +inform 1 +class 1 +technic 1 +stuffjava 1 +placeshor 1 +tutorialchina 1 +affairchina 1 +democracybeij 1 +place 1 +interest 1 +stanford 1 +groupstanford 1 +medic 1 +informaticsmit 1 +commun 1 +control 1 +signal 1 +processingjob 1 +site 1 +newsyou 1 +visitor 1 +number 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..897d3305 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,79 @@ +jeff 1 +shabel 1 +home 1 +pagech 1 +welcom 1 +page 1 +wisconsinch 1 +theme 1 +song 1 +offic 1 +hour 1 +tue 1 +thur 1 +appoint 1 +person 1 +informationmajor 1 +comput 1 +scienc 1 +architectur 1 +emphasi 1 +statu 1 +second 1 +year 1 +graduat 1 +student 1 +view 1 +fall 1 +schedul 1 +academ 1 +background 1 +receiv 1 +engin 1 +diego 1 +electr 1 +depart 1 +town 1 +cupertino 1 +jose 1 +high 1 +school 1 +monta 1 +vista 1 +plan 1 +favorit 1 +sport 1 +team 1 +golden 1 +state 1 +warrior 1 +basketbal 1 +mercuri 1 +new 1 +andnando 1 +shark 1 +hockei 1 +francisco 1 +footbal 1 +oakland 1 +link 1 +newsmus 1 +columbia 1 +hous 1 +find 1 +join 1 +deal 1 +also 1 +tip 1 +info 1 +music 1 +club 1 +miscellan 1 +print 1 +postscript 1 +document 1 +window 1 +send 1 +mail 1 +jshabel 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..b24299c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,193 @@ +jussi 1 +myllymakijussi 1 +myllymaki 1 +research 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +madison 1 +telephon 1 +email 1 +wisc 1 +eduresearch 1 +summaryi 1 +interest 1 +perform 1 +analysi 1 +dbm 1 +oper 1 +onadvanc 1 +tape 1 +disk 1 +technolog 1 +includ 1 +arrai 1 +mcurrent 1 +studi 1 +buffer 1 +larg 1 +dataset 1 +tertiari 1 +storageto 1 +memori 1 +data 1 +us 1 +explor 1 +andvisu 1 +deviseproject 1 +advisor 1 +prof 1 +mironlivni 1 +recent 1 +work 1 +improv 1 +relat 1 +joinsof 1 +volum 1 +resid 1 +public 1 +listbelow 1 +appli 1 +structur 1 +organ 1 +solv 1 +problem 1 +associ 1 +divers 1 +characterist 1 +andfunct 1 +limit 1 +media 1 +paper 1 +datavisu 1 +discuss 1 +metadata 1 +managementissu 1 +complex 1 +set 1 +involv 1 +refere 1 +publicationseffici 1 +concurr 1 +andtap 1 +miron 1 +livni 1 +proceed 1 +intern 1 +confer 1 +theori 1 +measur 1 +evalu 1 +commun 1 +system 1 +octob 1 +integr 1 +visual 1 +parallel 1 +programperform 1 +karen 1 +karavan 1 +bartonp 1 +miller 1 +third 1 +workshop 1 +environ 1 +andtool 1 +scientif 1 +august 1 +tertiarystorag 1 +daniel 1 +ford 1 +theintern 1 +engin 1 +februari 1 +alsoavail 1 +almaden 1 +report 1 +withmiron 1 +raghu 1 +ramakrishnan 1 +spie 1 +societi 1 +optic 1 +januari 1 +join 1 +synchron 1 +access 1 +acmsigmetr 1 +submit 1 +publicationdevis 1 +queri 1 +beyer 1 +chen 1 +donjerkov 1 +lawand 1 +wenger 1 +sigmod 1 +storag 1 +andmiron 1 +dataengin 1 +publicationsdisk 1 +tapeaccess 1 +technic 1 +project 1 +master 1 +degreeproject 1 +client 1 +server 1 +model 1 +networkarchitectur 1 +thesi 1 +helsinki 1 +industri 1 +manag 1 +finnish 1 +documentsimplement 1 +treealgorithm 1 +jeff 1 +schwarz 1 +yoav 1 +weiss 1 +class 1 +experi 1 +implement 1 +filesystem 1 +trishul 1 +chilimbi 1 +overview 1 +current 1 +productsoverview 1 +raid 1 +supplier 1 +productssom 1 +frequent 1 +need 1 +link 1 +unifi 1 +search 1 +adaptec 1 +scsi 1 +adapt 1 +home 1 +digit 1 +alpha 1 +workstationsandpcsandtechn 1 +journaland 1 +whitepap 1 +researchandcyberjourn 1 +quantum 1 +linear 1 +tapeanddlt 1 +faqandwhitepap 1 +solarisandsparcstationsandtechn 1 +faqandstorag 1 +faqand 1 +otherusenet 1 +faqsmani 1 +found 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..e8e7b2b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepag 1 +jyothithi 1 +page 1 +construct 1 +info 1 +student 1 +cours 1 +grade 1 +other 1 +sorri 1 +dissappoint 1 +email 1 +jyothi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..42b7abfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,88 @@ +karen 1 +karavaniceveryth 1 +need 1 +know 1 +learn 1 +public 1 +school 1 +karavanicresearch 1 +assist 1 +paradyn 1 +parallel 1 +perform 1 +tool 1 +project 1 +univers 1 +wisconsin 1 +madison 1 +comput 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +karavan 1 +wisc 1 +current 1 +pursu 1 +research 1 +interest 1 +includ 1 +environ 1 +autom 1 +tune 1 +process 1 +oper 1 +system 1 +databasesask 1 +women 1 +wic 1 +frontier 1 +cool 1 +program 1 +dane 1 +counti 1 +high 1 +studentstrio 1 +student 1 +support 1 +servic 1 +free 1 +tutor 1 +undergradu 1 +miss 1 +site 1 +page 1 +could 1 +save 1 +life 1 +safer 1 +pagefor 1 +chocol 1 +lover 1 +onlystuyves 1 +alumni 1 +associationstuyves 1 +class 1 +thoma 1 +legisl 1 +inform 1 +internetth 1 +constitut 1 +cure 1 +anyth 1 +salt 1 +water 1 +sweat 1 +tear 1 +isak 1 +dinesen 1 +ship 1 +port 1 +safe 1 +sail 1 +thing 1 +admir 1 +grace 1 +hopper 1 +pioneer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..4844c95f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,109 @@ +stefano 1 +kaxira 1 +home 1 +page 1 +wisc 1 +editor 1 +ieee 1 +kiloprocessor 1 +extens 1 +sciresearch 1 +assist 1 +univers 1 +wisconsin 1 +research 1 +interest 1 +summari 1 +recent 1 +publicationsresearch 1 +share 1 +memori 1 +multiprocess 1 +scalabl 1 +coher 1 +interfac 1 +cach 1 +design 1 +aspect 1 +parallel 1 +processor 1 +architectur 1 +galileo 1 +introduc 1 +glow 1 +hierarch 1 +collaborationwith 1 +goodman 1 +work 1 +examin 1 +depth 1 +option 1 +develop 1 +upcom 1 +standard 1 +incolabor 1 +david 1 +jame 1 +stein 1 +gjess 1 +public 1 +protocol 1 +wide 1 +data 1 +goodmanto 1 +appear 1 +proceed 1 +intern 1 +confer 1 +supercomput 1 +also 1 +technic 1 +report 1 +kaxirasto 1 +process 1 +symposium 1 +april 1 +implement 1 +perform 1 +wind 1 +tunnel 1 +goodmannd 1 +workshop 1 +base 1 +high 1 +cost 1 +comput 1 +march 1 +goodmanst 1 +august 1 +kaxirasunivers 1 +scienc 1 +dept 1 +juli 1 +softwar 1 +tool 1 +simul 1 +prototyp 1 +monitor 1 +multiprocessor 1 +system 1 +stafylopati 1 +papakonstantin 1 +kaxirasinform 1 +technolog 1 +autom 1 +synthesi 1 +dedic 1 +us 1 +prolog 1 +specif 1 +tsanaka 1 +pekmestzi 1 +kaxirasp 1 +greec 1 +hardwar 1 +methodolog 1 +kaxirasmicroprocess 1 +microprogram 1 +north 1 +holland 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..561955b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,251 @@ +keeper 1 +home 1 +page 1 +steven 1 +foughtthei 1 +heaven 1 +like 1 +perfect 1 +littl 1 +world 1 +doesn 1 +realli 1 +need 1 +everyth 1 +made 1 +light 1 +lauri 1 +anderson 1 +strang 1 +angel 1 +note 1 +possibl 1 +make 1 +structur 1 +markup 1 +indic 1 +piec 1 +inform 1 +rather 1 +look 1 +access 1 +peopl 1 +bitmap 1 +displai 1 +includ 1 +theblind 1 +us 1 +standard 1 +tag 1 +allow 1 +whateverbrows 1 +wish 1 +also 1 +literari 1 +convent 1 +ironi 1 +satir 1 +sarcasm 1 +butnoth 1 +contain 1 +herein 1 +meant 1 +offens 1 +areoffend 1 +probabl 1 +stupid 1 +start 1 +third 1 +year 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +firsttwo 1 +week 1 +sinc 1 +support 1 +take 1 +care 1 +varieti 1 +machin 1 +vari 1 +degre 1 +success 1 +side 1 +never 1 +publish 1 +ever 1 +done 1 +anyth 1 +impress 1 +miracl 1 +pass 1 +prelim 1 +research 1 +addup 1 +hill 1 +bean 1 +fizzl 1 +result 1 +areobtain 1 +leav 1 +junior 1 +level 1 +programm 1 +creatingkiosk 1 +front 1 +end 1 +visual 1 +primit 1 +system 1 +perman 1 +skill 1 +free 1 +surpris 1 +discov 1 +especi 1 +wasn 1 +convinc 1 +aliv 1 +thosewho 1 +know 1 +well 1 +would 1 +argu 1 +proof 1 +mybe 1 +anywai 1 +might 1 +want 1 +read 1 +thoughtson 1 +hedgehog 1 +contact 1 +electron 1 +mail 1 +pager 1 +reliabl 1 +specif 1 +locat 1 +often 1 +work 1 +ifyou 1 +person 1 +write 1 +someth 1 +address 1 +wisc 1 +becom 1 +clear 1 +wantto 1 +short 1 +notic 1 +give 1 +number 1 +variou 1 +creation 1 +thought 1 +todo 1 +list 1 +updat 1 +sporad 1 +associ 1 +rsum 1 +postscript 1 +document 1 +html 1 +section 1 +long 1 +outof 1 +date 1 +unfortun 1 +danenet 1 +dilhr 1 +jobnet 1 +databas 1 +soon 1 +defunct 1 +caltech 1 +project 1 +institut 1 +archiv 1 +photonet 1 +personnel 1 +directori 1 +much 1 +better 1 +insid 1 +anyon 1 +enter 1 +databaseus 1 +form 1 +interfac 1 +slide 1 +talk 1 +gave 1 +distribut 1 +object 1 +call 1 +java 1 +danger 1 +love 1 +come 1 +hell 1 +freez 1 +rate 1 +break 1 +hierarchi 1 +consult 1 +somewher 1 +fought 1 +unifi 1 +attribut 1 +index 1 +sfuai 1 +informationag 1 +intellectu 1 +properti 1 +assigna 1 +uniqu 1 +serial 1 +refer 1 +quot 1 +atth 1 +provid 1 +sourc 1 +contextu 1 +pointer 1 +relev 1 +bui 1 +adob 1 +distil 1 +translat 1 +rsuminto 1 +user 1 +chanc 1 +ofread 1 +suppos 1 +print 1 +pinch 1 +certaintruth 1 +psycholog 1 +softwar 1 +eventuallypick 1 +aren 1 +taught 1 +explicitli 1 +think 1 +possibleto 1 +easier 1 +cheap 1 +shot 1 +thing 1 +hate 1 +idea 1 +mull 1 +accessibleto 1 +small 1 +subset 1 +tough 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..103cf967 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,24 @@ +krishna 1 +kunchithapadamkrishna 1 +kunchithapadamgreet 1 +welcom 1 +page 1 +interest 1 +read 1 +languag 1 +indian 1 +classic 1 +music 1 +miscellaneouspubl 1 +data 1 +distribut 1 +perform 1 +steer 1 +toolsresum 1 +gzip 1 +postscript 1 +contact 1 +search 1 +last 1 +modifi 1 +bykk 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..305ee34c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,31 @@ +kristin 1 +home 1 +pagekristin 1 +tuft 1 +research 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +wisc 1 +eduadvisor 1 +david 1 +dewitt 1 +miscellani 1 +inform 1 +serveruw 1 +dbm 1 +groupacm 1 +sigmod 1 +server 1 +pageeo 1 +project 1 +officelast 1 +modifi 1 +tuftekristin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..de957a3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,47 @@ +krung 1 +homepageupd 1 +novemb 1 +homepag 1 +underconstructioni 1 +keep 1 +page 1 +short 1 +inform 1 +good 1 +serf 1 +year 1 +cometh 1 +follow 1 +relat 1 +topic 1 +research 1 +mathemat 1 +program 1 +project 1 +pursu 1 +cours 1 +work 1 +comput 1 +scienc 1 +depart 1 +compani 1 +favorit 1 +hobbi 1 +person 1 +opinion 1 +life 1 +madison 1 +wisconsin 1 +linkedth 1 +import 1 +link 1 +univers 1 +whole 1 +uniqu 1 +entiti 1 +electron 1 +librari 1 +system 1 +sinapiromsaran 1 +emailkrung 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..0d4a4c9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,137 @@ +kunen 1 +home 1 +page 1 +kenneth 1 +professormath 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +mail 1 +wisc 1 +edutelephon 1 +stanford 1 +interest 1 +autom 1 +deduct 1 +logic 1 +program 1 +theori 1 +topolog 1 +research 1 +summari 1 +work 1 +involv 1 +applic 1 +typic 1 +tool 1 +like 1 +resolutionto 1 +prove 1 +mathemat 1 +theorem 1 +studi 1 +semant 1 +languag 1 +likeprolog 1 +specif 1 +topic 1 +consid 1 +prologus 1 +negat 1 +failur 1 +incompat 1 +betweenleast 1 +fix 1 +point 1 +prolog 1 +style 1 +backtrack 1 +axiomat 1 +besid 1 +right 1 +thissubject 1 +relat 1 +variou 1 +abstract 1 +area 1 +theoret 1 +measur 1 +mani 1 +basic 1 +question 1 +turn 1 +independ 1 +usualaxiom 1 +select 1 +recent 1 +public 1 +follow 1 +postscript 1 +file 1 +shortest 1 +singl 1 +axiom 1 +group 1 +expon 1 +hart 1 +reason 1 +ramsei 1 +boyer 1 +moor 1 +appear 1 +mill 1 +corson 1 +compact 1 +space 1 +fundamenta 1 +mathematica 1 +local 1 +constant 1 +function 1 +answer 1 +liter 1 +technic 1 +report 1 +construct 1 +moufang 1 +quasigroup 1 +algebra 1 +loop 1 +associ 1 +law 1 +preprint 1 +structur 1 +conjugaci 1 +close 1 +complet 1 +result 1 +link 1 +resolut 1 +press 1 +weak 1 +extens 1 +rough 1 +draft 1 +book 1 +review 1 +note 1 +moschovaki 1 +american 1 +monthli 1 +cours 1 +taught 1 +fall 1 +math 1 +geometr 1 +infer 1 +foundat 1 +spring 1 +comp 1 +artifici 1 +intellig 1 +last 1 +chang 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..9cd75445 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,245 @@ +jame 1 +laru 1 +home 1 +page 1 +wisc 1 +associ 1 +professor 1 +comput 1 +sciencedepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usalaru 1 +eduphon 1 +secretari 1 +juli 1 +fingerson 1 +thea 1 +sklenar 1 +department 1 +offic 1 +educ 1 +research 1 +interest 1 +project 1 +upcom 1 +cours 1 +softwar 1 +recent 1 +paper 1 +graduat 1 +summari 1 +univers 1 +california 1 +berkelei 1 +harvard 1 +colleg 1 +program 1 +languagesand 1 +compil 1 +particular 1 +languag 1 +parallel 1 +machin 1 +design 1 +share 1 +memori 1 +symbol 1 +profil 1 +trace 1 +execut 1 +edit 1 +wind 1 +tunnel 1 +larg 1 +grain 1 +data 1 +librari 1 +structuresc 1 +java 1 +spim 1 +wartsrec 1 +paperseffici 1 +path 1 +thoma 1 +ball 1 +appear 1 +micro 1 +decemb 1 +programminglanguag 1 +brad 1 +richard 1 +guhan 1 +viswanathan 1 +gregori 1 +wilson 1 +us 1 +press 1 +teapot 1 +support 1 +write 1 +coher 1 +protocol 1 +satish 1 +chandra 1 +sigplan 1 +implement 1 +pldi 1 +instruct 1 +schedul 1 +eric 1 +schnarr 1 +andjam 1 +workshop 1 +system 1 +wcsss 1 +februari 1 +effici 1 +irregular 1 +applic 1 +distribut 1 +shubhendu 1 +mukherje 1 +shamik 1 +sharma 1 +mark 1 +hill 1 +annerog 1 +joel 1 +saltz 1 +fifth 1 +symposium 1 +principl 1 +practiceof 1 +ppopp 1 +independ 1 +confer 1 +languagesdesign 1 +june 1 +tempest 1 +substrat 1 +portabl 1 +david 1 +wood 1 +compcon 1 +spring 1 +march 1 +static 1 +branch 1 +frequenc 1 +analysi 1 +youfeng 1 +annual 1 +ieee 1 +intern 1 +microarchitectur 1 +novemb 1 +specif 1 +user 1 +level 1 +babak 1 +falsafi 1 +alvin 1 +lebeck 1 +steven 1 +reinhardt 1 +ioanni 1 +schoina 1 +markhil 1 +ann 1 +roger 1 +supercomput 1 +time 1 +spent 1 +messag 1 +pass 1 +sixth 1 +architectur 1 +forprogram 1 +oper 1 +asplo 1 +octob 1 +fine 1 +access 1 +control 1 +jameslaru 1 +cachier 1 +tool 1 +automat 1 +insert 1 +cico 1 +annot 1 +trishul 1 +chilimbi 1 +icpp 1 +august 1 +bibliographi 1 +unpublish 1 +manuscript 1 +revis 1 +frequent 1 +cooper 1 +hardwar 1 +scalabl 1 +multiprocessor 1 +transact 1 +toc 1 +wart 1 +madhusudhan 1 +talluri 1 +new 1 +graduatesbrad 1 +first 1 +employ 1 +vassar 1 +septemb 1 +techniqu 1 +languagesfirst 1 +oracl 1 +lorenz 1 +huelsbergen 1 +dynam 1 +bell 1 +lab 1 +flow 1 +depend 1 +tball 1 +summarymi 1 +focus 1 +problem 1 +part 1 +thewisconsin 1 +havehelp 1 +develop 1 +hybrid 1 +computerarchitectur 1 +facilit 1 +parallelmachin 1 +current 1 +student 1 +demonstr 1 +exploit 1 +power 1 +coherencepolici 1 +also 1 +perform 1 +evalu 1 +help 1 +programmersunderstand 1 +improv 1 +andi 1 +algorithm 1 +provid 1 +moredetail 1 +understand 1 +within 1 +routin 1 +hasidentifi 1 +possibl 1 +better 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..68106d8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick 1 +pagenick 1 +pageoffic 1 +phone 1 +email 1 +leavi 1 +wisc 1 +eduoffic 1 +hour 1 +tuesdai 1 +wednessdai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..94aa510b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,77 @@ +steven 1 +huss 1 +lederman 1 +home 1 +page 1 +research 1 +interest 1 +includ 1 +univ 1 +wisconsin 1 +madison 1 +relat 1 +thewisconsin 1 +wind 1 +tunnel 1 +project 1 +area 1 +parallel 1 +linear 1 +algebra 1 +iscov 1 +prismproject 1 +also 1 +heavili 1 +invol 1 +mpistandard 1 +sever 1 +other 1 +recent 1 +publish 1 +book 1 +origin 1 +inform 1 +order 1 +press 1 +isbn 1 +look 1 +complet 1 +refer 1 +editor 1 +current 1 +draft 1 +forum 1 +pleas 1 +keep 1 +mind 1 +work 1 +ongo 1 +andit 1 +document 1 +intend 1 +ongoingwork 1 +committe 1 +member 1 +compress 1 +postscript 1 +sourc 1 +compressedtar 1 +file 1 +individu 1 +avail 1 +would 1 +finger 1 +comput 1 +scienc 1 +dept 1 +dayton 1 +phone 1 +messag 1 +desper 1 +mail 1 +wisc 1 +http 1 +html 1 +offic 1 +statist 1 +build 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..5b6df471 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,84 @@ +home 1 +page 1 +lawrenc 1 +landweb 1 +professor 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +purdu 1 +interest 1 +network 1 +protocol 1 +high 1 +speed 1 +electronicmail 1 +research 1 +summari 1 +program 1 +focus 1 +participatingin 1 +gigabit 1 +project 1 +darpa 1 +nation 1 +involvesth 1 +design 1 +implement 1 +testb 1 +oper 1 +atgigabit 1 +second 1 +data 1 +rate 1 +work 1 +onissu 1 +congest 1 +admiss 1 +control 1 +visualizationof 1 +atmospher 1 +phenomena 1 +virtual 1 +conferenc 1 +sampl 1 +recent 1 +public 1 +fast 1 +circuit 1 +establishmentmethod 1 +olsen 1 +proceed 1 +theieee 1 +infocom 1 +confer 1 +francisco 1 +april 1 +dynam 1 +time 1 +window 1 +packet 1 +feedback 1 +witht 1 +faber 1 +mukherje 1 +sigcommconfer 1 +baltimor 1 +august 1 +gener 1 +clock 1 +combin 1 +close 1 +loop 1 +open 1 +ieee 1 +florenc 1 +coursesconnect 1 +tabl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..b4c7053b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,49 @@ +shannon 1 +lloyd 1 +home 1 +page 1 +work 1 +address 1 +univers 1 +wisconsin 1 +madison 1 +comput 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +wisc 1 +respons 1 +us 1 +lectur 1 +section 1 +comp 1 +offic 1 +hour 1 +wednesdai 1 +thursdai 1 +appoint 1 +fall 1 +cours 1 +construct 1 +compil 1 +linguist 1 +variou 1 +link 1 +women 1 +utah 1 +chemistri 1 +person 1 +engin 1 +career 1 +servic 1 +languag 1 +archiv 1 +natur 1 +process 1 +artifici 1 +intellig 1 +cognit 1 +xsoft 1 +lexdemo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..0ea1ebdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,193 @@ +christoph 1 +luka 1 +home 1 +pagechristoph 1 +lukasrelev 1 +inform 1 +offic 1 +phone 1 +email 1 +wisc 1 +edui 1 +appar 1 +coordin 1 +mspl 1 +fall 1 +workshipi 1 +also 1 +defeat 1 +rival 1 +quest 1 +becom 1 +semest 1 +sunivers 1 +wisconsin 1 +program 1 +languag 1 +seminar 1 +czar 1 +cours 1 +go 1 +take 1 +festiv 1 +java 1 +taught 1 +advisor 1 +tuft 1 +univers 1 +mail 1 +list 1 +click 1 +site 1 +entertain 1 +save 1 +tiger 1 +number 1 +free 1 +prisonerthi 1 +stock 1 +quoteserv 1 +maintain 1 +fabul 1 +wealth 1 +todd 1 +amus 1 +page 1 +check 1 +friend 1 +cool 1 +thing 1 +pagebet 1 +polit 1 +candid 1 +legal 1 +iowa 1 +electron 1 +market 1 +identitycaptain 1 +kirk 1 +sing 1 +lone 1 +gui 1 +troubl 1 +meet 1 +women 1 +throughamaz 1 +technolog 1 +longer 1 +need 1 +concern 1 +withtri 1 +interact 1 +real 1 +virtual 1 +girlfriend 1 +traci 1 +teri 1 +wait 1 +wife 1 +incred 1 +jump 1 +catthi 1 +realli 1 +fill 1 +anywai 1 +like 1 +know 1 +well 1 +pleas 1 +send 1 +option 1 +case 1 +feel 1 +someth 1 +current 1 +name 1 +address 1 +favorit 1 +appli 1 +killer 1 +buttmunchextrem 1 +dudemichael 1 +nesmith 1 +fanfoolmyth 1 +figurewick 1 +good 1 +basketbal 1 +playervalu 1 +studentment 1 +defectivea 1 +wkrp 1 +cincinatti 1 +tragic 1 +figuregeek 1 +tradesgonzo 1 +admirernetscap 1 +junki 1 +search 1 +child 1 +pornpersonifi 1 +organ 1 +condom 1 +stretch 1 +much 1 +readi 1 +blowflam 1 +testicl 1 +outer 1 +space 1 +tast 1 +goodpoetri 1 +guruhogwildthi 1 +kick 1 +assman 1 +manbig 1 +dudeuh 1 +ohprofession 1 +muff 1 +diverregress 1 +higher 1 +lifeformherald 1 +alien 1 +invas 1 +forcechri 1 +html 1 +formsalienherpetophiletodd 1 +turnidg 1 +hatth 1 +mancreepi 1 +laugh 1 +headsmal 1 +planetdr 1 +companioneast 1 +bunnycyberweenietcl 1 +hellbeast 1 +checkbox 1 +simpli 1 +submit 1 +reload 1 +mayb 1 +figur 1 +automat 1 +keyword 1 +includ 1 +interest 1 +superhighwai 1 +drug 1 +cosmo 1 +irrit 1 +gross 1 +nake 1 +scatolog 1 +pervert 1 +offspr 1 +food 1 +etymolog 1 +phat 1 +gnarli 1 +bogu 1 +wierd 1 +cybermuffin 1 +pictur 1 +erotica 1 +chees 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..2ac0c38e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,84 @@ +welcom 1 +ling 1 +zheng 1 +home 1 +page 1 +depart 1 +comput 1 +scienc 1 +sheboygan 1 +dayton 1 +madison 1 +offic 1 +phone 1 +mail 1 +lzheng 1 +wisc 1 +shameless 1 +self 1 +promot 1 +resum 1 +text 1 +version 1 +want 1 +know 1 +side 1 +click 1 +research 1 +interest 1 +assist 1 +paradyn 1 +group 1 +current 1 +hack 1 +onto 1 +hpux 1 +port 1 +boss 1 +barton 1 +miller 1 +also 1 +charg 1 +wuhan 1 +univers 1 +chinaand 1 +take 1 +look 1 +girlfriend 1 +pictur 1 +temporarili 1 +architectur 1 +educ 1 +prese 1 +dept 1 +winsconsin 1 +iowa 1 +officem 1 +marcelo 1 +goncalv 1 +alumni 1 +ignor 1 +china 1 +place 1 +surf 1 +compani 1 +hereif 1 +best 1 +graduat 1 +school 1 +sthe 1 +infom 1 +could 1 +america 1 +schoolssend 1 +suggest 1 +homepag 1 +bother 1 +thank 1 +last 1 +updat 1 +march 1 +visitor 1 +number 1 +sinc 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..39d7faa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,44 @@ +manuvir 1 +home 1 +pagemanuvir 1 +dasnow 1 +know 1 +name 1 +andwhat 1 +look 1 +like 1 +hello 1 +feelfre 1 +around 1 +need 1 +inform 1 +somethingsend 1 +email 1 +passion 1 +golf 1 +anact 1 +photo 1 +later 1 +manuvirwhat 1 +gener 1 +start 1 +advisor 1 +better 1 +thisto 1 +keep 1 +monei 1 +come 1 +turn 1 +theorigin 1 +america 1 +team 1 +cours 1 +leagu 1 +plai 1 +dai 1 +sundai 1 +round 1 +final 1 +consin 1 +said 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..73f676fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,394 @@ +mark 1 +hill 1 +home 1 +pagemark 1 +markhil 1 +wisc 1 +associ 1 +professor 1 +comput 1 +scienc 1 +andelectr 1 +engineeringat 1 +univers 1 +wisconsint 1 +content 1 +address 1 +offic 1 +hour 1 +current 1 +teach 1 +catalog 1 +inform 1 +educ 1 +research 1 +interest 1 +andsummari 1 +wisconsin 1 +wind 1 +tunnel 1 +project 1 +sampler 1 +recent 1 +paper 1 +graduateslink 1 +us 1 +world 1 +wide 1 +architectur 1 +group 1 +tool 1 +wart 1 +stuff 1 +like 1 +oralpresent 1 +advic 1 +includ 1 +david 1 +patterson 1 +show 1 +give 1 +talk 1 +onlin 1 +data 1 +forcach 1 +perform 1 +spec 1 +benchmark 1 +suit 1 +proof 1 +hardwar 1 +sound 1 +depart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usamarkhil 1 +eduphon 1 +secretari 1 +juli 1 +fingerson 1 +thea 1 +sklenar 1 +department 1 +fall 1 +mondai 1 +wednesdai 1 +appoint 1 +educurr 1 +teachingfal 1 +advanc 1 +ifal 1 +topic 1 +java 1 +languag 1 +implement 1 +icatalog 1 +cours 1 +teachc 1 +machin 1 +organ 1 +programmingc 1 +introduct 1 +architecturec 1 +iieduc 1 +california 1 +berkelei 1 +engin 1 +michigan 1 +parallel 1 +memori 1 +system 1 +evaluationresearch 1 +summarymi 1 +target 1 +share 1 +multiprocessorsand 1 +high 1 +uniprocessor 1 +design 1 +import 1 +larg 1 +determin 1 +sustain 1 +mywork 1 +emphas 1 +quantit 1 +analysi 1 +often 1 +requir 1 +evaluationtechniqu 1 +level 1 +much 1 +work 1 +part 1 +windtunnel 1 +projectwith 1 +prof 1 +laru 1 +wood 1 +manystud 1 +expect 1 +futur 1 +massiv 1 +computerswil 1 +built 1 +workstat 1 +node 1 +program 1 +levelparallel 1 +support 1 +space 1 +inwhich 1 +process 1 +uniformli 1 +refer 1 +seek 1 +todevelop 1 +consensu 1 +middl 1 +interfac 1 +languagesand 1 +compil 1 +softwar 1 +recentlypropos 1 +tempest 1 +enabl 1 +programm 1 +librari 1 +messag 1 +pass 1 +transpar 1 +hybrid 1 +combin 1 +aredevelop 1 +think 1 +aclust 1 +hypothet 1 +platform 1 +name 1 +toolsto 1 +cull 1 +supercomput 1 +manner 1 +similarto 1 +aeronaut 1 +convent 1 +designairplan 1 +madhu 1 +talluritarget 1 +improv 1 +translat 1 +lookasid 1 +buffer 1 +page 1 +tabl 1 +cluster 1 +align 1 +base 1 +option 1 +chang 1 +complet 1 +subblock 1 +tlb 1 +oper 1 +superpagesand 1 +partial 1 +asplosandsosppap 1 +papersth 1 +annot 1 +bibliographi 1 +jame 1 +unpublish 1 +manuscript 1 +revis 1 +frequent 1 +confer 1 +experiment 1 +june 1 +bidirect 1 +technolog 1 +transfer 1 +sabbat 1 +industri 1 +coher 1 +network 1 +fine 1 +grain 1 +commun 1 +shubhendu 1 +mukherje 1 +babak 1 +falsafi 1 +anddavid 1 +intern 1 +symposium 1 +isca 1 +optimist 1 +simul 1 +execut 1 +sashikanth 1 +chandrasekaran 1 +workshop 1 +distribut 1 +pad 1 +madhusudhan 1 +talluri 1 +yousef 1 +khalidi 1 +princip 1 +sosp 1 +decemb 1 +presidenti 1 +young 1 +investig 1 +award 1 +final 1 +report 1 +effici 1 +irregular 1 +applic 1 +shamik 1 +sharma 1 +ann 1 +roger 1 +joel 1 +saltz 1 +ppopp 1 +cost 1 +effect 1 +ieee 1 +februari 1 +solv 1 +microstructur 1 +electrostat 1 +propos 1 +frank 1 +traenkl 1 +sangta 1 +chemic 1 +specif 1 +protocol 1 +user 1 +alvin 1 +lebeck 1 +steven 1 +reinhardt 1 +ioanni 1 +schoina 1 +surpass 1 +superpag 1 +less 1 +forprogram 1 +asplo 1 +octob 1 +evalu 1 +directori 1 +medium 1 +scale 1 +memorymultiprocessor 1 +comparison 1 +trace 1 +sampl 1 +techniqu 1 +multi 1 +megabyt 1 +cach 1 +kessler 1 +transact 1 +cooper 1 +scalabl 1 +multiprocessor 1 +toc 1 +novemb 1 +new 1 +august 1 +jeffrei 1 +dionisio 1 +pnevmatikato 1 +alan 1 +smith 1 +micro 1 +unifi 1 +formal 1 +four 1 +model 1 +sarita 1 +adv 1 +tpd 1 +implic 1 +toler 1 +fault 1 +andrea 1 +farid 1 +pour 1 +march 1 +mechan 1 +satish 1 +chandra 1 +lewi 1 +subbarao 1 +palacharla 1 +virtual 1 +prototyp 1 +sigmetr 1 +placement 1 +algorithm 1 +real 1 +index 1 +differ 1 +consist 1 +kourosh 1 +gharachorloo 1 +anoop 1 +gupta 1 +john 1 +hennessi 1 +journal 1 +tradeoff 1 +size 1 +shing 1 +kong 1 +detect 1 +race 1 +weak 1 +barton 1 +miller 1 +robert 1 +netzer 1 +scheme 1 +vikram 1 +mari 1 +vernon 1 +estim 1 +miss 1 +ratio 1 +kessleracm 1 +stack 1 +highli 1 +extend 1 +abstract 1 +sequenti 1 +order 1 +definit 1 +graduatesmadhusudhan 1 +hierarchi 1 +first 1 +employ 1 +microsystem 1 +email 1 +assist 1 +rice 1 +richard 1 +secondari 1 +click 1 +crai 1 +last 1 +updatedw 1 +keyword 1 +help 1 +search 1 +rank 1 +higher 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..24adbe18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,18 @@ +home 1 +page 1 +marko 1 +zaharioudaki 1 +wisc 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaphon 1 +mail 1 +note 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..86b86f9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,13 @@ +michael 1 +birk 1 +home 1 +page 1 +section 1 +project 1 +list 1 +program 1 +languag 1 +link 1 +alltraxx 1 +mbirk 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..89404a0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,55 @@ +mark 1 +mcauliff 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +dayton 1 +wisc 1 +research 1 +interest 1 +design 1 +implement 1 +object 1 +orient 1 +databas 1 +system 1 +public 1 +carei 1 +dewitt 1 +franklin 1 +hall 1 +naughton 1 +schuh 1 +solomon 1 +tsatalo 1 +white 1 +zwill 1 +shoringup 1 +persist 1 +applic 1 +proc 1 +sigmod 1 +marvin 1 +atrac 1 +base 1 +simul 1 +pointer 1 +swizzl 1 +techniqu 1 +proceed 1 +ieee 1 +data 1 +engin 1 +march 1 +michael 1 +towardseffect 1 +effici 1 +free 1 +space 1 +manag 1 +appear 1 +confer 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..80aa216f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,116 @@ +marc 1 +shapiro 1 +page 1 +believ 1 +tautolog 1 +current 1 +obsess 1 +fond 1 +disappear 1 +fear 1 +repuls 1 +ponder 1 +fast 1 +pointer 1 +analys 1 +watch 1 +lot 1 +jacki 1 +chan 1 +movi 1 +think 1 +program 1 +languag 1 +design 1 +read 1 +much 1 +try 1 +teach 1 +elementari 1 +school 1 +student 1 +term 1 +recurs 1 +hope 1 +interrupt 1 +hoar 1 +wrote 1 +introduct 1 +high 1 +level 1 +step 1 +backward 1 +never 1 +recov 1 +home 1 +schedul 1 +todd 1 +automat 1 +accid 1 +gener 1 +elain 1 +dimasi 1 +twisti 1 +littl 1 +amanda 1 +peet 1 +retreather 1 +hyper 1 +mode 1 +emac 1 +thepul 1 +menu 1 +doesn 1 +cool 1 +html 1 +tag 1 +submiss 1 +softwarei 1 +cobbl 1 +togeth 1 +pldi 1 +abl 1 +work 1 +nowinclud 1 +previous 1 +mostli 1 +miss 1 +file 1 +submit 1 +popl 1 +paper 1 +accur 1 +flow 1 +insensit 1 +point 1 +analysi 1 +shapiroand 1 +susan 1 +horwitz 1 +appear 1 +symposium 1 +principl 1 +variou 1 +address 1 +dept 1 +dayton 1 +madison 1 +mail 1 +wisc 1 +talk 1 +hous 1 +finger 1 +marion 1 +list 1 +peopl 1 +know 1 +realli 1 +meet 1 +jonathan 1 +goldstein 1 +paul 1 +ferguson 1 +lawrenc 1 +brown 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..b41671d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,22 @@ +home 1 +pagerob 1 +minimalist 1 +page 1 +last 1 +modifi 1 +august 1 +mellencamp 1 +taship 1 +introduct 1 +oper 1 +system 1 +email 1 +mellen 1 +wisc 1 +offic 1 +comput 1 +scienc 1 +build 1 +phone 1 +hour 1 +appoint 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..9ab7ea40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,82 @@ +david 1 +melski 1 +person 1 +info 1 +pagedavid 1 +melskicurr 1 +depart 1 +mill 1 +comput 1 +scienc 1 +statisticsmadison 1 +dayton 1 +madison 1 +permen 1 +west 1 +ivesmarshfield 1 +michel 1 +awesom 1 +page 1 +current 1 +construct 1 +sister 1 +kasei 1 +great 1 +home 1 +brother 1 +eric 1 +semest 1 +teach 1 +coupl 1 +section 1 +also 1 +work 1 +rep 1 +program 1 +languag 1 +myexact 1 +schedul 1 +still 1 +need 1 +determin 1 +undergrad 1 +major 1 +russian 1 +studiesher 1 +univers 1 +wisconsin 1 +even 1 +spent 1 +fall 1 +semesterof 1 +russia 1 +chanc 1 +often 1 +miss 1 +somedai 1 +make 1 +back 1 +interest 1 +includ 1 +chess 1 +soccer 1 +recent 1 +beenbik 1 +distract 1 +numerousbook 1 +hasti 1 +rewrit 1 +want 1 +link 1 +tomapquest 1 +plan 1 +steal 1 +alot 1 +map 1 +second 1 +give 1 +direct 1 +marshfield 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..d2fee16d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,203 @@ +milo 1 +martin 1 +home 1 +pagemilo 1 +wisc 1 +graduat 1 +student 1 +teach 1 +assistantcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaemail 1 +eduoffic 1 +offic 1 +phone 1 +hour 1 +tuesdai 1 +thursdai 1 +byappointmentba 1 +comput 1 +gustavu 1 +adolphu 1 +colleg 1 +class 1 +compil 1 +construct 1 +charl 1 +fischer 1 +advanc 1 +architectur 1 +mark 1 +hill 1 +java 1 +sit 1 +jame 1 +larusteach 1 +algebra 1 +languag 1 +program 1 +section 1 +research 1 +interestsi 1 +first 1 +year 1 +interest 1 +programminglanguag 1 +system 1 +specif 1 +optim 1 +technolog 1 +beinfluenc 1 +hardwar 1 +oper 1 +mobil 1 +addit 1 +challeng 1 +present 1 +design 1 +mani 1 +thing 1 +even 1 +know 1 +publicationsresearch 1 +perform 1 +summer 1 +argonn 1 +nation 1 +laboratori 1 +develop 1 +divis 1 +advis 1 +fink 1 +humm 1 +micklich 1 +evalu 1 +view 1 +reconstruct 1 +paramet 1 +illicitsubst 1 +detect 1 +us 1 +fast 1 +neutron 1 +transmiss 1 +spectroscopi 1 +ieee 1 +nuclear 1 +symposium 1 +medic 1 +imag 1 +confer 1 +yule 1 +sagalovski 1 +techniqu 1 +nucl 1 +inst 1 +meth 1 +school 1 +hailperin 1 +languageflex 1 +determinist 1 +dynam 1 +parallel 1 +senior 1 +honor 1 +thesi 1 +mathemat 1 +depart 1 +postscript 1 +resourc 1 +next 1 +softwar 1 +compani 1 +found 1 +anintern 1 +scientif 1 +educ 1 +organ 1 +dedic 1 +toadvanc 1 +engin 1 +applic 1 +informationtechnolog 1 +serv 1 +profession 1 +public 1 +fosteringth 1 +open 1 +interchang 1 +inform 1 +promot 1 +highestprofession 1 +ethic 1 +standard 1 +direct 1 +quot 1 +page 1 +person 1 +footbal 1 +bignfl 1 +sinc 1 +live 1 +minnesota 1 +myfavorit 1 +team 1 +vike 1 +eventhough 1 +land 1 +chees 1 +head 1 +colon 1 +conquer 1 +multi 1 +player 1 +plai 1 +mail 1 +space 1 +explor 1 +combat 1 +game 1 +wrote 1 +babylon 1 +best 1 +show 1 +imho 1 +atlanti 1 +email 1 +mythic 1 +world 1 +build 1 +armi 1 +engaug 1 +trade 1 +fight 1 +wonder 1 +monster 1 +train 1 +wizard 1 +discov 1 +underworld 1 +right 1 +rule 1 +current 1 +list 1 +ultim 1 +frisbe 1 +associ 1 +combin 1 +element 1 +ofsocc 1 +basketbal 1 +pace 1 +afrisbe 1 +everyon 1 +quarterback 1 +receiv 1 +ultimatein 1 +simpl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..dac64fe6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,108 @@ +mino 1 +home 1 +page 1 +garofalakismino 1 +wisc 1 +eduphd 1 +candid 1 +research 1 +assist 1 +depart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaoffic 1 +stat 1 +phone 1 +workresearch 1 +interest 1 +effect 1 +resourc 1 +manag 1 +parallel 1 +multimedia 1 +databas 1 +system 1 +complex 1 +queri 1 +process 1 +optim 1 +algorithm 1 +theoryeduc 1 +scienc 1 +univers 1 +dept 1 +decemb 1 +patra 1 +engin 1 +informat 1 +june 1 +refere 1 +public 1 +multi 1 +dimension 1 +schedul 1 +garofalaki 1 +yanni 1 +ioannidi 1 +proceed 1 +sigmod 1 +confer 1 +montreal 1 +canada 1 +abstract 1 +paper 1 +postscript 1 +issu 1 +survei 1 +technic 1 +report 1 +enhanc 1 +view 1 +continu 1 +media 1 +banu 1 +ozden 1 +silberschatz 1 +submit 1 +octob 1 +model 1 +check 1 +sequenti 1 +probabilist 1 +real 1 +time 1 +technolog 1 +institut 1 +februari 1 +advisor 1 +ioannidismor 1 +feel 1 +free 1 +peek 1 +resum 1 +pointer 1 +stuff 1 +dbm 1 +reasearch 1 +hellen 1 +societi 1 +vldb 1 +almaden 1 +center 1 +watson 1 +centerdr 1 +michael 1 +bibliograpi 1 +server 1 +logic 1 +program 1 +perpetu 1 +construct 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..5c6954c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,21 @@ +marcelo 1 +gonalv 1 +mjrg 1 +wisc 1 +associ 1 +research 1 +paradyn 1 +project 1 +addresswork 1 +home 1 +comput 1 +scienc 1 +depart 1 +sheboygan 1 +west 1 +dayton 1 +street 1 +madison 1 +phone 1 +sciencesunivers 1 +wisconsin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..f339a7c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,35 @@ +brian 1 +morgan 1 +home 1 +page 1 +morgangradu 1 +studentcomput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +mail 1 +wisc 1 +telephon 1 +advisor 1 +chuck 1 +dyerresearch 1 +interestsvirtu 1 +conferenc 1 +system 1 +imag 1 +compress 1 +video 1 +high 1 +bandwidth 1 +network 1 +relat 1 +link 1 +interest 1 +comput 1 +vision 1 +group 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..4f14f543 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,121 @@ +andrea 1 +moshovo 1 +home 1 +page 1 +moshovosresearch 1 +assist 1 +depart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madisonadvisor 1 +guri 1 +sohigroup 1 +multiscalar 1 +kestrel 1 +architectur 1 +address 1 +leav 1 +notese 1 +aroundw 1 +peek 1 +futur 1 +clickheremi 1 +brother 1 +write 1 +poetri 1 +click 1 +herefor 1 +sampl 1 +work 1 +current 1 +data 1 +depend 1 +specul 1 +processor 1 +download 1 +technic 1 +report 1 +compress 1 +postscript 1 +uncompress 1 +talk 1 +slide 1 +load 1 +balanc 1 +gener 1 +interest 1 +instruct 1 +level 1 +parallel 1 +compil 1 +support 1 +explot 1 +vlsi 1 +fall 1 +spring 1 +graduat 1 +student 1 +thecour 1 +instituteof 1 +york 1 +univers 1 +earn 1 +degre 1 +sinc 1 +transfer 1 +howev 1 +theopportun 1 +excel 1 +peopl 1 +meet 1 +wife 1 +scienc 1 +crete 1 +greec 1 +implement 1 +numer 1 +algorithm 1 +access 1 +decoupl 1 +architecturethat 1 +softwar 1 +pipelin 1 +advisor 1 +kateveni 1 +short 1 +descript 1 +found 1 +viha 1 +like 1 +editor 1 +edit 1 +greek 1 +mani 1 +link 1 +hellen 1 +resouc 1 +network 1 +sure 1 +visit 1 +obtain 1 +instal 1 +font 1 +local 1 +copi 1 +resid 1 +atwww 1 +hyper 1 +devil 1 +dictionari 1 +bookmark 1 +mess 1 +nation 1 +fraud 1 +inform 1 +centerusenet 1 +chang 1 +want 1 +send 1 +afax 1 +free 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..0516c239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,38 @@ +toni 1 +home 1 +pagewhat 1 +newoctob 1 +back 1 +inmadison 1 +updat 1 +sever 1 +page 1 +chang 1 +background 1 +black 1 +better 1 +contrast 1 +contact 1 +inform 1 +minor 1 +variou 1 +list 1 +older 1 +prefer 1 +keep 1 +main 1 +brief 1 +herear 1 +link 1 +second 1 +level 1 +navig 1 +index 1 +friend 1 +favorit 1 +interest 1 +informationlast 1 +modifi 1 +octob 1 +wisc 1 +educopyright 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..11a287a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,151 @@ +martin 1 +ream 1 +home 1 +page 1 +graduat 1 +student 1 +teach 1 +assist 1 +also 1 +coke 1 +poobah 1 +finger 1 +machin 1 +comput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +offic 1 +telephon 1 +dept 1 +email 1 +mream 1 +wisc 1 +edufal 1 +scheduleresearch 1 +interest 1 +databas 1 +particular 1 +digit 1 +terrain 1 +model 1 +tin 1 +program 1 +languag 1 +compil 1 +design 1 +logic 1 +logicprogram 1 +qualifi 1 +exam 1 +spring 1 +previou 1 +year 1 +softwar 1 +develop 1 +product 1 +orientedenviron 1 +exploit 1 +educ 1 +interestsin 1 +resum 1 +postscriptand 1 +html 1 +distribut 1 +affili 1 +mathemat 1 +wesleyan 1 +univers 1 +faint 1 +heart 1 +section 1 +alink 1 +senior 1 +honorsthesi 1 +gener 1 +unif 1 +poobahlook 1 +work 1 +mighti 1 +afraid 1 +dear 1 +gone 1 +tomi 1 +head 1 +usual 1 +realli 1 +want 1 +talk 1 +tosomeon 1 +better 1 +adjust 1 +crucial 1 +role 1 +life 1 +youshould 1 +probabl 1 +elton 1 +doesn 1 +even 1 +mention 1 +imaginethat 1 +besid 1 +aforement 1 +poobahship 1 +mental 1 +ill 1 +afew 1 +thing 1 +might 1 +know 1 +third 1 +yeargradu 1 +depart 1 +concentr 1 +indatabas 1 +current 1 +studi 1 +qual 1 +sometim 1 +inearli 1 +februari 1 +exercis 1 +relax 1 +plai 1 +squash 1 +reason 1 +well 1 +round 1 +ultim 1 +frisbe 1 +summer 1 +basketbal 1 +poorli 1 +andinfrequ 1 +notic 1 +rapidlyrid 1 +mountain 1 +bike 1 +around 1 +campu 1 +chilliest 1 +weather 1 +alwai 1 +helmet 1 +wish 1 +learn 1 +feel 1 +free 1 +examin 1 +mynot 1 +often 1 +updat 1 +hierarchi 1 +stuff 1 +ilik 1 +enjoi 1 +line 1 +librarylast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..5ae0663c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,34 @@ +wisconsin 1 +multiscalar 1 +project 1 +home 1 +pagewisconsin 1 +technic 1 +paper 1 +talk 1 +given 1 +peopl 1 +contributor 1 +fund 1 +sourc 1 +relat 1 +avail 1 +softwar 1 +comput 1 +architectur 1 +group 1 +scienc 1 +departmentat 1 +univers 1 +world 1 +wide 1 +inform 1 +interest 1 +local 1 +user 1 +last 1 +updat 1 +februari 1 +guri 1 +sohi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..d31b75c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,134 @@ +mike 1 +steel 1 +homepagemik 1 +homepagemsteel 1 +wisc 1 +eduoffic 1 +comp 1 +stat 1 +build 1 +sit 1 +univ 1 +maryland 1 +depart 1 +comput 1 +struggl 1 +undergradu 1 +oper 1 +system 1 +class 1 +sometimearound 1 +april 1 +note 1 +time 1 +stamp 1 +lower 1 +right 1 +corner 1 +sai 1 +folk 1 +graduat 1 +student 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +school 1 +motto 1 +come 1 +freezein 1 +land 1 +chees 1 +research 1 +studi 1 +interest 1 +center 1 +around 1 +artificialintellig 1 +vision 1 +hope 1 +narrow 1 +year 1 +semest 1 +graduateinstructor 1 +section 1 +algebra 1 +languag 1 +program 1 +receiv 1 +bachelor 1 +degre 1 +theunivers 1 +scomput 1 +publicationsgrindston 1 +test 1 +suit 1 +parallel 1 +perform 1 +tool 1 +jefferyk 1 +hollingsworth 1 +michael 1 +technic 1 +reportc 1 +gzip 1 +postscriptfil 1 +semesterc 1 +introduct 1 +mari 1 +vernonc 1 +artifici 1 +intellig 1 +chuck 1 +dyermi 1 +pagesinform 1 +gettingin 1 +touch 1 +friend 1 +back 1 +marylandwhom 1 +forgot 1 +mail 1 +address 1 +favorit 1 +linksmi 1 +sport 1 +teamssom 1 +usenet 1 +dave 1 +barri 1 +frequent 1 +ask 1 +question 1 +listth 1 +billi 1 +joel 1 +listi 1 +also 1 +list 1 +administr 1 +thefruit 1 +still 1 +host 1 +ofmaryland 1 +start 1 +insidejok 1 +offic 1 +hand 1 +sometim 1 +andnow 1 +member 1 +world 1 +wide 1 +predat 1 +kill 1 +someinfrar 1 +photo 1 +know 1 +looklik 1 +infrar 1 +pictur 1 +memik 1 +steelemsteel 1 +eduunivers 1 +madisoncomput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..e5823077 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,16 @@ +maria 1 +home 1 +pagemaria 1 +pagehow 1 +visit 1 +univers 1 +maryland 1 +colleg 1 +park 1 +mayb 1 +wisconsin 1 +madison 1 +section 1 +might 1 +want 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..f23102cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,79 @@ +oscar 1 +naim 1 +home 1 +page 1 +bienvenido 1 +staff 1 +member 1 +work 1 +paradyn 1 +project 1 +finish 1 +myph 1 +univers 1 +southampton 1 +england 1 +undergrad 1 +universidad 1 +simon 1 +bolivar 1 +caraca 1 +venezuela 1 +born 1 +beauti 1 +citi 1 +barquisimeto 1 +barquisimetoi 1 +locat 1 +central 1 +western 1 +part 1 +popul 1 +ofabout 1 +million 1 +peopl 1 +also 1 +known 1 +music 1 +capit 1 +main 1 +research 1 +area 1 +perform 1 +analysi 1 +visual 1 +parallel 1 +program 1 +howev 1 +apart 1 +like 1 +playclass 1 +guitar 1 +fact 1 +studi 1 +year 1 +excellentmaestro 1 +rodrigo 1 +riera 1 +antonio 1 +lauro 1 +spend 1 +time 1 +plai 1 +read 1 +good 1 +book 1 +sherlock 1 +holm 1 +stori 1 +cook 1 +watch 1 +basebal 1 +beati 1 +pictur 1 +pleaseclick 1 +finger 1 +wisc 1 +check 1 +around 1 +mundo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..6b154083 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,39 @@ +anastassia 1 +ailamaki 1 +home 1 +welcom 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +dayton 1 +street 1 +phone 1 +realli 1 +want 1 +pictur 1 +import 1 +notic 1 +find 1 +time 1 +make 1 +decent 1 +page 1 +nice 1 +link 1 +georg 1 +rochest 1 +alex 1 +guid 1 +greek 1 +islandsar 1 +worth 1 +visit 1 +send 1 +mail 1 +natassa 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..dd231496 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,99 @@ +jeffrei 1 +naughtonjeffrei 1 +naughtonnaughton 1 +wisc 1 +eduresearch 1 +interestsolap 1 +multi 1 +dimension 1 +data 1 +analysi 1 +parallel 1 +object 1 +relationaldbm 1 +overal 1 +goal 1 +research 1 +develop 1 +ofdatabas 1 +system 1 +surpass 1 +current 1 +databas 1 +inperform 1 +eas 1 +three 1 +main 1 +area 1 +ofinterest 1 +techniqu 1 +improv 1 +perform 1 +ofmulti 1 +includ 1 +arrai 1 +base 1 +storag 1 +andprocess 1 +algorithm 1 +indic 1 +computingth 1 +cube 1 +relat 1 +benchmark 1 +dbm 1 +valuedattribut 1 +workload 1 +spatial 1 +inform 1 +recent 1 +public 1 +comput 1 +multidimension 1 +aggreg 1 +withsameet 1 +agarw 1 +rakesh 1 +agraw 1 +prasad 1 +deshpand 1 +ashish 1 +gupta 1 +raghu 1 +ramakrishnan 1 +sunita 1 +sarawagi 1 +proceed 1 +thend 1 +intern 1 +confer 1 +larg 1 +mumbai 1 +bombai 1 +estim 1 +aggregatesin 1 +presenc 1 +hierarchi 1 +amit 1 +shukla 1 +karthikeyan 1 +ramasami 1 +bucki 1 +michael 1 +carei 1 +david 1 +dewitt 1 +johann 1 +gerhk 1 +dhaval 1 +shah 1 +moham 1 +asgarian 1 +prepar 1 +toward 1 +molap 1 +withyihong 1 +zhao 1 +kristin 1 +tuft 1 +submit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..67f07cf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,39 @@ +navin 1 +kabranavin 1 +kabragradu 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madisonadvisor 1 +david 1 +dewittresearch 1 +area 1 +databas 1 +research 1 +interest 1 +customiz 1 +queri 1 +optim 1 +paradis 1 +project 1 +plan 1 +address 1 +noth 1 +better 1 +explor 1 +bookmark 1 +could 1 +look 1 +indian 1 +stuff 1 +includ 1 +among 1 +thing 1 +archiv 1 +hindi 1 +song 1 +wisc 1 +public 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..b1f5e4d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,28 @@ +newhalltia 1 +newhal 1 +wisc 1 +graduat 1 +student 1 +dayton 1 +madison 1 +telephon 1 +research 1 +interest 1 +parallel 1 +distribut 1 +system 1 +perform 1 +tool 1 +scalabl 1 +analysi 1 +predict 1 +java 1 +group 1 +paradynadvisor 1 +bart 1 +millermummi 1 +pictur 1 +guanajuato 1 +last 1 +chang 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..28fd1bbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,16 @@ +nanci 1 +hallcomput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +project 1 +shore 1 +scalabl 1 +heterogen 1 +object 1 +repositori 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..6b8c273c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,207 @@ +olvi 1 +mangasarian 1 +home 1 +page 1 +john 1 +neumann 1 +professor 1 +mathemat 1 +comput 1 +scienc 1 +member 1 +center 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +harvard 1 +interest 1 +program 1 +machin 1 +learn 1 +parallel 1 +research 1 +summari 1 +optim 1 +theori 1 +rich 1 +effectivecomputation 1 +solv 1 +mani 1 +real 1 +life 1 +problem 1 +interestsin 1 +topic 1 +rang 1 +broad 1 +spectrum 1 +encompassestheoret 1 +aspect 1 +error 1 +bound 1 +programsand 1 +variat 1 +inequ 1 +converg 1 +proof 1 +parallelgradi 1 +variabl 1 +distribut 1 +algorithm 1 +smooth 1 +techniqu 1 +constrain 1 +problemsa 1 +differenti 1 +nonlinear 1 +equat 1 +well 1 +applicationsto 1 +gener 1 +specif 1 +context 1 +animport 1 +programmingtechniqu 1 +diagnos 1 +breast 1 +cancer 1 +result 1 +ahighli 1 +accur 1 +computer 1 +diagnost 1 +system 1 +current 1 +useat 1 +hospit 1 +student 1 +paul 1 +bradlei 1 +recent 1 +public 1 +solodova 1 +linearli 1 +descent 1 +method 1 +strongli 1 +monotonecomplementar 1 +technic 1 +report 1 +octob 1 +jong 1 +pangexact 1 +penalti 1 +function 1 +programswith 1 +linear 1 +complementar 1 +constraint 1 +august 1 +mangasarianmathemat 1 +data 1 +miningmathemat 1 +mangasarianerror 1 +nondifferenti 1 +convex 1 +strong 1 +slater 1 +qualif 1 +juli 1 +street 1 +cluster 1 +concav 1 +minim 1 +submit 1 +neural 1 +inform 1 +process 1 +wolberg 1 +individu 1 +collect 1 +prognost 1 +predict 1 +januari 1 +featur 1 +select 1 +decemb 1 +journal 1 +polyhedr 1 +novemb 1 +appli 1 +festschrift 1 +klau 1 +ritter 1 +fischer 1 +riedmuel 1 +schaeffler 1 +editor 1 +physica 1 +verlag 1 +germani 1 +pose 1 +siam 1 +proceed 1 +internationalsymposium 1 +baltimor 1 +revis 1 +nick 1 +improv 1 +toler 1 +train 1 +april 1 +appear 1 +applic 1 +workshop 1 +eric 1 +june 1 +plenum 1 +press 1 +chunhui 1 +chen 1 +hybrid 1 +misclassif 1 +februari 1 +advanc 1 +siag 1 +view 1 +new 1 +class 1 +mix 1 +septemb 1 +diagnosi 1 +prognosi 1 +oper 1 +separ 1 +bilinear 1 +global 1 +solodov 1 +backpropag 1 +determinist 1 +nonmonoton 1 +perturb 1 +cowan 1 +tesauro 1 +alspector 1 +morgan 1 +kaufmann 1 +publish 1 +francisco 1 +california 1 +inequalitiesand 1 +serial 1 +net 1 +vianonmonoton 1 +minimn 1 +softwar 1 +chronolog 1 +bibliographi 1 +group 1 +paper 1 +download 1 +period 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..b1bc59ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,421 @@ +machin 1 +learn 1 +cancer 1 +diagnosi 1 +prognosismachin 1 +prognosisthi 1 +page 1 +describ 1 +variou 1 +linear 1 +program 1 +base 1 +learningapproach 1 +appli 1 +prognosi 1 +ofbreast 1 +work 1 +result 1 +collabor 1 +theunivers 1 +wisconsin 1 +madison 1 +betweenprof 1 +olvi 1 +mangasarian 1 +comput 1 +scienc 1 +depart 1 +anddr 1 +william 1 +wolbergof 1 +surgeri 1 +human 1 +oncolog 1 +copi 1 +thepress 1 +releas 1 +distribut 1 +american 1 +societi 1 +writer 1 +seminar 1 +inmarch 1 +provid 1 +good 1 +overview 1 +research 1 +tabl 1 +content 1 +bibliographi 1 +citat 1 +popular 1 +press 1 +local 1 +relat 1 +link 1 +linksdiagnosisthi 1 +grew 1 +desir 1 +wolberg 1 +accur 1 +diagnosebreast 1 +mass 1 +sole 1 +fine 1 +needl 1 +aspir 1 +heidentifi 1 +nine 1 +visual 1 +assess 1 +characterist 1 +sampl 1 +consideredrelev 1 +prof 1 +andtwo 1 +graduat 1 +student 1 +rudi 1 +setiono 1 +kristin 1 +bennett 1 +aclassifi 1 +construct 1 +us 1 +multisurfac 1 +method 1 +pattern 1 +separ 1 +featur 1 +thatsuccessfulli 1 +diagnos 1 +case 1 +data 1 +iswel 1 +known 1 +breast 1 +imag 1 +analysi 1 +began 1 +addit 1 +nick 1 +streetto 1 +team 1 +goal 1 +adigit 1 +small 1 +section 1 +slide 1 +ofthi 1 +consolid 1 +softwar 1 +system 1 +xcyt 1 +current 1 +clinicalpractic 1 +process 1 +perform 1 +follow 1 +taken 1 +materi 1 +thenmount 1 +microscop 1 +stain 1 +highlight 1 +cellularnuclei 1 +portion 1 +cell 1 +arewel 1 +differenti 1 +scan 1 +digit 1 +camera 1 +afram 1 +grabber 1 +board 1 +user 1 +isol 1 +individu 1 +nuclei 1 +mous 1 +pointer 1 +draw 1 +approxim 1 +boundari 1 +ofeach 1 +nucleu 1 +vision 1 +approach 1 +snake 1 +converg 1 +exact 1 +nuclear 1 +interact 1 +take 1 +five 1 +minut 1 +showingxcyt 1 +thisfas 1 +valu 1 +measur 1 +size 1 +shape 1 +textur 1 +mean 1 +standarderror 1 +extrem 1 +total 1 +train 1 +classifi 1 +wasconstruct 1 +benign 1 +malign 1 +thisclassifi 1 +consist 1 +singl 1 +plane 1 +space 1 +threeof 1 +area 1 +smooth 1 +project 1 +onto 1 +thenorm 1 +probabl 1 +densiti 1 +ofth 1 +point 1 +allow 1 +simpl 1 +bayesiancomput 1 +patient 1 +thesedens 1 +shown 1 +judg 1 +confid 1 +comparison 1 +hundr 1 +previou 1 +date 1 +correctli 1 +consecut 1 +newpati 1 +eight 1 +didxcyt 1 +return 1 +suspici 1 +estimatedprob 1 +subset 1 +sourc 1 +found 1 +goodtest 1 +segment 1 +object 1 +recognit 1 +algorithm 1 +petsegment 1 +automat 1 +identifi 1 +inthes 1 +pleas 1 +email 1 +street 1 +wisc 1 +togeth 1 +prognosisth 1 +second 1 +problem 1 +consid 1 +predict 1 +long 1 +term 1 +behavior 1 +diseas 1 +haveapproach 1 +function 1 +inputfeatur 1 +includ 1 +atim 1 +recurr 1 +right 1 +censor 1 +solut 1 +surfac 1 +util 1 +linearprogram 1 +time 1 +fornew 1 +examin 1 +actual 1 +caseswith 1 +similar 1 +plot 1 +ofdiseas 1 +free 1 +surviv 1 +year 1 +anindividu 1 +capabl 1 +incorpor 1 +intoxcyt 1 +exampl 1 +curv 1 +versu 1 +black 1 +repres 1 +ourorigin 1 +studi 1 +particular 1 +thereforeha 1 +averag 1 +freeafter 1 +equal 1 +procedur 1 +also 1 +compar 1 +power 1 +ofvari 1 +prognost 1 +factor 1 +indic 1 +precis 1 +detail 1 +cytolog 1 +inform 1 +type 1 +xcytgiv 1 +better 1 +accuraci 1 +tradit 1 +tumors 1 +lymph 1 +node 1 +statu 1 +corrobor 1 +could 1 +remov 1 +need 1 +often 1 +pain 1 +axillari 1 +chronolog 1 +bibliographylink 1 +paper 1 +postscript 1 +format 1 +viewer 1 +download 1 +file 1 +shift 1 +click 1 +netscap 1 +print 1 +abstract 1 +ascii 1 +text 1 +obtain 1 +notlink 1 +contact 1 +first 1 +author 1 +theori 1 +applic 1 +medic 1 +proceed 1 +workshop 1 +larg 1 +scale 1 +numer 1 +optim 1 +philadelphia 1 +siam 1 +new 1 +analyt 1 +quantit 1 +histolog 1 +april 1 +heisei 1 +deriv 1 +distinguish 1 +patholog 1 +grade 1 +august 1 +oper 1 +juli 1 +avail 1 +mathemat 1 +technic 1 +report 1 +induct 1 +twelfth 1 +intern 1 +confer 1 +priediti 1 +russel 1 +morgan 1 +kaufmann 1 +teagu 1 +call 1 +indetermin 1 +aid 1 +submit 1 +collect 1 +univers 1 +januari 1 +icml 1 +aaai 1 +medicin 1 +prime 1 +march 1 +biopsi 1 +without 1 +friend 1 +todai 1 +detect 1 +imit 1 +prospect 1 +man 1 +milwauke 1 +sentinel 1 +analyz 1 +detroit 1 +high 1 +tech 1 +hunt 1 +marilynn 1 +marchion 1 +journal 1 +computer 1 +interpret 1 +progress 1 +ruth 1 +sorel 1 +houston 1 +chronicl 1 +improv 1 +suggest 1 +replac 1 +surgic 1 +associ 1 +perspect 1 +column 1 +june 1 +cope 1 +septemb 1 +octob 1 +seek 1 +capit 1 +angel 1 +group 1 +schooloth 1 +nation 1 +librari 1 +nevada 1 +center 1 +biomed 1 +model 1 +oncolink 1 +washington 1 +institut 1 +paulb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..072754f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,160 @@ +pattern 1 +separ 1 +mathemat 1 +programmingpattern 1 +programmingthi 1 +page 1 +describ 1 +work 1 +linear 1 +program 1 +section 1 +univers 1 +wisconsin 1 +madison 1 +comput 1 +scienc 1 +depart 1 +brief 1 +histori 1 +method 1 +outlinemathemat 1 +optim 1 +approach 1 +particular 1 +long 1 +us 1 +problem 1 +construct 1 +plane 1 +linearli 1 +point 1 +set 1 +nonlinear 1 +surfac 1 +also 1 +whenev 1 +paramet 1 +appear 1 +quadrat 1 +polynomi 1 +formul 1 +howev 1 +could 1 +failon 1 +multisurfac 1 +avoid 1 +difficulti 1 +disjoint 1 +finit 1 +dimension 1 +euclidean 1 +space 1 +follow 1 +choos 1 +parallel 1 +close 1 +togeth 1 +region 1 +contain 1 +discard 1 +repeat 1 +process 1 +tree 1 +variant 1 +develop 1 +euclideanspac 1 +goal 1 +todetermin 1 +sequenc 1 +determin 1 +minim 1 +averag 1 +distanc 1 +misclassifi 1 +li 1 +side 1 +assign 1 +similarli 1 +mostli 1 +stop 1 +otherwis 1 +gener 1 +anoth 1 +error 1 +view 1 +decis 1 +eachnod 1 +best 1 +split 1 +reach 1 +node 1 +found 1 +solv 1 +branch 1 +thesam 1 +procedur 1 +appli 1 +oneset 1 +astrain 1 +neural 1 +network 1 +hidden 1 +layer 1 +shown 1 +learn 1 +concept 1 +well 1 +better 1 +traditionallearn 1 +cart 1 +advantag 1 +artifici 1 +backpropag 1 +inthat 1 +train 1 +proce 1 +much 1 +faster 1 +implement 1 +mino 1 +numer 1 +packag 1 +nick 1 +street 1 +kristin 1 +bennett 1 +matlab 1 +paul 1 +bradlei 1 +descript 1 +file 1 +requir 1 +chronolog 1 +bibliographi 1 +mangasarian 1 +oper 1 +research 1 +june 1 +ieee 1 +transact 1 +inform 1 +theori 1 +novemb 1 +proceed 1 +midwest 1 +intellig 1 +cognit 1 +societi 1 +confer 1 +robust 1 +discrimin 1 +insepar 1 +softwar 1 +orsa 1 +journal 1 +fall 1 +last 1 +modifi 1 +paulb 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..f7f8527d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,84 @@ +paradyn 1 +project 1 +home 1 +page 1 +parallel 1 +perform 1 +tool 1 +releas 1 +informationthi 1 +contain 1 +describ 1 +copi 1 +ofreleas 1 +goalsth 1 +program 1 +explor 1 +newapproach 1 +build 1 +scalabl 1 +technic 1 +paper 1 +manualsstatu 1 +reporta 1 +recent 1 +statu 1 +report 1 +blizzard 1 +arpa 1 +meet 1 +panel 1 +presentationthi 1 +present 1 +made 1 +csto 1 +insan 1 +antonio 1 +super 1 +symbol 1 +tabl 1 +inflorida 1 +effort 1 +develop 1 +common 1 +access 1 +routin 1 +tocompil 1 +gener 1 +inform 1 +us 1 +high 1 +level 1 +parallellanguag 1 +staff 1 +comput 1 +postera 1 +hypertext 1 +version 1 +poster 1 +relat 1 +elsewher 1 +spdt 1 +sigmetr 1 +symposium 1 +distribut 1 +toolsyou 1 +also 1 +restaur 1 +includ 1 +temporari 1 +placehold 1 +contact 1 +informationparadyn 1 +projectdepart 1 +sciencesunivers 1 +wisconsin 1 +west 1 +dayton 1 +streetmadison 1 +email 1 +wisc 1 +edufax 1 +last 1 +modifi 1 +bart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..d3f2ec27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,32 @@ +steven 1 +parker 1 +home 1 +page 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +offic 1 +wisc 1 +depth 1 +area 1 +numer 1 +analysi 1 +employ 1 +prism 1 +projectfal 1 +schedul 1 +math 1 +relat 1 +link 1 +send 1 +mail 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..8ef38d0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,90 @@ +home 1 +page 1 +paul 1 +bradleygradu 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +paulb 1 +wisc 1 +eduoffic 1 +csphone 1 +advisor 1 +mangasarianinterestsmathemat 1 +programmingmachin 1 +learningfli 1 +fish 1 +interest 1 +us 1 +mathemat 1 +program 1 +techniqu 1 +specif 1 +nonlinear 1 +linear 1 +induct 1 +learn 1 +summari 1 +work 1 +currentlyb 1 +done 1 +area 1 +pleas 1 +madisonmathemat 1 +thiswork 1 +guid 1 +professor 1 +olvimangasarian 1 +nick 1 +street 1 +publicationsal 1 +paper 1 +store 1 +postscript 1 +format 1 +abstract 1 +ascii 1 +text 1 +viewer 1 +download 1 +file 1 +shift 1 +click 1 +netscap 1 +print 1 +bradlei 1 +mangasarian 1 +featur 1 +select 1 +technic 1 +report 1 +decemb 1 +revis 1 +march 1 +submit 1 +inform 1 +journal 1 +cluster 1 +concav 1 +minim 1 +accept 1 +present 1 +neural 1 +process 1 +system 1 +picksthes 1 +site 1 +backcountri 1 +grate 1 +dead 1 +nasa 1 +frog 1 +espnet 1 +timesfax 1 +uroullett 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..6a9c9c92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,117 @@ +pete 1 +devri 1 +home 1 +page 1 +peter 1 +internet 1 +tool 1 +specialist 1 +room 1 +comput 1 +scienc 1 +westdayton 1 +madison 1 +pdevri 1 +wisc 1 +intern 1 +scout 1 +mean 1 +isthat 1 +read 1 +everyth 1 +technolog 1 +make 1 +sens 1 +andthen 1 +write 1 +overview 1 +articl 1 +toolkit 1 +great 1 +thing 1 +iread 1 +think 1 +anywai 1 +rather 1 +foolish 1 +topai 1 +tell 1 +eric 1 +hazen 1 +alsoprovid 1 +technic 1 +webmast 1 +servic 1 +group 1 +although 1 +excel 1 +help 1 +system 1 +folksat 1 +recent 1 +join 1 +team 1 +work 1 +laboratori 1 +molecularbiolog 1 +integr 1 +microscopi 1 +resourc 1 +biomed 1 +nearli 1 +eight 1 +year 1 +fortun 1 +develop 1 +prof 1 +seancarrol 1 +techniqu 1 +creat 1 +multipl 1 +label 1 +confoc 1 +imag 1 +basic 1 +cool 1 +look 1 +embryo 1 +lotof 1 +journal 1 +book 1 +cover 1 +also 1 +molecular 1 +biologi 1 +site 1 +johnwhit 1 +rest 1 +imrstaff 1 +receiv 1 +star 1 +inth 1 +magellan 1 +guid 1 +last 1 +major 1 +project 1 +involv 1 +dimension 1 +microscop 1 +studi 1 +isdescrib 1 +appear 1 +augustnd 1 +issu 1 +photo 1 +guest 1 +lab 1 +standard 1 +info 1 +resum 1 +relat 1 +experi 1 +public 1 +present 1 +updat 1 +tuesdai 1 +decemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..896e5983 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,99 @@ +yumpe 1 +home 1 +page 1 +manoj 1 +plakal 1 +graduat 1 +slave 1 +dept 1 +comput 1 +scienc 1 +universityofwisconsin 1 +madison 1 +blah 1 +countri 1 +india 1 +though 1 +origin 1 +state 1 +kerala 1 +stai 1 +life 1 +calcutta 1 +studi 1 +bosco 1 +school 1 +salesian 1 +undergrad 1 +kanpur 1 +major 1 +engin 1 +current 1 +first 1 +year 1 +student 1 +support 1 +teach 1 +assistantship 1 +depart 1 +univers 1 +wisconsin 1 +stare 1 +barrel 1 +either 1 +architectur 1 +program 1 +languag 1 +interest 1 +music 1 +rock 1 +metal 1 +altern 1 +blue 1 +movi 1 +book 1 +stuff 1 +acad 1 +hack 1 +industri 1 +geeki 1 +nerdi 1 +featur 1 +chat 1 +gatewai 1 +class 1 +seealso 1 +iitk 1 +link 1 +friend 1 +snap 1 +pinup 1 +galleri 1 +everi 1 +nerd 1 +need 1 +check 1 +bookmark 1 +access 1 +log 1 +visit 1 +contact 1 +north 1 +randal 1 +avenu 1 +dayton 1 +street 1 +wisc 1 +acknowledg 1 +suresh 1 +venkat 1 +nifti 1 +tabl 1 +igor 1 +ivanisev 1 +wisecrack 1 +icon 1 +variou 1 +corner 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..d689f580 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,70 @@ +prasad 1 +home 1 +page 1 +constuct 1 +meanwhil 1 +deshpand 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depar 1 +univers 1 +wisconsin 1 +madison 1 +address 1 +princeton 1 +offic 1 +build 1 +dayton 1 +academ 1 +interest 1 +databas 1 +system 1 +theori 1 +research 1 +area 1 +current 1 +schedul 1 +invest 1 +manageri 1 +econom 1 +meet 1 +prof 1 +jeff 1 +naughton 1 +music 1 +introduct 1 +public 1 +multidimension 1 +aggreg 1 +vldb 1 +storag 1 +estim 1 +multidimensionalaggreg 1 +presenc 1 +hierarchi 1 +cours 1 +project 1 +packag 1 +java 1 +download 1 +want 1 +spend 1 +time 1 +timex 1 +world 1 +find 1 +india 1 +dilbert 1 +comix 1 +explor 1 +bookmark 1 +random 1 +link 1 +finger 1 +sinc 1 +hakuna 1 +matata 1 +info 1 +creat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..8aa18295 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,38 @@ +vishi 1 +home 1 +page 1 +viswanath 1 +poosala 1 +research 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +wisc 1 +inform 1 +reseach 1 +summari 1 +resum 1 +html 1 +postscript 1 +relat 1 +databas 1 +advisor 1 +prof 1 +yanni 1 +ioannidi 1 +asha 1 +voluntari 1 +organ 1 +help 1 +improv 1 +basic 1 +educ 1 +india 1 +interestsuw 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..d0c2ecee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,353 @@ +home 1 +pageth 1 +project 1 +queri 1 +sequenc 1 +data 1 +document 1 +construct 1 +time 1 +order 1 +databas 1 +content 1 +objectivescurr 1 +statusmotiv 1 +exampleseq 1 +model 1 +sequin 1 +languageoptim 1 +techniquesseq 1 +system 1 +developmentpublicationsrel 1 +workcontact 1 +informationproject 1 +object 1 +number 1 +import 1 +applic 1 +requir 1 +processingof 1 +larg 1 +amount 1 +domain 1 +theseappl 1 +includ 1 +financi 1 +manag 1 +histor 1 +analysi 1 +econom 1 +social 1 +scienc 1 +metereolog 1 +medic 1 +andbiolog 1 +exist 1 +relat 1 +inadequ 1 +regard 1 +collect 1 +treat 1 +set 1 +consequ 1 +express 1 +tediou 1 +evalu 1 +ineffici 1 +us 1 +abstract 1 +allow 1 +declar 1 +manner 1 +util 1 +semanticstak 1 +advantag 1 +uniqu 1 +opportun 1 +avail 1 +optim 1 +evaluationintegr 1 +user 1 +canstor 1 +combin 1 +sequencesthes 1 +serv 1 +goal 1 +variou 1 +kind 1 +need 1 +support 1 +tempor 1 +themost 1 +notion 1 +like 1 +next 1 +previou 1 +natur 1 +consid 1 +effici 1 +issu 1 +studi 1 +theori 1 +built 1 +demonstr 1 +feasibl 1 +theoret 1 +idea 1 +statusth 1 +current 1 +statu 1 +defin 1 +also 1 +algebraicqueri 1 +oper 1 +compos 1 +form 1 +analogousto 1 +composit 1 +algebra 1 +describ 1 +process 1 +identifi 1 +techniqu 1 +languag 1 +candeclar 1 +embed 1 +likesql 1 +vice 1 +versa 1 +build 1 +disk 1 +base 1 +propos 1 +implement 1 +nest 1 +complex 1 +architectur 1 +shore 1 +storag 1 +sever 1 +megabyt 1 +integr 1 +extens 1 +motiv 1 +exampl 1 +querya 1 +weather 1 +monitor 1 +record 1 +inform 1 +meteorolog 1 +phenomena 1 +sequenti 1 +occurr 1 +event 1 +scientist 1 +ask 1 +volcano 1 +erupt 1 +didth 1 +recent 1 +earthquak 1 +strength 1 +greater 1 +richter 1 +scale 1 +featur 1 +groupbi 1 +claus 1 +correl 1 +subqueri 1 +aggregatefunct 1 +convent 1 +would 1 +find 1 +execut 1 +plan 1 +even 1 +given 1 +knowledg 1 +sort 1 +howev 1 +sequencesord 1 +scan 1 +lock 1 +step 1 +similar 1 +merg 1 +join 1 +store 1 +temporari 1 +buffer 1 +whenev 1 +valu 1 +check 1 +possibl 1 +gener 1 +answer 1 +therefor 1 +singl 1 +littl 1 +memori 1 +modelth 1 +detail 1 +aredescrib 1 +publish 1 +paper 1 +click 1 +postscript 1 +version 1 +present 1 +gist 1 +basic 1 +map 1 +ordereddomain 1 +posit 1 +mani 1 +relationship 1 +andposit 1 +view 1 +dual 1 +distinct 1 +wai 1 +recordsmap 1 +call 1 +orient 1 +respect 1 +give 1 +rise 1 +could 1 +either 1 +flavor 1 +relationaloper 1 +overlap 1 +contain 1 +andaggreg 1 +explor 1 +researchersin 1 +commun 1 +offset 1 +movingaggreg 1 +zoom 1 +mean 1 +collaps 1 +expand 1 +associ 1 +instanc 1 +daili 1 +weekli 1 +hourli 1 +last 1 +part 1 +deal 1 +group 1 +make 1 +easi 1 +involv 1 +case 1 +real 1 +worldsitu 1 +extend 1 +work 1 +instead 1 +extensionof 1 +indic 1 +practic 1 +ofseq 1 +probabl 1 +languagew 1 +devis 1 +usingwhich 1 +specifi 1 +languagei 1 +except 1 +input 1 +queriesa 1 +well 1 +result 1 +descript 1 +techniquesw 1 +thathav 1 +transform 1 +meta 1 +cach 1 +intermedi 1 +algorithm 1 +reli 1 +cost 1 +estim 1 +observ 1 +access 1 +stream 1 +strategi 1 +take 1 +account 1 +developmentth 1 +client 1 +serverarchitectur 1 +multipl 1 +viaa 1 +multi 1 +thread 1 +server 1 +ontop 1 +subset 1 +languageswhich 1 +insid 1 +mode 1 +arbitrarylevel 1 +viceversa 1 +provid 1 +supportfor 1 +type 1 +function 1 +detailson 1 +publicationssequ 1 +praveen 1 +seshadri 1 +miron 1 +livni 1 +raghu 1 +ramakrishnan 1 +proceed 1 +sigmod 1 +confer 1 +framework 1 +datapraveen 1 +ieee 1 +engin 1 +march 1 +design 1 +systempraveen 1 +submit 1 +vldb 1 +queriesraghu 1 +michael 1 +cheng 1 +intern 1 +comad 1 +decemb 1 +workthedevis 1 +complementari 1 +visualizationenviron 1 +front 1 +pose 1 +examin 1 +graphic 1 +peopl 1 +madison 1 +research 1 +depart 1 +servercontact 1 +informationfor 1 +contact 1 +wisc 1 +eduraghu 1 +edumiron 1 +educomput 1 +univers 1 +wisconsin 1 +dayton 1 +street 1 +modifi 1 +seshadripraveen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..94c76c99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,20 @@ +andrew 1 +prock 1 +home 1 +page 1 +clemen 1 +hockert 1 +prockoffic 1 +hour 1 +person 1 +histori 1 +school 1 +class 1 +madison 1 +bookmark 1 +resum 1 +doonesburi 1 +trot 1 +alta 1 +vista 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..5653df11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,121 @@ +home 1 +page 1 +ann 1 +condon 1 +associ 1 +professor 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +washington 1 +interest 1 +complex 1 +theori 1 +interact 1 +proof 1 +system 1 +random 1 +complexityclass 1 +parallel 1 +research 1 +summari 1 +model 1 +interactiveproof 1 +combin 1 +nondetermin 1 +suchmodel 1 +recent 1 +proven 1 +surprisingli 1 +us 1 +solv 1 +classicproblem 1 +exampl 1 +although 1 +theoryof 1 +complet 1 +long 1 +identifi 1 +hard 1 +computationalproblem 1 +much 1 +progress 1 +understand 1 +whichhard 1 +problem 1 +solut 1 +easi 1 +approxim 1 +recentresult 1 +result 1 +novel 1 +modelsof 1 +turn 1 +prove 1 +approximabilityresult 1 +sever 1 +work 1 +developingboth 1 +posit 1 +neg 1 +hardcombinatori 1 +aris 1 +game 1 +graph 1 +theoryand 1 +automata 1 +also 1 +design 1 +analysi 1 +algorithm 1 +current 1 +develop 1 +forsort 1 +minimum 1 +span 1 +tree 1 +goal 1 +well 1 +practic 1 +commun 1 +synchron 1 +costscan 1 +expens 1 +sampl 1 +public 1 +polynomi 1 +bound 1 +strategi 1 +ladner 1 +journal 1 +finit 1 +state 1 +nondeterminist 1 +probabilisticst 1 +hellerstein 1 +pottl 1 +wigderson 1 +proceedingsof 1 +annual 1 +symposium 1 +pspace 1 +provabl 1 +prover 1 +round 1 +caiand 1 +lipton 1 +februari 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..c6af1c08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,137 @@ +home 1 +page 1 +deborah 1 +joseph 1 +associ 1 +professor 1 +comput 1 +scienc 1 +mathemat 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +purdu 1 +interest 1 +structur 1 +appli 1 +complex 1 +theori 1 +biologi 1 +geometri 1 +logic 1 +research 1 +summari 1 +concern 1 +area 1 +theoret 1 +studi 1 +properti 1 +class 1 +design 1 +analysi 1 +algorithm 1 +biolog 1 +problem 1 +last 1 +twenti 1 +year 1 +great 1 +deal 1 +work 1 +gone 1 +studyingth 1 +set 1 +decid 1 +determinist 1 +andnondeterminist 1 +polynomi 1 +time 1 +despit 1 +effort 1 +stillknow 1 +littl 1 +recent 1 +fact 1 +computerscientist 1 +question 1 +adequaci 1 +known 1 +proof 1 +techniquesfor 1 +resolv 1 +whether 1 +investigatesth 1 +exploresin 1 +formal 1 +type 1 +techniqu 1 +necessari 1 +resolveproblem 1 +primarili 1 +inth 1 +method 1 +genom 1 +sequenc 1 +theseinclud 1 +develop 1 +dynam 1 +data 1 +algorithmsfor 1 +fragment 1 +assembl 1 +larg 1 +scale 1 +project 1 +specif 1 +handlingrepetit 1 +addit 1 +util 1 +graphtheoret 1 +rapid 1 +homolog 1 +detect 1 +analysisof 1 +anonym 1 +sampl 1 +public 1 +collaps 1 +degre 1 +subexponenti 1 +pruim 1 +young 1 +proceed 1 +ninth 1 +theoryconfer 1 +spars 1 +spanner 1 +weight 1 +graph 1 +althof 1 +dobkin 1 +soar 1 +discret 1 +obtain 1 +global 1 +similar 1 +local 1 +meidanisand 1 +tiwari 1 +fourth 1 +scandinavianworkshop 1 +springer 1 +verlag 1 +lectur 1 +note 1 +incomput 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..715920c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,108 @@ +home 1 +page 1 +miron 1 +livni 1 +professor 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +weizmann 1 +institut 1 +rehovot 1 +israel 1 +interest 1 +resourc 1 +manag 1 +algorithm 1 +perform 1 +model 1 +analysi 1 +discret 1 +event 1 +simul 1 +research 1 +summari 1 +major 1 +emphasi 1 +design 1 +evaluationof 1 +polici 1 +involv 1 +developmentof 1 +process 1 +data 1 +managementsystem 1 +type 1 +system 1 +gener 1 +purpos 1 +well 1 +asreal 1 +time 1 +schedul 1 +consid 1 +researchinvolv 1 +studi 1 +differ 1 +specialemphasi 1 +interplai 1 +properti 1 +systemand 1 +sinc 1 +performancestudi 1 +emploi 1 +modelingand 1 +techniqu 1 +current 1 +implementinga 1 +laboratori 1 +base 1 +novel 1 +languag 1 +includ 1 +util 1 +visualizationtool 1 +graphic 1 +interfac 1 +sampl 1 +recent 1 +public 1 +disk 1 +tape 1 +join 1 +synchron 1 +access 1 +myllymaki 1 +proceed 1 +sigmetr 1 +confer 1 +sequenc 1 +queri 1 +sashadri 1 +ramakrishnan 1 +sigmod 1 +foundat 1 +visual 1 +metaphor 1 +schema 1 +displai 1 +haberand 1 +ioannidi 1 +journal 1 +intellig 1 +inform 1 +juli 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..52771177 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,102 @@ +home 1 +page 1 +seymour 1 +parter 1 +professor 1 +comput 1 +scienc 1 +mathemat 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +york 1 +interest 1 +numer 1 +method 1 +partial 1 +differenti 1 +equat 1 +research 1 +summari 1 +time 1 +major 1 +emphasi 1 +work 1 +solutionof 1 +indefinit 1 +discret 1 +ellipt 1 +system 1 +classicalit 1 +multigrid 1 +effectivelywhen 1 +posit 1 +definit 1 +also 1 +bemad 1 +effect 1 +real 1 +symmetr 1 +part 1 +operatori 1 +hand 1 +casedirect 1 +attempt 1 +preserv 1 +spars 1 +thesystem 1 +encount 1 +small 1 +pivot 1 +thu 1 +challengingproblem 1 +mix 1 +concept 1 +procedur 1 +linearalgebra 1 +nowinvolv 1 +sever 1 +project 1 +attack 1 +class 1 +problem 1 +includ 1 +precondit 1 +studi 1 +specialmultigrid 1 +sampl 1 +recent 1 +public 1 +chebyshev 1 +collact 1 +ellipticparti 1 +appear 1 +siam 1 +journalon 1 +analysi 1 +boundari 1 +condit 1 +without 1 +estim 1 +number 1 +distribut 1 +singular 1 +valu 1 +journal 1 +second 1 +order 1 +oper 1 +numbersand 1 +scientificcomput 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..e0d97dd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,128 @@ +home 1 +page 1 +mari 1 +vernon 1 +professor 1 +comput 1 +scienc 1 +industri 1 +engin 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +california 1 +angel 1 +interest 1 +techniqu 1 +applic 1 +system 1 +perform 1 +analysi 1 +parallel 1 +architectur 1 +operatingsystem 1 +research 1 +summari 1 +analyt 1 +model 1 +applicationto 1 +issu 1 +emphasi 1 +paralleland 1 +distribut 1 +design 1 +techniquesi 1 +develop 1 +togeth 1 +graduat 1 +student 1 +colleaguesinclud 1 +gener 1 +time 1 +petri 1 +customizedmean 1 +valu 1 +gtpn 1 +repres 1 +systemfeatur 1 +synchron 1 +prioriti 1 +servic 1 +precis 1 +custom 1 +yield 1 +intuit 1 +equationsthat 1 +featur 1 +approxim 1 +butcan 1 +solv 1 +effici 1 +also 1 +recent 1 +proposedth 1 +call 1 +interpol 1 +approximationsfor 1 +processor 1 +alloc 1 +polici 1 +techniquemai 1 +broader 1 +performanceanalysi 1 +current 1 +project 1 +includ 1 +character 1 +high 1 +performanceparallel 1 +workload 1 +schedulingpolici 1 +schedul 1 +multimedia 1 +server 1 +memorymanag 1 +network 1 +workstat 1 +sampl 1 +public 1 +fair 1 +dqdb 1 +slot 1 +reus 1 +brewster 1 +proceed 1 +ieee 1 +infocom 1 +confer 1 +august 1 +accur 1 +hybrid 1 +hash 1 +join 1 +algorithm 1 +pateland 1 +carei 1 +sigmetr 1 +june 1 +characterist 1 +limit 1 +preemption 1 +forrun 1 +complet 1 +with 1 +chiang 1 +mansharamani 1 +sigmetricsconfer 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..98b6e7ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,9 @@ +qinqin 1 +wang 1 +home 1 +page 1 +welcom 1 +pageqw 1 +wisc 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..58b1fbee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,198 @@ +raghu 1 +ramakrishnan 1 +home 1 +page 1 +associ 1 +professor 1 +comput 1 +scienc 1 +wisc 1 +depart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +usaphon 1 +educ 1 +teach 1 +activ 1 +research 1 +interest 1 +project 1 +graduat 1 +univers 1 +texa 1 +austin 1 +tech 1 +indian 1 +institut 1 +technolog 1 +madra 1 +cours 1 +text 1 +databas 1 +manag 1 +system 1 +softwar 1 +minibaseand 1 +coralth 1 +publish 1 +mcgraw 1 +hill 1 +aimedat 1 +first 1 +second 1 +undergraduateand 1 +level 1 +minibas 1 +relat 1 +dbm 1 +develop 1 +inconjunct 1 +coral 1 +also 1 +us 1 +coursesthat 1 +deal 1 +logic 1 +sever 1 +school 1 +integr 1 +heterogen 1 +data 1 +sourc 1 +content 1 +base 1 +queri 1 +index 1 +sequenc 1 +imag 1 +exploratori 1 +analysi 1 +larg 1 +set 1 +mine 1 +extend 1 +languag 1 +constraint 1 +deductiona 1 +grow 1 +diversifi 1 +increasinglyimport 1 +abl 1 +access 1 +dispers 1 +independ 1 +easili 1 +rodin 1 +successor 1 +investig 1 +severalissu 1 +formal 1 +techniqu 1 +practic 1 +toolkit 1 +forsemant 1 +support 1 +multipl 1 +serviceand 1 +networkedclust 1 +machin 1 +joint 1 +work 1 +prof 1 +ioannidi 1 +livni 1 +recent 1 +result 1 +visual 1 +explorationfrom 1 +next 1 +appli 1 +area 1 +complex 1 +assequ 1 +seqsystem 1 +focus 1 +design 1 +optimizationissu 1 +part 1 +import 1 +aspect 1 +identifyingtrend 1 +gener 1 +identifi 1 +pattern 1 +ofinform 1 +goal 1 +retriev 1 +fromlarg 1 +focu 1 +implementingan 1 +express 1 +definit 1 +customizea 1 +take 1 +advantag 1 +specializedinform 1 +given 1 +collect 1 +indexedand 1 +cover 1 +explor 1 +andmin 1 +power 1 +cluster 1 +algorithm 1 +call 1 +birchfor 1 +dataset 1 +tool 1 +devisea 1 +long 1 +stand 1 +extens 1 +databasequeri 1 +program 1 +featuressuch 1 +structur 1 +term 1 +recurs 1 +ofarithmet 1 +specifi 1 +morecompactli 1 +effici 1 +ongo 1 +involv 1 +continu 1 +coraldeduct 1 +evalu 1 +upon 1 +bottom 1 +fixpointevalu 1 +optim 1 +make 1 +efficientacross 1 +broad 1 +rang 1 +sudarshan 1 +august 1 +time 1 +employ 1 +bell 1 +lab 1 +murrai 1 +sudarsha 1 +srivastava 1 +deduct 1 +object 1 +orient 1 +divesh 1 +transit 1 +closur 1 +seshadri 1 +managementfirst 1 +cornel 1 +praveen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..63c12a3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,163 @@ +rahul 1 +home 1 +page 1 +kapoorhello 1 +internet 1 +surfer 1 +welcom 1 +cyber 1 +hope 1 +stai 1 +long 1 +enough 1 +know 1 +littl 1 +offici 1 +third 1 +final 1 +semest 1 +master 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +came 1 +fall 1 +get 1 +bachelor 1 +degre 1 +indianinstitut 1 +technolog 1 +kanpur 1 +interest 1 +employ 1 +pleas 1 +check 1 +resum 1 +cours 1 +schedulemydepartmentmyuniversityiitkanpuriitkclass 1 +india 1 +relatedlink 1 +menow 1 +gone 1 +want 1 +person 1 +well 1 +normal 1 +kind 1 +born 1 +andrais 1 +small 1 +love 1 +famili 1 +compris 1 +parent 1 +elder 1 +sister 1 +nice 1 +town 1 +call 1 +state 1 +good 1 +fortun 1 +live 1 +moneymagazin 1 +rate 1 +livabl 1 +citi 1 +year 1 +editormust 1 +come 1 +greenland 1 +think 1 +winter 1 +guess 1 +shouldn 1 +complain 1 +spring 1 +isawesom 1 +summer 1 +jose 1 +california 1 +work 1 +almaden 1 +research 1 +centr 1 +cannot 1 +much 1 +suppos 1 +great 1 +anywai 1 +regret 1 +time 1 +area 1 +whatev 1 +monei 1 +magazin 1 +sai 1 +northern 1 +place 1 +like 1 +music 1 +take 1 +look 1 +collect 1 +movi 1 +almost 1 +genr 1 +though 1 +prefer 1 +romanc 1 +comedi 1 +show 1 +file 1 +star 1 +trek 1 +read 1 +stuff 1 +novel 1 +philosophi 1 +surf 1 +sport 1 +concern 1 +watch 1 +cricket 1 +plai 1 +win 1 +tenni 1 +figur 1 +skate 1 +gymnast 1 +try 1 +learn 1 +swim 1 +bridg 1 +natur 1 +lover 1 +enjoi 1 +go 1 +walk 1 +hike 1 +cloudi 1 +slightli 1 +breezi 1 +wish 1 +could 1 +job 1 +televis 1 +travel 1 +youget 1 +world 1 +paid 1 +musicstuffmovi 1 +televisioninternettravelotherbookmark 1 +contact 1 +meget 1 +form 1 +rest 1 +guestbookrahul 1 +wisc 1 +eduh 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..6c25ba83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,76 @@ +rajesh 1 +raman 1 +home 1 +page 1 +old 1 +homm 1 +off 1 +comput 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +madison 1 +email 1 +wisc 1 +telephon 1 +ohio 1 +wesleyan 1 +univers 1 +major 1 +mathemat 1 +minor 1 +music 1 +current 1 +first 1 +year 1 +graduat 1 +student 1 +winsonsin 1 +person 1 +curriculum 1 +vita 1 +postscript 1 +specif 1 +cours 1 +architectur 1 +saluja 1 +system 1 +perform 1 +evalu 1 +model 1 +livni 1 +distribut 1 +inform 1 +work 1 +team 1 +member 1 +condor 1 +project 1 +integr 1 +part 1 +committe 1 +bookmark 1 +chimera 1 +novelti 1 +monster 1 +chao 1 +subject 1 +contradict 1 +prodigi 1 +judg 1 +thing 1 +feebleworm 1 +earth 1 +depositari 1 +truth 1 +cloaca 1 +uncertainti 1 +error 1 +theglori 1 +shame 1 +blais 1 +pascal 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..791ac5ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,78 @@ +karthik 1 +pagekarthikeyan 1 +ramasamyabouti 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +research 1 +interest 1 +mostli 1 +databas 1 +oper 1 +system 1 +work 1 +jeffrei 1 +naughton 1 +paradis 1 +project 1 +projectshack 1 +david 1 +dewitt 1 +connectivityparadis 1 +parallel 1 +pthread 1 +wrapperspublicationsstorag 1 +estim 1 +multidimension 1 +aggreg 1 +presenc 1 +hierarchi 1 +amit 1 +shukla 1 +prasad 1 +deshpand 1 +karthikeyan 1 +ramasami 1 +intern 1 +confer 1 +larg 1 +mumbai 1 +bombai 1 +avail 1 +presentationsweb 1 +picturearchitectur 1 +altern 1 +scalabl 1 +serversphoto 1 +albumencount 1 +leafperson 1 +inforesum 1 +financemonei 1 +wall 1 +street 1 +journal 1 +person 1 +interestshack 1 +photographycontact 1 +informationstreet 1 +address 1 +dayton 1 +madison 1 +electron 1 +mail 1 +addresskarthik 1 +wisc 1 +eduoffic 1 +phone 1 +number 1 +comment 1 +suggestionspleas 1 +tell 1 +think 1 +home 1 +page 1 +might 1 +improv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..9ec9c9bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,21 @@ +kelli 1 +home 1 +page 1 +ratliffoffic 1 +phone 1 +email 1 +wisc 1 +edulast 1 +login 1 +offic 1 +hour 1 +inform 1 +student 1 +genealog 1 +interest 1 +place 1 +visit 1 +space 1 +construct 1 +stai 1 +tune 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..68a5761e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,45 @@ +viresh 1 +ratnakar 1 +page 1 +research 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +area 1 +digit 1 +imag 1 +video 1 +compress 1 +advisor 1 +miron 1 +livni 1 +main 1 +interest 1 +base 1 +vector 1 +quantiz 1 +fractal 1 +qualiti 1 +control 1 +lossi 1 +product 1 +mode 1 +public 1 +home 1 +invok 1 +qclicauthor 1 +avail 1 +qclic 1 +qclicbrows 1 +thing 1 +rever 1 +reveal 1 +click 1 +west 1 +dayton 1 +street 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..78bc65d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,78 @@ +monasteriu 1 +omin 1 +doominu 1 +welcom 1 +brother 1 +richard 1 +without 1 +beard 1 +person 1 +haven 1 +address 1 +offic 1 +univers 1 +wisconsin 1 +madison 1 +comput 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +number 1 +rcarl 1 +wisc 1 +hour 1 +thur 1 +home 1 +page 1 +current 1 +cours 1 +load 1 +distribut 1 +oper 1 +system 1 +mondai 1 +wednesdai 1 +underwat 1 +fire 1 +prevent 1 +saturdai 1 +advanc 1 +architectur 1 +tuesdai 1 +thursdai 1 +math 1 +introduct 1 +whole 1 +emphasi 1 +sundai 1 +subsurfac 1 +depositori 1 +engin 1 +grave 1 +dig 1 +fridai 1 +mani 1 +shade 1 +profession 1 +doom 1 +polit 1 +goofi 1 +solitari 1 +innebri 1 +vampir 1 +seriou 1 +nostalg 1 +funki 1 +monk 1 +fellow 1 +order 1 +ever 1 +need 1 +graphic 1 +artist 1 +desktop 1 +publish 1 +check 1 +best 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..8700b7d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,558 @@ +thoma 1 +rep 1 +home 1 +page 1 +repsprofessorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +wisc 1 +telephon 1 +secretari 1 +depart 1 +cornel 1 +univers 1 +curriculum 1 +vita 1 +research 1 +interest 1 +program 1 +slice 1 +differenc 1 +merg 1 +interprocedur 1 +dataflow 1 +analysi 1 +alia 1 +pointer 1 +shape 1 +languag 1 +base 1 +develop 1 +environ 1 +increment 1 +comput 1 +attribut 1 +grammar 1 +also 1 +thehom 1 +project 1 +content 1 +summari 1 +categor 1 +index 1 +public 1 +list 1 +visitor 1 +post 1 +doctor 1 +associ 1 +student 1 +summarymi 1 +aim 1 +creat 1 +tool 1 +support 1 +thedevelop 1 +complex 1 +softwar 1 +system 1 +object 1 +createtool 1 +provid 1 +power 1 +specif 1 +manipulationoper 1 +particular 1 +work 1 +explor 1 +slicingcan 1 +serv 1 +basi 1 +manipul 1 +oper 1 +respect 1 +elementss 1 +includ 1 +element 1 +thatmight 1 +affect 1 +either 1 +directli 1 +transit 1 +valu 1 +thevari 1 +us 1 +member 1 +allow 1 +findsemant 1 +meaning 1 +decomposit 1 +thedecomposit 1 +consist 1 +textual 1 +contigu 1 +fundament 1 +solvingmani 1 +engin 1 +problem 1 +instanc 1 +applicationsin 1 +understand 1 +mainten 1 +debug 1 +test 1 +special 1 +reus 1 +worker 1 +carri 1 +atimprov 1 +underli 1 +technolog 1 +relatedoper 1 +implement 1 +slicer 1 +method 1 +andbuild 1 +clickherefor 1 +recent 1 +establish 1 +unexpect 1 +connect 1 +betweeninterprocedur 1 +previou 1 +oninterprocedur 1 +show 1 +larg 1 +class 1 +interproceduraldataflow 1 +solv 1 +transformingthem 1 +kind 1 +graph 1 +reachabl 1 +precis 1 +polynomi 1 +timebi 1 +algorithm 1 +origin 1 +subject 1 +mean 1 +make 1 +solut 1 +probleminst 1 +find 1 +nearbi 1 +publicationsprogram 1 +overview 1 +ics 1 +dagstuhl 1 +slicing_pat 1 +david 1 +binklei 1 +thesi 1 +acta 1 +topla 1 +pldi 1 +chop 1 +fseb 1 +yang 1 +thesismerg 1 +tosem 1 +sigsoft 1 +thesiswuu 1 +iwscm 1 +popla 1 +esop 1 +iwsvcc 1 +algebra 1 +applic 1 +ccpsd 1 +amast 1 +npfo_submiss 1 +semant 1 +ccipl 1 +poplb 1 +pepma 1 +prog_integration_system 1 +prog_integration_manu 1 +note 1 +describ 1 +paper 1 +handl 1 +small 1 +subsetof 1 +pascal 1 +distribut 1 +licens 1 +obtain 1 +clickingher 1 +current 1 +retarget 1 +andexpect 1 +anddifferenc 1 +probabl 1 +integr 1 +miscellan 1 +thesesdavid 1 +thesisphil 1 +pfeiffer 1 +thesisinterprocedur 1 +analysisdemand 1 +idfa 1 +bottom 1 +logic 1 +magic 1 +set 1 +transform 1 +exhaust 1 +demand 1 +fsea 1 +popl 1 +diku 1 +tcs_ide_pap 1 +fase 1 +ptime 1 +complet 1 +acta_pap 1 +pepmb 1 +pfeiffer_thesi 1 +sigplan 1 +synthes 1 +gener 1 +book 1 +manual 1 +lape 1 +psde 1 +compcon 1 +cacm 1 +ramalingam 1 +jalg_pap 1 +popl_not 1 +spaa 1 +publicationsbooksrep 1 +teitelbaum 1 +constructinglanguag 1 +editor 1 +springer 1 +verlag 1 +york 1 +refer 1 +third 1 +edit 1 +chines 1 +reprint 1 +publish 1 +world 1 +corpor 1 +beij 1 +china 1 +press 1 +cambridg 1 +journal 1 +publicationssagiv 1 +horwitz 1 +toconst 1 +propag 1 +appear 1 +theoret 1 +sequenti 1 +natur 1 +informatica 1 +shortest 1 +path 1 +j_alg 1 +dynam 1 +procedur 1 +call 1 +transact 1 +methodolog 1 +januari 1 +competit 1 +line 1 +prioriti 1 +order 1 +inform 1 +process 1 +letter 1 +accommod 1 +preservingtransform 1 +juli 1 +properti 1 +effici 1 +comparison 1 +depend 1 +prin 1 +interf 1 +version 1 +evalu 1 +grammarswith 1 +unrestrict 1 +movement 1 +tree 1 +modif 1 +ieee 1 +novemb 1 +demer 1 +sublinear 1 +space 1 +context 1 +syntax 1 +direct 1 +commun 1 +septemb 1 +invit 1 +papershorwitz 1 +proceed 1 +fourteenth 1 +intern 1 +conferenceon 1 +melbourn 1 +australia 1 +second 1 +european 1 +symposium 1 +nanci 1 +franc 1 +march 1 +lectur 1 +ganzing 1 +chaptersrep 1 +databas 1 +ramakrishnan 1 +kluwer 1 +academ 1 +boston 1 +chang 1 +impact 1 +bohner 1 +arnold 1 +societi 1 +alamito 1 +fromacm 1 +berzin 1 +theori 1 +fromproceed 1 +colloquium 1 +combin 1 +paradigmsfor 1 +brighton 1 +april 1 +abramski 1 +maibaum 1 +architectur 1 +ichikawa 1 +tsubotani 1 +scientif 1 +compani 1 +singapor 1 +interact 1 +barstow 1 +sandewal 1 +shrobe 1 +mcgraw 1 +hill 1 +wherefor 1 +wasserman 1 +washington 1 +sigoa 1 +symposiumon 1 +text 1 +portland 1 +june 1 +notic 1 +confer 1 +publicationssiff 1 +fourth 1 +sigsoftsymposium 1 +foundat 1 +francisco 1 +octob 1 +turnidg 1 +seminar 1 +partial 1 +schloss 1 +wadern 1 +germani 1 +danvi 1 +glueck 1 +thiemann 1 +sagiv 1 +wilhelm 1 +destruct 1 +updat 1 +record 1 +twenti 1 +principl 1 +petersburg 1 +rosai 1 +hentenryck 1 +bind 1 +time 1 +imper 1 +pepm 1 +onparti 1 +jolla 1 +california 1 +formalapproach 1 +aarhu 1 +denmark 1 +moss 1 +nielsen 1 +schwartzbach 1 +tapsoft 1 +speed 1 +onth 1 +orlean 1 +decemb 1 +fifth 1 +compilerconstruct 1 +edinburgh 1 +scotland 1 +fritzson 1 +maintain 1 +domin 1 +reducibleflowgraph 1 +first 1 +scan 1 +parallel 1 +data 1 +andarchitectur 1 +velen 1 +onalgebra 1 +iowa 1 +citi 1 +preserv 1 +irvin 1 +copenhagen 1 +jone 1 +bricker 1 +illustr 1 +interfer 1 +workshop 1 +softwareconfigur 1 +manag 1 +princeton 1 +variabl 1 +languagedesign 1 +issuesin 1 +barcelona 1 +spain 1 +diaz 1 +oreja 1 +programminglanguag 1 +design 1 +atlanta 1 +variant 1 +forprogram 1 +versionand 1 +configur 1 +control 1 +grassau 1 +bericht 1 +german 1 +chapter 1 +winkler 1 +teubner 1 +stuttgart 1 +fifteenth 1 +ofprogram 1 +diego 1 +adequaci 1 +repres 1 +marceau 1 +remot 1 +thirteenth 1 +engineeringsymposium 1 +practic 1 +pittsburgh 1 +alpern 1 +proof 1 +check 1 +eleventh 1 +onprincipl 1 +salt 1 +lake 1 +utah 1 +static 1 +digest 1 +spring 1 +optim 1 +ninth 1 +principlesof 1 +albuquerqu 1 +tosyntax 1 +eighth 1 +williamsburg 1 +softwarerep 1 +releas 1 +site 1 +click 1 +herefor 1 +patentsrep 1 +patent 1 +number 1 +pend 1 +submissionsrep 1 +august 1 +submit 1 +access 1 +latest 1 +reportsrep 1 +abstract 1 +analys 1 +leeuwen 1 +mehlhorn 1 +report 1 +center 1 +ibfi 1 +datalogisk 1 +institut 1 +psramalingam 1 +bibliographi 1 +twentieth 1 +charleston 1 +tutori 1 +unpublish 1 +present 1 +klint 1 +snelt 1 +identifi 1 +differ 1 +extendedabstract 1 +ball 1 +correct 1 +reconstitut 1 +represent 1 +multi 1 +equival 1 +theorem 1 +demonstr 1 +prototyp 1 +doc 1 +studentsvisitor 1 +mooli 1 +israel 1 +jiazhen 1 +robert 1 +paig 1 +univ 1 +professor 1 +nation 1 +chiao 1 +tung 1 +taiwan 1 +north 1 +carolina 1 +chapel 1 +studentsramalingam 1 +bound 1 +dissert 1 +tech 1 +programintegr 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..da9ad227 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,69 @@ +saeed 1 +home 1 +pagespe 1 +function 1 +statusclock 1 +window 1 +statu 1 +date 1 +settimeout 1 +speed 1 +clearid 1 +cleartimeout 1 +mirza 1 +tech 1 +depart 1 +comput 1 +scienc 1 +engin 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +graduat 1 +student 1 +univ 1 +wisconsin 1 +madison 1 +lucknow 1 +india 1 +like 1 +spend 1 +time 1 +listn 1 +film 1 +song 1 +netsurf 1 +read 1 +comic 1 +hero 1 +calvin 1 +love 1 +peopl 1 +beauti 1 +seem 1 +contact 1 +offic 1 +randal 1 +apart 1 +comp 1 +dayton 1 +street 1 +best 1 +email 1 +wisc 1 +friend 1 +right 1 +pictur 1 +wismad 1 +suggest 1 +send 1 +check 1 +guestbook 1 +page 1 +access 1 +sinc 1 +last 1 +updat 1 +copi 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..ee0af070 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,32 @@ +salli 1 +peterson 1 +home 1 +page 1 +goodwin 1 +lecturercomput 1 +scienc 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +mail 1 +wisc 1 +edutelephon 1 +interest 1 +desktop 1 +comput 1 +real 1 +time 1 +oper 1 +system 1 +program 1 +languag 1 +cours 1 +taught 1 +fall 1 +comp 1 +lectur 1 +us 1 +last 1 +chang 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..396fd167 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,129 @@ +amit 1 +home 1 +page 1 +warn 1 +reach 1 +protocol 1 +offic 1 +email 1 +wisc 1 +snail 1 +mail 1 +comput 1 +scienc 1 +dept 1 +univers 1 +wisconsin 1 +madison 1 +princeton 1 +phone 1 +educ 1 +work 1 +toward 1 +guidanc 1 +jeff 1 +naughton 1 +master 1 +bachelor 1 +technolog 1 +engin 1 +indian 1 +institut 1 +madra 1 +research 1 +interest 1 +onlin 1 +analyt 1 +process 1 +queri 1 +perform 1 +evalu 1 +public 1 +storag 1 +estim 1 +multidimension 1 +aggreg 1 +presenc 1 +hierarchi 1 +shukla 1 +prasad 1 +deshpand 1 +jeffrei 1 +karthikeyan 1 +ramasami 1 +intern 1 +confer 1 +larg 1 +databas 1 +mumbai 1 +bombai 1 +paper 1 +postscript 1 +slide 1 +present 1 +vldb 1 +link 1 +run 1 +boston 1 +marathon 1 +chicago 1 +york 1 +seattl 1 +georg 1 +utah 1 +relat 1 +pointer 1 +network 1 +activ 1 +bibliographi 1 +server 1 +logic 1 +program 1 +competit 1 +profil 1 +spec 1 +idea 1 +sort 1 +name 1 +trier 1 +date 1 +sigmod 1 +data 1 +base 1 +endow 1 +articl 1 +archiv 1 +massiv 1 +digit 1 +system 1 +mdd 1 +initi 1 +multimedia 1 +inform 1 +sourc 1 +nation 1 +industri 1 +infrastructur 1 +niiip 1 +consortium 1 +transact 1 +council 1 +transcoop 1 +manag 1 +support 1 +cooper 1 +applic 1 +olap 1 +introduct 1 +pilot 1 +softwar 1 +help 1 +needi 1 +children 1 +look 1 +asha 1 +person 1 +pageand 1 +bookmarksar 1 +also 1 +garfield 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..c1df6e96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,38 @@ +ashwin 1 +home 1 +page 1 +construct 1 +name 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +come 1 +india 1 +hadmi 1 +undergradu 1 +educ 1 +indianinstitut 1 +technolog 1 +bombai 1 +iitb 1 +fantast 1 +place 1 +worth 1 +visit 1 +like 1 +contact 1 +canfing 1 +meto 1 +find 1 +whereabout 1 +altern 1 +send 1 +email 1 +sashwin 1 +wisc 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..a67df589 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,166 @@ +subramanya 1 +sastri 1 +home 1 +pagei 1 +mugshot 1 +mine 1 +come 1 +hospet 1 +town 1 +karnataka 1 +india 1 +year 1 +school 1 +near 1 +awai 1 +hampi 1 +ruin 1 +vijayanagara 1 +empir 1 +also 1 +tungabhadra 1 +built 1 +across 1 +river 1 +place 1 +beauti 1 +unfortun 1 +dont 1 +photograph 1 +would 1 +scan 1 +photo 1 +album 1 +long 1 +undergradu 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +depart 1 +comput 1 +scienc 1 +engin 1 +wonder 1 +fewphotograph 1 +time 1 +gokul 1 +maintain 1 +contain 1 +mani 1 +iitk 1 +class 1 +homepag 1 +inform 1 +classmatesat 1 +presentcurr 1 +graduat 1 +student 1 +univers 1 +wisconsin 1 +madison 1 +plan 1 +cours 1 +registeredfor 1 +spring 1 +interestsmi 1 +academ 1 +interest 1 +field 1 +architectur 1 +program 1 +languag 1 +compil 1 +hope 1 +cricket 1 +favourit 1 +sport 1 +us 1 +playphatta 1 +tenni 1 +ball 1 +version 1 +thati 1 +champ 1 +anyth 1 +provid 1 +entertainmentin 1 +compani 1 +friend 1 +bookmark 1 +link 1 +site 1 +enjoi 1 +listen 1 +music 1 +pleasant 1 +must 1 +consid 1 +hard 1 +rock 1 +metal 1 +someth 1 +realli 1 +donot 1 +watch 1 +much 1 +whatev 1 +like 1 +seinfeld 1 +sshow 1 +read 1 +goe 1 +voraci 1 +reader 1 +rather 1 +whati 1 +better 1 +horror 1 +fantasi 1 +neither 1 +fiction 1 +unsuccesfulli 1 +tri 1 +grip 1 +earth 1 +know 1 +mean 1 +hint 1 +romanc 1 +ifposs 1 +jeffrei 1 +archer 1 +author 1 +date 1 +jane 1 +austen 1 +pride 1 +prejudic 1 +talk 1 +ramesh 1 +mahadeven 1 +sarticl 1 +make 1 +plai 1 +bridg 1 +back 1 +pick 1 +wonderfulgam 1 +itagain 1 +solv 1 +crossword 1 +cryptic 1 +kind 1 +inth 1 +past 1 +devot 1 +hobbi 1 +last 1 +updat 1 +januari 1 +send 1 +comment 1 +suggest 1 +wisc 1 +eduunivers 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..6f10a0ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,219 @@ +susan 1 +calcari 1 +home 1 +page 1 +calcarimanag 1 +scout 1 +servicescomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madisonsc 1 +wisc 1 +edumi 1 +titl 1 +internet 1 +manag 1 +servic 1 +comput 1 +depart 1 +univers 1 +madison 1 +scoutservic 1 +project 1 +internicand 1 +support 1 +nation 1 +sciencefound 1 +user 1 +thehigh 1 +educ 1 +commun 1 +provid 1 +time 1 +inform 1 +bestresourc 1 +tool 1 +goal 1 +help 1 +research 1 +andeduc 1 +effect 1 +work 1 +week 1 +filter 1 +hundr 1 +item 1 +edit 1 +organ 1 +import 1 +present 1 +multipl 1 +usabl 1 +format 1 +includ 1 +report 1 +toolkit 1 +happen 1 +come 1 +soonth 1 +sprout 1 +newslett 1 +written 1 +kid 1 +peopl 1 +receiv 1 +email 1 +andthousand 1 +read 1 +annotatedlist 1 +best 1 +newli 1 +discov 1 +resourc 1 +public 1 +kind 1 +devot 1 +select 1 +itemsinclud 1 +issu 1 +happeningspost 1 +everi 1 +weekdai 1 +thousand 1 +orth 1 +newsgroup 1 +moreinform 1 +profession 1 +background 1 +involv 1 +wide 1 +sinc 1 +wheni 1 +join 1 +merit 1 +arbor 1 +thensfnet 1 +backbon 1 +informationservic 1 +divis 1 +spent 1 +three 1 +year 1 +speak 1 +tonat 1 +intern 1 +higher 1 +audienc 1 +internetand 1 +also 1 +develop 1 +produc 1 +network 1 +seminarseri 1 +first 1 +seminar 1 +seri 1 +focus 1 +need 1 +internetend 1 +later 1 +becam 1 +director 1 +forcerfnet 1 +respect 1 +base 1 +diego 1 +wrote 1 +propos 1 +result 1 +award 1 +part 1 +internicproject 1 +cooper 1 +agreement 1 +third 1 +termin 1 +theport 1 +chose 1 +continu 1 +workof 1 +elect 1 +reloc 1 +andrequest 1 +approv 1 +thecomput 1 +heartilyagre 1 +futur 1 +plan 1 +servicesat 1 +staff 1 +jack 1 +solock 1 +speciallibrarian 1 +expand 1 +theaddit 1 +open 1 +systemadministr 1 +posit 1 +june 1 +matthew 1 +livesei 1 +aproject 1 +assist 1 +expans 1 +thescout 1 +addit 1 +disciplin 1 +specif 1 +asscout 1 +area 1 +studi 1 +branch 1 +researcharea 1 +collabor 1 +group 1 +campus 1 +potenti 1 +topic 1 +includenetwork 1 +discoveri 1 +retriev 1 +nidr 1 +anddisciplin 1 +orient 1 +gather 1 +depend 1 +onth 1 +natur 1 +hire 1 +willincludecomput 1 +graduat 1 +undergradu 1 +level 1 +visit 1 +site 1 +find 1 +ofour 1 +mail 1 +list 1 +ifyou 1 +interest 1 +appli 1 +theonlin 1 +descript 1 +special 1 +librarian 1 +send 1 +aresum 1 +write 1 +sampl 1 +address 1 +feel 1 +free 1 +contactm 1 +telephon 1 +calcariinternet 1 +dayton 1 +street 1 +scal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..1a21ed25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,53 @@ +chandrasekar 1 +home 1 +page 1 +welcom 1 +homepag 1 +worri 1 +happi 1 +present 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +past 1 +born 1 +june 1 +coimbator 1 +southern 1 +state 1 +tamilnadu 1 +inindia 1 +high 1 +school 1 +educ 1 +higher 1 +secondari 1 +undergradu 1 +theindian 1 +institut 1 +technolog 1 +kharagpur 1 +major 1 +dept 1 +engin 1 +person 1 +stuff 1 +resid 1 +kendal 1 +avenu 1 +madison 1 +officedept 1 +dayton 1 +sivasankaran 1 +schandra 1 +wisc 1 +last 1 +updat 1 +finger 1 +find 1 +whereabout 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..bd6613e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,31 @@ +eric 1 +schnarr 1 +home 1 +pageer 1 +wisc 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaphon 1 +advisor 1 +larusresearch 1 +interest 1 +architectur 1 +descript 1 +languagesfunct 1 +languag 1 +designinterest 1 +link 1 +wind 1 +tunnel 1 +sacm 1 +hockei 1 +club 1 +dragon 1 +byte 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..d4f90ac9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,66 @@ +yanni 1 +schoina 1 +home 1 +page 1 +wisc 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +note 1 +construct 1 +advisor 1 +mark 1 +hill 1 +interest 1 +parallel 1 +systemspubl 1 +fine 1 +grain 1 +access 1 +control 1 +distribut 1 +share 1 +memori 1 +ioanni 1 +babak 1 +falsafi 1 +alvin 1 +lebeck 1 +steven 1 +reinhardt 1 +jame 1 +laru 1 +david 1 +wood 1 +sixth 1 +intern 1 +confer 1 +architectur 1 +support 1 +programminglanguag 1 +oper 1 +system 1 +asplo 1 +applic 1 +specif 1 +protocol 1 +user 1 +level 1 +ann 1 +roger 1 +supercomput 1 +educ 1 +univers 1 +crete 1 +iraklio 1 +last 1 +updat 1 +juli 1 +cretan 1 +cook 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..8cb93b08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,37 @@ +beverli 1 +seavei 1 +home 1 +page 1 +current 1 +regist 1 +grad 1 +student 1 +comput 1 +scienc 1 +switch 1 +special 1 +degre 1 +biologi 1 +interest 1 +mine 1 +includ 1 +asian 1 +classic 1 +danc 1 +differ 1 +version 1 +ramayana 1 +india 1 +southeast 1 +asia 1 +drama 1 +ramakien 1 +wish 1 +could 1 +finger 1 +give 1 +account 1 +hairbal 1 +keyboard 1 +keeper 1 +instead 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..ecfd0bf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,62 @@ +scott 1 +colvil 1 +home 1 +pagescott 1 +page 1 +mail 1 +wisc 1 +eduoffic 1 +address 1 +comput 1 +scienc 1 +offic 1 +dayton 1 +madison 1 +franc 1 +welcom 1 +well 1 +school 1 +back 1 +univers 1 +wisconsin 1 +seen 1 +largest 1 +ball 1 +chees 1 +want 1 +link 1 +uwisc 1 +pagein 1 +addit 1 +list 1 +find 1 +interest 1 +hopefulli 1 +also 1 +enjoi 1 +beer 1 +world 1 +drink 1 +game 1 +absolut 1 +add 1 +caffein 1 +rate 1 +soda 1 +guid 1 +lock 1 +pickingand 1 +educ 1 +artsi 1 +fact 1 +book 1 +constitut 1 +english 1 +dictionari 1 +roget 1 +thesauru 1 +poetri 1 +databas 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..903f34ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,59 @@ +steve 1 +seitz 1 +anim 1 +writeup 1 +imag 1 +motion 1 +analysi 1 +charact 1 +control 1 +chuck 1 +dyerour 1 +research 1 +motiv 1 +problem 1 +teachinga 1 +graphic 1 +model 1 +perform 1 +realist 1 +hasit 1 +root 1 +cartoon 1 +modern 1 +applic 1 +tocomput 1 +virtual 1 +realiti 1 +teleconferenc 1 +robot 1 +task 1 +endow 1 +knowledg 1 +performa 1 +repertoir 1 +interest 1 +learn 1 +beinvok 1 +directli 1 +high 1 +level 1 +cue 1 +smile 1 +walk 1 +infer 1 +anabstract 1 +goal 1 +store 1 +cu 1 +levelev 1 +input 1 +devic 1 +sequenc 1 +project 1 +includ 1 +period 1 +track 1 +rigid 1 +nonrigid 1 +object 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..b6f39f1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,85 @@ +steve 1 +seitz 1 +view 1 +interpol 1 +synthesi 1 +imag 1 +investig 1 +chuck 1 +dyerw 1 +devis 1 +provabl 1 +correct 1 +autom 1 +techniqu 1 +creat 1 +scene 1 +basi 1 +reli 1 +geometr 1 +known 1 +morph 1 +graphicscommun 1 +produc 1 +intermedi 1 +although 1 +techniquescurr 1 +enjoi 1 +widespread 1 +theoret 1 +validityha 1 +establish 1 +particular 1 +viewsof 1 +sequenc 1 +physic 1 +valid 1 +ofthat 1 +surprisingli 1 +answer 1 +provid 1 +first 1 +undergo 1 +simplerectif 1 +procedur 1 +certain 1 +assumpt 1 +visibl 1 +theproject 1 +process 1 +satisfi 1 +work 1 +describ 1 +us 1 +stereo 1 +todetermin 1 +correspond 1 +recent 1 +consid 1 +user 1 +interact 1 +guid 1 +comput 1 +three 1 +differ 1 +pair 1 +therectifi 1 +origin 1 +shown 1 +left 1 +right 1 +click 1 +theinterpol 1 +center 1 +mpeg 1 +movi 1 +show 1 +computedinterpol 1 +dyer 1 +proc 1 +workshop 1 +represent 1 +visual 1 +last 1 +chang 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..ed0cd1a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,94 @@ +view 1 +morph 1 +steve 1 +seitz 1 +investig 1 +chuck 1 +dyer 1 +relat 1 +public 1 +appear 1 +siggraph 1 +toward 1 +imag 1 +base 1 +scene 1 +represent 1 +us 1 +icpr 1 +techniqu 1 +gener 1 +compel 1 +transit 1 +betweenimag 1 +howev 1 +differ 1 +object 1 +pose 1 +viewpoint 1 +often 1 +causeunnatur 1 +distort 1 +difficult 1 +correct 1 +manual 1 +basic 1 +principl 1 +projectivegeometri 1 +paper 1 +introduc 1 +simpl 1 +extens 1 +morphingthat 1 +correctli 1 +handl 1 +project 1 +camera 1 +transform 1 +call 1 +work 1 +prewarp 1 +imagesprior 1 +comput 1 +postwarp 1 +interpol 1 +knowledg 1 +shape 1 +requir 1 +appliedto 1 +photograph 1 +draw 1 +well 1 +render 1 +abil 1 +synthes 1 +chang 1 +structureafford 1 +wide 1 +varieti 1 +interest 1 +effect 1 +imagetransform 1 +taken 1 +produc 1 +illus 1 +physic 1 +move 1 +virtual 1 +click 1 +mpeg 1 +movi 1 +face 1 +simultan 1 +facial 1 +color 1 +resolut 1 +mona 1 +lisa 1 +reflect 1 +high 1 +frame 1 +jude 1 +shavlik 1 +last 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..1882513f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,127 @@ +period 1 +motion 1 +inform 1 +cyclic 1 +analysi 1 +steve 1 +seitz 1 +chuck 1 +dyermani 1 +real 1 +life 1 +frame 1 +refer 1 +instanc 1 +human 1 +locomotori 1 +walk 1 +run 1 +skip 1 +shuffl 1 +areperiod 1 +move 1 +person 1 +havedevelop 1 +approach 1 +determin 1 +imag 1 +sequenc 1 +could 1 +beenproduc 1 +object 1 +whose 1 +unlik 1 +previou 1 +attempt 1 +ourapproach 1 +allow 1 +camera 1 +film 1 +poscript 1 +paper 1 +click 1 +tracethi 1 +show 1 +trace 1 +line 1 +recov 1 +imagesequ 1 +phonograph 1 +turntabl 1 +ramp 1 +correspond 1 +moment 1 +timewher 1 +momentarili 1 +slow 1 +shownsuperimpos 1 +error 1 +surfac 1 +repeat 1 +tend 1 +perfectli 1 +even 1 +variesslightli 1 +cycl 1 +next 1 +physic 1 +import 1 +changesin 1 +scene 1 +gener 1 +defin 1 +motionsthat 1 +make 1 +variat 1 +explicit 1 +represent 1 +call 1 +compact 1 +pure 1 +tempor 1 +describ 1 +evolutionof 1 +without 1 +spatial 1 +quantiti 1 +asposit 1 +veloc 1 +delimit 1 +identifi 1 +correspondencesacross 1 +provid 1 +mean 1 +parsinga 1 +addit 1 +sever 1 +featur 1 +canb 1 +deriv 1 +relat 1 +natur 1 +locat 1 +irregular 1 +tracecan 1 +also 1 +us 1 +medic 1 +enhanc 1 +composit 1 +fromdiffer 1 +furthermor 1 +reliabl 1 +recoveredfrom 1 +view 1 +invari 1 +fashion 1 +theori 1 +affin 1 +clickher 1 +heart 1 +angiograph 1 +bottom 1 +note 1 +additionalstructur 1 +visibl 1 +appar 1 +singl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..35c1ea56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,47 @@ +steve 1 +seitz 1 +home 1 +page 1 +wisc 1 +graduat 1 +student 1 +berkelei 1 +math 1 +area 1 +interest 1 +imag 1 +motion 1 +analysi 1 +base 1 +render 1 +machin 1 +vision 1 +comput 1 +graphic 1 +research 1 +project 1 +view 1 +morph 1 +synthesi 1 +mpeg 1 +movi 1 +show 1 +interpol 1 +left 1 +click 1 +exampl 1 +cyclic 1 +recent 1 +public 1 +stuff 1 +frequent 1 +link 1 +wisconsin 1 +group 1 +surreal 1 +cach 1 +closer 1 +look 1 +last 1 +chang 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..cafdc8d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,102 @@ +home 1 +page 1 +madison 1 +machin 1 +learn 1 +research 1 +group 1 +contain 1 +relev 1 +inform 1 +thememb 1 +mlrg 1 +univers 1 +wisconsin 1 +tabl 1 +content 1 +member 1 +archiv 1 +recent 1 +paper 1 +dataset 1 +domain 1 +theori 1 +read 1 +schedul 1 +seminar 1 +graduat 1 +cours 1 +local 1 +link 1 +us 1 +extern 1 +carolyn 1 +allex 1 +jonathon 1 +bodner 1 +kevin 1 +cherkauer 1 +mark 1 +craven 1 +tina 1 +eliassi 1 +richard 1 +maclin 1 +august 1 +david 1 +opitz 1 +jude 1 +shavlik 1 +papersvisit 1 +describ 1 +public 1 +ascii 1 +file 1 +list 1 +recentabstractsi 1 +also 1 +avail 1 +theoriesy 1 +access 1 +directori 1 +severalml 1 +testb 1 +breast 1 +cancer 1 +databas 1 +prof 1 +olvi 1 +mangasarian 1 +sgroup 1 +current 1 +line 1 +math 1 +program 1 +comput 1 +biologi 1 +dept 1 +neurosci 1 +vision 1 +robot 1 +doit 1 +center 1 +mathemat 1 +scienc 1 +gopher 1 +school 1 +librari 1 +abstract 1 +select 1 +journal 1 +mostli 1 +wendt 1 +readabl 1 +wisc 1 +proc 1 +workshop 1 +agent 1 +held 1 +intern 1 +confer 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..e332daf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,96 @@ +shubu 1 +mukherje 1 +home 1 +page 1 +wisc 1 +fiance 1 +mimi 1 +nephew 1 +avirup 1 +month 1 +graduat 1 +research 1 +assist 1 +comput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +usaphon 1 +shubhendu 1 +click 1 +button 1 +jump 1 +correspond 1 +articl 1 +advisor 1 +mark 1 +hill 1 +project 1 +wind 1 +tunnel 1 +public 1 +architect 1 +world 1 +wide 1 +badger 1 +ballroom 1 +danc 1 +team 1 +person 1 +interest 1 +hobbi 1 +morph 1 +dionisio 1 +courtesi 1 +steve 1 +seitz 1 +random 1 +linkseducationph 1 +univers 1 +spring 1 +expect 1 +tech 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +india 1 +summari 1 +coher 1 +network 1 +interfac 1 +dissert 1 +cachabl 1 +queue 1 +design 1 +space 1 +isca 1 +progress 1 +distribut 1 +share 1 +memori 1 +mechan 1 +cooper 1 +commod 1 +workstat 1 +submit 1 +cach 1 +protocol 1 +custom 1 +irregular 1 +applic 1 +ppopp 1 +grai 1 +softwar 1 +dirsw 1 +parallel 1 +simul 1 +tutori 1 +copyright 1 +copi 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..fb0c9d48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,25 @@ +michael 1 +siff 1 +home 1 +page 1 +philosophi 1 +research 1 +academ 1 +interest 1 +run 1 +club 1 +fall 1 +midwest 1 +seminar 1 +wonder 1 +wai 1 +wast 1 +time 1 +new 1 +inform 1 +resourc 1 +book 1 +movi 1 +televis 1 +sport 1 +humor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..c09e4c12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,24 @@ +skrentni 1 +home 1 +page 1 +lecturerc 1 +coordinatorgradu 1 +studentcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +offic 1 +comput 1 +sciencesemail 1 +wisc 1 +edutelephon 1 +relat 1 +link 1 +univers 1 +depart 1 +groupskrentni 1 +last 1 +updat 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..63eb25c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,53 @@ +bryan 1 +home 1 +page 1 +graduat 1 +studentcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +offic 1 +mail 1 +wisc 1 +edutelephon 1 +comput 1 +univers 1 +purdu 1 +interest 1 +intellig 1 +help 1 +system 1 +human 1 +interact 1 +knowledg 1 +represent 1 +oper 1 +activ 1 +select 1 +recent 1 +public 1 +travi 1 +step 1 +toward 1 +unix 1 +util 1 +technic 1 +report 1 +april 1 +miller 1 +fredriksen 1 +empir 1 +studi 1 +reliabl 1 +commun 1 +relat 1 +link 1 +depart 1 +group 1 +professor 1 +larri 1 +advisor 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..fa93718c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,128 @@ +larri 1 +travi 1 +home 1 +page 1 +travisprofessorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +dayton 1 +madison 1 +mail 1 +wisc 1 +edutelephon 1 +univers 1 +californa 1 +angel 1 +interest 1 +expert 1 +system 1 +procedur 1 +control 1 +automat 1 +deduct 1 +comput 1 +support 1 +understand 1 +complex 1 +data 1 +philosoph 1 +foundat 1 +ofartifici 1 +intellig 1 +manag 1 +social 1 +implic 1 +research 1 +summari 1 +center 1 +around 1 +us 1 +logic 1 +basi 1 +knowledg 1 +formal 1 +augment 1 +databasesystem 1 +recent 1 +work 1 +focus 1 +automaticdeduct 1 +design 1 +contruct 1 +displai 1 +test 1 +high 1 +level 1 +abstract 1 +pattern 1 +form 1 +informationcontain 1 +larg 1 +heterogen 1 +databas 1 +special 1 +attent 1 +beingdevot 1 +represent 1 +geograph 1 +inform 1 +waysthat 1 +enhanc 1 +integr 1 +visualiz 1 +map 1 +activ 1 +involv 1 +sever 1 +develop 1 +project 1 +andwith 1 +incorpor 1 +model 1 +visual 1 +aid 1 +singl 1 +organiz 1 +issu 1 +associ 1 +introduct 1 +technolog 1 +analysi 1 +suppositionsunderli 1 +altern 1 +approach 1 +artifici 1 +current 1 +student 1 +chuck 1 +ohar 1 +bryan 1 +scott 1 +swanson 1 +andi 1 +whitsitt 1 +derek 1 +zahn 1 +public 1 +could 1 +failur 1 +implement 1 +oravec 1 +appear 1 +journal 1 +softwar 1 +metaphor 1 +reflex 1 +examin 1 +falsework 1 +west 1 +magazin 1 +societi 1 +landscap 1 +link 1 +dept 1 +group 1 +last 1 +chang 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..681833ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,63 @@ +avinash 1 +sodani 1 +home 1 +page 1 +wisc 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +educ 1 +tech 1 +hon 1 +indian 1 +institut 1 +technolog 1 +kharagpur 1 +india 1 +juli 1 +academ 1 +interest 1 +architectur 1 +multiscalar 1 +kestrel 1 +project 1 +program 1 +languag 1 +compil 1 +cours 1 +packag 1 +java 1 +download 1 +meet 1 +batch 1 +mate 1 +relat 1 +link 1 +info 1 +center 1 +rank 1 +new 1 +hindu 1 +onlin 1 +edit 1 +random 1 +look 1 +kgpite 1 +follow 1 +toll 1 +free 1 +directori 1 +will 1 +world 1 +cricket 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..bfb9b8a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,252 @@ +guri 1 +sohi 1 +home 1 +page 1 +gurindar 1 +wisc 1 +associ 1 +professor 1 +comput 1 +scienc 1 +andelectr 1 +engin 1 +address 1 +educ 1 +research 1 +interest 1 +summari 1 +current 1 +graduat 1 +student 1 +recent 1 +talk 1 +public 1 +graduatesaddress 1 +depart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usasohi 1 +eduphon 1 +department 1 +offic 1 +univers 1 +illinoi 1 +urbana 1 +elect 1 +electr 1 +electron 1 +birla 1 +institut 1 +technolog 1 +pilani 1 +india 1 +instruct 1 +level 1 +parallel 1 +process 1 +compil 1 +architectur 1 +share 1 +memori 1 +multiprocessor 1 +system 1 +focus 1 +design 1 +thehighest 1 +perform 1 +uniprocessor 1 +gener 1 +investig 1 +circa 1 +processor 1 +plenti 1 +transistor 1 +availableon 1 +chip 1 +challeng 1 +resourc 1 +getth 1 +highest 1 +possibl 1 +execut 1 +sequenti 1 +program 1 +target 1 +sustain 1 +ofov 1 +cycl 1 +ordinari 1 +numer 1 +applic 1 +group 1 +sever 1 +issu 1 +needto 1 +resolv 1 +goal 1 +achiev 1 +studi 1 +character 1 +thenatur 1 +numericappl 1 +order 1 +understand 1 +avail 1 +andhow 1 +could 1 +exploit 1 +bulk 1 +effort 1 +expend 1 +continu 1 +thedevelop 1 +multiscalar 1 +model 1 +novel 1 +paradigm 1 +develop 1 +andcarri 1 +detail 1 +simul 1 +assessth 1 +potenti 1 +concept 1 +todd 1 +austin 1 +scott 1 +breach 1 +andrea 1 +moshovo 1 +vijaykumarrec 1 +talkswil 1 +set 1 +import 1 +futur 1 +given 1 +risc 1 +symposium 1 +held 1 +watson 1 +researchcent 1 +yorktown 1 +height 1 +novemb 1 +file 1 +compress 1 +postscript 1 +framemak 1 +place 1 +publicationshigh 1 +bandwidth 1 +translat 1 +multipl 1 +appear 1 +inrd 1 +annual 1 +intern 1 +appendix 1 +ofdetail 1 +resultsi 1 +also 1 +zero 1 +load 1 +microarchitectur 1 +support 1 +reduc 1 +latencyt 1 +micro 1 +superscalar 1 +processorsj 1 +smith 1 +proceed 1 +ieee 1 +decemb 1 +hardwar 1 +mechan 1 +dynam 1 +reorder 1 +referencesm 1 +franklin 1 +transact 1 +vijaykumar 1 +streamlin 1 +data 1 +cach 1 +access 1 +fast 1 +calcul 1 +pnevmatikato 1 +anatomi 1 +regist 1 +request 1 +combin 1 +arbitrari 1 +interconnect 1 +network 1 +lebeck 1 +distribut 1 +effici 1 +detect 1 +pointer 1 +arrai 1 +error 1 +sigplan 1 +confer 1 +languag 1 +implement 1 +guard 1 +branch 1 +predict 1 +goodman 1 +handbook 1 +press 1 +control 1 +flow 1 +traffic 1 +analysi 1 +inter 1 +oper 1 +communicationin 1 +fine 1 +grain 1 +expand 1 +split 1 +window 1 +depend 1 +errorst 1 +technic 1 +report 1 +processorsd 1 +knapsack 1 +hierarchi 1 +componentt 1 +tetra 1 +evalu 1 +serial 1 +processorst 1 +juli 1 +gradstodd 1 +april 1 +softwar 1 +latencydionisio 1 +incorpor 1 +exist 1 +setsmanoj 1 +architecturemark 1 +friedman 1 +januari 1 +prolog 1 +executionsriram 1 +vajapeyam 1 +crai 1 +processormen 1 +chow 1 +chiang 1 +septemb 1 +base 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..2133564c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,118 @@ +solomon 1 +home 1 +page 1 +marvin 1 +professor 1 +former 1 +chair 1 +goodman 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +phone 1 +wisc 1 +research 1 +interest 1 +object 1 +orient 1 +databas 1 +system 1 +softwar 1 +develop 1 +support 1 +environ 1 +distribut 1 +oper 1 +network 1 +design 1 +implement 1 +program 1 +languag 1 +theori 1 +recent 1 +publicationstoward 1 +effect 1 +effici 1 +free 1 +space 1 +manag 1 +proc 1 +sigmod 1 +conf 1 +data 1 +june 1 +mark 1 +mcauliff 1 +michael 1 +carei 1 +andmarvin 1 +abstractpostscriptth 1 +gmap 1 +versatil 1 +tool 1 +physic 1 +independ 1 +larg 1 +septemb 1 +odyssea 1 +tsatalo 1 +andyanni 1 +ioannidi 1 +abstractpostscriptexpand 1 +version 1 +appear 1 +inth 1 +vldb 1 +journalv 1 +april 1 +abstractpostscriptshor 1 +persist 1 +applic 1 +david 1 +dewitt 1 +franklin 1 +nanci 1 +hall 1 +jeffrei 1 +naughton 1 +daniel 1 +schuh 1 +seth 1 +white 1 +andmichael 1 +zwillingavail 1 +astech 1 +report 1 +overview 1 +capitl 1 +fourth 1 +intern 1 +workshop 1 +configur 1 +paul 1 +adam 1 +avail 1 +updat 1 +lectur 1 +note 1 +logic 1 +point 1 +graphic 1 +interfac 1 +room 1 +built 1 +us 1 +java 1 +spring 1 +univ 1 +shore 1 +project 1 +photoalbum 1 +todai 1 +dilbert 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..7f16254f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,6 @@ +sowmya 1 +home 1 +page 1 +welcom 1 +subramanian 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..a1aa60ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,78 @@ +shilpa 1 +lawand 1 +home 1 +page 1 +welcom 1 +pagei 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +person 1 +stuffa 1 +link 1 +pastfor 1 +info 1 +schoolher 1 +resum 1 +html 1 +ascii 1 +second 1 +love 1 +us 1 +resours 1 +stuff 1 +want 1 +place 1 +syster 1 +women 1 +relat 1 +madisonsurf 1 +madisonst 1 +inform 1 +serverth 1 +hoofer 1 +sail 1 +clubowl 1 +music 1 +book 1 +movi 1 +java 1 +signatur 1 +meet 1 +first 1 +lovesnowi 1 +homepag 1 +cool 1 +linksher 1 +iswher 1 +finger 1 +three 1 +judg 1 +panel 1 +philadelphia 1 +vote 1 +constitut 1 +follow 1 +read 1 +decis 1 +access 1 +time 1 +sinc 1 +june 1 +send 1 +comment 1 +suggest 1 +email 1 +tossl 1 +wisc 1 +shilpal 1 +thru 1 +guest 1 +formlast 1 +modifi 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..d01e2c36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,32 @@ +jeremi 1 +stenglein 1 +home 1 +page 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +street 1 +offic 1 +phone 1 +mail 1 +stenglei 1 +wisc 1 +teach 1 +section 1 +gener 1 +take 1 +construct 1 +compil 1 +link 1 +pageth 1 +simpson 1 +pageespn 1 +sport 1 +hotwir 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..4408f942 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,57 @@ +steve 1 +reinhardt 1 +home 1 +page 1 +steven 1 +graduat 1 +student 1 +computerarchitectur 1 +work 1 +wisconsin 1 +wind 1 +tunnelgroup 1 +advisor 1 +david 1 +wood 1 +although 1 +project 1 +mark 1 +hill 1 +andjim 1 +laru 1 +often 1 +feel 1 +free 1 +tell 1 +mewhat 1 +well 1 +plan 1 +finish 1 +fall 1 +join 1 +faculti 1 +ofth 1 +univers 1 +michigan 1 +eec 1 +depart 1 +januari 1 +interest 1 +find 1 +publicationsresearch 1 +summari 1 +email 1 +stever 1 +wisc 1 +click 1 +finger 1 +phone 1 +comput 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usalast 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..1c72043f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,58 @@ +john 1 +strikwerda 1 +home 1 +page 1 +professor 1 +comput 1 +scienc 1 +strikwerdadepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +email 1 +strik 1 +wisc 1 +telephon 1 +fall 1 +teach 1 +problem 1 +solv 1 +us 1 +begin 1 +januari 1 +assign 1 +nation 1 +foundat 1 +year 1 +click 1 +inform 1 +numer 1 +analysi 1 +qualifi 1 +exam 1 +research 1 +interest 1 +fluid 1 +dynamicsmyoffici 1 +depart 1 +pageoth 1 +stuff 1 +field 1 +museum 1 +point 1 +search 1 +rate 1 +chicago 1 +best 1 +tribun 1 +talk 1 +radio 1 +show 1 +car 1 +footballmi 1 +kid 1 +nathan 1 +drew 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..6d07f2a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,30 @@ +subba 1 +home 1 +page 1 +thing 1 +enjoi 1 +calvin 1 +hobb 1 +late 1 +show 1 +david 1 +letterman 1 +seinfeld 1 +interest 1 +prooocessor 1 +histor 1 +paper 1 +evalu 1 +stream 1 +buffer 1 +secondari 1 +cach 1 +replac 1 +decoupl 1 +integ 1 +execut 1 +superscalar 1 +processor 1 +subbarao 1 +cambridg 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..4925fc90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,83 @@ +chiang 1 +home 1 +page 1 +depart 1 +univers 1 +wisconsin 1 +madisonoffic 1 +stelephon 1 +mail 1 +suhui 1 +wisc 1 +educlick 1 +send 1 +emailoffic 1 +hour 1 +thur 1 +still 1 +construct 1 +ta 1 +fall 1 +public 1 +applic 1 +characterist 1 +limit 1 +preemption 1 +complet 1 +parallel 1 +processor 1 +schedul 1 +polici 1 +rajesh 1 +mansharamani 1 +mari 1 +vernon 1 +proc 1 +sigmetr 1 +conf 1 +measur 1 +model 1 +comput 1 +system 1 +nashvil 1 +dynam 1 +static 1 +quantum 1 +base 1 +alloc 1 +workshop 1 +strategi 1 +process 1 +conjunct 1 +ipp 1 +april 1 +search 1 +engin 1 +yahoo 1 +sourc 1 +resourc 1 +bibliographi 1 +world 1 +wide 1 +virtual 1 +librari 1 +subject 1 +catalogu 1 +link 1 +relat 1 +taiwan 1 +network 1 +servic 1 +sinanet 1 +shop 1 +magzin 1 +new 1 +job 1 +calendar 1 +seednet 1 +vistor 1 +guid 1 +academia 1 +sinica 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..9707373e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,192 @@ +david 1 +sundaram 1 +stukel 1 +homepag 1 +page 1 +upon 1 +effronteri 1 +push 1 +hand 1 +sever 1 +patient 1 +femor 1 +arteri 1 +blood 1 +spurt 1 +blind 1 +anesthetist 1 +hall 1 +scream 1 +browbeck 1 +tri 1 +knee 1 +groin 1 +manag 1 +hamstr 1 +scalpel 1 +crawl 1 +floor 1 +stab 1 +feet 1 +leg 1 +voilet 1 +baboon 1 +assist 1 +woman 1 +ever 1 +care 1 +damn 1 +realli 1 +wig 1 +climb 1 +tabl 1 +pois 1 +jump 1 +stomp 1 +cop 1 +rush 1 +william 1 +burrough 1 +nake 1 +lunch 1 +construct 1 +catapult 1 +reader 1 +choos 1 +index 1 +brief 1 +class 1 +take 1 +link 1 +relat 1 +comput 1 +scienc 1 +site 1 +dedic 1 +smart 1 +cloth 1 +also 1 +steve 1 +mann 1 +view 1 +current 1 +see 1 +wearabl 1 +camera 1 +inform 1 +artifici 1 +life 1 +santa 1 +institut 1 +specif 1 +project 1 +call 1 +tierra 1 +thoma 1 +recent 1 +dilbert 1 +strip 1 +technic 1 +math 1 +joke 1 +somewher 1 +philosoph 1 +scientif 1 +artist 1 +natur 1 +physic 1 +conscious 1 +surviv 1 +research 1 +laboratori 1 +info 1 +variou 1 +destruct 1 +show 1 +organ 1 +arcosanti 1 +arcolog 1 +outsid 1 +phoenix 1 +krishnamurti 1 +foundat 1 +tell 1 +centuri 1 +beat 1 +writer 1 +includ 1 +pictur 1 +fill 1 +wait 1 +new 1 +sourc 1 +packer 1 +scientist 1 +onlin 1 +regist 1 +harass 1 +mail 1 +reward 1 +dozen 1 +interest 1 +factoid 1 +astound 1 +friend 1 +american 1 +advantag 1 +hypertext 1 +addit 1 +provid 1 +select 1 +articl 1 +print 1 +edit 1 +publish 1 +weekli 1 +contain 1 +smaller 1 +hindu 1 +nation 1 +newspap 1 +india 1 +onion 1 +local 1 +madison 1 +astronomi 1 +depart 1 +washburn 1 +observatori 1 +public 1 +univers 1 +len 1 +insignific 1 +piec 1 +histori 1 +weather 1 +obtain 1 +follow 1 +webweath 1 +servic 1 +channel 1 +home 1 +late 1 +timothi 1 +leari 1 +numer 1 +written 1 +note 1 +optimist 1 +noam 1 +chomski 1 +disinform 1 +great 1 +list 1 +conspiraci 1 +theori 1 +buri 1 +within 1 +ultra 1 +trendi 1 +movi 1 +review 1 +back 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..9342e99d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,9 @@ +brian 1 +swander 1 +home 1 +pagebrian 1 +think 1 +offic 1 +hour 1 +bookmark 1 +mark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..0b2af829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,107 @@ +ariel 1 +tamchesari 1 +tamch 1 +research 1 +assistantemail 1 +wisc 1 +comput 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +madison 1 +typic 1 +pose 1 +angri 1 +posei 1 +organ 1 +fall 1 +oper 1 +system 1 +colleg 1 +park 1 +offic 1 +sresearch 1 +paradyn 1 +parallel 1 +perform 1 +toolsstatu 1 +search 1 +thesi 1 +topic 1 +els 1 +interest 1 +toolsparallel 1 +distribut 1 +systemsbluesth 1 +simpsonsseinfeldskiingskinetkeyston 1 +favorit 1 +area 1 +snowboard 1 +joke 1 +differ 1 +vacum 1 +cleaner 1 +dirt 1 +attach 1 +greet 1 +peopl 1 +whoa 1 +sorri 1 +dude 1 +municip 1 +bond 1 +eventu 1 +matur 1 +gener 1 +incom 1 +hate 1 +countri 1 +music 1 +fortran 1 +cool 1 +link 1 +yahooespncpu 1 +infoskinetoth 1 +stuff 1 +talk 1 +exokernel 1 +architectur 1 +applic 1 +level 1 +resourc 1 +manag 1 +octob 1 +paper 1 +techniqu 1 +tool 1 +share 1 +memori 1 +improv 1 +spring 1 +callaghan 1 +supercomput 1 +interconnect 1 +network 1 +april 1 +zebra 1 +stripe 1 +file 1 +need 1 +structur 1 +raid 1 +block 1 +wait 1 +free 1 +highli 1 +concurr 1 +object 1 +asynchron 1 +multiprocessor 1 +version 1 +postscript 1 +analysi 1 +risc 1 +instruct 1 +enhanc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..245b8ef5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,102 @@ +jeff 1 +lampert 1 +home 1 +pagejeff 1 +page 1 +ricardo 1 +montalban 1 +voic 1 +welcom 1 +know 1 +nota 1 +pictur 1 +least 1 +good 1 +still 1 +look 1 +foron 1 +incrimin 1 +doesn 1 +make 1 +like 1 +aconvict 1 +babi 1 +high 1 +school 1 +yearbook 1 +lasttim 1 +show 1 +someon 1 +never 1 +heard 1 +cute 1 +think 1 +well 1 +found 1 +coupl 1 +tick 1 +threaten 1 +turn 1 +intoa 1 +human 1 +dispens 1 +took 1 +henc 1 +befound 1 +separ 1 +click 1 +anautograph 1 +copi 1 +sign 1 +name 1 +monitor 1 +pictureappear 1 +choos 1 +link 1 +weasel 1 +seek 1 +take 1 +pace 1 +basic 1 +factswho 1 +person 1 +last 1 +night 1 +academ 1 +work 1 +relatedwhat 1 +class 1 +dept 1 +resum 1 +entertainmentbook 1 +movi 1 +music 1 +program 1 +newsgroup 1 +import 1 +subjectsfriendsno 1 +sick 1 +theme 1 +song 1 +hobbi 1 +club 1 +organizationsgroup 1 +plu 1 +wish 1 +inmi 1 +favorit 1 +linksugh 1 +sound 1 +servo 1 +juli 1 +andrew 1 +fire 1 +crow 1 +mstk 1 +eclect 1 +paraphenaliai 1 +would 1 +miscellan 1 +straight 1 +forward 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..288ae582 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,18 @@ +todd 1 +homepagetodd 1 +homepagein 1 +fall 1 +teach 1 +section 1 +sinc 1 +area 1 +mathemat 1 +program 1 +plug 1 +mathematicalprogram 1 +page 1 +contain 1 +wealth 1 +inform 1 +tmunson 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..efcb0523 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian 1 +home 1 +pagebrian 1 +toonen 1 +comput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +dayton 1 +streetmadison 1 +offic 1 +cswhatev 1 +chief 1 +seattleth 1 +ground 1 +tipi 1 +medit 1 +life 1 +itsmean 1 +accept 1 +kinship 1 +creatur 1 +acknowledgingun 1 +univers 1 +thing 1 +infus 1 +thetru 1 +essenc 1 +civil 1 +luther 1 +stand 1 +bear 1 +oglala 1 +siouxlast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..b5d56c5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,15 @@ +thano 1 +tsioli 1 +home 1 +page 1 +site 1 +netscap 1 +enhanc 1 +read 1 +shouldconsid 1 +upgrad 1 +browser 1 +latest 1 +version 1 +ifthat 1 +option 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..461af4d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,56 @@ +todd 1 +turnidg 1 +dougla 1 +turnidgeschoolcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +homemuppet 1 +babylon 1 +milton 1 +eyesightright 1 +axi 1 +left 1 +graduat 1 +student 1 +depart 1 +comput 1 +sciencesat 1 +univers 1 +year 1 +work 1 +professorthoma 1 +rep 1 +studyingprogram 1 +languag 1 +teach 1 +section 1 +hold 1 +mathematicsand 1 +computersci 1 +case 1 +western 1 +reserveunivers 1 +locat 1 +cleveland 1 +ohio 1 +origin 1 +kent 1 +myfamili 1 +live 1 +judg 1 +compani 1 +keep 1 +click 1 +enough 1 +evid 1 +awai 1 +long 1 +time 1 +amus 1 +shortcut 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..864648a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,37 @@ +taxiao 1 +wang 1 +home 1 +page 1 +welcom 1 +heavi 1 +construct 1 +click 1 +finger 1 +contact 1 +inform 1 +graduat 1 +student 1 +teach 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +offic 1 +bldg 1 +dayton 1 +street 1 +phone 1 +mail 1 +twang 1 +wisc 1 +visitor 1 +number 1 +sinc 1 +visit 1 +time 1 +last 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..76964fd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,12 @@ +shaft 1 +home 1 +pageuri 1 +pageemail 1 +wisc 1 +eduinterest 1 +diversionsstart 1 +trek 1 +meet 1 +microsoft 1 +start 1 +window 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..f424c474 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,60 @@ +venkatesh 1 +ganti 1 +home 1 +pagevenkatesh 1 +vganti 1 +wisc 1 +graduat 1 +studentoffic 1 +comput 1 +scienc 1 +depart 1 +dayton 1 +madison 1 +usaphon 1 +note 1 +page 1 +construct 1 +past 1 +present 1 +student 1 +univers 1 +wisconsin 1 +fall 1 +earlier 1 +undergradu 1 +madra 1 +india 1 +nativ 1 +kakinada 1 +andhra 1 +pradesh 1 +info 1 +asha 1 +basic 1 +educ 1 +click 1 +know 1 +godav 1 +homepag 1 +hostel 1 +yearbook 1 +hope 1 +onlin 1 +sometim 1 +research 1 +interest 1 +databas 1 +work 1 +till 1 +btech 1 +project 1 +real 1 +time 1 +want 1 +look 1 +genesi 1 +group 1 +last 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..2287870f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,62 @@ +vijai 1 +home 1 +page 1 +vijaykumar 1 +wisc 1 +profession 1 +affili 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +contact 1 +address 1 +dayton 1 +street 1 +phone 1 +email 1 +advisor 1 +guri 1 +sohi 1 +project 1 +multiscalar 1 +educ 1 +doctor 1 +august 1 +undergradu 1 +birla 1 +institut 1 +technolog 1 +pilani 1 +india 1 +research 1 +compil 1 +architectur 1 +dissert 1 +distribut 1 +regist 1 +file 1 +design 1 +anatomi 1 +processor 1 +breach 1 +annual 1 +intern 1 +symposium 1 +microarchitectur 1 +micro 1 +commun 1 +strategi 1 +submit 1 +schedul 1 +architecturet 1 +go 1 +work 1 +memori 1 +data 1 +depend 1 +predict 1 +person 1 +side 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..1a057ed3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,74 @@ +john 1 +watrou 1 +home 1 +pagejohn 1 +wisc 1 +comput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +streetmadison 1 +telephon 1 +public 1 +dimension 1 +quantum 1 +cellular 1 +automata 1 +proc 1 +symp 1 +foundat 1 +polynomi 1 +time 1 +algorithm 1 +artin 1 +whapl 1 +approxim 1 +theorem 1 +number 1 +theori 1 +fourth 1 +confer 1 +canadiannumb 1 +associ 1 +assort 1 +link 1 +archiv 1 +stanford 1 +inform 1 +page 1 +oxford 1 +particl 1 +beam 1 +physic 1 +laboratori 1 +ucla 1 +theoret 1 +montreal 1 +lanl 1 +preprint 1 +bibliographi 1 +hypertext 1 +project 1 +hypatia 1 +gener 1 +refer 1 +element 1 +stylehypertext 1 +webster 1 +interfaceroget 1 +thesauru 1 +random 1 +parasol 1 +recordsplayst 1 +linksweath 1 +forecast 1 +madisonth 1 +isthmu 1 +daili 1 +pagemathemat 1 +quotat 1 +servermathematician 1 +biographiesgeek 1 +site 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..77973c9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,108 @@ +weiru 1 +home 1 +page 1 +eiru 1 +send 1 +email 1 +ppppleas 1 +find 1 +around 1 +sometim 1 +think 1 +english 1 +speaker 1 +commit 1 +asylum 1 +verbal 1 +insan 1 +languag 1 +peopl 1 +recit 1 +plai 1 +ship 1 +truck 1 +cargo 1 +havenos 1 +feet 1 +smell 1 +richard 1 +leder 1 +three 1 +possibl 1 +part 1 +date 1 +least 1 +must 1 +beoffer 1 +entertain 1 +food 1 +affect 1 +customari 1 +begina 1 +seri 1 +great 1 +deal 1 +moder 1 +amountof 1 +merest 1 +suggest 1 +amount 1 +ofaffect 1 +increas 1 +reduc 1 +proportion 1 +longer 1 +call 1 +circumst 1 +omit 1 +miss 1 +manner 1 +guid 1 +excruciatingli 1 +correct 1 +behaviour 1 +univers 1 +peke 1 +friend 1 +physic 1 +depart 1 +alumni 1 +associ 1 +atmadison 1 +world 1 +littl 1 +grei 1 +cell 1 +pictur 1 +game 1 +late 1 +show 1 +david 1 +letterman 1 +studio 1 +wish 1 +postcard 1 +someon 1 +movi 1 +review 1 +favorit 1 +hockei 1 +player 1 +steve 1 +francai 1 +dictionnairefrancai 1 +anglai 1 +dictionnair 1 +softwar 1 +relatif 1 +lafrancophoni 1 +test 1 +degrammair 1 +francais 1 +french 1 +lesson 1 +weather 1 +forecast 1 +madison 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..fa253d17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,82 @@ +welcom 1 +zhang 1 +home 1 +page 1 +first 1 +year 1 +graduat 1 +student 1 +depart 1 +hometown 1 +shanghai 1 +peopl 1 +republ 1 +china 1 +educ 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +jose 1 +state 1 +california 1 +technolog 1 +tsinghua 1 +beij 1 +chinaemail 1 +weiz 1 +wisc 1 +eduwork 1 +experiencecontractor 1 +develop 1 +variou 1 +inform 1 +manag 1 +system 1 +differ 1 +platform 1 +includ 1 +windowsnt 1 +solari 1 +us 1 +tuxedo 1 +pathwai 1 +softwar 1 +design 1 +tandem 1 +corpor 1 +engin 1 +sherpa 1 +oper 1 +nasa 1 +am 1 +research 1 +center 1 +hobbiesma 1 +jiangbridg 1 +card 1 +game 1 +tabl 1 +tenni 1 +pingpong 1 +joggingth 1 +ultim 1 +challengesolv 1 +mine 1 +sweeper 1 +expert 1 +level 1 +puzzl 1 +within 1 +second 1 +without 1 +cheat 1 +quot 1 +dayth 1 +best 1 +memori 1 +ackowledgementthi 1 +written 1 +framework 1 +provid 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..aae7bf65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,62 @@ +kent 1 +wenger 1 +home 1 +page 1 +welcom 1 +note 1 +definit 1 +still 1 +construct 1 +preparedfor 1 +pothol 1 +need 1 +pictur 1 +scan 1 +wengerassoci 1 +researchercomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +west 1 +dayton 1 +streetmadison 1 +telephon 1 +email 1 +wisc 1 +edufing 1 +workth 1 +main 1 +project 1 +work 1 +arecod 1 +cluster 1 +data 1 +provid 1 +anddevis 1 +explor 1 +andvisu 1 +come 1 +good 1 +acronym 1 +importantpart 1 +wouldn 1 +agre 1 +visualizationproduc 1 +devis 1 +softwar 1 +peopl 1 +yanni 1 +ioannidi 1 +miron 1 +livnyraghu 1 +ramakrishnanmor 1 +inform 1 +univers 1 +madison 1 +dbm 1 +research 1 +groupuw 1 +comput 1 +pagewiscinfo 1 +personallinksimageslast 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..29a8d7f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,117 @@ +wisconsin 1 +wind 1 +tunnel 1 +project 1 +home 1 +page 1 +projectmost 1 +futur 1 +massiv 1 +parallel 1 +comput 1 +built 1 +fromworkst 1 +like 1 +node 1 +program 1 +high 1 +level 1 +parallellanguag 1 +support 1 +share 1 +address 1 +space 1 +whichprocess 1 +uniformli 1 +refer 1 +data 1 +seek 1 +develop 1 +consensu 1 +aboutth 1 +middl 1 +interfac 1 +languag 1 +compil 1 +abovesystem 1 +softwar 1 +hardwar 1 +first 1 +propos 1 +wascoop 1 +memori 1 +evolutionari 1 +extens 1 +toconvent 1 +recent 1 +havebeen 1 +work 1 +revolutionari 1 +call 1 +tempest 1 +provid 1 +mechan 1 +allow 1 +programm 1 +andprogram 1 +librari 1 +implement 1 +messag 1 +pass 1 +transparentshar 1 +hybrid 1 +combin 1 +developingimplement 1 +think 1 +machin 1 +cluster 1 +ofworkst 1 +wisconsincow 1 +hypothet 1 +platform 1 +approach 1 +cowus 1 +snoop 1 +logic 1 +fpga 1 +sram 1 +collaboratingwith 1 +paradyn 1 +adapt 1 +perform 1 +tool 1 +overviewand 1 +annot 1 +bibliographi 1 +slide 1 +overview 1 +talk 1 +novemb 1 +pageor 1 +four 1 +complet 1 +technic 1 +paper 1 +contributor 1 +fund 1 +sourc 1 +origin 1 +name 1 +week 1 +articl 1 +relat 1 +architectur 1 +group 1 +scienc 1 +departmentat 1 +univers 1 +world 1 +wide 1 +inform 1 +last 1 +updat 1 +juli 1 +mark 1 +hill 1 +markhil 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..acbaf895 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,85 @@ +xuelin 1 +home 1 +page 1 +felix 1 +charact 1 +creat 1 +otto 1 +messmer 1 +first 1 +base 1 +anim 1 +human 1 +person 1 +featur 1 +save 1 +whichwa 1 +shown 1 +famou 1 +star 1 +rival 1 +chaplin 1 +keaton 1 +princ 1 +wale 1 +pick 1 +polo 1 +team 1 +mascot 1 +pictur 1 +accompani 1 +charl 1 +lindbergh 1 +across 1 +theatlant 1 +statu 1 +imag 1 +successfulli 1 +transmit 1 +develop 1 +televis 1 +seri 1 +somehow 1 +obtain 1 +magic 1 +trick 1 +didn 1 +oneev 1 +seem 1 +agre 1 +whether 1 +teeth 1 +whisker 1 +like 1 +spend 1 +time 1 +make 1 +film 1 +program 1 +appear 1 +newspap 1 +comic 1 +strip 1 +advertis 1 +hundr 1 +product 1 +thing 1 +remov 1 +tail 1 +ear 1 +put 1 +back 1 +wish 1 +could 1 +finger 1 +give 1 +account 1 +hairbal 1 +keyboard 1 +keeper 1 +instead 1 +sui 1 +vritabl 1 +chat 1 +pass 1 +partout 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..426795a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,329 @@ +yanni 1 +ioannidisyanni 1 +ioannidi 1 +wisc 1 +eduresearch 1 +interestsdatabas 1 +manag 1 +system 1 +scientif 1 +databas 1 +user 1 +interfac 1 +andinform 1 +visual 1 +complex 1 +queri 1 +optim 1 +heterogen 1 +research 1 +primarili 1 +focus 1 +area 1 +support 1 +scientificdata 1 +futur 1 +applic 1 +pose 1 +sever 1 +challeng 1 +toqueri 1 +ask 1 +significantli 1 +higher 1 +thanin 1 +tradit 1 +number 1 +altern 1 +evalu 1 +algorithm 1 +much 1 +highera 1 +well 1 +especi 1 +parallel 1 +attempt 1 +tooptim 1 +valu 1 +time 1 +paramet 1 +parametr 1 +queryoptim 1 +thu 1 +access 1 +plan 1 +process 1 +querywil 1 +extrem 1 +larg 1 +current 1 +us 1 +algorithmsfor 1 +find 1 +optimum 1 +among 1 +inadequ 1 +investig 1 +random 1 +algorithmsa 1 +viabl 1 +solut 1 +problem 1 +interest 1 +simul 1 +anneal 1 +genet 1 +take 1 +advantag 1 +special 1 +propertiesof 1 +also 1 +look 1 +schedul 1 +especiallythos 1 +aris 1 +multimedia 1 +environ 1 +error 1 +propag 1 +size 1 +cost 1 +estim 1 +alsopart 1 +studi 1 +try 1 +identifi 1 +appropriateinform 1 +must 1 +maintain 1 +limit 1 +thepropag 1 +properti 1 +ofoptim 1 +histogram 1 +approxim 1 +distribut 1 +inrel 1 +attribut 1 +comput 1 +mode 1 +expect 1 +part 1 +manyexperi 1 +variou 1 +disciplin 1 +gener 1 +need 1 +mani 1 +aspectsthat 1 +technolog 1 +readi 1 +provid 1 +involv 1 +develop 1 +desktop 1 +experi 1 +managementenviron 1 +help 1 +scientist 1 +throughout 1 +life 1 +cycl 1 +theirexperiment 1 +primari 1 +compon 1 +major 1 +issu 1 +work 1 +address 1 +andsemant 1 +former 1 +concentr 1 +right 1 +metaphor 1 +arefor 1 +repres 1 +schema 1 +object 1 +scientistsso 1 +natur 1 +power 1 +dynam 1 +latter 1 +tool 1 +facilitatetransl 1 +integr 1 +differ 1 +data 1 +format 1 +although 1 +experimentalscientif 1 +effort 1 +guid 1 +specificproject 1 +associ 1 +particular 1 +basedperform 1 +base 1 +model 1 +plantgrowth 1 +spectroscopi 1 +sequenc 1 +microscop 1 +imag 1 +recent 1 +publicationsi 1 +survei 1 +symposium 1 +issueon 1 +anniversari 1 +march 1 +garofalaki 1 +multimediasystem 1 +decemb 1 +ramakrishnan 1 +contain 1 +conjunct 1 +beyondrel 1 +set 1 +transact 1 +tod 1 +septemb 1 +haber 1 +livni 1 +foundat 1 +forschema 1 +displai 1 +journal 1 +intellig 1 +inform 1 +juli 1 +tsangari 1 +design 1 +implement 1 +performanceevalu 1 +bermuda 1 +ieee 1 +knowledg 1 +engin 1 +tkde 1 +februari 1 +miller 1 +translat 1 +ofheterogen 1 +bridg 1 +theori 1 +practic 1 +januari 1 +christodoulaki 1 +limitingworst 1 +case 1 +join 1 +result 1 +winger 1 +transit 1 +closur 1 +algorithmsbas 1 +graph 1 +travers 1 +ondatabas 1 +sigmod 1 +record 1 +poosala 1 +divers 1 +databaseestim 1 +gupta 1 +ponnekanti 1 +experimentmanag 1 +proc 1 +intern 1 +vldb 1 +confer 1 +bombai 1 +india 1 +itsappl 1 +load 1 +balanc 1 +vldbconfer 1 +anjur 1 +frog 1 +turtl 1 +bridgesbetween 1 +file 1 +orient 1 +conferenceon 1 +statist 1 +stockholm 1 +sweden 1 +june 1 +multi 1 +dimension 1 +resourc 1 +forparallel 1 +montreal 1 +canada 1 +haa 1 +shekita 1 +improv 1 +forselect 1 +rang 1 +predic 1 +internationalacm 1 +layoutat 1 +multipl 1 +granular 1 +workshop 1 +advancedvisu 1 +gubbio 1 +itali 1 +opossum 1 +desk 1 +managementthrough 1 +customiz 1 +zurich 1 +switzerland 1 +practicalityfor 1 +sigmodconfer 1 +jose 1 +tsatalo 1 +solomon 1 +gmap 1 +versatil 1 +forphys 1 +independ 1 +santiago 1 +chile 1 +unifi 1 +framework 1 +index 1 +databasesystem 1 +dexa 1 +athen 1 +greec 1 +lashkari 1 +incomplet 1 +path 1 +express 1 +theirdisambigu 1 +minneapoli 1 +flexibl 1 +schemavisu 1 +edit 1 +boston 1 +april 1 +edbt 1 +cambridg 1 +england 1 +univers 1 +serial 1 +internationalvldb 1 +dublin 1 +ireland 1 +august 1 +capacityin 1 +wiener 1 +moos 1 +withdata 1 +program 1 +languag 1 +york 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..1f0ddd68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin 1 +zhongbin 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..4162e38d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,76 @@ +yinng 1 +home 1 +pageindexofyinongwei 1 +spagehi 1 +welcom 1 +homepag 1 +pleas 1 +good 1 +look 1 +person 1 +info 1 +especi 1 +employ 1 +give 1 +alsolink 1 +classmat 1 +cours 1 +take 1 +time 1 +telephon 1 +work 1 +address 1 +offic 1 +comp 1 +stat 1 +bldg 1 +madison 1 +univ 1 +inforesumehobbiestravel 1 +usathi 1 +collect 1 +pictur 1 +took 1 +travel 1 +articl 1 +wrote 1 +trip 1 +chicago 1 +seattl 1 +pointersr 1 +computingmacin 1 +learningpattern 1 +recognitioncomputatin 1 +geometrydatabasevisionacadem 1 +diarythi 1 +diari 1 +everi 1 +month 1 +sometim 1 +amaz 1 +mani 1 +littl 1 +read 1 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 1 +linksmi 1 +beida 1 +classmatespek 1 +univers 1 +alumni 1 +page 1 +oversea 1 +chines 1 +organ 1 +ciumi 1 +bookmarkcom 1 +press 1 +client 1 +support 1 +send 1 +comment 1 +visitor 1 +number 1 +last 1 +access 1 +modifi 1 +yinong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..756a4e2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,44 @@ +matt 1 +home 1 +pageuntil 1 +around 1 +updat 1 +basic 1 +inform 1 +offic 1 +matthew 1 +zeidenbergcent 1 +wisconsin 1 +strategi 1 +observatori 1 +drive 1 +room 1 +madison 1 +voic 1 +gilson 1 +email 1 +zeiden 1 +wisc 1 +eduzeidenb 1 +eduwhen 1 +california 1 +parent 1 +hous 1 +coho 1 +huntington 1 +beach 1 +beauti 1 +convuls 1 +breton 1 +nadja 1 +beaut 1 +sera 1 +give 1 +food 1 +poor 1 +call 1 +saint 1 +whyth 1 +communist 1 +helder 1 +camara 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..d5e5bbb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,161 @@ +tian 1 +zhang 1 +home 1 +page 1 +gener 1 +inform 1 +student 1 +research 1 +assistantadvisor 1 +prof 1 +raghu 1 +ramakrishnan 1 +miron 1 +livni 1 +joint 1 +major 1 +concentr 1 +databas 1 +artifici 1 +intellig 1 +compilerminor 1 +financi 1 +invest 1 +bankingoffic 1 +room 1 +comput 1 +scienc 1 +dept 1 +univ 1 +wisconsin 1 +madison 1 +mail 1 +wisc 1 +eduoffic 1 +telephon 1 +depart 1 +intereststher 1 +grow 1 +need 1 +exploratori 1 +analysi 1 +larg 1 +dataset 1 +discov 1 +us 1 +pattern 1 +data 1 +mine 1 +territori 1 +develop 1 +purpos 1 +interest 1 +design 1 +effici 1 +algorithm 1 +ortool 1 +integr 1 +techniqu 1 +statist 1 +thesi 1 +topic 1 +cluster 1 +densityanalysi 1 +given 1 +multi 1 +dimension 1 +limit 1 +amount 1 +resourc 1 +run 1 +time 1 +memori 1 +implement 1 +accur 1 +identifi 1 +spars 1 +crowd 1 +region 1 +estim 1 +densiti 1 +function 1 +overal 1 +distribut 1 +import 1 +practic 1 +branch 1 +appli 1 +mani 1 +domain 1 +dataclassif 1 +imag 1 +compress 1 +recognit 1 +recent 1 +project 1 +birch 1 +system 1 +select 1 +public 1 +applic 1 +submit 1 +knowledg 1 +discoveri 1 +journal 1 +june 1 +method 1 +proc 1 +sigmod 1 +conf 1 +manag 1 +canada 1 +interact 1 +classif 1 +workshop 1 +issu 1 +knowledgediscoveri 1 +cooper 1 +fast 1 +probabl 1 +kernel 1 +technic 1 +report 1 +juli 1 +motion 1 +plan 1 +robot 1 +topolog 1 +dimensionreduct 1 +ling 1 +confer 1 +ijcai 1 +findpath 1 +manipul 1 +finit 1 +divis 1 +configur 1 +space 1 +jianwei 1 +manufactur 1 +trend 1 +educ 1 +symposium 1 +andmanufactur 1 +dimens 1 +reduct 1 +technolog 1 +find 1 +collis 1 +free 1 +path 1 +mobil 1 +young 1 +profession 1 +beij 1 +relev 1 +link 1 +document 1 +organ 1 +china 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..e7297694 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,72 @@ +yihong 1 +home 1 +page 1 +zhao 1 +wisc 1 +research 1 +assist 1 +depart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +advis 1 +prof 1 +jeff 1 +naughton 1 +interest 1 +parallel 1 +object 1 +relat 1 +dbm 1 +line 1 +analyt 1 +process 1 +olap 1 +data 1 +mine 1 +financi 1 +benchmark 1 +educationb 1 +univers 1 +north 1 +carolina 1 +chapel 1 +hillm 1 +fall 1 +site 1 +wiscosin 1 +group 1 +sigmod 1 +maryland 1 +datamin 1 +microstrategi 1 +rolap 1 +arbor 1 +molap 1 +stock 1 +lombard 1 +graph 1 +server 1 +pathfind 1 +kiwi 1 +club 1 +daili 1 +new 1 +todai 1 +monei 1 +chines 1 +taiwan 1 +search 1 +engin 1 +lyco 1 +excit 1 +yahoo 1 +surf 1 +ters 1 +detail 1 +comment 1 +pgmo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..98922f48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,19 @@ +home 1 +page 1 +still 1 +construct 1 +wang 1 +homepag 1 +offer 1 +inform 1 +address 1 +johnson 1 +madison 1 +phone 1 +offic 1 +dayton 1 +street 1 +email 1 +wisc 1 +zhewang 1 +student 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..cdb37eca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,102 @@ +zhichen 1 +home 1 +page 1 +depart 1 +comput 1 +scienc 1 +dayton 1 +madison 1 +offic 1 +phone 1 +research 1 +assist 1 +advisor 1 +professor 1 +jame 1 +larusprofessor 1 +barton 1 +millerawardbest 1 +paper 1 +award 1 +intern 1 +confer 1 +supercomput 1 +press 1 +juli 1 +interest 1 +area 1 +program 1 +languag 1 +perform 1 +issu 1 +parallel 1 +anddistribut 1 +system 1 +recent 1 +studi 1 +techniqu 1 +detect 1 +eliminateperform 1 +bottleneck 1 +distribut 1 +share 1 +memori 1 +combin 1 +paradyn 1 +toolwith 1 +blizzard 1 +wisconsinwind 1 +tunnel 1 +think 1 +machin 1 +andth 1 +cluster 1 +workstat 1 +public 1 +field 1 +interestprogram 1 +environ 1 +tool 1 +network 1 +oper 1 +architectur 1 +evalu 1 +benchmark 1 +place 1 +work 1 +high 1 +softwar 1 +laboratori 1 +univers 1 +texa 1 +antonio 1 +publish 1 +ofparallel 1 +predict 1 +model 1 +simul 1 +departmentat 1 +fudan 1 +particip 1 +sever 1 +nation 1 +project 1 +china 1 +develop 1 +levelprogram 1 +object 1 +orient 1 +technolog 1 +andimcrement 1 +compil 1 +click 1 +postscript 1 +version 1 +html 1 +link 1 +asplo 1 +programjourn 1 +researchchines 1 +novel 1 +friend 1 +java 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..36c588d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,25 @@ +zhang 1 +home 1 +page 1 +hello 1 +name 1 +pictur 1 +taken 1 +invit 1 +supper 1 +theth 1 +restaur 1 +tsinghua 1 +univers 1 +chen 1 +weihai 1 +wang 1 +tong 1 +wisconsin 1 +madison 1 +depart 1 +comput 1 +scienc 1 +west 1 +dayton 1 +street 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..26ffcae7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,13 @@ +krzysztof 1 +zmudzinskikrzysztof 1 +zmudzinskispin 1 +inform 1 +student 1 +inc 1 +pictur 1 +poland 1 +pole 1 +thank 1 +stop 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..0313e256 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,131 @@ +saluja 1 +kewal 1 +colleg 1 +engineeringunivers 1 +wisconsin 1 +madison 1 +salujaprofessor 1 +engin 1 +hall 1 +drivemadison 1 +mail 1 +engr 1 +wisc 1 +eduportrait 1 +jpgdepartmentselectr 1 +comput 1 +engineeringcomput 1 +scienc 1 +educ 1 +univers 1 +iowa 1 +research 1 +interestsdesign 1 +testabl 1 +architectur 1 +data 1 +compress 1 +integr 1 +circuit 1 +vlsi 1 +fault 1 +toler 1 +gener 1 +area 1 +interest 1 +test 1 +testableand 1 +reliabl 1 +design 1 +digit 1 +system 1 +carri 1 +thisarea 1 +make 1 +extens 1 +analysi 1 +tool 1 +theresearch 1 +involv 1 +model 1 +testgener 1 +modif 1 +enhanc 1 +built 1 +inself 1 +fabric 1 +applic 1 +investig 1 +techniqu 1 +andfault 1 +simul 1 +process 1 +effici 1 +combin 1 +andsequenti 1 +compact 1 +methodsapplic 1 +self 1 +testenviron 1 +concentr 1 +regularstructur 1 +programm 1 +logic 1 +arrai 1 +ram 1 +areinvestig 1 +algorithm 1 +implement 1 +inhardwar 1 +littl 1 +perform 1 +penalti 1 +anoth 1 +projectw 1 +wai 1 +hardwar 1 +asystem 1 +normal 1 +oper 1 +goal 1 +thatth 1 +continu 1 +noimpact 1 +much 1 +work 1 +us 1 +facil 1 +digitalsystem 1 +laboratori 1 +hous 1 +number 1 +station 1 +withcolor 1 +monitor 1 +termin 1 +program 1 +dept 1 +center 1 +consortia 1 +servic 1 +fountain 1 +index 1 +search 1 +credit 1 +help 1 +last 1 +modifi 1 +fridai 1 +cdtthi 1 +page 1 +best 1 +view 1 +browser 1 +support 1 +tabl 1 +photograph 1 +address 1 +comment 1 +webmast 1 +eduupd 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..2a086c84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,135 @@ +duffi 1 +neil 1 +colleg 1 +engineeringunivers 1 +wisconsin 1 +madison 1 +professor 1 +engin 1 +research 1 +build 1 +drivemadison 1 +mail 1 +engr 1 +wisc 1 +eduportrait 1 +departmentsmechan 1 +engineeringeducationb 1 +univers 1 +madisonm 1 +madisonphd 1 +madisonresearch 1 +interestsrobot 1 +comput 1 +control 1 +manufactur 1 +system 1 +precis 1 +integr 1 +micromechanismscent 1 +consortiamanufactur 1 +programwisconsin 1 +center 1 +space 1 +autom 1 +roboticsprofessor 1 +involv 1 +sensor 1 +actuat 1 +data 1 +base 1 +advanc 1 +product 1 +develop 1 +self 1 +guid 1 +inspect 1 +machin 1 +weld 1 +robot 1 +high 1 +perform 1 +materi 1 +handl 1 +finish 1 +mold 1 +rework 1 +studi 1 +highli 1 +distribut 1 +hierarch 1 +architectur 1 +hope 1 +reduc 1 +cost 1 +complex 1 +larg 1 +scale 1 +increas 1 +flexibl 1 +fault 1 +toler 1 +construct 1 +sever 1 +experiment 1 +incorpor 1 +real 1 +time 1 +fulli 1 +schedul 1 +optim 1 +theori 1 +explain 1 +properti 1 +associ 1 +director 1 +nasa 1 +fund 1 +emphas 1 +agricultur 1 +tactil 1 +feedback 1 +human 1 +oper 1 +telerobot 1 +method 1 +evalu 1 +well 1 +factor 1 +sensori 1 +fatigu 1 +test 1 +work 1 +carri 1 +close 1 +aerospac 1 +industri 1 +teach 1 +cours 1 +automat 1 +author 1 +process 1 +dept 1 +consortia 1 +servic 1 +fountain 1 +index 1 +search 1 +credit 1 +help 1 +last 1 +modifi 1 +tuesdai 1 +cdtthi 1 +page 1 +best 1 +view 1 +browser 1 +support 1 +tabl 1 +photograph 1 +address 1 +comment 1 +webmast 1 +eduupd 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..003ca7a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,203 @@ +shapiro 1 +vadim 1 +colleg 1 +engineeringunivers 1 +wisconsin 1 +madison 1 +assist 1 +professor 1 +mechan 1 +engin 1 +univers 1 +avenuemadison 1 +mail 1 +vshapiro 1 +engr 1 +wisc 1 +eduportrait 1 +jpgurl 1 +http 1 +departmentscomput 1 +sciencemechan 1 +engineeringeducationba 1 +york 1 +universitym 1 +california 1 +angelesm 1 +cornel 1 +universityphd 1 +univeristyresearch 1 +interestscomput 1 +aid 1 +design 1 +manufactur 1 +appli 1 +comput 1 +geometri 1 +geometr 1 +solid 1 +model 1 +physic 1 +analysi 1 +simul 1 +product 1 +automationcent 1 +consortiamathemat 1 +graduat 1 +programmanufactur 1 +system 1 +programspati 1 +autom 1 +laboratoryselect 1 +award 1 +honorsn 1 +scienc 1 +foundat 1 +career 1 +gener 1 +motor 1 +fellow 1 +select 1 +public 1 +mainten 1 +represent 1 +space 1 +decomposit 1 +intern 1 +journal 1 +applic 1 +chain 1 +behavior 1 +research 1 +april 1 +palmer 1 +real 1 +function 1 +rigid 1 +separ 1 +boundari 1 +convers 1 +transact 1 +graphic 1 +januari 1 +vossler 1 +interest 1 +center 1 +relationship 1 +betweengeometri 1 +phenomena 1 +artifact 1 +bemodel 1 +repres 1 +analyz 1 +manipul 1 +manufacturedbas 1 +algorithm 1 +specif 1 +ongo 1 +project 1 +includ 1 +abil 1 +creat 1 +convert 1 +maintain 1 +consist 1 +ofdistinct 1 +part 1 +major 1 +technologicalbarri 1 +undermin 1 +us 1 +reliabl 1 +commercialgeometr 1 +current 1 +effort 1 +focu 1 +eliminatingambigu 1 +commun 1 +formal 1 +ofparametr 1 +famili 1 +investig 1 +novel 1 +methodsand 1 +techniqu 1 +support 1 +todai 1 +form 1 +fabric 1 +process 1 +cannot 1 +bedescrib 1 +combinatori 1 +term 1 +discret 1 +simpl 1 +interactingprimit 1 +appar 1 +lack 1 +structur 1 +amajor 1 +roadblock 1 +competit 1 +collabor 1 +industri 1 +present 1 +deal 1 +withtheoret 1 +practic 1 +aspect 1 +designand 1 +seek 1 +establish 1 +basi 1 +make 1 +andmanufactur 1 +systemat 1 +smoothintegr 1 +activ 1 +contain 1 +inform 1 +need 1 +captur 1 +thedesir 1 +tomanufactur 1 +recent 1 +studi 1 +algebra 1 +topolog 1 +call 1 +suggest 1 +possibl 1 +tounifi 1 +thu 1 +facilit 1 +develop 1 +ofnew 1 +tool 1 +theseand 1 +languagesand 1 +physicalobject 1 +dept 1 +consortia 1 +servic 1 +fountain 1 +index 1 +search 1 +credit 1 +help 1 +last 1 +modifi 1 +thursdai 1 +cdtthi 1 +page 1 +best 1 +view 1 +browser 1 +tabl 1 +photograph 1 +address 1 +comment 1 +webmast 1 +eduupd 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..bc8c9bf5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,107 @@ +autom 1 +theorem 1 +prove 1 +groupautom 1 +groupth 1 +group 1 +part 1 +comput 1 +scienc 1 +mathemat 1 +depart 1 +univers 1 +texa 1 +ataustin 1 +produc 1 +method 1 +system 1 +intend 1 +first 1 +higher 1 +order 1 +logic 1 +intent 1 +appli 1 +systemsand 1 +problem 1 +primarili 1 +also 1 +computersci 1 +technolog 1 +herei 1 +index 1 +electron 1 +avail 1 +tech 1 +report 1 +site 1 +seri 1 +continu 1 +current 1 +techreport 1 +ad 1 +reportseri 1 +present 1 +grouplarri 1 +hinesmarti 1 +mayberrybenjamin 1 +shultsalumniprevi 1 +student 1 +woodi 1 +bledso 1 +previou 1 +robert 1 +boyer 1 +incomplet 1 +list 1 +other 1 +relat 1 +late 1 +faculti 1 +profil 1 +robertboyerj 1 +strother 1 +moorethi 1 +past 1 +visitor 1 +collaboratorswhat 1 +done 1 +implyth 1 +natur 1 +deduct 1 +proverstrivelarri 1 +hine 1 +inequ 1 +prover 1 +struvelarri 1 +theori 1 +chou 1 +geometri 1 +proverand 1 +variou 1 +improv 1 +theretoinclud 1 +mcphee 1 +feng 1 +theoryimplement 1 +descript 1 +proof 1 +hein 1 +borel 1 +theoremprecondit 1 +proverbledso 1 +analog 1 +theoremnqthmboy 1 +andmoor 1 +develop 1 +clinc 1 +iprshult 1 +knowledg 1 +us 1 +relatedlinksdo 1 +feedback 1 +want 1 +inform 1 +contact 1 +benjamin 1 +shult 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..c2c426d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,5 @@ +document 1 +moveddocu 1 +movedthi 1 +perman 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..c2c426d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_bool/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,5 @@ +document 1 +moveddocu 1 +movedthi 1 +perman 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..68e8d7cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +solut 2 +homework 1 +upson 1 +hall 1 +phone 1 +mail 1 +cornel 1 +offic 1 +hour 1 +oper 1 +assign 1 +prelim 1 +wednesdai 1 +thursdai 1 +budiu 1 +home 0 +pagec 0 +system 0 +program 0 +systemsc 0 +practicum 0 +systemkenneth 0 +birmanc 0 +new 0 +groupcours 0 +syllabuslectur 0 +note 0 +unix 0 +filesystem 0 +structur 0 +link 0 +static 0 +dynam 0 +taslili 0 +lili 0 +fridai 0 +cheng 0 +huang 0 +ychuang 0 +tuesdai 0 +mihai 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..a2d586ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,45 @@ +oper 1 +system 1 +chip 1 +last 1 +chang 1 +handout 1 +format 1 +consol 1 +hoca 1 +home 0 +pagec 0 +program 0 +systemsc 0 +practicum 0 +systemsselect 0 +displai 0 +symbol 0 +correspond 0 +postcriptdocu 0 +hand 0 +phase 0 +hocacours 0 +inform 0 +cours 0 +schedul 0 +group 0 +postcript 0 +penn 0 +broccoli 0 +question 0 +answer 0 +comput 0 +window 0 +exampl 0 +us 0 +tutori 0 +principl 0 +configur 0 +fileth 0 +systemth 0 +specif 0 +page 0 +maintain 0 +lorenzo 0 +alvisi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..4d5a0715 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,240 @@ +homework 3 +chapter 2 +read 2 +databas 2 +tuesdai 2 +relat 2 +februari 2 +april 2 +inform 2 +cours 2 +thursdai 2 +grade 2 +model 2 +cover 1 +retriev 1 +page 1 +march 1 +system 1 +cornel 1 +availablethursdai 1 +relationship 1 +design 1 +class 1 +group 1 +name 1 +duetuesdai 1 +home 1 +entiti 1 +index 1 +queri 1 +crash 1 +recoveri 1 +concurr 1 +control 1 +part 1 +vector 1 +instructor 1 +worth 1 +final 1 +member 1 +januari 1 +fundament 1 +follow 1 +file 1 +space 1 +term 1 +time 1 +structur 1 +solut 1 +korth 1 +silberschatz 1 +second 1 +edit 1 +requir 1 +reserv 1 +upson 1 +offic 1 +hour 1 +appoint 1 +send 1 +mail 1 +aguilera 1 +amith 1 +work 1 +thegroup 1 +avail 1 +week 1 +last 1 +return 1 +first 1 +regrad 1 +introduct 1 +calculu 1 +optim 1 +prelim 1 +transact 1 +process 1 +retrievalthursdai 1 +advanc 1 +pagec 0 +retrievaldepart 0 +computersci 0 +universityspr 0 +gradeshav 0 +nice 0 +summer 0 +introductionthi 0 +three 0 +credit 0 +databasesystem 0 +roughli 0 +twothird 0 +third 0 +topic 0 +systemsinclud 0 +data 0 +physic 0 +organ 0 +hash 0 +languag 0 +queryoptim 0 +transactionprocess 0 +deal 0 +find 0 +usefulinform 0 +larg 0 +textual 0 +willcov 0 +invert 0 +smartsystem 0 +similar 0 +weight 0 +rank 0 +relevancefeedback 0 +phrase 0 +gener 0 +thesaurusconstruct 0 +evalu 0 +permit 0 +automatictext 0 +summar 0 +link 0 +materi 0 +note 0 +placetuesdai 0 +minut 0 +thurston 0 +prerequisitesc 0 +recommend 0 +booksdatabas 0 +concept 0 +mcgrawhil 0 +elmasri 0 +andnavath 0 +benjamin 0 +cum 0 +principl 0 +knowledg 0 +base 0 +byullman 0 +comput 0 +scienc 0 +press 0 +photocopiedmateri 0 +salton 0 +book 0 +research 0 +paper 0 +amitsingh 0 +singhal 0 +teach 0 +assist 0 +sophia 0 +georgiakaki 0 +wednesdai 0 +marco 0 +forc 0 +yamasani 0 +officehour 0 +gradingexam 0 +midterm 0 +ofyour 0 +exam 0 +yourfin 0 +five 0 +semest 0 +policiesy 0 +peopl 0 +clearli 0 +indic 0 +entir 0 +receiv 0 +samegrad 0 +tuesdayand 0 +along 0 +guid 0 +ofcours 0 +date 0 +throughth 0 +late 0 +accept 0 +illeg 0 +hard 0 +even 0 +though 0 +iti 0 +encourag 0 +type 0 +latexif 0 +possibl 0 +alreadi 0 +know 0 +goodopportun 0 +learn 0 +latex 0 +submissionpleas 0 +attach 0 +sort 0 +alphabet 0 +also 0 +write 0 +exampl 0 +bill 0 +clinton 0 +dole 0 +ross 0 +perot 0 +homeworksgrad 0 +sortedalphabet 0 +thecov 0 +list 0 +pagefollow 0 +want 0 +pleas 0 +sendmail 0 +policyal 0 +request 0 +submit 0 +inwrit 0 +within 0 +back 0 +schedulethi 0 +tent 0 +schedul 0 +referto 0 +algebra 0 +tupl 0 +domain 0 +integr 0 +constraint 0 +spring 0 +break 0 +modelhomework 0 +weightingthursdai 0 +indexinghomework 0 +evaluationtuesdai 0 +relev 0 +feedbackthursdai 0 +document 0 +clusteringhomework 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..4dc280a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,64 @@ +file 1 +page 1 +inform 1 +postscript 1 +enscript 1 +pfile 1 +home 1 +offic 1 +hour 1 +recit 1 +stuff 1 +convert 1 +text 1 +print 1 +look 0 +admin 0 +handout 0 +incl 0 +lectur 0 +note 0 +assign 0 +ethic 0 +profession 0 +social 0 +respons 0 +mayb 0 +electron 0 +submiss 0 +procedur 0 +group 0 +perform 0 +evalu 0 +resourc 0 +quot 0 +collect 0 +joke 0 +start 0 +submit 0 +sumedh 0 +break 0 +new 0 +misc 0 +peopl 0 +ask 0 +simpl 0 +unix 0 +program 0 +suggest 0 +wai 0 +first 0 +good 0 +sourc 0 +code 0 +second 0 +give 0 +nice 0 +header 0 +gener 0 +leav 0 +send 0 +printer 0 +come 0 +last 0 +modif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..63c25716 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,59 @@ +project 1 +distribut 1 +cours 1 +system 1 +work 1 +inform 1 +practicum 1 +practic 1 +design 1 +student 1 +rang 1 +systemsor 0 +hand 0 +dirti 0 +real 0 +aspect 0 +studi 0 +implement 0 +signific 0 +also 0 +take 0 +offersa 0 +varieti 0 +simpl 0 +internetworkingto 0 +complex 0 +teamsof 0 +person 0 +choos 0 +interest 0 +theywil 0 +trough 0 +semest 0 +credit 0 +hour 0 +earn 0 +depend 0 +size 0 +complexityof 0 +develop 0 +us 0 +offcial 0 +interact 0 +pageslink 0 +page 0 +find 0 +basic 0 +instruct 0 +descript 0 +plan 0 +progress 0 +report 0 +final 0 +present 0 +tabl 0 +contentspag 0 +comment 0 +werner 0 +vogel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..c0ab2b31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,94 @@ +avail 1 +problem 1 +solut 1 +cours 1 +page 1 +fall 1 +professor 1 +saluja 1 +sorin 1 +part 1 +mentor 1 +contain 1 +link 1 +engin 1 +kewal 1 +outlin 1 +project 1 +help 1 +tool 1 +refer 1 +email 1 +home 0 +intro 0 +comput 0 +architectur 0 +note 0 +inform 0 +electr 0 +andcomput 0 +depart 0 +univers 0 +wisconsin 0 +madison 0 +materi 0 +intend 0 +sole 0 +studentsenrol 0 +semest 0 +prof 0 +generalinform 0 +conduct 0 +midtermsyllabu 0 +midtermi 0 +specif 0 +homework 0 +assign 0 +valid 0 +theproject 0 +need 0 +graphic 0 +caeworkst 0 +pleas 0 +duedat 0 +follow 0 +literatur 0 +assist 0 +anyon 0 +whomai 0 +manual 0 +onlin 0 +throughbold_brows 0 +check 0 +gettingstart 0 +design 0 +architect 0 +train 0 +workbook 0 +get 0 +start 0 +quicksim 0 +trainingworkbook 0 +exersis 0 +addition 0 +thesedocu 0 +document 0 +click 0 +send 0 +surf 0 +homepag 0 +announc 0 +new 0 +group 0 +wiscinfo 0 +gopher 0 +site 0 +uwengin 0 +server 0 +file 0 +last 0 +modifi 0 +septemb 0 +pmcst 0 +question 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..be7d740b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,43 @@ +info 1 +homework 1 +page 1 +cours 1 +handout 1 +syllabu 1 +exam 1 +grade 1 +tabl 1 +homepag 0 +fall 0 +run 0 +netscap 0 +click 0 +herelink 0 +individu 0 +frame 0 +gener 0 +stuff 0 +section 0 +offic 0 +hour 0 +motw 0 +download 0 +class 0 +requir 0 +criteria 0 +schedul 0 +link 0 +refer 0 +goofi 0 +stuffnot 0 +preced 0 +contain 0 +browser 0 +abl 0 +handl 0 +pleas 0 +email 0 +cornel 0 +edupag 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..06f87671 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,43 @@ +interpret 1 +cours 1 +materi 1 +comput 1 +access 1 +dylan 1 +window 1 +chang 1 +home 0 +pagec 0 +structur 0 +program 0 +scienc 0 +depart 0 +cornel 0 +univers 0 +fall 0 +requir 0 +user 0 +password 0 +request 0 +attempt 0 +info 0 +note 0 +browser 0 +includ 0 +netscap 0 +correctli 0 +check 0 +java 0 +class 0 +file 0 +thu 0 +noodll 0 +inconsist 0 +behavior 0 +work 0 +parter 0 +link 0 +directori 0 +partnerjoin 0 +util 0 +announc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..458f1465 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,359 @@ +cours 2 +assign 2 +problem 2 +comput 2 +program 2 +object 2 +set 2 +dylan 2 +site 2 +orient 1 +time 1 +exam 1 +data 1 +function 1 +languag 1 +evalu 1 +provid 1 +consult 1 +tuesdai 1 +work 1 +gener 1 +java 1 +us 1 +question 1 +lectur 1 +hour 1 +public 1 +avail 1 +late 1 +scienc 1 +cover 1 +broad 1 +abstract 1 +recurs 1 +topic 1 +student 1 +think 1 +reach 1 +best 1 +staff 1 +materi 1 +order 1 +ugrad 1 +schedul 1 +browser 1 +machin 1 +make 1 +final 1 +accept 1 +togeth 1 +date 1 +list 1 +introductori 1 +techniqu 1 +includ 1 +correct 1 +well 1 +rangeof 1 +take 1 +skill 1 +post 1 +cornel 1 +current 1 +conot 1 +handout 1 +need 1 +request 1 +user 1 +idand 1 +password 1 +system 1 +toth 1 +mondai 1 +also 1 +huttenloch 1 +upson 1 +tobia 1 +mayr 1 +hamblin 1 +szewczyk 1 +voskuhl 1 +thursdai 1 +wednesdai 1 +help 1 +even 1 +thu 1 +justin 1 +compil 1 +standalon 1 +version 1 +sure 1 +grade 1 +person 1 +someon 1 +must 1 +submit 1 +name 1 +facil 1 +exampl 1 +prelim 1 +model 1 +procedur 1 +process 1 +oper 1 +state 1 +mutabl 1 +queue 1 +stream 1 +infinit 1 +fall 0 +informationaugust 0 +rang 0 +computersci 0 +concept 0 +pattern 0 +match 0 +useth 0 +dynam 0 +developedat 0 +appl 0 +suit 0 +courseabout 0 +happen 0 +notationthat 0 +chosen 0 +write 0 +major 0 +goal 0 +ofth 0 +teach 0 +clearli 0 +programsand 0 +toolbox 0 +modern 0 +programmingtechniqu 0 +applic 0 +often 0 +wonder 0 +whether 0 +takec 0 +focus 0 +orientedlanguag 0 +wherea 0 +exposur 0 +number 0 +programmingparadigm 0 +imperativeprogram 0 +good 0 +background 0 +goodform 0 +mathemat 0 +physic 0 +probablytak 0 +transfer 0 +either 0 +direct 0 +encourag 0 +first 0 +week 0 +questionsor 0 +comment 0 +http 0 +info 0 +contain 0 +run 0 +serverwhich 0 +allow 0 +answersa 0 +annot 0 +access 0 +simpli 0 +home 0 +page 0 +follow 0 +instruct 0 +creat 0 +thisweek 0 +send 0 +email 0 +edubut 0 +ask 0 +aboutproblem 0 +professor 0 +upsonjam 0 +tarobert 0 +tajustin 0 +taandra 0 +ferencz 0 +melissa 0 +consultantwhen 0 +meetlectur 0 +kimbal 0 +andrecit 0 +recitationsexpand 0 +opportunityto 0 +held 0 +midnight 0 +eachproblem 0 +setsdu 0 +sundai 0 +mondayeven 0 +consultinghour 0 +inth 0 +offic 0 +jame 0 +robert 0 +voskuhltba 0 +materialsther 0 +textbook 0 +handoutsand 0 +note 0 +hardcopi 0 +andon 0 +interpret 0 +free 0 +develop 0 +class 0 +implementedin 0 +capabl 0 +chang 0 +almost 0 +weekli 0 +netscap 0 +window 0 +borland 0 +downloadonto 0 +want 0 +word 0 +warn 0 +download 0 +ontoyour 0 +recentvers 0 +check 0 +requirementsstud 0 +respons 0 +read 0 +recit 0 +therewil 0 +preliminari 0 +given 0 +combinationof 0 +written 0 +exercis 0 +gradeswil 0 +base 0 +combin 0 +score 0 +account 0 +approxim 0 +half 0 +thetot 0 +willgener 0 +immedi 0 +return 0 +followingclass 0 +period 0 +complet 0 +earli 0 +andth 0 +sittingdown 0 +matter 0 +mani 0 +long 0 +sink 0 +beforesit 0 +polici 0 +joint 0 +workmuch 0 +learn 0 +come 0 +programmingproblem 0 +jointli 0 +peopl 0 +howev 0 +youwork 0 +singl 0 +jointassign 0 +circumstancesmai 0 +hand 0 +done 0 +els 0 +yourown 0 +doubt 0 +credit 0 +yougot 0 +would 0 +amaz 0 +easi 0 +tell 0 +whenpeopl 0 +pleas 0 +lifeunpleas 0 +break 0 +rule 0 +facilitiescit 0 +variou 0 +colleg 0 +campu 0 +macintosh 0 +andpc 0 +on 0 +depart 0 +thiscours 0 +upsonmac 0 +datesal 0 +electron 0 +server 0 +mondaynight 0 +submityour 0 +solut 0 +outlin 0 +studi 0 +introduct 0 +substitut 0 +iter 0 +induct 0 +higher 0 +argument 0 +valu 0 +analysi 0 +algorithm 0 +growth 0 +structur 0 +contract 0 +implement 0 +hierarch 0 +tree 0 +quotat 0 +reason 0 +symbol 0 +differenti 0 +extend 0 +type 0 +dispatch 0 +polynomi 0 +arithmet 0 +environ 0 +local 0 +variabl 0 +inherit 0 +multimethod 0 +stack 0 +heap 0 +heapsort 0 +prioriti 0 +metacircular 0 +variat 0 +express 0 +optim 0 +nonloc 0 +exit 0 +catch 0 +throw 0 +garbag 0 +collect 0 +illus 0 +memori 0 +random 0 +quicksort 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..bba9f50f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,88 @@ +cours 2 +materi 1 +inform 1 +homework 1 +conot 1 +page 1 +includ 1 +lectur 1 +project 1 +help 1 +annot 1 +allow 1 +send 1 +email 1 +note 1 +section 1 +date 1 +pleas 1 +process 1 +registr 1 +home 0 +introduct 0 +digit 0 +system 0 +comput 0 +organizationthorsten 0 +eickenfal 0 +kimbal 0 +btopic 0 +represent 0 +machin 0 +assembl 0 +languag 0 +processor 0 +organ 0 +interrupt 0 +memori 0 +hierarchi 0 +combinatori 0 +sequenti 0 +circuit 0 +data 0 +path 0 +control 0 +unit 0 +design 0 +microprogram 0 +helpif 0 +problem 0 +relat 0 +best 0 +appropri 0 +point 0 +staff 0 +also 0 +class 0 +mate 0 +otherwis 0 +cornel 0 +talk 0 +consult 0 +materialsal 0 +separ 0 +server 0 +want 0 +bookmark 0 +us 0 +document 0 +file 0 +check 0 +account 0 +request 0 +post 0 +saturdai 0 +encount 0 +difficulti 0 +read 0 +follow 0 +hidden 0 +instruct 0 +sign 0 +start 0 +mondai 0 +listlist 0 +made 0 +maintain 0 +thorsten 0 +eicken 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..1c7fc8ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,69 @@ +file 1 +page 1 +inform 1 +postscript 1 +enscript 1 +pfile 1 +home 1 +offic 1 +hour 1 +recit 1 +stuff 1 +convert 1 +text 1 +print 1 +look 0 +admin 0 +handout 0 +incl 0 +lectur 0 +note 0 +assign 0 +ethic 0 +profession 0 +social 0 +respons 0 +mayb 0 +electron 0 +submiss 0 +procedur 0 +group 0 +perform 0 +evalu 0 +resourc 0 +quot 0 +collect 0 +joke 0 +start 0 +submit 0 +sumedh 0 +break 0 +new 0 +mondai 0 +held 0 +csuglab 0 +floor 0 +upson 0 +misc 0 +peopl 0 +ask 0 +simpl 0 +unix 0 +program 0 +suggest 0 +wai 0 +first 0 +good 0 +sourc 0 +code 0 +second 0 +give 0 +nice 0 +header 0 +gener 0 +leav 0 +send 0 +printer 0 +come 0 +last 0 +modif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..e102f3cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,42 @@ +solut 2 +assign 1 +upson 1 +hall 1 +phone 1 +mail 1 +cornel 1 +offic 1 +hour 1 +oper 1 +prelim 1 +wednesdai 1 +thursdai 1 +budiu 1 +home 0 +pagec 0 +system 0 +program 0 +systemsc 0 +practicum 0 +systemkenneth 0 +birmanc 0 +new 0 +groupcours 0 +syllabuslectur 0 +note 0 +unix 0 +filesystem 0 +structur 0 +link 0 +static 0 +dynam 0 +taslili 0 +lili 0 +fridai 0 +cheng 0 +huang 0 +ychuang 0 +tuesdai 0 +mihai 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..78a51a8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,135 @@ +home 1 +clair 1 +program 1 +solut 1 +homework 1 +kevin 1 +assign 1 +page 1 +cours 1 +avail 1 +code 1 +scott 1 +file 1 +depart 1 +inform 1 +materi 1 +pleas 1 +mondai 1 +class 1 +oper 1 +still 1 +us 1 +find 1 +bind 1 +problem 1 +offer 1 +time 1 +fact 1 +dodg 1 +postscript 1 +pagecsfound 0 +artifici 0 +intellig 0 +comput 0 +scienc 0 +cornel 0 +univers 0 +fall 0 +welcom 0 +academ 0 +integr 0 +read 0 +announc 0 +date 0 +move 0 +atth 0 +begin 0 +notethat 0 +rubix 0 +need 0 +load 0 +thefunct 0 +rearrang 0 +slightli 0 +also 0 +variabl 0 +previous 0 +defin 0 +constant 0 +chang 0 +paramet 0 +appeas 0 +compil 0 +var 0 +bracket 0 +youus 0 +machinesshould 0 +netscap 0 +sbin 0 +found 0 +ksaunder 0 +sbinfor 0 +account 0 +seriou 0 +gremlin 0 +codefor 0 +abl 0 +look 0 +andget 0 +start 0 +planner 0 +success 0 +uponcomplet 0 +appli 0 +schema 0 +meanwhil 0 +hunt 0 +thoseus 0 +machin 0 +shouldn 0 +special 0 +limit 0 +concern 0 +get 0 +zeroon 0 +third 0 +worri 0 +longer 0 +complet 0 +asspecifi 0 +posit 0 +grade 0 +result 0 +guarante 0 +novemb 0 +oneassign 0 +group 0 +clarif 0 +unless 0 +otherwis 0 +specifi 0 +assum 0 +either 0 +system 0 +add 0 +vanto 0 +queri 0 +alreadi 0 +exist 0 +document 0 +modifi 0 +includ 0 +thisclarif 0 +newhomework 0 +coursemateri 0 +midterm 0 +portion 0 +statu 0 +report 0 +tuesdai 0 +thec 0 +section 0 +remind 0 +right 0 +pagesc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..59a50202 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,34 @@ +homework 1 +cours 1 +note 1 +fall 1 +offic 1 +prelim 1 +home 0 +pagec 0 +automata 0 +comput 0 +theorywelcom 0 +click 0 +inform 0 +lectur 0 +exam 0 +studi 0 +guideannounc 0 +avail 0 +hardcopi 0 +set 0 +hour 0 +incorrect 0 +date 0 +revis 0 +erratum 0 +chang 0 +room 0 +nikolai 0 +hourscod 0 +academ 0 +integr 0 +pleas 0 +read 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..0e514e86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,403 @@ +project 3 +databas 2 +system 2 +exam 2 +refer 2 +paper 2 +cours 2 +develop 2 +survei 1 +prelim 1 +list 1 +materi 1 +dbm 1 +work 1 +us 1 +code 1 +minibas 1 +final 1 +softwar 1 +also 1 +amount 1 +research 1 +term 1 +detail 1 +compon 1 +predat 1 +upson 1 +propos 1 +note 1 +textbook 1 +student 1 +background 1 +program 1 +data 1 +concept 1 +introductori 1 +topic 1 +involv 1 +write 1 +part 1 +process 1 +level 1 +follow 1 +transact 1 +offic 1 +advanc 1 +complet 1 +lectur 1 +mail 1 +inform 1 +grade 1 +possibl 1 +mani 1 +becom 1 +fundament 1 +class 1 +click 1 +herefor 1 +tent 1 +specif 1 +assign 1 +import 1 +base 1 +second 1 +provid 1 +queri 1 +need 1 +area 1 +text 1 +edit 1 +hour 1 +time 1 +tuesdai 1 +thursdai 1 +place 1 +result 1 +stat 1 +outsidefirewal 1 +descript 1 +fall 1 +design 1 +piec 1 +engin 1 +access 1 +form 1 +want 1 +larg 1 +number 1 +cover 1 +consequ 1 +discuss 1 +basic 1 +taught 1 +midterm 1 +test 1 +librari 1 +take 1 +turn 1 +depend 1 +person 1 +hopefulli 1 +choos 1 +whether 1 +goal 1 +comfort 1 +modular 1 +prototyp 1 +associ 1 +manag 1 +avail 1 +interfac 1 +could 1 +function 1 +neither 1 +lower 1 +convent 1 +page 1 +stonebrak 1 +morgan 1 +kaufmann 1 +phone 1 +weitsang 1 +evalu 0 +content 0 +schedul 0 +samplequest 0 +answer 0 +info 0 +predatordbm 0 +currentproject 0 +handout 0 +archiv 0 +prerequisit 0 +professor 0 +teach 0 +assist 0 +coursedescript 0 +offer 0 +first 0 +intendedto 0 +give 0 +solid 0 +databasemanag 0 +world 0 +slargest 0 +certainli 0 +among 0 +valuabl 0 +piecesof 0 +sens 0 +giant 0 +applic 0 +surprisingli 0 +principl 0 +behind 0 +industri 0 +grow 0 +thrive 0 +demand 0 +knowledgeabledatabas 0 +much 0 +greater 0 +suppli 0 +researchcommun 0 +activ 0 +alwai 0 +problem 0 +addressedbecaus 0 +explos 0 +peopl 0 +wish 0 +thiscours 0 +essenti 0 +anyon 0 +asystem 0 +compani 0 +informedus 0 +domain 0 +manipul 0 +find 0 +teller 0 +machin 0 +realli 0 +although 0 +intend 0 +newcours 0 +differ 0 +variou 0 +begin 0 +quickreview 0 +workload 0 +examin 0 +abreadth 0 +advancedtop 0 +thepurpos 0 +awar 0 +coursei 0 +fraction 0 +three 0 +weeksaft 0 +requireread 0 +journal 0 +confer 0 +proceed 0 +engineeringlibrari 0 +suggest 0 +initi 0 +pursueaddit 0 +forinform 0 +look 0 +written 0 +homework 0 +enrol 0 +mean 0 +twice 0 +semest 0 +addit 0 +around 0 +examtim 0 +developmentproject 0 +involvea 0 +signific 0 +wishto 0 +alon 0 +team 0 +howev 0 +willinvolv 0 +proportion 0 +fold 0 +hand 0 +experi 0 +build 0 +exist 0 +andmodifi 0 +manner 0 +thefirst 0 +huge 0 +andrar 0 +luxuri 0 +start 0 +scratch 0 +forc 0 +youto 0 +understand 0 +interact 0 +thediffer 0 +inevit 0 +bug 0 +appear 0 +simpl 0 +singl 0 +user 0 +fromth 0 +parser 0 +disk 0 +abl 0 +compil 0 +comput 0 +environ 0 +varioussystem 0 +actual 0 +come 0 +like 0 +buffermanag 0 +enginethat 0 +possibleproject 0 +lead 0 +builton 0 +focu 0 +high 0 +likecomplex 0 +type 0 +familiar 0 +recommend 0 +becauseth 0 +minim 0 +think 0 +interestedin 0 +ifyou 0 +categori 0 +decid 0 +aproject 0 +storag 0 +buffer 0 +thehigh 0 +optim 0 +betweenminibas 0 +higher 0 +somegener 0 +ideaon 0 +suitabl 0 +talk 0 +well 0 +advanceof 0 +date 0 +certain 0 +step 0 +submitan 0 +order 0 +produc 0 +review 0 +meet 0 +discussth 0 +progress 0 +made 0 +toward 0 +must 0 +particularsystem 0 +documentwil 0 +close 0 +someth 0 +thati 0 +picki 0 +contribut 0 +geton 0 +submiss 0 +includ 0 +demo 0 +reason 0 +oftest 0 +home 0 +homepag 0 +coursetextbook 0 +primari 0 +beta 0 +book 0 +raghu 0 +ramakrishnan 0 +bookcontain 0 +databasebook 0 +free 0 +instruct 0 +databasesystem 0 +might 0 +thecampu 0 +store 0 +korth 0 +silberschatz 0 +mcgraw 0 +hill 0 +secondedit 0 +standard 0 +lack 0 +tobe 0 +graduat 0 +michael 0 +read 0 +collect 0 +rel 0 +recent 0 +collectedand 0 +introduc 0 +ingr 0 +postgr 0 +andillustra 0 +corearea 0 +elmasri 0 +navath 0 +benjamin 0 +cum 0 +altern 0 +grai 0 +reuter 0 +techniqu 0 +bibl 0 +long 0 +tellsyou 0 +know 0 +wonderfulrefer 0 +clear 0 +confus 0 +aspect 0 +concurr 0 +control 0 +recoveri 0 +semant 0 +resourc 0 +tutori 0 +languag 0 +construct 0 +debuggingwith 0 +make 0 +gradingpolici 0 +percentag 0 +even 0 +finish 0 +anextra 0 +half 0 +likewis 0 +thefin 0 +period 0 +thur 0 +confirm 0 +willfocu 0 +coveredin 0 +earlier 0 +question 0 +professorpraveen 0 +seshadri 0 +praveen 0 +teachingassist 0 +hall 0 +noon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..09515e68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,183 @@ +program 2 +languag 2 +semant 1 +cours 1 +comput 1 +notat 1 +student 1 +descript 1 +upson 1 +studi 1 +inform 1 +properti 1 +logic 1 +cornel 1 +lectur 1 +link 1 +like 1 +implement 1 +mechan 1 +describ 1 +tool 1 +prove 1 +specif 1 +well 1 +work 1 +side 1 +assum 1 +experi 1 +knowledg 1 +function 1 +basic 1 +undergradu 1 +instructor 1 +class 1 +offic 1 +hour 1 +refer 1 +environ 1 +document 1 +avail 1 +content 0 +text 0 +prerequisiteshandoutsscrib 0 +noteshomework 0 +assignmentscontact 0 +informationrelev 0 +though 0 +call 0 +advanc 0 +book 0 +better 0 +entitl 0 +goalof 0 +conduct 0 +broad 0 +survei 0 +tech 0 +programminglanguag 0 +java 0 +directli 0 +compress 0 +dispatch 0 +tabl 0 +multipleinherit 0 +rather 0 +goal 0 +principlesof 0 +formal 0 +analyz 0 +concern 0 +subsum 0 +thestudi 0 +henc 0 +lead 0 +deeper 0 +understand 0 +mathemat 0 +proof 0 +theori 0 +exampl 0 +abstractli 0 +specifi 0 +howprogram 0 +oper 0 +asnot 0 +denot 0 +turn 0 +abstract 0 +preciser 0 +allow 0 +techniqu 0 +induct 0 +relat 0 +forform 0 +interest 0 +relev 0 +type 0 +safeti 0 +compil 0 +correct 0 +ideal 0 +come 0 +learn 0 +somethingabout 0 +make 0 +concept 0 +precis 0 +tomanipul 0 +demonstr 0 +us 0 +textbook 0 +carl 0 +gunter 0 +programm 0 +second 0 +edit 0 +larri 0 +paulson 0 +prerequisit 0 +least 0 +pascal 0 +prefer 0 +witha 0 +scheme 0 +haskel 0 +theoret 0 +profici 0 +undergraduatemathemat 0 +scienc 0 +ture 0 +machin 0 +recurs 0 +andlog 0 +predic 0 +calculu 0 +mathematicalmatur 0 +requir 0 +design 0 +math 0 +meng 0 +anmeng 0 +must 0 +talk 0 +find 0 +ifth 0 +suitabl 0 +contact 0 +newsgroup 0 +greg 0 +morrisett 0 +appoint 0 +admin 0 +assist 0 +linda 0 +competillo 0 +lfar 0 +erlingsson 0 +ulfar 0 +pmrelev 0 +mark 0 +leon 0 +resourc 0 +research 0 +emac 0 +mode 0 +comint 0 +need 0 +project 0 +line 0 +standard 0 +postscript 0 +user 0 +guid 0 +base 0 +system 0 +librari 0 +indexdocument 0 +toolsa 0 +gentl 0 +introduct 0 +andrew 0 +cum 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..303a1f6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,29 @@ +cours 1 +page 1 +cornel 1 +project 1 +newsgroup 1 +home 0 +pagecsmultimedia 0 +systemscomput 0 +scienc 0 +depart 0 +univers 0 +fall 0 +final 0 +present 0 +schedul 0 +staff 0 +info 0 +materi 0 +student 0 +us 0 +link 0 +anounc 0 +access 0 +rivl 0 +bugcom 0 +question 0 +send 0 +mail 0 +janosi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..c5a1813a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,26 @@ +cours 1 +page 1 +cornel 1 +newsgroup 1 +home 0 +pagecsmultimedia 0 +systemscomput 0 +scienc 0 +depart 0 +univers 0 +fall 0 +staff 0 +info 0 +materi 0 +student 0 +project 0 +us 0 +link 0 +anounc 0 +access 0 +rivl 0 +bugcom 0 +question 0 +send 0 +mail 0 +janosi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..be9761bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,44 @@ +homework 2 +last 2 +modifi 2 +solut 2 +addendum 1 +design 1 +analysi 1 +algorithm 1 +evan 1 +time 1 +text 1 +kozen 1 +announc 1 +note 1 +homepag 0 +instructor 0 +ronitt 0 +rubinfeld 0 +moran 0 +locat 0 +upson 0 +springer 0 +verlag 0 +handout 0 +cours 0 +syllabu 0 +copi 0 +exam 0 +thursdai 0 +inupson 0 +talk 0 +tome 0 +reschedul 0 +cannot 0 +make 0 +refer 0 +cheat 0 +sheet 0 +class 0 +rajeev 0 +motwani 0 +lectur 0 +approxim 0 +paper 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..764ef08f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,146 @@ +parallel 2 +system 2 +machin 2 +cours 1 +program 1 +languag 1 +architectur 1 +support 1 +manufactur 1 +offer 1 +debat 1 +consider 1 +perform 1 +price 1 +processor 1 +workstat 1 +level 1 +larg 1 +number 1 +issu 1 +topic 1 +first 1 +algorithm 1 +focu 1 +oper 1 +requir 1 +memori 1 +part 1 +studi 1 +model 1 +home 0 +pagefronti 0 +thorsten 0 +eickenfal 0 +locat 0 +upson 0 +pmoffic 0 +hour 0 +pmcours 0 +descriptionparallel 0 +stai 0 +underscor 0 +fact 0 +multiprocessor 0 +product 0 +line 0 +howev 0 +futur 0 +look 0 +like 0 +heat 0 +past 0 +month 0 +feder 0 +spend 0 +cut 0 +erad 0 +massiv 0 +competitor 0 +glorifi 0 +farm 0 +smile 0 +cannot 0 +eas 0 +busi 0 +suffer 0 +much 0 +competit 0 +leverag 0 +latest 0 +microprocessor 0 +develop 0 +quickli 0 +core 0 +technolog 0 +integr 0 +shelf 0 +cost 0 +effect 0 +easili 0 +high 0 +host 0 +vari 0 +applic 0 +workload 0 +although 0 +week 0 +matur 0 +last 0 +year 0 +point 0 +almost 0 +usabl 0 +adequ 0 +allow 0 +gener 0 +purpos 0 +aspect 0 +featur 0 +taken 0 +grant 0 +sequenti 0 +comput 0 +portabl 0 +power 0 +debugg 0 +multi 0 +user 0 +access 0 +virtual 0 +fast 0 +examin 0 +complet 0 +split 0 +network 0 +us 0 +vertic 0 +approach 0 +interact 0 +associ 0 +execut 0 +hardwar 0 +implement 0 +focuss 0 +layer 0 +second 0 +specif 0 +slice 0 +horizont 0 +across 0 +select 0 +analysi 0 +design 0 +altern 0 +depth 0 +dash 0 +provid 0 +share 0 +contrast 0 +materialscours 0 +formatlectur 0 +note 0 +problem 0 +set 0 +term 0 +project 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..110182e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,116 @@ +cours 1 +us 1 +lectur 1 +annot 1 +tutori 1 +page 1 +organ 1 +thorsten 1 +languag 1 +materi 1 +allow 1 +also 1 +note 1 +conot 1 +document 1 +start 1 +introduct 1 +marshal 1 +program 1 +exampl 1 +onlin 1 +question 1 +home 0 +pageintroduct 0 +digit 0 +system 0 +comput 0 +eickenfal 0 +kimbal 0 +btopic 0 +includ 0 +representationof 0 +inform 0 +machin 0 +assembl 0 +processor 0 +interrupt 0 +memori 0 +hierarchi 0 +combinatori 0 +sequentialcircuit 0 +data 0 +path 0 +control 0 +unit 0 +design 0 +andmicroprogram 0 +helpif 0 +problem 0 +relat 0 +homework 0 +aproject 0 +best 0 +help 0 +theappropri 0 +point 0 +gethelp 0 +staff 0 +class 0 +mate 0 +otherwis 0 +send 0 +email 0 +cornel 0 +talk 0 +toon 0 +consult 0 +informationcoursemateri 0 +announcementsannounc 0 +video 0 +assign 0 +part 0 +small 0 +get 0 +avail 0 +case 0 +never 0 +onlinean 0 +brain 0 +great 0 +forpeopl 0 +know 0 +procedur 0 +like 0 +pascal 0 +fortran 0 +david 0 +cclass 0 +cardiff 0 +univers 0 +lot 0 +learnc 0 +todai 0 +guid 0 +book 0 +programsand 0 +refer 0 +well 0 +theyahoo 0 +wish 0 +surf 0 +search 0 +ofmor 0 +place 0 +frequent 0 +ask 0 +inansw 0 +common 0 +come 0 +learn 0 +contain 0 +link 0 +sever 0 +newsgroup 0 +maintain 0 +voneicken 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..0d06db15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,36 @@ +home 1 +cours 1 +depart 1 +clair 1 +final 1 +mail 1 +upson 1 +pagecsfound 0 +artifici 0 +intellig 0 +comput 0 +scienc 0 +cornel 0 +univers 0 +fall 0 +welcom 0 +inform 0 +materi 0 +code 0 +academ 0 +integr 0 +pleas 0 +read 0 +announc 0 +grade 0 +avail 0 +sometim 0 +saturdai 0 +send 0 +yourgrad 0 +request 0 +exam 0 +altern 0 +date 0 +pagesc 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..bfb314b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,43 @@ +cornel 1 +cours 1 +offic 1 +hour 1 +upson 1 +samuel 1 +weber 1 +buch 1 +overview 1 +note 1 +home 0 +pagec 0 +softwar 0 +engin 0 +technolog 0 +techniquescomput 0 +scienc 0 +depart 0 +univers 0 +fall 0 +staff 0 +professor 0 +vineet 0 +thursdai 0 +yaron 0 +minski 0 +none 0 +materi 0 +handout 0 +lectur 0 +recit 0 +line 0 +resourc 0 +assign 0 +grade 0 +remark 0 +stuff 0 +frequent 0 +ask 0 +question 0 +borland 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..022b5434 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,31 @@ +assign 1 +project 1 +home 1 +page 1 +introduct 1 +brian 0 +smith 0 +tour 0 +guid 0 +cours 0 +inform 0 +homework 0 +spec 0 +lectur 0 +tabl 0 +content 0 +postscript 0 +slide 0 +comput 0 +system 0 +organ 0 +program 0 +procedur 0 +recurs 0 +stack 0 +assembl 0 +linker 0 +loader 0 +interrupt 0 +logic 0 +design 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..7eb536a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,43 @@ +program 2 +prelim 1 +april 1 +home 1 +page 1 +spring 1 +cours 1 +inform 1 +exam 1 +tue 1 +messag 0 +welcom 0 +rememb 0 +check 0 +frequentlyfor 0 +import 0 +regard 0 +tuesdai 0 +review 0 +session 0 +held 0 +onsundai 0 +baker 0 +instructor 0 +teach 0 +assist 0 +offic 0 +hour 0 +get 0 +materi 0 +theworld 0 +wide 0 +codewarrior 0 +personalmac 0 +lectur 0 +februari 0 +thur 0 +march 0 +final 0 +last 0 +updat 0 +pierc 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..8d06ecdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,260 @@ +lectur 4 +class 3 +upson 2 +program 2 +gofer 2 +assign 2 +handout 2 +link 2 +cornel 2 +correct 2 +list 2 +offic 2 +hour 2 +algorithm 1 +loop 1 +spring 1 +email 1 +jeff 1 +cours 1 +server 1 +structur 1 +none 1 +avail 1 +comput 1 +foster 1 +final 1 +consult 1 +introduct 1 +recurs 1 +data 1 +analysi 1 +david 1 +file 1 +text 1 +preliminari 1 +macgof 1 +page 1 +scienc 1 +depart 1 +jfoster 1 +prelim 1 +thursdai 1 +topic 1 +note 1 +hollist 1 +format 1 +binhqx 1 +date 1 +basic 1 +pointer 1 +arrai 1 +dynam 1 +storag 1 +alloc 1 +dynamicdata 1 +fine 1 +point 1 +deriv 1 +notat 1 +tripl 1 +condit 1 +prove 1 +function 1 +type 1 +curri 1 +filter 1 +iter 1 +applic 1 +architectur 1 +framework 1 +binari 1 +tree 1 +industri 1 +strength 1 +java 1 +home 1 +view 1 +cover 1 +tuesdai 1 +exam 1 +mondai 1 +olin 1 +last 1 +jose 1 +thank 1 +descript 1 +staff 1 +code 1 +sampl 1 +credit 1 +languag 1 +kwan 1 +walker 1 +section 1 +ahal 1 +walkerwednesdai 1 +kaykylesteveericvasantha 1 +danerickaychrisdan 1 +microsoft 1 +word 1 +rich 1 +plain 1 +stuffit 1 +expand 1 +system 1 +find 1 +codewarrior 1 +site 1 +univers 0 +question 0 +problem 0 +troubl 0 +tabl 0 +us 0 +earlyvers 0 +netscap 0 +contest 0 +first 0 +march 0 +announcetim 0 +place 0 +soon 0 +theprelim 0 +second 0 +april 0 +room 0 +wereannounc 0 +thesecond 0 +noon 0 +lastnam 0 +name 0 +covereveryth 0 +addit 0 +topicsconv 0 +daywhenwherewhomondai 0 +davetuesdai 0 +jeffwednesdai 0 +davethursdai 0 +halfridai 0 +halsaturdai 0 +breview 0 +session 0 +chri 0 +chrisand 0 +take 0 +time 0 +studi 0 +enhanc 0 +also 0 +engrd 0 +fall 0 +summer 0 +grant 0 +bothcom 0 +prerequisit 0 +equival 0 +programmingexperi 0 +intermedi 0 +high 0 +level 0 +tocomput 0 +includ 0 +develop 0 +proof 0 +ofprogram 0 +abstract 0 +datatyp 0 +object 0 +orient 0 +ofalgorithm 0 +princip 0 +instructor 0 +perkin 0 +appoint 0 +teach 0 +assist 0 +held 0 +alan 0 +sectionsdaytimeroominstructortuesdai 0 +perkinstuesdai 0 +perkinswednesdai 0 +walkerthursdai 0 +fosterfridai 0 +regular 0 +schedul 0 +effect 0 +ofclass 0 +sundai 0 +fridai 0 +consultingsundaymondaytuesdaywednesdaythursdayfridai 0 +steveerickylechrisjpkyl 0 +steveerickylechrisjpvasantha 0 +josejosekayjosejpvasantha 0 +josejosekayjosejp 0 +three 0 +macbinari 0 +contain 0 +parseabl 0 +other 0 +next 0 +waspost 0 +given 0 +macintosh 0 +process 0 +http 0 +address 0 +foraladdin 0 +window 0 +version 0 +armandonunez 0 +postscript 0 +print 0 +almost 0 +anylas 0 +printer 0 +want 0 +need 0 +applicationlik 0 +ghostview 0 +intro 0 +cell 0 +complex 0 +set 0 +charact 0 +simpl 0 +dictionari 0 +flavor 0 +inth 0 +public 0 +lab 0 +unix 0 +ishaskel 0 +systemsz 0 +yale 0 +haskel 0 +pleas 0 +help 0 +piec 0 +ofgof 0 +think 0 +gener 0 +interest 0 +know 0 +make 0 +itavail 0 +sourc 0 +manual 0 +onth 0 +project 0 +line 0 +csdepart 0 +enhance_assign 0 +cuinfo 0 +metrowerk 0 +homepag 0 +aladdin 0 +maker 0 +comment 0 +suggest 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..19e0d310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,32 @@ +cours 1 +announc 1 +home 0 +pagec 0 +structur 0 +interpretationof 0 +comput 0 +programscomput 0 +scienc 0 +departmentcornel 0 +universityspr 0 +staff 0 +info 0 +materi 0 +emac 0 +macmarlai 0 +demo 0 +section 0 +room 0 +chang 0 +prelim 0 +time 0 +place 0 +make 0 +grader 0 +happi 0 +gener 0 +exam 0 +extens 0 +date 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..5c098c2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,92 @@ +languag 1 +offic 1 +implement 1 +descript 1 +upson 1 +email 1 +advanc 1 +link 1 +modern 1 +program 1 +featur 1 +object 1 +class 1 +handout 1 +cornel 1 +phone 1 +hour 1 +evan 1 +inform 1 +refer 1 +environ 1 +document 1 +avail 1 +content 0 +descriptionhandoutsadministriviaweb 0 +java 0 +haskel 0 +dylan 0 +provid 0 +high 0 +level 0 +closur 0 +polymorph 0 +abstract 0 +data 0 +type 0 +garbag 0 +collect 0 +except 0 +continu 0 +thread 0 +synchron 0 +construct 0 +survei 0 +techniqu 0 +effici 0 +implementationof 0 +focu 0 +ofmodern 0 +function 0 +make 0 +connectionsto 0 +kind 0 +notabl 0 +orient 0 +separ 0 +page 0 +administrivia 0 +instructor 0 +greg 0 +morrisett 0 +admin 0 +assist 0 +linda 0 +competillo 0 +send 0 +appoint 0 +moran 0 +tuesdai 0 +thursdai 0 +pmweb 0 +mark 0 +leon 0 +resourc 0 +research 0 +project 0 +line 0 +standard 0 +postscript 0 +user 0 +guid 0 +base 0 +system 0 +librari 0 +tool 0 +indexdocument 0 +toolsa 0 +gentl 0 +introduct 0 +andrew 0 +cum 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..9733cefd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,24 @@ +postscript 1 +distribut 1 +homework 1 +examin 1 +annot 1 +practic 0 +systemspract 0 +system 0 +registr 0 +necessari 0 +student 0 +takingc 0 +gener 0 +informationcours 0 +overview 0 +logist 0 +read 0 +homeworkshomework 0 +amexaminationsmidterm 0 +final 0 +bibliographiesselect 0 +bibliographi 0 +prepar 0 +class 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..5aa2cead --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,166 @@ +poster 1 +split 1 +upson 1 +pleas 1 +instruct 1 +board 1 +well 1 +final 1 +project 1 +homework 1 +thorsten 1 +session 1 +present 1 +arriv 1 +minut 1 +problem 1 +judg 1 +report 1 +page 1 +import 1 +find 1 +start 1 +futur 1 +work 1 +check 1 +home 0 +pagehigh 0 +perform 0 +comput 0 +system 0 +eickenspr 0 +sessionthu 0 +tbdpleas 0 +sign 0 +outsid 0 +willdetermin 0 +order 0 +begin 0 +postersess 0 +late 0 +pick 0 +cindywilliam 0 +limit 0 +hold 0 +ithorizont 0 +hang 0 +corridor 0 +presentyour 0 +plu 0 +question 0 +give 0 +everyon 0 +asens 0 +attack 0 +solut 0 +contempl 0 +andth 0 +result 0 +gotten 0 +learn 0 +presentationswil 0 +messag 0 +across 0 +everi 0 +memberof 0 +group 0 +particip 0 +nativespeak 0 +difficulti 0 +taken 0 +consider 0 +info 0 +cours 0 +current 0 +mondai 0 +noon 0 +absolut 0 +must 0 +subdirectori 0 +willb 0 +contribut 0 +thelongest 0 +last 0 +remain 0 +server 0 +year 0 +tocom 0 +mani 0 +peopl 0 +search 0 +engin 0 +finalreport 0 +usual 0 +introduct 0 +aretri 0 +solv 0 +follow 0 +thorough 0 +discuss 0 +trade 0 +off 0 +part 0 +need 0 +explain 0 +chose 0 +thesolut 0 +option 0 +consid 0 +youreject 0 +webread 0 +convic 0 +bestsolut 0 +showcas 0 +us 0 +ampl 0 +experiment 0 +data 0 +goodexplan 0 +exactli 0 +measur 0 +know 0 +whatyou 0 +think 0 +shown 0 +left 0 +open 0 +projectsproject 0 +reportsproject 0 +proposalsiniti 0 +ideascours 0 +materialshomework 0 +cuc 0 +pagebefor 0 +machin 0 +might 0 +also 0 +sampl 0 +program 0 +introc 0 +casec 0 +technologyc 0 +cachesc 0 +netsc 0 +spc 0 +cyou 0 +inform 0 +paper 0 +parallel 0 +programmingin 0 +emdc 0 +sortingc 0 +spamc 0 +msgpassc 0 +mpic 0 +cachecohc 0 +locksc 0 +threadsc 0 +atmc 0 +netc 0 +scoreboardc 0 +tomasuloc 0 +predc 0 +superscalarc 0 +busesc 0 +pentiummaintain 0 +eicken 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..eaa018e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,74 @@ +april 2 +februari 2 +march 1 +vision 1 +motion 1 +cont 1 +lectur 1 +transform 1 +machin 1 +cours 1 +problem 1 +note 1 +januari 1 +regular 1 +comput 1 +stereo 1 +correl 1 +parametr 1 +guest 1 +track 1 +staff 0 +instructor 0 +ramin 0 +zabihteach 0 +assist 0 +justin 0 +millerclass 0 +time 0 +place 0 +phillip 0 +project 0 +suggestionsproblem 0 +set 0 +class 0 +scribe 0 +week 0 +simul 0 +anneal 0 +calculu 0 +variat 0 +maximum 0 +likelihood 0 +estim 0 +markov 0 +random 0 +field 0 +snake 0 +introduct 0 +mestim 0 +method 0 +censu 0 +geometri 0 +geometr 0 +segment 0 +edg 0 +detect 0 +continu 0 +model 0 +base 0 +hausdorff 0 +distanc 0 +eigenhausdorff 0 +face 0 +recognitionsect 0 +optic 0 +flow 0 +constraint 0 +equationoth 0 +sourc 0 +home 0 +page 0 +histori 0 +object 0 +recognit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..48868dd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,73 @@ +project 1 +home 1 +cours 1 +inform 1 +directori 1 +part 1 +speech 1 +corpu 1 +depart 1 +cornel 1 +code 1 +brill 1 +annot 1 +text 1 +wordnet 1 +contain 1 +pagecsintroduct 0 +natur 0 +languag 0 +understandingcomput 0 +scienc 0 +univers 0 +spring 0 +welcom 0 +materi 0 +academ 0 +integr 0 +pleas 0 +read 0 +announcementsher 0 +list 0 +resourc 0 +avail 0 +taggerbrown 0 +brown 0 +tag 0 +small 0 +withpart 0 +execut 0 +sure 0 +environ 0 +variabl 0 +wnsearchdir 0 +archiv 0 +dict 0 +final 0 +site 0 +descript 0 +ofth 0 +content 0 +penn 0 +treebank 0 +iicollect 0 +canus 0 +like 0 +talk 0 +francisabout 0 +access 0 +us 0 +databas 0 +recent 0 +paper 0 +computationallinguist 0 +repositori 0 +pointer 0 +variou 0 +system 0 +compon 0 +present 0 +schedulewhat 0 +turn 0 +pagesc 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..d29b672b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,36 @@ +class 1 +cours 1 +lab 1 +upson 1 +assign 1 +tuesdai 1 +wednesdai 1 +section 1 +prelim 1 +first 1 +home 0 +page 0 +info 0 +syllabu 0 +link 0 +announcementsroom 0 +updat 0 +unforseen 0 +circumst 0 +still 0 +unableto 0 +therefor 0 +follow 0 +room 0 +philip 0 +thursdai 0 +meet 0 +maclab 0 +usual 0 +close 0 +book 0 +exam 0 +cover 0 +materialcov 0 +need 0 +comput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..6b1d507b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,38 @@ +lectur 3 +maxflow 2 +algorithm 1 +heap 1 +union 1 +find 1 +fall 1 +matroid 1 +binomi 1 +tree 1 +preflow 1 +push 1 +professor 0 +monika 0 +rauch 0 +henzingeremail 0 +cornel 0 +cours 0 +informationhomework 0 +solut 0 +graph 0 +explor 0 +greedi 0 +dijkstra 0 +bellman 0 +ford 0 +matrix 0 +closur 0 +fibonacci 0 +treap 0 +randomizedsearch 0 +mincut 0 +theorem 0 +edmond 0 +karp 0 +dinitz 0 +dynam 0 +implement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..e20cdac0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,212 @@ +juli 3 +assign 2 +class 2 +comput 2 +matlab 2 +upson 2 +grade 2 +final 2 +introduct 1 +cours 1 +june 1 +scientif 1 +sourc 1 +code 1 +problem 1 +linear 1 +offic 1 +deadlin 1 +interpol 1 +hall 1 +time 1 +work 1 +midterm 1 +exam 1 +system 1 +scmv 1 +file 1 +syllabu 1 +handout 1 +numer 1 +includ 1 +quadratur 1 +equat 1 +solv 1 +least 1 +squar 1 +vector 1 +niko 1 +cornel 1 +hour 1 +appoint 1 +ozan 1 +math 1 +student 1 +lab 1 +siblei 1 +martha 1 +hand 1 +done 1 +credit 1 +alon 1 +name 1 +chang 1 +total 1 +score 1 +newton 1 +cubic 1 +review 1 +classroom 1 +minim 1 +function 1 +euler 1 +method 1 +exampl 1 +unix 1 +zcat 1 +computationsumm 0 +inform 0 +setsan 0 +elementari 0 +analysi 0 +scientificcomput 0 +topic 0 +andnonlinear 0 +fit 0 +ordinarydifferenti 0 +environ 0 +us 0 +effici 0 +reliabl 0 +stabil 0 +stress 0 +informationstaff 0 +pitsiani 0 +instructor 0 +hafizogullari 0 +teach 0 +assist 0 +lecturesclass 0 +meet 0 +everi 0 +administrationlauri 0 +buck 0 +question 0 +concern 0 +record 0 +account 0 +addressedto 0 +administr 0 +prerequisitesc 0 +corequisit 0 +materialstext 0 +matrix 0 +approachus 0 +charl 0 +loan 0 +distribut 0 +softwar 0 +purchas 0 +eitherth 0 +macintosh 0 +version 0 +though 0 +labsthi 0 +design 0 +three 0 +renssela 0 +setsther 0 +lectur 0 +orfrom 0 +page 0 +extra 0 +avail 0 +rack 0 +outsid 0 +collect 0 +computingproblem 0 +return 0 +behandl 0 +begin 0 +duedat 0 +late 0 +accept 0 +worst 0 +gradefrom 0 +ignor 0 +partner 0 +printyour 0 +copi 0 +pair 0 +firstpag 0 +addit 0 +partnernam 0 +examsther 0 +dai 0 +list 0 +gradingyour 0 +follow 0 +best 0 +beassign 0 +accord 0 +rel 0 +rank 0 +base 0 +onyour 0 +calendar 0 +program 0 +error 0 +float 0 +point 0 +number 0 +registr 0 +polynomi 0 +vandermond 0 +piecewis 0 +hermit 0 +spline 0 +integr 0 +cote 0 +composit 0 +rule 0 +adapt 0 +drop 0 +matric 0 +oper 0 +given 0 +choleski 0 +find 0 +root 0 +variabl 0 +multivari 0 +initi 0 +valu 0 +backward 0 +rung 0 +kutta 0 +adam 0 +computingat 0 +rennselaerhal 0 +locat 0 +folder 0 +applic 0 +chapter 0 +plan 0 +stand 0 +otherthan 0 +on 0 +uncompress 0 +untar 0 +command 0 +highli 0 +recommend 0 +brows 0 +session 0 +need 0 +postscript 0 +viewer 0 +instal 0 +order 0 +randperm 0 +length 0 +set 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..6a7ecd2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,182 @@ +system 2 +class 2 +cours 2 +materi 2 +oper 1 +quizz 1 +program 1 +also 1 +final 1 +nawaaz 1 +subject 1 +memori 1 +file 1 +cover 1 +issu 1 +thursdai 1 +summer 1 +home 1 +page 1 +instructor 1 +indupraka 1 +kodukula 1 +cornel 1 +ahm 1 +motd 1 +descript 1 +schedul 1 +polici 1 +statement 1 +collabor 1 +offic 1 +hour 1 +send 1 +comment 1 +topic 1 +deadlock 1 +method 1 +comput 1 +discuss 1 +requir 1 +roughli 1 +follow 1 +particular 1 +start 1 +lldiscuss 1 +next 1 +virtual 1 +look 1 +book 1 +week 1 +first 1 +worth 1 +group 1 +prereq 0 +permiss 0 +praka 0 +teach 0 +assist 0 +archiv 0 +prerequsit 0 +outlin 0 +textbook 0 +grade 0 +anintroduct 0 +logic 0 +design 0 +emphasison 0 +multiprogram 0 +includ 0 +processsynchron 0 +manag 0 +input 0 +output 0 +inform 0 +share 0 +protect 0 +secur 0 +theimpact 0 +network 0 +distribut 0 +environ 0 +operatingsystem 0 +fast 0 +pace 0 +requiringconst 0 +attent 0 +prerequsitescomplet 0 +familiar 0 +assum 0 +inparticular 0 +knowledg 0 +architectur 0 +assembl 0 +programminglanguag 0 +structur 0 +theintroductori 0 +purpos 0 +thatwil 0 +remind 0 +audienc 0 +outlineth 0 +organ 0 +depend 0 +feedback 0 +chang 0 +theorder 0 +content 0 +section 0 +overview 0 +concurr 0 +synchron 0 +ensur 0 +mutualexclus 0 +detect 0 +prevent 0 +algorithm 0 +multiprocessor 0 +well 0 +memorymanag 0 +variou 0 +usedto 0 +implement 0 +segment 0 +evolut 0 +thetradit 0 +micro 0 +kernel 0 +timepermit 0 +lectur 0 +advanc 0 +multithread 0 +serverless 0 +textbooksth 0 +princip 0 +text 0 +conceptsbook 0 +abraham 0 +silberschatz 0 +peter 0 +galvin 0 +distributeclass 0 +note 0 +complet 0 +noteswil 0 +avail 0 +world 0 +wide 0 +pageat 0 +meet 0 +mondaythru 0 +second 0 +addit 0 +weekli 0 +assign 0 +thesewil 0 +hand 0 +thursdayat 0 +gradingeach 0 +homework 0 +carri 0 +weightag 0 +combinedweightag 0 +twomidterm 0 +surpris 0 +todetermin 0 +understand 0 +collaborationat 0 +peopl 0 +form 0 +eachhomework 0 +need 0 +submit 0 +copi 0 +thehomework 0 +close 0 +closednot 0 +mondai 0 +tuesdai 0 +wednesdai 0 +upson 0 +maintain 0 +induprakaskodukula 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..7d95495d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,41 @@ +lectur 3 +septemb 3 +note 3 +octob 2 +novemb 2 +solut 2 +decemb 2 +homework 2 +cornel 1 +offic 1 +hour 1 +appoint 1 +csc 0 +advanc 0 +program 0 +languagesfal 0 +upson 0 +instructor 0 +henzingerupson 0 +class 0 +teach 0 +assist 0 +neal 0 +glewupson 0 +glew 0 +handoutshandout 0 +cours 0 +informationhandout 0 +get 0 +start 0 +mlhandout 0 +meta 0 +lambdahomeworkshomework 0 +grieshomework 0 +notesraw 0 +scribe 0 +noteslectur 0 +introduct 0 +mllectur 0 +midterm 0 +grieslectur 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..b55a9ce4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,29 @@ +fall 2 +note 1 +upson 1 +offic 1 +hour 1 +ravi 1 +advanc 0 +program 0 +languag 0 +faculti 0 +prof 0 +robert 0 +constabl 0 +mondai 0 +teach 0 +assist 0 +kumar 0 +thur 0 +assign 0 +nuprl 0 +classic 0 +comment 0 +question 0 +suggest 0 +page 0 +pleas 0 +mail 0 +pavel 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..91a43807 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,14 @@ +cours 1 +addit 1 +inform 1 +coursesc 0 +fall 0 +spring 0 +maintain 0 +individualfaculti 0 +member 0 +consult 0 +class 0 +page 0 +contactgloria 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..a0cdd087 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,33 @@ +almstrum 1 +utexa 1 +analysi 1 +program 1 +instructor 1 +vicki 1 +linyuan 1 +fall 0 +yang 0 +syllabu 0 +announc 0 +homework 0 +assign 0 +handout 0 +interest 0 +tutori 0 +new 0 +class 0 +homepag 0 +last 0 +updat 0 +page 0 +prepar 0 +suggest 0 +comment 0 +welcom 0 +click 0 +send 0 +mail 0 +depart 0 +comput 0 +scienc 0 +austin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..c0ffee6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,114 @@ +formal 1 +system 1 +april 1 +reason 1 +tool 1 +present 1 +method 1 +examin 1 +logic 1 +otter 1 +foundat 1 +mani 1 +program 1 +number 1 +order 1 +theori 1 +upon 1 +version 1 +page 1 +final 1 +spring 0 +ofmathemat 0 +taylor 0 +cours 0 +blurb 0 +approach 0 +theobject 0 +specifi 0 +comput 0 +includ 0 +formalizationof 0 +world 0 +interact 0 +creationof 0 +numer 0 +systemsfor 0 +mechan 0 +formalmethod 0 +support 0 +differ 0 +exampl 0 +suchsystem 0 +pair 0 +primit 0 +recurs 0 +arithmet 0 +boyer 0 +moor 0 +prover 0 +first 0 +nelson 0 +higher 0 +imp 0 +equat 0 +mizar 0 +quaif 0 +type 0 +nuprl 0 +lego 0 +coqstud 0 +choos 0 +help 0 +instructor 0 +ortool 0 +grade 0 +base 0 +aboutthes 0 +projecthtml 0 +theqe 0 +manifestoplain 0 +text 0 +qedmanifestobowen 0 +backup 0 +copi 0 +chief 0 +assign 0 +select 0 +bowen 0 +report 0 +class 0 +oral 0 +either 0 +good 0 +freeli 0 +avail 0 +implement 0 +consult 0 +make 0 +choic 0 +test 0 +hope 0 +guest 0 +localform 0 +commun 0 +tent 0 +schedul 0 +rick 0 +tannei 0 +continu 0 +trevor 0 +hick 0 +ruben 0 +gamboa 0 +squar 0 +root 0 +samuel 0 +guyer 0 +circal 0 +process 0 +algebra 0 +sawada 0 +russel 0 +turpin 0 +galoi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..548c97f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,151 @@ +project 2 +comput 1 +memori 1 +cours 1 +system 1 +design 1 +pipelin 1 +midterm 1 +advanc 1 +architectur 1 +quantit 1 +analysi 1 +major 1 +parallel 1 +network 1 +hour 1 +hennessi 1 +edit 1 +stedit 1 +research 1 +work 1 +pair 1 +tech 1 +hazard 1 +branch 1 +predictionch 1 +revieww 1 +spring 1 +architecturethi 0 +focus 0 +techniqu 0 +evaluationof 0 +modern 0 +select 0 +appropri 0 +benchmarksto 0 +reveal 0 +compar 0 +perform 0 +altern 0 +choic 0 +insystem 0 +emphasi 0 +compon 0 +subsystem 0 +highperform 0 +instruct 0 +level 0 +memoryhierarchi 0 +input 0 +output 0 +orient 0 +interconnect 0 +studentswil 0 +undertak 0 +oftheir 0 +choos 0 +administr 0 +informationuniqu 0 +number 0 +meet 0 +place 0 +instructor 0 +mikedahlinoffic 0 +appoint 0 +tbdtaoffic 0 +tbdreadingstextbook 0 +patteson 0 +computerarchitectur 0 +approach 0 +second 0 +note 0 +significantli 0 +differ 0 +recommend 0 +attempt 0 +textbook 0 +errata 0 +sheetfor 0 +pattersonin 0 +addit 0 +read 0 +current 0 +paper 0 +variou 0 +aspect 0 +currentcomput 0 +watch 0 +space 0 +pointer 0 +readinglist 0 +grade 0 +class 0 +particip 0 +homework 0 +exam 0 +scheduleweekdatetopicreadingduejan 0 +intro 0 +admin 0 +review 0 +perf 0 +cost 0 +amdahl 0 +trendsch 0 +cach 0 +isa 0 +mlkholidayf 0 +static 0 +proposalfeb 0 +scoreboard 0 +tomasulu 0 +speculationch 0 +dynam 0 +predict 0 +limit 0 +vector 0 +processorsch 0 +dfeb 0 +hierarchych 0 +surveyfeb 0 +dram 0 +banksf 0 +breakm 0 +breakmar 0 +metric 0 +queu 0 +buss 0 +disk 0 +raidch 0 +tertiari 0 +networksf 0 +networksch 0 +checkpointapr 0 +architecturesf 0 +mppsch 0 +mpp 0 +preseantationsm 0 +presentationsfri 0 +last 0 +classesm 0 +written 0 +reportaddit 0 +resourcescours 0 +page 0 +product 0 +confer 0 +bibliographi 0 +reportsyahoo 0 +businessand 0 +economi 0 +compani 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..53226c93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,71 @@ +oper 1 +class 1 +system 1 +protocol 1 +address 1 +gener 1 +internet 1 +provid 1 +also 1 +paper 1 +project 1 +list 1 +systemsuniqu 0 +recent 0 +explos 0 +interest 0 +world 0 +wide 0 +resultedin 0 +evolv 0 +us 0 +thetradit 0 +concern 0 +interprocess 0 +commun 0 +resourc 0 +alloc 0 +secur 0 +contextof 0 +goal 0 +understandingof 0 +current 0 +state 0 +addressproblem 0 +must 0 +solv 0 +matur 0 +purpos 0 +operatingsystem 0 +hypothesi 0 +behind 0 +design 0 +mani 0 +theissu 0 +context 0 +addressedin 0 +tradit 0 +area 0 +occasionallyread 0 +relat 0 +bear 0 +understandingcurr 0 +problem 0 +reportspoint 0 +research 0 +refer 0 +inform 0 +syllabu 0 +read 0 +schedul 0 +longer 0 +less 0 +organ 0 +rosterhandout 0 +verif 0 +sslprotocol 0 +proofsketch 0 +guidelin 0 +final 0 +talk 0 +report 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..ffb3eb6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,217 @@ +handout 4 +program 3 +exam 3 +solut 3 +date 2 +class 2 +homework 2 +avail 2 +content 2 +back 2 +last 2 +object 2 +pick 2 +lectur 2 +part 2 +utexa 2 +exampl 2 +fantasm 2 +discuss 2 +offic 2 +start 2 +test 2 +earli 2 +late 2 +problem 2 +cours 2 +hour 2 +p_global 2 +noon 1 +session 1 +drop 1 +mondai 1 +wednesdai 1 +practic 1 +note 1 +page 1 +final 1 +info 1 +regist 1 +electron 1 +section 1 +check 1 +place 1 +pass 1 +bit 1 +also 1 +import 1 +tue 1 +meet 1 +time 1 +thursdai 1 +us 1 +academ 1 +chang 1 +bonu 1 +slide 1 +turn 1 +electronc 1 +user 1 +manual 1 +disk 1 +paramet 1 +macsbug 1 +electoron 1 +sourc 1 +output 1 +fall 1 +comput 1 +organ 1 +updat 1 +professor 1 +yurkanan 1 +dragon 1 +version 1 +oper 1 +email 1 +yoonsuck 1 +choe 1 +yschoe 1 +edum 1 +pleas 1 +holidai 1 +period 1 +withdraw 1 +typo 1 +remov 1 +rightmost 1 +make 1 +fridai 1 +saturdai 1 +announc 1 +door 1 +procudur 1 +thur 1 +assign 1 +front 1 +system 1 +document 1 +folder 1 +powermac 1 +quadra 1 +mac 1 +instruct 1 +compil 1 +call 1 +model 1 +newsgroup 1 +pascal 1 +constantli 0 +construct 0 +onmon 0 +titl 0 +prerequisit 0 +grade 0 +least 0 +chri 0 +edmondson 0 +new 0 +post 0 +extra 0 +visit 0 +syllabu 0 +locat 0 +attend 0 +cynthia 0 +deepa 0 +ramani 0 +dparam 0 +eduw 0 +zhang 0 +gzhang 0 +eduf 0 +conduct 0 +calendar 0 +labor 0 +refund 0 +rare 0 +extenu 0 +circumst 0 +automat 0 +begin 0 +penalti 0 +reason 0 +univers 0 +registr 0 +fail 0 +deadlin 0 +appli 0 +graduat 0 +thanksgiv 0 +appeal 0 +schedul 0 +glanc 0 +correct 0 +night 0 +boxin 0 +prof 0 +letter 0 +student 0 +overview 0 +descript 0 +topic 0 +singl 0 +ascii 0 +code 0 +chart 0 +home 0 +work 0 +endia 0 +memori 0 +hierarchi 0 +submit 0 +stack 0 +function 0 +requir 0 +comment 0 +real 0 +proc 0 +func 0 +might 0 +risc 0 +architectur 0 +includ 0 +valu 0 +practiv 0 +obsolet 0 +spec 0 +turnin 0 +procedur 0 +gener 0 +interfac 0 +charact 0 +orient 0 +cheat 0 +polici 0 +questionair 0 +must 0 +offici 0 +except 0 +go 0 +held 0 +free 0 +toward 0 +next 0 +thank 0 +brett 0 +jame 0 +data 0 +subroutine_fil 0 +exception_fil 0 +avali 0 +resourc 0 +studi 0 +guid 0 +maintain 0 +austin 0 +utc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..302e5635 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,206 @@ +pascal 2 +porter 2 +final 2 +review 2 +question 2 +link 1 +class 1 +version 1 +program 1 +new 1 +lang 1 +week 1 +offic 1 +exam 1 +page 1 +import 1 +avail 1 +room 1 +post 1 +assign 1 +utexa 1 +discuss 1 +midterm 1 +turbo 1 +us 1 +topic 1 +next 1 +note 1 +dwip 1 +lectur 1 +html 1 +postscript 1 +welch 1 +solut 1 +test 1 +newsgroup 1 +right 1 +semest 1 +inform 1 +announc 1 +take 1 +home 1 +last 1 +howev 1 +relat 1 +good 1 +special 1 +cover 1 +time 1 +need 1 +help 1 +bruce 1 +decemb 1 +click 1 +also 1 +addendum 1 +tutori 1 +session 1 +studi 1 +guid 1 +ansi 1 +articl 1 +homepag 0 +warn 0 +construct 0 +becom 0 +activ 0 +progress 0 +andther 0 +relev 0 +regard 0 +todai 0 +unabl 0 +maintain 0 +forthes 0 +coupl 0 +dai 0 +put 0 +follow 0 +luckfor 0 +held 0 +painter 0 +hall 0 +exact 0 +locat 0 +dependon 0 +availib 0 +door 0 +therewil 0 +someon 0 +moreov 0 +almost 0 +total 0 +coverag 0 +uptoth 0 +feel 0 +free 0 +come 0 +glad 0 +luck 0 +resolutio 0 +complex 0 +theori 0 +nimar 0 +arora 0 +parallel 0 +process 0 +banerje 0 +boolean 0 +circuit 0 +porterquest 0 +sheet 0 +rotat 0 +bit 0 +disregard 0 +somewhat 0 +beyond 0 +scope 0 +slide 0 +present 0 +summar 0 +thecont 0 +whole 0 +reserv 0 +desk 0 +atugl 0 +hope 0 +webpag 0 +soon 0 +experienc 0 +technic 0 +difficulti 0 +caus 0 +length 0 +file 0 +schedul 0 +pleas 0 +check 0 +ad 0 +sostai 0 +tune 0 +download 0 +prolog 0 +comput 0 +scienc 0 +iinstructorbruc 0 +mondai 0 +email 0 +tasoffic 0 +hourslab 0 +section 0 +schedulec 0 +thursdai 0 +uniqu 0 +number 0 +cours 0 +descriptionclass 0 +scheduleclass 0 +includ 0 +articlesclass 0 +newsgroupprogram 0 +assignmentsprogram 0 +pascaltutori 0 +text 0 +format 0 +faqyou 0 +sampl 0 +programm 0 +gener 0 +languag 0 +materi 0 +frequent 0 +ask 0 +zipe 0 +concept 0 +structur 0 +base 0 +newgroup 0 +might 0 +interest 0 +comp 0 +isocomp 0 +maccomp 0 +borlandcomp 0 +misccomp 0 +delphi 0 +miscfj 0 +rememb 0 +access 0 +dell 0 +serverto 0 +mail 0 +prefer 0 +item 0 +option 0 +menu 0 +look 0 +usual 0 +lead 0 +importantstuff 0 +descript 0 +send 0 +comment 0 +critic 0 +suggest 0 +addit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..ffadbdc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,70 @@ +assign 2 +mesa 1 +cours 1 +librari 1 +exampl 1 +graphic 1 +opengl 1 +page 1 +updat 1 +comput 1 +gener 1 +inform 1 +us 1 +utc 1 +book 1 +second 1 +requir 1 +student 1 +code 1 +spring 0 +cscomput 0 +graphicsspr 0 +instructor 0 +donald 0 +fussel 0 +descript 0 +syllabu 0 +year 0 +provid 0 +anopengl 0 +like 0 +platform 0 +hasbeen 0 +instal 0 +public 0 +workstat 0 +sciencesdepart 0 +instruct 0 +sampl 0 +makefil 0 +machin 0 +specif 0 +home 0 +center 0 +refer 0 +manual 0 +ousterhout 0 +welch 0 +turn 0 +note 0 +option 0 +exam 0 +oneor 0 +show 0 +examwil 0 +submit 0 +wish 0 +bothmai 0 +higher 0 +score 0 +curv 0 +willcount 0 +xlib 0 +driver 0 +billthecat 0 +copi 0 +file 0 +directori 0 +contain 0 +slate 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..392a58b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,67 @@ +graphic 1 +mesa 1 +exampl 1 +assign 1 +cours 1 +librari 1 +inform 1 +machin 1 +opengl 1 +page 1 +book 1 +comput 1 +gener 1 +us 1 +utc 1 +welch 1 +code 1 +reinstal 1 +walker 1 +fall 0 +gcomput 0 +graphicsfal 0 +instructor 0 +donald 0 +fussel 0 +descript 0 +syllabu 0 +year 0 +provid 0 +anopengl 0 +like 0 +platform 0 +hasbeen 0 +instal 0 +public 0 +workstat 0 +sciencesdepart 0 +instruct 0 +sampl 0 +makefil 0 +specif 0 +home 0 +center 0 +refer 0 +manual 0 +ousterhout 0 +program 0 +turn 0 +xlib 0 +driver 0 +billthecat 0 +copi 0 +file 0 +directori 0 +contain 0 +slate 0 +new 0 +turnin 0 +work 0 +libtcl 0 +libtk 0 +tclsh 0 +wish 0 +compil 0 +demo 0 +repair 0 +sourc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..616cb585 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,45 @@ +email 1 +utexa 1 +french 1 +offic 1 +hour 1 +taylor 1 +hqliu 1 +homework 1 +problem 1 +last 1 +appear 1 +cryptographi 0 +professor 0 +david 0 +zuckerman 0 +huiqun 0 +station 0 +hall 0 +basement 0 +syllabu 0 +mathemat 0 +background 0 +textbook 0 +ciphertext 0 +notic 0 +answer 0 +abl 0 +recogn 0 +word 0 +canada 0 +frequenc 0 +common 0 +letter 0 +chang 0 +drastic 0 +english 0 +howev 0 +digram 0 +like 0 +page 0 +modifi 0 +septemb 0 +comment 0 +welcom 0 +send 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..2b246d7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,314 @@ +problem 2 +class 2 +distribut 2 +protocol 2 +solut 2 +comput 1 +requir 1 +paper 1 +system 1 +final 1 +prove 1 +assign 1 +rajeev 1 +exam 1 +global 1 +topic 1 +show 1 +snapshot 1 +point 1 +lorenzo 1 +offic 1 +hour 1 +textbook 1 +suggest 1 +joshi 1 +mondai 1 +cover 1 +time 1 +us 1 +state 1 +detect 1 +messag 1 +file 1 +discuss 1 +give 1 +homework 1 +page 1 +proof 1 +deriv 1 +link 1 +cours 1 +set 1 +midterm 1 +newsgroup 1 +utexa 1 +hall 1 +phone 1 +expect 1 +sourc 1 +refer 1 +wednesdai 1 +second 1 +design 1 +distributedsystem 1 +includ 1 +clock 1 +manag 1 +agreement 1 +byzantin 1 +group 1 +program 1 +techniqu 1 +secur 1 +integr 1 +gener 1 +depend 1 +present 1 +allow 1 +consist 1 +written 1 +collabor 1 +student 1 +encourag 1 +take 1 +survei 1 +ofth 1 +last 1 +colleagu 1 +improv 1 +commun 1 +someth 1 +solv 1 +algorithm 1 +assum 1 +fifo 1 +asynchron 1 +predic 1 +number 1 +deadlock 1 +would 1 +cost 1 +postscript 1 +send 1 +ispr 0 +instructor 0 +alvisiteach 0 +assist 0 +joshicont 0 +locat 0 +mechan 0 +content 0 +grade 0 +inform 0 +pertain 0 +instruct 0 +stafflorenzo 0 +alvisi 0 +taylor 0 +tuesdai 0 +thursdai 0 +meet 0 +arrang 0 +appoint 0 +mechanicsi 0 +materi 0 +requiredtextbook 0 +remaind 0 +come 0 +given 0 +classat 0 +appropri 0 +lectur 0 +robert 0 +moor 0 +isutexa 0 +edit 0 +mullend 0 +editor 0 +acmpress 0 +addison 0 +weslei 0 +publish 0 +compani 0 +read 0 +contentc 0 +abstract 0 +tobe 0 +build 0 +tomorrow 0 +cut 0 +logic 0 +vector 0 +causal 0 +messagedeliveri 0 +properti 0 +log 0 +checkpoint 0 +replic 0 +machin 0 +approach 0 +primari 0 +backupapproach 0 +order 0 +multicast 0 +applic 0 +cach 0 +disconnect 0 +oper 0 +servic 0 +synchron 0 +encrypt 0 +authent 0 +principl 0 +thepresent 0 +case 0 +studi 0 +exemplifi 0 +principleshav 0 +implement 0 +real 0 +interest 0 +meor 0 +size 0 +apresent 0 +share 0 +memori 0 +object 0 +kernel 0 +support 0 +weak 0 +replica 0 +electron 0 +commerc 0 +wide 0 +area 0 +networksgradingther 0 +begrad 0 +demonstr 0 +credibl 0 +effort 0 +onbehalf 0 +author 0 +whether 0 +right 0 +wrong 0 +willrec 0 +better 0 +three 0 +ispermit 0 +acollabor 0 +singl 0 +submit 0 +forgrad 0 +name 0 +collaborationswil 0 +consid 0 +violat 0 +academ 0 +home 0 +examin 0 +nocollabor 0 +howev 0 +towrit 0 +issuesthat 0 +list 0 +bedistribut 0 +start 0 +henc 0 +week 0 +tocomplet 0 +also 0 +team 0 +prepar 0 +twolectur 0 +previous 0 +choosethi 0 +option 0 +write 0 +asingl 0 +warmli 0 +toconsid 0 +volunt 0 +excellentopportun 0 +skill 0 +setsin 0 +subsequ 0 +shouldconform 0 +follow 0 +guidelin 0 +synonym 0 +precis 0 +isrequir 0 +ask 0 +imposs 0 +thatmak 0 +clear 0 +cannot 0 +matter 0 +algorithmi 0 +insuffici 0 +particular 0 +work 0 +develop 0 +must 0 +accompani 0 +ofcorrect 0 +unless 0 +explicitli 0 +told 0 +otherwis 0 +thetextbook 0 +channel 0 +asnapshot 0 +onth 0 +assumpt 0 +correct 0 +theprotocol 0 +produc 0 +atmost 0 +note 0 +book 0 +contain 0 +mattern 0 +thatcontain 0 +urg 0 +resist 0 +thetempt 0 +visit 0 +librari 0 +agener 0 +stabl 0 +moreeffici 0 +specif 0 +often 0 +conceptu 0 +simpler 0 +effici 0 +term 0 +exchang 0 +base 0 +special 0 +ideal 0 +need 0 +central 0 +monitorprocess 0 +process 0 +monitor 0 +basedsnapshot 0 +nowonlin 0 +filedescrib 0 +examth 0 +constitut 0 +fridaymai 0 +thepostscript 0 +describ 0 +question 0 +feel 0 +freeto 0 +email 0 +idea 0 +pleas 0 +yoursuggest 0 +edurajeev 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..521d65fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,57 @@ +class 1 +fall 1 +jacob 1 +kornerup 1 +note 1 +page 1 +taught 1 +avail 1 +read 1 +midterm 1 +reflect 0 +current 0 +version 0 +differ 0 +content 0 +scope 0 +welcom 0 +homepag 0 +austin 0 +bywil 0 +adam 0 +practic 0 +informationabout 0 +cours 0 +look 0 +syllabu 0 +linea 0 +technic 0 +compil 0 +program 0 +turn 0 +inhomework 0 +electron 0 +homework 0 +solut 0 +time 0 +crude 0 +interfac 0 +newsgrouputexa 0 +correspond 0 +takesplac 0 +exampl 0 +textbook 0 +pascalprogramm 0 +organ 0 +chapter 0 +link 0 +home 0 +requir 0 +overhead 0 +viewinginform 0 +projecthow 0 +find 0 +offic 0 +studi 0 +examand 0 +answer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..17db32aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,198 @@ +java 3 +cours 2 +comp 2 +lang 2 +program 2 +addison 2 +weslei 2 +object 1 +orient 1 +newsgroup 1 +librari 1 +design 1 +sourc 1 +utexa 1 +inform 1 +class 1 +manual 1 +code 1 +student 1 +follow 1 +relat 1 +stroustrup 1 +faq 1 +standard 1 +applet 1 +lavend 1 +eduoffic 1 +hour 1 +gokul 1 +templat 1 +opportun 1 +solut 1 +problem 1 +us 1 +text 1 +avail 1 +style 1 +draft 1 +setup 1 +link 1 +site 1 +postscript 1 +home 1 +compil 1 +libg 1 +server 1 +archiv 1 +note 1 +descriptionc 0 +programminglast 0 +updat 0 +professor 0 +greg 0 +appt 0 +rajaram 0 +mondai 0 +wednesdai 0 +station 0 +lavendercours 0 +infocours 0 +syllabusannouncementslectur 0 +noteshomework 0 +solutionsprogram 0 +assignmentsgnu 0 +manualsstandard 0 +codesocket 0 +manualdescript 0 +intend 0 +alreadi 0 +anintroductori 0 +offer 0 +introduct 0 +give 0 +think 0 +comput 0 +manner 0 +captur 0 +reusablepattern 0 +construct 0 +polymorph 0 +typehierarchi 0 +write 0 +profici 0 +professionallyus 0 +challeng 0 +coop 0 +bookstor 0 +horstmann 0 +master 0 +john 0 +wilei 0 +associ 0 +materiali 0 +drawn 0 +lectur 0 +materi 0 +languag 0 +edit 0 +elli 0 +annot 0 +refer 0 +evolut 0 +cargil 0 +cline 0 +lomow 0 +coplien 0 +advanc 0 +idiom 0 +plauger 0 +prentic 0 +hall 0 +gamma 0 +helm 0 +johnson 0 +vlissid 0 +pattern 0 +element 0 +reusabl 0 +softwar 0 +forum 0 +open 0 +discuss 0 +announcementsabout 0 +strongli 0 +encourag 0 +particip 0 +linediscuss 0 +fellow 0 +classmat 0 +lavendery 0 +also 0 +interest 0 +usenet 0 +helpjava 0 +hotjava 0 +advocaci 0 +misc 0 +programm 0 +secur 0 +tech 0 +javascript 0 +internet 0 +oopth 0 +date 0 +pleas 0 +know 0 +dead 0 +ansi 0 +page 0 +lab 0 +renssela 0 +polytechn 0 +institut 0 +product 0 +info 0 +objectspac 0 +libstdc 0 +mitgnu 0 +cygnusgnu 0 +document 0 +doug 0 +pagec 0 +mirror 0 +list 0 +ftpobject 0 +system 0 +developmentindex 0 +librariesth 0 +virtual 0 +libraryindex 0 +sourcesth 0 +talig 0 +frameworkjava 0 +javasoft 0 +gamelan 0 +huge 0 +registri 0 +digit 0 +espresso 0 +good 0 +summari 0 +current 0 +centr 0 +new 0 +event 0 +jar 0 +rate 0 +denni 0 +kafura 0 +virginia 0 +techdoug 0 +schmidt 0 +irvin 0 +washington 0 +universitydoug 0 +sunyintroductori 0 +univers 0 +groningen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..d8b0f8b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,56 @@ +program 1 +parallel 1 +languag 1 +assign 1 +tuesdai 1 +thursdai 1 +calvin 1 +offic 1 +utexa 1 +exampl 1 +commun 1 +manual 1 +compilerscst 0 +compilersfal 0 +lectur 0 +instructor 0 +taylor 0 +phone 0 +email 0 +hour 0 +handout 0 +gener 0 +inform 0 +case 0 +tera 0 +comput 0 +copyright 0 +posix 0 +thread 0 +skeleton 0 +code 0 +tutori 0 +hello 0 +world 0 +ironman 0 +interfac 0 +onlin 0 +postscript 0 +logp 0 +paper 0 +time 0 +spent 0 +messag 0 +pass 0 +share 0 +memori 0 +foundat 0 +practic 0 +partit 0 +dynam 0 +adapt 0 +grid 0 +hierarchieslast 0 +modifi 0 +decemb 0 +linlin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..ce81ce6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,16 @@ +inform 1 +send 1 +mail 1 +home 0 +page 0 +csintroduct 0 +oper 0 +system 0 +class 0 +handout 0 +assign 0 +read 0 +project 0 +group 0 +prof 0 +newsgroup 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..5d8171f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,16 @@ +distribut 1 +read 0 +list 0 +fall 0 +thot 0 +topic 0 +systemsfil 0 +systemstopolog 0 +systemselectron 0 +commenrcefailur 0 +detectorsdistribut 0 +objectsconsistencysecuregroup 0 +communicationlanguag 0 +system 0 +dsmmobil 0 +comput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..3aaf04a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,13 @@ +mine 1 +monitor 1 +databas 0 +databasesprof 0 +daniel 0 +mirankernew 0 +seminarschedul 0 +term 0 +project 0 +materi 0 +overviewtentativeread 0 +list 0 +homeworkproject 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..233151d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,25 @@ +cours 1 +code 1 +introduct 0 +artifici 0 +intellig 0 +instructor 0 +raymond 0 +mooneytim 0 +placespr 0 +tuth 0 +taylor 0 +hall 0 +informationclick 0 +inform 0 +sheetand 0 +syllabu 0 +last 0 +year 0 +updat 0 +file 0 +moonei 0 +depart 0 +networkfor 0 +trace 0 +assign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..cfcf89f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,35 @@ +homework 1 +cours 1 +test 1 +lisp 1 +program 1 +code 1 +symbol 0 +instructor 0 +raymond 0 +mooneyteach 0 +assist 0 +sowmya 0 +ramachandrantim 0 +placetu 0 +informationclick 0 +inform 0 +sheet 0 +syllabu 0 +informationon 0 +alsout 0 +allegro 0 +info 0 +page 0 +textparadigm 0 +artifici 0 +intellig 0 +case 0 +studi 0 +common 0 +lispassignmentsse 0 +file 0 +moonei 0 +depart 0 +networkfor 0 +trace 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..fbe3107e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,52 @@ +learn 2 +homework 1 +cours 1 +machin 1 +base 1 +code 1 +project 1 +instructor 0 +raymond 0 +mooneytim 0 +placetu 0 +informationclick 0 +inform 0 +sheetand 0 +syllabu 0 +textmachinelearninglectur 0 +slide 0 +introduct 0 +concept 0 +gener 0 +order 0 +decis 0 +tree 0 +experiment 0 +evalu 0 +comput 0 +theori 0 +rule 0 +induct 0 +logic 0 +program 0 +neural 0 +network 0 +cluster 0 +unsupervis 0 +bayesian 0 +instanc 0 +explan 0 +learningassignmentsse 0 +file 0 +moonei 0 +depart 0 +networkfor 0 +trace 0 +final 0 +suggest 0 +spring 0 +paper 0 +format 0 +outlin 0 +talk 0 +version 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..82376bd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,100 @@ +program 1 +exam 1 +languag 1 +guid 1 +comput 1 +scheme 1 +foundat 1 +sciencec 1 +section 1 +cours 1 +treesassign 1 +assign 1 +studi 1 +introduct 0 +scienc 0 +intend 0 +major 0 +atleast 0 +semest 0 +high 0 +school 0 +number 0 +willb 0 +chang 0 +next 0 +year 0 +never 0 +taken 0 +programmingcours 0 +take 0 +porter 0 +instead 0 +strong 0 +math 0 +background 0 +least 0 +precalculu 0 +requir 0 +dialect 0 +lisp 0 +theschem 0 +implement 0 +call 0 +gambit 0 +run 0 +macintoshcomput 0 +move 0 +faster 0 +previou 0 +coursesand 0 +emphas 0 +concept 0 +syntax 0 +work 0 +hard 0 +hopefulli 0 +learninga 0 +syllabu 0 +directori 0 +softwar 0 +tutorcopi 0 +pcassign 0 +machin 0 +simulationassign 0 +surf 0 +webassign 0 +basic 0 +schemeassign 0 +plai 0 +peano 0 +gamblingassign 0 +turtl 0 +graphicsassign 0 +snow 0 +list 0 +manipulationstudi 0 +vocabulari 0 +plot 0 +thickensassign 0 +treasur 0 +huntassign 0 +symbol 0 +algebraassign 0 +data 0 +abstract 0 +matricesstudi 0 +draw 0 +express 0 +unparsingassign 0 +translationstudi 0 +final 0 +thur 0 +gordon 0 +novak 0 +assignmentsprogram 0 +file 0 +descriptionsprogram 0 +submiss 0 +gradingmidterm 0 +guidefin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..f4cc6130 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,41 @@ +cours 1 +program 1 +compilersc 1 +compil 1 +student 1 +powerpc 1 +summer 1 +file 1 +studi 1 +cover 0 +design 0 +construct 0 +programminglanguag 0 +write 0 +pascal 0 +codei 0 +gener 0 +processor 0 +server 0 +incorpor 0 +chip 0 +heavi 0 +workload 0 +especi 0 +plan 0 +take 0 +expect 0 +dedicatetheir 0 +live 0 +five 0 +week 0 +syllabusprogram 0 +assignmentsprogram 0 +descript 0 +directori 0 +submiss 0 +gradingmidterm 0 +guidefin 0 +exam 0 +guidegordon 0 +novak 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..d97631f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,53 @@ +intellig 1 +studi 1 +artifici 1 +comput 1 +stori 1 +intelligencec 0 +intelligenceartifici 0 +defin 0 +thecomput 0 +requir 0 +behavior 0 +attempt 0 +todupl 0 +us 0 +connectspercept 0 +environ 0 +action 0 +appropri 0 +achiev 0 +thegoal 0 +actor 0 +cours 0 +survei 0 +major 0 +topic 0 +includ 0 +search 0 +logic 0 +andknowledg 0 +represent 0 +natur 0 +languag 0 +process 0 +withbrief 0 +coverag 0 +brain 0 +machin 0 +vision 0 +syllabusprogram 0 +assignmentsprogram 0 +file 0 +descriptionsmidterm 0 +guidefin 0 +exam 0 +guidepred 0 +calculu 0 +problemssolut 0 +select 0 +problemsnot 0 +bibliographi 0 +human 0 +braingordon 0 +novak 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..104cb6e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,52 @@ +program 1 +automat 1 +lectur 1 +semest 1 +programmingc 0 +programmingautomat 0 +gener 0 +execut 0 +programsfrom 0 +specif 0 +higher 0 +level 0 +ordinari 0 +languag 0 +cours 0 +consist 0 +first 0 +third 0 +homework 0 +problem 0 +assign 0 +given 0 +illustrateth 0 +materi 0 +long 0 +requirelearn 0 +sever 0 +kind 0 +system 0 +latter 0 +partof 0 +cover 0 +read 0 +research 0 +literatur 0 +student 0 +expect 0 +present 0 +paper 0 +class 0 +syllabusbibliographyassign 0 +compil 0 +optim 0 +done 0 +handpattern 0 +matchingobject 0 +orient 0 +programmingintroduct 0 +glispview 0 +graphic 0 +programminggordon 0 +novak 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..a8a45f9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,675 @@ +languag 2 +system 2 +program 2 +comput 2 +data 2 +parallel 2 +level 2 +algorithm 2 +network 2 +fault 2 +cilk 2 +toler 2 +logic 2 +compil 2 +softwar 2 +us 2 +problem 2 +adapt 1 +special 1 +design 1 +librari 1 +present 1 +approach 1 +describ 1 +implement 1 +learn 1 +cours 1 +robot 1 +natur 1 +applic 1 +user 1 +call 1 +file 1 +allow 1 +machin 1 +provid 1 +represent 1 +model 1 +interfac 1 +effici 1 +queri 1 +graduat 1 +task 1 +evolut 1 +neural 1 +open 1 +process 1 +reus 1 +high 1 +cooper 1 +featur 1 +failur 1 +talk 1 +well 1 +avail 1 +wide 1 +area 1 +abstract 1 +mathemat 1 +state 1 +result 1 +alamo 1 +sourc 1 +analysi 1 +idea 1 +introduct 1 +robert 1 +scienc 1 +semant 1 +hierarchi 1 +cognit 1 +sequenti 1 +decis 1 +cluster 1 +evalu 1 +inform 1 +current 1 +enabl 1 +distribut 1 +environ 1 +advantag 1 +explor 1 +perform 1 +execut 1 +gener 1 +issu 1 +differ 1 +formal 1 +method 1 +build 1 +make 1 +produc 1 +optim 1 +symbiot 1 +search 1 +develop 1 +databas 1 +engin 1 +chill 1 +also 1 +procedur 1 +conceptu 1 +notat 1 +pram 1 +lectur 1 +offic 1 +utexa 1 +free 1 +time 1 +student 1 +undergradu 1 +spatial 1 +principl 1 +programmingoctob 1 +wilsonextens 1 +ramachandranth 1 +beyond 1 +simpl 1 +technolog 1 +trend 1 +highli 1 +server 1 +power 1 +requir 1 +hand 1 +translat 1 +reliabl 1 +access 1 +solut 1 +lightweight 1 +goal 1 +scale 1 +depend 1 +sever 1 +number 1 +need 1 +integr 1 +theapplic 1 +support 1 +address 1 +effect 1 +overview 1 +runtim 1 +workstat 1 +within 1 +continu 1 +even 1 +automat 1 +includ 1 +fast 1 +close 1 +among 1 +singl 1 +consist 1 +space 1 +structur 1 +discret 1 +action 1 +place 1 +path 1 +built 1 +base 1 +view 1 +simpli 1 +prolog 1 +known 1 +concern 1 +experi 1 +appli 1 +reinforc 1 +sane 1 +popul 1 +genet 1 +form 1 +abl 1 +broad 1 +interest 1 +datasourc 1 +architectur 1 +theabstract 1 +embodi 1 +higher 1 +code 1 +advanc 1 +facil 1 +serv 1 +resolv 1 +exampl 1 +offer 1 +determinist 1 +corpu 1 +difficult 1 +easi 1 +asymptot 1 +main 1 +underli 1 +theform 1 +extens 1 +portabl 1 +discuss 1 +sciencecst 0 +sciencefal 0 +mondai 0 +instructor 0 +blumof 0 +taylor 0 +phone 0 +email 0 +hour 0 +thursdai 0 +feel 0 +stop 0 +semest 0 +seminar 0 +taken 0 +apass 0 +fail 0 +basi 0 +topic 0 +honor 0 +receiv 0 +credit 0 +must 0 +beregist 0 +attend 0 +least 0 +schedulespeakertitleseptemb 0 +mirankeralamo 0 +warehouseseptemb 0 +kuipersth 0 +humanand 0 +mapsseptemb 0 +blumofecilk 0 +reliableparallel 0 +workstationsseptemb 0 +risto 0 +miikkulainenlearn 0 +throughsymbiot 0 +networksoctob 0 +vladimir 0 +lifschitzmathemat 0 +paul 0 +reflectionoctob 0 +mooneylearn 0 +usinginduct 0 +mike 0 +dahlindistribut 0 +internetsnovemb 0 +gordon 0 +novaksoftwar 0 +genericprocedur 0 +viewsnovemb 0 +vijaya 0 +parallelalgorithmsnovemb 0 +lorenzo 0 +alvisilighweight 0 +tolerancenovemb 0 +calvin 0 +linadapt 0 +optimizationdecemb 0 +greg 0 +plaxtonanalysi 0 +algorithmslighweight 0 +tolerancelorenzo 0 +alvisidistribut 0 +move 0 +confin 0 +academia 0 +andresearch 0 +lab 0 +revolution 0 +busi 0 +government 0 +organ 0 +citizen 0 +andcollect 0 +promis 0 +todramat 0 +increas 0 +pace 0 +revolut 0 +thedesign 0 +beyondth 0 +client 0 +paradigm 0 +har 0 +ofdistribut 0 +scope 0 +emphasi 0 +toleranttechniqu 0 +undergo 0 +dramat 0 +chang 0 +willceas 0 +expens 0 +applicationsto 0 +exot 0 +distributedinform 0 +infrastructur 0 +acompetit 0 +guarante 0 +criticalinform 0 +engineerfault 0 +dedic 0 +resourc 0 +negligibleimpact 0 +cost 0 +offailur 0 +transpar 0 +programm 0 +emerg 0 +communicatethrough 0 +messag 0 +onnetwork 0 +workstationsrobert 0 +blumofethi 0 +pronouncedsilk 0 +multithread 0 +andcilk 0 +functionalsubset 0 +providesadapt 0 +tranpar 0 +touser 0 +mean 0 +ofworkst 0 +run 0 +grow 0 +shrinkdynam 0 +idl 0 +onth 0 +amount 0 +addit 0 +cilkprogram 0 +workstationscrash 0 +detect 0 +andrecov 0 +livedemonstr 0 +internetsmik 0 +dahlinthi 0 +give 0 +indistribut 0 +applicationsmotiv 0 +aggress 0 +inclust 0 +servicei 0 +request 0 +nodesto 0 +better 0 +centralserv 0 +challeng 0 +goodperform 0 +despit 0 +limit 0 +networkperform 0 +node 0 +projectwil 0 +human 0 +mapsbenjamin 0 +kuipershuman 0 +map 0 +reli 0 +forlarg 0 +ontolog 0 +similarli 0 +varietyof 0 +propos 0 +andmap 0 +unknown 0 +cast 0 +diverserepresent 0 +spatialsemant 0 +object 0 +relat 0 +andassumpt 0 +foundat 0 +thecontrol 0 +dynam 0 +whose 0 +stabl 0 +equilibrium 0 +point 0 +beabstract 0 +distinct 0 +trajectori 0 +link 0 +givinga 0 +causal 0 +graph 0 +causalgraph 0 +turn 0 +topologicalnetwork 0 +local 0 +metric 0 +occupancygrid 0 +neighborhood 0 +theframework 0 +topolog 0 +without 0 +usual 0 +ofglob 0 +programmingvladimir 0 +lifschitzlog 0 +sister 0 +functionalprogram 0 +notne 0 +contain 0 +explicit 0 +oper 0 +instruct 0 +instead 0 +itcan 0 +fact 0 +sufficientto 0 +solv 0 +declar 0 +executedus 0 +autom 0 +reason 0 +best 0 +logicprogram 0 +theori 0 +withdefin 0 +thereason 0 +investig 0 +thesound 0 +optimizationcalvin 0 +linthi 0 +andtheir 0 +differenthardwar 0 +platform 0 +efficientand 0 +usabl 0 +framework 0 +suchlibrari 0 +three 0 +plan 0 +thesetechniqu 0 +scientif 0 +weexplain 0 +facilit 0 +ofneur 0 +networksristo 0 +miikkulainena 0 +novel 0 +neuro 0 +evolv 0 +neuronsthrough 0 +given 0 +promot 0 +inth 0 +anddiscourag 0 +converg 0 +suboptim 0 +toextract 0 +domain 0 +specif 0 +spars 0 +rang 0 +sequentialdecis 0 +control 0 +game 0 +plai 0 +resourcemanag 0 +warehousedan 0 +mirankerth 0 +effort 0 +direct 0 +intra 0 +andint 0 +enumer 0 +site 0 +theuser 0 +illus 0 +virtual 0 +follow 0 +byqueri 0 +tool 0 +central 0 +corba 0 +compliant 0 +interfacethat 0 +uniform 0 +heterogen 0 +ofabstract 0 +clever 0 +algorithmsand 0 +separ 0 +isol 0 +buffer 0 +anddata 0 +prefetch 0 +claim 0 +often 0 +anobject 0 +orient 0 +deduct 0 +infer 0 +activedatabas 0 +mine 0 +constructedus 0 +common 0 +final 0 +sinc 0 +output 0 +databasefacil 0 +compon 0 +thealamo 0 +compos 0 +dataintegr 0 +particular 0 +anticip 0 +elementsof 0 +repres 0 +meta 0 +andsemant 0 +conflict 0 +ultim 0 +furthercomposit 0 +complex 0 +knowledg 0 +answerhigh 0 +induct 0 +logicprogrammingraymond 0 +mooneyinduct 0 +learningprolog 0 +offirst 0 +order 0 +standard 0 +learningmethod 0 +constrain 0 +fix 0 +length 0 +vector 0 +areappli 0 +believethi 0 +richer 0 +import 0 +havedevelop 0 +parsersfrom 0 +pars 0 +sentenc 0 +obtain 0 +superior 0 +onsever 0 +artifici 0 +corpora 0 +previous 0 +test 0 +networkmethod 0 +encourag 0 +realist 0 +ati 0 +ofairlin 0 +automaticallydevelop 0 +complet 0 +englishdatabas 0 +moreaccur 0 +parser 0 +smallgeograph 0 +foidl 0 +past 0 +tens 0 +english 0 +surpass 0 +previou 0 +treemethod 0 +throughviewsgordon 0 +novak 0 +clearli 0 +good 0 +toachiev 0 +practic 0 +assumpt 0 +thesoftwar 0 +type 0 +typesus 0 +agener 0 +version 0 +custom 0 +graphic 0 +specifyview 0 +theworld 0 +write 0 +adesir 0 +algorithmsgreg 0 +plaxtona 0 +major 0 +focu 0 +theoret 0 +andanalysi 0 +random 0 +forspecif 0 +research 0 +notuncommon 0 +come 0 +across 0 +written 0 +paper 0 +straightforward 0 +surprisingli 0 +lengthi 0 +deal 0 +minor 0 +side 0 +case 0 +havelittl 0 +noth 0 +suchpap 0 +seem 0 +signific 0 +andform 0 +difficulti 0 +gapsinher 0 +convent 0 +inadequatefor 0 +succinctli 0 +certain 0 +straightforwardalgorithm 0 +significantli 0 +reduc 0 +theconceptu 0 +associ 0 +trivialclass 0 +concret 0 +consid 0 +analysisof 0 +linear 0 +select 0 +blum 0 +floyd 0 +pratt 0 +rivest 0 +tarjan 0 +algorithmsvijaya 0 +forcombinatori 0 +studi 0 +recentyear 0 +larg 0 +willdescrib 0 +work 0 +parallelalgorithm 0 +thesealgorithm 0 +massiv 0 +maspar 0 +thendescrib 0 +queu 0 +variant 0 +wepropos 0 +appropri 0 +parallelshar 0 +memori 0 +tradit 0 +reflectionpaul 0 +ad 0 +fairli 0 +modif 0 +addnew 0 +analys 0 +reflect 0 +examin 0 +ofinterest 0 +part 0 +affect 0 +structureaccordingli 0 +thing 0 +modular 0 +adapat 0 +recent 0 +workon 0 +rscheme 0 +extensiblelanguag 0 +last 0 +modifi 0 +novemb 0 +blumoferdb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..d3344a94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,68 @@ +problem 2 +chapter 2 +except 1 +solut 1 +offic 1 +program 1 +assign 1 +septemb 1 +octob 1 +utexa 1 +exam 1 +novemb 1 +introduct 1 +oper 1 +lectur 1 +robert 1 +phone 1 +email 1 +hour 1 +thursdai 1 +gooti 1 +exampl 1 +found 1 +crypt 1 +topic 1 +cover 1 +midterm 1 +decemb 1 +systemsc 0 +systemsfal 0 +mondai 0 +wednesdai 0 +instructor 0 +blumof 0 +taylor 0 +feel 0 +free 0 +stop 0 +time 0 +teach 0 +assist 0 +subramanyam 0 +tuesdai 0 +station 0 +solari 0 +canb 0 +implement 0 +support 0 +multiplemap 0 +assum 0 +map 0 +file 0 +least 0 +long 0 +themap 0 +test 0 +encrypt 0 +decrypt 0 +handout 0 +gener 0 +inform 0 +final 0 +solutionsread 0 +book 0 +date 0 +last 0 +modifi 0 +blumoferdb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..f9fceca5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,45 @@ +neural 1 +network 1 +utexa 1 +risto 1 +offic 1 +class 1 +fall 0 +networksfal 0 +uniqu 0 +number 0 +instructor 0 +miikkulainen 0 +bednar 0 +jbednar 0 +station 0 +text 0 +lauren 0 +fausett 0 +fundament 0 +ofneur 0 +architectur 0 +algorithm 0 +applic 0 +englewood 0 +cliff 0 +prenticehal 0 +select 0 +paper 0 +note 0 +copi 0 +slide 0 +us 0 +lectur 0 +grade 0 +homework 0 +midterm 0 +final 0 +detail 0 +schedulehomework 0 +assignmentsexamsclass 0 +resourcesa 0 +postscript 0 +versionof 0 +syllabusristo 0 +edusun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..a35bf80b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,76 @@ +cognit 1 +scienc 1 +utexa 1 +risto 1 +read 1 +discuss 1 +introduct 1 +dept 1 +hall 1 +offic 1 +hour 1 +appt 1 +student 1 +note 1 +short 1 +page 1 +fall 0 +sciencefal 0 +instructor 0 +nichola 0 +asher 0 +philosophi 0 +waggen 0 +nasher 0 +berti 0 +miikkulainen 0 +comput 0 +taylor 0 +text 0 +posner 0 +foundat 0 +mitpress 0 +packet 0 +requir 0 +regular 0 +interv 0 +submit 0 +critic 0 +commentari 0 +collabor 0 +withanoth 0 +also 0 +write 0 +paper 0 +approxim 0 +signific 0 +research 0 +topic 0 +find 0 +ofinterest 0 +count 0 +toward 0 +final 0 +grade 0 +thepap 0 +class 0 +attend 0 +particip 0 +alsorequir 0 +detail 0 +cours 0 +descriptioncours 0 +schedulediscuss 0 +notesperson 0 +adscollabor 0 +paperclass 0 +resourcesstud 0 +questionnaireus 0 +link 0 +center 0 +list 0 +sciencefaculti 0 +pointer 0 +resourc 0 +gener 0 +edusun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..d0bce74b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,190 @@ +program 1 +respons 1 +pascal 1 +page 1 +cours 1 +assign 1 +student 1 +take 1 +grade 1 +hour 1 +syllabu 1 +detail 1 +exam 1 +semest 1 +class 1 +wait 1 +deadlin 1 +quizz 1 +room 1 +suzi 1 +need 1 +requir 1 +schedul 1 +thenewsgroup 1 +utexa 1 +updat 1 +work 1 +prepar 1 +becom 1 +quiz 1 +long 1 +warn 1 +mani 1 +group 1 +note 1 +limit 1 +thetest 1 +programmingcsp 0 +pascalintroductori 0 +comput 0 +programminginstructor 0 +gallagherwelcom 0 +excit 0 +intellectu 0 +challeng 0 +cspi 0 +design 0 +give 0 +firm 0 +foundat 0 +andso 0 +effort 0 +read 0 +thecours 0 +carefulli 0 +summari 0 +contain 0 +wella 0 +import 0 +polici 0 +date 0 +otherdeadlin 0 +everyth 0 +thesyllabu 0 +without 0 +delai 0 +avail 0 +jenn 0 +copi 0 +guadalup 0 +hundr 0 +takethi 0 +courseeach 0 +highli 0 +structur 0 +foral 0 +monitor 0 +frequent 0 +expect 0 +depend 0 +howwel 0 +event 0 +extrem 0 +difficult 0 +behind 0 +procedur 0 +riski 0 +near 0 +deadlineto 0 +turn 0 +late 0 +get 0 +half 0 +credit 0 +line 0 +unfortun 0 +construct 0 +link 0 +nowher 0 +apolog 0 +everyon 0 +attend 0 +lectur 0 +gallagh 0 +everi 0 +thursdayeven 0 +welch 0 +mondai 0 +wednesdai 0 +break 0 +intosmal 0 +section 0 +discuss 0 +ofth 0 +materi 0 +ateach 0 +assist 0 +nine 0 +written 0 +debug 0 +theprogram 0 +laboratori 0 +thatlaboratori 0 +thatgrad 0 +even 0 +less 0 +andyou 0 +within 0 +eight 0 +thattest 0 +limitedand 0 +often 0 +foravail 0 +proctor 0 +sever 0 +hoursbefor 0 +submit 0 +andquizz 0 +earli 0 +enough 0 +three 0 +must 0 +betaken 0 +prescrib 0 +time 0 +make 0 +soon 0 +possibl 0 +begin 0 +file 0 +openedfor 0 +uniqu 0 +identifi 0 +yourstud 0 +access 0 +orsak 0 +requiredtextbook 0 +dale 0 +weem 0 +wewil 0 +cover 0 +chapter 0 +individu 0 +background 0 +vari 0 +consider 0 +thiscours 0 +partial 0 +self 0 +pace 0 +feel 0 +well 0 +click 0 +howev 0 +still 0 +liabl 0 +commun 0 +exampl 0 +could 0 +form 0 +studi 0 +also 0 +gripe 0 +thought 0 +articl 0 +gener 0 +interest 0 +elicit 0 +repli 0 +staff 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..727a2172 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,59 @@ +output 2 +data 2 +sampl 1 +project 1 +file 1 +recoveri 1 +problem 1 +solut 1 +test 1 +databas 1 +system 1 +implement 1 +tong 1 +wang 1 +pleas 1 +dept 1 +student 1 +contest 1 +benchmark 1 +script 1 +time 1 +order 1 +tupl 1 +differ 1 +program 1 +pass 1 +fall 0 +professor 0 +batori 0 +syllabu 0 +homework 0 +read 0 +first 0 +retriev 0 +ret_into 0 +replac 0 +append 0 +delet 0 +mdb 0 +us 0 +measur 0 +run 0 +sinc 0 +attribut 0 +anoth 0 +wrote 0 +perl 0 +transform 0 +compar 0 +diff 0 +turn 0 +without 0 +error 0 +fail 0 +reason 0 +email 0 +suggest 0 +comment 0 +medec 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..fc6fe0e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,302 @@ +system 3 +page 3 +oper 3 +comput 2 +distribut 2 +proceed 2 +time 2 +cours 2 +file 2 +decemb 2 +review 2 +real 1 +commun 1 +process 1 +ieee 1 +transact 1 +design 1 +implement 1 +advanc 1 +support 1 +concept 1 +issu 1 +schedul 1 +sosp 1 +mach 1 +kernel 1 +harrick 1 +read 1 +fall 1 +wireless 1 +mobil 1 +environ 1 +share 1 +confer 1 +april 1 +unix 1 +levi 1 +usenix 1 +symposium 1 +cheriton 1 +cach 1 +januari 1 +instructor 1 +descript 1 +gener 1 +inform 1 +textbook 1 +requir 1 +list 1 +earli 1 +idea 1 +topic 1 +research 1 +avail 1 +paper 1 +project 1 +dalei 1 +juli 1 +andrew 1 +operatingsystem 1 +survei 1 +thread 1 +formultiprogram 1 +memori 1 +multiprocessor 1 +anderson 1 +lazowska 1 +network 1 +novemb 1 +remot 1 +procedur 1 +call 1 +februari 1 +onoper 1 +ousterhout 1 +germani 1 +fault 1 +intern 1 +workshop 1 +beyond 1 +karshmer 1 +nehmer 1 +springer 1 +verlag 1 +summer 1 +june 1 +august 1 +schroeder 1 +needham 1 +protect 1 +princeton 1 +march 1 +trigger 1 +tabl 0 +content 0 +prerequisit 0 +synopsi 0 +titl 0 +professor 0 +last 0 +offer 0 +prerequisitegradu 0 +stand 0 +undergradu 0 +systemssuch 0 +student 0 +expect 0 +familiar 0 +materialin 0 +chapter 0 +peterson 0 +andsilberschatz 0 +synopsisc 0 +breadth 0 +coveringboth 0 +theoret 0 +practic 0 +systemdesign 0 +cover 0 +includ 0 +ofdistribut 0 +formobil 0 +case 0 +studi 0 +anemphasi 0 +place 0 +current 0 +collect 0 +articl 0 +made 0 +theinstructor 0 +requirementsstud 0 +number 0 +area 0 +anddiscuss 0 +grade 0 +determin 0 +examin 0 +aterm 0 +present 0 +systemsfernando 0 +corbato 0 +marjori 0 +merwin 0 +daggett 0 +robert 0 +anexperiment 0 +afip 0 +spring 0 +joint 0 +brinch 0 +hansen 0 +nucleu 0 +multiprogram 0 +bensoussan 0 +clingen 0 +multic 0 +virtualmemori 0 +denni 0 +ritchi 0 +thompson 0 +overview 0 +tannenbaum 0 +robbert 0 +reness 0 +silberschatz 0 +andexampl 0 +managementa 0 +tucker 0 +gupta 0 +control 0 +theth 0 +thoma 0 +edward 0 +henri 0 +theperform 0 +implic 0 +manag 0 +altern 0 +forshar 0 +schedulingr 0 +bunt 0 +techniqu 0 +octob 0 +black 0 +concurr 0 +parallel 0 +inth 0 +inter 0 +communicationj 0 +barrera 0 +fast 0 +inproceed 0 +group 0 +acmtransact 0 +birel 0 +bruce 0 +nelson 0 +rpc 0 +oncomput 0 +bershad 0 +lightweightremot 0 +principl 0 +migrationf 0 +dougli 0 +migrat 0 +spriteoper 0 +internationalconfer 0 +berlin 0 +septemb 0 +theimer 0 +lantz 0 +preemptabl 0 +execut 0 +tolerancef 0 +cristian 0 +basic 0 +toler 0 +distributedsystem 0 +sand 0 +birman 0 +joseph 0 +reliabl 0 +presenc 0 +offailur 0 +systemsr 0 +sandberg 0 +goldberg 0 +kleiman 0 +ofsun 0 +mckusick 0 +leffler 0 +fabri 0 +fastfil 0 +rosenblum 0 +alog 0 +structur 0 +systemsm 0 +gifford 0 +fora 0 +programm 0 +workstat 0 +terri 0 +hint 0 +ieeetransact 0 +softwar 0 +engin 0 +securityr 0 +us 0 +encrypt 0 +authent 0 +inlarg 0 +butler 0 +lampson 0 +origin 0 +proc 0 +oninform 0 +scienc 0 +accetta 0 +baron 0 +boloski 0 +golub 0 +rashid 0 +tevanian 0 +young 0 +foundat 0 +develop 0 +systemsh 0 +kopetz 0 +event 0 +versu 0 +timesystem 0 +layland 0 +algorithm 0 +hard 0 +journal 0 +theacm 0 +zhao 0 +ramamritham 0 +stankov 0 +preemptiv 0 +schedulingund 0 +resourc 0 +constraint 0 +tokuda 0 +mercer 0 +art 0 +computingb 0 +badrinath 0 +acharya 0 +imielinski 0 +impact 0 +ondistribut 0 +satyanarayanan 0 +kistler 0 +kumar 0 +okasaki 0 +siegel 0 +steer 0 +coda 0 +highli 0 +distributedworkst 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..939d925c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,403 @@ +multimedia 3 +page 3 +proceed 3 +system 2 +video 2 +comput 2 +server 2 +design 2 +commun 2 +network 2 +oper 2 +octob 2 +septemb 2 +novemb 2 +goyal 2 +cours 2 +compress 2 +algorithm 2 +issu 2 +ieee 2 +support 2 +techniqu 2 +sigcomm 1 +list 1 +time 1 +schedul 1 +digit 1 +shenoi 1 +april 1 +august 1 +instructor 1 +harrick 1 +storag 1 +applic 1 +protocol 1 +audio 1 +research 1 +disk 1 +journal 1 +area 1 +offic 1 +hour 1 +media 1 +architectur 1 +mpeg 1 +placement 1 +project 1 +real 1 +packet 1 +analysi 1 +transport 1 +survei 1 +kandlur 1 +effici 1 +control 1 +scheme 1 +manag 1 +intern 1 +select 1 +zhang 1 +review 1 +descript 1 +teach 1 +read 1 +overview 1 +technolog 1 +trend 1 +problem 1 +class 1 +format 1 +fundament 1 +jpeg 1 +retriev 1 +cach 1 +batch 1 +introduct 1 +channel 1 +decemb 1 +fall 1 +number 1 +place 1 +basic 1 +concept 1 +cover 1 +multimediasystem 1 +topic 1 +student 1 +expect 1 +implement 1 +determin 1 +appoint 1 +phone 1 +mail 1 +utexa 1 +copi 1 +steinmetz 1 +principl 1 +standard 1 +hierarch 1 +magazin 1 +venkat 1 +rangan 1 +optim 1 +ofmultimedia 1 +arrai 1 +icmc 1 +washington 1 +symposium 1 +june 1 +anaheim 1 +chen 1 +workshop 1 +diego 1 +francisco 1 +scale 1 +buffer 1 +sitaram 1 +dynam 1 +polici 1 +boston 1 +keshav 1 +rate 1 +base 1 +campbel 1 +coulson 1 +peterson 1 +jacobson 1 +mccann 1 +framework 1 +tabl 0 +content 0 +gener 0 +inform 0 +prerequisit 0 +synopsi 0 +textbook 0 +requir 0 +assist 0 +spring 0 +databas 0 +handout 0 +note 0 +postscript 0 +scalabl 0 +possibl 0 +integr 0 +servic 0 +establish 0 +processor 0 +descriptiongener 0 +informationcours 0 +titl 0 +professor 0 +detail 0 +offer 0 +uniqu 0 +taylor 0 +hall 0 +prerequisitesgradu 0 +stand 0 +familiar 0 +incomput 0 +synopsisc 0 +advanc 0 +boththeoret 0 +practic 0 +includ 0 +systemsupport 0 +well 0 +transportprotocol 0 +emphasi 0 +current 0 +designissu 0 +textbooka 0 +collect 0 +recent 0 +articl 0 +madeavail 0 +requirementsth 0 +introduc 0 +thetop 0 +follow 0 +discuss 0 +relatedpap 0 +question 0 +answer 0 +tounderstand 0 +describ 0 +critiqu 0 +contribut 0 +ofpap 0 +addition 0 +carri 0 +asemest 0 +long 0 +grade 0 +examin 0 +andclass 0 +particip 0 +vintuesdai 0 +assistantmr 0 +prashant 0 +eduread 0 +cntain 0 +paper 0 +theread 0 +avail 0 +mondai 0 +speedwai 0 +locat 0 +dobi 0 +mall 0 +guadalup 0 +austin 0 +pleas 0 +callthem 0 +make 0 +sure 0 +packag 0 +readi 0 +compressionr 0 +data 0 +wallac 0 +still 0 +pictur 0 +gall 0 +multimediaappl 0 +chiang 0 +anastassi 0 +code 0 +digitaltelevis 0 +serversoverview 0 +serverdesign 0 +gemmel 0 +row 0 +tutori 0 +object 0 +ieeeintern 0 +confer 0 +failur 0 +recoveri 0 +inmulti 0 +annualintern 0 +fault 0 +toler 0 +ftc 0 +pasadena 0 +california 0 +chiueh 0 +katz 0 +multi 0 +resolut 0 +represent 0 +forparallel 0 +admiss 0 +groupedsweep 0 +ofthird 0 +supportfor 0 +narasimha 0 +reddi 0 +wylli 0 +multimediai 0 +statist 0 +admissioncontrol 0 +acmmultimedia 0 +designinglarg 0 +march 0 +scan 0 +inmultimedia 0 +sanfrancisco 0 +stream 0 +convers 0 +interactivevideo 0 +playout 0 +summer 0 +space 0 +shahabuddin 0 +foran 0 +demand 0 +demandvideo 0 +report 0 +papadimitri 0 +ramanathan 0 +informationcach 0 +deliveri 0 +person 0 +program 0 +homeentertain 0 +internationalconfer 0 +multimedianetwork 0 +layer 0 +shenker 0 +futur 0 +internet 0 +ferrari 0 +verma 0 +channelestablish 0 +wide 0 +areasin 0 +comparison 0 +servicedisciplin 0 +delaybound 0 +heterogen 0 +toappear 0 +also 0 +workshopon 0 +nossdav 0 +durham 0 +hampshir 0 +chow 0 +losslesssmooth 0 +london 0 +salehi 0 +kuros 0 +towslei 0 +storedvideo 0 +reduc 0 +variabl 0 +resourc 0 +requirementsthrough 0 +smooth 0 +sigmetr 0 +philadelphia 0 +grossglaus 0 +rcbr 0 +simpl 0 +efficientservic 0 +multipl 0 +traffic 0 +acmsigcomm 0 +kanakia 0 +misra 0 +reibman 0 +adapt 0 +congestioncontrol 0 +proceedingsof 0 +clark 0 +tennenhous 0 +consider 0 +newgener 0 +hutchison 0 +qualiti 0 +servicearchitectur 0 +turner 0 +imag 0 +transfer 0 +floyd 0 +reliablemulticast 0 +light 0 +weight 0 +session 0 +levelfram 0 +buss 0 +deffner 0 +schulzrinn 0 +januari 0 +blakowski 0 +synchron 0 +refer 0 +model 0 +specif 0 +case 0 +studi 0 +onselect 0 +januaryoper 0 +multimediag 0 +robin 0 +blair 0 +papathoma 0 +andd 0 +shepherd 0 +basedcommun 0 +choru 0 +incommun 0 +druschel 0 +abbott 0 +pagel 0 +subsystem 0 +workstat 0 +ofth 0 +third 0 +systemssupport 0 +govindan 0 +anderson 0 +mechan 0 +forcontinu 0 +operatingsystem 0 +pacif 0 +grove 0 +formultimedia 0 +second 0 +symposiumon 0 +osdi 0 +seattl 0 +conferencingh 0 +zellweg 0 +swinehart 0 +venkatrangan 0 +conferenc 0 +etherphon 0 +environ 0 +flexibl 0 +packetvideo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..a5974a6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,296 @@ +proceed 2 +multimedia 2 +multicast 2 +cours 2 +rout 2 +system 2 +sigcomm 2 +databas 1 +design 1 +boston 1 +real 1 +share 1 +conferenc 1 +schedul 1 +page 1 +commun 1 +harrick 1 +inform 1 +requir 1 +read 1 +internet 1 +time 1 +oper 1 +paper 1 +acmmultimedia 1 +francisco 1 +novemb 1 +base 1 +technic 1 +report 1 +queri 1 +pictur 1 +fall 1 +instructor 1 +descript 1 +gener 1 +textbook 1 +offic 1 +list 1 +processor 1 +support 1 +topic 1 +protocol 1 +present 1 +class 1 +grade 1 +project 1 +mccann 1 +jacobson 1 +packet 1 +video 1 +crowcroft 1 +confer 1 +scalabl 1 +applic 1 +deer 1 +cheriton 1 +transact 1 +tree 1 +reliabl 1 +interact 1 +gupta 1 +resourc 1 +infocom 1 +warldersburg 1 +weihl 1 +symposium 1 +jeffai 1 +paradigm 1 +imag 1 +jain 1 +tabl 0 +content 0 +prerequisit 0 +synopsi 0 +hour 0 +servic 0 +titl 0 +professor 0 +semest 0 +meet 0 +fridai 0 +prerequisitesgradu 0 +stand 0 +familiar 0 +basic 0 +concept 0 +networkprotocol 0 +multimediasystem 0 +synopsisthi 0 +advanc 0 +bediscuss 0 +includ 0 +transport 0 +formultimedia 0 +mobil 0 +network 0 +andmultimedia 0 +multimediadatabas 0 +emphasi 0 +place 0 +current 0 +issu 0 +andresearch 0 +collect 0 +research 0 +articl 0 +made 0 +avail 0 +theinstructor 0 +requirementsstud 0 +number 0 +area 0 +aswel 0 +discuss 0 +determinedbas 0 +particip 0 +studentsenrol 0 +letter 0 +submit 0 +orcarri 0 +hoursfridai 0 +appoint 0 +phone 0 +mail 0 +utexa 0 +flexibleframework 0 +handlei 0 +wakeman 0 +controlchannel 0 +cccp 0 +build 0 +conferencecontrol 0 +gajewska 0 +kistler 0 +manass 0 +redel 0 +argo 0 +systemfor 0 +distribut 0 +collabor 0 +gong 0 +multipoint 0 +audio 0 +control 0 +basedmultimedia 0 +zellweg 0 +swinehart 0 +venkat 0 +rangan 0 +etherphon 0 +environ 0 +ieeecomput 0 +octob 0 +datagraminternetwork 0 +extend 0 +lan 0 +computersystem 0 +ballardi 0 +franci 0 +core 0 +architectur 0 +inter 0 +domain 0 +thyagarajan 0 +hierarch 0 +distanc 0 +vector 0 +mbone 0 +widyono 0 +andevalu 0 +algorithm 0 +channel 0 +msthesi 0 +berkelei 0 +kompella 0 +pasqual 0 +polyzo 0 +multimediacommun 0 +univers 0 +california 0 +diego 0 +floyd 0 +zhang 0 +framework 0 +light 0 +weightsess 0 +level 0 +frame 0 +ofacm 0 +holbrook 0 +singhal 0 +receiv 0 +fordistribut 0 +simul 0 +theacm 0 +herzog 0 +estrin 0 +shenker 0 +cost 0 +axiomat 0 +analysi 0 +how 0 +moran 0 +nguyen 0 +multi 0 +parti 0 +timecommun 0 +servicesj 0 +guyton 0 +schwartz 0 +locat 0 +nearbi 0 +copi 0 +replic 0 +server 0 +mogul 0 +case 0 +forpersist 0 +connect 0 +http 0 +acmsigcomm 0 +supportc 0 +lotteri 0 +effici 0 +flexibleproport 0 +mangement 0 +ofoper 0 +implement 0 +osdi 0 +strideschedul 0 +determinist 0 +proport 0 +resourcemanag 0 +golestani 0 +self 0 +clock 0 +fair 0 +queue 0 +scheme 0 +high 0 +speedappl 0 +govindan 0 +anderson 0 +mechan 0 +forcontinu 0 +media 0 +onoper 0 +principl 0 +sosp 0 +monterei 0 +timeproduc 0 +consum 0 +construct 0 +ofeffici 0 +predict 0 +ofth 0 +sigapp 0 +appli 0 +comput 0 +latenc 0 +manag 0 +intim 0 +workshop 0 +timeoper 0 +softwar 0 +seattl 0 +databasesw 0 +niblack 0 +qbic 0 +contentus 0 +color 0 +textur 0 +shape 0 +februari 0 +cawkel 0 +journal 0 +ofinform 0 +scienc 0 +bach 0 +paul 0 +managementsystem 0 +face 0 +retriev 0 +ieee 0 +knowledgeand 0 +data 0 +engin 0 +august 0 +weymouth 0 +semant 0 +vimsi 0 +model 0 +intern 0 +onveri 0 +larg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..270cddf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,86 @@ +home 1 +work 1 +question 1 +time 1 +answer 1 +final 1 +exam 1 +problem 1 +true 1 +cours 1 +updat 1 +sentenc 1 +omega 1 +submatrix 1 +algorithm 1 +techniqu 1 +theori 1 +fall 1 +handout 1 +receiv 1 +last 1 +total 1 +vertic 1 +cycl 1 +label 1 +right 1 +size 1 +largest 1 +decemb 1 +instructor 0 +vijaya 0 +ramachandranuniqu 0 +number 0 +descript 0 +instruct 0 +respons 0 +pose 0 +quot 0 +take 0 +sigma 0 +onsigma 0 +word 0 +refer 0 +amort 0 +oroth 0 +well 0 +known 0 +face 0 +data 0 +structur 0 +disjoint 0 +set 0 +requir 0 +inth 0 +worst 0 +case 0 +text 0 +book 0 +isther 0 +differ 0 +meant 0 +first 0 +second 0 +paragraphof 0 +chapter 0 +note 0 +containdistinct 0 +cancontain 0 +think 0 +littl 0 +unclear 0 +denot 0 +equal 0 +somek 0 +entri 0 +fridai 0 +mondai 0 +bepost 0 +either 0 +youhav 0 +sent 0 +pleas 0 +address 0 +us 0 +yourbest 0 +judgment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..5908eda5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,227 @@ +scheme 3 +rscheme 2 +version 2 +repositori 2 +system 2 +run 2 +class 2 +implement 2 +object 2 +machin 2 +note 2 +us 2 +program 1 +page 1 +cours 1 +answer 1 +sure 1 +code 1 +languag 1 +home 1 +thing 1 +make 1 +read 1 +later 1 +solari 1 +unix 1 +free 1 +recommend 1 +feelei 1 +meroon 1 +comp 1 +lang 1 +html 1 +first 1 +document 1 +standard 1 +practic 1 +question 1 +also 1 +assign 1 +simpl 1 +take 1 +wilson 1 +construct 1 +chang 1 +brows 1 +section 1 +browser 1 +especi 1 +text 1 +error 1 +chapter 1 +describ 1 +quiz 1 +homework 1 +includ 1 +pictur 1 +illustr 1 +base 1 +inherit 1 +explan 1 +test 1 +main 1 +default 1 +whichi 1 +instal 1 +public 1 +sparc 1 +command 1 +runschem 1 +linux 1 +orani 1 +sever 1 +andinstal 1 +find 1 +itfrom 1 +donovan 1 +kolbl 1 +qing 1 +patch 1 +friendlier 1 +fornewbi 1 +gettinggambit 1 +marc 1 +youcan 1 +window 1 +bestschem 1 +bunch 1 +avail 1 +guil 1 +might 1 +gambit 1 +mark 1 +mzscheme 1 +rice 1 +someth 1 +besid 1 +get 1 +start 1 +doingobject 1 +orient 1 +tous 1 +advantag 1 +abl 1 +univers 1 +indiana 1 +lot 1 +freeimplement 1 +variou 1 +getinterest 1 +learn 1 +cover 1 +place 1 +look 1 +internet 1 +newsgroup 1 +devot 1 +pagec 0 +pagethi 0 +paulwilson 0 +subject 0 +reload 0 +button 0 +yourbrows 0 +come 0 +see 0 +mostrec 0 +onlin 0 +refer 0 +materi 0 +syllabu 0 +lectur 0 +ondeclar 0 +arereason 0 +well 0 +index 0 +willchang 0 +goe 0 +along 0 +ahead 0 +islik 0 +adventur 0 +suggest 0 +usinga 0 +interact 0 +work 0 +throughchapt 0 +tutori 0 +allow 0 +outof 0 +past 0 +sanoth 0 +reason 0 +rather 0 +thanprint 0 +hardcopi 0 +correct 0 +weget 0 +definit 0 +format 0 +onlinebrows 0 +list 0 +featur 0 +ters 0 +stuff 0 +coursenot 0 +help 0 +want 0 +know 0 +miscellanousfunct 0 +exactli 0 +author 0 +second 0 +third 0 +name 0 +convent 0 +indent 0 +shouldconsult 0 +grade 0 +itsens 0 +andnot 0 +draw 0 +data 0 +structur 0 +write 0 +merg 0 +sort 0 +solut 0 +three 0 +problem 0 +comment 0 +reader 0 +regular 0 +express 0 +grammar 0 +actual 0 +backward 0 +chain 0 +proposit 0 +calculu 0 +theoremprov 0 +essenti 0 +littl 0 +subset 0 +prolog 0 +setofrul 0 +classifi 0 +anim 0 +logic 0 +kind 0 +ofanim 0 +plai 0 +theorem 0 +prover 0 +instanc 0 +simpleobject 0 +anoth 0 +show 0 +metaclass 0 +circular 0 +latter 0 +self 0 +onclass 0 +gener 0 +procedur 0 +type 0 +subtyp 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..75a19f56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,97 @@ +homework 2 +solut 2 +file 2 +postscript 2 +session 1 +kumar 1 +utexa 1 +class 1 +note 1 +review 1 +cours 1 +slide 1 +section 1 +model 1 +format 1 +fall 1 +comput 1 +austin 1 +ajit 1 +contact 1 +test 1 +print 1 +weekli 1 +tuesdai 1 +thursdai 1 +station 1 +feng 1 +xfeng 1 +new 1 +midterm 1 +decimalinteg 1 +hexinteg 1 +octalinteg 1 +program 0 +welcom 0 +homepag 0 +taught 0 +adam 0 +georg 0 +announc 0 +final 0 +surpris 0 +xunnow 0 +make 0 +like 0 +homeworksreview 0 +slidesth 0 +second 0 +half 0 +semest 0 +pleas 0 +view 0 +onlineif 0 +possibl 0 +realli 0 +need 0 +found 0 +updatedhomework 0 +sourc 0 +filemidterm 0 +webta 0 +inform 0 +timetableta 0 +offic 0 +hour 0 +locat 0 +guana 0 +natarajan 0 +eduxun 0 +detail 0 +time 0 +tabl 0 +guid 0 +group 0 +also 0 +tip 0 +fridai 0 +download 0 +score 0 +requir 0 +wordlist 0 +linux 0 +provid 0 +warren 0 +wang 0 +wwang 0 +answer 0 +exercis 0 +made 0 +modif 0 +mondai 0 +afternoon 0 +assign 0 +maintain 0 +edudepart 0 +sciencesunivers 0 +texa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..681cc265 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,46 @@ +solut 1 +yang 1 +statist 1 +offic 1 +hour 1 +utexa 1 +assign 1 +program 1 +schwetman 1 +mesquit 1 +station 1 +contact 1 +yangyang 1 +file 1 +print 1 +email 1 +comput 0 +system 0 +architectur 0 +fall 0 +instructor 0 +herb 0 +softwar 0 +class 0 +appointmentcontact 0 +syllabu 0 +statisticsassign 0 +asga 0 +statisticsyour 0 +final 0 +gradesect 0 +section 0 +microsparc 0 +datasheetonlin 0 +resourc 0 +classmat 0 +ruiliu 0 +postmessag 0 +new 0 +group 0 +pagei 0 +creat 0 +august 0 +comment 0 +welcom 0 +send 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..d301405c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,70 @@ +handout 2 +homework 1 +project 1 +protocol 1 +class 1 +group 1 +csnet 1 +offic 1 +hour 1 +read 1 +newsgroup 1 +utexa 1 +draft 1 +http 1 +schedul 1 +network 0 +implement 0 +gener 0 +inform 0 +professor 0 +tuesdai 0 +thursdai 0 +teach 0 +assist 0 +mondai 0 +wensdai 0 +station 0 +descript 0 +text 0 +background 0 +prerequisit 0 +grade 0 +refer 0 +multicast 0 +rout 0 +texa 0 +internet 0 +platform 0 +netsim 0 +corejava 0 +mobil 0 +support 0 +specif 0 +address 0 +alloc 0 +manag 0 +tutori 0 +digest 0 +access 0 +authent 0 +comp 0 +java 0 +present 0 +individu 0 +paper 0 +turn 0 +sampl 0 +solut 0 +info 0 +configur 0 +file 0 +pleas 0 +note 0 +first 0 +fengyufeng 0 +edufing 0 +public 0 +ring 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..20c681b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,93 @@ +comput 1 +inform 1 +home 1 +scienc 1 +engin 1 +help 1 +page 1 +document 1 +class 1 +read 1 +assign 1 +homework 1 +degre 1 +program 1 +offer 1 +colleg 1 +mosaic 1 +engr 1 +pageclick 0 +techniqu 0 +spring 0 +quarterwelcom 0 +world 0 +wide 0 +hypermedia 0 +whichcontain 0 +bounti 0 +keep 0 +mind 0 +thatthi 0 +static 0 +addedfrequ 0 +problem 0 +send 0 +mail 0 +weld 0 +click 0 +highlight 0 +item 0 +personnel 0 +professor 0 +cours 0 +syllabu 0 +polici 0 +announc 0 +check 0 +regularli 0 +last 0 +chang 0 +handout 0 +lectur 0 +note 0 +gradesoth 0 +us 0 +link 0 +offici 0 +mathematica 0 +mvi 0 +visitor 0 +room 0 +schedul 0 +depart 0 +art 0 +avail 0 +follow 0 +topic 0 +basic 0 +hypertext 0 +markup 0 +languag 0 +html 0 +uniform 0 +resourc 0 +locat 0 +usinglynx 0 +charact 0 +base 0 +browserport 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quotedand 0 +duli 0 +credit 0 +copyright 0 +departmentof 0 +univers 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..4efa10da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,139 @@ +comput 1 +engr 1 +help 1 +exam 1 +autumn 1 +washington 1 +scienc 1 +engin 1 +home 1 +page 1 +program 1 +inform 1 +class 1 +messag 1 +webmast 1 +cours 1 +netscap 1 +document 1 +frequent 1 +problem 1 +send 1 +mail 1 +click 1 +last 1 +updat 1 +studi 1 +guid 1 +solut 1 +final 1 +tip 1 +think 1 +raini 1 +activ 1 +quarter 1 +like 1 +univers 1 +degre 1 +offer 1 +colleg 1 +dugan 0 +martin 0 +tompa 0 +welcom 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +bounti 0 +theclass 0 +keep 0 +mind 0 +static 0 +newinform 0 +especi 0 +ad 0 +highlight 0 +item 0 +check 0 +syllabu 0 +offic 0 +hour 0 +staff 0 +lectur 0 +slide 0 +homework 0 +midterm 0 +mac 0 +debugg 0 +bulletin 0 +board 0 +textbook 0 +code 0 +refer 0 +regularli 0 +schedul 0 +time 0 +place 0 +special 0 +demo 0 +web 0 +earlier 0 +intact 0 +less 0 +notic 0 +instanc 0 +link 0 +work 0 +pleas 0 +might 0 +assign 0 +test 0 +us 0 +previous 0 +winter 0 +spring 0 +summer 0 +search 0 +previou 0 +miscellan 0 +info 0 +case 0 +insensit 0 +match 0 +whole 0 +word 0 +avail 0 +depart 0 +art 0 +relat 0 +major 0 +nonmajor 0 +consid 0 +take 0 +preview 0 +run 0 +find 0 +itemsund 0 +balloon 0 +menu 0 +particular 0 +onlin 0 +handbook 0 +portion 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quotedand 0 +duli 0 +credit 0 +copyright 0 +departmentof 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..0ba2c4e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,112 @@ +comput 1 +engr 1 +home 1 +autumn 1 +program 1 +cours 1 +hour 1 +scienc 1 +engin 1 +page 1 +class 1 +week 1 +lectur 1 +slide 1 +summer 1 +place 1 +test 1 +webmast 1 +like 1 +washington 1 +degre 1 +colleg 1 +martin 0 +dickei 0 +richard 0 +ladner 0 +welcom 0 +world 0 +wide 0 +short 0 +hypermediadocu 0 +contain 0 +bounti 0 +informationabout 0 +click 0 +highlight 0 +item 0 +moreinform 0 +messag 0 +check 0 +frequent 0 +syllabu 0 +sort 0 +schedulesth 0 +glanceweek 0 +activ 0 +schedulecomput 0 +lab 0 +includinglab 0 +watch 0 +chang 0 +staff 0 +includ 0 +instructor 0 +andta 0 +offic 0 +audiofrom 0 +homework 0 +examin 0 +midtermand 0 +final 0 +originallyschedul 0 +time 0 +studi 0 +guid 0 +andtim 0 +kind 0 +tip 0 +usingth 0 +compil 0 +macintosh 0 +user 0 +textbook 0 +code 0 +refer 0 +tutori 0 +special 0 +demo 0 +web 0 +earlier 0 +quarter 0 +less 0 +intactand 0 +invit 0 +brows 0 +notic 0 +problem 0 +forinst 0 +link 0 +work 0 +pleas 0 +send 0 +mail 0 +might 0 +look 0 +assign 0 +andth 0 +us 0 +previous 0 +winter 0 +spring 0 +inform 0 +avail 0 +univers 0 +depart 0 +art 0 +andrel 0 +major 0 +nonmajor 0 +comment 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..62a191fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,58 @@ +assign 2 +beam 1 +washington 1 +offic 1 +sieg 1 +thursdai 1 +autumn 1 +hour 1 +nowitz 1 +wednesdai 1 +acrobat 1 +midterm 1 +spring 1 +discret 0 +structur 0 +instructorpaul 0 +edulectur 0 +phone 0 +appoint 0 +teach 0 +assistantjonathan 0 +edusect 0 +johnson 0 +section 0 +loew 0 +tuesdai 0 +handout 0 +syllabu 0 +induct 0 +recurs 0 +defin 0 +set 0 +postscript 0 +reader 0 +part 0 +ofyour 0 +browser 0 +novemb 0 +class 0 +sampl 0 +question 0 +homework 0 +previou 0 +cours 0 +web 0 +fall 0 +karp 0 +ruzzo 0 +winter 0 +leveson 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..e9ef38eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,96 @@ +latex 3 +solut 2 +homework 2 +text 2 +handout 1 +format 1 +exam 1 +html 1 +winter 1 +state 1 +diagram 1 +document 1 +autumn 1 +last 1 +updat 1 +cours 1 +regular 1 +grammar 1 +midterm 1 +final 1 +provid 1 +washington 1 +intro 0 +formal 0 +model 0 +richard 0 +ladnerclass 0 +messag 0 +check 0 +email 0 +frequent 0 +syllabu 0 +construct 0 +express 0 +extra 0 +rambl 0 +regard 0 +question 0 +construc 0 +pars 0 +review 0 +proof 0 +halt 0 +problem 0 +undecidableexam 0 +comment 0 +notat 0 +file 0 +materi 0 +three 0 +hypertext 0 +markup 0 +languag 0 +handl 0 +browser 0 +fact 0 +current 0 +look 0 +view 0 +mani 0 +origin 0 +convert 0 +us 0 +latexhtml 0 +strang 0 +plain 0 +ascii 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +legibl 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +postscript 0 +ghostscript 0 +home 0 +page 0 +free 0 +viewer 0 +window 0 +linux 0 +web 0 +previou 0 +quarter 0 +ladner 0 +edufix 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..78aea329 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,46 @@ +lectur 1 +messag 1 +mail 1 +list 1 +autumn 1 +formal 1 +model 1 +fall 1 +condon 1 +page 1 +check 1 +upcom 1 +class 1 +subscrib 1 +last 1 +updat 1 +previou 1 +winter 1 +washington 1 +introduct 0 +intro 0 +ann 0 +welcom 0 +home 0 +regularli 0 +findhomework 0 +solut 0 +set 0 +pointer 0 +exam 0 +sent 0 +willb 0 +log 0 +send 0 +majordomo 0 +includ 0 +userid 0 +email 0 +frequent 0 +homework 0 +handout 0 +content 0 +web 0 +quarter 0 +edukaye 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..67bb9383 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,26 @@ +autumn 1 +last 1 +updat 1 +winter 1 +spring 1 +data 0 +structur 0 +martin 0 +tompaclass 0 +messag 0 +check 0 +mail 0 +frequent 0 +cours 0 +informationlab 0 +technot 0 +unix 0 +questionnaireloc 0 +cdeletemin 0 +algorithm 0 +treeshomework 0 +web 0 +previou 0 +quarter 0 +request 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..5b096e4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,62 @@ +spring 1 +ladner 1 +class 1 +messag 1 +frequent 1 +washington 1 +home 0 +pagecs 0 +data 0 +structuresrichard 0 +instructordan 0 +fasulo 0 +teach 0 +assistantthi 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +inform 0 +theclass 0 +taught 0 +keep 0 +mind 0 +document 0 +static 0 +newinform 0 +especi 0 +ad 0 +click 0 +help 0 +check 0 +offic 0 +hour 0 +suggest 0 +read 0 +project 0 +homework 0 +exam 0 +lectur 0 +overheadsport 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quotedand 0 +duli 0 +credit 0 +copyright 0 +departmentof 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..250bbac3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,37 @@ +home 2 +page 2 +offer 1 +ofcs 1 +program 1 +languag 1 +autumn 1 +spring 1 +winter 1 +depart 1 +washington 1 +pagecs 0 +languagesfal 0 +quarter 0 +current 0 +informationth 0 +listinfo 0 +everi 0 +research 0 +pagehom 0 +computersci 0 +engineeringport 0 +reprint 0 +adapt 0 +academicnonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +dulycredit 0 +copyright 0 +comput 0 +scienceand 0 +engin 0 +univers 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..8b3c1e05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,114 @@ +note 3 +last 2 +updat 2 +code 2 +april 2 +assign 2 +smalltalk 2 +transcript 2 +suggest 2 +read 2 +html 2 +postscript 2 +sieg 1 +lisp 1 +home 1 +us 1 +prolog 1 +page 1 +washington 1 +solut 1 +transcipt 1 +method 1 +offer 1 +ofcs 1 +program 1 +june 1 +grove 1 +emac 1 +sampl 1 +quiz 1 +languag 1 +section 1 +final 1 +exam 1 +mail 1 +eduoffic 1 +offic 1 +hour 1 +dave 1 +cours 1 +figur 1 +htmlpostscript 1 +march 1 +winter 1 +depart 1 +pagecs 0 +languagesspr 0 +quarter 0 +lectur 0 +review 0 +session 0 +mondai 0 +tuesdai 0 +thursdai 0 +instructor 0 +steve 0 +hanks 0 +hank 0 +administr 0 +syllabu 0 +overviewcours 0 +newsgroup 0 +help 0 +documentsgeneralintroduct 0 +new 0 +netscap 0 +unix 0 +turnin 0 +electron 0 +submiss 0 +homework 0 +clip 0 +save 0 +relatedrun 0 +reason 0 +thing 0 +relat 0 +done 0 +hand 0 +miss 0 +includ 0 +partial 0 +test 0 +daili 0 +class 0 +full 0 +interfac 0 +build 0 +databas 0 +employe 0 +informationth 0 +listinfo 0 +everi 0 +research 0 +pagehom 0 +autumn 0 +spring 0 +computersci 0 +engineeringport 0 +reprint 0 +adapt 0 +academicnonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +dulycredit 0 +copyright 0 +comput 0 +scienceand 0 +engin 0 +univers 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..8b2ecd36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,51 @@ +washington 1 +comput 1 +scienc 1 +engin 1 +index 1 +quarter 1 +webmast 1 +univers 1 +depart 1 +degre 1 +program 1 +offer 1 +colleg 1 +page 0 +pagecurr 0 +quarterth 0 +current 0 +previou 0 +quarterscours 0 +web 0 +earlier 0 +intact 0 +less 0 +younotic 0 +problem 0 +instanc 0 +link 0 +work 0 +pleas 0 +send 0 +mail 0 +spring 0 +inform 0 +avail 0 +art 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 +copyright 0 +comment 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..84200382 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,124 @@ +class 1 +design 1 +mail 1 +instructor 1 +lectur 1 +home 1 +page 1 +gaetano 1 +announc 1 +last 1 +updat 1 +washington 1 +autumn 1 +quarter 1 +borriello 1 +corei 1 +us 1 +inform 1 +document 1 +send 1 +webmast 1 +administr 1 +final 1 +exam 1 +tool 1 +topic 1 +offic 1 +hour 1 +sieg 1 +logic 1 +katz 1 +benjamin 1 +cum 1 +addison 1 +weslei 1 +maintain 1 +comput 1 +introduct 0 +digit 0 +andersonwelcom 0 +contain 0 +whole 0 +bunch 0 +keep 0 +mind 0 +static 0 +especi 0 +messag 0 +ad 0 +frequent 0 +problem 0 +gener 0 +tocs 0 +notic 0 +system 0 +archiv 0 +messagess 0 +everyon 0 +cours 0 +goal 0 +syllabu 0 +meet 0 +time 0 +mondai 0 +decemb 0 +workload 0 +grade 0 +expect 0 +laboratori 0 +softwar 0 +polici 0 +collabor 0 +cheat 0 +address 0 +overal 0 +schedul 0 +anderson 0 +corin 0 +aweekli 0 +assign 0 +weekli 0 +quizz 0 +onlin 0 +version 0 +slide 0 +textbook 0 +contemporari 0 +author 0 +publish 0 +note 0 +interest 0 +evolut 0 +implement 0 +technolog 0 +aid 0 +synario 0 +feedback 0 +tell 0 +think 0 +thing 0 +go 0 +even 0 +anonym 0 +desir 0 +link 0 +previou 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 +copyright 0 +depart 0 +scienc 0 +engin 0 +univers 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..51a54d93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,77 @@ +assign 1 +inform 1 +data 1 +structur 1 +tanimoto 1 +hall 1 +offic 1 +hour 1 +assist 1 +option 1 +grade 1 +midterm 1 +project 1 +final 1 +late 1 +cours 0 +pagecs 0 +algorithmsautumn 0 +basic 0 +instructor 0 +steve 0 +washington 0 +sieg 0 +room 0 +appoint 0 +teach 0 +anhai 0 +doan 0 +announc 0 +place 0 +dai 0 +time 0 +smith 0 +comput 0 +facil 0 +unix 0 +account 0 +mscc 0 +student 0 +languag 0 +requir 0 +lisp 0 +textbook 0 +shaffer 0 +practic 0 +introduct 0 +algorithm 0 +analysi 0 +publish 0 +summer 0 +prentic 0 +breakdown 0 +tent 0 +polici 0 +keep 0 +manag 0 +encourag 0 +punctual 0 +work 0 +point 0 +deduct 0 +penalti 0 +schedul 0 +updat 0 +aboutth 0 +topic 0 +studi 0 +examinform 0 +exambas 0 +us 0 +compilerassignmentssolut 0 +assignmentsteach 0 +informationscheduleweb 0 +previou 0 +offer 0 +winter 0 +autumn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..36c26a4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,40 @@ +comput 1 +washington 1 +scienc 1 +engin 1 +holden 1 +nowitz 1 +cours 1 +degre 1 +program 1 +offer 1 +colleg 1 +major 1 +home 0 +pagecs 0 +data 0 +structur 0 +algorithmsspr 0 +instructor 0 +alistair 0 +jonathan 0 +class 0 +messag 0 +last 0 +updat 0 +mondai 0 +materi 0 +syllabu 0 +homework 0 +demo 0 +exam 0 +inform 0 +depart 0 +art 0 +relat 0 +mosaic 0 +help 0 +interest 0 +page 0 +raini 0 +funnowitz 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..cc54e805 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,64 @@ +test 2 +file 2 +assign 1 +homework 1 +program 1 +indic 1 +enclos 1 +data 1 +meet 1 +siegtelephon 1 +email 1 +washington 1 +eduoffic 1 +hour 1 +next 1 +quot 1 +search 1 +cours 0 +pagecs 0 +structur 0 +algorithmswint 0 +time 0 +place 0 +sieg 0 +instructor 0 +linda 0 +shapirooffic 0 +shapiro 0 +denis 0 +pinneloffic 0 +denisep 0 +syllabustransparencieshomework 0 +assignmentshomework 0 +answer 0 +assignmentsprogram 0 +note 0 +set 0 +follow 0 +line 0 +begin 0 +insert 0 +tree 0 +inquot 0 +state 0 +charact 0 +long 0 +come 0 +integ 0 +length 0 +associatedvalu 0 +final 0 +string 0 +valu 0 +also 0 +linebegin 0 +find 0 +isfollow 0 +object 0 +model 0 +graphimag 0 +graphreview 0 +listsfin 0 +studi 0 +sheet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..5ba87396 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,25 @@ +inform 1 +class 1 +compil 0 +classhomethi 0 +world 0 +wide 0 +hypermedia 0 +documentfor 0 +contain 0 +keep 0 +inmind 0 +document 0 +static 0 +willb 0 +ad 0 +frequent 0 +urgent 0 +announc 0 +assign 0 +onlin 0 +meet 0 +admin 0 +washington 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..276f03d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,234 @@ +project 2 +softwar 2 +manag 2 +respons 2 +engin 2 +design 2 +includ 2 +plan 2 +primari 2 +document 2 +system 2 +class 2 +duti 2 +work 1 +experi 1 +student 1 +review 1 +provid 1 +group 1 +organ 1 +learn 1 +program 1 +role 1 +qualiti 1 +configur 1 +product 1 +user 1 +boe 1 +teach 1 +portfolio 1 +specif 1 +analysi 1 +quarter 1 +particip 1 +develop 1 +meet 1 +requir 1 +mainten 1 +control 1 +secur 1 +specialist 1 +experiment 1 +cours 1 +leveson 1 +concept 1 +team 1 +real 1 +industri 1 +written 1 +commun 1 +tool 1 +test 1 +also 1 +technic 1 +topic 1 +interact 1 +exampl 1 +first 1 +enough 1 +instructor 1 +addit 1 +set 1 +done 1 +leadership 1 +howev 1 +everyon 1 +activ 1 +write 1 +make 1 +creation 1 +overal 1 +issu 1 +evalu 1 +chang 1 +ensur 1 +human 1 +interfac 1 +assur 1 +conduct 1 +deliver 1 +mockup 1 +prototyp 1 +expertis 1 +reliabl 1 +home 0 +pagecs 0 +professor 0 +nanci 0 +offic 0 +sieg 0 +phone 0 +hour 0 +appoint 0 +mail 0 +washington 0 +educours 0 +descriptioninstruct 0 +object 0 +terminolog 0 +fundament 0 +oral 0 +skill 0 +produc 0 +studi 0 +method 0 +construct 0 +larg 0 +softwaresystem 0 +essenti 0 +tocreat 0 +complex 0 +successfulli 0 +effectiveor 0 +latter 0 +topicsar 0 +feedback 0 +sai 0 +import 0 +employersand 0 +often 0 +lack 0 +graduat 0 +version 0 +last 0 +realbo 0 +largegroup 0 +sever 0 +reason 0 +try 0 +approach 0 +isthat 0 +short 0 +realist 0 +cannotlearn 0 +session 0 +devotedto 0 +discuss 0 +regular 0 +thegroup 0 +usual 0 +hard 0 +isto 0 +effectivelytogeth 0 +head 0 +disast 0 +beavoid 0 +correct 0 +worktogeth 0 +requirementsanalysi 0 +possibl 0 +thenorm 0 +areal 0 +search 0 +engineeringinstitut 0 +master 0 +providedat 0 +assign 0 +playthat 0 +allow 0 +posit 0 +attach 0 +listof 0 +phase 0 +projectso 0 +part 0 +theproject 0 +outlin 0 +natur 0 +principl 0 +process 0 +model 0 +risk 0 +assess 0 +cost 0 +estim 0 +metric 0 +verif 0 +valid 0 +evolut 0 +reus 0 +ethic 0 +profession 0 +embed 0 +safeti 0 +take 0 +ofth 0 +aspect 0 +softwaredevelop 0 +member 0 +responsiblefor 0 +present 0 +administr 0 +assist 0 +updat 0 +track 0 +statu 0 +sure 0 +proper 0 +held 0 +get 0 +time 0 +princip 0 +architect 0 +consist 0 +hardwar 0 +platform 0 +transit 0 +exist 0 +augment 0 +necessari 0 +current 0 +factor 0 +respect 0 +survei 0 +interview 0 +employe 0 +releas 0 +duri 0 +characterist 0 +normal 0 +languag 0 +implement 0 +handl 0 +appear 0 +clariti 0 +manual 0 +determin 0 +us 0 +readabl 0 +understand 0 +support 0 +creat 0 +guid 0 +deliv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..807401e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,95 @@ +assign 1 +softwar 1 +note 1 +mail 1 +washington 1 +cours 1 +system 1 +time 1 +leveson 1 +offic 1 +sieg 1 +hour 1 +appoint 1 +carlson 1 +specif 1 +includ 1 +engin 1 +requir 1 +winter 1 +home 0 +pagecs 0 +engineeringmeet 0 +locat 0 +loew 0 +mondai 0 +wednesdai 0 +fridai 0 +professor 0 +nanci 0 +phone 0 +eduta 0 +adam 0 +educours 0 +descriptionthi 0 +studi 0 +concept 0 +method 0 +tool 0 +design 0 +construct 0 +test 0 +analysi 0 +document 0 +larg 0 +also 0 +technic 0 +topic 0 +essenti 0 +creat 0 +complex 0 +successfulli 0 +project 0 +manag 0 +textbookghezzi 0 +jazayeri 0 +mandrioli 0 +fundament 0 +prentic 0 +hall 0 +sampl 0 +interview 0 +question 0 +produc 0 +consum 0 +petri 0 +axiomat 0 +coupl 0 +cohes 0 +link 0 +interest 0 +syllabu 0 +updat 0 +pleas 0 +read 0 +newsgroup 0 +access 0 +machin 0 +send 0 +class 0 +mailinglist 0 +new 0 +comp 0 +risk 0 +militari 0 +standard 0 +defens 0 +develop 0 +inform 0 +avail 0 +spring 0 +comput 0 +scienc 0 +departmentsuggest 0 +feedback 0 +request 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..1ae803df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,346 @@ +lisp 3 +januari 2 +program 2 +languag 2 +copi 2 +common 2 +assign 2 +februari 2 +tuesdai 2 +sieg 2 +cours 2 +exam 2 +march 2 +thursdai 2 +project 2 +onlin 2 +part 2 +macintosh 2 +mscc 2 +final 1 +read 1 +book 1 +system 1 +student 1 +implement 1 +ruth 1 +hall 1 +room 1 +schedul 1 +mail 1 +list 1 +materi 1 +file 1 +facil 1 +email 1 +archiv 1 +examin 1 +info 1 +tutori 1 +mac 1 +window 1 +pleas 1 +review 1 +session 1 +solut 1 +comput 1 +thompson 1 +offic 1 +hour 1 +mondai 1 +topic 1 +also 1 +midterm 1 +class 1 +mani 1 +artifici 1 +intellig 1 +sever 1 +includ 1 +purchas 1 +center 1 +us 1 +edit 1 +refer 1 +provid 1 +support 1 +java 1 +call 1 +compil 1 +cover 1 +issu 1 +unix 1 +work 1 +note 1 +help 1 +turn 1 +page 1 +writeup 1 +winter 1 +steve 1 +tanimoto 1 +depart 1 +scienc 1 +washington 1 +meet 1 +wednesdai 1 +post 1 +number 1 +homework 1 +text 1 +get 1 +element 1 +question 1 +standard 1 +access 1 +download 1 +html 1 +world 1 +wide 1 +techniqu 1 +offer 1 +host 1 +extens 1 +inthompson 1 +stat 1 +free 1 +given 1 +close 1 +messag 1 +click 1 +token 1 +bring 1 +check 1 +koch 1 +descript 1 +fridai 1 +mileston 1 +home 0 +pagecs 0 +instructor 0 +engin 0 +univers 0 +seattl 0 +andersonmeet 0 +except 0 +time 0 +tent 0 +transpar 0 +past 0 +lectur 0 +credit 0 +grade 0 +particip 0 +hardcopi 0 +select 0 +section 0 +first 0 +ofread 0 +recent 0 +throughout 0 +logic 0 +prolog 0 +grammar 0 +andpars 0 +understand 0 +shell 0 +expert 0 +either 0 +whole 0 +bookstor 0 +approxim 0 +order 0 +combin 0 +fromth 0 +librari 0 +entitl 0 +freeman 0 +chapter 0 +introduct 0 +incommon 0 +glossari 0 +purchasedsepar 0 +basement 0 +commun 0 +build 0 +answer 0 +andit 0 +referenceon 0 +seem 0 +best 0 +tabl 0 +contentspag 0 +rather 0 +try 0 +entir 0 +orpostscript 0 +sourc 0 +code 0 +usingcommon 0 +site 0 +fordigitool 0 +compani 0 +thatmaintain 0 +currentinform 0 +interest 0 +link 0 +applic 0 +introductionto 0 +anoth 0 +websit 0 +thatdoesn 0 +alwai 0 +respond 0 +promptli 0 +trail 0 +announc 0 +welcom 0 +although 0 +catalog 0 +theirimplement 0 +interpret 0 +buildingprogram 0 +attent 0 +focu 0 +mainli 0 +particular 0 +programminglanguag 0 +explor 0 +relev 0 +tointepret 0 +investig 0 +addit 0 +tradit 0 +alsolook 0 +current 0 +visual 0 +programmingfacil 0 +quarter 0 +run 0 +themathemat 0 +locat 0 +thebas 0 +varieti 0 +mathematica 0 +yacc 0 +allegrocommon 0 +softwar 0 +particularli 0 +powerfulenviron 0 +full 0 +integr 0 +editor 0 +fred 0 +graphicsand 0 +user 0 +interfac 0 +construct 0 +network 0 +easili 0 +transfer 0 +machinesof 0 +difficulti 0 +supplement 0 +theirown 0 +packag 0 +xlisp 0 +micro 0 +emac 0 +howev 0 +limit 0 +abil 0 +altern 0 +wish 0 +need 0 +theseresourc 0 +internet 0 +thatxlisp 0 +bare 0 +bone 0 +nothav 0 +develop 0 +fortun 0 +power 0 +disadvantag 0 +must 0 +labunless 0 +fromdigitool 0 +normal 0 +cost 0 +special 0 +dealallow 0 +point 0 +version 0 +allegro 0 +lispfor 0 +franz 0 +attract 0 +tool 0 +test 0 +introduc 0 +thelaboratori 0 +thistim 0 +instead 0 +go 0 +regular 0 +classroom 0 +week 0 +move 0 +beginn 0 +guid 0 +might 0 +sent 0 +new 0 +group 0 +regardingread 0 +path 0 +remind 0 +todai 0 +even 0 +hard 0 +printout 0 +exampl 0 +onthursdai 0 +option 0 +held 0 +insieg 0 +becov 0 +convert 0 +string 0 +symbol 0 +import 0 +found 0 +send 0 +receiv 0 +modif 0 +deadlin 0 +announcedearli 0 +plu 0 +postscript 0 +viewer 0 +avail 0 +pictur 0 +snowflak 0 +detail 0 +complet 0 +projectgener 0 +find 0 +aboutdemonstr 0 +onmondai 0 +multipl 0 +choic 0 +format 0 +mark 0 +sens 0 +form 0 +pencil 0 +exercisestokenizerassign 0 +andpart 0 +parsertokenizerpart 0 +snowflakeassign 0 +local 0 +gener 0 +ondemonstr 0 +show 0 +displai 0 +demonstr 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..7b1e0643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,66 @@ +assign 2 +program 2 +introduct 1 +inform 1 +april 1 +artifici 1 +hour 1 +solut 1 +second 1 +postscript 1 +intellig 1 +holden 1 +offic 1 +redston 1 +mondai 1 +instruct 1 +project 1 +first 1 +homework 1 +third 1 +final 1 +allegro 1 +emac 1 +refcard 1 +intelligencecs 0 +spring 0 +professor 0 +alistair 0 +csoffic 0 +noon 0 +noonta 0 +joshua 0 +msoffic 0 +thompson 0 +thursdai 0 +text 0 +rich 0 +knight 0 +secondedit 0 +touretzki 0 +common 0 +lisp 0 +gentl 0 +symboliccomput 0 +gener 0 +basic 0 +comput 0 +cours 0 +outlin 0 +turnin 0 +sampl 0 +june 0 +exam 0 +us 0 +emacsinterfac 0 +interfac 0 +download 0 +standalonelisp 0 +macintosh 0 +note 0 +check 0 +grade 0 +record 0 +type 0 +gradesredston 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..65b2ab5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,76 @@ +acrobat 1 +format 1 +winter 1 +latex 1 +syllabu 1 +midtem 1 +solut 1 +postscript 1 +ruzzo 1 +last 1 +updat 1 +text 1 +file 1 +legibl 1 +adob 1 +free 1 +viewer 1 +page 1 +intro 0 +algorithm 0 +larri 0 +martin 0 +tompaclass 0 +messag 0 +check 0 +email 0 +frequent 0 +book 0 +errata 0 +list 0 +handout 0 +homework 0 +web 0 +previou 0 +quarter 0 +karlin 0 +thecours 0 +materi 0 +provid 0 +three 0 +plain 0 +ascii 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +latest 0 +greatest 0 +ghostscript 0 +home 0 +window 0 +linux 0 +time 0 +support 0 +fewer 0 +system 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +print 0 +ghostscriptcan 0 +exampl 0 +tompa 0 +aberman 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..429a8a5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,88 @@ +midterm 1 +final 1 +spring 1 +everyth 1 +acrobat 1 +format 1 +jayram 1 +page 1 +washington 1 +syllabu 1 +theori 1 +comput 1 +larri 1 +ruzzo 1 +gener 1 +instructor 1 +time 1 +offic 1 +hour 1 +sieg 1 +home 1 +problem 1 +class 1 +mail 1 +latex 1 +postscript 1 +file 1 +legibl 1 +adob 1 +free 1 +viewer 1 +intro 0 +introduct 0 +inform 0 +thathachar 0 +meet 0 +tent 0 +welcom 0 +document 0 +sendmail 0 +last 0 +updat 0 +messag 0 +sent 0 +list 0 +textbook 0 +errata 0 +handout 0 +admin 0 +solut 0 +sourc 0 +thecours 0 +materi 0 +provid 0 +three 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +quit 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +latest 0 +greatest 0 +ghostscript 0 +window 0 +linux 0 +support 0 +fewer 0 +system 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +print 0 +ghostscriptcan 0 +exampl 0 +cours 0 +web 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..6e0a3b9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,48 @@ +databas 1 +offic 1 +link 1 +introduct 1 +shapiro 1 +sieg 1 +hour 1 +page 1 +imag 1 +systemscs 0 +systemsfal 0 +quarter 0 +instructor 0 +prof 0 +linda 0 +telephon 0 +patrick 0 +crowlei 0 +pcrowlei 0 +announc 0 +syllabu 0 +assign 0 +homework 0 +word 0 +shift 0 +left 0 +click 0 +save 0 +potenti 0 +us 0 +unisql 0 +home 0 +qbic 0 +queri 0 +content 0 +manag 0 +system 0 +dbm 0 +probabl 0 +interest 0 +want 0 +know 0 +window 0 +back 0 +cours 0 +webcs 0 +request 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..3b906164 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,77 @@ +project 1 +page 1 +lectur 1 +section 1 +instructor 1 +washington 1 +cours 1 +solut 1 +feedback 1 +home 1 +brian 1 +bershad 1 +offic 1 +hour 1 +midterm 1 +materi 1 +avail 1 +person 1 +introduct 0 +oper 0 +system 0 +autumn 0 +sung 0 +choi 0 +sieg 0 +appoint 0 +intro 0 +adminth 0 +class 0 +outlin 0 +administr 0 +info 0 +textbook 0 +grade 0 +andoth 0 +word 0 +wisdom 0 +messag 0 +mail 0 +sent 0 +archiv 0 +scale 0 +first 0 +schedulewhat 0 +cover 0 +schedul 0 +aggress 0 +andwil 0 +updat 0 +regularli 0 +reflect 0 +actual 0 +pace 0 +note 0 +handout 0 +slide 0 +projectsdescript 0 +relat 0 +solutionsto 0 +notesnot 0 +watchthi 0 +space 0 +carefulli 0 +inform 0 +vital 0 +surviv 0 +andgrad 0 +hint 0 +appear 0 +receiv 0 +onproject 0 +send 0 +anonym 0 +wish 0 +lost 0 +click 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..c4a5e06f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,87 @@ +home 1 +page 1 +quarter 1 +comput 1 +autumn 1 +document 1 +help 1 +cours 1 +spring 1 +scienc 1 +engin 1 +degre 1 +netscap 1 +quarterwelcom 0 +world 0 +wide 0 +hypermedia 0 +contain 0 +bounti 0 +inform 0 +class 0 +keepin 0 +mind 0 +mean 0 +static 0 +informationwil 0 +ad 0 +frequent 0 +problem 0 +thisdocu 0 +send 0 +mail 0 +pighin 0 +click 0 +classpersonnel 0 +professor 0 +student 0 +syllabuscours 0 +calendarta 0 +offic 0 +hourshandout 0 +assignmentslectur 0 +notesread 0 +assignmentshomework 0 +assignmentsprojectsproject 0 +handoutsproject 0 +artifactsproject 0 +sessionsproject 0 +grade 0 +policyproject 0 +write 0 +upslibui 0 +documentationoth 0 +relat 0 +informationget 0 +classhearn 0 +baker 0 +erratath 0 +instruct 0 +labus 0 +indi 0 +guid 0 +opengl 0 +exampl 0 +program 0 +winter 0 +pagegraph 0 +linkssgi 0 +silicon 0 +surfgrafica 0 +obscurasiggraphgrailgraph 0 +site 0 +indexoth 0 +us 0 +linksmvi 0 +visitor 0 +room 0 +schedul 0 +departmentth 0 +programth 0 +programweb 0 +helpbas 0 +helpmosa 0 +lynxus 0 +indyspighin 0 +washington 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..b7180599 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,106 @@ +offic 1 +format 1 +hour 1 +disabl 1 +html 1 +somani 1 +jari 1 +accommod 1 +document 1 +class 1 +student 1 +servic 1 +letter 1 +provid 1 +latex 1 +autumn 0 +intro 0 +comput 0 +commun 0 +networksautumn 0 +instructor 0 +arun 0 +cslectur 0 +sieg 0 +eebphon 0 +kristensen 0 +csoffic 0 +chang 0 +tomatch 0 +prof 0 +thu 0 +cover 0 +everi 0 +week 0 +andprovid 0 +larger 0 +timewindow 0 +consult 0 +messag 0 +check 0 +email 0 +frequent 0 +lectur 0 +overheadshomeworksprojectsinterest 0 +stuffattentionif 0 +would 0 +like 0 +request 0 +academ 0 +pleasecontact 0 +schmitz 0 +havea 0 +indic 0 +requiresacadem 0 +pleas 0 +present 0 +discuss 0 +might 0 +need 0 +file 0 +cours 0 +materi 0 +three 0 +hypertext 0 +markup 0 +languag 0 +handl 0 +browser 0 +fact 0 +current 0 +look 0 +view 0 +mani 0 +handout 0 +origin 0 +convert 0 +us 0 +latexhtml 0 +strang 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +legibl 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +postscript 0 +ghostscript 0 +home 0 +page 0 +free 0 +viewer 0 +window 0 +linux 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..d91f05f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,78 @@ +format 1 +sieg 1 +html 1 +ladner 1 +document 1 +offic 1 +hour 1 +provid 1 +latex 1 +washington 1 +spring 0 +intro 0 +comput 0 +commun 0 +networksspr 0 +instructor 0 +richard 0 +cslectur 0 +phone 0 +noonta 0 +william 0 +chan 0 +wchan 0 +csoffic 0 +class 0 +messag 0 +check 0 +email 0 +frequent 0 +lectur 0 +overheadshomeworksprojectsabout 0 +file 0 +cours 0 +materi 0 +three 0 +hypertext 0 +markup 0 +languag 0 +handl 0 +browser 0 +fact 0 +current 0 +look 0 +view 0 +mani 0 +handout 0 +origin 0 +convert 0 +us 0 +latexhtml 0 +strang 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +legibl 0 +figur 0 +complex 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +postscript 0 +ghostscript 0 +home 0 +page 0 +free 0 +viewer 0 +window 0 +linux 0 +eduwchan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..5733a077 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,10 @@ +fall 1 +home 0 +pagecs 0 +advanc 0 +digit 0 +designt 0 +kehl 0 +page 0 +found 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..a1514921 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,124 @@ +logic 1 +data 1 +home 1 +page 1 +march 1 +homework 1 +kehl 1 +inform 1 +hour 1 +offic 1 +final 1 +fridai 1 +februari 1 +midnight 1 +sampl 1 +option 1 +optionlab 1 +combin 1 +sequenti 1 +fpga 1 +depart 1 +comput 1 +scienc 1 +engin 1 +book 1 +server 1 +washington 1 +pagecs 0 +advanc 0 +digit 0 +designt 0 +fall 0 +welcom 0 +cours 0 +time 0 +place 0 +johnson 0 +import 0 +announc 0 +last 0 +updat 0 +summari 0 +syllabusschedul 0 +polici 0 +staff 0 +instructor 0 +mark 0 +savoi 0 +tue 0 +savac 0 +richard 0 +chinn 0 +thur 0 +richin 0 +howard 0 +chang 0 +gener 0 +shchang 0 +csjason 0 +aaron 0 +scott 0 +stephen 0 +hardwar 0 +laboratori 0 +manag 0 +student 0 +work 0 +group 0 +exam 0 +review 0 +topic 0 +cover 0 +quarterhomework 0 +assignmentsweb 0 +duehomework 0 +writeup 0 +written 0 +assign 0 +project 0 +abel 0 +state 0 +machin 0 +test 0 +fixtur 0 +handout 0 +memori 0 +communicationoth 0 +murphi 0 +recent 0 +dilbert 0 +comic 0 +collect 0 +resourc 0 +care 0 +gaetano 0 +borriello 0 +list 0 +vlsi 0 +link 0 +comprehensivelist 0 +icmanufactur 0 +nation 0 +semiconductor 0 +sheet 0 +motorola 0 +philip 0 +semiconduct 0 +micron 0 +technolog 0 +sheetsth 0 +copyright 0 +univers 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..375e03f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,102 @@ +postscript 3 +mondai 2 +wednesdai 2 +read 2 +fridai 2 +design 2 +homework 1 +verilog 1 +pipelin 1 +program 1 +comput 1 +address 1 +sieg 1 +review 1 +answer 1 +sampl 1 +assembl 1 +organ 1 +snyder 1 +offic 1 +hour 1 +mail 1 +instruct 1 +simul 1 +sheet 1 +skim 1 +holidai 1 +mip 1 +segment 1 +binari 1 +data 1 +fall 1 +gener 0 +inform 0 +meet 0 +loew 0 +instructor 0 +larri 0 +appoint 0 +assist 0 +judi 0 +watson 0 +jwatson 0 +robert 0 +chenoffic 0 +tuesdai 0 +thursdays 0 +chensg 0 +catalog 0 +descript 0 +model 0 +structur 0 +function 0 +arithmet 0 +logic 0 +unit 0 +regist 0 +transfer 0 +level 0 +hardwar 0 +microprogram 0 +control 0 +memori 0 +hierarchi 0 +andorgan 0 +system 0 +compon 0 +interconnect 0 +laboratoryproject 0 +involv 0 +setprocessor 0 +prerequisit 0 +class 0 +note 0 +html 0 +appendix 0 +color 0 +chap 0 +revis 0 +midterm 0 +fast 0 +materi 0 +follow 0 +file 0 +avail 0 +addit 0 +modul 0 +common 0 +sourc 0 +form 0 +test 0 +prog 0 +simpl 0 +languag 0 +page 0 +previou 0 +quarter 0 +referencesthi 0 +free 0 +refer 0 +card 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..db042ee5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,232 @@ +lisp 2 +class 2 +april 2 +cours 2 +newsgroup 2 +assign 2 +sieg 2 +final 2 +mondai 1 +fridai 1 +exam 1 +wednesdai 1 +us 1 +project 1 +common 1 +artifici 1 +intellig 1 +tuesdai 1 +post 1 +mail 1 +interfac 1 +schedul 1 +book 1 +program 1 +creat 1 +access 1 +topic 1 +midterm 1 +edit 1 +separ 1 +examin 1 +test 1 +march 1 +system 1 +review 1 +spring 1 +home 1 +steve 1 +tanimoto 1 +comput 1 +washington 1 +jeremi 1 +hall 1 +room 1 +offic 1 +hour 1 +machin 1 +messag 1 +send 1 +csor 1 +new 1 +pnew 1 +list 1 +implement 1 +read 1 +element 1 +purchas 1 +mathemat 1 +theori 1 +languag 1 +link 1 +allegro 1 +window 1 +bring 1 +mark 1 +sens 1 +form 1 +represent 1 +search 1 +reason 1 +neural 1 +net 1 +expert 1 +page 1 +term 1 +session 1 +part 1 +lectur 1 +preliminari 1 +demo 1 +peer 1 +pagecs 0 +introduct 0 +instructor 0 +depart 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +baermeet 0 +thursdai 0 +archiv 0 +tent 0 +number 0 +credit 0 +grade 0 +homework 0 +particip 0 +hardcopi 0 +requir 0 +text 0 +freeman 0 +work 0 +hard 0 +textbook 0 +self 0 +contain 0 +exampl 0 +youdon 0 +onlin 0 +refer 0 +materi 0 +provid 0 +answer 0 +mani 0 +question 0 +andit 0 +standard 0 +referenceon 0 +seem 0 +best 0 +tabl 0 +contentspag 0 +rather 0 +try 0 +download 0 +entir 0 +html 0 +file 0 +orpostscript 0 +sourc 0 +code 0 +usingcommon 0 +interest 0 +info 0 +forprogram 0 +world 0 +wide 0 +applic 0 +facil 0 +order 0 +take 0 +advantag 0 +windowsimplement 0 +excel 0 +featur 0 +programdevelop 0 +construct 0 +theintel 0 +pentium 0 +laboratori 0 +free 0 +version 0 +isfor 0 +bedownload 0 +franz 0 +kind 0 +least 0 +attract 0 +option 0 +givenaccord 0 +close 0 +multipl 0 +choic 0 +alist 0 +know 0 +announc 0 +welcom 0 +coversboth 0 +implementationof 0 +techniqu 0 +includ 0 +programmingtechniqu 0 +knowledg 0 +logicalreason 0 +probabilist 0 +case 0 +base 0 +plan 0 +learn 0 +understand 0 +vision 0 +clo 0 +staff 0 +given 0 +held 0 +inour 0 +regular 0 +meet 0 +note 0 +continu 0 +programmingpart 0 +turn 0 +paper 0 +follow 0 +solut 0 +exercis 0 +state 0 +screenshot 0 +user 0 +descript 0 +ofhow 0 +go 0 +gener 0 +move 0 +current 0 +statu 0 +workload 0 +reduct 0 +propos 0 +circul 0 +email 0 +approv 0 +rest 0 +ofproject 0 +either 0 +give 0 +right 0 +orturn 0 +progress 0 +report 0 +memori 0 +holidai 0 +insieg 0 +explan 0 +evalu 0 +wrap 0 +demonstr 0 +evaluationof 0 +june 0 +assignmentsassign 0 +portion 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..95f31a6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,89 @@ +home 1 +burn 1 +data 1 +page 1 +inform 1 +hour 1 +societi 1 +robot 1 +depart 1 +comput 1 +scienc 1 +engin 1 +sourc 1 +book 1 +washington 1 +pagecs 0 +digit 0 +system 0 +designstev 0 +spring 0 +welcom 0 +cours 0 +time 0 +place 0 +loew 0 +import 0 +announc 0 +last 0 +updat 0 +summari 0 +syllabu 0 +schedul 0 +polici 0 +staff 0 +steve 0 +instructor 0 +kent 0 +smith 0 +casei 0 +anderson 0 +stephen 0 +hardwar 0 +laboratori 0 +manag 0 +offic 0 +studentslab 0 +assign 0 +mchc 0 +info 0 +nice 0 +introduct 0 +fred 0 +martinrobot 0 +seattl 0 +portland 0 +area 0 +societyoth 0 +murphi 0 +recent 0 +dilbert 0 +comic 0 +collect 0 +resourc 0 +care 0 +gaetano 0 +borriello 0 +comprehens 0 +list 0 +nation 0 +semiconductor 0 +sheet 0 +motorola 0 +server 0 +philip 0 +semiconduct 0 +serverth 0 +copyright 0 +univers 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +accur 0 +quot 0 +duli 0 +credit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..eef16262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,34 @@ +cours 1 +inform 1 +project 1 +home 0 +pagewelcom 0 +contain 0 +bevi 0 +relatingto 0 +usual 0 +document 0 +frequentlychang 0 +send 0 +mail 0 +bswest 0 +csif 0 +encount 0 +problem 0 +classpersonnelsyllabuslectur 0 +scheduleguest 0 +lectur 0 +scheduleoffic 0 +hoursproject 0 +handout 0 +schedul 0 +help 0 +session 0 +final 0 +projectoth 0 +bug 0 +erratarefer 0 +pagesmidterm 0 +questionnairebswest 0 +washington 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..b05fc521 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,94 @@ +avail 2 +cecil 1 +year 1 +exam 1 +vortex 1 +languag 1 +compil 1 +postscript 1 +assign 1 +list 1 +answer 1 +refer 1 +version 1 +program 1 +quarter 1 +chamber 1 +offic 1 +hour 1 +sieg 1 +vass 1 +read 1 +last 1 +midterm 1 +final 1 +travers 1 +manual 1 +page 1 +research 1 +home 0 +pagecs 0 +implement 0 +languageswint 0 +import 0 +cours 0 +informationmeet 0 +time 0 +instructor 0 +craigchamb 0 +start 0 +second 0 +week 0 +litvinov 0 +cubicl 0 +floor 0 +archivesslid 0 +lectur 0 +handout 0 +full 0 +class 0 +homework 0 +messag 0 +sent 0 +mail 0 +archivedher 0 +note 0 +test 0 +closedbook 0 +affect 0 +kind 0 +question 0 +wereask 0 +sampl 0 +solut 0 +informationhandout 0 +tutorialsth 0 +tutorialth 0 +tutorialhow 0 +front 0 +enda 0 +file 0 +interestdead 0 +elim 0 +simpl 0 +exampl 0 +idfacfg 0 +interfac 0 +frameworkvortex 0 +textual 0 +descript 0 +grammarcecil 0 +documentationdocument 0 +html 0 +format 0 +standard 0 +librari 0 +resourcesth 0 +previou 0 +includ 0 +slide 0 +inform 0 +found 0 +onmark 0 +leon 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..44fc2d62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,62 @@ +avail 1 +languag 1 +program 1 +cecil 1 +vortex 1 +compil 1 +found 1 +home 1 +implement 1 +cours 1 +sieg 1 +chamber 1 +offic 1 +hour 1 +jdean 1 +grove 1 +chateau 1 +read 1 +list 1 +inform 1 +page 1 +research 1 +pagecs 0 +languagesimport 0 +informationmeet 0 +time 0 +instructor 0 +craig 0 +jeff 0 +dean 0 +dave 0 +come 0 +find 0 +confer 0 +room 0 +archivesslid 0 +lectur 0 +slide 0 +turori 0 +handout 0 +assign 0 +full 0 +class 0 +messag 0 +sent 0 +mail 0 +archiv 0 +projectth 0 +project 0 +sort 0 +analysi 0 +andtransform 0 +optimizingcompil 0 +object 0 +orient 0 +written 0 +cecilproject 0 +manual 0 +resourcesmor 0 +onmark 0 +leon 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..0df548a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,13 @@ +assign 1 +home 0 +pagecs 0 +softwar 0 +engineeringdavid 0 +notkin 0 +spring 0 +introductori 0 +handout 0 +kwic 0 +sampl 0 +projectsnotkin 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..9b9b6e86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,86 @@ +languag 1 +list 1 +page 1 +program 1 +mail 1 +send 1 +washington 1 +home 1 +notkin 1 +offic 1 +hour 1 +handout 1 +thread 1 +messag 1 +subscrib 1 +research 1 +resourc 1 +lambda 1 +calculu 1 +univers 1 +orient 1 +pagecs 0 +concept 0 +languagesautumn 0 +loew 0 +instructor 0 +david 0 +sieg 0 +byappoint 0 +kurt 0 +partridg 0 +kepart 0 +floor 0 +cubiclescours 0 +introductori 0 +html 0 +postscript 0 +assign 0 +readingsmail 0 +archivesw 0 +us 0 +administr 0 +instructionalpurpos 0 +wish 0 +refer 0 +previous 0 +sent 0 +archiv 0 +emailto 0 +majordomo 0 +singl 0 +line 0 +bodi 0 +subject 0 +csegener 0 +yahoo 0 +mark 0 +leon 0 +excel 0 +pagesprogram 0 +critiquesgari 0 +leaven 0 +self 0 +studi 0 +pagefunct 0 +resourcesmit 0 +scheme 0 +pagecmu 0 +standard 0 +pagea 0 +gentl 0 +introduct 0 +mlhaskel 0 +monash 0 +universityobject 0 +geneva 0 +object 0 +info 0 +cecil 0 +project 0 +dylan 0 +carnegi 0 +mellon 0 +appl 0 +comput 0 +question 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..c9721713 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,118 @@ +homework 2 +solut 2 +exam 1 +anderson 1 +washington 1 +hour 1 +class 1 +mondai 1 +project 1 +gilligan 1 +design 1 +analysi 1 +algorithm 1 +lectur 1 +offic 1 +time 1 +confer 1 +room 1 +cours 1 +set 1 +discuss 1 +write 1 +island 1 +algorithmscs 0 +winter 0 +instructor 0 +richard 0 +seig 0 +appoint 0 +teach 0 +assist 0 +william 0 +chan 0 +wchan 0 +wednesdai 0 +chateau 0 +sieg 0 +floor 0 +cubicl 0 +somebodi 0 +els 0 +us 0 +inform 0 +prerequisit 0 +go 0 +assum 0 +alreadi 0 +undergradu 0 +wrong 0 +know 0 +soon 0 +possibl 0 +suggest 0 +readingtextbook 0 +errata 0 +list 0 +realli 0 +preview 0 +check 0 +outer 0 +sapplet 0 +assign 0 +handout 0 +written 0 +gener 0 +tuesdai 0 +background 0 +quiz 0 +post 0 +script 0 +midterm 0 +cancel 0 +lack 0 +interest 0 +final 0 +told 0 +march 0 +probabl 0 +verifi 0 +close 0 +book 0 +cover 0 +materi 0 +willconsist 0 +short 0 +answer 0 +problem 0 +solv 0 +question 0 +bureaucrat 0 +stuffgrad 0 +base 0 +upon 0 +particip 0 +work 0 +togeth 0 +okai 0 +homeworkproblem 0 +classmat 0 +must 0 +upindepend 0 +rule 0 +could 0 +invok 0 +betweenani 0 +mustwatch 0 +least 0 +half 0 +theori 0 +thatan 0 +episod 0 +equival 0 +reboot 0 +anyth 0 +thatsurv 0 +learn 0 +understood 0 +eduwchan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..175ea6cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,218 @@ +cours 2 +parallel 2 +topic 2 +homework 2 +algorithm 1 +memori 1 +anderson 1 +problem 1 +thursdai 1 +paper 1 +theori 1 +comput 1 +could 1 +descript 1 +effici 1 +note 1 +april 1 +lectur 1 +share 1 +machin 1 +book 1 +gener 1 +sieg 1 +instructor 1 +offic 1 +syllabu 1 +list 1 +connect 1 +pointer 1 +although 1 +go 1 +model 1 +real 1 +year 1 +consid 1 +expect 1 +howev 1 +nice 1 +teach 1 +algorithmscs 0 +spring 0 +inform 0 +meet 0 +richard 0 +hour 0 +appointment 0 +mail 0 +address 0 +exam 0 +catalog 0 +design 0 +analysi 0 +fundament 0 +algorithmsfor 0 +sort 0 +arithmet 0 +matrix 0 +graph 0 +addit 0 +select 0 +emphasi 0 +techniqu 0 +approach 0 +us 0 +developingfast 0 +limit 0 +theirefficaci 0 +prerequisit 0 +equival 0 +major 0 +assign 0 +plu 0 +rambl 0 +commentsabout 0 +transpar 0 +code 0 +analysisfor 0 +rank 0 +compon 0 +algorithmi 0 +simpler 0 +correct 0 +section 0 +latex 0 +version 0 +referencesfor 0 +erew 0 +crew 0 +ullman 0 +yannakaki 0 +tuesdai 0 +union 0 +find 0 +certifi 0 +write 0 +impli 0 +exist 0 +consensu 0 +base 0 +upon 0 +swap 0 +likelysometh 0 +insid 0 +next 0 +supercomput 0 +asynchron 0 +refer 0 +martel 0 +foc 0 +buss 0 +manuscript 0 +special 0 +content 0 +whim 0 +titl 0 +would 0 +mayb 0 +smpc 0 +start 0 +collect 0 +basic 0 +spend 0 +time 0 +give 0 +cover 0 +term 0 +indic 0 +lookingat 0 +pertain 0 +specif 0 +interconnect 0 +topolog 0 +wewil 0 +situat 0 +cost 0 +access 0 +isnon 0 +uniform 0 +sens 0 +notconsid 0 +particular 0 +prove 0 +theorem 0 +andyou 0 +motiv 0 +practic 0 +consider 0 +goal 0 +indevelop 0 +come 0 +algorithmswhich 0 +conceiv 0 +three 0 +four 0 +set 0 +contain 0 +routin 0 +challeng 0 +goingto 0 +requir 0 +project 0 +happi 0 +student 0 +outsidework 0 +relat 0 +text 0 +introduct 0 +parallelalgorithm 0 +befollow 0 +close 0 +feel 0 +exception 0 +cheap 0 +youcould 0 +probabl 0 +without 0 +purchas 0 +copi 0 +origin 0 +plan 0 +volunt 0 +textwould 0 +progress 0 +fast 0 +volum 0 +artof 0 +program 0 +chose 0 +instead 0 +quit 0 +flexibl 0 +taught 0 +mychoic 0 +influenc 0 +interestingor 0 +uninterest 0 +also 0 +choic 0 +aseith 0 +tradit 0 +work 0 +researchcont 0 +number 0 +open 0 +mind 0 +turninto 0 +research 0 +result 0 +present 0 +half 0 +bake 0 +ideason 0 +provid 0 +other 0 +interest 0 +andenergi 0 +think 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..a0f140e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,31 @@ +page 1 +comput 1 +autumn 1 +home 0 +automata 0 +complex 0 +move 0 +current 0 +quarter 0 +portion 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accuratelyquot 0 +duli 0 +credit 0 +copyright 0 +depart 0 +scienc 0 +engin 0 +univers 0 +ofwashington 0 +comment 0 +webmast 0 +washington 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..edf9ae2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,19 @@ +postscript 1 +home 1 +page 1 +beam 1 +quiz 1 +fall 0 +automataautumn 0 +instructor 0 +paul 0 +welcom 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +exam 0 +final 0 +latex 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..35752992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,105 @@ +midterm 1 +cours 1 +acrobat 1 +format 1 +ruzzo 1 +sieg 1 +organ 1 +syllabu 1 +collabor 1 +fall 1 +comput 1 +complex 1 +larri 1 +nitin 1 +class 1 +mail 1 +last 1 +updat 1 +washington 1 +latex 1 +sourc 1 +postscript 1 +file 1 +provid 1 +legibl 1 +adob 1 +free 1 +viewer 1 +depart 1 +system 1 +page 1 +autumn 1 +automata 0 +tuth 0 +staffnameemailphoneoffic 0 +hour 0 +instructor 0 +sharma 0 +csmw 0 +messag 0 +sent 0 +list 0 +textbook 0 +errata 0 +handout 0 +administrivia 0 +homework 0 +thecours 0 +materi 0 +three 0 +plain 0 +ascii 0 +text 0 +includ 0 +command 0 +simpl 0 +thing 0 +assign 0 +gener 0 +quit 0 +figur 0 +math 0 +stuff 0 +hard 0 +imposs 0 +read 0 +latest 0 +greatest 0 +avail 0 +unix 0 +acroread 0 +perhap 0 +aavail 0 +ghostview 0 +ghostscript 0 +home 0 +window 0 +linux 0 +time 0 +support 0 +fewer 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +print 0 +ghostscriptcan 0 +exampl 0 +web 0 +portion 0 +reprint 0 +adapt 0 +foracadem 0 +nonprofit 0 +purpos 0 +accuratelyquot 0 +duli 0 +credit 0 +copyright 0 +scienc 0 +engin 0 +univers 0 +ofwashington 0 +comment 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..00d4dfe3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,94 @@ +complex 1 +proposit 1 +proof 1 +logic 1 +theorem 1 +satisfi 1 +prove 1 +test 1 +order 1 +issu 1 +well 1 +system 1 +algorithm 1 +first 1 +higher 1 +cours 1 +concentr 1 +search 1 +strategi 1 +theoret 1 +practic 1 +paper 1 +instal 1 +prover 1 +topic 0 +autumn 0 +gener 0 +inform 0 +instructor 0 +paul 0 +beam 0 +meet 0 +time 0 +tuesdai 0 +thursdai 0 +loew 0 +autom 0 +comput 0 +aid 0 +verif 0 +vlsi 0 +andsoftwar 0 +engin 0 +give 0 +attempt 0 +decid 0 +truthof 0 +statement 0 +casea 0 +flip 0 +side 0 +even 0 +us 0 +oftheorem 0 +often 0 +involv 0 +finitedomain 0 +interpret 0 +anywai 0 +consid 0 +varieti 0 +theoremprov 0 +within 0 +good 0 +choic 0 +consider 0 +work 0 +thesequest 0 +complexityand 0 +rel 0 +also 0 +examin 0 +anumb 0 +implement 0 +compar 0 +theoryand 0 +thing 0 +urquhart 0 +survei 0 +talk 0 +slide 0 +softwar 0 +amus 0 +sato 0 +andboy 0 +moor 0 +tester 0 +gsat 0 +june 0 +thedirectori 0 +proversther 0 +scatter 0 +process 0 +ofinstal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..a63449b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,38 @@ +comput 1 +home 1 +pagecs 0 +systemperform 0 +modelingspr 0 +host 0 +lazowskaandmaryvernonwelcom 0 +page 0 +system 0 +performancemodel 0 +meet 0 +mondai 0 +wednesdai 0 +fridai 0 +loew 0 +hall 0 +offic 0 +hourstent 0 +topic 0 +schedulecom 0 +goingsassignmentsproject 0 +informationmap 0 +queue 0 +network 0 +solut 0 +packag 0 +emailoth 0 +inform 0 +avail 0 +sigmetr 0 +confer 0 +measur 0 +model 0 +computersystemsuw 0 +depart 0 +scienc 0 +engineeringlazowska 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..80b0d08a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,76 @@ +postscript 1 +simul 1 +alpha 1 +sieg 1 +page 1 +system 1 +egger 1 +washington 1 +offic 1 +hour 1 +redston 1 +cours 1 +inform 1 +architectur 1 +tool 1 +sparc 1 +pentium 1 +uniprocessor 1 +info 1 +home 0 +comput 0 +architecturewint 0 +instructorsusan 0 +tuth 0 +tajoshua 0 +overview 0 +schedul 0 +continu 0 +updat 0 +lectur 0 +note 0 +problem 0 +set 0 +previou 0 +test 0 +histori 0 +specmark 0 +rate 0 +shade 0 +instuct 0 +atom 0 +build 0 +analysi 0 +tullsen 0 +execut 0 +driven 0 +instruct 0 +level 0 +superscalar 0 +close 0 +etch 0 +binari 0 +rewrit 0 +analyz 0 +code 0 +hardwar 0 +monitor 0 +multiflow 0 +compil 0 +pixi 0 +user 0 +manual 0 +dinero 0 +cach 0 +local 0 +machin 0 +powerpc 0 +applic 0 +multiprocessor 0 +spec 0 +benchmark 0 +neat 0 +center 0 +current 0 +futur 0 +processor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..bf700206 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,50 @@ +inform 1 +home 1 +instructor 1 +pighin 1 +offic 1 +hour 1 +thisdocu 1 +mail 1 +april 1 +assign 1 +cours 1 +pagecs 0 +oper 0 +system 0 +hank 0 +levi 0 +spring 0 +freder 0 +meet 0 +time 0 +chateau 0 +confer 0 +room 0 +number 0 +unit 0 +welcom 0 +page 0 +world 0 +wide 0 +short 0 +hypermedia 0 +document 0 +forcs 0 +contain 0 +class 0 +keep 0 +mind 0 +static 0 +especi 0 +classmessag 0 +ad 0 +frequent 0 +problem 0 +send 0 +announc 0 +first 0 +readi 0 +iti 0 +projectlevi 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..eff603e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,78 @@ +inform 1 +home 1 +comput 1 +document 1 +help 1 +scienc 1 +engin 1 +page 1 +class 1 +avail 1 +last 1 +year 1 +project 1 +degre 1 +program 1 +offer 1 +colleg 1 +mosaic 1 +pagecs 0 +graphicsautumn 0 +quarter 0 +welcom 0 +world 0 +wide 0 +hypermedia 0 +whichcontain 0 +wealth 0 +keep 0 +mind 0 +thatthi 0 +static 0 +addedfrequ 0 +problem 0 +send 0 +mail 0 +deros 0 +click 0 +professor 0 +cours 0 +syllabu 0 +lectur 0 +note 0 +written 0 +homework 0 +assign 0 +solut 0 +handout 0 +grade 0 +polici 0 +test 0 +cool 0 +imag 0 +addit 0 +get 0 +instruct 0 +us 0 +indi 0 +mvi 0 +visitor 0 +room 0 +schedul 0 +depart 0 +art 0 +follow 0 +topic 0 +basic 0 +hypertext 0 +markup 0 +languag 0 +html 0 +uniform 0 +resourc 0 +locat 0 +read 0 +usinglynx 0 +charact 0 +base 0 +browser 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..82211352 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,110 @@ +homework 2 +class 2 +begin 2 +fridai 1 +ebel 1 +logic 1 +data 1 +home 1 +page 1 +book 1 +offic 1 +hour 1 +wednesdai 1 +sieg 1 +mondai 1 +design 1 +carl 1 +inform 1 +staff 1 +paul 1 +larri 1 +combin 1 +sequenti 1 +fpga 1 +depart 1 +comput 1 +scienc 1 +engin 1 +sheet 1 +server 1 +washington 1 +principl 0 +digit 0 +system 0 +fall 0 +welcom 0 +cours 0 +time 0 +place 0 +loew 0 +import 0 +announc 0 +summari 0 +syllabu 0 +text 0 +instructor 0 +franklin 0 +thursdai 0 +hine 0 +hineskj 0 +tuesdai 0 +mcmurchi 0 +research 0 +tool 0 +guru 0 +document 0 +simul 0 +synthesi 0 +pamett 0 +board 0 +mostli 0 +complet 0 +still 0 +construct 0 +student 0 +work 0 +groupsfin 0 +exam 0 +review 0 +topic 0 +cover 0 +quarter 0 +assign 0 +note 0 +hand 0 +handout 0 +memori 0 +commun 0 +mother 0 +site 0 +list 0 +vlsi 0 +link 0 +comprehensivelist 0 +icmanufactur 0 +murphi 0 +recent 0 +dilbert 0 +comic 0 +nation 0 +semiconductor 0 +motorola 0 +philip 0 +semiconduct 0 +micron 0 +technolog 0 +copyright 0 +univers 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..bd640668 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,50 @@ +sieg 1 +hour 1 +artifici 1 +question 1 +index 0 +pagecs 0 +intelligencefal 0 +quarter 0 +intellig 0 +pose 0 +fundament 0 +andchalleng 0 +comput 0 +scienc 0 +build 0 +intelligentmachin 0 +cours 0 +address 0 +provid 0 +anin 0 +depth 0 +introduct 0 +select 0 +topic 0 +includ 0 +agentarchitectur 0 +knowledg 0 +represent 0 +search 0 +plan 0 +machinelearn 0 +reason 0 +uncertainti 0 +methodolog 0 +staff 0 +weldweld 0 +marc 0 +friedmanfriedman 0 +nick 0 +kushmericknick 0 +outlin 0 +topicsread 0 +assignmentsassign 0 +examsgradingresourcesth 0 +class 0 +mailinglist 0 +also 0 +archiv 0 +past 0 +messag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..3c65cff9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,102 @@ +read 1 +theori 1 +uncertainti 1 +decis 1 +hank 1 +washington 1 +pearl 1 +reason 1 +probabl 1 +make 1 +intellig 1 +offic 1 +mail 1 +class 1 +list 1 +probabilist 1 +sever 1 +nice 1 +refer 1 +book 1 +paper 1 +avail 1 +librari 1 +histor 1 +look 1 +math 1 +summari 1 +artifici 0 +winter 0 +professor 0 +steve 0 +sieg 0 +hour 0 +whenev 0 +around 0 +appoint 0 +email 0 +address 0 +goe 0 +member 0 +send 0 +request 0 +materi 0 +systemsthi 0 +requir 0 +text 0 +chapter 0 +without 0 +bui 0 +strappedfor 0 +cash 0 +though 0 +shafer 0 +uncertain 0 +reasoningthi 0 +collect 0 +foundat 0 +select 0 +copi 0 +grail 0 +jayn 0 +logic 0 +scienc 0 +fragmentari 0 +edit 0 +juli 0 +extrem 0 +interest 0 +technic 0 +foundationsof 0 +statist 0 +definit 0 +worth 0 +perspect 0 +alon 0 +heavi 0 +go 0 +place 0 +beautifulli 0 +written 0 +neapolitan 0 +expert 0 +system 0 +algorithmsa 0 +signific 0 +overlap 0 +good 0 +secondari 0 +sourc 0 +inform 0 +graphic 0 +model 0 +propagationalgorithm 0 +research 0 +arrang 0 +cours 0 +topic 0 +cover 0 +html 0 +postscript 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..78379240 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,271 @@ +cours 2 +khoro 2 +mondai 2 +assign 2 +fridai 2 +home 1 +imag 1 +page 1 +inform 1 +april 1 +class 1 +read 1 +wednesdai 1 +cantata 1 +comput 1 +part 1 +meet 1 +instal 1 +note 1 +next 1 +sun 1 +local 1 +environ 1 +copi 1 +engin 1 +week 1 +march 1 +onlin 1 +help 1 +contain 1 +document 1 +copyright 1 +materi 1 +washington 1 +first 1 +chapter 1 +run 1 +login 1 +file 1 +follow 1 +setenv 1 +khoros_hom 1 +path 1 +also 1 +tutori 1 +outlin 1 +experi 1 +take 1 +articl 1 +make 1 +process 1 +softwar 1 +msvc 1 +final 1 +june 1 +exam 1 +midterm 1 +plan 1 +select 1 +avail 1 +lectur 1 +student 1 +sign 1 +start 1 +applic 1 +account 1 +rene 1 +reed 1 +arrang 1 +sieg 1 +scienc 1 +version 1 +undergradu 1 +menu 1 +pagecs 0 +understandingwelcom 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +theclass 0 +keep 0 +mind 0 +static 0 +newinform 0 +especi 0 +messag 0 +ad 0 +frequent 0 +problem 0 +send 0 +mail 0 +mock 0 +notic 0 +subjectto 0 +view 0 +public 0 +site 0 +otherthan 0 +univers 0 +doexercis 0 +second 0 +exercis 0 +determin 0 +conveni 0 +torun 0 +aslillith 0 +edit 0 +workstat 0 +containxhost 0 +lilliththen 0 +cshrc 0 +manpath 0 +rlogin 0 +onto 0 +lillith 0 +rhost 0 +assignmentsand 0 +displai 0 +variabl 0 +appropri 0 +typecantata 0 +unix 0 +prompt 0 +machin 0 +georg 0 +haskhoro 0 +wwwhttp 0 +educ 0 +index 0 +htmland 0 +link 0 +itscours 0 +least 0 +twotop 0 +spatial 0 +resolut 0 +ideal 0 +pagesand 0 +anoth 0 +window 0 +noth 0 +turn 0 +third 0 +huerta 0 +andnevatia 0 +cvpr 0 +proceed 0 +tolook 0 +wolff 0 +fourth 0 +comparison 0 +three 0 +fast 0 +oper 0 +level 0 +learn 0 +effort 0 +requir 0 +announc 0 +examin 0 +pmin 0 +regular 0 +room 0 +cover 0 +combinationof 0 +post 0 +list 0 +topic 0 +studi 0 +remind 0 +approv 0 +sundai 0 +time 0 +review 0 +insieg 0 +period 0 +overhead 0 +transpar 0 +onneur 0 +net 0 +librari 0 +center 0 +floor 0 +packet 0 +number 0 +trainabl 0 +classifi 0 +permit 0 +temporari 0 +ofmatlab 0 +requirethat 0 +fill 0 +form 0 +contract 0 +know 0 +interest 0 +term 0 +project 0 +import 0 +ofth 0 +correct 0 +introduc 0 +pentium 0 +laboratori 0 +includingth 0 +develop 0 +evan 0 +mclain 0 +documentexplain 0 +transform 0 +current 0 +statu 0 +recent 0 +get 0 +withkhoro 0 +accompani 0 +pleas 0 +alreadi 0 +accesskhoro 0 +contact 0 +onthursdai 0 +pick 0 +youraccount 0 +name 0 +password 0 +itov 0 +weekend 0 +earli 0 +hour 0 +arelimit 0 +ahead 0 +email 0 +address 0 +andsh 0 +back 0 +offic 0 +kept 0 +lock 0 +either 0 +need 0 +knock 0 +orhav 0 +prior 0 +mani 0 +card 0 +willhav 0 +care 0 +slide 0 +resourc 0 +understand 0 +intro 0 +delft 0 +univ 0 +pattern 0 +recognit 0 +vision 0 +store 0 +thedepart 0 +brochur 0 +brochuremosa 0 +mosaic 0 +find 0 +itemsund 0 +balloon 0 +macmosa 0 +itemund 0 +navig 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..4a7906ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,174 @@ +novemb 2 +class 1 +parallel 1 +octob 1 +topic 1 +algorithm 1 +schedul 1 +begin 1 +tuesdai 1 +term 1 +project 1 +comput 1 +document 1 +machin 1 +neural 1 +network 1 +intel 1 +cours 1 +decemb 1 +home 1 +imag 1 +inform 1 +time 1 +normal 1 +start 1 +meet 1 +guest 1 +speaker 1 +univers 1 +complet 1 +pyramid 1 +embed 1 +overview 1 +architectur 1 +thursdai 1 +week 1 +implement 1 +languag 1 +paragon 1 +supercomput 1 +maspar 1 +copyright 1 +final 1 +exam 1 +pagecs 0 +processingwelcom 0 +page 0 +world 0 +wide 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +theclass 0 +keep 0 +mind 0 +static 0 +newinform 0 +ad 0 +informationon 0 +half 0 +hourearli 0 +dai 0 +prof 0 +nian 0 +simon 0 +fraser 0 +burnabi 0 +canada 0 +bharath 0 +modayur 0 +titl 0 +present 0 +effici 0 +object 0 +recognit 0 +simd 0 +mimd 0 +discuss 0 +scale 0 +invariantoper 0 +segment 0 +hierarchicalrelax 0 +us 0 +isodata 0 +approach 0 +burt 0 +hong 0 +rosenfeld 0 +introduct 0 +virtual 0 +process 0 +mesh 0 +brief 0 +treatment 0 +icon 0 +symbol 0 +analysi 0 +digit 0 +librari 0 +demo 0 +find 0 +topicsdur 0 +student 0 +activelyexplor 0 +written 0 +descript 0 +hand 0 +inon 0 +templat 0 +writeupsi 0 +avail 0 +resourcespvm 0 +virtualmachin 0 +softwar 0 +layear 0 +permit 0 +user 0 +program 0 +aviru 0 +made 0 +heterogen 0 +collect 0 +moreworkst 0 +conveni 0 +studydistribut 0 +technicalpubl 0 +includ 0 +paragonparallel 0 +system 0 +good 0 +arrai 0 +orient 0 +variousvendor 0 +info 0 +onth 0 +nation 0 +center 0 +sweden 0 +onlin 0 +theunivers 0 +tennesse 0 +resourc 0 +found 0 +neal 0 +friedman 0 +report 0 +also 0 +error 0 +correctionsto 0 +note 0 +notic 0 +materi 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +washington 0 +import 0 +part 0 +ofth 0 +review 0 +session 0 +fridai 0 +sieg 0 +hall 0 +wednesdai 0 +room 0 +close 0 +book 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..97a3dbbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,65 @@ +linear 1 +method 1 +april 1 +intro 1 +optim 1 +program 1 +graphic 1 +matrix 1 +comput 1 +brad 1 +equat 1 +quadrat 1 +exampl 1 +paper 1 +finit 1 +seminarc 0 +rspring 0 +numer 0 +definit 0 +properti 0 +invers 0 +solv 0 +system 0 +eric 0 +eigenvalu 0 +eigenvector 0 +singular 0 +valu 0 +decomposit 0 +joel 0 +root 0 +find 0 +nonlinear 0 +corei 0 +shuichi 0 +unconstrain 0 +kari 0 +constrain 0 +global 0 +kevin 0 +chuck 0 +ronen 0 +daniel 0 +data 0 +fit 0 +conclus 0 +mike 0 +regress 0 +calibr 0 +ordinari 0 +differenti 0 +adam 0 +joanna 0 +discret 0 +element 0 +radios 0 +fred 0 +pde 0 +differ 0 +interv 0 +arithmet 0 +troi 0 +jonathan 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..c8bf58a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,45 @@ +tanimoto 1 +autumn 1 +educ 1 +mathemat 1 +experi 1 +imag 1 +process 1 +copyright 1 +washington 1 +topic 1 +quarter 1 +quarterscs 0 +special 0 +topicssteven 0 +instructorcs 0 +transcript 0 +base 0 +winter 0 +spring 0 +technolog 0 +collabor 0 +learn 0 +notic 0 +materi 0 +cours 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +univers 0 +graduat 0 +seminar 0 +explor 0 +varieti 0 +relat 0 +useof 0 +comput 0 +specif 0 +activ 0 +varyfrom 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..62fafb56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,160 @@ +paper 2 +read 2 +novemb 1 +presentor 1 +educ 1 +octob 1 +proceed 1 +html 1 +present 1 +meet 1 +either 1 +home 1 +page 1 +autumn 1 +world 1 +wide 1 +inform 1 +time 1 +copyright 1 +materi 1 +student 1 +discuss 1 +labord 1 +promot 1 +concept 1 +map 1 +first 1 +degre 1 +beyond 1 +brows 1 +possibl 1 +toolkit 1 +layer 1 +ward 1 +transcript 0 +base 0 +wwwwelcom 0 +short 0 +hypermedia 0 +documentfor 0 +contain 0 +theclass 0 +keep 0 +mind 0 +document 0 +static 0 +newinform 0 +ad 0 +notic 0 +cours 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +univers 0 +washington 0 +mccalla 0 +central 0 +importanceof 0 +model 0 +intellig 0 +tutor 0 +sandi 0 +youngquist 0 +paul 0 +barton 0 +davi 0 +aboutinternet 0 +servic 0 +problem 0 +solv 0 +geometri 0 +microworld 0 +tointellig 0 +comput 0 +environ 0 +tessa 0 +bartel 0 +mathematicsconnect 0 +plu 0 +gari 0 +anderson 0 +onlin 0 +combin 0 +vision 0 +littl 0 +technolog 0 +noth 0 +particularli 0 +ambiti 0 +descript 0 +state 0 +second 0 +technic 0 +piec 0 +thethem 0 +learner 0 +take 0 +respons 0 +someth 0 +increasingli 0 +import 0 +futur 0 +choic 0 +third 0 +moresophist 0 +elabor 0 +group 0 +annot 0 +ofwww 0 +describ 0 +intechn 0 +term 0 +internet 0 +infrastructur 0 +couldmak 0 +smart 0 +distribut 0 +tutori 0 +applicationsthat 0 +mosaic 0 +netscap 0 +achiev 0 +pleas 0 +option 0 +advanc 0 +us 0 +webhttp 0 +jeremi 0 +baer 0 +empow 0 +agehttp 0 +ncsa 0 +uiuc 0 +marla 0 +baker 0 +share 0 +comment 0 +soap 0 +trail 0 +line 0 +communitieshttp 0 +john 0 +dietz 0 +enhanc 0 +protocol 0 +lower 0 +serviceshttp 0 +dcewebkit 0 +adam 0 +carlson 0 +hong 0 +zhumeet 0 +michael 0 +aboutcurriculum 0 +navig 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..f2d575af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,78 @@ +autumn 1 +technolog 1 +learn 1 +paper 1 +schedul 1 +home 1 +page 1 +copyright 1 +washington 1 +current 1 +explor 1 +middl 1 +collabor 0 +learningwelcom 0 +notic 0 +materi 0 +cours 0 +subjectto 0 +view 0 +public 0 +instal 0 +site 0 +otherthan 0 +univers 0 +gener 0 +descript 0 +comput 0 +internet 0 +methodologiesfor 0 +teach 0 +come 0 +togeth 0 +innew 0 +wai 0 +seminar 0 +read 0 +number 0 +forcollabor 0 +particip 0 +student 0 +willtak 0 +respons 0 +make 0 +present 0 +group 0 +ofthes 0 +cover 0 +subset 0 +also 0 +possibl 0 +applic 0 +ofai 0 +visual 0 +techniqu 0 +analysi 0 +evid 0 +ofstud 0 +onlin 0 +context 0 +meet 0 +tuesdai 0 +howev 0 +decid 0 +move 0 +time 0 +better 0 +intopeopl 0 +visit 0 +meani 0 +school 0 +schoolmai 0 +depend 0 +interest 0 +participatingstud 0 +last 0 +updat 0 +septemb 0 +tanimoto 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..80564232 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,191 @@ +schedul 2 +system 2 +proc 2 +parallel 2 +processor 2 +alloc 2 +distribut 1 +page 1 +memori 1 +multiprocessor 1 +barbara 1 +santa 1 +conf 1 +multiprogram 1 +sigmetr 1 +share 1 +measur 1 +ipp 1 +workshop 1 +strategi 1 +base 1 +workload 1 +polici 1 +model 1 +demand 1 +mari 1 +arpaci 1 +dusseau 1 +dynam 1 +nguyen 1 +gupta 1 +process 1 +symp 1 +oper 1 +vaswani 1 +zahorjan 1 +us 1 +runtim 1 +tutori 1 +supercomput 1 +ofcomput 1 +eric 1 +parson 1 +kenneth 1 +sevcik 1 +feitelson 1 +mccann 1 +memorymultiprocessor 1 +coschedul 1 +migrat 1 +coordin 1 +mvmv 0 +global 0 +resourc 0 +manag 0 +systemsprofessor 0 +vernontim 0 +pmlocat 0 +gang 0 +now 0 +ousterhout 0 +techniqu 0 +concurr 0 +inrd 0 +vahdat 0 +anderson 0 +patterson 0 +interact 0 +andsequenti 0 +network 0 +workstat 0 +ofth 0 +equi 0 +partit 0 +tucker 0 +control 0 +issuesfor 0 +principl 0 +workloadcharacterist 0 +univ 0 +ofwashington 0 +technic 0 +report 0 +applic 0 +shun 0 +leung 0 +evangelo 0 +markato 0 +thoma 0 +leblanc 0 +affin 0 +loopschedul 0 +expand 0 +version 0 +iniee 0 +tran 0 +han 0 +zima 0 +chapman 0 +compil 0 +ieee 0 +edjlali 0 +agraw 0 +sussman 0 +saltz 0 +data 0 +parallelprogram 0 +adapt 0 +environ 0 +april 0 +comparisonsshikharesh 0 +majumdar 0 +derek 0 +eager 0 +richard 0 +bunt 0 +confer 0 +high 0 +variabilityservic 0 +time 0 +dror 0 +bill 0 +nitzberg 0 +characterist 0 +product 0 +scientif 0 +thenasa 0 +am 0 +ipsc 0 +follow 0 +also 0 +cover 0 +requir 0 +read 0 +leutenegg 0 +vernon 0 +perform 0 +transact 0 +comput 0 +patrick 0 +sobalvarro 0 +william 0 +weihl 0 +ofparallel 0 +job 0 +impact 0 +burger 0 +hyder 0 +miller 0 +wood 0 +tradeoff 0 +rohit 0 +chandra 0 +scott 0 +devin 0 +verghes 0 +anoop 0 +mendel 0 +rosenblum 0 +multiprocessorcomput 0 +server 0 +architectur 0 +support 0 +programminglanguag 0 +asplo 0 +jose 0 +alverson 0 +kahan 0 +korri 0 +smith 0 +tera 0 +octob 0 +discuss 0 +open 0 +problem 0 +culler 0 +effectivedistribut 0 +computersystem 0 +philadelphia 0 +june 0 +appear 0 +rudolph 0 +identif 0 +activ 0 +work 0 +set 0 +program 0 +theoret 0 +result 0 +karlin 0 +paper 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..11d337fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,67 @@ +acrobat 1 +format 1 +syllabu 1 +schedul 1 +winter 1 +ruzzo 1 +last 1 +updat 1 +html 1 +titl 1 +postscript 1 +file 1 +usual 1 +translat 1 +adob 1 +free 1 +viewer 1 +page 1 +algorithm 0 +molecular 0 +biologi 0 +richard 0 +karp 0 +larri 0 +martin 0 +tompaclass 0 +bboard 0 +handout 0 +administr 0 +lectur 0 +note 0 +draft 0 +homework 0 +slide 0 +cours 0 +materi 0 +provid 0 +sever 0 +load 0 +fast 0 +readabl 0 +mani 0 +part 0 +gener 0 +automat 0 +latex 0 +faith 0 +origin 0 +latest 0 +greatest 0 +ghostscript 0 +home 0 +window 0 +linux 0 +time 0 +support 0 +fewer 0 +system 0 +isprefer 0 +smaller 0 +render 0 +isfast 0 +legibl 0 +print 0 +ghostscriptcan 0 +exampl 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..806f5217 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,172 @@ +paper 2 +tuesdai 2 +read 1 +architectur 1 +quarter 1 +week 1 +present 1 +posit 1 +comput 1 +anderson 1 +asplo 1 +line 1 +memori 1 +mail 1 +organ 1 +cseg 1 +lunch 1 +format 1 +previou 1 +ofth 1 +discuss 1 +lead 1 +credit 1 +meet 1 +valu 1 +follow 1 +processor 1 +burger 1 +machin 1 +avail 1 +subscrib 1 +list 1 +lunchcs 0 +lunchcours 0 +jean 0 +loupbaermeet 0 +time 0 +continu 0 +withalmost 0 +year 0 +select 0 +discussedat 0 +begin 0 +distribut 0 +tobe 0 +might 0 +formal 0 +work 0 +progress 0 +byesteem 0 +member 0 +mostlyw 0 +hopefulli 0 +heat 0 +discussionson 0 +literatur 0 +differ 0 +quartersi 0 +start 0 +fromparticip 0 +recent 0 +workshop 0 +oncrit 0 +issu 0 +research 0 +copi 0 +hereread 0 +morethem 0 +mani 0 +thank 0 +ruth 0 +molli 0 +brown 0 +gershoni 0 +matthai 0 +philipos 0 +tabular 0 +summari 0 +guru 0 +found 0 +herefor 0 +usual 0 +thestud 0 +either 0 +informallyor 0 +slide 0 +cours 0 +variabl 0 +ifyou 0 +first 0 +octob 0 +local 0 +load 0 +predict 0 +lipasti 0 +wilkerson 0 +shen 0 +link 0 +advanceprogrami 0 +short 0 +bibliographi 0 +appreci 0 +volunt 0 +thesaulsburi 0 +readashlei 0 +saulsburi 0 +fong 0 +pong 0 +andrea 0 +nowatzyk 0 +miss 0 +wall 0 +case 0 +integr 0 +isca 0 +readm 0 +fillo 0 +keckler 0 +dalli 0 +multicomput 0 +micro 0 +machinelink 0 +readdoug 0 +stefano 0 +kaxira 0 +jame 0 +goodman 0 +datascalar 0 +spsd 0 +execut 0 +model 0 +univers 0 +wisconsin 0 +madison 0 +scienc 0 +depart 0 +technic 0 +report 0 +juli 0 +neton 0 +intellig 0 +iram 0 +chip 0 +rememb 0 +patterson 0 +cardwel 0 +fromm 0 +keeton 0 +kozyraki 0 +thomasand 0 +yelick 0 +availableher 0 +fortun 0 +author 0 +prof 0 +send 0 +email 0 +themajordomo 0 +majordomo 0 +content 0 +shouldinclud 0 +leav 0 +subject 0 +lineblank 0 +shortli 0 +receiv 0 +messag 0 +back 0 +sai 0 +welcom 0 +baer 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..369847a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,13 @@ +home 1 +pagecs 0 +page 0 +spring 0 +offer 0 +experiment 0 +graduat 0 +cours 0 +human 0 +comput 0 +interact 0 +born 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..c66da3b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,127 @@ +week 2 +compil 1 +time 1 +memori 1 +gupta 1 +anderson 1 +gener 1 +special 1 +charl 1 +data 1 +optim 1 +roger 1 +regist 1 +mail 1 +line 1 +seminarcs 0 +seminarcours 0 +organ 0 +susan 0 +eggersand 0 +craigchambersmeet 0 +wednesdai 0 +offici 0 +loew 0 +butreal 0 +meet 0 +second 0 +floor 0 +atrium 0 +scheduleweek 0 +memspi 0 +analyz 0 +system 0 +bottleneck 0 +program 0 +margaretmartonosi 0 +anoop 0 +thoma 0 +approach 0 +applic 0 +consel 0 +francoi 0 +noel 0 +practic 0 +flow 0 +framework 0 +arrai 0 +refer 0 +analysi 0 +itsus 0 +evelyn 0 +duesterwald 0 +rajiv 0 +maryl 0 +soffa 0 +valu 0 +depend 0 +graph 0 +represent 0 +without 0 +taxat 0 +danielweis 0 +crew 0 +michael 0 +ernst 0 +bjarn 0 +steensgaard 0 +litvinov 0 +iter 0 +coalesc 0 +georg 0 +andrew 0 +appel 0 +garrett 0 +machin 0 +specif 0 +hooverand 0 +kenneth 0 +zadeck 0 +dean 0 +grant 0 +paradigm 0 +distribut 0 +multicomput 0 +byprivthviraj 0 +banerje 0 +lewi 0 +minimum 0 +cost 0 +interprocedur 0 +alloc 0 +stevenkurland 0 +fischer 0 +secoski 0 +todd 0 +knoblock 0 +erik 0 +grove 0 +lazi 0 +strength 0 +reduct 0 +jen 0 +knoop 0 +oliv 0 +ruth 0 +andbernhard 0 +steffen 0 +mock 0 +tullsen 0 +subscrib 0 +list 0 +send 0 +email 0 +majordomo 0 +content 0 +includ 0 +subscribecsek 0 +leav 0 +subject 0 +blank 0 +shortlyrec 0 +messag 0 +back 0 +sai 0 +welcom 0 +melodi 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..34ee3a46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,178 @@ +parallel 2 +program 2 +data 2 +compil 2 +analysi 2 +ppopp 1 +optim 1 +applic 1 +commun 1 +ipp 1 +icpp 1 +lcpc 1 +kennedi 1 +distribut 1 +model 1 +quarter 1 +matlab 1 +choi 1 +comput 1 +mail 1 +messag 1 +scalapack 1 +exploit 1 +task 1 +ramaswami 1 +hodg 1 +banerje 1 +sung 1 +cross 1 +loop 1 +reus 1 +cach 1 +cooper 1 +mcintosh 1 +global 1 +chakarabarti 1 +gupta 1 +pldi 1 +integer 1 +perform 1 +environ 1 +adv 1 +input 1 +output 1 +characterist 1 +scalabl 1 +crandal 1 +aydt 1 +chien 1 +reed 1 +strategi 1 +core 1 +bordawekar 1 +choudahari 1 +koelbel 1 +paleczni 1 +local 1 +iter 1 +block 1 +cyclic 1 +midkiff 1 +util 1 +thread 1 +fahring 1 +hain 1 +mehrotra 1 +gener 1 +environmentslarri 0 +snyderautumn 0 +mondai 0 +loew 0 +welcom 0 +home 0 +page 0 +read 0 +select 0 +paper 0 +recent 0 +supercomput 0 +ten 0 +schedul 0 +atmospher 0 +casual 0 +andwil 0 +hopefulli 0 +ignit 0 +live 0 +discuss 0 +everyon 0 +attend 0 +seminar 0 +expect 0 +present 0 +thepap 0 +still 0 +spot 0 +open 0 +hurri 0 +sign 0 +pleas 0 +send 0 +majordomo 0 +subscrib 0 +cseo 0 +bodi 0 +subscribeto 0 +class 0 +list 0 +datepaperpresentor 0 +falcon 0 +interact 0 +restructur 0 +deros 0 +gallivan 0 +gallopoulo 0 +marsolf 0 +padua 0 +portabl 0 +driven 0 +ramkumar 0 +forb 0 +kale 0 +ruth 0 +sean 0 +jason 0 +holidai 0 +stream 0 +librari 0 +complex 0 +structur 0 +gotwal 0 +sriniva 0 +gannon 0 +brad 0 +eric 0 +cilk 0 +effici 0 +multithread 0 +runtim 0 +system 0 +blumof 0 +joerg 0 +kuszmaul 0 +leiserson 0 +randal 0 +zhou 0 +code 0 +object 0 +orient 0 +mathemat 0 +andersson 0 +fritzson 0 +realign 0 +base 0 +kamachi 0 +kusano 0 +suehiro 0 +tamura 0 +sakon 0 +us 0 +access 0 +inform 0 +rinard 0 +tool 0 +rel 0 +debug 0 +develop 0 +larg 0 +numer 0 +abramson 0 +foster 0 +michalak 0 +sosic 0 +potpourri 0 +last 0 +modifi 0 +tuesdai 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..bc0ca38b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,27 @@ +system 1 +seminar 1 +mail 1 +list 1 +autumn 1 +preliminariesif 0 +alreadi 0 +need 0 +variou 0 +crucial 0 +bit 0 +ofinform 0 +week 0 +cancel 0 +besent 0 +send 0 +request 0 +line 0 +subscrib 0 +systemsin 0 +messag 0 +bodi 0 +quarterli 0 +web 0 +spring 0 +summer 0 +winter 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..92e7b7f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,70 @@ +present 2 +system 1 +file 1 +meet 1 +quarter 1 +read 1 +paper 1 +oper 1 +memori 1 +perform 1 +summer 0 +quarterw 0 +fridai 0 +loew 0 +wewil 0 +final 0 +appear 0 +upcomingacm 0 +symposium 0 +principl 0 +sosp 0 +pleas 0 +havean 0 +interact 0 +discuss 0 +scheduleoct 0 +implement 0 +global 0 +manag 0 +workstat 0 +cluster 0 +feelei 0 +log 0 +virtual 0 +savag 0 +autoraid 0 +hierarch 0 +storag 0 +wilk 0 +serverless 0 +network 0 +franklin 0 +montgomeri 0 +tiwari 0 +hypervisor 0 +base 0 +fault 0 +toler 0 +chan 0 +philipos 0 +wolman 0 +exploit 0 +weak 0 +connect 0 +mobil 0 +access 0 +voelker 0 +litvinov 0 +cach 0 +coher 0 +stackabl 0 +sriram 0 +fiuczynski 0 +impact 0 +architectur 0 +trend 0 +anderson 0 +romer 0 +return 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..40c015e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,146 @@ +comput 2 +program 2 +scientif 2 +perform 1 +mail 1 +class 1 +list 1 +high 1 +student 1 +us 1 +languag 1 +machin 1 +parallel 1 +supercomput 1 +develop 1 +wednesdai 1 +page 1 +send 1 +majordomo 1 +subscrib 1 +bodi 1 +messag 1 +compil 1 +releas 1 +suitabl 1 +loop 1 +fast 1 +modern 1 +includ 1 +workstat 1 +scientist 1 +follow 1 +write 1 +zphigh 0 +zpllarri 0 +snyder 0 +teamautumn 0 +quarter 0 +sieg 0 +loew 0 +dai 0 +welcom 0 +home 0 +pleas 0 +csezpl 0 +subscribeto 0 +also 0 +interest 0 +join 0 +usersmail 0 +distribut 0 +informationabout 0 +librarai 0 +relatedinform 0 +ad 0 +user 0 +descriptionzpl 0 +scientificprogram 0 +previous 0 +written 0 +infortran 0 +arrai 0 +dramaticallysimplifi 0 +elimin 0 +nuisanc 0 +index 0 +run 0 +allow 0 +programm 0 +code 0 +andtrivi 0 +migrat 0 +largest 0 +simpli 0 +byrecompil 0 +toth 0 +commun 0 +design 0 +engin 0 +want 0 +learn 0 +effect 0 +cover 0 +topic 0 +state 0 +syntax 0 +semant 0 +algorithm 0 +exploit 0 +wysiwyg 0 +easili 0 +well 0 +scienc 0 +faster 0 +prototyp 0 +matlab 0 +text 0 +booknon 0 +reli 0 +materi 0 +document 0 +found 0 +onin 0 +specif 0 +close 0 +zplprogram 0 +guid 0 +version 0 +prerequisitesfamiliar 0 +fortran 0 +ormatlab 0 +unix 0 +platform 0 +assum 0 +variabl 0 +credit 0 +audit 0 +debug 0 +select 0 +technic 0 +disciplin 0 +rang 0 +whole 0 +applic 0 +kernel 0 +inner 0 +informationcours 0 +syllabu 0 +lectur 0 +note 0 +appli 0 +ncsa 0 +block 0 +grant 0 +account 0 +faculti 0 +staff 0 +remotezpl 0 +compileroth 0 +import 0 +link 0 +sung 0 +choi 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..bab0d03b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,127 @@ +class 1 +design 1 +mail 1 +instructor 1 +last 1 +lectur 1 +home 1 +page 1 +gaetano 1 +announc 1 +updat 1 +washington 1 +autumn 1 +quarter 1 +borriello 1 +corei 1 +us 1 +inform 1 +document 1 +send 1 +webmast 1 +administr 1 +cours 1 +final 1 +exam 1 +tool 1 +topic 1 +offic 1 +hour 1 +sieg 1 +logic 1 +katz 1 +benjamin 1 +cum 1 +addison 1 +weslei 1 +maintain 1 +comput 1 +introduct 0 +digit 0 +andersonwelcom 0 +contain 0 +whole 0 +bunch 0 +keep 0 +mind 0 +static 0 +especi 0 +messag 0 +ad 0 +frequent 0 +problem 0 +gener 0 +tocs 0 +notic 0 +system 0 +archiv 0 +messagess 0 +everyon 0 +goal 0 +syllabu 0 +meet 0 +time 0 +mondai 0 +decemb 0 +workload 0 +grade 0 +expect 0 +laboratori 0 +softwar 0 +polici 0 +collabor 0 +cheat 0 +address 0 +overal 0 +schedul 0 +anderson 0 +corin 0 +aweekli 0 +assign 0 +weekli 0 +quizz 0 +onlin 0 +version 0 +slide 0 +textbook 0 +contemporari 0 +author 0 +publish 0 +note 0 +interest 0 +evolut 0 +implement 0 +technolog 0 +aid 0 +synario 0 +feedback 0 +tell 0 +think 0 +thing 0 +go 0 +even 0 +anonym 0 +desir 0 +question 0 +evalu 0 +complet 0 +link 0 +previou 0 +portion 0 +reprint 0 +adapt 0 +academ 0 +nonprofit 0 +purpos 0 +provid 0 +sourc 0 +accur 0 +quot 0 +duli 0 +credit 0 +copyright 0 +depart 0 +scienc 0 +engin 0 +univers 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..894860f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,36 @@ +comput 1 +societi 1 +cours 1 +societycs 0 +societywelcom 0 +home 0 +page 0 +wintercs 0 +focu 0 +social 0 +econom 0 +ethic 0 +legal 0 +implic 0 +present 0 +internet 0 +futur 0 +nation 0 +andglob 0 +inform 0 +highwai 0 +instructor 0 +alan 0 +born 0 +class 0 +time 0 +tue 0 +thur 0 +sieg 0 +syllabusclass 0 +schedulelink 0 +relev 0 +sitesbook 0 +journal 0 +avail 0 +referenceassignmentsassign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..b9c43fea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,50 @@ +sieg 1 +hour 1 +artifici 1 +question 1 +index 0 +pagecs 0 +intelligencefal 0 +quarter 0 +intellig 0 +pose 0 +fundament 0 +andchalleng 0 +comput 0 +scienc 0 +build 0 +intelligentmachin 0 +cours 0 +address 0 +provid 0 +anin 0 +depth 0 +introduct 0 +select 0 +topic 0 +includ 0 +agentarchitectur 0 +knowledg 0 +represent 0 +search 0 +plan 0 +machinelearn 0 +reason 0 +uncertainti 0 +methodolog 0 +staff 0 +weldweld 0 +marc 0 +friedmanfriedman 0 +nick 0 +kushmericknick 0 +outlin 0 +topicsprojectread 0 +assignmentsassign 0 +examsgradingresourcesth 0 +class 0 +mailinglist 0 +also 0 +archiv 0 +past 0 +messag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..d4db7a10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,132 @@ +check 2 +inform 1 +index 1 +page 1 +server 1 +mail 1 +html 1 +glimps 1 +zephyr 1 +us 1 +show 1 +regist 1 +comment 1 +intellig 1 +read 1 +program 1 +link 1 +list 1 +paul 1 +file 1 +rememb 1 +want 1 +first 1 +back 1 +interfac 1 +user 1 +make 1 +anoth 1 +class 1 +provid 1 +filter 1 +ics 0 +internet 0 +meet 0 +tuesdai 0 +siegcreat 0 +side 0 +scriptspleas 0 +guidelin 0 +towrit 0 +execut 0 +someon 0 +follow 0 +tothem 0 +peopl 0 +place 0 +collect 0 +gener 0 +futur 0 +begun 0 +updat 0 +ad 0 +phoenix 0 +impress 0 +older 0 +topic 0 +offici 0 +releas 0 +instal 0 +manual 0 +well 0 +develop 0 +home 0 +interest 0 +detail 0 +work 0 +winter 0 +usenix 0 +paper 0 +design 0 +implement 0 +wide 0 +area 0 +wai 0 +zwhere 0 +mosiac 0 +locat 0 +databas 0 +current 0 +guess 0 +room 0 +version 0 +znol 0 +zwatch 0 +zlocat 0 +extra 0 +info 0 +except 0 +statu 0 +on 0 +anyon 0 +note 0 +lectur 0 +discuss 0 +sent 0 +displai 0 +belief 0 +short 0 +mike 0 +releg 0 +review 0 +site 0 +good 0 +miscellan 0 +rather 0 +rambl 0 +kurt 0 +grumbl 0 +problem 0 +improv 0 +mosaic 0 +bring 0 +luddit 0 +perspect 0 +idea 0 +network 0 +sourc 0 +nick 0 +vagu 0 +relat 0 +decemb 0 +cacm 0 +summari 0 +articl 0 +chang 0 +document 0 +itout 0 +withci 0 +send 0 +theentir 0 +address 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..b11de321 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,35 @@ +system 1 +open 1 +home 0 +page 0 +machin 0 +organ 0 +program 0 +credit 0 +introduct 0 +current 0 +structur 0 +control 0 +commun 0 +memori 0 +processor 0 +devic 0 +project 0 +involv 0 +detail 0 +studi 0 +specif 0 +small 0 +computerhardwar 0 +softwar 0 +prerequisit 0 +consent 0 +instructor 0 +student 0 +taken 0 +freshmen 0 +semesterli 0 +cours 0 +inform 0 +info 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..7bbdc0a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,26 @@ +design 1 +comput 1 +structur 1 +memori 1 +page 0 +introduct 0 +architectur 0 +credit 0 +system 0 +compon 0 +processor 0 +instruct 0 +address 0 +control 0 +microprogram 0 +manag 0 +cach 0 +hierarchi 0 +interrupt 0 +prerequisit 0 +andc 0 +semesterli 0 +cours 0 +inform 0 +info 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..3be046fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,26 @@ +comput 1 +advanc 1 +architectur 1 +home 0 +page 0 +credit 0 +techniqu 0 +design 0 +parallel 0 +process 0 +andpipelin 0 +multiprocessor 0 +multi 0 +network 0 +high 0 +performancemachin 0 +special 0 +purpos 0 +processor 0 +data 0 +flow 0 +prerequisit 0 +semesterli 0 +cours 0 +inform 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..488603d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,31 @@ +parallel 1 +machin 1 +home 0 +page 0 +advanc 0 +comput 0 +architectur 0 +credit 0 +algorithm 0 +principl 0 +detect 0 +vectorizingcompil 0 +interconnect 0 +network 0 +simd 0 +mimd 0 +processorsynchron 0 +data 0 +coher 0 +multi 0 +dataflow 0 +special 0 +purposeprocessor 0 +prerequisit 0 +consent 0 +instructor 0 +semesterli 0 +cours 0 +inform 0 +info 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..a4dd5588 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,243 @@ +assign 2 +class 2 +problem 2 +week 2 +februari 2 +avail 2 +section 2 +final 2 +quizz 2 +us 2 +program 1 +lectur 1 +oper 1 +system 1 +set 1 +wednesdai 1 +schedul 1 +late 1 +april 1 +spring 1 +grade 1 +read 1 +cours 1 +discuss 1 +learn 1 +dai 1 +march 1 +introduct 1 +bart 1 +answer 1 +note 1 +wisc 1 +eduoffic 1 +csphone 1 +offic 1 +hour 1 +page 1 +comput 1 +exam 1 +quiz 1 +last 1 +process 1 +unix 1 +goal 1 +memori 1 +three 1 +work 1 +take 1 +fridai 1 +mondai 1 +need 1 +orient 1 +book 1 +first 1 +tuesdai 1 +thursdai 1 +follow 1 +past 1 +semaphor 1 +messag 1 +simul 1 +semest 1 +hand 1 +group 1 +probabl 1 +solari 1 +workstat 1 +januari 1 +alloc 1 +advanc 1 +topic 1 +univers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +departmentc 0 +millerc 0 +systemsnew 0 +stufffin 0 +post 0 +readi 0 +print 0 +staffinstructor 0 +milleremail 0 +noonor 0 +appoint 0 +karuna 0 +muthiahemail 0 +muthiah 0 +jonathan 0 +weyersemail 0 +weyer 0 +materialsth 0 +organ 0 +around 0 +thelectur 0 +notesar 0 +textbookmodern 0 +tanenbaum 0 +programmingassign 0 +purchas 0 +copi 0 +ofobject 0 +pohl 0 +whatev 0 +favorit 0 +come 0 +modern 0 +systemsandobject 0 +store 0 +sectionslectur 0 +time 0 +sciencesdiscuss 0 +nolandnot 0 +extra 0 +mainli 0 +recit 0 +materialcov 0 +weekli 0 +occas 0 +import 0 +detail 0 +ofth 0 +homework 0 +make 0 +sure 0 +leav 0 +room 0 +attend 0 +quizzesther 0 +midterm 0 +option 0 +start 0 +second 0 +thediscuss 0 +minut 0 +concurr 0 +monitor 0 +usetrac 0 +activ 0 +real 0 +drive 0 +algorithm 0 +trace 0 +driven 0 +experi 0 +quantit 0 +analyz 0 +written 0 +setsdur 0 +severalwritten 0 +base 0 +turn 0 +though 0 +find 0 +poorli 0 +youdon 0 +problemssolut 0 +theproblem 0 +happi 0 +question 0 +andlook 0 +solut 0 +variou 0 +synchronizationprimit 0 +solv 0 +manag 0 +hardwar 0 +softwar 0 +workassign 0 +date 0 +list 0 +handout 0 +entir 0 +havethre 0 +daysof 0 +credit 0 +differ 0 +eachof 0 +absolut 0 +accept 0 +cannot 0 +assignmentthat 0 +weekof 0 +cheatingprogram 0 +done 0 +partner 0 +independ 0 +cheater 0 +receiv 0 +maximum 0 +penalti 0 +includ 0 +receivingan 0 +mark 0 +transcript 0 +facilitiesw 0 +run 0 +window 0 +student 0 +regist 0 +account 0 +policyif 0 +lowest 0 +drop 0 +averag 0 +beno 0 +break 0 +count 0 +taught 0 +inth 0 +rang 0 +scheduleth 0 +tent 0 +could 0 +chang 0 +overview 0 +processesweek 0 +dispatch 0 +creationweek 0 +cooper 0 +synchronizationweek 0 +semaphoresweek 0 +monitorsweek 0 +deadlocksweek 0 +debug 0 +strategi 0 +dynam 0 +breakweek 0 +relocationweek 0 +segment 0 +tlbsweek 0 +virtual 0 +replac 0 +thrash 0 +devic 0 +filesweek 0 +disk 0 +directoriesweek 0 +protectionweek 0 +secur 0 +modifi 0 +bybart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..d3508e98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,82 @@ +bart 1 +week 1 +april 1 +internet 1 +februari 1 +march 1 +miller 1 +honor 1 +spring 1 +januari 1 +seminarunivers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +departmentc 0 +millerc 0 +seminarinstructor 0 +milleremail 0 +wisc 0 +eduoffic 0 +csphone 0 +offic 0 +hour 0 +wednesdai 0 +fridai 0 +noonor 0 +appoint 0 +lectureslectur 0 +time 0 +mondai 0 +comput 0 +sciencesclass 0 +schedulether 0 +written 0 +assign 0 +class 0 +requir 0 +attendal 0 +lectur 0 +particip 0 +discuss 0 +follow 0 +schedul 0 +mostli 0 +right 0 +could 0 +chang 0 +introduct 0 +overviewweek 0 +larri 0 +landweb 0 +architectur 0 +protocolsweek 0 +client 0 +server 0 +remot 0 +procedur 0 +callsweek 0 +system 0 +securityweek 0 +eric 0 +bach 0 +secur 0 +encryptionweek 0 +breakweek 0 +miron 0 +livni 0 +imag 0 +pictur 0 +netweek 0 +high 0 +perform 0 +file 0 +systemsweek 0 +david 0 +wood 0 +supercomputerweek 0 +laru 0 +javaweek 0 +discussionslast 0 +modifi 0 +bybart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..bd011760 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,168 @@ +read 2 +paper 2 +class 2 +discuss 2 +system 1 +topic 1 +oper 1 +write 1 +project 1 +cours 1 +detail 1 +import 1 +first 1 +advanc 1 +fall 1 +give 1 +current 1 +lectur 1 +review 1 +form 1 +group 1 +twice 1 +week 1 +assign 1 +post 1 +particip 1 +comment 1 +design 1 +idea 1 +well 1 +second 1 +final 1 +grade 1 +availbl 1 +tuesdai 1 +thursdai 1 +univers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +departmentc 0 +bart 0 +millerc 0 +systemssummarythi 0 +intend 0 +broad 0 +exposur 0 +advancedoper 0 +protect 0 +secur 0 +memori 0 +manag 0 +kernel 0 +file 0 +synchron 0 +name 0 +distribut 0 +pleas 0 +rest 0 +inform 0 +sheet 0 +carefulli 0 +textther 0 +realli 0 +satisfactori 0 +textbook 0 +graduat 0 +level 0 +operatingsystemsclass 0 +literatur 0 +text 0 +structur 0 +around 0 +journal 0 +articl 0 +andconfer 0 +proceed 0 +abl 0 +purchas 0 +doit 0 +handout 0 +relev 0 +willinstead 0 +adiscuss 0 +major 0 +theme 0 +us 0 +focal 0 +point 0 +classmat 0 +meetonc 0 +especi 0 +part 0 +listaccord 0 +schedul 0 +formula 0 +success 0 +papersindepend 0 +try 0 +identifyth 0 +issu 0 +thepap 0 +discussionsclass 0 +meet 0 +talk 0 +besupport 0 +opinion 0 +will 0 +activ 0 +daili 0 +geta 0 +expect 0 +quietli 0 +listen 0 +beveri 0 +unhappi 0 +papersdur 0 +short 0 +page 0 +andon 0 +longer 0 +paperwil 0 +base 0 +work 0 +understood 0 +facilityand 0 +extens 0 +area 0 +involv 0 +summaryof 0 +aselect 0 +topicsfrom 0 +choos 0 +good 0 +least 0 +refere 0 +fellowstud 0 +writer 0 +critic 0 +anoth 0 +person 0 +giveth 0 +reader 0 +look 0 +someon 0 +els 0 +revis 0 +pass 0 +examsther 0 +exam 0 +keep 0 +busi 0 +gradesscor 0 +assignmenti 0 +summari 0 +score 0 +fromth 0 +proposalsi 0 +also 0 +gradesar 0 +avail 0 +detailstim 0 +place 0 +csoffic 0 +hour 0 +noonlast 0 +modifi 0 +bybart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..411905fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,146 @@ +comput 2 +fortran 2 +program 2 +solut 2 +click 2 +week 2 +question 2 +home 1 +cours 1 +microsoft 1 +scienc 1 +lectur 1 +bestor 1 +section 1 +cover 1 +student 1 +engin 1 +write 1 +solv 1 +problem 1 +exercis 1 +gareth 1 +us 1 +page 1 +basic 1 +structur 1 +elementari 1 +intend 1 +languag 1 +primarili 1 +instructor 1 +assign 1 +pointer 1 +interest 1 +pleas 1 +class 1 +vectra 1 +window 1 +also 1 +howev 1 +copi 1 +pagec 0 +introduct 0 +programmingsect 0 +credit 0 +need 0 +prepar 0 +prior 0 +experi 0 +requir 0 +knowledg 0 +assum 0 +materi 0 +enabl 0 +simpl 0 +done 0 +receiv 0 +littl 0 +instruct 0 +high 0 +school 0 +taught 0 +entir 0 +major 0 +descript 0 +menu 0 +import 0 +announc 0 +read 0 +grade 0 +polici 0 +syllabu 0 +text 0 +note 0 +psycholog 0 +march 0 +punctual 0 +avoid 0 +disturb 0 +offic 0 +overal 0 +gener 0 +code 0 +though 0 +want 0 +time 0 +algorithm 0 +even 0 +depend 0 +particular 0 +follow 0 +mondai 0 +subroutin 0 +function 0 +labyou 0 +statist 0 +contain 0 +hewlett 0 +packard 0 +run 0 +open 0 +seven 0 +dai 0 +except 0 +certain 0 +holidai 0 +printer 0 +room 0 +locat 0 +across 0 +hall 0 +dorm 0 +probabl 0 +purchas 0 +lahei 0 +person 0 +insid 0 +textbook 0 +work 0 +lab 0 +campu 0 +compil 0 +first 0 +softwar 0 +includ 0 +mail 0 +netscap 0 +depart 0 +start 0 +point 0 +internet 0 +explor 0 +lyco 0 +search 0 +world 0 +wide 0 +keyword 0 +dilbert 0 +comic 0 +relief 0 +long 0 +night 0 +copyright 0 +wisc 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..43a14ec2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,265 @@ +week 3 +program 3 +exam 2 +question 2 +click 2 +solut 2 +comput 2 +assign 2 +problem 2 +grade 2 +fortran 2 +class 2 +solv 2 +lectur 2 +home 2 +final 2 +section 1 +mail 1 +languag 1 +scienc 1 +note 1 +offic 1 +term 1 +microsoft 1 +cours 1 +import 1 +text 1 +exercis 1 +pleas 1 +bestor 1 +complet 1 +hand 1 +cover 1 +hour 1 +us 1 +fridai 1 +consult 1 +skill 1 +read 1 +instructor 1 +polici 1 +gareth 1 +must 1 +receiv 1 +approxim 1 +copi 1 +includ 1 +good 1 +long 1 +even 1 +compil 1 +mondai 1 +help 1 +send 1 +write 1 +page 1 +primarili 1 +engin 1 +student 1 +syllabu 1 +pointer 1 +interest 1 +contribut 1 +elig 1 +pass 1 +curv 1 +april 1 +come 1 +abl 1 +work 1 +understand 1 +line 1 +well 1 +though 1 +need 1 +time 1 +seven 1 +error 1 +wednesdai 1 +answer 1 +login 1 +printer 1 +netscap 1 +gener 1 +except 1 +want 1 +first 1 +learn 1 +particular 1 +vectra 1 +window 1 +also 1 +howev 1 +pagec 0 +algebra 0 +programmingsect 0 +taught 0 +entir 0 +intend 0 +major 0 +descript 0 +menu 0 +announc 0 +psycholog 0 +punctual 0 +avoid 0 +disturb 0 +lowest 0 +score 0 +mean 0 +rang 0 +thur 0 +februari 0 +amclick 0 +list 0 +current 0 +identif 0 +tent 0 +follow 0 +topic 0 +semest 0 +relev 0 +anyth 0 +unsur 0 +instead 0 +wait 0 +try 0 +discov 0 +didn 0 +realli 0 +someth 0 +notestext 0 +applic 0 +edit 0 +koffman 0 +friedman 0 +avail 0 +substitut 0 +show 0 +overhead 0 +projector 0 +exampl 0 +addit 0 +board 0 +respons 0 +materi 0 +assignmentsther 0 +three 0 +constitut 0 +ensur 0 +regardless 0 +perform 0 +stai 0 +longer 0 +extra 0 +close 0 +book 0 +bring 0 +pencil 0 +calcul 0 +necessari 0 +attempt 0 +everi 0 +without 0 +automat 0 +zero 0 +risk 0 +fail 0 +gradesheet 0 +handin 0 +directori 0 +onlin 0 +late 0 +academ 0 +misconduct 0 +cheat 0 +specif 0 +pmhow 0 +wear 0 +name 0 +tag 0 +duti 0 +short 0 +messag 0 +syntax 0 +inform 0 +requir 0 +explan 0 +best 0 +normal 0 +dissert 0 +research 0 +modem 0 +therefor 0 +outsid 0 +make 0 +appoint 0 +easili 0 +contact 0 +regularli 0 +exerciseson 0 +distinguish 0 +programm 0 +doesn 0 +matter 0 +familiar 0 +techniqu 0 +weekli 0 +small 0 +trivial 0 +give 0 +look 0 +think 0 +right 0 +step 0 +would 0 +overal 0 +structur 0 +code 0 +algorithm 0 +depend 0 +subroutin 0 +function 0 +labyou 0 +statist 0 +contain 0 +hewlett 0 +packard 0 +run 0 +open 0 +dai 0 +certain 0 +holidai 0 +room 0 +locat 0 +across 0 +hall 0 +dorm 0 +probabl 0 +purchas 0 +lahei 0 +person 0 +insid 0 +textbook 0 +lab 0 +campu 0 +softwar 0 +depart 0 +start 0 +point 0 +internet 0 +explor 0 +lyco 0 +search 0 +world 0 +wide 0 +keyword 0 +dilbert 0 +comic 0 +relief 0 +night 0 +copyright 0 +wisc 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..dc2865e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,73 @@ +program 1 +page 1 +fall 1 +section 1 +dave 1 +burnett 1 +wisc 1 +offic 1 +updat 1 +home 1 +algebra 0 +languag 0 +name 0 +egglestonemail 0 +eduoffic 0 +phone 0 +hour 0 +announc 0 +note 0 +origin 0 +output 0 +prog 0 +error 0 +dai 0 +week 0 +correct 0 +valu 0 +inform 0 +exam 0 +question 0 +ask 0 +hourlywork 0 +classread 0 +scan 0 +thursdai 0 +class 0 +avail 0 +solut 0 +quiz 0 +grade 0 +gener 0 +cours 0 +informationc 0 +pagecours 0 +objectivesvectra 0 +labc 0 +consultantssyllabuswork 0 +homeclass 0 +handout 0 +gradeshomeworkexam 0 +quizzesmiscellan 0 +archivepolici 0 +informationemail 0 +policygrad 0 +policyl 0 +policyacadem 0 +misconduct 0 +policytextproblem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +list 0 +known 0 +erratalast 0 +modifi 0 +eggleston 0 +base 0 +greg 0 +sharp 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..957b4900 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,117 @@ +project 2 +system 2 +paper 2 +discuss 1 +assign 1 +implement 1 +lectur 1 +oper 1 +cours 1 +topic 1 +read 1 +research 1 +us 1 +first 1 +count 1 +spring 1 +involv 1 +propos 1 +class 1 +particip 1 +tuesdai 1 +text 1 +semest 1 +grade 1 +schedul 1 +slide 1 +advanc 0 +summari 0 +intend 0 +give 0 +broad 0 +exposur 0 +advancedoper 0 +import 0 +compon 0 +ofvari 0 +anexperiment 0 +cover 0 +topicsinclud 0 +synchron 0 +commun 0 +memori 0 +manag 0 +file 0 +protect 0 +secur 0 +distribut 0 +requir 0 +tochoos 0 +problem 0 +solut 0 +prototyp 0 +info 0 +relev 0 +current 0 +detail 0 +review 0 +rathera 0 +major 0 +theme 0 +focal 0 +point 0 +activ 0 +strongli 0 +encourag 0 +thursdai 0 +engin 0 +halloffic 0 +hour 0 +appoint 0 +comput 0 +scienc 0 +select 0 +classic 0 +design 0 +purchas 0 +doit 0 +formerli 0 +macc 0 +document 0 +deskfor 0 +differ 0 +previou 0 +pleas 0 +copi 0 +exam 0 +instead 0 +benchmark 0 +suit 0 +measur 0 +performanceof 0 +variou 0 +suno 0 +solari 0 +linux 0 +window 0 +manya 0 +hand 0 +second 0 +final 0 +report 0 +present 0 +total 0 +tent 0 +list 0 +suggest 0 +make 0 +well 0 +either 0 +case 0 +need 0 +come 0 +choos 0 +team 0 +peopl 0 +allow 0 +assig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..9fda7f1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,45 @@ +lectur 2 +silva 1 +toni 1 +sidnei 1 +hummert 1 +cours 1 +comput 1 +program 1 +fall 1 +cover 1 +student 1 +materi 1 +fortran 1 +jeff 1 +lampert 1 +michael 1 +birk 1 +russel 1 +man 1 +introduct 0 +scienc 0 +credit 0 +design 0 +basic 0 +programmingstructur 0 +need 0 +prepar 0 +elementaryengin 0 +suffici 0 +enableth 0 +write 0 +simpl 0 +solv 0 +engin 0 +problem 0 +inelementari 0 +essenti 0 +first 0 +half 0 +list 0 +section 0 +martin 0 +reameslast 0 +modifi 0 +anthoni 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..4dadbafb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,96 @@ +offic 3 +hour 2 +phone 2 +wisc 2 +name 2 +email 2 +section 2 +grade 2 +site 1 +page 1 +kelli 1 +tsioli 1 +link 1 +home 1 +ratliff 1 +bockrath 1 +rehnuma 1 +ashraf 1 +geeri 1 +jyothi 1 +thano 1 +world 1 +list 1 +maintain 1 +fall 0 +midterm 0 +exam 0 +answer 0 +keyinstructorprofessor 0 +desautelsoffic 0 +comput 0 +sciencesoffic 0 +mondai 0 +wednesdai 0 +appoint 0 +dept 0 +mail 0 +teach 0 +assistantsfollow 0 +nathan 0 +rahman 0 +jaim 0 +fink 0 +jfink 0 +aboulnaga 0 +andrew 0 +jame 0 +herro 0 +jherro 0 +abhinav 0 +gupta 0 +agupta 0 +krothap 0 +chiang 0 +suhui 0 +gradesexplor 0 +compani 0 +whose 0 +softwar 0 +hardwar 0 +borland 0 +hewlett 0 +packard 0 +intel 0 +microsoft 0 +novel 0 +us 0 +explor 0 +lyco 0 +enorm 0 +databas 0 +yahoo 0 +internet 0 +resourc 0 +classifi 0 +categori 0 +lookup 0 +search 0 +virtual 0 +tourist 0 +find 0 +around 0 +click 0 +mother 0 +larg 0 +alphabet 0 +cool 0 +especi 0 +excel 0 +univers 0 +wisconsin 0 +madison 0 +origin 0 +creat 0 +teitelbaum 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..4dadbafb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,96 @@ +offic 3 +hour 2 +phone 2 +wisc 2 +name 2 +email 2 +section 2 +grade 2 +site 1 +page 1 +kelli 1 +tsioli 1 +link 1 +home 1 +ratliff 1 +bockrath 1 +rehnuma 1 +ashraf 1 +geeri 1 +jyothi 1 +thano 1 +world 1 +list 1 +maintain 1 +fall 0 +midterm 0 +exam 0 +answer 0 +keyinstructorprofessor 0 +desautelsoffic 0 +comput 0 +sciencesoffic 0 +mondai 0 +wednesdai 0 +appoint 0 +dept 0 +mail 0 +teach 0 +assistantsfollow 0 +nathan 0 +rahman 0 +jaim 0 +fink 0 +jfink 0 +aboulnaga 0 +andrew 0 +jame 0 +herro 0 +jherro 0 +abhinav 0 +gupta 0 +agupta 0 +krothap 0 +chiang 0 +suhui 0 +gradesexplor 0 +compani 0 +whose 0 +softwar 0 +hardwar 0 +borland 0 +hewlett 0 +packard 0 +intel 0 +microsoft 0 +novel 0 +us 0 +explor 0 +lyco 0 +enorm 0 +databas 0 +yahoo 0 +internet 0 +resourc 0 +classifi 0 +categori 0 +lookup 0 +search 0 +virtual 0 +tourist 0 +find 0 +around 0 +click 0 +mother 0 +larg 0 +alphabet 0 +cool 0 +especi 0 +excel 0 +univers 0 +wisconsin 0 +madison 0 +origin 0 +creat 0 +teitelbaum 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..1dcb94de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,169 @@ +comput 2 +lectur 2 +us 1 +section 1 +system 1 +class 1 +macintosh 1 +part 1 +discuss 1 +program 1 +word 1 +assign 1 +info 1 +salli 1 +mail 1 +wisc 1 +text 1 +click 1 +avail 1 +cours 1 +gener 1 +topic 1 +includ 1 +follow 1 +spreadsheet 1 +databas 1 +oper 1 +aldu 1 +background 1 +quizz 1 +exam 1 +syllabu 1 +computersinstructor 0 +instructor 0 +petersonoffic 0 +sciencephon 0 +slpeter 0 +facstaff 0 +eduoffic 0 +hour 0 +tuesdai 0 +thursdai 0 +appointmentvit 0 +time 0 +place 0 +held 0 +engin 0 +halllectur 0 +inform 0 +technolog 0 +societi 0 +laudon 0 +traver 0 +laudonlab 0 +point 0 +drag 0 +petersoncours 0 +introduct 0 +design 0 +take 0 +zero 0 +knowledg 0 +computersto 0 +crack 0 +shot 0 +user 0 +skill 0 +throughcolleg 0 +arena 0 +taught 0 +macintoshcomput 0 +csuse 0 +compon 0 +term 0 +scienc 0 +work 0 +necessarili 0 +order 0 +applic 0 +processor 0 +graphic 0 +hardwar 0 +input 0 +output 0 +storag 0 +devic 0 +languag 0 +network 0 +telecommun 0 +artifici 0 +intellig 0 +expert 0 +relat 0 +social 0 +issu 0 +laboratori 0 +hand 0 +experienceon 0 +iici 0 +process 0 +electron 0 +newsgroup 0 +world 0 +wide 0 +eudora 0 +netscap 0 +paint 0 +draw 0 +superpaint 0 +chart 0 +excel 0 +filemak 0 +present 0 +manag 0 +hypercard 0 +desktop 0 +publish 0 +pagemak 0 +integr 0 +learn 0 +well 0 +addit 0 +special 0 +tool 0 +scanner 0 +teach 0 +thegoal 0 +provid 0 +high 0 +qualiti 0 +instruct 0 +rich 0 +educationalexperi 0 +namesectiontimedai 0 +bodner 0 +mwnick 0 +leavi 0 +mwtrshannon 0 +lloyd 0 +trtrjeff 0 +reminga 0 +mwfmwira 0 +sharenow 0 +trtrbrian 0 +swander 0 +mwfmwfbrad 0 +thayer 0 +mwfmwfjoe 0 +varghes 0 +trtrgeoff 0 +weinberg 0 +mwftrmaria 0 +yuin 0 +mwfmwrecommend 0 +necessari 0 +grade 0 +base 0 +regular 0 +assignmentsand 0 +glanc 0 +contain 0 +nitti 0 +gritti 0 +detail 0 +superpaintassign 0 +excellast 0 +modifi 0 +octob 0 +jonbodn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..1dcb94de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,169 @@ +comput 2 +lectur 2 +us 1 +section 1 +system 1 +class 1 +macintosh 1 +part 1 +discuss 1 +program 1 +word 1 +assign 1 +info 1 +salli 1 +mail 1 +wisc 1 +text 1 +click 1 +avail 1 +cours 1 +gener 1 +topic 1 +includ 1 +follow 1 +spreadsheet 1 +databas 1 +oper 1 +aldu 1 +background 1 +quizz 1 +exam 1 +syllabu 1 +computersinstructor 0 +instructor 0 +petersonoffic 0 +sciencephon 0 +slpeter 0 +facstaff 0 +eduoffic 0 +hour 0 +tuesdai 0 +thursdai 0 +appointmentvit 0 +time 0 +place 0 +held 0 +engin 0 +halllectur 0 +inform 0 +technolog 0 +societi 0 +laudon 0 +traver 0 +laudonlab 0 +point 0 +drag 0 +petersoncours 0 +introduct 0 +design 0 +take 0 +zero 0 +knowledg 0 +computersto 0 +crack 0 +shot 0 +user 0 +skill 0 +throughcolleg 0 +arena 0 +taught 0 +macintoshcomput 0 +csuse 0 +compon 0 +term 0 +scienc 0 +work 0 +necessarili 0 +order 0 +applic 0 +processor 0 +graphic 0 +hardwar 0 +input 0 +output 0 +storag 0 +devic 0 +languag 0 +network 0 +telecommun 0 +artifici 0 +intellig 0 +expert 0 +relat 0 +social 0 +issu 0 +laboratori 0 +hand 0 +experienceon 0 +iici 0 +process 0 +electron 0 +newsgroup 0 +world 0 +wide 0 +eudora 0 +netscap 0 +paint 0 +draw 0 +superpaint 0 +chart 0 +excel 0 +filemak 0 +present 0 +manag 0 +hypercard 0 +desktop 0 +publish 0 +pagemak 0 +integr 0 +learn 0 +well 0 +addit 0 +special 0 +tool 0 +scanner 0 +teach 0 +thegoal 0 +provid 0 +high 0 +qualiti 0 +instruct 0 +rich 0 +educationalexperi 0 +namesectiontimedai 0 +bodner 0 +mwnick 0 +leavi 0 +mwtrshannon 0 +lloyd 0 +trtrjeff 0 +reminga 0 +mwfmwira 0 +sharenow 0 +trtrbrian 0 +swander 0 +mwfmwfbrad 0 +thayer 0 +mwfmwfjoe 0 +varghes 0 +trtrgeoff 0 +weinberg 0 +mwftrmaria 0 +yuin 0 +mwfmwrecommend 0 +necessari 0 +grade 0 +base 0 +regular 0 +assignmentsand 0 +glanc 0 +contain 0 +nitti 0 +gritti 0 +detail 0 +superpaintassign 0 +excellast 0 +modifi 0 +octob 0 +jonbodn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..44549f6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,64 @@ +section 1 +skrentni 1 +window 1 +home 1 +languag 1 +coordin 1 +email 1 +cours 1 +consult 1 +introduct 1 +inform 1 +borland 1 +pagecomput 0 +scienc 0 +algebra 0 +program 0 +instructorsw 0 +would 0 +like 0 +comment 0 +suggest 0 +complaint 0 +feedback 0 +provid 0 +click 0 +offic 0 +csinform 0 +frequent 0 +ask 0 +question 0 +overview 0 +microcomput 0 +laboratori 0 +fall 0 +schedul 0 +tutor 0 +mainli 0 +polici 0 +academ 0 +misconduct 0 +offer 0 +depart 0 +softwar 0 +microsoft 0 +hint 0 +compil 0 +oper 0 +system 0 +netscap 0 +creat 0 +us 0 +subdirectoriesc 0 +savitch 0 +text 0 +book 0 +integr 0 +develop 0 +environmentfortran 0 +jeff 0 +lampert 0 +page 0 +last 0 +updat 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..0ab5fa36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,38 @@ +languag 1 +advanc 1 +mathemat 1 +cours 0 +infocours 0 +inform 0 +cscours 0 +descriptionfrom 0 +guidebook 0 +undergradu 0 +student 0 +construct 0 +algorithm 0 +problem 0 +solv 0 +instruct 0 +experi 0 +least 0 +procedur 0 +orient 0 +pascal 0 +fortran 0 +survei 0 +program 0 +techniqu 0 +prereq 0 +high 0 +school 0 +prepar 0 +colleg 0 +work 0 +statist 0 +logic 0 +consent 0 +instructor 0 +open 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..44549f6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,64 @@ +section 1 +skrentni 1 +window 1 +home 1 +languag 1 +coordin 1 +email 1 +cours 1 +consult 1 +introduct 1 +inform 1 +borland 1 +pagecomput 0 +scienc 0 +algebra 0 +program 0 +instructorsw 0 +would 0 +like 0 +comment 0 +suggest 0 +complaint 0 +feedback 0 +provid 0 +click 0 +offic 0 +csinform 0 +frequent 0 +ask 0 +question 0 +overview 0 +microcomput 0 +laboratori 0 +fall 0 +schedul 0 +tutor 0 +mainli 0 +polici 0 +academ 0 +misconduct 0 +offer 0 +depart 0 +softwar 0 +microsoft 0 +hint 0 +compil 0 +oper 0 +system 0 +netscap 0 +creat 0 +us 0 +subdirectoriesc 0 +savitch 0 +text 0 +book 0 +integr 0 +develop 0 +environmentfortran 0 +jeff 0 +lampert 0 +page 0 +last 0 +updat 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..6d899f40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,47 @@ +page 1 +inform 1 +includ 1 +check 1 +assign 1 +copi 1 +home 1 +comput 1 +document 1 +postscript 1 +local 1 +servic 1 +problem 0 +solv 0 +us 0 +fall 0 +scienc 0 +follow 0 +instructor 0 +teach 0 +assist 0 +offic 0 +hour 0 +suggest 0 +explan 0 +grade 0 +polici 0 +work 0 +examin 0 +past 0 +exam 0 +lab 0 +handout 0 +syllabu 0 +mani 0 +need 0 +viewer 0 +obtain 0 +site 0 +section 0 +depart 0 +ghost 0 +directori 0 +read 0 +readm 0 +file 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..6d899f40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,47 @@ +page 1 +inform 1 +includ 1 +check 1 +assign 1 +copi 1 +home 1 +comput 1 +document 1 +postscript 1 +local 1 +servic 1 +problem 0 +solv 0 +us 0 +fall 0 +scienc 0 +follow 0 +instructor 0 +teach 0 +assist 0 +offic 0 +hour 0 +suggest 0 +explan 0 +grade 0 +polici 0 +work 0 +examin 0 +past 0 +exam 0 +lab 0 +handout 0 +syllabu 0 +mani 0 +need 0 +viewer 0 +obtain 0 +site 0 +section 0 +depart 0 +ghost 0 +directori 0 +read 0 +readm 0 +file 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..c5d2d729 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,104 @@ +section 2 +chapter 2 +class 2 +assign 2 +exam 2 +quiz 2 +html 2 +fall 2 +program 2 +offic 2 +hour 2 +fridai 2 +jerri 1 +phone 1 +mail 1 +wisc 1 +karen 1 +homework 1 +cancel 1 +grade 1 +septemb 1 +novemb 1 +octob 1 +spring 1 +cours 1 +instructor 1 +handout 1 +solut 1 +simul 1 +help 1 +lectur 1 +note 1 +miller 1 +tuth 1 +wednesdai 1 +postscript 1 +syllabu 1 +answer 1 +data 1 +arithmet 1 +home 0 +page 0 +inform 0 +frequent 0 +ask 0 +question 0 +tusch 0 +tutsch 0 +execpc 0 +nolandsect 0 +smoler 0 +time 0 +psycholog 0 +sunlung 0 +suen 0 +ssuen 0 +edusridevi 0 +bhamidipati 0 +bsri 0 +edumohammad 0 +asgarian 0 +mondai 0 +schedul 0 +revis 0 +overview 0 +programs 0 +programm 0 +examsal 0 +quizz 0 +open 0 +book 0 +calcul 0 +probabl 0 +decemb 0 +last 0 +previou 0 +format 0 +summer 0 +midterm 0 +final 0 +lookup 0 +graphic 0 +interfac 0 +manual 0 +noteskaren 0 +number 0 +system 0 +represent 0 +integ 0 +float 0 +point 0 +structur 0 +regist 0 +procedur 0 +updat 0 +assembl 0 +updatedmondai 0 +except 0 +process 0 +featur 0 +perform 0 +architecur 0 +case 0 +studi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..d51e27e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,115 @@ +section 3 +chapter 2 +assign 2 +quiz 2 +class 2 +exam 2 +html 2 +program 2 +fall 2 +offic 2 +hour 2 +fridai 2 +jerri 1 +phone 1 +mail 1 +wisc 1 +homework 1 +grade 1 +karen 1 +cancel 1 +septemb 1 +novemb 1 +octob 1 +decemb 1 +final 1 +spring 1 +cours 1 +instructor 1 +handout 1 +solut 1 +simul 1 +help 1 +lectur 1 +note 1 +miller 1 +tuth 1 +wednesdai 1 +postscript 1 +syllabu 1 +answer 1 +data 1 +arithmet 1 +home 0 +page 0 +inform 0 +frequent 0 +ask 0 +question 0 +tusch 0 +tutsch 0 +execpc 0 +nolandsect 0 +smoler 0 +time 0 +psycholog 0 +sunlung 0 +suen 0 +ssuen 0 +edusridevi 0 +bhamidipati 0 +bsri 0 +edumohammad 0 +asgarian 0 +mondai 0 +schedul 0 +revis 0 +overview 0 +programs 0 +programm 0 +programa 0 +programb 0 +examsal 0 +quizz 0 +open 0 +book 0 +calcul 0 +probabl 0 +last 0 +option 0 +thursdai 0 +difficult 0 +cumul 0 +offer 0 +desperateto 0 +rais 0 +sign 0 +advanc 0 +previou 0 +format 0 +summer 0 +midterm 0 +lookup 0 +graphic 0 +interfac 0 +manual 0 +noteskaren 0 +number 0 +system 0 +represent 0 +integ 0 +float 0 +point 0 +structur 0 +regist 0 +procedur 0 +updat 0 +assembl 0 +updatedmondai 0 +except 0 +process 0 +featur 0 +perform 0 +architecur 0 +case 0 +studi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..f66ef76f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,126 @@ +page 2 +lectur 2 +chapter 2 +sampl 2 +code 1 +comput 1 +onlin 1 +offic 1 +announc 1 +assign 1 +exam 1 +search 1 +list 1 +skrentni 1 +wisc 1 +inform 1 +get 1 +program 1 +place 1 +tree 1 +copi 1 +error 1 +basic 1 +scienc 1 +hour 1 +data 1 +read 1 +lab 1 +queue 1 +stack 1 +stale 1 +attend 1 +thur 1 +futur 1 +tabl 1 +skip 1 +sort 1 +algorithm 1 +structur 0 +lec 0 +introduct 0 +structureslectur 0 +psychologylectur 0 +psychologycours 0 +start 0 +help 0 +cours 0 +materi 0 +home 0 +gener 0 +recent 0 +first 0 +problem 0 +found 0 +locat 0 +binari 0 +last 0 +makeup 0 +done 0 +solut 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +topic 0 +handin 0 +directori 0 +creat 0 +common 0 +suggest 0 +addit 0 +welcom 0 +either 0 +vega 0 +work 0 +line 0 +mondai 0 +wednesdai 0 +magic 0 +number 0 +sourc 0 +file 0 +must 0 +abl 0 +compil 0 +otherwis 0 +unusu 0 +look 0 +forget 0 +reload 0 +updat 0 +browser 0 +cach 0 +becom 0 +outdat 0 +unix 0 +tutori 0 +need 0 +time 0 +balanc 0 +discuss 0 +comparison 0 +implement 0 +simul 0 +overload 0 +oper 0 +hash 0 +link 0 +pointer 0 +dynam 0 +memori 0 +alloc 0 +analysi 0 +recurs 0 +focu 0 +appendix 0 +teach 0 +assist 0 +baicheng 0 +billi 0 +liao 0 +bail 0 +cheng 0 +jiacheng 0 +pmcopyright 0 +jame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..b090c0b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,548 @@ +program 3 +assign 2 +cours 2 +lectur 2 +exam 2 +note 2 +comput 2 +inform 2 +data 2 +home 2 +help 2 +document 2 +scienc 2 +time 2 +comment 2 +wisc 1 +offic 1 +hour 1 +text 1 +unix 1 +avail 1 +compil 1 +must 1 +email 1 +yanni 1 +women 1 +also 1 +take 1 +problem 1 +includ 1 +follow 1 +dewitt 1 +semest 1 +read 1 +machin 1 +possibl 1 +code 1 +name 1 +address 1 +grade 1 +late 1 +style 1 +want 1 +student 1 +materi 1 +often 1 +account 1 +final 1 +gener 1 +class 1 +algorithm 1 +clariti 1 +outputfil 1 +tuesdai 1 +schedul 1 +intern 1 +us 1 +develop 1 +cycl 1 +section 1 +print 1 +group 1 +encourag 1 +major 1 +like 1 +extra 1 +mail 1 +true 1 +wall 1 +well 1 +cover 1 +first 1 +debug 1 +function 1 +done 1 +copi 1 +turn 1 +make 1 +chang 1 +avoid 1 +need 1 +understand 1 +work 1 +effici 1 +paramet 1 +limit 1 +identifi 1 +statement 1 +line 1 +continu 1 +variabl 1 +introduct 1 +page 1 +http 1 +languag 1 +cheat 1 +extern 1 +readi 1 +notat 1 +binari 1 +graduat 1 +call 1 +would 1 +suzan 1 +write 1 +thiscours 1 +pascal 1 +requir 1 +book 1 +mirror 1 +complet 1 +still 1 +addit 1 +doit 1 +desk 1 +near 1 +dayton 1 +build 1 +experi 1 +edit 1 +file 1 +find 1 +list 1 +topic 1 +provid 1 +later 1 +design 1 +room 1 +thec 1 +univers 1 +electron 1 +sure 1 +assum 1 +simpli 1 +start 1 +know 1 +earli 1 +explain 1 +correct 1 +behav 1 +situat 1 +test 1 +case 1 +modular 1 +necessari 1 +consist 1 +suggest 1 +valu 1 +indent 1 +long 1 +begin 1 +user 1 +give 1 +descript 1 +tell 1 +assumpt 1 +describ 1 +neg 1 +main 1 +header 1 +declar 1 +segment 1 +error 1 +inputfil 1 +structuresfal 0 +htmlinstructor 0 +ioannidi 0 +sciencesoffic 0 +thursdai 0 +amoffic 0 +phone 0 +html 0 +content 0 +new 0 +teach 0 +assist 0 +polici 0 +newsassign 0 +midterm 0 +statisticssom 0 +interest 0 +statist 0 +median 0 +mean 0 +midterma 0 +sampl 0 +oldmidterm 0 +prepar 0 +ownmidterm 0 +searchth 0 +search 0 +either 0 +open 0 +filemenu 0 +ghostview 0 +window 0 +show 0 +andchoos 0 +menu 0 +item 0 +sciencesom 0 +femal 0 +faculti 0 +undergradu 0 +haveform 0 +wic 0 +oneof 0 +goal 0 +becomecomput 0 +thisclass 0 +talk 0 +someon 0 +incomput 0 +studi 0 +withtheir 0 +classwork 0 +computersci 0 +grad 0 +tomak 0 +appoint 0 +stodder 0 +eduand 0 +grow 0 +tremend 0 +field 0 +theodd 0 +ever 0 +anoth 0 +end 0 +abl 0 +statementi 0 +wide 0 +aniniti 0 +startup 0 +period 0 +product 0 +exceptionsy 0 +textth 0 +isdata 0 +abstract 0 +solv 0 +frank 0 +carrano 0 +isbn 0 +written 0 +separ 0 +notnecessari 0 +alwai 0 +fall 0 +david 0 +actual 0 +consider 0 +simpl 0 +lecturenot 0 +short 0 +isveri 0 +littl 0 +narr 0 +exercis 0 +recommend 0 +sourc 0 +purchas 0 +whichar 0 +street 0 +entranceof 0 +needsom 0 +activ 0 +log 0 +creat 0 +manipul 0 +run 0 +handoutc 0 +notesar 0 +contain 0 +invalu 0 +mention 0 +althoughi 0 +supplement 0 +handout 0 +courseof 0 +nonetheless 0 +respons 0 +base 0 +onth 0 +andth 0 +gradingther 0 +even 0 +five 0 +determin 0 +approxim 0 +equal 0 +weight 0 +programmingassign 0 +count 0 +octob 0 +chemistri 0 +wednesdai 0 +decemb 0 +place 0 +detail 0 +administr 0 +familiar 0 +basic 0 +stuff 0 +apoint 0 +record 0 +equival 0 +madison 0 +prerequisitecours 0 +thesear 0 +floor 0 +prefer 0 +certainrestrict 0 +emailand 0 +thatyou 0 +youwork 0 +provis 0 +download 0 +toyour 0 +runwith 0 +sparcstat 0 +notifi 0 +inassign 0 +hint 0 +allelectron 0 +send 0 +policyno 0 +accept 0 +exactli 0 +order 0 +caus 0 +load 0 +coincid 0 +duedat 0 +sever 0 +right 0 +awai 0 +oneach 0 +thing 0 +certain 0 +wrong 0 +wait 0 +thelast 0 +minut 0 +except 0 +approv 0 +good 0 +excus 0 +troubl 0 +soon 0 +cheatingth 0 +depart 0 +hard 0 +linest 0 +welcom 0 +tocommun 0 +datastructur 0 +butther 0 +share 0 +expect 0 +learn 0 +obei 0 +thecomput 0 +system 0 +policiesgovern 0 +helpif 0 +pleas 0 +policiesif 0 +best 0 +tovisit 0 +along 0 +currenthard 0 +intend 0 +conceptsthat 0 +present 0 +confus 0 +answer 0 +specif 0 +question 0 +reliabl 0 +contact 0 +respond 0 +emailsever 0 +daili 0 +almost 0 +everi 0 +week 0 +gradingprogram 0 +criteria 0 +correctli 0 +normal 0 +typicalinput 0 +state 0 +projectspecif 0 +easi 0 +informationabout 0 +robust 0 +behavior 0 +extrem 0 +unusu 0 +handl 0 +reason 0 +andlog 0 +manner 0 +blow 0 +qualiti 0 +shoulddemonstr 0 +facet 0 +capabl 0 +includingunusu 0 +unnecessarili 0 +ineffici 0 +construct 0 +howev 0 +never 0 +pursu 0 +expens 0 +effect 0 +useof 0 +incorpor 0 +sort 0 +paper 0 +subject 0 +considerationof 0 +arbitrari 0 +bound 0 +size 0 +orcomplex 0 +input 0 +whenev 0 +express 0 +definedconst 0 +easili 0 +numer 0 +liter 0 +appear 0 +thosevalu 0 +styleus 0 +meaning 0 +scheme 0 +convent 0 +variable_nam 0 +function_nam 0 +argument 0 +const 0 +defined_const 0 +enum 0 +enumtyp 0 +classnam 0 +multipl 0 +singl 0 +skip 0 +clear 0 +notesfor 0 +loop 0 +label 0 +meaningfulli 0 +documentationthi 0 +yourprogram 0 +typic 0 +someonewho 0 +superfici 0 +full 0 +format 0 +bug 0 +special 0 +featur 0 +made 0 +posit 0 +aspect 0 +unawar 0 +descriptionne 0 +repeat 0 +briefli 0 +summar 0 +point 0 +refer 0 +thensuffici 0 +appli 0 +documentationther 0 +four 0 +type 0 +structuresshould 0 +purpos 0 +outlin 0 +next 0 +membershould 0 +convei 0 +sname 0 +much 0 +withoutmak 0 +suppli 0 +exampl 0 +index 0 +last 0 +element 0 +ad 0 +stackyou 0 +local 0 +within 0 +tricki 0 +opaqu 0 +beavoid 0 +sometim 0 +commentcan 0 +reader 0 +go 0 +clarifi 0 +level 0 +outlineof 0 +vimani 0 +peopl 0 +thefirst 0 +becomecomfort 0 +particularli 0 +youronli 0 +previou 0 +macpasc 0 +macintosh 0 +strongli 0 +urg 0 +inth 0 +becom 0 +comfort 0 +withunix 0 +pain 0 +wellspent 0 +wish 0 +attend 0 +tutori 0 +held 0 +comp 0 +session 0 +thefollow 0 +dai 0 +tbayou 0 +pick 0 +environ 0 +look 0 +output 0 +break 0 +tire 0 +goto 0 +quit 0 +result 0 +submiss 0 +instruct 0 +given 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..c96b85dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,284 @@ +assign 3 +program 3 +lectur 2 +data 2 +cours 2 +exam 2 +structur 2 +note 2 +text 2 +wisc 1 +cover 1 +also 1 +inform 1 +us 1 +materi 1 +languag 1 +comput 1 +tuesdai 1 +abstract 1 +write 1 +chin 1 +section 1 +grade 1 +follow 1 +even 1 +http 1 +html 1 +fall 1 +offic 1 +hour 1 +teach 1 +cchin 1 +page 1 +book 1 +often 1 +dewitt 1 +electron 1 +final 1 +line 1 +introduct 1 +jame 1 +scienc 1 +laru 1 +fridai 1 +present 1 +concept 1 +detail 1 +build 1 +necessari 1 +determin 1 +requir 1 +type 1 +principl 1 +maintain 1 +place 1 +exercis 1 +tang 1 +sciencesoffic 1 +wednesdai 1 +thursdai 1 +phone 1 +email 1 +address 1 +written 1 +littl 1 +simpl 1 +contain 1 +addit 1 +avail 1 +doit 1 +desk 1 +dayton 1 +first 1 +experi 1 +regularli 1 +read 1 +mail 1 +schedul 1 +topic 1 +second 1 +revis 0 +larusinstructor 0 +laruslaru 0 +amcontentsteach 0 +assistantstextlectur 0 +informationelectron 0 +mailth 0 +languagegradingexamscours 0 +scheduleassign 0 +assignmentscours 0 +objectivesc 0 +object 0 +gener 0 +wide 0 +fundament 0 +block 0 +abl 0 +identifi 0 +situat 0 +select 0 +appropri 0 +reiter 0 +modular 0 +introduc 0 +essenti 0 +clear 0 +correct 0 +softwar 0 +close 0 +connect 0 +strong 0 +emphasi 0 +appli 0 +assistantswei 0 +zhang 0 +assist 0 +forthi 0 +homework 0 +assignmentsand 0 +happi 0 +answer 0 +question 0 +theassign 0 +aspect 0 +give 0 +troubl 0 +zhangoffic 0 +compuer 0 +sundai 0 +weiz 0 +mondai 0 +amoffic 0 +home 0 +textth 0 +problem 0 +solv 0 +wall 0 +mirror 0 +frank 0 +carrano 0 +isbn 0 +well 0 +long 0 +wind 0 +includ 0 +background 0 +separ 0 +alwai 0 +david 0 +complet 0 +short 0 +true 0 +narr 0 +basi 0 +feel 0 +free 0 +skip 0 +portion 0 +want 0 +purchas 0 +document 0 +street 0 +entranc 0 +unix 0 +need 0 +activ 0 +account 0 +log 0 +creat 0 +edit 0 +manipul 0 +file 0 +compil 0 +run 0 +debug 0 +handout 0 +crucial 0 +help 0 +psycholog 0 +mention 0 +attend 0 +strongli 0 +recommend 0 +appear 0 +textbook 0 +needless 0 +respons 0 +base 0 +maili 0 +notifi 0 +student 0 +chang 0 +hint 0 +assum 0 +gradingther 0 +semest 0 +five 0 +approxim 0 +equal 0 +weight 0 +count 0 +taught 0 +must 0 +know 0 +skrentni 0 +larg 0 +complex 0 +unless 0 +difficult 0 +learn 0 +anoth 0 +gdbthere 0 +describ 0 +debugg 0 +chemistri 0 +decemb 0 +rough 0 +outlin 0 +provid 0 +later 0 +administrationbas 0 +stuff 0 +function 0 +pointer 0 +record 0 +dynam 0 +storagelectur 0 +list 0 +binari 0 +search 0 +notat 0 +advanc 0 +listslectur 0 +stackslectur 0 +queueslectur 0 +hashinglectur 0 +recursionlectur 0 +treesbinari 0 +tree 0 +sort 0 +searchlectur 0 +treesgraphslectur 0 +sortinglectur 0 +tbaassign 0 +absolut 0 +turn 0 +index 0 +card 0 +name 0 +login 0 +nameyear 0 +school 0 +freshman 0 +sophomor 0 +previou 0 +coursesprevi 0 +experiencerec 0 +photograph 0 +pictur 0 +birthdai 0 +girl 0 +scout 0 +trip 0 +summer 0 +color 0 +black 0 +white 0 +size 0 +given 0 +without 0 +photo 0 +byte 0 +fora 0 +bound 0 +integ 0 +sequenc 0 +databaseof 0 +score 0 +tenni 0 +tournament 0 +produc 0 +aconcord 0 +hash 0 +tabl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..74d2fd5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,121 @@ +page 2 +lectur 2 +chapter 2 +sampl 2 +comput 1 +offic 1 +announc 1 +assign 1 +exam 1 +code 1 +onlin 1 +list 1 +skrentni 1 +wisc 1 +inform 1 +get 1 +program 1 +place 1 +copi 1 +error 1 +basic 1 +scienc 1 +hour 1 +data 1 +read 1 +lab 1 +queue 1 +stack 1 +stale 1 +attend 1 +thur 1 +skip 1 +sort 1 +search 1 +algorithm 1 +structur 0 +lec 0 +introduct 0 +structureslectur 0 +psychologylectur 0 +psychologycours 0 +start 0 +help 0 +cours 0 +materi 0 +home 0 +gener 0 +recent 0 +first 0 +problem 0 +found 0 +locat 0 +last 0 +makeup 0 +done 0 +solut 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +topic 0 +handin 0 +directori 0 +creat 0 +common 0 +suggest 0 +addit 0 +welcom 0 +either 0 +vega 0 +work 0 +line 0 +mondai 0 +wednesdai 0 +magic 0 +number 0 +sourc 0 +file 0 +must 0 +abl 0 +compil 0 +otherwis 0 +unusu 0 +look 0 +forget 0 +reload 0 +updat 0 +browser 0 +cach 0 +becom 0 +outdat 0 +unix 0 +tutori 0 +need 0 +time 0 +futur 0 +tree 0 +simul 0 +overload 0 +oper 0 +hash 0 +tabl 0 +link 0 +pointer 0 +dynam 0 +memori 0 +alloc 0 +analysi 0 +recurs 0 +focu 0 +appendix 0 +teach 0 +assist 0 +baicheng 0 +billi 0 +liao 0 +bail 0 +cheng 0 +jiacheng 0 +pmcopyright 0 +jame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..1b7e2cf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,55 @@ +comput 1 +offic 1 +page 1 +introduct 1 +scienc 1 +lectur 1 +home 1 +theoret 1 +brian 1 +cole 1 +email 1 +wisc 1 +hour 1 +sundaram 1 +assign 1 +examin 1 +madison 1 +fall 0 +room 0 +mondai 0 +fridai 0 +teach 0 +assist 0 +david 0 +stukel 0 +tuesdai 0 +wednesdai 0 +thursdai 0 +text 0 +languag 0 +theori 0 +john 0 +martin 0 +north 0 +dakota 0 +state 0 +univers 0 +mcgraw 0 +hill 0 +isbn 0 +tent 0 +schedul 0 +includ 0 +exam 0 +inform 0 +clarif 0 +grade 0 +polici 0 +written 0 +term 0 +final 0 +archiv 0 +mail 0 +list 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..ad0f57f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,72 @@ +homework 2 +exam 2 +midterm 2 +octob 2 +stat 1 +final 1 +offic 1 +cours 1 +septemb 1 +march 1 +solut 1 +book 1 +decemb 1 +program 1 +novemb 1 +sampl 1 +also 1 +math 1 +fall 1 +linear 1 +mechan 1 +engin 1 +open 1 +time 1 +date 1 +locat 1 +wednesdai 1 +olvi 1 +mangasarian 1 +comp 1 +mail 1 +wisc 1 +hour 1 +semest 1 +matlab 1 +inform 1 +page 1 +site 1 +programmingfal 0 +schedul 0 +lectur 0 +thursdai 0 +instructor 0 +pphone 0 +teach 0 +assist 0 +telephon 0 +textbook 0 +ferri 0 +preliminari 0 +version 0 +doit 0 +madison 0 +syllabu 0 +overview 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +setup 0 +project 0 +mathemat 0 +home 0 +relev 0 +searchabl 0 +bibliograph 0 +databas 0 +item 0 +link 0 +variou 0 +updat 0 +period 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..38a5712c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,68 @@ +offic 1 +program 1 +lectur 1 +horwitz 1 +telephon 1 +mail 1 +wisc 1 +hour 1 +appoint 1 +rahul 1 +compil 1 +tool 1 +cours 1 +inform 1 +assign 1 +grade 1 +fall 0 +introduct 0 +languag 0 +compilersspr 0 +stori 0 +month 0 +octob 0 +schedul 0 +tuth 0 +comp 0 +stat 0 +recit 0 +psycholog 0 +instructor 0 +susan 0 +tuesdai 0 +fridai 0 +teach 0 +assist 0 +kapoor 0 +mondai 0 +wednesdai 0 +text 0 +reserv 0 +wendt 0 +librari 0 +principl 0 +techniqu 0 +sethi 0 +ullman 0 +craft 0 +fischer 0 +leblanc 0 +check 0 +regularli 0 +gener 0 +overview 0 +date 0 +exam 0 +includ 0 +late 0 +polici 0 +get 0 +start 0 +read 0 +homework 0 +examin 0 +note 0 +us 0 +email 0 +link 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..ad281686 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,526 @@ +java 3 +project 3 +sept 2 +avail 2 +file 2 +program 2 +schedul 2 +languag 2 +introduct 2 +system 2 +comput 2 +specif 2 +implement 2 +process 2 +midterm 2 +exam 2 +deadlock 2 +section 2 +assign 2 +lectur 2 +manag 2 +memori 2 +oper 1 +grade 1 +note 1 +class 1 +page 1 +first 1 +public 1 +cours 1 +synchron 1 +solomon 1 +room 1 +wisc 1 +summari 1 +correct 1 +take 1 +time 1 +differ 1 +semest 1 +discuss 1 +thank 1 +point 1 +threadschedul 1 +start 1 +directori 1 +fork 1 +string 1 +messag 1 +final 1 +disk 1 +refer 1 +us 1 +chapter 1 +updat 1 +detail 1 +also 1 +forproject 1 +avaiabl 1 +avoid 1 +call 1 +place 1 +text 1 +topic 1 +quit 1 +hand 1 +algorithm 1 +theproject 1 +creat 1 +three 1 +graph 1 +graphcontain 1 +number 1 +onlin 1 +student 1 +receiv 1 +makefil 1 +help 1 +seminar 1 +unix 1 +follow 1 +processor 1 +wednesdai 1 +collect 1 +late 1 +dai 1 +monitor 1 +count 1 +fall 1 +offic 1 +sciencesoffic 1 +hour 1 +phone 1 +email 1 +address 1 +new 1 +answer 1 +thursdai 1 +error 1 +other 1 +popular 1 +cover 1 +comp 1 +octob 1 +issu 1 +contain 1 +fix 1 +import 1 +caus 1 +second 1 +sampl 1 +code 1 +sched 1 +later 1 +data 1 +read 1 +peterson 1 +initi 1 +placement 1 +star 1 +philosoph 1 +todd 1 +thejava 1 +tutori 1 +argument 1 +substr 1 +work 1 +sourc 1 +type 1 +make 1 +run 1 +solari 1 +finish 1 +structur 1 +meet 1 +mondai 1 +book 1 +bookstor 1 +begin 1 +depart 1 +workstat 1 +must 1 +cshrc 1 +local 1 +path 1 +back 1 +thur 1 +inform 1 +relat 1 +softwar 1 +prevent 1 +recoveri 1 +main 1 +virtual 1 +swap 1 +devic 1 +protect 1 +secur 1 +tuesdai 1 +includ 1 +thejavaprogram 1 +requir 1 +strongli 1 +languagebi 1 +arnold 1 +gosl 1 +addit 1 +access 1 +scienc 1 +becom 1 +familiar 1 +environ 1 +pair 1 +anyon 1 +caught 1 +garbag 1 +histori 1 +resourc 1 +get 1 +view 1 +semaphor 1 +systemssect 0 +instructormarvin 0 +troffic 0 +tarob 0 +mellencamp 0 +mwfoffic 0 +mellen 0 +watch 0 +space 0 +latest 0 +score 0 +breakdown 0 +distributioni 0 +date 0 +move 0 +typograph 0 +importantli 0 +arraywa 0 +demand 0 +isavail 0 +look 0 +warn 0 +exampl 0 +larg 0 +grain 0 +salt 0 +long 0 +courseus 0 +order 0 +likelyb 0 +determin 0 +presentedin 0 +electron 0 +direct 0 +forprogram 0 +post 0 +procedur 0 +givefork 0 +specificationshould 0 +notifi 0 +show 0 +bug 0 +minor 0 +paragraph 0 +slightli 0 +garbl 0 +jake 0 +dawlei 0 +carr 0 +line 0 +omit 0 +theprogram 0 +detailssect 0 +specifi 0 +correctli 0 +lipe 0 +zhang 0 +test 0 +srccontain 0 +javacontain 0 +definit 0 +classgraphdescrib 0 +petersoncycl 0 +shown 0 +mention 0 +notacycl 0 +petersonacycl 0 +acycl 0 +topolog 0 +central 0 +sharingfork 0 +nine 0 +jenner 0 +typo 0 +maxthink 0 +replac 0 +maxeat 0 +versionha 0 +mistak 0 +version 0 +argumenti 0 +offset 0 +charactersin 0 +franco 0 +tung 0 +chan 0 +occasion 0 +send 0 +urgent 0 +directli 0 +mail 0 +listof 0 +regist 0 +archiv 0 +sent 0 +list 0 +think 0 +sendmail 0 +request 0 +copi 0 +rememb 0 +separ 0 +edit 0 +describ 0 +comment 0 +maketo 0 +compil 0 +compilewithout 0 +simul 0 +preemptiv 0 +multitask 0 +computershav 0 +ad 0 +tutoriali 0 +go 0 +onthread 0 +find 0 +hint 0 +awar 0 +weekli 0 +systemsand 0 +network 0 +checkth 0 +colloquia 0 +theunivers 0 +ajava 0 +afil 0 +name 0 +home 0 +onelin 0 +chang 0 +effect 0 +either 0 +localor 0 +simpli 0 +readi 0 +orient 0 +session 0 +user 0 +forth 0 +tue 0 +csmon 0 +cslast 0 +content 0 +intend 0 +gener 0 +techniqu 0 +usedto 0 +kind 0 +among 0 +beprocess 0 +creation 0 +commun 0 +segment 0 +replacementalgorithm 0 +control 0 +input 0 +output 0 +sciencesand 0 +statisticsdiscuss 0 +psychologyth 0 +option 0 +least 0 +primari 0 +focu 0 +anyquest 0 +regard 0 +rais 0 +thetext 0 +modern 0 +systemsbi 0 +andrew 0 +tanenbaum 0 +prentic 0 +hall 0 +recommend 0 +jame 0 +addison 0 +weslei 0 +lot 0 +materi 0 +fast 0 +tutorialth 0 +specificationjava 0 +documentationwatch 0 +spot 0 +link 0 +five 0 +sparcstat 0 +dialect 0 +unixoper 0 +provid 0 +anycomput 0 +programminglanguag 0 +howev 0 +respons 0 +transfer 0 +requireddata 0 +set 0 +packag 0 +easi 0 +acquaint 0 +exercis 0 +designedto 0 +subsequ 0 +involveprocess 0 +member 0 +feel 0 +free 0 +butyou 0 +share 0 +partner 0 +cheat 0 +vigor 0 +punish 0 +enough 0 +said 0 +dateind 0 +entir 0 +havethre 0 +daysof 0 +credit 0 +eachof 0 +last 0 +choos 0 +sever 0 +favor 0 +congeni 0 +runtim 0 +subscript 0 +null 0 +pointer 0 +uniniti 0 +variabl 0 +except 0 +runtimerath 0 +mysteri 0 +crash 0 +random 0 +behavior 0 +much 0 +easier 0 +char 0 +arrai 0 +storag 0 +extrem 0 +handi 0 +trendi 0 +faster 0 +mani 0 +reason 0 +grow 0 +littl 0 +withth 0 +byproduct 0 +coursewil 0 +knowledg 0 +market 0 +commod 0 +featur 0 +built 0 +particular 0 +wide 0 +withlanguag 0 +level 0 +support 0 +concurr 0 +thread 0 +switch 0 +alwaysa 0 +disloc 0 +fortun 0 +excel 0 +eas 0 +thetransit 0 +amazingli 0 +good 0 +neither 0 +introductori 0 +primer 0 +author 0 +assum 0 +youalreadi 0 +know 0 +manual 0 +although 0 +arefer 0 +manuali 0 +readabl 0 +wayfrom 0 +everyth 0 +need 0 +write 0 +sophisticatedprogram 0 +univers 0 +encourag 0 +gather 0 +varieti 0 +ofoth 0 +togeth 0 +niceonlin 0 +tutorialabout 0 +anda 0 +manualfor 0 +standard 0 +librari 0 +even 0 +statist 0 +timet 0 +decemb 0 +yourgrad 0 +remain 0 +four 0 +tent 0 +check 0 +frequent 0 +learn 0 +javaoct 0 +synchronizationoct 0 +schedulingoct 0 +schedulingdec 0 +systemsdec 0 +bottom 0 +outlin 0 +programm 0 +state 0 +race 0 +condit 0 +bound 0 +buffer 0 +problem 0 +dine 0 +terminolog 0 +detect 0 +critic 0 +short 0 +term 0 +alloc 0 +compact 0 +come 0 +eduthu 0 +copyright 0 +marvin 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..f30b249c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,88 @@ +system 1 +thread 1 +home 1 +introduct 1 +thursdai 1 +discuss 1 +fridai 1 +page 1 +concurr 1 +address 1 +manag 1 +memori 1 +protect 1 +file 1 +java 1 +distribut 1 +fall 0 +pagec 0 +oper 0 +systemsfal 0 +tuesdai 0 +host 0 +maryvernon 0 +instructor 0 +andkarunamuthiah 0 +welcom 0 +note 0 +lectur 0 +beinterchang 0 +follow 0 +date 0 +solut 0 +quiz 0 +assign 0 +offic 0 +hour 0 +email 0 +textbook 0 +read 0 +grade 0 +project 0 +quizz 0 +mail 0 +archiveapproxim 0 +schedul 0 +topicsweek 0 +oftopicsreadingsep 0 +space 0 +processeschapt 0 +cooper 0 +threadschapt 0 +synchron 0 +implement 0 +mutual 0 +exclusioncont 0 +semaphorescont 0 +monitor 0 +summarycont 0 +doct 0 +deadlock 0 +process 0 +schedulingchapt 0 +translat 0 +cach 0 +tlbschapter 0 +demand 0 +virtual 0 +memorycont 0 +review 0 +survei 0 +systemschapt 0 +name 0 +directorieschapt 0 +object 0 +core 0 +methodstbanov 0 +secur 0 +thanksgiv 0 +class 0 +network 0 +remot 0 +procedur 0 +call 0 +chapter 0 +global 0 +reviewchapt 0 +vernon 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..17c8ec84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,60 @@ +assign 2 +midterm 2 +spring 1 +project 1 +fall 1 +offic 1 +hour 1 +exam 1 +tuesdai 1 +answer 1 +cours 1 +inform 1 +select 1 +solut 1 +instructor 1 +handout 1 +david 1 +wednesdai 1 +appoint 1 +mail 1 +wisc 1 +phone 1 +time 1 +thursdai 1 +atkinson 1 +get 1 +start 1 +descript 1 +vhdl 1 +room 1 +lectur 0 +home 0 +page 0 +grade 0 +wood 0 +class 0 +locat 0 +phil 0 +help 0 +mentor 0 +error 0 +check 0 +correct 0 +sampl 0 +code 0 +compil 0 +simul 0 +mentorassign 0 +question 0 +projectthi 0 +section 0 +includ 0 +deadlin 0 +report 0 +demonstr 0 +decemb 0 +examsth 0 +final 0 +previou 0 +endterm 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..0b9779e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,80 @@ +assign 2 +postscript 2 +offic 1 +last 1 +handout 1 +hour 1 +info 1 +updat 1 +mail 1 +databas 1 +manag 1 +system 1 +design 1 +inform 1 +class 1 +fridai 1 +time 1 +solut 1 +chapter 1 +exercis 1 +grade 1 +import 1 +html 1 +year 1 +midterm 1 +sybas 1 +tree 1 +raghu 1 +phone 1 +lectur 1 +xbao 1 +implementationc 0 +implementationcours 0 +version 0 +instead 0 +chang 0 +text 0 +list 0 +pleas 0 +dont 0 +print 0 +first 0 +overview 0 +prerequisit 0 +topic 0 +cover 0 +date 0 +polici 0 +issu 0 +minibas 0 +home 0 +page 0 +check 0 +detail 0 +sampl 0 +us 0 +help 0 +yahoo 0 +entri 0 +resourc 0 +tutori 0 +debugg 0 +languag 0 +construct 0 +experi 0 +assignmentoth 0 +code 0 +convent 0 +instructor 0 +ramakrishnan 0 +discuss 0 +place 0 +ingraham 0 +teach 0 +assist 0 +xuemei 0 +tue 0 +thur 0 +modifi 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..848509c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,47 @@ +lectur 2 +page 1 +discuss 1 +home 1 +know 1 +russel 1 +option 1 +psycholog 1 +time 1 +place 1 +inform 1 +assign 1 +welcom 0 +obvious 0 +construct 0 +semest 0 +progress 0 +addinginform 0 +need 0 +import 0 +thing 0 +class 0 +meetingroom 0 +chang 0 +current 0 +meet 0 +labsfor 0 +fridai 0 +beenmov 0 +still 0 +instructor 0 +jeff 0 +naughton 0 +offic 0 +wednesdai 0 +lab 0 +taught 0 +close 0 +cooper 0 +fact 0 +probabl 0 +exam 0 +gener 0 +minibas 0 +particular 0 +pleas 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..d3215ee1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,33 @@ +homework 1 +cours 1 +mail 1 +offic 1 +phone 1 +wisc 1 +hour 1 +introduct 1 +algorithm 1 +bach 1 +teach 1 +assist 1 +raji 1 +inform 0 +instructor 0 +eric 0 +appt 0 +bill 0 +donaldson 0 +gopalakrishnan 0 +midterm 0 +exam 0 +handout 0 +descript 0 +syllabu 0 +book 0 +reserv 0 +organ 0 +solut 0 +graph 0 +fractal 0 +behaviour 0 +archiv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..1128add0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,112 @@ +network 2 +comput 1 +cours 1 +email 1 +wisc 1 +fall 1 +project 1 +assign 1 +program 1 +postscript 1 +teach 1 +assist 1 +mail 1 +archiv 1 +refer 1 +eduoffic 1 +hour 1 +slide 1 +midterm 1 +advanc 1 +syllabu 1 +version 1 +comp 1 +phone 1 +time 1 +conveni 1 +feel 1 +free 1 +appoint 1 +layer 1 +powerpoint 1 +document 1 +form 1 +exam 1 +socket 1 +interfac 1 +lectur 1 +professor 0 +landweb 0 +introduct 0 +networksintroduct 0 +tabl 0 +content 0 +intern 0 +connect 0 +offer 0 +inform 0 +instructor 0 +select 0 +readingsclick 0 +hereto 0 +latest 0 +text 0 +networkingcours 0 +madisoncours 0 +informationlecturetim 0 +mwfplace 0 +statclass 0 +listinstructor 0 +lawrenc 0 +landweberoffic 0 +statphon 0 +srinivasa 0 +narayananoffic 0 +mondai 0 +wednesdai 0 +teitelbaumoffic 0 +naemail 0 +tuesdai 0 +thursdai 0 +moder 0 +complet 0 +error 0 +warn 0 +code 0 +class 0 +implement 0 +reliabl 0 +adapt 0 +handout 0 +overview 0 +softwar 0 +engin 0 +design 0 +evalu 0 +html 0 +pictur 0 +grade 0 +criteria 0 +gradingmidterm 0 +final 0 +term 0 +prior 0 +option 0 +book 0 +unix 0 +steven 0 +richard 0 +prentic 0 +hall 0 +isbn 0 +garbler 0 +packag 0 +annot 0 +bibliographyread 0 +partial 0 +icmp 0 +ospf 0 +ipng 0 +schedul 0 +spring 0 +review 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..38e3c4e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,21 @@ +homework 1 +fall 1 +solut 1 +note 1 +comput 1 +project 1 +part 1 +home 0 +page 0 +instructor 0 +robert 0 +meyer 0 +wisc 0 +time 0 +place 0 +comp 0 +offic 0 +hour 0 +cours 0 +descript 0 +option 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..66c3a7d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,92 @@ +program 1 +homework 1 +nonlinear 1 +cours 1 +wisc 1 +offic 1 +convex 1 +optim 1 +differenti 1 +assign 1 +grade 1 +mail 1 +function 1 +criteria 1 +theori 1 +ferri 1 +telephon 1 +hour 1 +wednesdai 1 +text 1 +second 1 +gener 1 +concav 1 +order 1 +examin 1 +page 1 +fall 0 +also 0 +math 0 +stat 0 +applicationsfal 0 +schedul 0 +lectur 0 +list 0 +http 0 +instructor 0 +michael 0 +mondai 0 +tuesdai 0 +teach 0 +assist 0 +thursdai 0 +class 0 +olvi 0 +mangasarian 0 +siam 0 +publish 0 +philadelphia 0 +us 0 +algorithm 0 +bazaraa 0 +sherali 0 +shetti 0 +edit 0 +wilei 0 +york 0 +bertseka 0 +athena 0 +scientif 0 +inform 0 +overview 0 +introduct 0 +linear 0 +inequ 0 +theorem 0 +altern 0 +set 0 +saddlepoint 0 +without 0 +first 0 +dualiti 0 +condit 0 +exact 0 +penalti 0 +augment 0 +lagrangian 0 +gradient 0 +project 0 +book 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +week 0 +midterm 0 +novemb 0 +final 0 +mathemat 0 +home 0 +updat 0 +period 0 +semest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..66c3a7d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,92 @@ +program 1 +homework 1 +nonlinear 1 +cours 1 +wisc 1 +offic 1 +convex 1 +optim 1 +differenti 1 +assign 1 +grade 1 +mail 1 +function 1 +criteria 1 +theori 1 +ferri 1 +telephon 1 +hour 1 +wednesdai 1 +text 1 +second 1 +gener 1 +concav 1 +order 1 +examin 1 +page 1 +fall 0 +also 0 +math 0 +stat 0 +applicationsfal 0 +schedul 0 +lectur 0 +list 0 +http 0 +instructor 0 +michael 0 +mondai 0 +tuesdai 0 +teach 0 +assist 0 +thursdai 0 +class 0 +olvi 0 +mangasarian 0 +siam 0 +publish 0 +philadelphia 0 +us 0 +algorithm 0 +bazaraa 0 +sherali 0 +shetti 0 +edit 0 +wilei 0 +york 0 +bertseka 0 +athena 0 +scientif 0 +inform 0 +overview 0 +introduct 0 +linear 0 +inequ 0 +theorem 0 +altern 0 +set 0 +saddlepoint 0 +without 0 +first 0 +dualiti 0 +condit 0 +exact 0 +penalti 0 +augment 0 +lagrangian 0 +gradient 0 +project 0 +book 0 +reserv 0 +kurt 0 +wendt 0 +librari 0 +week 0 +midterm 0 +novemb 0 +final 0 +mathemat 0 +home 0 +updat 0 +period 0 +semest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..d3c32e77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,59 @@ +comput 1 +html 1 +postscript 1 +mimic 1 +scienc 1 +hour 1 +wisc 1 +home 1 +page 1 +sept 1 +text 1 +softwar 1 +pleas 1 +miron 1 +offic 1 +phone 1 +mail 1 +cychan 1 +system 0 +perform 0 +evalu 0 +model 0 +new 0 +assign 0 +librari 0 +avail 0 +public 0 +cours 0 +inform 0 +lectur 0 +devis 0 +user 0 +manual 0 +print 0 +file 0 +contain 0 +mani 0 +imag 0 +take 0 +least 0 +half 0 +initi 0 +instruct 0 +tutori 0 +onlin 0 +help 0 +qnet 0 +exampl 0 +devc 0 +professor 0 +livni 0 +teach 0 +assist 0 +chee 0 +yong 0 +chan 0 +suggest 0 +comment 0 +send 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..cd25ff30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,78 @@ +student 1 +postscript 1 +find 1 +inform 1 +retriev 1 +cours 1 +resourc 1 +overview 1 +part 1 +class 1 +email 1 +uwisc 0 +technolog 0 +seek 0 +knowledgerichard 0 +belewvisit 0 +professorc 0 +lectur 0 +univ 0 +wisconsin 0 +comput 0 +scienc 0 +departmentfal 0 +thur 0 +acal 0 +room 0 +engrthi 0 +design 0 +interest 0 +understand 0 +aboutth 0 +knowledg 0 +represent 0 +machinelearn 0 +techniqu 0 +underli 0 +much 0 +excit 0 +activ 0 +occur 0 +onth 0 +world 0 +wide 0 +complet 0 +descript 0 +coures 0 +canse 0 +abstract 0 +asyllabu 0 +major 0 +topic 0 +consid 0 +graphic 0 +mapof 0 +thesear 0 +relat 0 +anda 0 +tent 0 +schedul 0 +semesterwil 0 +proce 0 +read 0 +polit 0 +infidel 0 +imag 0 +assign 0 +digest 0 +hypermai 0 +suggest 0 +compos 0 +classrel 0 +minut 0 +taken 0 +last 0 +modifi 0 +belew 0 +wisc 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..d0ec6b13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,132 @@ +chang 2 +matlab 1 +assign 1 +diari 1 +class 1 +introduct 1 +page 1 +materi 1 +well 1 +list 1 +question 1 +comput 1 +also 1 +book 1 +student 1 +numer 1 +last 1 +cours 1 +tent 1 +syllabu 1 +order 1 +score 1 +addit 1 +note 1 +problem 1 +email 1 +concern 1 +mention 1 +name 1 +begin 1 +session 1 +edit 1 +avail 1 +look 1 +access 1 +telnet 1 +unix 1 +method 0 +methodsthi 0 +contain 0 +inform 0 +fall 0 +smile 0 +orderli 0 +mind 0 +cooper 0 +assignmentson 0 +total 0 +midterm 0 +date 0 +slightli 0 +point 0 +residu 0 +error 0 +condit 0 +rick 0 +carl 0 +offic 0 +hour 0 +errata 0 +text 0 +us 0 +updat 0 +sinc 0 +complex 0 +numericalanalysi 0 +algorithm 0 +post 0 +preprint 0 +foremostmathematician 0 +todai 0 +subject 0 +interest 0 +trickytop 0 +least 0 +squar 0 +solut 0 +approxim 0 +time 0 +place 0 +textmai 0 +supplement 0 +byaddit 0 +file 0 +areavail 0 +organ 0 +chapter 0 +awar 0 +though 0 +site 0 +capitallett 0 +sometim 0 +lower 0 +caselett 0 +present 0 +plan 0 +rather 0 +fortran 0 +kermit 0 +sigmon 0 +primer 0 +doit 0 +handout 0 +reaction 0 +winor 0 +machin 0 +overviewcours 0 +answer 0 +word 0 +grade 0 +four 0 +digit 0 +current 0 +conduct 0 +orient 0 +user 0 +andp 0 +relat 0 +linksyou 0 +might 0 +wish 0 +explor 0 +csdepart 0 +home 0 +system 0 +frequent 0 +ask 0 +simpl 0 +tutori 0 +advanc 0 +referenceviva 0 +good 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..c7fba341 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,55 @@ +numer 1 +function 1 +page 1 +class 1 +email 1 +relat 1 +analysi 0 +last 0 +chang 0 +analysisthi 0 +contain 0 +inform 0 +fall 0 +version 0 +math 0 +current 0 +note 0 +avail 0 +follow 0 +directori 0 +well 0 +hard 0 +copi 0 +doit 0 +recent 0 +announc 0 +post 0 +grade 0 +time 0 +locat 0 +statlectur 0 +carl 0 +boor 0 +deboor 0 +wisc 0 +offic 0 +hour 0 +stat 0 +line 0 +classnot 0 +viii 0 +index 0 +assign 0 +none 0 +concern 0 +homework 0 +cours 0 +question 0 +linksyou 0 +might 0 +wish 0 +explor 0 +depart 0 +home 0 +courseoff 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..eff8261d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,13 @@ +approxim 1 +cours 1 +theori 0 +last 0 +chang 0 +theorythi 0 +page 0 +contain 0 +inform 0 +spring 0 +version 0 +math 0 +note 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..afd5b04b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,65 @@ +program 1 +silva 1 +comput 1 +inform 1 +tuesdai 1 +solut 1 +toni 1 +dsilva 1 +wisc 1 +offic 1 +scienc 1 +statist 1 +savitch 1 +sept 1 +handout 1 +introduct 1 +window 1 +thursdai 1 +quiz 1 +home 0 +page 0 +sectioncsm 0 +instructor 0 +contact 0 +email 0 +phone 0 +hour 0 +appoint 0 +textbookproblem 0 +solv 0 +object 0 +walter 0 +section 0 +comp 0 +stat 0 +firstdai 0 +noland 0 +specifi 0 +timet 0 +chamberlin 0 +cours 0 +tent 0 +syllabu 0 +semest 0 +late 0 +polici 0 +grade 0 +criteria 0 +academ 0 +misconduct 0 +import 0 +softwar 0 +microsoft 0 +windowshint 0 +compilersth 0 +oper 0 +systememailmosaicnetscap 0 +borland 0 +languageth 0 +text 0 +assign 0 +quizz 0 +last 0 +modifi 0 +anthoni 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..c9ae5a21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,432 @@ +imag 3 +vision 3 +exam 2 +comput 2 +paper 2 +us 2 +method 2 +homework 2 +process 2 +project 2 +read 2 +doit 2 +chapter 2 +tool 2 +vista 2 +cours 1 +student 1 +test 1 +algorithm 1 +email 1 +format 1 +assign 1 +avail 1 +thin 1 +mosaic 1 +khoro 1 +univers 1 +thursdai 1 +includ 1 +class 1 +result 1 +directori 1 +send 1 +page 1 +program 1 +code 1 +account 1 +printer 1 +print 1 +environ 1 +execut 1 +spring 1 +dyer 1 +wisc 1 +detect 1 +activ 1 +contour 1 +base 1 +novemb 1 +document 1 +handout 1 +window 1 +interact 1 +also 1 +start 1 +softwar 1 +least 1 +shape 1 +larg 1 +hand 1 +might 1 +line 1 +exampl 1 +note 1 +fast 1 +digit 1 +comm 1 +spline 1 +adelson 1 +graphic 1 +laser 1 +locat 1 +system 1 +instructor 1 +chuck 1 +csstelephon 1 +eduoffic 1 +hour 1 +gener 1 +introduct 1 +analysi 1 +segment 1 +modul 1 +three 1 +dimension 1 +scene 1 +inform 1 +shade 1 +depth 1 +stereo 1 +focu 1 +model 1 +grade 1 +requir 1 +select 1 +machin 1 +collect 1 +sold 1 +onlin 1 +enhanc 1 +modif 1 +public 1 +face 1 +rotat 1 +color 1 +free 1 +origin 1 +appli 1 +home 1 +well 1 +skeleton 1 +octob 1 +manual 1 +condit 1 +type 1 +disappear 1 +matrix 1 +block 1 +distanc 1 +four 1 +want 1 +follow 1 +addit 1 +output 1 +ubyt 1 +file 1 +point 1 +applic 1 +delet 1 +parallel 1 +pattern 1 +differ 1 +pyramid 1 +engin 1 +burt 1 +tran 1 +hint 1 +faq 1 +snake 1 +topic 1 +stanford 1 +room 1 +space 1 +sure 1 +list 1 +queue 1 +displai 1 +varieti 1 +imgstar 1 +basic 1 +convers 1 +matlab 1 +numer 1 +databas 1 +examin 1 +link 1 +interest 1 +scienc 1 +hdtv 1 +atsc 1 +demo 1 +visionc 0 +visionfal 0 +dyeroffic 0 +mondai 0 +appointmentteach 0 +assist 0 +bryan 0 +sooffic 0 +wednesdai 0 +fridai 0 +appointmentstud 0 +informationfundament 0 +first 0 +level 0 +edg 0 +featuredetect 0 +principl 0 +defin 0 +forreconstruct 0 +usingtechniqu 0 +asshap 0 +recoveri 0 +andocclud 0 +viewpoint 0 +control 0 +motion 0 +track 0 +object 0 +recognit 0 +schedul 0 +tuesdai 0 +prerequisit 0 +fundament 0 +calculu 0 +probabl 0 +theori 0 +linear 0 +algebra 0 +midterm 0 +particip 0 +syllabu 0 +part 0 +jain 0 +kasturi 0 +schunck 0 +mcgraw 0 +hill 0 +york 0 +readingsfrom 0 +journal 0 +confer 0 +proceed 0 +small 0 +batchessupplementari 0 +sourcesonlin 0 +informationmost 0 +urlhttp 0 +html 0 +date 0 +except 0 +primarili 0 +score 0 +assignmentshomework 0 +histogram 0 +option 0 +make 0 +copi 0 +portrait 0 +contrast 0 +byfirst 0 +crop 0 +around 0 +head 0 +shoulder 0 +final 0 +adjust 0 +theintens 0 +function 0 +editor 0 +thewindow 0 +button 0 +modifi 0 +thing 0 +colorif 0 +wish 0 +found 0 +good 0 +grayscal 0 +transformationsav 0 +andput 0 +whereth 0 +tell 0 +qualit 0 +whatintens 0 +transform 0 +improv 0 +qualityof 0 +overal 0 +photo 0 +board 0 +feel 0 +ownweb 0 +learn 0 +get 0 +correct 0 +chang 0 +instead 0 +prevent 0 +altogeth 0 +count 0 +transit 0 +case 0 +citi 0 +infin 0 +constant 0 +corner 0 +chessboard 0 +center 0 +posit 0 +evalu 0 +experi 0 +convert 0 +vconvert 0 +edit 0 +need 0 +emac 0 +clean 0 +header 0 +contain 0 +right 0 +repn 0 +component_interp 0 +gradient 0 +low_threshold 0 +high_threshold 0 +vlink 0 +vsegedg 0 +approach 0 +determin 0 +direct 0 +index 0 +finger 0 +fact 0 +entir 0 +surround 0 +zhang 0 +suen 0 +wang 0 +comment 0 +comparison 0 +version 0 +laplacian 0 +compact 0 +ieee 0 +multiresolut 0 +produc 0 +kass 0 +witkin 0 +terzopoulo 0 +william 0 +shah 0 +curvatur 0 +estim 0 +understand 0 +decemb 0 +titl 0 +abstract 0 +supplementari 0 +help 0 +done 0 +sparcstat 0 +call 0 +disk 0 +quota 0 +store 0 +compress 0 +other 0 +gzip 0 +howev 0 +order 0 +save 0 +sent 0 +goe 0 +everyon 0 +laserprint 0 +altern 0 +name 0 +shortest 0 +caution 0 +check 0 +job 0 +manner 0 +take 0 +long 0 +consider 0 +oper 0 +invok 0 +unix 0 +like 0 +command 0 +develop 0 +provid 0 +languag 0 +interfac 0 +rapid 0 +prototyp 0 +simpl 0 +cantata 0 +netpbm 0 +toolkit 0 +pbmplu 0 +packag 0 +visual 0 +signal 0 +toolbox 0 +especi 0 +relev 0 +although 0 +access 0 +solut 0 +held 0 +regular 0 +classroom 0 +earli 0 +time 0 +cover 0 +textbook 0 +bring 0 +sheet 0 +side 0 +main 0 +idea 0 +proof 0 +question 0 +ask 0 +highli 0 +recommend 0 +wandel 0 +number 0 +grand 0 +allianc 0 +specif 0 +advanc 0 +televis 0 +committe 0 +postscript 0 +spie 0 +optic 0 +librari 0 +appl 0 +quicktim 0 +product 0 +panoramix 0 +panoram 0 +decfac 0 +talk 0 +synthet 0 +video 0 +rate 0 +virtual 0 +realiti 0 +qbic 0 +miscellan 0 +relat 0 +boston 0 +cardiff 0 +royal 0 +institut 0 +sweden 0 +virginia 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..2ae2ea25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,67 @@ +program 1 +fall 1 +section 1 +dave 1 +dzimm 1 +wisc 1 +offic 1 +home 1 +sharp 1 +algebra 0 +languag 0 +instructor 0 +zimmermannemail 0 +educlass 0 +meet 0 +time 0 +place 0 +nolandoffic 0 +phone 0 +hour 0 +announcementsprogram 0 +wednesdai 0 +octob 0 +readi 0 +fridai 0 +novemb 0 +gener 0 +cours 0 +informationc 0 +pagecours 0 +objectivesvectra 0 +labc 0 +consultantssyllabuswork 0 +homeclass 0 +handoutsprogramsexam 0 +quizzeslectur 0 +notesgreg 0 +style 0 +guidegrad 0 +referenc 0 +last 0 +digit 0 +number 0 +quizzesprogramsexam 0 +polici 0 +informationemail 0 +policygrad 0 +policyl 0 +policyacadem 0 +misconduct 0 +policytext 0 +problem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +list 0 +known 0 +erratalast 0 +modifi 0 +zimmermann 0 +base 0 +greg 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..ed500fa0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,31 @@ +program 1 +cours 1 +linear 1 +method 1 +fall 1 +spring 1 +nonlinear 1 +gener 0 +inform 0 +offer 0 +semest 0 +page 0 +variou 0 +instructor 0 +michael 0 +ferri 0 +mangasarian 0 +graduat 0 +wisconsin 0 +network 0 +flow 0 +integ 0 +theori 0 +algorithm 0 +comput 0 +larg 0 +spars 0 +system 0 +last 0 +modifi 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..c490f5f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,115 @@ +homework 2 +fridai 2 +mondai 1 +cours 1 +unix 1 +network 1 +class 1 +offic 1 +flow 1 +grade 1 +march 1 +mail 1 +wisc 1 +thursdai 1 +linear 1 +program 1 +path 1 +assign 1 +april 1 +cshrc 1 +local 1 +ferri 1 +telephon 1 +hour 1 +wednesdai 1 +text 1 +us 1 +gener 1 +inform 1 +orient 1 +session 1 +first 1 +user 1 +week 1 +room 1 +gam 1 +machin 1 +page 1 +spring 0 +flowsspr 0 +schedul 0 +lectur 0 +list 0 +februari 0 +instructor 0 +michael 0 +teach 0 +assist 0 +leei 0 +tuesdai 0 +requir 0 +ravindra 0 +ahuja 0 +thoma 0 +magnanti 0 +jame 0 +orlin 0 +prentic 0 +hall 0 +chvatal 0 +freeman 0 +optim 0 +bertseka 0 +press 0 +overview 0 +tree 0 +cycl 0 +data 0 +structur 0 +shortest 0 +cost 0 +simplex 0 +method 0 +convex 0 +equilibria 0 +lagrangian 0 +relax 0 +multicommod 0 +applic 0 +prerequisit 0 +knowledg 0 +project 0 +final 0 +examin 0 +close 0 +book 0 +except 0 +sheet 0 +paper 0 +allow 0 +repres 0 +question 0 +comput 0 +time 0 +novic 0 +previous 0 +workstat 0 +held 0 +second 0 +last 0 +minut 0 +introduct 0 +login 0 +access 0 +public 0 +sourc 0 +alter 0 +set 0 +directori 0 +appropri 0 +solari 0 +mathemat 0 +home 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..30546664 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,128 @@ +homework 2 +fridai 2 +mondai 2 +spars 1 +cours 1 +unix 1 +class 1 +offic 1 +grade 1 +march 1 +comput 1 +mail 1 +wisc 1 +thursdai 1 +assign 1 +april 1 +handout 1 +math 1 +method 1 +februari 1 +instructor 1 +ferri 1 +telephon 1 +hour 1 +wednesdai 1 +matrix 1 +second 1 +inform 1 +introduct 1 +orient 1 +session 1 +first 1 +user 1 +week 1 +room 1 +page 1 +spring 0 +also 0 +larg 0 +systemsspr 0 +schedul 0 +lectur 0 +list 0 +michael 0 +teach 0 +assist 0 +leei 0 +tuesdai 0 +recommend 0 +textbook 0 +golub 0 +loan 0 +john 0 +hopkinsunivers 0 +press 0 +edit 0 +direct 0 +matric 0 +duff 0 +erisman 0 +reid 0 +oxford 0 +scienc 0 +public 0 +finit 0 +dimension 0 +vector 0 +space 0 +halmo 0 +springer 0 +verlag 0 +gener 0 +overview 0 +storag 0 +scheme 0 +gaussian 0 +elimin 0 +dens 0 +error 0 +analysi 0 +local 0 +pivot 0 +strategi 0 +modif 0 +iter 0 +linear 0 +solver 0 +least 0 +squar 0 +nonlinear 0 +equat 0 +optim 0 +applic 0 +parallel 0 +techniqu 0 +eigenvalu 0 +eigenvector 0 +prerequisit 0 +consent 0 +project 0 +final 0 +examin 0 +close 0 +book 0 +except 0 +sheet 0 +paper 0 +allow 0 +repres 0 +question 0 +ieee 0 +arithmet 0 +machin 0 +time 0 +novic 0 +previous 0 +us 0 +workstat 0 +held 0 +last 0 +minut 0 +instruct 0 +matlab 0 +mathemat 0 +program 0 +home 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..4f746d33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,56 @@ +offic 1 +fischer 1 +telephon 1 +mail 1 +wisc 1 +hour 1 +lectur 1 +charl 1 +mondai 1 +wednesdai 1 +fridai 1 +appoint 1 +teach 1 +assist 1 +program 1 +weyer 1 +cours 1 +fall 0 +construct 0 +compilersfal 0 +schedul 0 +tuth 0 +csst 0 +instructor 0 +krishna 0 +kunchithapadam 0 +krisna 0 +tuesdai 0 +thursdai 0 +assign 0 +homework 0 +read 0 +class 0 +text 0 +craft 0 +compil 0 +richard 0 +leblanc 0 +benjamin 0 +cum 0 +check 0 +regularli 0 +gener 0 +inform 0 +overview 0 +date 0 +grade 0 +examin 0 +get 0 +start 0 +handout 0 +note 0 +us 0 +tool 0 +link 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..741b92a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,57 @@ +greg 1 +wisc 1 +fall 1 +section 1 +offic 1 +algebra 0 +languag 0 +program 0 +instructor 0 +sharpemail 0 +eduoffic 0 +phone 0 +hour 0 +appt 0 +grader 0 +krishna 0 +kunchithapadamemail 0 +krisna 0 +edugener 0 +cours 0 +informationc 0 +home 0 +pagecours 0 +objectivesvectra 0 +labc 0 +consultantssyllabuscours 0 +difficultywork 0 +homenewsstartup 0 +informationclass 0 +noteshomeworkexam 0 +quizzesstyl 0 +guideemail 0 +archivepolici 0 +informationemail 0 +policygrad 0 +policyl 0 +policyacadem 0 +misconduct 0 +polici 0 +must 0 +read 0 +textproblem 0 +solv 0 +object 0 +porgrammingwalt 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +pleas 0 +list 0 +known 0 +erratalast 0 +modifi 0 +sharpgreg 0 +http 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..efc42801 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,159 @@ +program 2 +class 2 +prog 2 +name 1 +polici 1 +section 1 +line 1 +need 1 +inform 1 +home 1 +version 1 +also 1 +chang 1 +assign 1 +grade 1 +mean 1 +page 1 +fall 1 +chad 1 +lane 1 +import 1 +tribbl 1 +compil 1 +problem 1 +enumer 1 +type 1 +public 1 +privat 1 +note 1 +call 1 +work 1 +file 1 +everyth 1 +copi 1 +want 1 +us 1 +link 1 +project 1 +directori 1 +consist 1 +sampl 1 +read 1 +question 1 +quizz 1 +test 1 +text 1 +introduct 1 +sharp 1 +algebra 0 +languag 0 +wisc 0 +announc 0 +tue 0 +recogn 0 +outsid 0 +must 0 +declar 0 +insid 0 +result 0 +bump 0 +bottom 0 +shown 0 +definit 0 +randomintinrang 0 +defin 0 +correspond 0 +function 0 +bodi 0 +within 0 +forgot 0 +valu 0 +uppercas 0 +overwrit 0 +obsolet 0 +includ 0 +might 0 +check 0 +help 0 +someth 0 +ad 0 +comment 0 +suggest 0 +throughout 0 +part 0 +updat 0 +descript 0 +pleas 0 +make 0 +sure 0 +discrep 0 +chri 0 +weaver 0 +shouldn 0 +matter 0 +readi 0 +crucial 0 +entir 0 +understand 0 +basic 0 +attempt 0 +earli 0 +start 0 +hard 0 +requir 0 +time 0 +piec 0 +togeth 0 +bring 0 +tuesdai 0 +midterm 0 +freshmen 0 +either 0 +fine 0 +great 0 +thumb 0 +noth 0 +freshman 0 +disregard 0 +stuff 0 +tent 0 +semest 0 +syllabu 0 +handout 0 +prepar 0 +solutionscours 0 +solv 0 +object 0 +walter 0 +savitch 0 +addison 0 +weslei 0 +publish 0 +compani 0 +meet 0 +vleck 0 +administr 0 +late 0 +mail 0 +attend 0 +academ 0 +misconduct 0 +microsoft 0 +window 0 +first 0 +borland 0 +second 0 +vectra 0 +sourc 0 +code 0 +consult 0 +extra 0 +refer 0 +materi 0 +mani 0 +answer 0 +lectur 0 +style 0 +guidelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..38a5712c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,68 @@ +offic 1 +program 1 +lectur 1 +horwitz 1 +telephon 1 +mail 1 +wisc 1 +hour 1 +appoint 1 +rahul 1 +compil 1 +tool 1 +cours 1 +inform 1 +assign 1 +grade 1 +fall 0 +introduct 0 +languag 0 +compilersspr 0 +stori 0 +month 0 +octob 0 +schedul 0 +tuth 0 +comp 0 +stat 0 +recit 0 +psycholog 0 +instructor 0 +susan 0 +tuesdai 0 +fridai 0 +teach 0 +assist 0 +kapoor 0 +mondai 0 +wednesdai 0 +text 0 +reserv 0 +wendt 0 +librari 0 +principl 0 +techniqu 0 +sethi 0 +ullman 0 +craft 0 +fischer 0 +leblanc 0 +check 0 +regularli 0 +gener 0 +overview 0 +date 0 +exam 0 +includ 0 +late 0 +polici 0 +get 0 +start 0 +read 0 +homework 0 +examin 0 +note 0 +us 0 +email 0 +link 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..64f4ebfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,51 @@ +hummert 1 +offic 1 +comput 1 +program 1 +inform 1 +home 1 +scienc 1 +statist 1 +phone 1 +savitch 1 +psych 1 +grade 1 +introduct 1 +window 1 +page 0 +sectionsc 0 +instructor 0 +contact 0 +email 0 +wisc 0 +hour 0 +mondai 0 +thursdai 0 +announc 0 +textbookproblem 0 +solv 0 +object 0 +walter 0 +section 0 +cours 0 +handout 0 +tent 0 +syllabu 0 +semest 0 +late 0 +polici 0 +criteria 0 +academ 0 +misconduct 0 +viewgraph 0 +import 0 +softwar 0 +microsoft 0 +windowshint 0 +compilersth 0 +oper 0 +systememailmosaicnetscap 0 +borland 0 +languageth 0 +text 0 +assign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..c0b84810 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,43 @@ +section 1 +offic 1 +click 1 +hour 1 +mail 1 +jonb 1 +wisc 1 +infoc 0 +info 0 +name 0 +bodnersect 0 +mondai 0 +thursdai 0 +number 0 +comput 0 +scienc 0 +statist 0 +hall 0 +doit 0 +phone 0 +eduher 0 +thing 0 +keep 0 +mind 0 +need 0 +copi 0 +guid 0 +choos 0 +print 0 +file 0 +menu 0 +question 0 +pleas 0 +stop 0 +send 0 +grade 0 +avail 0 +bodner 0 +mound 0 +madison 0 +last 0 +modifi 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..c5152491 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,139 @@ +program 2 +lisp 2 +exam 2 +common 1 +line 1 +cours 1 +kunen 1 +section 1 +inform 1 +avail 1 +directori 1 +introduct 1 +assign 1 +time 1 +topic 1 +us 1 +click 1 +artifici 1 +intellig 1 +coursewil 1 +email 1 +wisc 1 +count 1 +final 1 +late 1 +logic 1 +discuss 1 +search 1 +languag 1 +sinc 1 +probabl 1 +refer 1 +book 1 +recit 1 +last 1 +problem 1 +previou 1 +intro 0 +notic 0 +spring 0 +detail 0 +appear 0 +later 0 +instructor 0 +kunenoffic 0 +stat 0 +buildingtelephon 0 +eduoffic 0 +hour 0 +appoint 0 +grade 0 +four 0 +three 0 +thirdexam 0 +schedul 0 +place 0 +turn 0 +midnight 0 +thedai 0 +loos 0 +cover 0 +follow 0 +order 0 +entir 0 +buti 0 +design 0 +assignmenti 0 +game 0 +plai 0 +prolog 0 +natur 0 +understand 0 +learn 0 +neural 0 +network 0 +deduct 0 +plan 0 +reason 0 +uncertain 0 +knowledg 0 +begin 0 +would 0 +usefulto 0 +supplement 0 +lecturesand 0 +help 0 +within 0 +manypaperback 0 +like 0 +lispcraft 0 +wilenski 0 +anoth 0 +possibl 0 +ansi 0 +graham 0 +code 0 +ultim 0 +steel 0 +edit 0 +page 0 +also 0 +sun 0 +addit 0 +textbook 0 +modern 0 +approach 0 +russel 0 +norvig 0 +class 0 +session 0 +engr 0 +psych 0 +essentiallli 0 +materi 0 +present 0 +answer 0 +question 0 +give 0 +hint 0 +review 0 +usual 0 +minut 0 +teach 0 +attend 0 +differ 0 +lectur 0 +public 0 +alpha 0 +beta 0 +alpha_beta 0 +best 0 +first 0 +astar 0 +fall 0 +postscript 0 +still 0 +older 0 +chang 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..86ddfd6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,86 @@ +chapter 2 +homework 2 +assign 2 +part 1 +architectur 1 +tabl 1 +content 1 +reader 1 +comput 1 +full 1 +paper 1 +doit 1 +solut 1 +note 1 +instruct 1 +parallel 1 +spring 1 +inform 1 +comp 1 +wisc 1 +eduoffic 1 +hour 1 +appoint 1 +lectur 1 +project 1 +give 1 +basic 1 +pipelin 1 +level 1 +class 1 +talk 1 +decemb 1 +fall 0 +advanc 0 +ifal 0 +offer 0 +cours 0 +instructor 0 +mark 0 +hilloffic 0 +statemail 0 +markhil 0 +tuesdai 0 +fridai 0 +shenoffic 0 +statphon 0 +email 0 +mshen 0 +mondai 0 +thursdai 0 +miscellaneawhat 0 +talksread 0 +introduct 0 +perform 0 +cost 0 +set 0 +cach 0 +memori 0 +talluri 0 +hill 0 +input 0 +output 0 +interconnect 0 +process 0 +solutionproject 0 +propos 0 +novemb 0 +report 0 +noonmiscellanea 0 +final 0 +midterm 0 +us 0 +first 0 +edit 0 +hennessi 0 +patterson 0 +qualifi 0 +exam 0 +sourc 0 +hard 0 +question 0 +seminar 0 +wisconsin 0 +group 0 +world 0 +wide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..b4f155be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,72 @@ +us 1 +exampl 1 +class 1 +offic 1 +home 1 +program 1 +section 1 +mbirk 1 +wisc 1 +comp 1 +assign 1 +grade 1 +dynam 1 +alloc 1 +oper 1 +overload 1 +complex 1 +page 1 +introduct 1 +instructor 0 +michael 0 +birk 0 +email 0 +phone 0 +hour 0 +appoint 0 +administr 0 +inform 0 +text 0 +problem 0 +solv 0 +object 0 +walter 0 +savitch 0 +room 0 +time 0 +psycholog 0 +tuesdai 0 +thursdai 0 +vectra 0 +syllabu 0 +comput 0 +standard 0 +late 0 +polici 0 +handin 0 +procedur 0 +cheat 0 +academ 0 +misconduct 0 +consult 0 +string 0 +ration 0 +repres 0 +float 0 +point 0 +number 0 +anoth 0 +intstack 0 +simpl 0 +unlimit 0 +size 0 +data 0 +structur 0 +classinfo 0 +struct 0 +link 0 +microsoft 0 +window 0 +borland 0 +tutori 0 +debugg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..b099d361 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,107 @@ +program 2 +class 2 +us 1 +comp 1 +exampl 1 +offic 1 +assign 1 +room 1 +section 1 +mbirk 1 +wisc 1 +home 1 +avail 1 +number 1 +exam 1 +spring 1 +instruct 1 +syllabu 1 +eight 1 +week 1 +comput 1 +late 1 +polici 1 +grade 1 +dynam 1 +alloc 1 +oper 1 +overload 1 +complex 1 +introduct 1 +instructor 0 +michael 0 +birk 0 +email 0 +phone 0 +hour 0 +appoint 0 +announc 0 +test 0 +case 0 +dice 0 +code 0 +discuss 0 +rank 0 +last 0 +four 0 +digit 0 +student 0 +past 0 +onlin 0 +fall 0 +note 0 +hangman 0 +mondai 0 +octob 0 +lectur 0 +format 0 +first 0 +second 0 +come 0 +soon 0 +print 0 +output 0 +outsid 0 +final 0 +chang 0 +meet 0 +administr 0 +inform 0 +text 0 +problem 0 +solv 0 +object 0 +walter 0 +savitch 0 +time 0 +tuesdai 0 +thursdai 0 +vectra 0 +standard 0 +handin 0 +procedur 0 +cheat 0 +academ 0 +misconduct 0 +consult 0 +string 0 +ration 0 +repres 0 +float 0 +point 0 +anoth 0 +intstack 0 +simpl 0 +unlimit 0 +size 0 +data 0 +structur 0 +classinfo 0 +struct 0 +link 0 +page 0 +microsoft 0 +window 0 +borland 0 +tutori 0 +debugg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..4d4a8bb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,80 @@ +section 2 +melski 1 +email 1 +page 1 +info 1 +stuff 1 +assign 1 +comput 1 +help 1 +link 1 +david 1 +offic 1 +wisc 1 +meet 1 +us 1 +archiv 1 +dave 0 +melskithes 0 +chang 0 +frequent 0 +press 0 +reload 0 +button 0 +daili 0 +get 0 +start 0 +alreadi 0 +uncomfort 0 +andth 0 +softwar 0 +instructor 0 +scienc 0 +statist 0 +floor 0 +phone 0 +hour 0 +click 0 +attach 0 +pleas 0 +noland 0 +psycholog 0 +text 0 +problem 0 +solv 0 +object 0 +program 0 +walter 0 +savitch 0 +addison 0 +wesleypublish 0 +gener 0 +window 0 +usingborland 0 +refer 0 +materi 0 +rough 0 +syllabu 0 +polici 0 +academ 0 +misconduct 0 +must 0 +read 0 +rule 0 +thumb 0 +share 0 +code 0 +consult 0 +grade 0 +late 0 +work 0 +check 0 +often 0 +essenti 0 +solut 0 +handout 0 +list 0 +tutor 0 +avail 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..9b407a51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,142 @@ +program 2 +novemb 2 +wednesdai 2 +quiz 2 +octob 2 +class 2 +page 2 +code 2 +bankaccount 2 +solut 2 +score 2 +milo 1 +mondai 1 +home 1 +html 1 +exam 1 +septemb 1 +guid 1 +refer 1 +section 1 +languag 1 +martin 1 +wisc 1 +take 1 +file 1 +us 1 +document 1 +postscript 1 +function 1 +call 1 +fall 1 +time 1 +offic 1 +tuesdai 1 +check 1 +homepag 1 +ad 1 +current 1 +grade 1 +gener 1 +inform 1 +consult 1 +exampl 1 +list 1 +user 1 +input 1 +withprompt 1 +version 1 +standard 1 +fridai 1 +decemb 1 +algebra 0 +instructor 0 +locat 0 +psychologyinstructor 0 +email 0 +eduoffic 0 +hour 0 +thursdai 0 +appoint 0 +phone 0 +announcementsthi 0 +chang 0 +frequent 0 +respons 0 +often 0 +given 0 +todai 0 +place 0 +onth 0 +project 0 +room 0 +test 0 +scheduledfor 0 +updat 0 +haseveryth 0 +pleas 0 +make 0 +sure 0 +isaccur 0 +link 0 +coupl 0 +withinform 0 +linksar 0 +titl 0 +avail 0 +onfridai 0 +remind 0 +sheet 0 +turn 0 +assign 0 +syllabu 0 +style 0 +vectra 0 +schedul 0 +academ 0 +misconduct 0 +policyclass 0 +final 0 +main 0 +struct 0 +minmax 0 +findth 0 +minimum 0 +maximum 0 +number 0 +case 0 +enteredfrom 0 +stdin 0 +form 0 +creat 0 +formlett 0 +data 0 +specifi 0 +theopen_fil 0 +introduc 0 +valu 0 +beginn 0 +introduct 0 +toth 0 +latest 0 +releas 0 +good 0 +viru 0 +hoax 0 +ethic 0 +andprofession 0 +conductassign 0 +survei 0 +questionar 0 +requir 0 +textbook 0 +problem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +known 0 +errata 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..89ec5299 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,89 @@ +program 2 +tuesdai 2 +section 1 +thursdai 1 +april 1 +martin 1 +ream 1 +class 1 +inform 1 +februari 1 +march 1 +exam 1 +list 1 +semest 1 +email 1 +mream 1 +wisc 1 +offic 1 +comput 1 +noland 1 +home 0 +page 0 +classc 0 +algebra 0 +languag 0 +programmingspr 0 +teach 0 +assist 0 +absolut 0 +nothingeverydai 0 +informationc 0 +pagescommon 0 +programmingmistakesarch 0 +mail 0 +messag 0 +sent 0 +calendar 0 +januari 0 +final 0 +placeto 0 +announcedcours 0 +detail 0 +contact 0 +scienc 0 +statist 0 +dayton 0 +phone 0 +hour 0 +appoint 0 +talk 0 +send 0 +textbookproblem 0 +solv 0 +object 0 +walter 0 +savitch 0 +csst 0 +contain 0 +vectra 0 +run 0 +window 0 +andborland 0 +addit 0 +cours 0 +tent 0 +syllabu 0 +extra 0 +materi 0 +late 0 +polici 0 +grade 0 +criteria 0 +academicmisconduct 0 +rule 0 +thumb 0 +share 0 +code 0 +assign 0 +work 0 +anyform 0 +former 0 +student 0 +made 0 +bigtodd 0 +thielwendi 0 +staatsabout 0 +instructor 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..6549eed9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,107 @@ +program 1 +exampl 1 +fall 1 +section 1 +mike 1 +msteel 1 +wisc 1 +comp 1 +stat 1 +deadlin 1 +check 1 +mail 1 +read 1 +tuesdai 1 +novemb 1 +grade 1 +note 1 +polici 1 +algebra 0 +languag 0 +nolandinstructor 0 +steeleemail 0 +eduoffic 0 +buildingoffic 0 +hour 0 +time 0 +appoint 0 +soffic 0 +phone 0 +import 0 +announcementsi 0 +extend 0 +pleas 0 +thenew 0 +informationmidterm 0 +current 0 +line 0 +includ 0 +everyth 0 +hand 0 +sampl 0 +taken 0 +pastfew 0 +week 0 +class 0 +fill 0 +stuff 0 +gloss 0 +makefulli 0 +function 0 +find 0 +us 0 +ifyou 0 +miss 0 +even 0 +didn 0 +understand 0 +page 0 +near 0 +bottom 0 +rememb 0 +clarif 0 +programmingassign 0 +gener 0 +cours 0 +informationc 0 +home 0 +pagecours 0 +objectivesabout 0 +vectra 0 +labc 0 +consultantscours 0 +syllabu 0 +assignmentsnot 0 +work 0 +homeclass 0 +handoutsprogram 0 +assignmentsexam 0 +quizzessom 0 +examplespolici 0 +informationemail 0 +policygrad 0 +late 0 +academ 0 +misconduct 0 +policyus 0 +refer 0 +pagesintroduct 0 +microsoft 0 +windowsintroduct 0 +borland 0 +greg 0 +sharp 0 +styleguid 0 +codetextproblem 0 +solv 0 +object 0 +walter 0 +savitchaddison 0 +weslei 0 +publish 0 +compani 0 +list 0 +known 0 +erratalast 0 +modifi 0 +steel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..b5193506 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,131 @@ +grade 1 +section 1 +assign 1 +polici 1 +offic 1 +onlin 1 +test 1 +wisc 1 +comput 1 +scienc 1 +statist 1 +hour 1 +check 1 +email 1 +fall 1 +prock 1 +eduoffic 1 +phone 1 +consult 1 +link 1 +crazi 1 +todai 1 +quiz 1 +gener 1 +question 1 +final 1 +page 1 +inform 1 +class 1 +text 1 +materi 1 +info 1 +archiv 1 +introduct 1 +session 0 +infoc 0 +sessionalgebra 0 +languag 0 +program 0 +instructor 0 +andrew 0 +prockemail 0 +thgrader 0 +haihong 0 +wangemail 0 +mtwrannounc 0 +pleas 0 +verifi 0 +score 0 +modifi 0 +made 0 +minor 0 +modif 0 +file 0 +copi 0 +alreadi 0 +gotton 0 +five 0 +exam 0 +result 0 +rang 0 +ad 0 +sampl 0 +perus 0 +anoth 0 +think 0 +give 0 +good 0 +idea 0 +level 0 +knowledg 0 +need 0 +rememb 0 +topic 0 +webpag 0 +seem 0 +done 0 +notic 0 +error 0 +make 0 +sure 0 +assing 0 +well 0 +like 0 +work 0 +ahead 0 +tuesdai 0 +decemb 0 +mark 0 +calendar 0 +everyon 0 +requir 0 +take 0 +feel 0 +thing 0 +locat 0 +import 0 +carefulli 0 +read 0 +administr 0 +welcom 0 +problem 0 +solv 0 +walter 0 +savitch 0 +room 0 +time 0 +tent 0 +syllabu 0 +late 0 +mail 0 +academ 0 +misconductcours 0 +cours 0 +style 0 +guid 0 +lectur 0 +note 0 +microsoft 0 +window 0 +first 0 +borland 0 +second 0 +home 0 +vectra 0 +sourc 0 +code 0 +extra 0 +refer 0 +mani 0 +answer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..d51679d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,88 @@ +info 2 +site 1 +softwar 1 +archiv 1 +simtel 1 +kelli 1 +page 1 +mail 1 +messag 1 +function 1 +link 1 +inform 1 +interest 1 +world 1 +usenet 1 +need 1 +ratliff 0 +current 0 +grade 0 +keyword 0 +search 0 +exampl 0 +enter 0 +without 0 +quot 0 +everi 0 +paragraph 0 +us 0 +word 0 +also 0 +wildcard 0 +sent 0 +semest 0 +addit 0 +backup 0 +copi 0 +disk 0 +filesviru 0 +wide 0 +faqfun 0 +stuff 0 +oracl 0 +resourc 0 +index 0 +virtual 0 +tourist 0 +mapth 0 +space 0 +shuttl 0 +clickabl 0 +badger 0 +herald 0 +comicshumor 0 +abort 0 +retri 0 +ignor 0 +nine 0 +type 0 +usersfin 0 +weeklab 0 +jokesget 0 +comput 0 +home 0 +might 0 +tryingsom 0 +sharewar 0 +freewar 0 +avail 0 +internet 0 +program 0 +usual 0 +compress 0 +somecompress 0 +unpack 0 +reviewsom 0 +command 0 +try 0 +biggest 0 +best 0 +maintain 0 +minclud 0 +file 0 +post 0 +infocompress 0 +infofavorit 0 +clickher 0 +visit 0 +desautel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..27ea1ade --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,75 @@ +program 2 +comput 1 +introduct 1 +inform 1 +home 1 +room 1 +savitch 1 +novemb 1 +window 1 +page 0 +russ 0 +man 0 +csc 0 +instructor 0 +russel 0 +manningemail 0 +rman 0 +wisc 0 +eduoffic 0 +scienceoffic 0 +hour 0 +find 0 +basement 0 +saturdai 0 +except 0 +footbal 0 +game 0 +sundai 0 +come 0 +keep 0 +compani 0 +work 0 +like 0 +vectra 0 +although 0 +student 0 +prioriti 0 +grade 0 +lectur 0 +final 0 +click 0 +textbook 0 +problem 0 +solv 0 +object 0 +walter 0 +section 0 +semest 0 +univers 0 +rotc 0 +build 0 +scienc 0 +statist 0 +assign 0 +mondai 0 +wednesdai 0 +cours 0 +handout 0 +syllabu 0 +late 0 +polici 0 +academ 0 +misconduct 0 +import 0 +softwar 0 +microsoft 0 +windowshint 0 +compilersth 0 +oper 0 +systememailmosaicnetscap 0 +borland 0 +languageth 0 +textold 0 +quizz 0 +none 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..dbf3ecac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,95 @@ +wisc 2 +spring 1 +fall 1 +group 1 +cours 1 +knowledg 1 +search 1 +vision 1 +learn 1 +robot 1 +shavlik 1 +comput 1 +introduct 1 +artifici 1 +intellig 1 +network 1 +deduct 1 +problem 1 +solv 1 +plan 1 +languag 1 +includ 1 +jude 1 +relat 1 +link 1 +machin 1 +gener 0 +inform 0 +offer 0 +semest 0 +academ 0 +year 0 +section 0 +thefal 0 +topic 0 +cover 0 +principl 0 +base 0 +techniqu 0 +best 0 +first 0 +alpha 0 +beta 0 +represent 0 +us 0 +predic 0 +logic 0 +semant 0 +connectionist 0 +frame 0 +rule 0 +autom 0 +applic 0 +expert 0 +system 0 +game 0 +plai 0 +natur 0 +understand 0 +program 0 +lisp 0 +possibl 0 +prolog 0 +previou 0 +assumedprerequisit 0 +page 0 +variou 0 +instructor 0 +chuck 0 +dyer 0 +kunen 0 +sabbat 0 +bryan 0 +local 0 +madison 0 +seminar 0 +qualifi 0 +exam 0 +recent 0 +tabl 0 +content 0 +abstract 0 +journal 0 +mostli 0 +wendt 0 +librari 0 +readabl 0 +biologi 0 +dept 0 +graduat 0 +wisconsin 0 +motion 0 +extern 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..02162520 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,216 @@ +assign 2 +learn 2 +homework 2 +read 2 +wisc 2 +spring 2 +april 2 +postscript 2 +chapter 2 +textbook 2 +feedback 2 +author 2 +neural 2 +journal 2 +mondai 2 +mitchel 2 +network 2 +shavlik 1 +februari 1 +lisp 1 +machin 1 +cours 1 +wednesdai 1 +articl 1 +march 1 +towel 1 +januari 1 +late 1 +us 1 +link 1 +page 1 +offic 1 +class 1 +theori 1 +relat 1 +knowledg 1 +algorithm 1 +reinforc 1 +concept 1 +space 1 +paper 1 +server 1 +group 1 +jude 1 +home 1 +archiv 1 +readabl 1 +comput 1 +base 1 +fisher 1 +rumelhart 1 +moonei 1 +empir 1 +compar 1 +backprop 1 +experiment 1 +cobweb 1 +polici 1 +train 1 +induc 1 +decis 1 +tree 1 +free 1 +semest 1 +noon 1 +librari 1 +resourc 1 +bibliographi 1 +austrian 1 +institut 1 +refer 1 +help 1 +program 1 +akcl 1 +common 1 +tip 1 +frequent 1 +ask 1 +question 1 +print 1 +local 1 +gener 0 +inform 0 +instructor 0 +stat 0 +teach 0 +assist 0 +geoff 0 +weinberg 0 +highwai 0 +lab 0 +basement 0 +build 0 +geoffrei 0 +phone 0 +overview 0 +syllabu 0 +email 0 +suggest 0 +project 0 +refin 0 +logic 0 +definit 0 +quinlan 0 +artifici 0 +chunk 0 +soar 0 +laird 0 +rosenbloom 0 +newel 0 +level 0 +dietterich 0 +analyt 0 +unsupervis 0 +zipser 0 +cogsci 0 +lenat 0 +genet 0 +backpropag 0 +basic 0 +also 0 +scienc 0 +kibler 0 +introduct 0 +kbann 0 +sure 0 +answer 0 +sheet 0 +best 0 +idea 0 +next 0 +summar 0 +sentenc 0 +summari 0 +lead 0 +instead 0 +analyz 0 +brr 0 +hand 0 +materi 0 +cover 0 +lectur 0 +methodolog 0 +creat 0 +person 0 +start 0 +student 0 +five 0 +dai 0 +exhaust 0 +penalti 0 +measur 0 +weekend 0 +make 0 +tractabl 0 +accept 0 +week 0 +previous 0 +migrat 0 +progress 0 +heurist 0 +search 0 +version 0 +explan 0 +previou 0 +exam 0 +ineedagoodicon 0 +line 0 +nip 0 +premier 0 +confer 0 +recent 0 +tabl 0 +content 0 +abstract 0 +select 0 +mostli 0 +wendt 0 +irvin 0 +dataset 0 +pointer 0 +discoveri 0 +databas 0 +stuff 0 +benchmark 0 +ieee 0 +council 0 +sever 0 +connect 0 +intern 0 +societi 0 +adapt 0 +behavior 0 +canadian 0 +peopl 0 +extern 0 +department 0 +workstat 0 +emac 0 +code 0 +write 0 +debugg 0 +novic 0 +steel 0 +languag 0 +edit 0 +manual 0 +printer 0 +math 0 +comp 0 +biologi 0 +includ 0 +dept 0 +last 0 +modifi 0 +shavlikshavlik 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..18705d66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,247 @@ +project 2 +paper 2 +comput 1 +present 1 +exam 1 +cours 1 +topic 1 +class 1 +read 1 +schedul 1 +lectur 1 +system 1 +final 1 +scienc 1 +room 1 +midterm 1 +suggest 1 +grade 1 +semest 1 +detail 1 +make 1 +wang 1 +chen 1 +solomon 1 +wisc 1 +new 1 +updat 1 +list 1 +mondai 1 +statist 1 +time 1 +memori 1 +manag 1 +avail 1 +content 1 +summari 1 +inform 1 +text 1 +us 1 +copi 1 +discuss 1 +activ 1 +particip 1 +strongli 1 +encourag 1 +design 1 +term 1 +experiment 1 +research 1 +literatur 1 +must 1 +group 1 +write 1 +fall 0 +advanc 0 +oper 0 +systemsfal 0 +marvin 0 +offic 0 +sciencesoffic 0 +hour 0 +troffic 0 +phone 0 +email 0 +address 0 +watch 0 +space 0 +latest 0 +last 0 +inroom 0 +build 0 +fridai 0 +noon 0 +exampl 0 +past 0 +examtogeth 0 +sampl 0 +answer 0 +wednesdai 0 +octob 0 +pmin 0 +decemb 0 +exact 0 +place 0 +bedetermin 0 +informationabout 0 +avaiabl 0 +readabl 0 +version 0 +figur 0 +multic 0 +intend 0 +give 0 +broad 0 +exposur 0 +advancedoper 0 +assum 0 +student 0 +good 0 +onoper 0 +equival 0 +cover 0 +normal 0 +inconsider 0 +synchron 0 +interprocess 0 +commun 0 +file 0 +protect 0 +secur 0 +distribut 0 +tuesdai 0 +thursdai 0 +sciencestextther 0 +realli 0 +satisfactori 0 +textbook 0 +graduat 0 +level 0 +operatingsystem 0 +usea 0 +select 0 +classic 0 +papersa 0 +structur 0 +around 0 +journal 0 +articl 0 +andconfer 0 +proceed 0 +purchas 0 +doit 0 +formerli 0 +macc 0 +document 0 +deskfor 0 +similar 0 +ident 0 +thoseof 0 +previou 0 +individu 0 +youto 0 +relev 0 +current 0 +click 0 +herefor 0 +tent 0 +review 0 +willinstead 0 +adiscuss 0 +major 0 +theme 0 +focal 0 +point 0 +will 0 +daili 0 +geta 0 +expect 0 +quietli 0 +listen 0 +week 0 +much 0 +lessout 0 +gradingther 0 +worth 0 +total 0 +verifi 0 +carefulli 0 +thoroughli 0 +projecty 0 +requir 0 +complet 0 +provid 0 +involv 0 +implement 0 +tool 0 +implementationsof 0 +algorithm 0 +measur 0 +studi 0 +simul 0 +compon 0 +survei 0 +unvalid 0 +suffici 0 +done 0 +person 0 +larger 0 +smaller 0 +approv 0 +case 0 +basi 0 +summar 0 +result 0 +meet 0 +standard 0 +public 0 +qualiti 0 +well 0 +also 0 +ashort 0 +presentationabout 0 +presentationsher 0 +presen 0 +approxim 0 +manyan 0 +stubb 0 +andrew 0 +bigg 0 +franci 0 +salmon 0 +gunawan 0 +agu 0 +qingmin 0 +chien 0 +pang 0 +jame 0 +eric 0 +larsen 0 +conroi 0 +fritz 0 +craig 0 +jordan 0 +prasad 0 +deshpand 0 +avinash 0 +sodani 0 +basnei 0 +rajesh 0 +raman 0 +biswadeep 0 +taxiao 0 +yanm 0 +xinyu 0 +richard 0 +zhang 0 +todd 0 +munson 0 +wenjun 0 +xinyi 0 +yufei 0 +zeyu 0 +sridhar 0 +gopal 0 +michael 0 +leesolomon 0 +eduthu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..223bbd2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,4 @@ +homepag 1 +chiang 0 +time 0 +gradesgo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..eda5d022 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,123 @@ +fortran 2 +comput 1 +microsoft 1 +page 1 +home 1 +program 1 +us 1 +vectra 1 +room 1 +quota 1 +mail 1 +exercis 1 +write 1 +solut 1 +particular 1 +scienc 1 +window 1 +machin 1 +also 1 +howev 1 +copi 1 +modifi 1 +bestor 1 +section 0 +overal 0 +structur 0 +primarili 0 +gener 0 +problem 0 +solv 0 +code 0 +though 0 +want 0 +time 0 +algorithm 0 +even 0 +depend 0 +languag 0 +class 0 +follow 0 +mondai 0 +labyou 0 +statist 0 +contain 0 +hewlett 0 +packard 0 +run 0 +open 0 +seven 0 +dai 0 +week 0 +except 0 +certain 0 +holidai 0 +printer 0 +locat 0 +across 0 +hall 0 +print 0 +exce 0 +must 0 +contact 0 +either 0 +go 0 +offic 0 +hour 0 +prefer 0 +increas 0 +bewar 0 +aren 0 +configur 0 +correctli 0 +along 0 +wall 0 +closest 0 +outsidehallwai 0 +toward 0 +left 0 +hand 0 +part 0 +avoid 0 +dorm 0 +probabl 0 +purchas 0 +lahei 0 +person 0 +insid 0 +cover 0 +textbook 0 +work 0 +lab 0 +campu 0 +compil 0 +pleas 0 +first 0 +softwar 0 +includ 0 +netscap 0 +pointer 0 +interest 0 +jeff 0 +lampert 0 +depart 0 +start 0 +point 0 +internet 0 +explor 0 +lyco 0 +search 0 +world 0 +wide 0 +keyword 0 +dilbert 0 +comic 0 +relief 0 +long 0 +night 0 +assign 0 +copyright 0 +gareth 0 +wisc 0 +last 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..dba21e7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,52 @@ +homepag 1 +page 1 +wisc 1 +homepagec 0 +homepagewelcom 0 +purpos 0 +provid 0 +student 0 +inform 0 +pertain 0 +section 0 +sinc 0 +chang 0 +frequent 0 +responsibilityto 0 +check 0 +often 0 +gener 0 +informationinstructor 0 +todd 0 +munsonemail 0 +tmunson 0 +eduoffic 0 +comput 0 +scienc 0 +statisticsoffic 0 +phone 0 +offic 0 +hour 0 +appointmentsect 0 +textbook 0 +problem 0 +solv 0 +walter 0 +savitchclass 0 +informationexpectationssyllabusexam 0 +schedule 0 +mailgradingl 0 +assignmentsextra 0 +creditpoliciesconsult 0 +responsibilitiesacadem 0 +misconductoth 0 +informationdaili 0 +note 0 +assignmentshomework 0 +assignmentsprogram 0 +document 0 +us 0 +classoth 0 +program 0 +resourcesc 0 +homepagetmunson 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..5c145f6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,64 @@ +inform 1 +section 1 +handout 1 +class 1 +cours 1 +todd 1 +hour 1 +avail 1 +program 1 +get 1 +start 1 +read 1 +gener 1 +text 1 +syllabu 1 +assign 1 +home 1 +page 1 +turnidg 1 +comput 0 +scienc 0 +time 0 +place 0 +nolandinstructor 0 +turnidgeoffic 0 +tbalab 0 +tbaannouncementsclass 0 +note 0 +struct 0 +us 0 +facil 0 +grade 0 +polici 0 +tent 0 +solut 0 +collect 0 +date 0 +mail 0 +list 0 +send 0 +messag 0 +classa 0 +whole 0 +muchinform 0 +interest 0 +includ 0 +tutor 0 +consult 0 +window 0 +oper 0 +system 0 +email 0 +netscap 0 +find 0 +provid 0 +byother 0 +instructor 0 +help 0 +exampl 0 +gregorysharp 0 +difficulti 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..95993093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,80 @@ +offic 1 +comput 1 +section 1 +program 1 +weaver 1 +scienc 1 +chri 1 +email 1 +polici 1 +statist 1 +hour 1 +exam 1 +wisc 1 +phone 1 +week 1 +noland 1 +text 1 +grade 1 +handout 1 +assign 1 +algebra 0 +languag 0 +announc 0 +thursdai 0 +psych 0 +reload 0 +page 0 +everi 0 +time 0 +login 0 +instructor 0 +appoint 0 +first 0 +grader 0 +zhang 0 +locat 0 +vectra 0 +seven 0 +dai 0 +staf 0 +consult 0 +gener 0 +cours 0 +info 0 +syllabu 0 +problem 0 +solv 0 +object 0 +walter 0 +savitch 0 +addison 0 +weslei 0 +isbn 0 +includ 0 +errata 0 +sourc 0 +code 0 +misconduct 0 +policyassign 0 +homework 0 +read 0 +lectur 0 +note 0 +exampl 0 +quiz 0 +kei 0 +late 0 +style 0 +guidelin 0 +still 0 +rough 0 +print 0 +paper 0 +statement 0 +depart 0 +univers 0 +wisconsin 0 +madison 0 +last 0 +chang 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..006813be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,84 @@ +homework 2 +offic 1 +part 1 +assign 1 +advanc 1 +tabl 1 +content 1 +read 1 +project 1 +solut 1 +lectur 1 +full 1 +paper 1 +doit 1 +pipelin 1 +cach 1 +network 1 +multiprocessor 1 +hour 1 +note 1 +exam 1 +memori 1 +spring 1 +comput 1 +architectur 1 +cours 1 +inform 1 +engin 1 +hall 1 +phone 1 +wisc 1 +final 1 +main 1 +disk 1 +arrai 1 +interconnect 1 +technolog 1 +midterm 1 +rout 1 +offer 0 +instructor 0 +prof 0 +jame 0 +smith 0 +tue 0 +thur 0 +email 0 +princeoffic 0 +mail 0 +address 0 +princ 0 +new 0 +miscellaneousnew 0 +soln 0 +special 0 +biochemistri 0 +pmread 0 +overview 0 +introduct 0 +perform 0 +cost 0 +instruct 0 +set 0 +vector 0 +vliw 0 +limit 0 +softwar 0 +system 0 +trace 0 +list 0 +miscellan 0 +us 0 +tool 0 +review 0 +specmark 0 +consid 0 +harm 0 +analysi 0 +clock 0 +detail 0 +design 0 +reserv 0 +station 0 +summari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..f6e6ccf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,244 @@ +surfac 2 +homework 2 +cours 2 +graphic 2 +cornel 2 +comput 2 +project 1 +object 1 +math 1 +field 1 +univers 1 +scientif 1 +visual 1 +schedul 1 +anim 1 +color 1 +upson 1 +group 1 +theori 1 +center 1 +topic 1 +model 1 +transform 1 +volum 1 +data 1 +march 1 +prelim 1 +final 1 +page 1 +atth 1 +alan 1 +watt 1 +also 1 +content 1 +summari 1 +administrivia 1 +textbook 1 +error 1 +staff 1 +mathemat 1 +program 1 +skill 1 +polygon 1 +list 1 +parametr 1 +oper 1 +quadric 1 +hierarch 1 +geometr 1 +kinemat 1 +dynam 1 +system 1 +view 1 +camera 1 +clip 1 +vision 1 +render 1 +shade 1 +light 1 +human 1 +space 1 +method 1 +textur 1 +map 1 +dimension 1 +scalar 1 +vector 1 +line 1 +april 1 +test 1 +spring 1 +religi 1 +holidai 1 +student 1 +exam 1 +last 1 +bruce 1 +land 1 +huang 1 +comment 1 +main 0 +pagecomput 0 +scienc 0 +evolv 0 +incomplet 0 +hopefulli 0 +us 0 +begin 0 +home 0 +forc 0 +deal 0 +level 0 +principl 0 +practic 0 +folei 0 +computergraph 0 +current 0 +taught 0 +bruceland 0 +leader 0 +relev 0 +requir 0 +artist 0 +among 0 +other 0 +fundament 0 +focus 0 +associ 0 +designedto 0 +help 0 +illumin 0 +cover 0 +follow 0 +year 0 +construct 0 +explicit 0 +figur 0 +rotat 0 +swept 0 +tensor 0 +product 0 +viewer 0 +implicit 0 +blobbi 0 +tessel 0 +normal 0 +simpl 0 +form 0 +complex 0 +scene 0 +composit 0 +introduct 0 +homogen 0 +coordin 0 +build 0 +combin 0 +prototyp 0 +mimic 0 +connect 0 +rigid 0 +part 0 +invers 0 +differenti 0 +equat 0 +cellular 0 +automata 0 +onto 0 +screen 0 +parallel 0 +perspect 0 +simul 0 +stereo 0 +devic 0 +limit 0 +optic 0 +wave 0 +gourand 0 +phong 0 +hidden 0 +remov 0 +buffer 0 +transpar 0 +shadow 0 +scan 0 +convers 0 +anti 0 +alias 0 +pixel 0 +vernier 0 +hyper 0 +acuiti 0 +imag 0 +properti 0 +modif 0 +bump 0 +aspect 0 +wall 0 +channel 0 +contour 0 +mispercept 0 +difficulti 0 +arrow 0 +particl 0 +advect 0 +multiparamet 0 +high 0 +assign 0 +first 0 +serv 0 +gener 0 +guid 0 +style 0 +break 0 +educationlaw 0 +mandat 0 +faculti 0 +make 0 +avail 0 +opportun 0 +tomak 0 +examin 0 +miss 0 +belief 0 +inord 0 +facilit 0 +prepar 0 +makeup 0 +intendingto 0 +absent 0 +order 0 +observ 0 +requestedto 0 +notifi 0 +instructor 0 +lectur 0 +period 0 +tuesdai 0 +mean 0 +standard 0 +deviat 0 +rhode 0 +jing 0 +justin 0 +mccune 0 +jmccune 0 +csrelev 0 +california 0 +davi 0 +waterloo 0 +wale 0 +colleg 0 +cardiff 0 +manchest 0 +oregon 0 +state 0 +universityrel 0 +onlin 0 +document 0 +welcom 0 +sent 0 +todoc 0 +modifi 0 +copyright 0 +statement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..3d28cb34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,96 @@ +cornel 1 +visual 1 +exercis 1 +graphic 1 +current 1 +project 1 +theori 1 +center 1 +comput 1 +laboratori 1 +page 1 +materi 1 +procedur 1 +student 1 +computergraph 1 +scientif 1 +surfac 1 +transform 1 +model 1 +camera 1 +textur 1 +anim 1 +mark 1 +relat 1 +topic 1 +comment 1 +main 0 +pagecomput 0 +scienc 0 +exercisesthi 0 +site 0 +universityundergradu 0 +contain 0 +includ 0 +softwar 0 +result 0 +section 0 +deal 0 +level 0 +principl 0 +practic 0 +folei 0 +dcomput 0 +watt 0 +taught 0 +bruceland 0 +also 0 +leader 0 +group 0 +atth 0 +first 0 +place 0 +sigucc 0 +basededuc 0 +train 0 +competit 0 +get 0 +start 0 +build 0 +polygon 0 +object 0 +parametr 0 +us 0 +virtual 0 +perspect 0 +light 0 +bump 0 +map 0 +design 0 +physic 0 +base 0 +implicit 0 +done 0 +order 0 +note 0 +areinclud 0 +refer 0 +chat 0 +facil 0 +commun 0 +aboutc 0 +spring 0 +semesteraccess 0 +restrict 0 +enrol 0 +final 0 +onlin 0 +document 0 +welcom 0 +sent 0 +todoc 0 +last 0 +modifi 0 +land 0 +copyright 0 +statement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..c0160dea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,65 @@ +topic 1 +fall 1 +group 1 +student 1 +model 1 +imag 1 +system 1 +us 1 +cornel 0 +comput 0 +graphic 0 +semestereach 0 +chose 0 +current 0 +research 0 +computergraph 0 +read 0 +appropri 0 +paper 0 +implement 0 +code 0 +deliv 0 +lectur 0 +chosen 0 +anddocu 0 +work 0 +document 0 +metabal 0 +window 0 +michael 0 +arcuri 0 +alex 0 +benton 0 +human 0 +facial 0 +express 0 +huang 0 +hung 0 +content 0 +base 0 +retriev 0 +interior 0 +design 0 +sean 0 +landi 0 +interdepend 0 +particl 0 +justin 0 +mccune 0 +visual 0 +diffus 0 +distribut 0 +pollut 0 +spatial 0 +explicit 0 +landscap 0 +modelsfu 0 +tsai 0 +antialias 0 +video 0 +stochast 0 +sampl 0 +arun 0 +vermach 0 +hsun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..517bee27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,65 @@ +sandia 1 +livermor 1 +thesi 1 +vavasi 1 +research 1 +scientif 1 +comput 1 +least 1 +squar 1 +patti 0 +houghpatti 0 +hough 0 +nation 0 +laboratori 0 +cornel 0 +student 0 +center 0 +appli 0 +mathemat 0 +whichi 0 +hous 0 +frankh 0 +rhode 0 +hall 0 +cornellunivers 0 +advisor 0 +steve 0 +member 0 +committe 0 +nicktrefethen 0 +schatz 0 +interest 0 +fall 0 +area 0 +numer 0 +linearalgebra 0 +optimizationi 0 +current 0 +work 0 +postdoc 0 +juan 0 +meza 0 +depart 0 +nationallaboratori 0 +resum 0 +statement 0 +goal 0 +tech 0 +report 0 +complet 0 +orthogon 0 +decomposit 0 +weight 0 +appear 0 +siam 0 +matrix 0 +anal 0 +stabl 0 +effici 0 +solut 0 +ofweight 0 +problem 0 +applic 0 +interior 0 +pointmethod 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..88835356 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,85 @@ +postscript 1 +trefethen 1 +research 1 +page 1 +fluid 1 +baggett 1 +mathemat 1 +cornel 1 +univers 1 +year 1 +thesi 1 +dynam 1 +like 1 +interest 1 +model 1 +transit 1 +turbul 1 +physic 1 +submit 1 +jeff 0 +baggettjeff 0 +center 0 +appli 0 +frank 0 +rhode 0 +hall 0 +ithaca 0 +sixth 0 +graduat 0 +student 0 +depart 0 +atcornel 0 +supervis 0 +nick 0 +expect 0 +finish 0 +titl 0 +normal 0 +applic 0 +hydrodynam 0 +stabil 0 +summer 0 +would 0 +continu 0 +seek 0 +posit 0 +detail 0 +outlin 0 +curriculum 0 +vita 0 +activ 0 +background 0 +unusu 0 +blend 0 +scientif 0 +comput 0 +system 0 +mechan 0 +propos 0 +work 0 +iwould 0 +next 0 +coupl 0 +paper 0 +mostli 0 +linear 0 +driscol 0 +april 0 +exponenti 0 +type 0 +versu 0 +spectral 0 +abscissa 0 +hill 0 +andphillip 0 +exampl 0 +integr 0 +equat 0 +oper 0 +theori 0 +dimension 0 +subcrit 0 +misc 0 +link 0 +satish 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..a8c4d486 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,71 @@ +ncstrl 2 +report 1 +search 1 +comput 1 +scienc 1 +technic 1 +collect 1 +network 1 +field 1 +sever 1 +institut 1 +word 1 +list 1 +document 1 +brows 1 +cornel 1 +librari 0 +libraryncstrl 0 +pronounc 0 +ancestr 0 +internationalcollect 0 +departmentsand 0 +industri 0 +govern 0 +research 0 +laboratori 0 +made 0 +availablefor 0 +commerci 0 +eduat 0 +ncstrlcollect 0 +distribut 0 +among 0 +interoper 0 +serversoper 0 +participatinginstitut 0 +read 0 +offici 0 +ncstrlpress 0 +packag 0 +descript 0 +background 0 +goal 0 +andorgan 0 +form 0 +allow 0 +perform 0 +bibliograph 0 +data 0 +limit 0 +specif 0 +enter 0 +whose 0 +author 0 +titl 0 +abstract 0 +contain 0 +theparticip 0 +want 0 +join 0 +tell 0 +moreread 0 +forinstitut 0 +interest 0 +particip 0 +informationfind 0 +snew 0 +relat 0 +send 0 +email 0 +totech 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..dc7e2e56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^dri.cornell.edu @@ -0,0 +1,33 @@ +design 1 +research 1 +institut 1 +technic 1 +instituteabout 0 +researchersat 0 +brows 0 +searchal 0 +public 0 +file 0 +reportssearch 0 +report 0 +ipic 0 +home 0 +page 0 +intern 0 +work 0 +confer 0 +integr 0 +enterpris 0 +informationand 0 +process 0 +anoth 0 +site 0 +inform 0 +itisingapor 0 +altavistaforum 0 +send 0 +question 0 +comment 0 +server 0 +mike 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..087fbef4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,177 @@ +us 1 +question 1 +resourc 1 +design 1 +gener 1 +interest 1 +access 1 +work 1 +project 1 +comput 1 +technic 1 +knowledg 1 +also 1 +cours 1 +student 1 +whether 1 +davi 1 +cornel 1 +system 1 +futur 1 +increas 1 +structur 1 +inform 1 +report 1 +corpor 1 +group 1 +memori 1 +mean 1 +institut 1 +product 1 +document 1 +share 1 +staff 1 +answer 1 +remot 1 +machin 1 +seem 1 +research 0 +institutejim 0 +davisxerox 0 +corporationphd 0 +media 0 +edumi 0 +goal 0 +build 0 +softwar 0 +improvecommun 0 +among 0 +peopl 0 +believ 0 +commun 0 +medium 0 +ofth 0 +understand 0 +andcont 0 +messag 0 +transmit 0 +manipul 0 +reformat 0 +even 0 +content 0 +inhypertext 0 +network 0 +collabor 0 +thecstr 0 +anarpa 0 +sponsor 0 +effort 0 +make 0 +scienc 0 +moreeasili 0 +part 0 +distribut 0 +server 0 +whichi 0 +run 0 +mani 0 +univers 0 +electronicsystem 0 +captur 0 +produc 0 +worker 0 +order 0 +qualiti 0 +ofor 0 +reduc 0 +time 0 +requir 0 +memoryinclud 0 +intellectu 0 +engin 0 +lawyer 0 +contract 0 +author 0 +sscreenplai 0 +process 0 +producedth 0 +dead 0 +end 0 +explor 0 +tool 0 +andjustif 0 +support 0 +final 0 +decis 0 +begun 0 +huttenloch 0 +developingcorpor 0 +sharedannot 0 +investig 0 +howpeopl 0 +read 0 +write 0 +annot 0 +inelectron 0 +initi 0 +prototypeimplement 0 +class 0 +shareddocu 0 +problem 0 +set 0 +note 0 +nnotat 0 +might 0 +berequest 0 +clarif 0 +orcorrect 0 +made 0 +aus 0 +obtain 0 +willfind 0 +sourc 0 +learn 0 +whetherstud 0 +often 0 +abl 0 +correctli 0 +find 0 +usefulmean 0 +feedback 0 +improv 0 +evid 0 +isthat 0 +natur 0 +languag 0 +designof 0 +proxi 0 +agent 0 +safe 0 +reliablycarri 0 +foreign 0 +without 0 +risk 0 +toeither 0 +owner 0 +alsopap 0 +onlin 0 +copi 0 +publicatiion 0 +list 0 +especi 0 +thedrimi 0 +collect 0 +meprofession 0 +historythi 0 +narr 0 +resum 0 +contact 0 +improvisationi 0 +sport 0 +resumeno 0 +market 0 +thank 0 +ask 0 +likeit 0 +fine 0 +xerox 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..159839b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,28 @@ +search 1 +metacrawl 1 +erik 1 +selberg 1 +greg 1 +oren 1 +home 1 +searchingmetacrawlerbi 0 +lauckhartand 0 +prof 0 +etzioniif 0 +person 0 +page 0 +ahoi 0 +phrase 0 +word 0 +wordssort 0 +result 0 +relev 0 +locationcontrol 0 +java 0 +configur 0 +problemswebmast 0 +comcopyright 0 +etzioni 0 +lauckhart 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..91ab0dc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,99 @@ +robot 2 +madison 1 +sensor 1 +plan 1 +research 1 +base 1 +motion 1 +ieee 1 +univers 1 +page 1 +engin 1 +relat 1 +project 1 +kinemat 1 +group 1 +system 1 +comput 1 +link 1 +wisconsin 0 +home 0 +mechan 0 +avenuemadison 0 +vladimir 0 +lumelski 0 +director 0 +brief 0 +review 0 +activ 0 +peopl 0 +technic 0 +report 0 +compress 0 +postscript 0 +avail 0 +simul 0 +librari 0 +cours 0 +seminar 0 +recent 0 +select 0 +public 0 +abstract 0 +maze 0 +search 0 +algorithm 0 +effect 0 +dynam 0 +control 0 +jogger 0 +model 0 +sens 0 +decentr 0 +intellig 0 +special 0 +topic 0 +tether 0 +underwat 0 +redund 0 +sensit 0 +skin 0 +human 0 +center 0 +geometri 0 +neil 0 +duffi 0 +manufactur 0 +chuck 0 +dyer 0 +vision 0 +robert 0 +lorenz 0 +actuat 0 +jude 0 +shavlik 0 +machin 0 +learn 0 +societi 0 +autom 0 +tech 0 +committe 0 +path 0 +internet 0 +resourc 0 +server 0 +nasa 0 +telerobot 0 +program 0 +frequent 0 +ask 0 +question 0 +list 0 +local 0 +dept 0 +colleg 0 +comment 0 +suggest 0 +errata 0 +hert 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..89090cd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,40 @@ +engin 1 +comput 1 +robot 1 +dept 1 +ieee 1 +vladimir 1 +home 1 +page 1 +scienc 1 +link 1 +mathemat 1 +lumelski 0 +lumelskyprofessormechan 0 +underwat 0 +kinemat 0 +redund 0 +sensit 0 +skin 0 +project 0 +human 0 +center 0 +system 0 +geometri 0 +global 0 +societi 0 +autom 0 +tech 0 +committe 0 +motion 0 +path 0 +plan 0 +wisconsin 0 +colleg 0 +mechan 0 +electr 0 +graduat 0 +program 0 +mace 0 +grant 0 +institut 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..b6d30f3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,196 @@ +system 2 +vogel 2 +high 2 +werner 2 +commun 1 +network 1 +distribut 1 +speed 1 +proceed 1 +comput 1 +cornel 1 +protocol 1 +design 1 +cluster 1 +horu 1 +need 1 +mechan 1 +level 1 +messag 1 +failur 1 +group 1 +robbert 1 +reness 1 +eicken 1 +noth 1 +left 1 +take 1 +project 1 +think 1 +latenc 1 +support 1 +highli 1 +real 1 +time 1 +issu 1 +environ 1 +oper 1 +guarante 1 +applic 1 +abl 1 +detect 1 +birman 1 +appear 1 +sigop 1 +workshopconnamoran 1 +ierland 1 +septemb 1 +thorsten 1 +perform 1 +researchera 0 +upson 0 +halldept 0 +sciencecornel 0 +univers 0 +ithaca 0 +phone 0 +email 0 +perfect 0 +reach 0 +awai 0 +involv 0 +major 0 +scienc 0 +depart 0 +thehorusand 0 +myresearch 0 +interest 0 +best 0 +describ 0 +bandwith 0 +reliabl 0 +requir 0 +focu 0 +engin 0 +thing 0 +amwork 0 +predict 0 +execut 0 +integr 0 +horuswith 0 +lead 0 +situat 0 +reason 0 +advanc 0 +effici 0 +data 0 +transfer 0 +devic 0 +bandwidth 0 +small 0 +fallen 0 +behind 0 +softwar 0 +adapt 0 +achiev 0 +desir 0 +latencyfor 0 +pass 0 +protocolsar 0 +exploit 0 +structureand 0 +interact 0 +pattern 0 +method 0 +deal 0 +miss 0 +deadlin 0 +meet 0 +guarant 0 +gave 0 +tell 0 +anyon 0 +acur 0 +want 0 +aglob 0 +scope 0 +find 0 +gener 0 +supportfailur 0 +suspis 0 +manag 0 +process 0 +node 0 +experi 0 +extract 0 +workwith 0 +middlewar 0 +packag 0 +regardless 0 +function 0 +brainchild 0 +andken 0 +work 0 +done 0 +cooper 0 +withthorsten 0 +multimedia 0 +video 0 +demand 0 +horusexperi 0 +concert 0 +brian 0 +smith 0 +respons 0 +practicum 0 +teach 0 +number 0 +lectureson 0 +technolog 0 +practic 0 +recent 0 +public 0 +world 0 +wide 0 +structur 0 +virtual 0 +synchroni 0 +explor 0 +bound 0 +virtuallysynchron 0 +katherin 0 +user 0 +interfacefor 0 +parallel 0 +anindya 0 +basu 0 +vineet 0 +buch 0 +symposium 0 +princpl 0 +copper 0 +mountain 0 +decemb 0 +deliv 0 +third 0 +ieee 0 +workshop 0 +architectur 0 +implementationof 0 +subsystem 0 +hpc 0 +august 0 +flexibl 0 +kenneth 0 +brad 0 +glade 0 +kati 0 +mark 0 +hayden 0 +takako 0 +hickei 0 +dalia 0 +malki 0 +alex 0 +vaysburd 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..0be2a1d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,265 @@ +horu 2 +system 2 +osiri 1 +project 1 +develop 1 +distribut 1 +isi 1 +commun 1 +applic 1 +egyptian 1 +softwar 1 +ensembl 1 +compani 1 +god 1 +rejoic 1 +whose 1 +groupcommun 1 +requir 1 +life 1 +oppos 1 +death 1 +framework 1 +comput 1 +toler 1 +data 1 +replic 1 +design 1 +effort 1 +gener 1 +purpos 1 +us 1 +version 1 +activ 1 +environ 1 +high 1 +network 1 +found 1 +dead 1 +bodi 1 +made 1 +introduct 1 +kenneth 1 +birman 1 +robbert 1 +reness 1 +projectth 0 +ofth 0 +come 0 +heart 0 +firm 0 +triumphant 0 +ofisi 0 +heir 0 +modular 0 +extens 0 +process 0 +address 0 +wide 0 +varieti 0 +robust 0 +attribut 0 +appealedstrongli 0 +egypt 0 +becausein 0 +everi 0 +woman 0 +type 0 +wish 0 +possess 0 +renew 0 +movementa 0 +inact 0 +provid 0 +applicationsbas 0 +group 0 +style 0 +aris 0 +infault 0 +manag 0 +thatexploit 0 +coher 0 +cach 0 +groupwar 0 +within 0 +theoveral 0 +larg 0 +collect 0 +applicationprotocol 0 +allow 0 +toconstruct 0 +modul 0 +exactli 0 +meet 0 +applicationrequir 0 +minim 0 +cost 0 +origin 0 +launch 0 +redesign 0 +theisi 0 +evolv 0 +communicationarchitectur 0 +advanc 0 +support 0 +robustdistribut 0 +set 0 +unsuit 0 +asappl 0 +special 0 +secur 0 +real 0 +time 0 +besidesth 0 +practic 0 +contribut 0 +toward 0 +theori 0 +virtual 0 +synchroni 0 +runtim 0 +model 0 +usedfor 0 +implement 0 +fault 0 +sametim 0 +much 0 +faster 0 +lighter 0 +weight 0 +exist 0 +initi 0 +code 0 +beus 0 +research 0 +restrict 0 +commericalright 0 +call 0 +written 0 +usabl 0 +manyoth 0 +languag 0 +avail 0 +class 0 +user 0 +nofe 0 +seri 0 +ofreleas 0 +fall 0 +spring 0 +earli 0 +ensemblewil 0 +outstand 0 +build 0 +java 0 +base 0 +groupwareappl 0 +multimedia 0 +conferenc 0 +platform 0 +independ 0 +areavail 0 +differentclass 0 +workstat 0 +person 0 +parallel 0 +processor 0 +onnext 0 +cluster 0 +standard 0 +speedcommun 0 +collabor 0 +close 0 +mani 0 +systemsproject 0 +includ 0 +transi 0 +navtech 0 +stormcast 0 +tacomaproject 0 +link 0 +elsewher 0 +thesepag 0 +circumst 0 +begotten 0 +gain 0 +good 0 +idea 0 +froma 0 +sorrow 0 +mother 0 +herhusband 0 +describ 0 +goddess 0 +greatli 0 +distress 0 +equippedwith 0 +mighti 0 +word 0 +power 0 +knew 0 +utter 0 +mighthav 0 +greatest 0 +effect 0 +search 0 +never 0 +rest 0 +hair 0 +light 0 +wing 0 +stir 0 +lament 0 +brother 0 +length 0 +brought 0 +state 0 +unit 0 +thu 0 +becam 0 +child 0 +born 0 +secret 0 +place 0 +suckl 0 +rear 0 +horusvisit 0 +papersand 0 +abstractpag 0 +overview 0 +public 0 +report 0 +relatedto 0 +follow 0 +recent 0 +articl 0 +present 0 +level 0 +reliabl 0 +scientif 0 +american 0 +silvano 0 +maffei 0 +flexiblegroup 0 +april 0 +final 0 +sentenc 0 +hyme 0 +osirisfrom 0 +papyru 0 +better 0 +know 0 +book 0 +walli 0 +budg 0 +studiesin 0 +mytholog 0 +volum 0 +page 0 +open 0 +court 0 +publish 0 +london 0 +comment 0 +werner 0 +vogel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..ad2ed90b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,87 @@ +depart 2 +cornel 1 +report 1 +research 1 +page 1 +gener 1 +faculti 1 +annual 1 +home 1 +comput 1 +scienc 1 +info 1 +project 1 +know 1 +inform 1 +also 1 +find 1 +check 1 +go 1 +cours 1 +peopl 1 +server 1 +academ 0 +tech 0 +welcom 0 +universitydepart 0 +site 0 +feel 0 +free 0 +brows 0 +around 0 +infoget 0 +locat 0 +size 0 +contactswithin 0 +standard 0 +disclaim 0 +facultyfind 0 +list 0 +offici 0 +ortheir 0 +person 0 +researchcheck 0 +aboutour 0 +collabor 0 +publicationsfind 0 +link 0 +public 0 +researcherseith 0 +technic 0 +projector 0 +degreeslook 0 +degre 0 +program 0 +doctor 0 +master 0 +engin 0 +orundergradu 0 +academicsrefer 0 +taught 0 +webfor 0 +semest 0 +read 0 +generalcoursedescript 0 +appear 0 +studi 0 +peopleget 0 +outstand 0 +keep 0 +includ 0 +staff 0 +student 0 +directorylist 0 +activitiesfind 0 +activ 0 +theassoci 0 +undergradu 0 +excellenthockei 0 +team 0 +serverscheck 0 +gopherserv 0 +anonym 0 +ftpserver 0 +sitesquest 0 +comment 0 +informationpres 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..fe3324be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,242 @@ +text 3 +retriev 2 +inform 2 +automat 2 +hypertext 1 +univers 1 +research 1 +larg 1 +us 1 +analysi 1 +system 1 +cornel 1 +process 1 +nation 1 +novemb 1 +allan 1 +develop 1 +avail 1 +document 1 +search 1 +gener 1 +purpos 1 +base 1 +databas 1 +link 1 +encyclopedia 1 +full 1 +smart 1 +librari 1 +ohio 1 +util 1 +comput 1 +proceed 1 +annual 1 +rapidli 1 +exist 1 +high 1 +sophist 1 +user 1 +queri 1 +file 1 +oper 1 +subject 1 +corpu 1 +determin 1 +similar 1 +main 1 +structur 1 +collect 1 +capabl 1 +consist 1 +differ 1 +version 1 +sigir 1 +confer 1 +germani 1 +genom 1 +medicin 1 +bethesda 1 +maryland 1 +june 1 +columbu 1 +octob 1 +seattl 1 +washington 1 +associ 1 +machineri 1 +york 1 +bucklei 1 +nevada 1 +home 1 +denis 1 +gerard 0 +saltongerard 0 +saltonprofessorg 0 +eduph 0 +harvard 0 +natur 0 +languag 0 +expand 0 +field 0 +mass 0 +machin 0 +readabl 0 +cheapli 0 +store 0 +densiti 0 +optic 0 +storag 0 +media 0 +demand 0 +furthermor 0 +method 0 +analyz 0 +formul 0 +appropri 0 +conduct 0 +rapid 0 +rank 0 +item 0 +decreas 0 +order 0 +import 0 +design 0 +environ 0 +handl 0 +without 0 +restrict 0 +size 0 +matter 0 +absenc 0 +knowledg 0 +would 0 +unrestrict 0 +mean 0 +word 0 +express 0 +refin 0 +context 0 +statist 0 +probabilist 0 +criteria 0 +approach 0 +abl 0 +degre 0 +accuraci 0 +applic 0 +semant 0 +piec 0 +represent 0 +provid 0 +flexibl 0 +brows 0 +access 0 +interest 0 +excerpt 0 +respons 0 +done 0 +extens 0 +work 0 +autom 0 +articl 0 +funk 0 +wagnal 0 +addit 0 +also 0 +trec 0 +cover 0 +number 0 +area 0 +gigabyt 0 +servic 0 +well 0 +relat 0 +section 0 +paragraph 0 +sentenc 0 +test 0 +vehicl 0 +continu 0 +current 0 +unix 0 +sparc 0 +station 0 +termin 0 +equip 0 +activitiesmemb 0 +engin 0 +colleg 0 +committeeprofession 0 +activitiesassoci 0 +editor 0 +transact 0 +systemsprogram 0 +committe 0 +seventeenth 0 +dublin 0 +ireland 0 +electron 0 +publish 0 +darmstadt 0 +multimedia 0 +hypermedia 0 +virtual 0 +realiti 0 +moscow 0 +septemb 0 +lecturesautomat 0 +construct 0 +feder 0 +institut 0 +technolog 0 +zurich 0 +switzerland 0 +progress 0 +konstanz 0 +asi 0 +meet 0 +scienc 0 +colloquium 0 +state 0 +lectur 0 +cours 0 +microsoft 0 +corpor 0 +workshop 0 +publicationsapproach 0 +passag 0 +select 0 +travers 0 +commun 0 +februari 0 +vector 0 +model 0 +third 0 +symposium 0 +vega 0 +april 0 +softwareth 0 +made 0 +free 0 +charg 0 +sever 0 +hundr 0 +copi 0 +distribut 0 +around 0 +world 0 +return 0 +list 0 +faculti 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..8ed9198d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,87 @@ +minim 1 +problem 1 +trust 1 +region 1 +method 1 +optim 1 +cornel 1 +gener 1 +interest 1 +scientif 1 +comput 1 +engin 1 +nonlinearli 1 +constrain 1 +nonlinear 1 +research 1 +techniqu 1 +solv 1 +includ 1 +converg 1 +us 1 +subject 1 +bound 1 +confer 1 +chines 1 +young 1 +scientist 1 +home 1 +denis 1 +yui 0 +liyui 0 +liresearch 0 +associateyui 0 +eduph 0 +univers 0 +waterloo 0 +numer 0 +also 0 +appli 0 +real 0 +world 0 +current 0 +theori 0 +unconstrain 0 +particular 0 +exist 0 +accept 0 +condit 0 +investig 0 +affin 0 +scale 0 +function 0 +analysi 0 +exact 0 +penalti 0 +approach 0 +applic 0 +consid 0 +imag 0 +enhanc 0 +lecturesan 0 +interior 0 +beij 0 +china 0 +august 0 +publicationsa 0 +global 0 +siam 0 +journal 0 +center 0 +reflect 0 +proceed 0 +return 0 +list 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..a6da38b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,178 @@ +comput 2 +mathemat 1 +depart 1 +object 1 +york 1 +us 1 +architectur 1 +system 1 +algebra 1 +polynomi 1 +present 1 +israel 1 +research 1 +cornel 1 +symbol 1 +scientif 1 +toolkit 1 +engin 1 +program 1 +workshop 1 +algorithm 1 +intern 1 +american 1 +societi 1 +region 1 +meet 1 +syracus 1 +decemb 1 +dawson 1 +dean 1 +haifa 1 +januari 1 +focus 1 +softwar 1 +techniqu 1 +gener 1 +fluid 1 +dynam 1 +differenti 1 +equat 1 +studi 1 +code 1 +weyl 1 +function 1 +languag 1 +provid 1 +number 1 +journal 1 +symposium 1 +septemb 1 +dexter 1 +kozen 1 +susan 1 +landau 1 +vista 1 +microstorag 1 +orient 1 +joint 1 +electr 1 +technion 1 +scienc 1 +ronitt 1 +rubinfeld 1 +page 1 +databas 1 +home 1 +denis 1 +richard 0 +zippelrichard 0 +zippelsenior 0 +associaterz 0 +eduph 0 +modern 0 +autom 0 +current 0 +work 0 +allow 0 +scientist 0 +specifi 0 +perform 0 +numer 0 +machin 0 +calcul 0 +done 0 +convert 0 +suppli 0 +method 0 +special 0 +colleagu 0 +mechan 0 +aerospac 0 +aris 0 +boundari 0 +layer 0 +compon 0 +substrat 0 +call 0 +extend 0 +data 0 +structur 0 +avail 0 +common 0 +lisp 0 +includ 0 +like 0 +matric 0 +ration 0 +ring 0 +vector 0 +space 0 +ideal 0 +introduct 0 +challeng 0 +type 0 +opportun 0 +deduct 0 +reason 0 +pursu 0 +profession 0 +activitieseditori 0 +board 0 +transact 0 +softwareprogram 0 +committe 0 +principl 0 +practic 0 +constraint 0 +refere 0 +review 0 +error 0 +correct 0 +computationlecturesalgebra 0 +decomposit 0 +effect 0 +irreduc 0 +test 0 +oper 0 +durham 0 +north 0 +carolina 0 +center 0 +synthes 0 +weizmann 0 +institut 0 +rehovot 0 +suni 0 +albani 0 +april 0 +modular 0 +interpol 0 +factor 0 +multivari 0 +theori 0 +ithaca 0 +publicationseffect 0 +kluwer 0 +academ 0 +publish 0 +boston 0 +massachusett 0 +june 0 +implement 0 +file 0 +proceed 0 +return 0 +list 0 +annual 0 +report 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..0667102d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,217 @@ +comput 2 +horu 2 +distribut 2 +system 2 +develop 1 +group 1 +work 1 +isi 1 +us 1 +program 1 +commun 1 +secur 1 +fault 1 +toler 1 +model 1 +virtual 1 +process 1 +effort 1 +side 1 +softwar 1 +tool 1 +also 1 +depart 1 +committe 1 +member 1 +cornel 1 +reliabl 1 +california 1 +problem 1 +reconfigur 1 +practic 1 +toolkit 1 +support 1 +synchroni 1 +real 1 +time 1 +featur 1 +extens 1 +layer 1 +avail 1 +technolog 1 +techniqu 1 +parallel 1 +graduat 1 +student 1 +privaci 1 +high 1 +prove 1 +implement 1 +studi 1 +notabl 1 +friedman 1 +collabor 1 +head 1 +reness 1 +engin 1 +scienc 1 +chief 1 +consist 1 +ieee 1 +home 1 +denis 1 +kenneth 0 +birmankenneth 0 +birmanprofessorphd 0 +univ 0 +berkelei 0 +research 0 +concern 0 +oper 0 +focu 0 +base 0 +upon 0 +synchron 0 +solv 0 +manag 0 +replic 0 +data 0 +coordin 0 +action 0 +set 0 +perform 0 +dynam 0 +done 0 +provid 0 +although 0 +limit 0 +certain 0 +class 0 +reason 0 +benign 0 +failur 0 +theoret 0 +start 0 +call 0 +becam 0 +wide 0 +popular 0 +intend 0 +flexibl 0 +address 0 +issu 0 +import 0 +permit 0 +special 0 +purpos 0 +basic 0 +idea 0 +user 0 +actual 0 +broad 0 +collect 0 +option 0 +seek 0 +leverag 0 +emerg 0 +network 0 +activ 0 +messag 0 +origin 0 +supercomput 0 +embodi 0 +advanc 0 +mike 0 +reiter 0 +david 0 +cooper 0 +unusu 0 +combin 0 +singl 0 +packag 0 +fundament 0 +look 0 +specifi 0 +properti 0 +structur 0 +languag 0 +execut 0 +refer 0 +major 0 +goal 0 +constabl 0 +nuprl 0 +latter 0 +correctli 0 +former 0 +ad 0 +guarante 0 +mark 0 +hayden 0 +probabilist 0 +broadcast 0 +primit 0 +much 0 +architectur 0 +robbert 0 +werner 0 +vogel 0 +aspect 0 +includ 0 +object 0 +orient 0 +multimedia 0 +applic 0 +speed 0 +protocol 0 +exploit 0 +within 0 +thorsten 0 +eicken 0 +brian 0 +smith 0 +univers 0 +activitieschair 0 +polici 0 +act 0 +master 0 +faculti 0 +recruit 0 +academ 0 +leadership 0 +profession 0 +activitieseditor 0 +transact 0 +scientist 0 +isat 0 +robust 0 +critic 0 +element 0 +nation 0 +inform 0 +infrastructur 0 +publicationsth 0 +approach 0 +decemb 0 +integr 0 +runtim 0 +journal 0 +birman 0 +societi 0 +press 0 +alamito 0 +glade 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..4927559e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,150 @@ +graphic 2 +comput 2 +scienc 2 +model 2 +univers 1 +technolog 1 +research 1 +scientif 1 +surfac 1 +imag 1 +center 1 +visual 1 +cornel 1 +displai 1 +techniqu 1 +nation 1 +director 1 +involv 1 +develop 1 +input 1 +method 1 +topic 1 +algorithm 1 +alias 1 +complex 1 +environ 1 +realist 1 +gener 1 +conduct 1 +reflect 1 +strategi 1 +medic 1 +program 1 +foundat 1 +institut 1 +engin 1 +fellow 1 +home 1 +denis 1 +donald 0 +greenbergdonald 0 +greenberg 0 +jacob 0 +gould 0 +schurman 0 +prof 0 +past 0 +decad 0 +activ 0 +wide 0 +rang 0 +number 0 +implement 0 +progress 0 +made 0 +larg 0 +varieti 0 +routin 0 +previous 0 +investig 0 +includ 0 +polygon 0 +clip 0 +hidden 0 +textur 0 +spatial 0 +tempor 0 +problem 0 +geometr 0 +parametr 0 +descript 0 +color 0 +current 0 +focu 0 +three 0 +dimension 0 +synthesi 0 +modular 0 +testb 0 +suffici 0 +flexibl 0 +evalu 0 +differ 0 +creat 0 +laboratori 0 +light 0 +determin 0 +interact 0 +improv 0 +effici 0 +trace 0 +parallel 0 +process 0 +perceptu 0 +studi 0 +micro 0 +geometri 0 +motion 0 +control 0 +dynam 0 +constraint 0 +anti 0 +host 0 +relat 0 +applic 0 +start 0 +volum 0 +render 0 +digit 0 +photographi 0 +anim 0 +tool 0 +well 0 +core 0 +multi 0 +media 0 +within 0 +facil 0 +member 0 +particip 0 +brown 0 +california 0 +north 0 +carolina 0 +chapel 0 +hill 0 +utah 0 +activitiesdirector 0 +visualizationprofession 0 +activitieseditori 0 +board 0 +journal 0 +academi 0 +found 0 +american 0 +biolog 0 +acmreturn 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..db6f4be1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,200 @@ +comput 3 +scienc 3 +complex 2 +univers 2 +journal 2 +engin 1 +academi 1 +board 1 +problem 1 +resourc 1 +class 1 +council 1 +fellow 1 +lectur 1 +committe 1 +foundat 1 +research 1 +solv 1 +intern 1 +time 1 +member 1 +nation 1 +system 1 +eatc 1 +cornel 1 +technolog 1 +studi 1 +need 1 +variou 1 +random 1 +ture 1 +award 1 +american 1 +associ 1 +springer 1 +verlag 1 +advisori 1 +theoret 1 +chicago 1 +electron 1 +novemb 1 +natur 1 +distinguish 1 +seri 1 +februari 1 +home 1 +denis 1 +juri 0 +hartmanisjuri 0 +hartmani 0 +walter 0 +read 0 +professor 0 +california 0 +institut 0 +strateg 0 +goal 0 +contribut 0 +develop 0 +comprehens 0 +theori 0 +quantit 0 +law 0 +govern 0 +essenti 0 +part 0 +base 0 +guid 0 +har 0 +exploit 0 +explos 0 +grow 0 +classifi 0 +amount 0 +classif 0 +yield 0 +consist 0 +within 0 +given 0 +bound 0 +gain 0 +deeper 0 +understand 0 +make 0 +hard 0 +explor 0 +relat 0 +structur 0 +also 0 +trade 0 +off 0 +differ 0 +particular 0 +attent 0 +sequenti 0 +parallel 0 +nondeterminist 0 +memori 0 +requir 0 +interact 0 +activitiesmemb 0 +faculti 0 +representativeschair 0 +depart 0 +recruit 0 +committeehonorsacm 0 +stearn 0 +foreign 0 +latvian 0 +art 0 +york 0 +state 0 +advanc 0 +aaa 0 +charter 0 +profession 0 +activitieseditor 0 +note 0 +siam 0 +monograph 0 +director 0 +ifip 0 +technic 0 +georg 0 +brown 0 +school 0 +rice 0 +houston 0 +texa 0 +peer 0 +visit 0 +physic 0 +divis 0 +advisor 0 +world 0 +scientif 0 +presseditori 0 +press 0 +editor 0 +sciencegoedel 0 +prize 0 +telecommun 0 +awardshonorari 0 +doctor 0 +degre 0 +dortmund 0 +germani 0 +lecturessom 0 +observ 0 +banquet 0 +speech 0 +logic 0 +program 0 +symposium 0 +scope 0 +futur 0 +virginia 0 +tennesse 0 +april 0 +publicationson 0 +commun 0 +octob 0 +oracl 0 +hypothesi 0 +fals 0 +august 0 +richard 0 +chang 0 +benni 0 +chor 0 +od 0 +goldreich 0 +johan 0 +hastad 0 +desh 0 +ranjan 0 +pankaj 0 +rohatgi 0 +hausdorff 0 +topolog 0 +dimens 0 +kolmogorov 0 +real 0 +line 0 +decemb 0 +weight 0 +bulletin 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..22aac233 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,130 @@ +comput 2 +research 2 +scienc 2 +inform 1 +technolog 1 +engin 1 +institut 1 +nation 1 +board 1 +journal 1 +dean 1 +univers 1 +colleg 1 +depart 1 +unit 1 +captur 1 +access 1 +collabor 1 +design 1 +scientif 1 +advisori 1 +academi 1 +american 1 +associ 1 +fellow 1 +intern 1 +home 1 +cornel 1 +denis 1 +john 0 +hopcroftjohn 0 +hopcroftjoseph 0 +silbert 0 +engineeringprofessor 0 +sciencephd 0 +stanford 0 +januari 0 +appoint 0 +overse 0 +academ 0 +compris 0 +well 0 +variou 0 +involv 0 +continu 0 +robust 0 +geometr 0 +algorithm 0 +model 0 +simul 0 +develop 0 +facilit 0 +within 0 +environ 0 +among 0 +distribut 0 +databas 0 +persist 0 +object 0 +storag 0 +document 0 +imag 0 +process 0 +manag 0 +multimedia 0 +user 0 +interfac 0 +heterogen 0 +data 0 +knowledg 0 +represent 0 +organ 0 +remot 0 +profession 0 +activitiesmemb 0 +council 0 +commiss 0 +physic 0 +mathemat 0 +applicationsmemb 0 +boardmemb 0 +state 0 +forcememb 0 +engineeringfellow 0 +art 0 +sciencesfellow 0 +advanc 0 +aaa 0 +electr 0 +electron 0 +ieee 0 +machinerychairman 0 +siam 0 +trusteesmemb 0 +committe 0 +david 0 +lucil 0 +packard 0 +foundationmemb 0 +sloan 0 +fellowship 0 +committeeadvisori 0 +supercomput 0 +center 0 +defens 0 +analysiseditor 0 +oxford 0 +press 0 +seri 0 +algorithmica 0 +discret 0 +geometryassoci 0 +editor 0 +geometri 0 +applic 0 +system 0 +sciencesreturn 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..2588bff6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,257 @@ +program 2 +compil 2 +parallel 2 +develop 2 +comput 2 +work 1 +code 1 +local 1 +algorithm 1 +depend 1 +research 1 +spars 1 +techniqu 1 +matrix 1 +restructur 1 +memori 1 +data 1 +optim 1 +control 1 +scienc 1 +languag 1 +applic 1 +matric 1 +us 1 +produc 1 +approach 1 +problem 1 +linear 1 +distribut 1 +access 1 +must 1 +transfer 1 +best 1 +loop 1 +paper 1 +result 1 +hewlett 1 +packard 1 +journal 1 +area 1 +architectur 1 +tool 1 +gener 1 +larg 1 +involv 1 +equat 1 +almost 1 +system 1 +algebra 1 +technolog 1 +dens 1 +inform 1 +structur 1 +librari 1 +like 1 +earlier 1 +refer 1 +block 1 +recent 1 +align 1 +incorpor 1 +transform 1 +implement 1 +prize 1 +asplo 1 +foundat 1 +question 1 +microsoft 1 +member 1 +young 1 +investig 1 +award 1 +lab 1 +corpor 1 +ieee 1 +univers 1 +june 1 +annual 1 +home 1 +cornel 1 +denis 1 +keshav 0 +pingalikeshav 0 +pingaliassoci 0 +professorphd 0 +group 0 +goal 0 +deal 0 +scientif 0 +numer 0 +solut 0 +partial 0 +differenti 0 +alwai 0 +unfortun 0 +exist 0 +poor 0 +take 0 +radic 0 +differ 0 +sequenti 0 +user 0 +sparsiti 0 +enabl 0 +preliminari 0 +experi 0 +krylov 0 +space 0 +solver 0 +show 0 +competit 0 +hand 0 +argonn 0 +petsc 0 +extend 0 +direct 0 +method 0 +solv 0 +requir 0 +adapt 0 +mesh 0 +refin 0 +project 0 +build 0 +uniform 0 +numa 0 +processor 0 +faster 0 +good 0 +perform 0 +also 0 +ensur 0 +match 0 +made 0 +prefer 0 +mani 0 +small 0 +messag 0 +known 0 +automat 0 +test 0 +novel 0 +call 0 +normal 0 +nest 0 +increas 0 +potenti 0 +lambda 0 +toolkit 0 +summar 0 +fortran 0 +product 0 +line 0 +uniprocessor 0 +multiprocessor 0 +framework 0 +analysi 0 +base 0 +flow 0 +graph 0 +knit 0 +togeth 0 +permit 0 +better 0 +possibl 0 +compet 0 +independ 0 +interest 0 +exampl 0 +answer 0 +open 0 +decad 0 +time 0 +static 0 +singl 0 +assign 0 +form 0 +number 0 +includ 0 +flavor 0 +profession 0 +activitiespanel 0 +organ 0 +symposium 0 +principl 0 +practic 0 +nation 0 +panel 0 +consult 0 +intel 0 +armi 0 +ballist 0 +odyssei 0 +math 0 +institut 0 +refere 0 +review 0 +topla 0 +transact 0 +supercomput 0 +computereditori 0 +board 0 +intern 0 +awardsn 0 +presidenti 0 +faculti 0 +lecturesfast 0 +chelmsford 0 +massachusett 0 +januari 0 +depart 0 +wayn 0 +state 0 +detroit 0 +michigan 0 +februari 0 +rutger 0 +brunswick 0 +jersei 0 +laboratori 0 +redmond 0 +washington 0 +publicationssolv 0 +elementari 0 +proceed 0 +seventh 0 +workshop 0 +lcpc 0 +lectur 0 +note 0 +ithaca 0 +august 0 +david 0 +indupraka 0 +kodukula 0 +vladimir 0 +kotlyar 0 +paul 0 +stodghil 0 +sigplan 0 +confer 0 +design 0 +pldi 0 +gianfranco 0 +bilardi 0 +return 0 +report 0 +page 0 +department 0 +pageif 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..076864c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,366 @@ +system 2 +comput 2 +program 2 +fault 2 +toler 2 +real 2 +time 2 +committe 2 +intern 2 +univers 2 +scienc 2 +germani 2 +techniqu 2 +implement 2 +member 2 +research 1 +hypervisor 1 +trom 1 +agent 1 +workshop 1 +distribut 1 +applic 1 +gri 1 +logic 1 +coordin 1 +inform 1 +softwar 1 +symposium 1 +formal 1 +exploit 1 +environ 1 +sept 1 +march 1 +critic 1 +virtual 1 +replica 1 +school 1 +center 1 +outlin 1 +lectur 1 +juli 1 +washington 1 +proof 1 +north 1 +carolina 1 +teach 1 +state 1 +concurr 1 +becom 1 +reason 1 +complet 1 +past 1 +base 1 +architectur 1 +protocol 1 +instruct 1 +execut 1 +oper 1 +norwai 1 +cornel 1 +support 1 +process 1 +data 1 +algorithm 1 +springer 1 +systemsprogram 1 +confer 1 +work 1 +hybrid 1 +group 1 +summer 1 +marktoberdorf 1 +lubeck 1 +specif 1 +technic 1 +sigcs 1 +proceed 1 +note 1 +mission 1 +develop 1 +design 1 +investig 1 +order 1 +equat 1 +term 1 +partial 1 +build 1 +manag 1 +machin 1 +issu 1 +realiz 1 +given 1 +final 1 +johansen 1 +robbert 1 +reness 1 +tacoma 1 +project 1 +mobil 1 +network 1 +stoller 1 +detect 1 +asynchron 1 +pass 1 +ieee 1 +engineeringeditor 1 +high 1 +integr 1 +mathemat 1 +fifth 1 +ifip 1 +depend 1 +verif 1 +isat 1 +defens 1 +warfar 1 +studi 1 +advanc 1 +associ 1 +meet 1 +verifi 1 +synchron 1 +moder 1 +refin 1 +polici 1 +munich 1 +chapel 1 +hill 1 +ad 1 +tool 1 +educ 1 +nashvil 1 +tennesse 1 +israel 1 +april 1 +june 1 +verlag 1 +york 1 +forc 1 +home 1 +denis 1 +fred 0 +schneiderfr 0 +schneider 0 +professorphd 0 +univ 0 +stoni 0 +brook 0 +understand 0 +increasingli 0 +import 0 +widespread 0 +focus 0 +heavili 0 +involv 0 +appli 0 +assert 0 +textbook 0 +subject 0 +along 0 +david 0 +continu 0 +concern 0 +first 0 +year 0 +streamlin 0 +infer 0 +rule 0 +evalu 0 +number 0 +handl 0 +undefin 0 +function 0 +thoma 0 +bressoud 0 +analyz 0 +replic 0 +risc 0 +ensur 0 +sequenc 0 +run 0 +differ 0 +physic 0 +processor 0 +ident 0 +also 0 +attract 0 +least 0 +theori 0 +instantli 0 +avail 0 +hardwar 0 +includ 0 +exist 0 +written 0 +second 0 +singl 0 +suffic 0 +everi 0 +programm 0 +freed 0 +task 0 +jointli 0 +start 0 +move 0 +structur 0 +construct 0 +commun 0 +bandwidth 0 +conserv 0 +access 0 +site 0 +resid 0 +typic 0 +filter 0 +otherwis 0 +reduc 0 +read 0 +carri 0 +relev 0 +roam 0 +prototyp 0 +third 0 +experi 0 +scott 0 +whether 0 +particular 0 +could 0 +global 0 +satisfi 0 +predic 0 +allow 0 +effici 0 +possibl 0 +previou 0 +activitiessabbat 0 +leav 0 +profession 0 +activitieseditor 0 +chief 0 +computingeditor 0 +letterseditor 0 +transact 0 +systemseditor 0 +annal 0 +surveysco 0 +editor 0 +text 0 +monograph 0 +verlagprogram 0 +constructionprogram 0 +respons 0 +compos 0 +resili 0 +applicationsprogram 0 +sixteenth 0 +symposiumprogram 0 +dimac 0 +control 0 +systemsst 0 +assur 0 +chissa 0 +nation 0 +institut 0 +standard 0 +technologymemb 0 +agencyreview 0 +leibniz 0 +hebrew 0 +universitymemb 0 +methodolog 0 +awardsfellow 0 +american 0 +sciencefellow 0 +machinerylecturesproof 0 +origin 0 +tradit 0 +banquet 0 +speech 0 +afosr 0 +grante 0 +contractor 0 +panelist 0 +compar 0 +merit 0 +model 0 +safeti 0 +write 0 +reactiv 0 +dagstuhl 0 +merg 0 +analysi 0 +georg 0 +mason 0 +virginia 0 +avoid 0 +mistak 0 +invit 0 +speaker 0 +traffic 0 +nasa 0 +am 0 +distinguish 0 +seri 0 +panel 0 +organ 0 +technion 0 +haifa 0 +place 0 +airplan 0 +view 0 +successor 0 +arpa 0 +publicationsreason 0 +colloquium 0 +icalp 0 +jerusalem 0 +materi 0 +summari 0 +boll 0 +offic 0 +scientif 0 +septemb 0 +volum 0 +limor 0 +proposit 0 +letter 0 +februari 0 +aircraft 0 +hand 0 +foundat 0 +ultradepend 0 +parallel 0 +paradigm 0 +kluwer 0 +academ 0 +publish 0 +marzullo 0 +dehn 0 +bulletin 0 +topic 0 +hoto 0 +orca 0 +island 0 +causal 0 +messag 0 +art 0 +newslett 0 +spring 0 +approach 0 +discret 0 +primu 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..5ab8dff7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,115 @@ +univers 2 +product 1 +scienc 1 +kroneck 1 +comput 1 +committe 1 +siam 1 +prize 1 +member 1 +sweden 1 +januari 1 +develop 1 +method 1 +variou 1 +solv 1 +applic 1 +process 1 +descript 1 +perform 1 +undergradu 1 +last 1 +art 1 +journal 1 +household 1 +umea 1 +build 1 +intuit 1 +ohio 1 +state 1 +april 1 +optic 1 +home 1 +cornel 1 +denis 1 +charl 0 +loancharl 0 +loanprofessorphd 0 +michigan 0 +continu 0 +particular 0 +nearest 0 +problem 0 +factor 0 +matric 0 +subject 0 +inhomogen 0 +constraint 0 +signal 0 +markov 0 +result 0 +fast 0 +wavelet 0 +transform 0 +also 0 +deriv 0 +proce 0 +analog 0 +plai 0 +import 0 +role 0 +high 0 +algorithm 0 +text 0 +work 0 +four 0 +year 0 +current 0 +translat 0 +anticip 0 +fall 0 +semest 0 +activitiescomput 0 +depart 0 +curriculum 0 +committeedepart 0 +repres 0 +chair 0 +meetingfreshman 0 +admiss 0 +reader 0 +profession 0 +activitieseditor 0 +matrix 0 +analysismemb 0 +wilkinson 0 +diprima 0 +organ 0 +confer 0 +lecturesappl 0 +linkop 0 +publicationsoptim 0 +close 0 +loop 0 +adapt 0 +multipl 0 +control 0 +bandwidth 0 +societi 0 +america 0 +ellerbroek 0 +pitsiani 0 +plemmon 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..6602b622 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,57 @@ +cornel 1 +cours 1 +univers 1 +comput 1 +scienc 1 +depart 1 +curriculum 1 +associ 1 +home 1 +denis 1 +catherin 0 +wagnercatherin 0 +wagnersenior 0 +lecturerphd 0 +primari 0 +respons 0 +teach 0 +work 0 +other 0 +revis 0 +lower 0 +level 0 +specif 0 +interest 0 +develop 0 +student 0 +prepar 0 +introductori 0 +program 0 +activitiescomput 0 +undergradu 0 +committe 0 +profession 0 +activitiesassoci 0 +symbol 0 +logic 0 +machineri 0 +institut 0 +electr 0 +electron 0 +engin 0 +women 0 +mathemat 0 +return 0 +annual 0 +report 0 +page 0 +department 0 +pageif 0 +question 0 +comment 0 +pleas 0 +contact 0 +last 0 +modifi 0 +novemb 0 +moor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..74b96949 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,350 @@ +video 3 +research 3 +process 2 +data 2 +brian 2 +smith 2 +network 2 +server 2 +client 2 +multimedia 2 +system 1 +protocol 1 +talk 1 +avail 1 +proc 1 +lawrenc 1 +row 1 +environ 1 +continu 1 +media 1 +us 1 +comput 1 +first 1 +zeno 1 +softwar 1 +workstat 1 +effort 1 +cyclic 1 +compress 1 +onlin 1 +also 1 +languag 1 +intern 1 +cornel 1 +xerox 1 +univers 1 +offic 1 +build 1 +technolog 1 +commun 1 +approach 1 +connect 1 +distribut 1 +best 1 +develop 1 +audio 1 +paper 1 +describ 1 +algorithm 1 +imag 1 +method 1 +format 1 +independ 1 +review 1 +cours 1 +third 1 +confer 1 +workshop 1 +scienc 1 +california 1 +berkelei 1 +make 1 +class 1 +type 1 +support 1 +storag 1 +contrast 1 +hardwar 1 +current 1 +file 1 +architectur 1 +ofworkst 1 +gener 1 +local 1 +area 1 +common 1 +laboratori 1 +fileserv 1 +servic 1 +design 1 +ad 1 +load 1 +across 1 +initi 1 +invest 1 +exist 1 +center 1 +resourc 1 +well 1 +suit 1 +call 1 +playback 1 +applic 1 +decompress 1 +perform 1 +must 1 +idea 1 +jpeg 1 +transcod 1 +oper 1 +specifi 1 +graphic 1 +provid 1 +qualiti 1 +select 1 +jonathan 1 +resolut 1 +francisco 1 +novemb 1 +html 1 +version 1 +ketan 1 +patel 1 +mpeg 1 +transmiss 1 +electron 1 +june 1 +faculti 0 +interest 0 +bsmith 0 +professor 0 +upson 0 +hall 0 +phone 0 +hour 0 +semest 0 +tue 0 +thur 0 +intereststeachingselect 0 +publicationsresearch 0 +talksmisc 0 +linksresearch 0 +interestsmi 0 +goal 0 +ourcomput 0 +group 0 +project 0 +andprocess 0 +commercialand 0 +requir 0 +special 0 +operatingsystem 0 +usabl 0 +aredesign 0 +premis 0 +isthat 0 +infrastructurei 0 +suffici 0 +andappl 0 +verifi 0 +hypothesi 0 +workingsystem 0 +direct 0 +toward 0 +zenodistribut 0 +anethernet 0 +simultan 0 +serverof 0 +plai 0 +videostor 0 +receiv 0 +store 0 +severalserv 0 +sever 0 +compar 0 +withlarg 0 +central 0 +advantag 0 +scalabl 0 +automat 0 +balanc 0 +serv 0 +machin 0 +case 0 +locat 0 +differ 0 +util 0 +infrastructur 0 +promot 0 +earli 0 +adopt 0 +almost 0 +around 0 +effortdeliveri 0 +built 0 +networkprotocol 0 +mani 0 +needto 0 +reserv 0 +establish 0 +resourcereserv 0 +nation 0 +communicationinfrastructur 0 +user 0 +charg 0 +basi 0 +forbandwidth 0 +poorli 0 +networkenviron 0 +share 0 +equal 0 +accessiblebi 0 +appropri 0 +latterenviron 0 +commonli 0 +found 0 +thecommun 0 +builton 0 +datagram 0 +transport 0 +audioand 0 +metropolitan 0 +andwid 0 +todeliv 0 +availableonlin 0 +slide 0 +fold 0 +aredevelop 0 +compressedrepresent 0 +without 0 +lead 0 +todramat 0 +speed 0 +sinc 0 +remov 0 +thetim 0 +consum 0 +reducesth 0 +amount 0 +experi 0 +animplement 0 +indicatesthat 0 +order 0 +magnitud 0 +fasterthan 0 +possibl 0 +previou 0 +currentlyextend 0 +parallel 0 +insoftwar 0 +translat 0 +onecompress 0 +anoth 0 +heterogen 0 +compresseddomain 0 +explor 0 +simplifyexperiment 0 +programminglanguag 0 +calledrivl 0 +pronounc 0 +rival 0 +allowsvideo 0 +effect 0 +resolutionand 0 +sourc 0 +materi 0 +whatpostscript 0 +text 0 +resolutionindepend 0 +thu 0 +sameprogram 0 +quicktim 0 +quickli 0 +whileedit 0 +decis 0 +made 0 +high 0 +qualityfinish 0 +product 0 +line 0 +much 0 +postscript 0 +bepreview 0 +sent 0 +dpiprint 0 +camera 0 +readi 0 +copi 0 +onvideo 0 +domain 0 +rivl 0 +theme 0 +tool 0 +videous 0 +teachingat 0 +teach 0 +undergradu 0 +computerarchitectur 0 +graduat 0 +public 0 +swartz 0 +asif 0 +ghia 0 +logan 0 +david 0 +chamberlin 0 +queri 0 +hum 0 +larg 0 +music 0 +inform 0 +retriev 0 +databas 0 +peter 0 +name 0 +toronto 0 +ontario 0 +canada 0 +juli 0 +prioriti 0 +driven 0 +fast 0 +motion 0 +second 0 +sanfrancisco 0 +octob 0 +represent 0 +spie 0 +symposium 0 +jose 0 +februari 0 +stephen 0 +program 0 +decod 0 +internationalconfer 0 +anaheim 0 +august 0 +famili 0 +manipul 0 +ieee 0 +septemb 0 +player 0 +supportfor 0 +digit 0 +diego 0 +recent 0 +webster 0 +site 0 +minnesota 0 +colloquium 0 +misc 0 +link 0 +work 0 +annett 0 +hanna 0 +manual 0 +mmcn 0 +proceed 0 +documentationth 0 +priceweb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..04f604ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,279 @@ +system 2 +nuprl 2 +comput 2 +us 2 +scienc 2 +theori 2 +logic 2 +program 2 +univers 1 +softwar 1 +mathemat 1 +type 1 +formal 1 +theorem 1 +design 1 +prove 1 +studi 1 +involv 1 +make 1 +line 1 +implement 1 +build 1 +method 1 +languag 1 +allen 1 +richard 1 +develop 1 +leeser 1 +weyl 1 +circuit 1 +theoret 1 +zippel 1 +connect 1 +committe 1 +israel 1 +chair 1 +cornel 1 +engag 1 +especi 1 +year 1 +major 1 +construct 1 +wide 1 +varieti 1 +version 1 +stuart 1 +eaton 1 +also 1 +how 1 +joint 1 +ventur 1 +polya 1 +algorithm 1 +gri 1 +hope 1 +explor 1 +futur 1 +journal 1 +symbol 1 +computationeditor 1 +fundament 1 +betweencomput 1 +decemb 1 +januari 1 +state 1 +notr 1 +dame 1 +march 1 +york 1 +pennsylvania 1 +springer 1 +verlag 1 +robert 0 +constablerobert 0 +constabledepart 0 +professorrc 0 +eduph 0 +wisconsin 0 +madison 0 +researchw 0 +providemechan 0 +assist 0 +problem 0 +solv 0 +programmingand 0 +long 0 +term 0 +wai 0 +theform 0 +feasibl 0 +implementedthre 0 +past 0 +experiment 0 +lispprogram 0 +sucha 0 +canexpress 0 +proof 0 +asmetalevel 0 +provid 0 +consider 0 +theoremprov 0 +power 0 +moreov 0 +canevalu 0 +content 0 +principl 0 +nuprli 0 +fomal 0 +continu 0 +improv 0 +current 0 +iscal 0 +differ 0 +predecessor 0 +termeditor 0 +itsintern 0 +structur 0 +modular 0 +suitabl 0 +hedefinit 0 +beyond 0 +built 0 +inconstruct 0 +entir 0 +mechanismha 0 +rebuilt 0 +stream 0 +paul 0 +jackson 0 +thework 0 +dougla 0 +contribut 0 +gener 0 +natur 0 +final 0 +refer 0 +isan 0 +intern 0 +descript 0 +builtprincip 0 +william 0 +aitken 0 +link 0 +internaldescript 0 +possibleto 0 +process 0 +three 0 +excit 0 +withmiriam 0 +electr 0 +engin 0 +incomput 0 +davidgri 0 +richardzippel 0 +withless 0 +hardwar 0 +synthesi 0 +verif 0 +student 0 +mark 0 +aagard 0 +thecorrect 0 +boolean 0 +minim 0 +packag 0 +compon 0 +bedrocsystem 0 +weak 0 +divis 0 +widelyus 0 +efforttaught 0 +great 0 +deal 0 +effect 0 +technolog 0 +inth 0 +hand 0 +expert 0 +user 0 +applic 0 +domain 0 +second 0 +model 0 +aprogram 0 +refin 0 +mechan 0 +david 0 +enabl 0 +write 0 +handbook 0 +themann 0 +devis 0 +programmingprocess 0 +givn 0 +definit 0 +ofpolya 0 +expect 0 +experi 0 +soon 0 +transform 0 +tryingto 0 +captur 0 +style 0 +want 0 +recent 0 +begun 0 +collabor 0 +relat 0 +thepolya 0 +effort 0 +conal 0 +mannion 0 +possibl 0 +ofus 0 +discussingproblem 0 +ssymbol 0 +algebra 0 +near 0 +thiswil 0 +scientif 0 +computingsoftwar 0 +togeth 0 +tool 0 +isbuild 0 +profession 0 +activitieseditor 0 +academ 0 +presseditor 0 +oxford 0 +pressgener 0 +licsprogram 0 +north 0 +american 0 +jumelageprogram 0 +aspect 0 +softwarerefere 0 +review 0 +nserc 0 +canada 0 +scienceunivers 0 +activitieschair 0 +recruit 0 +committeecomput 0 +facil 0 +committeeprovost 0 +mathematicslecturesform 0 +inria 0 +anniversari 0 +celebr 0 +pari 0 +franc 0 +colloquium 0 +bengurion 0 +sheva 0 +symposium 0 +aviv 0 +associ 0 +annual 0 +meet 0 +indiana 0 +metaprogram 0 +buffalo 0 +explan 0 +engineeringworkshop 0 +philadelphia 0 +publicationsform 0 +tendenc 0 +control 0 +appli 0 +bensoussan 0 +verju 0 +lectur 0 +note 0 +metalevel 0 +andmathemat 0 +manfr 0 +broi 0 +nato 0 +seri 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..c861de36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,31 @@ +adam 1 +florenc 1 +cornel 1 +univers 1 +upson 0 +hall 0 +ithaca 0 +aflorenc 0 +first 0 +year 0 +student 0 +incomput 0 +scienc 0 +atcornel 0 +professionalinterest 0 +includ 0 +simul 0 +numer 0 +analysi 0 +follow 0 +link 0 +find 0 +academicsresearchworkinterest 0 +athlet 0 +last 0 +updat 0 +sept 0 +mail 0 +mewith 0 +comment 0 +correct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..4cf0adde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,30 @@ +aguilera 1 +kawazo 1 +cornel 1 +marco 1 +page 1 +univers 1 +home 0 +depart 0 +comput 0 +scienc 0 +ithaca 0 +second 0 +year 0 +student 0 +distribut 0 +system 0 +algorithmsrandom 0 +failur 0 +detect 0 +hybrid 0 +approach 0 +solv 0 +consensusgo 0 +tour 0 +brazil 0 +check 0 +suggest 0 +warn 0 +perman 0 +constructionmarco 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..93a27d07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,33 @@ +alfr 1 +hong 1 +cornel 1 +new 1 +chines 1 +home 0 +page 0 +get 0 +dizzi 0 +offic 0 +upson 0 +hall 0 +univers 0 +ithaca 0 +usaoffic 0 +phone 0 +worthwhil 0 +site 0 +check 0 +sinanet 0 +taiwan 0 +requir 0 +nandonet 0 +sunworld 0 +javaworldcours 0 +stuff 0 +corba 0 +essenti 0 +annot 0 +bibliographyc 0 +project 0 +reportalfr 0 +ahong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..256e31f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,58 @@ +comput 1 +cornel 1 +system 1 +high 1 +depart 1 +architectur 1 +network 1 +perform 1 +final 1 +project 1 +postscript 1 +vineet 0 +home 0 +pagevineet 0 +ahujam 0 +engg 0 +sciencecornel 0 +universityithaca 0 +address 0 +hasbrouck 0 +apt 0 +ithaca 0 +mail 0 +ahuja 0 +academ 0 +student 0 +scienc 0 +univers 0 +main 0 +area 0 +interest 0 +parallel 0 +process 0 +advanc 0 +oper 0 +object 0 +orient 0 +program 0 +coursework 0 +fall 0 +automata 0 +theori 0 +engin 0 +spring 0 +report 0 +softwar 0 +design 0 +reportfal 0 +capac 0 +inform 0 +multimedia 0 +resum 0 +recent 0 +html 0 +page 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..bb070fb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,21 @@ +ching 1 +chinglan 0 +cornel 0 +edumast 0 +engin 0 +student 0 +comput 0 +scienc 0 +depart 0 +beau 0 +pair 0 +address 0 +seneca 0 +ithaca 0 +telephon 0 +page 0 +still 0 +construct 0 +java 0 +examplegraph 0 +project 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..1eb208ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,82 @@ +cornel 2 +univers 1 +technic 1 +report 1 +alexei 1 +home 1 +comput 1 +distribut 1 +system 1 +group 1 +new 1 +weather 1 +pagealexei 0 +vaysburdalexei 0 +program 0 +scienc 0 +research 0 +interest 0 +includ 0 +commun 0 +larg 0 +scale 0 +wide 0 +area 0 +andobject 0 +orient 0 +tool 0 +work 0 +within 0 +horu 0 +lead 0 +birman 0 +androbbert 0 +reness 0 +paper 0 +implement 0 +replic 0 +state 0 +machin 0 +partition 0 +network 0 +friedman 0 +vaysburd 0 +link 0 +search 0 +engin 0 +lyco 0 +broadcast 0 +seri 0 +ecol 0 +polytechniqu 0 +federal 0 +lausann 0 +hebrew 0 +transi 0 +page 0 +cuinfo 0 +gopher 0 +direct 0 +cornellcornel 0 +mail 0 +directorycornel 0 +staff 0 +directori 0 +student 0 +directorycours 0 +class 0 +examscornel 0 +calendarcornel 0 +art 0 +musicbailei 0 +hall 0 +concertscornel 0 +music 0 +event 0 +ithaca 0 +current 0 +condit 0 +ithacaworld 0 +brief 0 +odessa 0 +odessaweb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..6c7c54e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,170 @@ +simul 1 +compress 1 +chip 1 +graphic 1 +yama 1 +ride 1 +train 1 +us 1 +imag 1 +data 1 +hell 1 +heaven 1 +better 1 +comput 1 +microsystem 1 +avenu 1 +cupertino 1 +javasoft 1 +coursesvlsi 1 +engin 1 +system 1 +project 1 +includ 1 +landscap 1 +environ 1 +cours 1 +memori 1 +pattern 1 +softwar 1 +pyramania 1 +game 1 +accord 0 +hindu 0 +mytholog 0 +death 0 +come 0 +downto 0 +earth 0 +give 0 +though 0 +kind 0 +imparti 0 +mess 0 +amithyamasanim 0 +engg 0 +depart 0 +scienc 0 +cornel 0 +univers 0 +ithaca 0 +yorki 0 +make 0 +monei 0 +garcia 0 +mountain 0 +view 0 +mailstop 0 +ucup 0 +watch 0 +warburton 0 +santa 0 +clara 0 +mail 0 +amith 0 +yamasani 0 +comi 0 +currentlyemploi 0 +california 0 +workingin 0 +javamedia 0 +groupeducationfal 0 +multimedia 0 +cssoftwar 0 +csspring 0 +high 0 +perform 0 +cscomput 0 +csproject 0 +final 0 +mpeg 0 +descript 0 +parallel 0 +trace 0 +propos 0 +virtual 0 +realiti 0 +railroad 0 +master 0 +involv 0 +railroadsystem 0 +track 0 +switch 0 +station 0 +user 0 +interact 0 +wall 0 +cave 0 +stereo 0 +vision 0 +done 0 +processor 0 +onyx 0 +softwarei 0 +written 0 +openinventord 0 +librari 0 +silicon 0 +gener 0 +dynam 0 +ofth 0 +through 0 +documentimag 0 +chipdevelop 0 +vlsi 0 +basicallycompress 0 +stream 0 +algorithm 0 +thisalgorithm 0 +suit 0 +control 0 +extern 0 +cachecam 0 +content 0 +address 0 +store 0 +inputstream 0 +hope 0 +might 0 +occur 0 +capableof 0 +rate 0 +byte 0 +nano 0 +second 0 +nowai 0 +routin 0 +speed 0 +snap 0 +video 0 +transit 0 +rivlproposalpresentationand 0 +sampl 0 +dthi 0 +develop 0 +itswritten 0 +interfac 0 +xlib 0 +thed 0 +render 0 +take 0 +look 0 +snapshot 0 +screen 0 +parallelomania 0 +resumehtmlpostscript 0 +past 0 +present 0 +futur 0 +pal 0 +home 0 +page 0 +satyaprasad 0 +avinashgupta 0 +kartikh 0 +kapadia 0 +hrishikeshdixit 0 +joselui 0 +fernandez 0 +vineetahuja 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..14a71b05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,89 @@ +ankit 1 +patel 1 +cornel 1 +comput 1 +cours 1 +multimedia 1 +project 1 +virtual 1 +realiti 1 +homepag 0 +east 0 +state 0 +street 0 +ithaca 0 +apatel 0 +person 0 +photo 0 +galleria 0 +chronologia 0 +curriculum 0 +vita 0 +universityresumedepart 0 +scienc 0 +enrolledgradu 0 +research 0 +assist 0 +prof 0 +brian 0 +smith 0 +group 0 +zeno 0 +canvasd 0 +toolkit 0 +interact 0 +platform 0 +independ 0 +develop 0 +high 0 +perform 0 +graphic 0 +applic 0 +video 0 +conferencingmultimedia 0 +system 0 +read 0 +assignmentsproject 0 +kernel 0 +endpoint 0 +netan 0 +annot 0 +bibliographi 0 +common 0 +object 0 +request 0 +broker 0 +architectur 0 +corba 0 +critiqu 0 +understand 0 +limit 0 +causal 0 +total 0 +order 0 +commun 0 +david 0 +cheriton 0 +dale 0 +skeen 0 +carnegi 0 +mellon 0 +summer 0 +school 0 +scienceworld 0 +wide 0 +technolog 0 +spring 0 +link 0 +real 0 +time 0 +support 0 +multimediamaharaja 0 +sayajirao 0 +univers 0 +academicsfriend 0 +techoreli 0 +industri 0 +limitedjob 0 +profilelif 0 +relianc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..8bdb648a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,15 @@ +comput 1 +ashish 0 +jhaveriashish 0 +jhaveridepart 0 +sciencemast 0 +engineeringresumehtmlpost 0 +scriptcourseworkadvanc 0 +databas 0 +systemscsmultimedia 0 +systemscsengin 0 +networkscsprogram 0 +languag 0 +softwareengin 0 +csashish 0 +jhaveri 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..abdcf324 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,39 @@ +aswin 1 +swing 1 +cornel 1 +danc 1 +berg 1 +graduat 1 +student 1 +phone 1 +program 1 +transform 1 +person 1 +page 1 +eduoffic 0 +upson 0 +hall 0 +home 0 +skyacr 0 +drive 0 +ithaca 0 +interest 0 +languag 0 +systemmi 0 +life 0 +famili 0 +album 0 +annek 0 +server 0 +jean 0 +deejay 0 +guid 0 +isdn 0 +record 0 +hop 0 +pictur 0 +nederlands 0 +club 0 +dutch 0 +clubi 0 +atcornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..6a0daf0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,122 @@ +system 1 +piec 1 +game 1 +five 1 +point 1 +home 1 +oper 1 +user 1 +trap 1 +comput 1 +multimedia 1 +distribut 1 +hoca 1 +chip 1 +like 1 +network 1 +requir 1 +player 1 +ipng 1 +space 1 +next 1 +gener 1 +interfac 1 +page 1 +subject 1 +welcomeavinash 0 +guptam 0 +engg 0 +depart 0 +sciencecornel 0 +universityithaca 0 +address 0 +magazin 0 +streetcambridg 0 +mail 0 +avinash 0 +thecia 0 +resumemi 0 +main 0 +area 0 +interest 0 +graphic 0 +cours 0 +softwar 0 +engin 0 +high 0 +perform 0 +project 0 +pronounc 0 +hodja 0 +full 0 +fledg 0 +cornel 0 +hypothet 0 +instruct 0 +processor 0 +support 0 +featur 0 +multipl 0 +multitask 0 +virtual 0 +memori 0 +scene 0 +transit 0 +effect 0 +rivl 0 +presentationpent 0 +skillpent 0 +mean 0 +board 0 +skill 0 +object 0 +horizont 0 +vertic 0 +diagon 0 +earn 0 +oppon 0 +even 0 +number 0 +either 0 +pair 0 +reach 0 +first 0 +win 0 +window 0 +screenshot 0 +gamedownload 0 +implement 0 +ipvimpl 0 +internet 0 +protocol 0 +us 0 +make 0 +abl 0 +stream 0 +proposalprogress 0 +reportsam 0 +caveat 0 +appli 0 +almost 0 +everi 0 +pageon 0 +still 0 +construct 0 +brows 0 +internethytelnetth 0 +librari 0 +catalogeinet 0 +galaxyplanet 0 +earth 0 +pagejoel 0 +hierarch 0 +indexyahoo 0 +guid 0 +wwwwebcrawlerlycosmi 0 +friend 0 +link 0 +sign 0 +guestbook 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..ed5434db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,257 @@ +peopl 2 +also 1 +comput 1 +page 1 +cornel 1 +high 1 +live 1 +work 1 +home 1 +jonathan 1 +barber 1 +cours 1 +research 1 +futur 1 +call 1 +list 1 +finish 1 +program 1 +summer 1 +project 1 +implement 1 +good 1 +favorit 1 +bulli 1 +upson 1 +interest 1 +ponch 1 +goe 1 +student 1 +master 1 +engin 1 +time 1 +develop 1 +protocol 1 +parallel 1 +past 1 +semest 1 +video 1 +product 1 +taken 1 +perform 1 +friend 1 +sugata 1 +paper 1 +encrypt 1 +oregon 1 +like 1 +write 1 +locat 1 +hous 1 +inde 1 +georg 1 +bush 1 +keep 1 +graphic 1 +need 1 +fleshpooooooooooooooch 0 +hall 0 +cool 0 +dude 0 +felt 0 +inclin 0 +chang 0 +first 0 +sentenc 0 +birth 0 +name 0 +usual 0 +callm 0 +orpooch 0 +us 0 +lake 0 +fork 0 +guppi 0 +skin 0 +mama 0 +phin 0 +degre 0 +graduat 0 +computersci 0 +cornellunivers 0 +undergrad 0 +scienc 0 +well 0 +grew 0 +coupl 0 +town 0 +jersei 0 +attendedmontgomeri 0 +school 0 +current 0 +collegetown 0 +adjac 0 +campu 0 +cheap 0 +sublet 0 +oncolleg 0 +ultra 0 +conveni 0 +sinc 0 +plai 0 +half 0 +mile 0 +radiu 0 +went 0 +greek 0 +spent 0 +epsilon 0 +fratern 0 +border 0 +cayuga 0 +height 0 +kind 0 +hard 0 +believ 0 +dai 0 +hand 0 +prof 0 +thorsten 0 +eicken 0 +packet 0 +filter 0 +thefilt 0 +bandwidth 0 +network 0 +bring 0 +person 0 +taught 0 +week 0 +session 0 +ofc 0 +fundament 0 +concept 0 +introductori 0 +year 0 +teach 0 +assist 0 +forc 0 +introduct 0 +digit 0 +system 0 +organ 0 +fall 0 +spring 0 +intereststhi 0 +rivl 0 +resolut 0 +independ 0 +languag 0 +multimedia 0 +group 0 +final 0 +labor 0 +report 0 +continu 0 +media 0 +rivlan 0 +improv 0 +object 0 +tracker 0 +rivli 0 +compil 0 +mukhopadhyai 0 +still 0 +class 0 +smpd 0 +code 0 +generatorfor 0 +fortran 0 +base 0 +linear 0 +algebra 0 +framework 0 +come 0 +soon 0 +previou 0 +greg 0 +weber 0 +webar 0 +smart 0 +frame 0 +depend 0 +stream 0 +mpeg 0 +unfortun 0 +electron 0 +form 0 +trust 0 +look 0 +bright 0 +pack 0 +gear 0 +take 0 +perman 0 +road 0 +trip 0 +hopefulli 0 +buddi 0 +surfer 0 +inform 0 +share 0 +pleas 0 +intel 0 +corp 0 +ahead 0 +pictur 0 +portland 0 +clarif 0 +actual 0 +thought 0 +would 0 +pretti 0 +resourceful 0 +camera 0 +room 0 +window 0 +pipe 0 +meanth 0 +gener 0 +line 0 +comrad 0 +realli 0 +nice 0 +ofhi 0 +found 0 +lucki 0 +consid 0 +least 0 +know 0 +decid 0 +otherwis 0 +captain 0 +swirl 0 +nefari 0 +toilet 0 +mukhopadyai 0 +bonei 0 +magoo 0 +fletop 0 +bigro 0 +dave 0 +koster 0 +bot 0 +offici 0 +tffl 0 +pageuuencod 0 +pagetar 0 +zip 0 +pageif 0 +brows 0 +feel 0 +urg 0 +send 0 +mail 0 +downloadsgraphicsbarb 0 +gifponch 0 +htmlres_htmlres_curemmittemmitt 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..cbfdc4c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,202 @@ +anindya 2 +comput 1 +cornel 1 +commun 1 +messag 1 +work 1 +cluster 1 +thorsten 1 +eicken 1 +perform 1 +activ 1 +like 1 +basu 1 +also 1 +parallel 1 +homepag 1 +realli 1 +photograph 1 +scienc 1 +univers 1 +ithaca 1 +phone 1 +graduat 1 +student 1 +complet 1 +love 1 +would 1 +stuff 1 +us 1 +layer 1 +program 1 +develop 1 +fast 1 +implement 1 +latenc 1 +proceed 1 +network 1 +vineet 1 +buch 1 +appear 1 +look 0 +click 0 +recent 0 +mugshot 0 +locat 0 +home 0 +upson 0 +hall 0 +delawar 0 +avenu 0 +depart 0 +offic 0 +mail 0 +eduwhat 0 +scienceat 0 +hope 0 +turn 0 +centuri 0 +listen 0 +rock 0 +musicor 0 +goof 0 +thecornel 0 +projectwith 0 +advisor 0 +think 0 +coollik 0 +indiawho 0 +went 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +hardpink 0 +floydfanat 0 +final 0 +realiz 0 +childhood 0 +dream 0 +see 0 +livelast 0 +summer 0 +philadelphia 0 +plum 0 +known 0 +thegreat 0 +unwash 0 +pelham 0 +grenvil 0 +wodehous 0 +attendedwoodstock 0 +truli 0 +motiv 0 +onlinewoodstock 0 +review 0 +woodstock 0 +experienc 0 +metallica 0 +live 0 +first 0 +time 0 +thesocc 0 +worldcup 0 +last 0 +year 0 +butunfortun 0 +happen 0 +cook 0 +tri 0 +putsomerecip 0 +onlin 0 +fanci 0 +connoisseurof 0 +good 0 +whiski 0 +especi 0 +singl 0 +malt 0 +link 0 +cool 0 +calvin 0 +hobb 0 +archiv 0 +south 0 +asian 0 +writer 0 +cheer 0 +monti 0 +python 0 +beavi 0 +buttheadoth 0 +cuinfo 0 +gopherand 0 +someth 0 +differ 0 +projectwhich 0 +aim 0 +provid 0 +high 0 +ofworkst 0 +compar 0 +state 0 +mpp 0 +acheiv 0 +passinglay 0 +call 0 +split 0 +thatshow 0 +run 0 +meiko 0 +addit 0 +show 0 +saturateth 0 +fibr 0 +segment 0 +size 0 +collabor 0 +withth 0 +berkelei 0 +project 0 +team 0 +specificationfor 0 +enabl 0 +processesboth 0 +trust 0 +untrust 0 +environ 0 +public 0 +mechan 0 +integr 0 +david 0 +culler 0 +seth 0 +goldstein 0 +klau 0 +schauser 0 +symp 0 +architectur 0 +gold 0 +coast 0 +australia 0 +veena 0 +avula 0 +present 0 +interconnect 0 +palo 0 +alto 0 +abridgedvers 0 +paper 0 +ieee 0 +micro 0 +februari 0 +user 0 +level 0 +interfac 0 +distribut 0 +werner 0 +vogel 0 +sosp 0 +back 0 +homepagelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..b1f222e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,105 @@ +cornel 1 +check 1 +weather 1 +jpop 1 +internet 1 +comput 1 +scienc 1 +area 1 +librari 1 +interact 1 +music 1 +navig 1 +info 1 +simpl 0 +homepageben 0 +haogradu 0 +studentbhao 0 +educornel 0 +universitydept 0 +upson 0 +hallithaca 0 +insid 0 +flea 0 +forth 0 +year 0 +student 0 +incomput 0 +work 0 +code 0 +synthesi 0 +massiv 0 +parallel 0 +processor 0 +advisor 0 +richard 0 +zippel 0 +first 0 +heard 0 +georg 0 +taylorwhen 0 +microsystem 0 +famili 0 +francisco 0 +find 0 +inform 0 +aboutth 0 +read 0 +local 0 +newspap 0 +forget 0 +itsgorg 0 +life 0 +cornellwhat 0 +ithaca 0 +movi 0 +studi 0 +dept 0 +home 0 +page 0 +gener 0 +neat 0 +stuff 0 +stock 0 +map 0 +frog 0 +dissectionmagazin 0 +intertext 0 +magazin 0 +wire 0 +magazinea 0 +peek 0 +galaxi 0 +view 0 +solar 0 +system 0 +shoemak 0 +levi 0 +weblouvr 0 +xmorphia 0 +galleri 0 +line 0 +geometri 0 +kaleidospac 0 +overview 0 +bonsai 0 +seiyuu 0 +underground 0 +archivenetwork 0 +global 0 +network 0 +wander 0 +spider 0 +edgelibrari 0 +congress 0 +martial 0 +scientif 0 +tutori 0 +infonih 0 +courseth 0 +intern 0 +guidecern 0 +seminar 0 +last 0 +modifi 0 +bhao 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..e5eef4a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,23 @@ +aastha 0 +home 0 +pageaastha 0 +bhardwajdepart 0 +comput 0 +sciencemast 0 +ofengineeeringresumehtmlpost 0 +scriptcourseworkadvanceddatabas 0 +system 0 +csmultimediasystem 0 +csengineeringcomput 0 +network 0 +cssoftwareengin 0 +program 0 +languag 0 +cscontact 0 +inform 0 +hasbrouck 0 +apart 0 +ithaca 0 +york 0 +bhardwaj 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..18e30a84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,283 @@ +robot 3 +cornel 2 +intern 2 +vision 2 +algorithm 2 +ieee 2 +paper 2 +macdonald 1 +arrai 1 +plan 1 +tommi 1 +lili 1 +donald 1 +manipul 1 +inform 1 +part 1 +mobil 1 +bhringer 1 +confer 1 +invari 1 +pictur 1 +click 1 +laboratori 1 +actuat 1 +us 1 +programm 1 +micro 1 +workshop 1 +distribut 1 +jen 1 +massiv 1 +parallel 1 +feeder 1 +group 1 +sensor 1 +system 1 +bound 1 +vector 1 +field 1 +vibratori 1 +foundat 1 +franc 1 +mem 1 +diego 1 +autom 1 +kinodynam 1 +xavier 1 +journal 1 +proc 1 +hereto 1 +peopl 1 +randal 1 +home 1 +page 1 +huttenloch 1 +thecornel 1 +noel 1 +chip 1 +small 1 +student 1 +work 1 +develop 1 +team 1 +demo 1 +fabric 1 +video 1 +chase 1 +ourlab 1 +recent 1 +toulous 1 +brigg 1 +task 1 +california 1 +cannot 1 +icra 1 +minneapoli 1 +minnesota 1 +april 1 +provablygood 1 +approxim 1 +optim 1 +algorithmica 1 +experiment 1 +intellig 1 +book 1 +artificialintellig 1 +ree 1 +couch 1 +plai 1 +bruce 0 +donaldbruc 0 +donaldassoci 0 +professorbrd 0 +eduph 0 +weather 0 +palo 0 +alto 0 +offici 0 +department 0 +laboratorydan 0 +found 0 +researchmi 0 +interest 0 +includ 0 +professor 0 +build 0 +microactu 0 +nation 0 +nanofabr 0 +arrayi 0 +scream 0 +contain 0 +squarecentemet 0 +orient 0 +without 0 +sensoryfeedback 0 +microfabr 0 +could 0 +toconstruct 0 +scale 0 +buildself 0 +propel 0 +walk 0 +vlsi 0 +graduat 0 +amybrigg 0 +surveil 0 +ofmobil 0 +board 0 +detect 0 +andintercept 0 +target 0 +mpeg 0 +built 0 +developedbi 0 +track 0 +follow 0 +visual 0 +informationalon 0 +show 0 +view 0 +face 0 +morph 0 +select 0 +public 0 +upper 0 +andlow 0 +applic 0 +memsand 0 +thealgorithm 0 +robustgeometr 0 +internationalworkshop 0 +singl 0 +crystal 0 +silicon 0 +electro 0 +mechan 0 +februari 0 +classif 0 +lower 0 +andimprov 0 +partsfeed 0 +partii 0 +robotswith 0 +decoupl 0 +dynam 0 +forcartesian 0 +open 0 +chain 0 +motion 0 +canni 0 +reif 0 +research 0 +inpress 0 +minim 0 +supermodular 0 +andtheoret 0 +artifici 0 +jetai 0 +press 0 +write 0 +entitl 0 +draft 0 +firstquart 0 +appear 0 +revis 0 +base 0 +experi 0 +inminim 0 +symposium 0 +iser 0 +stanford 0 +move 0 +furnitur 0 +automon 0 +societi 0 +ofjapan 0 +iro 0 +pittsburgh 0 +sensorlessmanipul 0 +actuatorarrai 0 +mihailovich 0 +andautom 0 +detail 0 +explan 0 +program 0 +scheme 0 +automationnic 0 +first 0 +ofrobot 0 +peter 0 +boston 0 +wilson 0 +andj 0 +latomb 0 +automat 0 +configur 0 +direct 0 +proceed 0 +otherpubl 0 +these 0 +post 0 +doc 0 +train 0 +daniela 0 +jonathan 0 +dinesh 0 +aval 0 +server 0 +list 0 +version 0 +onlin 0 +tech 0 +report 0 +librari 0 +catalogc 0 +indexobtain 0 +copi 0 +paperscopi 0 +avail 0 +anonym 0 +teamof 0 +autonom 0 +movefurnitur 0 +around 0 +portrait 0 +mobot 0 +push 0 +rotat 0 +apictur 0 +drawn 0 +loretta 0 +pompilio 0 +discoverychannel 0 +beyond 0 +find 0 +funa 0 +poem 0 +alfr 0 +mail 0 +agent 0 +famili 0 +harm 0 +swallow 0 +ithaca 0 +sometim 0 +moreoth 0 +depart 0 +herefor 0 +search 0 +tool 0 +access 0 +stuff 0 +return 0 +level 0 +clickher 0 +tallest 0 +darkest 0 +lead 0 +hollywood 0 +merian 0 +cooper 0 +wrai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..0e8ef491 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,242 @@ +cardi 2 +learn 2 +languag 2 +intellig 2 +natur 2 +proceed 2 +artifici 2 +machin 2 +press 2 +confer 2 +research 1 +base 1 +system 1 +us 1 +knowledg 1 +acquisit 1 +approach 1 +aaai 1 +annual 1 +case 1 +techniqu 1 +work 1 +project 1 +linguist 1 +sentenc 1 +analysi 1 +domain 1 +process 1 +univers 1 +thesi 1 +cognit 1 +cornel 1 +understand 1 +inform 1 +kenmor 1 +featur 1 +rel 1 +pronoun 1 +note 1 +specif 1 +massachusett 1 +amherst 1 +nation 1 +link 1 +report 1 +focu 1 +area 1 +naturallanguag 1 +group 1 +interest 1 +guid 1 +develop 1 +extract 1 +part 1 +technic 1 +text 1 +knowledgeacquisit 1 +corpora 1 +tag 1 +larger 1 +select 1 +riloff 1 +workshop 1 +intern 1 +chapter 1 +contain 1 +page 1 +tenth 1 +disambigu 1 +associ 1 +comput 1 +jose 1 +scienc 1 +societi 1 +lawrenc 1 +erlbaumassoci 1 +lehnert 1 +paper 1 +clair 0 +cardieclair 0 +assist 0 +professor 0 +upson 0 +hallphon 0 +email 0 +educlick 0 +interestscours 0 +teachselect 0 +publicationsnlp 0 +entri 0 +depart 0 +amalgam 0 +softbal 0 +stat 0 +finger 0 +lake 0 +tandem 0 +tour 0 +westi 0 +interestsalthough 0 +span 0 +number 0 +subfield 0 +within 0 +includ 0 +reason 0 +cognitivemodel 0 +primarili 0 +investig 0 +machinelearn 0 +tool 0 +forexplor 0 +mechan 0 +underli 0 +focus 0 +tworel 0 +design 0 +user 0 +train 0 +effici 0 +reliablyextract 0 +import 0 +document 0 +cstr 0 +tosupport 0 +content 0 +brows 0 +autom 0 +task 0 +compris 0 +build 0 +gener 0 +kenmoreacquir 0 +combin 0 +symbol 0 +learningtechniqu 0 +robust 0 +tworeal 0 +world 0 +perform 0 +speech 0 +semant 0 +andconcept 0 +activ 0 +find 0 +anteced 0 +current 0 +extend 0 +handl 0 +addit 0 +disambiguationtask 0 +evalu 0 +learningcompon 0 +context 0 +applic 0 +isembed 0 +goal 0 +determin 0 +condit 0 +expect 0 +offer 0 +cost 0 +effect 0 +teachingc 0 +spring 0 +foundat 0 +practicum 0 +inartifici 0 +fall 0 +seminar 0 +understandingselect 0 +publicationsautom 0 +conferenceon 0 +empir 0 +method 0 +pennsylvania 0 +embed 0 +agener 0 +framework 0 +wermter 0 +scheler 0 +gabriel 0 +connectionist 0 +statist 0 +andsymbol 0 +lectur 0 +springer 0 +origin 0 +present 0 +tolearn 0 +jointconfer 0 +ijcai 0 +introduct 0 +conceptu 0 +file 0 +introductori 0 +conceptualsent 0 +avail 0 +cmpsci 0 +eleventh 0 +washington 0 +decis 0 +tree 0 +improv 0 +morgan 0 +kaufmann 0 +corpu 0 +heurist 0 +newark 0 +bias 0 +fourteenth 0 +bloomington 0 +onconstrain 0 +prior 0 +plausibl 0 +complic 0 +syntax 0 +ninth 0 +anaheim 0 +analyz 0 +citat 0 +twelfth 0 +cambridg 0 +linkscomput 0 +linguistics 0 +print 0 +archiv 0 +databas 0 +recent 0 +aclspeci 0 +learningmachin 0 +digestmachinelearn 0 +resourc 0 +researchersmachin 0 +home 0 +penn 0 +treebank 0 +repositori 0 +pointer 0 +code 0 +variou 0 +compon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..849ff369 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,88 @@ +chew 1 +geometr 1 +gener 1 +mathemat 1 +paul 1 +research 1 +univers 1 +cornel 1 +work 1 +delaunai 1 +creat 1 +applic 1 +mesh 1 +level 1 +softwar 1 +us 1 +senior 0 +associatephd 0 +purdu 0 +eduappletsy 0 +need 0 +java 0 +compat 0 +beta 0 +version 0 +browser 0 +asnetscap 0 +make 0 +voronoi 0 +applet 0 +avoronoi 0 +diagram 0 +triangul 0 +click 0 +point 0 +agenda 0 +primari 0 +interest 0 +algorithm 0 +emphasi 0 +onpract 0 +practic 0 +includedplac 0 +motion 0 +plan 0 +shape 0 +comparison 0 +vision 0 +sens 0 +exampl 0 +issu 0 +thataris 0 +part 0 +problem 0 +automat 0 +scientificsoftwar 0 +goal 0 +rais 0 +isspecifi 0 +develop 0 +environ 0 +scientif 0 +canb 0 +natur 0 +high 0 +concept 0 +ofphys 0 +engin 0 +thu 0 +program 0 +specifi 0 +implicitli 0 +acollect 0 +equat 0 +symbol 0 +techniquesar 0 +transform 0 +express 0 +effectiveprogram 0 +myonlin 0 +tech 0 +reportscornel 0 +depart 0 +computerscienceth 0 +simlabprojectaddress 0 +rhode 0 +hall 0 +ithaca 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..e17bca0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,76 @@ +cornel 1 +soccer 1 +chao 1 +chang 1 +page 1 +chichao 1 +student 1 +comput 1 +network 1 +interest 1 +system 1 +activ 1 +home 0 +edui 0 +thedepart 0 +scienceat 0 +univers 0 +faculti 0 +advisor 0 +isthorsten 0 +eicken 0 +summer 0 +microsoft 0 +group 0 +click 0 +addressesand 0 +phone 0 +number 0 +research 0 +interact 0 +compil 0 +runtim 0 +andoper 0 +toward 0 +effici 0 +concurr 0 +program 0 +overheterogen 0 +tham 0 +composit 0 +messageslow 0 +latenc 0 +commun 0 +risc 0 +multimatlab 0 +matlab 0 +multipl 0 +processor 0 +design 0 +perform 0 +messag 0 +anyon 0 +sport 0 +server 0 +latest 0 +newsbraziliansocc 0 +portugues 0 +world 0 +result 0 +andhomepagesoliv 0 +guid 0 +cool 0 +stuff 0 +lubrasa 0 +luso 0 +brazilian 0 +associationu 0 +chess 0 +centerjorn 0 +brasilmi 0 +carstockmasterjayhawk 0 +basketballwww 0 +tennisserverback 0 +scienc 0 +homepagelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..2fe0d958 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,58 @@ +chee 1 +keong 1 +graduat 1 +engin 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +ithaca 1 +inform 1 +tokyo 1 +singapor 1 +liau 0 +liauwelcom 0 +home 0 +page 0 +student 0 +master 0 +programm 0 +apolog 0 +forth 0 +construct 0 +work 0 +hopefulli 0 +thing 0 +improv 0 +soon 0 +school 0 +univers 0 +fall 0 +class 0 +foundat 0 +artifici 0 +intelligencec 0 +program 0 +languag 0 +softwar 0 +engineeringc 0 +networksc 0 +advanc 0 +databas 0 +systemsbaccalaur 0 +cours 0 +institut 0 +technolog 0 +japanhomei 0 +come 0 +small 0 +countri 0 +call 0 +know 0 +find 0 +avail 0 +homepag 0 +mapl 0 +avenu 0 +edulast 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..a6dbe927 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,83 @@ +research 1 +cornel 1 +comput 1 +optim 1 +thoma 1 +center 1 +mathemat 1 +group 1 +project 1 +interest 1 +continu 1 +program 1 +problem 1 +larg 1 +scale 1 +imag 1 +linear 1 +student 1 +postdoc 1 +coleman 1 +colemanthoma 0 +colemancornel 0 +universityi 0 +professcp 0 +match 0 +http 0 +scienc 0 +depart 0 +appli 0 +also 0 +strong 0 +affili 0 +theori 0 +director 0 +applic 0 +advanc 0 +institut 0 +final 0 +member 0 +ccop 0 +faculti 0 +broadfield 0 +discret 0 +programmi 0 +concern 0 +design 0 +understand 0 +practic 0 +effici 0 +numer 0 +algorithm 0 +primari 0 +develop 0 +computationalmethod 0 +tool 0 +automat 0 +differenti 0 +reconstruct 0 +biomed 0 +parallel 0 +minim 0 +inequ 0 +nonlinear 0 +equal 0 +constraint 0 +profession 0 +activ 0 +recent 0 +paper 0 +book 0 +current 0 +former 0 +associ 0 +softwar 0 +link 0 +curriculum 0 +vita 0 +best 0 +rhode 0 +hall 0 +univers 0 +ithaca 0 +york 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..072ad9b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,166 @@ +type 2 +program 2 +languag 2 +theori 1 +home 1 +cornel 1 +theoret 1 +also 1 +compil 1 +interest 1 +semant 1 +formul 1 +intermedi 1 +research 1 +implement 1 +atyp 1 +view 1 +tractabl 1 +approxim 1 +mathemat 1 +often 1 +programminglanguag 1 +translat 1 +calculi 1 +work 1 +project 1 +system 1 +comput 1 +scienc 1 +import 1 +lord 1 +love 1 +karl 0 +crari 0 +pagekarl 0 +crarycrari 0 +eduoffic 0 +address 0 +upson 0 +halloffic 0 +phone 0 +researchbroadli 0 +speak 0 +primari 0 +design 0 +implementationand 0 +recent 0 +emphasi 0 +area 0 +subtyp 0 +object 0 +orient 0 +modular 0 +practic 0 +kmlwhich 0 +combin 0 +featur 0 +function 0 +intract 0 +richworld 0 +foundat 0 +whole 0 +perform 0 +newprogram 0 +develop 0 +result 0 +map 0 +construct 0 +anapproxim 0 +set 0 +convers 0 +paradigm 0 +well 0 +understood 0 +aminterest 0 +deepen 0 +understand 0 +relationship 0 +particularli 0 +issu 0 +mitig 0 +model 0 +seri 0 +lower 0 +calculu 0 +embed 0 +andth 0 +correspond 0 +interpret 0 +invari 0 +modelallow 0 +relat 0 +stage 0 +origin 0 +allowsth 0 +standard 0 +techniqu 0 +optim 0 +guarante 0 +safeti 0 +andcorrect 0 +care 0 +make 0 +possibl 0 +additionaloptim 0 +unavail 0 +strategi 0 +form 0 +part 0 +thenuprl 0 +hereat 0 +name 0 +come 0 +nuprl 0 +formal 0 +logic 0 +base 0 +martin 0 +automatedreason 0 +committe 0 +consist 0 +ofrobert 0 +constabl 0 +greg 0 +morrisett 0 +dexter 0 +kozen 0 +close 0 +jasonhickei 0 +select 0 +papersoth 0 +linksmark 0 +leon 0 +maintain 0 +collect 0 +ofprogram 0 +resourc 0 +cansearch 0 +technic 0 +report 0 +onlin 0 +grad 0 +life 0 +biblestudi 0 +pageth 0 +lurker 0 +guid 0 +babylon 0 +command 0 +answer 0 +jesu 0 +hear 0 +israel 0 +thelord 0 +heart 0 +andwith 0 +soul 0 +mind 0 +strength 0 +thesecond 0 +neighbor 0 +commandmentgreat 0 +mark 0 +univers 0 +pagedepart 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..0f6f3201 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,130 @@ +spars 2 +parallel 2 +comput 2 +siam 2 +linear 2 +solut 2 +least 2 +squar 2 +problem 2 +cornel 2 +scientif 2 +memori 1 +multiprocessor 1 +distribut 1 +research 1 +theori 1 +center 1 +confer 1 +factor 1 +philadelphia 1 +coleman 1 +matrix 1 +multifront 1 +proceed 1 +pothen 1 +chunguang 1 +univers 1 +advanc 1 +institut 1 +optim 1 +project 1 +algorithm 1 +numer 1 +softwar 1 +dens 1 +row 1 +seventh 1 +orthogon 1 +journal 1 +technic 1 +report 1 +ctctr 1 +larg 1 +processingfor 1 +cliqu 1 +tree 1 +sunchunguang 0 +sunphd 0 +pennsylvania 0 +state 0 +welcom 0 +home 0 +page 0 +associ 0 +also 0 +affili 0 +thecornel 0 +work 0 +close 0 +professorthoma 0 +interest 0 +algebra 0 +mathemat 0 +current 0 +ppcx 0 +program 0 +rank 0 +defici 0 +bound 0 +packag 0 +pssl 0 +psspd 0 +symmetr 0 +posit 0 +definit 0 +systemsrec 0 +lecturesparallel 0 +contain 0 +second 0 +matric 0 +coeur 0 +alen 0 +idaho 0 +octob 0 +ondistribut 0 +parallelprocess 0 +francisco 0 +februari 0 +select 0 +public 0 +deal 0 +solutionof 0 +decemb 0 +cornellunivers 0 +conferenceon 0 +process 0 +bailei 0 +bjorstad 0 +gilbert 0 +mascagni 0 +schreiber 0 +simon 0 +torczon 0 +watson 0 +map 0 +choleskyfactor 0 +septemb 0 +matriceson 0 +sixth 0 +sinovec 0 +key 0 +leuz 0 +petzold 0 +reed 0 +us 0 +fifth 0 +dongarra 0 +kennedi 0 +messina 0 +sorensen 0 +voigt 0 +compact 0 +data 0 +structuresin 0 +scale 0 +univeristi 0 +ithaca 0 +mail 0 +csun 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..ec2527cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,182 @@ +group 2 +process 2 +architectur 2 +david 1 +cooper 1 +implement 1 +horu 1 +privaci 1 +origin 1 +secur 1 +messag 1 +network 1 +research 1 +servic 1 +user 1 +current 1 +design 1 +synchroni 1 +us 1 +allow 1 +join 1 +trust 1 +member 1 +client 1 +server 1 +would 1 +scheme 1 +howev 1 +mobil 1 +privat 1 +inform 1 +devic 1 +birman 1 +page 1 +cornel 1 +involv 1 +provid 1 +authent 1 +order 1 +set 1 +virtual 1 +maintain 1 +within 1 +semant 1 +desir 1 +permit 1 +untrust 1 +relationshipsamong 1 +thesi 1 +propos 1 +solut 1 +type 1 +wish 1 +first 1 +send 1 +also 1 +prevent 1 +commun 1 +kenneth 1 +comput 1 +home 0 +postdoctor 0 +associ 0 +upson 0 +hall 0 +phone 0 +email 0 +dcooper 0 +securityarchitectur 0 +goal 0 +work 0 +layer 0 +horuswhich 0 +interact 0 +kerberosnetwork 0 +cryptograph 0 +tool 0 +toprovid 0 +mike 0 +reiter 0 +fault 0 +toler 0 +system 0 +originalimplement 0 +support 0 +model 0 +ofcomput 0 +crash 0 +failuremodel 0 +necessari 0 +honest 0 +result 0 +make 0 +assumpt 0 +anyprocess 0 +version 0 +isposs 0 +whose 0 +weaker 0 +ofvirtu 0 +untrustedprocess 0 +exampl 0 +might 0 +clientsto 0 +communicatewith 0 +accept 0 +limit 0 +command 0 +fromth 0 +respons 0 +screen 0 +horussecur 0 +arbitrari 0 +accomplish 0 +keymanag 0 +impersonateanoth 0 +trivial 0 +achieveth 0 +witha 0 +slightli 0 +higher 0 +overhead 0 +unlik 0 +enabl 0 +asclient 0 +mani 0 +complic 0 +problem 0 +inherentin 0 +static 0 +basic 0 +ofinform 0 +keep 0 +contentsof 0 +hiddenwith 0 +proper 0 +encrypt 0 +outsidersfrom 0 +determin 0 +maintainingth 0 +unlink 0 +sender 0 +recipi 0 +chaum 0 +februari 0 +sinc 0 +severaloth 0 +made 0 +improv 0 +addit 0 +staticnetwork 0 +locat 0 +carri 0 +mobilecommun 0 +gener 0 +themessag 0 +receiv 0 +reveal 0 +informationabout 0 +owner 0 +develop 0 +along 0 +advisorken 0 +protocol 0 +attack 0 +internaland 0 +extern 0 +adversari 0 +public 0 +preserv 0 +ofmobil 0 +proceed 0 +ieee 0 +symposium 0 +securityand 0 +apriv 0 +wireless 0 +anthoni 0 +mobilecomput 0 +dissert 0 +univers 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..f6fb3c2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,131 @@ +project 1 +love 1 +ever 1 +hope 1 +cyber 1 +mine 1 +home 1 +instead 1 +deidr 1 +model 1 +faith 1 +dread 1 +burn 1 +pandora 1 +abodedan 0 +abodegreet 0 +travel 0 +digit 0 +space 0 +welcom 0 +humbl 0 +prithe 0 +gentl 0 +surf 0 +shore 0 +blink 0 +teari 0 +ey 0 +rest 0 +weari 0 +kei 0 +mice 0 +born 0 +hand 0 +make 0 +thyselv 0 +abod 0 +brief 0 +autobiographi 0 +resum 0 +facial 0 +express 0 +univers 0 +emot 0 +simnet 0 +builder 0 +type 0 +game 0 +faiththei 0 +hardli 0 +prize 0 +ferro 0 +concret 0 +sai 0 +wise 0 +scorn 0 +bend 0 +ear 0 +lawyer 0 +tone 0 +scientist 0 +word 0 +need 0 +unseen 0 +unheard 0 +untouch 0 +silenc 0 +night 0 +unknown 0 +question 0 +uncertain 0 +yearn 0 +true 0 +direct 0 +field 0 +lordlovewarm 0 +friendship 0 +mindless 0 +infatu 0 +sensual 0 +romanc 0 +passion 0 +soft 0 +sigh 0 +belov 0 +poetri 0 +hopemyth 0 +favor 0 +beauteou 0 +place 0 +fault 0 +human 0 +role 0 +releas 0 +demon 0 +mani 0 +astrai 0 +pretti 0 +glimmer 0 +fals 0 +tread 0 +tortur 0 +broken 0 +road 0 +amidst 0 +thorn 0 +dark 0 +filthi 0 +soul 0 +diseas 0 +pain 0 +horror 0 +suffer 0 +reach 0 +fear 0 +tear 0 +cannot 0 +blame 0 +deed 0 +told 0 +heart 0 +take 0 +hold 0 +world 0 +would 0 +never 0 +frozen 0 +miseri 0 +cold 0 +spring 0 +etern 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..e80ebf09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,78 @@ +server 1 +page 1 +librari 1 +digit 1 +search 1 +cornel 1 +inform 1 +tool 1 +home 1 +engin 1 +gopher 1 +network 1 +refer 1 +weather 1 +dean 0 +krafft 0 +select 0 +urlsdean 0 +point 0 +interestcornel 0 +cuinfo 0 +legal 0 +institut 0 +directori 0 +project 0 +public 0 +dimund 0 +document 0 +imag 0 +understand 0 +charact 0 +recognit 0 +siam 0 +captur 0 +access 0 +cours 0 +illinoi 0 +stanford 0 +michigan 0 +berkelei 0 +librarysearch 0 +lyco 0 +veronica 0 +archi 0 +gatewai 0 +anonym 0 +site 0 +depart 0 +togeth 0 +excel 0 +collect 0 +scout 0 +report 0 +intern 0 +planet 0 +earth 0 +whole 0 +internet 0 +catalog 0 +part 0 +global 0 +navig 0 +cern 0 +faqsvari 0 +stuff 0 +head 0 +mail 0 +list 0 +audio 0 +gear 0 +folkbook 0 +folk 0 +music 0 +ithaca 0 +forecast 0 +elsewher 0 +secur 0 +index 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..252b11a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,98 @@ +comput 2 +research 1 +cornel 1 +project 1 +dean 1 +report 1 +krafft 1 +page 1 +facil 1 +consortium 1 +avail 1 +technic 1 +system 1 +dienst 1 +home 0 +director 0 +facilitiesaddress 0 +upson 0 +halldepart 0 +sciencecornel 0 +universityithaca 0 +phone 0 +current 0 +serv 0 +administr 0 +inth 0 +scienc 0 +depart 0 +guis 0 +anadministr 0 +manag 0 +support 0 +group 0 +andworri 0 +number 0 +issu 0 +includ 0 +secur 0 +network 0 +build 0 +servic 0 +side 0 +princip 0 +investig 0 +spart 0 +arpa 0 +fund 0 +five 0 +sciencedepart 0 +thecorpor 0 +nation 0 +initi 0 +cnri 0 +researchi 0 +intend 0 +rapid 0 +dissemin 0 +break 0 +technicalresearch 0 +internet 0 +well 0 +make 0 +line 0 +theexist 0 +librari 0 +member 0 +part 0 +work 0 +davi 0 +xerox 0 +employe 0 +thedesign 0 +institut 0 +carl 0 +lagoz 0 +emploi 0 +develop 0 +implement 0 +protocol 0 +disseminationov 0 +similar 0 +materi 0 +call 0 +eight 0 +univers 0 +site 0 +atechn 0 +inform 0 +ondienst 0 +pleas 0 +send 0 +email 0 +togethera 0 +select 0 +url 0 +relat 0 +thing 0 +interestedin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..1319fedb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,24 @@ +cornel 1 +home 1 +analysi 1 +divakar 0 +pagedivakar 0 +viswanathdivakar 0 +address 0 +upson 0 +hall 0 +univers 0 +ithaca 0 +graduat 0 +student 0 +comput 0 +scienc 0 +area 0 +interest 0 +isnumer 0 +advis 0 +page 0 +good 0 +place 0 +find 0 +numer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..9d92c464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,90 @@ +comput 1 +cornel 1 +diyu 1 +home 1 +databas 1 +compil 1 +engin 1 +ithaca 1 +receiv 1 +physic 1 +jersei 1 +china 1 +pagediyu 0 +daisi 0 +welcom 0 +spring 0 +cours 0 +practic 0 +distribut 0 +system 0 +translat 0 +practicum 0 +translatorsfal 0 +coursesc 0 +oper 0 +systemc 0 +softwar 0 +engineeringc 0 +advanc 0 +systemsel 0 +network 0 +telecommunicationsm 0 +projectorigin 0 +virtual 0 +realiti 0 +railroad 0 +projectsinc 0 +mayb 0 +like 0 +know 0 +littl 0 +doubt 0 +anywai 0 +current 0 +master 0 +student 0 +depart 0 +scienc 0 +univers 0 +locat 0 +central 0 +york 0 +gorgeou 0 +place 0 +live 0 +except 0 +winter 0 +last 0 +year 0 +appli 0 +institut 0 +technolog 0 +newark 0 +brought 0 +beauti 0 +campu 0 +tsinghua 0 +unviers 0 +beij 0 +also 0 +want 0 +friend 0 +miss 0 +us 0 +linksjava 0 +html 0 +tkfavorit 0 +sitestimecnnlondon 0 +timeswashington 0 +postchines 0 +digestchina 0 +new 0 +digestfeng 0 +yuanxin 0 +siart 0 +chinaloc 0 +connectionsctc 0 +sunlabweathermovi 0 +miller 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..a7b55d53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,113 @@ +document 1 +recognit 1 +comput 1 +area 1 +visual 1 +match 1 +us 1 +collabor 1 +imag 1 +huttenloch 1 +cornel 1 +research 1 +work 1 +algorithm 1 +system 1 +remot 1 +video 1 +monitor 1 +also 1 +interest 1 +electron 1 +hausdorff 1 +method 1 +implement 1 +avail 1 +teach 1 +cours 1 +program 1 +vision 1 +activ 1 +home 0 +page 0 +daniel 0 +associ 0 +professordph 0 +main 0 +rang 0 +theoret 0 +techniqu 0 +geometri 0 +applic 0 +view 0 +wide 0 +network 0 +target 0 +type 0 +commun 0 +educ 0 +compar 0 +geometr 0 +structur 0 +base 0 +fast 0 +index 0 +eigenspac 0 +approxim 0 +fraction 0 +matlab 0 +perform 0 +evalu 0 +model 0 +object 0 +track 0 +identif 0 +digipap 0 +highli 0 +compact 0 +univers 0 +viewabl 0 +format 0 +conot 0 +support 0 +share 0 +brian 0 +smith 0 +develop 0 +author 0 +offer 0 +first 0 +time 0 +spring 0 +introduct 0 +profession 0 +xerox 0 +parc 0 +process 0 +start 0 +small 0 +group 0 +investig 0 +problem 0 +chair 0 +cvpr 0 +ieee 0 +confer 0 +pattern 0 +held 0 +juan 0 +june 0 +favorit 0 +geek 0 +snowboard 0 +mountain 0 +bike 0 +without 0 +extrem 0 +sport 0 +cool 0 +stupid 0 +attitud 0 +last 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..23c65c55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,94 @@ +process 2 +algebra 2 +dsouza 1 +work 1 +tool 1 +ashvin 1 +specif 1 +design 1 +express 1 +postscript 1 +lnc 1 +cornel 0 +edui 0 +graduat 0 +student 0 +bard 0 +bloom 0 +focu 0 +thesi 0 +research 0 +develop 0 +oftool 0 +support 0 +method 0 +andverif 0 +concurr 0 +system 0 +withrespect 0 +metatheori 0 +becom 0 +immediatelyavail 0 +wide 0 +class 0 +allevi 0 +theproblem 0 +duplic 0 +effort 0 +inher 0 +custom 0 +exampl 0 +prototyp 0 +base 0 +calculu 0 +model 0 +checker 0 +simpl 0 +gso 0 +semant 0 +form 0 +part 0 +input 0 +make 0 +applic 0 +mani 0 +commonli 0 +us 0 +includ 0 +basic 0 +loto 0 +addit 0 +investig 0 +power 0 +order 0 +better 0 +understand 0 +compar 0 +final 0 +exploringappl 0 +techniqu 0 +gener 0 +bdd 0 +algebraterm 0 +full 0 +postscipt 0 +lite 0 +version 0 +also 0 +written 0 +result 0 +presentedth 0 +former 0 +comput 0 +aid 0 +verif 0 +latter 0 +foundat 0 +softwar 0 +technolog 0 +theoret 0 +computersci 0 +june 0 +present 0 +verifi 0 +compass 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..2c914f32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,53 @@ +page 1 +funda 1 +ergun 1 +cornel 1 +turkei 1 +ever 0 +improv 0 +pagefunda 0 +ergn 0 +mail 0 +eduhi 0 +welcom 0 +home 0 +name 0 +studentin 0 +comput 0 +scienc 0 +dept 0 +work 0 +programcheck 0 +prof 0 +ronitt 0 +rubinfeld 0 +researchpag 0 +also 0 +minor 0 +paint 0 +depart 0 +fine 0 +art 0 +origin 0 +come 0 +izmir 0 +undergrad 0 +bilkentunivers 0 +ankara 0 +research 0 +relat 0 +stuff 0 +warn 0 +might 0 +encounterpag 0 +written 0 +turkish 0 +angri 0 +dog 0 +risk 0 +person 0 +visit 0 +sinc 0 +alwai 0 +heavi 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..dc8631dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,183 @@ +tardo 3 +algorithm 2 +comput 2 +symposium 2 +ori 2 +appear 2 +proceed 2 +theori 2 +annual 2 +problem 2 +approxim 2 +version 2 +flow 2 +research 1 +shmoi 1 +plotkin 1 +preliminari 1 +siam 1 +scienc 1 +multicommod 1 +fast 1 +approximationalgorithm 1 +network 1 +current 1 +combinatori 1 +paper 1 +januari 1 +combinator 1 +lovasz 1 +activ 1 +cornel 1 +school 1 +oper 1 +public 1 +complex 1 +optim 1 +discret 1 +improv 1 +klein 1 +stein 1 +journal 1 +goldberg 1 +hopp 1 +polynomi 1 +kleinberg 1 +disjoint 1 +planar 1 +path 1 +graph 1 +springer 1 +verlag 1 +handbook 1 +graham 1 +grotschel 1 +north 1 +holland 1 +theorem 1 +tardosassoci 0 +professor 0 +depart 0 +upson 0 +hallcornel 0 +universityithaca 0 +phone 0 +email 0 +industri 0 +engineeringphon 0 +click 0 +daughter 0 +rebecca 0 +julia 0 +researchrec 0 +broadli 0 +speak 0 +interest 0 +includ 0 +mani 0 +aspect 0 +mostlywork 0 +particular 0 +networkproblem 0 +linear 0 +integ 0 +programmingproblem 0 +recent 0 +paperssurvei 0 +thegener 0 +assign 0 +mathemat 0 +program 0 +bound 0 +cutratio 0 +combinatorica 0 +fasterapproxim 0 +unit 0 +capac 0 +concurr 0 +problemwith 0 +applic 0 +rout 0 +find 0 +spars 0 +cut 0 +oncomput 0 +appearedin 0 +leighton 0 +makedon 0 +tragouda 0 +flowproblem 0 +system 0 +stoc 0 +special 0 +issu 0 +annualacm 0 +fraction 0 +pack 0 +cover 0 +inmathemat 0 +hasappear 0 +ieee 0 +thefound 0 +goeman 0 +williamson 0 +designproblem 0 +discretealgorithm 0 +time 0 +someevacu 0 +ondiscret 0 +quickest 0 +transship 0 +theproceed 0 +steiner 0 +direct 0 +multicut 0 +pathsproblem 0 +high 0 +diamet 0 +proceedingsof 0 +dens 0 +embed 0 +annualiee 0 +foundat 0 +rabani 0 +distribut 0 +packet 0 +switch 0 +arbitrari 0 +fleischer 0 +separ 0 +maxim 0 +violat 0 +comb 0 +inequ 0 +ipco 0 +june 0 +survei 0 +tarjan 0 +sept 0 +vlsi 0 +design 0 +kort 0 +lovaszand 0 +schrijver 0 +strongli 0 +inoptim 0 +intern 0 +congress 0 +ofmathematician 0 +kyoto 0 +tokyo 0 +computersci 0 +annot 0 +bibliographi 0 +inproc 0 +summer 0 +maastricht 0 +netherland 0 +proc 0 +networkoptim 0 +practic 0 +netflow 0 +miniato 0 +itali 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..5a72b230 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,14 @@ +franci 1 +scienc 1 +home 0 +page 0 +graduat 0 +student 0 +univers 0 +california 0 +berkeleymathemat 0 +departmentcomput 0 +departmentcornel 0 +universitycomput 0 +departmenthumorfcc 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..406c4df2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,2 @@ +felix 0 +world 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..bac04cbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,57 @@ +smith 1 +frederick 1 +cornel 1 +dexter 1 +kozen 1 +kleen 1 +algebra 1 +test 1 +technic 1 +report 1 +univers 1 +link 1 +homepag 1 +program 1 +languag 1 +homepagefrederick 0 +grad 0 +student 0 +upson 0 +halldepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +paper 0 +erni 0 +cohen 0 +complex 0 +juli 0 +complet 0 +decid 0 +april 0 +person 0 +us 0 +greg 0 +morrisett 0 +research 0 +page 0 +class 0 +take 0 +system 0 +multimedia 0 +semant 0 +math 0 +introduct 0 +analysi 0 +epicuri 0 +food 0 +zine 0 +cartalk 0 +home 0 +click 0 +clack 0 +catch 0 +sundai 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..89c7103b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,25 @@ +frank 2 +page 1 +cornel 1 +planet 0 +either 0 +matter 0 +adelstein 0 +post 0 +doctor 0 +associ 0 +xerox 0 +design 0 +research 0 +institut 0 +offic 0 +phone 0 +electron 0 +mail 0 +actual 0 +inform 0 +checkout 0 +improv 0 +happi 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..23f5d0d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,65 @@ +fred 2 +cornel 1 +languag 1 +comput 1 +apollo 1 +yuan 1 +softwar 1 +engin 1 +current 1 +soon 1 +http 1 +visual 1 +affili 1 +theori 1 +center 1 +univers 1 +chelmsford 1 +version 0 +feet 0 +shown 0 +resum 0 +project 0 +other 0 +appear 0 +distribut 0 +server 0 +scramo 0 +midi 0 +choreograph 0 +anim 0 +model 0 +postscript 0 +vpla 0 +program 0 +animationlink 0 +massachusett 0 +hewlett 0 +packardlink 0 +previou 0 +scienc 0 +group 0 +binghamton 0 +lawrenc 0 +berkelei 0 +laboratoryinterest 0 +hobbi 0 +section 0 +develop 0 +mayb 0 +next 0 +year 0 +photographi 0 +cello 0 +guitar 0 +aquarium 0 +sciencecornel 0 +home 0 +burl 0 +work 0 +email 0 +fredhsu 0 +snail 0 +drive 0 +peopl 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..044619d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,149 @@ +comput 2 +scienc 1 +deepak 1 +back 1 +main 1 +page 1 +engin 1 +multimedia 1 +prof 1 +univers 1 +cornel 1 +major 1 +cours 1 +system 1 +well 1 +long 1 +world 1 +born 1 +school 1 +balakrishna 1 +undergradu 1 +colleg 1 +india 1 +graduat 1 +pursu 1 +master 1 +degre 1 +actual 1 +four 1 +lucki 1 +part 1 +balakrishnamast 0 +engineeringdepart 0 +sciencecornel 0 +resumeeducationcoursesperson 0 +resum 0 +html 0 +postscript 0 +educ 0 +complet 0 +june 0 +karnataka 0 +region 0 +surathk 0 +interest 0 +relat 0 +oper 0 +artifici 0 +intellig 0 +compil 0 +construct 0 +data 0 +commun 0 +graphic 0 +present 0 +specialis 0 +want 0 +involv 0 +project 0 +deal 0 +server 0 +program 0 +follow 0 +list 0 +taken 0 +fall 0 +semest 0 +brian 0 +smith 0 +advanc 0 +databas 0 +praveen 0 +seshadri 0 +network 0 +srinivasan 0 +keshav 0 +softwar 0 +michael 0 +godfrei 0 +person 0 +start 0 +goe 0 +upon 0 +time 0 +novemb 0 +land 0 +call 0 +bharat 0 +outsid 0 +precis 0 +cute 0 +chubbi 0 +littl 0 +babi 0 +weigh 0 +approxim 0 +pound 0 +took 0 +name 0 +mean 0 +light 0 +went 0 +still 0 +process 0 +chang 0 +incident 0 +probabl 0 +divin 0 +interfer 0 +aishwarya 0 +miss 0 +leav 0 +miniscul 0 +detail 0 +earlier 0 +life 0 +dive 0 +straight 0 +high 0 +nation 0 +public 0 +bangalor 0 +greater 0 +place 0 +someon 0 +noth 0 +better 0 +krec 0 +that 0 +wonder 0 +anoth 0 +year 0 +holidai 0 +conquer 0 +class 0 +never 0 +match 0 +hope 0 +get 0 +somewher 0 +final 0 +here 0 +link 0 +friend 0 +ashish 0 +aastha 0 +indira 0 +ankit 0 +vineet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..dec664a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,58 @@ +system 1 +uddin 1 +ghia 1 +interest 1 +engin 1 +cornel 1 +comput 1 +work 1 +program 1 +asif 0 +ghiasasif 0 +welcom 0 +mywww 0 +home 0 +page 0 +warn 0 +constructioni 0 +student 0 +scienc 0 +area 0 +distribut 0 +multimedia 0 +bachelor 0 +degre 0 +univers 0 +technolog 0 +karachi 0 +pakistan 0 +sinc 0 +global 0 +inform 0 +solut 0 +present 0 +studi 0 +leav 0 +master 0 +respons 0 +includ 0 +applic 0 +unix 0 +administr 0 +support 0 +educ 0 +network 0 +manag 0 +installationso 0 +number 0 +project 0 +plan 0 +onlin 0 +good 0 +hopefulli 0 +year 0 +publicationsth 0 +follow 0 +music 0 +cricket 0 +astronomyasif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..e0463b7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,9 @@ +home 1 +page 1 +move 0 +http 0 +berkelei 0 +dglaser 0 +htmlpleas 0 +visit 0 +million 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..85e357c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,333 @@ +comput 2 +scienc 2 +award 2 +program 1 +cornel 1 +receiv 1 +educ 1 +gri 1 +logic 1 +us 1 +paper 1 +munich 1 +teach 1 +topic 1 +colleg 1 +depart 1 +born 1 +year 1 +stanford 1 +twin 1 +tabl 1 +system 1 +work 1 +survei 1 +david 1 +home 1 +professor 1 +institut 1 +technolog 1 +interest 1 +area 1 +languag 1 +formal 1 +tool 1 +inform 1 +biographi 1 +text 1 +symposium 1 +went 1 +wife 1 +elain 1 +later 1 +illinoi 1 +math 1 +help 1 +paul 1 +sinc 1 +made 1 +weather 1 +move 1 +chair 1 +return 1 +research 1 +proud 1 +raman 1 +best 1 +document 1 +serv 1 +associ 1 +also 1 +period 1 +editor 1 +know 1 +time 1 +laugh 1 +pagedavid 0 +grieswilliam 0 +lewi 0 +engineeringdr 0 +methodolog 0 +particular 0 +formaldevelop 0 +relat 0 +programminglanguag 0 +semant 0 +asinterest 0 +researchin 0 +fact 0 +understand 0 +taughta 0 +freshman 0 +sophomor 0 +level 0 +anoverrid 0 +concern 0 +mine 0 +click 0 +follow 0 +item 0 +curriculum 0 +vita 0 +short 0 +written 0 +polya 0 +announc 0 +dimac 0 +link 0 +pagecomput 0 +upson 0 +hallcornel 0 +universityithaca 0 +edushort 0 +griesi 0 +flush 0 +york 0 +spent 0 +iescap 0 +queen 0 +workfor 0 +naval 0 +weapon 0 +laboratori 0 +civilian 0 +amathematician 0 +programm 0 +fewmonth 0 +marri 0 +novemb 0 +master 0 +degreein 0 +assistantship 0 +twogerman 0 +manfr 0 +ruedig 0 +wiehl 0 +write 0 +full 0 +algol 0 +compilerfor 0 +figur 0 +implementrecurs 0 +effici 0 +mani 0 +end 0 +go 0 +almost 0 +three 0 +doctor 0 +bauer 0 +joseph 0 +stoer 0 +germani 0 +june 0 +wasin 0 +numer 0 +analysi 0 +these 0 +notyet 0 +kosher 0 +assist 0 +susan 0 +excit 0 +usual 0 +thebirthdai 0 +april 0 +intown 0 +make 0 +four 0 +birthdai 0 +cake 0 +left 0 +whichha 0 +snow 0 +ever 0 +wasdepart 0 +becam 0 +william 0 +lewisprofessor 0 +engin 0 +guggenheim 0 +fellowship 0 +contentsi 0 +better 0 +known 0 +mytext 0 +writingand 0 +contribut 0 +thewond 0 +good 0 +bloom 0 +wherey 0 +plant 0 +number 0 +contributionsto 0 +ieee 0 +taylor 0 +booth 0 +sigcseaward 0 +outstand 0 +clarkaward 0 +art 0 +theamerican 0 +feder 0 +process 0 +societi 0 +afip 0 +advise 0 +stand 0 +susanowicki 0 +thesi 0 +laid 0 +foundat 0 +proof 0 +correct 0 +ofparallel 0 +notion 0 +interfer 0 +freeness 0 +author 0 +bestpap 0 +langaug 0 +andt 0 +sthesi 0 +dissert 0 +designedand 0 +implement 0 +speak 0 +latex 0 +includ 0 +technic 0 +articl 0 +book 0 +printedor 0 +spoken 0 +abl 0 +speakmathemat 0 +effect 0 +manner 0 +import 0 +goal 0 +read 0 +blind 0 +alreadi 0 +produc 0 +audiocassett 0 +thecomput 0 +board 0 +late 0 +open 0 +officein 0 +washington 0 +began 0 +serious 0 +repres 0 +researchinterest 0 +conduct 0 +taulbe 0 +obtain 0 +essenti 0 +complet 0 +responsesfrom 0 +grant 0 +noother 0 +compar 0 +respons 0 +rate 0 +itrequir 0 +telephon 0 +call 0 +sendin 0 +questionnair 0 +researchassoci 0 +servic 0 +forchair 0 +toward 0 +respect 0 +andrespons 0 +current 0 +acta 0 +informatica 0 +aspect 0 +softwar 0 +concept 0 +andtool 0 +edit 0 +keep 0 +busi 0 +enjoi 0 +takean 0 +individu 0 +willsuggest 0 +substanti 0 +rewrit 0 +believ 0 +servewher 0 +fredb 0 +schneider 0 +springer 0 +verlag 0 +andmonograph 0 +spare 0 +sport 0 +like 0 +golf 0 +softbal 0 +volleybal 0 +swim 0 +tenni 0 +china 0 +isplit 0 +pant 0 +plai 0 +ping 0 +pong 0 +hour 0 +give 0 +alectur 0 +mention 0 +audienc 0 +turnedaround 0 +explain 0 +interpret 0 +spoke 0 +everyonelaugh 0 +howev 0 +whether 0 +told 0 +truth 0 +justsaid 0 +joke 0 +sing 0 +barbershop 0 +andgilbert 0 +sullivan 0 +around 0 +hous 0 +carpentri 0 +wire 0 +remodel 0 +taken 0 +yield 0 +considerablesatisfact 0 +content 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..2986cd6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,23 @@ +grinzayd 1 +cornel 1 +alex 0 +homepagealex 0 +grinzaydm 0 +student 0 +comput 0 +sciencecornel 0 +universitytel 0 +email 0 +first 0 +week 0 +link 0 +necx 0 +directinternet 0 +shop 0 +networkcomput 0 +express 0 +damarkwarn 0 +page 0 +bore 0 +learn 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..9825bf47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,46 @@ +comput 1 +cornel 1 +grzegorz 1 +grze 1 +czajkowski 0 +homepag 0 +czajkowskidepart 0 +sciencecornel 0 +universityithaca 0 +offic 0 +second 0 +year 0 +student 0 +program 0 +depart 0 +scienceat 0 +univers 0 +ithaca 0 +york 0 +complet 0 +master 0 +degre 0 +scienc 0 +krakow 0 +poland 0 +current 0 +involv 0 +sever 0 +project 0 +also 0 +charg 0 +administ 0 +cuc 0 +advisor 0 +thorsten 0 +eicken 0 +link 0 +relat 0 +research 0 +architectur 0 +activ 0 +messag 0 +split 0 +last 0 +modifi 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..3d362d83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,77 @@ +halpern 1 +reason 1 +comput 1 +depart 1 +knowledg 1 +uncertainti 1 +distribut 1 +also 1 +work 1 +talk 1 +give 1 +detail 1 +paper 1 +fall 1 +teach 1 +home 0 +pagejoseph 0 +professorcornel 0 +universitycomput 0 +scienc 0 +upson 0 +hallithaca 0 +cornel 0 +research 0 +focus 0 +applic 0 +game 0 +theori 0 +although 0 +done 0 +continu 0 +interest 0 +topic 0 +fault 0 +toler 0 +program 0 +languag 0 +semant 0 +li 0 +boundari 0 +number 0 +field 0 +recent 0 +gave 0 +econom 0 +princeton 0 +describ 0 +someon 0 +mathemat 0 +call 0 +scientist 0 +economist 0 +abouta 0 +subject 0 +mainli 0 +studi 0 +philosoph 0 +probabl 0 +best 0 +sentenc 0 +descript 0 +like 0 +check 0 +list 0 +public 0 +pointer 0 +abstract 0 +mani 0 +case 0 +avail 0 +activ 0 +resum 0 +cours 0 +sequel 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..813e1298 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,10 @@ +tsuneshi 1 +hashimoto 1 +hashimototsuneshi 0 +hashimotothi 0 +home 0 +page 0 +construct 0 +cstsuneshi 0 +hashi 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..29f409c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,24 @@ +hayden 1 +system 1 +mark 1 +cornel 1 +distribut 1 +commun 1 +offic 0 +upson 0 +univers 0 +ithaca 0 +fall 0 +teach 0 +tast 0 +unix 0 +interest 0 +horu 0 +ensembl 0 +nuprl 0 +proof 0 +develop 0 +hockei 0 +last 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..34f17551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,6 @@ +heji 1 +improv 0 +home 0 +page 0 +cyber 0 +pond 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..34209e18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,27 @@ +cornel 1 +deyu 1 +student 1 +comput 1 +scienc 1 +home 0 +page 0 +graduat 0 +universitydept 0 +upson 0 +hallithaca 0 +third 0 +year 0 +born 0 +shanghai 0 +china 0 +receiv 0 +undergradu 0 +degre 0 +berkelei 0 +faculti 0 +advisor 0 +thorsten 0 +eicken 0 +come 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..671f3de5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,59 @@ +cornel 1 +comput 1 +univers 1 +huang 1 +chines 1 +christian 1 +jing 1 +depart 1 +scienc 1 +vision 1 +fellowship 1 +home 0 +page 0 +upson 0 +hall 0 +ithaca 0 +student 0 +thedepart 0 +scienceat 0 +receiv 0 +bachelorand 0 +master 0 +degre 0 +appli 0 +mathemat 0 +tsinghua 0 +beij 0 +chinami 0 +academ 0 +interest 0 +multimedia 0 +system 0 +work 0 +professor 0 +ramin 0 +zabih 0 +imag 0 +retriev 0 +video 0 +process 0 +motion 0 +track 0 +us 0 +link 0 +annot 0 +bibliographi 0 +pattern 0 +recognit 0 +relat 0 +machin 0 +learn 0 +optim 0 +check 0 +evangel 0 +resourc 0 +center 0 +mission 0 +back 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..75b2b169 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,35 @@ +page 1 +chin 1 +chen 1 +home 1 +address 1 +practic 1 +distribut 1 +system 1 +welcom 0 +current 0 +mapl 0 +ithaca 0 +mail 0 +icchen 0 +cornel 0 +perman 0 +sung 0 +taipei 0 +taiwan 0 +class 0 +spring 0 +comput 0 +graphic 0 +practicum 0 +databas 0 +manag 0 +album 0 +resum 0 +new 0 +china 0 +time 0 +nctu 0 +construct 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..22991950 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,27 @@ +engin 1 +system 1 +page 1 +comput 1 +home 0 +indira 0 +malik 0 +depart 0 +scienc 0 +master 0 +imalik 0 +cornel 0 +resum 0 +post 0 +script 0 +cours 0 +program 0 +softwar 0 +advanc 0 +databas 0 +network 0 +multimedia 0 +visit 0 +high 0 +school 0 +tap 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..367fc5c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,80 @@ +comput 2 +system 1 +project 1 +indira 1 +scienc 1 +oper 1 +graphic 1 +java 1 +cornel 1 +engin 1 +inform 1 +camera 1 +applet 1 +vidyaprakash 0 +vidyaprakashmast 0 +engineeringclass 0 +dept 0 +sciencecornel 0 +universitywelcom 0 +homepag 0 +current 0 +student 0 +depart 0 +univers 0 +ithaca 0 +degre 0 +colleg 0 +technolog 0 +coimbator 0 +india 0 +cours 0 +taken 0 +cornelluniversityfal 0 +practicum 0 +specif 0 +hoca 0 +softwar 0 +multimedia 0 +audio 0 +process 0 +toolkit 0 +manag 0 +polici 0 +spring 0 +cspracticum 0 +anim 0 +magic 0 +carpet 0 +colloqium 0 +manageri 0 +financesumm 0 +independ 0 +research 0 +tracingin 0 +perspectivetransform 0 +click 0 +postscript 0 +version 0 +myresumeclick 0 +perspect 0 +transformssom 0 +interest 0 +site 0 +cool 0 +sgamelan 0 +directori 0 +calvinand 0 +hobb 0 +galleri 0 +gif 0 +indian 0 +recip 0 +chicker 0 +wood 0 +drive 0 +nashvil 0 +tennesse 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..20c4724d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,90 @@ +us 1 +system 1 +manual 1 +write 1 +pageioi 1 +home 1 +cornel 1 +softwar 1 +base 1 +engin 1 +test 1 +packag 1 +file 1 +short 1 +detail 1 +come 1 +homeless 0 +lamioi 0 +current 0 +research 0 +assist 0 +prof 0 +brian 0 +smith 0 +comput 0 +scienc 0 +depart 0 +univers 0 +interest 0 +multi 0 +media 0 +parallel 0 +program 0 +instruct 0 +set 0 +environ 0 +knowledg 0 +guidelin 0 +good 0 +extens 0 +code 0 +doesn 0 +cover 0 +much 0 +script 0 +section 0 +suit 0 +valuabl 0 +programm 0 +postscript 0 +version 0 +complet 0 +includ 0 +templat 0 +sourc 0 +document 0 +introduct 0 +descript 0 +user 0 +remot 0 +machin 0 +index 0 +inform 0 +tutori 0 +right 0 +week 0 +put 0 +togeth 0 +knowledgebas 0 +repositori 0 +try 0 +spam 0 +site 0 +multim 0 +directori 0 +get 0 +start 0 +virtual 0 +realiti 0 +conferenc 0 +work 0 +prototyp 0 +mpeg 0 +video 0 +server 0 +http 0 +protocol 0 +spring 0 +homework 0 +solut 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..e1119697 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,135 @@ +jackson 2 +nuprl 1 +paul 1 +abstract 1 +cornel 1 +develop 1 +design 1 +avail 1 +format 1 +theori 1 +comput 1 +algebra 1 +thesi 1 +proof 1 +full 1 +text 1 +postscript 1 +list 1 +home 1 +univers 1 +depart 1 +research 1 +softwar 1 +editor 1 +confer 1 +circuit 1 +proceed 1 +hardwar 1 +page 1 +access 1 +next 1 +includ 1 +pagepaul 0 +post 0 +doctor 0 +associatecornel 0 +mail 0 +eduwww 0 +http 0 +info 0 +peopl 0 +htmladdress 0 +scienc 0 +upson 0 +hall 0 +ithaca 0 +usaphon 0 +intereststheorem 0 +prove 0 +environ 0 +formal 0 +method 0 +andhardwar 0 +synthesi 0 +scientif 0 +program 0 +linkag 0 +tool 0 +engin 0 +informationmi 0 +entitl 0 +enhanc 0 +developmentsystem 0 +appli 0 +paper 0 +explor 0 +construct 0 +type 0 +bundi 0 +intern 0 +automateddeduct 0 +lectur 0 +note 0 +artif 0 +intellig 0 +springer 0 +verlag 0 +june 0 +bout 0 +stavrid 0 +melham 0 +inter 0 +theorem 0 +prover 0 +ifip 0 +transactionsa 0 +north 0 +holland 0 +toolkit 0 +float 0 +point 0 +thenuprl 0 +system 0 +theadvanc 0 +workshop 0 +correct 0 +methodolog 0 +elsevi 0 +nuprlth 0 +project 0 +world 0 +wide 0 +document 0 +commun 0 +live 0 +session 0 +basic 0 +load 0 +collect 0 +still 0 +need 0 +workon 0 +make 0 +someon 0 +els 0 +getround 0 +pai 0 +attent 0 +sometim 0 +month 0 +hypertext 0 +thetheori 0 +foreach 0 +introduct 0 +summari 0 +definit 0 +andtheorem 0 +thepolynomi 0 +relat 0 +moment 0 +shouldb 0 +coupl 0 +dai 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..10acfb27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,3 @@ +hani 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..c21e7d90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,21 @@ +cornel 1 +tibor 0 +jnositibor 0 +jnosiwelcom 0 +mywww 0 +home 0 +page 0 +perman 0 +constructionoffic 0 +upson 0 +hall 0 +univers 0 +ithaca 0 +usaoffic 0 +phone 0 +interest 0 +site 0 +project 0 +zenotibor 0 +jnosi 0 +janosi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..d71218a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,13 @@ +janwun 1 +cornel 0 +master 0 +engin 0 +student 0 +comput 0 +scienc 0 +depart 0 +address 0 +mapl 0 +avenu 0 +ithaca 0 +telephon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..9b5559f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,183 @@ +languag 2 +morrisett 2 +program 2 +research 2 +greg 2 +compil 1 +harper 1 +standard 1 +system 1 +type 1 +also 1 +robert 1 +technic 1 +version 1 +gregori 1 +interest 1 +cornel 1 +high 1 +level 1 +softwar 1 +implement 1 +us 1 +memori 1 +report 1 +appear 1 +publish 1 +sigplan 1 +symposium 1 +principl 1 +comput 1 +ithaca 1 +offic 1 +phone 1 +time 1 +construct 1 +code 1 +support 1 +semant 1 +direct 1 +abstract 1 +design 1 +manag 1 +polymorph 1 +decemb 1 +tarditi 1 +cheng 1 +stone 1 +workshop 1 +extend 1 +proc 1 +proceed 1 +june 1 +portabl 1 +multiprocess 1 +jersei 1 +andrew 1 +tolmach 1 +project 1 +home 1 +page 1 +faculti 0 +assist 0 +professor 0 +scienc 0 +univers 0 +upson 0 +hall 0 +tabl 0 +content 0 +intereststeachingselect 0 +papersrel 0 +linksperson 0 +informationresearch 0 +interestsmi 0 +primari 0 +develop 0 +ofadvanc 0 +particularli 0 +interestedin 0 +forbuild 0 +includ 0 +operatingsystem 0 +distribut 0 +late 0 +focus 0 +onth 0 +issu 0 +kept 0 +safelanguag 0 +concentr 0 +produc 0 +faster 0 +consum 0 +less 0 +hack 0 +bit 0 +bring 0 +power 0 +base 0 +toolsfrom 0 +theori 0 +partial 0 +evalu 0 +interpret 0 +gener 0 +specif 0 +real 0 +systemssoftwar 0 +teachingc 0 +fall 0 +advanc 0 +spring 0 +select 0 +paperssemant 0 +septemb 0 +gzip 0 +postscript 0 +thesi 0 +optim 0 +confer 0 +perform 0 +safetythrough 0 +closur 0 +convers 0 +yasuhiko 0 +minamid 0 +juli 0 +model 0 +matthia 0 +felleisen 0 +conf 0 +function 0 +andcomput 0 +architectur 0 +reportcmu 0 +notecmu 0 +intensionaltyp 0 +analysi 0 +annual 0 +francisco 0 +januari 0 +optimist 0 +parallelizationgreg 0 +mauric 0 +herlihi 0 +octob 0 +refin 0 +first 0 +class 0 +store 0 +state 0 +copenhagen 0 +denmark 0 +lock 0 +platform 0 +fourth 0 +practic 0 +parallel 0 +diego 0 +interfac 0 +princeton 0 +ad 0 +thread 0 +eric 0 +cooper 0 +relat 0 +link 0 +mark 0 +leon 0 +resourc 0 +member 0 +carnegi 0 +mellon 0 +line 0 +inform 0 +orient 0 +bibliographi 0 +depart 0 +scienceperson 0 +informationhom 0 +address 0 +warren 0 +road 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..12f3fe85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,15 @@ +jiun 1 +address 1 +resum 0 +java 0 +current 0 +mapl 0 +avenu 0 +ithaca 0 +email 0 +jhlin 0 +cornel 0 +perman 0 +shing 0 +taipei 0 +taiwan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..796f1910 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,5 @@ +jerri 1 +cornel 0 +edujerri 0 +project 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..a07d3bd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,115 @@ +china 2 +chines 2 +univers 2 +scienc 2 +comput 1 +cornel 1 +graduat 1 +student 1 +depart 1 +binghamton 1 +taiwan 1 +new 1 +rank 1 +home 1 +page 1 +twin 1 +sister 1 +phone 1 +suni 1 +mathemat 1 +nation 1 +fellowship 1 +site 1 +internet 1 +magazin 1 +music 1 +digest 1 +homepag 1 +servic 1 +wang 0 +wangphd 0 +upson 0 +hallithaca 0 +offic 0 +email 0 +jiawang 0 +first 0 +year 0 +state 0 +york 0 +degre 0 +transfer 0 +nankai 0 +tianjin 0 +honor 0 +award 0 +barri 0 +goldwat 0 +scholar 0 +engin 0 +sciencefound 0 +research 0 +famili 0 +cool 0 +link 0 +hongkong 0 +beij 0 +review 0 +chinaand 0 +relat 0 +daili 0 +cbnet 0 +forum 0 +chinanet 0 +stamp 0 +time 0 +window 0 +chinesecalendar 0 +directori 0 +mediainform 0 +educ 0 +histori 0 +cultur 0 +hongkonglaserdisccent 0 +internetdistribut 0 +multilingu 0 +softwar 0 +ryan 0 +smovieplex 0 +sceneri 0 +pictur 0 +tour 0 +entertain 0 +sheng 0 +tian 0 +diwww 0 +futur 0 +interest 0 +america 0 +best 0 +school 0 +liber 0 +art 0 +film 0 +ieee 0 +societi 0 +monei 0 +foundat 0 +peterson 0 +guid 0 +postcard 0 +program 0 +incomput 0 +thesenior 0 +virtual 0 +tourist 0 +worldmap 0 +yahoo 0 +christian 0 +mandarin 0 +cssa 0 +weather 0 +stoni 0 +brook 0 +ucla 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..bbcc88fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,45 @@ +vision 1 +justin 1 +miller 1 +home 1 +page 1 +mani 1 +research 1 +navi 0 +uniform 0 +current 0 +master 0 +engin 0 +student 0 +comput 0 +scienc 0 +colleg 0 +ofengin 0 +cornel 0 +univers 0 +semest 0 +teach 0 +assist 0 +com 0 +machin 0 +long 0 +night 0 +found 0 +robot 0 +csrvl 0 +assistantwork 0 +prof 0 +ramin 0 +zabih 0 +primari 0 +interest 0 +ismachin 0 +particularli 0 +level 0 +imag 0 +process 0 +gener 0 +informationsom 0 +rant 0 +project 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..bee3c47d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,137 @@ +comput 1 +new 1 +scienc 1 +high 1 +softwar 1 +cornel 1 +work 1 +engin 1 +school 1 +hillsboro 1 +cool 1 +distribut 1 +system 1 +game 1 +parallel 1 +mpeg 1 +encod 1 +york 1 +press 1 +jeff 0 +moorejeff 0 +moorewel 0 +current 0 +graduat 0 +student 0 +mastersof 0 +receiv 0 +purdu 0 +univers 0 +west 0 +lafayett 0 +indiana 0 +went 0 +oregon 0 +suburb 0 +portland 0 +employmentmi 0 +resum 0 +anyon 0 +interest 0 +intel 0 +internet 0 +product 0 +divis 0 +creat 0 +spring 0 +classesnba 0 +thrive 0 +inform 0 +revolut 0 +entertain 0 +sectorc 0 +practic 0 +systemsc 0 +practicum 0 +cornellopoli 0 +perform 0 +architectur 0 +network 0 +optim 0 +researchfal 0 +classesc 0 +technolog 0 +techniquec 0 +formal 0 +methodsc 0 +multimedia 0 +research 0 +paperc 0 +colloquiumc 0 +tool 0 +seminar 0 +present 0 +opendoc 0 +mfcoptim 0 +researchsoftwar 0 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 0 +companiesintelsilicon 0 +graphicsibmsunapplemagazinespc 0 +magazinepc 0 +weekpc 0 +computingcomput 0 +shopperwindow 0 +sourcescomput 0 +lifemacusermacweekinteract 0 +weekfamili 0 +pccomput 0 +worldelectron 0 +newspapersusa 0 +todaywal 0 +street 0 +journalnew 0 +timesphiladelphia 0 +onlineth 0 +daili 0 +worldwideth 0 +dalla 0 +morn 0 +opinionsth 0 +detroit 0 +free 0 +gopherth 0 +knoxvil 0 +sentinelth 0 +leader 0 +onlinelat 0 +serviceth 0 +nugget 0 +newspap 0 +sister 0 +oregonrworld 0 +orang 0 +counti 0 +registerth 0 +francisco 0 +chronicl 0 +examinersan 0 +jose 0 +mercuryth 0 +seattl 0 +timesnando 0 +netusa 0 +todayboston 0 +globeportland 0 +herald 0 +main 0 +sundai 0 +telegramvisitor 0 +sinc 0 +januari 0 +campu 0 +address 0 +mapl 0 +fdithaca 0 +last 0 +updat 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..9becc8e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,81 @@ +project 2 +lui 1 +java 1 +enabl 1 +browser 1 +system 1 +home 1 +comput 1 +us 1 +would 1 +anim 1 +interest 1 +current 1 +distribut 1 +video 1 +page 1 +view 1 +clock 1 +jose 0 +fernandez 0 +pagejos 0 +fernandezjos 0 +fernandezmast 0 +engin 0 +depart 0 +sciencecornel 0 +universityithaca 0 +address 0 +mapl 0 +avenu 0 +ebithaca 0 +mail 0 +joselui 0 +cornel 0 +scroll 0 +text 0 +sign 0 +resum 0 +curriculum 0 +vita 0 +multimedia 0 +graphic 0 +imag 0 +transit 0 +rivl 0 +exampl 0 +presentationc 0 +pyramania 0 +game 0 +spaceship 0 +battl 0 +report 0 +hoca 0 +design 0 +code 0 +oper 0 +implement 0 +multitask 0 +virtual 0 +memori 0 +meng 0 +autonom 0 +vehicl 0 +simul 0 +hobbi 0 +photographi 0 +click 0 +picturesmusiccomputerswrit 0 +direct 0 +crazi 0 +movi 0 +actor 0 +recruit 0 +friend 0 +time 0 +courtesi 0 +bill 0 +giel 0 +visitor 0 +number 0 +better 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..f7b18353 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,30 @@ +hurtado 1 +cornel 1 +engin 1 +project 1 +julin 0 +home 0 +pagejulin 0 +click 0 +curriculum 0 +vita 0 +universitymast 0 +busi 0 +administr 0 +johnson 0 +graduat 0 +school 0 +managementmast 0 +depart 0 +comput 0 +science 0 +mail 0 +colombia 0 +linda 0 +er 0 +master 0 +distribut 0 +system 0 +autonom 0 +vehicl 0 +simul 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..bc7617e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,28 @@ +page 1 +current 1 +cornel 1 +texa 1 +janeen 0 +homepagejaneen 0 +reich 0 +welcom 0 +home 0 +univers 0 +complet 0 +comput 0 +scienc 0 +august 0 +septemb 0 +join 0 +system 0 +group 0 +instrument 0 +dalla 0 +send 0 +email 0 +jreich 0 +edumi 0 +resum 0 +ad 0 +favorit 0 +thing 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..fc50eebb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,120 @@ +comput 2 +home 1 +list 1 +engin 1 +bodi 1 +mail 1 +camaro 1 +page 1 +chevi 1 +system 1 +cours 1 +master 1 +high 1 +facial 1 +car 1 +stock 1 +ford 1 +septemb 1 +come 1 +jodi 0 +shapirojodi 0 +shapiroeduc 0 +receiv 0 +univers 0 +massachusett 0 +amherst 0 +current 0 +cornel 0 +get 0 +meng 0 +scienc 0 +graduat 0 +resum 0 +fall 0 +multimedia 0 +systemsc 0 +softwar 0 +engineeringe 0 +network 0 +telecommunicationc 0 +researchspr 0 +machin 0 +visionc 0 +perform 0 +systemse 0 +capac 0 +networksnba 0 +thrive 0 +inform 0 +revolutionc 0 +researchma 0 +automot 0 +engineeringinterest 0 +project 0 +design 0 +implement 0 +dynam 0 +gener 0 +synchron 0 +speech 0 +animationlow 0 +cost 0 +portabl 0 +desktop 0 +videoconferenc 0 +window 0 +parallel 0 +object 0 +recognit 0 +applic 0 +recognitioninterest 0 +main 0 +interest 0 +obvious 0 +memberof 0 +firebird 0 +yourselfelectron 0 +fuel 0 +inject 0 +although 0 +alwayshav 0 +time 0 +particip 0 +also 0 +designingan 0 +ground 0 +pageefi 0 +pagethes 0 +pictur 0 +sold 0 +speed 0 +gearsmodif 0 +hypertech 0 +stage 0 +chip 0 +flowmast 0 +exhaust 0 +hurst 0 +shifter 0 +grant 0 +steer 0 +wheel 0 +filter 0 +mustang 0 +bought 0 +still 0 +speedmodif 0 +gear 0 +accel 0 +plug 0 +motorsport 0 +wiresbest 0 +mile 0 +mphbest 0 +januari 0 +pagenumb 0 +visit 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..4adb9925 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,8 @@ +julia 0 +home 0 +pagejulia 0 +komissarchik 0 +juliak 0 +cornel 0 +eduto 0 +continu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..e8644b1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,104 @@ +cornel 1 +network 1 +julian 1 +engin 1 +comput 1 +scienc 1 +theori 1 +video 1 +full 1 +pelenur 1 +avail 1 +univers 1 +offic 1 +center 1 +current 1 +workstat 1 +develop 1 +grow 1 +fault 1 +toler 1 +design 1 +implement 1 +fast 1 +techniqu 1 +emilio 0 +better 0 +pictur 0 +master 0 +graduat 0 +campu 0 +adress 0 +summit 0 +ithaca 0 +centerithaca 0 +upson 0 +hallcornel 0 +occup 0 +fulltim 0 +student 0 +teach 0 +assist 0 +databas 0 +administr 0 +recent 0 +project 0 +global 0 +pointer 0 +complet 0 +toolkit 0 +write 0 +parallel 0 +program 0 +independ 0 +platform 0 +topolog 0 +compil 0 +sparcstat 0 +ethernet 0 +wfinger 0 +system 0 +search 0 +home 0 +page 0 +document 0 +world 0 +wide 0 +cyberserv 0 +need 0 +faster 0 +httpserver 0 +fulfil 0 +increas 0 +demand 0 +servic 0 +addit 0 +commerci 0 +high 0 +becom 0 +critic 0 +paper 0 +describ 0 +distribut 0 +http 0 +server 0 +us 0 +horu 0 +prvf 0 +poss 0 +realli 0 +thegoal 0 +achiev 0 +screenmot 0 +cluster 0 +showthat 0 +innov 0 +snarf 0 +blast 0 +capit 0 +hardwar 0 +produc 0 +transferwith 0 +compress 0 +color 0 +screen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..76d2f2f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,92 @@ +cornel 1 +home 1 +page 1 +theori 1 +program 1 +languag 1 +resourc 1 +type 1 +paper 1 +art 1 +czar 1 +comput 1 +scienc 1 +depart 1 +interest 1 +includ 1 +research 1 +work 1 +nuprl 1 +slide 1 +fine 1 +hockei 1 +theatr 1 +jason 0 +hickei 0 +graduat 0 +student 0 +supervis 0 +robertconst 0 +summari 0 +current 0 +statu 0 +practic 0 +great 0 +thefox 0 +project 0 +especi 0 +markleon 0 +mainli 0 +softwar 0 +verif 0 +tool 0 +specif 0 +formalsystem 0 +develop 0 +universitydepart 0 +make 0 +higher 0 +levelmodul 0 +abstract 0 +data 0 +relat 0 +recent 0 +publish 0 +cornella 0 +bibliographi 0 +publishedat 0 +bellcor 0 +also 0 +talk 0 +havegiven 0 +seminar 0 +pretti 0 +technic 0 +theygiv 0 +overview 0 +done 0 +want 0 +sequenc 0 +identif 0 +galleryof 0 +mine 0 +tryth 0 +orth 0 +fineart 0 +forum 0 +cucshockei 0 +backcountri 0 +take 0 +look 0 +thebackcountri 0 +perform 0 +servic 0 +publicli 0 +maintainedsoftwar 0 +equip 0 +back 0 +hockeyfor 0 +info 0 +schedul 0 +forth 0 +center 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..0701623e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,36 @@ +kamijo 1 +koichi 1 +juli 1 +english 1 +japanes 1 +back 1 +japan 1 +kamijokoichi 0 +welcom 0 +home 0 +page 0 +construct 0 +reach 0 +us 0 +thing 0 +sell 0 +sold 0 +click 0 +like 0 +sale 0 +class 0 +papershometownseduc 0 +work 0 +experienceskoichi 0 +muriel 0 +ithaca 0 +cornel 0 +kkamijoh 0 +vnet 0 +go 0 +accept 0 +access 0 +time 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..370b4fbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,115 @@ +cornel 1 +univers 1 +karl 1 +robot 1 +micro 1 +manipul 1 +friedrich 1 +bhringer 1 +karlsruh 1 +comput 1 +scienc 1 +stanford 1 +laboratori 1 +build 1 +interest 1 +andassembl 1 +microfabr 1 +strategi 1 +work 1 +page 1 +level 1 +dipl 0 +inform 0 +graduat 0 +student 0 +dept 0 +upson 0 +hall 0 +ithaca 0 +email 0 +educurr 0 +address 0 +gate 0 +current 0 +research 0 +nanofabr 0 +facil 0 +actuat 0 +arrai 0 +implementmicro 0 +gener 0 +innew 0 +devic 0 +handl 0 +part 0 +withprogramm 0 +forc 0 +vector 0 +field 0 +also 0 +investig 0 +design 0 +autom 0 +structur 0 +earlier 0 +germani 0 +includ 0 +develop 0 +better 0 +graph 0 +layout 0 +algorithm 0 +thesi 0 +advisor 0 +professorbruc 0 +donald 0 +founder 0 +director 0 +vision 0 +project 0 +close 0 +collabor 0 +professor 0 +noel 0 +macdonaldand 0 +hisresearch 0 +group 0 +public 0 +document 0 +confer 0 +announc 0 +call 0 +paper 0 +anim 0 +video 0 +sculptur 0 +invis 0 +cantilev 0 +model 0 +frank 0 +lloyd 0 +wright 0 +fallingwat 0 +articl 0 +york 0 +time 0 +magazin 0 +march 0 +wire 0 +octob 0 +offic 0 +nano 0 +outin 0 +kwon 0 +club 0 +find 0 +lindseth 0 +climb 0 +wall 0 +navig 0 +previou 0 +higher 0 +deeper 0 +next 0 +pagekarl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..30918862 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,232 @@ +layer 2 +stack 2 +interest 2 +protocol 2 +horu 2 +properti 2 +cornel 1 +distribut 1 +comput 1 +problem 1 +specifi 1 +applic 1 +provid 1 +karr 1 +work 1 +project 1 +concentr 1 +research 1 +engin 1 +commun 1 +weak 1 +consist 1 +java 1 +develop 1 +wide 1 +david 1 +univers 1 +system 1 +mathemat 1 +statist 1 +html 1 +includ 1 +verifi 1 +verif 1 +us 1 +guarante 1 +techniqu 1 +desir 1 +order 1 +part 1 +applet 1 +messag 1 +whose 1 +prone 1 +area 1 +network 1 +gener 1 +disconnect 1 +site 1 +time 1 +karrdavid 0 +karrphd 0 +studentdepart 0 +sciencecornel 0 +upson 0 +hall 0 +ithaca 0 +mail 0 +edui 0 +student 0 +depart 0 +scienceat 0 +thehoru 0 +architectur 0 +reliabl 0 +withprofessor 0 +kenneth 0 +birmananddr 0 +robbert 0 +reness 0 +minor 0 +field 0 +perform 0 +protocolsmi 0 +implement 0 +dissert 0 +formalspecif 0 +ofhoru 0 +tempor 0 +logic 0 +action 0 +variousinterest 0 +fundament 0 +usedin 0 +furthermor 0 +write 0 +formula 0 +assum 0 +stylefor 0 +might 0 +itsinterfac 0 +depend 0 +andbelow 0 +emploi 0 +straightforward 0 +agiven 0 +certain 0 +thetop 0 +condit 0 +even 0 +unusualcombin 0 +atyp 0 +ultim 0 +user 0 +systemsshould 0 +abl 0 +call 0 +help 0 +constructcustom 0 +omit 0 +unnecessari 0 +avoid 0 +theirassoci 0 +cost 0 +confid 0 +sufficientto 0 +intend 0 +basi 0 +thesecur 0 +harden 0 +give 0 +rough 0 +demonstr 0 +propos 0 +method 0 +ofverifi 0 +initi 0 +stem 0 +thepromis 0 +suit 0 +variousguarante 0 +programm 0 +passingenviron 0 +host 0 +crash 0 +delayedor 0 +lost 0 +softwar 0 +haswork 0 +componentswer 0 +failur 0 +feel 0 +featur 0 +offer 0 +considerablepromis 0 +consistencywhil 0 +becom 0 +ofdistribut 0 +look 0 +revis 0 +control 0 +filesin 0 +environ 0 +distributedenviron 0 +partitionedinto 0 +portion 0 +notion 0 +wouldallow 0 +multipl 0 +temporarili 0 +make 0 +progress 0 +concurr 0 +performancemi 0 +correct 0 +measur 0 +high 0 +avail 0 +respons 0 +andeffici 0 +resourc 0 +clearli 0 +equal 0 +import 0 +larg 0 +appar 0 +random 0 +ofsystem 0 +load 0 +activ 0 +notabl 0 +except 0 +dedic 0 +parallelmachin 0 +behavior 0 +also 0 +suscept 0 +analysi 0 +though 0 +differ 0 +kind 0 +encourag 0 +javath 0 +world 0 +applicationwith 0 +mani 0 +possibl 0 +explor 0 +experi 0 +simpl 0 +wai 0 +hypertext 0 +tonavig 0 +inform 0 +appear 0 +myweb 0 +lego 0 +toi 0 +hack 0 +execut 0 +code 0 +anetscap 0 +browser 0 +download 0 +exampl 0 +abirthdai 0 +puzzl 0 +calcul 0 +tool 0 +forverifi 0 +profession 0 +affiliationsi 0 +member 0 +ieee 0 +andmaa 0 +informationseemi 0 +linksfor 0 +topic 0 +find 0 +last 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..dcfa2762 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,137 @@ +system 2 +click 1 +comput 1 +work 1 +graphic 1 +home 1 +engin 1 +oper 1 +cornel 1 +hogman 1 +qualcomm 1 +softwar 1 +distribut 1 +network 1 +cours 1 +took 1 +multimedia 1 +virtual 1 +game 1 +break 1 +code 1 +interfac 1 +screenshot 1 +view 1 +postscript 1 +scene 1 +transit 1 +effect 1 +languag 1 +rivl 1 +develop 1 +project 1 +favorit 1 +cool 1 +lot 1 +welcom 0 +pagekartik 0 +kapadiamast 0 +engineeringclass 0 +depart 0 +sciencecornel 0 +universityithaca 0 +address 0 +dabnei 0 +drive 0 +diego 0 +california 0 +phone 0 +mail 0 +kkapadia 0 +comcurr 0 +incorporatedmi 0 +main 0 +area 0 +interest 0 +spring 0 +architectur 0 +high 0 +capac 0 +inform 0 +fall 0 +projectshoca 0 +chiphoca 0 +pronounc 0 +hodja 0 +full 0 +fledg 0 +chip 0 +hypothet 0 +instruct 0 +processor 0 +support 0 +featur 0 +like 0 +multitask 0 +memori 0 +enjoy 0 +quick 0 +singl 0 +player 0 +window 0 +platform 0 +good 0 +sourc 0 +entertain 0 +take 0 +gameboard 0 +help 0 +screen 0 +design 0 +document 0 +rivlrivl 0 +stand 0 +resolut 0 +independ 0 +video 0 +univers 0 +jonathan 0 +swartz 0 +brian 0 +smith 0 +excel 0 +applic 0 +enhanc 0 +incorpor 0 +primit 0 +implement 0 +present 0 +simul 0 +railroad 0 +master 0 +visual 0 +captur 0 +scientif 0 +aspect 0 +lai 0 +track 0 +vehicl 0 +model 0 +dynam 0 +motion 0 +us 0 +combin 0 +open 0 +inventor 0 +opengl 0 +realiti 0 +facil 0 +resumesom 0 +site 0 +star 0 +mpeg 0 +clip 0 +music 0 +page 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..3a2cf489 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,63 @@ +publish 1 +letter 1 +thing 1 +write 1 +wrote 1 +suspect 1 +true 0 +writingsi 0 +much 0 +written 0 +proudof 0 +flame 0 +mine 0 +other 0 +morethought 0 +conscienti 0 +objector 0 +arm 0 +conflict 0 +myfirst 0 +grand 0 +rapid 0 +press 0 +fewyear 0 +back 0 +gulf 0 +sinc 0 +time 0 +howev 0 +vestart 0 +keep 0 +work 0 +onlin 0 +lest 0 +wonder 0 +also 0 +poetri 0 +dprobabl 0 +rather 0 +famou 0 +essayist 0 +anyhow 0 +like 0 +argu 0 +dread 0 +mess 0 +go 0 +byron 0 +center 0 +asuburb 0 +mile 0 +went 0 +high 0 +school 0 +unabomb 0 +mathematician 0 +mathematiciansar 0 +terrorist 0 +think 0 +editor 0 +newspap 0 +wide 0 +read 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..25a028c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,268 @@ +cornel 1 +katherin 1 +comput 1 +info 1 +html 1 +page 1 +scienc 1 +ithaca 1 +system 1 +robbert 1 +reness 1 +austin 1 +home 1 +distribut 1 +birman 1 +werner 1 +vogel 1 +group 1 +hall 1 +work 1 +reliabl 1 +horu 1 +commun 1 +appear 1 +proceed 1 +sigop 1 +brad 1 +glade 1 +ieee 1 +report 1 +depart 1 +univers 1 +relat 1 +infodistribut 1 +industri 1 +infocompani 1 +place 1 +lisboa 1 +colorado 1 +lectur 1 +httpd 1 +ncsa 1 +imag 1 +sourc 1 +xmosaic 1 +other 1 +journal 1 +school 1 +databas 1 +women 1 +cook 1 +electron 1 +fashion 1 +music 1 +wall 1 +network 1 +guokguo 0 +educornel 0 +universitydept 0 +upson 0 +student 0 +interest 0 +scalabl 0 +multicastprotocol 0 +project 0 +direct 0 +recent 0 +publicationskatherin 0 +structur 0 +virtual 0 +synchroni 0 +explor 0 +bound 0 +ofvirtu 0 +synchron 0 +european 0 +workshop 0 +connemara 0 +ireland 0 +septemb 0 +lui 0 +rodrigu 0 +antonio 0 +sargento 0 +paulo 0 +verisimo 0 +transpar 0 +light 0 +weight 0 +servic 0 +symposiumon 0 +niagara 0 +lake 0 +canada 0 +octob 0 +also 0 +avail 0 +technic 0 +kenneth 0 +mark 0 +hayden 0 +takako 0 +hickei 0 +dalia 0 +malki 0 +alex 0 +vaysburd 0 +flexibl 0 +march 0 +research 0 +systemscomput 0 +networkscool 0 +toolsbibliographyconferencesjournalsacademia 0 +infoschool 0 +infojob 0 +searchinterest 0 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 0 +inforesearch 0 +systempointershoru 0 +commerci 0 +productspringtotemtransisx 0 +kernel 0 +arizona 0 +microsystemslab 0 +networksmulticast 0 +protocolsn 0 +fromlblgun 0 +cool 0 +tool 0 +consortium 0 +gener 0 +theproject 0 +simpl 0 +beginn 0 +sguid 0 +quickrefer 0 +htmldocument 0 +tabl 0 +content 0 +common 0 +gatewai 0 +interfac 0 +overview 0 +find 0 +file 0 +finder 0 +mosaic 0 +binari 0 +document 0 +uiuc 0 +cern 0 +java 0 +hotjava 0 +bibliographybibliographi 0 +oldindex 0 +index 0 +confer 0 +pointer 0 +hpdc 0 +ftc 0 +sosp 0 +srd 0 +icdc 0 +jsac 0 +elsevi 0 +scienceacademia 0 +motorola 0 +openingsibmdelltandemtiapplebel 0 +atlant 0 +texa 0 +dept 0 +ucsd 0 +gradschool 0 +advic 0 +gradjob 0 +search 0 +ukinterest 0 +life 0 +weather 0 +moviesbailei 0 +concertslibrari 0 +hightechin 0 +institut 0 +inesc 0 +copper 0 +mountain 0 +resort 0 +summit 0 +counti 0 +coloradooth 0 +infoart 0 +weblouvreth 0 +world 0 +linebook 0 +amazon 0 +book 0 +calvinhobb 0 +archivecardsmagicchinaart 0 +china 0 +gourmetl 0 +cordonbleu 0 +itali 0 +dessert 0 +fashional 0 +linksa 0 +cjlutz 0 +wwweb 0 +pagewith 0 +heart 0 +tmexpressfirst 0 +view 0 +wireirc 0 +faqfashion 0 +nethair 0 +crew 0 +diesel 0 +jean 0 +guessfriend 0 +alan 0 +cheng 0 +david 0 +deng 0 +shiji 0 +insur 0 +plan 0 +email 0 +grove 0 +edulibrari 0 +librari 0 +congressmagazin 0 +intertext 0 +wire 0 +timegeorg 0 +gilder 0 +discoveri 0 +mail 0 +postcard 0 +map 0 +internet 0 +underground 0 +archivesinanet 0 +newsworld 0 +new 0 +brief 0 +sport 0 +open 0 +olymp 0 +stock 0 +streetheadlin 0 +street 0 +weatherhunt 0 +infoth 0 +lyco 0 +hunt 0 +informationglob 0 +navigatorhom 0 +global 0 +navig 0 +scout 0 +wanderersand 0 +spider 0 +edg 0 +yahoo 0 +refer 0 +netscap 0 +last 0 +modifi 0 +kguo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..7cb57911 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,43 @@ +java 1 +program 1 +wirefram 1 +rotat 1 +creat 1 +introductionthi 0 +project 0 +desgin 0 +learn 0 +tool 0 +comput 0 +graphic 0 +provid 0 +understand 0 +polygon 0 +form 0 +list 0 +vertic 0 +written 0 +simpl 0 +power 0 +languag 0 +safe 0 +portabl 0 +interact 0 +object 0 +orient 0 +multi 0 +threader 0 +environ 0 +platform 0 +speific 0 +applet 0 +react 0 +user 0 +input 0 +dynam 0 +chang 0 +cone 0 +cube 0 +cylind 0 +tetra 0 +toru 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..5a684d25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,242 @@ +comput 3 +kleinberg 3 +proc 2 +scienc 2 +symposium 2 +algorithm 2 +theori 2 +foundat 2 +cornel 1 +problem 1 +optim 1 +network 1 +ieee 1 +research 1 +geometri 1 +geometr 1 +appear 1 +siam 1 +discret 1 +line 1 +univers 1 +combinatori 1 +approxim 1 +distribut 1 +biologi 1 +path 1 +program 1 +inform 1 +resourc 1 +huttenloch 1 +project 1 +bibliographi 1 +confer 1 +kleinber 1 +molecular 1 +rout 1 +adversari 1 +queue 1 +protocol 1 +semi 1 +definit 1 +academ 1 +graph 1 +tardo 1 +disjointpath 1 +williamson 1 +trade 1 +goeman 1 +minimum 1 +process 1 +letter 1 +server 1 +leighton 1 +point 1 +technic 1 +report 1 +page 1 +group 1 +associ 1 +internet 1 +secur 1 +homepag 0 +assist 0 +professor 0 +ithaca 0 +interest 0 +emphasi 0 +recent 0 +work 0 +includ 0 +anddisjoint 0 +approach 0 +analyz 0 +stabilityof 0 +without 0 +probabilist 0 +assumpt 0 +method 0 +particularlyth 0 +posit 0 +studi 0 +conform 0 +spend 0 +year 0 +visit 0 +almaden 0 +center 0 +click 0 +seeselect 0 +publicationsmiscellan 0 +linkspapersapproxim 0 +singl 0 +sourc 0 +unsplitt 0 +flow 0 +rubinfeld 0 +short 0 +expand 0 +dens 0 +embed 0 +disjoint 0 +high 0 +diamet 0 +planar 0 +aggarw 0 +node 0 +mesh 0 +vlsi 0 +layout 0 +improvedapproxim 0 +ratio 0 +latenc 0 +lovasz 0 +thetafunct 0 +relax 0 +vertex 0 +cover 0 +math 0 +local 0 +formobil 0 +robot 0 +computersci 0 +search 0 +simplepolygon 0 +lower 0 +bound 0 +serverbalanc 0 +yaniv 0 +serveralgorithm 0 +robotnavig 0 +master 0 +thesi 0 +parallel 0 +andrew 0 +awerbuch 0 +fernandez 0 +stabil 0 +result 0 +greedi 0 +content 0 +resolut 0 +borodin 0 +raghavan 0 +sudan 0 +attiya 0 +lynch 0 +offsbetween 0 +messag 0 +deliveri 0 +quiesc 0 +time 0 +connect 0 +managementprotocol 0 +israel 0 +system 0 +mullainathan 0 +boundsand 0 +combin 0 +consensu 0 +object 0 +onprincipl 0 +berger 0 +reconstruct 0 +athre 0 +dimension 0 +model 0 +arbitrari 0 +error 0 +compar 0 +set 0 +kedem 0 +dynam 0 +voronoi 0 +diagram 0 +hausdorff 0 +distanc 0 +pointset 0 +euclidean 0 +motion 0 +plane 0 +symposiumon 0 +invariantsof 0 +segment 0 +universitycomput 0 +juli 0 +linkssearch 0 +tool 0 +bibliographiesaltavista 0 +infoseek 0 +excit 0 +yahoo 0 +nynex 0 +yellow 0 +glimps 0 +ncstrl 0 +librari 0 +david 0 +jone 0 +hypertext 0 +sitescornel 0 +oper 0 +stanford 0 +berkelei 0 +nation 0 +computingtc 0 +virtual 0 +address 0 +book 0 +crescenzi 0 +kann 0 +compendium 0 +foc 0 +soda 0 +stoc 0 +biologycomput 0 +carb 0 +biocomput 0 +sdsc 0 +list 0 +geometrydavid 0 +eppstein 0 +junkyard 0 +jeff 0 +erickson 0 +securitymitr 0 +corp 0 +princeton 0 +safe 0 +rivest 0 +cryptographi 0 +link 0 +miscellaneousnetscap 0 +intellicast 0 +interact 0 +tenni 0 +chess 0 +onlin 0 +talk 0 +kleinbergdepart 0 +scienceupson 0 +hallcornel 0 +universityithaca 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..8b9213b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,52 @@ +homepag 1 +cornel 1 +comput 1 +scienc 1 +japan 1 +page 1 +kazushi 0 +otakota 0 +edukazushi 0 +melco 0 +current 0 +master 0 +engin 0 +student 0 +cornellunivers 0 +receiv 0 +univers 0 +tokyo 0 +back 0 +march 0 +work 0 +mitusbishi 0 +electr 0 +corpor 0 +return 0 +degre 0 +worth 0 +isund 0 +construct 0 +start 0 +assign 0 +acquaint 0 +html 0 +forc 0 +depart 0 +inform 0 +superhighwai 0 +cours 0 +interest 0 +pictur 0 +music 0 +move 0 +sale 0 +come 0 +take 0 +februari 0 +thing 0 +want 0 +sell 0 +think 0 +advert 0 +peopl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..5f72774e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,54 @@ +research 1 +cornel 1 +comput 1 +interest 1 +algorithm 1 +complex 1 +logic 1 +algebra 1 +faculti 0 +dexter 0 +kozendext 0 +kozenjoseph 0 +newton 0 +professor 0 +engineeringphd 0 +univers 0 +interestsmi 0 +includ 0 +especiallycomplex 0 +decis 0 +problem 0 +andsemant 0 +program 0 +languag 0 +paper 0 +avail 0 +onlinekleen 0 +constraint 0 +type 0 +infer 0 +algebraautomata 0 +theori 0 +logicbibliographylist 0 +public 0 +technic 0 +reportscours 0 +notesc 0 +structur 0 +interpret 0 +programsc 0 +automata 0 +theoryfun 0 +stufffamili 0 +pictur 0 +rugbi 0 +effectcomput 0 +scienc 0 +departmentupson 0 +hallcornel 0 +universityithaca 0 +york 0 +usakozen 0 +work 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..a747cb6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,42 @@ +kreitz 1 +christoph 1 +cornel 1 +home 1 +page 1 +soon 1 +research 1 +comput 1 +theori 1 +pictur 0 +associ 0 +depart 0 +scienc 0 +univers 0 +ithaca 0 +offic 0 +phone 0 +email 0 +upson 0 +hall 0 +topic 0 +program 0 +synthesi 0 +autom 0 +deduct 0 +type 0 +teach 0 +learn 0 +german 0 +lehr 0 +lernen 0 +vorlesungsskript 0 +medienunterst 0 +uumltzt 0 +lehren 0 +person 0 +inform 0 +avail 0 +last 0 +modifi 0 +novemb 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..390f18ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,34 @@ +kuen 1 +heng 1 +cornel 1 +isi 0 +master 0 +engin 0 +comput 0 +scienc 0 +depart 0 +univers 0 +address 0 +grove 0 +street 0 +newton 0 +telephon 0 +welcom 0 +visit 0 +place 0 +myproject 0 +multimedia 0 +system 0 +cours 0 +would 0 +like 0 +read 0 +daili 0 +new 0 +taiwan 0 +home 0 +countri 0 +enjoi 0 +page 0 +still 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..95c92835 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,182 @@ +digit 2 +librari 2 +comput 2 +scienc 2 +technic 2 +report 2 +research 2 +cornel 2 +dienst 1 +author 1 +implement 1 +object 1 +work 1 +carl 1 +lagoz 1 +person 1 +number 1 +server 1 +world 1 +distribut 1 +part 1 +also 1 +workshop 1 +page 1 +project 1 +group 1 +develop 1 +ncstrl 1 +issu 1 +protocol 1 +refer 1 +technolog 1 +paper 1 +wide 1 +confer 1 +architectur 1 +servic 1 +interoper 1 +area 1 +extend 1 +framework 1 +design 1 +secur 1 +repositori 1 +final 1 +meet 1 +substitut 1 +life 1 +time 1 +never 1 +quiet 1 +home 0 +leader 0 +depart 0 +upson 0 +hall 0 +universityithaca 0 +phone 0 +internet 0 +edui 0 +lead 0 +groupin 0 +departmentat 0 +univers 0 +ourgroup 0 +manag 0 +oper 0 +network 0 +intern 0 +consortium 0 +maintain 0 +adistribut 0 +collaborateson 0 +davi 0 +thedienstsoftwar 0 +providesdistribut 0 +access 0 +worldwid 0 +current 0 +enabl 0 +drop 0 +publish 0 +document 0 +commun 0 +april 0 +manual 0 +build 0 +product 0 +chapter 0 +advanc 0 +springer 0 +verlag 0 +primari 0 +involv 0 +defin 0 +protocolsfor 0 +infrastructur 0 +collabor 0 +corpor 0 +nation 0 +initiativesto 0 +developeda 0 +darpa 0 +fund 0 +open 0 +store 0 +iso 0 +dlib 0 +magazin 0 +decemb 0 +member 0 +dlibwork 0 +interfacesand 0 +releas 0 +metadata 0 +iiin 0 +warwick 0 +amveri 0 +interest 0 +us 0 +distributedobject 0 +read 0 +posit 0 +paperfor 0 +joint 0 +mobil 0 +codeworkshop 0 +know 0 +meetm 0 +find 0 +poor 0 +contact 0 +littl 0 +moreabout 0 +charact 0 +pictur 0 +pagei 0 +luci 0 +daughter 0 +rule 0 +major 0 +outsideof 0 +toddler 0 +constant 0 +challeng 0 +lucyg 0 +mean 0 +provid 0 +avid 0 +outdoor 0 +site 0 +fast 0 +movingwat 0 +lakeand 0 +itch 0 +cano 0 +give 0 +beauti 0 +think 0 +bike 0 +ridingalong 0 +road 0 +backwood 0 +trail 0 +tell 0 +sparehour 0 +run 0 +shoe 0 +breath 0 +deeplyth 0 +fresh 0 +spend 0 +much 0 +joi 0 +physicalnor 0 +ever 0 +interfer 0 +desir 0 +fight 0 +itspreserv 0 +hope 0 +sometim 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..91bf29c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,101 @@ +homepag 2 +career 1 +comput 1 +system 1 +cornel 1 +page 1 +guid 1 +lidong 1 +zhou 1 +depart 1 +research 1 +materi 1 +distribut 1 +project 1 +secur 1 +internet 1 +network 1 +servic 1 +friend 1 +fudan 1 +buyer 1 +auto 1 +welcom 0 +sciencecornel 0 +universityithaca 0 +fall 0 +cours 0 +concept 0 +local 0 +access 0 +multimedia 0 +relat 0 +sigop 0 +paper 0 +oasi 0 +cambridg 0 +report 0 +adag 0 +author 0 +applic 0 +group 0 +level 0 +java 0 +safe 0 +program 0 +legion 0 +sirac 0 +kerbero 0 +authent 0 +massiv 0 +ocaml 0 +advanc 0 +standard 0 +robot 0 +exclus 0 +document 0 +opportun 0 +jobtrak 0 +colleg 0 +grad 0 +hunter 0 +open 0 +center 0 +onlin 0 +careermosa 0 +jobweb 0 +home 0 +xjob 0 +yingjun 0 +classmat 0 +inform 0 +resours 0 +tutori 0 +languag 0 +tool 0 +yellow 0 +book 0 +isso 0 +sunris 0 +chines 0 +soccer 0 +world 0 +edmund 0 +automobil 0 +autosit 0 +ultim 0 +insur 0 +basic 0 +legal 0 +surviv 0 +link 0 +travel 0 +agenc 0 +rank 0 +succe 0 +graduat 0 +school 0 +back 0 +indexlast 0 +updat 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..f077139e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,163 @@ +comput 1 +project 1 +link 1 +system 1 +distribut 1 +home 1 +anim 1 +hsian 1 +page 1 +scienceat 1 +cornel 1 +univers 1 +taiwan 1 +classesc 1 +final 1 +orwel 1 +remov 1 +object 1 +site 1 +swartz 1 +cool 1 +totoro 1 +wangthi 0 +major 0 +constructionlin 0 +wang 0 +master 0 +student 0 +degre 0 +inform 0 +ohio 0 +state 0 +born 0 +fangliao 0 +small 0 +villag 0 +southern 0 +coast 0 +still 0 +construct 0 +fall 0 +multimedia 0 +track 0 +digit 0 +videoe 0 +networkse 0 +vision 0 +moment 0 +base 0 +edg 0 +oper 0 +amidonc 0 +autom 0 +video 0 +transcrib 0 +annot 0 +research 0 +advisor 0 +prof 0 +ramin 0 +zabihspr 0 +natur 0 +languag 0 +processingc 0 +practic 0 +computingc 0 +practicum 0 +network 0 +visual 0 +managementc 0 +machin 0 +visionc 0 +scienc 0 +colloquimc 0 +move 0 +scene 0 +high 0 +perform 0 +audit 0 +program 0 +java 0 +webspac 0 +interest 0 +us 0 +stuffscornel 0 +info 0 +depart 0 +annual 0 +reportiee 0 +societytaiwan 0 +headlin 0 +new 0 +sinanet 0 +comth 0 +musicmovi 0 +connect 0 +movieweb 0 +movi 0 +moviemania 0 +also 0 +click 0 +collect 0 +think 0 +picturesth 0 +list 0 +best 0 +sell 0 +book 0 +releas 0 +publish 0 +world 0 +journal 0 +bookstor 0 +quot 0 +chines 0 +classic 0 +linux 0 +linkstcl 0 +line 0 +resourc 0 +softwar 0 +engin 0 +galleri 0 +hacksth 0 +earth 0 +pagemiscellan 0 +hongkong 0 +bridg 0 +hong 0 +kong 0 +linksfor 0 +like 0 +japanes 0 +take 0 +look 0 +carlo 0 +jump 0 +cja 0 +calanimag 0 +alpha 0 +chapter 0 +berkelei 0 +pagelaputa 0 +castl 0 +nausicaa 0 +vallei 0 +wind 0 +conan 0 +slump 0 +kiki 0 +legend 0 +galact 0 +hero 0 +ming 0 +pagecampu 0 +address 0 +uptown 0 +eithaca 0 +york 0 +linhsian 0 +edulast 0 +updat 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..3f9ccc12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,33 @@ +page 1 +thought 1 +libbi 1 +home 1 +essai 1 +show 1 +take 1 +look 1 +pagewelcom 0 +collect 0 +last 0 +updat 0 +sept 0 +download 0 +theme 0 +song 0 +check 0 +video 0 +clip 0 +read 0 +lista 0 +littl 0 +thing 0 +septemb 0 +june 0 +april 0 +interest 0 +projectemail 0 +mehit 0 +counter 0 +courtesi 0 +http 0 +digit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..e521a3b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,12 @@ +offic 1 +name 0 +upson 0 +hall 0 +hour 0 +mail 0 +lili 0 +cornel 0 +oper 0 +system 0 +take 0 +cours 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..cacc35c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,100 @@ +numer 1 +cornel 1 +theori 1 +linear 1 +recent 1 +lloyd 1 +trefethen 1 +depart 1 +mathemat 1 +algebra 1 +matric 1 +oper 1 +textbook 1 +student 1 +trefethenprofessorlnt 0 +edumi 0 +appoint 0 +thecomput 0 +scienc 0 +also 0 +affili 0 +thecent 0 +appli 0 +thecornel 0 +center 0 +field 0 +analysi 0 +scientif 0 +comput 0 +havea 0 +person 0 +view 0 +mean 0 +specif 0 +interest 0 +includ 0 +numericalsolut 0 +conform 0 +map 0 +approxim 0 +fluid 0 +mechan 0 +year 0 +much 0 +work 0 +hasbeen 0 +relat 0 +normal 0 +whose 0 +eigenvector 0 +notorthogon 0 +applic 0 +textbooksfinit 0 +differ 0 +spectral 0 +method 0 +siam 0 +papersmultimatlab 0 +matlab 0 +multipl 0 +processorsmatrix 0 +iter 0 +gap 0 +betweenpotenti 0 +convergencepseudospectra 0 +operatorssom 0 +papersoth 0 +itemsclass 0 +paper 0 +analysiscurriculum 0 +vitaepseudospectra 0 +bibliographi 0 +peter 0 +alfeldcurr 0 +vicki 0 +howlegubjrn 0 +jnsson 0 +yohan 0 +kimdivakar 0 +viswanathprevi 0 +jeff 0 +baggetttobi 0 +driscollalan 0 +edelman 0 +loui 0 +howel 0 +walter 0 +mascarenhasnoel 0 +nachtigalsatish 0 +reddi 0 +chuan 0 +tohsom 0 +colleaguesjim 0 +demmelann 0 +greenbaummartin 0 +gutknechtd 0 +nick 0 +highamann 0 +trefethenandr 0 +weideman 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..a15b17af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,91 @@ +system 1 +network 1 +home 1 +page 1 +comput 1 +cornel 1 +distribut 1 +internet 1 +databas 1 +luci 1 +travel 1 +music 1 +spring 1 +class 1 +manag 1 +oper 1 +softwar 1 +site 1 +stuff 1 +languag 1 +china 1 +chines 1 +welcom 0 +student 0 +scienc 0 +depart 0 +univers 0 +ithaca 0 +interest 0 +topic 0 +programminglanguag 0 +applic 0 +hobbi 0 +ping 0 +pong 0 +badminton 0 +swim 0 +photograph 0 +read 0 +resum 0 +whiz 0 +stock 0 +search 0 +analysi 0 +tool 0 +degre 0 +project 0 +practic 0 +practicum 0 +fall 0 +engin 0 +multimedia 0 +systemscontact 0 +yuwu 0 +favorit 0 +java 0 +corba 0 +silvano 0 +tkcgi 0 +html 0 +vrml 0 +object 0 +orient 0 +product 0 +server 0 +securitypc 0 +lube 0 +tune 0 +ipngip_atmcomput 0 +compani 0 +netscap 0 +busi 0 +cube 0 +sapient 0 +microsoft 0 +novel 0 +relat 0 +misc 0 +jobtrack 0 +new 0 +artvark 0 +galleri 0 +underground 0 +archiv 0 +person 0 +connect 0 +librari 0 +catalog 0 +mail 0 +sunlab 0 +caltech 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..12ce13b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,50 @@ +cornel 1 +engin 1 +comput 1 +system 1 +network 1 +linda 0 +home 0 +page 0 +universitylinda 0 +lxwu 0 +master 0 +student 0 +scienc 0 +depart 0 +univsers 0 +receiv 0 +univers 0 +massachusett 0 +lowel 0 +sinc 0 +work 0 +digit 0 +equip 0 +corp 0 +banyan 0 +main 0 +research 0 +interest 0 +mulitimedia 0 +click 0 +resum 0 +project 0 +nativ 0 +protocol 0 +stack 0 +window 0 +us 0 +multicast 0 +group 0 +layer 0 +video 0 +electron 0 +commerc 0 +kramer 0 +mart 0 +coursesfal 0 +oper 0 +multimedia 0 +photoesus 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..5749885b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,13 @@ +nikolai 0 +mateevnikolai 0 +mateevgradu 0 +studentmateev 0 +cornel 0 +upson 0 +halldepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..9c2fc12b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,54 @@ +project 1 +morgenstern 1 +cornel 1 +comput 1 +databas 1 +arpa 1 +matthew 0 +home 0 +pagematthew 0 +morgensternresearch 0 +leaderaddress 0 +engin 0 +theori 0 +centerxerox 0 +design 0 +research 0 +institutecornel 0 +universityithaca 0 +phone 0 +email 0 +edustatu 0 +visit 0 +fellow 0 +scienc 0 +princip 0 +scientist 0 +xerox 0 +laboratori 0 +scienceproject 0 +distribut 0 +heterogen 0 +system 0 +fund 0 +metadata 0 +manag 0 +multimedia 0 +document 0 +supervis 0 +select 0 +student 0 +relat 0 +area 0 +fundedresearch 0 +work 0 +academ 0 +year 0 +summer 0 +avail 0 +stop 0 +chat 0 +inform 0 +come 0 +page 0 +soon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..daaea9be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,40 @@ +graph 1 +algorithm 1 +dynam 1 +data 1 +structur 1 +monika 0 +henzing 0 +homepagemonika 0 +rauch 0 +henzingerassist 0 +professorcomput 0 +scienc 0 +departmentcornel 0 +universityithaca 0 +email 0 +cornel 0 +eduphon 0 +current 0 +leav 0 +digit 0 +equip 0 +corpor 0 +system 0 +research 0 +centerhomepageresearch 0 +interestscombinatori 0 +especi 0 +random 0 +theori 0 +lower 0 +bound 0 +recent 0 +public 0 +project 0 +pageprogram 0 +committe 0 +stoc 0 +soda 0 +homepag 0 +fall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..e9be5c88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,76 @@ +stanlei 1 +cornel 1 +univers 1 +comput 1 +scienc 1 +interest 1 +project 1 +paper 1 +huang 1 +engin 1 +kentucki 1 +area 1 +user 1 +distribut 1 +plan 1 +share 1 +home 0 +page 0 +huangmast 0 +studentmhuang 0 +sheldon 0 +courtcornel 0 +ithaca 0 +master 0 +class 0 +bachelor 0 +oper 0 +systemsdistribut 0 +systemsdatabas 0 +system 0 +inform 0 +retrievalgraph 0 +interfacesoth 0 +movi 0 +tenni 0 +horse_back 0 +ride 0 +travel 0 +read 0 +work 0 +myadvisor 0 +werner 0 +vogel 0 +robbertvan 0 +reness 0 +object 0 +integr 0 +horu 0 +link 0 +relat 0 +planplan 0 +distributionplan 0 +updateplan 0 +faqhorusc 0 +final 0 +exam 0 +collect 0 +memorydistribut 0 +memorysom 0 +technic 0 +group 0 +communicationsnapshotu 0 +level 0 +network 0 +interfac 0 +architecturejobscar 0 +pathbai 0 +jobscyberezumescar 0 +opportunitiesus 0 +stufftechn 0 +field 0 +searchbel 0 +labsspbsd 0 +sourcesjavarfclast 0 +modifi 0 +mhuang 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..11390ee4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,62 @@ +millett 1 +lynett 1 +homepag 1 +cornel 1 +person 1 +last 1 +list 1 +millettdepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +participatoryform 0 +mass 0 +speech 0 +develop 0 +internetdeserv 0 +highest 0 +protect 0 +government 0 +intrus 0 +decis 0 +aclu 0 +reno 0 +challeng 0 +script 0 +second 0 +year 0 +skit 0 +cuc 0 +holidai 0 +parti 0 +inform 0 +pictur 0 +cat 0 +updat 0 +link 0 +never 0 +abl 0 +find 0 +precis 0 +femin 0 +know 0 +peopl 0 +call 0 +feminist 0 +whenver 0 +express 0 +sentiment 0 +differenti 0 +doormat 0 +prostitut 0 +rebecca 0 +west 0 +modifi 0 +octob 0 +comment 0 +welcom 0 +copi 0 +public 0 +pleas 0 +look 0 +copyright 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..94166790 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,94 @@ +home 1 +link 1 +cornel 1 +newgroupc 1 +mishaal 1 +page 1 +univers 1 +engin 1 +interest 1 +system 1 +high 1 +pagemisha 0 +kuwaiti 0 +student 0 +master 0 +computersci 0 +program 0 +mengc 0 +graduat 0 +doubl 0 +major 0 +electr 0 +andcomput 0 +scienc 0 +worcest 0 +polytechn 0 +institut 0 +inworcest 0 +coolest 0 +place 0 +earth 0 +list 0 +stuff 0 +involv 0 +temporari 0 +servic 0 +bearaccess 0 +menu 0 +cours 0 +take 0 +machin 0 +vision 0 +practic 0 +distribut 0 +practicum 0 +perform 0 +comput 0 +advanc 0 +languag 0 +implement 0 +newgroup 0 +capac 0 +network 0 +newgroupnba 0 +databas 0 +manag 0 +newgroupoptim 0 +video 0 +transmiss 0 +meng 0 +project 0 +extens 0 +kuwait 0 +pagemi 0 +resum 0 +check 0 +stock 0 +quotescool 0 +public 0 +server 0 +hope 0 +offer 0 +conot 0 +soon 0 +weather 0 +ithaca 0 +latest 0 +new 0 +cann 0 +intern 0 +film 0 +festiv 0 +everyth 0 +wrong 0 +reason 0 +want 0 +ever 0 +accus 0 +nerd 0 +well 0 +sure 0 +cool 0 +almashanmisha 0 +educornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..5c0d35f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,131 @@ +page 2 +cool 1 +link 1 +pleas 1 +stuff 1 +make 1 +look 1 +pretti 1 +realli 1 +thing 1 +mike 1 +date 1 +would 1 +like 1 +coupl 1 +know 1 +beavi 1 +korbi 1 +last 1 +name 1 +complet 1 +golf 1 +click 1 +list 1 +kid 1 +pagethi 0 +yeah 0 +version 0 +recent 0 +browser 0 +quit 0 +sign 0 +myguestbook 0 +chanc 0 +opinion 0 +count 0 +dark 0 +stockholm 0 +right 0 +take 0 +vote 0 +poll 0 +result 0 +peopl 0 +guess 0 +ryan 0 +call 0 +vitya 0 +better 0 +construct 0 +danc 0 +frog 0 +maria 0 +mark 0 +andrew 0 +corbett 0 +suck 0 +card 0 +nicknam 0 +kevin 0 +donnel 0 +love 0 +pictur 0 +eryn 0 +crave 0 +attent 0 +want 0 +movi 0 +graphic 0 +class 0 +amaz 0 +anim 0 +plai 0 +mpeg 0 +place 0 +univers 0 +stop 0 +expand 0 +find 0 +world 0 +need 0 +person 0 +pick 0 +site 0 +made 0 +onlin 0 +student 0 +homepag 0 +lame 0 +guttermouth 0 +brought 0 +byjust 0 +cours 0 +member 0 +internet 0 +exchang 0 +peic 0 +connect 0 +whole 0 +bunch 0 +other 0 +line 0 +come 0 +join 0 +hand 0 +sing 0 +togeth 0 +spirit 0 +harmoni 0 +someth 0 +visitor 0 +number 0 +keep 0 +reset 0 +check 0 +statist 0 +accuar 0 +send 0 +mail 0 +atmak 0 +cornel 0 +edubas 0 +much 0 +random 0 +imag 0 +relat 0 +anyth 0 +thank 0 +everybodi 0 +idea 0 +us 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..fdd32a37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,22 @@ +nobuhiko 1 +mukai 1 +semest 1 +mukainobuhiko 0 +home 0 +page 0 +construct 0 +research 0 +effect 0 +jpeg 0 +compressionon 0 +multimedia 0 +system 0 +last 0 +fall 0 +made 0 +anim 0 +titl 0 +magicon 0 +comput 0 +graphic 0 +spring 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..6a2b0e65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,22 @@ +nichola 1 +how 1 +homepag 0 +click 0 +photo 0 +imag 0 +graduat 0 +studentdepart 0 +comput 0 +sciencecornel 0 +universityithaca 0 +email 0 +nihow 0 +cornel 0 +eduoffic 0 +upson 0 +hall 0 +phone 0 +resum 0 +refer 0 +person 0 +info 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..4e15e083 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,23 @@ +niko 1 +home 1 +cornel 1 +page 0 +research 0 +pitsiani 0 +upson 0 +hall 0 +dept 0 +comput 0 +scienc 0 +univers 0 +ithaca 0 +work 0 +public 0 +lectur 0 +teach 0 +java 0 +vita 0 +pointer 0 +sinc 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..13ed4008 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,186 @@ +parallel 2 +chrisochoid 2 +comput 2 +environ 1 +project 1 +proceed 1 +algorithm 1 +data 1 +softwar 1 +dynam 1 +grid 1 +gener 1 +cornel 1 +confer 1 +numer 1 +map 1 +housti 1 +rice 1 +niko 1 +scienc 1 +distribut 1 +load 1 +balanc 1 +simul 1 +ellpack 1 +student 1 +appear 1 +intern 1 +journal 1 +method 1 +page 1 +work 1 +mani 1 +research 1 +runtim 1 +multicomput 1 +adapt 1 +implement 1 +black 1 +hole 1 +master 1 +sukup 1 +fluid 1 +field 1 +mathemat 1 +tool 1 +program 1 +iter 1 +solver 1 +center 1 +mississippi 1 +advanc 1 +partial 1 +domain 1 +papachi 1 +public 1 +depart 0 +purdu 0 +univers 0 +touch 0 +facet 0 +includ 0 +support 0 +back 0 +system 0 +compil 0 +problem 0 +solv 0 +schedul 0 +design 0 +compon 0 +well 0 +known 0 +current 0 +workshop 0 +bernoulli 0 +prema 0 +portabl 0 +architecur 0 +sensit 0 +messag 0 +binari 0 +grand 0 +challeng 0 +engin 0 +florian 0 +kodukula 0 +indupraka 0 +pingali 0 +vineet 0 +ahuja 0 +reza 0 +behforooz 0 +undergradu 0 +former 0 +animesh 0 +chatterje 0 +rajani 0 +vaidyanathan 0 +select 0 +paper 0 +task 0 +bowyer 0 +watson 0 +offifth 0 +incomput 0 +relat 0 +multithread 0 +model 0 +ctctr 0 +octob 0 +appli 0 +object 0 +orient 0 +kale 0 +kohl 0 +yellick 0 +scientif 0 +menu 0 +unstructur 0 +structur 0 +thompson 0 +contemporari 0 +key 0 +special 0 +issu 0 +april 0 +toolkit 0 +collid 0 +haupt 0 +aiaa 0 +colorado 0 +spring 0 +june 0 +altern 0 +scalabl 0 +scalableparallel 0 +librari 0 +nation 0 +foundat 0 +engineeringresearch 0 +state 0 +partit 0 +heurist 0 +base 0 +parallelhardwar 0 +geometri 0 +characterist 0 +differentialequ 0 +vichnevetski 0 +knight 0 +richter 0 +imac 0 +brunswick 0 +decompos 0 +architectur 0 +kortesi 0 +decomposit 0 +differenti 0 +equat 0 +symposium 0 +domaindecomposit 0 +moscow 0 +ussr 0 +glowinski 0 +siam 0 +programmingenviron 0 +mimd 0 +machin 0 +karathanas 0 +samartzi 0 +vavali 0 +yang 0 +wang 0 +and 0 +weerawarana 0 +onsupercomput 0 +nikosc 0 +institut 0 +theori 0 +univeristi 0 +rhode 0 +hall 0 +room 0 +ithaca 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..005a1503 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,68 @@ +prema 1 +runtim 1 +parallel 1 +cornel 1 +system 1 +portabl 1 +environ 1 +model 1 +support 1 +multicomput 1 +architectur 1 +comput 1 +research 1 +compil 1 +port 1 +dynam 1 +load 1 +balanc 1 +niko 1 +chrisochoid 1 +welcom 0 +advanc 0 +institut 0 +theori 0 +center 0 +univers 0 +overview 0 +andproblem 0 +solv 0 +target 0 +scientif 0 +computingappl 0 +build 0 +implement 0 +design 0 +varieti 0 +suppot 0 +global 0 +address 0 +space 0 +memori 0 +data 0 +task 0 +program 0 +multi 0 +thread 0 +style 0 +execut 0 +automat 0 +work 0 +share 0 +mechan 0 +paper 0 +multithread 0 +adapt 0 +pdecomput 0 +ctctr 0 +journal 0 +appli 0 +numer 0 +mathemat 0 +relat 0 +group 0 +pcrc 0 +consortium 0 +copyright 0 +copi 0 +nikosc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..2400458b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,30 @@ +look 1 +pavel 0 +naumov 0 +welcom 0 +home 0 +page 0 +invit 0 +learn 0 +locat 0 +cyberspac 0 +real 0 +world 0 +work 0 +nuprl 0 +project 0 +tire 0 +take 0 +rest 0 +galleri 0 +visit 0 +cinema 0 +photo 0 +orplai 0 +game 0 +java 0 +written 0 +sundai 0 +send 0 +mail 0 +place 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..d630497a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,194 @@ +pearson 2 +comput 2 +algorithm 2 +parallel 2 +scienc 1 +design 1 +time 1 +architectur 1 +program 1 +languag 1 +work 1 +problem 1 +machin 1 +implement 1 +ieee 1 +publish 1 +region 1 +cornel 1 +david 1 +research 1 +processor 1 +connect 1 +mesh 1 +vision 1 +realiz 1 +current 1 +cost 1 +theoret 1 +effici 1 +oper 1 +gener 1 +purpos 1 +believ 1 +realli 1 +instruct 1 +neumann 1 +network 1 +intern 1 +confer 1 +vazirani 1 +fast 1 +find 1 +maxim 1 +bipartit 1 +technolog 1 +lectur 1 +note 1 +journal 1 +johnson 1 +pingali 1 +singl 1 +control 1 +linear 1 +tech 1 +report 1 +structur 1 +sigplan 1 +global 1 +regist 1 +alloc 1 +simd 1 +multiprocessor 1 +interest 0 +thesi 0 +investig 0 +highli 0 +scalabl 0 +consistingof 0 +simpl 0 +dimension 0 +guid 0 +perhap 0 +year 0 +henc 0 +materi 0 +taken 0 +place 0 +crystal 0 +molecul 0 +lattic 0 +long 0 +goal 0 +prepar 0 +theubiquit 0 +offer 0 +must 0 +heed 0 +lawsof 0 +physic 0 +attent 0 +chip 0 +spatial 0 +layoutand 0 +hidden 0 +commun 0 +accomplishedbi 0 +pursu 0 +requir 0 +practic 0 +could 0 +character 0 +feasibl 0 +studi 0 +ihav 0 +produc 0 +cellular 0 +couldb 0 +hardwar 0 +simul 0 +thisarchitectur 0 +system 0 +designfor 0 +like 0 +proteinstructur 0 +grand 0 +challeng 0 +parallelcomput 0 +power 0 +succe 0 +becom 0 +commodityand 0 +sold 0 +desktop 0 +video 0 +game 0 +direct 0 +futur 0 +includ 0 +vlsi 0 +architectureand 0 +wide 0 +us 0 +hideth 0 +detail 0 +reflect 0 +underlyingvon 0 +architectureha 0 +good 0 +thing 0 +exploit 0 +parallelmachin 0 +need 0 +easyto 0 +estim 0 +public 0 +dunten 0 +arm 0 +kiewit 0 +high 0 +speed 0 +campu 0 +societi 0 +compcon 0 +fall 0 +pillai 0 +near 0 +optim 0 +placement 0 +sensor 0 +element 0 +transact 0 +inform 0 +theori 0 +foundat 0 +softwar 0 +sequenti 0 +set 0 +entri 0 +exit 0 +tree 0 +proceed 0 +pldi 0 +notic 0 +polynomi 0 +chang 0 +make 0 +schedul 0 +workshop 0 +irregularli 0 +irregular 0 +sept 0 +zippel 0 +allerton 0 +press 0 +select 0 +area 0 +cryptographi 0 +appear 0 +depart 0 +upson 0 +hallcornel 0 +universityithaca 0 +york 0 +usaemail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..f8b01a7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,126 @@ +page 2 +cornel 2 +work 1 +year 1 +program 1 +engin 1 +comput 1 +chapter 1 +mail 1 +todd 1 +peskin 1 +student 1 +client 1 +java 1 +applet 1 +content 1 +favorit 1 +site 1 +meng 1 +colleg 1 +receiv 1 +master 1 +follow 1 +also 1 +acacia 1 +contact 1 +sinc 1 +semest 1 +intern 1 +experi 1 +develop 1 +server 1 +softwar 1 +creat 1 +construct 1 +pictur 0 +resum 0 +cours 0 +taken 0 +univers 0 +joint 0 +degre 0 +offer 0 +jointli 0 +johnson 0 +graduat 0 +school 0 +manag 0 +scienc 0 +busi 0 +administr 0 +current 0 +presid 0 +fratern 0 +brother 0 +would 0 +like 0 +becom 0 +part 0 +list 0 +pleas 0 +eduand 0 +soon 0 +suppli 0 +andyour 0 +roll 0 +number 0 +abl 0 +complet 0 +request 0 +quickli 0 +well 0 +best 0 +reach 0 +check 0 +round 0 +alwai 0 +found 0 +mani 0 +time 0 +log 0 +depart 0 +fall 0 +junior 0 +workeda 0 +throughth 0 +cooper 0 +enabl 0 +cornellundergradu 0 +supplement 0 +classroom 0 +knowledg 0 +practic 0 +compani 0 +theirfield 0 +enhanc 0 +nation 0 +level 0 +databas 0 +system 0 +isrun 0 +microsystem 0 +run 0 +window 0 +espn 0 +stock 0 +quot 0 +onlin 0 +final 0 +project 0 +larg 0 +mpeg 0 +file 0 +visitor 0 +februari 0 +still 0 +hope 0 +possibl 0 +includ 0 +us 0 +case 0 +wonder 0 +ticker 0 +tape 0 +borrow 0 +permiss 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..86957dbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,54 @@ +pierc 1 +home 1 +stuff 1 +cornel 1 +page 1 +david 1 +student 1 +comput 1 +scienc 1 +ithaca 1 +pittsburgh 1 +pennsylvania 1 +read 1 +dave 0 +univers 0 +address 0 +offic 0 +upson 0 +hall 0 +valentin 0 +person 0 +second 0 +year 0 +recent 0 +although 0 +familycurr 0 +resid 0 +citi 0 +approxim 0 +halfwai 0 +philadelphia 0 +andharrisburg 0 +famou 0 +shop 0 +outlet 0 +otherwis 0 +younev 0 +want 0 +howev 0 +great 0 +place 0 +mani 0 +alreadi 0 +know 0 +sinceit 0 +imposs 0 +without 0 +go 0 +throughpittsburgh 0 +month 0 +favorit 0 +quot 0 +list 0 +work 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..86150ff1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,123 @@ +cornel 1 +comput 1 +talk 1 +center 1 +group 1 +compil 1 +system 1 +present 1 +indupraka 1 +kodukula 1 +home 1 +page 1 +theori 1 +univers 1 +scienc 1 +work 1 +research 1 +applic 1 +architectur 1 +vliw 1 +public 1 +imperfectli 1 +nest 1 +abl 1 +loop 1 +transform 1 +summer 1 +support 1 +packag 1 +engin 0 +ithaca 0 +praka 0 +student 0 +depart 0 +prior 0 +tothat 0 +undergradu 0 +madra 0 +bernoulli 0 +prof 0 +keshav 0 +pingali 0 +member 0 +nawaaz 0 +ahm 0 +vladimir 0 +kotlyar 0 +vijai 0 +menon 0 +paul 0 +stodghil 0 +also 0 +affili 0 +advanc 0 +institut 0 +interplai 0 +runtim 0 +tradit 0 +andmultiprocessor 0 +deriv 0 +fromscientif 0 +imag 0 +process 0 +multimedia 0 +withibm 0 +hasinterest 0 +well 0 +given 0 +seri 0 +dens 0 +technolog 0 +first 0 +athp 0 +chelmsford 0 +wasabout 0 +necess 0 +deal 0 +looptransform 0 +handl 0 +trivial 0 +code 0 +framework 0 +perform 0 +loopparallel 0 +seminar 0 +schloss 0 +dagstuhl 0 +april 0 +watson 0 +regard 0 +useof 0 +octob 0 +lab 0 +palo 0 +alto 0 +regardingdata 0 +centric 0 +multi 0 +level 0 +block 0 +teach 0 +taught 0 +program 0 +andoper 0 +project 0 +czar 0 +instal 0 +maintain 0 +availableund 0 +gener 0 +licens 0 +departmentmachin 0 +check 0 +andfind 0 +handi 0 +tip 0 +alsofind 0 +extens 0 +info 0 +random 0 +link 0 +person 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..eb3e146e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,49 @@ +praveen 1 +seshadri 1 +page 1 +ithaca 1 +databas 1 +project 1 +data 1 +home 0 +assist 0 +professor 0 +comput 0 +scienc 0 +depart 0 +cornel 0 +univers 0 +upson 0 +hall 0 +offic 0 +advanc 0 +system 0 +fall 0 +predat 0 +dbm 0 +adt 0 +know 0 +case 0 +enhanc 0 +abstract 0 +type 0 +sigmod 0 +submiss 0 +profession 0 +public 0 +time 0 +order 0 +manag 0 +sequenc 0 +postscript 0 +thesi 0 +tree 0 +save 0 +format 0 +person 0 +warren 0 +road 0 +ranjani 0 +ramamurthi 0 +green 0 +packer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..64f038ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,353 @@ +sequenc 3 +queri 3 +data 3 +databas 2 +relat 2 +oper 2 +model 2 +record 2 +system 2 +optim 2 +project 2 +order 2 +sequin 2 +us 2 +time 1 +manag 1 +effici 1 +languag 1 +posit 1 +praveen 1 +seshadri 1 +express 1 +evalu 1 +variou 1 +support 1 +process 1 +miron 1 +livni 1 +ramakrishnan 1 +object 1 +import 1 +requir 1 +includ 1 +scienc 1 +kind 1 +techniqu 1 +implement 1 +nest 1 +complex 1 +storag 1 +earthquak 1 +similar 1 +click 1 +raghu 1 +wisc 1 +domain 1 +exist 1 +allow 1 +need 1 +like 1 +next 1 +built 1 +defin 1 +also 1 +propos 1 +shore 1 +extens 1 +volcano 1 +recent 1 +plan 1 +scan 1 +singl 1 +mani 1 +view 1 +orient 1 +zoom 1 +group 1 +involv 1 +server 1 +proceed 1 +confer 1 +madison 1 +document 1 +collect 1 +set 1 +declar 1 +manner 1 +advantag 1 +user 1 +tempor 1 +previou 1 +demonstr 1 +feasibl 1 +form 1 +embed 1 +base 1 +exampl 1 +inform 1 +meteorolog 1 +phenomena 1 +sequenti 1 +strength 1 +greater 1 +would 1 +sort 1 +join 1 +store 1 +buffer 1 +gener 1 +answer 1 +detail 1 +aredescrib 1 +publish 1 +paper 1 +postscript 1 +version 1 +map 1 +call 1 +could 1 +flavor 1 +explor 1 +collaps 1 +expand 1 +last 1 +work 1 +probabl 1 +devis 1 +result 1 +client 1 +insid 1 +provid 1 +depart 1 +home 0 +pageth 0 +construct 0 +content 0 +objectivescurr 0 +statusmotiv 0 +exampleseq 0 +languageoptim 0 +techniquesseq 0 +developmentpublicationsrel 0 +workcontact 0 +informationproject 0 +number 0 +applic 0 +processingof 0 +larg 0 +amount 0 +theseappl 0 +financi 0 +histor 0 +analysi 0 +econom 0 +social 0 +metereolog 0 +medic 0 +andbiolog 0 +inadequ 0 +regard 0 +treat 0 +consequ 0 +tediou 0 +ineffici 0 +abstract 0 +util 0 +semanticstak 0 +uniqu 0 +opportun 0 +avail 0 +evaluationintegr 0 +canstor 0 +combin 0 +sequencesthes 0 +serv 0 +goal 0 +themost 0 +notion 0 +natur 0 +consid 0 +issu 0 +studi 0 +theori 0 +theoret 0 +idea 0 +statusth 0 +current 0 +statu 0 +algebraicqueri 0 +compos 0 +analogousto 0 +composit 0 +algebra 0 +describ 0 +identifi 0 +candeclar 0 +likesql 0 +vice 0 +versa 0 +build 0 +disk 0 +architectur 0 +sever 0 +megabyt 0 +integr 0 +motiv 0 +querya 0 +weather 0 +monitor 0 +occurr 0 +event 0 +scientist 0 +ask 0 +erupt 0 +didth 0 +richter 0 +scale 0 +featur 0 +groupbi 0 +claus 0 +correl 0 +subqueri 0 +aggregatefunct 0 +convent 0 +find 0 +execut 0 +even 0 +given 0 +knowledg 0 +howev 0 +sequencesord 0 +lock 0 +step 0 +merg 0 +temporari 0 +whenev 0 +valu 0 +check 0 +possibl 0 +therefor 0 +littl 0 +memori 0 +modelth 0 +present 0 +gist 0 +basic 0 +ordereddomain 0 +relationship 0 +andposit 0 +dual 0 +distinct 0 +wai 0 +recordsmap 0 +respect 0 +give 0 +rise 0 +either 0 +relationaloper 0 +overlap 0 +contain 0 +andaggreg 0 +researchersin 0 +commun 0 +offset 0 +movingaggreg 0 +mean 0 +associ 0 +instanc 0 +daili 0 +weekli 0 +hourli 0 +part 0 +deal 0 +make 0 +easi 0 +case 0 +real 0 +worldsitu 0 +extend 0 +instead 0 +extensionof 0 +indic 0 +practic 0 +ofseq 0 +languagew 0 +usingwhich 0 +specifi 0 +languagei 0 +except 0 +input 0 +queriesa 0 +well 0 +descript 0 +techniquesw 0 +thathav 0 +transform 0 +meta 0 +cach 0 +intermedi 0 +algorithm 0 +reli 0 +cost 0 +estim 0 +observ 0 +access 0 +stream 0 +strategi 0 +take 0 +account 0 +developmentth 0 +serverarchitectur 0 +multipl 0 +viaa 0 +multi 0 +thread 0 +ontop 0 +subset 0 +languageswhich 0 +mode 0 +arbitrarylevel 0 +viceversa 0 +supportfor 0 +type 0 +function 0 +detailson 0 +publicationssequ 0 +sigmod 0 +framework 0 +datapraveen 0 +ieee 0 +engin 0 +march 0 +design 0 +systempraveen 0 +submit 0 +vldb 0 +queriesraghu 0 +michael 0 +cheng 0 +intern 0 +comad 0 +decemb 0 +workthedevis 0 +complementari 0 +visualizationenviron 0 +front 0 +pose 0 +examin 0 +graphic 0 +peopl 0 +research 0 +servercontact 0 +informationfor 0 +contact 0 +eduraghu 0 +edumiron 0 +educomput 0 +univers 0 +wisconsin 0 +dayton 0 +street 0 +modifi 0 +seshadripraveen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..1ab4fc92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,50 @@ +ralph 1 +student 1 +comput 1 +scienc 1 +fellow 1 +cornel 1 +benzingerralph 0 +benzingerw 0 +sich 0 +seinen 0 +lorbeeren 0 +ausruht 0 +trgt 0 +derfalschen 0 +stell 0 +stori 0 +exchang 0 +univers 0 +karlsruh 0 +germani 0 +german 0 +august 0 +studienstiftung 0 +deutschen 0 +volk 0 +fulbright 0 +member 0 +siemen 0 +international 0 +studentenkrei 0 +alumnusat 0 +graduat 0 +depart 0 +cours 0 +taken 0 +advanc 0 +program 0 +languag 0 +design 0 +analysi 0 +algorithm 0 +reason 0 +knowledg 0 +contact 0 +inform 0 +mail 0 +offic 0 +upson 0 +hall 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..e97da75a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,291 @@ +audio 4 +latex 4 +postscript 4 +section 3 +aster 3 +render 2 +express 2 +exampl 2 +produc 2 +us 2 +notic 2 +convei 2 +formula 1 +structur 1 +second 1 +first 1 +mathemat 1 +gener 1 +document 1 +format 1 +book 1 +demonstr 1 +fraction 1 +bruno 1 +nest 1 +integr 1 +listen 1 +reader 1 +comput 1 +spoken 1 +system 1 +version 1 +talk 1 +demo 1 +output 1 +effect 1 +along 1 +dimens 1 +superscript 1 +allow 1 +knuth 1 +percept 1 +recogn 1 +speak 1 +base 1 +interpret 1 +refer 1 +present 1 +level 1 +complex 1 +technic 1 +made 1 +avail 1 +three 1 +stereo 1 +visual 1 +progress 1 +continu 1 +file 1 +contain 1 +simpl 1 +succinctli 1 +vari 1 +space 1 +subscript 1 +independ 1 +taken 1 +power 1 +unambigu 1 +move 1 +monoton 1 +algebra 1 +follow 1 +written 1 +hard 1 +logarithm 1 +context 1 +specif 1 +user 1 +variabl 1 +tripl 1 +cross 1 +meaning 1 +name 1 +inton 1 +text 1 +intermix 1 +quantifi 1 +calcul 1 +sinc 1 +hear 1 +substitut 1 +process 1 +raman 1 +titl 0 +demonstrationi 0 +dedic 0 +guid 0 +read 0 +forrend 0 +develop 0 +myphd 0 +page 0 +thesi 0 +approxim 0 +hour 0 +record 0 +blind 0 +abstract 0 +print 0 +hypertext 0 +enhanc 0 +inlin 0 +imag 0 +compon 0 +origin 0 +input 0 +speech 0 +dectalk 0 +digit 0 +mulaw 0 +tabl 0 +mono 0 +encod 0 +dvip 0 +difficult 0 +suggest 0 +initi 0 +sequenti 0 +short 0 +typic 0 +show 0 +peopl 0 +andround 0 +quick 0 +overview 0 +faad 0 +casey 0 +want 0 +look 0 +place 0 +singl 0 +examplessinc 0 +would 0 +voic 0 +inflect 0 +paus 0 +toconvei 0 +group 0 +state 0 +renderingsub 0 +attribut 0 +audiost 0 +orthogon 0 +dimensionus 0 +mutual 0 +concept 0 +expon 0 +verbatim 0 +donald 0 +layoutoper 0 +compris 0 +symbol 0 +verydiffer 0 +defin 0 +monotonicchang 0 +notion 0 +vital 0 +school 0 +squar 0 +root 0 +choic 0 +trigonometr 0 +ident 0 +notat 0 +ambigu 0 +complet 0 +absenc 0 +parenthesi 0 +sever 0 +heurist 0 +construct 0 +correct 0 +tree 0 +forthes 0 +chosen 0 +reduc 0 +cognit 0 +load 0 +oppos 0 +seri 0 +rule 0 +asexpon 0 +wire 0 +isfulli 0 +customiz 0 +probabl 0 +innocu 0 +also 0 +mostdifficult 0 +imposs 0 +determin 0 +ofintegr 0 +applic 0 +theintegr 0 +oper 0 +brows 0 +piec 0 +shown 0 +trick 0 +experienc 0 +ofhuman 0 +error 0 +summat 0 +limit 0 +referenc 0 +equat 0 +meant 0 +illustr 0 +ofcross 0 +interact 0 +enabl 0 +give 0 +referenceableobject 0 +object 0 +latercross 0 +distanc 0 +good 0 +thati 0 +interest 0 +challeng 0 +exponenti 0 +followingdeepli 0 +emac 0 +full 0 +fledgedsymbol 0 +interfac 0 +directli 0 +justa 0 +well 0 +matrix 0 +dimension 0 +thematrix 0 +commenc 0 +left 0 +right 0 +aseach 0 +element 0 +ofcomput 0 +program 0 +heard 0 +took 0 +secondsto 0 +util 0 +featur 0 +spacenot 0 +human 0 +still 0 +changeth 0 +size 0 +shape 0 +head 0 +take 0 +soon 0 +even 0 +long 0 +forget 0 +begin 0 +thetim 0 +later 0 +techniquefor 0 +proper 0 +glori 0 +like 0 +upon 0 +request 0 +replac 0 +identifi 0 +renderingsconvei 0 +thesub 0 +separ 0 +perform 0 +orpostscript 0 +equival 0 +case 0 +lower 0 +constraint 0 +numer 0 +denomin 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..baba5282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,63 @@ +comput 1 +ravi 1 +scienc 1 +foundat 1 +self 1 +learn 1 +confer 1 +kumar 1 +cornel 1 +program 1 +check 1 +approxim 1 +ieee 1 +octob 1 +funda 1 +uumln 1 +ronitt 1 +rubinfeld 1 +test 1 +sivakumar 1 +lnc 1 +theori 1 +combinator 1 +parallel 1 +process 1 +depart 0 +univers 0 +ithaca 0 +polynomi 0 +function 0 +equat 0 +effici 0 +correct 0 +linear 0 +recurr 0 +without 0 +gener 0 +bottleneck 0 +softwar 0 +technolog 0 +theoret 0 +decemb 0 +bound 0 +width 0 +branch 0 +juli 0 +latin 0 +squar 0 +extens 0 +june 0 +alexand 0 +russel 0 +sundaram 0 +scalabl 0 +studi 0 +intern 0 +august 0 +jeyakumar 0 +muthukumarasami 0 +umakishor 0 +ramachandran 0 +gautam 0 +shah 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..4f7f7292 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,147 @@ +comput 2 +ramin 1 +zabih 1 +vision 1 +imag 1 +avail 1 +justin 1 +confer 1 +interest 1 +multimedia 1 +also 1 +greg 1 +pass 1 +miller 1 +base 1 +ieee 1 +novemb 1 +program 1 +current 1 +us 1 +undergradu 1 +content 1 +workshop 1 +third 1 +teach 1 +introduct 1 +cours 1 +page 1 +cvpr 1 +held 1 +home 0 +pageramin 0 +zabihassist 0 +professorrdz 0 +cornel 0 +researchmi 0 +research 0 +construct 0 +search 0 +engin 0 +method 0 +develop 0 +recent 0 +think 0 +econom 0 +impact 0 +freeli 0 +price 0 +inform 0 +essai 0 +subject 0 +appear 0 +phil 0 +agr 0 +electron 0 +newslett 0 +network 0 +observ 0 +march 0 +studentsi 0 +work 0 +student 0 +jing 0 +huang 0 +vera 0 +kettnak 0 +olga 0 +veksler 0 +spend 0 +fair 0 +amount 0 +time 0 +variou 0 +princip 0 +voskuhl 0 +includ 0 +scott 0 +cytacki 0 +szewczyk 0 +publicationsmost 0 +public 0 +postscript 0 +acrobat 0 +format 0 +free 0 +reader 0 +varieti 0 +differ 0 +architectur 0 +adob 0 +histogram 0 +refin 0 +retriev 0 +applic 0 +sarasota 0 +florida 0 +decemb 0 +compar 0 +color 0 +coher 0 +vector 0 +fourth 0 +boston 0 +massachusett 0 +featur 0 +algorithm 0 +detect 0 +classifi 0 +scene 0 +break 0 +kevin 0 +francisco 0 +california 0 +parametr 0 +local 0 +transform 0 +visual 0 +correspond 0 +john 0 +woodfil 0 +european 0 +stockholm 0 +sweden 0 +teachingi 0 +spring 0 +scribe 0 +note 0 +lectur 0 +taught 0 +profession 0 +activitiesi 0 +comitte 0 +pattern 0 +recognit 0 +juan 0 +june 0 +organ 0 +committe 0 +access 0 +video 0 +librari 0 +conjunct 0 +acknowledgementsthi 0 +design 0 +courtesi 0 +huttenlocherlast 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..96f08d62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,4 @@ +roderick 0 +moten 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..477081cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,45 @@ +ronitt 1 +comput 1 +scienc 1 +rubinfeld 1 +cornel 1 +depart 1 +fall 1 +homepageronitt 0 +rubinfeldi 0 +assist 0 +professor 0 +recent 0 +paper 0 +talk 0 +cours 0 +random 0 +spring 0 +engin 0 +graduat 0 +student 0 +funda 0 +ergun 0 +ravi 0 +kumar 0 +fair 0 +homepag 0 +wasserman 0 +page 0 +describ 0 +work 0 +research 0 +area 0 +result 0 +check 0 +address 0 +rubinfeldcomput 0 +upson 0 +hallcornel 0 +universityithaca 0 +york 0 +telephon 0 +email 0 +edupictur 0 +nephew 0 +eitan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..f156c208 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,73 @@ +comput 1 +scienc 1 +depart 1 +birman 1 +distribut 1 +technolog 1 +friedman 1 +doctor 1 +associ 1 +cornel 1 +work 1 +system 1 +project 1 +thedepart 1 +thetechnion 1 +israel 1 +institut 1 +technic 1 +report 1 +cornellunivers 1 +implement 1 +friedmanroi 0 +friedmanpost 0 +universityroi 0 +edui 0 +post 0 +scienceatcornel 0 +univers 0 +withken 0 +androbbert 0 +rennessein 0 +area 0 +mainli 0 +thehoru 0 +receiv 0 +advisor 0 +washagit 0 +attiya 0 +thesi 0 +titl 0 +wasconsist 0 +condit 0 +share 0 +memori 0 +current 0 +also 0 +involv 0 +themilliped 0 +withassaf 0 +schuster 0 +recent 0 +papersr 0 +trade 0 +consist 0 +avail 0 +us 0 +group 0 +commun 0 +reliabl 0 +scalabledistribut 0 +coprocessor 0 +appear 0 +tina 0 +vaysburd 0 +replic 0 +state 0 +machin 0 +partition 0 +network 0 +full 0 +list 0 +public 0 +clickher 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..27012ac8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,28 @@ +cornel 1 +daniela 1 +home 1 +page 1 +research 1 +comput 1 +scienc 1 +associ 0 +photograph 0 +address 0 +upson 0 +hall 0 +depart 0 +univers 0 +ithaca 0 +model 0 +simul 0 +recent 0 +paper 0 +version 0 +onlin 0 +tech 0 +report 0 +librari 0 +catalogc 0 +dept 0 +infodesign 0 +institut 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..6981436b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,92 @@ +system 2 +horu 2 +ithaca 1 +jazz 1 +dutch 1 +group 1 +commun 1 +research 1 +brand 1 +network 1 +design 1 +perform 1 +lightweight 1 +version 1 +protocol 1 +composit 1 +support 1 +robbert 0 +renesserobbert 0 +renessesenior 0 +associatecornel 0 +universityrvr 0 +cornel 0 +edui 0 +senior 0 +associ 0 +depart 0 +comput 0 +scienceatcornel 0 +universityinithaca 0 +work 0 +withken 0 +birmanin 0 +area 0 +distribut 0 +advisor 0 +wasandi 0 +tanenbaum 0 +interestsmi 0 +babi 0 +girl 0 +hous 0 +tacoma 0 +project 0 +caml 0 +applet 0 +nynetth 0 +ageless 0 +band 0 +swing 0 +danc 0 +guitar 0 +accordion 0 +sharewar 0 +stuffcornel 0 +club 0 +contain 0 +mani 0 +link 0 +netherland 0 +ithacaithacanet 0 +spinner 0 +market 0 +place 0 +paperssoftwar 0 +reliabl 0 +scientif 0 +american 0 +html 0 +framework 0 +incorpor 0 +resourc 0 +inform 0 +flow 0 +control 0 +strong 0 +weak 0 +virtual 0 +synchroni 0 +flexibl 0 +secur 0 +architectur 0 +fault 0 +toler 0 +complex 0 +multi 0 +media 0 +applic 0 +us 0 +oper 0 +mobil 0 +agent 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..12293dc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,100 @@ +sabel 1 +laura 1 +system 1 +comput 1 +cornel 1 +marzullo 1 +univers 1 +failur 1 +distribut 1 +page 1 +scienc 1 +detect 1 +detector 1 +keith 1 +technic 1 +report 1 +postscript 1 +copi 1 +click 1 +proceed 1 +inform 1 +research 1 +asynchron 1 +approxim 1 +perfect 1 +asynchronousdistribut 1 +version 1 +appear 1 +symposium 1 +octob 1 +jelli 1 +bingo 1 +profession 0 +doctor 0 +professorkeith 0 +california 0 +diego 0 +formor 0 +tushar 0 +chandra 0 +sfailur 0 +final 0 +finish 0 +thesi 0 +public 0 +elect 0 +consensu 0 +februari 0 +submit 0 +process 0 +letter 0 +annual 0 +principl 0 +distributedcomput 0 +august 0 +reliabl 0 +march 0 +revis 0 +june 0 +us 0 +consist 0 +subcut 0 +stabl 0 +properti 0 +intern 0 +workshop 0 +algorithm 0 +wdag 0 +publish 0 +springer 0 +verlag 0 +lecturenot 0 +seri 0 +expand 0 +horizon 0 +cow 0 +interest 0 +strawberri 0 +tart 0 +blow 0 +torch 0 +experi 0 +alpacanet 0 +electron 0 +gourmet 0 +guid 0 +thebobbi 0 +award 0 +especi 0 +spam 0 +belli 0 +bean 0 +free 0 +sampl 0 +answer 0 +survei 0 +zone 0 +canplai 0 +cash 0 +prize 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..7ecd0ca0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,168 @@ +system 2 +toueg 2 +distribut 2 +wait 2 +free 2 +object 2 +consensu 1 +comput 1 +solv 1 +failur 1 +fault 1 +algorithm 1 +toler 1 +asynchron 1 +process 1 +hierarchi 1 +journal 1 +research 1 +work 1 +jayanti 1 +failuredetector 1 +share 1 +explor 1 +implement 1 +level 1 +chandra 1 +proceed 1 +symposium 1 +interest 1 +messag 1 +pass 1 +memori 1 +result 1 +prasad 1 +scienc 1 +determin 1 +crash 1 +unreli 1 +detector 1 +correct 1 +weakest 1 +type 1 +clock 1 +synchron 1 +databas 1 +neiger 1 +principl 1 +august 1 +canada 1 +faculti 0 +professorph 0 +princeton 0 +univers 0 +interestsmi 0 +includ 0 +toleranceand 0 +real 0 +time 0 +methodolog 0 +paradigm 0 +forfault 0 +andshar 0 +long 0 +term 0 +goal 0 +bridg 0 +gapbetween 0 +theoret 0 +need 0 +effici 0 +practicalsolut 0 +collabor 0 +withtushar 0 +chandraand 0 +student 0 +continu 0 +onunreli 0 +fundament 0 +computingst 0 +problem 0 +cannot 0 +adeterminist 0 +impossibilityresult 0 +inher 0 +difficulti 0 +whether 0 +aprocess 0 +mere 0 +slow 0 +inour 0 +abl 0 +exactli 0 +much 0 +informationabout 0 +necessari 0 +suffici 0 +wefirst 0 +show 0 +canmak 0 +infinit 0 +number 0 +mistak 0 +systemswith 0 +major 0 +prove 0 +solveconsensu 0 +provid 0 +least 0 +muchinform 0 +thu 0 +amajor 0 +practicalityof 0 +applic 0 +reli 0 +theircorrect 0 +concurr 0 +consist 0 +commun 0 +sharedobject 0 +accessesthi 0 +guarante 0 +respons 0 +even 0 +otherprocess 0 +ofobject 0 +assign 0 +thatcorrespond 0 +abil 0 +particular 0 +shown 0 +well 0 +known 0 +herlihi 0 +robust 0 +inform 0 +anobject 0 +us 0 +atani 0 +question 0 +whetherrobust 0 +exist 0 +select 0 +public 0 +bracha 0 +broadcast 0 +protocol 0 +srikanth 0 +optim 0 +abbadi 0 +maintain 0 +avail 0 +partit 0 +replic 0 +transact 0 +automat 0 +increas 0 +montreal 0 +hadzilaco 0 +detectorfor 0 +vancouv 0 +ieee 0 +foundat 0 +octob 0 +pittsburgh 0 +pennsylvania 0 +simul 0 +common 0 +knowledg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..ed06753c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,100 @@ +weber 2 +compil 1 +samuel 1 +cornel 1 +univers 1 +program 1 +comput 1 +semant 1 +bloom 1 +silicon 1 +page 1 +assist 1 +design 1 +public 1 +confer 1 +brown 1 +act 1 +professor 1 +master 1 +engin 1 +scienc 1 +research 1 +softwar 1 +languag 1 +distribut 1 +system 1 +technic 1 +report 1 +submit 1 +verifi 1 +algebra 1 +thesi 1 +editor 1 +proceed 1 +messag 1 +complex 1 +byzantin 1 +agreement 1 +upson 0 +hallphon 0 +email 0 +educurr 0 +director 0 +interest 0 +specif 0 +verif 0 +cours 0 +technolog 0 +techniqu 0 +fall 0 +introduct 0 +spring 0 +metatheori 0 +calculu 0 +formal 0 +delai 0 +insensit 0 +circuit 0 +cornellunivers 0 +journal 0 +process 0 +meta 0 +theori 0 +practic 0 +august 0 +exercis 0 +appli 0 +structur 0 +oper 0 +workshop 0 +foundat 0 +applic 0 +bakker 0 +roever 0 +rozenberg 0 +lectur 0 +note 0 +springer 0 +verlag 0 +scheme 0 +knight 0 +savag 0 +advanc 0 +vlsi 0 +parallel 0 +amdur 0 +hadzilaco 0 +binari 0 +crash 0 +failur 0 +bound 0 +toronto 0 +septemb 0 +seshadri 0 +wortman 0 +small 0 +analysi 0 +concurr 0 +sigplan 0 +implement 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..4d637872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,81 @@ +comput 1 +project 1 +system 1 +graphic 1 +isi 1 +master 1 +engin 1 +cornel 1 +window 1 +interest 1 +object 1 +orbix 1 +sean 1 +landi 1 +current 1 +cours 1 +render 1 +draw 1 +base 1 +design 1 +orient 1 +work 1 +team 1 +basebal 1 +landissean 0 +sciencewelcom 0 +home 0 +page 0 +resum 0 +inform 0 +advanc 0 +databas 0 +systemsc 0 +past 0 +machin 0 +percept 0 +final 0 +analyz 0 +color 0 +book 0 +clickherefor 0 +postscript 0 +version 0 +sampl 0 +weanalyz 0 +topic 0 +content 0 +imag 0 +retriev 0 +interior 0 +educ 0 +program 0 +patternsprofession 0 +distribut 0 +divis 0 +stratu 0 +lead 0 +develop 0 +product 0 +combin 0 +acorba 0 +compliant 0 +request 0 +broker 0 +iona 0 +technolog 0 +releas 0 +person 0 +favorit 0 +alpin 0 +ski 0 +golf 0 +plai 0 +softbal 0 +card 0 +collectingi 0 +reach 0 +comeduc 0 +sheet 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..6b6d0141 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,62 @@ +comput 1 +system 1 +project 1 +seena 1 +scienc 1 +engin 1 +oper 1 +graphic 1 +univers 1 +cornel 1 +ithaca 1 +cherangara 0 +cherangaramast 0 +engineeringclass 0 +dept 0 +sciencecornel 0 +welcom 0 +homepagecurr 0 +student 0 +depart 0 +tech 0 +degre 0 +colleg 0 +trivandrum 0 +kerala 0 +india 0 +inform 0 +cours 0 +taken 0 +cornelluniversityfal 0 +practicum 0 +specif 0 +hoca 0 +softwar 0 +multimedia 0 +post 0 +processingalgorithm 0 +jpeg 0 +artifact 0 +reduct 0 +spring 0 +cspracticum 0 +anim 0 +magic 0 +carpet 0 +distribut 0 +colloqium 0 +summer 0 +model 0 +java 0 +parametr 0 +equat 0 +viewer 0 +click 0 +postscript 0 +version 0 +myresum 0 +mapl 0 +york 0 +last 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..6c1635d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,238 @@ +imag 2 +rosen 2 +sharma 2 +video 2 +us 2 +like 1 +languag 1 +name 1 +internet 1 +file 1 +sharmila 1 +stanford 1 +multicast 1 +protocol 1 +develop 1 +system 1 +script 1 +widget 1 +call 1 +mean 1 +cornel 1 +student 1 +stream 1 +media 1 +part 1 +link 1 +softwar 1 +class 1 +also 1 +text 1 +vxtreme 1 +applic 1 +paper 1 +oper 1 +support 1 +nativ 1 +mode 1 +releas 1 +written 1 +indian 1 +stuff 1 +cool 1 +manipul 1 +data 1 +make 1 +implement 1 +give 1 +imagefram 1 +interpret 1 +shell 1 +gener 1 +record 1 +postscript 1 +write 1 +peopl 1 +claim 1 +hors 1 +offic 0 +upson 0 +hall 0 +email 0 +came 0 +spent 0 +coupl 0 +year 0 +work 0 +research 0 +live 0 +audio 0 +modifiedigmp 0 +unicast 0 +layer 0 +virtual 0 +classroom 0 +initi 0 +prototyp 0 +deploi 0 +spring 0 +fall 0 +quarter 0 +altern 0 +instruct 0 +televis 0 +network 0 +sitn 0 +program 0 +microwav 0 +remot 0 +site 0 +asynchron 0 +access 0 +lectur 0 +note 0 +stumbl 0 +upon 0 +fact 0 +slide 0 +portion 0 +enhanc 0 +greatli 0 +leadto 0 +format 0 +compani 0 +palo 0 +alto 0 +silicon 0 +vallei 0 +start 0 +client 0 +sever 0 +multimedia 0 +deliveri 0 +signal 0 +keshav 0 +sigcomm 0 +segment 0 +mix 0 +navin 0 +chaddha 0 +avneesh 0 +agarw 0 +anoop 0 +gupta 0 +asilomar 0 +igmp 0 +group 0 +membership 0 +design 0 +steve 0 +deer 0 +internetdraft 0 +bill 0 +fenner 0 +optic 0 +charact 0 +recognit 0 +statist 0 +structur 0 +method 0 +niten 0 +malhan 0 +bachelor 0 +thesi 0 +dept 0 +comput 0 +scienc 0 +institut 0 +technolog 0 +delhiunpublish 0 +character 0 +variabl 0 +rate 0 +sourc 0 +term 0 +preform 0 +conferenc 0 +intern 0 +report 0 +first 0 +type 0 +allow 0 +sequenc 0 +blur 0 +speckl 0 +transform 0 +affin 0 +subband 0 +motion 0 +estmat 0 +fast 0 +effici 0 +writen 0 +current 0 +test 0 +machin 0 +displai 0 +flavour 0 +look 0 +width 0 +height 0 +ifram 0 +nodisplai 0 +filenam 0 +putimageincanva 0 +predecessor 0 +hate 0 +motif 0 +yacc 0 +limit 0 +given 0 +compil 0 +dummi 0 +event 0 +snooper 0 +player 0 +consid 0 +coolest 0 +thing 0 +ever 0 +someth 0 +similar 0 +microsoft 0 +window 0 +doesnt 0 +replai 0 +lot 0 +kludg 0 +fool 0 +server 0 +fractal 0 +creat 0 +directori 0 +hole 0 +viewer 0 +fix 0 +dissalow 0 +semant 0 +question 0 +often 0 +ask 0 +gaveth 0 +chines 0 +friend 0 +wonder 0 +small 0 +smart 0 +hindi 0 +tongu 0 +sharm 0 +shyness 0 +actress 0 +tagor 0 +nicknam 0 +frozen 0 +stupid 0 +ealri 0 +jewish 0 +leader 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..41314615 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,114 @@ +page 1 +comput 1 +eric 1 +home 1 +univers 1 +cornel 1 +korean 1 +project 1 +music 1 +friend 1 +shim 1 +view 1 +version 1 +scienc 1 +meng 1 +acoust 1 +movi 1 +jazz 1 +kwan 1 +pagewelcom 0 +young 0 +sang 0 +shimmast 0 +engin 0 +sciencecornel 0 +address 0 +dryden 0 +citi 0 +ithaca 0 +phone 0 +click 0 +onthi 0 +receiv 0 +degre 0 +california 0 +irvinestudi 0 +inform 0 +system 0 +camera 0 +transform 0 +abstract 0 +final 0 +graphic 0 +classi 0 +love 0 +plai 0 +follow 0 +instrument 0 +guitar 0 +piano 0 +keyboard 0 +listen 0 +stan 0 +getz 0 +antonio 0 +carlo 0 +jobim 0 +john 0 +coltran 0 +mile 0 +davi 0 +earl 0 +klugh 0 +metheni 0 +archemi 0 +chopin 0 +watch 0 +cinema 0 +paradiso 0 +french 0 +kiss 0 +miser 0 +miss 0 +saigon 0 +favorit 0 +korea 0 +newswant 0 +know 0 +graduat 0 +student 0 +associ 0 +anybodi 0 +like 0 +check 0 +interest 0 +java 0 +cyberspac 0 +hana 0 +work 0 +melco 0 +last 0 +time 0 +went 0 +world 0 +jung 0 +hwan 0 +middl 0 +school 0 +back 0 +victor 0 +hong 0 +jiyang 0 +kang 0 +homepag 0 +access 0 +timessinc 0 +still 0 +construct 0 +resum 0 +avail 0 +near 0 +futur 0 +also 0 +soon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..a61a4e06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,169 @@ +text 2 +amit 2 +singhal 2 +gerard 2 +salton 2 +chri 2 +bucklei 2 +retriev 2 +automat 2 +smart 1 +us 1 +paper 1 +trec 1 +structur 1 +research 1 +inform 1 +group 1 +document 1 +cornel 1 +normal 1 +system 1 +confer 1 +mandar 1 +jame 1 +allan 1 +student 1 +home 1 +scienc 1 +process 1 +thesi 1 +prof 1 +gerardsalton 1 +current 1 +field 1 +length 1 +size 1 +lengthnorm 1 +propos 1 +pivot 1 +techniqu 1 +nist 1 +provid 1 +select 1 +mandarmitra 1 +decomposit 1 +mitra 1 +appear 1 +theme 1 +gener 1 +analysi 1 +pageamit 0 +singhaldepart 0 +comput 0 +universitysingh 0 +eduphon 0 +interest 0 +area 0 +andtext 0 +advisor 0 +late 0 +supervisor 0 +clairecardieher 0 +postscript 0 +copi 0 +resum 0 +depart 0 +beenon 0 +foremost 0 +informationretriev 0 +last 0 +thirti 0 +year 0 +involv 0 +fairli 0 +vari 0 +commonli 0 +term 0 +weight 0 +show 0 +thateffect 0 +chancessimilar 0 +likelihood 0 +relev 0 +modifi 0 +exist 0 +normalizationfunct 0 +yield 0 +substanti 0 +improv 0 +retrievaleffect 0 +also 0 +effect 0 +normalizationtechniqu 0 +trecparticipationtext 0 +arpa 0 +sponsoredeffort 0 +object 0 +evalu 0 +variou 0 +retrievaltechniqu 0 +independ 0 +testb 0 +hasconsist 0 +best 0 +somepap 0 +summarizationnon 0 +expositori 0 +usual 0 +read 0 +cover 0 +tocov 0 +reader 0 +help 0 +circumst 0 +selectiveaccess 0 +excerpt 0 +need 0 +develop 0 +toanalyz 0 +tool 0 +texttravers 0 +papersnorm 0 +documentlength 0 +mitraand 0 +degrad 0 +collect 0 +come 0 +soon 0 +approach 0 +usingsmart 0 +queri 0 +expans 0 +proceedingsof 0 +third 0 +special 0 +public 0 +segment 0 +textthem 0 +hypertext 0 +andmanag 0 +brows 0 +vectorspac 0 +model 0 +proceed 0 +ofth 0 +dual 0 +technolog 0 +applic 0 +travers 0 +summar 0 +machineread 0 +amitsingh 0 +june 0 +groupmemb 0 +senior 0 +associ 0 +david 0 +master 0 +engin 0 +other 0 +slowli 0 +fluctuat 0 +thank 0 +visit 0 +page 0 +visitor 0 +sinc 0 +iinstal 0 +counter 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..14006ff3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,153 @@ +name 2 +cornel 1 +version 1 +work 1 +home 1 +build 1 +keshav 1 +skeshav 1 +depart 1 +comput 1 +scienc 1 +univers 1 +ithaca 1 +sinc 1 +network 1 +idlinet 1 +base 1 +avail 1 +nativ 1 +mode 1 +also 1 +site 1 +paper 1 +real 1 +simul 1 +peopl 1 +last 1 +keshavemail 0 +upson 0 +hall 0 +christoph 0 +lane 0 +edui 0 +current 0 +associ 0 +professor 0 +spentfiv 0 +year 0 +xunet 0 +wide 0 +area 0 +built 0 +scratch 0 +router 0 +switch 0 +softwar 0 +oper 0 +incollabor 0 +delhi 0 +equip 0 +fore 0 +systemsand 0 +zeitnet 0 +idlinetsourc 0 +code 0 +public 0 +domain 0 +page 0 +featur 0 +protocol 0 +stack 0 +applicationget 0 +write 0 +directli 0 +virtual 0 +circuit 0 +support 0 +independ 0 +signal 0 +span 0 +compliant 0 +final 0 +goof 0 +talk 0 +head 0 +facial 0 +anim 0 +snoop 0 +send 0 +video 0 +format 0 +mbone 0 +canb 0 +driven 0 +remot 0 +internet 0 +linkspapersher 0 +linkto 0 +postscript 0 +reali 0 +packet 0 +level 0 +still 0 +maintain 0 +instal 0 +idea 0 +mani 0 +actual 0 +link 0 +latest 0 +releas 0 +fall 0 +includ 0 +beout 0 +goe 0 +well 0 +native_mod 0 +pagemi 0 +namein 0 +part 0 +world 0 +come 0 +south 0 +india 0 +thanjavur 0 +district 0 +beprecis 0 +prefix 0 +father 0 +sonli 0 +sometim 0 +villag 0 +surnam 0 +thu 0 +myfath 0 +srinivasan 0 +unfortun 0 +round 0 +intoth 0 +squar 0 +hole 0 +custom 0 +first 0 +beconfus 0 +quotabl 0 +quot 0 +ought 0 +everi 0 +least 0 +hear 0 +littl 0 +song 0 +read 0 +good 0 +poem 0 +possibl 0 +speak 0 +reason 0 +word 0 +johann 0 +wolfgang 0 +goeth 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..24b5e10b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,23 @@ +cornel 1 +kenneth 1 +page 1 +engin 1 +depart 1 +home 0 +road 0 +success 0 +alwai 0 +construct 0 +meng 0 +electr 0 +sinc 0 +work 0 +prof 0 +zabih 0 +place 0 +student 0 +came 0 +univers 0 +wisconsin 0 +madison 0 +sunlab 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..9cdd6345 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,329 @@ +seem 2 +clear 1 +much 1 +sound 1 +start 1 +quit 1 +also 1 +like 1 +us 1 +work 1 +pictur 1 +around 1 +school 1 +well 1 +look 1 +talk 1 +wear 1 +ring 1 +visit 1 +friend 1 +last 1 +page 1 +come 1 +tree 1 +especi 1 +figur 1 +snow 1 +insid 1 +leav 1 +label 1 +sai 1 +small 1 +presum 1 +place 1 +glass 1 +book 1 +vagu 1 +topic 1 +read 1 +probabl 1 +somewhat 1 +hair 1 +link 1 +next 1 +obviou 1 +heha 1 +beaver 1 +finger 1 +left 1 +someon 1 +peopl 1 +time 1 +internet 1 +half 1 +year 1 +summer 1 +busi 1 +better 1 +perri 1 +world 1 +cornel 1 +step 0 +forest 0 +clearinglook 0 +realiz 0 +must 0 +walk 0 +thanyou 0 +plan 0 +wide 0 +varieti 0 +surround 0 +onal 0 +side 0 +theweath 0 +fairli 0 +overcast 0 +somehow 0 +ifit 0 +go 0 +rain 0 +perhap 0 +distanc 0 +larg 0 +mountain 0 +quiteclear 0 +question 0 +hear 0 +bird 0 +chirp 0 +near 0 +cours 0 +theymai 0 +respond 0 +louder 0 +nearbywaterfal 0 +gotta 0 +could 0 +follow 0 +want 0 +apath 0 +direct 0 +path 0 +asign 0 +hillschool 0 +sign 0 +hell 0 +wormhol 0 +connect 0 +nearbyhous 0 +traffic 0 +clearinginsid 0 +coupl 0 +structur 0 +shack 0 +door 0 +fall 0 +offand 0 +complet 0 +modern 0 +hous 0 +withno 0 +stone 0 +front 0 +pile 0 +score 0 +magazin 0 +random 0 +paper 0 +scatter 0 +throughout 0 +theclear 0 +rhyme 0 +reasonto 0 +glanc 0 +sortsof 0 +recent 0 +betteridea 0 +make 0 +person 0 +tick 0 +resum 0 +itseem 0 +corner 0 +importantth 0 +worri 0 +kind 0 +strang 0 +clearingh 0 +smile 0 +hello 0 +oftendescrib 0 +thin 0 +mother 0 +mostdistinct 0 +featur 0 +bright 0 +golden 0 +quitelong 0 +elfin 0 +seen 0 +peoplebefor 0 +warn 0 +paragraph 0 +written 0 +theresoon 0 +alwai 0 +thing 0 +startstel 0 +stori 0 +stop 0 +hum 0 +tune 0 +tell 0 +whynichola 0 +negropont 0 +moron 0 +ifyou 0 +never 0 +heard 0 +polit 0 +late 0 +twentieth 0 +centuri 0 +america 0 +thenh 0 +paus 0 +obscur 0 +theorem 0 +theoret 0 +comput 0 +scienc 0 +rather 0 +listen 0 +hetend 0 +appearanceinstead 0 +mostli 0 +color 0 +purpl 0 +dark 0 +turquois 0 +everyth 0 +either 0 +silk 0 +linen 0 +contrast 0 +nice 0 +gold 0 +imageof 0 +right 0 +point 0 +awai 0 +fromhim 0 +silver 0 +ocean 0 +wave 0 +pattern 0 +pewter 0 +pentacl 0 +neck 0 +andlook 0 +altogeth 0 +hippi 0 +asclass 0 +intellectu 0 +clearingdan 0 +occasion 0 +spent 0 +briani 0 +live 0 +anundergrad 0 +nowadai 0 +pointcast 0 +newsprovid 0 +ancamosoiu 0 +pronounc 0 +schwa 0 +best 0 +backwhen 0 +mani 0 +week 0 +twoand 0 +went 0 +europ 0 +togeth 0 +wegradu 0 +inner 0 +child 0 +shejust 0 +onewav 0 +blame 0 +dread 0 +name 0 +actual 0 +usedto 0 +object 0 +power 0 +daniel 0 +issomeon 0 +gotten 0 +know 0 +severalmonth 0 +think 0 +sometim 0 +novemb 0 +becamemuch 0 +switch 0 +eedepart 0 +wise 0 +multimediastud 0 +dept 0 +commun 0 +reason 0 +inth 0 +own 0 +friendof 0 +finlei 0 +notanymor 0 +instead 0 +brian 0 +steelcas 0 +isth 0 +largest 0 +manufactur 0 +offic 0 +furnitur 0 +dserver 0 +kinda 0 +cheesi 0 +haveth 0 +pyramid 0 +still 0 +neat 0 +thebuild 0 +ius 0 +sing 0 +japan 0 +programcal 0 +hire 0 +teach 0 +english 0 +high 0 +student 0 +ideal 0 +winter 0 +hani 0 +graduatedfrom 0 +june 0 +couldn 0 +happen 0 +neededto 0 +great 0 +even 0 +nick 0 +agood 0 +cuter 0 +thanth 0 +blurri 0 +would 0 +indic 0 +music 0 +maker 0 +dreamer 0 +ofdream 0 +aphex 0 +twindan 0 +brown 0 +snowman 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..39b6ac77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,25 @@ +lookin 1 +home 1 +page 1 +autobiographi 0 +upkeep 0 +lot 0 +inform 0 +ultra 0 +cool 0 +soon 0 +keep 0 +take 0 +long 0 +setuup 0 +doingajaymanishanujmom 0 +daddepart 0 +comput 0 +sciencesearch 0 +netentertain 0 +weeklycricket 0 +rate 0 +ashish 0 +soni 0 +sonia 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..2492b10c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,11 @@ +scott 0 +dawson 0 +padif 0 +us 0 +form 0 +capabl 0 +browser 0 +would 0 +better 0 +scottdawson 0 +shomebas 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..3429ba77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,25 @@ +cornel 1 +comput 1 +paul 0 +stodghil 0 +home 0 +pagepaul 0 +stodghillstodghil 0 +rhode 0 +hall 0 +affili 0 +depart 0 +scienc 0 +atcornel 0 +univers 0 +advanc 0 +research 0 +institut 0 +acri 0 +theori 0 +center 0 +bernoulli 0 +projectinterest 0 +ultim 0 +hockei 0 +scheme 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..13d71c37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,13 @@ +stoller 1 +home 1 +former 1 +page 1 +scott 0 +pagescott 0 +move 0 +http 0 +indiana 0 +hyplan 0 +htmllast 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..1d69c81e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,49 @@ +sugata 1 +cornel 1 +work 1 +mukhopadhyai 1 +home 1 +page 1 +system 1 +take 1 +high 1 +perform 1 +phone 1 +welcom 0 +graduat 0 +student 0 +depart 0 +ofcomput 0 +scienc 0 +univers 0 +multimedia 0 +prof 0 +brian 0 +smith 0 +marri 0 +wonder 0 +person 0 +earth 0 +ritu 0 +spring 0 +comput 0 +compil 0 +design 0 +architectur 0 +also 0 +advanc 0 +option 0 +price 0 +theori 0 +czar 0 +progress 0 +seminar 0 +previou 0 +semest 0 +contact 0 +mehom 0 +mailsugata 0 +eduaddress 0 +hichori 0 +estat 0 +owego 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..a92487ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,28 @@ +sukhpal 1 +paul 1 +sanghera 1 +univers 1 +cornel 1 +home 0 +page 0 +physic 0 +carleton 0 +present 0 +student 0 +comput 0 +scienc 0 +ithaca 0 +background 0 +project 0 +philosophi 0 +life 0 +resum 0 +rout 0 +clock 0 +tick 0 +need 0 +java 0 +capabl 0 +browser 0 +view 0 +anim 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..bb5c2fae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,132 @@ +gater 1 +episod 1 +lord 1 +strip 1 +employe 1 +first 1 +microsoft 1 +parti 1 +part 1 +bilth 1 +galact 1 +weekli 1 +drew 1 +read 1 +work 1 +empir 0 +empirewritten 0 +illustr 0 +sumedh 0 +kanetkaremail 0 +kanetkar 0 +cornel 0 +eduthi 0 +seri 0 +comic 0 +intern 0 +atmicrosoft 0 +summer 0 +post 0 +theintern 0 +social 0 +alia 0 +regularli 0 +peopl 0 +thesumm 0 +progress 0 +notic 0 +artwork 0 +begun 0 +leak 0 +theful 0 +time 0 +well 0 +whether 0 +high 0 +never 0 +found 0 +stripi 0 +within 0 +week 0 +arriv 0 +redmond 0 +tri 0 +persuad 0 +themicrosoft 0 +newslett 0 +print 0 +perceiv 0 +problemand 0 +declin 0 +didn 0 +want 0 +portrai 0 +evilempir 0 +understand 0 +viewpoint 0 +told 0 +comicstrip 0 +attempt 0 +show 0 +compani 0 +view 0 +mani 0 +theoutsid 0 +world 0 +person 0 +bitter 0 +feel 0 +toward 0 +eitherbil 0 +gate 0 +corpor 0 +heck 0 +realli 0 +enjoi 0 +summersof 0 +strongli 0 +recommend 0 +internship 0 +program 0 +anyoneinterest 0 +industri 0 +make 0 +orient 0 +thateveri 0 +suffer 0 +long 0 +session 0 +theyshow 0 +video 0 +fill 0 +kind 0 +trivia 0 +also 0 +makey 0 +sign 0 +disclosur 0 +agreeement 0 +would 0 +fit 0 +theymad 0 +everyon 0 +stand 0 +place 0 +hand 0 +theirheart 0 +pledg 0 +alleig 0 +comput 0 +everydesk 0 +everi 0 +home 0 +run 0 +softwar 0 +anywai 0 +space 0 +roosterepisod 0 +rebel 0 +threatepisod 0 +flame 0 +imperi 0 +insigniaepisod 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..61bb9d22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,167 @@ +document 2 +structur 1 +electron 1 +logic 1 +summer 1 +cornel 1 +inform 1 +brows 1 +piec 1 +issu 1 +flexibl 1 +comput 1 +proceed 1 +kristen 1 +student 1 +univers 1 +research 1 +interest 1 +work 1 +goal 1 +support 1 +index 1 +primari 1 +technic 1 +report 1 +divid 1 +type 1 +retriev 1 +number 1 +paper 1 +us 1 +scienc 1 +autom 1 +upson 0 +hall 0 +captur 0 +accessresearch 0 +group 0 +analysi 0 +mylong 0 +term 0 +provid 0 +forsophist 0 +manipulationtool 0 +link 0 +discov 0 +logicalstructur 0 +arbitrari 0 +take 0 +documentrepresent 0 +input 0 +return 0 +hierarchyof 0 +output 0 +exampl 0 +given 0 +scan 0 +postscriptvers 0 +would 0 +like 0 +tobe 0 +abl 0 +section 0 +paragraph 0 +similarli 0 +busi 0 +letter 0 +address 0 +head 0 +bodi 0 +close 0 +identifi 0 +problem 0 +compon 0 +segment 0 +andclassif 0 +categor 0 +also 0 +rais 0 +question 0 +evalu 0 +previou 0 +differ 0 +descript 0 +correct 0 +hierarchi 0 +theoret 0 +limit 0 +task 0 +relev 0 +bruce 0 +croft 0 +stop 0 +novemb 0 +magazin 0 +interfac 0 +effici 0 +determininglog 0 +enabl 0 +hierarch 0 +soin 0 +gener 0 +system 0 +handl 0 +ofmultipl 0 +textual 0 +cue 0 +browsingco 0 +author 0 +daniela 0 +digit 0 +librari 0 +current 0 +nabil 0 +adam 0 +bharat 0 +bhargava 0 +yelena 0 +yesha 0 +editor 0 +chapter 0 +lectur 0 +note 0 +seri 0 +springer 0 +verlag 0 +version 0 +geometr 0 +algorithm 0 +experi 0 +mathemat 0 +model 0 +forthcom 0 +white 0 +space 0 +workshop 0 +principl 0 +process 0 +seeheim 0 +podp 0 +toward 0 +taxonomi 0 +structureselectron 0 +publish 0 +superhighwai 0 +dartmouth 0 +institut 0 +advanc 0 +graduat 0 +studi 0 +boston 0 +donald 0 +johnson 0 +memori 0 +dag 0 +scholaraward 0 +best 0 +recipi 0 +near 0 +wordless 0 +classif 0 +intern 0 +confer 0 +analysisand 0 +recognit 0 +montral 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..cc557cb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,55 @@ +system 1 +project 1 +oper 1 +comput 1 +masafumi 1 +java 1 +research 1 +engin 1 +spring 1 +introduct 1 +graphic 1 +databas 1 +suzukither 0 +would 0 +applet 0 +browser 0 +suppot 0 +suzukisuzuki 0 +cornel 0 +educlassesfal 0 +case 0 +studi 0 +optim 0 +probabl 0 +statist 0 +design 0 +analysi 0 +simul 0 +stochast 0 +model 0 +summer 0 +data 0 +structur 0 +fall 0 +softwar 0 +technolog 0 +techniqu 0 +program 0 +multimedia 0 +report 0 +network 0 +telecommun 0 +polici 0 +thrive 0 +inform 0 +revolut 0 +sector 0 +site 0 +manag 0 +independ 0 +polygon 0 +displai 0 +us 0 +prototyp 0 +resum 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..8cbedcc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,28 @@ +swartz 1 +jonathan 1 +cornel 1 +home 0 +page 0 +edui 0 +student 0 +departmentof 0 +comput 0 +scienc 0 +univers 0 +spend 0 +time 0 +heredevelopingrivl 0 +languag 0 +multimedia 0 +process 0 +myaddress 0 +phone 0 +number 0 +littl 0 +humor 0 +brighten 0 +dayjon 0 +movi 0 +connectioncool 0 +siteslast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..5893725c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,23 @@ +sunil 1 +home 1 +page 1 +class 1 +srivastava 0 +pagewelcom 0 +srivastavamast 0 +engin 0 +studentcomput 0 +scienc 0 +departmentcornel 0 +univers 0 +academ 0 +project 0 +person 0 +inform 0 +us 0 +linkscom 0 +question 0 +send 0 +mail 0 +sxsriva 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..cbc4c7f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,218 @@ +year 1 +model 1 +quit 1 +cours 1 +program 1 +huang 1 +steven 1 +seven 1 +home 1 +thu 1 +love 1 +read 1 +life 1 +comput 1 +univers 1 +like 1 +career 1 +found 1 +ofcomput 1 +want 1 +mani 1 +network 1 +build 1 +watch 1 +also 1 +project 1 +involv 1 +huangszu 0 +defend 0 +truth 0 +champion 0 +justic 0 +around 0 +nice 0 +nevermind 0 +long 0 +exactli 0 +iarriv 0 +second 0 +proud 0 +parent 0 +soundslik 0 +mobi 0 +dick 0 +assur 0 +nointent 0 +find 0 +ship 0 +hunt 0 +whale 0 +digress 0 +brought 0 +taiwan 0 +tender 0 +wholefamili 0 +migrat 0 +south 0 +tropic 0 +island 0 +philippin 0 +made 0 +live 0 +fifteen 0 +pictur 0 +aroundsix 0 +becam 0 +fluentli 0 +bilingu 0 +thepoetri 0 +tang 0 +dynasti 0 +arabian 0 +night 0 +natur 0 +children 0 +version 0 +host 0 +stori 0 +somewhat 0 +fulfil 0 +name 0 +mean 0 +literatur 0 +class 0 +grade 0 +doveright 0 +marvel 0 +four 0 +later 0 +ienter 0 +philippineswith 0 +major 0 +talent 0 +draw 0 +scienc 0 +unabashedli 0 +knew 0 +good 0 +alsoin 0 +colleg 0 +whirlwind 0 +happi 0 +peac 0 +three 0 +run 0 +awoman 0 +becom 0 +import 0 +part 0 +effortlessli 0 +defeat 0 +hobbi 0 +eek 0 +ideal 0 +higher 0 +pai 0 +blunt 0 +ienrol 0 +cornel 0 +graduat 0 +reward 0 +almost 0 +everyth 0 +ever 0 +andwork 0 +lucki 0 +septemb 0 +welcom 0 +page 0 +segreg 0 +everydaygeek 0 +think 0 +interest 0 +asid 0 +fromactu 0 +write 0 +happili 0 +myspar 0 +time 0 +anyth 0 +calvin 0 +hobb 0 +unix 0 +internet 0 +relai 0 +chat 0 +gener 0 +linuxnet 0 +lego 0 +thing 0 +practic 0 +object 0 +orient 0 +design 0 +plastic 0 +weapon 0 +suspens 0 +thriller 0 +film 0 +listen 0 +music 0 +sting 0 +other 0 +sesam 0 +street 0 +discoveri 0 +channel 0 +railroad 0 +rich 0 +enough 0 +field 0 +rather 0 +broad 0 +spectrum 0 +ofinterest 0 +though 0 +studi 0 +concentr 0 +area 0 +graphic 0 +wish 0 +offend 0 +bysom 0 +blatant 0 +self 0 +promot 0 +check 0 +myresum 0 +highlyinterest 0 +linux 0 +freeli 0 +avail 0 +oper 0 +system 0 +intel 0 +compatiblecomput 0 +master 0 +engin 0 +blobbi 0 +metaballsund 0 +supervis 0 +bruce 0 +land 0 +current 0 +anoth 0 +survei 0 +techniquesin 0 +human 0 +face 0 +resolut 0 +independ 0 +andport 0 +audio 0 +effect 0 +editor 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..c2165f07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,6 @@ +henzing 0 +hytechhytech 0 +hybrid 0 +technolog 0 +toolw 0 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..19501cf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,55 @@ +time 1 +analysi 1 +formal 1 +methodolog 1 +real 1 +system 1 +cornel 1 +concurr 1 +embed 1 +automata 1 +hybrid 1 +henzingerthoma 0 +henzing 0 +movedassist 0 +professorcomput 0 +scienc 0 +departmentcornel 0 +universityithaca 0 +email 0 +eduphon 0 +researchform 0 +support 0 +develop 0 +relat 0 +researchat 0 +cornelland 0 +worldwid 0 +resumepublicationsreact 0 +modul 0 +systemsr 0 +logic 0 +transit 0 +systemsclock 0 +systemshybrid 0 +systemsbibliographi 0 +bibtex 0 +list 0 +publicationstoolshytech 0 +symbol 0 +model 0 +checker 0 +linear 0 +systemscoursesc 0 +fall 0 +advanc 0 +program 0 +languagesconferenceshybrid 0 +verif 0 +control 0 +systemscav 0 +comput 0 +aid 0 +verificationlast 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..880d66d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,38 @@ +takako 1 +hickei 1 +interest 1 +homepag 0 +email 0 +cornel 0 +eduoffic 0 +upson 0 +hallphon 0 +student 0 +depart 0 +comput 0 +scienc 0 +atcornel 0 +univers 0 +advis 0 +byrobbert 0 +reness 0 +andfr 0 +schneider 0 +research 0 +distribut 0 +system 0 +program 0 +environ 0 +resourc 0 +manag 0 +horu 0 +project 0 +previou 0 +life 0 +social 0 +psycholog 0 +backcountri 0 +hockei 0 +quot 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..718fac84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,23 @@ +comput 1 +cornel 1 +program 1 +tim_teitelbaum 0 +teitelbaumassoci 0 +professor 0 +depart 0 +scienc 0 +univers 0 +eduresearch 0 +interest 0 +increment 0 +transform 0 +environ 0 +languag 0 +base 0 +editor 0 +compil 0 +attribut 0 +grammar 0 +adavita 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..2250d039 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,207 @@ +eicken 2 +parallel 2 +comput 2 +culler 2 +architectur 2 +schauser 2 +activ 1 +messag 1 +program 1 +goldstein 1 +proceed 1 +commun 1 +fall 1 +symp 1 +compil 1 +proc 1 +thorsten 1 +user 1 +level 1 +network 1 +high 1 +split 1 +system 1 +talk 1 +report 1 +annual 1 +paper 1 +mechan 1 +machin 1 +fine 1 +grain 1 +languag 1 +cluster 1 +workstat 1 +us 1 +interconnect 1 +sever 1 +port 1 +departement 1 +entri 1 +forum 1 +slide 1 +basu 1 +buch 1 +appear 1 +june 1 +version 1 +gold 1 +coast 1 +australia 1 +novemb 1 +berkelei 1 +abstract 1 +diego 1 +conf 1 +control 1 +multithread 1 +support 1 +eickenassist 0 +professor 0 +upson 0 +hallphon 0 +email 0 +cornel 0 +eduprojectsth 0 +architectureprovid 0 +interfacefor 0 +offer 0 +latencyand 0 +bandwidth 0 +speed 0 +lan 0 +currentimplement 0 +project 0 +platform 0 +includingth 0 +extend 0 +model 0 +tonon 0 +spmd 0 +simpl 0 +extensionto 0 +newplatform 0 +includ 0 +share 0 +memori 0 +multprocessor 0 +run 0 +coursesc 0 +introduct 0 +digit 0 +computerorgan 0 +perform 0 +spring 0 +frontier 0 +guest 0 +lectur 0 +maynd 0 +department 0 +person 0 +pagestv 0 +pond 0 +real 0 +water 0 +fish 0 +plant 0 +tire 0 +firewal 0 +macpppwhich 0 +gener 0 +time 0 +password 0 +automat 0 +without 0 +everhav 0 +think 0 +well 0 +month 0 +passwordssuddenli 0 +installationinstruct 0 +select 0 +publicationsu 0 +interfac 0 +distributedcomput 0 +anindya 0 +vineet 0 +werner 0 +vogel 0 +latenc 0 +atmnetwork 0 +avula 0 +present 0 +palo 0 +alto 0 +abridg 0 +ieee 0 +micro 0 +magazin 0 +integr 0 +andcomput 0 +effici 0 +communicationarchitectur 0 +multiprocessor 0 +thesi 0 +univers 0 +california 0 +publish 0 +link 0 +lead 0 +postscript 0 +dusseau 0 +krishnamurthi 0 +lumetta 0 +yelick 0 +supercomput 0 +controlledthread 0 +journal 0 +distribut 0 +special 0 +issu 0 +dataflow 0 +evalu 0 +spertu 0 +dalli 0 +logp 0 +toward 0 +realist 0 +modelof 0 +karp 0 +patterson 0 +sahai 0 +santo 0 +subramonian 0 +fourth 0 +sigplan 0 +principl 0 +practic 0 +fundament 0 +limit 0 +dataflowmultiprocess 0 +ifip 0 +work 0 +techniqu 0 +medium 0 +orlando 0 +forintegr 0 +symposium 0 +forleni 0 +confer 0 +function 0 +cambridg 0 +august 0 +minimalhardwar 0 +thread 0 +wawrzynek 0 +oper 0 +santa 0 +clara 0 +april 0 +analysi 0 +architecturesfor 0 +saavedra 0 +barrera 0 +algorithm 0 +crete 0 +greec 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..cb620386 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,43 @@ +erlingsson 1 +lfar 1 +pagelfar 0 +specificationi 0 +student 0 +incomput 0 +scienc 0 +cornel 0 +univers 0 +apart 0 +enjoi 0 +somewhat 0 +incongruousiceland 0 +link 0 +inform 0 +implementationbackgroundwher 0 +come 0 +current 0 +activitieswhat 0 +moment 0 +schedulewher 0 +time 0 +researchwhat 0 +real 0 +work 0 +done 0 +interestswhat 0 +actual 0 +like 0 +acquaintancesthos 0 +know 0 +contact 0 +infohow 0 +touch 0 +pleas 0 +note 0 +page 0 +often 0 +date 0 +gener 0 +assum 0 +disclaim 0 +appli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..d7309ec1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,49 @@ +gener 1 +mesh 1 +softwar 1 +packag 1 +element 1 +releas 1 +page 1 +vavasi 1 +finit 1 +geometr 1 +home 1 +univers 1 +cornel 1 +project 0 +relat 0 +threedimens 0 +includ 0 +model 0 +themesh 0 +solver 0 +free 0 +softwaredownload 0 +run 0 +unix 0 +window 0 +andqmg 0 +novemb 0 +us 0 +websit 0 +robert 0 +schneider 0 +mcphedran 0 +offinit 0 +resourc 0 +minnesota 0 +geometri 0 +center 0 +list 0 +ofsoftwar 0 +computationalgeometri 0 +jonathan 0 +shewchuk 0 +triangl 0 +back 0 +stephen 0 +comput 0 +scienc 0 +depart 0 +ithaca 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..19490b21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,147 @@ +vavasi 2 +mesh 1 +code 1 +numer 1 +click 1 +gener 1 +method 1 +boundari 1 +algorithm 1 +stephen 1 +cornel 1 +email 1 +phone 1 +argonn 1 +interest 1 +analysi 1 +problem 1 +recent 1 +avail 1 +line 1 +complet 1 +mitchel 1 +ratio 1 +triangul 1 +softwar 1 +packag 1 +sourc 1 +level 1 +anonym 1 +matlab 1 +well 1 +page 1 +associ 0 +professor 0 +depart 0 +comput 0 +scienc 0 +rhode 0 +hall 0 +univers 0 +ithaca 0 +period 0 +onsabbat 0 +divis 0 +bldg 0 +nation 0 +laboratori 0 +cass 0 +note 0 +chang 0 +area 0 +effect 0 +research 0 +aren 0 +tsure 0 +pleas 0 +essaybi 0 +colleagu 0 +trefethen 0 +specif 0 +optim 0 +complex 0 +issuesnumer 0 +valu 0 +problemsgeometr 0 +aris 0 +scientif 0 +computingspars 0 +matrix 0 +computationsi 0 +manuscript 0 +primal 0 +dual 0 +acceler 0 +interiorpoint 0 +whose 0 +run 0 +time 0 +depend 0 +hough 0 +orthogon 0 +decompositionfor 0 +weight 0 +least 0 +squar 0 +aspect 0 +bound 0 +gridcut 0 +hyperplan 0 +driscol 0 +conform 0 +map 0 +us 0 +cross 0 +delaunai 0 +packagei 0 +project 0 +forth 0 +finit 0 +element 0 +three 0 +dimens 0 +call 0 +construct 0 +polyhedr 0 +geometr 0 +object 0 +verycompl 0 +topolog 0 +hole 0 +intern 0 +andautomat 0 +creat 0 +unstructuredtetrahedr 0 +base 0 +work 0 +scott 0 +also 0 +solv 0 +ellipt 0 +boundaryvalu 0 +grad 0 +domain 0 +iswritten 0 +distributedfor 0 +free 0 +distributionbegan 0 +releas 0 +novemb 0 +featur 0 +manyimprov 0 +includ 0 +faster 0 +vrml 0 +graphic 0 +much 0 +cleaner 0 +compat 0 +microsoft 0 +window 0 +unix 0 +compatibilitywith 0 +pleasese 0 +document 0 +annual 0 +reportback 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..0d88f92e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,10 @@ +arun 0 +verma 0 +homepag 0 +need 0 +browser 0 +support 0 +frame 0 +netscap 0 +higher 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..23482414 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,10 @@ +vinc 0 +browser 0 +us 0 +suck 0 +download 0 +date 0 +netscap 0 +read 0 +page 0 +thank 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..02d0b799 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,15 @@ +vitrano 1 +pagec 1 +home 0 +pagehei 0 +start 0 +thing 0 +give 0 +break 0 +internet 0 +engin 0 +advanc 0 +databas 0 +multimedia 0 +pageer 0 +cornel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..f6051a28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,101 @@ +seed 1 +page 1 +time 1 +browser 1 +home 1 +us 1 +java 1 +kolla 1 +specif 1 +scrollit 1 +reach 1 +view 1 +wonder 1 +dont 1 +know 1 +anim 1 +ground 0 +copyright 0 +right 0 +reserv 0 +held 0 +respons 0 +unwant 0 +effect 0 +usag 0 +applet 0 +deriv 0 +warrante 0 +usabl 0 +applic 0 +given 0 +impli 0 +function 0 +vivek 0 +million 0 +zillion 0 +call 0 +send 0 +mail 0 +cornel 0 +expect 0 +back 0 +timertwo 0 +window 0 +settimeout 0 +els 0 +visitor 0 +number 0 +happi 0 +contact 0 +info 0 +transmit 0 +thoughtsfriend 0 +foeslinksa 0 +small 0 +collect 0 +relev 0 +life 0 +maintain 0 +larg 0 +list 0 +favorit 0 +link 0 +think 0 +wast 0 +search 0 +someth 0 +might 0 +tryalta 0 +vista 0 +yahoo 0 +theinktomiresumein 0 +htmlin 0 +postscriptin 0 +word 0 +perfectin 0 +asciith 0 +current 0 +good 0 +clock 0 +wanna 0 +around 0 +world 0 +need 0 +capabl 0 +site 0 +construct 0 +mani 0 +imag 0 +heavi 0 +file 0 +like 0 +note 0 +promis 0 +made 0 +regard 0 +qualiti 0 +visit 0 +fulli 0 +support 0 +technolog 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..9de32f41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,81 @@ +work 1 +lawyer 1 +professor 1 +difficulti 1 +like 1 +said 1 +friend 1 +find 1 +everi 1 +number 1 +vlad 0 +home 0 +pagevladimir 0 +kotlyarvladimir 0 +cornel 0 +look 0 +fall 0 +david 0 +wereteach 0 +might 0 +guess 0 +graduat 0 +student 0 +depart 0 +comput 0 +scienc 0 +cornellunivers 0 +prof 0 +keshav 0 +pingali 0 +research 0 +interest 0 +compil 0 +high 0 +perform 0 +architectur 0 +particular 0 +parallel 0 +spars 0 +matrix 0 +code 0 +part 0 +bernoulli 0 +project 0 +member 0 +group 0 +paul 0 +stodghil 0 +andindu 0 +kodukulapubl 0 +henri 0 +kissing 0 +legal 0 +profess 0 +remind 0 +comment 0 +abritish 0 +judg 0 +differ 0 +sveri 0 +simpl 0 +lord 0 +den 0 +function 0 +asolut 0 +present 0 +wherea 0 +functionof 0 +solut 0 +todayth 0 +seem 0 +outpac 0 +ofsolut 0 +either 0 +hardenough 0 +mani 0 +govern 0 +support 0 +privaci 0 +strong 0 +encrypt 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..702bdde2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,17 @@ +cornel 1 +address 1 +ithaca 1 +vijai 0 +menonvijai 0 +menon 0 +graduat 0 +student 0 +offic 0 +home 0 +rhode 0 +hall 0 +mapl 0 +univers 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..529c28da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,49 @@ +depart 1 +chen 1 +comput 1 +cornel 1 +univers 1 +interest 1 +spare 1 +time 1 +home 0 +pagewei 0 +upson 0 +hall 0 +sciencecornel 0 +universityithaca 0 +weichen 0 +current 0 +third 0 +year 0 +student 0 +computersci 0 +receiv 0 +bachelorand 0 +master 0 +degre 0 +scienc 0 +tsinghua 0 +beij 0 +chinami 0 +academ 0 +distributedsystem 0 +fault 0 +toler 0 +algorithm 0 +work 0 +professor 0 +toueg 0 +failur 0 +detect 0 +group 0 +membership 0 +inpartition 0 +network 0 +system 0 +soccer 0 +resum 0 +bookmark 0 +last 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..821f647b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,21 @@ +page 1 +weitsang 0 +homepag 0 +lwhere 0 +fromwhat 0 +watchwhat 0 +movi 0 +likec 0 +wrotepictur 0 +drawa 0 +window 0 +motifcomput 0 +theoryhom 0 +vimi 0 +tsearch 0 +webcoolest 0 +sitessharewar 0 +archivem 0 +newspap 0 +onlineunivers 0 +site 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..89cf1590 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,82 @@ +comput 1 +scienc 1 +hung 1 +glavin 1 +graduat 1 +univers 1 +cornel 1 +tenni 1 +favorit 1 +system 1 +multimedia 1 +project 1 +address 0 +mapl 0 +avenu 0 +ithaca 0 +telephon 0 +photo 0 +academ 0 +background 0 +nation 0 +taiwan 0 +plan 0 +habit 0 +sport 0 +basketbal 0 +billiard 0 +tabl 0 +bowl 0 +swim 0 +volleybal 0 +other 0 +sing 0 +drive 0 +danc 0 +except 0 +studi 0 +team 0 +orlando 0 +magic 0 +atlanta 0 +brave 0 +player 0 +anferne 0 +hardawai 0 +technic 0 +skill 0 +understand 0 +distribut 0 +oper 0 +graphic 0 +network 0 +databas 0 +vision 0 +financi 0 +calcul 0 +extens 0 +window 0 +java 0 +program 0 +final 0 +paper 0 +warp 0 +morph 0 +rivl 0 +partial 0 +result 0 +meng 0 +webpaint 0 +job 0 +interest 0 +market 0 +relat 0 +field 0 +softwar 0 +develop 0 +resum 0 +page 0 +still 0 +construct 0 +email 0 +whkao 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..3dc19ef4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,150 @@ +comput 1 +research 1 +system 1 +florida 1 +school 1 +optim 1 +parallel 1 +mpeg 1 +encod 1 +concerto 1 +william 1 +cornel 1 +univers 1 +project 1 +engin 1 +cornellopoli 1 +network 1 +databas 1 +tenni 1 +south 1 +spring 1 +collect 1 +piano 1 +probabl 1 +classesc 1 +softwar 1 +distribut 1 +visitor 0 +jersei 0 +exit 0 +minut 0 +awai 0 +princeton 0 +master 0 +student 0 +scienceat 0 +degre 0 +engineeringand 0 +mathemat 0 +sciencefrom 0 +carneig 0 +mellon 0 +didresearch 0 +design 0 +center 0 +robot 0 +institu 0 +spent 0 +year 0 +write 0 +oper 0 +xsro 0 +hpux 0 +motorola 0 +atft 0 +lauderdal 0 +besid 0 +sleep 0 +work 0 +like 0 +sector 0 +analysi 0 +partner 0 +compet 0 +usta 0 +tournment 0 +could 0 +never 0 +somehow 0 +parti 0 +enjoi 0 +weekli 0 +match 0 +mani 0 +beauti 0 +place 0 +plai 0 +faviorit 0 +on 0 +boca 0 +ratonkei 0 +west 0 +get 0 +coral 0 +live 0 +beethoven 0 +chopin 0 +gershwin 0 +liszt 0 +mendelssohn 0 +mozart 0 +rachmaninoff 0 +ravel 0 +tchaikovski 0 +also 0 +violinconcerto 0 +guess 0 +type 0 +even 0 +wrote 0 +graduat 0 +applic 0 +essai 0 +base 0 +reason 0 +reject 0 +fall 0 +technolog 0 +techniquec 0 +formal 0 +methodsc 0 +multimedia 0 +systemsc 0 +scienc 0 +colloquiumc 0 +cool 0 +tool 0 +seminar 0 +purifi 0 +quantifi 0 +wart 0 +present 0 +practic 0 +computingc 0 +practicum 0 +high 0 +perform 0 +computerc 0 +thrive 0 +inform 0 +revolut 0 +sectorcool 0 +links_leap 0 +copi 0 +frogski 0 +serverident 0 +crisi 0 +testweath 0 +undergroundinktomi 0 +search 0 +enginequest 0 +week 0 +archiveslast 0 +updat 0 +campu 0 +address 0 +mapl 0 +ecithaca 0 +york 0 +wwlee 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..a576a4d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,54 @@ +comput 1 +xichun 1 +zhejiang 1 +welcom 1 +jennif 1 +home 1 +depart 1 +current 1 +master 1 +scienc 1 +univers 1 +shade 1 +cours 1 +network 1 +page 0 +upson 0 +hall 0 +sciencecornel 0 +universityithaca 0 +offic 0 +cornel 0 +edui 0 +engin 0 +student 0 +atcornel 0 +receiv 0 +bachelor 0 +degre 0 +hangzhou 0 +china 0 +site 0 +javaworldsunhigh 0 +school 0 +alumni 0 +alumnimeng 0 +project 0 +phong 0 +gouraud 0 +spring 0 +graphic 0 +high 0 +capac 0 +inform 0 +databas 0 +manag 0 +taken 0 +fall 0 +multimedia 0 +systemsc 0 +softwar 0 +engineeringc 0 +oper 0 +systeme 0 +communicationby 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..80cbcc62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,2 @@ +topic 0 +interest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..45940b82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,80 @@ +program 2 +increment 2 +transform 1 +comput 1 +base 1 +cachet 1 +effici 1 +improv 1 +analysi 1 +teitelbaum 1 +proceed 1 +systemat 1 +attribut 1 +interact 1 +system 1 +languag 1 +deriv 1 +sigplan 1 +symposium 1 +principl 1 +anni 1 +relat 0 +project 0 +computationderiv 0 +programsa 0 +gener 0 +approach 0 +themeprogram 0 +usessystemat 0 +techniqu 0 +deriveincrement 0 +written 0 +function 0 +select 0 +public 0 +scienc 0 +februari 0 +cach 0 +intermedi 0 +result 0 +partial 0 +evalu 0 +semant 0 +manipul 0 +page 0 +jolla 0 +california 0 +june 0 +stoller 0 +discov 0 +auxiliari 0 +inform 0 +annual 0 +sigact 0 +petersburg 0 +beach 0 +florida 0 +januari 0 +knowledg 0 +softwar 0 +engin 0 +confer 0 +boston 0 +massachusett 0 +novemb 0 +ieee 0 +societi 0 +press 0 +strength 0 +reduct 0 +juli 0 +peoplei 0 +liutim 0 +teitelbaumkeyword 0 +optim 0 +cacheti 0 +yanhong 0 +cornel 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..7f2471ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,191 @@ +program 3 +comput 3 +increment 3 +deriv 2 +scienc 2 +univers 2 +report 2 +teitelbaum 2 +cornel 2 +confer 2 +systemat 2 +base 2 +york 2 +technic 2 +proceed 2 +system 2 +intern 2 +transform 1 +softwar 1 +beij 1 +juli 1 +depart 1 +research 1 +improv 1 +effici 1 +ithaca 1 +symposium 1 +cach 1 +intermedi 1 +result 1 +young 1 +china 1 +august 1 +techniqu 1 +select 1 +page 1 +scientist 1 +webster 1 +yanhong 1 +home 1 +gener 1 +approach 1 +languag 1 +algorithm 1 +develop 1 +public 1 +semant 1 +januari 1 +sigplan 1 +principl 1 +cachet 1 +knowledg 1 +novemb 1 +press 1 +evalu 1 +california 1 +zhang 1 +wang 1 +combin 1 +center 1 +anni 1 +associ 1 +analysi 1 +optim 1 +octob 1 +discov 1 +auxiliari 1 +inform 1 +annual 1 +sigact 1 +petersburg 1 +beach 1 +florida 1 +interact 1 +attribut 1 +engin 1 +boston 1 +massachusett 1 +partial 1 +manipul 1 +jolla 1 +june 1 +reason 1 +qualit 1 +quantit 1 +multi 1 +factor 1 +problem 1 +march 1 +xerox 1 +institut 1 +dagstuhl 1 +expert 1 +offic 1 +indiana 1 +pageyanhong 0 +post 0 +doctor 0 +work 0 +professor 0 +interest 0 +ofcomput 0 +forincrement 0 +parallel 0 +concurr 0 +applic 0 +compil 0 +interactivesystem 0 +design 0 +systemorgan 0 +mainten 0 +talksph 0 +dissert 0 +basedsystemat 0 +also 0 +appear 0 +abstractjourn 0 +februari 0 +refere 0 +stoller 0 +ieee 0 +societi 0 +peke 0 +tsinghua 0 +formal 0 +uncertainti 0 +model 0 +partit 0 +descript 0 +fuzzi 0 +world 0 +congress 0 +seattl 0 +washington 0 +inexact 0 +profession 0 +publish 0 +hous 0 +survei 0 +map 0 +septemb 0 +revis 0 +wakayama 0 +line 0 +break 0 +talk 0 +kestrel 0 +palo 0 +alto 0 +seminar 0 +dynam 0 +schloss 0 +germani 0 +automat 0 +laboratori 0 +document 0 +us 0 +obtain 0 +present 0 +oggeb 0 +basin 0 +implement 0 +test 0 +usag 0 +manual 0 +summari 0 +explor 0 +ri 0 +tshinghua 0 +author 0 +song 0 +huang 0 +current 0 +project 0 +compos 0 +build 0 +compon 0 +upson 0 +hallcornel 0 +universityithaca 0 +last 0 +updat 0 +move 0 +tocomput 0 +department 0 +lindlei 0 +hallindiana 0 +universitybloomington 0 +eduhttp 0 +peopl 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..1a6c3bb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,24 @@ +cornel 1 +cheng 1 +huang 1 +depart 1 +comput 1 +scienc 1 +univers 1 +ychuang 1 +home 0 +page 0 +huangyi 0 +upson 0 +hall 0 +ithaca 0 +email 0 +edui 0 +graduat 0 +student 0 +favorit 0 +link 0 +onlin 0 +documentscoursesprojectaccess 0 +byvisitorslast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..9fb130a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,120 @@ +school 2 +like 1 +comput 1 +korea 1 +high 1 +music 1 +cornel 1 +came 1 +java 1 +ilbo 1 +april 1 +studi 1 +scienc 1 +master 1 +degre 1 +engin 1 +america 1 +myoung 1 +junior 1 +virginia 1 +husband 1 +chang 1 +work 1 +would 1 +plai 1 +keyboard 1 +piano 1 +korean 1 +us 1 +search 1 +resum 1 +email 1 +chung 0 +home 0 +pagewelcom 0 +everybodi 0 +name 0 +chungyou 0 +thvisitor 0 +sinc 0 +univers 0 +pleas 0 +check 0 +sciencecornel 0 +universitywher 0 +origin 0 +seoul 0 +graduat 0 +went 0 +kindergarten 0 +elementari 0 +sang 0 +kang 0 +sung 0 +women 0 +georg 0 +mason 0 +universityin 0 +happiest 0 +thing 0 +life 0 +marri 0 +wonder 0 +forsaic 0 +programm 0 +pictur 0 +beauti 0 +moment 0 +avail 0 +browser 0 +click 0 +free 0 +actual 0 +better 0 +listen 0 +kind 0 +love 0 +shin 0 +seung 0 +hoon 0 +moon 0 +classic 0 +forth 0 +brows 0 +world 0 +wide 0 +link 0 +interest 0 +want 0 +onlin 0 +newpap 0 +hangook 0 +chosun 0 +joongang 0 +hire 0 +word 0 +perfect 0 +version 0 +meng 0 +project 0 +still 0 +titl 0 +imag 0 +process 0 +appletyoosun 0 +person 0 +infom 0 +triphamm 0 +sbithaca 0 +phone 0 +emerg 0 +ychung 0 +forward 0 +yooschung 0 +automat 0 +page 0 +construct 0 +last 0 +modifi 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..354ed6ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,221 @@ +page 1 +great 1 +good 1 +cornel 1 +time 1 +live 1 +plai 1 +much 1 +know 1 +take 1 +movi 1 +public 1 +yaron 1 +minski 1 +home 1 +graduat 1 +student 1 +ithaca 1 +place 1 +syracus 1 +toler 1 +comput 1 +flapdragon 1 +longer 1 +game 1 +better 1 +make 1 +easi 1 +medic 1 +school 1 +love 1 +everi 1 +rate 1 +tri 1 +yellow 1 +linux 1 +block 1 +yminski 0 +edudepart 0 +computersci 0 +upson 0 +hall 0 +univers 0 +phone 0 +comstock 0 +current 0 +focus 0 +onfault 0 +distribut 0 +particular 0 +work 0 +thetacoma 0 +project 0 +attempt 0 +build 0 +oper 0 +system 0 +support 0 +forfault 0 +agent 0 +base 0 +year 0 +livether 0 +still 0 +veggi 0 +coop 0 +crash 0 +often 0 +nowadai 0 +slightli 0 +outof 0 +date 0 +webpag 0 +howev 0 +notic 0 +anopen 0 +start 0 +need 0 +highli 0 +recommendit 0 +though 0 +ancientchines 0 +extremlysimpl 0 +rule 0 +complic 0 +satisfi 0 +strategi 0 +like 0 +learn 0 +intro 0 +also 0 +want 0 +internet 0 +nota 0 +real 0 +person 0 +front 0 +thannoth 0 +unix 0 +machin 0 +look 0 +cgoban 0 +nicest 0 +goboard 0 +program 0 +seen 0 +trivial 0 +thenet 0 +newli 0 +marri 0 +wife 0 +lisa 0 +go 0 +suni 0 +health 0 +scienc 0 +center 0 +uniqu 0 +qualifi 0 +within 0 +hour 0 +twenti 0 +minutesof 0 +favoritepoem 0 +lafiglia 0 +piang 0 +theidea 0 +order 0 +west 0 +advicefor 0 +resumesom 0 +interestinglink 0 +critic 0 +impress 0 +exampl 0 +fairli 0 +simpl 0 +technolog 0 +us 0 +effect 0 +site 0 +engin 0 +compar 0 +other 0 +come 0 +recommend 0 +found 0 +almost 0 +alarmingli 0 +contrast 0 +firefli 0 +thing 0 +fail 0 +miser 0 +yahoo 0 +address 0 +give 0 +direct 0 +seven 0 +closest 0 +bakeri 0 +perfect 0 +bigbook 0 +bigyellow 0 +advertis 0 +york 0 +read 0 +free 0 +plu 0 +save 0 +paper 0 +note 0 +download 0 +text 0 +slate 0 +magazin 0 +hate 0 +admit 0 +microsoft 0 +someth 0 +right 0 +thought 0 +well 0 +execut 0 +expect 0 +michael 0 +kinslei 0 +run 0 +compani 0 +instal 0 +maintain 0 +list 0 +amazon 0 +book 0 +solid 0 +discount 0 +virtual 0 +bookstor 0 +hope 0 +local 0 +booksel 0 +town 0 +brother 0 +follow 0 +begin 0 +version 0 +comment 0 +process 0 +mailcrypt 0 +emac 0 +interfacemqbtazgjohoaaaedalfhlgjmdg 0 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 0 +rbylf 0 +zwqujcioczoecv 0 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 0 +gkgarsokrinnoazihja 0 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 0 +wumjgzsnvispwkrvzgdrojswmc 0 +eigsqsb 0 +bsbpw 0 +jcwz 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..799c892b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,43 @@ +yuichi 1 +tsuchimoto 1 +cornel 1 +home 1 +current 1 +program 1 +introduct 1 +compil 1 +translatorsc 1 +practicum 1 +artifici 1 +intelligencec 1 +format 1 +info 1 +peopl 1 +pageyuichi 0 +pagecours 0 +workfal 0 +semest 0 +system 0 +oper 0 +systemsc 0 +languag 0 +softwar 0 +engineeringspr 0 +machin 0 +visionfal 0 +foundat 0 +theori 0 +computingi 0 +look 0 +unit 0 +state 0 +resum 0 +postscript 0 +address 0 +eduhttp 0 +last 0 +modif 0 +novemb 0 +http 0 +welcom 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..1f80e84e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,326 @@ +messag 3 +activ 2 +commun 2 +latenc 2 +implement 2 +eicken 2 +network 2 +paper 2 +machin 1 +us 1 +version 1 +overhead 1 +layer 1 +split 1 +perform 1 +design 1 +releas 1 +chang 1 +thorsten 1 +processor 1 +high 1 +describ 1 +show 1 +cluster 1 +architectur 1 +multiprocessor 1 +cost 1 +cornel 1 +part 1 +pleas 1 +read 1 +chao 1 +grzegorz 1 +czajkowski 1 +abstract 1 +power 1 +bandwidth 1 +softwar 1 +activemessag 1 +compar 1 +standard 1 +workstat 1 +allow 1 +mechan 1 +conform 1 +spec 1 +project 1 +avail 1 +inform 1 +includ 1 +know 1 +select 1 +appear 1 +novemb 1 +mpp 1 +pass 1 +order 1 +reduc 1 +first 1 +round 1 +trip 1 +secondpart 1 +demonstr 1 +benchmark 1 +technic 1 +report 1 +characterist 1 +present 1 +detail 1 +gener 1 +specif 1 +interfac 1 +interconnect 1 +oper 1 +without 1 +evalu 1 +driven 1 +berkelei 1 +messagescornel 0 +implementationsact 0 +neta 0 +sourc 0 +code 0 +thegener 0 +moreinform 0 +page 0 +object 0 +codereleas 0 +thegam 0 +readm 0 +instal 0 +file 0 +distribut 0 +instructionson 0 +contact 0 +releasenot 0 +fileto 0 +find 0 +previou 0 +currentvers 0 +also 0 +major 0 +differencebetween 0 +modifi 0 +libmpci 0 +thedistribut 0 +document 0 +packag 0 +fordetail 0 +interest 0 +current 0 +pleaseclick 0 +send 0 +briefnot 0 +let 0 +someth 0 +organ 0 +theus 0 +intend 0 +public 0 +messageslow 0 +ibmrisc 0 +system 0 +chri 0 +hawblitzel 0 +ieeesupercomput 0 +pittsburgh 0 +commerci 0 +spiteof 0 +fast 0 +scommun 0 +inferior 0 +older 0 +tmccm 0 +meiko 0 +investig 0 +primit 0 +altern 0 +standardmessag 0 +tooffer 0 +good 0 +build 0 +block 0 +higher 0 +directli 0 +networkadapt 0 +yieldsa 0 +lower 0 +communicationsubstr 0 +well 0 +cbenchmark 0 +lowmessag 0 +throughput 0 +compens 0 +networklat 0 +base 0 +freeli 0 +availablempich 0 +achiev 0 +equival 0 +onth 0 +februari 0 +andevalu 0 +implementationbenchmark 0 +adapt 0 +firmwar 0 +butdo 0 +assumefamiliar 0 +concept 0 +underli 0 +mainperform 0 +word 0 +timeof 0 +asymptot 0 +focuseson 0 +analysi 0 +comparison 0 +smessag 0 +defin 0 +portabl 0 +across 0 +varieti 0 +parallel 0 +theu 0 +themeiko 0 +thehpam 0 +fddi 0 +ring 0 +theparagon 0 +thesp 0 +networksus 0 +veena 0 +avula 0 +anyndia 0 +basu 0 +vineet 0 +buch 0 +palo 0 +alto 0 +abridg 0 +ieee 0 +micro 0 +magazin 0 +slide 0 +talk 0 +recent 0 +develop 0 +forparallel 0 +made 0 +signific 0 +progress 0 +thecommun 0 +magnitud 0 +ascompar 0 +earlier 0 +propos 0 +examin 0 +whether 0 +thesetechniqu 0 +carri 0 +connect 0 +anatm 0 +even 0 +though 0 +systemsoftwar 0 +equip 0 +optim 0 +streamcommun 0 +direct 0 +protect 0 +user 0 +level 0 +access 0 +thenetwork 0 +reliabl 0 +transmiss 0 +flowcontrol 0 +differ 0 +incommun 0 +builtfrom 0 +hardwar 0 +compon 0 +state 0 +artmultiprocessor 0 +lack 0 +flow 0 +control 0 +systemcoordin 0 +affect 0 +significantli 0 +andrequir 0 +larger 0 +buffer 0 +prototyp 0 +model 0 +clusterinterconnect 0 +measur 0 +showappl 0 +applic 0 +microsecond 0 +smallmessag 0 +roughli 0 +messagesimplement 0 +think 0 +integr 0 +andcomput 0 +culler 0 +goldstein 0 +schauser 0 +proceed 0 +symp 0 +comput 0 +gold 0 +coast 0 +australia 0 +abstractth 0 +challeng 0 +larg 0 +scale 0 +tominim 0 +overlapcomput 0 +coordin 0 +sacrificingprocessor 0 +exist 0 +passingmultiprocessor 0 +unnecessarili 0 +researchprototyp 0 +communicationoverhead 0 +poor 0 +introduc 0 +simplecommun 0 +isintrins 0 +effect 0 +thehardwar 0 +offer 0 +tremend 0 +flexibl 0 +ncube 0 +phase 0 +share 0 +memoryextens 0 +messagesar 0 +suffici 0 +dynam 0 +schedul 0 +languag 0 +forwhich 0 +toler 0 +becom 0 +program 0 +compil 0 +concern 0 +hardwaresupport 0 +desir 0 +outlin 0 +rang 0 +ofenhanc 0 +mainstream 0 +efficientcommun 0 +thesi 0 +univers 0 +california 0 +sitesact 0 +messagesin 0 +projectfor 0 +contactthorsten 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..c31ac709 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,202 @@ +system 2 +horu 2 +applic 2 +isi 2 +cornel 1 +technolog 1 +comput 1 +demand 1 +militari 1 +toler 1 +control 1 +distribut 1 +effort 1 +develop 1 +program 1 +fault 1 +high 1 +perform 1 +work 1 +user 1 +plan 1 +environ 1 +year 1 +us 1 +featur 1 +virtual 1 +prior 1 +success 1 +also 1 +commun 1 +media 1 +remot 1 +base 1 +wide 1 +rang 1 +space 1 +branch 1 +project 1 +futur 1 +chang 1 +thu 1 +environmenthoru 0 +kenneth 0 +birman 0 +robbert 0 +reness 0 +shoru 0 +reliabledistribut 0 +last 0 +demonstrategroupwar 0 +network 0 +foundto 0 +offer 0 +higher 0 +similar 0 +novel 0 +ofhoru 0 +flexibl 0 +softwar 0 +architectur 0 +support 0 +synchronousprocess 0 +group 0 +toolkit 0 +becom 0 +signific 0 +commerci 0 +offersa 0 +securityand 0 +privaci 0 +view 0 +importantresearch 0 +advanc 0 +extend 0 +provid 0 +extrem 0 +latenc 0 +performancer 0 +time 0 +capabl 0 +approach 0 +combin 0 +element 0 +calledact 0 +messageswith 0 +multi 0 +playbacksystem 0 +calledcontinu 0 +expect 0 +demonstr 0 +speed 0 +interact 0 +multimediaserv 0 +might 0 +telemedicin 0 +videoon 0 +retain 0 +exist 0 +andsecur 0 +option 0 +synchroni 0 +model 0 +creat 0 +substanti 0 +expectrapid 0 +uptak 0 +within 0 +matur 0 +spana 0 +industri 0 +includ 0 +telecommun 0 +financialtrad 0 +stock 0 +market 0 +autom 0 +factori 0 +floor 0 +process 0 +fordiscret 0 +electron 0 +compon 0 +manufactur 0 +traffic 0 +basedcommun 0 +manag 0 +beingexplor 0 +sever 0 +well 0 +othernon 0 +govern 0 +among 0 +visibl 0 +isth 0 +naval 0 +hiper 0 +explor 0 +systemthat 0 +prototyp 0 +enhanc 0 +aegi 0 +battleradar 0 +would 0 +benefitfrom 0 +access 0 +initi 0 +make 0 +possibl 0 +migrateisi 0 +benefit 0 +communityin 0 +direct 0 +transit 0 +occur 0 +licens 0 +agreementswith 0 +subsidiari 0 +stratu 0 +howev 0 +avail 0 +research 0 +isdescrib 0 +detail 0 +public 0 +manual 0 +look 0 +hope 0 +mixtur 0 +technologieswil 0 +permit 0 +beseen 0 +next 0 +gener 0 +groupwar 0 +illustr 0 +belowshow 0 +mission 0 +integr 0 +data 0 +varieti 0 +ground 0 +resourc 0 +andus 0 +coordin 0 +action 0 +variou 0 +theatr 0 +asset 0 +thissort 0 +utmost 0 +reliabl 0 +secur 0 +whilealso 0 +failur 0 +rapidli 0 +reconfigur 0 +respond 0 +impact 0 +civilianand 0 +dept 0 +scienc 0 +univers 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..3ad6e1a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,77 @@ +multimedia 1 +applic 1 +medianet 1 +perform 1 +network 1 +commun 1 +group 1 +cornel 1 +high 1 +process 1 +combin 1 +develop 1 +user 1 +level 1 +improv 1 +facilit 1 +horu 1 +reliabl 1 +toolkit 1 +video 1 +inform 1 +projectmedianet 0 +platform 0 +media 0 +technolog 0 +research 0 +todevelop 0 +flexibl 0 +testb 0 +store 0 +transport 0 +us 0 +data 0 +architectur 0 +access 0 +dramat 0 +protocolsth 0 +order 0 +magnitud 0 +communicationmak 0 +parallel 0 +comput 0 +workstat 0 +cluster 0 +practic 0 +communicationprimit 0 +adapt 0 +industri 0 +strength 0 +tool 0 +secur 0 +primit 0 +critic 0 +foradvanc 0 +militari 0 +commerci 0 +approach 0 +distribut 0 +audio 0 +portabl 0 +build 0 +includeaudio 0 +rapid 0 +prototyp 0 +multimediaappl 0 +fund 0 +project 0 +provid 0 +contract 0 +fromth 0 +darpa 0 +technologyofficefor 0 +contact 0 +thorstenvon 0 +eicken 0 +brian 0 +smith 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..af6c027e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,31 @@ +nuprl 2 +project 1 +theori 1 +cornel 1 +browser 1 +articl 1 +link 1 +autom 0 +reason 0 +introduct 0 +theorem 0 +design 0 +written 0 +vaughn 0 +user 0 +document 0 +relat 0 +public 0 +class 0 +note 0 +linux 0 +announc 0 +suggest 0 +feedback 0 +help 0 +main 0 +index 0 +curiou 0 +mani 0 +page 0 +askaltavista 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..4bed90ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,217 @@ +split 2 +program 2 +node 2 +file 2 +messag 2 +granita 1 +includ 1 +debug 1 +activ 1 +comput 1 +exampl 1 +parallel 1 +bench 1 +sourc 1 +setenv 1 +compil 1 +locat 1 +am_run 1 +cuc 1 +us 1 +machin 1 +shell 1 +instal 1 +inform 1 +type 1 +read 1 +local 1 +commun 1 +makefil 1 +directori 1 +librari 1 +also 1 +login 1 +tcsh 1 +bash 1 +experi 1 +first 1 +command 1 +remot 1 +info 1 +softwar 1 +statement 1 +found 1 +execut 1 +look 1 +gmake 1 +follow 1 +return 1 +perform 1 +spam 1 +avail 1 +header 1 +script 1 +ampicc 1 +pleas 1 +scienc 0 +call 0 +eight 0 +granitathrough 0 +design 0 +asinteract 0 +problemsdur 0 +remov 0 +oper 0 +system 0 +specif 0 +stufffrom 0 +configur 0 +haveth 0 +arch 0 +unam 0 +instead 0 +readm 0 +contain 0 +informationabout 0 +releas 0 +addit 0 +manyou 0 +infoexplor 0 +commandsand 0 +usag 0 +displai 0 +properli 0 +job 0 +neither 0 +activemassag 0 +peor 0 +messagesor 0 +hardwar 0 +cornel 0 +theori 0 +center 0 +homegrown 0 +softwarein 0 +gener 0 +besur 0 +path 0 +csplit 0 +simpl 0 +extens 0 +forparallel 0 +provid 0 +global 0 +address 0 +space 0 +though 0 +globalpoint 0 +dereferenc 0 +like 0 +regular 0 +pointer 0 +phase 0 +assign 0 +allow 0 +programm 0 +hide 0 +latencyof 0 +access 0 +overlap 0 +work 0 +user 0 +shellsshould 0 +creat 0 +sampl 0 +variou 0 +must 0 +make 0 +asact 0 +scriptsloc 0 +programfoo 0 +processor 0 +foodebug 0 +step 0 +need 0 +done 0 +insert 0 +splitc_debug 0 +aftersplitc_main 0 +describ 0 +previou 0 +section 0 +commonli 0 +ongranita 0 +enter 0 +continu 0 +hit 0 +onto 0 +want 0 +youwant 0 +master 0 +open 0 +insid 0 +thenattach 0 +theth 0 +proc 0 +process 0 +proce 0 +attach 0 +stop 0 +andyou 0 +breakpoint 0 +stack 0 +frame 0 +messagesact 0 +overhead 0 +layerthat 0 +offer 0 +high 0 +mani 0 +nativ 0 +layer 0 +main 0 +characterist 0 +word 0 +round 0 +triplat 0 +asymptot 0 +network 0 +bandwidth 0 +libspgam 0 +aand 0 +beforerun 0 +runningprgm 0 +mpimpi 0 +popularmessag 0 +pass 0 +interfac 0 +portabl 0 +animplement 0 +base 0 +mpich 0 +run 0 +overact 0 +easiest 0 +link 0 +whichi 0 +built 0 +fooyou 0 +lookat 0 +examplesin 0 +ampi 0 +exactli 0 +likeordinari 0 +sure 0 +softwaresoftwar 0 +fortran 0 +xpdbx 0 +matlab 0 +emac 0 +bison 0 +replic 0 +problemsif 0 +difficulti 0 +contact 0 +czar 0 +grzegorz 0 +czajkowski 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..bfd9cfed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,105 @@ +model 1 +project 1 +research 1 +languag 1 +cornel 1 +simul 1 +creat 1 +system 1 +simlab 1 +select 1 +effort 1 +comput 1 +gener 1 +scientif 1 +softwar 1 +collabor 1 +version 1 +program 1 +present 1 +chew 1 +home 0 +page 0 +enorm 0 +current 0 +expend 0 +scientificsoftwar 0 +particularli 0 +physic 0 +defin 0 +oncomplex 0 +geometri 0 +us 0 +advanc 0 +hardwar 0 +thegoal 0 +reduc 0 +bringingtogeth 0 +technolog 0 +geometr 0 +symbolicmathemat 0 +numer 0 +analysi 0 +compil 0 +code 0 +andform 0 +method 0 +tool 0 +rais 0 +semant 0 +levelat 0 +possibl 0 +overview 0 +softwarepackag 0 +activ 0 +mathemat 0 +environ 0 +propos 0 +postscript 0 +guarante 0 +qualiti 0 +mesh 0 +microstoragearchitectur 0 +weyl 0 +computeralgebra 0 +substrat 0 +high 0 +levelprogram 0 +synthes 0 +thechain 0 +algebra 0 +topolog 0 +compon 0 +thearpa 0 +nist 0 +madefast 0 +design 0 +manufactur 0 +exercis 0 +longer 0 +direct 0 +insystem 0 +richard 0 +zippel 0 +ideason 0 +proce 0 +includ 0 +brief 0 +discuss 0 +ofnon 0 +contemporan 0 +commun 0 +microstorag 0 +architectur 0 +theus 0 +transform 0 +chainsprogram 0 +complextopolog 0 +engin 0 +numericalalgorithm 0 +rick 0 +palmer 0 +peopl 0 +public 0 +report 0 +paul 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..58ccc282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,51 @@ +split 2 +inform 1 +eicken 1 +sourc 1 +code 1 +releas 1 +cornel 1 +prepar 1 +page 1 +ccornel 0 +implementationssplit 0 +neta 0 +isimpl 0 +activ 0 +messagesfor 0 +contact 0 +thorsten 0 +ofsplit 0 +distr 0 +implementedon 0 +spam 0 +contactchi 0 +chao 0 +chang 0 +grzegorz 0 +czajkowski 0 +thorstenvon 0 +share 0 +memori 0 +multiprocessorsa 0 +multiprocessor 0 +runningsolari 0 +mattwelsh 0 +select 0 +public 0 +cparallel 0 +program 0 +culler 0 +dusseau 0 +goldstein 0 +krishnamurthi 0 +lumetta 0 +yelick 0 +proceed 0 +supercomput 0 +novemb 0 +abstractproject 0 +sitessplit 0 +chome 0 +berkelei 0 +contactthorsten 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..01c5db9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,9 @@ +page 0 +move 0 +browser 0 +redirect 0 +second 0 +http 0 +cornel 0 +default 0 +html 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..0487d5d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,220 @@ +robot 2 +cornel 2 +donald 2 +comput 2 +vision 1 +paper 1 +list 1 +ieee 1 +manipul 1 +laboratori 1 +csrvl 1 +prof 1 +daniel 1 +applic 1 +zabih 1 +video 1 +base 1 +parallel 1 +justin 1 +avail 1 +report 1 +server 1 +intern 1 +confer 1 +proceed 1 +bhringer 1 +professor 1 +match 1 +direct 1 +scienc 1 +huttenloch 1 +ramin 1 +includ 1 +distribut 1 +micro 1 +electro 1 +mechan 1 +system 1 +bruce 1 +follow 1 +project 1 +automat 1 +break 1 +miller 1 +done 1 +move 1 +select 1 +mani 1 +proc 1 +revis 1 +symposium 1 +foundat 1 +inform 1 +invari 1 +workshop 1 +autom 1 +diego 1 +microfabr 1 +mihailovich 1 +macdonald 1 +technic 1 +associ 1 +greg 1 +csrvlcornel 0 +laboratorywelcom 0 +nich 0 +rrentli 0 +develop 0 +pleas 0 +hard 0 +question 0 +comment 0 +thank 0 +locat 0 +univers 0 +ithaca 0 +three 0 +main 0 +area 0 +ofresearch 0 +multimedia 0 +mem 0 +pictor 0 +tour 0 +current 0 +projectsth 0 +activ 0 +supervis 0 +byramin 0 +detect 0 +andclassif 0 +scene 0 +digit 0 +mpeg 0 +browser 0 +allowingscen 0 +global 0 +motion 0 +queri 0 +real 0 +time 0 +sourc 0 +transmiss 0 +full 0 +frame 0 +onplatform 0 +nynet 0 +cluster 0 +number 0 +involv 0 +high 0 +perform 0 +imag 0 +implement 0 +split 0 +foru 0 +symmetr 0 +multiprocessor 0 +potenti 0 +master 0 +sproject 0 +maintain 0 +work 0 +unix 0 +currentlyconsid 0 +windowsnt 0 +discuss 0 +theissuesher 0 +hope 0 +support 0 +microsoft 0 +publicationsth 0 +research 0 +thecsrvl 0 +anonym 0 +public 0 +tech 0 +serverar 0 +program 0 +mobil 0 +scheme 0 +ree 0 +automationnic 0 +franc 0 +complex 0 +homolog 0 +type 0 +triangul 0 +chang 0 +juan 0 +octob 0 +jen 0 +first 0 +algorithm 0 +peter 0 +boston 0 +wilson 0 +andj 0 +latomb 0 +submit 0 +artifici 0 +intellig 0 +sensor 0 +configur 0 +task 0 +plan 0 +brigg 0 +sensorless 0 +us 0 +massiv 0 +actuatorarrai 0 +theori 0 +control 0 +actuat 0 +arrai 0 +oiso 0 +japan 0 +januari 0 +approach 0 +design 0 +micromechan 0 +hing 0 +structur 0 +extend 0 +abstract 0 +siggraph 0 +solid 0 +model 0 +montral 0 +quebc 0 +canada 0 +authorthes 0 +gener 0 +dynam 0 +index 0 +search 0 +author 0 +titl 0 +keyword 0 +scott 0 +cytacki 0 +pedro 0 +felzenszwalb 0 +ryan 0 +lilien 0 +michel 0 +maharbiz 0 +pass 0 +scharstein 0 +aaron 0 +stump 0 +szewczyk 0 +fernando 0 +viton 0 +voskuhl 0 +wayt 0 +matt 0 +welsh 0 +whelan 0 +assist 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..f52b4d8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,91 @@ +inform 1 +research 1 +captur 1 +access 1 +group 1 +collect 1 +work 1 +comput 1 +structur 1 +materi 1 +document 1 +construct 1 +report 1 +cornel 1 +projectinform 0 +accessth 0 +waysthat 0 +locat 0 +ever 0 +increas 0 +volum 0 +ofonlin 0 +data 0 +determin 0 +extract 0 +forhuman 0 +user 0 +found 0 +john 0 +hopcroft 0 +davisin 0 +current 0 +area 0 +researchextract 0 +onlin 0 +thestructur 0 +explicit 0 +extractinginform 0 +present 0 +tabular 0 +form 0 +relat 0 +databas 0 +summari 0 +overview 0 +collectionsof 0 +text 0 +nationwid 0 +librari 0 +sciencetechn 0 +begun 0 +digit 0 +computersci 0 +technic 0 +order 0 +make 0 +moreaccess 0 +internet 0 +avail 0 +server 0 +addit 0 +toit 0 +util 0 +gener 0 +commun 0 +thisdocu 0 +test 0 +consist 0 +dean 0 +krafft 0 +visitingscientist 0 +jimdavi 0 +well 0 +number 0 +graduat 0 +undergradu 0 +student 0 +fall 0 +project 0 +activ 0 +longer 0 +jrdpublicationsjam 0 +allan 0 +informationag 0 +build 0 +hyperlink 0 +proceed 0 +confer 0 +oninform 0 +knowledg 0 +manag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..005b96f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,15 @@ +research 1 +multimedia 1 +zeno 0 +groupzeno 0 +cornel 0 +group 0 +peopl 0 +mission 0 +project 0 +paper 0 +softwar 0 +curricula 0 +develop 0 +potpourri 0 +direct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..955c7076 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,102 @@ +page 1 +work 1 +home 1 +degre 1 +come 1 +back 1 +current 1 +rain 1 +santa 1 +barbara 1 +cornel 1 +video 1 +pagewelcom 0 +depart 0 +issu 0 +pictur 0 +left 0 +see 0 +quit 0 +date 0 +frame 0 +challeng 0 +viewer 0 +syosset 0 +york 0 +town 0 +long 0 +island 0 +receiv 0 +bachelor 0 +scienc 0 +decemb 0 +month 0 +california 0 +decid 0 +fall 0 +master 0 +engin 0 +leav 0 +land 0 +ithaca 0 +mayb 0 +miss 0 +season 0 +wind 0 +snow 0 +actual 0 +enough 0 +anywai 0 +plan 0 +graduat 0 +meng 0 +project 0 +prof 0 +ramin 0 +zabih 0 +robot 0 +vision 0 +csrvl 0 +interest 0 +topic 0 +motion 0 +segment 0 +gener 0 +process 0 +paper 0 +relat 0 +research 0 +area 0 +link 0 +compani 0 +green 0 +hill 0 +softwar 0 +californialockhe 0 +martin 0 +control 0 +system 0 +binghamton 0 +yorkaltera 0 +corp 0 +jose 0 +californiafun 0 +stuff 0 +game 0 +domainvth 0 +babylon 0 +siteoth 0 +place 0 +univers 0 +worldcareermosaictop 0 +site 0 +student 0 +email 0 +kmai 0 +cours 0 +still 0 +construct 0 +last 0 +modifi 0 +januari 0 +access 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..5420262c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,42 @@ +utc 1 +inform 1 +faculti 1 +depart 1 +public 1 +peopl 1 +home 0 +pagegener 0 +recruit 0 +overview 0 +research 0 +group 0 +relat 0 +program 0 +academ 0 +admiss 0 +requir 0 +cours 0 +descript 0 +catalog 0 +comput 0 +facil 0 +upcom 0 +event 0 +calendar 0 +seminar 0 +talk 0 +visitor 0 +schedulespag 0 +class 0 +person 0 +page 0 +student 0 +organ 0 +alumni 0 +link 0 +find 0 +staff 0 +directoryth 0 +universitywww 0 +informationgrip 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..11c9e3e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,67 @@ +intern 1 +mathemat 1 +award 1 +theorem 1 +prove 1 +intellig 1 +board 1 +artificialintellig 1 +woodrow 1 +chair 1 +emeritu 1 +univers 1 +servic 1 +autom 1 +artifici 1 +truste 1 +joint 1 +confer 1 +research 1 +analog 1 +profil 1 +bledso 0 +bledsoepet 0 +donnel 0 +centenni 0 +comput 0 +system 0 +professor 0 +utah 0 +salt 0 +lake 0 +citi 0 +california 0 +berkelei 0 +honor 0 +profession 0 +third 0 +mileston 0 +americanmathemat 0 +societi 0 +distinguish 0 +jointconfer 0 +presid 0 +american 0 +associ 0 +onartifici 0 +editor 0 +journal 0 +presentarea 0 +interestautomat 0 +summari 0 +researchmi 0 +focus 0 +automat 0 +theoremproof 0 +check 0 +involv 0 +heurist 0 +higher 0 +levelplan 0 +well 0 +exampl 0 +alsointerest 0 +learn 0 +previou 0 +index 0 +next 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..c3a9a92b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,90 @@ +werth 2 +parallel 2 +comput 1 +educ 1 +softwar 1 +engin 1 +brown 1 +mathemat 1 +chair 1 +board 1 +scienc 1 +program 1 +proceed 1 +intern 1 +confer 1 +jain 1 +john 1 +lectur 1 +research 1 +univers 1 +associ 1 +ieee 1 +compil 1 +environ 1 +direct 1 +model 1 +process 1 +august 1 +schedul 1 +oper 1 +profil 1 +werthsenior 0 +scientist 0 +emori 0 +washington 0 +profession 0 +servic 0 +accredit 0 +vice 0 +technic 0 +committe 0 +softwareengin 0 +present 0 +area 0 +interestparallel 0 +computersci 0 +summari 0 +researchmi 0 +current 0 +interest 0 +parallelprogram 0 +andimplement 0 +issu 0 +also 0 +activ 0 +set 0 +incomput 0 +local 0 +nation 0 +level 0 +select 0 +recent 0 +publicationss 0 +hyder 0 +unifi 0 +concurr 0 +debug 0 +societi 0 +sobek 0 +newton 0 +interact 0 +formal 0 +practic 0 +develop 0 +code 0 +note 0 +york 0 +springer 0 +verlag 0 +multipl 0 +system 0 +journal 0 +distribut 0 +decemb 0 +gener 0 +applic 0 +thirteenth 0 +previou 0 +index 0 +next 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..614a7c08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,53 @@ +architectur 1 +alfr 1 +page 1 +area 1 +manag 1 +databas 1 +distribut 1 +faculti 1 +dale 0 +daleno 0 +person 0 +trammel 0 +crow 0 +regent 0 +professor 0 +emeritu 0 +comput 0 +scienc 0 +exet 0 +colleg 0 +oxford 0 +england 0 +univers 0 +texa 0 +austin 0 +interestdatabas 0 +system 0 +summari 0 +researchmi 0 +interest 0 +involv 0 +applic 0 +parallel 0 +multi 0 +stagei 0 +problem 0 +studiedinclud 0 +data 0 +strategi 0 +index 0 +andmap 0 +relat 0 +algebra 0 +oper 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..62558363 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,92 @@ +comput 2 +system 2 +scienc 1 +brumfield 1 +distribut 1 +model 1 +queue 1 +jeffrei 1 +mathemat 1 +univers 1 +award 1 +databas 1 +symposium 1 +sigcs 1 +faculti 1 +brumfieldsenior 0 +lectur 0 +math 0 +georgia 0 +purdu 0 +honor 0 +colleg 0 +natur 0 +teach 0 +excel 0 +area 0 +interestperform 0 +analysi 0 +oper 0 +summari 0 +researchi 0 +interest 0 +role 0 +plai 0 +designersof 0 +studi 0 +performanceof 0 +exist 0 +propos 0 +network 0 +eachresourc 0 +repres 0 +tasksawait 0 +servic 0 +solut 0 +involv 0 +computationof 0 +respons 0 +time 0 +length 0 +throughput 0 +select 0 +recent 0 +publicationsj 0 +shen 0 +richter 0 +graf 0 +verdi 0 +visual 0 +environ 0 +design 0 +journal 0 +ofparallel 0 +miller 0 +chou 0 +perform 0 +modelingof 0 +object 0 +orient 0 +intern 0 +parallel 0 +distributedsystem 0 +austin 0 +texa 0 +decemb 0 +concurr 0 +program 0 +modula 0 +inproceed 0 +technic 0 +loui 0 +bulletin 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..9549e1ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,113 @@ +mathemat 2 +cline 1 +comput 1 +softwar 1 +siam 1 +journal 1 +scientif 1 +numer 1 +analysi 1 +alan 1 +professor 1 +appli 1 +profession 1 +editor 1 +transact 1 +director 1 +interest 1 +problem 1 +fit 1 +renka 1 +constrain 1 +triangul 1 +condit 1 +number 1 +estim 1 +faculti 1 +clinedavid 0 +bruton 0 +centenni 0 +scienc 0 +univers 0 +michigan 0 +servic 0 +algorithm 0 +commun 0 +associ 0 +editori 0 +board 0 +statisticalcomput 0 +special 0 +group 0 +southern 0 +region 0 +socialrespons 0 +area 0 +interestmathemat 0 +summari 0 +researchi 0 +transform 0 +tool 0 +whichcan 0 +involv 0 +constructionof 0 +explor 0 +methodolog 0 +formathemat 0 +particular 0 +major 0 +developmentha 0 +packag 0 +hundr 0 +subprogram 0 +curv 0 +andsurfac 0 +emploi 0 +tension 0 +spline 0 +select 0 +recent 0 +publicationsr 0 +scatter 0 +data 0 +us 0 +delaunai 0 +imac 0 +expert 0 +system 0 +symbol 0 +north 0 +holland 0 +king 0 +meyer 0 +rout 0 +schedul 0 +coast 0 +guard 0 +buoi 0 +tender 0 +interfac 0 +dimension 0 +solut 0 +closest 0 +node 0 +presenc 0 +barrier 0 +counter 0 +exampl 0 +three 0 +statist 0 +moler 0 +stewart 0 +wilkinson 0 +matrix 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..8f481558 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,69 @@ +mathemat 1 +univers 1 +edsger 1 +wybe 1 +comput 1 +physic 1 +award 1 +member 1 +academi 1 +art 1 +interest 1 +faculti 1 +dijkstra 0 +dijkstraschlumberg 0 +centenni 0 +chair 0 +sciencesprofessor 0 +mathematicskandidaatsexamen 0 +doctora 0 +examen 0 +theoret 0 +leydenph 0 +amsterdamhonor 0 +awardsacm 0 +ture 0 +foreign 0 +honorari 0 +american 0 +sciencesmemb 0 +royal 0 +netherland 0 +sciencesdistinguish 0 +fellow 0 +british 0 +societyafip 0 +harri 0 +good 0 +memori 0 +doctor 0 +scienc 0 +honori 0 +causa 0 +queen 0 +belfastarea 0 +program 0 +correct 0 +methodolog 0 +algorithm 0 +systemssummari 0 +research 0 +area 0 +focus 0 +streamlin 0 +argumentso 0 +increas 0 +power 0 +reason 0 +particular 0 +ofform 0 +techniqu 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..7323f8f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,60 @@ +comput 1 +edmondson 1 +scienc 1 +chri 1 +yurkanan 1 +univers 1 +network 1 +mobil 1 +protocol 1 +faculti 1 +yurkananlectur 0 +mathemat 0 +texa 0 +austin 0 +profession 0 +servic 0 +secretari 0 +treasur 0 +sigcomm 0 +area 0 +interestcomput 0 +educ 0 +manag 0 +larg 0 +softwar 0 +project 0 +databas 0 +design 0 +summari 0 +researchmi 0 +research 0 +interest 0 +high 0 +speed 0 +commun 0 +specif 0 +internetwork 0 +select 0 +recent 0 +public 0 +cobb 0 +andm 0 +gouda 0 +address 0 +internet 0 +inproceed 0 +annual 0 +theori 0 +informaticsconfer 0 +press 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..cdb82d92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,50 @@ +univers 1 +confer 1 +committe 1 +area 1 +suzi 1 +secondari 1 +educ 1 +comput 1 +scienc 1 +servic 1 +inform 1 +faculti 1 +gallagh 0 +gallagherlectur 0 +coordin 0 +academ 0 +program 0 +loyola 0 +southwestern 0 +louisiana 0 +profession 0 +sigcs 0 +necc 0 +interestcomput 0 +librari 0 +process 0 +summari 0 +researchmi 0 +interest 0 +student 0 +recruit 0 +andretent 0 +women 0 +minor 0 +improv 0 +scienceeduc 0 +school 0 +local 0 +system 0 +retriev 0 +techniqu 0 +addit 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..496fda6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,114 @@ +jenevein 2 +comput 2 +interconnect 1 +optic 1 +system 1 +wafer 1 +fault 1 +network 1 +perform 1 +scale 1 +toler 1 +processor 1 +measur 1 +chemistri 1 +parallel 1 +architectur 1 +recent 1 +work 1 +methodolog 1 +menez 1 +applic 1 +ieee 1 +transact 1 +malek 1 +nest 1 +proceed 1 +intern 1 +confer 1 +engin 1 +faculti 1 +senior 0 +lectur 0 +louisiana 0 +state 0 +univers 0 +orlean 0 +area 0 +interestinterconnect 0 +process 0 +summari 0 +researchmi 0 +research 0 +focus 0 +interconnectionnetwork 0 +success 0 +failur 0 +restsin 0 +abil 0 +devis 0 +appropri 0 +cost 0 +interconnectionstructur 0 +involv 0 +thedevelop 0 +special 0 +kindof 0 +laser 0 +wave 0 +guid 0 +design 0 +beinginvestig 0 +techniqu 0 +lead 0 +parallelsystem 0 +beingappli 0 +buss 0 +communicationswitch 0 +iscontinu 0 +performanceport 0 +across 0 +machin 0 +develop 0 +contrast 0 +tobenchmark 0 +repres 0 +true 0 +memorysystem 0 +select 0 +publicationsr 0 +kyklo 0 +multicomput 0 +strategi 0 +properti 0 +june 0 +laranjeira 0 +predic 0 +scheme 0 +press 0 +ullah 0 +metrix 0 +precis 0 +industri 0 +decemb 0 +johnson 0 +impact 0 +multiprocessor 0 +journal 0 +qualiti 0 +reliabl 0 +octob 0 +campbel 0 +prototyp 0 +integr 0 +januari 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..979362c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,66 @@ +logic 1 +comput 1 +norman 1 +emeritu 1 +philosophi 1 +univers 1 +architectur 1 +closur 1 +space 1 +faculti 1 +martin 0 +martinprofessor 0 +scienc 0 +professor 0 +ofphilosophi 0 +chicago 0 +california 0 +angel 0 +area 0 +interestmathemat 0 +summari 0 +researchmi 0 +current 0 +activ 0 +concentr 0 +abstract 0 +structur 0 +asinterpret 0 +theori 0 +center 0 +whichexploit 0 +notion 0 +deduct 0 +oper 0 +andon 0 +intension 0 +model 0 +classic 0 +mathemat 0 +significantearli 0 +research 0 +design 0 +especi 0 +missil 0 +vehicl 0 +applic 0 +trackingalgorithm 0 +track 0 +scan 0 +radar 0 +function 0 +complet 0 +inmani 0 +valu 0 +delai 0 +metatheori 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..df27f5c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,140 @@ +time 3 +system 2 +real 2 +comput 1 +intern 1 +engin 1 +formal 1 +softwar 1 +proceed 1 +aloysiu 1 +present 1 +method 1 +committe 1 +ieee 1 +control 1 +design 1 +develop 1 +autom 1 +wang 1 +journal 1 +program 1 +symposium 1 +chair 1 +technic 1 +aid 1 +tool 1 +research 1 +robust 1 +respons 1 +specif 1 +confer 1 +decemb 1 +faculti 1 +mokassoci 0 +professorfaculti 0 +fellow 0 +scienc 0 +electr 0 +massachusett 0 +institut 0 +technolog 0 +profession 0 +servic 0 +associ 0 +editor 0 +critic 0 +editori 0 +board 0 +systemdesign 0 +taiwan 0 +vice 0 +work 0 +group 0 +federationof 0 +automat 0 +presentarea 0 +interestfault 0 +toler 0 +hard 0 +architectur 0 +summari 0 +researchi 0 +current 0 +conduct 0 +fundament 0 +area 0 +ofdistribut 0 +primari 0 +concern 0 +includespecif 0 +techniqu 0 +algorithm 0 +forguarante 0 +stringent 0 +constraint 0 +understand 0 +thetrad 0 +criticalsystem 0 +goal 0 +framework 0 +theanalysi 0 +synthesi 0 +applic 0 +areasinclud 0 +robot 0 +avion 0 +industrialprocess 0 +fund 0 +provid 0 +offic 0 +ofnav 0 +highli 0 +environ 0 +forreal 0 +select 0 +recent 0 +publicationsa 0 +toward 0 +mechan 0 +foundat 0 +tilborg 0 +kluwer 0 +academ 0 +publish 0 +heitmey 0 +labaw 0 +clement 0 +case 0 +support 0 +fifth 0 +workshop 0 +montreal 0 +juli 0 +emerson 0 +asynchron 0 +distribut 0 +aptl 0 +melbourn 0 +load 0 +adjust 0 +adapt 0 +antonio 0 +tsou 0 +brown 0 +analysi 0 +bound 0 +nasa 0 +expert 0 +sigsoft 0 +orlean 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..f1d6faf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,75 @@ +program 1 +function 1 +hamilton 1 +richard 1 +engin 1 +interest 1 +concurr 1 +process 1 +languag 1 +faculti 1 +senior 0 +lecturerb 0 +appli 0 +physic 0 +harvard 0 +collegem 0 +aero 0 +astronaut 0 +stanford 0 +universityph 0 +comput 0 +scienc 0 +iowa 0 +state 0 +universityprofession 0 +servicecoordin 0 +univers 0 +texa 0 +austin 0 +year 0 +seri 0 +editor 0 +vol 0 +addison 0 +weslei 0 +area 0 +object 0 +orient 0 +undergradu 0 +educationsummari 0 +research 0 +maintain 0 +long 0 +stand 0 +potentialfor 0 +suitabl 0 +formal 0 +reason 0 +infal 0 +us 0 +teach 0 +sectionof 0 +work 0 +time 0 +permit 0 +implementationof 0 +real 0 +microcomput 0 +applic 0 +longer 0 +term 0 +project 0 +book 0 +onfunct 0 +algorithm 0 +addit 0 +inform 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..3188c30b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,126 @@ +system 2 +databas 2 +silberschatz 1 +ieee 1 +confer 1 +intern 1 +comput 1 +knowledg 1 +area 1 +rastogi 1 +larg 1 +abraham 1 +award 1 +paper 1 +inform 1 +chair 1 +pod 1 +research 1 +parallel 1 +manag 1 +main 1 +process 1 +continu 1 +media 1 +storag 1 +high 1 +perform 1 +transact 1 +data 1 +septemb 1 +ozden 1 +faculti 1 +silberschatzprofessorship 0 +sciencesm 0 +stoni 0 +brookhonor 0 +profession 0 +serviceiee 0 +societi 0 +outstand 0 +journal 0 +advisori 0 +committe 0 +nation 0 +scienc 0 +foundat 0 +divis 0 +robot 0 +intellig 0 +gener 0 +seventh 0 +eighth 0 +sigact 0 +sigmod 0 +symposiumon 0 +principl 0 +organ 0 +ullman 0 +invit 0 +workshop 0 +futureof 0 +program 0 +symposium 0 +distributedsystem 0 +interest 0 +oper 0 +distribut 0 +basedsystemssummari 0 +special 0 +concurr 0 +recentresearch 0 +concentr 0 +multidatabas 0 +transactionmanag 0 +base 0 +real 0 +time 0 +databasesystem 0 +multiresolut 0 +server 0 +select 0 +recent 0 +publicationss 0 +ganguli 0 +tsur 0 +map 0 +datalog 0 +programexecut 0 +network 0 +processor 0 +knowledgeand 0 +engin 0 +june 0 +jagadish 0 +lieuwen 0 +sudarshan 0 +dali 0 +memori 0 +internationalconfer 0 +biliri 0 +cost 0 +storageserv 0 +movi 0 +demand 0 +onveri 0 +framework 0 +storageand 0 +retriev 0 +conferenceon 0 +multimedia 0 +read 0 +fussel 0 +multi 0 +resolut 0 +relationaldata 0 +model 0 +august 0 +addit 0 +obtain 0 +fromindividu 0 +member 0 +home 0 +page 0 +back 0 +list 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..e63c6ef7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,17 @@ +robert 1 +simmon 1 +simmonsquinci 0 +centenni 0 +professor 0 +emeritu 0 +comput 0 +scienc 0 +professoremeritu 0 +psychologymai 0 +novemb 0 +bledso 0 +rememb 0 +back 0 +list 0 +faculti 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..d2882a52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,53 @@ +adam 1 +seligman 1 +home 1 +page 1 +gzip 1 +email 1 +pageadam 0 +click 0 +log 0 +gradual 0 +student 0 +austin 0 +program 0 +aweekli 0 +happi 0 +hour 0 +depart 0 +claim 0 +fame 0 +undergradu 0 +thesi 0 +specifiedth 0 +type 0 +rule 0 +oper 0 +semant 0 +core 0 +avail 0 +fileor 0 +postscript 0 +file 0 +knowwhat 0 +think 0 +touch 0 +utexa 0 +call 0 +pagemart 0 +graphic 0 +phone 0 +number 0 +read 0 +progress 0 +vrml 0 +paper 0 +new 0 +junki 0 +fromreut 0 +yahoo 0 +altern 0 +could 0 +check 0 +nando 0 +time 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..e2adaa6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,30 @@ +univers 1 +scienc 1 +texa 1 +comput 1 +agapito 1 +austin 1 +sustaita 1 +austincognit 0 +interest 0 +machin 0 +learn 0 +languag 0 +acquisit 0 +chill 0 +specif 0 +connection 0 +commonsens 0 +reasoningschoolingph 0 +hopefulli 0 +colleg 0 +station 0 +california 0 +santa 0 +barbara 0 +miscellaneouspost 0 +addressth 0 +depart 0 +mail 0 +utexa 0 +eduphon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..588a47fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,90 @@ +report 1 +artifici 1 +intellig 1 +faculti 1 +autom 1 +theorem 1 +prove 1 +novak 1 +technic 1 +program 1 +robert 1 +kuiper 1 +reason 1 +miikkulainen 1 +base 1 +moonei 1 +laboratoryut 0 +laboratoryth 0 +laboratori 0 +atth 0 +univers 0 +texa 0 +austinha 0 +distinguish 0 +histori 0 +larg 0 +number 0 +excel 0 +andgradu 0 +student 0 +new 0 +world 0 +rank 0 +nation 0 +close 0 +link 0 +comput 0 +scienc 0 +depart 0 +boyer 0 +causei 0 +logic 0 +philosoph 0 +foundat 0 +benjamin 0 +qualit 0 +vladimir 0 +lifschitz 0 +action 0 +risto 0 +neural 0 +network 0 +mirank 0 +rule 0 +system 0 +machin 0 +learn 0 +gordon 0 +automat 0 +physic 0 +problem 0 +solv 0 +bruce 0 +porter 0 +multi 0 +function 0 +knowledg 0 +emeritu 0 +woodi 0 +bledso 0 +deceas 0 +dream 0 +aaai 0 +presidenti 0 +address 0 +simmon 0 +memoriam 0 +postdoc 0 +peter 0 +clark 0 +souther 0 +softwar 0 +directori 0 +current 0 +avail 0 +porterpoint 0 +lab 0 +fund 0 +agenciescontact 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..31a742a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,40 @@ +parallel 1 +program 1 +austin 1 +comput 1 +scienc 1 +work 1 +system 1 +offic 1 +ajita 0 +johnajita 0 +john 0 +candid 0 +group 0 +depart 0 +univers 0 +texa 0 +hello 0 +research 0 +automat 0 +programmingframework 0 +base 0 +constraint 0 +compil 0 +parallelprocedur 0 +advisor 0 +professor 0 +brownemi 0 +papersmi 0 +us 0 +translat 0 +routin 0 +code 0 +want 0 +contact 0 +postal 0 +usavoic 0 +main 0 +taylor 0 +ajohn 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..c4d00204 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,112 @@ +comput 2 +scienc 1 +austin 1 +home 1 +interest 1 +page 1 +almstrum 1 +learn 1 +univers 1 +texa 1 +group 1 +vicki 1 +educ 1 +understand 1 +mathemat 1 +method 1 +teach 1 +uppsala 1 +includ 1 +technolog 1 +special 1 +offic 1 +utexa 1 +utc 0 +almstrumabout 0 +scientist 0 +peopl 0 +particularli 0 +interestedin 0 +logic 0 +formal 0 +doctoralresearch 0 +topic 0 +limit 0 +mathematicallog 0 +novic 0 +student 0 +lectur 0 +addit 0 +ispent 0 +fall 0 +semest 0 +sweden 0 +pagether 0 +link 0 +encourag 0 +other 0 +excel 0 +computersci 0 +garden 0 +travel 0 +craft 0 +sew 0 +woodwork 0 +pictur 0 +hubbi 0 +torgni 0 +stadler 0 +check 0 +site 0 +itics 0 +confer 0 +integr 0 +educationjun 0 +work 0 +june 0 +swedenoth 0 +maintain 0 +class 0 +camp 0 +field 0 +research 0 +evalu 0 +mentor 0 +issu 0 +jump 0 +point 0 +area 0 +suffer 0 +spurt 0 +construct 0 +frenzi 0 +organ 0 +belong 0 +sigcs 0 +educationsigsoft 0 +softwar 0 +engineeringacm 0 +associ 0 +machineryieeeth 0 +institut 0 +electr 0 +electron 0 +engineerscpsrcomput 0 +profession 0 +social 0 +responsibilityconnect 0 +elsewhereto 0 +contact 0 +depart 0 +main 0 +direct 0 +seldom 0 +alwai 0 +connect 0 +need 0 +forewarn 0 +leav 0 +plenti 0 +time 0 +email 0 +address 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..c3af6742 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,15 @@ +anthoni 1 +hing 1 +pang 1 +offic 1 +hung 0 +home 0 +pagehung 0 +hour 0 +mondai 0 +wednesdai 0 +email 0 +utexa 0 +inform 0 +compil 0 +cours 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..f82b59b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,32 @@ +aruna 1 +comput 1 +austin 1 +depart 1 +fall 1 +homepag 0 +addalacurr 0 +graduat 0 +studentth 0 +univers 0 +texa 0 +scienc 0 +taylor 0 +hall 0 +educ 0 +bachelor 0 +engin 0 +sciencess 0 +colleg 0 +engineeringmysorework 0 +experi 0 +lectur 0 +studi 0 +sciencesunivers 0 +mysoreindiai 0 +come 0 +mysor 0 +cityindiato 0 +contact 0 +email 0 +utexa 0 +eduvoic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..9d2c5834 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,21 @@ +ashi 1 +austin 1 +offic 1 +home 0 +page 0 +tarafdarashi 0 +tarafdarabout 0 +get 0 +round 0 +let 0 +exist 0 +known 0 +patienc 0 +pleas 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +usavoic 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..0a67efe6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,64 @@ +scienc 1 +comput 1 +bayardo 1 +austin 1 +univers 1 +texa 1 +work 1 +research 1 +mail 1 +address 1 +engin 1 +roberto 0 +home 0 +pageroberto 0 +candid 0 +expect 0 +complet 0 +date 0 +fall 0 +depart 0 +current 0 +also 0 +within 0 +infosleuth 0 +project 0 +interest 0 +queri 0 +process 0 +activ 0 +expert 0 +databas 0 +system 0 +data 0 +mine 0 +constraint 0 +satisfactionmi 0 +thesi 0 +advisor 0 +prof 0 +daniel 0 +mirank 0 +paper 0 +line 0 +along 0 +toolkit 0 +generatingand 0 +solv 0 +exception 0 +hard 0 +instanc 0 +contact 0 +inform 0 +utexa 0 +campu 0 +dept 0 +taylor 0 +hall 0 +histori 0 +electr 0 +center 0 +coordin 0 +number 0 +sinc 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..bb543a15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,40 @@ +bert 1 +refin 1 +imprecis 1 +model 1 +month 1 +address 1 +austin 1 +kayresearch 0 +reason 0 +physic 0 +process 0 +overviewof 0 +research 0 +vitami 0 +network 0 +retriev 0 +paper 0 +dissert 0 +entitl 0 +behavior 0 +abstract 0 +stuffsonia 0 +andnina 0 +page 0 +drink 0 +ofth 0 +springbank 0 +scotchdrinksof 0 +past 0 +contact 0 +informationemail 0 +utexa 0 +offic 0 +taylor 0 +hall 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..b05c082b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,39 @@ +bhanu 1 +comput 1 +system 1 +home 0 +page 0 +welcom 0 +homepagethi 0 +akhil 0 +reddythank 0 +visit 0 +homepag 0 +visitor 0 +number 0 +school 0 +univers 0 +texa 0 +austinm 0 +scienc 0 +third 0 +semest 0 +coursesc 0 +multimedia 0 +harrick 0 +vinc 0 +introduct 0 +mathemat 0 +logic 0 +vladimir 0 +lifschitz 0 +datacommun 0 +network 0 +anitish 0 +barua 0 +architectur 0 +schwetmani 0 +term 0 +project 0 +databas 0 +manag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..a5a8a080 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,228 @@ +peopl 2 +program 2 +probabl 1 +comput 1 +scienc 1 +risk 1 +would 1 +adopt 1 +make 1 +decis 1 +analog 1 +neuron 1 +process 1 +number 1 +updat 1 +bogon 1 +cogsci 1 +sometim 1 +feel 1 +austin 1 +work 1 +avers 1 +outcom 1 +coin 1 +toss 1 +reject 1 +well 1 +sound 1 +explain 1 +diseas 1 +problem 1 +save 1 +human 1 +brain 1 +wrong 1 +inform 1 +telephon 1 +bogo 1 +bogu 1 +michael 0 +bogomolnymichael 0 +bogomolni 0 +advert 0 +although 0 +pictur 0 +read 0 +articl 0 +current 0 +first 0 +semest 0 +univers 0 +texa 0 +physic 0 +amherst 0 +colleg 0 +research 0 +interestsnot 0 +intend 0 +jenef 0 +husman 0 +final 0 +project 0 +quarter 0 +accept 0 +fair 0 +bet 0 +econom 0 +theori 0 +involv 0 +maxim 0 +util 0 +diminish 0 +return 0 +howev 0 +ask 0 +question 0 +formul 0 +take 0 +prevent 0 +manner 0 +respond 0 +differ 0 +exampl 0 +belov 0 +tverski 0 +kahneman 0 +taken 0 +almost 0 +verbatimfrom 0 +frame 0 +psycholog 0 +choic 0 +imagin 0 +prepar 0 +outbreak 0 +unusu 0 +expect 0 +kill 0 +altern 0 +combat 0 +beenpropos 0 +assum 0 +exact 0 +scientif 0 +estim 0 +consequ 0 +programsar 0 +follow 0 +besav 0 +nobodi 0 +favor 0 +digitalif 0 +made 0 +fire 0 +depend 0 +level 0 +electrochem 0 +charg 0 +built 0 +axon 0 +biolog 0 +foundat 0 +shaki 0 +ahead 0 +scream 0 +hypothesi 0 +transfer 0 +particular 0 +wire 0 +either 0 +high 0 +voltag 0 +interpret 0 +accur 0 +inaccur 0 +simpli 0 +come 0 +answer 0 +mistak 0 +subtract 0 +balanc 0 +checkbook 0 +rememb 0 +invalid 0 +mayb 0 +gave 0 +wasn 0 +real 0 +anoth 0 +stori 0 +nevertheless 0 +hard 0 +press 0 +point 0 +misfir 0 +account 0 +error 0 +correct 0 +lead 0 +incorrect 0 +result 0 +cognit 0 +even 0 +complet 0 +remind 0 +quot 0 +italic 0 +class 0 +append 0 +introduct 0 +cours 0 +graduat 0 +researchcognit 0 +sciencearitifici 0 +intelligencemathemat 0 +logictopolog 0 +ghrist 0 +oper 0 +system 0 +paper 0 +symbol 0 +differenti 0 +puzzl 0 +theorem 0 +prover 0 +contact 0 +email 0 +utexa 0 +better 0 +send 0 +postcard 0 +phone 0 +postal 0 +address 0 +wilshir 0 +parkwai 0 +informationthi 0 +page 0 +written 0 +us 0 +text 0 +editor 0 +last 0 +insert 0 +empti 0 +promis 0 +construct 0 +soon 0 +suppos 0 +list 0 +hidden 0 +talentsdefinit 0 +quantum 0 +bogodynamicsdefinit 0 +sortwhil 0 +free 0 +look 0 +bogos 0 +bogomet 0 +filter 0 +flux 0 +bogotifi 0 +autobogotiphobia 0 +blinkenlight 0 +lasher 0 +pleas 0 +connect 0 +stupid 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..1a1c7044 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,186 @@ +scienc 1 +univers 1 +comput 1 +page 1 +boyer 1 +robert 1 +texa 1 +austin 1 +govern 1 +symbol 1 +mathemat 1 +time 1 +articl 1 +licens 1 +taken 1 +oftexa 1 +fundament 1 +home 0 +boyerhom 0 +stephen 0 +professor 0 +philosophydepart 0 +austinhow 0 +reach 0 +mepap 0 +mail 0 +dept 0 +univ 0 +usaemail 0 +utexa 0 +edufax 0 +physic 0 +locationsclassescurriculum 0 +vitaeperson 0 +dataeducationpublicationshonorsjobsgradu 0 +studentsth 0 +moor 0 +prover 0 +also 0 +knowna 0 +nqthm 0 +photo 0 +recommend 0 +read 0 +project 0 +vote 0 +smart 0 +webth 0 +projectmi 0 +view 0 +undergradu 0 +educ 0 +john 0 +mccarthi 0 +pageth 0 +moffett 0 +build 0 +controversyni 0 +mccune 0 +robbin 0 +algebra 0 +result 0 +andsom 0 +technic 0 +detail 0 +verif 0 +float 0 +point 0 +divis 0 +algorithm 0 +microprocessor 0 +wonder 0 +softwar 0 +polici 0 +permitsth 0 +public 0 +close 0 +zero 0 +administrativeoverhead 0 +short 0 +cours 0 +howthi 0 +work 0 +much 0 +intellectu 0 +properti 0 +thegreat 0 +book 0 +variou 0 +enumer 0 +thereof 0 +confess 0 +acanon 0 +thumper 0 +possibl 0 +end 0 +tenur 0 +universitiesstandard 0 +disclaim 0 +natur 0 +noth 0 +shouldb 0 +repres 0 +offici 0 +posit 0 +part 0 +state 0 +furthermor 0 +steal 0 +joke 0 +peter 0 +deutsch 0 +aweb 0 +own 0 +anind 0 +endors 0 +everyth 0 +formal 0 +method 0 +alwai 0 +riski 0 +peano 0 +first 0 +call 0 +logic 0 +introduc 0 +instanc 0 +mean 0 +habitu 0 +wrote 0 +hislectur 0 +note 0 +teach 0 +militaryacademi 0 +student 0 +incens 0 +hisformalist 0 +approach 0 +rebel 0 +despit 0 +hispromis 0 +pass 0 +fire 0 +subsequ 0 +found 0 +amor 0 +congeni 0 +set 0 +turin 0 +sincomplet 0 +theorem 0 +rudi 0 +rucker 0 +death 0 +verg 0 +extinct 0 +said 0 +harold 0 +kroto 0 +britain 0 +sussex 0 +share 0 +chemistrypr 0 +curl 0 +richard 0 +smallei 0 +rice 0 +inhouston 0 +discoveri 0 +carbon 0 +atom 0 +bound 0 +shape 0 +asocc 0 +ball 0 +scientist 0 +lament 0 +loss 0 +fund 0 +associ 0 +press 0 +decemb 0 +daili 0 +texan 0 +upup 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..d786c98f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,156 @@ +parallel 3 +program 2 +comput 2 +brown 2 +code 2 +environ 2 +languag 1 +visual 1 +high 1 +level 1 +debug 1 +paper 1 +proceed 1 +confer 1 +graphic 1 +hyder 1 +newton 1 +werth 1 +august 1 +process 1 +intern 1 +jame 1 +scienc 1 +professor 1 +physic 1 +engin 1 +texa 1 +fellow 1 +societi 1 +specif 1 +integr 1 +work 1 +gener 1 +structur 1 +model 1 +develop 1 +system 1 +dongarra 1 +moor 1 +ieee 1 +compar 1 +henc 1 +version 1 +base 1 +brownereg 0 +chair 0 +electr 0 +hendrix 0 +collegeph 0 +univers 0 +austinhonor 0 +award 0 +british 0 +american 0 +societyarea 0 +interestparallel 0 +major 0 +focu 0 +sciencewith 0 +applic 0 +area 0 +summari 0 +researchi 0 +tenyear 0 +computation 0 +orient 0 +displai 0 +anabstract 0 +declar 0 +evolv 0 +three 0 +ongo 0 +research 0 +includesmethod 0 +optim 0 +highlevel 0 +abstract 0 +throughdata 0 +partit 0 +data 0 +flow 0 +compositionalapproach 0 +addit 0 +intelligenceprocess 0 +control 0 +fluiddynam 0 +also 0 +design 0 +narrow 0 +domaincompil 0 +includ 0 +logic 0 +basedlanguag 0 +robust 0 +method 0 +intellig 0 +real 0 +timedecis 0 +select 0 +recent 0 +publicationsj 0 +distribut 0 +technolog 0 +spring 0 +volum 0 +number 0 +technic 0 +report 0 +dept 0 +univ 0 +austin 0 +longer 0 +refer 0 +interact 0 +formal 0 +andpract 0 +fourthworkshop 0 +compil 0 +santacruz 0 +california 0 +jain 0 +experiment 0 +studi 0 +theeffect 0 +ofth 0 +siam 0 +mirank 0 +parallelizingcompil 0 +rule 0 +unifi 0 +concurr 0 +kleyn 0 +specifi 0 +graph 0 +softwar 0 +baltimor 0 +april 0 +postscript 0 +file 0 +extend 0 +proc 0 +conf 0 +supercomput 0 +juli 0 +describ 0 +prototyp 0 +implement 0 +notat 0 +chang 0 +idea 0 +remain 0 +good 0 +broad 0 +introduct 0 +brief 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..96f69ecc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,70 @@ +group 1 +vlsi 1 +interest 1 +austin 1 +ping 1 +depart 1 +thakur 1 +area 1 +fpga 1 +architectur 1 +link 1 +inform 1 +comment 1 +utc 0 +home 0 +page 0 +research 0 +addressdepart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +peopl 0 +supervis 0 +prof 0 +martin 0 +wong 0 +member 0 +chang 0 +chung 0 +chenyao 0 +chen 0 +yung 0 +ming 0 +fang 0 +shashidhar 0 +zhou 0 +researchth 0 +current 0 +wide 0 +rang 0 +broadli 0 +classifi 0 +follow 0 +placement 0 +rout 0 +partit 0 +logic 0 +synthesi 0 +issu 0 +high 0 +perform 0 +abstract 0 +recent 0 +public 0 +groupcan 0 +found 0 +trace 0 +sigda 0 +special 0 +design 0 +autom 0 +ieee 0 +institut 0 +electr 0 +electron 0 +engin 0 +austinclick 0 +mail 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..d42c6949 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,93 @@ +work 1 +distribut 1 +project 1 +beer 1 +humor 1 +utexa 1 +home 1 +page 1 +bill 0 +canfieldhom 0 +businessmi 0 +resum 0 +postscript 0 +spring 0 +give 0 +report 0 +softwar 0 +highli 0 +avail 0 +system 0 +class 0 +slide 0 +talk 0 +effort 0 +mach 0 +implement 0 +flaviu 0 +cristian 0 +algorithm 0 +done 0 +prof 0 +built 0 +guangtian 0 +current 0 +hardwar 0 +verif 0 +ther 0 +divis 0 +ti 0 +research 0 +professor 0 +allen 0 +emerson 0 +pleasuredomest 0 +bliss 0 +depart 0 +photo 0 +wife 0 +carla 0 +newborn 0 +daughter 0 +ruth 0 +clair 0 +parenthood 0 +struck 0 +travel 0 +high 0 +prioriti 0 +somewher 0 +li 0 +enjoy 0 +peel 0 +label 0 +bottl 0 +foreign 0 +land 0 +variou 0 +sourcesth 0 +sofaspher 0 +haiku 0 +olestra 0 +approv 0 +substitut 0 +speak 0 +poetri 0 +interest 0 +women 0 +disinform 0 +dole 0 +canfield 0 +last 0 +updat 0 +april 0 +thank 0 +todd 0 +peter 0 +peterst 0 +mail 0 +mani 0 +link 0 +andth 0 +pictur 0 +cool 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..67e4fb80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,61 @@ +carruth 1 +austin 1 +uniti 1 +mail 1 +utexa 1 +depart 1 +professor 1 +misra 1 +time 1 +home 1 +page 1 +offic 1 +address 1 +phone 1 +carruthpleas 0 +send 0 +question 0 +suggest 0 +introduct 0 +candid 0 +computersci 0 +supervis 0 +jayadev 0 +mydissert 0 +topic 0 +real 0 +member 0 +research 0 +group 0 +extend 0 +theori 0 +order 0 +express 0 +finit 0 +boundson 0 +usual 0 +oper 0 +progress 0 +safeti 0 +alsointerest 0 +function 0 +program 0 +languag 0 +partial 0 +ordersemant 0 +autom 0 +theorem 0 +prove 0 +contact 0 +inform 0 +person 0 +email 0 +comput 0 +scienc 0 +taylor 0 +hall 0 +univers 0 +texa 0 +link 0 +world 0 +wide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..5aae6139 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,43 @@ +ping 1 +chung 1 +chen 1 +schedul 1 +utexa 1 +student 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +austin 0 +fiance 0 +meng 0 +tsai 0 +current 0 +intel 0 +summer 0 +intern 0 +work 0 +bufferinsert 0 +problem 0 +syllabu 0 +grade 0 +polici 0 +exam 0 +homework 0 +exercis 0 +offic 0 +hour 0 +locat 0 +new 0 +class 0 +fall 0 +syllabustopicschung 0 +clen 0 +last 0 +updat 0 +idea 0 +improv 0 +page 0 +send 0 +suggest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..56beb207 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,36 @@ +austin 1 +home 1 +deji 1 +anoth 1 +paragraph 1 +offic 1 +page 0 +chen 0 +chenabout 0 +mehello 0 +homepag 0 +student 0 +tongji 0 +univers 0 +shanghai 0 +chinaa 0 +bullet 0 +list 0 +easi 0 +includ 0 +well 0 +first 0 +item 0 +third 0 +forget 0 +break 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +usahom 0 +lake 0 +blvd 0 +usaphon 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..6c2f16c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,139 @@ +cliff 1 +comput 1 +univers 1 +austin 1 +write 1 +system 1 +scienc 1 +educ 1 +macintosh 1 +program 1 +symbol 1 +mstk 1 +plai 1 +chaputcliff 0 +chaputth 0 +texa 0 +austindepart 0 +sciencestaylor 0 +hall 0 +robotlab 0 +home 0 +dullchaput 0 +utexa 0 +studi 0 +northwestern 0 +gothimself 0 +emploi 0 +programm 0 +anywai 0 +spent 0 +year 0 +anemail 0 +client 0 +portabl 0 +visual 0 +object 0 +librari 0 +odesta 0 +corpor 0 +left 0 +institut 0 +thelearn 0 +hewrot 0 +trane 0 +softwar 0 +common 0 +lisp 0 +thenimpl 0 +simul 0 +environ 0 +high 0 +school 0 +studentscal 0 +gamesproject 0 +graduat 0 +student 0 +hang 0 +robot 0 +labannoi 0 +peopl 0 +hair 0 +brain 0 +scheme 0 +mean 0 +represent 0 +artifici 0 +life 0 +sleep 0 +dream 0 +read 0 +fiction 0 +listen 0 +farka 0 +tour 0 +medeski 0 +martin 0 +wood 0 +watch 0 +rerun 0 +korg 0 +ride 0 +bike 0 +turnon 0 +includ 0 +breakfast 0 +version 0 +raspi 0 +voic 0 +starfleet 0 +captain 0 +turnoff 0 +republican 0 +microsoft 0 +hangov 0 +fave 0 +site 0 +current 0 +eventsdaili 0 +new 0 +reutersintellicast 0 +weatheraustin 0 +txchicago 0 +ilperiodicalssucksalonmirski 0 +worst 0 +webth 0 +onionmacweekmacuserreferencehypertext 0 +webster 0 +interfaceyahooalta 0 +vistacardiff 0 +movi 0 +databaselyco 0 +road 0 +mapalt 0 +culturemacintosh 0 +dataappl 0 +computercyberdogquicktimequickdraw 0 +dappl 0 +supportmacintouchmacintosh 0 +resourcecyberdog 0 +poundinfo 0 +archiv 0 +rootcool 0 +weird 0 +stufffringewareth 0 +actlabpbsnprnow 0 +catch 0 +phrase 0 +catalogpap 0 +softwareth 0 +rsumsymbol 0 +emerg 0 +groundingrobotmap 0 +peopledav 0 +falooncharl 0 +lewisjeff 0 +lindjeff 0 +sherwoodbrian 0 +slatorsandi 0 +stone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..7d9a4b73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,104 @@ +page 1 +chuanjun 1 +wang 1 +place 1 +china 1 +check 1 +texa 1 +austin 1 +view 1 +like 1 +find 1 +search 1 +diamond 1 +homepag 0 +welcom 0 +pictur 0 +captur 0 +gloriou 0 +moment 0 +came 0 +earth 0 +stun 0 +detail 0 +origin 0 +come 0 +orient 0 +countri 0 +call 0 +know 0 +hometown 0 +hubei 0 +provinc 0 +graduat 0 +student 0 +tsinghua 0 +univ 0 +decid 0 +time 0 +chang 0 +better 0 +thought 0 +end 0 +beautifulunivers 0 +current 0 +work 0 +comput 0 +scienc 0 +take 0 +break 0 +read 0 +enjoi 0 +tower 0 +opinion 0 +nifti 0 +thing 0 +televis 0 +surf 0 +mind 0 +numb 0 +faceless 0 +howev 0 +brilliant 0 +us 0 +materi 0 +inform 0 +miner 0 +among 0 +million 0 +rock 0 +unemploi 0 +internet 0 +philosoph 0 +well 0 +person 0 +look 0 +real 0 +unix 0 +program 0 +magazin 0 +jump 0 +dobb 0 +journal 0 +word 0 +need 0 +fresh 0 +world 0 +hard 0 +fine 0 +graphic 0 +design 0 +unusu 0 +prose 0 +cours 0 +list 0 +would 0 +complet 0 +without 0 +link 0 +pope 0 +porsch 0 +write 0 +return 0 +depart 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..f615b8d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,103 @@ +comput 1 +scienc 1 +inform 1 +taiwan 1 +univers 1 +texa 1 +austin 1 +research 1 +process 1 +system 1 +tag 1 +user 1 +welcom 0 +homepag 0 +chin 0 +tser 0 +huang 0 +last 0 +updat 0 +decemb 0 +educ 0 +june 0 +degre 0 +dept 0 +engin 0 +nation 0 +taipei 0 +current 0 +master 0 +student 0 +depart 0 +interest 0 +natur 0 +languag 0 +human 0 +interfac 0 +network 0 +distribut 0 +systemsexperiencei 0 +ever 0 +work 0 +chines 0 +knowledg 0 +group 0 +instituteof 0 +academia 0 +sinica 0 +assist 0 +major 0 +worki 0 +design 0 +capabl 0 +word 0 +segment 0 +categori 0 +usinghidden 0 +markov 0 +model 0 +improv 0 +friendli 0 +tool 0 +allow 0 +toexecut 0 +line 0 +proof 0 +read 0 +result 0 +automat 0 +automatictag 0 +reach 0 +accuraci 0 +improvedbecaus 0 +continu 0 +expans 0 +train 0 +data 0 +person 0 +interestsmovi 0 +book 0 +music 0 +literatur 0 +semiolog 0 +basebal 0 +basketbal 0 +tabl 0 +tenni 0 +pinbal 0 +favorit 0 +siteschina 0 +timesminsheng 0 +dailyth 0 +york 0 +timesusa 0 +todayth 0 +economistth 0 +atlant 0 +monthlymak 0 +contact 0 +chuang 0 +utexa 0 +edufing 0 +meyou 0 +visitor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..8ccdfb9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,23 @@ +page 1 +cilkcilkcilk 0 +pronounc 0 +silk 0 +parallel 0 +multithread 0 +base 0 +languageand 0 +runtim 0 +system 0 +find 0 +time 0 +us 0 +inform 0 +inthi 0 +check 0 +thecilk 0 +last 0 +modifi 0 +august 0 +robert 0 +blumoferdb 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..8eb93622 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,33 @@ +chung 1 +keung 1 +comput 1 +poon 0 +home 0 +page 0 +poondepart 0 +sciencesunivers 0 +texa 0 +austinaustin 0 +offic 0 +ckpoon 0 +utexa 0 +edumi 0 +plan 0 +hungri 0 +fish 0 +askvinc 0 +gogan 0 +pleas 0 +thesi 0 +complex 0 +connect 0 +problemsom 0 +interest 0 +site 0 +theoret 0 +scienc 0 +hong 0 +kong 0 +harmonica 0 +high 0 +school 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..94184777 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,62 @@ +page 1 +comput 1 +austin 1 +secur 1 +home 1 +project 1 +student 1 +scienc 1 +research 1 +link 1 +utexa 1 +offic 1 +chung 0 +wongchung 0 +wonglast 0 +modifi 0 +graduat 0 +thedepart 0 +univers 0 +texa 0 +member 0 +thenetwork 0 +labwhich 0 +head 0 +byprof 0 +simon 0 +relat 0 +java 0 +nist 0 +divis 0 +resourc 0 +clearinghous 0 +role 0 +base 0 +access 0 +control 0 +rbac 0 +prof 0 +rivest 0 +cryptographi 0 +contact 0 +meemail 0 +ckwong 0 +edupost 0 +usavoic 0 +dept 0 +hyde 0 +park 0 +baptist 0 +church 0 +chines 0 +mission 0 +hong 0 +kong 0 +associ 0 +linux 0 +netbsd 0 +freebsd 0 +openbsd 0 +send 0 +email 0 +tockwong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..f6f8f396 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,75 @@ +simul 1 +techniqu 1 +address 1 +clanci 1 +qualit 1 +knowledg 1 +comput 1 +behavior 1 +system 1 +larg 1 +abstract 1 +aggreg 1 +interest 1 +develop 1 +automat 1 +austin 1 +clancyresearch 0 +reason 0 +us 0 +incomplet 0 +descriptionof 0 +possibl 0 +dynam 0 +complex 0 +containinga 0 +number 0 +variabl 0 +constraint 0 +frequentlyi 0 +intract 0 +result 0 +incomprehens 0 +descript 0 +requir 0 +simulationto 0 +elimin 0 +irrelev 0 +detail 0 +focu 0 +distinctionsof 0 +whichaddress 0 +problem 0 +particular 0 +abstractiontechniqu 0 +appli 0 +thiswil 0 +facilit 0 +integr 0 +withlarg 0 +scale 0 +base 0 +model 0 +build 0 +followingtechniqu 0 +issu 0 +vita 0 +list 0 +network 0 +retriev 0 +real 0 +paper 0 +contact 0 +informationemail 0 +utexa 0 +offic 0 +taylor 0 +hall 0 +depart 0 +scienc 0 +univers 0 +texa 0 +finger 0 +inform 0 +hotlist 0 +netscap 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..22e22896 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,11 @@ +page 0 +construct 0 +jimbo 0 +click 0 +three 0 +four 0 +five 0 +seven 0 +eight 0 +nine 0 +eleven 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..84f552a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,38 @@ +austin 1 +chri 1 +home 1 +address 1 +depart 1 +comput 1 +scienc 1 +chines 1 +chuwelcom 0 +page 0 +myselfmi 0 +photo 0 +student 0 +phone 0 +number 0 +call 0 +offic 0 +mail 0 +univers 0 +texa 0 +taylor 0 +hall 0 +interest 0 +link 0 +campu 0 +christian 0 +fellowship 0 +church 0 +hong 0 +kong 0 +china 0 +author 0 +chuemail 0 +cnchu 0 +utexa 0 +edulast 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..eabc541e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,179 @@ +code 3 +program 2 +parallel 2 +mail 1 +user 1 +version 1 +system 1 +list 1 +emeri 1 +visual 1 +click 1 +tutori 1 +releas 1 +avail 1 +softwar 1 +interfac 1 +line 1 +document 1 +manual 1 +member 1 +scienc 1 +produc 1 +architectur 1 +model 1 +screen 1 +shot 1 +base 1 +announc 1 +download 1 +featur 1 +provid 1 +previou 1 +includ 1 +articl 1 +comput 1 +us 1 +name 1 +last 1 +address 1 +relat 1 +postscript 1 +public 1 +contact 1 +send 1 +utexa 1 +austin 1 +research 1 +john 1 +systemmast 0 +lawless 0 +codeless 0 +myriad 0 +preced 0 +wilder 0 +singl 0 +instanc 0 +alfr 0 +lord 0 +tennysoncod 0 +allow 0 +compos 0 +sequentialprogram 0 +direct 0 +graph 0 +wheredata 0 +flow 0 +arc 0 +connect 0 +node 0 +repres 0 +sequenti 0 +thesequenti 0 +written 0 +languag 0 +parallelprogram 0 +varieti 0 +independ 0 +network 0 +machin 0 +well 0 +sequent 0 +symmetri 0 +newest 0 +support 0 +crai 0 +smp 0 +free 0 +major 0 +revis 0 +sophist 0 +mani 0 +improv 0 +make 0 +easier 0 +pleasant 0 +like 0 +macdraw 0 +multipl 0 +window 0 +subgraph 0 +edit 0 +hierarchi 0 +browser 0 +hpcwire 0 +journal 0 +high 0 +perform 0 +recent 0 +publish 0 +entitl 0 +come 0 +kind 0 +enough 0 +reproduc 0 +introduct 0 +current 0 +prospect 0 +notifi 0 +backend 0 +join 0 +fill 0 +form 0 +also 0 +ad 0 +first 0 +xcodelib 0 +compon 0 +librari 0 +publicationscod 0 +construct 0 +directori 0 +compress 0 +file 0 +made 0 +lieu 0 +prepar 0 +stage 0 +despit 0 +chang 0 +still 0 +quit 0 +refer 0 +link 0 +informationfor 0 +specif 0 +comment 0 +regard 0 +berger 0 +snail 0 +group 0 +depart 0 +univers 0 +texa 0 +groupgroup 0 +leaderprofessor 0 +jame 0 +brown 0 +affili 0 +faculti 0 +werth 0 +project 0 +manag 0 +bergerstud 0 +dwip 0 +banerje 0 +incorpor 0 +dynam 0 +data 0 +partit 0 +ajita 0 +develop 0 +constraint 0 +automat 0 +alumni 0 +overview 0 +home 0 +page 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..45486c4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,35 @@ +search 1 +austin 1 +inform 1 +address 1 +mail 1 +correl 1 +utexa 1 +texa 1 +home 1 +steve 0 +correlstev 0 +correlresearchph 0 +student 0 +work 0 +multifunct 0 +knowledg 0 +base 0 +group 0 +addit 0 +current 0 +construct 0 +hotlist 0 +site 0 +page 0 +email 0 +public 0 +tech 0 +reportcontact 0 +offic 0 +comput 0 +scienc 0 +depart 0 +univers 0 +taylor 0 +hall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..d4a8964c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,75 @@ +time 1 +system 1 +real 1 +program 1 +languag 1 +group 1 +robot 1 +esterel 1 +check 1 +home 1 +page 1 +driver 1 +offic 1 +carlo 0 +pucholcarlo 0 +pucholresearch 0 +interest 0 +respons 0 +reactiv 0 +gener 0 +formal 0 +method 0 +specif 0 +implement 0 +distribut 0 +control 0 +synchron 0 +mawl 0 +applic 0 +develop 0 +utc 0 +publicationsi 0 +list 0 +public 0 +avail 0 +forbrows 0 +softwareth 0 +tempest 0 +toolset 0 +packag 0 +verifyingsafeti 0 +properti 0 +written 0 +wrote 0 +half 0 +linux 0 +devic 0 +thequantavisionfram 0 +grabber 0 +part 0 +thejoystickdevic 0 +contact 0 +informationoffic 0 +dreal 0 +taylor 0 +hall 0 +univers 0 +texa 0 +austindepart 0 +comput 0 +sciencesaustin 0 +utexa 0 +austin 0 +lot 0 +phun 0 +interestsmemb 0 +theth 0 +latest 0 +interesti 0 +origin 0 +fromgandia 0 +inth 0 +provinc 0 +valencia 0 +spain 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..4488f89a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,79 @@ +time 2 +real 2 +modechart 1 +system 1 +properti 1 +wang 1 +research 1 +group 1 +work 1 +foundat 1 +utc 0 +groupth 0 +head 0 +byprof 0 +aloysiu 0 +past 0 +year 0 +toward 0 +lai 0 +groundworkfor 0 +establish 0 +firm 0 +theoret 0 +systemsand 0 +also 0 +build 0 +design 0 +tool 0 +base 0 +canb 0 +categor 0 +three 0 +area 0 +follow 0 +specif 0 +model 0 +precis 0 +formul 0 +analysi 0 +verif 0 +reason 0 +synthesi 0 +enforc 0 +stringent 0 +constraint 0 +project 0 +logic 0 +toolset 0 +editor 0 +verifi 0 +simul 0 +compil 0 +timetool 0 +scenario 0 +languagepublicationsabstract 0 +ofth 0 +paper 0 +availableonlin 0 +postscript 0 +current 0 +member 0 +deji 0 +chen 0 +carlo 0 +puchol 0 +doug 0 +stuart 0 +chung 0 +tsou 0 +guangtian 0 +yangalumni 0 +paul 0 +clement 0 +chih 0 +farn 0 +supoj 0 +suthandavibul 0 +farnam 0 +jahanian 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..897f7af7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,118 @@ +comput 2 +page 2 +scienc 1 +learn 1 +research 1 +intellig 1 +texa 1 +artifici 1 +system 1 +inform 1 +univers 1 +austin 1 +resourc 1 +home 1 +women 1 +interest 1 +machin 1 +acquisit 1 +lexic 1 +also 1 +robot 1 +address 1 +internet 1 +collect 1 +associ 1 +cindi 0 +thompsoncindi 0 +thompsonmachin 0 +groupunivers 0 +austini 0 +particip 0 +candlelight 0 +vigil 0 +across 0 +internetto 0 +help 0 +increas 0 +awar 0 +violenc 0 +researchmi 0 +current 0 +primarilyin 0 +area 0 +specif 0 +interestedin 0 +natur 0 +languag 0 +produc 0 +deep 0 +semanticrepresent 0 +input 0 +sentenc 0 +would 0 +us 0 +mani 0 +task 0 +propos 0 +corpu 0 +base 0 +wrote 0 +master 0 +thesi 0 +rule 0 +suitabl 0 +diagnost 0 +expert 0 +mobil 0 +exhibit 0 +atrobofest 0 +spring 0 +semest 0 +build 0 +agent 0 +finger 0 +pictur 0 +vita 0 +list 0 +public 0 +group 0 +educ 0 +north 0 +carolina 0 +state 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +cthomp 0 +utexa 0 +postal 0 +depart 0 +hotlist 0 +start 0 +point 0 +explor 0 +misc 0 +consortium 0 +bibliographi 0 +project 0 +repositori 0 +knowledg 0 +laboratori 0 +georgia 0 +tech 0 +journal 0 +linguist 0 +folk 0 +cognit 0 +miscellan 0 +stuff 0 +wolv 0 +truth 0 +evalu 0 +counsel 0 +expand 0 +horizon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..dc9b9a2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,18 @@ +home 1 +page 1 +xingshan 1 +welcom 0 +browser 0 +doesn 0 +seem 0 +support 0 +frame 0 +want 0 +downloadth 0 +latest 0 +netscap 0 +school 0 +work 0 +famili 0 +friend 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..23cf5178 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,71 @@ +comput 1 +scienc 1 +univers 1 +mike 1 +dahlin 1 +texa 1 +berkelei 1 +oper 1 +trend 1 +price 1 +page 1 +taylor 1 +hall 1 +dahlingener 0 +informationassist 0 +professor 0 +austin 0 +electr 0 +engin 0 +rice 0 +teachingfal 0 +systemsspr 0 +advanc 0 +architectureeveryon 0 +read 0 +technic 0 +classic 0 +researchxf 0 +serverless 0 +network 0 +file 0 +systemweb 0 +systemsth 0 +experiment 0 +softwar 0 +system 0 +less 0 +public 0 +list 0 +informationtechnolog 0 +pagethi 0 +pagesummar 0 +recent 0 +technolog 0 +interest 0 +operatingsystem 0 +research 0 +compter 0 +architect 0 +includinghistor 0 +data 0 +gather 0 +capac 0 +disk 0 +memori 0 +person 0 +informationif 0 +seem 0 +bore 0 +probabl 0 +want 0 +work 0 +internet 0 +root 0 +link 0 +world 0 +email 0 +utexa 0 +offic 0 +postal 0 +austinaustin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..641536c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,50 @@ +damani 1 +distribut 1 +home 0 +page 0 +howdi 0 +pagal 0 +dekho 0 +student 0 +busi 0 +read 0 +lazi 0 +updateth 0 +homepag 0 +regularli 0 +suffic 0 +anyth 0 +crazi 0 +appeal 0 +phrase 0 +us 0 +probabl 0 +meant 0 +research 0 +activ 0 +work 0 +parallel 0 +sytem 0 +laboratori 0 +vijai 0 +garg 0 +interest 0 +system 0 +network 0 +public 0 +follow 0 +time 0 +honor 0 +tradit 0 +feel 0 +oblig 0 +providesometh 0 +servic 0 +contact 0 +mehom 0 +guadulp 0 +austin 0 +offic 0 +austinphon 0 +dept 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..1726f5e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,55 @@ +austin 1 +texa 1 +research 1 +address 1 +dane 1 +depart 1 +comput 1 +scienc 1 +univers 1 +system 1 +pinbal 1 +view 1 +marshalldan 0 +marshal 0 +student 0 +multiresolut 0 +render 0 +autom 0 +model 0 +tree 0 +real 0 +time 0 +global 0 +illumin 0 +electromechan 0 +machin 0 +mainten 0 +thelogist 0 +equat 0 +escap 0 +attractor 0 +complex 0 +plane 0 +main 0 +area 0 +make 0 +nice 0 +imag 0 +contact 0 +inform 0 +work 0 +appli 0 +laboratori 0 +burnet 0 +phone 0 +email 0 +utexa 0 +school 0 +unrel 0 +link 0 +pastur 0 +jupit 0 +probe 0 +happi 0 +station 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..883fe0b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,67 @@ +scienc 1 +stuart 1 +page 1 +sure 1 +well 1 +us 1 +link 1 +provid 1 +save 1 +doug 0 +welcom 0 +pagedoug 0 +home 0 +pagewelcom 0 +construct 0 +bear 0 +number 0 +oflinksto 0 +interest 0 +place 0 +inform 0 +aboutsport 0 +fiction 0 +booksin 0 +gener 0 +fewjok 0 +testof 0 +latexhtml 0 +aweath 0 +mapandcondit 0 +austinandnew 0 +orlean 0 +guess 0 +sort 0 +person 0 +archiv 0 +amgraci 0 +share 0 +perhap 0 +process 0 +index 0 +puttingit 0 +simpl 0 +keep 0 +webbrows 0 +databas 0 +browser 0 +know 0 +thisi 0 +good 0 +idea 0 +go 0 +someth 0 +justa 0 +easi 0 +access 0 +manner 0 +stuffmom 0 +click 0 +comput 0 +calendarlink 0 +video 0 +fictionbooksjokessportsfoodvideout 0 +libraryresumelast 0 +updat 0 +dasdastuart 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..f5489a3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,4 @@ +doug 0 +swhich 0 +annoi 0 +thisorthi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..8ffdeb9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,57 @@ +austin 1 +comput 1 +scienc 1 +univers 1 +texa 1 +home 1 +page 1 +neural 1 +network 1 +genet 1 +algorithm 1 +depart 1 +research 1 +universityof 1 +address 1 +link 1 +utc 1 +dian 0 +lawdian 0 +student 0 +intereststh 0 +symbol 0 +ground 0 +problemnavig 0 +robot 0 +agent 0 +us 0 +evolv 0 +theus 0 +educ 0 +spanish 0 +literatur 0 +washingtonst 0 +fine 0 +art 0 +washington 0 +stateunivers 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +dianelaw 0 +utexa 0 +postal 0 +local 0 +homepag 0 +gann 0 +illig 0 +santa 0 +institut 0 +digest 0 +archiv 0 +michigan 0 +group 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..1c5108bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,47 @@ +comput 1 +student 1 +depart 1 +dionisi 1 +scienc 1 +engin 1 +univers 1 +patra 1 +greec 1 +austin 1 +home 0 +page 0 +papadopoulosdionisi 0 +papadopoulosabout 0 +graduat 0 +undergradu 0 +informat 0 +also 0 +work 0 +technolog 0 +institut 0 +member 0 +softwar 0 +applic 0 +reasearch 0 +unit 0 +contact 0 +medionisi 0 +papadopoulo 0 +texa 0 +mail 0 +utexa 0 +link 0 +mine 0 +monitor 0 +databas 0 +homework 0 +panhellen 0 +associationpanathinaiko 0 +athlet 0 +clubgreek 0 +newshellen 0 +resourc 0 +networkeveryth 0 +alwai 0 +want 0 +know 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..10ee97ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,119 @@ +random 2 +version 2 +stoc 1 +preliminari 1 +comput 1 +appear 1 +texa 1 +austin 1 +scienc 1 +univers 1 +inform 1 +address 1 +complex 1 +expand 1 +applic 1 +graph 1 +report 1 +public 1 +construct 1 +structur 1 +algorithm 1 +simul 1 +weak 1 +sourc 1 +foc 1 +revis 1 +bound 1 +combinatorica 1 +sicomp 1 +complet 1 +david 0 +zuckermandavid 0 +zuckermanassist 0 +professor 0 +contact 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +depart 0 +finger 0 +cours 0 +fall 0 +cryptographyresearch 0 +intereststh 0 +role 0 +theori 0 +walk 0 +cryptographi 0 +paragraph 0 +descript 0 +well 0 +myprofil 0 +annual 0 +also 0 +look 0 +recent 0 +asymptot 0 +good 0 +code 0 +correct 0 +insert 0 +delet 0 +transposit 0 +soda 0 +optim 0 +sampl 0 +extractor 0 +leader 0 +elect 0 +multipl 0 +cover 0 +time 0 +linear 0 +space 0 +jcss 0 +call 0 +determinist 0 +logspac 0 +us 0 +gener 0 +algorithmica 0 +tight 0 +analys 0 +local 0 +load 0 +balanc 0 +derandom 0 +product 0 +beat 0 +eigenvalu 0 +explicit 0 +constructionand 0 +utc 0 +technic 0 +effici 0 +small 0 +hit 0 +setfor 0 +combinatori 0 +rectangl 0 +high 0 +dimens 0 +lower 0 +mutual 0 +exclus 0 +unapproxim 0 +problem 0 +list 0 +abstract 0 +visit 0 +page 0 +sinc 0 +april 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..81fb0a67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,70 @@ +multimedia 2 +laboratori 1 +research 1 +distribut 1 +comput 1 +univers 1 +texa 1 +austin 1 +multimediacomput 1 +scienc 1 +sponsor 1 +foundat 1 +paper 1 +home 0 +page 0 +welcom 0 +main 0 +object 0 +investig 0 +wide 0 +rangeof 0 +issu 0 +area 0 +system 0 +currentresearch 0 +focus 0 +design 0 +storag 0 +server 0 +network 0 +transport 0 +protocol 0 +digit 0 +audio 0 +video 0 +andmultiresolut 0 +databas 0 +dmcl 0 +part 0 +departmentof 0 +work 0 +carri 0 +variou 0 +industri 0 +federalinstitut 0 +includ 0 +intel 0 +nation 0 +nasa 0 +microsoft 0 +mitsubishi 0 +electr 0 +merl 0 +microsystemsinc 0 +tabl 0 +content 0 +agenda 0 +relev 0 +technic 0 +report 0 +list 0 +member 0 +call 0 +would 0 +like 0 +hear 0 +send 0 +yourcom 0 +suggest 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..7f85f673 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,85 @@ +domain 1 +softwar 1 +research 1 +gener 1 +extens 1 +languag 1 +program 1 +student 1 +specif 1 +design 1 +current 1 +build 1 +jakarta 1 +project 1 +univers 1 +texa 1 +offic 1 +address 1 +austin 1 +batorydon 0 +batorysoftwar 0 +improv 0 +programm 0 +product 0 +reduc 0 +mainten 0 +cost 0 +enhanc 0 +applic 0 +perform 0 +investig 0 +wai 0 +realiz 0 +practic 0 +compon 0 +base 0 +methodolog 0 +technolog 0 +larg 0 +scale 0 +system 0 +synthesi 0 +span 0 +topic 0 +architectur 0 +pattern 0 +subject 0 +model 0 +parameter 0 +object 0 +orient 0 +framework 0 +interest 0 +databas 0 +manag 0 +data 0 +structur 0 +avion 0 +support 0 +goal 0 +preprocessor 0 +java 0 +would 0 +encapsul 0 +pluggabl 0 +fund 0 +darpa 0 +microsoft 0 +appli 0 +laboratori 0 +schlumberg 0 +public 0 +contact 0 +inform 0 +taylor 0 +hall 0 +email 0 +batori 0 +utexa 0 +phone 0 +number 0 +postal 0 +depart 0 +comput 0 +scienc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..ac405425 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,41 @@ +dwip 1 +austin 1 +info 1 +parallel 1 +usavoic 1 +offic 1 +home 0 +page 0 +banerje 0 +photograph 0 +banerjeeabout 0 +methi 0 +work 0 +code 0 +programminggroup 0 +methodolog 0 +includ 0 +data 0 +partit 0 +graphicalparallel 0 +program 0 +system 0 +paper 0 +present 0 +theintern 0 +process 0 +symposium 0 +list 0 +favorit 0 +site 0 +insert 0 +know 0 +contact 0 +departmentpost 0 +comput 0 +scienc 0 +main 0 +homepost 0 +enfield 0 +road 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..22574a17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,50 @@ +anoth 1 +emilio 1 +camahort 1 +gurrea 1 +promis 1 +home 1 +page 1 +paragraph 1 +austin 1 +offic 1 +set 0 +decent 0 +summer 0 +mmmmm 0 +multipl 0 +complaint 0 +meet 0 +previou 0 +deadlin 0 +come 0 +excus 0 +know 0 +siggraph 0 +paper 0 +finish 0 +januari 0 +thing 0 +think 0 +bout 0 +right 0 +make 0 +time 0 +els 0 +lose 0 +credibl 0 +left 0 +first 0 +item 0 +third 0 +forget 0 +break 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +taylor 0 +usavoic 0 +main 0 +ecamahor 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..e448d546 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,32 @@ +posnak 1 +austin 1 +system 1 +research 1 +view 1 +graduat 0 +student 0 +comput 0 +scienc 0 +univers 0 +texa 0 +interest 0 +network 0 +oper 0 +support 0 +multimedia 0 +work 0 +distribut 0 +multimediacomput 0 +laboratori 0 +head 0 +harrick 0 +supervis 0 +greg 0 +lavend 0 +isod 0 +consortium 0 +base 0 +summari 0 +public 0 +utexa 0 +eduphon 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..4bd27352 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,73 @@ +emerson 2 +comput 1 +tempor 1 +scienc 1 +logic 1 +real 1 +time 1 +allen 1 +texa 1 +austin 1 +automata 1 +srinivasan 1 +journal 1 +calculu 1 +emersonbruton 0 +centenni 0 +professor 0 +depart 0 +taylor 0 +hall 0 +univers 0 +mail 0 +utexa 0 +phone 0 +direct 0 +secretari 0 +area 0 +research 0 +interest 0 +formal 0 +method 0 +aid 0 +verif 0 +infinit 0 +object 0 +concurr 0 +distribut 0 +systemsselect 0 +recent 0 +publications 0 +sistla 0 +quantit 0 +reason 0 +system 0 +sadler 0 +effici 0 +satisfi 0 +theori 0 +practic 0 +bakker 0 +york 0 +springer 0 +verlag 0 +lectur 0 +note 0 +jutla 0 +tree 0 +determinaci 0 +annual 0 +ieee 0 +symposium 0 +foundat 0 +foc 0 +juan 0 +modal 0 +handbook 0 +theoret 0 +leeuwen 0 +elsevi 0 +press 0 +amsterdam 0 +cambridg 0 +mass 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..4044ec19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,103 @@ +program 2 +code 1 +emeri 1 +work 1 +parallel 1 +berger 1 +home 1 +mail 1 +austin 1 +utexa 1 +system 1 +group 1 +page 1 +function 1 +info 1 +comput 1 +scienc 1 +analyst 1 +groupi 1 +visual 1 +inform 1 +ticam 1 +composit 1 +materi 1 +haskel 1 +doug 1 +evangelist 1 +pageemeri 0 +person 0 +contact 0 +address 0 +dept 0 +taylor 0 +hall 0 +univers 0 +texa 0 +phone 0 +research 0 +also 0 +affili 0 +project 0 +click 0 +name 0 +tool 0 +search 0 +lyco 0 +databas 0 +file 0 +randomli 0 +select 0 +mirror 0 +near 0 +view 0 +unix 0 +user 0 +academ 0 +uttr 0 +abstract 0 +languag 0 +add 0 +object 0 +orient 0 +us 0 +concept 0 +known 0 +type 0 +class 0 +pure 0 +framework 0 +paper 0 +describ 0 +extens 0 +analyz 0 +accomplish 0 +well 0 +problem 0 +compress 0 +postscript 0 +html 0 +othermi 0 +youngest 0 +brother 0 +aspir 0 +artist 0 +graphic 0 +handiwork 0 +linksth 0 +systemtexbook 0 +textbook 0 +exchangegrac 0 +graduat 0 +repres 0 +associ 0 +last 0 +updat 0 +octob 0 +believ 0 +macintosh 0 +check 0 +http 0 +macaddict 0 +join 0 +list 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..035e8a36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,108 @@ +emma 1 +univers 1 +austin 1 +page 1 +comput 1 +scienc 1 +china 1 +home 1 +depart 1 +texa 1 +zhongshan 1 +librari 1 +wuabout 0 +myselfhi 0 +welcom 0 +chines 0 +girl 0 +come 0 +august 0 +studi 0 +interest 0 +immedi 0 +degre 0 +becam 0 +market 0 +repres 0 +inibm 0 +compani 0 +south 0 +branch 0 +try 0 +deliv 0 +solut 0 +small 0 +planet 0 +costom 0 +telecommun 0 +media 0 +industri 0 +would 0 +surpris 0 +find 0 +manyalumni 0 +enter 0 +alumni 0 +club 0 +nice 0 +thing 0 +attend 0 +graduat 0 +school 0 +lot 0 +intern 0 +opportun 0 +engin 0 +student 0 +semest 0 +work 0 +part 0 +time 0 +programm 0 +nation 0 +instrumentsinc 0 +cours 0 +schedul 0 +spring 0 +distribut 0 +computingmanag 0 +informationautomat 0 +program 0 +tool 0 +baseyahoogalaxi 0 +onlin 0 +universityyellow 0 +mini 0 +introduct 0 +us 0 +fortran 0 +tutori 0 +infoleisur 0 +timenewspagepeopl 0 +dailyartstim 0 +magazinechines 0 +magazinepc 0 +magazinec 0 +visit 0 +orlean 0 +houston 0 +antoniosan 0 +franciscomarina 0 +peac 0 +citysan 0 +jose 0 +capit 0 +silicon 0 +vallei 0 +love 0 +francisco 0 +contact 0 +pointemail 0 +emmawu 0 +utexa 0 +eduphon 0 +mail 0 +last 0 +date 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..0ebbe4bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,30 @@ +page 1 +emilio 1 +first 1 +anoth 1 +paragraph 1 +austin 1 +offic 1 +home 0 +remolinaemilio 0 +remolinaabout 0 +mehi 0 +bullet 0 +list 0 +easi 0 +includ 0 +well 0 +item 0 +third 0 +forget 0 +break 0 +curriculum 0 +vita 0 +contact 0 +mepost 0 +comput 0 +scienc 0 +usavoic 0 +main 0 +eremolin 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..f8ee9e3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,26 @@ +levent 1 +home 1 +page 1 +sayfasi 0 +welcom 0 +erkok 0 +graduat 0 +student 0 +depart 0 +comput 0 +sciencesat 0 +universityof 0 +texa 0 +austin 0 +former 0 +locat 0 +inturkei 0 +person 0 +inform 0 +reach 0 +http 0 +ceng 0 +metu 0 +erkokto 0 +find 0 +thank 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..65d4766a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,39 @@ +esra 1 +erdem 1 +comput 1 +scienc 1 +univers 1 +austin 1 +texa 1 +depart 1 +interest 1 +monoton 1 +reason 1 +utexa 1 +homepag 0 +student 0 +thedepart 0 +educ 0 +engin 0 +andinform 0 +bilkent 0 +turkei 0 +area 0 +machin 0 +learninginduct 0 +logic 0 +program 0 +topic 0 +cognit 0 +sciencelearningreason 0 +children 0 +theori 0 +mind 0 +commonsens 0 +reasoningknowledg 0 +representationemotionsphilosophi 0 +mindcontact 0 +inform 0 +postal 0 +voic 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..f1732cd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,69 @@ +carl 1 +student 1 +comput 1 +scienc 1 +univers 1 +rice 1 +austin 1 +offic 1 +home 0 +pagestephen 0 +carlpardon 0 +dust 0 +current 0 +work 0 +toward 0 +master 0 +art 0 +degre 0 +depart 0 +texa 0 +thesi 0 +describ 0 +system 0 +perform 0 +syntact 0 +extens 0 +scheme 0 +program 0 +languag 0 +wasn 0 +alwai 0 +life 0 +myresum 0 +believ 0 +item 0 +person 0 +interest 0 +planmi 0 +resum 0 +research 0 +interestsa 0 +psuedo 0 +random 0 +collect 0 +linksth 0 +household 0 +daili 0 +dose 0 +thing 0 +world 0 +wide 0 +snow 0 +pike 0 +peak 0 +houston 0 +chronicl 0 +interact 0 +sport 0 +worth 0 +athlet 0 +march 0 +bandget 0 +touchpost 0 +usavoic 0 +main 0 +know 0 +esteban 0 +utexa 0 +edureturn 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..34ccc6b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,63 @@ +univers 1 +texa 1 +inform 1 +comput 1 +scienc 1 +austin 1 +learn 1 +research 1 +improv 1 +us 1 +address 1 +estlin 1 +utexa 1 +tara 0 +estlintara 0 +estlinmachin 0 +groupth 0 +austinresearchcontrol 0 +knowledg 0 +perform 0 +problem 0 +solver 0 +byguid 0 +effici 0 +accur 0 +solut 0 +researchinvolv 0 +combin 0 +analyt 0 +induct 0 +machinelearn 0 +techniqu 0 +acquir 0 +control 0 +amparticularli 0 +interest 0 +method 0 +theperform 0 +plan 0 +schedul 0 +system 0 +includ 0 +detail 0 +descript 0 +myresearch 0 +vita 0 +list 0 +public 0 +also 0 +check 0 +machin 0 +group 0 +page 0 +educ 0 +tulan 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +postal 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..1c6418a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,85 @@ +francoi 1 +barbanson 1 +research 1 +interest 1 +central 1 +market 1 +page 1 +utc 1 +austin 1 +todai 1 +tank 1 +polic 1 +class 1 +utexa 1 +home 0 +versionhom 0 +versionthi 0 +locat 0 +directori 0 +spool 0 +user 0 +francoisabout 0 +mecurr 0 +black 0 +forest 0 +cake 0 +genuin 0 +find 0 +real 0 +pastri 0 +fruit 0 +mouss 0 +pack 0 +groceri 0 +well 0 +swim 0 +forthcom 0 +trip 0 +shed 0 +lighton 0 +issu 0 +stop 0 +shop 0 +food 0 +women 0 +current 0 +crawl 0 +join 0 +foreign 0 +legion 0 +chines 0 +wisdom 0 +suggest 0 +watch 0 +plai 0 +basketbal 0 +hyogo 0 +japan 0 +check 0 +action 0 +atdominion 0 +hqcheck 0 +dilberti 0 +knew 0 +databas 0 +would 0 +noth 0 +troubl 0 +mentionthat 0 +parallel 0 +comput 0 +contact 0 +mepost 0 +guadalup 0 +street 0 +suit 0 +texa 0 +voic 0 +theori 0 +number 0 +assum 0 +machin 0 +work 0 +mail 0 +edufrancoi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..38d41f8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,58 @@ +comput 2 +fussel 1 +scienc 1 +research 1 +texa 1 +graphic 1 +donald 1 +depart 1 +technolog 1 +group 1 +appli 1 +engin 1 +mathemat 1 +univers 1 +austin 1 +utexa 1 +public 1 +trammel 0 +crow 0 +regent 0 +professor 0 +director 0 +advanc 0 +divis 0 +inform 0 +laboratori 0 +member 0 +center 0 +electr 0 +institut 0 +phone 0 +mail 0 +eduinform 0 +http 0 +user 0 +fussellb 0 +social 0 +dartmouth 0 +collegem 0 +dalla 0 +area 0 +interest 0 +architectur 0 +databas 0 +system 0 +design 0 +autom 0 +fault 0 +toler 0 +cours 0 +introduct 0 +journal 0 +confer 0 +work 0 +progress 0 +current 0 +former 0 +student 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..3132b88d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,33 @@ +ajit 1 +gener 1 +useless 1 +file 1 +georgemi 0 +pagethi 0 +page 0 +go 0 +youand 0 +construct 0 +someth 0 +odd 0 +goodthat 0 +find 0 +anyth 0 +start 0 +research 0 +address 0 +georg 0 +wickersham 0 +lane 0 +austin 0 +gajit 0 +utexa 0 +eduher 0 +softwar 0 +document 0 +foundus 0 +recent 0 +david 0 +last 0 +updat 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..53c9e230 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,27 @@ +geeta 1 +arora 0 +home 0 +page 0 +graduat 0 +student 0 +current 0 +year 0 +still 0 +try 0 +tofigur 0 +research 0 +undergrad 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +india 0 +contact 0 +mehom 0 +river 0 +oak 0 +medic 0 +art 0 +austin 0 +phone 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..7ac02828 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,62 @@ +page 1 +home 1 +comment 1 +gokul 1 +flame 1 +critic 1 +send 1 +click 1 +final 0 +receiv 0 +countless 0 +gripe 0 +theexcess 0 +verbos 0 +decid 0 +thecollect 0 +wish 0 +mass 0 +democrat 0 +world 0 +putonli 0 +barest 0 +minimum 0 +adieu 0 +outpour 0 +head 0 +plakal 0 +hag 0 +hopey 0 +sleep 0 +easi 0 +untroubl 0 +conscienc 0 +perfectli 0 +good 0 +untim 0 +demis 0 +actual 0 +quit 0 +want 0 +kind 0 +could 0 +merit 0 +vitriol 0 +risk 0 +mayb 0 +help 0 +reinstat 0 +earlier 0 +signin 0 +lesscrit 0 +contact 0 +medic 0 +art 0 +austin 0 +visitor 0 +number 0 +suggest 0 +utexa 0 +last 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..fc0b0825 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,176 @@ +like 2 +austin 1 +gooti 1 +know 1 +peopl 1 +univers 1 +friend 1 +thing 1 +also 1 +love 1 +game 1 +book 1 +home 1 +page 1 +subramanyam 1 +intro 1 +present 1 +futur 1 +read 1 +well 1 +interest 1 +place 1 +came 1 +made 1 +alwai 1 +great 1 +time 1 +comput 1 +scienc 1 +texa 1 +life 1 +around 1 +plai 1 +tenni 1 +check 1 +past 0 +hideout 0 +visitor 0 +number 0 +welcom 0 +bold 0 +name 0 +start 0 +suggest 0 +continu 0 +hopefulli 0 +wont 0 +disappoint 0 +gold 0 +born 0 +sept 0 +somebodi 0 +hyderabad 0 +andhra 0 +pradesh 0 +geographi 0 +southern 0 +state 0 +india 0 +curiou 0 +famili 0 +school 0 +join 0 +osmania 0 +colleg 0 +technolog 0 +bachelor 0 +chemic 0 +engin 0 +contact 0 +vari 0 +background 0 +thought 0 +proud 0 +call 0 +batch 0 +nebraska 0 +lincoln 0 +gala 0 +becam 0 +addict 0 +american 0 +footbal 0 +except 0 +cold 0 +winter 0 +everi 0 +els 0 +wasjust 0 +studi 0 +year 0 +transfer 0 +enrol 0 +master 0 +program 0 +depart 0 +real 0 +cool 0 +hang 0 +especi 0 +weather 0 +usual 0 +acad 0 +care 0 +list 0 +alphabet 0 +order 0 +abraham 0 +gokul 0 +kumar 0 +mehul 0 +neeraj 0 +shantanu 0 +shailesh 0 +vipin 0 +best 0 +keep 0 +chat 0 +mani 0 +make 0 +travel 0 +anoth 0 +likechess 0 +question 0 +carrom 0 +board 0 +racquet 0 +ball 0 +tabl 0 +cricket 0 +soccer 0 +watch 0 +definetli 0 +text 0 +want 0 +horoscop 0 +todai 0 +compatabil 0 +sign 0 +listen 0 +hindi 0 +song 0 +write 0 +would 0 +bore 0 +narrow 0 +option 0 +golden 0 +goe 0 +without 0 +sai 0 +control 0 +destini 0 +ever 0 +success 0 +cours 0 +attribut 0 +hardwork 0 +power 0 +good 0 +happen 0 +propos 0 +dispos 0 +pleas 0 +spend 0 +fill 0 +valuabl 0 +comment 0 +guest 0 +hide 0 +medic 0 +art 0 +utexa 0 +finger 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..e1a50bab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,108 @@ +protocol 1 +comput 1 +permiss 1 +specif 1 +copi 1 +http 1 +survei 1 +citat 1 +exact 1 +pragmat 1 +utexa 1 +date 1 +statement 1 +decemb 1 +goudanetwork 1 +copyright 1 +page 1 +texa 1 +gouda 1 +implement 1 +network 1 +octob 1 +work 1 +posit 0 +goudaacm 0 +surveysa 0 +associ 0 +machineri 0 +fornetwork 0 +specificationsand 0 +implementationsmoham 0 +goudath 0 +univers 0 +austin 0 +depart 0 +sciencesaustin 0 +usagouda 0 +user 0 +utc 0 +report 0 +profil 0 +htmlabstract 0 +argu 0 +studi 0 +evolv 0 +bridgeth 0 +networkprotocol 0 +gener 0 +term 0 +formal 0 +implementationsaddit 0 +word 0 +phrase 0 +compil 0 +softwar 0 +tool 0 +develop 0 +methodologypubl 0 +inform 0 +submiss 0 +june 0 +revis 0 +accept 0 +public 0 +sourc 0 +html 0 +avail 0 +make 0 +digitalor 0 +hard 0 +part 0 +person 0 +classroomus 0 +grant 0 +without 0 +provid 0 +made 0 +ordistribut 0 +profit 0 +commerci 0 +advantag 0 +bearthi 0 +notic 0 +full 0 +first 0 +forcompon 0 +own 0 +other 0 +must 0 +honor 0 +abstract 0 +credit 0 +permit 0 +otherwis 0 +torepublish 0 +post 0 +server 0 +redistribut 0 +list 0 +requiresprior 0 +request 0 +frompubl 0 +dept 0 +orpermiss 0 +last 0 +modifi 0 +moham 0 +goudagouda 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..6b11eb4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,63 @@ +gunnel 1 +john 1 +transpos 1 +case 1 +assign 1 +report 1 +look 1 +home 1 +plan 1 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +austin 0 +utexa 0 +plapack 0 +minut 0 +connect 0 +imag 0 +except 0 +guess 0 +drank 0 +depict 0 +product 0 +latter 0 +author 0 +collect 0 +code 0 +spars 0 +matrix 0 +computationsif 0 +would 0 +like 0 +meet 0 +best 0 +friend 0 +take 0 +data 0 +pageam 0 +log 0 +check 0 +class 0 +also 0 +glimps 0 +mysteri 0 +land 0 +hail 0 +central 0 +oregon 0 +towni 0 +less 0 +redmond 0 +doesn 0 +much 0 +page 0 +talk 0 +visitor 0 +rememb 0 +test 0 +file 0 +long 0 +bore 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..816229da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,34 @@ +comput 1 +scienc 1 +frank 1 +utexa 1 +austin 1 +univers 1 +student 1 +depart 1 +tropschuhfrank 0 +tropschuh 0 +gunther 0 +schweiz 0 +clayton 0 +waldhofstrass 0 +rheinfelden 0 +curriculum 0 +vitaeenglishdeutschlinkscarnegi 0 +mellon 0 +undergradu 0 +school 0 +universitterlangen 0 +nrnberg 0 +junior 0 +year 0 +abroad 0 +institut 0 +mathematisch 0 +maschinen 0 +datenverarbeitung 0 +oper 0 +system 0 +texa 0 +graduat 0 +tropschuhgunth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..08aee5e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,41 @@ +yongxiang 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +home 0 +pagemerri 0 +christmashappi 0 +year 0 +welcom 0 +homepagegao 0 +yongxiangsever 0 +pointsto 0 +contact 0 +addresspictur 0 +mine 0 +ceremoni 0 +grant 0 +master 0 +degre 0 +chinadepart 0 +gener 0 +inform 0 +name 0 +male 0 +birthdai 0 +birth 0 +place 0 +huanan 0 +jiangsu 0 +china 0 +hobbi 0 +tabl 0 +tenniseduc 0 +background 0 +juli 0 +softwar 0 +directori 0 +servic 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..7dbb099a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,26 @@ +comput 1 +zhang 1 +manag 1 +home 0 +page 0 +schoolth 0 +univers 0 +texa 0 +austin 0 +scienc 0 +second 0 +semestercoursesc 0 +languag 0 +linc 0 +distribut 0 +alvis 0 +databas 0 +mirankerfil 0 +term 0 +project 0 +databs 0 +queri 0 +formthank 0 +stop 0 +gzhang 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..cbd0f91e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,107 @@ +comput 1 +scienc 1 +zhou 1 +design 1 +wong 1 +institut 1 +univers 1 +depart 1 +texa 1 +austin 1 +vlsi 1 +algorithm 1 +optim 1 +ieee 1 +aid 1 +jose 1 +room 1 +refer 1 +campu 1 +home 0 +pagealan 0 +headlin 0 +new 0 +year 0 +ture 0 +award 0 +given 0 +amir 0 +pnueli 0 +aprofessor 0 +weizmann 0 +israel 0 +theoret 0 +compuer 0 +tsinghua 0 +prestig 0 +china 0 +incompletelist 0 +undergradu 0 +classmat 0 +kept 0 +alex 0 +zhao 0 +current 0 +student 0 +research 0 +interest 0 +focus 0 +find 0 +mani 0 +applic 0 +mathemat 0 +analysi 0 +combinatori 0 +complex 0 +even 0 +mathematicallog 0 +researchgroup 0 +head 0 +prof 0 +martin 0 +publicationshai 0 +forriv 0 +rout 0 +crosstalk 0 +constraint 0 +internationalconfer 0 +chen 0 +optimalnon 0 +uniform 0 +wire 0 +size 0 +elmor 0 +delai 0 +model 0 +acmintern 0 +confer 0 +studi 0 +academ 0 +techniqu 0 +industri 0 +directori 0 +bulletin 0 +live 0 +period 0 +chines 0 +staff 0 +movi 0 +search 0 +engin 0 +internet 0 +contact 0 +inform 0 +sciencesunivers 0 +austintaylor 0 +hall 0 +staustin 0 +voic 0 +mail 0 +haizhou 0 +utexa 0 +edulast 0 +modifi 0 +number 0 +visit 0 +homepag 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..88a0dbdd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,41 @@ +comput 1 +scienc 1 +texa 1 +austin 1 +univ 1 +wuhan 1 +china 1 +home 1 +page 1 +dept 1 +welcom 0 +construct 0 +myselfnow 0 +first 0 +year 0 +student 0 +departmentof 0 +universityof 0 +want 0 +know 0 +click 0 +hear 0 +educ 0 +pre 0 +nation 0 +softwar 0 +engin 0 +alumni 0 +pal 0 +univers 0 +alumnihom 0 +pagecontact 0 +wait 0 +email 0 +haosun 0 +utexa 0 +edunow 0 +call 0 +visitor 0 +sinc 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..df857c93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,91 @@ +champion 2 +comput 1 +intramur 1 +volleybal 1 +divis 1 +scienc 1 +austin 1 +micheal 1 +universityof 1 +hewett 1 +utexa 1 +texa 1 +kansa 1 +mathemat 1 +honor 1 +first 1 +place 1 +bowl 1 +faculti 1 +grad 1 +fall 1 +open 1 +summer 1 +finish 1 +address 1 +hewetthewett 0 +educlick 0 +fingerm 0 +click 0 +email 0 +fourth 0 +year 0 +student 0 +departmentof 0 +educ 0 +stanfordunivers 0 +electr 0 +engin 0 +washburnunivers 0 +intern 0 +collegiateprogram 0 +contest 0 +nation 0 +competit 0 +utc 0 +sawada 0 +ioanni 0 +smaragdaki 0 +thoma 0 +wahlutc 0 +tower 0 +hanoi 0 +lanc 0 +tokudaut 0 +spring 0 +club 0 +motorola 0 +marathon 0 +hour 0 +minut 0 +might 0 +want 0 +visit 0 +myfavorit 0 +page 0 +locatem 0 +learnabout 0 +research 0 +interest 0 +view 0 +downloadmi 0 +public 0 +learnmor 0 +phone 0 +number 0 +call 0 +offic 0 +home 0 +central 0 +timefax 0 +mail 0 +univers 0 +depart 0 +taylor 0 +hall 0 +author 0 +hewettemail 0 +edulast 0 +updat 0 +wednesdai 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..0f7da5af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,167 @@ +current 1 +work 1 +austin 1 +databas 1 +robot 1 +java 1 +softwar 1 +resum 1 +packag 1 +http 1 +contract 1 +us 1 +graphic 1 +also 1 +interest 1 +page 1 +hiep 1 +texa 1 +activ 1 +solut 1 +game 1 +receiv 1 +scienc 1 +utexa 1 +system 1 +xwindow 1 +languag 1 +gunu 1 +common 1 +lisp 1 +function 1 +written 1 +like 1 +program 1 +port 1 +netrek 1 +factoryx 1 +offic 1 +nguyenhiep 0 +nguyenabout 0 +meabout 0 +vietnames 0 +american 0 +born 0 +came 0 +unit 0 +state 0 +five 0 +resid 0 +texasfor 0 +life 0 +live 0 +programm 0 +seek 0 +client 0 +process 0 +start 0 +busi 0 +providinghigh 0 +internet 0 +product 0 +rang 0 +video 0 +detail 0 +link 0 +hypertextresum 0 +occup 0 +gordon 0 +novak 0 +compil 0 +class 0 +educ 0 +univers 0 +havedevelop 0 +year 0 +onlin 0 +natur 0 +placement 0 +center 0 +address 0 +con 0 +nsplace 0 +rexi 0 +real 0 +time 0 +emptiv 0 +oper 0 +board 0 +research 0 +gdraw 0 +object 0 +orient 0 +cross 0 +platform 0 +librari 0 +postscript 0 +legion 0 +data 0 +flow 0 +control 0 +flat 0 +simul 0 +realist 0 +specular 0 +reflect 0 +sonar 0 +xgcl 0 +xakcl 0 +interfac 0 +akcl 0 +standalon 0 +provid 0 +john 0 +ousterhout 0 +theunivers 0 +specif 0 +anonlin 0 +access 0 +student 0 +prototyp 0 +moredetail 0 +researchwith 0 +allow 0 +easi 0 +build 0 +andmaintain 0 +network 0 +explor 0 +methodolog 0 +larg 0 +currentlyact 0 +search 0 +expertis 0 +internetsoftwar 0 +might 0 +best 0 +leverag 0 +technic 0 +especi 0 +window 0 +fast 0 +textur 0 +mappingroutin 0 +anim 0 +processor 0 +assembl 0 +write 0 +poetri 0 +make 0 +potteri 0 +outdoor 0 +list 0 +spot 0 +group 0 +virtual 0 +realiti 0 +vrml 0 +sdsc 0 +vrmlto 0 +contact 0 +mepost 0 +comput 0 +usavoic 0 +main 0 +edulast 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..687462c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,72 @@ +comput 1 +austin 1 +depart 1 +univers 1 +world 1 +huiqun 1 +home 1 +page 1 +scienc 1 +texa 1 +research 1 +inform 1 +internet 1 +career 1 +book 1 +java 1 +rosett 1 +hqliu 1 +utexa 1 +nice 0 +meet 0 +student 0 +member 0 +vlsi 0 +design 0 +group 0 +guid 0 +professor 0 +martin 0 +wong 0 +new 0 +virtual 0 +tour 0 +citi 0 +collect 0 +chines 0 +site 0 +sunris 0 +stuff 0 +societi 0 +ieee 0 +giant 0 +search 0 +tool 0 +yahoo 0 +infoseek 0 +directori 0 +onlin 0 +center 0 +mosaic 0 +bookmark 0 +entertain 0 +languag 0 +unix 0 +perl 0 +expect 0 +refer 0 +manual 0 +program 0 +exampl 0 +contact 0 +mail 0 +phone 0 +address 0 +campu 0 +taylor 0 +last 0 +modifi 0 +comment 0 +welcom 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..a26e76b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,58 @@ +austin 2 +scienc 1 +comput 1 +avail 1 +hudson 1 +onlin 1 +research 1 +offic 1 +home 0 +pagehudson 0 +turnerphd 0 +student 0 +colleg 0 +natur 0 +sciencesat 0 +univers 0 +texa 0 +advisor 0 +vladimir 0 +lifschitz 0 +expect 0 +thesi 0 +titl 0 +infer 0 +rule 0 +causal 0 +represent 0 +ofcommonsens 0 +knowledg 0 +action 0 +msc 0 +mli 0 +librari 0 +inform 0 +english 0 +philosophi 0 +vita 0 +postscript 0 +draft 0 +dissert 0 +also 0 +interestscommonsens 0 +reason 0 +actionlog 0 +program 0 +nonmonoton 0 +reasoningmi 0 +paper 0 +linkseuropean 0 +colloquium 0 +spatialand 0 +tempor 0 +reasoningto 0 +contact 0 +mepost 0 +usavoic 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..104952b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,63 @@ +home 1 +austin 1 +comput 1 +page 1 +time 1 +yanbin 1 +zhang 1 +welcom 1 +graduat 1 +depart 1 +scienc 1 +univers 1 +texa 1 +full 1 +spring 1 +world 1 +compani 1 +hyanbin 1 +utexa 1 +address 1 +littl 0 +cutti 0 +allen 0 +student 0 +current 0 +seek 0 +part 0 +cours 0 +left 0 +resum 0 +click 0 +postscript 0 +format 0 +ieee 0 +onlin 0 +career 0 +center 0 +help 0 +languag 0 +internet 0 +librari 0 +webmuseum 0 +travel 0 +beauti 0 +homeland 0 +contact 0 +mail 0 +phone 0 +offic 0 +campu 0 +tarlor 0 +lake 0 +blvd 0 +number 0 +visit 0 +homepag 0 +sinc 0 +last 0 +modifi 0 +septemb 0 +comment 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..5d8892c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,59 @@ +inform 1 +isaac 1 +sheldon 1 +isheldon 1 +utexa 1 +comput 1 +scienc 1 +austin 1 +contact 0 +phone 0 +mail 0 +http 0 +user 0 +profession 0 +current 0 +graduat 0 +student 0 +univeristi 0 +texa 0 +depart 0 +reciv 0 +master 0 +decemb 0 +coursework 0 +concentr 0 +graphic 0 +reciev 0 +undergradu 0 +degre 0 +unives 0 +mass 0 +lowel 0 +summer 0 +intern 0 +scientif 0 +engin 0 +softwar 0 +small 0 +compani 0 +creat 0 +schlaeor 0 +mellor 0 +case 0 +tool 0 +project 0 +construct 0 +solid 0 +geometri 0 +us 0 +bsptree 0 +modular 0 +trace 0 +framework 0 +butt 0 +person 0 +babi 0 +page 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..d7ffe5b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,43 @@ +john 1 +adair 1 +live 1 +taylor 1 +back 1 +page 1 +crinkum 0 +crankum 0 +homepag 0 +compound 0 +wife 0 +holli 0 +eileen 0 +evan 0 +jame 0 +rice 0 +alumni 0 +friend 0 +includ 0 +carl 0 +white 0 +also 0 +internet 0 +consult 0 +matthew 0 +mengerink 0 +fish 0 +fanat 0 +work 0 +dejanew 0 +steve 0 +traylen 0 +get 0 +doctor 0 +book 0 +email 0 +jadair 0 +utexa 0 +graduat 0 +student 0 +home 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..fe0089a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,22 @@ +oper 1 +univers 1 +texa 1 +research 1 +john 0 +chamber 0 +home 0 +pagejohn 0 +chamberssenior 0 +system 0 +specialistb 0 +physic 0 +paso 0 +comput 0 +scienc 0 +yale 0 +universityph 0 +austin 0 +paper 0 +vita 0 +link 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..d5edc55a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,164 @@ +austin 2 +texa 2 +research 2 +aftereffect 2 +visual 2 +comput 1 +univers 1 +model 1 +scienc 1 +process 1 +tilt 1 +self 1 +organ 1 +illus 1 +result 1 +brain 1 +thesi 1 +primari 1 +cortex 1 +later 1 +lissom 1 +interact 1 +realist 1 +us 1 +dramat 1 +advanc 1 +make 1 +avail 1 +simul 1 +function 1 +call 1 +human 1 +propos 1 +also 1 +postscript 1 +inform 1 +jbednar 1 +utexa 1 +bednarjim 0 +bednar 0 +candid 0 +dept 0 +philosophi 0 +electr 0 +engin 0 +decemb 0 +concentr 0 +biolog 0 +ofcognit 0 +artifici 0 +neural 0 +network 0 +seek 0 +useth 0 +technolog 0 +past 0 +fewdecad 0 +equal 0 +understand 0 +thehuman 0 +mind 0 +power 0 +soon 0 +beavail 0 +cortic 0 +becomingpract 0 +enabl 0 +necessari 0 +refut 0 +testabl 0 +hypothes 0 +overal 0 +goal 0 +makecognit 0 +empir 0 +rather 0 +purelyphilosoph 0 +domain 0 +centuri 0 +master 0 +ofth 0 +nearli 0 +complet 0 +abstract 0 +long 0 +studi 0 +psychologist 0 +vision 0 +appar 0 +failur 0 +might 0 +offer 0 +insight 0 +carri 0 +particular 0 +class 0 +thought 0 +aris 0 +thu 0 +serv 0 +test 0 +case 0 +theori 0 +area 0 +specif 0 +sever 0 +inhibit 0 +neuron 0 +receiv 0 +input 0 +examin 0 +sirosh 0 +miikkulainen 0 +incorpor 0 +demonstr 0 +principl 0 +drive 0 +qualit 0 +quantit 0 +similar 0 +measur 0 +basi 0 +explan 0 +indirect 0 +effect 0 +line 0 +differ 0 +orient 0 +appli 0 +figur 0 +spatial 0 +frequenc 0 +predict 0 +preliminari 0 +report 0 +file 0 +begin 0 +doctor 0 +includ 0 +detail 0 +level 0 +visualbehavior 0 +extens 0 +contact 0 +email 0 +mail 0 +address 0 +depart 0 +finger 0 +command 0 +machin 0 +log 0 +departmentmi 0 +resum 0 +ascii 0 +format 0 +link 0 +probabl 0 +outdat 0 +paper 0 +interest 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..684d204c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,95 @@ +homepag 1 +test 1 +comput 1 +system 1 +visit 1 +java 1 +home 1 +univers 1 +librari 1 +cours 1 +topic 1 +info 1 +document 1 +page 1 +html 1 +help 1 +construct 1 +ofjunfanghi 0 +welcom 0 +frame 0 +graduat 0 +student 0 +scienc 0 +depart 0 +texa 0 +austin 0 +largest 0 +academ 0 +north 0 +america 0 +catalog 0 +resum 0 +professor 0 +novak 0 +assign 0 +excel 0 +sourc 0 +ethernet 0 +technolog 0 +special 0 +sysadm 0 +domain 0 +name 0 +inform 0 +free 0 +unixish 0 +oper 0 +linux 0 +project 0 +us 0 +debug 0 +transfer 0 +latex 0 +file 0 +unix 0 +email 0 +stuff 0 +utc 0 +kristina 0 +ross 0 +tutori 0 +learn 0 +take 0 +jeff 0 +network 0 +administr 0 +last 0 +summer 0 +florida 0 +state 0 +edmund 0 +automobil 0 +buyer 0 +guid 0 +want 0 +packag 0 +languag 0 +specif 0 +public 0 +ascii 0 +format 0 +look 0 +pretti 0 +good 0 +privaci 0 +like 0 +sceneri 0 +pictur 0 +jfang 0 +utexa 0 +start 0 +visitor 0 +number 0 +sinc 0 +applet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..d3f9aa83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,37 @@ +good 1 +austin 1 +john 1 +beer 1 +jprior 1 +utexa 1 +address 1 +priorjohn 0 +priormi 0 +resum 0 +accumul 0 +knowledg 0 +year 0 +dog 0 +someth 0 +long 0 +enough 0 +start 0 +hurt 0 +probabl 0 +chew 0 +nacho 0 +chip 0 +swallow 0 +sleep 0 +contact 0 +inform 0 +email 0 +mail 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +home 0 +phone 0 +swisher 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..df123ab8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,41 @@ +thoma 1 +comput 1 +jeff 1 +texa 1 +scienc 1 +homepagejeff 0 +homepagecontact 0 +informationpublicationssoftwar 0 +system 0 +gener 0 +research 0 +groupphoto 0 +albumfavorit 0 +internet 0 +sitesuniversityof 0 +departmentappliedresearch 0 +laboratori 0 +electricaland 0 +engin 0 +departmentedsfinanci 0 +trade 0 +technolog 0 +center 0 +fttc 0 +keyword 0 +search 0 +utacademiccalendarsut 0 +sportshook 0 +ultim 0 +longhorn 0 +site 0 +utfootbal 0 +scheduleaustintexa 0 +depart 0 +univers 0 +austin 0 +last 0 +modifi 0 +octob 0 +jthoma 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..2e7979d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,66 @@ +comput 1 +scienc 1 +univers 1 +austin 1 +peke 1 +jiani 1 +student 1 +texa 1 +china 1 +depart 1 +page 1 +homepagewelcom 0 +homepag 0 +first 0 +year 0 +indepart 0 +ataustin 0 +beij 0 +alumni 0 +ofpek 0 +chinesechines 0 +scenerychines 0 +novelschines 0 +classicschines 0 +magazineschines 0 +newspapersus 0 +link 0 +registrar 0 +gradaut 0 +studiesut 0 +libraryut 0 +campusutaccessabout 0 +weather 0 +todai 0 +citylimit 0 +lot 0 +excit 0 +stuff 0 +miscellaneousyahoojava 0 +sunjavascript 0 +netscapeth 0 +perl 0 +languag 0 +home 0 +pagecomput 0 +research 0 +associationcomput 0 +journal 0 +magzin 0 +webnetwork 0 +technicalreport 0 +libraryth 0 +collect 0 +bibliographiesintern 0 +contact 0 +street 0 +jyluo 0 +utexa 0 +finger 0 +meyour 0 +comment 0 +suggestionswould 0 +highli 0 +appreci 0 +visitorsinc 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..19300ee5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,54 @@ +home 1 +austin 1 +comput 1 +scienc 1 +interest 1 +concurr 1 +madra 1 +inform 1 +phone 1 +kedar 0 +namjoshiabout 0 +mehi 0 +thank 0 +check 0 +doctor 0 +student 0 +depart 0 +research 0 +advisor 0 +professor 0 +allen 0 +emerson 0 +tempor 0 +logic 0 +reason 0 +program 0 +semant 0 +distributedalgorithm 0 +automatatheori 0 +came 0 +fall 0 +receiv 0 +bachelor 0 +degre 0 +indian 0 +institut 0 +technolog 0 +wonder 0 +page 0 +lot 0 +stuff 0 +would 0 +like 0 +know 0 +person 0 +contact 0 +offic 0 +address 0 +west 0 +street 0 +todai 0 +amul 0 +adkedar 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..22e5c530 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,47 @@ +univers 1 +texa 1 +austin 1 +kenneth 1 +harker 1 +depart 1 +comput 1 +scienc 1 +utexa 1 +kharker 1 +public 1 +view 1 +opinion 1 +taylor 0 +hall 0 +amateur 0 +radio 0 +babylon 0 +linux 0 +rocketri 0 +cyberspac 0 +academ 0 +work 0 +polit 0 +stuff 0 +resum 0 +last 0 +updat 0 +world 0 +wide 0 +facil 0 +provid 0 +servic 0 +faculti 0 +student 0 +staff 0 +guest 0 +express 0 +page 0 +sole 0 +respons 0 +author 0 +necessarili 0 +reflect 0 +system 0 +board 0 +regent 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..9fad0a8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,92 @@ +kincaid 1 +linear 1 +comput 1 +numer 1 +algebra 1 +imac 1 +world 1 +mathemat 1 +system 1 +david 1 +analysi 1 +univers 1 +develop 1 +congress 1 +area 1 +softwar 1 +interest 1 +algorithm 1 +equat 1 +larg 1 +solut 1 +young 1 +senior 0 +lecturerassoci 0 +director 0 +center 0 +lamar 0 +texa 0 +austin 0 +honor 0 +award 0 +profession 0 +servic 0 +certif 0 +recognit 0 +creativ 0 +technicalinnov 0 +basic 0 +subprogram 0 +nasa 0 +technic 0 +committe 0 +session 0 +organ 0 +andappli 0 +interestmathemat 0 +high 0 +perform 0 +summari 0 +researchmi 0 +focus 0 +research 0 +us 0 +iter 0 +solv 0 +spars 0 +coeffici 0 +matric 0 +aris 0 +ellipt 0 +partial 0 +differenti 0 +implement 0 +parallel 0 +anoth 0 +select 0 +recent 0 +publicationsw 0 +chenei 0 +pacif 0 +grove 0 +brook 0 +cole 0 +hay 0 +itpack 0 +proceed 0 +coput 0 +atlanta 0 +stationari 0 +second 0 +degre 0 +method 0 +topic 0 +polynomi 0 +sever 0 +variabl 0 +applic 0 +rassia 0 +scientif 0 +river 0 +edg 0 +jersei 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..00a1fcf8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,67 @@ +comput 1 +univers 1 +kistler 1 +page 1 +scienc 1 +parallel 1 +mike 1 +home 1 +academ 1 +inform 1 +syracus 1 +york 1 +prof 1 +construct 0 +first 0 +year 0 +student 0 +texa 0 +ataustin 0 +thedepart 0 +also 0 +current 0 +emploi 0 +theperson 0 +softwar 0 +productsdivis 0 +backgroundba 0 +mathemat 0 +susquehanna 0 +selinsgrov 0 +master 0 +busi 0 +administr 0 +stern 0 +school 0 +businessnew 0 +interestsi 0 +interest 0 +algorithm 0 +particularli 0 +us 0 +commerci 0 +data 0 +process 0 +press 0 +random 0 +collect 0 +link 0 +coursesfal 0 +distribut 0 +iwith 0 +jayadev 0 +misra 0 +numer 0 +analysi 0 +linear 0 +algebrawith 0 +alan 0 +cline 0 +visitor 0 +number 0 +contact 0 +juli 0 +walk 0 +pflugervil 0 +email 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..5e061df3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,9 @@ +jacob 1 +kornerup 1 +kornerupjacob 0 +welcom 0 +home 0 +page 0 +time 0 +sinc 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..8b1b363f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,78 @@ +knowledg 1 +research 1 +texa 1 +austin 1 +kuiper 1 +benjamin 1 +comput 1 +scienc 1 +univers 1 +mathemat 1 +represent 1 +commonsens 1 +incomplet 1 +reason 1 +model 1 +qualit 1 +simul 1 +spring 1 +build 1 +intellig 1 +agent 1 +kuipersbenjamin 0 +kuipersbruton 0 +centenni 0 +professor 0 +swarthmor 0 +colleg 0 +interest 0 +expert 0 +withparticular 0 +emphasi 0 +effect 0 +thequalit 0 +grouphom 0 +page 0 +describ 0 +topic 0 +paper 0 +student 0 +andavail 0 +softwar 0 +consider 0 +detail 0 +accomplish 0 +includ 0 +tour 0 +spatial 0 +cognit 0 +qsim 0 +algorithm 0 +access 0 +limit 0 +logic 0 +robot 0 +explor 0 +map 0 +strategi 0 +base 0 +recognit 0 +distinct 0 +place 0 +qualitativereason 0 +cambridg 0 +press 0 +teach 0 +plan 0 +fall 0 +physic 0 +world 0 +contact 0 +inform 0 +mail 0 +prof 0 +depart 0 +email 0 +utexa 0 +phone 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..6eb590f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,48 @@ +comput 1 +simon 1 +texa 1 +austin 1 +email 1 +utexa 1 +eduphon 1 +network 1 +assist 1 +kata 1 +compress 1 +postscript 1 +professor 0 +sciencesdepart 0 +sciencesunivers 0 +offic 0 +taylor 0 +hall 0 +campu 0 +mail 0 +scienc 0 +photo 0 +profil 0 +research 0 +laboratori 0 +fall 0 +spring 0 +administr 0 +also 0 +editori 0 +ieee 0 +transact 0 +carbon 0 +inform 0 +electron 0 +submissionnew 0 +clip 0 +tune 0 +turn 0 +toss 0 +internet 0 +empt 0 +american 0 +statesman 0 +februari 0 +front 0 +page 0 +cont 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..68300bbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,54 @@ +research 2 +network 1 +laboratori 1 +scienc 1 +protocol 1 +austin 1 +comput 1 +texa 1 +perform 1 +project 1 +foundat 1 +program 1 +depart 0 +univers 0 +activ 0 +span 0 +entir 0 +develop 0 +cycl 0 +design 0 +specif 0 +verif 0 +test 0 +analysi 0 +implement 0 +tune 0 +currentinterest 0 +architectur 0 +address 0 +chang 0 +underli 0 +commun 0 +technolog 0 +well 0 +applic 0 +supervis 0 +simon 0 +professor 0 +fund 0 +provid 0 +nation 0 +nsaunivers 0 +advanc 0 +lockhe 0 +current 0 +recent 0 +paper 0 +support 0 +videoservic 0 +secur 0 +theori 0 +workshop 0 +integr 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..48c92e9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,32 @@ +home 1 +page 1 +landrum 1 +christian 1 +robert 0 +stori 0 +mail 0 +viruspictur 0 +mountain 0 +empirepch 0 +retreattexa 0 +republican 0 +convent 0 +backbon 0 +rockrsumfamilyinterest 0 +council 0 +awai 0 +graham 0 +gordon 0 +pageth 0 +comput 0 +scienc 0 +depart 0 +ofth 0 +univers 0 +texa 0 +austin 0 +councillandrum 0 +utexa 0 +edulast 0 +updat 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..81282f9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,30 @@ +comput 1 +greg 1 +lavend 1 +univers 1 +texa 1 +scienc 1 +austinr 0 +lavenderadjunct 0 +assist 0 +professordepart 0 +anddepart 0 +electr 0 +engineeringth 0 +austin 0 +contact 0 +address 0 +research 0 +activ 0 +engin 0 +cours 0 +recommend 0 +read 0 +biograph 0 +informationsuggest 0 +improv 0 +page 0 +welcom 0 +last 0 +updat 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..16c83b44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,64 @@ +scienc 1 +home 1 +comput 1 +depart 1 +univers 1 +austin 1 +singapor 1 +jame 1 +welcom 1 +texa 1 +inform 1 +nation 1 +island 1 +come 1 +page 0 +pageyeap 0 +normal 0 +look 0 +student 0 +sciencesat 0 +bachelor 0 +master 0 +atth 0 +system 0 +disc 0 +research 0 +interest 0 +algorithm 0 +data 0 +structur 0 +vlsi 0 +designalgorithm 0 +small 0 +tropic 0 +call 0 +locat 0 +degre 0 +north 0 +equat 0 +internet 0 +communityi 0 +much 0 +aliv 0 +particip 0 +know 0 +aboutthi 0 +peopl 0 +wife 0 +hong 0 +kong 0 +month 0 +activ 0 +lovesto 0 +smile 0 +contact 0 +mail 0 +leekk 0 +utexa 0 +phone 0 +campu 0 +addr 0 +taylor 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..4d37db9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,30 @@ +softwar 1 +less 1 +laboratori 1 +experiment 1 +system 1 +research 1 +main 0 +object 0 +investig 0 +wai 0 +buildreli 0 +high 0 +perform 0 +parallel 0 +distributedsystem 0 +apart 0 +depart 0 +computersci 0 +univers 0 +oftexa 0 +austin 0 +projectsmemb 0 +lablessss 0 +seminar 0 +seriessponsorslast 0 +modifi 0 +decemb 0 +robert 0 +blumoferdb 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..c4495a5f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,81 @@ +comput 1 +parallel 1 +snyder 1 +languag 1 +program 1 +sciencesth 1 +portabl 1 +implement 1 +proceed 1 +intern 1 +address 1 +calvin 0 +lincalvin 0 +linassist 0 +professor 0 +import 0 +thing 0 +iswhat 0 +studi 0 +_study_ 0 +plai 0 +_play_ 0 +pete 0 +carrilresearch 0 +interestscompil 0 +performanceanalysi 0 +scientif 0 +project 0 +home 0 +page 0 +select 0 +publicationsth 0 +novel 0 +mathemat 0 +biologyalgorithm 0 +dikaiako 0 +manoussaki 0 +woodward 0 +conf 0 +supercomput 0 +accommod 0 +polymorph 0 +data 0 +decomposit 0 +explicitli 0 +parallelprogram 0 +internationalparallel 0 +process 0 +symposium 0 +april 0 +arrai 0 +sublanguag 0 +compilersfor 0 +banerje 0 +gelernt 0 +nicolau 0 +padua 0 +springer 0 +verlag 0 +simpl 0 +journal 0 +comparison 0 +model 0 +share 0 +memori 0 +multiprocessor 0 +withl 0 +confer 0 +parallelprocess 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +depart 0 +univers 0 +texa 0 +austinaustin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..d92c0df6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,62 @@ +page 1 +austin 1 +system 1 +guangtian 1 +home 1 +current 1 +time 1 +depart 1 +comput 1 +scienc 1 +texa 1 +real 1 +research 1 +last 1 +liugt 1 +utexa 1 +homepagehi 0 +welcom 0 +construct 0 +content 0 +permit 0 +apolog 0 +incomplet 0 +result 0 +inconveni 0 +graduat 0 +student 0 +theunivers 0 +researchi 0 +member 0 +professor 0 +group 0 +interest 0 +includ 0 +timeschedul 0 +algorithm 0 +oper 0 +network 0 +perform 0 +distribut 0 +also 0 +work 0 +data 0 +replic 0 +knowledg 0 +mine 0 +summer 0 +internship 0 +contact 0 +inform 0 +offic 0 +good 0 +view 0 +phone 0 +email 0 +mail 0 +address 0 +univers 0 +updat 0 +pleas 0 +send 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..f4312e47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,45 @@ +lorenzo 1 +comput 1 +alvisi 1 +scienc 1 +distribut 1 +cornel 1 +utexa 1 +fall 1 +home 0 +page 0 +assist 0 +professor 0 +depart 0 +laurea 0 +physic 0 +universit 0 +agrav 0 +bologna 0 +itali 0 +offic 0 +taylorhal 0 +campusshow 0 +locat 0 +taylor 0 +hall 0 +phone 0 +mail 0 +research 0 +interestsi 0 +interest 0 +special 0 +emphasi 0 +fault 0 +toler 0 +cours 0 +spring 0 +oper 0 +system 0 +topic 0 +sytem 0 +public 0 +photo 0 +maria 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..9e31c4d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,62 @@ +comput 1 +current 1 +austin 1 +home 1 +page 1 +work 1 +scienc 1 +texa 1 +link 1 +microsoft 1 +luxu 1 +utexa 1 +graduat 0 +student 0 +networksoth 0 +class 0 +tsinghua 0 +univers 0 +undergradu 0 +institut 0 +china 0 +depart 0 +studyut 0 +universityaustin 0 +live 0 +academ 0 +internet 0 +area 0 +languag 0 +system 0 +ieee 0 +sigcomm 0 +sigmod 0 +siglink 0 +siggraph 0 +sigmm 0 +sigir 0 +compani 0 +onlin 0 +shop 0 +cool 0 +site 0 +chines 0 +music 0 +newsjob 0 +hunt 0 +weather 0 +forcast 0 +dictionari 0 +contact 0 +inform 0 +campu 0 +dept 0 +univ 0 +addr 0 +corpor 0 +mail 0 +xuelu 0 +thank 0 +come 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..c4a77cdd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,89 @@ +softwar 2 +engin 2 +werth 1 +educ 1 +comput 1 +chair 1 +honour 1 +fall 1 +offic 1 +link 1 +class 1 +ieee 1 +committe 1 +develop 1 +scienc 1 +tool 1 +direct 1 +object 1 +orient 1 +program 1 +lauri 0 +werthlauri 0 +werthlectur 0 +lwerth 0 +utexa 0 +educurr 0 +semest 0 +hour 0 +time 0 +taylor 0 +phone 0 +engineeringc 0 +contemporari 0 +issu 0 +scienceprofession 0 +servicevic 0 +technic 0 +presentco 0 +confer 0 +profession 0 +presentarea 0 +interestsoftwar 0 +cognit 0 +summari 0 +researchmi 0 +current 0 +work 0 +center 0 +andenviron 0 +area 0 +includ 0 +human 0 +interfac 0 +andsoftwar 0 +metric 0 +select 0 +recent 0 +publicationsl 0 +qualiti 0 +assur 0 +project 0 +transact 0 +januari 0 +lectur 0 +note 0 +process 0 +improv 0 +industri 0 +strength 0 +case 0 +tomayko 0 +springer 0 +verlag 0 +john 0 +proceed 0 +workshop 0 +ics 0 +macintosh 0 +journal 0 +us 0 +univers 0 +texa 0 +depart 0 +home 0 +pagefaculti 0 +profilesc 0 +classeslast 0 +updat 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..02ce6eca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,42 @@ +madhukar 1 +austin 1 +texa 1 +reddi 1 +home 1 +page 1 +offic 1 +utexa 1 +offici 1 +korupoluwelcom 0 +reach 0 +avenu 0 +taylor 0 +hall 0 +dept 0 +comp 0 +scienc 0 +univ 0 +ahom 0 +link 0 +madrashomepag 0 +ganga 0 +alumniclass 0 +madra 0 +utalgorithm 0 +comput 0 +theori 0 +group 0 +colloquium 0 +oncomput 0 +complex 0 +info 0 +cricket 0 +worldwid 0 +site 0 +espnet 0 +sportszon 0 +interact 0 +batchu 0 +india 0 +author 0 +korupoluemail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..0d7ba4b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,23 @@ +richard 0 +malloryrichard 0 +malloryresearchthesi 0 +research 0 +produc 0 +quasi 0 +natur 0 +languag 0 +explan 0 +qsimsimul 0 +current 0 +implement 0 +work 0 +simpl 0 +system 0 +contact 0 +email 0 +mallori 0 +utexa 0 +offic 0 +taylor 0 +austin 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..9988b5a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,120 @@ +stabil 2 +system 2 +comput 2 +self 1 +author 1 +moham 1 +gouda 1 +flow 1 +state 1 +scienc 1 +rout 1 +legitim 1 +illegitim 1 +proceed 1 +inform 1 +prepar 1 +marco 1 +depart 1 +univers 1 +texa 1 +network 1 +interest 1 +distribut 1 +fault 1 +step 1 +time 1 +respons 1 +appear 1 +third 1 +workshop 1 +maximum 1 +tree 1 +minimum 1 +utexa 1 +austin 1 +home 0 +page 0 +schneidermarco 0 +schneiderph 0 +candid 0 +austinresearchth 0 +titl 0 +dissert 0 +research 0 +area 0 +protocol 0 +toler 0 +particular 0 +implicit 0 +design 0 +label 0 +itsstat 0 +identifi 0 +occur 0 +correct 0 +intend 0 +execut 0 +consid 0 +said 0 +whenregardless 0 +initi 0 +guarante 0 +converg 0 +finit 0 +number 0 +systemwhich 0 +stai 0 +forev 0 +vita 0 +postscript 0 +public 0 +survei 0 +march 0 +real 0 +decis 0 +toward 0 +tolerantr 0 +kluwer 0 +academ 0 +publish 0 +earlier 0 +version 0 +intern 0 +octob 0 +invit 0 +talk 0 +annual 0 +joint 0 +confer 0 +novemb 0 +submit 0 +journal 0 +second 0 +depth 0 +anish 0 +arora 0 +memori 0 +requir 0 +silent 0 +fifteenth 0 +symposium 0 +principl 0 +shlomi 0 +dolev 0 +span 0 +implement 0 +internet 0 +person 0 +list 0 +link 0 +construct 0 +contact 0 +offic 0 +taylor 0 +hall 0 +email 0 +postal 0 +address 0 +ctaylor 0 +usamarco 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..4e00b01e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,92 @@ +comput 1 +scienc 1 +inform 1 +austin 1 +depart 1 +research 1 +page 1 +mark 1 +offic 1 +taylor 1 +hall 1 +univers 1 +texa 1 +taught 1 +addit 1 +appl 1 +pleas 1 +class 1 +routin 1 +avail 1 +home 0 +johnstonemark 0 +johnstonecontact 0 +postal 0 +address 0 +usual 0 +find 0 +best 0 +reach 0 +isvia 0 +email 0 +markj 0 +utexa 0 +johnston 0 +also 0 +look 0 +full 0 +finger 0 +semest 0 +oper 0 +system 0 +byrichard 0 +brice 0 +object 0 +orient 0 +design 0 +analysisclass 0 +glenn 0 +down 0 +somerset 0 +compani 0 +graduat 0 +spring 0 +work 0 +motorola 0 +somersetdesign 0 +centerresearch 0 +informationi 0 +member 0 +oop 0 +group 0 +part 0 +build 0 +real 0 +time 0 +garbagecollector 0 +perform 0 +number 0 +ofstudi 0 +memori 0 +alloc 0 +postscript 0 +copi 0 +dissertationpropos 0 +listof 0 +public 0 +along 0 +brief 0 +descript 0 +develop 0 +librari 0 +allow 0 +precis 0 +timingof 0 +intel 0 +pentium 0 +run 0 +linux 0 +code 0 +publicli 0 +stuff 0 +relat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..0a6d8931 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,13 @@ +home 1 +page 1 +mark 0 +point 0 +interest 0 +visit 0 +also 0 +friend 0 +click 0 +last 0 +modifi 0 +markng 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..a26971dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,85 @@ +page 1 +comput 1 +marku 1 +link 1 +uniti 1 +time 1 +prof 1 +taylor 1 +hall 1 +phone 1 +address 1 +utexa 1 +austin 1 +scienc 1 +place 1 +interest 1 +archiv 1 +kaltenbachmarku 0 +kaltenbachintroductionwelcom 0 +home 0 +current 0 +construct 0 +andwil 0 +entri 0 +permit 0 +iapolog 0 +incomplet 0 +result 0 +inconveni 0 +researchi 0 +member 0 +misra 0 +spsp 0 +research 0 +groupand 0 +emerson 0 +stempor 0 +reason 0 +group 0 +part 0 +work 0 +develop 0 +model 0 +checkerfor 0 +finit 0 +state 0 +program 0 +proposit 0 +logic 0 +verifi 0 +system 0 +recent 0 +version 0 +thesi 0 +isalso 0 +avail 0 +contact 0 +inform 0 +offic 0 +email 0 +postal 0 +univers 0 +texa 0 +depart 0 +find 0 +internet 0 +worth 0 +avisit 0 +theut 0 +departmenthom 0 +softwar 0 +archivefor 0 +macintosh 0 +appl 0 +sworld 0 +wide 0 +technic 0 +supporthom 0 +actansit 0 +comprehens 0 +network 0 +theatt 0 +distribut 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..1eae9f24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,50 @@ +austin 1 +comput 1 +scienc 1 +univers 1 +texa 1 +home 1 +page 1 +depart 1 +address 1 +utc 1 +memarti 0 +mayberri 0 +student 0 +researchal 0 +kind 0 +stuff 0 +educ 0 +math 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +martym 0 +utexa 0 +postal 0 +click 0 +applet 0 +paus 0 +resum 0 +displai 0 +local 0 +link 0 +neural 0 +network 0 +homepag 0 +hotlist 0 +downtown 0 +anywher 0 +virtualc 0 +internetrestaur 0 +guid 0 +virtual 0 +tnstechnolog 0 +demonstr 0 +read 0 +daili 0 +texan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..90e4ca47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,45 @@ +comput 1 +scienc 1 +austin 1 +univers 1 +norm 1 +mccain 1 +reason 1 +avail 1 +onlin 1 +offic 1 +home 0 +page 0 +mccainabout 0 +mephd 0 +student 0 +colleg 0 +natur 0 +sciencesat 0 +texa 0 +advisor 0 +vladimir 0 +lifschitz 0 +expect 0 +thesi 0 +titl 0 +causal 0 +commonsens 0 +action 0 +kansa 0 +philosophi 0 +baker 0 +vita 0 +postscript 0 +research 0 +interestscommonsens 0 +actionlog 0 +program 0 +nonmonoton 0 +reasoningmi 0 +paper 0 +contact 0 +mepost 0 +usavoic 0 +main 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..61865e36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,42 @@ +univers 1 +baylor 1 +elain 1 +learn 1 +research 1 +texa 1 +comput 1 +scienc 1 +english 1 +address 1 +austin 1 +mari 0 +califfmari 0 +califfmachin 0 +groupunivers 0 +austinresearchmi 0 +current 0 +interest 0 +us 0 +machin 0 +especiallyinduct 0 +logic 0 +program 0 +natur 0 +languag 0 +acquisit 0 +formor 0 +info 0 +check 0 +vita 0 +educ 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +mecaliff 0 +utexa 0 +postal 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..c80253a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,209 @@ +knowledg 2 +base 2 +utexa 2 +system 1 +us 1 +domain 1 +research 1 +question 1 +model 1 +porter 1 +explan 1 +gener 1 +answer 1 +help 1 +group 1 +mallori 1 +souther 1 +prado 1 +correl 1 +comput 1 +develop 1 +larg 1 +contain 1 +task 1 +construct 1 +lester 1 +desk 1 +project 1 +bruce 1 +rich 1 +peter 1 +clark 1 +fred 1 +charl 1 +callawai 1 +carl 1 +andersen 1 +steve 1 +austin 1 +would 1 +requir 1 +perform 1 +multipl 1 +viewpoint 1 +built 1 +biologi 1 +method 1 +automat 1 +varieti 1 +concern 1 +distribut 1 +result 1 +recent 1 +jame 1 +test 1 +composit 1 +predict 1 +jeff 1 +languag 1 +text 1 +plan 1 +assist 1 +acker 1 +eilert 1 +bareiss 1 +karl 1 +murrai 1 +rickel 1 +groupknowledg 0 +shown 0 +overviewour 0 +part 0 +depart 0 +scienc 0 +atuniv 0 +texa 0 +long 0 +term 0 +goal 0 +technolog 0 +forconstruct 0 +multifunct 0 +oncomput 0 +significantli 0 +improv 0 +currentexpert 0 +tutor 0 +broadknowledg 0 +toexplain 0 +past 0 +eight 0 +year 0 +inon 0 +area 0 +answeringa 0 +fact 0 +concept 0 +ofth 0 +largest 0 +kind 0 +content 0 +structur 0 +formallyrepres 0 +addit 0 +expand 0 +arealso 0 +begin 0 +similar 0 +notabl 0 +especi 0 +encourag 0 +knowledgebas 0 +thebiolog 0 +express 0 +english 0 +biolog 0 +object 0 +event 0 +control 0 +experi 0 +expert 0 +found 0 +littl 0 +differ 0 +andthos 0 +written 0 +colleagu 0 +current 0 +extend 0 +type 0 +beanswer 0 +autom 0 +reason 0 +jeffrickel 0 +taskof 0 +appropri 0 +well 0 +build 0 +thesimplest 0 +adequ 0 +dauntingrequir 0 +sinc 0 +like 0 +implicitli 0 +manymodel 0 +numer 0 +level 0 +detail 0 +qualit 0 +process 0 +compilerand 0 +qsim 0 +simul 0 +program 0 +final 0 +bybuild 0 +anoth 0 +computingenviron 0 +focuss 0 +deskassist 0 +proport 0 +custom 0 +squestion 0 +otherwis 0 +phone 0 +normal 0 +projectsour 0 +complet 0 +ongo 0 +includ 0 +represent 0 +kned 0 +editor 0 +kastl 0 +retriev 0 +knight 0 +fare 0 +natur 0 +lex 0 +aid 0 +mainten 0 +lexicon 0 +tripel 0 +theorist 0 +searcher 0 +alumni 0 +alumna 0 +lian 0 +erik 0 +brad 0 +blumenth 0 +brant 0 +eolu 0 +uwyo 0 +clarkp 0 +redwood 0 +boe 0 +ncsu 0 +publicationsclick 0 +select 0 +public 0 +relat 0 +projectsclick 0 +herefor 0 +extens 0 +collect 0 +pointer 0 +aroundth 0 +world 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..badda945 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,99 @@ +mirank 1 +student 1 +rule 1 +current 1 +text 1 +bibtex 1 +daniel 1 +page 1 +match 1 +treat 1 +algorithm 1 +rete 1 +research 1 +candid 1 +past 1 +home 0 +welcom 0 +belat 0 +presenc 0 +construct 0 +someth 0 +like 0 +finish 0 +hold 0 +breath 0 +send 0 +note 0 +utexa 0 +fashionwai 0 +wouldn 0 +place 0 +either 0 +learn 0 +itscomparison 0 +warn 0 +render 0 +obsolet 0 +byth 0 +leap 0 +interest 0 +goal 0 +encompass 0 +venu 0 +languag 0 +usea 0 +basi 0 +activ 0 +distribut 0 +databas 0 +fundamentalcomput 0 +scienc 0 +problem 0 +corollari 0 +evolv 0 +thatgoal 0 +constraint 0 +satisfact 0 +search 0 +queri 0 +optim 0 +relat 0 +object 0 +orient 0 +parallel 0 +execut 0 +base 0 +program 0 +knowledg 0 +compil 0 +bibliographi 0 +sometim 0 +link 0 +paper 0 +come 0 +soon 0 +group 0 +roberto 0 +bayardo 0 +david 0 +gadboi 0 +lanc 0 +obermey 0 +vasili 0 +samoladi 0 +robert 0 +schrag 0 +master 0 +srinivasan 0 +vaidyaraman 0 +lane 0 +warshaw 0 +archi 0 +andrewsdavid 0 +brantchin 0 +ming 0 +kuoshiow 0 +yang 0 +salvator 0 +stolfo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..40511f8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,85 @@ +misra 1 +comput 1 +process 1 +jayadev 1 +chair 1 +john 1 +honor 1 +fellow 1 +program 1 +formal 1 +design 1 +synchron 1 +parallel 1 +inform 1 +research 1 +group 1 +misrareg 0 +scienc 0 +depart 0 +tech 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +hopkin 0 +univers 0 +award 0 +profession 0 +servic 0 +simon 0 +guggenheim 0 +ieee 0 +fellowarea 0 +interestparallel 0 +summari 0 +researchmi 0 +interest 0 +appli 0 +method 0 +practic 0 +particularli 0 +inth 0 +specif 0 +asynchronoussystem 0 +select 0 +recent 0 +publicationsj 0 +powerlist 0 +structur 0 +recurs 0 +classic 0 +mind 0 +essai 0 +hoar 0 +prentic 0 +hall 0 +januari 0 +loos 0 +coupl 0 +futur 0 +gener 0 +system 0 +north 0 +holland 0 +phase 0 +letter 0 +equat 0 +reason 0 +nondeterminist 0 +aspect 0 +chandi 0 +foundat 0 +addison 0 +weslei 0 +homepag 0 +work 0 +electron 0 +access 0 +otherpap 0 +current 0 +project 0 +seuss 0 +anoverview 0 +apostscript 0 +versionaccess 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..6cca6b49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,175 @@ +learn 2 +group 2 +research 2 +system 2 +machin 2 +intellig 2 +univers 2 +induct 2 +languag 1 +utexa 1 +program 1 +artifici 1 +confer 1 +journal 1 +base 1 +natur 1 +plan 1 +comput 1 +logic 1 +acquisit 1 +estlin 1 +theori 1 +order 1 +associ 1 +texa 1 +student 1 +sowmya 1 +baff 1 +mahonei 1 +bradlei 1 +public 1 +abduct 1 +model 1 +speedup 1 +list 1 +softwar 1 +revis 1 +rule 1 +first 1 +data 1 +aaai 1 +linguist 1 +european 1 +special 1 +interest 1 +intern 1 +inform 1 +index 1 +illinoi 1 +utc 0 +supervis 0 +professor 0 +moonei 0 +focuseson 0 +combin 0 +empir 0 +knowledg 0 +techniqu 0 +includ 0 +applic 0 +knowledgerefin 0 +part 0 +scienc 0 +depart 0 +atth 0 +ataustin 0 +pictur 0 +click 0 +graduat 0 +mari 0 +elain 0 +califf 0 +mecaliff 0 +tara 0 +hermjakob 0 +ramachandran 0 +cindi 0 +thompson 0 +cthomp 0 +alumni 0 +paul 0 +scicomp 0 +jeff 0 +firstadvisor 0 +hwee 0 +nhweetou 0 +trantor 0 +dirk 0 +ourston 0 +dirk_ourston 0 +cpqm 0 +saic 0 +richard 0 +furtwangen 0 +siddarth 0 +subramanian 0 +georgetown 0 +john 0 +zell 0 +acad 0 +drake 0 +area 0 +qualit 0 +diagnosi 0 +tutor 0 +refin 0 +uncertain 0 +reasoningher 0 +complet 0 +accel 0 +reason 0 +neither 0 +proposit 0 +fort 0 +chillin 0 +predic 0 +invent 0 +foidl 0 +decis 0 +dolphin 0 +ad 0 +search 0 +control 0 +prolog 0 +standard 0 +classif 0 +algorithm 0 +autom 0 +experiment 0 +comparison 0 +repositori 0 +form 0 +relat 0 +site 0 +american 0 +ilpnet 0 +scientif 0 +network 0 +sigart 0 +signll 0 +joint 0 +aritfici 0 +ijcai 0 +nation 0 +icml 0 +fourth 0 +sourc 0 +subject 0 +biblio 0 +queri 0 +machinelearn 0 +home 0 +page 0 +servic 0 +paper 0 +archiv 0 +jair 0 +foil 0 +quinlan 0 +learner 0 +prodigi 0 +problem 0 +solv 0 +carnegi 0 +mellon 0 +ucpop 0 +partial 0 +planner 0 +washington 0 +explan 0 +oxford 0 +irvin 0 +austin 0 +wisconsin 0 +madison 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..e1e961a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,110 @@ +univers 2 +learn 1 +texa 1 +austin 1 +comput 1 +illinoi 1 +champaign 1 +scienc 1 +inform 1 +urbana 1 +machin 1 +program 1 +base 1 +address 1 +moonei 1 +research 1 +artifici 1 +intellig 1 +acquisit 1 +exampl 1 +network 1 +symbol 1 +home 1 +depart 1 +raymond 0 +homepageraymond 0 +mooneyassoci 0 +professor 0 +computersci 0 +engin 0 +interestsmi 0 +current 0 +interest 0 +primarilyin 0 +area 0 +includ 0 +natur 0 +languag 0 +parser 0 +lexicon 0 +extract 0 +word 0 +sens 0 +disambigu 0 +induct 0 +logic 0 +prolog 0 +knowledg 0 +theori 0 +refin 0 +automat 0 +modifi 0 +rule 0 +bayesian 0 +empir 0 +data 0 +search 0 +control 0 +improv 0 +plan 0 +effici 0 +qualiti 0 +compar 0 +combin 0 +neural 0 +public 0 +page 0 +vita 0 +finger 0 +cours 0 +informationfal 0 +lisp 0 +learningspr 0 +iiperson 0 +historyi 0 +grew 0 +small 0 +town 0 +fallon 0 +wherestart 0 +attend 0 +fallontownship 0 +highschool 0 +start 0 +fall 0 +went 0 +urbanato 0 +obtain 0 +degre 0 +list 0 +decemb 0 +complet 0 +myph 0 +thesi 0 +explan 0 +learninggroup 0 +direct 0 +prof 0 +gerald 0 +dejong 0 +began 0 +posit 0 +contact 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +meadowfir 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..9244e38b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,100 @@ +austin 1 +link 1 +comput 1 +scienc 1 +decis 1 +task 1 +univers 1 +texa 1 +action 1 +home 1 +page 1 +moriarti 1 +depart 1 +control 1 +agent 1 +system 1 +select 1 +return 1 +research 1 +neural 1 +network 1 +inform 1 +address 1 +utc 1 +dave 0 +moriartydav 0 +student 0 +researchsequenti 0 +appear 0 +mani 0 +practic 0 +real 0 +world 0 +problemsinclud 0 +resourc 0 +alloc 0 +rout 0 +canb 0 +character 0 +follow 0 +scenario 0 +observ 0 +stateof 0 +dynam 0 +finit 0 +thesystem 0 +enter 0 +state 0 +upon 0 +must 0 +selectanoth 0 +payoff 0 +madeor 0 +sequenc 0 +object 0 +thesequ 0 +highest 0 +total 0 +cumulativepayoff 0 +evolv 0 +geneticalgorithm 0 +learn 0 +perform 0 +sequenti 0 +amparticularli 0 +interest 0 +problem 0 +specif 0 +knowledg 0 +iscurr 0 +unavail 0 +costli 0 +obtain 0 +domain 0 +havestudi 0 +includ 0 +game 0 +plai 0 +intellig 0 +constraintsatisfact 0 +list 0 +public 0 +educ 0 +universityof 0 +tulan 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +utexa 0 +postal 0 +local 0 +homepag 0 +us 0 +sport 0 +misc 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..4823edf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,38 @@ +univers 1 +wade 1 +utexa 1 +austin 1 +comput 1 +scienc 1 +texa 1 +mine 1 +engin 1 +utah 1 +mwbarn 1 +barnesm 0 +barnesmwbarn 0 +eduresearch 0 +workhelp 0 +pagestyp 0 +map 0 +literatureliteratur 0 +research 0 +notesclassesbackground 0 +informationph 0 +student 0 +depart 0 +educ 0 +reach 0 +mehom 0 +tanglebriar 0 +trail 0 +campu 0 +offic 0 +yete 0 +mail 0 +eduauthor 0 +barnesemail 0 +edulast 0 +updat 0 +mondai 0 +decemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..9cb3448f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,83 @@ +room 1 +dale 1 +nell 1 +seed 1 +comput 1 +scienc 1 +contain 1 +professor 1 +home 1 +retir 1 +austin 1 +teach 1 +summer 1 +pleas 1 +author 1 +websit 0 +scrollit_rl 0 +timertwo 0 +window 0 +settimeout 0 +els 0 +pagesunivers 0 +texa 0 +departmentwelcom 0 +page 0 +reach 0 +address 0 +senior 0 +lectur 0 +univers 0 +oftexa 0 +receiv 0 +utaustin 0 +faculti 0 +sinc 0 +fromful 0 +time 0 +full 0 +load 0 +falland 0 +spend 0 +spring 0 +write 0 +travel 0 +feel 0 +free 0 +brows 0 +resum 0 +curriculum 0 +vita 0 +bibliographi 0 +inform 0 +text 0 +book 0 +research 0 +abstract 0 +ofdissert 0 +chair 0 +recent 0 +person 0 +whichcontain 0 +memento 0 +nontechn 0 +interest 0 +direct 0 +anycorrespond 0 +mail 0 +account 0 +ndale 0 +utexa 0 +profession 0 +profilepublicationsresearch 0 +interestsperson 0 +interestsnel 0 +westlak 0 +offic 0 +document 0 +creat 0 +assist 0 +right 0 +reserv 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..6acde646 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,27 @@ +page 1 +view 1 +netscap 1 +browser 1 +frame 1 +color 1 +neeraj 0 +support 0 +home 0 +either 0 +download 0 +navig 0 +without 0 +note 0 +choos 0 +latter 0 +pleas 0 +keep 0 +mind 0 +design 0 +pretti 0 +background 0 +chosen 0 +work 0 +obnoxi 0 +chartreus 0 +blame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..5e938bf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,20 @@ +home 1 +comput 1 +texa 1 +gnan 0 +pagegnana 0 +kumar 0 +natarajan 0 +page 0 +depart 0 +sciencesunivers 0 +austini 0 +graduat 0 +student 0 +sciencedepart 0 +univers 0 +austin 0 +mail 0 +utexa 0 +edufind 0 +log 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..b6d58d2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,34 @@ +nimar 1 +arora 1 +home 1 +look 1 +singh 0 +page 0 +medic 0 +art 0 +austin 0 +typic 0 +first 0 +year 0 +student 0 +doesn 0 +quit 0 +knowwhat 0 +area 0 +interest 0 +resum 0 +know 0 +altern 0 +bookmarksto 0 +clearer 0 +pictur 0 +contact 0 +click 0 +queri 0 +hit 0 +term 0 +score 0 +ters 0 +output 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..6af336be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,77 @@ +languag 1 +comput 1 +group 1 +utc 1 +natur 1 +paper 1 +learn 1 +hermjakob 1 +depart 1 +scienc 1 +univers 1 +texa 1 +discuss 1 +meet 1 +current 1 +prof 1 +research 1 +acquist 0 +groupnatur 0 +acquisit 0 +groupat 0 +austinw 0 +explor 0 +area 0 +acquisitionand 0 +fall 0 +usual 0 +everi 0 +wednesdai 0 +havedrawn 0 +close 0 +probabl 0 +resum 0 +second 0 +third 0 +week 0 +januari 0 +propos 0 +previous 0 +particip 0 +includ 0 +moonei 0 +risto 0 +miikkulainen 0 +bobbi 0 +bryant 0 +mari 0 +elain 0 +califf 0 +marti 0 +mayberri 0 +rupert 0 +tang 0 +poon 0 +cindi 0 +thompson 0 +inform 0 +pleas 0 +contact 0 +coordin 0 +utexa 0 +relat 0 +site 0 +associ 0 +linguist 0 +signll 0 +special 0 +interest 0 +print 0 +archiv 0 +machin 0 +neural 0 +network 0 +ofth 0 +ataustinlast 0 +updat 0 +novemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..cb4ef77f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,70 @@ +neural 1 +research 1 +utc 1 +net 1 +group 1 +artifici 1 +intellig 1 +scienc 1 +groupth 0 +supervis 0 +prof 0 +ristomiikkulainen 0 +part 0 +comput 0 +depart 0 +univers 0 +texa 0 +ataustin 0 +concentr 0 +andcognit 0 +includ 0 +natur 0 +languag 0 +process 0 +schema 0 +basedvis 0 +cortic 0 +self 0 +organ 0 +episod 0 +memori 0 +decis 0 +make 0 +evolv 0 +network 0 +genet 0 +algorithm 0 +click 0 +mapbelow 0 +detail 0 +check 0 +hypertext 0 +book 0 +later 0 +interact 0 +thecortex 0 +structur 0 +function 0 +risto 0 +miikkulainen 0 +graduat 0 +student 0 +alumni 0 +visitor 0 +public 0 +demo 0 +poster 0 +softwar 0 +home 0 +page 0 +confer 0 +newsgroup 0 +archiv 0 +inform 0 +sourc 0 +gener 0 +tool 0 +privat 0 +linkswusagemartym 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..f232e5e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,77 @@ +lisp 1 +scienc 1 +demo 1 +comput 1 +program 1 +gordon 1 +novak 1 +texa 1 +automat 1 +physic 1 +unit 1 +interact 1 +univers 1 +austin 1 +artifici 1 +problem 1 +server 1 +graphic 1 +draw 1 +us 1 +offic 1 +support 0 +free 0 +speech 0 +internet 0 +associ 0 +professor 0 +ofcomput 0 +atth 0 +director 0 +intelligencelaboratori 0 +highest 0 +honor 0 +research 0 +reus 0 +genericalgorithmssolv 0 +specifiedinformallyartifici 0 +intelligencecurriculum 0 +vita 0 +publicationsemploymentgrantsprofession 0 +activ 0 +honorscurriculum 0 +vitaefre 0 +softwar 0 +tmycin 0 +emycin 0 +like 0 +expert 0 +system 0 +shell 0 +interfac 0 +common 0 +lispconvers 0 +measurementsoftwar 0 +construct 0 +creat 0 +write 0 +connect 0 +diagram 0 +convers 0 +convert 0 +measur 0 +isaac 0 +solv 0 +state 0 +english 0 +class 0 +schemec 0 +compilersc 0 +intelligencec 0 +programmingweb 0 +linksweatheraddress 0 +ctai 0 +univ 0 +austinaustintexa 0 +faxnovak 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..04e95e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,51 @@ +page 1 +meghan 1 +insult 1 +panic 1 +brienhi 0 +welcom 0 +home 0 +pleas 0 +wipe 0 +feet 0 +enter 0 +mani 0 +complaint 0 +crappi 0 +laugh 0 +stock 0 +internet 0 +remov 0 +link 0 +chanc 0 +work 0 +thank 0 +sent 0 +still 0 +download 0 +gorgeou 0 +pictur 0 +blow 0 +poster 0 +size 0 +beauti 0 +queen 0 +date 0 +wait 0 +hear 0 +paul 0 +get 0 +marri 0 +august 0 +hope 0 +come 0 +except 0 +anyon 0 +want 0 +give 0 +feel 0 +free 0 +resum 0 +email 0 +obrien 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..17c54c55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,13 @@ +oguer 1 +gutierrezogu 0 +gutierrezth 0 +depart 0 +comput 0 +scienc 0 +theunivers 0 +texa 0 +austinprojectsomioswwhlinksconfer 0 +databas 0 +systemsth 0 +worldemail 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..86b02eb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,209 @@ +scheme 2 +memori 2 +paper 1 +time 1 +system 1 +garbag 1 +avail 1 +research 1 +paul 1 +wilson 1 +object 1 +compil 1 +collect 1 +also 1 +page 1 +group 1 +stephen 1 +carl 1 +especi 1 +persist 1 +store 1 +us 1 +alloc 1 +extens 1 +master 1 +thesi 1 +manag 1 +real 1 +collector 1 +file 1 +includ 1 +sourc 1 +code 1 +languag 1 +introduct 1 +interest 1 +site 1 +oop 1 +sheetal 1 +kakkad 1 +donovan 1 +kolbl 1 +neeli 1 +distribut 1 +virtual 1 +develop 1 +pointer 1 +larg 1 +effici 1 +standard 1 +program 1 +survei 1 +interfac 1 +orient 1 +implement 1 +cours 1 +interpret 1 +descript 1 +macro 1 +note 1 +draft 1 +good 1 +gener 1 +list 1 +whichcontain 1 +groupoop 0 +groupthi 0 +home 0 +supervis 0 +prof 0 +graduat 0 +studentsin 0 +ajit 0 +georg 0 +mark 0 +johnston 0 +scott 0 +kaplan 0 +michael 0 +qing 0 +dougla 0 +wieren 0 +area 0 +hierarchi 0 +cach 0 +simpl 0 +high 0 +perform 0 +call 0 +texa 0 +swizzl 0 +fault 0 +toimplement 0 +address 0 +space 0 +stock 0 +hardwar 0 +andoper 0 +basic 0 +studi 0 +behavior 0 +whichattempt 0 +repair 0 +damag 0 +done 0 +three 0 +decad 0 +mostli 0 +unsoundstudi 0 +mike 0 +automat 0 +storag 0 +generationaland 0 +small 0 +ongarbag 0 +local 0 +recent 0 +hard 0 +written 0 +smart 0 +adapt 0 +managementfor 0 +dynam 0 +andcompress 0 +structur 0 +checkpoint 0 +forfault 0 +toler 0 +travel 0 +debug 0 +highli 0 +portabl 0 +programmingsystem 0 +extend 0 +rscheme 0 +thread 0 +socket 0 +homepag 0 +info 0 +alpha 0 +releas 0 +noteson 0 +rawascii 0 +text 0 +andrschemear 0 +integr 0 +process 0 +algorithm 0 +tosupport 0 +open 0 +thesiscontain 0 +refer 0 +coupl 0 +write 0 +whicharen 0 +form 0 +anywai 0 +sometimesoon 0 +onlin 0 +book 0 +progress 0 +htmlformat 0 +brows 0 +contain 0 +materialfrom 0 +ascii 0 +much 0 +improv 0 +expandedpresent 0 +texinfo 0 +materiali 0 +work 0 +intro 0 +metaobject 0 +besid 0 +providesa 0 +thing 0 +like 0 +make 0 +backgroundread 0 +brief 0 +bibliographi 0 +heap 0 +fortexa 0 +anonym 0 +utexa 0 +readm 0 +materi 0 +subdirectori 0 +oopsla 0 +workshop 0 +peopl 0 +henri 0 +baker 0 +sftp 0 +although 0 +overload 0 +notb 0 +access 0 +keep 0 +try 0 +anoth 0 +great 0 +han 0 +boehm 0 +sever 0 +well 0 +free 0 +severalgarbag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..c2230842 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,11 @@ +depart 1 +robert 0 +otuomagieaddress 0 +austin 0 +phone 0 +emailotu 0 +utexa 0 +eduuniververs 0 +infouniversityth 0 +univers 0 +txa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..430ae14c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,74 @@ +file 2 +devic 2 +virtual 2 +specif 1 +avail 1 +compon 1 +languag 1 +interfac 1 +softwar 1 +construct 1 +view 1 +padgett 1 +work 1 +control 1 +domain 1 +class 1 +also 1 +follow 1 +physic 1 +implement 1 +export 1 +us 1 +austin 1 +padgettdon 0 +dissert 0 +researchi 0 +professor 0 +brown 0 +design 0 +andimplement 0 +driver 0 +investig 0 +creation 0 +program 0 +environ 0 +focu 0 +thu 0 +compil 0 +technolog 0 +devis 0 +prototyp 0 +specifi 0 +softar 0 +call 0 +contain 0 +variou 0 +featur 0 +reduc 0 +effort 0 +requir 0 +refer 0 +manualfor 0 +current 0 +postscript 0 +draft 0 +manual 0 +exampl 0 +counter 0 +multifunct 0 +transpar 0 +recent 0 +present 0 +creat 0 +microsoft 0 +powerpointvers 0 +window 0 +contact 0 +meemail 0 +utexa 0 +edupost 0 +depart 0 +comput 0 +scienc 0 +usafax 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..a4ce62b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,90 @@ +page 1 +battlebal 1 +austin 1 +comput 1 +research 1 +plan 1 +softwar 1 +game 1 +binari 1 +system 1 +librari 1 +tree 1 +univers 1 +scienc 1 +interest 1 +geometr 1 +model 1 +utexa 1 +home 0 +philip 0 +hardinphilip 0 +hardinabout 0 +elimin 0 +bug 0 +everywher 0 +fail 0 +fallback 0 +write 0 +access 0 +execut 0 +file 0 +oper 0 +unfortun 0 +avail 0 +want 0 +port 0 +suno 0 +solari 0 +work 0 +go 0 +school 0 +time 0 +multiplay 0 +wrote 0 +runsund 0 +window 0 +line 0 +code 0 +us 0 +standard 0 +templat 0 +anda 0 +campbel 0 +space 0 +partit 0 +imag 0 +screenshot 0 +student 0 +texa 0 +guess 0 +depart 0 +area 0 +graphic 0 +realli 0 +cool 0 +main 0 +graphicssoftwar 0 +reus 0 +engin 0 +gener 0 +group 0 +automat 0 +programmingto 0 +contact 0 +meemail 0 +pahardin 0 +edupost 0 +usanetrek 0 +server 0 +pita 0 +curli 0 +handl 0 +digitaldisast 0 +look 0 +get 0 +plaster 0 +congradul 0 +smartest 0 +person 0 +inth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..e177d146 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,82 @@ +comput 1 +evolut 1 +scienc 1 +univers 1 +texa 1 +austin 1 +learn 1 +research 1 +paul 0 +mcquestenpaul 0 +mcquestenphd 0 +studentdepart 0 +interest 0 +interact 0 +think 0 +mechan 0 +natur 0 +might 0 +bepract 0 +addit 0 +current 0 +techniqu 0 +exampl 0 +death 0 +usual 0 +studi 0 +explicitli 0 +email 0 +paulmcq 0 +utexa 0 +offic 0 +taylor 0 +hall 0 +phone 0 +postal 0 +address 0 +austindepart 0 +spring 0 +head 0 +forcsp 0 +introduct 0 +pascal 0 +programmingmor 0 +neuro 0 +inmoriarti 0 +link 0 +atcnr 0 +rome 0 +neural 0 +network 0 +group 0 +artifici 0 +intellig 0 +cours 0 +schedul 0 +depart 0 +handi 0 +access 0 +tout 0 +librari 0 +onlin 0 +seriou 0 +reflect 0 +dave 0 +winer 0 +websit 0 +need 0 +pointer 0 +wast 0 +hour 0 +surf 0 +check 0 +cynb 0 +humong 0 +hotlist 0 +mix 0 +knowledg 0 +knick 0 +knack 0 +nut 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..2980d0c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,32 @@ +multimedia 1 +austin 1 +pawang 1 +utexa 1 +system 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +pawan 0 +goyal 0 +home 0 +page 0 +research 0 +summari 0 +public 0 +network 0 +protocol 0 +file 0 +oper 0 +affili 0 +group 0 +get 0 +touch 0 +email 0 +inform 0 +finger 0 +also 0 +check 0 +log 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..b177630c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,115 @@ +austin 2 +physic 1 +comput 1 +univers 1 +group 1 +texa 1 +current 1 +scienc 1 +depart 1 +theori 1 +interest 1 +also 1 +jose 1 +pecina 1 +obtain 1 +invari 1 +represent 1 +dissert 1 +advisor 1 +explor 1 +research 1 +numer 1 +parallel 1 +quantum 1 +scientif 1 +softwar 1 +home 1 +utexa 1 +orpecina 1 +pecinaabout 0 +previous 0 +complet 0 +master 0 +innuclear 0 +engin 0 +finish 0 +thesi 0 +graduat 0 +studi 0 +workedinvestig 0 +gaug 0 +graviti 0 +base 0 +quantiz 0 +gravit 0 +field 0 +calcul 0 +unitari 0 +irreduc 0 +publish 0 +three 0 +joint 0 +paper 0 +yuval 0 +eman 0 +georg 0 +sudarshan 0 +jurgen 0 +lemk 0 +fromcologn 0 +germani 0 +previou 0 +posit 0 +bureau 0 +econom 0 +geologi 0 +spent 0 +year 0 +half 0 +work 0 +seismic 0 +invers 0 +tomographi 0 +supervisor 0 +hardag 0 +editor 0 +geophys 0 +journal 0 +societi 0 +geophysicist 0 +algorithm 0 +analysi 0 +cryptographi 0 +visitor 0 +thephys 0 +carnegi 0 +mellon 0 +pittsburgh 0 +open 0 +compani 0 +develop 0 +fill 0 +comerci 0 +symmetri 0 +algebra 0 +sequenti 0 +solutionsin 0 +gener 0 +rel 0 +problem 0 +chromodynamicsmi 0 +curriculum 0 +vita 0 +click 0 +want 0 +print 0 +contact 0 +center 0 +particl 0 +usavoic 0 +main 0 +offic 0 +mail 0 +defo 0 +phy 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..dd8942ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,56 @@ +know 1 +page 1 +unix 1 +would 1 +woof 1 +find 1 +nettl 0 +welcom 0 +cornerinfolik 0 +anyon 0 +want 0 +make 0 +realli 0 +short 0 +blahblah 0 +system 0 +window 0 +blah 0 +stuff 0 +eeek 0 +staff 0 +escap 0 +floor 0 +mean 0 +ibm 0 +file 0 +afteri 0 +instal 0 +stori 0 +true 0 +name 0 +chang 0 +protect 0 +theinnoc 0 +experiment 0 +work 0 +pleas 0 +think 0 +us 0 +neat 0 +cool 0 +comic 0 +buena 0 +vista 0 +movieplex 0 +meyour 0 +chanc 0 +increas 0 +drastic 0 +could 0 +employan 0 +improb 0 +drive 0 +send 0 +mail 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..f14bd716 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,25 @@ +greg 1 +plaxtongreg 1 +utexa 1 +hall 1 +texa 1 +plaxtoncontact 0 +informationemail 0 +plaxton 0 +eduphon 0 +offic 0 +taylor 0 +postal 0 +depart 0 +comput 0 +sciencetaylor 0 +univers 0 +austinaustin 0 +inform 0 +annual 0 +report 0 +profilepubl 0 +last 0 +modifi 0 +decemb 0 +plaxtonplaxton 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..aab40856 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,85 @@ +learn 1 +search 1 +porter 1 +machin 1 +knowledg 1 +base 1 +system 1 +bruce 1 +comput 1 +scienc 1 +intellig 1 +research 1 +answer 1 +question 1 +select 1 +public 1 +aaai 1 +abstract 1 +porterassoci 0 +professor 0 +faculti 0 +fellow 0 +univers 0 +california 0 +irvin 0 +honor 0 +award 0 +profession 0 +servic 0 +presidenti 0 +young 0 +investig 0 +editor 0 +presentarea 0 +interestartifici 0 +summari 0 +researchhead 0 +group 0 +develop 0 +method 0 +build 0 +larg 0 +basesand 0 +us 0 +solv 0 +problem 0 +researchinterest 0 +case 0 +recent 0 +rickel 0 +autom 0 +model 0 +predict 0 +thetim 0 +scale 0 +boundari 0 +cambridg 0 +aait 0 +press 0 +andpostscript 0 +brant 0 +rule 0 +preced 0 +complementari 0 +warrant 0 +bareiss 0 +holt 0 +concept 0 +heurist 0 +classif 0 +weak 0 +theori 0 +domain 0 +artifici 0 +journal 0 +abstractand 0 +postscript 0 +hotlist 0 +site 0 +page 0 +email 0 +address 0 +tech 0 +reportport 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..2533c5cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,197 @@ +uniti 2 +program 2 +misra 2 +powerlist 1 +seuss 1 +graduat 1 +present 1 +group 1 +jayadev 1 +notat 1 +parallel 1 +work 1 +book 1 +paper 1 +proof 1 +theori 1 +kornerup 1 +list 1 +comput 1 +area 1 +current 1 +adam 1 +carruth 1 +marku 1 +kaltenbach 1 +jacob 1 +avail 1 +logic 1 +design 1 +note 1 +ofpap 1 +written 1 +includ 1 +introduct 1 +specifi 1 +circuit 1 +structur 1 +mani 1 +thepowerlist 1 +correct 1 +network 1 +studi 1 +differ 1 +compil 1 +austinpsp 0 +austinthi 0 +home 0 +page 0 +depart 0 +scienc 0 +univers 0 +texa 0 +ataustin 0 +stand 0 +specif 0 +emphasi 0 +deriveparallel 0 +distribut 0 +rigor 0 +manner 0 +issupervis 0 +develop 0 +research 0 +former 0 +member 0 +groupinclud 0 +erni 0 +cohen 0 +rajeev 0 +joshi 0 +edgar 0 +knapp 0 +ingolf 0 +krger 0 +josyula 0 +mark 0 +staskauska 0 +publicationsbelow 0 +summar 0 +wherev 0 +possibl 0 +give 0 +link 0 +topap 0 +electron 0 +reason 0 +anddistribut 0 +chandi 0 +foundat 0 +addison 0 +weslei 0 +seri 0 +variou 0 +result 0 +applic 0 +thenot 0 +assum 0 +basic 0 +understand 0 +inchandi 0 +sinc 0 +public 0 +sever 0 +improv 0 +made 0 +inth 0 +reflect 0 +amanuscript 0 +newun 0 +tempor 0 +operatorco 0 +safeti 0 +refer 0 +forrefer 0 +implement 0 +write 0 +asymbol 0 +model 0 +checker 0 +forfinit 0 +state 0 +call 0 +unityverifi 0 +extend 0 +toinclud 0 +real 0 +time 0 +aspect 0 +hybrid 0 +system 0 +synchron 0 +data 0 +length 0 +equal 0 +power 0 +twodiffer 0 +oper 0 +balanc 0 +divis 0 +parallelalgorithm 0 +succinct 0 +simpl 0 +recurs 0 +givesnumer 0 +exampl 0 +algorithm 0 +fast 0 +fourier 0 +transform 0 +batcher 0 +sort 0 +arithmet 0 +asadd 0 +multipli 0 +prove 0 +verifi 0 +addercircuit 0 +us 0 +programscan 0 +map 0 +effici 0 +architectur 0 +speciallyhypercub 0 +detail 0 +offspr 0 +address 0 +issu 0 +ofprogram 0 +composit 0 +restrict 0 +compon 0 +caninterfer 0 +read 0 +overview 0 +chapter 0 +froma 0 +monograph 0 +adisciplin 0 +multiprogram 0 +alsoavail 0 +genrat 0 +code 0 +callsfor 0 +messag 0 +commun 0 +describ 0 +thesi 0 +anexperi 0 +concurr 0 +object 0 +basedprogram 0 +languag 0 +ingolfkrg 0 +site 0 +found 0 +thepsp 0 +sitejacob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..0787e655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,71 @@ +qime 1 +comput 1 +guest 1 +texa 1 +austin 1 +scienc 1 +inform 1 +huang 1 +email 1 +utexa 1 +univers 1 +system 1 +center 1 +page 1 +comment 1 +depart 0 +sciencesunivers 0 +phone 0 +edureceiv 0 +nankai 0 +tianjin 0 +univeris 0 +hawaii 0 +manoa 0 +hawaiiwork 0 +austincours 0 +spring 0 +advanc 0 +telecommun 0 +client 0 +server 0 +develop 0 +appli 0 +data 0 +commun 0 +cours 0 +academ 0 +resourc 0 +teamweb 0 +utcssadaili 0 +texanstock 0 +room 0 +attract 0 +picturesimageschines 0 +popsend 0 +card 0 +electr 0 +postcard 0 +line 0 +job 0 +jobtrakut 0 +placement 0 +connect 0 +gopherftp 0 +csc 0 +newstelnet 0 +cschen 0 +junk 0 +staffyour 0 +person 0 +visit 0 +pleas 0 +sign 0 +book 0 +commentsguest 0 +name 0 +construct 0 +last 0 +modifi 0 +march 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..66a2b97c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,63 @@ +reason 2 +qualit 1 +research 1 +paper 1 +qsim 1 +index 1 +bibliographi 1 +ourresearch 1 +group 1 +world 1 +access 1 +dissert 1 +softwar 1 +directori 1 +utexasqualit 0 +utexasth 0 +sever 0 +area 0 +thephys 0 +user 0 +applic 0 +like 0 +system 0 +spatial 0 +intelligentrobot 0 +tour 0 +limit 0 +logic 0 +knowledgerepresent 0 +algernon 0 +supervis 0 +professor 0 +benjaminkuip 0 +kuiper 0 +utexa 0 +part 0 +artifici 0 +intellig 0 +comput 0 +scienc 0 +depart 0 +atth 0 +univers 0 +texa 0 +ataustin 0 +pointer 0 +book 0 +graduat 0 +student 0 +robot 0 +knowledg 0 +represent 0 +alumni 0 +includ 0 +visitor 0 +abstract 0 +yellow 0 +page 0 +easili 0 +areadescript 0 +also 0 +visit 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..65f260f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,62 @@ +page 1 +qiang 1 +home 1 +pleas 1 +seed 1 +program 1 +comput 1 +john 1 +welcom 1 +thank 1 +window 1 +austin 1 +feel 0 +free 0 +brows 0 +around 0 +leav 0 +comment 0 +suggest 0 +joke 0 +visit 0 +come 0 +scrollit_rl 0 +timertwo 0 +settimeout 0 +els 0 +sinc 0 +com 0 +time 0 +self 0 +introduct 0 +current 0 +master 0 +scienc 0 +depart 0 +univers 0 +texa 0 +click 0 +inform 0 +seriousjunk 0 +cours 0 +languag 0 +unix 0 +graphic 0 +linux 0 +technic 0 +java 0 +realjunk 0 +sport 0 +game 0 +new 0 +struggleforliv 0 +institut 0 +qzuo 0 +utexa 0 +guestbook 0 +still 0 +underconstruct 0 +back 0 +last 0 +modif 0 +copyright 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..a9678d7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,57 @@ +scienc 1 +comput 1 +univers 1 +texa 1 +robert 1 +gener 1 +last 1 +massachusett 1 +institut 1 +technolog 1 +system 1 +paper 1 +utexa 1 +blumoferobert 0 +blumofei 0 +bobbi 0 +name 0 +pronounc 0 +bloom 0 +informationassist 0 +professor 0 +ofcomput 0 +austin 0 +brown 0 +work 0 +cilkmultithread 0 +languag 0 +runtim 0 +laboratori 0 +experiment 0 +softwar 0 +less 0 +compil 0 +list 0 +document 0 +also 0 +avail 0 +directori 0 +semest 0 +spring 0 +teach 0 +abstract 0 +data 0 +type 0 +contact 0 +informationemail 0 +eduphon 0 +offic 0 +taylor 0 +hallpost 0 +depart 0 +sciencestaylor 0 +hall 0 +austinaustin 0 +modifi 0 +decemb 0 +blumoferdb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..cd4a29f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,41 @@ +time 1 +current 1 +page 1 +comput 1 +interest 1 +wang 0 +homepag 0 +wangwelcom 0 +construct 0 +content 0 +permit 0 +candid 0 +depart 0 +scienc 0 +theunivers 0 +texa 0 +austin 0 +research 0 +includ 0 +real 0 +system 0 +rule 0 +base 0 +program 0 +analysi 0 +softwar 0 +engin 0 +artifici 0 +intellig 0 +publicationsi 0 +list 0 +public 0 +avail 0 +brows 0 +last 0 +updat 0 +pleas 0 +send 0 +comment 0 +rhwang 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..2b584508 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,77 @@ +neural 1 +network 1 +lectur 1 +cours 1 +artifici 1 +fall 1 +graduat 1 +univers 1 +austin 1 +scienc 1 +research 1 +intellig 1 +undergradu 1 +risto 1 +comput 1 +group 1 +model 1 +cognit 1 +work 1 +seminar 1 +spring 1 +address 1 +miikkulainenristo 0 +miikkulainenassoci 0 +professor 0 +computersci 0 +oftexa 0 +ucla 0 +appli 0 +mathemat 0 +helsinki 0 +technolog 0 +intereststh 0 +concentr 0 +processeswith 0 +current 0 +includ 0 +languageacquisit 0 +episod 0 +memori 0 +self 0 +organ 0 +visual 0 +cortex 0 +schema 0 +base 0 +vision 0 +also 0 +evolv 0 +networkswith 0 +genet 0 +algorithm 0 +goal 0 +automat 0 +discoversequenti 0 +decis 0 +strategi 0 +problem 0 +solv 0 +robot 0 +detail 0 +utc 0 +home 0 +page 0 +classessumm 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +phone 0 +postal 0 +depart 0 +texa 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..8b1c5822 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,2 @@ +rong 0 +bigfoot 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..5bd39efc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,52 @@ +comput 1 +algorithm 1 +austin 1 +rajaraman 1 +home 1 +depart 1 +scienc 1 +univers 1 +texa 1 +rraj 1 +utexa 1 +rajmohan 0 +pagerajmohan 0 +graduat 0 +student 0 +atth 0 +ataustin 0 +plan 0 +complet 0 +spring 0 +mydissert 0 +supervisor 0 +gregplaxton 0 +research 0 +member 0 +andcomput 0 +theori 0 +group 0 +particularli 0 +interest 0 +incombinator 0 +distribut 0 +network 0 +onlin 0 +parallel 0 +model 0 +random 0 +list 0 +mypubl 0 +curriculum 0 +vita 0 +us 0 +link 0 +relat 0 +sciencemiscellan 0 +linkscontact 0 +inform 0 +email 0 +phone 0 +offic 0 +ephon 0 +postal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..e8d59abb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,86 @@ +austin 1 +movi 1 +homepag 1 +comput 1 +rong 1 +capit 1 +univers 1 +depart 1 +graduat 1 +scienc 1 +utaccess 1 +introduct 1 +homepagea 0 +chinachina 0 +jinan 0 +myhometown 0 +shandong 0 +provinc 0 +tsinghua 0 +gotmi 0 +computersci 0 +technolog 0 +beij 0 +ofth 0 +peopl 0 +republ 0 +ofchina 0 +spent 0 +five 0 +colleg 0 +year 0 +zhai 0 +sinanet 0 +texasaustin 0 +citi 0 +live 0 +weather 0 +apart 0 +rent 0 +utcsth 0 +texa 0 +utnetcat 0 +browsabl 0 +onlin 0 +catalog 0 +librari 0 +austininform 0 +servic 0 +login 0 +utcat 0 +cours 0 +fall 0 +oper 0 +systemsdynam 0 +file 0 +replic 0 +final 0 +project 0 +graphicsc 0 +mathemat 0 +logicc 0 +moviesaustin 0 +chronicl 0 +film 0 +time 0 +yahoo 0 +entertain 0 +filmsmicrosoft 0 +cinemania 0 +onlineal 0 +guidehollywood 0 +onlineinternet 0 +databaserog 0 +ebert 0 +moviesvisit 0 +page 0 +contactmail 0 +address 0 +river 0 +aaustin 0 +telephon 0 +emailrtan 0 +utexa 0 +fingerclick 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..a17a27f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,66 @@ +rupert 1 +austin 1 +tang 1 +page 1 +comput 1 +scienc 1 +univers 1 +texa 1 +littl 1 +think 1 +life 1 +would 1 +alwai 1 +home 0 +student 0 +dept 0 +almost 0 +miracl 0 +modern 0 +teach 0 +method 0 +entir 0 +strangl 0 +holi 0 +curious 0 +inquiri 0 +delic 0 +plant 0 +need 0 +anyth 0 +besid 0 +stimul 0 +freedom 0 +realli 0 +empti 0 +depriv 0 +opportun 0 +choos 0 +altern 0 +distast 0 +deni 0 +actual 0 +wish 0 +aspir 0 +fear 0 +duress 0 +fate 0 +much 0 +differ 0 +truck 0 +wash 0 +machin 0 +nice 0 +meet 0 +complet 0 +cool 0 +servic 0 +know 0 +academ 0 +interest 0 +research 0 +messi 0 +area 0 +construct 0 +utexa 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..cde9937e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,16 @@ +frame 1 +ruwei 0 +homepag 0 +alert 0 +see 0 +messag 0 +us 0 +browser 0 +support 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..f7aab10f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,87 @@ +comput 2 +parallel 1 +method 1 +scienc 1 +univers 1 +austin 1 +mathemat 1 +project 1 +public 1 +robert 1 +texa 1 +rvdg 1 +utexa 1 +appli 1 +numer 1 +develop 1 +case 1 +techniqu 1 +current 1 +geijn 0 +geijnassoci 0 +professor 0 +depart 0 +institut 0 +appliedmathemat 0 +phone 0 +mail 0 +http 0 +user 0 +wisconsin 0 +madison 0 +maryland 0 +colleg 0 +park 0 +area 0 +interestnumer 0 +analysi 0 +supercomput 0 +scientif 0 +summari 0 +researchth 0 +introduct 0 +forc 0 +evalu 0 +oftradit 0 +sequentialmachin 0 +continu 0 +us 0 +inoth 0 +prove 0 +perform 0 +better 0 +researchconcentr 0 +forimpl 0 +well 0 +environ 0 +allowssuch 0 +easili 0 +implement 0 +variou 0 +parallelprocessor 0 +inform 0 +graduat 0 +program 0 +workshop 0 +infrastructur 0 +applic 0 +april 0 +intercom 0 +plapack 0 +sl_librari 0 +book 0 +journal 0 +confer 0 +technic 0 +report 0 +tutori 0 +major 0 +softwar 0 +effort 0 +class 0 +fall 0 +schedul 0 +former 0 +student 0 +meet 0 +famili 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..0cb91a03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,40 @@ +home 1 +seed 1 +india 1 +page 1 +time 1 +sundeep 0 +abraham 0 +scrollit_rl 0 +timertwo 0 +window 0 +settimeout 0 +els 0 +sundeepabraham 0 +master 0 +student 0 +computersci 0 +dept 0 +universityof 0 +texa 0 +austin 0 +undergradu 0 +studi 0 +comput 0 +sciencesand 0 +engin 0 +region 0 +engg 0 +colleg 0 +calicut 0 +countri 0 +hail 0 +state 0 +kerala 0 +know 0 +contact 0 +click 0 +construct 0 +tinkerwith 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..c441c5fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,135 @@ +comput 1 +texa 1 +austin 1 +page 1 +univers 1 +sammi 1 +depart 1 +scienc 1 +swim 1 +refer 1 +yellow 1 +utexa 1 +research 1 +prof 1 +project 1 +parallel 1 +world 1 +wide 1 +librari 1 +construct 1 +address 1 +altavista 0 +startingpoint 0 +yahoo 0 +lyco 0 +map 0 +weather 0 +white 0 +congress 0 +shall 0 +make 0 +respect 0 +establish 0 +religion 0 +orprohibit 0 +free 0 +exercis 0 +thereof 0 +abridg 0 +freedom 0 +ofspeech 0 +press 0 +right 0 +peopl 0 +peaceabl 0 +toassembl 0 +petit 0 +govern 0 +redress 0 +grievanc 0 +constitut 0 +unit 0 +state 0 +america 0 +calvin 0 +robert 0 +geijn 0 +linear 0 +algebra 0 +packag 0 +schedul 0 +artifici 0 +intellig 0 +topic 0 +distribut 0 +languag 0 +compil 0 +advanc 0 +oper 0 +system 0 +experi 0 +herbarium 0 +plapack 0 +sign 0 +hypertext 0 +code 0 +anagram 0 +server 0 +friend 0 +nil 0 +virtual 0 +reker 0 +yanni 0 +musician 0 +jeff 0 +hockei 0 +andrea 0 +hamilton 0 +technolog 0 +famili 0 +pop 0 +guyer 0 +public 0 +health 0 +northwestern 0 +anthropolog 0 +kate 0 +nate 0 +activ 0 +link 0 +entertain 0 +showbiz 0 +chronicl 0 +pollstar 0 +concert 0 +databas 0 +html 0 +quick 0 +guid 0 +dell 0 +fring 0 +ryder 0 +laptop 0 +info 0 +consortium 0 +miscellan 0 +boston 0 +hotlist 0 +list 0 +traveloc 0 +offic 0 +taylor 0 +hall 0 +home 0 +great 0 +hill 0 +eduth 0 +opinion 0 +express 0 +mine 0 +necessarili 0 +repres 0 +view 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..8909f039 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,50 @@ +address 1 +austin 1 +inform 1 +mail 1 +depart 1 +comput 1 +propos 1 +file 1 +formal 1 +method 1 +page 1 +sawadajun 0 +sawadacontact 0 +email 0 +sawada 0 +utexa 0 +offic 0 +main 0 +univ 0 +texa 0 +scienc 0 +taylor 0 +hall 0 +home 0 +wooten 0 +dissert 0 +oral 0 +time 0 +place 0 +abstract 0 +paper 0 +supplementari 0 +technic 0 +report 0 +kbresourc 0 +common 0 +lisp 0 +languag 0 +edit 0 +bowen 0 +around 0 +world 0 +pvsother 0 +frequent 0 +access 0 +teacher 0 +fellow 0 +logic 0 +boyer 0 +class 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..fed785b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,169 @@ +softwar 2 +system 2 +gener 2 +compon 1 +batori 1 +genvoca 1 +domain 1 +start 1 +mani 1 +applic 1 +get 1 +member 1 +design 1 +page 1 +research 1 +group 1 +complex 1 +result 1 +improv 1 +product 1 +perform 1 +look 1 +distribut 1 +decemb 1 +encapsul 1 +refin 1 +basic 1 +order 1 +architectur 1 +pattern 1 +utexa 1 +lectur 1 +note 1 +file 1 +ssgrg 0 +titl 0 +pagewelcom 0 +home 0 +tool 0 +assembl 0 +interchang 0 +reusabl 0 +develop 0 +independ 0 +model 0 +construct 0 +defin 0 +algebra 0 +equat 0 +term 0 +successfulli 0 +appli 0 +includ 0 +databas 0 +manag 0 +avion 0 +data 0 +structur 0 +demonstr 0 +substanti 0 +time 0 +first 0 +visit 0 +question 0 +best 0 +place 0 +take 0 +project 0 +index 0 +public 0 +relat 0 +utc 0 +professorangela 0 +dappert 0 +studentguillermo 0 +jimenez 0 +perezph 0 +studentjeff 0 +thomasph 0 +studentl 0 +tokuda 0 +studentyanni 0 +smaragdaki 0 +studentk 0 +shepherdresearch 0 +associateform 0 +graduat 0 +datesdinesh 0 +dasph 0 +milli 0 +villarrealph 0 +bart 0 +geracipostdoc 0 +marti 0 +sirkinph 0 +march 0 +sankar 0 +dasarim 0 +overview 0 +us 0 +build 0 +typic 0 +modul 0 +featur 0 +share 0 +possibl 0 +must 0 +differ 0 +part 0 +class 0 +requir 0 +manipul 0 +metadata 0 +reflect 0 +comput 0 +thu 0 +like 0 +approach 0 +goe 0 +beyond 0 +simpl 0 +object 0 +orient 0 +larg 0 +scale 0 +program 0 +transform 0 +feel 0 +issu 0 +involv 0 +breadth 0 +recommend 0 +follow 0 +paper 0 +starter 0 +read 0 +scalabl 0 +librari 0 +creat 0 +refer 0 +implement 0 +composit 0 +valid 0 +subject 0 +specif 0 +deliv 0 +relationship 0 +work 0 +check 0 +reengin 0 +lightweight 0 +dbm 0 +memori 0 +simul 0 +generatorsautom 0 +evolut 0 +inform 0 +pleas 0 +contact 0 +period 0 +releas 0 +tutori 0 +reus 0 +avail 0 +contain 0 +compress 0 +postscript 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..c0023393 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,142 @@ +page 2 +home 1 +stuff 1 +austin 1 +inform 1 +work 1 +think 1 +research 1 +amherst 1 +want 1 +realli 1 +basic 1 +doesn 1 +look 1 +littl 1 +interest 1 +texa 1 +depart 1 +try 1 +group 1 +class 1 +orient 1 +colleg 1 +place 1 +time 1 +spent 1 +amaz 1 +like 1 +peopl 1 +check 1 +neat 1 +right 1 +scott 0 +pagescott 0 +pageokai 0 +long 0 +overdu 0 +slight 0 +updat 0 +mean 0 +go 0 +fanci 0 +expect 0 +least 0 +current 0 +univers 0 +finish 0 +year 0 +mani 0 +oop 0 +within 0 +utc 0 +system 0 +languag 0 +object 0 +design 0 +instructor 0 +glenn 0 +down 0 +great 0 +spend 0 +undergradu 0 +dai 0 +unpredict 0 +santa 0 +institut 0 +summer 0 +differ 0 +kind 0 +hobbi 0 +stuffit 0 +perfect 0 +linux 0 +invalu 0 +squash 0 +pageth 0 +mead 0 +wine 0 +beer 0 +psion 0 +maker 0 +cool 0 +palmtop 0 +without 0 +would 0 +forget 0 +name 0 +anastasi 0 +well 0 +sharp 0 +much 0 +free 0 +miscellan 0 +item 0 +particular 0 +order 0 +dine 0 +guid 0 +actual 0 +import 0 +part 0 +citizen 0 +poke 0 +good 0 +humor 0 +publish 0 +appl 0 +comput 0 +still 0 +thing 0 +iici 0 +last 0 +forev 0 +ala 0 +longer 0 +sure 0 +bright 0 +futur 0 +type 0 +machin 0 +bebox 0 +could 0 +simpl 0 +better 0 +noth 0 +probabl 0 +didn 0 +wait 0 +five 0 +minut 0 +load 0 +send 0 +email 0 +maintain 0 +sfkaplan 0 +utexa 0 +might 0 +grab 0 +includ 0 +link 0 +encrypt 0 +gener 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..a9077656 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,34 @@ +shenoi 1 +austin 1 +utexa 1 +prashant 1 +home 1 +page 1 +univers 1 +texa 1 +depart 1 +comput 1 +scienc 1 +main 1 +welcom 0 +get 0 +touch 0 +email 0 +river 0 +offic 0 +tower 0 +floor 0 +build 0 +inform 0 +finger 0 +also 0 +check 0 +log 0 +multimedia 0 +group 0 +list 0 +recent 0 +public 0 +avail 0 +onlin 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..71be12b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,35 @@ +austin 1 +shaob 1 +cyberhom 1 +comput 1 +welcom 0 +current 0 +student 0 +depart 0 +scienc 0 +univers 0 +texa 0 +want 0 +know 0 +check 0 +ongo 0 +work 0 +hardvar 0 +verifc 0 +network 0 +vlsi 0 +final 0 +project 0 +fall 0 +bookshelf 0 +coffe 0 +tabl 0 +campu 0 +citi 0 +make 0 +contact 0 +pleasant 0 +vallei 0 +shma 0 +utexa 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..0c38abec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,93 @@ +austin 1 +shailesh 1 +univers 1 +texa 1 +comput 1 +learn 1 +interest 1 +internet 1 +neural 1 +network 1 +machin 1 +artifici 1 +genet 1 +algorithm 1 +kumar 1 +kumarshailesh 0 +kumarth 0 +austindepart 0 +sciencestaylor 0 +hall 0 +skumar 0 +utexa 0 +edumi 0 +resumeresearch 0 +publicationscontact 0 +mesrcm 0 +spiritu 0 +affili 0 +offersom 0 +linkscognit 0 +scienceutc 0 +researchutc 0 +research 0 +groupresearch 0 +intellig 0 +life 0 +neuroevolut 0 +applic 0 +cellular 0 +automata 0 +chao 0 +nonlinear 0 +dynam 0 +fuzzi 0 +logic 0 +massiv 0 +parallel 0 +processor 0 +publicationson 0 +line 0 +adapt 0 +signal 0 +predistort 0 +dual 0 +reinforc 0 +page 0 +patrick 0 +goetz 0 +risto 0 +miikkulainen 0 +appli 0 +mathemat 0 +depart 0 +scienc 0 +proceed 0 +annual 0 +confer 0 +bari 0 +itali 0 +object 0 +base 0 +evolut 0 +program 0 +bord 0 +singh 0 +symposium 0 +aprl 0 +india 0 +contact 0 +snail 0 +mail 0 +whiti 0 +avenu 0 +phone 0 +home 0 +offic 0 +offernet 0 +assistancesearch 0 +tool 0 +qualiti 0 +institutewww 0 +infoindia 0 +music 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..6878c8ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,47 @@ +smaragdaki 1 +yanni 1 +utc 1 +student 1 +interest 1 +project 1 +program 1 +comput 1 +scienc 1 +research 1 +austin 1 +posit 0 +graduat 0 +alchemi 0 +turn 0 +lead 0 +gold 0 +moder 0 +success 0 +plan 0 +scheme 0 +make 0 +arrang 0 +someth 0 +webster 0 +world 0 +dictionari 0 +serious 0 +depart 0 +main 0 +area 0 +meta 0 +system 0 +applic 0 +particularli 0 +softwar 0 +gener 0 +photo 0 +album 0 +favorit 0 +sitessmaragd 0 +utexa 0 +eduyanni 0 +smaragdakisunivers 0 +texa 0 +departmenttai 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..dab9944b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,58 @@ +link 1 +minion 1 +like 1 +email 1 +pleas 1 +mean 1 +homepag 0 +edward 0 +danger 0 +construct 0 +site 0 +fall 0 +asphalt 0 +bodi 0 +resum 0 +tell 0 +danc 0 +shadow 0 +moonlight 0 +click 0 +utexa 0 +person 0 +data 0 +strictli 0 +need 0 +know 0 +basi 0 +send 0 +effort 0 +duli 0 +note 0 +pictur 0 +interest 0 +greatest 0 +experi 0 +stimul 0 +nerv 0 +center 0 +wouldn 0 +absolut 0 +ever 0 +dy 0 +ignor 0 +previou 0 +recent 0 +addit 0 +field 0 +trip 0 +pania 0 +haiku 0 +leaf 0 +afloat 0 +wind 0 +stream 0 +eddi 0 +waterfal 0 +life 0 +visitor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..90e2aa33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,29 @@ +search 1 +email 1 +utexa 1 +texa 1 +austin 1 +southerart 0 +southerresearchbuild 0 +member 0 +knowledg 0 +base 0 +system 0 +research 0 +group 0 +contact 0 +inform 0 +souther 0 +work 0 +mail 0 +comput 0 +scienc 0 +depart 0 +univers 0 +hotlist 0 +site 0 +page 0 +address 0 +public 0 +tech 0 +reportsouth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..ae71dea4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,60 @@ +learn 1 +address 1 +comput 1 +scienc 1 +sowmya 1 +research 1 +texa 1 +interest 1 +network 1 +univers 1 +austin 1 +ramachandransowmya 0 +ramachandranmachin 0 +groupunivers 0 +austinresearchmi 0 +area 0 +machin 0 +field 0 +ofartif 0 +intellig 0 +problem 0 +learningbayesian 0 +exampl 0 +bayesian 0 +withhidden 0 +variabl 0 +challeng 0 +approach 0 +appli 0 +symbol 0 +connectionist 0 +theori 0 +revis 0 +techniqu 0 +thisproblem 0 +also 0 +design 0 +creat 0 +multimediaappl 0 +resum 0 +list 0 +paper 0 +educ 0 +rutger 0 +tech 0 +indian 0 +institut 0 +technolog 0 +madra 0 +india 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +utexa 0 +postal 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..fee72f74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,52 @@ +texa 1 +sriram 1 +austin 1 +comput 1 +univers 1 +click 1 +home 0 +page 0 +raocurr 0 +research 0 +involv 0 +design 0 +implement 0 +multimedia 0 +file 0 +systemoper 0 +system 0 +support 0 +multimediai 0 +work 0 +multimediagroup 0 +sciencesdepart 0 +advisor 0 +prof 0 +harrickvinpublicationsminegroupcontact 0 +informationofficetai 0 +email 0 +utexa 0 +edudepart 0 +scienc 0 +austinaustin 0 +miscellaneousotherinterest 0 +pagespicturesof 0 +toweraustin 0 +isth 0 +capit 0 +locat 0 +central 0 +hill 0 +countri 0 +herefor 0 +inform 0 +kannada 0 +koota 0 +informationabout 0 +tamil 0 +sangam 0 +comment 0 +pleas 0 +free 0 +send 0 +mail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..1dbc16d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,15 @@ +frame 1 +tiger 0 +alert 0 +see 0 +messag 0 +us 0 +challeng 0 +browser 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..7a320a09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,50 @@ +home 1 +sunghe 1 +univers 1 +austin 1 +page 1 +depart 1 +comput 1 +texa 1 +engin 1 +seoul 1 +work 1 +system 1 +research 1 +utc 1 +click 1 +choisunghe 0 +choiwelcom 0 +construct 0 +master 0 +program 0 +scienc 0 +educ 0 +nation 0 +korea 0 +august 0 +experi 0 +present 0 +administr 0 +chemic 0 +graduat 0 +assist 0 +prof 0 +aloysiu 0 +real 0 +time 0 +group 0 +contact 0 +inform 0 +nuec 0 +list 0 +machin 0 +current 0 +log 0 +finger 0 +author 0 +choiemail 0 +utexa 0 +edulast 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..31f57bed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,70 @@ +comput 1 +scienc 1 +texa 1 +austin 1 +inform 1 +univers 1 +depart 1 +research 1 +page 1 +sheetal 1 +offic 1 +taylor 1 +hall 1 +address 1 +avail 1 +persist 1 +call 1 +home 0 +kakkadsheet 0 +kakkadcontact 0 +postal 0 +usual 0 +best 0 +reach 0 +isvia 0 +email 0 +full 0 +finger 0 +informationi 0 +member 0 +oop 0 +group 0 +inth 0 +part 0 +implement 0 +storagesystem 0 +provid 0 +easi 0 +us 0 +novel 0 +techniqu 0 +pointer 0 +swizzl 0 +faulttim 0 +effici 0 +support 0 +larg 0 +standard 0 +hardwar 0 +pleas 0 +list 0 +mypubl 0 +along 0 +brief 0 +descript 0 +plan 0 +graduat 0 +myresum 0 +postscript 0 +current 0 +work 0 +motorola 0 +somerset 0 +design 0 +center 0 +whilefinish 0 +januari 0 +kakkad 0 +svkakkad 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..a531b271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,62 @@ +austin 1 +shengm 1 +student 1 +scienc 1 +univers 1 +texa 1 +chines 1 +home 0 +page 0 +welcom 0 +homepageabout 0 +depart 0 +comput 0 +want 0 +know 0 +check 0 +classmatesclass 0 +technolog 0 +china 0 +ustc 0 +class 0 +graduat 0 +school 0 +academi 0 +sciencesus 0 +linksut 0 +campu 0 +registrar 0 +gradaut 0 +studiesut 0 +libraryutaccesschines 0 +scholar 0 +associationchina 0 +chinesechinainternet 0 +distribut 0 +magazinestsinghua 0 +bbsncic 0 +bbschines 0 +novelschines 0 +classicsabout 0 +austinwhat 0 +weather 0 +todai 0 +citylimitsclassifi 0 +item 0 +sale 0 +austinto 0 +contact 0 +address 0 +medic 0 +art 0 +voic 0 +email 0 +utexa 0 +finger 0 +meyour 0 +comment 0 +suggest 0 +highli 0 +appreci 0 +visitorsinc 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..5f3f7697 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,15 @@ +frame 1 +wang 0 +alert 0 +see 0 +messag 0 +us 0 +browser 0 +support 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..01cb1ddb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,4 @@ +page 1 +welcom 0 +home 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..74b506a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,30 @@ +austin 1 +work 1 +comput 1 +renu 0 +tewarirenu 0 +tewariwhat 0 +addresshom 0 +home 0 +email 0 +tewari 0 +utexa 0 +multimedia 0 +dept 0 +scienc 0 +univers 0 +texa 0 +public 0 +done 0 +internship 0 +watson 0 +research 0 +center 0 +plai 0 +interest 0 +site 0 +bore 0 +send 0 +comment 0 +name 0 +option 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..861f416c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,85 @@ +tumlin 1 +research 1 +distribut 1 +system 1 +synthesi 1 +pierc 1 +interest 1 +formal 1 +develop 1 +work 1 +paper 1 +control 1 +project 1 +inform 1 +address 1 +utexa 1 +page 1 +novemb 1 +photo 0 +brenda 0 +ladd 0 +photographi 0 +random 0 +stuff 0 +come 0 +soon 0 +stai 0 +tune 0 +issu 0 +secur 0 +present 0 +studi 0 +logic 0 +analyz 0 +authenticationprotocol 0 +done 0 +method 0 +specif 0 +verif 0 +parallel 0 +amwork 0 +resourc 0 +communicatewith 0 +client 0 +mean 0 +queu 0 +messag 0 +draft 0 +addit 0 +student 0 +assist 0 +appli 0 +laboratori 0 +current 0 +investig 0 +us 0 +evolutionari 0 +comput 0 +techniqu 0 +genet 0 +algorithm 0 +finit 0 +state 0 +machin 0 +click 0 +resum 0 +avail 0 +html 0 +postscript 0 +format 0 +contact 0 +offic 0 +taylor 0 +hall 0 +phone 0 +email 0 +postal 0 +metric 0 +blvd 0 +austin 0 +last 0 +updat 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..d59dba0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,173 @@ +prof 2 +page 2 +austin 1 +system 1 +tong 1 +java 1 +comput 1 +univers 1 +chines 1 +mpeg 1 +china 1 +cours 1 +fall 1 +project 1 +unix 1 +friend 1 +wang 1 +time 1 +shanghai 1 +technolog 1 +work 1 +distribut 1 +network 1 +databas 1 +netscap 1 +recommend 1 +visit 1 +viewer 1 +demo 1 +nanj 1 +program 1 +format 1 +jiao 1 +tsinghua 1 +compani 1 +lucent 1 +spring 1 +misra 1 +theori 1 +implement 1 +perform 1 +associ 1 +anim 1 +home 0 +view 0 +listen 0 +contain 0 +applet 0 +written 0 +pure 0 +nank 0 +peopl 0 +republ 0 +current 0 +scienc 0 +depart 0 +texa 0 +seek 0 +full 0 +resum 0 +html 0 +click 0 +postscript 0 +univeristi 0 +beij 0 +jersei 0 +summerluc 0 +form 0 +result 0 +plan 0 +restructur 0 +bell 0 +laboratori 0 +thissumm 0 +anoth 0 +homepag 0 +life 0 +commun 0 +gouda 0 +zuckerman 0 +batori 0 +algorithm 0 +techniqu 0 +plexton 0 +graphic 0 +fussel 0 +multimedia 0 +teach 0 +assist 0 +introduct 0 +oper 0 +present 0 +fault 0 +toler 0 +clock 0 +synchron 0 +real 0 +april 0 +mobil 0 +host 0 +protocol 0 +mini 0 +manag 0 +design 0 +tool 0 +standard 0 +compon 0 +libari 0 +robot 0 +opengl 0 +glut 0 +decod 0 +player 0 +plai 0 +semest 0 +know 0 +troubl 0 +made 0 +music 0 +favorit 0 +mariah 0 +boyz 0 +babyfac 0 +movi 0 +sound 0 +clip 0 +sampl 0 +misc 0 +zodiac 0 +person 0 +differ 0 +kind 0 +find 0 +high 0 +school 0 +attach 0 +normal 0 +haiq 0 +maintain 0 +shenfeng 0 +chen 0 +thank 0 +quit 0 +learn 0 +info 0 +perl 0 +tutori 0 +reach 0 +lake 0 +blvd 0 +twang 0 +utexa 0 +still 0 +underconstruct 0 +check 0 +like 0 +never 0 +leav 0 +eagl 0 +copyright 0 +creat 0 +last 0 +modifi 0 +background 0 +song 0 +deskmat 0 +lang 0 +visitor 0 +accord 0 +counter 0 +sinc 0 +trust 0 +book 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..d6ca5d43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,84 @@ +comput 1 +languag 1 +student 1 +machin 1 +utc 1 +natur 1 +learn 1 +group 1 +interest 1 +associ 1 +hermjakob 0 +home 0 +pageulf 0 +hermjakobhello 0 +welcom 0 +graduat 0 +thedept 0 +scienc 0 +univers 0 +texa 0 +austinand 0 +work 0 +dissert 0 +aboutexampl 0 +base 0 +decis 0 +make 0 +context 0 +orient 0 +pars 0 +translationund 0 +supervis 0 +prof 0 +raymond 0 +moonei 0 +activ 0 +acquisit 0 +groupand 0 +research 0 +place 0 +linguist 0 +signll 0 +special 0 +print 0 +archiv 0 +european 0 +search 0 +engin 0 +altavista 0 +einet 0 +galaxi 0 +infoseek 0 +lyco 0 +yahoo 0 +new 0 +thing 0 +consid 0 +dernir 0 +nouvel 0 +alsac 0 +deutsch 0 +well 0 +focu 0 +york 0 +time 0 +spiegel 0 +svenska 0 +dagbladet 0 +tagesspiegel 0 +vanguardia 0 +welt 0 +zeitplusacm 0 +austin 0 +weather 0 +resourc 0 +perman 0 +address 0 +moltkestr 0 +bnde 0 +germanyphon 0 +voic 0 +last 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..f0ef15f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,38 @@ +texa 1 +austin 1 +sport 1 +lanc 1 +tokuda 1 +univers 1 +softwar 1 +research 1 +intramur 1 +schedul 1 +system 0 +gener 0 +group 0 +depart 0 +comput 0 +scienc 0 +unicron 0 +utexa 0 +twelv 0 +time 0 +champion 0 +directori 0 +entertain 0 +financ 0 +magic 0 +gather 0 +new 0 +organ 0 +peopl 0 +refer 0 +home 0 +offic 0 +taylor 0 +perman 0 +heeia 0 +street 0 +kaneoh 0 +hawaii 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..de530bc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,29 @@ +utexa 1 +hall 1 +austin 1 +home 0 +page 0 +balayoghanv 0 +balayoghancontact 0 +informationemail 0 +eduoffic 0 +painter 0 +telephon 0 +postal 0 +address 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +taylor 0 +click 0 +send 0 +email 0 +finger 0 +account 0 +find 0 +whether 0 +log 0 +ineosdi 0 +bookmarksvbb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..d4f048b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,173 @@ +multimedia 2 +comput 2 +system 2 +award 2 +scienc 1 +univers 1 +distribut 1 +network 1 +research 1 +page 1 +goyal 1 +texa 1 +chair 1 +intern 1 +confer 1 +program 1 +committe 1 +databas 1 +server 1 +proceed 1 +austin 1 +laboratori 1 +california 1 +diego 1 +career 1 +develop 1 +initi 1 +ieee 1 +area 1 +andnetwork 1 +eurograph 1 +workshop 1 +applic 1 +symposium 1 +object 1 +algorithm 1 +effici 1 +audio 1 +video 1 +disk 1 +oper 1 +commun 1 +sponsor 1 +harrick 0 +vinharrick 0 +assist 0 +professor 0 +atth 0 +ataustin 0 +director 0 +multimediacomput 0 +educ 0 +tech 0 +engin 0 +indian 0 +institut 0 +technolog 0 +bombai 0 +colorado 0 +state 0 +honor 0 +profession 0 +servic 0 +faculti 0 +supercomput 0 +center 0 +creativ 0 +innov 0 +editori 0 +board 0 +vice 0 +icdc 0 +member 0 +electronicimag 0 +beij 0 +china 0 +novemb 0 +kaohsiung 0 +taiwan 0 +decemb 0 +rostock 0 +germani 0 +second 0 +internationalconfer 0 +third 0 +multimediasystem 0 +interestmultimedia 0 +high 0 +speed 0 +mobilecomput 0 +summari 0 +main 0 +design 0 +implement 0 +anend 0 +architectur 0 +enabl 0 +wide 0 +rang 0 +ofdistribut 0 +specif 0 +integr 0 +file 0 +protocolsfor 0 +transmiss 0 +digit 0 +larg 0 +scale 0 +select 0 +recent 0 +public 0 +shenoi 0 +failur 0 +recoveri 0 +inmulti 0 +annualintern 0 +fault 0 +toler 0 +ftc 0 +pasadena 0 +june 0 +gemmel 0 +kandlur 0 +venkat 0 +rangan 0 +row 0 +storag 0 +tutori 0 +optim 0 +placement 0 +ofmultimedia 0 +arrai 0 +ieeeintern 0 +icmc 0 +washington 0 +determin 0 +delaybound 0 +heterogen 0 +thintern 0 +support 0 +fordigit 0 +nossdav 0 +durham 0 +hampshir 0 +april 0 +designingmultimedia 0 +march 0 +work 0 +variou 0 +industri 0 +federalinstitut 0 +includ 0 +intel 0 +nation 0 +foundationresearch 0 +nasa 0 +mitsubishi 0 +electricresearch 0 +merl 0 +microsystem 0 +electrospacesystem 0 +cours 0 +advanc 0 +contact 0 +inform 0 +email 0 +utexa 0 +phone 0 +mail 0 +address 0 +depart 0 +taylor 0 +hall 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..e12e1689 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,67 @@ +vipin 1 +home 1 +haven 1 +guestbook 1 +pleas 1 +austin 1 +number 1 +page 0 +found 0 +time 0 +thing 0 +shall 0 +updat 0 +soon 0 +yeah 0 +know 0 +color 0 +match 0 +pictur 0 +cours 0 +interest 0 +report 0 +activ 0 +resum 0 +give 0 +graduat 0 +student 0 +univers 0 +texa 0 +depart 0 +comput 0 +scienc 0 +undergraduatefrom 0 +delhi 0 +india 0 +academ 0 +interestscours 0 +work 0 +list 0 +less 0 +incomplet 0 +pass 0 +semest 0 +take 0 +risk 0 +put 0 +interestsreportsy 0 +visitor 0 +go 0 +reset 0 +increas 0 +never 0 +decreas 0 +contact 0 +medic 0 +art 0 +street 0 +log 0 +sure 0 +sign 0 +though 0 +mani 0 +guest 0 +comment 0 +suggest 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..f53e6347 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,155 @@ +mathemat 1 +paper 1 +vladimir 1 +feel 1 +human 1 +univers 1 +petersburg 1 +russia 1 +note 1 +research 1 +new 1 +germani 1 +race 1 +black 1 +offic 1 +lifschitzwhen 0 +burden 0 +downcast 0 +mind 0 +gladli 0 +turn 0 +therealm 0 +lucid 0 +precis 0 +grasp 0 +object 0 +isobtain 0 +insight 0 +gain 0 +pleasantli 0 +appropri 0 +conceptform 0 +spirit 0 +home 0 +paul 0 +bernai 0 +lifschitzgottesman 0 +famili 0 +centenni 0 +professor 0 +incomput 0 +sciencesat 0 +texasat 0 +austin 0 +fellow 0 +theamerican 0 +associ 0 +forartifici 0 +intelligenceb 0 +branchof 0 +steklov 0 +institut 0 +area 0 +interesttempor 0 +reasoningand 0 +reason 0 +aboutactionslog 0 +programmingand 0 +nonmonoton 0 +reasoningteachingoth 0 +profession 0 +activitiespap 0 +line 0 +lectur 0 +survei 0 +lifschitz 0 +dissert 0 +bylifschitz 0 +studentsrecommend 0 +read 0 +edsger 0 +dijkstra 0 +convoc 0 +speechgood 0 +madelein 0 +albright 0 +nomin 0 +secretari 0 +state 0 +becam 0 +better 0 +place 0 +live 0 +regain 0 +book 0 +taken 0 +soviet 0 +armi 0 +daniel 0 +ortega 0 +lost 0 +need 0 +recycl 0 +helm 0 +burton 0 +actbad 0 +wang 0 +sentenc 0 +year 0 +prison 0 +tortur 0 +us 0 +polic 0 +democrat 0 +countri 0 +sequest 0 +import 0 +archeolog 0 +evid 0 +world 0 +close 0 +societynot 0 +problem 0 +america 0 +elect 0 +recent 0 +redrawn 0 +district 0 +california 0 +civil 0 +right 0 +initi 0 +ratio 0 +white 0 +finish 0 +high 0 +school 0 +admit 0 +student 0 +neutral 0 +basisoth 0 +amnesti 0 +intern 0 +scientist 0 +scienc 0 +favorit 0 +stori 0 +three 0 +silli 0 +joke 0 +quot 0 +monthcontact 0 +inform 0 +taylor 0 +hall 0 +phone 0 +number 0 +postal 0 +address 0 +depart 0 +comput 0 +sciencesunivers 0 +texa 0 +austinaustin 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..77d7afc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,56 @@ +comput 1 +algorithm 1 +univers 1 +texa 1 +austin 1 +parallel 1 +design 1 +scienc 1 +research 1 +evalu 1 +address 1 +vijaya 0 +ramachandranvijaya 0 +ramachandranblakemor 0 +regent 0 +professor 0 +princeton 0 +interestsmi 0 +interest 0 +theori 0 +primarilyin 0 +area 0 +includ 0 +analysi 0 +effici 0 +sequenti 0 +model 0 +machin 0 +experiment 0 +access 0 +copi 0 +recent 0 +paper 0 +mine 0 +complet 0 +list 0 +public 0 +avail 0 +vita 0 +offici 0 +faculti 0 +profil 0 +contact 0 +inform 0 +offic 0 +taylor 0 +hall 0 +email 0 +utexa 0 +postal 0 +depart 0 +number 0 +visit 0 +page 0 +sinc 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..d98ce89c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,203 @@ +comput 2 +algorithm 2 +theori 2 +utexa 2 +mail 2 +list 2 +warm 2 +scienc 1 +univers 1 +group 1 +research 1 +symposium 1 +design 1 +complex 1 +parallel 1 +graph 1 +random 1 +sigact 1 +theoret 1 +foundat 1 +interest 1 +analysi 1 +plaxton 1 +vijaya 1 +ramachandran 1 +announc 1 +messag 1 +texa 1 +faculti 1 +includ 1 +effici 1 +differ 1 +time 1 +number 1 +greg 1 +sanjoi 1 +kelsen 1 +madhukar 1 +ramgop 1 +suel 1 +yuke 1 +electron 1 +activ 1 +send 1 +remov 1 +stand 1 +state 1 +meet 1 +locat 1 +sever 1 +distinguish 1 +austin 1 +dalla 1 +louisiana 1 +sent 1 +also 1 +ieee 1 +bibliographi 1 +grouput 0 +groupth 0 +focus 0 +current 0 +probabilist 0 +method 0 +major 0 +focu 0 +provabl 0 +solv 0 +fundament 0 +problem 0 +measur 0 +term 0 +resourc 0 +space 0 +processor 0 +bit 0 +combinator 0 +lower 0 +bound 0 +machin 0 +model 0 +david 0 +zuckerman 0 +walk 0 +cryptographi 0 +affili 0 +folk 0 +postdoc 0 +student 0 +alumni 0 +baruah 0 +emba 0 +tsan 0 +sheng 0 +tshsu 0 +sinica 0 +pierr 0 +korupolu 0 +phil 0 +mackenzi 0 +philmac 0 +idbsu 0 +mettu 0 +poon 0 +ckpoon 0 +rajmohan 0 +rajaraman 0 +rraj 0 +santanu 0 +sinha 0 +ssinha 0 +torsten 0 +berkelei 0 +zhou 0 +relat 0 +seminar 0 +post 0 +lowvolum 0 +typic 0 +dozen 0 +semest 0 +express 0 +ad 0 +name 0 +request 0 +gripe 0 +workshop 0 +themidsouth 0 +midsouthwest 0 +forum 0 +surround 0 +twice 0 +year 0 +consist 0 +talk 0 +region 0 +recent 0 +result 0 +often 0 +keynot 0 +speaker 0 +first 0 +organ 0 +atut 0 +spring 0 +organizedanoth 0 +fall 0 +held 0 +southern 0 +methodist 0 +north 0 +southwestern 0 +oklahoma 0 +next 0 +schedul 0 +beheld 0 +novemb 0 +program 0 +algorithmsmail 0 +becom 0 +avail 0 +usuallytri 0 +pool 0 +attend 0 +take 0 +place 0 +outsid 0 +ofaustin 0 +regard 0 +arrang 0 +special 0 +algorithmsand 0 +thatinclud 0 +mani 0 +scientist 0 +sponsorsth 0 +stoc 0 +sponsor 0 +siam 0 +discret 0 +soda 0 +andarchitectur 0 +spaa 0 +import 0 +confer 0 +interestar 0 +foc 0 +serv 0 +elect 0 +member 0 +thesigact 0 +execut 0 +committe 0 +us 0 +pointer 0 +calendar 0 +eccc 0 +colloquium 0 +virtual 0 +rolodex 0 +hypertext 0 +project 0 +dept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..7c06a037 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,7 @@ +srinivasan 1 +vaidyaraman 1 +offic 1 +phone 1 +email 0 +utexa 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..1c5a8901 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,45 @@ +vurgun 1 +comput 1 +scienc 1 +interest 1 +austin 1 +sengul 0 +sengulvurgun 0 +background 0 +interestsi 0 +student 0 +depart 0 +ammainli 0 +artifici 0 +intellig 0 +neural 0 +network 0 +evolutionaryalgorithm 0 +term 0 +paper 0 +topic 0 +order 0 +ofprefer 0 +memori 0 +represent 0 +knowledg 0 +howto 0 +retriev 0 +learn 0 +theori 0 +problem 0 +solv 0 +cognit 0 +skillacquisit 0 +search 0 +understand 0 +visual 0 +attent 0 +connectionist 0 +approach 0 +architectur 0 +mindto 0 +contact 0 +mepost 0 +usavoic 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..f9715528 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,10 @@ +page 1 +walbourn 0 +home 0 +chuck 0 +walbournmi 0 +person 0 +locat 0 +charybdi 0 +enterpris 0 +server 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..67379762 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,50 @@ +walker 1 +comput 1 +scienc 1 +depart 1 +colleg 1 +henri 1 +mackai 1 +visit 1 +univers 1 +texa 1 +austin 1 +professor 1 +mathemat 1 +grinnel 1 +math 1 +grin 1 +septemb 1 +senior 0 +lectur 0 +edua 0 +regular 0 +tenur 0 +member 0 +faculti 0 +professorwalk 0 +period 0 +teachand 0 +variou 0 +profession 0 +activ 0 +formal 0 +appoint 0 +follow 0 +academ 0 +year 0 +summer 0 +fall 0 +complet 0 +inform 0 +avail 0 +home 0 +page 0 +atgrinnel 0 +http 0 +creat 0 +last 0 +revis 0 +photograph 0 +jack 0 +robertson 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..444e5c47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,61 @@ +warshaw 1 +austin 1 +rule 1 +research 1 +venu 1 +mirank 1 +home 1 +comput 1 +scienc 1 +graduat 1 +univers 1 +texa 1 +base 1 +appli 1 +case 1 +studi 1 +declar 1 +basi 1 +modul 1 +page 0 +lane 0 +warshawlan 0 +mike 0 +cool 0 +senior 0 +student 0 +recent 0 +accept 0 +school 0 +work 0 +area 0 +activ 0 +databas 0 +system 0 +current 0 +posit 0 +laboratoryinvolv 0 +maintain 0 +languag 0 +developedat 0 +andat 0 +laboratori 0 +lanc 0 +obermey 0 +first 0 +item 0 +anoth 0 +third 0 +follow 0 +list 0 +paper 0 +unpublish 0 +confer 0 +inform 0 +knowledg 0 +manag 0 +contact 0 +mepost 0 +usavoic 0 +arlut 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..ca839c14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,49 @@ +austin 1 +utexa 1 +chen 1 +comput 1 +scienc 1 +texa 1 +mathemat 1 +univers 1 +wchen 1 +java 1 +rosett 1 +program 1 +master 0 +student 0 +dept 0 +decemb 0 +august 0 +fudan 0 +china 0 +juli 0 +offic 0 +phone 0 +email 0 +math 0 +mail 0 +address 0 +center 0 +numer 0 +analysi 0 +us 0 +link 0 +unix 0 +book 0 +expect 0 +perl 0 +site 0 +demo 0 +refer 0 +manual 0 +exampl 0 +common 0 +gatewai 0 +interfac 0 +sampl 0 +pleas 0 +click 0 +load 0 +file 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..171ea4f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,48 @@ +austin 1 +depart 1 +comput 1 +research 1 +program 1 +page 1 +paul 1 +wilson 1 +taylor 1 +hall 1 +univers 1 +texa 1 +scienc 1 +oop 1 +group 1 +languag 1 +home 0 +contact 0 +inform 0 +offic 0 +postal 0 +address 0 +best 0 +reach 0 +email 0 +ltwilson 0 +utexa 0 +usual 0 +headshot 0 +novelti 0 +thought 0 +cross 0 +section 0 +informationi 0 +lead 0 +object 0 +orient 0 +system 0 +workson 0 +memori 0 +manag 0 +design 0 +implement 0 +teachingin 0 +fall 0 +teach 0 +sciencesnot 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..bc45be79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,137 @@ +christian 2 +holi 1 +jesu 1 +also 1 +becom 1 +sin 1 +christ 1 +thought 1 +becam 1 +life 1 +though 1 +time 1 +good 1 +think 1 +wrong 1 +true 1 +howev 1 +abl 1 +faith 1 +strength 1 +know 1 +seek 1 +arthur 0 +homepag 0 +cent 0 +christiani 0 +made 0 +major 0 +decis 0 +believ 0 +alittl 0 +month 0 +come 0 +totallyunexpect 0 +religi 0 +studi 0 +compulsori 0 +class 0 +thechristian 0 +high 0 +school 0 +attend 0 +hong 0 +kong 0 +intent 0 +tobecom 0 +record 0 +slife 0 +bibl 0 +quit 0 +credibl 0 +teach 0 +take 0 +deepli 0 +relat 0 +mylif 0 +misconcept 0 +mean 0 +christianwa 0 +clear 0 +came 0 +past 0 +differ 0 +christianand 0 +lovedeveri 0 +matter 0 +whether 0 +decid 0 +achristian 0 +us 0 +shouldb 0 +virtuou 0 +enough 0 +listen 0 +friend 0 +church 0 +thefellowship 0 +realiz 0 +thing 0 +born 0 +theywil 0 +stumbl 0 +flesh 0 +hei 0 +sinless 0 +sympath 0 +weak 0 +weconfess 0 +forgiv 0 +andto 0 +cleans 0 +unright 0 +john 0 +differencebetween 0 +peopl 0 +trust 0 +astheir 0 +saviour 0 +gratefulli 0 +accept 0 +redempt 0 +fortheir 0 +therefor 0 +justifi 0 +without 0 +deed 0 +ofth 0 +roman 0 +doubt 0 +live 0 +wedo 0 +thecontrari 0 +reli 0 +givesu 0 +said 0 +whole 0 +need 0 +nota 0 +physician 0 +sick 0 +matthew 0 +count 0 +onour 0 +number 0 +dai 0 +ought 0 +thetruth 0 +earli 0 +hesit 0 +start 0 +thankgod 0 +lead 0 +give 0 +opportun 0 +realli 0 +wkmak 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..968f0ef6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,148 @@ +robot 3 +research 2 +interest 2 +page 2 +work 1 +group 1 +austin 1 +badminton 1 +utexa 1 +topic 1 +comment 1 +explor 1 +learn 1 +intellig 1 +rang 1 +reason 1 +system 1 +home 1 +built 1 +onlin 1 +internet 1 +resourc 1 +pictur 1 +educ 1 +public 1 +hotlist 1 +send 1 +contact 1 +inform 1 +mobil 1 +navig 1 +control 1 +spatial 1 +semant 1 +hierarchi 1 +engin 1 +machin 1 +network 1 +qualit 1 +interfac 1 +world 1 +ring 1 +ultrason 1 +sensor 1 +rhino 1 +manipul 1 +robokreta 1 +car 1 +email 1 +wyle 1 +member 1 +guitar 1 +clarinet 1 +martial 1 +art 1 +phone 1 +texa 1 +hello 0 +visitor 0 +number 0 +person 0 +miscellan 0 +worki 0 +primarili 0 +approach 0 +mobilerobot 0 +softwar 0 +develop 0 +area 0 +artifici 0 +includ 0 +neural 0 +vision 0 +oper 0 +embed 0 +graphic 0 +user 0 +multimedia 0 +spot 0 +real 0 +rover 0 +tall 0 +commerci 0 +robocac 0 +worm 0 +specif 0 +robofest 0 +organ 0 +besar 0 +kicik 0 +autonom 0 +us 0 +chassi 0 +motor 0 +fast 0 +remot 0 +race 0 +paper 0 +andqualit 0 +avail 0 +doctor 0 +dissert 0 +titl 0 +fora 0 +physic 0 +also 0 +offici 0 +start 0 +point 0 +technolog 0 +wide 0 +catalog 0 +usenet 0 +frequent 0 +ask 0 +question 0 +meta 0 +index 0 +ncsa 0 +malaysia 0 +homepag 0 +yahoo 0 +note 0 +common 0 +pleas 0 +eduperson 0 +interestsavid 0 +player 0 +unit 0 +state 0 +associ 0 +usba 0 +love 0 +plai 0 +miscellaneousinterest 0 +well 0 +movi 0 +offic 0 +taylor 0 +hall 0 +mail 0 +comput 0 +scienc 0 +depart 0 +univers 0 +finger 0 +back 0 +list 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..c9fb9427 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,8 @@ +address 1 +phone 1 +contact 0 +xfeng 0 +utexa 0 +qaustin 0 +west 0 +austin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..c3d309aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,92 @@ +page 1 +pictur 1 +search 1 +xingang 1 +home 1 +travel 1 +austin 1 +student 1 +comput 1 +work 1 +surpris 1 +well 1 +interest 1 +time 1 +creat 1 +photographi 0 +classic 0 +music 0 +audio 0 +sport 0 +visual 0 +welcom 0 +hang 0 +around 0 +univers 0 +texa 0 +depart 0 +scienc 0 +distribut 0 +multimedia 0 +head 0 +harrick 0 +year 0 +graduat 0 +program 0 +rank 0 +nation 0 +delight 0 +realli 0 +pretti 0 +hard 0 +take 0 +aswel 0 +stuff 0 +line 0 +soon 0 +right 0 +temporaryresort 0 +imagin 0 +hopefulli 0 +goe 0 +llgradual 0 +walk 0 +paper 0 +present 0 +clearer 0 +imag 0 +link 0 +find 0 +feel 0 +havesometh 0 +watch 0 +frequent 0 +access 0 +pointer 0 +serious 0 +foliag 0 +marvel 0 +engin 0 +alta 0 +vista 0 +string 0 +infoseek 0 +keyword 0 +miata 0 +club 0 +unit 0 +morn 0 +newspap 0 +american 0 +express 0 +financi 0 +card 0 +york 0 +atlant 0 +monthli 0 +china 0 +soccer 0 +major 0 +leagu 0 +group 0 +xguo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..d4f2ab2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,4 @@ +geoffrei 0 +home 0 +pagemov 0 +address 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..46e5c258 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,15 @@ +frame 1 +yang 1 +alert 0 +see 0 +messag 0 +us 0 +challeng 0 +browser 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..2fd81339 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,87 @@ +austin 1 +utc 1 +guitar 1 +corner 1 +home 1 +classic 1 +david 0 +wwwdavid 0 +hello 0 +whatev 0 +took 0 +welcom 0 +make 0 +establish 0 +contact 0 +street 0 +depart 0 +comput 0 +scienc 0 +univers 0 +texa 0 +internet 0 +utexa 0 +get 0 +busi 0 +research 0 +outsid 0 +work 0 +still 0 +seriou 0 +favorit 0 +quot 0 +word 0 +hope 0 +daili 0 +medit 0 +stuff 0 +concert 0 +byth 0 +assad 0 +brothersin 0 +hong 0 +kong 0 +art 0 +festiv 0 +must 0 +first 0 +introduc 0 +beauti 0 +christoph 0 +parkeningi 0 +guitarist 0 +interest 0 +life 0 +stori 0 +tell 0 +grew 0 +tire 0 +ofconcert 0 +retir 0 +reconcili 0 +jesu 0 +christ 0 +rekindl 0 +passion 0 +also 0 +theamsterdam 0 +trio 0 +french 0 +rich 0 +artist 0 +flair 0 +like 0 +nation 0 +footbal 0 +team 0 +michel 0 +platini 0 +label 0 +franc 0 +magazinepublish 0 +minist 0 +align 0 +absmiddl 0 +sinc 0 +sept 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..b9247d7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,15 @@ +frame 1 +yanbin 0 +alert 0 +see 0 +messag 0 +us 0 +challeng 0 +browser 0 +click 0 +version 0 +document 0 +could 0 +download 0 +netscap 0 +navig 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..6b54dc68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,9 @@ +yuan 0 +home 0 +page 0 +oop 0 +us 0 +browser 0 +pleas 0 +click 0 +continu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..4cdfd060 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,64 @@ +yong 1 +univers 1 +beij 1 +china 1 +program 1 +austin 1 +unit 1 +servic 1 +homepageto 0 +homepagey 0 +number 0 +visitor 0 +sinc 0 +current 0 +comput 0 +scienc 0 +texa 0 +settl 0 +stai 0 +mathemat 0 +graduat 0 +rutger 0 +brunswick 0 +year 0 +jersei 0 +beauti 0 +place 0 +wife 0 +tsinghua 0 +milanitalian 0 +soccerk 0 +soccernba 0 +sitefox 0 +sportschicago 0 +bullsmichael 0 +jordannflnhlc 0 +rankingmarri 0 +childrenseinfeldcomput 0 +sciencesutilitieshtml 0 +convertersimag 0 +collectionssystemshtmllatexcgitcl 0 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 0 +tmiscinternet 0 +travel 0 +network 0 +parcel 0 +state 0 +postal 0 +usp 0 +fedexus 0 +guidefun 0 +todayu 0 +newsstarwavesupermodel 0 +contact 0 +river 0 +street 0 +finger 0 +yonglu 0 +utexa 0 +page 0 +heavi 0 +construct 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..8fa51e94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,138 @@ +young 1 +linear 1 +iter 1 +method 1 +system 1 +comput 1 +larg 1 +parallel 1 +scienc 1 +mathemat 1 +associ 1 +research 1 +professor 1 +numer 1 +algebra 1 +applic 1 +equat 1 +solut 1 +kincaid 1 +spars 1 +david 1 +analysi 1 +univers 1 +award 1 +american 1 +journal 1 +appli 1 +matric 1 +solv 1 +sever 1 +search 1 +carei 1 +crai 1 +profil 1 +youngashbel 0 +smith 0 +ofmathemat 0 +director 0 +center 0 +webb 0 +institut 0 +naval 0 +architectur 0 +harvard 0 +honor 0 +profession 0 +servic 0 +fellow 0 +advanc 0 +outstand 0 +contribut 0 +special 0 +issueded 0 +chair 0 +committe 0 +mathematicalsocieti 0 +board 0 +truste 0 +argonn 0 +editor 0 +siam 0 +matrixappl 0 +area 0 +interestnumer 0 +partial 0 +differenti 0 +numericallinear 0 +summari 0 +researchmi 0 +activ 0 +focus 0 +partialdifferenti 0 +base 0 +finit 0 +differ 0 +methodsand 0 +oflinear 0 +involv 0 +andspars 0 +softwar 0 +packag 0 +develop 0 +basedon 0 +part 0 +itpack 0 +project 0 +beingextend 0 +includ 0 +suitabl 0 +share 0 +memori 0 +distributedmemori 0 +rapidli 0 +converg 0 +methodsbas 0 +multilevel 0 +procedur 0 +also 0 +beingdevelop 0 +select 0 +recent 0 +publicationsd 0 +stationari 0 +second 0 +degre 0 +topic 0 +polynomi 0 +variabl 0 +rassia 0 +srivasiava 0 +yanushauska 0 +world 0 +scientif 0 +publ 0 +compani 0 +singapor 0 +vona 0 +ration 0 +omega 0 +academ 0 +press 0 +sepehrnoori 0 +vector 0 +pde 0 +engin 0 +minneapoli 0 +high 0 +level 0 +solver 0 +supercomput 0 +algorithm 0 +graham 0 +john 0 +wilei 0 +son 0 +previou 0 +index 0 +next 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..93068e3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,125 @@ +yoonsuck 2 +choe 2 +austin 2 +comput 1 +scienc 1 +univers 1 +texa 1 +research 1 +organ 1 +miikkulainen 1 +group 1 +later 1 +self 1 +lissom 1 +joseph 1 +sirosh 1 +risto 1 +spike 1 +model 1 +structur 1 +neural 1 +digit 1 +recognit 1 +interconnect 1 +utc 1 +object 1 +public 1 +depart 1 +august 1 +network 1 +work 1 +hand 1 +written 1 +featur 1 +prof 1 +segment 1 +repres 1 +book 1 +relat 1 +interact 1 +cortex 1 +function 1 +page 1 +report 1 +inform 1 +yschoe 1 +utexa 1 +home 1 +photo 0 +ad 0 +student 0 +dept 0 +decemb 0 +yonsei 0 +seoul 0 +korea 0 +welcom 0 +homepag 0 +visitor 0 +sinc 0 +interest 0 +cortic 0 +current 0 +systembas 0 +laterali 0 +synerget 0 +develop 0 +recent 0 +includ 0 +extend 0 +actualspik 0 +event 0 +call 0 +slissom 0 +beself 0 +multipl 0 +retinabi 0 +synchron 0 +within 0 +desynchron 0 +differ 0 +outlin 0 +also 0 +check 0 +html 0 +edit 0 +comment 0 +connect 0 +neuron 0 +technic 0 +septemb 0 +electron 0 +isbn 0 +map 0 +appear 0 +touretzki 0 +mozer 0 +hasselmo 0 +editor 0 +advanc 0 +process 0 +system 0 +cambridg 0 +press 0 +handwritten 0 +techic 0 +master 0 +thesi 0 +bunch 0 +link 0 +total 0 +unord 0 +click 0 +find 0 +interestingcontact 0 +offic 0 +phone 0 +email 0 +mail 0 +address 0 +maintain 0 +last 0 +updat 0 +newsgroup 0 +summari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..4b03a96e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,76 @@ +china 1 +scienc 1 +austin 1 +famili 1 +texa 1 +manchest 1 +java 1 +yuanj 0 +xuwint 0 +break 0 +updat 0 +addresspictur 0 +depart 0 +comput 0 +univers 0 +spring 0 +time 0 +tabl 0 +select 0 +cours 0 +schedulec 0 +object 0 +orient 0 +softwar 0 +engr 0 +brown 0 +logic 0 +synthesi 0 +aziz 0 +client 0 +server 0 +system 0 +develop 0 +gang 0 +previou 0 +semest 0 +pagechines 0 +student 0 +associ 0 +alumni 0 +page 0 +work 0 +studi 0 +universityof 0 +technolog 0 +hefei 0 +institut 0 +mathemat 0 +chines 0 +academi 0 +beij 0 +chinaunivers 0 +munich 0 +atmunich 0 +germanyunivers 0 +prof 0 +nick 0 +higham 0 +wang 0 +lifan 0 +hong 0 +chen 0 +guizhongustc 0 +yuan 0 +hailiang 0 +yang 0 +yuhongfriend 0 +linsoftwar 0 +program 0 +perl 0 +common 0 +gatewai 0 +interfac 0 +link 0 +yahoo 0 +publish 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..1337b254 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,11 @@ +feng 1 +visitor 0 +number 0 +sinc 0 +yufeng 0 +utexa 0 +edufing 0 +public 0 +ring 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..da444a1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,68 @@ +china 1 +univers 1 +austin 1 +zhii 1 +chen 1 +guangzhou 1 +comput 1 +resum 1 +zhongshan 1 +chines 1 +fall 1 +spring 1 +associ 1 +anim 1 +page 1 +home 0 +pagezhii 0 +chenabout 0 +mefrom 0 +canton 0 +peopl 0 +republ 0 +current 0 +master 0 +program 0 +scienc 0 +depart 0 +texa 0 +seek 0 +full 0 +time 0 +click 0 +postcript 0 +format 0 +pleas 0 +view 0 +life 0 +calculu 0 +architectur 0 +misc 0 +zodiac 0 +person 0 +differ 0 +kind 0 +find 0 +friend 0 +maintain 0 +john 0 +dong 0 +thank 0 +els 0 +world 0 +wide 0 +info 0 +contact 0 +burton 0 +zchen 0 +utexa 0 +still 0 +construct 0 +copyright 0 +creat 0 +last 0 +modifi 0 +visitor 0 +accord 0 +counter 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..01c15f8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,94 @@ +comput 1 +austin 1 +home 1 +page 1 +univers 1 +china 1 +campu 1 +world 1 +magazin 1 +zhouxiao 1 +utexa 1 +maggi 1 +xiao 1 +zhou 1 +depart 1 +texa 1 +work 1 +fall 1 +onlin 1 +internet 1 +offic 1 +educ 0 +assistantship 0 +graduat 0 +student 0 +teach 0 +assist 0 +databas 0 +manag 0 +sciencesat 0 +buaa 0 +beij 0 +life 0 +current 0 +multimedia 0 +system 0 +algorithm 0 +data 0 +commun 0 +network 0 +distribut 0 +process 0 +spring 0 +look 0 +around 0 +kaleidoscop 0 +land 0 +beauti 0 +visit 0 +peopl 0 +daili 0 +new 0 +digest 0 +zhai 0 +chines 0 +newspag 0 +time 0 +entertain 0 +movi 0 +stamp 0 +societi 0 +ieee 0 +giant 0 +career 0 +center 0 +compani 0 +search 0 +yahoo 0 +galaxi 0 +lyco 0 +directori 0 +guid 0 +html 0 +script 0 +librari 0 +contact 0 +inform 0 +mail 0 +http 0 +user 0 +main 0 +build 0 +room 0 +phone 0 +address 0 +scienc 0 +taylor 0 +last 0 +modifi 0 +sept 0 +comment 0 +welcom 0 +send 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..b632576a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,82 @@ +dynam 1 +document 1 +friend 1 +univers 1 +texa 1 +austin 1 +china 1 +site 1 +homepag 1 +know 1 +student 1 +depart 1 +comput 1 +peke 1 +former 1 +classmat 1 +netscap 1 +clike 1 +qing 0 +qinghi 0 +welcom 0 +graduat 0 +inth 0 +scinc 0 +born 0 +beij 0 +capit 0 +citi 0 +bachelor 0 +degre 0 +meet 0 +ofmi 0 +findmor 0 +peopl 0 +pekingunivers 0 +alumni 0 +home 0 +page 0 +oversea 0 +html 0 +enjoi 0 +live 0 +peek 0 +follow 0 +like 0 +well 0 +find 0 +lot 0 +valuabl 0 +informationand 0 +professionalinternetpc 0 +relatedmac 0 +relatedaft 0 +worknetscap 0 +testtwin 0 +eldertwin 0 +youngernetscap 0 +testanim 0 +danc 0 +titledanc 0 +titl 0 +testanoth 0 +testyet 0 +anoth 0 +testfriendsthi 0 +travel 0 +maintain 0 +xiaohai 0 +best 0 +shan 0 +shinan 0 +visitor 0 +number 0 +sinc 0 +octob 0 +construct 0 +last 0 +modifi 0 +qingunivers 0 +sciencesaustin 0 +zhuqe 0 +utexa 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..0adbd81d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,63 @@ +washington 1 +student 1 +faculti 1 +program 1 +inform 1 +region 1 +research 1 +univers 1 +univ 0 +comput 0 +scienc 0 +organizationsinclud 0 +staff 0 +visitor 0 +organ 0 +ouraffili 0 +graduat 0 +regioninclud 0 +local 0 +desktop 0 +refer 0 +link 0 +elsewher 0 +spotlightuwwin 0 +pacif 0 +intern 0 +programmingcontesttwovideo 0 +highlight 0 +educ 0 +initiativesourcolloquia 0 +live 0 +mbonemajordon 0 +intel 0 +corporationdickkarp 0 +receiv 0 +nation 0 +medal 0 +scienceprofessionalmast 0 +applic 0 +deadlin 0 +autumn 0 +departmentoverview 0 +theimpact 0 +perspect 0 +staffposit 0 +avail 0 +half 0 +centuri 0 +exponenti 0 +progress 0 +technolog 0 +page 0 +peopl 0 +cours 0 +laboratori 0 +newscan 0 +handl 0 +tabl 0 +click 0 +seattl 0 +voic 0 +comment 0 +webmast 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..dd164cee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,57 @@ +berman 1 +andrew 1 +postscript 1 +format 1 +debbi 1 +home 0 +pageandrew 0 +aberman 0 +washington 0 +educomput 0 +scienc 0 +bourassa 0 +virgil 0 +selberg 0 +erik 0 +tron 0 +process 0 +specif 0 +file 0 +protect 0 +unix 0 +oper 0 +system 0 +bothpostscript 0 +andhtml 0 +proceed 0 +winter 0 +usenix 0 +confer 0 +data 0 +structur 0 +fast 0 +approxim 0 +match 0 +shapiro 0 +linda 0 +effici 0 +imag 0 +retriev 0 +multipl 0 +distanc 0 +measur 0 +avail 0 +appear 0 +spie 0 +special 0 +link 0 +wife 0 +beauti 0 +daughter 0 +melani 0 +miscellan 0 +poison 0 +donut 0 +stupid 0 +stupidmi 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..41c44953 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,107 @@ +adam 1 +washington 1 +softwar 1 +imag 1 +finkelstein 1 +depart 1 +comput 1 +scienc 1 +univers 1 +seattl 1 +coffe 1 +student 1 +recent 1 +photocopi 1 +visit 1 +made 1 +start 0 +drink 0 +cup 0 +hair 0 +limp 0 +andlack 0 +bodi 0 +year 0 +live 0 +good 0 +life 0 +graduat 0 +final 0 +finish 0 +doctor 0 +graphic 0 +fall 0 +quarter 0 +post 0 +earli 0 +join 0 +thecomput 0 +atprinceton 0 +engin 0 +tibco 0 +formerli 0 +teknekron 0 +system 0 +palo 0 +alto 0 +wrote 0 +peopl 0 +trade 0 +stock 0 +undergradu 0 +swarthmor 0 +colleg 0 +class 0 +studi 0 +physic 0 +occasion 0 +research 0 +project 0 +find 0 +specif 0 +alarg 0 +databas 0 +sinc 0 +work 0 +someth 0 +call 0 +multiresolut 0 +video 0 +photo 0 +plai 0 +ultim 0 +frisbe 0 +team 0 +calledumatata 0 +address 0 +phone 0 +number 0 +look 0 +plan 0 +file 0 +across 0 +thehilari 0 +menu 0 +least 0 +hous 0 +caff 0 +lardo 0 +chilli 0 +night 0 +snoqualmi 0 +pass 0 +excel 0 +view 0 +comet 0 +hyakutak 0 +great 0 +pictur 0 +taken 0 +friend 0 +marcu 0 +cool 0 +glass 0 +sculptur 0 +dither 0 +mona 0 +gothic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..ead3741a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,146 @@ +data 2 +parallel 2 +load 2 +jame 2 +ahren 2 +balanc 2 +comput 1 +system 1 +render 1 +visual 1 +databas 1 +vision 1 +algorithm 1 +polygon 1 +scientif 1 +cost 1 +perform 1 +dataset 1 +redistribut 1 +paper 1 +model 1 +base 1 +univers 1 +washington 1 +manag 1 +charl 1 +hansen 1 +effect 1 +present 1 +experi 1 +fast 1 +home 1 +page 1 +address 1 +scienc 1 +engin 1 +depart 1 +research 1 +project 1 +laboratori 1 +process 1 +improv 1 +onunbalanc 1 +design 1 +describ 1 +method 1 +machin 1 +shade 1 +applic 1 +set 1 +seattl 0 +email 0 +phone 0 +interest 0 +distribut 0 +environ 0 +alamo 0 +nation 0 +public 0 +intern 0 +confer 0 +august 0 +typic 0 +program 0 +degrad 0 +unnecessari 0 +occur 0 +whichperform 0 +possibl 0 +save 0 +outweighth 0 +polygonrender 0 +show 0 +factor 0 +loss 0 +percent 0 +onbalanc 0 +us 0 +linda 0 +shapiro 0 +steven 0 +tanimoto 0 +brinklei 0 +jakobovit 0 +lara 0 +lewi 0 +proceed 0 +second 0 +workshop 0 +februari 0 +gener 0 +motiv 0 +intend 0 +provid 0 +unifi 0 +highli 0 +graphic 0 +user 0 +interfac 0 +advanc 0 +queri 0 +facil 0 +interact 0 +notebook 0 +aid 0 +experiment 0 +promot 0 +share 0 +commun 0 +frank 0 +ortega 0 +supercomput 0 +novemb 0 +massiv 0 +simpl 0 +target 0 +requir 0 +extrem 0 +larg 0 +found 0 +mani 0 +handl 0 +arbitrarili 0 +complex 0 +need 0 +mesh 0 +issu 0 +involv 0 +toolkit 0 +enabl 0 +scientist 0 +displai 0 +directli 0 +avoid 0 +transmiss 0 +huge 0 +amount 0 +post 0 +ofwashington 0 +april 0 +longer 0 +version 0 +icpp 0 +also 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..09d8faf5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,100 @@ +vision 2 +databas 2 +research 1 +visual 1 +environ 1 +linda 1 +shapiro 1 +steven 1 +tanimoto 1 +base 1 +comput 1 +imag 1 +jame 1 +ahren 1 +jakobovit 1 +lara 1 +lewi 1 +februari 1 +overview 1 +scientif 1 +system 1 +model 1 +graphic 1 +interfac 1 +data 1 +devr 1 +entiti 1 +relat 1 +multi 1 +level 1 +queri 1 +experi 1 +manag 1 +brinklei 1 +spie 1 +symposium 1 +electron 1 +technolog 1 +wasdesign 0 +gener 0 +motiv 0 +andintend 0 +provid 0 +unifieddata 0 +highli 0 +user 0 +advanc 0 +queryfacil 0 +interact 0 +laboratori 0 +notebook 0 +databaseenviron 0 +aid 0 +experiment 0 +andpromot 0 +share 0 +commun 0 +store 0 +hierarch 0 +datastructur 0 +schema 0 +contain 0 +name 0 +ofproperti 0 +part 0 +attribut 0 +among 0 +thepart 0 +definit 0 +describ 0 +buildinst 0 +specif 0 +studi 0 +mani 0 +differ 0 +topic 0 +includ 0 +peopl 0 +princip 0 +investig 0 +graduat 0 +student 0 +public 0 +inmodel 0 +proceed 0 +secondcad 0 +workshop 0 +present 0 +project 0 +flexibledata 0 +organ 0 +support 0 +databasesystem 0 +scienceand 0 +implement 0 +scienc 0 +email 0 +washington 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..4ae849da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,77 @@ +comput 1 +link 1 +page 1 +scienc 1 +graduat 1 +washington 1 +school 1 +program 1 +outdoor 1 +home 1 +great 1 +women 1 +educ 1 +inform 1 +mentorship 1 +project 1 +bernheim 0 +start 0 +univers 0 +still 0 +construct 0 +william 0 +colleg 0 +math 0 +front 0 +plai 0 +ultim 0 +frisbe 0 +autumn 0 +quarter 0 +class 0 +programminglanguag 0 +automata 0 +principl 0 +ofdigit 0 +system 0 +design 0 +graphic 0 +seminar 0 +parallel 0 +environ 0 +relat 0 +nation 0 +park 0 +gorp 0 +guideto 0 +recreationfun 0 +christian 0 +scott 0 +interact 0 +list 0 +abig 0 +pile 0 +cool 0 +blast 0 +past 0 +scoobi 0 +dooeduc 0 +refer 0 +undergrad 0 +peterson 0 +center 0 +sourc 0 +opportun 0 +distribut 0 +allow 0 +undergradu 0 +spend 0 +summerwork 0 +research 0 +femal 0 +mentor 0 +experi 0 +highlyrecommend 0 +back 0 +pagelast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..feabc0a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,128 @@ +comput 1 +proof 1 +algorithm 1 +softwar 1 +scientist 1 +amir 1 +design 1 +master 1 +wai 1 +teach 1 +mathematician 1 +motiv 1 +error 1 +like 1 +make 1 +defici 1 +michail 0 +michailgradu 0 +studenti 0 +second 0 +year 0 +graduat 0 +student 0 +depart 0 +scienceat 0 +univers 0 +washington 0 +research 0 +interest 0 +includ 0 +followingarea 0 +engin 0 +educ 0 +degre 0 +universityof 0 +toronto 0 +thesi 0 +optim 0 +broadcast 0 +summationfor 0 +hierarch 0 +ring 0 +architectur 0 +shift 0 +click 0 +hereto 0 +obtain 0 +compress 0 +postscript 0 +file 0 +recent 0 +experi 0 +particular 0 +built 0 +opsi 0 +java 0 +appletdesign 0 +balanc 0 +binari 0 +tree 0 +combinesprogram 0 +anim 0 +final 0 +lunar 0 +lander 0 +style 0 +gamethat 0 +wrote 0 +part 0 +undergradu 0 +graphic 0 +cours 0 +quotat 0 +tend 0 +conserv 0 +mani 0 +unwillingto 0 +consid 0 +might 0 +better 0 +write 0 +told 0 +embarrass 0 +learn 0 +publishedincorrect 0 +theorem 0 +avoid 0 +believ 0 +theywil 0 +structur 0 +persuad 0 +will 0 +explor 0 +unconvent 0 +proofstyl 0 +unfortun 0 +found 0 +care 0 +whether 0 +theyhav 0 +publish 0 +incorrect 0 +result 0 +often 0 +seem 0 +glad 0 +wasnot 0 +caught 0 +refere 0 +sinc 0 +would 0 +meant 0 +fewer 0 +public 0 +fear 0 +stylethat 0 +reveal 0 +mistak 0 +lesli 0 +lamport 0 +construct 0 +wayi 0 +simpl 0 +obvious 0 +theother 0 +complic 0 +obviou 0 +hoar 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..0da3211c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,62 @@ +scienc 1 +richard 1 +anderson 1 +comput 1 +washington 1 +research 1 +institut 1 +project 1 +professor 1 +univers 1 +year 1 +visit 1 +algorithm 1 +work 1 +travel 1 +home 0 +page 0 +associ 0 +graduat 0 +inmathemat 0 +reed 0 +colleg 0 +stanfordin 0 +join 0 +aon 0 +postdoc 0 +mathemat 0 +inberkelei 0 +receiv 0 +presidenti 0 +younginvestig 0 +award 0 +spent 0 +academ 0 +yeara 0 +indian 0 +bangalor 0 +india 0 +main 0 +interest 0 +theori 0 +implementationof 0 +includ 0 +parallel 0 +geometri 0 +scientif 0 +applic 0 +engin 0 +depart 0 +seattl 0 +teach 0 +paper 0 +progress 0 +qualifi 0 +evalu 0 +note 0 +theindian 0 +resum 0 +tourist 0 +pictur 0 +recent 0 +talksanderson 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..2314b136 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,128 @@ +interest 1 +research 1 +decis 1 +comput 1 +scienc 1 +univers 1 +name 1 +syllabl 1 +anhai 1 +doan 1 +hungari 1 +mostli 1 +plan 1 +travel 1 +music 1 +first 1 +ofmi 1 +birthplac 1 +librari 1 +homepageanhai 0 +page 0 +reconstruct 0 +pleas 0 +revisit 0 +soon 0 +born 0 +brought 0 +vietnam 0 +finish 0 +high 0 +school 0 +iwent 0 +studi 0 +graduat 0 +kossuth 0 +lajo 0 +art 0 +andscienc 0 +debrecen 0 +receiv 0 +also 0 +wisconsin 0 +milwauke 0 +start 0 +fall 0 +program 0 +depart 0 +andengin 0 +ofwashington 0 +seattl 0 +artifici 0 +intellig 0 +amcurr 0 +investig 0 +make 0 +underuncertainti 0 +theoret 0 +markov 0 +process 0 +qualit 0 +theori 0 +academ 0 +includ 0 +read 0 +listen 0 +jazz 0 +blue 0 +thing 0 +mean 0 +calm 0 +invietnames 0 +made 0 +combin 0 +last 0 +mother 0 +nghean 0 +father 0 +haiphong 0 +show 0 +creativ 0 +folkswer 0 +thought 0 +birth 0 +younger 0 +brother 0 +theysimpli 0 +switch 0 +gave 0 +namehaian 0 +content 0 +probabilist 0 +knowledg 0 +represent 0 +recent 0 +paper 0 +curriculum 0 +vita 0 +educ 0 +employ 0 +histori 0 +award 0 +honor 0 +public 0 +teach 0 +data 0 +structur 0 +algorithm 0 +take 0 +cours 0 +check 0 +inform 0 +offic 0 +hour 0 +locat 0 +person 0 +comtemporari 0 +vietnames 0 +affair 0 +literatur 0 +write 0 +paint 0 +foreign 0 +languag 0 +gener 0 +purpos 0 +life 0 +snapshotsanhai 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..ac5407e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,128 @@ +ausland 2 +figur 1 +joel 1 +comput 1 +washington 1 +anim 1 +compil 1 +paper 1 +motion 1 +univers 1 +last 1 +project 1 +seattl 1 +dynam 1 +philipos 1 +chamber 1 +egger 1 +automat 1 +system 1 +graphic 1 +page 1 +without 1 +hope 0 +depart 0 +scienc 0 +engineeringat 0 +pictur 0 +spring 0 +class 0 +click 0 +onit 0 +origin 0 +quarter 0 +complet 0 +qual 0 +time 0 +code 0 +gener 0 +multiflow 0 +offic 0 +sieg 0 +hall 0 +home 0 +univ 0 +resum 0 +written 0 +follow 0 +fast 0 +effect 0 +andb 0 +bershad 0 +pldi 0 +support 0 +event 0 +dispatch 0 +inextens 0 +mock 0 +andp 0 +pardyak 0 +workshop 0 +compilersupport 0 +softwar 0 +februari 0 +experi 0 +control 0 +base 0 +synthesisfor 0 +articul 0 +fukunaga 0 +partovi 0 +christensen 0 +reiss 0 +shuman 0 +mark 0 +acmtransact 0 +also 0 +site 0 +optim 0 +leapfrog 0 +benjamin 0 +wilkerson 0 +mathemat 0 +magazin 0 +lossili 0 +compress 0 +mpeg 0 +animationthat 0 +goe 0 +synthesi 0 +sequenc 0 +show 0 +mywork 0 +piec 0 +togeth 0 +cartwheel 0 +jump 0 +andshuffl 0 +fall 0 +andcollaps 0 +brown 0 +us 0 +algorithm 0 +orang 0 +isjust 0 +try 0 +switch 0 +consider 0 +tosmooth 0 +physic 0 +autumn 0 +took 0 +super 0 +short 0 +doubl 0 +speed 0 +small 0 +version 0 +final 0 +find 0 +better 0 +place 0 +slide 0 +thetalk 0 +singular 0 +valu 0 +decomposit 0 +gave 0 +seminar 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..3afe3ece --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,96 @@ +brian 1 +info 1 +boinge 1 +name 1 +offic 1 +current 1 +page 1 +favorit 1 +look 1 +girl 0 +michalowskidepart 0 +comput 0 +scienc 0 +engineeringmail 0 +stop 0 +univers 0 +washingtonseattl 0 +locat 0 +sieg 0 +phone 0 +take 0 +ling 0 +edit 0 +mossi 0 +bitsthank 0 +visit 0 +visitor 0 +number 0 +worst 0 +view 0 +us 0 +headscapewhenev 0 +second 0 +year 0 +gradstud 0 +actual 0 +liber 0 +artist 0 +interest 0 +inlinguist 0 +confus 0 +good 0 +get 0 +know 0 +alreadi 0 +ultrahotlist 0 +site 0 +ofal 0 +time 0 +search 0 +onlin 0 +refer 0 +forsometh 0 +glorifi 0 +hotlist 0 +doesn 0 +thave 0 +urouletteto 0 +random 0 +find 0 +past 0 +institut 0 +ofwhich 0 +mental 0 +person 0 +quot 0 +file 0 +songsand 0 +poem 0 +fictiti 0 +thrash 0 +band 0 +puriti 0 +test 0 +origin 0 +work 0 +tokeep 0 +touch 0 +finger 0 +mail 0 +guestbook 0 +pagesfrom 0 +friend 0 +idea 0 +includ 0 +aslfingerspel 0 +snapshot 0 +blatantli 0 +stolen 0 +brad 0 +chamberlain 0 +michalowski 0 +dept 0 +complet 0 +sanityerad 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..f4dd8d51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,52 @@ +paul 1 +comput 1 +washington 1 +research 1 +proof 1 +beamepaul 1 +beam 1 +associ 1 +scienc 1 +receiv 1 +univers 1 +computationalcomplex 1 +recent 1 +professor 0 +thedepart 0 +engineeringat 0 +theunivers 0 +mathemat 0 +sciencein 0 +toronto 0 +post 0 +doctor 0 +academicyear 0 +join 0 +autumn 0 +presidentialyoung 0 +investig 0 +award 0 +concern 0 +primarili 0 +theoret 0 +aspect 0 +paralleland 0 +distribut 0 +concentr 0 +connect 0 +theori 0 +particular 0 +complex 0 +inproposit 0 +system 0 +enjoi 0 +squash 0 +softbal 0 +sport 0 +enthusiasm 0 +cancompens 0 +lack 0 +talent 0 +paper 0 +qual 0 +project 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..b4a35d28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,84 @@ +code 1 +devic 1 +sport 1 +ultim 1 +frisbe 1 +confer 1 +champion 1 +interest 1 +david 0 +beckerdavid 0 +beckercontact 0 +info 0 +mark 0 +spot 0 +stuff 0 +spin 0 +much 0 +time 0 +goe 0 +makingspina 0 +real 0 +oper 0 +system 0 +respons 0 +level 0 +borrow 0 +model 0 +drvier 0 +support 0 +build 0 +environ 0 +manag 0 +keep 0 +develop 0 +platform 0 +function 0 +somedai 0 +getto 0 +perform 0 +measur 0 +optim 0 +tri 0 +bunch 0 +favorit 0 +tripl 0 +jump 0 +minnesota 0 +athlet 0 +bethel 0 +colleg 0 +volleybal 0 +men 0 +grad 0 +team 0 +plai 0 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 0 +handbal 0 +also 0 +playracquetballgolftenni 0 +done 0 +bridgecampingcanoeingdisc 0 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 0 +skiingweightliftingwhitewat 0 +raftinghorseback 0 +ridingmountain 0 +bikingin 0 +line 0 +skate 0 +librari 0 +econom 0 +topic 0 +particularli 0 +free 0 +bank 0 +anti 0 +trust 0 +currenc 0 +ssto 0 +rlv 0 +theologi 0 +centurai 0 +railroad 0 +boot 0 +locomot 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..06d6cbe6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,280 @@ +bershad 3 +system 3 +oper 2 +brian 2 +perform 2 +extens 2 +workshop 2 +usenix 1 +support 1 +sosp 1 +asplo 1 +paper 1 +project 1 +dynam 1 +us 1 +appear 1 +compil 1 +softwar 1 +comput 1 +network 1 +memori 1 +manag 1 +pardyak 1 +romer 1 +februari 1 +microkernel 1 +univers 1 +seattl 1 +distribut 1 +architectur 1 +spin 1 +mobil 1 +design 1 +octob 1 +przemyslaw 1 +osdi 1 +fast 1 +confer 1 +marc 1 +fiuczynski 1 +savag 1 +chen 1 +maeda 1 +inform 1 +washington 1 +ofwashington 1 +parallel 1 +work 1 +isca 1 +midwai 1 +winter 1 +master 1 +qual 1 +karlin 1 +sigop 1 +associ 1 +symposium 1 +onoper 1 +implemen 1 +structur 1 +voelker 1 +languag 1 +ausland 1 +philipos 1 +chamber 1 +egger 1 +protocol 1 +specif 1 +write 1 +emin 1 +sirer 1 +stefan 1 +zekauska 1 +sawdon 1 +cach 1 +hardwar 1 +larg 1 +servic 1 +kernel 1 +stock 1 +mach 1 +commun 1 +machnix 1 +drave 1 +forin 1 +wwo 1 +relat 1 +eduwork 0 +scienc 0 +engin 0 +depart 0 +home 0 +street 0 +assist 0 +professor 0 +sinc 0 +receiv 0 +took 0 +brief 0 +respit 0 +experi 0 +post 0 +industri 0 +cultur 0 +northeast 0 +return 0 +northwest 0 +coffe 0 +research 0 +hasappear 0 +toc 0 +although 0 +seem 0 +asigmetr 0 +publish 0 +save 0 +life 0 +besid 0 +run 0 +plai 0 +squash 0 +hang 0 +thestairmast 0 +includ 0 +extensibleoper 0 +carnegi 0 +mellon 0 +parallelnetwork 0 +scalabl 0 +rocki 0 +thesequel 0 +etch 0 +binari 0 +instrument 0 +optimizationcours 0 +look 0 +click 0 +list 0 +youmight 0 +degre 0 +recent 0 +trace 0 +driven 0 +comparison 0 +algorithm 0 +prefetch 0 +cachingtraci 0 +kimbrel 0 +andrew 0 +tomkin 0 +hugo 0 +patterson 0 +edward 0 +felten 0 +garth 0 +gibson 0 +anna 0 +bind 0 +extensiblesystem 0 +interpret 0 +theodor 0 +denni 0 +geoffrei 0 +alec 0 +wolman 0 +wayn 0 +wong 0 +jean 0 +loup 0 +baer 0 +henri 0 +levi 0 +effect 0 +dynamiccompil 0 +program 0 +implementationj 0 +applic 0 +modula 0 +greg 0 +defouw 0 +mari 0 +alapat 0 +wilson 0 +hsieh 0 +charl 0 +garrett 0 +david 0 +becker 0 +safe 0 +link 0 +automat 0 +event 0 +dispatch 0 +systemsc 0 +mock 0 +safeti 0 +reduc 0 +overhead 0 +onlinesuperpag 0 +promot 0 +ohlrich 0 +detect 0 +sharedmemori 0 +appearedin 0 +page 0 +map 0 +polici 0 +conflictresolut 0 +standard 0 +mobisa 0 +inth 0 +issu 0 +avoid 0 +conflict 0 +miss 0 +direct 0 +mappedcach 0 +forappl 0 +uwtechn 0 +report 0 +effici 0 +packet 0 +demultiplex 0 +multipl 0 +endpoint 0 +messag 0 +yuhara 0 +moss 0 +impact 0 +decomposit 0 +high 0 +practic 0 +consider 0 +block 0 +concurr 0 +object 0 +interrupt 0 +prioriti 0 +share 0 +ieee 0 +compcon 0 +local 0 +area 0 +andmostli 0 +watson 0 +moblic 0 +consist 0 +virtual 0 +index 0 +wheeler 0 +mutual 0 +exclus 0 +uniprocessor 0 +redel 0 +elli 0 +primit 0 +ginsburg 0 +baron 0 +microbenchmark 0 +evalu 0 +increas 0 +irrelev 0 +micro 0 +base 0 +golub 0 +continu 0 +implement 0 +thread 0 +inoper 0 +rashid 0 +dean 0 +arpa 0 +rain 0 +citi 0 +hash 0 +hous 0 +harrier 0 +rel 0 +abduct 0 +alien 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..03451c9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,152 @@ +page 2 +search 1 +new 1 +home 1 +scienc 1 +seattl 1 +washington 1 +list 1 +american 1 +directori 1 +budget 1 +doorenbo 1 +current 1 +work 1 +todai 1 +project 1 +pointer 1 +link 1 +yahoo 1 +world 1 +yellow 1 +white 1 +sport 1 +pagebob 0 +depart 0 +comput 0 +engineeringunivers 0 +washingtonbox 0 +offic 0 +sieg 0 +hall 0 +bobd 0 +netbot 0 +union 0 +place 0 +suit 0 +voic 0 +daili 0 +comedi 0 +dilbert 0 +strip 0 +boffo 0 +david 0 +letterman 0 +geeki 0 +zdnet 0 +anchordesk 0 +magazin 0 +good 0 +stuff 0 +shortcut 0 +cool 0 +tool 0 +research 0 +postdoc 0 +oren 0 +etzioni 0 +steve 0 +hank 0 +weld 0 +softbot 0 +also 0 +particular 0 +shopbot 0 +internet 0 +shop 0 +agent 0 +previou 0 +soar 0 +thesi 0 +site 0 +collect 0 +repositori 0 +canada 0 +gopher 0 +scientif 0 +sigma 0 +scientist 0 +miscellan 0 +meta 0 +metacrawl 0 +savvysearch 0 +alta 0 +vista 0 +lyco 0 +inktomi 0 +open 0 +text 0 +infoseek 0 +excit 0 +crawler 0 +hotbot 0 +hierarch 0 +select 0 +magellan 0 +pointcom 0 +engin 0 +guid 0 +onlin 0 +telephon 0 +network 0 +switchboard 0 +cnnfn 0 +newshour 0 +post 0 +reuter 0 +headlin 0 +social 0 +cafe 0 +report 0 +boston 0 +globe 0 +span 0 +time 0 +view 0 +slate 0 +feed 0 +salon 0 +atlant 0 +monthli 0 +harper 0 +espn 0 +zone 0 +govern 0 +fedworld 0 +index 0 +hous 0 +congress 0 +arpa 0 +feder 0 +deficit 0 +nation 0 +debt 0 +clock 0 +concord 0 +coalit 0 +hand 0 +balanc 0 +bipartisan 0 +commiss 0 +entitl 0 +reform 0 +univers 0 +museum 0 +past 0 +life 0 +pittsburgh 0 +upcom 0 +birthdai 0 +person 0 +andfun 0 +pagebobd 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..27bd2d98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,100 @@ +comput 2 +languag 1 +born 1 +scienc 1 +univers 1 +washington 1 +home 1 +depart 1 +constraint 1 +program 1 +research 1 +engin 1 +orient 1 +human 1 +societi 1 +page 1 +recent 1 +autumn 1 +graduat 1 +work 1 +receiv 1 +xerox 1 +spent 1 +alan 0 +pagealan 0 +pagei 0 +professor 0 +principalresearch 0 +interest 0 +base 0 +system 0 +object 0 +logic 0 +computerinteract 0 +current 0 +activitiesuwconstraint 0 +contain 0 +link 0 +paper 0 +public 0 +domainsourc 0 +code 0 +satisfact 0 +algorithm 0 +media 0 +technolog 0 +democraci 0 +groupuw 0 +student 0 +also 0 +idea 0 +qualsproject 0 +teachingher 0 +cours 0 +taught 0 +concept 0 +winter 0 +interact 0 +spring 0 +informationhistori 0 +grew 0 +idaho 0 +reed 0 +colleg 0 +mathemat 0 +atstanford 0 +degre 0 +dissert 0 +done 0 +associ 0 +paloalto 0 +center 0 +concern 0 +simulationlaboratori 0 +year 0 +post 0 +doctoralfellow 0 +artifici 0 +intellig 0 +ofedinburgh 0 +scotland 0 +mechan 0 +problem 0 +solv 0 +symbolicalgebra 0 +join 0 +andexcept 0 +sabbat 0 +europarc 0 +cambridg 0 +england 0 +havebeen 0 +sinc 0 +address 0 +dept 0 +seattl 0 +phone 0 +email 0 +eduwww 0 +http 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..e6d19036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,20 @@ +thing 1 +brad 1 +home 0 +pagebrad 0 +chamberlainphoto 0 +credit 0 +mike 0 +perkowitzth 0 +probabl 0 +couldn 0 +care 0 +less 0 +offic 0 +address 0 +work 0 +like 0 +ad 0 +subset 0 +ofth 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..1bf3a880 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,125 @@ +html 1 +comput 1 +mumei 1 +postscript 1 +present 1 +brendan 1 +washington 1 +fall 1 +work 1 +problem 1 +physic 1 +map 1 +us 1 +also 1 +confer 1 +galleri 1 +home 0 +pagebrendan 0 +mumeyi 0 +student 0 +depart 0 +scienceat 0 +theunivers 0 +enter 0 +expect 0 +graduat 0 +around 0 +contact 0 +information 0 +mail 0 +edufor 0 +address 0 +click 0 +curriculum 0 +vitaein 0 +htmlorpostscriptformat 0 +academ 0 +interestsi 0 +would 0 +call 0 +appli 0 +theoret 0 +scientist 0 +current 0 +biologi 0 +moment 0 +look 0 +build 0 +rough 0 +locat 0 +landmark 0 +genom 0 +gener 0 +speak 0 +interest 0 +theori 0 +math 0 +tosolv 0 +reason 0 +practic 0 +done 0 +incomput 0 +astrophys 0 +hpcc 0 +groupher 0 +onlin 0 +papersb 0 +power 0 +clone 0 +overlap 0 +test 0 +poster 0 +ismb 0 +aspect 0 +probe 0 +survei 0 +paper 0 +written 0 +fulfil 0 +candidaci 0 +requir 0 +find 0 +cluster 0 +quickli 0 +parallel 0 +dimac 0 +challeng 0 +klaw 0 +upper 0 +lower 0 +bound 0 +construct 0 +alphabet 0 +binari 0 +tree 0 +soda 0 +siam 0 +ofdiscret 0 +mathemat 0 +note 0 +version 0 +produc 0 +latexhtml 0 +containsom 0 +error 0 +readabl 0 +part 0 +recreationhik 0 +cycl 0 +ski 0 +climb 0 +drink 0 +coffeeto 0 +name 0 +sailingand 0 +hope 0 +sometim 0 +like 0 +plai 0 +bridg 0 +older 0 +photo 0 +first 0 +second 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..7794f588 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,88 @@ +comput 1 +project 1 +interest 1 +us 1 +cours 1 +bricker 1 +washington 1 +research 1 +graphic 1 +learn 1 +lauren 0 +brickerlauren 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +click 0 +need 0 +clue 0 +info 0 +primarli 0 +userinterfac 0 +although 0 +self 0 +proclaim 0 +groupi 0 +current 0 +work 0 +stevetanimoto 0 +mathematicsexperi 0 +imag 0 +process 0 +metip 0 +goal 0 +ofthi 0 +junior 0 +high 0 +school 0 +kid 0 +mathemat 0 +usingexploratori 0 +rather 0 +rote 0 +method 0 +particular 0 +minterest 0 +develop 0 +supportedcollabor 0 +cscl 0 +user 0 +interfac 0 +inthi 0 +well 0 +workin 0 +lawk 0 +dawg 0 +interfacea 0 +fairli 0 +extens 0 +resumeschool 0 +dazethi 0 +quarterdoth 0 +quartershuman 0 +interact 0 +spring 0 +quarter 0 +writeup 0 +final 0 +writeupwhat 0 +asystem 0 +insocieti 0 +excit 0 +hobbi 0 +enjoi 0 +busi 0 +lifesportscookingpotteri 0 +even 0 +studio 0 +garag 0 +year 0 +stuffbecaus 0 +ask 0 +itaddress 0 +last 0 +modifi 0 +mondai 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..a211d7cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,6 @@ +carlson 1 +washington 1 +adam 0 +carlsonadam 0 +comput 0 +scienc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..fda2053a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,44 @@ +certain 1 +andrew 1 +page 1 +washington 1 +download 1 +look 1 +model 1 +current 1 +work 1 +viewer 1 +modifi 1 +home 0 +server 0 +fix 0 +give 0 +note 0 +interest 0 +follow 0 +direct 0 +theview 0 +tonyderos 0 +david 0 +salesin 0 +werner 0 +stuetzl 0 +duchamp 0 +jovan 0 +popov 0 +scanningproject 0 +build 0 +requir 0 +sgigraph 0 +workstat 0 +paper 0 +netscap 0 +shouldalso 0 +browser 0 +similar 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..cabaed8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,91 @@ +chou 1 +washington 1 +school 1 +fall 1 +line 1 +comput 1 +java 1 +welcom 0 +homepag 0 +grad 0 +student 0 +computersci 0 +seattl 0 +pictur 0 +right 0 +first 0 +quarter 0 +ross 0 +think 0 +scari 0 +relat 0 +infoth 0 +chinook 0 +projectmi 0 +schedulemi 0 +list 0 +publicationscod 0 +workshop 0 +codesignpersonalperson 0 +info 0 +taiwan 0 +greec 0 +resumefoodi 0 +enjoi 0 +cook 0 +peopl 0 +open 0 +restaur 0 +ofpeopl 0 +favorit 0 +dish 0 +includ 0 +stir 0 +fri 0 +rice 0 +noodl 0 +beefskew 0 +recip 0 +toysb 0 +geek 0 +computersand 0 +cool 0 +toi 0 +taiwanesei 0 +also 0 +promot 0 +taiwaneselanguag 0 +current 0 +develop 0 +tool 0 +taiwanes 0 +sureto 0 +check 0 +experiment 0 +taiwanesedictionari 0 +though 0 +absolut 0 +requir 0 +page 0 +best 0 +viewedif 0 +instal 0 +chines 0 +charact 0 +font 0 +us 0 +enabl 0 +browser 0 +like 0 +netscap 0 +beabl 0 +applet 0 +yellow 0 +ball 0 +bouncingov 0 +barnei 0 +purpl 0 +dynosaur 0 +last 0 +updat 0 +email 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..c9c8a3a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,115 @@ +math 1 +home 1 +page 1 +contest 1 +program 1 +problem 1 +html 1 +set 1 +color 1 +search 1 +linux 1 +result 1 +final 1 +version 1 +internet 1 +comput 1 +refer 1 +thing 1 +done 1 +washington 1 +depart 1 +engin 1 +spring 1 +meet 1 +corei 0 +andersoncorei 0 +andersonth 0 +interest 0 +late 0 +research 0 +percept 0 +automat 0 +correct 0 +induc 0 +compet 0 +programm 0 +month 0 +info 0 +localtalk 0 +adapt 0 +plai 0 +wavelet 0 +intern 0 +collegi 0 +pacif 0 +region 0 +previou 0 +year 0 +duke 0 +rsum 0 +onlin 0 +recent 0 +august 0 +review 0 +graphic 0 +text 0 +book 0 +graduat 0 +univser 0 +highlin 0 +commun 0 +colleg 0 +self 0 +tune 0 +fpga 0 +fall 0 +help 0 +polli 0 +organ 0 +contribut 0 +open 0 +hous 0 +april 0 +manag 0 +chapter 0 +treasuri 0 +develop 0 +read 0 +macintosh 0 +good 0 +servic 0 +providercool 0 +found 0 +usag 0 +statist 0 +lurker 0 +guid 0 +babylon 0 +sunsit 0 +archiv 0 +dilbert 0 +zone 0 +brother 0 +pageus 0 +link 0 +peek 0 +insid 0 +term 0 +lab 0 +featur 0 +netscap 0 +scienc 0 +univers 0 +washinton 0 +uwtv 0 +tech 0 +notesmi 0 +autumn 0 +schedul 0 +mondai 0 +tuesdai 0 +wednesdai 0 +thursdai 0 +fridai 0 +corin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..059a357a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,65 @@ +washington 1 +valedictorian 1 +page 1 +enjoy 1 +experi 1 +ball 1 +address 1 +found 1 +craig 0 +experiencecraig 0 +kaplancurr 0 +locat 0 +student 0 +depart 0 +comput 0 +scienc 0 +universityof 0 +seattl 0 +well 0 +copyof 0 +homepag 0 +univers 0 +waterloo 0 +time 0 +modifi 0 +appropri 0 +tomi 0 +current 0 +situat 0 +near 0 +undergraduatewa 0 +grad 0 +photo 0 +fromth 0 +second 0 +occur 0 +saturdai 0 +convoc 0 +cannot 0 +express 0 +honour 0 +felt 0 +wonder 0 +graduat 0 +class 0 +choos 0 +repres 0 +incident 0 +didn 0 +know 0 +parent 0 +minut 0 +start 0 +ceremoni 0 +sai 0 +never 0 +forgiv 0 +text 0 +anyon 0 +curiou 0 +visitor 0 +number 0 +last 0 +updat 0 +cskaplan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..f791ae07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,56 @@ +codi 1 +kwok 1 +welcom 1 +work 1 +sanctuari 1 +nausicaa 1 +vallei 1 +home 0 +page 0 +first 0 +thing 0 +thought 0 +peopl 0 +think 0 +mean 0 +aliv 0 +asami 0 +chiaki 0 +chung 0 +ctkwok 0 +washington 0 +edui 0 +graduat 0 +student 0 +weld 0 +andoren 0 +etzioni 0 +plan 0 +andsoftwar 0 +agent 0 +ingram 0 +softbot 0 +aiuw 0 +contact 0 +informationleisur 0 +windlaputa 0 +castl 0 +skyhyp 0 +futur 0 +vision 0 +gunnm 0 +wind 0 +arch 0 +vile 0 +java 0 +applet 0 +anim 0 +take 0 +load 0 +last 0 +modifi 0 +visitor 0 +sinc 0 +figur 0 +doom 0 +numer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..0769b162 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,30 @@ +darren 1 +cronquist 1 +washington 1 +inform 1 +current 1 +resum 1 +curriculum 1 +darrenc 0 +depart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +welcom 0 +home 0 +page 0 +last 0 +updat 0 +employ 0 +plan 0 +complet 0 +myph 0 +html 0 +postscript 0 +vita 0 +vitaperson 0 +rest 0 +homepag 0 +underconstruct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..82f8f84f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,193 @@ +plan 1 +interfac 1 +david 1 +christianson 1 +user 1 +work 1 +assist 1 +automat 1 +also 1 +second 1 +till 1 +washington 1 +comput 1 +univers 1 +current 1 +graduat 1 +interest 1 +studi 1 +activ 1 +interact 1 +recent 1 +build 1 +shop 1 +simpl 1 +anderson 1 +weld 1 +salesin 1 +cohen 1 +develop 1 +camera 1 +local 1 +dave 1 +inform 1 +check 1 +midnight 0 +nowher 0 +babi 0 +christiansondbc 0 +scienc 0 +engin 0 +depart 0 +seattl 0 +workin 0 +spare 0 +time 0 +third 0 +year 0 +student 0 +atth 0 +inaiand 0 +gotten 0 +mayb 0 +even 0 +graphicsa 0 +well 0 +knowledg 0 +represent 0 +goal 0 +directedbehavior 0 +mix 0 +initi 0 +recognit 0 +buzzwordacquisit 0 +context 0 +human 0 +prototyp 0 +us 0 +intellig 0 +bobdoorenbo 0 +shopbot 0 +rather 0 +somehowintegr 0 +variou 0 +line 0 +store 0 +built 0 +applicationthat 0 +read 0 +pars 0 +basket 0 +order 0 +determinewhat 0 +product 0 +collabor 0 +sean 0 +michael 0 +moviethat 0 +demonstr 0 +appl 0 +intern 0 +russel 0 +technologyinto 0 +experi 0 +perpetr 0 +graphic 0 +debugg 0 +theucpop 0 +famili 0 +planner 0 +programm 0 +client 0 +number 0 +peopl 0 +whose 0 +live 0 +touch 0 +grow 0 +everi 0 +manual 0 +isher 0 +spent 0 +undergradu 0 +career 0 +theunivers 0 +chicago 0 +carboload 0 +harold 0 +chicken 0 +theanim 0 +agent 0 +publicationschristianson 0 +declar 0 +control 0 +cinematographi 0 +appear 0 +aaai 0 +firbi 0 +mcdougal 0 +fast 0 +map 0 +support 0 +navig 0 +object 0 +symposium 0 +sensor 0 +fusion 0 +boston 0 +novemb 0 +find 0 +thechateau 0 +cynic 0 +offic 0 +withfreder 0 +william 0 +darren 0 +adam 0 +gloriou 0 +leader 0 +juan 0 +import 0 +thing 0 +favorit 0 +practic 0 +judo 0 +compet 0 +senior 0 +nation 0 +sibl 0 +sisterjust 0 +school 0 +librari 0 +michigan 0 +surf 0 +cut 0 +edg 0 +research 0 +supercollid 0 +realli 0 +feel 0 +like 0 +slack 0 +mirski 0 +help 0 +watch 0 +hero 0 +youth 0 +duel 0 +death 0 +wwwf 0 +grudg 0 +match 0 +fame 0 +fortun 0 +respons 0 +week 0 +game 0 +domain 0 +straight 0 +doomgat 0 +sai 0 +evil 0 +book 0 +tick 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..3a995ae0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,39 @@ +page 1 +johnson 1 +washington 1 +take 1 +quiz 1 +home 0 +dave 0 +david 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +research 0 +interest 0 +navig 0 +assist 0 +hypertext 0 +readersproject 0 +activ 0 +racquetbal 0 +golf 0 +basketbal 0 +softbal 0 +tutori 0 +script 0 +fit 0 +togeth 0 +theracquetbal 0 +creat 0 +look 0 +thecreat 0 +assess 0 +form 0 +give 0 +last 0 +modifi 0 +mondai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..55785f7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,161 @@ +spin 1 +seattl 1 +system 1 +page 1 +extens 1 +undergradu 1 +notr 1 +dame 1 +david 1 +dion 1 +washington 1 +current 1 +unix 1 +server 1 +level 1 +univers 1 +world 1 +person 1 +research 1 +work 1 +dynam 1 +kernel 1 +languag 1 +modula 1 +user 1 +mach 1 +first 1 +stuff 1 +time 1 +surf 1 +visit 1 +comput 1 +scienc 1 +condit 1 +major 1 +life 1 +stai 1 +touch 1 +movi 1 +transport 1 +traffic 1 +home 0 +ddion 0 +yeah 0 +right 0 +like 0 +anyth 0 +okai 0 +mayb 0 +littl 0 +academ 0 +past 0 +year 0 +brian 0 +bershad 0 +primari 0 +respons 0 +construct 0 +thespinoper 0 +oper 0 +applic 0 +achiev 0 +impress 0 +perform 0 +extend 0 +safeti 0 +protect 0 +maintain 0 +written 0 +slight 0 +variant 0 +think 0 +run 0 +link 0 +us 0 +intercept 0 +call 0 +emul 0 +environ 0 +previou 0 +havework 0 +studi 0 +help 0 +implement 0 +memori 0 +manag 0 +commun 0 +subsystem 0 +afraid 0 +around 0 +wouldn 0 +claim 0 +know 0 +cool 0 +ipromis 0 +soon 0 +netscap 0 +enhancedthi 0 +hold 0 +breath 0 +meanwhil 0 +site 0 +occasion 0 +distract 0 +engin 0 +reason 0 +homepag 0 +featur 0 +date 0 +view 0 +campu 0 +weather 0 +occupi 0 +vast 0 +program 0 +dai 0 +debug 0 +manual 0 +solv 0 +countless 0 +problem 0 +institut 0 +band 0 +trumpet 0 +section 0 +racquetbal 0 +ladder 0 +main 0 +outlet 0 +athlet 0 +espn 0 +sportzon 0 +sport 0 +todai 0 +rest 0 +dilbert 0 +learn 0 +real 0 +restaur 0 +fine 0 +eateri 0 +recommend 0 +other 0 +region 0 +list 0 +line 0 +guid 0 +excel 0 +public 0 +statu 0 +infam 0 +marin 0 +leagu 0 +basebal 0 +team 0 +bean 0 +shop 0 +last 0 +modifi 0 +mondai 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..1c250eab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,127 @@ +parallel 1 +derrick 1 +comput 1 +project 1 +data 1 +administr 1 +extens 1 +colleg 1 +advanc 1 +scienc 1 +univers 1 +washington 1 +offer 1 +graduat 1 +student 1 +fall 1 +experi 1 +commun 1 +develop 1 +design 1 +system 1 +tool 1 +research 1 +perform 1 +portabl 1 +collect 1 +topic 1 +engin 1 +teach 1 +assist 1 +index 0 +weathersbi 0 +bullssupersonicsi 0 +pursu 0 +phdin 0 +seattl 0 +beauti 0 +campu 0 +li 0 +heart 0 +ofseattl 0 +mani 0 +divers 0 +often 0 +prei 0 +therebyextend 0 +time 0 +averag 0 +career 0 +practic 0 +interestssignific 0 +skill 0 +sheet 0 +share 0 +server 0 +arrai 0 +languag 0 +compil 0 +host 0 +token 0 +ring 0 +protocol 0 +base 0 +network 0 +securityresearch 0 +interestsmi 0 +center 0 +around 0 +distribut 0 +challengespres 0 +field 0 +on 0 +conveni 0 +typic 0 +foremost 0 +goal 0 +run 0 +ordistribut 0 +environ 0 +howev 0 +suffer 0 +final 0 +theseenviron 0 +extra 0 +challeng 0 +asynchron 0 +independ 0 +event 0 +daunt 0 +task 0 +distributedenviron 0 +issu 0 +address 0 +group 0 +page 0 +spaa 0 +paper 0 +gener 0 +comm 0 +dissert 0 +integr 0 +softwar 0 +projectacadem 0 +achievementsinstructor 0 +summer 0 +curriculum 0 +cours 0 +certif 0 +program 0 +collegeinstructor 0 +start 0 +undergradu 0 +tutor 0 +women 0 +minoritystud 0 +depart 0 +engineeringoutstand 0 +award 0 +person 0 +interest 0 +interact 0 +cnnfinanciala 0 +newslet 0 +would 0 +javaw 0 +weathersbyderrick 0 +edutu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..b93d6f48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,123 @@ +game 1 +northern 1 +note 1 +postscript 1 +dewei 1 +draft 1 +read 1 +ireland 1 +page 1 +brian 1 +washington 1 +need 1 +like 1 +avail 1 +carri 1 +hopefulli 1 +work 1 +pleas 1 +statist 1 +terrorist 1 +relat 1 +link 1 +thorough 1 +china 1 +home 0 +pagebrian 0 +deweyabout 0 +first 0 +year 0 +student 0 +depart 0 +comput 0 +scienceand 0 +engin 0 +univers 0 +doyou 0 +know 0 +music 0 +book 0 +ilov 0 +plai 0 +ride 0 +bike 0 +write 0 +letter 0 +drink 0 +myfavorit 0 +color 0 +blue 0 +favorit 0 +ocean 0 +atlant 0 +oldroomm 0 +think 0 +anim 0 +public 0 +finger 0 +irelandi 0 +return 0 +trip 0 +belfast 0 +june 0 +sixti 0 +pagesof 0 +interview 0 +luggag 0 +getthos 0 +semi 0 +chaotic 0 +readabl 0 +enlighteningformat 0 +feel 0 +free 0 +shoulder 0 +period 0 +make 0 +html 0 +format 0 +goal 0 +encourag 0 +frequent 0 +feedback 0 +soon 0 +possibl 0 +mail 0 +well 0 +much 0 +time 0 +thisproject 0 +late 0 +progress 0 +near 0 +ofth 0 +summer 0 +imag 0 +thecurr 0 +sinn 0 +fein 0 +inform 0 +bibliographi 0 +death 0 +injuri 0 +alreadi 0 +addict 0 +recuri 0 +check 0 +fascin 0 +histori 0 +develop 0 +ancient 0 +imageek 0 +york 0 +cuni 0 +provid 0 +mani 0 +interest 0 +site 0 +jansteen 0 +seen 0 +edulast 0 +modifi 0 +tuesdai 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..11a54ed6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,119 @@ +washington 1 +comput 1 +home 1 +dfasulo 1 +scienc 1 +fiction 1 +graduat 1 +student 1 +depart 1 +univers 1 +seattl 1 +work 1 +interest 1 +amber 1 +recommend 1 +random 1 +thing 1 +theori 1 +pagewelcom 0 +fasulo 0 +page 0 +third 0 +year 0 +williamscolleg 0 +computersci 0 +appli 0 +mathemat 0 +class 0 +note 0 +portrait 0 +contain 0 +slight 0 +inaccuraci 0 +find 0 +eastlak 0 +engin 0 +ofwashington 0 +offic 0 +chateau 0 +email 0 +academ 0 +graphic 0 +biologi 0 +person 0 +fantasi 0 +written 0 +otherwis 0 +fact 0 +probabl 0 +honest 0 +identifi 0 +illustr 0 +merlin 0 +corwin 0 +pictur 0 +favorit 0 +charact 0 +mine 0 +roger 0 +zelazni 0 +chronicl 0 +imag 0 +taken 0 +drpg 0 +publish 0 +phage 0 +press 0 +would 0 +anyon 0 +like 0 +book 0 +also 0 +seri 0 +babylon 0 +creativ 0 +write 0 +poetri 0 +absolut 0 +link 0 +athlet 0 +particular 0 +order 0 +tenni 0 +kwon 0 +distanc 0 +run 0 +role 0 +plai 0 +depend 0 +cat 0 +homepag 0 +friend 0 +fellow 0 +william 0 +alumnu 0 +sean 0 +sandi 0 +look 0 +woman 0 +former 0 +grad 0 +wendi 0 +belluomini 0 +dress 0 +dogbert 0 +peopl 0 +ask 0 +worthwhil 0 +area 0 +research 0 +whether 0 +abstract 0 +us 0 +better 0 +explan 0 +goal 0 +futur 0 +ever 0 +given 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..a0b1c9a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,30 @@ +martin 1 +dickei 1 +home 1 +page 1 +dickeycomput 0 +scienc 0 +engineeringunivers 0 +washingtonwelcom 0 +weekli 0 +schedulenarr 0 +resum 0 +blurbcs 0 +engr 0 +autumn 0 +favorit 0 +seattl 0 +coffe 0 +housesfavorit 0 +internet 0 +site 0 +sister 0 +bookspirograph 0 +java 0 +script 0 +garg 0 +plai 0 +washington 0 +eduupd 0 +tuesdai 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..05bc47b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,187 @@ +peopl 1 +washington 1 +comput 1 +bershad 1 +chen 1 +romer 1 +cach 1 +seattl 1 +vote 1 +commit 1 +denni 1 +high 1 +baer 1 +brad 1 +calder 1 +grunwald 1 +paper 1 +issu 1 +instruct 1 +polici 1 +dynam 1 +page 1 +conflict 1 +univers 1 +dlee 1 +home 1 +good 1 +alwai 1 +huberthumphrei 1 +begin 1 +occur 1 +would 1 +dream 1 +primari 0 +interest 0 +perform 0 +architectur 0 +researchwith 0 +smart 0 +likejean 0 +loup 0 +brian 0 +alan 0 +eustac 0 +dirk 0 +andt 0 +recent 0 +energi 0 +manag 0 +system 0 +gener 0 +fetch 0 +specul 0 +execut 0 +isca 0 +map 0 +resolutionon 0 +standard 0 +hardwar 0 +osdi 0 +avoid 0 +miss 0 +larg 0 +direct 0 +mappedcach 0 +asplo 0 +effect 0 +differ 0 +code 0 +reorder 0 +algorithm 0 +qualifi 0 +project 0 +report 0 +contact 0 +work 0 +scienc 0 +engin 0 +depart 0 +offic 0 +sieg 0 +index 0 +pointer 0 +hotlist 0 +entri 0 +point 0 +explor 0 +yahoo 0 +yellow 0 +internet 0 +lyco 0 +realli 0 +search 0 +enginefor 0 +guid 0 +click 0 +million 0 +sensibl 0 +mind 0 +conced 0 +thatpolit 0 +almost 0 +choic 0 +lesser 0 +evil 0 +tweedledumand 0 +tweedlede 0 +abstain 0 +theyar 0 +present 0 +presid 0 +appoint 0 +go 0 +torummag 0 +around 0 +live 0 +next 0 +four 0 +year 0 +consid 0 +allth 0 +stew 0 +rather 0 +show 0 +humphrei 0 +taught 0 +lesson 0 +still 0 +enjoi 0 +nixon 0 +suprem 0 +court 0 +whentricia 0 +juli 0 +find 0 +silver 0 +thread 0 +among 0 +gold 0 +theblack 0 +russel 0 +baker 0 +ford 0 +without 0 +flummeri 0 +hesit 0 +chanc 0 +draw 0 +back 0 +ineffect 0 +concern 0 +act 0 +initi 0 +element 0 +truth 0 +ignor 0 +kill 0 +countless 0 +idea 0 +splendid 0 +plan 0 +moment 0 +definit 0 +provid 0 +move 0 +sort 0 +thing 0 +help 0 +never 0 +otherwis 0 +whole 0 +stream 0 +event 0 +decis 0 +rais 0 +favor 0 +manner 0 +unforeseen 0 +incid 0 +meet 0 +materi 0 +assist 0 +magic 0 +could 0 +come 0 +whatev 0 +goeth 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..49e06073 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,119 @@ +home 1 +inform 1 +comput 1 +page 1 +doug 1 +zongker 1 +research 1 +contact 1 +todai 1 +wast 1 +time 1 +sarcasm 1 +actual 1 +scienc 1 +univers 1 +depart 1 +homepag 1 +anoth 0 +unnecessari 0 +pagececi 0 +well 0 +much 0 +anywai 0 +sure 0 +casual 0 +mention 0 +name 0 +buti 0 +feel 0 +strongli 0 +bold 0 +noless 0 +exhaust 0 +list 0 +usual 0 +public 0 +classeshow 0 +meth 0 +unusu 0 +toxic 0 +custard 0 +workshop 0 +filesth 0 +mento 0 +galleryvisit 0 +supercolliderth 0 +useless 0 +date 0 +cron 0 +player 0 +databas 0 +test 0 +ground 0 +caveat 0 +user 0 +lucki 0 +brows 0 +avirtu 0 +intend 0 +treasur 0 +trove 0 +whichmai 0 +us 0 +realli 0 +first 0 +year 0 +grad 0 +student 0 +engineeringdepart 0 +ofwashington 0 +graduat 0 +michigan 0 +state 0 +imajor 0 +andminor 0 +math 0 +dubiou 0 +honorsjunior 0 +apprentic 0 +keeper 0 +brotherhood 0 +crouton 0 +death 0 +cart 0 +pizzicato 0 +intern 0 +club 0 +member 0 +bryan 0 +worst 0 +execut 0 +vice 0 +presid 0 +charg 0 +emerg 0 +backup 0 +clicker 0 +cruis 0 +highwai 0 +inhigh 0 +gear 0 +sit 0 +buttstar 0 +screen 0 +tast 0 +background 0 +stolen 0 +labor 0 +wheremi 0 +sister 0 +work 0 +dougz 0 +washington 0 +class 0 +last 0 +edit 0 +thursdai 0 +novemb 0 +hit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..bfe3821b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,92 @@ +system 2 +oper 2 +dylan 1 +interact 1 +implement 1 +schedul 1 +work 1 +mcname 1 +washington 1 +research 1 +applic 1 +perform 1 +activ 1 +project 1 +current 1 +oodb 1 +us 1 +java 1 +jame 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +person 0 +inform 0 +concentr 0 +applicationsand 0 +mechanismthat 0 +allow 0 +page 0 +replacementpolici 0 +kernel 0 +polici 0 +caus 0 +poorli 0 +help 0 +machoper 0 +mechan 0 +thathelp 0 +user 0 +level 0 +thread 0 +properli 0 +kernelthread 0 +experi 0 +lead 0 +spin 0 +folk 0 +build 0 +architecturethat 0 +gener 0 +idea 0 +tailor 0 +carri 0 +start 0 +opal 0 +thesi 0 +investig 0 +betweenobject 0 +orient 0 +databas 0 +virtual 0 +memori 0 +demonstr 0 +signific 0 +improv 0 +achiv 0 +commod 0 +differ 0 +done 0 +addit 0 +improvementscan 0 +come 0 +modifi 0 +slightli 0 +betterserv 0 +paperscv 0 +cours 0 +geoff 0 +voelker 0 +built 0 +winter 0 +quarter 0 +seminar 0 +dedic 0 +gave 0 +lectureintroduc 0 +languag 0 +environ 0 +slide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..74db56fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,32 @@ +christoph 1 +lewi 1 +graduat 1 +student 1 +washington 1 +home 0 +page 0 +dept 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +echri 0 +hello 0 +glad 0 +could 0 +make 0 +work 0 +program 0 +languag 0 +project 0 +offic 0 +hour 0 +tent 0 +mondai 0 +wednesdai 0 +sieg 0 +last 0 +modifi 0 +thur 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..056d1949 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page 0 +blank 0 +ecrock 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..50bc3c47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,118 @@ +comput 1 +home 1 +page 1 +know 1 +washington 1 +well 1 +peopl 1 +time 1 +includ 1 +current 1 +work 1 +compil 1 +want 1 +place 1 +mail 1 +find 1 +eddi 0 +hong 0 +second 0 +year 0 +graduat 0 +scienc 0 +studentat 0 +univers 0 +tosai 0 +busi 0 +type 0 +littl 0 +hand 0 +hadto 0 +resum 0 +link 0 +postcriptand 0 +plain 0 +text 0 +form 0 +offic 0 +room 0 +seig 0 +hall 0 +anna 0 +karlin 0 +craig 0 +chamber 0 +theoret 0 +model 0 +dynam 0 +specif 0 +workingon 0 +develop 0 +line 0 +algorithm 0 +fordynam 0 +plan 0 +qual 0 +project 0 +access 0 +sinc 0 +august 0 +histor 0 +fact 0 +free 0 +activit 0 +vine 0 +branchesmi 0 +sister 0 +help 0 +creat 0 +also 0 +list 0 +variou 0 +anoth 0 +interest 0 +keep 0 +industri 0 +site 0 +give 0 +insight 0 +commentari 0 +happen 0 +knowof 0 +pleas 0 +daveneti 0 +power 0 +macintosh 0 +guess 0 +make 0 +bias 0 +towardslik 0 +mac 0 +howev 0 +think 0 +better 0 +eveneasi 0 +come 0 +sometim 0 +visit 0 +appl 0 +check 0 +seattl 0 +freewai 0 +traffic 0 +look 0 +advic 0 +import 0 +book 0 +worldher 0 +us 0 +inform 0 +alwai 0 +found 0 +address 0 +domain 0 +name 0 +countri 0 +friend 0 +stand 0 +edhong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..115536d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,74 @@ +compil 1 +egger 1 +research 1 +project 1 +washington 1 +share 1 +asplo 1 +susan 1 +architectur 1 +current 1 +dynam 1 +schedul 1 +time 1 +http 0 +home 0 +depart 0 +computersci 0 +engin 0 +univers 0 +seattl 0 +voic 0 +email 0 +offic 0 +sieg 0 +hall 0 +interest 0 +comput 0 +back 0 +emphasi 0 +onexperiment 0 +perform 0 +analysi 0 +work 0 +issu 0 +incompil 0 +optim 0 +data 0 +optimizationsand 0 +instruct 0 +processor 0 +design 0 +multithreadedarchitectur 0 +algorithm 0 +reduc 0 +fals 0 +multithread 0 +spinprevi 0 +cach 0 +coher 0 +code 0 +prefetch 0 +memori 0 +machin 0 +miscellan 0 +tool 0 +workload 0 +new 0 +program 0 +committe 0 +call 0 +paper 0 +homepag 0 +inform 0 +look 0 +click 0 +list 0 +might 0 +qual 0 +amast 0 +degre 0 +begin 0 +thesi 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..e0938703 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,197 @@ +system 2 +spin 2 +oper 2 +extens 2 +washington 1 +univers 1 +safe 1 +dynam 1 +implement 1 +protect 1 +princeton 1 +work 1 +thread 1 +link 1 +kernel 1 +interfac 1 +support 1 +perform 1 +us 1 +describ 1 +paper 1 +wcsss 1 +softwar 1 +emin 1 +comput 1 +scienc 1 +current 1 +year 1 +summer 1 +jersei 1 +develop 1 +schedul 1 +domain 1 +also 1 +wrote 1 +call 1 +novel 1 +aspect 1 +provid 1 +high 1 +fine 1 +grain 1 +share 1 +time 1 +code 1 +data 1 +design 1 +simul 1 +mechan 1 +modula 1 +microkernel 1 +applic 1 +specif 1 +servic 1 +workshop 1 +sirer 0 +sirereg 0 +depart 0 +engin 0 +seattl 0 +backgroundi 0 +third 0 +graduat 0 +student 0 +grew 0 +istanbul 0 +turkei 0 +receiv 0 +toward 0 +spinproject 0 +prof 0 +brian 0 +bershad 0 +spent 0 +bell 0 +labswork 0 +theplan 0 +help 0 +build 0 +prototyp 0 +displai 0 +thesumm 0 +research 0 +center 0 +recent 0 +thevesta 0 +project 0 +projectsmi 0 +goal 0 +adapt 0 +synchron 0 +andprotect 0 +subsystem 0 +machin 0 +specificaspect 0 +interrupt 0 +path 0 +andsom 0 +miscellan 0 +alarm 0 +mach 0 +compat 0 +mean 0 +ofextend 0 +arbitrari 0 +schedulingpolici 0 +allowsu 0 +achiev 0 +strand 0 +isdesign 0 +fault 0 +isol 0 +allowsisol 0 +withconflict 0 +symbol 0 +simultan 0 +activ 0 +hide 0 +beassur 0 +possibl 0 +access 0 +clincher 0 +extensionsthat 0 +want 0 +protectionenforc 0 +overhead 0 +performanceweb 0 +server 0 +networkingstack 0 +main 0 +object 0 +reduc 0 +http 0 +latenc 0 +andminim 0 +load 0 +mip 0 +instruct 0 +coupl 0 +calledmipsi 0 +robust 0 +enough 0 +spec 0 +benchmark 0 +standard 0 +ofnew 0 +educ 0 +tool 0 +researchplatform 0 +page 0 +mipsi 0 +featuresand 0 +avail 0 +namespac 0 +manag 0 +write 0 +experi 0 +safeti 0 +sosp 0 +issu 0 +hoto 0 +posit 0 +compar 0 +hardwar 0 +sigop 0 +european 0 +review 0 +version 0 +technic 0 +report 0 +march 0 +measur 0 +limit 0 +parallel 0 +senior 0 +independ 0 +june 0 +talkslanguag 0 +slide 0 +present 0 +first 0 +compil 0 +tucson 0 +arizona 0 +interestswhenev 0 +find 0 +opportun 0 +follow 0 +sail 0 +windsurf 0 +dive 0 +ski 0 +bikingmak 0 +outdoor 0 +cloth 0 +andhik 0 +dylan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..dbb86bbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,140 @@ +page 1 +project 1 +eric 1 +comput 1 +washington 1 +seattl 1 +prof 1 +method 1 +work 1 +home 1 +scienc 1 +univers 1 +longer 1 +black 1 +recent 1 +solut 1 +implicit 1 +solv 1 +signal 1 +like 1 +everybodi 1 +els 1 +anderson 0 +andersonwher 0 +find 0 +sieg 0 +hall 0 +depart 0 +engin 0 +street 0 +honor 0 +june 0 +decisionin 0 +aclu 0 +reno 0 +mind 0 +thedecis 0 +mere 0 +interim 0 +step 0 +could 0 +read 0 +take 0 +probabl 0 +still 0 +commit 0 +feloni 0 +care 0 +speech 0 +freedom 0 +themarketplac 0 +idea 0 +fact 0 +third 0 +year 0 +graduat 0 +student 0 +mean 0 +imostli 0 +panic 0 +qual 0 +also 0 +try 0 +write 0 +paper 0 +henri 0 +greensideof 0 +duke 0 +finish 0 +master 0 +thesi 0 +onsteadi 0 +state 0 +particular 0 +nonlinear 0 +biharmon 0 +stabil 0 +criterion 0 +explicit 0 +restrict 0 +fourth 0 +power 0 +spatial 0 +resolut 0 +timesteppingmethod 0 +backward 0 +euler 0 +necessari 0 +numer 0 +analysisissu 0 +involv 0 +newton 0 +nonlinearequ 0 +spars 0 +matrix 0 +newtonstep 0 +interplai 0 +pictur 0 +realli 0 +spiffi 0 +bodi 0 +code 0 +astrophys 0 +simul 0 +support 0 +data 0 +structuresbi 0 +richard 0 +andersoni 0 +process 0 +music 0 +aim 0 +automat 0 +transcript 0 +acoust 0 +anna 0 +karlin 0 +isth 0 +musician 0 +interest 0 +applet 0 +first 0 +link 0 +text 0 +small 0 +graphic 0 +section 0 +materi 0 +preparedfor 0 +last 0 +fall 0 +snapshot 0 +mostli 0 +famili 0 +prove 0 +brother 0 +final 0 +weather 0 +meander 0 +sept 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..1f3eca38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,197 @@ +softbot 2 +learn 2 +inform 2 +internet 1 +servic 1 +master 1 +thesi 1 +machin 1 +etzioni 1 +metacrawl 1 +home 1 +univers 1 +washington 1 +search 1 +comparison 1 +agent 1 +program 1 +oren 1 +comput 1 +research 1 +enabl 1 +softwar 1 +page 1 +effici 1 +aaai 1 +world 1 +multi 1 +us 1 +juli 1 +intellig 1 +robot 1 +understand 1 +plan 1 +paper 1 +student 1 +advis 1 +hewlett 1 +packard 1 +databas 1 +repositori 1 +irvin 1 +knowledg 1 +discoveri 1 +contain 1 +relev 1 +pageoren 0 +pagedepart 0 +scienc 0 +engin 0 +ofwashington 0 +seattl 0 +voic 0 +mail 0 +offic 0 +sieg 0 +hall 0 +room 0 +brief 0 +bioand 0 +photo 0 +current 0 +human 0 +user 0 +state 0 +heor 0 +want 0 +accomplish 0 +disambigu 0 +request 0 +anddynam 0 +determin 0 +satisfi 0 +finalist 0 +discoveraward 0 +technolog 0 +innov 0 +field 0 +searchmultipl 0 +indic 0 +parallel 0 +provid 0 +sophist 0 +pruningopt 0 +netrecommend 0 +choic 0 +ahoi 0 +white 0 +locatesindividu 0 +high 0 +accuraci 0 +bruteforc 0 +brute 0 +analyz 0 +hypothes 0 +second 0 +whenrun 0 +sparc 0 +select 0 +public 0 +gather 0 +foc 0 +move 0 +food 0 +chain 0 +deploi 0 +theweb 0 +ascal 0 +shop 0 +wide 0 +autonom 0 +postscript 0 +html 0 +base 0 +interfac 0 +cacm 0 +fact 0 +fiction 0 +forecast 0 +ieee 0 +expert 0 +august 0 +without 0 +repli 0 +brook 0 +magazin 0 +decemb 0 +ijcai 0 +sound 0 +close 0 +reason 0 +toappear 0 +first 0 +addit 0 +richardseg 0 +bernard 0 +fileretriev 0 +neal 0 +lesh 0 +planner 0 +unix 0 +keith 0 +golden 0 +universalquantif 0 +incomplet 0 +terranc 0 +goan 0 +error 0 +mikeperkowitz 0 +erik 0 +selberg 0 +zamir 0 +jonathan 0 +shake 0 +undergradu 0 +stephen 0 +soderland 0 +umass 0 +amherst 0 +roomi 0 +bruce 0 +lesourd 0 +robert 0 +spiger 0 +lockhe 0 +center 0 +william 0 +alford 0 +wisconsin 0 +greg 0 +fitchenholtz 0 +guido 0 +hunt 0 +dymitr 0 +mozdyniewicz 0 +quark 0 +resourc 0 +minecontain 0 +neuroprosearch 0 +recent 0 +neural 0 +network 0 +illinoi 0 +induct 0 +group 0 +statlib 0 +data 0 +algorithm 0 +statist 0 +learningtoolbox 0 +bonn 0 +german 0 +list 0 +usenet 0 +faq 0 +access 0 +count 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..921d4e99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,35 @@ +feelei 1 +mike 1 +comput 1 +home 0 +pagemik 0 +scienc 0 +finish 0 +done 0 +soon 0 +thesi 0 +concern 0 +global 0 +memori 0 +manag 0 +workstationclust 0 +also 0 +work 0 +distribut 0 +opalproject 0 +join 0 +faculti 0 +univers 0 +british 0 +columbia 0 +injanuari 0 +inform 0 +avail 0 +us 0 +link 0 +papersmi 0 +research 0 +summarycvsoutheast 0 +idaholast 0 +modifi 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..a5120e66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,20 @@ +fisher 1 +page 1 +washington 1 +chri 1 +home 1 +pictur 0 +fisherdepart 0 +comput 0 +scienc 0 +engineeringbox 0 +univers 0 +seattl 0 +voic 0 +mail 0 +sieg 0 +hall 0 +room 0 +current 0 +construct 0 +return 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..7064b67e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,73 @@ +comput 1 +washington 1 +place 1 +might 1 +depart 0 +scienc 0 +engineeringunivers 0 +seattl 0 +sieg 0 +hall 0 +schedulethi 0 +quarter 0 +autumn 0 +ta 0 +cours 0 +rather 0 +work 0 +gener 0 +exam 0 +check 0 +scheduleto 0 +otherwis 0 +around 0 +probablyb 0 +found 0 +librari 0 +somewher 0 +nice 0 +read 0 +paper 0 +research 0 +activitiesmi 0 +main 0 +interest 0 +algorithm 0 +specif 0 +areasof 0 +parallel 0 +geometri 0 +public 0 +meander 0 +denni 0 +outta 0 +mind 0 +vista 0 +pea 0 +music 0 +site 0 +chateau 0 +galleri 0 +fund 0 +drive 0 +thing 0 +alec 0 +wolman 0 +server 0 +seven 0 +lost 0 +soul 0 +captur 0 +html 0 +listen 0 +phone 0 +booth 0 +mofo 0 +peopl 0 +luci 0 +paul 0 +peach 0 +ruel 0 +look 0 +like 0 +moment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..05a76fb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,84 @@ +comput 1 +research 1 +scienc 1 +interest 1 +univers 1 +modugno 1 +home 1 +washington 1 +human 1 +formal 1 +model 1 +softwar 1 +current 1 +activ 1 +chair 1 +carnegi 1 +mellon 1 +francesmari 0 +pagefrancesmari 0 +page 0 +depart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +offic 0 +sieg 0 +hall 0 +phone 0 +mail 0 +main 0 +interact 0 +also 0 +user 0 +program 0 +specif 0 +verif 0 +technolog 0 +polici 0 +welcom 0 +opportun 0 +collabor 0 +relat 0 +topic 0 +project 0 +safeti 0 +machin 0 +interfac 0 +previou 0 +public 0 +avail 0 +onlin 0 +summari 0 +ofmi 0 +thesi 0 +real 0 +time 0 +concurr 0 +distribut 0 +system 0 +parallel 0 +algorthim 0 +profession 0 +basic 0 +symposium 0 +uist 0 +demonstr 0 +educ 0 +march 0 +august 0 +mathemat 0 +cornel 0 +anyth 0 +recent 0 +includecycl 0 +ski 0 +languag 0 +cultur 0 +spanish 0 +previouslyitalian 0 +vegetarian 0 +cook 0 +elleri 0 +line 0 +greet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..9645aa55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,45 @@ +forman 1 +georg 1 +home 1 +comput 1 +pagegeorg 0 +pagei 0 +receiv 0 +scienc 0 +optim 0 +compil 0 +ariadn 0 +scalabl 0 +pattern 0 +match 0 +parallel 0 +trace 0 +debugg 0 +public 0 +mobil 0 +hyperlink 0 +librari 0 +someth 0 +interest 0 +free 0 +handi 0 +softwar 0 +script 0 +written 0 +word 0 +puzzl 0 +water 0 +song 0 +chang 0 +netscap 0 +anim 0 +gforman 0 +comhom 0 +page 0 +mail 0 +finger 0 +weather 0 +dept 0 +live 0 +pictur 0 +gener 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..de639742 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,48 @@ +page 1 +friedman 1 +marc 1 +home 1 +english 1 +research 1 +keith 1 +cours 0 +watercolor 0 +applet 0 +camp 0 +checklist 0 +spanish 0 +collabor 0 +dictionari 0 +poetri 0 +favorit 0 +quot 0 +link 0 +elsewher 0 +occam 0 +inform 0 +gather 0 +agent 0 +golden 0 +wordbot 0 +bike 0 +trip 0 +artifici 0 +intellig 0 +codi 0 +kwok 0 +weld 0 +ucpop 0 +planner 0 +tool 0 +chang 0 +life 0 +work 0 +nietzschein 0 +netscap 0 +bookmark 0 +file 0 +everi 0 +refer 0 +visitor 0 +sinc 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..494c1f29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,28 @@ +garrett 1 +charli 1 +genet 1 +home 0 +page 0 +address 0 +seattl 0 +research 0 +interest 0 +compil 0 +graphic 0 +neural 0 +network 0 +algorithm 0 +game 0 +plai 0 +algorithmspap 0 +line 0 +algorithmsformerli 0 +member 0 +cecil 0 +group 0 +univers 0 +ofwashington 0 +bookshelf 0 +audio 0 +file 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..2b54f6f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,49 @@ +comput 1 +georg 1 +scienc 1 +graphic 1 +render 1 +imag 1 +winkenbach 0 +winkenbachdepart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +mail 0 +georgew 0 +washington 0 +eduphon 0 +interest 0 +multimedia 0 +thesi 0 +work 0 +doneund 0 +supervis 0 +professor 0 +david 0 +salesin 0 +deal 0 +appli 0 +tradit 0 +illustr 0 +techniqu 0 +theautomat 0 +three 0 +dimension 0 +model 0 +imagescr 0 +prototyp 0 +system 0 +found 0 +link 0 +follow 0 +galleri 0 +grail 0 +laboratori 0 +depart 0 +engin 0 +wife 0 +home 0 +page 0 +taweewan 0 +siwadun 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..74a5b20c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,211 @@ +home 2 +page 2 +washington 1 +scienc 1 +configur 1 +duke 1 +file 1 +badro 1 +seattl 1 +graduat 1 +mathemat 1 +work 1 +engin 1 +univers 1 +pagecs 1 +link 1 +comput 1 +greg 1 +welcom 1 +email 1 +recent 1 +spring 1 +part 1 +time 1 +cours 1 +first 1 +competit 1 +emac 1 +readm 1 +archiv 1 +zshell 1 +chronicl 1 +newspap 1 +articl 1 +especi 1 +jackson 1 +joel 1 +interest 1 +languag 1 +nation 1 +foundat 1 +pagegreg 0 +last 0 +updat 0 +eduaddress 0 +nesbit 0 +hello 0 +homepag 0 +pleas 0 +feel 0 +free 0 +send 0 +feedback 0 +address 0 +alwai 0 +isuppos 0 +construct 0 +keep 0 +check 0 +back 0 +excitingfeatur 0 +dukeunivers 0 +complet 0 +degre 0 +doubl 0 +major 0 +incomput 0 +amcurr 0 +emploi 0 +senior 0 +research 0 +scientist 0 +fortransworld 0 +numer 0 +small 0 +compani 0 +origin 0 +locat 0 +indurham 0 +headquart 0 +bermuda 0 +myapart 0 +also 0 +full 0 +student 0 +computersci 0 +depart 0 +ofwashington 0 +fall 0 +softwar 0 +seminarcs 0 +compil 0 +seminar 0 +newer 0 +stuff 0 +philosophi 0 +note 0 +misc 0 +patch 0 +fvwm 0 +place 0 +best 0 +show 0 +redhat 0 +desktop 0 +entri 0 +transworldnumer 0 +ieeenat 0 +program 0 +victori 0 +vertic 0 +winter 0 +issu 0 +magazin 0 +contain 0 +geneticalgorithm 0 +person 0 +rsum 0 +data 0 +date 0 +busi 0 +sampl 0 +drew 0 +bycomput 0 +simpl 0 +magic 0 +creat 0 +canterburi 0 +progress 0 +variou 0 +random 0 +pictur 0 +life 0 +definitelynot 0 +mani 0 +hobbi 0 +includ 0 +tenni 0 +ski 0 +hole 0 +volleybal 0 +juggl 0 +piano 0 +plai 0 +game 0 +rubik 0 +cube 0 +linux 0 +freewar 0 +unix 0 +music 0 +sarahmclachlan 0 +billi 0 +yahoo 0 +list 0 +parliamentari 0 +procedur 0 +ncaa 0 +basketbal 0 +lyco 0 +search 0 +commun 0 +daili 0 +univ 0 +unoffici 0 +microsoft 0 +corpor 0 +world 0 +wide 0 +server 0 +gatewai 0 +user 0 +group 0 +histor 0 +imag 0 +hotjava 0 +global 0 +network 0 +navig 0 +perl 0 +practic 0 +extract 0 +report 0 +virtual 0 +librari 0 +inter 0 +unif 0 +devic 0 +connect 0 +write 0 +html 0 +sgml 0 +seinfeld 0 +index 0 +friend 0 +sitcom 0 +materi 0 +base 0 +upon 0 +support 0 +fellowship 0 +opinion 0 +find 0 +conclus 0 +recommend 0 +express 0 +public 0 +author 0 +necessarili 0 +reflect 0 +view 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..04abfb0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,294 @@ +java 2 +applet 2 +linden 1 +gamelan 1 +page 1 +scienc 1 +webview 1 +cool 1 +graduat 1 +comput 1 +us 1 +tree 1 +model 1 +program 1 +univers 1 +softwar 1 +interest 1 +jar 1 +staff 1 +pick 1 +work 1 +movi 1 +qualiti 1 +greg 1 +third 1 +year 1 +polit 1 +doubl 1 +look 1 +posit 1 +develop 1 +link 1 +enter 1 +found 1 +travel 1 +elicit 1 +flight 1 +prefer 1 +wasrat 1 +andwa 1 +repositori 1 +also 1 +book 1 +sorri 1 +time 1 +code 1 +submit 1 +ucsd 1 +project 1 +life 1 +artifici 1 +neuralnetwork 1 +learn 1 +user 1 +iter 1 +prison 1 +imag 1 +headless 1 +horseman 1 +transpar 1 +reflect 1 +made 1 +avail 1 +washington 1 +home 0 +lindenmi 0 +love 0 +wifecorina 0 +current 0 +scienceher 0 +thecomput 0 +depart 0 +ofwashington 0 +slave 0 +awai 0 +toward 0 +lofti 0 +goal 0 +complet 0 +undergraduatedegre 0 +california 0 +diego 0 +anodd 0 +major 0 +go 0 +leav 0 +school 0 +decemb 0 +mactiv 0 +check 0 +resum 0 +allow 0 +orset 0 +addit 0 +famili 0 +altavistawebviewand 0 +metawebview 0 +instead 0 +keyword 0 +foraltavista 0 +metacrawl 0 +hit 0 +search 0 +servic 0 +return 0 +graph 0 +displai 0 +searchservic 0 +autom 0 +assist 0 +emul 0 +dialog 0 +travelag 0 +client 0 +gradual 0 +whileallow 0 +brows 0 +real 0 +data 0 +research 0 +prototyp 0 +quit 0 +function 0 +even 0 +earli 0 +stage 0 +highli 0 +rate 0 +andjar 0 +altavistawebview 0 +winner 0 +thejava 0 +contest 0 +publish 0 +walsh 0 +foundat 0 +meilleur 0 +best 0 +first 0 +linear 0 +ballet 0 +oop 0 +capabl 0 +browser 0 +sourc 0 +demonstr 0 +buffer 0 +avoid 0 +flicker 0 +thread 0 +give 0 +run 0 +certainli 0 +could 0 +cleaner 0 +though 0 +expect 0 +thought 0 +might 0 +enough 0 +standardsto 0 +impress 0 +mylgramm 0 +particl 0 +draw 0 +lgrammer 0 +much 0 +realist 0 +theparticletre 0 +recent 0 +start 0 +judg 0 +evalu 0 +thejar 0 +archiv 0 +summer 0 +dawn 0 +civil 0 +ademonstr 0 +applic 0 +show 0 +plan 0 +techniqu 0 +cansuccessfulli 0 +appli 0 +entertain 0 +myriadsoftwar 0 +professor 0 +belew 0 +filippo 0 +menzer 0 +latentenergi 0 +environ 0 +tool 0 +developingartifici 0 +experi 0 +evolutionari 0 +enviro 0 +paper 0 +hank 0 +lesh 0 +theautom 0 +assit 0 +majeski 0 +spitzer 0 +localizedinteract 0 +spatial 0 +constraint 0 +dilemma 0 +associ 0 +econom 0 +scientist 0 +krishnamoorthi 0 +paturi 0 +blume 0 +liden 0 +esen 0 +hardwaretradeoff 0 +boolean 0 +concept 0 +world 0 +congress 0 +recurr 0 +neural 0 +network 0 +sdilemma 0 +unpublish 0 +honor 0 +thesi 0 +adam 0 +carlson 0 +sujai 0 +parekh 0 +wrote 0 +funrai 0 +tracer 0 +ofth 0 +inc 0 +graphic 0 +closeup 0 +chess 0 +duel 0 +assembl 0 +requir 0 +sphere 0 +withreflect 0 +shadow 0 +distribut 0 +trace 0 +adaptivesampl 0 +mess 0 +thing 0 +pattern 0 +thespher 0 +causingth 0 +rai 0 +refract 0 +multipl 0 +surfaceand 0 +intern 0 +second 0 +anim 0 +call 0 +strike 0 +theanim 0 +written 0 +inventor 0 +manipul 0 +thed 0 +origin 0 +file 0 +alow 0 +quicktim 0 +mbquicktim 0 +doesn 0 +compress 0 +anyfurth 0 +least 0 +anyth 0 +resembl 0 +reason 0 +stuff 0 +dilbert 0 +cognit 0 +info 0 +occasion 0 +chateau 0 +guggenheim 0 +annex 0 +engin 0 +seattl 0 +glinden 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..ec016eea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,27 @@ +grant 1 +brian 1 +kri 1 +home 1 +pagebrian 0 +awai 0 +homework 0 +relat 0 +infowork 0 +backgrounduwdynam 0 +compil 0 +groupuw 0 +depart 0 +computersci 0 +engineeringperson 0 +stuffperson 0 +backgroundmi 0 +daughter 0 +isismi 0 +trip 0 +singaporemi 0 +bookmarksmi 0 +public 0 +keylast 0 +updat 0 +octob 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..84229d79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,98 @@ +grove 1 +spend 1 +us 1 +dave 1 +washington 1 +offic 1 +plai 1 +cecil 1 +author 1 +trip 1 +depart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +chateau 0 +sieg 0 +worki 0 +time 0 +pure 0 +object 0 +orient 0 +langaug 0 +vehicl 0 +integr 0 +research 0 +area 0 +languag 0 +design 0 +program 0 +environ 0 +optim 0 +compil 0 +also 0 +hord 0 +consult 0 +hang 0 +aroundth 0 +fring 0 +spinproject 0 +actual 0 +attempt 0 +graduat 0 +sometim 0 +soonish 0 +much 0 +less 0 +frequent 0 +paper 0 +wouldn 0 +complet 0 +without 0 +dilbertfix 0 +strip 0 +thathit 0 +littl 0 +close 0 +home 0 +current 0 +manag 0 +underacheiv 0 +fantasi 0 +footbal 0 +team 0 +summer 0 +hampshir 0 +work 0 +gui 0 +scoutreserv 0 +greaterlowel 0 +council 0 +pictur 0 +casunset 0 +taken 0 +right 0 +cabin 0 +kick 0 +anoth 0 +everi 0 +boi 0 +someth 0 +silli 0 +white 0 +water 0 +raft 0 +especi 0 +cool 0 +month 0 +toronto 0 +drove 0 +back 0 +toseattl 0 +took 0 +number 0 +detour 0 +along 0 +somehihglight 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..e8df2d04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,61 @@ +fpga 2 +system 1 +design 1 +asynchron 1 +architectur 1 +hauck 1 +washington 1 +current 1 +multi 1 +rapid 1 +prototyp 1 +circuit 1 +scott 1 +univers 1 +graduat 1 +work 1 +board 1 +level 1 +well 1 +triptych 1 +montag 1 +develop 1 +comput 0 +scienc 0 +engin 0 +depart 0 +seattl 0 +year 0 +student 0 +thoughi 0 +also 0 +interest 0 +parallel 0 +june 0 +person 0 +biographi 0 +educ 0 +experi 0 +public 0 +curriculum 0 +vitaeresearch 0 +survei 0 +methodolog 0 +first 0 +improv 0 +densiti 0 +commerci 0 +springbok 0 +partit 0 +assign 0 +rout 0 +topolog 0 +gener 0 +chinook 0 +project 0 +hardwar 0 +softwar 0 +synthesi 0 +simul 0 +embed 0 +applic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..75fd03c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin 0 +hinshaw 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..92139e30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,66 @@ +macduff 1 +place 1 +week 1 +time 1 +also 1 +stuff 1 +ultrasound 0 +imag 0 +emma 0 +elspeth 0 +name 0 +subject 0 +chang 0 +without 0 +notic 0 +unborn 0 +daughter 0 +fromconcept 0 +ripe 0 +inmid 0 +decemb 0 +view 0 +profil 0 +ly 0 +back 0 +lookingup 0 +head 0 +right 0 +upper 0 +half 0 +torso 0 +theleft 0 +busi 0 +obsess 0 +impend 0 +fatherhood 0 +master 0 +thesi 0 +part 0 +chinook 0 +project 0 +pass 0 +silli 0 +possibl 0 +rememb 0 +myspam 0 +unfortun 0 +wast 0 +html 0 +brows 0 +around 0 +interest 0 +usingwebcrawl 0 +pointer 0 +neat 0 +frogstv 0 +nationpenn 0 +tellermus 0 +lyricsian 0 +washington 0 +dept 0 +comput 0 +scienc 0 +engin 0 +univ 0 +washingtonseattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..486a5710 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,145 @@ +memori 2 +network 2 +page 1 +jamrozik 1 +system 1 +size 1 +washington 1 +global 1 +high 1 +speed 1 +reduc 1 +disk 1 +access 1 +transfer 1 +latenc 1 +research 1 +distribut 1 +object 1 +orient 1 +languag 1 +levi 1 +vernon 1 +karlin 1 +feelei 1 +voelker 1 +cach 1 +need 1 +node 1 +remot 1 +provid 1 +subpag 1 +environ 1 +pictur 1 +herv 0 +jamrozikherv 0 +postdoc 0 +univers 0 +sinc 0 +septemb 0 +depart 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +offic 0 +sieg 0 +hall 0 +phone 0 +mail 0 +main 0 +interest 0 +softwar 0 +engin 0 +current 0 +work 0 +memoi 0 +manag 0 +workstat 0 +cluster 0 +hank 0 +mari 0 +anna 0 +mike 0 +geoff 0 +greatli 0 +encourag 0 +virtual 0 +file 0 +therebi 0 +intens 0 +applic 0 +primari 0 +lightli 0 +load 0 +temporari 0 +back 0 +store 0 +introduc 0 +level 0 +hierarchi 0 +name 0 +li 0 +logic 0 +local 0 +fundament 0 +unit 0 +perform 0 +factor 0 +recent 0 +modern 0 +processor 0 +increas 0 +order 0 +coverag 0 +amort 0 +cost 0 +unfortun 0 +small 0 +trend 0 +thu 0 +odd 0 +studi 0 +mean 0 +us 0 +evan 0 +inproceed 0 +seventh 0 +confer 0 +architectur 0 +support 0 +program 0 +oper 0 +octob 0 +postscript 0 +thesi 0 +debug 0 +theuniversit 0 +joseph 0 +fourier 0 +grenobl 0 +involv 0 +guideproject 0 +laboratoir 0 +bull 0 +imag 0 +part 0 +imaginstitut 0 +extrem 0 +peopl 0 +area 0 +snot 0 +visit 0 +louvr 0 +galleri 0 +look 0 +map 0 +franc 0 +europ 0 +world 0 +somefamili 0 +somefriend 0 +eduv 0 +march 0 +decemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..4b12eb70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,25 @@ +jason 1 +secoski 1 +washington 1 +weather 1 +home 0 +pagejason 0 +eduaddress 0 +comput 0 +scienc 0 +engin 0 +depart 0 +sieg 0 +hall 0 +cunivers 0 +boxseattl 0 +offic 0 +frequent 0 +us 0 +page 0 +projectseattl 0 +forecast 0 +channel 0 +secoskylast 0 +modifi 0 +thursdai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..78b95fa2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,82 @@ +applet 1 +jeremi 1 +baer 1 +educ 1 +stuff 1 +current 1 +comput 1 +washington 1 +softwar 1 +project 1 +baerjeremi 0 +never 0 +school 0 +interfer 0 +mark 0 +twain 0 +dream 0 +made 0 +william 0 +shakespearei 0 +graduat 0 +student 0 +scienceat 0 +univers 0 +interest 0 +includ 0 +artifici 0 +intellig 0 +human 0 +computerinteract 0 +multimedia 0 +engineeringtool 0 +gener 0 +music 0 +person 0 +creativ 0 +cool 0 +place 0 +spend 0 +signific 0 +time 0 +pierian 0 +spring 0 +softwareoregon 0 +museum 0 +scienc 0 +industri 0 +omsi 0 +pomona 0 +collegeher 0 +look 0 +mine 0 +eight 0 +puzzl 0 +java 0 +work 0 +progress 0 +experiment 0 +virtual 0 +travel 0 +copi 0 +effect 0 +demo 0 +question 0 +static 0 +layer 0 +analysi 0 +program 0 +feel 0 +stress 0 +realli 0 +silli 0 +littl 0 +macintosh 0 +thati 0 +wrote 0 +year 0 +download 0 +like 0 +metacrawl 0 +searchcopyright 0 +jbaer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..f2a4af3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,60 @@ +home 1 +washington 1 +buhler 1 +jeremi 1 +browser 1 +control 1 +offic 1 +phone 1 +jbuhler 1 +pagejeremi 0 +pagedo 0 +attempt 0 +adjust 0 +transmiss 0 +statu 0 +first 0 +year 0 +student 0 +institut 0 +univers 0 +depart 0 +comput 0 +scienc 0 +engin 0 +sieg 0 +hall 0 +mail 0 +address 0 +finger 0 +tako 0 +import 0 +stufflectur 0 +note 0 +suffix 0 +tree 0 +postscript 0 +latex 0 +research 0 +come 0 +soon 0 +cours 0 +schedulemi 0 +public 0 +keycyb 0 +activ 0 +electron 0 +frontier 0 +foundat 0 +grinsrecommend 0 +readingmi 0 +undergradu 0 +alma 0 +mater 0 +rice 0 +universityquot 0 +quotesmi 0 +page 0 +return 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..80e37de8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,270 @@ +optim 2 +compil 2 +languag 2 +research 1 +program 1 +pass 1 +techniqu 1 +seattl 1 +also 1 +cecil 1 +orient 1 +design 1 +develop 1 +vortex 1 +permit 1 +would 1 +work 1 +object 1 +implement 1 +system 1 +interest 1 +wife 1 +jeff 1 +dean 1 +depart 1 +washington 1 +offic 1 +view 1 +postscript 1 +integr 1 +larg 1 +line 1 +applic 1 +effici 1 +explor 1 +whole 1 +analysi 1 +affect 1 +guid 1 +appli 1 +exampl 1 +recent 1 +author 1 +spent 1 +biplan 1 +ride 1 +flight 1 +comput 0 +scienc 0 +engin 0 +univers 0 +chateau 0 +sieg 0 +dang 0 +build 0 +weren 0 +futur 0 +plansi 0 +plan 0 +graduat 0 +summer 0 +join 0 +western 0 +laboratori 0 +sunni 0 +palo 0 +alto 0 +bought 0 +hous 0 +nearbi 0 +menlo 0 +park 0 +curriculum 0 +vita 0 +summari 0 +teach 0 +experi 0 +projectsi 0 +primarili 0 +project 0 +pure 0 +langaug 0 +us 0 +avehicl 0 +area 0 +environ 0 +weintend 0 +scale 0 +real 0 +world 0 +andto 0 +keep 0 +true 0 +goal 0 +vortexcompil 0 +current 0 +codein 0 +much 0 +group 0 +involv 0 +track 0 +bug 0 +hang 0 +spinproject 0 +meet 0 +spin 0 +extens 0 +oper 0 +systemmicrokernel 0 +support 0 +dynam 0 +adapt 0 +interfacesand 0 +direct 0 +control 0 +stillmaintain 0 +inter 0 +isol 0 +spring 0 +quarter 0 +organ 0 +seminar 0 +concern 0 +ofobject 0 +particular 0 +usedto 0 +improv 0 +perform 0 +increment 0 +andhow 0 +interact 0 +especiallyprofil 0 +howwhol 0 +assumedthat 0 +access 0 +entir 0 +manycompromis 0 +made 0 +exist 0 +becom 0 +unnecessari 0 +lesson 0 +learn 0 +wholeprogram 0 +toward 0 +programminglanguag 0 +flexibl 0 +base 0 +underlyingimplement 0 +valid 0 +three 0 +principaldesign 0 +forobject 0 +defin 0 +independentintermedi 0 +ishigh 0 +enough 0 +level 0 +featur 0 +messagesend 0 +closur 0 +creation 0 +contain 0 +uniqu 0 +wayof 0 +compos 0 +parallel 0 +obtain 0 +better 0 +result 0 +repeatedli 0 +run 0 +passessepar 0 +intraprocedur 0 +classanalysi 0 +profil 0 +receiv 0 +class 0 +predict 0 +inlin 0 +aliasanalysi 0 +split 0 +singl 0 +combin 0 +part 0 +wai 0 +structuringoptim 0 +kind 0 +composit 0 +stillallow 0 +independ 0 +eachoth 0 +nice 0 +framework 0 +specifi 0 +iter 0 +data 0 +flowanalys 0 +client 0 +withrel 0 +littl 0 +effort 0 +dead 0 +assignmentelimin 0 +approxim 0 +code 0 +publicationssom 0 +paper 0 +personali 0 +love 0 +spici 0 +food 0 +mild 0 +four 0 +letter 0 +word 0 +coke 0 +probabl 0 +kick 0 +caffein 0 +habit 0 +enjoy 0 +moment 0 +heidi 0 +daughter 0 +victoria 0 +realli 0 +like 0 +somedai 0 +honeymoon 0 +kauai 0 +hurrican 0 +iniki 0 +galvin 0 +fly 0 +guess 0 +never 0 +anyth 0 +anymor 0 +took 0 +consist 0 +minut 0 +around 0 +downtown 0 +puget 0 +sound 0 +travel 0 +model 0 +feel 0 +dare 0 +sadli 0 +insur 0 +coverag 0 +doesn 0 +passeng 0 +walk 0 +wing 0 +back 0 +enjoi 0 +fantast 0 +even 0 +highli 0 +recommend 0 +look 0 +someth 0 +number 0 +rather 0 +lengthi 0 +hotlist 0 +jdean 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..97935d4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,176 @@ +softwar 2 +analysi 2 +deviat 2 +requir 2 +rsml 1 +comput 1 +system 1 +rees 1 +safeti 1 +specif 1 +critic 1 +scienc 1 +control 1 +state 1 +develop 1 +languag 1 +tool 1 +semant 1 +damon 1 +doctor 1 +research 1 +jdrees 1 +washington 1 +place 1 +applic 1 +advantag 1 +public 1 +exampl 1 +oper 1 +project 1 +especi 1 +base 1 +us 1 +hazard 1 +procedur 1 +hazop 1 +studi 1 +dissert 1 +make 1 +avail 1 +kurt 1 +partridg 1 +univers 1 +postscript 1 +waxahachi 1 +leveson 1 +heimdahl 1 +hildreth 1 +process 1 +ieee 1 +transact 1 +industri 1 +home 0 +pagejon 0 +reesepost 0 +groupdepart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +offic 0 +chateau 0 +guggenheim 0 +annex 0 +phone 0 +mail 0 +interest 0 +problem 0 +awar 0 +devic 0 +structur 0 +caus 0 +catastroph 0 +fail 0 +hardwar 0 +becom 0 +less 0 +expens 0 +expect 0 +wider 0 +rang 0 +convent 0 +technolog 0 +flexibl 0 +come 0 +price 0 +behavior 0 +complex 0 +unpredict 0 +perhap 0 +best 0 +three 0 +mile 0 +island 0 +incid 0 +great 0 +difficulti 0 +diagnos 0 +emerg 0 +stage 0 +success 0 +respect 0 +reason 0 +colleagu 0 +concentr 0 +commun 0 +call 0 +machin 0 +valid 0 +specifi 0 +tca 0 +avion 0 +thesi 0 +invent 0 +signific 0 +concept 0 +borrow 0 +henc 0 +name 0 +link 0 +html 0 +transcript 0 +current 0 +write 0 +confer 0 +articl 0 +summar 0 +group 0 +possibl 0 +dynam 0 +displai 0 +search 0 +siang 0 +integr 0 +alpha 0 +version 0 +publicli 0 +sean 0 +sandi 0 +draft 0 +document 0 +includ 0 +discuss 0 +variant 0 +improv 0 +academ 0 +histori 0 +inform 0 +california 0 +irvin 0 +linguist 0 +rice 0 +high 0 +school 0 +nanci 0 +mat 0 +holli 0 +engin 0 +septemb 0 +steven 0 +dolin 0 +curv 0 +interpret 0 +diagnost 0 +techniqu 0 +januari 0 +februari 0 +ortega 0 +experi 0 +statechart 0 +sixth 0 +intern 0 +workshop 0 +design 0 +como 0 +itali 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..9220e8ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,104 @@ +jack 2 +compil 2 +multithread 1 +simultan 1 +page 1 +level 1 +parallel 1 +instruct 1 +postscript 1 +susan 1 +egger 1 +issu 1 +processor 1 +schedul 1 +washington 1 +abstract 1 +henri 1 +levi 1 +tullsen 1 +proceed 1 +architectur 1 +examin 1 +pictur 1 +home 1 +comput 1 +current 1 +research 1 +joel 1 +emer 1 +rebecca 1 +stamm 1 +anddean 1 +implement 1 +balanc 1 +optim 1 +static 1 +dynam 1 +superscalar 1 +written 1 +report 1 +lojack 0 +lojlo 0 +depart 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +construct 0 +paper 0 +convert 0 +thread 0 +submit 0 +public 0 +juli 0 +exploit 0 +choic 0 +fetch 0 +dean 0 +annual 0 +intern 0 +symposium 0 +philadelphia 0 +first 0 +suif 0 +workshop 0 +stanford 0 +januari 0 +improv 0 +increas 0 +sigplan 0 +confer 0 +program 0 +languag 0 +design 0 +jolla 0 +california 0 +june 0 +compar 0 +gener 0 +interact 0 +loph 0 +qualifi 0 +work 0 +support 0 +interest 0 +also 0 +includ 0 +vliw 0 +well 0 +particular 0 +investig 0 +person 0 +find 0 +franklin 0 +eseattl 0 +orsieg 0 +hall 0 +room 0 +phone 0 +coupl 0 +recent 0 +paintbal 0 +experi 0 +yahoojlo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..3b4b1cb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,45 @@ +user 1 +interfac 1 +home 1 +page 1 +washington 1 +research 1 +project 1 +sherman 0 +shermanjoebob 0 +depart 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +usami 0 +interest 0 +designinform 0 +navig 0 +visual 0 +activ 0 +inform 0 +local 0 +survei 0 +useclass 0 +hcreat 0 +impress 0 +pagequ 0 +time 0 +sarahsoftballstuff 0 +might 0 +want 0 +automat 0 +suggest 0 +link 0 +relat 0 +topic 0 +directori 0 +us 0 +pagesif 0 +browser 0 +support 0 +send 0 +mail 0 +tojoebob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..a01ac7dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,61 @@ +joshua 1 +seim 1 +josh 1 +home 1 +page 1 +current 1 +comput 1 +scienc 1 +sever 1 +abstract 0 +biolog 0 +base 0 +neural 0 +network 0 +system 0 +test 0 +theunivers 0 +washington 0 +depart 0 +begunin 0 +lockean 0 +blank 0 +slate 0 +learn 0 +emul 0 +observedbehavior 0 +successfulli 0 +accomplish 0 +task 0 +graduatingfrom 0 +colleg 0 +travel 0 +volit 0 +recent 0 +start 0 +demonstr 0 +potenti 0 +independ 0 +creativ 0 +thought 0 +taskw 0 +present 0 +earn 0 +expect 0 +take 0 +year 0 +document 0 +provid 0 +overviewof 0 +cognit 0 +ambulatori 0 +achiev 0 +organ 0 +person 0 +academichierarchi 0 +addition 0 +futur 0 +work 0 +discuss 0 +within 0 +context 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..0fa025ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,4 @@ +jovan 1 +home 1 +page 1 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..965a6927 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,67 @@ +joanna 1 +page 1 +academ 1 +main 1 +interest 1 +comput 1 +graphic 1 +alma 1 +biologi 1 +powerjoanna 0 +pagehi 0 +cat 0 +interestsmi 0 +scienc 0 +grad 0 +school 0 +uwneat 0 +stuff 0 +matercool 0 +link 0 +jonathan 0 +shadegraph 0 +research 0 +uwduoton 0 +reproductionmi 0 +especi 0 +genet 0 +molecular 0 +matermost 0 +recent 0 +site 0 +gain 0 +employmentpubl 0 +power 0 +brad 0 +west 0 +eric 0 +stollnitz 0 +david 0 +salesin 0 +reproduc 0 +color 0 +imag 0 +duoton 0 +proceed 0 +siggraph 0 +york 0 +real 0 +lifepast 0 +homesdiversionsgend 0 +issuesstatu 0 +women 0 +sciencenow 0 +home 0 +pagefeminist 0 +major 0 +onlineultim 0 +frisbeefun 0 +stufffroggi 0 +sean 0 +quotesbrad 0 +comic 0 +musicevan 0 +jokes 0 +pagesmi 0 +herojpow 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..e08d1b6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,16 @@ +jonathan 1 +shake 1 +washington 1 +sieg 0 +hall 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +ahoi 0 +homepag 0 +finderresumlinkslast 0 +updat 0 +august 0 +jshake 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..79fb76df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan 0 +alemanyjuan 0 +alemani 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..d84e7127 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,116 @@ +comput 2 +work 1 +scienc 1 +washington 1 +data 1 +surfac 1 +taught 1 +slide 1 +kari 1 +pulli 1 +home 1 +antero 1 +year 1 +univers 1 +graphic 1 +vision 1 +research 1 +depart 1 +project 1 +report 1 +technic 1 +engin 1 +subdivis 1 +find 1 +class 1 +theclass 1 +seattl 1 +pagekari 0 +pullii 0 +third 0 +graduat 0 +student 0 +engineeringdepart 0 +ofwashington 0 +interest 0 +andmathemat 0 +try 0 +combin 0 +aspect 0 +thesedisciplin 0 +professor 0 +closest 0 +tonyderos 0 +actual 0 +left 0 +uwfor 0 +pixar 0 +lindashapiro 0 +addition 0 +werner 0 +stuetzl 0 +andjohn 0 +mcdonald 0 +statist 0 +duchamp 0 +mathemat 0 +andhugu 0 +hopp 0 +rick 0 +szeliski 0 +microsoft 0 +qual 0 +tribor 0 +triplet 0 +base 0 +object 0 +recognitionsystem 0 +linda 0 +universityof 0 +current 0 +surfacereconstruct 0 +rang 0 +multipl 0 +baselin 0 +camerasystem 0 +obtain 0 +waveletanalysi 0 +geometri 0 +reflect 0 +function 0 +pass 0 +gener 0 +examin 0 +topic 0 +rigidregistr 0 +click 0 +architecturesystem 0 +susanegg 0 +distribut 0 +brianbershad 0 +imag 0 +understand 0 +steven 0 +tanimoto 0 +present 0 +sketch 0 +siggraph 0 +getto 0 +remov 0 +wavelet 0 +herear 0 +speaker 0 +note 0 +eacutesum 0 +eacut 0 +sieg 0 +hall 0 +email 0 +kapu 0 +union 0 +folk 0 +takavainionti 0 +oulu 0 +finland 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..cc97f798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,17 @@ +washington 1 +univers 1 +anna 0 +karlinanna 0 +rochel 0 +karlinassoci 0 +professor 0 +sincejuli 0 +work 0 +comput 0 +scienc 0 +engin 0 +depart 0 +seattl 0 +home 0 +page 0 +paperskarlin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..525f26ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,10 @@ +page 1 +home 1 +yeunghom 0 +yeungperson 0 +infomi 0 +picturemi 0 +researchtelnet 0 +machinessend 0 +email 0 +back 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..0cf6a95a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,83 @@ +kurt 1 +partridg 1 +softwar 1 +comput 1 +washington 1 +specif 1 +safeti 1 +postscript 1 +graduat 1 +scienc 1 +univers 1 +interact 1 +environ 1 +poster 1 +academ 0 +inform 0 +student 0 +depart 0 +interest 0 +includ 0 +engin 0 +usabl 0 +readabl 0 +applic 0 +formal 0 +method 0 +page 0 +describ 0 +other 0 +work 0 +area 0 +also 0 +dabbl 0 +user 0 +interfac 0 +design 0 +human 0 +java 0 +program 0 +recent 0 +public 0 +bddtcl 0 +visual 0 +manipul 0 +binari 0 +decisiondiagram 0 +html 0 +preview 0 +nanci 0 +leveson 0 +bauer 0 +mat 0 +heimdahl 0 +wayn 0 +ohlrich 0 +vivek 0 +ratan 0 +rees 0 +critic 0 +nasa 0 +confer 0 +qualiti 0 +background 0 +start 0 +school 0 +complet 0 +berkelei 0 +live 0 +love 0 +suburban 0 +life 0 +thousand 0 +oak 0 +parent 0 +sister 0 +name 0 +oti 0 +right 0 +humor 0 +corner 0 +seattl 0 +voic 0 +kepart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..24b2958e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,50 @@ +keith 1 +golden 1 +washington 1 +home 0 +page 0 +questa 0 +pagina 0 +anch 0 +italiano 0 +researchsoftbotsplanningkrselect 0 +publicationscurriculum 0 +vita 0 +also 0 +inpostscriptrandom 0 +hackingwordbot 0 +collabor 0 +dictionari 0 +like 0 +bicycl 0 +tour 0 +languag 0 +paint 0 +photographi 0 +natur 0 +coffe 0 +godless 0 +pinko 0 +stuff 0 +dislik 0 +suit 0 +lawyer 0 +car 0 +friend 0 +ellenmarcruben 0 +laurennickrich 0 +joannavivek 0 +advisor 0 +oren 0 +etzioni 0 +weld 0 +keithgolden 0 +depart 0 +ofcomput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +kgolden 0 +complet 0 +list 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..94477652 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,143 @@ +univers 2 +softwar 2 +kingsum 2 +hong 2 +chow 1 +kong 1 +washington 1 +evolut 1 +page 1 +engin 1 +david 1 +notkin 1 +asynchron 1 +pcct 1 +movi 1 +scienc 1 +seattl 1 +research 1 +confer 1 +resum 1 +line 1 +respons 1 +technic 1 +workshop 1 +program 1 +transform 1 +home 1 +onlin 1 +quot 1 +educomput 0 +depart 0 +usathi 0 +inform 0 +highwai 0 +alwai 0 +construct 0 +tabl 0 +content 0 +person 0 +upcom 0 +current 0 +schedul 0 +bridg 0 +glossari 0 +suggest 0 +feedbackresearchmi 0 +advisor 0 +develop 0 +toolspap 0 +qualiti 0 +manag 0 +driven 0 +readi 0 +semi 0 +automat 0 +updat 0 +applic 0 +librari 0 +chang 0 +report 0 +revis 0 +version 0 +appear 0 +icsm 0 +asia 0 +pacif 0 +march 0 +mainten 0 +proceed 0 +ics 0 +william 0 +griswold 0 +editor 0 +intern 0 +april 0 +us 0 +site 0 +sorcererpcct 0 +terrenc 0 +parr 0 +note 0 +newbiesresumepleasedrop 0 +mailto 0 +specifi 0 +text 0 +postscript 0 +format 0 +kongchines 0 +technolog 0 +polytechn 0 +citi 0 +kongsingapor 0 +sitessingapor 0 +world 0 +wide 0 +server 0 +alumnu 0 +websom 0 +campu 0 +friendstom 0 +liew 0 +fook 0 +wang 0 +jiang 0 +weidongu 0 +relatedunivers 0 +style 0 +polici 0 +manual 0 +these 0 +dissert 0 +graduat 0 +school 0 +webserv 0 +book 0 +storeinvestmentsfre 0 +minut 0 +delai 0 +watch 0 +market 0 +data 0 +experiment 0 +mutual 0 +fund 0 +chart 0 +invest 0 +center 0 +stock 0 +commod 0 +analysismisc 0 +read 0 +chines 0 +list 0 +thoma 0 +china 0 +new 0 +servic 0 +welcom 0 +visit 0 +sinc 0 +last 0 +modifi 0 +date 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..ac1bb9ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,90 @@ +univers 1 +seattl 1 +home 1 +kevin 1 +washington 1 +comput 1 +engin 1 +work 1 +chaotic 1 +rout 1 +electr 1 +pacif 1 +time 1 +bold 0 +boldingkwb 0 +scienc 0 +depart 0 +juvenil 0 +squirt 0 +wander 0 +search 0 +suitabl 0 +rock 0 +hunk 0 +coral 0 +cling 0 +make 0 +life 0 +task 0 +rudimentari 0 +nervou 0 +system 0 +find 0 +spot 0 +take 0 +root 0 +doesn 0 +need 0 +brain 0 +eat 0 +rather 0 +like 0 +get 0 +tenur 0 +dennett 0 +conscious 0 +explain 0 +research 0 +current 0 +build 0 +high 0 +speed 0 +latencylan 0 +router 0 +previou 0 +researchha 0 +chaoticrout 0 +form 0 +minim 0 +adapt 0 +formass 0 +parallel 0 +multicomput 0 +profession 0 +assist 0 +professor 0 +also 0 +part 0 +researchassoci 0 +ofwashington 0 +signific 0 +paper 0 +written 0 +archiv 0 +ofth 0 +group 0 +spend 0 +teach 0 +engineeringat 0 +person 0 +photo 0 +took 0 +comethyakutak 0 +moustach 0 +real 0 +case 0 +want 0 +visit 0 +anoth 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..ba0b8125 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,34 @@ +quarter 1 +comput 1 +washington 1 +scienc 1 +ladner 1 +introduct 1 +spring 1 +richard 0 +ladnerrichard 0 +ladnerprofessor 0 +depart 0 +engin 0 +univers 0 +seattl 0 +mail 0 +phone 0 +offic 0 +sieg 0 +hall 0 +room 0 +person 0 +short 0 +biographyresearch 0 +public 0 +studentsteachingcomput 0 +program 0 +fall 0 +commun 0 +network 0 +formal 0 +model 0 +winter 0 +data 0 +structur 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..6a5ac48e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,112 @@ +larri 1 +washington 1 +comput 1 +univers 1 +laboratori 1 +chemistri 1 +work 1 +integr 1 +system 1 +fpga 1 +mcmurchi 1 +home 1 +page 1 +scienc 1 +area 1 +function 1 +staff 1 +design 1 +develop 1 +mactest 1 +chip 1 +mcmurchiedepart 0 +engin 0 +ofwashington 0 +seattl 0 +voic 0 +mail 0 +offic 0 +sieg 0 +hall 0 +room 0 +current 0 +research 0 +director 0 +integratedsystem 0 +western 0 +hework 0 +quantum 0 +graduat 0 +studi 0 +primari 0 +focu 0 +number 0 +evalu 0 +class 0 +ofintegr 0 +gaussian 0 +later 0 +appli 0 +theconstruct 0 +larg 0 +spars 0 +hamiltonian 0 +matric 0 +coauthorof 0 +comprehens 0 +packag 0 +program 0 +meld 0 +us 0 +abinitio 0 +calcul 0 +small 0 +molecul 0 +sinc 0 +join 0 +depart 0 +andengin 0 +supervis 0 +technic 0 +ofth 0 +coauthor 0 +wirec 0 +aschemat 0 +captur 0 +allow 0 +code 0 +withschemat 0 +symbol 0 +creat 0 +concis 0 +parameteriz 0 +representationof 0 +also 0 +involv 0 +andcommerci 0 +softwar 0 +hardwareenviron 0 +test 0 +board 0 +andsubsystem 0 +recent 0 +andha 0 +gener 0 +purpos 0 +perform 0 +driven 0 +router 0 +northwest 0 +cost 0 +vlsi 0 +tester 0 +triptych 0 +high 0 +densiti 0 +architectur 0 +public 0 +journal 0 +articl 0 +upcom 0 +confer 0 +return 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..5a90aa26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,207 @@ +softwar 2 +comput 2 +system 2 +leveson 2 +safeti 2 +model 1 +professor 1 +research 1 +specif 1 +aircraft 1 +analysi 1 +design 1 +engin 1 +human 1 +nanci 1 +recent 1 +project 1 +member 1 +inform 1 +paper 1 +avail 1 +machin 1 +languag 1 +gener 1 +interfac 1 +scienc 1 +washington 1 +california 1 +year 1 +area 1 +build 1 +real 1 +life 1 +topic 1 +student 1 +requir 1 +tca 1 +collis 1 +avoid 1 +anyth 1 +current 1 +work 1 +specifi 1 +includ 1 +safe 1 +fault 1 +nation 1 +council 1 +space 1 +appli 1 +rsml 1 +deriv 1 +finger 1 +home 0 +page 0 +levesondepart 0 +engineeringunivers 0 +washingtonbox 0 +express 0 +mail 0 +sieg 0 +hall 0 +seattl 0 +join 0 +faculti 0 +come 0 +search 0 +rain 0 +receiv 0 +degre 0 +mathand 0 +ucla 0 +spent 0 +form 0 +univers 0 +irvin 0 +start 0 +concern 0 +problem 0 +time 0 +failur 0 +result 0 +loss 0 +properti 0 +advantag 0 +nobodi 0 +question 0 +goal 0 +except 0 +misanthrop 0 +matter 0 +anywai 0 +produc 0 +aform 0 +commerci 0 +airspac 0 +lesson 0 +learn 0 +never 0 +like 0 +seem 0 +pleas 0 +though 0 +adopt 0 +theiroffici 0 +behavior 0 +claim 0 +thatyou 0 +read 0 +fact 0 +take 0 +train 0 +late 0 +safetyresearch 0 +also 0 +autom 0 +highwai 0 +automobil 0 +variou 0 +aerospac 0 +subtop 0 +toler 0 +verif 0 +valid 0 +editor 0 +chief 0 +ieee 0 +transact 0 +softwareengin 0 +elect 0 +board 0 +director 0 +computingresearch 0 +associ 0 +commissionon 0 +technic 0 +committe 0 +public 0 +polici 0 +chair 0 +studi 0 +evalu 0 +shuttl 0 +process 0 +levesoni 0 +fellow 0 +award 0 +aiaa 0 +systemsaward 0 +contribut 0 +aeronaut 0 +technolog 0 +andscienc 0 +develop 0 +field 0 +promotingrespons 0 +practic 0 +propertyar 0 +stake 0 +book 0 +safewar 0 +addison 0 +weslei 0 +publish 0 +list 0 +isalso 0 +copi 0 +favorit 0 +actual 0 +keynoteaddress 0 +conf 0 +melbourn 0 +titl 0 +high 0 +pressur 0 +steam 0 +click 0 +qual 0 +follow 0 +hazardanalysi 0 +techniqu 0 +writtenin 0 +state 0 +style 0 +call 0 +determin 0 +wai 0 +tree 0 +analys 0 +newrequir 0 +principl 0 +hazard 0 +control 0 +cockpit 0 +analyz 0 +accid 0 +report 0 +involv 0 +mode 0 +awar 0 +problemsand 0 +issu 0 +interact 0 +citi 0 +airport 0 +perhap 0 +contact 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..c12e1873 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,202 @@ +system 2 +levi 2 +oper 2 +comput 2 +architectur 2 +henri 1 +proc 1 +symposium 1 +hank 1 +distribut 1 +languag 1 +feelei 1 +support 1 +research 1 +multithread 1 +principl 1 +washington 1 +workstat 1 +student 1 +octob 1 +michael 1 +thekkath 1 +parallel 1 +simultan 1 +environ 1 +recent 1 +singl 1 +address 1 +space 1 +paper 1 +chair 1 +program 1 +dessert 1 +global 1 +memori 1 +karlin 1 +implement 1 +dean 1 +tullsen 1 +susan 1 +egger 1 +intern 1 +jeffrei 1 +chase 1 +lazowska 1 +novemb 1 +conf 1 +arch 1 +prog 1 +asplo 1 +home 0 +page 0 +professor 0 +join 0 +faculti 0 +current 0 +project 0 +focu 0 +particularli 0 +object 0 +base 0 +projectcal 0 +opal 0 +deal 0 +theetch 0 +projecti 0 +produc 0 +tool 0 +perform 0 +instrument 0 +optim 0 +binari 0 +execut 0 +author 0 +book 0 +numer 0 +includ 0 +outstand 0 +select 0 +four 0 +consecutiveacm 0 +symposia 0 +former 0 +sigop 0 +special 0 +interest 0 +group 0 +onoper 0 +theth 0 +tobe 0 +held 0 +hold 0 +carnegi 0 +mellon 0 +universityand 0 +univers 0 +come 0 +consult 0 +engin 0 +digit 0 +equip 0 +corpor 0 +work 0 +span 0 +rang 0 +fellow 0 +associ 0 +machineryand 0 +recipi 0 +fulbright 0 +scholar 0 +award 0 +eleven 0 +master 0 +nine 0 +survivedlevi 0 +supervis 0 +haveal 0 +escap 0 +academ 0 +posit 0 +major 0 +lab 0 +glu 0 +usual 0 +befound 0 +ski 0 +bike 0 +plai 0 +tenni 0 +help 0 +lead 0 +thedepart 0 +infam 0 +softbal 0 +team 0 +smile 0 +potato 0 +death 0 +sampl 0 +seattl 0 +mani 0 +parlor 0 +publicationsreduc 0 +network 0 +latenc 0 +us 0 +subpag 0 +jamrozik 0 +voelker 0 +evan 0 +vernon 0 +inproceed 0 +seventh 0 +confer 0 +postscript 0 +manag 0 +cluster 0 +william 0 +morgan 0 +freder 0 +pighin 0 +anna 0 +chandramohan 0 +appear 0 +decemb 0 +maxim 0 +chip 0 +annual 0 +june 0 +exploit 0 +choic 0 +instruct 0 +fetch 0 +issu 0 +implementablesimultan 0 +processor 0 +joen 0 +emer 0 +jack 0 +rebecca 0 +stamm 0 +share 0 +protect 0 +edwardd 0 +transact 0 +integr 0 +coher 0 +recover 0 +vivek 0 +narasayya 0 +first 0 +design 0 +hardwar 0 +softwar 0 +effici 0 +except 0 +handl 0 +separ 0 +data 0 +control 0 +transfer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..6da6ce02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,114 @@ +opal 2 +address 1 +structur 1 +share 1 +singl 1 +space 1 +oper 1 +system 1 +protect 1 +need 1 +complex 1 +cooper 1 +program 1 +larg 1 +data 1 +time 1 +alpha 1 +page 1 +right 1 +access 1 +inform 1 +list 1 +relat 1 +projectop 0 +project 0 +explor 0 +tunedto 0 +applic 0 +numberof 0 +manipul 0 +persist 0 +databaseof 0 +object 0 +code 0 +exist 0 +huge 0 +enhanc 0 +andcooper 0 +uniqu 0 +interpret 0 +thu 0 +pointer 0 +base 0 +directlycommun 0 +bestor 0 +directli 0 +secondari 0 +storag 0 +without 0 +translat 0 +simplifi 0 +avail 0 +addressspac 0 +provid 0 +mip 0 +risc 0 +independ 0 +thread 0 +execut 0 +within 0 +domainthat 0 +defin 0 +virtual 0 +easili 0 +transmit 0 +oneprocess 0 +anoth 0 +result 0 +much 0 +flexibl 0 +protectionstructur 0 +permit 0 +differ 0 +dynam 0 +chang 0 +option 0 +depend 0 +trust 0 +relationshipbetween 0 +parti 0 +believ 0 +organ 0 +canimprov 0 +perform 0 +cooperatingappl 0 +prototyp 0 +built 0 +platform 0 +ofth 0 +mach 0 +sourc 0 +paper 0 +faculti 0 +member 0 +hank 0 +levi 0 +lazowska 0 +jeff 0 +chase 0 +duke 0 +univers 0 +current 0 +graduat 0 +student 0 +mike 0 +feelei 0 +ashutosh 0 +tiwari 0 +vivek 0 +narasayya 0 +dylan 0 +mcname 0 +mail 0 +archiv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..b30bda4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,139 @@ +tutori 2 +object 1 +oopsla 1 +constraint 1 +orient 1 +languag 1 +program 1 +propos 1 +washington 1 +engin 1 +imper 1 +lopez 1 +bjorn 1 +freeman 1 +benson 1 +alan 1 +born 1 +confer 1 +accept 1 +comput 1 +scienc 1 +univers 1 +implement 1 +kaleidoscop 1 +advanc 1 +also 1 +inproceed 1 +chair 1 +technolog 1 +topic 1 +encourag 1 +submiss 1 +electron 1 +lopezgu 0 +lopezlopez 0 +school 0 +sieg 0 +hall 0 +depart 0 +seattl 0 +home 0 +student 0 +dissertationresearch 0 +design 0 +curriculum 0 +vita 0 +publicationsgu 0 +brian 0 +mayoh 0 +tougu 0 +jann 0 +penjam 0 +editor 0 +constraintprogram 0 +springer 0 +verlag 0 +nato 0 +studi 0 +instituteseri 0 +seri 0 +system 0 +publisheda 0 +technic 0 +report 0 +ident 0 +european 0 +bologna 0 +itali 0 +juli 0 +virtual 0 +machin 0 +programmingsystem 0 +applic 0 +portland 0 +oregon 0 +octob 0 +tutorialsi 0 +upcom 0 +conferencein 0 +jose 0 +california 0 +peopl 0 +andsoftwar 0 +develop 0 +meet 0 +speak 0 +well 0 +known 0 +breadth 0 +depth 0 +high 0 +qualiti 0 +itsextens 0 +previou 0 +year 0 +tutorialshav 0 +cover 0 +aspect 0 +introductorysurvei 0 +industri 0 +softwar 0 +practic 0 +lead 0 +edg 0 +academicresearch 0 +respons 0 +request 0 +past 0 +attende 0 +weespeci 0 +issu 0 +anyon 0 +consid 0 +submit 0 +requestguidelin 0 +theoopsla 0 +hotlin 0 +mail 0 +enthusiast 0 +proposalswithout 0 +email 0 +address 0 +march 0 +notif 0 +withcamera 0 +readi 0 +note 0 +august 0 +interest 0 +link 0 +green 0 +direct 0 +jimi 0 +hendrix 0 +grave 0 +star 0 +war 0 +collector 0 +archiv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..b7a9642d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,40 @@ +omid 1 +home 1 +madani 1 +washington 1 +depart 1 +page 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +chateau 0 +suit 0 +bhello 0 +curiou 0 +browser 0 +welcom 0 +fourth 0 +year 0 +graduat 0 +student 0 +enjoytheori 0 +also 0 +like 0 +keep 0 +touch 0 +areasinclud 0 +graphic 0 +life 0 +work 0 +academ 0 +want 0 +look 0 +islamicarchitectur 0 +isfahan 0 +best 0 +nomine 0 +citi 0 +countri 0 +iran 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..ed15d570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,18 @@ +mike 1 +perkowitz 1 +page 0 +perkowitznewsflash 0 +goe 0 +blond 0 +area 0 +research 0 +academia 0 +music 0 +creativ 0 +randomfavorit 0 +sheba 0 +voyeur 0 +written 0 +grooveneedl 0 +espressoresumemik 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..cd5e8fc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,64 @@ +washington 1 +langheinrich 1 +bielefeld 1 +univers 1 +depart 1 +comput 1 +marclang 1 +homepagemarc 1 +email 1 +scienc 1 +phone 1 +marc 0 +langheinrichuniversitt 0 +washingtontechnisch 0 +fakultt 0 +scienceemail 0 +imlangh 0 +techfak 0 +eduabout 0 +myselfi 0 +spent 0 +last 0 +year 0 +theunivers 0 +visit 0 +graduat 0 +student 0 +thefulbright 0 +program 0 +check 0 +follow 0 +link 0 +depthinform 0 +resum 0 +project 0 +short 0 +biopost 0 +addressa 0 +septemb 0 +back 0 +germani 0 +finish 0 +mastersat 0 +pleas 0 +contact 0 +german 0 +address 0 +homeschoolgermanyringstra 0 +maintalphon 0 +paulusplatz 0 +bielefeldphon 0 +woodlawn 0 +seattl 0 +sieg 0 +hall 0 +browser 0 +support 0 +tabl 0 +access 0 +data 0 +list 0 +formatmarc 0 +http 0 +home 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..afb961ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,109 @@ +visual 2 +marla 2 +baker 2 +comput 2 +softwar 1 +program 1 +languag 1 +work 1 +eick 1 +washington 1 +stephen 1 +system 1 +engin 1 +user 1 +interfac 1 +interact 1 +educ 1 +collabor 1 +current 1 +object 1 +wai 1 +journal 1 +burnett 1 +larg 1 +home 0 +chief 0 +editor 0 +depart 0 +scienc 0 +univers 0 +seattl 0 +partner 0 +crime 0 +bentlei 0 +academ 0 +interestsgraph 0 +human 0 +support 0 +learn 0 +cscl 0 +graphic 0 +techniqu 0 +stevetanimoto 0 +lauren 0 +bricker 0 +coimag 0 +project 0 +devleop 0 +activ 0 +order 0 +explor 0 +cooper 0 +contol 0 +goal 0 +investig 0 +differ 0 +multipl 0 +cansimultan 0 +share 0 +manipul 0 +given 0 +assess 0 +also 0 +part 0 +time 0 +packard 0 +bell 0 +compani 0 +resum 0 +publicationsbak 0 +space 0 +fill 0 +june 0 +bohu 0 +carlson 0 +yang 0 +scale 0 +ieee 0 +special 0 +issu 0 +march 0 +margaret 0 +classif 0 +septemb 0 +tool 0 +proceed 0 +intern 0 +confer 0 +sorento 0 +itali 0 +method 0 +apparatu 0 +displai 0 +hierarch 0 +inform 0 +patent 0 +applic 0 +submit 0 +octob 0 +tutori 0 +geometr 0 +transform 0 +imag 0 +metip 0 +environ 0 +check 0 +page 0 +offic 0 +sieg 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..b0619005 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,87 @@ +compil 1 +philipos 1 +dynam 1 +runtim 1 +matthai 1 +work 1 +chamber 1 +home 1 +page 1 +code 1 +like 1 +washington 1 +seattl 1 +ausland 1 +egger 1 +support 1 +system 1 +stuff 1 +project 0 +compileri 0 +beast 0 +gener 0 +optim 0 +shortterm 0 +interest 0 +figur 0 +produc 0 +good 0 +modern 0 +processor 0 +architectur 0 +applic 0 +side 0 +think 0 +interpret 0 +basedsystem 0 +real 0 +time 0 +constraint 0 +java 0 +browser 0 +canbenefit 0 +select 0 +wire 0 +asystem 0 +goe 0 +withprofessor 0 +susan 0 +eggersand 0 +craig 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +phone 0 +public 0 +bershad 0 +fast 0 +effect 0 +confer 0 +onprogram 0 +languag 0 +design 0 +implement 0 +mock 0 +andp 0 +pardyak 0 +automaticdynam 0 +event 0 +dispatch 0 +extens 0 +workshop 0 +softwar 0 +februari 0 +bookmark 0 +plai 0 +frequentlymiscellan 0 +link 0 +local 0 +importancefrom 0 +past 0 +abuwhi 0 +black 0 +blue 0 +ribbon 0 +campaign 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..1f21a762 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,174 @@ +mckenzi 2 +neil 1 +rout 1 +link 1 +page 1 +project 1 +current 1 +inform 1 +east 1 +seattl 1 +larri 1 +us 1 +contact 1 +past 1 +public 1 +person 1 +mail 1 +merl 1 +coast 1 +time 1 +carl 1 +chaotic 1 +design 1 +implement 1 +messag 1 +pass 1 +interfac 1 +network 1 +tool 1 +gemini 1 +washington 1 +comput 1 +last 1 +updat 1 +angel 1 +shot 1 +menu 0 +fine 0 +dine 0 +index 0 +futur 0 +list 0 +game 0 +mitsubishi 0 +electr 0 +research 0 +laboratori 0 +broadwai 0 +floor 0 +cambridg 0 +phone 0 +projectsgonna 0 +teenag 0 +lobotomi 0 +ramonesi 0 +live 0 +mile 0 +andwork 0 +note 0 +involv 0 +projectconcern 0 +real 0 +volum 0 +render 0 +medic 0 +data 0 +copiou 0 +free 0 +expatri 0 +graduat 0 +student 0 +work 0 +onchaot 0 +routingwith 0 +faculti 0 +advisor 0 +ebel 0 +snyder 0 +packet 0 +algorithm 0 +mesh 0 +torusnetwork 0 +dissert 0 +thecranium 0 +compatiblewith 0 +projectsi 0 +teach 0 +assist 0 +summer 0 +chip 0 +tester 0 +call 0 +mactest 0 +maintain 0 +netlist 0 +graph 0 +isomorph 0 +calledgemini 0 +industri 0 +speak 0 +knowna 0 +layout 0 +schemat 0 +avail 0 +interest 0 +pleas 0 +send 0 +mcmurchi 0 +cranium 0 +adapt 0 +packetrout 0 +proceed 0 +parallel 0 +andcommun 0 +workshop 0 +tomactest 0 +home 0 +user 0 +guid 0 +march 0 +marri 0 +pictur 0 +hous 0 +arlington 0 +massachusett 0 +head 0 +livein 0 +fashion 0 +neighborhood 0 +ofballard 0 +creativ 0 +contain 0 +exampl 0 +artworkcr 0 +adob 0 +photoshop 0 +ownedthi 0 +year 0 +onlyth 0 +memori 0 +remain 0 +shirt 0 +correctlyguess 0 +answer 0 +toriddl 0 +jour 0 +octob 0 +label 0 +place 0 +jar 0 +countri 0 +farm 0 +honei 0 +produc 0 +myuncl 0 +edmonton 0 +alberta 0 +canada 0 +amus 0 +linkschairman 0 +linksnorm 0 +gregori 0 +bookmark 0 +halcyon 0 +eugen 0 +spafford 0 +purdu 0 +randi 0 +pausch 0 +virginia 0 +wallach 0 +scool 0 +princeton 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..50ca84e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,151 @@ +system 2 +oper 2 +extens 2 +washington 1 +spin 1 +paper 1 +protocol 1 +applic 1 +marc 1 +fiuczynski 1 +comput 1 +scienc 1 +engin 1 +univers 1 +work 1 +network 1 +interfac 1 +safe 1 +architectur 1 +compar 1 +perform 1 +describ 1 +proceed 1 +softwar 1 +home 1 +page 1 +depart 1 +spent 1 +creat 1 +telephon 1 +us 1 +forappl 1 +specif 1 +achiev 1 +run 1 +demonstr 1 +servic 1 +http 1 +request 1 +contacthttp 1 +design 1 +implement 1 +kernel 1 +appear 1 +languag 1 +shortcom 1 +dynam 1 +protect 1 +ieee 1 +analysi 1 +seattl 0 +backgroundi 0 +graduat 0 +student 0 +grewup 0 +germani 0 +near 0 +sseldorf 0 +year 0 +highschool 0 +princeton 0 +receiv 0 +fromrutg 0 +sever 0 +summer 0 +bell 0 +lab 0 +mitr 0 +corpor 0 +rang 0 +ofproject 0 +sole 0 +proprietor 0 +companythat 0 +decemb 0 +sell 0 +distribut 0 +fault 0 +toler 0 +base 0 +built 0 +scratch 0 +setof 0 +chasi 0 +processor 0 +univoic 0 +cardsand 0 +vxwork 0 +time 0 +spend 0 +hack 0 +adapt 0 +primari 0 +contribut 0 +compellingperform 0 +improv 0 +structur 0 +tosimilar 0 +commerci 0 +platform 0 +recent 0 +report 0 +anextens 0 +allow 0 +anyon 0 +custom 0 +anin 0 +graph 0 +enabl 0 +betterperform 0 +similar 0 +conventionaloper 0 +winter 0 +usenix 0 +technicalconfer 0 +safeti 0 +fifteenth 0 +symposium 0 +principl 0 +support 0 +pretti 0 +happi 0 +deal 0 +inord 0 +describeshow 0 +address 0 +link 0 +linker 0 +load 0 +code 0 +point 0 +isth 0 +abil 0 +manag 0 +linkabl 0 +namespac 0 +andcollect 0 +issu 0 +posit 0 +hardwar 0 +mechan 0 +fifth 0 +workshop 0 +topic 0 +region 0 +parallel 0 +elimin 0 +method 0 +data 0 +flow 0 +transact 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..2b2d0820 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,31 @@ +rout 1 +minim 1 +algorithm 1 +model 1 +deflect 1 +melani 0 +fulgham 0 +comput 0 +scienc 0 +versu 0 +method 0 +develop 0 +help 0 +predict 0 +compar 0 +perform 0 +router 0 +real 0 +parallel 0 +machin 0 +upper 0 +lower 0 +bound 0 +practic 0 +requir 0 +sort 0 +mesh 0 +topolog 0 +washington 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..edd0aa58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,64 @@ +washington 1 +comput 1 +singapor 1 +meng 1 +univers 1 +imag 1 +work 1 +heng 0 +homepag 0 +homepagemenghe 0 +edubox 0 +depart 0 +scienc 0 +engineeringunivers 0 +washingtonseattl 0 +second 0 +year 0 +student 0 +scienceat 0 +undergrad 0 +pennsylvania 0 +research 0 +interestsi 0 +interest 0 +retriev 0 +problem 0 +try 0 +findimag 0 +huge 0 +databas 0 +virag 0 +andqbicar 0 +commerci 0 +exampl 0 +similar 0 +kind 0 +stuff 0 +snapshot 0 +done 0 +singaporesingapor 0 +infomap 0 +provid 0 +fact 0 +andstatist 0 +singaporeonlin 0 +guid 0 +plan 0 +take 0 +trip 0 +nation 0 +boardi 0 +charg 0 +transform 0 +anintellig 0 +island 0 +graduat 0 +strait 0 +time 0 +main 0 +english 0 +newspap 0 +visit 0 +sinc 0 +menghe 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..b64f14f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,70 @@ +comput 1 +includ 1 +program 1 +michael 1 +ernst 1 +graduat 1 +student 1 +washington 1 +scienc 1 +depart 1 +research 1 +page 1 +workshop 1 +maintain 1 +home 0 +pagemichael 0 +ernsti 0 +univers 0 +previous 0 +lectur 0 +riceunivers 0 +sciencedepart 0 +programanalysi 0 +group 0 +microsoft 0 +laboratori 0 +eec 0 +frequent 0 +updat 0 +technic 0 +interest 0 +compil 0 +static 0 +analysi 0 +slice 0 +debug 0 +optim 0 +code 0 +serial 0 +parallel 0 +chair 0 +intermedi 0 +represent 0 +coloc 0 +popl 0 +intellectu 0 +properti 0 +particularli 0 +area 0 +game 0 +theori 0 +cryptographi 0 +philosophi 0 +denot 0 +semanticsi 0 +list 0 +resourcesfor 0 +confer 0 +organ 0 +occasion 0 +manag 0 +slip 0 +awai 0 +work 0 +carri 0 +real 0 +life 0 +link 0 +possibleinterest 0 +mernst 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..0cea5f99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,124 @@ +page 2 +home 1 +link 1 +mock 1 +comput 1 +scienc 1 +interest 1 +chess 1 +marku 1 +washington 1 +come 1 +compil 1 +current 1 +time 1 +stuff 1 +live 1 +search 1 +rttemberg 0 +grew 0 +anotherpart 0 +state 0 +district 0 +biberach 0 +upper 0 +swabia 0 +oberschwaben 0 +studi 0 +univers 0 +karlsruh 0 +whichi 0 +obtain 0 +diplom 0 +also 0 +spent 0 +year 0 +umass 0 +fulbright 0 +grante 0 +main 0 +research 0 +parallel 0 +distribut 0 +object 0 +orient 0 +system 0 +work 0 +dynam 0 +includ 0 +spanish 0 +latin 0 +american 0 +cultur 0 +travel 0 +good 0 +book 0 +labyrinth 0 +solitud 0 +mainstream 0 +movi 0 +dieangst 0 +torwart 0 +beim 0 +elfmet 0 +know 0 +handk 0 +salsa 0 +merengu 0 +danc 0 +still 0 +left 0 +check 0 +els 0 +seattl 0 +publicationssepar 0 +list 0 +event 0 +olympiad 0 +yerewan 0 +colloquia 0 +oopsla 0 +volunt 0 +mossi 0 +bit 0 +grad 0 +journal 0 +csek 0 +csebi 0 +cse 0 +cours 0 +graduat 0 +studentsimag 0 +depart 0 +electr 0 +engineeringy 0 +wouldn 0 +expect 0 +squar 0 +view 0 +metacrawl 0 +altavista 0 +deutsch 0 +well 0 +realaudio 0 +cool 0 +linksand 0 +quot 0 +consid 0 +lili 0 +field 0 +grow 0 +toil 0 +neither 0 +spin 0 +unto 0 +even 0 +solomon 0 +glorywa 0 +arrai 0 +like 0 +matthew 0 +access 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..831ad677 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,15 @@ +vivek 1 +narasayya 1 +washington 1 +home 0 +page 0 +nara 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +person 0 +informationresearch 0 +interestspap 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..59ae767d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,69 @@ +himanshu 1 +nautiy 1 +comput 1 +scienc 1 +engin 1 +washington 1 +offic 1 +name 1 +india 1 +internet 1 +home 0 +pagehimanshu 0 +nautiyalthi 0 +page 0 +heavi 0 +construct 0 +nautiyaldept 0 +mail 0 +stop 0 +univers 0 +seattl 0 +sieg 0 +phone 0 +cours 0 +take 0 +autumn 0 +quarter 0 +principl 0 +digit 0 +system 0 +design 0 +artifici 0 +intellig 0 +finger 0 +edugod 0 +gift 0 +personkind 0 +dougla 0 +adam 0 +terri 0 +pratchett 0 +pelham 0 +grenvil 0 +wodehouseth 0 +order 0 +alphabet 0 +last 0 +impli 0 +favorit 0 +link 0 +place 0 +radio 0 +search 0 +friend 0 +delhi 0 +finish 0 +tech 0 +astronomi 0 +skate 0 +aviat 0 +travel 0 +numismat 0 +sound 0 +much 0 +profound 0 +coin 0 +collect 0 +cook 0 +movi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..ef153d86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,90 @@ +avail 1 +need 1 +technolog 1 +labyrinth 0 +mediocr 0 +bemoan 0 +hype 0 +skeptic 0 +cynic 0 +sinc 0 +research 0 +current 0 +work 0 +automaticconstruct 0 +wrapper 0 +inform 0 +resourc 0 +alsointerest 0 +sever 0 +area 0 +artifici 0 +intellig 0 +andcognit 0 +scienc 0 +paper 0 +beeninvolv 0 +stuff 0 +anonym 0 +servic 0 +provid 0 +glbal 0 +infrmatin 0 +sperhighwai 0 +preliminari 0 +version 0 +divers 0 +meter 0 +pictur 0 +hand 0 +shortli 0 +surgeri 0 +random 0 +number 0 +alwai 0 +handi 0 +know 0 +date 0 +time 0 +week 0 +favorit 0 +color 0 +line 0 +lost 0 +easili 0 +return 0 +page 0 +ronald 0 +wilson 0 +reagan 0 +temperatur 0 +look 0 +javascript 0 +enabl 0 +browser 0 +automat 0 +send 0 +mail 0 +great 0 +republican 0 +tell 0 +like 0 +miscellani 0 +contact 0 +bookmark 0 +societi 0 +awar 0 +bitter 0 +ironi 0 +involv 0 +nonetheless 0 +madeavail 0 +wendel 0 +berri 0 +guidelin 0 +constitutesgood 0 +comment 0 +nichola 0 +kushmerick 0 +uwcs 0 +seattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..9fb07b02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,75 @@ +page 1 +inform 1 +ohlrich 1 +wayn 1 +research 1 +memori 1 +home 1 +us 1 +washington 1 +univers 1 +seattl 1 +work 1 +leveson 1 +bershad 1 +karlin 1 +romer 1 +project 1 +isca 1 +contain 1 +invest 1 +depart 0 +comput 0 +scienc 0 +engin 0 +offic 0 +guggenheim 0 +annex 0 +chateau 0 +public 0 +current 0 +nanci 0 +model 0 +check 0 +automat 0 +test 0 +gener 0 +specif 0 +spare 0 +time 0 +brian 0 +anna 0 +perform 0 +analysi 0 +local 0 +known 0 +sever 0 +damag 0 +group 0 +paper 0 +make 0 +debut 0 +itali 0 +summer 0 +safeti 0 +sytem 0 +reduc 0 +overhead 0 +onlin 0 +superpag 0 +promot 0 +class 0 +cours 0 +person 0 +interest 0 +game 0 +world 0 +wonder 0 +sort 0 +link 0 +found 0 +creat 0 +octob 0 +last 0 +modifi 0 +march 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..a0cf2ace --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,86 @@ +gershoni 1 +year 1 +washington 1 +graduat 1 +scienc 1 +sieg 1 +seattl 1 +univers 1 +live 1 +california 1 +berkelei 1 +israel 1 +class 1 +comput 1 +offic 1 +summer 1 +time 1 +studentcomput 0 +engin 0 +departmentunivers 0 +washingtonoffic 0 +home 0 +second 0 +student 0 +ofwashington 0 +move 0 +seven 0 +fouryear 0 +three 0 +lancast 0 +northeast 0 +angel 0 +origin 0 +haifa 0 +holon 0 +like 0 +practic 0 +kwon 0 +plai 0 +basketbal 0 +hike 0 +quarter 0 +take 0 +whole 0 +bunch 0 +seminar 0 +amta 0 +architectur 0 +usual 0 +find 0 +hour 0 +aremondai 0 +wednesdai 0 +potenti 0 +employ 0 +welcom 0 +look 0 +resum 0 +pictur 0 +took 0 +last 0 +click 0 +tose 0 +cool 0 +shirt 0 +design 0 +made 0 +graphicsprogram 0 +call 0 +virtual 0 +realiti 0 +interest 0 +link 0 +daili 0 +new 0 +summari 0 +york 0 +riderlink 0 +seattletransport 0 +option 0 +inform 0 +mathemat 0 +depart 0 +access 0 +sinc 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..a7aa8a26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,78 @@ +tequila 1 +ortega 1 +ross 1 +washington 1 +would 1 +work 1 +chinook 1 +project 1 +color 1 +wear 0 +jean 0 +depart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +knew 0 +call 0 +research 0 +albert 0 +einstein 0 +welcom 0 +home 0 +pageaft 0 +go 0 +school 0 +boston 0 +year 0 +decid 0 +head 0 +west 0 +realli 0 +northwest 0 +came 0 +fall 0 +leav 0 +sometim 0 +accord 0 +advisor 0 +gaetano 0 +borriello 0 +offici 0 +tool 0 +real 0 +time 0 +embed 0 +control 0 +system 0 +unoffici 0 +brew 0 +beer 0 +learn 0 +hack 0 +try 0 +teach 0 +german 0 +shepherd 0 +behav 0 +profession 0 +section 0 +myresum 0 +file 0 +educ 0 +experi 0 +public 0 +paper 0 +puppi 0 +pictur 0 +offic 0 +sieg 0 +check 0 +page 0 +link 0 +find 0 +interest 0 +last 0 +updatedthu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..8946c933 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,121 @@ +seattl 1 +weather 1 +comput 1 +washington 1 +show 1 +beth 1 +pardo 1 +sometim 1 +work 1 +item 1 +site 1 +think 1 +courtesei 1 +also 1 +untitl 0 +document 0 +flat 0 +morri 0 +minor 0 +pardodepart 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +washingtonusapardo 0 +edunot 0 +blue 0 +ribbon 0 +asimgsrc 0 +http 0 +graphic 0 +icon 0 +blueribbon 0 +rib_trn_plain_sm 0 +quiet 0 +opposit 0 +free 0 +speechprohibit 0 +nation 0 +telecommun 0 +bill 0 +likewis 0 +white 0 +letter 0 +black 0 +background 0 +everybodi 0 +need 0 +around 0 +share 0 +academicsom 0 +papersi 0 +find 0 +particularli 0 +interest 0 +runtim 0 +code 0 +gener 0 +rtcg 0 +instruct 0 +simul 0 +trace 0 +tool 0 +home 0 +page 0 +oncomput 0 +architectureandcompil 0 +quick 0 +link 0 +otherpeopl 0 +class 0 +thesi 0 +stylenon 0 +academicfeatur 0 +featur 0 +month 0 +weak 0 +week 0 +doesn 0 +blink 0 +anymor 0 +regular 0 +itemsbicyclesbusinessescomputersfoodhumori 0 +famou 0 +thing 0 +relat 0 +legal 0 +ethic 0 +weirdnesslinux 0 +journalmusicgoofi 0 +politicssci 0 +though 0 +unrel 0 +stuff 0 +transport 0 +movi 0 +list 0 +film 0 +festiv 0 +dant 0 +search 0 +truli 0 +gross 0 +stori 0 +trepan 0 +privaci 0 +log 0 +mail 0 +address 0 +wors 0 +take 0 +data 0 +disk 0 +everi 0 +time 0 +consid 0 +particular 0 +newhous 0 +newspap 0 +courtesi 0 +yesterdai 0 +stuffpardo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..c44c1cb1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,88 @@ +washington 1 +research 1 +system 1 +school 1 +seattl 1 +relat 1 +pardyak 1 +home 1 +pardi 1 +grad 1 +year 1 +comput 1 +scienc 1 +also 1 +languag 1 +drizzl 1 +interest 1 +group 1 +mechan 1 +object 1 +univers 1 +phone 1 +przemek 0 +page 0 +przemyslaw 0 +first 0 +week 0 +coupl 0 +later 0 +third 0 +graduat 0 +student 0 +theunivers 0 +current 0 +area 0 +ofoper 0 +interast 0 +distribut 0 +compil 0 +besid 0 +life 0 +fill 0 +withth 0 +hike 0 +outdoor 0 +activ 0 +notbusi 0 +enjoi 0 +book 0 +music 0 +find 0 +short 0 +descript 0 +resum 0 +list 0 +paper 0 +outdat 0 +happenswhen 0 +busi 0 +schedul 0 +projectsspinan 0 +extens 0 +oper 0 +built 0 +gloriou 0 +leadership 0 +brian 0 +bershad 0 +base 0 +systemsgroup 0 +commun 0 +emerald 0 +basedprogram 0 +time 0 +link 0 +polish 0 +connect 0 +variou 0 +resourc 0 +somehow 0 +poland 0 +project 0 +mine 0 +unrel 0 +miscellan 0 +work 0 +engin 0 +depart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..4656bb8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,167 @@ +year 2 +bike 1 +davi 1 +student 1 +us 1 +school 1 +taught 1 +prolog 1 +chri 1 +html 1 +franklin 1 +home 1 +graduat 1 +depart 1 +comput 1 +scienc 1 +work 1 +first 1 +second 1 +seattl 1 +dai 1 +around 1 +high 1 +time 1 +engin 1 +univers 1 +paul 0 +pagepaul 0 +pagei 0 +current 0 +univ 0 +washington 0 +inth 0 +offici 0 +myoffic 0 +sieg 0 +rapid 0 +project 0 +thesumm 0 +someon 0 +express 0 +concern 0 +aboutthi 0 +character 0 +usual 0 +somewher 0 +north 0 +iliv 0 +anoth 0 +pictur 0 +best 0 +oneof 0 +scan 0 +better 0 +on 0 +norwegian 0 +poem 0 +likea 0 +collect 0 +fortun 0 +receiv 0 +friend 0 +localchines 0 +restaur 0 +mundan 0 +stuffi 0 +hope 0 +folk 0 +might 0 +find 0 +stuff 0 +hotlink 0 +pagesstuff 0 +maintainmi 0 +schedul 0 +rememb 0 +updat 0 +contact 0 +love 0 +travel 0 +necessarili 0 +tell 0 +everyon 0 +hire 0 +mewher 0 +come 0 +diploma 0 +live 0 +inmorgan 0 +hill 0 +fromuc 0 +andy 0 +glad 0 +ididn 0 +bergen 0 +ialso 0 +research 0 +professor 0 +electr 0 +andcomput 0 +stuffwhil 0 +partner 0 +variou 0 +relatedact 0 +tend 0 +matt 0 +jame 0 +evengot 0 +togeth 0 +recent 0 +marriag 0 +joann 0 +anexcus 0 +brother 0 +also 0 +made 0 +itin 0 +photo 0 +throughout 0 +undergradu 0 +kept 0 +never 0 +flat 0 +exchang 0 +ofbergen 0 +hillier 0 +longer 0 +rout 0 +returnedto 0 +took 0 +rollerblad 0 +sinc 0 +town 0 +wasnow 0 +easi 0 +drop 0 +hewlettpackard 0 +return 0 +vengeanc 0 +move 0 +toseattl 0 +done 0 +annual 0 +portland 0 +ride 0 +intwo 0 +inseason 0 +march 0 +april 0 +june 0 +rest 0 +justcommut 0 +lot 0 +danc 0 +particularli 0 +lindyhop 0 +know 0 +everi 0 +document 0 +header 0 +linethat 0 +look 0 +someth 0 +like 0 +doctyp 0 +public 0 +ietf 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..cca7334c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,98 @@ +pighin 1 +anna 1 +karlin 1 +might 1 +like 1 +rain 1 +live 1 +action 1 +pictur 1 +refresh 1 +everi 1 +minut 1 +note 1 +frdric 0 +washington 0 +comput 0 +scienc 0 +lcommun 0 +wonder 0 +citi 0 +tourist 0 +quarter 0 +ta 0 +taught 0 +gui 0 +found 0 +much 0 +often 0 +dani 0 +qual 0 +report 0 +rest 0 +british 0 +movi 0 +monti 0 +python 0 +swim 0 +nick 0 +cave 0 +corto 0 +maltes 0 +italian 0 +comic 0 +cat 0 +pari 0 +berlin 0 +venis 0 +simpson 0 +mpeg 0 +surpris 0 +traditionn 0 +french 0 +marin 0 +song 0 +collect 0 +otherwis 0 +work 0 +graphic 0 +supervis 0 +although 0 +formerli 0 +studi 0 +systemher 0 +name 0 +paper 0 +implement 0 +global 0 +memori 0 +manag 0 +workstat 0 +cluster 0 +michael 0 +feelei 0 +william 0 +morgan 0 +freder 0 +henri 0 +levi 0 +chandramohan 0 +thekkath 0 +proceed 0 +symposium 0 +oper 0 +system 0 +principl 0 +decemb 0 +postscript 0 +breath 0 +take 0 +door 0 +lucki 0 +even 0 +look 0 +darren 0 +juan 0 +dark 0 +squar 0 +five 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..724acb1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,14 @@ +ruth 1 +anderson 1 +washington 1 +home 0 +page 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +wxyc 0 +map 0 +brother 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..fa51ca93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,12 @@ +home 1 +washington 1 +redston 1 +josh 0 +page 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +joshua 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..903db76e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,162 @@ +jakobovit 1 +scienc 1 +univers 1 +home 1 +washington 1 +comput 1 +seattl 1 +live 1 +imag 1 +camera 1 +research 1 +base 1 +manag 1 +program 1 +build 1 +databas 1 +inform 1 +professor 1 +psycholog 1 +hawaii 1 +onlin 1 +librari 1 +page 0 +engin 0 +depart 0 +usai 0 +get 0 +departmentof 0 +ofwashington 0 +wonder 0 +citi 0 +alwai 0 +rain 0 +thisup 0 +date 0 +weatherreport 0 +sneak 0 +peek 0 +video 0 +mount 0 +outsid 0 +point 0 +beauti 0 +drumhel 0 +fountain 0 +clear 0 +youcan 0 +catch 0 +glimps 0 +rainier 0 +glori 0 +probabl 0 +cleardai 0 +nice 0 +color 0 +pictur 0 +develop 0 +repositori 0 +toolkit 0 +multi 0 +media 0 +consol 0 +construct 0 +part 0 +astructur 0 +framework 0 +brain 0 +map 0 +knowledg 0 +support 0 +digit 0 +anatomist 0 +line 0 +interact 0 +atla 0 +human 0 +bodi 0 +implement 0 +databaseenviron 0 +vision 0 +local 0 +expert 0 +persistentprogram 0 +languag 0 +interest 0 +els 0 +proud 0 +creator 0 +internetracquetbal 0 +ladder 0 +taught 0 +advanc 0 +extens 0 +wrote 0 +perl 0 +script 0 +rotisseriebasebal 0 +leagu 0 +stand 0 +updat 0 +daili 0 +stat 0 +fromusa 0 +todai 0 +rais 0 +happi 0 +famili 0 +africancichlid 0 +visit 0 +town 0 +honolulu 0 +everi 0 +chanc 0 +camp 0 +magic 0 +kalalau 0 +vallei 0 +movi 0 +gambl 0 +stock 0 +market 0 +darn 0 +good 0 +fantasi 0 +footbal 0 +team 0 +newslet 0 +would 0 +javafamili 0 +link 0 +mydad 0 +leon 0 +jame 0 +whoi 0 +write 0 +book 0 +traffic 0 +foster 0 +polem 0 +emanuel 0 +swedenborg 0 +step 0 +dian 0 +nahl 0 +whoprovid 0 +great 0 +index 0 +judi 0 +realtor 0 +uncl 0 +eddi 0 +run 0 +site 0 +bioscienc 0 +profession 0 +bookmarksif 0 +java 0 +click 0 +drag 0 +word 0 +make 0 +poem 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..7e06dd02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight 0 +welcom 0 +galleri 0 +twenti 0 +photograph 0 +five 0 +head 0 +robert 0 +grimm 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..2bc6f1ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,142 @@ +romer 2 +bershad 1 +perform 1 +chen 1 +comput 1 +washington 1 +research 1 +memori 1 +karlin 1 +ohlrich 1 +wong 1 +paper 1 +isca 1 +dynam 1 +page 1 +conflict 1 +asplo 1 +friend 1 +scientist 1 +depart 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +home 0 +offic 0 +eduoffic 0 +chateau 0 +sieg 0 +interestsi 0 +oper 0 +system 0 +supportfor 0 +high 0 +systemswith 0 +realli 0 +smart 0 +peopl 0 +likebrian 0 +brad 0 +alan 0 +eustac 0 +anna 0 +denni 0 +wayn 0 +andwayn 0 +three 0 +recent 0 +subject 0 +reduc 0 +overhead 0 +us 0 +onlinesuperpag 0 +promot 0 +map 0 +polici 0 +cach 0 +resolutionon 0 +standard 0 +hardwar 0 +osdi 0 +avoid 0 +miss 0 +larg 0 +direct 0 +mappedcach 0 +addit 0 +studi 0 +ofinterpret 0 +learn 0 +theproject 0 +rockyhom 0 +also 0 +wrote 0 +togeth 0 +structur 0 +interpret 0 +voelker 0 +wolman 0 +baer 0 +levi 0 +appear 0 +abstract 0 +postscript 0 +bibliographi 0 +lobo 0 +read 0 +listrandom 0 +stuffa 0 +hous 0 +rai 0 +limb 0 +knee 0 +arthroscop 0 +surgeri 0 +mark 0 +hill 0 +wrist 0 +dylansaid 0 +hair 0 +couldn 0 +flowbe 0 +said 0 +could 0 +beingexperiment 0 +conduct 0 +experi 0 +judg 0 +result 0 +attend 0 +travel 0 +europ 0 +took 0 +somepictur 0 +eatsomeon 0 +els 0 +food 0 +accompani 0 +sincer 0 +ration 0 +forexampl 0 +lunch 0 +thought 0 +leftth 0 +countri 0 +would 0 +didn 0 +origin 0 +unknown 0 +edward 0 +tuft 0 +tip 0 +public 0 +speak 0 +father 0 +edit 0 +american 0 +journal 0 +physic 0 +place 0 +ticker 0 +symbol 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..1bf640d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,157 @@ +memori 2 +system 2 +washington 2 +page 2 +romer 2 +perform 1 +project 1 +superpag 1 +polici 1 +bershad 1 +univers 1 +monitor 1 +dynam 1 +overhead 1 +us 1 +cach 1 +miss 1 +research 1 +improv 1 +applic 1 +identifi 1 +size 1 +qual 1 +karlin 1 +ohlrich 1 +descript 1 +work 1 +hardwar 1 +support 1 +mechan 1 +small 1 +current 1 +cost 1 +intern 1 +fragment 1 +region 1 +construct 1 +paper 1 +reduc 1 +onlin 1 +promot 1 +isca 1 +appear 1 +algorithm 1 +wayn 1 +wong 1 +map 1 +conflict 1 +chen 1 +report 1 +washingtonmemori 0 +researchdepart 0 +comput 0 +scienc 0 +engin 0 +seattl 0 +welcom 0 +home 0 +group 0 +investig 0 +techniqu 0 +operatingsystem 0 +sharesth 0 +follow 0 +featur 0 +reli 0 +combin 0 +simpl 0 +oper 0 +modif 0 +behavior 0 +incur 0 +runtim 0 +inform 0 +collect 0 +sourc 0 +delai 0 +resolv 0 +bottleneck 0 +also 0 +significantli 0 +overal 0 +recent 0 +explor 0 +monitorappl 0 +refer 0 +pattern 0 +order 0 +resolvetlb 0 +problem 0 +poor 0 +result 0 +tlbi 0 +cover 0 +severalmodern 0 +architectur 0 +whose 0 +amultipl 0 +base 0 +tlbperform 0 +larger 0 +ofwast 0 +simul 0 +sever 0 +adapt 0 +todiffer 0 +address 0 +space 0 +constructingsuperpag 0 +copi 0 +compon 0 +contigu 0 +ofmemori 0 +develop 0 +balancesth 0 +potenti 0 +benefit 0 +reduct 0 +futur 0 +tlbmiss 0 +memorycopi 0 +misspattern 0 +warrant 0 +attain 0 +largepag 0 +without 0 +detail 0 +look 0 +someon 0 +implement 0 +would 0 +makea 0 +good 0 +master 0 +peoplefaculti 0 +brian 0 +anna 0 +student 0 +denni 0 +dlee 0 +waynew 0 +resolut 0 +standard 0 +osdi 0 +avoid 0 +larg 0 +direct 0 +asplo 0 +comparison 0 +mip 0 +alpha 0 +instruct 0 +effect 0 +differ 0 +code 0 +reorder 0 +bibliographi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..dd524fc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,96 @@ +washington 2 +interpret 2 +perform 1 +languag 1 +project 1 +architectur 1 +sever 1 +baer 1 +bershad 1 +levi 1 +romer 1 +voelker 1 +wolman 1 +rocki 1 +last 1 +environ 1 +implement 1 +collect 1 +benchmark 1 +wong 1 +etch 1 +avail 1 +intern 1 +descript 0 +becom 0 +increasingli 0 +popular 0 +year 0 +part 0 +demand 0 +portabl 0 +safeti 0 +eas 0 +examin 0 +perspect 0 +strategi 0 +processor 0 +util 0 +basi 0 +studi 0 +microbenchmark 0 +perl 0 +java 0 +mipsi 0 +us 0 +variou 0 +instrument 0 +trace 0 +techniqu 0 +evalu 0 +characterist 0 +order 0 +gain 0 +insight 0 +similar 0 +differ 0 +execut 0 +peoplefaculti 0 +jean 0 +loup 0 +brian 0 +henri 0 +student 0 +denni 0 +dlee 0 +geoff 0 +alec 0 +wayn 0 +waynew 0 +papersrom 0 +structur 0 +asplo 0 +appear 0 +abstractpostscriptjava 0 +measur 0 +xjava 0 +sourc 0 +file 0 +benchmarkstoolsto 0 +inform 0 +applic 0 +vebeen 0 +build 0 +binari 0 +rewrit 0 +tool 0 +call 0 +yetpublicli 0 +read 0 +etchhom 0 +page 0 +documentationproject 0 +document 0 +peopl 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..c19d7731 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,98 @@ +scienc 1 +washington 1 +seattl 1 +system 1 +softwar 1 +richard 1 +phone 1 +intellig 1 +scam 1 +imag 1 +process 1 +comput 1 +work 1 +document 1 +also 1 +camp 1 +splash 1 +program 1 +nation 1 +roger 0 +home 0 +page 0 +rogersrrog 0 +educomput 0 +engin 0 +departmentunivers 0 +usaoffic 0 +chateau 0 +sieg 0 +laboratri 0 +research 0 +develop 0 +systol 0 +cellular 0 +arrai 0 +machin 0 +massiv 0 +parallel 0 +includ 0 +compil 0 +basic 0 +morpholog 0 +librari 0 +simul 0 +obtain 0 +paper 0 +current 0 +layout 0 +extract 0 +help 0 +produc 0 +groundtruth 0 +databas 0 +optic 0 +charact 0 +recognit 0 +commun 0 +director 0 +facil 0 +northwest 0 +center 0 +environment 0 +educ 0 +ncee 0 +offer 0 +summer 0 +student 0 +ag 0 +beauti 0 +juan 0 +island 0 +univers 0 +year 0 +long 0 +foundat 0 +fund 0 +grade 0 +minor 0 +girl 0 +area 0 +interest 0 +corn 0 +snake 0 +jessica 0 +squishi 0 +order 0 +increas 0 +length 0 +kuow 0 +public 0 +radio 0 +stationi 0 +bake 0 +best 0 +pecan 0 +seattlelast 0 +modifi 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..c18014dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,19 @@ +mike 1 +home 1 +washington 1 +page 0 +salisburysalisbur 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +usaoffic 0 +chateau 0 +sieg 0 +lifehistori 0 +school 0 +friend 0 +vitacool 0 +stuff 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..86359b98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,177 @@ +system 3 +oper 2 +proceed 2 +extens 1 +applic 1 +paper 1 +first 1 +workshop 1 +talk 1 +spin 1 +support 1 +reserv 1 +processor 1 +savag 1 +washington 1 +symposium 1 +slide 1 +softwar 1 +usenix 1 +capac 1 +multimedia 1 +time 1 +mach 1 +stefan 1 +work 1 +comput 1 +univers 1 +seattl 1 +industri 1 +bershad 1 +student 1 +american 1 +need 1 +compil 1 +wcsss 1 +tucson 1 +issu 1 +design 1 +abstract 1 +version 1 +microkernel 1 +specif 1 +servic 1 +januari 1 +tech 1 +report 1 +afraid 1 +confer 1 +real 1 +scienc 0 +engin 0 +depart 0 +home 0 +sampl 0 +rich 0 +post 0 +cultur 0 +modern 0 +pittsburghfor 0 +year 0 +caught 0 +ride 0 +migrat 0 +mnow 0 +gradual 0 +rank 0 +strongbackground 0 +centuri 0 +histori 0 +provid 0 +witha 0 +firm 0 +irrelev 0 +platform 0 +trash 0 +peer 0 +fool 0 +tocqeuvil 0 +statement 0 +find 0 +tiresom 0 +inconveni 0 +exercisepolit 0 +right 0 +distract 0 +quit 0 +similar 0 +tocurr 0 +microprocessor 0 +architectur 0 +trend 0 +favor 0 +ofappl 0 +code 0 +brian 0 +rest 0 +merri 0 +band 0 +onan 0 +project 0 +call 0 +projectsspinspin 0 +omnifemtokernel 0 +whichsupport 0 +dynam 0 +adapt 0 +interfac 0 +andimplement 0 +direct 0 +control 0 +stillmaintain 0 +integr 0 +inter 0 +isol 0 +thing 0 +writingspin 0 +safeti 0 +perform 0 +principl 0 +sosp 0 +copper 0 +mountain 0 +decemb 0 +languag 0 +write 0 +modula 0 +protect 0 +fifth 0 +topic 0 +hoto 0 +orca 0 +island 0 +implement 0 +osdi 0 +monterei 0 +novemb 0 +panel 0 +longer 0 +unpublish 0 +sixth 0 +sigop 0 +european 0 +match 0 +appear 0 +review 0 +march 0 +frequent 0 +redund 0 +arrai 0 +independ 0 +disk 0 +winter 0 +technic 0 +diego 0 +best 0 +ieee 0 +intern 0 +boston 0 +manag 0 +usag 0 +fourth 0 +workstat 0 +wwo 0 +napa 0 +octob 0 +carnegi 0 +mellon 0 +timer 0 +export 0 +user 0 +third 0 +santa 0 +april 0 +interest 0 +music 0 +hikingthi 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..90c4baed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,9 @@ +sean 1 +sandi 1 +washington 1 +david 0 +comput 0 +scienc 0 +last 0 +revis 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..bb0401bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,33 @@ +richard 1 +segal 1 +home 0 +page 0 +segaldepart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +washingtonbox 0 +seattl 0 +washington 0 +person 0 +biographi 0 +better 0 +half 0 +famili 0 +pictur 0 +research 0 +overview 0 +brute 0 +internet 0 +softbot 0 +public 0 +curriculum 0 +vita 0 +postscript 0 +amus 0 +archeri 0 +bicycl 0 +racquetbal 0 +ski 0 +softbal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..f95fb7db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,116 @@ +project 2 +char 1 +univers 1 +stefan 1 +washington 1 +activ 1 +els 1 +printf 1 +seattl 1 +teach 1 +assist 1 +work 1 +phone 1 +inform 1 +past 1 +current 1 +interest 1 +page 1 +cologn 1 +complet 1 +come 1 +receiv 1 +high 1 +school 1 +scienc 1 +implement 1 +comparison 1 +hardwar 1 +softwar 1 +solut 1 +fals 1 +share 1 +pictur 1 +print 1 +byte 1 +putchar 1 +main 1 +bergstefan 0 +berg 0 +home 0 +sieg 0 +hall 0 +email 0 +sgberg 0 +content 0 +address 0 +finger 0 +born 0 +germani 0 +spring 0 +mittler 0 +reif 0 +thgrade 0 +schillergymnasium 0 +unit 0 +statesto 0 +diploma 0 +bloomington 0 +north 0 +indiana 0 +bachelor 0 +honor 0 +distinctionin 0 +field 0 +comput 0 +fromindiana 0 +momenth 0 +toward 0 +expect 0 +date 0 +sometim 0 +thiscenturi 0 +reduct 0 +machin 0 +studi 0 +linear 0 +time 0 +sort 0 +algorithm 0 +qual 0 +peopl 0 +around 0 +raft 0 +bookmark 0 +excit 0 +squar 0 +moment 0 +weather 0 +yourselfsometh 0 +crazi 0 +didn 0 +even 0 +particularsolut 0 +done 0 +sall 0 +line 0 +shouldn 0 +contain 0 +trail 0 +carriag 0 +return 0 +compil 0 +without 0 +warn 0 +program 0 +exact 0 +sourc 0 +code 0 +itin 0 +fewer 0 +like 0 +resum 0 +avail 0 +inpostscript 0 +andtex 0 +format 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..46dc67c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,62 @@ +thing 1 +project 1 +pictur 1 +work 1 +page 1 +ward 0 +shadegreet 0 +salut 0 +third 0 +year 0 +grad 0 +student 0 +hereat 0 +dubcs 0 +interact 0 +renderingof 0 +complex 0 +scene 0 +current 0 +follow 0 +link 0 +walkthruproject 0 +amonglot 0 +anim 0 +find 0 +siggraph 0 +paperdescrib 0 +recent 0 +shortcut 0 +click 0 +thepictur 0 +island 0 +lower 0 +left 0 +corner 0 +lot 0 +interest 0 +go 0 +mani 0 +differ 0 +aspectsof 0 +comput 0 +graphic 0 +thegraph 0 +imag 0 +laboratori 0 +get 0 +done 0 +contact 0 +info 0 +daili 0 +schedul 0 +travel 0 +plan 0 +public 0 +look 0 +scrunch 0 +make 0 +browser 0 +least 0 +pixel 0 +wide 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..0e7e0353 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,29 @@ +shun 1 +leung 1 +depart 1 +comput 1 +scienc 1 +univers 1 +research 1 +washington 1 +leungshun 0 +student 0 +andengin 0 +ofwashington 0 +work 0 +prof 0 +johnzahorjan 0 +pointer 0 +summari 0 +public 0 +curriculum 0 +vita 0 +upon 0 +request 0 +engin 0 +seattl 0 +email 0 +shuntak 0 +last 0 +modifi 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..c248084e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,70 @@ +shuichi 1 +koga 1 +washington 1 +graduat 1 +studi 1 +skoga 1 +haven 1 +involv 1 +asian 1 +also 1 +welcom 0 +home 0 +page 0 +studentcomput 0 +scienc 0 +engin 0 +departmentunivers 0 +name 0 +notic 0 +bynow 0 +start 0 +univers 0 +ofwashington 0 +quit 0 +figur 0 +myqual 0 +much 0 +less 0 +dissert 0 +universityof 0 +virginia 0 +degre 0 +mathemat 0 +alsoheavili 0 +foreign 0 +relat 0 +andgovern 0 +depart 0 +origin 0 +slate 0 +degreein 0 +heavili 0 +user 0 +interfac 0 +groupand 0 +comput 0 +sciencedepart 0 +work 0 +project 0 +call 0 +alic 0 +sinc 0 +anywai 0 +take 0 +look 0 +pictur 0 +smaller 0 +mean 0 +finger 0 +info 0 +current 0 +schedul 0 +neat 0 +hypertext 0 +link 0 +hunt 0 +destroi 0 +bug 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..06f558a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,48 @@ +soha 1 +washington 1 +current 1 +univers 1 +comput 1 +scienc 1 +engin 1 +dept 1 +hassoun 0 +home 0 +page 0 +hassounit 0 +year 0 +graduat 0 +school 0 +circuit 0 +design 0 +whoturn 0 +develop 0 +work 0 +onarchitectur 0 +retim 0 +professor 0 +carlebel 0 +weekli 0 +schedul 0 +busi 0 +previou 0 +research 0 +educ 0 +experi 0 +public 0 +patent 0 +chao 0 +group 0 +profession 0 +interest 0 +vlsi 0 +site 0 +inform 0 +littl 0 +deede 0 +photo 0 +galleri 0 +address 0 +depart 0 +seattl 0 +phone 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..98809939 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,115 @@ +favorit 1 +scienc 1 +sujai 1 +parekh 1 +home 1 +page 1 +comput 1 +washington 1 +seattl 1 +seminar 1 +system 1 +french 1 +softbot 1 +project 1 +interest 1 +soccer 1 +danc 1 +food 1 +work 0 +depart 0 +engin 0 +sieg 0 +hall 0 +chateau 0 +guggenheim 0 +annex 0 +univers 0 +quarter 0 +class 0 +research 0 +simultaneousmultithread 0 +particular 0 +issu 0 +relat 0 +tomultithread 0 +architectur 0 +evalu 0 +simon 0 +emploi 0 +procedur 0 +search 0 +controlsystem 0 +control 0 +action 0 +report 0 +construct 0 +design 0 +remov 0 +patio 0 +convent 0 +workspac 0 +fund 0 +pleas 0 +contact 0 +sport 0 +spud 0 +right 0 +sort 0 +bookmark 0 +like 0 +keep 0 +track 0 +academ 0 +cognit 0 +distribut 0 +parallel 0 +psycholog 0 +philosophi 0 +tenni 0 +sail 0 +squash 0 +volleybal 0 +ballroom 0 +cornel 0 +oracl 0 +corpor 0 +stottler 0 +henk 0 +associ 0 +done 0 +resum 0 +random 0 +person 0 +info 0 +oondhiu 0 +mango 0 +phad 0 +thai 0 +kung 0 +chicken 0 +beverag 0 +screwdriv 0 +scotch 0 +long 0 +island 0 +ic 0 +tango 0 +swing 0 +east 0 +west 0 +coast 0 +salsa 0 +rock 0 +music 0 +dire 0 +strait 0 +pink 0 +floyd 0 +phil 0 +collin 0 +genesi 0 +peter 0 +gabriel 0 +petti 0 +sparekh 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..58d9b9b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,64 @@ +erik 1 +speed 1 +selberg 0 +improv 0 +home 0 +page 0 +name 0 +washington 0 +academ 0 +almost 0 +live 0 +fishcam 0 +address 0 +lara 0 +lewi 0 +memorialhappi 0 +link 0 +peopl 0 +sport 0 +food 0 +drink 0 +cultur 0 +mari 0 +kay 0 +offic 0 +friend 0 +famili 0 +fish 0 +disc 0 +drive 0 +ultim 0 +pasti 0 +power 0 +ur 0 +machin 0 +mountain 0 +bike 0 +spud 0 +softbal 0 +ski 0 +utah 0 +raquetbal 0 +pro 0 +colleg 0 +wedgwood 0 +hous 0 +diet 0 +pepper 0 +salt 0 +lake 0 +roast 0 +compani 0 +bean 0 +bagel 0 +racer 0 +star 0 +war 0 +tini 0 +toon 0 +pinki 0 +brain 0 +phantom 0 +babylon 0 +comic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..a6c2c86a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,134 @@ +parallel 1 +washington 1 +simul 1 +home 1 +sung 1 +univers 1 +work 1 +watch 1 +like 1 +good 1 +plai 1 +scrub 1 +divis 1 +take 1 +page 0 +choiwelcom 0 +thehomepag 0 +ofsung 0 +eunchoi 0 +myschoollifemi 0 +primari 0 +research 0 +interest 0 +compil 0 +program 0 +languag 0 +involv 0 +zplcompil 0 +project 0 +late 0 +beenspend 0 +time 0 +think 0 +optim 0 +communicationgener 0 +us 0 +architechtur 0 +independ 0 +communicationlibrari 0 +ironman 0 +addit 0 +experi 0 +data 0 +programson 0 +superscalar 0 +processor 0 +goal 0 +improv 0 +nodeperform 0 +come 0 +gener 0 +machin 0 +alsobeen 0 +seen 0 +hang 0 +chaosrout 0 +group 0 +includ 0 +graphic 0 +front 0 +visual 0 +thatexperi 0 +current 0 +implement 0 +anoth 0 +router 0 +inzpl 0 +final 0 +also 0 +littl 0 +astronomi 0 +quarter 0 +ta 0 +enjoi 0 +movi 0 +mostli 0 +comfort 0 +yeah 0 +vegetarian 0 +sinc 0 +myjunior 0 +year 0 +colleg 0 +drink 0 +dinner 0 +would 0 +samewithout 0 +wine 0 +result 0 +must 0 +exercis 0 +quit 0 +twosoccerteam 0 +cousin 0 +cooper 0 +recdivis 0 +last 0 +season 0 +came 0 +second 0 +place 0 +andcoop 0 +unfortun 0 +recent 0 +sacrifiedmi 0 +left 0 +knee 0 +game 0 +soccer 0 +usualstep 0 +aerobicsclass 0 +instead 0 +find 0 +try 0 +swim 0 +weight 0 +trainingclass 0 +peopl 0 +world 0 +read 0 +book 0 +abit 0 +shakespear 0 +publictelevis 0 +listen 0 +classicalmus 0 +myotherlif 0 +choi 0 +sungeun 0 +depart 0 +comput 0 +scienc 0 +engin 0 +seattl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..533fe6ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,58 @@ +system 1 +nguyen 1 +runtim 1 +multiprocessorsenviron 1 +schedul 1 +depart 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +washington 0 +work 0 +world 0 +research 0 +interest 0 +includ 0 +oper 0 +distribut 0 +andparallel 0 +network 0 +secur 0 +current 0 +help 0 +frommi 0 +advisor 0 +johnzahorjan 0 +build 0 +support 0 +run 0 +soft 0 +real 0 +timeappl 0 +visual 0 +partial 0 +idl 0 +workstat 0 +innow 0 +recent 0 +complet 0 +studi 0 +measur 0 +ofappl 0 +characterist 0 +us 0 +tominim 0 +applic 0 +execut 0 +time 0 +uniprogram 0 +well 0 +make 0 +goodglob 0 +decis 0 +multiprogram 0 +cvpublic 0 +worldvietnameseresourc 0 +netcyclingplayground 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..bfb47b51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,13 @@ +frame 1 +tian 0 +homepageyour 0 +browser 0 +rusti 0 +yellow 0 +turkei 0 +even 0 +part 0 +html 0 +standard 0 +click 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..2009ca7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,103 @@ +system 2 +object 2 +distribut 2 +work 2 +tiwari 2 +workshop 2 +oopsla 1 +opal 1 +larg 1 +address 1 +oper 1 +persist 1 +applic 1 +softwar 1 +us 1 +levi 1 +comput 1 +databas 1 +measur 1 +build 1 +ashutosh 1 +washington 1 +scienc 1 +time 1 +area 1 +singl 1 +space 1 +orient 1 +workload 1 +user 1 +organ 1 +chase 1 +advisor 1 +also 1 +behavior 1 +except 1 +handl 1 +octob 1 +benchmark 1 +depart 0 +engin 0 +univers 0 +seattl 0 +mostli 0 +full 0 +fourth 0 +year 0 +graduat 0 +student 0 +support 0 +past 0 +infrastructur 0 +interfac 0 +spare 0 +group 0 +research 0 +technolog 0 +boe 0 +servic 0 +projectsopali 0 +project 0 +issu 0 +opportun 0 +involv 0 +creat 0 +global 0 +across 0 +multipl 0 +machin 0 +jeff 0 +primari 0 +architect 0 +hank 0 +close 0 +character 0 +gener 0 +techniqu 0 +paper 0 +sever 0 +profession 0 +career 0 +thisexperi 0 +basi 0 +follow 0 +distrbut 0 +public 0 +virtual 0 +refer 0 +proc 0 +intern 0 +septemb 0 +parallel 0 +environ 0 +ecoop 0 +juli 0 +bosch 0 +addendum 0 +proceed 0 +oop 0 +messeng 0 +evalu 0 +narasayya 0 +perform 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..b1cf4867 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,91 @@ +linux 1 +also 1 +tessa 1 +inform 1 +washington 1 +interest 1 +current 1 +work 1 +system 1 +java 1 +anoth 0 +dead 0 +superhighwai 0 +second 0 +yeargradu 0 +student 0 +comput 0 +scienc 0 +univers 0 +research 0 +includ 0 +sort 0 +relatedgoodi 0 +clio 0 +search 0 +andbrows 0 +person 0 +histori 0 +avail 0 +seek 0 +gain 0 +employ 0 +myresum 0 +onlin 0 +curiou 0 +kittyi 0 +honor 0 +share 0 +apart 0 +gambit 0 +siames 0 +cat 0 +great 0 +memor 0 +found 0 +therear 0 +pictur 0 +page 0 +tofind 0 +appar 0 +alsor 0 +scotland 0 +classesi 0 +still 0 +qual 0 +quarter 0 +take 0 +last 0 +ofeight 0 +class 0 +fulfil 0 +breadth 0 +requir 0 +digit 0 +seminarlinux 0 +gameseverybodi 0 +plai 0 +game 0 +maintain 0 +gametom 0 +commit 0 +advanc 0 +pretti 0 +coolgam 0 +platform 0 +first 0 +attempt 0 +program 0 +simpl 0 +maze 0 +applet 0 +sleepingi 0 +known 0 +frequent 0 +seattl 0 +area 0 +bookstor 0 +knit 0 +crochet 0 +copyright 0 +tlau 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..bea91465 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,44 @@ +martin 1 +tompa 1 +washington 1 +comput 1 +scienc 1 +univers 1 +depart 0 +engin 0 +seattl 0 +phone 0 +receptionist 0 +lectur 0 +note 0 +articl 0 +trajectori 0 +thelma 0 +louis 0 +recent 0 +holidai 0 +moon 0 +pearl 0 +among 0 +wash 0 +oyster 0 +collabor 0 +surrealist 0 +electron 0 +propheci 0 +build 0 +across 0 +pierc 0 +lane 0 +carol 0 +photograph 0 +photo 0 +courtesi 0 +health 0 +center 0 +educ 0 +resourc 0 +provid 0 +mani 0 +imag 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..68cad823 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,106 @@ +kimbrel 2 +traci 1 +prison 1 +held 1 +univers 1 +washington 1 +comput 1 +parallel 1 +prefetch 1 +anna 1 +karlin 1 +seattl 1 +forc 1 +imprison 1 +captor 1 +scienc 1 +algorithm 1 +cachingtraci 1 +edward 1 +felten 1 +appear 1 +system 1 +washingtonsinc 0 +without 0 +charg 0 +trial 0 +move 0 +year 0 +toanoth 0 +area 0 +inmat 0 +tomanufactur 0 +airplan 0 +escap 0 +institut 0 +wasrecaptur 0 +return 0 +help 0 +hisplight 0 +rescu 0 +list 0 +thing 0 +done 0 +curriculum 0 +vita 0 +detail 0 +statement 0 +ofwhat 0 +promis 0 +histori 0 +goal 0 +free 0 +depart 0 +engin 0 +tracyk 0 +eduher 0 +trace 0 +driven 0 +comparison 0 +andrew 0 +tomkin 0 +hugo 0 +patterson 0 +brian 0 +bershad 0 +garth 0 +gibson 0 +sigop 0 +usenix 0 +associ 0 +symposium 0 +onoper 0 +design 0 +implemen 0 +near 0 +optim 0 +ieeesymposium 0 +foundat 0 +longer 0 +version 0 +integr 0 +cach 0 +page 0 +extend 0 +abstract 0 +proceed 0 +sigmetr 0 +confer 0 +measurementand 0 +model 0 +probabilist 0 +verifi 0 +matrix 0 +product 0 +usingo 0 +squar 0 +time 0 +base 0 +random 0 +bit 0 +rakesh 0 +kumar 0 +sinha 0 +inform 0 +process 0 +letter 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..fa6dd35b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,49 @@ +travi 1 +time 1 +craig 1 +washington 1 +comput 1 +scienc 1 +real 1 +system 1 +work 1 +home 0 +page 0 +engin 0 +depart 0 +univers 0 +seattl 0 +research 0 +interest 0 +mechan 0 +predict 0 +cach 0 +restor 0 +queu 0 +spin 0 +lock 0 +arctic 0 +submarin 0 +current 0 +cours 0 +take 0 +quarter 0 +dissert 0 +consum 0 +side 0 +project 0 +half 0 +esca 0 +corpor 0 +help 0 +keep 0 +volvo 0 +run 0 +press 0 +latest 0 +motor 0 +pool 0 +statu 0 +understand 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..e026418d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,12 @@ +dean 1 +tullsen 1 +home 1 +page 1 +biograph 0 +inform 0 +research 0 +interest 0 +bibliographi 0 +download 0 +resumemi 0 +hobbi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..bb676cb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,235 @@ +mike 2 +work 2 +softwar 1 +time 1 +oopsla 1 +washington 1 +year 1 +wrote 1 +univers 1 +start 1 +student 1 +smithsonian 1 +program 1 +thank 1 +pari 1 +uwin 1 +also 1 +present 1 +symposium 1 +michael 1 +harvard 1 +research 1 +comput 1 +engin 1 +seattl 1 +vanhilst 1 +come 1 +programm 1 +hardwar 1 +part 1 +locat 1 +learn 1 +bill 1 +continu 1 +group 1 +data 1 +angela 1 +languag 1 +summer 1 +schedul 1 +commun 1 +recent 1 +object 1 +particip 1 +citi 1 +enjoi 1 +novemb 1 +vanhilstmichael 0 +vanhilstvanhilst 0 +edumvh 0 +eduperson 0 +scienc 0 +depart 0 +usaclick 0 +send 0 +email 0 +messag 0 +personalmik 0 +graduat 0 +hopefulli 0 +littl 0 +luck 0 +finish 0 +around 0 +theend 0 +winter 0 +quarter 0 +immedi 0 +prior 0 +udub 0 +contractor 0 +atibm 0 +motif 0 +widget 0 +user 0 +unterfac 0 +sdata 0 +explor 0 +fix 0 +maintainingcomput 0 +astrophys 0 +observatori 0 +within 0 +debug 0 +could 0 +convinc 0 +realli 0 +correctli 0 +stai 0 +call 0 +saoimagewhich 0 +us 0 +lot 0 +astronom 0 +look 0 +imag 0 +saoimag 0 +gnudistribut 0 +would 0 +like 0 +wyatt 0 +eric 0 +mandel 0 +schwarz 0 +doug 0 +minkfor 0 +guid 0 +project 0 +mention 0 +countless 0 +other 0 +contribut 0 +took 0 +seismologistsin 0 +franc 0 +acquisit 0 +calibr 0 +analysi 0 +truli 0 +wonder 0 +wife 0 +french 0 +class 0 +theallianc 0 +francais 0 +nativ 0 +colombiain 0 +south 0 +america 0 +front 0 +studentsbrows 0 +base 0 +talent 0 +staff 0 +folk 0 +brought 0 +pine 0 +special 0 +shirei 0 +design 0 +traci 0 +stenvik 0 +screen 0 +librari 0 +machin 0 +taught 0 +begin 0 +extens 0 +motiv 0 +frommicrosoft 0 +product 0 +support 0 +sacrif 0 +paper 0 +theintern 0 +technolog 0 +advanc 0 +isota 0 +confer 0 +orient 0 +system 0 +applic 0 +theacm 0 +sigsoft 0 +foundat 0 +poster 0 +made 0 +subject 0 +workshop 0 +doctor 0 +demo 0 +uist 0 +steve 0 +earlier 0 +life 0 +earn 0 +degre 0 +inarchitectur 0 +wooden 0 +kind 0 +planningfrom 0 +mitand 0 +develop 0 +director 0 +forth 0 +grinnel 0 +iowa 0 +thing 0 +differ 0 +skill 0 +visualdesign 0 +problem 0 +solv 0 +valu 0 +still 0 +get 0 +talk 0 +chri 0 +alexand 0 +activ 0 +chapter 0 +associ 0 +improv 0 +ti 0 +larg 0 +small 0 +compani 0 +area 0 +hike 0 +cross 0 +countri 0 +ski 0 +sail 0 +andkayak 0 +swim 0 +lake 0 +bronson 0 +free 0 +taken 0 +marco 0 +harold 0 +sebastien 0 +hilst 0 +born 0 +post 0 +pictur 0 +soon 0 +anoth 0 +photo 0 +scanner 0 +visit 0 +sinc 0 +last 0 +modifi 0 +fridai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..4b88d151 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,103 @@ +home 1 +link 1 +collect 1 +hello 1 +washington 1 +vass 1 +also 1 +expand 1 +staff 1 +project 1 +data 1 +graph 1 +page 1 +vassilylong 0 +live 0 +start 0 +realli 0 +person 0 +best 0 +linki 0 +come 0 +thu 0 +http 0 +us 0 +document 0 +shortest 0 +write 0 +young 0 +quit 0 +download 0 +fewfil 0 +sinc 0 +thisstuff 0 +select 0 +quotesrussian 0 +pagesvari 0 +linksguid 0 +html 0 +formsoth 0 +pageuw 0 +pagencsa 0 +mosaic 0 +pagerec 0 +join 0 +cecilproject 0 +cecil 0 +cool 0 +pure 0 +object 0 +orient 0 +languag 0 +andvortex 0 +even 0 +cooler 0 +compil 0 +infrastructur 0 +befast 0 +mayb 0 +paper 0 +written 0 +member 0 +design 0 +implement 0 +themvi 0 +system 0 +assist 0 +access 0 +ourdepartment 0 +room 0 +visitor 0 +databas 0 +recent 0 +beenupgrad 0 +peopl 0 +qual 0 +thezpl 0 +languageto 0 +handl 0 +irregular 0 +structur 0 +repres 0 +anddynam 0 +repartit 0 +arrai 0 +myqual 0 +writeup 0 +short 0 +overview 0 +check 0 +theslidesfrom 0 +present 0 +slide 0 +toresourc 0 +relat 0 +eduobject 0 +mirror 0 +closer 0 +appear 0 +pastor 0 +vybrasyvalsya 0 +okna 0 +pyatyi 0 +deystvov 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..f8738dd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,55 @@ +washington 2 +virgil 1 +comput 1 +bourassa 1 +engin 1 +scienc 1 +theunivers 1 +seattl 1 +work 1 +univers 1 +home 0 +page 0 +uwvirgil 0 +evan 0 +bourassavirgil 0 +student 0 +depart 0 +research 0 +interestsinclud 0 +oper 0 +system 0 +architectur 0 +join 0 +boeingin 0 +scientist 0 +scienceorgan 0 +inform 0 +support 0 +servic 0 +divis 0 +bellevu 0 +receiv 0 +electr 0 +arizonast 0 +temp 0 +arizona 0 +electricalengin 0 +accesswhat 0 +interest 0 +expertis 0 +resum 0 +patent 0 +invent 0 +public 0 +present 0 +profession 0 +histori 0 +educ 0 +achiev 0 +recommend 0 +letter 0 +statusoccasion 0 +updat 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..52bcb035 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,201 @@ +avail 2 +univers 1 +work 1 +comput 1 +washington 1 +interest 1 +distribut 1 +system 1 +high 1 +scienc 1 +also 1 +safeti 1 +year 1 +vivek 1 +page 1 +current 1 +bellcor 1 +applic 1 +group 1 +simul 1 +last 1 +seattl 1 +cricket 1 +ratan 1 +home 1 +graduat 1 +depart 1 +research 1 +issu 1 +develop 1 +look 1 +recent 1 +fault 1 +detail 1 +project 1 +softwar 1 +found 1 +involv 1 +rsml 1 +public 1 +angelo 1 +like 1 +plai 1 +game 1 +learn 1 +danc 1 +model 1 +unit 1 +nation 1 +chapter 1 +particular 0 +student 0 +academ 0 +leav 0 +scientistat 0 +morristown 0 +researchwork 0 +distributedsoftwar 0 +simpli 0 +highli 0 +continu 0 +presenc 0 +failur 0 +toolset 0 +anatida 0 +provid 0 +mechan 0 +adher 0 +corba 0 +standard 0 +activ 0 +replic 0 +scheme 0 +indc 0 +effort 0 +focuss 0 +enhanc 0 +toler 0 +cell 0 +directori 0 +servic 0 +foundher 0 +integrationof 0 +technolog 0 +server 0 +murphi 0 +methodolog 0 +head 0 +bydr 0 +nanci 0 +leveson 0 +much 0 +automat 0 +tree 0 +gener 0 +fromrequir 0 +specif 0 +written 0 +well 0 +languag 0 +list 0 +person 0 +inform 0 +born 0 +brought 0 +india 0 +eight 0 +undergradu 0 +attend 0 +state 0 +wesleyan 0 +middletown 0 +receiv 0 +physic 0 +math 0 +right 0 +came 0 +purus 0 +studi 0 +tenni 0 +whenev 0 +squash 0 +lesserext 0 +racquetbal 0 +suffic 0 +ardent 0 +folow 0 +particip 0 +ultra 0 +email 0 +mani 0 +follow 0 +exploit 0 +superson 0 +marin 0 +cowboi 0 +half 0 +taken 0 +keen 0 +ballroom 0 +waltz 0 +foxtrot 0 +chacha 0 +rhumba 0 +tango 0 +swing 0 +west 0 +coast 0 +pleas 0 +occasion 0 +dabbl 0 +mambo 0 +area 0 +best 0 +place 0 +center 0 +us 0 +band 0 +session 0 +everi 0 +saturdai 0 +intern 0 +educ 0 +organ 0 +confer 0 +held 0 +throughout 0 +topic 0 +restructur 0 +reform 0 +part 0 +secur 0 +council 0 +ecosoc 0 +world 0 +bank 0 +rapidpopul 0 +growth 0 +nuclear 0 +prolifer 0 +read 0 +poetri 0 +mirza 0 +ghalib 0 +centuryindian 0 +poet 0 +english 0 +literatur 0 +especi 0 +romant 0 +victorian 0 +period 0 +link 0 +obligatori 0 +collect 0 +sitesthat 0 +tend 0 +visit 0 +often 0 +engin 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..3c6c7fbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,30 @@ +washington 1 +geoff 1 +univers 1 +thesi 1 +window 1 +voelker 1 +skywhoi 0 +graduat 0 +student 0 +inseattl 0 +whati 0 +master 0 +wireless 0 +mobil 0 +comput 0 +design 0 +andbuild 0 +system 0 +call 0 +mobisa 0 +current 0 +avoid 0 +settl 0 +topic 0 +wherechateau 0 +guggenheim 0 +annex 0 +washingtonseattl 0 +look 0 +emac 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..7fe1cf49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,55 @@ +washington 1 +list 1 +home 1 +waynew 1 +look 1 +system 1 +thing 1 +wayn 0 +comput 0 +scienc 0 +engin 0 +depart 0 +univers 0 +seattl 0 +stuff 0 +current 0 +memori 0 +perform 0 +particular 0 +differentmemori 0 +organ 0 +investig 0 +work 0 +beingdon 0 +jean 0 +loup 0 +baer 0 +also 0 +interpret 0 +other 0 +denni 0 +geoff 0 +alec 0 +rightnow 0 +littl 0 +rocki 0 +actual 0 +earli 0 +version 0 +paper 0 +keep 0 +jump 0 +alpha 0 +interest 0 +place 0 +fish 0 +cool 0 +site 0 +howev 0 +peoplewho 0 +peopl 0 +well 0 +test 0 +testwayn 0 +wong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..4fc209dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,13 @@ +chan 1 +home 1 +time 1 +william 0 +pagewilliam 0 +pagei 0 +spend 0 +hell 0 +spare 0 +hang 0 +heaven 0 +wchan 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..f33ccbb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,140 @@ +weld 2 +comput 1 +plan 1 +scienc 1 +univers 1 +intellig 1 +offic 1 +research 1 +aaai 1 +agent 1 +daniel 1 +receiv 1 +artifici 1 +award 1 +inform 1 +paper 1 +washington 1 +interest 1 +current 1 +softwar 1 +base 1 +associ 0 +professor 0 +engineeringat 0 +ofwashington 0 +bachelor 0 +degre 0 +biochemistri 0 +yale 0 +land 0 +presidenti 0 +young 0 +investig 0 +naval 0 +younginvestig 0 +theadvisori 0 +board 0 +journal 0 +airesearch 0 +guest 0 +editor 0 +edit 0 +report 0 +role 0 +ofintellig 0 +system 0 +nation 0 +infrastructur 0 +isco 0 +program 0 +chair 0 +publish 0 +book 0 +scad 0 +technic 0 +person 0 +data 0 +reach 0 +sieg 0 +hall 0 +phone 0 +work 0 +home 0 +mail 0 +dept 0 +engin 0 +seattlewa 0 +specif 0 +exampl 0 +group 0 +support 0 +ucpop 0 +planner 0 +us 0 +almost 0 +hundr 0 +sitesworldwid 0 +mani 0 +avail 0 +electron 0 +arehi 0 +favorit 0 +repres 0 +sens 0 +action 0 +middl 0 +ground 0 +revisit 0 +gather 0 +august 0 +control 0 +aip 0 +ascal 0 +comparison 0 +shop 0 +world 0 +wide 0 +januari 0 +softbot 0 +interfac 0 +internet 0 +cacm 0 +juli 0 +anintroduct 0 +least 0 +commit 0 +magazin 0 +winter 0 +select 0 +exhaustivelist 0 +recreat 0 +absent 0 +foundat 0 +cafe 0 +allegro 0 +stormymountain 0 +climb 0 +past 0 +enjoi 0 +travel 0 +theworld 0 +like 0 +found 0 +plai 0 +twin 0 +boi 0 +adam 0 +galen 0 +invit 0 +visit 0 +galleri 0 +pacif 0 +northwest 0 +desert 0 +wilder 0 +photograph 0 +also 0 +illustr 0 +stori 0 +morocco 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..d76d6a63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,10 @@ +wendi 1 +belluomini 1 +washington 0 +graduat 0 +master 0 +current 0 +work 0 +univ 0 +utah 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..b82987b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,55 @@ +comput 1 +scienc 1 +engin 1 +washington 1 +thedepart 1 +compil 1 +system 1 +wilson 0 +hsiehwilson 0 +hsiehi 0 +postdoc 0 +theunivers 0 +inseattl 0 +member 0 +thespin 0 +project 0 +receiv 0 +electr 0 +sciencein 0 +theschool 0 +engineeringatmit 0 +work 0 +thelaboratori 0 +advisor 0 +werefran 0 +kaashoekandbil 0 +weihl 0 +research 0 +parallel 0 +myresearch 0 +interest 0 +interact 0 +among 0 +programminglanguag 0 +runtim 0 +oper 0 +architectur 0 +select 0 +publicationsselect 0 +linksperson 0 +interestswilson 0 +hsieh 0 +depart 0 +univers 0 +seattl 0 +offic 0 +sieg 0 +move 0 +phone 0 +numberha 0 +chang 0 +voic 0 +whsieh 0 +public 0 +keyoctob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..efa6ca0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,83 @@ +wolman 2 +washington 1 +network 1 +alec 1 +home 1 +scienc 1 +seattl 1 +current 1 +research 1 +perform 1 +interpret 1 +usenix 1 +voelker 1 +page 0 +wolmanwolman 0 +eduworkcomput 0 +engin 0 +departmentunivers 0 +washingtonbox 0 +graduat 0 +student 0 +thecomput 0 +departmentat 0 +univers 0 +offic 0 +isroom 0 +thechateau 0 +gradual 0 +school 0 +work 0 +fordigit 0 +equip 0 +corp 0 +cambridg 0 +interest 0 +includ 0 +oper 0 +system 0 +architectur 0 +recent 0 +project 0 +scalabl 0 +etch 0 +binari 0 +instrument 0 +optim 0 +executablesrocki 0 +performanceon 0 +line 0 +paper 0 +firewal 0 +applic 0 +relai 0 +trees 0 +summer 0 +latenc 0 +analysi 0 +thekkath 0 +winter 0 +structur 0 +romer 0 +wong 0 +baer 0 +bershad 0 +levi 0 +appear 0 +asplo 0 +hungri 0 +otter 0 +fixha 0 +strang 0 +idea 0 +nervou 0 +habit 0 +realli 0 +plai 0 +guitar 0 +link 0 +hallwolman 0 +diseasewolman 0 +pressur 0 +treat 0 +lumber 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..5dcf759b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,85 @@ +baer 1 +perform 1 +parallel 1 +simul 1 +comput 1 +engin 1 +page 1 +washington 1 +evalu 1 +trace 1 +driven 1 +xiaohan 1 +scienc 1 +interest 1 +architectur 1 +distribut 1 +system 1 +paper 1 +confer 1 +multiprocessor 1 +proceed 1 +optimist 1 +zhang 1 +xqin 0 +depart 0 +univers 0 +seattl 0 +offic 0 +sieg 0 +phone 0 +year 0 +graduat 0 +student 0 +work 0 +jean 0 +loup 0 +research 0 +includ 0 +method 0 +model 0 +short 0 +term 0 +goal 0 +school 0 +soon 0 +possibl 0 +cluster 0 +base 0 +submit 0 +explicit 0 +communicationprimit 0 +cach 0 +coher 0 +appear 0 +hpca 0 +compar 0 +studi 0 +conserv 0 +award 0 +symposium 0 +tech 0 +report 0 +dept 0 +univ 0 +implement 0 +intern 0 +process 0 +graph 0 +toolfor 0 +monitor 0 +visual 0 +basedmultiprocessor 0 +nalluri 0 +journal 0 +june 0 +predict 0 +processingon 0 +numa 0 +ieee 0 +tran 0 +softwar 0 +stuff 0 +photo 0 +chinaread 0 +chinesesearch 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..5a53944b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,71 @@ +washington 1 +info 1 +link 1 +yasushi 1 +project 1 +desktop 1 +index 1 +spin 1 +saitoyasushi 0 +saito 0 +second 0 +year 0 +graduat 0 +student 0 +atdepart 0 +comput 0 +scienc 0 +engin 0 +univers 0 +seattl 0 +current 0 +workingwith 0 +brian 0 +bershad 0 +thespin 0 +address 0 +andperson 0 +meta 0 +metacrawl 0 +yahoo 0 +refer 0 +alta 0 +vista 0 +lyco 0 +archi 0 +tech 0 +research 0 +intern 0 +document 0 +modula 0 +time 0 +schedul 0 +survei 0 +oper 0 +system 0 +transact 0 +servic 0 +qual 0 +sightse 0 +japanes 0 +random 0 +javascript 0 +apprentic 0 +page 0 +us 0 +linux 0 +connect 0 +gatewai 0 +japan 0 +perl 0 +patch 0 +touch 0 +type 0 +trainer 0 +dvorak 0 +lesson 0 +text 0 +want 0 +finger 0 +talk 0 +trycanva 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..fea4663b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,71 @@ +zamir 1 +home 1 +washington 1 +depart 1 +seattl 1 +interest 1 +internet 1 +document 1 +search 1 +pictur 1 +oren 0 +pageoren 0 +page 0 +comput 0 +scienc 0 +engineeringunivers 0 +washingtonbox 0 +offic 0 +chateau 0 +edui 0 +isra 0 +graduat 0 +student 0 +computersci 0 +engin 0 +univers 0 +myundergradu 0 +degre 0 +physic 0 +mathemat 0 +hebrewunivers 0 +jerusalem 0 +israel 0 +field 0 +artifici 0 +intellig 0 +softwareengin 0 +current 0 +work 0 +line 0 +cluster 0 +algorithmsfor 0 +retriev 0 +basic 0 +idea 0 +help 0 +userwith 0 +result 0 +hundr 0 +worki 0 +done 0 +part 0 +metacrawl 0 +parallel 0 +servic 0 +along 0 +orenetzioni 0 +erik 0 +selberg 0 +resum 0 +avail 0 +thing 0 +like 0 +dive 0 +sinai 0 +jeeptour 0 +ski 0 +last 0 +raft 0 +trip 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..741cd118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,126 @@ +comput 2 +architectur 1 +also 1 +baer 1 +scienc 1 +research 1 +parallel 1 +system 1 +ieee 1 +chairman 1 +cach 1 +professor 1 +engin 1 +grenobl 1 +ucla 1 +author 1 +serv 1 +fellow 1 +journal 1 +distribut 1 +internationalsymposium 1 +project 1 +page 1 +protocol 1 +cluster 1 +prefetch 1 +simul 1 +titl 0 +jean 0 +loup 0 +professorand 0 +adjunct 0 +electr 0 +receiv 0 +diplom 0 +ingnieur 0 +electricalengin 0 +doctorat 0 +cycl 0 +theuniversit 0 +franc 0 +prior 0 +join 0 +univers 0 +washington 0 +laboratoir 0 +decalcul 0 +universit 0 +member 0 +digit 0 +technologygroup 0 +present 0 +interest 0 +anddistribut 0 +process 0 +coauthor 0 +paper 0 +thesearea 0 +textbook 0 +press 0 +distinguishedvisitor 0 +nation 0 +lectur 0 +guggenheim 0 +editor 0 +languag 0 +asprogram 0 +intern 0 +confer 0 +parallelprocess 0 +program 0 +gener 0 +ofth 0 +current 0 +chair 0 +sigarch 0 +eighteen 0 +student 0 +complet 0 +dissert 0 +professorba 0 +direct 0 +twelv 0 +work 0 +industri 0 +laboratoriesand 0 +inacademia 0 +although 0 +year 0 +hashad 0 +difficulti 0 +retain 0 +french 0 +accent 0 +cours 0 +recent 0 +look 0 +involv 0 +coher 0 +improv 0 +singl 0 +perform 0 +softwar 0 +primit 0 +appear 0 +hpca 0 +uniprocessor 0 +hardwar 0 +comparisonwith 0 +block 0 +asplo 0 +multiprocessor 0 +isca 0 +impact 0 +specul 0 +execut 0 +denni 0 +home 0 +andisca 0 +trace 0 +driven 0 +conserv 0 +approach 0 +icpp 0 +optimisticapproach 0 +comparison 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..938e23fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,118 @@ +chamber 1 +craig 1 +comput 1 +scienc 1 +system 1 +object 1 +orient 1 +languag 1 +incorpor 1 +investig 1 +vortex 1 +project 1 +cecil 1 +static 1 +optim 1 +member 1 +spin 1 +extens 1 +applic 1 +washington 1 +assist 0 +professor 0 +join 0 +faculti 0 +receiv 0 +degre 0 +stanford 0 +research 0 +interest 0 +design 0 +implementationof 0 +advanc 0 +program 0 +express 0 +programminglanguag 0 +effici 0 +implement 0 +support 0 +programmingenviron 0 +current 0 +languagesand 0 +lead 0 +ceciland 0 +pure 0 +languageserv 0 +vehicl 0 +multi 0 +method 0 +type 0 +modul 0 +featur 0 +compilersystem 0 +intra 0 +andinterprocedur 0 +analys 0 +profil 0 +guid 0 +withfront 0 +end 0 +modula 0 +java 0 +previous 0 +chamberswa 0 +self 0 +also 0 +spinproject 0 +oper 0 +microkernel 0 +whichsupport 0 +dynam 0 +adapt 0 +interfac 0 +implementationsund 0 +direct 0 +control 0 +still 0 +maintain 0 +systemintegr 0 +isol 0 +util 0 +dialect 0 +themodula 0 +pointer 0 +safe 0 +kernel 0 +spinalso 0 +reli 0 +dynamiccompil 0 +achiev 0 +high 0 +perform 0 +despit 0 +fine 0 +grainedextens 0 +click 0 +herefor 0 +inform 0 +undergradu 0 +graduat 0 +level 0 +researchproject 0 +area 0 +contact 0 +informationprof 0 +chambersdepart 0 +engineeringunivers 0 +washingtonbox 0 +seattl 0 +mail 0 +requir 0 +street 0 +address 0 +sieg 0 +hall 0 +room 0 +last 0 +updat 0 +april 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..d7e24768 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,116 @@ +carl 1 +ebel 1 +comput 1 +project 1 +scienc 1 +washington 1 +triptych 1 +interest 1 +system 1 +chao 1 +fpga 1 +home 0 +page 0 +ebelingdepart 0 +engin 0 +univers 0 +ofwashington 0 +seattl 0 +voic 0 +mail 0 +offic 0 +sieg 0 +hall 0 +room 0 +associ 0 +professor 0 +physic 0 +wheatoncolleg 0 +southern 0 +illinoisunivers 0 +carnegi 0 +mellonunivers 0 +join 0 +research 0 +fall 0 +categori 0 +vlsiarchitectur 0 +aid 0 +design 0 +digit 0 +haswork 0 +number 0 +vlsi 0 +includ 0 +hitech 0 +chessmachin 0 +apex 0 +graphic 0 +chip 0 +draw 0 +spline 0 +curv 0 +andsurfac 0 +field 0 +programm 0 +gate 0 +arrai 0 +current 0 +hei 0 +involv 0 +build 0 +multicomput 0 +routingnetwork 0 +focu 0 +method 0 +optim 0 +theperform 0 +circuit 0 +us 0 +level 0 +sensit 0 +latch 0 +placementand 0 +rout 0 +algorithm 0 +particularli 0 +teachingspr 0 +advanc 0 +logic 0 +designoffic 0 +hour 0 +mondai 0 +thursdai 0 +travel 0 +april 0 +fccm 0 +napamai 0 +burlington 0 +chicagojun 0 +vegasresearch 0 +northwest 0 +laboratori 0 +integr 0 +router 0 +high 0 +densiti 0 +architectur 0 +public 0 +journal 0 +articl 0 +confer 0 +workshop 0 +paper 0 +graduat 0 +student 0 +soha 0 +hassoun 0 +neil 0 +mckenzi 0 +darren 0 +cronquist 0 +paul 0 +franklin 0 +amara 0 +galleryelan 0 +galleryebel 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..4683cc29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,39 @@ +page 1 +seattl 1 +link 1 +uncertainti 1 +inform 1 +schedul 1 +steve 0 +hanksunivers 0 +washingtondepart 0 +comput 0 +scienc 0 +design 0 +agent 0 +architecturesai 0 +magazin 0 +seriou 0 +home 0 +spring 0 +confer 0 +group 0 +restaur 0 +symphoni 0 +wine 0 +opera 0 +server 0 +edita 0 +gruberova 0 +photo 0 +carlo 0 +maria 0 +giulini 0 +discographi 0 +sumac 0 +tenni 0 +new 0 +hank 0 +washington 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..2e0cf0cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,71 @@ +spent 1 +professor 1 +engin 1 +degre 1 +univers 1 +year 1 +divis 1 +comput 1 +group 1 +system 1 +titl 0 +alistair 0 +holden 0 +electr 0 +origin 0 +highland 0 +scotland 0 +receivedhi 0 +glasgow 0 +withth 0 +british 0 +broadcast 0 +corpor 0 +graduateapprentic 0 +research 0 +yale 0 +edison 0 +fellowship 0 +phddegre 0 +washington 0 +dissert 0 +learningin 0 +artifici 0 +intellig 0 +interest 0 +began 0 +take 0 +coursefrom 0 +colin 0 +cherri 0 +imperi 0 +colleg 0 +london 0 +thebbc 0 +initi 0 +scienc 0 +program 0 +theuw 0 +time 0 +faculti 0 +mostli 0 +math 0 +departmentsform 0 +within 0 +graduat 0 +school 0 +current 0 +work 0 +applic 0 +knowledg 0 +base 0 +verif 0 +expert 0 +integr 0 +symbol 0 +neural 0 +netmethodolog 0 +speech 0 +understand 0 +aid 0 +design 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..b9c9651e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,116 @@ +research 1 +societi 1 +scienc 1 +prize 1 +berkelei 1 +univers 1 +institut 1 +mathemat 1 +lectur 1 +comput 1 +problem 1 +professor 1 +nation 1 +academi 1 +american 1 +board 1 +karp 1 +washington 1 +award 1 +member 1 +advanc 1 +oper 1 +america 1 +manag 1 +program 1 +john 1 +neumann 1 +presentmemb 1 +complex 1 +random 1 +algorithm 1 +travel 1 +salesman 1 +dick 0 +karprichard 0 +karpprofessor 0 +ofcomputersci 0 +engin 0 +andadjunct 0 +ofmolecularbiotechnologyunivers 0 +ofwashington 0 +eduaward 0 +membershipsn 0 +medal 0 +babbag 0 +fellow 0 +ture 0 +sciencesmemb 0 +engineeringfellow 0 +art 0 +sciencesfellow 0 +associ 0 +sciencedistinguish 0 +teach 0 +academ 0 +senat 0 +class 0 +chair 0 +berkeleylanchest 0 +fulkerson 0 +theori 0 +faculti 0 +hermann 0 +weyl 0 +studi 0 +industri 0 +appliedmathemat 0 +miller 0 +honorari 0 +doctor 0 +georgetown 0 +massachusett 0 +technion 0 +pennsylvania 0 +advisori 0 +profession 0 +forsoci 0 +respons 0 +governor 0 +weizmann 0 +truste 0 +intern 0 +scienceinstitut 0 +presentselect 0 +public 0 +combinator 0 +turingaward 0 +commun 0 +construct 0 +perfect 0 +match 0 +upfal 0 +wigderson 0 +combinatorica 0 +probabilist 0 +analysi 0 +partit 0 +plane 0 +ofoper 0 +theoret 0 +improv 0 +effici 0 +fornetwork 0 +flow 0 +edmond 0 +journal 0 +theacm 0 +reduc 0 +among 0 +combinatori 0 +plenum 0 +press 0 +minimum 0 +spanningtre 0 +part 0 +held 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..ed502d5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,250 @@ +comput 2 +lazowska 2 +univers 2 +scienc 2 +committe 2 +washington 2 +chair 2 +inform 2 +board 1 +technolog 1 +member 1 +research 1 +program 1 +thedepart 1 +advisori 1 +system 1 +home 1 +lectur 1 +universityof 1 +berkelei 1 +faculti 1 +serv 1 +nation 1 +review 1 +perform 1 +concern 1 +engin 1 +ahalf 1 +centuri 1 +exponentialprogress 1 +annualfaculti 1 +vicepresid 1 +gore 1 +speech 1 +eniac 1 +thanniversari 1 +celebr 1 +california 1 +invent 1 +chines 1 +cook 1 +nathanmyhrvold 1 +joinsedlazowska 1 +theuwcs 1 +trip 1 +memori 1 +graduat 1 +student 1 +seem 1 +director 1 +depart 1 +industri 1 +council 1 +person 1 +award 1 +select 1 +complet 1 +recent 1 +machineri 1 +testimonyto 1 +page 1 +grade 1 +congress 0 +talk 0 +like 0 +georgejetson 0 +support 0 +polici 0 +appropri 0 +forfr 0 +flintston 0 +lane 0 +professor 0 +mostlywearsti 0 +push 0 +paper 0 +rack 0 +frequent 0 +flier 0 +mile 0 +pick 0 +onthi 0 +mbquicktim 0 +healso 0 +host 0 +lot 0 +visitor 0 +surpris 0 +number 0 +havefunnynos 0 +researchassoci 0 +includ 0 +essenti 0 +allgradu 0 +laboratoriesin 0 +field 0 +ofcra 0 +govern 0 +affair 0 +foundat 0 +advisorycommitte 0 +andengin 0 +scomputersci 0 +telecommun 0 +technic 0 +formicrosoft 0 +personnationalsemiconductor 0 +academicadvisori 0 +ofdata 0 +corpor 0 +scientif 0 +forcabl 0 +hows 0 +ventur 0 +cascadia 0 +fund 0 +belong 0 +stand 0 +committeesfor 0 +eecsat 0 +andth 0 +atstanford 0 +virginia 0 +hongkong 0 +ture 0 +servic 0 +councilpanel 0 +multi 0 +agencyhigh 0 +computingand 0 +commun 0 +brook 0 +sutherland 0 +andha 0 +examinersfor 0 +record 0 +examin 0 +test 0 +sigmetr 0 +associ 0 +sspecial 0 +interest 0 +group 0 +softwar 0 +chairof 0 +symposium 0 +oper 0 +principl 0 +andeditor 0 +ieee 0 +transact 0 +addit 0 +servinga 0 +onacadem 0 +thecommitte 0 +deanship 0 +colleg 0 +artsand 0 +forth 0 +molecular 0 +biotechnolog 0 +amemb 0 +deanof 0 +deliv 0 +theunivers 0 +annual 0 +fellowof 0 +associationfor 0 +theinstitut 0 +electr 0 +andelectron 0 +seventeenph 0 +studentshav 0 +degre 0 +work 0 +miscellan 0 +link 0 +integratedoverview 0 +region 0 +also 0 +apersuas 0 +player 0 +version 0 +intend 0 +forloc 0 +consumpt 0 +theimpact 0 +perspect 0 +uwcs 0 +profession 0 +master 0 +persuas 0 +playertopten 0 +reason 0 +major 0 +csebuild 0 +project 0 +abbrevi 0 +cvcomputingresearch 0 +drive 0 +informationtechnolog 0 +forwardmassi 0 +goldmanreport 0 +alleg 0 +cseph 0 +product 0 +issu 0 +flaw 0 +data 0 +medianyear 0 +confer 0 +boardstudi 0 +doctor 0 +think 0 +driver 0 +highwai 0 +saturdayseminar 0 +novemb 0 +houseappropri 0 +april 0 +hous 0 +hpcc 0 +octob 0 +februari 0 +interestinghom 0 +sometim 0 +demo 0 +purpos 0 +odeto 0 +execut 0 +vice 0 +presid 0 +tallman 0 +trask 0 +departsfor 0 +duke 0 +lanelazowska 0 +down 0 +famili 0 +pagedirect 0 +houseshilshol 0 +aquat 0 +club 0 +pagerec 0 +discoveredreview 0 +poetryfing 0 +scheduleinform 0 +offic 0 +reflector 0 +http 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..5948be90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,94 @@ +professor 1 +univers 1 +system 1 +shaw 1 +engin 1 +stanford 1 +scienc 1 +comput 1 +research 1 +fulbright 1 +associ 1 +real 1 +time 1 +softwar 1 +book 1 +committe 1 +dissert 1 +includ 1 +half 1 +hobbi 1 +titl 0 +alan 0 +graduat 0 +bachelor 0 +sdegre 0 +physic 0 +toronto 0 +amast 0 +mathemat 0 +incomput 0 +addit 0 +facultyappoint 0 +washington 0 +start 0 +hasbeen 0 +assist 0 +cornel 0 +visit 0 +scholar 0 +pari 0 +guest 0 +informat 0 +zurich 0 +atth 0 +linear 0 +acceler 0 +center 0 +theibm 0 +corpor 0 +current 0 +interest 0 +specif 0 +method 0 +publicationsinclud 0 +textbook 0 +oper 0 +softwareengin 0 +introductori 0 +text 0 +andan 0 +edit 0 +document 0 +prepar 0 +serv 0 +memberof 0 +editori 0 +member 0 +sciencescreen 0 +award 0 +associateeditor 0 +journal 0 +editor 0 +ieee 0 +transact 0 +among 0 +thing 0 +supervis 0 +mani 0 +these 0 +project 0 +fifteen 0 +distinguish 0 +former 0 +student 0 +academ 0 +posit 0 +work 0 +live 0 +good 0 +food 0 +trumpet 0 +hike 0 +bike 0 +tenni 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..101c506a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,97 @@ +parallel 1 +snyder 1 +professor 1 +univers 1 +serv 1 +research 1 +program 1 +chip 1 +editor 1 +system 1 +scienc 1 +receiv 1 +visit 1 +faculti 1 +architectur 1 +complet 1 +project 1 +journal 1 +nation 1 +committe 1 +comput 1 +numer 1 +direct 1 +chair 1 +titl 0 +lawrenc 0 +bachelor 0 +sdegre 0 +iowa 0 +mathemat 0 +econom 0 +andin 0 +carnegi 0 +mellon 0 +computersci 0 +scholar 0 +washington 0 +join 0 +perman 0 +onth 0 +yale 0 +purdu 0 +scholarat 0 +harvard 0 +rang 0 +proof 0 +theundecid 0 +properti 0 +design 0 +developmentof 0 +singl 0 +cmo 0 +microprocessor 0 +quarter 0 +hors 0 +hecreat 0 +configur 0 +highli 0 +thepok 0 +environ 0 +inventor 0 +chaoticrout 0 +follow 0 +blue 0 +nowprincip 0 +investig 0 +orca 0 +nwli 0 +associ 0 +computerand 0 +ofth 0 +area 0 +ieee 0 +transact 0 +anddistribut 0 +foundationadvisori 0 +divis 0 +particip 0 +advisorycommitte 0 +futur 0 +andcomput 0 +polici 0 +distinguish 0 +doctoraldissert 0 +award 0 +select 0 +first 0 +symposium 0 +algorithmsand 0 +addit 0 +dozen 0 +student 0 +doctor 0 +degreesund 0 +guid 0 +master 0 +seniorproject 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..0855a8ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,81 @@ +system 1 +comput 1 +somani 1 +professor 1 +design 1 +network 1 +parallel 1 +arun 1 +electr 1 +engin 1 +develop 1 +indian 1 +toler 1 +interconnect 1 +architectur 1 +proteu 1 +tenni 1 +earn 0 +msee 0 +degre 0 +mcgill 0 +univers 0 +montreal 0 +canada 0 +respect 0 +prior 0 +work 0 +scientif 0 +offic 0 +govt 0 +india 0 +delhi 0 +period 0 +anti 0 +submarin 0 +warfar 0 +navi 0 +research 0 +interest 0 +area 0 +offault 0 +algorithm 0 +current 0 +involv 0 +three 0 +major 0 +project 0 +high 0 +integr 0 +address 0 +issu 0 +relat 0 +tocach 0 +memori 0 +redund 0 +evalu 0 +tool 0 +congest 0 +control 0 +fault 0 +broadband 0 +multiprocessor 0 +autom 0 +classif 0 +object 0 +base 0 +generalizedenhanc 0 +hypercub 0 +reconfigur 0 +explor 0 +coars 0 +grain 0 +like 0 +cook 0 +food 0 +hike 0 +plai 0 +bridg 0 +tabl 0 +inform 0 +dpcnl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..b4010fd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,173 @@ +comput 2 +serv 2 +tanimoto 1 +univers 1 +chair 1 +pattern 1 +visit 1 +confer 1 +imag 1 +visual 1 +ieee 1 +professor 1 +franc 1 +research 1 +analysi 1 +program 1 +intellig 1 +process 1 +languag 1 +recognit 1 +steven 1 +home 1 +page 1 +scienc 1 +adjunct 1 +electricalengin 1 +anda 1 +linkp 1 +sweden 1 +scientist 1 +machin 1 +june 1 +addit 1 +parallel 1 +artifici 1 +mathemat 1 +edit 1 +vision 1 +lisp 1 +current 1 +societi 1 +gener 1 +conferenceon 1 +committe 1 +journal 1 +colleg 1 +engin 0 +receiv 0 +degre 0 +fromharvard 0 +princeton 0 +join 0 +theunivers 0 +washington 0 +faculti 0 +year 0 +teach 0 +atth 0 +connecticut 0 +professorat 0 +institut 0 +programm 0 +pari 0 +scholar 0 +sinc 0 +hasalso 0 +member 0 +depart 0 +atkob 0 +japan 0 +think 0 +corpor 0 +cambridg 0 +massachusett 0 +theinstitut 0 +enseign 0 +superieur 0 +techniquesd 0 +electroniqu 0 +irest 0 +nant 0 +assist 0 +variou 0 +hasrec 0 +forimag 0 +devot 0 +processingand 0 +commun 0 +take 0 +place 0 +bordeaux 0 +interest 0 +includ 0 +ofimag 0 +particularli 0 +us 0 +processor 0 +educ 0 +technolog 0 +currentlydirect 0 +sponsor 0 +project 0 +experi 0 +throughimag 0 +whose 0 +object 0 +develop 0 +person 0 +softwarethat 0 +motiv 0 +grade 0 +student 0 +studi 0 +written 0 +coauthor 0 +paper 0 +thebook 0 +structur 0 +author 0 +textbook 0 +entitl 0 +element 0 +introductionus 0 +publish 0 +common 0 +accompanyingsoftwar 0 +work 0 +book 0 +subject 0 +ofparallel 0 +organ 0 +internationalworkshop 0 +held 0 +seattl 0 +serveda 0 +meet 0 +bergen 0 +norwai 0 +also 0 +intern 0 +subconfer 0 +programcommitte 0 +numer 0 +patternrecognit 0 +chairman 0 +societyworkshop 0 +architectur 0 +machineintellig 0 +steer 0 +theieee 0 +symposiaon 0 +editorialboard 0 +cvgip 0 +understand 0 +editor 0 +chief 0 +ieeetransact 0 +relat 0 +activ 0 +engineeringeduc 0 +polici 0 +vice 0 +council 0 +elect 0 +fellow 0 +outsid 0 +steve 0 +enjoi 0 +plai 0 +jazz 0 +andclass 0 +piano 0 +music 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..e6234428 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,133 @@ +comput 2 +scienc 2 +serv 2 +univers 1 +theori 1 +committe 1 +young 1 +foundat 1 +chairman 1 +professor 1 +colleg 1 +faculti 1 +also 1 +research 1 +program 1 +depart 1 +nation 1 +inform 1 +berkelei 1 +associ 1 +theoret 1 +complex 1 +algorithm 1 +special 1 +societi 1 +editori 1 +board 1 +current 1 +journal 1 +titl 0 +paul 0 +graduateof 0 +antioch 0 +receiv 0 +hejoin 0 +washington 0 +seventeen 0 +year 0 +atpurdu 0 +earli 0 +member 0 +inperhap 0 +first 0 +unit 0 +state 0 +postdoctor 0 +fellow 0 +atstanford 0 +reed 0 +briefli 0 +aschairman 0 +theunivers 0 +mexico 0 +twice 0 +taught 0 +visit 0 +professorin 0 +divis 0 +california 0 +becam 0 +dean 0 +facil 0 +engin 0 +interest 0 +emphasi 0 +question 0 +thegener 0 +connect 0 +mathematicallog 0 +author 0 +coauthor 0 +paper 0 +area 0 +iscoauthor 0 +graduat 0 +textbook 0 +gener 0 +three 0 +time 0 +symposiumon 0 +executivecommitte 0 +nomin 0 +interestgroup 0 +sigact 0 +chairmanof 0 +ieee 0 +annualsymposium 0 +foc 0 +hasserv 0 +vice 0 +stechnic 0 +mathemat 0 +chair 0 +programcommitte 0 +structur 0 +confer 0 +advisorysubcommitte 0 +thiscommitte 0 +ofth 0 +issu 0 +control 0 +annal 0 +histori 0 +ofcomput 0 +notr 0 +dame 0 +formallog 0 +system 0 +eleven 0 +student 0 +complet 0 +doctor 0 +dissert 0 +underprofessor 0 +direct 0 +sever 0 +gone 0 +dopostdoctor 0 +work 0 +cornel 0 +ofcalifornia 0 +eight 0 +hold 0 +posit 0 +avarieti 0 +chosen 0 +industri 0 +employ 0 +leather 0 +motorcycl 0 +jacket 0 +read 0 +ratherthan 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..700ba621 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,69 @@ +comput 1 +support 1 +applic 1 +zahorjan 1 +univers 1 +receiv 1 +research 1 +schedul 1 +runtim 1 +parallel 1 +polici 1 +system 1 +program 1 +titl 0 +john 0 +professor 0 +graduat 0 +frombrown 0 +oftoronto 0 +presidenti 0 +young 0 +investigatoraward 0 +primari 0 +interest 0 +area 0 +parallelsystem 0 +supportfor 0 +mobil 0 +current 0 +focu 0 +continu 0 +mediaappl 0 +involv 0 +real 0 +time 0 +audio 0 +video 0 +thegoal 0 +provid 0 +interfac 0 +allow 0 +torespond 0 +easili 0 +chang 0 +load 0 +activ 0 +topic 0 +includ 0 +techniqu 0 +parallelizationof 0 +code 0 +written 0 +sequenti 0 +languag 0 +exhibit 0 +bothcontrol 0 +data 0 +construct 0 +develop 0 +intend 0 +formobil 0 +platform 0 +editori 0 +board 0 +ieee 0 +transactionson 0 +softwar 0 +engin 0 +survei 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..ff6599b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,175 @@ +imag 2 +project 2 +student 2 +metip 1 +learn 1 +process 1 +program 1 +mathemat 1 +us 1 +activ 1 +current 1 +experi 1 +comput 1 +click 1 +order 1 +free 1 +copi 1 +todai 1 +softwar 1 +tanimoto 1 +scienc 1 +encourag 1 +discuss 1 +digit 1 +develop 1 +allow 1 +materi 1 +part 1 +number 1 +pixel 1 +calcul 1 +transform 1 +applic 1 +list 1 +peopl 1 +work 1 +relat 1 +educ 1 +pleas 1 +link 1 +xform 1 +washington 1 +director 0 +steven 0 +depart 0 +emphas 0 +practic 0 +group 0 +exploratori 0 +open 0 +end 0 +goal 0 +help 0 +meet 0 +theseobject 0 +particular 0 +seri 0 +applicationsdesign 0 +manipul 0 +choic 0 +intend 0 +enrich 0 +rather 0 +astandard 0 +classroom 0 +curriculum 0 +teacher 0 +plai 0 +variou 0 +role 0 +withthes 0 +exampl 0 +catalyz 0 +bylead 0 +theconcept 0 +explor 0 +toexplor 0 +warper 0 +programm 0 +primarili 0 +pentium 0 +base 0 +srun 0 +microsoft 0 +window 0 +alsoavail 0 +appl 0 +macintosh 0 +volunt 0 +mathematicsteach 0 +particip 0 +test 0 +experiment 0 +transcriptproject 0 +design 0 +record 0 +keep 0 +framework 0 +willfacilit 0 +storag 0 +person 0 +academicinform 0 +hard 0 +disk 0 +floppi 0 +own 0 +themetip 0 +close 0 +involv 0 +studi 0 +ofmultiplay 0 +tointegr 0 +idea 0 +describ 0 +prospect 0 +forth 0 +direct 0 +distribut 0 +databas 0 +collect 0 +user 0 +itsxform 0 +done 0 +somethingfun 0 +know 0 +put 0 +version 0 +document 0 +onlin 0 +littl 0 +demonstr 0 +beenput 0 +togeth 0 +graduat 0 +took 0 +seminar 0 +winter 0 +environ 0 +integr 0 +witha 0 +subset 0 +common 0 +lisp 0 +offer 0 +technic 0 +essenti 0 +newapproach 0 +teach 0 +fundamentalattract 0 +approach 0 +thecomput 0 +pursuit 0 +creat 0 +neat 0 +visual 0 +effect 0 +digitalimag 0 +portrai 0 +thing 0 +interest 0 +successfulli 0 +instal 0 +would 0 +like 0 +discussteach 0 +contact 0 +support 0 +nation 0 +foundat 0 +undergr 0 +bricker 0 +last 0 +modifi 0 +tuesdai 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..e6e782a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,53 @@ +mobil 1 +comput 1 +univers 1 +washington 1 +system 1 +infrastructur 1 +paper 1 +research 0 +ubiquit 0 +washingtonher 0 +overview 0 +computingresearch 0 +project 0 +mobisa 0 +inform 0 +wireless 0 +environ 0 +handheld 0 +task 0 +graph 0 +manag 0 +applic 0 +cope 0 +resourc 0 +variabl 0 +survei 0 +describ 0 +fundament 0 +challeng 0 +field 0 +program 0 +methodolog 0 +disconnect 0 +operationdistribut 0 +transact 0 +mobilecomput 0 +systemcontact 0 +prof 0 +brian 0 +bershadprof 0 +gaetano 0 +borriellomarc 0 +fiuczynskigeorg 0 +formanprof 0 +hank 0 +levygeoff 0 +voelkerterri 0 +watsonprof 0 +john 0 +zahorjan 0 +last 0 +updat 0 +forman 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..1a6e6d0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,138 @@ +cecil 2 +project 2 +vortex 1 +languag 1 +object 1 +orient 1 +research 1 +implement 1 +support 1 +static 1 +like 1 +class 1 +list 1 +avail 1 +page 1 +high 1 +type 1 +system 1 +optim 1 +level 1 +current 1 +profil 1 +intraprocedur 1 +messag 1 +analys 1 +also 1 +elimin 1 +written 1 +releas 1 +suno 1 +subscrib 1 +inform 1 +overview 1 +member 1 +paper 1 +sampl 1 +peopl 1 +intern 1 +document 1 +projectuw 0 +projectwelcom 0 +home 0 +conduct 0 +program 0 +design 0 +emphasi 0 +issu 0 +pure 0 +intend 0 +rapidconstruct 0 +qualiti 0 +extens 0 +softwar 0 +incorporatesmulti 0 +method 0 +simpl 0 +prototyp 0 +base 0 +model 0 +mechan 0 +tosupport 0 +structur 0 +form 0 +comput 0 +inherit 0 +modul 0 +basedencapsul 0 +flexibl 0 +allowsstat 0 +dynam 0 +code 0 +freeli 0 +compil 0 +infrastructur 0 +forobject 0 +target 0 +pureobject 0 +hybrid 0 +orientedlanguag 0 +modula 0 +java 0 +incorporateshigh 0 +analysi 0 +hierachyanalysi 0 +guid 0 +receiv 0 +predict 0 +guidedselect 0 +procedur 0 +special 0 +split 0 +automat 0 +inlin 0 +closur 0 +includ 0 +acollect 0 +standard 0 +commonsubexpress 0 +dead 0 +assign 0 +vortexcompil 0 +entir 0 +initi 0 +beta 0 +currentlyavail 0 +sparc 0 +run 0 +either 0 +solari 0 +send 0 +mail 0 +interest 0 +request 0 +bodi 0 +ofinterest 0 +parti 0 +obtain 0 +thebeta 0 +recent 0 +finish 0 +technic 0 +report 0 +describ 0 +much 0 +detail 0 +goal 0 +direct 0 +postscript 0 +version 0 +past 0 +uwcs 0 +sourc 0 +relat 0 +pointer 0 +projectslast 0 +updat 0 +august 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..1c80a0d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,34 @@ +grail 1 +imag 1 +washington 1 +graphic 1 +depart 1 +comput 1 +scienc 1 +engin 1 +laboratorywelcom 0 +home 0 +page 0 +laboratori 0 +theunivers 0 +inform 0 +peopl 0 +cours 0 +research 0 +project 0 +public 0 +these 0 +softwar 0 +data 0 +cool 0 +neighborhood 0 +univers 0 +seattl 0 +local 0 +interest 0 +disk 0 +usag 0 +polici 0 +comment 0 +mtwong 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..e9460d76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,71 @@ +rout 2 +chao 1 +research 1 +simul 1 +chaotic 1 +project 1 +washington 1 +router 1 +algorithm 1 +result 1 +pcrcw 1 +comput 1 +univers 1 +seattl 1 +technic 1 +report 1 +avail 1 +chip 1 +micron 1 +network 1 +present 1 +group 1 +scienc 0 +engin 0 +depart 0 +friend 0 +mine 0 +dylan 0 +peopleal 0 +sort 0 +peopl 0 +work 0 +paper 0 +repositori 0 +papersand 0 +chaoticrout 0 +implement 0 +hardwar 0 +built 0 +test 0 +cmo 0 +redesign 0 +process 0 +better 0 +perform 0 +power 0 +allsort 0 +includ 0 +nice 0 +graphicalfront 0 +standard 0 +discuss 0 +presentationof 0 +abl 0 +come 0 +upwith 0 +guidelin 0 +build 0 +list 0 +thathav 0 +web 0 +describ 0 +interconnect 0 +parallel 0 +commun 0 +workshop 0 +held 0 +univeristi 0 +proceed 0 +home 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..918cd2b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,201 @@ +chinook 2 +synthesi 1 +design 1 +softwar 1 +hardwar 1 +time 1 +embed 1 +system 1 +architectur 1 +rather 1 +gener 1 +processor 1 +version 1 +includ 1 +scienc 1 +fellowship 1 +project 1 +salmon 1 +warm 1 +wind 1 +rocki 1 +mountain 1 +real 1 +constraint 1 +map 1 +detail 1 +enabl 1 +make 1 +level 1 +work 1 +retarget 1 +activ 1 +simul 1 +partit 1 +june 1 +connect 1 +code 1 +compon 1 +chou 1 +nation 1 +graduat 1 +shinook 0 +larg 0 +oncorhynchu 0 +tshawytscha 0 +pacif 0 +amer 0 +name 0 +tribe 0 +blow 0 +east 0 +southerli 0 +west 0 +rare 0 +american 0 +sled 0 +doga 0 +toolfor 0 +cadtool 0 +control 0 +domin 0 +reactivesystem 0 +behavior 0 +descriptionto 0 +user 0 +target 0 +fill 0 +neededto 0 +build 0 +complet 0 +inform 0 +designdecis 0 +high 0 +earli 0 +cycl 0 +reiterateaft 0 +willnot 0 +maintain 0 +also 0 +designerto 0 +take 0 +advantag 0 +technolog 0 +instead 0 +ti 0 +legacycod 0 +current 0 +develop 0 +currentlyw 0 +interprocessorcommun 0 +effici 0 +accur 0 +moredetail 0 +becom 0 +avail 0 +shortli 0 +character 0 +follow 0 +meet 0 +ratherthan 0 +try 0 +maxim 0 +averag 0 +perform 0 +util 0 +assumesmanu 0 +believ 0 +issu 0 +intricateand 0 +sometim 0 +even 0 +technic 0 +want 0 +evalu 0 +differentarchitectur 0 +forc 0 +singl 0 +fix 0 +asicarchitectur 0 +synthes 0 +reli 0 +onoff 0 +shelf 0 +kernel 0 +discourag 0 +first 0 +tool 0 +oper 0 +innovemb 0 +shownat 0 +autom 0 +confer 0 +diego 0 +mainfeatur 0 +automat 0 +peripheraldevic 0 +sequenti 0 +concurr 0 +descript 0 +andsynthes 0 +devic 0 +driver 0 +input 0 +verilog 0 +output 0 +hardwarenetlist 0 +need 0 +togeth 0 +softwareprogram 0 +main 0 +topic 0 +interfacingproblem 0 +schedul 0 +timingconstraint 0 +function 0 +improv 0 +demonstr 0 +nato 0 +summer 0 +school 0 +swcodedesign 0 +tremezzo 0 +itali 0 +incorpor 0 +severalmor 0 +interfac 0 +techniqu 0 +memori 0 +moreeffici 0 +chinookersfacultygaetano 0 +borriellogradu 0 +student 0 +ross 0 +ortegaken 0 +hinesian 0 +macduff 0 +recent 0 +selizabeth 0 +walkupscott 0 +hauck 0 +henrik 0 +hulgaardstafflarri 0 +mcmurchielist 0 +paperschinook 0 +sponsorsarpa 0 +contract 0 +foundat 0 +grant 0 +walkup 0 +patricia 0 +robert 0 +harri 0 +ortega 0 +link 0 +depart 0 +comput 0 +engin 0 +universityof 0 +washington 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..546ea28f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,135 @@ +emerald 2 +fpga 1 +architectur 1 +tool 1 +driven 1 +rout 1 +work 1 +copyright 1 +mcmurchi 1 +develop 1 +map 1 +design 1 +time 1 +need 1 +system 1 +provid 1 +logic 1 +placement 1 +author 1 +darren 1 +cronquist 1 +larri 1 +proceed 1 +intern 1 +symposium 1 +field 1 +programm 1 +gate 1 +februari 1 +router 1 +carl 1 +ebel 1 +projectid 0 +phase 0 +would 0 +makeus 0 +reliabl 0 +produc 0 +accur 0 +performanceevalu 0 +propos 0 +unfortun 0 +given 0 +quickproduct 0 +frame 0 +face 0 +construct 0 +isoften 0 +postpon 0 +mani 0 +featur 0 +beenfrozen 0 +satisfi 0 +fast 0 +prototyp 0 +havedesign 0 +power 0 +quickdevelop 0 +heart 0 +basicfeatur 0 +block 0 +analysi 0 +synthesisand 0 +technolog 0 +global 0 +partit 0 +anddetail 0 +environ 0 +aneffici 0 +thoroughli 0 +specifi 0 +blockarchitectur 0 +well 0 +specif 0 +metric 0 +tailorplac 0 +moreov 0 +parameter 0 +schematicspecif 0 +allow 0 +variat 0 +quickli 0 +capturedand 0 +evalu 0 +public 0 +document 0 +contain 0 +page 0 +includ 0 +byth 0 +contribut 0 +mean 0 +ensur 0 +dissemin 0 +ofscholarli 0 +technic 0 +commerci 0 +basi 0 +andal 0 +right 0 +therein 0 +maintain 0 +copyrighthold 0 +notwithstand 0 +offer 0 +hereelectron 0 +understood 0 +person 0 +copi 0 +thisinform 0 +adher 0 +term 0 +constraint 0 +invok 0 +eachauthor 0 +repost 0 +without 0 +theexplicit 0 +permiss 0 +holder 0 +definit 0 +paper 0 +compil 0 +appear 0 +sigda 0 +fourth 0 +arrai 0 +us 0 +emeraldlarri 0 +pathfind 0 +negoti 0 +basedperform 0 +third 0 +arraysaid 0 +research 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..50f5f7e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,133 @@ +system 2 +circuit 2 +fpga 2 +architectur 2 +asynchron 1 +integr 1 +design 1 +research 1 +embed 1 +project 1 +improv 1 +perform 1 +northwest 1 +laboratori 1 +comput 1 +develop 1 +current 1 +specif 1 +rapid 1 +level 1 +depart 1 +scienc 1 +engin 1 +univers 1 +washington 1 +vlsi 1 +synthesi 1 +well 1 +montag 1 +multi 1 +prototyp 1 +rout 1 +gener 1 +clock 1 +retim 1 +cost 1 +tune 1 +seattl 0 +usath 0 +engag 0 +larg 0 +scale 0 +aid 0 +educ 0 +sinc 0 +late 0 +todai 0 +focu 0 +wide 0 +varieti 0 +sytem 0 +verificationtim 0 +separ 0 +event 0 +verif 0 +time 0 +survei 0 +methodolog 0 +first 0 +prototypingtriptych 0 +triptych 0 +densiti 0 +commerci 0 +springbok 0 +board 0 +partit 0 +assign 0 +topolog 0 +work 0 +emerald 0 +adapt 0 +toolset 0 +complet 0 +map 0 +placement 0 +toolscan 0 +automat 0 +descript 0 +fpgaarchitectur 0 +metric 0 +incorporatedinto 0 +variou 0 +tool 0 +result 0 +systemsth 0 +chinook 0 +hardwar 0 +softwar 0 +simul 0 +applic 0 +optim 0 +synchron 0 +circuitsretim 0 +effici 0 +algorithm 0 +uselevel 0 +sensit 0 +latch 0 +reduc 0 +andincreas 0 +toler 0 +skew 0 +method 0 +synchronouscircuit 0 +latenc 0 +feedback 0 +contraint 0 +network 0 +routerth 0 +chaoticrout 0 +self 0 +systemsself 0 +direct 0 +kehlprevi 0 +gemini 0 +valid 0 +layout 0 +compar 0 +implement 0 +mactest 0 +digit 0 +function 0 +tester 0 +chip 0 +cmo 0 +voltag 0 +arpa 0 +reportsarpa 0 +bluebook 0 +paragraph 0 +overview 0 +accomplish 0 +high 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..299e8211 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,61 @@ +safeti 2 +system 2 +softwar 1 +univers 1 +washington 1 +techniqu 1 +comput 1 +foundat 1 +methodolog 1 +critic 1 +us 1 +engin 1 +analysi 1 +plai 0 +increasingli 0 +import 0 +role 0 +nuclear 0 +reactor 0 +aircraft 0 +defenc 0 +space 0 +chemic 0 +plant 0 +medic 0 +equip 0 +consequ 0 +malfunct 0 +must 0 +pass 0 +rigor 0 +test 0 +review 0 +although 0 +exist 0 +decad 0 +appli 0 +contain 0 +digit 0 +goal 0 +project 0 +develop 0 +theoret 0 +build 0 +built 0 +upon 0 +safewar 0 +nanci 0 +leveson 0 +summar 0 +issu 0 +involv 0 +lai 0 +work 0 +support 0 +prototyp 0 +tool 0 +valid 0 +specif 0 +scienc 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..230746f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,169 @@ +softbot 2 +internet 1 +user 1 +softwar 1 +system 1 +plan 1 +goal 1 +interfac 1 +etzioni 1 +inform 1 +washington 1 +found 1 +control 1 +build 1 +agent 1 +world 1 +research 1 +develop 1 +robot 1 +intellig 1 +high 1 +level 1 +enabl 1 +wide 1 +resourc 1 +oren 1 +access 1 +project 1 +base 1 +magazin 1 +decemb 1 +graphic 1 +keith 1 +golden 1 +search 1 +reactiv 1 +kwok 1 +back 1 +home 1 +page 1 +softbotinternet 0 +softbotth 0 +finalist 0 +discoveraward 0 +technolog 0 +innov 0 +comput 0 +autonom 0 +interact 0 +real 0 +softwareenviron 0 +oper 0 +databas 0 +pragmaticallyconveni 0 +intellectu 0 +challeng 0 +substrat 0 +support 0 +claim 0 +util 0 +machin 0 +learningtechniqu 0 +acustomiz 0 +moder 0 +assist 0 +internetaccess 0 +accept 0 +languag 0 +generatesand 0 +execut 0 +achiev 0 +learn 0 +itsexperi 0 +human 0 +state 0 +want 0 +accomplish 0 +disambigu 0 +requestand 0 +dynam 0 +determin 0 +satisfyit 0 +us 0 +unix 0 +shell 0 +interactwith 0 +rang 0 +take 0 +tour 0 +sgraphic 0 +princip 0 +investig 0 +daniel 0 +weld 0 +also 0 +check 0 +metacrawl 0 +field 0 +servic 0 +tosearch 0 +multipl 0 +indic 0 +parallel 0 +provid 0 +sophisticatedprun 0 +option 0 +contact 0 +introduct 0 +cacm 0 +juli 0 +methodolog 0 +motiv 0 +without 0 +repli 0 +brook 0 +technic 0 +paper 0 +cartoonrepresent 0 +taken 0 +blanchard 0 +articl 0 +appear 0 +issu 0 +ofcolumn 0 +univers 0 +alumni 0 +group 0 +current 0 +toth 0 +allow 0 +easili 0 +specifi 0 +extend 0 +maintain 0 +xiiplann 0 +work 0 +specif 0 +implement 0 +advanc 0 +space 0 +browser 0 +debug 0 +planner 0 +dave 0 +christianson 0 +compar 0 +rule 0 +versu 0 +procedur 0 +sujai 0 +parekh 0 +ilalearn 0 +design 0 +protocol 0 +multi 0 +collabor 0 +negoti 0 +ying 0 +experi 0 +domain 0 +goan 0 +optim 0 +ingram 0 +gather 0 +hacker 0 +info 0 +local 0 +mike 0 +perkowitz 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..86d5bb0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,213 @@ +system 2 +extens 2 +spin 2 +oper 2 +kernel 2 +project 2 +code 2 +dynam 2 +applic 2 +paper 1 +languag 1 +modula 1 +microsecond 1 +take 1 +perform 1 +describ 1 +interfac 1 +us 1 +implement 1 +page 1 +run 1 +servic 1 +gener 1 +network 1 +support 1 +time 1 +report 1 +appear 1 +washington 1 +control 1 +integr 1 +allow 1 +load 1 +basic 1 +call 1 +data 1 +collect 1 +address 1 +thread 1 +alpha 1 +unix 1 +program 1 +result 1 +creat 1 +termin 1 +benchmark 1 +execut 1 +design 1 +shortcom 1 +safe 1 +intern 1 +inform 1 +compil 1 +member 1 +sai 1 +peopl 1 +credit 1 +bershad 1 +systemspin 0 +thatsupport 0 +adapt 0 +andimplement 0 +direct 0 +stillmaintain 0 +inter 0 +isol 0 +atruntim 0 +accesshardwar 0 +almost 0 +nooverhead 0 +procedur 0 +pass 0 +byrefer 0 +rather 0 +copi 0 +providesa 0 +core 0 +capabl 0 +resourc 0 +thesear 0 +systemservic 0 +space 0 +allextens 0 +written 0 +typesaf 0 +properti 0 +oftypesafeti 0 +prevent 0 +crash 0 +attemptingto 0 +manipul 0 +arbitrari 0 +piec 0 +workstat 0 +writeboth 0 +special 0 +video 0 +wella 0 +purpos 0 +connect 0 +machinerun 0 +server 0 +quick 0 +structur 0 +extend 0 +withlow 0 +overhead 0 +exampl 0 +handl 0 +recov 0 +fault 0 +executeit 0 +synchron 0 +fork 0 +join 0 +protectedprocedur 0 +anoth 0 +function 0 +cross 0 +machin 0 +overethernet 0 +realli 0 +oldadapt 0 +less 0 +operationsund 0 +mach 0 +longer 0 +samehardwar 0 +saveyourself 0 +effort 0 +recent 0 +bind 0 +invoc 0 +mechan 0 +provid 0 +flexibl 0 +effici 0 +andsimpl 0 +osdi 0 +safeti 0 +sosp 0 +protocol 0 +architectur 0 +forappl 0 +specif 0 +usenix 0 +winter 0 +confer 0 +write 0 +experi 0 +build 0 +high 0 +make 0 +clear 0 +distinct 0 +pretti 0 +happi 0 +deal 0 +order 0 +link 0 +linker 0 +point 0 +abil 0 +manag 0 +linkabl 0 +namespac 0 +runtim 0 +interposit 0 +commun 0 +facil 0 +show 0 +improv 0 +critic 0 +wait 0 +trail 0 +talk 0 +interest 0 +bottom 0 +line 0 +arpa 0 +overview 0 +summari 0 +regular 0 +friend 0 +gotten 0 +assist 0 +academia 0 +industri 0 +involv 0 +relat 0 +pointer 0 +barb 0 +arrow 0 +document 0 +latest 0 +statu 0 +avail 0 +could 0 +qualif 0 +master 0 +degre 0 +fund 0 +raship 0 +posit 0 +undergradu 0 +mascot 0 +encourag 0 +mani 0 +decid 0 +adopt 0 +ourmascot 0 +maintain 0 +brian 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..3c081f5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,117 @@ +compil 2 +dynam 2 +time 1 +code 1 +constant 1 +optim 1 +valu 1 +us 1 +project 1 +paper 1 +comput 1 +howev 1 +produc 1 +machin 1 +gener 1 +program 1 +system 1 +projectth 0 +projectmor 0 +inform 0 +member 0 +relat 0 +projectsuw 0 +section 0 +student 0 +group 0 +webdynam 0 +enabl 0 +base 0 +ofinvari 0 +data 0 +theserun 0 +elimin 0 +memoryload 0 +perform 0 +propag 0 +fold 0 +remov 0 +branch 0 +theydetermin 0 +fulli 0 +unrol 0 +loop 0 +bound 0 +performancebenefit 0 +effici 0 +offsetbi 0 +cost 0 +approach 0 +dynamiccompil 0 +strive 0 +fast 0 +high 0 +qualitydynam 0 +programm 0 +annot 0 +region 0 +theprogram 0 +static 0 +optimizingcompil 0 +automat 0 +templat 0 +pair 0 +dataflow 0 +analys 0 +identifi 0 +variabl 0 +willb 0 +simpl 0 +copi 0 +thetempl 0 +patch 0 +execut 0 +work 0 +target 0 +purpos 0 +imper 0 +languag 0 +initi 0 +initialexperi 0 +appli 0 +producedspeedup 0 +rang 0 +part 0 +spinproject 0 +eventu 0 +dynamicallycompil 0 +spin 0 +kernel 0 +exampl 0 +spinev 0 +dispatch 0 +also 0 +activ 0 +explor 0 +otherposs 0 +applic 0 +invirtu 0 +interpret 0 +prototyp 0 +systemi 0 +describ 0 +pldi 0 +arenow 0 +start 0 +design 0 +build 0 +second 0 +wewil 0 +releas 0 +detail 0 +soon 0 +last 0 +updat 0 +august 0 +grant 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..8c4d7ab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,4 @@ +move 1 +permanentlymov 0 +permanentlyth 0 +document 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..77fa9eff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,93 @@ +program 2 +project 1 +comput 1 +washington 1 +languag 1 +direct 1 +compil 1 +engin 1 +info 1 +arrai 1 +base 1 +written 1 +without 1 +machin 1 +level 1 +concept 1 +border 1 +easili 1 +scientif 1 +walk 1 +overview 1 +paper 1 +depart 0 +scienc 0 +univers 0 +seattl 0 +eduzpl 0 +suitabl 0 +would 0 +previous 0 +fortran 0 +fast 0 +sequenti 0 +parallel 0 +modif 0 +special 0 +independ 0 +recompil 0 +necessari 0 +higher 0 +like 0 +elimin 0 +error 0 +prone 0 +index 0 +tediou 0 +loop 0 +typic 0 +shorter 0 +understood 0 +modifi 0 +scientist 0 +find 0 +region 0 +shatter 0 +control 0 +flow 0 +conclus 0 +ideal 0 +small 0 +write 0 +yourmachin 0 +programm 0 +area 0 +shouldconsid 0 +enrol 0 +zpthi 0 +autumn 0 +check 0 +recent 0 +chang 0 +high 0 +minut 0 +introduct 0 +browser 0 +right 0 +manual 0 +relat 0 +detail 0 +line 0 +inform 0 +sampl 0 +peopl 0 +member 0 +horizon 0 +descript 0 +group 0 +futur 0 +acknowledg 0 +list 0 +help 0 +support 0 +work 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..09ed8f04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,149 @@ +multithread 2 +processor 2 +simultan 2 +instruct 2 +parallel 1 +issu 1 +cycl 1 +egger 1 +architectur 1 +level 1 +postscript 1 +levi 1 +tullsen 1 +multipl 1 +superscalar 1 +singl 1 +thread 1 +compil 1 +student 1 +emer 1 +stamm 1 +abstract 1 +proceed 1 +problem 1 +face 1 +high 1 +maintain 1 +util 1 +latenc 1 +modern 1 +techniqu 1 +avail 1 +resourc 1 +enabl 1 +perform 1 +research 1 +jack 1 +public 1 +andd 1 +annual 1 +intern 1 +symposium 1 +comput 1 +page 1 +home 0 +pagesimultan 0 +projectoverviewpeoplepubl 0 +overviewth 0 +crucial 0 +todai 0 +speed 0 +microprocessor 0 +long 0 +memori 0 +allevi 0 +interleav 0 +execut 0 +differentthread 0 +differ 0 +ultim 0 +though 0 +limit 0 +amount 0 +within 0 +permit 0 +independ 0 +function 0 +unit 0 +combin 0 +issuefeatur 0 +wide 0 +hide 0 +abilityof 0 +hardwar 0 +contextsar 0 +activ 0 +compet 0 +dynam 0 +share 0 +exploitthread 0 +interchang 0 +formsof 0 +effect 0 +us 0 +increas 0 +studi 0 +havedemonstr 0 +significantli 0 +improvesprocessor 0 +throughput 0 +multiprogram 0 +parallelworkload 0 +shown 0 +gain 0 +achievedin 0 +minim 0 +extens 0 +ordersuperscalar 0 +current 0 +futur 0 +work 0 +includ 0 +investig 0 +fast 0 +synchronizationtechniqu 0 +also 0 +conduct 0 +otherarchitectur 0 +peoplefaculti 0 +susan 0 +hank 0 +levygradu 0 +dean 0 +tullsenindustri 0 +collabor 0 +digit 0 +equip 0 +corpor 0 +joel 0 +rebecca 0 +convert 0 +submit 0 +juli 0 +exploit 0 +choic 0 +fetch 0 +implement 0 +philadelphia 0 +first 0 +suif 0 +workshop 0 +stanford 0 +januari 0 +maxim 0 +chip 0 +andh 0 +santa 0 +margherita 0 +ligur 0 +itali 0 +june 0 +check 0 +list 0 +project 0 +still 0 +doon 0 +affair 0 +lojlo 0 +washington 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..3dcdcbe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,73 @@ +scienc 2 +comput 2 +depart 1 +award 1 +faculti 1 +three 1 +inform 1 +report 1 +guidebook 1 +statist 1 +wisc 1 +home 0 +pagecomput 0 +departmentabout 0 +departmentour 0 +form 0 +consist 0 +rank 0 +countri 0 +member 0 +receiv 0 +fourteen 0 +presidenti 0 +young 0 +investig 0 +packard 0 +fellowship 0 +women 0 +scientist 0 +engin 0 +incent 0 +excel 0 +doctor 0 +dissert 0 +develop 0 +area 0 +research 0 +project 0 +peopl 0 +cours 0 +offer 0 +fall 0 +class 0 +futur 0 +timet 0 +technic 0 +system 0 +answer 0 +frequent 0 +ask 0 +question 0 +alumni 0 +graduat 0 +undergradu 0 +annual 0 +onlin 0 +util 0 +madison 0 +local 0 +servic 0 +relat 0 +organ 0 +colophon 0 +server 0 +us 0 +infocomput 0 +departmentunivers 0 +wisconsin 0 +madisona 0 +west 0 +dayton 0 +streetmadison 0 +voic 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..7fe20396 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,114 @@ +task 2 +control 1 +observ 1 +point 1 +object 1 +explor 1 +behavior 1 +viewpoint 1 +scene 1 +provabl 1 +local 1 +achiev 1 +smoothli 1 +global 1 +surfac 1 +purpos 1 +recoveri 1 +simpl 1 +navig 1 +abil 1 +correct 1 +base 1 +consid 1 +approach 1 +show 1 +reconstruct 1 +kyro 0 +kutulako 0 +chuck 0 +dyer 0 +consider 0 +interest 0 +recent 0 +emploi 0 +simpleobserv 0 +either 0 +make 0 +propertieseasi 0 +fixat 0 +combin 0 +order 0 +toperform 0 +complex 0 +obstacl 0 +avoid 0 +ourwork 0 +focus 0 +activ 0 +pointof 0 +perform 0 +involv 0 +develop 0 +makesimpl 0 +motion 0 +decis 0 +geometryof 0 +requir 0 +minim 0 +process 0 +imag 0 +first 0 +recov 0 +shape 0 +thesurfac 0 +select 0 +generalobserv 0 +posit 0 +provid 0 +inform 0 +objectthan 0 +other 0 +exist 0 +special 0 +beexploit 0 +mobil 0 +effici 0 +anddeterminist 0 +strategi 0 +reach 0 +localshap 0 +us 0 +qualitativestrategi 0 +viewingdirect 0 +align 0 +princip 0 +direct 0 +selectedpoint 0 +second 0 +deriv 0 +descriptionof 0 +formul 0 +thequalit 0 +observationso 0 +visibl 0 +slide 0 +maxim 0 +connect 0 +region 0 +arbitrari 0 +smooth 0 +attempt 0 +maintain 0 +well 0 +defin 0 +geometr 0 +relationship 0 +observationand 0 +view 0 +suggest 0 +lead 0 +also 0 +simplifi 0 +frame 0 +comput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..2bfdea6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,99 @@ +contour 2 +model 2 +deform 1 +extract 1 +detect 1 +classif 1 +formul 1 +integr 1 +noisi 1 +imag 1 +case 1 +regular 1 +snake 1 +deriv 1 +energi 1 +yield 1 +consid 1 +arbitrari 1 +turn 1 +fung 0 +roland 0 +chin 0 +develop 0 +approach 0 +classifi 0 +directli 0 +conduct 0 +studi 0 +initi 0 +ofact 0 +us 0 +minimax 0 +principl 0 +criterion 0 +wherebi 0 +valu 0 +automat 0 +implicitli 0 +determin 0 +along 0 +furthermor 0 +function 0 +contain 0 +hough 0 +transform 0 +special 0 +subsequ 0 +problem 0 +combin 0 +stabl 0 +invari 0 +anduniqu 0 +markov 0 +random 0 +field 0 +priordistribut 0 +exert 0 +influenc 0 +global 0 +allow 0 +bayesian 0 +framework 0 +posterior 0 +estim 0 +equival 0 +minim 0 +gener 0 +activ 0 +final 0 +lower 0 +level 0 +visual 0 +task 0 +withpattern 0 +recognit 0 +process 0 +base 0 +nearman 0 +pearson 0 +lemma 0 +optim 0 +classificationtest 0 +summat 0 +peak 0 +practic 0 +applic 0 +small 0 +region 0 +need 0 +margin 0 +distribut 0 +valid 0 +confirm 0 +extens 0 +rigor 0 +experiment 0 +gsnake 0 +softwar 0 +avail 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..fd36982d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,198 @@ +data 3 +object 3 +displai 3 +lattic 2 +defin 2 +visual 2 +base 2 +function 1 +model 1 +system 1 +program 1 +scalar 1 +languag 1 +time 1 +type 1 +call 1 +mathemat 1 +contain 1 +comput 1 +approxim 1 +finit 1 +particular 1 +expressivenesscondit 1 +temperatur 1 +primit 1 +implement 1 +develop 1 +precis 1 +thu 1 +purpos 1 +map 1 +condit 1 +order 1 +relat 1 +arrai 1 +voxel 1 +us 1 +foundat 1 +process 1 +ofdata 1 +scientist 1 +infinit 1 +number 1 +amount 1 +inform 1 +pixel 1 +color 1 +ideal 1 +appropri 1 +fact 1 +show 1 +satisfi 1 +scientif 1 +specifi 1 +class 1 +variabl 1 +graphic 1 +design 1 +along 1 +control 1 +abstract 1 +bill 0 +hibbard 0 +brian 0 +paul 0 +chuck 0 +dyer 0 +theidea 0 +calleda 0 +prototyp 0 +adha 0 +upon 0 +idea 0 +guid 0 +follow 0 +principl 0 +natur 0 +objectsrepres 0 +objectsfrequ 0 +real 0 +functionswith 0 +domain 0 +wherea 0 +containfinit 0 +themathemat 0 +repres 0 +chosenfrom 0 +palett 0 +anim 0 +sequenc 0 +numbersof 0 +frame 0 +close 0 +relationship 0 +computationalmodel 0 +commun 0 +informationcont 0 +thatdisplai 0 +encod 0 +onlythos 0 +complet 0 +sens 0 +itimpl 0 +satisfyingth 0 +expressivenss 0 +onhow 0 +wecan 0 +howprecis 0 +voxelresolut 0 +therefor 0 +visualizationprocess 0 +objectsto 0 +interpret 0 +context 0 +theexpress 0 +isomorph 0 +scientificdata 0 +built 0 +primitivevari 0 +latitud 0 +radianc 0 +tupl 0 +constructor 0 +appropriatefor 0 +seri 0 +containsth 0 +canalso 0 +displayi 0 +graphicsprimit 0 +locationand 0 +size 0 +volum 0 +place 0 +animationsequ 0 +fromth 0 +thedisplai 0 +ofcours 0 +alreadi 0 +assum 0 +exampl 0 +given 0 +isnatur 0 +graph 0 +axi 0 +andtemperatur 0 +anoth 0 +remark 0 +thing 0 +wedo 0 +take 0 +assumpt 0 +consequ 0 +fundament 0 +calledvi 0 +adthat 0 +allow 0 +experi 0 +algorithm 0 +steer 0 +theircomput 0 +creat 0 +theirprogram 0 +thevi 0 +vvof 0 +thatsatisfi 0 +express 0 +howev 0 +implementationi 0 +quit 0 +flow 0 +auser 0 +interfac 0 +abstractionof 0 +render 0 +pipelin 0 +user 0 +interfacefor 0 +ofmap 0 +possibl 0 +recurs 0 +defineddata 0 +complex 0 +link 0 +tree 0 +ingener 0 +datatyp 0 +orient 0 +provid 0 +rigor 0 +help 0 +analyt 0 +altern 0 +usualapproach 0 +construct 0 +bywrit 0 +special 0 +fora 0 +specif 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..82cf54eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,52 @@ +condor 2 +comput 1 +project 1 +throughput 1 +high 1 +page 1 +homepag 0 +object 0 +goal 0 +develop 0 +implement 0 +deploi 0 +evaluatemechan 0 +polici 0 +support 0 +larg 0 +collect 0 +distribut 0 +own 0 +resourc 0 +guid 0 +technologicaland 0 +sociolog 0 +challeng 0 +environ 0 +team 0 +build 0 +softwar 0 +tool 0 +enabl 0 +scientist 0 +engin 0 +increas 0 +introduct 0 +start 0 +research 0 +system 0 +pool 0 +univers 0 +wisconsin 0 +madison 0 +help 0 +home 0 +world 0 +mail 0 +list 0 +comment 0 +suggestionscondor 0 +admin 0 +wisc 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..f55193df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,9 @@ +next 1 +homepag 0 +peopl 0 +project 0 +last 0 +modifi 0 +septemb 0 +miron 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..f62dfbf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,170 @@ +coral 3 +includ 2 +binari 2 +releas 1 +also 1 +system 1 +support 1 +declar 1 +provid 1 +inform 1 +databas 1 +program 1 +version 1 +instal 1 +relat 1 +project 1 +develop 1 +group 1 +languag 1 +interfac 1 +combin 1 +imper 1 +queri 1 +contain 1 +modul 1 +wide 1 +rang 1 +evalu 1 +strategi 1 +user 1 +disk 1 +resid 1 +data 1 +octob 1 +grab 1 +file 1 +nobin 1 +solari 1 +linux 1 +mail 1 +announc 1 +wisc 1 +projectcor 0 +projectdocu 0 +content 0 +objectiveoverviewreleas 0 +informationse 0 +public 0 +coralpeopl 0 +work 0 +coraloth 0 +research 0 +madisonobject 0 +object 0 +robust 0 +efficientdeduct 0 +investig 0 +variou 0 +applic 0 +domain 0 +sever 0 +algorithm 0 +underli 0 +coralsystem 0 +member 0 +durationof 0 +sinc 0 +overview 0 +deduct 0 +rich 0 +allow 0 +declaritiveand 0 +supportsgener 0 +horn 0 +claus 0 +augment 0 +complex 0 +term 0 +aggreg 0 +negat 0 +tupl 0 +univers 0 +quantifi 0 +variabl 0 +canb 0 +organ 0 +collect 0 +interact 0 +coralimplement 0 +andautomat 0 +choos 0 +effici 0 +modulein 0 +addit 0 +permit 0 +guid 0 +optim 0 +desir 0 +select 0 +among 0 +control 0 +choic 0 +atth 0 +level 0 +construct 0 +updat 0 +insertand 0 +delet 0 +rule 0 +canprogram 0 +extend 0 +withcor 0 +primit 0 +high 0 +degre 0 +extens 0 +allowingc 0 +programm 0 +class 0 +structur 0 +enhanc 0 +coralimplemen 0 +main 0 +memori 0 +us 0 +theexodusstorag 0 +manang 0 +transact 0 +manag 0 +aclient 0 +server 0 +environ 0 +current 0 +want 0 +sourc 0 +code 0 +requiringy 0 +compil 0 +made 0 +forth 0 +indic 0 +machin 0 +type 0 +click 0 +readm 0 +gener 0 +manual 0 +instruct 0 +hpux 0 +seri 0 +suno 0 +stai 0 +announcemnt 0 +listwhich 0 +reciev 0 +relev 0 +newsgroup 0 +comp 0 +lang 0 +misc 0 +submit 0 +question 0 +comment 0 +report 0 +send 0 +edulast 0 +modifi 0 +shawn 0 +flisakowski 0 +flisakow 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..e4725651 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,232 @@ +complementar 2 +problem 2 +gam 2 +applic 2 +algorithm 2 +solver 2 +newton 2 +system 2 +path 1 +point 1 +us 1 +research 1 +smooth 1 +nonsmooth 1 +page 1 +list 1 +link 1 +mile 1 +approxim 1 +equat 1 +paper 1 +econom 1 +sever 1 +inform 1 +interest 1 +also 1 +access 1 +file 1 +toolbox 1 +give 1 +model 1 +ferri 1 +implement 1 +well 1 +document 1 +theori 1 +softwar 1 +given 1 +directori 1 +relev 1 +mcplib 1 +matlab 1 +avail 1 +consist 1 +functionevalu 1 +detail 1 +help 1 +languag 1 +wisc 1 +method 1 +solut 1 +comput 1 +direct 1 +function 1 +emploi 1 +cannot 1 +linear 1 +subproblem 1 +appli 1 +sequenc 1 +iter 1 +step 1 +construct 1 +search 1 +strategi 1 +base 1 +uponreformul 1 +nation 1 +look 1 +michael 1 +interior 1 +neta 0 +result 0 +three 0 +decad 0 +subject 0 +divers 0 +engin 0 +scienc 0 +becom 0 +establish 0 +fruitfuldisciplin 0 +within 0 +mathemat 0 +program 0 +monograph 0 +survei 0 +basic 0 +role 0 +optim 0 +serv 0 +center 0 +regard 0 +incomplementar 0 +meetingsof 0 +commun 0 +pointer 0 +forcomplementar 0 +area 0 +tabl 0 +content 0 +researcherssoftwar 0 +collect 0 +nonlinear 0 +mix 0 +problemdescript 0 +sourc 0 +forthes 0 +evolv 0 +freeli 0 +andm 0 +allow 0 +frommatlab 0 +without 0 +spars 0 +jacobian 0 +evalu 0 +machin 0 +specificvers 0 +download 0 +interfac 0 +describ 0 +librari 0 +routin 0 +areavail 0 +hook 0 +contact 0 +steve 0 +rutherford 0 +colorado 0 +edufor 0 +extens 0 +classicaljosephi 0 +linearizedsubproblem 0 +lemk 0 +almost 0 +complementari 0 +pivot 0 +defineth 0 +dampedlinesearch 0 +merit 0 +measur 0 +violat 0 +infeas 0 +restartprocedur 0 +case 0 +totermin 0 +secondari 0 +everi 0 +rescal 0 +equilibr 0 +elementsappear 0 +data 0 +run 0 +mcpor 0 +directli 0 +techniqu 0 +similarto 0 +anonsmooth 0 +reformul 0 +algorithmconsist 0 +major 0 +anapproxim 0 +similar 0 +pathto 0 +aposs 0 +exist 0 +thepath 0 +entir 0 +along 0 +partiallycomput 0 +taken 0 +relinear 0 +anonmonoton 0 +watchdog 0 +avoid 0 +converg 0 +local 0 +minima 0 +norm 0 +forth 0 +underli 0 +keep 0 +number 0 +requir 0 +small 0 +possibl 0 +option 0 +robustnessimprov 0 +proxim 0 +perturb 0 +qpcomp 0 +ishandl 0 +deriv 0 +thenapproxim 0 +solv 0 +leadto 0 +zero 0 +origin 0 +form 0 +theaccuraci 0 +determin 0 +residu 0 +thecurr 0 +subsystem 0 +compar 0 +engineeringand 0 +mani 0 +known 0 +mpsge 0 +preprocessor 0 +thatallow 0 +equilibrium 0 +formul 0 +easili 0 +thegam 0 +home 0 +nemsth 0 +energi 0 +relat 0 +algorithmsand 0 +overview 0 +project 0 +trick 0 +oper 0 +pointmethod 0 +argonn 0 +laboratori 0 +archiv 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..ed73c891 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,56 @@ +exodu 1 +wisc 1 +project 1 +storag 1 +user 1 +manag 1 +benchmark 1 +zwill 1 +home 0 +pageexodu 0 +extens 0 +object 0 +orient 0 +databas 0 +system 0 +toolkitnot 0 +document 0 +construct 0 +succed 0 +theshor 0 +still 0 +provid 0 +minim 0 +support 0 +theexodu 0 +compil 0 +persistentprogram 0 +languag 0 +avail 0 +licens 0 +requir 0 +inform 0 +need 0 +contact 0 +eduprincip 0 +investig 0 +mike 0 +carei 0 +david 0 +dewittse 0 +also 0 +public 0 +relat 0 +exodusshor 0 +successor 0 +exoduslatest 0 +compilercontribut 0 +softwar 0 +managera 0 +mail 0 +list 0 +exodus_al 0 +oodbsdat 0 +prepar 0 +april 0 +michael 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..57efb131 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,196 @@ +paradis 3 +queri 2 +data 2 +databas 1 +object 1 +client 1 +server 1 +us 1 +inform 1 +support 1 +displai 1 +provid 1 +spatial 1 +attribut 1 +parallel 1 +system 1 +sampl 1 +also 1 +brows 1 +type 1 +graphic 1 +interfac 1 +polygon 1 +front 1 +layer 1 +creat 1 +applic 1 +contact 1 +relat 1 +shore 1 +project 1 +manag 1 +madison 1 +depart 1 +design 1 +implement 1 +geograph 1 +store 1 +manipul 1 +set 1 +subset 1 +issu 1 +model 1 +either 1 +result 1 +method 1 +area 1 +drop 1 +ship 1 +execut 1 +wisc 1 +projectparadis 0 +document 0 +content 0 +frontend 0 +informationse 0 +public 0 +peopl 0 +work 0 +extens 0 +optim 0 +report 0 +examin 0 +sequoia 0 +benchmark 0 +script 0 +vldb 0 +paper 0 +research 0 +group 0 +serverobject 0 +andevalu 0 +scalabl 0 +iscap 0 +massiv 0 +applyingobject 0 +orient 0 +technolog 0 +problem 0 +ofstor 0 +hope 0 +tosignificantli 0 +advanc 0 +size 0 +complex 0 +thatcan 0 +successfulli 0 +databasesystem 0 +aim 0 +handl 0 +providesa 0 +user 0 +andsupport 0 +paradiseprovid 0 +extend 0 +gisappl 0 +addit 0 +base 0 +asinteg 0 +real 0 +string 0 +built 0 +raster 0 +polylin 0 +point 0 +circl 0 +video 0 +mpeg 0 +imag 0 +underli 0 +persist 0 +allow 0 +spatialattribut 0 +foroverlap 0 +correspond 0 +custom 0 +order 0 +selectingcolor 0 +label 0 +withad 0 +issueimplicit 0 +zoom 0 +click 0 +sketch 0 +arubb 0 +band 0 +querycompos 0 +menu 0 +compos 0 +access 0 +databaseschema 0 +assist 0 +composit 0 +beview 0 +bedisplai 0 +tabl 0 +tupl 0 +context 0 +sensit 0 +help 0 +sqlwe 0 +ad 0 +abil 0 +invok 0 +defin 0 +extendedset 0 +exampl 0 +calcul 0 +byus 0 +standarddatabas 0 +oper 0 +includ 0 +anddrop 0 +extent 0 +indic 0 +insert 0 +updat 0 +current 0 +version 0 +emploi 0 +architectur 0 +syntax 0 +paradiseserv 0 +theresult 0 +back 0 +ismulti 0 +thread 0 +multipl 0 +connect 0 +sameserv 0 +sever 0 +carefulattent 0 +paid 0 +insur 0 +could 0 +effici 0 +processqueri 0 +especi 0 +involv 0 +largevolum 0 +frontendeurop 0 +pressher 0 +projectattn 0 +prof 0 +david 0 +dewittunivers 0 +wisconsin 0 +madisoncomput 0 +scienc 0 +west 0 +dayton 0 +streetmadison 0 +email 0 +edumor 0 +come 0 +biswadeep 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..100cb29a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,81 @@ +scout 2 +internet 1 +servic 1 +intern 1 +report 1 +student 1 +project 1 +best 1 +resourc 1 +wisconsin 1 +madison 1 +commun 1 +us 1 +provid 1 +suggest 1 +inform 1 +homepagego 0 +text 0 +versionnewslett 0 +newand 0 +newli 0 +discov 0 +network 0 +toolsinternet 0 +announc 0 +updat 0 +daili 0 +effectiveinternet 0 +tool 0 +availablea 0 +studentssurf 0 +smarter 0 +longer 0 +universityof 0 +show 0 +canchoos 0 +filter 0 +hundr 0 +annoucementseach 0 +week 0 +look 0 +valuabl 0 +onlin 0 +networktool 0 +organ 0 +summar 0 +annot 0 +vefound 0 +offer 0 +sever 0 +format 0 +goal 0 +support 0 +effect 0 +byeduc 0 +research 0 +howev 0 +everyon 0 +welcom 0 +useth 0 +public 0 +site 0 +encouragefeedback 0 +entir 0 +three 0 +primari 0 +includ 0 +happen 0 +thescout 0 +toolkit 0 +ournewest 0 +know 0 +locat 0 +depart 0 +comput 0 +scienc 0 +theunivers 0 +comment 0 +feedbackscout 0 +servicesfor 0 +educ 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..c6f7ae2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,481 @@ +shore 3 +object 3 +system 3 +list 2 +data 2 +file 2 +applic 2 +unix 2 +persist 2 +type 2 +mail 2 +releas 2 +languag 2 +subscrib 2 +project 2 +wisc 2 +shore_al 2 +support 2 +access 2 +scalabl 1 +hardwar 1 +softwar 1 +provid 1 +server 1 +version 1 +inform 1 +beta 1 +design 1 +name 1 +odmg 1 +processor 1 +messag 1 +content 1 +research 1 +program 1 +space 1 +compat 1 +exist 1 +model 1 +first 1 +us 1 +defin 1 +byte 1 +listproc 1 +document 1 +relat 1 +includ 1 +geograph 1 +basic 1 +manag 1 +develop 1 +like 1 +interfac 1 +text 1 +environ 1 +store 1 +singl 1 +orient 1 +featur 1 +major 1 +goal 1 +commerci 1 +oodbm 1 +technolog 1 +uniqu 1 +process 1 +larg 1 +structur 1 +make 1 +term 1 +link 1 +chang 1 +shore_support 1 +user 1 +interest 1 +digest 1 +also 1 +sourc 1 +binari 1 +public 1 +benchmark 1 +madison 1 +databas 1 +depart 1 +need 1 +informationsystem 1 +multipl 1 +programminglanguag 1 +field 1 +eas 1 +transit 1 +abl 1 +either 1 +describ 1 +much 1 +base 1 +effort 1 +concentr 1 +heterogen 1 +focu 1 +architectur 1 +peer 1 +ashor 1 +client 1 +vendor 1 +second 1 +build 1 +exampl 1 +futur 1 +digit 1 +avail 1 +retriev 1 +contain 1 +mechan 1 +differ 1 +bulk 1 +set 1 +close 1 +standard 1 +oodb 1 +inter 1 +share 1 +flatten 1 +time 1 +directli 1 +framework 1 +regist 1 +anonym 1 +legaci 1 +read 1 +string 1 +attribut 1 +question 1 +mani 1 +clutter 1 +weekli 1 +request 1 +help 1 +default 1 +repli 1 +rather 1 +entir 1 +subscript 1 +home 0 +pageshor 0 +high 0 +perform 0 +repositorydocu 0 +objectiveoverviewreleas 0 +informationmail 0 +listsse 0 +line 0 +shorepeopl 0 +work 0 +shorelatest 0 +summari 0 +arpaparadis 0 +built 0 +shoreexodu 0 +predecessor 0 +shoreoo 0 +oodbsshor 0 +photo 0 +albumuw 0 +group 0 +serverobject 0 +implement 0 +andevalu 0 +serv 0 +widevarieti 0 +target 0 +cadsystem 0 +satellit 0 +repositori 0 +multi 0 +media 0 +expand 0 +capabl 0 +wide 0 +usedexodusstorag 0 +wisconsin 0 +fund 0 +arpa 0 +number 0 +ofwai 0 +hierarch 0 +anda 0 +thisinterfac 0 +intend 0 +theunix 0 +tool 0 +viand 0 +withoutmodif 0 +becom 0 +shoreobject 0 +complex 0 +overview 0 +someth 0 +hybrid 0 +natur 0 +inheritingcharacterist 0 +fromfil 0 +section 0 +briefli 0 +ofshor 0 +paper 0 +greater 0 +detail 0 +three 0 +scalabilitysupport 0 +heterogeneitysupport 0 +applicationswhen 0 +began 0 +year 0 +uniqueamong 0 +commun 0 +degre 0 +languageheterogen 0 +turn 0 +facilit 0 +remain 0 +distinguish 0 +supportfor 0 +depend 0 +persistentstorag 0 +furthermor 0 +sinc 0 +basicallycompat 0 +expect 0 +eventu 0 +betransf 0 +sector 0 +architectureshor 0 +sever 0 +wai 0 +symmetr 0 +distributedarchitectur 0 +everi 0 +particip 0 +run 0 +whether 0 +disksattach 0 +network 0 +workstat 0 +parallel 0 +intel 0 +paragon 0 +contrast 0 +architectureus 0 +exodu 0 +serverarchitectur 0 +fine 0 +typicallyus 0 +notionof 0 +valu 0 +ad 0 +runsin 0 +extens 0 +mind 0 +rel 0 +simpl 0 +forus 0 +specif 0 +theparadis 0 +alreadi 0 +nasa 0 +seosdi 0 +feel 0 +piec 0 +plai 0 +aimport 0 +role 0 +varieti 0 +endeavor 0 +librari 0 +almost 0 +certainlydepend 0 +go 0 +manipul 0 +transmitobject 0 +video 0 +pictur 0 +well 0 +whilecurr 0 +product 0 +could 0 +orientedtoward 0 +deal 0 +gigabyt 0 +terabyt 0 +customiz 0 +equal 0 +import 0 +index 0 +queri 0 +libraryar 0 +requir 0 +heterogeneityobject 0 +neutraltyp 0 +embodi 0 +enhanc 0 +databasefeatur 0 +provis 0 +simplifi 0 +task 0 +ofsupport 0 +feasibleto 0 +mention 0 +earlier 0 +quit 0 +neutral 0 +definit 0 +wasrec 0 +propos 0 +consortium 0 +emphasi 0 +howev 0 +onprovid 0 +withina 0 +applicationsa 0 +enabl 0 +currentlyus 0 +untyp 0 +stop 0 +structuredobject 0 +conveni 0 +safe 0 +intra 0 +ultim 0 +hope 0 +displac 0 +orientedfil 0 +servic 0 +standpoint 0 +world 0 +manypersist 0 +flexibl 0 +tree 0 +reachabl 0 +indirectli 0 +give 0 +usersa 0 +familiar 0 +individualpersist 0 +root 0 +oflarg 0 +unnam 0 +realiz 0 +involvessever 0 +kind 0 +includingdirectori 0 +pool 0 +symbol 0 +cross 0 +refer 0 +unixappl 0 +compil 0 +editor 0 +fromtradit 0 +stream 0 +standardunix 0 +open 0 +write 0 +mkdir 0 +chdir 0 +order 0 +callsposs 0 +option 0 +onevari 0 +length 0 +charact 0 +asb 0 +attempt 0 +objectthrough 0 +counterpart 0 +callswil 0 +portion 0 +thatwish 0 +without 0 +possibl 0 +mount 0 +datacontain 0 +feasibl 0 +bothnew 0 +componentof 0 +morestructur 0 +latest 0 +tabl 0 +date 0 +approxim 0 +subject 0 +contact 0 +rleas 0 +sept 0 +improv 0 +completeimplement 0 +fix 0 +port 0 +tosolari 0 +linux 0 +august 0 +gzip 0 +sparc 0 +andpentium 0 +solari 0 +found 0 +atftp 0 +liststher 0 +eduand 0 +eduthi 0 +reach 0 +team 0 +usebi 0 +submit 0 +comment 0 +report 0 +cannot 0 +madisonc 0 +current 0 +unmoder 0 +unlikelyev 0 +get 0 +junk 0 +moder 0 +mailbox 0 +isalreadi 0 +sign 0 +belowfor 0 +sentwhen 0 +purpos 0 +notifi 0 +parti 0 +archiv 0 +sent 0 +sender 0 +beingpost 0 +want 0 +yourrepli 0 +copi 0 +thu 0 +anyon 0 +maysubscrib 0 +post 0 +existenceof 0 +shown 0 +return 0 +whenit 0 +yoursubscript 0 +conceal 0 +subscriberscannot 0 +obtain 0 +membership 0 +must 0 +specialmessag 0 +look 0 +receiv 0 +individu 0 +sendthi 0 +along 0 +send 0 +separ 0 +unsubscrib 0 +messageshould 0 +helplast 0 +modifi 0 +nanci 0 +hall 0 +nhall 0 +footnot 0 +odlshor 0 +concurr 0 +decid 0 +modelidl 0 +start 0 +point 0 +henc 0 +odlar 0 +similar 0 +anoth 0 +stabilizesw 0 +convert 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..d0b27b3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,31 @@ +madison 1 +abhinav 1 +page 1 +gupta 1 +wisc 1 +depart 1 +comput 1 +scienc 1 +home 0 +welcom 0 +agupta 0 +construct 0 +graduat 0 +student 0 +univers 0 +wisconsin 0 +contact 0 +residenceoffic 0 +kendal 0 +avenu 0 +dayton 0 +street 0 +interest 0 +link 0 +indian 0 +newspap 0 +stuff 0 +sport 0 +finger 0 +find 0 +whereabout 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..5cb120c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,13 @@ +alain 1 +home 0 +pagealain 0 +click 0 +larger 0 +pictur 0 +largest 0 +carnivor 0 +ever 0 +live 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..1f8fb872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,49 @@ +wisconsin 2 +univers 1 +biologi 1 +comput 1 +intellig 1 +allex 1 +scienc 1 +system 1 +molecular 1 +ismb 1 +carolyn 1 +machin 1 +learn 1 +group 1 +home 0 +page 0 +graduat 0 +studentbiotechnolog 0 +train 0 +program 0 +traineecomput 0 +departmentunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +mail 0 +wisc 0 +edutelephon 0 +advisor 0 +professor 0 +jude 0 +shavlikinterest 0 +sequenc 0 +protein 0 +fold 0 +artifici 0 +neural 0 +networkseduc 0 +madisonb 0 +purdu 0 +universityb 0 +educ 0 +mankato 0 +state 0 +relat 0 +link 0 +depart 0 +research 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..fbd960ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,182 @@ +amir 1 +page 1 +program 1 +friend 1 +home 1 +interest 1 +super 1 +roth 1 +madison 1 +wisc 1 +group 1 +seminar 1 +arch 1 +compil 1 +us 1 +out 1 +comput 1 +scienc 1 +univers 1 +multiscalar 1 +physic 1 +yale 1 +degre 1 +beauti 1 +marci 1 +go 1 +like 1 +think 1 +analysi 1 +barb 1 +delphi 0 +maven 0 +show 0 +erin 0 +occasionali 0 +updat 0 +copi 0 +resum 0 +cvte 0 +truth 0 +week 0 +research 0 +topic 0 +implement 0 +preprocessor 0 +deleg 0 +work 0 +project 0 +partner 0 +set 0 +airport 0 +metal 0 +detector 0 +existencei 0 +graduat 0 +student 0 +depart 0 +wisconsin 0 +advisor 0 +guri 0 +sohi 0 +look 0 +method 0 +allevi 0 +data 0 +depend 0 +distribut 0 +regist 0 +file 0 +side 0 +curli 0 +fri 0 +advanc 0 +nail 0 +design 0 +vallei 0 +school 0 +much 0 +practic 0 +live 0 +girlfriend 0 +cat 0 +charli 0 +also 0 +went 0 +get 0 +master 0 +public 0 +polici 0 +lafollett 0 +institut 0 +presid 0 +meantim 0 +solv 0 +linear 0 +regress 0 +problem 0 +wacki 0 +recip 0 +find 0 +magazin 0 +watch 0 +parti 0 +five 0 +eggplant 0 +peopl 0 +weird 0 +anywai 0 +promis 0 +subba 0 +officem 0 +daddi 0 +novemb 0 +titanium 0 +screw 0 +desi 0 +relaford 0 +terri 0 +mulholland 0 +oxygen 0 +carbon 0 +dioxid 0 +area 0 +vagu 0 +languag 0 +whack 0 +optim 0 +parallel 0 +algorithm 0 +theori 0 +good 0 +soul 0 +evalu 0 +model 0 +perform 0 +enhanc 0 +three 0 +point 0 +shot 0 +thing 0 +scaryarea 0 +rabid 0 +interestth 0 +love 0 +know 0 +talk 0 +better 0 +leav 0 +never 0 +return 0 +hmmm 0 +handyinformatik 0 +index 0 +journal 0 +author 0 +madcat 0 +architectur 0 +resourc 0 +minut 0 +score 0 +sportslin 0 +philli 0 +everybodi 0 +favorit 0 +engin 0 +ickyth 0 +new 0 +write 0 +articl 0 +gui 0 +read 0 +want 0 +kid 0 +drew 0 +cornel 0 +david 0 +wierd 0 +featur 0 +associ 0 +kemin 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..bbb664ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..f97ed87d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,131 @@ +amo 1 +page 1 +approxim 1 +theori 1 +home 1 +present 1 +download 1 +analysi 1 +version 1 +click 1 +site 1 +file 1 +group 1 +activ 1 +homepag 1 +wisconsin 1 +madison 1 +wisc 1 +us 1 +clickher 1 +compress 1 +vita 1 +list 1 +variou 1 +public 1 +includ 1 +inform 1 +found 1 +wish 1 +view 1 +line 1 +research 1 +word 0 +search 0 +engin 0 +spline 0 +wavelet 0 +boxsplin 0 +radial 0 +basi 0 +function 0 +shift 0 +invari 0 +space 0 +toscatt 0 +data 0 +multiquadr 0 +thin 0 +plate 0 +splinesthi 0 +netscap 0 +enhanc 0 +associ 0 +professordepart 0 +comput 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +telephon 0 +tabl 0 +linksat 0 +item 0 +access 0 +order 0 +paperaffin 0 +system 0 +operatorof 0 +zuowei 0 +shen 0 +choos 0 +follow 0 +unix 0 +otherwis 0 +uncompress 0 +fromher 0 +none 0 +work 0 +server 0 +copi 0 +directlyfrom 0 +accounther 0 +handout 0 +email 0 +want 0 +abstract 0 +select 0 +articlesof 0 +mine 0 +anonym 0 +carl 0 +boor 0 +maintain 0 +containspostscript 0 +postscript 0 +articl 0 +theapproxim 0 +also 0 +technic 0 +filesconcern 0 +gener 0 +recommend 0 +read 0 +provid 0 +avail 0 +student 0 +andpubl 0 +main 0 +area 0 +interest 0 +togeth 0 +short 0 +summari 0 +futur 0 +goal 0 +univeristi 0 +ofwisconsin 0 +numer 0 +link 0 +peopl 0 +commun 0 +miscellan 0 +topic 0 +final 0 +offici 0 +pleas 0 +deposit 0 +comment 0 +mailbox 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..1b95101c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andi 0 +therber 0 +home 0 +pageandi 0 +therberoffic 0 +sphone 0 +email 0 +andyt 0 +wisc 0 +eduzooresumebookmarksapplet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..1bedc660 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,18 @@ +arvind 1 +ranganathan 1 +workplac 1 +ranga 0 +erstwhil 0 +present 0 +indiaworld 0 +fascin 0 +world 0 +escher 0 +collect 0 +classic 0 +paper 0 +comput 0 +scienc 0 +finger 0 +log 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..244edda9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,35 @@ +ashish 1 +depart 1 +home 0 +page 0 +thusoo 0 +graduat 0 +student 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +madison 0 +come 0 +india 0 +hadmi 0 +undergradu 0 +educ 0 +indianinstitut 0 +technolog 0 +delhi 0 +iitd 0 +fantast 0 +place 0 +worth 0 +visit 0 +like 0 +contact 0 +canfing 0 +find 0 +whereabout 0 +altern 0 +send 0 +email 0 +ashisht 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..b10ed289 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,34 @@ +alexandria 1 +ashraf 1 +scienc 1 +univers 1 +aboulnaga 1 +home 1 +madison 1 +comput 1 +egypt 1 +section 1 +view 1 +grade 1 +pageashraf 0 +aboulnagacomput 0 +depart 0 +room 0 +wisconsin 0 +west 0 +dayton 0 +usaphon 0 +mail 0 +wisc 0 +edueduc 0 +juli 0 +june 0 +info 0 +offic 0 +hour 0 +desautel 0 +page 0 +last 0 +modifi 0 +septemb 0 +finger 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..5d67f323 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,24 @@ +atkinson 1 +phil 1 +home 1 +infooffic 1 +page 0 +pageucla 0 +bannon 0 +win 0 +ncaa 0 +basketbal 0 +championship 0 +seattl 0 +gener 0 +phone 0 +email 0 +wisc 0 +educurr 0 +researchsailinghors 0 +back 0 +ridingscuba 0 +divingc 0 +hour 0 +tuth 0 +appoint 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..639b1d1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,118 @@ +number 2 +comput 1 +bach 1 +interest 1 +theori 1 +algorithm 1 +problem 1 +page 1 +eric 1 +scienc 1 +univers 1 +email 1 +wisc 1 +theoret 1 +complex 1 +us 1 +effici 1 +exampl 1 +prime 1 +larg 1 +test 1 +wit 1 +question 1 +recent 1 +model 1 +proc 1 +home 0 +professor 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +california 0 +berkelei 0 +algebraicalgorithm 0 +cryptographi 0 +string 0 +automata 0 +research 0 +summari 0 +solvealgebra 0 +onetel 0 +digit 0 +without 0 +examin 0 +possiblefactor 0 +intrins 0 +mathemat 0 +well 0 +applic 0 +random 0 +gener 0 +code 0 +forreli 0 +secur 0 +inform 0 +transmiss 0 +algebra 0 +area 0 +also 0 +appli 0 +probabl 0 +designand 0 +analysi 0 +iscomposit 0 +prove 0 +simpl 0 +auxiliarynumb 0 +call 0 +practic 0 +usual 0 +find 0 +witnessbi 0 +direct 0 +search 0 +among 0 +small 0 +lead 0 +followingnatur 0 +least 0 +functionof 0 +work 0 +given 0 +accurateheurist 0 +base 0 +probabilist 0 +assumpt 0 +allowsthi 0 +similar 0 +answer 0 +public 0 +improv 0 +approxim 0 +euler 0 +product 0 +cnta 0 +canadian 0 +math 0 +proceed 0 +complet 0 +condon 0 +glaser 0 +tanguai 0 +annual 0 +conf 0 +volum 0 +shallit 0 +press 0 +info 0 +click 0 +curriculum 0 +vita 0 +creat 0 +juli 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..c952b521 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,58 @@ +system 1 +bart 1 +miller 1 +home 1 +page 1 +wisc 1 +project 1 +parallel 1 +tool 1 +oper 1 +distribut 1 +barton 0 +professorcomput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usath 0 +follow 0 +list 0 +thing 0 +research 0 +paradyn 0 +perform 0 +fuzz 0 +random 0 +softwar 0 +testingteach 0 +introduct 0 +spring 0 +honor 0 +internet 0 +seminar 0 +advanc 0 +fall 0 +director 0 +undergradu 0 +graduatesprofession 0 +symposium 0 +monona 0 +terrac 0 +frank 0 +lloyd 0 +wright 0 +convent 0 +center 0 +technic 0 +advisori 0 +groupperson 0 +offici 0 +depart 0 +famili 0 +photosbart 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..41f2e056 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,44 @@ +madison 1 +offic 1 +hour 1 +hyper 0 +home 0 +page 0 +benjamin 0 +teitelbaum 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +usaben 0 +wisc 0 +edursumquinc 0 +internet 0 +ultim 0 +word 0 +gamezillion 0 +bookmarksspr 0 +schedul 0 +browser 0 +support 0 +tabl 0 +look 0 +like 0 +garbag 0 +click 0 +someth 0 +readabl 0 +mondai 0 +tuesdai 0 +wednesdai 0 +thursdai 0 +fridai 0 +dbseminar 0 +osseminar 0 +condormeet 0 +miron 0 +plseminar 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..74b45f6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,206 @@ +comput 2 +program 2 +project 2 +problem 2 +bestor 2 +scienc 2 +home 1 +techniqu 1 +section 1 +fortran 1 +univers 1 +madison 1 +structur 1 +solv 1 +model 1 +student 1 +languag 1 +wisc 1 +research 1 +invers 1 +vision 1 +scene 1 +posit 1 +imag 1 +us 1 +projector 1 +robot 1 +cours 1 +engin 1 +page 1 +gareth 1 +wisconsin 1 +world 1 +zealand 1 +postscript 1 +observ 1 +base 1 +algorithm 1 +point 1 +explor 1 +group 1 +intend 1 +dissert 1 +teach 1 +depart 1 +telephon 1 +mail 1 +wide 1 +massei 1 +motion 1 +examin 1 +rigid 1 +concurr 1 +assumpt 1 +transform 1 +dimens 1 +requir 1 +navig 1 +environ 1 +interest 1 +machin 1 +cover 1 +basic 1 +prepar 1 +elementari 1 +experi 1 +instruct 1 +high 1 +school 1 +taught 1 +entir 1 +primarili 1 +major 1 +advanc 1 +mathemat 1 +pagewelcom 0 +pagegareth 0 +assist 0 +west 0 +dayton 0 +street 0 +click 0 +finger 0 +http 0 +system 0 +administr 0 +data 0 +librari 0 +servic 0 +observatori 0 +drive 0 +dpl 0 +dacc 0 +edueduc 0 +honor 0 +curriculum 0 +vita 0 +resum 0 +graduat 0 +coursework 0 +titl 0 +abstract 0 +import 0 +recov 0 +within 0 +essenti 0 +exist 0 +multipl 0 +howev 0 +extens 0 +practic 0 +sensit 0 +nois 0 +accur 0 +optic 0 +restrict 0 +call 0 +make 0 +instead 0 +camera 0 +tradition 0 +result 0 +defin 0 +geometr 0 +perspect 0 +given 0 +identifi 0 +constrain 0 +specifi 0 +minimum 0 +number 0 +also 0 +addit 0 +minim 0 +type 0 +error 0 +occur 0 +real 0 +applic 0 +allow 0 +approxim 0 +intersect 0 +current 0 +appli 0 +determin 0 +unknown 0 +time 0 +advisor 0 +prof 0 +charl 0 +dyer 0 +graphic 0 +virtual 0 +realiti 0 +artifici 0 +intellig 0 +learn 0 +duti 0 +spring 0 +introduct 0 +credit 0 +need 0 +prior 0 +knowledg 0 +assum 0 +materi 0 +enabl 0 +write 0 +simpl 0 +done 0 +receiv 0 +littl 0 +algebra 0 +construct 0 +least 0 +procedur 0 +orient 0 +pascal 0 +survei 0 +prereq 0 +colleg 0 +work 0 +statist 0 +logic 0 +consent 0 +instructor 0 +open 0 +pointer 0 +wiscinfo 0 +inform 0 +hoofer 0 +out 0 +club 0 +nextstep 0 +next 0 +softwar 0 +start 0 +internet 0 +lyco 0 +search 0 +keyword 0 +copyright 0 +copi 0 +last 0 +modifi 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..eabe5f1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,33 @@ +beyer 1 +wisc 1 +graduat 1 +project 1 +cours 1 +kevin 0 +home 0 +pagekevin 0 +beyerbey 0 +caution 0 +work 0 +student 0 +research 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +advisor 0 +raghu 0 +ramakrishnan 0 +area 0 +interest 0 +databas 0 +researchresearch 0 +coral 0 +local 0 +inform 0 +undergradu 0 +coursesinstruct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..d9269887 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,62 @@ +bezenek 1 +home 1 +window 1 +pith 1 +phone 1 +wisc 1 +todd 0 +page 0 +toddm 0 +back 0 +introduc 0 +actual 0 +faster 0 +cpu 0 +_great 0 +microprocessor 0 +past 0 +present_ 0 +uregina 0 +bayko 0 +html 0 +express 0 +locomot 0 +squeez 0 +skateboard 0 +size 0 +packag 0 +helen 0 +custer 0 +_insid 0 +microsoft 0 +press 0 +current 0 +cours 0 +advanc 0 +oper 0 +system 0 +bart 0 +miller 0 +pithi 0 +consist 0 +abound 0 +take 0 +yeah 0 +point 0 +skew 0 +associ 0 +cach 0 +access 0 +inform 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +madison 0 +offic 0 +mail 0 +edubezenek 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..7edc5cdb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,54 @@ +bockrath 1 +section 1 +nathan 1 +graduat 1 +student 1 +wisc 1 +viru 1 +info 1 +home 1 +teach 0 +assist 0 +averag 0 +pictur 0 +nate 0 +jpeg 0 +send 0 +email 0 +click 0 +held 0 +grade 0 +quiz 0 +review 0 +word 0 +macro 0 +make 0 +page 0 +offic 0 +hour 0 +anywai 0 +mondai 0 +wednesdai 0 +schedul 0 +distribut 0 +system 0 +simul 0 +model 0 +support 0 +free 0 +speech 0 +onlin 0 +site 0 +anoth 0 +dai 0 +back 0 +pageback 0 +depart 0 +pageoth 0 +neat 0 +stuff 0 +condor 0 +project 0 +internet 0 +oraclesend 0 +comment 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..d6b591ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,288 @@ +work 2 +system 1 +thing 1 +scienc 1 +time 1 +bolo 1 +year 1 +comput 1 +new 1 +burger 1 +develop 1 +softwar 1 +engin 1 +scientist 1 +hand 1 +home 1 +uwvax 1 +member 1 +associ 1 +although 1 +parent 1 +josef 1 +wai 1 +call 1 +mani 1 +person 1 +hacker 1 +design 1 +dewitt 1 +implement 1 +beer 1 +sleep 1 +wonder 1 +page 1 +tiger 1 +place 1 +bore 1 +like 1 +parallel 1 +everi 1 +els 1 +follow 1 +world 1 +databas 1 +object 1 +store 1 +shore 1 +wisconsin 1 +madison 1 +also 1 +provid 1 +rather 1 +internet 1 +usenet 1 +uucp 1 +along 1 +line 1 +depart 1 +part 1 +much 1 +try 1 +take 1 +care 1 +aircraft 1 +essen 1 +hau 1 +drink 1 +bolobologreet 0 +christen 0 +thoma 0 +roll 0 +mebolo 0 +everyon 0 +includ 0 +quit 0 +sure 0 +bestexplan 0 +question 0 +defin 0 +bywhat 0 +case 0 +semi 0 +real 0 +timeoper 0 +util 0 +last 0 +kernel 0 +unix 0 +administr 0 +creat 0 +appli 0 +construct 0 +realli 0 +though 0 +acomput 0 +degre 0 +pai 0 +david 0 +shudder 0 +methodolog 0 +right 0 +architect 0 +newoper 0 +type 0 +often 0 +sameto 0 +forth 0 +woodwork 0 +control 0 +draw 0 +brew 0 +complet 0 +relax 0 +enough 0 +myroomm 0 +disagre 0 +pursuit 0 +enjoi 0 +fly 0 +read 0 +fiction 0 +comic 0 +book 0 +railroad 0 +prototyp 0 +model 0 +role 0 +plai 0 +game 0 +notic 0 +imag 0 +sublim 0 +stripe 0 +creatur 0 +thetig 0 +appear 0 +throughout 0 +taken 0 +william 0 +blake 0 +poemtyg 0 +tyger 0 +put 0 +word 0 +road 0 +againin 0 +tremend 0 +leap 0 +insan 0 +purchas 0 +ahous 0 +address 0 +isjosef 0 +east 0 +gate 0 +roadmonona 0 +voic 0 +number 0 +workwork 0 +drive 0 +banana 0 +us 0 +grung 0 +either 0 +matur 0 +job 0 +chang 0 +perhapssom 0 +seem 0 +othermonth 0 +beat 0 +intosubmiss 0 +everyth 0 +moon 0 +andstar 0 +current 0 +project 0 +fordav 0 +famou 0 +gamma 0 +relat 0 +queri 0 +interpret 0 +paradis 0 +geograph 0 +inform 0 +orient 0 +data 0 +wiss 0 +storag 0 +whatev 0 +need 0 +done 0 +whole 0 +occur 0 +thecomput 0 +departmentof 0 +themadison 0 +campusof 0 +univers 0 +campu 0 +locat 0 +peninsula 0 +five 0 +lake 0 +workin 0 +addit 0 +consult 0 +solut 0 +advic 0 +technicalexpertis 0 +help 0 +port 0 +newsystem 0 +reviv 0 +oddbal 0 +tasksar 0 +kind 0 +tell 0 +tovisit 0 +serverbut 0 +haven 0 +anyth 0 +mostlyempti 0 +except 0 +friend 0 +activitiesuwvaxi 0 +oper 0 +site 0 +free 0 +that 0 +print 0 +someth 0 +longer 0 +svolunt 0 +master 0 +goe 0 +run 0 +howev 0 +reader 0 +across 0 +differentarchitectur 0 +task 0 +organizationsi 0 +organ 0 +alwai 0 +agre 0 +oftenhav 0 +good 0 +benefit 0 +usersof 0 +commun 0 +aopa 0 +owner 0 +pilot 0 +experiment 0 +usenix 0 +blitz 0 +drinkingwhen 0 +school 0 +hord 0 +friendsand 0 +visit 0 +local 0 +thursdai 0 +night 0 +import 0 +slowli 0 +entir 0 +select 0 +acquaint 0 +becam 0 +loftili 0 +labelledblitz 0 +societi 0 +divers 0 +meet 0 +ofoctoberfest 0 +weekend 0 +chud 0 +accumulateda 0 +short 0 +histori 0 +whatnotof 0 +charad 0 +pagelast 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..8948197b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,47 @@ +page 2 +home 1 +link 1 +comput 1 +system 1 +brad 1 +oper 1 +search 1 +welcom 0 +thayer 0 +homepag 0 +much 0 +mail 0 +scienc 0 +intro 0 +network 0 +possibl 0 +advanc 0 +would 0 +foolish 0 +neglect 0 +model 0 +interest 0 +thec 0 +us 0 +probabl 0 +bore 0 +check 0 +seminaranywai 0 +aim 0 +beaucoup 0 +boir 0 +pepper 0 +badger 0 +packer 0 +pagesom 0 +altavista 0 +enginefind 0 +email 0 +adress 0 +world 0 +wideth 0 +jazz 0 +duan 0 +mclaughlin 0 +pageuw 0 +athlet 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..553676ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,64 @@ +breach 2 +scott 1 +comput 1 +wisc 1 +wisconsin 1 +madison 1 +univers 1 +intern 1 +symposium 1 +gurindar 1 +home 0 +pagescott 0 +addresseseducationresearch 0 +interest 0 +public 0 +recreat 0 +associatesaddressesscott 0 +breachdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usatel 0 +educationph 0 +scienc 0 +engin 0 +carnegi 0 +mellon 0 +advisorguri 0 +sohiresearch 0 +interestscomput 0 +architecturemultiscalarpublicationsmultiscalar 0 +processorsgurindar 0 +sohi 0 +vijaykumarnd 0 +architectur 0 +anatomi 0 +regist 0 +file 0 +multiscalar 0 +processorscott 0 +vijaykumar 0 +sohith 0 +microarchitectur 0 +effici 0 +detect 0 +pointer 0 +arrai 0 +access 0 +errorstodd 0 +austin 0 +sohiconfer 0 +program 0 +languag 0 +design 0 +implement 0 +recreationwingsbeersquidtvassociatestodd 0 +austindoug 0 +burgerbabak 0 +falsafialain 0 +kagit 0 +vijaykumarlast 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..e1415ac6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,83 @@ +page 1 +suni 1 +albani 1 +fall 1 +name 1 +sinc 1 +bleed 0 +nontrivi 0 +waysher 0 +temporarili 0 +underst 0 +unadorn 0 +provid 0 +section 0 +pizza 0 +pool 0 +brief 0 +hobbi 0 +schedul 0 +spring 0 +stinkin 0 +bookmark 0 +poor 0 +unfortun 0 +myclass 0 +hypersensit 0 +rockjock 0 +cretin 0 +brood 0 +glare 0 +clenchesfist 0 +crack 0 +knuckl 0 +tragic 0 +flightyfemm 0 +get 0 +razz 0 +asskick 0 +thirdgrad 0 +perhap 0 +smooth 0 +skin 0 +hardbodi 0 +leatherboi 0 +leer 0 +atm 0 +whenev 0 +call 0 +roll 0 +differ 0 +make 0 +todayi 0 +giggl 0 +said 0 +becam 0 +aprostitut 0 +societi 0 +bigotri 0 +pedagodi 0 +isaac 0 +theblack 0 +goat 0 +refus 0 +stai 0 +claw 0 +hand 0 +sssuuuhhh 0 +mmuuuhhhh 0 +dddduuuuuhhhhh 0 +mmmmuuuhhhh 0 +maaaahhhjaaaaaahhhhh 0 +fffuuuhhhhh 0 +yyyyyyyuuuuuhhhhh 0 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 0 +uuuhhh 0 +uuummmm 0 +uuuhhhh 0 +wwwwwhhhhuuuuuhhhhh 0 +zhang 0 +wouldn 0 +notic 0 +eggleston 0 +smile 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..540d346c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,154 @@ +system 2 +prefetch 2 +applic 2 +cach 2 +file 2 +research 1 +perform 1 +parallel 1 +control 1 +page 1 +princeton 1 +felten 1 +proceed 1 +univers 1 +oper 1 +integr 1 +edward 1 +wisc 1 +comput 1 +recent 1 +alloc 1 +anna 1 +karlin 1 +disk 1 +polici 1 +educ 1 +interest 1 +cours 1 +high 1 +resourc 1 +depart 1 +tech 1 +report 1 +sigmetr 1 +confer 1 +implement 1 +princetonunivers 1 +osdi 1 +uniprocessor 1 +investig 1 +techniqu 1 +replac 1 +kernel 1 +physic 1 +home 0 +assist 0 +professor 0 +sciencedepart 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usacao 0 +eduphon 0 +department 0 +offic 0 +paper 0 +talk 0 +summari 0 +collect 0 +link 0 +tsinghua 0 +beij 0 +china 0 +memori 0 +project 0 +optim 0 +cachingacf 0 +topic 0 +distribut 0 +fall 0 +advanc 0 +spring 0 +trace 0 +simul 0 +access 0 +tracesrec 0 +papersintegr 0 +cachingtraci 0 +kimbrel 0 +novemb 0 +shorter 0 +version 0 +thesi 0 +also 0 +schedulingpei 0 +appear 0 +toc 0 +studi 0 +strategiespei 0 +peform 0 +first 0 +symposium 0 +slide 0 +present 0 +usenix 0 +summer 0 +technic 0 +tickertaip 0 +raid 0 +architectur 0 +swee 0 +boon 0 +shivakumar 0 +venkataraman 0 +john 0 +wilk 0 +isca 0 +talksslid 0 +postscript 0 +andpostscript 0 +summarymi 0 +focus 0 +storag 0 +manag 0 +andparallel 0 +particular 0 +improvefil 0 +specif 0 +filecach 0 +aggress 0 +data 0 +havedevelop 0 +individualappl 0 +respons 0 +decid 0 +useit 0 +us 0 +fairglob 0 +carefulli 0 +cachereplac 0 +schedul 0 +prototyp 0 +implementationon 0 +demonstratedthat 0 +good 0 +chosen 0 +strategi 0 +informationcan 0 +significantli 0 +improv 0 +mani 0 +current 0 +extend 0 +amdevelop 0 +algorithm 0 +diskarrai 0 +addit 0 +global 0 +managementproblem 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..b50ef899 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,284 @@ +system 2 +proc 2 +conf 2 +databas 2 +data 2 +work 1 +research 1 +manag 1 +perform 1 +project 1 +almaden 1 +object 1 +garlic 1 +comput 1 +madison 1 +area 1 +shore 1 +repositori 1 +heterogen 1 +effort 1 +experi 1 +time 1 +queri 1 +applic 1 +march 1 +persist 1 +franklin 1 +minneapoli 1 +univers 1 +center 1 +altern 1 +carei 1 +evalu 1 +interest 1 +next 1 +gener 1 +orient 1 +design 1 +complex 1 +workload 1 +base 1 +goal 1 +involv 1 +share 1 +build 1 +recent 1 +twelv 1 +student 1 +spent 1 +call 1 +extend 1 +languag 1 +octob 1 +multimedia 1 +codi 1 +haa 1 +niblack 1 +arya 1 +fagin 1 +flickner 1 +petkov 1 +schwarz 1 +thoma 1 +william 1 +wimmer 1 +toward 1 +workshop 1 +oodbm 1 +dewitt 1 +naughton 1 +livni 1 +septemb 1 +real 1 +server 1 +andm 1 +sigmod 1 +mike 0 +careymichael 0 +careyprofessor 0 +leav 0 +scienc 0 +depart 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +staff 0 +member 0 +harri 0 +road 0 +jose 0 +phone 0 +primari 0 +mail 0 +wisc 0 +eduresearch 0 +interestsdatabas 0 +parallel 0 +distribut 0 +appli 0 +main 0 +performanceand 0 +topicsof 0 +current 0 +includ 0 +tradeoff 0 +techniqu 0 +forobject 0 +algorithmsrel 0 +transact 0 +process 0 +schedul 0 +multi 0 +userdatabas 0 +user 0 +specifi 0 +theexodu 0 +extens 0 +dbm 0 +aimedat 0 +develop 0 +scalabl 0 +storag 0 +persistentobject 0 +environ 0 +whichi 0 +upon 0 +exodu 0 +meet 0 +objectmanag 0 +need 0 +replac 0 +unix 0 +file 0 +applicationssuch 0 +case 0 +move 0 +academia 0 +industri 0 +greatyear 0 +part 0 +becom 0 +best 0 +academ 0 +researchgroup 0 +known 0 +come 0 +tackl 0 +anddiffer 0 +challeng 0 +thesourc 0 +number 0 +paper 0 +teach 0 +forth 0 +past 0 +year 0 +relat 0 +signific 0 +fraction 0 +rel 0 +projectther 0 +multimediainform 0 +allow 0 +live 0 +varieti 0 +tobe 0 +manipul 0 +though 0 +resid 0 +homogen 0 +objectdatabas 0 +sabbat 0 +continuedto 0 +focus 0 +graduat 0 +aqueri 0 +browser 0 +front 0 +tool 0 +pesto 0 +thegarl 0 +locat 0 +public 0 +oodb 0 +access 0 +implement 0 +kiernan 0 +orientedprogram 0 +oopsla 0 +austin 0 +appear 0 +multipl 0 +content 0 +tork 0 +roth 0 +ifip 0 +confer 0 +visualdatabas 0 +lausann 0 +switzerland 0 +inform 0 +garlicapproach 0 +luniewski 0 +and 0 +ieee 0 +issu 0 +dataengin 0 +ride 0 +taipei 0 +taiwan 0 +statu 0 +report 0 +benchmark 0 +withd 0 +kant 0 +onobject 0 +program 0 +portland 0 +autom 0 +tune 0 +brown 0 +mehta 0 +thint 0 +larg 0 +santiago 0 +chile 0 +make 0 +initi 0 +smrc 0 +withb 0 +reinwald 0 +desslock 0 +lehman 0 +pirahesh 0 +srinivasan 0 +tarascon 0 +provenc 0 +franc 0 +hall 0 +mcauliff 0 +schuh 0 +solomon 0 +tsatalo 0 +white 0 +zwill 0 +sigmodint 0 +fine 0 +grain 0 +page 0 +zaharioudaki 0 +managementof 0 +memori 0 +pang 0 +accur 0 +model 0 +hybrid 0 +hash 0 +join 0 +algorithm 0 +patel 0 +vernon 0 +sigmetr 0 +measur 0 +modelingof 0 +nashvil 0 +index 0 +multivers 0 +lock 0 +bober 0 +technolog 0 +cambridg 0 +england 0 +client 0 +cach 0 +revisit 0 +indistribut 0 +oszu 0 +dayal 0 +andp 0 +valduriez 0 +morgan 0 +kaufmann 0 +publish 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..09ff4cc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,30 @@ +wisconsin 1 +chin 1 +univers 1 +offic 1 +cchin 1 +wisc 1 +biochemistri 1 +madison 1 +tang 0 +home 0 +pagechin 0 +tanggradu 0 +student 0 +depart 0 +west 0 +dayton 0 +streetmadison 0 +bldg 0 +mail 0 +edutelephon 0 +current 0 +assign 0 +introduct 0 +data 0 +structur 0 +hour 0 +mondai 0 +tuesdai 0 +fridai 0 +ameduc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..d6a770ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,37 @@ +chandra 1 +research 1 +satish 1 +soon 1 +home 0 +page 0 +wisc 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaphon 0 +interest 0 +summari 0 +public 0 +come 0 +real 0 +stuff 0 +wodehous 0 +book 0 +internet 0 +movi 0 +databas 0 +nostalgia 0 +york 0 +time 0 +altavista 0 +italian 0 +languag 0 +cultur 0 +miscellan 0 +linksclick 0 +log 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..b8ff738b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,91 @@ +chilimbi 1 +parallel 1 +trishul 1 +research 1 +comput 1 +wisconsin 1 +laru 1 +memori 1 +merit 1 +state 1 +page 1 +wisc 1 +click 1 +madison 1 +program 1 +compil 1 +share 1 +visual 1 +indian 1 +tool 1 +jame 1 +examin 1 +home 0 +real 0 +megradu 0 +assistantdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usaadvisor 0 +interest 0 +languag 0 +architectur 0 +integr 0 +messag 0 +pass 0 +perform 0 +analysi 0 +enhanc 0 +designresearch 0 +project 0 +wind 0 +tunneleduc 0 +univers 0 +tech 0 +institut 0 +technolog 0 +bombai 0 +summari 0 +publicationscachi 0 +automat 0 +insert 0 +cico 0 +annot 0 +intern 0 +confer 0 +process 0 +icpp 0 +august 0 +stormwatch 0 +system 0 +protocolstrishul 0 +thoma 0 +ball 0 +stephen 0 +eick 0 +supercomput 0 +appear 0 +decemb 0 +award 0 +honor 0 +certif 0 +mathemat 0 +olympiadpresid 0 +gold 0 +medal 0 +nation 0 +physic 0 +examinationcertif 0 +chemistrycertif 0 +electron 0 +miscellan 0 +movi 0 +dream 0 +curriculum 0 +vita 0 +last 0 +updat 0 +mail 0 +suggest 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..558d9eb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,31 @@ +chandrasekaran 1 +sashikanth 1 +wisconsin 1 +home 0 +page 0 +csashi 0 +wisc 0 +graduat 0 +studentdepart 0 +comput 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaadvisor 0 +mark 0 +hill 0 +project 0 +educ 0 +btech 0 +indian 0 +institut 0 +technolog 0 +madra 0 +june 0 +univeristi 0 +depart 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..3c36b6af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,106 @@ +databas 1 +page 1 +paradis 1 +comput 1 +site 1 +curt 1 +scienc 1 +madison 1 +relat 1 +home 1 +wiscinfo 1 +ellmann 1 +univers 1 +wisconsin 1 +wisc 1 +item 1 +eosdi 1 +standard 1 +inform 1 +intern 1 +dienst 1 +project 0 +depart 0 +focu 0 +java 0 +develop 0 +webgnat 0 +defect 0 +track 0 +prototyp 0 +index 0 +shore 0 +previou 0 +life 0 +doit 0 +opengi 0 +consortium 0 +global 0 +posit 0 +system 0 +calmit 0 +nebraska 0 +lincoln 0 +feder 0 +approach 0 +object 0 +manag 0 +group 0 +free 0 +list 0 +transact 0 +process 0 +perform 0 +council 0 +illustra 0 +white 0 +papersmiscellan 0 +sitescampu 0 +wyrm 0 +hoard 0 +gopher 0 +librari 0 +wiscnet 0 +netcorpor 0 +appl 0 +microsoft 0 +research 0 +land 0 +paww 0 +commerc 0 +metrowerk 0 +taligentsearch 0 +savvi 0 +search 0 +webcrawl 0 +open 0 +text 0 +worm 0 +network 0 +organ 0 +internet 0 +draft 0 +dilbert 0 +world 0 +onlin 0 +winsock 0 +applic 0 +current 0 +weather 0 +map 0 +implement 0 +geolog 0 +survei 0 +govern 0 +locat 0 +gil 0 +oakridg 0 +nation 0 +center 0 +stock 0 +market 0 +datacurt 0 +ellmanncurt 0 +eduparadis 0 +projectdepart 0 +sciencesunivers 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..e74b352e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,22 @@ +chee 1 +yong 1 +madison 1 +home 0 +pagechan 0 +graduat 0 +student 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +email 0 +cychan 0 +wisc 0 +offic 0 +phone 0 +page 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..5cad9024 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,45 @@ +comput 1 +engin 1 +sara 1 +madison 1 +scienc 1 +home 1 +page 1 +univers 1 +wisconsin 1 +depart 1 +physic 1 +bauman 0 +dailei 0 +baumandailei 0 +wisc 0 +edugradu 0 +program 0 +mathemat 0 +mace 0 +mechan 0 +astronaut 0 +nuclear 0 +educ 0 +math 0 +lewi 0 +clark 0 +colleg 0 +research 0 +work 0 +public 0 +current 0 +schedul 0 +link 0 +friend 0 +pagessend 0 +mail 0 +offic 0 +address 0 +statist 0 +west 0 +dayton 0 +street 0 +last 0 +modifi 0 +daileytu 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..f9363d21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,313 @@ +comput 2 +memori 2 +wood 2 +david 2 +architectur 2 +share 2 +system 2 +simul 2 +cach 2 +perform 2 +lebeck 2 +research 1 +parallel 1 +reinhardt 1 +mark 1 +user 1 +jame 1 +laru 1 +wisconsin 1 +program 1 +hardwar 1 +anddavid 1 +ieee 1 +intern 1 +symposium 1 +network 1 +interfac 1 +fine 1 +hill 1 +time 1 +design 1 +implement 1 +tool 1 +babak 1 +falsafi 1 +grain 1 +proceed 1 +protocol 1 +level 1 +alvin 1 +steven 1 +control 1 +tempest 1 +typhoon 1 +develop 1 +fast 1 +refer 1 +multiprocessor 1 +evalu 1 +techniqu 1 +wind 1 +tunnel 1 +univers 1 +machin 1 +support 1 +isca 1 +coher 1 +abstract 1 +access 1 +case 1 +messag 1 +transpar 1 +block 1 +page 1 +wisc 1 +juli 1 +especi 1 +includ 1 +project 1 +wart 1 +california 1 +berkelei 1 +current 1 +graduat 1 +student 1 +hyder 1 +pfile 1 +introduct 1 +architecturec 1 +advanc 1 +recent 1 +distribut 1 +supercomput 1 +invalid 1 +overhead 1 +ioanni 1 +schoina 1 +softwar 1 +new 1 +paradigm 1 +integr 1 +pass 1 +exist 1 +mechan 1 +allow 1 +programm 1 +data 1 +read 1 +us 1 +processor 1 +also 1 +specifi 1 +state 1 +rewrit 1 +home 0 +associ 0 +professor 0 +scienceand 0 +electr 0 +engineeringdepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usadavid 0 +eduphon 0 +secretari 0 +fingerson 0 +thea 0 +sklenar 0 +department 0 +offic 0 +interest 0 +uniprocessor 0 +oper 0 +analysi 0 +vlsi 0 +power 0 +portabl 0 +educ 0 +steve 0 +brian 0 +toonenrec 0 +rahmat 0 +intel 0 +alvi 0 +duke 0 +microsystem 0 +callaghan 0 +informix 0 +cours 0 +teach 0 +fall 0 +organ 0 +programmingc 0 +select 0 +paper 0 +decoupl 0 +memorysteven 0 +robert 0 +communicationshubhendu 0 +mukherje 0 +synchron 0 +workstat 0 +costrahmat 0 +confer 0 +dynam 0 +self 0 +reduc 0 +multiprocessorsalvin 0 +june 0 +activ 0 +simulationalvin 0 +sigmetricsmai 0 +accuraci 0 +interconnect 0 +dougla 0 +burger 0 +process 0 +april 0 +applic 0 +specif 0 +ann 0 +roger 0 +asplo 0 +profil 0 +spec 0 +benchmark 0 +studi 0 +octob 0 +cooper 0 +scalabl 0 +transact 0 +toc 0 +novemb 0 +annot 0 +bibliographi 0 +decemb 0 +line 0 +version 0 +revis 0 +frequent 0 +madhusudhan 0 +talluri 0 +august 0 +summari 0 +main 0 +goal 0 +cost 0 +effect 0 +computerarchitectur 0 +take 0 +advantag 0 +rapidli 0 +chang 0 +technolog 0 +myresearch 0 +major 0 +thrust 0 +feasibl 0 +correct 0 +facilit 0 +focuss 0 +follow 0 +three 0 +area 0 +multi 0 +effici 0 +hybridprogram 0 +virtual 0 +prototyp 0 +exploit 0 +similaritesof 0 +hypothet 0 +understand 0 +tune 0 +result 0 +calledtempest 0 +handler 0 +suppliedmechan 0 +provid 0 +compil 0 +librari 0 +hybrid 0 +combin 0 +tempestmechan 0 +bulk 0 +transfer 0 +virtualmemori 0 +manag 0 +novelmechan 0 +tagblock 0 +byte 0 +write 0 +theloc 0 +remot 0 +explor 0 +altern 0 +wai 0 +first 0 +call 0 +propos 0 +hardwareplatform 0 +fulli 0 +revers 0 +translationt 0 +rtlb 0 +invok 0 +detect 0 +grainaccess 0 +fault 0 +found 0 +thata 0 +run 0 +performscompar 0 +anal 0 +five 0 +memoryprogram 0 +method 0 +thatoptim 0 +common 0 +hit 0 +significantli 0 +reducingsimul 0 +tightli 0 +gener 0 +byprovid 0 +tag 0 +referenceinvok 0 +function 0 +depend 0 +upon 0 +type 0 +andmemori 0 +processedbi 0 +manipul 0 +special 0 +null 0 +functionfor 0 +action 0 +usingbinari 0 +tabl 0 +lookup 0 +memoryrefer 0 +sparcstat 0 +tothre 0 +faster 0 +convent 0 +trace 0 +driven 0 +thatcal 0 +procedur 0 +onlythre 0 +slower 0 +origin 0 +instrument 0 +investig 0 +binari 0 +techniquesto 0 +platform 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..0cd9504f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,30 @@ +doug 1 +burger 1 +home 1 +comput 1 +page 0 +pageprofession 0 +inform 0 +research 0 +summaryresum 0 +cvtranscriptcours 0 +projectsadvisoraffili 0 +project 0 +galileo 0 +sciwisconsin 0 +wind 0 +tunnelpag 0 +maintain 0 +architectureuw 0 +architecturesimplescalar 0 +tool 0 +setgenericasacmperson 0 +stuff 0 +meus 0 +linksphoto 0 +galleryrid 0 +demonhunt 0 +damn 0 +catsbewar 0 +grad 0 +school 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..25744348 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,172 @@ +inform 1 +approxim 1 +boor 1 +click 1 +journal 1 +carl 1 +file 1 +variou 1 +check 1 +publish 1 +paul 1 +home 1 +page 1 +comput 1 +wisconsin 1 +madison 1 +fall 1 +email 1 +wisc 1 +look 1 +recent 1 +theori 1 +read 1 +clickabl 1 +version 1 +list 1 +errata 1 +numer 1 +analysi 1 +spline 1 +ditto 1 +thank 1 +nevai 1 +find 1 +also 1 +us 1 +pinku 1 +last 0 +chang 0 +professor 0 +scienc 0 +mathematicsdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usaoffic 0 +hour 0 +town 0 +schoenberg 0 +work 0 +death 0 +deboor 0 +telephon 0 +schedul 0 +teach 0 +former 0 +present 0 +student 0 +select 0 +articl 0 +written 0 +areavail 0 +anonym 0 +approx 0 +provid 0 +access 0 +individu 0 +theclick 0 +button 0 +small 0 +subset 0 +author 0 +third 0 +edit 0 +cont 0 +elementari 0 +algorithm 0 +approach 0 +print 0 +practic 0 +guid 0 +latest 0 +thevari 0 +program 0 +driver 0 +latter 0 +book 0 +ofapproxim 0 +academ 0 +press 0 +includ 0 +accept 0 +publishedpap 0 +well 0 +postal 0 +address 0 +mani 0 +andmuch 0 +much 0 +forconstruct 0 +springer 0 +verlag 0 +foreast 0 +search 0 +theirtabl 0 +content 0 +singli 0 +combin 0 +thishandi 0 +tool 0 +alsoapproxim 0 +amo 0 +slist 0 +homepag 0 +bibliographi 0 +avail 0 +link 0 +peopl 0 +resourc 0 +ila 0 +center 0 +seek 0 +shall 0 +organ 0 +introduct 0 +joi 0 +seeviva_vi 0 +alsoon 0 +screen 0 +tutori 0 +great 0 +pictur 0 +hermit 0 +place 0 +contain 0 +html 0 +thehtml 0 +primermight 0 +even 0 +better 0 +unusu 0 +ever_chang 0 +david 0 +griffeath 0 +sprimordi 0 +soup 0 +kitchen 0 +interest 0 +seeodd 0 +end 0 +allan 0 +techunix 0 +technion 0 +nevaiif 0 +makehi 0 +mathemat 0 +outputavail 0 +cours 0 +math 0 +hous 0 +next 0 +door 0 +occupi 0 +taki 0 +souganid 0 +andthaleia 0 +zariphopoul 0 +szego 0 +bust 0 +stand 0 +inscript 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..dae3bcf6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,175 @@ +data 2 +devis 2 +visual 2 +queri 1 +record 1 +livni 1 +raghu 1 +ramakrishnan 1 +environ 1 +explor 1 +input 1 +link 1 +confer 1 +graphic 1 +help 1 +user 1 +need 1 +base 1 +scienc 1 +medicin 1 +inform 1 +jussi 1 +myllymaki 1 +proceed 1 +spie 1 +analysi 1 +michael 1 +cheng 1 +miron 1 +releas 1 +support 1 +time 1 +hotlin 1 +home 0 +pagedevis 0 +visualizationt 0 +content 0 +featuresexamplesin 0 +depthpublicationsrel 0 +workreleasecontactsfeaturesthes 0 +featur 0 +distinguish 0 +interfac 0 +construct 0 +oneset 0 +save 0 +appli 0 +larger 0 +memori 0 +effici 0 +handl 0 +map 0 +level 0 +cancontrol 0 +color 0 +shape 0 +individu 0 +abil 0 +us 0 +repres 0 +flexibl 0 +layout 0 +mechan 0 +within 0 +window 0 +group 0 +togeth 0 +comparison 0 +asid 0 +ax 0 +cursor 0 +compar 0 +relationship 0 +differ 0 +viewsof 0 +direct 0 +ascii 0 +file 0 +integ 0 +float 0 +date 0 +string 0 +type 0 +examplescheck 0 +follow 0 +exampl 0 +cool 0 +pictur 0 +quick 0 +introduct 0 +tree 0 +validationmolecular 0 +biologi 0 +cell 0 +imag 0 +soil 0 +birch 0 +clusteringfinanci 0 +explorationfamili 0 +nation 0 +climatedata 0 +centergeograph 0 +systemsoil 0 +sciencefil 0 +serverprogram 0 +tracesclin 0 +mani 0 +moreexampl 0 +viewer 0 +famili 0 +depthfor 0 +detail 0 +descript 0 +model 0 +visualizationvisu 0 +interfaceperform 0 +issuespublicationsmiron 0 +larg 0 +dataset 0 +dataexplor 0 +januari 0 +stream 0 +inproceed 0 +andanalysi 0 +februari 0 +praveenseshadri 0 +next 0 +sequencequeri 0 +intern 0 +themanag 0 +comad 0 +decemb 0 +relat 0 +workth 0 +seqproject 0 +complementari 0 +design 0 +queryrecord 0 +sequenc 0 +output 0 +bevisu 0 +informationw 0 +current 0 +version 0 +executablesfor 0 +solari 0 +platform 0 +dynam 0 +ld_library_path 0 +variabl 0 +appropri 0 +rundevis 0 +architectur 0 +execut 0 +arestat 0 +requir 0 +shareabl 0 +librari 0 +download 0 +click 0 +contactsfor 0 +research 0 +project 0 +contactmiron 0 +guangshun 0 +chen 0 +kent 0 +wenger 0 +send 0 +mail 0 +usersupport 0 +page 0 +access 0 +sinc 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..3b0f3a3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,200 @@ +shore 2 +object 2 +system 2 +data 2 +paradis 1 +unix 1 +dewitt 1 +geograph 1 +project 1 +univers 1 +wisconsin 1 +inform 1 +wide 1 +target 1 +hardwar 1 +store 1 +larg 1 +naughton 1 +page 1 +david 1 +email 1 +wisc 1 +orient 1 +databas 1 +parallel 1 +research 1 +persist 1 +need 1 +applic 1 +basic 1 +manag 1 +develop 1 +text 1 +field 1 +file 1 +complex 1 +client 1 +server 1 +carei 1 +gamma 1 +set 1 +current 1 +relat 1 +match 1 +emploi 1 +recent 1 +proceed 1 +sigmod 1 +confer 1 +talk 1 +home 0 +professor 0 +romn 0 +fellow 0 +comput 0 +scienc 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +michigan 0 +interest 0 +databasebenchmark 0 +summari 0 +main 0 +objectiveof 0 +design 0 +implement 0 +evalu 0 +objectsystem 0 +serv 0 +varieti 0 +applicationsinclud 0 +softwar 0 +programminglanguag 0 +satellit 0 +repositori 0 +multimedia 0 +expand 0 +capabilitiesof 0 +us 0 +exodu 0 +storag 0 +fund 0 +arpa 0 +number 0 +wai 0 +includ 0 +support 0 +typedobject 0 +multipl 0 +program 0 +languag 0 +like 0 +hierarchicalnam 0 +space 0 +name 0 +compat 0 +interfaceto 0 +interfac 0 +intend 0 +toeas 0 +transit 0 +systemenviron 0 +exist 0 +tool 0 +ccwill 0 +abl 0 +without 0 +modif 0 +becom 0 +either 0 +singl 0 +orth 0 +rang 0 +environ 0 +scale 0 +fromindividu 0 +workstat 0 +heterogen 0 +networksto 0 +multiprocessor 0 +intel 0 +paragon 0 +ajoint 0 +prof 0 +solomon 0 +attempt 0 +appli 0 +technolog 0 +developeda 0 +part 0 +relationaldatabas 0 +thetask 0 +manipul 0 +mani 0 +databasesystem 0 +hold 0 +excel 0 +formanag 0 +busi 0 +poor 0 +modelingne 0 +must 0 +capabl 0 +manipulatingmuch 0 +polygon 0 +polylin 0 +instead 0 +model 0 +provid 0 +muchbett 0 +type 0 +anoth 0 +signific 0 +differencefrom 0 +parallelismto 0 +facilit 0 +execut 0 +process 0 +assatellit 0 +imag 0 +platform 0 +projecti 0 +cluster 0 +sparc 0 +connect 0 +sampl 0 +public 0 +benchmark 0 +withm 0 +washington 0 +persistentappl 0 +franklin 0 +hall 0 +mcauliff 0 +chuh 0 +tsatalo 0 +white 0 +zwill 0 +intern 0 +conferenceon 0 +minneapoli 0 +kabra 0 +patel 0 +proceedingsof 0 +base 0 +santiego 0 +chile 0 +august 0 +vldb 0 +invit 0 +summit 0 +present 0 +automat 0 +creat 0 +januari 0 +pub 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..946ba5b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,326 @@ +dyer 2 +vision 2 +comput 2 +imag 2 +control 2 +view 2 +motion 2 +proc 2 +object 2 +seitz 2 +model 2 +visual 2 +scene 2 +algorithm 2 +base 1 +surfac 1 +us 1 +kutulako 1 +three 1 +shape 1 +appear 1 +interact 1 +global 1 +data 1 +recognit 1 +conf 1 +wisconsin 1 +develop 1 +real 1 +camera 1 +user 1 +recent 1 +viewpoint 1 +displai 1 +purpos 1 +dimension 1 +represent 1 +activ 1 +environ 1 +move 1 +morph 1 +order 1 +behavior 1 +explor 1 +observ 1 +defin 1 +reconstruct 1 +analysi 1 +cyclic 1 +invari 1 +pattern 1 +workshop 1 +seal 1 +detect 1 +hibbard 1 +charl 1 +wisc 1 +area 1 +interest 1 +synthesi 1 +research 1 +time 1 +virtual 1 +input 1 +path 1 +combin 1 +without 1 +interpol 1 +continu 1 +correspond 1 +center 1 +orient 1 +import 1 +unknown 1 +mark 1 +build 1 +need 1 +gener 1 +provabl 1 +recov 1 +understand 1 +scientif 1 +system 1 +public 1 +period 1 +toward 1 +contour 1 +ieee 1 +artifici 1 +intellig 1 +point 1 +affin 1 +paul 1 +spring 1 +fall 1 +allmen 1 +stewart 1 +kjell 1 +home 0 +pagecharl 0 +dyerprofessordepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +edutelephon 0 +finger 0 +infoph 0 +univers 0 +maryland 0 +curriculum 0 +vita 0 +visualizationgroup 0 +leader 0 +groupprogram 0 +chair 0 +cvpr 0 +synthesisth 0 +goal 0 +work 0 +basic 0 +tool 0 +controllingin 0 +either 0 +autonom 0 +cameraof 0 +videostream 0 +acquir 0 +fix 0 +mobil 0 +around 0 +site 0 +output 0 +panoram 0 +whicha 0 +technolog 0 +could 0 +navig 0 +througha 0 +custom 0 +thesit 0 +predetermin 0 +main 0 +researchquest 0 +adapt 0 +basi 0 +synthesizenew 0 +reconstructiona 0 +intermedi 0 +step 0 +innovativetechniqu 0 +callview 0 +take 0 +basisimag 0 +rang 0 +linear 0 +connect 0 +explorationcomput 0 +start 0 +investig 0 +howto 0 +acquisit 0 +process 0 +controllingcamera 0 +paramet 0 +studi 0 +purposefulli 0 +theposit 0 +dynam 0 +adjustviewpoint 0 +theus 0 +forsolv 0 +task 0 +findspecif 0 +unknownshap 0 +recogn 0 +coordin 0 +simpl 0 +chang 0 +appearanceof 0 +well 0 +simplifi 0 +computationsrequir 0 +make 0 +precis 0 +progress 0 +andelimin 0 +accur 0 +differenti 0 +measur 0 +thecamera 0 +believ 0 +approach 0 +towardsviewpoint 0 +close 0 +relat 0 +geometri 0 +viewedobject 0 +thisapproach 0 +correct 0 +asid 0 +revolut 0 +smooth 0 +arbitrarili 0 +visualizationin 0 +map 0 +techniquescap 0 +possibl 0 +type 0 +specificgraph 0 +procedur 0 +capabl 0 +displayingarbitrari 0 +commonfram 0 +refer 0 +coupl 0 +algorithmexecut 0 +provid 0 +power 0 +especi 0 +experi 0 +dataanalysi 0 +implement 0 +call 0 +forexperi 0 +techniqu 0 +visualizingintermedi 0 +final 0 +result 0 +forproblem 0 +discrimin 0 +cloud 0 +satellit 0 +trace 0 +shah 0 +jain 0 +kluwer 0 +boston 0 +siggraph 0 +track 0 +recoveri 0 +stationari 0 +advanc 0 +festschrift 0 +azriel 0 +rosenfeld 0 +societi 0 +press 0 +alamito 0 +complet 0 +four 0 +physic 0 +valid 0 +adjust 0 +lumelski 0 +strategi 0 +guid 0 +dimens 0 +robot 0 +autom 0 +occlud 0 +irregular 0 +rigid 0 +articul 0 +battaiola 0 +santek 0 +voidrot 0 +martinez 0 +earth 0 +space 0 +scienc 0 +juli 0 +lattic 0 +includ 0 +abstract 0 +groupcours 0 +taught 0 +introduct 0 +current 0 +student 0 +gareth 0 +bestor 0 +brian 0 +morgan 0 +steve 0 +liangyin 0 +yuph 0 +graduat 0 +bill 0 +whibbard 0 +macc 0 +onlattic 0 +structur 0 +kiriako 0 +kyro 0 +rochest 0 +ofobserv 0 +iutech 0 +sequenc 0 +descript 0 +spatiotempor 0 +flow 0 +curv 0 +brent 0 +dimensionalshap 0 +machin 0 +graphic 0 +harri 0 +plantinga 0 +wheaton 0 +viewer 0 +representationfor 0 +connectionist 0 +stereo 0 +bradlei 0 +ccsua 0 +ctstateu 0 +edg 0 +separ 0 +textur 0 +measureslink 0 +interestmi 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..50c9a760 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,19 @@ +home 1 +page 1 +welcom 1 +friend 0 +machinew 0 +appreci 0 +patienc 0 +long 0 +arduou 0 +task 0 +bring 0 +better 0 +check 0 +educ 0 +curriculum 0 +vitaecheck 0 +class 0 +teach 0 +section 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..3498ea14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,165 @@ +scout 1 +page 1 +check 1 +comput 1 +scienc 1 +servic 1 +email 1 +molecular 1 +biologi 1 +time 1 +video 1 +make 1 +madison 1 +intern 1 +wait 1 +thing 1 +depart 1 +univers 1 +explain 1 +also 1 +assist 1 +support 1 +mac 1 +digit 1 +purpos 1 +multimedia 1 +complet 1 +link 1 +eric 0 +hazen 0 +home 0 +pageer 0 +hazennon 0 +professorroom 0 +west 0 +dayton 0 +current 0 +work 0 +fornet 0 +project 0 +part 0 +registr 0 +couldn 0 +help 0 +domain 0 +name 0 +problem 0 +even 0 +want 0 +neat 0 +monei 0 +locat 0 +ofwisconsin 0 +see 0 +could 0 +never 0 +elegantli 0 +fearless 0 +leader 0 +susan 0 +calcari 0 +offici 0 +explan 0 +design 0 +mainten 0 +site 0 +list 0 +report 0 +pete 0 +devri 0 +withtech 0 +unix 0 +machin 0 +come 0 +spent 0 +half 0 +year 0 +laboratori 0 +full 0 +student 0 +well 0 +call 0 +technic 0 +specialist 0 +meant 0 +around 0 +fix 0 +peopl 0 +broken 0 +mice 0 +answer 0 +question 0 +lucki 0 +cool 0 +interest 0 +us 0 +instruct 0 +graduat 0 +wisconsin 0 +philosophi 0 +program 0 +philosoph 0 +real 0 +commod 0 +capitalist 0 +societi 0 +requir 0 +curriculum 0 +shameless 0 +pragmatist 0 +talk 0 +sinc 0 +good 0 +metaphys 0 +discuss 0 +serv 0 +practic 0 +makethi 0 +look 0 +standard 0 +resum 0 +date 0 +made 0 +anim 0 +shown 0 +world 0 +among 0 +drosophila 0 +geneticist 0 +told 0 +wonder 0 +girlfriend 0 +salon 0 +magazin 0 +entertain 0 +inform 0 +ezin 0 +creat 0 +lauri 0 +anderson 0 +green 0 +room 0 +shockwav 0 +kudon 0 +know 0 +wit 0 +quicktimevr 0 +documentari 0 +plight 0 +bosnia 0 +uproot 0 +popul 0 +billi 0 +holidai 0 +homepag 0 +nation 0 +secur 0 +archiv 0 +nixon 0 +preslei 0 +meetingsejhazen 0 +facstaff 0 +wisc 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..07e16dcd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,13 @@ +eliassi 1 +univers 1 +tina 0 +home 0 +pagetina 0 +illinoi 0 +urbana 0 +champaign 0 +wisconsin 0 +madison 0 +offic 0 +bldgphone 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..df35c674 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,72 @@ +madison 1 +eric 1 +instruct 1 +comput 1 +univers 1 +wisconsin 1 +high 1 +bandwidth 1 +fetch 1 +branch 1 +rotenberg 1 +jame 1 +smith 1 +depart 1 +predict 1 +confid 1 +trace 1 +cach 1 +latenc 1 +approach 1 +steve 1 +bennett 1 +appear 1 +proceed 1 +annual 1 +intern 1 +symposium 1 +microarchitectur 1 +decemb 1 +home 0 +page 0 +passsth 0 +anoth 0 +cold 0 +budweisth 0 +address 0 +scienc 0 +west 0 +dayton 0 +street 0 +electr 0 +engin 0 +johnson 0 +drive 0 +offic 0 +phone 0 +mail 0 +ericro 0 +wisc 0 +research 0 +area 0 +architectur 0 +advisor 0 +professor 0 +smithresearch 0 +topic 0 +kestrel 0 +multiscalar 0 +project 0 +level 0 +parallel 0 +mechan 0 +mispredict 0 +tolerancepubl 0 +assign 0 +condit 0 +erik 0 +jacobsen 0 +technic 0 +report 0 +april 0 +resum 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..ceed8d07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,69 @@ +comput 2 +would 1 +rather 1 +babak 1 +falsafi 1 +wisconsin 1 +madison 1 +wisc 1 +parallel 1 +scienc 1 +suni 1 +buffalo 1 +june 1 +like 1 +home 0 +page 0 +research 0 +assistantdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usatel 0 +email 0 +work 0 +peopl 0 +mentorcultresearch 0 +interest 0 +architectur 0 +perform 0 +evalu 0 +measur 0 +system 0 +program 0 +modelseduc 0 +univers 0 +decemb 0 +electr 0 +engin 0 +miscellan 0 +public 0 +drink 0 +read 0 +listen 0 +us 0 +high 0 +school 0 +idea 0 +fail 0 +morf 0 +shubu 0 +dionosi 0 +hillari 0 +profan 0 +phone 0 +convers 0 +check 0 +american 0 +french 0 +queen 0 +architect 0 +look 0 +hack 0 +partner 0 +crime 0 +next 0 +gener 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..37f9af56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,117 @@ +problem 1 +ferri 1 +scienc 1 +mathemat 1 +applic 1 +structur 1 +optim 1 +parallel 1 +home 1 +page 1 +michael 1 +comput 1 +univers 1 +program 1 +larg 1 +scale 1 +nonlinear 1 +econom 1 +investig 1 +emphasi 1 +model 1 +effect 1 +system 1 +complementar 1 +associ 0 +professor 0 +industri 0 +engineeringand 0 +member 0 +center 0 +depart 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +madison 0 +telephon 0 +email 0 +wisc 0 +cambridg 0 +interest 0 +theori 0 +algorithm 0 +research 0 +summari 0 +look 0 +robust 0 +method 0 +solv 0 +variationalinequ 0 +toproblem 0 +engin 0 +pivot 0 +path 0 +followingtechniqu 0 +base 0 +success 0 +linear 0 +numer 0 +properti 0 +andinterfac 0 +languag 0 +particular 0 +beingconsid 0 +includ 0 +equilibria 0 +taxat 0 +oncarbon 0 +emiss 0 +traffic 0 +congest 0 +toll 0 +contact 0 +chemic 0 +process 0 +design 0 +consid 0 +architectur 0 +solvingproblem 0 +graph 0 +partitioningtechniqu 0 +determin 0 +underli 0 +tool 0 +gener 0 +purpos 0 +techniqu 0 +forexploit 0 +machin 0 +directli 0 +within 0 +arealso 0 +consider 0 +prototyp 0 +us 0 +condor 0 +extens 0 +framework 0 +also 0 +beinginvestig 0 +identifi 0 +exploit 0 +underlyingmodel 0 +public 0 +complet 0 +list 0 +paper 0 +mostli 0 +electron 0 +avail 0 +relev 0 +link 0 +cpnet 0 +prgram 0 +pagec 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..e1c3cf63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,254 @@ +learn 2 +page 1 +madison 1 +finton 1 +wisconsin 1 +current 1 +system 1 +wisc 1 +comput 1 +univers 1 +artifici 1 +intellig 1 +goal 1 +reinforc 1 +action 1 +environ 1 +input 1 +also 1 +need 1 +import 1 +world 1 +browser 1 +star 1 +trek 1 +david 1 +home 1 +michigan 1 +show 1 +develop 1 +comment 1 +understand 1 +make 1 +problem 1 +output 1 +instead 1 +thumb 1 +work 1 +explor 1 +base 1 +inform 1 +avail 1 +openstep 1 +daili 1 +head 1 +scienc 0 +depart 0 +west 0 +dayton 0 +street 0 +welcom 0 +grad 0 +student 0 +research 0 +nerdin 0 +intelligenceher 0 +grew 0 +grand 0 +rapid 0 +late 0 +offic 0 +earn 0 +degre 0 +math 0 +state 0 +master 0 +scienceher 0 +dissert 0 +institut 0 +take 0 +littl 0 +year 0 +traffic 0 +measur 0 +softwarefor 0 +first 0 +thesi 0 +advisor 0 +left 0 +trusti 0 +nextstationor 0 +librari 0 +enjoyplai 0 +trumpet 0 +piano 0 +listen 0 +longhair 0 +music 0 +plai 0 +volleybal 0 +intervarsityfolk 0 +contribut 0 +supersoak 0 +arm 0 +race 0 +feel 0 +free 0 +form 0 +send 0 +mail 0 +finger 0 +accountto 0 +plan 0 +whether 0 +gain 0 +employ 0 +introduct 0 +project 0 +smart 0 +machin 0 +intelligenti 0 +essenc 0 +intelligencei 0 +abil 0 +adapt 0 +actappropri 0 +order 0 +reach 0 +treat 0 +gener 0 +case 0 +control 0 +chang 0 +sens 0 +weak 0 +kind 0 +feedback 0 +express 0 +posit 0 +neg 0 +number 0 +teacher 0 +present 0 +thesystem 0 +pair 0 +receiv 0 +irregular 0 +interv 0 +focuss 0 +todistinguish 0 +good 0 +on 0 +direct 0 +process 0 +build 0 +agood 0 +represent 0 +term 0 +relev 0 +orimport 0 +featur 0 +note 0 +basedfeatur 0 +extract 0 +appli 0 +notion 0 +balanc 0 +perform 0 +optim 0 +exploit 0 +investig 0 +wai 0 +us 0 +learningprocess 0 +effici 0 +allow 0 +specifi 0 +start 0 +point 0 +experi 0 +activ 0 +better 0 +intelligentadapt 0 +hope 0 +provid 0 +basi 0 +whichwil 0 +benefit 0 +knowledg 0 +task 0 +realli 0 +date 0 +sorri 0 +pagefor 0 +hotlistthi 0 +independ 0 +hotlist 0 +keep 0 +copi 0 +access 0 +platform 0 +combin 0 +actual 0 +bookmark 0 +file 0 +omniweb 0 +eleg 0 +function 0 +netscap 0 +opinion 0 +omniwebi 0 +nextstep 0 +foral 0 +variant 0 +releas 0 +editori 0 +responseto 0 +jehovah 0 +wit 0 +deiti 0 +christwisconsin 0 +site 0 +intervars 0 +graduat 0 +fellowship 0 +check 0 +weatherin 0 +citi 0 +program 0 +link 0 +isthmu 0 +pagesom 0 +favorit 0 +place 0 +visit 0 +nebula 0 +nasa 0 +pictur 0 +wide 0 +studi 0 +bibl 0 +crosssearch 0 +minor 0 +glenn 0 +gould 0 +homepag 0 +farsid 0 +voyagerent 0 +dilbert 0 +zoneroam 0 +virtual 0 +tourist 0 +stereogram 0 +tell 0 +blow 0 +true 0 +next 0 +think 0 +bill 0 +gate 0 +word 0 +sponsor 0 +last 0 +modifi 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..7439b63c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,242 @@ +nbsp 2 +program 2 +fischer 2 +regist 2 +alloc 2 +attribut 2 +charl 1 +compil 1 +code 1 +cost 1 +languag 1 +context 1 +august 1 +gener 1 +error 1 +kurland 1 +comput 1 +implement 1 +schedul 1 +model 1 +interprocedur 1 +time 1 +approach 1 +pointer 1 +arrai 1 +check 1 +us 1 +steven 1 +januari 1 +todd 1 +proebst 1 +transact 1 +system 1 +syntact 1 +analysi 1 +grammar 1 +scienc 1 +wisconsin 1 +univers 1 +teachingc 1 +cours 1 +research 1 +interest 1 +design 1 +recent 1 +best 1 +architectur 1 +issu 1 +avoid 1 +unnecessari 1 +delai 1 +optim 1 +procedur 1 +practic 1 +monitor 1 +execut 1 +sigplan 1 +harish 1 +patil 1 +appear 1 +effici 1 +june 1 +complet 1 +free 1 +parallel 1 +environ 1 +william 1 +specif 1 +juli 1 +evalu 1 +least 1 +decemb 1 +correct 1 +techniqu 1 +home 0 +page 0 +nbspcharl 0 +nbspprofessor 0 +nbspunivers 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +messag 0 +email 0 +wisc 0 +teach 0 +semest 0 +graduat 0 +spring 0 +undergradu 0 +focu 0 +exploit 0 +enormouscap 0 +provid 0 +modern 0 +student 0 +investig 0 +includ 0 +import 0 +pipelin 0 +haveinvestig 0 +arithmet 0 +express 0 +domin 0 +global 0 +level 0 +graph 0 +color 0 +mayb 0 +explicitli 0 +quantifi 0 +likelihood 0 +benefit 0 +registerresid 0 +attract 0 +care 0 +vital 0 +loadsand 0 +store 0 +must 0 +ultim 0 +theprocedur 0 +analyz 0 +studiedinterprocedur 0 +modelsthat 0 +optimallyalloc 0 +among 0 +polynomi 0 +seem 0 +effect 0 +anyon 0 +know 0 +easi 0 +make 0 +mistak 0 +involv 0 +indic 0 +especi 0 +common 0 +studi 0 +wai 0 +toautomat 0 +oper 0 +share 0 +memori 0 +multiprocessor 0 +workstat 0 +processor 0 +anoth 0 +possibl 0 +routin 0 +littl 0 +orno 0 +appar 0 +slowdown 0 +public 0 +minimum 0 +popl 0 +sigact 0 +symposium 0 +principl 0 +programminglanguag 0 +concurr 0 +access 0 +insoftwar 0 +experi 0 +demand 0 +driven 0 +inacm 0 +instruct 0 +load 0 +zero 0 +rang 0 +split 0 +confer 0 +activitiesa 0 +revis 0 +second 0 +edit 0 +craft 0 +author 0 +cytronand 0 +richard 0 +leblanc 0 +almost 0 +publish 0 +benjamin 0 +cum 0 +look 0 +soon 0 +better 0 +bookstor 0 +everywher 0 +short 0 +commun 0 +editor 0 +topla 0 +educationph 0 +cornel 0 +pars 0 +supervis 0 +john 0 +studentsdonn 0 +milton 0 +bruce 0 +rowland 0 +semant 0 +stephen 0 +skedzielewski 0 +definit 0 +reevalu 0 +septemb 0 +bernard 0 +dion 0 +local 0 +corrector 0 +sensitivepars 0 +mahadevan 0 +ganapathi 0 +retarget 0 +novemb 0 +vimal 0 +begwami 0 +maunei 0 +extend 0 +right 0 +gregori 0 +johnson 0 +sensit 0 +flow 0 +anil 0 +facil 0 +integr 0 +winsborough 0 +automat 0 +transpar 0 +logic 0 +venkatesh 0 +framework 0 +algorithm 0 +steve 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..83d9bf86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,259 @@ +comput 2 +wisconsin 2 +memori 2 +univers 2 +madison 2 +goodman 2 +scienc 2 +depart 2 +technic 2 +report 2 +share 2 +jame 2 +architectur 2 +burger 2 +project 1 +chip 1 +synchron 1 +integr 1 +base 1 +intern 1 +also 1 +scalabl 1 +processor 1 +system 1 +specif 1 +bandwidth 1 +design 1 +current 1 +coher 1 +standard 1 +galileo 1 +studi 1 +main 1 +research 1 +perform 1 +larg 1 +latenc 1 +point 1 +interconnect 1 +appear 1 +confer 1 +stefano 1 +kaxira 1 +juli 1 +alain 1 +cach 1 +relat 1 +transport 1 +layer 1 +hardwar 1 +includ 1 +support 1 +proceed 1 +johnson 1 +woest 1 +focus 1 +process 1 +capabl 1 +arrow 1 +differ 1 +iram 1 +follow 1 +futur 1 +model 1 +execut 1 +datascalar 1 +public 1 +optic 1 +us 1 +link 1 +list 1 +protocol 1 +effici 1 +primit 1 +high 1 +extend 1 +mechan 1 +march 1 +interfac 1 +scale 1 +februari 1 +nagi 1 +philip 1 +novemb 1 +analysi 1 +mari 1 +vernon 1 +doug 1 +home 0 +page 0 +wisconsint 0 +contentsgalileoproject 0 +descriptionpublicationsrel 0 +projectssci 0 +wisconsinproject 0 +descriptionpublicationsproject 0 +membersgalileo 0 +wisconsingalileo 0 +conduct 0 +groupat 0 +medium 0 +long 0 +term 0 +evolut 0 +emphasison 0 +therelationship 0 +futuresystem 0 +complet 0 +separ 0 +todai 0 +extent 0 +storag 0 +merg 0 +least 0 +wai 0 +increas 0 +penalti 0 +issuabl 0 +instruct 0 +orlimit 0 +place 0 +capacityon 0 +modul 0 +eventu 0 +sizabl 0 +fractionof 0 +resid 0 +repres 0 +label 0 +mopin 0 +diagram 0 +possibl 0 +migrat 0 +ofprocessor 0 +onto 0 +dram 0 +eventuallyobvi 0 +central 0 +area 0 +examin 0 +impact 0 +andlimit 0 +microprocessor 0 +systemsperform 0 +variou 0 +along 0 +theprocessor 0 +spectrumcach 0 +hierarchi 0 +systemsdesign 0 +bank 0 +systemprogram 0 +multipl 0 +exploit 0 +elimin 0 +serial 0 +bottlenecksdoug 0 +massiv 0 +parallel 0 +octob 0 +spsd 0 +modeldoug 0 +quantifi 0 +limit 0 +microprocessorsdoug 0 +symposium 0 +declin 0 +effect 0 +dynam 0 +gener 0 +purpos 0 +microprocessorsdougla 0 +januari 0 +berkeleyppram 0 +kyushu 0 +univeristi 0 +japansci 0 +wisconsinour 0 +group 0 +close 0 +involv 0 +coherentshar 0 +multiprocessor 0 +coherentinterfac 0 +ieee 0 +platform 0 +explor 0 +idea 0 +specifi 0 +queue 0 +lock 0 +qolb 0 +aswel 0 +optim 0 +pattern 0 +pairwis 0 +fresh 0 +read 0 +definitionfor 0 +extrem 0 +betweenprocess 0 +element 0 +individu 0 +cluster 0 +topic 0 +logarithm 0 +grow 0 +structureseffici 0 +multiprocessorsa 0 +extensionsaggress 0 +consist 0 +multiprocessorswisconsin 0 +minim 0 +overhead 0 +applic 0 +best 0 +paper 0 +supercomput 0 +simul 0 +wind 0 +tunneldougla 0 +second 0 +workshop 0 +cost 0 +hierarch 0 +extens 0 +scijam 0 +memoryross 0 +evan 0 +aboulenein 0 +stein 0 +gjess 0 +topolog 0 +ringsross 0 +decemb 0 +ringsteven 0 +scott 0 +lower 0 +bound 0 +coherenceross 0 +june 0 +multiprocessorsphilip 0 +multiprocessorjam 0 +third 0 +program 0 +languag 0 +oper 0 +april 0 +particip 0 +faculti 0 +graduat 0 +student 0 +alumni 0 +abouleneinross 0 +johnsonstev 0 +scottlast 0 +modifi 0 +dburger 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..75c081d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,34 @@ +geeri 1 +andrew 1 +madison 1 +home 0 +page 0 +wisc 0 +west 0 +dayton 0 +street 0 +regent 0 +madisonin 0 +comput 0 +scienc 0 +current 0 +work 0 +compsci 0 +grade 0 +schedul 0 +pontif 0 +peopl 0 +interest 0 +jacqu 0 +derrida 0 +post 0 +structur 0 +martin 0 +heidegg 0 +albert 0 +camu 0 +jean 0 +paul 0 +sartr 0 +friedrich 0 +nietzsch 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..a199b94d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,148 @@ +time 1 +pleas 1 +gideon 1 +glass 1 +homepag 1 +tweak 1 +find 1 +sampl 1 +follow 1 +send 1 +read 1 +index 1 +machin 1 +netscap 1 +unix 1 +mayb 1 +continu 0 +thank 0 +stop 0 +collect 0 +imag 0 +deposit 0 +directori 0 +pictur 0 +gui 0 +claim 0 +cooler 0 +accord 0 +toonion 0 +seethi 0 +movi 0 +usual 0 +suspect 0 +walk 0 +nearest 0 +blockbust 0 +note 0 +recent 0 +move 0 +none 0 +roommat 0 +dutch 0 +cheap 0 +either 0 +given 0 +predica 0 +dismal 0 +prospect 0 +improv 0 +withno 0 +outsid 0 +influenc 0 +consid 0 +make 0 +small 0 +donationto 0 +help 0 +defrai 0 +cost 0 +purchas 0 +check 0 +monei 0 +orderscan 0 +sent 0 +address 0 +cash 0 +monro 0 +floor 0 +madison 0 +usathank 0 +support 0 +grad 0 +student 0 +sometim 0 +paper 0 +eventhough 0 +shelf 0 +feet 0 +unread 0 +book 0 +wait 0 +anyhow 0 +look 0 +someth 0 +christian 0 +achil 0 +huge 0 +might 0 +also 0 +unifi 0 +cstechreport 0 +class 0 +project 0 +report 0 +otherstuff 0 +avail 0 +program 0 +load 0 +averagewil 0 +grow 0 +fast 0 +main 0 +fork 0 +doofu 0 +actual 0 +share 0 +back 0 +calvin 0 +great 0 +killer 0 +zippi 0 +pinheadha 0 +reload 0 +sever 0 +justtri 0 +last 0 +fall 0 +kill 0 +noth 0 +think 0 +work 0 +mozilla 0 +higher 0 +well 0 +dabbl 0 +object 0 +orient 0 +programmingin 0 +mostli 0 +exercis 0 +suppos 0 +buttonher 0 +thing 0 +right 0 +suffic 0 +case 0 +told 0 +somethingin 0 +bookmark 0 +denni 0 +ritchi 0 +creator 0 +wrote 0 +anti 0 +forward 0 +hater 0 +handbook 0 +mailand 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..b7b12b9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,406 @@ +meet 2 +comput 2 +calendar 2 +andi 2 +glew 2 +first 2 +intel 1 +page 1 +unix 1 +system 1 +us 1 +schedul 1 +email 1 +wisconsin 1 +home 1 +arrang 1 +approach 1 +architectur 1 +manag 1 +time 1 +advoc 1 +also 1 +pilot 1 +synchron 1 +univers 1 +thing 1 +public 1 +html 1 +work 1 +configur 1 +beef 1 +montreal 1 +peopl 1 +code 1 +although 1 +like 1 +optimist 1 +lock 1 +version 1 +creat 1 +least 1 +possibl 1 +person 1 +krazi 1 +organ 1 +touch 1 +filesystem 1 +access 1 +wisc 1 +read 1 +research 1 +group 1 +interest 1 +wannab 1 +author 1 +thought 1 +suggest 1 +miscellan 1 +info 1 +stuff 1 +self 1 +imag 1 +resum 1 +trust 1 +keep 1 +warm 1 +enough 1 +alwai 1 +architect 1 +someth 1 +busi 1 +sinc 1 +start 1 +learn 1 +hacker 1 +gould 1 +real 1 +futur 1 +cannot 1 +tool 1 +paper 1 +softwar 1 +concurr 1 +control 1 +get 1 +test 1 +requir 1 +even 1 +commit 1 +download 1 +softwareto 1 +user 1 +oper 1 +urgent 1 +teresa 1 +pageandi 0 +pagethi 0 +largelyform 0 +snippet 0 +stylishor 0 +summarycontact 0 +ship 0 +addressescalendar 0 +http 0 +anyon 0 +file 0 +unless 0 +specif 0 +open 0 +default 0 +scribe 0 +minut 0 +taker 0 +sohi 0 +weekli 0 +rather 0 +gener 0 +form 0 +adapt 0 +applic 0 +dynam 0 +versu 0 +static 0 +high 0 +level 0 +edit 0 +higher 0 +educ 0 +effect 0 +patent 0 +claim 0 +fame 0 +favorit 0 +quot 0 +sai 0 +care 0 +pope 0 +priest 0 +parson 0 +king 0 +william 0 +boyn 0 +want 0 +coin 0 +summer 0 +harm 0 +see 0 +frost 0 +snow 0 +poor 0 +almighti 0 +dollar 0 +good 0 +mapl 0 +wood 0 +bellow 0 +church 0 +chapel 0 +ranter 0 +preacher 0 +beecher 0 +alreadi 0 +keeper 0 +harbour 0 +deplor 0 +churchmen 0 +notori 0 +atheist 0 +less 0 +well 0 +known 0 +chariti 0 +strand 0 +sailor 0 +knew 0 +could 0 +exchang 0 +chop 0 +chord 0 +firewood 0 +meal 0 +place 0 +sleep 0 +print 0 +manifesto 0 +handbil 0 +advertis 0 +hackeralthough 0 +aspir 0 +formerlyhad 0 +fake 0 +motorola 0 +card 0 +ever 0 +assembl 0 +redesign 0 +chip 0 +kernel 0 +andstil 0 +think 0 +wistfulli 0 +beard 0 +frequent 0 +wear 0 +suspend 0 +bald 0 +architectureonc 0 +involv 0 +microarchitectur 0 +pentium 0 +adopt 0 +architecturei 0 +constantli 0 +verg 0 +write 0 +book 0 +entitl 0 +grabbag 0 +trick 0 +techniqu 0 +sort 0 +antidot 0 +hennessi 0 +patterson 0 +afford 0 +diskspac 0 +internet 0 +servic 0 +provideror 0 +charg 0 +connect 0 +appreci 0 +piec 0 +architectureon 0 +best 0 +wai 0 +textbook 0 +datasheet 0 +instruct 0 +refer 0 +wander 0 +netscapebookmarksstockscod 0 +standardsroi 0 +wilkinson 0 +standardsi 0 +disagre 0 +mani 0 +perhap 0 +quit 0 +quickli 0 +defunct 0 +startup 0 +compani 0 +call 0 +enfopris 0 +build 0 +workstat 0 +chang 0 +assign 0 +driver 0 +writingto 0 +integr 0 +longstand 0 +love 0 +hate 0 +relationship 0 +configurationmanag 0 +scc 0 +publish 0 +box 0 +link 0 +parallel 0 +tree 0 +element 0 +usenix 0 +workshop 0 +describ 0 +central 0 +databas 0 +multipl 0 +view 0 +hardlink 0 +clone 0 +save 0 +space 0 +divis 0 +team 0 +brian 0 +berlin 0 +deprec 0 +mainli 0 +wherea 0 +actual 0 +case 0 +livelock 0 +usual 0 +insist 0 +singl 0 +identifi 0 +serial 0 +sourc 0 +checkinsso 0 +proce 0 +linear 0 +manner 0 +programm 0 +previou 0 +fix 0 +appli 0 +recogn 0 +relax 0 +often 0 +strip 0 +approachin 0 +apolog 0 +never 0 +truli 0 +portabl 0 +accomplish 0 +similar 0 +mike 0 +fetterman 0 +mark 0 +aitken 0 +deserv 0 +credit 0 +enhanc 0 +sever 0 +featur 0 +went 0 +notabl 0 +number 0 +becam 0 +overal 0 +suffici 0 +everyth 0 +includ 0 +cshrc 0 +login 0 +wisconsinhow 0 +seem 0 +ubiquit 0 +programat 0 +depart 0 +variou 0 +cmtool 0 +domain 0 +ical 0 +plan 0 +critic 0 +mass 0 +anyof 0 +isol 0 +associ 0 +mean 0 +record 0 +voic 0 +therefor 0 +must 0 +prefer 0 +phone 0 +manuallyadd 0 +microsoft 0 +watch 0 +intelat 0 +devout 0 +program 0 +last 0 +ontim 0 +past 0 +weak 0 +disconnect 0 +allow 0 +major 0 +meetingswith 0 +without 0 +manual 0 +intervent 0 +algorithm 0 +tell 0 +reserveth 0 +right 0 +blindli 0 +invit 0 +make 0 +admin 0 +check 0 +proposeif 0 +week 0 +avoid 0 +bother 0 +send 0 +realiz 0 +miss 0 +sent 0 +advanc 0 +overallschedul 0 +topic 0 +fascin 0 +bring 0 +effici 0 +advantag 0 +secretariesand 0 +aid 0 +camp 0 +header 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..0e9b7a30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,24 @@ +goodman 1 +comput 1 +wisconsin 1 +home 0 +page 0 +jame 0 +wisc 0 +professor 0 +sciencesdepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaresearch 0 +interest 0 +lot 0 +good 0 +stuff 0 +current 0 +project 0 +galileo 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..9a738d0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,338 @@ +imag 3 +databas 2 +comput 2 +home 2 +retriev 2 +architectur 2 +link 2 +pic 2 +page 2 +greg 1 +inform 1 +mirror 1 +softwar 1 +index 1 +japanes 1 +relat 1 +wisc 1 +fall 1 +system 1 +learn 1 +librari 1 +info 1 +project 1 +vision 1 +cool 1 +japan 1 +nice 1 +stuff 1 +site 1 +recognit 1 +process 1 +graphic 1 +text 1 +group 1 +cours 1 +sharp 1 +offic 1 +section 1 +lectur 1 +spring 1 +manag 1 +engin 1 +html 1 +refer 1 +string 1 +attach 1 +introduct 1 +geometri 1 +machin 1 +freewar 1 +internet 1 +hyper 1 +rang 1 +shape 1 +shade 1 +pretti 1 +idea 1 +data 1 +planet 1 +wyom 1 +satelit 1 +line 1 +histori 1 +base 1 +list 1 +handwrit 1 +washington 1 +state 1 +tracer 1 +archiv 1 +trace 1 +simul 1 +univ 1 +includ 1 +english 1 +robot 1 +thoma 1 +tool 1 +languag 1 +invest 1 +fund 1 +schwab 1 +pagegreg 0 +pagenam 0 +sharpemail 0 +eduoffic 0 +phone 0 +hour 0 +appt 0 +tgif 0 +everi 0 +fridai 0 +dept 0 +instructor 0 +note 0 +notesclass 0 +topic 0 +find 0 +aboutsearch 0 +altavista 0 +dejanew 0 +excit 0 +infoseek 0 +lyco 0 +metacrawl 0 +yahoo 0 +usenet 0 +format 0 +ohioc 0 +program 0 +todai 0 +draft 0 +standard 0 +april 0 +stanford 0 +cygnu 0 +mumit 0 +newbi 0 +guideplatform 0 +independ 0 +portabl 0 +develop 0 +kit 0 +amulet 0 +dclap 0 +requir 0 +motif 0 +suit 0 +wxwindow 0 +yacl 0 +class 0 +projectclass 0 +numer 0 +linear 0 +algebra 0 +theoret 0 +scienc 0 +homework 0 +projectmisc 0 +sharewar 0 +cygwin 0 +directori 0 +gimp 0 +harmonai 0 +harmoni 0 +unix 0 +client 0 +browser 0 +vasc 0 +altern 0 +video 0 +research 0 +give 0 +specifi 0 +night 0 +jaida 0 +year 0 +worth 0 +atmospher 0 +multiresolut 0 +seamless 0 +click 0 +zoom 0 +resolut 0 +view 0 +solar 0 +moon 0 +comet 0 +meteor 0 +version 0 +also 0 +lot 0 +misc 0 +like 0 +overhead 0 +shot 0 +antarctica 0 +catalogu 0 +niae 0 +gothic 0 +electr 0 +postcard 0 +card 0 +rack 0 +select 0 +thank 0 +todd 0 +vistex 0 +textur 0 +databaseartifici 0 +gener 0 +primoridi 0 +soup 0 +kitchen 0 +math 0 +depart 0 +awesom 0 +medic 0 +medicin 0 +document 0 +pictur 0 +diagon 0 +dermatolog 0 +onlin 0 +atla 0 +erlang 0 +germani 0 +enter 0 +diagnosi 0 +back 0 +orthopaed 0 +ecvnet 0 +optic 0 +charact 0 +nici 0 +groupimag 0 +univers 0 +raytrac 0 +rayshad 0 +utah 0 +raster 0 +toolkit 0 +radianc 0 +radios 0 +packag 0 +avalon 0 +object 0 +grimstead 0 +massiv 0 +dsite 0 +hardwar 0 +board 0 +intergraph 0 +lockhe 0 +glint 0 +chipset 0 +nvidia 0 +chipsetcomput 0 +geometeri 0 +center 0 +applic 0 +challeng 0 +geometrylispuseless 0 +pagescomput 0 +hennessi 0 +patterson 0 +resourc 0 +superdlx 0 +parallel 0 +parl 0 +mexico 0 +washingt 0 +georgia 0 +tech 0 +groupjapanes 0 +guid 0 +unvers 0 +monash 0 +infowav 0 +edict 0 +window 0 +dictionari 0 +shodouka 0 +asiasoftinform 0 +retrev 0 +peregrin 0 +travers 0 +written 0 +perl 0 +trec 0 +infomin 0 +gigabyt 0 +search 0 +textual 0 +provid 0 +experi 0 +feedback 0 +linguist 0 +util 0 +repositori 0 +survei 0 +natur 0 +nist 0 +other_sw 0 +info_retriev 0 +world 0 +wide 0 +wander 0 +spider 0 +jedi 0 +might 0 +strictli 0 +hartlib 0 +paper 0 +latin 0 +stemmer 0 +multimedia 0 +academ 0 +storag 0 +new 0 +pointcast 0 +check 0 +custom 0 +portfolio 0 +automat 0 +updat 0 +literatur 0 +mark 0 +twainhumor 0 +apolog 0 +citizen 0 +offens 0 +threw 0 +garbag 0 +belong 0 +investorweb 0 +networth 0 +fundscap 0 +brill 0 +editori 0 +servic 0 +stockmastermutu 0 +brokerag 0 +hous 0 +fidel 0 +vanguard 0 +row 0 +price 0 +jack 0 +white 0 +compani 0 +charl 0 +gabelli 0 +mutualsmisc 0 +psnuplast 0 +modifi 0 +sharpgreg 0 +http 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..60e2a8f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar 0 +gopalsridhar 0 +gopalgsri 0 +wisc 0 +edubon 0 +marrow 0 +pageresumest 0 +wisconsin 0 +pagecalvin 0 +hobbesbookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..d30d5fdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,58 @@ +univers 1 +madison 1 +home 1 +visitor 1 +sinc 1 +guangshun 1 +page 1 +depart 1 +comput 1 +wisconsin 1 +interest 1 +project 1 +data 1 +relat 1 +send 1 +email 1 +number 1 +welcom 0 +graduat 0 +student 0 +scienc 0 +dayton 0 +phone 0 +offic 0 +educ 0 +california 0 +state 0 +angel 0 +peke 0 +physic 0 +grade 0 +research 0 +databas 0 +manag 0 +system 0 +advis 0 +raghu 0 +ramakrishnan 0 +miron 0 +livni 0 +analysi 0 +famili 0 +medicin 0 +devis 0 +explor 0 +visual 0 +environ 0 +class 0 +link 0 +stuff 0 +career 0 +plan 0 +chines 0 +miscellani 0 +around 0 +weather 0 +forecast 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..caa00c03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,44 @@ +data 1 +guhan 1 +viswanathan 1 +thesi 1 +research 1 +design 1 +implement 1 +parallellanguag 1 +home 0 +page 0 +gviswana 0 +wisc 0 +graduat 0 +studentdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaadvisor 0 +laru 0 +focus 0 +involv 0 +base 0 +develop 0 +local 0 +compil 0 +target 0 +investig 0 +parallelappl 0 +execut 0 +effici 0 +hand 0 +code 0 +parallelprogram 0 +amor 0 +detail 0 +summari 0 +list 0 +public 0 +us 0 +link 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..aa4c76e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,73 @@ +page 1 +harit 1 +univers 1 +comput 1 +prof 1 +home 1 +student 1 +cours 1 +take 1 +advanc 1 +architectur 1 +link 1 +access 1 +time 1 +sinc 1 +sept 1 +counter 1 +mail 1 +graduat 0 +wisconsin 0 +madison 0 +depart 0 +scienc 0 +would 0 +like 0 +list 0 +classmat 0 +fall 0 +databas 0 +manag 0 +system 0 +raghu 0 +ramakrishnan 0 +mark 0 +hill 0 +spring 0 +jame 0 +goodman 0 +undergradu 0 +world 0 +famou 0 +mvsr 0 +engin 0 +colleg 0 +osmania 0 +hyderabad 0 +india 0 +meet 0 +draw 0 +line 0 +thing 0 +interest 0 +indian 0 +newspap 0 +stuff 0 +sport 0 +sastri 0 +roommat 0 +saeed 0 +mirza 0 +murthi 0 +zubber 0 +dust 0 +photo 0 +photograph 0 +warn 0 +click 0 +year 0 +folk 0 +courtesi 0 +electron 0 +address 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..f8097af2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,43 @@ +madison 1 +hasti 1 +wisconsin 1 +rebecca 1 +scienc 1 +java 1 +univers 1 +mathemat 1 +home 0 +page 0 +graduat 0 +student 0 +research 0 +assistantcomput 0 +departmentunivers 0 +dayton 0 +offic 0 +mail 0 +wisc 0 +edutelephon 0 +telephon 0 +dept 0 +first 0 +applet 0 +click 0 +fall 0 +schedul 0 +engr 0 +noland 0 +seminar 0 +comput 0 +carleton 0 +colleg 0 +interest 0 +program 0 +languag 0 +basketbal 0 +volleybal 0 +softbal 0 +linkag 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..333fc6bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,108 @@ +comput 1 +univers 1 +lane 1 +stuff 1 +scienc 1 +madison 1 +phone 1 +offic 1 +inform 1 +home 1 +page 1 +chad 1 +academ 1 +person 1 +neat 1 +dept 1 +wisconsin 1 +research 1 +program 1 +state 1 +claud 1 +info 0 +west 0 +dayton 0 +mail 0 +address 0 +hour 0 +wisc 0 +welcomethank 0 +stop 0 +hope 0 +enjoi 0 +bestbet 0 +link 0 +section 0 +biggest 0 +new 0 +life 0 +right 0 +get 0 +marri 0 +onmai 0 +nichol 0 +final 0 +want 0 +tell 0 +good 0 +luck 0 +count 0 +fall 0 +cours 0 +retriev 0 +technolog 0 +seek 0 +databas 0 +manag 0 +system 0 +ling 0 +audit 0 +advanc 0 +semant 0 +interest 0 +linguist 0 +discours 0 +process 0 +us 0 +advic 0 +barwis 0 +epigram 0 +alan 0 +perli 0 +educ 0 +mathemat 0 +minor 0 +philosophi 0 +laud 0 +truman 0 +formerli 0 +northeast 0 +missouri 0 +expect 0 +stand 0 +accord 0 +truli 0 +click 0 +imag 0 +cyber 0 +poop 0 +creation 0 +unabash 0 +brother 0 +bart 0 +arthur 0 +download 0 +psychot 0 +talk 0 +rais 0 +plant 0 +internet 0 +deep 0 +thought 0 +jack 0 +handi 0 +reload 0 +differ 0 +on 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..26dbabad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,103 @@ +robot 2 +hert 2 +susan 2 +comput 2 +vladimir 1 +lumelski 1 +motion 1 +plan 1 +interest 1 +geometri 1 +algorithm 1 +tether 1 +page 1 +research 1 +madison 1 +link 1 +multipl 1 +appear 1 +confer 1 +intern 1 +autonom 1 +scienc 1 +univers 1 +wisconsin 1 +public 1 +curv 1 +paper 1 +august 1 +journal 1 +system 1 +version 1 +proc 1 +librari 1 +book 1 +home 0 +wisc 0 +assist 0 +depart 0 +dayton 0 +telephon 0 +curriculum 0 +vita 0 +postscript 0 +softwar 0 +appli 0 +experiment 0 +analysi 0 +design 0 +graphic 0 +geometr 0 +advisor 0 +current 0 +work 0 +develop 0 +alogirthm 0 +common 0 +environ 0 +select 0 +deform 0 +plane 0 +extend 0 +abstract 0 +proceed 0 +canadian 0 +planar 0 +rout 0 +applic 0 +ti 0 +bind 0 +publish 0 +ieee 0 +autom 0 +sanjai 0 +tiwari 0 +terrain 0 +cover 0 +special 0 +issu 0 +underwat 0 +move 0 +arbitrari 0 +configur 0 +intellig 0 +reznik 0 +simul 0 +basi 0 +anim 0 +program 0 +technic 0 +report 0 +laboratori 0 +juli 0 +educ 0 +refer 0 +shelf 0 +congress 0 +line 0 +travel 0 +samantha 0 +cook 0 +epicuri 0 +veggi 0 +unit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..1b86a53d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,49 @@ +madison 1 +kirk 1 +hogenson 1 +offic 1 +mail 1 +graduat 1 +wisconsin 1 +student 1 +hour 1 +studentcomput 0 +scienc 0 +departmentunivers 0 +dayton 0 +wisc 0 +edutelephon 0 +depart 0 +section 0 +tue 0 +also 0 +look 0 +myschedul 0 +none 0 +workout 0 +tryto 0 +appoint 0 +time 0 +finger 0 +send 0 +visit 0 +ghana 0 +countri 0 +serv 0 +peac 0 +corp 0 +usernam 0 +check 0 +pnhp 0 +group 0 +page 0 +maintain 0 +wife 0 +eilun 0 +experi 0 +counter 0 +sai 0 +accessedtim 0 +sinc 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..8ee76cf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,12 @@ +horn 1 +jeffrei 1 +swanton 0 +road 0 +madison 0 +wisconsin 0 +phone 0 +email 0 +wisc 0 +wise 0 +linear 0 +familyemploymenteducationresearchgenealog 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..5c2fc39f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,148 @@ +program 2 +horwitz 2 +analysi 2 +languag 1 +dataflow 1 +work 1 +rep 1 +confer 1 +problem 1 +graph 1 +interprocedur 1 +sagiv 1 +symposium 1 +proceed 1 +depend 1 +algorithm 1 +precis 1 +effici 1 +record 1 +januari 1 +softwar 1 +base 1 +slice 1 +mainli 1 +involv 1 +understand 1 +textual 1 +semant 1 +differ 1 +version 1 +call 1 +class 1 +develop 1 +implement 1 +twenti 1 +intern 1 +principlesof 1 +engin 1 +susan 0 +horwitzsusan 0 +horwitzprofessorcomput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +wisc 0 +telephon 0 +secretari 0 +depart 0 +cornel 0 +univers 0 +research 0 +interest 0 +environmentsprogram 0 +differenc 0 +mergingstat 0 +programsinterprocedur 0 +analysisresearch 0 +summarymi 0 +design 0 +implementationof 0 +tool 0 +help 0 +programm 0 +like 0 +exist 0 +would 0 +affectedbi 0 +propos 0 +modif 0 +structur 0 +betweentwo 0 +retest 0 +chang 0 +combin 0 +piec 0 +produc 0 +certainsemant 0 +guarante 0 +represent 0 +theprogram 0 +oper 0 +also 0 +interproceduraldataflow 0 +previou 0 +concentratedeith 0 +specif 0 +individu 0 +necessarili 0 +gener 0 +thoma 0 +mooli 0 +newalgorithm 0 +appli 0 +larg 0 +recent 0 +publicationsm 0 +shapiro 0 +fast 0 +accur 0 +flow 0 +insensit 0 +point 0 +appear 0 +fourth 0 +onprincipl 0 +pari 0 +franc 0 +demand 0 +sigsoft 0 +foundat 0 +softwareengin 0 +washington 0 +octob 0 +applic 0 +constantpropag 0 +sixth 0 +joint 0 +theoryand 0 +practic 0 +aarhu 0 +denmark 0 +reachabl 0 +second 0 +francisco 0 +bate 0 +increment 0 +test 0 +us 0 +twentieth 0 +charleston 0 +fourteenth 0 +conferenceon 0 +melbourn 0 +australia 0 +identifi 0 +aprogram 0 +sigplan 0 +languagedesign 0 +white 0 +plain 0 +june 0 +teach 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..d575cc7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid 0 +pagenam 0 +sidnei 0 +hummertoffic 0 +phone 0 +offic 0 +email 0 +hummert 0 +wisc 0 +edua 0 +postscript 0 +version 0 +resum 0 +pictur 0 +click 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..a87fd658 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,46 @@ +home 1 +alien 1 +construct 1 +mail 1 +univers 1 +wisconsin 1 +phone 1 +wisc 1 +igor 0 +ivanisev 0 +pageigorivanisev 0 +work 0 +newest 0 +project 0 +disclaim 0 +speak 0 +particular 0 +needless 0 +page 0 +ever 0 +feel 0 +like 0 +actual 0 +alreadi 0 +link 0 +research 0 +interest 0 +robot 0 +vision 0 +stuff 0 +generalgradu 0 +slave 0 +departmentwa 0 +undergrad 0 +drake 0 +math 0 +departmentaddress 0 +comput 0 +scienc 0 +departmentunivers 0 +west 0 +dayton 0 +streetmadison 0 +offic 0 +iigor 0 +eduiigor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..94d8c2bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..2d5f6a50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,37 @@ +sharenow 1 +comput 1 +meet 1 +offic 1 +section 1 +tuesdai 1 +home 0 +page 0 +wisc 0 +teach 0 +assist 0 +peterson 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +madisonmadison 0 +sciencestelephon 0 +hour 0 +thursdai 0 +pmsection 0 +pmboth 0 +class 0 +room 0 +sciencesc 0 +announcementshandoutsmoth 0 +jone 0 +profil 0 +recreat 0 +site 0 +pleas 0 +send 0 +email 0 +comment 0 +last 0 +modifi 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..5c2893ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,44 @@ +scienc 1 +research 1 +madison 1 +comput 1 +basneyjim 0 +basneygradu 0 +student 0 +assistantcomput 0 +departmentunivers 0 +wisconsin 0 +dayton 0 +email 0 +jbasnei 0 +wisc 0 +eduoffic 0 +statisticsoffic 0 +phone 0 +interest 0 +area 0 +oper 0 +system 0 +andnetwork 0 +current 0 +work 0 +condor 0 +directionof 0 +prof 0 +miron 0 +livni 0 +receiv 0 +fromoberlin 0 +colleg 0 +english 0 +webpag 0 +oberlin 0 +resum 0 +codefrom 0 +previou 0 +project 0 +avail 0 +onlin 0 +last 0 +modifi 0 +basnei 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..cc212de7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,83 @@ +jerel 1 +mackai 1 +comput 1 +work 1 +scienc 1 +databas 1 +system 1 +plai 1 +also 1 +home 0 +pagejerel 0 +assist 0 +research 0 +special 0 +oper 0 +fulltim 0 +univers 0 +wisconsin 0 +madison 0 +depart 0 +respons 0 +includ 0 +develop 0 +support 0 +sybas 0 +ingr 0 +instal 0 +backup 0 +softwar 0 +train 0 +student 0 +hourli 0 +electr 0 +guitar 0 +thrash 0 +metal 0 +specialti 0 +violin 0 +classic 0 +baroqu 0 +mainli 0 +seen 0 +error 0 +evil 0 +wai 0 +click 0 +shock 0 +case 0 +didn 0 +believ 0 +ey 0 +first 0 +time 0 +like 0 +record 0 +mostli 0 +funni 0 +cover 0 +stuff 0 +abba 0 +metallica 0 +origin 0 +soon 0 +abl 0 +sampl 0 +hit 0 +watch 0 +favorit 0 +show 0 +raquetbal 0 +golf 0 +shoot 0 +pool 0 +stand 0 +around 0 +towel 0 +yeah 0 +know 0 +much 0 +finger 0 +jerellast 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..f2f0252a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home 0 +page 0 +johan 0 +larson 0 +homepag 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..860bcb53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,131 @@ +page 1 +link 1 +cool 1 +pictur 1 +home 1 +frame 1 +year 1 +realli 1 +would 1 +default 1 +number 1 +paus 1 +imag 1 +stuff 1 +time 1 +friend 1 +homepag 1 +roomat 1 +notr 1 +dame 1 +search 1 +engin 1 +cano 1 +relief 0 +happi 0 +java 0 +good 0 +censorship 0 +mail 0 +jherro 0 +wisc 0 +note 0 +class 0 +made 0 +relax 0 +let 0 +thing 0 +apictur 0 +girlfriend 0 +half 0 +afraid 0 +date 0 +though 0 +almost 0 +mani 0 +tortur 0 +disembody 0 +becam 0 +float 0 +head 0 +death 0 +directori 0 +anim 0 +seri 0 +jpeg 0 +format 0 +name 0 +start 0 +millisecond 0 +overriden 0 +repeat 0 +sequenc 0 +explicit 0 +order 0 +vital 0 +inform 0 +aquir 0 +nicknam 0 +like 0 +take 0 +apolog 0 +lame 0 +pleas 0 +bear 0 +pretti 0 +jack 0 +skellington 0 +kermit 0 +frog 0 +interest 0 +someth 0 +els 0 +neat 0 +write 0 +haiku 0 +said 0 +thath 0 +go 0 +click 0 +mine 0 +grad 0 +memori 0 +forgotten 0 +cult 0 +hippothi 0 +exploratori 0 +intervent 0 +chaotic 0 +exist 0 +realiti 0 +follow 0 +enjoi 0 +benefit 0 +matriarch 0 +societi 0 +join 0 +todai 0 +exclus 0 +club 0 +hierarchi 0 +rule 0 +semi 0 +yahooooooooooooo 0 +work 0 +contain 0 +free 0 +softwar 0 +shack 0 +bazillion 0 +mpeg 0 +movi 0 +archiv 0 +great 0 +muppet 0 +sound 0 +rachel 0 +want 0 +select 0 +trip 0 +look 0 +bout 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..7d116c2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,130 @@ +storag 1 +dewitt 1 +bing 1 +research 1 +public 1 +madison 1 +dbm 1 +databas 1 +system 1 +paradis 1 +queri 1 +data 1 +page 1 +gener 1 +inform 1 +advisor 1 +interest 1 +project 1 +pointer 1 +comput 1 +wisconsin 1 +jieb 1 +wisc 1 +tertiari 1 +manag 1 +object 1 +technolog 1 +patel 1 +kabra 1 +naughton 1 +submit 1 +octob 1 +process 1 +size 1 +appear 1 +septemb 1 +constraint 1 +februari 1 +client 1 +server 1 +proceed 1 +confer 1 +tenni 1 +pictur 1 +home 0 +index 0 +educ 0 +hobbi 0 +informationresearch 0 +assistantdepart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +edueduc 0 +scienc 0 +univers 0 +prof 0 +david 0 +dewittresearch 0 +support 0 +parallel 0 +orient 0 +geograph 0 +systemsresearch 0 +shorepublicationsbuild 0 +scaleabl 0 +spatial 0 +implment 0 +evalu 0 +tuft 0 +burger 0 +hall 0 +ramasami 0 +lueder 0 +ellman 0 +kupsch 0 +execut 0 +batch 0 +prong 0 +approach 0 +effici 0 +tape 0 +resid 0 +set 0 +satellit 0 +imag 0 +studi 0 +impact 0 +tile 0 +perform 0 +nasa 0 +goddard 0 +conferenceon 0 +mass 0 +us 0 +tree 0 +goldstein 0 +ramakrishnan 0 +shaft 0 +shorter 0 +version 0 +workshop 0 +larg 0 +base 0 +santiago 0 +chile 0 +reclam 0 +reorgan 0 +serverpersist 0 +store 0 +yong 0 +ieee 0 +engin 0 +houston 0 +eosdi 0 +sigmod 0 +grouphobbi 0 +volleybal 0 +volleyballweb 0 +white 0 +water 0 +raft 0 +whitewat 0 +find 0 +click 0 +full 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..8a495a0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,58 @@ +jignesh 1 +paradis 1 +publish 1 +home 1 +page 1 +research 1 +madison 1 +system 1 +databas 1 +relat 1 +public 1 +paper 1 +join 1 +patel 0 +wisc 0 +welcom 0 +assist 0 +depart 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +telephon 0 +advisor 0 +david 0 +dewitt 0 +interest 0 +parallel 0 +object 0 +current 0 +work 0 +project 0 +client 0 +server 0 +vldb 0 +partit 0 +base 0 +spatial 0 +merg 0 +sigmod 0 +accur 0 +model 0 +hybrid 0 +hash 0 +algorithm 0 +sigmetr 0 +miscellan 0 +stuff 0 +virtual 0 +tourist 0 +inlin 0 +skate 0 +madhuri 0 +kashmir 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..30676e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,10 @@ +page 1 +georg 0 +varghes 0 +peopl 0 +download 0 +netscap 0 +click 0 +warn 0 +pretti 0 +lame 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..e3338059 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,76 @@ +gehrk 1 +madison 1 +johann 1 +comput 1 +system 1 +time 1 +intern 1 +univers 1 +area 1 +interest 1 +inform 1 +public 1 +wisconsin 1 +scienc 1 +depart 1 +baruah 1 +plaxton 1 +share 1 +resourc 1 +real 1 +ieee 1 +version 1 +homepagejohann 0 +gehrkewelcom 0 +graduat 0 +studentat 0 +sciencesdepart 0 +ofwisconsin 0 +databasemanag 0 +work 0 +data 0 +mine 0 +underprofessor 0 +raghuramakrishnan 0 +page 0 +construct 0 +contact 0 +linkscontact 0 +email 0 +utexa 0 +offic 0 +west 0 +dayton 0 +street 0 +room 0 +home 0 +eagl 0 +height 0 +stoica 0 +abdel 0 +wahab 0 +jeffai 0 +proport 0 +alloc 0 +algorithmfor 0 +proceed 0 +symposium 0 +washington 0 +decemb 0 +appear 0 +anexpand 0 +fastschedul 0 +period 0 +task 0 +multipl 0 +inproceed 0 +parallel 0 +processingsymposium 0 +april 0 +expand 0 +avail 0 +technicalreport 0 +universityof 0 +texa 0 +austin 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..e118ed6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,71 @@ +madison 1 +home 1 +page 1 +comput 1 +student 1 +list 1 +appl 1 +site 1 +pagewelcom 0 +first 0 +year 0 +graduat 0 +univers 0 +wisconsin 0 +studi 0 +scienc 0 +us 0 +also 0 +maintain 0 +frequent 0 +ask 0 +question 0 +latest 0 +powerbook 0 +model 0 +releas 0 +thing 0 +look 0 +section 0 +click 0 +herei 0 +amass 0 +good 0 +number 0 +catagori 0 +check 0 +depart 0 +alma 0 +mater 0 +visit 0 +often 0 +needsth 0 +nando 0 +time 0 +great 0 +new 0 +coverageth 0 +spot 0 +mind 0 +numb 0 +soap 0 +operaish 0 +drivelziffnet 0 +industri 0 +newsc 0 +databas 0 +manag 0 +system 0 +construct 0 +compil 0 +keep 0 +classworktodai 0 +dilbert 0 +chucklejon 0 +bodner 0 +jonb 0 +wisc 0 +mound 0 +last 0 +modifi 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..53b2c621 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,85 @@ +depart 1 +scienc 1 +home 1 +madison 1 +comput 1 +student 1 +chines 1 +academi 1 +china 1 +nanj 1 +univers 1 +advanc 1 +network 1 +spring 1 +welcom 0 +page 0 +first 0 +year 0 +graduat 0 +came 0 +frombeij 0 +hometown 0 +capitol 0 +jiangsu 0 +provinc 0 +degre 0 +wisconsin 0 +institut 0 +autom 0 +beij 0 +specil 0 +pattern 0 +recognit 0 +imag 0 +process 0 +biomed 0 +engin 0 +southeast 0 +chinacurr 0 +activ 0 +cours 0 +topic 0 +databas 0 +manag 0 +oper 0 +system 0 +teach 0 +assist 0 +data 0 +structur 0 +current 0 +address 0 +work 0 +west 0 +dayton 0 +street 0 +tele 0 +offic 0 +could 0 +finger 0 +wisc 0 +refer 0 +inform 0 +class 0 +technic 0 +stuffjava 0 +placeshor 0 +tutorialchina 0 +affairchina 0 +democracybeij 0 +place 0 +interest 0 +stanford 0 +groupstanford 0 +medic 0 +informaticsmit 0 +commun 0 +control 0 +signal 0 +processingjob 0 +site 0 +newsyou 0 +visitor 0 +number 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..63619e87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,79 @@ +jose 1 +comput 1 +mercuri 1 +new 1 +home 1 +andnando 1 +jeff 1 +shabel 1 +offic 1 +scienc 1 +graduat 1 +view 1 +engin 1 +cupertino 1 +high 1 +school 1 +favorit 1 +columbia 1 +hous 1 +join 1 +pagech 0 +welcom 0 +page 0 +wisconsinch 0 +theme 0 +song 0 +hour 0 +tue 0 +thur 0 +appoint 0 +person 0 +informationmajor 0 +architectur 0 +emphasi 0 +statu 0 +second 0 +year 0 +student 0 +fall 0 +schedul 0 +academ 0 +background 0 +receiv 0 +diego 0 +electr 0 +depart 0 +town 0 +monta 0 +vista 0 +plan 0 +sport 0 +team 0 +golden 0 +state 0 +warrior 0 +basketbal 0 +shark 0 +hockei 0 +francisco 0 +footbal 0 +oakland 0 +link 0 +newsmus 0 +find 0 +deal 0 +also 0 +tip 0 +info 0 +music 0 +club 0 +miscellan 0 +print 0 +postscript 0 +document 0 +window 0 +send 0 +mail 0 +jshabel 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..25093b6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,193 @@ +data 2 +disk 2 +report 2 +tape 2 +livni 2 +perform 1 +technolog 1 +larg 1 +proceed 1 +confer 1 +jussi 1 +comput 1 +depart 1 +univers 1 +tertiari 1 +miron 1 +join 1 +home 1 +myllymaki 1 +wisconsin 1 +explor 1 +structur 1 +visual 1 +submit 1 +research 1 +wisc 1 +analysi 1 +includ 1 +buffer 1 +dataset 1 +storageto 1 +recent 1 +relat 1 +appli 1 +organ 1 +set 1 +andtap 1 +intern 1 +integr 1 +parallel 1 +theintern 1 +engin 1 +ramakrishnan 1 +synchron 1 +technic 1 +master 1 +yoav 1 +weiss 1 +class 1 +link 1 +scsi 1 +digit 1 +myllymakijussi 0 +assist 0 +scienc 0 +west 0 +dayton 0 +street 0 +madison 0 +telephon 0 +email 0 +eduresearch 0 +summaryi 0 +interest 0 +dbm 0 +oper 0 +onadvanc 0 +arrai 0 +mcurrent 0 +studi 0 +memori 0 +us 0 +andvisu 0 +deviseproject 0 +advisor 0 +prof 0 +mironlivni 0 +work 0 +improv 0 +joinsof 0 +volum 0 +resid 0 +public 0 +listbelow 0 +solv 0 +problem 0 +associ 0 +divers 0 +characterist 0 +andfunct 0 +limit 0 +media 0 +paper 0 +datavisu 0 +discuss 0 +metadata 0 +managementissu 0 +complex 0 +involv 0 +refere 0 +publicationseffici 0 +concurr 0 +theori 0 +measur 0 +evalu 0 +commun 0 +system 0 +octob 0 +programperform 0 +karen 0 +karavan 0 +bartonp 0 +miller 0 +third 0 +workshop 0 +environ 0 +andtool 0 +scientif 0 +august 0 +tertiarystorag 0 +daniel 0 +ford 0 +februari 0 +alsoavail 0 +almaden 0 +withmiron 0 +raghu 0 +spie 0 +societi 0 +optic 0 +januari 0 +access 0 +acmsigmetr 0 +publicationdevis 0 +queri 0 +beyer 0 +chen 0 +donjerkov 0 +lawand 0 +wenger 0 +sigmod 0 +storag 0 +andmiron 0 +dataengin 0 +publicationsdisk 0 +tapeaccess 0 +project 0 +degreeproject 0 +client 0 +server 0 +model 0 +networkarchitectur 0 +thesi 0 +helsinki 0 +industri 0 +manag 0 +finnish 0 +documentsimplement 0 +treealgorithm 0 +jeff 0 +schwarz 0 +experi 0 +implement 0 +filesystem 0 +trishul 0 +chilimbi 0 +overview 0 +current 0 +productsoverview 0 +raid 0 +supplier 0 +productssom 0 +frequent 0 +need 0 +unifi 0 +search 0 +adaptec 0 +adapt 0 +alpha 0 +workstationsandpcsandtechn 0 +journaland 0 +whitepap 0 +researchandcyberjourn 0 +quantum 0 +linear 0 +tapeanddlt 0 +faqandwhitepap 0 +solarisandsparcstationsandtechn 0 +faqandstorag 0 +faqand 0 +otherusenet 0 +faqsmani 0 +found 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..36a08c91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepag 0 +jyothithi 0 +page 0 +construct 0 +info 0 +student 0 +cours 0 +grade 0 +other 0 +sorri 0 +dissappoint 0 +email 0 +jyothi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..9acc9bf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,88 @@ +comput 1 +school 1 +madison 1 +scienc 1 +high 1 +karen 1 +parallel 1 +perform 1 +support 1 +ship 1 +karavaniceveryth 0 +need 0 +know 0 +learn 0 +public 0 +karavanicresearch 0 +assist 0 +paradyn 0 +tool 0 +project 0 +univers 0 +wisconsin 0 +depart 0 +west 0 +dayton 0 +street 0 +karavan 0 +wisc 0 +current 0 +pursu 0 +research 0 +interest 0 +includ 0 +environ 0 +autom 0 +tune 0 +process 0 +oper 0 +system 0 +databasesask 0 +women 0 +wic 0 +frontier 0 +cool 0 +program 0 +dane 0 +counti 0 +studentstrio 0 +student 0 +servic 0 +free 0 +tutor 0 +undergradu 0 +miss 0 +site 0 +page 0 +could 0 +save 0 +life 0 +safer 0 +pagefor 0 +chocol 0 +lover 0 +onlystuyves 0 +alumni 0 +associationstuyves 0 +class 0 +thoma 0 +legisl 0 +inform 0 +internetth 0 +constitut 0 +cure 0 +anyth 0 +salt 0 +water 0 +sweat 0 +tear 0 +isak 0 +dinesen 0 +port 0 +safe 0 +sail 0 +thing 0 +admir 0 +grace 0 +hopper 0 +pioneer 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..2298a7cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,109 @@ +extens 2 +stefano 2 +kaxira 1 +jame 1 +kiloprocessor 1 +intern 1 +wisconsin 1 +memori 1 +coher 1 +cach 1 +parallel 1 +glow 1 +hierarch 1 +goodman 1 +perform 1 +comput 1 +papakonstantin 1 +research 1 +interest 1 +summari 1 +recent 1 +share 1 +design 1 +processor 1 +architectur 1 +appear 1 +proceed 1 +workshop 1 +base 1 +high 1 +cost 1 +softwar 1 +synthesi 1 +us 1 +prolog 1 +tsanaka 1 +home 0 +page 0 +wisc 0 +editor 0 +ieee 0 +sciresearch 0 +assist 0 +univers 0 +publicationsresearch 0 +multiprocess 0 +scalabl 0 +interfac 0 +aspect 0 +galileo 0 +introduc 0 +collaborationwith 0 +work 0 +examin 0 +depth 0 +option 0 +develop 0 +upcom 0 +standard 0 +incolabor 0 +david 0 +stein 0 +gjess 0 +public 0 +protocol 0 +wide 0 +data 0 +goodmanto 0 +confer 0 +supercomput 0 +also 0 +technic 0 +report 0 +kaxirasto 0 +process 0 +symposium 0 +april 0 +implement 0 +wind 0 +tunnel 0 +goodmannd 0 +march 0 +goodmanst 0 +august 0 +kaxirasunivers 0 +scienc 0 +dept 0 +juli 0 +tool 0 +simul 0 +prototyp 0 +monitor 0 +multiprocessor 0 +system 0 +stafylopati 0 +kaxirasinform 0 +technolog 0 +autom 0 +dedic 0 +specif 0 +pekmestzi 0 +kaxirasp 0 +greec 0 +hardwar 0 +methodolog 0 +kaxirasmicroprocess 0 +microprogram 0 +north 0 +holland 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..ac9ee8b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,251 @@ +page 2 +caltech 1 +inform 1 +probabl 1 +comput 1 +slide 1 +like 1 +world 1 +make 1 +us 1 +would 1 +databas 1 +talk 1 +gave 1 +index 1 +quot 1 +keeper 1 +home 1 +steven 1 +everyth 1 +made 1 +note 1 +markup 1 +look 1 +peopl 1 +allow 1 +start 1 +year 1 +graduat 1 +student 1 +ever 1 +leav 1 +surpris 1 +aliv 1 +want 1 +hedgehog 1 +contact 1 +pager 1 +write 1 +number 1 +list 1 +project 1 +insid 1 +user 1 +foughtthei 0 +heaven 0 +perfect 0 +littl 0 +doesn 0 +realli 0 +need 0 +light 0 +lauri 0 +anderson 0 +strang 0 +angel 0 +possibl 0 +structur 0 +indic 0 +piec 0 +rather 0 +access 0 +bitmap 0 +displai 0 +includ 0 +theblind 0 +standard 0 +tag 0 +whateverbrows 0 +wish 0 +also 0 +literari 0 +convent 0 +ironi 0 +satir 0 +sarcasm 0 +butnoth 0 +contain 0 +herein 0 +meant 0 +offens 0 +areoffend 0 +stupid 0 +third 0 +scienc 0 +depart 0 +firsttwo 0 +week 0 +sinc 0 +support 0 +take 0 +care 0 +varieti 0 +machin 0 +vari 0 +degre 0 +success 0 +side 0 +never 0 +publish 0 +done 0 +anyth 0 +impress 0 +miracl 0 +pass 0 +prelim 0 +research 0 +addup 0 +hill 0 +bean 0 +fizzl 0 +result 0 +areobtain 0 +junior 0 +level 0 +programm 0 +creatingkiosk 0 +front 0 +end 0 +visual 0 +primit 0 +system 0 +perman 0 +skill 0 +free 0 +discov 0 +especi 0 +wasn 0 +convinc 0 +thosewho 0 +know 0 +well 0 +argu 0 +proof 0 +mybe 0 +anywai 0 +might 0 +read 0 +thoughtson 0 +electron 0 +mail 0 +reliabl 0 +specif 0 +locat 0 +often 0 +work 0 +ifyou 0 +person 0 +someth 0 +address 0 +wisc 0 +becom 0 +clear 0 +wantto 0 +short 0 +notic 0 +give 0 +variou 0 +creation 0 +thought 0 +todo 0 +updat 0 +sporad 0 +associ 0 +rsum 0 +postscript 0 +document 0 +html 0 +section 0 +long 0 +outof 0 +date 0 +unfortun 0 +danenet 0 +dilhr 0 +jobnet 0 +soon 0 +defunct 0 +institut 0 +archiv 0 +photonet 0 +personnel 0 +directori 0 +much 0 +better 0 +anyon 0 +enter 0 +databaseus 0 +form 0 +interfac 0 +distribut 0 +object 0 +call 0 +java 0 +danger 0 +love 0 +come 0 +hell 0 +freez 0 +rate 0 +break 0 +hierarchi 0 +consult 0 +somewher 0 +fought 0 +unifi 0 +attribut 0 +sfuai 0 +informationag 0 +intellectu 0 +properti 0 +assigna 0 +uniqu 0 +serial 0 +refer 0 +atth 0 +provid 0 +sourc 0 +contextu 0 +pointer 0 +relev 0 +bui 0 +adob 0 +distil 0 +translat 0 +rsuminto 0 +chanc 0 +ofread 0 +suppos 0 +print 0 +pinch 0 +certaintruth 0 +psycholog 0 +softwar 0 +eventuallypick 0 +aren 0 +taught 0 +explicitli 0 +think 0 +possibleto 0 +easier 0 +cheap 0 +shot 0 +thing 0 +hate 0 +idea 0 +mull 0 +accessibleto 0 +small 0 +subset 0 +tough 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..86244ab8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,24 @@ +perform 1 +krishna 0 +kunchithapadamkrishna 0 +kunchithapadamgreet 0 +welcom 0 +page 0 +interest 0 +read 0 +languag 0 +indian 0 +classic 0 +music 0 +miscellaneouspubl 0 +data 0 +distribut 0 +steer 0 +toolsresum 0 +gzip 0 +postscript 0 +contact 0 +search 0 +last 0 +modifi 0 +bykk 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..249f591a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,31 @@ +tuft 1 +madison 1 +kristin 1 +home 1 +research 1 +scienc 1 +wisc 1 +inform 1 +pagekristin 0 +assist 0 +comput 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +eduadvisor 0 +david 0 +dewitt 0 +miscellani 0 +serveruw 0 +dbm 0 +groupacm 0 +sigmod 0 +server 0 +pageeo 0 +project 0 +officelast 0 +modifi 0 +tuftekristin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..d83d5ee8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,47 @@ +krung 1 +comput 1 +page 1 +inform 1 +follow 1 +cours 1 +work 1 +scienc 1 +depart 1 +person 1 +madison 1 +wisconsin 1 +homepageupd 0 +novemb 0 +homepag 0 +underconstructioni 0 +keep 0 +short 0 +good 0 +serf 0 +year 0 +cometh 0 +relat 0 +topic 0 +research 0 +mathemat 0 +program 0 +project 0 +pursu 0 +compani 0 +favorit 0 +hobbi 0 +opinion 0 +life 0 +linkedth 0 +import 0 +link 0 +univers 0 +whole 0 +uniqu 0 +entiti 0 +electron 0 +librari 0 +system 0 +sinapiromsaran 0 +emailkrung 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..c9e9b411 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,137 @@ +kunen 2 +autom 2 +mathemat 2 +logic 1 +comput 1 +theori 1 +reason 1 +appear 1 +univers 1 +hart 1 +wisconsin 1 +deduct 1 +program 1 +applic 1 +semant 1 +measur 1 +axiom 1 +wisc 1 +interest 1 +topolog 1 +research 1 +work 1 +theorem 1 +singl 1 +group 1 +expon 1 +fundamenta 1 +mathematica 1 +technic 1 +report 1 +quasigroup 1 +algebra 1 +loop 1 +preprint 1 +review 1 +math 1 +home 0 +page 0 +kenneth 0 +professormath 0 +scienc 0 +dayton 0 +madison 0 +mail 0 +edutelephon 0 +stanford 0 +summari 0 +involv 0 +typic 0 +tool 0 +like 0 +resolutionto 0 +prove 0 +studi 0 +languag 0 +likeprolog 0 +specif 0 +topic 0 +consid 0 +prologus 0 +negat 0 +failur 0 +incompat 0 +betweenleast 0 +fix 0 +point 0 +prolog 0 +style 0 +backtrack 0 +axiomat 0 +besid 0 +right 0 +thissubject 0 +relat 0 +variou 0 +abstract 0 +area 0 +theoret 0 +mani 0 +basic 0 +question 0 +turn 0 +independ 0 +usualaxiom 0 +select 0 +recent 0 +public 0 +follow 0 +postscript 0 +file 0 +shortest 0 +ramsei 0 +boyer 0 +moor 0 +mill 0 +corson 0 +compact 0 +space 0 +local 0 +constant 0 +function 0 +answer 0 +liter 0 +construct 0 +moufang 0 +associ 0 +law 0 +structur 0 +conjugaci 0 +close 0 +complet 0 +result 0 +link 0 +resolut 0 +press 0 +weak 0 +extens 0 +rough 0 +draft 0 +book 0 +note 0 +moschovaki 0 +american 0 +monthli 0 +cours 0 +taught 0 +fall 0 +geometr 0 +infer 0 +foundat 0 +spring 0 +comp 0 +artifici 0 +intellig 0 +last 0 +chang 0 +octob 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..cd59dc87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,245 @@ +laru 3 +program 2 +jame 2 +parallel 2 +languag 2 +research 2 +memori 2 +compil 2 +support 2 +system 2 +comput 1 +softwar 1 +share 1 +david 1 +wood 1 +mark 1 +hill 1 +confer 1 +intern 1 +architectur 1 +tool 1 +august 1 +wisconsin 1 +project 1 +profil 1 +execut 1 +edit 1 +data 1 +richard 1 +control 1 +employ 1 +wisc 1 +interest 1 +machin 1 +wind 1 +tunnel 1 +grain 1 +ball 1 +brad 1 +guhan 1 +viswanathan 1 +sigplan 1 +implement 1 +novemb 1 +alvin 1 +lebeck 1 +steven 1 +reinhardt 1 +sixth 1 +forprogram 1 +oper 1 +asplo 1 +octob 1 +first 1 +develop 1 +juli 1 +educ 1 +cours 1 +recent 1 +univers 1 +california 1 +berkelei 1 +colleg 1 +design 1 +larg 1 +path 1 +thoma 1 +appear 1 +micro 1 +protocol 1 +satish 1 +chandra 1 +pldi 1 +eric 1 +schnarr 1 +effici 1 +applic 1 +distribut 1 +symposium 1 +user 1 +level 1 +babak 1 +falsafi 1 +ioanni 1 +schoina 1 +ann 1 +roger 1 +annot 1 +hardwar 1 +lorenz 1 +bell 1 +lab 1 +flow 1 +perform 1 +home 0 +page 0 +associ 0 +professor 0 +sciencedepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usalaru 0 +eduphon 0 +secretari 0 +fingerson 0 +thea 0 +sklenar 0 +department 0 +offic 0 +upcom 0 +paper 0 +graduat 0 +summari 0 +harvard 0 +languagesand 0 +particular 0 +symbol 0 +trace 0 +librari 0 +structuresc 0 +java 0 +spim 0 +wartsrec 0 +paperseffici 0 +decemb 0 +programminglanguag 0 +gregori 0 +wilson 0 +us 0 +press 0 +teapot 0 +write 0 +coher 0 +instruct 0 +schedul 0 +andjam 0 +workshop 0 +wcsss 0 +februari 0 +irregular 0 +shubhendu 0 +mukherje 0 +shamik 0 +sharma 0 +annerog 0 +joel 0 +saltz 0 +fifth 0 +principl 0 +practiceof 0 +ppopp 0 +independ 0 +languagesdesign 0 +june 0 +tempest 0 +substrat 0 +portabl 0 +compcon 0 +spring 0 +march 0 +static 0 +branch 0 +frequenc 0 +analysi 0 +youfeng 0 +annual 0 +ieee 0 +microarchitectur 0 +specif 0 +markhil 0 +supercomput 0 +time 0 +spent 0 +messag 0 +pass 0 +fine 0 +access 0 +jameslaru 0 +cachier 0 +automat 0 +insert 0 +cico 0 +trishul 0 +chilimbi 0 +icpp 0 +bibliographi 0 +unpublish 0 +manuscript 0 +revis 0 +frequent 0 +cooper 0 +scalabl 0 +multiprocessor 0 +transact 0 +toc 0 +wart 0 +madhusudhan 0 +talluri 0 +new 0 +graduatesbrad 0 +vassar 0 +septemb 0 +techniqu 0 +languagesfirst 0 +oracl 0 +huelsbergen 0 +dynam 0 +depend 0 +tball 0 +summarymi 0 +focus 0 +problem 0 +part 0 +thewisconsin 0 +havehelp 0 +hybrid 0 +computerarchitectur 0 +facilit 0 +parallelmachin 0 +current 0 +student 0 +demonstr 0 +exploit 0 +power 0 +coherencepolici 0 +also 0 +evalu 0 +help 0 +programmersunderstand 0 +improv 0 +andi 0 +algorithm 0 +provid 0 +moredetail 0 +understand 0 +within 0 +routin 0 +hasidentifi 0 +possibl 0 +better 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..5c0ad087 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick 0 +pagenick 0 +pageoffic 0 +phone 0 +email 0 +leavi 0 +wisc 0 +eduoffic 0 +hour 0 +tuesdai 0 +wednessdai 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..a251d3a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,77 @@ +lederman 1 +steven 1 +huss 1 +research 1 +madison 1 +forum 1 +home 1 +page 1 +interest 1 +univ 1 +wisconsin 1 +also 1 +mpistandard 1 +book 1 +inform 1 +complet 1 +sourc 1 +file 1 +comput 1 +scienc 1 +wisc 1 +includ 0 +relat 0 +thewisconsin 0 +wind 0 +tunnel 0 +project 0 +area 0 +parallel 0 +linear 0 +algebra 0 +iscov 0 +prismproject 0 +heavili 0 +invol 0 +sever 0 +other 0 +recent 0 +publish 0 +origin 0 +order 0 +press 0 +isbn 0 +look 0 +refer 0 +editor 0 +current 0 +draft 0 +pleas 0 +keep 0 +mind 0 +work 0 +ongo 0 +andit 0 +document 0 +intend 0 +ongoingwork 0 +committe 0 +member 0 +compress 0 +postscript 0 +compressedtar 0 +individu 0 +avail 0 +would 0 +finger 0 +dept 0 +dayton 0 +phone 0 +messag 0 +desper 0 +mail 0 +http 0 +html 0 +offic 0 +statist 0 +build 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..ac4a6d2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,84 @@ +network 1 +design 1 +control 1 +virtual 1 +proceed 1 +lawrenc 1 +landweb 1 +comput 1 +univers 1 +wisconsin 1 +protocol 1 +high 1 +speed 1 +research 1 +project 1 +implement 1 +congest 1 +admiss 1 +infocom 1 +confer 1 +dynam 1 +time 1 +window 1 +faber 1 +mukherje 1 +loop 1 +home 0 +page 0 +professor 0 +scienc 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +email 0 +wisc 0 +purdu 0 +interest 0 +electronicmail 0 +summari 0 +program 0 +focus 0 +participatingin 0 +gigabit 0 +darpa 0 +nation 0 +involvesth 0 +testb 0 +oper 0 +atgigabit 0 +second 0 +data 0 +rate 0 +work 0 +onissu 0 +visualizationof 0 +atmospher 0 +phenomena 0 +conferenc 0 +sampl 0 +recent 0 +public 0 +fast 0 +circuit 0 +establishmentmethod 0 +olsen 0 +theieee 0 +francisco 0 +april 0 +packet 0 +feedback 0 +witht 0 +sigcommconfer 0 +baltimor 0 +august 0 +gener 0 +clock 0 +combin 0 +close 0 +open 0 +ieee 0 +florenc 0 +coursesconnect 0 +tabl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..cfa51e2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,49 @@ +comput 2 +scienc 1 +lloyd 1 +univers 1 +depart 1 +linguist 1 +shannon 1 +madison 1 +comp 1 +utah 1 +languag 1 +home 0 +page 0 +work 0 +address 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +wisc 0 +respons 0 +us 0 +lectur 0 +section 0 +offic 0 +hour 0 +wednesdai 0 +thursdai 0 +appoint 0 +fall 0 +cours 0 +construct 0 +compil 0 +variou 0 +link 0 +women 0 +chemistri 0 +person 0 +engin 0 +career 0 +servic 0 +archiv 0 +natur 0 +process 0 +artifici 0 +intellig 0 +cognit 0 +xsoft 0 +lexdemo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..0e190240 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,193 @@ +page 1 +option 1 +cool 1 +fill 1 +luka 1 +fall 1 +also 1 +mail 1 +list 1 +click 1 +entertain 1 +check 1 +thing 1 +lone 1 +meet 1 +women 1 +interact 1 +realli 1 +like 1 +know 1 +feel 1 +search 1 +checkbox 1 +includ 1 +christoph 0 +home 0 +pagechristoph 0 +lukasrelev 0 +inform 0 +offic 0 +phone 0 +email 0 +wisc 0 +edui 0 +appar 0 +coordin 0 +mspl 0 +workshipi 0 +defeat 0 +rival 0 +quest 0 +becom 0 +semest 0 +sunivers 0 +wisconsin 0 +program 0 +languag 0 +seminar 0 +czar 0 +cours 0 +go 0 +take 0 +festiv 0 +java 0 +taught 0 +advisor 0 +tuft 0 +univers 0 +site 0 +save 0 +tiger 0 +number 0 +free 0 +prisonerthi 0 +stock 0 +quoteserv 0 +maintain 0 +fabul 0 +wealth 0 +todd 0 +amus 0 +friend 0 +pagebet 0 +polit 0 +candid 0 +legal 0 +iowa 0 +electron 0 +market 0 +identitycaptain 0 +kirk 0 +sing 0 +gui 0 +troubl 0 +throughamaz 0 +technolog 0 +longer 0 +need 0 +concern 0 +withtri 0 +real 0 +virtual 0 +girlfriend 0 +traci 0 +teri 0 +wait 0 +wife 0 +incred 0 +jump 0 +catthi 0 +anywai 0 +well 0 +pleas 0 +send 0 +case 0 +someth 0 +current 0 +name 0 +address 0 +favorit 0 +appli 0 +killer 0 +buttmunchextrem 0 +dudemichael 0 +nesmith 0 +fanfoolmyth 0 +figurewick 0 +good 0 +basketbal 0 +playervalu 0 +studentment 0 +defectivea 0 +wkrp 0 +cincinatti 0 +tragic 0 +figuregeek 0 +tradesgonzo 0 +admirernetscap 0 +junki 0 +child 0 +pornpersonifi 0 +organ 0 +condom 0 +stretch 0 +much 0 +readi 0 +blowflam 0 +testicl 0 +outer 0 +space 0 +tast 0 +goodpoetri 0 +guruhogwildthi 0 +kick 0 +assman 0 +manbig 0 +dudeuh 0 +ohprofession 0 +muff 0 +diverregress 0 +higher 0 +lifeformherald 0 +alien 0 +invas 0 +forcechri 0 +html 0 +formsalienherpetophiletodd 0 +turnidg 0 +hatth 0 +mancreepi 0 +laugh 0 +headsmal 0 +planetdr 0 +companioneast 0 +bunnycyberweenietcl 0 +hellbeast 0 +simpli 0 +submit 0 +reload 0 +mayb 0 +figur 0 +automat 0 +keyword 0 +interest 0 +superhighwai 0 +drug 0 +cosmo 0 +irrit 0 +gross 0 +nake 0 +scatolog 0 +pervert 0 +offspr 0 +food 0 +etymolog 0 +phat 0 +gnarli 0 +bogu 0 +wierd 0 +cybermuffin 0 +pictur 0 +erotica 0 +chees 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..9a175d5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,84 @@ +comput 1 +univers 1 +scienc 1 +home 1 +page 1 +want 1 +wuhan 1 +dept 1 +ling 1 +zheng 1 +madison 1 +mail 1 +know 1 +click 1 +research 1 +paradyn 1 +take 1 +look 1 +alumni 1 +best 1 +graduat 1 +welcom 0 +depart 0 +sheboygan 0 +dayton 0 +offic 0 +phone 0 +lzheng 0 +wisc 0 +shameless 0 +self 0 +promot 0 +resum 0 +text 0 +version 0 +side 0 +interest 0 +assist 0 +group 0 +current 0 +hack 0 +onto 0 +hpux 0 +port 0 +boss 0 +barton 0 +miller 0 +also 0 +charg 0 +chinaand 0 +girlfriend 0 +pictur 0 +temporarili 0 +architectur 0 +educ 0 +prese 0 +winsconsin 0 +iowa 0 +officem 0 +marcelo 0 +goncalv 0 +ignor 0 +china 0 +place 0 +surf 0 +compani 0 +hereif 0 +school 0 +sthe 0 +infom 0 +could 0 +america 0 +schoolssend 0 +suggest 0 +homepag 0 +bother 0 +thank 0 +last 0 +updat 0 +march 0 +visitor 0 +number 0 +sinc 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..2c3d5acd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,44 @@ +know 1 +manuvir 1 +look 1 +like 1 +golf 1 +home 0 +pagemanuvir 0 +dasnow 0 +name 0 +andwhat 0 +hello 0 +feelfre 0 +around 0 +need 0 +inform 0 +somethingsend 0 +email 0 +passion 0 +anact 0 +photo 0 +later 0 +manuvirwhat 0 +gener 0 +start 0 +advisor 0 +better 0 +thisto 0 +keep 0 +monei 0 +come 0 +turn 0 +theorigin 0 +america 0 +team 0 +cours 0 +leagu 0 +plai 0 +dai 0 +sundai 0 +round 0 +final 0 +consin 0 +said 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..076e9289 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,394 @@ +comput 3 +hill 3 +mark 3 +architectur 2 +memori 2 +system 2 +wisconsin 2 +david 2 +wood 2 +parallel 2 +page 2 +research 2 +jame 2 +perform 2 +share 2 +laru 2 +intern 2 +sarita 2 +adv 2 +scienc 2 +hardwar 2 +june 2 +symposium 2 +cach 2 +wind 1 +tunnel 1 +isca 1 +home 1 +univers 1 +address 1 +project 1 +juli 1 +implement 1 +support 1 +confer 1 +talluri 1 +ieee 1 +transact 1 +august 1 +current 1 +inform 1 +recent 1 +engin 1 +program 1 +tabl 1 +oper 1 +shubhendu 1 +mukherje 1 +distribut 1 +madhusudhan 1 +alvin 1 +lebeck 1 +steven 1 +reinhardt 1 +kessler 1 +model 1 +wisc 1 +offic 1 +like 1 +data 1 +advanc 1 +languag 1 +machin 1 +design 1 +level 1 +space 1 +interfac 1 +softwar 1 +supercomput 1 +subblock 1 +babak 1 +falsafi 1 +simul 1 +novemb 1 +sigmetr 1 +consist 1 +first 1 +employ 1 +email 1 +markhil 1 +associ 1 +professor 1 +content 1 +hour 1 +interest 1 +sampler 1 +us 1 +group 1 +tool 1 +wart 1 +patterson 1 +spec 1 +benchmark 1 +suit 1 +california 1 +berkelei 1 +high 1 +larg 1 +analysi 1 +requir 1 +work 1 +expect 1 +workstat 1 +process 1 +compil 1 +tempest 1 +madhu 1 +translat 1 +cluster 1 +base 1 +tlb 1 +experiment 1 +coher 1 +applic 1 +ann 1 +roger 1 +protocol 1 +superpag 1 +comparison 1 +trace 1 +sampl 1 +multi 1 +megabyt 1 +cooper 1 +multiprocessor 1 +lewi 1 +weak 1 +rice 1 +richard 1 +crai 1 +pagemark 0 +andelectr 0 +engineeringat 0 +wisconsint 0 +teach 0 +catalog 0 +educ 0 +andsummari 0 +paper 0 +graduateslink 0 +world 0 +wide 0 +stuff 0 +oralpresent 0 +advic 0 +includ 0 +show 0 +give 0 +talk 0 +onlin 0 +forcach 0 +proof 0 +sound 0 +depart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usamarkhil 0 +eduphon 0 +secretari 0 +fingerson 0 +thea 0 +sklenar 0 +department 0 +fall 0 +mondai 0 +wednesdai 0 +appoint 0 +educurr 0 +teachingfal 0 +ifal 0 +topic 0 +java 0 +icatalog 0 +cours 0 +teachc 0 +organ 0 +programmingc 0 +introduct 0 +architecturec 0 +iieduc 0 +michigan 0 +evaluationresearch 0 +summarymi 0 +target 0 +multiprocessorsand 0 +uniprocessor 0 +import 0 +determin 0 +sustain 0 +mywork 0 +emphas 0 +quantit 0 +often 0 +evaluationtechniqu 0 +much 0 +part 0 +windtunnel 0 +projectwith 0 +prof 0 +manystud 0 +futur 0 +massiv 0 +computerswil 0 +built 0 +node 0 +levelparallel 0 +inwhich 0 +uniformli 0 +refer 0 +seek 0 +todevelop 0 +consensu 0 +middl 0 +languagesand 0 +recentlypropos 0 +enabl 0 +programm 0 +librari 0 +messag 0 +pass 0 +transpar 0 +hybrid 0 +combin 0 +aredevelop 0 +think 0 +aclust 0 +hypothet 0 +platform 0 +name 0 +toolsto 0 +cull 0 +manner 0 +similarto 0 +aeronaut 0 +convent 0 +designairplan 0 +talluritarget 0 +improv 0 +lookasid 0 +buffer 0 +align 0 +option 0 +chang 0 +complet 0 +superpagesand 0 +partial 0 +asplosandsosppap 0 +papersth 0 +annot 0 +bibliographi 0 +unpublish 0 +manuscript 0 +revis 0 +frequent 0 +bidirect 0 +technolog 0 +transfer 0 +sabbat 0 +industri 0 +network 0 +fine 0 +grain 0 +commun 0 +anddavid 0 +optimist 0 +execut 0 +sashikanth 0 +chandrasekaran 0 +workshop 0 +pad 0 +yousef 0 +khalidi 0 +princip 0 +sosp 0 +decemb 0 +presidenti 0 +young 0 +investig 0 +award 0 +final 0 +report 0 +effici 0 +irregular 0 +shamik 0 +sharma 0 +joel 0 +saltz 0 +ppopp 0 +cost 0 +effect 0 +februari 0 +solv 0 +microstructur 0 +electrostat 0 +propos 0 +frank 0 +traenkl 0 +sangta 0 +chemic 0 +specif 0 +user 0 +ioanni 0 +schoina 0 +surpass 0 +less 0 +forprogram 0 +asplo 0 +octob 0 +evalu 0 +directori 0 +medium 0 +scale 0 +memorymultiprocessor 0 +techniqu 0 +scalabl 0 +toc 0 +new 0 +jeffrei 0 +dionisio 0 +pnevmatikato 0 +alan 0 +smith 0 +micro 0 +unifi 0 +formal 0 +four 0 +tpd 0 +implic 0 +toler 0 +fault 0 +andrea 0 +farid 0 +pour 0 +march 0 +mechan 0 +satish 0 +chandra 0 +subbarao 0 +palacharla 0 +virtual 0 +prototyp 0 +placement 0 +algorithm 0 +real 0 +index 0 +differ 0 +kourosh 0 +gharachorloo 0 +anoop 0 +gupta 0 +john 0 +hennessi 0 +journal 0 +tradeoff 0 +size 0 +shing 0 +kong 0 +detect 0 +race 0 +barton 0 +miller 0 +robert 0 +netzer 0 +scheme 0 +vikram 0 +mari 0 +vernon 0 +estim 0 +miss 0 +ratio 0 +kessleracm 0 +stack 0 +highli 0 +extend 0 +abstract 0 +sequenti 0 +order 0 +definit 0 +graduatesmadhusudhan 0 +hierarchi 0 +microsystem 0 +assist 0 +secondari 0 +click 0 +last 0 +updatedw 0 +keyword 0 +help 0 +search 0 +rank 0 +higher 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..d07107db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,18 @@ +marko 1 +page 1 +wisc 1 +home 0 +zaharioudaki 0 +research 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaphon 0 +mail 0 +note 0 +construct 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..aebe31a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,13 @@ +home 1 +page 1 +michael 1 +birk 1 +section 1 +project 0 +list 0 +program 0 +languag 0 +link 0 +alltraxx 0 +mbirk 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..08f8d1f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,55 @@ +mcauliff 1 +mark 1 +solomon 1 +madison 1 +carei 1 +sigmod 1 +marvin 1 +proceed 1 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +dayton 0 +wisc 0 +research 0 +interest 0 +design 0 +implement 0 +object 0 +orient 0 +databas 0 +system 0 +public 0 +dewitt 0 +franklin 0 +hall 0 +naughton 0 +schuh 0 +tsatalo 0 +white 0 +zwill 0 +shoringup 0 +persist 0 +applic 0 +proc 0 +atrac 0 +base 0 +simul 0 +pointer 0 +swizzl 0 +techniqu 0 +ieee 0 +data 0 +engin 0 +march 0 +michael 0 +towardseffect 0 +effici 0 +free 0 +space 0 +manag 0 +appear 0 +confer 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..9cd40014 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,116 @@ +marc 1 +shapiro 1 +wisc 1 +page 1 +tautolog 1 +languag 1 +fast 1 +pointer 1 +think 1 +program 1 +html 1 +madison 1 +hous 1 +believ 0 +current 0 +obsess 0 +fond 0 +disappear 0 +fear 0 +repuls 0 +ponder 0 +analys 0 +watch 0 +lot 0 +jacki 0 +chan 0 +movi 0 +design 0 +read 0 +much 0 +try 0 +teach 0 +elementari 0 +school 0 +student 0 +term 0 +recurs 0 +hope 0 +interrupt 0 +hoar 0 +wrote 0 +introduct 0 +high 0 +level 0 +step 0 +backward 0 +never 0 +recov 0 +home 0 +schedul 0 +todd 0 +automat 0 +accid 0 +gener 0 +elain 0 +dimasi 0 +twisti 0 +littl 0 +amanda 0 +peet 0 +retreather 0 +hyper 0 +mode 0 +emac 0 +thepul 0 +menu 0 +doesn 0 +cool 0 +tag 0 +submiss 0 +softwarei 0 +cobbl 0 +togeth 0 +pldi 0 +abl 0 +work 0 +nowinclud 0 +previous 0 +mostli 0 +miss 0 +file 0 +submit 0 +popl 0 +paper 0 +accur 0 +flow 0 +insensit 0 +point 0 +analysi 0 +shapiroand 0 +susan 0 +horwitz 0 +appear 0 +symposium 0 +principl 0 +variou 0 +address 0 +dept 0 +dayton 0 +mail 0 +talk 0 +finger 0 +marion 0 +list 0 +peopl 0 +know 0 +realli 0 +meet 0 +jonathan 0 +goldstein 0 +paul 0 +ferguson 0 +lawrenc 0 +brown 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..40a67e8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,22 @@ +offic 1 +home 1 +mellen 1 +wisc 1 +pagerob 0 +minimalist 0 +page 0 +last 0 +modifi 0 +august 0 +mellencamp 0 +taship 0 +introduct 0 +oper 0 +system 0 +email 0 +comput 0 +scienc 0 +build 0 +phone 0 +hour 0 +appoint 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..05f9bb50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,82 @@ +melski 1 +page 1 +also 1 +david 1 +person 1 +comput 1 +scienc 1 +madison 1 +work 1 +russian 1 +somedai 1 +info 0 +pagedavid 0 +melskicurr 0 +depart 0 +mill 0 +statisticsmadison 0 +dayton 0 +permen 0 +west 0 +ivesmarshfield 0 +michel 0 +awesom 0 +current 0 +construct 0 +sister 0 +kasei 0 +great 0 +home 0 +brother 0 +eric 0 +semest 0 +teach 0 +coupl 0 +section 0 +rep 0 +program 0 +languag 0 +myexact 0 +schedul 0 +still 0 +need 0 +determin 0 +undergrad 0 +major 0 +studiesher 0 +univers 0 +wisconsin 0 +even 0 +spent 0 +fall 0 +semesterof 0 +russia 0 +chanc 0 +often 0 +miss 0 +make 0 +back 0 +interest 0 +includ 0 +chess 0 +soccer 0 +recent 0 +beenbik 0 +distract 0 +numerousbook 0 +hasti 0 +rewrit 0 +want 0 +link 0 +tomapquest 0 +plan 0 +steal 0 +alot 0 +map 0 +second 0 +give 0 +direct 0 +marshfield 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..1758f48c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,203 @@ +milo 1 +martin 1 +comput 1 +interest 1 +scienc 1 +player 1 +java 1 +wisc 1 +gustavu 1 +adolphu 1 +colleg 1 +compil 1 +architectur 1 +program 1 +year 1 +system 1 +mani 1 +fink 1 +footbal 1 +plai 1 +game 1 +atlanti 1 +ultim 1 +home 1 +student 1 +offic 1 +charl 1 +advanc 1 +mark 1 +hill 1 +technolog 1 +oper 1 +publicationsresearch 1 +perform 1 +advis 1 +humm 1 +micklich 1 +evalu 1 +illicitsubst 1 +detect 1 +fast 1 +neutron 1 +hailperin 1 +next 1 +softwar 1 +direct 1 +quot 1 +page 1 +live 1 +minnesota 1 +land 1 +explor 1 +rule 1 +everyon 1 +pagemilo 0 +graduat 0 +teach 0 +assistantcomput 0 +departmentunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaemail 0 +eduoffic 0 +phone 0 +hour 0 +tuesdai 0 +thursdai 0 +byappointmentba 0 +class 0 +construct 0 +fischer 0 +sit 0 +jame 0 +larusteach 0 +algebra 0 +languag 0 +section 0 +research 0 +interestsi 0 +first 0 +programminglanguag 0 +specif 0 +optim 0 +beinfluenc 0 +hardwar 0 +mobil 0 +addit 0 +challeng 0 +present 0 +design 0 +thing 0 +even 0 +know 0 +summer 0 +argonn 0 +nation 0 +laboratori 0 +develop 0 +divis 0 +view 0 +reconstruct 0 +paramet 0 +us 0 +transmiss 0 +spectroscopi 0 +ieee 0 +nuclear 0 +symposium 0 +medic 0 +imag 0 +confer 0 +yule 0 +sagalovski 0 +techniqu 0 +nucl 0 +inst 0 +meth 0 +school 0 +languageflex 0 +determinist 0 +dynam 0 +parallel 0 +senior 0 +honor 0 +thesi 0 +mathemat 0 +depart 0 +postscript 0 +resourc 0 +compani 0 +found 0 +anintern 0 +scientif 0 +educ 0 +organ 0 +dedic 0 +toadvanc 0 +engin 0 +applic 0 +informationtechnolog 0 +serv 0 +profession 0 +public 0 +fosteringth 0 +open 0 +interchang 0 +inform 0 +promot 0 +highestprofession 0 +ethic 0 +standard 0 +person 0 +bignfl 0 +sinc 0 +myfavorit 0 +team 0 +vike 0 +eventhough 0 +chees 0 +head 0 +colon 0 +conquer 0 +multi 0 +mail 0 +space 0 +combat 0 +wrote 0 +babylon 0 +best 0 +show 0 +imho 0 +email 0 +mythic 0 +world 0 +build 0 +armi 0 +engaug 0 +trade 0 +fight 0 +wonder 0 +monster 0 +train 0 +wizard 0 +discov 0 +underworld 0 +right 0 +current 0 +list 0 +frisbe 0 +associ 0 +combin 0 +element 0 +ofsocc 0 +basketbal 0 +pace 0 +afrisbe 0 +quarterback 0 +receiv 0 +ultimatein 0 +simpl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..4026ab4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,108 @@ +comput 2 +mino 1 +home 1 +page 1 +madison 1 +databas 1 +garofalaki 1 +research 1 +resourc 1 +parallel 1 +queri 1 +scienc 1 +schedul 1 +yanni 1 +wisconsin 1 +interest 1 +multimedia 1 +system 1 +optim 1 +univers 1 +dept 1 +decemb 1 +patra 1 +june 1 +public 1 +ioannidi 1 +sigmod 1 +paper 1 +postscript 1 +technic 1 +report 1 +garofalakismino 0 +wisc 0 +eduphd 0 +candid 0 +assist 0 +depart 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usaoffic 0 +stat 0 +phone 0 +workresearch 0 +effect 0 +manag 0 +complex 0 +process 0 +algorithm 0 +theoryeduc 0 +engin 0 +informat 0 +refere 0 +multi 0 +dimension 0 +proceed 0 +confer 0 +montreal 0 +canada 0 +abstract 0 +issu 0 +survei 0 +enhanc 0 +view 0 +continu 0 +media 0 +banu 0 +ozden 0 +silberschatz 0 +submit 0 +octob 0 +model 0 +check 0 +sequenti 0 +probabilist 0 +real 0 +time 0 +technolog 0 +institut 0 +februari 0 +advisor 0 +ioannidismor 0 +feel 0 +free 0 +peek 0 +resum 0 +pointer 0 +stuff 0 +dbm 0 +reasearch 0 +hellen 0 +societi 0 +vldb 0 +almaden 0 +center 0 +watson 0 +centerdr 0 +michael 0 +bibliograpi 0 +server 0 +logic 0 +program 0 +perpetu 0 +construct 0 +last 0 +updat 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..81cc2eed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,21 @@ +madison 1 +marcelo 1 +gonalv 1 +comput 1 +depart 1 +phone 1 +mjrg 0 +wisc 0 +associ 0 +research 0 +paradyn 0 +project 0 +addresswork 0 +home 0 +scienc 0 +sheboygan 0 +west 0 +dayton 0 +street 0 +sciencesunivers 0 +wisconsin 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..52ed981b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,35 @@ +brian 1 +morgan 1 +wisconsin 1 +conferenc 1 +home 0 +page 0 +morgangradu 0 +studentcomput 0 +scienc 0 +depart 0 +univers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +mail 0 +wisc 0 +telephon 0 +advisor 0 +chuck 0 +dyerresearch 0 +interestsvirtu 0 +system 0 +imag 0 +compress 0 +video 0 +high 0 +bandwidth 0 +network 0 +relat 0 +link 0 +interest 0 +comput 0 +vision 0 +group 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..f2ae44f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,121 @@ +comput 1 +wisconsin 1 +work 1 +processor 1 +support 1 +univers 1 +andrea 1 +page 1 +multiscalar 1 +architectur 1 +data 1 +specul 1 +download 1 +postscript 1 +interest 1 +scienc 1 +crete 1 +greec 1 +greek 1 +mani 1 +moshovo 0 +home 0 +moshovosresearch 0 +assist 0 +depart 0 +sciencesunivers 0 +madisonadvisor 0 +guri 0 +sohigroup 0 +kestrel 0 +address 0 +leav 0 +notese 0 +aroundw 0 +peek 0 +futur 0 +clickheremi 0 +brother 0 +write 0 +poetri 0 +click 0 +herefor 0 +sampl 0 +current 0 +depend 0 +technic 0 +report 0 +compress 0 +uncompress 0 +talk 0 +slide 0 +load 0 +balanc 0 +gener 0 +instruct 0 +level 0 +parallel 0 +compil 0 +explot 0 +vlsi 0 +fall 0 +spring 0 +graduat 0 +student 0 +thecour 0 +instituteof 0 +york 0 +earn 0 +degre 0 +sinc 0 +transfer 0 +howev 0 +theopportun 0 +excel 0 +peopl 0 +meet 0 +wife 0 +implement 0 +numer 0 +algorithm 0 +access 0 +decoupl 0 +architecturethat 0 +softwar 0 +pipelin 0 +advisor 0 +kateveni 0 +short 0 +descript 0 +found 0 +viha 0 +like 0 +editor 0 +edit 0 +link 0 +hellen 0 +resouc 0 +network 0 +sure 0 +visit 0 +obtain 0 +instal 0 +font 0 +local 0 +copi 0 +resid 0 +atwww 0 +hyper 0 +devil 0 +dictionari 0 +bookmark 0 +mess 0 +nation 0 +fraud 0 +inform 0 +centerusenet 0 +chang 0 +want 0 +send 0 +afax 0 +free 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..f0104ef6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,38 @@ +page 1 +updat 1 +toni 1 +chang 1 +contact 1 +home 0 +pagewhat 0 +newoctob 0 +back 0 +inmadison 0 +sever 0 +background 0 +black 0 +better 0 +contrast 0 +inform 0 +minor 0 +variou 0 +list 0 +older 0 +prefer 0 +keep 0 +main 0 +brief 0 +herear 0 +link 0 +second 0 +level 0 +navig 0 +index 0 +friend 0 +favorit 0 +interest 0 +informationlast 0 +modifi 0 +octob 0 +wisc 0 +educopyright 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..ca033faf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,151 @@ +coke 1 +wisconsin 1 +martin 1 +ream 1 +page 1 +poobah 1 +comput 1 +scienc 1 +interest 1 +databas 1 +design 1 +student 1 +madison 1 +telephon 1 +dept 1 +mream 1 +wisc 1 +compil 1 +exam 1 +year 1 +gone 1 +want 1 +even 1 +thing 1 +might 1 +home 0 +graduat 0 +teach 0 +assist 0 +also 0 +finger 0 +machin 0 +departmentunivers 0 +dayton 0 +offic 0 +email 0 +edufal 0 +scheduleresearch 0 +particular 0 +digit 0 +terrain 0 +model 0 +tin 0 +program 0 +languag 0 +logic 0 +logicprogram 0 +qualifi 0 +spring 0 +previou 0 +softwar 0 +develop 0 +product 0 +orientedenviron 0 +exploit 0 +educ 0 +interestsin 0 +resum 0 +postscriptand 0 +html 0 +distribut 0 +affili 0 +mathemat 0 +wesleyan 0 +univers 0 +faint 0 +heart 0 +section 0 +alink 0 +senior 0 +honorsthesi 0 +gener 0 +unif 0 +poobahlook 0 +work 0 +mighti 0 +afraid 0 +dear 0 +tomi 0 +head 0 +usual 0 +realli 0 +talk 0 +tosomeon 0 +better 0 +adjust 0 +crucial 0 +role 0 +life 0 +youshould 0 +probabl 0 +elton 0 +doesn 0 +mention 0 +imaginethat 0 +besid 0 +aforement 0 +poobahship 0 +mental 0 +ill 0 +afew 0 +know 0 +third 0 +yeargradu 0 +depart 0 +concentr 0 +indatabas 0 +current 0 +studi 0 +qual 0 +sometim 0 +inearli 0 +februari 0 +exercis 0 +relax 0 +plai 0 +squash 0 +reason 0 +well 0 +round 0 +ultim 0 +frisbe 0 +summer 0 +basketbal 0 +poorli 0 +andinfrequ 0 +notic 0 +rapidlyrid 0 +mountain 0 +bike 0 +around 0 +campu 0 +chilliest 0 +weather 0 +alwai 0 +helmet 0 +wish 0 +learn 0 +feel 0 +free 0 +examin 0 +mynot 0 +often 0 +updat 0 +hierarchi 0 +stuff 0 +ilik 0 +enjoi 0 +line 0 +librarylast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..3adfbafa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,34 @@ +wisconsin 1 +multiscalar 1 +project 1 +comput 1 +architectur 1 +inform 1 +sohi 1 +home 0 +pagewisconsin 0 +technic 0 +paper 0 +talk 0 +given 0 +peopl 0 +contributor 0 +fund 0 +sourc 0 +relat 0 +avail 0 +softwar 0 +group 0 +scienc 0 +departmentat 0 +univers 0 +world 0 +wide 0 +interest 0 +local 0 +user 0 +last 0 +updat 0 +februari 0 +guri 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..17de8273 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,134 @@ +depart 1 +comput 1 +scienc 1 +steel 1 +maryland 1 +oper 1 +system 1 +univers 1 +come 1 +mail 1 +wisc 1 +class 1 +wisconsin 1 +around 1 +introduct 1 +friend 1 +favorit 1 +usenet 1 +frequent 1 +ask 1 +question 1 +list 1 +mike 0 +homepagemik 0 +homepagemsteel 0 +eduoffic 0 +comp 0 +stat 0 +build 0 +sit 0 +univ 0 +struggl 0 +undergradu 0 +sometimearound 0 +april 0 +note 0 +time 0 +stamp 0 +lower 0 +right 0 +corner 0 +sai 0 +folk 0 +graduat 0 +student 0 +madison 0 +school 0 +motto 0 +freezein 0 +land 0 +chees 0 +research 0 +studi 0 +interest 0 +center 0 +artificialintellig 0 +vision 0 +hope 0 +narrow 0 +year 0 +semest 0 +graduateinstructor 0 +section 0 +algebra 0 +languag 0 +program 0 +receiv 0 +bachelor 0 +degre 0 +theunivers 0 +scomput 0 +publicationsgrindston 0 +test 0 +suit 0 +parallel 0 +perform 0 +tool 0 +jefferyk 0 +hollingsworth 0 +michael 0 +technic 0 +reportc 0 +gzip 0 +postscriptfil 0 +semesterc 0 +mari 0 +vernonc 0 +artifici 0 +intellig 0 +chuck 0 +dyermi 0 +pagesinform 0 +gettingin 0 +touch 0 +back 0 +marylandwhom 0 +forgot 0 +address 0 +linksmi 0 +sport 0 +teamssom 0 +dave 0 +barri 0 +listth 0 +billi 0 +joel 0 +listi 0 +also 0 +administr 0 +thefruit 0 +still 0 +host 0 +ofmaryland 0 +start 0 +insidejok 0 +offic 0 +hand 0 +sometim 0 +andnow 0 +member 0 +world 0 +wide 0 +predat 0 +kill 0 +someinfrar 0 +photo 0 +know 0 +looklik 0 +infrar 0 +pictur 0 +memik 0 +steelemsteel 0 +eduunivers 0 +madisoncomput 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..a927f8d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,16 @@ +home 1 +visit 1 +univers 1 +maria 0 +pagemaria 0 +pagehow 0 +maryland 0 +colleg 0 +park 0 +mayb 0 +wisconsin 0 +madison 0 +section 0 +might 0 +want 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..82ed639a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,79 @@ +venezuela 1 +barquisimeto 1 +like 1 +naim 1 +work 1 +research 1 +guitar 1 +oscar 0 +home 0 +page 0 +bienvenido 0 +staff 0 +member 0 +paradyn 0 +project 0 +finish 0 +myph 0 +univers 0 +southampton 0 +england 0 +undergrad 0 +universidad 0 +simon 0 +bolivar 0 +caraca 0 +born 0 +beauti 0 +citi 0 +barquisimetoi 0 +locat 0 +central 0 +western 0 +part 0 +popul 0 +ofabout 0 +million 0 +peopl 0 +also 0 +known 0 +music 0 +capit 0 +main 0 +area 0 +perform 0 +analysi 0 +visual 0 +parallel 0 +program 0 +howev 0 +apart 0 +playclass 0 +fact 0 +studi 0 +year 0 +excellentmaestro 0 +rodrigo 0 +riera 0 +antonio 0 +lauro 0 +spend 0 +time 0 +plai 0 +read 0 +good 0 +book 0 +sherlock 0 +holm 0 +stori 0 +cook 0 +watch 0 +basebal 0 +beati 0 +pictur 0 +pleaseclick 0 +finger 0 +wisc 0 +check 0 +around 0 +mundo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..1503b6fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,39 @@ +home 1 +anastassia 1 +ailamaki 1 +madison 1 +realli 1 +welcom 0 +graduat 0 +student 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +dayton 0 +street 0 +phone 0 +want 0 +pictur 0 +import 0 +notic 0 +find 0 +time 0 +make 0 +decent 0 +page 0 +nice 0 +link 0 +georg 0 +rochest 0 +alex 0 +guid 0 +greek 0 +islandsar 0 +worth 0 +visit 0 +send 0 +mail 0 +natassa 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..a92082e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,99 @@ +databas 1 +object 1 +system 1 +parallel 1 +algorithm 1 +relat 1 +dimension 1 +data 1 +analysi 1 +current 1 +techniqu 1 +perform 1 +includ 1 +storag 1 +benchmark 1 +dbm 1 +public 1 +multidimension 1 +prasad 1 +deshpand 1 +intern 1 +confer 1 +larg 1 +mumbai 1 +bombai 1 +jeffrei 0 +naughtonjeffrei 0 +naughtonnaughton 0 +wisc 0 +eduresearch 0 +interestsolap 0 +multi 0 +relationaldbm 0 +overal 0 +goal 0 +research 0 +develop 0 +ofdatabas 0 +surpass 0 +inperform 0 +eas 0 +three 0 +main 0 +area 0 +ofinterest 0 +improv 0 +ofmulti 0 +arrai 0 +base 0 +andprocess 0 +indic 0 +computingth 0 +cube 0 +valuedattribut 0 +workload 0 +spatial 0 +inform 0 +recent 0 +comput 0 +aggreg 0 +withsameet 0 +agarw 0 +rakesh 0 +agraw 0 +ashish 0 +gupta 0 +raghu 0 +ramakrishnan 0 +sunita 0 +sarawagi 0 +proceed 0 +thend 0 +estim 0 +aggregatesin 0 +presenc 0 +hierarchi 0 +amit 0 +shukla 0 +karthikeyan 0 +ramasami 0 +bucki 0 +michael 0 +carei 0 +david 0 +dewitt 0 +johann 0 +gerhk 0 +dhaval 0 +shah 0 +moham 0 +asgarian 0 +prepar 0 +toward 0 +molap 0 +withyihong 0 +zhao 0 +kristin 0 +tuft 0 +submit 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..354ccb26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,39 @@ +navin 1 +kabranavin 0 +kabragradu 0 +student 0 +depart 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +madisonadvisor 0 +david 0 +dewittresearch 0 +area 0 +databas 0 +research 0 +interest 0 +customiz 0 +queri 0 +optim 0 +paradis 0 +project 0 +plan 0 +address 0 +noth 0 +better 0 +explor 0 +bookmark 0 +could 0 +look 0 +indian 0 +stuff 0 +includ 0 +among 0 +thing 0 +archiv 0 +hindi 0 +song 0 +wisc 0 +public 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..5451e89b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,28 @@ +newhal 1 +research 1 +perform 1 +newhalltia 0 +wisc 0 +graduat 0 +student 0 +dayton 0 +madison 0 +telephon 0 +interest 0 +parallel 0 +distribut 0 +system 0 +tool 0 +scalabl 0 +analysi 0 +predict 0 +java 0 +group 0 +paradynadvisor 0 +bart 0 +millermummi 0 +pictur 0 +guanajuato 0 +last 0 +chang 0 +august 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..2821c9ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,16 @@ +madison 1 +nanci 0 +hallcomput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +project 0 +shore 0 +scalabl 0 +heterogen 0 +object 0 +repositori 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..6aaa9bb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,207 @@ +mathemat 3 +program 3 +report 3 +mangasarian 3 +technic 3 +comput 2 +optim 2 +revis 2 +problem 2 +august 2 +scienc 1 +complementar 1 +juli 1 +machin 1 +learn 1 +linear 1 +street 1 +minim 1 +novemb 1 +olvi 1 +decemb 1 +page 1 +univers 1 +wisconsin 1 +parallel 1 +research 1 +converg 1 +smooth 1 +nonlinear 1 +cancer 1 +system 1 +bradlei 1 +method 1 +octob 1 +submit 1 +neural 1 +inform 1 +journal 1 +chunhui 1 +chen 1 +view 1 +group 1 +home 1 +wisc 1 +solv 1 +aspect 1 +bound 1 +inequ 1 +gener 1 +breast 1 +current 1 +public 1 +function 1 +constraint 1 +convex 1 +concav 1 +process 1 +wolberg 1 +januari 1 +editor 1 +proceed 1 +nick 1 +april 1 +appear 1 +applic 1 +june 1 +misclassif 1 +februari 1 +advanc 1 +septemb 1 +global 1 +solodov 1 +backpropag 1 +perturb 1 +paper 1 +john 0 +neumann 0 +professor 0 +member 0 +center 0 +depart 0 +dayton 0 +madison 0 +telephon 0 +email 0 +harvard 0 +interest 0 +summari 0 +theori 0 +rich 0 +effectivecomputation 0 +mani 0 +real 0 +life 0 +interestsin 0 +topic 0 +rang 0 +broad 0 +spectrum 0 +encompassestheoret 0 +error 0 +programsand 0 +variat 0 +proof 0 +parallelgradi 0 +variabl 0 +distribut 0 +algorithm 0 +techniqu 0 +constrain 0 +problemsa 0 +differenti 0 +equat 0 +well 0 +applicationsto 0 +specif 0 +context 0 +animport 0 +programmingtechniqu 0 +diagnos 0 +result 0 +ahighli 0 +accur 0 +computer 0 +diagnost 0 +useat 0 +hospit 0 +student 0 +paul 0 +recent 0 +solodova 0 +linearli 0 +descent 0 +strongli 0 +monotonecomplementar 0 +jong 0 +pangexact 0 +penalti 0 +programswith 0 +mangasarianmathemat 0 +data 0 +miningmathemat 0 +mangasarianerror 0 +nondifferenti 0 +strong 0 +slater 0 +qualif 0 +cluster 0 +individu 0 +collect 0 +prognost 0 +predict 0 +featur 0 +select 0 +polyhedr 0 +appli 0 +festschrift 0 +klau 0 +ritter 0 +fischer 0 +riedmuel 0 +schaeffler 0 +physica 0 +verlag 0 +germani 0 +pose 0 +siam 0 +internationalsymposium 0 +baltimor 0 +improv 0 +toler 0 +train 0 +workshop 0 +eric 0 +plenum 0 +press 0 +hybrid 0 +siag 0 +new 0 +class 0 +mix 0 +diagnosi 0 +prognosi 0 +oper 0 +separ 0 +bilinear 0 +determinist 0 +nonmonoton 0 +cowan 0 +tesauro 0 +alspector 0 +morgan 0 +kaufmann 0 +publish 0 +francisco 0 +california 0 +inequalitiesand 0 +serial 0 +net 0 +vianonmonoton 0 +minimn 0 +softwar 0 +chronolog 0 +bibliographi 0 +download 0 +period 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..b2a5726b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,421 @@ +cancer 2 +diagnosi 2 +comput 2 +breast 2 +mangasarian 2 +wolberg 2 +us 2 +page 2 +program 2 +imag 2 +abstract 2 +prognosi 2 +research 2 +street 2 +predict 2 +learn 2 +case 2 +malign 2 +time 2 +machin 1 +result 1 +sampl 1 +probabl 1 +patient 1 +new 1 +linear 1 +featur 1 +approxim 1 +valu 1 +recurr 1 +surviv 1 +medic 1 +march 1 +base 1 +work 1 +relat 1 +diagnos 1 +data 1 +analysi 1 +slide 1 +nuclei 1 +nuclear 1 +benign 1 +diseas 1 +free 1 +prognost 1 +cytolog 1 +biopsi 1 +wisconsin 1 +madison 1 +scienc 1 +depart 1 +surgeri 1 +oncolog 1 +provid 1 +link 1 +needl 1 +aspir 1 +construct 1 +separ 1 +known 1 +xcyt 1 +extrem 1 +year 1 +curv 1 +april 1 +report 1 +univers 1 +januari 1 +variou 1 +appli 1 +collabor 1 +human 1 +american 1 +citat 1 +popular 1 +press 1 +local 1 +mass 1 +fine 1 +nine 1 +characterist 1 +setiono 1 +method 1 +pattern 1 +small 1 +system 1 +process 1 +differenti 1 +user 1 +isol 1 +individu 1 +boundari 1 +ofeach 1 +approach 1 +textur 1 +mean 1 +train 1 +plane 1 +allow 1 +shown 1 +segment 1 +recognit 1 +algorithm 1 +pleas 1 +wisc 1 +problem 1 +term 1 +surfac 1 +plot 1 +ofdiseas 1 +repres 1 +factor 1 +lymph 1 +node 1 +could 1 +paper 1 +postscript 1 +proceed 1 +siam 1 +analyt 1 +quantit 1 +histolog 1 +heisei 1 +deriv 1 +august 1 +mathemat 1 +technic 1 +intern 1 +confer 1 +aid 1 +submit 1 +medicin 1 +milwauke 1 +hunt 1 +journal 1 +interpret 1 +group 1 +biomed 1 +prognosismachin 0 +prognosisthi 0 +describ 0 +learningapproach 0 +ofbreast 0 +theunivers 0 +betweenprof 0 +olvi 0 +anddr 0 +william 0 +wolbergof 0 +copi 0 +thepress 0 +releas 0 +distribut 0 +societi 0 +writer 0 +seminar 0 +inmarch 0 +good 0 +overview 0 +tabl 0 +content 0 +bibliographi 0 +linksdiagnosisthi 0 +grew 0 +desir 0 +accur 0 +diagnosebreast 0 +sole 0 +heidentifi 0 +visual 0 +assess 0 +consideredrelev 0 +prof 0 +andtwo 0 +graduat 0 +student 0 +rudi 0 +kristin 0 +bennett 0 +aclassifi 0 +multisurfac 0 +thatsuccessfulli 0 +iswel 0 +began 0 +addit 0 +nick 0 +streetto 0 +team 0 +goal 0 +adigit 0 +section 0 +ofthi 0 +consolid 0 +softwar 0 +current 0 +clinicalpractic 0 +perform 0 +follow 0 +taken 0 +materi 0 +thenmount 0 +microscop 0 +stain 0 +highlight 0 +cellularnuclei 0 +portion 0 +cell 0 +arewel 0 +scan 0 +digit 0 +camera 0 +afram 0 +grabber 0 +board 0 +mous 0 +pointer 0 +draw 0 +nucleu 0 +vision 0 +snake 0 +converg 0 +exact 0 +interact 0 +take 0 +five 0 +minut 0 +showingxcyt 0 +thisfas 0 +measur 0 +size 0 +shape 0 +standarderror 0 +total 0 +classifi 0 +wasconstruct 0 +thisclassifi 0 +consist 0 +singl 0 +space 0 +threeof 0 +area 0 +smooth 0 +project 0 +onto 0 +thenorm 0 +densiti 0 +ofth 0 +point 0 +simpl 0 +bayesiancomput 0 +thesedens 0 +judg 0 +confid 0 +comparison 0 +hundr 0 +previou 0 +date 0 +correctli 0 +consecut 0 +newpati 0 +eight 0 +didxcyt 0 +return 0 +suspici 0 +estimatedprob 0 +subset 0 +sourc 0 +found 0 +goodtest 0 +object 0 +petsegment 0 +automat 0 +identifi 0 +inthes 0 +email 0 +togeth 0 +prognosisth 0 +second 0 +consid 0 +long 0 +behavior 0 +haveapproach 0 +function 0 +inputfeatur 0 +includ 0 +atim 0 +right 0 +censor 0 +solut 0 +util 0 +linearprogram 0 +fornew 0 +examin 0 +actual 0 +caseswith 0 +similar 0 +anindividu 0 +capabl 0 +incorpor 0 +intoxcyt 0 +exampl 0 +versu 0 +black 0 +ourorigin 0 +studi 0 +particular 0 +thereforeha 0 +averag 0 +freeafter 0 +equal 0 +procedur 0 +also 0 +compar 0 +power 0 +ofvari 0 +indic 0 +precis 0 +detail 0 +inform 0 +type 0 +xcytgiv 0 +better 0 +accuraci 0 +tradit 0 +tumors 0 +statu 0 +corrobor 0 +remov 0 +need 0 +often 0 +pain 0 +axillari 0 +chronolog 0 +bibliographylink 0 +format 0 +viewer 0 +download 0 +file 0 +shift 0 +click 0 +netscap 0 +print 0 +ascii 0 +text 0 +obtain 0 +notlink 0 +contact 0 +first 0 +author 0 +theori 0 +applic 0 +workshop 0 +larg 0 +scale 0 +numer 0 +optim 0 +philadelphia 0 +distinguish 0 +patholog 0 +grade 0 +oper 0 +juli 0 +avail 0 +induct 0 +twelfth 0 +priediti 0 +russel 0 +morgan 0 +kaufmann 0 +teagu 0 +call 0 +indetermin 0 +collect 0 +icml 0 +aaai 0 +prime 0 +without 0 +friend 0 +todai 0 +detect 0 +imit 0 +prospect 0 +man 0 +sentinel 0 +analyz 0 +detroit 0 +high 0 +tech 0 +marilynn 0 +marchion 0 +computer 0 +progress 0 +ruth 0 +sorel 0 +houston 0 +chronicl 0 +improv 0 +suggest 0 +replac 0 +surgic 0 +associ 0 +perspect 0 +column 0 +june 0 +cope 0 +septemb 0 +octob 0 +seek 0 +capit 0 +angel 0 +schooloth 0 +nation 0 +librari 0 +nevada 0 +center 0 +model 0 +oncolink 0 +washington 0 +institut 0 +paulb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..5977a6f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,160 @@ +point 2 +separ 2 +plane 2 +linear 2 +program 2 +method 2 +set 2 +page 1 +region 1 +pattern 1 +dimension 1 +parallel 1 +mathemat 1 +optim 1 +us 1 +surfac 1 +also 1 +multisurfac 1 +contain 1 +tree 1 +implement 1 +mangasarian 1 +linearli 1 +euclidean 1 +space 1 +follow 1 +misclassifi 1 +assign 1 +node 1 +neural 1 +network 1 +bennett 1 +describ 1 +comput 1 +scienc 1 +approach 1 +construct 1 +nonlinear 1 +paramet 1 +disjoint 1 +finit 1 +togeth 1 +euclideanspac 1 +sequenc 1 +minim 1 +li 1 +side 1 +mostli 1 +gener 1 +view 1 +decis 1 +split 1 +artifici 1 +packag 1 +matlab 1 +paul 1 +bradlei 1 +programmingpattern 0 +programmingthi 0 +work 0 +section 0 +univers 0 +wisconsin 0 +madison 0 +depart 0 +brief 0 +histori 0 +outlinemathemat 0 +particular 0 +long 0 +problem 0 +whenev 0 +appear 0 +quadrat 0 +polynomi 0 +formul 0 +howev 0 +could 0 +failon 0 +avoid 0 +difficulti 0 +choos 0 +close 0 +discard 0 +repeat 0 +process 0 +variant 0 +develop 0 +goal 0 +todetermin 0 +determin 0 +averag 0 +distanc 0 +similarli 0 +stop 0 +otherwis 0 +anoth 0 +error 0 +eachnod 0 +best 0 +reach 0 +found 0 +solv 0 +branch 0 +thesam 0 +procedur 0 +appli 0 +oneset 0 +astrain 0 +hidden 0 +layer 0 +shown 0 +learn 0 +concept 0 +well 0 +better 0 +traditionallearn 0 +cart 0 +advantag 0 +backpropag 0 +inthat 0 +train 0 +proce 0 +much 0 +faster 0 +mino 0 +numer 0 +nick 0 +street 0 +kristin 0 +descript 0 +file 0 +requir 0 +chronolog 0 +bibliographi 0 +oper 0 +research 0 +june 0 +ieee 0 +transact 0 +inform 0 +theori 0 +novemb 0 +proceed 0 +midwest 0 +intellig 0 +cognit 0 +societi 0 +confer 0 +robust 0 +discrimin 0 +insepar 0 +softwar 0 +orsa 0 +journal 0 +fall 0 +last 0 +modifi 0 +paulb 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..909603e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,84 @@ +paradyn 2 +project 2 +tool 1 +parallel 1 +arpa 1 +page 1 +perform 1 +meet 1 +csto 1 +super 1 +comput 1 +program 1 +presentationthi 1 +present 1 +made 1 +wisc 1 +home 0 +releas 0 +informationthi 0 +contain 0 +describ 0 +copi 0 +ofreleas 0 +goalsth 0 +explor 0 +newapproach 0 +build 0 +scalabl 0 +technic 0 +paper 0 +manualsstatu 0 +reporta 0 +recent 0 +statu 0 +report 0 +blizzard 0 +panel 0 +insan 0 +antonio 0 +symbol 0 +tabl 0 +inflorida 0 +effort 0 +develop 0 +common 0 +access 0 +routin 0 +tocompil 0 +gener 0 +inform 0 +us 0 +high 0 +level 0 +parallellanguag 0 +staff 0 +postera 0 +hypertext 0 +version 0 +poster 0 +relat 0 +elsewher 0 +spdt 0 +sigmetr 0 +symposium 0 +distribut 0 +toolsyou 0 +also 0 +restaur 0 +includ 0 +temporari 0 +placehold 0 +contact 0 +informationparadyn 0 +projectdepart 0 +sciencesunivers 0 +wisconsin 0 +west 0 +dayton 0 +streetmadison 0 +email 0 +edufax 0 +last 0 +modifi 0 +bart 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..ce35b314 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,32 @@ +parker 1 +home 1 +page 1 +steven 1 +wisconsin 1 +madison 1 +prism 1 +graduat 0 +student 0 +comput 0 +scienc 0 +depart 0 +univers 0 +west 0 +dayton 0 +street 0 +offic 0 +wisc 0 +depth 0 +area 0 +numer 0 +analysi 0 +employ 0 +projectfal 0 +schedul 0 +math 0 +relat 0 +link 0 +send 0 +mail 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..fb88d9cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,90 @@ +wisconsin 2 +program 1 +comput 1 +univers 1 +mathemat 1 +street 1 +page 1 +paul 1 +scienc 1 +depart 1 +madison 1 +abstract 1 +bradlei 1 +paulb 1 +wisc 1 +fish 1 +us 1 +work 1 +nick 1 +postscript 1 +mangasarian 1 +technic 1 +report 1 +inform 1 +home 0 +bradleygradu 0 +student 0 +eduoffic 0 +csphone 0 +advisor 0 +mangasarianinterestsmathemat 0 +programmingmachin 0 +learningfli 0 +interest 0 +techniqu 0 +specif 0 +nonlinear 0 +linear 0 +induct 0 +learn 0 +summari 0 +currentlyb 0 +done 0 +area 0 +pleas 0 +madisonmathemat 0 +thiswork 0 +guid 0 +professor 0 +olvimangasarian 0 +publicationsal 0 +paper 0 +store 0 +format 0 +ascii 0 +text 0 +viewer 0 +download 0 +file 0 +shift 0 +click 0 +netscap 0 +print 0 +featur 0 +select 0 +decemb 0 +revis 0 +march 0 +submit 0 +journal 0 +cluster 0 +concav 0 +minim 0 +accept 0 +present 0 +neural 0 +process 0 +system 0 +picksthes 0 +site 0 +backcountri 0 +grate 0 +dead 0 +nasa 0 +frog 0 +espnet 0 +timesfax 0 +uroullett 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..7cb99173 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,117 @@ +develop 1 +scout 1 +internet 1 +devri 1 +tool 1 +specialist 1 +comput 1 +scienc 1 +intern 1 +everyth 1 +articl 1 +work 1 +molecularbiolog 1 +resourc 1 +fortun 1 +prof 1 +imag 1 +embryo 1 +journal 1 +site 1 +microscop 1 +pete 0 +home 0 +page 0 +peter 0 +room 0 +westdayton 0 +madison 0 +pdevri 0 +wisc 0 +mean 0 +isthat 0 +read 0 +technolog 0 +make 0 +sens 0 +andthen 0 +write 0 +overview 0 +toolkit 0 +great 0 +thing 0 +iread 0 +think 0 +anywai 0 +rather 0 +foolish 0 +topai 0 +tell 0 +eric 0 +hazen 0 +alsoprovid 0 +technic 0 +webmast 0 +servic 0 +group 0 +although 0 +excel 0 +help 0 +system 0 +folksat 0 +recent 0 +join 0 +team 0 +laboratori 0 +integr 0 +microscopi 0 +biomed 0 +nearli 0 +eight 0 +year 0 +seancarrol 0 +techniqu 0 +creat 0 +multipl 0 +label 0 +confoc 0 +basic 0 +cool 0 +look 0 +lotof 0 +book 0 +cover 0 +also 0 +molecular 0 +biologi 0 +johnwhit 0 +rest 0 +imrstaff 0 +receiv 0 +star 0 +inth 0 +magellan 0 +guid 0 +last 0 +major 0 +project 0 +involv 0 +dimension 0 +studi 0 +isdescrib 0 +appear 0 +augustnd 0 +issu 0 +photo 0 +guest 0 +lab 0 +standard 0 +info 0 +resum 0 +relat 0 +experi 0 +public 0 +present 0 +updat 0 +tuesdai 0 +decemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..955f5db1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,99 @@ +comput 1 +madison 1 +page 1 +scienc 1 +home 1 +plakal 1 +graduat 1 +kanpur 1 +slave 1 +calcutta 1 +bosco 1 +univers 1 +wisconsin 1 +stuff 1 +class 1 +link 1 +friend 1 +wisc 1 +yumpe 0 +manoj 0 +dept 0 +universityofwisconsin 0 +blah 0 +countri 0 +india 0 +though 0 +origin 0 +state 0 +kerala 0 +stai 0 +life 0 +studi 0 +school 0 +salesian 0 +undergrad 0 +major 0 +engin 0 +current 0 +first 0 +year 0 +student 0 +support 0 +teach 0 +assistantship 0 +depart 0 +stare 0 +barrel 0 +either 0 +architectur 0 +program 0 +languag 0 +interest 0 +music 0 +rock 0 +metal 0 +altern 0 +blue 0 +movi 0 +book 0 +acad 0 +hack 0 +industri 0 +geeki 0 +nerdi 0 +featur 0 +chat 0 +gatewai 0 +seealso 0 +iitk 0 +snap 0 +pinup 0 +galleri 0 +everi 0 +nerd 0 +need 0 +check 0 +bookmark 0 +access 0 +log 0 +visit 0 +contact 0 +north 0 +randal 0 +avenu 0 +dayton 0 +street 0 +acknowledg 0 +suresh 0 +venkat 0 +nifti 0 +tabl 0 +igor 0 +ivanisev 0 +wisecrack 0 +icon 0 +variou 0 +corner 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..1bfacd60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,70 @@ +madison 1 +page 1 +comput 1 +theori 1 +prasad 1 +home 1 +scienc 1 +univers 1 +wisconsin 1 +address 1 +interest 1 +databas 1 +music 1 +vldb 1 +time 1 +constuct 0 +meanwhil 0 +deshpand 0 +graduat 0 +student 0 +depar 0 +princeton 0 +offic 0 +build 0 +dayton 0 +academ 0 +system 0 +research 0 +area 0 +current 0 +schedul 0 +invest 0 +manageri 0 +econom 0 +meet 0 +prof 0 +jeff 0 +naughton 0 +introduct 0 +public 0 +multidimension 0 +aggreg 0 +storag 0 +estim 0 +multidimensionalaggreg 0 +presenc 0 +hierarchi 0 +cours 0 +project 0 +packag 0 +java 0 +download 0 +want 0 +spend 0 +timex 0 +world 0 +find 0 +india 0 +dilbert 0 +comix 0 +explor 0 +bookmark 0 +random 0 +link 0 +finger 0 +sinc 0 +hakuna 0 +matata 0 +info 0 +creat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..3d1c1c4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,38 @@ +inform 1 +home 1 +page 1 +poosala 1 +research 1 +madison 1 +vishi 0 +viswanath 0 +assist 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +wisc 0 +reseach 0 +summari 0 +resum 0 +html 0 +postscript 0 +relat 0 +databas 0 +advisor 0 +prof 0 +yanni 0 +ioannidi 0 +asha 0 +voluntari 0 +organ 0 +help 0 +improv 0 +basic 0 +educ 0 +india 0 +interestsuw 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..64f038ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,353 @@ +sequenc 3 +queri 3 +data 3 +databas 2 +relat 2 +oper 2 +model 2 +record 2 +system 2 +optim 2 +project 2 +order 2 +sequin 2 +us 2 +time 1 +manag 1 +effici 1 +languag 1 +posit 1 +praveen 1 +seshadri 1 +express 1 +evalu 1 +variou 1 +support 1 +process 1 +miron 1 +livni 1 +ramakrishnan 1 +object 1 +import 1 +requir 1 +includ 1 +scienc 1 +kind 1 +techniqu 1 +implement 1 +nest 1 +complex 1 +storag 1 +earthquak 1 +similar 1 +click 1 +raghu 1 +wisc 1 +domain 1 +exist 1 +allow 1 +need 1 +like 1 +next 1 +built 1 +defin 1 +also 1 +propos 1 +shore 1 +extens 1 +volcano 1 +recent 1 +plan 1 +scan 1 +singl 1 +mani 1 +view 1 +orient 1 +zoom 1 +group 1 +involv 1 +server 1 +proceed 1 +confer 1 +madison 1 +document 1 +collect 1 +set 1 +declar 1 +manner 1 +advantag 1 +user 1 +tempor 1 +previou 1 +demonstr 1 +feasibl 1 +form 1 +embed 1 +base 1 +exampl 1 +inform 1 +meteorolog 1 +phenomena 1 +sequenti 1 +strength 1 +greater 1 +would 1 +sort 1 +join 1 +store 1 +buffer 1 +gener 1 +answer 1 +detail 1 +aredescrib 1 +publish 1 +paper 1 +postscript 1 +version 1 +map 1 +call 1 +could 1 +flavor 1 +explor 1 +collaps 1 +expand 1 +last 1 +work 1 +probabl 1 +devis 1 +result 1 +client 1 +insid 1 +provid 1 +depart 1 +home 0 +pageth 0 +construct 0 +content 0 +objectivescurr 0 +statusmotiv 0 +exampleseq 0 +languageoptim 0 +techniquesseq 0 +developmentpublicationsrel 0 +workcontact 0 +informationproject 0 +number 0 +applic 0 +processingof 0 +larg 0 +amount 0 +theseappl 0 +financi 0 +histor 0 +analysi 0 +econom 0 +social 0 +metereolog 0 +medic 0 +andbiolog 0 +inadequ 0 +regard 0 +treat 0 +consequ 0 +tediou 0 +ineffici 0 +abstract 0 +util 0 +semanticstak 0 +uniqu 0 +opportun 0 +avail 0 +evaluationintegr 0 +canstor 0 +combin 0 +sequencesthes 0 +serv 0 +goal 0 +themost 0 +notion 0 +natur 0 +consid 0 +issu 0 +studi 0 +theori 0 +theoret 0 +idea 0 +statusth 0 +current 0 +statu 0 +algebraicqueri 0 +compos 0 +analogousto 0 +composit 0 +algebra 0 +describ 0 +identifi 0 +candeclar 0 +likesql 0 +vice 0 +versa 0 +build 0 +disk 0 +architectur 0 +sever 0 +megabyt 0 +integr 0 +motiv 0 +querya 0 +weather 0 +monitor 0 +occurr 0 +event 0 +scientist 0 +ask 0 +erupt 0 +didth 0 +richter 0 +scale 0 +featur 0 +groupbi 0 +claus 0 +correl 0 +subqueri 0 +aggregatefunct 0 +convent 0 +find 0 +execut 0 +even 0 +given 0 +knowledg 0 +howev 0 +sequencesord 0 +lock 0 +step 0 +merg 0 +temporari 0 +whenev 0 +valu 0 +check 0 +possibl 0 +therefor 0 +littl 0 +memori 0 +modelth 0 +present 0 +gist 0 +basic 0 +ordereddomain 0 +relationship 0 +andposit 0 +dual 0 +distinct 0 +wai 0 +recordsmap 0 +respect 0 +give 0 +rise 0 +either 0 +relationaloper 0 +overlap 0 +contain 0 +andaggreg 0 +researchersin 0 +commun 0 +offset 0 +movingaggreg 0 +mean 0 +associ 0 +instanc 0 +daili 0 +weekli 0 +hourli 0 +part 0 +deal 0 +make 0 +easi 0 +case 0 +real 0 +worldsitu 0 +extend 0 +instead 0 +extensionof 0 +indic 0 +practic 0 +ofseq 0 +languagew 0 +usingwhich 0 +specifi 0 +languagei 0 +except 0 +input 0 +queriesa 0 +well 0 +descript 0 +techniquesw 0 +thathav 0 +transform 0 +meta 0 +cach 0 +intermedi 0 +algorithm 0 +reli 0 +cost 0 +estim 0 +observ 0 +access 0 +stream 0 +strategi 0 +take 0 +account 0 +developmentth 0 +serverarchitectur 0 +multipl 0 +viaa 0 +multi 0 +thread 0 +ontop 0 +subset 0 +languageswhich 0 +mode 0 +arbitrarylevel 0 +viceversa 0 +supportfor 0 +type 0 +function 0 +detailson 0 +publicationssequ 0 +sigmod 0 +framework 0 +datapraveen 0 +ieee 0 +engin 0 +march 0 +design 0 +systempraveen 0 +submit 0 +vldb 0 +queriesraghu 0 +michael 0 +cheng 0 +intern 0 +comad 0 +decemb 0 +workthedevis 0 +complementari 0 +visualizationenviron 0 +front 0 +pose 0 +examin 0 +graphic 0 +peopl 0 +research 0 +servercontact 0 +informationfor 0 +contact 0 +eduraghu 0 +edumiron 0 +educomput 0 +univers 0 +wisconsin 0 +dayton 0 +street 0 +modifi 0 +seshadripraveen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..17c058ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,20 @@ +andrew 1 +prock 1 +school 1 +class 1 +alta 1 +vista 1 +home 0 +page 0 +clemen 0 +hockert 0 +prockoffic 0 +hour 0 +person 0 +histori 0 +madison 0 +bookmark 0 +resum 0 +doonesburi 0 +trot 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..27c333ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,121 @@ +comput 1 +theori 1 +system 1 +parallel 1 +problem 1 +condon 1 +scienc 1 +interest 1 +interact 1 +proof 1 +us 1 +work 1 +algorithm 1 +page 1 +ann 1 +univers 1 +email 1 +wisc 1 +complex 1 +random 1 +model 1 +recent 1 +hard 1 +approxim 1 +result 1 +graph 1 +automata 1 +develop 1 +journal 1 +home 0 +associ 0 +professor 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +washington 0 +complexityclass 0 +research 0 +summari 0 +interactiveproof 0 +combin 0 +nondetermin 0 +suchmodel 0 +proven 0 +surprisingli 0 +solv 0 +classicproblem 0 +exampl 0 +although 0 +theoryof 0 +complet 0 +long 0 +identifi 0 +computationalproblem 0 +much 0 +progress 0 +understand 0 +whichhard 0 +solut 0 +easi 0 +recentresult 0 +novel 0 +modelsof 0 +turn 0 +prove 0 +approximabilityresult 0 +sever 0 +developingboth 0 +posit 0 +neg 0 +hardcombinatori 0 +aris 0 +game 0 +theoryand 0 +also 0 +design 0 +analysi 0 +current 0 +forsort 0 +minimum 0 +span 0 +tree 0 +goal 0 +well 0 +practic 0 +commun 0 +synchron 0 +costscan 0 +expens 0 +sampl 0 +public 0 +polynomi 0 +bound 0 +strategi 0 +ladner 0 +finit 0 +state 0 +nondeterminist 0 +probabilisticst 0 +hellerstein 0 +pottl 0 +wigderson 0 +proceedingsof 0 +annual 0 +symposium 0 +pspace 0 +provabl 0 +prover 0 +round 0 +caiand 0 +lipton 0 +februari 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..e0f886f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,137 @@ +comput 2 +structur 1 +research 1 +scienc 1 +complex 1 +class 1 +sequenc 1 +joseph 1 +properti 1 +algorithm 1 +page 1 +deborah 1 +mathemat 1 +univers 1 +email 1 +wisc 1 +interest 1 +biologi 1 +geometri 1 +concern 1 +area 1 +set 1 +time 1 +recent 1 +question 1 +proof 1 +techniqu 1 +method 1 +genom 1 +develop 1 +proceed 1 +similar 1 +home 0 +associ 0 +professor 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +purdu 0 +appli 0 +theori 0 +logic 0 +summari 0 +theoret 0 +studi 0 +design 0 +analysi 0 +biolog 0 +problem 0 +last 0 +twenti 0 +year 0 +great 0 +deal 0 +work 0 +gone 0 +studyingth 0 +decid 0 +determinist 0 +andnondeterminist 0 +polynomi 0 +despit 0 +effort 0 +stillknow 0 +littl 0 +fact 0 +computerscientist 0 +adequaci 0 +known 0 +techniquesfor 0 +resolv 0 +whether 0 +investigatesth 0 +exploresin 0 +formal 0 +type 0 +necessari 0 +resolveproblem 0 +primarili 0 +inth 0 +theseinclud 0 +dynam 0 +data 0 +algorithmsfor 0 +fragment 0 +assembl 0 +larg 0 +scale 0 +project 0 +specif 0 +handlingrepetit 0 +addit 0 +util 0 +graphtheoret 0 +rapid 0 +homolog 0 +detect 0 +analysisof 0 +anonym 0 +sampl 0 +public 0 +collaps 0 +degre 0 +subexponenti 0 +pruim 0 +young 0 +ninth 0 +theoryconfer 0 +spars 0 +spanner 0 +weight 0 +graph 0 +althof 0 +dobkin 0 +soar 0 +discret 0 +obtain 0 +global 0 +local 0 +meidanisand 0 +tiwari 0 +fourth 0 +scandinavianworkshop 0 +springer 0 +verlag 0 +lectur 0 +note 0 +incomput 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..355c3051 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,108 @@ +simul 1 +manag 1 +polici 1 +miron 1 +resourc 1 +perform 1 +process 1 +page 1 +livni 1 +scienc 1 +email 1 +wisc 1 +interest 1 +algorithm 1 +discret 1 +event 1 +research 1 +data 1 +system 1 +laboratori 1 +disk 1 +tape 1 +proceed 1 +confer 1 +home 0 +professor 0 +comput 0 +depart 0 +univers 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +weizmann 0 +institut 0 +rehovot 0 +israel 0 +model 0 +analysi 0 +summari 0 +major 0 +emphasi 0 +design 0 +evaluationof 0 +involv 0 +developmentof 0 +managementsystem 0 +type 0 +gener 0 +purpos 0 +well 0 +asreal 0 +time 0 +schedul 0 +consid 0 +researchinvolv 0 +studi 0 +differ 0 +specialemphasi 0 +interplai 0 +properti 0 +systemand 0 +sinc 0 +performancestudi 0 +emploi 0 +modelingand 0 +techniqu 0 +current 0 +implementinga 0 +base 0 +novel 0 +languag 0 +includ 0 +util 0 +visualizationtool 0 +graphic 0 +interfac 0 +sampl 0 +recent 0 +public 0 +join 0 +synchron 0 +access 0 +myllymaki 0 +sigmetr 0 +sequenc 0 +queri 0 +sashadri 0 +ramakrishnan 0 +sigmod 0 +foundat 0 +visual 0 +metaphor 0 +schema 0 +displai 0 +haberand 0 +ioannidi 0 +journal 0 +intellig 0 +inform 0 +juli 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..deb9a34b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,102 @@ +method 1 +equat 1 +precondit 1 +parter 1 +numer 1 +differenti 1 +ellipt 1 +condit 1 +page 1 +seymour 1 +comput 1 +scienc 1 +univers 1 +email 1 +wisc 1 +partial 1 +research 1 +work 1 +indefinit 1 +discret 1 +system 1 +posit 1 +definit 1 +effect 1 +siam 1 +analysi 1 +distribut 1 +singular 1 +valu 1 +journal 1 +home 0 +professor 0 +mathemat 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +york 0 +interest 0 +summari 0 +time 0 +major 0 +emphasi 0 +solutionof 0 +classicalit 0 +multigrid 0 +effectivelywhen 0 +also 0 +bemad 0 +real 0 +symmetr 0 +part 0 +operatori 0 +hand 0 +casedirect 0 +attempt 0 +preserv 0 +spars 0 +thesystem 0 +encount 0 +small 0 +pivot 0 +thu 0 +challengingproblem 0 +mix 0 +concept 0 +procedur 0 +linearalgebra 0 +nowinvolv 0 +sever 0 +project 0 +attack 0 +class 0 +problem 0 +includ 0 +studi 0 +specialmultigrid 0 +sampl 0 +recent 0 +public 0 +chebyshev 0 +collact 0 +ellipticparti 0 +appear 0 +journalon 0 +boundari 0 +without 0 +estim 0 +number 0 +second 0 +order 0 +oper 0 +numbersand 0 +scientificcomput 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..e20c1b87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,128 @@ +parallel 2 +system 1 +comput 1 +techniqu 1 +analysi 1 +vernon 1 +applic 1 +perform 1 +model 1 +issu 1 +processor 1 +polici 1 +proceed 1 +page 1 +mari 1 +scienc 1 +univers 1 +email 1 +wisc 1 +interest 1 +research 1 +repres 1 +also 1 +recent 1 +schedul 1 +network 1 +confer 1 +june 1 +home 0 +professor 0 +industri 0 +engin 0 +depart 0 +wisconsin 0 +dayton 0 +madison 0 +telephon 0 +california 0 +angel 0 +architectur 0 +operatingsystem 0 +summari 0 +analyt 0 +applicationto 0 +emphasi 0 +paralleland 0 +distribut 0 +design 0 +techniquesi 0 +develop 0 +togeth 0 +graduat 0 +student 0 +colleaguesinclud 0 +gener 0 +time 0 +petri 0 +customizedmean 0 +valu 0 +gtpn 0 +systemfeatur 0 +synchron 0 +prioriti 0 +servic 0 +precis 0 +custom 0 +yield 0 +intuit 0 +equationsthat 0 +featur 0 +approxim 0 +butcan 0 +solv 0 +effici 0 +proposedth 0 +call 0 +interpol 0 +approximationsfor 0 +alloc 0 +techniquemai 0 +broader 0 +performanceanalysi 0 +current 0 +project 0 +includ 0 +character 0 +high 0 +performanceparallel 0 +workload 0 +schedulingpolici 0 +multimedia 0 +server 0 +memorymanag 0 +workstat 0 +sampl 0 +public 0 +fair 0 +dqdb 0 +slot 0 +reus 0 +brewster 0 +ieee 0 +infocom 0 +august 0 +accur 0 +hybrid 0 +hash 0 +join 0 +algorithm 0 +pateland 0 +carei 0 +sigmetr 0 +characterist 0 +limit 0 +preemption 0 +forrun 0 +complet 0 +with 0 +chiang 0 +mansharamani 0 +sigmetricsconfer 0 +automat 0 +creat 0 +octob 0 +pub 0 +eduto 0 +report 0 +error 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..d822739b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,9 @@ +qinqin 1 +wang 1 +home 1 +page 0 +welcom 0 +pageqw 0 +wisc 0 +edulast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..b0a82415 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,198 @@ +data 2 +project 2 +databas 2 +research 2 +queri 2 +system 1 +interest 1 +develop 1 +imag 1 +work 1 +hill 1 +first 1 +us 1 +base 1 +sequenc 1 +languag 1 +next 1 +august 1 +employ 1 +raghu 1 +comput 1 +depart 1 +educ 1 +graduat 1 +text 1 +relat 1 +logic 1 +integr 1 +content 1 +set 1 +access 1 +joint 1 +prof 1 +livni 1 +program 1 +evalu 1 +bell 1 +lab 1 +murrai 1 +ramakrishnan 1 +scienc 1 +madison 1 +teach 1 +activ 1 +univers 1 +cours 1 +manag 1 +second 1 +level 1 +dbm 1 +coral 1 +deal 1 +sever 1 +heterogen 1 +sourc 1 +analysi 1 +larg 1 +constraint 1 +techniqu 1 +support 1 +visual 1 +appli 1 +design 1 +part 1 +gener 1 +explor 1 +algorithm 1 +call 1 +bottom 1 +optim 1 +cornel 1 +home 0 +page 0 +associ 0 +professor 0 +wisc 0 +sciencesunivers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +usaphon 0 +texa 0 +austin 0 +tech 0 +indian 0 +institut 0 +technolog 0 +madra 0 +softwar 0 +minibaseand 0 +coralth 0 +publish 0 +mcgraw 0 +aimedat 0 +undergraduateand 0 +minibas 0 +inconjunct 0 +also 0 +coursesthat 0 +school 0 +index 0 +exploratori 0 +mine 0 +extend 0 +deductiona 0 +grow 0 +diversifi 0 +increasinglyimport 0 +abl 0 +dispers 0 +independ 0 +easili 0 +rodin 0 +successor 0 +investig 0 +severalissu 0 +formal 0 +practic 0 +toolkit 0 +forsemant 0 +multipl 0 +serviceand 0 +networkedclust 0 +machin 0 +ioannidi 0 +recent 0 +result 0 +explorationfrom 0 +area 0 +complex 0 +assequ 0 +seqsystem 0 +focus 0 +optimizationissu 0 +import 0 +aspect 0 +identifyingtrend 0 +identifi 0 +pattern 0 +ofinform 0 +goal 0 +retriev 0 +fromlarg 0 +focu 0 +implementingan 0 +express 0 +definit 0 +customizea 0 +take 0 +advantag 0 +specializedinform 0 +given 0 +collect 0 +indexedand 0 +cover 0 +andmin 0 +power 0 +cluster 0 +birchfor 0 +dataset 0 +tool 0 +devisea 0 +long 0 +stand 0 +extens 0 +databasequeri 0 +featuressuch 0 +structur 0 +term 0 +recurs 0 +ofarithmet 0 +specifi 0 +morecompactli 0 +effici 0 +ongo 0 +involv 0 +continu 0 +coraldeduct 0 +upon 0 +fixpointevalu 0 +make 0 +efficientacross 0 +broad 0 +rang 0 +sudarshan 0 +time 0 +sudarsha 0 +srivastava 0 +deduct 0 +object 0 +orient 0 +divesh 0 +transit 0 +closur 0 +seshadri 0 +managementfirst 0 +praveen 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..cca62245 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,163 @@ +madison 1 +know 1 +fall 1 +india 1 +love 1 +rahul 1 +home 1 +long 1 +littl 1 +offici 1 +final 1 +semest 1 +comput 1 +came 1 +kanpur 1 +interest 1 +kind 1 +livabl 1 +think 1 +summer 1 +california 1 +great 1 +place 1 +comedi 1 +show 1 +contact 1 +page 0 +kapoorhello 0 +internet 0 +surfer 0 +welcom 0 +cyber 0 +hope 0 +stai 0 +enough 0 +third 0 +master 0 +student 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +get 0 +bachelor 0 +degre 0 +indianinstitut 0 +technolog 0 +employ 0 +pleas 0 +check 0 +resum 0 +cours 0 +schedulemydepartmentmyuniversityiitkanpuriitkclass 0 +relatedlink 0 +menow 0 +gone 0 +want 0 +person 0 +well 0 +normal 0 +born 0 +andrais 0 +small 0 +famili 0 +compris 0 +parent 0 +elder 0 +sister 0 +nice 0 +town 0 +call 0 +state 0 +good 0 +fortun 0 +live 0 +moneymagazin 0 +rate 0 +citi 0 +year 0 +editormust 0 +come 0 +greenland 0 +winter 0 +guess 0 +shouldn 0 +complain 0 +spring 0 +isawesom 0 +jose 0 +work 0 +almaden 0 +research 0 +centr 0 +cannot 0 +much 0 +suppos 0 +anywai 0 +regret 0 +time 0 +area 0 +whatev 0 +monei 0 +magazin 0 +sai 0 +northern 0 +like 0 +music 0 +take 0 +look 0 +collect 0 +movi 0 +almost 0 +genr 0 +though 0 +prefer 0 +romanc 0 +file 0 +star 0 +trek 0 +read 0 +stuff 0 +novel 0 +philosophi 0 +surf 0 +sport 0 +concern 0 +watch 0 +cricket 0 +plai 0 +win 0 +tenni 0 +figur 0 +skate 0 +gymnast 0 +try 0 +learn 0 +swim 0 +bridg 0 +natur 0 +lover 0 +enjoi 0 +go 0 +walk 0 +hike 0 +cloudi 0 +slightli 0 +breezi 0 +wish 0 +could 0 +job 0 +televis 0 +travel 0 +youget 0 +world 0 +paid 0 +musicstuffmovi 0 +televisioninternettravelotherbookmark 0 +meget 0 +form 0 +rest 0 +guestbookrahul 0 +wisc 0 +eduh 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..b1c5ba35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,76 @@ +raman 1 +rajesh 1 +comput 1 +scienc 1 +univers 1 +current 1 +page 1 +depart 1 +madison 1 +wisc 1 +system 1 +home 0 +old 0 +homm 0 +off 0 +west 0 +dayton 0 +street 0 +email 0 +telephon 0 +ohio 0 +wesleyan 0 +major 0 +mathemat 0 +minor 0 +music 0 +first 0 +year 0 +graduat 0 +student 0 +winsonsin 0 +person 0 +curriculum 0 +vita 0 +postscript 0 +specif 0 +cours 0 +architectur 0 +saluja 0 +perform 0 +evalu 0 +model 0 +livni 0 +distribut 0 +inform 0 +work 0 +team 0 +member 0 +condor 0 +project 0 +integr 0 +part 0 +committe 0 +bookmark 0 +chimera 0 +novelti 0 +monster 0 +chao 0 +subject 0 +contradict 0 +prodigi 0 +judg 0 +thing 0 +feebleworm 0 +earth 0 +depositari 0 +truth 0 +cloaca 0 +uncertainti 0 +error 0 +theglori 0 +shame 0 +blais 0 +pascal 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..76fc4d08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,78 @@ +databas 1 +jeffrei 1 +naughton 1 +comput 1 +scienc 1 +depart 1 +wisconsin 1 +karthik 0 +pagekarthikeyan 0 +ramasamyabouti 0 +student 0 +univers 0 +research 0 +interest 0 +mostli 0 +oper 0 +system 0 +work 0 +paradis 0 +project 0 +projectshack 0 +david 0 +dewitt 0 +connectivityparadis 0 +parallel 0 +pthread 0 +wrapperspublicationsstorag 0 +estim 0 +multidimension 0 +aggreg 0 +presenc 0 +hierarchi 0 +amit 0 +shukla 0 +prasad 0 +deshpand 0 +karthikeyan 0 +ramasami 0 +intern 0 +confer 0 +larg 0 +mumbai 0 +bombai 0 +avail 0 +presentationsweb 0 +picturearchitectur 0 +altern 0 +scalabl 0 +serversphoto 0 +albumencount 0 +leafperson 0 +inforesum 0 +financemonei 0 +wall 0 +street 0 +journal 0 +person 0 +interestshack 0 +photographycontact 0 +informationstreet 0 +address 0 +dayton 0 +madison 0 +electron 0 +mail 0 +addresskarthik 0 +wisc 0 +eduoffic 0 +phone 0 +number 0 +comment 0 +suggestionspleas 0 +tell 0 +think 0 +home 0 +page 0 +might 0 +improv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..a656490f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,21 @@ +kelli 1 +page 1 +home 0 +ratliffoffic 0 +phone 0 +email 0 +wisc 0 +edulast 0 +login 0 +offic 0 +hour 0 +inform 0 +student 0 +genealog 0 +interest 0 +place 0 +visit 0 +space 0 +construct 0 +stai 0 +tune 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..a16563b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,45 @@ +compress 1 +ratnakar 1 +madison 1 +viresh 1 +page 1 +research 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +imag 1 +invok 1 +avail 1 +assist 0 +area 0 +digit 0 +video 0 +advisor 0 +miron 0 +livni 0 +main 0 +interest 0 +base 0 +vector 0 +quantiz 0 +fractal 0 +qualiti 0 +control 0 +lossi 0 +product 0 +mode 0 +public 0 +home 0 +qclicauthor 0 +qclic 0 +qclicbrows 0 +thing 0 +rever 0 +reveal 0 +click 0 +west 0 +dayton 0 +street 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..a882d2ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,78 @@ +offic 1 +omin 1 +haven 1 +address 1 +number 1 +monasteriu 1 +doominu 1 +brother 1 +richard 1 +madison 1 +comput 1 +mondai 1 +wednesdai 1 +welcom 0 +without 0 +beard 0 +person 0 +univers 0 +wisconsin 0 +scienc 0 +depart 0 +west 0 +dayton 0 +street 0 +rcarl 0 +wisc 0 +hour 0 +thur 0 +home 0 +page 0 +current 0 +cours 0 +load 0 +distribut 0 +oper 0 +system 0 +underwat 0 +fire 0 +prevent 0 +saturdai 0 +advanc 0 +architectur 0 +tuesdai 0 +thursdai 0 +math 0 +introduct 0 +whole 0 +emphasi 0 +sundai 0 +subsurfac 0 +depositori 0 +engin 0 +grave 0 +dig 0 +fridai 0 +mani 0 +shade 0 +profession 0 +doom 0 +polit 0 +goofi 0 +solitari 0 +innebri 0 +vampir 0 +seriou 0 +nostalg 0 +funki 0 +monk 0 +fellow 0 +order 0 +ever 0 +need 0 +graphic 0 +artist 0 +desktop 0 +publish 0 +check 0 +best 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..7f02eb4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,558 @@ +program 4 +rep 4 +comput 3 +languag 3 +scienc 3 +softwar 3 +slice 3 +analysi 3 +horwitz 3 +proceed 3 +york 3 +wisconsin 3 +univers 2 +depart 2 +interprocedur 2 +also 2 +engin 2 +note 2 +popl 2 +confer 2 +graph 2 +integr 2 +madison 2 +system 2 +algorithm 2 +increment 2 +problem 2 +symposium 2 +base 2 +sigsoft 2 +semant 2 +sigplan 2 +juli 2 +environ 2 +dagstuhl 2 +binklei 2 +synthes 2 +gener 2 +teitelbaum 2 +topla 2 +paper 2 +springer 2 +verlag 2 +januari 2 +depend 2 +sagiv 2 +click 2 +ramalingam 2 +record 2 +access 2 +dataflow 2 +develop 2 +us 2 +transact 2 +ieee 2 +lectur 2 +june 2 +shape 2 +yang 2 +editor 2 +reprint 2 +principl 2 +merg 2 +attribut 2 +acta 2 +version 2 +evalu 2 +intern 2 +april 2 +cornel 1 +research 1 +public 1 +manipul 1 +solv 1 +reachabl 1 +thesi 1 +applic 1 +prin 1 +novemb 1 +societi 1 +symposiumon 1 +germani 1 +differenc 1 +grammar 1 +implement 1 +precis 1 +demand 1 +manual 1 +journal 1 +appear 1 +second 1 +notic 1 +foundat 1 +decemb 1 +august 1 +project 1 +associ 1 +tool 1 +algebra 1 +current 1 +idfa 1 +book 1 +third 1 +publish 1 +procedur 1 +interf 1 +modif 1 +direct 1 +alamito 1 +colloquium 1 +washington 1 +portland 1 +octob 1 +wilhelm 1 +updat 1 +releas 1 +page 1 +pointer 1 +categor 1 +post 1 +support 1 +special 1 +chop 1 +tosem 1 +licens 1 +pfeiffer 1 +logic 1 +compcon 1 +refer 1 +informatica 1 +dynam 1 +methodolog 1 +inform 1 +process 1 +demer 1 +syntax 1 +septemb 1 +march 1 +fromacm 1 +berzin 1 +sigsoftsymposium 1 +francisco 1 +seminar 1 +schloss 1 +wadern 1 +destruct 1 +twenti 1 +rosai 1 +denmark 1 +parallel 1 +copenhagen 1 +ofprogram 1 +submit 1 +center 1 +dissert 1 +tech 1 +thoma 1 +home 1 +interest 1 +alia 1 +index 1 +list 1 +visitor 1 +doctor 1 +aim 1 +complex 1 +particular 1 +work 1 +oper 1 +element 1 +instanc 1 +reus 1 +technolog 1 +larg 1 +solut 1 +ics 1 +david 1 +fseb 1 +thesiswuu 1 +esop 1 +amast 1 +poplb 1 +pepma 1 +handl 1 +distribut 1 +transform 1 +fsea 1 +diku 1 +fase 1 +pepmb 1 +lape 1 +psde 1 +spaa 1 +world 1 +toconst 1 +propag 1 +theoret 1 +path 1 +accommod 1 +properti 1 +tree 1 +commun 1 +european 1 +databas 1 +theori 1 +combin 1 +paradigmsfor 1 +brighton 1 +abramski 1 +maibaum 1 +scientif 1 +interact 1 +hill 1 +wherefor 1 +sigoa 1 +text 1 +fourth 1 +petersburg 1 +time 1 +pepm 1 +onparti 1 +jolla 1 +california 1 +fifth 1 +iowa 1 +citi 1 +bricker 1 +workshop 1 +variabl 1 +fifteenth 1 +diego 1 +principlesof 1 +latest 1 +report 1 +ibfi 1 +twentieth 1 +charleston 1 +tutori 1 +represent 1 +multi 1 +univ 1 +professor 1 +repsprofessorcomput 0 +departmentunivers 0 +west 0 +dayton 0 +streetmadison 0 +usa 0 +mail 0 +wisc 0 +telephon 0 +secretari 0 +curriculum 0 +vita 0 +thehom 0 +content 0 +summari 0 +student 0 +summarymi 0 +creat 0 +thedevelop 0 +object 0 +createtool 0 +provid 0 +power 0 +specif 0 +manipulationoper 0 +explor 0 +slicingcan 0 +serv 0 +basi 0 +respect 0 +elementss 0 +includ 0 +thatmight 0 +affect 0 +either 0 +directli 0 +transit 0 +valu 0 +thevari 0 +member 0 +allow 0 +findsemant 0 +meaning 0 +decomposit 0 +thedecomposit 0 +consist 0 +textual 0 +contigu 0 +fundament 0 +solvingmani 0 +applicationsin 0 +understand 0 +mainten 0 +debug 0 +test 0 +worker 0 +carri 0 +atimprov 0 +underli 0 +relatedoper 0 +slicer 0 +method 0 +andbuild 0 +clickherefor 0 +recent 0 +establish 0 +unexpect 0 +connect 0 +betweeninterprocedur 0 +previou 0 +oninterprocedur 0 +show 0 +class 0 +interproceduraldataflow 0 +transformingthem 0 +kind 0 +polynomi 0 +timebi 0 +origin 0 +subject 0 +mean 0 +make 0 +probleminst 0 +find 0 +nearbi 0 +publicationsprogram 0 +overview 0 +slicing_pat 0 +pldi 0 +thesismerg 0 +iwscm 0 +popla 0 +iwsvcc 0 +ccpsd 0 +npfo_submiss 0 +ccipl 0 +prog_integration_system 0 +prog_integration_manu 0 +describ 0 +small 0 +subsetof 0 +pascal 0 +obtain 0 +clickingher 0 +retarget 0 +andexpect 0 +anddifferenc 0 +probabl 0 +miscellan 0 +thesesdavid 0 +thesisphil 0 +thesisinterprocedur 0 +analysisdemand 0 +bottom 0 +magic 0 +set 0 +exhaust 0 +tcs_ide_pap 0 +ptime 0 +complet 0 +acta_pap 0 +pfeiffer_thesi 0 +cacm 0 +jalg_pap 0 +popl_not 0 +publicationsbooksrep 0 +constructinglanguag 0 +edit 0 +chines 0 +corpor 0 +beij 0 +china 0 +press 0 +cambridg 0 +publicationssagiv 0 +sequenti 0 +natur 0 +shortest 0 +j_alg 0 +call 0 +competit 0 +line 0 +prioriti 0 +order 0 +letter 0 +preservingtransform 0 +effici 0 +comparison 0 +grammarswith 0 +unrestrict 0 +movement 0 +sublinear 0 +space 0 +context 0 +invit 0 +papershorwitz 0 +fourteenth 0 +conferenceon 0 +melbourn 0 +australia 0 +nanci 0 +franc 0 +ganzing 0 +chaptersrep 0 +ramakrishnan 0 +kluwer 0 +academ 0 +boston 0 +chang 0 +impact 0 +bohner 0 +arnold 0 +fromproceed 0 +architectur 0 +ichikawa 0 +tsubotani 0 +compani 0 +singapor 0 +barstow 0 +sandewal 0 +shrobe 0 +mcgraw 0 +wasserman 0 +publicationssiff 0 +turnidg 0 +partial 0 +danvi 0 +glueck 0 +thiemann 0 +hentenryck 0 +bind 0 +imper 0 +formalapproach 0 +aarhu 0 +moss 0 +nielsen 0 +schwartzbach 0 +tapsoft 0 +speed 0 +onth 0 +orlean 0 +compilerconstruct 0 +edinburgh 0 +scotland 0 +fritzson 0 +maintain 0 +domin 0 +reducibleflowgraph 0 +first 0 +scan 0 +data 0 +andarchitectur 0 +velen 0 +onalgebra 0 +preserv 0 +irvin 0 +jone 0 +illustr 0 +interfer 0 +softwareconfigur 0 +manag 0 +princeton 0 +languagedesign 0 +issuesin 0 +barcelona 0 +spain 0 +diaz 0 +oreja 0 +programminglanguag 0 +design 0 +atlanta 0 +variant 0 +forprogram 0 +versionand 0 +configur 0 +control 0 +grassau 0 +bericht 0 +german 0 +chapter 0 +winkler 0 +teubner 0 +stuttgart 0 +adequaci 0 +repres 0 +marceau 0 +remot 0 +thirteenth 0 +engineeringsymposium 0 +practic 0 +pittsburgh 0 +alpern 0 +proof 0 +check 0 +eleventh 0 +onprincipl 0 +salt 0 +lake 0 +utah 0 +static 0 +digest 0 +spring 0 +optim 0 +ninth 0 +albuquerqu 0 +tosyntax 0 +eighth 0 +williamsburg 0 +softwarerep 0 +site 0 +herefor 0 +patentsrep 0 +patent 0 +number 0 +pend 0 +submissionsrep 0 +reportsrep 0 +abstract 0 +analys 0 +leeuwen 0 +mehlhorn 0 +datalogisk 0 +institut 0 +psramalingam 0 +bibliographi 0 +unpublish 0 +present 0 +klint 0 +snelt 0 +identifi 0 +differ 0 +extendedabstract 0 +ball 0 +correct 0 +reconstitut 0 +equival 0 +theorem 0 +demonstr 0 +prototyp 0 +doc 0 +studentsvisitor 0 +mooli 0 +israel 0 +jiazhen 0 +robert 0 +paig 0 +nation 0 +chiao 0 +tung 0 +taiwan 0 +north 0 +carolina 0 +chapel 0 +studentsramalingam 0 +bound 0 +programintegr 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..ef64fe1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,69 @@ +saeed 1 +madison 1 +home 1 +window 1 +depart 1 +contact 1 +function 1 +statusclock 1 +comput 1 +scienc 1 +indian 1 +univ 1 +wisconsin 1 +time 1 +love 1 +beauti 1 +right 1 +pagespe 0 +statu 0 +date 0 +settimeout 0 +speed 0 +clearid 0 +cleartimeout 0 +mirza 0 +tech 0 +engin 0 +institut 0 +technolog 0 +kanpur 0 +graduat 0 +student 0 +lucknow 0 +india 0 +like 0 +spend 0 +listn 0 +film 0 +song 0 +netsurf 0 +read 0 +comic 0 +hero 0 +calvin 0 +peopl 0 +seem 0 +offic 0 +randal 0 +apart 0 +comp 0 +dayton 0 +street 0 +best 0 +email 0 +wisc 0 +friend 0 +pictur 0 +wismad 0 +suggest 0 +send 0 +check 0 +guestbook 0 +page 0 +access 0 +sinc 0 +last 0 +updat 0 +copi 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..54fccbb8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,32 @@ +salli 1 +peterson 1 +wisc 1 +comput 1 +home 0 +page 0 +goodwin 0 +lecturercomput 0 +scienc 0 +univers 0 +wisconsin 0 +dayton 0 +madison 0 +mail 0 +edutelephon 0 +interest 0 +desktop 0 +real 0 +time 0 +oper 0 +system 0 +program 0 +languag 0 +cours 0 +taught 0 +fall 0 +comp 0 +lectur 0 +us 0 +last 0 +chang 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..89fc86cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,129 @@ +databas 2 +marathon 1 +madison 1 +scienc 1 +amit 1 +home 1 +page 1 +comput 1 +univers 1 +interest 1 +process 1 +confer 1 +protocol 1 +wisconsin 1 +educ 1 +naughton 1 +technolog 1 +research 1 +onlin 1 +intern 1 +larg 1 +vldb 1 +server 1 +sort 1 +data 1 +inform 1 +transact 1 +olap 1 +warn 0 +reach 0 +offic 0 +email 0 +wisc 0 +snail 0 +mail 0 +dept 0 +princeton 0 +phone 0 +work 0 +toward 0 +guidanc 0 +jeff 0 +master 0 +bachelor 0 +engin 0 +indian 0 +institut 0 +madra 0 +analyt 0 +queri 0 +perform 0 +evalu 0 +public 0 +storag 0 +estim 0 +multidimension 0 +aggreg 0 +presenc 0 +hierarchi 0 +shukla 0 +prasad 0 +deshpand 0 +jeffrei 0 +karthikeyan 0 +ramasami 0 +mumbai 0 +bombai 0 +paper 0 +postscript 0 +slide 0 +present 0 +link 0 +run 0 +boston 0 +chicago 0 +york 0 +seattl 0 +georg 0 +utah 0 +relat 0 +pointer 0 +network 0 +activ 0 +bibliographi 0 +logic 0 +program 0 +competit 0 +profil 0 +spec 0 +idea 0 +name 0 +trier 0 +date 0 +sigmod 0 +base 0 +endow 0 +articl 0 +archiv 0 +massiv 0 +digit 0 +system 0 +mdd 0 +initi 0 +multimedia 0 +sourc 0 +nation 0 +industri 0 +infrastructur 0 +niiip 0 +consortium 0 +council 0 +transcoop 0 +manag 0 +support 0 +cooper 0 +applic 0 +introduct 0 +pilot 0 +softwar 0 +help 0 +needi 0 +children 0 +look 0 +asha 0 +person 0 +pageand 0 +bookmarksar 0 +also 0 +garfield 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..47e9c8b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,38 @@ +ashwin 1 +page 1 +depart 1 +home 0 +construct 0 +name 0 +graduat 0 +student 0 +comput 0 +scienc 0 +univers 0 +wisconsin 0 +madison 0 +come 0 +india 0 +hadmi 0 +undergradu 0 +educ 0 +indianinstitut 0 +technolog 0 +bombai 0 +iitb 0 +fantast 0 +place 0 +worth 0 +visit 0 +like 0 +contact 0 +canfing 0 +meto 0 +find 0 +whereabout 0 +altern 0 +send 0 +email 0 +sashwin 0 +wisc 0 +bookmark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..81365401 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,166 @@ +like 2 +also 1 +photo 1 +iitk 1 +much 1 +sastri 1 +year 1 +album 1 +scienc 1 +time 1 +cricket 1 +someth 1 +read 1 +subramanya 1 +home 1 +mine 1 +come 1 +hospet 1 +awai 1 +tungabhadra 1 +place 1 +dont 1 +long 1 +kanpur 1 +depart 1 +comput 1 +graduat 1 +wisconsin 1 +madison 1 +interest 1 +field 1 +favourit 1 +anyth 1 +compani 1 +friend 1 +pleasant 1 +watch 1 +back 1 +pagei 0 +mugshot 0 +town 0 +karnataka 0 +india 0 +school 0 +near 0 +hampi 0 +ruin 0 +vijayanagara 0 +empir 0 +built 0 +across 0 +river 0 +beauti 0 +unfortun 0 +photograph 0 +would 0 +scan 0 +undergradu 0 +indian 0 +institut 0 +technolog 0 +engin 0 +wonder 0 +fewphotograph 0 +gokul 0 +maintain 0 +contain 0 +mani 0 +class 0 +homepag 0 +inform 0 +classmatesat 0 +presentcurr 0 +student 0 +univers 0 +plan 0 +cours 0 +registeredfor 0 +spring 0 +interestsmi 0 +academ 0 +architectur 0 +program 0 +languag 0 +compil 0 +hope 0 +sport 0 +us 0 +playphatta 0 +tenni 0 +ball 0 +version 0 +thati 0 +champ 0 +provid 0 +entertainmentin 0 +bookmark 0 +link 0 +site 0 +enjoi 0 +listen 0 +music 0 +must 0 +consid 0 +hard 0 +rock 0 +metal 0 +realli 0 +donot 0 +whatev 0 +seinfeld 0 +sshow 0 +goe 0 +voraci 0 +reader 0 +rather 0 +whati 0 +better 0 +horror 0 +fantasi 0 +neither 0 +fiction 0 +unsuccesfulli 0 +tri 0 +grip 0 +earth 0 +know 0 +mean 0 +hint 0 +romanc 0 +ifposs 0 +jeffrei 0 +archer 0 +author 0 +date 0 +jane 0 +austen 0 +pride 0 +prejudic 0 +talk 0 +ramesh 0 +mahadeven 0 +sarticl 0 +make 0 +plai 0 +bridg 0 +pick 0 +wonderfulgam 0 +itagain 0 +solv 0 +crossword 0 +cryptic 0 +kind 0 +inth 0 +past 0 +devot 0 +hobbi 0 +last 0 +updat 0 +januari 0 +send 0 +comment 0 +suggest 0 +wisc 0 +eduunivers 0 +page 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..b0adfb6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,219 @@ +scout 2 +servic 2 +internet 2 +project 1 +inform 1 +report 1 +scienc 1 +madison 1 +research 1 +wisconsin 1 +nation 1 +includ 1 +staff 1 +susan 1 +page 1 +support 1 +time 1 +join 1 +year 1 +intern 1 +open 1 +posit 1 +departmentunivers 1 +wisc 1 +manag 1 +comput 1 +depart 1 +sciencefound 1 +user 1 +educ 1 +commun 1 +provid 1 +tool 1 +goal 1 +andeduc 1 +work 1 +week 1 +organ 1 +toolkit 1 +kid 1 +peopl 1 +receiv 1 +email 1 +read 1 +resourc 1 +public 1 +devot 1 +merit 1 +also 1 +network 1 +base 1 +diego 1 +agreement 1 +reloc 1 +speciallibrarian 1 +systemadministr 1 +addit 1 +specif 1 +calcari 0 +home 0 +calcarimanag 0 +servicescomput 0 +madisonsc 0 +edumi 0 +titl 0 +univers 0 +scoutservic 0 +internicand 0 +thehigh 0 +bestresourc 0 +help 0 +effect 0 +filter 0 +hundr 0 +item 0 +edit 0 +import 0 +present 0 +multipl 0 +usabl 0 +format 0 +happen 0 +come 0 +soonth 0 +sprout 0 +newslett 0 +written 0 +andthousand 0 +annotatedlist 0 +best 0 +newli 0 +discov 0 +kind 0 +select 0 +itemsinclud 0 +issu 0 +happeningspost 0 +everi 0 +weekdai 0 +thousand 0 +orth 0 +newsgroup 0 +moreinform 0 +profession 0 +background 0 +involv 0 +wide 0 +sinc 0 +wheni 0 +arbor 0 +thensfnet 0 +backbon 0 +informationservic 0 +divis 0 +spent 0 +three 0 +speak 0 +tonat 0 +higher 0 +audienc 0 +internetand 0 +develop 0 +produc 0 +seminarseri 0 +first 0 +seminar 0 +seri 0 +focus 0 +need 0 +internetend 0 +later 0 +becam 0 +director 0 +forcerfnet 0 +respect 0 +wrote 0 +propos 0 +result 0 +award 0 +part 0 +internicproject 0 +cooper 0 +third 0 +termin 0 +theport 0 +chose 0 +continu 0 +workof 0 +elect 0 +andrequest 0 +approv 0 +thecomput 0 +heartilyagre 0 +futur 0 +plan 0 +servicesat 0 +jack 0 +solock 0 +expand 0 +theaddit 0 +june 0 +matthew 0 +livesei 0 +aproject 0 +assist 0 +expans 0 +thescout 0 +disciplin 0 +asscout 0 +area 0 +studi 0 +branch 0 +researcharea 0 +collabor 0 +group 0 +campus 0 +potenti 0 +topic 0 +includenetwork 0 +discoveri 0 +retriev 0 +nidr 0 +anddisciplin 0 +orient 0 +gather 0 +depend 0 +onth 0 +natur 0 +hire 0 +willincludecomput 0 +graduat 0 +undergradu 0 +level 0 +visit 0 +site 0 +find 0 +ofour 0 +mail 0 +list 0 +ifyou 0 +interest 0 +appli 0 +theonlin 0 +descript 0 +special 0 +librarian 0 +send 0 +aresum 0 +write 0 +sampl 0 +address 0 +feel 0 +free 0 +contactm 0 +telephon 0 +calcariinternet 0 +dayton 0 +street 0 +scal 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..29f80928 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,53 @@ +chandrasekar 1 +comput 1 +scienc 1 +page 1 +coimbator 1 +school 1 +educ 1 +madison 1 +home 0 +welcom 0 +homepag 0 +worri 0 +happi 0 +present 0 +graduat 0 +student 0 +depart 0 +past 0 +born 0 +june 0 +southern 0 +state 0 +tamilnadu 0 +inindia 0 +high 0 +higher 0 +secondari 0 +undergradu 0 +theindian 0 +institut 0 +technolog 0 +kharagpur 0 +major 0 +dept 0 +engin 0 +person 0 +stuff 0 +resid 0 +kendal 0 +avenu 0 +officedept 0 +dayton 0 +sivasankaran 0 +schandra 0 +wisc 0 +last 0 +updat 0 +finger 0 +find 0 +whereabout 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..9f032529 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,31 @@ +schnarr 1 +wisc 1 +wisconsin 1 +eric 0 +home 0 +pageer 0 +research 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usaphon 0 +advisor 0 +larusresearch 0 +interest 0 +architectur 0 +descript 0 +languagesfunct 0 +languag 0 +designinterest 0 +link 0 +wind 0 +tunnel 0 +sacm 0 +hockei 0 +club 0 +dragon 0 +byte 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..d78e614f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,66 @@ +schoina 1 +yanni 1 +page 1 +research 1 +mark 1 +hill 1 +share 1 +memori 1 +ioanni 1 +babak 1 +falsafi 1 +alvin 1 +lebeck 1 +steven 1 +reinhardt 1 +jame 1 +laru 1 +david 1 +wood 1 +univers 1 +crete 1 +iraklio 1 +home 0 +wisc 0 +assistantdepart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +note 0 +construct 0 +advisor 0 +interest 0 +parallel 0 +systemspubl 0 +fine 0 +grain 0 +access 0 +control 0 +distribut 0 +sixth 0 +intern 0 +confer 0 +architectur 0 +support 0 +programminglanguag 0 +oper 0 +system 0 +asplo 0 +applic 0 +specif 0 +protocol 0 +user 0 +level 0 +ann 0 +roger 0 +supercomput 0 +educ 0 +last 0 +updat 0 +juli 0 +cretan 0 +cook 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..ff896140 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,37 @@ +comput 1 +beverli 1 +seavei 1 +danc 1 +ramayana 1 +india 1 +southeast 1 +asia 1 +finger 1 +home 0 +page 0 +current 0 +regist 0 +grad 0 +student 0 +scienc 0 +switch 0 +special 0 +degre 0 +biologi 0 +interest 0 +mine 0 +includ 0 +asian 0 +classic 0 +differ 0 +version 0 +drama 0 +ramakien 0 +wish 0 +could 0 +give 0 +account 0 +hairbal 0 +keyboard 0 +keeper 0 +instead 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..6bbc991a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,62 @@ +home 1 +page 1 +madison 1 +colvil 1 +wisc 1 +welcom 1 +school 1 +uwisc 1 +world 1 +scott 0 +pagescott 0 +mail 0 +eduoffic 0 +address 0 +comput 0 +scienc 0 +offic 0 +dayton 0 +franc 0 +well 0 +back 0 +univers 0 +wisconsin 0 +seen 0 +largest 0 +ball 0 +chees 0 +want 0 +link 0 +pagein 0 +addit 0 +list 0 +find 0 +interest 0 +hopefulli 0 +also 0 +enjoi 0 +beer 0 +drink 0 +game 0 +absolut 0 +add 0 +caffein 0 +rate 0 +soda 0 +guid 0 +lock 0 +pickingand 0 +educ 0 +artsi 0 +fact 0 +book 0 +constitut 0 +english 0 +dictionari 0 +roget 0 +thesauru 0 +poetri 0 +databas 0 +last 0 +updat 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..e3b67664 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,59 @@ +motion 1 +anim 1 +steve 1 +seitz 1 +imag 1 +analysi 1 +problem 1 +graphic 1 +model 1 +virtual 1 +writeup 0 +charact 0 +control 0 +chuck 0 +dyerour 0 +research 0 +motiv 0 +teachinga 0 +perform 0 +realist 0 +hasit 0 +root 0 +cartoon 0 +modern 0 +applic 0 +tocomput 0 +realiti 0 +teleconferenc 0 +robot 0 +task 0 +endow 0 +knowledg 0 +performa 0 +repertoir 0 +interest 0 +learn 0 +beinvok 0 +directli 0 +high 0 +level 0 +cue 0 +smile 0 +walk 0 +infer 0 +anabstract 0 +goal 0 +store 0 +cu 0 +levelev 0 +input 0 +devic 0 +sequenc 0 +project 0 +includ 0 +period 0 +track 0 +rigid 0 +nonrigid 0 +object 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..ffa6a1b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,85 @@ +view 2 +interpol 2 +imag 2 +scene 1 +seitz 1 +synthesi 1 +techniqu 1 +morph 1 +work 1 +origin 1 +steve 1 +autom 1 +produc 1 +physic 1 +valid 1 +describ 1 +us 1 +investig 0 +chuck 0 +dyerw 0 +devis 0 +provabl 0 +correct 0 +creat 0 +basi 0 +reli 0 +geometr 0 +known 0 +graphicscommun 0 +intermedi 0 +although 0 +techniquescurr 0 +enjoi 0 +widespread 0 +theoret 0 +validityha 0 +establish 0 +particular 0 +viewsof 0 +sequenc 0 +ofthat 0 +surprisingli 0 +answer 0 +provid 0 +first 0 +undergo 0 +simplerectif 0 +procedur 0 +certain 0 +assumpt 0 +visibl 0 +theproject 0 +process 0 +satisfi 0 +stereo 0 +todetermin 0 +correspond 0 +recent 0 +consid 0 +user 0 +interact 0 +guid 0 +comput 0 +three 0 +differ 0 +pair 0 +therectifi 0 +shown 0 +left 0 +right 0 +click 0 +theinterpol 0 +center 0 +mpeg 0 +movi 0 +show 0 +computedinterpol 0 +dyer 0 +proc 0 +workshop 0 +represent 0 +visual 0 +last 0 +chang 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..39ae809d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,94 @@ +morph 2 +view 2 +imag 2 +mpeg 1 +movi 1 +interpol 1 +scene 1 +techniqu 1 +differ 1 +viewpoint 1 +click 1 +resolut 1 +steve 1 +seitz 1 +chuck 1 +dyer 1 +appear 1 +us 1 +object 1 +pose 1 +simpl 1 +camera 1 +shape 1 +chang 1 +produc 1 +mona 1 +lisa 1 +frame 1 +investig 0 +relat 0 +public 0 +siggraph 0 +toward 0 +base 0 +represent 0 +icpr 0 +gener 0 +compel 0 +transit 0 +betweenimag 0 +howev 0 +often 0 +causeunnatur 0 +distort 0 +difficult 0 +correct 0 +manual 0 +basic 0 +principl 0 +projectivegeometri 0 +paper 0 +introduc 0 +extens 0 +morphingthat 0 +correctli 0 +handl 0 +project 0 +transform 0 +call 0 +work 0 +prewarp 0 +imagesprior 0 +comput 0 +postwarp 0 +knowledg 0 +requir 0 +appliedto 0 +photograph 0 +draw 0 +well 0 +render 0 +abil 0 +synthes 0 +structureafford 0 +wide 0 +varieti 0 +interest 0 +effect 0 +imagetransform 0 +taken 0 +illus 0 +physic 0 +move 0 +virtual 0 +face 0 +simultan 0 +facial 0 +color 0 +reflect 0 +high 0 +jude 0 +shavlik 0 +last 0 +septemb 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..560ec9a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,127 @@ +period 2 +imag 2 +motion 2 +trace 1 +inform 1 +refer 1 +cycl 1 +cyclic 1 +frame 1 +determin 1 +correspond 1 +tempor 1 +composit 1 +real 1 +move 1 +sequenc 1 +object 1 +poscript 1 +paper 1 +recov 1 +turntabl 1 +scene 1 +pure 1 +us 1 +enhanc 1 +invari 1 +analysi 0 +steve 0 +seitz 0 +chuck 0 +dyermani 0 +life 0 +instanc 0 +human 0 +locomotori 0 +walk 0 +run 0 +skip 0 +shuffl 0 +areperiod 0 +person 0 +havedevelop 0 +approach 0 +could 0 +beenproduc 0 +whose 0 +unlik 0 +previou 0 +attempt 0 +ourapproach 0 +allow 0 +camera 0 +film 0 +click 0 +tracethi 0 +show 0 +line 0 +imagesequ 0 +phonograph 0 +ramp 0 +moment 0 +timewher 0 +momentarili 0 +slow 0 +shownsuperimpos 0 +error 0 +surfac 0 +repeat 0 +tend 0 +perfectli 0 +even 0 +variesslightli 0 +next 0 +physic 0 +import 0 +changesin 0 +gener 0 +defin 0 +motionsthat 0 +make 0 +variat 0 +explicit 0 +represent 0 +call 0 +compact 0 +describ 0 +evolutionof 0 +without 0 +spatial 0 +quantiti 0 +asposit 0 +veloc 0 +delimit 0 +identifi 0 +correspondencesacross 0 +provid 0 +mean 0 +parsinga 0 +addit 0 +sever 0 +featur 0 +canb 0 +deriv 0 +relat 0 +natur 0 +locat 0 +irregular 0 +tracecan 0 +also 0 +medic 0 +fromdiffer 0 +furthermor 0 +reliabl 0 +recoveredfrom 0 +view 0 +fashion 0 +theori 0 +affin 0 +clickher 0 +heart 0 +angiograph 0 +bottom 0 +note 0 +additionalstructur 0 +visibl 0 +appar 0 +singl 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..f1981fca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,47 @@ +seitz 1 +imag 1 +comput 1 +steve 1 +motion 1 +analysi 1 +vision 1 +view 1 +mpeg 1 +movi 1 +click 1 +home 0 +page 0 +wisc 0 +graduat 0 +student 0 +berkelei 0 +math 0 +area 0 +interest 0 +base 0 +render 0 +machin 0 +graphic 0 +research 0 +project 0 +morph 0 +synthesi 0 +show 0 +interpol 0 +left 0 +exampl 0 +cyclic 0 +recent 0 +public 0 +stuff 0 +frequent 0 +link 0 +wisconsin 0 +group 0 +surreal 0 +cach 0 +closer 0 +look 0 +last 0 +chang 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..c6620cae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,102 @@ +madison 2 +group 2 +mlrg 2 +learn 2 +machin 1 +page 1 +schedul 1 +graduat 1 +home 1 +archiv 1 +recent 1 +paper 1 +local 1 +link 1 +contain 1 +relev 1 +read 1 +seminar 1 +shavlik 1 +also 1 +comput 1 +research 1 +wisconsin 1 +tabl 1 +content 1 +member 1 +dataset 1 +domain 1 +cours 1 +august 1 +jude 1 +access 1 +current 1 +dept 1 +librari 1 +wisc 1 +agent 1 +inform 0 +thememb 0 +univers 0 +theori 0 +us 0 +extern 0 +carolyn 0 +allex 0 +jonathon 0 +bodner 0 +kevin 0 +cherkauer 0 +mark 0 +craven 0 +tina 0 +eliassi 0 +richard 0 +maclin 0 +david 0 +opitz 0 +papersvisit 0 +describ 0 +public 0 +ascii 0 +file 0 +list 0 +recentabstractsi 0 +avail 0 +theoriesy 0 +directori 0 +severalml 0 +testb 0 +breast 0 +cancer 0 +databas 0 +prof 0 +olvi 0 +mangasarian 0 +sgroup 0 +line 0 +math 0 +program 0 +biologi 0 +neurosci 0 +vision 0 +robot 0 +doit 0 +center 0 +mathemat 0 +scienc 0 +gopher 0 +school 0 +abstract 0 +select 0 +journal 0 +mostli 0 +wendt 0 +readabl 0 +proc 0 +workshop 0 +held 0 +intern 0 +confer 0 +last 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..4ba8a4df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,96 @@ +wisconsin 1 +shubu 1 +mukherje 1 +madison 1 +research 1 +comput 1 +isca 1 +home 1 +page 1 +wind 1 +tunnel 1 +public 1 +architect 1 +interest 1 +univers 1 +coher 1 +share 1 +memori 1 +cach 1 +protocol 1 +ppopp 1 +simul 1 +wisc 0 +fiance 0 +mimi 0 +nephew 0 +avirup 0 +month 0 +graduat 0 +assist 0 +scienc 0 +departmentunivers 0 +west 0 +dayton 0 +street 0 +usaphon 0 +shubhendu 0 +click 0 +button 0 +jump 0 +correspond 0 +articl 0 +advisor 0 +mark 0 +hill 0 +project 0 +world 0 +wide 0 +badger 0 +ballroom 0 +danc 0 +team 0 +person 0 +hobbi 0 +morph 0 +dionisio 0 +courtesi 0 +steve 0 +seitz 0 +random 0 +linkseducationph 0 +spring 0 +expect 0 +tech 0 +indian 0 +institut 0 +technolog 0 +kanpur 0 +india 0 +summari 0 +network 0 +interfac 0 +dissert 0 +cachabl 0 +queue 0 +design 0 +space 0 +progress 0 +distribut 0 +mechan 0 +cooper 0 +commod 0 +workstat 0 +submit 0 +custom 0 +irregular 0 +applic 0 +grai 0 +softwar 0 +dirsw 0 +parallel 0 +tutori 0 +copyright 0 +copi 0 +right 0 +reserv 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..a2fc8b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,25 @@ +michael 1 +siff 1 +home 0 +page 0 +philosophi 0 +research 0 +academ 0 +interest 0 +run 0 +club 0 +fall 0 +midwest 0 +seminar 0 +wonder 0 +wai 0 +wast 0 +time 0 +new 0 +inform 0 +resourc 0 +book 0 +movi 0 +televis 0 +sport 0 +humor 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..7df8fdcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,24 @@ +skrentni 1 +wisconsin 1 +madison 1 +wisc 1 +univers 1 +home 0 +page 0 +lecturerc 0 +coordinatorgradu 0 +studentcomput 0 +scienc 0 +departmentunivers 0 +dayton 0 +offic 0 +comput 0 +sciencesemail 0 +edutelephon 0 +relat 0 +link 0 +depart 0 +groupskrentni 0 +last 0 +updat 0 +februari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..cbfa6ae6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,53 @@ +wisconsin 1 +univers 1 +madison 1 +scienc 1 +comput 1 +system 1 +unix 1 +bryan 1 +intellig 1 +help 1 +knowledg 1 +represent 1 +travi 1 +util 1 +home 0 +page 0 +graduat 0 +studentcomput 0 +departmentunivers 0 +dayton 0 +offic 0 +mail 0 +wisc 0 +edutelephon 0 +purdu 0 +interest 0 +human 0 +interact 0 +oper 0 +activ 0 +select 0 +recent 0 +public 0 +step 0 +toward 0 +technic 0 +report 0 +april 0 +miller 0 +fredriksen 0 +empir 0 +studi 0 +reliabl 0 +commun 0 +relat 0 +link 0 +depart 0 +group 0 +professor 0 +larri 0 +advisor 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..d7d73e33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,128 @@ +system 2 +intellig 2 +artifici 1 +wisc 1 +expert 1 +comput 1 +data 1 +databas 1 +larri 1 +travi 1 +interest 1 +procedur 1 +control 1 +deduct 1 +support 1 +social 1 +research 1 +recent 1 +larg 1 +inform 1 +integr 1 +develop 1 +project 1 +altern 1 +metaphor 1 +west 1 +magazin 1 +home 0 +page 0 +travisprofessorcomput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +dayton 0 +madison 0 +mail 0 +edutelephon 0 +univers 0 +californa 0 +angel 0 +automat 0 +understand 0 +complex 0 +philosoph 0 +foundat 0 +ofartifici 0 +manag 0 +implic 0 +summari 0 +center 0 +around 0 +us 0 +logic 0 +basi 0 +knowledg 0 +formal 0 +augment 0 +databasesystem 0 +work 0 +focus 0 +automaticdeduct 0 +design 0 +contruct 0 +displai 0 +test 0 +high 0 +level 0 +abstract 0 +pattern 0 +form 0 +informationcontain 0 +heterogen 0 +special 0 +attent 0 +beingdevot 0 +represent 0 +geograph 0 +waysthat 0 +enhanc 0 +visualiz 0 +map 0 +activ 0 +involv 0 +sever 0 +andwith 0 +incorpor 0 +model 0 +visual 0 +aid 0 +singl 0 +organiz 0 +issu 0 +associ 0 +introduct 0 +technolog 0 +analysi 0 +suppositionsunderli 0 +approach 0 +current 0 +student 0 +chuck 0 +ohar 0 +bryan 0 +scott 0 +swanson 0 +andi 0 +whitsitt 0 +derek 0 +zahn 0 +public 0 +could 0 +failur 0 +implement 0 +oravec 0 +appear 0 +journal 0 +softwar 0 +reflex 0 +examin 0 +falsework 0 +societi 0 +landscap 0 +link 0 +dept 0 +group 0 +last 0 +chang 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..a731a44d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,63 @@ +comput 1 +sodani 1 +depart 1 +scienc 1 +madison 1 +india 1 +link 1 +avinash 1 +page 1 +univers 1 +wisconsin 1 +interest 1 +project 1 +packag 1 +relat 1 +home 0 +wisc 0 +graduat 0 +student 0 +west 0 +dayton 0 +street 0 +educ 0 +tech 0 +hon 0 +indian 0 +institut 0 +technolog 0 +kharagpur 0 +juli 0 +academ 0 +architectur 0 +multiscalar 0 +kestrel 0 +program 0 +languag 0 +compil 0 +cours 0 +java 0 +download 0 +meet 0 +batch 0 +mate 0 +info 0 +center 0 +rank 0 +new 0 +hindu 0 +onlin 0 +edit 0 +random 0 +look 0 +kgpite 0 +follow 0 +toll 0 +free 0 +directori 0 +will 0 +world 0 +cricket 0 +access 0 +time 0 +sinc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..b4c0038d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,252 @@ +sohi 3 +comput 2 +symposium 2 +austin 2 +intern 2 +architectur 2 +processor 2 +multiscalar 2 +scienc 2 +parallel 2 +research 1 +current 1 +univers 1 +instruct 1 +memori 1 +program 1 +microarchitectur 1 +recent 1 +depart 1 +wisconsin 1 +madison 1 +execut 1 +breach 1 +annual 1 +decemb 1 +dynam 1 +franklin 1 +pnevmatikato 1 +engin 1 +system 1 +gener 1 +micro 1 +technic 1 +report 1 +address 1 +level 1 +multiprocessor 1 +design 1 +perform 1 +cycl 1 +character 1 +exploit 1 +novemb 1 +file 1 +load 1 +ieee 1 +vijaykumar 1 +access 1 +guard 1 +predict 1 +fine 1 +grain 1 +wisc 1 +educ 1 +interest 1 +summari 1 +graduat 1 +student 1 +talk 1 +illinoi 1 +urbana 1 +electr 1 +process 1 +compil 1 +investig 1 +ordinari 1 +group 1 +sever 1 +issu 1 +studi 1 +avail 1 +paradigm 1 +given 1 +compress 1 +postscript 1 +framemak 1 +appear 1 +zero 1 +reduc 1 +hardwar 1 +mechan 1 +transact 1 +streamlin 1 +regist 1 +effici 1 +detect 1 +pointer 1 +arrai 1 +branch 1 +analysi 1 +april 1 +guri 0 +home 0 +page 0 +gurindar 0 +associ 0 +professor 0 +andelectr 0 +public 0 +graduatesaddress 0 +sciencesunivers 0 +west 0 +dayton 0 +streetmadison 0 +usasohi 0 +eduphon 0 +department 0 +offic 0 +elect 0 +electron 0 +birla 0 +institut 0 +technolog 0 +pilani 0 +india 0 +share 0 +focus 0 +thehighest 0 +uniprocessor 0 +circa 0 +plenti 0 +transistor 0 +availableon 0 +chip 0 +challeng 0 +resourc 0 +getth 0 +highest 0 +possibl 0 +sequenti 0 +target 0 +sustain 0 +ofov 0 +numer 0 +applic 0 +needto 0 +resolv 0 +goal 0 +achiev 0 +thenatur 0 +numericappl 0 +order 0 +understand 0 +andhow 0 +could 0 +bulk 0 +effort 0 +expend 0 +continu 0 +thedevelop 0 +model 0 +novel 0 +develop 0 +andcarri 0 +detail 0 +simul 0 +assessth 0 +potenti 0 +concept 0 +todd 0 +scott 0 +andrea 0 +moshovo 0 +vijaykumarrec 0 +talkswil 0 +set 0 +import 0 +futur 0 +risc 0 +held 0 +watson 0 +researchcent 0 +yorktown 0 +height 0 +place 0 +publicationshigh 0 +bandwidth 0 +translat 0 +multipl 0 +inrd 0 +appendix 0 +ofdetail 0 +resultsi 0 +also 0 +support 0 +latencyt 0 +superscalar 0 +processorsj 0 +smith 0 +proceed 0 +reorder 0 +referencesm 0 +data 0 +cach 0 +fast 0 +calcul 0 +anatomi 0 +request 0 +combin 0 +arbitrari 0 +interconnect 0 +network 0 +lebeck 0 +distribut 0 +error 0 +sigplan 0 +confer 0 +languag 0 +implement 0 +goodman 0 +handbook 0 +press 0 +control 0 +flow 0 +traffic 0 +inter 0 +oper 0 +communicationin 0 +expand 0 +split 0 +window 0 +depend 0 +errorst 0 +processorsd 0 +knapsack 0 +hierarchi 0 +componentt 0 +tetra 0 +evalu 0 +serial 0 +processorst 0 +juli 0 +gradstodd 0 +softwar 0 +latencydionisio 0 +incorpor 0 +exist 0 +setsmanoj 0 +architecturemark 0 +friedman 0 +januari 0 +prolog 0 +executionsriram 0 +vajapeyam 0 +crai 0 +processormen 0 +chow 0 +chiang 0 +septemb 0 +base 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..54757b0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,118 @@ +solomon 2 +comput 1 +manag 1 +home 1 +page 1 +marvin 1 +scienc 1 +softwar 1 +program 1 +proc 1 +conf 1 +data 1 +michael 1 +andmarvin 1 +astech 1 +report 1 +chair 1 +depart 1 +wisconsin 1 +madison 1 +interest 1 +object 1 +databas 1 +system 1 +develop 1 +environ 1 +languag 1 +sigmod 1 +june 1 +mark 1 +mcauliff 1 +carei 1 +odyssea 1 +tsatalo 1 +version 1 +appear 1 +persist 1 +paul 1 +adam 1 +avail 1 +shore 1 +project 1 +professor 0 +former 0 +goodman 0 +univers 0 +west 0 +dayton 0 +street 0 +phone 0 +wisc 0 +research 0 +orient 0 +support 0 +distribut 0 +oper 0 +network 0 +design 0 +implement 0 +theori 0 +recent 0 +publicationstoward 0 +effect 0 +effici 0 +free 0 +space 0 +abstractpostscriptth 0 +gmap 0 +versatil 0 +tool 0 +physic 0 +independ 0 +larg 0 +septemb 0 +andyanni 0 +ioannidi 0 +abstractpostscriptexpand 0 +inth 0 +vldb 0 +journalv 0 +april 0 +abstractpostscriptshor 0 +applic 0 +david 0 +dewitt 0 +franklin 0 +nanci 0 +hall 0 +jeffrei 0 +naughton 0 +daniel 0 +schuh 0 +seth 0 +white 0 +andmichael 0 +zwillingavail 0 +overview 0 +capitl 0 +fourth 0 +intern 0 +workshop 0 +configur 0 +updat 0 +lectur 0 +note 0 +logic 0 +point 0 +graphic 0 +interfac 0 +room 0 +built 0 +us 0 +java 0 +spring 0 +univ 0 +photoalbum 0 +todai 0 +dilbert 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..4e4627f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,6 @@ +sowmya 1 +home 1 +page 1 +welcom 0 +subramanian 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..7b6ebdbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,78 @@ +shilpa 1 +comput 1 +scienc 1 +lawand 1 +home 1 +page 1 +wisconsin 1 +link 1 +stuff 1 +wisc 1 +welcom 0 +pagei 0 +graduat 0 +student 0 +depart 0 +univers 0 +madison 0 +person 0 +stuffa 0 +pastfor 0 +info 0 +schoolher 0 +resum 0 +html 0 +ascii 0 +second 0 +love 0 +us 0 +resours 0 +want 0 +place 0 +syster 0 +women 0 +relat 0 +madisonsurf 0 +madisonst 0 +inform 0 +serverth 0 +hoofer 0 +sail 0 +clubowl 0 +music 0 +book 0 +movi 0 +java 0 +signatur 0 +meet 0 +first 0 +lovesnowi 0 +homepag 0 +cool 0 +linksher 0 +iswher 0 +finger 0 +three 0 +judg 0 +panel 0 +philadelphia 0 +vote 0 +constitut 0 +follow 0 +read 0 +decis 0 +access 0 +time 0 +sinc 0 +june 0 +send 0 +comment 0 +suggest 0 +email 0 +tossl 0 +shilpal 0 +thru 0 +guest 0 +formlast 0 +modifi 0 +juli 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..21801a40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,32 @@ +home 1 +comput 1 +scienc 1 +page 1 +jeremi 1 +stenglein 1 +depart 1 +madison 1 +section 1 +graduat 0 +student 0 +univers 0 +wisconsin 0 +west 0 +dayton 0 +street 0 +offic 0 +phone 0 +mail 0 +stenglei 0 +wisc 0 +teach 0 +gener 0 +take 0 +construct 0 +compil 0 +link 0 +pageth 0 +simpson 0 +pageespn 0 +sport 0 +hotwir 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..718363a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,57 @@ +reinhardt 1 +page 1 +wisconsin 1 +depart 1 +steve 0 +home 0 +steven 0 +graduat 0 +student 0 +computerarchitectur 0 +work 0 +wind 0 +tunnelgroup 0 +advisor 0 +david 0 +wood 0 +although 0 +project 0 +mark 0 +hill 0 +andjim 0 +laru 0 +often 0 +feel 0 +free 0 +tell 0 +mewhat 0 +well 0 +plan 0 +finish 0 +fall 0 +join 0 +faculti 0 +ofth 0 +univers 0 +michigan 0 +eec 0 +januari 0 +interest 0 +find 0 +publicationsresearch 0 +summari 0 +email 0 +stever 0 +wisc 0 +click 0 +finger 0 +phone 0 +comput 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +usalast 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..4ba0f6ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,58 @@ +comput 1 +john 1 +home 1 +strikwerda 1 +page 1 +scienc 1 +wisconsin 1 +inform 1 +numer 1 +analysi 1 +chicago 1 +nathan 1 +professor 0 +strikwerdadepart 0 +sciencesunivers 0 +madison 0 +west 0 +dayton 0 +streetmadison 0 +email 0 +strik 0 +wisc 0 +telephon 0 +fall 0 +teach 0 +problem 0 +solv 0 +us 0 +begin 0 +januari 0 +assign 0 +nation 0 +foundat 0 +year 0 +click 0 +qualifi 0 +exam 0 +research 0 +interest 0 +fluid 0 +dynamicsmyoffici 0 +depart 0 +pageoth 0 +stuff 0 +field 0 +museum 0 +point 0 +search 0 +rate 0 +best 0 +tribun 0 +talk 0 +radio 0 +show 0 +car 0 +footballmi 0 +kid 0 +drew 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..5cb4ef65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,30 @@ +interest 1 +subba 0 +home 0 +page 0 +thing 0 +enjoi 0 +calvin 0 +hobb 0 +late 0 +show 0 +david 0 +letterman 0 +seinfeld 0 +prooocessor 0 +histor 0 +paper 0 +evalu 0 +stream 0 +buffer 0 +secondari 0 +cach 0 +replac 0 +decoupl 0 +integ 0 +execut 0 +superscalar 0 +processor 0 +subbarao 0 +cambridg 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..721288f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,83 @@ +parallel 1 +taiwan 1 +chiang 1 +page 1 +processor 1 +schedul 1 +mari 1 +vernon 1 +home 0 +depart 0 +univers 0 +wisconsin 0 +madisonoffic 0 +stelephon 0 +mail 0 +suhui 0 +wisc 0 +educlick 0 +send 0 +emailoffic 0 +hour 0 +thur 0 +still 0 +construct 0 +ta 0 +fall 0 +public 0 +applic 0 +characterist 0 +limit 0 +preemption 0 +complet 0 +polici 0 +rajesh 0 +mansharamani 0 +proc 0 +sigmetr 0 +conf 0 +measur 0 +model 0 +comput 0 +system 0 +nashvil 0 +dynam 0 +static 0 +quantum 0 +base 0 +alloc 0 +workshop 0 +strategi 0 +process 0 +conjunct 0 +ipp 0 +april 0 +search 0 +engin 0 +yahoo 0 +sourc 0 +resourc 0 +bibliographi 0 +world 0 +wide 0 +virtual 0 +librari 0 +subject 0 +catalogu 0 +link 0 +relat 0 +network 0 +servic 0 +sinanet 0 +shop 0 +magzin 0 +new 0 +job 0 +calendar 0 +seednet 0 +vistor 0 +guid 0 +academia 0 +sinica 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..958d131e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,192 @@ +link 2 +page 2 +site 2 +new 1 +madison 1 +weather 1 +scienc 1 +current 1 +info 1 +variou 1 +interest 1 +articl 1 +david 1 +sundaram 1 +stukel 1 +browbeck 1 +feet 1 +class 1 +take 1 +comput 1 +view 1 +specif 1 +philosoph 1 +scientif 1 +onlin 1 +nation 1 +astronomi 1 +channel 1 +homepag 0 +upon 0 +effronteri 0 +push 0 +hand 0 +sever 0 +patient 0 +femor 0 +arteri 0 +blood 0 +spurt 0 +blind 0 +anesthetist 0 +hall 0 +scream 0 +tri 0 +knee 0 +groin 0 +manag 0 +hamstr 0 +scalpel 0 +crawl 0 +floor 0 +stab 0 +leg 0 +voilet 0 +baboon 0 +assist 0 +woman 0 +ever 0 +care 0 +damn 0 +realli 0 +wig 0 +climb 0 +tabl 0 +pois 0 +jump 0 +stomp 0 +cop 0 +rush 0 +william 0 +burrough 0 +nake 0 +lunch 0 +construct 0 +catapult 0 +reader 0 +choos 0 +index 0 +brief 0 +relat 0 +dedic 0 +smart 0 +cloth 0 +also 0 +steve 0 +mann 0 +see 0 +wearabl 0 +camera 0 +inform 0 +artifici 0 +life 0 +santa 0 +institut 0 +project 0 +call 0 +tierra 0 +thoma 0 +recent 0 +dilbert 0 +strip 0 +technic 0 +math 0 +joke 0 +somewher 0 +artist 0 +natur 0 +physic 0 +conscious 0 +surviv 0 +research 0 +laboratori 0 +destruct 0 +show 0 +organ 0 +arcosanti 0 +arcolog 0 +outsid 0 +phoenix 0 +krishnamurti 0 +foundat 0 +tell 0 +centuri 0 +beat 0 +writer 0 +includ 0 +pictur 0 +fill 0 +wait 0 +sourc 0 +packer 0 +scientist 0 +regist 0 +harass 0 +mail 0 +reward 0 +dozen 0 +factoid 0 +astound 0 +friend 0 +american 0 +advantag 0 +hypertext 0 +addit 0 +provid 0 +select 0 +print 0 +edit 0 +publish 0 +weekli 0 +contain 0 +smaller 0 +hindu 0 +newspap 0 +india 0 +onion 0 +local 0 +depart 0 +washburn 0 +observatori 0 +public 0 +univers 0 +len 0 +insignific 0 +piec 0 +histori 0 +obtain 0 +follow 0 +webweath 0 +servic 0 +home 0 +late 0 +timothi 0 +leari 0 +numer 0 +written 0 +note 0 +optimist 0 +noam 0 +chomski 0 +disinform 0 +great 0 +list 0 +conspiraci 0 +theori 0 +buri 0 +within 0 +ultra 0 +trendi 0 +movi 0 +review 0 +back 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..9668eebb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,9 @@ +swander 1 +offic 1 +brian 0 +home 0 +pagebrian 0 +think 0 +hour 0 +bookmark 0 +mark 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..41e11fd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,107 @@ +system 1 +perform 1 +snowboard 1 +tamch 1 +oper 1 +talk 1 +paper 1 +ariel 1 +comput 1 +scienc 1 +fall 1 +parallel 1 +distribut 1 +differ 1 +municip 1 +bond 1 +share 1 +memori 1 +spring 1 +network 1 +file 1 +version 1 +tamchesari 0 +research 0 +assistantemail 0 +wisc 0 +depart 0 +west 0 +dayton 0 +street 0 +madison 0 +typic 0 +pose 0 +angri 0 +posei 0 +organ 0 +colleg 0 +park 0 +offic 0 +sresearch 0 +paradyn 0 +toolsstatu 0 +search 0 +thesi 0 +topic 0 +els 0 +interest 0 +toolsparallel 0 +systemsbluesth 0 +simpsonsseinfeldskiingskinetkeyston 0 +favorit 0 +area 0 +joke 0 +vacum 0 +cleaner 0 +dirt 0 +attach 0 +greet 0 +peopl 0 +whoa 0 +sorri 0 +dude 0 +eventu 0 +matur 0 +gener 0 +incom 0 +hate 0 +countri 0 +music 0 +fortran 0 +cool 0 +link 0 +yahooespncpu 0 +infoskinetoth 0 +stuff 0 +exokernel 0 +architectur 0 +applic 0 +level 0 +resourc 0 +manag 0 +octob 0 +techniqu 0 +tool 0 +improv 0 +callaghan 0 +supercomput 0 +interconnect 0 +april 0 +zebra 0 +stripe 0 +need 0 +structur 0 +raid 0 +block 0 +wait 0 +free 0 +highli 0 +concurr 0 +object 0 +asynchron 0 +multiprocessor 0 +postscript 0 +analysi 0 +risc 0 +instruct 0 +enhanc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..178cba2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,102 @@ +pictur 1 +home 1 +page 1 +look 1 +show 1 +lampert 1 +good 1 +like 1 +babi 1 +someon 1 +tick 1 +turn 1 +click 1 +take 1 +work 1 +music 1 +song 1 +sound 1 +jeff 0 +pagejeff 0 +ricardo 0 +montalban 0 +voic 0 +welcom 0 +know 0 +nota 0 +least 0 +still 0 +foron 0 +incrimin 0 +doesn 0 +make 0 +aconvict 0 +high 0 +school 0 +yearbook 0 +lasttim 0 +never 0 +heard 0 +cute 0 +think 0 +well 0 +found 0 +coupl 0 +threaten 0 +intoa 0 +human 0 +dispens 0 +took 0 +henc 0 +befound 0 +separ 0 +anautograph 0 +copi 0 +sign 0 +name 0 +monitor 0 +pictureappear 0 +choos 0 +link 0 +weasel 0 +seek 0 +pace 0 +basic 0 +factswho 0 +person 0 +last 0 +night 0 +academ 0 +relatedwhat 0 +class 0 +dept 0 +resum 0 +entertainmentbook 0 +movi 0 +program 0 +newsgroup 0 +import 0 +subjectsfriendsno 0 +sick 0 +theme 0 +hobbi 0 +club 0 +organizationsgroup 0 +plu 0 +wish 0 +inmi 0 +favorit 0 +linksugh 0 +servo 0 +juli 0 +andrew 0 +fire 0 +crow 0 +mstk 0 +eclect 0 +paraphenaliai 0 +would 0 +miscellan 0 +straight 0 +forward 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..b9bf0cad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,18 @@ +mathematicalprogram 1 +todd 0 +homepagetodd 0 +homepagein 0 +fall 0 +teach 0 +section 0 +sinc 0 +area 0 +mathemat 0 +program 0 +plug 0 +page 0 +contain 0 +wealth 0 +inform 0 +tmunson 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..01d110f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian 0 +home 0 +pagebrian 0 +toonen 0 +comput 0 +scienc 0 +departmentunivers 0 +wisconsin 0 +dayton 0 +streetmadison 0 +offic 0 +cswhatev 0 +chief 0 +seattleth 0 +ground 0 +tipi 0 +medit 0 +life 0 +itsmean 0 +accept 0 +kinship 0 +creatur 0 +acknowledgingun 0 +univers 0 +thing 0 +infus 0 +thetru 0 +essenc 0 +civil 0 +luther 0 +stand 0 +bear 0 +oglala 0 +siouxlast 0 +modifi 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..0ba5ce35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,15 @@ +page 1 +home 1 +netscap 1 +thano 0 +tsioli 0 +site 0 +enhanc 0 +read 0 +shouldconsid 0 +upgrad 0 +browser 0 +latest 0 +version 0 +ifthat 0 +option 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..e3952520 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,56 @@ +madison 1 +todd 1 +turnidg 1 +wisconsin 1 +axi 1 +ohio 1 +dougla 0 +turnidgeschoolcomput 0 +scienc 0 +departmentunivers 0 +dayton 0 +homemuppet 0 +babylon 0 +milton 0 +eyesightright 0 +left 0 +graduat 0 +student 0 +depart 0 +comput 0 +sciencesat 0 +univers 0 +year 0 +work 0 +professorthoma 0 +rep 0 +studyingprogram 0 +languag 0 +teach 0 +section 0 +hold 0 +mathematicsand 0 +computersci 0 +case 0 +western 0 +reserveunivers 0 +locat 0 +cleveland 0 +origin 0 +kent 0 +myfamili 0 +live 0 +judg 0 +compani 0 +keep 0 +click 0 +enough 0 +evid 0 +awai 0 +long 0 +time 0 +amus 0 +shortcut 0 +last 0 +modifi 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..fbfc1352 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,37 @@ +home 1 +page 1 +taxiao 1 +wang 1 +sinc 1 +madison 1 +offic 1 +phone 1 +visitor 1 +number 1 +welcom 0 +heavi 0 +construct 0 +click 0 +finger 0 +contact 0 +inform 0 +graduat 0 +student 0 +teach 0 +assist 0 +comput 0 +scienc 0 +depart 0 +univers 0 +wisconsin 0 +bldg 0 +dayton 0 +street 0 +mail 0 +twang 0 +wisc 0 +visit 0 +time 0 +last 0 +updat 0 +june 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..a31d2d95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,12 @@ +shaft 1 +home 1 +trek 1 +meet 1 +pageuri 0 +pageemail 0 +wisc 0 +eduinterest 0 +diversionsstart 0 +microsoft 0 +start 0 +window 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..bff7a013 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,60 @@ +madison 1 +india 1 +asha 1 +ganti 1 +graduat 1 +student 1 +madra 1 +godav 1 +homepag 1 +databas 1 +real 1 +time 1 +venkatesh 0 +home 0 +pagevenkatesh 0 +vganti 0 +wisc 0 +studentoffic 0 +comput 0 +scienc 0 +depart 0 +dayton 0 +usaphon 0 +note 0 +page 0 +construct 0 +past 0 +present 0 +univers 0 +wisconsin 0 +fall 0 +earlier 0 +undergradu 0 +nativ 0 +kakinada 0 +andhra 0 +pradesh 0 +info 0 +basic 0 +educ 0 +click 0 +know 0 +hostel 0 +yearbook 0 +hope 0 +onlin 0 +sometim 0 +research 0 +interest 0 +work 0 +till 0 +btech 0 +project 0 +want 0 +look 0 +genesi 0 +group 0 +last 0 +updat 0 +januari 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..038a2c49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,62 @@ +multiscalar 2 +regist 1 +vijaykumar 1 +sohi 1 +commun 1 +vijai 1 +comput 1 +scienc 1 +madison 1 +compil 1 +architectur 1 +processor 1 +breach 1 +intern 1 +symposium 1 +wisc 1 +depart 1 +univers 1 +wisconsin 1 +project 1 +file 1 +annual 1 +microarchitectur 1 +micro 1 +home 0 +page 0 +profession 0 +affili 0 +contact 0 +address 0 +dayton 0 +street 0 +phone 0 +email 0 +advisor 0 +guri 0 +educ 0 +doctor 0 +august 0 +undergradu 0 +birla 0 +institut 0 +technolog 0 +pilani 0 +india 0 +research 0 +dissert 0 +distribut 0 +design 0 +anatomi 0 +strategi 0 +submit 0 +schedul 0 +architecturet 0 +go 0 +work 0 +memori 0 +data 0 +depend 0 +predict 0 +person 0 +side 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..e7138f2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,74 @@ +comput 1 +quantum 1 +watrou 1 +john 1 +scienc 1 +link 1 +bibliographi 1 +home 1 +theori 1 +inform 1 +page 1 +laboratori 1 +pagejohn 0 +wisc 0 +departmentunivers 0 +wisconsin 0 +madison 0 +dayton 0 +streetmadison 0 +telephon 0 +public 0 +dimension 0 +cellular 0 +automata 0 +proc 0 +symp 0 +foundat 0 +polynomi 0 +time 0 +algorithm 0 +artin 0 +whapl 0 +approxim 0 +theorem 0 +number 0 +fourth 0 +confer 0 +canadiannumb 0 +associ 0 +assort 0 +archiv 0 +stanford 0 +oxford 0 +particl 0 +beam 0 +physic 0 +ucla 0 +theoret 0 +montreal 0 +lanl 0 +preprint 0 +hypertext 0 +project 0 +hypatia 0 +gener 0 +refer 0 +element 0 +stylehypertext 0 +webster 0 +interfaceroget 0 +thesauru 0 +random 0 +parasol 0 +recordsplayst 0 +linksweath 0 +forecast 0 +madisonth 0 +isthmu 0 +daili 0 +pagemathemat 0 +quotat 0 +servermathematician 0 +biographiesgeek 0 +site 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..2f040096 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,108 @@ +entertain 1 +univers 1 +send 1 +date 1 +food 1 +affect 1 +home 1 +page 1 +recit 1 +plai 1 +ship 1 +world 1 +show 1 +movi 1 +weiru 0 +eiru 0 +email 0 +ppppleas 0 +find 0 +around 0 +sometim 0 +think 0 +english 0 +speaker 0 +commit 0 +asylum 0 +verbal 0 +insan 0 +languag 0 +peopl 0 +truck 0 +cargo 0 +havenos 0 +feet 0 +smell 0 +richard 0 +leder 0 +three 0 +possibl 0 +part 0 +least 0 +must 0 +beoffer 0 +customari 0 +begina 0 +seri 0 +great 0 +deal 0 +moder 0 +amountof 0 +merest 0 +suggest 0 +amount 0 +ofaffect 0 +increas 0 +reduc 0 +proportion 0 +longer 0 +call 0 +circumst 0 +omit 0 +miss 0 +manner 0 +guid 0 +excruciatingli 0 +correct 0 +behaviour 0 +peke 0 +friend 0 +physic 0 +depart 0 +alumni 0 +associ 0 +atmadison 0 +littl 0 +grei 0 +cell 0 +pictur 0 +game 0 +late 0 +david 0 +letterman 0 +studio 0 +wish 0 +postcard 0 +someon 0 +review 0 +favorit 0 +hockei 0 +player 0 +steve 0 +francai 0 +dictionnairefrancai 0 +anglai 0 +dictionnair 0 +softwar 0 +relatif 0 +lafrancophoni 0 +test 0 +degrammair 0 +francais 0 +french 0 +lesson 0 +weather 0 +forecast 0 +madison 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..c679b751 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,82 @@ +comput 1 +depart 1 +scienc 1 +univers 1 +manag 1 +home 1 +page 1 +student 1 +peopl 1 +republ 1 +jose 1 +technolog 1 +system 1 +platform 1 +includ 1 +us 1 +softwar 1 +corpor 1 +memori 1 +welcom 0 +zhang 0 +first 0 +year 0 +graduat 0 +hometown 0 +shanghai 0 +china 0 +educ 0 +wisconsin 0 +madison 0 +state 0 +california 0 +tsinghua 0 +beij 0 +chinaemail 0 +weiz 0 +wisc 0 +eduwork 0 +experiencecontractor 0 +develop 0 +variou 0 +inform 0 +differ 0 +windowsnt 0 +solari 0 +tuxedo 0 +pathwai 0 +design 0 +tandem 0 +engin 0 +sherpa 0 +oper 0 +nasa 0 +am 0 +research 0 +center 0 +hobbiesma 0 +jiangbridg 0 +card 0 +game 0 +tabl 0 +tenni 0 +pingpong 0 +joggingth 0 +ultim 0 +challengesolv 0 +mine 0 +sweeper 0 +expert 0 +level 0 +puzzl 0 +within 0 +second 0 +without 0 +cheat 0 +quot 0 +dayth 0 +best 0 +ackowledgementthi 0 +written 0 +framework 0 +provid 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..7faf9fd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,62 @@ +home 1 +page 1 +kent 1 +wenger 1 +madison 1 +scienc 1 +wisconsin 1 +project 1 +work 1 +data 1 +welcom 0 +note 0 +definit 0 +still 0 +construct 0 +preparedfor 0 +pothol 0 +need 0 +pictur 0 +scan 0 +wengerassoci 0 +researchercomput 0 +departmentunivers 0 +west 0 +dayton 0 +streetmadison 0 +telephon 0 +email 0 +wisc 0 +edufing 0 +workth 0 +main 0 +arecod 0 +cluster 0 +provid 0 +anddevis 0 +explor 0 +andvisu 0 +come 0 +good 0 +acronym 0 +importantpart 0 +wouldn 0 +agre 0 +visualizationproduc 0 +devis 0 +softwar 0 +peopl 0 +yanni 0 +ioannidi 0 +miron 0 +livnyraghu 0 +ramakrishnanmor 0 +inform 0 +univers 0 +dbm 0 +research 0 +groupuw 0 +comput 0 +pagewiscinfo 0 +personallinksimageslast 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..fb31ef2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,117 @@ +wisconsin 2 +project 1 +comput 1 +tempest 1 +wind 1 +tunnel 1 +share 1 +interfac 1 +hardwar 1 +memori 1 +slide 1 +page 1 +like 1 +level 1 +compil 1 +softwar 1 +implement 1 +paradyn 1 +architectur 1 +home 0 +projectmost 0 +futur 0 +massiv 0 +parallel 0 +built 0 +fromworkst 0 +node 0 +program 0 +high 0 +parallellanguag 0 +support 0 +address 0 +space 0 +whichprocess 0 +uniformli 0 +refer 0 +data 0 +seek 0 +develop 0 +consensu 0 +aboutth 0 +middl 0 +languag 0 +abovesystem 0 +first 0 +propos 0 +wascoop 0 +evolutionari 0 +extens 0 +toconvent 0 +recent 0 +havebeen 0 +work 0 +revolutionari 0 +call 0 +provid 0 +mechan 0 +allow 0 +programm 0 +andprogram 0 +librari 0 +messag 0 +pass 0 +transparentshar 0 +hybrid 0 +combin 0 +developingimplement 0 +think 0 +machin 0 +cluster 0 +ofworkst 0 +wisconsincow 0 +hypothet 0 +platform 0 +approach 0 +cowus 0 +snoop 0 +logic 0 +fpga 0 +sram 0 +collaboratingwith 0 +adapt 0 +perform 0 +tool 0 +overviewand 0 +annot 0 +bibliographi 0 +overview 0 +talk 0 +novemb 0 +pageor 0 +four 0 +complet 0 +technic 0 +paper 0 +contributor 0 +fund 0 +sourc 0 +origin 0 +name 0 +week 0 +articl 0 +relat 0 +group 0 +scienc 0 +departmentat 0 +univers 0 +world 0 +wide 0 +inform 0 +last 0 +updat 0 +juli 0 +mark 0 +hill 0 +markhil 0 +wisc 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..b765d5d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,85 @@ +first 1 +televis 1 +felix 1 +star 1 +trick 1 +make 1 +finger 1 +xuelin 0 +home 0 +page 0 +charact 0 +creat 0 +otto 0 +messmer 0 +base 0 +anim 0 +human 0 +person 0 +featur 0 +save 0 +whichwa 0 +shown 0 +famou 0 +rival 0 +chaplin 0 +keaton 0 +princ 0 +wale 0 +pick 0 +polo 0 +team 0 +mascot 0 +pictur 0 +accompani 0 +charl 0 +lindbergh 0 +across 0 +theatlant 0 +statu 0 +imag 0 +successfulli 0 +transmit 0 +develop 0 +seri 0 +somehow 0 +obtain 0 +magic 0 +didn 0 +oneev 0 +seem 0 +agre 0 +whether 0 +teeth 0 +whisker 0 +like 0 +spend 0 +time 0 +film 0 +program 0 +appear 0 +newspap 0 +comic 0 +strip 0 +advertis 0 +hundr 0 +product 0 +thing 0 +remov 0 +tail 0 +ear 0 +put 0 +back 0 +wish 0 +could 0 +give 0 +account 0 +hairbal 0 +keyboard 0 +keeper 0 +instead 0 +sui 0 +vritabl 0 +chat 0 +pass 0 +partout 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..38c132bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,329 @@ +ioannidi 3 +proc 2 +system 2 +queri 2 +intern 2 +databas 2 +visual 2 +optim 2 +septemb 2 +confer 2 +complex 1 +histogram 1 +schema 1 +livni 1 +issu 1 +data 1 +ramakrishnan 1 +inform 1 +problem 1 +estim 1 +comput 1 +integr 1 +transact 1 +haber 1 +sigmod 1 +poosala 1 +vldbconfer 1 +manag 1 +scientif 1 +user 1 +interfac 1 +primarili 1 +altern 1 +parallel 1 +investig 1 +simul 1 +special 1 +schedul 1 +error 1 +size 1 +studi 1 +identifi 1 +tool 1 +decemb 1 +tod 1 +miller 1 +translat 1 +result 1 +august 1 +yanni 1 +heterogen 1 +research 1 +focus 1 +support 1 +futur 1 +sever 1 +number 1 +algorithm 1 +well 1 +valu 1 +queryoptim 1 +current 1 +solut 1 +also 1 +aris 1 +multimedia 1 +environ 1 +propag 1 +distribut 1 +disciplin 1 +gener 1 +need 1 +develop 1 +desktop 1 +scientist 1 +concentr 1 +metaphor 1 +object 1 +dynam 1 +base 1 +survei 1 +symposium 1 +march 1 +garofalaki 1 +ieee 1 +engin 1 +ofheterogen 1 +bridg 1 +theori 1 +practic 1 +join 1 +ondatabas 1 +bombai 1 +india 1 +balanc 1 +orient 1 +montreal 1 +canada 1 +workshop 1 +opossum 1 +tsatalo 1 +dublin 1 +ireland 1 +ioannidisyanni 0 +wisc 0 +eduresearch 0 +interestsdatabas 0 +andinform 0 +area 0 +scientificdata 0 +applic 0 +pose 0 +challeng 0 +toqueri 0 +ask 0 +significantli 0 +higher 0 +thanin 0 +tradit 0 +evalu 0 +much 0 +highera 0 +especi 0 +attempt 0 +tooptim 0 +time 0 +paramet 0 +parametr 0 +thu 0 +access 0 +plan 0 +process 0 +querywil 0 +extrem 0 +larg 0 +us 0 +algorithmsfor 0 +find 0 +optimum 0 +among 0 +inadequ 0 +random 0 +algorithmsa 0 +viabl 0 +interest 0 +anneal 0 +genet 0 +take 0 +advantag 0 +propertiesof 0 +look 0 +especiallythos 0 +cost 0 +alsopart 0 +try 0 +appropriateinform 0 +must 0 +maintain 0 +limit 0 +thepropag 0 +properti 0 +ofoptim 0 +approxim 0 +inrel 0 +attribut 0 +mode 0 +expect 0 +part 0 +manyexperi 0 +variou 0 +mani 0 +aspectsthat 0 +technolog 0 +readi 0 +provid 0 +involv 0 +experi 0 +managementenviron 0 +help 0 +throughout 0 +life 0 +cycl 0 +theirexperiment 0 +primari 0 +compon 0 +major 0 +work 0 +address 0 +andsemant 0 +former 0 +right 0 +arefor 0 +repres 0 +scientistsso 0 +natur 0 +power 0 +latter 0 +facilitatetransl 0 +differ 0 +format 0 +although 0 +experimentalscientif 0 +effort 0 +guid 0 +specificproject 0 +associ 0 +particular 0 +basedperform 0 +model 0 +plantgrowth 0 +spectroscopi 0 +sequenc 0 +microscop 0 +imag 0 +recent 0 +publicationsi 0 +issueon 0 +anniversari 0 +multimediasystem 0 +contain 0 +conjunct 0 +beyondrel 0 +set 0 +foundat 0 +forschema 0 +displai 0 +journal 0 +intellig 0 +juli 0 +tsangari 0 +design 0 +implement 0 +performanceevalu 0 +bermuda 0 +knowledg 0 +tkde 0 +februari 0 +januari 0 +christodoulaki 0 +limitingworst 0 +case 0 +winger 0 +transit 0 +closur 0 +algorithmsbas 0 +graph 0 +travers 0 +record 0 +divers 0 +databaseestim 0 +gupta 0 +ponnekanti 0 +experimentmanag 0 +vldb 0 +itsappl 0 +load 0 +anjur 0 +frog 0 +turtl 0 +bridgesbetween 0 +file 0 +conferenceon 0 +statist 0 +stockholm 0 +sweden 0 +june 0 +multi 0 +dimension 0 +resourc 0 +forparallel 0 +haa 0 +shekita 0 +improv 0 +forselect 0 +rang 0 +predic 0 +internationalacm 0 +layoutat 0 +multipl 0 +granular 0 +advancedvisu 0 +gubbio 0 +itali 0 +desk 0 +managementthrough 0 +customiz 0 +zurich 0 +switzerland 0 +practicalityfor 0 +sigmodconfer 0 +jose 0 +solomon 0 +gmap 0 +versatil 0 +forphys 0 +independ 0 +santiago 0 +chile 0 +unifi 0 +framework 0 +index 0 +databasesystem 0 +dexa 0 +athen 0 +greec 0 +lashkari 0 +incomplet 0 +path 0 +express 0 +theirdisambigu 0 +minneapoli 0 +flexibl 0 +schemavisu 0 +edit 0 +boston 0 +april 0 +edbt 0 +cambridg 0 +england 0 +univers 0 +serial 0 +internationalvldb 0 +capacityin 0 +wiener 0 +moos 0 +withdata 0 +program 0 +languag 0 +york 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..edfe4e60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin 0 +zhongbin 0 +homepag 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..5a7eaf19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,76 @@ +home 1 +good 1 +person 1 +cours 1 +time 1 +madison 1 +last 1 +yinng 0 +pageindexofyinongwei 0 +spagehi 0 +welcom 0 +homepag 0 +pleas 0 +look 0 +info 0 +especi 0 +employ 0 +give 0 +alsolink 0 +classmat 0 +take 0 +telephon 0 +work 0 +address 0 +offic 0 +comp 0 +stat 0 +bldg 0 +univ 0 +inforesumehobbiestravel 0 +usathi 0 +collect 0 +pictur 0 +took 0 +travel 0 +articl 0 +wrote 0 +trip 0 +chicago 0 +seattl 0 +pointersr 0 +computingmacin 0 +learningpattern 0 +recognitioncomputatin 0 +geometrydatabasevisionacadem 0 +diarythi 0 +diari 0 +everi 0 +month 0 +sometim 0 +amaz 0 +mani 0 +littl 0 +read 0 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 0 +linksmi 0 +beida 0 +classmatespek 0 +univers 0 +alumni 0 +page 0 +oversea 0 +chines 0 +organ 0 +ciumi 0 +bookmarkcom 0 +press 0 +client 0 +support 0 +send 0 +comment 0 +visitor 0 +number 0 +access 0 +modifi 0 +yinong 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..0ffa589c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,44 @@ +home 1 +madison 1 +wisc 1 +convuls 1 +sera 1 +food 1 +poor 1 +call 1 +matt 0 +pageuntil 0 +around 0 +updat 0 +basic 0 +inform 0 +offic 0 +matthew 0 +zeidenbergcent 0 +wisconsin 0 +strategi 0 +observatori 0 +drive 0 +room 0 +voic 0 +gilson 0 +email 0 +zeiden 0 +eduzeidenb 0 +eduwhen 0 +california 0 +parent 0 +hous 0 +coho 0 +huntington 0 +beach 0 +beauti 0 +breton 0 +nadja 0 +beaut 0 +give 0 +saint 0 +whyth 0 +communist 0 +helder 0 +camara 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..ce4781ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,161 @@ +zhang 3 +tian 2 +data 2 +larg 2 +research 1 +databas 1 +raghu 1 +ramakrishnan 1 +miron 1 +livni 1 +mine 1 +cluster 1 +proc 1 +robot 1 +analysi 1 +dataset 1 +effici 1 +densiti 1 +birch 1 +method 1 +joint 1 +artifici 1 +intellig 1 +comput 1 +algorithm 1 +applic 1 +journal 1 +june 1 +ling 1 +home 1 +prof 1 +scienc 1 +wisconsin 1 +madison 1 +telephon 1 +us 1 +pattern 1 +interest 1 +design 1 +multi 1 +estim 1 +recent 1 +system 1 +sigmod 1 +canada 1 +technic 1 +motion 1 +plan 1 +topolog 1 +confer 1 +jianwei 1 +educ 1 +symposium 1 +beij 1 +page 0 +gener 0 +inform 0 +student 0 +assistantadvisor 0 +major 0 +concentr 0 +compilerminor 0 +financi 0 +invest 0 +bankingoffic 0 +room 0 +dept 0 +univ 0 +mail 0 +wisc 0 +eduoffic 0 +depart 0 +intereststher 0 +grow 0 +need 0 +exploratori 0 +discov 0 +territori 0 +develop 0 +purpos 0 +ortool 0 +integr 0 +techniqu 0 +statist 0 +thesi 0 +topic 0 +densityanalysi 0 +given 0 +dimension 0 +limit 0 +amount 0 +resourc 0 +run 0 +time 0 +memori 0 +implement 0 +accur 0 +identifi 0 +spars 0 +crowd 0 +region 0 +function 0 +overal 0 +distribut 0 +import 0 +practic 0 +branch 0 +appli 0 +mani 0 +domain 0 +dataclassif 0 +imag 0 +compress 0 +recognit 0 +project 0 +select 0 +public 0 +submit 0 +knowledg 0 +discoveri 0 +conf 0 +manag 0 +interact 0 +classif 0 +workshop 0 +issu 0 +knowledgediscoveri 0 +cooper 0 +fast 0 +probabl 0 +kernel 0 +report 0 +juli 0 +dimensionreduct 0 +ijcai 0 +findpath 0 +manipul 0 +finit 0 +divis 0 +configur 0 +space 0 +manufactur 0 +trend 0 +andmanufactur 0 +dimens 0 +reduct 0 +technolog 0 +find 0 +collis 0 +free 0 +path 0 +mobil 0 +young 0 +profession 0 +relev 0 +link 0 +document 0 +organ 0 +china 0 +last 0 +updat 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..5e45d083 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,72 @@ +research 1 +data 1 +site 1 +server 1 +yihong 1 +zhao 1 +madison 1 +relat 1 +dbm 1 +mine 1 +financi 1 +pathfind 1 +daili 1 +new 1 +home 0 +page 0 +wisc 0 +assist 0 +depart 0 +comput 0 +sciencesunivers 0 +wisconsin 0 +west 0 +dayton 0 +streetmadison 0 +advis 0 +prof 0 +jeff 0 +naughton 0 +interest 0 +parallel 0 +object 0 +line 0 +analyt 0 +process 0 +olap 0 +benchmark 0 +educationb 0 +univers 0 +north 0 +carolina 0 +chapel 0 +hillm 0 +fall 0 +wiscosin 0 +group 0 +sigmod 0 +maryland 0 +datamin 0 +microstrategi 0 +rolap 0 +arbor 0 +molap 0 +stock 0 +lombard 0 +graph 0 +kiwi 0 +club 0 +todai 0 +monei 0 +chines 0 +taiwan 0 +search 0 +engin 0 +lyco 0 +excit 0 +yahoo 0 +surf 0 +ters 0 +detail 0 +comment 0 +pgmo 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..5dcc2294 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,19 @@ +home 1 +address 1 +madison 1 +phone 1 +offic 1 +wisc 1 +page 0 +still 0 +construct 0 +wang 0 +homepag 0 +offer 0 +inform 0 +johnson 0 +dayton 0 +street 0 +email 0 +zhewang 0 +student 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..7b9cae41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,102 @@ +comput 1 +perform 1 +languag 1 +area 1 +parallel 1 +system 1 +studi 1 +distribut 1 +zhichen 1 +scienc 1 +research 1 +confer 1 +interest 1 +program 1 +recent 1 +techniqu 1 +environ 1 +high 1 +softwar 1 +univers 1 +fudan 1 +compil 1 +version 1 +home 0 +page 0 +depart 0 +dayton 0 +madison 0 +offic 0 +phone 0 +assist 0 +advisor 0 +professor 0 +jame 0 +larusprofessor 0 +barton 0 +millerawardbest 0 +paper 0 +award 0 +intern 0 +supercomput 0 +press 0 +juli 0 +issu 0 +anddistribut 0 +detect 0 +eliminateperform 0 +bottleneck 0 +share 0 +memori 0 +combin 0 +paradyn 0 +toolwith 0 +blizzard 0 +wisconsinwind 0 +tunnel 0 +think 0 +machin 0 +andth 0 +cluster 0 +workstat 0 +public 0 +field 0 +interestprogram 0 +tool 0 +network 0 +oper 0 +architectur 0 +evalu 0 +benchmark 0 +place 0 +work 0 +laboratori 0 +texa 0 +antonio 0 +publish 0 +ofparallel 0 +predict 0 +model 0 +simul 0 +departmentat 0 +particip 0 +sever 0 +nation 0 +project 0 +china 0 +develop 0 +levelprogram 0 +object 0 +orient 0 +technolog 0 +andimcrement 0 +click 0 +postscript 0 +html 0 +link 0 +asplo 0 +programjourn 0 +researchchines 0 +novel 0 +friend 0 +java 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..064bcb88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,25 @@ +zhang 1 +univers 1 +wisconsin 1 +madison 1 +home 0 +page 0 +hello 0 +name 0 +pictur 0 +taken 0 +invit 0 +supper 0 +theth 0 +restaur 0 +tsinghua 0 +chen 0 +weihai 0 +wang 0 +tong 0 +depart 0 +comput 0 +scienc 0 +west 0 +dayton 0 +street 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..1a407af6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,13 @@ +inform 1 +krzysztof 0 +zmudzinskikrzysztof 0 +zmudzinskispin 0 +student 0 +inc 0 +pictur 0 +poland 0 +pole 0 +thank 0 +stop 0 +visitor 0 +number 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..5c82e253 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,131 @@ +test 2 +design 1 +circuit 1 +built 1 +self 1 +perform 1 +engin 1 +comput 1 +research 1 +testabl 1 +vlsi 1 +gener 1 +area 1 +system 1 +investig 1 +saluja 1 +kewal 1 +colleg 1 +mail 1 +engr 1 +wisc 1 +data 1 +compress 1 +fault 1 +digit 1 +make 1 +littl 1 +oper 1 +laboratori 1 +engineeringunivers 0 +wisconsin 0 +madison 0 +salujaprofessor 0 +hall 0 +drivemadison 0 +eduportrait 0 +jpgdepartmentselectr 0 +engineeringcomput 0 +scienc 0 +educ 0 +univers 0 +iowa 0 +interestsdesign 0 +architectur 0 +integr 0 +toler 0 +interest 0 +testableand 0 +reliabl 0 +carri 0 +thisarea 0 +extens 0 +analysi 0 +tool 0 +theresearch 0 +involv 0 +model 0 +testgener 0 +modif 0 +enhanc 0 +inself 0 +fabric 0 +applic 0 +techniqu 0 +andfault 0 +simul 0 +process 0 +effici 0 +combin 0 +andsequenti 0 +compact 0 +methodsapplic 0 +testenviron 0 +concentr 0 +regularstructur 0 +programm 0 +logic 0 +arrai 0 +ram 0 +areinvestig 0 +algorithm 0 +implement 0 +inhardwar 0 +penalti 0 +anoth 0 +projectw 0 +wai 0 +hardwar 0 +asystem 0 +normal 0 +goal 0 +thatth 0 +continu 0 +noimpact 0 +much 0 +work 0 +us 0 +facil 0 +digitalsystem 0 +hous 0 +number 0 +station 0 +withcolor 0 +monitor 0 +termin 0 +program 0 +dept 0 +center 0 +consortia 0 +servic 0 +fountain 0 +index 0 +search 0 +credit 0 +help 0 +last 0 +modifi 0 +fridai 0 +cdtthi 0 +page 0 +best 0 +view 0 +browser 0 +support 0 +tabl 0 +photograph 0 +address 0 +comment 0 +webmast 0 +eduupd 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..22bcbfd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,135 @@ +system 2 +control 2 +duffi 2 +manufactur 2 +comput 1 +wisconsin 1 +autom 1 +engin 1 +research 1 +center 1 +engr 1 +univers 1 +develop 1 +perform 1 +neil 1 +colleg 1 +mail 1 +wisc 1 +integr 1 +space 1 +sensor 1 +product 1 +machin 1 +robot 1 +distribut 1 +experiment 1 +feedback 1 +human 1 +telerobot 1 +work 1 +engineeringunivers 0 +madison 0 +professor 0 +build 0 +drivemadison 0 +eduportrait 0 +departmentsmechan 0 +engineeringeducationb 0 +madisonm 0 +madisonphd 0 +madisonresearch 0 +interestsrobot 0 +precis 0 +micromechanismscent 0 +consortiamanufactur 0 +programwisconsin 0 +roboticsprofessor 0 +involv 0 +actuat 0 +data 0 +base 0 +advanc 0 +self 0 +guid 0 +inspect 0 +weld 0 +high 0 +materi 0 +handl 0 +finish 0 +mold 0 +rework 0 +studi 0 +highli 0 +hierarch 0 +architectur 0 +hope 0 +reduc 0 +cost 0 +complex 0 +larg 0 +scale 0 +increas 0 +flexibl 0 +fault 0 +toler 0 +construct 0 +sever 0 +incorpor 0 +real 0 +time 0 +fulli 0 +schedul 0 +optim 0 +theori 0 +explain 0 +properti 0 +associ 0 +director 0 +nasa 0 +fund 0 +emphas 0 +agricultur 0 +tactil 0 +oper 0 +method 0 +evalu 0 +well 0 +factor 0 +sensori 0 +fatigu 0 +test 0 +carri 0 +close 0 +aerospac 0 +industri 0 +teach 0 +cours 0 +automat 0 +author 0 +process 0 +dept 0 +consortia 0 +servic 0 +fountain 0 +index 0 +search 0 +credit 0 +help 0 +last 0 +modifi 0 +tuesdai 0 +cdtthi 0 +page 0 +best 0 +view 0 +browser 0 +support 0 +tabl 0 +photograph 0 +address 0 +comment 0 +webmast 0 +eduupd 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..ef14049b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,203 @@ +model 2 +mechan 2 +engin 2 +design 2 +comput 2 +physic 2 +geometr 1 +system 1 +represent 1 +behavior 1 +research 1 +part 1 +shapiro 1 +wisc 1 +aid 1 +manufactur 1 +analysi 1 +specif 1 +vadim 1 +colleg 1 +professor 1 +univers 1 +mail 1 +engr 1 +cornel 1 +geometri 1 +solid 1 +simul 1 +award 1 +chain 1 +april 1 +function 1 +center 1 +artifact 1 +algorithm 1 +us 1 +current 1 +formal 1 +investig 1 +support 1 +form 1 +process 1 +combinatori 1 +competit 1 +systemat 1 +develop 1 +engineeringunivers 0 +wisconsin 0 +madison 0 +assist 0 +avenuemadison 0 +vshapiro 0 +eduportrait 0 +jpgurl 0 +http 0 +departmentscomput 0 +sciencemechan 0 +engineeringeducationba 0 +york 0 +universitym 0 +california 0 +angelesm 0 +universityphd 0 +univeristyresearch 0 +interestscomput 0 +appli 0 +product 0 +automationcent 0 +consortiamathemat 0 +graduat 0 +programmanufactur 0 +programspati 0 +autom 0 +laboratoryselect 0 +honorsn 0 +scienc 0 +foundat 0 +career 0 +gener 0 +motor 0 +fellow 0 +select 0 +public 0 +mainten 0 +space 0 +decomposit 0 +intern 0 +journal 0 +applic 0 +palmer 0 +real 0 +rigid 0 +separ 0 +boundari 0 +convers 0 +transact 0 +graphic 0 +januari 0 +vossler 0 +interest 0 +relationship 0 +betweengeometri 0 +phenomena 0 +bemodel 0 +repres 0 +analyz 0 +manipul 0 +manufacturedbas 0 +ongo 0 +project 0 +includ 0 +abil 0 +creat 0 +convert 0 +maintain 0 +consist 0 +ofdistinct 0 +major 0 +technologicalbarri 0 +undermin 0 +reliabl 0 +commercialgeometr 0 +effort 0 +focu 0 +eliminatingambigu 0 +commun 0 +ofparametr 0 +famili 0 +novel 0 +methodsand 0 +techniqu 0 +todai 0 +fabric 0 +cannot 0 +bedescrib 0 +term 0 +discret 0 +simpl 0 +interactingprimit 0 +appar 0 +lack 0 +structur 0 +amajor 0 +roadblock 0 +collabor 0 +industri 0 +present 0 +deal 0 +withtheoret 0 +practic 0 +aspect 0 +designand 0 +seek 0 +establish 0 +basi 0 +make 0 +andmanufactur 0 +smoothintegr 0 +activ 0 +contain 0 +inform 0 +need 0 +captur 0 +thedesir 0 +tomanufactur 0 +recent 0 +studi 0 +algebra 0 +topolog 0 +call 0 +suggest 0 +possibl 0 +tounifi 0 +thu 0 +facilit 0 +ofnew 0 +tool 0 +theseand 0 +languagesand 0 +physicalobject 0 +dept 0 +consortia 0 +servic 0 +fountain 0 +index 0 +search 0 +credit 0 +help 0 +last 0 +modifi 0 +thursdai 0 +cdtthi 0 +page 0 +best 0 +view 0 +browser 0 +tabl 0 +photograph 0 +address 0 +comment 0 +webmast 0 +eduupd 0 +profil 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..5e27811b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,107 @@ +prover 1 +theorem 1 +prove 1 +incomplet 1 +list 1 +mathemat 1 +tech 1 +autom 1 +groupth 1 +comput 1 +scienc 1 +method 1 +first 1 +order 1 +logic 1 +report 1 +student 1 +woodi 1 +bledso 1 +hine 1 +proof 1 +hein 1 +borel 1 +groupautom 0 +group 0 +part 0 +depart 0 +univers 0 +texa 0 +ataustin 0 +produc 0 +system 0 +intend 0 +higher 0 +intent 0 +appli 0 +systemsand 0 +problem 0 +primarili 0 +also 0 +computersci 0 +technolog 0 +herei 0 +index 0 +electron 0 +avail 0 +site 0 +seri 0 +continu 0 +current 0 +techreport 0 +ad 0 +reportseri 0 +present 0 +grouplarri 0 +hinesmarti 0 +mayberrybenjamin 0 +shultsalumniprevi 0 +previou 0 +robert 0 +boyer 0 +other 0 +relat 0 +late 0 +faculti 0 +profil 0 +robertboyerj 0 +strother 0 +moorethi 0 +past 0 +visitor 0 +collaboratorswhat 0 +done 0 +implyth 0 +natur 0 +deduct 0 +proverstrivelarri 0 +inequ 0 +struvelarri 0 +theori 0 +chou 0 +geometri 0 +proverand 0 +variou 0 +improv 0 +theretoinclud 0 +mcphee 0 +feng 0 +theoryimplement 0 +descript 0 +theoremprecondit 0 +proverbledso 0 +analog 0 +theoremnqthmboy 0 +andmoor 0 +develop 0 +clinc 0 +iprshult 0 +knowledg 0 +us 0 +relatedlinksdo 0 +feedback 0 +want 0 +inform 0 +contact 0 +benjamin 0 +shult 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..a34d0ea4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,5 @@ +document 1 +moveddocu 0 +movedthi 0 +perman 0 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..a34d0ea4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_log/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,5 @@ +document 1 +moveddocu 0 +movedthi 0 +perman 0 +move 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..8588ac7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +solut 7 +homework 5 +upson 3 +hall 3 +phone 3 +mail 3 +cornel 3 +offic 3 +hour 3 +oper 2 +assign 2 +prelim 2 +wednesdai 2 +thursdai 2 +budiu 2 +home 1 +pagec 1 +system 1 +program 1 +systemsc 1 +practicum 1 +systemkenneth 1 +birmanc 1 +new 1 +groupcours 1 +syllabuslectur 1 +note 1 +unix 1 +filesystem 1 +structur 1 +link 1 +static 1 +dynam 1 +taslili 1 +lili 1 +fridai 1 +cheng 1 +huang 1 +ychuang 1 +tuesdai 1 +mihai 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..93d149f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,45 @@ +oper 5 +system 3 +chip 3 +last 2 +chang 2 +handout 2 +format 2 +consol 2 +hoca 2 +home 1 +pagec 1 +program 1 +systemsc 1 +practicum 1 +systemsselect 1 +displai 1 +symbol 1 +correspond 1 +postcriptdocu 1 +hand 1 +phase 1 +hocacours 1 +inform 1 +cours 1 +schedul 1 +group 1 +postcript 1 +penn 1 +broccoli 1 +question 1 +answer 1 +comput 1 +window 1 +exampl 1 +us 1 +tutori 1 +principl 1 +configur 1 +fileth 1 +systemth 1 +specif 1 +page 1 +maintain 1 +lorenzo 1 +alvisi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..683d3a9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,240 @@ +homework 24 +chapter 18 +read 17 +databas 11 +tuesdai 10 +relat 9 +februari 9 +april 9 +inform 8 +cours 8 +thursdai 8 +grade 8 +model 7 +cover 6 +retriev 6 +page 6 +march 6 +system 5 +cornel 5 +availablethursdai 5 +relationship 4 +design 4 +class 4 +group 4 +name 4 +duetuesdai 4 +home 3 +entiti 3 +index 3 +queri 3 +crash 3 +recoveri 3 +concurr 3 +control 3 +part 3 +vector 3 +instructor 3 +worth 3 +final 3 +member 3 +januari 3 +fundament 2 +follow 2 +file 2 +space 2 +term 2 +time 2 +structur 2 +solut 2 +korth 2 +silberschatz 2 +second 2 +edit 2 +requir 2 +reserv 2 +upson 2 +offic 2 +hour 2 +appoint 2 +send 2 +mail 2 +aguilera 2 +amith 2 +work 2 +thegroup 2 +avail 2 +week 2 +last 2 +return 2 +first 2 +regrad 2 +introduct 2 +calculu 2 +optim 2 +prelim 2 +transact 2 +process 2 +retrievalthursdai 2 +advanc 2 +pagec 1 +retrievaldepart 1 +computersci 1 +universityspr 1 +gradeshav 1 +nice 1 +summer 1 +introductionthi 1 +three 1 +credit 1 +databasesystem 1 +roughli 1 +twothird 1 +third 1 +topic 1 +systemsinclud 1 +data 1 +physic 1 +organ 1 +hash 1 +languag 1 +queryoptim 1 +transactionprocess 1 +deal 1 +find 1 +usefulinform 1 +larg 1 +textual 1 +willcov 1 +invert 1 +smartsystem 1 +similar 1 +weight 1 +rank 1 +relevancefeedback 1 +phrase 1 +gener 1 +thesaurusconstruct 1 +evalu 1 +permit 1 +automatictext 1 +summar 1 +link 1 +materi 1 +note 1 +placetuesdai 1 +minut 1 +thurston 1 +prerequisitesc 1 +recommend 1 +booksdatabas 1 +concept 1 +mcgrawhil 1 +elmasri 1 +andnavath 1 +benjamin 1 +cum 1 +principl 1 +knowledg 1 +base 1 +byullman 1 +comput 1 +scienc 1 +press 1 +photocopiedmateri 1 +salton 1 +book 1 +research 1 +paper 1 +amitsingh 1 +singhal 1 +teach 1 +assist 1 +sophia 1 +georgiakaki 1 +wednesdai 1 +marco 1 +forc 1 +yamasani 1 +officehour 1 +gradingexam 1 +midterm 1 +ofyour 1 +exam 1 +yourfin 1 +five 1 +semest 1 +policiesy 1 +peopl 1 +clearli 1 +indic 1 +entir 1 +receiv 1 +samegrad 1 +tuesdayand 1 +along 1 +guid 1 +ofcours 1 +date 1 +throughth 1 +late 1 +accept 1 +illeg 1 +hard 1 +even 1 +though 1 +iti 1 +encourag 1 +type 1 +latexif 1 +possibl 1 +alreadi 1 +know 1 +goodopportun 1 +learn 1 +latex 1 +submissionpleas 1 +attach 1 +sort 1 +alphabet 1 +also 1 +write 1 +exampl 1 +bill 1 +clinton 1 +dole 1 +ross 1 +perot 1 +homeworksgrad 1 +sortedalphabet 1 +thecov 1 +list 1 +pagefollow 1 +want 1 +pleas 1 +sendmail 1 +policyal 1 +request 1 +submit 1 +inwrit 1 +within 1 +back 1 +schedulethi 1 +tent 1 +schedul 1 +referto 1 +algebra 1 +tupl 1 +domain 1 +integr 1 +constraint 1 +spring 1 +break 1 +modelhomework 1 +weightingthursdai 1 +indexinghomework 1 +evaluationtuesdai 1 +relev 1 +feedbackthursdai 1 +document 1 +clusteringhomework 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..e07faa82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,64 @@ +file 6 +page 3 +inform 3 +postscript 3 +enscript 3 +pfile 3 +home 2 +offic 2 +hour 2 +recit 2 +stuff 2 +convert 2 +text 2 +print 2 +look 1 +admin 1 +handout 1 +incl 1 +lectur 1 +note 1 +assign 1 +ethic 1 +profession 1 +social 1 +respons 1 +mayb 1 +electron 1 +submiss 1 +procedur 1 +group 1 +perform 1 +evalu 1 +resourc 1 +quot 1 +collect 1 +joke 1 +start 1 +submit 1 +sumedh 1 +break 1 +new 1 +misc 1 +peopl 1 +ask 1 +simpl 1 +unix 1 +program 1 +suggest 1 +wai 1 +first 1 +good 1 +sourc 1 +code 1 +second 1 +give 1 +nice 1 +header 1 +gener 1 +leav 1 +send 1 +printer 1 +come 1 +last 1 +modif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..9c809440 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,59 @@ +project 6 +distribut 4 +cours 4 +system 4 +work 3 +inform 3 +practicum 2 +practic 2 +design 2 +student 2 +rang 2 +systemsor 1 +hand 1 +dirti 1 +real 1 +aspect 1 +studi 1 +implement 1 +signific 1 +also 1 +take 1 +offersa 1 +varieti 1 +simpl 1 +internetworkingto 1 +complex 1 +teamsof 1 +person 1 +choos 1 +interest 1 +theywil 1 +trough 1 +semest 1 +credit 1 +hour 1 +earn 1 +depend 1 +size 1 +complexityof 1 +develop 1 +us 1 +offcial 1 +interact 1 +pageslink 1 +page 1 +find 1 +basic 1 +instruct 1 +descript 1 +plan 1 +progress 1 +report 1 +final 1 +present 1 +tabl 1 +contentspag 1 +comment 1 +werner 1 +vogel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..0d37698d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,94 @@ +avail 6 +problem 6 +solut 6 +cours 5 +page 3 +fall 3 +professor 3 +saluja 3 +sorin 3 +part 3 +mentor 3 +contain 2 +link 2 +engin 2 +kewal 2 +outlin 2 +project 2 +help 2 +tool 2 +refer 2 +email 2 +home 1 +intro 1 +comput 1 +architectur 1 +note 1 +inform 1 +electr 1 +andcomput 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +materi 1 +intend 1 +sole 1 +studentsenrol 1 +semest 1 +prof 1 +generalinform 1 +conduct 1 +midtermsyllabu 1 +midtermi 1 +specif 1 +homework 1 +assign 1 +valid 1 +theproject 1 +need 1 +graphic 1 +caeworkst 1 +pleas 1 +duedat 1 +follow 1 +literatur 1 +assist 1 +anyon 1 +whomai 1 +manual 1 +onlin 1 +throughbold_brows 1 +check 1 +gettingstart 1 +design 1 +architect 1 +train 1 +workbook 1 +get 1 +start 1 +quicksim 1 +trainingworkbook 1 +exersis 1 +addition 1 +thesedocu 1 +document 1 +click 1 +send 1 +surf 1 +homepag 1 +announc 1 +new 1 +group 1 +wiscinfo 1 +gopher 1 +site 1 +uwengin 1 +server 1 +file 1 +last 1 +modifi 1 +septemb 1 +pmcst 1 +question 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..ea2a25ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,43 @@ +info 4 +homework 3 +page 2 +cours 2 +handout 2 +syllabu 2 +exam 2 +grade 2 +tabl 2 +homepag 1 +fall 1 +run 1 +netscap 1 +click 1 +herelink 1 +individu 1 +frame 1 +gener 1 +stuff 1 +section 1 +offic 1 +hour 1 +motw 1 +download 1 +class 1 +requir 1 +criteria 1 +schedul 1 +link 1 +refer 1 +goofi 1 +stuffnot 1 +preced 1 +contain 1 +browser 1 +abl 1 +handl 1 +pleas 1 +email 1 +cornel 1 +edupag 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..5047fef6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,43 @@ +interpret 3 +cours 3 +materi 3 +comput 2 +access 2 +dylan 2 +window 2 +chang 2 +home 1 +pagec 1 +structur 1 +program 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +fall 1 +requir 1 +user 1 +password 1 +request 1 +attempt 1 +info 1 +note 1 +browser 1 +includ 1 +netscap 1 +correctli 1 +check 1 +java 1 +class 1 +file 1 +thu 1 +noodll 1 +inconsist 1 +behavior 1 +work 1 +parter 1 +link 1 +directori 1 +partnerjoin 1 +util 1 +announc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..4f323253 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,359 @@ +cours 18 +assign 16 +problem 13 +comput 11 +program 11 +object 8 +set 8 +dylan 7 +site 7 +orient 6 +time 6 +exam 6 +data 5 +function 5 +languag 5 +evalu 5 +provid 5 +consult 5 +tuesdai 5 +work 5 +gener 4 +java 4 +us 4 +question 4 +lectur 4 +hour 4 +public 4 +avail 4 +late 4 +scienc 3 +cover 3 +broad 3 +abstract 3 +recurs 3 +topic 3 +student 3 +think 3 +reach 3 +best 3 +staff 3 +materi 3 +order 3 +ugrad 3 +schedul 3 +browser 3 +machin 3 +make 3 +final 3 +accept 3 +togeth 3 +date 3 +list 3 +introductori 2 +techniqu 2 +includ 2 +correct 2 +well 2 +rangeof 2 +take 2 +skill 2 +post 2 +cornel 2 +current 2 +conot 2 +handout 2 +need 2 +request 2 +user 2 +idand 2 +password 2 +system 2 +toth 2 +mondai 2 +also 2 +huttenloch 2 +upson 2 +tobia 2 +mayr 2 +hamblin 2 +szewczyk 2 +voskuhl 2 +thursdai 2 +wednesdai 2 +help 2 +even 2 +thu 2 +justin 2 +compil 2 +standalon 2 +version 2 +sure 2 +grade 2 +person 2 +someon 2 +must 2 +submit 2 +name 2 +facil 2 +exampl 2 +prelim 2 +model 2 +procedur 2 +process 2 +oper 2 +state 2 +mutabl 2 +queue 2 +stream 2 +infinit 2 +fall 1 +informationaugust 1 +rang 1 +computersci 1 +concept 1 +pattern 1 +match 1 +useth 1 +dynam 1 +developedat 1 +appl 1 +suit 1 +courseabout 1 +happen 1 +notationthat 1 +chosen 1 +write 1 +major 1 +goal 1 +ofth 1 +teach 1 +clearli 1 +programsand 1 +toolbox 1 +modern 1 +programmingtechniqu 1 +applic 1 +often 1 +wonder 1 +whether 1 +takec 1 +focus 1 +orientedlanguag 1 +wherea 1 +exposur 1 +number 1 +programmingparadigm 1 +imperativeprogram 1 +good 1 +background 1 +goodform 1 +mathemat 1 +physic 1 +probablytak 1 +transfer 1 +either 1 +direct 1 +encourag 1 +first 1 +week 1 +questionsor 1 +comment 1 +http 1 +info 1 +contain 1 +run 1 +serverwhich 1 +allow 1 +answersa 1 +annot 1 +access 1 +simpli 1 +home 1 +page 1 +follow 1 +instruct 1 +creat 1 +thisweek 1 +send 1 +email 1 +edubut 1 +ask 1 +aboutproblem 1 +professor 1 +upsonjam 1 +tarobert 1 +tajustin 1 +taandra 1 +ferencz 1 +melissa 1 +consultantwhen 1 +meetlectur 1 +kimbal 1 +andrecit 1 +recitationsexpand 1 +opportunityto 1 +held 1 +midnight 1 +eachproblem 1 +setsdu 1 +sundai 1 +mondayeven 1 +consultinghour 1 +inth 1 +offic 1 +jame 1 +robert 1 +voskuhltba 1 +materialsther 1 +textbook 1 +handoutsand 1 +note 1 +hardcopi 1 +andon 1 +interpret 1 +free 1 +develop 1 +class 1 +implementedin 1 +capabl 1 +chang 1 +almost 1 +weekli 1 +netscap 1 +window 1 +borland 1 +downloadonto 1 +want 1 +word 1 +warn 1 +download 1 +ontoyour 1 +recentvers 1 +check 1 +requirementsstud 1 +respons 1 +read 1 +recit 1 +therewil 1 +preliminari 1 +given 1 +combinationof 1 +written 1 +exercis 1 +gradeswil 1 +base 1 +combin 1 +score 1 +account 1 +approxim 1 +half 1 +thetot 1 +willgener 1 +immedi 1 +return 1 +followingclass 1 +period 1 +complet 1 +earli 1 +andth 1 +sittingdown 1 +matter 1 +mani 1 +long 1 +sink 1 +beforesit 1 +polici 1 +joint 1 +workmuch 1 +learn 1 +come 1 +programmingproblem 1 +jointli 1 +peopl 1 +howev 1 +youwork 1 +singl 1 +jointassign 1 +circumstancesmai 1 +hand 1 +done 1 +els 1 +yourown 1 +doubt 1 +credit 1 +yougot 1 +would 1 +amaz 1 +easi 1 +tell 1 +whenpeopl 1 +pleas 1 +lifeunpleas 1 +break 1 +rule 1 +facilitiescit 1 +variou 1 +colleg 1 +campu 1 +macintosh 1 +andpc 1 +on 1 +depart 1 +thiscours 1 +upsonmac 1 +datesal 1 +electron 1 +server 1 +mondaynight 1 +submityour 1 +solut 1 +outlin 1 +studi 1 +introduct 1 +substitut 1 +iter 1 +induct 1 +higher 1 +argument 1 +valu 1 +analysi 1 +algorithm 1 +growth 1 +structur 1 +contract 1 +implement 1 +hierarch 1 +tree 1 +quotat 1 +reason 1 +symbol 1 +differenti 1 +extend 1 +type 1 +dispatch 1 +polynomi 1 +arithmet 1 +environ 1 +local 1 +variabl 1 +inherit 1 +multimethod 1 +stack 1 +heap 1 +heapsort 1 +prioriti 1 +metacircular 1 +variat 1 +express 1 +optim 1 +nonloc 1 +exit 1 +catch 1 +throw 1 +garbag 1 +collect 1 +illus 1 +memori 1 +random 1 +quicksort 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..c1fdd58c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,88 @@ +cours 9 +materi 5 +inform 4 +homework 3 +conot 3 +page 2 +includ 2 +lectur 2 +project 2 +help 2 +annot 2 +allow 2 +send 2 +email 2 +note 2 +section 2 +date 2 +pleas 2 +process 2 +registr 2 +home 1 +introduct 1 +digit 1 +system 1 +comput 1 +organizationthorsten 1 +eickenfal 1 +kimbal 1 +btopic 1 +represent 1 +machin 1 +assembl 1 +languag 1 +processor 1 +organ 1 +interrupt 1 +memori 1 +hierarchi 1 +combinatori 1 +sequenti 1 +circuit 1 +data 1 +path 1 +control 1 +unit 1 +design 1 +microprogram 1 +helpif 1 +problem 1 +relat 1 +best 1 +appropri 1 +point 1 +staff 1 +also 1 +class 1 +mate 1 +otherwis 1 +cornel 1 +talk 1 +consult 1 +materialsal 1 +separ 1 +server 1 +want 1 +bookmark 1 +us 1 +document 1 +file 1 +check 1 +account 1 +request 1 +post 1 +saturdai 1 +encount 1 +difficulti 1 +read 1 +follow 1 +hidden 1 +instruct 1 +sign 1 +start 1 +mondai 1 +listlist 1 +made 1 +maintain 1 +thorsten 1 +eicken 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..f98e3999 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,69 @@ +file 6 +page 3 +inform 3 +postscript 3 +enscript 3 +pfile 3 +home 2 +offic 2 +hour 2 +recit 2 +stuff 2 +convert 2 +text 2 +print 2 +look 1 +admin 1 +handout 1 +incl 1 +lectur 1 +note 1 +assign 1 +ethic 1 +profession 1 +social 1 +respons 1 +mayb 1 +electron 1 +submiss 1 +procedur 1 +group 1 +perform 1 +evalu 1 +resourc 1 +quot 1 +collect 1 +joke 1 +start 1 +submit 1 +sumedh 1 +break 1 +new 1 +mondai 1 +held 1 +csuglab 1 +floor 1 +upson 1 +misc 1 +peopl 1 +ask 1 +simpl 1 +unix 1 +program 1 +suggest 1 +wai 1 +first 1 +good 1 +sourc 1 +code 1 +second 1 +give 1 +nice 1 +header 1 +gener 1 +leav 1 +send 1 +printer 1 +come 1 +last 1 +modif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..6155e68a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,42 @@ +solut 7 +assign 6 +upson 3 +hall 3 +phone 3 +mail 3 +cornel 3 +offic 3 +hour 3 +oper 2 +prelim 2 +wednesdai 2 +thursdai 2 +budiu 2 +home 1 +pagec 1 +system 1 +program 1 +systemsc 1 +practicum 1 +systemkenneth 1 +birmanc 1 +new 1 +groupcours 1 +syllabuslectur 1 +note 1 +unix 1 +filesystem 1 +structur 1 +link 1 +static 1 +dynam 1 +taslili 1 +lili 1 +fridai 1 +cheng 1 +huang 1 +ychuang 1 +tuesdai 1 +mihai 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..c5e2487d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,135 @@ +home 5 +clair 5 +program 5 +solut 5 +homework 5 +kevin 5 +assign 5 +page 5 +cours 4 +avail 4 +code 3 +scott 3 +file 3 +depart 2 +inform 2 +materi 2 +pleas 2 +mondai 2 +class 2 +oper 2 +still 2 +us 2 +find 2 +bind 2 +problem 2 +offer 2 +time 2 +fact 2 +dodg 2 +postscript 2 +pagecsfound 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +fall 1 +welcom 1 +academ 1 +integr 1 +read 1 +announc 1 +date 1 +move 1 +atth 1 +begin 1 +notethat 1 +rubix 1 +need 1 +load 1 +thefunct 1 +rearrang 1 +slightli 1 +also 1 +variabl 1 +previous 1 +defin 1 +constant 1 +chang 1 +paramet 1 +appeas 1 +compil 1 +var 1 +bracket 1 +youus 1 +machinesshould 1 +netscap 1 +sbin 1 +found 1 +ksaunder 1 +sbinfor 1 +account 1 +seriou 1 +gremlin 1 +codefor 1 +abl 1 +look 1 +andget 1 +start 1 +planner 1 +success 1 +uponcomplet 1 +appli 1 +schema 1 +meanwhil 1 +hunt 1 +thoseus 1 +machin 1 +shouldn 1 +special 1 +limit 1 +concern 1 +get 1 +zeroon 1 +third 1 +worri 1 +longer 1 +complet 1 +asspecifi 1 +posit 1 +grade 1 +result 1 +guarante 1 +novemb 1 +oneassign 1 +group 1 +clarif 1 +unless 1 +otherwis 1 +specifi 1 +assum 1 +either 1 +system 1 +add 1 +vanto 1 +queri 1 +alreadi 1 +exist 1 +document 1 +modifi 1 +includ 1 +thisclarif 1 +newhomework 1 +coursemateri 1 +midterm 1 +portion 1 +statu 1 +report 1 +tuesdai 1 +thec 1 +section 1 +remind 1 +right 1 +pagesc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..71e62cf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,34 @@ +homework 4 +cours 3 +note 3 +fall 2 +offic 2 +prelim 2 +home 1 +pagec 1 +automata 1 +comput 1 +theorywelcom 1 +click 1 +inform 1 +lectur 1 +exam 1 +studi 1 +guideannounc 1 +avail 1 +hardcopi 1 +set 1 +hour 1 +incorrect 1 +date 1 +revis 1 +erratum 1 +chang 1 +room 1 +nikolai 1 +hourscod 1 +academ 1 +integr 1 +pleas 1 +read 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..b7e07e93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,403 @@ +project 26 +databas 19 +system 19 +exam 8 +refer 8 +paper 7 +cours 7 +develop 7 +survei 6 +prelim 6 +list 6 +materi 6 +dbm 6 +work 6 +us 6 +code 6 +minibas 6 +final 5 +softwar 5 +also 5 +amount 5 +research 5 +term 5 +detail 5 +compon 5 +predat 5 +upson 4 +propos 4 +note 4 +textbook 4 +student 4 +background 4 +program 4 +data 4 +concept 4 +introductori 4 +topic 4 +involv 4 +write 4 +part 4 +process 4 +level 4 +follow 4 +transact 4 +offic 4 +advanc 3 +complet 3 +lectur 3 +mail 3 +inform 3 +grade 3 +possibl 3 +mani 3 +becom 3 +fundament 3 +class 3 +click 3 +herefor 3 +tent 3 +specif 3 +assign 3 +import 3 +base 3 +second 3 +provid 3 +queri 3 +need 3 +area 3 +text 3 +edit 3 +hour 3 +time 2 +tuesdai 2 +thursdai 2 +place 2 +result 2 +stat 2 +outsidefirewal 2 +descript 2 +fall 2 +design 2 +piec 2 +engin 2 +access 2 +form 2 +want 2 +larg 2 +number 2 +cover 2 +consequ 2 +discuss 2 +basic 2 +taught 2 +midterm 2 +test 2 +librari 2 +take 2 +turn 2 +depend 2 +person 2 +hopefulli 2 +choos 2 +whether 2 +goal 2 +comfort 2 +modular 2 +prototyp 2 +associ 2 +manag 2 +avail 2 +interfac 2 +could 2 +function 2 +neither 2 +lower 2 +convent 2 +page 2 +stonebrak 2 +morgan 2 +kaufmann 2 +phone 2 +weitsang 2 +evalu 1 +content 1 +schedul 1 +samplequest 1 +answer 1 +info 1 +predatordbm 1 +currentproject 1 +handout 1 +archiv 1 +prerequisit 1 +professor 1 +teach 1 +assist 1 +coursedescript 1 +offer 1 +first 1 +intendedto 1 +give 1 +solid 1 +databasemanag 1 +world 1 +slargest 1 +certainli 1 +among 1 +valuabl 1 +piecesof 1 +sens 1 +giant 1 +applic 1 +surprisingli 1 +principl 1 +behind 1 +industri 1 +grow 1 +thrive 1 +demand 1 +knowledgeabledatabas 1 +much 1 +greater 1 +suppli 1 +researchcommun 1 +activ 1 +alwai 1 +problem 1 +addressedbecaus 1 +explos 1 +peopl 1 +wish 1 +thiscours 1 +essenti 1 +anyon 1 +asystem 1 +compani 1 +informedus 1 +domain 1 +manipul 1 +find 1 +teller 1 +machin 1 +realli 1 +although 1 +intend 1 +newcours 1 +differ 1 +variou 1 +begin 1 +quickreview 1 +workload 1 +examin 1 +abreadth 1 +advancedtop 1 +thepurpos 1 +awar 1 +coursei 1 +fraction 1 +three 1 +weeksaft 1 +requireread 1 +journal 1 +confer 1 +proceed 1 +engineeringlibrari 1 +suggest 1 +initi 1 +pursueaddit 1 +forinform 1 +look 1 +written 1 +homework 1 +enrol 1 +mean 1 +twice 1 +semest 1 +addit 1 +around 1 +examtim 1 +developmentproject 1 +involvea 1 +signific 1 +wishto 1 +alon 1 +team 1 +howev 1 +willinvolv 1 +proportion 1 +fold 1 +hand 1 +experi 1 +build 1 +exist 1 +andmodifi 1 +manner 1 +thefirst 1 +huge 1 +andrar 1 +luxuri 1 +start 1 +scratch 1 +forc 1 +youto 1 +understand 1 +interact 1 +thediffer 1 +inevit 1 +bug 1 +appear 1 +simpl 1 +singl 1 +user 1 +fromth 1 +parser 1 +disk 1 +abl 1 +compil 1 +comput 1 +environ 1 +varioussystem 1 +actual 1 +come 1 +like 1 +buffermanag 1 +enginethat 1 +possibleproject 1 +lead 1 +builton 1 +focu 1 +high 1 +likecomplex 1 +type 1 +familiar 1 +recommend 1 +becauseth 1 +minim 1 +think 1 +interestedin 1 +ifyou 1 +categori 1 +decid 1 +aproject 1 +storag 1 +buffer 1 +thehigh 1 +optim 1 +betweenminibas 1 +higher 1 +somegener 1 +ideaon 1 +suitabl 1 +talk 1 +well 1 +advanceof 1 +date 1 +certain 1 +step 1 +submitan 1 +order 1 +produc 1 +review 1 +meet 1 +discussth 1 +progress 1 +made 1 +toward 1 +must 1 +particularsystem 1 +documentwil 1 +close 1 +someth 1 +thati 1 +picki 1 +contribut 1 +geton 1 +submiss 1 +includ 1 +demo 1 +reason 1 +oftest 1 +home 1 +homepag 1 +coursetextbook 1 +primari 1 +beta 1 +book 1 +raghu 1 +ramakrishnan 1 +bookcontain 1 +databasebook 1 +free 1 +instruct 1 +databasesystem 1 +might 1 +thecampu 1 +store 1 +korth 1 +silberschatz 1 +mcgraw 1 +hill 1 +secondedit 1 +standard 1 +lack 1 +tobe 1 +graduat 1 +michael 1 +read 1 +collect 1 +rel 1 +recent 1 +collectedand 1 +introduc 1 +ingr 1 +postgr 1 +andillustra 1 +corearea 1 +elmasri 1 +navath 1 +benjamin 1 +cum 1 +altern 1 +grai 1 +reuter 1 +techniqu 1 +bibl 1 +long 1 +tellsyou 1 +know 1 +wonderfulrefer 1 +clear 1 +confus 1 +aspect 1 +concurr 1 +control 1 +recoveri 1 +semant 1 +resourc 1 +tutori 1 +languag 1 +construct 1 +debuggingwith 1 +make 1 +gradingpolici 1 +percentag 1 +even 1 +finish 1 +anextra 1 +half 1 +likewis 1 +thefin 1 +period 1 +thur 1 +confirm 1 +willfocu 1 +coveredin 1 +earlier 1 +question 1 +professorpraveen 1 +seshadri 1 +praveen 1 +teachingassist 1 +hall 1 +noon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..21ff5f0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,183 @@ +program 11 +languag 11 +semant 6 +cours 6 +comput 6 +notat 5 +student 5 +descript 4 +upson 4 +studi 4 +inform 4 +properti 3 +logic 3 +cornel 3 +lectur 2 +link 2 +like 2 +implement 2 +mechan 2 +describ 2 +tool 2 +prove 2 +specif 2 +well 2 +work 2 +side 2 +assum 2 +experi 2 +knowledg 2 +function 2 +basic 2 +undergradu 2 +instructor 2 +class 2 +offic 2 +hour 2 +refer 2 +environ 2 +document 2 +avail 2 +content 1 +text 1 +prerequisiteshandoutsscrib 1 +noteshomework 1 +assignmentscontact 1 +informationrelev 1 +though 1 +call 1 +advanc 1 +book 1 +better 1 +entitl 1 +goalof 1 +conduct 1 +broad 1 +survei 1 +tech 1 +programminglanguag 1 +java 1 +directli 1 +compress 1 +dispatch 1 +tabl 1 +multipleinherit 1 +rather 1 +goal 1 +principlesof 1 +formal 1 +analyz 1 +concern 1 +subsum 1 +thestudi 1 +henc 1 +lead 1 +deeper 1 +understand 1 +mathemat 1 +proof 1 +theori 1 +exampl 1 +abstractli 1 +specifi 1 +howprogram 1 +oper 1 +asnot 1 +denot 1 +turn 1 +abstract 1 +preciser 1 +allow 1 +techniqu 1 +induct 1 +relat 1 +forform 1 +interest 1 +relev 1 +type 1 +safeti 1 +compil 1 +correct 1 +ideal 1 +come 1 +learn 1 +somethingabout 1 +make 1 +concept 1 +precis 1 +tomanipul 1 +demonstr 1 +us 1 +textbook 1 +carl 1 +gunter 1 +programm 1 +second 1 +edit 1 +larri 1 +paulson 1 +prerequisit 1 +least 1 +pascal 1 +prefer 1 +witha 1 +scheme 1 +haskel 1 +theoret 1 +profici 1 +undergraduatemathemat 1 +scienc 1 +ture 1 +machin 1 +recurs 1 +andlog 1 +predic 1 +calculu 1 +mathematicalmatur 1 +requir 1 +design 1 +math 1 +meng 1 +anmeng 1 +must 1 +talk 1 +find 1 +ifth 1 +suitabl 1 +contact 1 +newsgroup 1 +greg 1 +morrisett 1 +appoint 1 +admin 1 +assist 1 +linda 1 +competillo 1 +lfar 1 +erlingsson 1 +ulfar 1 +pmrelev 1 +mark 1 +leon 1 +resourc 1 +research 1 +emac 1 +mode 1 +comint 1 +need 1 +project 1 +line 1 +standard 1 +postscript 1 +user 1 +guid 1 +base 1 +system 1 +librari 1 +indexdocument 1 +toolsa 1 +gentl 1 +introduct 1 +andrew 1 +cum 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..d5d889bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,29 @@ +cours 3 +page 3 +cornel 2 +project 2 +newsgroup 2 +home 1 +pagecsmultimedia 1 +systemscomput 1 +scienc 1 +depart 1 +univers 1 +fall 1 +final 1 +present 1 +schedul 1 +staff 1 +info 1 +materi 1 +student 1 +us 1 +link 1 +anounc 1 +access 1 +rivl 1 +bugcom 1 +question 1 +send 1 +mail 1 +janosi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..0d5926fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,26 @@ +cours 3 +page 3 +cornel 2 +newsgroup 2 +home 1 +pagecsmultimedia 1 +systemscomput 1 +scienc 1 +depart 1 +univers 1 +fall 1 +staff 1 +info 1 +materi 1 +student 1 +project 1 +us 1 +link 1 +anounc 1 +access 1 +rivl 1 +bugcom 1 +question 1 +send 1 +mail 1 +janosi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..e69a0360 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,44 @@ +homework 10 +last 9 +modifi 9 +solut 8 +addendum 3 +design 2 +analysi 2 +algorithm 2 +evan 2 +time 2 +text 2 +kozen 2 +announc 2 +note 2 +homepag 1 +instructor 1 +ronitt 1 +rubinfeld 1 +moran 1 +locat 1 +upson 1 +springer 1 +verlag 1 +handout 1 +cours 1 +syllabu 1 +copi 1 +exam 1 +thursdai 1 +inupson 1 +talk 1 +tome 1 +reschedul 1 +cannot 1 +make 1 +refer 1 +cheat 1 +sheet 1 +class 1 +rajeev 1 +motwani 1 +lectur 1 +approxim 1 +paper 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..e8c29691 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,146 @@ +parallel 9 +system 9 +machin 7 +cours 6 +program 4 +languag 3 +architectur 3 +support 3 +manufactur 2 +offer 2 +debat 2 +consider 2 +perform 2 +price 2 +processor 2 +workstat 2 +level 2 +larg 2 +number 2 +issu 2 +topic 2 +first 2 +algorithm 2 +focu 2 +oper 2 +requir 2 +memori 2 +part 2 +studi 2 +model 2 +home 1 +pagefronti 1 +thorsten 1 +eickenfal 1 +locat 1 +upson 1 +pmoffic 1 +hour 1 +pmcours 1 +descriptionparallel 1 +stai 1 +underscor 1 +fact 1 +multiprocessor 1 +product 1 +line 1 +howev 1 +futur 1 +look 1 +like 1 +heat 1 +past 1 +month 1 +feder 1 +spend 1 +cut 1 +erad 1 +massiv 1 +competitor 1 +glorifi 1 +farm 1 +smile 1 +cannot 1 +eas 1 +busi 1 +suffer 1 +much 1 +competit 1 +leverag 1 +latest 1 +microprocessor 1 +develop 1 +quickli 1 +core 1 +technolog 1 +integr 1 +shelf 1 +cost 1 +effect 1 +easili 1 +high 1 +host 1 +vari 1 +applic 1 +workload 1 +although 1 +week 1 +matur 1 +last 1 +year 1 +point 1 +almost 1 +usabl 1 +adequ 1 +allow 1 +gener 1 +purpos 1 +aspect 1 +featur 1 +taken 1 +grant 1 +sequenti 1 +comput 1 +portabl 1 +power 1 +debugg 1 +multi 1 +user 1 +access 1 +virtual 1 +fast 1 +examin 1 +complet 1 +split 1 +network 1 +us 1 +vertic 1 +approach 1 +interact 1 +associ 1 +execut 1 +hardwar 1 +implement 1 +focuss 1 +layer 1 +second 1 +specif 1 +slice 1 +horizont 1 +across 1 +select 1 +analysi 1 +design 1 +altern 1 +depth 1 +dash 1 +provid 1 +share 1 +contrast 1 +materialscours 1 +formatlectur 1 +note 1 +problem 1 +set 1 +term 1 +project 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..27fd4c38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,116 @@ +cours 4 +us 4 +lectur 3 +annot 3 +tutori 3 +page 3 +organ 2 +thorsten 2 +languag 2 +materi 2 +allow 2 +also 2 +note 2 +conot 2 +document 2 +start 2 +introduct 2 +marshal 2 +program 2 +exampl 2 +onlin 2 +question 2 +home 1 +pageintroduct 1 +digit 1 +system 1 +comput 1 +eickenfal 1 +kimbal 1 +btopic 1 +includ 1 +representationof 1 +inform 1 +machin 1 +assembl 1 +processor 1 +interrupt 1 +memori 1 +hierarchi 1 +combinatori 1 +sequentialcircuit 1 +data 1 +path 1 +control 1 +unit 1 +design 1 +andmicroprogram 1 +helpif 1 +problem 1 +relat 1 +homework 1 +aproject 1 +best 1 +help 1 +theappropri 1 +point 1 +gethelp 1 +staff 1 +class 1 +mate 1 +otherwis 1 +send 1 +email 1 +cornel 1 +talk 1 +toon 1 +consult 1 +informationcoursemateri 1 +announcementsannounc 1 +video 1 +assign 1 +part 1 +small 1 +get 1 +avail 1 +case 1 +never 1 +onlinean 1 +brain 1 +great 1 +forpeopl 1 +know 1 +procedur 1 +like 1 +pascal 1 +fortran 1 +david 1 +cclass 1 +cardiff 1 +univers 1 +lot 1 +learnc 1 +todai 1 +guid 1 +book 1 +programsand 1 +refer 1 +well 1 +theyahoo 1 +wish 1 +surf 1 +search 1 +ofmor 1 +place 1 +frequent 1 +ask 1 +inansw 1 +common 1 +come 1 +learn 1 +contain 1 +link 1 +sever 1 +newsgroup 1 +maintain 1 +voneicken 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..66fde017 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,36 @@ +home 3 +cours 3 +depart 2 +clair 2 +final 2 +mail 2 +upson 2 +pagecsfound 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +cornel 1 +univers 1 +fall 1 +welcom 1 +inform 1 +materi 1 +code 1 +academ 1 +integr 1 +pleas 1 +read 1 +announc 1 +grade 1 +avail 1 +sometim 1 +saturdai 1 +send 1 +yourgrad 1 +request 1 +exam 1 +altern 1 +date 1 +pagesc 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..8c1ab951 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,43 @@ +cornel 5 +cours 4 +offic 4 +hour 4 +upson 3 +samuel 2 +weber 2 +buch 2 +overview 2 +note 2 +home 1 +pagec 1 +softwar 1 +engin 1 +technolog 1 +techniquescomput 1 +scienc 1 +depart 1 +univers 1 +fall 1 +staff 1 +professor 1 +vineet 1 +thursdai 1 +yaron 1 +minski 1 +none 1 +materi 1 +handout 1 +lectur 1 +recit 1 +line 1 +resourc 1 +assign 1 +grade 1 +remark 1 +stuff 1 +frequent 1 +ask 1 +question 1 +borland 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..63053320 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,31 @@ +assign 5 +project 3 +home 2 +page 2 +introduct 2 +brian 1 +smith 1 +tour 1 +guid 1 +cours 1 +inform 1 +homework 1 +spec 1 +lectur 1 +tabl 1 +content 1 +postscript 1 +slide 1 +comput 1 +system 1 +organ 1 +program 1 +procedur 1 +recurs 1 +stack 1 +assembl 1 +linker 1 +loader 1 +interrupt 1 +logic 1 +design 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..8171acd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,43 @@ +program 8 +prelim 4 +april 4 +home 3 +page 3 +spring 3 +cours 3 +inform 2 +exam 2 +tue 2 +messag 1 +welcom 1 +rememb 1 +check 1 +frequentlyfor 1 +import 1 +regard 1 +tuesdai 1 +review 1 +session 1 +held 1 +onsundai 1 +baker 1 +instructor 1 +teach 1 +assist 1 +offic 1 +hour 1 +get 1 +materi 1 +theworld 1 +wide 1 +codewarrior 1 +personalmac 1 +lectur 1 +februari 1 +thur 1 +march 1 +final 1 +last 1 +updat 1 +pierc 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..a9d4f627 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,260 @@ +lectur 87 +class 22 +upson 15 +program 13 +gofer 10 +assign 10 +handout 9 +link 9 +cornel 8 +correct 8 +list 8 +offic 7 +hour 7 +algorithm 6 +loop 6 +spring 5 +email 5 +jeff 5 +cours 5 +server 5 +structur 5 +none 5 +avail 5 +comput 4 +foster 4 +final 4 +consult 4 +introduct 4 +recurs 4 +data 4 +analysi 4 +david 4 +file 4 +text 4 +preliminari 4 +macgof 4 +page 3 +scienc 3 +depart 3 +jfoster 3 +prelim 3 +thursdai 3 +topic 3 +note 3 +hollist 3 +format 3 +binhqx 3 +date 3 +basic 3 +pointer 3 +arrai 3 +dynam 3 +storag 3 +alloc 3 +dynamicdata 3 +fine 3 +point 3 +deriv 3 +notat 3 +tripl 3 +condit 3 +prove 3 +function 3 +type 3 +curri 3 +filter 3 +iter 3 +applic 3 +architectur 3 +framework 3 +binari 3 +tree 3 +industri 3 +strength 3 +java 3 +home 2 +view 2 +cover 2 +tuesdai 2 +exam 2 +mondai 2 +olin 2 +last 2 +jose 2 +thank 2 +descript 2 +staff 2 +code 2 +sampl 2 +credit 2 +languag 2 +kwan 2 +walker 2 +section 2 +ahal 2 +walkerwednesdai 2 +kaykylesteveericvasantha 2 +danerickaychrisdan 2 +microsoft 2 +word 2 +rich 2 +plain 2 +stuffit 2 +expand 2 +system 2 +find 2 +codewarrior 2 +site 2 +univers 1 +question 1 +problem 1 +troubl 1 +tabl 1 +us 1 +earlyvers 1 +netscap 1 +contest 1 +first 1 +march 1 +announcetim 1 +place 1 +soon 1 +theprelim 1 +second 1 +april 1 +room 1 +wereannounc 1 +thesecond 1 +noon 1 +lastnam 1 +name 1 +covereveryth 1 +addit 1 +topicsconv 1 +daywhenwherewhomondai 1 +davetuesdai 1 +jeffwednesdai 1 +davethursdai 1 +halfridai 1 +halsaturdai 1 +breview 1 +session 1 +chri 1 +chrisand 1 +take 1 +time 1 +studi 1 +enhanc 1 +also 1 +engrd 1 +fall 1 +summer 1 +grant 1 +bothcom 1 +prerequisit 1 +equival 1 +programmingexperi 1 +intermedi 1 +high 1 +level 1 +tocomput 1 +includ 1 +develop 1 +proof 1 +ofprogram 1 +abstract 1 +datatyp 1 +object 1 +orient 1 +ofalgorithm 1 +princip 1 +instructor 1 +perkin 1 +appoint 1 +teach 1 +assist 1 +held 1 +alan 1 +sectionsdaytimeroominstructortuesdai 1 +perkinstuesdai 1 +perkinswednesdai 1 +walkerthursdai 1 +fosterfridai 1 +regular 1 +schedul 1 +effect 1 +ofclass 1 +sundai 1 +fridai 1 +consultingsundaymondaytuesdaywednesdaythursdayfridai 1 +steveerickylechrisjpkyl 1 +steveerickylechrisjpvasantha 1 +josejosekayjosejpvasantha 1 +josejosekayjosejp 1 +three 1 +macbinari 1 +contain 1 +parseabl 1 +other 1 +next 1 +waspost 1 +given 1 +macintosh 1 +process 1 +http 1 +address 1 +foraladdin 1 +window 1 +version 1 +armandonunez 1 +postscript 1 +print 1 +almost 1 +anylas 1 +printer 1 +want 1 +need 1 +applicationlik 1 +ghostview 1 +intro 1 +cell 1 +complex 1 +set 1 +charact 1 +simpl 1 +dictionari 1 +flavor 1 +inth 1 +public 1 +lab 1 +unix 1 +ishaskel 1 +systemsz 1 +yale 1 +haskel 1 +pleas 1 +help 1 +piec 1 +ofgof 1 +think 1 +gener 1 +interest 1 +know 1 +make 1 +itavail 1 +sourc 1 +manual 1 +onth 1 +project 1 +line 1 +csdepart 1 +enhance_assign 1 +cuinfo 1 +metrowerk 1 +homepag 1 +aladdin 1 +maker 1 +comment 1 +suggest 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..fbe72222 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,32 @@ +cours 3 +announc 2 +home 1 +pagec 1 +structur 1 +interpretationof 1 +comput 1 +programscomput 1 +scienc 1 +departmentcornel 1 +universityspr 1 +staff 1 +info 1 +materi 1 +emac 1 +macmarlai 1 +demo 1 +section 1 +room 1 +chang 1 +prelim 1 +time 1 +place 1 +make 1 +grader 1 +happi 1 +gener 1 +exam 1 +extens 1 +date 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..af58907b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,92 @@ +languag 6 +offic 4 +implement 3 +descript 3 +upson 3 +email 3 +advanc 2 +link 2 +modern 2 +program 2 +featur 2 +object 2 +class 2 +handout 2 +cornel 2 +phone 2 +hour 2 +evan 2 +inform 2 +refer 2 +environ 2 +document 2 +avail 2 +content 1 +descriptionhandoutsadministriviaweb 1 +java 1 +haskel 1 +dylan 1 +provid 1 +high 1 +level 1 +closur 1 +polymorph 1 +abstract 1 +data 1 +type 1 +garbag 1 +collect 1 +except 1 +continu 1 +thread 1 +synchron 1 +construct 1 +survei 1 +techniqu 1 +effici 1 +implementationof 1 +focu 1 +ofmodern 1 +function 1 +make 1 +connectionsto 1 +kind 1 +notabl 1 +orient 1 +separ 1 +page 1 +administrivia 1 +instructor 1 +greg 1 +morrisett 1 +admin 1 +assist 1 +linda 1 +competillo 1 +send 1 +appoint 1 +moran 1 +tuesdai 1 +thursdai 1 +pmweb 1 +mark 1 +leon 1 +resourc 1 +research 1 +project 1 +line 1 +standard 1 +postscript 1 +user 1 +guid 1 +base 1 +system 1 +librari 1 +tool 1 +indexdocument 1 +toolsa 1 +gentl 1 +introduct 1 +andrew 1 +cum 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..db094e7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,24 @@ +postscript 3 +distribut 2 +homework 2 +examin 2 +annot 2 +practic 1 +systemspract 1 +system 1 +registr 1 +necessari 1 +student 1 +takingc 1 +gener 1 +informationcours 1 +overview 1 +logist 1 +read 1 +homeworkshomework 1 +amexaminationsmidterm 1 +final 1 +bibliographiesselect 1 +bibliographi 1 +prepar 1 +class 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..230c66e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,166 @@ +poster 5 +split 4 +upson 3 +pleas 3 +instruct 3 +board 3 +well 3 +final 3 +project 3 +homework 3 +thorsten 2 +session 2 +present 2 +arriv 2 +minut 2 +problem 2 +judg 2 +report 2 +page 2 +import 2 +find 2 +start 2 +futur 2 +work 2 +check 2 +home 1 +pagehigh 1 +perform 1 +comput 1 +system 1 +eickenspr 1 +sessionthu 1 +tbdpleas 1 +sign 1 +outsid 1 +willdetermin 1 +order 1 +begin 1 +postersess 1 +late 1 +pick 1 +cindywilliam 1 +limit 1 +hold 1 +ithorizont 1 +hang 1 +corridor 1 +presentyour 1 +plu 1 +question 1 +give 1 +everyon 1 +asens 1 +attack 1 +solut 1 +contempl 1 +andth 1 +result 1 +gotten 1 +learn 1 +presentationswil 1 +messag 1 +across 1 +everi 1 +memberof 1 +group 1 +particip 1 +nativespeak 1 +difficulti 1 +taken 1 +consider 1 +info 1 +cours 1 +current 1 +mondai 1 +noon 1 +absolut 1 +must 1 +subdirectori 1 +willb 1 +contribut 1 +thelongest 1 +last 1 +remain 1 +server 1 +year 1 +tocom 1 +mani 1 +peopl 1 +search 1 +engin 1 +finalreport 1 +usual 1 +introduct 1 +aretri 1 +solv 1 +follow 1 +thorough 1 +discuss 1 +trade 1 +off 1 +part 1 +need 1 +explain 1 +chose 1 +thesolut 1 +option 1 +consid 1 +youreject 1 +webread 1 +convic 1 +bestsolut 1 +showcas 1 +us 1 +ampl 1 +experiment 1 +data 1 +goodexplan 1 +exactli 1 +measur 1 +know 1 +whatyou 1 +think 1 +shown 1 +left 1 +open 1 +projectsproject 1 +reportsproject 1 +proposalsiniti 1 +ideascours 1 +materialshomework 1 +cuc 1 +pagebefor 1 +machin 1 +might 1 +also 1 +sampl 1 +program 1 +introc 1 +casec 1 +technologyc 1 +cachesc 1 +netsc 1 +spc 1 +cyou 1 +inform 1 +paper 1 +parallel 1 +programmingin 1 +emdc 1 +sortingc 1 +spamc 1 +msgpassc 1 +mpic 1 +cachecohc 1 +locksc 1 +threadsc 1 +atmc 1 +netc 1 +scoreboardc 1 +tomasuloc 1 +predc 1 +superscalarc 1 +busesc 1 +pentiummaintain 1 +eicken 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..238596dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,74 @@ +april 9 +februari 8 +march 6 +vision 4 +motion 4 +cont 4 +lectur 3 +transform 3 +machin 2 +cours 2 +problem 2 +note 2 +januari 2 +regular 2 +comput 2 +stereo 2 +correl 2 +parametr 2 +guest 2 +track 2 +staff 1 +instructor 1 +ramin 1 +zabihteach 1 +assist 1 +justin 1 +millerclass 1 +time 1 +place 1 +phillip 1 +project 1 +suggestionsproblem 1 +set 1 +class 1 +scribe 1 +week 1 +simul 1 +anneal 1 +calculu 1 +variat 1 +maximum 1 +likelihood 1 +estim 1 +markov 1 +random 1 +field 1 +snake 1 +introduct 1 +mestim 1 +method 1 +censu 1 +geometri 1 +geometr 1 +segment 1 +edg 1 +detect 1 +continu 1 +model 1 +base 1 +hausdorff 1 +distanc 1 +eigenhausdorff 1 +face 1 +recognitionsect 1 +optic 1 +flow 1 +constraint 1 +equationoth 1 +sourc 1 +home 1 +page 1 +histori 1 +object 1 +recognit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..26f2c7e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,73 @@ +project 4 +home 3 +cours 3 +inform 3 +directori 3 +part 3 +speech 3 +corpu 3 +depart 2 +cornel 2 +code 2 +brill 2 +annot 2 +text 2 +wordnet 2 +contain 2 +pagecsintroduct 1 +natur 1 +languag 1 +understandingcomput 1 +scienc 1 +univers 1 +spring 1 +welcom 1 +materi 1 +academ 1 +integr 1 +pleas 1 +read 1 +announcementsher 1 +list 1 +resourc 1 +avail 1 +taggerbrown 1 +brown 1 +tag 1 +small 1 +withpart 1 +execut 1 +sure 1 +environ 1 +variabl 1 +wnsearchdir 1 +archiv 1 +dict 1 +final 1 +site 1 +descript 1 +ofth 1 +content 1 +penn 1 +treebank 1 +iicollect 1 +canus 1 +like 1 +talk 1 +francisabout 1 +access 1 +us 1 +databas 1 +recent 1 +paper 1 +computationallinguist 1 +repositori 1 +pointer 1 +variou 1 +system 1 +compon 1 +present 1 +schedulewhat 1 +turn 1 +pagesc 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..d58630e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,36 @@ +class 4 +cours 2 +lab 2 +upson 2 +assign 2 +tuesdai 2 +wednesdai 2 +section 2 +prelim 2 +first 2 +home 1 +page 1 +info 1 +syllabu 1 +link 1 +announcementsroom 1 +updat 1 +unforseen 1 +circumst 1 +still 1 +unableto 1 +therefor 1 +follow 1 +room 1 +philip 1 +thursdai 1 +meet 1 +maclab 1 +usual 1 +close 1 +book 1 +exam 1 +cover 1 +materialcov 1 +need 1 +comput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..9923becb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,38 @@ +lectur 23 +maxflow 7 +algorithm 5 +heap 3 +union 3 +find 3 +fall 2 +matroid 2 +binomi 2 +tree 2 +preflow 2 +push 2 +professor 1 +monika 1 +rauch 1 +henzingeremail 1 +cornel 1 +cours 1 +informationhomework 1 +solut 1 +graph 1 +explor 1 +greedi 1 +dijkstra 1 +bellman 1 +ford 1 +matrix 1 +closur 1 +fibonacci 1 +treap 1 +randomizedsearch 1 +mincut 1 +theorem 1 +edmond 1 +karp 1 +dinitz 1 +dynam 1 +implement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..14607dc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,212 @@ +juli 23 +assign 13 +class 9 +comput 8 +matlab 7 +upson 7 +grade 7 +final 7 +introduct 6 +cours 6 +june 5 +scientif 4 +sourc 4 +code 4 +problem 4 +linear 4 +offic 4 +deadlin 4 +interpol 3 +hall 3 +time 3 +work 3 +midterm 3 +exam 3 +system 3 +scmv 3 +file 3 +syllabu 2 +handout 2 +numer 2 +includ 2 +quadratur 2 +equat 2 +solv 2 +least 2 +squar 2 +vector 2 +niko 2 +cornel 2 +hour 2 +appoint 2 +ozan 2 +math 2 +student 2 +lab 2 +siblei 2 +martha 2 +hand 2 +done 2 +credit 2 +alon 2 +name 2 +chang 2 +total 2 +score 2 +newton 2 +cubic 2 +review 2 +classroom 2 +minim 2 +function 2 +euler 2 +method 2 +exampl 2 +unix 2 +zcat 2 +computationsumm 1 +inform 1 +setsan 1 +elementari 1 +analysi 1 +scientificcomput 1 +topic 1 +andnonlinear 1 +fit 1 +ordinarydifferenti 1 +environ 1 +us 1 +effici 1 +reliabl 1 +stabil 1 +stress 1 +informationstaff 1 +pitsiani 1 +instructor 1 +hafizogullari 1 +teach 1 +assist 1 +lecturesclass 1 +meet 1 +everi 1 +administrationlauri 1 +buck 1 +question 1 +concern 1 +record 1 +account 1 +addressedto 1 +administr 1 +prerequisitesc 1 +corequisit 1 +materialstext 1 +matrix 1 +approachus 1 +charl 1 +loan 1 +distribut 1 +softwar 1 +purchas 1 +eitherth 1 +macintosh 1 +version 1 +though 1 +labsthi 1 +design 1 +three 1 +renssela 1 +setsther 1 +lectur 1 +orfrom 1 +page 1 +extra 1 +avail 1 +rack 1 +outsid 1 +collect 1 +computingproblem 1 +return 1 +behandl 1 +begin 1 +duedat 1 +late 1 +accept 1 +worst 1 +gradefrom 1 +ignor 1 +partner 1 +printyour 1 +copi 1 +pair 1 +firstpag 1 +addit 1 +partnernam 1 +examsther 1 +dai 1 +list 1 +gradingyour 1 +follow 1 +best 1 +beassign 1 +accord 1 +rel 1 +rank 1 +base 1 +onyour 1 +calendar 1 +program 1 +error 1 +float 1 +point 1 +number 1 +registr 1 +polynomi 1 +vandermond 1 +piecewis 1 +hermit 1 +spline 1 +integr 1 +cote 1 +composit 1 +rule 1 +adapt 1 +drop 1 +matric 1 +oper 1 +given 1 +choleski 1 +find 1 +root 1 +variabl 1 +multivari 1 +initi 1 +valu 1 +backward 1 +rung 1 +kutta 1 +adam 1 +computingat 1 +rennselaerhal 1 +locat 1 +folder 1 +applic 1 +chapter 1 +plan 1 +stand 1 +otherthan 1 +on 1 +uncompress 1 +untar 1 +command 1 +highli 1 +recommend 1 +brows 1 +session 1 +need 1 +postscript 1 +viewer 1 +instal 1 +order 1 +randperm 1 +length 1 +set 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..9bbeff2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,182 @@ +system 11 +class 10 +cours 8 +materi 7 +oper 5 +quizz 5 +program 4 +also 4 +final 4 +nawaaz 3 +subject 3 +memori 3 +file 3 +cover 3 +issu 3 +thursdai 3 +summer 2 +home 2 +page 2 +instructor 2 +indupraka 2 +kodukula 2 +cornel 2 +ahm 2 +motd 2 +descript 2 +schedul 2 +polici 2 +statement 2 +collabor 2 +offic 2 +hour 2 +send 2 +comment 2 +topic 2 +deadlock 2 +method 2 +comput 2 +discuss 2 +requir 2 +roughli 2 +follow 2 +particular 2 +start 2 +lldiscuss 2 +next 2 +virtual 2 +look 2 +book 2 +week 2 +first 2 +worth 2 +group 2 +prereq 1 +permiss 1 +praka 1 +teach 1 +assist 1 +archiv 1 +prerequsit 1 +outlin 1 +textbook 1 +grade 1 +anintroduct 1 +logic 1 +design 1 +emphasison 1 +multiprogram 1 +includ 1 +processsynchron 1 +manag 1 +input 1 +output 1 +inform 1 +share 1 +protect 1 +secur 1 +theimpact 1 +network 1 +distribut 1 +environ 1 +operatingsystem 1 +fast 1 +pace 1 +requiringconst 1 +attent 1 +prerequsitescomplet 1 +familiar 1 +assum 1 +inparticular 1 +knowledg 1 +architectur 1 +assembl 1 +programminglanguag 1 +structur 1 +theintroductori 1 +purpos 1 +thatwil 1 +remind 1 +audienc 1 +outlineth 1 +organ 1 +depend 1 +feedback 1 +chang 1 +theorder 1 +content 1 +section 1 +overview 1 +concurr 1 +synchron 1 +ensur 1 +mutualexclus 1 +detect 1 +prevent 1 +algorithm 1 +multiprocessor 1 +well 1 +memorymanag 1 +variou 1 +usedto 1 +implement 1 +segment 1 +evolut 1 +thetradit 1 +micro 1 +kernel 1 +timepermit 1 +lectur 1 +advanc 1 +multithread 1 +serverless 1 +textbooksth 1 +princip 1 +text 1 +conceptsbook 1 +abraham 1 +silberschatz 1 +peter 1 +galvin 1 +distributeclass 1 +note 1 +complet 1 +noteswil 1 +avail 1 +world 1 +wide 1 +pageat 1 +meet 1 +mondaythru 1 +second 1 +addit 1 +weekli 1 +assign 1 +thesewil 1 +hand 1 +thursdayat 1 +gradingeach 1 +homework 1 +carri 1 +weightag 1 +combinedweightag 1 +twomidterm 1 +surpris 1 +todetermin 1 +understand 1 +collaborationat 1 +peopl 1 +form 1 +eachhomework 1 +need 1 +submit 1 +copi 1 +thehomework 1 +close 1 +closednot 1 +mondai 1 +tuesdai 1 +wednesdai 1 +upson 1 +maintain 1 +induprakaskodukula 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..32295c0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,41 @@ +lectur 32 +septemb 24 +note 22 +octob 19 +novemb 16 +solut 9 +decemb 8 +homework 7 +cornel 2 +offic 2 +hour 2 +appoint 2 +csc 1 +advanc 1 +program 1 +languagesfal 1 +upson 1 +instructor 1 +henzingerupson 1 +class 1 +teach 1 +assist 1 +neal 1 +glewupson 1 +glew 1 +handoutshandout 1 +cours 1 +informationhandout 1 +get 1 +start 1 +mlhandout 1 +meta 1 +lambdahomeworkshomework 1 +grieshomework 1 +notesraw 1 +scribe 1 +noteslectur 1 +introduct 1 +mllectur 1 +midterm 1 +grieslectur 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..d91217a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,29 @@ +fall 7 +note 4 +upson 2 +offic 2 +hour 2 +ravi 2 +advanc 1 +program 1 +languag 1 +faculti 1 +prof 1 +robert 1 +constabl 1 +mondai 1 +teach 1 +assist 1 +kumar 1 +thur 1 +assign 1 +nuprl 1 +classic 1 +comment 1 +question 1 +suggest 1 +page 1 +pleas 1 +mail 1 +pavel 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..d0e3f63a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,14 @@ +cours 2 +addit 2 +inform 2 +coursesc 1 +fall 1 +spring 1 +maintain 1 +individualfaculti 1 +member 1 +consult 1 +class 1 +page 1 +contactgloria 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..8d0d0bc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,33 @@ +almstrum 4 +utexa 3 +analysi 2 +program 2 +instructor 2 +vicki 2 +linyuan 2 +fall 1 +yang 1 +syllabu 1 +announc 1 +homework 1 +assign 1 +handout 1 +interest 1 +tutori 1 +new 1 +class 1 +homepag 1 +last 1 +updat 1 +page 1 +prepar 1 +suggest 1 +comment 1 +welcom 1 +click 1 +send 1 +mail 1 +depart 1 +comput 1 +scienc 1 +austin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..06372bac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,114 @@ +formal 6 +system 5 +april 5 +reason 4 +tool 4 +present 4 +method 4 +examin 3 +logic 3 +otter 3 +foundat 2 +mani 2 +program 2 +number 2 +order 2 +theori 2 +upon 2 +version 2 +page 2 +final 2 +spring 1 +ofmathemat 1 +taylor 1 +cours 1 +blurb 1 +approach 1 +theobject 1 +specifi 1 +comput 1 +includ 1 +formalizationof 1 +world 1 +interact 1 +creationof 1 +numer 1 +systemsfor 1 +mechan 1 +formalmethod 1 +support 1 +differ 1 +exampl 1 +suchsystem 1 +pair 1 +primit 1 +recurs 1 +arithmet 1 +boyer 1 +moor 1 +prover 1 +first 1 +nelson 1 +higher 1 +imp 1 +equat 1 +mizar 1 +quaif 1 +type 1 +nuprl 1 +lego 1 +coqstud 1 +choos 1 +help 1 +instructor 1 +ortool 1 +grade 1 +base 1 +aboutthes 1 +projecthtml 1 +theqe 1 +manifestoplain 1 +text 1 +qedmanifestobowen 1 +backup 1 +copi 1 +chief 1 +assign 1 +select 1 +bowen 1 +report 1 +class 1 +oral 1 +either 1 +good 1 +freeli 1 +avail 1 +implement 1 +consult 1 +make 1 +choic 1 +test 1 +hope 1 +guest 1 +localform 1 +commun 1 +tent 1 +schedul 1 +rick 1 +tannei 1 +continu 1 +trevor 1 +hick 1 +ruben 1 +gamboa 1 +squar 1 +root 1 +samuel 1 +guyer 1 +circal 1 +process 1 +algebra 1 +sawada 1 +russel 1 +turpin 1 +galoi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..e15b7e96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,151 @@ +project 8 +comput 6 +memori 4 +cours 3 +system 3 +design 3 +pipelin 3 +midterm 3 +advanc 2 +architectur 2 +quantit 2 +analysi 2 +major 2 +parallel 2 +network 2 +hour 2 +hennessi 2 +edit 2 +stedit 2 +research 2 +work 2 +pair 2 +tech 2 +hazard 2 +branch 2 +predictionch 2 +revieww 2 +spring 2 +architecturethi 1 +focus 1 +techniqu 1 +evaluationof 1 +modern 1 +select 1 +appropri 1 +benchmarksto 1 +reveal 1 +compar 1 +perform 1 +altern 1 +choic 1 +insystem 1 +emphasi 1 +compon 1 +subsystem 1 +highperform 1 +instruct 1 +level 1 +memoryhierarchi 1 +input 1 +output 1 +orient 1 +interconnect 1 +studentswil 1 +undertak 1 +oftheir 1 +choos 1 +administr 1 +informationuniqu 1 +number 1 +meet 1 +place 1 +instructor 1 +mikedahlinoffic 1 +appoint 1 +tbdtaoffic 1 +tbdreadingstextbook 1 +patteson 1 +computerarchitectur 1 +approach 1 +second 1 +note 1 +significantli 1 +differ 1 +recommend 1 +attempt 1 +textbook 1 +errata 1 +sheetfor 1 +pattersonin 1 +addit 1 +read 1 +current 1 +paper 1 +variou 1 +aspect 1 +currentcomput 1 +watch 1 +space 1 +pointer 1 +readinglist 1 +grade 1 +class 1 +particip 1 +homework 1 +exam 1 +scheduleweekdatetopicreadingduejan 1 +intro 1 +admin 1 +review 1 +perf 1 +cost 1 +amdahl 1 +trendsch 1 +cach 1 +isa 1 +mlkholidayf 1 +static 1 +proposalfeb 1 +scoreboard 1 +tomasulu 1 +speculationch 1 +dynam 1 +predict 1 +limit 1 +vector 1 +processorsch 1 +dfeb 1 +hierarchych 1 +surveyfeb 1 +dram 1 +banksf 1 +breakm 1 +breakmar 1 +metric 1 +queu 1 +buss 1 +disk 1 +raidch 1 +tertiari 1 +networksf 1 +networksch 1 +checkpointapr 1 +architecturesf 1 +mppsch 1 +mpp 1 +preseantationsm 1 +presentationsfri 1 +last 1 +classesm 1 +written 1 +reportaddit 1 +resourcescours 1 +page 1 +product 1 +confer 1 +bibliographi 1 +reportsyahoo 1 +businessand 1 +economi 1 +compani 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..9d4424aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,71 @@ +oper 5 +class 5 +system 4 +protocol 2 +address 2 +gener 2 +internet 2 +provid 2 +also 2 +paper 2 +project 2 +list 2 +systemsuniqu 1 +recent 1 +explos 1 +interest 1 +world 1 +wide 1 +resultedin 1 +evolv 1 +us 1 +thetradit 1 +concern 1 +interprocess 1 +commun 1 +resourc 1 +alloc 1 +secur 1 +contextof 1 +goal 1 +understandingof 1 +current 1 +state 1 +addressproblem 1 +must 1 +solv 1 +matur 1 +purpos 1 +operatingsystem 1 +hypothesi 1 +behind 1 +design 1 +mani 1 +theissu 1 +context 1 +addressedin 1 +tradit 1 +area 1 +occasionallyread 1 +relat 1 +bear 1 +understandingcurr 1 +problem 1 +reportspoint 1 +research 1 +refer 1 +inform 1 +syllabu 1 +read 1 +schedul 1 +longer 1 +less 1 +organ 1 +rosterhandout 1 +verif 1 +sslprotocol 1 +proofsketch 1 +guidelin 1 +final 1 +talk 1 +report 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..f1d83a71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,217 @@ +handout 57 +program 36 +exam 28 +solut 22 +date 16 +class 14 +homework 14 +avail 14 +content 13 +back 12 +last 11 +object 11 +pick 11 +lectur 10 +part 10 +utexa 9 +exampl 9 +fantasm 9 +discuss 9 +offic 8 +start 8 +test 8 +earli 8 +late 8 +problem 8 +cours 7 +hour 7 +p_global 7 +noon 6 +session 6 +drop 6 +mondai 6 +wednesdai 6 +practic 6 +note 6 +page 5 +final 5 +info 5 +regist 5 +electron 5 +section 4 +check 4 +place 4 +pass 4 +bit 4 +also 4 +import 3 +tue 3 +meet 3 +time 3 +thursdai 3 +us 3 +academ 3 +chang 3 +bonu 3 +slide 3 +turn 3 +electronc 3 +user 3 +manual 3 +disk 3 +paramet 3 +macsbug 3 +electoron 3 +sourc 3 +output 3 +fall 2 +comput 2 +organ 2 +updat 2 +professor 2 +yurkanan 2 +dragon 2 +version 2 +oper 2 +email 2 +yoonsuck 2 +choe 2 +yschoe 2 +edum 2 +pleas 2 +holidai 2 +period 2 +withdraw 2 +typo 2 +remov 2 +rightmost 2 +make 2 +fridai 2 +saturdai 2 +announc 2 +door 2 +procudur 2 +thur 2 +assign 2 +front 2 +system 2 +document 2 +folder 2 +powermac 2 +quadra 2 +mac 2 +instruct 2 +compil 2 +call 2 +model 2 +newsgroup 2 +pascal 2 +constantli 1 +construct 1 +onmon 1 +titl 1 +prerequisit 1 +grade 1 +least 1 +chri 1 +edmondson 1 +new 1 +post 1 +extra 1 +visit 1 +syllabu 1 +locat 1 +attend 1 +cynthia 1 +deepa 1 +ramani 1 +dparam 1 +eduw 1 +zhang 1 +gzhang 1 +eduf 1 +conduct 1 +calendar 1 +labor 1 +refund 1 +rare 1 +extenu 1 +circumst 1 +automat 1 +begin 1 +penalti 1 +reason 1 +univers 1 +registr 1 +fail 1 +deadlin 1 +appli 1 +graduat 1 +thanksgiv 1 +appeal 1 +schedul 1 +glanc 1 +correct 1 +night 1 +boxin 1 +prof 1 +letter 1 +student 1 +overview 1 +descript 1 +topic 1 +singl 1 +ascii 1 +code 1 +chart 1 +home 1 +work 1 +endia 1 +memori 1 +hierarchi 1 +submit 1 +stack 1 +function 1 +requir 1 +comment 1 +real 1 +proc 1 +func 1 +might 1 +risc 1 +architectur 1 +includ 1 +valu 1 +practiv 1 +obsolet 1 +spec 1 +turnin 1 +procedur 1 +gener 1 +interfac 1 +charact 1 +orient 1 +cheat 1 +polici 1 +questionair 1 +must 1 +offici 1 +except 1 +go 1 +held 1 +free 1 +toward 1 +next 1 +thank 1 +brett 1 +jame 1 +data 1 +subroutine_fil 1 +exception_fil 1 +avali 1 +resourc 1 +studi 1 +guid 1 +maintain 1 +austin 1 +utc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..9bfe4655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,206 @@ +pascal 15 +porter 11 +final 8 +review 7 +question 7 +link 6 +class 6 +version 6 +program 6 +new 6 +lang 6 +week 5 +offic 5 +exam 5 +page 4 +import 4 +avail 4 +room 4 +post 4 +assign 4 +utexa 4 +discuss 4 +midterm 4 +turbo 4 +us 3 +topic 3 +next 3 +note 3 +dwip 3 +lectur 3 +html 3 +postscript 3 +welch 3 +solut 3 +test 3 +newsgroup 3 +right 2 +semest 2 +inform 2 +announc 2 +take 2 +home 2 +last 2 +howev 2 +relat 2 +good 2 +special 2 +cover 2 +time 2 +need 2 +help 2 +bruce 2 +decemb 2 +click 2 +also 2 +addendum 2 +tutori 2 +session 2 +studi 2 +guid 2 +ansi 2 +articl 2 +homepag 1 +warn 1 +construct 1 +becom 1 +activ 1 +progress 1 +andther 1 +relev 1 +regard 1 +todai 1 +unabl 1 +maintain 1 +forthes 1 +coupl 1 +dai 1 +put 1 +follow 1 +luckfor 1 +held 1 +painter 1 +hall 1 +exact 1 +locat 1 +dependon 1 +availib 1 +door 1 +therewil 1 +someon 1 +moreov 1 +almost 1 +total 1 +coverag 1 +uptoth 1 +feel 1 +free 1 +come 1 +glad 1 +luck 1 +resolutio 1 +complex 1 +theori 1 +nimar 1 +arora 1 +parallel 1 +process 1 +banerje 1 +boolean 1 +circuit 1 +porterquest 1 +sheet 1 +rotat 1 +bit 1 +disregard 1 +somewhat 1 +beyond 1 +scope 1 +slide 1 +present 1 +summar 1 +thecont 1 +whole 1 +reserv 1 +desk 1 +atugl 1 +hope 1 +webpag 1 +soon 1 +experienc 1 +technic 1 +difficulti 1 +caus 1 +length 1 +file 1 +schedul 1 +pleas 1 +check 1 +ad 1 +sostai 1 +tune 1 +download 1 +prolog 1 +comput 1 +scienc 1 +iinstructorbruc 1 +mondai 1 +email 1 +tasoffic 1 +hourslab 1 +section 1 +schedulec 1 +thursdai 1 +uniqu 1 +number 1 +cours 1 +descriptionclass 1 +scheduleclass 1 +includ 1 +articlesclass 1 +newsgroupprogram 1 +assignmentsprogram 1 +pascaltutori 1 +text 1 +format 1 +faqyou 1 +sampl 1 +programm 1 +gener 1 +languag 1 +materi 1 +frequent 1 +ask 1 +zipe 1 +concept 1 +structur 1 +base 1 +newgroup 1 +might 1 +interest 1 +comp 1 +isocomp 1 +maccomp 1 +borlandcomp 1 +misccomp 1 +delphi 1 +miscfj 1 +rememb 1 +access 1 +dell 1 +serverto 1 +mail 1 +prefer 1 +item 1 +option 1 +menu 1 +look 1 +usual 1 +lead 1 +importantstuff 1 +descript 1 +send 1 +comment 1 +critic 1 +suggest 1 +addit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..29c0096d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,70 @@ +assign 7 +mesa 5 +cours 4 +librari 4 +exampl 4 +graphic 3 +opengl 3 +page 3 +updat 3 +comput 2 +gener 2 +inform 2 +us 2 +utc 2 +book 2 +second 2 +requir 2 +student 2 +code 2 +spring 1 +cscomput 1 +graphicsspr 1 +instructor 1 +donald 1 +fussel 1 +descript 1 +syllabu 1 +year 1 +provid 1 +anopengl 1 +like 1 +platform 1 +hasbeen 1 +instal 1 +public 1 +workstat 1 +sciencesdepart 1 +instruct 1 +sampl 1 +makefil 1 +machin 1 +specif 1 +home 1 +center 1 +refer 1 +manual 1 +ousterhout 1 +welch 1 +turn 1 +note 1 +option 1 +exam 1 +oneor 1 +show 1 +examwil 1 +submit 1 +wish 1 +bothmai 1 +higher 1 +score 1 +curv 1 +willcount 1 +xlib 1 +driver 1 +billthecat 1 +copi 1 +file 1 +directori 1 +contain 1 +slate 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..c4e4a0b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,67 @@ +graphic 5 +mesa 5 +exampl 5 +assign 5 +cours 4 +librari 4 +inform 3 +machin 3 +opengl 3 +page 3 +book 3 +comput 2 +gener 2 +us 2 +utc 2 +welch 2 +code 2 +reinstal 2 +walker 2 +fall 1 +gcomput 1 +graphicsfal 1 +instructor 1 +donald 1 +fussel 1 +descript 1 +syllabu 1 +year 1 +provid 1 +anopengl 1 +like 1 +platform 1 +hasbeen 1 +instal 1 +public 1 +workstat 1 +sciencesdepart 1 +instruct 1 +sampl 1 +makefil 1 +specif 1 +home 1 +center 1 +refer 1 +manual 1 +ousterhout 1 +program 1 +turn 1 +xlib 1 +driver 1 +billthecat 1 +copi 1 +file 1 +directori 1 +contain 1 +slate 1 +new 1 +turnin 1 +work 1 +libtcl 1 +libtk 1 +tclsh 1 +wish 1 +compil 1 +demo 1 +repair 1 +sourc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..0e136b9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,45 @@ +email 3 +utexa 3 +french 3 +offic 2 +hour 2 +taylor 2 +hqliu 2 +homework 2 +problem 2 +last 2 +appear 2 +cryptographi 1 +professor 1 +david 1 +zuckerman 1 +huiqun 1 +station 1 +hall 1 +basement 1 +syllabu 1 +mathemat 1 +background 1 +textbook 1 +ciphertext 1 +notic 1 +answer 1 +abl 1 +recogn 1 +word 1 +canada 1 +frequenc 1 +common 1 +letter 1 +chang 1 +drastic 1 +english 1 +howev 1 +digram 1 +like 1 +page 1 +modifi 1 +septemb 1 +comment 1 +welcom 1 +send 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..f18805ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,314 @@ +problem 10 +class 10 +distribut 9 +protocol 9 +solut 8 +comput 6 +requir 6 +paper 6 +system 6 +final 5 +prove 5 +assign 5 +rajeev 4 +exam 4 +global 4 +topic 4 +show 4 +snapshot 4 +point 4 +lorenzo 3 +offic 3 +hour 3 +textbook 3 +suggest 3 +joshi 3 +mondai 3 +cover 3 +time 3 +us 3 +state 3 +detect 3 +messag 3 +file 3 +discuss 3 +give 3 +homework 3 +page 3 +proof 3 +deriv 3 +link 3 +cours 2 +set 2 +midterm 2 +newsgroup 2 +utexa 2 +hall 2 +phone 2 +expect 2 +sourc 2 +refer 2 +wednesdai 2 +second 2 +design 2 +distributedsystem 2 +includ 2 +clock 2 +manag 2 +agreement 2 +byzantin 2 +group 2 +program 2 +techniqu 2 +secur 2 +integr 2 +gener 2 +depend 2 +present 2 +allow 2 +consist 2 +written 2 +collabor 2 +student 2 +encourag 2 +take 2 +survei 2 +ofth 2 +last 2 +colleagu 2 +improv 2 +commun 2 +someth 2 +solv 2 +algorithm 2 +assum 2 +fifo 2 +asynchron 2 +predic 2 +number 2 +deadlock 2 +would 2 +cost 2 +postscript 2 +send 2 +ispr 1 +instructor 1 +alvisiteach 1 +assist 1 +joshicont 1 +locat 1 +mechan 1 +content 1 +grade 1 +inform 1 +pertain 1 +instruct 1 +stafflorenzo 1 +alvisi 1 +taylor 1 +tuesdai 1 +thursdai 1 +meet 1 +arrang 1 +appoint 1 +mechanicsi 1 +materi 1 +requiredtextbook 1 +remaind 1 +come 1 +given 1 +classat 1 +appropri 1 +lectur 1 +robert 1 +moor 1 +isutexa 1 +edit 1 +mullend 1 +editor 1 +acmpress 1 +addison 1 +weslei 1 +publish 1 +compani 1 +read 1 +contentc 1 +abstract 1 +tobe 1 +build 1 +tomorrow 1 +cut 1 +logic 1 +vector 1 +causal 1 +messagedeliveri 1 +properti 1 +log 1 +checkpoint 1 +replic 1 +machin 1 +approach 1 +primari 1 +backupapproach 1 +order 1 +multicast 1 +applic 1 +cach 1 +disconnect 1 +oper 1 +servic 1 +synchron 1 +encrypt 1 +authent 1 +principl 1 +thepresent 1 +case 1 +studi 1 +exemplifi 1 +principleshav 1 +implement 1 +real 1 +interest 1 +meor 1 +size 1 +apresent 1 +share 1 +memori 1 +object 1 +kernel 1 +support 1 +weak 1 +replica 1 +electron 1 +commerc 1 +wide 1 +area 1 +networksgradingther 1 +begrad 1 +demonstr 1 +credibl 1 +effort 1 +onbehalf 1 +author 1 +whether 1 +right 1 +wrong 1 +willrec 1 +better 1 +three 1 +ispermit 1 +acollabor 1 +singl 1 +submit 1 +forgrad 1 +name 1 +collaborationswil 1 +consid 1 +violat 1 +academ 1 +home 1 +examin 1 +nocollabor 1 +howev 1 +towrit 1 +issuesthat 1 +list 1 +bedistribut 1 +start 1 +henc 1 +week 1 +tocomplet 1 +also 1 +team 1 +prepar 1 +twolectur 1 +previous 1 +choosethi 1 +option 1 +write 1 +asingl 1 +warmli 1 +toconsid 1 +volunt 1 +excellentopportun 1 +skill 1 +setsin 1 +subsequ 1 +shouldconform 1 +follow 1 +guidelin 1 +synonym 1 +precis 1 +isrequir 1 +ask 1 +imposs 1 +thatmak 1 +clear 1 +cannot 1 +matter 1 +algorithmi 1 +insuffici 1 +particular 1 +work 1 +develop 1 +must 1 +accompani 1 +ofcorrect 1 +unless 1 +explicitli 1 +told 1 +otherwis 1 +thetextbook 1 +channel 1 +asnapshot 1 +onth 1 +assumpt 1 +correct 1 +theprotocol 1 +produc 1 +atmost 1 +note 1 +book 1 +contain 1 +mattern 1 +thatcontain 1 +urg 1 +resist 1 +thetempt 1 +visit 1 +librari 1 +agener 1 +stabl 1 +moreeffici 1 +specif 1 +often 1 +conceptu 1 +simpler 1 +effici 1 +term 1 +exchang 1 +base 1 +special 1 +ideal 1 +need 1 +central 1 +monitorprocess 1 +process 1 +monitor 1 +basedsnapshot 1 +nowonlin 1 +filedescrib 1 +examth 1 +constitut 1 +fridaymai 1 +thepostscript 1 +describ 1 +question 1 +feel 1 +freeto 1 +email 1 +idea 1 +pleas 1 +yoursuggest 1 +edurajeev 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..a6afda0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,57 @@ +class 5 +fall 3 +jacob 3 +kornerup 3 +note 2 +page 2 +taught 2 +avail 2 +read 2 +midterm 2 +reflect 1 +current 1 +version 1 +differ 1 +content 1 +scope 1 +welcom 1 +homepag 1 +austin 1 +bywil 1 +adam 1 +practic 1 +informationabout 1 +cours 1 +look 1 +syllabu 1 +linea 1 +technic 1 +compil 1 +program 1 +turn 1 +inhomework 1 +electron 1 +homework 1 +solut 1 +time 1 +crude 1 +interfac 1 +newsgrouputexa 1 +correspond 1 +takesplac 1 +exampl 1 +textbook 1 +pascalprogramm 1 +organ 1 +chapter 1 +link 1 +home 1 +requir 1 +overhead 1 +viewinginform 1 +projecthow 1 +find 1 +offic 1 +studi 1 +examand 1 +answer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..02caf754 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,198 @@ +java 20 +cours 13 +comp 11 +lang 11 +program 7 +addison 7 +weslei 7 +object 6 +orient 6 +newsgroup 6 +librari 6 +design 5 +sourc 5 +utexa 4 +inform 4 +class 3 +manual 3 +code 3 +student 3 +follow 3 +relat 3 +stroustrup 3 +faq 3 +standard 3 +applet 3 +lavend 2 +eduoffic 2 +hour 2 +gokul 2 +templat 2 +opportun 2 +solut 2 +problem 2 +us 2 +text 2 +avail 2 +style 2 +draft 2 +setup 2 +link 2 +site 2 +postscript 2 +home 2 +compil 2 +libg 2 +server 2 +archiv 2 +note 2 +descriptionc 1 +programminglast 1 +updat 1 +professor 1 +greg 1 +appt 1 +rajaram 1 +mondai 1 +wednesdai 1 +station 1 +lavendercours 1 +infocours 1 +syllabusannouncementslectur 1 +noteshomework 1 +solutionsprogram 1 +assignmentsgnu 1 +manualsstandard 1 +codesocket 1 +manualdescript 1 +intend 1 +alreadi 1 +anintroductori 1 +offer 1 +introduct 1 +give 1 +think 1 +comput 1 +manner 1 +captur 1 +reusablepattern 1 +construct 1 +polymorph 1 +typehierarchi 1 +write 1 +profici 1 +professionallyus 1 +challeng 1 +coop 1 +bookstor 1 +horstmann 1 +master 1 +john 1 +wilei 1 +associ 1 +materiali 1 +drawn 1 +lectur 1 +materi 1 +languag 1 +edit 1 +elli 1 +annot 1 +refer 1 +evolut 1 +cargil 1 +cline 1 +lomow 1 +coplien 1 +advanc 1 +idiom 1 +plauger 1 +prentic 1 +hall 1 +gamma 1 +helm 1 +johnson 1 +vlissid 1 +pattern 1 +element 1 +reusabl 1 +softwar 1 +forum 1 +open 1 +discuss 1 +announcementsabout 1 +strongli 1 +encourag 1 +particip 1 +linediscuss 1 +fellow 1 +classmat 1 +lavendery 1 +also 1 +interest 1 +usenet 1 +helpjava 1 +hotjava 1 +advocaci 1 +misc 1 +programm 1 +secur 1 +tech 1 +javascript 1 +internet 1 +oopth 1 +date 1 +pleas 1 +know 1 +dead 1 +ansi 1 +page 1 +lab 1 +renssela 1 +polytechn 1 +institut 1 +product 1 +info 1 +objectspac 1 +libstdc 1 +mitgnu 1 +cygnusgnu 1 +document 1 +doug 1 +pagec 1 +mirror 1 +list 1 +ftpobject 1 +system 1 +developmentindex 1 +librariesth 1 +virtual 1 +libraryindex 1 +sourcesth 1 +talig 1 +frameworkjava 1 +javasoft 1 +gamelan 1 +huge 1 +registri 1 +digit 1 +espresso 1 +good 1 +summari 1 +current 1 +centr 1 +new 1 +event 1 +jar 1 +rate 1 +denni 1 +kafura 1 +virginia 1 +techdoug 1 +schmidt 1 +irvin 1 +washington 1 +universitydoug 1 +sunyintroductori 1 +univers 1 +groningen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..083869ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,56 @@ +program 5 +parallel 3 +languag 3 +assign 3 +tuesdai 2 +thursdai 2 +calvin 2 +offic 2 +utexa 2 +exampl 2 +commun 2 +manual 2 +compilerscst 1 +compilersfal 1 +lectur 1 +instructor 1 +taylor 1 +phone 1 +email 1 +hour 1 +handout 1 +gener 1 +inform 1 +case 1 +tera 1 +comput 1 +copyright 1 +posix 1 +thread 1 +skeleton 1 +code 1 +tutori 1 +hello 1 +world 1 +ironman 1 +interfac 1 +onlin 1 +postscript 1 +logp 1 +paper 1 +time 1 +spent 1 +messag 1 +pass 1 +share 1 +memori 1 +foundat 1 +practic 1 +partit 1 +dynam 1 +adapt 1 +grid 1 +hierarchieslast 1 +modifi 1 +decemb 1 +linlin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..93aebc26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,16 @@ +inform 2 +send 2 +mail 2 +home 1 +page 1 +csintroduct 1 +oper 1 +system 1 +class 1 +handout 1 +assign 1 +read 1 +project 1 +group 1 +prof 1 +newsgroup 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..0df5003d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,16 @@ +distribut 2 +read 1 +list 1 +fall 1 +thot 1 +topic 1 +systemsfil 1 +systemstopolog 1 +systemselectron 1 +commenrcefailur 1 +detectorsdistribut 1 +objectsconsistencysecuregroup 1 +communicationlanguag 1 +system 1 +dsmmobil 1 +comput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..01e11ee8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,13 @@ +mine 2 +monitor 2 +databas 1 +databasesprof 1 +daniel 1 +mirankernew 1 +seminarschedul 1 +term 1 +project 1 +materi 1 +overviewtentativeread 1 +list 1 +homeworkproject 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..12ef9bd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,25 @@ +cours 3 +code 2 +introduct 1 +artifici 1 +intellig 1 +instructor 1 +raymond 1 +mooneytim 1 +placespr 1 +tuth 1 +taylor 1 +hall 1 +informationclick 1 +inform 1 +sheetand 1 +syllabu 1 +last 1 +year 1 +updat 1 +file 1 +moonei 1 +depart 1 +networkfor 1 +trace 1 +assign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..20390324 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,35 @@ +homework 4 +cours 3 +test 3 +lisp 2 +program 2 +code 2 +symbol 1 +instructor 1 +raymond 1 +mooneyteach 1 +assist 1 +sowmya 1 +ramachandrantim 1 +placetu 1 +informationclick 1 +inform 1 +sheet 1 +syllabu 1 +informationon 1 +alsout 1 +allegro 1 +info 1 +page 1 +textparadigm 1 +artifici 1 +intellig 1 +case 1 +studi 1 +common 1 +lispassignmentsse 1 +file 1 +moonei 1 +depart 1 +networkfor 1 +trace 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..afb25862 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,52 @@ +learn 10 +homework 5 +cours 3 +machin 2 +base 2 +code 2 +project 2 +instructor 1 +raymond 1 +mooneytim 1 +placetu 1 +informationclick 1 +inform 1 +sheetand 1 +syllabu 1 +textmachinelearninglectur 1 +slide 1 +introduct 1 +concept 1 +gener 1 +order 1 +decis 1 +tree 1 +experiment 1 +evalu 1 +comput 1 +theori 1 +rule 1 +induct 1 +logic 1 +program 1 +neural 1 +network 1 +cluster 1 +unsupervis 1 +bayesian 1 +instanc 1 +explan 1 +learningassignmentsse 1 +file 1 +moonei 1 +depart 1 +networkfor 1 +trace 1 +final 1 +suggest 1 +spring 1 +paper 1 +format 1 +outlin 1 +talk 1 +version 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..9fc7ca85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,100 @@ +program 6 +exam 5 +languag 4 +guid 4 +comput 3 +scheme 3 +foundat 2 +sciencec 2 +section 2 +cours 2 +treesassign 2 +assign 2 +studi 2 +introduct 1 +scienc 1 +intend 1 +major 1 +atleast 1 +semest 1 +high 1 +school 1 +number 1 +willb 1 +chang 1 +next 1 +year 1 +never 1 +taken 1 +programmingcours 1 +take 1 +porter 1 +instead 1 +strong 1 +math 1 +background 1 +least 1 +precalculu 1 +requir 1 +dialect 1 +lisp 1 +theschem 1 +implement 1 +call 1 +gambit 1 +run 1 +macintoshcomput 1 +move 1 +faster 1 +previou 1 +coursesand 1 +emphas 1 +concept 1 +syntax 1 +work 1 +hard 1 +hopefulli 1 +learninga 1 +syllabu 1 +directori 1 +softwar 1 +tutorcopi 1 +pcassign 1 +machin 1 +simulationassign 1 +surf 1 +webassign 1 +basic 1 +schemeassign 1 +plai 1 +peano 1 +gamblingassign 1 +turtl 1 +graphicsassign 1 +snow 1 +list 1 +manipulationstudi 1 +vocabulari 1 +plot 1 +thickensassign 1 +treasur 1 +huntassign 1 +symbol 1 +algebraassign 1 +data 1 +abstract 1 +matricesstudi 1 +draw 1 +express 1 +unparsingassign 1 +translationstudi 1 +final 1 +thur 1 +gordon 1 +novak 1 +assignmentsprogram 1 +file 1 +descriptionsprogram 1 +submiss 1 +gradingmidterm 1 +guidefin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..6adb96da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,41 @@ +cours 3 +program 3 +compilersc 2 +compil 2 +student 2 +powerpc 2 +summer 2 +file 2 +studi 2 +cover 1 +design 1 +construct 1 +programminglanguag 1 +write 1 +pascal 1 +codei 1 +gener 1 +processor 1 +server 1 +incorpor 1 +chip 1 +heavi 1 +workload 1 +especi 1 +plan 1 +take 1 +expect 1 +dedicatetheir 1 +live 1 +five 1 +week 1 +syllabusprogram 1 +assignmentsprogram 1 +descript 1 +directori 1 +submiss 1 +gradingmidterm 1 +guidefin 1 +exam 1 +guidegordon 1 +novak 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..ad433e07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,53 @@ +intellig 3 +studi 3 +artifici 2 +comput 2 +stori 2 +intelligencec 1 +intelligenceartifici 1 +defin 1 +thecomput 1 +requir 1 +behavior 1 +attempt 1 +todupl 1 +us 1 +connectspercept 1 +environ 1 +action 1 +appropri 1 +achiev 1 +thegoal 1 +actor 1 +cours 1 +survei 1 +major 1 +topic 1 +includ 1 +search 1 +logic 1 +andknowledg 1 +represent 1 +natur 1 +languag 1 +process 1 +withbrief 1 +coverag 1 +brain 1 +machin 1 +vision 1 +syllabusprogram 1 +assignmentsprogram 1 +file 1 +descriptionsmidterm 1 +guidefin 1 +exam 1 +guidepred 1 +calculu 1 +problemssolut 1 +select 1 +problemsnot 1 +bibliographi 1 +human 1 +braingordon 1 +novak 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..804a23fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,52 @@ +program 5 +automat 2 +lectur 2 +semest 2 +programmingc 1 +programmingautomat 1 +gener 1 +execut 1 +programsfrom 1 +specif 1 +higher 1 +level 1 +ordinari 1 +languag 1 +cours 1 +consist 1 +first 1 +third 1 +homework 1 +problem 1 +assign 1 +given 1 +illustrateth 1 +materi 1 +long 1 +requirelearn 1 +sever 1 +kind 1 +system 1 +latter 1 +partof 1 +cover 1 +read 1 +research 1 +literatur 1 +student 1 +expect 1 +present 1 +paper 1 +class 1 +syllabusbibliographyassign 1 +compil 1 +optim 1 +done 1 +handpattern 1 +matchingobject 1 +orient 1 +programmingintroduct 1 +glispview 1 +graphic 1 +programminggordon 1 +novak 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..8a1d251a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,675 @@ +languag 13 +system 13 +program 13 +comput 12 +data 11 +parallel 11 +level 10 +algorithm 10 +network 9 +fault 9 +cilk 8 +toler 8 +logic 7 +compil 7 +softwar 7 +us 7 +problem 7 +adapt 6 +special 6 +design 6 +librari 6 +present 6 +approach 6 +describ 6 +implement 6 +learn 6 +cours 5 +robot 5 +natur 5 +applic 5 +user 5 +call 5 +file 5 +allow 5 +machin 5 +provid 5 +represent 5 +model 5 +interfac 5 +effici 5 +queri 5 +graduat 4 +task 4 +evolut 4 +neural 4 +open 4 +process 4 +reus 4 +high 4 +cooper 4 +featur 4 +failur 4 +talk 4 +well 4 +avail 4 +wide 4 +area 4 +abstract 4 +mathemat 4 +state 4 +result 4 +alamo 4 +sourc 4 +analysi 4 +idea 4 +introduct 3 +robert 3 +scienc 3 +semant 3 +hierarchi 3 +cognit 3 +sequenti 3 +decis 3 +cluster 3 +evalu 3 +inform 3 +current 3 +enabl 3 +distribut 3 +environ 3 +advantag 3 +explor 3 +perform 3 +execut 3 +gener 3 +issu 3 +differ 3 +formal 3 +method 3 +build 3 +make 3 +produc 3 +optim 3 +symbiot 3 +search 3 +develop 3 +databas 3 +engin 3 +chill 3 +also 3 +procedur 3 +conceptu 3 +notat 3 +pram 3 +lectur 2 +offic 2 +utexa 2 +free 2 +time 2 +student 2 +undergradu 2 +spatial 2 +principl 2 +programmingoctob 2 +wilsonextens 2 +ramachandranth 2 +beyond 2 +simpl 2 +technolog 2 +trend 2 +highli 2 +server 2 +power 2 +requir 2 +hand 2 +translat 2 +reliabl 2 +access 2 +solut 2 +lightweight 2 +goal 2 +scale 2 +depend 2 +sever 2 +number 2 +need 2 +integr 2 +theapplic 2 +support 2 +address 2 +effect 2 +overview 2 +runtim 2 +workstat 2 +within 2 +continu 2 +even 2 +automat 2 +includ 2 +fast 2 +close 2 +among 2 +singl 2 +consist 2 +space 2 +structur 2 +discret 2 +action 2 +place 2 +path 2 +built 2 +base 2 +view 2 +simpli 2 +prolog 2 +known 2 +concern 2 +experi 2 +appli 2 +reinforc 2 +sane 2 +popul 2 +genet 2 +form 2 +abl 2 +broad 2 +interest 2 +datasourc 2 +architectur 2 +theabstract 2 +embodi 2 +higher 2 +code 2 +advanc 2 +facil 2 +serv 2 +resolv 2 +exampl 2 +offer 2 +determinist 2 +corpu 2 +difficult 2 +easi 2 +asymptot 2 +main 2 +underli 2 +theform 2 +extens 2 +portabl 2 +discuss 2 +sciencecst 1 +sciencefal 1 +mondai 1 +instructor 1 +blumof 1 +taylor 1 +phone 1 +email 1 +hour 1 +thursdai 1 +feel 1 +stop 1 +semest 1 +seminar 1 +taken 1 +apass 1 +fail 1 +basi 1 +topic 1 +honor 1 +receiv 1 +credit 1 +must 1 +beregist 1 +attend 1 +least 1 +schedulespeakertitleseptemb 1 +mirankeralamo 1 +warehouseseptemb 1 +kuipersth 1 +humanand 1 +mapsseptemb 1 +blumofecilk 1 +reliableparallel 1 +workstationsseptemb 1 +risto 1 +miikkulainenlearn 1 +throughsymbiot 1 +networksoctob 1 +vladimir 1 +lifschitzmathemat 1 +paul 1 +reflectionoctob 1 +mooneylearn 1 +usinginduct 1 +mike 1 +dahlindistribut 1 +internetsnovemb 1 +gordon 1 +novaksoftwar 1 +genericprocedur 1 +viewsnovemb 1 +vijaya 1 +parallelalgorithmsnovemb 1 +lorenzo 1 +alvisilighweight 1 +tolerancenovemb 1 +calvin 1 +linadapt 1 +optimizationdecemb 1 +greg 1 +plaxtonanalysi 1 +algorithmslighweight 1 +tolerancelorenzo 1 +alvisidistribut 1 +move 1 +confin 1 +academia 1 +andresearch 1 +lab 1 +revolution 1 +busi 1 +government 1 +organ 1 +citizen 1 +andcollect 1 +promis 1 +todramat 1 +increas 1 +pace 1 +revolut 1 +thedesign 1 +beyondth 1 +client 1 +paradigm 1 +har 1 +ofdistribut 1 +scope 1 +emphasi 1 +toleranttechniqu 1 +undergo 1 +dramat 1 +chang 1 +willceas 1 +expens 1 +applicationsto 1 +exot 1 +distributedinform 1 +infrastructur 1 +acompetit 1 +guarante 1 +criticalinform 1 +engineerfault 1 +dedic 1 +resourc 1 +negligibleimpact 1 +cost 1 +offailur 1 +transpar 1 +programm 1 +emerg 1 +communicatethrough 1 +messag 1 +onnetwork 1 +workstationsrobert 1 +blumofethi 1 +pronouncedsilk 1 +multithread 1 +andcilk 1 +functionalsubset 1 +providesadapt 1 +tranpar 1 +touser 1 +mean 1 +ofworkst 1 +run 1 +grow 1 +shrinkdynam 1 +idl 1 +onth 1 +amount 1 +addit 1 +cilkprogram 1 +workstationscrash 1 +detect 1 +andrecov 1 +livedemonstr 1 +internetsmik 1 +dahlinthi 1 +give 1 +indistribut 1 +applicationsmotiv 1 +aggress 1 +inclust 1 +servicei 1 +request 1 +nodesto 1 +better 1 +centralserv 1 +challeng 1 +goodperform 1 +despit 1 +limit 1 +networkperform 1 +node 1 +projectwil 1 +human 1 +mapsbenjamin 1 +kuipershuman 1 +map 1 +reli 1 +forlarg 1 +ontolog 1 +similarli 1 +varietyof 1 +propos 1 +andmap 1 +unknown 1 +cast 1 +diverserepresent 1 +spatialsemant 1 +object 1 +relat 1 +andassumpt 1 +foundat 1 +thecontrol 1 +dynam 1 +whose 1 +stabl 1 +equilibrium 1 +point 1 +beabstract 1 +distinct 1 +trajectori 1 +link 1 +givinga 1 +causal 1 +graph 1 +causalgraph 1 +turn 1 +topologicalnetwork 1 +local 1 +metric 1 +occupancygrid 1 +neighborhood 1 +theframework 1 +topolog 1 +without 1 +usual 1 +ofglob 1 +programmingvladimir 1 +lifschitzlog 1 +sister 1 +functionalprogram 1 +notne 1 +contain 1 +explicit 1 +oper 1 +instruct 1 +instead 1 +itcan 1 +fact 1 +sufficientto 1 +solv 1 +declar 1 +executedus 1 +autom 1 +reason 1 +best 1 +logicprogram 1 +theori 1 +withdefin 1 +thereason 1 +investig 1 +thesound 1 +optimizationcalvin 1 +linthi 1 +andtheir 1 +differenthardwar 1 +platform 1 +efficientand 1 +usabl 1 +framework 1 +suchlibrari 1 +three 1 +plan 1 +thesetechniqu 1 +scientif 1 +weexplain 1 +facilit 1 +ofneur 1 +networksristo 1 +miikkulainena 1 +novel 1 +neuro 1 +evolv 1 +neuronsthrough 1 +given 1 +promot 1 +inth 1 +anddiscourag 1 +converg 1 +suboptim 1 +toextract 1 +domain 1 +specif 1 +spars 1 +rang 1 +sequentialdecis 1 +control 1 +game 1 +plai 1 +resourcemanag 1 +warehousedan 1 +mirankerth 1 +effort 1 +direct 1 +intra 1 +andint 1 +enumer 1 +site 1 +theuser 1 +illus 1 +virtual 1 +follow 1 +byqueri 1 +tool 1 +central 1 +corba 1 +compliant 1 +interfacethat 1 +uniform 1 +heterogen 1 +ofabstract 1 +clever 1 +algorithmsand 1 +separ 1 +isol 1 +buffer 1 +anddata 1 +prefetch 1 +claim 1 +often 1 +anobject 1 +orient 1 +deduct 1 +infer 1 +activedatabas 1 +mine 1 +constructedus 1 +common 1 +final 1 +sinc 1 +output 1 +databasefacil 1 +compon 1 +thealamo 1 +compos 1 +dataintegr 1 +particular 1 +anticip 1 +elementsof 1 +repres 1 +meta 1 +andsemant 1 +conflict 1 +ultim 1 +furthercomposit 1 +complex 1 +knowledg 1 +answerhigh 1 +induct 1 +logicprogrammingraymond 1 +mooneyinduct 1 +learningprolog 1 +offirst 1 +order 1 +standard 1 +learningmethod 1 +constrain 1 +fix 1 +length 1 +vector 1 +areappli 1 +believethi 1 +richer 1 +import 1 +havedevelop 1 +parsersfrom 1 +pars 1 +sentenc 1 +obtain 1 +superior 1 +onsever 1 +artifici 1 +corpora 1 +previous 1 +test 1 +networkmethod 1 +encourag 1 +realist 1 +ati 1 +ofairlin 1 +automaticallydevelop 1 +complet 1 +englishdatabas 1 +moreaccur 1 +parser 1 +smallgeograph 1 +foidl 1 +past 1 +tens 1 +english 1 +surpass 1 +previou 1 +treemethod 1 +throughviewsgordon 1 +novak 1 +clearli 1 +good 1 +toachiev 1 +practic 1 +assumpt 1 +thesoftwar 1 +type 1 +typesus 1 +agener 1 +version 1 +custom 1 +graphic 1 +specifyview 1 +theworld 1 +write 1 +adesir 1 +algorithmsgreg 1 +plaxtona 1 +major 1 +focu 1 +theoret 1 +andanalysi 1 +random 1 +forspecif 1 +research 1 +notuncommon 1 +come 1 +across 1 +written 1 +paper 1 +straightforward 1 +surprisingli 1 +lengthi 1 +deal 1 +minor 1 +side 1 +case 1 +havelittl 1 +noth 1 +suchpap 1 +seem 1 +signific 1 +andform 1 +difficulti 1 +gapsinher 1 +convent 1 +inadequatefor 1 +succinctli 1 +certain 1 +straightforwardalgorithm 1 +significantli 1 +reduc 1 +theconceptu 1 +associ 1 +trivialclass 1 +concret 1 +consid 1 +analysisof 1 +linear 1 +select 1 +blum 1 +floyd 1 +pratt 1 +rivest 1 +tarjan 1 +algorithmsvijaya 1 +forcombinatori 1 +studi 1 +recentyear 1 +larg 1 +willdescrib 1 +work 1 +parallelalgorithm 1 +thesealgorithm 1 +massiv 1 +maspar 1 +thendescrib 1 +queu 1 +variant 1 +wepropos 1 +appropri 1 +parallelshar 1 +memori 1 +tradit 1 +reflectionpaul 1 +ad 1 +fairli 1 +modif 1 +addnew 1 +analys 1 +reflect 1 +examin 1 +ofinterest 1 +part 1 +affect 1 +structureaccordingli 1 +thing 1 +modular 1 +adapat 1 +recent 1 +workon 1 +rscheme 1 +extensiblelanguag 1 +last 1 +modifi 1 +novemb 1 +blumoferdb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..ef62e2ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,68 @@ +problem 12 +chapter 11 +except 6 +solut 5 +offic 4 +program 4 +assign 4 +septemb 4 +octob 4 +utexa 3 +exam 3 +novemb 3 +introduct 2 +oper 2 +lectur 2 +robert 2 +phone 2 +email 2 +hour 2 +thursdai 2 +gooti 2 +exampl 2 +found 2 +crypt 2 +topic 2 +cover 2 +midterm 2 +decemb 2 +systemsc 1 +systemsfal 1 +mondai 1 +wednesdai 1 +instructor 1 +blumof 1 +taylor 1 +feel 1 +free 1 +stop 1 +time 1 +teach 1 +assist 1 +subramanyam 1 +tuesdai 1 +station 1 +solari 1 +canb 1 +implement 1 +support 1 +multiplemap 1 +assum 1 +map 1 +file 1 +least 1 +long 1 +themap 1 +test 1 +encrypt 1 +decrypt 1 +handout 1 +gener 1 +inform 1 +final 1 +solutionsread 1 +book 1 +date 1 +last 1 +modifi 1 +blumoferdb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..01abb560 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,45 @@ +neural 3 +network 3 +utexa 3 +risto 2 +offic 2 +class 2 +fall 1 +networksfal 1 +uniqu 1 +number 1 +instructor 1 +miikkulainen 1 +bednar 1 +jbednar 1 +station 1 +text 1 +lauren 1 +fausett 1 +fundament 1 +ofneur 1 +architectur 1 +algorithm 1 +applic 1 +englewood 1 +cliff 1 +prenticehal 1 +select 1 +paper 1 +note 1 +copi 1 +slide 1 +us 1 +lectur 1 +grade 1 +homework 1 +midterm 1 +final 1 +detail 1 +schedulehomework 1 +assignmentsexamsclass 1 +resourcesa 1 +postscript 1 +versionof 1 +syllabusristo 1 +edusun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..30997ead --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,76 @@ +cognit 6 +scienc 5 +utexa 3 +risto 3 +read 3 +discuss 3 +introduct 2 +dept 2 +hall 2 +offic 2 +hour 2 +appt 2 +student 2 +note 2 +short 2 +page 2 +fall 1 +sciencefal 1 +instructor 1 +nichola 1 +asher 1 +philosophi 1 +waggen 1 +nasher 1 +berti 1 +miikkulainen 1 +comput 1 +taylor 1 +text 1 +posner 1 +foundat 1 +mitpress 1 +packet 1 +requir 1 +regular 1 +interv 1 +submit 1 +critic 1 +commentari 1 +collabor 1 +withanoth 1 +also 1 +write 1 +paper 1 +approxim 1 +signific 1 +research 1 +topic 1 +find 1 +ofinterest 1 +count 1 +toward 1 +final 1 +grade 1 +thepap 1 +class 1 +attend 1 +particip 1 +alsorequir 1 +detail 1 +cours 1 +descriptioncours 1 +schedulediscuss 1 +notesperson 1 +adscollabor 1 +paperclass 1 +resourcesstud 1 +questionnaireus 1 +link 1 +center 1 +list 1 +sciencefaculti 1 +pointer 1 +resourc 1 +gener 1 +edusun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..c311e439 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,190 @@ +program 6 +respons 5 +pascal 4 +page 4 +cours 4 +assign 4 +student 4 +take 4 +grade 4 +hour 4 +syllabu 3 +detail 3 +exam 3 +semest 3 +class 3 +wait 3 +deadlin 3 +quizz 3 +room 3 +suzi 2 +need 2 +requir 2 +schedul 2 +thenewsgroup 2 +utexa 2 +updat 2 +work 2 +prepar 2 +becom 2 +quiz 2 +long 2 +warn 2 +mani 2 +group 2 +note 2 +limit 2 +thetest 2 +programmingcsp 1 +pascalintroductori 1 +comput 1 +programminginstructor 1 +gallagherwelcom 1 +excit 1 +intellectu 1 +challeng 1 +cspi 1 +design 1 +give 1 +firm 1 +foundat 1 +andso 1 +effort 1 +read 1 +thecours 1 +carefulli 1 +summari 1 +contain 1 +wella 1 +import 1 +polici 1 +date 1 +otherdeadlin 1 +everyth 1 +thesyllabu 1 +without 1 +delai 1 +avail 1 +jenn 1 +copi 1 +guadalup 1 +hundr 1 +takethi 1 +courseeach 1 +highli 1 +structur 1 +foral 1 +monitor 1 +frequent 1 +expect 1 +depend 1 +howwel 1 +event 1 +extrem 1 +difficult 1 +behind 1 +procedur 1 +riski 1 +near 1 +deadlineto 1 +turn 1 +late 1 +get 1 +half 1 +credit 1 +line 1 +unfortun 1 +construct 1 +link 1 +nowher 1 +apolog 1 +everyon 1 +attend 1 +lectur 1 +gallagh 1 +everi 1 +thursdayeven 1 +welch 1 +mondai 1 +wednesdai 1 +break 1 +intosmal 1 +section 1 +discuss 1 +ofth 1 +materi 1 +ateach 1 +assist 1 +nine 1 +written 1 +debug 1 +theprogram 1 +laboratori 1 +thatlaboratori 1 +thatgrad 1 +even 1 +less 1 +andyou 1 +within 1 +eight 1 +thattest 1 +limitedand 1 +often 1 +foravail 1 +proctor 1 +sever 1 +hoursbefor 1 +submit 1 +andquizz 1 +earli 1 +enough 1 +three 1 +must 1 +betaken 1 +prescrib 1 +time 1 +make 1 +soon 1 +possibl 1 +begin 1 +file 1 +openedfor 1 +uniqu 1 +identifi 1 +yourstud 1 +access 1 +orsak 1 +requiredtextbook 1 +dale 1 +weem 1 +wewil 1 +cover 1 +chapter 1 +individu 1 +background 1 +vari 1 +consider 1 +thiscours 1 +partial 1 +self 1 +pace 1 +feel 1 +well 1 +click 1 +howev 1 +still 1 +liabl 1 +commun 1 +exampl 1 +could 1 +form 1 +studi 1 +also 1 +gripe 1 +thought 1 +articl 1 +gener 1 +interest 1 +elicit 1 +repli 1 +staff 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..1604d7d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,59 @@ +output 8 +data 8 +sampl 6 +project 4 +file 4 +recoveri 4 +problem 3 +solut 3 +test 3 +databas 2 +system 2 +implement 2 +tong 2 +wang 2 +pleas 2 +dept 2 +student 2 +contest 2 +benchmark 2 +script 2 +time 2 +order 2 +tupl 2 +differ 2 +program 2 +pass 2 +fall 1 +professor 1 +batori 1 +syllabu 1 +homework 1 +read 1 +first 1 +retriev 1 +ret_into 1 +replac 1 +append 1 +delet 1 +mdb 1 +us 1 +measur 1 +run 1 +sinc 1 +attribut 1 +anoth 1 +wrote 1 +perl 1 +transform 1 +compar 1 +diff 1 +turn 1 +without 1 +error 1 +fail 1 +reason 1 +email 1 +suggest 1 +comment 1 +medec 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..10830c8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,302 @@ +system 42 +page 30 +oper 21 +comput 16 +distribut 10 +proceed 10 +time 9 +cours 7 +file 7 +decemb 7 +review 7 +real 6 +commun 6 +process 6 +ieee 6 +transact 6 +design 5 +implement 5 +advanc 4 +support 4 +concept 4 +issu 4 +schedul 4 +sosp 4 +mach 4 +kernel 4 +harrick 3 +read 3 +fall 3 +wireless 3 +mobil 3 +environ 3 +share 3 +confer 3 +april 3 +unix 3 +levi 3 +usenix 3 +symposium 3 +cheriton 3 +cach 3 +januari 3 +instructor 2 +descript 2 +gener 2 +inform 2 +textbook 2 +requir 2 +list 2 +earli 2 +idea 2 +topic 2 +research 2 +avail 2 +paper 2 +project 2 +dalei 2 +juli 2 +andrew 2 +operatingsystem 2 +survei 2 +thread 2 +formultiprogram 2 +memori 2 +multiprocessor 2 +anderson 2 +lazowska 2 +network 2 +novemb 2 +remot 2 +procedur 2 +call 2 +februari 2 +onoper 2 +ousterhout 2 +germani 2 +fault 2 +intern 2 +workshop 2 +beyond 2 +karshmer 2 +nehmer 2 +springer 2 +verlag 2 +summer 2 +june 2 +august 2 +schroeder 2 +needham 2 +protect 2 +princeton 2 +march 2 +trigger 2 +tabl 1 +content 1 +prerequisit 1 +synopsi 1 +titl 1 +professor 1 +last 1 +offer 1 +prerequisitegradu 1 +stand 1 +undergradu 1 +systemssuch 1 +student 1 +expect 1 +familiar 1 +materialin 1 +chapter 1 +peterson 1 +andsilberschatz 1 +synopsisc 1 +breadth 1 +coveringboth 1 +theoret 1 +practic 1 +systemdesign 1 +cover 1 +includ 1 +ofdistribut 1 +formobil 1 +case 1 +studi 1 +anemphasi 1 +place 1 +current 1 +collect 1 +articl 1 +made 1 +theinstructor 1 +requirementsstud 1 +number 1 +area 1 +anddiscuss 1 +grade 1 +determin 1 +examin 1 +aterm 1 +present 1 +systemsfernando 1 +corbato 1 +marjori 1 +merwin 1 +daggett 1 +robert 1 +anexperiment 1 +afip 1 +spring 1 +joint 1 +brinch 1 +hansen 1 +nucleu 1 +multiprogram 1 +bensoussan 1 +clingen 1 +multic 1 +virtualmemori 1 +denni 1 +ritchi 1 +thompson 1 +overview 1 +tannenbaum 1 +robbert 1 +reness 1 +silberschatz 1 +andexampl 1 +managementa 1 +tucker 1 +gupta 1 +control 1 +theth 1 +thoma 1 +edward 1 +henri 1 +theperform 1 +implic 1 +manag 1 +altern 1 +forshar 1 +schedulingr 1 +bunt 1 +techniqu 1 +octob 1 +black 1 +concurr 1 +parallel 1 +inth 1 +inter 1 +communicationj 1 +barrera 1 +fast 1 +inproceed 1 +group 1 +acmtransact 1 +birel 1 +bruce 1 +nelson 1 +rpc 1 +oncomput 1 +bershad 1 +lightweightremot 1 +principl 1 +migrationf 1 +dougli 1 +migrat 1 +spriteoper 1 +internationalconfer 1 +berlin 1 +septemb 1 +theimer 1 +lantz 1 +preemptabl 1 +execut 1 +tolerancef 1 +cristian 1 +basic 1 +toler 1 +distributedsystem 1 +sand 1 +birman 1 +joseph 1 +reliabl 1 +presenc 1 +offailur 1 +systemsr 1 +sandberg 1 +goldberg 1 +kleiman 1 +ofsun 1 +mckusick 1 +leffler 1 +fabri 1 +fastfil 1 +rosenblum 1 +alog 1 +structur 1 +systemsm 1 +gifford 1 +fora 1 +programm 1 +workstat 1 +terri 1 +hint 1 +ieeetransact 1 +softwar 1 +engin 1 +securityr 1 +us 1 +encrypt 1 +authent 1 +inlarg 1 +butler 1 +lampson 1 +origin 1 +proc 1 +oninform 1 +scienc 1 +accetta 1 +baron 1 +boloski 1 +golub 1 +rashid 1 +tevanian 1 +young 1 +foundat 1 +develop 1 +systemsh 1 +kopetz 1 +event 1 +versu 1 +timesystem 1 +layland 1 +algorithm 1 +hard 1 +journal 1 +theacm 1 +zhao 1 +ramamritham 1 +stankov 1 +preemptiv 1 +schedulingund 1 +resourc 1 +constraint 1 +tokuda 1 +mercer 1 +art 1 +computingb 1 +badrinath 1 +acharya 1 +imielinski 1 +impact 1 +ondistribut 1 +satyanarayanan 1 +kistler 1 +kumar 1 +okasaki 1 +siegel 1 +steer 1 +coda 1 +highli 1 +distributedworkst 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..907d9bc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,403 @@ +multimedia 35 +page 30 +proceed 20 +system 19 +video 13 +comput 12 +server 11 +design 11 +commun 11 +network 10 +oper 10 +octob 10 +septemb 9 +novemb 9 +goyal 9 +cours 8 +compress 8 +algorithm 8 +issu 8 +ieee 8 +support 7 +techniqu 7 +sigcomm 6 +list 5 +time 5 +schedul 5 +digit 5 +shenoi 5 +april 5 +august 5 +instructor 4 +harrick 4 +storag 4 +applic 4 +protocol 4 +audio 4 +research 4 +disk 4 +journal 4 +area 4 +offic 3 +hour 3 +media 3 +architectur 3 +mpeg 3 +placement 3 +project 3 +real 3 +packet 3 +analysi 3 +transport 3 +survei 3 +kandlur 3 +effici 3 +control 3 +scheme 3 +manag 3 +intern 3 +select 3 +zhang 3 +review 3 +descript 2 +teach 2 +read 2 +overview 2 +technolog 2 +trend 2 +problem 2 +class 2 +format 2 +fundament 2 +jpeg 2 +retriev 2 +cach 2 +batch 2 +introduct 2 +channel 2 +decemb 2 +fall 2 +number 2 +place 2 +basic 2 +concept 2 +cover 2 +multimediasystem 2 +topic 2 +student 2 +expect 2 +implement 2 +determin 2 +appoint 2 +phone 2 +mail 2 +utexa 2 +copi 2 +steinmetz 2 +principl 2 +standard 2 +hierarch 2 +magazin 2 +venkat 2 +rangan 2 +optim 2 +ofmultimedia 2 +arrai 2 +icmc 2 +washington 2 +symposium 2 +june 2 +anaheim 2 +chen 2 +workshop 2 +diego 2 +francisco 2 +scale 2 +buffer 2 +sitaram 2 +dynam 2 +polici 2 +boston 2 +keshav 2 +rate 2 +base 2 +campbel 2 +coulson 2 +peterson 2 +jacobson 2 +mccann 2 +framework 2 +tabl 1 +content 1 +gener 1 +inform 1 +prerequisit 1 +synopsi 1 +textbook 1 +requir 1 +assist 1 +spring 1 +databas 1 +handout 1 +note 1 +postscript 1 +scalabl 1 +possibl 1 +integr 1 +servic 1 +establish 1 +processor 1 +descriptiongener 1 +informationcours 1 +titl 1 +professor 1 +detail 1 +offer 1 +uniqu 1 +taylor 1 +hall 1 +prerequisitesgradu 1 +stand 1 +familiar 1 +incomput 1 +synopsisc 1 +advanc 1 +boththeoret 1 +practic 1 +includ 1 +systemsupport 1 +well 1 +transportprotocol 1 +emphasi 1 +current 1 +designissu 1 +textbooka 1 +collect 1 +recent 1 +articl 1 +madeavail 1 +requirementsth 1 +introduc 1 +thetop 1 +follow 1 +discuss 1 +relatedpap 1 +question 1 +answer 1 +tounderstand 1 +describ 1 +critiqu 1 +contribut 1 +ofpap 1 +addition 1 +carri 1 +asemest 1 +long 1 +grade 1 +examin 1 +andclass 1 +particip 1 +vintuesdai 1 +assistantmr 1 +prashant 1 +eduread 1 +cntain 1 +paper 1 +theread 1 +avail 1 +mondai 1 +speedwai 1 +locat 1 +dobi 1 +mall 1 +guadalup 1 +austin 1 +pleas 1 +callthem 1 +make 1 +sure 1 +packag 1 +readi 1 +compressionr 1 +data 1 +wallac 1 +still 1 +pictur 1 +gall 1 +multimediaappl 1 +chiang 1 +anastassi 1 +code 1 +digitaltelevis 1 +serversoverview 1 +serverdesign 1 +gemmel 1 +row 1 +tutori 1 +object 1 +ieeeintern 1 +confer 1 +failur 1 +recoveri 1 +inmulti 1 +annualintern 1 +fault 1 +toler 1 +ftc 1 +pasadena 1 +california 1 +chiueh 1 +katz 1 +multi 1 +resolut 1 +represent 1 +forparallel 1 +admiss 1 +groupedsweep 1 +ofthird 1 +supportfor 1 +narasimha 1 +reddi 1 +wylli 1 +multimediai 1 +statist 1 +admissioncontrol 1 +acmmultimedia 1 +designinglarg 1 +march 1 +scan 1 +inmultimedia 1 +sanfrancisco 1 +stream 1 +convers 1 +interactivevideo 1 +playout 1 +summer 1 +space 1 +shahabuddin 1 +foran 1 +demand 1 +demandvideo 1 +report 1 +papadimitri 1 +ramanathan 1 +informationcach 1 +deliveri 1 +person 1 +program 1 +homeentertain 1 +internationalconfer 1 +multimedianetwork 1 +layer 1 +shenker 1 +futur 1 +internet 1 +ferrari 1 +verma 1 +channelestablish 1 +wide 1 +areasin 1 +comparison 1 +servicedisciplin 1 +delaybound 1 +heterogen 1 +toappear 1 +also 1 +workshopon 1 +nossdav 1 +durham 1 +hampshir 1 +chow 1 +losslesssmooth 1 +london 1 +salehi 1 +kuros 1 +towslei 1 +storedvideo 1 +reduc 1 +variabl 1 +resourc 1 +requirementsthrough 1 +smooth 1 +sigmetr 1 +philadelphia 1 +grossglaus 1 +rcbr 1 +simpl 1 +efficientservic 1 +multipl 1 +traffic 1 +acmsigcomm 1 +kanakia 1 +misra 1 +reibman 1 +adapt 1 +congestioncontrol 1 +proceedingsof 1 +clark 1 +tennenhous 1 +consider 1 +newgener 1 +hutchison 1 +qualiti 1 +servicearchitectur 1 +turner 1 +imag 1 +transfer 1 +floyd 1 +reliablemulticast 1 +light 1 +weight 1 +session 1 +levelfram 1 +buss 1 +deffner 1 +schulzrinn 1 +januari 1 +blakowski 1 +synchron 1 +refer 1 +model 1 +specif 1 +case 1 +studi 1 +onselect 1 +januaryoper 1 +multimediag 1 +robin 1 +blair 1 +papathoma 1 +andd 1 +shepherd 1 +basedcommun 1 +choru 1 +incommun 1 +druschel 1 +abbott 1 +pagel 1 +subsystem 1 +workstat 1 +ofth 1 +third 1 +systemssupport 1 +govindan 1 +anderson 1 +mechan 1 +forcontinu 1 +operatingsystem 1 +pacif 1 +grove 1 +formultimedia 1 +second 1 +symposiumon 1 +osdi 1 +seattl 1 +conferencingh 1 +zellweg 1 +swinehart 1 +venkatrangan 1 +conferenc 1 +etherphon 1 +environ 1 +flexibl 1 +packetvideo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..34cb2a29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,296 @@ +proceed 17 +multimedia 10 +multicast 10 +cours 8 +rout 8 +system 8 +sigcomm 7 +databas 6 +design 5 +boston 5 +real 5 +share 5 +conferenc 4 +schedul 4 +page 4 +commun 3 +harrick 3 +inform 3 +requir 3 +read 3 +internet 3 +time 3 +oper 3 +paper 3 +acmmultimedia 3 +francisco 3 +novemb 3 +base 3 +technic 3 +report 3 +queri 3 +pictur 3 +fall 2 +instructor 2 +descript 2 +gener 2 +textbook 2 +offic 2 +list 2 +processor 2 +support 2 +topic 2 +protocol 2 +present 2 +class 2 +grade 2 +project 2 +mccann 2 +jacobson 2 +packet 2 +video 2 +crowcroft 2 +confer 2 +scalabl 2 +applic 2 +deer 2 +cheriton 2 +transact 2 +tree 2 +reliabl 2 +interact 2 +gupta 2 +resourc 2 +infocom 2 +warldersburg 2 +weihl 2 +symposium 2 +jeffai 2 +paradigm 2 +imag 2 +jain 2 +tabl 1 +content 1 +prerequisit 1 +synopsi 1 +hour 1 +servic 1 +titl 1 +professor 1 +semest 1 +meet 1 +fridai 1 +prerequisitesgradu 1 +stand 1 +familiar 1 +basic 1 +concept 1 +networkprotocol 1 +multimediasystem 1 +synopsisthi 1 +advanc 1 +bediscuss 1 +includ 1 +transport 1 +formultimedia 1 +mobil 1 +network 1 +andmultimedia 1 +multimediadatabas 1 +emphasi 1 +place 1 +current 1 +issu 1 +andresearch 1 +collect 1 +research 1 +articl 1 +made 1 +avail 1 +theinstructor 1 +requirementsstud 1 +number 1 +area 1 +aswel 1 +discuss 1 +determinedbas 1 +particip 1 +studentsenrol 1 +letter 1 +submit 1 +orcarri 1 +hoursfridai 1 +appoint 1 +phone 1 +mail 1 +utexa 1 +flexibleframework 1 +handlei 1 +wakeman 1 +controlchannel 1 +cccp 1 +build 1 +conferencecontrol 1 +gajewska 1 +kistler 1 +manass 1 +redel 1 +argo 1 +systemfor 1 +distribut 1 +collabor 1 +gong 1 +multipoint 1 +audio 1 +control 1 +basedmultimedia 1 +zellweg 1 +swinehart 1 +venkat 1 +rangan 1 +etherphon 1 +environ 1 +ieeecomput 1 +octob 1 +datagraminternetwork 1 +extend 1 +lan 1 +computersystem 1 +ballardi 1 +franci 1 +core 1 +architectur 1 +inter 1 +domain 1 +thyagarajan 1 +hierarch 1 +distanc 1 +vector 1 +mbone 1 +widyono 1 +andevalu 1 +algorithm 1 +channel 1 +msthesi 1 +berkelei 1 +kompella 1 +pasqual 1 +polyzo 1 +multimediacommun 1 +univers 1 +california 1 +diego 1 +floyd 1 +zhang 1 +framework 1 +light 1 +weightsess 1 +level 1 +frame 1 +ofacm 1 +holbrook 1 +singhal 1 +receiv 1 +fordistribut 1 +simul 1 +theacm 1 +herzog 1 +estrin 1 +shenker 1 +cost 1 +axiomat 1 +analysi 1 +how 1 +moran 1 +nguyen 1 +multi 1 +parti 1 +timecommun 1 +servicesj 1 +guyton 1 +schwartz 1 +locat 1 +nearbi 1 +copi 1 +replic 1 +server 1 +mogul 1 +case 1 +forpersist 1 +connect 1 +http 1 +acmsigcomm 1 +supportc 1 +lotteri 1 +effici 1 +flexibleproport 1 +mangement 1 +ofoper 1 +implement 1 +osdi 1 +strideschedul 1 +determinist 1 +proport 1 +resourcemanag 1 +golestani 1 +self 1 +clock 1 +fair 1 +queue 1 +scheme 1 +high 1 +speedappl 1 +govindan 1 +anderson 1 +mechan 1 +forcontinu 1 +media 1 +onoper 1 +principl 1 +sosp 1 +monterei 1 +timeproduc 1 +consum 1 +construct 1 +ofeffici 1 +predict 1 +ofth 1 +sigapp 1 +appli 1 +comput 1 +latenc 1 +manag 1 +intim 1 +workshop 1 +timeoper 1 +softwar 1 +seattl 1 +databasesw 1 +niblack 1 +qbic 1 +contentus 1 +color 1 +textur 1 +shape 1 +februari 1 +cawkel 1 +journal 1 +ofinform 1 +scienc 1 +bach 1 +paul 1 +managementsystem 1 +face 1 +retriev 1 +ieee 1 +knowledgeand 1 +data 1 +engin 1 +august 1 +weymouth 1 +semant 1 +vimsi 1 +model 1 +intern 1 +onveri 1 +larg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..3fb78e44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,86 @@ +home 6 +work 6 +question 6 +time 6 +answer 5 +final 4 +exam 4 +problem 4 +true 4 +cours 3 +updat 3 +sentenc 3 +omega 3 +submatrix 3 +algorithm 2 +techniqu 2 +theori 2 +fall 2 +handout 2 +receiv 2 +last 2 +total 2 +vertic 2 +cycl 2 +label 2 +right 2 +size 2 +largest 2 +decemb 2 +instructor 1 +vijaya 1 +ramachandranuniqu 1 +number 1 +descript 1 +instruct 1 +respons 1 +pose 1 +quot 1 +take 1 +sigma 1 +onsigma 1 +word 1 +refer 1 +amort 1 +oroth 1 +well 1 +known 1 +face 1 +data 1 +structur 1 +disjoint 1 +set 1 +requir 1 +inth 1 +worst 1 +case 1 +text 1 +book 1 +isther 1 +differ 1 +meant 1 +first 1 +second 1 +paragraphof 1 +chapter 1 +note 1 +containdistinct 1 +cancontain 1 +think 1 +littl 1 +unclear 1 +denot 1 +equal 1 +somek 1 +entri 1 +fridai 1 +mondai 1 +bepost 1 +either 1 +youhav 1 +sent 1 +pleas 1 +address 1 +us 1 +yourbest 1 +judgment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..56e676b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,227 @@ +scheme 43 +rscheme 12 +version 10 +repositori 10 +system 9 +run 9 +class 8 +implement 8 +object 8 +machin 8 +note 7 +us 7 +program 6 +page 6 +cours 6 +answer 6 +sure 5 +code 5 +languag 4 +home 4 +thing 4 +make 4 +read 4 +later 4 +solari 4 +unix 4 +free 4 +recommend 4 +feelei 4 +meroon 4 +comp 4 +lang 4 +html 3 +first 3 +document 3 +standard 3 +practic 3 +question 3 +also 3 +assign 3 +simpl 3 +take 3 +wilson 2 +construct 2 +chang 2 +brows 2 +section 2 +browser 2 +especi 2 +text 2 +error 2 +chapter 2 +describ 2 +quiz 2 +homework 2 +includ 2 +pictur 2 +illustr 2 +base 2 +inherit 2 +explan 2 +test 2 +main 2 +default 2 +whichi 2 +instal 2 +public 2 +sparc 2 +command 2 +runschem 2 +linux 2 +orani 2 +sever 2 +andinstal 2 +find 2 +itfrom 2 +donovan 2 +kolbl 2 +qing 2 +patch 2 +friendlier 2 +fornewbi 2 +gettinggambit 2 +marc 2 +youcan 2 +window 2 +bestschem 2 +bunch 2 +avail 2 +guil 2 +might 2 +gambit 2 +mark 2 +mzscheme 2 +rice 2 +someth 2 +besid 2 +get 2 +start 2 +doingobject 2 +orient 2 +tous 2 +advantag 2 +abl 2 +univers 2 +indiana 2 +lot 2 +freeimplement 2 +variou 2 +getinterest 2 +learn 2 +cover 2 +place 2 +look 2 +internet 2 +newsgroup 2 +devot 2 +pagec 1 +pagethi 1 +paulwilson 1 +subject 1 +reload 1 +button 1 +yourbrows 1 +come 1 +see 1 +mostrec 1 +onlin 1 +refer 1 +materi 1 +syllabu 1 +lectur 1 +ondeclar 1 +arereason 1 +well 1 +index 1 +willchang 1 +goe 1 +along 1 +ahead 1 +islik 1 +adventur 1 +suggest 1 +usinga 1 +interact 1 +work 1 +throughchapt 1 +tutori 1 +allow 1 +outof 1 +past 1 +sanoth 1 +reason 1 +rather 1 +thanprint 1 +hardcopi 1 +correct 1 +weget 1 +definit 1 +format 1 +onlinebrows 1 +list 1 +featur 1 +ters 1 +stuff 1 +coursenot 1 +help 1 +want 1 +know 1 +miscellanousfunct 1 +exactli 1 +author 1 +second 1 +third 1 +name 1 +convent 1 +indent 1 +shouldconsult 1 +grade 1 +itsens 1 +andnot 1 +draw 1 +data 1 +structur 1 +write 1 +merg 1 +sort 1 +solut 1 +three 1 +problem 1 +comment 1 +reader 1 +regular 1 +express 1 +grammar 1 +actual 1 +backward 1 +chain 1 +proposit 1 +calculu 1 +theoremprov 1 +essenti 1 +littl 1 +subset 1 +prolog 1 +setofrul 1 +classifi 1 +anim 1 +logic 1 +kind 1 +ofanim 1 +plai 1 +theorem 1 +prover 1 +instanc 1 +simpleobject 1 +anoth 1 +show 1 +metaclass 1 +circular 1 +latter 1 +self 1 +onclass 1 +gener 1 +procedur 1 +type 1 +subtyp 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..ec062635 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,97 @@ +homework 16 +solut 14 +file 9 +postscript 7 +session 5 +kumar 5 +utexa 5 +class 4 +note 4 +review 4 +cours 3 +slide 3 +section 3 +model 3 +format 3 +fall 2 +comput 2 +austin 2 +ajit 2 +contact 2 +test 2 +print 2 +weekli 2 +tuesdai 2 +thursdai 2 +station 2 +feng 2 +xfeng 2 +new 2 +midterm 2 +decimalinteg 2 +hexinteg 2 +octalinteg 2 +program 1 +welcom 1 +homepag 1 +taught 1 +adam 1 +georg 1 +announc 1 +final 1 +surpris 1 +xunnow 1 +make 1 +like 1 +homeworksreview 1 +slidesth 1 +second 1 +half 1 +semest 1 +pleas 1 +view 1 +onlineif 1 +possibl 1 +realli 1 +need 1 +found 1 +updatedhomework 1 +sourc 1 +filemidterm 1 +webta 1 +inform 1 +timetableta 1 +offic 1 +hour 1 +locat 1 +guana 1 +natarajan 1 +eduxun 1 +detail 1 +time 1 +tabl 1 +guid 1 +group 1 +also 1 +tip 1 +fridai 1 +download 1 +score 1 +requir 1 +wordlist 1 +linux 1 +provid 1 +warren 1 +wang 1 +wwang 1 +answer 1 +exercis 1 +made 1 +modif 1 +mondai 1 +afternoon 1 +assign 1 +maintain 1 +edudepart 1 +sciencesunivers 1 +texa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..b1d3a579 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,46 @@ +solut 6 +yang 4 +statist 4 +offic 3 +hour 3 +utexa 3 +assign 3 +program 3 +schwetman 2 +mesquit 2 +station 2 +contact 2 +yangyang 2 +file 2 +print 2 +email 2 +comput 1 +system 1 +architectur 1 +fall 1 +instructor 1 +herb 1 +softwar 1 +class 1 +appointmentcontact 1 +syllabu 1 +statisticsassign 1 +asga 1 +statisticsyour 1 +final 1 +gradesect 1 +section 1 +microsparc 1 +datasheetonlin 1 +resourc 1 +classmat 1 +ruiliu 1 +postmessag 1 +new 1 +group 1 +pagei 1 +creat 1 +august 1 +comment 1 +welcom 1 +send 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..bf4bdd1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,70 @@ +handout 13 +homework 6 +project 4 +protocol 3 +class 3 +group 3 +csnet 2 +offic 2 +hour 2 +read 2 +newsgroup 2 +utexa 2 +draft 2 +http 2 +schedul 2 +network 1 +implement 1 +gener 1 +inform 1 +professor 1 +tuesdai 1 +thursdai 1 +teach 1 +assist 1 +mondai 1 +wensdai 1 +station 1 +descript 1 +text 1 +background 1 +prerequisit 1 +grade 1 +refer 1 +multicast 1 +rout 1 +texa 1 +internet 1 +platform 1 +netsim 1 +corejava 1 +mobil 1 +support 1 +specif 1 +address 1 +alloc 1 +manag 1 +tutori 1 +digest 1 +access 1 +authent 1 +comp 1 +java 1 +present 1 +individu 1 +paper 1 +turn 1 +sampl 1 +solut 1 +info 1 +configur 1 +file 1 +pleas 1 +note 1 +first 1 +fengyufeng 1 +edufing 1 +public 1 +ring 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..e477333c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,93 @@ +comput 6 +inform 5 +home 4 +scienc 4 +engin 4 +help 3 +page 3 +document 3 +class 2 +read 2 +assign 2 +homework 2 +degre 2 +program 2 +offer 2 +colleg 2 +mosaic 2 +engr 2 +pageclick 1 +techniqu 1 +spring 1 +quarterwelcom 1 +world 1 +wide 1 +hypermedia 1 +whichcontain 1 +bounti 1 +keep 1 +mind 1 +thatthi 1 +static 1 +addedfrequ 1 +problem 1 +send 1 +mail 1 +weld 1 +click 1 +highlight 1 +item 1 +personnel 1 +professor 1 +cours 1 +syllabu 1 +polici 1 +announc 1 +check 1 +regularli 1 +last 1 +chang 1 +handout 1 +lectur 1 +note 1 +gradesoth 1 +us 1 +link 1 +offici 1 +mathematica 1 +mvi 1 +visitor 1 +room 1 +schedul 1 +depart 1 +art 1 +avail 1 +follow 1 +topic 1 +basic 1 +hypertext 1 +markup 1 +languag 1 +html 1 +uniform 1 +resourc 1 +locat 1 +usinglynx 1 +charact 1 +base 1 +browserport 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quotedand 1 +duli 1 +credit 1 +copyright 1 +departmentof 1 +univers 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..31e93925 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,139 @@ +comput 6 +engr 5 +help 5 +exam 5 +autumn 4 +washington 4 +scienc 4 +engin 4 +home 3 +page 3 +program 3 +inform 3 +class 3 +messag 3 +webmast 3 +cours 3 +netscap 3 +document 2 +frequent 2 +problem 2 +send 2 +mail 2 +click 2 +last 2 +updat 2 +studi 2 +guid 2 +solut 2 +final 2 +tip 2 +think 2 +raini 2 +activ 2 +quarter 2 +like 2 +univers 2 +degre 2 +offer 2 +colleg 2 +dugan 1 +martin 1 +tompa 1 +welcom 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +bounti 1 +theclass 1 +keep 1 +mind 1 +static 1 +newinform 1 +especi 1 +ad 1 +highlight 1 +item 1 +check 1 +syllabu 1 +offic 1 +hour 1 +staff 1 +lectur 1 +slide 1 +homework 1 +midterm 1 +mac 1 +debugg 1 +bulletin 1 +board 1 +textbook 1 +code 1 +refer 1 +regularli 1 +schedul 1 +time 1 +place 1 +special 1 +demo 1 +web 1 +earlier 1 +intact 1 +less 1 +notic 1 +instanc 1 +link 1 +work 1 +pleas 1 +might 1 +assign 1 +test 1 +us 1 +previous 1 +winter 1 +spring 1 +summer 1 +search 1 +previou 1 +miscellan 1 +info 1 +case 1 +insensit 1 +match 1 +whole 1 +word 1 +avail 1 +depart 1 +art 1 +relat 1 +major 1 +nonmajor 1 +consid 1 +take 1 +preview 1 +run 1 +find 1 +itemsund 1 +balloon 1 +menu 1 +particular 1 +onlin 1 +handbook 1 +portion 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quotedand 1 +duli 1 +credit 1 +copyright 1 +departmentof 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..9bff1a7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,112 @@ +comput 5 +engr 3 +home 3 +autumn 3 +program 3 +cours 3 +hour 3 +scienc 3 +engin 3 +page 2 +class 2 +week 2 +lectur 2 +slide 2 +summer 2 +place 2 +test 2 +webmast 2 +like 2 +washington 2 +degre 2 +colleg 2 +martin 1 +dickei 1 +richard 1 +ladner 1 +welcom 1 +world 1 +wide 1 +short 1 +hypermediadocu 1 +contain 1 +bounti 1 +informationabout 1 +click 1 +highlight 1 +item 1 +moreinform 1 +messag 1 +check 1 +frequent 1 +syllabu 1 +sort 1 +schedulesth 1 +glanceweek 1 +activ 1 +schedulecomput 1 +lab 1 +includinglab 1 +watch 1 +chang 1 +staff 1 +includ 1 +instructor 1 +andta 1 +offic 1 +audiofrom 1 +homework 1 +examin 1 +midtermand 1 +final 1 +originallyschedul 1 +time 1 +studi 1 +guid 1 +andtim 1 +kind 1 +tip 1 +usingth 1 +compil 1 +macintosh 1 +user 1 +textbook 1 +code 1 +refer 1 +tutori 1 +special 1 +demo 1 +web 1 +earlier 1 +quarter 1 +less 1 +intactand 1 +invit 1 +brows 1 +notic 1 +problem 1 +forinst 1 +link 1 +work 1 +pleas 1 +send 1 +mail 1 +might 1 +look 1 +assign 1 +andth 1 +us 1 +previous 1 +winter 1 +spring 1 +inform 1 +avail 1 +univers 1 +depart 1 +art 1 +andrel 1 +major 1 +nonmajor 1 +comment 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..7a900b59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,58 @@ +assign 8 +beam 4 +washington 4 +offic 3 +sieg 3 +thursdai 3 +autumn 2 +hour 2 +nowitz 2 +wednesdai 2 +acrobat 2 +midterm 2 +spring 2 +discret 1 +structur 1 +instructorpaul 1 +edulectur 1 +phone 1 +appoint 1 +teach 1 +assistantjonathan 1 +edusect 1 +johnson 1 +section 1 +loew 1 +tuesdai 1 +handout 1 +syllabu 1 +induct 1 +recurs 1 +defin 1 +set 1 +postscript 1 +reader 1 +part 1 +ofyour 1 +browser 1 +novemb 1 +class 1 +sampl 1 +question 1 +homework 1 +previou 1 +cours 1 +web 1 +fall 1 +karp 1 +ruzzo 1 +winter 1 +leveson 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..6fb8c451 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,96 @@ +latex 31 +solut 11 +homework 11 +text 8 +handout 6 +format 5 +exam 4 +html 4 +winter 3 +state 3 +diagram 3 +document 3 +autumn 3 +last 2 +updat 2 +cours 2 +regular 2 +grammar 2 +midterm 2 +final 2 +provid 2 +washington 2 +intro 1 +formal 1 +model 1 +richard 1 +ladnerclass 1 +messag 1 +check 1 +email 1 +frequent 1 +syllabu 1 +construct 1 +express 1 +extra 1 +rambl 1 +regard 1 +question 1 +construc 1 +pars 1 +review 1 +proof 1 +halt 1 +problem 1 +undecidableexam 1 +comment 1 +notat 1 +file 1 +materi 1 +three 1 +hypertext 1 +markup 1 +languag 1 +handl 1 +browser 1 +fact 1 +current 1 +look 1 +view 1 +mani 1 +origin 1 +convert 1 +us 1 +latexhtml 1 +strang 1 +plain 1 +ascii 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +postscript 1 +ghostscript 1 +home 1 +page 1 +free 1 +viewer 1 +window 1 +linux 1 +web 1 +previou 1 +quarter 1 +ladner 1 +edufix 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..c99423ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,46 @@ +lectur 3 +messag 3 +mail 3 +list 3 +autumn 3 +formal 2 +model 2 +fall 2 +condon 2 +page 2 +check 2 +upcom 2 +class 2 +subscrib 2 +last 2 +updat 2 +previou 2 +winter 2 +washington 2 +introduct 1 +intro 1 +ann 1 +welcom 1 +home 1 +regularli 1 +findhomework 1 +solut 1 +set 1 +pointer 1 +exam 1 +sent 1 +willb 1 +log 1 +send 1 +majordomo 1 +includ 1 +userid 1 +email 1 +frequent 1 +homework 1 +handout 1 +content 1 +web 1 +quarter 1 +edukaye 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..bb5f7c35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,26 @@ +autumn 3 +last 2 +updat 2 +winter 2 +spring 2 +data 1 +structur 1 +martin 1 +tompaclass 1 +messag 1 +check 1 +mail 1 +frequent 1 +cours 1 +informationlab 1 +technot 1 +unix 1 +questionnaireloc 1 +cdeletemin 1 +algorithm 1 +treeshomework 1 +web 1 +previou 1 +quarter 1 +request 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..3d195e96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,62 @@ +spring 3 +ladner 2 +class 2 +messag 2 +frequent 2 +washington 2 +home 1 +pagecs 1 +data 1 +structuresrichard 1 +instructordan 1 +fasulo 1 +teach 1 +assistantthi 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +inform 1 +theclass 1 +taught 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +especi 1 +ad 1 +click 1 +help 1 +check 1 +offic 1 +hour 1 +suggest 1 +read 1 +project 1 +homework 1 +exam 1 +lectur 1 +overheadsport 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quotedand 1 +duli 1 +credit 1 +copyright 1 +departmentof 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..c39b3627 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,37 @@ +home 7 +page 7 +offer 6 +ofcs 5 +program 3 +languag 3 +autumn 2 +spring 2 +winter 2 +depart 2 +washington 2 +pagecs 1 +languagesfal 1 +quarter 1 +current 1 +informationth 1 +listinfo 1 +everi 1 +research 1 +pagehom 1 +computersci 1 +engineeringport 1 +reprint 1 +adapt 1 +academicnonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +dulycredit 1 +copyright 1 +comput 1 +scienceand 1 +engin 1 +univers 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..f205e082 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,114 @@ +note 24 +last 16 +updat 16 +code 14 +april 13 +assign 12 +smalltalk 11 +transcript 9 +suggest 9 +read 9 +html 8 +postscript 8 +sieg 6 +lisp 6 +home 5 +us 5 +prolog 5 +page 5 +washington 4 +solut 4 +transcipt 4 +method 4 +offer 4 +ofcs 4 +program 3 +june 3 +grove 3 +emac 3 +sampl 3 +quiz 3 +languag 3 +section 2 +final 2 +exam 2 +mail 2 +eduoffic 2 +offic 2 +hour 2 +dave 2 +cours 2 +figur 2 +htmlpostscript 2 +march 2 +winter 2 +depart 2 +pagecs 1 +languagesspr 1 +quarter 1 +lectur 1 +review 1 +session 1 +mondai 1 +tuesdai 1 +thursdai 1 +instructor 1 +steve 1 +hanks 1 +hank 1 +administr 1 +syllabu 1 +overviewcours 1 +newsgroup 1 +help 1 +documentsgeneralintroduct 1 +new 1 +netscap 1 +unix 1 +turnin 1 +electron 1 +submiss 1 +homework 1 +clip 1 +save 1 +relatedrun 1 +reason 1 +thing 1 +relat 1 +done 1 +hand 1 +miss 1 +includ 1 +partial 1 +test 1 +daili 1 +class 1 +full 1 +interfac 1 +build 1 +databas 1 +employe 1 +informationth 1 +listinfo 1 +everi 1 +research 1 +pagehom 1 +autumn 1 +spring 1 +computersci 1 +engineeringport 1 +reprint 1 +adapt 1 +academicnonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +dulycredit 1 +copyright 1 +comput 1 +scienceand 1 +engin 1 +univers 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..10153b85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,51 @@ +washington 4 +comput 4 +scienc 4 +engin 4 +index 2 +quarter 2 +webmast 2 +univers 2 +depart 2 +degre 2 +program 2 +offer 2 +colleg 2 +page 1 +pagecurr 1 +quarterth 1 +current 1 +previou 1 +quarterscours 1 +web 1 +earlier 1 +intact 1 +less 1 +younotic 1 +problem 1 +instanc 1 +link 1 +work 1 +pleas 1 +send 1 +mail 1 +spring 1 +inform 1 +avail 1 +art 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 +copyright 1 +comment 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..e490e629 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,124 @@ +class 5 +design 4 +mail 4 +instructor 4 +lectur 4 +home 3 +page 3 +gaetano 3 +announc 3 +last 3 +updat 3 +washington 3 +autumn 2 +quarter 2 +borriello 2 +corei 2 +us 2 +inform 2 +document 2 +send 2 +webmast 2 +administr 2 +final 2 +exam 2 +tool 2 +topic 2 +offic 2 +hour 2 +sieg 2 +logic 2 +katz 2 +benjamin 2 +cum 2 +addison 2 +weslei 2 +maintain 2 +comput 2 +introduct 1 +digit 1 +andersonwelcom 1 +contain 1 +whole 1 +bunch 1 +keep 1 +mind 1 +static 1 +especi 1 +messag 1 +ad 1 +frequent 1 +problem 1 +gener 1 +tocs 1 +notic 1 +system 1 +archiv 1 +messagess 1 +everyon 1 +cours 1 +goal 1 +syllabu 1 +meet 1 +time 1 +mondai 1 +decemb 1 +workload 1 +grade 1 +expect 1 +laboratori 1 +softwar 1 +polici 1 +collabor 1 +cheat 1 +address 1 +overal 1 +schedul 1 +anderson 1 +corin 1 +aweekli 1 +assign 1 +weekli 1 +quizz 1 +onlin 1 +version 1 +slide 1 +textbook 1 +contemporari 1 +author 1 +publish 1 +note 1 +interest 1 +evolut 1 +implement 1 +technolog 1 +aid 1 +synario 1 +feedback 1 +tell 1 +think 1 +thing 1 +go 1 +even 1 +anonym 1 +desir 1 +link 1 +previou 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 +copyright 1 +depart 1 +scienc 1 +engin 1 +univers 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..ff4f3f2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,77 @@ +assign 4 +inform 3 +data 2 +structur 2 +tanimoto 2 +hall 2 +offic 2 +hour 2 +assist 2 +option 2 +grade 2 +midterm 2 +project 2 +final 2 +late 2 +cours 1 +pagecs 1 +algorithmsautumn 1 +basic 1 +instructor 1 +steve 1 +washington 1 +sieg 1 +room 1 +appoint 1 +teach 1 +anhai 1 +doan 1 +announc 1 +place 1 +dai 1 +time 1 +smith 1 +comput 1 +facil 1 +unix 1 +account 1 +mscc 1 +student 1 +languag 1 +requir 1 +lisp 1 +textbook 1 +shaffer 1 +practic 1 +introduct 1 +algorithm 1 +analysi 1 +publish 1 +summer 1 +prentic 1 +breakdown 1 +tent 1 +polici 1 +keep 1 +manag 1 +encourag 1 +punctual 1 +work 1 +point 1 +deduct 1 +penalti 1 +schedul 1 +updat 1 +aboutth 1 +topic 1 +studi 1 +examinform 1 +exambas 1 +us 1 +compilerassignmentssolut 1 +assignmentsteach 1 +informationscheduleweb 1 +previou 1 +offer 1 +winter 1 +autumn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..f94ad35b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,40 @@ +comput 4 +washington 3 +scienc 3 +engin 3 +holden 2 +nowitz 2 +cours 2 +degre 2 +program 2 +offer 2 +colleg 2 +major 2 +home 1 +pagecs 1 +data 1 +structur 1 +algorithmsspr 1 +instructor 1 +alistair 1 +jonathan 1 +class 1 +messag 1 +last 1 +updat 1 +mondai 1 +materi 1 +syllabu 1 +homework 1 +demo 1 +exam 1 +inform 1 +depart 1 +art 1 +relat 1 +mosaic 1 +help 1 +interest 1 +page 1 +raini 1 +funnowitz 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..82ab7b0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,64 @@ +test 7 +file 7 +assign 4 +homework 3 +program 3 +indic 3 +enclos 3 +data 2 +meet 2 +siegtelephon 2 +email 2 +washington 2 +eduoffic 2 +hour 2 +next 2 +quot 2 +search 2 +cours 1 +pagecs 1 +structur 1 +algorithmswint 1 +time 1 +place 1 +sieg 1 +instructor 1 +linda 1 +shapirooffic 1 +shapiro 1 +denis 1 +pinneloffic 1 +denisep 1 +syllabustransparencieshomework 1 +assignmentshomework 1 +answer 1 +assignmentsprogram 1 +note 1 +set 1 +follow 1 +line 1 +begin 1 +insert 1 +tree 1 +inquot 1 +state 1 +charact 1 +long 1 +come 1 +integ 1 +length 1 +associatedvalu 1 +final 1 +string 1 +valu 1 +also 1 +linebegin 1 +find 1 +isfollow 1 +object 1 +model 1 +graphimag 1 +graphreview 1 +listsfin 1 +studi 1 +sheet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..21a48d9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,25 @@ +inform 2 +class 2 +compil 1 +classhomethi 1 +world 1 +wide 1 +hypermedia 1 +documentfor 1 +contain 1 +keep 1 +inmind 1 +document 1 +static 1 +willb 1 +ad 1 +frequent 1 +urgent 1 +announc 1 +assign 1 +onlin 1 +meet 1 +admin 1 +washington 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..43d1616b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,234 @@ +project 18 +softwar 17 +manag 11 +respons 11 +engin 9 +design 9 +includ 8 +plan 8 +primari 8 +document 7 +system 7 +class 7 +duti 7 +work 6 +experi 6 +student 5 +review 5 +provid 4 +group 4 +organ 4 +learn 4 +program 4 +role 4 +qualiti 4 +configur 4 +product 4 +user 4 +boe 3 +teach 3 +portfolio 3 +specif 3 +analysi 3 +quarter 3 +particip 3 +develop 3 +meet 3 +requir 3 +mainten 3 +control 3 +secur 3 +specialist 3 +experiment 2 +cours 2 +leveson 2 +concept 2 +team 2 +real 2 +industri 2 +written 2 +commun 2 +tool 2 +test 2 +also 2 +technic 2 +topic 2 +interact 2 +exampl 2 +first 2 +enough 2 +instructor 2 +addit 2 +set 2 +done 2 +leadership 2 +howev 2 +everyon 2 +activ 2 +write 2 +make 2 +creation 2 +overal 2 +issu 2 +evalu 2 +chang 2 +ensur 2 +human 2 +interfac 2 +assur 2 +conduct 2 +deliver 2 +mockup 2 +prototyp 2 +expertis 2 +reliabl 2 +home 1 +pagecs 1 +professor 1 +nanci 1 +offic 1 +sieg 1 +phone 1 +hour 1 +appoint 1 +mail 1 +washington 1 +educours 1 +descriptioninstruct 1 +object 1 +terminolog 1 +fundament 1 +oral 1 +skill 1 +produc 1 +studi 1 +method 1 +construct 1 +larg 1 +softwaresystem 1 +essenti 1 +tocreat 1 +complex 1 +successfulli 1 +effectiveor 1 +latter 1 +topicsar 1 +feedback 1 +sai 1 +import 1 +employersand 1 +often 1 +lack 1 +graduat 1 +version 1 +last 1 +realbo 1 +largegroup 1 +sever 1 +reason 1 +try 1 +approach 1 +isthat 1 +short 1 +realist 1 +cannotlearn 1 +session 1 +devotedto 1 +discuss 1 +regular 1 +thegroup 1 +usual 1 +hard 1 +isto 1 +effectivelytogeth 1 +head 1 +disast 1 +beavoid 1 +correct 1 +worktogeth 1 +requirementsanalysi 1 +possibl 1 +thenorm 1 +areal 1 +search 1 +engineeringinstitut 1 +master 1 +providedat 1 +assign 1 +playthat 1 +allow 1 +posit 1 +attach 1 +listof 1 +phase 1 +projectso 1 +part 1 +theproject 1 +outlin 1 +natur 1 +principl 1 +process 1 +model 1 +risk 1 +assess 1 +cost 1 +estim 1 +metric 1 +verif 1 +valid 1 +evolut 1 +reus 1 +ethic 1 +profession 1 +embed 1 +safeti 1 +take 1 +ofth 1 +aspect 1 +softwaredevelop 1 +member 1 +responsiblefor 1 +present 1 +administr 1 +assist 1 +updat 1 +track 1 +statu 1 +sure 1 +proper 1 +held 1 +get 1 +time 1 +princip 1 +architect 1 +consist 1 +hardwar 1 +platform 1 +transit 1 +exist 1 +augment 1 +necessari 1 +current 1 +factor 1 +respect 1 +survei 1 +interview 1 +employe 1 +releas 1 +duri 1 +characterist 1 +normal 1 +languag 1 +implement 1 +handl 1 +appear 1 +clariti 1 +manual 1 +determin 1 +us 1 +readabl 1 +understand 1 +support 1 +creat 1 +guid 1 +deliv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..75d03bf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,95 @@ +assign 6 +softwar 5 +note 5 +mail 3 +washington 3 +cours 3 +system 3 +time 2 +leveson 2 +offic 2 +sieg 2 +hour 2 +appoint 2 +carlson 2 +specif 2 +includ 2 +engin 2 +requir 2 +winter 2 +home 1 +pagecs 1 +engineeringmeet 1 +locat 1 +loew 1 +mondai 1 +wednesdai 1 +fridai 1 +professor 1 +nanci 1 +phone 1 +eduta 1 +adam 1 +educours 1 +descriptionthi 1 +studi 1 +concept 1 +method 1 +tool 1 +design 1 +construct 1 +test 1 +analysi 1 +document 1 +larg 1 +also 1 +technic 1 +topic 1 +essenti 1 +creat 1 +complex 1 +successfulli 1 +project 1 +manag 1 +textbookghezzi 1 +jazayeri 1 +mandrioli 1 +fundament 1 +prentic 1 +hall 1 +sampl 1 +interview 1 +question 1 +produc 1 +consum 1 +petri 1 +axiomat 1 +coupl 1 +cohes 1 +link 1 +interest 1 +syllabu 1 +updat 1 +pleas 1 +read 1 +newsgroup 1 +access 1 +machin 1 +send 1 +class 1 +mailinglist 1 +new 1 +comp 1 +risk 1 +militari 1 +standard 1 +defens 1 +develop 1 +inform 1 +avail 1 +spring 1 +comput 1 +scienc 1 +departmentsuggest 1 +feedback 1 +request 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..c8e48ba6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,346 @@ +lisp 22 +januari 13 +program 12 +languag 10 +copi 9 +common 9 +assign 9 +februari 9 +tuesdai 8 +sieg 8 +cours 8 +exam 8 +march 8 +thursdai 7 +project 7 +onlin 7 +part 7 +macintosh 7 +mscc 7 +final 6 +read 6 +book 6 +system 6 +student 6 +implement 5 +ruth 5 +hall 5 +room 5 +schedul 5 +mail 5 +list 5 +materi 5 +file 5 +facil 5 +email 5 +archiv 4 +examin 4 +info 4 +tutori 4 +mac 4 +window 4 +pleas 4 +review 4 +session 4 +solut 4 +comput 3 +thompson 3 +offic 3 +hour 3 +mondai 3 +topic 3 +also 3 +midterm 3 +class 3 +mani 3 +artifici 3 +intellig 3 +sever 3 +includ 3 +purchas 3 +center 3 +us 3 +edit 3 +refer 3 +provid 3 +support 3 +java 3 +call 3 +compil 3 +cover 3 +issu 3 +unix 3 +work 3 +note 3 +help 3 +turn 3 +page 3 +writeup 3 +winter 2 +steve 2 +tanimoto 2 +depart 2 +scienc 2 +washington 2 +meet 2 +wednesdai 2 +post 2 +number 2 +homework 2 +text 2 +get 2 +element 2 +question 2 +standard 2 +access 2 +download 2 +html 2 +world 2 +wide 2 +techniqu 2 +offer 2 +host 2 +extens 2 +inthompson 2 +stat 2 +free 2 +given 2 +close 2 +messag 2 +click 2 +token 2 +bring 2 +check 2 +koch 2 +descript 2 +fridai 2 +mileston 2 +home 1 +pagecs 1 +instructor 1 +engin 1 +univers 1 +seattl 1 +andersonmeet 1 +except 1 +time 1 +tent 1 +transpar 1 +past 1 +lectur 1 +credit 1 +grade 1 +particip 1 +hardcopi 1 +select 1 +section 1 +first 1 +ofread 1 +recent 1 +throughout 1 +logic 1 +prolog 1 +grammar 1 +andpars 1 +understand 1 +shell 1 +expert 1 +either 1 +whole 1 +bookstor 1 +approxim 1 +order 1 +combin 1 +fromth 1 +librari 1 +entitl 1 +freeman 1 +chapter 1 +introduct 1 +incommon 1 +glossari 1 +purchasedsepar 1 +basement 1 +commun 1 +build 1 +answer 1 +andit 1 +referenceon 1 +seem 1 +best 1 +tabl 1 +contentspag 1 +rather 1 +try 1 +entir 1 +orpostscript 1 +sourc 1 +code 1 +usingcommon 1 +site 1 +fordigitool 1 +compani 1 +thatmaintain 1 +currentinform 1 +interest 1 +link 1 +applic 1 +introductionto 1 +anoth 1 +websit 1 +thatdoesn 1 +alwai 1 +respond 1 +promptli 1 +trail 1 +announc 1 +welcom 1 +although 1 +catalog 1 +theirimplement 1 +interpret 1 +buildingprogram 1 +attent 1 +focu 1 +mainli 1 +particular 1 +programminglanguag 1 +explor 1 +relev 1 +tointepret 1 +investig 1 +addit 1 +tradit 1 +alsolook 1 +current 1 +visual 1 +programmingfacil 1 +quarter 1 +run 1 +themathemat 1 +locat 1 +thebas 1 +varieti 1 +mathematica 1 +yacc 1 +allegrocommon 1 +softwar 1 +particularli 1 +powerfulenviron 1 +full 1 +integr 1 +editor 1 +fred 1 +graphicsand 1 +user 1 +interfac 1 +construct 1 +network 1 +easili 1 +transfer 1 +machinesof 1 +difficulti 1 +supplement 1 +theirown 1 +packag 1 +xlisp 1 +micro 1 +emac 1 +howev 1 +limit 1 +abil 1 +altern 1 +wish 1 +need 1 +theseresourc 1 +internet 1 +thatxlisp 1 +bare 1 +bone 1 +nothav 1 +develop 1 +fortun 1 +power 1 +disadvantag 1 +must 1 +labunless 1 +fromdigitool 1 +normal 1 +cost 1 +special 1 +dealallow 1 +point 1 +version 1 +allegro 1 +lispfor 1 +franz 1 +attract 1 +tool 1 +test 1 +introduc 1 +thelaboratori 1 +thistim 1 +instead 1 +go 1 +regular 1 +classroom 1 +week 1 +move 1 +beginn 1 +guid 1 +might 1 +sent 1 +new 1 +group 1 +regardingread 1 +path 1 +remind 1 +todai 1 +even 1 +hard 1 +printout 1 +exampl 1 +onthursdai 1 +option 1 +held 1 +insieg 1 +becov 1 +convert 1 +string 1 +symbol 1 +import 1 +found 1 +send 1 +receiv 1 +modif 1 +deadlin 1 +announcedearli 1 +plu 1 +postscript 1 +viewer 1 +avail 1 +pictur 1 +snowflak 1 +detail 1 +complet 1 +projectgener 1 +find 1 +aboutdemonstr 1 +onmondai 1 +multipl 1 +choic 1 +format 1 +mark 1 +sens 1 +form 1 +pencil 1 +exercisestokenizerassign 1 +andpart 1 +parsertokenizerpart 1 +snowflakeassign 1 +local 1 +gener 1 +ondemonstr 1 +show 1 +displai 1 +demonstr 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..79139561 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,66 @@ +assign 8 +program 7 +introduct 4 +inform 4 +april 4 +artifici 3 +hour 3 +solut 3 +second 3 +postscript 3 +intellig 2 +holden 2 +offic 2 +redston 2 +mondai 2 +instruct 2 +project 2 +first 2 +homework 2 +third 2 +final 2 +allegro 2 +emac 2 +refcard 2 +intelligencecs 1 +spring 1 +professor 1 +alistair 1 +csoffic 1 +noon 1 +noonta 1 +joshua 1 +msoffic 1 +thompson 1 +thursdai 1 +text 1 +rich 1 +knight 1 +secondedit 1 +touretzki 1 +common 1 +lisp 1 +gentl 1 +symboliccomput 1 +gener 1 +basic 1 +comput 1 +cours 1 +outlin 1 +turnin 1 +sampl 1 +june 1 +exam 1 +us 1 +emacsinterfac 1 +interfac 1 +download 1 +standalonelisp 1 +macintosh 1 +note 1 +check 1 +grade 1 +record 1 +type 1 +gradesredston 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..6434e433 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,76 @@ +acrobat 5 +format 4 +winter 3 +latex 3 +syllabu 3 +midtem 3 +solut 3 +postscript 3 +ruzzo 2 +last 2 +updat 2 +text 2 +file 2 +legibl 2 +adob 2 +free 2 +viewer 2 +page 2 +intro 1 +algorithm 1 +larri 1 +martin 1 +tompaclass 1 +messag 1 +check 1 +email 1 +frequent 1 +book 1 +errata 1 +list 1 +handout 1 +homework 1 +web 1 +previou 1 +quarter 1 +karlin 1 +thecours 1 +materi 1 +provid 1 +three 1 +plain 1 +ascii 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +latest 1 +greatest 1 +ghostscript 1 +home 1 +window 1 +linux 1 +time 1 +support 1 +fewer 1 +system 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +print 1 +ghostscriptcan 1 +exampl 1 +tompa 1 +aberman 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..704f9608 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,88 @@ +midterm 6 +final 6 +spring 4 +everyth 4 +acrobat 4 +format 4 +jayram 3 +page 3 +washington 3 +syllabu 3 +theori 2 +comput 2 +larri 2 +ruzzo 2 +gener 2 +instructor 2 +time 2 +offic 2 +hour 2 +sieg 2 +home 2 +problem 2 +class 2 +mail 2 +latex 2 +postscript 2 +file 2 +legibl 2 +adob 2 +free 2 +viewer 2 +intro 1 +introduct 1 +inform 1 +thathachar 1 +meet 1 +tent 1 +welcom 1 +document 1 +sendmail 1 +last 1 +updat 1 +messag 1 +sent 1 +list 1 +textbook 1 +errata 1 +handout 1 +admin 1 +solut 1 +sourc 1 +thecours 1 +materi 1 +provid 1 +three 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +quit 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +latest 1 +greatest 1 +ghostscript 1 +window 1 +linux 1 +support 1 +fewer 1 +system 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +print 1 +ghostscriptcan 1 +exampl 1 +cours 1 +web 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..bf917860 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,48 @@ +databas 3 +offic 3 +link 3 +introduct 2 +shapiro 2 +sieg 2 +hour 2 +page 2 +imag 2 +systemscs 1 +systemsfal 1 +quarter 1 +instructor 1 +prof 1 +linda 1 +telephon 1 +patrick 1 +crowlei 1 +pcrowlei 1 +announc 1 +syllabu 1 +assign 1 +homework 1 +word 1 +shift 1 +left 1 +click 1 +save 1 +potenti 1 +us 1 +unisql 1 +home 1 +qbic 1 +queri 1 +content 1 +manag 1 +system 1 +dbm 1 +probabl 1 +interest 1 +want 1 +know 1 +window 1 +back 1 +cours 1 +webcs 1 +request 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..080ee054 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,77 @@ +project 6 +page 5 +lectur 4 +section 4 +instructor 3 +washington 3 +cours 3 +solut 3 +feedback 3 +home 2 +brian 2 +bershad 2 +offic 2 +hour 2 +midterm 2 +materi 2 +avail 2 +person 2 +introduct 1 +oper 1 +system 1 +autumn 1 +sung 1 +choi 1 +sieg 1 +appoint 1 +intro 1 +adminth 1 +class 1 +outlin 1 +administr 1 +info 1 +textbook 1 +grade 1 +andoth 1 +word 1 +wisdom 1 +messag 1 +mail 1 +sent 1 +archiv 1 +scale 1 +first 1 +schedulewhat 1 +cover 1 +schedul 1 +aggress 1 +andwil 1 +updat 1 +regularli 1 +reflect 1 +actual 1 +pace 1 +note 1 +handout 1 +slide 1 +projectsdescript 1 +relat 1 +solutionsto 1 +notesnot 1 +watchthi 1 +space 1 +carefulli 1 +inform 1 +vital 1 +surviv 1 +andgrad 1 +hint 1 +appear 1 +receiv 1 +onproject 1 +send 1 +anonym 1 +wish 1 +lost 1 +click 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..632f0141 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,87 @@ +home 6 +page 5 +quarter 4 +comput 3 +autumn 2 +document 2 +help 2 +cours 2 +spring 2 +scienc 2 +engin 2 +degre 2 +netscap 2 +quarterwelcom 1 +world 1 +wide 1 +hypermedia 1 +contain 1 +bounti 1 +inform 1 +class 1 +keepin 1 +mind 1 +mean 1 +static 1 +informationwil 1 +ad 1 +frequent 1 +problem 1 +thisdocu 1 +send 1 +mail 1 +pighin 1 +click 1 +classpersonnel 1 +professor 1 +student 1 +syllabuscours 1 +calendarta 1 +offic 1 +hourshandout 1 +assignmentslectur 1 +notesread 1 +assignmentshomework 1 +assignmentsprojectsproject 1 +handoutsproject 1 +artifactsproject 1 +sessionsproject 1 +grade 1 +policyproject 1 +write 1 +upslibui 1 +documentationoth 1 +relat 1 +informationget 1 +classhearn 1 +baker 1 +erratath 1 +instruct 1 +labus 1 +indi 1 +guid 1 +opengl 1 +exampl 1 +program 1 +winter 1 +pagegraph 1 +linkssgi 1 +silicon 1 +surfgrafica 1 +obscurasiggraphgrailgraph 1 +site 1 +indexoth 1 +us 1 +linksmvi 1 +visitor 1 +room 1 +schedul 1 +departmentth 1 +programth 1 +programweb 1 +helpbas 1 +helpmosa 1 +lynxus 1 +indyspighin 1 +washington 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..8ad3d99f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,106 @@ +offic 5 +format 5 +hour 4 +disabl 4 +html 4 +somani 3 +jari 3 +accommod 3 +document 3 +class 2 +student 2 +servic 2 +letter 2 +provid 2 +latex 2 +autumn 1 +intro 1 +comput 1 +commun 1 +networksautumn 1 +instructor 1 +arun 1 +cslectur 1 +sieg 1 +eebphon 1 +kristensen 1 +csoffic 1 +chang 1 +tomatch 1 +prof 1 +thu 1 +cover 1 +everi 1 +week 1 +andprovid 1 +larger 1 +timewindow 1 +consult 1 +messag 1 +check 1 +email 1 +frequent 1 +lectur 1 +overheadshomeworksprojectsinterest 1 +stuffattentionif 1 +would 1 +like 1 +request 1 +academ 1 +pleasecontact 1 +schmitz 1 +havea 1 +indic 1 +requiresacadem 1 +pleas 1 +present 1 +discuss 1 +might 1 +need 1 +file 1 +cours 1 +materi 1 +three 1 +hypertext 1 +markup 1 +languag 1 +handl 1 +browser 1 +fact 1 +current 1 +look 1 +view 1 +mani 1 +handout 1 +origin 1 +convert 1 +us 1 +latexhtml 1 +strang 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +postscript 1 +ghostscript 1 +home 1 +page 1 +free 1 +viewer 1 +window 1 +linux 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..5b1882dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,78 @@ +format 5 +sieg 4 +html 4 +ladner 3 +document 3 +offic 2 +hour 2 +provid 2 +latex 2 +washington 2 +spring 1 +intro 1 +comput 1 +commun 1 +networksspr 1 +instructor 1 +richard 1 +cslectur 1 +phone 1 +noonta 1 +william 1 +chan 1 +wchan 1 +csoffic 1 +class 1 +messag 1 +check 1 +email 1 +frequent 1 +lectur 1 +overheadshomeworksprojectsabout 1 +file 1 +cours 1 +materi 1 +three 1 +hypertext 1 +markup 1 +languag 1 +handl 1 +browser 1 +fact 1 +current 1 +look 1 +view 1 +mani 1 +handout 1 +origin 1 +convert 1 +us 1 +latexhtml 1 +strang 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +legibl 1 +figur 1 +complex 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +postscript 1 +ghostscript 1 +home 1 +page 1 +free 1 +viewer 1 +window 1 +linux 1 +eduwchan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..108b1fa8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,10 @@ +fall 2 +home 1 +pagecs 1 +advanc 1 +digit 1 +designt 1 +kehl 1 +page 1 +found 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..0f8d2ca7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,124 @@ +logic 4 +data 4 +home 3 +page 3 +march 3 +homework 3 +kehl 2 +inform 2 +hour 2 +offic 2 +final 2 +fridai 2 +februari 2 +midnight 2 +sampl 2 +option 2 +optionlab 2 +combin 2 +sequenti 2 +fpga 2 +depart 2 +comput 2 +scienc 2 +engin 2 +book 2 +server 2 +washington 2 +pagecs 1 +advanc 1 +digit 1 +designt 1 +fall 1 +welcom 1 +cours 1 +time 1 +place 1 +johnson 1 +import 1 +announc 1 +last 1 +updat 1 +summari 1 +syllabusschedul 1 +polici 1 +staff 1 +instructor 1 +mark 1 +savoi 1 +tue 1 +savac 1 +richard 1 +chinn 1 +thur 1 +richin 1 +howard 1 +chang 1 +gener 1 +shchang 1 +csjason 1 +aaron 1 +scott 1 +stephen 1 +hardwar 1 +laboratori 1 +manag 1 +student 1 +work 1 +group 1 +exam 1 +review 1 +topic 1 +cover 1 +quarterhomework 1 +assignmentsweb 1 +duehomework 1 +writeup 1 +written 1 +assign 1 +project 1 +abel 1 +state 1 +machin 1 +test 1 +fixtur 1 +handout 1 +memori 1 +communicationoth 1 +murphi 1 +recent 1 +dilbert 1 +comic 1 +collect 1 +resourc 1 +care 1 +gaetano 1 +borriello 1 +list 1 +vlsi 1 +link 1 +comprehensivelist 1 +icmanufactur 1 +nation 1 +semiconductor 1 +sheet 1 +motorola 1 +philip 1 +semiconduct 1 +micron 1 +technolog 1 +sheetsth 1 +copyright 1 +univers 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..8bda7852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,102 @@ +postscript 30 +mondai 11 +wednesdai 11 +read 10 +fridai 10 +design 7 +homework 5 +verilog 4 +pipelin 4 +program 4 +comput 3 +address 3 +sieg 3 +review 3 +answer 3 +sampl 3 +assembl 3 +organ 2 +snyder 2 +offic 2 +hour 2 +mail 2 +instruct 2 +simul 2 +sheet 2 +skim 2 +holidai 2 +mip 2 +segment 2 +binari 2 +data 2 +fall 2 +gener 1 +inform 1 +meet 1 +loew 1 +instructor 1 +larri 1 +appoint 1 +assist 1 +judi 1 +watson 1 +jwatson 1 +robert 1 +chenoffic 1 +tuesdai 1 +thursdays 1 +chensg 1 +catalog 1 +descript 1 +model 1 +structur 1 +function 1 +arithmet 1 +logic 1 +unit 1 +regist 1 +transfer 1 +level 1 +hardwar 1 +microprogram 1 +control 1 +memori 1 +hierarchi 1 +andorgan 1 +system 1 +compon 1 +interconnect 1 +laboratoryproject 1 +involv 1 +setprocessor 1 +prerequisit 1 +class 1 +note 1 +html 1 +appendix 1 +color 1 +chap 1 +revis 1 +midterm 1 +fast 1 +materi 1 +follow 1 +file 1 +avail 1 +addit 1 +modul 1 +common 1 +sourc 1 +form 1 +test 1 +prog 1 +simpl 1 +languag 1 +page 1 +previou 1 +quarter 1 +referencesthi 1 +free 1 +refer 1 +card 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..690a1a45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,232 @@ +lisp 16 +class 11 +april 9 +cours 8 +newsgroup 8 +assign 8 +sieg 7 +final 7 +mondai 6 +fridai 6 +exam 6 +wednesdai 5 +us 5 +project 5 +common 5 +artifici 4 +intellig 4 +tuesdai 4 +post 4 +mail 4 +interfac 4 +schedul 4 +book 4 +program 4 +creat 3 +access 3 +topic 3 +midterm 3 +edit 3 +separ 3 +examin 3 +test 3 +march 3 +system 3 +review 3 +spring 2 +home 2 +steve 2 +tanimoto 2 +comput 2 +washington 2 +jeremi 2 +hall 2 +room 2 +offic 2 +hour 2 +machin 2 +messag 2 +send 2 +csor 2 +new 2 +pnew 2 +list 2 +implement 2 +read 2 +element 2 +purchas 2 +mathemat 2 +theori 2 +languag 2 +link 2 +allegro 2 +window 2 +bring 2 +mark 2 +sens 2 +form 2 +represent 2 +search 2 +reason 2 +neural 2 +net 2 +expert 2 +page 2 +term 2 +session 2 +part 2 +lectur 2 +preliminari 2 +demo 2 +peer 2 +pagecs 1 +introduct 1 +instructor 1 +depart 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +baermeet 1 +thursdai 1 +archiv 1 +tent 1 +number 1 +credit 1 +grade 1 +homework 1 +particip 1 +hardcopi 1 +requir 1 +text 1 +freeman 1 +work 1 +hard 1 +textbook 1 +self 1 +contain 1 +exampl 1 +youdon 1 +onlin 1 +refer 1 +materi 1 +provid 1 +answer 1 +mani 1 +question 1 +andit 1 +standard 1 +referenceon 1 +seem 1 +best 1 +tabl 1 +contentspag 1 +rather 1 +try 1 +download 1 +entir 1 +html 1 +file 1 +orpostscript 1 +sourc 1 +code 1 +usingcommon 1 +interest 1 +info 1 +forprogram 1 +world 1 +wide 1 +applic 1 +facil 1 +order 1 +take 1 +advantag 1 +windowsimplement 1 +excel 1 +featur 1 +programdevelop 1 +construct 1 +theintel 1 +pentium 1 +laboratori 1 +free 1 +version 1 +isfor 1 +bedownload 1 +franz 1 +kind 1 +least 1 +attract 1 +option 1 +givenaccord 1 +close 1 +multipl 1 +choic 1 +alist 1 +know 1 +announc 1 +welcom 1 +coversboth 1 +implementationof 1 +techniqu 1 +includ 1 +programmingtechniqu 1 +knowledg 1 +logicalreason 1 +probabilist 1 +case 1 +base 1 +plan 1 +learn 1 +understand 1 +vision 1 +clo 1 +staff 1 +given 1 +held 1 +inour 1 +regular 1 +meet 1 +note 1 +continu 1 +programmingpart 1 +turn 1 +paper 1 +follow 1 +solut 1 +exercis 1 +state 1 +screenshot 1 +user 1 +descript 1 +ofhow 1 +go 1 +gener 1 +move 1 +current 1 +statu 1 +workload 1 +reduct 1 +propos 1 +circul 1 +email 1 +approv 1 +rest 1 +ofproject 1 +either 1 +give 1 +right 1 +orturn 1 +progress 1 +report 1 +memori 1 +holidai 1 +insieg 1 +explan 1 +evalu 1 +wrap 1 +demonstr 1 +evaluationof 1 +june 1 +assignmentsassign 1 +portion 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..57da79ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,89 @@ +home 3 +burn 3 +data 3 +page 2 +inform 2 +hour 2 +societi 2 +robot 2 +depart 2 +comput 2 +scienc 2 +engin 2 +sourc 2 +book 2 +washington 2 +pagecs 1 +digit 1 +system 1 +designstev 1 +spring 1 +welcom 1 +cours 1 +time 1 +place 1 +loew 1 +import 1 +announc 1 +last 1 +updat 1 +summari 1 +syllabu 1 +schedul 1 +polici 1 +staff 1 +steve 1 +instructor 1 +kent 1 +smith 1 +casei 1 +anderson 1 +stephen 1 +hardwar 1 +laboratori 1 +manag 1 +offic 1 +studentslab 1 +assign 1 +mchc 1 +info 1 +nice 1 +introduct 1 +fred 1 +martinrobot 1 +seattl 1 +portland 1 +area 1 +societyoth 1 +murphi 1 +recent 1 +dilbert 1 +comic 1 +collect 1 +resourc 1 +care 1 +gaetano 1 +borriello 1 +comprehens 1 +list 1 +nation 1 +semiconductor 1 +sheet 1 +motorola 1 +server 1 +philip 1 +semiconduct 1 +serverth 1 +copyright 1 +univers 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +accur 1 +quot 1 +duli 1 +credit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..33ff4f8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,34 @@ +cours 2 +inform 2 +project 2 +home 1 +pagewelcom 1 +contain 1 +bevi 1 +relatingto 1 +usual 1 +document 1 +frequentlychang 1 +send 1 +mail 1 +bswest 1 +csif 1 +encount 1 +problem 1 +classpersonnelsyllabuslectur 1 +scheduleguest 1 +lectur 1 +scheduleoffic 1 +hoursproject 1 +handout 1 +schedul 1 +help 1 +session 1 +final 1 +projectoth 1 +bug 1 +erratarefer 1 +pagesmidterm 1 +questionnairebswest 1 +washington 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..370d5fd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,94 @@ +avail 11 +cecil 6 +year 4 +exam 4 +vortex 4 +languag 4 +compil 4 +postscript 4 +assign 3 +list 3 +answer 3 +refer 3 +version 3 +program 2 +quarter 2 +chamber 2 +offic 2 +hour 2 +sieg 2 +vass 2 +read 2 +last 2 +midterm 2 +final 2 +travers 2 +manual 2 +page 2 +research 2 +home 1 +pagecs 1 +implement 1 +languageswint 1 +import 1 +cours 1 +informationmeet 1 +time 1 +instructor 1 +craigchamb 1 +start 1 +second 1 +week 1 +litvinov 1 +cubicl 1 +floor 1 +archivesslid 1 +lectur 1 +handout 1 +full 1 +class 1 +homework 1 +messag 1 +sent 1 +mail 1 +archivedher 1 +note 1 +test 1 +closedbook 1 +affect 1 +kind 1 +question 1 +wereask 1 +sampl 1 +solut 1 +informationhandout 1 +tutorialsth 1 +tutorialth 1 +tutorialhow 1 +front 1 +enda 1 +file 1 +interestdead 1 +elim 1 +simpl 1 +exampl 1 +idfacfg 1 +interfac 1 +frameworkvortex 1 +textual 1 +descript 1 +grammarcecil 1 +documentationdocument 1 +html 1 +format 1 +standard 1 +librari 1 +resourcesth 1 +previou 1 +includ 1 +slide 1 +inform 1 +found 1 +onmark 1 +leon 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..e5117792 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,62 @@ +avail 5 +languag 4 +program 3 +cecil 3 +vortex 3 +compil 3 +found 3 +home 2 +implement 2 +cours 2 +sieg 2 +chamber 2 +offic 2 +hour 2 +jdean 2 +grove 2 +chateau 2 +read 2 +list 2 +inform 2 +page 2 +research 2 +pagecs 1 +languagesimport 1 +informationmeet 1 +time 1 +instructor 1 +craig 1 +jeff 1 +dean 1 +dave 1 +come 1 +find 1 +confer 1 +room 1 +archivesslid 1 +lectur 1 +slide 1 +turori 1 +handout 1 +assign 1 +full 1 +class 1 +messag 1 +sent 1 +mail 1 +archiv 1 +projectth 1 +project 1 +sort 1 +analysi 1 +andtransform 1 +optimizingcompil 1 +object 1 +orient 1 +written 1 +cecilproject 1 +manual 1 +resourcesmor 1 +onmark 1 +leon 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..3df96f6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,13 @@ +assign 5 +home 1 +pagecs 1 +softwar 1 +engineeringdavid 1 +notkin 1 +spring 1 +introductori 1 +handout 1 +kwic 1 +sampl 1 +projectsnotkin 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..f21ba018 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,86 @@ +languag 6 +list 5 +page 5 +program 4 +mail 4 +send 3 +washington 3 +home 2 +notkin 2 +offic 2 +hour 2 +handout 2 +thread 2 +messag 2 +subscrib 2 +research 2 +resourc 2 +lambda 2 +calculu 2 +univers 2 +orient 2 +pagecs 1 +concept 1 +languagesautumn 1 +loew 1 +instructor 1 +david 1 +sieg 1 +byappoint 1 +kurt 1 +partridg 1 +kepart 1 +floor 1 +cubiclescours 1 +introductori 1 +html 1 +postscript 1 +assign 1 +readingsmail 1 +archivesw 1 +us 1 +administr 1 +instructionalpurpos 1 +wish 1 +refer 1 +previous 1 +sent 1 +archiv 1 +emailto 1 +majordomo 1 +singl 1 +line 1 +bodi 1 +subject 1 +csegener 1 +yahoo 1 +mark 1 +leon 1 +excel 1 +pagesprogram 1 +critiquesgari 1 +leaven 1 +self 1 +studi 1 +pagefunct 1 +resourcesmit 1 +scheme 1 +pagecmu 1 +standard 1 +pagea 1 +gentl 1 +introduct 1 +mlhaskel 1 +monash 1 +universityobject 1 +geneva 1 +object 1 +info 1 +cecil 1 +project 1 +dylan 1 +carnegi 1 +mellon 1 +appl 1 +comput 1 +question 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..f98cd80c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,118 @@ +homework 14 +solut 8 +exam 6 +anderson 4 +washington 4 +hour 4 +class 4 +mondai 3 +project 3 +gilligan 3 +design 2 +analysi 2 +algorithm 2 +lectur 2 +offic 2 +time 2 +confer 2 +room 2 +cours 2 +set 2 +discuss 2 +write 2 +island 2 +algorithmscs 1 +winter 1 +instructor 1 +richard 1 +seig 1 +appoint 1 +teach 1 +assist 1 +william 1 +chan 1 +wchan 1 +wednesdai 1 +chateau 1 +sieg 1 +floor 1 +cubicl 1 +somebodi 1 +els 1 +us 1 +inform 1 +prerequisit 1 +go 1 +assum 1 +alreadi 1 +undergradu 1 +wrong 1 +know 1 +soon 1 +possibl 1 +suggest 1 +readingtextbook 1 +errata 1 +list 1 +realli 1 +preview 1 +check 1 +outer 1 +sapplet 1 +assign 1 +handout 1 +written 1 +gener 1 +tuesdai 1 +background 1 +quiz 1 +post 1 +script 1 +midterm 1 +cancel 1 +lack 1 +interest 1 +final 1 +told 1 +march 1 +probabl 1 +verifi 1 +close 1 +book 1 +cover 1 +materi 1 +willconsist 1 +short 1 +answer 1 +problem 1 +solv 1 +question 1 +bureaucrat 1 +stuffgrad 1 +base 1 +upon 1 +particip 1 +work 1 +togeth 1 +okai 1 +homeworkproblem 1 +classmat 1 +must 1 +upindepend 1 +rule 1 +could 1 +invok 1 +betweenani 1 +mustwatch 1 +least 1 +half 1 +theori 1 +thatan 1 +episod 1 +equival 1 +reboot 1 +anyth 1 +thatsurv 1 +learn 1 +understood 1 +eduwchan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..142c3702 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,218 @@ +cours 12 +parallel 10 +topic 8 +homework 7 +algorithm 6 +memori 5 +anderson 4 +problem 4 +thursdai 4 +paper 4 +theori 4 +comput 4 +could 4 +descript 3 +effici 3 +note 3 +april 3 +lectur 3 +share 3 +machin 3 +book 3 +gener 2 +sieg 2 +instructor 2 +offic 2 +syllabu 2 +list 2 +connect 2 +pointer 2 +although 2 +go 2 +model 2 +real 2 +year 2 +consid 2 +expect 2 +howev 2 +nice 2 +teach 2 +algorithmscs 1 +spring 1 +inform 1 +meet 1 +richard 1 +hour 1 +appointment 1 +mail 1 +address 1 +exam 1 +catalog 1 +design 1 +analysi 1 +fundament 1 +algorithmsfor 1 +sort 1 +arithmet 1 +matrix 1 +graph 1 +addit 1 +select 1 +emphasi 1 +techniqu 1 +approach 1 +us 1 +developingfast 1 +limit 1 +theirefficaci 1 +prerequisit 1 +equival 1 +major 1 +assign 1 +plu 1 +rambl 1 +commentsabout 1 +transpar 1 +code 1 +analysisfor 1 +rank 1 +compon 1 +algorithmi 1 +simpler 1 +correct 1 +section 1 +latex 1 +version 1 +referencesfor 1 +erew 1 +crew 1 +ullman 1 +yannakaki 1 +tuesdai 1 +union 1 +find 1 +certifi 1 +write 1 +impli 1 +exist 1 +consensu 1 +base 1 +upon 1 +swap 1 +likelysometh 1 +insid 1 +next 1 +supercomput 1 +asynchron 1 +refer 1 +martel 1 +foc 1 +buss 1 +manuscript 1 +special 1 +content 1 +whim 1 +titl 1 +would 1 +mayb 1 +smpc 1 +start 1 +collect 1 +basic 1 +spend 1 +time 1 +give 1 +cover 1 +term 1 +indic 1 +lookingat 1 +pertain 1 +specif 1 +interconnect 1 +topolog 1 +wewil 1 +situat 1 +cost 1 +access 1 +isnon 1 +uniform 1 +sens 1 +notconsid 1 +particular 1 +prove 1 +theorem 1 +andyou 1 +motiv 1 +practic 1 +consider 1 +goal 1 +indevelop 1 +come 1 +algorithmswhich 1 +conceiv 1 +three 1 +four 1 +set 1 +contain 1 +routin 1 +challeng 1 +goingto 1 +requir 1 +project 1 +happi 1 +student 1 +outsidework 1 +relat 1 +text 1 +introduct 1 +parallelalgorithm 1 +befollow 1 +close 1 +feel 1 +exception 1 +cheap 1 +youcould 1 +probabl 1 +without 1 +purchas 1 +copi 1 +origin 1 +plan 1 +volunt 1 +textwould 1 +progress 1 +fast 1 +volum 1 +artof 1 +program 1 +chose 1 +instead 1 +quit 1 +flexibl 1 +taught 1 +mychoic 1 +influenc 1 +interestingor 1 +uninterest 1 +also 1 +choic 1 +aseith 1 +tradit 1 +work 1 +researchcont 1 +number 1 +open 1 +mind 1 +turninto 1 +research 1 +result 1 +present 1 +half 1 +bake 1 +ideason 1 +provid 1 +other 1 +interest 1 +andenergi 1 +think 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..590fb501 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,31 @@ +page 2 +comput 2 +autumn 2 +home 1 +automata 1 +complex 1 +move 1 +current 1 +quarter 1 +portion 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accuratelyquot 1 +duli 1 +credit 1 +copyright 1 +depart 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +comment 1 +webmast 1 +washington 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..a8323f9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,19 @@ +postscript 3 +home 2 +page 2 +beam 2 +quiz 2 +fall 1 +automataautumn 1 +instructor 1 +paul 1 +welcom 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +exam 1 +final 1 +latex 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..c17b429c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,105 @@ +midterm 4 +cours 4 +acrobat 4 +format 4 +ruzzo 3 +sieg 3 +organ 3 +syllabu 3 +collabor 3 +fall 2 +comput 2 +complex 2 +larri 2 +nitin 2 +class 2 +mail 2 +last 2 +updat 2 +washington 2 +latex 2 +sourc 2 +postscript 2 +file 2 +provid 2 +legibl 2 +adob 2 +free 2 +viewer 2 +depart 2 +system 2 +page 2 +autumn 2 +automata 1 +tuth 1 +staffnameemailphoneoffic 1 +hour 1 +instructor 1 +sharma 1 +csmw 1 +messag 1 +sent 1 +list 1 +textbook 1 +errata 1 +handout 1 +administrivia 1 +homework 1 +thecours 1 +materi 1 +three 1 +plain 1 +ascii 1 +text 1 +includ 1 +command 1 +simpl 1 +thing 1 +assign 1 +gener 1 +quit 1 +figur 1 +math 1 +stuff 1 +hard 1 +imposs 1 +read 1 +latest 1 +greatest 1 +avail 1 +unix 1 +acroread 1 +perhap 1 +aavail 1 +ghostview 1 +ghostscript 1 +home 1 +window 1 +linux 1 +time 1 +support 1 +fewer 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +print 1 +ghostscriptcan 1 +exampl 1 +web 1 +portion 1 +reprint 1 +adapt 1 +foracadem 1 +nonprofit 1 +purpos 1 +accuratelyquot 1 +duli 1 +credit 1 +copyright 1 +scienc 1 +engin 1 +univers 1 +ofwashington 1 +comment 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..14f5ba3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,94 @@ +complex 6 +proposit 6 +proof 5 +logic 5 +theorem 4 +satisfi 4 +prove 3 +test 3 +order 3 +issu 3 +well 3 +system 3 +algorithm 2 +first 2 +higher 2 +cours 2 +concentr 2 +search 2 +strategi 2 +theoret 2 +practic 2 +paper 2 +instal 2 +prover 2 +topic 1 +autumn 1 +gener 1 +inform 1 +instructor 1 +paul 1 +beam 1 +meet 1 +time 1 +tuesdai 1 +thursdai 1 +loew 1 +autom 1 +comput 1 +aid 1 +verif 1 +vlsi 1 +andsoftwar 1 +engin 1 +give 1 +attempt 1 +decid 1 +truthof 1 +statement 1 +casea 1 +flip 1 +side 1 +even 1 +us 1 +oftheorem 1 +often 1 +involv 1 +finitedomain 1 +interpret 1 +anywai 1 +consid 1 +varieti 1 +theoremprov 1 +within 1 +good 1 +choic 1 +consider 1 +work 1 +thesequest 1 +complexityand 1 +rel 1 +also 1 +examin 1 +anumb 1 +implement 1 +compar 1 +theoryand 1 +thing 1 +urquhart 1 +survei 1 +talk 1 +slide 1 +softwar 1 +amus 1 +sato 1 +andboy 1 +moor 1 +tester 1 +gsat 1 +june 1 +thedirectori 1 +proversther 1 +scatter 1 +process 1 +ofinstal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..630258f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,38 @@ +comput 3 +home 2 +pagecs 1 +systemperform 1 +modelingspr 1 +host 1 +lazowskaandmaryvernonwelcom 1 +page 1 +system 1 +performancemodel 1 +meet 1 +mondai 1 +wednesdai 1 +fridai 1 +loew 1 +hall 1 +offic 1 +hourstent 1 +topic 1 +schedulecom 1 +goingsassignmentsproject 1 +informationmap 1 +queue 1 +network 1 +solut 1 +packag 1 +emailoth 1 +inform 1 +avail 1 +sigmetr 1 +confer 1 +measur 1 +model 1 +computersystemsuw 1 +depart 1 +scienc 1 +engineeringlazowska 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..e077eb80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,76 @@ +postscript 5 +simul 5 +alpha 4 +sieg 3 +page 2 +system 2 +egger 2 +washington 2 +offic 2 +hour 2 +redston 2 +cours 2 +inform 2 +architectur 2 +tool 2 +sparc 2 +pentium 2 +uniprocessor 2 +info 2 +home 1 +comput 1 +architecturewint 1 +instructorsusan 1 +tuth 1 +tajoshua 1 +overview 1 +schedul 1 +continu 1 +updat 1 +lectur 1 +note 1 +problem 1 +set 1 +previou 1 +test 1 +histori 1 +specmark 1 +rate 1 +shade 1 +instuct 1 +atom 1 +build 1 +analysi 1 +tullsen 1 +execut 1 +driven 1 +instruct 1 +level 1 +superscalar 1 +close 1 +etch 1 +binari 1 +rewrit 1 +analyz 1 +code 1 +hardwar 1 +monitor 1 +multiflow 1 +compil 1 +pixi 1 +user 1 +manual 1 +dinero 1 +cach 1 +local 1 +machin 1 +powerpc 1 +applic 1 +multiprocessor 1 +spec 1 +benchmark 1 +neat 1 +center 1 +current 1 +futur 1 +processor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..1d06f28c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,50 @@ +inform 3 +home 2 +instructor 2 +pighin 2 +offic 2 +hour 2 +thisdocu 2 +mail 2 +april 2 +assign 2 +cours 2 +pagecs 1 +oper 1 +system 1 +hank 1 +levi 1 +spring 1 +freder 1 +meet 1 +time 1 +chateau 1 +confer 1 +room 1 +number 1 +unit 1 +welcom 1 +page 1 +world 1 +wide 1 +short 1 +hypermedia 1 +document 1 +forcs 1 +contain 1 +class 1 +keep 1 +mind 1 +static 1 +especi 1 +classmessag 1 +ad 1 +frequent 1 +problem 1 +send 1 +announc 1 +first 1 +readi 1 +iti 1 +projectlevi 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..0d9b33b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,78 @@ +inform 6 +home 4 +comput 4 +document 3 +help 3 +scienc 3 +engin 3 +page 2 +class 2 +avail 2 +last 2 +year 2 +project 2 +degre 2 +program 2 +offer 2 +colleg 2 +mosaic 2 +pagecs 1 +graphicsautumn 1 +quarter 1 +welcom 1 +world 1 +wide 1 +hypermedia 1 +whichcontain 1 +wealth 1 +keep 1 +mind 1 +thatthi 1 +static 1 +addedfrequ 1 +problem 1 +send 1 +mail 1 +deros 1 +click 1 +professor 1 +cours 1 +syllabu 1 +lectur 1 +note 1 +written 1 +homework 1 +assign 1 +solut 1 +handout 1 +grade 1 +polici 1 +test 1 +cool 1 +imag 1 +addit 1 +get 1 +instruct 1 +us 1 +indi 1 +mvi 1 +visitor 1 +room 1 +schedul 1 +depart 1 +art 1 +follow 1 +topic 1 +basic 1 +hypertext 1 +markup 1 +languag 1 +html 1 +uniform 1 +resourc 1 +locat 1 +read 1 +usinglynx 1 +charact 1 +base 1 +browser 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..3d562f05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,110 @@ +homework 9 +class 8 +begin 7 +fridai 5 +ebel 4 +logic 4 +data 4 +home 3 +page 3 +book 3 +offic 3 +hour 3 +wednesdai 3 +sieg 3 +mondai 3 +design 2 +carl 2 +inform 2 +staff 2 +paul 2 +larri 2 +combin 2 +sequenti 2 +fpga 2 +depart 2 +comput 2 +scienc 2 +engin 2 +sheet 2 +server 2 +washington 2 +principl 1 +digit 1 +system 1 +fall 1 +welcom 1 +cours 1 +time 1 +place 1 +loew 1 +import 1 +announc 1 +summari 1 +syllabu 1 +text 1 +instructor 1 +franklin 1 +thursdai 1 +hine 1 +hineskj 1 +tuesdai 1 +mcmurchi 1 +research 1 +tool 1 +guru 1 +document 1 +simul 1 +synthesi 1 +pamett 1 +board 1 +mostli 1 +complet 1 +still 1 +construct 1 +student 1 +work 1 +groupsfin 1 +exam 1 +review 1 +topic 1 +cover 1 +quarter 1 +assign 1 +note 1 +hand 1 +handout 1 +memori 1 +commun 1 +mother 1 +site 1 +list 1 +vlsi 1 +link 1 +comprehensivelist 1 +icmanufactur 1 +murphi 1 +recent 1 +dilbert 1 +comic 1 +nation 1 +semiconductor 1 +motorola 1 +philip 1 +semiconduct 1 +micron 1 +technolog 1 +copyright 1 +univers 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..cdb0585f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,50 @@ +sieg 3 +hour 3 +artifici 2 +question 2 +index 1 +pagecs 1 +intelligencefal 1 +quarter 1 +intellig 1 +pose 1 +fundament 1 +andchalleng 1 +comput 1 +scienc 1 +build 1 +intelligentmachin 1 +cours 1 +address 1 +provid 1 +anin 1 +depth 1 +introduct 1 +select 1 +topic 1 +includ 1 +agentarchitectur 1 +knowledg 1 +represent 1 +search 1 +plan 1 +machinelearn 1 +reason 1 +uncertainti 1 +methodolog 1 +staff 1 +weldweld 1 +marc 1 +friedmanfriedman 1 +nick 1 +kushmericknick 1 +outlin 1 +topicsread 1 +assignmentsassign 1 +examsgradingresourcesth 1 +class 1 +mailinglist 1 +also 1 +archiv 1 +past 1 +messag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..8168ef19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,102 @@ +read 5 +theori 4 +uncertainti 3 +decis 3 +hank 3 +washington 3 +pearl 3 +reason 3 +probabl 3 +make 2 +intellig 2 +offic 2 +mail 2 +class 2 +list 2 +probabilist 2 +sever 2 +nice 2 +refer 2 +book 2 +paper 2 +avail 2 +librari 2 +histor 2 +look 2 +math 2 +summari 2 +artifici 1 +winter 1 +professor 1 +steve 1 +sieg 1 +hour 1 +whenev 1 +around 1 +appoint 1 +email 1 +address 1 +goe 1 +member 1 +send 1 +request 1 +materi 1 +systemsthi 1 +requir 1 +text 1 +chapter 1 +without 1 +bui 1 +strappedfor 1 +cash 1 +though 1 +shafer 1 +uncertain 1 +reasoningthi 1 +collect 1 +foundat 1 +select 1 +copi 1 +grail 1 +jayn 1 +logic 1 +scienc 1 +fragmentari 1 +edit 1 +juli 1 +extrem 1 +interest 1 +technic 1 +foundationsof 1 +statist 1 +definit 1 +worth 1 +perspect 1 +alon 1 +heavi 1 +go 1 +place 1 +beautifulli 1 +written 1 +neapolitan 1 +expert 1 +system 1 +algorithmsa 1 +signific 1 +overlap 1 +good 1 +secondari 1 +sourc 1 +inform 1 +graphic 1 +model 1 +propagationalgorithm 1 +research 1 +arrang 1 +cours 1 +topic 1 +cover 1 +html 1 +postscript 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..bc3c1871 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,271 @@ +cours 9 +khoro 8 +mondai 8 +assign 7 +fridai 7 +home 6 +imag 6 +page 6 +inform 6 +april 6 +class 5 +read 5 +wednesdai 5 +cantata 5 +comput 5 +part 4 +meet 4 +instal 3 +note 3 +next 3 +sun 3 +local 3 +environ 3 +copi 3 +engin 3 +week 3 +march 3 +onlin 3 +help 3 +contain 2 +document 2 +copyright 2 +materi 2 +washington 2 +first 2 +chapter 2 +run 2 +login 2 +file 2 +follow 2 +setenv 2 +khoros_hom 2 +path 2 +also 2 +tutori 2 +outlin 2 +experi 2 +take 2 +articl 2 +make 2 +process 2 +softwar 2 +msvc 2 +final 2 +june 2 +exam 2 +midterm 2 +plan 2 +select 2 +avail 2 +lectur 2 +student 2 +sign 2 +start 2 +applic 2 +account 2 +rene 2 +reed 2 +arrang 2 +sieg 2 +scienc 2 +version 2 +undergradu 2 +menu 2 +pagecs 1 +understandingwelcom 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +theclass 1 +keep 1 +mind 1 +static 1 +newinform 1 +especi 1 +messag 1 +ad 1 +frequent 1 +problem 1 +send 1 +mail 1 +mock 1 +notic 1 +subjectto 1 +view 1 +public 1 +site 1 +otherthan 1 +univers 1 +doexercis 1 +second 1 +exercis 1 +determin 1 +conveni 1 +torun 1 +aslillith 1 +edit 1 +workstat 1 +containxhost 1 +lilliththen 1 +cshrc 1 +manpath 1 +rlogin 1 +onto 1 +lillith 1 +rhost 1 +assignmentsand 1 +displai 1 +variabl 1 +appropri 1 +typecantata 1 +unix 1 +prompt 1 +machin 1 +georg 1 +haskhoro 1 +wwwhttp 1 +educ 1 +index 1 +htmland 1 +link 1 +itscours 1 +least 1 +twotop 1 +spatial 1 +resolut 1 +ideal 1 +pagesand 1 +anoth 1 +window 1 +noth 1 +turn 1 +third 1 +huerta 1 +andnevatia 1 +cvpr 1 +proceed 1 +tolook 1 +wolff 1 +fourth 1 +comparison 1 +three 1 +fast 1 +oper 1 +level 1 +learn 1 +effort 1 +requir 1 +announc 1 +examin 1 +pmin 1 +regular 1 +room 1 +cover 1 +combinationof 1 +post 1 +list 1 +topic 1 +studi 1 +remind 1 +approv 1 +sundai 1 +time 1 +review 1 +insieg 1 +period 1 +overhead 1 +transpar 1 +onneur 1 +net 1 +librari 1 +center 1 +floor 1 +packet 1 +number 1 +trainabl 1 +classifi 1 +permit 1 +temporari 1 +ofmatlab 1 +requirethat 1 +fill 1 +form 1 +contract 1 +know 1 +interest 1 +term 1 +project 1 +import 1 +ofth 1 +correct 1 +introduc 1 +pentium 1 +laboratori 1 +includingth 1 +develop 1 +evan 1 +mclain 1 +documentexplain 1 +transform 1 +current 1 +statu 1 +recent 1 +get 1 +withkhoro 1 +accompani 1 +pleas 1 +alreadi 1 +accesskhoro 1 +contact 1 +onthursdai 1 +pick 1 +youraccount 1 +name 1 +password 1 +itov 1 +weekend 1 +earli 1 +hour 1 +arelimit 1 +ahead 1 +email 1 +address 1 +andsh 1 +back 1 +offic 1 +kept 1 +lock 1 +either 1 +need 1 +knock 1 +orhav 1 +prior 1 +mani 1 +card 1 +willhav 1 +care 1 +slide 1 +resourc 1 +understand 1 +intro 1 +delft 1 +univ 1 +pattern 1 +recognit 1 +vision 1 +store 1 +thedepart 1 +brochur 1 +brochuremosa 1 +mosaic 1 +find 1 +itemsund 1 +balloon 1 +macmosa 1 +itemund 1 +navig 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..45aea03a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,174 @@ +novemb 7 +class 6 +parallel 5 +octob 5 +topic 5 +algorithm 5 +schedul 4 +begin 4 +tuesdai 4 +term 4 +project 4 +comput 3 +document 3 +machin 3 +neural 3 +network 3 +intel 3 +cours 3 +decemb 3 +home 2 +imag 2 +inform 2 +time 2 +normal 2 +start 2 +meet 2 +guest 2 +speaker 2 +univers 2 +complet 2 +pyramid 2 +embed 2 +overview 2 +architectur 2 +thursdai 2 +week 2 +implement 2 +languag 2 +paragon 2 +supercomput 2 +maspar 2 +copyright 2 +final 2 +exam 2 +pagecs 1 +processingwelcom 1 +page 1 +world 1 +wide 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +theclass 1 +keep 1 +mind 1 +static 1 +newinform 1 +ad 1 +informationon 1 +half 1 +hourearli 1 +dai 1 +prof 1 +nian 1 +simon 1 +fraser 1 +burnabi 1 +canada 1 +bharath 1 +modayur 1 +titl 1 +present 1 +effici 1 +object 1 +recognit 1 +simd 1 +mimd 1 +discuss 1 +scale 1 +invariantoper 1 +segment 1 +hierarchicalrelax 1 +us 1 +isodata 1 +approach 1 +burt 1 +hong 1 +rosenfeld 1 +introduct 1 +virtual 1 +process 1 +mesh 1 +brief 1 +treatment 1 +icon 1 +symbol 1 +analysi 1 +digit 1 +librari 1 +demo 1 +find 1 +topicsdur 1 +student 1 +activelyexplor 1 +written 1 +descript 1 +hand 1 +inon 1 +templat 1 +writeupsi 1 +avail 1 +resourcespvm 1 +virtualmachin 1 +softwar 1 +layear 1 +permit 1 +user 1 +program 1 +aviru 1 +made 1 +heterogen 1 +collect 1 +moreworkst 1 +conveni 1 +studydistribut 1 +technicalpubl 1 +includ 1 +paragonparallel 1 +system 1 +good 1 +arrai 1 +orient 1 +variousvendor 1 +info 1 +onth 1 +nation 1 +center 1 +sweden 1 +onlin 1 +theunivers 1 +tennesse 1 +resourc 1 +found 1 +neal 1 +friedman 1 +report 1 +also 1 +error 1 +correctionsto 1 +note 1 +notic 1 +materi 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +washington 1 +import 1 +part 1 +ofth 1 +review 1 +session 1 +fridai 1 +sieg 1 +hall 1 +wednesdai 1 +room 1 +close 1 +book 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..2e564552 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,65 @@ +linear 5 +method 4 +april 4 +intro 4 +optim 3 +program 3 +graphic 2 +matrix 2 +comput 2 +brad 2 +equat 2 +quadrat 2 +exampl 2 +paper 2 +finit 2 +seminarc 1 +rspring 1 +numer 1 +definit 1 +properti 1 +invers 1 +solv 1 +system 1 +eric 1 +eigenvalu 1 +eigenvector 1 +singular 1 +valu 1 +decomposit 1 +joel 1 +root 1 +find 1 +nonlinear 1 +corei 1 +shuichi 1 +unconstrain 1 +kari 1 +constrain 1 +global 1 +kevin 1 +chuck 1 +ronen 1 +daniel 1 +data 1 +fit 1 +conclus 1 +mike 1 +regress 1 +calibr 1 +ordinari 1 +differenti 1 +adam 1 +joanna 1 +discret 1 +element 1 +radios 1 +fred 1 +pde 1 +differ 1 +interv 1 +arithmet 1 +troi 1 +jonathan 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..454562ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,45 @@ +tanimoto 2 +autumn 2 +educ 2 +mathemat 2 +experi 2 +imag 2 +process 2 +copyright 2 +washington 2 +topic 2 +quarter 2 +quarterscs 1 +special 1 +topicssteven 1 +instructorcs 1 +transcript 1 +base 1 +winter 1 +spring 1 +technolog 1 +collabor 1 +learn 1 +notic 1 +materi 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +graduat 1 +seminar 1 +explor 1 +varieti 1 +relat 1 +useof 1 +comput 1 +specif 1 +activ 1 +varyfrom 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..319099d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,160 @@ +paper 10 +read 8 +novemb 5 +presentor 5 +educ 4 +octob 4 +proceed 4 +html 4 +present 3 +meet 3 +either 3 +home 2 +page 2 +autumn 2 +world 2 +wide 2 +inform 2 +time 2 +copyright 2 +materi 2 +student 2 +discuss 2 +labord 2 +promot 2 +concept 2 +map 2 +first 2 +degre 2 +beyond 2 +brows 2 +possibl 2 +toolkit 2 +layer 2 +ward 2 +transcript 1 +base 1 +wwwwelcom 1 +short 1 +hypermedia 1 +documentfor 1 +contain 1 +theclass 1 +keep 1 +mind 1 +document 1 +static 1 +newinform 1 +ad 1 +notic 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +washington 1 +mccalla 1 +central 1 +importanceof 1 +model 1 +intellig 1 +tutor 1 +sandi 1 +youngquist 1 +paul 1 +barton 1 +davi 1 +aboutinternet 1 +servic 1 +problem 1 +solv 1 +geometri 1 +microworld 1 +tointellig 1 +comput 1 +environ 1 +tessa 1 +bartel 1 +mathematicsconnect 1 +plu 1 +gari 1 +anderson 1 +onlin 1 +combin 1 +vision 1 +littl 1 +technolog 1 +noth 1 +particularli 1 +ambiti 1 +descript 1 +state 1 +second 1 +technic 1 +piec 1 +thethem 1 +learner 1 +take 1 +respons 1 +someth 1 +increasingli 1 +import 1 +futur 1 +choic 1 +third 1 +moresophist 1 +elabor 1 +group 1 +annot 1 +ofwww 1 +describ 1 +intechn 1 +term 1 +internet 1 +infrastructur 1 +couldmak 1 +smart 1 +distribut 1 +tutori 1 +applicationsthat 1 +mosaic 1 +netscap 1 +achiev 1 +pleas 1 +option 1 +advanc 1 +us 1 +webhttp 1 +jeremi 1 +baer 1 +empow 1 +agehttp 1 +ncsa 1 +uiuc 1 +marla 1 +baker 1 +share 1 +comment 1 +soap 1 +trail 1 +line 1 +communitieshttp 1 +john 1 +dietz 1 +enhanc 1 +protocol 1 +lower 1 +serviceshttp 1 +dcewebkit 1 +adam 1 +carlson 1 +hong 1 +zhumeet 1 +michael 1 +aboutcurriculum 1 +navig 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..2df8aa0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,78 @@ +autumn 3 +technolog 3 +learn 3 +paper 3 +schedul 3 +home 2 +page 2 +copyright 2 +washington 2 +current 2 +explor 2 +middl 2 +collabor 1 +learningwelcom 1 +notic 1 +materi 1 +cours 1 +subjectto 1 +view 1 +public 1 +instal 1 +site 1 +otherthan 1 +univers 1 +gener 1 +descript 1 +comput 1 +internet 1 +methodologiesfor 1 +teach 1 +come 1 +togeth 1 +innew 1 +wai 1 +seminar 1 +read 1 +number 1 +forcollabor 1 +particip 1 +student 1 +willtak 1 +respons 1 +make 1 +present 1 +group 1 +ofthes 1 +cover 1 +subset 1 +also 1 +possibl 1 +applic 1 +ofai 1 +visual 1 +techniqu 1 +analysi 1 +evid 1 +ofstud 1 +onlin 1 +context 1 +meet 1 +tuesdai 1 +howev 1 +decid 1 +move 1 +time 1 +better 1 +intopeopl 1 +visit 1 +meani 1 +school 1 +schoolmai 1 +depend 1 +interest 1 +participatingstud 1 +last 1 +updat 1 +septemb 1 +tanimoto 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..28ca3cb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,191 @@ +schedul 17 +system 14 +proc 14 +parallel 12 +processor 11 +alloc 7 +distribut 6 +page 6 +memori 6 +multiprocessor 6 +barbara 6 +santa 6 +conf 5 +multiprogram 5 +sigmetr 4 +share 4 +measur 4 +ipp 4 +workshop 4 +strategi 4 +base 4 +workload 3 +polici 3 +model 3 +demand 3 +mari 2 +arpaci 2 +dusseau 2 +dynam 2 +nguyen 2 +gupta 2 +process 2 +symp 2 +oper 2 +vaswani 2 +zahorjan 2 +us 2 +runtim 2 +tutori 2 +supercomput 2 +ofcomput 2 +eric 2 +parson 2 +kenneth 2 +sevcik 2 +feitelson 2 +mccann 2 +memorymultiprocessor 2 +coschedul 2 +migrat 2 +coordin 2 +mvmv 1 +global 1 +resourc 1 +manag 1 +systemsprofessor 1 +vernontim 1 +pmlocat 1 +gang 1 +now 1 +ousterhout 1 +techniqu 1 +concurr 1 +inrd 1 +vahdat 1 +anderson 1 +patterson 1 +interact 1 +andsequenti 1 +network 1 +workstat 1 +ofth 1 +equi 1 +partit 1 +tucker 1 +control 1 +issuesfor 1 +principl 1 +workloadcharacterist 1 +univ 1 +ofwashington 1 +technic 1 +report 1 +applic 1 +shun 1 +leung 1 +evangelo 1 +markato 1 +thoma 1 +leblanc 1 +affin 1 +loopschedul 1 +expand 1 +version 1 +iniee 1 +tran 1 +han 1 +zima 1 +chapman 1 +compil 1 +ieee 1 +edjlali 1 +agraw 1 +sussman 1 +saltz 1 +data 1 +parallelprogram 1 +adapt 1 +environ 1 +april 1 +comparisonsshikharesh 1 +majumdar 1 +derek 1 +eager 1 +richard 1 +bunt 1 +confer 1 +high 1 +variabilityservic 1 +time 1 +dror 1 +bill 1 +nitzberg 1 +characterist 1 +product 1 +scientif 1 +thenasa 1 +am 1 +ipsc 1 +follow 1 +also 1 +cover 1 +requir 1 +read 1 +leutenegg 1 +vernon 1 +perform 1 +transact 1 +comput 1 +patrick 1 +sobalvarro 1 +william 1 +weihl 1 +ofparallel 1 +job 1 +impact 1 +burger 1 +hyder 1 +miller 1 +wood 1 +tradeoff 1 +rohit 1 +chandra 1 +scott 1 +devin 1 +verghes 1 +anoop 1 +mendel 1 +rosenblum 1 +multiprocessorcomput 1 +server 1 +architectur 1 +support 1 +programminglanguag 1 +asplo 1 +jose 1 +alverson 1 +kahan 1 +korri 1 +smith 1 +tera 1 +octob 1 +discuss 1 +open 1 +problem 1 +culler 1 +effectivedistribut 1 +computersystem 1 +philadelphia 1 +june 1 +appear 1 +rudolph 1 +identif 1 +activ 1 +work 1 +set 1 +program 1 +theoret 1 +result 1 +karlin 1 +paper 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..8e38ab53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,67 @@ +acrobat 4 +format 4 +syllabu 3 +schedul 3 +winter 2 +ruzzo 2 +last 2 +updat 2 +html 2 +titl 2 +postscript 2 +file 2 +usual 2 +translat 2 +adob 2 +free 2 +viewer 2 +page 2 +algorithm 1 +molecular 1 +biologi 1 +richard 1 +karp 1 +larri 1 +martin 1 +tompaclass 1 +bboard 1 +handout 1 +administr 1 +lectur 1 +note 1 +draft 1 +homework 1 +slide 1 +cours 1 +materi 1 +provid 1 +sever 1 +load 1 +fast 1 +readabl 1 +mani 1 +part 1 +gener 1 +automat 1 +latex 1 +faith 1 +origin 1 +latest 1 +greatest 1 +ghostscript 1 +home 1 +window 1 +linux 1 +time 1 +support 1 +fewer 1 +system 1 +isprefer 1 +smaller 1 +render 1 +isfast 1 +legibl 1 +print 1 +ghostscriptcan 1 +exampl 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..17c3575d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,172 @@ +paper 11 +tuesdai 7 +read 6 +architectur 5 +quarter 4 +week 3 +present 3 +posit 3 +comput 3 +anderson 3 +asplo 3 +line 3 +memori 3 +mail 3 +organ 2 +cseg 2 +lunch 2 +format 2 +previou 2 +ofth 2 +discuss 2 +lead 2 +credit 2 +meet 2 +valu 2 +follow 2 +processor 2 +burger 2 +machin 2 +avail 2 +subscrib 2 +list 2 +lunchcs 1 +lunchcours 1 +jean 1 +loupbaermeet 1 +time 1 +continu 1 +withalmost 1 +year 1 +select 1 +discussedat 1 +begin 1 +distribut 1 +tobe 1 +might 1 +formal 1 +work 1 +progress 1 +byesteem 1 +member 1 +mostlyw 1 +hopefulli 1 +heat 1 +discussionson 1 +literatur 1 +differ 1 +quartersi 1 +start 1 +fromparticip 1 +recent 1 +workshop 1 +oncrit 1 +issu 1 +research 1 +copi 1 +hereread 1 +morethem 1 +mani 1 +thank 1 +ruth 1 +molli 1 +brown 1 +gershoni 1 +matthai 1 +philipos 1 +tabular 1 +summari 1 +guru 1 +found 1 +herefor 1 +usual 1 +thestud 1 +either 1 +informallyor 1 +slide 1 +cours 1 +variabl 1 +ifyou 1 +first 1 +octob 1 +local 1 +load 1 +predict 1 +lipasti 1 +wilkerson 1 +shen 1 +link 1 +advanceprogrami 1 +short 1 +bibliographi 1 +appreci 1 +volunt 1 +thesaulsburi 1 +readashlei 1 +saulsburi 1 +fong 1 +pong 1 +andrea 1 +nowatzyk 1 +miss 1 +wall 1 +case 1 +integr 1 +isca 1 +readm 1 +fillo 1 +keckler 1 +dalli 1 +multicomput 1 +micro 1 +machinelink 1 +readdoug 1 +stefano 1 +kaxira 1 +jame 1 +goodman 1 +datascalar 1 +spsd 1 +execut 1 +model 1 +univers 1 +wisconsin 1 +madison 1 +scienc 1 +depart 1 +technic 1 +report 1 +juli 1 +neton 1 +intellig 1 +iram 1 +chip 1 +rememb 1 +patterson 1 +cardwel 1 +fromm 1 +keeton 1 +kozyraki 1 +thomasand 1 +yelick 1 +availableher 1 +fortun 1 +author 1 +prof 1 +send 1 +email 1 +themajordomo 1 +majordomo 1 +content 1 +shouldinclud 1 +leav 1 +subject 1 +lineblank 1 +shortli 1 +receiv 1 +messag 1 +back 1 +sai 1 +welcom 1 +baer 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..a6c0afcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,13 @@ +home 2 +pagecs 1 +page 1 +spring 1 +offer 1 +experiment 1 +graduat 1 +cours 1 +human 1 +comput 1 +interact 1 +born 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..9d67656b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,127 @@ +week 9 +compil 4 +time 2 +memori 2 +gupta 2 +anderson 2 +gener 2 +special 2 +charl 2 +data 2 +optim 2 +roger 2 +regist 2 +mail 2 +line 2 +seminarcs 1 +seminarcours 1 +organ 1 +susan 1 +eggersand 1 +craigchambersmeet 1 +wednesdai 1 +offici 1 +loew 1 +butreal 1 +meet 1 +second 1 +floor 1 +atrium 1 +scheduleweek 1 +memspi 1 +analyz 1 +system 1 +bottleneck 1 +program 1 +margaretmartonosi 1 +anoop 1 +thoma 1 +approach 1 +applic 1 +consel 1 +francoi 1 +noel 1 +practic 1 +flow 1 +framework 1 +arrai 1 +refer 1 +analysi 1 +itsus 1 +evelyn 1 +duesterwald 1 +rajiv 1 +maryl 1 +soffa 1 +valu 1 +depend 1 +graph 1 +represent 1 +without 1 +taxat 1 +danielweis 1 +crew 1 +michael 1 +ernst 1 +bjarn 1 +steensgaard 1 +litvinov 1 +iter 1 +coalesc 1 +georg 1 +andrew 1 +appel 1 +garrett 1 +machin 1 +specif 1 +hooverand 1 +kenneth 1 +zadeck 1 +dean 1 +grant 1 +paradigm 1 +distribut 1 +multicomput 1 +byprivthviraj 1 +banerje 1 +lewi 1 +minimum 1 +cost 1 +interprocedur 1 +alloc 1 +stevenkurland 1 +fischer 1 +secoski 1 +todd 1 +knoblock 1 +erik 1 +grove 1 +lazi 1 +strength 1 +reduct 1 +jen 1 +knoop 1 +oliv 1 +ruth 1 +andbernhard 1 +steffen 1 +mock 1 +tullsen 1 +subscrib 1 +list 1 +send 1 +email 1 +majordomo 1 +content 1 +includ 1 +subscribecsek 1 +leav 1 +subject 1 +blank 1 +shortlyrec 1 +messag 1 +back 1 +sai 1 +welcom 1 +melodi 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..edc0ad63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,178 @@ +parallel 14 +program 11 +data 11 +compil 9 +analysi 7 +ppopp 6 +optim 6 +applic 5 +commun 5 +ipp 4 +icpp 4 +lcpc 4 +kennedi 4 +distribut 4 +model 4 +quarter 3 +matlab 3 +choi 3 +comput 3 +mail 2 +messag 2 +scalapack 2 +exploit 2 +task 2 +ramaswami 2 +hodg 2 +banerje 2 +sung 2 +cross 2 +loop 2 +reus 2 +cach 2 +cooper 2 +mcintosh 2 +global 2 +chakarabarti 2 +gupta 2 +pldi 2 +integer 2 +perform 2 +environ 2 +adv 2 +input 2 +output 2 +characterist 2 +scalabl 2 +crandal 2 +aydt 2 +chien 2 +reed 2 +strategi 2 +core 2 +bordawekar 2 +choudahari 2 +koelbel 2 +paleczni 2 +local 2 +iter 2 +block 2 +cyclic 2 +midkiff 2 +util 2 +thread 2 +fahring 2 +hain 2 +mehrotra 2 +gener 2 +environmentslarri 1 +snyderautumn 1 +mondai 1 +loew 1 +welcom 1 +home 1 +page 1 +read 1 +select 1 +paper 1 +recent 1 +supercomput 1 +ten 1 +schedul 1 +atmospher 1 +casual 1 +andwil 1 +hopefulli 1 +ignit 1 +live 1 +discuss 1 +everyon 1 +attend 1 +seminar 1 +expect 1 +present 1 +thepap 1 +still 1 +spot 1 +open 1 +hurri 1 +sign 1 +pleas 1 +send 1 +majordomo 1 +subscrib 1 +cseo 1 +bodi 1 +subscribeto 1 +class 1 +list 1 +datepaperpresentor 1 +falcon 1 +interact 1 +restructur 1 +deros 1 +gallivan 1 +gallopoulo 1 +marsolf 1 +padua 1 +portabl 1 +driven 1 +ramkumar 1 +forb 1 +kale 1 +ruth 1 +sean 1 +jason 1 +holidai 1 +stream 1 +librari 1 +complex 1 +structur 1 +gotwal 1 +sriniva 1 +gannon 1 +brad 1 +eric 1 +cilk 1 +effici 1 +multithread 1 +runtim 1 +system 1 +blumof 1 +joerg 1 +kuszmaul 1 +leiserson 1 +randal 1 +zhou 1 +code 1 +object 1 +orient 1 +mathemat 1 +andersson 1 +fritzson 1 +realign 1 +base 1 +kamachi 1 +kusano 1 +suehiro 1 +tamura 1 +sakon 1 +us 1 +access 1 +inform 1 +rinard 1 +tool 1 +rel 1 +debug 1 +develop 1 +larg 1 +numer 1 +abramson 1 +foster 1 +michalak 1 +sosic 1 +potpourri 1 +last 1 +modifi 1 +tuesdai 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..9a4c7d43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,27 @@ +system 3 +seminar 2 +mail 2 +list 2 +autumn 2 +preliminariesif 1 +alreadi 1 +need 1 +variou 1 +crucial 1 +bit 1 +ofinform 1 +week 1 +cancel 1 +besent 1 +send 1 +request 1 +line 1 +subscrib 1 +systemsin 1 +messag 1 +bodi 1 +quarterli 1 +web 1 +spring 1 +summer 1 +winter 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..ed1d8af9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,70 @@ +present 8 +system 4 +file 3 +meet 2 +quarter 2 +read 2 +paper 2 +oper 2 +memori 2 +perform 2 +summer 1 +quarterw 1 +fridai 1 +loew 1 +wewil 1 +final 1 +appear 1 +upcomingacm 1 +symposium 1 +principl 1 +sosp 1 +pleas 1 +havean 1 +interact 1 +discuss 1 +scheduleoct 1 +implement 1 +global 1 +manag 1 +workstat 1 +cluster 1 +feelei 1 +log 1 +virtual 1 +savag 1 +autoraid 1 +hierarch 1 +storag 1 +wilk 1 +serverless 1 +network 1 +franklin 1 +montgomeri 1 +tiwari 1 +hypervisor 1 +base 1 +fault 1 +toler 1 +chan 1 +philipos 1 +wolman 1 +exploit 1 +weak 1 +connect 1 +mobil 1 +access 1 +voelker 1 +litvinov 1 +cach 1 +coher 1 +stackabl 1 +sriram 1 +fiuczynski 1 +impact 1 +architectur 1 +trend 1 +anderson 1 +romer 1 +return 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..287858e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,146 @@ +comput 11 +program 8 +scientif 7 +perform 6 +mail 5 +class 5 +list 4 +high 3 +student 3 +us 3 +languag 3 +machin 3 +parallel 3 +supercomput 3 +develop 3 +wednesdai 2 +page 2 +send 2 +majordomo 2 +subscrib 2 +bodi 2 +messag 2 +compil 2 +releas 2 +suitabl 2 +loop 2 +fast 2 +modern 2 +includ 2 +workstat 2 +scientist 2 +follow 2 +write 2 +zphigh 1 +zpllarri 1 +snyder 1 +teamautumn 1 +quarter 1 +sieg 1 +loew 1 +dai 1 +welcom 1 +home 1 +pleas 1 +csezpl 1 +subscribeto 1 +also 1 +interest 1 +join 1 +usersmail 1 +distribut 1 +informationabout 1 +librarai 1 +relatedinform 1 +ad 1 +user 1 +descriptionzpl 1 +scientificprogram 1 +previous 1 +written 1 +infortran 1 +arrai 1 +dramaticallysimplifi 1 +elimin 1 +nuisanc 1 +index 1 +run 1 +allow 1 +programm 1 +code 1 +andtrivi 1 +migrat 1 +largest 1 +simpli 1 +byrecompil 1 +toth 1 +commun 1 +design 1 +engin 1 +want 1 +learn 1 +effect 1 +cover 1 +topic 1 +state 1 +syntax 1 +semant 1 +algorithm 1 +exploit 1 +wysiwyg 1 +easili 1 +well 1 +scienc 1 +faster 1 +prototyp 1 +matlab 1 +text 1 +booknon 1 +reli 1 +materi 1 +document 1 +found 1 +onin 1 +specif 1 +close 1 +zplprogram 1 +guid 1 +version 1 +prerequisitesfamiliar 1 +fortran 1 +ormatlab 1 +unix 1 +platform 1 +assum 1 +variabl 1 +credit 1 +audit 1 +debug 1 +select 1 +technic 1 +disciplin 1 +rang 1 +whole 1 +applic 1 +kernel 1 +inner 1 +informationcours 1 +syllabu 1 +lectur 1 +note 1 +appli 1 +ncsa 1 +block 1 +grant 1 +account 1 +faculti 1 +staff 1 +remotezpl 1 +compileroth 1 +import 1 +link 1 +sung 1 +choi 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..14bd07e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,127 @@ +class 6 +design 4 +mail 4 +instructor 4 +last 4 +lectur 4 +home 3 +page 3 +gaetano 3 +announc 3 +updat 3 +washington 3 +autumn 2 +quarter 2 +borriello 2 +corei 2 +us 2 +inform 2 +document 2 +send 2 +webmast 2 +administr 2 +cours 2 +final 2 +exam 2 +tool 2 +topic 2 +offic 2 +hour 2 +sieg 2 +logic 2 +katz 2 +benjamin 2 +cum 2 +addison 2 +weslei 2 +maintain 2 +comput 2 +introduct 1 +digit 1 +andersonwelcom 1 +contain 1 +whole 1 +bunch 1 +keep 1 +mind 1 +static 1 +especi 1 +messag 1 +ad 1 +frequent 1 +problem 1 +gener 1 +tocs 1 +notic 1 +system 1 +archiv 1 +messagess 1 +everyon 1 +goal 1 +syllabu 1 +meet 1 +time 1 +mondai 1 +decemb 1 +workload 1 +grade 1 +expect 1 +laboratori 1 +softwar 1 +polici 1 +collabor 1 +cheat 1 +address 1 +overal 1 +schedul 1 +anderson 1 +corin 1 +aweekli 1 +assign 1 +weekli 1 +quizz 1 +onlin 1 +version 1 +slide 1 +textbook 1 +contemporari 1 +author 1 +publish 1 +note 1 +interest 1 +evolut 1 +implement 1 +technolog 1 +aid 1 +synario 1 +feedback 1 +tell 1 +think 1 +thing 1 +go 1 +even 1 +anonym 1 +desir 1 +question 1 +evalu 1 +complet 1 +link 1 +previou 1 +portion 1 +reprint 1 +adapt 1 +academ 1 +nonprofit 1 +purpos 1 +provid 1 +sourc 1 +accur 1 +quot 1 +duli 1 +credit 1 +copyright 1 +depart 1 +scienc 1 +engin 1 +univers 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..5ff0e434 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,36 @@ +comput 4 +societi 2 +cours 2 +societycs 1 +societywelcom 1 +home 1 +page 1 +wintercs 1 +focu 1 +social 1 +econom 1 +ethic 1 +legal 1 +implic 1 +present 1 +internet 1 +futur 1 +nation 1 +andglob 1 +inform 1 +highwai 1 +instructor 1 +alan 1 +born 1 +class 1 +time 1 +tue 1 +thur 1 +sieg 1 +syllabusclass 1 +schedulelink 1 +relev 1 +sitesbook 1 +journal 1 +avail 1 +referenceassignmentsassign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..2c394346 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,50 @@ +sieg 3 +hour 3 +artifici 2 +question 2 +index 1 +pagecs 1 +intelligencefal 1 +quarter 1 +intellig 1 +pose 1 +fundament 1 +andchalleng 1 +comput 1 +scienc 1 +build 1 +intelligentmachin 1 +cours 1 +address 1 +provid 1 +anin 1 +depth 1 +introduct 1 +select 1 +topic 1 +includ 1 +agentarchitectur 1 +knowledg 1 +represent 1 +search 1 +plan 1 +machinelearn 1 +reason 1 +uncertainti 1 +methodolog 1 +staff 1 +weldweld 1 +marc 1 +friedmanfriedman 1 +nick 1 +kushmericknick 1 +outlin 1 +topicsprojectread 1 +assignmentsassign 1 +examsgradingresourcesth 1 +class 1 +mailinglist 1 +also 1 +archiv 1 +past 1 +messag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..ad8c5e83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,132 @@ +check 7 +inform 5 +index 5 +page 5 +server 4 +mail 4 +html 4 +glimps 4 +zephyr 4 +us 3 +show 3 +regist 3 +comment 3 +intellig 2 +read 2 +program 2 +link 2 +list 2 +paul 2 +file 2 +rememb 2 +want 2 +first 2 +back 2 +interfac 2 +user 2 +make 2 +anoth 2 +class 2 +provid 2 +filter 2 +ics 1 +internet 1 +meet 1 +tuesdai 1 +siegcreat 1 +side 1 +scriptspleas 1 +guidelin 1 +towrit 1 +execut 1 +someon 1 +follow 1 +tothem 1 +peopl 1 +place 1 +collect 1 +gener 1 +futur 1 +begun 1 +updat 1 +ad 1 +phoenix 1 +impress 1 +older 1 +topic 1 +offici 1 +releas 1 +instal 1 +manual 1 +well 1 +develop 1 +home 1 +interest 1 +detail 1 +work 1 +winter 1 +usenix 1 +paper 1 +design 1 +implement 1 +wide 1 +area 1 +wai 1 +zwhere 1 +mosiac 1 +locat 1 +databas 1 +current 1 +guess 1 +room 1 +version 1 +znol 1 +zwatch 1 +zlocat 1 +extra 1 +info 1 +except 1 +statu 1 +on 1 +anyon 1 +note 1 +lectur 1 +discuss 1 +sent 1 +displai 1 +belief 1 +short 1 +mike 1 +releg 1 +review 1 +site 1 +good 1 +miscellan 1 +rather 1 +rambl 1 +kurt 1 +grumbl 1 +problem 1 +improv 1 +mosaic 1 +bring 1 +luddit 1 +perspect 1 +idea 1 +network 1 +sourc 1 +nick 1 +vagu 1 +relat 1 +decemb 1 +cacm 1 +summari 1 +articl 1 +chang 1 +document 1 +itout 1 +withci 1 +send 1 +theentir 1 +address 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..edb08099 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,35 @@ +system 2 +open 2 +home 1 +page 1 +machin 1 +organ 1 +program 1 +credit 1 +introduct 1 +current 1 +structur 1 +control 1 +commun 1 +memori 1 +processor 1 +devic 1 +project 1 +involv 1 +detail 1 +studi 1 +specif 1 +small 1 +computerhardwar 1 +softwar 1 +prerequisit 1 +consent 1 +instructor 1 +student 1 +taken 1 +freshmen 1 +semesterli 1 +cours 1 +inform 1 +info 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..79fe19c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,26 @@ +design 3 +comput 2 +structur 2 +memori 2 +page 1 +introduct 1 +architectur 1 +credit 1 +system 1 +compon 1 +processor 1 +instruct 1 +address 1 +control 1 +microprogram 1 +manag 1 +cach 1 +hierarchi 1 +interrupt 1 +prerequisit 1 +andc 1 +semesterli 1 +cours 1 +inform 1 +info 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..3bf9173b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,26 @@ +comput 3 +advanc 2 +architectur 2 +home 1 +page 1 +credit 1 +techniqu 1 +design 1 +parallel 1 +process 1 +andpipelin 1 +multiprocessor 1 +multi 1 +network 1 +high 1 +performancemachin 1 +special 1 +purpos 1 +processor 1 +data 1 +flow 1 +prerequisit 1 +semesterli 1 +cours 1 +inform 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..205a8bd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,31 @@ +parallel 2 +machin 2 +home 1 +page 1 +advanc 1 +comput 1 +architectur 1 +credit 1 +algorithm 1 +principl 1 +detect 1 +vectorizingcompil 1 +interconnect 1 +network 1 +simd 1 +mimd 1 +processorsynchron 1 +data 1 +coher 1 +multi 1 +dataflow 1 +special 1 +purposeprocessor 1 +prerequisit 1 +consent 1 +instructor 1 +semesterli 1 +cours 1 +inform 1 +info 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..755d3bc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,243 @@ +assign 11 +class 10 +problem 9 +week 9 +februari 9 +avail 8 +section 8 +final 8 +quizz 7 +us 7 +program 6 +lectur 6 +oper 5 +system 5 +set 5 +wednesdai 5 +schedul 5 +late 5 +april 5 +spring 4 +grade 4 +read 4 +cours 4 +discuss 4 +learn 4 +dai 4 +march 4 +introduct 3 +bart 3 +answer 3 +note 3 +wisc 3 +eduoffic 3 +csphone 3 +offic 3 +hour 3 +page 3 +comput 3 +exam 3 +quiz 3 +last 3 +process 3 +unix 3 +goal 3 +memori 3 +three 3 +work 3 +take 3 +fridai 2 +mondai 2 +need 2 +orient 2 +book 2 +first 2 +tuesdai 2 +thursdai 2 +follow 2 +past 2 +semaphor 2 +messag 2 +simul 2 +semest 2 +hand 2 +group 2 +probabl 2 +solari 2 +workstat 2 +januari 2 +alloc 2 +advanc 2 +topic 2 +univers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +departmentc 1 +millerc 1 +systemsnew 1 +stufffin 1 +post 1 +readi 1 +print 1 +staffinstructor 1 +milleremail 1 +noonor 1 +appoint 1 +karuna 1 +muthiahemail 1 +muthiah 1 +jonathan 1 +weyersemail 1 +weyer 1 +materialsth 1 +organ 1 +around 1 +thelectur 1 +notesar 1 +textbookmodern 1 +tanenbaum 1 +programmingassign 1 +purchas 1 +copi 1 +ofobject 1 +pohl 1 +whatev 1 +favorit 1 +come 1 +modern 1 +systemsandobject 1 +store 1 +sectionslectur 1 +time 1 +sciencesdiscuss 1 +nolandnot 1 +extra 1 +mainli 1 +recit 1 +materialcov 1 +weekli 1 +occas 1 +import 1 +detail 1 +ofth 1 +homework 1 +make 1 +sure 1 +leav 1 +room 1 +attend 1 +quizzesther 1 +midterm 1 +option 1 +start 1 +second 1 +thediscuss 1 +minut 1 +concurr 1 +monitor 1 +usetrac 1 +activ 1 +real 1 +drive 1 +algorithm 1 +trace 1 +driven 1 +experi 1 +quantit 1 +analyz 1 +written 1 +setsdur 1 +severalwritten 1 +base 1 +turn 1 +though 1 +find 1 +poorli 1 +youdon 1 +problemssolut 1 +theproblem 1 +happi 1 +question 1 +andlook 1 +solut 1 +variou 1 +synchronizationprimit 1 +solv 1 +manag 1 +hardwar 1 +softwar 1 +workassign 1 +date 1 +list 1 +handout 1 +entir 1 +havethre 1 +daysof 1 +credit 1 +differ 1 +eachof 1 +absolut 1 +accept 1 +cannot 1 +assignmentthat 1 +weekof 1 +cheatingprogram 1 +done 1 +partner 1 +independ 1 +cheater 1 +receiv 1 +maximum 1 +penalti 1 +includ 1 +receivingan 1 +mark 1 +transcript 1 +facilitiesw 1 +run 1 +window 1 +student 1 +regist 1 +account 1 +policyif 1 +lowest 1 +drop 1 +averag 1 +beno 1 +break 1 +count 1 +taught 1 +inth 1 +rang 1 +scheduleth 1 +tent 1 +could 1 +chang 1 +overview 1 +processesweek 1 +dispatch 1 +creationweek 1 +cooper 1 +synchronizationweek 1 +semaphoresweek 1 +monitorsweek 1 +deadlocksweek 1 +debug 1 +strategi 1 +dynam 1 +breakweek 1 +relocationweek 1 +segment 1 +tlbsweek 1 +virtual 1 +replac 1 +thrash 1 +devic 1 +filesweek 1 +disk 1 +directoriesweek 1 +protectionweek 1 +secur 1 +modifi 1 +bybart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..761258ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,82 @@ +bart 6 +week 5 +april 5 +internet 4 +februari 4 +march 4 +miller 3 +honor 2 +spring 2 +januari 2 +seminarunivers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +departmentc 1 +millerc 1 +seminarinstructor 1 +milleremail 1 +wisc 1 +eduoffic 1 +csphone 1 +offic 1 +hour 1 +wednesdai 1 +fridai 1 +noonor 1 +appoint 1 +lectureslectur 1 +time 1 +mondai 1 +comput 1 +sciencesclass 1 +schedulether 1 +written 1 +assign 1 +class 1 +requir 1 +attendal 1 +lectur 1 +particip 1 +discuss 1 +follow 1 +schedul 1 +mostli 1 +right 1 +could 1 +chang 1 +introduct 1 +overviewweek 1 +larri 1 +landweb 1 +architectur 1 +protocolsweek 1 +client 1 +server 1 +remot 1 +procedur 1 +callsweek 1 +system 1 +securityweek 1 +eric 1 +bach 1 +secur 1 +encryptionweek 1 +breakweek 1 +miron 1 +livni 1 +imag 1 +pictur 1 +netweek 1 +high 1 +perform 1 +file 1 +systemsweek 1 +david 1 +wood 1 +supercomputerweek 1 +laru 1 +javaweek 1 +discussionslast 1 +modifi 1 +bybart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..fb5edaec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,168 @@ +read 14 +paper 12 +class 8 +discuss 7 +system 6 +topic 5 +oper 4 +write 4 +project 4 +cours 3 +detail 3 +import 3 +first 3 +advanc 2 +fall 2 +give 2 +current 2 +lectur 2 +review 2 +form 2 +group 2 +twice 2 +week 2 +assign 2 +post 2 +particip 2 +comment 2 +design 2 +idea 2 +well 2 +second 2 +final 2 +grade 2 +availbl 2 +tuesdai 2 +thursdai 2 +univers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +departmentc 1 +bart 1 +millerc 1 +systemssummarythi 1 +intend 1 +broad 1 +exposur 1 +advancedoper 1 +protect 1 +secur 1 +memori 1 +manag 1 +kernel 1 +file 1 +synchron 1 +name 1 +distribut 1 +pleas 1 +rest 1 +inform 1 +sheet 1 +carefulli 1 +textther 1 +realli 1 +satisfactori 1 +textbook 1 +graduat 1 +level 1 +operatingsystemsclass 1 +literatur 1 +text 1 +structur 1 +around 1 +journal 1 +articl 1 +andconfer 1 +proceed 1 +abl 1 +purchas 1 +doit 1 +handout 1 +relev 1 +willinstead 1 +adiscuss 1 +major 1 +theme 1 +us 1 +focal 1 +point 1 +classmat 1 +meetonc 1 +especi 1 +part 1 +listaccord 1 +schedul 1 +formula 1 +success 1 +papersindepend 1 +try 1 +identifyth 1 +issu 1 +thepap 1 +discussionsclass 1 +meet 1 +talk 1 +besupport 1 +opinion 1 +will 1 +activ 1 +daili 1 +geta 1 +expect 1 +quietli 1 +listen 1 +beveri 1 +unhappi 1 +papersdur 1 +short 1 +page 1 +andon 1 +longer 1 +paperwil 1 +base 1 +work 1 +understood 1 +facilityand 1 +extens 1 +area 1 +involv 1 +summaryof 1 +aselect 1 +topicsfrom 1 +choos 1 +good 1 +least 1 +refere 1 +fellowstud 1 +writer 1 +critic 1 +anoth 1 +person 1 +giveth 1 +reader 1 +look 1 +someon 1 +els 1 +revis 1 +pass 1 +examsther 1 +exam 1 +keep 1 +busi 1 +gradesscor 1 +assignmenti 1 +summari 1 +score 1 +fromth 1 +proposalsi 1 +also 1 +gradesar 1 +avail 1 +detailstim 1 +place 1 +csoffic 1 +hour 1 +noonlast 1 +modifi 1 +bybart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..6c1b2e69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,146 @@ +comput 12 +fortran 10 +program 10 +solut 9 +click 8 +week 7 +question 7 +home 5 +cours 5 +microsoft 5 +scienc 4 +lectur 4 +bestor 4 +section 3 +cover 3 +student 3 +engin 3 +write 3 +solv 3 +problem 3 +exercis 3 +gareth 3 +us 3 +page 3 +basic 2 +structur 2 +elementari 2 +intend 2 +languag 2 +primarili 2 +instructor 2 +assign 2 +pointer 2 +interest 2 +pleas 2 +class 2 +vectra 2 +window 2 +also 2 +howev 2 +copi 2 +pagec 1 +introduct 1 +programmingsect 1 +credit 1 +need 1 +prepar 1 +prior 1 +experi 1 +requir 1 +knowledg 1 +assum 1 +materi 1 +enabl 1 +simpl 1 +done 1 +receiv 1 +littl 1 +instruct 1 +high 1 +school 1 +taught 1 +entir 1 +major 1 +descript 1 +menu 1 +import 1 +announc 1 +read 1 +grade 1 +polici 1 +syllabu 1 +text 1 +note 1 +psycholog 1 +march 1 +punctual 1 +avoid 1 +disturb 1 +offic 1 +overal 1 +gener 1 +code 1 +though 1 +want 1 +time 1 +algorithm 1 +even 1 +depend 1 +particular 1 +follow 1 +mondai 1 +subroutin 1 +function 1 +labyou 1 +statist 1 +contain 1 +hewlett 1 +packard 1 +run 1 +open 1 +seven 1 +dai 1 +except 1 +certain 1 +holidai 1 +printer 1 +room 1 +locat 1 +across 1 +hall 1 +dorm 1 +probabl 1 +purchas 1 +lahei 1 +person 1 +insid 1 +textbook 1 +work 1 +lab 1 +campu 1 +compil 1 +first 1 +softwar 1 +includ 1 +mail 1 +netscap 1 +depart 1 +start 1 +point 1 +internet 1 +explor 1 +lyco 1 +search 1 +world 1 +wide 1 +keyword 1 +dilbert 1 +comic 1 +relief 1 +long 1 +night 1 +copyright 1 +wisc 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..48d58be3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,265 @@ +week 33 +program 20 +exam 18 +question 16 +click 14 +solut 14 +comput 13 +assign 13 +problem 12 +grade 11 +fortran 10 +class 10 +solv 9 +lectur 8 +home 7 +final 7 +section 6 +mail 6 +languag 5 +scienc 5 +note 5 +offic 5 +term 5 +microsoft 5 +cours 4 +import 4 +text 4 +exercis 4 +pleas 4 +bestor 4 +complet 4 +hand 4 +cover 4 +hour 4 +us 4 +fridai 4 +consult 4 +skill 4 +read 3 +instructor 3 +polici 3 +gareth 3 +must 3 +receiv 3 +approxim 3 +copi 3 +includ 3 +good 3 +long 3 +even 3 +compil 3 +mondai 3 +help 3 +send 3 +write 3 +page 3 +primarili 2 +engin 2 +student 2 +syllabu 2 +pointer 2 +interest 2 +contribut 2 +elig 2 +pass 2 +curv 2 +april 2 +come 2 +abl 2 +work 2 +understand 2 +line 2 +well 2 +though 2 +need 2 +time 2 +seven 2 +error 2 +wednesdai 2 +answer 2 +login 2 +printer 2 +netscap 2 +gener 2 +except 2 +want 2 +first 2 +learn 2 +particular 2 +vectra 2 +window 2 +also 2 +howev 2 +pagec 1 +algebra 1 +programmingsect 1 +taught 1 +entir 1 +intend 1 +major 1 +descript 1 +menu 1 +announc 1 +psycholog 1 +punctual 1 +avoid 1 +disturb 1 +lowest 1 +score 1 +mean 1 +rang 1 +thur 1 +februari 1 +amclick 1 +list 1 +current 1 +identif 1 +tent 1 +follow 1 +topic 1 +semest 1 +relev 1 +anyth 1 +unsur 1 +instead 1 +wait 1 +try 1 +discov 1 +didn 1 +realli 1 +someth 1 +notestext 1 +applic 1 +edit 1 +koffman 1 +friedman 1 +avail 1 +substitut 1 +show 1 +overhead 1 +projector 1 +exampl 1 +addit 1 +board 1 +respons 1 +materi 1 +assignmentsther 1 +three 1 +constitut 1 +ensur 1 +regardless 1 +perform 1 +stai 1 +longer 1 +extra 1 +close 1 +book 1 +bring 1 +pencil 1 +calcul 1 +necessari 1 +attempt 1 +everi 1 +without 1 +automat 1 +zero 1 +risk 1 +fail 1 +gradesheet 1 +handin 1 +directori 1 +onlin 1 +late 1 +academ 1 +misconduct 1 +cheat 1 +specif 1 +pmhow 1 +wear 1 +name 1 +tag 1 +duti 1 +short 1 +messag 1 +syntax 1 +inform 1 +requir 1 +explan 1 +best 1 +normal 1 +dissert 1 +research 1 +modem 1 +therefor 1 +outsid 1 +make 1 +appoint 1 +easili 1 +contact 1 +regularli 1 +exerciseson 1 +distinguish 1 +programm 1 +doesn 1 +matter 1 +familiar 1 +techniqu 1 +weekli 1 +small 1 +trivial 1 +give 1 +look 1 +think 1 +right 1 +step 1 +would 1 +overal 1 +structur 1 +code 1 +algorithm 1 +depend 1 +subroutin 1 +function 1 +labyou 1 +statist 1 +contain 1 +hewlett 1 +packard 1 +run 1 +open 1 +dai 1 +certain 1 +holidai 1 +room 1 +locat 1 +across 1 +hall 1 +dorm 1 +probabl 1 +purchas 1 +lahei 1 +person 1 +insid 1 +textbook 1 +lab 1 +campu 1 +softwar 1 +depart 1 +start 1 +point 1 +internet 1 +explor 1 +lyco 1 +search 1 +world 1 +wide 1 +keyword 1 +dilbert 1 +comic 1 +relief 1 +night 1 +copyright 1 +wisc 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..3cea16fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,73 @@ +program 4 +page 3 +fall 2 +section 2 +dave 2 +burnett 2 +wisc 2 +offic 2 +updat 2 +home 2 +algebra 1 +languag 1 +name 1 +egglestonemail 1 +eduoffic 1 +phone 1 +hour 1 +announc 1 +note 1 +origin 1 +output 1 +prog 1 +error 1 +dai 1 +week 1 +correct 1 +valu 1 +inform 1 +exam 1 +question 1 +ask 1 +hourlywork 1 +classread 1 +scan 1 +thursdai 1 +class 1 +avail 1 +solut 1 +quiz 1 +grade 1 +gener 1 +cours 1 +informationc 1 +pagecours 1 +objectivesvectra 1 +labc 1 +consultantssyllabuswork 1 +homeclass 1 +handout 1 +gradeshomeworkexam 1 +quizzesmiscellan 1 +archivepolici 1 +informationemail 1 +policygrad 1 +policyl 1 +policyacadem 1 +misconduct 1 +policytextproblem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +list 1 +known 1 +erratalast 1 +modifi 1 +eggleston 1 +base 1 +greg 1 +sharp 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..2d3ca051 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,117 @@ +project 10 +system 8 +paper 7 +discuss 5 +assign 5 +implement 4 +lectur 4 +oper 3 +cours 3 +topic 3 +read 3 +research 3 +us 3 +first 3 +count 3 +spring 2 +involv 2 +propos 2 +class 2 +particip 2 +tuesdai 2 +text 2 +semest 2 +grade 2 +schedul 2 +slide 2 +advanc 1 +summari 1 +intend 1 +give 1 +broad 1 +exposur 1 +advancedoper 1 +import 1 +compon 1 +ofvari 1 +anexperiment 1 +cover 1 +topicsinclud 1 +synchron 1 +commun 1 +memori 1 +manag 1 +file 1 +protect 1 +secur 1 +distribut 1 +requir 1 +tochoos 1 +problem 1 +solut 1 +prototyp 1 +info 1 +relev 1 +current 1 +detail 1 +review 1 +rathera 1 +major 1 +theme 1 +focal 1 +point 1 +activ 1 +strongli 1 +encourag 1 +thursdai 1 +engin 1 +halloffic 1 +hour 1 +appoint 1 +comput 1 +scienc 1 +select 1 +classic 1 +design 1 +purchas 1 +doit 1 +formerli 1 +macc 1 +document 1 +deskfor 1 +differ 1 +previou 1 +pleas 1 +copi 1 +exam 1 +instead 1 +benchmark 1 +suit 1 +measur 1 +performanceof 1 +variou 1 +suno 1 +solari 1 +linux 1 +window 1 +manya 1 +hand 1 +second 1 +final 1 +report 1 +present 1 +total 1 +tent 1 +list 1 +suggest 1 +make 1 +well 1 +either 1 +case 1 +need 1 +come 1 +choos 1 +team 1 +peopl 1 +allow 1 +assig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..4f23d62c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,45 @@ +lectur 15 +silva 5 +toni 4 +sidnei 4 +hummert 4 +cours 3 +comput 2 +program 2 +fall 2 +cover 2 +student 2 +materi 2 +fortran 2 +jeff 2 +lampert 2 +michael 2 +birk 2 +russel 2 +man 2 +introduct 1 +scienc 1 +credit 1 +design 1 +basic 1 +programmingstructur 1 +need 1 +prepar 1 +elementaryengin 1 +suffici 1 +enableth 1 +write 1 +simpl 1 +solv 1 +engin 1 +problem 1 +inelementari 1 +essenti 1 +first 1 +half 1 +list 1 +section 1 +martin 1 +reameslast 1 +modifi 1 +anthoni 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..127e8272 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,96 @@ +offic 34 +hour 12 +phone 12 +wisc 12 +name 11 +email 11 +section 11 +grade 10 +site 4 +page 3 +kelli 3 +tsioli 3 +link 2 +home 2 +ratliff 2 +bockrath 2 +rehnuma 2 +ashraf 2 +geeri 2 +jyothi 2 +thano 2 +world 2 +list 2 +maintain 2 +fall 1 +midterm 1 +exam 1 +answer 1 +keyinstructorprofessor 1 +desautelsoffic 1 +comput 1 +sciencesoffic 1 +mondai 1 +wednesdai 1 +appoint 1 +dept 1 +mail 1 +teach 1 +assistantsfollow 1 +nathan 1 +rahman 1 +jaim 1 +fink 1 +jfink 1 +aboulnaga 1 +andrew 1 +jame 1 +herro 1 +jherro 1 +abhinav 1 +gupta 1 +agupta 1 +krothap 1 +chiang 1 +suhui 1 +gradesexplor 1 +compani 1 +whose 1 +softwar 1 +hardwar 1 +borland 1 +hewlett 1 +packard 1 +intel 1 +microsoft 1 +novel 1 +us 1 +explor 1 +lyco 1 +enorm 1 +databas 1 +yahoo 1 +internet 1 +resourc 1 +classifi 1 +categori 1 +lookup 1 +search 1 +virtual 1 +tourist 1 +find 1 +around 1 +click 1 +mother 1 +larg 1 +alphabet 1 +cool 1 +especi 1 +excel 1 +univers 1 +wisconsin 1 +madison 1 +origin 1 +creat 1 +teitelbaum 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..127e8272 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,96 @@ +offic 34 +hour 12 +phone 12 +wisc 12 +name 11 +email 11 +section 11 +grade 10 +site 4 +page 3 +kelli 3 +tsioli 3 +link 2 +home 2 +ratliff 2 +bockrath 2 +rehnuma 2 +ashraf 2 +geeri 2 +jyothi 2 +thano 2 +world 2 +list 2 +maintain 2 +fall 1 +midterm 1 +exam 1 +answer 1 +keyinstructorprofessor 1 +desautelsoffic 1 +comput 1 +sciencesoffic 1 +mondai 1 +wednesdai 1 +appoint 1 +dept 1 +mail 1 +teach 1 +assistantsfollow 1 +nathan 1 +rahman 1 +jaim 1 +fink 1 +jfink 1 +aboulnaga 1 +andrew 1 +jame 1 +herro 1 +jherro 1 +abhinav 1 +gupta 1 +agupta 1 +krothap 1 +chiang 1 +suhui 1 +gradesexplor 1 +compani 1 +whose 1 +softwar 1 +hardwar 1 +borland 1 +hewlett 1 +packard 1 +intel 1 +microsoft 1 +novel 1 +us 1 +explor 1 +lyco 1 +enorm 1 +databas 1 +yahoo 1 +internet 1 +resourc 1 +classifi 1 +categori 1 +lookup 1 +search 1 +virtual 1 +tourist 1 +find 1 +around 1 +click 1 +mother 1 +larg 1 +alphabet 1 +cool 1 +especi 1 +excel 1 +univers 1 +wisconsin 1 +madison 1 +origin 1 +creat 1 +teitelbaum 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..34e8f903 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,169 @@ +comput 8 +lectur 8 +us 6 +section 4 +system 4 +class 3 +macintosh 3 +part 3 +discuss 3 +program 3 +word 3 +assign 3 +info 2 +salli 2 +mail 2 +wisc 2 +text 2 +click 2 +avail 2 +cours 2 +gener 2 +topic 2 +includ 2 +follow 2 +spreadsheet 2 +databas 2 +oper 2 +aldu 2 +background 2 +quizz 2 +exam 2 +syllabu 2 +computersinstructor 1 +instructor 1 +petersonoffic 1 +sciencephon 1 +slpeter 1 +facstaff 1 +eduoffic 1 +hour 1 +tuesdai 1 +thursdai 1 +appointmentvit 1 +time 1 +place 1 +held 1 +engin 1 +halllectur 1 +inform 1 +technolog 1 +societi 1 +laudon 1 +traver 1 +laudonlab 1 +point 1 +drag 1 +petersoncours 1 +introduct 1 +design 1 +take 1 +zero 1 +knowledg 1 +computersto 1 +crack 1 +shot 1 +user 1 +skill 1 +throughcolleg 1 +arena 1 +taught 1 +macintoshcomput 1 +csuse 1 +compon 1 +term 1 +scienc 1 +work 1 +necessarili 1 +order 1 +applic 1 +processor 1 +graphic 1 +hardwar 1 +input 1 +output 1 +storag 1 +devic 1 +languag 1 +network 1 +telecommun 1 +artifici 1 +intellig 1 +expert 1 +relat 1 +social 1 +issu 1 +laboratori 1 +hand 1 +experienceon 1 +iici 1 +process 1 +electron 1 +newsgroup 1 +world 1 +wide 1 +eudora 1 +netscap 1 +paint 1 +draw 1 +superpaint 1 +chart 1 +excel 1 +filemak 1 +present 1 +manag 1 +hypercard 1 +desktop 1 +publish 1 +pagemak 1 +integr 1 +learn 1 +well 1 +addit 1 +special 1 +tool 1 +scanner 1 +teach 1 +thegoal 1 +provid 1 +high 1 +qualiti 1 +instruct 1 +rich 1 +educationalexperi 1 +namesectiontimedai 1 +bodner 1 +mwnick 1 +leavi 1 +mwtrshannon 1 +lloyd 1 +trtrjeff 1 +reminga 1 +mwfmwira 1 +sharenow 1 +trtrbrian 1 +swander 1 +mwfmwfbrad 1 +thayer 1 +mwfmwfjoe 1 +varghes 1 +trtrgeoff 1 +weinberg 1 +mwftrmaria 1 +yuin 1 +mwfmwrecommend 1 +necessari 1 +grade 1 +base 1 +regular 1 +assignmentsand 1 +glanc 1 +contain 1 +nitti 1 +gritti 1 +detail 1 +superpaintassign 1 +excellast 1 +modifi 1 +octob 1 +jonbodn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..34e8f903 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,169 @@ +comput 8 +lectur 8 +us 6 +section 4 +system 4 +class 3 +macintosh 3 +part 3 +discuss 3 +program 3 +word 3 +assign 3 +info 2 +salli 2 +mail 2 +wisc 2 +text 2 +click 2 +avail 2 +cours 2 +gener 2 +topic 2 +includ 2 +follow 2 +spreadsheet 2 +databas 2 +oper 2 +aldu 2 +background 2 +quizz 2 +exam 2 +syllabu 2 +computersinstructor 1 +instructor 1 +petersonoffic 1 +sciencephon 1 +slpeter 1 +facstaff 1 +eduoffic 1 +hour 1 +tuesdai 1 +thursdai 1 +appointmentvit 1 +time 1 +place 1 +held 1 +engin 1 +halllectur 1 +inform 1 +technolog 1 +societi 1 +laudon 1 +traver 1 +laudonlab 1 +point 1 +drag 1 +petersoncours 1 +introduct 1 +design 1 +take 1 +zero 1 +knowledg 1 +computersto 1 +crack 1 +shot 1 +user 1 +skill 1 +throughcolleg 1 +arena 1 +taught 1 +macintoshcomput 1 +csuse 1 +compon 1 +term 1 +scienc 1 +work 1 +necessarili 1 +order 1 +applic 1 +processor 1 +graphic 1 +hardwar 1 +input 1 +output 1 +storag 1 +devic 1 +languag 1 +network 1 +telecommun 1 +artifici 1 +intellig 1 +expert 1 +relat 1 +social 1 +issu 1 +laboratori 1 +hand 1 +experienceon 1 +iici 1 +process 1 +electron 1 +newsgroup 1 +world 1 +wide 1 +eudora 1 +netscap 1 +paint 1 +draw 1 +superpaint 1 +chart 1 +excel 1 +filemak 1 +present 1 +manag 1 +hypercard 1 +desktop 1 +publish 1 +pagemak 1 +integr 1 +learn 1 +well 1 +addit 1 +special 1 +tool 1 +scanner 1 +teach 1 +thegoal 1 +provid 1 +high 1 +qualiti 1 +instruct 1 +rich 1 +educationalexperi 1 +namesectiontimedai 1 +bodner 1 +mwnick 1 +leavi 1 +mwtrshannon 1 +lloyd 1 +trtrjeff 1 +reminga 1 +mwfmwira 1 +sharenow 1 +trtrbrian 1 +swander 1 +mwfmwfbrad 1 +thayer 1 +mwfmwfjoe 1 +varghes 1 +trtrgeoff 1 +weinberg 1 +mwftrmaria 1 +yuin 1 +mwfmwrecommend 1 +necessari 1 +grade 1 +base 1 +regular 1 +assignmentsand 1 +glanc 1 +contain 1 +nitti 1 +gritti 1 +detail 1 +superpaintassign 1 +excellast 1 +modifi 1 +octob 1 +jonbodn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..16dac1e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,64 @@ +section 4 +skrentni 4 +window 3 +home 2 +languag 2 +coordin 2 +email 2 +cours 2 +consult 2 +introduct 2 +inform 2 +borland 2 +pagecomput 1 +scienc 1 +algebra 1 +program 1 +instructorsw 1 +would 1 +like 1 +comment 1 +suggest 1 +complaint 1 +feedback 1 +provid 1 +click 1 +offic 1 +csinform 1 +frequent 1 +ask 1 +question 1 +overview 1 +microcomput 1 +laboratori 1 +fall 1 +schedul 1 +tutor 1 +mainli 1 +polici 1 +academ 1 +misconduct 1 +offer 1 +depart 1 +softwar 1 +microsoft 1 +hint 1 +compil 1 +oper 1 +system 1 +netscap 1 +creat 1 +us 1 +subdirectoriesc 1 +savitch 1 +text 1 +book 1 +integr 1 +develop 1 +environmentfortran 1 +jeff 1 +lampert 1 +page 1 +last 1 +updat 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..75859ea9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,38 @@ +languag 2 +advanc 2 +mathemat 2 +cours 1 +infocours 1 +inform 1 +cscours 1 +descriptionfrom 1 +guidebook 1 +undergradu 1 +student 1 +construct 1 +algorithm 1 +problem 1 +solv 1 +instruct 1 +experi 1 +least 1 +procedur 1 +orient 1 +pascal 1 +fortran 1 +survei 1 +program 1 +techniqu 1 +prereq 1 +high 1 +school 1 +prepar 1 +colleg 1 +work 1 +statist 1 +logic 1 +consent 1 +instructor 1 +open 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..16dac1e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,64 @@ +section 4 +skrentni 4 +window 3 +home 2 +languag 2 +coordin 2 +email 2 +cours 2 +consult 2 +introduct 2 +inform 2 +borland 2 +pagecomput 1 +scienc 1 +algebra 1 +program 1 +instructorsw 1 +would 1 +like 1 +comment 1 +suggest 1 +complaint 1 +feedback 1 +provid 1 +click 1 +offic 1 +csinform 1 +frequent 1 +ask 1 +question 1 +overview 1 +microcomput 1 +laboratori 1 +fall 1 +schedul 1 +tutor 1 +mainli 1 +polici 1 +academ 1 +misconduct 1 +offer 1 +depart 1 +softwar 1 +microsoft 1 +hint 1 +compil 1 +oper 1 +system 1 +netscap 1 +creat 1 +us 1 +subdirectoriesc 1 +savitch 1 +text 1 +book 1 +integr 1 +develop 1 +environmentfortran 1 +jeff 1 +lampert 1 +page 1 +last 1 +updat 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..b4e94e52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,47 @@ +page 5 +inform 4 +includ 4 +check 3 +assign 3 +copi 3 +home 2 +comput 2 +document 2 +postscript 2 +local 2 +servic 2 +problem 1 +solv 1 +us 1 +fall 1 +scienc 1 +follow 1 +instructor 1 +teach 1 +assist 1 +offic 1 +hour 1 +suggest 1 +explan 1 +grade 1 +polici 1 +work 1 +examin 1 +past 1 +exam 1 +lab 1 +handout 1 +syllabu 1 +mani 1 +need 1 +viewer 1 +obtain 1 +site 1 +section 1 +depart 1 +ghost 1 +directori 1 +read 1 +readm 1 +file 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..b4e94e52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,47 @@ +page 5 +inform 4 +includ 4 +check 3 +assign 3 +copi 3 +home 2 +comput 2 +document 2 +postscript 2 +local 2 +servic 2 +problem 1 +solv 1 +us 1 +fall 1 +scienc 1 +follow 1 +instructor 1 +teach 1 +assist 1 +offic 1 +hour 1 +suggest 1 +explan 1 +grade 1 +polici 1 +work 1 +examin 1 +past 1 +exam 1 +lab 1 +handout 1 +syllabu 1 +mani 1 +need 1 +viewer 1 +obtain 1 +site 1 +section 1 +depart 1 +ghost 1 +directori 1 +read 1 +readm 1 +file 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..b7ee2a5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,104 @@ +section 18 +chapter 15 +class 13 +assign 12 +exam 12 +quiz 12 +html 10 +fall 8 +program 8 +offic 7 +hour 7 +fridai 7 +jerri 5 +phone 5 +mail 5 +wisc 5 +karen 4 +homework 4 +cancel 3 +grade 3 +septemb 3 +novemb 3 +octob 3 +spring 3 +cours 2 +instructor 2 +handout 2 +solut 2 +simul 2 +help 2 +lectur 2 +note 2 +miller 2 +tuth 2 +wednesdai 2 +postscript 2 +syllabu 2 +answer 2 +data 2 +arithmet 2 +home 1 +page 1 +inform 1 +frequent 1 +ask 1 +question 1 +tusch 1 +tutsch 1 +execpc 1 +nolandsect 1 +smoler 1 +time 1 +psycholog 1 +sunlung 1 +suen 1 +ssuen 1 +edusridevi 1 +bhamidipati 1 +bsri 1 +edumohammad 1 +asgarian 1 +mondai 1 +schedul 1 +revis 1 +overview 1 +programs 1 +programm 1 +examsal 1 +quizz 1 +open 1 +book 1 +calcul 1 +probabl 1 +decemb 1 +last 1 +previou 1 +format 1 +summer 1 +midterm 1 +final 1 +lookup 1 +graphic 1 +interfac 1 +manual 1 +noteskaren 1 +number 1 +system 1 +represent 1 +integ 1 +float 1 +point 1 +structur 1 +regist 1 +procedur 1 +updat 1 +assembl 1 +updatedmondai 1 +except 1 +process 1 +featur 1 +perform 1 +architecur 1 +case 1 +studi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..e2b13104 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,115 @@ +section 20 +chapter 15 +assign 14 +quiz 14 +class 13 +exam 12 +html 11 +program 9 +fall 8 +offic 7 +hour 7 +fridai 7 +jerri 5 +phone 5 +mail 5 +wisc 5 +homework 5 +grade 4 +karen 4 +cancel 3 +septemb 3 +novemb 3 +octob 3 +decemb 3 +final 3 +spring 3 +cours 2 +instructor 2 +handout 2 +solut 2 +simul 2 +help 2 +lectur 2 +note 2 +miller 2 +tuth 2 +wednesdai 2 +postscript 2 +syllabu 2 +answer 2 +data 2 +arithmet 2 +home 1 +page 1 +inform 1 +frequent 1 +ask 1 +question 1 +tusch 1 +tutsch 1 +execpc 1 +nolandsect 1 +smoler 1 +time 1 +psycholog 1 +sunlung 1 +suen 1 +ssuen 1 +edusridevi 1 +bhamidipati 1 +bsri 1 +edumohammad 1 +asgarian 1 +mondai 1 +schedul 1 +revis 1 +overview 1 +programs 1 +programm 1 +programa 1 +programb 1 +examsal 1 +quizz 1 +open 1 +book 1 +calcul 1 +probabl 1 +last 1 +option 1 +thursdai 1 +difficult 1 +cumul 1 +offer 1 +desperateto 1 +rais 1 +sign 1 +advanc 1 +previou 1 +format 1 +summer 1 +midterm 1 +lookup 1 +graphic 1 +interfac 1 +manual 1 +noteskaren 1 +number 1 +system 1 +represent 1 +integ 1 +float 1 +point 1 +structur 1 +regist 1 +procedur 1 +updat 1 +assembl 1 +updatedmondai 1 +except 1 +process 1 +featur 1 +perform 1 +architecur 1 +case 1 +studi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..ea02c349 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,126 @@ +page 17 +lectur 16 +chapter 13 +sampl 10 +code 6 +comput 6 +onlin 6 +offic 6 +announc 5 +assign 5 +exam 5 +search 4 +list 4 +skrentni 4 +wisc 4 +inform 3 +get 3 +program 3 +place 3 +tree 3 +copi 3 +error 3 +basic 3 +scienc 3 +hour 3 +data 2 +read 2 +lab 2 +queue 2 +stack 2 +stale 2 +attend 2 +thur 2 +futur 2 +tabl 2 +skip 2 +sort 2 +algorithm 2 +structur 1 +lec 1 +introduct 1 +structureslectur 1 +psychologylectur 1 +psychologycours 1 +start 1 +help 1 +cours 1 +materi 1 +home 1 +gener 1 +recent 1 +first 1 +problem 1 +found 1 +locat 1 +binari 1 +last 1 +makeup 1 +done 1 +solut 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +topic 1 +handin 1 +directori 1 +creat 1 +common 1 +suggest 1 +addit 1 +welcom 1 +either 1 +vega 1 +work 1 +line 1 +mondai 1 +wednesdai 1 +magic 1 +number 1 +sourc 1 +file 1 +must 1 +abl 1 +compil 1 +otherwis 1 +unusu 1 +look 1 +forget 1 +reload 1 +updat 1 +browser 1 +cach 1 +becom 1 +outdat 1 +unix 1 +tutori 1 +need 1 +time 1 +balanc 1 +discuss 1 +comparison 1 +implement 1 +simul 1 +overload 1 +oper 1 +hash 1 +link 1 +pointer 1 +dynam 1 +memori 1 +alloc 1 +analysi 1 +recurs 1 +focu 1 +appendix 1 +teach 1 +assist 1 +baicheng 1 +billi 1 +liao 1 +bail 1 +cheng 1 +jiacheng 1 +pmcopyright 1 +jame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..594d372d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,548 @@ +program 38 +assign 14 +cours 12 +lectur 10 +exam 10 +note 10 +comput 9 +inform 8 +data 7 +home 7 +help 7 +document 7 +scienc 7 +time 7 +comment 7 +wisc 6 +offic 6 +hour 6 +text 6 +unix 6 +avail 6 +compil 6 +must 6 +email 5 +yanni 5 +women 5 +also 5 +take 5 +problem 5 +includ 5 +follow 5 +dewitt 5 +semest 5 +read 5 +machin 5 +possibl 5 +code 5 +name 5 +address 4 +grade 4 +late 4 +style 4 +want 4 +student 4 +materi 4 +often 4 +account 4 +final 4 +gener 4 +class 4 +algorithm 4 +clariti 4 +outputfil 4 +tuesdai 3 +schedul 3 +intern 3 +us 3 +develop 3 +cycl 3 +section 3 +print 3 +group 3 +encourag 3 +major 3 +like 3 +extra 3 +mail 3 +true 3 +wall 3 +well 3 +cover 3 +first 3 +debug 3 +function 3 +done 3 +copi 3 +turn 3 +make 3 +chang 3 +avoid 3 +need 3 +understand 3 +work 3 +effici 3 +paramet 3 +limit 3 +identifi 3 +statement 3 +line 3 +continu 3 +variabl 3 +introduct 2 +page 2 +http 2 +languag 2 +cheat 2 +extern 2 +readi 2 +notat 2 +binari 2 +graduat 2 +call 2 +would 2 +suzan 2 +write 2 +thiscours 2 +pascal 2 +requir 2 +book 2 +mirror 2 +complet 2 +still 2 +addit 2 +doit 2 +desk 2 +near 2 +dayton 2 +build 2 +experi 2 +edit 2 +file 2 +find 2 +list 2 +topic 2 +provid 2 +later 2 +design 2 +room 2 +thec 2 +univers 2 +electron 2 +sure 2 +assum 2 +simpli 2 +start 2 +know 2 +earli 2 +explain 2 +correct 2 +behav 2 +situat 2 +test 2 +case 2 +modular 2 +necessari 2 +consist 2 +suggest 2 +valu 2 +indent 2 +long 2 +begin 2 +user 2 +give 2 +descript 2 +tell 2 +assumpt 2 +describ 2 +neg 2 +main 2 +header 2 +declar 2 +segment 2 +error 2 +inputfil 2 +structuresfal 1 +htmlinstructor 1 +ioannidi 1 +sciencesoffic 1 +thursdai 1 +amoffic 1 +phone 1 +html 1 +content 1 +new 1 +teach 1 +assist 1 +polici 1 +newsassign 1 +midterm 1 +statisticssom 1 +interest 1 +statist 1 +median 1 +mean 1 +midterma 1 +sampl 1 +oldmidterm 1 +prepar 1 +ownmidterm 1 +searchth 1 +search 1 +either 1 +open 1 +filemenu 1 +ghostview 1 +window 1 +show 1 +andchoos 1 +menu 1 +item 1 +sciencesom 1 +femal 1 +faculti 1 +undergradu 1 +haveform 1 +wic 1 +oneof 1 +goal 1 +becomecomput 1 +thisclass 1 +talk 1 +someon 1 +incomput 1 +studi 1 +withtheir 1 +classwork 1 +computersci 1 +grad 1 +tomak 1 +appoint 1 +stodder 1 +eduand 1 +grow 1 +tremend 1 +field 1 +theodd 1 +ever 1 +anoth 1 +end 1 +abl 1 +statementi 1 +wide 1 +aniniti 1 +startup 1 +period 1 +product 1 +exceptionsy 1 +textth 1 +isdata 1 +abstract 1 +solv 1 +frank 1 +carrano 1 +isbn 1 +written 1 +separ 1 +notnecessari 1 +alwai 1 +fall 1 +david 1 +actual 1 +consider 1 +simpl 1 +lecturenot 1 +short 1 +isveri 1 +littl 1 +narr 1 +exercis 1 +recommend 1 +sourc 1 +purchas 1 +whichar 1 +street 1 +entranceof 1 +needsom 1 +activ 1 +log 1 +creat 1 +manipul 1 +run 1 +handoutc 1 +notesar 1 +contain 1 +invalu 1 +mention 1 +althoughi 1 +supplement 1 +handout 1 +courseof 1 +nonetheless 1 +respons 1 +base 1 +onth 1 +andth 1 +gradingther 1 +even 1 +five 1 +determin 1 +approxim 1 +equal 1 +weight 1 +programmingassign 1 +count 1 +octob 1 +chemistri 1 +wednesdai 1 +decemb 1 +place 1 +detail 1 +administr 1 +familiar 1 +basic 1 +stuff 1 +apoint 1 +record 1 +equival 1 +madison 1 +prerequisitecours 1 +thesear 1 +floor 1 +prefer 1 +certainrestrict 1 +emailand 1 +thatyou 1 +youwork 1 +provis 1 +download 1 +toyour 1 +runwith 1 +sparcstat 1 +notifi 1 +inassign 1 +hint 1 +allelectron 1 +send 1 +policyno 1 +accept 1 +exactli 1 +order 1 +caus 1 +load 1 +coincid 1 +duedat 1 +sever 1 +right 1 +awai 1 +oneach 1 +thing 1 +certain 1 +wrong 1 +wait 1 +thelast 1 +minut 1 +except 1 +approv 1 +good 1 +excus 1 +troubl 1 +soon 1 +cheatingth 1 +depart 1 +hard 1 +linest 1 +welcom 1 +tocommun 1 +datastructur 1 +butther 1 +share 1 +expect 1 +learn 1 +obei 1 +thecomput 1 +system 1 +policiesgovern 1 +helpif 1 +pleas 1 +policiesif 1 +best 1 +tovisit 1 +along 1 +currenthard 1 +intend 1 +conceptsthat 1 +present 1 +confus 1 +answer 1 +specif 1 +question 1 +reliabl 1 +contact 1 +respond 1 +emailsever 1 +daili 1 +almost 1 +everi 1 +week 1 +gradingprogram 1 +criteria 1 +correctli 1 +normal 1 +typicalinput 1 +state 1 +projectspecif 1 +easi 1 +informationabout 1 +robust 1 +behavior 1 +extrem 1 +unusu 1 +handl 1 +reason 1 +andlog 1 +manner 1 +blow 1 +qualiti 1 +shoulddemonstr 1 +facet 1 +capabl 1 +includingunusu 1 +unnecessarili 1 +ineffici 1 +construct 1 +howev 1 +never 1 +pursu 1 +expens 1 +effect 1 +useof 1 +incorpor 1 +sort 1 +paper 1 +subject 1 +considerationof 1 +arbitrari 1 +bound 1 +size 1 +orcomplex 1 +input 1 +whenev 1 +express 1 +definedconst 1 +easili 1 +numer 1 +liter 1 +appear 1 +thosevalu 1 +styleus 1 +meaning 1 +scheme 1 +convent 1 +variable_nam 1 +function_nam 1 +argument 1 +const 1 +defined_const 1 +enum 1 +enumtyp 1 +classnam 1 +multipl 1 +singl 1 +skip 1 +clear 1 +notesfor 1 +loop 1 +label 1 +meaningfulli 1 +documentationthi 1 +yourprogram 1 +typic 1 +someonewho 1 +superfici 1 +full 1 +format 1 +bug 1 +special 1 +featur 1 +made 1 +posit 1 +aspect 1 +unawar 1 +descriptionne 1 +repeat 1 +briefli 1 +summar 1 +point 1 +refer 1 +thensuffici 1 +appli 1 +documentationther 1 +four 1 +type 1 +structuresshould 1 +purpos 1 +outlin 1 +next 1 +membershould 1 +convei 1 +sname 1 +much 1 +withoutmak 1 +suppli 1 +exampl 1 +index 1 +last 1 +element 1 +ad 1 +stackyou 1 +local 1 +within 1 +tricki 1 +opaqu 1 +beavoid 1 +sometim 1 +commentcan 1 +reader 1 +go 1 +clarifi 1 +level 1 +outlineof 1 +vimani 1 +peopl 1 +thefirst 1 +becomecomfort 1 +particularli 1 +youronli 1 +previou 1 +macpasc 1 +macintosh 1 +strongli 1 +urg 1 +inth 1 +becom 1 +comfort 1 +withunix 1 +pain 1 +wellspent 1 +wish 1 +attend 1 +tutori 1 +held 1 +comp 1 +session 1 +thefollow 1 +dai 1 +tbayou 1 +pick 1 +environ 1 +look 1 +output 1 +break 1 +tire 1 +goto 1 +quit 1 +result 1 +submiss 1 +instruct 1 +given 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..32873ba4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,284 @@ +assign 20 +program 20 +lectur 15 +data 12 +cours 11 +exam 11 +structur 10 +note 9 +text 8 +wisc 6 +cover 6 +also 6 +inform 6 +us 5 +materi 5 +languag 5 +comput 4 +tuesdai 4 +abstract 4 +write 4 +chin 4 +section 4 +grade 4 +follow 4 +even 4 +http 3 +html 3 +fall 3 +offic 3 +hour 3 +teach 3 +cchin 3 +page 3 +book 3 +often 3 +dewitt 3 +electron 3 +final 3 +line 3 +introduct 2 +jame 2 +scienc 2 +laru 2 +fridai 2 +present 2 +concept 2 +detail 2 +build 2 +necessari 2 +determin 2 +requir 2 +type 2 +principl 2 +maintain 2 +place 2 +exercis 2 +tang 2 +sciencesoffic 2 +wednesdai 2 +thursdai 2 +phone 2 +email 2 +address 2 +written 2 +littl 2 +simpl 2 +contain 2 +addit 2 +avail 2 +doit 2 +desk 2 +dayton 2 +first 2 +experi 2 +regularli 2 +read 2 +mail 2 +schedul 2 +topic 2 +second 2 +revis 1 +larusinstructor 1 +laruslaru 1 +amcontentsteach 1 +assistantstextlectur 1 +informationelectron 1 +mailth 1 +languagegradingexamscours 1 +scheduleassign 1 +assignmentscours 1 +objectivesc 1 +object 1 +gener 1 +wide 1 +fundament 1 +block 1 +abl 1 +identifi 1 +situat 1 +select 1 +appropri 1 +reiter 1 +modular 1 +introduc 1 +essenti 1 +clear 1 +correct 1 +softwar 1 +close 1 +connect 1 +strong 1 +emphasi 1 +appli 1 +assistantswei 1 +zhang 1 +assist 1 +forthi 1 +homework 1 +assignmentsand 1 +happi 1 +answer 1 +question 1 +theassign 1 +aspect 1 +give 1 +troubl 1 +zhangoffic 1 +compuer 1 +sundai 1 +weiz 1 +mondai 1 +amoffic 1 +home 1 +textth 1 +problem 1 +solv 1 +wall 1 +mirror 1 +frank 1 +carrano 1 +isbn 1 +well 1 +long 1 +wind 1 +includ 1 +background 1 +separ 1 +alwai 1 +david 1 +complet 1 +short 1 +true 1 +narr 1 +basi 1 +feel 1 +free 1 +skip 1 +portion 1 +want 1 +purchas 1 +document 1 +street 1 +entranc 1 +unix 1 +need 1 +activ 1 +account 1 +log 1 +creat 1 +edit 1 +manipul 1 +file 1 +compil 1 +run 1 +debug 1 +handout 1 +crucial 1 +help 1 +psycholog 1 +mention 1 +attend 1 +strongli 1 +recommend 1 +appear 1 +textbook 1 +needless 1 +respons 1 +base 1 +maili 1 +notifi 1 +student 1 +chang 1 +hint 1 +assum 1 +gradingther 1 +semest 1 +five 1 +approxim 1 +equal 1 +weight 1 +count 1 +taught 1 +must 1 +know 1 +skrentni 1 +larg 1 +complex 1 +unless 1 +difficult 1 +learn 1 +anoth 1 +gdbthere 1 +describ 1 +debugg 1 +chemistri 1 +decemb 1 +rough 1 +outlin 1 +provid 1 +later 1 +administrationbas 1 +stuff 1 +function 1 +pointer 1 +record 1 +dynam 1 +storagelectur 1 +list 1 +binari 1 +search 1 +notat 1 +advanc 1 +listslectur 1 +stackslectur 1 +queueslectur 1 +hashinglectur 1 +recursionlectur 1 +treesbinari 1 +tree 1 +sort 1 +searchlectur 1 +treesgraphslectur 1 +sortinglectur 1 +tbaassign 1 +absolut 1 +turn 1 +index 1 +card 1 +name 1 +login 1 +nameyear 1 +school 1 +freshman 1 +sophomor 1 +previou 1 +coursesprevi 1 +experiencerec 1 +photograph 1 +pictur 1 +birthdai 1 +girl 1 +scout 1 +trip 1 +summer 1 +color 1 +black 1 +white 1 +size 1 +given 1 +without 1 +photo 1 +byte 1 +fora 1 +bound 1 +integ 1 +sequenc 1 +databaseof 1 +score 1 +tenni 1 +tournament 1 +produc 1 +aconcord 1 +hash 1 +tabl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..3eb2d58e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,121 @@ +page 15 +lectur 14 +chapter 11 +sampl 8 +comput 6 +offic 6 +announc 5 +assign 5 +exam 5 +code 5 +onlin 5 +list 4 +skrentni 4 +wisc 4 +inform 3 +get 3 +program 3 +place 3 +copi 3 +error 3 +basic 3 +scienc 3 +hour 3 +data 2 +read 2 +lab 2 +queue 2 +stack 2 +stale 2 +attend 2 +thur 2 +skip 2 +sort 2 +search 2 +algorithm 2 +structur 1 +lec 1 +introduct 1 +structureslectur 1 +psychologylectur 1 +psychologycours 1 +start 1 +help 1 +cours 1 +materi 1 +home 1 +gener 1 +recent 1 +first 1 +problem 1 +found 1 +locat 1 +last 1 +makeup 1 +done 1 +solut 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +topic 1 +handin 1 +directori 1 +creat 1 +common 1 +suggest 1 +addit 1 +welcom 1 +either 1 +vega 1 +work 1 +line 1 +mondai 1 +wednesdai 1 +magic 1 +number 1 +sourc 1 +file 1 +must 1 +abl 1 +compil 1 +otherwis 1 +unusu 1 +look 1 +forget 1 +reload 1 +updat 1 +browser 1 +cach 1 +becom 1 +outdat 1 +unix 1 +tutori 1 +need 1 +time 1 +futur 1 +tree 1 +simul 1 +overload 1 +oper 1 +hash 1 +tabl 1 +link 1 +pointer 1 +dynam 1 +memori 1 +alloc 1 +analysi 1 +recurs 1 +focu 1 +appendix 1 +teach 1 +assist 1 +baicheng 1 +billi 1 +liao 1 +bail 1 +cheng 1 +jiacheng 1 +pmcopyright 1 +jame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..e30b6c19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,55 @@ +comput 4 +offic 4 +page 4 +introduct 3 +scienc 3 +lectur 3 +home 3 +theoret 2 +brian 2 +cole 2 +email 2 +wisc 2 +hour 2 +sundaram 2 +assign 2 +examin 2 +madison 2 +fall 1 +room 1 +mondai 1 +fridai 1 +teach 1 +assist 1 +david 1 +stukel 1 +tuesdai 1 +wednesdai 1 +thursdai 1 +text 1 +languag 1 +theori 1 +john 1 +martin 1 +north 1 +dakota 1 +state 1 +univers 1 +mcgraw 1 +hill 1 +isbn 1 +tent 1 +schedul 1 +includ 1 +exam 1 +inform 1 +clarif 1 +grade 1 +polici 1 +written 1 +term 1 +final 1 +archiv 1 +mail 1 +list 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..2cae2949 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,72 @@ +homework 12 +exam 11 +midterm 7 +octob 7 +stat 4 +final 4 +offic 4 +cours 4 +septemb 4 +march 4 +solut 4 +book 3 +decemb 3 +program 3 +novemb 3 +sampl 3 +also 2 +math 2 +fall 2 +linear 2 +mechan 2 +engin 2 +open 2 +time 2 +date 2 +locat 2 +wednesdai 2 +olvi 2 +mangasarian 2 +comp 2 +mail 2 +wisc 2 +hour 2 +semest 2 +matlab 2 +inform 2 +page 2 +site 2 +programmingfal 1 +schedul 1 +lectur 1 +thursdai 1 +instructor 1 +pphone 1 +teach 1 +assist 1 +telephon 1 +textbook 1 +ferri 1 +preliminari 1 +version 1 +doit 1 +madison 1 +syllabu 1 +overview 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +setup 1 +project 1 +mathemat 1 +home 1 +relev 1 +searchabl 1 +bibliograph 1 +databas 1 +item 1 +link 1 +variou 1 +updat 1 +period 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..ab9da90d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,68 @@ +offic 4 +program 3 +lectur 2 +horwitz 2 +telephon 2 +mail 2 +wisc 2 +hour 2 +appoint 2 +rahul 2 +compil 2 +tool 2 +cours 2 +inform 2 +assign 2 +grade 2 +fall 1 +introduct 1 +languag 1 +compilersspr 1 +stori 1 +month 1 +octob 1 +schedul 1 +tuth 1 +comp 1 +stat 1 +recit 1 +psycholog 1 +instructor 1 +susan 1 +tuesdai 1 +fridai 1 +teach 1 +assist 1 +kapoor 1 +mondai 1 +wednesdai 1 +text 1 +reserv 1 +wendt 1 +librari 1 +principl 1 +techniqu 1 +sethi 1 +ullman 1 +craft 1 +fischer 1 +leblanc 1 +check 1 +regularli 1 +gener 1 +overview 1 +date 1 +exam 1 +includ 1 +late 1 +polici 1 +get 1 +start 1 +read 1 +homework 1 +examin 1 +note 1 +us 1 +email 1 +link 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..2ff98958 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,526 @@ +java 26 +project 23 +sept 17 +avail 12 +file 12 +program 11 +schedul 10 +languag 9 +introduct 8 +system 8 +comput 8 +specif 8 +implement 8 +process 8 +midterm 7 +exam 7 +deadlock 7 +section 7 +assign 7 +lectur 7 +manag 7 +memori 7 +oper 6 +grade 6 +note 6 +class 6 +page 6 +first 6 +public 6 +cours 6 +synchron 6 +solomon 5 +room 5 +wisc 4 +summari 4 +correct 4 +take 4 +time 4 +differ 4 +semest 4 +discuss 4 +thank 4 +point 4 +threadschedul 4 +start 4 +directori 4 +fork 4 +string 4 +messag 4 +final 4 +disk 4 +refer 4 +us 4 +chapter 4 +updat 3 +detail 3 +also 3 +forproject 3 +avaiabl 3 +avoid 3 +call 3 +place 3 +text 3 +topic 3 +quit 3 +hand 3 +algorithm 3 +theproject 3 +creat 3 +three 3 +graph 3 +graphcontain 3 +number 3 +onlin 3 +student 3 +receiv 3 +makefil 3 +help 3 +seminar 3 +unix 3 +follow 3 +processor 3 +wednesdai 3 +collect 3 +late 3 +dai 3 +monitor 3 +count 3 +fall 2 +offic 2 +sciencesoffic 2 +hour 2 +phone 2 +email 2 +address 2 +new 2 +answer 2 +thursdai 2 +error 2 +other 2 +popular 2 +cover 2 +comp 2 +octob 2 +issu 2 +contain 2 +fix 2 +import 2 +caus 2 +second 2 +sampl 2 +code 2 +sched 2 +later 2 +data 2 +read 2 +peterson 2 +initi 2 +placement 2 +star 2 +philosoph 2 +todd 2 +thejava 2 +tutori 2 +argument 2 +substr 2 +work 2 +sourc 2 +type 2 +make 2 +run 2 +solari 2 +finish 2 +structur 2 +meet 2 +mondai 2 +book 2 +bookstor 2 +begin 2 +depart 2 +workstat 2 +must 2 +cshrc 2 +local 2 +path 2 +back 2 +thur 2 +inform 2 +relat 2 +softwar 2 +prevent 2 +recoveri 2 +main 2 +virtual 2 +swap 2 +devic 2 +protect 2 +secur 2 +tuesdai 2 +includ 2 +thejavaprogram 2 +requir 2 +strongli 2 +languagebi 2 +arnold 2 +gosl 2 +addit 2 +access 2 +scienc 2 +becom 2 +familiar 2 +environ 2 +pair 2 +anyon 2 +caught 2 +garbag 2 +histori 2 +resourc 2 +get 2 +view 2 +semaphor 2 +systemssect 1 +instructormarvin 1 +troffic 1 +tarob 1 +mellencamp 1 +mwfoffic 1 +mellen 1 +watch 1 +space 1 +latest 1 +score 1 +breakdown 1 +distributioni 1 +date 1 +move 1 +typograph 1 +importantli 1 +arraywa 1 +demand 1 +isavail 1 +look 1 +warn 1 +exampl 1 +larg 1 +grain 1 +salt 1 +long 1 +courseus 1 +order 1 +likelyb 1 +determin 1 +presentedin 1 +electron 1 +direct 1 +forprogram 1 +post 1 +procedur 1 +givefork 1 +specificationshould 1 +notifi 1 +show 1 +bug 1 +minor 1 +paragraph 1 +slightli 1 +garbl 1 +jake 1 +dawlei 1 +carr 1 +line 1 +omit 1 +theprogram 1 +detailssect 1 +specifi 1 +correctli 1 +lipe 1 +zhang 1 +test 1 +srccontain 1 +javacontain 1 +definit 1 +classgraphdescrib 1 +petersoncycl 1 +shown 1 +mention 1 +notacycl 1 +petersonacycl 1 +acycl 1 +topolog 1 +central 1 +sharingfork 1 +nine 1 +jenner 1 +typo 1 +maxthink 1 +replac 1 +maxeat 1 +versionha 1 +mistak 1 +version 1 +argumenti 1 +offset 1 +charactersin 1 +franco 1 +tung 1 +chan 1 +occasion 1 +send 1 +urgent 1 +directli 1 +mail 1 +listof 1 +regist 1 +archiv 1 +sent 1 +list 1 +think 1 +sendmail 1 +request 1 +copi 1 +rememb 1 +separ 1 +edit 1 +describ 1 +comment 1 +maketo 1 +compil 1 +compilewithout 1 +simul 1 +preemptiv 1 +multitask 1 +computershav 1 +ad 1 +tutoriali 1 +go 1 +onthread 1 +find 1 +hint 1 +awar 1 +weekli 1 +systemsand 1 +network 1 +checkth 1 +colloquia 1 +theunivers 1 +ajava 1 +afil 1 +name 1 +home 1 +onelin 1 +chang 1 +effect 1 +either 1 +localor 1 +simpli 1 +readi 1 +orient 1 +session 1 +user 1 +forth 1 +tue 1 +csmon 1 +cslast 1 +content 1 +intend 1 +gener 1 +techniqu 1 +usedto 1 +kind 1 +among 1 +beprocess 1 +creation 1 +commun 1 +segment 1 +replacementalgorithm 1 +control 1 +input 1 +output 1 +sciencesand 1 +statisticsdiscuss 1 +psychologyth 1 +option 1 +least 1 +primari 1 +focu 1 +anyquest 1 +regard 1 +rais 1 +thetext 1 +modern 1 +systemsbi 1 +andrew 1 +tanenbaum 1 +prentic 1 +hall 1 +recommend 1 +jame 1 +addison 1 +weslei 1 +lot 1 +materi 1 +fast 1 +tutorialth 1 +specificationjava 1 +documentationwatch 1 +spot 1 +link 1 +five 1 +sparcstat 1 +dialect 1 +unixoper 1 +provid 1 +anycomput 1 +programminglanguag 1 +howev 1 +respons 1 +transfer 1 +requireddata 1 +set 1 +packag 1 +easi 1 +acquaint 1 +exercis 1 +designedto 1 +subsequ 1 +involveprocess 1 +member 1 +feel 1 +free 1 +butyou 1 +share 1 +partner 1 +cheat 1 +vigor 1 +punish 1 +enough 1 +said 1 +dateind 1 +entir 1 +havethre 1 +daysof 1 +credit 1 +eachof 1 +last 1 +choos 1 +sever 1 +favor 1 +congeni 1 +runtim 1 +subscript 1 +null 1 +pointer 1 +uniniti 1 +variabl 1 +except 1 +runtimerath 1 +mysteri 1 +crash 1 +random 1 +behavior 1 +much 1 +easier 1 +char 1 +arrai 1 +storag 1 +extrem 1 +handi 1 +trendi 1 +faster 1 +mani 1 +reason 1 +grow 1 +littl 1 +withth 1 +byproduct 1 +coursewil 1 +knowledg 1 +market 1 +commod 1 +featur 1 +built 1 +particular 1 +wide 1 +withlanguag 1 +level 1 +support 1 +concurr 1 +thread 1 +switch 1 +alwaysa 1 +disloc 1 +fortun 1 +excel 1 +eas 1 +thetransit 1 +amazingli 1 +good 1 +neither 1 +introductori 1 +primer 1 +author 1 +assum 1 +youalreadi 1 +know 1 +manual 1 +although 1 +arefer 1 +manuali 1 +readabl 1 +wayfrom 1 +everyth 1 +need 1 +write 1 +sophisticatedprogram 1 +univers 1 +encourag 1 +gather 1 +varieti 1 +ofoth 1 +togeth 1 +niceonlin 1 +tutorialabout 1 +anda 1 +manualfor 1 +standard 1 +librari 1 +even 1 +statist 1 +timet 1 +decemb 1 +yourgrad 1 +remain 1 +four 1 +tent 1 +check 1 +frequent 1 +learn 1 +javaoct 1 +synchronizationoct 1 +schedulingoct 1 +schedulingdec 1 +systemsdec 1 +bottom 1 +outlin 1 +programm 1 +state 1 +race 1 +condit 1 +bound 1 +buffer 1 +problem 1 +dine 1 +terminolog 1 +detect 1 +critic 1 +short 1 +term 1 +alloc 1 +compact 1 +come 1 +eduthu 1 +copyright 1 +marvin 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..d7193474 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,88 @@ +system 4 +thread 3 +home 2 +introduct 2 +thursdai 2 +discuss 2 +fridai 2 +page 2 +concurr 2 +address 2 +manag 2 +memori 2 +protect 2 +file 2 +java 2 +distribut 2 +fall 1 +pagec 1 +oper 1 +systemsfal 1 +tuesdai 1 +host 1 +maryvernon 1 +instructor 1 +andkarunamuthiah 1 +welcom 1 +note 1 +lectur 1 +beinterchang 1 +follow 1 +date 1 +solut 1 +quiz 1 +assign 1 +offic 1 +hour 1 +email 1 +textbook 1 +read 1 +grade 1 +project 1 +quizz 1 +mail 1 +archiveapproxim 1 +schedul 1 +topicsweek 1 +oftopicsreadingsep 1 +space 1 +processeschapt 1 +cooper 1 +threadschapt 1 +synchron 1 +implement 1 +mutual 1 +exclusioncont 1 +semaphorescont 1 +monitor 1 +summarycont 1 +doct 1 +deadlock 1 +process 1 +schedulingchapt 1 +translat 1 +cach 1 +tlbschapter 1 +demand 1 +virtual 1 +memorycont 1 +review 1 +survei 1 +systemschapt 1 +name 1 +directorieschapt 1 +object 1 +core 1 +methodstbanov 1 +secur 1 +thanksgiv 1 +class 1 +network 1 +remot 1 +procedur 1 +call 1 +chapter 1 +global 1 +reviewchapt 1 +vernon 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..e88604cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,60 @@ +assign 11 +midterm 8 +spring 6 +project 5 +fall 4 +offic 4 +hour 4 +exam 4 +tuesdai 4 +answer 4 +cours 3 +inform 3 +select 3 +solut 3 +instructor 2 +handout 2 +david 2 +wednesdai 2 +appoint 2 +mail 2 +wisc 2 +phone 2 +time 2 +thursdai 2 +atkinson 2 +get 2 +start 2 +descript 2 +vhdl 2 +room 2 +lectur 1 +home 1 +page 1 +grade 1 +wood 1 +class 1 +locat 1 +phil 1 +help 1 +mentor 1 +error 1 +check 1 +correct 1 +sampl 1 +code 1 +compil 1 +simul 1 +mentorassign 1 +question 1 +projectthi 1 +section 1 +includ 1 +deadlin 1 +report 1 +demonstr 1 +decemb 1 +examsth 1 +final 1 +previou 1 +endterm 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..35e98670 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,80 @@ +assign 11 +postscript 8 +offic 6 +last 6 +handout 6 +hour 4 +info 4 +updat 3 +mail 3 +databas 2 +manag 2 +system 2 +design 2 +inform 2 +class 2 +fridai 2 +time 2 +solut 2 +chapter 2 +exercis 2 +grade 2 +import 2 +html 2 +year 2 +midterm 2 +sybas 2 +tree 2 +raghu 2 +phone 2 +lectur 2 +xbao 2 +implementationc 1 +implementationcours 1 +version 1 +instead 1 +chang 1 +text 1 +list 1 +pleas 1 +dont 1 +print 1 +first 1 +overview 1 +prerequisit 1 +topic 1 +cover 1 +date 1 +polici 1 +issu 1 +minibas 1 +home 1 +page 1 +check 1 +detail 1 +sampl 1 +us 1 +help 1 +yahoo 1 +entri 1 +resourc 1 +tutori 1 +debugg 1 +languag 1 +construct 1 +experi 1 +assignmentoth 1 +code 1 +convent 1 +instructor 1 +ramakrishnan 1 +discuss 1 +place 1 +ingraham 1 +teach 1 +assist 1 +xuemei 1 +tue 1 +thur 1 +modifi 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..d3f50956 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,47 @@ +lectur 7 +page 4 +discuss 3 +home 2 +know 2 +russel 2 +option 2 +psycholog 2 +time 2 +place 2 +inform 2 +assign 2 +welcom 1 +obvious 1 +construct 1 +semest 1 +progress 1 +addinginform 1 +need 1 +import 1 +thing 1 +class 1 +meetingroom 1 +chang 1 +current 1 +meet 1 +labsfor 1 +fridai 1 +beenmov 1 +still 1 +instructor 1 +jeff 1 +naughton 1 +offic 1 +wednesdai 1 +lab 1 +taught 1 +close 1 +cooper 1 +fact 1 +probabl 1 +exam 1 +gener 1 +minibas 1 +particular 1 +pleas 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..77b1e9dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,33 @@ +homework 6 +cours 4 +mail 4 +offic 3 +phone 3 +wisc 3 +hour 3 +introduct 2 +algorithm 2 +bach 2 +teach 2 +assist 2 +raji 2 +inform 1 +instructor 1 +eric 1 +appt 1 +bill 1 +donaldson 1 +gopalakrishnan 1 +midterm 1 +exam 1 +handout 1 +descript 1 +syllabu 1 +book 1 +reserv 1 +organ 1 +solut 1 +graph 1 +fractal 1 +behaviour 1 +archiv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..0b49f466 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,112 @@ +network 8 +comput 6 +cours 6 +email 5 +wisc 5 +fall 5 +project 5 +assign 4 +program 4 +postscript 4 +teach 3 +assist 3 +mail 3 +archiv 3 +refer 3 +eduoffic 3 +hour 3 +slide 3 +midterm 3 +advanc 2 +syllabu 2 +version 2 +comp 2 +phone 2 +time 2 +conveni 2 +feel 2 +free 2 +appoint 2 +layer 2 +powerpoint 2 +document 2 +form 2 +exam 2 +socket 2 +interfac 2 +lectur 2 +professor 1 +landweb 1 +introduct 1 +networksintroduct 1 +tabl 1 +content 1 +intern 1 +connect 1 +offer 1 +inform 1 +instructor 1 +select 1 +readingsclick 1 +hereto 1 +latest 1 +text 1 +networkingcours 1 +madisoncours 1 +informationlecturetim 1 +mwfplace 1 +statclass 1 +listinstructor 1 +lawrenc 1 +landweberoffic 1 +statphon 1 +srinivasa 1 +narayananoffic 1 +mondai 1 +wednesdai 1 +teitelbaumoffic 1 +naemail 1 +tuesdai 1 +thursdai 1 +moder 1 +complet 1 +error 1 +warn 1 +code 1 +class 1 +implement 1 +reliabl 1 +adapt 1 +handout 1 +overview 1 +softwar 1 +engin 1 +design 1 +evalu 1 +html 1 +pictur 1 +grade 1 +criteria 1 +gradingmidterm 1 +final 1 +term 1 +prior 1 +option 1 +book 1 +unix 1 +steven 1 +richard 1 +prentic 1 +hall 1 +isbn 1 +garbler 1 +packag 1 +annot 1 +bibliographyread 1 +partial 1 +icmp 1 +ospf 1 +ipng 1 +schedul 1 +spring 1 +review 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..c10ff890 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,21 @@ +homework 4 +fall 2 +solut 2 +note 2 +comput 2 +project 2 +part 2 +home 1 +page 1 +instructor 1 +robert 1 +meyer 1 +wisc 1 +time 1 +place 1 +comp 1 +offic 1 +hour 1 +cours 1 +descript 1 +option 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..12219570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,92 @@ +program 6 +homework 6 +nonlinear 5 +cours 5 +wisc 4 +offic 4 +convex 4 +optim 4 +differenti 4 +assign 4 +grade 4 +mail 3 +function 3 +criteria 3 +theori 2 +ferri 2 +telephon 2 +hour 2 +wednesdai 2 +text 2 +second 2 +gener 2 +concav 2 +order 2 +examin 2 +page 2 +fall 1 +also 1 +math 1 +stat 1 +applicationsfal 1 +schedul 1 +lectur 1 +list 1 +http 1 +instructor 1 +michael 1 +mondai 1 +tuesdai 1 +teach 1 +assist 1 +thursdai 1 +class 1 +olvi 1 +mangasarian 1 +siam 1 +publish 1 +philadelphia 1 +us 1 +algorithm 1 +bazaraa 1 +sherali 1 +shetti 1 +edit 1 +wilei 1 +york 1 +bertseka 1 +athena 1 +scientif 1 +inform 1 +overview 1 +introduct 1 +linear 1 +inequ 1 +theorem 1 +altern 1 +set 1 +saddlepoint 1 +without 1 +first 1 +dualiti 1 +condit 1 +exact 1 +penalti 1 +augment 1 +lagrangian 1 +gradient 1 +project 1 +book 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +week 1 +midterm 1 +novemb 1 +final 1 +mathemat 1 +home 1 +updat 1 +period 1 +semest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..12219570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,92 @@ +program 6 +homework 6 +nonlinear 5 +cours 5 +wisc 4 +offic 4 +convex 4 +optim 4 +differenti 4 +assign 4 +grade 4 +mail 3 +function 3 +criteria 3 +theori 2 +ferri 2 +telephon 2 +hour 2 +wednesdai 2 +text 2 +second 2 +gener 2 +concav 2 +order 2 +examin 2 +page 2 +fall 1 +also 1 +math 1 +stat 1 +applicationsfal 1 +schedul 1 +lectur 1 +list 1 +http 1 +instructor 1 +michael 1 +mondai 1 +tuesdai 1 +teach 1 +assist 1 +thursdai 1 +class 1 +olvi 1 +mangasarian 1 +siam 1 +publish 1 +philadelphia 1 +us 1 +algorithm 1 +bazaraa 1 +sherali 1 +shetti 1 +edit 1 +wilei 1 +york 1 +bertseka 1 +athena 1 +scientif 1 +inform 1 +overview 1 +introduct 1 +linear 1 +inequ 1 +theorem 1 +altern 1 +set 1 +saddlepoint 1 +without 1 +first 1 +dualiti 1 +condit 1 +exact 1 +penalti 1 +augment 1 +lagrangian 1 +gradient 1 +project 1 +book 1 +reserv 1 +kurt 1 +wendt 1 +librari 1 +week 1 +midterm 1 +novemb 1 +final 1 +mathemat 1 +home 1 +updat 1 +period 1 +semest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..0521a9cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,59 @@ +comput 4 +html 4 +postscript 3 +mimic 3 +scienc 3 +hour 3 +wisc 3 +home 2 +page 2 +sept 2 +text 2 +softwar 2 +pleas 2 +miron 2 +offic 2 +phone 2 +mail 2 +cychan 2 +system 1 +perform 1 +evalu 1 +model 1 +new 1 +assign 1 +librari 1 +avail 1 +public 1 +cours 1 +inform 1 +lectur 1 +devis 1 +user 1 +manual 1 +print 1 +file 1 +contain 1 +mani 1 +imag 1 +take 1 +least 1 +half 1 +initi 1 +instruct 1 +tutori 1 +onlin 1 +help 1 +qnet 1 +exampl 1 +devc 1 +professor 1 +livni 1 +teach 1 +assist 1 +chee 1 +yong 1 +chan 1 +suggest 1 +comment 1 +send 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..12dceb81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,78 @@ +student 3 +postscript 3 +find 2 +inform 2 +retriev 2 +cours 2 +resourc 2 +overview 2 +part 2 +class 2 +email 2 +uwisc 1 +technolog 1 +seek 1 +knowledgerichard 1 +belewvisit 1 +professorc 1 +lectur 1 +univ 1 +wisconsin 1 +comput 1 +scienc 1 +departmentfal 1 +thur 1 +acal 1 +room 1 +engrthi 1 +design 1 +interest 1 +understand 1 +aboutth 1 +knowledg 1 +represent 1 +machinelearn 1 +techniqu 1 +underli 1 +much 1 +excit 1 +activ 1 +occur 1 +onth 1 +world 1 +wide 1 +complet 1 +descript 1 +coures 1 +canse 1 +abstract 1 +asyllabu 1 +major 1 +topic 1 +consid 1 +graphic 1 +mapof 1 +thesear 1 +relat 1 +anda 1 +tent 1 +schedul 1 +semesterwil 1 +proce 1 +read 1 +polit 1 +infidel 1 +imag 1 +assign 1 +digest 1 +hypermai 1 +suggest 1 +compos 1 +classrel 1 +minut 1 +taken 1 +last 1 +modifi 1 +belew 1 +wisc 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..e52fb92d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,132 @@ +chang 8 +matlab 6 +assign 4 +diari 4 +class 4 +introduct 3 +page 3 +materi 3 +well 3 +list 3 +question 3 +comput 3 +also 3 +book 3 +student 3 +numer 2 +last 2 +cours 2 +tent 2 +syllabu 2 +order 2 +score 2 +addit 2 +note 2 +problem 2 +email 2 +concern 2 +mention 2 +name 2 +begin 2 +session 2 +edit 2 +avail 2 +look 2 +access 2 +telnet 2 +unix 2 +method 1 +methodsthi 1 +contain 1 +inform 1 +fall 1 +smile 1 +orderli 1 +mind 1 +cooper 1 +assignmentson 1 +total 1 +midterm 1 +date 1 +slightli 1 +point 1 +residu 1 +error 1 +condit 1 +rick 1 +carl 1 +offic 1 +hour 1 +errata 1 +text 1 +us 1 +updat 1 +sinc 1 +complex 1 +numericalanalysi 1 +algorithm 1 +post 1 +preprint 1 +foremostmathematician 1 +todai 1 +subject 1 +interest 1 +trickytop 1 +least 1 +squar 1 +solut 1 +approxim 1 +time 1 +place 1 +textmai 1 +supplement 1 +byaddit 1 +file 1 +areavail 1 +organ 1 +chapter 1 +awar 1 +though 1 +site 1 +capitallett 1 +sometim 1 +lower 1 +caselett 1 +present 1 +plan 1 +rather 1 +fortran 1 +kermit 1 +sigmon 1 +primer 1 +doit 1 +handout 1 +reaction 1 +winor 1 +machin 1 +overviewcours 1 +answer 1 +word 1 +grade 1 +four 1 +digit 1 +current 1 +conduct 1 +orient 1 +user 1 +andp 1 +relat 1 +linksyou 1 +might 1 +wish 1 +explor 1 +csdepart 1 +home 1 +system 1 +frequent 1 +ask 1 +simpl 1 +tutori 1 +advanc 1 +referenceviva 1 +good 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..15d75882 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,55 @@ +numer 2 +function 2 +page 2 +class 2 +email 2 +relat 2 +analysi 1 +last 1 +chang 1 +analysisthi 1 +contain 1 +inform 1 +fall 1 +version 1 +math 1 +current 1 +note 1 +avail 1 +follow 1 +directori 1 +well 1 +hard 1 +copi 1 +doit 1 +recent 1 +announc 1 +post 1 +grade 1 +time 1 +locat 1 +statlectur 1 +carl 1 +boor 1 +deboor 1 +wisc 1 +offic 1 +hour 1 +stat 1 +line 1 +classnot 1 +viii 1 +index 1 +assign 1 +none 1 +concern 1 +homework 1 +cours 1 +question 1 +linksyou 1 +might 1 +wish 1 +explor 1 +depart 1 +home 1 +courseoff 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..e2569eff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,13 @@ +approxim 2 +cours 2 +theori 1 +last 1 +chang 1 +theorythi 1 +page 1 +contain 1 +inform 1 +spring 1 +version 1 +math 1 +note 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..fdf91f94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,65 @@ +program 6 +silva 3 +comput 3 +inform 3 +tuesdai 3 +solut 3 +toni 2 +dsilva 2 +wisc 2 +offic 2 +scienc 2 +statist 2 +savitch 2 +sept 2 +handout 2 +introduct 2 +window 2 +thursdai 2 +quiz 2 +home 1 +page 1 +sectioncsm 1 +instructor 1 +contact 1 +email 1 +phone 1 +hour 1 +appoint 1 +textbookproblem 1 +solv 1 +object 1 +walter 1 +section 1 +comp 1 +stat 1 +firstdai 1 +noland 1 +specifi 1 +timet 1 +chamberlin 1 +cours 1 +tent 1 +syllabu 1 +semest 1 +late 1 +polici 1 +grade 1 +criteria 1 +academ 1 +misconduct 1 +import 1 +softwar 1 +microsoft 1 +windowshint 1 +compilersth 1 +oper 1 +systememailmosaicnetscap 1 +borland 1 +languageth 1 +text 1 +assign 1 +quizz 1 +last 1 +modifi 1 +anthoni 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..32eb3cbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,432 @@ +imag 51 +vision 23 +exam 11 +comput 10 +paper 10 +us 10 +method 9 +homework 9 +process 9 +project 8 +read 8 +doit 8 +chapter 8 +tool 8 +vista 7 +cours 6 +student 6 +test 6 +algorithm 6 +email 5 +format 5 +assign 5 +avail 5 +thin 5 +mosaic 5 +khoro 5 +univers 5 +thursdai 4 +includ 4 +class 4 +result 4 +directori 4 +send 4 +page 4 +program 4 +code 4 +account 4 +printer 4 +print 4 +environ 4 +execut 4 +spring 4 +dyer 3 +wisc 3 +detect 3 +activ 3 +contour 3 +base 3 +novemb 3 +document 3 +handout 3 +window 3 +interact 3 +also 3 +start 3 +softwar 3 +least 3 +shape 3 +larg 3 +hand 3 +might 3 +line 3 +exampl 3 +note 3 +fast 3 +digit 3 +comm 3 +spline 3 +adelson 3 +graphic 3 +laser 3 +locat 3 +system 3 +instructor 2 +chuck 2 +csstelephon 2 +eduoffic 2 +hour 2 +gener 2 +introduct 2 +analysi 2 +segment 2 +modul 2 +three 2 +dimension 2 +scene 2 +inform 2 +shade 2 +depth 2 +stereo 2 +focu 2 +model 2 +grade 2 +requir 2 +select 2 +machin 2 +collect 2 +sold 2 +onlin 2 +enhanc 2 +modif 2 +public 2 +face 2 +rotat 2 +color 2 +free 2 +origin 2 +appli 2 +home 2 +well 2 +skeleton 2 +octob 2 +manual 2 +condit 2 +type 2 +disappear 2 +matrix 2 +block 2 +distanc 2 +four 2 +want 2 +follow 2 +addit 2 +output 2 +ubyt 2 +file 2 +point 2 +applic 2 +delet 2 +parallel 2 +pattern 2 +differ 2 +pyramid 2 +engin 2 +burt 2 +tran 2 +hint 2 +faq 2 +snake 2 +topic 2 +stanford 2 +room 2 +space 2 +sure 2 +list 2 +queue 2 +displai 2 +varieti 2 +imgstar 2 +basic 2 +convers 2 +matlab 2 +numer 2 +databas 2 +examin 2 +link 2 +interest 2 +scienc 2 +hdtv 2 +atsc 2 +demo 2 +visionc 1 +visionfal 1 +dyeroffic 1 +mondai 1 +appointmentteach 1 +assist 1 +bryan 1 +sooffic 1 +wednesdai 1 +fridai 1 +appointmentstud 1 +informationfundament 1 +first 1 +level 1 +edg 1 +featuredetect 1 +principl 1 +defin 1 +forreconstruct 1 +usingtechniqu 1 +asshap 1 +recoveri 1 +andocclud 1 +viewpoint 1 +control 1 +motion 1 +track 1 +object 1 +recognit 1 +schedul 1 +tuesdai 1 +prerequisit 1 +fundament 1 +calculu 1 +probabl 1 +theori 1 +linear 1 +algebra 1 +midterm 1 +particip 1 +syllabu 1 +part 1 +jain 1 +kasturi 1 +schunck 1 +mcgraw 1 +hill 1 +york 1 +readingsfrom 1 +journal 1 +confer 1 +proceed 1 +small 1 +batchessupplementari 1 +sourcesonlin 1 +informationmost 1 +urlhttp 1 +html 1 +date 1 +except 1 +primarili 1 +score 1 +assignmentshomework 1 +histogram 1 +option 1 +make 1 +copi 1 +portrait 1 +contrast 1 +byfirst 1 +crop 1 +around 1 +head 1 +shoulder 1 +final 1 +adjust 1 +theintens 1 +function 1 +editor 1 +thewindow 1 +button 1 +modifi 1 +thing 1 +colorif 1 +wish 1 +found 1 +good 1 +grayscal 1 +transformationsav 1 +andput 1 +whereth 1 +tell 1 +qualit 1 +whatintens 1 +transform 1 +improv 1 +qualityof 1 +overal 1 +photo 1 +board 1 +feel 1 +ownweb 1 +learn 1 +get 1 +correct 1 +chang 1 +instead 1 +prevent 1 +altogeth 1 +count 1 +transit 1 +case 1 +citi 1 +infin 1 +constant 1 +corner 1 +chessboard 1 +center 1 +posit 1 +evalu 1 +experi 1 +convert 1 +vconvert 1 +edit 1 +need 1 +emac 1 +clean 1 +header 1 +contain 1 +right 1 +repn 1 +component_interp 1 +gradient 1 +low_threshold 1 +high_threshold 1 +vlink 1 +vsegedg 1 +approach 1 +determin 1 +direct 1 +index 1 +finger 1 +fact 1 +entir 1 +surround 1 +zhang 1 +suen 1 +wang 1 +comment 1 +comparison 1 +version 1 +laplacian 1 +compact 1 +ieee 1 +multiresolut 1 +produc 1 +kass 1 +witkin 1 +terzopoulo 1 +william 1 +shah 1 +curvatur 1 +estim 1 +understand 1 +decemb 1 +titl 1 +abstract 1 +supplementari 1 +help 1 +done 1 +sparcstat 1 +call 1 +disk 1 +quota 1 +store 1 +compress 1 +other 1 +gzip 1 +howev 1 +order 1 +save 1 +sent 1 +goe 1 +everyon 1 +laserprint 1 +altern 1 +name 1 +shortest 1 +caution 1 +check 1 +job 1 +manner 1 +take 1 +long 1 +consider 1 +oper 1 +invok 1 +unix 1 +like 1 +command 1 +develop 1 +provid 1 +languag 1 +interfac 1 +rapid 1 +prototyp 1 +simpl 1 +cantata 1 +netpbm 1 +toolkit 1 +pbmplu 1 +packag 1 +visual 1 +signal 1 +toolbox 1 +especi 1 +relev 1 +although 1 +access 1 +solut 1 +held 1 +regular 1 +classroom 1 +earli 1 +time 1 +cover 1 +textbook 1 +bring 1 +sheet 1 +side 1 +main 1 +idea 1 +proof 1 +question 1 +ask 1 +highli 1 +recommend 1 +wandel 1 +number 1 +grand 1 +allianc 1 +specif 1 +advanc 1 +televis 1 +committe 1 +postscript 1 +spie 1 +optic 1 +librari 1 +appl 1 +quicktim 1 +product 1 +panoramix 1 +panoram 1 +decfac 1 +talk 1 +synthet 1 +video 1 +rate 1 +virtual 1 +realiti 1 +qbic 1 +miscellan 1 +relat 1 +boston 1 +cardiff 1 +royal 1 +institut 1 +sweden 1 +virginia 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..a1577ea6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,67 @@ +program 3 +fall 2 +section 2 +dave 2 +dzimm 2 +wisc 2 +offic 2 +home 2 +sharp 2 +algebra 1 +languag 1 +instructor 1 +zimmermannemail 1 +educlass 1 +meet 1 +time 1 +place 1 +nolandoffic 1 +phone 1 +hour 1 +announcementsprogram 1 +wednesdai 1 +octob 1 +readi 1 +fridai 1 +novemb 1 +gener 1 +cours 1 +informationc 1 +pagecours 1 +objectivesvectra 1 +labc 1 +consultantssyllabuswork 1 +homeclass 1 +handoutsprogramsexam 1 +quizzeslectur 1 +notesgreg 1 +style 1 +guidegrad 1 +referenc 1 +last 1 +digit 1 +number 1 +quizzesprogramsexam 1 +polici 1 +informationemail 1 +policygrad 1 +policyl 1 +policyacadem 1 +misconduct 1 +policytext 1 +problem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +list 1 +known 1 +erratalast 1 +modifi 1 +zimmermann 1 +base 1 +greg 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..b49f2f63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,31 @@ +program 5 +cours 3 +linear 2 +method 2 +fall 2 +spring 2 +nonlinear 2 +gener 1 +inform 1 +offer 1 +semest 1 +page 1 +variou 1 +instructor 1 +michael 1 +ferri 1 +mangasarian 1 +graduat 1 +wisconsin 1 +network 1 +flow 1 +integ 1 +theori 1 +algorithm 1 +comput 1 +larg 1 +spars 1 +system 1 +last 1 +modifi 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..180b695e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,115 @@ +homework 9 +fridai 7 +mondai 6 +cours 5 +unix 5 +network 4 +class 4 +offic 4 +flow 4 +grade 4 +march 4 +mail 3 +wisc 3 +thursdai 3 +linear 3 +program 3 +path 3 +assign 3 +april 3 +cshrc 3 +local 3 +ferri 2 +telephon 2 +hour 2 +wednesdai 2 +text 2 +us 2 +gener 2 +inform 2 +orient 2 +session 2 +first 2 +user 2 +week 2 +room 2 +gam 2 +machin 2 +page 2 +spring 1 +flowsspr 1 +schedul 1 +lectur 1 +list 1 +februari 1 +instructor 1 +michael 1 +teach 1 +assist 1 +leei 1 +tuesdai 1 +requir 1 +ravindra 1 +ahuja 1 +thoma 1 +magnanti 1 +jame 1 +orlin 1 +prentic 1 +hall 1 +chvatal 1 +freeman 1 +optim 1 +bertseka 1 +press 1 +overview 1 +tree 1 +cycl 1 +data 1 +structur 1 +shortest 1 +cost 1 +simplex 1 +method 1 +convex 1 +equilibria 1 +lagrangian 1 +relax 1 +multicommod 1 +applic 1 +prerequisit 1 +knowledg 1 +project 1 +final 1 +examin 1 +close 1 +book 1 +except 1 +sheet 1 +paper 1 +allow 1 +repres 1 +question 1 +comput 1 +time 1 +novic 1 +previous 1 +workstat 1 +held 1 +second 1 +last 1 +minut 1 +introduct 1 +login 1 +access 1 +public 1 +sourc 1 +alter 1 +set 1 +directori 1 +appropri 1 +solari 1 +mathemat 1 +home 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..4bacaa74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,128 @@ +homework 10 +fridai 7 +mondai 7 +spars 6 +cours 5 +unix 5 +class 4 +offic 4 +grade 4 +march 4 +comput 3 +mail 3 +wisc 3 +thursdai 3 +assign 3 +april 3 +handout 3 +math 2 +method 2 +februari 2 +instructor 2 +ferri 2 +telephon 2 +hour 2 +wednesdai 2 +matrix 2 +second 2 +inform 2 +introduct 2 +orient 2 +session 2 +first 2 +user 2 +week 2 +room 2 +page 2 +spring 1 +also 1 +larg 1 +systemsspr 1 +schedul 1 +lectur 1 +list 1 +michael 1 +teach 1 +assist 1 +leei 1 +tuesdai 1 +recommend 1 +textbook 1 +golub 1 +loan 1 +john 1 +hopkinsunivers 1 +press 1 +edit 1 +direct 1 +matric 1 +duff 1 +erisman 1 +reid 1 +oxford 1 +scienc 1 +public 1 +finit 1 +dimension 1 +vector 1 +space 1 +halmo 1 +springer 1 +verlag 1 +gener 1 +overview 1 +storag 1 +scheme 1 +gaussian 1 +elimin 1 +dens 1 +error 1 +analysi 1 +local 1 +pivot 1 +strategi 1 +modif 1 +iter 1 +linear 1 +solver 1 +least 1 +squar 1 +nonlinear 1 +equat 1 +optim 1 +applic 1 +parallel 1 +techniqu 1 +eigenvalu 1 +eigenvector 1 +prerequisit 1 +consent 1 +project 1 +final 1 +examin 1 +close 1 +book 1 +except 1 +sheet 1 +paper 1 +allow 1 +repres 1 +question 1 +ieee 1 +arithmet 1 +machin 1 +time 1 +novic 1 +previous 1 +us 1 +workstat 1 +held 1 +last 1 +minut 1 +instruct 1 +matlab 1 +mathemat 1 +program 1 +home 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..0aef0e0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,56 @@ +offic 6 +fischer 3 +telephon 3 +mail 3 +wisc 3 +hour 3 +lectur 2 +charl 2 +mondai 2 +wednesdai 2 +fridai 2 +appoint 2 +teach 2 +assist 2 +program 2 +weyer 2 +cours 2 +fall 1 +construct 1 +compilersfal 1 +schedul 1 +tuth 1 +csst 1 +instructor 1 +krishna 1 +kunchithapadam 1 +krisna 1 +tuesdai 1 +thursdai 1 +assign 1 +homework 1 +read 1 +class 1 +text 1 +craft 1 +compil 1 +richard 1 +leblanc 1 +benjamin 1 +cum 1 +check 1 +regularli 1 +gener 1 +inform 1 +overview 1 +date 1 +grade 1 +examin 1 +get 1 +start 1 +handout 1 +note 1 +us 1 +tool 1 +link 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..67a42969 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,57 @@ +greg 5 +wisc 4 +fall 2 +section 2 +offic 2 +algebra 1 +languag 1 +program 1 +instructor 1 +sharpemail 1 +eduoffic 1 +phone 1 +hour 1 +appt 1 +grader 1 +krishna 1 +kunchithapadamemail 1 +krisna 1 +edugener 1 +cours 1 +informationc 1 +home 1 +pagecours 1 +objectivesvectra 1 +labc 1 +consultantssyllabuscours 1 +difficultywork 1 +homenewsstartup 1 +informationclass 1 +noteshomeworkexam 1 +quizzesstyl 1 +guideemail 1 +archivepolici 1 +informationemail 1 +policygrad 1 +policyl 1 +policyacadem 1 +misconduct 1 +polici 1 +must 1 +read 1 +textproblem 1 +solv 1 +object 1 +porgrammingwalt 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +pleas 1 +list 1 +known 1 +erratalast 1 +modifi 1 +sharpgreg 1 +http 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..888b81f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,159 @@ +program 9 +class 7 +prog 7 +name 5 +polici 5 +section 4 +line 4 +need 4 +inform 4 +home 3 +version 3 +also 3 +chang 3 +assign 3 +grade 3 +mean 3 +page 2 +fall 2 +chad 2 +lane 2 +import 2 +tribbl 2 +compil 2 +problem 2 +enumer 2 +type 2 +public 2 +privat 2 +note 2 +call 2 +work 2 +file 2 +everyth 2 +copi 2 +want 2 +us 2 +link 2 +project 2 +directori 2 +consist 2 +sampl 2 +read 2 +question 2 +quizz 2 +test 2 +text 2 +introduct 2 +sharp 2 +algebra 1 +languag 1 +wisc 1 +announc 1 +tue 1 +recogn 1 +outsid 1 +must 1 +declar 1 +insid 1 +result 1 +bump 1 +bottom 1 +shown 1 +definit 1 +randomintinrang 1 +defin 1 +correspond 1 +function 1 +bodi 1 +within 1 +forgot 1 +valu 1 +uppercas 1 +overwrit 1 +obsolet 1 +includ 1 +might 1 +check 1 +help 1 +someth 1 +ad 1 +comment 1 +suggest 1 +throughout 1 +part 1 +updat 1 +descript 1 +pleas 1 +make 1 +sure 1 +discrep 1 +chri 1 +weaver 1 +shouldn 1 +matter 1 +readi 1 +crucial 1 +entir 1 +understand 1 +basic 1 +attempt 1 +earli 1 +start 1 +hard 1 +requir 1 +time 1 +piec 1 +togeth 1 +bring 1 +tuesdai 1 +midterm 1 +freshmen 1 +either 1 +fine 1 +great 1 +thumb 1 +noth 1 +freshman 1 +disregard 1 +stuff 1 +tent 1 +semest 1 +syllabu 1 +handout 1 +prepar 1 +solutionscours 1 +solv 1 +object 1 +walter 1 +savitch 1 +addison 1 +weslei 1 +publish 1 +compani 1 +meet 1 +vleck 1 +administr 1 +late 1 +mail 1 +attend 1 +academ 1 +misconduct 1 +microsoft 1 +window 1 +first 1 +borland 1 +second 1 +vectra 1 +sourc 1 +code 1 +consult 1 +extra 1 +refer 1 +materi 1 +mani 1 +answer 1 +lectur 1 +style 1 +guidelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..ab9da90d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,68 @@ +offic 4 +program 3 +lectur 2 +horwitz 2 +telephon 2 +mail 2 +wisc 2 +hour 2 +appoint 2 +rahul 2 +compil 2 +tool 2 +cours 2 +inform 2 +assign 2 +grade 2 +fall 1 +introduct 1 +languag 1 +compilersspr 1 +stori 1 +month 1 +octob 1 +schedul 1 +tuth 1 +comp 1 +stat 1 +recit 1 +psycholog 1 +instructor 1 +susan 1 +tuesdai 1 +fridai 1 +teach 1 +assist 1 +kapoor 1 +mondai 1 +wednesdai 1 +text 1 +reserv 1 +wendt 1 +librari 1 +principl 1 +techniqu 1 +sethi 1 +ullman 1 +craft 1 +fischer 1 +leblanc 1 +check 1 +regularli 1 +gener 1 +overview 1 +date 1 +exam 1 +includ 1 +late 1 +polici 1 +get 1 +start 1 +read 1 +homework 1 +examin 1 +note 1 +us 1 +email 1 +link 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..e6e4cf4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,51 @@ +hummert 3 +offic 3 +comput 3 +program 3 +inform 3 +home 2 +scienc 2 +statist 2 +phone 2 +savitch 2 +psych 2 +grade 2 +introduct 2 +window 2 +page 1 +sectionsc 1 +instructor 1 +contact 1 +email 1 +wisc 1 +hour 1 +mondai 1 +thursdai 1 +announc 1 +textbookproblem 1 +solv 1 +object 1 +walter 1 +section 1 +cours 1 +handout 1 +tent 1 +syllabu 1 +semest 1 +late 1 +polici 1 +criteria 1 +academ 1 +misconduct 1 +viewgraph 1 +import 1 +softwar 1 +microsoft 1 +windowshint 1 +compilersth 1 +oper 1 +systememailmosaicnetscap 1 +borland 1 +languageth 1 +text 1 +assign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..f50b4a47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,43 @@ +section 3 +offic 3 +click 3 +hour 2 +mail 2 +jonb 2 +wisc 2 +infoc 1 +info 1 +name 1 +bodnersect 1 +mondai 1 +thursdai 1 +number 1 +comput 1 +scienc 1 +statist 1 +hall 1 +doit 1 +phone 1 +eduher 1 +thing 1 +keep 1 +mind 1 +need 1 +copi 1 +guid 1 +choos 1 +print 1 +file 1 +menu 1 +question 1 +pleas 1 +stop 1 +send 1 +grade 1 +avail 1 +bodner 1 +mound 1 +madison 1 +last 1 +modifi 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..abc902f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,139 @@ +program 10 +lisp 10 +exam 9 +common 5 +line 5 +cours 5 +kunen 4 +section 4 +inform 4 +avail 4 +directori 4 +introduct 3 +assign 3 +time 3 +topic 3 +us 3 +click 3 +artifici 2 +intellig 2 +coursewil 2 +email 2 +wisc 2 +count 2 +final 2 +late 2 +logic 2 +discuss 2 +search 2 +languag 2 +sinc 2 +probabl 2 +refer 2 +book 2 +recit 2 +last 2 +problem 2 +previou 2 +intro 1 +notic 1 +spring 1 +detail 1 +appear 1 +later 1 +instructor 1 +kunenoffic 1 +stat 1 +buildingtelephon 1 +eduoffic 1 +hour 1 +appoint 1 +grade 1 +four 1 +three 1 +thirdexam 1 +schedul 1 +place 1 +turn 1 +midnight 1 +thedai 1 +loos 1 +cover 1 +follow 1 +order 1 +entir 1 +buti 1 +design 1 +assignmenti 1 +game 1 +plai 1 +prolog 1 +natur 1 +understand 1 +learn 1 +neural 1 +network 1 +deduct 1 +plan 1 +reason 1 +uncertain 1 +knowledg 1 +begin 1 +would 1 +usefulto 1 +supplement 1 +lecturesand 1 +help 1 +within 1 +manypaperback 1 +like 1 +lispcraft 1 +wilenski 1 +anoth 1 +possibl 1 +ansi 1 +graham 1 +code 1 +ultim 1 +steel 1 +edit 1 +page 1 +also 1 +sun 1 +addit 1 +textbook 1 +modern 1 +approach 1 +russel 1 +norvig 1 +class 1 +session 1 +engr 1 +psych 1 +essentiallli 1 +materi 1 +present 1 +answer 1 +question 1 +give 1 +hint 1 +review 1 +usual 1 +minut 1 +teach 1 +attend 1 +differ 1 +lectur 1 +public 1 +alpha 1 +beta 1 +alpha_beta 1 +best 1 +first 1 +astar 1 +fall 1 +postscript 1 +still 1 +older 1 +chang 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..d2d65767 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,86 @@ +chapter 12 +homework 7 +assign 7 +part 6 +architectur 5 +tabl 5 +content 5 +reader 5 +comput 4 +full 4 +paper 4 +doit 4 +solut 4 +note 3 +instruct 3 +parallel 3 +spring 3 +inform 2 +comp 2 +wisc 2 +eduoffic 2 +hour 2 +appoint 2 +lectur 2 +project 2 +give 2 +basic 2 +pipelin 2 +level 2 +class 2 +talk 2 +decemb 2 +fall 1 +advanc 1 +ifal 1 +offer 1 +cours 1 +instructor 1 +mark 1 +hilloffic 1 +statemail 1 +markhil 1 +tuesdai 1 +fridai 1 +shenoffic 1 +statphon 1 +email 1 +mshen 1 +mondai 1 +thursdai 1 +miscellaneawhat 1 +talksread 1 +introduct 1 +perform 1 +cost 1 +set 1 +cach 1 +memori 1 +talluri 1 +hill 1 +input 1 +output 1 +interconnect 1 +process 1 +solutionproject 1 +propos 1 +novemb 1 +report 1 +noonmiscellanea 1 +final 1 +midterm 1 +us 1 +first 1 +edit 1 +hennessi 1 +patterson 1 +qualifi 1 +exam 1 +sourc 1 +hard 1 +question 1 +seminar 1 +wisconsin 1 +group 1 +world 1 +wide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..6cdd6445 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,72 @@ +us 6 +exampl 5 +class 5 +offic 4 +home 3 +program 3 +section 2 +mbirk 2 +wisc 2 +comp 2 +assign 2 +grade 2 +dynam 2 +alloc 2 +oper 2 +overload 2 +complex 2 +page 2 +introduct 2 +instructor 1 +michael 1 +birk 1 +email 1 +phone 1 +hour 1 +appoint 1 +administr 1 +inform 1 +text 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +room 1 +time 1 +psycholog 1 +tuesdai 1 +thursdai 1 +vectra 1 +syllabu 1 +comput 1 +standard 1 +late 1 +polici 1 +handin 1 +procedur 1 +cheat 1 +academ 1 +misconduct 1 +consult 1 +string 1 +ration 1 +repres 1 +float 1 +point 1 +number 1 +anoth 1 +intstack 1 +simpl 1 +unlimit 1 +size 1 +data 1 +structur 1 +classinfo 1 +struct 1 +link 1 +microsoft 1 +window 1 +borland 1 +tutori 1 +debugg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..331d1cb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,107 @@ +program 11 +class 7 +us 6 +comp 5 +exampl 5 +offic 4 +assign 3 +room 3 +section 2 +mbirk 2 +wisc 2 +home 2 +avail 2 +number 2 +exam 2 +spring 2 +instruct 2 +syllabu 2 +eight 2 +week 2 +comput 2 +late 2 +polici 2 +grade 2 +dynam 2 +alloc 2 +oper 2 +overload 2 +complex 2 +introduct 2 +instructor 1 +michael 1 +birk 1 +email 1 +phone 1 +hour 1 +appoint 1 +announc 1 +test 1 +case 1 +dice 1 +code 1 +discuss 1 +rank 1 +last 1 +four 1 +digit 1 +student 1 +past 1 +onlin 1 +fall 1 +note 1 +hangman 1 +mondai 1 +octob 1 +lectur 1 +format 1 +first 1 +second 1 +come 1 +soon 1 +print 1 +output 1 +outsid 1 +final 1 +chang 1 +meet 1 +administr 1 +inform 1 +text 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +time 1 +tuesdai 1 +thursdai 1 +vectra 1 +standard 1 +handin 1 +procedur 1 +cheat 1 +academ 1 +misconduct 1 +consult 1 +string 1 +ration 1 +repres 1 +float 1 +point 1 +anoth 1 +intstack 1 +simpl 1 +unlimit 1 +size 1 +data 1 +structur 1 +classinfo 1 +struct 1 +link 1 +page 1 +microsoft 1 +window 1 +borland 1 +tutori 1 +debugg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..848bf6a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,80 @@ +section 7 +melski 4 +email 4 +page 3 +info 3 +stuff 2 +assign 2 +comput 2 +help 2 +link 2 +david 2 +offic 2 +wisc 2 +meet 2 +us 2 +archiv 2 +dave 1 +melskithes 1 +chang 1 +frequent 1 +press 1 +reload 1 +button 1 +daili 1 +get 1 +start 1 +alreadi 1 +uncomfort 1 +andth 1 +softwar 1 +instructor 1 +scienc 1 +statist 1 +floor 1 +phone 1 +hour 1 +click 1 +attach 1 +pleas 1 +noland 1 +psycholog 1 +text 1 +problem 1 +solv 1 +object 1 +program 1 +walter 1 +savitch 1 +addison 1 +wesleypublish 1 +gener 1 +window 1 +usingborland 1 +refer 1 +materi 1 +rough 1 +syllabu 1 +polici 1 +academ 1 +misconduct 1 +must 1 +read 1 +rule 1 +thumb 1 +share 1 +code 1 +consult 1 +grade 1 +late 1 +work 1 +check 1 +often 1 +essenti 1 +solut 1 +handout 1 +list 1 +tutor 1 +avail 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..c64db799 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,142 @@ +program 14 +novemb 10 +wednesdai 10 +quiz 9 +octob 9 +class 8 +page 7 +code 7 +bankaccount 7 +solut 7 +score 7 +milo 6 +mondai 6 +home 5 +html 5 +exam 5 +septemb 5 +guid 4 +refer 4 +section 3 +languag 3 +martin 3 +wisc 3 +take 3 +file 3 +us 3 +document 3 +postscript 3 +function 3 +call 3 +fall 2 +time 2 +offic 2 +tuesdai 2 +check 2 +homepag 2 +ad 2 +current 2 +grade 2 +gener 2 +inform 2 +consult 2 +exampl 2 +list 2 +user 2 +input 2 +withprompt 2 +version 2 +standard 2 +fridai 2 +decemb 2 +algebra 1 +instructor 1 +locat 1 +psychologyinstructor 1 +email 1 +eduoffic 1 +hour 1 +thursdai 1 +appoint 1 +phone 1 +announcementsthi 1 +chang 1 +frequent 1 +respons 1 +often 1 +given 1 +todai 1 +place 1 +onth 1 +project 1 +room 1 +test 1 +scheduledfor 1 +updat 1 +haseveryth 1 +pleas 1 +make 1 +sure 1 +isaccur 1 +link 1 +coupl 1 +withinform 1 +linksar 1 +titl 1 +avail 1 +onfridai 1 +remind 1 +sheet 1 +turn 1 +assign 1 +syllabu 1 +style 1 +vectra 1 +schedul 1 +academ 1 +misconduct 1 +policyclass 1 +final 1 +main 1 +struct 1 +minmax 1 +findth 1 +minimum 1 +maximum 1 +number 1 +case 1 +enteredfrom 1 +stdin 1 +form 1 +creat 1 +formlett 1 +data 1 +specifi 1 +theopen_fil 1 +introduc 1 +valu 1 +beginn 1 +introduct 1 +toth 1 +latest 1 +releas 1 +good 1 +viru 1 +hoax 1 +ethic 1 +andprofession 1 +conductassign 1 +survei 1 +questionar 1 +requir 1 +textbook 1 +problem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +known 1 +errata 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..77dc97f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,89 @@ +program 11 +tuesdai 7 +section 6 +thursdai 6 +april 4 +martin 3 +ream 3 +class 3 +inform 3 +februari 3 +march 3 +exam 3 +list 2 +semest 2 +email 2 +mream 2 +wisc 2 +offic 2 +comput 2 +noland 2 +home 1 +page 1 +classc 1 +algebra 1 +languag 1 +programmingspr 1 +teach 1 +assist 1 +absolut 1 +nothingeverydai 1 +informationc 1 +pagescommon 1 +programmingmistakesarch 1 +mail 1 +messag 1 +sent 1 +calendar 1 +januari 1 +final 1 +placeto 1 +announcedcours 1 +detail 1 +contact 1 +scienc 1 +statist 1 +dayton 1 +phone 1 +hour 1 +appoint 1 +talk 1 +send 1 +textbookproblem 1 +solv 1 +object 1 +walter 1 +savitch 1 +csst 1 +contain 1 +vectra 1 +run 1 +window 1 +andborland 1 +addit 1 +cours 1 +tent 1 +syllabu 1 +extra 1 +materi 1 +late 1 +polici 1 +grade 1 +criteria 1 +academicmisconduct 1 +rule 1 +thumb 1 +share 1 +code 1 +assign 1 +work 1 +anyform 1 +former 1 +student 1 +made 1 +bigtodd 1 +thielwendi 1 +staatsabout 1 +instructor 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..c711b90f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,107 @@ +program 6 +exampl 3 +fall 2 +section 2 +mike 2 +msteel 2 +wisc 2 +comp 2 +stat 2 +deadlin 2 +check 2 +mail 2 +read 2 +tuesdai 2 +novemb 2 +grade 2 +note 2 +polici 2 +algebra 1 +languag 1 +nolandinstructor 1 +steeleemail 1 +eduoffic 1 +buildingoffic 1 +hour 1 +time 1 +appoint 1 +soffic 1 +phone 1 +import 1 +announcementsi 1 +extend 1 +pleas 1 +thenew 1 +informationmidterm 1 +current 1 +line 1 +includ 1 +everyth 1 +hand 1 +sampl 1 +taken 1 +pastfew 1 +week 1 +class 1 +fill 1 +stuff 1 +gloss 1 +makefulli 1 +function 1 +find 1 +us 1 +ifyou 1 +miss 1 +even 1 +didn 1 +understand 1 +page 1 +near 1 +bottom 1 +rememb 1 +clarif 1 +programmingassign 1 +gener 1 +cours 1 +informationc 1 +home 1 +pagecours 1 +objectivesabout 1 +vectra 1 +labc 1 +consultantscours 1 +syllabu 1 +assignmentsnot 1 +work 1 +homeclass 1 +handoutsprogram 1 +assignmentsexam 1 +quizzessom 1 +examplespolici 1 +informationemail 1 +policygrad 1 +late 1 +academ 1 +misconduct 1 +policyus 1 +refer 1 +pagesintroduct 1 +microsoft 1 +windowsintroduct 1 +borland 1 +greg 1 +sharp 1 +styleguid 1 +codetextproblem 1 +solv 1 +object 1 +walter 1 +savitchaddison 1 +weslei 1 +publish 1 +compani 1 +list 1 +known 1 +erratalast 1 +modifi 1 +steel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..9be2d1d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,131 @@ +grade 6 +section 6 +assign 5 +polici 5 +offic 4 +onlin 4 +test 4 +wisc 3 +comput 3 +scienc 3 +statist 3 +hour 3 +check 3 +email 3 +fall 2 +prock 2 +eduoffic 2 +phone 2 +consult 2 +link 2 +crazi 2 +todai 2 +quiz 2 +gener 2 +question 2 +final 2 +page 2 +inform 2 +class 2 +text 2 +materi 2 +info 2 +archiv 2 +introduct 2 +session 1 +infoc 1 +sessionalgebra 1 +languag 1 +program 1 +instructor 1 +andrew 1 +prockemail 1 +thgrader 1 +haihong 1 +wangemail 1 +mtwrannounc 1 +pleas 1 +verifi 1 +score 1 +modifi 1 +made 1 +minor 1 +modif 1 +file 1 +copi 1 +alreadi 1 +gotton 1 +five 1 +exam 1 +result 1 +rang 1 +ad 1 +sampl 1 +perus 1 +anoth 1 +think 1 +give 1 +good 1 +idea 1 +level 1 +knowledg 1 +need 1 +rememb 1 +topic 1 +webpag 1 +seem 1 +done 1 +notic 1 +error 1 +make 1 +sure 1 +assing 1 +well 1 +like 1 +work 1 +ahead 1 +tuesdai 1 +decemb 1 +mark 1 +calendar 1 +everyon 1 +requir 1 +take 1 +feel 1 +thing 1 +locat 1 +import 1 +carefulli 1 +read 1 +administr 1 +welcom 1 +problem 1 +solv 1 +walter 1 +savitch 1 +room 1 +time 1 +tent 1 +syllabu 1 +late 1 +mail 1 +academ 1 +misconductcours 1 +cours 1 +style 1 +guid 1 +lectur 1 +note 1 +microsoft 1 +window 1 +first 1 +borland 1 +second 1 +home 1 +vectra 1 +sourc 1 +code 1 +extra 1 +refer 1 +mani 1 +answer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..f7fc8b70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,88 @@ +info 9 +site 3 +softwar 3 +archiv 3 +simtel 3 +kelli 2 +page 2 +mail 2 +messag 2 +function 2 +link 2 +inform 2 +interest 2 +world 2 +usenet 2 +need 2 +ratliff 1 +current 1 +grade 1 +keyword 1 +search 1 +exampl 1 +enter 1 +without 1 +quot 1 +everi 1 +paragraph 1 +us 1 +word 1 +also 1 +wildcard 1 +sent 1 +semest 1 +addit 1 +backup 1 +copi 1 +disk 1 +filesviru 1 +wide 1 +faqfun 1 +stuff 1 +oracl 1 +resourc 1 +index 1 +virtual 1 +tourist 1 +mapth 1 +space 1 +shuttl 1 +clickabl 1 +badger 1 +herald 1 +comicshumor 1 +abort 1 +retri 1 +ignor 1 +nine 1 +type 1 +usersfin 1 +weeklab 1 +jokesget 1 +comput 1 +home 1 +might 1 +tryingsom 1 +sharewar 1 +freewar 1 +avail 1 +internet 1 +program 1 +usual 1 +compress 1 +somecompress 1 +unpack 1 +reviewsom 1 +command 1 +try 1 +biggest 1 +best 1 +maintain 1 +minclud 1 +file 1 +post 1 +infocompress 1 +infofavorit 1 +clickher 1 +visit 1 +desautel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..fa8d6b8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,75 @@ +program 8 +comput 4 +introduct 3 +inform 3 +home 2 +room 2 +savitch 2 +novemb 2 +window 2 +page 1 +russ 1 +man 1 +csc 1 +instructor 1 +russel 1 +manningemail 1 +rman 1 +wisc 1 +eduoffic 1 +scienceoffic 1 +hour 1 +find 1 +basement 1 +saturdai 1 +except 1 +footbal 1 +game 1 +sundai 1 +come 1 +keep 1 +compani 1 +work 1 +like 1 +vectra 1 +although 1 +student 1 +prioriti 1 +grade 1 +lectur 1 +final 1 +click 1 +textbook 1 +problem 1 +solv 1 +object 1 +walter 1 +section 1 +semest 1 +univers 1 +rotc 1 +build 1 +scienc 1 +statist 1 +assign 1 +mondai 1 +wednesdai 1 +cours 1 +handout 1 +syllabu 1 +late 1 +polici 1 +academ 1 +misconduct 1 +import 1 +softwar 1 +microsoft 1 +windowshint 1 +compilersth 1 +oper 1 +systememailmosaicnetscap 1 +borland 1 +languageth 1 +textold 1 +quizz 1 +none 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..73df833d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,95 @@ +wisc 8 +spring 6 +fall 4 +group 4 +cours 3 +knowledg 3 +search 3 +vision 3 +learn 3 +robot 3 +shavlik 3 +comput 3 +introduct 2 +artifici 2 +intellig 2 +network 2 +deduct 2 +problem 2 +solv 2 +plan 2 +languag 2 +includ 2 +jude 2 +relat 2 +link 2 +machin 2 +gener 1 +inform 1 +offer 1 +semest 1 +academ 1 +year 1 +section 1 +thefal 1 +topic 1 +cover 1 +principl 1 +base 1 +techniqu 1 +best 1 +first 1 +alpha 1 +beta 1 +represent 1 +us 1 +predic 1 +logic 1 +semant 1 +connectionist 1 +frame 1 +rule 1 +autom 1 +applic 1 +expert 1 +system 1 +game 1 +plai 1 +natur 1 +understand 1 +program 1 +lisp 1 +possibl 1 +prolog 1 +previou 1 +assumedprerequisit 1 +page 1 +variou 1 +instructor 1 +chuck 1 +dyer 1 +kunen 1 +sabbat 1 +bryan 1 +local 1 +madison 1 +seminar 1 +qualifi 1 +exam 1 +recent 1 +tabl 1 +content 1 +abstract 1 +journal 1 +mostli 1 +wendt 1 +librari 1 +readabl 1 +biologi 1 +dept 1 +graduat 1 +wisconsin 1 +motion 1 +extern 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..58c60aac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,216 @@ +assign 17 +learn 16 +homework 15 +read 14 +wisc 11 +spring 10 +april 10 +postscript 8 +chapter 8 +textbook 8 +feedback 8 +author 8 +neural 8 +journal 8 +mondai 7 +mitchel 7 +network 7 +shavlik 6 +februari 6 +lisp 6 +machin 5 +cours 5 +wednesdai 5 +articl 5 +march 5 +towel 4 +januari 4 +late 4 +us 4 +link 4 +page 4 +offic 3 +class 3 +theori 3 +relat 3 +knowledg 3 +algorithm 3 +reinforc 3 +concept 3 +space 3 +paper 3 +server 3 +group 3 +jude 2 +home 2 +archiv 2 +readabl 2 +comput 2 +base 2 +fisher 2 +rumelhart 2 +moonei 2 +empir 2 +compar 2 +backprop 2 +experiment 2 +cobweb 2 +polici 2 +train 2 +induc 2 +decis 2 +tree 2 +free 2 +semest 2 +noon 2 +librari 2 +resourc 2 +bibliographi 2 +austrian 2 +institut 2 +refer 2 +help 2 +program 2 +akcl 2 +common 2 +tip 2 +frequent 2 +ask 2 +question 2 +print 2 +local 2 +gener 1 +inform 1 +instructor 1 +stat 1 +teach 1 +assist 1 +geoff 1 +weinberg 1 +highwai 1 +lab 1 +basement 1 +build 1 +geoffrei 1 +phone 1 +overview 1 +syllabu 1 +email 1 +suggest 1 +project 1 +refin 1 +logic 1 +definit 1 +quinlan 1 +artifici 1 +chunk 1 +soar 1 +laird 1 +rosenbloom 1 +newel 1 +level 1 +dietterich 1 +analyt 1 +unsupervis 1 +zipser 1 +cogsci 1 +lenat 1 +genet 1 +backpropag 1 +basic 1 +also 1 +scienc 1 +kibler 1 +introduct 1 +kbann 1 +sure 1 +answer 1 +sheet 1 +best 1 +idea 1 +next 1 +summar 1 +sentenc 1 +summari 1 +lead 1 +instead 1 +analyz 1 +brr 1 +hand 1 +materi 1 +cover 1 +lectur 1 +methodolog 1 +creat 1 +person 1 +start 1 +student 1 +five 1 +dai 1 +exhaust 1 +penalti 1 +measur 1 +weekend 1 +make 1 +tractabl 1 +accept 1 +week 1 +previous 1 +migrat 1 +progress 1 +heurist 1 +search 1 +version 1 +explan 1 +previou 1 +exam 1 +ineedagoodicon 1 +line 1 +nip 1 +premier 1 +confer 1 +recent 1 +tabl 1 +content 1 +abstract 1 +select 1 +mostli 1 +wendt 1 +irvin 1 +dataset 1 +pointer 1 +discoveri 1 +databas 1 +stuff 1 +benchmark 1 +ieee 1 +council 1 +sever 1 +connect 1 +intern 1 +societi 1 +adapt 1 +behavior 1 +canadian 1 +peopl 1 +extern 1 +department 1 +workstat 1 +emac 1 +code 1 +write 1 +debugg 1 +novic 1 +steel 1 +languag 1 +edit 1 +manual 1 +printer 1 +math 1 +comp 1 +biologi 1 +includ 1 +dept 1 +last 1 +modifi 1 +shavlikshavlik 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..c2874a93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,247 @@ +project 16 +paper 8 +comput 5 +present 5 +exam 5 +cours 5 +topic 5 +class 5 +read 5 +schedul 4 +lectur 4 +system 4 +final 3 +scienc 3 +room 3 +midterm 3 +suggest 3 +grade 3 +semest 3 +detail 3 +make 3 +wang 3 +chen 3 +solomon 2 +wisc 2 +new 2 +updat 2 +list 2 +mondai 2 +statist 2 +time 2 +memori 2 +manag 2 +avail 2 +content 2 +summari 2 +inform 2 +text 2 +us 2 +copi 2 +discuss 2 +activ 2 +particip 2 +strongli 2 +encourag 2 +design 2 +term 2 +experiment 2 +research 2 +literatur 2 +must 2 +group 2 +write 2 +fall 1 +advanc 1 +oper 1 +systemsfal 1 +marvin 1 +offic 1 +sciencesoffic 1 +hour 1 +troffic 1 +phone 1 +email 1 +address 1 +watch 1 +space 1 +latest 1 +last 1 +inroom 1 +build 1 +fridai 1 +noon 1 +exampl 1 +past 1 +examtogeth 1 +sampl 1 +answer 1 +wednesdai 1 +octob 1 +pmin 1 +decemb 1 +exact 1 +place 1 +bedetermin 1 +informationabout 1 +avaiabl 1 +readabl 1 +version 1 +figur 1 +multic 1 +intend 1 +give 1 +broad 1 +exposur 1 +advancedoper 1 +assum 1 +student 1 +good 1 +onoper 1 +equival 1 +cover 1 +normal 1 +inconsider 1 +synchron 1 +interprocess 1 +commun 1 +file 1 +protect 1 +secur 1 +distribut 1 +tuesdai 1 +thursdai 1 +sciencestextther 1 +realli 1 +satisfactori 1 +textbook 1 +graduat 1 +level 1 +operatingsystem 1 +usea 1 +select 1 +classic 1 +papersa 1 +structur 1 +around 1 +journal 1 +articl 1 +andconfer 1 +proceed 1 +purchas 1 +doit 1 +formerli 1 +macc 1 +document 1 +deskfor 1 +similar 1 +ident 1 +thoseof 1 +previou 1 +individu 1 +youto 1 +relev 1 +current 1 +click 1 +herefor 1 +tent 1 +review 1 +willinstead 1 +adiscuss 1 +major 1 +theme 1 +focal 1 +point 1 +will 1 +daili 1 +geta 1 +expect 1 +quietli 1 +listen 1 +week 1 +much 1 +lessout 1 +gradingther 1 +worth 1 +total 1 +verifi 1 +carefulli 1 +thoroughli 1 +projecty 1 +requir 1 +complet 1 +provid 1 +involv 1 +implement 1 +tool 1 +implementationsof 1 +algorithm 1 +measur 1 +studi 1 +simul 1 +compon 1 +survei 1 +unvalid 1 +suffici 1 +done 1 +person 1 +larger 1 +smaller 1 +approv 1 +case 1 +basi 1 +summar 1 +result 1 +meet 1 +standard 1 +public 1 +qualiti 1 +well 1 +also 1 +ashort 1 +presentationabout 1 +presentationsher 1 +presen 1 +approxim 1 +manyan 1 +stubb 1 +andrew 1 +bigg 1 +franci 1 +salmon 1 +gunawan 1 +agu 1 +qingmin 1 +chien 1 +pang 1 +jame 1 +eric 1 +larsen 1 +conroi 1 +fritz 1 +craig 1 +jordan 1 +prasad 1 +deshpand 1 +avinash 1 +sodani 1 +basnei 1 +rajesh 1 +raman 1 +biswadeep 1 +taxiao 1 +yanm 1 +xinyu 1 +richard 1 +zhang 1 +todd 1 +munson 1 +wenjun 1 +xinyi 1 +yufei 1 +zeyu 1 +sridhar 1 +gopal 1 +michael 1 +leesolomon 1 +eduthu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..fb319b24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,4 @@ +homepag 2 +chiang 1 +time 1 +gradesgo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..afb84f66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,123 @@ +fortran 7 +comput 5 +microsoft 5 +page 4 +home 4 +program 3 +us 3 +vectra 3 +room 3 +quota 3 +mail 3 +exercis 2 +write 2 +solut 2 +particular 2 +scienc 2 +window 2 +machin 2 +also 2 +howev 2 +copi 2 +modifi 2 +bestor 2 +section 1 +overal 1 +structur 1 +primarili 1 +gener 1 +problem 1 +solv 1 +code 1 +though 1 +want 1 +time 1 +algorithm 1 +even 1 +depend 1 +languag 1 +class 1 +follow 1 +mondai 1 +labyou 1 +statist 1 +contain 1 +hewlett 1 +packard 1 +run 1 +open 1 +seven 1 +dai 1 +week 1 +except 1 +certain 1 +holidai 1 +printer 1 +locat 1 +across 1 +hall 1 +print 1 +exce 1 +must 1 +contact 1 +either 1 +go 1 +offic 1 +hour 1 +prefer 1 +increas 1 +bewar 1 +aren 1 +configur 1 +correctli 1 +along 1 +wall 1 +closest 1 +outsidehallwai 1 +toward 1 +left 1 +hand 1 +part 1 +avoid 1 +dorm 1 +probabl 1 +purchas 1 +lahei 1 +person 1 +insid 1 +cover 1 +textbook 1 +work 1 +lab 1 +campu 1 +compil 1 +pleas 1 +first 1 +softwar 1 +includ 1 +netscap 1 +pointer 1 +interest 1 +jeff 1 +lampert 1 +depart 1 +start 1 +point 1 +internet 1 +explor 1 +lyco 1 +search 1 +world 1 +wide 1 +keyword 1 +dilbert 1 +comic 1 +relief 1 +long 1 +night 1 +assign 1 +copyright 1 +gareth 1 +wisc 1 +last 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..9b3a0060 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,52 @@ +homepag 2 +page 2 +wisc 2 +homepagec 1 +homepagewelcom 1 +purpos 1 +provid 1 +student 1 +inform 1 +pertain 1 +section 1 +sinc 1 +chang 1 +frequent 1 +responsibilityto 1 +check 1 +often 1 +gener 1 +informationinstructor 1 +todd 1 +munsonemail 1 +tmunson 1 +eduoffic 1 +comput 1 +scienc 1 +statisticsoffic 1 +phone 1 +offic 1 +hour 1 +appointmentsect 1 +textbook 1 +problem 1 +solv 1 +walter 1 +savitchclass 1 +informationexpectationssyllabusexam 1 +schedule 1 +mailgradingl 1 +assignmentsextra 1 +creditpoliciesconsult 1 +responsibilitiesacadem 1 +misconductoth 1 +informationdaili 1 +note 1 +assignmentshomework 1 +assignmentsprogram 1 +document 1 +us 1 +classoth 1 +program 1 +resourcesc 1 +homepagetmunson 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..f4b1a069 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,64 @@ +inform 5 +section 4 +handout 4 +class 3 +cours 3 +todd 2 +hour 2 +avail 2 +program 2 +get 2 +start 2 +read 2 +gener 2 +text 2 +syllabu 2 +assign 2 +home 2 +page 2 +turnidg 2 +comput 1 +scienc 1 +time 1 +place 1 +nolandinstructor 1 +turnidgeoffic 1 +tbalab 1 +tbaannouncementsclass 1 +note 1 +struct 1 +us 1 +facil 1 +grade 1 +polici 1 +tent 1 +solut 1 +collect 1 +date 1 +mail 1 +list 1 +send 1 +messag 1 +classa 1 +whole 1 +muchinform 1 +interest 1 +includ 1 +tutor 1 +consult 1 +window 1 +oper 1 +system 1 +email 1 +netscap 1 +find 1 +provid 1 +byother 1 +instructor 1 +help 1 +exampl 1 +gregorysharp 1 +difficulti 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..ed2f1243 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,80 @@ +offic 6 +comput 5 +section 4 +program 4 +weaver 4 +scienc 4 +chri 3 +email 3 +polici 3 +statist 3 +hour 3 +exam 2 +wisc 2 +phone 2 +week 2 +noland 2 +text 2 +grade 2 +handout 2 +assign 2 +algebra 1 +languag 1 +announc 1 +thursdai 1 +psych 1 +reload 1 +page 1 +everi 1 +time 1 +login 1 +instructor 1 +appoint 1 +first 1 +grader 1 +zhang 1 +locat 1 +vectra 1 +seven 1 +dai 1 +staf 1 +consult 1 +gener 1 +cours 1 +info 1 +syllabu 1 +problem 1 +solv 1 +object 1 +walter 1 +savitch 1 +addison 1 +weslei 1 +isbn 1 +includ 1 +errata 1 +sourc 1 +code 1 +misconduct 1 +policyassign 1 +homework 1 +read 1 +lectur 1 +note 1 +exampl 1 +quiz 1 +kei 1 +late 1 +style 1 +guidelin 1 +still 1 +rough 1 +print 1 +paper 1 +statement 1 +depart 1 +univers 1 +wisconsin 1 +madison 1 +last 1 +chang 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..9c99955f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,84 @@ +homework 14 +offic 6 +part 6 +assign 6 +advanc 5 +tabl 5 +content 5 +read 5 +project 5 +solut 5 +lectur 4 +full 4 +paper 4 +doit 4 +pipelin 4 +cach 4 +network 4 +multiprocessor 4 +hour 3 +note 3 +exam 3 +memori 3 +spring 2 +comput 2 +architectur 2 +cours 2 +inform 2 +engin 2 +hall 2 +phone 2 +wisc 2 +final 2 +main 2 +disk 2 +arrai 2 +interconnect 2 +technolog 2 +midterm 2 +rout 2 +offer 1 +instructor 1 +prof 1 +jame 1 +smith 1 +tue 1 +thur 1 +email 1 +princeoffic 1 +mail 1 +address 1 +princ 1 +new 1 +miscellaneousnew 1 +soln 1 +special 1 +biochemistri 1 +pmread 1 +overview 1 +introduct 1 +perform 1 +cost 1 +instruct 1 +set 1 +vector 1 +vliw 1 +limit 1 +softwar 1 +system 1 +trace 1 +list 1 +miscellan 1 +us 1 +tool 1 +review 1 +specmark 1 +consid 1 +harm 1 +analysi 1 +clock 1 +detail 1 +design 1 +reserv 1 +station 1 +summari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..9ada7fa2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,244 @@ +surfac 13 +homework 12 +cours 9 +graphic 8 +cornel 8 +comput 7 +project 6 +object 6 +math 5 +field 5 +univers 5 +scientif 4 +visual 4 +schedul 4 +anim 4 +color 4 +upson 4 +group 3 +theori 3 +center 3 +topic 3 +model 3 +transform 3 +volum 3 +data 3 +march 3 +prelim 3 +final 3 +page 2 +atth 2 +alan 2 +watt 2 +also 2 +content 2 +summari 2 +administrivia 2 +textbook 2 +error 2 +staff 2 +mathemat 2 +program 2 +skill 2 +polygon 2 +list 2 +parametr 2 +oper 2 +quadric 2 +hierarch 2 +geometr 2 +kinemat 2 +dynam 2 +system 2 +view 2 +camera 2 +clip 2 +vision 2 +render 2 +shade 2 +light 2 +human 2 +space 2 +method 2 +textur 2 +map 2 +dimension 2 +scalar 2 +vector 2 +line 2 +april 2 +test 2 +spring 2 +religi 2 +holidai 2 +student 2 +exam 2 +last 2 +bruce 2 +land 2 +huang 2 +comment 2 +main 1 +pagecomput 1 +scienc 1 +evolv 1 +incomplet 1 +hopefulli 1 +us 1 +begin 1 +home 1 +forc 1 +deal 1 +level 1 +principl 1 +practic 1 +folei 1 +computergraph 1 +current 1 +taught 1 +bruceland 1 +leader 1 +relev 1 +requir 1 +artist 1 +among 1 +other 1 +fundament 1 +focus 1 +associ 1 +designedto 1 +help 1 +illumin 1 +cover 1 +follow 1 +year 1 +construct 1 +explicit 1 +figur 1 +rotat 1 +swept 1 +tensor 1 +product 1 +viewer 1 +implicit 1 +blobbi 1 +tessel 1 +normal 1 +simpl 1 +form 1 +complex 1 +scene 1 +composit 1 +introduct 1 +homogen 1 +coordin 1 +build 1 +combin 1 +prototyp 1 +mimic 1 +connect 1 +rigid 1 +part 1 +invers 1 +differenti 1 +equat 1 +cellular 1 +automata 1 +onto 1 +screen 1 +parallel 1 +perspect 1 +simul 1 +stereo 1 +devic 1 +limit 1 +optic 1 +wave 1 +gourand 1 +phong 1 +hidden 1 +remov 1 +buffer 1 +transpar 1 +shadow 1 +scan 1 +convers 1 +anti 1 +alias 1 +pixel 1 +vernier 1 +hyper 1 +acuiti 1 +imag 1 +properti 1 +modif 1 +bump 1 +aspect 1 +wall 1 +channel 1 +contour 1 +mispercept 1 +difficulti 1 +arrow 1 +particl 1 +advect 1 +multiparamet 1 +high 1 +assign 1 +first 1 +serv 1 +gener 1 +guid 1 +style 1 +break 1 +educationlaw 1 +mandat 1 +faculti 1 +make 1 +avail 1 +opportun 1 +tomak 1 +examin 1 +miss 1 +belief 1 +inord 1 +facilit 1 +prepar 1 +makeup 1 +intendingto 1 +absent 1 +order 1 +observ 1 +requestedto 1 +notifi 1 +instructor 1 +lectur 1 +period 1 +tuesdai 1 +mean 1 +standard 1 +deviat 1 +rhode 1 +jing 1 +justin 1 +mccune 1 +jmccune 1 +csrelev 1 +california 1 +davi 1 +waterloo 1 +wale 1 +colleg 1 +cardiff 1 +manchest 1 +oregon 1 +state 1 +universityrel 1 +onlin 1 +document 1 +welcom 1 +sent 1 +todoc 1 +modifi 1 +copyright 1 +statement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..9c46c2a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,96 @@ +cornel 4 +visual 4 +exercis 4 +graphic 3 +current 3 +project 3 +theori 3 +center 3 +comput 2 +laboratori 2 +page 2 +materi 2 +procedur 2 +student 2 +computergraph 2 +scientif 2 +surfac 2 +transform 2 +model 2 +camera 2 +textur 2 +anim 2 +mark 2 +relat 2 +topic 2 +comment 2 +main 1 +pagecomput 1 +scienc 1 +exercisesthi 1 +site 1 +universityundergradu 1 +contain 1 +includ 1 +softwar 1 +result 1 +section 1 +deal 1 +level 1 +principl 1 +practic 1 +folei 1 +dcomput 1 +watt 1 +taught 1 +bruceland 1 +also 1 +leader 1 +group 1 +atth 1 +first 1 +place 1 +sigucc 1 +basededuc 1 +train 1 +competit 1 +get 1 +start 1 +build 1 +polygon 1 +object 1 +parametr 1 +us 1 +virtual 1 +perspect 1 +light 1 +bump 1 +map 1 +design 1 +physic 1 +base 1 +implicit 1 +done 1 +order 1 +note 1 +areinclud 1 +refer 1 +chat 1 +facil 1 +commun 1 +aboutc 1 +spring 1 +semesteraccess 1 +restrict 1 +enrol 1 +final 1 +onlin 1 +document 1 +welcom 1 +sent 1 +todoc 1 +last 1 +modifi 1 +land 1 +copyright 1 +statement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..f12b0d0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,65 @@ +topic 4 +fall 2 +group 2 +student 2 +model 2 +imag 2 +system 2 +us 2 +cornel 1 +comput 1 +graphic 1 +semestereach 1 +chose 1 +current 1 +research 1 +computergraph 1 +read 1 +appropri 1 +paper 1 +implement 1 +code 1 +deliv 1 +lectur 1 +chosen 1 +anddocu 1 +work 1 +document 1 +metabal 1 +window 1 +michael 1 +arcuri 1 +alex 1 +benton 1 +human 1 +facial 1 +express 1 +huang 1 +hung 1 +content 1 +base 1 +retriev 1 +interior 1 +design 1 +sean 1 +landi 1 +interdepend 1 +particl 1 +justin 1 +mccune 1 +visual 1 +diffus 1 +distribut 1 +pollut 1 +spatial 1 +explicit 1 +landscap 1 +modelsfu 1 +tsai 1 +antialias 1 +video 1 +stochast 1 +sampl 1 +arun 1 +vermach 1 +hsun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..1d1aab6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,65 @@ +sandia 2 +livermor 2 +thesi 2 +vavasi 2 +research 2 +scientif 2 +comput 2 +least 2 +squar 2 +patti 1 +houghpatti 1 +hough 1 +nation 1 +laboratori 1 +cornel 1 +student 1 +center 1 +appli 1 +mathemat 1 +whichi 1 +hous 1 +frankh 1 +rhode 1 +hall 1 +cornellunivers 1 +advisor 1 +steve 1 +member 1 +committe 1 +nicktrefethen 1 +schatz 1 +interest 1 +fall 1 +area 1 +numer 1 +linearalgebra 1 +optimizationi 1 +current 1 +work 1 +postdoc 1 +juan 1 +meza 1 +depart 1 +nationallaboratori 1 +resum 1 +statement 1 +goal 1 +tech 1 +report 1 +complet 1 +orthogon 1 +decomposit 1 +weight 1 +appear 1 +siam 1 +matrix 1 +anal 1 +stabl 1 +effici 1 +solut 1 +ofweight 1 +problem 1 +applic 1 +interior 1 +pointmethod 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..5dd7866d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,85 @@ +postscript 6 +trefethen 3 +research 3 +page 3 +fluid 3 +baggett 2 +mathemat 2 +cornel 2 +univers 2 +year 2 +thesi 2 +dynam 2 +like 2 +interest 2 +model 2 +transit 2 +turbul 2 +physic 2 +submit 2 +jeff 1 +baggettjeff 1 +center 1 +appli 1 +frank 1 +rhode 1 +hall 1 +ithaca 1 +sixth 1 +graduat 1 +student 1 +depart 1 +atcornel 1 +supervis 1 +nick 1 +expect 1 +finish 1 +titl 1 +normal 1 +applic 1 +hydrodynam 1 +stabil 1 +summer 1 +would 1 +continu 1 +seek 1 +posit 1 +detail 1 +outlin 1 +curriculum 1 +vita 1 +activ 1 +background 1 +unusu 1 +blend 1 +scientif 1 +comput 1 +system 1 +mechan 1 +propos 1 +work 1 +iwould 1 +next 1 +coupl 1 +paper 1 +mostli 1 +linear 1 +driscol 1 +april 1 +exponenti 1 +type 1 +versu 1 +spectral 1 +abscissa 1 +hill 1 +andphillip 1 +exampl 1 +integr 1 +equat 1 +oper 1 +theori 1 +dimension 1 +subcrit 1 +misc 1 +link 1 +satish 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..39e79652 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,71 @@ +ncstrl 7 +report 5 +search 5 +comput 4 +scienc 4 +technic 3 +collect 3 +network 2 +field 2 +sever 2 +institut 2 +word 2 +list 2 +document 2 +brows 2 +cornel 2 +librari 1 +libraryncstrl 1 +pronounc 1 +ancestr 1 +internationalcollect 1 +departmentsand 1 +industri 1 +govern 1 +research 1 +laboratori 1 +made 1 +availablefor 1 +commerci 1 +eduat 1 +ncstrlcollect 1 +distribut 1 +among 1 +interoper 1 +serversoper 1 +participatinginstitut 1 +read 1 +offici 1 +ncstrlpress 1 +packag 1 +descript 1 +background 1 +goal 1 +andorgan 1 +form 1 +allow 1 +perform 1 +bibliograph 1 +data 1 +limit 1 +specif 1 +enter 1 +whose 1 +author 1 +titl 1 +abstract 1 +contain 1 +theparticip 1 +want 1 +join 1 +tell 1 +moreread 1 +forinstitut 1 +interest 1 +particip 1 +informationfind 1 +snew 1 +relat 1 +send 1 +email 1 +totech 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..cef0a349 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^dri.cornell.edu @@ -0,0 +1,33 @@ +design 2 +research 2 +institut 2 +technic 2 +instituteabout 1 +researchersat 1 +brows 1 +searchal 1 +public 1 +file 1 +reportssearch 1 +report 1 +ipic 1 +home 1 +page 1 +intern 1 +work 1 +confer 1 +integr 1 +enterpris 1 +informationand 1 +process 1 +anoth 1 +site 1 +inform 1 +itisingapor 1 +altavistaforum 1 +send 1 +question 1 +comment 1 +server 1 +mike 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..d6ccd794 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,177 @@ +us 6 +question 4 +resourc 4 +design 3 +gener 3 +interest 3 +access 3 +work 3 +project 3 +comput 3 +technic 3 +knowledg 3 +also 3 +cours 3 +student 3 +whether 3 +davi 2 +cornel 2 +system 2 +futur 2 +increas 2 +structur 2 +inform 2 +report 2 +corpor 2 +group 2 +memori 2 +mean 2 +institut 2 +product 2 +document 2 +share 2 +staff 2 +answer 2 +remot 2 +machin 2 +seem 2 +research 1 +institutejim 1 +davisxerox 1 +corporationphd 1 +media 1 +edumi 1 +goal 1 +build 1 +softwar 1 +improvecommun 1 +among 1 +peopl 1 +believ 1 +commun 1 +medium 1 +ofth 1 +understand 1 +andcont 1 +messag 1 +transmit 1 +manipul 1 +reformat 1 +even 1 +content 1 +inhypertext 1 +network 1 +collabor 1 +thecstr 1 +anarpa 1 +sponsor 1 +effort 1 +make 1 +scienc 1 +moreeasili 1 +part 1 +distribut 1 +server 1 +whichi 1 +run 1 +mani 1 +univers 1 +electronicsystem 1 +captur 1 +produc 1 +worker 1 +order 1 +qualiti 1 +ofor 1 +reduc 1 +time 1 +requir 1 +memoryinclud 1 +intellectu 1 +engin 1 +lawyer 1 +contract 1 +author 1 +sscreenplai 1 +process 1 +producedth 1 +dead 1 +end 1 +explor 1 +tool 1 +andjustif 1 +support 1 +final 1 +decis 1 +begun 1 +huttenloch 1 +developingcorpor 1 +sharedannot 1 +investig 1 +howpeopl 1 +read 1 +write 1 +annot 1 +inelectron 1 +initi 1 +prototypeimplement 1 +class 1 +shareddocu 1 +problem 1 +set 1 +note 1 +nnotat 1 +might 1 +berequest 1 +clarif 1 +orcorrect 1 +made 1 +aus 1 +obtain 1 +willfind 1 +sourc 1 +learn 1 +whetherstud 1 +often 1 +abl 1 +correctli 1 +find 1 +usefulmean 1 +feedback 1 +improv 1 +evid 1 +isthat 1 +natur 1 +languag 1 +designof 1 +proxi 1 +agent 1 +safe 1 +reliablycarri 1 +foreign 1 +without 1 +risk 1 +toeither 1 +owner 1 +alsopap 1 +onlin 1 +copi 1 +publicatiion 1 +list 1 +especi 1 +thedrimi 1 +collect 1 +meprofession 1 +historythi 1 +narr 1 +resum 1 +contact 1 +improvisationi 1 +sport 1 +resumeno 1 +market 1 +thank 1 +ask 1 +likeit 1 +fine 1 +xerox 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..a107c807 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,28 @@ +search 3 +metacrawl 2 +erik 2 +selberg 2 +greg 2 +oren 2 +home 2 +searchingmetacrawlerbi 1 +lauckhartand 1 +prof 1 +etzioniif 1 +person 1 +page 1 +ahoi 1 +phrase 1 +word 1 +wordssort 1 +result 1 +relev 1 +locationcontrol 1 +java 1 +configur 1 +problemswebmast 1 +comcopyright 1 +etzioni 1 +lauckhart 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..dbdd16ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,99 @@ +robot 13 +madison 5 +sensor 4 +plan 4 +research 3 +base 3 +motion 3 +ieee 3 +univers 2 +page 2 +engin 2 +relat 2 +project 2 +kinemat 2 +group 2 +system 2 +comput 2 +link 2 +wisconsin 1 +home 1 +mechan 1 +avenuemadison 1 +vladimir 1 +lumelski 1 +director 1 +brief 1 +review 1 +activ 1 +peopl 1 +technic 1 +report 1 +compress 1 +postscript 1 +avail 1 +simul 1 +librari 1 +cours 1 +seminar 1 +recent 1 +select 1 +public 1 +abstract 1 +maze 1 +search 1 +algorithm 1 +effect 1 +dynam 1 +control 1 +jogger 1 +model 1 +sens 1 +decentr 1 +intellig 1 +special 1 +topic 1 +tether 1 +underwat 1 +redund 1 +sensit 1 +skin 1 +human 1 +center 1 +geometri 1 +neil 1 +duffi 1 +manufactur 1 +chuck 1 +dyer 1 +vision 1 +robert 1 +lorenz 1 +actuat 1 +jude 1 +shavlik 1 +machin 1 +learn 1 +societi 1 +autom 1 +tech 1 +committe 1 +path 1 +internet 1 +resourc 1 +server 1 +nasa 1 +telerobot 1 +program 1 +frequent 1 +ask 1 +question 1 +list 1 +local 1 +dept 1 +colleg 1 +comment 1 +suggest 1 +errata 1 +hert 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..f7eb2d64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,40 @@ +engin 5 +comput 5 +robot 4 +dept 4 +ieee 3 +vladimir 2 +home 2 +page 2 +scienc 2 +link 2 +mathemat 2 +lumelski 1 +lumelskyprofessormechan 1 +underwat 1 +kinemat 1 +redund 1 +sensit 1 +skin 1 +project 1 +human 1 +center 1 +system 1 +geometri 1 +global 1 +societi 1 +autom 1 +tech 1 +committe 1 +motion 1 +path 1 +plan 1 +wisconsin 1 +colleg 1 +mechan 1 +electr 1 +graduat 1 +program 1 +mace 1 +grant 1 +institut 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..74e4bea3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,196 @@ +system 10 +vogel 8 +high 8 +werner 7 +commun 6 +network 6 +distribut 5 +speed 4 +proceed 4 +comput 3 +cornel 3 +protocol 3 +design 3 +cluster 3 +horu 3 +need 3 +mechan 3 +level 3 +messag 3 +failur 3 +group 3 +robbert 3 +reness 3 +eicken 3 +noth 2 +left 2 +take 2 +project 2 +think 2 +latenc 2 +support 2 +highli 2 +real 2 +time 2 +issu 2 +environ 2 +oper 2 +guarante 2 +applic 2 +abl 2 +detect 2 +birman 2 +appear 2 +sigop 2 +workshopconnamoran 2 +ierland 2 +septemb 2 +thorsten 2 +perform 2 +researchera 1 +upson 1 +halldept 1 +sciencecornel 1 +univers 1 +ithaca 1 +phone 1 +email 1 +perfect 1 +reach 1 +awai 1 +involv 1 +major 1 +scienc 1 +depart 1 +thehorusand 1 +myresearch 1 +interest 1 +best 1 +describ 1 +bandwith 1 +reliabl 1 +requir 1 +focu 1 +engin 1 +thing 1 +amwork 1 +predict 1 +execut 1 +integr 1 +horuswith 1 +lead 1 +situat 1 +reason 1 +advanc 1 +effici 1 +data 1 +transfer 1 +devic 1 +bandwidth 1 +small 1 +fallen 1 +behind 1 +softwar 1 +adapt 1 +achiev 1 +desir 1 +latencyfor 1 +pass 1 +protocolsar 1 +exploit 1 +structureand 1 +interact 1 +pattern 1 +method 1 +deal 1 +miss 1 +deadlin 1 +meet 1 +guarant 1 +gave 1 +tell 1 +anyon 1 +acur 1 +want 1 +aglob 1 +scope 1 +find 1 +gener 1 +supportfailur 1 +suspis 1 +manag 1 +process 1 +node 1 +experi 1 +extract 1 +workwith 1 +middlewar 1 +packag 1 +regardless 1 +function 1 +brainchild 1 +andken 1 +work 1 +done 1 +cooper 1 +withthorsten 1 +multimedia 1 +video 1 +demand 1 +horusexperi 1 +concert 1 +brian 1 +smith 1 +respons 1 +practicum 1 +teach 1 +number 1 +lectureson 1 +technolog 1 +practic 1 +recent 1 +public 1 +world 1 +wide 1 +structur 1 +virtual 1 +synchroni 1 +explor 1 +bound 1 +virtuallysynchron 1 +katherin 1 +user 1 +interfacefor 1 +parallel 1 +anindya 1 +basu 1 +vineet 1 +buch 1 +symposium 1 +princpl 1 +copper 1 +mountain 1 +decemb 1 +deliv 1 +third 1 +ieee 1 +workshop 1 +architectur 1 +implementationof 1 +subsystem 1 +hpc 1 +august 1 +flexibl 1 +kenneth 1 +brad 1 +glade 1 +kati 1 +mark 1 +hayden 1 +takako 1 +hickei 1 +dalia 1 +malki 1 +alex 1 +vaysburd 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..86ec5936 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,265 @@ +horu 15 +system 9 +osiri 6 +project 5 +develop 5 +distribut 4 +isi 4 +commun 4 +applic 3 +egyptian 3 +softwar 3 +ensembl 3 +compani 2 +god 2 +rejoic 2 +whose 2 +groupcommun 2 +requir 2 +life 2 +oppos 2 +death 2 +framework 2 +comput 2 +toler 2 +data 2 +replic 2 +design 2 +effort 2 +gener 2 +purpos 2 +us 2 +version 2 +activ 2 +environ 2 +high 2 +network 2 +found 2 +dead 2 +bodi 2 +made 2 +introduct 2 +kenneth 2 +birman 2 +robbert 2 +reness 2 +projectth 1 +ofth 1 +come 1 +heart 1 +firm 1 +triumphant 1 +ofisi 1 +heir 1 +modular 1 +extens 1 +process 1 +address 1 +wide 1 +varieti 1 +robust 1 +attribut 1 +appealedstrongli 1 +egypt 1 +becausein 1 +everi 1 +woman 1 +type 1 +wish 1 +possess 1 +renew 1 +movementa 1 +inact 1 +provid 1 +applicationsbas 1 +group 1 +style 1 +aris 1 +infault 1 +manag 1 +thatexploit 1 +coher 1 +cach 1 +groupwar 1 +within 1 +theoveral 1 +larg 1 +collect 1 +applicationprotocol 1 +allow 1 +toconstruct 1 +modul 1 +exactli 1 +meet 1 +applicationrequir 1 +minim 1 +cost 1 +origin 1 +launch 1 +redesign 1 +theisi 1 +evolv 1 +communicationarchitectur 1 +advanc 1 +support 1 +robustdistribut 1 +set 1 +unsuit 1 +asappl 1 +special 1 +secur 1 +real 1 +time 1 +besidesth 1 +practic 1 +contribut 1 +toward 1 +theori 1 +virtual 1 +synchroni 1 +runtim 1 +model 1 +usedfor 1 +implement 1 +fault 1 +sametim 1 +much 1 +faster 1 +lighter 1 +weight 1 +exist 1 +initi 1 +code 1 +beus 1 +research 1 +restrict 1 +commericalright 1 +call 1 +written 1 +usabl 1 +manyoth 1 +languag 1 +avail 1 +class 1 +user 1 +nofe 1 +seri 1 +ofreleas 1 +fall 1 +spring 1 +earli 1 +ensemblewil 1 +outstand 1 +build 1 +java 1 +base 1 +groupwareappl 1 +multimedia 1 +conferenc 1 +platform 1 +independ 1 +areavail 1 +differentclass 1 +workstat 1 +person 1 +parallel 1 +processor 1 +onnext 1 +cluster 1 +standard 1 +speedcommun 1 +collabor 1 +close 1 +mani 1 +systemsproject 1 +includ 1 +transi 1 +navtech 1 +stormcast 1 +tacomaproject 1 +link 1 +elsewher 1 +thesepag 1 +circumst 1 +begotten 1 +gain 1 +good 1 +idea 1 +froma 1 +sorrow 1 +mother 1 +herhusband 1 +describ 1 +goddess 1 +greatli 1 +distress 1 +equippedwith 1 +mighti 1 +word 1 +power 1 +knew 1 +utter 1 +mighthav 1 +greatest 1 +effect 1 +search 1 +never 1 +rest 1 +hair 1 +light 1 +wing 1 +stir 1 +lament 1 +brother 1 +length 1 +brought 1 +state 1 +unit 1 +thu 1 +becam 1 +child 1 +born 1 +secret 1 +place 1 +suckl 1 +rear 1 +horusvisit 1 +papersand 1 +abstractpag 1 +overview 1 +public 1 +report 1 +relatedto 1 +follow 1 +recent 1 +articl 1 +present 1 +level 1 +reliabl 1 +scientif 1 +american 1 +silvano 1 +maffei 1 +flexiblegroup 1 +april 1 +final 1 +sentenc 1 +hyme 1 +osirisfrom 1 +papyru 1 +better 1 +know 1 +book 1 +walli 1 +budg 1 +studiesin 1 +mytholog 1 +volum 1 +page 1 +open 1 +court 1 +publish 1 +london 1 +comment 1 +werner 1 +vogel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..ee6a82f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,87 @@ +depart 10 +cornel 6 +report 5 +research 4 +page 4 +gener 3 +faculti 3 +annual 3 +home 3 +comput 2 +scienc 2 +info 2 +project 2 +know 2 +inform 2 +also 2 +find 2 +check 2 +go 2 +cours 2 +peopl 2 +server 2 +academ 1 +tech 1 +welcom 1 +universitydepart 1 +site 1 +feel 1 +free 1 +brows 1 +around 1 +infoget 1 +locat 1 +size 1 +contactswithin 1 +standard 1 +disclaim 1 +facultyfind 1 +list 1 +offici 1 +ortheir 1 +person 1 +researchcheck 1 +aboutour 1 +collabor 1 +publicationsfind 1 +link 1 +public 1 +researcherseith 1 +technic 1 +projector 1 +degreeslook 1 +degre 1 +program 1 +doctor 1 +master 1 +engin 1 +orundergradu 1 +academicsrefer 1 +taught 1 +webfor 1 +semest 1 +read 1 +generalcoursedescript 1 +appear 1 +studi 1 +peopleget 1 +outstand 1 +keep 1 +includ 1 +staff 1 +student 1 +directorylist 1 +activitiesfind 1 +activ 1 +theassoci 1 +undergradu 1 +excellenthockei 1 +team 1 +serverscheck 1 +gopherserv 1 +anonym 1 +ftpserver 1 +sitesquest 1 +comment 1 +informationpres 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..70a0a046 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,242 @@ +text 26 +retriev 17 +inform 11 +automat 7 +hypertext 6 +univers 5 +research 5 +larg 5 +us 5 +analysi 5 +system 5 +cornel 4 +process 4 +nation 4 +novemb 4 +allan 4 +develop 3 +avail 3 +document 3 +search 3 +gener 3 +purpos 3 +base 3 +databas 3 +link 3 +encyclopedia 3 +full 3 +smart 3 +librari 3 +ohio 3 +util 3 +comput 3 +proceed 3 +annual 3 +rapidli 2 +exist 2 +high 2 +sophist 2 +user 2 +queri 2 +file 2 +oper 2 +subject 2 +corpu 2 +determin 2 +similar 2 +main 2 +structur 2 +collect 2 +capabl 2 +consist 2 +differ 2 +version 2 +sigir 2 +confer 2 +germani 2 +genom 2 +medicin 2 +bethesda 2 +maryland 2 +june 2 +columbu 2 +octob 2 +seattl 2 +washington 2 +associ 2 +machineri 2 +york 2 +bucklei 2 +nevada 2 +home 2 +denis 2 +gerard 1 +saltongerard 1 +saltonprofessorg 1 +eduph 1 +harvard 1 +natur 1 +languag 1 +expand 1 +field 1 +mass 1 +machin 1 +readabl 1 +cheapli 1 +store 1 +densiti 1 +optic 1 +storag 1 +media 1 +demand 1 +furthermor 1 +method 1 +analyz 1 +formul 1 +appropri 1 +conduct 1 +rapid 1 +rank 1 +item 1 +decreas 1 +order 1 +import 1 +design 1 +environ 1 +handl 1 +without 1 +restrict 1 +size 1 +matter 1 +absenc 1 +knowledg 1 +would 1 +unrestrict 1 +mean 1 +word 1 +express 1 +refin 1 +context 1 +statist 1 +probabilist 1 +criteria 1 +approach 1 +abl 1 +degre 1 +accuraci 1 +applic 1 +semant 1 +piec 1 +represent 1 +provid 1 +flexibl 1 +brows 1 +access 1 +interest 1 +excerpt 1 +respons 1 +done 1 +extens 1 +work 1 +autom 1 +articl 1 +funk 1 +wagnal 1 +addit 1 +also 1 +trec 1 +cover 1 +number 1 +area 1 +gigabyt 1 +servic 1 +well 1 +relat 1 +section 1 +paragraph 1 +sentenc 1 +test 1 +vehicl 1 +continu 1 +current 1 +unix 1 +sparc 1 +station 1 +termin 1 +equip 1 +activitiesmemb 1 +engin 1 +colleg 1 +committeeprofession 1 +activitiesassoci 1 +editor 1 +transact 1 +systemsprogram 1 +committe 1 +seventeenth 1 +dublin 1 +ireland 1 +electron 1 +publish 1 +darmstadt 1 +multimedia 1 +hypermedia 1 +virtual 1 +realiti 1 +moscow 1 +septemb 1 +lecturesautomat 1 +construct 1 +feder 1 +institut 1 +technolog 1 +zurich 1 +switzerland 1 +progress 1 +konstanz 1 +asi 1 +meet 1 +scienc 1 +colloquium 1 +state 1 +lectur 1 +cours 1 +microsoft 1 +corpor 1 +workshop 1 +publicationsapproach 1 +passag 1 +select 1 +travers 1 +commun 1 +februari 1 +vector 1 +model 1 +third 1 +symposium 1 +vega 1 +april 1 +softwareth 1 +made 1 +free 1 +charg 1 +sever 1 +hundr 1 +copi 1 +distribut 1 +around 1 +world 1 +return 1 +list 1 +faculti 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..af2e0387 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,87 @@ +minim 6 +problem 5 +trust 5 +region 5 +method 5 +optim 4 +cornel 3 +gener 3 +interest 3 +scientif 3 +comput 3 +engin 3 +nonlinearli 3 +constrain 3 +nonlinear 3 +research 2 +techniqu 2 +solv 2 +includ 2 +converg 2 +us 2 +subject 2 +bound 2 +confer 2 +chines 2 +young 2 +scientist 2 +home 2 +denis 2 +yui 1 +liyui 1 +liresearch 1 +associateyui 1 +eduph 1 +univers 1 +waterloo 1 +numer 1 +also 1 +appli 1 +real 1 +world 1 +current 1 +theori 1 +unconstrain 1 +particular 1 +exist 1 +accept 1 +condit 1 +investig 1 +affin 1 +scale 1 +function 1 +analysi 1 +exact 1 +penalti 1 +approach 1 +applic 1 +consid 1 +imag 1 +enhanc 1 +lecturesan 1 +interior 1 +beij 1 +china 1 +august 1 +publicationsa 1 +global 1 +siam 1 +journal 1 +center 1 +reflect 1 +proceed 1 +return 1 +list 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..f7741e80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,178 @@ +comput 7 +mathemat 6 +depart 5 +object 5 +york 5 +us 4 +architectur 4 +system 4 +algebra 4 +polynomi 4 +present 4 +israel 4 +research 3 +cornel 3 +symbol 3 +scientif 3 +toolkit 3 +engin 3 +program 3 +workshop 3 +algorithm 3 +intern 3 +american 3 +societi 3 +region 3 +meet 3 +syracus 3 +decemb 3 +dawson 3 +dean 3 +haifa 3 +januari 3 +focus 2 +softwar 2 +techniqu 2 +gener 2 +fluid 2 +dynam 2 +differenti 2 +equat 2 +studi 2 +code 2 +weyl 2 +function 2 +languag 2 +provid 2 +number 2 +journal 2 +symposium 2 +septemb 2 +dexter 2 +kozen 2 +susan 2 +landau 2 +vista 2 +microstorag 2 +orient 2 +joint 2 +electr 2 +technion 2 +scienc 2 +ronitt 2 +rubinfeld 2 +page 2 +databas 2 +home 2 +denis 2 +richard 1 +zippelrichard 1 +zippelsenior 1 +associaterz 1 +eduph 1 +modern 1 +autom 1 +current 1 +work 1 +allow 1 +scientist 1 +specifi 1 +perform 1 +numer 1 +machin 1 +calcul 1 +done 1 +convert 1 +suppli 1 +method 1 +special 1 +colleagu 1 +mechan 1 +aerospac 1 +aris 1 +boundari 1 +layer 1 +compon 1 +substrat 1 +call 1 +extend 1 +data 1 +structur 1 +avail 1 +common 1 +lisp 1 +includ 1 +like 1 +matric 1 +ration 1 +ring 1 +vector 1 +space 1 +ideal 1 +introduct 1 +challeng 1 +type 1 +opportun 1 +deduct 1 +reason 1 +pursu 1 +profession 1 +activitieseditori 1 +board 1 +transact 1 +softwareprogram 1 +committe 1 +principl 1 +practic 1 +constraint 1 +refere 1 +review 1 +error 1 +correct 1 +computationlecturesalgebra 1 +decomposit 1 +effect 1 +irreduc 1 +test 1 +oper 1 +durham 1 +north 1 +carolina 1 +center 1 +synthes 1 +weizmann 1 +institut 1 +rehovot 1 +suni 1 +albani 1 +april 1 +modular 1 +interpol 1 +factor 1 +multivari 1 +theori 1 +ithaca 1 +publicationseffect 1 +kluwer 1 +academ 1 +publish 1 +boston 1 +massachusett 1 +june 1 +implement 1 +file 1 +proceed 1 +return 1 +list 1 +annual 1 +report 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..40fb40e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,217 @@ +comput 10 +horu 10 +distribut 9 +system 9 +develop 6 +group 5 +work 5 +isi 5 +us 4 +program 4 +commun 4 +secur 4 +fault 3 +toler 3 +model 3 +virtual 3 +process 3 +effort 3 +side 3 +softwar 3 +tool 3 +also 3 +depart 3 +committe 3 +member 3 +cornel 3 +reliabl 3 +california 2 +problem 2 +reconfigur 2 +practic 2 +toolkit 2 +support 2 +synchroni 2 +real 2 +time 2 +featur 2 +extens 2 +layer 2 +avail 2 +technolog 2 +techniqu 2 +parallel 2 +graduat 2 +student 2 +privaci 2 +high 2 +prove 2 +implement 2 +studi 2 +notabl 2 +friedman 2 +collabor 2 +head 2 +reness 2 +engin 2 +scienc 2 +chief 2 +consist 2 +ieee 2 +home 2 +denis 2 +kenneth 1 +birmankenneth 1 +birmanprofessorphd 1 +univ 1 +berkelei 1 +research 1 +concern 1 +oper 1 +focu 1 +base 1 +upon 1 +synchron 1 +solv 1 +manag 1 +replic 1 +data 1 +coordin 1 +action 1 +set 1 +perform 1 +dynam 1 +done 1 +provid 1 +although 1 +limit 1 +certain 1 +class 1 +reason 1 +benign 1 +failur 1 +theoret 1 +start 1 +call 1 +becam 1 +wide 1 +popular 1 +intend 1 +flexibl 1 +address 1 +issu 1 +import 1 +permit 1 +special 1 +purpos 1 +basic 1 +idea 1 +user 1 +actual 1 +broad 1 +collect 1 +option 1 +seek 1 +leverag 1 +emerg 1 +network 1 +activ 1 +messag 1 +origin 1 +supercomput 1 +embodi 1 +advanc 1 +mike 1 +reiter 1 +david 1 +cooper 1 +unusu 1 +combin 1 +singl 1 +packag 1 +fundament 1 +look 1 +specifi 1 +properti 1 +structur 1 +languag 1 +execut 1 +refer 1 +major 1 +goal 1 +constabl 1 +nuprl 1 +latter 1 +correctli 1 +former 1 +ad 1 +guarante 1 +mark 1 +hayden 1 +probabilist 1 +broadcast 1 +primit 1 +much 1 +architectur 1 +robbert 1 +werner 1 +vogel 1 +aspect 1 +includ 1 +object 1 +orient 1 +multimedia 1 +applic 1 +speed 1 +protocol 1 +exploit 1 +within 1 +thorsten 1 +eicken 1 +brian 1 +smith 1 +univers 1 +activitieschair 1 +polici 1 +act 1 +master 1 +faculti 1 +recruit 1 +academ 1 +leadership 1 +profession 1 +activitieseditor 1 +transact 1 +scientist 1 +isat 1 +robust 1 +critic 1 +element 1 +nation 1 +inform 1 +infrastructur 1 +publicationsth 1 +approach 1 +decemb 1 +integr 1 +runtim 1 +journal 1 +birman 1 +societi 1 +press 1 +alamito 1 +glade 1 +return 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..a5cb66db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,150 @@ +graphic 10 +comput 9 +scienc 7 +model 7 +univers 6 +technolog 5 +research 5 +scientif 4 +surfac 4 +imag 4 +center 3 +visual 3 +cornel 3 +displai 3 +techniqu 3 +nation 3 +director 2 +involv 2 +develop 2 +input 2 +method 2 +topic 2 +algorithm 2 +alias 2 +complex 2 +environ 2 +realist 2 +gener 2 +conduct 2 +reflect 2 +strategi 2 +medic 2 +program 2 +foundat 2 +institut 2 +engin 2 +fellow 2 +home 2 +denis 2 +donald 1 +greenbergdonald 1 +greenberg 1 +jacob 1 +gould 1 +schurman 1 +prof 1 +past 1 +decad 1 +activ 1 +wide 1 +rang 1 +number 1 +implement 1 +progress 1 +made 1 +larg 1 +varieti 1 +routin 1 +previous 1 +investig 1 +includ 1 +polygon 1 +clip 1 +hidden 1 +textur 1 +spatial 1 +tempor 1 +problem 1 +geometr 1 +parametr 1 +descript 1 +color 1 +current 1 +focu 1 +three 1 +dimension 1 +synthesi 1 +modular 1 +testb 1 +suffici 1 +flexibl 1 +evalu 1 +differ 1 +creat 1 +laboratori 1 +light 1 +determin 1 +interact 1 +improv 1 +effici 1 +trace 1 +parallel 1 +process 1 +perceptu 1 +studi 1 +micro 1 +geometri 1 +motion 1 +control 1 +dynam 1 +constraint 1 +anti 1 +host 1 +relat 1 +applic 1 +start 1 +volum 1 +render 1 +digit 1 +photographi 1 +anim 1 +tool 1 +well 1 +core 1 +multi 1 +media 1 +within 1 +facil 1 +member 1 +particip 1 +brown 1 +california 1 +north 1 +carolina 1 +chapel 1 +hill 1 +utah 1 +activitiesdirector 1 +visualizationprofession 1 +activitieseditori 1 +board 1 +journal 1 +academi 1 +found 1 +american 1 +biolog 1 +acmreturn 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..fa72849d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,200 @@ +comput 31 +scienc 20 +complex 8 +univers 8 +journal 8 +engin 5 +academi 5 +board 5 +problem 4 +resourc 4 +class 4 +council 4 +fellow 4 +lectur 4 +committe 4 +foundat 4 +research 3 +solv 3 +intern 3 +time 3 +member 3 +nation 3 +system 3 +eatc 3 +cornel 3 +technolog 2 +studi 2 +need 2 +variou 2 +random 2 +ture 2 +award 2 +american 2 +associ 2 +springer 2 +verlag 2 +advisori 2 +theoret 2 +chicago 2 +electron 2 +novemb 2 +natur 2 +distinguish 2 +seri 2 +februari 2 +home 2 +denis 2 +juri 1 +hartmanisjuri 1 +hartmani 1 +walter 1 +read 1 +professor 1 +california 1 +institut 1 +strateg 1 +goal 1 +contribut 1 +develop 1 +comprehens 1 +theori 1 +quantit 1 +law 1 +govern 1 +essenti 1 +part 1 +base 1 +guid 1 +har 1 +exploit 1 +explos 1 +grow 1 +classifi 1 +amount 1 +classif 1 +yield 1 +consist 1 +within 1 +given 1 +bound 1 +gain 1 +deeper 1 +understand 1 +make 1 +hard 1 +explor 1 +relat 1 +structur 1 +also 1 +trade 1 +off 1 +differ 1 +particular 1 +attent 1 +sequenti 1 +parallel 1 +nondeterminist 1 +memori 1 +requir 1 +interact 1 +activitiesmemb 1 +faculti 1 +representativeschair 1 +depart 1 +recruit 1 +committeehonorsacm 1 +stearn 1 +foreign 1 +latvian 1 +art 1 +york 1 +state 1 +advanc 1 +aaa 1 +charter 1 +profession 1 +activitieseditor 1 +note 1 +siam 1 +monograph 1 +director 1 +ifip 1 +technic 1 +georg 1 +brown 1 +school 1 +rice 1 +houston 1 +texa 1 +peer 1 +visit 1 +physic 1 +divis 1 +advisor 1 +world 1 +scientif 1 +presseditori 1 +press 1 +editor 1 +sciencegoedel 1 +prize 1 +telecommun 1 +awardshonorari 1 +doctor 1 +degre 1 +dortmund 1 +germani 1 +lecturessom 1 +observ 1 +banquet 1 +speech 1 +logic 1 +program 1 +symposium 1 +scope 1 +futur 1 +virginia 1 +tennesse 1 +april 1 +publicationson 1 +commun 1 +octob 1 +oracl 1 +hypothesi 1 +fals 1 +august 1 +richard 1 +chang 1 +benni 1 +chor 1 +od 1 +goldreich 1 +johan 1 +hastad 1 +desh 1 +ranjan 1 +pankaj 1 +rohatgi 1 +hausdorff 1 +topolog 1 +dimens 1 +kolmogorov 1 +real 1 +line 1 +decemb 1 +weight 1 +bulletin 1 +return 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..3833066f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,130 @@ +comput 7 +research 7 +scienc 7 +inform 4 +technolog 4 +engin 3 +institut 3 +nation 3 +board 3 +journal 3 +dean 2 +univers 2 +colleg 2 +depart 2 +unit 2 +captur 2 +access 2 +collabor 2 +design 2 +scientif 2 +advisori 2 +academi 2 +american 2 +associ 2 +fellow 2 +intern 2 +home 2 +cornel 2 +denis 2 +john 1 +hopcroftjohn 1 +hopcroftjoseph 1 +silbert 1 +engineeringprofessor 1 +sciencephd 1 +stanford 1 +januari 1 +appoint 1 +overse 1 +academ 1 +compris 1 +well 1 +variou 1 +involv 1 +continu 1 +robust 1 +geometr 1 +algorithm 1 +model 1 +simul 1 +develop 1 +facilit 1 +within 1 +environ 1 +among 1 +distribut 1 +databas 1 +persist 1 +object 1 +storag 1 +document 1 +imag 1 +process 1 +manag 1 +multimedia 1 +user 1 +interfac 1 +heterogen 1 +data 1 +knowledg 1 +represent 1 +organ 1 +remot 1 +profession 1 +activitiesmemb 1 +council 1 +commiss 1 +physic 1 +mathemat 1 +applicationsmemb 1 +boardmemb 1 +state 1 +forcememb 1 +engineeringfellow 1 +art 1 +sciencesfellow 1 +advanc 1 +aaa 1 +electr 1 +electron 1 +ieee 1 +machinerychairman 1 +siam 1 +trusteesmemb 1 +committe 1 +david 1 +lucil 1 +packard 1 +foundationmemb 1 +sloan 1 +fellowship 1 +committeeadvisori 1 +supercomput 1 +center 1 +defens 1 +analysiseditor 1 +oxford 1 +press 1 +seri 1 +algorithmica 1 +discret 1 +geometryassoci 1 +editor 1 +geometri 1 +applic 1 +system 1 +sciencesreturn 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..2b29acd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,257 @@ +program 14 +compil 12 +parallel 10 +develop 9 +comput 8 +work 5 +code 5 +local 5 +algorithm 5 +depend 5 +research 4 +spars 4 +techniqu 4 +matrix 4 +restructur 4 +memori 4 +data 4 +optim 4 +control 4 +scienc 4 +languag 3 +applic 3 +matric 3 +us 3 +produc 3 +approach 3 +problem 3 +linear 3 +distribut 3 +access 3 +must 3 +transfer 3 +best 3 +loop 3 +paper 3 +result 3 +hewlett 3 +packard 3 +journal 3 +area 2 +architectur 2 +tool 2 +gener 2 +larg 2 +involv 2 +equat 2 +almost 2 +system 2 +algebra 2 +technolog 2 +dens 2 +inform 2 +structur 2 +librari 2 +like 2 +earlier 2 +refer 2 +block 2 +recent 2 +align 2 +incorpor 2 +transform 2 +implement 2 +prize 2 +asplo 2 +foundat 2 +question 2 +microsoft 2 +member 2 +young 2 +investig 2 +award 2 +lab 2 +corpor 2 +ieee 2 +univers 2 +june 2 +annual 2 +home 2 +cornel 2 +denis 2 +keshav 1 +pingalikeshav 1 +pingaliassoci 1 +professorphd 1 +group 1 +goal 1 +deal 1 +scientif 1 +numer 1 +solut 1 +partial 1 +differenti 1 +alwai 1 +unfortun 1 +exist 1 +poor 1 +take 1 +radic 1 +differ 1 +sequenti 1 +user 1 +sparsiti 1 +enabl 1 +preliminari 1 +experi 1 +krylov 1 +space 1 +solver 1 +show 1 +competit 1 +hand 1 +argonn 1 +petsc 1 +extend 1 +direct 1 +method 1 +solv 1 +requir 1 +adapt 1 +mesh 1 +refin 1 +project 1 +build 1 +uniform 1 +numa 1 +processor 1 +faster 1 +good 1 +perform 1 +also 1 +ensur 1 +match 1 +made 1 +prefer 1 +mani 1 +small 1 +messag 1 +known 1 +automat 1 +test 1 +novel 1 +call 1 +normal 1 +nest 1 +increas 1 +potenti 1 +lambda 1 +toolkit 1 +summar 1 +fortran 1 +product 1 +line 1 +uniprocessor 1 +multiprocessor 1 +framework 1 +analysi 1 +base 1 +flow 1 +graph 1 +knit 1 +togeth 1 +permit 1 +better 1 +possibl 1 +compet 1 +independ 1 +interest 1 +exampl 1 +answer 1 +open 1 +decad 1 +time 1 +static 1 +singl 1 +assign 1 +form 1 +number 1 +includ 1 +flavor 1 +profession 1 +activitiespanel 1 +organ 1 +symposium 1 +principl 1 +practic 1 +nation 1 +panel 1 +consult 1 +intel 1 +armi 1 +ballist 1 +odyssei 1 +math 1 +institut 1 +refere 1 +review 1 +topla 1 +transact 1 +supercomput 1 +computereditori 1 +board 1 +intern 1 +awardsn 1 +presidenti 1 +faculti 1 +lecturesfast 1 +chelmsford 1 +massachusett 1 +januari 1 +depart 1 +wayn 1 +state 1 +detroit 1 +michigan 1 +februari 1 +rutger 1 +brunswick 1 +jersei 1 +laboratori 1 +redmond 1 +washington 1 +publicationssolv 1 +elementari 1 +proceed 1 +seventh 1 +workshop 1 +lcpc 1 +lectur 1 +note 1 +ithaca 1 +august 1 +david 1 +indupraka 1 +kodukula 1 +vladimir 1 +kotlyar 1 +paul 1 +stodghil 1 +sigplan 1 +confer 1 +design 1 +pldi 1 +gianfranco 1 +bilardi 1 +return 1 +report 1 +page 1 +department 1 +pageif 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..59a53c4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,366 @@ +system 17 +comput 16 +program 12 +fault 10 +toler 9 +real 9 +time 9 +committe 9 +intern 9 +univers 8 +scienc 8 +germani 8 +techniqu 7 +implement 7 +member 7 +research 6 +hypervisor 6 +trom 6 +agent 6 +workshop 6 +distribut 5 +applic 5 +gri 5 +logic 5 +coordin 5 +inform 5 +softwar 5 +symposium 5 +formal 5 +exploit 5 +environ 5 +sept 5 +march 5 +critic 4 +virtual 4 +replica 4 +school 4 +center 4 +outlin 4 +lectur 4 +juli 4 +washington 4 +proof 4 +north 4 +carolina 4 +teach 4 +state 3 +concurr 3 +becom 3 +reason 3 +complet 3 +past 3 +base 3 +architectur 3 +protocol 3 +instruct 3 +execut 3 +oper 3 +norwai 3 +cornel 3 +support 3 +process 3 +data 3 +algorithm 3 +springer 3 +systemsprogram 3 +confer 3 +work 3 +hybrid 3 +group 3 +summer 3 +marktoberdorf 3 +lubeck 3 +specif 3 +technic 3 +sigcs 3 +proceed 3 +note 3 +mission 2 +develop 2 +design 2 +investig 2 +order 2 +equat 2 +term 2 +partial 2 +build 2 +manag 2 +machin 2 +issu 2 +realiz 2 +given 2 +final 2 +johansen 2 +robbert 2 +reness 2 +tacoma 2 +project 2 +mobil 2 +network 2 +stoller 2 +detect 2 +asynchron 2 +pass 2 +ieee 2 +engineeringeditor 2 +high 2 +integr 2 +mathemat 2 +fifth 2 +ifip 2 +depend 2 +verif 2 +isat 2 +defens 2 +warfar 2 +studi 2 +advanc 2 +associ 2 +meet 2 +verifi 2 +synchron 2 +moder 2 +refin 2 +polici 2 +munich 2 +chapel 2 +hill 2 +ad 2 +tool 2 +educ 2 +nashvil 2 +tennesse 2 +israel 2 +april 2 +june 2 +verlag 2 +york 2 +forc 2 +home 2 +denis 2 +fred 1 +schneiderfr 1 +schneider 1 +professorphd 1 +univ 1 +stoni 1 +brook 1 +understand 1 +increasingli 1 +import 1 +widespread 1 +focus 1 +heavili 1 +involv 1 +appli 1 +assert 1 +textbook 1 +subject 1 +along 1 +david 1 +continu 1 +concern 1 +first 1 +year 1 +streamlin 1 +infer 1 +rule 1 +evalu 1 +number 1 +handl 1 +undefin 1 +function 1 +thoma 1 +bressoud 1 +analyz 1 +replic 1 +risc 1 +ensur 1 +sequenc 1 +run 1 +differ 1 +physic 1 +processor 1 +ident 1 +also 1 +attract 1 +least 1 +theori 1 +instantli 1 +avail 1 +hardwar 1 +includ 1 +exist 1 +written 1 +second 1 +singl 1 +suffic 1 +everi 1 +programm 1 +freed 1 +task 1 +jointli 1 +start 1 +move 1 +structur 1 +construct 1 +commun 1 +bandwidth 1 +conserv 1 +access 1 +site 1 +resid 1 +typic 1 +filter 1 +otherwis 1 +reduc 1 +read 1 +carri 1 +relev 1 +roam 1 +prototyp 1 +third 1 +experi 1 +scott 1 +whether 1 +particular 1 +could 1 +global 1 +satisfi 1 +predic 1 +allow 1 +effici 1 +possibl 1 +previou 1 +activitiessabbat 1 +leav 1 +profession 1 +activitieseditor 1 +chief 1 +computingeditor 1 +letterseditor 1 +transact 1 +systemseditor 1 +annal 1 +surveysco 1 +editor 1 +text 1 +monograph 1 +verlagprogram 1 +constructionprogram 1 +respons 1 +compos 1 +resili 1 +applicationsprogram 1 +sixteenth 1 +symposiumprogram 1 +dimac 1 +control 1 +systemsst 1 +assur 1 +chissa 1 +nation 1 +institut 1 +standard 1 +technologymemb 1 +agencyreview 1 +leibniz 1 +hebrew 1 +universitymemb 1 +methodolog 1 +awardsfellow 1 +american 1 +sciencefellow 1 +machinerylecturesproof 1 +origin 1 +tradit 1 +banquet 1 +speech 1 +afosr 1 +grante 1 +contractor 1 +panelist 1 +compar 1 +merit 1 +model 1 +safeti 1 +write 1 +reactiv 1 +dagstuhl 1 +merg 1 +analysi 1 +georg 1 +mason 1 +virginia 1 +avoid 1 +mistak 1 +invit 1 +speaker 1 +traffic 1 +nasa 1 +am 1 +distinguish 1 +seri 1 +panel 1 +organ 1 +technion 1 +haifa 1 +place 1 +airplan 1 +view 1 +successor 1 +arpa 1 +publicationsreason 1 +colloquium 1 +icalp 1 +jerusalem 1 +materi 1 +summari 1 +boll 1 +offic 1 +scientif 1 +septemb 1 +volum 1 +limor 1 +proposit 1 +letter 1 +februari 1 +aircraft 1 +hand 1 +foundat 1 +ultradepend 1 +parallel 1 +paradigm 1 +kluwer 1 +academ 1 +publish 1 +marzullo 1 +dehn 1 +bulletin 1 +topic 1 +hoto 1 +orca 1 +island 1 +causal 1 +messag 1 +art 1 +newslett 1 +spring 1 +approach 1 +discret 1 +primu 1 +return 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..540bef63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,115 @@ +univers 7 +product 6 +scienc 6 +kroneck 5 +comput 4 +committe 4 +siam 3 +prize 3 +member 3 +sweden 3 +januari 3 +develop 2 +method 2 +variou 2 +solv 2 +applic 2 +process 2 +descript 2 +perform 2 +undergradu 2 +last 2 +art 2 +journal 2 +household 2 +umea 2 +build 2 +intuit 2 +ohio 2 +state 2 +april 2 +optic 2 +home 2 +cornel 2 +denis 2 +charl 1 +loancharl 1 +loanprofessorphd 1 +michigan 1 +continu 1 +particular 1 +nearest 1 +problem 1 +factor 1 +matric 1 +subject 1 +inhomogen 1 +constraint 1 +signal 1 +markov 1 +result 1 +fast 1 +wavelet 1 +transform 1 +also 1 +deriv 1 +proce 1 +analog 1 +plai 1 +import 1 +role 1 +high 1 +algorithm 1 +text 1 +work 1 +four 1 +year 1 +current 1 +translat 1 +anticip 1 +fall 1 +semest 1 +activitiescomput 1 +depart 1 +curriculum 1 +committeedepart 1 +repres 1 +chair 1 +meetingfreshman 1 +admiss 1 +reader 1 +profession 1 +activitieseditor 1 +matrix 1 +analysismemb 1 +wilkinson 1 +diprima 1 +organ 1 +confer 1 +lecturesappl 1 +linkop 1 +publicationsoptim 1 +close 1 +loop 1 +adapt 1 +multipl 1 +control 1 +bandwidth 1 +societi 1 +america 1 +ellerbroek 1 +pitsiani 1 +plemmon 1 +return 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..dab5140f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,57 @@ +cornel 3 +cours 3 +univers 2 +comput 2 +scienc 2 +depart 2 +curriculum 2 +associ 2 +home 2 +denis 2 +catherin 1 +wagnercatherin 1 +wagnersenior 1 +lecturerphd 1 +primari 1 +respons 1 +teach 1 +work 1 +other 1 +revis 1 +lower 1 +level 1 +specif 1 +interest 1 +develop 1 +student 1 +prepar 1 +introductori 1 +program 1 +activitiescomput 1 +undergradu 1 +committe 1 +profession 1 +activitiesassoci 1 +symbol 1 +logic 1 +machineri 1 +institut 1 +electr 1 +electron 1 +engin 1 +women 1 +mathemat 1 +return 1 +annual 1 +report 1 +page 1 +department 1 +pageif 1 +question 1 +comment 1 +pleas 1 +contact 1 +last 1 +modifi 1 +novemb 1 +moor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..96508394 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,350 @@ +video 27 +research 24 +process 16 +data 13 +brian 12 +smith 12 +network 10 +server 9 +client 7 +multimedia 7 +system 6 +protocol 6 +talk 6 +avail 6 +proc 6 +lawrenc 6 +row 6 +environ 5 +continu 5 +media 5 +us 5 +comput 4 +first 4 +zeno 4 +softwar 4 +workstat 4 +effort 4 +cyclic 4 +compress 4 +onlin 4 +also 4 +languag 4 +intern 4 +cornel 3 +xerox 3 +univers 3 +offic 3 +build 3 +technolog 3 +commun 3 +approach 3 +connect 3 +distribut 3 +best 3 +develop 3 +audio 3 +paper 3 +describ 3 +algorithm 3 +imag 3 +method 3 +format 3 +independ 3 +review 3 +cours 3 +third 3 +confer 3 +workshop 3 +scienc 2 +california 2 +berkelei 2 +make 2 +class 2 +type 2 +support 2 +storag 2 +contrast 2 +hardwar 2 +current 2 +file 2 +architectur 2 +ofworkst 2 +gener 2 +local 2 +area 2 +common 2 +laboratori 2 +fileserv 2 +servic 2 +design 2 +ad 2 +load 2 +across 2 +initi 2 +invest 2 +exist 2 +center 2 +resourc 2 +well 2 +suit 2 +call 2 +playback 2 +applic 2 +decompress 2 +perform 2 +must 2 +idea 2 +jpeg 2 +transcod 2 +oper 2 +specifi 2 +graphic 2 +provid 2 +qualiti 2 +select 2 +jonathan 2 +resolut 2 +francisco 2 +novemb 2 +html 2 +version 2 +ketan 2 +patel 2 +mpeg 2 +transmiss 2 +electron 2 +june 2 +faculti 1 +interest 1 +bsmith 1 +professor 1 +upson 1 +hall 1 +phone 1 +hour 1 +semest 1 +tue 1 +thur 1 +intereststeachingselect 1 +publicationsresearch 1 +talksmisc 1 +linksresearch 1 +interestsmi 1 +goal 1 +ourcomput 1 +group 1 +project 1 +andprocess 1 +commercialand 1 +requir 1 +special 1 +operatingsystem 1 +usabl 1 +aredesign 1 +premis 1 +isthat 1 +infrastructurei 1 +suffici 1 +andappl 1 +verifi 1 +hypothesi 1 +workingsystem 1 +direct 1 +toward 1 +zenodistribut 1 +anethernet 1 +simultan 1 +serverof 1 +plai 1 +videostor 1 +receiv 1 +store 1 +severalserv 1 +sever 1 +compar 1 +withlarg 1 +central 1 +advantag 1 +scalabl 1 +automat 1 +balanc 1 +serv 1 +machin 1 +case 1 +locat 1 +differ 1 +util 1 +infrastructur 1 +promot 1 +earli 1 +adopt 1 +almost 1 +around 1 +effortdeliveri 1 +built 1 +networkprotocol 1 +mani 1 +needto 1 +reserv 1 +establish 1 +resourcereserv 1 +nation 1 +communicationinfrastructur 1 +user 1 +charg 1 +basi 1 +forbandwidth 1 +poorli 1 +networkenviron 1 +share 1 +equal 1 +accessiblebi 1 +appropri 1 +latterenviron 1 +commonli 1 +found 1 +thecommun 1 +builton 1 +datagram 1 +transport 1 +audioand 1 +metropolitan 1 +andwid 1 +todeliv 1 +availableonlin 1 +slide 1 +fold 1 +aredevelop 1 +compressedrepresent 1 +without 1 +lead 1 +todramat 1 +speed 1 +sinc 1 +remov 1 +thetim 1 +consum 1 +reducesth 1 +amount 1 +experi 1 +animplement 1 +indicatesthat 1 +order 1 +magnitud 1 +fasterthan 1 +possibl 1 +previou 1 +currentlyextend 1 +parallel 1 +insoftwar 1 +translat 1 +onecompress 1 +anoth 1 +heterogen 1 +compresseddomain 1 +explor 1 +simplifyexperiment 1 +programminglanguag 1 +calledrivl 1 +pronounc 1 +rival 1 +allowsvideo 1 +effect 1 +resolutionand 1 +sourc 1 +materi 1 +whatpostscript 1 +text 1 +resolutionindepend 1 +thu 1 +sameprogram 1 +quicktim 1 +quickli 1 +whileedit 1 +decis 1 +made 1 +high 1 +qualityfinish 1 +product 1 +line 1 +much 1 +postscript 1 +bepreview 1 +sent 1 +dpiprint 1 +camera 1 +readi 1 +copi 1 +onvideo 1 +domain 1 +rivl 1 +theme 1 +tool 1 +videous 1 +teachingat 1 +teach 1 +undergradu 1 +computerarchitectur 1 +graduat 1 +public 1 +swartz 1 +asif 1 +ghia 1 +logan 1 +david 1 +chamberlin 1 +queri 1 +hum 1 +larg 1 +music 1 +inform 1 +retriev 1 +databas 1 +peter 1 +name 1 +toronto 1 +ontario 1 +canada 1 +juli 1 +prioriti 1 +driven 1 +fast 1 +motion 1 +second 1 +sanfrancisco 1 +octob 1 +represent 1 +spie 1 +symposium 1 +jose 1 +februari 1 +stephen 1 +program 1 +decod 1 +internationalconfer 1 +anaheim 1 +august 1 +famili 1 +manipul 1 +ieee 1 +septemb 1 +player 1 +supportfor 1 +digit 1 +diego 1 +recent 1 +webster 1 +site 1 +minnesota 1 +colloquium 1 +misc 1 +link 1 +work 1 +annett 1 +hanna 1 +manual 1 +mmcn 1 +proceed 1 +documentationth 1 +priceweb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..affe2e00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,279 @@ +system 13 +nuprl 12 +comput 10 +us 9 +scienc 9 +theori 8 +logic 8 +program 7 +univers 6 +softwar 6 +mathemat 5 +type 5 +formal 5 +theorem 5 +design 5 +prove 5 +studi 4 +involv 3 +make 3 +line 3 +implement 3 +build 3 +method 3 +languag 3 +allen 3 +richard 3 +develop 3 +leeser 3 +weyl 3 +circuit 3 +theoret 3 +zippel 3 +connect 3 +committe 3 +israel 3 +chair 2 +cornel 2 +engag 2 +especi 2 +year 2 +major 2 +construct 2 +wide 2 +varieti 2 +version 2 +stuart 2 +eaton 2 +also 2 +how 2 +joint 2 +ventur 2 +polya 2 +algorithm 2 +gri 2 +hope 2 +explor 2 +futur 2 +journal 2 +symbol 2 +computationeditor 2 +fundament 2 +betweencomput 2 +decemb 2 +januari 2 +state 2 +notr 2 +dame 2 +march 2 +york 2 +pennsylvania 2 +springer 2 +verlag 2 +robert 1 +constablerobert 1 +constabledepart 1 +professorrc 1 +eduph 1 +wisconsin 1 +madison 1 +researchw 1 +providemechan 1 +assist 1 +problem 1 +solv 1 +programmingand 1 +long 1 +term 1 +wai 1 +theform 1 +feasibl 1 +implementedthre 1 +past 1 +experiment 1 +lispprogram 1 +sucha 1 +canexpress 1 +proof 1 +asmetalevel 1 +provid 1 +consider 1 +theoremprov 1 +power 1 +moreov 1 +canevalu 1 +content 1 +principl 1 +nuprli 1 +fomal 1 +continu 1 +improv 1 +current 1 +iscal 1 +differ 1 +predecessor 1 +termeditor 1 +itsintern 1 +structur 1 +modular 1 +suitabl 1 +hedefinit 1 +beyond 1 +built 1 +inconstruct 1 +entir 1 +mechanismha 1 +rebuilt 1 +stream 1 +paul 1 +jackson 1 +thework 1 +dougla 1 +contribut 1 +gener 1 +natur 1 +final 1 +refer 1 +isan 1 +intern 1 +descript 1 +builtprincip 1 +william 1 +aitken 1 +link 1 +internaldescript 1 +possibleto 1 +process 1 +three 1 +excit 1 +withmiriam 1 +electr 1 +engin 1 +incomput 1 +davidgri 1 +richardzippel 1 +withless 1 +hardwar 1 +synthesi 1 +verif 1 +student 1 +mark 1 +aagard 1 +thecorrect 1 +boolean 1 +minim 1 +packag 1 +compon 1 +bedrocsystem 1 +weak 1 +divis 1 +widelyus 1 +efforttaught 1 +great 1 +deal 1 +effect 1 +technolog 1 +inth 1 +hand 1 +expert 1 +user 1 +applic 1 +domain 1 +second 1 +model 1 +aprogram 1 +refin 1 +mechan 1 +david 1 +enabl 1 +write 1 +handbook 1 +themann 1 +devis 1 +programmingprocess 1 +givn 1 +definit 1 +ofpolya 1 +expect 1 +experi 1 +soon 1 +transform 1 +tryingto 1 +captur 1 +style 1 +want 1 +recent 1 +begun 1 +collabor 1 +relat 1 +thepolya 1 +effort 1 +conal 1 +mannion 1 +possibl 1 +ofus 1 +discussingproblem 1 +ssymbol 1 +algebra 1 +near 1 +thiswil 1 +scientif 1 +computingsoftwar 1 +togeth 1 +tool 1 +isbuild 1 +profession 1 +activitieseditor 1 +academ 1 +presseditor 1 +oxford 1 +pressgener 1 +licsprogram 1 +north 1 +american 1 +jumelageprogram 1 +aspect 1 +softwarerefere 1 +review 1 +nserc 1 +canada 1 +scienceunivers 1 +activitieschair 1 +recruit 1 +committeecomput 1 +facil 1 +committeeprovost 1 +mathematicslecturesform 1 +inria 1 +anniversari 1 +celebr 1 +pari 1 +franc 1 +colloquium 1 +bengurion 1 +sheva 1 +symposium 1 +aviv 1 +associ 1 +annual 1 +meet 1 +indiana 1 +metaprogram 1 +buffalo 1 +explan 1 +engineeringworkshop 1 +philadelphia 1 +publicationsform 1 +tendenc 1 +control 1 +appli 1 +bensoussan 1 +verju 1 +lectur 1 +note 1 +metalevel 1 +andmathemat 1 +manfr 1 +broi 1 +nato 1 +seri 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..82a94705 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,31 @@ +adam 2 +florenc 2 +cornel 2 +univers 2 +upson 1 +hall 1 +ithaca 1 +aflorenc 1 +first 1 +year 1 +student 1 +incomput 1 +scienc 1 +atcornel 1 +professionalinterest 1 +includ 1 +simul 1 +numer 1 +analysi 1 +follow 1 +link 1 +find 1 +academicsresearchworkinterest 1 +athlet 1 +last 1 +updat 1 +sept 1 +mail 1 +mewith 1 +comment 1 +correct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..17af2ca6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,30 @@ +aguilera 6 +kawazo 4 +cornel 4 +marco 3 +page 2 +univers 2 +home 1 +depart 1 +comput 1 +scienc 1 +ithaca 1 +second 1 +year 1 +student 1 +distribut 1 +system 1 +algorithmsrandom 1 +failur 1 +detect 1 +hybrid 1 +approach 1 +solv 1 +consensusgo 1 +tour 1 +brazil 1 +check 1 +suggest 1 +warn 1 +perman 1 +constructionmarco 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..31820a99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,33 @@ +alfr 2 +hong 2 +cornel 2 +new 2 +chines 2 +home 1 +page 1 +get 1 +dizzi 1 +offic 1 +upson 1 +hall 1 +univers 1 +ithaca 1 +usaoffic 1 +phone 1 +worthwhil 1 +site 1 +check 1 +sinanet 1 +taiwan 1 +requir 1 +nandonet 1 +sunworld 1 +javaworldcours 1 +stuff 1 +corba 1 +essenti 1 +annot 1 +bibliographyc 1 +project 1 +reportalfr 1 +ahong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..04974360 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,58 @@ +comput 4 +cornel 3 +system 3 +high 3 +depart 2 +architectur 2 +network 2 +perform 2 +final 2 +project 2 +postscript 2 +vineet 1 +home 1 +pagevineet 1 +ahujam 1 +engg 1 +sciencecornel 1 +universityithaca 1 +address 1 +hasbrouck 1 +apt 1 +ithaca 1 +mail 1 +ahuja 1 +academ 1 +student 1 +scienc 1 +univers 1 +main 1 +area 1 +interest 1 +parallel 1 +process 1 +advanc 1 +oper 1 +object 1 +orient 1 +program 1 +coursework 1 +fall 1 +automata 1 +theori 1 +engin 1 +spring 1 +report 1 +softwar 1 +design 1 +reportfal 1 +capac 1 +inform 1 +multimedia 1 +resum 1 +recent 1 +html 1 +page 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..16832042 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,21 @@ +ching 2 +chinglan 1 +cornel 1 +edumast 1 +engin 1 +student 1 +comput 1 +scienc 1 +depart 1 +beau 1 +pair 1 +address 1 +seneca 1 +ithaca 1 +telephon 1 +page 1 +still 1 +construct 1 +java 1 +examplegraph 1 +project 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..9d85d9c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,82 @@ +cornel 7 +univers 3 +technic 3 +report 3 +alexei 2 +home 2 +comput 2 +distribut 2 +system 2 +group 2 +new 2 +weather 2 +pagealexei 1 +vaysburdalexei 1 +program 1 +scienc 1 +research 1 +interest 1 +includ 1 +commun 1 +larg 1 +scale 1 +wide 1 +area 1 +andobject 1 +orient 1 +tool 1 +work 1 +within 1 +horu 1 +lead 1 +birman 1 +androbbert 1 +reness 1 +paper 1 +implement 1 +replic 1 +state 1 +machin 1 +partition 1 +network 1 +friedman 1 +vaysburd 1 +link 1 +search 1 +engin 1 +lyco 1 +broadcast 1 +seri 1 +ecol 1 +polytechniqu 1 +federal 1 +lausann 1 +hebrew 1 +transi 1 +page 1 +cuinfo 1 +gopher 1 +direct 1 +cornellcornel 1 +mail 1 +directorycornel 1 +staff 1 +directori 1 +student 1 +directorycours 1 +class 1 +examscornel 1 +calendarcornel 1 +art 1 +musicbailei 1 +hall 1 +concertscornel 1 +music 1 +event 1 +ithaca 1 +current 1 +condit 1 +ithacaworld 1 +brief 1 +odessa 1 +odessaweb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..af911c6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,170 @@ +simul 5 +compress 5 +chip 5 +graphic 4 +yama 3 +ride 3 +train 3 +us 3 +imag 3 +data 3 +hell 2 +heaven 2 +better 2 +comput 2 +microsystem 2 +avenu 2 +cupertino 2 +javasoft 2 +coursesvlsi 2 +engin 2 +system 2 +project 2 +includ 2 +landscap 2 +environ 2 +cours 2 +memori 2 +pattern 2 +softwar 2 +pyramania 2 +game 2 +accord 1 +hindu 1 +mytholog 1 +death 1 +come 1 +downto 1 +earth 1 +give 1 +though 1 +kind 1 +imparti 1 +mess 1 +amithyamasanim 1 +engg 1 +depart 1 +scienc 1 +cornel 1 +univers 1 +ithaca 1 +yorki 1 +make 1 +monei 1 +garcia 1 +mountain 1 +view 1 +mailstop 1 +ucup 1 +watch 1 +warburton 1 +santa 1 +clara 1 +mail 1 +amith 1 +yamasani 1 +comi 1 +currentlyemploi 1 +california 1 +workingin 1 +javamedia 1 +groupeducationfal 1 +multimedia 1 +cssoftwar 1 +csspring 1 +high 1 +perform 1 +cscomput 1 +csproject 1 +final 1 +mpeg 1 +descript 1 +parallel 1 +trace 1 +propos 1 +virtual 1 +realiti 1 +railroad 1 +master 1 +involv 1 +railroadsystem 1 +track 1 +switch 1 +station 1 +user 1 +interact 1 +wall 1 +cave 1 +stereo 1 +vision 1 +done 1 +processor 1 +onyx 1 +softwarei 1 +written 1 +openinventord 1 +librari 1 +silicon 1 +gener 1 +dynam 1 +ofth 1 +through 1 +documentimag 1 +chipdevelop 1 +vlsi 1 +basicallycompress 1 +stream 1 +algorithm 1 +thisalgorithm 1 +suit 1 +control 1 +extern 1 +cachecam 1 +content 1 +address 1 +store 1 +inputstream 1 +hope 1 +might 1 +occur 1 +capableof 1 +rate 1 +byte 1 +nano 1 +second 1 +nowai 1 +routin 1 +speed 1 +snap 1 +video 1 +transit 1 +rivlproposalpresentationand 1 +sampl 1 +dthi 1 +develop 1 +itswritten 1 +interfac 1 +xlib 1 +thed 1 +render 1 +take 1 +look 1 +snapshot 1 +screen 1 +parallelomania 1 +resumehtmlpostscript 1 +past 1 +present 1 +futur 1 +pal 1 +home 1 +page 1 +satyaprasad 1 +avinashgupta 1 +kartikh 1 +kapadia 1 +hrishikeshdixit 1 +joselui 1 +fernandez 1 +vineetahuja 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..c1aea522 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,89 @@ +ankit 2 +patel 2 +cornel 2 +comput 2 +cours 2 +multimedia 2 +project 2 +virtual 2 +realiti 2 +homepag 1 +east 1 +state 1 +street 1 +ithaca 1 +apatel 1 +person 1 +photo 1 +galleria 1 +chronologia 1 +curriculum 1 +vita 1 +universityresumedepart 1 +scienc 1 +enrolledgradu 1 +research 1 +assist 1 +prof 1 +brian 1 +smith 1 +group 1 +zeno 1 +canvasd 1 +toolkit 1 +interact 1 +platform 1 +independ 1 +develop 1 +high 1 +perform 1 +graphic 1 +applic 1 +video 1 +conferencingmultimedia 1 +system 1 +read 1 +assignmentsproject 1 +kernel 1 +endpoint 1 +netan 1 +annot 1 +bibliographi 1 +common 1 +object 1 +request 1 +broker 1 +architectur 1 +corba 1 +critiqu 1 +understand 1 +limit 1 +causal 1 +total 1 +order 1 +commun 1 +david 1 +cheriton 1 +dale 1 +skeen 1 +carnegi 1 +mellon 1 +summer 1 +school 1 +scienceworld 1 +wide 1 +technolog 1 +spring 1 +link 1 +real 1 +time 1 +support 1 +multimediamaharaja 1 +sayajirao 1 +univers 1 +academicsfriend 1 +techoreli 1 +industri 1 +limitedjob 1 +profilelif 1 +relianc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..57639aee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,15 @@ +comput 2 +ashish 1 +jhaveriashish 1 +jhaveridepart 1 +sciencemast 1 +engineeringresumehtmlpost 1 +scriptcourseworkadvanc 1 +databas 1 +systemscsmultimedia 1 +systemscsengin 1 +networkscsprogram 1 +languag 1 +softwareengin 1 +csashish 1 +jhaveri 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..b9aa8788 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,39 @@ +aswin 4 +swing 4 +cornel 3 +danc 3 +berg 2 +graduat 2 +student 2 +phone 2 +program 2 +transform 2 +person 2 +page 2 +eduoffic 1 +upson 1 +hall 1 +home 1 +skyacr 1 +drive 1 +ithaca 1 +interest 1 +languag 1 +systemmi 1 +life 1 +famili 1 +album 1 +annek 1 +server 1 +jean 1 +deejay 1 +guid 1 +isdn 1 +record 1 +hop 1 +pictur 1 +nederlands 1 +club 1 +dutch 1 +clubi 1 +atcornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..3128fcd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,122 @@ +system 6 +piec 5 +game 4 +five 4 +point 4 +home 3 +oper 3 +user 3 +trap 3 +comput 2 +multimedia 2 +distribut 2 +hoca 2 +chip 2 +like 2 +network 2 +requir 2 +player 2 +ipng 2 +space 2 +next 2 +gener 2 +interfac 2 +page 2 +subject 2 +welcomeavinash 1 +guptam 1 +engg 1 +depart 1 +sciencecornel 1 +universityithaca 1 +address 1 +magazin 1 +streetcambridg 1 +mail 1 +avinash 1 +thecia 1 +resumemi 1 +main 1 +area 1 +interest 1 +graphic 1 +cours 1 +softwar 1 +engin 1 +high 1 +perform 1 +project 1 +pronounc 1 +hodja 1 +full 1 +fledg 1 +cornel 1 +hypothet 1 +instruct 1 +processor 1 +support 1 +featur 1 +multipl 1 +multitask 1 +virtual 1 +memori 1 +scene 1 +transit 1 +effect 1 +rivl 1 +presentationpent 1 +skillpent 1 +mean 1 +board 1 +skill 1 +object 1 +horizont 1 +vertic 1 +diagon 1 +earn 1 +oppon 1 +even 1 +number 1 +either 1 +pair 1 +reach 1 +first 1 +win 1 +window 1 +screenshot 1 +gamedownload 1 +implement 1 +ipvimpl 1 +internet 1 +protocol 1 +us 1 +make 1 +abl 1 +stream 1 +proposalprogress 1 +reportsam 1 +caveat 1 +appli 1 +almost 1 +everi 1 +pageon 1 +still 1 +construct 1 +brows 1 +internethytelnetth 1 +librari 1 +catalogeinet 1 +galaxyplanet 1 +earth 1 +pagejoel 1 +hierarch 1 +indexyahoo 1 +guid 1 +wwwwebcrawlerlycosmi 1 +friend 1 +link 1 +sign 1 +guestbook 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..f10d24f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,257 @@ +peopl 7 +also 5 +comput 5 +page 4 +cornel 4 +high 4 +live 4 +work 4 +home 4 +jonathan 3 +barber 3 +cours 3 +research 3 +futur 3 +call 3 +list 3 +finish 3 +program 3 +summer 3 +project 3 +implement 3 +good 3 +favorit 3 +bulli 3 +upson 2 +interest 2 +ponch 2 +goe 2 +student 2 +master 2 +engin 2 +time 2 +develop 2 +protocol 2 +parallel 2 +past 2 +semest 2 +video 2 +product 2 +taken 2 +perform 2 +friend 2 +sugata 2 +paper 2 +encrypt 2 +oregon 2 +like 2 +write 2 +locat 2 +hous 2 +inde 2 +georg 2 +bush 2 +keep 2 +graphic 2 +need 2 +fleshpooooooooooooooch 1 +hall 1 +cool 1 +dude 1 +felt 1 +inclin 1 +chang 1 +first 1 +sentenc 1 +birth 1 +name 1 +usual 1 +callm 1 +orpooch 1 +us 1 +lake 1 +fork 1 +guppi 1 +skin 1 +mama 1 +phin 1 +degre 1 +graduat 1 +computersci 1 +cornellunivers 1 +undergrad 1 +scienc 1 +well 1 +grew 1 +coupl 1 +town 1 +jersei 1 +attendedmontgomeri 1 +school 1 +current 1 +collegetown 1 +adjac 1 +campu 1 +cheap 1 +sublet 1 +oncolleg 1 +ultra 1 +conveni 1 +sinc 1 +plai 1 +half 1 +mile 1 +radiu 1 +went 1 +greek 1 +spent 1 +epsilon 1 +fratern 1 +border 1 +cayuga 1 +height 1 +kind 1 +hard 1 +believ 1 +dai 1 +hand 1 +prof 1 +thorsten 1 +eicken 1 +packet 1 +filter 1 +thefilt 1 +bandwidth 1 +network 1 +bring 1 +person 1 +taught 1 +week 1 +session 1 +ofc 1 +fundament 1 +concept 1 +introductori 1 +year 1 +teach 1 +assist 1 +forc 1 +introduct 1 +digit 1 +system 1 +organ 1 +fall 1 +spring 1 +intereststhi 1 +rivl 1 +resolut 1 +independ 1 +languag 1 +multimedia 1 +group 1 +final 1 +labor 1 +report 1 +continu 1 +media 1 +rivlan 1 +improv 1 +object 1 +tracker 1 +rivli 1 +compil 1 +mukhopadhyai 1 +still 1 +class 1 +smpd 1 +code 1 +generatorfor 1 +fortran 1 +base 1 +linear 1 +algebra 1 +framework 1 +come 1 +soon 1 +previou 1 +greg 1 +weber 1 +webar 1 +smart 1 +frame 1 +depend 1 +stream 1 +mpeg 1 +unfortun 1 +electron 1 +form 1 +trust 1 +look 1 +bright 1 +pack 1 +gear 1 +take 1 +perman 1 +road 1 +trip 1 +hopefulli 1 +buddi 1 +surfer 1 +inform 1 +share 1 +pleas 1 +intel 1 +corp 1 +ahead 1 +pictur 1 +portland 1 +clarif 1 +actual 1 +thought 1 +would 1 +pretti 1 +resourceful 1 +camera 1 +room 1 +window 1 +pipe 1 +meanth 1 +gener 1 +line 1 +comrad 1 +realli 1 +nice 1 +ofhi 1 +found 1 +lucki 1 +consid 1 +least 1 +know 1 +decid 1 +otherwis 1 +captain 1 +swirl 1 +nefari 1 +toilet 1 +mukhopadyai 1 +bonei 1 +magoo 1 +fletop 1 +bigro 1 +dave 1 +koster 1 +bot 1 +offici 1 +tffl 1 +pageuuencod 1 +pagetar 1 +zip 1 +pageif 1 +brows 1 +feel 1 +urg 1 +send 1 +mail 1 +downloadsgraphicsbarb 1 +gifponch 1 +htmlres_htmlres_curemmittemmitt 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..f66f4819 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,202 @@ +anindya 7 +comput 6 +cornel 5 +commun 5 +messag 5 +work 4 +cluster 4 +thorsten 4 +eicken 4 +perform 4 +activ 4 +like 3 +basu 3 +also 3 +parallel 3 +homepag 2 +realli 2 +photograph 2 +scienc 2 +univers 2 +ithaca 2 +phone 2 +graduat 2 +student 2 +complet 2 +love 2 +would 2 +stuff 2 +us 2 +layer 2 +program 2 +develop 2 +fast 2 +implement 2 +latenc 2 +proceed 2 +network 2 +vineet 2 +buch 2 +appear 2 +look 1 +click 1 +recent 1 +mugshot 1 +locat 1 +home 1 +upson 1 +hall 1 +delawar 1 +avenu 1 +depart 1 +offic 1 +mail 1 +eduwhat 1 +scienceat 1 +hope 1 +turn 1 +centuri 1 +listen 1 +rock 1 +musicor 1 +goof 1 +thecornel 1 +projectwith 1 +advisor 1 +think 1 +coollik 1 +indiawho 1 +went 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +hardpink 1 +floydfanat 1 +final 1 +realiz 1 +childhood 1 +dream 1 +see 1 +livelast 1 +summer 1 +philadelphia 1 +plum 1 +known 1 +thegreat 1 +unwash 1 +pelham 1 +grenvil 1 +wodehous 1 +attendedwoodstock 1 +truli 1 +motiv 1 +onlinewoodstock 1 +review 1 +woodstock 1 +experienc 1 +metallica 1 +live 1 +first 1 +time 1 +thesocc 1 +worldcup 1 +last 1 +year 1 +butunfortun 1 +happen 1 +cook 1 +tri 1 +putsomerecip 1 +onlin 1 +fanci 1 +connoisseurof 1 +good 1 +whiski 1 +especi 1 +singl 1 +malt 1 +link 1 +cool 1 +calvin 1 +hobb 1 +archiv 1 +south 1 +asian 1 +writer 1 +cheer 1 +monti 1 +python 1 +beavi 1 +buttheadoth 1 +cuinfo 1 +gopherand 1 +someth 1 +differ 1 +projectwhich 1 +aim 1 +provid 1 +high 1 +ofworkst 1 +compar 1 +state 1 +mpp 1 +acheiv 1 +passinglay 1 +call 1 +split 1 +thatshow 1 +run 1 +meiko 1 +addit 1 +show 1 +saturateth 1 +fibr 1 +segment 1 +size 1 +collabor 1 +withth 1 +berkelei 1 +project 1 +team 1 +specificationfor 1 +enabl 1 +processesboth 1 +trust 1 +untrust 1 +environ 1 +public 1 +mechan 1 +integr 1 +david 1 +culler 1 +seth 1 +goldstein 1 +klau 1 +schauser 1 +symp 1 +architectur 1 +gold 1 +coast 1 +australia 1 +veena 1 +avula 1 +present 1 +interconnect 1 +palo 1 +alto 1 +abridgedvers 1 +paper 1 +ieee 1 +micro 1 +februari 1 +user 1 +level 1 +interfac 1 +distribut 1 +werner 1 +vogel 1 +sosp 1 +back 1 +homepagelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..208d2b43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,105 @@ +cornel 5 +check 3 +weather 3 +jpop 3 +internet 3 +comput 2 +scienc 2 +area 2 +librari 2 +interact 2 +music 2 +navig 2 +info 2 +simpl 1 +homepageben 1 +haogradu 1 +studentbhao 1 +educornel 1 +universitydept 1 +upson 1 +hallithaca 1 +insid 1 +flea 1 +forth 1 +year 1 +student 1 +incomput 1 +work 1 +code 1 +synthesi 1 +massiv 1 +parallel 1 +processor 1 +advisor 1 +richard 1 +zippel 1 +first 1 +heard 1 +georg 1 +taylorwhen 1 +microsystem 1 +famili 1 +francisco 1 +find 1 +inform 1 +aboutth 1 +read 1 +local 1 +newspap 1 +forget 1 +itsgorg 1 +life 1 +cornellwhat 1 +ithaca 1 +movi 1 +studi 1 +dept 1 +home 1 +page 1 +gener 1 +neat 1 +stuff 1 +stock 1 +map 1 +frog 1 +dissectionmagazin 1 +intertext 1 +magazin 1 +wire 1 +magazinea 1 +peek 1 +galaxi 1 +view 1 +solar 1 +system 1 +shoemak 1 +levi 1 +weblouvr 1 +xmorphia 1 +galleri 1 +line 1 +geometri 1 +kaleidospac 1 +overview 1 +bonsai 1 +seiyuu 1 +underground 1 +archivenetwork 1 +global 1 +network 1 +wander 1 +spider 1 +edgelibrari 1 +congress 1 +martial 1 +scientif 1 +tutori 1 +infonih 1 +courseth 1 +intern 1 +guidecern 1 +seminar 1 +last 1 +modifi 1 +bhao 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..45ce2df3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,23 @@ +aastha 1 +home 1 +pageaastha 1 +bhardwajdepart 1 +comput 1 +sciencemast 1 +ofengineeeringresumehtmlpost 1 +scriptcourseworkadvanceddatabas 1 +system 1 +csmultimediasystem 1 +csengineeringcomput 1 +network 1 +cssoftwareengin 1 +program 1 +languag 1 +cscontact 1 +inform 1 +hasbrouck 1 +apart 1 +ithaca 1 +york 1 +bhardwaj 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..35a923c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,283 @@ +robot 30 +cornel 9 +intern 9 +vision 7 +algorithm 7 +ieee 7 +paper 7 +macdonald 6 +arrai 6 +plan 6 +tommi 6 +lili 6 +donald 6 +manipul 6 +inform 6 +part 5 +mobil 5 +bhringer 5 +confer 5 +invari 5 +pictur 5 +click 5 +laboratori 4 +actuat 4 +us 4 +programm 4 +micro 4 +workshop 4 +distribut 4 +jen 4 +massiv 3 +parallel 3 +feeder 3 +group 3 +sensor 3 +system 3 +bound 3 +vector 3 +field 3 +vibratori 3 +foundat 3 +franc 3 +mem 3 +diego 3 +autom 3 +kinodynam 3 +xavier 3 +journal 3 +proc 3 +hereto 3 +peopl 3 +randal 2 +home 2 +page 2 +huttenloch 2 +thecornel 2 +noel 2 +chip 2 +small 2 +student 2 +work 2 +develop 2 +team 2 +demo 2 +fabric 2 +video 2 +chase 2 +ourlab 2 +recent 2 +toulous 2 +brigg 2 +task 2 +california 2 +cannot 2 +icra 2 +minneapoli 2 +minnesota 2 +april 2 +provablygood 2 +approxim 2 +optim 2 +algorithmica 2 +experiment 2 +intellig 2 +book 2 +artificialintellig 2 +ree 2 +couch 2 +plai 2 +bruce 1 +donaldbruc 1 +donaldassoci 1 +professorbrd 1 +eduph 1 +weather 1 +palo 1 +alto 1 +offici 1 +department 1 +laboratorydan 1 +found 1 +researchmi 1 +interest 1 +includ 1 +professor 1 +build 1 +microactu 1 +nation 1 +nanofabr 1 +arrayi 1 +scream 1 +contain 1 +squarecentemet 1 +orient 1 +without 1 +sensoryfeedback 1 +microfabr 1 +could 1 +toconstruct 1 +scale 1 +buildself 1 +propel 1 +walk 1 +vlsi 1 +graduat 1 +amybrigg 1 +surveil 1 +ofmobil 1 +board 1 +detect 1 +andintercept 1 +target 1 +mpeg 1 +built 1 +developedbi 1 +track 1 +follow 1 +visual 1 +informationalon 1 +show 1 +view 1 +face 1 +morph 1 +select 1 +public 1 +upper 1 +andlow 1 +applic 1 +memsand 1 +thealgorithm 1 +robustgeometr 1 +internationalworkshop 1 +singl 1 +crystal 1 +silicon 1 +electro 1 +mechan 1 +februari 1 +classif 1 +lower 1 +andimprov 1 +partsfeed 1 +partii 1 +robotswith 1 +decoupl 1 +dynam 1 +forcartesian 1 +open 1 +chain 1 +motion 1 +canni 1 +reif 1 +research 1 +inpress 1 +minim 1 +supermodular 1 +andtheoret 1 +artifici 1 +jetai 1 +press 1 +write 1 +entitl 1 +draft 1 +firstquart 1 +appear 1 +revis 1 +base 1 +experi 1 +inminim 1 +symposium 1 +iser 1 +stanford 1 +move 1 +furnitur 1 +automon 1 +societi 1 +ofjapan 1 +iro 1 +pittsburgh 1 +sensorlessmanipul 1 +actuatorarrai 1 +mihailovich 1 +andautom 1 +detail 1 +explan 1 +program 1 +scheme 1 +automationnic 1 +first 1 +ofrobot 1 +peter 1 +boston 1 +wilson 1 +andj 1 +latomb 1 +automat 1 +configur 1 +direct 1 +proceed 1 +otherpubl 1 +these 1 +post 1 +doc 1 +train 1 +daniela 1 +jonathan 1 +dinesh 1 +aval 1 +server 1 +list 1 +version 1 +onlin 1 +tech 1 +report 1 +librari 1 +catalogc 1 +indexobtain 1 +copi 1 +paperscopi 1 +avail 1 +anonym 1 +teamof 1 +autonom 1 +movefurnitur 1 +around 1 +portrait 1 +mobot 1 +push 1 +rotat 1 +apictur 1 +drawn 1 +loretta 1 +pompilio 1 +discoverychannel 1 +beyond 1 +find 1 +funa 1 +poem 1 +alfr 1 +mail 1 +agent 1 +famili 1 +harm 1 +swallow 1 +ithaca 1 +sometim 1 +moreoth 1 +depart 1 +herefor 1 +search 1 +tool 1 +access 1 +stuff 1 +return 1 +level 1 +clickher 1 +tallest 1 +darkest 1 +lead 1 +hollywood 1 +merian 1 +cooper 1 +wrai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..8900a8f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,242 @@ +cardi 13 +learn 13 +languag 10 +intellig 8 +natur 8 +proceed 8 +artifici 7 +machin 7 +press 7 +confer 7 +research 6 +base 6 +system 6 +us 6 +knowledg 6 +acquisit 5 +approach 5 +aaai 5 +annual 4 +case 4 +techniqu 4 +work 4 +project 4 +linguist 4 +sentenc 4 +analysi 4 +domain 4 +process 4 +univers 4 +thesi 4 +cognit 4 +cornel 3 +understand 3 +inform 3 +kenmor 3 +featur 3 +rel 3 +pronoun 3 +note 3 +specif 3 +massachusett 3 +amherst 3 +nation 3 +link 2 +report 2 +focu 2 +area 2 +naturallanguag 2 +group 2 +interest 2 +guid 2 +develop 2 +extract 2 +part 2 +technic 2 +text 2 +knowledgeacquisit 2 +corpora 2 +tag 2 +larger 2 +select 2 +riloff 2 +workshop 2 +intern 2 +chapter 2 +contain 2 +page 2 +tenth 2 +disambigu 2 +associ 2 +comput 2 +jose 2 +scienc 2 +societi 2 +lawrenc 2 +erlbaumassoci 2 +lehnert 2 +paper 2 +clair 1 +cardieclair 1 +assist 1 +professor 1 +upson 1 +hallphon 1 +email 1 +educlick 1 +interestscours 1 +teachselect 1 +publicationsnlp 1 +entri 1 +depart 1 +amalgam 1 +softbal 1 +stat 1 +finger 1 +lake 1 +tandem 1 +tour 1 +westi 1 +interestsalthough 1 +span 1 +number 1 +subfield 1 +within 1 +includ 1 +reason 1 +cognitivemodel 1 +primarili 1 +investig 1 +machinelearn 1 +tool 1 +forexplor 1 +mechan 1 +underli 1 +focus 1 +tworel 1 +design 1 +user 1 +train 1 +effici 1 +reliablyextract 1 +import 1 +document 1 +cstr 1 +tosupport 1 +content 1 +brows 1 +autom 1 +task 1 +compris 1 +build 1 +gener 1 +kenmoreacquir 1 +combin 1 +symbol 1 +learningtechniqu 1 +robust 1 +tworeal 1 +world 1 +perform 1 +speech 1 +semant 1 +andconcept 1 +activ 1 +find 1 +anteced 1 +current 1 +extend 1 +handl 1 +addit 1 +disambiguationtask 1 +evalu 1 +learningcompon 1 +context 1 +applic 1 +isembed 1 +goal 1 +determin 1 +condit 1 +expect 1 +offer 1 +cost 1 +effect 1 +teachingc 1 +spring 1 +foundat 1 +practicum 1 +inartifici 1 +fall 1 +seminar 1 +understandingselect 1 +publicationsautom 1 +conferenceon 1 +empir 1 +method 1 +pennsylvania 1 +embed 1 +agener 1 +framework 1 +wermter 1 +scheler 1 +gabriel 1 +connectionist 1 +statist 1 +andsymbol 1 +lectur 1 +springer 1 +origin 1 +present 1 +tolearn 1 +jointconfer 1 +ijcai 1 +introduct 1 +conceptu 1 +file 1 +introductori 1 +conceptualsent 1 +avail 1 +cmpsci 1 +eleventh 1 +washington 1 +decis 1 +tree 1 +improv 1 +morgan 1 +kaufmann 1 +corpu 1 +heurist 1 +newark 1 +bias 1 +fourteenth 1 +bloomington 1 +onconstrain 1 +prior 1 +plausibl 1 +complic 1 +syntax 1 +ninth 1 +anaheim 1 +analyz 1 +citat 1 +twelfth 1 +cambridg 1 +linkscomput 1 +linguistics 1 +print 1 +archiv 1 +databas 1 +recent 1 +aclspeci 1 +learningmachin 1 +digestmachinelearn 1 +resourc 1 +researchersmachin 1 +home 1 +penn 1 +treebank 1 +repositori 1 +pointer 1 +code 1 +variou 1 +compon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..51622e8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,88 @@ +chew 3 +geometr 3 +gener 3 +mathemat 3 +paul 2 +research 2 +univers 2 +cornel 2 +work 2 +delaunai 2 +creat 2 +applic 2 +mesh 2 +level 2 +softwar 2 +us 2 +senior 1 +associatephd 1 +purdu 1 +eduappletsy 1 +need 1 +java 1 +compat 1 +beta 1 +version 1 +browser 1 +asnetscap 1 +make 1 +voronoi 1 +applet 1 +avoronoi 1 +diagram 1 +triangul 1 +click 1 +point 1 +agenda 1 +primari 1 +interest 1 +algorithm 1 +emphasi 1 +onpract 1 +practic 1 +includedplac 1 +motion 1 +plan 1 +shape 1 +comparison 1 +vision 1 +sens 1 +exampl 1 +issu 1 +thataris 1 +part 1 +problem 1 +automat 1 +scientificsoftwar 1 +goal 1 +rais 1 +isspecifi 1 +develop 1 +environ 1 +scientif 1 +canb 1 +natur 1 +high 1 +concept 1 +ofphys 1 +engin 1 +thu 1 +program 1 +specifi 1 +implicitli 1 +acollect 1 +equat 1 +symbol 1 +techniquesar 1 +transform 1 +express 1 +effectiveprogram 1 +myonlin 1 +tech 1 +reportscornel 1 +depart 1 +computerscienceth 1 +simlabprojectaddress 1 +rhode 1 +hall 1 +ithaca 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..1d5a5ce9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,76 @@ +cornel 5 +soccer 4 +chao 3 +chang 3 +page 2 +chichao 2 +student 2 +comput 2 +network 2 +interest 2 +system 2 +activ 2 +home 1 +edui 1 +thedepart 1 +scienceat 1 +univers 1 +faculti 1 +advisor 1 +isthorsten 1 +eicken 1 +summer 1 +microsoft 1 +group 1 +click 1 +addressesand 1 +phone 1 +number 1 +research 1 +interact 1 +compil 1 +runtim 1 +andoper 1 +toward 1 +effici 1 +concurr 1 +program 1 +overheterogen 1 +tham 1 +composit 1 +messageslow 1 +latenc 1 +commun 1 +risc 1 +multimatlab 1 +matlab 1 +multipl 1 +processor 1 +design 1 +perform 1 +messag 1 +anyon 1 +sport 1 +server 1 +latest 1 +newsbraziliansocc 1 +portugues 1 +world 1 +result 1 +andhomepagesoliv 1 +guid 1 +cool 1 +stuff 1 +lubrasa 1 +luso 1 +brazilian 1 +associationu 1 +chess 1 +centerjorn 1 +brasilmi 1 +carstockmasterjayhawk 1 +basketballwww 1 +tennisserverback 1 +scienc 1 +homepagelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..a0769d4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,58 @@ +chee 2 +keong 2 +graduat 2 +engin 2 +comput 2 +scienc 2 +depart 2 +cornel 2 +ithaca 2 +inform 2 +tokyo 2 +singapor 2 +liau 1 +liauwelcom 1 +home 1 +page 1 +student 1 +master 1 +programm 1 +apolog 1 +forth 1 +construct 1 +work 1 +hopefulli 1 +thing 1 +improv 1 +soon 1 +school 1 +univers 1 +fall 1 +class 1 +foundat 1 +artifici 1 +intelligencec 1 +program 1 +languag 1 +softwar 1 +engineeringc 1 +networksc 1 +advanc 1 +databas 1 +systemsbaccalaur 1 +cours 1 +institut 1 +technolog 1 +japanhomei 1 +come 1 +small 1 +countri 1 +call 1 +know 1 +find 1 +avail 1 +homepag 1 +mapl 1 +avenu 1 +edulast 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..53c35e7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,83 @@ +research 6 +cornel 5 +comput 5 +optim 3 +thoma 2 +center 2 +mathemat 2 +group 2 +project 2 +interest 2 +continu 2 +program 2 +problem 2 +larg 2 +scale 2 +imag 2 +linear 2 +student 2 +postdoc 2 +coleman 2 +colemanthoma 1 +colemancornel 1 +universityi 1 +professcp 1 +match 1 +http 1 +scienc 1 +depart 1 +appli 1 +also 1 +strong 1 +affili 1 +theori 1 +director 1 +applic 1 +advanc 1 +institut 1 +final 1 +member 1 +ccop 1 +faculti 1 +broadfield 1 +discret 1 +programmi 1 +concern 1 +design 1 +understand 1 +practic 1 +effici 1 +numer 1 +algorithm 1 +primari 1 +develop 1 +computationalmethod 1 +tool 1 +automat 1 +differenti 1 +reconstruct 1 +biomed 1 +parallel 1 +minim 1 +inequ 1 +nonlinear 1 +equal 1 +constraint 1 +profession 1 +activ 1 +recent 1 +paper 1 +book 1 +current 1 +former 1 +associ 1 +softwar 1 +link 1 +curriculum 1 +vita 1 +best 1 +rhode 1 +hall 1 +univers 1 +ithaca 1 +york 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..b814439a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,166 @@ +type 8 +program 7 +languag 7 +theori 6 +home 4 +cornel 4 +theoret 4 +also 4 +compil 4 +interest 3 +semant 3 +formul 3 +intermedi 3 +research 2 +implement 2 +atyp 2 +view 2 +tractabl 2 +approxim 2 +mathemat 2 +often 2 +programminglanguag 2 +translat 2 +calculi 2 +work 2 +project 2 +system 2 +comput 2 +scienc 2 +import 2 +lord 2 +love 2 +karl 1 +crari 1 +pagekarl 1 +crarycrari 1 +eduoffic 1 +address 1 +upson 1 +halloffic 1 +phone 1 +researchbroadli 1 +speak 1 +primari 1 +design 1 +implementationand 1 +recent 1 +emphasi 1 +area 1 +subtyp 1 +object 1 +orient 1 +modular 1 +practic 1 +kmlwhich 1 +combin 1 +featur 1 +function 1 +intract 1 +richworld 1 +foundat 1 +whole 1 +perform 1 +newprogram 1 +develop 1 +result 1 +map 1 +construct 1 +anapproxim 1 +set 1 +convers 1 +paradigm 1 +well 1 +understood 1 +aminterest 1 +deepen 1 +understand 1 +relationship 1 +particularli 1 +issu 1 +mitig 1 +model 1 +seri 1 +lower 1 +calculu 1 +embed 1 +andth 1 +correspond 1 +interpret 1 +invari 1 +modelallow 1 +relat 1 +stage 1 +origin 1 +allowsth 1 +standard 1 +techniqu 1 +optim 1 +guarante 1 +safeti 1 +andcorrect 1 +care 1 +make 1 +possibl 1 +additionaloptim 1 +unavail 1 +strategi 1 +form 1 +part 1 +thenuprl 1 +hereat 1 +name 1 +come 1 +nuprl 1 +formal 1 +logic 1 +base 1 +martin 1 +automatedreason 1 +committe 1 +consist 1 +ofrobert 1 +constabl 1 +greg 1 +morrisett 1 +dexter 1 +kozen 1 +close 1 +jasonhickei 1 +select 1 +papersoth 1 +linksmark 1 +leon 1 +maintain 1 +collect 1 +ofprogram 1 +resourc 1 +cansearch 1 +technic 1 +report 1 +onlin 1 +grad 1 +life 1 +biblestudi 1 +pageth 1 +lurker 1 +guid 1 +babylon 1 +command 1 +answer 1 +jesu 1 +hear 1 +israel 1 +thelord 1 +heart 1 +andwith 1 +soul 1 +mind 1 +strength 1 +thesecond 1 +neighbor 1 +commandmentgreat 1 +mark 1 +univers 1 +pagedepart 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..449489bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,130 @@ +spars 16 +parallel 15 +comput 11 +siam 11 +linear 8 +solut 8 +least 8 +squar 8 +problem 8 +cornel 7 +scientif 7 +memori 5 +multiprocessor 5 +distribut 5 +research 4 +theori 4 +center 4 +confer 4 +factor 4 +philadelphia 4 +coleman 3 +matrix 3 +multifront 3 +proceed 3 +pothen 3 +chunguang 2 +univers 2 +advanc 2 +institut 2 +optim 2 +project 2 +algorithm 2 +numer 2 +softwar 2 +dens 2 +row 2 +seventh 2 +orthogon 2 +journal 2 +technic 2 +report 2 +ctctr 2 +larg 2 +processingfor 2 +cliqu 2 +tree 2 +sunchunguang 1 +sunphd 1 +pennsylvania 1 +state 1 +welcom 1 +home 1 +page 1 +associ 1 +also 1 +affili 1 +thecornel 1 +work 1 +close 1 +professorthoma 1 +interest 1 +algebra 1 +mathemat 1 +current 1 +ppcx 1 +program 1 +rank 1 +defici 1 +bound 1 +packag 1 +pssl 1 +psspd 1 +symmetr 1 +posit 1 +definit 1 +systemsrec 1 +lecturesparallel 1 +contain 1 +second 1 +matric 1 +coeur 1 +alen 1 +idaho 1 +octob 1 +ondistribut 1 +parallelprocess 1 +francisco 1 +februari 1 +select 1 +public 1 +deal 1 +solutionof 1 +decemb 1 +cornellunivers 1 +conferenceon 1 +process 1 +bailei 1 +bjorstad 1 +gilbert 1 +mascagni 1 +schreiber 1 +simon 1 +torczon 1 +watson 1 +map 1 +choleskyfactor 1 +septemb 1 +matriceson 1 +sixth 1 +sinovec 1 +key 1 +leuz 1 +petzold 1 +reed 1 +us 1 +fifth 1 +dongarra 1 +kennedi 1 +messina 1 +sorensen 1 +voigt 1 +compact 1 +data 1 +structuresin 1 +scale 1 +univeristi 1 +ithaca 1 +mail 1 +csun 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..588d1049 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,182 @@ +group 15 +process 7 +architectur 7 +david 6 +cooper 5 +implement 5 +horu 5 +privaci 5 +origin 5 +secur 5 +messag 5 +network 5 +research 4 +servic 4 +user 4 +current 3 +design 3 +synchroni 3 +us 3 +allow 3 +join 3 +trust 3 +member 3 +client 3 +server 3 +would 3 +scheme 3 +howev 3 +mobil 3 +privat 3 +inform 3 +devic 3 +birman 3 +page 2 +cornel 2 +involv 2 +provid 2 +authent 2 +order 2 +set 2 +virtual 2 +maintain 2 +within 2 +semant 2 +desir 2 +permit 2 +untrust 2 +relationshipsamong 2 +thesi 2 +propos 2 +solut 2 +type 2 +wish 2 +first 2 +send 2 +also 2 +prevent 2 +commun 2 +kenneth 2 +comput 2 +home 1 +postdoctor 1 +associ 1 +upson 1 +hall 1 +phone 1 +email 1 +dcooper 1 +securityarchitectur 1 +goal 1 +work 1 +layer 1 +horuswhich 1 +interact 1 +kerberosnetwork 1 +cryptograph 1 +tool 1 +toprovid 1 +mike 1 +reiter 1 +fault 1 +toler 1 +system 1 +originalimplement 1 +support 1 +model 1 +ofcomput 1 +crash 1 +failuremodel 1 +necessari 1 +honest 1 +result 1 +make 1 +assumpt 1 +anyprocess 1 +version 1 +isposs 1 +whose 1 +weaker 1 +ofvirtu 1 +untrustedprocess 1 +exampl 1 +might 1 +clientsto 1 +communicatewith 1 +accept 1 +limit 1 +command 1 +fromth 1 +respons 1 +screen 1 +horussecur 1 +arbitrari 1 +accomplish 1 +keymanag 1 +impersonateanoth 1 +trivial 1 +achieveth 1 +witha 1 +slightli 1 +higher 1 +overhead 1 +unlik 1 +enabl 1 +asclient 1 +mani 1 +complic 1 +problem 1 +inherentin 1 +static 1 +basic 1 +ofinform 1 +keep 1 +contentsof 1 +hiddenwith 1 +proper 1 +encrypt 1 +outsidersfrom 1 +determin 1 +maintainingth 1 +unlink 1 +sender 1 +recipi 1 +chaum 1 +februari 1 +sinc 1 +severaloth 1 +made 1 +improv 1 +addit 1 +staticnetwork 1 +locat 1 +carri 1 +mobilecommun 1 +gener 1 +themessag 1 +receiv 1 +reveal 1 +informationabout 1 +owner 1 +develop 1 +along 1 +advisorken 1 +protocol 1 +attack 1 +internaland 1 +extern 1 +adversari 1 +public 1 +preserv 1 +ofmobil 1 +proceed 1 +ieee 1 +symposium 1 +securityand 1 +apriv 1 +wireless 1 +anthoni 1 +mobilecomput 1 +dissert 1 +univers 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..5bc3189b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,131 @@ +project 3 +love 3 +ever 3 +hope 3 +cyber 2 +mine 2 +home 2 +instead 2 +deidr 2 +model 2 +faith 2 +dread 2 +burn 2 +pandora 2 +abodedan 1 +abodegreet 1 +travel 1 +digit 1 +space 1 +welcom 1 +humbl 1 +prithe 1 +gentl 1 +surf 1 +shore 1 +blink 1 +teari 1 +ey 1 +rest 1 +weari 1 +kei 1 +mice 1 +born 1 +hand 1 +make 1 +thyselv 1 +abod 1 +brief 1 +autobiographi 1 +resum 1 +facial 1 +express 1 +univers 1 +emot 1 +simnet 1 +builder 1 +type 1 +game 1 +faiththei 1 +hardli 1 +prize 1 +ferro 1 +concret 1 +sai 1 +wise 1 +scorn 1 +bend 1 +ear 1 +lawyer 1 +tone 1 +scientist 1 +word 1 +need 1 +unseen 1 +unheard 1 +untouch 1 +silenc 1 +night 1 +unknown 1 +question 1 +uncertain 1 +yearn 1 +true 1 +direct 1 +field 1 +lordlovewarm 1 +friendship 1 +mindless 1 +infatu 1 +sensual 1 +romanc 1 +passion 1 +soft 1 +sigh 1 +belov 1 +poetri 1 +hopemyth 1 +favor 1 +beauteou 1 +place 1 +fault 1 +human 1 +role 1 +releas 1 +demon 1 +mani 1 +astrai 1 +pretti 1 +glimmer 1 +fals 1 +tread 1 +tortur 1 +broken 1 +road 1 +amidst 1 +thorn 1 +dark 1 +filthi 1 +soul 1 +diseas 1 +pain 1 +horror 1 +suffer 1 +reach 1 +fear 1 +tear 1 +cannot 1 +blame 1 +deed 1 +told 1 +heart 1 +take 1 +hold 1 +world 1 +would 1 +never 1 +frozen 1 +miseri 1 +cold 1 +spring 1 +etern 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..86869c04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,78 @@ +server 5 +page 5 +librari 4 +digit 4 +search 4 +cornel 3 +inform 3 +tool 3 +home 2 +engin 2 +gopher 2 +network 2 +refer 2 +weather 2 +dean 1 +krafft 1 +select 1 +urlsdean 1 +point 1 +interestcornel 1 +cuinfo 1 +legal 1 +institut 1 +directori 1 +project 1 +public 1 +dimund 1 +document 1 +imag 1 +understand 1 +charact 1 +recognit 1 +siam 1 +captur 1 +access 1 +cours 1 +illinoi 1 +stanford 1 +michigan 1 +berkelei 1 +librarysearch 1 +lyco 1 +veronica 1 +archi 1 +gatewai 1 +anonym 1 +site 1 +depart 1 +togeth 1 +excel 1 +collect 1 +scout 1 +report 1 +intern 1 +planet 1 +earth 1 +whole 1 +internet 1 +catalog 1 +part 1 +global 1 +navig 1 +cern 1 +faqsvari 1 +stuff 1 +head 1 +mail 1 +list 1 +audio 1 +gear 1 +folkbook 1 +folk 1 +music 1 +ithaca 1 +forecast 1 +elsewher 1 +secur 1 +index 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..01d0cb17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,98 @@ +comput 7 +research 6 +cornel 5 +project 4 +dean 3 +report 3 +krafft 2 +page 2 +facil 2 +consortium 2 +avail 2 +technic 2 +system 2 +dienst 2 +home 1 +director 1 +facilitiesaddress 1 +upson 1 +halldepart 1 +sciencecornel 1 +universityithaca 1 +phone 1 +current 1 +serv 1 +administr 1 +inth 1 +scienc 1 +depart 1 +guis 1 +anadministr 1 +manag 1 +support 1 +group 1 +andworri 1 +number 1 +issu 1 +includ 1 +secur 1 +network 1 +build 1 +servic 1 +side 1 +princip 1 +investig 1 +spart 1 +arpa 1 +fund 1 +five 1 +sciencedepart 1 +thecorpor 1 +nation 1 +initi 1 +cnri 1 +researchi 1 +intend 1 +rapid 1 +dissemin 1 +break 1 +technicalresearch 1 +internet 1 +well 1 +make 1 +line 1 +theexist 1 +librari 1 +member 1 +part 1 +work 1 +davi 1 +xerox 1 +employe 1 +thedesign 1 +institut 1 +carl 1 +lagoz 1 +emploi 1 +develop 1 +implement 1 +protocol 1 +disseminationov 1 +similar 1 +materi 1 +call 1 +eight 1 +univers 1 +site 1 +atechn 1 +inform 1 +ondienst 1 +pleas 1 +send 1 +email 1 +togethera 1 +select 1 +url 1 +relat 1 +thing 1 +interestedin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..d2d8a735 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,24 @@ +cornel 3 +home 2 +analysi 2 +divakar 1 +pagedivakar 1 +viswanathdivakar 1 +address 1 +upson 1 +hall 1 +univers 1 +ithaca 1 +graduat 1 +student 1 +comput 1 +scienc 1 +area 1 +interest 1 +isnumer 1 +advis 1 +page 1 +good 1 +place 1 +find 1 +numer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..51a87e9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,90 @@ +comput 3 +cornel 3 +diyu 2 +home 2 +databas 2 +compil 2 +engin 2 +ithaca 2 +receiv 2 +physic 2 +jersei 2 +china 2 +pagediyu 1 +daisi 1 +welcom 1 +spring 1 +cours 1 +practic 1 +distribut 1 +system 1 +translat 1 +practicum 1 +translatorsfal 1 +coursesc 1 +oper 1 +systemc 1 +softwar 1 +engineeringc 1 +advanc 1 +systemsel 1 +network 1 +telecommunicationsm 1 +projectorigin 1 +virtual 1 +realiti 1 +railroad 1 +projectsinc 1 +mayb 1 +like 1 +know 1 +littl 1 +doubt 1 +anywai 1 +current 1 +master 1 +student 1 +depart 1 +scienc 1 +univers 1 +locat 1 +central 1 +york 1 +gorgeou 1 +place 1 +live 1 +except 1 +winter 1 +last 1 +year 1 +appli 1 +institut 1 +technolog 1 +newark 1 +brought 1 +beauti 1 +campu 1 +tsinghua 1 +unviers 1 +beij 1 +also 1 +want 1 +friend 1 +miss 1 +us 1 +linksjava 1 +html 1 +tkfavorit 1 +sitestimecnnlondon 1 +timeswashington 1 +postchines 1 +digestchina 1 +new 1 +digestfeng 1 +yuanxin 1 +siart 1 +chinaloc 1 +connectionsctc 1 +sunlabweathermovi 1 +miller 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..d84b7d80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,113 @@ +document 6 +recognit 5 +comput 5 +area 3 +visual 3 +match 3 +us 3 +collabor 3 +imag 3 +huttenloch 2 +cornel 2 +research 2 +work 2 +algorithm 2 +system 2 +remot 2 +video 2 +monitor 2 +also 2 +interest 2 +electron 2 +hausdorff 2 +method 2 +implement 2 +avail 2 +teach 2 +cours 2 +program 2 +vision 2 +activ 2 +home 1 +page 1 +daniel 1 +associ 1 +professordph 1 +main 1 +rang 1 +theoret 1 +techniqu 1 +geometri 1 +applic 1 +view 1 +wide 1 +network 1 +target 1 +type 1 +commun 1 +educ 1 +compar 1 +geometr 1 +structur 1 +base 1 +fast 1 +index 1 +eigenspac 1 +approxim 1 +fraction 1 +matlab 1 +perform 1 +evalu 1 +model 1 +object 1 +track 1 +identif 1 +digipap 1 +highli 1 +compact 1 +univers 1 +viewabl 1 +format 1 +conot 1 +support 1 +share 1 +brian 1 +smith 1 +develop 1 +author 1 +offer 1 +first 1 +time 1 +spring 1 +introduct 1 +profession 1 +xerox 1 +parc 1 +process 1 +start 1 +small 1 +group 1 +investig 1 +problem 1 +chair 1 +cvpr 1 +ieee 1 +confer 1 +pattern 1 +held 1 +juan 1 +june 1 +favorit 1 +geek 1 +snowboard 1 +mountain 1 +bike 1 +without 1 +extrem 1 +sport 1 +cool 1 +stupid 1 +attitud 1 +last 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..9f48ac9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,94 @@ +process 8 +algebra 7 +dsouza 3 +work 3 +tool 3 +ashvin 2 +specif 2 +design 2 +express 2 +postscript 2 +lnc 2 +cornel 1 +edui 1 +graduat 1 +student 1 +bard 1 +bloom 1 +focu 1 +thesi 1 +research 1 +develop 1 +oftool 1 +support 1 +method 1 +andverif 1 +concurr 1 +system 1 +withrespect 1 +metatheori 1 +becom 1 +immediatelyavail 1 +wide 1 +class 1 +allevi 1 +theproblem 1 +duplic 1 +effort 1 +inher 1 +custom 1 +exampl 1 +prototyp 1 +base 1 +calculu 1 +model 1 +checker 1 +simpl 1 +gso 1 +semant 1 +form 1 +part 1 +input 1 +make 1 +applic 1 +mani 1 +commonli 1 +us 1 +includ 1 +basic 1 +loto 1 +addit 1 +investig 1 +power 1 +order 1 +better 1 +understand 1 +compar 1 +final 1 +exploringappl 1 +techniqu 1 +gener 1 +bdd 1 +algebraterm 1 +full 1 +postscipt 1 +lite 1 +version 1 +also 1 +written 1 +result 1 +presentedth 1 +former 1 +comput 1 +aid 1 +verif 1 +latter 1 +foundat 1 +softwar 1 +technolog 1 +theoret 1 +computersci 1 +june 1 +present 1 +verifi 1 +compass 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..2e46dae2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,53 @@ +page 3 +funda 2 +ergun 2 +cornel 2 +turkei 2 +ever 1 +improv 1 +pagefunda 1 +ergn 1 +mail 1 +eduhi 1 +welcom 1 +home 1 +name 1 +studentin 1 +comput 1 +scienc 1 +dept 1 +work 1 +programcheck 1 +prof 1 +ronitt 1 +rubinfeld 1 +researchpag 1 +also 1 +minor 1 +paint 1 +depart 1 +fine 1 +art 1 +origin 1 +come 1 +izmir 1 +undergrad 1 +bilkentunivers 1 +ankara 1 +research 1 +relat 1 +stuff 1 +warn 1 +might 1 +encounterpag 1 +written 1 +turkish 1 +angri 1 +dog 1 +risk 1 +person 1 +visit 1 +sinc 1 +alwai 1 +heavi 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..06516a21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,183 @@ +tardo 20 +algorithm 12 +comput 11 +symposium 11 +ori 10 +appear 9 +proceed 9 +theori 8 +annual 8 +problem 7 +approxim 7 +version 7 +flow 7 +research 6 +shmoi 6 +plotkin 6 +preliminari 5 +siam 5 +scienc 4 +multicommod 4 +fast 4 +approximationalgorithm 4 +network 4 +current 3 +combinatori 3 +paper 3 +januari 3 +combinator 3 +lovasz 3 +activ 2 +cornel 2 +school 2 +oper 2 +public 2 +complex 2 +optim 2 +discret 2 +improv 2 +klein 2 +stein 2 +journal 2 +goldberg 2 +hopp 2 +polynomi 2 +kleinberg 2 +disjoint 2 +planar 2 +path 2 +graph 2 +springer 2 +verlag 2 +handbook 2 +graham 2 +grotschel 2 +north 2 +holland 2 +theorem 2 +tardosassoci 1 +professor 1 +depart 1 +upson 1 +hallcornel 1 +universityithaca 1 +phone 1 +email 1 +industri 1 +engineeringphon 1 +click 1 +daughter 1 +rebecca 1 +julia 1 +researchrec 1 +broadli 1 +speak 1 +interest 1 +includ 1 +mani 1 +aspect 1 +mostlywork 1 +particular 1 +networkproblem 1 +linear 1 +integ 1 +programmingproblem 1 +recent 1 +paperssurvei 1 +thegener 1 +assign 1 +mathemat 1 +program 1 +bound 1 +cutratio 1 +combinatorica 1 +fasterapproxim 1 +unit 1 +capac 1 +concurr 1 +problemwith 1 +applic 1 +rout 1 +find 1 +spars 1 +cut 1 +oncomput 1 +appearedin 1 +leighton 1 +makedon 1 +tragouda 1 +flowproblem 1 +system 1 +stoc 1 +special 1 +issu 1 +annualacm 1 +fraction 1 +pack 1 +cover 1 +inmathemat 1 +hasappear 1 +ieee 1 +thefound 1 +goeman 1 +williamson 1 +designproblem 1 +discretealgorithm 1 +time 1 +someevacu 1 +ondiscret 1 +quickest 1 +transship 1 +theproceed 1 +steiner 1 +direct 1 +multicut 1 +pathsproblem 1 +high 1 +diamet 1 +proceedingsof 1 +dens 1 +embed 1 +annualiee 1 +foundat 1 +rabani 1 +distribut 1 +packet 1 +switch 1 +arbitrari 1 +fleischer 1 +separ 1 +maxim 1 +violat 1 +comb 1 +inequ 1 +ipco 1 +june 1 +survei 1 +tarjan 1 +sept 1 +vlsi 1 +design 1 +kort 1 +lovaszand 1 +schrijver 1 +strongli 1 +inoptim 1 +intern 1 +congress 1 +ofmathematician 1 +kyoto 1 +tokyo 1 +computersci 1 +annot 1 +bibliographi 1 +inproc 1 +summer 1 +maastricht 1 +netherland 1 +proc 1 +networkoptim 1 +practic 1 +netflow 1 +miniato 1 +itali 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..c39e8c8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,14 @@ +franci 2 +scienc 2 +home 1 +page 1 +graduat 1 +student 1 +univers 1 +california 1 +berkeleymathemat 1 +departmentcomput 1 +departmentcornel 1 +universitycomput 1 +departmenthumorfcc 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..57859a0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,2 @@ +felix 1 +world 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..a0c0cdbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,57 @@ +smith 4 +frederick 3 +cornel 3 +dexter 3 +kozen 3 +kleen 2 +algebra 2 +test 2 +technic 2 +report 2 +univers 2 +link 2 +homepag 2 +program 2 +languag 2 +homepagefrederick 1 +grad 1 +student 1 +upson 1 +halldepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +paper 1 +erni 1 +cohen 1 +complex 1 +juli 1 +complet 1 +decid 1 +april 1 +person 1 +us 1 +greg 1 +morrisett 1 +research 1 +page 1 +class 1 +take 1 +system 1 +multimedia 1 +semant 1 +math 1 +introduct 1 +analysi 1 +epicuri 1 +food 1 +zine 1 +cartalk 1 +home 1 +click 1 +clack 1 +catch 1 +sundai 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..8927b5ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,25 @@ +frank 7 +page 3 +cornel 3 +planet 1 +either 1 +matter 1 +adelstein 1 +post 1 +doctor 1 +associ 1 +xerox 1 +design 1 +research 1 +institut 1 +offic 1 +phone 1 +electron 1 +mail 1 +actual 1 +inform 1 +checkout 1 +improv 1 +happi 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..ca695821 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,65 @@ +fred 7 +cornel 4 +languag 3 +comput 3 +apollo 3 +yuan 2 +softwar 2 +engin 2 +current 2 +soon 2 +http 2 +visual 2 +affili 2 +theori 2 +center 2 +univers 2 +chelmsford 2 +version 1 +feet 1 +shown 1 +resum 1 +project 1 +other 1 +appear 1 +distribut 1 +server 1 +scramo 1 +midi 1 +choreograph 1 +anim 1 +model 1 +postscript 1 +vpla 1 +program 1 +animationlink 1 +massachusett 1 +hewlett 1 +packardlink 1 +previou 1 +scienc 1 +group 1 +binghamton 1 +lawrenc 1 +berkelei 1 +laboratoryinterest 1 +hobbi 1 +section 1 +develop 1 +mayb 1 +next 1 +year 1 +photographi 1 +cello 1 +guitar 1 +aquarium 1 +sciencecornel 1 +home 1 +burl 1 +work 1 +email 1 +fredhsu 1 +snail 1 +drive 1 +peopl 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..94c6c72b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,149 @@ +comput 8 +scienc 5 +deepak 4 +back 4 +main 4 +page 4 +engin 4 +multimedia 4 +prof 4 +univers 3 +cornel 3 +major 3 +cours 3 +system 3 +well 3 +long 3 +world 3 +born 3 +school 3 +balakrishna 2 +undergradu 2 +colleg 2 +india 2 +graduat 2 +pursu 2 +master 2 +degre 2 +actual 2 +four 2 +lucki 2 +part 2 +balakrishnamast 1 +engineeringdepart 1 +sciencecornel 1 +resumeeducationcoursesperson 1 +resum 1 +html 1 +postscript 1 +educ 1 +complet 1 +june 1 +karnataka 1 +region 1 +surathk 1 +interest 1 +relat 1 +oper 1 +artifici 1 +intellig 1 +compil 1 +construct 1 +data 1 +commun 1 +graphic 1 +present 1 +specialis 1 +want 1 +involv 1 +project 1 +deal 1 +server 1 +program 1 +follow 1 +list 1 +taken 1 +fall 1 +semest 1 +brian 1 +smith 1 +advanc 1 +databas 1 +praveen 1 +seshadri 1 +network 1 +srinivasan 1 +keshav 1 +softwar 1 +michael 1 +godfrei 1 +person 1 +start 1 +goe 1 +upon 1 +time 1 +novemb 1 +land 1 +call 1 +bharat 1 +outsid 1 +precis 1 +cute 1 +chubbi 1 +littl 1 +babi 1 +weigh 1 +approxim 1 +pound 1 +took 1 +name 1 +mean 1 +light 1 +went 1 +still 1 +process 1 +chang 1 +incident 1 +probabl 1 +divin 1 +interfer 1 +aishwarya 1 +miss 1 +leav 1 +miniscul 1 +detail 1 +earlier 1 +life 1 +dive 1 +straight 1 +high 1 +nation 1 +public 1 +bangalor 1 +greater 1 +place 1 +someon 1 +noth 1 +better 1 +krec 1 +that 1 +wonder 1 +anoth 1 +year 1 +holidai 1 +conquer 1 +class 1 +never 1 +match 1 +hope 1 +get 1 +somewher 1 +final 1 +here 1 +link 1 +friend 1 +ashish 1 +aastha 1 +indira 1 +ankit 1 +vineet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..97c83681 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,58 @@ +system 6 +uddin 3 +ghia 3 +interest 3 +engin 3 +cornel 3 +comput 2 +work 2 +program 2 +asif 1 +ghiasasif 1 +welcom 1 +mywww 1 +home 1 +page 1 +warn 1 +constructioni 1 +student 1 +scienc 1 +area 1 +distribut 1 +multimedia 1 +bachelor 1 +degre 1 +univers 1 +technolog 1 +karachi 1 +pakistan 1 +sinc 1 +global 1 +inform 1 +solut 1 +present 1 +studi 1 +leav 1 +master 1 +respons 1 +includ 1 +applic 1 +unix 1 +administr 1 +support 1 +educ 1 +network 1 +manag 1 +installationso 1 +number 1 +project 1 +plan 1 +onlin 1 +good 1 +hopefulli 1 +year 1 +publicationsth 1 +follow 1 +music 1 +cricket 1 +astronomyasif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..1c43c7f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,9 @@ +home 3 +page 2 +move 1 +http 1 +berkelei 1 +dglaser 1 +htmlpleas 1 +visit 1 +million 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..60c739a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,333 @@ +comput 9 +scienc 7 +award 7 +program 6 +cornel 5 +receiv 5 +educ 5 +gri 4 +logic 4 +us 4 +paper 4 +munich 3 +teach 3 +topic 3 +colleg 3 +depart 3 +born 3 +year 3 +stanford 3 +twin 3 +tabl 3 +system 3 +work 3 +survei 3 +david 2 +home 2 +professor 2 +institut 2 +technolog 2 +interest 2 +area 2 +languag 2 +formal 2 +tool 2 +inform 2 +biographi 2 +text 2 +symposium 2 +went 2 +wife 2 +elain 2 +later 2 +illinoi 2 +math 2 +help 2 +paul 2 +sinc 2 +made 2 +weather 2 +move 2 +chair 2 +return 2 +research 2 +proud 2 +raman 2 +best 2 +document 2 +serv 2 +associ 2 +also 2 +period 2 +editor 2 +know 2 +time 2 +laugh 2 +pagedavid 1 +grieswilliam 1 +lewi 1 +engineeringdr 1 +methodolog 1 +particular 1 +formaldevelop 1 +relat 1 +programminglanguag 1 +semant 1 +asinterest 1 +researchin 1 +fact 1 +understand 1 +taughta 1 +freshman 1 +sophomor 1 +level 1 +anoverrid 1 +concern 1 +mine 1 +click 1 +follow 1 +item 1 +curriculum 1 +vita 1 +short 1 +written 1 +polya 1 +announc 1 +dimac 1 +link 1 +pagecomput 1 +upson 1 +hallcornel 1 +universityithaca 1 +edushort 1 +griesi 1 +flush 1 +york 1 +spent 1 +iescap 1 +queen 1 +workfor 1 +naval 1 +weapon 1 +laboratori 1 +civilian 1 +amathematician 1 +programm 1 +fewmonth 1 +marri 1 +novemb 1 +master 1 +degreein 1 +assistantship 1 +twogerman 1 +manfr 1 +ruedig 1 +wiehl 1 +write 1 +full 1 +algol 1 +compilerfor 1 +figur 1 +implementrecurs 1 +effici 1 +mani 1 +end 1 +go 1 +almost 1 +three 1 +doctor 1 +bauer 1 +joseph 1 +stoer 1 +germani 1 +june 1 +wasin 1 +numer 1 +analysi 1 +these 1 +notyet 1 +kosher 1 +assist 1 +susan 1 +excit 1 +usual 1 +thebirthdai 1 +april 1 +intown 1 +make 1 +four 1 +birthdai 1 +cake 1 +left 1 +whichha 1 +snow 1 +ever 1 +wasdepart 1 +becam 1 +william 1 +lewisprofessor 1 +engin 1 +guggenheim 1 +fellowship 1 +contentsi 1 +better 1 +known 1 +mytext 1 +writingand 1 +contribut 1 +thewond 1 +good 1 +bloom 1 +wherey 1 +plant 1 +number 1 +contributionsto 1 +ieee 1 +taylor 1 +booth 1 +sigcseaward 1 +outstand 1 +clarkaward 1 +art 1 +theamerican 1 +feder 1 +process 1 +societi 1 +afip 1 +advise 1 +stand 1 +susanowicki 1 +thesi 1 +laid 1 +foundat 1 +proof 1 +correct 1 +ofparallel 1 +notion 1 +interfer 1 +freeness 1 +author 1 +bestpap 1 +langaug 1 +andt 1 +sthesi 1 +dissert 1 +designedand 1 +implement 1 +speak 1 +latex 1 +includ 1 +technic 1 +articl 1 +book 1 +printedor 1 +spoken 1 +abl 1 +speakmathemat 1 +effect 1 +manner 1 +import 1 +goal 1 +read 1 +blind 1 +alreadi 1 +produc 1 +audiocassett 1 +thecomput 1 +board 1 +late 1 +open 1 +officein 1 +washington 1 +began 1 +serious 1 +repres 1 +researchinterest 1 +conduct 1 +taulbe 1 +obtain 1 +essenti 1 +complet 1 +responsesfrom 1 +grant 1 +noother 1 +compar 1 +respons 1 +rate 1 +itrequir 1 +telephon 1 +call 1 +sendin 1 +questionnair 1 +researchassoci 1 +servic 1 +forchair 1 +toward 1 +respect 1 +andrespons 1 +current 1 +acta 1 +informatica 1 +aspect 1 +softwar 1 +concept 1 +andtool 1 +edit 1 +keep 1 +busi 1 +enjoi 1 +takean 1 +individu 1 +willsuggest 1 +substanti 1 +rewrit 1 +believ 1 +servewher 1 +fredb 1 +schneider 1 +springer 1 +verlag 1 +andmonograph 1 +spare 1 +sport 1 +like 1 +golf 1 +softbal 1 +volleybal 1 +swim 1 +tenni 1 +china 1 +isplit 1 +pant 1 +plai 1 +ping 1 +pong 1 +hour 1 +give 1 +alectur 1 +mention 1 +audienc 1 +turnedaround 1 +explain 1 +interpret 1 +spoke 1 +everyonelaugh 1 +howev 1 +whether 1 +told 1 +truth 1 +justsaid 1 +joke 1 +sing 1 +barbershop 1 +andgilbert 1 +sullivan 1 +around 1 +hous 1 +carpentri 1 +wire 1 +remodel 1 +taken 1 +yield 1 +considerablesatisfact 1 +content 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..e80052e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,23 @@ +grinzayd 2 +cornel 2 +alex 1 +homepagealex 1 +grinzaydm 1 +student 1 +comput 1 +sciencecornel 1 +universitytel 1 +email 1 +first 1 +week 1 +link 1 +necx 1 +directinternet 1 +shop 1 +networkcomput 1 +express 1 +damarkwarn 1 +page 1 +bore 1 +learn 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..3e367782 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,46 @@ +comput 3 +cornel 3 +grzegorz 2 +grze 2 +czajkowski 1 +homepag 1 +czajkowskidepart 1 +sciencecornel 1 +universityithaca 1 +offic 1 +second 1 +year 1 +student 1 +program 1 +depart 1 +scienceat 1 +univers 1 +ithaca 1 +york 1 +complet 1 +master 1 +degre 1 +scienc 1 +krakow 1 +poland 1 +current 1 +involv 1 +sever 1 +project 1 +also 1 +charg 1 +administ 1 +cuc 1 +advisor 1 +thorsten 1 +eicken 1 +link 1 +relat 1 +research 1 +architectur 1 +activ 1 +messag 1 +split 1 +last 1 +modifi 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..691848db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,77 @@ +halpern 4 +reason 3 +comput 3 +depart 2 +knowledg 2 +uncertainti 2 +distribut 2 +also 2 +work 2 +talk 2 +give 2 +detail 2 +paper 2 +fall 2 +teach 2 +home 1 +pagejoseph 1 +professorcornel 1 +universitycomput 1 +scienc 1 +upson 1 +hallithaca 1 +cornel 1 +research 1 +focus 1 +applic 1 +game 1 +theori 1 +although 1 +done 1 +continu 1 +interest 1 +topic 1 +fault 1 +toler 1 +program 1 +languag 1 +semant 1 +li 1 +boundari 1 +number 1 +field 1 +recent 1 +gave 1 +econom 1 +princeton 1 +describ 1 +someon 1 +mathemat 1 +call 1 +scientist 1 +economist 1 +abouta 1 +subject 1 +mainli 1 +studi 1 +philosoph 1 +probabl 1 +best 1 +sentenc 1 +descript 1 +like 1 +check 1 +list 1 +public 1 +pointer 1 +abstract 1 +mani 1 +case 1 +avail 1 +activ 1 +resum 1 +cours 1 +sequel 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..fa3251bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,10 @@ +tsuneshi 2 +hashimoto 2 +hashimototsuneshi 1 +hashimotothi 1 +home 1 +page 1 +construct 1 +cstsuneshi 1 +hashi 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..b2628fcf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,24 @@ +hayden 3 +system 3 +mark 2 +cornel 2 +distribut 2 +commun 2 +offic 1 +upson 1 +univers 1 +ithaca 1 +fall 1 +teach 1 +tast 1 +unix 1 +interest 1 +horu 1 +ensembl 1 +nuprl 1 +proof 1 +develop 1 +hockei 1 +last 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..a63afff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,6 @@ +heji 2 +improv 1 +home 1 +page 1 +cyber 1 +pond 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..cc733adf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,27 @@ +cornel 4 +deyu 3 +student 2 +comput 2 +scienc 2 +home 1 +page 1 +graduat 1 +universitydept 1 +upson 1 +hallithaca 1 +third 1 +year 1 +born 1 +shanghai 1 +china 1 +receiv 1 +undergradu 1 +degre 1 +berkelei 1 +faculti 1 +advisor 1 +thorsten 1 +eicken 1 +come 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..f5657a81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,59 @@ +cornel 6 +comput 5 +univers 4 +huang 3 +chines 3 +christian 3 +jing 2 +depart 2 +scienc 2 +vision 2 +fellowship 2 +home 1 +page 1 +upson 1 +hall 1 +ithaca 1 +student 1 +thedepart 1 +scienceat 1 +receiv 1 +bachelorand 1 +master 1 +degre 1 +appli 1 +mathemat 1 +tsinghua 1 +beij 1 +chinami 1 +academ 1 +interest 1 +multimedia 1 +system 1 +work 1 +professor 1 +ramin 1 +zabih 1 +imag 1 +retriev 1 +video 1 +process 1 +motion 1 +track 1 +us 1 +link 1 +annot 1 +bibliographi 1 +pattern 1 +recognit 1 +relat 1 +machin 1 +learn 1 +optim 1 +check 1 +evangel 1 +resourc 1 +center 1 +mission 1 +back 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..0f02e546 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,35 @@ +page 3 +chin 2 +chen 2 +home 2 +address 2 +practic 2 +distribut 2 +system 2 +welcom 1 +current 1 +mapl 1 +ithaca 1 +mail 1 +icchen 1 +cornel 1 +perman 1 +sung 1 +taipei 1 +taiwan 1 +class 1 +spring 1 +comput 1 +graphic 1 +practicum 1 +databas 1 +manag 1 +album 1 +resum 1 +new 1 +china 1 +time 1 +nctu 1 +construct 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..a3d0c942 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,27 @@ +engin 3 +system 3 +page 2 +comput 2 +home 1 +indira 1 +malik 1 +depart 1 +scienc 1 +master 1 +imalik 1 +cornel 1 +resum 1 +post 1 +script 1 +cours 1 +program 1 +softwar 1 +advanc 1 +databas 1 +network 1 +multimedia 1 +visit 1 +high 1 +school 1 +tap 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..ad70f452 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,80 @@ +comput 7 +system 5 +project 4 +indira 3 +scienc 3 +oper 3 +graphic 3 +java 3 +cornel 2 +engin 2 +inform 2 +camera 2 +applet 2 +vidyaprakash 1 +vidyaprakashmast 1 +engineeringclass 1 +dept 1 +sciencecornel 1 +universitywelcom 1 +homepag 1 +current 1 +student 1 +depart 1 +univers 1 +ithaca 1 +degre 1 +colleg 1 +technolog 1 +coimbator 1 +india 1 +cours 1 +taken 1 +cornelluniversityfal 1 +practicum 1 +specif 1 +hoca 1 +softwar 1 +multimedia 1 +audio 1 +process 1 +toolkit 1 +manag 1 +polici 1 +spring 1 +cspracticum 1 +anim 1 +magic 1 +carpet 1 +colloqium 1 +manageri 1 +financesumm 1 +independ 1 +research 1 +tracingin 1 +perspectivetransform 1 +click 1 +postscript 1 +version 1 +myresumeclick 1 +perspect 1 +transformssom 1 +interest 1 +site 1 +cool 1 +sgamelan 1 +directori 1 +calvinand 1 +hobb 1 +galleri 1 +gif 1 +indian 1 +recip 1 +chicker 1 +wood 1 +drive 1 +nashvil 1 +tennesse 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..41525e28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,90 @@ +us 3 +system 3 +manual 3 +write 3 +pageioi 2 +home 2 +cornel 2 +softwar 2 +base 2 +engin 2 +test 2 +packag 2 +file 2 +short 2 +detail 2 +come 2 +homeless 1 +lamioi 1 +current 1 +research 1 +assist 1 +prof 1 +brian 1 +smith 1 +comput 1 +scienc 1 +depart 1 +univers 1 +interest 1 +multi 1 +media 1 +parallel 1 +program 1 +instruct 1 +set 1 +environ 1 +knowledg 1 +guidelin 1 +good 1 +extens 1 +code 1 +doesn 1 +cover 1 +much 1 +script 1 +section 1 +suit 1 +valuabl 1 +programm 1 +postscript 1 +version 1 +complet 1 +includ 1 +templat 1 +sourc 1 +document 1 +introduct 1 +descript 1 +user 1 +remot 1 +machin 1 +index 1 +inform 1 +tutori 1 +right 1 +week 1 +put 1 +togeth 1 +knowledgebas 1 +repositori 1 +try 1 +spam 1 +site 1 +multim 1 +directori 1 +get 1 +start 1 +virtual 1 +realiti 1 +conferenc 1 +work 1 +prototyp 1 +mpeg 1 +video 1 +server 1 +http 1 +protocol 1 +spring 1 +homework 1 +solut 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..b73d6dc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,135 @@ +jackson 10 +nuprl 6 +paul 5 +abstract 5 +cornel 4 +develop 4 +design 4 +avail 4 +format 4 +theori 4 +comput 3 +algebra 3 +thesi 3 +proof 3 +full 3 +text 3 +postscript 3 +list 3 +home 2 +univers 2 +depart 2 +research 2 +softwar 2 +editor 2 +confer 2 +circuit 2 +proceed 2 +hardwar 2 +page 2 +access 2 +next 2 +includ 2 +pagepaul 1 +post 1 +doctor 1 +associatecornel 1 +mail 1 +eduwww 1 +http 1 +info 1 +peopl 1 +htmladdress 1 +scienc 1 +upson 1 +hall 1 +ithaca 1 +usaphon 1 +intereststheorem 1 +prove 1 +environ 1 +formal 1 +method 1 +andhardwar 1 +synthesi 1 +scientif 1 +program 1 +linkag 1 +tool 1 +engin 1 +informationmi 1 +entitl 1 +enhanc 1 +developmentsystem 1 +appli 1 +paper 1 +explor 1 +construct 1 +type 1 +bundi 1 +intern 1 +automateddeduct 1 +lectur 1 +note 1 +artif 1 +intellig 1 +springer 1 +verlag 1 +june 1 +bout 1 +stavrid 1 +melham 1 +inter 1 +theorem 1 +prover 1 +ifip 1 +transactionsa 1 +north 1 +holland 1 +toolkit 1 +float 1 +point 1 +thenuprl 1 +system 1 +theadvanc 1 +workshop 1 +correct 1 +methodolog 1 +elsevi 1 +nuprlth 1 +project 1 +world 1 +wide 1 +document 1 +commun 1 +live 1 +session 1 +basic 1 +load 1 +collect 1 +still 1 +need 1 +workon 1 +make 1 +someon 1 +els 1 +getround 1 +pai 1 +attent 1 +sometim 1 +month 1 +hypertext 1 +thetheori 1 +foreach 1 +introduct 1 +summari 1 +definit 1 +andtheorem 1 +thepolynomi 1 +relat 1 +moment 1 +shouldb 1 +coupl 1 +dai 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..c33fb575 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,3 @@ +hani 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..1823c4e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,21 @@ +cornel 2 +tibor 1 +jnositibor 1 +jnosiwelcom 1 +mywww 1 +home 1 +page 1 +perman 1 +constructionoffic 1 +upson 1 +hall 1 +univers 1 +ithaca 1 +usaoffic 1 +phone 1 +interest 1 +site 1 +project 1 +zenotibor 1 +jnosi 1 +janosi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..49bfbeed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,13 @@ +janwun 3 +cornel 1 +master 1 +engin 1 +student 1 +comput 1 +scienc 1 +depart 1 +address 1 +mapl 1 +avenu 1 +ithaca 1 +telephon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..8004052f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,183 @@ +languag 16 +morrisett 14 +program 13 +research 9 +greg 7 +compil 6 +harper 6 +standard 5 +system 5 +type 5 +also 4 +robert 4 +technic 4 +version 4 +gregori 4 +interest 3 +cornel 3 +high 3 +level 3 +softwar 3 +implement 3 +us 3 +memori 3 +report 3 +appear 3 +publish 3 +sigplan 3 +symposium 3 +principl 3 +comput 2 +ithaca 2 +offic 2 +phone 2 +time 2 +construct 2 +code 2 +support 2 +semant 2 +direct 2 +abstract 2 +design 2 +manag 2 +polymorph 2 +decemb 2 +tarditi 2 +cheng 2 +stone 2 +workshop 2 +extend 2 +proc 2 +proceed 2 +june 2 +portabl 2 +multiprocess 2 +jersei 2 +andrew 2 +tolmach 2 +project 2 +home 2 +page 2 +faculti 1 +assist 1 +professor 1 +scienc 1 +univers 1 +upson 1 +hall 1 +tabl 1 +content 1 +intereststeachingselect 1 +papersrel 1 +linksperson 1 +informationresearch 1 +interestsmi 1 +primari 1 +develop 1 +ofadvanc 1 +particularli 1 +interestedin 1 +forbuild 1 +includ 1 +operatingsystem 1 +distribut 1 +late 1 +focus 1 +onth 1 +issu 1 +kept 1 +safelanguag 1 +concentr 1 +produc 1 +faster 1 +consum 1 +less 1 +hack 1 +bit 1 +bring 1 +power 1 +base 1 +toolsfrom 1 +theori 1 +partial 1 +evalu 1 +interpret 1 +gener 1 +specif 1 +real 1 +systemssoftwar 1 +teachingc 1 +fall 1 +advanc 1 +spring 1 +select 1 +paperssemant 1 +septemb 1 +gzip 1 +postscript 1 +thesi 1 +optim 1 +confer 1 +perform 1 +safetythrough 1 +closur 1 +convers 1 +yasuhiko 1 +minamid 1 +juli 1 +model 1 +matthia 1 +felleisen 1 +conf 1 +function 1 +andcomput 1 +architectur 1 +reportcmu 1 +notecmu 1 +intensionaltyp 1 +analysi 1 +annual 1 +francisco 1 +januari 1 +optimist 1 +parallelizationgreg 1 +mauric 1 +herlihi 1 +octob 1 +refin 1 +first 1 +class 1 +store 1 +state 1 +copenhagen 1 +denmark 1 +lock 1 +platform 1 +fourth 1 +practic 1 +parallel 1 +diego 1 +interfac 1 +princeton 1 +ad 1 +thread 1 +eric 1 +cooper 1 +relat 1 +link 1 +mark 1 +leon 1 +resourc 1 +member 1 +carnegi 1 +mellon 1 +line 1 +inform 1 +orient 1 +bibliographi 1 +depart 1 +scienceperson 1 +informationhom 1 +address 1 +warren 1 +road 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..309c8f5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,15 @@ +jiun 2 +address 2 +resum 1 +java 1 +current 1 +mapl 1 +avenu 1 +ithaca 1 +email 1 +jhlin 1 +cornel 1 +perman 1 +shing 1 +taipei 1 +taiwan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..fb9814a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,5 @@ +jerri 2 +cornel 1 +edujerri 1 +project 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..4182db9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,115 @@ +china 12 +chines 9 +univers 8 +scienc 7 +comput 5 +cornel 5 +graduat 4 +student 3 +depart 3 +binghamton 3 +taiwan 3 +new 3 +rank 3 +home 2 +page 2 +twin 2 +sister 2 +phone 2 +suni 2 +mathemat 2 +nation 2 +fellowship 2 +site 2 +internet 2 +magazin 2 +music 2 +digest 2 +homepag 2 +servic 2 +wang 1 +wangphd 1 +upson 1 +hallithaca 1 +offic 1 +email 1 +jiawang 1 +first 1 +year 1 +state 1 +york 1 +degre 1 +transfer 1 +nankai 1 +tianjin 1 +honor 1 +award 1 +barri 1 +goldwat 1 +scholar 1 +engin 1 +sciencefound 1 +research 1 +famili 1 +cool 1 +link 1 +hongkong 1 +beij 1 +review 1 +chinaand 1 +relat 1 +daili 1 +cbnet 1 +forum 1 +chinanet 1 +stamp 1 +time 1 +window 1 +chinesecalendar 1 +directori 1 +mediainform 1 +educ 1 +histori 1 +cultur 1 +hongkonglaserdisccent 1 +internetdistribut 1 +multilingu 1 +softwar 1 +ryan 1 +smovieplex 1 +sceneri 1 +pictur 1 +tour 1 +entertain 1 +sheng 1 +tian 1 +diwww 1 +futur 1 +interest 1 +america 1 +best 1 +school 1 +liber 1 +art 1 +film 1 +ieee 1 +societi 1 +monei 1 +foundat 1 +peterson 1 +guid 1 +postcard 1 +program 1 +incomput 1 +thesenior 1 +virtual 1 +tourist 1 +worldmap 1 +yahoo 1 +christian 1 +mandarin 1 +cssa 1 +weather 1 +stoni 1 +brook 1 +ucla 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..bf01ff0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,45 @@ +vision 3 +justin 2 +miller 2 +home 2 +page 2 +mani 2 +research 2 +navi 1 +uniform 1 +current 1 +master 1 +engin 1 +student 1 +comput 1 +scienc 1 +colleg 1 +ofengin 1 +cornel 1 +univers 1 +semest 1 +teach 1 +assist 1 +com 1 +machin 1 +long 1 +night 1 +found 1 +robot 1 +csrvl 1 +assistantwork 1 +prof 1 +ramin 1 +zabih 1 +primari 1 +interest 1 +ismachin 1 +particularli 1 +level 1 +imag 1 +process 1 +gener 1 +informationsom 1 +rant 1 +project 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..10b2696c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,137 @@ +comput 5 +new 4 +scienc 3 +high 3 +softwar 3 +cornel 2 +work 2 +engin 2 +school 2 +hillsboro 2 +cool 2 +distribut 2 +system 2 +game 2 +parallel 2 +mpeg 2 +encod 2 +york 2 +press 2 +jeff 1 +moorejeff 1 +moorewel 1 +current 1 +graduat 1 +student 1 +mastersof 1 +receiv 1 +purdu 1 +univers 1 +west 1 +lafayett 1 +indiana 1 +went 1 +oregon 1 +suburb 1 +portland 1 +employmentmi 1 +resum 1 +anyon 1 +interest 1 +intel 1 +internet 1 +product 1 +divis 1 +creat 1 +spring 1 +classesnba 1 +thrive 1 +inform 1 +revolut 1 +entertain 1 +sectorc 1 +practic 1 +systemsc 1 +practicum 1 +cornellopoli 1 +perform 1 +architectur 1 +network 1 +optim 1 +researchfal 1 +classesc 1 +technolog 1 +techniquec 1 +formal 1 +methodsc 1 +multimedia 1 +research 1 +paperc 1 +colloquiumc 1 +tool 1 +seminar 1 +present 1 +opendoc 1 +mfcoptim 1 +researchsoftwar 1 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 1 +companiesintelsilicon 1 +graphicsibmsunapplemagazinespc 1 +magazinepc 1 +weekpc 1 +computingcomput 1 +shopperwindow 1 +sourcescomput 1 +lifemacusermacweekinteract 1 +weekfamili 1 +pccomput 1 +worldelectron 1 +newspapersusa 1 +todaywal 1 +street 1 +journalnew 1 +timesphiladelphia 1 +onlineth 1 +daili 1 +worldwideth 1 +dalla 1 +morn 1 +opinionsth 1 +detroit 1 +free 1 +gopherth 1 +knoxvil 1 +sentinelth 1 +leader 1 +onlinelat 1 +serviceth 1 +nugget 1 +newspap 1 +sister 1 +oregonrworld 1 +orang 1 +counti 1 +registerth 1 +francisco 1 +chronicl 1 +examinersan 1 +jose 1 +mercuryth 1 +seattl 1 +timesnando 1 +netusa 1 +todayboston 1 +globeportland 1 +herald 1 +main 1 +sundai 1 +telegramvisitor 1 +sinc 1 +januari 1 +campu 1 +address 1 +mapl 1 +fdithaca 1 +last 1 +updat 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..bda268f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,81 @@ +project 7 +lui 3 +java 3 +enabl 3 +browser 3 +system 3 +home 2 +comput 2 +us 2 +would 2 +anim 2 +interest 2 +current 2 +distribut 2 +video 2 +page 2 +view 2 +clock 2 +jose 1 +fernandez 1 +pagejos 1 +fernandezjos 1 +fernandezmast 1 +engin 1 +depart 1 +sciencecornel 1 +universityithaca 1 +address 1 +mapl 1 +avenu 1 +ebithaca 1 +mail 1 +joselui 1 +cornel 1 +scroll 1 +text 1 +sign 1 +resum 1 +curriculum 1 +vita 1 +multimedia 1 +graphic 1 +imag 1 +transit 1 +rivl 1 +exampl 1 +presentationc 1 +pyramania 1 +game 1 +spaceship 1 +battl 1 +report 1 +hoca 1 +design 1 +code 1 +oper 1 +implement 1 +multitask 1 +virtual 1 +memori 1 +meng 1 +autonom 1 +vehicl 1 +simul 1 +hobbi 1 +photographi 1 +click 1 +picturesmusiccomputerswrit 1 +direct 1 +crazi 1 +movi 1 +actor 1 +recruit 1 +friend 1 +time 1 +courtesi 1 +bill 1 +giel 1 +visitor 1 +number 1 +better 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..cd04e980 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,30 @@ +hurtado 2 +cornel 2 +engin 2 +project 2 +julin 1 +home 1 +pagejulin 1 +click 1 +curriculum 1 +vita 1 +universitymast 1 +busi 1 +administr 1 +johnson 1 +graduat 1 +school 1 +managementmast 1 +depart 1 +comput 1 +science 1 +mail 1 +colombia 1 +linda 1 +er 1 +master 1 +distribut 1 +system 1 +autonom 1 +vehicl 1 +simul 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..46c8e175 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,28 @@ +page 2 +current 2 +cornel 2 +texa 2 +janeen 1 +homepagejaneen 1 +reich 1 +welcom 1 +home 1 +univers 1 +complet 1 +comput 1 +scienc 1 +august 1 +septemb 1 +join 1 +system 1 +group 1 +instrument 1 +dalla 1 +send 1 +email 1 +jreich 1 +edumi 1 +resum 1 +ad 1 +favorit 1 +thing 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..a6d6373a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,120 @@ +comput 7 +home 5 +list 4 +engin 3 +bodi 3 +mail 3 +camaro 3 +page 3 +chevi 3 +system 2 +cours 2 +master 2 +high 2 +facial 2 +car 2 +stock 2 +ford 2 +septemb 2 +come 2 +jodi 1 +shapirojodi 1 +shapiroeduc 1 +receiv 1 +univers 1 +massachusett 1 +amherst 1 +current 1 +cornel 1 +get 1 +meng 1 +scienc 1 +graduat 1 +resum 1 +fall 1 +multimedia 1 +systemsc 1 +softwar 1 +engineeringe 1 +network 1 +telecommunicationc 1 +researchspr 1 +machin 1 +visionc 1 +perform 1 +systemse 1 +capac 1 +networksnba 1 +thrive 1 +inform 1 +revolutionc 1 +researchma 1 +automot 1 +engineeringinterest 1 +project 1 +design 1 +implement 1 +dynam 1 +gener 1 +synchron 1 +speech 1 +animationlow 1 +cost 1 +portabl 1 +desktop 1 +videoconferenc 1 +window 1 +parallel 1 +object 1 +recognit 1 +applic 1 +recognitioninterest 1 +main 1 +interest 1 +obvious 1 +memberof 1 +firebird 1 +yourselfelectron 1 +fuel 1 +inject 1 +although 1 +alwayshav 1 +time 1 +particip 1 +also 1 +designingan 1 +ground 1 +pageefi 1 +pagethes 1 +pictur 1 +sold 1 +speed 1 +gearsmodif 1 +hypertech 1 +stage 1 +chip 1 +flowmast 1 +exhaust 1 +hurst 1 +shifter 1 +grant 1 +steer 1 +wheel 1 +filter 1 +mustang 1 +bought 1 +still 1 +speedmodif 1 +gear 1 +accel 1 +plug 1 +motorsport 1 +wiresbest 1 +mile 1 +mphbest 1 +januari 1 +pagenumb 1 +visit 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..b954d3a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,8 @@ +julia 1 +home 1 +pagejulia 1 +komissarchik 1 +juliak 1 +cornel 1 +eduto 1 +continu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..b4402927 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,104 @@ +cornel 4 +network 4 +julian 3 +engin 3 +comput 3 +scienc 3 +theori 3 +video 3 +full 3 +pelenur 2 +avail 2 +univers 2 +offic 2 +center 2 +current 2 +workstat 2 +develop 2 +grow 2 +fault 2 +toler 2 +design 2 +implement 2 +fast 2 +techniqu 2 +emilio 1 +better 1 +pictur 1 +master 1 +graduat 1 +campu 1 +adress 1 +summit 1 +ithaca 1 +centerithaca 1 +upson 1 +hallcornel 1 +occup 1 +fulltim 1 +student 1 +teach 1 +assist 1 +databas 1 +administr 1 +recent 1 +project 1 +global 1 +pointer 1 +complet 1 +toolkit 1 +write 1 +parallel 1 +program 1 +independ 1 +platform 1 +topolog 1 +compil 1 +sparcstat 1 +ethernet 1 +wfinger 1 +system 1 +search 1 +home 1 +page 1 +document 1 +world 1 +wide 1 +cyberserv 1 +need 1 +faster 1 +httpserver 1 +fulfil 1 +increas 1 +demand 1 +servic 1 +addit 1 +commerci 1 +high 1 +becom 1 +critic 1 +paper 1 +describ 1 +distribut 1 +http 1 +server 1 +us 1 +horu 1 +prvf 1 +poss 1 +realli 1 +thegoal 1 +achiev 1 +screenmot 1 +cluster 1 +showthat 1 +innov 1 +snarf 1 +blast 1 +capit 1 +hardwar 1 +produc 1 +transferwith 1 +compress 1 +color 1 +screen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..6bd40388 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,92 @@ +cornel 4 +home 3 +page 3 +theori 3 +program 3 +languag 3 +resourc 3 +type 3 +paper 3 +art 3 +czar 3 +comput 2 +scienc 2 +depart 2 +interest 2 +includ 2 +research 2 +work 2 +nuprl 2 +slide 2 +fine 2 +hockei 2 +theatr 2 +jason 1 +hickei 1 +graduat 1 +student 1 +supervis 1 +robertconst 1 +summari 1 +current 1 +statu 1 +practic 1 +great 1 +thefox 1 +project 1 +especi 1 +markleon 1 +mainli 1 +softwar 1 +verif 1 +tool 1 +specif 1 +formalsystem 1 +develop 1 +universitydepart 1 +make 1 +higher 1 +levelmodul 1 +abstract 1 +data 1 +relat 1 +recent 1 +publish 1 +cornella 1 +bibliographi 1 +publishedat 1 +bellcor 1 +also 1 +talk 1 +havegiven 1 +seminar 1 +pretti 1 +technic 1 +theygiv 1 +overview 1 +done 1 +want 1 +sequenc 1 +identif 1 +galleryof 1 +mine 1 +tryth 1 +orth 1 +fineart 1 +forum 1 +cucshockei 1 +backcountri 1 +take 1 +look 1 +thebackcountri 1 +perform 1 +servic 1 +publicli 1 +maintainedsoftwar 1 +equip 1 +back 1 +hockeyfor 1 +info 1 +schedul 1 +forth 1 +center 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..b0f67df4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,36 @@ +kamijo 4 +koichi 2 +juli 2 +english 2 +japanes 2 +back 2 +japan 2 +kamijokoichi 1 +welcom 1 +home 1 +page 1 +construct 1 +reach 1 +us 1 +thing 1 +sell 1 +sold 1 +click 1 +like 1 +sale 1 +class 1 +papershometownseduc 1 +work 1 +experienceskoichi 1 +muriel 1 +ithaca 1 +cornel 1 +kkamijoh 1 +vnet 1 +go 1 +accept 1 +access 1 +time 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..cd0893c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,115 @@ +cornel 6 +univers 4 +karl 3 +robot 3 +micro 3 +manipul 3 +friedrich 2 +bhringer 2 +karlsruh 2 +comput 2 +scienc 2 +stanford 2 +laboratori 2 +build 2 +interest 2 +andassembl 2 +microfabr 2 +strategi 2 +work 2 +page 2 +level 2 +dipl 1 +inform 1 +graduat 1 +student 1 +dept 1 +upson 1 +hall 1 +ithaca 1 +email 1 +educurr 1 +address 1 +gate 1 +current 1 +research 1 +nanofabr 1 +facil 1 +actuat 1 +arrai 1 +implementmicro 1 +gener 1 +innew 1 +devic 1 +handl 1 +part 1 +withprogramm 1 +forc 1 +vector 1 +field 1 +also 1 +investig 1 +design 1 +autom 1 +structur 1 +earlier 1 +germani 1 +includ 1 +develop 1 +better 1 +graph 1 +layout 1 +algorithm 1 +thesi 1 +advisor 1 +professorbruc 1 +donald 1 +founder 1 +director 1 +vision 1 +project 1 +close 1 +collabor 1 +professor 1 +noel 1 +macdonaldand 1 +hisresearch 1 +group 1 +public 1 +document 1 +confer 1 +announc 1 +call 1 +paper 1 +anim 1 +video 1 +sculptur 1 +invis 1 +cantilev 1 +model 1 +frank 1 +lloyd 1 +wright 1 +fallingwat 1 +articl 1 +york 1 +time 1 +magazin 1 +march 1 +wire 1 +octob 1 +offic 1 +nano 1 +outin 1 +kwon 1 +club 1 +find 1 +lindseth 1 +climb 1 +wall 1 +navig 1 +previou 1 +higher 1 +deeper 1 +next 1 +pagekarl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..58ddb298 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,232 @@ +layer 12 +stack 10 +interest 8 +protocol 8 +horu 8 +properti 7 +cornel 5 +distribut 5 +comput 4 +problem 4 +specifi 4 +applic 4 +provid 4 +karr 3 +work 3 +project 3 +concentr 3 +research 3 +engin 3 +commun 3 +weak 3 +consist 3 +java 3 +develop 3 +wide 3 +david 2 +univers 2 +system 2 +mathemat 2 +statist 2 +html 2 +includ 2 +verifi 2 +verif 2 +us 2 +guarante 2 +techniqu 2 +desir 2 +order 2 +part 2 +applet 2 +messag 2 +whose 2 +prone 2 +area 2 +network 2 +gener 2 +disconnect 2 +site 2 +time 2 +karrdavid 1 +karrphd 1 +studentdepart 1 +sciencecornel 1 +upson 1 +hall 1 +ithaca 1 +mail 1 +edui 1 +student 1 +depart 1 +scienceat 1 +thehoru 1 +architectur 1 +reliabl 1 +withprofessor 1 +kenneth 1 +birmananddr 1 +robbert 1 +reness 1 +minor 1 +field 1 +perform 1 +protocolsmi 1 +implement 1 +dissert 1 +formalspecif 1 +ofhoru 1 +tempor 1 +logic 1 +action 1 +variousinterest 1 +fundament 1 +usedin 1 +furthermor 1 +write 1 +formula 1 +assum 1 +stylefor 1 +might 1 +itsinterfac 1 +depend 1 +andbelow 1 +emploi 1 +straightforward 1 +agiven 1 +certain 1 +thetop 1 +condit 1 +even 1 +unusualcombin 1 +atyp 1 +ultim 1 +user 1 +systemsshould 1 +abl 1 +call 1 +help 1 +constructcustom 1 +omit 1 +unnecessari 1 +avoid 1 +theirassoci 1 +cost 1 +confid 1 +sufficientto 1 +intend 1 +basi 1 +thesecur 1 +harden 1 +give 1 +rough 1 +demonstr 1 +propos 1 +method 1 +ofverifi 1 +initi 1 +stem 1 +thepromis 1 +suit 1 +variousguarante 1 +programm 1 +passingenviron 1 +host 1 +crash 1 +delayedor 1 +lost 1 +softwar 1 +haswork 1 +componentswer 1 +failur 1 +feel 1 +featur 1 +offer 1 +considerablepromis 1 +consistencywhil 1 +becom 1 +ofdistribut 1 +look 1 +revis 1 +control 1 +filesin 1 +environ 1 +distributedenviron 1 +partitionedinto 1 +portion 1 +notion 1 +wouldallow 1 +multipl 1 +temporarili 1 +make 1 +progress 1 +concurr 1 +performancemi 1 +correct 1 +measur 1 +high 1 +avail 1 +respons 1 +andeffici 1 +resourc 1 +clearli 1 +equal 1 +import 1 +larg 1 +appar 1 +random 1 +ofsystem 1 +load 1 +activ 1 +notabl 1 +except 1 +dedic 1 +parallelmachin 1 +behavior 1 +also 1 +suscept 1 +analysi 1 +though 1 +differ 1 +kind 1 +encourag 1 +javath 1 +world 1 +applicationwith 1 +mani 1 +possibl 1 +explor 1 +experi 1 +simpl 1 +wai 1 +hypertext 1 +tonavig 1 +inform 1 +appear 1 +myweb 1 +lego 1 +toi 1 +hack 1 +execut 1 +code 1 +anetscap 1 +browser 1 +download 1 +exampl 1 +abirthdai 1 +puzzl 1 +calcul 1 +tool 1 +forverifi 1 +profession 1 +affiliationsi 1 +member 1 +ieee 1 +andmaa 1 +informationseemi 1 +linksfor 1 +topic 1 +find 1 +last 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..8c2efb53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,137 @@ +system 7 +click 5 +comput 4 +work 4 +graphic 4 +home 3 +engin 3 +oper 3 +cornel 3 +hogman 3 +qualcomm 2 +softwar 2 +distribut 2 +network 2 +cours 2 +took 2 +multimedia 2 +virtual 2 +game 2 +break 2 +code 2 +interfac 2 +screenshot 2 +view 2 +postscript 2 +scene 2 +transit 2 +effect 2 +languag 2 +rivl 2 +develop 2 +project 2 +favorit 2 +cool 2 +lot 2 +welcom 1 +pagekartik 1 +kapadiamast 1 +engineeringclass 1 +depart 1 +sciencecornel 1 +universityithaca 1 +address 1 +dabnei 1 +drive 1 +diego 1 +california 1 +phone 1 +mail 1 +kkapadia 1 +comcurr 1 +incorporatedmi 1 +main 1 +area 1 +interest 1 +spring 1 +architectur 1 +high 1 +capac 1 +inform 1 +fall 1 +projectshoca 1 +chiphoca 1 +pronounc 1 +hodja 1 +full 1 +fledg 1 +chip 1 +hypothet 1 +instruct 1 +processor 1 +support 1 +featur 1 +like 1 +multitask 1 +memori 1 +enjoy 1 +quick 1 +singl 1 +player 1 +window 1 +platform 1 +good 1 +sourc 1 +entertain 1 +take 1 +gameboard 1 +help 1 +screen 1 +design 1 +document 1 +rivlrivl 1 +stand 1 +resolut 1 +independ 1 +video 1 +univers 1 +jonathan 1 +swartz 1 +brian 1 +smith 1 +excel 1 +applic 1 +enhanc 1 +incorpor 1 +primit 1 +implement 1 +present 1 +simul 1 +railroad 1 +master 1 +visual 1 +captur 1 +scientif 1 +aspect 1 +lai 1 +track 1 +vehicl 1 +model 1 +dynam 1 +motion 1 +us 1 +combin 1 +open 1 +inventor 1 +opengl 1 +realiti 1 +facil 1 +resumesom 1 +site 1 +star 1 +mpeg 1 +clip 1 +music 1 +page 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..c78377ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,63 @@ +publish 4 +letter 4 +thing 2 +write 2 +wrote 2 +suspect 2 +true 1 +writingsi 1 +much 1 +written 1 +proudof 1 +flame 1 +mine 1 +other 1 +morethought 1 +conscienti 1 +objector 1 +arm 1 +conflict 1 +myfirst 1 +grand 1 +rapid 1 +press 1 +fewyear 1 +back 1 +gulf 1 +sinc 1 +time 1 +howev 1 +vestart 1 +keep 1 +work 1 +onlin 1 +lest 1 +wonder 1 +also 1 +poetri 1 +dprobabl 1 +rather 1 +famou 1 +essayist 1 +anyhow 1 +like 1 +argu 1 +dread 1 +mess 1 +go 1 +byron 1 +center 1 +asuburb 1 +mile 1 +went 1 +high 1 +school 1 +unabomb 1 +mathematician 1 +mathematiciansar 1 +terrorist 1 +think 1 +editor 1 +newspap 1 +wide 1 +read 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..f0c43819 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,268 @@ +cornel 6 +katherin 5 +comput 5 +info 5 +html 5 +page 4 +scienc 4 +ithaca 4 +system 4 +robbert 4 +reness 4 +austin 4 +home 3 +distribut 3 +birman 3 +werner 3 +vogel 3 +group 3 +hall 2 +work 2 +reliabl 2 +horu 2 +commun 2 +appear 2 +proceed 2 +sigop 2 +brad 2 +glade 2 +ieee 2 +report 2 +depart 2 +univers 2 +relat 2 +infodistribut 2 +industri 2 +infocompani 2 +place 2 +lisboa 2 +colorado 2 +lectur 2 +httpd 2 +ncsa 2 +imag 2 +sourc 2 +xmosaic 2 +other 2 +journal 2 +school 2 +databas 2 +women 2 +cook 2 +electron 2 +fashion 2 +music 2 +wall 2 +network 2 +guokguo 1 +educornel 1 +universitydept 1 +upson 1 +student 1 +interest 1 +scalabl 1 +multicastprotocol 1 +project 1 +direct 1 +recent 1 +publicationskatherin 1 +structur 1 +virtual 1 +synchroni 1 +explor 1 +bound 1 +ofvirtu 1 +synchron 1 +european 1 +workshop 1 +connemara 1 +ireland 1 +septemb 1 +lui 1 +rodrigu 1 +antonio 1 +sargento 1 +paulo 1 +verisimo 1 +transpar 1 +light 1 +weight 1 +servic 1 +symposiumon 1 +niagara 1 +lake 1 +canada 1 +octob 1 +also 1 +avail 1 +technic 1 +kenneth 1 +mark 1 +hayden 1 +takako 1 +hickei 1 +dalia 1 +malki 1 +alex 1 +vaysburd 1 +flexibl 1 +march 1 +research 1 +systemscomput 1 +networkscool 1 +toolsbibliographyconferencesjournalsacademia 1 +infoschool 1 +infojob 1 +searchinterest 1 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 1 +inforesearch 1 +systempointershoru 1 +commerci 1 +productspringtotemtransisx 1 +kernel 1 +arizona 1 +microsystemslab 1 +networksmulticast 1 +protocolsn 1 +fromlblgun 1 +cool 1 +tool 1 +consortium 1 +gener 1 +theproject 1 +simpl 1 +beginn 1 +sguid 1 +quickrefer 1 +htmldocument 1 +tabl 1 +content 1 +common 1 +gatewai 1 +interfac 1 +overview 1 +find 1 +file 1 +finder 1 +mosaic 1 +binari 1 +document 1 +uiuc 1 +cern 1 +java 1 +hotjava 1 +bibliographybibliographi 1 +oldindex 1 +index 1 +confer 1 +pointer 1 +hpdc 1 +ftc 1 +sosp 1 +srd 1 +icdc 1 +jsac 1 +elsevi 1 +scienceacademia 1 +motorola 1 +openingsibmdelltandemtiapplebel 1 +atlant 1 +texa 1 +dept 1 +ucsd 1 +gradschool 1 +advic 1 +gradjob 1 +search 1 +ukinterest 1 +life 1 +weather 1 +moviesbailei 1 +concertslibrari 1 +hightechin 1 +institut 1 +inesc 1 +copper 1 +mountain 1 +resort 1 +summit 1 +counti 1 +coloradooth 1 +infoart 1 +weblouvreth 1 +world 1 +linebook 1 +amazon 1 +book 1 +calvinhobb 1 +archivecardsmagicchinaart 1 +china 1 +gourmetl 1 +cordonbleu 1 +itali 1 +dessert 1 +fashional 1 +linksa 1 +cjlutz 1 +wwweb 1 +pagewith 1 +heart 1 +tmexpressfirst 1 +view 1 +wireirc 1 +faqfashion 1 +nethair 1 +crew 1 +diesel 1 +jean 1 +guessfriend 1 +alan 1 +cheng 1 +david 1 +deng 1 +shiji 1 +insur 1 +plan 1 +email 1 +grove 1 +edulibrari 1 +librari 1 +congressmagazin 1 +intertext 1 +wire 1 +timegeorg 1 +gilder 1 +discoveri 1 +mail 1 +postcard 1 +map 1 +internet 1 +underground 1 +archivesinanet 1 +newsworld 1 +new 1 +brief 1 +sport 1 +open 1 +olymp 1 +stock 1 +streetheadlin 1 +street 1 +weatherhunt 1 +infoth 1 +lyco 1 +hunt 1 +informationglob 1 +navigatorhom 1 +global 1 +navig 1 +scout 1 +wanderersand 1 +spider 1 +edg 1 +yahoo 1 +refer 1 +netscap 1 +last 1 +modifi 1 +kguo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..fce13052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,43 @@ +java 4 +program 3 +wirefram 2 +rotat 2 +creat 2 +introductionthi 1 +project 1 +desgin 1 +learn 1 +tool 1 +comput 1 +graphic 1 +provid 1 +understand 1 +polygon 1 +form 1 +list 1 +vertic 1 +written 1 +simpl 1 +power 1 +languag 1 +safe 1 +portabl 1 +interact 1 +object 1 +orient 1 +multi 1 +threader 1 +environ 1 +platform 1 +speific 1 +applet 1 +react 1 +user 1 +input 1 +dynam 1 +chang 1 +cone 1 +cube 1 +cylind 1 +tetra 1 +toru 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..23d2363a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,242 @@ +comput 29 +kleinberg 22 +proc 15 +scienc 14 +symposium 14 +algorithm 12 +theori 10 +foundat 7 +cornel 6 +problem 6 +optim 5 +network 5 +ieee 5 +research 4 +geometri 4 +geometr 4 +appear 4 +siam 4 +discret 4 +line 4 +univers 3 +combinatori 3 +approxim 3 +distribut 3 +biologi 3 +path 3 +program 3 +inform 3 +resourc 3 +huttenloch 3 +project 3 +bibliographi 3 +confer 3 +kleinber 2 +molecular 2 +rout 2 +adversari 2 +queue 2 +protocol 2 +semi 2 +definit 2 +academ 2 +graph 2 +tardo 2 +disjointpath 2 +williamson 2 +trade 2 +goeman 2 +minimum 2 +process 2 +letter 2 +server 2 +leighton 2 +point 2 +technic 2 +report 2 +page 2 +group 2 +associ 2 +internet 2 +secur 2 +homepag 1 +assist 1 +professor 1 +ithaca 1 +interest 1 +emphasi 1 +recent 1 +work 1 +includ 1 +anddisjoint 1 +approach 1 +analyz 1 +stabilityof 1 +without 1 +probabilist 1 +assumpt 1 +method 1 +particularlyth 1 +posit 1 +studi 1 +conform 1 +spend 1 +year 1 +visit 1 +almaden 1 +center 1 +click 1 +seeselect 1 +publicationsmiscellan 1 +linkspapersapproxim 1 +singl 1 +sourc 1 +unsplitt 1 +flow 1 +rubinfeld 1 +short 1 +expand 1 +dens 1 +embed 1 +disjoint 1 +high 1 +diamet 1 +planar 1 +aggarw 1 +node 1 +mesh 1 +vlsi 1 +layout 1 +improvedapproxim 1 +ratio 1 +latenc 1 +lovasz 1 +thetafunct 1 +relax 1 +vertex 1 +cover 1 +math 1 +local 1 +formobil 1 +robot 1 +computersci 1 +search 1 +simplepolygon 1 +lower 1 +bound 1 +serverbalanc 1 +yaniv 1 +serveralgorithm 1 +robotnavig 1 +master 1 +thesi 1 +parallel 1 +andrew 1 +awerbuch 1 +fernandez 1 +stabil 1 +result 1 +greedi 1 +content 1 +resolut 1 +borodin 1 +raghavan 1 +sudan 1 +attiya 1 +lynch 1 +offsbetween 1 +messag 1 +deliveri 1 +quiesc 1 +time 1 +connect 1 +managementprotocol 1 +israel 1 +system 1 +mullainathan 1 +boundsand 1 +combin 1 +consensu 1 +object 1 +onprincipl 1 +berger 1 +reconstruct 1 +athre 1 +dimension 1 +model 1 +arbitrari 1 +error 1 +compar 1 +set 1 +kedem 1 +dynam 1 +voronoi 1 +diagram 1 +hausdorff 1 +distanc 1 +pointset 1 +euclidean 1 +motion 1 +plane 1 +symposiumon 1 +invariantsof 1 +segment 1 +universitycomput 1 +juli 1 +linkssearch 1 +tool 1 +bibliographiesaltavista 1 +infoseek 1 +excit 1 +yahoo 1 +nynex 1 +yellow 1 +glimps 1 +ncstrl 1 +librari 1 +david 1 +jone 1 +hypertext 1 +sitescornel 1 +oper 1 +stanford 1 +berkelei 1 +nation 1 +computingtc 1 +virtual 1 +address 1 +book 1 +crescenzi 1 +kann 1 +compendium 1 +foc 1 +soda 1 +stoc 1 +biologycomput 1 +carb 1 +biocomput 1 +sdsc 1 +list 1 +geometrydavid 1 +eppstein 1 +junkyard 1 +jeff 1 +erickson 1 +securitymitr 1 +corp 1 +princeton 1 +safe 1 +rivest 1 +cryptographi 1 +link 1 +miscellaneousnetscap 1 +intellicast 1 +interact 1 +tenni 1 +chess 1 +onlin 1 +talk 1 +kleinbergdepart 1 +scienceupson 1 +hallcornel 1 +universityithaca 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..134f813e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,52 @@ +homepag 3 +cornel 3 +comput 2 +scienc 2 +japan 2 +page 2 +kazushi 1 +otakota 1 +edukazushi 1 +melco 1 +current 1 +master 1 +engin 1 +student 1 +cornellunivers 1 +receiv 1 +univers 1 +tokyo 1 +back 1 +march 1 +work 1 +mitusbishi 1 +electr 1 +corpor 1 +return 1 +degre 1 +worth 1 +isund 1 +construct 1 +start 1 +assign 1 +acquaint 1 +html 1 +forc 1 +depart 1 +inform 1 +superhighwai 1 +cours 1 +interest 1 +pictur 1 +music 1 +move 1 +sale 1 +come 1 +take 1 +februari 1 +thing 1 +want 1 +sell 1 +think 1 +advert 1 +peopl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..2e9f3f4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,54 @@ +research 3 +cornel 3 +comput 3 +interest 2 +algorithm 2 +complex 2 +logic 2 +algebra 2 +faculti 1 +dexter 1 +kozendext 1 +kozenjoseph 1 +newton 1 +professor 1 +engineeringphd 1 +univers 1 +interestsmi 1 +includ 1 +especiallycomplex 1 +decis 1 +problem 1 +andsemant 1 +program 1 +languag 1 +paper 1 +avail 1 +onlinekleen 1 +constraint 1 +type 1 +infer 1 +algebraautomata 1 +theori 1 +logicbibliographylist 1 +public 1 +technic 1 +reportscours 1 +notesc 1 +structur 1 +interpret 1 +programsc 1 +automata 1 +theoryfun 1 +stufffamili 1 +pictur 1 +rugbi 1 +effectcomput 1 +scienc 1 +departmentupson 1 +hallcornel 1 +universityithaca 1 +york 1 +usakozen 1 +work 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..2e864e11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,42 @@ +kreitz 5 +christoph 3 +cornel 3 +home 2 +page 2 +soon 2 +research 2 +comput 2 +theori 2 +pictur 1 +associ 1 +depart 1 +scienc 1 +univers 1 +ithaca 1 +offic 1 +phone 1 +email 1 +upson 1 +hall 1 +topic 1 +program 1 +synthesi 1 +autom 1 +deduct 1 +type 1 +teach 1 +learn 1 +german 1 +lehr 1 +lernen 1 +vorlesungsskript 1 +medienunterst 1 +uumltzt 1 +lehren 1 +person 1 +inform 1 +avail 1 +last 1 +modifi 1 +novemb 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..a80594e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,34 @@ +kuen 4 +heng 2 +cornel 2 +isi 1 +master 1 +engin 1 +comput 1 +scienc 1 +depart 1 +univers 1 +address 1 +grove 1 +street 1 +newton 1 +telephon 1 +welcom 1 +visit 1 +place 1 +myproject 1 +multimedia 1 +system 1 +cours 1 +would 1 +like 1 +read 1 +daili 1 +new 1 +taiwan 1 +home 1 +countri 1 +enjoi 1 +page 1 +still 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..5b50a8c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,182 @@ +digit 11 +librari 11 +comput 9 +scienc 9 +technic 9 +report 9 +research 7 +cornel 7 +dienst 5 +author 5 +implement 4 +object 4 +work 4 +carl 3 +lagoz 3 +person 3 +number 3 +server 3 +world 3 +distribut 3 +part 3 +also 3 +workshop 3 +page 2 +project 2 +group 2 +develop 2 +ncstrl 2 +issu 2 +protocol 2 +refer 2 +technolog 2 +paper 2 +wide 2 +confer 2 +architectur 2 +servic 2 +interoper 2 +area 2 +extend 2 +framework 2 +design 2 +secur 2 +repositori 2 +final 2 +meet 2 +substitut 2 +life 2 +time 2 +never 2 +quiet 2 +home 1 +leader 1 +depart 1 +upson 1 +hall 1 +universityithaca 1 +phone 1 +internet 1 +edui 1 +lead 1 +groupin 1 +departmentat 1 +univers 1 +ourgroup 1 +manag 1 +oper 1 +network 1 +intern 1 +consortium 1 +maintain 1 +adistribut 1 +collaborateson 1 +davi 1 +thedienstsoftwar 1 +providesdistribut 1 +access 1 +worldwid 1 +current 1 +enabl 1 +drop 1 +publish 1 +document 1 +commun 1 +april 1 +manual 1 +build 1 +product 1 +chapter 1 +advanc 1 +springer 1 +verlag 1 +primari 1 +involv 1 +defin 1 +protocolsfor 1 +infrastructur 1 +collabor 1 +corpor 1 +nation 1 +initiativesto 1 +developeda 1 +darpa 1 +fund 1 +open 1 +store 1 +iso 1 +dlib 1 +magazin 1 +decemb 1 +member 1 +dlibwork 1 +interfacesand 1 +releas 1 +metadata 1 +iiin 1 +warwick 1 +amveri 1 +interest 1 +us 1 +distributedobject 1 +read 1 +posit 1 +paperfor 1 +joint 1 +mobil 1 +codeworkshop 1 +know 1 +meetm 1 +find 1 +poor 1 +contact 1 +littl 1 +moreabout 1 +charact 1 +pictur 1 +pagei 1 +luci 1 +daughter 1 +rule 1 +major 1 +outsideof 1 +toddler 1 +constant 1 +challeng 1 +lucyg 1 +mean 1 +provid 1 +avid 1 +outdoor 1 +site 1 +fast 1 +movingwat 1 +lakeand 1 +itch 1 +cano 1 +give 1 +beauti 1 +think 1 +bike 1 +ridingalong 1 +road 1 +backwood 1 +trail 1 +tell 1 +sparehour 1 +run 1 +shoe 1 +breath 1 +deeplyth 1 +fresh 1 +spend 1 +much 1 +joi 1 +physicalnor 1 +ever 1 +interfer 1 +desir 1 +fight 1 +itspreserv 1 +hope 1 +sometim 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..f0ae509f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,101 @@ +homepag 7 +career 4 +comput 3 +system 3 +cornel 3 +page 3 +guid 3 +lidong 2 +zhou 2 +depart 2 +research 2 +materi 2 +distribut 2 +project 2 +secur 2 +internet 2 +network 2 +servic 2 +friend 2 +fudan 2 +buyer 2 +auto 2 +welcom 1 +sciencecornel 1 +universityithaca 1 +fall 1 +cours 1 +concept 1 +local 1 +access 1 +multimedia 1 +relat 1 +sigop 1 +paper 1 +oasi 1 +cambridg 1 +report 1 +adag 1 +author 1 +applic 1 +group 1 +level 1 +java 1 +safe 1 +program 1 +legion 1 +sirac 1 +kerbero 1 +authent 1 +massiv 1 +ocaml 1 +advanc 1 +standard 1 +robot 1 +exclus 1 +document 1 +opportun 1 +jobtrak 1 +colleg 1 +grad 1 +hunter 1 +open 1 +center 1 +onlin 1 +careermosa 1 +jobweb 1 +home 1 +xjob 1 +yingjun 1 +classmat 1 +inform 1 +resours 1 +tutori 1 +languag 1 +tool 1 +yellow 1 +book 1 +isso 1 +sunris 1 +chines 1 +soccer 1 +world 1 +edmund 1 +automobil 1 +autosit 1 +ultim 1 +insur 1 +basic 1 +legal 1 +surviv 1 +link 1 +travel 1 +agenc 1 +rank 1 +succe 1 +graduat 1 +school 1 +back 1 +indexlast 1 +updat 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..ec8424bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,163 @@ +comput 6 +project 4 +link 4 +system 3 +distribut 3 +home 3 +anim 3 +hsian 2 +page 2 +scienceat 2 +cornel 2 +univers 2 +taiwan 2 +classesc 2 +final 2 +orwel 2 +remov 2 +object 2 +site 2 +swartz 2 +cool 2 +totoro 2 +wangthi 1 +major 1 +constructionlin 1 +wang 1 +master 1 +student 1 +degre 1 +inform 1 +ohio 1 +state 1 +born 1 +fangliao 1 +small 1 +villag 1 +southern 1 +coast 1 +still 1 +construct 1 +fall 1 +multimedia 1 +track 1 +digit 1 +videoe 1 +networkse 1 +vision 1 +moment 1 +base 1 +edg 1 +oper 1 +amidonc 1 +autom 1 +video 1 +transcrib 1 +annot 1 +research 1 +advisor 1 +prof 1 +ramin 1 +zabihspr 1 +natur 1 +languag 1 +processingc 1 +practic 1 +computingc 1 +practicum 1 +network 1 +visual 1 +managementc 1 +machin 1 +visionc 1 +scienc 1 +colloquimc 1 +move 1 +scene 1 +high 1 +perform 1 +audit 1 +program 1 +java 1 +webspac 1 +interest 1 +us 1 +stuffscornel 1 +info 1 +depart 1 +annual 1 +reportiee 1 +societytaiwan 1 +headlin 1 +new 1 +sinanet 1 +comth 1 +musicmovi 1 +connect 1 +movieweb 1 +movi 1 +moviemania 1 +also 1 +click 1 +collect 1 +think 1 +picturesth 1 +list 1 +best 1 +sell 1 +book 1 +releas 1 +publish 1 +world 1 +journal 1 +bookstor 1 +quot 1 +chines 1 +classic 1 +linux 1 +linkstcl 1 +line 1 +resourc 1 +softwar 1 +engin 1 +galleri 1 +hacksth 1 +earth 1 +pagemiscellan 1 +hongkong 1 +bridg 1 +hong 1 +kong 1 +linksfor 1 +like 1 +japanes 1 +take 1 +look 1 +carlo 1 +jump 1 +cja 1 +calanimag 1 +alpha 1 +chapter 1 +berkelei 1 +pagelaputa 1 +castl 1 +nausicaa 1 +vallei 1 +wind 1 +conan 1 +slump 1 +kiki 1 +legend 1 +galact 1 +hero 1 +ming 1 +pagecampu 1 +address 1 +uptown 1 +eithaca 1 +york 1 +linhsian 1 +edulast 1 +updat 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..2de77721 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,33 @@ +page 3 +thought 3 +libbi 2 +home 2 +essai 2 +show 2 +take 2 +look 2 +pagewelcom 1 +collect 1 +last 1 +updat 1 +sept 1 +download 1 +theme 1 +song 1 +check 1 +video 1 +clip 1 +read 1 +lista 1 +littl 1 +thing 1 +septemb 1 +june 1 +april 1 +interest 1 +projectemail 1 +mehit 1 +counter 1 +courtesi 1 +http 1 +digit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..71e21924 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,12 @@ +offic 3 +name 1 +upson 1 +hall 1 +hour 1 +mail 1 +lili 1 +cornel 1 +oper 1 +system 1 +take 1 +cours 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..1ab051f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,100 @@ +numer 5 +cornel 4 +theori 3 +linear 3 +recent 3 +lloyd 2 +trefethen 2 +depart 2 +mathemat 2 +algebra 2 +matric 2 +oper 2 +textbook 2 +student 2 +trefethenprofessorlnt 1 +edumi 1 +appoint 1 +thecomput 1 +scienc 1 +also 1 +affili 1 +thecent 1 +appli 1 +thecornel 1 +center 1 +field 1 +analysi 1 +scientif 1 +comput 1 +havea 1 +person 1 +view 1 +mean 1 +specif 1 +interest 1 +includ 1 +numericalsolut 1 +conform 1 +map 1 +approxim 1 +fluid 1 +mechan 1 +year 1 +much 1 +work 1 +hasbeen 1 +relat 1 +normal 1 +whose 1 +eigenvector 1 +notorthogon 1 +applic 1 +textbooksfinit 1 +differ 1 +spectral 1 +method 1 +siam 1 +papersmultimatlab 1 +matlab 1 +multipl 1 +processorsmatrix 1 +iter 1 +gap 1 +betweenpotenti 1 +convergencepseudospectra 1 +operatorssom 1 +papersoth 1 +itemsclass 1 +paper 1 +analysiscurriculum 1 +vitaepseudospectra 1 +bibliographi 1 +peter 1 +alfeldcurr 1 +vicki 1 +howlegubjrn 1 +jnsson 1 +yohan 1 +kimdivakar 1 +viswanathprevi 1 +jeff 1 +baggetttobi 1 +driscollalan 1 +edelman 1 +loui 1 +howel 1 +walter 1 +mascarenhasnoel 1 +nachtigalsatish 1 +reddi 1 +chuan 1 +tohsom 1 +colleaguesjim 1 +demmelann 1 +greenbaummartin 1 +gutknechtd 1 +nick 1 +highamann 1 +trefethenandr 1 +weideman 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..57f88686 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,91 @@ +system 6 +network 4 +home 3 +page 3 +comput 3 +cornel 3 +distribut 3 +internet 3 +databas 3 +luci 2 +travel 2 +music 2 +spring 2 +class 2 +manag 2 +oper 2 +softwar 2 +site 2 +stuff 2 +languag 2 +china 2 +chines 2 +welcom 1 +student 1 +scienc 1 +depart 1 +univers 1 +ithaca 1 +interest 1 +topic 1 +programminglanguag 1 +applic 1 +hobbi 1 +ping 1 +pong 1 +badminton 1 +swim 1 +photograph 1 +read 1 +resum 1 +whiz 1 +stock 1 +search 1 +analysi 1 +tool 1 +degre 1 +project 1 +practic 1 +practicum 1 +fall 1 +engin 1 +multimedia 1 +systemscontact 1 +yuwu 1 +favorit 1 +java 1 +corba 1 +silvano 1 +tkcgi 1 +html 1 +vrml 1 +object 1 +orient 1 +product 1 +server 1 +securitypc 1 +lube 1 +tune 1 +ipngip_atmcomput 1 +compani 1 +netscap 1 +busi 1 +cube 1 +sapient 1 +microsoft 1 +novel 1 +relat 1 +misc 1 +jobtrack 1 +new 1 +artvark 1 +galleri 1 +underground 1 +archiv 1 +person 1 +connect 1 +librari 1 +catalog 1 +mail 1 +sunlab 1 +caltech 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..7aa5c939 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,50 @@ +cornel 3 +engin 2 +comput 2 +system 2 +network 2 +linda 1 +home 1 +page 1 +universitylinda 1 +lxwu 1 +master 1 +student 1 +scienc 1 +depart 1 +univsers 1 +receiv 1 +univers 1 +massachusett 1 +lowel 1 +sinc 1 +work 1 +digit 1 +equip 1 +corp 1 +banyan 1 +main 1 +research 1 +interest 1 +mulitimedia 1 +click 1 +resum 1 +project 1 +nativ 1 +protocol 1 +stack 1 +window 1 +us 1 +multicast 1 +group 1 +layer 1 +video 1 +electron 1 +commerc 1 +kramer 1 +mart 1 +coursesfal 1 +oper 1 +multimedia 1 +photoesus 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..18348b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,13 @@ +nikolai 1 +mateevnikolai 1 +mateevgradu 1 +studentmateev 1 +cornel 1 +upson 1 +halldepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..196a480f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,54 @@ +project 3 +morgenstern 2 +cornel 2 +comput 2 +databas 2 +arpa 2 +matthew 1 +home 1 +pagematthew 1 +morgensternresearch 1 +leaderaddress 1 +engin 1 +theori 1 +centerxerox 1 +design 1 +research 1 +institutecornel 1 +universityithaca 1 +phone 1 +email 1 +edustatu 1 +visit 1 +fellow 1 +scienc 1 +princip 1 +scientist 1 +xerox 1 +laboratori 1 +scienceproject 1 +distribut 1 +heterogen 1 +system 1 +fund 1 +metadata 1 +manag 1 +multimedia 1 +document 1 +supervis 1 +select 1 +student 1 +relat 1 +area 1 +fundedresearch 1 +work 1 +academ 1 +year 1 +summer 1 +avail 1 +stop 1 +chat 1 +inform 1 +come 1 +page 1 +soon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..f475a5a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,40 @@ +graph 4 +algorithm 3 +dynam 2 +data 2 +structur 2 +monika 1 +henzing 1 +homepagemonika 1 +rauch 1 +henzingerassist 1 +professorcomput 1 +scienc 1 +departmentcornel 1 +universityithaca 1 +email 1 +cornel 1 +eduphon 1 +current 1 +leav 1 +digit 1 +equip 1 +corpor 1 +system 1 +research 1 +centerhomepageresearch 1 +interestscombinatori 1 +especi 1 +random 1 +theori 1 +lower 1 +bound 1 +recent 1 +public 1 +project 1 +pageprogram 1 +committe 1 +stoc 1 +soda 1 +homepag 1 +fall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..dbd284c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,76 @@ +stanlei 3 +cornel 3 +univers 3 +comput 3 +scienc 3 +interest 3 +project 3 +paper 3 +huang 2 +engin 2 +kentucki 2 +area 2 +user 2 +distribut 2 +plan 2 +share 2 +home 1 +page 1 +huangmast 1 +studentmhuang 1 +sheldon 1 +courtcornel 1 +ithaca 1 +master 1 +class 1 +bachelor 1 +oper 1 +systemsdistribut 1 +systemsdatabas 1 +system 1 +inform 1 +retrievalgraph 1 +interfacesoth 1 +movi 1 +tenni 1 +horse_back 1 +ride 1 +travel 1 +read 1 +work 1 +myadvisor 1 +werner 1 +vogel 1 +robbertvan 1 +reness 1 +object 1 +integr 1 +horu 1 +link 1 +relat 1 +planplan 1 +distributionplan 1 +updateplan 1 +faqhorusc 1 +final 1 +exam 1 +collect 1 +memorydistribut 1 +memorysom 1 +technic 1 +group 1 +communicationsnapshotu 1 +level 1 +network 1 +interfac 1 +architecturejobscar 1 +pathbai 1 +jobscyberezumescar 1 +opportunitiesus 1 +stufftechn 1 +field 1 +searchbel 1 +labsspbsd 1 +sourcesjavarfclast 1 +modifi 1 +mhuang 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..31e7ae80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,62 @@ +millett 4 +lynett 3 +homepag 2 +cornel 2 +person 2 +last 2 +list 2 +millettdepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +participatoryform 1 +mass 1 +speech 1 +develop 1 +internetdeserv 1 +highest 1 +protect 1 +government 1 +intrus 1 +decis 1 +aclu 1 +reno 1 +challeng 1 +script 1 +second 1 +year 1 +skit 1 +cuc 1 +holidai 1 +parti 1 +inform 1 +pictur 1 +cat 1 +updat 1 +link 1 +never 1 +abl 1 +find 1 +precis 1 +femin 1 +know 1 +peopl 1 +call 1 +feminist 1 +whenver 1 +express 1 +sentiment 1 +differenti 1 +doormat 1 +prostitut 1 +rebecca 1 +west 1 +modifi 1 +octob 1 +comment 1 +welcom 1 +copi 1 +public 1 +pleas 1 +look 1 +copyright 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..6b0613d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,94 @@ +home 5 +link 4 +cornel 3 +newgroupc 3 +mishaal 2 +page 2 +univers 2 +engin 2 +interest 2 +system 2 +high 2 +pagemisha 1 +kuwaiti 1 +student 1 +master 1 +computersci 1 +program 1 +mengc 1 +graduat 1 +doubl 1 +major 1 +electr 1 +andcomput 1 +scienc 1 +worcest 1 +polytechn 1 +institut 1 +inworcest 1 +coolest 1 +place 1 +earth 1 +list 1 +stuff 1 +involv 1 +temporari 1 +servic 1 +bearaccess 1 +menu 1 +cours 1 +take 1 +machin 1 +vision 1 +practic 1 +distribut 1 +practicum 1 +perform 1 +comput 1 +advanc 1 +languag 1 +implement 1 +newgroup 1 +capac 1 +network 1 +newgroupnba 1 +databas 1 +manag 1 +newgroupoptim 1 +video 1 +transmiss 1 +meng 1 +project 1 +extens 1 +kuwait 1 +pagemi 1 +resum 1 +check 1 +stock 1 +quotescool 1 +public 1 +server 1 +hope 1 +offer 1 +conot 1 +soon 1 +weather 1 +ithaca 1 +latest 1 +new 1 +cann 1 +intern 1 +film 1 +festiv 1 +everyth 1 +wrong 1 +reason 1 +want 1 +ever 1 +accus 1 +nerd 1 +well 1 +sure 1 +cool 1 +almashanmisha 1 +educornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..fba44bcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,131 @@ +page 7 +cool 5 +link 4 +pleas 3 +stuff 3 +make 3 +look 3 +pretti 3 +realli 3 +thing 3 +mike 2 +date 2 +would 2 +like 2 +coupl 2 +know 2 +beavi 2 +korbi 2 +last 2 +name 2 +complet 2 +golf 2 +click 2 +list 2 +kid 2 +pagethi 1 +yeah 1 +version 1 +recent 1 +browser 1 +quit 1 +sign 1 +myguestbook 1 +chanc 1 +opinion 1 +count 1 +dark 1 +stockholm 1 +right 1 +take 1 +vote 1 +poll 1 +result 1 +peopl 1 +guess 1 +ryan 1 +call 1 +vitya 1 +better 1 +construct 1 +danc 1 +frog 1 +maria 1 +mark 1 +andrew 1 +corbett 1 +suck 1 +card 1 +nicknam 1 +kevin 1 +donnel 1 +love 1 +pictur 1 +eryn 1 +crave 1 +attent 1 +want 1 +movi 1 +graphic 1 +class 1 +amaz 1 +anim 1 +plai 1 +mpeg 1 +place 1 +univers 1 +stop 1 +expand 1 +find 1 +world 1 +need 1 +person 1 +pick 1 +site 1 +made 1 +onlin 1 +student 1 +homepag 1 +lame 1 +guttermouth 1 +brought 1 +byjust 1 +cours 1 +member 1 +internet 1 +exchang 1 +peic 1 +connect 1 +whole 1 +bunch 1 +other 1 +line 1 +come 1 +join 1 +hand 1 +sing 1 +togeth 1 +spirit 1 +harmoni 1 +someth 1 +visitor 1 +number 1 +keep 1 +reset 1 +check 1 +statist 1 +accuar 1 +send 1 +mail 1 +atmak 1 +cornel 1 +edubas 1 +much 1 +random 1 +imag 1 +relat 1 +anyth 1 +thank 1 +everybodi 1 +idea 1 +us 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..527b4aad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,22 @@ +nobuhiko 2 +mukai 2 +semest 2 +mukainobuhiko 1 +home 1 +page 1 +construct 1 +research 1 +effect 1 +jpeg 1 +compressionon 1 +multimedia 1 +system 1 +last 1 +fall 1 +made 1 +anim 1 +titl 1 +magicon 1 +comput 1 +graphic 1 +spring 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..7d1b2e3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,22 @@ +nichola 2 +how 2 +homepag 1 +click 1 +photo 1 +imag 1 +graduat 1 +studentdepart 1 +comput 1 +sciencecornel 1 +universityithaca 1 +email 1 +nihow 1 +cornel 1 +eduoffic 1 +upson 1 +hall 1 +phone 1 +resum 1 +refer 1 +person 1 +info 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..deb18dda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,23 @@ +niko 3 +home 2 +cornel 2 +page 1 +research 1 +pitsiani 1 +upson 1 +hall 1 +dept 1 +comput 1 +scienc 1 +univers 1 +ithaca 1 +work 1 +public 1 +lectur 1 +teach 1 +java 1 +vita 1 +pointer 1 +sinc 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..a7af5402 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,186 @@ +parallel 17 +chrisochoid 13 +comput 13 +environ 5 +project 5 +proceed 5 +algorithm 4 +data 4 +softwar 4 +dynam 4 +grid 4 +gener 4 +cornel 4 +confer 4 +numer 4 +map 4 +housti 4 +rice 4 +niko 3 +scienc 3 +distribut 3 +load 3 +balanc 3 +simul 3 +ellpack 3 +student 3 +appear 3 +intern 3 +journal 3 +method 3 +page 3 +work 2 +mani 2 +research 2 +runtim 2 +multicomput 2 +adapt 2 +implement 2 +black 2 +hole 2 +master 2 +sukup 2 +fluid 2 +field 2 +mathemat 2 +tool 2 +program 2 +iter 2 +solver 2 +center 2 +mississippi 2 +advanc 2 +partial 2 +domain 2 +papachi 2 +public 2 +depart 1 +purdu 1 +univers 1 +touch 1 +facet 1 +includ 1 +support 1 +back 1 +system 1 +compil 1 +problem 1 +solv 1 +schedul 1 +design 1 +compon 1 +well 1 +known 1 +current 1 +workshop 1 +bernoulli 1 +prema 1 +portabl 1 +architecur 1 +sensit 1 +messag 1 +binari 1 +grand 1 +challeng 1 +engin 1 +florian 1 +kodukula 1 +indupraka 1 +pingali 1 +vineet 1 +ahuja 1 +reza 1 +behforooz 1 +undergradu 1 +former 1 +animesh 1 +chatterje 1 +rajani 1 +vaidyanathan 1 +select 1 +paper 1 +task 1 +bowyer 1 +watson 1 +offifth 1 +incomput 1 +relat 1 +multithread 1 +model 1 +ctctr 1 +octob 1 +appli 1 +object 1 +orient 1 +kale 1 +kohl 1 +yellick 1 +scientif 1 +menu 1 +unstructur 1 +structur 1 +thompson 1 +contemporari 1 +key 1 +special 1 +issu 1 +april 1 +toolkit 1 +collid 1 +haupt 1 +aiaa 1 +colorado 1 +spring 1 +june 1 +altern 1 +scalabl 1 +scalableparallel 1 +librari 1 +nation 1 +foundat 1 +engineeringresearch 1 +state 1 +partit 1 +heurist 1 +base 1 +parallelhardwar 1 +geometri 1 +characterist 1 +differentialequ 1 +vichnevetski 1 +knight 1 +richter 1 +imac 1 +brunswick 1 +decompos 1 +architectur 1 +kortesi 1 +decomposit 1 +differenti 1 +equat 1 +symposium 1 +domaindecomposit 1 +moscow 1 +ussr 1 +glowinski 1 +siam 1 +programmingenviron 1 +mimd 1 +machin 1 +karathanas 1 +samartzi 1 +vavali 1 +yang 1 +wang 1 +and 1 +weerawarana 1 +onsupercomput 1 +nikosc 1 +institut 1 +theori 1 +univeristi 1 +rhode 1 +hall 1 +room 1 +ithaca 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..5f8d1122 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,68 @@ +prema 6 +runtim 6 +parallel 5 +cornel 4 +system 3 +portabl 3 +environ 3 +model 3 +support 2 +multicomput 2 +architectur 2 +comput 2 +research 2 +compil 2 +port 2 +dynam 2 +load 2 +balanc 2 +niko 2 +chrisochoid 2 +welcom 1 +advanc 1 +institut 1 +theori 1 +center 1 +univers 1 +overview 1 +andproblem 1 +solv 1 +target 1 +scientif 1 +computingappl 1 +build 1 +implement 1 +design 1 +varieti 1 +suppot 1 +global 1 +address 1 +space 1 +memori 1 +data 1 +task 1 +program 1 +multi 1 +thread 1 +style 1 +execut 1 +automat 1 +work 1 +share 1 +mechan 1 +paper 1 +multithread 1 +adapt 1 +pdecomput 1 +ctctr 1 +journal 1 +appli 1 +numer 1 +mathemat 1 +relat 1 +group 1 +pcrc 1 +consortium 1 +copyright 1 +copi 1 +nikosc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..855400ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,30 @@ +look 2 +pavel 1 +naumov 1 +welcom 1 +home 1 +page 1 +invit 1 +learn 1 +locat 1 +cyberspac 1 +real 1 +world 1 +work 1 +nuprl 1 +project 1 +tire 1 +take 1 +rest 1 +galleri 1 +visit 1 +cinema 1 +photo 1 +orplai 1 +game 1 +java 1 +written 1 +sundai 1 +send 1 +mail 1 +place 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..3439ee13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,194 @@ +pearson 13 +comput 12 +algorithm 10 +parallel 7 +scienc 6 +design 5 +time 4 +architectur 4 +program 4 +languag 4 +work 3 +problem 3 +machin 3 +implement 3 +ieee 3 +publish 3 +region 3 +cornel 3 +david 2 +research 2 +processor 2 +connect 2 +mesh 2 +vision 2 +realiz 2 +current 2 +cost 2 +theoret 2 +effici 2 +oper 2 +gener 2 +purpos 2 +believ 2 +realli 2 +instruct 2 +neumann 2 +network 2 +intern 2 +confer 2 +vazirani 2 +fast 2 +find 2 +maxim 2 +bipartit 2 +technolog 2 +lectur 2 +note 2 +journal 2 +johnson 2 +pingali 2 +singl 2 +control 2 +linear 2 +tech 2 +report 2 +structur 2 +sigplan 2 +global 2 +regist 2 +alloc 2 +simd 2 +multiprocessor 2 +interest 1 +thesi 1 +investig 1 +highli 1 +scalabl 1 +consistingof 1 +simpl 1 +dimension 1 +guid 1 +perhap 1 +year 1 +henc 1 +materi 1 +taken 1 +place 1 +crystal 1 +molecul 1 +lattic 1 +long 1 +goal 1 +prepar 1 +theubiquit 1 +offer 1 +must 1 +heed 1 +lawsof 1 +physic 1 +attent 1 +chip 1 +spatial 1 +layoutand 1 +hidden 1 +commun 1 +accomplishedbi 1 +pursu 1 +requir 1 +practic 1 +could 1 +character 1 +feasibl 1 +studi 1 +ihav 1 +produc 1 +cellular 1 +couldb 1 +hardwar 1 +simul 1 +thisarchitectur 1 +system 1 +designfor 1 +like 1 +proteinstructur 1 +grand 1 +challeng 1 +parallelcomput 1 +power 1 +succe 1 +becom 1 +commodityand 1 +sold 1 +desktop 1 +video 1 +game 1 +direct 1 +futur 1 +includ 1 +vlsi 1 +architectureand 1 +wide 1 +us 1 +hideth 1 +detail 1 +reflect 1 +underlyingvon 1 +architectureha 1 +good 1 +thing 1 +exploit 1 +parallelmachin 1 +need 1 +easyto 1 +estim 1 +public 1 +dunten 1 +arm 1 +kiewit 1 +high 1 +speed 1 +campu 1 +societi 1 +compcon 1 +fall 1 +pillai 1 +near 1 +optim 1 +placement 1 +sensor 1 +element 1 +transact 1 +inform 1 +theori 1 +foundat 1 +softwar 1 +sequenti 1 +set 1 +entri 1 +exit 1 +tree 1 +proceed 1 +pldi 1 +notic 1 +polynomi 1 +chang 1 +make 1 +schedul 1 +workshop 1 +irregularli 1 +irregular 1 +sept 1 +zippel 1 +allerton 1 +press 1 +select 1 +area 1 +cryptographi 1 +appear 1 +depart 1 +upson 1 +hallcornel 1 +universityithaca 1 +york 1 +usaemail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..480ede64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,126 @@ +page 7 +cornel 7 +work 4 +year 4 +program 4 +engin 4 +comput 4 +chapter 4 +mail 4 +todd 3 +peskin 3 +student 3 +client 3 +java 3 +applet 3 +content 2 +favorit 2 +site 2 +meng 2 +colleg 2 +receiv 2 +master 2 +follow 2 +also 2 +acacia 2 +contact 2 +sinc 2 +semest 2 +intern 2 +experi 2 +develop 2 +server 2 +softwar 2 +creat 2 +construct 2 +pictur 1 +resum 1 +cours 1 +taken 1 +univers 1 +joint 1 +degre 1 +offer 1 +jointli 1 +johnson 1 +graduat 1 +school 1 +manag 1 +scienc 1 +busi 1 +administr 1 +current 1 +presid 1 +fratern 1 +brother 1 +would 1 +like 1 +becom 1 +part 1 +list 1 +pleas 1 +eduand 1 +soon 1 +suppli 1 +andyour 1 +roll 1 +number 1 +abl 1 +complet 1 +request 1 +quickli 1 +well 1 +best 1 +reach 1 +check 1 +round 1 +alwai 1 +found 1 +mani 1 +time 1 +log 1 +depart 1 +fall 1 +junior 1 +workeda 1 +throughth 1 +cooper 1 +enabl 1 +cornellundergradu 1 +supplement 1 +classroom 1 +knowledg 1 +practic 1 +compani 1 +theirfield 1 +enhanc 1 +nation 1 +level 1 +databas 1 +system 1 +isrun 1 +microsystem 1 +run 1 +window 1 +espn 1 +stock 1 +quot 1 +onlin 1 +final 1 +project 1 +larg 1 +mpeg 1 +file 1 +visitor 1 +februari 1 +still 1 +hope 1 +possibl 1 +includ 1 +us 1 +case 1 +wonder 1 +ticker 1 +tape 1 +borrow 1 +permiss 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..ffc9ad9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,54 @@ +pierc 4 +home 4 +stuff 4 +cornel 3 +page 2 +david 2 +student 2 +comput 2 +scienc 2 +ithaca 2 +pittsburgh 2 +pennsylvania 2 +read 2 +dave 1 +univers 1 +address 1 +offic 1 +upson 1 +hall 1 +valentin 1 +person 1 +second 1 +year 1 +recent 1 +although 1 +familycurr 1 +resid 1 +citi 1 +approxim 1 +halfwai 1 +philadelphia 1 +andharrisburg 1 +famou 1 +shop 1 +outlet 1 +otherwis 1 +younev 1 +want 1 +howev 1 +great 1 +place 1 +mani 1 +alreadi 1 +know 1 +sinceit 1 +imposs 1 +without 1 +go 1 +throughpittsburgh 1 +month 1 +favorit 1 +quot 1 +list 1 +work 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..3b0b41f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,123 @@ +cornel 5 +comput 5 +talk 5 +center 3 +group 3 +compil 3 +system 3 +present 3 +indupraka 2 +kodukula 2 +home 2 +page 2 +theori 2 +univers 2 +scienc 2 +work 2 +research 2 +applic 2 +architectur 2 +vliw 2 +public 2 +imperfectli 2 +nest 2 +abl 2 +loop 2 +transform 2 +summer 2 +support 2 +packag 2 +engin 1 +ithaca 1 +praka 1 +student 1 +depart 1 +prior 1 +tothat 1 +undergradu 1 +madra 1 +bernoulli 1 +prof 1 +keshav 1 +pingali 1 +member 1 +nawaaz 1 +ahm 1 +vladimir 1 +kotlyar 1 +vijai 1 +menon 1 +paul 1 +stodghil 1 +also 1 +affili 1 +advanc 1 +institut 1 +interplai 1 +runtim 1 +tradit 1 +andmultiprocessor 1 +deriv 1 +fromscientif 1 +imag 1 +process 1 +multimedia 1 +withibm 1 +hasinterest 1 +well 1 +given 1 +seri 1 +dens 1 +technolog 1 +first 1 +athp 1 +chelmsford 1 +wasabout 1 +necess 1 +deal 1 +looptransform 1 +handl 1 +trivial 1 +code 1 +framework 1 +perform 1 +loopparallel 1 +seminar 1 +schloss 1 +dagstuhl 1 +april 1 +watson 1 +regard 1 +useof 1 +octob 1 +lab 1 +palo 1 +alto 1 +regardingdata 1 +centric 1 +multi 1 +level 1 +block 1 +teach 1 +taught 1 +program 1 +andoper 1 +project 1 +czar 1 +instal 1 +maintain 1 +availableund 1 +gener 1 +licens 1 +departmentmachin 1 +check 1 +andfind 1 +handi 1 +tip 1 +alsofind 1 +extens 1 +info 1 +random 1 +link 1 +person 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..8edfad37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,49 @@ +praveen 2 +seshadri 2 +page 2 +ithaca 2 +databas 2 +project 2 +data 2 +home 1 +assist 1 +professor 1 +comput 1 +scienc 1 +depart 1 +cornel 1 +univers 1 +upson 1 +hall 1 +offic 1 +advanc 1 +system 1 +fall 1 +predat 1 +dbm 1 +adt 1 +know 1 +case 1 +enhanc 1 +abstract 1 +type 1 +sigmod 1 +submiss 1 +profession 1 +public 1 +time 1 +order 1 +manag 1 +sequenc 1 +postscript 1 +thesi 1 +tree 1 +save 1 +format 1 +person 1 +warren 1 +road 1 +ranjani 1 +ramamurthi 1 +green 1 +packer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..11cc0cab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,353 @@ +sequenc 41 +queri 38 +data 23 +databas 13 +relat 13 +oper 13 +model 11 +record 11 +system 10 +optim 10 +project 8 +order 7 +sequin 7 +us 7 +time 6 +manag 6 +effici 6 +languag 6 +posit 6 +praveen 6 +seshadri 6 +express 5 +evalu 5 +variou 5 +support 5 +process 5 +miron 5 +livni 5 +ramakrishnan 5 +object 4 +import 4 +requir 4 +includ 4 +scienc 4 +kind 4 +techniqu 4 +implement 4 +nest 4 +complex 4 +storag 4 +earthquak 4 +similar 4 +click 4 +raghu 4 +wisc 4 +domain 3 +exist 3 +allow 3 +need 3 +like 3 +next 3 +built 3 +defin 3 +also 3 +propos 3 +shore 3 +extens 3 +volcano 3 +recent 3 +plan 3 +scan 3 +singl 3 +mani 3 +view 3 +orient 3 +zoom 3 +group 3 +involv 3 +server 3 +proceed 3 +confer 3 +madison 3 +document 2 +collect 2 +set 2 +declar 2 +manner 2 +advantag 2 +user 2 +tempor 2 +previou 2 +demonstr 2 +feasibl 2 +form 2 +embed 2 +base 2 +exampl 2 +inform 2 +meteorolog 2 +phenomena 2 +sequenti 2 +strength 2 +greater 2 +would 2 +sort 2 +join 2 +store 2 +buffer 2 +gener 2 +answer 2 +detail 2 +aredescrib 2 +publish 2 +paper 2 +postscript 2 +version 2 +map 2 +call 2 +could 2 +flavor 2 +explor 2 +collaps 2 +expand 2 +last 2 +work 2 +probabl 2 +devis 2 +result 2 +client 2 +insid 2 +provid 2 +depart 2 +home 1 +pageth 1 +construct 1 +content 1 +objectivescurr 1 +statusmotiv 1 +exampleseq 1 +languageoptim 1 +techniquesseq 1 +developmentpublicationsrel 1 +workcontact 1 +informationproject 1 +number 1 +applic 1 +processingof 1 +larg 1 +amount 1 +theseappl 1 +financi 1 +histor 1 +analysi 1 +econom 1 +social 1 +metereolog 1 +medic 1 +andbiolog 1 +inadequ 1 +regard 1 +treat 1 +consequ 1 +tediou 1 +ineffici 1 +abstract 1 +util 1 +semanticstak 1 +uniqu 1 +opportun 1 +avail 1 +evaluationintegr 1 +canstor 1 +combin 1 +sequencesthes 1 +serv 1 +goal 1 +themost 1 +notion 1 +natur 1 +consid 1 +issu 1 +studi 1 +theori 1 +theoret 1 +idea 1 +statusth 1 +current 1 +statu 1 +algebraicqueri 1 +compos 1 +analogousto 1 +composit 1 +algebra 1 +describ 1 +identifi 1 +candeclar 1 +likesql 1 +vice 1 +versa 1 +build 1 +disk 1 +architectur 1 +sever 1 +megabyt 1 +integr 1 +motiv 1 +querya 1 +weather 1 +monitor 1 +occurr 1 +event 1 +scientist 1 +ask 1 +erupt 1 +didth 1 +richter 1 +scale 1 +featur 1 +groupbi 1 +claus 1 +correl 1 +subqueri 1 +aggregatefunct 1 +convent 1 +find 1 +execut 1 +even 1 +given 1 +knowledg 1 +howev 1 +sequencesord 1 +lock 1 +step 1 +merg 1 +temporari 1 +whenev 1 +valu 1 +check 1 +possibl 1 +therefor 1 +littl 1 +memori 1 +modelth 1 +present 1 +gist 1 +basic 1 +ordereddomain 1 +relationship 1 +andposit 1 +dual 1 +distinct 1 +wai 1 +recordsmap 1 +respect 1 +give 1 +rise 1 +either 1 +relationaloper 1 +overlap 1 +contain 1 +andaggreg 1 +researchersin 1 +commun 1 +offset 1 +movingaggreg 1 +mean 1 +associ 1 +instanc 1 +daili 1 +weekli 1 +hourli 1 +part 1 +deal 1 +make 1 +easi 1 +case 1 +real 1 +worldsitu 1 +extend 1 +instead 1 +extensionof 1 +indic 1 +practic 1 +ofseq 1 +languagew 1 +usingwhich 1 +specifi 1 +languagei 1 +except 1 +input 1 +queriesa 1 +well 1 +descript 1 +techniquesw 1 +thathav 1 +transform 1 +meta 1 +cach 1 +intermedi 1 +algorithm 1 +reli 1 +cost 1 +estim 1 +observ 1 +access 1 +stream 1 +strategi 1 +take 1 +account 1 +developmentth 1 +serverarchitectur 1 +multipl 1 +viaa 1 +multi 1 +thread 1 +ontop 1 +subset 1 +languageswhich 1 +mode 1 +arbitrarylevel 1 +viceversa 1 +supportfor 1 +type 1 +function 1 +detailson 1 +publicationssequ 1 +sigmod 1 +framework 1 +datapraveen 1 +ieee 1 +engin 1 +march 1 +design 1 +systempraveen 1 +submit 1 +vldb 1 +queriesraghu 1 +michael 1 +cheng 1 +intern 1 +comad 1 +decemb 1 +workthedevis 1 +complementari 1 +visualizationenviron 1 +front 1 +pose 1 +examin 1 +graphic 1 +peopl 1 +research 1 +servercontact 1 +informationfor 1 +contact 1 +eduraghu 1 +edumiron 1 +educomput 1 +univers 1 +wisconsin 1 +dayton 1 +street 1 +modifi 1 +seshadripraveen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..d1b322a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,50 @@ +ralph 2 +student 2 +comput 2 +scienc 2 +fellow 2 +cornel 2 +benzingerralph 1 +benzingerw 1 +sich 1 +seinen 1 +lorbeeren 1 +ausruht 1 +trgt 1 +derfalschen 1 +stell 1 +stori 1 +exchang 1 +univers 1 +karlsruh 1 +germani 1 +german 1 +august 1 +studienstiftung 1 +deutschen 1 +volk 1 +fulbright 1 +member 1 +siemen 1 +international 1 +studentenkrei 1 +alumnusat 1 +graduat 1 +depart 1 +cours 1 +taken 1 +advanc 1 +program 1 +languag 1 +design 1 +analysi 1 +algorithm 1 +reason 1 +knowledg 1 +contact 1 +inform 1 +mail 1 +offic 1 +upson 1 +hall 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..a41628b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,291 @@ +audio 84 +latex 70 +postscript 68 +section 23 +aster 22 +render 15 +express 13 +exampl 11 +produc 8 +us 8 +notic 8 +convei 7 +formula 6 +structur 6 +second 6 +first 5 +mathemat 4 +gener 4 +document 4 +format 4 +book 4 +demonstr 4 +fraction 4 +bruno 4 +nest 4 +integr 4 +listen 4 +reader 4 +comput 3 +spoken 3 +system 3 +version 3 +talk 3 +demo 3 +output 3 +effect 3 +along 3 +dimens 3 +superscript 3 +allow 3 +knuth 3 +percept 3 +recogn 3 +speak 3 +base 3 +interpret 3 +refer 3 +present 3 +level 3 +complex 3 +technic 2 +made 2 +avail 2 +three 2 +stereo 2 +visual 2 +progress 2 +continu 2 +file 2 +contain 2 +simpl 2 +succinctli 2 +vari 2 +space 2 +subscript 2 +independ 2 +taken 2 +power 2 +unambigu 2 +move 2 +monoton 2 +algebra 2 +follow 2 +written 2 +hard 2 +logarithm 2 +context 2 +specif 2 +user 2 +variabl 2 +tripl 2 +cross 2 +meaning 2 +name 2 +inton 2 +text 2 +intermix 2 +quantifi 2 +calcul 2 +sinc 2 +hear 2 +substitut 2 +process 2 +raman 2 +titl 1 +demonstrationi 1 +dedic 1 +guid 1 +read 1 +forrend 1 +develop 1 +myphd 1 +page 1 +thesi 1 +approxim 1 +hour 1 +record 1 +blind 1 +abstract 1 +print 1 +hypertext 1 +enhanc 1 +inlin 1 +imag 1 +compon 1 +origin 1 +input 1 +speech 1 +dectalk 1 +digit 1 +mulaw 1 +tabl 1 +mono 1 +encod 1 +dvip 1 +difficult 1 +suggest 1 +initi 1 +sequenti 1 +short 1 +typic 1 +show 1 +peopl 1 +andround 1 +quick 1 +overview 1 +faad 1 +casey 1 +want 1 +look 1 +place 1 +singl 1 +examplessinc 1 +would 1 +voic 1 +inflect 1 +paus 1 +toconvei 1 +group 1 +state 1 +renderingsub 1 +attribut 1 +audiost 1 +orthogon 1 +dimensionus 1 +mutual 1 +concept 1 +expon 1 +verbatim 1 +donald 1 +layoutoper 1 +compris 1 +symbol 1 +verydiffer 1 +defin 1 +monotonicchang 1 +notion 1 +vital 1 +school 1 +squar 1 +root 1 +choic 1 +trigonometr 1 +ident 1 +notat 1 +ambigu 1 +complet 1 +absenc 1 +parenthesi 1 +sever 1 +heurist 1 +construct 1 +correct 1 +tree 1 +forthes 1 +chosen 1 +reduc 1 +cognit 1 +load 1 +oppos 1 +seri 1 +rule 1 +asexpon 1 +wire 1 +isfulli 1 +customiz 1 +probabl 1 +innocu 1 +also 1 +mostdifficult 1 +imposs 1 +determin 1 +ofintegr 1 +applic 1 +theintegr 1 +oper 1 +brows 1 +piec 1 +shown 1 +trick 1 +experienc 1 +ofhuman 1 +error 1 +summat 1 +limit 1 +referenc 1 +equat 1 +meant 1 +illustr 1 +ofcross 1 +interact 1 +enabl 1 +give 1 +referenceableobject 1 +object 1 +latercross 1 +distanc 1 +good 1 +thati 1 +interest 1 +challeng 1 +exponenti 1 +followingdeepli 1 +emac 1 +full 1 +fledgedsymbol 1 +interfac 1 +directli 1 +justa 1 +well 1 +matrix 1 +dimension 1 +thematrix 1 +commenc 1 +left 1 +right 1 +aseach 1 +element 1 +ofcomput 1 +program 1 +heard 1 +took 1 +secondsto 1 +util 1 +featur 1 +spacenot 1 +human 1 +still 1 +changeth 1 +size 1 +shape 1 +head 1 +take 1 +soon 1 +even 1 +long 1 +forget 1 +begin 1 +thetim 1 +later 1 +techniquefor 1 +proper 1 +glori 1 +like 1 +upon 1 +request 1 +replac 1 +identifi 1 +renderingsconvei 1 +thesub 1 +separ 1 +perform 1 +orpostscript 1 +equival 1 +case 1 +lower 1 +constraint 1 +numer 1 +denomin 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..2470a15d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,63 @@ +comput 6 +ravi 4 +scienc 4 +foundat 3 +self 3 +learn 3 +confer 3 +kumar 2 +cornel 2 +program 2 +check 2 +approxim 2 +ieee 2 +octob 2 +funda 2 +uumln 2 +ronitt 2 +rubinfeld 2 +test 2 +sivakumar 2 +lnc 2 +theori 2 +combinator 2 +parallel 2 +process 2 +depart 1 +univers 1 +ithaca 1 +polynomi 1 +function 1 +equat 1 +effici 1 +correct 1 +linear 1 +recurr 1 +without 1 +gener 1 +bottleneck 1 +softwar 1 +technolog 1 +theoret 1 +decemb 1 +bound 1 +width 1 +branch 1 +juli 1 +latin 1 +squar 1 +extens 1 +june 1 +alexand 1 +russel 1 +sundaram 1 +scalabl 1 +studi 1 +intern 1 +august 1 +jeyakumar 1 +muthukumarasami 1 +umakishor 1 +ramachandran 1 +gautam 1 +shah 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..16e6f91c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,147 @@ +comput 8 +ramin 5 +zabih 5 +vision 5 +imag 4 +avail 4 +justin 4 +confer 4 +interest 3 +multimedia 3 +also 3 +greg 3 +pass 3 +miller 3 +base 3 +ieee 3 +novemb 3 +program 3 +current 2 +us 2 +undergradu 2 +content 2 +workshop 2 +third 2 +teach 2 +introduct 2 +cours 2 +page 2 +cvpr 2 +held 2 +home 1 +pageramin 1 +zabihassist 1 +professorrdz 1 +cornel 1 +researchmi 1 +research 1 +construct 1 +search 1 +engin 1 +method 1 +develop 1 +recent 1 +think 1 +econom 1 +impact 1 +freeli 1 +price 1 +inform 1 +essai 1 +subject 1 +appear 1 +phil 1 +agr 1 +electron 1 +newslett 1 +network 1 +observ 1 +march 1 +studentsi 1 +work 1 +student 1 +jing 1 +huang 1 +vera 1 +kettnak 1 +olga 1 +veksler 1 +spend 1 +fair 1 +amount 1 +time 1 +variou 1 +princip 1 +voskuhl 1 +includ 1 +scott 1 +cytacki 1 +szewczyk 1 +publicationsmost 1 +public 1 +postscript 1 +acrobat 1 +format 1 +free 1 +reader 1 +varieti 1 +differ 1 +architectur 1 +adob 1 +histogram 1 +refin 1 +retriev 1 +applic 1 +sarasota 1 +florida 1 +decemb 1 +compar 1 +color 1 +coher 1 +vector 1 +fourth 1 +boston 1 +massachusett 1 +featur 1 +algorithm 1 +detect 1 +classifi 1 +scene 1 +break 1 +kevin 1 +francisco 1 +california 1 +parametr 1 +local 1 +transform 1 +visual 1 +correspond 1 +john 1 +woodfil 1 +european 1 +stockholm 1 +sweden 1 +teachingi 1 +spring 1 +scribe 1 +note 1 +lectur 1 +taught 1 +profession 1 +activitiesi 1 +comitte 1 +pattern 1 +recognit 1 +juan 1 +june 1 +organ 1 +committe 1 +access 1 +video 1 +librari 1 +conjunct 1 +acknowledgementsthi 1 +design 1 +courtesi 1 +huttenlocherlast 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..723aa152 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,4 @@ +roderick 1 +moten 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..6a5763f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,45 @@ +ronitt 3 +comput 3 +scienc 3 +rubinfeld 2 +cornel 2 +depart 2 +fall 2 +homepageronitt 1 +rubinfeldi 1 +assist 1 +professor 1 +recent 1 +paper 1 +talk 1 +cours 1 +random 1 +spring 1 +engin 1 +graduat 1 +student 1 +funda 1 +ergun 1 +ravi 1 +kumar 1 +fair 1 +homepag 1 +wasserman 1 +page 1 +describ 1 +work 1 +research 1 +area 1 +result 1 +check 1 +address 1 +rubinfeldcomput 1 +upson 1 +hallcornel 1 +universityithaca 1 +york 1 +telephon 1 +email 1 +edupictur 1 +nephew 1 +eitan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..d2a644fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,73 @@ +comput 5 +scienc 4 +depart 3 +birman 3 +distribut 3 +technolog 3 +friedman 3 +doctor 2 +associ 2 +cornel 2 +work 2 +system 2 +project 2 +thedepart 2 +thetechnion 2 +israel 2 +institut 2 +technic 2 +report 2 +cornellunivers 2 +implement 2 +friedmanroi 1 +friedmanpost 1 +universityroi 1 +edui 1 +post 1 +scienceatcornel 1 +univers 1 +withken 1 +androbbert 1 +rennessein 1 +area 1 +mainli 1 +thehoru 1 +receiv 1 +advisor 1 +washagit 1 +attiya 1 +thesi 1 +titl 1 +wasconsist 1 +condit 1 +share 1 +memori 1 +current 1 +also 1 +involv 1 +themilliped 1 +withassaf 1 +schuster 1 +recent 1 +papersr 1 +trade 1 +consist 1 +avail 1 +us 1 +group 1 +commun 1 +reliabl 1 +scalabledistribut 1 +coprocessor 1 +appear 1 +tina 1 +vaysburd 1 +replic 1 +state 1 +machin 1 +partition 1 +network 1 +full 1 +list 1 +public 1 +clickher 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..b0e1a7c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,28 @@ +cornel 3 +daniela 2 +home 2 +page 2 +research 2 +comput 2 +scienc 2 +associ 1 +photograph 1 +address 1 +upson 1 +hall 1 +depart 1 +univers 1 +ithaca 1 +model 1 +simul 1 +recent 1 +paper 1 +version 1 +onlin 1 +tech 1 +report 1 +librari 1 +catalogc 1 +dept 1 +infodesign 1 +institut 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..12da7273 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,92 @@ +system 8 +horu 8 +ithaca 3 +jazz 3 +dutch 3 +group 3 +commun 3 +research 2 +brand 2 +network 2 +design 2 +perform 2 +lightweight 2 +version 2 +protocol 2 +composit 2 +support 2 +robbert 1 +renesserobbert 1 +renessesenior 1 +associatecornel 1 +universityrvr 1 +cornel 1 +edui 1 +senior 1 +associ 1 +depart 1 +comput 1 +scienceatcornel 1 +universityinithaca 1 +work 1 +withken 1 +birmanin 1 +area 1 +distribut 1 +advisor 1 +wasandi 1 +tanenbaum 1 +interestsmi 1 +babi 1 +girl 1 +hous 1 +tacoma 1 +project 1 +caml 1 +applet 1 +nynetth 1 +ageless 1 +band 1 +swing 1 +danc 1 +guitar 1 +accordion 1 +sharewar 1 +stuffcornel 1 +club 1 +contain 1 +mani 1 +link 1 +netherland 1 +ithacaithacanet 1 +spinner 1 +market 1 +place 1 +paperssoftwar 1 +reliabl 1 +scientif 1 +american 1 +html 1 +framework 1 +incorpor 1 +resourc 1 +inform 1 +flow 1 +control 1 +strong 1 +weak 1 +virtual 1 +synchroni 1 +flexibl 1 +secur 1 +architectur 1 +fault 1 +toler 1 +complex 1 +multi 1 +media 1 +applic 1 +us 1 +oper 1 +mobil 1 +agent 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..45ebf5f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,100 @@ +sabel 6 +laura 5 +system 5 +comput 5 +cornel 4 +marzullo 4 +univers 4 +failur 4 +distribut 4 +page 4 +scienc 4 +detect 3 +detector 3 +keith 3 +technic 3 +report 3 +postscript 3 +copi 3 +click 3 +proceed 3 +inform 2 +research 2 +asynchron 2 +approxim 2 +perfect 2 +asynchronousdistribut 2 +version 2 +appear 2 +symposium 2 +octob 2 +jelli 2 +bingo 2 +profession 1 +doctor 1 +professorkeith 1 +california 1 +diego 1 +formor 1 +tushar 1 +chandra 1 +sfailur 1 +final 1 +finish 1 +thesi 1 +public 1 +elect 1 +consensu 1 +februari 1 +submit 1 +process 1 +letter 1 +annual 1 +principl 1 +distributedcomput 1 +august 1 +reliabl 1 +march 1 +revis 1 +june 1 +us 1 +consist 1 +subcut 1 +stabl 1 +properti 1 +intern 1 +workshop 1 +algorithm 1 +wdag 1 +publish 1 +springer 1 +verlag 1 +lecturenot 1 +seri 1 +expand 1 +horizon 1 +cow 1 +interest 1 +strawberri 1 +tart 1 +blow 1 +torch 1 +experi 1 +alpacanet 1 +electron 1 +gourmet 1 +guid 1 +thebobbi 1 +award 1 +especi 1 +spam 1 +belli 1 +bean 1 +free 1 +sampl 1 +answer 1 +survei 1 +zone 1 +canplai 1 +cash 1 +prize 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..2ab61f55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,168 @@ +system 11 +toueg 10 +distribut 7 +wait 7 +free 7 +object 7 +consensu 6 +comput 5 +solv 5 +failur 5 +fault 4 +algorithm 4 +toler 4 +asynchron 4 +process 4 +hierarchi 4 +journal 4 +research 3 +work 3 +jayanti 3 +failuredetector 3 +share 3 +explor 3 +implement 3 +level 3 +chandra 3 +proceed 3 +symposium 3 +interest 2 +messag 2 +pass 2 +memori 2 +result 2 +prasad 2 +scienc 2 +determin 2 +crash 2 +unreli 2 +detector 2 +correct 2 +weakest 2 +type 2 +clock 2 +synchron 2 +databas 2 +neiger 2 +principl 2 +august 2 +canada 2 +faculti 1 +professorph 1 +princeton 1 +univers 1 +interestsmi 1 +includ 1 +toleranceand 1 +real 1 +time 1 +methodolog 1 +paradigm 1 +forfault 1 +andshar 1 +long 1 +term 1 +goal 1 +bridg 1 +gapbetween 1 +theoret 1 +need 1 +effici 1 +practicalsolut 1 +collabor 1 +withtushar 1 +chandraand 1 +student 1 +continu 1 +onunreli 1 +fundament 1 +computingst 1 +problem 1 +cannot 1 +adeterminist 1 +impossibilityresult 1 +inher 1 +difficulti 1 +whether 1 +aprocess 1 +mere 1 +slow 1 +inour 1 +abl 1 +exactli 1 +much 1 +informationabout 1 +necessari 1 +suffici 1 +wefirst 1 +show 1 +canmak 1 +infinit 1 +number 1 +mistak 1 +systemswith 1 +major 1 +prove 1 +solveconsensu 1 +provid 1 +least 1 +muchinform 1 +thu 1 +amajor 1 +practicalityof 1 +applic 1 +reli 1 +theircorrect 1 +concurr 1 +consist 1 +commun 1 +sharedobject 1 +accessesthi 1 +guarante 1 +respons 1 +even 1 +otherprocess 1 +ofobject 1 +assign 1 +thatcorrespond 1 +abil 1 +particular 1 +shown 1 +well 1 +known 1 +herlihi 1 +robust 1 +inform 1 +anobject 1 +us 1 +atani 1 +question 1 +whetherrobust 1 +exist 1 +select 1 +public 1 +bracha 1 +broadcast 1 +protocol 1 +srikanth 1 +optim 1 +abbadi 1 +maintain 1 +avail 1 +partit 1 +replic 1 +transact 1 +automat 1 +increas 1 +montreal 1 +hadzilaco 1 +detectorfor 1 +vancouv 1 +ieee 1 +foundat 1 +octob 1 +pittsburgh 1 +pennsylvania 1 +simul 1 +common 1 +knowledg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..06b82326 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,100 @@ +weber 11 +compil 6 +samuel 5 +cornel 5 +univers 4 +program 4 +comput 4 +semant 4 +bloom 4 +silicon 4 +page 4 +assist 3 +design 3 +public 3 +confer 3 +brown 3 +act 2 +professor 2 +master 2 +engin 2 +scienc 2 +research 2 +softwar 2 +languag 2 +distribut 2 +system 2 +technic 2 +report 2 +submit 2 +verifi 2 +algebra 2 +thesi 2 +editor 2 +proceed 2 +messag 2 +complex 2 +byzantin 2 +agreement 2 +upson 1 +hallphon 1 +email 1 +educurr 1 +director 1 +interest 1 +specif 1 +verif 1 +cours 1 +technolog 1 +techniqu 1 +fall 1 +introduct 1 +spring 1 +metatheori 1 +calculu 1 +formal 1 +delai 1 +insensit 1 +circuit 1 +cornellunivers 1 +journal 1 +process 1 +meta 1 +theori 1 +practic 1 +august 1 +exercis 1 +appli 1 +structur 1 +oper 1 +workshop 1 +foundat 1 +applic 1 +bakker 1 +roever 1 +rozenberg 1 +lectur 1 +note 1 +springer 1 +verlag 1 +scheme 1 +knight 1 +savag 1 +advanc 1 +vlsi 1 +parallel 1 +amdur 1 +hadzilaco 1 +binari 1 +crash 1 +failur 1 +bound 1 +toronto 1 +septemb 1 +seshadri 1 +wortman 1 +small 1 +analysi 1 +concurr 1 +sigplan 1 +implement 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..7e105827 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,81 @@ +comput 6 +project 5 +system 5 +graphic 5 +isi 5 +master 3 +engin 3 +cornel 3 +window 3 +interest 3 +object 3 +orbix 3 +sean 2 +landi 2 +current 2 +cours 2 +render 2 +draw 2 +base 2 +design 2 +orient 2 +work 2 +team 2 +basebal 2 +landissean 1 +sciencewelcom 1 +home 1 +page 1 +resum 1 +inform 1 +advanc 1 +databas 1 +systemsc 1 +past 1 +machin 1 +percept 1 +final 1 +analyz 1 +color 1 +book 1 +clickherefor 1 +postscript 1 +version 1 +sampl 1 +weanalyz 1 +topic 1 +content 1 +imag 1 +retriev 1 +interior 1 +educ 1 +program 1 +patternsprofession 1 +distribut 1 +divis 1 +stratu 1 +lead 1 +develop 1 +product 1 +combin 1 +acorba 1 +compliant 1 +request 1 +broker 1 +iona 1 +technolog 1 +releas 1 +person 1 +favorit 1 +alpin 1 +ski 1 +golf 1 +plai 1 +softbal 1 +card 1 +collectingi 1 +reach 1 +comeduc 1 +sheet 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..e203bfd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,62 @@ +comput 6 +system 5 +project 4 +seena 3 +scienc 3 +engin 3 +oper 3 +graphic 3 +univers 2 +cornel 2 +ithaca 2 +cherangara 1 +cherangaramast 1 +engineeringclass 1 +dept 1 +sciencecornel 1 +welcom 1 +homepagecurr 1 +student 1 +depart 1 +tech 1 +degre 1 +colleg 1 +trivandrum 1 +kerala 1 +india 1 +inform 1 +cours 1 +taken 1 +cornelluniversityfal 1 +practicum 1 +specif 1 +hoca 1 +softwar 1 +multimedia 1 +post 1 +processingalgorithm 1 +jpeg 1 +artifact 1 +reduct 1 +spring 1 +cspracticum 1 +anim 1 +magic 1 +carpet 1 +distribut 1 +colloqium 1 +summer 1 +model 1 +java 1 +parametr 1 +equat 1 +viewer 1 +click 1 +postscript 1 +version 1 +myresum 1 +mapl 1 +york 1 +last 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..6f9ab63d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,238 @@ +imag 10 +rosen 8 +sharma 8 +video 7 +us 7 +like 6 +languag 5 +name 5 +internet 4 +file 4 +sharmila 4 +stanford 3 +multicast 3 +protocol 3 +develop 3 +system 3 +script 3 +widget 3 +call 3 +mean 3 +cornel 2 +student 2 +stream 2 +media 2 +part 2 +link 2 +softwar 2 +class 2 +also 2 +text 2 +vxtreme 2 +applic 2 +paper 2 +oper 2 +support 2 +nativ 2 +mode 2 +releas 2 +written 2 +indian 2 +stuff 2 +cool 2 +manipul 2 +data 2 +make 2 +implement 2 +give 2 +imagefram 2 +interpret 2 +shell 2 +gener 2 +record 2 +postscript 2 +write 2 +peopl 2 +claim 2 +hors 2 +offic 1 +upson 1 +hall 1 +email 1 +came 1 +spent 1 +coupl 1 +year 1 +work 1 +research 1 +live 1 +audio 1 +modifiedigmp 1 +unicast 1 +layer 1 +virtual 1 +classroom 1 +initi 1 +prototyp 1 +deploi 1 +spring 1 +fall 1 +quarter 1 +altern 1 +instruct 1 +televis 1 +network 1 +sitn 1 +program 1 +microwav 1 +remot 1 +site 1 +asynchron 1 +access 1 +lectur 1 +note 1 +stumbl 1 +upon 1 +fact 1 +slide 1 +portion 1 +enhanc 1 +greatli 1 +leadto 1 +format 1 +compani 1 +palo 1 +alto 1 +silicon 1 +vallei 1 +start 1 +client 1 +sever 1 +multimedia 1 +deliveri 1 +signal 1 +keshav 1 +sigcomm 1 +segment 1 +mix 1 +navin 1 +chaddha 1 +avneesh 1 +agarw 1 +anoop 1 +gupta 1 +asilomar 1 +igmp 1 +group 1 +membership 1 +design 1 +steve 1 +deer 1 +internetdraft 1 +bill 1 +fenner 1 +optic 1 +charact 1 +recognit 1 +statist 1 +structur 1 +method 1 +niten 1 +malhan 1 +bachelor 1 +thesi 1 +dept 1 +comput 1 +scienc 1 +institut 1 +technolog 1 +delhiunpublish 1 +character 1 +variabl 1 +rate 1 +sourc 1 +term 1 +preform 1 +conferenc 1 +intern 1 +report 1 +first 1 +type 1 +allow 1 +sequenc 1 +blur 1 +speckl 1 +transform 1 +affin 1 +subband 1 +motion 1 +estmat 1 +fast 1 +effici 1 +writen 1 +current 1 +test 1 +machin 1 +displai 1 +flavour 1 +look 1 +width 1 +height 1 +ifram 1 +nodisplai 1 +filenam 1 +putimageincanva 1 +predecessor 1 +hate 1 +motif 1 +yacc 1 +limit 1 +given 1 +compil 1 +dummi 1 +event 1 +snooper 1 +player 1 +consid 1 +coolest 1 +thing 1 +ever 1 +someth 1 +similar 1 +microsoft 1 +window 1 +doesnt 1 +replai 1 +lot 1 +kludg 1 +fool 1 +server 1 +fractal 1 +creat 1 +directori 1 +hole 1 +viewer 1 +fix 1 +dissalow 1 +semant 1 +question 1 +often 1 +ask 1 +gaveth 1 +chines 1 +friend 1 +wonder 1 +small 1 +smart 1 +hindi 1 +tongu 1 +sharm 1 +shyness 1 +actress 1 +tagor 1 +nicknam 1 +frozen 1 +stupid 1 +ealri 1 +jewish 1 +leader 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..305762cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,114 @@ +page 6 +comput 4 +eric 3 +home 3 +univers 3 +cornel 3 +korean 3 +project 3 +music 3 +friend 3 +shim 2 +view 2 +version 2 +scienc 2 +meng 2 +acoust 2 +movi 2 +jazz 2 +kwan 2 +pagewelcom 1 +young 1 +sang 1 +shimmast 1 +engin 1 +sciencecornel 1 +address 1 +dryden 1 +citi 1 +ithaca 1 +phone 1 +click 1 +onthi 1 +receiv 1 +degre 1 +california 1 +irvinestudi 1 +inform 1 +system 1 +camera 1 +transform 1 +abstract 1 +final 1 +graphic 1 +classi 1 +love 1 +plai 1 +follow 1 +instrument 1 +guitar 1 +piano 1 +keyboard 1 +listen 1 +stan 1 +getz 1 +antonio 1 +carlo 1 +jobim 1 +john 1 +coltran 1 +mile 1 +davi 1 +earl 1 +klugh 1 +metheni 1 +archemi 1 +chopin 1 +watch 1 +cinema 1 +paradiso 1 +french 1 +kiss 1 +miser 1 +miss 1 +saigon 1 +favorit 1 +korea 1 +newswant 1 +know 1 +graduat 1 +student 1 +associ 1 +anybodi 1 +like 1 +check 1 +interest 1 +java 1 +cyberspac 1 +hana 1 +work 1 +melco 1 +last 1 +time 1 +went 1 +world 1 +jung 1 +hwan 1 +middl 1 +school 1 +back 1 +victor 1 +hong 1 +jiyang 1 +kang 1 +homepag 1 +access 1 +timessinc 1 +still 1 +construct 1 +resum 1 +avail 1 +near 1 +futur 1 +also 1 +soon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..02e32c45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,169 @@ +text 16 +amit 12 +singhal 12 +gerard 10 +salton 10 +chri 9 +bucklei 9 +retriev 7 +automat 7 +smart 5 +us 5 +paper 5 +trec 5 +structur 5 +research 4 +inform 4 +group 4 +document 4 +cornel 3 +normal 3 +system 3 +confer 3 +mandar 3 +jame 3 +allan 3 +student 3 +home 2 +scienc 2 +process 2 +thesi 2 +prof 2 +gerardsalton 2 +current 2 +field 2 +length 2 +size 2 +lengthnorm 2 +propos 2 +pivot 2 +techniqu 2 +nist 2 +provid 2 +select 2 +mandarmitra 2 +decomposit 2 +mitra 2 +appear 2 +theme 2 +gener 2 +analysi 2 +pageamit 1 +singhaldepart 1 +comput 1 +universitysingh 1 +eduphon 1 +interest 1 +area 1 +andtext 1 +advisor 1 +late 1 +supervisor 1 +clairecardieher 1 +postscript 1 +copi 1 +resum 1 +depart 1 +beenon 1 +foremost 1 +informationretriev 1 +last 1 +thirti 1 +year 1 +involv 1 +fairli 1 +vari 1 +commonli 1 +term 1 +weight 1 +show 1 +thateffect 1 +chancessimilar 1 +likelihood 1 +relev 1 +modifi 1 +exist 1 +normalizationfunct 1 +yield 1 +substanti 1 +improv 1 +retrievaleffect 1 +also 1 +effect 1 +normalizationtechniqu 1 +trecparticipationtext 1 +arpa 1 +sponsoredeffort 1 +object 1 +evalu 1 +variou 1 +retrievaltechniqu 1 +independ 1 +testb 1 +hasconsist 1 +best 1 +somepap 1 +summarizationnon 1 +expositori 1 +usual 1 +read 1 +cover 1 +tocov 1 +reader 1 +help 1 +circumst 1 +selectiveaccess 1 +excerpt 1 +need 1 +develop 1 +toanalyz 1 +tool 1 +texttravers 1 +papersnorm 1 +documentlength 1 +mitraand 1 +degrad 1 +collect 1 +come 1 +soon 1 +approach 1 +usingsmart 1 +queri 1 +expans 1 +proceedingsof 1 +third 1 +special 1 +public 1 +segment 1 +textthem 1 +hypertext 1 +andmanag 1 +brows 1 +vectorspac 1 +model 1 +proceed 1 +ofth 1 +dual 1 +technolog 1 +applic 1 +travers 1 +summar 1 +machineread 1 +amitsingh 1 +june 1 +groupmemb 1 +senior 1 +associ 1 +david 1 +master 1 +engin 1 +other 1 +slowli 1 +fluctuat 1 +thank 1 +visit 1 +page 1 +visitor 1 +sinc 1 +iinstal 1 +counter 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..e39c0a77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,153 @@ +name 7 +cornel 4 +version 4 +work 3 +home 3 +build 3 +keshav 2 +skeshav 2 +depart 2 +comput 2 +scienc 2 +univers 2 +ithaca 2 +sinc 2 +network 2 +idlinet 2 +base 2 +avail 2 +nativ 2 +mode 2 +also 2 +site 2 +paper 2 +real 2 +simul 2 +peopl 2 +last 2 +keshavemail 1 +upson 1 +hall 1 +christoph 1 +lane 1 +edui 1 +current 1 +associ 1 +professor 1 +spentfiv 1 +year 1 +xunet 1 +wide 1 +area 1 +built 1 +scratch 1 +router 1 +switch 1 +softwar 1 +oper 1 +incollabor 1 +delhi 1 +equip 1 +fore 1 +systemsand 1 +zeitnet 1 +idlinetsourc 1 +code 1 +public 1 +domain 1 +page 1 +featur 1 +protocol 1 +stack 1 +applicationget 1 +write 1 +directli 1 +virtual 1 +circuit 1 +support 1 +independ 1 +signal 1 +span 1 +compliant 1 +final 1 +goof 1 +talk 1 +head 1 +facial 1 +anim 1 +snoop 1 +send 1 +video 1 +format 1 +mbone 1 +canb 1 +driven 1 +remot 1 +internet 1 +linkspapersher 1 +linkto 1 +postscript 1 +reali 1 +packet 1 +level 1 +still 1 +maintain 1 +instal 1 +idea 1 +mani 1 +actual 1 +link 1 +latest 1 +releas 1 +fall 1 +includ 1 +beout 1 +goe 1 +well 1 +native_mod 1 +pagemi 1 +namein 1 +part 1 +world 1 +come 1 +south 1 +india 1 +thanjavur 1 +district 1 +beprecis 1 +prefix 1 +father 1 +sonli 1 +sometim 1 +villag 1 +surnam 1 +thu 1 +myfath 1 +srinivasan 1 +unfortun 1 +round 1 +intoth 1 +squar 1 +hole 1 +custom 1 +first 1 +beconfus 1 +quotabl 1 +quot 1 +ought 1 +everi 1 +least 1 +hear 1 +littl 1 +song 1 +read 1 +good 1 +poem 1 +possibl 1 +speak 1 +reason 1 +word 1 +johann 1 +wolfgang 1 +goeth 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..18d84fe0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,23 @@ +cornel 4 +kenneth 2 +page 2 +engin 2 +depart 2 +home 1 +road 1 +success 1 +alwai 1 +construct 1 +meng 1 +electr 1 +sinc 1 +work 1 +prof 1 +zabih 1 +place 1 +student 1 +came 1 +univers 1 +wisconsin 1 +madison 1 +sunlab 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..6a107c9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,329 @@ +seem 7 +clear 5 +much 5 +sound 5 +start 5 +quit 4 +also 4 +like 4 +us 4 +work 4 +pictur 4 +around 3 +school 3 +well 3 +look 3 +talk 3 +wear 3 +ring 3 +visit 3 +friend 3 +last 3 +page 3 +come 2 +tree 2 +especi 2 +figur 2 +snow 2 +insid 2 +leav 2 +label 2 +sai 2 +small 2 +presum 2 +place 2 +glass 2 +book 2 +vagu 2 +topic 2 +read 2 +probabl 2 +somewhat 2 +hair 2 +link 2 +next 2 +obviou 2 +heha 2 +beaver 2 +finger 2 +left 2 +someon 2 +peopl 2 +time 2 +internet 2 +half 2 +year 2 +summer 2 +busi 2 +better 2 +perri 2 +world 2 +cornel 2 +step 1 +forest 1 +clearinglook 1 +realiz 1 +must 1 +walk 1 +thanyou 1 +plan 1 +wide 1 +varieti 1 +surround 1 +onal 1 +side 1 +theweath 1 +fairli 1 +overcast 1 +somehow 1 +ifit 1 +go 1 +rain 1 +perhap 1 +distanc 1 +larg 1 +mountain 1 +quiteclear 1 +question 1 +hear 1 +bird 1 +chirp 1 +near 1 +cours 1 +theymai 1 +respond 1 +louder 1 +nearbywaterfal 1 +gotta 1 +could 1 +follow 1 +want 1 +apath 1 +direct 1 +path 1 +asign 1 +hillschool 1 +sign 1 +hell 1 +wormhol 1 +connect 1 +nearbyhous 1 +traffic 1 +clearinginsid 1 +coupl 1 +structur 1 +shack 1 +door 1 +fall 1 +offand 1 +complet 1 +modern 1 +hous 1 +withno 1 +stone 1 +front 1 +pile 1 +score 1 +magazin 1 +random 1 +paper 1 +scatter 1 +throughout 1 +theclear 1 +rhyme 1 +reasonto 1 +glanc 1 +sortsof 1 +recent 1 +betteridea 1 +make 1 +person 1 +tick 1 +resum 1 +itseem 1 +corner 1 +importantth 1 +worri 1 +kind 1 +strang 1 +clearingh 1 +smile 1 +hello 1 +oftendescrib 1 +thin 1 +mother 1 +mostdistinct 1 +featur 1 +bright 1 +golden 1 +quitelong 1 +elfin 1 +seen 1 +peoplebefor 1 +warn 1 +paragraph 1 +written 1 +theresoon 1 +alwai 1 +thing 1 +startstel 1 +stori 1 +stop 1 +hum 1 +tune 1 +tell 1 +whynichola 1 +negropont 1 +moron 1 +ifyou 1 +never 1 +heard 1 +polit 1 +late 1 +twentieth 1 +centuri 1 +america 1 +thenh 1 +paus 1 +obscur 1 +theorem 1 +theoret 1 +comput 1 +scienc 1 +rather 1 +listen 1 +hetend 1 +appearanceinstead 1 +mostli 1 +color 1 +purpl 1 +dark 1 +turquois 1 +everyth 1 +either 1 +silk 1 +linen 1 +contrast 1 +nice 1 +gold 1 +imageof 1 +right 1 +point 1 +awai 1 +fromhim 1 +silver 1 +ocean 1 +wave 1 +pattern 1 +pewter 1 +pentacl 1 +neck 1 +andlook 1 +altogeth 1 +hippi 1 +asclass 1 +intellectu 1 +clearingdan 1 +occasion 1 +spent 1 +briani 1 +live 1 +anundergrad 1 +nowadai 1 +pointcast 1 +newsprovid 1 +ancamosoiu 1 +pronounc 1 +schwa 1 +best 1 +backwhen 1 +mani 1 +week 1 +twoand 1 +went 1 +europ 1 +togeth 1 +wegradu 1 +inner 1 +child 1 +shejust 1 +onewav 1 +blame 1 +dread 1 +name 1 +actual 1 +usedto 1 +object 1 +power 1 +daniel 1 +issomeon 1 +gotten 1 +know 1 +severalmonth 1 +think 1 +sometim 1 +novemb 1 +becamemuch 1 +switch 1 +eedepart 1 +wise 1 +multimediastud 1 +dept 1 +commun 1 +reason 1 +inth 1 +own 1 +friendof 1 +finlei 1 +notanymor 1 +instead 1 +brian 1 +steelcas 1 +isth 1 +largest 1 +manufactur 1 +offic 1 +furnitur 1 +dserver 1 +kinda 1 +cheesi 1 +haveth 1 +pyramid 1 +still 1 +neat 1 +thebuild 1 +ius 1 +sing 1 +japan 1 +programcal 1 +hire 1 +teach 1 +english 1 +high 1 +student 1 +ideal 1 +winter 1 +hani 1 +graduatedfrom 1 +june 1 +couldn 1 +happen 1 +neededto 1 +great 1 +even 1 +nick 1 +agood 1 +cuter 1 +thanth 1 +blurri 1 +would 1 +indic 1 +music 1 +maker 1 +dreamer 1 +ofdream 1 +aphex 1 +twindan 1 +brown 1 +snowman 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..079036b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,25 @@ +lookin 2 +home 2 +page 2 +autobiographi 1 +upkeep 1 +lot 1 +inform 1 +ultra 1 +cool 1 +soon 1 +keep 1 +take 1 +long 1 +setuup 1 +doingajaymanishanujmom 1 +daddepart 1 +comput 1 +sciencesearch 1 +netentertain 1 +weeklycricket 1 +rate 1 +ashish 1 +soni 1 +sonia 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..c63be7dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,11 @@ +scott 1 +dawson 1 +padif 1 +us 1 +form 1 +capabl 1 +browser 1 +would 1 +better 1 +scottdawson 1 +shomebas 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..4aa371cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,25 @@ +cornel 2 +comput 2 +paul 1 +stodghil 1 +home 1 +pagepaul 1 +stodghillstodghil 1 +rhode 1 +hall 1 +affili 1 +depart 1 +scienc 1 +atcornel 1 +univers 1 +advanc 1 +research 1 +institut 1 +acri 1 +theori 1 +center 1 +bernoulli 1 +projectinterest 1 +ultim 1 +hockei 1 +scheme 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..e44092d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,13 @@ +stoller 3 +home 3 +former 2 +page 2 +scott 1 +pagescott 1 +move 1 +http 1 +indiana 1 +hyplan 1 +htmllast 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..6b1c870e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,49 @@ +sugata 3 +cornel 3 +work 3 +mukhopadhyai 2 +home 2 +page 2 +system 2 +take 2 +high 2 +perform 2 +phone 2 +welcom 1 +graduat 1 +student 1 +depart 1 +ofcomput 1 +scienc 1 +univers 1 +multimedia 1 +prof 1 +brian 1 +smith 1 +marri 1 +wonder 1 +person 1 +earth 1 +ritu 1 +spring 1 +comput 1 +compil 1 +design 1 +architectur 1 +also 1 +advanc 1 +option 1 +price 1 +theori 1 +czar 1 +progress 1 +seminar 1 +previou 1 +semest 1 +contact 1 +mehom 1 +mailsugata 1 +eduaddress 1 +hichori 1 +estat 1 +owego 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..eb29c04a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,28 @@ +sukhpal 3 +paul 2 +sanghera 2 +univers 2 +cornel 2 +home 1 +page 1 +physic 1 +carleton 1 +present 1 +student 1 +comput 1 +scienc 1 +ithaca 1 +background 1 +project 1 +philosophi 1 +life 1 +resum 1 +rout 1 +clock 1 +tick 1 +need 1 +java 1 +capabl 1 +browser 1 +view 1 +anim 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..bfb8364e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,132 @@ +gater 6 +episod 5 +lord 4 +strip 3 +employe 3 +first 3 +microsoft 3 +parti 3 +part 3 +bilth 2 +galact 2 +weekli 2 +drew 2 +read 2 +work 2 +empir 1 +empirewritten 1 +illustr 1 +sumedh 1 +kanetkaremail 1 +kanetkar 1 +cornel 1 +eduthi 1 +seri 1 +comic 1 +intern 1 +atmicrosoft 1 +summer 1 +post 1 +theintern 1 +social 1 +alia 1 +regularli 1 +peopl 1 +thesumm 1 +progress 1 +notic 1 +artwork 1 +begun 1 +leak 1 +theful 1 +time 1 +well 1 +whether 1 +high 1 +never 1 +found 1 +stripi 1 +within 1 +week 1 +arriv 1 +redmond 1 +tri 1 +persuad 1 +themicrosoft 1 +newslett 1 +print 1 +perceiv 1 +problemand 1 +declin 1 +didn 1 +want 1 +portrai 1 +evilempir 1 +understand 1 +viewpoint 1 +told 1 +comicstrip 1 +attempt 1 +show 1 +compani 1 +view 1 +mani 1 +theoutsid 1 +world 1 +person 1 +bitter 1 +feel 1 +toward 1 +eitherbil 1 +gate 1 +corpor 1 +heck 1 +realli 1 +enjoi 1 +summersof 1 +strongli 1 +recommend 1 +internship 1 +program 1 +anyoneinterest 1 +industri 1 +make 1 +orient 1 +thateveri 1 +suffer 1 +long 1 +session 1 +theyshow 1 +video 1 +fill 1 +kind 1 +trivia 1 +also 1 +makey 1 +sign 1 +disclosur 1 +agreeement 1 +would 1 +fit 1 +theymad 1 +everyon 1 +stand 1 +place 1 +hand 1 +theirheart 1 +pledg 1 +alleig 1 +comput 1 +everydesk 1 +everi 1 +home 1 +run 1 +softwar 1 +anywai 1 +space 1 +roosterepisod 1 +rebel 1 +threatepisod 1 +flame 1 +imperi 1 +insigniaepisod 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..2b6088ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,167 @@ +document 13 +structur 5 +electron 4 +logic 4 +summer 3 +cornel 3 +inform 3 +brows 3 +piec 3 +issu 3 +flexibl 3 +comput 3 +proceed 3 +kristen 2 +student 2 +univers 2 +research 2 +interest 2 +work 2 +goal 2 +support 2 +index 2 +primari 2 +technic 2 +report 2 +divid 2 +type 2 +retriev 2 +number 2 +paper 2 +us 2 +scienc 2 +autom 2 +upson 1 +hall 1 +captur 1 +accessresearch 1 +group 1 +analysi 1 +mylong 1 +term 1 +provid 1 +forsophist 1 +manipulationtool 1 +link 1 +discov 1 +logicalstructur 1 +arbitrari 1 +take 1 +documentrepresent 1 +input 1 +return 1 +hierarchyof 1 +output 1 +exampl 1 +given 1 +scan 1 +postscriptvers 1 +would 1 +like 1 +tobe 1 +abl 1 +section 1 +paragraph 1 +similarli 1 +busi 1 +letter 1 +address 1 +head 1 +bodi 1 +close 1 +identifi 1 +problem 1 +compon 1 +segment 1 +andclassif 1 +categor 1 +also 1 +rais 1 +question 1 +evalu 1 +previou 1 +differ 1 +descript 1 +correct 1 +hierarchi 1 +theoret 1 +limit 1 +task 1 +relev 1 +bruce 1 +croft 1 +stop 1 +novemb 1 +magazin 1 +interfac 1 +effici 1 +determininglog 1 +enabl 1 +hierarch 1 +soin 1 +gener 1 +system 1 +handl 1 +ofmultipl 1 +textual 1 +cue 1 +browsingco 1 +author 1 +daniela 1 +digit 1 +librari 1 +current 1 +nabil 1 +adam 1 +bharat 1 +bhargava 1 +yelena 1 +yesha 1 +editor 1 +chapter 1 +lectur 1 +note 1 +seri 1 +springer 1 +verlag 1 +version 1 +geometr 1 +algorithm 1 +experi 1 +mathemat 1 +model 1 +forthcom 1 +white 1 +space 1 +workshop 1 +principl 1 +process 1 +seeheim 1 +podp 1 +toward 1 +taxonomi 1 +structureselectron 1 +publish 1 +superhighwai 1 +dartmouth 1 +institut 1 +advanc 1 +graduat 1 +studi 1 +boston 1 +donald 1 +johnson 1 +memori 1 +dag 1 +scholaraward 1 +best 1 +recipi 1 +near 1 +wordless 1 +classif 1 +intern 1 +confer 1 +analysisand 1 +recognit 1 +montral 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..bb703530 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,55 @@ +system 5 +project 4 +oper 3 +comput 3 +masafumi 2 +java 2 +research 2 +engin 2 +spring 2 +introduct 2 +graphic 2 +databas 2 +suzukither 1 +would 1 +applet 1 +browser 1 +suppot 1 +suzukisuzuki 1 +cornel 1 +educlassesfal 1 +case 1 +studi 1 +optim 1 +probabl 1 +statist 1 +design 1 +analysi 1 +simul 1 +stochast 1 +model 1 +summer 1 +data 1 +structur 1 +fall 1 +softwar 1 +technolog 1 +techniqu 1 +program 1 +multimedia 1 +report 1 +network 1 +telecommun 1 +polici 1 +thrive 1 +inform 1 +revolut 1 +sector 1 +site 1 +manag 1 +independ 1 +polygon 1 +displai 1 +us 1 +prototyp 1 +resum 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..2363f7cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,28 @@ +swartz 5 +jonathan 3 +cornel 3 +home 1 +page 1 +edui 1 +student 1 +departmentof 1 +comput 1 +scienc 1 +univers 1 +spend 1 +time 1 +heredevelopingrivl 1 +languag 1 +multimedia 1 +process 1 +myaddress 1 +phone 1 +number 1 +littl 1 +humor 1 +brighten 1 +dayjon 1 +movi 1 +connectioncool 1 +siteslast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..fbd07ddb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,23 @@ +sunil 2 +home 2 +page 2 +class 2 +srivastava 1 +pagewelcom 1 +srivastavamast 1 +engin 1 +studentcomput 1 +scienc 1 +departmentcornel 1 +univers 1 +academ 1 +project 1 +person 1 +inform 1 +us 1 +linkscom 1 +question 1 +send 1 +mail 1 +sxsriva 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..9637b2dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,218 @@ +year 5 +model 5 +quit 3 +cours 3 +program 3 +huang 2 +steven 2 +seven 2 +home 2 +thu 2 +love 2 +read 2 +life 2 +comput 2 +univers 2 +like 2 +career 2 +found 2 +ofcomput 2 +want 2 +mani 2 +network 2 +build 2 +watch 2 +also 2 +project 2 +involv 2 +huangszu 1 +defend 1 +truth 1 +champion 1 +justic 1 +around 1 +nice 1 +nevermind 1 +long 1 +exactli 1 +iarriv 1 +second 1 +proud 1 +parent 1 +soundslik 1 +mobi 1 +dick 1 +assur 1 +nointent 1 +find 1 +ship 1 +hunt 1 +whale 1 +digress 1 +brought 1 +taiwan 1 +tender 1 +wholefamili 1 +migrat 1 +south 1 +tropic 1 +island 1 +philippin 1 +made 1 +live 1 +fifteen 1 +pictur 1 +aroundsix 1 +becam 1 +fluentli 1 +bilingu 1 +thepoetri 1 +tang 1 +dynasti 1 +arabian 1 +night 1 +natur 1 +children 1 +version 1 +host 1 +stori 1 +somewhat 1 +fulfil 1 +name 1 +mean 1 +literatur 1 +class 1 +grade 1 +doveright 1 +marvel 1 +four 1 +later 1 +ienter 1 +philippineswith 1 +major 1 +talent 1 +draw 1 +scienc 1 +unabashedli 1 +knew 1 +good 1 +alsoin 1 +colleg 1 +whirlwind 1 +happi 1 +peac 1 +three 1 +run 1 +awoman 1 +becom 1 +import 1 +part 1 +effortlessli 1 +defeat 1 +hobbi 1 +eek 1 +ideal 1 +higher 1 +pai 1 +blunt 1 +ienrol 1 +cornel 1 +graduat 1 +reward 1 +almost 1 +everyth 1 +ever 1 +andwork 1 +lucki 1 +septemb 1 +welcom 1 +page 1 +segreg 1 +everydaygeek 1 +think 1 +interest 1 +asid 1 +fromactu 1 +write 1 +happili 1 +myspar 1 +time 1 +anyth 1 +calvin 1 +hobb 1 +unix 1 +internet 1 +relai 1 +chat 1 +gener 1 +linuxnet 1 +lego 1 +thing 1 +practic 1 +object 1 +orient 1 +design 1 +plastic 1 +weapon 1 +suspens 1 +thriller 1 +film 1 +listen 1 +music 1 +sting 1 +other 1 +sesam 1 +street 1 +discoveri 1 +channel 1 +railroad 1 +rich 1 +enough 1 +field 1 +rather 1 +broad 1 +spectrum 1 +ofinterest 1 +though 1 +studi 1 +concentr 1 +area 1 +graphic 1 +wish 1 +offend 1 +bysom 1 +blatant 1 +self 1 +promot 1 +check 1 +myresum 1 +highlyinterest 1 +linux 1 +freeli 1 +avail 1 +oper 1 +system 1 +intel 1 +compatiblecomput 1 +master 1 +engin 1 +blobbi 1 +metaballsund 1 +supervis 1 +bruce 1 +land 1 +current 1 +anoth 1 +survei 1 +techniquesin 1 +human 1 +face 1 +resolut 1 +independ 1 +andport 1 +audio 1 +effect 1 +editor 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..b00230b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,6 @@ +henzing 1 +hytechhytech 1 +hybrid 1 +technolog 1 +toolw 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..402d423a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,55 @@ +time 6 +analysi 5 +formal 4 +methodolog 4 +real 3 +system 3 +cornel 2 +concurr 2 +embed 2 +automata 2 +hybrid 2 +henzingerthoma 1 +henzing 1 +movedassist 1 +professorcomput 1 +scienc 1 +departmentcornel 1 +universityithaca 1 +email 1 +eduphon 1 +researchform 1 +support 1 +develop 1 +relat 1 +researchat 1 +cornelland 1 +worldwid 1 +resumepublicationsreact 1 +modul 1 +systemsr 1 +logic 1 +transit 1 +systemsclock 1 +systemshybrid 1 +systemsbibliographi 1 +bibtex 1 +list 1 +publicationstoolshytech 1 +symbol 1 +model 1 +checker 1 +linear 1 +systemscoursesc 1 +fall 1 +advanc 1 +program 1 +languagesconferenceshybrid 1 +verif 1 +control 1 +systemscav 1 +comput 1 +aid 1 +verificationlast 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..e6bda7f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,38 @@ +takako 3 +hickei 2 +interest 2 +homepag 1 +email 1 +cornel 1 +eduoffic 1 +upson 1 +hallphon 1 +student 1 +depart 1 +comput 1 +scienc 1 +atcornel 1 +univers 1 +advis 1 +byrobbert 1 +reness 1 +andfr 1 +schneider 1 +research 1 +distribut 1 +system 1 +program 1 +environ 1 +resourc 1 +manag 1 +horu 1 +project 1 +previou 1 +life 1 +social 1 +psycholog 1 +backcountri 1 +hockei 1 +quot 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..6f851713 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,23 @@ +comput 2 +cornel 2 +program 2 +tim_teitelbaum 1 +teitelbaumassoci 1 +professor 1 +depart 1 +scienc 1 +univers 1 +eduresearch 1 +interest 1 +increment 1 +transform 1 +environ 1 +languag 1 +base 1 +editor 1 +compil 1 +attribut 1 +grammar 1 +adavita 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..3bdd1bde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,207 @@ +eicken 14 +parallel 13 +comput 10 +culler 10 +architectur 9 +schauser 8 +activ 6 +messag 6 +program 6 +goldstein 5 +proceed 5 +commun 4 +fall 4 +symp 4 +compil 4 +proc 4 +thorsten 3 +user 3 +level 3 +network 3 +high 3 +split 3 +system 3 +talk 3 +report 3 +annual 3 +paper 3 +mechan 3 +machin 3 +fine 3 +grain 3 +languag 3 +cluster 2 +workstat 2 +us 2 +interconnect 2 +sever 2 +port 2 +departement 2 +entri 2 +forum 2 +slide 2 +basu 2 +buch 2 +appear 2 +june 2 +version 2 +gold 2 +coast 2 +australia 2 +novemb 2 +berkelei 2 +abstract 2 +diego 2 +conf 2 +control 2 +multithread 2 +support 2 +eickenassist 1 +professor 1 +upson 1 +hallphon 1 +email 1 +cornel 1 +eduprojectsth 1 +architectureprovid 1 +interfacefor 1 +offer 1 +latencyand 1 +bandwidth 1 +speed 1 +lan 1 +currentimplement 1 +project 1 +platform 1 +includingth 1 +extend 1 +model 1 +tonon 1 +spmd 1 +simpl 1 +extensionto 1 +newplatform 1 +includ 1 +share 1 +memori 1 +multprocessor 1 +run 1 +coursesc 1 +introduct 1 +digit 1 +computerorgan 1 +perform 1 +spring 1 +frontier 1 +guest 1 +lectur 1 +maynd 1 +department 1 +person 1 +pagestv 1 +pond 1 +real 1 +water 1 +fish 1 +plant 1 +tire 1 +firewal 1 +macpppwhich 1 +gener 1 +time 1 +password 1 +automat 1 +without 1 +everhav 1 +think 1 +well 1 +month 1 +passwordssuddenli 1 +installationinstruct 1 +select 1 +publicationsu 1 +interfac 1 +distributedcomput 1 +anindya 1 +vineet 1 +werner 1 +vogel 1 +latenc 1 +atmnetwork 1 +avula 1 +present 1 +palo 1 +alto 1 +abridg 1 +ieee 1 +micro 1 +magazin 1 +integr 1 +andcomput 1 +effici 1 +communicationarchitectur 1 +multiprocessor 1 +thesi 1 +univers 1 +california 1 +publish 1 +link 1 +lead 1 +postscript 1 +dusseau 1 +krishnamurthi 1 +lumetta 1 +yelick 1 +supercomput 1 +controlledthread 1 +journal 1 +distribut 1 +special 1 +issu 1 +dataflow 1 +evalu 1 +spertu 1 +dalli 1 +logp 1 +toward 1 +realist 1 +modelof 1 +karp 1 +patterson 1 +sahai 1 +santo 1 +subramonian 1 +fourth 1 +sigplan 1 +principl 1 +practic 1 +fundament 1 +limit 1 +dataflowmultiprocess 1 +ifip 1 +work 1 +techniqu 1 +medium 1 +orlando 1 +forintegr 1 +symposium 1 +forleni 1 +confer 1 +function 1 +cambridg 1 +august 1 +minimalhardwar 1 +thread 1 +wawrzynek 1 +oper 1 +santa 1 +clara 1 +april 1 +analysi 1 +architecturesfor 1 +saavedra 1 +barrera 1 +algorithm 1 +crete 1 +greec 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..0c8cbfc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,43 @@ +erlingsson 3 +lfar 2 +pagelfar 1 +specificationi 1 +student 1 +incomput 1 +scienc 1 +cornel 1 +univers 1 +apart 1 +enjoi 1 +somewhat 1 +incongruousiceland 1 +link 1 +inform 1 +implementationbackgroundwher 1 +come 1 +current 1 +activitieswhat 1 +moment 1 +schedulewher 1 +time 1 +researchwhat 1 +real 1 +work 1 +done 1 +interestswhat 1 +actual 1 +like 1 +acquaintancesthos 1 +know 1 +contact 1 +infohow 1 +touch 1 +pleas 1 +note 1 +page 1 +often 1 +date 1 +gener 1 +assum 1 +disclaim 1 +appli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..282f5918 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,49 @@ +gener 5 +mesh 4 +softwar 3 +packag 3 +element 3 +releas 3 +page 3 +vavasi 3 +finit 2 +geometr 2 +home 2 +univers 2 +cornel 2 +project 1 +relat 1 +threedimens 1 +includ 1 +model 1 +themesh 1 +solver 1 +free 1 +softwaredownload 1 +run 1 +unix 1 +window 1 +andqmg 1 +novemb 1 +us 1 +websit 1 +robert 1 +schneider 1 +mcphedran 1 +offinit 1 +resourc 1 +minnesota 1 +geometri 1 +center 1 +list 1 +ofsoftwar 1 +computationalgeometri 1 +jonathan 1 +shewchuk 1 +triangl 1 +back 1 +stephen 1 +comput 1 +scienc 1 +depart 1 +ithaca 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..39c24169 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,147 @@ +vavasi 9 +mesh 5 +code 4 +numer 4 +click 4 +gener 4 +method 3 +boundari 3 +algorithm 3 +stephen 2 +cornel 2 +email 2 +phone 2 +argonn 2 +interest 2 +analysi 2 +problem 2 +recent 2 +avail 2 +line 2 +complet 2 +mitchel 2 +ratio 2 +triangul 2 +softwar 2 +packag 2 +sourc 2 +level 2 +anonym 2 +matlab 2 +well 2 +page 2 +associ 1 +professor 1 +depart 1 +comput 1 +scienc 1 +rhode 1 +hall 1 +univers 1 +ithaca 1 +period 1 +onsabbat 1 +divis 1 +bldg 1 +nation 1 +laboratori 1 +cass 1 +note 1 +chang 1 +area 1 +effect 1 +research 1 +aren 1 +tsure 1 +pleas 1 +essaybi 1 +colleagu 1 +trefethen 1 +specif 1 +optim 1 +complex 1 +issuesnumer 1 +valu 1 +problemsgeometr 1 +aris 1 +scientif 1 +computingspars 1 +matrix 1 +computationsi 1 +manuscript 1 +primal 1 +dual 1 +acceler 1 +interiorpoint 1 +whose 1 +run 1 +time 1 +depend 1 +hough 1 +orthogon 1 +decompositionfor 1 +weight 1 +least 1 +squar 1 +aspect 1 +bound 1 +gridcut 1 +hyperplan 1 +driscol 1 +conform 1 +map 1 +us 1 +cross 1 +delaunai 1 +packagei 1 +project 1 +forth 1 +finit 1 +element 1 +three 1 +dimens 1 +call 1 +construct 1 +polyhedr 1 +geometr 1 +object 1 +verycompl 1 +topolog 1 +hole 1 +intern 1 +andautomat 1 +creat 1 +unstructuredtetrahedr 1 +base 1 +work 1 +scott 1 +also 1 +solv 1 +ellipt 1 +boundaryvalu 1 +grad 1 +domain 1 +iswritten 1 +distributedfor 1 +free 1 +distributionbegan 1 +releas 1 +novemb 1 +featur 1 +manyimprov 1 +includ 1 +faster 1 +vrml 1 +graphic 1 +much 1 +cleaner 1 +compat 1 +microsoft 1 +window 1 +unix 1 +compatibilitywith 1 +pleasese 1 +document 1 +annual 1 +reportback 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..befeca06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,10 @@ +arun 1 +verma 1 +homepag 1 +need 1 +browser 1 +support 1 +frame 1 +netscap 1 +higher 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..eb3d6123 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,10 @@ +vinc 1 +browser 1 +us 1 +suck 1 +download 1 +date 1 +netscap 1 +read 1 +page 1 +thank 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..198364fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,15 @@ +vitrano 4 +pagec 2 +home 1 +pagehei 1 +start 1 +thing 1 +give 1 +break 1 +internet 1 +engin 1 +advanc 1 +databas 1 +multimedia 1 +pageer 1 +cornel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..23804479 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,101 @@ +seed 5 +page 4 +time 4 +browser 4 +home 3 +us 3 +java 3 +kolla 2 +specif 2 +scrollit 2 +reach 2 +view 2 +wonder 2 +dont 2 +know 2 +anim 2 +ground 1 +copyright 1 +right 1 +reserv 1 +held 1 +respons 1 +unwant 1 +effect 1 +usag 1 +applet 1 +deriv 1 +warrante 1 +usabl 1 +applic 1 +given 1 +impli 1 +function 1 +vivek 1 +million 1 +zillion 1 +call 1 +send 1 +mail 1 +cornel 1 +expect 1 +back 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +visitor 1 +number 1 +happi 1 +contact 1 +info 1 +transmit 1 +thoughtsfriend 1 +foeslinksa 1 +small 1 +collect 1 +relev 1 +life 1 +maintain 1 +larg 1 +list 1 +favorit 1 +link 1 +think 1 +wast 1 +search 1 +someth 1 +might 1 +tryalta 1 +vista 1 +yahoo 1 +theinktomiresumein 1 +htmlin 1 +postscriptin 1 +word 1 +perfectin 1 +asciith 1 +current 1 +good 1 +clock 1 +wanna 1 +around 1 +world 1 +need 1 +capabl 1 +site 1 +construct 1 +mani 1 +imag 1 +heavi 1 +file 1 +like 1 +note 1 +promis 1 +made 1 +regard 1 +qualiti 1 +visit 1 +fulli 1 +support 1 +technolog 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..7144b548 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,81 @@ +work 4 +lawyer 4 +professor 4 +difficulti 3 +like 2 +said 2 +friend 2 +find 2 +everi 2 +number 2 +vlad 1 +home 1 +pagevladimir 1 +kotlyarvladimir 1 +cornel 1 +look 1 +fall 1 +david 1 +wereteach 1 +might 1 +guess 1 +graduat 1 +student 1 +depart 1 +comput 1 +scienc 1 +cornellunivers 1 +prof 1 +keshav 1 +pingali 1 +research 1 +interest 1 +compil 1 +high 1 +perform 1 +architectur 1 +particular 1 +parallel 1 +spars 1 +matrix 1 +code 1 +part 1 +bernoulli 1 +project 1 +member 1 +group 1 +paul 1 +stodghil 1 +andindu 1 +kodukulapubl 1 +henri 1 +kissing 1 +legal 1 +profess 1 +remind 1 +comment 1 +abritish 1 +judg 1 +differ 1 +sveri 1 +simpl 1 +lord 1 +den 1 +function 1 +asolut 1 +present 1 +wherea 1 +functionof 1 +solut 1 +todayth 1 +seem 1 +outpac 1 +ofsolut 1 +either 1 +hardenough 1 +mani 1 +govern 1 +support 1 +privaci 1 +strong 1 +encrypt 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..bffc58ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,17 @@ +cornel 2 +address 2 +ithaca 2 +vijai 1 +menonvijai 1 +menon 1 +graduat 1 +student 1 +offic 1 +home 1 +rhode 1 +hall 1 +mapl 1 +univers 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..69ec737e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,49 @@ +depart 3 +chen 2 +comput 2 +cornel 2 +univers 2 +interest 2 +spare 2 +time 2 +home 1 +pagewei 1 +upson 1 +hall 1 +sciencecornel 1 +universityithaca 1 +weichen 1 +current 1 +third 1 +year 1 +student 1 +computersci 1 +receiv 1 +bachelorand 1 +master 1 +degre 1 +scienc 1 +tsinghua 1 +beij 1 +chinami 1 +academ 1 +distributedsystem 1 +fault 1 +toler 1 +algorithm 1 +work 1 +professor 1 +toueg 1 +failur 1 +detect 1 +group 1 +membership 1 +inpartition 1 +network 1 +system 1 +soccer 1 +resum 1 +bookmark 1 +last 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..9672c963 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,21 @@ +page 2 +weitsang 1 +homepag 1 +lwhere 1 +fromwhat 1 +watchwhat 1 +movi 1 +likec 1 +wrotepictur 1 +drawa 1 +window 1 +motifcomput 1 +theoryhom 1 +vimi 1 +tsearch 1 +webcoolest 1 +sitessharewar 1 +archivem 1 +newspap 1 +onlineunivers 1 +site 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..e4b8c7eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,82 @@ +comput 6 +scienc 3 +hung 2 +glavin 2 +graduat 2 +univers 2 +cornel 2 +tenni 2 +favorit 2 +system 2 +multimedia 2 +project 2 +address 1 +mapl 1 +avenu 1 +ithaca 1 +telephon 1 +photo 1 +academ 1 +background 1 +nation 1 +taiwan 1 +plan 1 +habit 1 +sport 1 +basketbal 1 +billiard 1 +tabl 1 +bowl 1 +swim 1 +volleybal 1 +other 1 +sing 1 +drive 1 +danc 1 +except 1 +studi 1 +team 1 +orlando 1 +magic 1 +atlanta 1 +brave 1 +player 1 +anferne 1 +hardawai 1 +technic 1 +skill 1 +understand 1 +distribut 1 +oper 1 +graphic 1 +network 1 +databas 1 +vision 1 +financi 1 +calcul 1 +extens 1 +window 1 +java 1 +program 1 +final 1 +paper 1 +warp 1 +morph 1 +rivl 1 +partial 1 +result 1 +meng 1 +webpaint 1 +job 1 +interest 1 +market 1 +relat 1 +field 1 +softwar 1 +develop 1 +resum 1 +page 1 +still 1 +construct 1 +email 1 +whkao 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..92a1516e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,150 @@ +comput 6 +research 4 +system 3 +florida 3 +school 3 +optim 3 +parallel 3 +mpeg 3 +encod 3 +concerto 3 +william 2 +cornel 2 +univers 2 +project 2 +engin 2 +cornellopoli 2 +network 2 +databas 2 +tenni 2 +south 2 +spring 2 +collect 2 +piano 2 +probabl 2 +classesc 2 +softwar 2 +distribut 2 +visitor 1 +jersei 1 +exit 1 +minut 1 +awai 1 +princeton 1 +master 1 +student 1 +scienceat 1 +degre 1 +engineeringand 1 +mathemat 1 +sciencefrom 1 +carneig 1 +mellon 1 +didresearch 1 +design 1 +center 1 +robot 1 +institu 1 +spent 1 +year 1 +write 1 +oper 1 +xsro 1 +hpux 1 +motorola 1 +atft 1 +lauderdal 1 +besid 1 +sleep 1 +work 1 +like 1 +sector 1 +analysi 1 +partner 1 +compet 1 +usta 1 +tournment 1 +could 1 +never 1 +somehow 1 +parti 1 +enjoi 1 +weekli 1 +match 1 +mani 1 +beauti 1 +place 1 +plai 1 +faviorit 1 +on 1 +boca 1 +ratonkei 1 +west 1 +get 1 +coral 1 +live 1 +beethoven 1 +chopin 1 +gershwin 1 +liszt 1 +mendelssohn 1 +mozart 1 +rachmaninoff 1 +ravel 1 +tchaikovski 1 +also 1 +violinconcerto 1 +guess 1 +type 1 +even 1 +wrote 1 +graduat 1 +applic 1 +essai 1 +base 1 +reason 1 +reject 1 +fall 1 +technolog 1 +techniquec 1 +formal 1 +methodsc 1 +multimedia 1 +systemsc 1 +scienc 1 +colloquiumc 1 +cool 1 +tool 1 +seminar 1 +purifi 1 +quantifi 1 +wart 1 +present 1 +practic 1 +computingc 1 +practicum 1 +high 1 +perform 1 +computerc 1 +thrive 1 +inform 1 +revolut 1 +sectorcool 1 +links_leap 1 +copi 1 +frogski 1 +serverident 1 +crisi 1 +testweath 1 +undergroundinktomi 1 +search 1 +enginequest 1 +week 1 +archiveslast 1 +updat 1 +campu 1 +address 1 +mapl 1 +ecithaca 1 +york 1 +wwlee 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..0e951414 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,54 @@ +comput 5 +xichun 3 +zhejiang 3 +welcom 2 +jennif 2 +home 2 +depart 2 +current 2 +master 2 +scienc 2 +univers 2 +shade 2 +cours 2 +network 2 +page 1 +upson 1 +hall 1 +sciencecornel 1 +universityithaca 1 +offic 1 +cornel 1 +edui 1 +engin 1 +student 1 +atcornel 1 +receiv 1 +bachelor 1 +degre 1 +hangzhou 1 +china 1 +site 1 +javaworldsunhigh 1 +school 1 +alumni 1 +alumnimeng 1 +project 1 +phong 1 +gouraud 1 +spring 1 +graphic 1 +high 1 +capac 1 +inform 1 +databas 1 +manag 1 +taken 1 +fall 1 +multimedia 1 +systemsc 1 +softwar 1 +engineeringc 1 +oper 1 +systeme 1 +communicationby 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..cdbbcf17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,2 @@ +topic 1 +interest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..0c88682f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,80 @@ +program 12 +increment 10 +transform 5 +comput 5 +base 4 +cachet 3 +effici 3 +improv 3 +analysi 3 +teitelbaum 3 +proceed 3 +systemat 2 +attribut 2 +interact 2 +system 2 +languag 2 +deriv 2 +sigplan 2 +symposium 2 +principl 2 +anni 2 +relat 1 +project 1 +computationderiv 1 +programsa 1 +gener 1 +approach 1 +themeprogram 1 +usessystemat 1 +techniqu 1 +deriveincrement 1 +written 1 +function 1 +select 1 +public 1 +scienc 1 +februari 1 +cach 1 +intermedi 1 +result 1 +partial 1 +evalu 1 +semant 1 +manipul 1 +page 1 +jolla 1 +california 1 +june 1 +stoller 1 +discov 1 +auxiliari 1 +inform 1 +annual 1 +sigact 1 +petersburg 1 +beach 1 +florida 1 +januari 1 +knowledg 1 +softwar 1 +engin 1 +confer 1 +boston 1 +massachusett 1 +novemb 1 +ieee 1 +societi 1 +press 1 +strength 1 +reduct 1 +juli 1 +peoplei 1 +liutim 1 +teitelbaumkeyword 1 +optim 1 +cacheti 1 +yanhong 1 +cornel 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..8f4968cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,191 @@ +program 26 +comput 24 +increment 22 +deriv 11 +scienc 11 +univers 9 +report 9 +teitelbaum 8 +cornel 8 +confer 8 +systemat 7 +base 7 +york 7 +technic 7 +proceed 7 +system 7 +intern 7 +transform 6 +softwar 6 +beij 6 +juli 6 +depart 6 +research 5 +improv 5 +effici 5 +ithaca 5 +symposium 5 +cach 5 +intermedi 5 +result 5 +young 5 +china 5 +august 5 +techniqu 4 +select 4 +page 4 +scientist 4 +webster 4 +yanhong 3 +home 3 +gener 3 +approach 3 +languag 3 +algorithm 3 +develop 3 +public 3 +semant 3 +januari 3 +sigplan 3 +principl 3 +cachet 3 +knowledg 3 +novemb 3 +press 3 +evalu 3 +california 3 +zhang 3 +wang 3 +combin 3 +center 3 +anni 2 +associ 2 +analysi 2 +optim 2 +octob 2 +discov 2 +auxiliari 2 +inform 2 +annual 2 +sigact 2 +petersburg 2 +beach 2 +florida 2 +interact 2 +attribut 2 +engin 2 +boston 2 +massachusett 2 +partial 2 +manipul 2 +jolla 2 +june 2 +reason 2 +qualit 2 +quantit 2 +multi 2 +factor 2 +problem 2 +march 2 +xerox 2 +institut 2 +dagstuhl 2 +expert 2 +offic 2 +indiana 2 +pageyanhong 1 +post 1 +doctor 1 +work 1 +professor 1 +interest 1 +ofcomput 1 +forincrement 1 +parallel 1 +concurr 1 +applic 1 +compil 1 +interactivesystem 1 +design 1 +systemorgan 1 +mainten 1 +talksph 1 +dissert 1 +basedsystemat 1 +also 1 +appear 1 +abstractjourn 1 +februari 1 +refere 1 +stoller 1 +ieee 1 +societi 1 +peke 1 +tsinghua 1 +formal 1 +uncertainti 1 +model 1 +partit 1 +descript 1 +fuzzi 1 +world 1 +congress 1 +seattl 1 +washington 1 +inexact 1 +profession 1 +publish 1 +hous 1 +survei 1 +map 1 +septemb 1 +revis 1 +wakayama 1 +line 1 +break 1 +talk 1 +kestrel 1 +palo 1 +alto 1 +seminar 1 +dynam 1 +schloss 1 +germani 1 +automat 1 +laboratori 1 +document 1 +us 1 +obtain 1 +present 1 +oggeb 1 +basin 1 +implement 1 +test 1 +usag 1 +manual 1 +summari 1 +explor 1 +ri 1 +tshinghua 1 +author 1 +song 1 +huang 1 +current 1 +project 1 +compos 1 +build 1 +compon 1 +upson 1 +hallcornel 1 +universityithaca 1 +last 1 +updat 1 +move 1 +tocomput 1 +department 1 +lindlei 1 +hallindiana 1 +universitybloomington 1 +eduhttp 1 +peopl 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..f94e96f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,24 @@ +cornel 4 +cheng 3 +huang 2 +depart 2 +comput 2 +scienc 2 +univers 2 +ychuang 2 +home 1 +page 1 +huangyi 1 +upson 1 +hall 1 +ithaca 1 +email 1 +edui 1 +graduat 1 +student 1 +favorit 1 +link 1 +onlin 1 +documentscoursesprojectaccess 1 +byvisitorslast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..f6ba6c77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,120 @@ +school 7 +like 5 +comput 4 +korea 4 +high 4 +music 4 +cornel 3 +came 3 +java 3 +ilbo 3 +april 2 +studi 2 +scienc 2 +master 2 +degre 2 +engin 2 +america 2 +myoung 2 +junior 2 +virginia 2 +husband 2 +chang 2 +work 2 +would 2 +plai 2 +keyboard 2 +piano 2 +korean 2 +us 2 +search 2 +resum 2 +email 2 +chung 1 +home 1 +pagewelcom 1 +everybodi 1 +name 1 +chungyou 1 +thvisitor 1 +sinc 1 +univers 1 +pleas 1 +check 1 +sciencecornel 1 +universitywher 1 +origin 1 +seoul 1 +graduat 1 +went 1 +kindergarten 1 +elementari 1 +sang 1 +kang 1 +sung 1 +women 1 +georg 1 +mason 1 +universityin 1 +happiest 1 +thing 1 +life 1 +marri 1 +wonder 1 +forsaic 1 +programm 1 +pictur 1 +beauti 1 +moment 1 +avail 1 +browser 1 +click 1 +free 1 +actual 1 +better 1 +listen 1 +kind 1 +love 1 +shin 1 +seung 1 +hoon 1 +moon 1 +classic 1 +forth 1 +brows 1 +world 1 +wide 1 +link 1 +interest 1 +want 1 +onlin 1 +newpap 1 +hangook 1 +chosun 1 +joongang 1 +hire 1 +word 1 +perfect 1 +version 1 +meng 1 +project 1 +still 1 +titl 1 +imag 1 +process 1 +appletyoosun 1 +person 1 +infom 1 +triphamm 1 +sbithaca 1 +phone 1 +emerg 1 +ychung 1 +forward 1 +yooschung 1 +automat 1 +page 1 +construct 1 +last 1 +modifi 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..839833a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,221 @@ +page 5 +great 5 +good 4 +cornel 3 +time 3 +live 3 +plai 3 +much 3 +know 3 +take 3 +movi 3 +public 3 +yaron 2 +minski 2 +home 2 +graduat 2 +student 2 +ithaca 2 +place 2 +syracus 2 +toler 2 +comput 2 +flapdragon 2 +longer 2 +game 2 +better 2 +make 2 +easi 2 +medic 2 +school 2 +love 2 +everi 2 +rate 2 +tri 2 +yellow 2 +linux 2 +block 2 +yminski 1 +edudepart 1 +computersci 1 +upson 1 +hall 1 +univers 1 +phone 1 +comstock 1 +current 1 +focus 1 +onfault 1 +distribut 1 +particular 1 +work 1 +thetacoma 1 +project 1 +attempt 1 +build 1 +oper 1 +system 1 +support 1 +forfault 1 +agent 1 +base 1 +year 1 +livether 1 +still 1 +veggi 1 +coop 1 +crash 1 +often 1 +nowadai 1 +slightli 1 +outof 1 +date 1 +webpag 1 +howev 1 +notic 1 +anopen 1 +start 1 +need 1 +highli 1 +recommendit 1 +though 1 +ancientchines 1 +extremlysimpl 1 +rule 1 +complic 1 +satisfi 1 +strategi 1 +like 1 +learn 1 +intro 1 +also 1 +want 1 +internet 1 +nota 1 +real 1 +person 1 +front 1 +thannoth 1 +unix 1 +machin 1 +look 1 +cgoban 1 +nicest 1 +goboard 1 +program 1 +seen 1 +trivial 1 +thenet 1 +newli 1 +marri 1 +wife 1 +lisa 1 +go 1 +suni 1 +health 1 +scienc 1 +center 1 +uniqu 1 +qualifi 1 +within 1 +hour 1 +twenti 1 +minutesof 1 +favoritepoem 1 +lafiglia 1 +piang 1 +theidea 1 +order 1 +west 1 +advicefor 1 +resumesom 1 +interestinglink 1 +critic 1 +impress 1 +exampl 1 +fairli 1 +simpl 1 +technolog 1 +us 1 +effect 1 +site 1 +engin 1 +compar 1 +other 1 +come 1 +recommend 1 +found 1 +almost 1 +alarmingli 1 +contrast 1 +firefli 1 +thing 1 +fail 1 +miser 1 +yahoo 1 +address 1 +give 1 +direct 1 +seven 1 +closest 1 +bakeri 1 +perfect 1 +bigbook 1 +bigyellow 1 +advertis 1 +york 1 +read 1 +free 1 +plu 1 +save 1 +paper 1 +note 1 +download 1 +text 1 +slate 1 +magazin 1 +hate 1 +admit 1 +microsoft 1 +someth 1 +right 1 +thought 1 +well 1 +execut 1 +expect 1 +michael 1 +kinslei 1 +run 1 +compani 1 +instal 1 +maintain 1 +list 1 +amazon 1 +book 1 +solid 1 +discount 1 +virtual 1 +bookstor 1 +hope 1 +local 1 +booksel 1 +town 1 +brother 1 +follow 1 +begin 1 +version 1 +comment 1 +process 1 +mailcrypt 1 +emac 1 +interfacemqbtazgjohoaaaedalfhlgjmdg 1 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 1 +rbylf 1 +zwqujcioczoecv 1 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 1 +gkgarsokrinnoazihja 1 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 1 +wumjgzsnvispwkrvzgdrojswmc 1 +eigsqsb 1 +bsbpw 1 +jcwz 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..6b8f46d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,43 @@ +yuichi 5 +tsuchimoto 3 +cornel 3 +home 2 +current 2 +program 2 +introduct 2 +compil 2 +translatorsc 2 +practicum 2 +artifici 2 +intelligencec 2 +format 2 +info 2 +peopl 2 +pageyuichi 1 +pagecours 1 +workfal 1 +semest 1 +system 1 +oper 1 +systemsc 1 +languag 1 +softwar 1 +engineeringspr 1 +machin 1 +visionfal 1 +foundat 1 +theori 1 +computingi 1 +look 1 +unit 1 +state 1 +resum 1 +postscript 1 +address 1 +eduhttp 1 +last 1 +modif 1 +novemb 1 +http 1 +welcom 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..7cf11453 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,326 @@ +messag 20 +activ 19 +commun 11 +latenc 8 +implement 8 +eicken 7 +network 7 +paper 7 +machin 6 +us 5 +version 5 +overhead 5 +layer 5 +split 5 +perform 5 +design 5 +releas 4 +chang 4 +thorsten 4 +processor 4 +high 4 +describ 4 +show 4 +cluster 4 +architectur 4 +multiprocessor 4 +cost 4 +cornel 3 +part 3 +pleas 3 +read 3 +chao 3 +grzegorz 3 +czajkowski 3 +abstract 3 +power 3 +bandwidth 3 +softwar 3 +activemessag 3 +compar 3 +standard 3 +workstat 3 +allow 3 +mechan 3 +conform 2 +spec 2 +project 2 +avail 2 +inform 2 +includ 2 +know 2 +select 2 +appear 2 +novemb 2 +mpp 2 +pass 2 +order 2 +reduc 2 +first 2 +round 2 +trip 2 +secondpart 2 +demonstr 2 +benchmark 2 +technic 2 +report 2 +characterist 2 +present 2 +detail 2 +gener 2 +specif 2 +interfac 2 +interconnect 2 +oper 2 +without 2 +evalu 2 +driven 2 +berkelei 2 +messagescornel 1 +implementationsact 1 +neta 1 +sourc 1 +code 1 +thegener 1 +moreinform 1 +page 1 +object 1 +codereleas 1 +thegam 1 +readm 1 +instal 1 +file 1 +distribut 1 +instructionson 1 +contact 1 +releasenot 1 +fileto 1 +find 1 +previou 1 +currentvers 1 +also 1 +major 1 +differencebetween 1 +modifi 1 +libmpci 1 +thedistribut 1 +document 1 +packag 1 +fordetail 1 +interest 1 +current 1 +pleaseclick 1 +send 1 +briefnot 1 +let 1 +someth 1 +organ 1 +theus 1 +intend 1 +public 1 +messageslow 1 +ibmrisc 1 +system 1 +chri 1 +hawblitzel 1 +ieeesupercomput 1 +pittsburgh 1 +commerci 1 +spiteof 1 +fast 1 +scommun 1 +inferior 1 +older 1 +tmccm 1 +meiko 1 +investig 1 +primit 1 +altern 1 +standardmessag 1 +tooffer 1 +good 1 +build 1 +block 1 +higher 1 +directli 1 +networkadapt 1 +yieldsa 1 +lower 1 +communicationsubstr 1 +well 1 +cbenchmark 1 +lowmessag 1 +throughput 1 +compens 1 +networklat 1 +base 1 +freeli 1 +availablempich 1 +achiev 1 +equival 1 +onth 1 +februari 1 +andevalu 1 +implementationbenchmark 1 +adapt 1 +firmwar 1 +butdo 1 +assumefamiliar 1 +concept 1 +underli 1 +mainperform 1 +word 1 +timeof 1 +asymptot 1 +focuseson 1 +analysi 1 +comparison 1 +smessag 1 +defin 1 +portabl 1 +across 1 +varieti 1 +parallel 1 +theu 1 +themeiko 1 +thehpam 1 +fddi 1 +ring 1 +theparagon 1 +thesp 1 +networksus 1 +veena 1 +avula 1 +anyndia 1 +basu 1 +vineet 1 +buch 1 +palo 1 +alto 1 +abridg 1 +ieee 1 +micro 1 +magazin 1 +slide 1 +talk 1 +recent 1 +develop 1 +forparallel 1 +made 1 +signific 1 +progress 1 +thecommun 1 +magnitud 1 +ascompar 1 +earlier 1 +propos 1 +examin 1 +whether 1 +thesetechniqu 1 +carri 1 +connect 1 +anatm 1 +even 1 +though 1 +systemsoftwar 1 +equip 1 +optim 1 +streamcommun 1 +direct 1 +protect 1 +user 1 +level 1 +access 1 +thenetwork 1 +reliabl 1 +transmiss 1 +flowcontrol 1 +differ 1 +incommun 1 +builtfrom 1 +hardwar 1 +compon 1 +state 1 +artmultiprocessor 1 +lack 1 +flow 1 +control 1 +systemcoordin 1 +affect 1 +significantli 1 +andrequir 1 +larger 1 +buffer 1 +prototyp 1 +model 1 +clusterinterconnect 1 +measur 1 +showappl 1 +applic 1 +microsecond 1 +smallmessag 1 +roughli 1 +messagesimplement 1 +think 1 +integr 1 +andcomput 1 +culler 1 +goldstein 1 +schauser 1 +proceed 1 +symp 1 +comput 1 +gold 1 +coast 1 +australia 1 +abstractth 1 +challeng 1 +larg 1 +scale 1 +tominim 1 +overlapcomput 1 +coordin 1 +sacrificingprocessor 1 +exist 1 +passingmultiprocessor 1 +unnecessarili 1 +researchprototyp 1 +communicationoverhead 1 +poor 1 +introduc 1 +simplecommun 1 +isintrins 1 +effect 1 +thehardwar 1 +offer 1 +tremend 1 +flexibl 1 +ncube 1 +phase 1 +share 1 +memoryextens 1 +messagesar 1 +suffici 1 +dynam 1 +schedul 1 +languag 1 +forwhich 1 +toler 1 +becom 1 +program 1 +compil 1 +concern 1 +hardwaresupport 1 +desir 1 +outlin 1 +rang 1 +ofenhanc 1 +mainstream 1 +efficientcommun 1 +thesi 1 +univers 1 +california 1 +sitesact 1 +messagesin 1 +projectfor 1 +contactthorsten 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..c6a0aaa8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,202 @@ +system 10 +horu 9 +applic 8 +isi 7 +cornel 6 +technolog 6 +comput 5 +demand 5 +militari 5 +toler 4 +control 4 +distribut 3 +effort 3 +develop 3 +program 3 +fault 3 +high 3 +perform 3 +work 3 +user 3 +plan 3 +environ 2 +year 2 +us 2 +featur 2 +virtual 2 +prior 2 +success 2 +also 2 +commun 2 +media 2 +remot 2 +base 2 +wide 2 +rang 2 +space 2 +branch 2 +project 2 +futur 2 +chang 2 +thu 2 +environmenthoru 1 +kenneth 1 +birman 1 +robbert 1 +reness 1 +shoru 1 +reliabledistribut 1 +last 1 +demonstrategroupwar 1 +network 1 +foundto 1 +offer 1 +higher 1 +similar 1 +novel 1 +ofhoru 1 +flexibl 1 +softwar 1 +architectur 1 +support 1 +synchronousprocess 1 +group 1 +toolkit 1 +becom 1 +signific 1 +commerci 1 +offersa 1 +securityand 1 +privaci 1 +view 1 +importantresearch 1 +advanc 1 +extend 1 +provid 1 +extrem 1 +latenc 1 +performancer 1 +time 1 +capabl 1 +approach 1 +combin 1 +element 1 +calledact 1 +messageswith 1 +multi 1 +playbacksystem 1 +calledcontinu 1 +expect 1 +demonstr 1 +speed 1 +interact 1 +multimediaserv 1 +might 1 +telemedicin 1 +videoon 1 +retain 1 +exist 1 +andsecur 1 +option 1 +synchroni 1 +model 1 +creat 1 +substanti 1 +expectrapid 1 +uptak 1 +within 1 +matur 1 +spana 1 +industri 1 +includ 1 +telecommun 1 +financialtrad 1 +stock 1 +market 1 +autom 1 +factori 1 +floor 1 +process 1 +fordiscret 1 +electron 1 +compon 1 +manufactur 1 +traffic 1 +basedcommun 1 +manag 1 +beingexplor 1 +sever 1 +well 1 +othernon 1 +govern 1 +among 1 +visibl 1 +isth 1 +naval 1 +hiper 1 +explor 1 +systemthat 1 +prototyp 1 +enhanc 1 +aegi 1 +battleradar 1 +would 1 +benefitfrom 1 +access 1 +initi 1 +make 1 +possibl 1 +migrateisi 1 +benefit 1 +communityin 1 +direct 1 +transit 1 +occur 1 +licens 1 +agreementswith 1 +subsidiari 1 +stratu 1 +howev 1 +avail 1 +research 1 +isdescrib 1 +detail 1 +public 1 +manual 1 +look 1 +hope 1 +mixtur 1 +technologieswil 1 +permit 1 +beseen 1 +next 1 +gener 1 +groupwar 1 +illustr 1 +belowshow 1 +mission 1 +integr 1 +data 1 +varieti 1 +ground 1 +resourc 1 +andus 1 +coordin 1 +action 1 +variou 1 +theatr 1 +asset 1 +thissort 1 +utmost 1 +reliabl 1 +secur 1 +whilealso 1 +failur 1 +rapidli 1 +reconfigur 1 +respond 1 +impact 1 +civilianand 1 +dept 1 +scienc 1 +univers 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..da0c6164 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,77 @@ +multimedia 4 +applic 4 +medianet 3 +perform 3 +network 3 +commun 3 +group 3 +cornel 2 +high 2 +process 2 +combin 2 +develop 2 +user 2 +level 2 +improv 2 +facilit 2 +horu 2 +reliabl 2 +toolkit 2 +video 2 +inform 2 +projectmedianet 1 +platform 1 +media 1 +technolog 1 +research 1 +todevelop 1 +flexibl 1 +testb 1 +store 1 +transport 1 +us 1 +data 1 +architectur 1 +access 1 +dramat 1 +protocolsth 1 +order 1 +magnitud 1 +communicationmak 1 +parallel 1 +comput 1 +workstat 1 +cluster 1 +practic 1 +communicationprimit 1 +adapt 1 +industri 1 +strength 1 +tool 1 +secur 1 +primit 1 +critic 1 +foradvanc 1 +militari 1 +commerci 1 +approach 1 +distribut 1 +audio 1 +portabl 1 +build 1 +includeaudio 1 +rapid 1 +prototyp 1 +multimediaappl 1 +fund 1 +project 1 +provid 1 +contract 1 +fromth 1 +darpa 1 +technologyofficefor 1 +contact 1 +thorstenvon 1 +eicken 1 +brian 1 +smith 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..1a8778d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,31 @@ +nuprl 11 +project 3 +theori 3 +cornel 2 +browser 2 +articl 2 +link 2 +autom 1 +reason 1 +introduct 1 +theorem 1 +design 1 +written 1 +vaughn 1 +user 1 +document 1 +relat 1 +public 1 +class 1 +note 1 +linux 1 +announc 1 +suggest 1 +feedback 1 +help 1 +main 1 +index 1 +curiou 1 +mani 1 +page 1 +askaltavista 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..178e033e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,217 @@ +split 15 +program 13 +node 7 +file 7 +messag 7 +granita 6 +includ 6 +debug 6 +activ 5 +comput 4 +exampl 4 +parallel 4 +bench 4 +sourc 4 +setenv 4 +compil 4 +locat 4 +am_run 4 +cuc 3 +us 3 +machin 3 +shell 3 +instal 3 +inform 3 +type 3 +read 3 +local 3 +commun 3 +makefil 3 +directori 3 +librari 3 +also 3 +login 2 +tcsh 2 +bash 2 +experi 2 +first 2 +command 2 +remot 2 +info 2 +softwar 2 +statement 2 +found 2 +execut 2 +look 2 +gmake 2 +follow 2 +return 2 +perform 2 +spam 2 +avail 2 +header 2 +script 2 +ampicc 2 +pleas 2 +scienc 1 +call 1 +eight 1 +granitathrough 1 +design 1 +asinteract 1 +problemsdur 1 +remov 1 +oper 1 +system 1 +specif 1 +stufffrom 1 +configur 1 +haveth 1 +arch 1 +unam 1 +instead 1 +readm 1 +contain 1 +informationabout 1 +releas 1 +addit 1 +manyou 1 +infoexplor 1 +commandsand 1 +usag 1 +displai 1 +properli 1 +job 1 +neither 1 +activemassag 1 +peor 1 +messagesor 1 +hardwar 1 +cornel 1 +theori 1 +center 1 +homegrown 1 +softwarein 1 +gener 1 +besur 1 +path 1 +csplit 1 +simpl 1 +extens 1 +forparallel 1 +provid 1 +global 1 +address 1 +space 1 +though 1 +globalpoint 1 +dereferenc 1 +like 1 +regular 1 +pointer 1 +phase 1 +assign 1 +allow 1 +programm 1 +hide 1 +latencyof 1 +access 1 +overlap 1 +work 1 +user 1 +shellsshould 1 +creat 1 +sampl 1 +variou 1 +must 1 +make 1 +asact 1 +scriptsloc 1 +programfoo 1 +processor 1 +foodebug 1 +step 1 +need 1 +done 1 +insert 1 +splitc_debug 1 +aftersplitc_main 1 +describ 1 +previou 1 +section 1 +commonli 1 +ongranita 1 +enter 1 +continu 1 +hit 1 +onto 1 +want 1 +youwant 1 +master 1 +open 1 +insid 1 +thenattach 1 +theth 1 +proc 1 +process 1 +proce 1 +attach 1 +stop 1 +andyou 1 +breakpoint 1 +stack 1 +frame 1 +messagesact 1 +overhead 1 +layerthat 1 +offer 1 +high 1 +mani 1 +nativ 1 +layer 1 +main 1 +characterist 1 +word 1 +round 1 +triplat 1 +asymptot 1 +network 1 +bandwidth 1 +libspgam 1 +aand 1 +beforerun 1 +runningprgm 1 +mpimpi 1 +popularmessag 1 +pass 1 +interfac 1 +portabl 1 +animplement 1 +base 1 +mpich 1 +run 1 +overact 1 +easiest 1 +link 1 +whichi 1 +built 1 +fooyou 1 +lookat 1 +examplesin 1 +ampi 1 +exactli 1 +likeordinari 1 +sure 1 +softwaresoftwar 1 +fortran 1 +xpdbx 1 +matlab 1 +emac 1 +bison 1 +replic 1 +problemsif 1 +difficulti 1 +contact 1 +czar 1 +grzegorz 1 +czajkowski 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..b06ffa3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,105 @@ +model 4 +project 4 +research 4 +languag 4 +cornel 3 +simul 3 +creat 3 +system 3 +simlab 3 +select 3 +effort 2 +comput 2 +gener 2 +scientif 2 +softwar 2 +collabor 2 +version 2 +program 2 +present 2 +chew 2 +home 1 +page 1 +enorm 1 +current 1 +expend 1 +scientificsoftwar 1 +particularli 1 +physic 1 +defin 1 +oncomplex 1 +geometri 1 +us 1 +advanc 1 +hardwar 1 +thegoal 1 +reduc 1 +bringingtogeth 1 +technolog 1 +geometr 1 +symbolicmathemat 1 +numer 1 +analysi 1 +compil 1 +code 1 +andform 1 +method 1 +tool 1 +rais 1 +semant 1 +levelat 1 +possibl 1 +overview 1 +softwarepackag 1 +activ 1 +mathemat 1 +environ 1 +propos 1 +postscript 1 +guarante 1 +qualiti 1 +mesh 1 +microstoragearchitectur 1 +weyl 1 +computeralgebra 1 +substrat 1 +high 1 +levelprogram 1 +synthes 1 +thechain 1 +algebra 1 +topolog 1 +compon 1 +thearpa 1 +nist 1 +madefast 1 +design 1 +manufactur 1 +exercis 1 +longer 1 +direct 1 +insystem 1 +richard 1 +zippel 1 +ideason 1 +proce 1 +includ 1 +brief 1 +discuss 1 +ofnon 1 +contemporan 1 +commun 1 +microstorag 1 +architectur 1 +theus 1 +transform 1 +chainsprogram 1 +complextopolog 1 +engin 1 +numericalalgorithm 1 +rick 1 +palmer 1 +peopl 1 +public 1 +report 1 +paul 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..0a047f1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,51 @@ +split 10 +inform 5 +eicken 4 +sourc 3 +code 3 +releas 3 +cornel 2 +prepar 2 +page 2 +ccornel 1 +implementationssplit 1 +neta 1 +isimpl 1 +activ 1 +messagesfor 1 +contact 1 +thorsten 1 +ofsplit 1 +distr 1 +implementedon 1 +spam 1 +contactchi 1 +chao 1 +chang 1 +grzegorz 1 +czajkowski 1 +thorstenvon 1 +share 1 +memori 1 +multiprocessorsa 1 +multiprocessor 1 +runningsolari 1 +mattwelsh 1 +select 1 +public 1 +cparallel 1 +program 1 +culler 1 +dusseau 1 +goldstein 1 +krishnamurthi 1 +lumetta 1 +yelick 1 +proceed 1 +supercomput 1 +novemb 1 +abstractproject 1 +sitessplit 1 +chome 1 +berkelei 1 +contactthorsten 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..32c1466f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,9 @@ +page 1 +move 1 +browser 1 +redirect 1 +second 1 +http 1 +cornel 1 +default 1 +html 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..06709ba9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,220 @@ +robot 11 +cornel 9 +donald 9 +comput 7 +vision 6 +paper 6 +list 5 +ieee 5 +manipul 4 +laboratori 3 +csrvl 3 +prof 3 +daniel 3 +applic 3 +zabih 3 +video 3 +base 3 +parallel 3 +justin 3 +avail 3 +report 3 +server 3 +intern 3 +confer 3 +proceed 3 +bhringer 3 +professor 3 +match 2 +direct 2 +scienc 2 +huttenloch 2 +ramin 2 +includ 2 +distribut 2 +micro 2 +electro 2 +mechan 2 +system 2 +bruce 2 +follow 2 +project 2 +automat 2 +break 2 +miller 2 +done 2 +move 2 +select 2 +mani 2 +proc 2 +revis 2 +symposium 2 +foundat 2 +inform 2 +invari 2 +workshop 2 +autom 2 +diego 2 +microfabr 2 +mihailovich 2 +macdonald 2 +technic 2 +associ 2 +greg 2 +csrvlcornel 1 +laboratorywelcom 1 +nich 1 +rrentli 1 +develop 1 +pleas 1 +hard 1 +question 1 +comment 1 +thank 1 +locat 1 +univers 1 +ithaca 1 +three 1 +main 1 +area 1 +ofresearch 1 +multimedia 1 +mem 1 +pictor 1 +tour 1 +current 1 +projectsth 1 +activ 1 +supervis 1 +byramin 1 +detect 1 +andclassif 1 +scene 1 +digit 1 +mpeg 1 +browser 1 +allowingscen 1 +global 1 +motion 1 +queri 1 +real 1 +time 1 +sourc 1 +transmiss 1 +full 1 +frame 1 +onplatform 1 +nynet 1 +cluster 1 +number 1 +involv 1 +high 1 +perform 1 +imag 1 +implement 1 +split 1 +foru 1 +symmetr 1 +multiprocessor 1 +potenti 1 +master 1 +sproject 1 +maintain 1 +work 1 +unix 1 +currentlyconsid 1 +windowsnt 1 +discuss 1 +theissuesher 1 +hope 1 +support 1 +microsoft 1 +publicationsth 1 +research 1 +thecsrvl 1 +anonym 1 +public 1 +tech 1 +serverar 1 +program 1 +mobil 1 +scheme 1 +ree 1 +automationnic 1 +franc 1 +complex 1 +homolog 1 +type 1 +triangul 1 +chang 1 +juan 1 +octob 1 +jen 1 +first 1 +algorithm 1 +peter 1 +boston 1 +wilson 1 +andj 1 +latomb 1 +submit 1 +artifici 1 +intellig 1 +sensor 1 +configur 1 +task 1 +plan 1 +brigg 1 +sensorless 1 +us 1 +massiv 1 +actuatorarrai 1 +theori 1 +control 1 +actuat 1 +arrai 1 +oiso 1 +japan 1 +januari 1 +approach 1 +design 1 +micromechan 1 +hing 1 +structur 1 +extend 1 +abstract 1 +siggraph 1 +solid 1 +model 1 +montral 1 +quebc 1 +canada 1 +authorthes 1 +gener 1 +dynam 1 +index 1 +search 1 +author 1 +titl 1 +keyword 1 +scott 1 +cytacki 1 +pedro 1 +felzenszwalb 1 +ryan 1 +lilien 1 +michel 1 +maharbiz 1 +pass 1 +scharstein 1 +aaron 1 +stump 1 +szewczyk 1 +fernando 1 +viton 1 +voskuhl 1 +wayt 1 +matt 1 +welsh 1 +whelan 1 +assist 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..5385edb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,91 @@ +inform 5 +research 4 +captur 3 +access 3 +group 3 +collect 3 +work 2 +comput 2 +structur 2 +materi 2 +document 2 +construct 2 +report 2 +cornel 2 +projectinform 1 +accessth 1 +waysthat 1 +locat 1 +ever 1 +increas 1 +volum 1 +ofonlin 1 +data 1 +determin 1 +extract 1 +forhuman 1 +user 1 +found 1 +john 1 +hopcroft 1 +davisin 1 +current 1 +area 1 +researchextract 1 +onlin 1 +thestructur 1 +explicit 1 +extractinginform 1 +present 1 +tabular 1 +form 1 +relat 1 +databas 1 +summari 1 +overview 1 +collectionsof 1 +text 1 +nationwid 1 +librari 1 +sciencetechn 1 +begun 1 +digit 1 +computersci 1 +technic 1 +order 1 +make 1 +moreaccess 1 +internet 1 +avail 1 +server 1 +addit 1 +toit 1 +util 1 +gener 1 +commun 1 +thisdocu 1 +test 1 +consist 1 +dean 1 +krafft 1 +visitingscientist 1 +jimdavi 1 +well 1 +number 1 +graduat 1 +undergradu 1 +student 1 +fall 1 +project 1 +activ 1 +longer 1 +jrdpublicationsjam 1 +allan 1 +informationag 1 +build 1 +hyperlink 1 +proceed 1 +confer 1 +oninform 1 +knowledg 1 +manag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..f39152ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,15 @@ +research 2 +multimedia 2 +zeno 1 +groupzeno 1 +cornel 1 +group 1 +peopl 1 +mission 1 +project 1 +paper 1 +softwar 1 +curricula 1 +develop 1 +potpourri 1 +direct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..e3e8a946 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,102 @@ +page 5 +work 3 +home 2 +degre 2 +come 2 +back 2 +current 2 +rain 2 +santa 2 +barbara 2 +cornel 2 +video 2 +pagewelcom 1 +depart 1 +issu 1 +pictur 1 +left 1 +see 1 +quit 1 +date 1 +frame 1 +challeng 1 +viewer 1 +syosset 1 +york 1 +town 1 +long 1 +island 1 +receiv 1 +bachelor 1 +scienc 1 +decemb 1 +month 1 +california 1 +decid 1 +fall 1 +master 1 +engin 1 +leav 1 +land 1 +ithaca 1 +mayb 1 +miss 1 +season 1 +wind 1 +snow 1 +actual 1 +enough 1 +anywai 1 +plan 1 +graduat 1 +meng 1 +project 1 +prof 1 +ramin 1 +zabih 1 +robot 1 +vision 1 +csrvl 1 +interest 1 +topic 1 +motion 1 +segment 1 +gener 1 +process 1 +paper 1 +relat 1 +research 1 +area 1 +link 1 +compani 1 +green 1 +hill 1 +softwar 1 +californialockhe 1 +martin 1 +control 1 +system 1 +binghamton 1 +yorkaltera 1 +corp 1 +jose 1 +californiafun 1 +stuff 1 +game 1 +domainvth 1 +babylon 1 +siteoth 1 +place 1 +univers 1 +worldcareermosaictop 1 +site 1 +student 1 +email 1 +kmai 1 +cours 1 +still 1 +construct 1 +last 1 +modifi 1 +januari 1 +access 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..db992356 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,42 @@ +utc 2 +inform 2 +faculti 2 +depart 2 +public 2 +peopl 2 +home 1 +pagegener 1 +recruit 1 +overview 1 +research 1 +group 1 +relat 1 +program 1 +academ 1 +admiss 1 +requir 1 +cours 1 +descript 1 +catalog 1 +comput 1 +facil 1 +upcom 1 +event 1 +calendar 1 +seminar 1 +talk 1 +visitor 1 +schedulespag 1 +class 1 +person 1 +page 1 +student 1 +organ 1 +alumni 1 +link 1 +find 1 +staff 1 +directoryth 1 +universitywww 1 +informationgrip 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..f300ff1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,67 @@ +intern 4 +mathemat 3 +award 3 +theorem 3 +prove 3 +intellig 3 +board 3 +artificialintellig 3 +woodrow 2 +chair 2 +emeritu 2 +univers 2 +servic 2 +autom 2 +artifici 2 +truste 2 +joint 2 +confer 2 +research 2 +analog 2 +profil 2 +bledso 1 +bledsoepet 1 +donnel 1 +centenni 1 +comput 1 +system 1 +professor 1 +utah 1 +salt 1 +lake 1 +citi 1 +california 1 +berkelei 1 +honor 1 +profession 1 +third 1 +mileston 1 +americanmathemat 1 +societi 1 +distinguish 1 +jointconfer 1 +presid 1 +american 1 +associ 1 +onartifici 1 +editor 1 +journal 1 +presentarea 1 +interestautomat 1 +summari 1 +researchmi 1 +focus 1 +automat 1 +theoremproof 1 +check 1 +involv 1 +heurist 1 +higher 1 +levelplan 1 +well 1 +exampl 1 +alsointerest 1 +learn 1 +previou 1 +index 1 +next 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..dc617406 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,90 @@ +werth 7 +parallel 7 +comput 6 +educ 5 +softwar 4 +engin 4 +brown 4 +mathemat 3 +chair 3 +board 3 +scienc 3 +program 3 +proceed 3 +intern 3 +confer 3 +jain 3 +john 2 +lectur 2 +research 2 +univers 2 +associ 2 +ieee 2 +compil 2 +environ 2 +direct 2 +model 2 +process 2 +august 2 +schedul 2 +oper 2 +profil 2 +werthsenior 1 +scientist 1 +emori 1 +washington 1 +profession 1 +servic 1 +accredit 1 +vice 1 +technic 1 +committe 1 +softwareengin 1 +present 1 +area 1 +interestparallel 1 +computersci 1 +summari 1 +researchmi 1 +current 1 +interest 1 +parallelprogram 1 +andimplement 1 +issu 1 +also 1 +activ 1 +set 1 +incomput 1 +local 1 +nation 1 +level 1 +select 1 +recent 1 +publicationss 1 +hyder 1 +unifi 1 +concurr 1 +debug 1 +societi 1 +sobek 1 +newton 1 +interact 1 +formal 1 +practic 1 +develop 1 +code 1 +note 1 +york 1 +springer 1 +verlag 1 +multipl 1 +system 1 +journal 1 +distribut 1 +decemb 1 +gener 1 +applic 1 +thirteenth 1 +previou 1 +index 1 +next 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..6df4fddb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,53 @@ +architectur 3 +alfr 2 +page 2 +area 2 +manag 2 +databas 2 +distribut 2 +faculti 2 +dale 1 +daleno 1 +person 1 +trammel 1 +crow 1 +regent 1 +professor 1 +emeritu 1 +comput 1 +scienc 1 +exet 1 +colleg 1 +oxford 1 +england 1 +univers 1 +texa 1 +austin 1 +interestdatabas 1 +system 1 +summari 1 +researchmi 1 +interest 1 +involv 1 +applic 1 +parallel 1 +multi 1 +stagei 1 +problem 1 +studiedinclud 1 +data 1 +strategi 1 +index 1 +andmap 1 +relat 1 +algebra 1 +oper 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..b0cec8a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,92 @@ +comput 7 +system 7 +scienc 5 +brumfield 4 +distribut 4 +model 4 +queue 3 +jeffrei 2 +mathemat 2 +univers 2 +award 2 +databas 2 +symposium 2 +sigcs 2 +faculti 2 +brumfieldsenior 1 +lectur 1 +math 1 +georgia 1 +purdu 1 +honor 1 +colleg 1 +natur 1 +teach 1 +excel 1 +area 1 +interestperform 1 +analysi 1 +oper 1 +summari 1 +researchi 1 +interest 1 +role 1 +plai 1 +designersof 1 +studi 1 +performanceof 1 +exist 1 +propos 1 +network 1 +eachresourc 1 +repres 1 +tasksawait 1 +servic 1 +solut 1 +involv 1 +computationof 1 +respons 1 +time 1 +length 1 +throughput 1 +select 1 +recent 1 +publicationsj 1 +shen 1 +richter 1 +graf 1 +verdi 1 +visual 1 +environ 1 +design 1 +journal 1 +ofparallel 1 +miller 1 +chou 1 +perform 1 +modelingof 1 +object 1 +orient 1 +intern 1 +parallel 1 +distributedsystem 1 +austin 1 +texa 1 +decemb 1 +concurr 1 +program 1 +modula 1 +inproceed 1 +technic 1 +loui 1 +bulletin 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..7724b07a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,113 @@ +mathemat 8 +cline 6 +comput 5 +softwar 5 +siam 4 +journal 4 +scientif 4 +numer 4 +analysi 3 +alan 2 +professor 2 +appli 2 +profession 2 +editor 2 +transact 2 +director 2 +interest 2 +problem 2 +fit 2 +renka 2 +constrain 2 +triangul 2 +condit 2 +number 2 +estim 2 +faculti 2 +clinedavid 1 +bruton 1 +centenni 1 +scienc 1 +univers 1 +michigan 1 +servic 1 +algorithm 1 +commun 1 +associ 1 +editori 1 +board 1 +statisticalcomput 1 +special 1 +group 1 +southern 1 +region 1 +socialrespons 1 +area 1 +interestmathemat 1 +summari 1 +researchi 1 +transform 1 +tool 1 +whichcan 1 +involv 1 +constructionof 1 +explor 1 +methodolog 1 +formathemat 1 +particular 1 +major 1 +developmentha 1 +packag 1 +hundr 1 +subprogram 1 +curv 1 +andsurfac 1 +emploi 1 +tension 1 +spline 1 +select 1 +recent 1 +publicationsr 1 +scatter 1 +data 1 +us 1 +delaunai 1 +imac 1 +expert 1 +system 1 +symbol 1 +north 1 +holland 1 +king 1 +meyer 1 +rout 1 +schedul 1 +coast 1 +guard 1 +buoi 1 +tender 1 +interfac 1 +dimension 1 +solut 1 +closest 1 +node 1 +presenc 1 +barrier 1 +counter 1 +exampl 1 +three 1 +statist 1 +moler 1 +stewart 1 +wilkinson 1 +matrix 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..60104975 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,69 @@ +mathemat 3 +univers 3 +edsger 2 +wybe 2 +comput 2 +physic 2 +award 2 +member 2 +academi 2 +art 2 +interest 2 +faculti 2 +dijkstra 1 +dijkstraschlumberg 1 +centenni 1 +chair 1 +sciencesprofessor 1 +mathematicskandidaatsexamen 1 +doctora 1 +examen 1 +theoret 1 +leydenph 1 +amsterdamhonor 1 +awardsacm 1 +ture 1 +foreign 1 +honorari 1 +american 1 +sciencesmemb 1 +royal 1 +netherland 1 +sciencesdistinguish 1 +fellow 1 +british 1 +societyafip 1 +harri 1 +good 1 +memori 1 +doctor 1 +scienc 1 +honori 1 +causa 1 +queen 1 +belfastarea 1 +program 1 +correct 1 +methodolog 1 +algorithm 1 +systemssummari 1 +research 1 +area 1 +focus 1 +streamlin 1 +argumentso 1 +increas 1 +power 1 +reason 1 +particular 1 +ofform 1 +techniqu 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..f8a0aae8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,60 @@ +comput 4 +edmondson 3 +scienc 3 +chri 2 +yurkanan 2 +univers 2 +network 2 +mobil 2 +protocol 2 +faculti 2 +yurkananlectur 1 +mathemat 1 +texa 1 +austin 1 +profession 1 +servic 1 +secretari 1 +treasur 1 +sigcomm 1 +area 1 +interestcomput 1 +educ 1 +manag 1 +larg 1 +softwar 1 +project 1 +databas 1 +design 1 +summari 1 +researchmi 1 +research 1 +interest 1 +high 1 +speed 1 +commun 1 +specif 1 +internetwork 1 +select 1 +recent 1 +public 1 +cobb 1 +andm 1 +gouda 1 +address 1 +internet 1 +inproceed 1 +annual 1 +theori 1 +informaticsconfer 1 +press 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..83ab8982 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,50 @@ +univers 3 +confer 3 +committe 3 +area 3 +suzi 2 +secondari 2 +educ 2 +comput 2 +scienc 2 +servic 2 +inform 2 +faculti 2 +gallagh 1 +gallagherlectur 1 +coordin 1 +academ 1 +program 1 +loyola 1 +southwestern 1 +louisiana 1 +profession 1 +sigcs 1 +necc 1 +interestcomput 1 +librari 1 +process 1 +summari 1 +researchmi 1 +interest 1 +student 1 +recruit 1 +andretent 1 +women 1 +minor 1 +improv 1 +scienceeduc 1 +school 1 +local 1 +system 1 +retriev 1 +techniqu 1 +addit 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..a75ed1bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,114 @@ +jenevein 7 +comput 7 +interconnect 6 +optic 5 +system 4 +wafer 4 +fault 4 +network 3 +perform 3 +scale 3 +toler 3 +processor 3 +measur 3 +chemistri 2 +parallel 2 +architectur 2 +recent 2 +work 2 +methodolog 2 +menez 2 +applic 2 +ieee 2 +transact 2 +malek 2 +nest 2 +proceed 2 +intern 2 +confer 2 +engin 2 +faculti 2 +senior 1 +lectur 1 +louisiana 1 +state 1 +univers 1 +orlean 1 +area 1 +interestinterconnect 1 +process 1 +summari 1 +researchmi 1 +research 1 +focus 1 +interconnectionnetwork 1 +success 1 +failur 1 +restsin 1 +abil 1 +devis 1 +appropri 1 +cost 1 +interconnectionstructur 1 +involv 1 +thedevelop 1 +special 1 +kindof 1 +laser 1 +wave 1 +guid 1 +design 1 +beinginvestig 1 +techniqu 1 +lead 1 +parallelsystem 1 +beingappli 1 +buss 1 +communicationswitch 1 +iscontinu 1 +performanceport 1 +across 1 +machin 1 +develop 1 +contrast 1 +tobenchmark 1 +repres 1 +true 1 +memorysystem 1 +select 1 +publicationsr 1 +kyklo 1 +multicomput 1 +strategi 1 +properti 1 +june 1 +laranjeira 1 +predic 1 +scheme 1 +press 1 +ullah 1 +metrix 1 +precis 1 +industri 1 +decemb 1 +johnson 1 +impact 1 +multiprocessor 1 +journal 1 +qualiti 1 +reliabl 1 +octob 1 +campbel 1 +prototyp 1 +integr 1 +januari 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..b4a42fbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,66 @@ +logic 6 +comput 3 +norman 2 +emeritu 2 +philosophi 2 +univers 2 +architectur 2 +closur 2 +space 2 +faculti 2 +martin 1 +martinprofessor 1 +scienc 1 +professor 1 +ofphilosophi 1 +chicago 1 +california 1 +angel 1 +area 1 +interestmathemat 1 +summari 1 +researchmi 1 +current 1 +activ 1 +concentr 1 +abstract 1 +structur 1 +asinterpret 1 +theori 1 +center 1 +whichexploit 1 +notion 1 +deduct 1 +oper 1 +andon 1 +intension 1 +model 1 +classic 1 +mathemat 1 +significantearli 1 +research 1 +design 1 +especi 1 +missil 1 +vehicl 1 +applic 1 +trackingalgorithm 1 +track 1 +scan 1 +radar 1 +function 1 +complet 1 +inmani 1 +valu 1 +delai 1 +metatheori 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..910cc7c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,140 @@ +time 20 +system 18 +real 14 +comput 6 +intern 6 +engin 5 +formal 5 +softwar 5 +proceed 4 +aloysiu 3 +present 3 +method 3 +committe 3 +ieee 3 +control 3 +design 3 +develop 3 +autom 3 +wang 3 +journal 2 +program 2 +symposium 2 +chair 2 +technic 2 +aid 2 +tool 2 +research 2 +robust 2 +respons 2 +specif 2 +confer 2 +decemb 2 +faculti 2 +mokassoci 1 +professorfaculti 1 +fellow 1 +scienc 1 +electr 1 +massachusett 1 +institut 1 +technolog 1 +profession 1 +servic 1 +associ 1 +editor 1 +critic 1 +editori 1 +board 1 +systemdesign 1 +taiwan 1 +vice 1 +work 1 +group 1 +federationof 1 +automat 1 +presentarea 1 +interestfault 1 +toler 1 +hard 1 +architectur 1 +summari 1 +researchi 1 +current 1 +conduct 1 +fundament 1 +area 1 +ofdistribut 1 +primari 1 +concern 1 +includespecif 1 +techniqu 1 +algorithm 1 +forguarante 1 +stringent 1 +constraint 1 +understand 1 +thetrad 1 +criticalsystem 1 +goal 1 +framework 1 +theanalysi 1 +synthesi 1 +applic 1 +areasinclud 1 +robot 1 +avion 1 +industrialprocess 1 +fund 1 +provid 1 +offic 1 +ofnav 1 +highli 1 +environ 1 +forreal 1 +select 1 +recent 1 +publicationsa 1 +toward 1 +mechan 1 +foundat 1 +tilborg 1 +kluwer 1 +academ 1 +publish 1 +heitmey 1 +labaw 1 +clement 1 +case 1 +support 1 +fifth 1 +workshop 1 +montreal 1 +juli 1 +emerson 1 +asynchron 1 +distribut 1 +aptl 1 +melbourn 1 +load 1 +adjust 1 +adapt 1 +antonio 1 +tsou 1 +brown 1 +analysi 1 +bound 1 +nasa 1 +expert 1 +sigsoft 1 +orlean 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..2140cf19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,75 @@ +program 5 +function 4 +hamilton 2 +richard 2 +engin 2 +interest 2 +concurr 2 +process 2 +languag 2 +faculti 2 +senior 1 +lecturerb 1 +appli 1 +physic 1 +harvard 1 +collegem 1 +aero 1 +astronaut 1 +stanford 1 +universityph 1 +comput 1 +scienc 1 +iowa 1 +state 1 +universityprofession 1 +servicecoordin 1 +univers 1 +texa 1 +austin 1 +year 1 +seri 1 +editor 1 +vol 1 +addison 1 +weslei 1 +area 1 +object 1 +orient 1 +undergradu 1 +educationsummari 1 +research 1 +maintain 1 +long 1 +stand 1 +potentialfor 1 +suitabl 1 +formal 1 +reason 1 +infal 1 +us 1 +teach 1 +sectionof 1 +work 1 +time 1 +permit 1 +implementationof 1 +real 1 +microcomput 1 +applic 1 +longer 1 +term 1 +project 1 +book 1 +onfunct 1 +algorithm 1 +addit 1 +inform 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..3b56cb1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,126 @@ +system 10 +databas 8 +silberschatz 6 +ieee 4 +confer 4 +intern 4 +comput 3 +knowledg 3 +area 3 +rastogi 3 +larg 3 +abraham 2 +award 2 +paper 2 +inform 2 +chair 2 +pod 2 +research 2 +parallel 2 +manag 2 +main 2 +process 2 +continu 2 +media 2 +storag 2 +high 2 +perform 2 +transact 2 +data 2 +septemb 2 +ozden 2 +faculti 2 +silberschatzprofessorship 1 +sciencesm 1 +stoni 1 +brookhonor 1 +profession 1 +serviceiee 1 +societi 1 +outstand 1 +journal 1 +advisori 1 +committe 1 +nation 1 +scienc 1 +foundat 1 +divis 1 +robot 1 +intellig 1 +gener 1 +seventh 1 +eighth 1 +sigact 1 +sigmod 1 +symposiumon 1 +principl 1 +organ 1 +ullman 1 +invit 1 +workshop 1 +futureof 1 +program 1 +symposium 1 +distributedsystem 1 +interest 1 +oper 1 +distribut 1 +basedsystemssummari 1 +special 1 +concurr 1 +recentresearch 1 +concentr 1 +multidatabas 1 +transactionmanag 1 +base 1 +real 1 +time 1 +databasesystem 1 +multiresolut 1 +server 1 +select 1 +recent 1 +publicationss 1 +ganguli 1 +tsur 1 +map 1 +datalog 1 +programexecut 1 +network 1 +processor 1 +knowledgeand 1 +engin 1 +june 1 +jagadish 1 +lieuwen 1 +sudarshan 1 +dali 1 +memori 1 +internationalconfer 1 +biliri 1 +cost 1 +storageserv 1 +movi 1 +demand 1 +onveri 1 +framework 1 +storageand 1 +retriev 1 +conferenceon 1 +multimedia 1 +read 1 +fussel 1 +multi 1 +resolut 1 +relationaldata 1 +model 1 +august 1 +addit 1 +obtain 1 +fromindividu 1 +member 1 +home 1 +page 1 +back 1 +list 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..4961e3ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,17 @@ +robert 2 +simmon 2 +simmonsquinci 1 +centenni 1 +professor 1 +emeritu 1 +comput 1 +scienc 1 +professoremeritu 1 +psychologymai 1 +novemb 1 +bledso 1 +rememb 1 +back 1 +list 1 +faculti 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..639a2e4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,53 @@ +adam 2 +seligman 2 +home 2 +page 2 +gzip 2 +email 2 +pageadam 1 +click 1 +log 1 +gradual 1 +student 1 +austin 1 +program 1 +aweekli 1 +happi 1 +hour 1 +depart 1 +claim 1 +fame 1 +undergradu 1 +thesi 1 +specifiedth 1 +type 1 +rule 1 +oper 1 +semant 1 +core 1 +avail 1 +fileor 1 +postscript 1 +file 1 +knowwhat 1 +think 1 +touch 1 +utexa 1 +call 1 +pagemart 1 +graphic 1 +phone 1 +number 1 +read 1 +progress 1 +vrml 1 +paper 1 +new 1 +junki 1 +fromreut 1 +yahoo 1 +altern 1 +could 1 +check 1 +nando 1 +time 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..64a1713f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,30 @@ +univers 5 +scienc 5 +texa 4 +comput 4 +agapito 3 +austin 3 +sustaita 2 +austincognit 1 +interest 1 +machin 1 +learn 1 +languag 1 +acquisit 1 +chill 1 +specif 1 +connection 1 +commonsens 1 +reasoningschoolingph 1 +hopefulli 1 +colleg 1 +station 1 +california 1 +santa 1 +barbara 1 +miscellaneouspost 1 +addressth 1 +depart 1 +mail 1 +utexa 1 +eduphon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..1ff95087 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,90 @@ +report 4 +artifici 3 +intellig 3 +faculti 3 +autom 3 +theorem 3 +prove 3 +novak 3 +technic 3 +program 2 +robert 2 +kuiper 2 +reason 2 +miikkulainen 2 +base 2 +moonei 2 +laboratoryut 1 +laboratoryth 1 +laboratori 1 +atth 1 +univers 1 +texa 1 +austinha 1 +distinguish 1 +histori 1 +larg 1 +number 1 +excel 1 +andgradu 1 +student 1 +new 1 +world 1 +rank 1 +nation 1 +close 1 +link 1 +comput 1 +scienc 1 +depart 1 +boyer 1 +causei 1 +logic 1 +philosoph 1 +foundat 1 +benjamin 1 +qualit 1 +vladimir 1 +lifschitz 1 +action 1 +risto 1 +neural 1 +network 1 +mirank 1 +rule 1 +system 1 +machin 1 +learn 1 +gordon 1 +automat 1 +physic 1 +problem 1 +solv 1 +bruce 1 +porter 1 +multi 1 +function 1 +knowledg 1 +emeritu 1 +woodi 1 +bledso 1 +deceas 1 +dream 1 +aaai 1 +presidenti 1 +address 1 +simmon 1 +memoriam 1 +postdoc 1 +peter 1 +clark 1 +souther 1 +softwar 1 +directori 1 +current 1 +avail 1 +porterpoint 1 +lab 1 +fund 1 +agenciescontact 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..821b8310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,40 @@ +parallel 3 +program 3 +austin 3 +comput 2 +scienc 2 +work 2 +system 2 +offic 2 +ajita 1 +johnajita 1 +john 1 +candid 1 +group 1 +depart 1 +univers 1 +texa 1 +hello 1 +research 1 +automat 1 +programmingframework 1 +base 1 +constraint 1 +compil 1 +parallelprocedur 1 +advisor 1 +professor 1 +brownemi 1 +papersmi 1 +us 1 +translat 1 +routin 1 +code 1 +want 1 +contact 1 +postal 1 +usavoic 1 +main 1 +taylor 1 +ajohn 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..884751ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,112 @@ +comput 8 +scienc 6 +austin 6 +home 5 +interest 5 +page 4 +almstrum 3 +learn 3 +univers 3 +texa 3 +group 3 +vicki 2 +educ 2 +understand 2 +mathemat 2 +method 2 +teach 2 +uppsala 2 +includ 2 +technolog 2 +special 2 +offic 2 +utexa 2 +utc 1 +almstrumabout 1 +scientist 1 +peopl 1 +particularli 1 +interestedin 1 +logic 1 +formal 1 +doctoralresearch 1 +topic 1 +limit 1 +mathematicallog 1 +novic 1 +student 1 +lectur 1 +addit 1 +ispent 1 +fall 1 +semest 1 +sweden 1 +pagether 1 +link 1 +encourag 1 +other 1 +excel 1 +computersci 1 +garden 1 +travel 1 +craft 1 +sew 1 +woodwork 1 +pictur 1 +hubbi 1 +torgni 1 +stadler 1 +check 1 +site 1 +itics 1 +confer 1 +integr 1 +educationjun 1 +work 1 +june 1 +swedenoth 1 +maintain 1 +class 1 +camp 1 +field 1 +research 1 +evalu 1 +mentor 1 +issu 1 +jump 1 +point 1 +area 1 +suffer 1 +spurt 1 +construct 1 +frenzi 1 +organ 1 +belong 1 +sigcs 1 +educationsigsoft 1 +softwar 1 +engineeringacm 1 +associ 1 +machineryieeeth 1 +institut 1 +electr 1 +electron 1 +engineerscpsrcomput 1 +profession 1 +social 1 +responsibilityconnect 1 +elsewhereto 1 +contact 1 +depart 1 +main 1 +direct 1 +seldom 1 +alwai 1 +connect 1 +need 1 +forewarn 1 +leav 1 +plenti 1 +time 1 +email 1 +address 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..50cf5a47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,15 @@ +anthoni 4 +hing 2 +pang 2 +offic 2 +hung 1 +home 1 +pagehung 1 +hour 1 +mondai 1 +wednesdai 1 +email 1 +utexa 1 +inform 1 +compil 1 +cours 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..e58ac8e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,32 @@ +aruna 3 +comput 3 +austin 2 +depart 2 +fall 2 +homepag 1 +addalacurr 1 +graduat 1 +studentth 1 +univers 1 +texa 1 +scienc 1 +taylor 1 +hall 1 +educ 1 +bachelor 1 +engin 1 +sciencess 1 +colleg 1 +engineeringmysorework 1 +experi 1 +lectur 1 +studi 1 +sciencesunivers 1 +mysoreindiai 1 +come 1 +mysor 1 +cityindiato 1 +contact 1 +email 1 +utexa 1 +eduvoic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..615b0bbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,21 @@ +ashi 2 +austin 2 +offic 2 +home 1 +page 1 +tarafdarashi 1 +tarafdarabout 1 +get 1 +round 1 +let 1 +exist 1 +known 1 +patienc 1 +pleas 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +usavoic 1 +main 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..f528192f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,64 @@ +scienc 5 +comput 4 +bayardo 3 +austin 3 +univers 2 +texa 2 +work 2 +research 2 +mail 2 +address 2 +engin 2 +roberto 1 +home 1 +pageroberto 1 +candid 1 +expect 1 +complet 1 +date 1 +fall 1 +depart 1 +current 1 +also 1 +within 1 +infosleuth 1 +project 1 +interest 1 +queri 1 +process 1 +activ 1 +expert 1 +databas 1 +system 1 +data 1 +mine 1 +constraint 1 +satisfactionmi 1 +thesi 1 +advisor 1 +prof 1 +daniel 1 +mirank 1 +paper 1 +line 1 +along 1 +toolkit 1 +generatingand 1 +solv 1 +exception 1 +hard 1 +instanc 1 +contact 1 +inform 1 +utexa 1 +campu 1 +dept 1 +taylor 1 +hall 1 +histori 1 +electr 1 +center 1 +coordin 1 +number 1 +sinc 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..4192886c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,40 @@ +bert 3 +refin 2 +imprecis 2 +model 2 +month 2 +address 2 +austin 2 +kayresearch 1 +reason 1 +physic 1 +process 1 +overviewof 1 +research 1 +vitami 1 +network 1 +retriev 1 +paper 1 +dissert 1 +entitl 1 +behavior 1 +abstract 1 +stuffsonia 1 +andnina 1 +page 1 +drink 1 +ofth 1 +springbank 1 +scotchdrinksof 1 +past 1 +contact 1 +informationemail 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..0e42f515 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,39 @@ +bhanu 2 +comput 2 +system 2 +home 1 +page 1 +welcom 1 +homepagethi 1 +akhil 1 +reddythank 1 +visit 1 +homepag 1 +visitor 1 +number 1 +school 1 +univers 1 +texa 1 +austinm 1 +scienc 1 +third 1 +semest 1 +coursesc 1 +multimedia 1 +harrick 1 +vinc 1 +introduct 1 +mathemat 1 +logic 1 +vladimir 1 +lifschitz 1 +datacommun 1 +network 1 +anitish 1 +barua 1 +architectur 1 +schwetmani 1 +term 1 +project 1 +databas 1 +manag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..c214f84e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,228 @@ +peopl 8 +program 7 +probabl 6 +comput 5 +scienc 4 +risk 4 +would 4 +adopt 4 +make 4 +decis 3 +analog 3 +neuron 3 +process 3 +number 3 +updat 3 +bogon 3 +cogsci 2 +sometim 2 +feel 2 +austin 2 +work 2 +avers 2 +outcom 2 +coin 2 +toss 2 +reject 2 +well 2 +sound 2 +explain 2 +diseas 2 +problem 2 +save 2 +human 2 +brain 2 +wrong 2 +inform 2 +telephon 2 +bogo 2 +bogu 2 +michael 1 +bogomolnymichael 1 +bogomolni 1 +advert 1 +although 1 +pictur 1 +read 1 +articl 1 +current 1 +first 1 +semest 1 +univers 1 +texa 1 +physic 1 +amherst 1 +colleg 1 +research 1 +interestsnot 1 +intend 1 +jenef 1 +husman 1 +final 1 +project 1 +quarter 1 +accept 1 +fair 1 +bet 1 +econom 1 +theori 1 +involv 1 +maxim 1 +util 1 +diminish 1 +return 1 +howev 1 +ask 1 +question 1 +formul 1 +take 1 +prevent 1 +manner 1 +respond 1 +differ 1 +exampl 1 +belov 1 +tverski 1 +kahneman 1 +taken 1 +almost 1 +verbatimfrom 1 +frame 1 +psycholog 1 +choic 1 +imagin 1 +prepar 1 +outbreak 1 +unusu 1 +expect 1 +kill 1 +altern 1 +combat 1 +beenpropos 1 +assum 1 +exact 1 +scientif 1 +estim 1 +consequ 1 +programsar 1 +follow 1 +besav 1 +nobodi 1 +favor 1 +digitalif 1 +made 1 +fire 1 +depend 1 +level 1 +electrochem 1 +charg 1 +built 1 +axon 1 +biolog 1 +foundat 1 +shaki 1 +ahead 1 +scream 1 +hypothesi 1 +transfer 1 +particular 1 +wire 1 +either 1 +high 1 +voltag 1 +interpret 1 +accur 1 +inaccur 1 +simpli 1 +come 1 +answer 1 +mistak 1 +subtract 1 +balanc 1 +checkbook 1 +rememb 1 +invalid 1 +mayb 1 +gave 1 +wasn 1 +real 1 +anoth 1 +stori 1 +nevertheless 1 +hard 1 +press 1 +point 1 +misfir 1 +account 1 +error 1 +correct 1 +lead 1 +incorrect 1 +result 1 +cognit 1 +even 1 +complet 1 +remind 1 +quot 1 +italic 1 +class 1 +append 1 +introduct 1 +cours 1 +graduat 1 +researchcognit 1 +sciencearitifici 1 +intelligencemathemat 1 +logictopolog 1 +ghrist 1 +oper 1 +system 1 +paper 1 +symbol 1 +differenti 1 +puzzl 1 +theorem 1 +prover 1 +contact 1 +email 1 +utexa 1 +better 1 +send 1 +postcard 1 +phone 1 +postal 1 +address 1 +wilshir 1 +parkwai 1 +informationthi 1 +page 1 +written 1 +us 1 +text 1 +editor 1 +last 1 +insert 1 +empti 1 +promis 1 +construct 1 +soon 1 +suppos 1 +list 1 +hidden 1 +talentsdefinit 1 +quantum 1 +bogodynamicsdefinit 1 +sortwhil 1 +free 1 +look 1 +bogos 1 +bogomet 1 +filter 1 +flux 1 +bogotifi 1 +autobogotiphobia 1 +blinkenlight 1 +lasher 1 +pleas 1 +connect 1 +stupid 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..1597461e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,186 @@ +scienc 6 +univers 6 +comput 5 +page 4 +boyer 4 +robert 3 +texa 3 +austin 3 +govern 3 +symbol 3 +mathemat 2 +time 2 +articl 2 +licens 2 +taken 2 +oftexa 2 +fundament 2 +home 1 +boyerhom 1 +stephen 1 +professor 1 +philosophydepart 1 +austinhow 1 +reach 1 +mepap 1 +mail 1 +dept 1 +univ 1 +usaemail 1 +utexa 1 +edufax 1 +physic 1 +locationsclassescurriculum 1 +vitaeperson 1 +dataeducationpublicationshonorsjobsgradu 1 +studentsth 1 +moor 1 +prover 1 +also 1 +knowna 1 +nqthm 1 +photo 1 +recommend 1 +read 1 +project 1 +vote 1 +smart 1 +webth 1 +projectmi 1 +view 1 +undergradu 1 +educ 1 +john 1 +mccarthi 1 +pageth 1 +moffett 1 +build 1 +controversyni 1 +mccune 1 +robbin 1 +algebra 1 +result 1 +andsom 1 +technic 1 +detail 1 +verif 1 +float 1 +point 1 +divis 1 +algorithm 1 +microprocessor 1 +wonder 1 +softwar 1 +polici 1 +permitsth 1 +public 1 +close 1 +zero 1 +administrativeoverhead 1 +short 1 +cours 1 +howthi 1 +work 1 +much 1 +intellectu 1 +properti 1 +thegreat 1 +book 1 +variou 1 +enumer 1 +thereof 1 +confess 1 +acanon 1 +thumper 1 +possibl 1 +end 1 +tenur 1 +universitiesstandard 1 +disclaim 1 +natur 1 +noth 1 +shouldb 1 +repres 1 +offici 1 +posit 1 +part 1 +state 1 +furthermor 1 +steal 1 +joke 1 +peter 1 +deutsch 1 +aweb 1 +own 1 +anind 1 +endors 1 +everyth 1 +formal 1 +method 1 +alwai 1 +riski 1 +peano 1 +first 1 +call 1 +logic 1 +introduc 1 +instanc 1 +mean 1 +habitu 1 +wrote 1 +hislectur 1 +note 1 +teach 1 +militaryacademi 1 +student 1 +incens 1 +hisformalist 1 +approach 1 +rebel 1 +despit 1 +hispromis 1 +pass 1 +fire 1 +subsequ 1 +found 1 +amor 1 +congeni 1 +set 1 +turin 1 +sincomplet 1 +theorem 1 +rudi 1 +rucker 1 +death 1 +verg 1 +extinct 1 +said 1 +harold 1 +kroto 1 +britain 1 +sussex 1 +share 1 +chemistrypr 1 +curl 1 +richard 1 +smallei 1 +rice 1 +inhouston 1 +discoveri 1 +carbon 1 +atom 1 +bound 1 +shape 1 +asocc 1 +ball 1 +scientist 1 +lament 1 +loss 1 +fund 1 +associ 1 +press 1 +decemb 1 +daili 1 +texan 1 +upup 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..af4d29b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,156 @@ +parallel 20 +program 16 +comput 12 +brown 10 +code 8 +environ 7 +languag 6 +visual 5 +high 4 +level 4 +debug 4 +paper 4 +proceed 4 +confer 4 +graphic 3 +hyder 3 +newton 3 +werth 3 +august 3 +process 3 +intern 3 +jame 2 +scienc 2 +professor 2 +physic 2 +engin 2 +texa 2 +fellow 2 +societi 2 +specif 2 +integr 2 +work 2 +gener 2 +structur 2 +model 2 +develop 2 +system 2 +dongarra 2 +moor 2 +ieee 2 +compar 2 +henc 2 +version 2 +base 2 +brownereg 1 +chair 1 +electr 1 +hendrix 1 +collegeph 1 +univers 1 +austinhonor 1 +award 1 +british 1 +american 1 +societyarea 1 +interestparallel 1 +major 1 +focu 1 +sciencewith 1 +applic 1 +area 1 +summari 1 +researchi 1 +tenyear 1 +computation 1 +orient 1 +displai 1 +anabstract 1 +declar 1 +evolv 1 +three 1 +ongo 1 +research 1 +includesmethod 1 +optim 1 +highlevel 1 +abstract 1 +throughdata 1 +partit 1 +data 1 +flow 1 +compositionalapproach 1 +addit 1 +intelligenceprocess 1 +control 1 +fluiddynam 1 +also 1 +design 1 +narrow 1 +domaincompil 1 +includ 1 +logic 1 +basedlanguag 1 +robust 1 +method 1 +intellig 1 +real 1 +timedecis 1 +select 1 +recent 1 +publicationsj 1 +distribut 1 +technolog 1 +spring 1 +volum 1 +number 1 +technic 1 +report 1 +dept 1 +univ 1 +austin 1 +longer 1 +refer 1 +interact 1 +formal 1 +andpract 1 +fourthworkshop 1 +compil 1 +santacruz 1 +california 1 +jain 1 +experiment 1 +studi 1 +theeffect 1 +ofth 1 +siam 1 +mirank 1 +parallelizingcompil 1 +rule 1 +unifi 1 +concurr 1 +kleyn 1 +specifi 1 +graph 1 +softwar 1 +baltimor 1 +april 1 +postscript 1 +file 1 +extend 1 +proc 1 +conf 1 +supercomput 1 +juli 1 +describ 1 +prototyp 1 +implement 1 +notat 1 +chang 1 +idea 1 +remain 1 +good 1 +broad 1 +introduct 1 +brief 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..6ab2d328 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,70 @@ +group 5 +vlsi 3 +interest 3 +austin 2 +ping 2 +depart 2 +thakur 2 +area 2 +fpga 2 +architectur 2 +link 2 +inform 2 +comment 2 +utc 1 +home 1 +page 1 +research 1 +addressdepart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +peopl 1 +supervis 1 +prof 1 +martin 1 +wong 1 +member 1 +chang 1 +chung 1 +chenyao 1 +chen 1 +yung 1 +ming 1 +fang 1 +shashidhar 1 +zhou 1 +researchth 1 +current 1 +wide 1 +rang 1 +broadli 1 +classifi 1 +follow 1 +placement 1 +rout 1 +partit 1 +logic 1 +synthesi 1 +issu 1 +high 1 +perform 1 +abstract 1 +recent 1 +public 1 +groupcan 1 +found 1 +trace 1 +sigda 1 +special 1 +design 1 +autom 1 +ieee 1 +institut 1 +electr 1 +electron 1 +engin 1 +austinclick 1 +mail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..7911addf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,93 @@ +work 3 +distribut 2 +project 2 +beer 2 +humor 2 +utexa 2 +home 2 +page 2 +bill 1 +canfieldhom 1 +businessmi 1 +resum 1 +postscript 1 +spring 1 +give 1 +report 1 +softwar 1 +highli 1 +avail 1 +system 1 +class 1 +slide 1 +talk 1 +effort 1 +mach 1 +implement 1 +flaviu 1 +cristian 1 +algorithm 1 +done 1 +prof 1 +built 1 +guangtian 1 +current 1 +hardwar 1 +verif 1 +ther 1 +divis 1 +ti 1 +research 1 +professor 1 +allen 1 +emerson 1 +pleasuredomest 1 +bliss 1 +depart 1 +photo 1 +wife 1 +carla 1 +newborn 1 +daughter 1 +ruth 1 +clair 1 +parenthood 1 +struck 1 +travel 1 +high 1 +prioriti 1 +somewher 1 +li 1 +enjoy 1 +peel 1 +label 1 +bottl 1 +foreign 1 +land 1 +variou 1 +sourcesth 1 +sofaspher 1 +haiku 1 +olestra 1 +approv 1 +substitut 1 +speak 1 +poetri 1 +interest 1 +women 1 +disinform 1 +dole 1 +canfield 1 +last 1 +updat 1 +april 1 +thank 1 +todd 1 +peter 1 +peterst 1 +mail 1 +mani 1 +link 1 +andth 1 +pictur 1 +cool 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..a01bdec4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,61 @@ +carruth 4 +austin 3 +uniti 3 +mail 2 +utexa 2 +depart 2 +professor 2 +misra 2 +time 2 +home 2 +page 2 +offic 2 +address 2 +phone 2 +carruthpleas 1 +send 1 +question 1 +suggest 1 +introduct 1 +candid 1 +computersci 1 +supervis 1 +jayadev 1 +mydissert 1 +topic 1 +real 1 +member 1 +research 1 +group 1 +extend 1 +theori 1 +order 1 +express 1 +finit 1 +boundson 1 +usual 1 +oper 1 +progress 1 +safeti 1 +alsointerest 1 +function 1 +program 1 +languag 1 +partial 1 +ordersemant 1 +autom 1 +theorem 1 +prove 1 +contact 1 +inform 1 +person 1 +email 1 +comput 1 +scienc 1 +taylor 1 +hall 1 +univers 1 +texa 1 +link 1 +world 1 +wide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..659dacd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,43 @@ +ping 3 +chung 2 +chen 2 +schedul 2 +utexa 2 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +fiance 1 +meng 1 +tsai 1 +current 1 +intel 1 +summer 1 +intern 1 +work 1 +bufferinsert 1 +problem 1 +syllabu 1 +grade 1 +polici 1 +exam 1 +homework 1 +exercis 1 +offic 1 +hour 1 +locat 1 +new 1 +class 1 +fall 1 +syllabustopicschung 1 +clen 1 +last 1 +updat 1 +idea 1 +improv 1 +page 1 +send 1 +suggest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..7a849eb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,36 @@ +austin 4 +home 2 +deji 2 +anoth 2 +paragraph 2 +offic 2 +page 1 +chen 1 +chenabout 1 +mehello 1 +homepag 1 +student 1 +tongji 1 +univers 1 +shanghai 1 +chinaa 1 +bullet 1 +list 1 +easi 1 +includ 1 +well 1 +first 1 +item 1 +third 1 +forget 1 +break 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +usahom 1 +lake 1 +blvd 1 +usaphon 1 +main 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..ee9af42e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,139 @@ +cliff 4 +comput 3 +univers 2 +austin 2 +write 2 +system 2 +scienc 2 +educ 2 +macintosh 2 +program 2 +symbol 2 +mstk 2 +plai 2 +chaputcliff 1 +chaputth 1 +texa 1 +austindepart 1 +sciencestaylor 1 +hall 1 +robotlab 1 +home 1 +dullchaput 1 +utexa 1 +studi 1 +northwestern 1 +gothimself 1 +emploi 1 +programm 1 +anywai 1 +spent 1 +year 1 +anemail 1 +client 1 +portabl 1 +visual 1 +object 1 +librari 1 +odesta 1 +corpor 1 +left 1 +institut 1 +thelearn 1 +hewrot 1 +trane 1 +softwar 1 +common 1 +lisp 1 +thenimpl 1 +simul 1 +environ 1 +high 1 +school 1 +studentscal 1 +gamesproject 1 +graduat 1 +student 1 +hang 1 +robot 1 +labannoi 1 +peopl 1 +hair 1 +brain 1 +scheme 1 +mean 1 +represent 1 +artifici 1 +life 1 +sleep 1 +dream 1 +read 1 +fiction 1 +listen 1 +farka 1 +tour 1 +medeski 1 +martin 1 +wood 1 +watch 1 +rerun 1 +korg 1 +ride 1 +bike 1 +turnon 1 +includ 1 +breakfast 1 +version 1 +raspi 1 +voic 1 +starfleet 1 +captain 1 +turnoff 1 +republican 1 +microsoft 1 +hangov 1 +fave 1 +site 1 +current 1 +eventsdaili 1 +new 1 +reutersintellicast 1 +weatheraustin 1 +txchicago 1 +ilperiodicalssucksalonmirski 1 +worst 1 +webth 1 +onionmacweekmacuserreferencehypertext 1 +webster 1 +interfaceyahooalta 1 +vistacardiff 1 +movi 1 +databaselyco 1 +road 1 +mapalt 1 +culturemacintosh 1 +dataappl 1 +computercyberdogquicktimequickdraw 1 +dappl 1 +supportmacintouchmacintosh 1 +resourcecyberdog 1 +poundinfo 1 +archiv 1 +rootcool 1 +weird 1 +stufffringewareth 1 +actlabpbsnprnow 1 +catch 1 +phrase 1 +catalogpap 1 +softwareth 1 +rsumsymbol 1 +emerg 1 +groundingrobotmap 1 +peopledav 1 +falooncharl 1 +lewisjeff 1 +lindjeff 1 +sherwoodbrian 1 +slatorsandi 1 +stone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..cae61746 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,104 @@ +page 5 +chuanjun 2 +wang 2 +place 2 +china 2 +check 2 +texa 2 +austin 2 +view 2 +like 2 +find 2 +search 2 +diamond 2 +homepag 1 +welcom 1 +pictur 1 +captur 1 +gloriou 1 +moment 1 +came 1 +earth 1 +stun 1 +detail 1 +origin 1 +come 1 +orient 1 +countri 1 +call 1 +know 1 +hometown 1 +hubei 1 +provinc 1 +graduat 1 +student 1 +tsinghua 1 +univ 1 +decid 1 +time 1 +chang 1 +better 1 +thought 1 +end 1 +beautifulunivers 1 +current 1 +work 1 +comput 1 +scienc 1 +take 1 +break 1 +read 1 +enjoi 1 +tower 1 +opinion 1 +nifti 1 +thing 1 +televis 1 +surf 1 +mind 1 +numb 1 +faceless 1 +howev 1 +brilliant 1 +us 1 +materi 1 +inform 1 +miner 1 +among 1 +million 1 +rock 1 +unemploi 1 +internet 1 +philosoph 1 +well 1 +person 1 +look 1 +real 1 +unix 1 +program 1 +magazin 1 +jump 1 +dobb 1 +journal 1 +word 1 +need 1 +fresh 1 +world 1 +hard 1 +fine 1 +graphic 1 +design 1 +unusu 1 +prose 1 +cours 1 +list 1 +would 1 +complet 1 +without 1 +link 1 +pope 1 +porsch 1 +write 1 +return 1 +depart 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..fd446bc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,103 @@ +comput 3 +scienc 3 +inform 3 +taiwan 2 +univers 2 +texa 2 +austin 2 +research 2 +process 2 +system 2 +tag 2 +user 2 +welcom 1 +homepag 1 +chin 1 +tser 1 +huang 1 +last 1 +updat 1 +decemb 1 +educ 1 +june 1 +degre 1 +dept 1 +engin 1 +nation 1 +taipei 1 +current 1 +master 1 +student 1 +depart 1 +interest 1 +natur 1 +languag 1 +human 1 +interfac 1 +network 1 +distribut 1 +systemsexperiencei 1 +ever 1 +work 1 +chines 1 +knowledg 1 +group 1 +instituteof 1 +academia 1 +sinica 1 +assist 1 +major 1 +worki 1 +design 1 +capabl 1 +word 1 +segment 1 +categori 1 +usinghidden 1 +markov 1 +model 1 +improv 1 +friendli 1 +tool 1 +allow 1 +toexecut 1 +line 1 +proof 1 +read 1 +result 1 +automat 1 +automatictag 1 +reach 1 +accuraci 1 +improvedbecaus 1 +continu 1 +expans 1 +train 1 +data 1 +person 1 +interestsmovi 1 +book 1 +music 1 +literatur 1 +semiolog 1 +basebal 1 +basketbal 1 +tabl 1 +tenni 1 +pinbal 1 +favorit 1 +siteschina 1 +timesminsheng 1 +dailyth 1 +york 1 +timesusa 1 +todayth 1 +economistth 1 +atlant 1 +monthlymak 1 +contact 1 +chuang 1 +utexa 1 +edufing 1 +meyou 1 +visitor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..9e5fc01d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,23 @@ +page 2 +cilkcilkcilk 1 +pronounc 1 +silk 1 +parallel 1 +multithread 1 +base 1 +languageand 1 +runtim 1 +system 1 +find 1 +time 1 +us 1 +inform 1 +inthi 1 +check 1 +thecilk 1 +last 1 +modifi 1 +august 1 +robert 1 +blumoferdb 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..71aa89ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,33 @@ +chung 2 +keung 2 +comput 2 +poon 1 +home 1 +page 1 +poondepart 1 +sciencesunivers 1 +texa 1 +austinaustin 1 +offic 1 +ckpoon 1 +utexa 1 +edumi 1 +plan 1 +hungri 1 +fish 1 +askvinc 1 +gogan 1 +pleas 1 +thesi 1 +complex 1 +connect 1 +problemsom 1 +interest 1 +site 1 +theoret 1 +scienc 1 +hong 1 +kong 1 +harmonica 1 +high 1 +school 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..8a2aeea5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,62 @@ +page 4 +comput 4 +austin 4 +secur 4 +home 3 +project 3 +student 2 +scienc 2 +research 2 +link 2 +utexa 2 +offic 2 +chung 1 +wongchung 1 +wonglast 1 +modifi 1 +graduat 1 +thedepart 1 +univers 1 +texa 1 +member 1 +thenetwork 1 +labwhich 1 +head 1 +byprof 1 +simon 1 +relat 1 +java 1 +nist 1 +divis 1 +resourc 1 +clearinghous 1 +role 1 +base 1 +access 1 +control 1 +rbac 1 +prof 1 +rivest 1 +cryptographi 1 +contact 1 +meemail 1 +ckwong 1 +edupost 1 +usavoic 1 +dept 1 +hyde 1 +park 1 +baptist 1 +church 1 +chines 1 +mission 1 +hong 1 +kong 1 +associ 1 +linux 1 +netbsd 1 +freebsd 1 +openbsd 1 +send 1 +email 1 +tockwong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..c4ea7222 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,75 @@ +simul 4 +techniqu 3 +address 3 +clanci 2 +qualit 2 +knowledg 2 +comput 2 +behavior 2 +system 2 +larg 2 +abstract 2 +aggreg 2 +interest 2 +develop 2 +automat 2 +austin 2 +clancyresearch 1 +reason 1 +us 1 +incomplet 1 +descriptionof 1 +possibl 1 +dynam 1 +complex 1 +containinga 1 +number 1 +variabl 1 +constraint 1 +frequentlyi 1 +intract 1 +result 1 +incomprehens 1 +descript 1 +requir 1 +simulationto 1 +elimin 1 +irrelev 1 +detail 1 +focu 1 +distinctionsof 1 +whichaddress 1 +problem 1 +particular 1 +abstractiontechniqu 1 +appli 1 +thiswil 1 +facilit 1 +integr 1 +withlarg 1 +scale 1 +base 1 +model 1 +build 1 +followingtechniqu 1 +issu 1 +vita 1 +list 1 +network 1 +retriev 1 +real 1 +paper 1 +contact 1 +informationemail 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +depart 1 +scienc 1 +univers 1 +texa 1 +finger 1 +inform 1 +hotlist 1 +netscap 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..55c659f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,11 @@ +page 1 +construct 1 +jimbo 1 +click 1 +three 1 +four 1 +five 1 +seven 1 +eight 1 +nine 1 +eleven 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..8ab6993e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,38 @@ +austin 6 +chri 3 +home 2 +address 2 +depart 2 +comput 2 +scienc 2 +chines 2 +chuwelcom 1 +page 1 +myselfmi 1 +photo 1 +student 1 +phone 1 +number 1 +call 1 +offic 1 +mail 1 +univers 1 +texa 1 +taylor 1 +hall 1 +interest 1 +link 1 +campu 1 +christian 1 +fellowship 1 +church 1 +hong 1 +kong 1 +china 1 +author 1 +chuemail 1 +cnchu 1 +utexa 1 +edulast 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..69a85fcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,179 @@ +code 23 +program 8 +parallel 7 +mail 6 +user 5 +version 5 +system 4 +list 4 +emeri 4 +visual 3 +click 3 +tutori 3 +releas 3 +avail 3 +softwar 3 +interfac 3 +line 3 +document 3 +manual 3 +member 3 +scienc 2 +produc 2 +architectur 2 +model 2 +screen 2 +shot 2 +base 2 +announc 2 +download 2 +featur 2 +provid 2 +previou 2 +includ 2 +articl 2 +comput 2 +us 2 +name 2 +last 2 +address 2 +relat 2 +postscript 2 +public 2 +contact 2 +send 2 +utexa 2 +austin 2 +research 2 +john 2 +systemmast 1 +lawless 1 +codeless 1 +myriad 1 +preced 1 +wilder 1 +singl 1 +instanc 1 +alfr 1 +lord 1 +tennysoncod 1 +allow 1 +compos 1 +sequentialprogram 1 +direct 1 +graph 1 +wheredata 1 +flow 1 +arc 1 +connect 1 +node 1 +repres 1 +sequenti 1 +thesequenti 1 +written 1 +languag 1 +parallelprogram 1 +varieti 1 +independ 1 +network 1 +machin 1 +well 1 +sequent 1 +symmetri 1 +newest 1 +support 1 +crai 1 +smp 1 +free 1 +major 1 +revis 1 +sophist 1 +mani 1 +improv 1 +make 1 +easier 1 +pleasant 1 +like 1 +macdraw 1 +multipl 1 +window 1 +subgraph 1 +edit 1 +hierarchi 1 +browser 1 +hpcwire 1 +journal 1 +high 1 +perform 1 +recent 1 +publish 1 +entitl 1 +come 1 +kind 1 +enough 1 +reproduc 1 +introduct 1 +current 1 +prospect 1 +notifi 1 +backend 1 +join 1 +fill 1 +form 1 +also 1 +ad 1 +first 1 +xcodelib 1 +compon 1 +librari 1 +publicationscod 1 +construct 1 +directori 1 +compress 1 +file 1 +made 1 +lieu 1 +prepar 1 +stage 1 +despit 1 +chang 1 +still 1 +quit 1 +refer 1 +link 1 +informationfor 1 +specif 1 +comment 1 +regard 1 +berger 1 +snail 1 +group 1 +depart 1 +univers 1 +texa 1 +groupgroup 1 +leaderprofessor 1 +jame 1 +brown 1 +affili 1 +faculti 1 +werth 1 +project 1 +manag 1 +bergerstud 1 +dwip 1 +banerje 1 +incorpor 1 +dynam 1 +data 1 +partit 1 +ajita 1 +develop 1 +constraint 1 +automat 1 +alumni 1 +overview 1 +home 1 +page 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..e0c1d4ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,35 @@ +search 5 +austin 3 +inform 2 +address 2 +mail 2 +correl 2 +utexa 2 +texa 2 +home 2 +steve 1 +correlstev 1 +correlresearchph 1 +student 1 +work 1 +multifunct 1 +knowledg 1 +base 1 +group 1 +addit 1 +current 1 +construct 1 +hotlist 1 +site 1 +page 1 +email 1 +public 1 +tech 1 +reportcontact 1 +offic 1 +comput 1 +scienc 1 +depart 1 +univers 1 +taylor 1 +hall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..cda3e0fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,75 @@ +time 4 +system 4 +real 3 +program 3 +languag 3 +group 3 +robot 2 +esterel 2 +check 2 +home 2 +page 2 +driver 2 +offic 2 +carlo 1 +pucholcarlo 1 +pucholresearch 1 +interest 1 +respons 1 +reactiv 1 +gener 1 +formal 1 +method 1 +specif 1 +implement 1 +distribut 1 +control 1 +synchron 1 +mawl 1 +applic 1 +develop 1 +utc 1 +publicationsi 1 +list 1 +public 1 +avail 1 +forbrows 1 +softwareth 1 +tempest 1 +toolset 1 +packag 1 +verifyingsafeti 1 +properti 1 +written 1 +wrote 1 +half 1 +linux 1 +devic 1 +thequantavisionfram 1 +grabber 1 +part 1 +thejoystickdevic 1 +contact 1 +informationoffic 1 +dreal 1 +taylor 1 +hall 1 +univers 1 +texa 1 +austindepart 1 +comput 1 +sciencesaustin 1 +utexa 1 +austin 1 +lot 1 +phun 1 +interestsmemb 1 +theth 1 +latest 1 +interesti 1 +origin 1 +fromgandia 1 +inth 1 +provinc 1 +valencia 1 +spain 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..a2cdef1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,79 @@ +time 8 +real 7 +modechart 5 +system 3 +properti 3 +wang 3 +research 2 +group 2 +work 2 +foundat 2 +utc 1 +groupth 1 +head 1 +byprof 1 +aloysiu 1 +past 1 +year 1 +toward 1 +lai 1 +groundworkfor 1 +establish 1 +firm 1 +theoret 1 +systemsand 1 +also 1 +build 1 +design 1 +tool 1 +base 1 +canb 1 +categor 1 +three 1 +area 1 +follow 1 +specif 1 +model 1 +precis 1 +formul 1 +analysi 1 +verif 1 +reason 1 +synthesi 1 +enforc 1 +stringent 1 +constraint 1 +project 1 +logic 1 +toolset 1 +editor 1 +verifi 1 +simul 1 +compil 1 +timetool 1 +scenario 1 +languagepublicationsabstract 1 +ofth 1 +paper 1 +availableonlin 1 +postscript 1 +current 1 +member 1 +deji 1 +chen 1 +carlo 1 +puchol 1 +doug 1 +stuart 1 +chung 1 +tsou 1 +guangtian 1 +yangalumni 1 +paul 1 +clement 1 +chih 1 +farn 1 +supoj 1 +suthandavibul 1 +farnam 1 +jahanian 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..9faaa1fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,118 @@ +comput 8 +page 7 +scienc 6 +learn 5 +research 5 +intellig 4 +texa 3 +artifici 3 +system 3 +inform 3 +univers 3 +austin 3 +resourc 3 +home 3 +women 2 +interest 2 +machin 2 +acquisit 2 +lexic 2 +also 2 +robot 2 +address 2 +internet 2 +collect 2 +associ 2 +cindi 1 +thompsoncindi 1 +thompsonmachin 1 +groupunivers 1 +austini 1 +particip 1 +candlelight 1 +vigil 1 +across 1 +internetto 1 +help 1 +increas 1 +awar 1 +violenc 1 +researchmi 1 +current 1 +primarilyin 1 +area 1 +specif 1 +interestedin 1 +natur 1 +languag 1 +produc 1 +deep 1 +semanticrepresent 1 +input 1 +sentenc 1 +would 1 +us 1 +mani 1 +task 1 +propos 1 +corpu 1 +base 1 +wrote 1 +master 1 +thesi 1 +rule 1 +suitabl 1 +diagnost 1 +expert 1 +mobil 1 +exhibit 1 +atrobofest 1 +spring 1 +semest 1 +build 1 +agent 1 +finger 1 +pictur 1 +vita 1 +list 1 +public 1 +group 1 +educ 1 +north 1 +carolina 1 +state 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +cthomp 1 +utexa 1 +postal 1 +depart 1 +hotlist 1 +start 1 +point 1 +explor 1 +misc 1 +consortium 1 +bibliographi 1 +project 1 +repositori 1 +knowledg 1 +laboratori 1 +georgia 1 +tech 1 +journal 1 +linguist 1 +folk 1 +cognit 1 +miscellan 1 +stuff 1 +wolv 1 +truth 1 +evalu 1 +counsel 1 +expand 1 +horizon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..577d4691 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,18 @@ +home 2 +page 2 +xingshan 2 +welcom 1 +browser 1 +doesn 1 +seem 1 +support 1 +frame 1 +want 1 +downloadth 1 +latest 1 +netscap 1 +school 1 +work 1 +famili 1 +friend 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..beb2b019 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,71 @@ +comput 4 +scienc 3 +univers 3 +mike 2 +dahlin 2 +texa 2 +berkelei 2 +oper 2 +trend 2 +price 2 +page 2 +taylor 2 +hall 2 +dahlingener 1 +informationassist 1 +professor 1 +austin 1 +electr 1 +engin 1 +rice 1 +teachingfal 1 +systemsspr 1 +advanc 1 +architectureeveryon 1 +read 1 +technic 1 +classic 1 +researchxf 1 +serverless 1 +network 1 +file 1 +systemweb 1 +systemsth 1 +experiment 1 +softwar 1 +system 1 +less 1 +public 1 +list 1 +informationtechnolog 1 +pagethi 1 +pagesummar 1 +recent 1 +technolog 1 +interest 1 +operatingsystem 1 +research 1 +compter 1 +architect 1 +includinghistor 1 +data 1 +gather 1 +capac 1 +disk 1 +memori 1 +person 1 +informationif 1 +seem 1 +bore 1 +probabl 1 +want 1 +work 1 +internet 1 +root 1 +link 1 +world 1 +email 1 +utexa 1 +offic 1 +postal 1 +austinaustin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..2746a6d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,50 @@ +damani 2 +distribut 2 +home 1 +page 1 +howdi 1 +pagal 1 +dekho 1 +student 1 +busi 1 +read 1 +lazi 1 +updateth 1 +homepag 1 +regularli 1 +suffic 1 +anyth 1 +crazi 1 +appeal 1 +phrase 1 +us 1 +probabl 1 +meant 1 +research 1 +activ 1 +work 1 +parallel 1 +sytem 1 +laboratori 1 +vijai 1 +garg 1 +interest 1 +system 1 +network 1 +public 1 +follow 1 +time 1 +honor 1 +tradit 1 +feel 1 +oblig 1 +providesometh 1 +servic 1 +contact 1 +mehom 1 +guadulp 1 +austin 1 +offic 1 +austinphon 1 +dept 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..1c3dfd9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,55 @@ +austin 5 +texa 3 +research 3 +address 3 +dane 2 +depart 2 +comput 2 +scienc 2 +univers 2 +system 2 +pinbal 2 +view 2 +marshalldan 1 +marshal 1 +student 1 +multiresolut 1 +render 1 +autom 1 +model 1 +tree 1 +real 1 +time 1 +global 1 +illumin 1 +electromechan 1 +machin 1 +mainten 1 +thelogist 1 +equat 1 +escap 1 +attractor 1 +complex 1 +plane 1 +main 1 +area 1 +make 1 +nice 1 +imag 1 +contact 1 +inform 1 +work 1 +appli 1 +laboratori 1 +burnet 1 +phone 1 +email 1 +utexa 1 +school 1 +unrel 1 +link 1 +pastur 1 +jupit 1 +probe 1 +happi 1 +station 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..393abcd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,67 @@ +scienc 3 +stuart 2 +page 2 +sure 2 +well 2 +us 2 +link 2 +provid 2 +save 2 +doug 1 +welcom 1 +pagedoug 1 +home 1 +pagewelcom 1 +construct 1 +bear 1 +number 1 +oflinksto 1 +interest 1 +place 1 +inform 1 +aboutsport 1 +fiction 1 +booksin 1 +gener 1 +fewjok 1 +testof 1 +latexhtml 1 +aweath 1 +mapandcondit 1 +austinandnew 1 +orlean 1 +guess 1 +sort 1 +person 1 +archiv 1 +amgraci 1 +share 1 +perhap 1 +process 1 +index 1 +puttingit 1 +simpl 1 +keep 1 +webbrows 1 +databas 1 +browser 1 +know 1 +thisi 1 +good 1 +idea 1 +go 1 +someth 1 +justa 1 +easi 1 +access 1 +manner 1 +stuffmom 1 +click 1 +comput 1 +calendarlink 1 +video 1 +fictionbooksjokessportsfoodvideout 1 +libraryresumelast 1 +updat 1 +dasdastuart 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..9ee67009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,4 @@ +doug 1 +swhich 1 +annoi 1 +thisorthi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..41006a49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,57 @@ +austin 6 +comput 4 +scienc 4 +univers 4 +texa 4 +home 4 +page 4 +neural 3 +network 3 +genet 3 +algorithm 3 +depart 2 +research 2 +universityof 2 +address 2 +link 2 +utc 2 +dian 1 +lawdian 1 +student 1 +intereststh 1 +symbol 1 +ground 1 +problemnavig 1 +robot 1 +agent 1 +us 1 +evolv 1 +theus 1 +educ 1 +spanish 1 +literatur 1 +washingtonst 1 +fine 1 +art 1 +washington 1 +stateunivers 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +dianelaw 1 +utexa 1 +postal 1 +local 1 +homepag 1 +gann 1 +illig 1 +santa 1 +institut 1 +digest 1 +archiv 1 +michigan 1 +group 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..992754d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,47 @@ +comput 4 +student 3 +depart 3 +dionisi 2 +scienc 2 +engin 2 +univers 2 +patra 2 +greec 2 +austin 2 +home 1 +page 1 +papadopoulosdionisi 1 +papadopoulosabout 1 +graduat 1 +undergradu 1 +informat 1 +also 1 +work 1 +technolog 1 +institut 1 +member 1 +softwar 1 +applic 1 +reasearch 1 +unit 1 +contact 1 +medionisi 1 +papadopoulo 1 +texa 1 +mail 1 +utexa 1 +link 1 +mine 1 +monitor 1 +databas 1 +homework 1 +panhellen 1 +associationpanathinaiko 1 +athlet 1 +clubgreek 1 +newshellen 1 +resourc 1 +networkeveryth 1 +alwai 1 +want 1 +know 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..5ec068b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,119 @@ +random 8 +version 7 +stoc 6 +preliminari 6 +comput 5 +appear 5 +texa 3 +austin 3 +scienc 2 +univers 2 +inform 2 +address 2 +complex 2 +expand 2 +applic 2 +graph 2 +report 2 +public 2 +construct 2 +structur 2 +algorithm 2 +simul 2 +weak 2 +sourc 2 +foc 2 +revis 2 +bound 2 +combinatorica 2 +sicomp 2 +complet 2 +david 1 +zuckermandavid 1 +zuckermanassist 1 +professor 1 +contact 1 +offic 1 +taylor 1 +hall 1 +email 1 +utexa 1 +postal 1 +depart 1 +finger 1 +cours 1 +fall 1 +cryptographyresearch 1 +intereststh 1 +role 1 +theori 1 +walk 1 +cryptographi 1 +paragraph 1 +descript 1 +well 1 +myprofil 1 +annual 1 +also 1 +look 1 +recent 1 +asymptot 1 +good 1 +code 1 +correct 1 +insert 1 +delet 1 +transposit 1 +soda 1 +optim 1 +sampl 1 +extractor 1 +leader 1 +elect 1 +multipl 1 +cover 1 +time 1 +linear 1 +space 1 +jcss 1 +call 1 +determinist 1 +logspac 1 +us 1 +gener 1 +algorithmica 1 +tight 1 +analys 1 +local 1 +load 1 +balanc 1 +derandom 1 +product 1 +beat 1 +eigenvalu 1 +explicit 1 +constructionand 1 +utc 1 +technic 1 +effici 1 +small 1 +hit 1 +setfor 1 +combinatori 1 +rectangl 1 +high 1 +dimens 1 +lower 1 +mutual 1 +exclus 1 +unapproxim 1 +problem 1 +list 1 +abstract 1 +visit 1 +page 1 +sinc 1 +april 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..392c72a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,70 @@ +multimedia 7 +laboratori 5 +research 5 +distribut 4 +comput 3 +univers 3 +texa 3 +austin 3 +multimediacomput 2 +scienc 2 +sponsor 2 +foundat 2 +paper 2 +home 1 +page 1 +welcom 1 +main 1 +object 1 +investig 1 +wide 1 +rangeof 1 +issu 1 +area 1 +system 1 +currentresearch 1 +focus 1 +design 1 +storag 1 +server 1 +network 1 +transport 1 +protocol 1 +digit 1 +audio 1 +video 1 +andmultiresolut 1 +databas 1 +dmcl 1 +part 1 +departmentof 1 +work 1 +carri 1 +variou 1 +industri 1 +federalinstitut 1 +includ 1 +intel 1 +nation 1 +nasa 1 +microsoft 1 +mitsubishi 1 +electr 1 +merl 1 +microsystemsinc 1 +tabl 1 +content 1 +agenda 1 +relev 1 +technic 1 +report 1 +list 1 +member 1 +call 1 +would 1 +like 1 +hear 1 +send 1 +yourcom 1 +suggest 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..8e31f681 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,85 @@ +domain 4 +softwar 4 +research 4 +gener 3 +extens 3 +languag 3 +program 3 +student 2 +specif 2 +design 2 +current 2 +build 2 +jakarta 2 +project 2 +univers 2 +texa 2 +offic 2 +address 2 +austin 2 +batorydon 1 +batorysoftwar 1 +improv 1 +programm 1 +product 1 +reduc 1 +mainten 1 +cost 1 +enhanc 1 +applic 1 +perform 1 +investig 1 +wai 1 +realiz 1 +practic 1 +compon 1 +base 1 +methodolog 1 +technolog 1 +larg 1 +scale 1 +system 1 +synthesi 1 +span 1 +topic 1 +architectur 1 +pattern 1 +subject 1 +model 1 +parameter 1 +object 1 +orient 1 +framework 1 +interest 1 +databas 1 +manag 1 +data 1 +structur 1 +avion 1 +support 1 +goal 1 +preprocessor 1 +java 1 +would 1 +encapsul 1 +pluggabl 1 +fund 1 +darpa 1 +microsoft 1 +appli 1 +laboratori 1 +schlumberg 1 +public 1 +contact 1 +inform 1 +taylor 1 +hall 1 +email 1 +batori 1 +utexa 1 +phone 1 +number 1 +postal 1 +depart 1 +comput 1 +scienc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..513439a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,41 @@ +dwip 3 +austin 3 +info 2 +parallel 2 +usavoic 2 +offic 2 +home 1 +page 1 +banerje 1 +photograph 1 +banerjeeabout 1 +methi 1 +work 1 +code 1 +programminggroup 1 +methodolog 1 +includ 1 +data 1 +partit 1 +graphicalparallel 1 +program 1 +system 1 +paper 1 +present 1 +theintern 1 +process 1 +symposium 1 +list 1 +favorit 1 +site 1 +insert 1 +know 1 +contact 1 +departmentpost 1 +comput 1 +scienc 1 +main 1 +homepost 1 +enfield 1 +road 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..e4e324f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,50 @@ +anoth 3 +emilio 2 +camahort 2 +gurrea 2 +promis 2 +home 2 +page 2 +paragraph 2 +austin 2 +offic 2 +set 1 +decent 1 +summer 1 +mmmmm 1 +multipl 1 +complaint 1 +meet 1 +previou 1 +deadlin 1 +come 1 +excus 1 +know 1 +siggraph 1 +paper 1 +finish 1 +januari 1 +thing 1 +think 1 +bout 1 +right 1 +make 1 +time 1 +els 1 +lose 1 +credibl 1 +left 1 +first 1 +item 1 +third 1 +forget 1 +break 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +taylor 1 +usavoic 1 +main 1 +ecamahor 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..062b8847 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,32 @@ +posnak 3 +austin 2 +system 2 +research 2 +view 2 +graduat 1 +student 1 +comput 1 +scienc 1 +univers 1 +texa 1 +interest 1 +network 1 +oper 1 +support 1 +multimedia 1 +work 1 +distribut 1 +multimediacomput 1 +laboratori 1 +head 1 +harrick 1 +supervis 1 +greg 1 +lavend 1 +isod 1 +consortium 1 +base 1 +summari 1 +public 1 +utexa 1 +eduphon 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..fef359cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,73 @@ +emerson 7 +comput 6 +tempor 4 +scienc 3 +logic 3 +real 3 +time 3 +allen 2 +texa 2 +austin 2 +automata 2 +srinivasan 2 +journal 2 +calculu 2 +emersonbruton 1 +centenni 1 +professor 1 +depart 1 +taylor 1 +hall 1 +univers 1 +mail 1 +utexa 1 +phone 1 +direct 1 +secretari 1 +area 1 +research 1 +interest 1 +formal 1 +method 1 +aid 1 +verif 1 +infinit 1 +object 1 +concurr 1 +distribut 1 +systemsselect 1 +recent 1 +publications 1 +sistla 1 +quantit 1 +reason 1 +system 1 +sadler 1 +effici 1 +satisfi 1 +theori 1 +practic 1 +bakker 1 +york 1 +springer 1 +verlag 1 +lectur 1 +note 1 +jutla 1 +tree 1 +determinaci 1 +annual 1 +ieee 1 +symposium 1 +foundat 1 +foc 1 +juan 1 +modal 1 +handbook 1 +theoret 1 +leeuwen 1 +elsevi 1 +press 1 +amsterdam 1 +cambridg 1 +mass 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..9b9c1b49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,103 @@ +program 7 +code 5 +emeri 4 +work 4 +parallel 4 +berger 3 +home 3 +mail 3 +austin 3 +utexa 3 +system 3 +group 3 +page 3 +function 3 +info 2 +comput 2 +scienc 2 +analyst 2 +groupi 2 +visual 2 +inform 2 +ticam 2 +composit 2 +materi 2 +haskel 2 +doug 2 +evangelist 2 +pageemeri 1 +person 1 +contact 1 +address 1 +dept 1 +taylor 1 +hall 1 +univers 1 +texa 1 +phone 1 +research 1 +also 1 +affili 1 +project 1 +click 1 +name 1 +tool 1 +search 1 +lyco 1 +databas 1 +file 1 +randomli 1 +select 1 +mirror 1 +near 1 +view 1 +unix 1 +user 1 +academ 1 +uttr 1 +abstract 1 +languag 1 +add 1 +object 1 +orient 1 +us 1 +concept 1 +known 1 +type 1 +class 1 +pure 1 +framework 1 +paper 1 +describ 1 +extens 1 +analyz 1 +accomplish 1 +well 1 +problem 1 +compress 1 +postscript 1 +html 1 +othermi 1 +youngest 1 +brother 1 +aspir 1 +artist 1 +graphic 1 +handiwork 1 +linksth 1 +systemtexbook 1 +textbook 1 +exchangegrac 1 +graduat 1 +repres 1 +associ 1 +last 1 +updat 1 +octob 1 +believ 1 +macintosh 1 +check 1 +http 1 +macaddict 1 +join 1 +list 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..9ba8af80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,108 @@ +emma 4 +univers 4 +austin 4 +page 3 +comput 3 +scienc 3 +china 3 +home 2 +depart 2 +texa 2 +zhongshan 2 +librari 2 +wuabout 1 +myselfhi 1 +welcom 1 +chines 1 +girl 1 +come 1 +august 1 +studi 1 +interest 1 +immedi 1 +degre 1 +becam 1 +market 1 +repres 1 +inibm 1 +compani 1 +south 1 +branch 1 +try 1 +deliv 1 +solut 1 +small 1 +planet 1 +costom 1 +telecommun 1 +media 1 +industri 1 +would 1 +surpris 1 +find 1 +manyalumni 1 +enter 1 +alumni 1 +club 1 +nice 1 +thing 1 +attend 1 +graduat 1 +school 1 +lot 1 +intern 1 +opportun 1 +engin 1 +student 1 +semest 1 +work 1 +part 1 +time 1 +programm 1 +nation 1 +instrumentsinc 1 +cours 1 +schedul 1 +spring 1 +distribut 1 +computingmanag 1 +informationautomat 1 +program 1 +tool 1 +baseyahoogalaxi 1 +onlin 1 +universityyellow 1 +mini 1 +introduct 1 +us 1 +fortran 1 +tutori 1 +infoleisur 1 +timenewspagepeopl 1 +dailyartstim 1 +magazinechines 1 +magazinepc 1 +magazinec 1 +visit 1 +orlean 1 +houston 1 +antoniosan 1 +franciscomarina 1 +peac 1 +citysan 1 +jose 1 +capit 1 +silicon 1 +vallei 1 +love 1 +francisco 1 +contact 1 +pointemail 1 +emmawu 1 +utexa 1 +eduphon 1 +mail 1 +last 1 +date 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..2fdf3664 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,30 @@ +page 2 +emilio 2 +first 2 +anoth 2 +paragraph 2 +austin 2 +offic 2 +home 1 +remolinaemilio 1 +remolinaabout 1 +mehi 1 +bullet 1 +list 1 +easi 1 +includ 1 +well 1 +item 1 +third 1 +forget 1 +break 1 +curriculum 1 +vita 1 +contact 1 +mepost 1 +comput 1 +scienc 1 +usavoic 1 +main 1 +eremolin 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..589fe817 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,26 @@ +levent 2 +home 2 +page 2 +sayfasi 1 +welcom 1 +erkok 1 +graduat 1 +student 1 +depart 1 +comput 1 +sciencesat 1 +universityof 1 +texa 1 +austin 1 +former 1 +locat 1 +inturkei 1 +person 1 +inform 1 +reach 1 +http 1 +ceng 1 +metu 1 +erkokto 1 +find 1 +thank 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..ffcac5b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,39 @@ +esra 6 +erdem 4 +comput 4 +scienc 4 +univers 3 +austin 3 +texa 2 +depart 2 +interest 2 +monoton 2 +reason 2 +utexa 2 +homepag 1 +student 1 +thedepart 1 +educ 1 +engin 1 +andinform 1 +bilkent 1 +turkei 1 +area 1 +machin 1 +learninginduct 1 +logic 1 +program 1 +topic 1 +cognit 1 +sciencelearningreason 1 +children 1 +theori 1 +mind 1 +commonsens 1 +reasoningknowledg 1 +representationemotionsphilosophi 1 +mindcontact 1 +inform 1 +postal 1 +voic 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..01c2bdc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,69 @@ +carl 2 +student 2 +comput 2 +scienc 2 +univers 2 +rice 2 +austin 2 +offic 2 +home 1 +pagestephen 1 +carlpardon 1 +dust 1 +current 1 +work 1 +toward 1 +master 1 +art 1 +degre 1 +depart 1 +texa 1 +thesi 1 +describ 1 +system 1 +perform 1 +syntact 1 +extens 1 +scheme 1 +program 1 +languag 1 +wasn 1 +alwai 1 +life 1 +myresum 1 +believ 1 +item 1 +person 1 +interest 1 +planmi 1 +resum 1 +research 1 +interestsa 1 +psuedo 1 +random 1 +collect 1 +linksth 1 +household 1 +daili 1 +dose 1 +thing 1 +world 1 +wide 1 +snow 1 +pike 1 +peak 1 +houston 1 +chronicl 1 +interact 1 +sport 1 +worth 1 +athlet 1 +march 1 +bandget 1 +touchpost 1 +usavoic 1 +main 1 +know 1 +esteban 1 +utexa 1 +edureturn 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..a25f7f95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,63 @@ +univers 4 +texa 3 +inform 3 +comput 3 +scienc 3 +austin 3 +learn 2 +research 2 +improv 2 +us 2 +address 2 +estlin 2 +utexa 2 +tara 1 +estlintara 1 +estlinmachin 1 +groupth 1 +austinresearchcontrol 1 +knowledg 1 +perform 1 +problem 1 +solver 1 +byguid 1 +effici 1 +accur 1 +solut 1 +researchinvolv 1 +combin 1 +analyt 1 +induct 1 +machinelearn 1 +techniqu 1 +acquir 1 +control 1 +amparticularli 1 +interest 1 +method 1 +theperform 1 +plan 1 +schedul 1 +system 1 +includ 1 +detail 1 +descript 1 +myresearch 1 +vita 1 +list 1 +public 1 +also 1 +check 1 +machin 1 +group 1 +page 1 +educ 1 +tulan 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +postal 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..59c9e24f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,85 @@ +francoi 5 +barbanson 3 +research 3 +interest 3 +central 3 +market 3 +page 2 +utc 2 +austin 2 +todai 2 +tank 2 +polic 2 +class 2 +utexa 2 +home 1 +versionhom 1 +versionthi 1 +locat 1 +directori 1 +spool 1 +user 1 +francoisabout 1 +mecurr 1 +black 1 +forest 1 +cake 1 +genuin 1 +find 1 +real 1 +pastri 1 +fruit 1 +mouss 1 +pack 1 +groceri 1 +well 1 +swim 1 +forthcom 1 +trip 1 +shed 1 +lighton 1 +issu 1 +stop 1 +shop 1 +food 1 +women 1 +current 1 +crawl 1 +join 1 +foreign 1 +legion 1 +chines 1 +wisdom 1 +suggest 1 +watch 1 +plai 1 +basketbal 1 +hyogo 1 +japan 1 +check 1 +action 1 +atdominion 1 +hqcheck 1 +dilberti 1 +knew 1 +databas 1 +would 1 +noth 1 +troubl 1 +mentionthat 1 +parallel 1 +comput 1 +contact 1 +mepost 1 +guadalup 1 +street 1 +suit 1 +texa 1 +voic 1 +theori 1 +number 1 +assum 1 +machin 1 +work 1 +mail 1 +edufrancoi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..b7587f1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,58 @@ +comput 10 +fussel 3 +scienc 3 +research 3 +texa 3 +graphic 3 +donald 2 +depart 2 +technolog 2 +group 2 +appli 2 +engin 2 +mathemat 2 +univers 2 +austin 2 +utexa 2 +public 2 +trammel 1 +crow 1 +regent 1 +professor 1 +director 1 +advanc 1 +divis 1 +inform 1 +laboratori 1 +member 1 +center 1 +electr 1 +institut 1 +phone 1 +mail 1 +eduinform 1 +http 1 +user 1 +fussellb 1 +social 1 +dartmouth 1 +collegem 1 +dalla 1 +area 1 +interest 1 +architectur 1 +databas 1 +system 1 +design 1 +autom 1 +fault 1 +toler 1 +cours 1 +introduct 1 +journal 1 +confer 1 +work 1 +progress 1 +current 1 +former 1 +student 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..34ed857b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,33 @@ +ajit 2 +gener 2 +useless 2 +file 2 +georgemi 1 +pagethi 1 +page 1 +go 1 +youand 1 +construct 1 +someth 1 +odd 1 +goodthat 1 +find 1 +anyth 1 +start 1 +research 1 +address 1 +georg 1 +wickersham 1 +lane 1 +austin 1 +gajit 1 +utexa 1 +eduher 1 +softwar 1 +document 1 +foundus 1 +recent 1 +david 1 +last 1 +updat 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..e513280c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,27 @@ +geeta 2 +arora 1 +home 1 +page 1 +graduat 1 +student 1 +current 1 +year 1 +still 1 +try 1 +tofigur 1 +research 1 +undergrad 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +india 1 +contact 1 +mehom 1 +river 1 +oak 1 +medic 1 +art 1 +austin 1 +phone 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..1f3113a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,62 @@ +page 5 +home 3 +comment 3 +gokul 2 +flame 2 +critic 2 +send 2 +click 2 +final 1 +receiv 1 +countless 1 +gripe 1 +theexcess 1 +verbos 1 +decid 1 +thecollect 1 +wish 1 +mass 1 +democrat 1 +world 1 +putonli 1 +barest 1 +minimum 1 +adieu 1 +outpour 1 +head 1 +plakal 1 +hag 1 +hopey 1 +sleep 1 +easi 1 +untroubl 1 +conscienc 1 +perfectli 1 +good 1 +untim 1 +demis 1 +actual 1 +quit 1 +want 1 +kind 1 +could 1 +merit 1 +vitriol 1 +risk 1 +mayb 1 +help 1 +reinstat 1 +earlier 1 +signin 1 +lesscrit 1 +contact 1 +medic 1 +art 1 +austin 1 +visitor 1 +number 1 +suggest 1 +utexa 1 +last 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..7c815c9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,176 @@ +like 9 +austin 5 +gooti 4 +know 4 +peopl 4 +univers 4 +friend 4 +thing 4 +also 4 +love 3 +game 3 +book 3 +home 2 +page 2 +subramanyam 2 +intro 2 +present 2 +futur 2 +read 2 +well 2 +interest 2 +place 2 +came 2 +made 2 +alwai 2 +great 2 +time 2 +comput 2 +scienc 2 +texa 2 +life 2 +around 2 +plai 2 +tenni 2 +check 2 +past 1 +hideout 1 +visitor 1 +number 1 +welcom 1 +bold 1 +name 1 +start 1 +suggest 1 +continu 1 +hopefulli 1 +wont 1 +disappoint 1 +gold 1 +born 1 +sept 1 +somebodi 1 +hyderabad 1 +andhra 1 +pradesh 1 +geographi 1 +southern 1 +state 1 +india 1 +curiou 1 +famili 1 +school 1 +join 1 +osmania 1 +colleg 1 +technolog 1 +bachelor 1 +chemic 1 +engin 1 +contact 1 +vari 1 +background 1 +thought 1 +proud 1 +call 1 +batch 1 +nebraska 1 +lincoln 1 +gala 1 +becam 1 +addict 1 +american 1 +footbal 1 +except 1 +cold 1 +winter 1 +everi 1 +els 1 +wasjust 1 +studi 1 +year 1 +transfer 1 +enrol 1 +master 1 +program 1 +depart 1 +real 1 +cool 1 +hang 1 +especi 1 +weather 1 +usual 1 +acad 1 +care 1 +list 1 +alphabet 1 +order 1 +abraham 1 +gokul 1 +kumar 1 +mehul 1 +neeraj 1 +shantanu 1 +shailesh 1 +vipin 1 +best 1 +keep 1 +chat 1 +mani 1 +make 1 +travel 1 +anoth 1 +likechess 1 +question 1 +carrom 1 +board 1 +racquet 1 +ball 1 +tabl 1 +cricket 1 +soccer 1 +watch 1 +definetli 1 +text 1 +want 1 +horoscop 1 +todai 1 +compatabil 1 +sign 1 +listen 1 +hindi 1 +song 1 +write 1 +would 1 +bore 1 +narrow 1 +option 1 +golden 1 +goe 1 +without 1 +sai 1 +control 1 +destini 1 +ever 1 +success 1 +cours 1 +attribut 1 +hardwork 1 +power 1 +good 1 +happen 1 +propos 1 +dispos 1 +pleas 1 +spend 1 +fill 1 +valuabl 1 +comment 1 +guest 1 +hide 1 +medic 1 +art 1 +utexa 1 +finger 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..caaaddce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,108 @@ +protocol 5 +comput 4 +permiss 4 +specif 4 +copi 4 +http 3 +survei 3 +citat 3 +exact 3 +pragmat 3 +utexa 3 +date 3 +statement 2 +decemb 2 +goudanetwork 2 +copyright 2 +page 2 +texa 2 +gouda 2 +implement 2 +network 2 +octob 2 +work 2 +posit 1 +goudaacm 1 +surveysa 1 +associ 1 +machineri 1 +fornetwork 1 +specificationsand 1 +implementationsmoham 1 +goudath 1 +univers 1 +austin 1 +depart 1 +sciencesaustin 1 +usagouda 1 +user 1 +utc 1 +report 1 +profil 1 +htmlabstract 1 +argu 1 +studi 1 +evolv 1 +bridgeth 1 +networkprotocol 1 +gener 1 +term 1 +formal 1 +implementationsaddit 1 +word 1 +phrase 1 +compil 1 +softwar 1 +tool 1 +develop 1 +methodologypubl 1 +inform 1 +submiss 1 +june 1 +revis 1 +accept 1 +public 1 +sourc 1 +html 1 +avail 1 +make 1 +digitalor 1 +hard 1 +part 1 +person 1 +classroomus 1 +grant 1 +without 1 +provid 1 +made 1 +ordistribut 1 +profit 1 +commerci 1 +advantag 1 +bearthi 1 +notic 1 +full 1 +first 1 +forcompon 1 +own 1 +other 1 +must 1 +honor 1 +abstract 1 +credit 1 +permit 1 +otherwis 1 +torepublish 1 +post 1 +server 1 +redistribut 1 +list 1 +requiresprior 1 +request 1 +frompubl 1 +dept 1 +orpermiss 1 +last 1 +modifi 1 +moham 1 +goudagouda 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..a6ec5b0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,63 @@ +gunnel 3 +john 2 +transpos 2 +case 2 +assign 2 +report 2 +look 2 +home 2 +plan 2 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +austin 1 +utexa 1 +plapack 1 +minut 1 +connect 1 +imag 1 +except 1 +guess 1 +drank 1 +depict 1 +product 1 +latter 1 +author 1 +collect 1 +code 1 +spars 1 +matrix 1 +computationsif 1 +would 1 +like 1 +meet 1 +best 1 +friend 1 +take 1 +data 1 +pageam 1 +log 1 +check 1 +class 1 +also 1 +glimps 1 +mysteri 1 +land 1 +hail 1 +central 1 +oregon 1 +towni 1 +less 1 +redmond 1 +doesn 1 +much 1 +page 1 +talk 1 +visitor 1 +rememb 1 +test 1 +file 1 +long 1 +bore 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..33446509 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,34 @@ +comput 3 +scienc 3 +frank 2 +utexa 2 +austin 2 +univers 2 +student 2 +depart 2 +tropschuhfrank 1 +tropschuh 1 +gunther 1 +schweiz 1 +clayton 1 +waldhofstrass 1 +rheinfelden 1 +curriculum 1 +vitaeenglishdeutschlinkscarnegi 1 +mellon 1 +undergradu 1 +school 1 +universitterlangen 1 +nrnberg 1 +junior 1 +year 1 +abroad 1 +institut 1 +mathematisch 1 +maschinen 1 +datenverarbeitung 1 +oper 1 +system 1 +texa 1 +graduat 1 +tropschuhgunth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..b393b7cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,41 @@ +yongxiang 2 +comput 2 +scienc 2 +univers 2 +texa 2 +austin 2 +home 1 +pagemerri 1 +christmashappi 1 +year 1 +welcom 1 +homepagegao 1 +yongxiangsever 1 +pointsto 1 +contact 1 +addresspictur 1 +mine 1 +ceremoni 1 +grant 1 +master 1 +degre 1 +chinadepart 1 +gener 1 +inform 1 +name 1 +male 1 +birthdai 1 +birth 1 +place 1 +huanan 1 +jiangsu 1 +china 1 +hobbi 1 +tabl 1 +tenniseduc 1 +background 1 +juli 1 +softwar 1 +directori 1 +servic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..27188f3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,26 @@ +comput 3 +zhang 2 +manag 2 +home 1 +page 1 +schoolth 1 +univers 1 +texa 1 +austin 1 +scienc 1 +second 1 +semestercoursesc 1 +languag 1 +linc 1 +distribut 1 +alvis 1 +databas 1 +mirankerfil 1 +term 1 +project 1 +databs 1 +queri 1 +formthank 1 +stop 1 +gzhang 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..5007053c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,107 @@ +comput 6 +scienc 5 +zhou 4 +design 3 +wong 3 +institut 2 +univers 2 +depart 2 +texa 2 +austin 2 +vlsi 2 +algorithm 2 +optim 2 +ieee 2 +aid 2 +jose 2 +room 2 +refer 2 +campu 2 +home 1 +pagealan 1 +headlin 1 +new 1 +year 1 +ture 1 +award 1 +given 1 +amir 1 +pnueli 1 +aprofessor 1 +weizmann 1 +israel 1 +theoret 1 +compuer 1 +tsinghua 1 +prestig 1 +china 1 +incompletelist 1 +undergradu 1 +classmat 1 +kept 1 +alex 1 +zhao 1 +current 1 +student 1 +research 1 +interest 1 +focus 1 +find 1 +mani 1 +applic 1 +mathemat 1 +analysi 1 +combinatori 1 +complex 1 +even 1 +mathematicallog 1 +researchgroup 1 +head 1 +prof 1 +martin 1 +publicationshai 1 +forriv 1 +rout 1 +crosstalk 1 +constraint 1 +internationalconfer 1 +chen 1 +optimalnon 1 +uniform 1 +wire 1 +size 1 +elmor 1 +delai 1 +model 1 +acmintern 1 +confer 1 +studi 1 +academ 1 +techniqu 1 +industri 1 +directori 1 +bulletin 1 +live 1 +period 1 +chines 1 +staff 1 +movi 1 +search 1 +engin 1 +internet 1 +contact 1 +inform 1 +sciencesunivers 1 +austintaylor 1 +hall 1 +staustin 1 +voic 1 +mail 1 +haizhou 1 +utexa 1 +edulast 1 +modifi 1 +number 1 +visit 1 +homepag 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..c8a7711d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,41 @@ +comput 3 +scienc 3 +texa 3 +austin 3 +univ 3 +wuhan 3 +china 3 +home 2 +page 2 +dept 2 +welcom 1 +construct 1 +myselfnow 1 +first 1 +year 1 +student 1 +departmentof 1 +universityof 1 +want 1 +know 1 +click 1 +hear 1 +educ 1 +pre 1 +nation 1 +softwar 1 +engin 1 +alumni 1 +pal 1 +univers 1 +alumnihom 1 +pagecontact 1 +wait 1 +email 1 +haosun 1 +utexa 1 +edunow 1 +call 1 +visitor 1 +sinc 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..22da89dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,91 @@ +champion 7 +comput 6 +intramur 5 +volleybal 5 +divis 5 +scienc 4 +austin 4 +micheal 3 +universityof 3 +hewett 2 +utexa 2 +texa 2 +kansa 2 +mathemat 2 +honor 2 +first 2 +place 2 +bowl 2 +faculti 2 +grad 2 +fall 2 +open 2 +summer 2 +finish 2 +address 2 +hewetthewett 1 +educlick 1 +fingerm 1 +click 1 +email 1 +fourth 1 +year 1 +student 1 +departmentof 1 +educ 1 +stanfordunivers 1 +electr 1 +engin 1 +washburnunivers 1 +intern 1 +collegiateprogram 1 +contest 1 +nation 1 +competit 1 +utc 1 +sawada 1 +ioanni 1 +smaragdaki 1 +thoma 1 +wahlutc 1 +tower 1 +hanoi 1 +lanc 1 +tokudaut 1 +spring 1 +club 1 +motorola 1 +marathon 1 +hour 1 +minut 1 +might 1 +want 1 +visit 1 +myfavorit 1 +page 1 +locatem 1 +learnabout 1 +research 1 +interest 1 +view 1 +downloadmi 1 +public 1 +learnmor 1 +phone 1 +number 1 +call 1 +offic 1 +home 1 +central 1 +timefax 1 +mail 1 +univers 1 +depart 1 +taylor 1 +hall 1 +author 1 +hewettemail 1 +edulast 1 +updat 1 +wednesdai 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..4c24fbfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,167 @@ +current 6 +work 6 +austin 5 +databas 5 +robot 5 +java 5 +softwar 4 +resum 4 +packag 4 +http 4 +contract 3 +us 3 +graphic 3 +also 3 +interest 3 +page 3 +hiep 2 +texa 2 +activ 2 +solut 2 +game 2 +receiv 2 +scienc 2 +utexa 2 +system 2 +xwindow 2 +languag 2 +gunu 2 +common 2 +lisp 2 +function 2 +written 2 +like 2 +program 2 +port 2 +netrek 2 +factoryx 2 +offic 2 +nguyenhiep 1 +nguyenabout 1 +meabout 1 +vietnames 1 +american 1 +born 1 +came 1 +unit 1 +state 1 +five 1 +resid 1 +texasfor 1 +life 1 +live 1 +programm 1 +seek 1 +client 1 +process 1 +start 1 +busi 1 +providinghigh 1 +internet 1 +product 1 +rang 1 +video 1 +detail 1 +link 1 +hypertextresum 1 +occup 1 +gordon 1 +novak 1 +compil 1 +class 1 +educ 1 +univers 1 +havedevelop 1 +year 1 +onlin 1 +natur 1 +placement 1 +center 1 +address 1 +con 1 +nsplace 1 +rexi 1 +real 1 +time 1 +emptiv 1 +oper 1 +board 1 +research 1 +gdraw 1 +object 1 +orient 1 +cross 1 +platform 1 +librari 1 +postscript 1 +legion 1 +data 1 +flow 1 +control 1 +flat 1 +simul 1 +realist 1 +specular 1 +reflect 1 +sonar 1 +xgcl 1 +xakcl 1 +interfac 1 +akcl 1 +standalon 1 +provid 1 +john 1 +ousterhout 1 +theunivers 1 +specif 1 +anonlin 1 +access 1 +student 1 +prototyp 1 +moredetail 1 +researchwith 1 +allow 1 +easi 1 +build 1 +andmaintain 1 +network 1 +explor 1 +methodolog 1 +larg 1 +currentlyact 1 +search 1 +expertis 1 +internetsoftwar 1 +might 1 +best 1 +leverag 1 +technic 1 +especi 1 +window 1 +fast 1 +textur 1 +mappingroutin 1 +anim 1 +processor 1 +assembl 1 +write 1 +poetri 1 +make 1 +potteri 1 +outdoor 1 +list 1 +spot 1 +group 1 +virtual 1 +realiti 1 +vrml 1 +sdsc 1 +vrmlto 1 +contact 1 +mepost 1 +comput 1 +usavoic 1 +main 1 +edulast 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..3b897e39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,72 @@ +comput 4 +austin 4 +depart 3 +univers 3 +world 3 +huiqun 2 +home 2 +page 2 +scienc 2 +texa 2 +research 2 +inform 2 +internet 2 +career 2 +book 2 +java 2 +rosett 2 +hqliu 2 +utexa 2 +nice 1 +meet 1 +student 1 +member 1 +vlsi 1 +design 1 +group 1 +guid 1 +professor 1 +martin 1 +wong 1 +new 1 +virtual 1 +tour 1 +citi 1 +collect 1 +chines 1 +site 1 +sunris 1 +stuff 1 +societi 1 +ieee 1 +giant 1 +search 1 +tool 1 +yahoo 1 +infoseek 1 +directori 1 +onlin 1 +center 1 +mosaic 1 +bookmark 1 +entertain 1 +languag 1 +unix 1 +perl 1 +expect 1 +refer 1 +manual 1 +program 1 +exampl 1 +contact 1 +mail 1 +phone 1 +address 1 +campu 1 +taylor 1 +last 1 +modifi 1 +comment 1 +welcom 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..61cb131b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,58 @@ +austin 7 +scienc 5 +comput 4 +avail 3 +hudson 2 +onlin 2 +research 2 +offic 2 +home 1 +pagehudson 1 +turnerphd 1 +student 1 +colleg 1 +natur 1 +sciencesat 1 +univers 1 +texa 1 +advisor 1 +vladimir 1 +lifschitz 1 +expect 1 +thesi 1 +titl 1 +infer 1 +rule 1 +causal 1 +represent 1 +ofcommonsens 1 +knowledg 1 +action 1 +msc 1 +mli 1 +librari 1 +inform 1 +english 1 +philosophi 1 +vita 1 +postscript 1 +draft 1 +dissert 1 +also 1 +interestscommonsens 1 +reason 1 +actionlog 1 +program 1 +nonmonoton 1 +reasoningmi 1 +paper 1 +linkseuropean 1 +colloquium 1 +spatialand 1 +tempor 1 +reasoningto 1 +contact 1 +mepost 1 +usavoic 1 +main 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..e3caaa19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,63 @@ +home 5 +austin 5 +comput 4 +page 3 +time 3 +yanbin 2 +zhang 2 +welcom 2 +graduat 2 +depart 2 +scienc 2 +univers 2 +texa 2 +full 2 +spring 2 +world 2 +compani 2 +hyanbin 2 +utexa 2 +address 2 +littl 1 +cutti 1 +allen 1 +student 1 +current 1 +seek 1 +part 1 +cours 1 +left 1 +resum 1 +click 1 +postscript 1 +format 1 +ieee 1 +onlin 1 +career 1 +center 1 +help 1 +languag 1 +internet 1 +librari 1 +webmuseum 1 +travel 1 +beauti 1 +homeland 1 +contact 1 +mail 1 +phone 1 +offic 1 +campu 1 +tarlor 1 +lake 1 +blvd 1 +number 1 +visit 1 +homepag 1 +sinc 1 +last 1 +modifi 1 +septemb 1 +comment 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..47fd0d77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,59 @@ +inform 4 +isaac 3 +sheldon 3 +isheldon 3 +utexa 3 +comput 3 +scienc 3 +austin 2 +contact 1 +phone 1 +mail 1 +http 1 +user 1 +profession 1 +current 1 +graduat 1 +student 1 +univeristi 1 +texa 1 +depart 1 +reciv 1 +master 1 +decemb 1 +coursework 1 +concentr 1 +graphic 1 +reciev 1 +undergradu 1 +degre 1 +unives 1 +mass 1 +lowel 1 +summer 1 +intern 1 +scientif 1 +engin 1 +softwar 1 +small 1 +compani 1 +creat 1 +schlaeor 1 +mellor 1 +case 1 +tool 1 +project 1 +construct 1 +solid 1 +geometri 1 +us 1 +bsptree 1 +modular 1 +trace 1 +framework 1 +butt 1 +person 1 +babi 1 +page 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..c3bbb773 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,43 @@ +john 2 +adair 2 +live 2 +taylor 2 +back 2 +page 2 +crinkum 1 +crankum 1 +homepag 1 +compound 1 +wife 1 +holli 1 +eileen 1 +evan 1 +jame 1 +rice 1 +alumni 1 +friend 1 +includ 1 +carl 1 +white 1 +also 1 +internet 1 +consult 1 +matthew 1 +mengerink 1 +fish 1 +fanat 1 +work 1 +dejanew 1 +steve 1 +traylen 1 +get 1 +doctor 1 +book 1 +email 1 +jadair 1 +utexa 1 +graduat 1 +student 1 +home 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..bef4132b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,22 @@ +oper 2 +univers 2 +texa 2 +research 2 +john 1 +chamber 1 +home 1 +pagejohn 1 +chamberssenior 1 +system 1 +specialistb 1 +physic 1 +paso 1 +comput 1 +scienc 1 +yale 1 +universityph 1 +austin 1 +paper 1 +vita 1 +link 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..b7b3a169 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,164 @@ +austin 9 +texa 8 +research 7 +aftereffect 7 +visual 7 +comput 6 +univers 5 +model 5 +scienc 4 +process 4 +tilt 4 +self 4 +organ 4 +illus 4 +result 4 +brain 3 +thesi 3 +primari 3 +cortex 3 +later 3 +lissom 3 +interact 3 +realist 2 +us 2 +dramat 2 +advanc 2 +make 2 +avail 2 +simul 2 +function 2 +call 2 +human 2 +propos 2 +also 2 +postscript 2 +inform 2 +jbednar 2 +utexa 2 +bednarjim 1 +bednar 1 +candid 1 +dept 1 +philosophi 1 +electr 1 +engin 1 +decemb 1 +concentr 1 +biolog 1 +ofcognit 1 +artifici 1 +neural 1 +network 1 +seek 1 +useth 1 +technolog 1 +past 1 +fewdecad 1 +equal 1 +understand 1 +thehuman 1 +mind 1 +power 1 +soon 1 +beavail 1 +cortic 1 +becomingpract 1 +enabl 1 +necessari 1 +refut 1 +testabl 1 +hypothes 1 +overal 1 +goal 1 +makecognit 1 +empir 1 +rather 1 +purelyphilosoph 1 +domain 1 +centuri 1 +master 1 +ofth 1 +nearli 1 +complet 1 +abstract 1 +long 1 +studi 1 +psychologist 1 +vision 1 +appar 1 +failur 1 +might 1 +offer 1 +insight 1 +carri 1 +particular 1 +class 1 +thought 1 +aris 1 +thu 1 +serv 1 +test 1 +case 1 +theori 1 +area 1 +specif 1 +sever 1 +inhibit 1 +neuron 1 +receiv 1 +input 1 +examin 1 +sirosh 1 +miikkulainen 1 +incorpor 1 +demonstr 1 +principl 1 +drive 1 +qualit 1 +quantit 1 +similar 1 +measur 1 +basi 1 +explan 1 +indirect 1 +effect 1 +line 1 +differ 1 +orient 1 +appli 1 +figur 1 +spatial 1 +frequenc 1 +predict 1 +preliminari 1 +report 1 +file 1 +begin 1 +doctor 1 +includ 1 +detail 1 +level 1 +visualbehavior 1 +extens 1 +contact 1 +email 1 +mail 1 +address 1 +depart 1 +finger 1 +command 1 +machin 1 +log 1 +departmentmi 1 +resum 1 +ascii 1 +format 1 +link 1 +probabl 1 +outdat 1 +paper 1 +interest 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..238ad65e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,95 @@ +homepag 3 +test 3 +comput 3 +system 3 +visit 3 +java 3 +home 2 +univers 2 +librari 2 +cours 2 +topic 2 +info 2 +document 2 +page 2 +html 2 +help 2 +construct 2 +ofjunfanghi 1 +welcom 1 +frame 1 +graduat 1 +student 1 +scienc 1 +depart 1 +texa 1 +austin 1 +largest 1 +academ 1 +north 1 +america 1 +catalog 1 +resum 1 +professor 1 +novak 1 +assign 1 +excel 1 +sourc 1 +ethernet 1 +technolog 1 +special 1 +sysadm 1 +domain 1 +name 1 +inform 1 +free 1 +unixish 1 +oper 1 +linux 1 +project 1 +us 1 +debug 1 +transfer 1 +latex 1 +file 1 +unix 1 +email 1 +stuff 1 +utc 1 +kristina 1 +ross 1 +tutori 1 +learn 1 +take 1 +jeff 1 +network 1 +administr 1 +last 1 +summer 1 +florida 1 +state 1 +edmund 1 +automobil 1 +buyer 1 +guid 1 +want 1 +packag 1 +languag 1 +specif 1 +public 1 +ascii 1 +format 1 +look 1 +pretti 1 +good 1 +privaci 1 +like 1 +sceneri 1 +pictur 1 +jfang 1 +utexa 1 +start 1 +visitor 1 +number 1 +sinc 1 +applet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..327af10f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,37 @@ +good 5 +austin 3 +john 2 +beer 2 +jprior 2 +utexa 2 +address 2 +priorjohn 1 +priormi 1 +resum 1 +accumul 1 +knowledg 1 +year 1 +dog 1 +someth 1 +long 1 +enough 1 +start 1 +hurt 1 +probabl 1 +chew 1 +nacho 1 +chip 1 +swallow 1 +sleep 1 +contact 1 +inform 1 +email 1 +mail 1 +univers 1 +texa 1 +depart 1 +comput 1 +scienc 1 +home 1 +phone 1 +swisher 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..8cea9c93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,41 @@ +thoma 3 +comput 3 +jeff 2 +texa 2 +scienc 2 +homepagejeff 1 +homepagecontact 1 +informationpublicationssoftwar 1 +system 1 +gener 1 +research 1 +groupphoto 1 +albumfavorit 1 +internet 1 +sitesuniversityof 1 +departmentappliedresearch 1 +laboratori 1 +electricaland 1 +engin 1 +departmentedsfinanci 1 +trade 1 +technolog 1 +center 1 +fttc 1 +keyword 1 +search 1 +utacademiccalendarsut 1 +sportshook 1 +ultim 1 +longhorn 1 +site 1 +utfootbal 1 +scheduleaustintexa 1 +depart 1 +univers 1 +austin 1 +last 1 +modifi 1 +octob 1 +jthoma 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..7edb260a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,66 @@ +comput 5 +scienc 5 +univers 5 +austin 4 +peke 3 +jiani 2 +student 2 +texa 2 +china 2 +depart 2 +page 2 +homepagewelcom 1 +homepag 1 +first 1 +year 1 +indepart 1 +ataustin 1 +beij 1 +alumni 1 +ofpek 1 +chinesechines 1 +scenerychines 1 +novelschines 1 +classicschines 1 +magazineschines 1 +newspapersus 1 +link 1 +registrar 1 +gradaut 1 +studiesut 1 +libraryut 1 +campusutaccessabout 1 +weather 1 +todai 1 +citylimit 1 +lot 1 +excit 1 +stuff 1 +miscellaneousyahoojava 1 +sunjavascript 1 +netscapeth 1 +perl 1 +languag 1 +home 1 +pagecomput 1 +research 1 +associationcomput 1 +journal 1 +magzin 1 +webnetwork 1 +technicalreport 1 +libraryth 1 +collect 1 +bibliographiesintern 1 +contact 1 +street 1 +jyluo 1 +utexa 1 +finger 1 +meyour 1 +comment 1 +suggestionswould 1 +highli 1 +appreci 1 +visitorsinc 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..bacbad1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,54 @@ +home 3 +austin 2 +comput 2 +scienc 2 +interest 2 +concurr 2 +madra 2 +inform 2 +phone 2 +kedar 1 +namjoshiabout 1 +mehi 1 +thank 1 +check 1 +doctor 1 +student 1 +depart 1 +research 1 +advisor 1 +professor 1 +allen 1 +emerson 1 +tempor 1 +logic 1 +reason 1 +program 1 +semant 1 +distributedalgorithm 1 +automatatheori 1 +came 1 +fall 1 +receiv 1 +bachelor 1 +degre 1 +indian 1 +institut 1 +technolog 1 +wonder 1 +page 1 +lot 1 +stuff 1 +would 1 +like 1 +know 1 +person 1 +contact 1 +offic 1 +address 1 +west 1 +street 1 +todai 1 +amul 1 +adkedar 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..394c69d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,47 @@ +univers 4 +texa 4 +austin 4 +kenneth 3 +harker 3 +depart 3 +comput 3 +scienc 3 +utexa 3 +kharker 2 +public 2 +view 2 +opinion 2 +taylor 1 +hall 1 +amateur 1 +radio 1 +babylon 1 +linux 1 +rocketri 1 +cyberspac 1 +academ 1 +work 1 +polit 1 +stuff 1 +resum 1 +last 1 +updat 1 +world 1 +wide 1 +facil 1 +provid 1 +servic 1 +faculti 1 +student 1 +staff 1 +guest 1 +express 1 +page 1 +sole 1 +respons 1 +author 1 +necessarili 1 +reflect 1 +system 1 +board 1 +regent 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..f9b4f2c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,92 @@ +kincaid 5 +linear 5 +comput 5 +numer 4 +algebra 3 +imac 3 +world 3 +mathemat 3 +system 3 +david 2 +analysi 2 +univers 2 +develop 2 +congress 2 +area 2 +softwar 2 +interest 2 +algorithm 2 +equat 2 +larg 2 +solut 2 +young 2 +senior 1 +lecturerassoci 1 +director 1 +center 1 +lamar 1 +texa 1 +austin 1 +honor 1 +award 1 +profession 1 +servic 1 +certif 1 +recognit 1 +creativ 1 +technicalinnov 1 +basic 1 +subprogram 1 +nasa 1 +technic 1 +committe 1 +session 1 +organ 1 +andappli 1 +interestmathemat 1 +high 1 +perform 1 +summari 1 +researchmi 1 +focus 1 +research 1 +us 1 +iter 1 +solv 1 +spars 1 +coeffici 1 +matric 1 +aris 1 +ellipt 1 +partial 1 +differenti 1 +implement 1 +parallel 1 +anoth 1 +select 1 +recent 1 +publicationsw 1 +chenei 1 +pacif 1 +grove 1 +brook 1 +cole 1 +hay 1 +itpack 1 +proceed 1 +coput 1 +atlanta 1 +stationari 1 +second 1 +degre 1 +method 1 +topic 1 +polynomi 1 +sever 1 +variabl 1 +applic 1 +rassia 1 +scientif 1 +river 1 +edg 1 +jersei 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..35abd45b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,67 @@ +comput 5 +univers 4 +kistler 3 +page 3 +scienc 3 +parallel 3 +mike 2 +home 2 +academ 2 +inform 2 +syracus 2 +york 2 +prof 2 +construct 1 +first 1 +year 1 +student 1 +texa 1 +ataustin 1 +thedepart 1 +also 1 +current 1 +emploi 1 +theperson 1 +softwar 1 +productsdivis 1 +backgroundba 1 +mathemat 1 +susquehanna 1 +selinsgrov 1 +master 1 +busi 1 +administr 1 +stern 1 +school 1 +businessnew 1 +interestsi 1 +interest 1 +algorithm 1 +particularli 1 +us 1 +commerci 1 +data 1 +process 1 +press 1 +random 1 +collect 1 +link 1 +coursesfal 1 +distribut 1 +iwith 1 +jayadev 1 +misra 1 +numer 1 +analysi 1 +linear 1 +algebrawith 1 +alan 1 +cline 1 +visitor 1 +number 1 +contact 1 +juli 1 +walk 1 +pflugervil 1 +email 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..7c49af5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,9 @@ +jacob 2 +kornerup 2 +kornerupjacob 1 +welcom 1 +home 1 +page 1 +time 1 +sinc 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..c36fd110 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,78 @@ +knowledg 5 +research 4 +texa 3 +austin 3 +kuiper 3 +benjamin 2 +comput 2 +scienc 2 +univers 2 +mathemat 2 +represent 2 +commonsens 2 +incomplet 2 +reason 2 +model 2 +qualit 2 +simul 2 +spring 2 +build 2 +intellig 2 +agent 2 +kuipersbenjamin 1 +kuipersbruton 1 +centenni 1 +professor 1 +swarthmor 1 +colleg 1 +interest 1 +expert 1 +withparticular 1 +emphasi 1 +effect 1 +thequalit 1 +grouphom 1 +page 1 +describ 1 +topic 1 +paper 1 +student 1 +andavail 1 +softwar 1 +consider 1 +detail 1 +accomplish 1 +includ 1 +tour 1 +spatial 1 +cognit 1 +qsim 1 +algorithm 1 +access 1 +limit 1 +logic 1 +robot 1 +explor 1 +map 1 +strategi 1 +base 1 +recognit 1 +distinct 1 +place 1 +qualitativereason 1 +cambridg 1 +press 1 +teach 1 +plan 1 +fall 1 +physic 1 +world 1 +contact 1 +inform 1 +mail 1 +prof 1 +depart 1 +email 1 +utexa 1 +phone 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..b3ffbf34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,48 @@ +comput 3 +simon 2 +texa 2 +austin 2 +email 2 +utexa 2 +eduphon 2 +network 2 +assist 2 +kata 2 +compress 2 +postscript 2 +professor 1 +sciencesdepart 1 +sciencesunivers 1 +offic 1 +taylor 1 +hall 1 +campu 1 +mail 1 +scienc 1 +photo 1 +profil 1 +research 1 +laboratori 1 +fall 1 +spring 1 +administr 1 +also 1 +editori 1 +ieee 1 +transact 1 +carbon 1 +inform 1 +electron 1 +submissionnew 1 +clip 1 +tune 1 +turn 1 +toss 1 +internet 1 +empt 1 +american 1 +statesman 1 +februari 1 +front 1 +page 1 +cont 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..cae0757e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,54 @@ +research 9 +network 6 +laboratori 4 +scienc 3 +protocol 3 +austin 2 +comput 2 +texa 2 +perform 2 +project 2 +foundat 2 +program 2 +depart 1 +univers 1 +activ 1 +span 1 +entir 1 +develop 1 +cycl 1 +design 1 +specif 1 +verif 1 +test 1 +analysi 1 +implement 1 +tune 1 +currentinterest 1 +architectur 1 +address 1 +chang 1 +underli 1 +commun 1 +technolog 1 +well 1 +applic 1 +supervis 1 +simon 1 +professor 1 +fund 1 +provid 1 +nation 1 +nsaunivers 1 +advanc 1 +lockhe 1 +current 1 +recent 1 +paper 1 +support 1 +videoservic 1 +secur 1 +theori 1 +workshop 1 +integr 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..0f22da0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,32 @@ +home 5 +page 3 +landrum 2 +christian 2 +robert 1 +stori 1 +mail 1 +viruspictur 1 +mountain 1 +empirepch 1 +retreattexa 1 +republican 1 +convent 1 +backbon 1 +rockrsumfamilyinterest 1 +council 1 +awai 1 +graham 1 +gordon 1 +pageth 1 +comput 1 +scienc 1 +depart 1 +ofth 1 +univers 1 +texa 1 +austin 1 +councillandrum 1 +utexa 1 +edulast 1 +updat 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..01111996 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,30 @@ +comput 4 +greg 2 +lavend 2 +univers 2 +texa 2 +scienc 2 +austinr 1 +lavenderadjunct 1 +assist 1 +professordepart 1 +anddepart 1 +electr 1 +engineeringth 1 +austin 1 +contact 1 +address 1 +research 1 +activ 1 +engin 1 +cours 1 +recommend 1 +read 1 +biograph 1 +informationsuggest 1 +improv 1 +page 1 +welcom 1 +last 1 +updat 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..d4814c7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,64 @@ +scienc 5 +home 4 +comput 4 +depart 3 +univers 3 +austin 3 +singapor 3 +jame 2 +welcom 2 +texa 2 +inform 2 +nation 2 +island 2 +come 2 +page 1 +pageyeap 1 +normal 1 +look 1 +student 1 +sciencesat 1 +bachelor 1 +master 1 +atth 1 +system 1 +disc 1 +research 1 +interest 1 +algorithm 1 +data 1 +structur 1 +vlsi 1 +designalgorithm 1 +small 1 +tropic 1 +call 1 +locat 1 +degre 1 +north 1 +equat 1 +internet 1 +communityi 1 +much 1 +aliv 1 +particip 1 +know 1 +aboutthi 1 +peopl 1 +wife 1 +hong 1 +kong 1 +month 1 +activ 1 +lovesto 1 +smile 1 +contact 1 +mail 1 +leekk 1 +utexa 1 +phone 1 +campu 1 +addr 1 +taylor 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..5a33712b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,30 @@ +softwar 4 +less 4 +laboratori 3 +experiment 3 +system 3 +research 2 +main 1 +object 1 +investig 1 +wai 1 +buildreli 1 +high 1 +perform 1 +parallel 1 +distributedsystem 1 +apart 1 +depart 1 +computersci 1 +univers 1 +oftexa 1 +austin 1 +projectsmemb 1 +lablessss 1 +seminar 1 +seriessponsorslast 1 +modifi 1 +decemb 1 +robert 1 +blumoferdb 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..52dded6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,81 @@ +comput 5 +parallel 5 +snyder 4 +languag 3 +program 3 +sciencesth 2 +portabl 2 +implement 2 +proceed 2 +intern 2 +address 2 +calvin 1 +lincalvin 1 +linassist 1 +professor 1 +import 1 +thing 1 +iswhat 1 +studi 1 +_study_ 1 +plai 1 +_play_ 1 +pete 1 +carrilresearch 1 +interestscompil 1 +performanceanalysi 1 +scientif 1 +project 1 +home 1 +page 1 +select 1 +publicationsth 1 +novel 1 +mathemat 1 +biologyalgorithm 1 +dikaiako 1 +manoussaki 1 +woodward 1 +conf 1 +supercomput 1 +accommod 1 +polymorph 1 +data 1 +decomposit 1 +explicitli 1 +parallelprogram 1 +internationalparallel 1 +process 1 +symposium 1 +april 1 +arrai 1 +sublanguag 1 +compilersfor 1 +banerje 1 +gelernt 1 +nicolau 1 +padua 1 +springer 1 +verlag 1 +simpl 1 +journal 1 +comparison 1 +model 1 +share 1 +memori 1 +multiprocessor 1 +withl 1 +confer 1 +parallelprocess 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +utexa 1 +postal 1 +depart 1 +univers 1 +texa 1 +austinaustin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..cfc15054 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,62 @@ +page 4 +austin 3 +system 3 +guangtian 2 +home 2 +current 2 +time 2 +depart 2 +comput 2 +scienc 2 +texa 2 +real 2 +research 2 +last 2 +liugt 2 +utexa 2 +homepagehi 1 +welcom 1 +construct 1 +content 1 +permit 1 +apolog 1 +incomplet 1 +result 1 +inconveni 1 +graduat 1 +student 1 +theunivers 1 +researchi 1 +member 1 +professor 1 +group 1 +interest 1 +includ 1 +timeschedul 1 +algorithm 1 +oper 1 +network 1 +perform 1 +distribut 1 +also 1 +work 1 +data 1 +replic 1 +knowledg 1 +mine 1 +summer 1 +internship 1 +contact 1 +inform 1 +offic 1 +good 1 +view 1 +phone 1 +email 1 +mail 1 +address 1 +univers 1 +updat 1 +pleas 1 +send 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..f9f72d30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,45 @@ +lorenzo 5 +comput 5 +alvisi 3 +scienc 3 +distribut 3 +cornel 2 +utexa 2 +fall 2 +home 1 +page 1 +assist 1 +professor 1 +depart 1 +laurea 1 +physic 1 +universit 1 +agrav 1 +bologna 1 +itali 1 +offic 1 +taylorhal 1 +campusshow 1 +locat 1 +taylor 1 +hall 1 +phone 1 +mail 1 +research 1 +interestsi 1 +interest 1 +special 1 +emphasi 1 +fault 1 +toler 1 +cours 1 +spring 1 +oper 1 +system 1 +topic 1 +sytem 1 +public 1 +photo 1 +maria 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..2f281697 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,62 @@ +comput 5 +current 3 +austin 3 +home 2 +page 2 +work 2 +scienc 2 +texa 2 +link 2 +microsoft 2 +luxu 2 +utexa 2 +graduat 1 +student 1 +networksoth 1 +class 1 +tsinghua 1 +univers 1 +undergradu 1 +institut 1 +china 1 +depart 1 +studyut 1 +universityaustin 1 +live 1 +academ 1 +internet 1 +area 1 +languag 1 +system 1 +ieee 1 +sigcomm 1 +sigmod 1 +siglink 1 +siggraph 1 +sigmm 1 +sigir 1 +compani 1 +onlin 1 +shop 1 +cool 1 +site 1 +chines 1 +music 1 +newsjob 1 +hunt 1 +weather 1 +forcast 1 +dictionari 1 +contact 1 +inform 1 +campu 1 +dept 1 +univ 1 +addr 1 +corpor 1 +mail 1 +xuelu 1 +thank 1 +come 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..8eda9402 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,89 @@ +softwar 9 +engin 7 +werth 6 +educ 4 +comput 3 +chair 3 +honour 2 +fall 2 +offic 2 +link 2 +class 2 +ieee 2 +committe 2 +develop 2 +scienc 2 +tool 2 +direct 2 +object 2 +orient 2 +program 2 +lauri 1 +werthlauri 1 +werthlectur 1 +lwerth 1 +utexa 1 +educurr 1 +semest 1 +hour 1 +time 1 +taylor 1 +phone 1 +engineeringc 1 +contemporari 1 +issu 1 +scienceprofession 1 +servicevic 1 +technic 1 +presentco 1 +confer 1 +profession 1 +presentarea 1 +interestsoftwar 1 +cognit 1 +summari 1 +researchmi 1 +current 1 +work 1 +center 1 +andenviron 1 +area 1 +includ 1 +human 1 +interfac 1 +andsoftwar 1 +metric 1 +select 1 +recent 1 +publicationsl 1 +qualiti 1 +assur 1 +project 1 +transact 1 +januari 1 +lectur 1 +note 1 +process 1 +improv 1 +industri 1 +strength 1 +case 1 +tomayko 1 +springer 1 +verlag 1 +john 1 +proceed 1 +workshop 1 +ics 1 +macintosh 1 +journal 1 +us 1 +univers 1 +texa 1 +depart 1 +home 1 +pagefaculti 1 +profilesc 1 +classeslast 1 +updat 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..aa3a4c9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,42 @@ +madhukar 5 +austin 3 +texa 3 +reddi 2 +home 2 +page 2 +offic 2 +utexa 2 +offici 2 +korupoluwelcom 1 +reach 1 +avenu 1 +taylor 1 +hall 1 +dept 1 +comp 1 +scienc 1 +univ 1 +ahom 1 +link 1 +madrashomepag 1 +ganga 1 +alumniclass 1 +madra 1 +utalgorithm 1 +comput 1 +theori 1 +group 1 +colloquium 1 +oncomput 1 +complex 1 +info 1 +cricket 1 +worldwid 1 +site 1 +espnet 1 +sportszon 1 +interact 1 +batchu 1 +india 1 +author 1 +korupoluemail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..d9f71ef6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,23 @@ +richard 1 +malloryrichard 1 +malloryresearchthesi 1 +research 1 +produc 1 +quasi 1 +natur 1 +languag 1 +explan 1 +qsimsimul 1 +current 1 +implement 1 +work 1 +simpl 1 +system 1 +contact 1 +email 1 +mallori 1 +utexa 1 +offic 1 +taylor 1 +austin 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..b1ee3917 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,120 @@ +stabil 9 +system 9 +comput 8 +self 6 +author 6 +moham 6 +gouda 6 +flow 5 +state 5 +scienc 4 +rout 4 +legitim 3 +illegitim 3 +proceed 3 +inform 3 +prepar 3 +marco 2 +depart 2 +univers 2 +texa 2 +network 2 +interest 2 +distribut 2 +fault 2 +step 2 +time 2 +respons 2 +appear 2 +third 2 +workshop 2 +maximum 2 +tree 2 +minimum 2 +utexa 2 +austin 2 +home 1 +page 1 +schneidermarco 1 +schneiderph 1 +candid 1 +austinresearchth 1 +titl 1 +dissert 1 +research 1 +area 1 +protocol 1 +toler 1 +particular 1 +implicit 1 +design 1 +label 1 +itsstat 1 +identifi 1 +occur 1 +correct 1 +intend 1 +execut 1 +consid 1 +said 1 +whenregardless 1 +initi 1 +guarante 1 +converg 1 +finit 1 +number 1 +systemwhich 1 +stai 1 +forev 1 +vita 1 +postscript 1 +public 1 +survei 1 +march 1 +real 1 +decis 1 +toward 1 +tolerantr 1 +kluwer 1 +academ 1 +publish 1 +earlier 1 +version 1 +intern 1 +octob 1 +invit 1 +talk 1 +annual 1 +joint 1 +confer 1 +novemb 1 +submit 1 +journal 1 +second 1 +depth 1 +anish 1 +arora 1 +memori 1 +requir 1 +silent 1 +fifteenth 1 +symposium 1 +principl 1 +shlomi 1 +dolev 1 +span 1 +implement 1 +internet 1 +person 1 +list 1 +link 1 +construct 1 +contact 1 +offic 1 +taylor 1 +hall 1 +email 1 +postal 1 +address 1 +ctaylor 1 +usamarco 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..897bb35f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,92 @@ +comput 4 +scienc 4 +inform 3 +austin 3 +depart 3 +research 3 +page 2 +mark 2 +offic 2 +taylor 2 +hall 2 +univers 2 +texa 2 +taught 2 +addit 2 +appl 2 +pleas 2 +class 2 +routin 2 +avail 2 +home 1 +johnstonemark 1 +johnstonecontact 1 +postal 1 +address 1 +usual 1 +find 1 +best 1 +reach 1 +isvia 1 +email 1 +markj 1 +utexa 1 +johnston 1 +also 1 +look 1 +full 1 +finger 1 +semest 1 +oper 1 +system 1 +byrichard 1 +brice 1 +object 1 +orient 1 +design 1 +analysisclass 1 +glenn 1 +down 1 +somerset 1 +compani 1 +graduat 1 +spring 1 +work 1 +motorola 1 +somersetdesign 1 +centerresearch 1 +informationi 1 +member 1 +oop 1 +group 1 +part 1 +build 1 +real 1 +time 1 +garbagecollector 1 +perform 1 +number 1 +ofstudi 1 +memori 1 +alloc 1 +postscript 1 +copi 1 +dissertationpropos 1 +listof 1 +public 1 +along 1 +brief 1 +descript 1 +develop 1 +librari 1 +allow 1 +precis 1 +timingof 1 +intel 1 +pentium 1 +run 1 +linux 1 +code 1 +publicli 1 +stuff 1 +relat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..5aaf7b92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,13 @@ +home 2 +page 2 +mark 1 +point 1 +interest 1 +visit 1 +also 1 +friend 1 +click 1 +last 1 +modifi 1 +markng 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..9d44b657 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,85 @@ +page 5 +comput 4 +marku 3 +link 3 +uniti 3 +time 2 +prof 2 +taylor 2 +hall 2 +phone 2 +address 2 +utexa 2 +austin 2 +scienc 2 +place 2 +interest 2 +archiv 2 +kaltenbachmarku 1 +kaltenbachintroductionwelcom 1 +home 1 +current 1 +construct 1 +andwil 1 +entri 1 +permit 1 +iapolog 1 +incomplet 1 +result 1 +inconveni 1 +researchi 1 +member 1 +misra 1 +spsp 1 +research 1 +groupand 1 +emerson 1 +stempor 1 +reason 1 +group 1 +part 1 +work 1 +develop 1 +model 1 +checkerfor 1 +finit 1 +state 1 +program 1 +proposit 1 +logic 1 +verifi 1 +system 1 +recent 1 +version 1 +thesi 1 +isalso 1 +avail 1 +contact 1 +inform 1 +offic 1 +email 1 +postal 1 +univers 1 +texa 1 +depart 1 +find 1 +internet 1 +worth 1 +avisit 1 +theut 1 +departmenthom 1 +softwar 1 +archivefor 1 +macintosh 1 +appl 1 +sworld 1 +wide 1 +technic 1 +supporthom 1 +actansit 1 +comprehens 1 +network 1 +theatt 1 +distribut 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..8d44fdf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,50 @@ +austin 6 +comput 4 +scienc 4 +univers 4 +texa 4 +home 3 +page 3 +depart 2 +address 2 +utc 2 +memarti 1 +mayberri 1 +student 1 +researchal 1 +kind 1 +stuff 1 +educ 1 +math 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +martym 1 +utexa 1 +postal 1 +click 1 +applet 1 +paus 1 +resum 1 +displai 1 +local 1 +link 1 +neural 1 +network 1 +homepag 1 +hotlist 1 +downtown 1 +anywher 1 +virtualc 1 +internetrestaur 1 +guid 1 +virtual 1 +tnstechnolog 1 +demonstr 1 +read 1 +daili 1 +texan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..e23f056f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,45 @@ +comput 4 +scienc 4 +austin 4 +univers 3 +norm 2 +mccain 2 +reason 2 +avail 2 +onlin 2 +offic 2 +home 1 +page 1 +mccainabout 1 +mephd 1 +student 1 +colleg 1 +natur 1 +sciencesat 1 +texa 1 +advisor 1 +vladimir 1 +lifschitz 1 +expect 1 +thesi 1 +titl 1 +causal 1 +commonsens 1 +action 1 +kansa 1 +philosophi 1 +baker 1 +vita 1 +postscript 1 +research 1 +interestscommonsens 1 +actionlog 1 +program 1 +nonmonoton 1 +reasoningmi 1 +paper 1 +contact 1 +mepost 1 +usavoic 1 +main 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..e161e915 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,42 @@ +univers 4 +baylor 3 +elain 2 +learn 2 +research 2 +texa 2 +comput 2 +scienc 2 +english 2 +address 2 +austin 2 +mari 1 +califfmari 1 +califfmachin 1 +groupunivers 1 +austinresearchmi 1 +current 1 +interest 1 +us 1 +machin 1 +especiallyinduct 1 +logic 1 +program 1 +natur 1 +languag 1 +acquisit 1 +formor 1 +info 1 +check 1 +vita 1 +educ 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +mecaliff 1 +utexa 1 +postal 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..4173f34c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,209 @@ +knowledg 16 +base 15 +utexa 9 +system 6 +us 6 +domain 6 +research 5 +question 5 +model 5 +porter 4 +explan 4 +gener 4 +answer 4 +help 4 +group 3 +mallori 3 +souther 3 +prado 3 +correl 3 +comput 3 +develop 3 +larg 3 +contain 3 +task 3 +construct 3 +lester 3 +desk 3 +project 3 +bruce 2 +rich 2 +peter 2 +clark 2 +fred 2 +charl 2 +callawai 2 +carl 2 +andersen 2 +steve 2 +austin 2 +would 2 +requir 2 +perform 2 +multipl 2 +viewpoint 2 +built 2 +biologi 2 +method 2 +automat 2 +varieti 2 +concern 2 +distribut 2 +result 2 +recent 2 +jame 2 +test 2 +composit 2 +predict 2 +jeff 2 +languag 2 +text 2 +plan 2 +assist 2 +acker 2 +eilert 2 +bareiss 2 +karl 2 +murrai 2 +rickel 2 +groupknowledg 1 +shown 1 +overviewour 1 +part 1 +depart 1 +scienc 1 +atuniv 1 +texa 1 +long 1 +term 1 +goal 1 +technolog 1 +forconstruct 1 +multifunct 1 +oncomput 1 +significantli 1 +improv 1 +currentexpert 1 +tutor 1 +broadknowledg 1 +toexplain 1 +past 1 +eight 1 +year 1 +inon 1 +area 1 +answeringa 1 +fact 1 +concept 1 +ofth 1 +largest 1 +kind 1 +content 1 +structur 1 +formallyrepres 1 +addit 1 +expand 1 +arealso 1 +begin 1 +similar 1 +notabl 1 +especi 1 +encourag 1 +knowledgebas 1 +thebiolog 1 +express 1 +english 1 +biolog 1 +object 1 +event 1 +control 1 +experi 1 +expert 1 +found 1 +littl 1 +differ 1 +andthos 1 +written 1 +colleagu 1 +current 1 +extend 1 +type 1 +beanswer 1 +autom 1 +reason 1 +jeffrickel 1 +taskof 1 +appropri 1 +well 1 +build 1 +thesimplest 1 +adequ 1 +dauntingrequir 1 +sinc 1 +like 1 +implicitli 1 +manymodel 1 +numer 1 +level 1 +detail 1 +qualit 1 +process 1 +compilerand 1 +qsim 1 +simul 1 +program 1 +final 1 +bybuild 1 +anoth 1 +computingenviron 1 +focuss 1 +deskassist 1 +proport 1 +custom 1 +squestion 1 +otherwis 1 +phone 1 +normal 1 +projectsour 1 +complet 1 +ongo 1 +includ 1 +represent 1 +kned 1 +editor 1 +kastl 1 +retriev 1 +knight 1 +fare 1 +natur 1 +lex 1 +aid 1 +mainten 1 +lexicon 1 +tripel 1 +theorist 1 +searcher 1 +alumni 1 +alumna 1 +lian 1 +erik 1 +brad 1 +blumenth 1 +brant 1 +eolu 1 +uwyo 1 +clarkp 1 +redwood 1 +boe 1 +ncsu 1 +publicationsclick 1 +select 1 +public 1 +relat 1 +projectsclick 1 +herefor 1 +extens 1 +collect 1 +pointer 1 +aroundth 1 +world 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..25ce1ad5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,99 @@ +mirank 4 +student 4 +rule 4 +current 3 +text 3 +bibtex 3 +daniel 2 +page 2 +match 2 +treat 2 +algorithm 2 +rete 2 +research 2 +candid 2 +past 2 +home 1 +welcom 1 +belat 1 +presenc 1 +construct 1 +someth 1 +like 1 +finish 1 +hold 1 +breath 1 +send 1 +note 1 +utexa 1 +fashionwai 1 +wouldn 1 +place 1 +either 1 +learn 1 +itscomparison 1 +warn 1 +render 1 +obsolet 1 +byth 1 +leap 1 +interest 1 +goal 1 +encompass 1 +venu 1 +languag 1 +usea 1 +basi 1 +activ 1 +distribut 1 +databas 1 +fundamentalcomput 1 +scienc 1 +problem 1 +corollari 1 +evolv 1 +thatgoal 1 +constraint 1 +satisfact 1 +search 1 +queri 1 +optim 1 +relat 1 +object 1 +orient 1 +parallel 1 +execut 1 +base 1 +program 1 +knowledg 1 +compil 1 +bibliographi 1 +sometim 1 +link 1 +paper 1 +come 1 +soon 1 +group 1 +roberto 1 +bayardo 1 +david 1 +gadboi 1 +lanc 1 +obermey 1 +vasili 1 +samoladi 1 +robert 1 +schrag 1 +master 1 +srinivasan 1 +vaidyaraman 1 +lane 1 +warshaw 1 +archi 1 +andrewsdavid 1 +brantchin 1 +ming 1 +kuoshiow 1 +yang 1 +salvator 1 +stolfo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..e8c35000 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,85 @@ +misra 6 +comput 3 +process 3 +jayadev 2 +chair 2 +john 2 +honor 2 +fellow 2 +program 2 +formal 2 +design 2 +synchron 2 +parallel 2 +inform 2 +research 2 +group 2 +misrareg 1 +scienc 1 +depart 1 +tech 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +hopkin 1 +univers 1 +award 1 +profession 1 +servic 1 +simon 1 +guggenheim 1 +ieee 1 +fellowarea 1 +interestparallel 1 +summari 1 +researchmi 1 +interest 1 +appli 1 +method 1 +practic 1 +particularli 1 +inth 1 +specif 1 +asynchronoussystem 1 +select 1 +recent 1 +publicationsj 1 +powerlist 1 +structur 1 +recurs 1 +classic 1 +mind 1 +essai 1 +hoar 1 +prentic 1 +hall 1 +januari 1 +loos 1 +coupl 1 +futur 1 +gener 1 +system 1 +north 1 +holland 1 +phase 1 +letter 1 +equat 1 +reason 1 +nondeterminist 1 +aspect 1 +chandi 1 +foundat 1 +addison 1 +weslei 1 +homepag 1 +work 1 +electron 1 +access 1 +otherpap 1 +current 1 +project 1 +seuss 1 +anoverview 1 +apostscript 1 +versionaccess 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..30856167 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,175 @@ +learn 19 +group 14 +research 10 +system 10 +machin 8 +intellig 7 +univers 7 +induct 7 +languag 6 +utexa 6 +program 6 +artifici 5 +confer 5 +journal 5 +base 4 +natur 4 +plan 4 +comput 4 +logic 4 +acquisit 3 +estlin 3 +theori 3 +order 3 +associ 3 +texa 2 +student 2 +sowmya 2 +baff 2 +mahonei 2 +bradlei 2 +public 2 +abduct 2 +model 2 +speedup 2 +list 2 +softwar 2 +revis 2 +rule 2 +first 2 +data 2 +aaai 2 +linguist 2 +european 2 +special 2 +interest 2 +intern 2 +inform 2 +index 2 +illinoi 2 +utc 1 +supervis 1 +professor 1 +moonei 1 +focuseson 1 +combin 1 +empir 1 +knowledg 1 +techniqu 1 +includ 1 +applic 1 +knowledgerefin 1 +part 1 +scienc 1 +depart 1 +atth 1 +ataustin 1 +pictur 1 +click 1 +graduat 1 +mari 1 +elain 1 +califf 1 +mecaliff 1 +tara 1 +hermjakob 1 +ramachandran 1 +cindi 1 +thompson 1 +cthomp 1 +alumni 1 +paul 1 +scicomp 1 +jeff 1 +firstadvisor 1 +hwee 1 +nhweetou 1 +trantor 1 +dirk 1 +ourston 1 +dirk_ourston 1 +cpqm 1 +saic 1 +richard 1 +furtwangen 1 +siddarth 1 +subramanian 1 +georgetown 1 +john 1 +zell 1 +acad 1 +drake 1 +area 1 +qualit 1 +diagnosi 1 +tutor 1 +refin 1 +uncertain 1 +reasoningher 1 +complet 1 +accel 1 +reason 1 +neither 1 +proposit 1 +fort 1 +chillin 1 +predic 1 +invent 1 +foidl 1 +decis 1 +dolphin 1 +ad 1 +search 1 +control 1 +prolog 1 +standard 1 +classif 1 +algorithm 1 +autom 1 +experiment 1 +comparison 1 +repositori 1 +form 1 +relat 1 +site 1 +american 1 +ilpnet 1 +scientif 1 +network 1 +sigart 1 +signll 1 +joint 1 +aritfici 1 +ijcai 1 +nation 1 +icml 1 +fourth 1 +sourc 1 +subject 1 +biblio 1 +queri 1 +machinelearn 1 +home 1 +page 1 +servic 1 +paper 1 +archiv 1 +jair 1 +foil 1 +quinlan 1 +learner 1 +prodigi 1 +problem 1 +solv 1 +carnegi 1 +mellon 1 +ucpop 1 +partial 1 +planner 1 +washington 1 +explan 1 +oxford 1 +irvin 1 +austin 1 +wisconsin 1 +madison 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..a571ce73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,110 @@ +univers 7 +learn 6 +texa 5 +austin 5 +comput 5 +illinoi 5 +champaign 4 +scienc 4 +inform 4 +urbana 3 +machin 3 +program 3 +base 3 +address 3 +moonei 2 +research 2 +artifici 2 +intellig 2 +acquisit 2 +exampl 2 +network 2 +symbol 2 +home 2 +depart 2 +raymond 1 +homepageraymond 1 +mooneyassoci 1 +professor 1 +computersci 1 +engin 1 +interestsmi 1 +current 1 +interest 1 +primarilyin 1 +area 1 +includ 1 +natur 1 +languag 1 +parser 1 +lexicon 1 +extract 1 +word 1 +sens 1 +disambigu 1 +induct 1 +logic 1 +prolog 1 +knowledg 1 +theori 1 +refin 1 +automat 1 +modifi 1 +rule 1 +bayesian 1 +empir 1 +data 1 +search 1 +control 1 +improv 1 +plan 1 +effici 1 +qualiti 1 +compar 1 +combin 1 +neural 1 +public 1 +page 1 +vita 1 +finger 1 +cours 1 +informationfal 1 +lisp 1 +learningspr 1 +iiperson 1 +historyi 1 +grew 1 +small 1 +town 1 +fallon 1 +wherestart 1 +attend 1 +fallontownship 1 +highschool 1 +start 1 +fall 1 +went 1 +urbanato 1 +obtain 1 +degre 1 +list 1 +decemb 1 +complet 1 +myph 1 +thesi 1 +explan 1 +learninggroup 1 +direct 1 +prof 1 +gerald 1 +dejong 1 +began 1 +posit 1 +contact 1 +offic 1 +taylor 1 +hall 1 +email 1 +utexa 1 +postal 1 +meadowfir 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..353caa62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,100 @@ +austin 5 +link 5 +comput 4 +scienc 4 +decis 4 +task 4 +univers 3 +texa 3 +action 3 +home 3 +page 3 +moriarti 2 +depart 2 +control 2 +agent 2 +system 2 +select 2 +return 2 +research 2 +neural 2 +network 2 +inform 2 +address 2 +utc 2 +dave 1 +moriartydav 1 +student 1 +researchsequenti 1 +appear 1 +mani 1 +practic 1 +real 1 +world 1 +problemsinclud 1 +resourc 1 +alloc 1 +rout 1 +canb 1 +character 1 +follow 1 +scenario 1 +observ 1 +stateof 1 +dynam 1 +finit 1 +thesystem 1 +enter 1 +state 1 +upon 1 +must 1 +selectanoth 1 +payoff 1 +madeor 1 +sequenc 1 +object 1 +thesequ 1 +highest 1 +total 1 +cumulativepayoff 1 +evolv 1 +geneticalgorithm 1 +learn 1 +perform 1 +sequenti 1 +amparticularli 1 +interest 1 +problem 1 +specif 1 +knowledg 1 +iscurr 1 +unavail 1 +costli 1 +obtain 1 +domain 1 +havestudi 1 +includ 1 +game 1 +plai 1 +intellig 1 +constraintsatisfact 1 +list 1 +public 1 +educ 1 +universityof 1 +tulan 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +utexa 1 +postal 1 +local 1 +homepag 1 +us 1 +sport 1 +misc 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..1c1d6bfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,38 @@ +univers 4 +wade 3 +utexa 3 +austin 3 +comput 2 +scienc 2 +texa 2 +mine 2 +engin 2 +utah 2 +mwbarn 2 +barnesm 1 +barnesmwbarn 1 +eduresearch 1 +workhelp 1 +pagestyp 1 +map 1 +literatureliteratur 1 +research 1 +notesclassesbackground 1 +informationph 1 +student 1 +depart 1 +educ 1 +reach 1 +mehom 1 +tanglebriar 1 +trail 1 +campu 1 +offic 1 +yete 1 +mail 1 +eduauthor 1 +barnesemail 1 +edulast 1 +updat 1 +mondai 1 +decemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..d97be000 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,83 @@ +room 5 +dale 4 +nell 3 +seed 3 +comput 3 +scienc 3 +contain 3 +professor 2 +home 2 +retir 2 +austin 2 +teach 2 +summer 2 +pleas 2 +author 2 +websit 1 +scrollit_rl 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +pagesunivers 1 +texa 1 +departmentwelcom 1 +page 1 +reach 1 +address 1 +senior 1 +lectur 1 +univers 1 +oftexa 1 +receiv 1 +utaustin 1 +faculti 1 +sinc 1 +fromful 1 +time 1 +full 1 +load 1 +falland 1 +spend 1 +spring 1 +write 1 +travel 1 +feel 1 +free 1 +brows 1 +resum 1 +curriculum 1 +vita 1 +bibliographi 1 +inform 1 +text 1 +book 1 +research 1 +abstract 1 +ofdissert 1 +chair 1 +recent 1 +person 1 +whichcontain 1 +memento 1 +nontechn 1 +interest 1 +direct 1 +anycorrespond 1 +mail 1 +account 1 +ndale 1 +utexa 1 +profession 1 +profilepublicationsresearch 1 +interestsperson 1 +interestsnel 1 +westlak 1 +offic 1 +document 1 +creat 1 +assist 1 +right 1 +reserv 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..5909df54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,27 @@ +page 5 +view 3 +netscap 3 +browser 2 +frame 2 +color 2 +neeraj 1 +support 1 +home 1 +either 1 +download 1 +navig 1 +without 1 +note 1 +choos 1 +latter 1 +pleas 1 +keep 1 +mind 1 +design 1 +pretti 1 +background 1 +chosen 1 +work 1 +obnoxi 1 +chartreus 1 +blame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..4300681f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,20 @@ +home 2 +comput 2 +texa 2 +gnan 1 +pagegnana 1 +kumar 1 +natarajan 1 +page 1 +depart 1 +sciencesunivers 1 +austini 1 +graduat 1 +student 1 +sciencedepart 1 +univers 1 +austin 1 +mail 1 +utexa 1 +edufind 1 +log 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..1d04c8df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,34 @@ +nimar 2 +arora 2 +home 2 +look 2 +singh 1 +page 1 +medic 1 +art 1 +austin 1 +typic 1 +first 1 +year 1 +student 1 +doesn 1 +quit 1 +knowwhat 1 +area 1 +interest 1 +resum 1 +know 1 +altern 1 +bookmarksto 1 +clearer 1 +pictur 1 +contact 1 +click 1 +queri 1 +hit 1 +term 1 +score 1 +ters 1 +output 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..0b740a0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,77 @@ +languag 5 +comput 4 +group 4 +utc 3 +natur 3 +paper 3 +learn 3 +hermjakob 3 +depart 2 +scienc 2 +univers 2 +texa 2 +discuss 2 +meet 2 +current 2 +prof 2 +research 2 +acquist 1 +groupnatur 1 +acquisit 1 +groupat 1 +austinw 1 +explor 1 +area 1 +acquisitionand 1 +fall 1 +usual 1 +everi 1 +wednesdai 1 +havedrawn 1 +close 1 +probabl 1 +resum 1 +second 1 +third 1 +week 1 +januari 1 +propos 1 +previous 1 +particip 1 +includ 1 +moonei 1 +risto 1 +miikkulainen 1 +bobbi 1 +bryant 1 +mari 1 +elain 1 +califf 1 +marti 1 +mayberri 1 +rupert 1 +tang 1 +poon 1 +cindi 1 +thompson 1 +inform 1 +pleas 1 +contact 1 +coordin 1 +utexa 1 +relat 1 +site 1 +associ 1 +linguist 1 +signll 1 +special 1 +interest 1 +print 1 +archiv 1 +machin 1 +neural 1 +network 1 +ofth 1 +ataustinlast 1 +updat 1 +novemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..62fecde5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,70 @@ +neural 3 +research 3 +utc 2 +net 2 +group 2 +artifici 2 +intellig 2 +scienc 2 +groupth 1 +supervis 1 +prof 1 +ristomiikkulainen 1 +part 1 +comput 1 +depart 1 +univers 1 +texa 1 +ataustin 1 +concentr 1 +andcognit 1 +includ 1 +natur 1 +languag 1 +process 1 +schema 1 +basedvis 1 +cortic 1 +self 1 +organ 1 +episod 1 +memori 1 +decis 1 +make 1 +evolv 1 +network 1 +genet 1 +algorithm 1 +click 1 +mapbelow 1 +detail 1 +check 1 +hypertext 1 +book 1 +later 1 +interact 1 +thecortex 1 +structur 1 +function 1 +risto 1 +miikkulainen 1 +graduat 1 +student 1 +alumni 1 +visitor 1 +public 1 +demo 1 +poster 1 +softwar 1 +home 1 +page 1 +confer 1 +newsgroup 1 +archiv 1 +inform 1 +sourc 1 +gener 1 +tool 1 +privat 1 +linkswusagemartym 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..0843d0b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,77 @@ +lisp 6 +scienc 5 +demo 5 +comput 4 +program 4 +gordon 3 +novak 3 +texa 3 +automat 3 +physic 3 +unit 3 +interact 3 +univers 2 +austin 2 +artifici 2 +problem 2 +server 2 +graphic 2 +draw 2 +us 2 +offic 2 +support 1 +free 1 +speech 1 +internet 1 +associ 1 +professor 1 +ofcomput 1 +atth 1 +director 1 +intelligencelaboratori 1 +highest 1 +honor 1 +research 1 +reus 1 +genericalgorithmssolv 1 +specifiedinformallyartifici 1 +intelligencecurriculum 1 +vita 1 +publicationsemploymentgrantsprofession 1 +activ 1 +honorscurriculum 1 +vitaefre 1 +softwar 1 +tmycin 1 +emycin 1 +like 1 +expert 1 +system 1 +shell 1 +interfac 1 +common 1 +lispconvers 1 +measurementsoftwar 1 +construct 1 +creat 1 +write 1 +connect 1 +diagram 1 +convers 1 +convert 1 +measur 1 +isaac 1 +solv 1 +state 1 +english 1 +class 1 +schemec 1 +compilersc 1 +intelligencec 1 +programmingweb 1 +linksweatheraddress 1 +ctai 1 +univ 1 +austinaustintexa 1 +faxnovak 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..dd7bc969 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,51 @@ +page 3 +meghan 2 +insult 2 +panic 2 +brienhi 1 +welcom 1 +home 1 +pleas 1 +wipe 1 +feet 1 +enter 1 +mani 1 +complaint 1 +crappi 1 +laugh 1 +stock 1 +internet 1 +remov 1 +link 1 +chanc 1 +work 1 +thank 1 +sent 1 +still 1 +download 1 +gorgeou 1 +pictur 1 +blow 1 +poster 1 +size 1 +beauti 1 +queen 1 +date 1 +wait 1 +hear 1 +paul 1 +get 1 +marri 1 +august 1 +hope 1 +come 1 +except 1 +anyon 1 +want 1 +give 1 +feel 1 +free 1 +resum 1 +email 1 +obrien 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..1f15e3c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,13 @@ +oguer 2 +gutierrezogu 1 +gutierrezth 1 +depart 1 +comput 1 +scienc 1 +theunivers 1 +texa 1 +austinprojectsomioswwhlinksconfer 1 +databas 1 +systemsth 1 +worldemail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..f34a838c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,209 @@ +scheme 8 +memori 7 +paper 6 +time 5 +system 5 +garbag 5 +avail 5 +research 4 +paul 4 +wilson 4 +object 4 +compil 4 +collect 4 +also 4 +page 3 +group 3 +stephen 3 +carl 3 +especi 3 +persist 3 +store 3 +us 3 +alloc 3 +extens 3 +master 3 +thesi 3 +manag 3 +real 3 +collector 3 +file 3 +includ 3 +sourc 3 +code 3 +languag 3 +introduct 3 +interest 3 +site 3 +oop 2 +sheetal 2 +kakkad 2 +donovan 2 +kolbl 2 +neeli 2 +distribut 2 +virtual 2 +develop 2 +pointer 2 +larg 2 +effici 2 +standard 2 +program 2 +survei 2 +interfac 2 +orient 2 +implement 2 +cours 2 +interpret 2 +descript 2 +macro 2 +note 2 +draft 2 +good 2 +gener 2 +list 2 +whichcontain 2 +groupoop 1 +groupthi 1 +home 1 +supervis 1 +prof 1 +graduat 1 +studentsin 1 +ajit 1 +georg 1 +mark 1 +johnston 1 +scott 1 +kaplan 1 +michael 1 +qing 1 +dougla 1 +wieren 1 +area 1 +hierarchi 1 +cach 1 +simpl 1 +high 1 +perform 1 +call 1 +texa 1 +swizzl 1 +fault 1 +toimplement 1 +address 1 +space 1 +stock 1 +hardwar 1 +andoper 1 +basic 1 +studi 1 +behavior 1 +whichattempt 1 +repair 1 +damag 1 +done 1 +three 1 +decad 1 +mostli 1 +unsoundstudi 1 +mike 1 +automat 1 +storag 1 +generationaland 1 +small 1 +ongarbag 1 +local 1 +recent 1 +hard 1 +written 1 +smart 1 +adapt 1 +managementfor 1 +dynam 1 +andcompress 1 +structur 1 +checkpoint 1 +forfault 1 +toler 1 +travel 1 +debug 1 +highli 1 +portabl 1 +programmingsystem 1 +extend 1 +rscheme 1 +thread 1 +socket 1 +homepag 1 +info 1 +alpha 1 +releas 1 +noteson 1 +rawascii 1 +text 1 +andrschemear 1 +integr 1 +process 1 +algorithm 1 +tosupport 1 +open 1 +thesiscontain 1 +refer 1 +coupl 1 +write 1 +whicharen 1 +form 1 +anywai 1 +sometimesoon 1 +onlin 1 +book 1 +progress 1 +htmlformat 1 +brows 1 +contain 1 +materialfrom 1 +ascii 1 +much 1 +improv 1 +expandedpresent 1 +texinfo 1 +materiali 1 +work 1 +intro 1 +metaobject 1 +besid 1 +providesa 1 +thing 1 +like 1 +make 1 +backgroundread 1 +brief 1 +bibliographi 1 +heap 1 +fortexa 1 +anonym 1 +utexa 1 +readm 1 +materi 1 +subdirectori 1 +oopsla 1 +workshop 1 +peopl 1 +henri 1 +baker 1 +sftp 1 +although 1 +overload 1 +notb 1 +access 1 +keep 1 +try 1 +anoth 1 +great 1 +han 1 +boehm 1 +sever 1 +well 1 +free 1 +severalgarbag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..e54dc802 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,11 @@ +depart 2 +robert 1 +otuomagieaddress 1 +austin 1 +phone 1 +emailotu 1 +utexa 1 +eduuniververs 1 +infouniversityth 1 +univers 1 +txa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..0fee997b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,74 @@ +file 10 +devic 9 +virtual 7 +specif 5 +avail 5 +compon 5 +languag 4 +interfac 4 +softwar 3 +construct 3 +view 3 +padgett 2 +work 2 +control 2 +domain 2 +class 2 +also 2 +follow 2 +physic 2 +implement 2 +export 2 +us 2 +austin 2 +padgettdon 1 +dissert 1 +researchi 1 +professor 1 +brown 1 +design 1 +andimplement 1 +driver 1 +investig 1 +creation 1 +program 1 +environ 1 +focu 1 +thu 1 +compil 1 +technolog 1 +devis 1 +prototyp 1 +specifi 1 +softar 1 +call 1 +contain 1 +variou 1 +featur 1 +reduc 1 +effort 1 +requir 1 +refer 1 +manualfor 1 +current 1 +postscript 1 +draft 1 +manual 1 +exampl 1 +counter 1 +multifunct 1 +transpar 1 +recent 1 +present 1 +creat 1 +microsoft 1 +powerpointvers 1 +window 1 +contact 1 +meemail 1 +utexa 1 +edupost 1 +depart 1 +comput 1 +scienc 1 +usafax 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..be0262d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,90 @@ +page 3 +battlebal 3 +austin 3 +comput 3 +research 3 +plan 2 +softwar 2 +game 2 +binari 2 +system 2 +librari 2 +tree 2 +univers 2 +scienc 2 +interest 2 +geometr 2 +model 2 +utexa 2 +home 1 +philip 1 +hardinphilip 1 +hardinabout 1 +elimin 1 +bug 1 +everywher 1 +fail 1 +fallback 1 +write 1 +access 1 +execut 1 +file 1 +oper 1 +unfortun 1 +avail 1 +want 1 +port 1 +suno 1 +solari 1 +work 1 +go 1 +school 1 +time 1 +multiplay 1 +wrote 1 +runsund 1 +window 1 +line 1 +code 1 +us 1 +standard 1 +templat 1 +anda 1 +campbel 1 +space 1 +partit 1 +imag 1 +screenshot 1 +student 1 +texa 1 +guess 1 +depart 1 +area 1 +graphic 1 +realli 1 +cool 1 +main 1 +graphicssoftwar 1 +reus 1 +engin 1 +gener 1 +group 1 +automat 1 +programmingto 1 +contact 1 +meemail 1 +pahardin 1 +edupost 1 +usanetrek 1 +server 1 +pita 1 +curli 1 +handl 1 +digitaldisast 1 +look 1 +get 1 +plaster 1 +congradul 1 +smartest 1 +person 1 +inth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..b46bdd07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,82 @@ +comput 4 +evolut 4 +scienc 3 +univers 3 +texa 3 +austin 3 +learn 2 +research 2 +paul 1 +mcquestenpaul 1 +mcquestenphd 1 +studentdepart 1 +interest 1 +interact 1 +think 1 +mechan 1 +natur 1 +might 1 +bepract 1 +addit 1 +current 1 +techniqu 1 +exampl 1 +death 1 +usual 1 +studi 1 +explicitli 1 +email 1 +paulmcq 1 +utexa 1 +offic 1 +taylor 1 +hall 1 +phone 1 +postal 1 +address 1 +austindepart 1 +spring 1 +head 1 +forcsp 1 +introduct 1 +pascal 1 +programmingmor 1 +neuro 1 +inmoriarti 1 +link 1 +atcnr 1 +rome 1 +neural 1 +network 1 +group 1 +artifici 1 +intellig 1 +cours 1 +schedul 1 +depart 1 +handi 1 +access 1 +tout 1 +librari 1 +onlin 1 +seriou 1 +reflect 1 +dave 1 +winer 1 +websit 1 +need 1 +pointer 1 +wast 1 +hour 1 +surf 1 +check 1 +cynb 1 +humong 1 +hotlist 1 +mix 1 +knowledg 1 +knick 1 +knack 1 +nut 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..ed16a5ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,32 @@ +multimedia 4 +austin 4 +pawang 3 +utexa 3 +system 2 +depart 2 +comput 2 +scienc 2 +univers 2 +texa 2 +pawan 1 +goyal 1 +home 1 +page 1 +research 1 +summari 1 +public 1 +network 1 +protocol 1 +file 1 +oper 1 +affili 1 +group 1 +get 1 +touch 1 +email 1 +inform 1 +finger 1 +also 1 +check 1 +log 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..b5b11650 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,115 @@ +austin 7 +physic 5 +comput 5 +univers 4 +group 4 +texa 3 +current 3 +scienc 3 +depart 3 +theori 3 +interest 3 +also 3 +jose 2 +pecina 2 +obtain 2 +invari 2 +represent 2 +dissert 2 +advisor 2 +explor 2 +research 2 +numer 2 +parallel 2 +quantum 2 +scientif 2 +softwar 2 +home 2 +utexa 2 +orpecina 2 +pecinaabout 1 +previous 1 +complet 1 +master 1 +innuclear 1 +engin 1 +finish 1 +thesi 1 +graduat 1 +studi 1 +workedinvestig 1 +gaug 1 +graviti 1 +base 1 +quantiz 1 +gravit 1 +field 1 +calcul 1 +unitari 1 +irreduc 1 +publish 1 +three 1 +joint 1 +paper 1 +yuval 1 +eman 1 +georg 1 +sudarshan 1 +jurgen 1 +lemk 1 +fromcologn 1 +germani 1 +previou 1 +posit 1 +bureau 1 +econom 1 +geologi 1 +spent 1 +year 1 +half 1 +work 1 +seismic 1 +invers 1 +tomographi 1 +supervisor 1 +hardag 1 +editor 1 +geophys 1 +journal 1 +societi 1 +geophysicist 1 +algorithm 1 +analysi 1 +cryptographi 1 +visitor 1 +thephys 1 +carnegi 1 +mellon 1 +pittsburgh 1 +open 1 +compani 1 +develop 1 +fill 1 +comerci 1 +symmetri 1 +algebra 1 +sequenti 1 +solutionsin 1 +gener 1 +rel 1 +problem 1 +chromodynamicsmi 1 +curriculum 1 +vita 1 +click 1 +want 1 +print 1 +contact 1 +center 1 +particl 1 +usavoic 1 +main 1 +offic 1 +mail 1 +defo 1 +phy 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..b49a5f77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,56 @@ +know 3 +page 3 +unix 2 +would 2 +woof 2 +find 2 +nettl 1 +welcom 1 +cornerinfolik 1 +anyon 1 +want 1 +make 1 +realli 1 +short 1 +blahblah 1 +system 1 +window 1 +blah 1 +stuff 1 +eeek 1 +staff 1 +escap 1 +floor 1 +mean 1 +ibm 1 +file 1 +afteri 1 +instal 1 +stori 1 +true 1 +name 1 +chang 1 +protect 1 +theinnoc 1 +experiment 1 +work 1 +pleas 1 +think 1 +us 1 +neat 1 +cool 1 +comic 1 +buena 1 +vista 1 +movieplex 1 +meyour 1 +chanc 1 +increas 1 +drastic 1 +could 1 +employan 1 +improb 1 +drive 1 +send 1 +mail 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..ed598395 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,25 @@ +greg 2 +plaxtongreg 2 +utexa 2 +hall 2 +texa 2 +plaxtoncontact 1 +informationemail 1 +plaxton 1 +eduphon 1 +offic 1 +taylor 1 +postal 1 +depart 1 +comput 1 +sciencetaylor 1 +univers 1 +austinaustin 1 +inform 1 +annual 1 +report 1 +profilepubl 1 +last 1 +modifi 1 +decemb 1 +plaxtonplaxton 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..47f7ce24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,85 @@ +learn 5 +search 5 +porter 4 +machin 3 +knowledg 3 +base 3 +system 3 +bruce 2 +comput 2 +scienc 2 +intellig 2 +research 2 +answer 2 +question 2 +select 2 +public 2 +aaai 2 +abstract 2 +porterassoci 1 +professor 1 +faculti 1 +fellow 1 +univers 1 +california 1 +irvin 1 +honor 1 +award 1 +profession 1 +servic 1 +presidenti 1 +young 1 +investig 1 +editor 1 +presentarea 1 +interestartifici 1 +summari 1 +researchhead 1 +group 1 +develop 1 +method 1 +build 1 +larg 1 +basesand 1 +us 1 +solv 1 +problem 1 +researchinterest 1 +case 1 +recent 1 +rickel 1 +autom 1 +model 1 +predict 1 +thetim 1 +scale 1 +boundari 1 +cambridg 1 +aait 1 +press 1 +andpostscript 1 +brant 1 +rule 1 +preced 1 +complementari 1 +warrant 1 +bareiss 1 +holt 1 +concept 1 +heurist 1 +classif 1 +weak 1 +theori 1 +domain 1 +artifici 1 +journal 1 +abstractand 1 +postscript 1 +hotlist 1 +site 1 +page 1 +email 1 +address 1 +tech 1 +reportport 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..a684de54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,197 @@ +uniti 12 +program 8 +misra 7 +powerlist 6 +seuss 6 +graduat 6 +present 6 +group 5 +jayadev 5 +notat 5 +parallel 5 +work 4 +book 4 +paper 4 +proof 3 +theori 3 +kornerup 3 +list 3 +comput 2 +area 2 +current 2 +adam 2 +carruth 2 +marku 2 +kaltenbach 2 +jacob 2 +avail 2 +logic 2 +design 2 +note 2 +ofpap 2 +written 2 +includ 2 +introduct 2 +specifi 2 +circuit 2 +structur 2 +mani 2 +thepowerlist 2 +correct 2 +network 2 +studi 2 +differ 2 +compil 2 +austinpsp 1 +austinthi 1 +home 1 +page 1 +depart 1 +scienc 1 +univers 1 +texa 1 +ataustin 1 +stand 1 +specif 1 +emphasi 1 +deriveparallel 1 +distribut 1 +rigor 1 +manner 1 +issupervis 1 +develop 1 +research 1 +former 1 +member 1 +groupinclud 1 +erni 1 +cohen 1 +rajeev 1 +joshi 1 +edgar 1 +knapp 1 +ingolf 1 +krger 1 +josyula 1 +mark 1 +staskauska 1 +publicationsbelow 1 +summar 1 +wherev 1 +possibl 1 +give 1 +link 1 +topap 1 +electron 1 +reason 1 +anddistribut 1 +chandi 1 +foundat 1 +addison 1 +weslei 1 +seri 1 +variou 1 +result 1 +applic 1 +thenot 1 +assum 1 +basic 1 +understand 1 +inchandi 1 +sinc 1 +public 1 +sever 1 +improv 1 +made 1 +inth 1 +reflect 1 +amanuscript 1 +newun 1 +tempor 1 +operatorco 1 +safeti 1 +refer 1 +forrefer 1 +implement 1 +write 1 +asymbol 1 +model 1 +checker 1 +forfinit 1 +state 1 +call 1 +unityverifi 1 +extend 1 +toinclud 1 +real 1 +time 1 +aspect 1 +hybrid 1 +system 1 +synchron 1 +data 1 +length 1 +equal 1 +power 1 +twodiffer 1 +oper 1 +balanc 1 +divis 1 +parallelalgorithm 1 +succinct 1 +simpl 1 +recurs 1 +givesnumer 1 +exampl 1 +algorithm 1 +fast 1 +fourier 1 +transform 1 +batcher 1 +sort 1 +arithmet 1 +asadd 1 +multipli 1 +prove 1 +verifi 1 +addercircuit 1 +us 1 +programscan 1 +map 1 +effici 1 +architectur 1 +speciallyhypercub 1 +detail 1 +offspr 1 +address 1 +issu 1 +ofprogram 1 +composit 1 +restrict 1 +compon 1 +caninterfer 1 +read 1 +overview 1 +chapter 1 +froma 1 +monograph 1 +adisciplin 1 +multiprogram 1 +alsoavail 1 +genrat 1 +code 1 +callsfor 1 +messag 1 +commun 1 +describ 1 +thesi 1 +anexperi 1 +concurr 1 +object 1 +basedprogram 1 +languag 1 +ingolfkrg 1 +site 1 +found 1 +thepsp 1 +sitejacob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..0b7817ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,71 @@ +qime 4 +comput 4 +guest 4 +texa 3 +austin 3 +scienc 3 +inform 3 +huang 2 +email 2 +utexa 2 +univers 2 +system 2 +center 2 +page 2 +comment 2 +depart 1 +sciencesunivers 1 +phone 1 +edureceiv 1 +nankai 1 +tianjin 1 +univeris 1 +hawaii 1 +manoa 1 +hawaiiwork 1 +austincours 1 +spring 1 +advanc 1 +telecommun 1 +client 1 +server 1 +develop 1 +appli 1 +data 1 +commun 1 +cours 1 +academ 1 +resourc 1 +teamweb 1 +utcssadaili 1 +texanstock 1 +room 1 +attract 1 +picturesimageschines 1 +popsend 1 +card 1 +electr 1 +postcard 1 +line 1 +job 1 +jobtrakut 1 +placement 1 +connect 1 +gopherftp 1 +csc 1 +newstelnet 1 +cschen 1 +junk 1 +staffyour 1 +person 1 +visit 1 +pleas 1 +sign 1 +book 1 +commentsguest 1 +name 1 +construct 1 +last 1 +modifi 1 +march 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..1e2cbaba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,63 @@ +reason 7 +qualit 5 +research 5 +paper 5 +qsim 3 +index 3 +bibliographi 3 +ourresearch 3 +group 2 +world 2 +access 2 +dissert 2 +softwar 2 +directori 2 +utexasqualit 1 +utexasth 1 +sever 1 +area 1 +thephys 1 +user 1 +applic 1 +like 1 +system 1 +spatial 1 +intelligentrobot 1 +tour 1 +limit 1 +logic 1 +knowledgerepresent 1 +algernon 1 +supervis 1 +professor 1 +benjaminkuip 1 +kuiper 1 +utexa 1 +part 1 +artifici 1 +intellig 1 +comput 1 +scienc 1 +depart 1 +atth 1 +univers 1 +texa 1 +ataustin 1 +pointer 1 +book 1 +graduat 1 +student 1 +robot 1 +knowledg 1 +represent 1 +alumni 1 +includ 1 +visitor 1 +abstract 1 +yellow 1 +page 1 +easili 1 +areadescript 1 +also 1 +visit 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..9d659e6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,62 @@ +page 4 +qiang 3 +home 3 +pleas 3 +seed 3 +program 3 +comput 3 +john 2 +welcom 2 +thank 2 +window 2 +austin 2 +feel 1 +free 1 +brows 1 +around 1 +leav 1 +comment 1 +suggest 1 +joke 1 +visit 1 +come 1 +scrollit_rl 1 +timertwo 1 +settimeout 1 +els 1 +sinc 1 +com 1 +time 1 +self 1 +introduct 1 +current 1 +master 1 +scienc 1 +depart 1 +univers 1 +texa 1 +click 1 +inform 1 +seriousjunk 1 +cours 1 +languag 1 +unix 1 +graphic 1 +linux 1 +technic 1 +java 1 +realjunk 1 +sport 1 +game 1 +new 1 +struggleforliv 1 +institut 1 +qzuo 1 +utexa 1 +guestbook 1 +still 1 +underconstruct 1 +back 1 +last 1 +modif 1 +copyright 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..61d11fad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,57 @@ +scienc 4 +comput 4 +univers 3 +texa 3 +robert 2 +gener 2 +last 2 +massachusett 2 +institut 2 +technolog 2 +system 2 +paper 2 +utexa 2 +blumoferobert 1 +blumofei 1 +bobbi 1 +name 1 +pronounc 1 +bloom 1 +informationassist 1 +professor 1 +ofcomput 1 +austin 1 +brown 1 +work 1 +cilkmultithread 1 +languag 1 +runtim 1 +laboratori 1 +experiment 1 +softwar 1 +less 1 +compil 1 +list 1 +document 1 +also 1 +avail 1 +directori 1 +semest 1 +spring 1 +teach 1 +abstract 1 +data 1 +type 1 +contact 1 +informationemail 1 +eduphon 1 +offic 1 +taylor 1 +hallpost 1 +depart 1 +sciencestaylor 1 +hall 1 +austinaustin 1 +modifi 1 +decemb 1 +blumoferdb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..906f2f73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,41 @@ +time 3 +current 2 +page 2 +comput 2 +interest 2 +wang 1 +homepag 1 +wangwelcom 1 +construct 1 +content 1 +permit 1 +candid 1 +depart 1 +scienc 1 +theunivers 1 +texa 1 +austin 1 +research 1 +includ 1 +real 1 +system 1 +rule 1 +base 1 +program 1 +analysi 1 +softwar 1 +engin 1 +artifici 1 +intellig 1 +publicationsi 1 +list 1 +public 1 +avail 1 +brows 1 +last 1 +updat 1 +pleas 1 +send 1 +comment 1 +rhwang 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..7bdf1568 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,77 @@ +neural 6 +network 5 +lectur 5 +cours 5 +artifici 4 +fall 4 +graduat 4 +univers 3 +austin 3 +scienc 3 +research 3 +intellig 3 +undergradu 3 +risto 2 +comput 2 +group 2 +model 2 +cognit 2 +work 2 +seminar 2 +spring 2 +address 2 +miikkulainenristo 1 +miikkulainenassoci 1 +professor 1 +computersci 1 +oftexa 1 +ucla 1 +appli 1 +mathemat 1 +helsinki 1 +technolog 1 +intereststh 1 +concentr 1 +processeswith 1 +current 1 +includ 1 +languageacquisit 1 +episod 1 +memori 1 +self 1 +organ 1 +visual 1 +cortex 1 +schema 1 +base 1 +vision 1 +also 1 +evolv 1 +networkswith 1 +genet 1 +algorithm 1 +goal 1 +automat 1 +discoversequenti 1 +decis 1 +strategi 1 +problem 1 +solv 1 +robot 1 +detail 1 +utc 1 +home 1 +page 1 +classessumm 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +utexa 1 +phone 1 +postal 1 +depart 1 +texa 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..2cff8874 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,2 @@ +rong 1 +bigfoot 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..32f3194f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,52 @@ +comput 4 +algorithm 3 +austin 3 +rajaraman 2 +home 2 +depart 2 +scienc 2 +univers 2 +texa 2 +rraj 2 +utexa 2 +rajmohan 1 +pagerajmohan 1 +graduat 1 +student 1 +atth 1 +ataustin 1 +plan 1 +complet 1 +spring 1 +mydissert 1 +supervisor 1 +gregplaxton 1 +research 1 +member 1 +andcomput 1 +theori 1 +group 1 +particularli 1 +interest 1 +incombinator 1 +distribut 1 +network 1 +onlin 1 +parallel 1 +model 1 +random 1 +list 1 +mypubl 1 +curriculum 1 +vita 1 +us 1 +link 1 +relat 1 +sciencemiscellan 1 +linkscontact 1 +inform 1 +email 1 +phone 1 +offic 1 +ephon 1 +postal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..51aaedaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,86 @@ +austin 4 +movi 4 +homepag 3 +comput 3 +rong 2 +capit 2 +univers 2 +depart 2 +graduat 2 +scienc 2 +utaccess 2 +introduct 2 +homepagea 1 +chinachina 1 +jinan 1 +myhometown 1 +shandong 1 +provinc 1 +tsinghua 1 +gotmi 1 +computersci 1 +technolog 1 +beij 1 +ofth 1 +peopl 1 +republ 1 +ofchina 1 +spent 1 +five 1 +colleg 1 +year 1 +zhai 1 +sinanet 1 +texasaustin 1 +citi 1 +live 1 +weather 1 +apart 1 +rent 1 +utcsth 1 +texa 1 +utnetcat 1 +browsabl 1 +onlin 1 +catalog 1 +librari 1 +austininform 1 +servic 1 +login 1 +utcat 1 +cours 1 +fall 1 +oper 1 +systemsdynam 1 +file 1 +replic 1 +final 1 +project 1 +graphicsc 1 +mathemat 1 +logicc 1 +moviesaustin 1 +chronicl 1 +film 1 +time 1 +yahoo 1 +entertain 1 +filmsmicrosoft 1 +cinemania 1 +onlineal 1 +guidehollywood 1 +onlineinternet 1 +databaserog 1 +ebert 1 +moviesvisit 1 +page 1 +contactmail 1 +address 1 +river 1 +aaustin 1 +telephon 1 +emailrtan 1 +utexa 1 +fingerclick 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..b9964a36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,66 @@ +rupert 3 +austin 3 +tang 2 +page 2 +comput 2 +scienc 2 +univers 2 +texa 2 +littl 2 +think 2 +life 2 +would 2 +alwai 2 +home 1 +student 1 +dept 1 +almost 1 +miracl 1 +modern 1 +teach 1 +method 1 +entir 1 +strangl 1 +holi 1 +curious 1 +inquiri 1 +delic 1 +plant 1 +need 1 +anyth 1 +besid 1 +stimul 1 +freedom 1 +realli 1 +empti 1 +depriv 1 +opportun 1 +choos 1 +altern 1 +distast 1 +deni 1 +actual 1 +wish 1 +aspir 1 +fear 1 +duress 1 +fate 1 +much 1 +differ 1 +truck 1 +wash 1 +machin 1 +nice 1 +meet 1 +complet 1 +cool 1 +servic 1 +know 1 +academ 1 +interest 1 +research 1 +messi 1 +area 1 +construct 1 +utexa 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..44591eac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,16 @@ +frame 3 +ruwei 1 +homepag 1 +alert 1 +see 1 +messag 1 +us 1 +browser 1 +support 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..a130e6bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,87 @@ +comput 7 +parallel 4 +method 4 +scienc 3 +univers 3 +austin 3 +mathemat 3 +project 3 +public 3 +robert 2 +texa 2 +rvdg 2 +utexa 2 +appli 2 +numer 2 +develop 2 +case 2 +techniqu 2 +current 2 +geijn 1 +geijnassoci 1 +professor 1 +depart 1 +institut 1 +appliedmathemat 1 +phone 1 +mail 1 +http 1 +user 1 +wisconsin 1 +madison 1 +maryland 1 +colleg 1 +park 1 +area 1 +interestnumer 1 +analysi 1 +supercomput 1 +scientif 1 +summari 1 +researchth 1 +introduct 1 +forc 1 +evalu 1 +oftradit 1 +sequentialmachin 1 +continu 1 +us 1 +inoth 1 +prove 1 +perform 1 +better 1 +researchconcentr 1 +forimpl 1 +well 1 +environ 1 +allowssuch 1 +easili 1 +implement 1 +variou 1 +parallelprocessor 1 +inform 1 +graduat 1 +program 1 +workshop 1 +infrastructur 1 +applic 1 +april 1 +intercom 1 +plapack 1 +sl_librari 1 +book 1 +journal 1 +confer 1 +technic 1 +report 1 +tutori 1 +major 1 +softwar 1 +effort 1 +class 1 +fall 1 +schedul 1 +former 1 +student 1 +meet 1 +famili 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..6e0759b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,40 @@ +home 3 +seed 3 +india 3 +page 2 +time 2 +sundeep 1 +abraham 1 +scrollit_rl 1 +timertwo 1 +window 1 +settimeout 1 +els 1 +sundeepabraham 1 +master 1 +student 1 +computersci 1 +dept 1 +universityof 1 +texa 1 +austin 1 +undergradu 1 +studi 1 +comput 1 +sciencesand 1 +engin 1 +region 1 +engg 1 +colleg 1 +calicut 1 +countri 1 +hail 1 +state 1 +kerala 1 +know 1 +contact 1 +click 1 +construct 1 +tinkerwith 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..1a24c80f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,135 @@ +comput 5 +texa 5 +austin 5 +page 4 +univers 4 +sammi 3 +depart 3 +scienc 3 +swim 3 +refer 2 +yellow 2 +utexa 2 +research 2 +prof 2 +project 2 +parallel 2 +world 2 +wide 2 +librari 2 +construct 2 +address 2 +altavista 1 +startingpoint 1 +yahoo 1 +lyco 1 +map 1 +weather 1 +white 1 +congress 1 +shall 1 +make 1 +respect 1 +establish 1 +religion 1 +orprohibit 1 +free 1 +exercis 1 +thereof 1 +abridg 1 +freedom 1 +ofspeech 1 +press 1 +right 1 +peopl 1 +peaceabl 1 +toassembl 1 +petit 1 +govern 1 +redress 1 +grievanc 1 +constitut 1 +unit 1 +state 1 +america 1 +calvin 1 +robert 1 +geijn 1 +linear 1 +algebra 1 +packag 1 +schedul 1 +artifici 1 +intellig 1 +topic 1 +distribut 1 +languag 1 +compil 1 +advanc 1 +oper 1 +system 1 +experi 1 +herbarium 1 +plapack 1 +sign 1 +hypertext 1 +code 1 +anagram 1 +server 1 +friend 1 +nil 1 +virtual 1 +reker 1 +yanni 1 +musician 1 +jeff 1 +hockei 1 +andrea 1 +hamilton 1 +technolog 1 +famili 1 +pop 1 +guyer 1 +public 1 +health 1 +northwestern 1 +anthropolog 1 +kate 1 +nate 1 +activ 1 +link 1 +entertain 1 +showbiz 1 +chronicl 1 +pollstar 1 +concert 1 +databas 1 +html 1 +quick 1 +guid 1 +dell 1 +fring 1 +ryder 1 +laptop 1 +info 1 +consortium 1 +miscellan 1 +boston 1 +hotlist 1 +list 1 +traveloc 1 +offic 1 +taylor 1 +hall 1 +home 1 +great 1 +hill 1 +eduth 1 +opinion 1 +express 1 +mine 1 +necessarili 1 +repres 1 +view 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..1deb8865 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,50 @@ +address 3 +austin 3 +inform 2 +mail 2 +depart 2 +comput 2 +propos 2 +file 2 +formal 2 +method 2 +page 2 +sawadajun 1 +sawadacontact 1 +email 1 +sawada 1 +utexa 1 +offic 1 +main 1 +univ 1 +texa 1 +scienc 1 +taylor 1 +hall 1 +home 1 +wooten 1 +dissert 1 +oral 1 +time 1 +place 1 +abstract 1 +paper 1 +supplementari 1 +technic 1 +report 1 +kbresourc 1 +common 1 +lisp 1 +languag 1 +edit 1 +bowen 1 +around 1 +world 1 +pvsother 1 +frequent 1 +access 1 +teacher 1 +fellow 1 +logic 1 +boyer 1 +class 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..ebe53983 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,169 @@ +softwar 13 +system 8 +gener 8 +compon 5 +batori 5 +genvoca 4 +domain 4 +start 4 +mani 3 +applic 3 +get 3 +member 3 +design 3 +page 2 +research 2 +group 2 +complex 2 +result 2 +improv 2 +product 2 +perform 2 +look 2 +distribut 2 +decemb 2 +encapsul 2 +refin 2 +basic 2 +order 2 +architectur 2 +pattern 2 +utexa 2 +lectur 2 +note 2 +file 2 +ssgrg 1 +titl 1 +pagewelcom 1 +home 1 +tool 1 +assembl 1 +interchang 1 +reusabl 1 +develop 1 +independ 1 +model 1 +construct 1 +defin 1 +algebra 1 +equat 1 +term 1 +successfulli 1 +appli 1 +includ 1 +databas 1 +manag 1 +avion 1 +data 1 +structur 1 +demonstr 1 +substanti 1 +time 1 +first 1 +visit 1 +question 1 +best 1 +place 1 +take 1 +project 1 +index 1 +public 1 +relat 1 +utc 1 +professorangela 1 +dappert 1 +studentguillermo 1 +jimenez 1 +perezph 1 +studentjeff 1 +thomasph 1 +studentl 1 +tokuda 1 +studentyanni 1 +smaragdaki 1 +studentk 1 +shepherdresearch 1 +associateform 1 +graduat 1 +datesdinesh 1 +dasph 1 +milli 1 +villarrealph 1 +bart 1 +geracipostdoc 1 +marti 1 +sirkinph 1 +march 1 +sankar 1 +dasarim 1 +overview 1 +us 1 +build 1 +typic 1 +modul 1 +featur 1 +share 1 +possibl 1 +must 1 +differ 1 +part 1 +class 1 +requir 1 +manipul 1 +metadata 1 +reflect 1 +comput 1 +thu 1 +like 1 +approach 1 +goe 1 +beyond 1 +simpl 1 +object 1 +orient 1 +larg 1 +scale 1 +program 1 +transform 1 +feel 1 +issu 1 +involv 1 +breadth 1 +recommend 1 +follow 1 +paper 1 +starter 1 +read 1 +scalabl 1 +librari 1 +creat 1 +refer 1 +implement 1 +composit 1 +valid 1 +subject 1 +specif 1 +deliv 1 +relationship 1 +work 1 +check 1 +reengin 1 +lightweight 1 +dbm 1 +memori 1 +simul 1 +generatorsautom 1 +evolut 1 +inform 1 +pleas 1 +contact 1 +period 1 +releas 1 +tutori 1 +reus 1 +avail 1 +contain 1 +compress 1 +postscript 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..f93538d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,142 @@ +page 7 +home 5 +stuff 4 +austin 4 +inform 3 +work 3 +think 3 +research 3 +amherst 3 +want 3 +realli 2 +basic 2 +doesn 2 +look 2 +littl 2 +interest 2 +texa 2 +depart 2 +try 2 +group 2 +class 2 +orient 2 +colleg 2 +place 2 +time 2 +spent 2 +amaz 2 +like 2 +peopl 2 +check 2 +neat 2 +right 2 +scott 1 +pagescott 1 +pageokai 1 +long 1 +overdu 1 +slight 1 +updat 1 +mean 1 +go 1 +fanci 1 +expect 1 +least 1 +current 1 +univers 1 +finish 1 +year 1 +mani 1 +oop 1 +within 1 +utc 1 +system 1 +languag 1 +object 1 +design 1 +instructor 1 +glenn 1 +down 1 +great 1 +spend 1 +undergradu 1 +dai 1 +unpredict 1 +santa 1 +institut 1 +summer 1 +differ 1 +kind 1 +hobbi 1 +stuffit 1 +perfect 1 +linux 1 +invalu 1 +squash 1 +pageth 1 +mead 1 +wine 1 +beer 1 +psion 1 +maker 1 +cool 1 +palmtop 1 +without 1 +would 1 +forget 1 +name 1 +anastasi 1 +well 1 +sharp 1 +much 1 +free 1 +miscellan 1 +item 1 +particular 1 +order 1 +dine 1 +guid 1 +actual 1 +import 1 +part 1 +citizen 1 +poke 1 +good 1 +humor 1 +publish 1 +appl 1 +comput 1 +still 1 +thing 1 +iici 1 +last 1 +forev 1 +ala 1 +longer 1 +sure 1 +bright 1 +futur 1 +type 1 +machin 1 +bebox 1 +could 1 +simpl 1 +better 1 +noth 1 +probabl 1 +didn 1 +wait 1 +five 1 +minut 1 +load 1 +send 1 +email 1 +maintain 1 +sfkaplan 1 +utexa 1 +might 1 +grab 1 +includ 1 +link 1 +encrypt 1 +gener 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..2fb39d64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,34 @@ +shenoi 5 +austin 4 +utexa 3 +prashant 2 +home 2 +page 2 +univers 2 +texa 2 +depart 2 +comput 2 +scienc 2 +main 2 +welcom 1 +get 1 +touch 1 +email 1 +river 1 +offic 1 +tower 1 +floor 1 +build 1 +inform 1 +finger 1 +also 1 +check 1 +log 1 +multimedia 1 +group 1 +list 1 +recent 1 +public 1 +avail 1 +onlin 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..77f0b263 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,35 @@ +austin 3 +shaob 2 +cyberhom 2 +comput 2 +welcom 1 +current 1 +student 1 +depart 1 +scienc 1 +univers 1 +texa 1 +want 1 +know 1 +check 1 +ongo 1 +work 1 +hardvar 1 +verifc 1 +network 1 +vlsi 1 +final 1 +project 1 +fall 1 +bookshelf 1 +coffe 1 +tabl 1 +campu 1 +citi 1 +make 1 +contact 1 +pleasant 1 +vallei 1 +shma 1 +utexa 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..288a0d31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,93 @@ +austin 4 +shailesh 3 +univers 3 +texa 3 +comput 3 +learn 3 +interest 2 +internet 2 +neural 2 +network 2 +machin 2 +artifici 2 +genet 2 +algorithm 2 +kumar 2 +kumarshailesh 1 +kumarth 1 +austindepart 1 +sciencestaylor 1 +hall 1 +skumar 1 +utexa 1 +edumi 1 +resumeresearch 1 +publicationscontact 1 +mesrcm 1 +spiritu 1 +affili 1 +offersom 1 +linkscognit 1 +scienceutc 1 +researchutc 1 +research 1 +groupresearch 1 +intellig 1 +life 1 +neuroevolut 1 +applic 1 +cellular 1 +automata 1 +chao 1 +nonlinear 1 +dynam 1 +fuzzi 1 +logic 1 +massiv 1 +parallel 1 +processor 1 +publicationson 1 +line 1 +adapt 1 +signal 1 +predistort 1 +dual 1 +reinforc 1 +page 1 +patrick 1 +goetz 1 +risto 1 +miikkulainen 1 +appli 1 +mathemat 1 +depart 1 +scienc 1 +proceed 1 +annual 1 +confer 1 +bari 1 +itali 1 +object 1 +base 1 +evolut 1 +program 1 +bord 1 +singh 1 +symposium 1 +aprl 1 +india 1 +contact 1 +snail 1 +mail 1 +whiti 1 +avenu 1 +phone 1 +home 1 +offic 1 +offernet 1 +assistancesearch 1 +tool 1 +qualiti 1 +institutewww 1 +infoindia 1 +music 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..e538bd26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,47 @@ +smaragdaki 2 +yanni 2 +utc 2 +student 2 +interest 2 +project 2 +program 2 +comput 2 +scienc 2 +research 2 +austin 2 +posit 1 +graduat 1 +alchemi 1 +turn 1 +lead 1 +gold 1 +moder 1 +success 1 +plan 1 +scheme 1 +make 1 +arrang 1 +someth 1 +webster 1 +world 1 +dictionari 1 +serious 1 +depart 1 +main 1 +area 1 +meta 1 +system 1 +applic 1 +particularli 1 +softwar 1 +gener 1 +photo 1 +album 1 +favorit 1 +sitessmaragd 1 +utexa 1 +eduyanni 1 +smaragdakisunivers 1 +texa 1 +departmenttai 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..a93dba04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,58 @@ +link 3 +minion 3 +like 2 +email 2 +pleas 2 +mean 2 +homepag 1 +edward 1 +danger 1 +construct 1 +site 1 +fall 1 +asphalt 1 +bodi 1 +resum 1 +tell 1 +danc 1 +shadow 1 +moonlight 1 +click 1 +utexa 1 +person 1 +data 1 +strictli 1 +need 1 +know 1 +basi 1 +send 1 +effort 1 +duli 1 +note 1 +pictur 1 +interest 1 +greatest 1 +experi 1 +stimul 1 +nerv 1 +center 1 +wouldn 1 +absolut 1 +ever 1 +dy 1 +ignor 1 +previou 1 +recent 1 +addit 1 +field 1 +trip 1 +pania 1 +haiku 1 +leaf 1 +afloat 1 +wind 1 +stream 1 +eddi 1 +waterfal 1 +life 1 +visitor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..4c83240a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,29 @@ +search 5 +email 2 +utexa 2 +texa 2 +austin 2 +southerart 1 +southerresearchbuild 1 +member 1 +knowledg 1 +base 1 +system 1 +research 1 +group 1 +contact 1 +inform 1 +souther 1 +work 1 +mail 1 +comput 1 +scienc 1 +depart 1 +univers 1 +hotlist 1 +site 1 +page 1 +address 1 +public 1 +tech 1 +reportsouth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..62677588 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,60 @@ +learn 3 +address 3 +comput 3 +scienc 3 +sowmya 2 +research 2 +texa 2 +interest 2 +network 2 +univers 2 +austin 2 +ramachandransowmya 1 +ramachandranmachin 1 +groupunivers 1 +austinresearchmi 1 +area 1 +machin 1 +field 1 +ofartif 1 +intellig 1 +problem 1 +learningbayesian 1 +exampl 1 +bayesian 1 +withhidden 1 +variabl 1 +challeng 1 +approach 1 +appli 1 +symbol 1 +connectionist 1 +theori 1 +revis 1 +techniqu 1 +thisproblem 1 +also 1 +design 1 +creat 1 +multimediaappl 1 +resum 1 +list 1 +paper 1 +educ 1 +rutger 1 +tech 1 +indian 1 +institut 1 +technolog 1 +madra 1 +india 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +utexa 1 +postal 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..af5b4ef1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,52 @@ +texa 4 +sriram 3 +austin 3 +comput 2 +univers 2 +click 2 +home 1 +page 1 +raocurr 1 +research 1 +involv 1 +design 1 +implement 1 +multimedia 1 +file 1 +systemoper 1 +system 1 +support 1 +multimediai 1 +work 1 +multimediagroup 1 +sciencesdepart 1 +advisor 1 +prof 1 +harrickvinpublicationsminegroupcontact 1 +informationofficetai 1 +email 1 +utexa 1 +edudepart 1 +scienc 1 +austinaustin 1 +miscellaneousotherinterest 1 +pagespicturesof 1 +toweraustin 1 +isth 1 +capit 1 +locat 1 +central 1 +hill 1 +countri 1 +herefor 1 +inform 1 +kannada 1 +koota 1 +informationabout 1 +tamil 1 +sangam 1 +comment 1 +pleas 1 +free 1 +send 1 +mail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..70b95de1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,15 @@ +frame 3 +tiger 1 +alert 1 +see 1 +messag 1 +us 1 +challeng 1 +browser 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..a6f19631 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,50 @@ +home 4 +sunghe 3 +univers 3 +austin 3 +page 2 +depart 2 +comput 2 +texa 2 +engin 2 +seoul 2 +work 2 +system 2 +research 2 +utc 2 +click 2 +choisunghe 1 +choiwelcom 1 +construct 1 +master 1 +program 1 +scienc 1 +educ 1 +nation 1 +korea 1 +august 1 +experi 1 +present 1 +administr 1 +chemic 1 +graduat 1 +assist 1 +prof 1 +aloysiu 1 +real 1 +time 1 +group 1 +contact 1 +inform 1 +nuec 1 +list 1 +machin 1 +current 1 +log 1 +finger 1 +author 1 +choiemail 1 +utexa 1 +edulast 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..d945b333 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,70 @@ +comput 5 +scienc 5 +texa 4 +austin 4 +inform 3 +univers 3 +depart 3 +research 3 +page 2 +sheetal 2 +offic 2 +taylor 2 +hall 2 +address 2 +avail 2 +persist 2 +call 2 +home 1 +kakkadsheet 1 +kakkadcontact 1 +postal 1 +usual 1 +best 1 +reach 1 +isvia 1 +email 1 +full 1 +finger 1 +informationi 1 +member 1 +oop 1 +group 1 +inth 1 +part 1 +implement 1 +storagesystem 1 +provid 1 +easi 1 +us 1 +novel 1 +techniqu 1 +pointer 1 +swizzl 1 +faulttim 1 +effici 1 +support 1 +larg 1 +standard 1 +hardwar 1 +pleas 1 +list 1 +mypubl 1 +along 1 +brief 1 +descript 1 +plan 1 +graduat 1 +myresum 1 +postscript 1 +current 1 +work 1 +motorola 1 +somerset 1 +design 1 +center 1 +whilefinish 1 +januari 1 +kakkad 1 +svkakkad 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..5d63ae47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,62 @@ +austin 3 +shengm 2 +student 2 +scienc 2 +univers 2 +texa 2 +chines 2 +home 1 +page 1 +welcom 1 +homepageabout 1 +depart 1 +comput 1 +want 1 +know 1 +check 1 +classmatesclass 1 +technolog 1 +china 1 +ustc 1 +class 1 +graduat 1 +school 1 +academi 1 +sciencesus 1 +linksut 1 +campu 1 +registrar 1 +gradaut 1 +studiesut 1 +libraryutaccesschines 1 +scholar 1 +associationchina 1 +chinesechinainternet 1 +distribut 1 +magazinestsinghua 1 +bbsncic 1 +bbschines 1 +novelschines 1 +classicsabout 1 +austinwhat 1 +weather 1 +todai 1 +citylimitsclassifi 1 +item 1 +sale 1 +austinto 1 +contact 1 +address 1 +medic 1 +art 1 +voic 1 +email 1 +utexa 1 +finger 1 +meyour 1 +comment 1 +suggest 1 +highli 1 +appreci 1 +visitorsinc 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..f91acc1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,15 @@ +frame 3 +wang 1 +alert 1 +see 1 +messag 1 +us 1 +browser 1 +support 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..f79ff495 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,4 @@ +page 2 +welcom 1 +home 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..3b5341bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,30 @@ +austin 3 +work 2 +comput 2 +renu 1 +tewarirenu 1 +tewariwhat 1 +addresshom 1 +home 1 +email 1 +tewari 1 +utexa 1 +multimedia 1 +dept 1 +scienc 1 +univers 1 +texa 1 +public 1 +done 1 +internship 1 +watson 1 +research 1 +center 1 +plai 1 +interest 1 +site 1 +bore 1 +send 1 +comment 1 +name 1 +option 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..44a450b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,85 @@ +tumlin 4 +research 3 +distribut 3 +system 3 +synthesi 3 +pierc 2 +interest 2 +formal 2 +develop 2 +work 2 +paper 2 +control 2 +project 2 +inform 2 +address 2 +utexa 2 +page 2 +novemb 2 +photo 1 +brenda 1 +ladd 1 +photographi 1 +random 1 +stuff 1 +come 1 +soon 1 +stai 1 +tune 1 +issu 1 +secur 1 +present 1 +studi 1 +logic 1 +analyz 1 +authenticationprotocol 1 +done 1 +method 1 +specif 1 +verif 1 +parallel 1 +amwork 1 +resourc 1 +communicatewith 1 +client 1 +mean 1 +queu 1 +messag 1 +draft 1 +addit 1 +student 1 +assist 1 +appli 1 +laboratori 1 +current 1 +investig 1 +us 1 +evolutionari 1 +comput 1 +techniqu 1 +genet 1 +algorithm 1 +finit 1 +state 1 +machin 1 +click 1 +resum 1 +avail 1 +html 1 +postscript 1 +format 1 +contact 1 +offic 1 +taylor 1 +hall 1 +phone 1 +email 1 +postal 1 +metric 1 +blvd 1 +austin 1 +last 1 +updat 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..4b3fc9bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,173 @@ +prof 9 +page 8 +austin 6 +system 6 +tong 5 +java 5 +comput 5 +univers 5 +chines 4 +mpeg 4 +china 4 +cours 4 +fall 4 +project 4 +unix 4 +friend 4 +wang 3 +time 3 +shanghai 3 +technolog 3 +work 3 +distribut 3 +network 3 +databas 3 +netscap 2 +recommend 2 +visit 2 +viewer 2 +demo 2 +nanj 2 +program 2 +format 2 +jiao 2 +tsinghua 2 +compani 2 +lucent 2 +spring 2 +misra 2 +theori 2 +implement 2 +perform 2 +associ 2 +anim 2 +home 1 +view 1 +listen 1 +contain 1 +applet 1 +written 1 +pure 1 +nank 1 +peopl 1 +republ 1 +current 1 +scienc 1 +depart 1 +texa 1 +seek 1 +full 1 +resum 1 +html 1 +click 1 +postscript 1 +univeristi 1 +beij 1 +jersei 1 +summerluc 1 +form 1 +result 1 +plan 1 +restructur 1 +bell 1 +laboratori 1 +thissumm 1 +anoth 1 +homepag 1 +life 1 +commun 1 +gouda 1 +zuckerman 1 +batori 1 +algorithm 1 +techniqu 1 +plexton 1 +graphic 1 +fussel 1 +multimedia 1 +teach 1 +assist 1 +introduct 1 +oper 1 +present 1 +fault 1 +toler 1 +clock 1 +synchron 1 +real 1 +april 1 +mobil 1 +host 1 +protocol 1 +mini 1 +manag 1 +design 1 +tool 1 +standard 1 +compon 1 +libari 1 +robot 1 +opengl 1 +glut 1 +decod 1 +player 1 +plai 1 +semest 1 +know 1 +troubl 1 +made 1 +music 1 +favorit 1 +mariah 1 +boyz 1 +babyfac 1 +movi 1 +sound 1 +clip 1 +sampl 1 +misc 1 +zodiac 1 +person 1 +differ 1 +kind 1 +find 1 +high 1 +school 1 +attach 1 +normal 1 +haiq 1 +maintain 1 +shenfeng 1 +chen 1 +thank 1 +quit 1 +learn 1 +info 1 +perl 1 +tutori 1 +reach 1 +lake 1 +blvd 1 +twang 1 +utexa 1 +still 1 +underconstruct 1 +check 1 +like 1 +never 1 +leav 1 +eagl 1 +copyright 1 +creat 1 +last 1 +modifi 1 +background 1 +song 1 +deskmat 1 +lang 1 +visitor 1 +accord 1 +counter 1 +sinc 1 +trust 1 +book 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..b684abf5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,84 @@ +comput 3 +languag 3 +student 2 +machin 2 +utc 2 +natur 2 +learn 2 +group 2 +interest 2 +associ 2 +hermjakob 1 +home 1 +pageulf 1 +hermjakobhello 1 +welcom 1 +graduat 1 +thedept 1 +scienc 1 +univers 1 +texa 1 +austinand 1 +work 1 +dissert 1 +aboutexampl 1 +base 1 +decis 1 +make 1 +context 1 +orient 1 +pars 1 +translationund 1 +supervis 1 +prof 1 +raymond 1 +moonei 1 +activ 1 +acquisit 1 +groupand 1 +research 1 +place 1 +linguist 1 +signll 1 +special 1 +print 1 +archiv 1 +european 1 +search 1 +engin 1 +altavista 1 +einet 1 +galaxi 1 +infoseek 1 +lyco 1 +yahoo 1 +new 1 +thing 1 +consid 1 +dernir 1 +nouvel 1 +alsac 1 +deutsch 1 +well 1 +focu 1 +york 1 +time 1 +spiegel 1 +svenska 1 +dagbladet 1 +tagesspiegel 1 +vanguardia 1 +welt 1 +zeitplusacm 1 +austin 1 +weather 1 +resourc 1 +perman 1 +address 1 +moltkestr 1 +bnde 1 +germanyphon 1 +voic 1 +last 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..2e031d1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,38 @@ +texa 3 +austin 3 +sport 3 +lanc 2 +tokuda 2 +univers 2 +softwar 2 +research 2 +intramur 2 +schedul 2 +system 1 +gener 1 +group 1 +depart 1 +comput 1 +scienc 1 +unicron 1 +utexa 1 +twelv 1 +time 1 +champion 1 +directori 1 +entertain 1 +financ 1 +magic 1 +gather 1 +new 1 +organ 1 +peopl 1 +refer 1 +home 1 +offic 1 +taylor 1 +perman 1 +heeia 1 +street 1 +kaneoh 1 +hawaii 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..bc615672 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,29 @@ +utexa 2 +hall 2 +austin 2 +home 1 +page 1 +balayoghanv 1 +balayoghancontact 1 +informationemail 1 +eduoffic 1 +painter 1 +telephon 1 +postal 1 +address 1 +univers 1 +texa 1 +depart 1 +comput 1 +scienc 1 +taylor 1 +click 1 +send 1 +email 1 +finger 1 +account 1 +find 1 +whether 1 +log 1 +ineosdi 1 +bookmarksvbb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..1c5ede76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,173 @@ +multimedia 18 +comput 13 +system 12 +award 8 +scienc 6 +univers 5 +distribut 5 +network 5 +research 4 +page 4 +goyal 4 +texa 3 +chair 3 +intern 3 +confer 3 +program 3 +committe 3 +databas 3 +server 3 +proceed 3 +austin 3 +laboratori 2 +california 2 +diego 2 +career 2 +develop 2 +initi 2 +ieee 2 +area 2 +andnetwork 2 +eurograph 2 +workshop 2 +applic 2 +symposium 2 +object 2 +algorithm 2 +effici 2 +audio 2 +video 2 +disk 2 +oper 2 +commun 2 +sponsor 2 +harrick 1 +vinharrick 1 +assist 1 +professor 1 +atth 1 +ataustin 1 +director 1 +multimediacomput 1 +educ 1 +tech 1 +engin 1 +indian 1 +institut 1 +technolog 1 +bombai 1 +colorado 1 +state 1 +honor 1 +profession 1 +servic 1 +faculti 1 +supercomput 1 +center 1 +creativ 1 +innov 1 +editori 1 +board 1 +vice 1 +icdc 1 +member 1 +electronicimag 1 +beij 1 +china 1 +novemb 1 +kaohsiung 1 +taiwan 1 +decemb 1 +rostock 1 +germani 1 +second 1 +internationalconfer 1 +third 1 +multimediasystem 1 +interestmultimedia 1 +high 1 +speed 1 +mobilecomput 1 +summari 1 +main 1 +design 1 +implement 1 +anend 1 +architectur 1 +enabl 1 +wide 1 +rang 1 +ofdistribut 1 +specif 1 +integr 1 +file 1 +protocolsfor 1 +transmiss 1 +digit 1 +larg 1 +scale 1 +select 1 +recent 1 +public 1 +shenoi 1 +failur 1 +recoveri 1 +inmulti 1 +annualintern 1 +fault 1 +toler 1 +ftc 1 +pasadena 1 +june 1 +gemmel 1 +kandlur 1 +venkat 1 +rangan 1 +row 1 +storag 1 +tutori 1 +optim 1 +placement 1 +ofmultimedia 1 +arrai 1 +ieeeintern 1 +icmc 1 +washington 1 +determin 1 +delaybound 1 +heterogen 1 +thintern 1 +support 1 +fordigit 1 +nossdav 1 +durham 1 +hampshir 1 +april 1 +designingmultimedia 1 +march 1 +work 1 +variou 1 +industri 1 +federalinstitut 1 +includ 1 +intel 1 +nation 1 +foundationresearch 1 +nasa 1 +mitsubishi 1 +electricresearch 1 +merl 1 +microsystem 1 +electrospacesystem 1 +cours 1 +advanc 1 +contact 1 +inform 1 +email 1 +utexa 1 +phone 1 +mail 1 +address 1 +depart 1 +taylor 1 +hall 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..65280f40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,67 @@ +vipin 2 +home 2 +haven 2 +guestbook 2 +pleas 2 +austin 2 +number 2 +page 1 +found 1 +time 1 +thing 1 +shall 1 +updat 1 +soon 1 +yeah 1 +know 1 +color 1 +match 1 +pictur 1 +cours 1 +interest 1 +report 1 +activ 1 +resum 1 +give 1 +graduat 1 +student 1 +univers 1 +texa 1 +depart 1 +comput 1 +scienc 1 +undergraduatefrom 1 +delhi 1 +india 1 +academ 1 +interestscours 1 +work 1 +list 1 +less 1 +incomplet 1 +pass 1 +semest 1 +take 1 +risk 1 +put 1 +interestsreportsy 1 +visitor 1 +go 1 +reset 1 +increas 1 +never 1 +decreas 1 +contact 1 +medic 1 +art 1 +street 1 +log 1 +sure 1 +sign 1 +though 1 +mani 1 +guest 1 +comment 1 +suggest 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..e543c284 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,155 @@ +mathemat 4 +paper 3 +vladimir 2 +feel 2 +human 2 +univers 2 +petersburg 2 +russia 2 +note 2 +research 2 +new 2 +germani 2 +race 2 +black 2 +offic 2 +lifschitzwhen 1 +burden 1 +downcast 1 +mind 1 +gladli 1 +turn 1 +therealm 1 +lucid 1 +precis 1 +grasp 1 +object 1 +isobtain 1 +insight 1 +gain 1 +pleasantli 1 +appropri 1 +conceptform 1 +spirit 1 +home 1 +paul 1 +bernai 1 +lifschitzgottesman 1 +famili 1 +centenni 1 +professor 1 +incomput 1 +sciencesat 1 +texasat 1 +austin 1 +fellow 1 +theamerican 1 +associ 1 +forartifici 1 +intelligenceb 1 +branchof 1 +steklov 1 +institut 1 +area 1 +interesttempor 1 +reasoningand 1 +reason 1 +aboutactionslog 1 +programmingand 1 +nonmonoton 1 +reasoningteachingoth 1 +profession 1 +activitiespap 1 +line 1 +lectur 1 +survei 1 +lifschitz 1 +dissert 1 +bylifschitz 1 +studentsrecommend 1 +read 1 +edsger 1 +dijkstra 1 +convoc 1 +speechgood 1 +madelein 1 +albright 1 +nomin 1 +secretari 1 +state 1 +becam 1 +better 1 +place 1 +live 1 +regain 1 +book 1 +taken 1 +soviet 1 +armi 1 +daniel 1 +ortega 1 +lost 1 +need 1 +recycl 1 +helm 1 +burton 1 +actbad 1 +wang 1 +sentenc 1 +year 1 +prison 1 +tortur 1 +us 1 +polic 1 +democrat 1 +countri 1 +sequest 1 +import 1 +archeolog 1 +evid 1 +world 1 +close 1 +societynot 1 +problem 1 +america 1 +elect 1 +recent 1 +redrawn 1 +district 1 +california 1 +civil 1 +right 1 +initi 1 +ratio 1 +white 1 +finish 1 +high 1 +school 1 +admit 1 +student 1 +neutral 1 +basisoth 1 +amnesti 1 +intern 1 +scientist 1 +scienc 1 +favorit 1 +stori 1 +three 1 +silli 1 +joke 1 +quot 1 +monthcontact 1 +inform 1 +taylor 1 +hall 1 +phone 1 +number 1 +postal 1 +address 1 +depart 1 +comput 1 +sciencesunivers 1 +texa 1 +austinaustin 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..846362a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,56 @@ +comput 4 +algorithm 4 +univers 3 +texa 3 +austin 3 +parallel 3 +design 3 +scienc 2 +research 2 +evalu 2 +address 2 +vijaya 1 +ramachandranvijaya 1 +ramachandranblakemor 1 +regent 1 +professor 1 +princeton 1 +interestsmi 1 +interest 1 +theori 1 +primarilyin 1 +area 1 +includ 1 +analysi 1 +effici 1 +sequenti 1 +model 1 +machin 1 +experiment 1 +access 1 +copi 1 +recent 1 +paper 1 +mine 1 +complet 1 +list 1 +public 1 +avail 1 +vita 1 +offici 1 +faculti 1 +profil 1 +contact 1 +inform 1 +offic 1 +taylor 1 +hall 1 +email 1 +utexa 1 +postal 1 +depart 1 +number 1 +visit 1 +page 1 +sinc 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..1e360e74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,203 @@ +comput 19 +algorithm 15 +theori 12 +utexa 11 +mail 9 +list 9 +warm 8 +scienc 6 +univers 6 +group 5 +research 5 +symposium 5 +design 4 +complex 4 +parallel 4 +graph 4 +random 4 +sigact 4 +theoret 3 +foundat 3 +interest 3 +analysi 3 +plaxton 3 +vijaya 3 +ramachandran 3 +announc 3 +messag 3 +texa 3 +faculti 2 +includ 2 +effici 2 +differ 2 +time 2 +number 2 +greg 2 +sanjoi 2 +kelsen 2 +madhukar 2 +ramgop 2 +suel 2 +yuke 2 +electron 2 +activ 2 +send 2 +remov 2 +stand 2 +state 2 +meet 2 +locat 2 +sever 2 +distinguish 2 +austin 2 +dalla 2 +louisiana 2 +sent 2 +also 2 +ieee 2 +bibliographi 2 +grouput 1 +groupth 1 +focus 1 +current 1 +probabilist 1 +method 1 +major 1 +focu 1 +provabl 1 +solv 1 +fundament 1 +problem 1 +measur 1 +term 1 +resourc 1 +space 1 +processor 1 +bit 1 +combinator 1 +lower 1 +bound 1 +machin 1 +model 1 +david 1 +zuckerman 1 +walk 1 +cryptographi 1 +affili 1 +folk 1 +postdoc 1 +student 1 +alumni 1 +baruah 1 +emba 1 +tsan 1 +sheng 1 +tshsu 1 +sinica 1 +pierr 1 +korupolu 1 +phil 1 +mackenzi 1 +philmac 1 +idbsu 1 +mettu 1 +poon 1 +ckpoon 1 +rajmohan 1 +rajaraman 1 +rraj 1 +santanu 1 +sinha 1 +ssinha 1 +torsten 1 +berkelei 1 +zhou 1 +relat 1 +seminar 1 +post 1 +lowvolum 1 +typic 1 +dozen 1 +semest 1 +express 1 +ad 1 +name 1 +request 1 +gripe 1 +workshop 1 +themidsouth 1 +midsouthwest 1 +forum 1 +surround 1 +twice 1 +year 1 +consist 1 +talk 1 +region 1 +recent 1 +result 1 +often 1 +keynot 1 +speaker 1 +first 1 +organ 1 +atut 1 +spring 1 +organizedanoth 1 +fall 1 +held 1 +southern 1 +methodist 1 +north 1 +southwestern 1 +oklahoma 1 +next 1 +schedul 1 +beheld 1 +novemb 1 +program 1 +algorithmsmail 1 +becom 1 +avail 1 +usuallytri 1 +pool 1 +attend 1 +take 1 +place 1 +outsid 1 +ofaustin 1 +regard 1 +arrang 1 +special 1 +algorithmsand 1 +thatinclud 1 +mani 1 +scientist 1 +sponsorsth 1 +stoc 1 +sponsor 1 +siam 1 +discret 1 +soda 1 +andarchitectur 1 +spaa 1 +import 1 +confer 1 +interestar 1 +foc 1 +serv 1 +elect 1 +member 1 +thesigact 1 +execut 1 +committe 1 +us 1 +pointer 1 +calendar 1 +eccc 1 +colloquium 1 +virtual 1 +rolodex 1 +hypertext 1 +project 1 +dept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..4510cd09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,7 @@ +srinivasan 2 +vaidyaraman 2 +offic 2 +phone 2 +email 1 +utexa 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..490338f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,45 @@ +vurgun 2 +comput 2 +scienc 2 +interest 2 +austin 2 +sengul 1 +sengulvurgun 1 +background 1 +interestsi 1 +student 1 +depart 1 +ammainli 1 +artifici 1 +intellig 1 +neural 1 +network 1 +evolutionaryalgorithm 1 +term 1 +paper 1 +topic 1 +order 1 +ofprefer 1 +memori 1 +represent 1 +knowledg 1 +howto 1 +retriev 1 +learn 1 +theori 1 +problem 1 +solv 1 +cognit 1 +skillacquisit 1 +search 1 +understand 1 +visual 1 +attent 1 +connectionist 1 +approach 1 +architectur 1 +mindto 1 +contact 1 +mepost 1 +usavoic 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..cb6b8a27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,10 @@ +page 2 +walbourn 1 +home 1 +chuck 1 +walbournmi 1 +person 1 +locat 1 +charybdi 1 +enterpris 1 +server 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..01536db3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,50 @@ +walker 5 +comput 4 +scienc 4 +depart 3 +colleg 3 +henri 2 +mackai 2 +visit 2 +univers 2 +texa 2 +austin 2 +professor 2 +mathemat 2 +grinnel 2 +math 2 +grin 2 +septemb 2 +senior 1 +lectur 1 +edua 1 +regular 1 +tenur 1 +member 1 +faculti 1 +professorwalk 1 +period 1 +teachand 1 +variou 1 +profession 1 +activ 1 +formal 1 +appoint 1 +follow 1 +academ 1 +year 1 +summer 1 +fall 1 +complet 1 +inform 1 +avail 1 +home 1 +page 1 +atgrinnel 1 +http 1 +creat 1 +last 1 +revis 1 +photograph 1 +jack 1 +robertson 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..3fafba2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,61 @@ +warshaw 4 +austin 4 +rule 4 +research 3 +venu 3 +mirank 3 +home 2 +comput 2 +scienc 2 +graduat 2 +univers 2 +texa 2 +base 2 +appli 2 +case 2 +studi 2 +declar 2 +basi 2 +modul 2 +page 1 +lane 1 +warshawlan 1 +mike 1 +cool 1 +senior 1 +student 1 +recent 1 +accept 1 +school 1 +work 1 +area 1 +activ 1 +databas 1 +system 1 +current 1 +posit 1 +laboratoryinvolv 1 +maintain 1 +languag 1 +developedat 1 +andat 1 +laboratori 1 +lanc 1 +obermey 1 +first 1 +item 1 +anoth 1 +third 1 +follow 1 +list 1 +paper 1 +unpublish 1 +confer 1 +inform 1 +knowledg 1 +manag 1 +contact 1 +mepost 1 +usavoic 1 +arlut 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..6d7f6f84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,49 @@ +austin 5 +utexa 3 +chen 2 +comput 2 +scienc 2 +texa 2 +mathemat 2 +univers 2 +wchen 2 +java 2 +rosett 2 +program 2 +master 1 +student 1 +dept 1 +decemb 1 +august 1 +fudan 1 +china 1 +juli 1 +offic 1 +phone 1 +email 1 +math 1 +mail 1 +address 1 +center 1 +numer 1 +analysi 1 +us 1 +link 1 +unix 1 +book 1 +expect 1 +perl 1 +site 1 +demo 1 +refer 1 +manual 1 +exampl 1 +common 1 +gatewai 1 +interfac 1 +sampl 1 +pleas 1 +click 1 +load 1 +file 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..da6afeef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,48 @@ +austin 3 +depart 3 +comput 3 +research 3 +program 3 +page 2 +paul 2 +wilson 2 +taylor 2 +hall 2 +univers 2 +texa 2 +scienc 2 +oop 2 +group 2 +languag 2 +home 1 +contact 1 +inform 1 +offic 1 +postal 1 +address 1 +best 1 +reach 1 +email 1 +ltwilson 1 +utexa 1 +usual 1 +headshot 1 +novelti 1 +thought 1 +cross 1 +section 1 +informationi 1 +lead 1 +object 1 +orient 1 +system 1 +workson 1 +memori 1 +manag 1 +design 1 +implement 1 +teachingin 1 +fall 1 +teach 1 +sciencesnot 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..182389a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,137 @@ +christian 7 +holi 5 +jesu 4 +also 4 +becom 4 +sin 4 +christ 3 +thought 3 +becam 2 +life 2 +though 2 +time 2 +good 2 +think 2 +wrong 2 +true 2 +howev 2 +abl 2 +faith 2 +strength 2 +know 2 +seek 2 +arthur 1 +homepag 1 +cent 1 +christiani 1 +made 1 +major 1 +decis 1 +believ 1 +alittl 1 +month 1 +come 1 +totallyunexpect 1 +religi 1 +studi 1 +compulsori 1 +class 1 +thechristian 1 +high 1 +school 1 +attend 1 +hong 1 +kong 1 +intent 1 +tobecom 1 +record 1 +slife 1 +bibl 1 +quit 1 +credibl 1 +teach 1 +take 1 +deepli 1 +relat 1 +mylif 1 +misconcept 1 +mean 1 +christianwa 1 +clear 1 +came 1 +past 1 +differ 1 +christianand 1 +lovedeveri 1 +matter 1 +whether 1 +decid 1 +achristian 1 +us 1 +shouldb 1 +virtuou 1 +enough 1 +listen 1 +friend 1 +church 1 +thefellowship 1 +realiz 1 +thing 1 +born 1 +theywil 1 +stumbl 1 +flesh 1 +hei 1 +sinless 1 +sympath 1 +weak 1 +weconfess 1 +forgiv 1 +andto 1 +cleans 1 +unright 1 +john 1 +differencebetween 1 +peopl 1 +trust 1 +astheir 1 +saviour 1 +gratefulli 1 +accept 1 +redempt 1 +fortheir 1 +therefor 1 +justifi 1 +without 1 +deed 1 +ofth 1 +roman 1 +doubt 1 +live 1 +wedo 1 +thecontrari 1 +reli 1 +givesu 1 +said 1 +whole 1 +need 1 +nota 1 +physician 1 +sick 1 +matthew 1 +count 1 +onour 1 +number 1 +dai 1 +ought 1 +thetruth 1 +earli 1 +hesit 1 +start 1 +thankgod 1 +lead 1 +give 1 +opportun 1 +realli 1 +wkmak 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..2ce2e4c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,148 @@ +robot 21 +research 8 +interest 8 +page 8 +work 6 +group 6 +austin 5 +badminton 4 +utexa 4 +topic 3 +comment 3 +explor 3 +learn 3 +intellig 3 +rang 3 +reason 3 +system 3 +home 3 +built 3 +onlin 3 +internet 3 +resourc 3 +pictur 2 +educ 2 +public 2 +hotlist 2 +send 2 +contact 2 +inform 2 +mobil 2 +navig 2 +control 2 +spatial 2 +semant 2 +hierarchi 2 +engin 2 +machin 2 +network 2 +qualit 2 +interfac 2 +world 2 +ring 2 +ultrason 2 +sensor 2 +rhino 2 +manipul 2 +robokreta 2 +car 2 +email 2 +wyle 2 +member 2 +guitar 2 +clarinet 2 +martial 2 +art 2 +phone 2 +texa 2 +hello 1 +visitor 1 +number 1 +person 1 +miscellan 1 +worki 1 +primarili 1 +approach 1 +mobilerobot 1 +softwar 1 +develop 1 +area 1 +artifici 1 +includ 1 +neural 1 +vision 1 +oper 1 +embed 1 +graphic 1 +user 1 +multimedia 1 +spot 1 +real 1 +rover 1 +tall 1 +commerci 1 +robocac 1 +worm 1 +specif 1 +robofest 1 +organ 1 +besar 1 +kicik 1 +autonom 1 +us 1 +chassi 1 +motor 1 +fast 1 +remot 1 +race 1 +paper 1 +andqualit 1 +avail 1 +doctor 1 +dissert 1 +titl 1 +fora 1 +physic 1 +also 1 +offici 1 +start 1 +point 1 +technolog 1 +wide 1 +catalog 1 +usenet 1 +frequent 1 +ask 1 +question 1 +meta 1 +index 1 +ncsa 1 +malaysia 1 +homepag 1 +yahoo 1 +note 1 +common 1 +pleas 1 +eduperson 1 +interestsavid 1 +player 1 +unit 1 +state 1 +associ 1 +usba 1 +love 1 +plai 1 +miscellaneousinterest 1 +well 1 +movi 1 +offic 1 +taylor 1 +hall 1 +mail 1 +comput 1 +scienc 1 +depart 1 +univers 1 +finger 1 +back 1 +list 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..eb6925df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,8 @@ +address 2 +phone 2 +contact 1 +xfeng 1 +utexa 1 +qaustin 1 +west 1 +austin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..83dcf4d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,92 @@ +page 4 +pictur 3 +search 3 +xingang 2 +home 2 +travel 2 +austin 2 +student 2 +comput 2 +work 2 +surpris 2 +well 2 +interest 2 +time 2 +creat 2 +photographi 1 +classic 1 +music 1 +audio 1 +sport 1 +visual 1 +welcom 1 +hang 1 +around 1 +univers 1 +texa 1 +depart 1 +scienc 1 +distribut 1 +multimedia 1 +head 1 +harrick 1 +year 1 +graduat 1 +program 1 +rank 1 +nation 1 +delight 1 +realli 1 +pretti 1 +hard 1 +take 1 +aswel 1 +stuff 1 +line 1 +soon 1 +right 1 +temporaryresort 1 +imagin 1 +hopefulli 1 +goe 1 +llgradual 1 +walk 1 +paper 1 +present 1 +clearer 1 +imag 1 +link 1 +find 1 +feel 1 +havesometh 1 +watch 1 +frequent 1 +access 1 +pointer 1 +serious 1 +foliag 1 +marvel 1 +engin 1 +alta 1 +vista 1 +string 1 +infoseek 1 +keyword 1 +miata 1 +club 1 +unit 1 +morn 1 +newspap 1 +american 1 +express 1 +financi 1 +card 1 +york 1 +atlant 1 +monthli 1 +china 1 +soccer 1 +major 1 +leagu 1 +group 1 +xguo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..8468f755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,4 @@ +geoffrei 1 +home 1 +pagemov 1 +address 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..4ad17eaa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,15 @@ +frame 3 +yang 2 +alert 1 +see 1 +messag 1 +us 1 +challeng 1 +browser 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..963982ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,87 @@ +austin 3 +utc 3 +guitar 3 +corner 2 +home 2 +classic 2 +david 1 +wwwdavid 1 +hello 1 +whatev 1 +took 1 +welcom 1 +make 1 +establish 1 +contact 1 +street 1 +depart 1 +comput 1 +scienc 1 +univers 1 +texa 1 +internet 1 +utexa 1 +get 1 +busi 1 +research 1 +outsid 1 +work 1 +still 1 +seriou 1 +favorit 1 +quot 1 +word 1 +hope 1 +daili 1 +medit 1 +stuff 1 +concert 1 +byth 1 +assad 1 +brothersin 1 +hong 1 +kong 1 +art 1 +festiv 1 +must 1 +first 1 +introduc 1 +beauti 1 +christoph 1 +parkeningi 1 +guitarist 1 +interest 1 +life 1 +stori 1 +tell 1 +grew 1 +tire 1 +ofconcert 1 +retir 1 +reconcili 1 +jesu 1 +christ 1 +rekindl 1 +passion 1 +also 1 +theamsterdam 1 +trio 1 +french 1 +rich 1 +artist 1 +flair 1 +like 1 +nation 1 +footbal 1 +team 1 +michel 1 +platini 1 +label 1 +franc 1 +magazinepublish 1 +minist 1 +align 1 +absmiddl 1 +sinc 1 +sept 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..fd702c6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,15 @@ +frame 3 +yanbin 1 +alert 1 +see 1 +messag 1 +us 1 +challeng 1 +browser 1 +click 1 +version 1 +document 1 +could 1 +download 1 +netscap 1 +navig 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..29ac7163 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,9 @@ +yuan 1 +home 1 +page 1 +oop 1 +us 1 +browser 1 +pleas 1 +click 1 +continu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..bb413796 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,64 @@ +yong 4 +univers 3 +beij 2 +china 2 +program 2 +austin 2 +unit 2 +servic 2 +homepageto 1 +homepagey 1 +number 1 +visitor 1 +sinc 1 +current 1 +comput 1 +scienc 1 +texa 1 +settl 1 +stai 1 +mathemat 1 +graduat 1 +rutger 1 +brunswick 1 +year 1 +jersei 1 +beauti 1 +place 1 +wife 1 +tsinghua 1 +milanitalian 1 +soccerk 1 +soccernba 1 +sitefox 1 +sportschicago 1 +bullsmichael 1 +jordannflnhlc 1 +rankingmarri 1 +childrenseinfeldcomput 1 +sciencesutilitieshtml 1 +convertersimag 1 +collectionssystemshtmllatexcgitcl 1 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 1 +tmiscinternet 1 +travel 1 +network 1 +parcel 1 +state 1 +postal 1 +usp 1 +fedexus 1 +guidefun 1 +todayu 1 +newsstarwavesupermodel 1 +contact 1 +river 1 +street 1 +finger 1 +yonglu 1 +utexa 1 +page 1 +heavi 1 +construct 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..76d1518a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,138 @@ +young 6 +linear 6 +iter 6 +method 6 +system 6 +comput 5 +larg 5 +parallel 5 +scienc 4 +mathemat 4 +associ 4 +research 4 +professor 3 +numer 3 +algebra 3 +applic 3 +equat 3 +solut 3 +kincaid 3 +spars 3 +david 2 +analysi 2 +univers 2 +award 2 +american 2 +journal 2 +appli 2 +matric 2 +solv 2 +sever 2 +search 2 +carei 2 +crai 2 +profil 2 +youngashbel 1 +smith 1 +ofmathemat 1 +director 1 +center 1 +webb 1 +institut 1 +naval 1 +architectur 1 +harvard 1 +honor 1 +profession 1 +servic 1 +fellow 1 +advanc 1 +outstand 1 +contribut 1 +special 1 +issueded 1 +chair 1 +committe 1 +mathematicalsocieti 1 +board 1 +truste 1 +argonn 1 +editor 1 +siam 1 +matrixappl 1 +area 1 +interestnumer 1 +partial 1 +differenti 1 +numericallinear 1 +summari 1 +researchmi 1 +activ 1 +focus 1 +partialdifferenti 1 +base 1 +finit 1 +differ 1 +methodsand 1 +oflinear 1 +involv 1 +andspars 1 +softwar 1 +packag 1 +develop 1 +basedon 1 +part 1 +itpack 1 +project 1 +beingextend 1 +includ 1 +suitabl 1 +share 1 +memori 1 +distributedmemori 1 +rapidli 1 +converg 1 +methodsbas 1 +multilevel 1 +procedur 1 +also 1 +beingdevelop 1 +select 1 +recent 1 +publicationsd 1 +stationari 1 +second 1 +degre 1 +topic 1 +polynomi 1 +variabl 1 +rassia 1 +srivasiava 1 +yanushauska 1 +world 1 +scientif 1 +publ 1 +compani 1 +singapor 1 +vona 1 +ration 1 +omega 1 +academ 1 +press 1 +sepehrnoori 1 +vector 1 +pde 1 +engin 1 +minneapoli 1 +high 1 +level 1 +solver 1 +supercomput 1 +algorithm 1 +graham 1 +john 1 +wilei 1 +son 1 +previou 1 +index 1 +next 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..71812c11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,125 @@ +yoonsuck 7 +choe 7 +austin 7 +comput 6 +scienc 6 +univers 6 +texa 5 +research 5 +organ 5 +miikkulainen 5 +group 5 +later 5 +self 4 +lissom 4 +joseph 4 +sirosh 4 +risto 4 +spike 4 +model 3 +structur 3 +neural 3 +digit 3 +recognit 3 +interconnect 3 +utc 3 +object 3 +public 3 +depart 3 +august 2 +network 2 +work 2 +hand 2 +written 2 +featur 2 +prof 2 +segment 2 +repres 2 +book 2 +relat 2 +interact 2 +cortex 2 +function 2 +page 2 +report 2 +inform 2 +yschoe 2 +utexa 2 +home 2 +photo 1 +ad 1 +student 1 +dept 1 +decemb 1 +yonsei 1 +seoul 1 +korea 1 +welcom 1 +homepag 1 +visitor 1 +sinc 1 +interest 1 +cortic 1 +current 1 +systembas 1 +laterali 1 +synerget 1 +develop 1 +recent 1 +includ 1 +extend 1 +actualspik 1 +event 1 +call 1 +slissom 1 +beself 1 +multipl 1 +retinabi 1 +synchron 1 +within 1 +desynchron 1 +differ 1 +outlin 1 +also 1 +check 1 +html 1 +edit 1 +comment 1 +connect 1 +neuron 1 +technic 1 +septemb 1 +electron 1 +isbn 1 +map 1 +appear 1 +touretzki 1 +mozer 1 +hasselmo 1 +editor 1 +advanc 1 +process 1 +system 1 +cambridg 1 +press 1 +handwritten 1 +techic 1 +master 1 +thesi 1 +bunch 1 +link 1 +total 1 +unord 1 +click 1 +find 1 +interestingcontact 1 +offic 1 +phone 1 +email 1 +mail 1 +address 1 +maintain 1 +last 1 +updat 1 +newsgroup 1 +summari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..1e519629 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,76 @@ +china 4 +scienc 3 +austin 3 +famili 2 +texa 2 +manchest 2 +java 2 +yuanj 1 +xuwint 1 +break 1 +updat 1 +addresspictur 1 +depart 1 +comput 1 +univers 1 +spring 1 +time 1 +tabl 1 +select 1 +cours 1 +schedulec 1 +object 1 +orient 1 +softwar 1 +engr 1 +brown 1 +logic 1 +synthesi 1 +aziz 1 +client 1 +server 1 +system 1 +develop 1 +gang 1 +previou 1 +semest 1 +pagechines 1 +student 1 +associ 1 +alumni 1 +page 1 +work 1 +studi 1 +universityof 1 +technolog 1 +hefei 1 +institut 1 +mathemat 1 +chines 1 +academi 1 +beij 1 +chinaunivers 1 +munich 1 +atmunich 1 +germanyunivers 1 +prof 1 +nick 1 +higham 1 +wang 1 +lifan 1 +hong 1 +chen 1 +guizhongustc 1 +yuan 1 +hailiang 1 +yang 1 +yuhongfriend 1 +linsoftwar 1 +program 1 +perl 1 +common 1 +gatewai 1 +interfac 1 +link 1 +yahoo 1 +publish 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..4a5b34c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,11 @@ +feng 2 +visitor 1 +number 1 +sinc 1 +yufeng 1 +utexa 1 +edufing 1 +public 1 +ring 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..7eee2286 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,68 @@ +china 3 +univers 3 +austin 3 +zhii 2 +chen 2 +guangzhou 2 +comput 2 +resum 2 +zhongshan 2 +chines 2 +fall 2 +spring 2 +associ 2 +anim 2 +page 2 +home 1 +pagezhii 1 +chenabout 1 +mefrom 1 +canton 1 +peopl 1 +republ 1 +current 1 +master 1 +program 1 +scienc 1 +depart 1 +texa 1 +seek 1 +full 1 +time 1 +click 1 +postcript 1 +format 1 +pleas 1 +view 1 +life 1 +calculu 1 +architectur 1 +misc 1 +zodiac 1 +person 1 +differ 1 +kind 1 +find 1 +friend 1 +maintain 1 +john 1 +dong 1 +thank 1 +els 1 +world 1 +wide 1 +info 1 +contact 1 +burton 1 +zchen 1 +utexa 1 +still 1 +construct 1 +copyright 1 +creat 1 +last 1 +modifi 1 +visitor 1 +accord 1 +counter 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..caca1896 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,94 @@ +comput 5 +austin 4 +home 3 +page 3 +univers 3 +china 3 +campu 3 +world 3 +magazin 3 +zhouxiao 3 +utexa 3 +maggi 2 +xiao 2 +zhou 2 +depart 2 +texa 2 +work 2 +fall 2 +onlin 2 +internet 2 +offic 2 +educ 1 +assistantship 1 +graduat 1 +student 1 +teach 1 +assist 1 +databas 1 +manag 1 +sciencesat 1 +buaa 1 +beij 1 +life 1 +current 1 +multimedia 1 +system 1 +algorithm 1 +data 1 +commun 1 +network 1 +distribut 1 +process 1 +spring 1 +look 1 +around 1 +kaleidoscop 1 +land 1 +beauti 1 +visit 1 +peopl 1 +daili 1 +new 1 +digest 1 +zhai 1 +chines 1 +newspag 1 +time 1 +entertain 1 +movi 1 +stamp 1 +societi 1 +ieee 1 +giant 1 +career 1 +center 1 +compani 1 +search 1 +yahoo 1 +galaxi 1 +lyco 1 +directori 1 +guid 1 +html 1 +script 1 +librari 1 +contact 1 +inform 1 +mail 1 +http 1 +user 1 +main 1 +build 1 +room 1 +phone 1 +address 1 +scienc 1 +taylor 1 +last 1 +modifi 1 +sept 1 +comment 1 +welcom 1 +send 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..07ac6199 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,82 @@ +dynam 4 +document 4 +friend 3 +univers 3 +texa 3 +austin 3 +china 3 +site 3 +homepag 2 +know 2 +student 2 +depart 2 +comput 2 +peke 2 +former 2 +classmat 2 +netscap 2 +clike 2 +qing 1 +qinghi 1 +welcom 1 +graduat 1 +inth 1 +scinc 1 +born 1 +beij 1 +capit 1 +citi 1 +bachelor 1 +degre 1 +meet 1 +ofmi 1 +findmor 1 +peopl 1 +pekingunivers 1 +alumni 1 +home 1 +page 1 +oversea 1 +html 1 +enjoi 1 +live 1 +peek 1 +follow 1 +like 1 +well 1 +find 1 +lot 1 +valuabl 1 +informationand 1 +professionalinternetpc 1 +relatedmac 1 +relatedaft 1 +worknetscap 1 +testtwin 1 +eldertwin 1 +youngernetscap 1 +testanim 1 +danc 1 +titledanc 1 +titl 1 +testanoth 1 +testyet 1 +anoth 1 +testfriendsthi 1 +travel 1 +maintain 1 +xiaohai 1 +best 1 +shan 1 +shinan 1 +visitor 1 +number 1 +sinc 1 +octob 1 +construct 1 +last 1 +modifi 1 +qingunivers 1 +sciencesaustin 1 +zhuqe 1 +utexa 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..3935ccb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,63 @@ +washington 3 +student 3 +faculti 2 +program 2 +inform 2 +region 2 +research 2 +univers 2 +univ 1 +comput 1 +scienc 1 +organizationsinclud 1 +staff 1 +visitor 1 +organ 1 +ouraffili 1 +graduat 1 +regioninclud 1 +local 1 +desktop 1 +refer 1 +link 1 +elsewher 1 +spotlightuwwin 1 +pacif 1 +intern 1 +programmingcontesttwovideo 1 +highlight 1 +educ 1 +initiativesourcolloquia 1 +live 1 +mbonemajordon 1 +intel 1 +corporationdickkarp 1 +receiv 1 +nation 1 +medal 1 +scienceprofessionalmast 1 +applic 1 +deadlin 1 +autumn 1 +departmentoverview 1 +theimpact 1 +perspect 1 +staffposit 1 +avail 1 +half 1 +centuri 1 +exponenti 1 +progress 1 +technolog 1 +page 1 +peopl 1 +cours 1 +laboratori 1 +newscan 1 +handl 1 +tabl 1 +click 1 +seattl 1 +voic 1 +comment 1 +webmast 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..0baaaca4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,57 @@ +berman 4 +andrew 3 +postscript 2 +format 2 +debbi 2 +home 1 +pageandrew 1 +aberman 1 +washington 1 +educomput 1 +scienc 1 +bourassa 1 +virgil 1 +selberg 1 +erik 1 +tron 1 +process 1 +specif 1 +file 1 +protect 1 +unix 1 +oper 1 +system 1 +bothpostscript 1 +andhtml 1 +proceed 1 +winter 1 +usenix 1 +confer 1 +data 1 +structur 1 +fast 1 +approxim 1 +match 1 +shapiro 1 +linda 1 +effici 1 +imag 1 +retriev 1 +multipl 1 +distanc 1 +measur 1 +avail 1 +appear 1 +spie 1 +special 1 +link 1 +wife 1 +beauti 1 +daughter 1 +melani 1 +miscellan 1 +poison 1 +donut 1 +stupid 1 +stupidmi 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..fcacb4d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,107 @@ +adam 3 +washington 3 +softwar 3 +imag 3 +finkelstein 2 +depart 2 +comput 2 +scienc 2 +univers 2 +seattl 2 +coffe 2 +student 2 +recent 2 +photocopi 2 +visit 2 +made 2 +start 1 +drink 1 +cup 1 +hair 1 +limp 1 +andlack 1 +bodi 1 +year 1 +live 1 +good 1 +life 1 +graduat 1 +final 1 +finish 1 +doctor 1 +graphic 1 +fall 1 +quarter 1 +post 1 +earli 1 +join 1 +thecomput 1 +atprinceton 1 +engin 1 +tibco 1 +formerli 1 +teknekron 1 +system 1 +palo 1 +alto 1 +wrote 1 +peopl 1 +trade 1 +stock 1 +undergradu 1 +swarthmor 1 +colleg 1 +class 1 +studi 1 +physic 1 +occasion 1 +research 1 +project 1 +find 1 +specif 1 +alarg 1 +databas 1 +sinc 1 +work 1 +someth 1 +call 1 +multiresolut 1 +video 1 +photo 1 +plai 1 +ultim 1 +frisbe 1 +team 1 +calledumatata 1 +address 1 +phone 1 +number 1 +look 1 +plan 1 +file 1 +across 1 +thehilari 1 +menu 1 +least 1 +hous 1 +caff 1 +lardo 1 +chilli 1 +night 1 +snoqualmi 1 +pass 1 +excel 1 +view 1 +comet 1 +hyakutak 1 +great 1 +pictur 1 +taken 1 +friend 1 +marcu 1 +cool 1 +glass 1 +sculptur 1 +dither 1 +mona 1 +gothic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..a097b1ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,146 @@ +data 13 +parallel 12 +load 9 +jame 7 +ahren 7 +balanc 7 +comput 6 +system 6 +render 6 +visual 5 +databas 5 +vision 5 +algorithm 5 +polygon 5 +scientif 4 +cost 4 +perform 4 +dataset 4 +redistribut 4 +paper 4 +model 4 +base 4 +univers 3 +washington 3 +manag 3 +charl 3 +hansen 3 +effect 3 +present 3 +experi 3 +fast 3 +home 2 +page 2 +address 2 +scienc 2 +engin 2 +depart 2 +research 2 +project 2 +laboratori 2 +process 2 +improv 2 +onunbalanc 2 +design 2 +describ 2 +method 2 +machin 2 +shade 2 +applic 2 +set 2 +seattl 1 +email 1 +phone 1 +interest 1 +distribut 1 +environ 1 +alamo 1 +nation 1 +public 1 +intern 1 +confer 1 +august 1 +typic 1 +program 1 +degrad 1 +unnecessari 1 +occur 1 +whichperform 1 +possibl 1 +save 1 +outweighth 1 +polygonrender 1 +show 1 +factor 1 +loss 1 +percent 1 +onbalanc 1 +us 1 +linda 1 +shapiro 1 +steven 1 +tanimoto 1 +brinklei 1 +jakobovit 1 +lara 1 +lewi 1 +proceed 1 +second 1 +workshop 1 +februari 1 +gener 1 +motiv 1 +intend 1 +provid 1 +unifi 1 +highli 1 +graphic 1 +user 1 +interfac 1 +advanc 1 +queri 1 +facil 1 +interact 1 +notebook 1 +aid 1 +experiment 1 +promot 1 +share 1 +commun 1 +frank 1 +ortega 1 +supercomput 1 +novemb 1 +massiv 1 +simpl 1 +target 1 +requir 1 +extrem 1 +larg 1 +found 1 +mani 1 +handl 1 +arbitrarili 1 +complex 1 +need 1 +mesh 1 +issu 1 +involv 1 +toolkit 1 +enabl 1 +scientist 1 +displai 1 +directli 1 +avoid 1 +transmiss 1 +huge 1 +amount 1 +post 1 +ofwashington 1 +april 1 +longer 1 +version 1 +icpp 1 +also 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..9a4ff407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,100 @@ +vision 9 +databas 7 +research 6 +visual 5 +environ 4 +linda 4 +shapiro 4 +steven 4 +tanimoto 4 +base 3 +comput 3 +imag 3 +jame 3 +ahren 3 +jakobovit 3 +lara 3 +lewi 3 +februari 3 +overview 2 +scientif 2 +system 2 +model 2 +graphic 2 +interfac 2 +data 2 +devr 2 +entiti 2 +relat 2 +multi 2 +level 2 +queri 2 +experi 2 +manag 2 +brinklei 2 +spie 2 +symposium 2 +electron 2 +technolog 2 +wasdesign 1 +gener 1 +motiv 1 +andintend 1 +provid 1 +unifieddata 1 +highli 1 +user 1 +advanc 1 +queryfacil 1 +interact 1 +laboratori 1 +notebook 1 +databaseenviron 1 +aid 1 +experiment 1 +andpromot 1 +share 1 +commun 1 +store 1 +hierarch 1 +datastructur 1 +schema 1 +contain 1 +name 1 +ofproperti 1 +part 1 +attribut 1 +among 1 +thepart 1 +definit 1 +describ 1 +buildinst 1 +specif 1 +studi 1 +mani 1 +differ 1 +topic 1 +includ 1 +peopl 1 +princip 1 +investig 1 +graduat 1 +student 1 +public 1 +inmodel 1 +proceed 1 +secondcad 1 +workshop 1 +present 1 +project 1 +flexibledata 1 +organ 1 +support 1 +databasesystem 1 +scienceand 1 +implement 1 +scienc 1 +email 1 +washington 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..e72d8186 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,77 @@ +comput 5 +link 4 +page 3 +scienc 3 +graduat 3 +washington 2 +school 2 +program 2 +outdoor 2 +home 2 +great 2 +women 2 +educ 2 +inform 2 +mentorship 2 +project 2 +bernheim 1 +start 1 +univers 1 +still 1 +construct 1 +william 1 +colleg 1 +math 1 +front 1 +plai 1 +ultim 1 +frisbe 1 +autumn 1 +quarter 1 +class 1 +programminglanguag 1 +automata 1 +principl 1 +ofdigit 1 +system 1 +design 1 +graphic 1 +seminar 1 +parallel 1 +environ 1 +relat 1 +nation 1 +park 1 +gorp 1 +guideto 1 +recreationfun 1 +christian 1 +scott 1 +interact 1 +list 1 +abig 1 +pile 1 +cool 1 +blast 1 +past 1 +scoobi 1 +dooeduc 1 +refer 1 +undergrad 1 +peterson 1 +center 1 +sourc 1 +opportun 1 +distribut 1 +allow 1 +undergradu 1 +spend 1 +summerwork 1 +research 1 +femal 1 +mentor 1 +experi 1 +highlyrecommend 1 +back 1 +pagelast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..a133650e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,128 @@ +comput 4 +proof 4 +algorithm 3 +softwar 3 +scientist 3 +amir 2 +design 2 +master 2 +wai 2 +teach 2 +mathematician 2 +motiv 2 +error 2 +like 2 +make 2 +defici 2 +michail 1 +michailgradu 1 +studenti 1 +second 1 +year 1 +graduat 1 +student 1 +depart 1 +scienceat 1 +univers 1 +washington 1 +research 1 +interest 1 +includ 1 +followingarea 1 +engin 1 +educ 1 +degre 1 +universityof 1 +toronto 1 +thesi 1 +optim 1 +broadcast 1 +summationfor 1 +hierarch 1 +ring 1 +architectur 1 +shift 1 +click 1 +hereto 1 +obtain 1 +compress 1 +postscript 1 +file 1 +recent 1 +experi 1 +particular 1 +built 1 +opsi 1 +java 1 +appletdesign 1 +balanc 1 +binari 1 +tree 1 +combinesprogram 1 +anim 1 +final 1 +lunar 1 +lander 1 +style 1 +gamethat 1 +wrote 1 +part 1 +undergradu 1 +graphic 1 +cours 1 +quotat 1 +tend 1 +conserv 1 +mani 1 +unwillingto 1 +consid 1 +might 1 +better 1 +write 1 +told 1 +embarrass 1 +learn 1 +publishedincorrect 1 +theorem 1 +avoid 1 +believ 1 +theywil 1 +structur 1 +persuad 1 +will 1 +explor 1 +unconvent 1 +proofstyl 1 +unfortun 1 +found 1 +care 1 +whether 1 +theyhav 1 +publish 1 +incorrect 1 +result 1 +often 1 +seem 1 +glad 1 +wasnot 1 +caught 1 +refere 1 +sinc 1 +would 1 +meant 1 +fewer 1 +public 1 +fear 1 +stylethat 1 +reveal 1 +mistak 1 +lesli 1 +lamport 1 +construct 1 +wayi 1 +simpl 1 +obvious 1 +theother 1 +complic 1 +obviou 1 +hoar 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..6390c40f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,62 @@ +scienc 5 +richard 3 +anderson 3 +comput 3 +washington 3 +research 3 +institut 3 +project 3 +professor 2 +univers 2 +year 2 +visit 2 +algorithm 2 +work 2 +travel 2 +home 1 +page 1 +associ 1 +graduat 1 +inmathemat 1 +reed 1 +colleg 1 +stanfordin 1 +join 1 +aon 1 +postdoc 1 +mathemat 1 +inberkelei 1 +receiv 1 +presidenti 1 +younginvestig 1 +award 1 +spent 1 +academ 1 +yeara 1 +indian 1 +bangalor 1 +india 1 +main 1 +interest 1 +theori 1 +implementationof 1 +includ 1 +parallel 1 +geometri 1 +scientif 1 +applic 1 +engin 1 +depart 1 +seattl 1 +teach 1 +paper 1 +progress 1 +qualifi 1 +evalu 1 +note 1 +theindian 1 +resum 1 +tourist 1 +pictur 1 +recent 1 +talksanderson 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..f21f9edf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,128 @@ +interest 6 +research 4 +decis 4 +comput 3 +scienc 3 +univers 3 +name 3 +syllabl 3 +anhai 2 +doan 2 +hungari 2 +mostli 2 +plan 2 +travel 2 +music 2 +first 2 +ofmi 2 +birthplac 2 +librari 2 +homepageanhai 1 +page 1 +reconstruct 1 +pleas 1 +revisit 1 +soon 1 +born 1 +brought 1 +vietnam 1 +finish 1 +high 1 +school 1 +iwent 1 +studi 1 +graduat 1 +kossuth 1 +lajo 1 +art 1 +andscienc 1 +debrecen 1 +receiv 1 +also 1 +wisconsin 1 +milwauke 1 +start 1 +fall 1 +program 1 +depart 1 +andengin 1 +ofwashington 1 +seattl 1 +artifici 1 +intellig 1 +amcurr 1 +investig 1 +make 1 +underuncertainti 1 +theoret 1 +markov 1 +process 1 +qualit 1 +theori 1 +academ 1 +includ 1 +read 1 +listen 1 +jazz 1 +blue 1 +thing 1 +mean 1 +calm 1 +invietnames 1 +made 1 +combin 1 +last 1 +mother 1 +nghean 1 +father 1 +haiphong 1 +show 1 +creativ 1 +folkswer 1 +thought 1 +birth 1 +younger 1 +brother 1 +theysimpli 1 +switch 1 +gave 1 +namehaian 1 +content 1 +probabilist 1 +knowledg 1 +represent 1 +recent 1 +paper 1 +curriculum 1 +vita 1 +educ 1 +employ 1 +histori 1 +award 1 +honor 1 +public 1 +teach 1 +data 1 +structur 1 +algorithm 1 +take 1 +cours 1 +check 1 +inform 1 +offic 1 +hour 1 +locat 1 +person 1 +comtemporari 1 +vietnames 1 +affair 1 +literatur 1 +write 1 +paint 1 +foreign 1 +languag 1 +gener 1 +purpos 1 +life 1 +snapshotsanhai 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..a850319b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,128 @@ +ausland 8 +figur 4 +joel 3 +comput 3 +washington 3 +anim 3 +compil 3 +paper 3 +motion 3 +univers 2 +last 2 +project 2 +seattl 2 +dynam 2 +philipos 2 +chamber 2 +egger 2 +automat 2 +system 2 +graphic 2 +page 2 +without 2 +hope 1 +depart 1 +scienc 1 +engineeringat 1 +pictur 1 +spring 1 +class 1 +click 1 +onit 1 +origin 1 +quarter 1 +complet 1 +qual 1 +time 1 +code 1 +gener 1 +multiflow 1 +offic 1 +sieg 1 +hall 1 +home 1 +univ 1 +resum 1 +written 1 +follow 1 +fast 1 +effect 1 +andb 1 +bershad 1 +pldi 1 +support 1 +event 1 +dispatch 1 +inextens 1 +mock 1 +andp 1 +pardyak 1 +workshop 1 +compilersupport 1 +softwar 1 +februari 1 +experi 1 +control 1 +base 1 +synthesisfor 1 +articul 1 +fukunaga 1 +partovi 1 +christensen 1 +reiss 1 +shuman 1 +mark 1 +acmtransact 1 +also 1 +site 1 +optim 1 +leapfrog 1 +benjamin 1 +wilkerson 1 +mathemat 1 +magazin 1 +lossili 1 +compress 1 +mpeg 1 +animationthat 1 +goe 1 +synthesi 1 +sequenc 1 +show 1 +mywork 1 +piec 1 +togeth 1 +cartwheel 1 +jump 1 +andshuffl 1 +fall 1 +andcollaps 1 +brown 1 +us 1 +algorithm 1 +orang 1 +isjust 1 +try 1 +switch 1 +consider 1 +tosmooth 1 +physic 1 +autumn 1 +took 1 +super 1 +short 1 +doubl 1 +speed 1 +small 1 +version 1 +final 1 +find 1 +better 1 +place 1 +slide 1 +thetalk 1 +singular 1 +valu 1 +decomposit 1 +gave 1 +seminar 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..1f5b60cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,96 @@ +brian 4 +info 4 +boinge 3 +name 2 +offic 2 +current 2 +page 2 +favorit 2 +look 2 +girl 1 +michalowskidepart 1 +comput 1 +scienc 1 +engineeringmail 1 +stop 1 +univers 1 +washingtonseattl 1 +locat 1 +sieg 1 +phone 1 +take 1 +ling 1 +edit 1 +mossi 1 +bitsthank 1 +visit 1 +visitor 1 +number 1 +worst 1 +view 1 +us 1 +headscapewhenev 1 +second 1 +year 1 +gradstud 1 +actual 1 +liber 1 +artist 1 +interest 1 +inlinguist 1 +confus 1 +good 1 +get 1 +know 1 +alreadi 1 +ultrahotlist 1 +site 1 +ofal 1 +time 1 +search 1 +onlin 1 +refer 1 +forsometh 1 +glorifi 1 +hotlist 1 +doesn 1 +thave 1 +urouletteto 1 +random 1 +find 1 +past 1 +institut 1 +ofwhich 1 +mental 1 +person 1 +quot 1 +file 1 +songsand 1 +poem 1 +fictiti 1 +thrash 1 +band 1 +puriti 1 +test 1 +origin 1 +work 1 +tokeep 1 +touch 1 +finger 1 +mail 1 +guestbook 1 +pagesfrom 1 +friend 1 +idea 1 +includ 1 +aslfingerspel 1 +snapshot 1 +blatantli 1 +stolen 1 +brad 1 +chamberlain 1 +michalowski 1 +dept 1 +complet 1 +sanityerad 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..df9f90f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,52 @@ +paul 4 +comput 4 +washington 3 +research 3 +proof 3 +beamepaul 2 +beam 2 +associ 2 +scienc 2 +receiv 2 +univers 2 +computationalcomplex 2 +recent 2 +professor 1 +thedepart 1 +engineeringat 1 +theunivers 1 +mathemat 1 +sciencein 1 +toronto 1 +post 1 +doctor 1 +academicyear 1 +join 1 +autumn 1 +presidentialyoung 1 +investig 1 +award 1 +concern 1 +primarili 1 +theoret 1 +aspect 1 +paralleland 1 +distribut 1 +concentr 1 +connect 1 +theori 1 +particular 1 +complex 1 +inproposit 1 +system 1 +enjoi 1 +squash 1 +softbal 1 +sport 1 +enthusiasm 1 +cancompens 1 +lack 1 +talent 1 +paper 1 +qual 1 +project 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..7cd6e75b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,84 @@ +code 2 +devic 2 +sport 2 +ultim 2 +frisbe 2 +confer 2 +champion 2 +interest 2 +david 1 +beckerdavid 1 +beckercontact 1 +info 1 +mark 1 +spot 1 +stuff 1 +spin 1 +much 1 +time 1 +goe 1 +makingspina 1 +real 1 +oper 1 +system 1 +respons 1 +level 1 +borrow 1 +model 1 +drvier 1 +support 1 +build 1 +environ 1 +manag 1 +keep 1 +develop 1 +platform 1 +function 1 +somedai 1 +getto 1 +perform 1 +measur 1 +optim 1 +tri 1 +bunch 1 +favorit 1 +tripl 1 +jump 1 +minnesota 1 +athlet 1 +bethel 1 +colleg 1 +volleybal 1 +men 1 +grad 1 +team 1 +plai 1 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 1 +handbal 1 +also 1 +playracquetballgolftenni 1 +done 1 +bridgecampingcanoeingdisc 1 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 1 +skiingweightliftingwhitewat 1 +raftinghorseback 1 +ridingmountain 1 +bikingin 1 +line 1 +skate 1 +librari 1 +econom 1 +topic 1 +particularli 1 +free 1 +bank 1 +anti 1 +trust 1 +currenc 1 +ssto 1 +rlv 1 +theologi 1 +centurai 1 +railroad 1 +boot 1 +locomot 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..43c5c8e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,280 @@ +bershad 36 +system 25 +oper 11 +brian 10 +perform 7 +extens 7 +workshop 7 +usenix 6 +support 6 +sosp 5 +asplo 5 +paper 5 +project 5 +dynam 5 +us 5 +appear 5 +compil 5 +softwar 5 +comput 4 +network 4 +memori 4 +manag 4 +pardyak 4 +romer 4 +februari 4 +microkernel 4 +univers 3 +seattl 3 +distribut 3 +architectur 3 +spin 3 +mobil 3 +design 3 +octob 3 +przemyslaw 3 +osdi 3 +fast 3 +confer 3 +marc 3 +fiuczynski 3 +savag 3 +chen 3 +maeda 3 +inform 3 +washington 2 +ofwashington 2 +parallel 2 +work 2 +isca 2 +midwai 2 +winter 2 +master 2 +qual 2 +karlin 2 +sigop 2 +associ 2 +symposium 2 +onoper 2 +implemen 2 +structur 2 +voelker 2 +languag 2 +ausland 2 +philipos 2 +chamber 2 +egger 2 +protocol 2 +specif 2 +write 2 +emin 2 +sirer 2 +stefan 2 +zekauska 2 +sawdon 2 +cach 2 +hardwar 2 +larg 2 +servic 2 +kernel 2 +stock 2 +mach 2 +commun 2 +machnix 2 +drave 2 +forin 2 +wwo 2 +relat 2 +eduwork 1 +scienc 1 +engin 1 +depart 1 +home 1 +street 1 +assist 1 +professor 1 +sinc 1 +receiv 1 +took 1 +brief 1 +respit 1 +experi 1 +post 1 +industri 1 +cultur 1 +northeast 1 +return 1 +northwest 1 +coffe 1 +research 1 +hasappear 1 +toc 1 +although 1 +seem 1 +asigmetr 1 +publish 1 +save 1 +life 1 +besid 1 +run 1 +plai 1 +squash 1 +hang 1 +thestairmast 1 +includ 1 +extensibleoper 1 +carnegi 1 +mellon 1 +parallelnetwork 1 +scalabl 1 +rocki 1 +thesequel 1 +etch 1 +binari 1 +instrument 1 +optimizationcours 1 +look 1 +click 1 +list 1 +youmight 1 +degre 1 +recent 1 +trace 1 +driven 1 +comparison 1 +algorithm 1 +prefetch 1 +cachingtraci 1 +kimbrel 1 +andrew 1 +tomkin 1 +hugo 1 +patterson 1 +edward 1 +felten 1 +garth 1 +gibson 1 +anna 1 +bind 1 +extensiblesystem 1 +interpret 1 +theodor 1 +denni 1 +geoffrei 1 +alec 1 +wolman 1 +wayn 1 +wong 1 +jean 1 +loup 1 +baer 1 +henri 1 +levi 1 +effect 1 +dynamiccompil 1 +program 1 +implementationj 1 +applic 1 +modula 1 +greg 1 +defouw 1 +mari 1 +alapat 1 +wilson 1 +hsieh 1 +charl 1 +garrett 1 +david 1 +becker 1 +safe 1 +link 1 +automat 1 +event 1 +dispatch 1 +systemsc 1 +mock 1 +safeti 1 +reduc 1 +overhead 1 +onlinesuperpag 1 +promot 1 +ohlrich 1 +detect 1 +sharedmemori 1 +appearedin 1 +page 1 +map 1 +polici 1 +conflictresolut 1 +standard 1 +mobisa 1 +inth 1 +issu 1 +avoid 1 +conflict 1 +miss 1 +direct 1 +mappedcach 1 +forappl 1 +uwtechn 1 +report 1 +effici 1 +packet 1 +demultiplex 1 +multipl 1 +endpoint 1 +messag 1 +yuhara 1 +moss 1 +impact 1 +decomposit 1 +high 1 +practic 1 +consider 1 +block 1 +concurr 1 +object 1 +interrupt 1 +prioriti 1 +share 1 +ieee 1 +compcon 1 +local 1 +area 1 +andmostli 1 +watson 1 +moblic 1 +consist 1 +virtual 1 +index 1 +wheeler 1 +mutual 1 +exclus 1 +uniprocessor 1 +redel 1 +elli 1 +primit 1 +ginsburg 1 +baron 1 +microbenchmark 1 +evalu 1 +increas 1 +irrelev 1 +micro 1 +base 1 +golub 1 +continu 1 +implement 1 +thread 1 +inoper 1 +rashid 1 +dean 1 +arpa 1 +rain 1 +citi 1 +hash 1 +hous 1 +harrier 1 +rel 1 +abduct 1 +alien 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..2857977a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,152 @@ +page 7 +search 6 +new 5 +home 4 +scienc 3 +seattl 3 +washington 3 +list 3 +american 3 +directori 3 +budget 3 +doorenbo 2 +current 2 +work 2 +todai 2 +project 2 +pointer 2 +link 2 +yahoo 2 +world 2 +yellow 2 +white 2 +sport 2 +pagebob 1 +depart 1 +comput 1 +engineeringunivers 1 +washingtonbox 1 +offic 1 +sieg 1 +hall 1 +bobd 1 +netbot 1 +union 1 +place 1 +suit 1 +voic 1 +daili 1 +comedi 1 +dilbert 1 +strip 1 +boffo 1 +david 1 +letterman 1 +geeki 1 +zdnet 1 +anchordesk 1 +magazin 1 +good 1 +stuff 1 +shortcut 1 +cool 1 +tool 1 +research 1 +postdoc 1 +oren 1 +etzioni 1 +steve 1 +hank 1 +weld 1 +softbot 1 +also 1 +particular 1 +shopbot 1 +internet 1 +shop 1 +agent 1 +previou 1 +soar 1 +thesi 1 +site 1 +collect 1 +repositori 1 +canada 1 +gopher 1 +scientif 1 +sigma 1 +scientist 1 +miscellan 1 +meta 1 +metacrawl 1 +savvysearch 1 +alta 1 +vista 1 +lyco 1 +inktomi 1 +open 1 +text 1 +infoseek 1 +excit 1 +crawler 1 +hotbot 1 +hierarch 1 +select 1 +magellan 1 +pointcom 1 +engin 1 +guid 1 +onlin 1 +telephon 1 +network 1 +switchboard 1 +cnnfn 1 +newshour 1 +post 1 +reuter 1 +headlin 1 +social 1 +cafe 1 +report 1 +boston 1 +globe 1 +span 1 +time 1 +view 1 +slate 1 +feed 1 +salon 1 +atlant 1 +monthli 1 +harper 1 +espn 1 +zone 1 +govern 1 +fedworld 1 +index 1 +hous 1 +congress 1 +arpa 1 +feder 1 +deficit 1 +nation 1 +debt 1 +clock 1 +concord 1 +coalit 1 +hand 1 +balanc 1 +bipartisan 1 +commiss 1 +entitl 1 +reform 1 +univers 1 +museum 1 +past 1 +life 1 +pittsburgh 1 +upcom 1 +birthdai 1 +person 1 +andfun 1 +pagebobd 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..3c6fdec4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,100 @@ +comput 7 +languag 5 +born 4 +scienc 4 +univers 4 +washington 4 +home 3 +depart 3 +constraint 3 +program 3 +research 3 +engin 2 +orient 2 +human 2 +societi 2 +page 2 +recent 2 +autumn 2 +graduat 2 +work 2 +receiv 2 +xerox 2 +spent 2 +alan 1 +pagealan 1 +pagei 1 +professor 1 +principalresearch 1 +interest 1 +base 1 +system 1 +object 1 +logic 1 +computerinteract 1 +current 1 +activitiesuwconstraint 1 +contain 1 +link 1 +paper 1 +public 1 +domainsourc 1 +code 1 +satisfact 1 +algorithm 1 +media 1 +technolog 1 +democraci 1 +groupuw 1 +student 1 +also 1 +idea 1 +qualsproject 1 +teachingher 1 +cours 1 +taught 1 +concept 1 +winter 1 +interact 1 +spring 1 +informationhistori 1 +grew 1 +idaho 1 +reed 1 +colleg 1 +mathemat 1 +atstanford 1 +degre 1 +dissert 1 +done 1 +associ 1 +paloalto 1 +center 1 +concern 1 +simulationlaboratori 1 +year 1 +post 1 +doctoralfellow 1 +artifici 1 +intellig 1 +ofedinburgh 1 +scotland 1 +mechan 1 +problem 1 +solv 1 +symbolicalgebra 1 +join 1 +andexcept 1 +sabbat 1 +europarc 1 +cambridg 1 +england 1 +havebeen 1 +sinc 1 +address 1 +dept 1 +seattl 1 +phone 1 +email 1 +eduwww 1 +http 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..2e9e3421 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,20 @@ +thing 3 +brad 2 +home 1 +pagebrad 1 +chamberlainphoto 1 +credit 1 +mike 1 +perkowitzth 1 +probabl 1 +couldn 1 +care 1 +less 1 +offic 1 +address 1 +work 1 +like 1 +ad 1 +subset 1 +ofth 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..5452a337 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,125 @@ +html 5 +comput 4 +mumei 4 +postscript 4 +present 3 +brendan 2 +washington 2 +fall 2 +work 2 +problem 2 +physic 2 +map 2 +us 2 +also 2 +confer 2 +galleri 2 +home 1 +pagebrendan 1 +mumeyi 1 +student 1 +depart 1 +scienceat 1 +theunivers 1 +enter 1 +expect 1 +graduat 1 +around 1 +contact 1 +information 1 +mail 1 +edufor 1 +address 1 +click 1 +curriculum 1 +vitaein 1 +htmlorpostscriptformat 1 +academ 1 +interestsi 1 +would 1 +call 1 +appli 1 +theoret 1 +scientist 1 +current 1 +biologi 1 +moment 1 +look 1 +build 1 +rough 1 +locat 1 +landmark 1 +genom 1 +gener 1 +speak 1 +interest 1 +theori 1 +math 1 +tosolv 1 +reason 1 +practic 1 +done 1 +incomput 1 +astrophys 1 +hpcc 1 +groupher 1 +onlin 1 +papersb 1 +power 1 +clone 1 +overlap 1 +test 1 +poster 1 +ismb 1 +aspect 1 +probe 1 +survei 1 +paper 1 +written 1 +fulfil 1 +candidaci 1 +requir 1 +find 1 +cluster 1 +quickli 1 +parallel 1 +dimac 1 +challeng 1 +klaw 1 +upper 1 +lower 1 +bound 1 +construct 1 +alphabet 1 +binari 1 +tree 1 +soda 1 +siam 1 +ofdiscret 1 +mathemat 1 +note 1 +version 1 +produc 1 +latexhtml 1 +containsom 1 +error 1 +readabl 1 +part 1 +recreationhik 1 +cycl 1 +ski 1 +climb 1 +drink 1 +coffeeto 1 +name 1 +sailingand 1 +hope 1 +sometim 1 +like 1 +plai 1 +bridg 1 +older 1 +photo 1 +first 1 +second 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..3cd04205 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,88 @@ +comput 4 +project 4 +interest 3 +us 3 +cours 3 +bricker 2 +washington 2 +research 2 +graphic 2 +learn 2 +lauren 1 +brickerlauren 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +click 1 +need 1 +clue 1 +info 1 +primarli 1 +userinterfac 1 +although 1 +self 1 +proclaim 1 +groupi 1 +current 1 +work 1 +stevetanimoto 1 +mathematicsexperi 1 +imag 1 +process 1 +metip 1 +goal 1 +ofthi 1 +junior 1 +high 1 +school 1 +kid 1 +mathemat 1 +usingexploratori 1 +rather 1 +rote 1 +method 1 +particular 1 +minterest 1 +develop 1 +supportedcollabor 1 +cscl 1 +user 1 +interfac 1 +inthi 1 +well 1 +workin 1 +lawk 1 +dawg 1 +interfacea 1 +fairli 1 +extens 1 +resumeschool 1 +dazethi 1 +quarterdoth 1 +quartershuman 1 +interact 1 +spring 1 +quarter 1 +writeup 1 +final 1 +writeupwhat 1 +asystem 1 +insocieti 1 +excit 1 +hobbi 1 +enjoi 1 +busi 1 +lifesportscookingpotteri 1 +even 1 +studio 1 +garag 1 +year 1 +stuffbecaus 1 +ask 1 +itaddress 1 +last 1 +modifi 1 +mondai 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..f4affdda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,6 @@ +carlson 3 +washington 2 +adam 1 +carlsonadam 1 +comput 1 +scienc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..df6cc23d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,44 @@ +certain 3 +andrew 2 +page 2 +washington 2 +download 2 +look 2 +model 2 +current 2 +work 2 +viewer 2 +modifi 2 +home 1 +server 1 +fix 1 +give 1 +note 1 +interest 1 +follow 1 +direct 1 +theview 1 +tonyderos 1 +david 1 +salesin 1 +werner 1 +stuetzl 1 +duchamp 1 +jovan 1 +popov 1 +scanningproject 1 +build 1 +requir 1 +sgigraph 1 +workstat 1 +paper 1 +netscap 1 +shouldalso 1 +browser 1 +similar 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..0679dc89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,91 @@ +chou 3 +washington 2 +school 2 +fall 2 +line 2 +comput 2 +java 2 +welcom 1 +homepag 1 +grad 1 +student 1 +computersci 1 +seattl 1 +pictur 1 +right 1 +first 1 +quarter 1 +ross 1 +think 1 +scari 1 +relat 1 +infoth 1 +chinook 1 +projectmi 1 +schedulemi 1 +list 1 +publicationscod 1 +workshop 1 +codesignpersonalperson 1 +info 1 +taiwan 1 +greec 1 +resumefoodi 1 +enjoi 1 +cook 1 +peopl 1 +open 1 +restaur 1 +ofpeopl 1 +favorit 1 +dish 1 +includ 1 +stir 1 +fri 1 +rice 1 +noodl 1 +beefskew 1 +recip 1 +toysb 1 +geek 1 +computersand 1 +cool 1 +toi 1 +taiwanesei 1 +also 1 +promot 1 +taiwaneselanguag 1 +current 1 +develop 1 +tool 1 +taiwanes 1 +sureto 1 +check 1 +experiment 1 +taiwanesedictionari 1 +though 1 +absolut 1 +requir 1 +page 1 +best 1 +viewedif 1 +instal 1 +chines 1 +charact 1 +font 1 +us 1 +enabl 1 +browser 1 +like 1 +netscap 1 +beabl 1 +applet 1 +yellow 1 +ball 1 +bouncingov 1 +barnei 1 +purpl 1 +dynosaur 1 +last 1 +updat 1 +email 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..879d3647 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,115 @@ +math 6 +home 5 +page 4 +contest 4 +program 3 +problem 3 +html 3 +set 2 +color 2 +search 2 +linux 2 +result 2 +final 2 +version 2 +internet 2 +comput 2 +refer 2 +thing 2 +done 2 +washington 2 +depart 2 +engin 2 +spring 2 +meet 2 +corei 1 +andersoncorei 1 +andersonth 1 +interest 1 +late 1 +research 1 +percept 1 +automat 1 +correct 1 +induc 1 +compet 1 +programm 1 +month 1 +info 1 +localtalk 1 +adapt 1 +plai 1 +wavelet 1 +intern 1 +collegi 1 +pacif 1 +region 1 +previou 1 +year 1 +duke 1 +rsum 1 +onlin 1 +recent 1 +august 1 +review 1 +graphic 1 +text 1 +book 1 +graduat 1 +univser 1 +highlin 1 +commun 1 +colleg 1 +self 1 +tune 1 +fpga 1 +fall 1 +help 1 +polli 1 +organ 1 +contribut 1 +open 1 +hous 1 +april 1 +manag 1 +chapter 1 +treasuri 1 +develop 1 +read 1 +macintosh 1 +good 1 +servic 1 +providercool 1 +found 1 +usag 1 +statist 1 +lurker 1 +guid 1 +babylon 1 +sunsit 1 +archiv 1 +dilbert 1 +zone 1 +brother 1 +pageus 1 +link 1 +peek 1 +insid 1 +term 1 +lab 1 +featur 1 +netscap 1 +scienc 1 +univers 1 +washinton 1 +uwtv 1 +tech 1 +notesmi 1 +autumn 1 +schedul 1 +mondai 1 +tuesdai 1 +wednesdai 1 +thursdai 1 +fridai 1 +corin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..a0db5460 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,65 @@ +washington 3 +valedictorian 3 +page 2 +enjoy 2 +experi 2 +ball 2 +address 2 +found 2 +craig 1 +experiencecraig 1 +kaplancurr 1 +locat 1 +student 1 +depart 1 +comput 1 +scienc 1 +universityof 1 +seattl 1 +well 1 +copyof 1 +homepag 1 +univers 1 +waterloo 1 +time 1 +modifi 1 +appropri 1 +tomi 1 +current 1 +situat 1 +near 1 +undergraduatewa 1 +grad 1 +photo 1 +fromth 1 +second 1 +occur 1 +saturdai 1 +convoc 1 +cannot 1 +express 1 +honour 1 +felt 1 +wonder 1 +graduat 1 +class 1 +choos 1 +repres 1 +incident 1 +didn 1 +know 1 +parent 1 +minut 1 +start 1 +ceremoni 1 +sai 1 +never 1 +forgiv 1 +text 1 +anyon 1 +curiou 1 +visitor 1 +number 1 +last 1 +updat 1 +cskaplan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..1cb1b26a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,56 @@ +codi 3 +kwok 3 +welcom 2 +work 2 +sanctuari 2 +nausicaa 2 +vallei 2 +home 1 +page 1 +first 1 +thing 1 +thought 1 +peopl 1 +think 1 +mean 1 +aliv 1 +asami 1 +chiaki 1 +chung 1 +ctkwok 1 +washington 1 +edui 1 +graduat 1 +student 1 +weld 1 +andoren 1 +etzioni 1 +plan 1 +andsoftwar 1 +agent 1 +ingram 1 +softbot 1 +aiuw 1 +contact 1 +informationleisur 1 +windlaputa 1 +castl 1 +skyhyp 1 +futur 1 +vision 1 +gunnm 1 +wind 1 +arch 1 +vile 1 +java 1 +applet 1 +anim 1 +take 1 +load 1 +last 1 +modifi 1 +visitor 1 +sinc 1 +figur 1 +doom 1 +numer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..6d925f72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,30 @@ +darren 2 +cronquist 2 +washington 2 +inform 2 +current 2 +resum 2 +curriculum 2 +darrenc 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +welcom 1 +home 1 +page 1 +last 1 +updat 1 +employ 1 +plan 1 +complet 1 +myph 1 +html 1 +postscript 1 +vita 1 +vitaperson 1 +rest 1 +homepag 1 +underconstruct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..901585b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,193 @@ +plan 5 +interfac 4 +david 3 +christianson 3 +user 3 +work 3 +assist 3 +automat 3 +also 3 +second 2 +till 2 +washington 2 +comput 2 +univers 2 +current 2 +graduat 2 +interest 2 +studi 2 +activ 2 +interact 2 +recent 2 +build 2 +shop 2 +simpl 2 +anderson 2 +weld 2 +salesin 2 +cohen 2 +develop 2 +camera 2 +local 2 +dave 2 +inform 2 +check 2 +midnight 1 +nowher 1 +babi 1 +christiansondbc 1 +scienc 1 +engin 1 +depart 1 +seattl 1 +workin 1 +spare 1 +time 1 +third 1 +year 1 +student 1 +atth 1 +inaiand 1 +gotten 1 +mayb 1 +even 1 +graphicsa 1 +well 1 +knowledg 1 +represent 1 +goal 1 +directedbehavior 1 +mix 1 +initi 1 +recognit 1 +buzzwordacquisit 1 +context 1 +human 1 +prototyp 1 +us 1 +intellig 1 +bobdoorenbo 1 +shopbot 1 +rather 1 +somehowintegr 1 +variou 1 +line 1 +store 1 +built 1 +applicationthat 1 +read 1 +pars 1 +basket 1 +order 1 +determinewhat 1 +product 1 +collabor 1 +sean 1 +michael 1 +moviethat 1 +demonstr 1 +appl 1 +intern 1 +russel 1 +technologyinto 1 +experi 1 +perpetr 1 +graphic 1 +debugg 1 +theucpop 1 +famili 1 +planner 1 +programm 1 +client 1 +number 1 +peopl 1 +whose 1 +live 1 +touch 1 +grow 1 +everi 1 +manual 1 +isher 1 +spent 1 +undergradu 1 +career 1 +theunivers 1 +chicago 1 +carboload 1 +harold 1 +chicken 1 +theanim 1 +agent 1 +publicationschristianson 1 +declar 1 +control 1 +cinematographi 1 +appear 1 +aaai 1 +firbi 1 +mcdougal 1 +fast 1 +map 1 +support 1 +navig 1 +object 1 +symposium 1 +sensor 1 +fusion 1 +boston 1 +novemb 1 +find 1 +thechateau 1 +cynic 1 +offic 1 +withfreder 1 +william 1 +darren 1 +adam 1 +gloriou 1 +leader 1 +juan 1 +import 1 +thing 1 +favorit 1 +practic 1 +judo 1 +compet 1 +senior 1 +nation 1 +sibl 1 +sisterjust 1 +school 1 +librari 1 +michigan 1 +surf 1 +cut 1 +edg 1 +research 1 +supercollid 1 +realli 1 +feel 1 +like 1 +slack 1 +mirski 1 +help 1 +watch 1 +hero 1 +youth 1 +duel 1 +death 1 +wwwf 1 +grudg 1 +match 1 +fame 1 +fortun 1 +respons 1 +week 1 +game 1 +domain 1 +straight 1 +doomgat 1 +sai 1 +evil 1 +book 1 +tick 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..3ab90cd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,39 @@ +page 2 +johnson 2 +washington 2 +take 2 +quiz 2 +home 1 +dave 1 +david 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +research 1 +interest 1 +navig 1 +assist 1 +hypertext 1 +readersproject 1 +activ 1 +racquetbal 1 +golf 1 +basketbal 1 +softbal 1 +tutori 1 +script 1 +fit 1 +togeth 1 +theracquetbal 1 +creat 1 +look 1 +thecreat 1 +assess 1 +form 1 +give 1 +last 1 +modifi 1 +mondai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..d2100343 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,161 @@ +spin 6 +seattl 6 +system 5 +page 4 +extens 4 +undergradu 4 +notr 4 +dame 4 +david 3 +dion 3 +washington 3 +current 3 +unix 3 +server 3 +level 3 +univers 3 +world 3 +person 2 +research 2 +work 2 +dynam 2 +kernel 2 +languag 2 +modula 2 +user 2 +mach 2 +first 2 +stuff 2 +time 2 +surf 2 +visit 2 +comput 2 +scienc 2 +condit 2 +major 2 +life 2 +stai 2 +touch 2 +movi 2 +transport 2 +traffic 2 +home 1 +ddion 1 +yeah 1 +right 1 +like 1 +anyth 1 +okai 1 +mayb 1 +littl 1 +academ 1 +past 1 +year 1 +brian 1 +bershad 1 +primari 1 +respons 1 +construct 1 +thespinoper 1 +oper 1 +applic 1 +achiev 1 +impress 1 +perform 1 +extend 1 +safeti 1 +protect 1 +maintain 1 +written 1 +slight 1 +variant 1 +think 1 +run 1 +link 1 +us 1 +intercept 1 +call 1 +emul 1 +environ 1 +previou 1 +havework 1 +studi 1 +help 1 +implement 1 +memori 1 +manag 1 +commun 1 +subsystem 1 +afraid 1 +around 1 +wouldn 1 +claim 1 +know 1 +cool 1 +ipromis 1 +soon 1 +netscap 1 +enhancedthi 1 +hold 1 +breath 1 +meanwhil 1 +site 1 +occasion 1 +distract 1 +engin 1 +reason 1 +homepag 1 +featur 1 +date 1 +view 1 +campu 1 +weather 1 +occupi 1 +vast 1 +program 1 +dai 1 +debug 1 +manual 1 +solv 1 +countless 1 +problem 1 +institut 1 +band 1 +trumpet 1 +section 1 +racquetbal 1 +ladder 1 +main 1 +outlet 1 +athlet 1 +espn 1 +sportzon 1 +sport 1 +todai 1 +rest 1 +dilbert 1 +learn 1 +real 1 +restaur 1 +fine 1 +eateri 1 +recommend 1 +other 1 +region 1 +list 1 +line 1 +guid 1 +excel 1 +public 1 +statu 1 +infam 1 +marin 1 +leagu 1 +basebal 1 +team 1 +bean 1 +shop 1 +last 1 +modifi 1 +mondai 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..d75db12b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,127 @@ +parallel 4 +derrick 3 +comput 3 +project 3 +data 3 +administr 3 +extens 3 +colleg 3 +advanc 3 +scienc 2 +univers 2 +washington 2 +offer 2 +graduat 2 +student 2 +fall 2 +experi 2 +commun 2 +develop 2 +design 2 +system 2 +tool 2 +research 2 +perform 2 +portabl 2 +collect 2 +topic 2 +engin 2 +teach 2 +assist 2 +index 1 +weathersbi 1 +bullssupersonicsi 1 +pursu 1 +phdin 1 +seattl 1 +beauti 1 +campu 1 +li 1 +heart 1 +ofseattl 1 +mani 1 +divers 1 +often 1 +prei 1 +therebyextend 1 +time 1 +averag 1 +career 1 +practic 1 +interestssignific 1 +skill 1 +sheet 1 +share 1 +server 1 +arrai 1 +languag 1 +compil 1 +host 1 +token 1 +ring 1 +protocol 1 +base 1 +network 1 +securityresearch 1 +interestsmi 1 +center 1 +around 1 +distribut 1 +challengespres 1 +field 1 +on 1 +conveni 1 +typic 1 +foremost 1 +goal 1 +run 1 +ordistribut 1 +environ 1 +howev 1 +suffer 1 +final 1 +theseenviron 1 +extra 1 +challeng 1 +asynchron 1 +independ 1 +event 1 +daunt 1 +task 1 +distributedenviron 1 +issu 1 +address 1 +group 1 +page 1 +spaa 1 +paper 1 +gener 1 +comm 1 +dissert 1 +integr 1 +softwar 1 +projectacadem 1 +achievementsinstructor 1 +summer 1 +curriculum 1 +cours 1 +certif 1 +program 1 +collegeinstructor 1 +start 1 +undergradu 1 +tutor 1 +women 1 +minoritystud 1 +depart 1 +engineeringoutstand 1 +award 1 +person 1 +interest 1 +interact 1 +cnnfinanciala 1 +newslet 1 +would 1 +javaw 1 +weathersbyderrick 1 +edutu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..4ea6fdb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,123 @@ +game 4 +northern 4 +note 4 +postscript 4 +dewei 3 +draft 3 +read 3 +ireland 3 +page 3 +brian 2 +washington 2 +need 2 +like 2 +avail 2 +carri 2 +hopefulli 2 +work 2 +pleas 2 +statist 2 +terrorist 2 +relat 2 +link 2 +thorough 2 +china 2 +home 1 +pagebrian 1 +deweyabout 1 +first 1 +year 1 +student 1 +depart 1 +comput 1 +scienceand 1 +engin 1 +univers 1 +doyou 1 +know 1 +music 1 +book 1 +ilov 1 +plai 1 +ride 1 +bike 1 +write 1 +letter 1 +drink 1 +myfavorit 1 +color 1 +blue 1 +favorit 1 +ocean 1 +atlant 1 +oldroomm 1 +think 1 +anim 1 +public 1 +finger 1 +irelandi 1 +return 1 +trip 1 +belfast 1 +june 1 +sixti 1 +pagesof 1 +interview 1 +luggag 1 +getthos 1 +semi 1 +chaotic 1 +readabl 1 +enlighteningformat 1 +feel 1 +free 1 +shoulder 1 +period 1 +make 1 +html 1 +format 1 +goal 1 +encourag 1 +frequent 1 +feedback 1 +soon 1 +possibl 1 +mail 1 +well 1 +much 1 +time 1 +thisproject 1 +late 1 +progress 1 +near 1 +ofth 1 +summer 1 +imag 1 +thecurr 1 +sinn 1 +fein 1 +inform 1 +bibliographi 1 +death 1 +injuri 1 +alreadi 1 +addict 1 +recuri 1 +check 1 +fascin 1 +histori 1 +develop 1 +ancient 1 +imageek 1 +york 1 +cuni 1 +provid 1 +mani 1 +interest 1 +site 1 +jansteen 1 +seen 1 +edulast 1 +modifi 1 +tuesdai 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..76ed5b5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,119 @@ +washington 4 +comput 4 +home 3 +dfasulo 3 +scienc 3 +fiction 3 +graduat 2 +student 2 +depart 2 +univers 2 +seattl 2 +work 2 +interest 2 +amber 2 +recommend 2 +random 2 +thing 2 +theori 2 +pagewelcom 1 +fasulo 1 +page 1 +third 1 +year 1 +williamscolleg 1 +computersci 1 +appli 1 +mathemat 1 +class 1 +note 1 +portrait 1 +contain 1 +slight 1 +inaccuraci 1 +find 1 +eastlak 1 +engin 1 +ofwashington 1 +offic 1 +chateau 1 +email 1 +academ 1 +graphic 1 +biologi 1 +person 1 +fantasi 1 +written 1 +otherwis 1 +fact 1 +probabl 1 +honest 1 +identifi 1 +illustr 1 +merlin 1 +corwin 1 +pictur 1 +favorit 1 +charact 1 +mine 1 +roger 1 +zelazni 1 +chronicl 1 +imag 1 +taken 1 +drpg 1 +publish 1 +phage 1 +press 1 +would 1 +anyon 1 +like 1 +book 1 +also 1 +seri 1 +babylon 1 +creativ 1 +write 1 +poetri 1 +absolut 1 +link 1 +athlet 1 +particular 1 +order 1 +tenni 1 +kwon 1 +distanc 1 +run 1 +role 1 +plai 1 +depend 1 +cat 1 +homepag 1 +friend 1 +fellow 1 +william 1 +alumnu 1 +sean 1 +sandi 1 +look 1 +woman 1 +former 1 +grad 1 +wendi 1 +belluomini 1 +dress 1 +dogbert 1 +peopl 1 +ask 1 +worthwhil 1 +area 1 +research 1 +whether 1 +abstract 1 +us 1 +better 1 +explan 1 +goal 1 +futur 1 +ever 1 +given 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..7799764b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,30 @@ +martin 2 +dickei 2 +home 2 +page 2 +dickeycomput 1 +scienc 1 +engineeringunivers 1 +washingtonwelcom 1 +weekli 1 +schedulenarr 1 +resum 1 +blurbcs 1 +engr 1 +autumn 1 +favorit 1 +seattl 1 +coffe 1 +housesfavorit 1 +internet 1 +site 1 +sister 1 +bookspirograph 1 +java 1 +script 1 +garg 1 +plai 1 +washington 1 +eduupd 1 +tuesdai 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..2c835562 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,187 @@ +peopl 5 +washington 4 +comput 3 +bershad 3 +chen 3 +romer 3 +cach 3 +seattl 3 +vote 3 +commit 3 +denni 2 +high 2 +baer 2 +brad 2 +calder 2 +grunwald 2 +paper 2 +issu 2 +instruct 2 +polici 2 +dynam 2 +page 2 +conflict 2 +univers 2 +dlee 2 +home 2 +good 2 +alwai 2 +huberthumphrei 2 +begin 2 +occur 2 +would 2 +dream 2 +primari 1 +interest 1 +perform 1 +architectur 1 +researchwith 1 +smart 1 +likejean 1 +loup 1 +brian 1 +alan 1 +eustac 1 +dirk 1 +andt 1 +recent 1 +energi 1 +manag 1 +system 1 +gener 1 +fetch 1 +specul 1 +execut 1 +isca 1 +map 1 +resolutionon 1 +standard 1 +hardwar 1 +osdi 1 +avoid 1 +miss 1 +larg 1 +direct 1 +mappedcach 1 +asplo 1 +effect 1 +differ 1 +code 1 +reorder 1 +algorithm 1 +qualifi 1 +project 1 +report 1 +contact 1 +work 1 +scienc 1 +engin 1 +depart 1 +offic 1 +sieg 1 +index 1 +pointer 1 +hotlist 1 +entri 1 +point 1 +explor 1 +yahoo 1 +yellow 1 +internet 1 +lyco 1 +realli 1 +search 1 +enginefor 1 +guid 1 +click 1 +million 1 +sensibl 1 +mind 1 +conced 1 +thatpolit 1 +almost 1 +choic 1 +lesser 1 +evil 1 +tweedledumand 1 +tweedlede 1 +abstain 1 +theyar 1 +present 1 +presid 1 +appoint 1 +go 1 +torummag 1 +around 1 +live 1 +next 1 +four 1 +year 1 +consid 1 +allth 1 +stew 1 +rather 1 +show 1 +humphrei 1 +taught 1 +lesson 1 +still 1 +enjoi 1 +nixon 1 +suprem 1 +court 1 +whentricia 1 +juli 1 +find 1 +silver 1 +thread 1 +among 1 +gold 1 +theblack 1 +russel 1 +baker 1 +ford 1 +without 1 +flummeri 1 +hesit 1 +chanc 1 +draw 1 +back 1 +ineffect 1 +concern 1 +act 1 +initi 1 +element 1 +truth 1 +ignor 1 +kill 1 +countless 1 +idea 1 +splendid 1 +plan 1 +moment 1 +definit 1 +provid 1 +move 1 +sort 1 +thing 1 +help 1 +never 1 +otherwis 1 +whole 1 +stream 1 +event 1 +decis 1 +rais 1 +favor 1 +manner 1 +unforeseen 1 +incid 1 +meet 1 +materi 1 +assist 1 +magic 1 +could 1 +come 1 +whatev 1 +goeth 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..3dc9a258 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,119 @@ +home 4 +inform 3 +comput 3 +page 2 +doug 2 +zongker 2 +research 2 +contact 2 +todai 2 +wast 2 +time 2 +sarcasm 2 +actual 2 +scienc 2 +univers 2 +depart 2 +homepag 2 +anoth 1 +unnecessari 1 +pagececi 1 +well 1 +much 1 +anywai 1 +sure 1 +casual 1 +mention 1 +name 1 +buti 1 +feel 1 +strongli 1 +bold 1 +noless 1 +exhaust 1 +list 1 +usual 1 +public 1 +classeshow 1 +meth 1 +unusu 1 +toxic 1 +custard 1 +workshop 1 +filesth 1 +mento 1 +galleryvisit 1 +supercolliderth 1 +useless 1 +date 1 +cron 1 +player 1 +databas 1 +test 1 +ground 1 +caveat 1 +user 1 +lucki 1 +brows 1 +avirtu 1 +intend 1 +treasur 1 +trove 1 +whichmai 1 +us 1 +realli 1 +first 1 +year 1 +grad 1 +student 1 +engineeringdepart 1 +ofwashington 1 +graduat 1 +michigan 1 +state 1 +imajor 1 +andminor 1 +math 1 +dubiou 1 +honorsjunior 1 +apprentic 1 +keeper 1 +brotherhood 1 +crouton 1 +death 1 +cart 1 +pizzicato 1 +intern 1 +club 1 +member 1 +bryan 1 +worst 1 +execut 1 +vice 1 +presid 1 +charg 1 +emerg 1 +backup 1 +clicker 1 +cruis 1 +highwai 1 +inhigh 1 +gear 1 +sit 1 +buttstar 1 +screen 1 +tast 1 +background 1 +stolen 1 +labor 1 +wheremi 1 +sister 1 +work 1 +dougz 1 +washington 1 +class 1 +last 1 +edit 1 +thursdai 1 +novemb 1 +hit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..daf18636 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,92 @@ +system 10 +oper 7 +dylan 3 +interact 3 +implement 3 +schedul 3 +work 3 +mcname 2 +washington 2 +research 2 +applic 2 +perform 2 +activ 2 +project 2 +current 2 +oodb 2 +us 2 +java 2 +jame 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +person 1 +inform 1 +concentr 1 +applicationsand 1 +mechanismthat 1 +allow 1 +page 1 +replacementpolici 1 +kernel 1 +polici 1 +caus 1 +poorli 1 +help 1 +machoper 1 +mechan 1 +thathelp 1 +user 1 +level 1 +thread 1 +properli 1 +kernelthread 1 +experi 1 +lead 1 +spin 1 +folk 1 +build 1 +architecturethat 1 +gener 1 +idea 1 +tailor 1 +carri 1 +start 1 +opal 1 +thesi 1 +investig 1 +betweenobject 1 +orient 1 +databas 1 +virtual 1 +memori 1 +demonstr 1 +signific 1 +improv 1 +achiv 1 +commod 1 +differ 1 +done 1 +addit 1 +improvementscan 1 +come 1 +modifi 1 +slightli 1 +betterserv 1 +paperscv 1 +cours 1 +geoff 1 +voelker 1 +built 1 +winter 1 +quarter 1 +seminar 1 +dedic 1 +gave 1 +lectureintroduc 1 +languag 1 +environ 1 +slide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..a0fca93e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,32 @@ +christoph 3 +lewi 3 +graduat 2 +student 2 +washington 2 +home 1 +page 1 +dept 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +echri 1 +hello 1 +glad 1 +could 1 +make 1 +work 1 +program 1 +languag 1 +project 1 +offic 1 +hour 1 +tent 1 +mondai 1 +wednesdai 1 +sieg 1 +last 1 +modifi 1 +thur 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..a41a4c52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,4 @@ +page 1 +blank 1 +ecrock 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..9196a583 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,118 @@ +comput 4 +home 3 +page 3 +know 3 +washington 2 +well 2 +peopl 2 +time 2 +includ 2 +current 2 +work 2 +compil 2 +want 2 +place 2 +mail 2 +find 2 +eddi 1 +hong 1 +second 1 +year 1 +graduat 1 +scienc 1 +studentat 1 +univers 1 +tosai 1 +busi 1 +type 1 +littl 1 +hand 1 +hadto 1 +resum 1 +link 1 +postcriptand 1 +plain 1 +text 1 +form 1 +offic 1 +room 1 +seig 1 +hall 1 +anna 1 +karlin 1 +craig 1 +chamber 1 +theoret 1 +model 1 +dynam 1 +specif 1 +workingon 1 +develop 1 +line 1 +algorithm 1 +fordynam 1 +plan 1 +qual 1 +project 1 +access 1 +sinc 1 +august 1 +histor 1 +fact 1 +free 1 +activit 1 +vine 1 +branchesmi 1 +sister 1 +help 1 +creat 1 +also 1 +list 1 +variou 1 +anoth 1 +interest 1 +keep 1 +industri 1 +site 1 +give 1 +insight 1 +commentari 1 +happen 1 +knowof 1 +pleas 1 +daveneti 1 +power 1 +macintosh 1 +guess 1 +make 1 +bias 1 +towardslik 1 +mac 1 +howev 1 +think 1 +better 1 +eveneasi 1 +come 1 +sometim 1 +visit 1 +appl 1 +check 1 +seattl 1 +freewai 1 +traffic 1 +look 1 +advic 1 +import 1 +book 1 +worldher 1 +us 1 +inform 1 +alwai 1 +found 1 +address 1 +domain 1 +name 1 +countri 1 +friend 1 +stand 1 +edhong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..332d8be4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,74 @@ +compil 5 +egger 4 +research 4 +project 4 +washington 3 +share 3 +asplo 3 +susan 2 +architectur 2 +current 2 +dynam 2 +schedul 2 +time 2 +http 1 +home 1 +depart 1 +computersci 1 +engin 1 +univers 1 +seattl 1 +voic 1 +email 1 +offic 1 +sieg 1 +hall 1 +interest 1 +comput 1 +back 1 +emphasi 1 +onexperiment 1 +perform 1 +analysi 1 +work 1 +issu 1 +incompil 1 +optim 1 +data 1 +optimizationsand 1 +instruct 1 +processor 1 +design 1 +multithreadedarchitectur 1 +algorithm 1 +reduc 1 +fals 1 +multithread 1 +spinprevi 1 +cach 1 +coher 1 +code 1 +prefetch 1 +memori 1 +machin 1 +miscellan 1 +tool 1 +workload 1 +new 1 +program 1 +committe 1 +call 1 +paper 1 +homepag 1 +inform 1 +look 1 +click 1 +list 1 +might 1 +qual 1 +amast 1 +degre 1 +begin 1 +thesi 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..163f967c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,197 @@ +system 12 +spin 10 +oper 9 +extens 9 +washington 5 +univers 5 +safe 4 +dynam 4 +implement 4 +protect 4 +princeton 3 +work 3 +thread 3 +link 3 +kernel 3 +interfac 3 +support 3 +perform 3 +us 3 +describ 3 +paper 3 +wcsss 3 +softwar 3 +emin 2 +comput 2 +scienc 2 +current 2 +year 2 +summer 2 +jersei 2 +develop 2 +schedul 2 +domain 2 +also 2 +wrote 2 +call 2 +novel 2 +aspect 2 +provid 2 +high 2 +fine 2 +grain 2 +share 2 +time 2 +code 2 +data 2 +design 2 +simul 2 +mechan 2 +modula 2 +microkernel 2 +applic 2 +specif 2 +servic 2 +workshop 2 +sirer 1 +sirereg 1 +depart 1 +engin 1 +seattl 1 +backgroundi 1 +third 1 +graduat 1 +student 1 +grew 1 +istanbul 1 +turkei 1 +receiv 1 +toward 1 +spinproject 1 +prof 1 +brian 1 +bershad 1 +spent 1 +bell 1 +labswork 1 +theplan 1 +help 1 +build 1 +prototyp 1 +displai 1 +thesumm 1 +research 1 +center 1 +recent 1 +thevesta 1 +project 1 +projectsmi 1 +goal 1 +adapt 1 +synchron 1 +andprotect 1 +subsystem 1 +machin 1 +specificaspect 1 +interrupt 1 +path 1 +andsom 1 +miscellan 1 +alarm 1 +mach 1 +compat 1 +mean 1 +ofextend 1 +arbitrari 1 +schedulingpolici 1 +allowsu 1 +achiev 1 +strand 1 +isdesign 1 +fault 1 +isol 1 +allowsisol 1 +withconflict 1 +symbol 1 +simultan 1 +activ 1 +hide 1 +beassur 1 +possibl 1 +access 1 +clincher 1 +extensionsthat 1 +want 1 +protectionenforc 1 +overhead 1 +performanceweb 1 +server 1 +networkingstack 1 +main 1 +object 1 +reduc 1 +http 1 +latenc 1 +andminim 1 +load 1 +mip 1 +instruct 1 +coupl 1 +calledmipsi 1 +robust 1 +enough 1 +spec 1 +benchmark 1 +standard 1 +ofnew 1 +educ 1 +tool 1 +researchplatform 1 +page 1 +mipsi 1 +featuresand 1 +avail 1 +namespac 1 +manag 1 +write 1 +experi 1 +safeti 1 +sosp 1 +issu 1 +hoto 1 +posit 1 +compar 1 +hardwar 1 +sigop 1 +european 1 +review 1 +version 1 +technic 1 +report 1 +march 1 +measur 1 +limit 1 +parallel 1 +senior 1 +independ 1 +june 1 +talkslanguag 1 +slide 1 +present 1 +first 1 +compil 1 +tucson 1 +arizona 1 +interestswhenev 1 +find 1 +opportun 1 +follow 1 +sail 1 +windsurf 1 +dive 1 +ski 1 +bikingmak 1 +outdoor 1 +cloth 1 +andhik 1 +dylan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..8be83660 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,140 @@ +page 5 +project 4 +eric 3 +comput 3 +washington 3 +seattl 3 +prof 3 +method 3 +work 3 +home 2 +scienc 2 +univers 2 +longer 2 +black 2 +recent 2 +solut 2 +implicit 2 +solv 2 +signal 2 +like 2 +everybodi 2 +els 2 +anderson 1 +andersonwher 1 +find 1 +sieg 1 +hall 1 +depart 1 +engin 1 +street 1 +honor 1 +june 1 +decisionin 1 +aclu 1 +reno 1 +mind 1 +thedecis 1 +mere 1 +interim 1 +step 1 +could 1 +read 1 +take 1 +probabl 1 +still 1 +commit 1 +feloni 1 +care 1 +speech 1 +freedom 1 +themarketplac 1 +idea 1 +fact 1 +third 1 +year 1 +graduat 1 +student 1 +mean 1 +imostli 1 +panic 1 +qual 1 +also 1 +try 1 +write 1 +paper 1 +henri 1 +greensideof 1 +duke 1 +finish 1 +master 1 +thesi 1 +onsteadi 1 +state 1 +particular 1 +nonlinear 1 +biharmon 1 +stabil 1 +criterion 1 +explicit 1 +restrict 1 +fourth 1 +power 1 +spatial 1 +resolut 1 +timesteppingmethod 1 +backward 1 +euler 1 +necessari 1 +numer 1 +analysisissu 1 +involv 1 +newton 1 +nonlinearequ 1 +spars 1 +matrix 1 +newtonstep 1 +interplai 1 +pictur 1 +realli 1 +spiffi 1 +bodi 1 +code 1 +astrophys 1 +simul 1 +support 1 +data 1 +structuresbi 1 +richard 1 +andersoni 1 +process 1 +music 1 +aim 1 +automat 1 +transcript 1 +acoust 1 +anna 1 +karlin 1 +isth 1 +musician 1 +interest 1 +applet 1 +first 1 +link 1 +text 1 +small 1 +graphic 1 +section 1 +materi 1 +preparedfor 1 +last 1 +fall 1 +snapshot 1 +mostli 1 +famili 1 +prove 1 +brother 1 +final 1 +weather 1 +meander 1 +sept 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..cc0ade5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,197 @@ +softbot 9 +learn 9 +inform 8 +internet 6 +servic 6 +master 6 +thesi 6 +machin 5 +etzioni 4 +metacrawl 4 +home 3 +univers 3 +washington 3 +search 3 +comparison 3 +agent 3 +program 3 +oren 2 +comput 2 +research 2 +enabl 2 +softwar 2 +page 2 +effici 2 +aaai 2 +world 2 +multi 2 +us 2 +juli 2 +intellig 2 +robot 2 +understand 2 +plan 2 +paper 2 +student 2 +advis 2 +hewlett 2 +packard 2 +databas 2 +repositori 2 +irvin 2 +knowledg 2 +discoveri 2 +contain 2 +relev 2 +pageoren 1 +pagedepart 1 +scienc 1 +engin 1 +ofwashington 1 +seattl 1 +voic 1 +mail 1 +offic 1 +sieg 1 +hall 1 +room 1 +brief 1 +bioand 1 +photo 1 +current 1 +human 1 +user 1 +state 1 +heor 1 +want 1 +accomplish 1 +disambigu 1 +request 1 +anddynam 1 +determin 1 +satisfi 1 +finalist 1 +discoveraward 1 +technolog 1 +innov 1 +field 1 +searchmultipl 1 +indic 1 +parallel 1 +provid 1 +sophist 1 +pruningopt 1 +netrecommend 1 +choic 1 +ahoi 1 +white 1 +locatesindividu 1 +high 1 +accuraci 1 +bruteforc 1 +brute 1 +analyz 1 +hypothes 1 +second 1 +whenrun 1 +sparc 1 +select 1 +public 1 +gather 1 +foc 1 +move 1 +food 1 +chain 1 +deploi 1 +theweb 1 +ascal 1 +shop 1 +wide 1 +autonom 1 +postscript 1 +html 1 +base 1 +interfac 1 +cacm 1 +fact 1 +fiction 1 +forecast 1 +ieee 1 +expert 1 +august 1 +without 1 +repli 1 +brook 1 +magazin 1 +decemb 1 +ijcai 1 +sound 1 +close 1 +reason 1 +toappear 1 +first 1 +addit 1 +richardseg 1 +bernard 1 +fileretriev 1 +neal 1 +lesh 1 +planner 1 +unix 1 +keith 1 +golden 1 +universalquantif 1 +incomplet 1 +terranc 1 +goan 1 +error 1 +mikeperkowitz 1 +erik 1 +selberg 1 +zamir 1 +jonathan 1 +shake 1 +undergradu 1 +stephen 1 +soderland 1 +umass 1 +amherst 1 +roomi 1 +bruce 1 +lesourd 1 +robert 1 +spiger 1 +lockhe 1 +center 1 +william 1 +alford 1 +wisconsin 1 +greg 1 +fitchenholtz 1 +guido 1 +hunt 1 +dymitr 1 +mozdyniewicz 1 +quark 1 +resourc 1 +minecontain 1 +neuroprosearch 1 +recent 1 +neural 1 +network 1 +illinoi 1 +induct 1 +group 1 +statlib 1 +data 1 +algorithm 1 +statist 1 +learningtoolbox 1 +bonn 1 +german 1 +list 1 +usenet 1 +faq 1 +access 1 +count 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..2e5be4b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,35 @@ +feelei 3 +mike 2 +comput 2 +home 1 +pagemik 1 +scienc 1 +finish 1 +done 1 +soon 1 +thesi 1 +concern 1 +global 1 +memori 1 +manag 1 +workstationclust 1 +also 1 +work 1 +distribut 1 +opalproject 1 +join 1 +faculti 1 +univers 1 +british 1 +columbia 1 +injanuari 1 +inform 1 +avail 1 +us 1 +link 1 +papersmi 1 +research 1 +summarycvsoutheast 1 +idaholast 1 +modifi 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..7dd2d9fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,20 @@ +fisher 3 +page 3 +washington 3 +chri 2 +home 2 +pictur 1 +fisherdepart 1 +comput 1 +scienc 1 +engineeringbox 1 +univers 1 +seattl 1 +voic 1 +mail 1 +sieg 1 +hall 1 +room 1 +current 1 +construct 1 +return 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..a8b8d0c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,73 @@ +comput 4 +washington 3 +place 2 +might 2 +depart 1 +scienc 1 +engineeringunivers 1 +seattl 1 +sieg 1 +hall 1 +schedulethi 1 +quarter 1 +autumn 1 +ta 1 +cours 1 +rather 1 +work 1 +gener 1 +exam 1 +check 1 +scheduleto 1 +otherwis 1 +around 1 +probablyb 1 +found 1 +librari 1 +somewher 1 +nice 1 +read 1 +paper 1 +research 1 +activitiesmi 1 +main 1 +interest 1 +algorithm 1 +specif 1 +areasof 1 +parallel 1 +geometri 1 +public 1 +meander 1 +denni 1 +outta 1 +mind 1 +vista 1 +pea 1 +music 1 +site 1 +chateau 1 +galleri 1 +fund 1 +drive 1 +thing 1 +alec 1 +wolman 1 +server 1 +seven 1 +lost 1 +soul 1 +captur 1 +html 1 +listen 1 +phone 1 +booth 1 +mofo 1 +peopl 1 +luci 1 +paul 1 +peach 1 +ruel 1 +look 1 +like 1 +moment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..44a8d718 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,84 @@ +comput 5 +research 5 +scienc 4 +interest 4 +univers 3 +modugno 2 +home 2 +washington 2 +human 2 +formal 2 +model 2 +softwar 2 +current 2 +activ 2 +chair 2 +carnegi 2 +mellon 2 +francesmari 1 +pagefrancesmari 1 +page 1 +depart 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +sieg 1 +hall 1 +phone 1 +mail 1 +main 1 +interact 1 +also 1 +user 1 +program 1 +specif 1 +verif 1 +technolog 1 +polici 1 +welcom 1 +opportun 1 +collabor 1 +relat 1 +topic 1 +project 1 +safeti 1 +machin 1 +interfac 1 +previou 1 +public 1 +avail 1 +onlin 1 +summari 1 +ofmi 1 +thesi 1 +real 1 +time 1 +concurr 1 +distribut 1 +system 1 +parallel 1 +algorthim 1 +profession 1 +basic 1 +symposium 1 +uist 1 +demonstr 1 +educ 1 +march 1 +august 1 +mathemat 1 +cornel 1 +anyth 1 +recent 1 +includecycl 1 +ski 1 +languag 1 +cultur 1 +spanish 1 +previouslyitalian 1 +vegetarian 1 +cook 1 +elleri 1 +line 1 +greet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..0c9d033f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,45 @@ +forman 3 +georg 2 +home 2 +comput 2 +pagegeorg 1 +pagei 1 +receiv 1 +scienc 1 +optim 1 +compil 1 +ariadn 1 +scalabl 1 +pattern 1 +match 1 +parallel 1 +trace 1 +debugg 1 +public 1 +mobil 1 +hyperlink 1 +librari 1 +someth 1 +interest 1 +free 1 +handi 1 +softwar 1 +script 1 +written 1 +word 1 +puzzl 1 +water 1 +song 1 +chang 1 +netscap 1 +anim 1 +gforman 1 +comhom 1 +page 1 +mail 1 +finger 1 +weather 1 +dept 1 +live 1 +pictur 1 +gener 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..de578b7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,48 @@ +page 4 +friedman 4 +marc 3 +home 2 +english 2 +research 2 +keith 2 +cours 1 +watercolor 1 +applet 1 +camp 1 +checklist 1 +spanish 1 +collabor 1 +dictionari 1 +poetri 1 +favorit 1 +quot 1 +link 1 +elsewher 1 +occam 1 +inform 1 +gather 1 +agent 1 +golden 1 +wordbot 1 +bike 1 +trip 1 +artifici 1 +intellig 1 +codi 1 +kwok 1 +weld 1 +ucpop 1 +planner 1 +tool 1 +chang 1 +life 1 +work 1 +nietzschein 1 +netscap 1 +bookmark 1 +file 1 +everi 1 +refer 1 +visitor 1 +sinc 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..96a75d21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,28 @@ +garrett 3 +charli 2 +genet 2 +home 1 +page 1 +address 1 +seattl 1 +research 1 +interest 1 +compil 1 +graphic 1 +neural 1 +network 1 +algorithm 1 +game 1 +plai 1 +algorithmspap 1 +line 1 +algorithmsformerli 1 +member 1 +cecil 1 +group 1 +univers 1 +ofwashington 1 +bookshelf 1 +audio 1 +file 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..f4d1e86f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,49 @@ +comput 3 +georg 2 +scienc 2 +graphic 2 +render 2 +imag 2 +winkenbach 1 +winkenbachdepart 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +mail 1 +georgew 1 +washington 1 +eduphon 1 +interest 1 +multimedia 1 +thesi 1 +work 1 +doneund 1 +supervis 1 +professor 1 +david 1 +salesin 1 +deal 1 +appli 1 +tradit 1 +illustr 1 +techniqu 1 +theautomat 1 +three 1 +dimension 1 +model 1 +imagescr 1 +prototyp 1 +system 1 +found 1 +link 1 +follow 1 +galleri 1 +grail 1 +laboratori 1 +depart 1 +engin 1 +wife 1 +home 1 +page 1 +taweewan 1 +siwadun 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..7c9ecb1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,211 @@ +home 9 +page 8 +washington 5 +scienc 5 +configur 5 +duke 5 +file 4 +badro 3 +seattl 3 +graduat 3 +mathemat 3 +work 3 +engin 3 +univers 3 +pagecs 3 +link 3 +comput 3 +greg 2 +welcom 2 +email 2 +recent 2 +spring 2 +part 2 +time 2 +cours 2 +first 2 +competit 2 +emac 2 +readm 2 +archiv 2 +zshell 2 +chronicl 2 +newspap 2 +articl 2 +especi 2 +jackson 2 +joel 2 +interest 2 +languag 2 +nation 2 +foundat 2 +pagegreg 1 +last 1 +updat 1 +eduaddress 1 +nesbit 1 +hello 1 +homepag 1 +pleas 1 +feel 1 +free 1 +send 1 +feedback 1 +address 1 +alwai 1 +isuppos 1 +construct 1 +keep 1 +check 1 +back 1 +excitingfeatur 1 +dukeunivers 1 +complet 1 +degre 1 +doubl 1 +major 1 +incomput 1 +amcurr 1 +emploi 1 +senior 1 +research 1 +scientist 1 +fortransworld 1 +numer 1 +small 1 +compani 1 +origin 1 +locat 1 +indurham 1 +headquart 1 +bermuda 1 +myapart 1 +also 1 +full 1 +student 1 +computersci 1 +depart 1 +ofwashington 1 +fall 1 +softwar 1 +seminarcs 1 +compil 1 +seminar 1 +newer 1 +stuff 1 +philosophi 1 +note 1 +misc 1 +patch 1 +fvwm 1 +place 1 +best 1 +show 1 +redhat 1 +desktop 1 +entri 1 +transworldnumer 1 +ieeenat 1 +program 1 +victori 1 +vertic 1 +winter 1 +issu 1 +magazin 1 +contain 1 +geneticalgorithm 1 +person 1 +rsum 1 +data 1 +date 1 +busi 1 +sampl 1 +drew 1 +bycomput 1 +simpl 1 +magic 1 +creat 1 +canterburi 1 +progress 1 +variou 1 +random 1 +pictur 1 +life 1 +definitelynot 1 +mani 1 +hobbi 1 +includ 1 +tenni 1 +ski 1 +hole 1 +volleybal 1 +juggl 1 +piano 1 +plai 1 +game 1 +rubik 1 +cube 1 +linux 1 +freewar 1 +unix 1 +music 1 +sarahmclachlan 1 +billi 1 +yahoo 1 +list 1 +parliamentari 1 +procedur 1 +ncaa 1 +basketbal 1 +lyco 1 +search 1 +commun 1 +daili 1 +univ 1 +unoffici 1 +microsoft 1 +corpor 1 +world 1 +wide 1 +server 1 +gatewai 1 +user 1 +group 1 +histor 1 +imag 1 +hotjava 1 +global 1 +network 1 +navig 1 +perl 1 +practic 1 +extract 1 +report 1 +virtual 1 +librari 1 +inter 1 +unif 1 +devic 1 +connect 1 +write 1 +html 1 +sgml 1 +seinfeld 1 +index 1 +friend 1 +sitcom 1 +materi 1 +base 1 +upon 1 +support 1 +fellowship 1 +opinion 1 +find 1 +conclus 1 +recommend 1 +express 1 +public 1 +author 1 +necessarili 1 +reflect 1 +view 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..41b2dd03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,294 @@ +java 11 +applet 9 +linden 6 +gamelan 6 +page 5 +scienc 5 +webview 5 +cool 5 +graduat 4 +comput 4 +us 4 +tree 4 +model 4 +program 3 +univers 3 +softwar 3 +interest 3 +jar 3 +staff 3 +pick 3 +work 3 +movi 3 +qualiti 3 +greg 2 +third 2 +year 2 +polit 2 +doubl 2 +look 2 +posit 2 +develop 2 +link 2 +enter 2 +found 2 +travel 2 +elicit 2 +flight 2 +prefer 2 +wasrat 2 +andwa 2 +repositori 2 +also 2 +book 2 +sorri 2 +time 2 +code 2 +submit 2 +ucsd 2 +project 2 +life 2 +artifici 2 +neuralnetwork 2 +learn 2 +user 2 +iter 2 +prison 2 +imag 2 +headless 2 +horseman 2 +transpar 2 +reflect 2 +made 2 +avail 2 +washington 2 +home 1 +lindenmi 1 +love 1 +wifecorina 1 +current 1 +scienceher 1 +thecomput 1 +depart 1 +ofwashington 1 +slave 1 +awai 1 +toward 1 +lofti 1 +goal 1 +complet 1 +undergraduatedegre 1 +california 1 +diego 1 +anodd 1 +major 1 +go 1 +leav 1 +school 1 +decemb 1 +mactiv 1 +check 1 +resum 1 +allow 1 +orset 1 +addit 1 +famili 1 +altavistawebviewand 1 +metawebview 1 +instead 1 +keyword 1 +foraltavista 1 +metacrawl 1 +hit 1 +search 1 +servic 1 +return 1 +graph 1 +displai 1 +searchservic 1 +autom 1 +assist 1 +emul 1 +dialog 1 +travelag 1 +client 1 +gradual 1 +whileallow 1 +brows 1 +real 1 +data 1 +research 1 +prototyp 1 +quit 1 +function 1 +even 1 +earli 1 +stage 1 +highli 1 +rate 1 +andjar 1 +altavistawebview 1 +winner 1 +thejava 1 +contest 1 +publish 1 +walsh 1 +foundat 1 +meilleur 1 +best 1 +first 1 +linear 1 +ballet 1 +oop 1 +capabl 1 +browser 1 +sourc 1 +demonstr 1 +buffer 1 +avoid 1 +flicker 1 +thread 1 +give 1 +run 1 +certainli 1 +could 1 +cleaner 1 +though 1 +expect 1 +thought 1 +might 1 +enough 1 +standardsto 1 +impress 1 +mylgramm 1 +particl 1 +draw 1 +lgrammer 1 +much 1 +realist 1 +theparticletre 1 +recent 1 +start 1 +judg 1 +evalu 1 +thejar 1 +archiv 1 +summer 1 +dawn 1 +civil 1 +ademonstr 1 +applic 1 +show 1 +plan 1 +techniqu 1 +cansuccessfulli 1 +appli 1 +entertain 1 +myriadsoftwar 1 +professor 1 +belew 1 +filippo 1 +menzer 1 +latentenergi 1 +environ 1 +tool 1 +developingartifici 1 +experi 1 +evolutionari 1 +enviro 1 +paper 1 +hank 1 +lesh 1 +theautom 1 +assit 1 +majeski 1 +spitzer 1 +localizedinteract 1 +spatial 1 +constraint 1 +dilemma 1 +associ 1 +econom 1 +scientist 1 +krishnamoorthi 1 +paturi 1 +blume 1 +liden 1 +esen 1 +hardwaretradeoff 1 +boolean 1 +concept 1 +world 1 +congress 1 +recurr 1 +neural 1 +network 1 +sdilemma 1 +unpublish 1 +honor 1 +thesi 1 +adam 1 +carlson 1 +sujai 1 +parekh 1 +wrote 1 +funrai 1 +tracer 1 +ofth 1 +inc 1 +graphic 1 +closeup 1 +chess 1 +duel 1 +assembl 1 +requir 1 +sphere 1 +withreflect 1 +shadow 1 +distribut 1 +trace 1 +adaptivesampl 1 +mess 1 +thing 1 +pattern 1 +thespher 1 +causingth 1 +rai 1 +refract 1 +multipl 1 +surfaceand 1 +intern 1 +second 1 +anim 1 +call 1 +strike 1 +theanim 1 +written 1 +inventor 1 +manipul 1 +thed 1 +origin 1 +file 1 +alow 1 +quicktim 1 +mbquicktim 1 +doesn 1 +compress 1 +anyfurth 1 +least 1 +anyth 1 +resembl 1 +reason 1 +stuff 1 +dilbert 1 +cognit 1 +info 1 +occasion 1 +chateau 1 +guggenheim 1 +annex 1 +engin 1 +seattl 1 +glinden 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..b8837a35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,27 @@ +grant 3 +brian 2 +kri 2 +home 2 +pagebrian 1 +awai 1 +homework 1 +relat 1 +infowork 1 +backgrounduwdynam 1 +compil 1 +groupuw 1 +depart 1 +computersci 1 +engineeringperson 1 +stuffperson 1 +backgroundmi 1 +daughter 1 +isismi 1 +trip 1 +singaporemi 1 +bookmarksmi 1 +public 1 +keylast 1 +updat 1 +octob 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..5833103a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,98 @@ +grove 3 +spend 3 +us 3 +dave 2 +washington 2 +offic 2 +plai 2 +cecil 2 +author 2 +trip 2 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +chateau 1 +sieg 1 +worki 1 +time 1 +pure 1 +object 1 +orient 1 +langaug 1 +vehicl 1 +integr 1 +research 1 +area 1 +languag 1 +design 1 +program 1 +environ 1 +optim 1 +compil 1 +also 1 +hord 1 +consult 1 +hang 1 +aroundth 1 +fring 1 +spinproject 1 +actual 1 +attempt 1 +graduat 1 +sometim 1 +soonish 1 +much 1 +less 1 +frequent 1 +paper 1 +wouldn 1 +complet 1 +without 1 +dilbertfix 1 +strip 1 +thathit 1 +littl 1 +close 1 +home 1 +current 1 +manag 1 +underacheiv 1 +fantasi 1 +footbal 1 +team 1 +summer 1 +hampshir 1 +work 1 +gui 1 +scoutreserv 1 +greaterlowel 1 +council 1 +pictur 1 +casunset 1 +taken 1 +right 1 +cabin 1 +kick 1 +anoth 1 +everi 1 +boi 1 +someth 1 +silli 1 +white 1 +water 1 +raft 1 +especi 1 +cool 1 +month 1 +toronto 1 +drove 1 +back 1 +toseattl 1 +took 1 +number 1 +detour 1 +along 1 +somehihglight 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..70b19512 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,61 @@ +fpga 8 +system 5 +design 4 +asynchron 4 +architectur 4 +hauck 3 +washington 3 +current 3 +multi 3 +rapid 3 +prototyp 3 +circuit 3 +scott 2 +univers 2 +graduat 2 +work 2 +board 2 +level 2 +well 2 +triptych 2 +montag 2 +develop 2 +comput 1 +scienc 1 +engin 1 +depart 1 +seattl 1 +year 1 +student 1 +thoughi 1 +also 1 +interest 1 +parallel 1 +june 1 +person 1 +biographi 1 +educ 1 +experi 1 +public 1 +curriculum 1 +vitaeresearch 1 +survei 1 +methodolog 1 +first 1 +improv 1 +densiti 1 +commerci 1 +springbok 1 +partit 1 +assign 1 +rout 1 +topolog 1 +gener 1 +chinook 1 +project 1 +hardwar 1 +softwar 1 +synthesi 1 +simul 1 +embed 1 +applic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..aae46406 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,4 @@ +kevin 1 +hinshaw 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..5f3b57bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,66 @@ +macduff 3 +place 2 +week 2 +time 2 +also 2 +stuff 2 +ultrasound 1 +imag 1 +emma 1 +elspeth 1 +name 1 +subject 1 +chang 1 +without 1 +notic 1 +unborn 1 +daughter 1 +fromconcept 1 +ripe 1 +inmid 1 +decemb 1 +view 1 +profil 1 +ly 1 +back 1 +lookingup 1 +head 1 +right 1 +upper 1 +half 1 +torso 1 +theleft 1 +busi 1 +obsess 1 +impend 1 +fatherhood 1 +master 1 +thesi 1 +part 1 +chinook 1 +project 1 +pass 1 +silli 1 +possibl 1 +rememb 1 +myspam 1 +unfortun 1 +wast 1 +html 1 +brows 1 +around 1 +interest 1 +usingwebcrawl 1 +pointer 1 +neat 1 +frogstv 1 +nationpenn 1 +tellermus 1 +lyricsian 1 +washington 1 +dept 1 +comput 1 +scienc 1 +engin 1 +univ 1 +washingtonseattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..7325e61d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,145 @@ +memori 11 +network 7 +page 5 +jamrozik 4 +system 4 +size 4 +washington 3 +global 3 +high 3 +speed 3 +reduc 3 +disk 3 +access 3 +transfer 3 +latenc 3 +research 2 +distribut 2 +object 2 +orient 2 +languag 2 +levi 2 +vernon 2 +karlin 2 +feelei 2 +voelker 2 +cach 2 +need 2 +node 2 +remot 2 +provid 2 +subpag 2 +environ 2 +pictur 2 +herv 1 +jamrozikherv 1 +postdoc 1 +univers 1 +sinc 1 +septemb 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +sieg 1 +hall 1 +phone 1 +mail 1 +main 1 +interest 1 +softwar 1 +engin 1 +current 1 +work 1 +memoi 1 +manag 1 +workstat 1 +cluster 1 +hank 1 +mari 1 +anna 1 +mike 1 +geoff 1 +greatli 1 +encourag 1 +virtual 1 +file 1 +therebi 1 +intens 1 +applic 1 +primari 1 +lightli 1 +load 1 +temporari 1 +back 1 +store 1 +introduc 1 +level 1 +hierarchi 1 +name 1 +li 1 +logic 1 +local 1 +fundament 1 +unit 1 +perform 1 +factor 1 +recent 1 +modern 1 +processor 1 +increas 1 +order 1 +coverag 1 +amort 1 +cost 1 +unfortun 1 +small 1 +trend 1 +thu 1 +odd 1 +studi 1 +mean 1 +us 1 +evan 1 +inproceed 1 +seventh 1 +confer 1 +architectur 1 +support 1 +program 1 +oper 1 +octob 1 +postscript 1 +thesi 1 +debug 1 +theuniversit 1 +joseph 1 +fourier 1 +grenobl 1 +involv 1 +guideproject 1 +laboratoir 1 +bull 1 +imag 1 +part 1 +imaginstitut 1 +extrem 1 +peopl 1 +area 1 +snot 1 +visit 1 +louvr 1 +galleri 1 +look 1 +map 1 +franc 1 +europ 1 +world 1 +somefamili 1 +somefriend 1 +eduv 1 +march 1 +decemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..4282ce51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,25 @@ +jason 3 +secoski 2 +washington 2 +weather 2 +home 1 +pagejason 1 +eduaddress 1 +comput 1 +scienc 1 +engin 1 +depart 1 +sieg 1 +hall 1 +cunivers 1 +boxseattl 1 +offic 1 +frequent 1 +us 1 +page 1 +projectseattl 1 +forecast 1 +channel 1 +secoskylast 1 +modifi 1 +thursdai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..aa2672d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,82 @@ +applet 3 +jeremi 2 +baer 2 +educ 2 +stuff 2 +current 2 +comput 2 +washington 2 +softwar 2 +project 2 +baerjeremi 1 +never 1 +school 1 +interfer 1 +mark 1 +twain 1 +dream 1 +made 1 +william 1 +shakespearei 1 +graduat 1 +student 1 +scienceat 1 +univers 1 +interest 1 +includ 1 +artifici 1 +intellig 1 +human 1 +computerinteract 1 +multimedia 1 +engineeringtool 1 +gener 1 +music 1 +person 1 +creativ 1 +cool 1 +place 1 +spend 1 +signific 1 +time 1 +pierian 1 +spring 1 +softwareoregon 1 +museum 1 +scienc 1 +industri 1 +omsi 1 +pomona 1 +collegeher 1 +look 1 +mine 1 +eight 1 +puzzl 1 +java 1 +work 1 +progress 1 +experiment 1 +virtual 1 +travel 1 +copi 1 +effect 1 +demo 1 +question 1 +static 1 +layer 1 +analysi 1 +program 1 +feel 1 +stress 1 +realli 1 +silli 1 +littl 1 +macintosh 1 +thati 1 +wrote 1 +year 1 +download 1 +like 1 +metacrawl 1 +searchcopyright 1 +jbaer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..de2e7131 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,60 @@ +home 4 +washington 4 +buhler 3 +jeremi 2 +browser 2 +control 2 +offic 2 +phone 2 +jbuhler 2 +pagejeremi 1 +pagedo 1 +attempt 1 +adjust 1 +transmiss 1 +statu 1 +first 1 +year 1 +student 1 +institut 1 +univers 1 +depart 1 +comput 1 +scienc 1 +engin 1 +sieg 1 +hall 1 +mail 1 +address 1 +finger 1 +tako 1 +import 1 +stufflectur 1 +note 1 +suffix 1 +tree 1 +postscript 1 +latex 1 +research 1 +come 1 +soon 1 +cours 1 +schedulemi 1 +public 1 +keycyb 1 +activ 1 +electron 1 +frontier 1 +foundat 1 +grinsrecommend 1 +readingmi 1 +undergradu 1 +alma 1 +mater 1 +rice 1 +universityquot 1 +quotesmi 1 +page 1 +return 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..0d842d3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,270 @@ +optim 10 +compil 9 +languag 8 +research 6 +program 6 +pass 6 +techniqu 5 +seattl 4 +also 4 +cecil 4 +orient 4 +design 4 +develop 4 +vortex 4 +permit 4 +would 3 +work 3 +object 3 +implement 3 +system 3 +interest 3 +wife 3 +jeff 2 +dean 2 +depart 2 +washington 2 +offic 2 +view 2 +postscript 2 +integr 2 +larg 2 +line 2 +applic 2 +effici 2 +explor 2 +whole 2 +analysi 2 +affect 2 +guid 2 +appli 2 +exampl 2 +recent 2 +author 2 +spent 2 +biplan 2 +ride 2 +flight 2 +comput 1 +scienc 1 +engin 1 +univers 1 +chateau 1 +sieg 1 +dang 1 +build 1 +weren 1 +futur 1 +plansi 1 +plan 1 +graduat 1 +summer 1 +join 1 +western 1 +laboratori 1 +sunni 1 +palo 1 +alto 1 +bought 1 +hous 1 +nearbi 1 +menlo 1 +park 1 +curriculum 1 +vita 1 +summari 1 +teach 1 +experi 1 +projectsi 1 +primarili 1 +project 1 +pure 1 +langaug 1 +us 1 +avehicl 1 +area 1 +environ 1 +weintend 1 +scale 1 +real 1 +world 1 +andto 1 +keep 1 +true 1 +goal 1 +vortexcompil 1 +current 1 +codein 1 +much 1 +group 1 +involv 1 +track 1 +bug 1 +hang 1 +spinproject 1 +meet 1 +spin 1 +extens 1 +oper 1 +systemmicrokernel 1 +support 1 +dynam 1 +adapt 1 +interfacesand 1 +direct 1 +control 1 +stillmaintain 1 +inter 1 +isol 1 +spring 1 +quarter 1 +organ 1 +seminar 1 +concern 1 +ofobject 1 +particular 1 +usedto 1 +improv 1 +perform 1 +increment 1 +andhow 1 +interact 1 +especiallyprofil 1 +howwhol 1 +assumedthat 1 +access 1 +entir 1 +manycompromis 1 +made 1 +exist 1 +becom 1 +unnecessari 1 +lesson 1 +learn 1 +wholeprogram 1 +toward 1 +programminglanguag 1 +flexibl 1 +base 1 +underlyingimplement 1 +valid 1 +three 1 +principaldesign 1 +forobject 1 +defin 1 +independentintermedi 1 +ishigh 1 +enough 1 +level 1 +featur 1 +messagesend 1 +closur 1 +creation 1 +contain 1 +uniqu 1 +wayof 1 +compos 1 +parallel 1 +obtain 1 +better 1 +result 1 +repeatedli 1 +run 1 +passessepar 1 +intraprocedur 1 +classanalysi 1 +profil 1 +receiv 1 +class 1 +predict 1 +inlin 1 +aliasanalysi 1 +split 1 +singl 1 +combin 1 +part 1 +wai 1 +structuringoptim 1 +kind 1 +composit 1 +stillallow 1 +independ 1 +eachoth 1 +nice 1 +framework 1 +specifi 1 +iter 1 +data 1 +flowanalys 1 +client 1 +withrel 1 +littl 1 +effort 1 +dead 1 +assignmentelimin 1 +approxim 1 +code 1 +publicationssom 1 +paper 1 +personali 1 +love 1 +spici 1 +food 1 +mild 1 +four 1 +letter 1 +word 1 +coke 1 +probabl 1 +kick 1 +caffein 1 +habit 1 +enjoy 1 +moment 1 +heidi 1 +daughter 1 +victoria 1 +realli 1 +like 1 +somedai 1 +honeymoon 1 +kauai 1 +hurrican 1 +iniki 1 +galvin 1 +fly 1 +guess 1 +never 1 +anyth 1 +anymor 1 +took 1 +consist 1 +minut 1 +around 1 +downtown 1 +puget 1 +sound 1 +travel 1 +model 1 +feel 1 +dare 1 +sadli 1 +insur 1 +coverag 1 +doesn 1 +passeng 1 +walk 1 +wing 1 +back 1 +enjoi 1 +fantast 1 +even 1 +highli 1 +recommend 1 +look 1 +someth 1 +number 1 +rather 1 +lengthi 1 +hotlist 1 +jdean 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..f8fa3bcd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,176 @@ +softwar 12 +analysi 8 +deviat 8 +requir 7 +rsml 6 +comput 5 +system 5 +rees 4 +safeti 4 +specif 4 +critic 3 +scienc 3 +control 3 +state 3 +develop 3 +languag 3 +tool 3 +semant 3 +damon 2 +doctor 2 +research 2 +jdrees 2 +washington 2 +place 2 +applic 2 +advantag 2 +public 2 +exampl 2 +oper 2 +project 2 +especi 2 +base 2 +us 2 +hazard 2 +procedur 2 +hazop 2 +studi 2 +dissert 2 +make 2 +avail 2 +kurt 2 +partridg 2 +univers 2 +postscript 2 +waxahachi 2 +leveson 2 +heimdahl 2 +hildreth 2 +process 2 +ieee 2 +transact 2 +industri 2 +home 1 +pagejon 1 +reesepost 1 +groupdepart 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +offic 1 +chateau 1 +guggenheim 1 +annex 1 +phone 1 +mail 1 +interest 1 +problem 1 +awar 1 +devic 1 +structur 1 +caus 1 +catastroph 1 +fail 1 +hardwar 1 +becom 1 +less 1 +expens 1 +expect 1 +wider 1 +rang 1 +convent 1 +technolog 1 +flexibl 1 +come 1 +price 1 +behavior 1 +complex 1 +unpredict 1 +perhap 1 +best 1 +three 1 +mile 1 +island 1 +incid 1 +great 1 +difficulti 1 +diagnos 1 +emerg 1 +stage 1 +success 1 +respect 1 +reason 1 +colleagu 1 +concentr 1 +commun 1 +call 1 +machin 1 +valid 1 +specifi 1 +tca 1 +avion 1 +thesi 1 +invent 1 +signific 1 +concept 1 +borrow 1 +henc 1 +name 1 +link 1 +html 1 +transcript 1 +current 1 +write 1 +confer 1 +articl 1 +summar 1 +group 1 +possibl 1 +dynam 1 +displai 1 +search 1 +siang 1 +integr 1 +alpha 1 +version 1 +publicli 1 +sean 1 +sandi 1 +draft 1 +document 1 +includ 1 +discuss 1 +variant 1 +improv 1 +academ 1 +histori 1 +inform 1 +california 1 +irvin 1 +linguist 1 +rice 1 +high 1 +school 1 +nanci 1 +mat 1 +holli 1 +engin 1 +septemb 1 +steven 1 +dolin 1 +curv 1 +interpret 1 +diagnost 1 +techniqu 1 +januari 1 +februari 1 +ortega 1 +experi 1 +statechart 1 +sixth 1 +intern 1 +workshop 1 +design 1 +como 1 +itali 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..9d0e445d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,104 @@ +jack 8 +compil 7 +multithread 6 +simultan 5 +page 4 +level 4 +parallel 4 +instruct 4 +postscript 4 +susan 4 +egger 4 +issu 4 +processor 4 +schedul 4 +washington 3 +abstract 3 +henri 3 +levi 3 +tullsen 3 +proceed 3 +architectur 3 +examin 3 +pictur 3 +home 2 +comput 2 +current 2 +research 2 +joel 2 +emer 2 +rebecca 2 +stamm 2 +anddean 2 +implement 2 +balanc 2 +optim 2 +static 2 +dynam 2 +superscalar 2 +written 2 +report 2 +lojack 1 +lojlo 1 +depart 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +construct 1 +paper 1 +convert 1 +thread 1 +submit 1 +public 1 +juli 1 +exploit 1 +choic 1 +fetch 1 +dean 1 +annual 1 +intern 1 +symposium 1 +philadelphia 1 +first 1 +suif 1 +workshop 1 +stanford 1 +januari 1 +improv 1 +increas 1 +sigplan 1 +confer 1 +program 1 +languag 1 +design 1 +jolla 1 +california 1 +june 1 +compar 1 +gener 1 +interact 1 +loph 1 +qualifi 1 +work 1 +support 1 +interest 1 +also 1 +includ 1 +vliw 1 +well 1 +particular 1 +investig 1 +person 1 +find 1 +franklin 1 +eseattl 1 +orsieg 1 +hall 1 +room 1 +phone 1 +coupl 1 +recent 1 +paintbal 1 +experi 1 +yahoojlo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..47473156 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,45 @@ +user 3 +interfac 3 +home 2 +page 2 +washington 2 +research 2 +project 2 +sherman 1 +shermanjoebob 1 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +usami 1 +interest 1 +designinform 1 +navig 1 +visual 1 +activ 1 +inform 1 +local 1 +survei 1 +useclass 1 +hcreat 1 +impress 1 +pagequ 1 +time 1 +sarahsoftballstuff 1 +might 1 +want 1 +automat 1 +suggest 1 +link 1 +relat 1 +topic 1 +directori 1 +us 1 +pagesif 1 +browser 1 +support 1 +send 1 +mail 1 +tojoebob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..1e9859b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,61 @@ +joshua 3 +seim 3 +josh 3 +home 2 +page 2 +current 2 +comput 2 +scienc 2 +sever 2 +abstract 1 +biolog 1 +base 1 +neural 1 +network 1 +system 1 +test 1 +theunivers 1 +washington 1 +depart 1 +begunin 1 +lockean 1 +blank 1 +slate 1 +learn 1 +emul 1 +observedbehavior 1 +successfulli 1 +accomplish 1 +task 1 +graduatingfrom 1 +colleg 1 +travel 1 +volit 1 +recent 1 +start 1 +demonstr 1 +potenti 1 +independ 1 +creativ 1 +thought 1 +taskw 1 +present 1 +earn 1 +expect 1 +take 1 +year 1 +document 1 +provid 1 +overviewof 1 +cognit 1 +ambulatori 1 +achiev 1 +organ 1 +person 1 +academichierarchi 1 +addition 1 +futur 1 +work 1 +discuss 1 +within 1 +context 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..6b72b1bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,4 @@ +jovan 2 +home 2 +page 2 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..cd45029e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,67 @@ +joanna 3 +page 3 +academ 2 +main 2 +interest 2 +comput 2 +graphic 2 +alma 2 +biologi 2 +powerjoanna 1 +pagehi 1 +cat 1 +interestsmi 1 +scienc 1 +grad 1 +school 1 +uwneat 1 +stuff 1 +matercool 1 +link 1 +jonathan 1 +shadegraph 1 +research 1 +uwduoton 1 +reproductionmi 1 +especi 1 +genet 1 +molecular 1 +matermost 1 +recent 1 +site 1 +gain 1 +employmentpubl 1 +power 1 +brad 1 +west 1 +eric 1 +stollnitz 1 +david 1 +salesin 1 +reproduc 1 +color 1 +imag 1 +duoton 1 +proceed 1 +siggraph 1 +york 1 +real 1 +lifepast 1 +homesdiversionsgend 1 +issuesstatu 1 +women 1 +sciencenow 1 +home 1 +pagefeminist 1 +major 1 +onlineultim 1 +frisbeefun 1 +stufffroggi 1 +sean 1 +quotesbrad 1 +comic 1 +musicevan 1 +jokes 1 +pagesmi 1 +herojpow 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..b0619a4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,16 @@ +jonathan 2 +shake 2 +washington 2 +sieg 1 +hall 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +ahoi 1 +homepag 1 +finderresumlinkslast 1 +updat 1 +august 1 +jshake 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..125d5ff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,5 @@ +juan 1 +alemanyjuan 1 +alemani 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..9cf741bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,116 @@ +comput 7 +work 5 +scienc 3 +washington 3 +data 3 +surfac 3 +taught 3 +slide 3 +kari 2 +pulli 2 +home 2 +antero 2 +year 2 +univers 2 +graphic 2 +vision 2 +research 2 +depart 2 +project 2 +report 2 +technic 2 +engin 2 +subdivis 2 +find 2 +class 2 +theclass 2 +seattl 2 +pagekari 1 +pullii 1 +third 1 +graduat 1 +student 1 +engineeringdepart 1 +ofwashington 1 +interest 1 +andmathemat 1 +try 1 +combin 1 +aspect 1 +thesedisciplin 1 +professor 1 +closest 1 +tonyderos 1 +actual 1 +left 1 +uwfor 1 +pixar 1 +lindashapiro 1 +addition 1 +werner 1 +stuetzl 1 +andjohn 1 +mcdonald 1 +statist 1 +duchamp 1 +mathemat 1 +andhugu 1 +hopp 1 +rick 1 +szeliski 1 +microsoft 1 +qual 1 +tribor 1 +triplet 1 +base 1 +object 1 +recognitionsystem 1 +linda 1 +universityof 1 +current 1 +surfacereconstruct 1 +rang 1 +multipl 1 +baselin 1 +camerasystem 1 +obtain 1 +waveletanalysi 1 +geometri 1 +reflect 1 +function 1 +pass 1 +gener 1 +examin 1 +topic 1 +rigidregistr 1 +click 1 +architecturesystem 1 +susanegg 1 +distribut 1 +brianbershad 1 +imag 1 +understand 1 +steven 1 +tanimoto 1 +present 1 +sketch 1 +siggraph 1 +getto 1 +remov 1 +wavelet 1 +herear 1 +speaker 1 +note 1 +eacutesum 1 +eacut 1 +sieg 1 +hall 1 +email 1 +kapu 1 +union 1 +folk 1 +takavainionti 1 +oulu 1 +finland 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..37e5b5b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,17 @@ +washington 3 +univers 2 +anna 1 +karlinanna 1 +rochel 1 +karlinassoci 1 +professor 1 +sincejuli 1 +work 1 +comput 1 +scienc 1 +engin 1 +depart 1 +seattl 1 +home 1 +page 1 +paperskarlin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..04791d44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,10 @@ +page 3 +home 2 +yeunghom 1 +yeungperson 1 +infomi 1 +picturemi 1 +researchtelnet 1 +machinessend 1 +email 1 +back 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..d0a30889 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,83 @@ +kurt 5 +partridg 4 +softwar 4 +comput 3 +washington 3 +specif 3 +safeti 3 +postscript 3 +graduat 2 +scienc 2 +univers 2 +interact 2 +environ 2 +poster 2 +academ 1 +inform 1 +student 1 +depart 1 +interest 1 +includ 1 +engin 1 +usabl 1 +readabl 1 +applic 1 +formal 1 +method 1 +page 1 +describ 1 +other 1 +work 1 +area 1 +also 1 +dabbl 1 +user 1 +interfac 1 +design 1 +human 1 +java 1 +program 1 +recent 1 +public 1 +bddtcl 1 +visual 1 +manipul 1 +binari 1 +decisiondiagram 1 +html 1 +preview 1 +nanci 1 +leveson 1 +bauer 1 +mat 1 +heimdahl 1 +wayn 1 +ohlrich 1 +vivek 1 +ratan 1 +rees 1 +critic 1 +nasa 1 +confer 1 +qualiti 1 +background 1 +start 1 +school 1 +complet 1 +berkelei 1 +live 1 +love 1 +suburban 1 +life 1 +thousand 1 +oak 1 +parent 1 +sister 1 +name 1 +oti 1 +right 1 +humor 1 +corner 1 +seattl 1 +voic 1 +kepart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..9942ef72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,50 @@ +keith 2 +golden 2 +washington 2 +home 1 +page 1 +questa 1 +pagina 1 +anch 1 +italiano 1 +researchsoftbotsplanningkrselect 1 +publicationscurriculum 1 +vita 1 +also 1 +inpostscriptrandom 1 +hackingwordbot 1 +collabor 1 +dictionari 1 +like 1 +bicycl 1 +tour 1 +languag 1 +paint 1 +photographi 1 +natur 1 +coffe 1 +godless 1 +pinko 1 +stuff 1 +dislik 1 +suit 1 +lawyer 1 +car 1 +friend 1 +ellenmarcruben 1 +laurennickrich 1 +joannavivek 1 +advisor 1 +oren 1 +etzioni 1 +weld 1 +keithgolden 1 +depart 1 +ofcomput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +kgolden 1 +complet 1 +list 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..bf383f20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,143 @@ +univers 9 +softwar 9 +kingsum 8 +hong 8 +chow 6 +kong 6 +washington 5 +evolut 4 +page 4 +engin 3 +david 3 +notkin 3 +asynchron 3 +pcct 3 +movi 3 +scienc 2 +seattl 2 +research 2 +confer 2 +resum 2 +line 2 +respons 2 +technic 2 +workshop 2 +program 2 +transform 2 +home 2 +onlin 2 +quot 2 +educomput 1 +depart 1 +usathi 1 +inform 1 +highwai 1 +alwai 1 +construct 1 +tabl 1 +content 1 +person 1 +upcom 1 +current 1 +schedul 1 +bridg 1 +glossari 1 +suggest 1 +feedbackresearchmi 1 +advisor 1 +develop 1 +toolspap 1 +qualiti 1 +manag 1 +driven 1 +readi 1 +semi 1 +automat 1 +updat 1 +applic 1 +librari 1 +chang 1 +report 1 +revis 1 +version 1 +appear 1 +icsm 1 +asia 1 +pacif 1 +march 1 +mainten 1 +proceed 1 +ics 1 +william 1 +griswold 1 +editor 1 +intern 1 +april 1 +us 1 +site 1 +sorcererpcct 1 +terrenc 1 +parr 1 +note 1 +newbiesresumepleasedrop 1 +mailto 1 +specifi 1 +text 1 +postscript 1 +format 1 +kongchines 1 +technolog 1 +polytechn 1 +citi 1 +kongsingapor 1 +sitessingapor 1 +world 1 +wide 1 +server 1 +alumnu 1 +websom 1 +campu 1 +friendstom 1 +liew 1 +fook 1 +wang 1 +jiang 1 +weidongu 1 +relatedunivers 1 +style 1 +polici 1 +manual 1 +these 1 +dissert 1 +graduat 1 +school 1 +webserv 1 +book 1 +storeinvestmentsfre 1 +minut 1 +delai 1 +watch 1 +market 1 +data 1 +experiment 1 +mutual 1 +fund 1 +chart 1 +invest 1 +center 1 +stock 1 +commod 1 +analysismisc 1 +read 1 +chines 1 +list 1 +thoma 1 +china 1 +new 1 +servic 1 +welcom 1 +visit 1 +sinc 1 +last 1 +modifi 1 +date 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..e542eab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,90 @@ +univers 4 +seattl 4 +home 3 +kevin 2 +washington 2 +comput 2 +engin 2 +work 2 +chaotic 2 +rout 2 +electr 2 +pacif 2 +time 2 +bold 1 +boldingkwb 1 +scienc 1 +depart 1 +juvenil 1 +squirt 1 +wander 1 +search 1 +suitabl 1 +rock 1 +hunk 1 +coral 1 +cling 1 +make 1 +life 1 +task 1 +rudimentari 1 +nervou 1 +system 1 +find 1 +spot 1 +take 1 +root 1 +doesn 1 +need 1 +brain 1 +eat 1 +rather 1 +like 1 +get 1 +tenur 1 +dennett 1 +conscious 1 +explain 1 +research 1 +current 1 +build 1 +high 1 +speed 1 +latencylan 1 +router 1 +previou 1 +researchha 1 +chaoticrout 1 +form 1 +minim 1 +adapt 1 +formass 1 +parallel 1 +multicomput 1 +profession 1 +assist 1 +professor 1 +also 1 +part 1 +researchassoci 1 +ofwashington 1 +signific 1 +paper 1 +written 1 +archiv 1 +ofth 1 +group 1 +spend 1 +teach 1 +engineeringat 1 +person 1 +photo 1 +took 1 +comethyakutak 1 +moustach 1 +real 1 +case 1 +want 1 +visit 1 +anoth 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..83c28655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,34 @@ +quarter 4 +comput 3 +washington 3 +scienc 2 +ladner 2 +introduct 2 +spring 2 +richard 1 +ladnerrichard 1 +ladnerprofessor 1 +depart 1 +engin 1 +univers 1 +seattl 1 +mail 1 +phone 1 +offic 1 +sieg 1 +hall 1 +room 1 +person 1 +short 1 +biographyresearch 1 +public 1 +studentsteachingcomput 1 +program 1 +fall 1 +commun 1 +network 1 +formal 1 +model 1 +winter 1 +data 1 +structur 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..d474dbad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,112 @@ +larri 6 +washington 4 +comput 3 +univers 3 +laboratori 3 +chemistri 3 +work 3 +integr 3 +system 3 +fpga 3 +mcmurchi 2 +home 2 +page 2 +scienc 2 +area 2 +function 2 +staff 2 +design 2 +develop 2 +mactest 2 +chip 2 +mcmurchiedepart 1 +engin 1 +ofwashington 1 +seattl 1 +voic 1 +mail 1 +offic 1 +sieg 1 +hall 1 +room 1 +current 1 +research 1 +director 1 +integratedsystem 1 +western 1 +hework 1 +quantum 1 +graduat 1 +studi 1 +primari 1 +focu 1 +number 1 +evalu 1 +class 1 +ofintegr 1 +gaussian 1 +later 1 +appli 1 +theconstruct 1 +larg 1 +spars 1 +hamiltonian 1 +matric 1 +coauthorof 1 +comprehens 1 +packag 1 +program 1 +meld 1 +us 1 +abinitio 1 +calcul 1 +small 1 +molecul 1 +sinc 1 +join 1 +depart 1 +andengin 1 +supervis 1 +technic 1 +ofth 1 +coauthor 1 +wirec 1 +aschemat 1 +captur 1 +allow 1 +code 1 +withschemat 1 +symbol 1 +creat 1 +concis 1 +parameteriz 1 +representationof 1 +also 1 +involv 1 +andcommerci 1 +softwar 1 +hardwareenviron 1 +test 1 +board 1 +andsubsystem 1 +recent 1 +andha 1 +gener 1 +purpos 1 +perform 1 +driven 1 +router 1 +northwest 1 +cost 1 +vlsi 1 +tester 1 +triptych 1 +high 1 +densiti 1 +architectur 1 +public 1 +journal 1 +articl 1 +upcom 1 +confer 1 +return 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..3c9e8110 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,207 @@ +softwar 10 +comput 8 +system 8 +leveson 7 +safeti 7 +model 6 +professor 4 +research 4 +specif 4 +aircraft 4 +analysi 4 +design 4 +engin 4 +human 4 +nanci 3 +recent 3 +project 3 +member 3 +inform 3 +paper 3 +avail 3 +machin 3 +languag 3 +gener 3 +interfac 3 +scienc 2 +washington 2 +california 2 +year 2 +area 2 +build 2 +real 2 +life 2 +topic 2 +student 2 +requir 2 +tca 2 +collis 2 +avoid 2 +anyth 2 +current 2 +work 2 +specifi 2 +includ 2 +safe 2 +fault 2 +nation 2 +council 2 +space 2 +appli 2 +rsml 2 +deriv 2 +finger 2 +home 1 +page 1 +levesondepart 1 +engineeringunivers 1 +washingtonbox 1 +express 1 +mail 1 +sieg 1 +hall 1 +seattl 1 +join 1 +faculti 1 +come 1 +search 1 +rain 1 +receiv 1 +degre 1 +mathand 1 +ucla 1 +spent 1 +form 1 +univers 1 +irvin 1 +start 1 +concern 1 +problem 1 +time 1 +failur 1 +result 1 +loss 1 +properti 1 +advantag 1 +nobodi 1 +question 1 +goal 1 +except 1 +misanthrop 1 +matter 1 +anywai 1 +produc 1 +aform 1 +commerci 1 +airspac 1 +lesson 1 +learn 1 +never 1 +like 1 +seem 1 +pleas 1 +though 1 +adopt 1 +theiroffici 1 +behavior 1 +claim 1 +thatyou 1 +read 1 +fact 1 +take 1 +train 1 +late 1 +safetyresearch 1 +also 1 +autom 1 +highwai 1 +automobil 1 +variou 1 +aerospac 1 +subtop 1 +toler 1 +verif 1 +valid 1 +editor 1 +chief 1 +ieee 1 +transact 1 +softwareengin 1 +elect 1 +board 1 +director 1 +computingresearch 1 +associ 1 +commissionon 1 +technic 1 +committe 1 +public 1 +polici 1 +chair 1 +studi 1 +evalu 1 +shuttl 1 +process 1 +levesoni 1 +fellow 1 +award 1 +aiaa 1 +systemsaward 1 +contribut 1 +aeronaut 1 +technolog 1 +andscienc 1 +develop 1 +field 1 +promotingrespons 1 +practic 1 +propertyar 1 +stake 1 +book 1 +safewar 1 +addison 1 +weslei 1 +publish 1 +list 1 +isalso 1 +copi 1 +favorit 1 +actual 1 +keynoteaddress 1 +conf 1 +melbourn 1 +titl 1 +high 1 +pressur 1 +steam 1 +click 1 +qual 1 +follow 1 +hazardanalysi 1 +techniqu 1 +writtenin 1 +state 1 +style 1 +call 1 +determin 1 +wai 1 +tree 1 +analys 1 +newrequir 1 +principl 1 +hazard 1 +control 1 +cockpit 1 +analyz 1 +accid 1 +report 1 +involv 1 +mode 1 +awar 1 +problemsand 1 +issu 1 +interact 1 +citi 1 +airport 1 +perhap 1 +contact 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..74f3ae50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,202 @@ +system 17 +levi 12 +oper 12 +comput 8 +architectur 7 +henri 6 +proc 6 +symposium 5 +hank 4 +distribut 4 +languag 4 +feelei 4 +support 4 +research 3 +multithread 3 +principl 3 +washington 3 +workstat 3 +student 3 +octob 3 +michael 3 +thekkath 3 +parallel 2 +simultan 2 +environ 2 +recent 2 +singl 2 +address 2 +space 2 +paper 2 +chair 2 +program 2 +dessert 2 +global 2 +memori 2 +karlin 2 +implement 2 +dean 2 +tullsen 2 +susan 2 +egger 2 +intern 2 +jeffrei 2 +chase 2 +lazowska 2 +novemb 2 +conf 2 +arch 2 +prog 2 +asplo 2 +home 1 +page 1 +professor 1 +join 1 +faculti 1 +current 1 +project 1 +focu 1 +particularli 1 +object 1 +base 1 +projectcal 1 +opal 1 +deal 1 +theetch 1 +projecti 1 +produc 1 +tool 1 +perform 1 +instrument 1 +optim 1 +binari 1 +execut 1 +author 1 +book 1 +numer 1 +includ 1 +outstand 1 +select 1 +four 1 +consecutiveacm 1 +symposia 1 +former 1 +sigop 1 +special 1 +interest 1 +group 1 +onoper 1 +theth 1 +tobe 1 +held 1 +hold 1 +carnegi 1 +mellon 1 +universityand 1 +univers 1 +come 1 +consult 1 +engin 1 +digit 1 +equip 1 +corpor 1 +work 1 +span 1 +rang 1 +fellow 1 +associ 1 +machineryand 1 +recipi 1 +fulbright 1 +scholar 1 +award 1 +eleven 1 +master 1 +nine 1 +survivedlevi 1 +supervis 1 +haveal 1 +escap 1 +academ 1 +posit 1 +major 1 +lab 1 +glu 1 +usual 1 +befound 1 +ski 1 +bike 1 +plai 1 +tenni 1 +help 1 +lead 1 +thedepart 1 +infam 1 +softbal 1 +team 1 +smile 1 +potato 1 +death 1 +sampl 1 +seattl 1 +mani 1 +parlor 1 +publicationsreduc 1 +network 1 +latenc 1 +us 1 +subpag 1 +jamrozik 1 +voelker 1 +evan 1 +vernon 1 +inproceed 1 +seventh 1 +confer 1 +postscript 1 +manag 1 +cluster 1 +william 1 +morgan 1 +freder 1 +pighin 1 +anna 1 +chandramohan 1 +appear 1 +decemb 1 +maxim 1 +chip 1 +annual 1 +june 1 +exploit 1 +choic 1 +instruct 1 +fetch 1 +issu 1 +implementablesimultan 1 +processor 1 +joen 1 +emer 1 +jack 1 +rebecca 1 +stamm 1 +share 1 +protect 1 +edwardd 1 +transact 1 +integr 1 +coher 1 +recover 1 +vivek 1 +narasayya 1 +first 1 +design 1 +hardwar 1 +softwar 1 +effici 1 +except 1 +handl 1 +separ 1 +data 1 +control 1 +transfer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..a4cb0362 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,114 @@ +opal 7 +address 5 +structur 4 +share 4 +singl 4 +space 4 +oper 3 +system 3 +protect 3 +need 2 +complex 2 +cooper 2 +program 2 +larg 2 +data 2 +time 2 +alpha 2 +page 2 +right 2 +access 2 +inform 2 +list 2 +relat 2 +projectop 1 +project 1 +explor 1 +tunedto 1 +applic 1 +numberof 1 +manipul 1 +persist 1 +databaseof 1 +object 1 +code 1 +exist 1 +huge 1 +enhanc 1 +andcooper 1 +uniqu 1 +interpret 1 +thu 1 +pointer 1 +base 1 +directlycommun 1 +bestor 1 +directli 1 +secondari 1 +storag 1 +without 1 +translat 1 +simplifi 1 +avail 1 +addressspac 1 +provid 1 +mip 1 +risc 1 +independ 1 +thread 1 +execut 1 +within 1 +domainthat 1 +defin 1 +virtual 1 +easili 1 +transmit 1 +oneprocess 1 +anoth 1 +result 1 +much 1 +flexibl 1 +protectionstructur 1 +permit 1 +differ 1 +dynam 1 +chang 1 +option 1 +depend 1 +trust 1 +relationshipbetween 1 +parti 1 +believ 1 +organ 1 +canimprov 1 +perform 1 +cooperatingappl 1 +prototyp 1 +built 1 +platform 1 +ofth 1 +mach 1 +sourc 1 +paper 1 +faculti 1 +member 1 +hank 1 +levi 1 +lazowska 1 +jeff 1 +chase 1 +duke 1 +univers 1 +current 1 +graduat 1 +student 1 +mike 1 +feelei 1 +ashutosh 1 +tiwari 1 +vivek 1 +narasayya 1 +dylan 1 +mcname 1 +mail 1 +archiv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..a9383e98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,139 @@ +tutori 8 +object 6 +oopsla 6 +constraint 5 +orient 5 +languag 4 +program 4 +propos 4 +washington 3 +engin 3 +imper 3 +lopez 3 +bjorn 3 +freeman 3 +benson 3 +alan 3 +born 3 +confer 3 +accept 3 +comput 2 +scienc 2 +univers 2 +implement 2 +kaleidoscop 2 +advanc 2 +also 2 +inproceed 2 +chair 2 +technolog 2 +topic 2 +encourag 2 +submiss 2 +electron 2 +lopezgu 1 +lopezlopez 1 +school 1 +sieg 1 +hall 1 +depart 1 +seattl 1 +home 1 +student 1 +dissertationresearch 1 +design 1 +curriculum 1 +vita 1 +publicationsgu 1 +brian 1 +mayoh 1 +tougu 1 +jann 1 +penjam 1 +editor 1 +constraintprogram 1 +springer 1 +verlag 1 +nato 1 +studi 1 +instituteseri 1 +seri 1 +system 1 +publisheda 1 +technic 1 +report 1 +ident 1 +european 1 +bologna 1 +itali 1 +juli 1 +virtual 1 +machin 1 +programmingsystem 1 +applic 1 +portland 1 +oregon 1 +octob 1 +tutorialsi 1 +upcom 1 +conferencein 1 +jose 1 +california 1 +peopl 1 +andsoftwar 1 +develop 1 +meet 1 +speak 1 +well 1 +known 1 +breadth 1 +depth 1 +high 1 +qualiti 1 +itsextens 1 +previou 1 +year 1 +tutorialshav 1 +cover 1 +aspect 1 +introductorysurvei 1 +industri 1 +softwar 1 +practic 1 +lead 1 +edg 1 +academicresearch 1 +respons 1 +request 1 +past 1 +attende 1 +weespeci 1 +issu 1 +anyon 1 +consid 1 +submit 1 +requestguidelin 1 +theoopsla 1 +hotlin 1 +mail 1 +enthusiast 1 +proposalswithout 1 +email 1 +address 1 +march 1 +notif 1 +withcamera 1 +readi 1 +note 1 +august 1 +interest 1 +link 1 +green 1 +direct 1 +jimi 1 +hendrix 1 +grave 1 +star 1 +war 1 +collector 1 +archiv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..d4390a02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,40 @@ +omid 2 +home 2 +madani 2 +washington 2 +depart 2 +page 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +chateau 1 +suit 1 +bhello 1 +curiou 1 +browser 1 +welcom 1 +fourth 1 +year 1 +graduat 1 +student 1 +enjoytheori 1 +also 1 +like 1 +keep 1 +touch 1 +areasinclud 1 +graphic 1 +life 1 +work 1 +academ 1 +want 1 +look 1 +islamicarchitectur 1 +isfahan 1 +best 1 +nomine 1 +citi 1 +countri 1 +iran 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..39e5978f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,18 @@ +mike 3 +perkowitz 2 +page 1 +perkowitznewsflash 1 +goe 1 +blond 1 +area 1 +research 1 +academia 1 +music 1 +creativ 1 +randomfavorit 1 +sheba 1 +voyeur 1 +written 1 +grooveneedl 1 +espressoresumemik 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..05a25691 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,64 @@ +washington 5 +langheinrich 3 +bielefeld 3 +univers 3 +depart 3 +comput 3 +marclang 3 +homepagemarc 2 +email 2 +scienc 2 +phone 2 +marc 1 +langheinrichuniversitt 1 +washingtontechnisch 1 +fakultt 1 +scienceemail 1 +imlangh 1 +techfak 1 +eduabout 1 +myselfi 1 +spent 1 +last 1 +year 1 +theunivers 1 +visit 1 +graduat 1 +student 1 +thefulbright 1 +program 1 +check 1 +follow 1 +link 1 +depthinform 1 +resum 1 +project 1 +short 1 +biopost 1 +addressa 1 +septemb 1 +back 1 +germani 1 +finish 1 +mastersat 1 +pleas 1 +contact 1 +german 1 +address 1 +homeschoolgermanyringstra 1 +maintalphon 1 +paulusplatz 1 +bielefeldphon 1 +woodlawn 1 +seattl 1 +sieg 1 +hall 1 +browser 1 +support 1 +tabl 1 +access 1 +data 1 +list 1 +formatmarc 1 +http 1 +home 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..9726fb74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,109 @@ +visual 9 +marla 8 +baker 7 +comput 7 +softwar 5 +program 5 +languag 5 +work 4 +eick 4 +washington 3 +stephen 3 +system 3 +engin 2 +user 2 +interfac 2 +interact 2 +educ 2 +collabor 2 +current 2 +object 2 +wai 2 +journal 2 +burnett 2 +larg 2 +home 1 +chief 1 +editor 1 +depart 1 +scienc 1 +univers 1 +seattl 1 +partner 1 +crime 1 +bentlei 1 +academ 1 +interestsgraph 1 +human 1 +support 1 +learn 1 +cscl 1 +graphic 1 +techniqu 1 +stevetanimoto 1 +lauren 1 +bricker 1 +coimag 1 +project 1 +devleop 1 +activ 1 +order 1 +explor 1 +cooper 1 +contol 1 +goal 1 +investig 1 +differ 1 +multipl 1 +cansimultan 1 +share 1 +manipul 1 +given 1 +assess 1 +also 1 +part 1 +time 1 +packard 1 +bell 1 +compani 1 +resum 1 +publicationsbak 1 +space 1 +fill 1 +june 1 +bohu 1 +carlson 1 +yang 1 +scale 1 +ieee 1 +special 1 +issu 1 +march 1 +margaret 1 +classif 1 +septemb 1 +tool 1 +proceed 1 +intern 1 +confer 1 +sorento 1 +itali 1 +method 1 +apparatu 1 +displai 1 +hierarch 1 +inform 1 +patent 1 +applic 1 +submit 1 +octob 1 +tutori 1 +geometr 1 +transform 1 +imag 1 +metip 1 +environ 1 +check 1 +page 1 +offic 1 +sieg 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..25c371e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,87 @@ +compil 6 +philipos 4 +dynam 4 +runtim 4 +matthai 3 +work 3 +chamber 3 +home 2 +page 2 +code 2 +like 2 +washington 2 +seattl 2 +ausland 2 +egger 2 +support 2 +system 2 +stuff 2 +project 1 +compileri 1 +beast 1 +gener 1 +optim 1 +shortterm 1 +interest 1 +figur 1 +produc 1 +good 1 +modern 1 +processor 1 +architectur 1 +applic 1 +side 1 +think 1 +interpret 1 +basedsystem 1 +real 1 +time 1 +constraint 1 +java 1 +browser 1 +canbenefit 1 +select 1 +wire 1 +asystem 1 +goe 1 +withprofessor 1 +susan 1 +eggersand 1 +craig 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +phone 1 +public 1 +bershad 1 +fast 1 +effect 1 +confer 1 +onprogram 1 +languag 1 +design 1 +implement 1 +mock 1 +andp 1 +pardyak 1 +automaticdynam 1 +event 1 +dispatch 1 +extens 1 +workshop 1 +softwar 1 +februari 1 +bookmark 1 +plai 1 +frequentlymiscellan 1 +link 1 +local 1 +importancefrom 1 +past 1 +abuwhi 1 +black 1 +blue 1 +ribbon 1 +campaign 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..1e14a64d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,174 @@ +mckenzi 7 +neil 4 +rout 4 +link 4 +page 3 +project 3 +current 3 +inform 3 +east 3 +seattl 3 +larri 3 +us 3 +contact 2 +past 2 +public 2 +person 2 +mail 2 +merl 2 +coast 2 +time 2 +carl 2 +chaotic 2 +design 2 +implement 2 +messag 2 +pass 2 +interfac 2 +network 2 +tool 2 +gemini 2 +washington 2 +comput 2 +last 2 +updat 2 +angel 2 +shot 2 +menu 1 +fine 1 +dine 1 +index 1 +futur 1 +list 1 +game 1 +mitsubishi 1 +electr 1 +research 1 +laboratori 1 +broadwai 1 +floor 1 +cambridg 1 +phone 1 +projectsgonna 1 +teenag 1 +lobotomi 1 +ramonesi 1 +live 1 +mile 1 +andwork 1 +note 1 +involv 1 +projectconcern 1 +real 1 +volum 1 +render 1 +medic 1 +data 1 +copiou 1 +free 1 +expatri 1 +graduat 1 +student 1 +work 1 +onchaot 1 +routingwith 1 +faculti 1 +advisor 1 +ebel 1 +snyder 1 +packet 1 +algorithm 1 +mesh 1 +torusnetwork 1 +dissert 1 +thecranium 1 +compatiblewith 1 +projectsi 1 +teach 1 +assist 1 +summer 1 +chip 1 +tester 1 +call 1 +mactest 1 +maintain 1 +netlist 1 +graph 1 +isomorph 1 +calledgemini 1 +industri 1 +speak 1 +knowna 1 +layout 1 +schemat 1 +avail 1 +interest 1 +pleas 1 +send 1 +mcmurchi 1 +cranium 1 +adapt 1 +packetrout 1 +proceed 1 +parallel 1 +andcommun 1 +workshop 1 +tomactest 1 +home 1 +user 1 +guid 1 +march 1 +marri 1 +pictur 1 +hous 1 +arlington 1 +massachusett 1 +head 1 +livein 1 +fashion 1 +neighborhood 1 +ofballard 1 +creativ 1 +contain 1 +exampl 1 +artworkcr 1 +adob 1 +photoshop 1 +ownedthi 1 +year 1 +onlyth 1 +memori 1 +remain 1 +shirt 1 +correctlyguess 1 +answer 1 +toriddl 1 +jour 1 +octob 1 +label 1 +place 1 +jar 1 +countri 1 +farm 1 +honei 1 +produc 1 +myuncl 1 +edmonton 1 +alberta 1 +canada 1 +amus 1 +linkschairman 1 +linksnorm 1 +gregori 1 +bookmark 1 +halcyon 1 +eugen 1 +spafford 1 +purdu 1 +randi 1 +pausch 1 +virginia 1 +wallach 1 +scool 1 +princeton 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..78d39573 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,151 @@ +system 11 +oper 8 +extens 7 +washington 6 +spin 5 +paper 5 +protocol 4 +applic 4 +marc 3 +fiuczynski 3 +comput 3 +scienc 3 +engin 3 +univers 3 +work 3 +network 3 +interfac 3 +safe 3 +architectur 3 +compar 3 +perform 3 +describ 3 +proceed 3 +softwar 3 +home 2 +page 2 +depart 2 +spent 2 +creat 2 +telephon 2 +us 2 +forappl 2 +specif 2 +achiev 2 +run 2 +demonstr 2 +servic 2 +http 2 +request 2 +contacthttp 2 +design 2 +implement 2 +kernel 2 +appear 2 +languag 2 +shortcom 2 +dynam 2 +protect 2 +ieee 2 +analysi 2 +seattl 1 +backgroundi 1 +graduat 1 +student 1 +grewup 1 +germani 1 +near 1 +sseldorf 1 +year 1 +highschool 1 +princeton 1 +receiv 1 +fromrutg 1 +sever 1 +summer 1 +bell 1 +lab 1 +mitr 1 +corpor 1 +rang 1 +ofproject 1 +sole 1 +proprietor 1 +companythat 1 +decemb 1 +sell 1 +distribut 1 +fault 1 +toler 1 +base 1 +built 1 +scratch 1 +setof 1 +chasi 1 +processor 1 +univoic 1 +cardsand 1 +vxwork 1 +time 1 +spend 1 +hack 1 +adapt 1 +primari 1 +contribut 1 +compellingperform 1 +improv 1 +structur 1 +tosimilar 1 +commerci 1 +platform 1 +recent 1 +report 1 +anextens 1 +allow 1 +anyon 1 +custom 1 +anin 1 +graph 1 +enabl 1 +betterperform 1 +similar 1 +conventionaloper 1 +winter 1 +usenix 1 +technicalconfer 1 +safeti 1 +fifteenth 1 +symposium 1 +principl 1 +support 1 +pretti 1 +happi 1 +deal 1 +inord 1 +describeshow 1 +address 1 +link 1 +linker 1 +load 1 +code 1 +point 1 +isth 1 +abil 1 +manag 1 +linkabl 1 +namespac 1 +andcollect 1 +issu 1 +posit 1 +hardwar 1 +mechan 1 +fifth 1 +workshop 1 +topic 1 +region 1 +parallel 1 +elimin 1 +method 1 +data 1 +flow 1 +transact 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..8805eced --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,31 @@ +rout 5 +minim 2 +algorithm 2 +model 2 +deflect 2 +melani 1 +fulgham 1 +comput 1 +scienc 1 +versu 1 +method 1 +develop 1 +help 1 +predict 1 +compar 1 +perform 1 +router 1 +real 1 +parallel 1 +machin 1 +upper 1 +lower 1 +bound 1 +practic 1 +requir 1 +sort 1 +mesh 1 +topolog 1 +washington 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..c5f20ad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,64 @@ +washington 3 +comput 3 +singapor 3 +meng 2 +univers 2 +imag 2 +work 2 +heng 1 +homepag 1 +homepagemenghe 1 +edubox 1 +depart 1 +scienc 1 +engineeringunivers 1 +washingtonseattl 1 +second 1 +year 1 +student 1 +scienceat 1 +undergrad 1 +pennsylvania 1 +research 1 +interestsi 1 +interest 1 +retriev 1 +problem 1 +try 1 +findimag 1 +huge 1 +databas 1 +virag 1 +andqbicar 1 +commerci 1 +exampl 1 +similar 1 +kind 1 +stuff 1 +snapshot 1 +done 1 +singaporesingapor 1 +infomap 1 +provid 1 +fact 1 +andstatist 1 +singaporeonlin 1 +guid 1 +plan 1 +take 1 +trip 1 +nation 1 +boardi 1 +charg 1 +transform 1 +anintellig 1 +island 1 +graduat 1 +strait 1 +time 1 +main 1 +english 1 +newspap 1 +visit 1 +sinc 1 +menghe 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..743c84ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,70 @@ +comput 4 +includ 3 +program 3 +michael 2 +ernst 2 +graduat 2 +student 2 +washington 2 +scienc 2 +depart 2 +research 2 +page 2 +workshop 2 +maintain 2 +home 1 +pagemichael 1 +ernsti 1 +univers 1 +previous 1 +lectur 1 +riceunivers 1 +sciencedepart 1 +programanalysi 1 +group 1 +microsoft 1 +laboratori 1 +eec 1 +frequent 1 +updat 1 +technic 1 +interest 1 +compil 1 +static 1 +analysi 1 +slice 1 +debug 1 +optim 1 +code 1 +serial 1 +parallel 1 +chair 1 +intermedi 1 +represent 1 +coloc 1 +popl 1 +intellectu 1 +properti 1 +particularli 1 +area 1 +game 1 +theori 1 +cryptographi 1 +philosophi 1 +denot 1 +semanticsi 1 +list 1 +resourcesfor 1 +confer 1 +organ 1 +occasion 1 +manag 1 +slip 1 +awai 1 +work 1 +carri 1 +real 1 +life 1 +link 1 +possibleinterest 1 +mernst 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..af82260c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,124 @@ +page 7 +home 5 +link 5 +mock 4 +comput 4 +scienc 3 +interest 3 +chess 3 +marku 2 +washington 2 +come 2 +compil 2 +current 2 +time 2 +stuff 2 +live 2 +search 2 +rttemberg 1 +grew 1 +anotherpart 1 +state 1 +district 1 +biberach 1 +upper 1 +swabia 1 +oberschwaben 1 +studi 1 +univers 1 +karlsruh 1 +whichi 1 +obtain 1 +diplom 1 +also 1 +spent 1 +year 1 +umass 1 +fulbright 1 +grante 1 +main 1 +research 1 +parallel 1 +distribut 1 +object 1 +orient 1 +system 1 +work 1 +dynam 1 +includ 1 +spanish 1 +latin 1 +american 1 +cultur 1 +travel 1 +good 1 +book 1 +labyrinth 1 +solitud 1 +mainstream 1 +movi 1 +dieangst 1 +torwart 1 +beim 1 +elfmet 1 +know 1 +handk 1 +salsa 1 +merengu 1 +danc 1 +still 1 +left 1 +check 1 +els 1 +seattl 1 +publicationssepar 1 +list 1 +event 1 +olympiad 1 +yerewan 1 +colloquia 1 +oopsla 1 +volunt 1 +mossi 1 +bit 1 +grad 1 +journal 1 +csek 1 +csebi 1 +cse 1 +cours 1 +graduat 1 +studentsimag 1 +depart 1 +electr 1 +engineeringy 1 +wouldn 1 +expect 1 +squar 1 +view 1 +metacrawl 1 +altavista 1 +deutsch 1 +well 1 +realaudio 1 +cool 1 +linksand 1 +quot 1 +consid 1 +lili 1 +field 1 +grow 1 +toil 1 +neither 1 +spin 1 +unto 1 +even 1 +solomon 1 +glorywa 1 +arrai 1 +like 1 +matthew 1 +access 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..085fdf50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,15 @@ +vivek 2 +narasayya 2 +washington 2 +home 1 +page 1 +nara 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +person 1 +informationresearch 1 +interestspap 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..c5f6839a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,69 @@ +himanshu 3 +nautiy 3 +comput 2 +scienc 2 +engin 2 +washington 2 +offic 2 +name 2 +india 2 +internet 2 +home 1 +pagehimanshu 1 +nautiyalthi 1 +page 1 +heavi 1 +construct 1 +nautiyaldept 1 +mail 1 +stop 1 +univers 1 +seattl 1 +sieg 1 +phone 1 +cours 1 +take 1 +autumn 1 +quarter 1 +principl 1 +digit 1 +system 1 +design 1 +artifici 1 +intellig 1 +finger 1 +edugod 1 +gift 1 +personkind 1 +dougla 1 +adam 1 +terri 1 +pratchett 1 +pelham 1 +grenvil 1 +wodehouseth 1 +order 1 +alphabet 1 +last 1 +impli 1 +favorit 1 +link 1 +place 1 +radio 1 +search 1 +friend 1 +delhi 1 +finish 1 +tech 1 +astronomi 1 +skate 1 +aviat 1 +travel 1 +numismat 1 +sound 1 +much 1 +profound 1 +coin 1 +collect 1 +cook 1 +movi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..97ecd747 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,90 @@ +avail 3 +need 3 +technolog 2 +labyrinth 1 +mediocr 1 +bemoan 1 +hype 1 +skeptic 1 +cynic 1 +sinc 1 +research 1 +current 1 +work 1 +automaticconstruct 1 +wrapper 1 +inform 1 +resourc 1 +alsointerest 1 +sever 1 +area 1 +artifici 1 +intellig 1 +andcognit 1 +scienc 1 +paper 1 +beeninvolv 1 +stuff 1 +anonym 1 +servic 1 +provid 1 +glbal 1 +infrmatin 1 +sperhighwai 1 +preliminari 1 +version 1 +divers 1 +meter 1 +pictur 1 +hand 1 +shortli 1 +surgeri 1 +random 1 +number 1 +alwai 1 +handi 1 +know 1 +date 1 +time 1 +week 1 +favorit 1 +color 1 +line 1 +lost 1 +easili 1 +return 1 +page 1 +ronald 1 +wilson 1 +reagan 1 +temperatur 1 +look 1 +javascript 1 +enabl 1 +browser 1 +automat 1 +send 1 +mail 1 +great 1 +republican 1 +tell 1 +like 1 +miscellani 1 +contact 1 +bookmark 1 +societi 1 +awar 1 +bitter 1 +ironi 1 +involv 1 +nonetheless 1 +madeavail 1 +wendel 1 +berri 1 +guidelin 1 +constitutesgood 1 +comment 1 +nichola 1 +kushmerick 1 +uwcs 1 +seattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..1665dc4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,75 @@ +page 6 +inform 5 +ohlrich 4 +wayn 3 +research 3 +memori 3 +home 3 +us 3 +washington 2 +univers 2 +seattl 2 +work 2 +leveson 2 +bershad 2 +karlin 2 +romer 2 +project 2 +isca 2 +contain 2 +invest 2 +depart 1 +comput 1 +scienc 1 +engin 1 +offic 1 +guggenheim 1 +annex 1 +chateau 1 +public 1 +current 1 +nanci 1 +model 1 +check 1 +automat 1 +test 1 +gener 1 +specif 1 +spare 1 +time 1 +brian 1 +anna 1 +perform 1 +analysi 1 +local 1 +known 1 +sever 1 +damag 1 +group 1 +paper 1 +make 1 +debut 1 +itali 1 +summer 1 +safeti 1 +sytem 1 +reduc 1 +overhead 1 +onlin 1 +superpag 1 +promot 1 +class 1 +cours 1 +person 1 +interest 1 +game 1 +world 1 +wonder 1 +sort 1 +link 1 +found 1 +creat 1 +octob 1 +last 1 +modifi 1 +march 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..94233c4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,86 @@ +gershoni 4 +year 3 +washington 2 +graduat 2 +scienc 2 +sieg 2 +seattl 2 +univers 2 +live 2 +california 2 +berkelei 2 +israel 2 +class 2 +comput 2 +offic 2 +summer 2 +time 2 +studentcomput 1 +engin 1 +departmentunivers 1 +washingtonoffic 1 +home 1 +second 1 +student 1 +ofwashington 1 +move 1 +seven 1 +fouryear 1 +three 1 +lancast 1 +northeast 1 +angel 1 +origin 1 +haifa 1 +holon 1 +like 1 +practic 1 +kwon 1 +plai 1 +basketbal 1 +hike 1 +quarter 1 +take 1 +whole 1 +bunch 1 +seminar 1 +amta 1 +architectur 1 +usual 1 +find 1 +hour 1 +aremondai 1 +wednesdai 1 +potenti 1 +employ 1 +welcom 1 +look 1 +resum 1 +pictur 1 +took 1 +last 1 +click 1 +tose 1 +cool 1 +shirt 1 +design 1 +made 1 +graphicsprogram 1 +call 1 +virtual 1 +realiti 1 +interest 1 +link 1 +daili 1 +new 1 +summari 1 +york 1 +riderlink 1 +seattletransport 1 +option 1 +inform 1 +mathemat 1 +depart 1 +access 1 +sinc 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..8ca98468 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,78 @@ +tequila 4 +ortega 3 +ross 2 +washington 2 +would 2 +work 2 +chinook 2 +project 2 +color 2 +wear 1 +jean 1 +depart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +knew 1 +call 1 +research 1 +albert 1 +einstein 1 +welcom 1 +home 1 +pageaft 1 +go 1 +school 1 +boston 1 +year 1 +decid 1 +head 1 +west 1 +realli 1 +northwest 1 +came 1 +fall 1 +leav 1 +sometim 1 +accord 1 +advisor 1 +gaetano 1 +borriello 1 +offici 1 +tool 1 +real 1 +time 1 +embed 1 +control 1 +system 1 +unoffici 1 +brew 1 +beer 1 +learn 1 +hack 1 +try 1 +teach 1 +german 1 +shepherd 1 +behav 1 +profession 1 +section 1 +myresum 1 +file 1 +educ 1 +experi 1 +public 1 +paper 1 +puppi 1 +pictur 1 +offic 1 +sieg 1 +check 1 +page 1 +link 1 +find 1 +interest 1 +last 1 +updatedthu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..5cdb1eb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,121 @@ +seattl 4 +weather 3 +comput 2 +washington 2 +show 2 +beth 2 +pardo 2 +sometim 2 +work 2 +item 2 +site 2 +think 2 +courtesei 2 +also 2 +untitl 1 +document 1 +flat 1 +morri 1 +minor 1 +pardodepart 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +washingtonusapardo 1 +edunot 1 +blue 1 +ribbon 1 +asimgsrc 1 +http 1 +graphic 1 +icon 1 +blueribbon 1 +rib_trn_plain_sm 1 +quiet 1 +opposit 1 +free 1 +speechprohibit 1 +nation 1 +telecommun 1 +bill 1 +likewis 1 +white 1 +letter 1 +black 1 +background 1 +everybodi 1 +need 1 +around 1 +share 1 +academicsom 1 +papersi 1 +find 1 +particularli 1 +interest 1 +runtim 1 +code 1 +gener 1 +rtcg 1 +instruct 1 +simul 1 +trace 1 +tool 1 +home 1 +page 1 +oncomput 1 +architectureandcompil 1 +quick 1 +link 1 +otherpeopl 1 +class 1 +thesi 1 +stylenon 1 +academicfeatur 1 +featur 1 +month 1 +weak 1 +week 1 +doesn 1 +blink 1 +anymor 1 +regular 1 +itemsbicyclesbusinessescomputersfoodhumori 1 +famou 1 +thing 1 +relat 1 +legal 1 +ethic 1 +weirdnesslinux 1 +journalmusicgoofi 1 +politicssci 1 +though 1 +unrel 1 +stuff 1 +transport 1 +movi 1 +list 1 +film 1 +festiv 1 +dant 1 +search 1 +truli 1 +gross 1 +stori 1 +trepan 1 +privaci 1 +log 1 +mail 1 +address 1 +wors 1 +take 1 +data 1 +disk 1 +everi 1 +time 1 +consid 1 +particular 1 +newhous 1 +newspap 1 +courtesi 1 +yesterdai 1 +stuffpardo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..ed88d4fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,88 @@ +washington 4 +research 4 +system 4 +school 3 +seattl 3 +relat 3 +pardyak 2 +home 2 +pardi 2 +grad 2 +year 2 +comput 2 +scienc 2 +also 2 +languag 2 +drizzl 2 +interest 2 +group 2 +mechan 2 +object 2 +univers 2 +phone 2 +przemek 1 +page 1 +przemyslaw 1 +first 1 +week 1 +coupl 1 +later 1 +third 1 +graduat 1 +student 1 +theunivers 1 +current 1 +area 1 +ofoper 1 +interast 1 +distribut 1 +compil 1 +besid 1 +life 1 +fill 1 +withth 1 +hike 1 +outdoor 1 +activ 1 +notbusi 1 +enjoi 1 +book 1 +music 1 +find 1 +short 1 +descript 1 +resum 1 +list 1 +paper 1 +outdat 1 +happenswhen 1 +busi 1 +schedul 1 +projectsspinan 1 +extens 1 +oper 1 +built 1 +gloriou 1 +leadership 1 +brian 1 +bershad 1 +base 1 +systemsgroup 1 +commun 1 +emerald 1 +basedprogram 1 +time 1 +link 1 +polish 1 +connect 1 +variou 1 +resourc 1 +somehow 1 +poland 1 +project 1 +mine 1 +unrel 1 +miscellan 1 +work 1 +engin 1 +depart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..36aa5717 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,167 @@ +year 8 +bike 6 +davi 4 +student 3 +us 3 +school 3 +taught 3 +prolog 3 +chri 3 +html 3 +franklin 2 +home 2 +graduat 2 +depart 2 +comput 2 +scienc 2 +work 2 +first 2 +second 2 +seattl 2 +dai 2 +around 2 +high 2 +time 2 +engin 2 +univers 2 +paul 1 +pagepaul 1 +pagei 1 +current 1 +univ 1 +washington 1 +inth 1 +offici 1 +myoffic 1 +sieg 1 +rapid 1 +project 1 +thesumm 1 +someon 1 +express 1 +concern 1 +aboutthi 1 +character 1 +usual 1 +somewher 1 +north 1 +iliv 1 +anoth 1 +pictur 1 +best 1 +oneof 1 +scan 1 +better 1 +on 1 +norwegian 1 +poem 1 +likea 1 +collect 1 +fortun 1 +receiv 1 +friend 1 +localchines 1 +restaur 1 +mundan 1 +stuffi 1 +hope 1 +folk 1 +might 1 +find 1 +stuff 1 +hotlink 1 +pagesstuff 1 +maintainmi 1 +schedul 1 +rememb 1 +updat 1 +contact 1 +love 1 +travel 1 +necessarili 1 +tell 1 +everyon 1 +hire 1 +mewher 1 +come 1 +diploma 1 +live 1 +inmorgan 1 +hill 1 +fromuc 1 +andy 1 +glad 1 +ididn 1 +bergen 1 +ialso 1 +research 1 +professor 1 +electr 1 +andcomput 1 +stuffwhil 1 +partner 1 +variou 1 +relatedact 1 +tend 1 +matt 1 +jame 1 +evengot 1 +togeth 1 +recent 1 +marriag 1 +joann 1 +anexcus 1 +brother 1 +also 1 +made 1 +itin 1 +photo 1 +throughout 1 +undergradu 1 +kept 1 +never 1 +flat 1 +exchang 1 +ofbergen 1 +hillier 1 +longer 1 +rout 1 +returnedto 1 +took 1 +rollerblad 1 +sinc 1 +town 1 +wasnow 1 +easi 1 +drop 1 +hewlettpackard 1 +return 1 +vengeanc 1 +move 1 +toseattl 1 +done 1 +annual 1 +portland 1 +ride 1 +intwo 1 +inseason 1 +march 1 +april 1 +june 1 +rest 1 +justcommut 1 +lot 1 +danc 1 +particularli 1 +lindyhop 1 +know 1 +everi 1 +document 1 +header 1 +linethat 1 +look 1 +someth 1 +like 1 +doctyp 1 +public 1 +ietf 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..1255adde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,98 @@ +pighin 3 +anna 3 +karlin 3 +might 3 +like 2 +rain 2 +live 2 +action 2 +pictur 2 +refresh 2 +everi 2 +minut 2 +note 2 +frdric 1 +washington 1 +comput 1 +scienc 1 +lcommun 1 +wonder 1 +citi 1 +tourist 1 +quarter 1 +ta 1 +taught 1 +gui 1 +found 1 +much 1 +often 1 +dani 1 +qual 1 +report 1 +rest 1 +british 1 +movi 1 +monti 1 +python 1 +swim 1 +nick 1 +cave 1 +corto 1 +maltes 1 +italian 1 +comic 1 +cat 1 +pari 1 +berlin 1 +venis 1 +simpson 1 +mpeg 1 +surpris 1 +traditionn 1 +french 1 +marin 1 +song 1 +collect 1 +otherwis 1 +work 1 +graphic 1 +supervis 1 +although 1 +formerli 1 +studi 1 +systemher 1 +name 1 +paper 1 +implement 1 +global 1 +memori 1 +manag 1 +workstat 1 +cluster 1 +michael 1 +feelei 1 +william 1 +morgan 1 +freder 1 +henri 1 +levi 1 +chandramohan 1 +thekkath 1 +proceed 1 +symposium 1 +oper 1 +system 1 +principl 1 +decemb 1 +postscript 1 +breath 1 +take 1 +door 1 +lucki 1 +even 1 +look 1 +darren 1 +juan 1 +dark 1 +squar 1 +five 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..923a0c98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,14 @@ +ruth 2 +anderson 2 +washington 2 +home 1 +page 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +wxyc 1 +map 1 +brother 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..4865931c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,12 @@ +home 3 +washington 3 +redston 3 +josh 1 +page 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +joshua 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..64583f6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,162 @@ +jakobovit 4 +scienc 3 +univers 3 +home 2 +washington 2 +comput 2 +seattl 2 +live 2 +imag 2 +camera 2 +research 2 +base 2 +manag 2 +program 2 +build 2 +databas 2 +inform 2 +professor 2 +psycholog 2 +hawaii 2 +onlin 2 +librari 2 +page 1 +engin 1 +depart 1 +usai 1 +get 1 +departmentof 1 +ofwashington 1 +wonder 1 +citi 1 +alwai 1 +rain 1 +thisup 1 +date 1 +weatherreport 1 +sneak 1 +peek 1 +video 1 +mount 1 +outsid 1 +point 1 +beauti 1 +drumhel 1 +fountain 1 +clear 1 +youcan 1 +catch 1 +glimps 1 +rainier 1 +glori 1 +probabl 1 +cleardai 1 +nice 1 +color 1 +pictur 1 +develop 1 +repositori 1 +toolkit 1 +multi 1 +media 1 +consol 1 +construct 1 +part 1 +astructur 1 +framework 1 +brain 1 +map 1 +knowledg 1 +support 1 +digit 1 +anatomist 1 +line 1 +interact 1 +atla 1 +human 1 +bodi 1 +implement 1 +databaseenviron 1 +vision 1 +local 1 +expert 1 +persistentprogram 1 +languag 1 +interest 1 +els 1 +proud 1 +creator 1 +internetracquetbal 1 +ladder 1 +taught 1 +advanc 1 +extens 1 +wrote 1 +perl 1 +script 1 +rotisseriebasebal 1 +leagu 1 +stand 1 +updat 1 +daili 1 +stat 1 +fromusa 1 +todai 1 +rais 1 +happi 1 +famili 1 +africancichlid 1 +visit 1 +town 1 +honolulu 1 +everi 1 +chanc 1 +camp 1 +magic 1 +kalalau 1 +vallei 1 +movi 1 +gambl 1 +stock 1 +market 1 +darn 1 +good 1 +fantasi 1 +footbal 1 +team 1 +newslet 1 +would 1 +javafamili 1 +link 1 +mydad 1 +leon 1 +jame 1 +whoi 1 +write 1 +book 1 +traffic 1 +foster 1 +polem 1 +emanuel 1 +swedenborg 1 +step 1 +dian 1 +nahl 1 +whoprovid 1 +great 1 +index 1 +judi 1 +realtor 1 +uncl 1 +eddi 1 +run 1 +site 1 +bioscienc 1 +profession 1 +bookmarksif 1 +java 1 +click 1 +drag 1 +word 1 +make 1 +poem 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..10a84c88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,11 @@ +twilight 1 +welcom 1 +galleri 1 +twenti 1 +photograph 1 +five 1 +head 1 +robert 1 +grimm 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..b90ba766 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,142 @@ +romer 10 +bershad 5 +perform 3 +chen 3 +comput 2 +washington 2 +research 2 +memori 2 +karlin 2 +ohlrich 2 +wong 2 +paper 2 +isca 2 +dynam 2 +page 2 +conflict 2 +asplo 2 +friend 2 +scientist 2 +depart 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +home 1 +offic 1 +eduoffic 1 +chateau 1 +sieg 1 +interestsi 1 +oper 1 +system 1 +supportfor 1 +high 1 +systemswith 1 +realli 1 +smart 1 +peopl 1 +likebrian 1 +brad 1 +alan 1 +eustac 1 +anna 1 +denni 1 +wayn 1 +andwayn 1 +three 1 +recent 1 +subject 1 +reduc 1 +overhead 1 +us 1 +onlinesuperpag 1 +promot 1 +map 1 +polici 1 +cach 1 +resolutionon 1 +standard 1 +hardwar 1 +osdi 1 +avoid 1 +miss 1 +larg 1 +direct 1 +mappedcach 1 +addit 1 +studi 1 +ofinterpret 1 +learn 1 +theproject 1 +rockyhom 1 +also 1 +wrote 1 +togeth 1 +structur 1 +interpret 1 +voelker 1 +wolman 1 +baer 1 +levi 1 +appear 1 +abstract 1 +postscript 1 +bibliographi 1 +lobo 1 +read 1 +listrandom 1 +stuffa 1 +hous 1 +rai 1 +limb 1 +knee 1 +arthroscop 1 +surgeri 1 +mark 1 +hill 1 +wrist 1 +dylansaid 1 +hair 1 +couldn 1 +flowbe 1 +said 1 +could 1 +beingexperiment 1 +conduct 1 +experi 1 +judg 1 +result 1 +attend 1 +travel 1 +europ 1 +took 1 +somepictur 1 +eatsomeon 1 +els 1 +food 1 +accompani 1 +sincer 1 +ration 1 +forexampl 1 +lunch 1 +thought 1 +leftth 1 +countri 1 +would 1 +didn 1 +origin 1 +unknown 1 +edward 1 +tuft 1 +tip 1 +public 1 +speak 1 +father 1 +edit 1 +american 1 +journal 1 +physic 1 +place 1 +ticker 1 +symbol 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..0c6c480c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,157 @@ +memori 10 +system 10 +washington 10 +page 7 +romer 7 +perform 6 +project 6 +superpag 6 +polici 5 +bershad 5 +univers 4 +monitor 4 +dynam 4 +overhead 4 +us 4 +cach 4 +miss 4 +research 3 +improv 3 +applic 3 +identifi 3 +size 3 +qual 3 +karlin 3 +ohlrich 3 +descript 2 +work 2 +hardwar 2 +support 2 +mechan 2 +small 2 +current 2 +cost 2 +intern 2 +fragment 2 +region 2 +construct 2 +paper 2 +reduc 2 +onlin 2 +promot 2 +isca 2 +appear 2 +algorithm 2 +wayn 2 +wong 2 +map 2 +conflict 2 +chen 2 +report 2 +washingtonmemori 1 +researchdepart 1 +comput 1 +scienc 1 +engin 1 +seattl 1 +welcom 1 +home 1 +group 1 +investig 1 +techniqu 1 +operatingsystem 1 +sharesth 1 +follow 1 +featur 1 +reli 1 +combin 1 +simpl 1 +oper 1 +modif 1 +behavior 1 +incur 1 +runtim 1 +inform 1 +collect 1 +sourc 1 +delai 1 +resolv 1 +bottleneck 1 +also 1 +significantli 1 +overal 1 +recent 1 +explor 1 +monitorappl 1 +refer 1 +pattern 1 +order 1 +resolvetlb 1 +problem 1 +poor 1 +result 1 +tlbi 1 +cover 1 +severalmodern 1 +architectur 1 +whose 1 +amultipl 1 +base 1 +tlbperform 1 +larger 1 +ofwast 1 +simul 1 +sever 1 +adapt 1 +todiffer 1 +address 1 +space 1 +constructingsuperpag 1 +copi 1 +compon 1 +contigu 1 +ofmemori 1 +develop 1 +balancesth 1 +potenti 1 +benefit 1 +reduct 1 +futur 1 +tlbmiss 1 +memorycopi 1 +misspattern 1 +warrant 1 +attain 1 +largepag 1 +without 1 +detail 1 +look 1 +someon 1 +implement 1 +would 1 +makea 1 +good 1 +master 1 +peoplefaculti 1 +brian 1 +anna 1 +student 1 +denni 1 +dlee 1 +waynew 1 +resolut 1 +standard 1 +osdi 1 +avoid 1 +larg 1 +direct 1 +asplo 1 +comparison 1 +mip 1 +alpha 1 +instruct 1 +effect 1 +differ 1 +code 1 +reorder 1 +bibliographi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..b0399398 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,96 @@ +washington 9 +interpret 7 +perform 6 +languag 6 +project 4 +architectur 3 +sever 3 +baer 3 +bershad 3 +levi 3 +romer 3 +voelker 3 +wolman 3 +rocki 2 +last 2 +environ 2 +implement 2 +collect 2 +benchmark 2 +wong 2 +etch 2 +avail 2 +intern 2 +descript 1 +becom 1 +increasingli 1 +popular 1 +year 1 +part 1 +demand 1 +portabl 1 +safeti 1 +eas 1 +examin 1 +perspect 1 +strategi 1 +processor 1 +util 1 +basi 1 +studi 1 +microbenchmark 1 +perl 1 +java 1 +mipsi 1 +us 1 +variou 1 +instrument 1 +trace 1 +techniqu 1 +evalu 1 +characterist 1 +order 1 +gain 1 +insight 1 +similar 1 +differ 1 +execut 1 +peoplefaculti 1 +jean 1 +loup 1 +brian 1 +henri 1 +student 1 +denni 1 +dlee 1 +geoff 1 +alec 1 +wayn 1 +waynew 1 +papersrom 1 +structur 1 +asplo 1 +appear 1 +abstractpostscriptjava 1 +measur 1 +xjava 1 +sourc 1 +file 1 +benchmarkstoolsto 1 +inform 1 +applic 1 +vebeen 1 +build 1 +binari 1 +rewrit 1 +tool 1 +call 1 +yetpublicli 1 +read 1 +etchhom 1 +page 1 +documentationproject 1 +document 1 +peopl 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..710ca4d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,98 @@ +scienc 6 +washington 3 +seattl 3 +system 3 +softwar 3 +richard 2 +phone 2 +intellig 2 +scam 2 +imag 2 +process 2 +comput 2 +work 2 +document 2 +also 2 +camp 2 +splash 2 +program 2 +nation 2 +roger 1 +home 1 +page 1 +rogersrrog 1 +educomput 1 +engin 1 +departmentunivers 1 +usaoffic 1 +chateau 1 +sieg 1 +laboratri 1 +research 1 +develop 1 +systol 1 +cellular 1 +arrai 1 +machin 1 +massiv 1 +parallel 1 +includ 1 +compil 1 +basic 1 +morpholog 1 +librari 1 +simul 1 +obtain 1 +paper 1 +current 1 +layout 1 +extract 1 +help 1 +produc 1 +groundtruth 1 +databas 1 +optic 1 +charact 1 +recognit 1 +commun 1 +director 1 +facil 1 +northwest 1 +center 1 +environment 1 +educ 1 +ncee 1 +offer 1 +summer 1 +student 1 +ag 1 +beauti 1 +juan 1 +island 1 +univers 1 +year 1 +long 1 +foundat 1 +fund 1 +grade 1 +minor 1 +girl 1 +area 1 +interest 1 +corn 1 +snake 1 +jessica 1 +squishi 1 +order 1 +increas 1 +length 1 +kuow 1 +public 1 +radio 1 +stationi 1 +bake 1 +best 1 +pecan 1 +seattlelast 1 +modifi 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..5662f8dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,19 @@ +mike 2 +home 2 +washington 2 +page 1 +salisburysalisbur 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +usaoffic 1 +chateau 1 +sieg 1 +lifehistori 1 +school 1 +friend 1 +vitacool 1 +stuff 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..ffa3ab67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,177 @@ +system 22 +oper 17 +proceed 10 +extens 6 +applic 6 +paper 6 +first 5 +workshop 5 +talk 4 +spin 4 +support 4 +reserv 4 +processor 4 +savag 3 +washington 3 +symposium 3 +slide 3 +softwar 3 +usenix 3 +capac 3 +multimedia 3 +time 3 +mach 3 +stefan 2 +work 2 +comput 2 +univers 2 +seattl 2 +industri 2 +bershad 2 +student 2 +american 2 +need 2 +compil 2 +wcsss 2 +tucson 2 +issu 2 +design 2 +abstract 2 +version 2 +microkernel 2 +specif 2 +servic 2 +januari 2 +tech 2 +report 2 +afraid 2 +confer 2 +real 2 +scienc 1 +engin 1 +depart 1 +home 1 +sampl 1 +rich 1 +post 1 +cultur 1 +modern 1 +pittsburghfor 1 +year 1 +caught 1 +ride 1 +migrat 1 +mnow 1 +gradual 1 +rank 1 +strongbackground 1 +centuri 1 +histori 1 +provid 1 +witha 1 +firm 1 +irrelev 1 +platform 1 +trash 1 +peer 1 +fool 1 +tocqeuvil 1 +statement 1 +find 1 +tiresom 1 +inconveni 1 +exercisepolit 1 +right 1 +distract 1 +quit 1 +similar 1 +tocurr 1 +microprocessor 1 +architectur 1 +trend 1 +favor 1 +ofappl 1 +code 1 +brian 1 +rest 1 +merri 1 +band 1 +onan 1 +project 1 +call 1 +projectsspinspin 1 +omnifemtokernel 1 +whichsupport 1 +dynam 1 +adapt 1 +interfac 1 +andimplement 1 +direct 1 +control 1 +stillmaintain 1 +integr 1 +inter 1 +isol 1 +thing 1 +writingspin 1 +safeti 1 +perform 1 +principl 1 +sosp 1 +copper 1 +mountain 1 +decemb 1 +languag 1 +write 1 +modula 1 +protect 1 +fifth 1 +topic 1 +hoto 1 +orca 1 +island 1 +implement 1 +osdi 1 +monterei 1 +novemb 1 +panel 1 +longer 1 +unpublish 1 +sixth 1 +sigop 1 +european 1 +match 1 +appear 1 +review 1 +march 1 +frequent 1 +redund 1 +arrai 1 +independ 1 +disk 1 +winter 1 +technic 1 +diego 1 +best 1 +ieee 1 +intern 1 +boston 1 +manag 1 +usag 1 +fourth 1 +workstat 1 +wwo 1 +napa 1 +octob 1 +carnegi 1 +mellon 1 +timer 1 +export 1 +user 1 +third 1 +santa 1 +april 1 +interest 1 +music 1 +hikingthi 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..d7ded55a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,9 @@ +sean 2 +sandi 2 +washington 2 +david 1 +comput 1 +scienc 1 +last 1 +revis 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..69dfaf25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,33 @@ +richard 2 +segal 2 +home 1 +page 1 +segaldepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +washingtonbox 1 +seattl 1 +washington 1 +person 1 +biographi 1 +better 1 +half 1 +famili 1 +pictur 1 +research 1 +overview 1 +brute 1 +internet 1 +softbot 1 +public 1 +curriculum 1 +vita 1 +postscript 1 +amus 1 +archeri 1 +bicycl 1 +racquetbal 1 +ski 1 +softbal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..616033cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,116 @@ +project 8 +char 6 +univers 5 +stefan 4 +washington 4 +activ 4 +els 4 +printf 4 +seattl 3 +teach 3 +assist 3 +work 2 +phone 2 +inform 2 +past 2 +current 2 +interest 2 +page 2 +cologn 2 +complet 2 +come 2 +receiv 2 +high 2 +school 2 +scienc 2 +implement 2 +comparison 2 +hardwar 2 +softwar 2 +solut 2 +fals 2 +share 2 +pictur 2 +print 2 +byte 2 +putchar 2 +main 2 +bergstefan 1 +berg 1 +home 1 +sieg 1 +hall 1 +email 1 +sgberg 1 +content 1 +address 1 +finger 1 +born 1 +germani 1 +spring 1 +mittler 1 +reif 1 +thgrade 1 +schillergymnasium 1 +unit 1 +statesto 1 +diploma 1 +bloomington 1 +north 1 +indiana 1 +bachelor 1 +honor 1 +distinctionin 1 +field 1 +comput 1 +fromindiana 1 +momenth 1 +toward 1 +expect 1 +date 1 +sometim 1 +thiscenturi 1 +reduct 1 +machin 1 +studi 1 +linear 1 +time 1 +sort 1 +algorithm 1 +qual 1 +peopl 1 +around 1 +raft 1 +bookmark 1 +excit 1 +squar 1 +moment 1 +weather 1 +yourselfsometh 1 +crazi 1 +didn 1 +even 1 +particularsolut 1 +done 1 +sall 1 +line 1 +shouldn 1 +contain 1 +trail 1 +carriag 1 +return 1 +compil 1 +without 1 +warn 1 +program 1 +exact 1 +sourc 1 +code 1 +itin 1 +fewer 1 +like 1 +resum 1 +avail 1 +inpostscript 1 +andtex 1 +format 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..5567df0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,62 @@ +thing 2 +project 2 +pictur 2 +work 2 +page 2 +ward 1 +shadegreet 1 +salut 1 +third 1 +year 1 +grad 1 +student 1 +hereat 1 +dubcs 1 +interact 1 +renderingof 1 +complex 1 +scene 1 +current 1 +follow 1 +link 1 +walkthruproject 1 +amonglot 1 +anim 1 +find 1 +siggraph 1 +paperdescrib 1 +recent 1 +shortcut 1 +click 1 +thepictur 1 +island 1 +lower 1 +left 1 +corner 1 +lot 1 +interest 1 +go 1 +mani 1 +differ 1 +aspectsof 1 +comput 1 +graphic 1 +thegraph 1 +imag 1 +laboratori 1 +get 1 +done 1 +contact 1 +info 1 +daili 1 +schedul 1 +travel 1 +plan 1 +public 1 +look 1 +scrunch 1 +make 1 +browser 1 +least 1 +pixel 1 +wide 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..262be6d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,29 @@ +shun 2 +leung 2 +depart 2 +comput 2 +scienc 2 +univers 2 +research 2 +washington 2 +leungshun 1 +student 1 +andengin 1 +ofwashington 1 +work 1 +prof 1 +johnzahorjan 1 +pointer 1 +summari 1 +public 1 +curriculum 1 +vita 1 +upon 1 +request 1 +engin 1 +seattl 1 +email 1 +shuntak 1 +last 1 +modifi 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..b887b0e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,70 @@ +shuichi 5 +koga 3 +washington 3 +graduat 3 +studi 3 +skoga 2 +haven 2 +involv 2 +asian 2 +also 2 +welcom 1 +home 1 +page 1 +studentcomput 1 +scienc 1 +engin 1 +departmentunivers 1 +name 1 +notic 1 +bynow 1 +start 1 +univers 1 +ofwashington 1 +quit 1 +figur 1 +myqual 1 +much 1 +less 1 +dissert 1 +universityof 1 +virginia 1 +degre 1 +mathemat 1 +alsoheavili 1 +foreign 1 +relat 1 +andgovern 1 +depart 1 +origin 1 +slate 1 +degreein 1 +heavili 1 +user 1 +interfac 1 +groupand 1 +comput 1 +sciencedepart 1 +work 1 +project 1 +call 1 +alic 1 +sinc 1 +anywai 1 +take 1 +look 1 +pictur 1 +smaller 1 +mean 1 +finger 1 +info 1 +current 1 +schedul 1 +neat 1 +hypertext 1 +link 1 +hunt 1 +destroi 1 +bug 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..2ce1cf1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,48 @@ +soha 3 +washington 3 +current 3 +univers 2 +comput 2 +scienc 2 +engin 2 +dept 2 +hassoun 1 +home 1 +page 1 +hassounit 1 +year 1 +graduat 1 +school 1 +circuit 1 +design 1 +whoturn 1 +develop 1 +work 1 +onarchitectur 1 +retim 1 +professor 1 +carlebel 1 +weekli 1 +schedul 1 +busi 1 +previou 1 +research 1 +educ 1 +experi 1 +public 1 +patent 1 +chao 1 +group 1 +profession 1 +interest 1 +vlsi 1 +site 1 +inform 1 +littl 1 +deede 1 +photo 1 +galleri 1 +address 1 +depart 1 +seattl 1 +phone 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..b592e3dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,115 @@ +favorit 4 +scienc 3 +sujai 2 +parekh 2 +home 2 +page 2 +comput 2 +washington 2 +seattl 2 +seminar 2 +system 2 +french 2 +softbot 2 +project 2 +interest 2 +soccer 2 +danc 2 +food 2 +work 1 +depart 1 +engin 1 +sieg 1 +hall 1 +chateau 1 +guggenheim 1 +annex 1 +univers 1 +quarter 1 +class 1 +research 1 +simultaneousmultithread 1 +particular 1 +issu 1 +relat 1 +tomultithread 1 +architectur 1 +evalu 1 +simon 1 +emploi 1 +procedur 1 +search 1 +controlsystem 1 +control 1 +action 1 +report 1 +construct 1 +design 1 +remov 1 +patio 1 +convent 1 +workspac 1 +fund 1 +pleas 1 +contact 1 +sport 1 +spud 1 +right 1 +sort 1 +bookmark 1 +like 1 +keep 1 +track 1 +academ 1 +cognit 1 +distribut 1 +parallel 1 +psycholog 1 +philosophi 1 +tenni 1 +sail 1 +squash 1 +volleybal 1 +ballroom 1 +cornel 1 +oracl 1 +corpor 1 +stottler 1 +henk 1 +associ 1 +done 1 +resum 1 +random 1 +person 1 +info 1 +oondhiu 1 +mango 1 +phad 1 +thai 1 +kung 1 +chicken 1 +beverag 1 +screwdriv 1 +scotch 1 +long 1 +island 1 +ic 1 +tango 1 +swing 1 +east 1 +west 1 +coast 1 +salsa 1 +rock 1 +music 1 +dire 1 +strait 1 +pink 1 +floyd 1 +phil 1 +collin 1 +genesi 1 +peter 1 +gabriel 1 +petti 1 +sparekh 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..2f79aebe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,64 @@ +erik 3 +speed 3 +selberg 1 +improv 1 +home 1 +page 1 +name 1 +washington 1 +academ 1 +almost 1 +live 1 +fishcam 1 +address 1 +lara 1 +lewi 1 +memorialhappi 1 +link 1 +peopl 1 +sport 1 +food 1 +drink 1 +cultur 1 +mari 1 +kay 1 +offic 1 +friend 1 +famili 1 +fish 1 +disc 1 +drive 1 +ultim 1 +pasti 1 +power 1 +ur 1 +machin 1 +mountain 1 +bike 1 +spud 1 +softbal 1 +ski 1 +utah 1 +raquetbal 1 +pro 1 +colleg 1 +wedgwood 1 +hous 1 +diet 1 +pepper 1 +salt 1 +lake 1 +roast 1 +compani 1 +bean 1 +bagel 1 +racer 1 +star 1 +war 1 +tini 1 +toon 1 +pinki 1 +brain 1 +phantom 1 +babylon 1 +comic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..d3dbe7e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,134 @@ +parallel 3 +washington 3 +simul 3 +home 2 +sung 2 +univers 2 +work 2 +watch 2 +like 2 +good 2 +plai 2 +scrub 2 +divis 2 +take 2 +page 1 +choiwelcom 1 +thehomepag 1 +ofsung 1 +eunchoi 1 +myschoollifemi 1 +primari 1 +research 1 +interest 1 +compil 1 +program 1 +languag 1 +involv 1 +zplcompil 1 +project 1 +late 1 +beenspend 1 +time 1 +think 1 +optim 1 +communicationgener 1 +us 1 +architechtur 1 +independ 1 +communicationlibrari 1 +ironman 1 +addit 1 +experi 1 +data 1 +programson 1 +superscalar 1 +processor 1 +goal 1 +improv 1 +nodeperform 1 +come 1 +gener 1 +machin 1 +alsobeen 1 +seen 1 +hang 1 +chaosrout 1 +group 1 +includ 1 +graphic 1 +front 1 +visual 1 +thatexperi 1 +current 1 +implement 1 +anoth 1 +router 1 +inzpl 1 +final 1 +also 1 +littl 1 +astronomi 1 +quarter 1 +ta 1 +enjoi 1 +movi 1 +mostli 1 +comfort 1 +yeah 1 +vegetarian 1 +sinc 1 +myjunior 1 +year 1 +colleg 1 +drink 1 +dinner 1 +would 1 +samewithout 1 +wine 1 +result 1 +must 1 +exercis 1 +quit 1 +twosoccerteam 1 +cousin 1 +cooper 1 +recdivis 1 +last 1 +season 1 +came 1 +second 1 +place 1 +andcoop 1 +unfortun 1 +recent 1 +sacrifiedmi 1 +left 1 +knee 1 +game 1 +soccer 1 +usualstep 1 +aerobicsclass 1 +instead 1 +find 1 +try 1 +swim 1 +weight 1 +trainingclass 1 +peopl 1 +world 1 +read 1 +book 1 +abit 1 +shakespear 1 +publictelevis 1 +listen 1 +classicalmus 1 +myotherlif 1 +choi 1 +sungeun 1 +depart 1 +comput 1 +scienc 1 +engin 1 +seattl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..8caca960 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,58 @@ +system 5 +nguyen 2 +runtim 2 +multiprocessorsenviron 2 +schedul 2 +depart 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +washington 1 +work 1 +world 1 +research 1 +interest 1 +includ 1 +oper 1 +distribut 1 +andparallel 1 +network 1 +secur 1 +current 1 +help 1 +frommi 1 +advisor 1 +johnzahorjan 1 +build 1 +support 1 +run 1 +soft 1 +real 1 +timeappl 1 +visual 1 +partial 1 +idl 1 +workstat 1 +innow 1 +recent 1 +complet 1 +studi 1 +measur 1 +ofappl 1 +characterist 1 +us 1 +tominim 1 +applic 1 +execut 1 +time 1 +uniprogram 1 +well 1 +make 1 +goodglob 1 +decis 1 +multiprogram 1 +cvpublic 1 +worldvietnameseresourc 1 +netcyclingplayground 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..45af3b29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,13 @@ +frame 2 +tian 1 +homepageyour 1 +browser 1 +rusti 1 +yellow 1 +turkei 1 +even 1 +part 1 +html 1 +standard 1 +click 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..ba9f327e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,103 @@ +system 14 +object 14 +distribut 9 +work 8 +tiwari 7 +workshop 7 +oopsla 6 +opal 5 +larg 5 +address 4 +oper 4 +persist 4 +applic 4 +softwar 4 +us 4 +levi 4 +comput 3 +databas 3 +measur 3 +build 3 +ashutosh 2 +washington 2 +scienc 2 +time 2 +area 2 +singl 2 +space 2 +orient 2 +workload 2 +user 2 +organ 2 +chase 2 +advisor 2 +also 2 +behavior 2 +except 2 +handl 2 +octob 2 +benchmark 2 +depart 1 +engin 1 +univers 1 +seattl 1 +mostli 1 +full 1 +fourth 1 +year 1 +graduat 1 +student 1 +support 1 +past 1 +infrastructur 1 +interfac 1 +spare 1 +group 1 +research 1 +technolog 1 +boe 1 +servic 1 +projectsopali 1 +project 1 +issu 1 +opportun 1 +involv 1 +creat 1 +global 1 +across 1 +multipl 1 +machin 1 +jeff 1 +primari 1 +architect 1 +hank 1 +close 1 +character 1 +gener 1 +techniqu 1 +paper 1 +sever 1 +profession 1 +career 1 +thisexperi 1 +basi 1 +follow 1 +distrbut 1 +public 1 +virtual 1 +refer 1 +proc 1 +intern 1 +septemb 1 +parallel 1 +environ 1 +ecoop 1 +juli 1 +bosch 1 +addendum 1 +proceed 1 +oop 1 +messeng 1 +evalu 1 +narasayya 1 +perform 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..bfa7b6db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,91 @@ +linux 3 +also 3 +tessa 2 +inform 2 +washington 2 +interest 2 +current 2 +work 2 +system 2 +java 2 +anoth 1 +dead 1 +superhighwai 1 +second 1 +yeargradu 1 +student 1 +comput 1 +scienc 1 +univers 1 +research 1 +includ 1 +sort 1 +relatedgoodi 1 +clio 1 +search 1 +andbrows 1 +person 1 +histori 1 +avail 1 +seek 1 +gain 1 +employ 1 +myresum 1 +onlin 1 +curiou 1 +kittyi 1 +honor 1 +share 1 +apart 1 +gambit 1 +siames 1 +cat 1 +great 1 +memor 1 +found 1 +therear 1 +pictur 1 +page 1 +tofind 1 +appar 1 +alsor 1 +scotland 1 +classesi 1 +still 1 +qual 1 +quarter 1 +take 1 +last 1 +ofeight 1 +class 1 +fulfil 1 +breadth 1 +requir 1 +digit 1 +seminarlinux 1 +gameseverybodi 1 +plai 1 +game 1 +maintain 1 +gametom 1 +commit 1 +advanc 1 +pretti 1 +coolgam 1 +platform 1 +first 1 +attempt 1 +program 1 +simpl 1 +maze 1 +applet 1 +sleepingi 1 +known 1 +frequent 1 +seattl 1 +area 1 +bookstor 1 +knit 1 +crochet 1 +copyright 1 +tlau 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..b5c2b154 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,44 @@ +martin 4 +tompa 4 +washington 3 +comput 2 +scienc 2 +univers 2 +depart 1 +engin 1 +seattl 1 +phone 1 +receptionist 1 +lectur 1 +note 1 +articl 1 +trajectori 1 +thelma 1 +louis 1 +recent 1 +holidai 1 +moon 1 +pearl 1 +among 1 +wash 1 +oyster 1 +collabor 1 +surrealist 1 +electron 1 +propheci 1 +build 1 +across 1 +pierc 1 +lane 1 +carol 1 +photograph 1 +photo 1 +courtesi 1 +health 1 +center 1 +educ 1 +resourc 1 +provid 1 +mani 1 +imag 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..edf83347 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,106 @@ +kimbrel 7 +traci 5 +prison 4 +held 3 +univers 3 +washington 3 +comput 3 +parallel 3 +prefetch 3 +anna 3 +karlin 3 +seattl 2 +forc 2 +imprison 2 +captor 2 +scienc 2 +algorithm 2 +cachingtraci 2 +edward 2 +felten 2 +appear 2 +system 2 +washingtonsinc 1 +without 1 +charg 1 +trial 1 +move 1 +year 1 +toanoth 1 +area 1 +inmat 1 +tomanufactur 1 +airplan 1 +escap 1 +institut 1 +wasrecaptur 1 +return 1 +help 1 +hisplight 1 +rescu 1 +list 1 +thing 1 +done 1 +curriculum 1 +vita 1 +detail 1 +statement 1 +ofwhat 1 +promis 1 +histori 1 +goal 1 +free 1 +depart 1 +engin 1 +tracyk 1 +eduher 1 +trace 1 +driven 1 +comparison 1 +andrew 1 +tomkin 1 +hugo 1 +patterson 1 +brian 1 +bershad 1 +garth 1 +gibson 1 +sigop 1 +usenix 1 +associ 1 +symposium 1 +onoper 1 +design 1 +implemen 1 +near 1 +optim 1 +ieeesymposium 1 +foundat 1 +longer 1 +version 1 +integr 1 +cach 1 +page 1 +extend 1 +abstract 1 +proceed 1 +sigmetr 1 +confer 1 +measurementand 1 +model 1 +probabilist 1 +verifi 1 +matrix 1 +product 1 +usingo 1 +squar 1 +time 1 +base 1 +random 1 +bit 1 +rakesh 1 +kumar 1 +sinha 1 +inform 1 +process 1 +letter 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..b0179a2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,49 @@ +travi 4 +time 4 +craig 3 +washington 2 +comput 2 +scienc 2 +real 2 +system 2 +work 2 +home 1 +page 1 +engin 1 +depart 1 +univers 1 +seattl 1 +research 1 +interest 1 +mechan 1 +predict 1 +cach 1 +restor 1 +queu 1 +spin 1 +lock 1 +arctic 1 +submarin 1 +current 1 +cours 1 +take 1 +quarter 1 +dissert 1 +consum 1 +side 1 +project 1 +half 1 +esca 1 +corpor 1 +help 1 +keep 1 +volvo 1 +run 1 +press 1 +latest 1 +motor 1 +pool 1 +statu 1 +understand 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..fcb449c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,12 @@ +dean 2 +tullsen 2 +home 2 +page 2 +biograph 1 +inform 1 +research 1 +interest 1 +bibliographi 1 +download 1 +resumemi 1 +hobbi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..43e56fc6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,235 @@ +mike 14 +work 7 +softwar 5 +time 5 +oopsla 5 +washington 4 +year 4 +wrote 4 +univers 3 +start 3 +student 3 +smithsonian 3 +program 3 +thank 3 +pari 3 +uwin 3 +also 3 +present 3 +symposium 3 +michael 2 +harvard 2 +research 2 +comput 2 +engin 2 +seattl 2 +vanhilst 2 +come 2 +programm 2 +hardwar 2 +part 2 +locat 2 +learn 2 +bill 2 +continu 2 +group 2 +data 2 +angela 2 +languag 2 +summer 2 +schedul 2 +commun 2 +recent 2 +object 2 +particip 2 +citi 2 +enjoi 2 +novemb 2 +vanhilstmichael 1 +vanhilstvanhilst 1 +edumvh 1 +eduperson 1 +scienc 1 +depart 1 +usaclick 1 +send 1 +email 1 +messag 1 +personalmik 1 +graduat 1 +hopefulli 1 +littl 1 +luck 1 +finish 1 +around 1 +theend 1 +winter 1 +quarter 1 +immedi 1 +prior 1 +udub 1 +contractor 1 +atibm 1 +motif 1 +widget 1 +user 1 +unterfac 1 +sdata 1 +explor 1 +fix 1 +maintainingcomput 1 +astrophys 1 +observatori 1 +within 1 +debug 1 +could 1 +convinc 1 +realli 1 +correctli 1 +stai 1 +call 1 +saoimagewhich 1 +us 1 +lot 1 +astronom 1 +look 1 +imag 1 +saoimag 1 +gnudistribut 1 +would 1 +like 1 +wyatt 1 +eric 1 +mandel 1 +schwarz 1 +doug 1 +minkfor 1 +guid 1 +project 1 +mention 1 +countless 1 +other 1 +contribut 1 +took 1 +seismologistsin 1 +franc 1 +acquisit 1 +calibr 1 +analysi 1 +truli 1 +wonder 1 +wife 1 +french 1 +class 1 +theallianc 1 +francais 1 +nativ 1 +colombiain 1 +south 1 +america 1 +front 1 +studentsbrows 1 +base 1 +talent 1 +staff 1 +folk 1 +brought 1 +pine 1 +special 1 +shirei 1 +design 1 +traci 1 +stenvik 1 +screen 1 +librari 1 +machin 1 +taught 1 +begin 1 +extens 1 +motiv 1 +frommicrosoft 1 +product 1 +support 1 +sacrif 1 +paper 1 +theintern 1 +technolog 1 +advanc 1 +isota 1 +confer 1 +orient 1 +system 1 +applic 1 +theacm 1 +sigsoft 1 +foundat 1 +poster 1 +made 1 +subject 1 +workshop 1 +doctor 1 +demo 1 +uist 1 +steve 1 +earlier 1 +life 1 +earn 1 +degre 1 +inarchitectur 1 +wooden 1 +kind 1 +planningfrom 1 +mitand 1 +develop 1 +director 1 +forth 1 +grinnel 1 +iowa 1 +thing 1 +differ 1 +skill 1 +visualdesign 1 +problem 1 +solv 1 +valu 1 +still 1 +get 1 +talk 1 +chri 1 +alexand 1 +activ 1 +chapter 1 +associ 1 +improv 1 +ti 1 +larg 1 +small 1 +compani 1 +area 1 +hike 1 +cross 1 +countri 1 +ski 1 +sail 1 +andkayak 1 +swim 1 +lake 1 +bronson 1 +free 1 +taken 1 +marco 1 +harold 1 +sebastien 1 +hilst 1 +born 1 +post 1 +pictur 1 +soon 1 +anoth 1 +photo 1 +scanner 1 +visit 1 +sinc 1 +last 1 +modifi 1 +fridai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..549ba7bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,103 @@ +home 5 +link 4 +collect 3 +hello 2 +washington 2 +vass 2 +also 2 +expand 2 +staff 2 +project 2 +data 2 +graph 2 +page 2 +vassilylong 1 +live 1 +start 1 +realli 1 +person 1 +best 1 +linki 1 +come 1 +thu 1 +http 1 +us 1 +document 1 +shortest 1 +write 1 +young 1 +quit 1 +download 1 +fewfil 1 +sinc 1 +thisstuff 1 +select 1 +quotesrussian 1 +pagesvari 1 +linksguid 1 +html 1 +formsoth 1 +pageuw 1 +pagencsa 1 +mosaic 1 +pagerec 1 +join 1 +cecilproject 1 +cecil 1 +cool 1 +pure 1 +object 1 +orient 1 +languag 1 +andvortex 1 +even 1 +cooler 1 +compil 1 +infrastructur 1 +befast 1 +mayb 1 +paper 1 +written 1 +member 1 +design 1 +implement 1 +themvi 1 +system 1 +assist 1 +access 1 +ourdepartment 1 +room 1 +visitor 1 +databas 1 +recent 1 +beenupgrad 1 +peopl 1 +qual 1 +thezpl 1 +languageto 1 +handl 1 +irregular 1 +structur 1 +repres 1 +anddynam 1 +repartit 1 +arrai 1 +myqual 1 +writeup 1 +short 1 +overview 1 +check 1 +theslidesfrom 1 +present 1 +slide 1 +toresourc 1 +relat 1 +eduobject 1 +mirror 1 +closer 1 +appear 1 +pastor 1 +vybrasyvalsya 1 +okna 1 +pyatyi 1 +deystvov 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..18636a19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,55 @@ +washington 8 +virgil 4 +comput 4 +bourassa 3 +engin 3 +scienc 2 +theunivers 2 +seattl 2 +work 2 +univers 2 +home 1 +page 1 +uwvirgil 1 +evan 1 +bourassavirgil 1 +student 1 +depart 1 +research 1 +interestsinclud 1 +oper 1 +system 1 +architectur 1 +join 1 +boeingin 1 +scientist 1 +scienceorgan 1 +inform 1 +support 1 +servic 1 +divis 1 +bellevu 1 +receiv 1 +electr 1 +arizonast 1 +temp 1 +arizona 1 +electricalengin 1 +accesswhat 1 +interest 1 +expertis 1 +resum 1 +patent 1 +invent 1 +public 1 +present 1 +profession 1 +histori 1 +educ 1 +achiev 1 +recommend 1 +letter 1 +statusoccasion 1 +updat 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..48dd3855 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,201 @@ +avail 7 +univers 6 +work 6 +comput 5 +washington 5 +interest 5 +distribut 5 +system 5 +high 5 +scienc 4 +also 4 +safeti 4 +year 4 +vivek 3 +page 3 +current 3 +bellcor 3 +applic 3 +group 3 +simul 3 +last 3 +seattl 3 +cricket 3 +ratan 2 +home 2 +graduat 2 +depart 2 +research 2 +issu 2 +develop 2 +look 2 +recent 2 +fault 2 +detail 2 +project 2 +softwar 2 +found 2 +involv 2 +rsml 2 +public 2 +angelo 2 +like 2 +plai 2 +game 2 +learn 2 +danc 2 +model 2 +unit 2 +nation 2 +chapter 2 +particular 1 +student 1 +academ 1 +leav 1 +scientistat 1 +morristown 1 +researchwork 1 +distributedsoftwar 1 +simpli 1 +highli 1 +continu 1 +presenc 1 +failur 1 +toolset 1 +anatida 1 +provid 1 +mechan 1 +adher 1 +corba 1 +standard 1 +activ 1 +replic 1 +scheme 1 +indc 1 +effort 1 +focuss 1 +enhanc 1 +toler 1 +cell 1 +directori 1 +servic 1 +foundher 1 +integrationof 1 +technolog 1 +server 1 +murphi 1 +methodolog 1 +head 1 +bydr 1 +nanci 1 +leveson 1 +much 1 +automat 1 +tree 1 +gener 1 +fromrequir 1 +specif 1 +written 1 +well 1 +languag 1 +list 1 +person 1 +inform 1 +born 1 +brought 1 +india 1 +eight 1 +undergradu 1 +attend 1 +state 1 +wesleyan 1 +middletown 1 +receiv 1 +physic 1 +math 1 +right 1 +came 1 +purus 1 +studi 1 +tenni 1 +whenev 1 +squash 1 +lesserext 1 +racquetbal 1 +suffic 1 +ardent 1 +folow 1 +particip 1 +ultra 1 +email 1 +mani 1 +follow 1 +exploit 1 +superson 1 +marin 1 +cowboi 1 +half 1 +taken 1 +keen 1 +ballroom 1 +waltz 1 +foxtrot 1 +chacha 1 +rhumba 1 +tango 1 +swing 1 +west 1 +coast 1 +pleas 1 +occasion 1 +dabbl 1 +mambo 1 +area 1 +best 1 +place 1 +center 1 +us 1 +band 1 +session 1 +everi 1 +saturdai 1 +intern 1 +educ 1 +organ 1 +confer 1 +held 1 +throughout 1 +topic 1 +restructur 1 +reform 1 +part 1 +secur 1 +council 1 +ecosoc 1 +world 1 +bank 1 +rapidpopul 1 +growth 1 +nuclear 1 +prolifer 1 +read 1 +poetri 1 +mirza 1 +ghalib 1 +centuryindian 1 +poet 1 +english 1 +literatur 1 +especi 1 +romant 1 +victorian 1 +period 1 +link 1 +obligatori 1 +collect 1 +sitesthat 1 +tend 1 +visit 1 +often 1 +engin 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..97c7cf4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,30 @@ +washington 3 +geoff 2 +univers 2 +thesi 2 +window 2 +voelker 2 +skywhoi 1 +graduat 1 +student 1 +inseattl 1 +whati 1 +master 1 +wireless 1 +mobil 1 +comput 1 +design 1 +andbuild 1 +system 1 +call 1 +mobisa 1 +current 1 +avoid 1 +settl 1 +topic 1 +wherechateau 1 +guggenheim 1 +annex 1 +washingtonseattl 1 +look 1 +emac 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..18fd108a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,55 @@ +washington 3 +list 3 +home 2 +waynew 2 +look 2 +system 2 +thing 2 +wayn 1 +comput 1 +scienc 1 +engin 1 +depart 1 +univers 1 +seattl 1 +stuff 1 +current 1 +memori 1 +perform 1 +particular 1 +differentmemori 1 +organ 1 +investig 1 +work 1 +beingdon 1 +jean 1 +loup 1 +baer 1 +also 1 +interpret 1 +other 1 +denni 1 +geoff 1 +alec 1 +rightnow 1 +littl 1 +rocki 1 +actual 1 +earli 1 +version 1 +paper 1 +keep 1 +jump 1 +alpha 1 +interest 1 +place 1 +fish 1 +cool 1 +site 1 +howev 1 +peoplewho 1 +peopl 1 +well 1 +test 1 +testwayn 1 +wong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..4a20a92b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,13 @@ +chan 2 +home 2 +time 2 +william 1 +pagewilliam 1 +pagei 1 +spend 1 +hell 1 +spare 1 +hang 1 +heaven 1 +wchan 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..88aaf076 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,140 @@ +weld 9 +comput 4 +plan 4 +scienc 3 +univers 3 +intellig 3 +offic 3 +research 3 +aaai 3 +agent 3 +daniel 2 +receiv 2 +artifici 2 +award 2 +inform 2 +paper 2 +washington 2 +interest 2 +current 2 +softwar 2 +base 2 +associ 1 +professor 1 +engineeringat 1 +ofwashington 1 +bachelor 1 +degre 1 +biochemistri 1 +yale 1 +land 1 +presidenti 1 +young 1 +investig 1 +naval 1 +younginvestig 1 +theadvisori 1 +board 1 +journal 1 +airesearch 1 +guest 1 +editor 1 +edit 1 +report 1 +role 1 +ofintellig 1 +system 1 +nation 1 +infrastructur 1 +isco 1 +program 1 +chair 1 +publish 1 +book 1 +scad 1 +technic 1 +person 1 +data 1 +reach 1 +sieg 1 +hall 1 +phone 1 +work 1 +home 1 +mail 1 +dept 1 +engin 1 +seattlewa 1 +specif 1 +exampl 1 +group 1 +support 1 +ucpop 1 +planner 1 +us 1 +almost 1 +hundr 1 +sitesworldwid 1 +mani 1 +avail 1 +electron 1 +arehi 1 +favorit 1 +repres 1 +sens 1 +action 1 +middl 1 +ground 1 +revisit 1 +gather 1 +august 1 +control 1 +aip 1 +ascal 1 +comparison 1 +shop 1 +world 1 +wide 1 +januari 1 +softbot 1 +interfac 1 +internet 1 +cacm 1 +juli 1 +anintroduct 1 +least 1 +commit 1 +magazin 1 +winter 1 +select 1 +exhaustivelist 1 +recreat 1 +absent 1 +foundat 1 +cafe 1 +allegro 1 +stormymountain 1 +climb 1 +past 1 +enjoi 1 +travel 1 +theworld 1 +like 1 +found 1 +plai 1 +twin 1 +boi 1 +adam 1 +galen 1 +invit 1 +visit 1 +galleri 1 +pacif 1 +northwest 1 +desert 1 +wilder 1 +photograph 1 +also 1 +illustr 1 +stori 1 +morocco 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..c6243c61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,10 @@ +wendi 3 +belluomini 2 +washington 1 +graduat 1 +master 1 +current 1 +work 1 +univ 1 +utah 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..78b2d784 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,55 @@ +comput 4 +scienc 3 +engin 3 +washington 3 +thedepart 2 +compil 2 +system 2 +wilson 1 +hsiehwilson 1 +hsiehi 1 +postdoc 1 +theunivers 1 +inseattl 1 +member 1 +thespin 1 +project 1 +receiv 1 +electr 1 +sciencein 1 +theschool 1 +engineeringatmit 1 +work 1 +thelaboratori 1 +advisor 1 +werefran 1 +kaashoekandbil 1 +weihl 1 +research 1 +parallel 1 +myresearch 1 +interest 1 +interact 1 +among 1 +programminglanguag 1 +runtim 1 +oper 1 +architectur 1 +select 1 +publicationsselect 1 +linksperson 1 +interestswilson 1 +hsieh 1 +depart 1 +univers 1 +seattl 1 +offic 1 +sieg 1 +move 1 +phone 1 +numberha 1 +chang 1 +voic 1 +whsieh 1 +public 1 +keyoctob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..47fc159b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,83 @@ +wolman 7 +washington 3 +network 3 +alec 2 +home 2 +scienc 2 +seattl 2 +current 2 +research 2 +perform 2 +interpret 2 +usenix 2 +voelker 2 +page 1 +wolmanwolman 1 +eduworkcomput 1 +engin 1 +departmentunivers 1 +washingtonbox 1 +graduat 1 +student 1 +thecomput 1 +departmentat 1 +univers 1 +offic 1 +isroom 1 +thechateau 1 +gradual 1 +school 1 +work 1 +fordigit 1 +equip 1 +corp 1 +cambridg 1 +interest 1 +includ 1 +oper 1 +system 1 +architectur 1 +recent 1 +project 1 +scalabl 1 +etch 1 +binari 1 +instrument 1 +optim 1 +executablesrocki 1 +performanceon 1 +line 1 +paper 1 +firewal 1 +applic 1 +relai 1 +trees 1 +summer 1 +latenc 1 +analysi 1 +thekkath 1 +winter 1 +structur 1 +romer 1 +wong 1 +baer 1 +bershad 1 +levi 1 +appear 1 +asplo 1 +hungri 1 +otter 1 +fixha 1 +strang 1 +idea 1 +nervou 1 +habit 1 +realli 1 +plai 1 +guitar 1 +link 1 +hallwolman 1 +diseasewolman 1 +pressur 1 +treat 1 +lumber 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..d628015a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,85 @@ +baer 6 +perform 6 +parallel 5 +simul 5 +comput 4 +engin 4 +page 4 +washington 3 +evalu 3 +trace 3 +driven 3 +xiaohan 2 +scienc 2 +interest 2 +architectur 2 +distribut 2 +system 2 +paper 2 +confer 2 +multiprocessor 2 +proceed 2 +optimist 2 +zhang 2 +xqin 1 +depart 1 +univers 1 +seattl 1 +offic 1 +sieg 1 +phone 1 +year 1 +graduat 1 +student 1 +work 1 +jean 1 +loup 1 +research 1 +includ 1 +method 1 +model 1 +short 1 +term 1 +goal 1 +school 1 +soon 1 +possibl 1 +cluster 1 +base 1 +submit 1 +explicit 1 +communicationprimit 1 +cach 1 +coher 1 +appear 1 +hpca 1 +compar 1 +studi 1 +conserv 1 +award 1 +symposium 1 +tech 1 +report 1 +dept 1 +univ 1 +implement 1 +intern 1 +process 1 +graph 1 +toolfor 1 +monitor 1 +visual 1 +basedmultiprocessor 1 +nalluri 1 +journal 1 +june 1 +predict 1 +processingon 1 +numa 1 +ieee 1 +tran 1 +softwar 1 +stuff 1 +photo 1 +chinaread 1 +chinesesearch 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..8ea72a3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,71 @@ +washington 3 +info 3 +link 3 +yasushi 2 +project 2 +desktop 2 +index 2 +spin 2 +saitoyasushi 1 +saito 1 +second 1 +year 1 +graduat 1 +student 1 +atdepart 1 +comput 1 +scienc 1 +engin 1 +univers 1 +seattl 1 +current 1 +workingwith 1 +brian 1 +bershad 1 +thespin 1 +address 1 +andperson 1 +meta 1 +metacrawl 1 +yahoo 1 +refer 1 +alta 1 +vista 1 +lyco 1 +archi 1 +tech 1 +research 1 +intern 1 +document 1 +modula 1 +time 1 +schedul 1 +survei 1 +oper 1 +system 1 +transact 1 +servic 1 +qual 1 +sightse 1 +japanes 1 +random 1 +javascript 1 +apprentic 1 +page 1 +us 1 +linux 1 +connect 1 +gatewai 1 +japan 1 +perl 1 +patch 1 +touch 1 +type 1 +trainer 1 +dvorak 1 +lesson 1 +text 1 +want 1 +finger 1 +talk 1 +trycanva 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..d2ca94f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,71 @@ +zamir 4 +home 3 +washington 3 +depart 2 +seattl 2 +interest 2 +internet 2 +document 2 +search 2 +pictur 2 +oren 1 +pageoren 1 +page 1 +comput 1 +scienc 1 +engineeringunivers 1 +washingtonbox 1 +offic 1 +chateau 1 +edui 1 +isra 1 +graduat 1 +student 1 +computersci 1 +engin 1 +univers 1 +myundergradu 1 +degre 1 +physic 1 +mathemat 1 +hebrewunivers 1 +jerusalem 1 +israel 1 +field 1 +artifici 1 +intellig 1 +softwareengin 1 +current 1 +work 1 +line 1 +cluster 1 +algorithmsfor 1 +retriev 1 +basic 1 +idea 1 +help 1 +userwith 1 +result 1 +hundr 1 +worki 1 +done 1 +part 1 +metacrawl 1 +parallel 1 +servic 1 +along 1 +orenetzioni 1 +erik 1 +selberg 1 +resum 1 +avail 1 +thing 1 +like 1 +dive 1 +sinai 1 +jeeptour 1 +ski 1 +last 1 +raft 1 +trip 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..0e7c04f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,126 @@ +comput 10 +architectur 6 +also 4 +baer 3 +scienc 3 +research 3 +parallel 3 +system 3 +ieee 3 +chairman 3 +cach 3 +professor 2 +engin 2 +grenobl 2 +ucla 2 +author 2 +serv 2 +fellow 2 +journal 2 +distribut 2 +internationalsymposium 2 +project 2 +page 2 +protocol 2 +cluster 2 +prefetch 2 +simul 2 +titl 1 +jean 1 +loup 1 +professorand 1 +adjunct 1 +electr 1 +receiv 1 +diplom 1 +ingnieur 1 +electricalengin 1 +doctorat 1 +cycl 1 +theuniversit 1 +franc 1 +prior 1 +join 1 +univers 1 +washington 1 +laboratoir 1 +decalcul 1 +universit 1 +member 1 +digit 1 +technologygroup 1 +present 1 +interest 1 +anddistribut 1 +process 1 +coauthor 1 +paper 1 +thesearea 1 +textbook 1 +press 1 +distinguishedvisitor 1 +nation 1 +lectur 1 +guggenheim 1 +editor 1 +languag 1 +asprogram 1 +intern 1 +confer 1 +parallelprocess 1 +program 1 +gener 1 +ofth 1 +current 1 +chair 1 +sigarch 1 +eighteen 1 +student 1 +complet 1 +dissert 1 +professorba 1 +direct 1 +twelv 1 +work 1 +industri 1 +laboratoriesand 1 +inacademia 1 +although 1 +year 1 +hashad 1 +difficulti 1 +retain 1 +french 1 +accent 1 +cours 1 +recent 1 +look 1 +involv 1 +coher 1 +improv 1 +singl 1 +perform 1 +softwar 1 +primit 1 +appear 1 +hpca 1 +uniprocessor 1 +hardwar 1 +comparisonwith 1 +block 1 +asplo 1 +multiprocessor 1 +isca 1 +impact 1 +specul 1 +execut 1 +denni 1 +home 1 +andisca 1 +trace 1 +driven 1 +conserv 1 +approach 1 +icpp 1 +optimisticapproach 1 +comparison 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..4446aaad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,118 @@ +chamber 6 +craig 3 +comput 3 +scienc 3 +system 3 +object 3 +orient 3 +languag 3 +incorpor 2 +investig 2 +vortex 2 +project 2 +cecil 2 +static 2 +optim 2 +member 2 +spin 2 +extens 2 +applic 2 +washington 2 +assist 1 +professor 1 +join 1 +faculti 1 +receiv 1 +degre 1 +stanford 1 +research 1 +interest 1 +design 1 +implementationof 1 +advanc 1 +program 1 +express 1 +programminglanguag 1 +effici 1 +implement 1 +support 1 +programmingenviron 1 +current 1 +languagesand 1 +lead 1 +ceciland 1 +pure 1 +languageserv 1 +vehicl 1 +multi 1 +method 1 +type 1 +modul 1 +featur 1 +compilersystem 1 +intra 1 +andinterprocedur 1 +analys 1 +profil 1 +guid 1 +withfront 1 +end 1 +modula 1 +java 1 +previous 1 +chamberswa 1 +self 1 +also 1 +spinproject 1 +oper 1 +microkernel 1 +whichsupport 1 +dynam 1 +adapt 1 +interfac 1 +implementationsund 1 +direct 1 +control 1 +still 1 +maintain 1 +systemintegr 1 +isol 1 +util 1 +dialect 1 +themodula 1 +pointer 1 +safe 1 +kernel 1 +spinalso 1 +reli 1 +dynamiccompil 1 +achiev 1 +high 1 +perform 1 +despit 1 +fine 1 +grainedextens 1 +click 1 +herefor 1 +inform 1 +undergradu 1 +graduat 1 +level 1 +researchproject 1 +area 1 +contact 1 +informationprof 1 +chambersdepart 1 +engineeringunivers 1 +washingtonbox 1 +seattl 1 +mail 1 +requir 1 +street 1 +address 1 +sieg 1 +hall 1 +room 1 +last 1 +updat 1 +april 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..c691d861 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,116 @@ +carl 4 +ebel 4 +comput 4 +project 4 +scienc 3 +washington 3 +triptych 3 +interest 2 +system 2 +chao 2 +fpga 2 +home 1 +page 1 +ebelingdepart 1 +engin 1 +univers 1 +ofwashington 1 +seattl 1 +voic 1 +mail 1 +offic 1 +sieg 1 +hall 1 +room 1 +associ 1 +professor 1 +physic 1 +wheatoncolleg 1 +southern 1 +illinoisunivers 1 +carnegi 1 +mellonunivers 1 +join 1 +research 1 +fall 1 +categori 1 +vlsiarchitectur 1 +aid 1 +design 1 +digit 1 +haswork 1 +number 1 +vlsi 1 +includ 1 +hitech 1 +chessmachin 1 +apex 1 +graphic 1 +chip 1 +draw 1 +spline 1 +curv 1 +andsurfac 1 +field 1 +programm 1 +gate 1 +arrai 1 +current 1 +hei 1 +involv 1 +build 1 +multicomput 1 +routingnetwork 1 +focu 1 +method 1 +optim 1 +theperform 1 +circuit 1 +us 1 +level 1 +sensit 1 +latch 1 +placementand 1 +rout 1 +algorithm 1 +particularli 1 +teachingspr 1 +advanc 1 +logic 1 +designoffic 1 +hour 1 +mondai 1 +thursdai 1 +travel 1 +april 1 +fccm 1 +napamai 1 +burlington 1 +chicagojun 1 +vegasresearch 1 +northwest 1 +laboratori 1 +integr 1 +router 1 +high 1 +densiti 1 +architectur 1 +public 1 +journal 1 +articl 1 +confer 1 +workshop 1 +paper 1 +graduat 1 +student 1 +soha 1 +hassoun 1 +neil 1 +mckenzi 1 +darren 1 +cronquist 1 +paul 1 +franklin 1 +amara 1 +galleryelan 1 +galleryebel 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..f2cbb1f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,39 @@ +page 4 +seattl 3 +link 2 +uncertainti 2 +inform 2 +schedul 2 +steve 1 +hanksunivers 1 +washingtondepart 1 +comput 1 +scienc 1 +design 1 +agent 1 +architecturesai 1 +magazin 1 +seriou 1 +home 1 +spring 1 +confer 1 +group 1 +restaur 1 +symphoni 1 +wine 1 +opera 1 +server 1 +edita 1 +gruberova 1 +photo 1 +carlo 1 +maria 1 +giulini 1 +discographi 1 +sumac 1 +tenni 1 +new 1 +hank 1 +washington 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..29ec35d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,71 @@ +spent 3 +professor 2 +engin 2 +degre 2 +univers 2 +year 2 +divis 2 +comput 2 +group 2 +system 2 +titl 1 +alistair 1 +holden 1 +electr 1 +origin 1 +highland 1 +scotland 1 +receivedhi 1 +glasgow 1 +withth 1 +british 1 +broadcast 1 +corpor 1 +graduateapprentic 1 +research 1 +yale 1 +edison 1 +fellowship 1 +phddegre 1 +washington 1 +dissert 1 +learningin 1 +artifici 1 +intellig 1 +interest 1 +began 1 +take 1 +coursefrom 1 +colin 1 +cherri 1 +imperi 1 +colleg 1 +london 1 +thebbc 1 +initi 1 +scienc 1 +program 1 +theuw 1 +time 1 +faculti 1 +mostli 1 +math 1 +departmentsform 1 +within 1 +graduat 1 +school 1 +current 1 +work 1 +applic 1 +knowledg 1 +base 1 +verif 1 +expert 1 +integr 1 +symbol 1 +neural 1 +netmethodolog 1 +speech 1 +understand 1 +aid 1 +design 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..3cd49829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,116 @@ +research 5 +societi 5 +scienc 4 +prize 4 +berkelei 4 +univers 4 +institut 4 +mathemat 4 +lectur 4 +comput 4 +problem 4 +professor 3 +nation 3 +academi 3 +american 3 +board 3 +karp 2 +washington 2 +award 2 +member 2 +advanc 2 +oper 2 +america 2 +manag 2 +program 2 +john 2 +neumann 2 +presentmemb 2 +complex 2 +random 2 +algorithm 2 +travel 2 +salesman 2 +dick 1 +karprichard 1 +karpprofessor 1 +ofcomputersci 1 +engin 1 +andadjunct 1 +ofmolecularbiotechnologyunivers 1 +ofwashington 1 +eduaward 1 +membershipsn 1 +medal 1 +babbag 1 +fellow 1 +ture 1 +sciencesmemb 1 +engineeringfellow 1 +art 1 +sciencesfellow 1 +associ 1 +sciencedistinguish 1 +teach 1 +academ 1 +senat 1 +class 1 +chair 1 +berkeleylanchest 1 +fulkerson 1 +theori 1 +faculti 1 +hermann 1 +weyl 1 +studi 1 +industri 1 +appliedmathemat 1 +miller 1 +honorari 1 +doctor 1 +georgetown 1 +massachusett 1 +technion 1 +pennsylvania 1 +advisori 1 +profession 1 +forsoci 1 +respons 1 +governor 1 +weizmann 1 +truste 1 +intern 1 +scienceinstitut 1 +presentselect 1 +public 1 +combinator 1 +turingaward 1 +commun 1 +construct 1 +perfect 1 +match 1 +upfal 1 +wigderson 1 +combinatorica 1 +probabilist 1 +analysi 1 +partit 1 +plane 1 +ofoper 1 +theoret 1 +improv 1 +effici 1 +fornetwork 1 +flow 1 +edmond 1 +journal 1 +theacm 1 +reduc 1 +among 1 +combinatori 1 +plenum 1 +press 1 +minimum 1 +spanningtre 1 +part 1 +held 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..169e17c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,250 @@ +comput 11 +lazowska 10 +univers 10 +scienc 10 +committe 10 +washington 9 +chair 8 +inform 7 +board 6 +technolog 5 +member 5 +research 5 +program 5 +thedepart 4 +advisori 4 +system 4 +home 4 +lectur 3 +universityof 3 +berkelei 3 +faculti 3 +serv 3 +nation 3 +review 3 +perform 3 +concern 3 +engin 3 +ahalf 2 +centuri 2 +exponentialprogress 2 +annualfaculti 2 +vicepresid 2 +gore 2 +speech 2 +eniac 2 +thanniversari 2 +celebr 2 +california 2 +invent 2 +chines 2 +cook 2 +nathanmyhrvold 2 +joinsedlazowska 2 +theuwcs 2 +trip 2 +memori 2 +graduat 2 +student 2 +seem 2 +director 2 +depart 2 +industri 2 +council 2 +person 2 +award 2 +select 2 +complet 2 +recent 2 +machineri 2 +testimonyto 2 +page 2 +grade 2 +congress 1 +talk 1 +like 1 +georgejetson 1 +support 1 +polici 1 +appropri 1 +forfr 1 +flintston 1 +lane 1 +professor 1 +mostlywearsti 1 +push 1 +paper 1 +rack 1 +frequent 1 +flier 1 +mile 1 +pick 1 +onthi 1 +mbquicktim 1 +healso 1 +host 1 +lot 1 +visitor 1 +surpris 1 +number 1 +havefunnynos 1 +researchassoci 1 +includ 1 +essenti 1 +allgradu 1 +laboratoriesin 1 +field 1 +ofcra 1 +govern 1 +affair 1 +foundat 1 +advisorycommitte 1 +andengin 1 +scomputersci 1 +telecommun 1 +technic 1 +formicrosoft 1 +personnationalsemiconductor 1 +academicadvisori 1 +ofdata 1 +corpor 1 +scientif 1 +forcabl 1 +hows 1 +ventur 1 +cascadia 1 +fund 1 +belong 1 +stand 1 +committeesfor 1 +eecsat 1 +andth 1 +atstanford 1 +virginia 1 +hongkong 1 +ture 1 +servic 1 +councilpanel 1 +multi 1 +agencyhigh 1 +computingand 1 +commun 1 +brook 1 +sutherland 1 +andha 1 +examinersfor 1 +record 1 +examin 1 +test 1 +sigmetr 1 +associ 1 +sspecial 1 +interest 1 +group 1 +softwar 1 +chairof 1 +symposium 1 +oper 1 +principl 1 +andeditor 1 +ieee 1 +transact 1 +addit 1 +servinga 1 +onacadem 1 +thecommitte 1 +deanship 1 +colleg 1 +artsand 1 +forth 1 +molecular 1 +biotechnolog 1 +amemb 1 +deanof 1 +deliv 1 +theunivers 1 +annual 1 +fellowof 1 +associationfor 1 +theinstitut 1 +electr 1 +andelectron 1 +seventeenph 1 +studentshav 1 +degre 1 +work 1 +miscellan 1 +link 1 +integratedoverview 1 +region 1 +also 1 +apersuas 1 +player 1 +version 1 +intend 1 +forloc 1 +consumpt 1 +theimpact 1 +perspect 1 +uwcs 1 +profession 1 +master 1 +persuas 1 +playertopten 1 +reason 1 +major 1 +csebuild 1 +project 1 +abbrevi 1 +cvcomputingresearch 1 +drive 1 +informationtechnolog 1 +forwardmassi 1 +goldmanreport 1 +alleg 1 +cseph 1 +product 1 +issu 1 +flaw 1 +data 1 +medianyear 1 +confer 1 +boardstudi 1 +doctor 1 +think 1 +driver 1 +highwai 1 +saturdayseminar 1 +novemb 1 +houseappropri 1 +april 1 +hous 1 +hpcc 1 +octob 1 +februari 1 +interestinghom 1 +sometim 1 +demo 1 +purpos 1 +odeto 1 +execut 1 +vice 1 +presid 1 +tallman 1 +trask 1 +departsfor 1 +duke 1 +lanelazowska 1 +down 1 +famili 1 +pagedirect 1 +houseshilshol 1 +aquat 1 +club 1 +pagerec 1 +discoveredreview 1 +poetryfing 1 +scheduleinform 1 +offic 1 +reflector 1 +http 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..b183fb23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,94 @@ +professor 6 +univers 5 +system 5 +shaw 3 +engin 3 +stanford 3 +scienc 3 +comput 3 +research 3 +fulbright 2 +associ 2 +real 2 +time 2 +softwar 2 +book 2 +committe 2 +dissert 2 +includ 2 +half 2 +hobbi 2 +titl 1 +alan 1 +graduat 1 +bachelor 1 +sdegre 1 +physic 1 +toronto 1 +amast 1 +mathemat 1 +incomput 1 +addit 1 +facultyappoint 1 +washington 1 +start 1 +hasbeen 1 +assist 1 +cornel 1 +visit 1 +scholar 1 +pari 1 +guest 1 +informat 1 +zurich 1 +atth 1 +linear 1 +acceler 1 +center 1 +theibm 1 +corpor 1 +current 1 +interest 1 +specif 1 +method 1 +publicationsinclud 1 +textbook 1 +oper 1 +softwareengin 1 +introductori 1 +text 1 +andan 1 +edit 1 +document 1 +prepar 1 +serv 1 +memberof 1 +editori 1 +member 1 +sciencescreen 1 +award 1 +associateeditor 1 +journal 1 +editor 1 +ieee 1 +transact 1 +among 1 +thing 1 +supervis 1 +mani 1 +these 1 +project 1 +fifteen 1 +distinguish 1 +former 1 +student 1 +academ 1 +posit 1 +work 1 +live 1 +good 1 +food 1 +trumpet 1 +hike 1 +bike 1 +tenni 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..79db93f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,97 @@ +parallel 6 +snyder 4 +professor 4 +univers 3 +serv 3 +research 3 +program 3 +chip 3 +editor 3 +system 3 +scienc 3 +receiv 2 +visit 2 +faculti 2 +architectur 2 +complet 2 +project 2 +journal 2 +nation 2 +committe 2 +comput 2 +numer 2 +direct 2 +chair 2 +titl 1 +lawrenc 1 +bachelor 1 +sdegre 1 +iowa 1 +mathemat 1 +econom 1 +andin 1 +carnegi 1 +mellon 1 +computersci 1 +scholar 1 +washington 1 +join 1 +perman 1 +onth 1 +yale 1 +purdu 1 +scholarat 1 +harvard 1 +rang 1 +proof 1 +theundecid 1 +properti 1 +design 1 +developmentof 1 +singl 1 +cmo 1 +microprocessor 1 +quarter 1 +hors 1 +hecreat 1 +configur 1 +highli 1 +thepok 1 +environ 1 +inventor 1 +chaoticrout 1 +follow 1 +blue 1 +nowprincip 1 +investig 1 +orca 1 +nwli 1 +associ 1 +computerand 1 +ofth 1 +area 1 +ieee 1 +transact 1 +anddistribut 1 +foundationadvisori 1 +divis 1 +particip 1 +advisorycommitte 1 +futur 1 +andcomput 1 +polici 1 +distinguish 1 +doctoraldissert 1 +award 1 +select 1 +first 1 +symposium 1 +algorithmsand 1 +addit 1 +dozen 1 +student 1 +doctor 1 +degreesund 1 +guid 1 +master 1 +seniorproject 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..e7697d8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,81 @@ +system 6 +comput 4 +somani 3 +professor 3 +design 3 +network 3 +parallel 3 +arun 2 +electr 2 +engin 2 +develop 2 +indian 2 +toler 2 +interconnect 2 +architectur 2 +proteu 2 +tenni 2 +earn 1 +msee 1 +degre 1 +mcgill 1 +univers 1 +montreal 1 +canada 1 +respect 1 +prior 1 +work 1 +scientif 1 +offic 1 +govt 1 +india 1 +delhi 1 +period 1 +anti 1 +submarin 1 +warfar 1 +navi 1 +research 1 +interest 1 +area 1 +offault 1 +algorithm 1 +current 1 +involv 1 +three 1 +major 1 +project 1 +high 1 +integr 1 +address 1 +issu 1 +relat 1 +tocach 1 +memori 1 +redund 1 +evalu 1 +tool 1 +congest 1 +control 1 +fault 1 +broadband 1 +multiprocessor 1 +autom 1 +classif 1 +object 1 +base 1 +generalizedenhanc 1 +hypercub 1 +reconfigur 1 +explor 1 +coars 1 +grain 1 +like 1 +cook 1 +food 1 +hike 1 +plai 1 +bridg 1 +tabl 1 +inform 1 +dpcnl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..6157239c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,173 @@ +comput 13 +serv 7 +tanimoto 6 +univers 6 +chair 6 +pattern 5 +visit 4 +confer 4 +imag 4 +visual 4 +ieee 4 +professor 3 +franc 3 +research 3 +analysi 3 +program 3 +intellig 3 +process 3 +languag 3 +recognit 3 +steven 2 +home 2 +page 2 +scienc 2 +adjunct 2 +electricalengin 2 +anda 2 +linkp 2 +sweden 2 +scientist 2 +machin 2 +june 2 +addit 2 +parallel 2 +artifici 2 +mathemat 2 +edit 2 +vision 2 +lisp 2 +current 2 +societi 2 +gener 2 +conferenceon 2 +committe 2 +journal 2 +colleg 2 +engin 1 +receiv 1 +degre 1 +fromharvard 1 +princeton 1 +join 1 +theunivers 1 +washington 1 +faculti 1 +year 1 +teach 1 +atth 1 +connecticut 1 +professorat 1 +institut 1 +programm 1 +pari 1 +scholar 1 +sinc 1 +hasalso 1 +member 1 +depart 1 +atkob 1 +japan 1 +think 1 +corpor 1 +cambridg 1 +massachusett 1 +theinstitut 1 +enseign 1 +superieur 1 +techniquesd 1 +electroniqu 1 +irest 1 +nant 1 +assist 1 +variou 1 +hasrec 1 +forimag 1 +devot 1 +processingand 1 +commun 1 +take 1 +place 1 +bordeaux 1 +interest 1 +includ 1 +ofimag 1 +particularli 1 +us 1 +processor 1 +educ 1 +technolog 1 +currentlydirect 1 +sponsor 1 +project 1 +experi 1 +throughimag 1 +whose 1 +object 1 +develop 1 +person 1 +softwarethat 1 +motiv 1 +grade 1 +student 1 +studi 1 +written 1 +coauthor 1 +paper 1 +thebook 1 +structur 1 +author 1 +textbook 1 +entitl 1 +element 1 +introductionus 1 +publish 1 +common 1 +accompanyingsoftwar 1 +work 1 +book 1 +subject 1 +ofparallel 1 +organ 1 +internationalworkshop 1 +held 1 +seattl 1 +serveda 1 +meet 1 +bergen 1 +norwai 1 +also 1 +intern 1 +subconfer 1 +programcommitte 1 +numer 1 +patternrecognit 1 +chairman 1 +societyworkshop 1 +architectur 1 +machineintellig 1 +steer 1 +theieee 1 +symposiaon 1 +editorialboard 1 +cvgip 1 +understand 1 +editor 1 +chief 1 +ieeetransact 1 +relat 1 +activ 1 +engineeringeduc 1 +polici 1 +vice 1 +council 1 +elect 1 +fellow 1 +outsid 1 +steve 1 +enjoi 1 +plai 1 +jazz 1 +andclass 1 +piano 1 +music 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..56ada86a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,133 @@ +comput 15 +scienc 10 +serv 10 +univers 5 +theori 5 +committe 5 +young 4 +foundat 4 +chairman 4 +professor 3 +colleg 3 +faculti 3 +also 3 +research 3 +program 3 +depart 2 +nation 2 +inform 2 +berkelei 2 +associ 2 +theoret 2 +complex 2 +algorithm 2 +special 2 +societi 2 +editori 2 +board 2 +current 2 +journal 2 +titl 1 +paul 1 +graduateof 1 +antioch 1 +receiv 1 +hejoin 1 +washington 1 +seventeen 1 +year 1 +atpurdu 1 +earli 1 +member 1 +inperhap 1 +first 1 +unit 1 +state 1 +postdoctor 1 +fellow 1 +atstanford 1 +reed 1 +briefli 1 +aschairman 1 +theunivers 1 +mexico 1 +twice 1 +taught 1 +visit 1 +professorin 1 +divis 1 +california 1 +becam 1 +dean 1 +facil 1 +engin 1 +interest 1 +emphasi 1 +question 1 +thegener 1 +connect 1 +mathematicallog 1 +author 1 +coauthor 1 +paper 1 +area 1 +iscoauthor 1 +graduat 1 +textbook 1 +gener 1 +three 1 +time 1 +symposiumon 1 +executivecommitte 1 +nomin 1 +interestgroup 1 +sigact 1 +chairmanof 1 +ieee 1 +annualsymposium 1 +foc 1 +hasserv 1 +vice 1 +stechnic 1 +mathemat 1 +chair 1 +programcommitte 1 +structur 1 +confer 1 +advisorysubcommitte 1 +thiscommitte 1 +ofth 1 +issu 1 +control 1 +annal 1 +histori 1 +ofcomput 1 +notr 1 +dame 1 +formallog 1 +system 1 +eleven 1 +student 1 +complet 1 +doctor 1 +dissert 1 +underprofessor 1 +direct 1 +sever 1 +gone 1 +dopostdoctor 1 +work 1 +cornel 1 +ofcalifornia 1 +eight 1 +hold 1 +posit 1 +avarieti 1 +chosen 1 +industri 1 +employ 1 +leather 1 +motorcycl 1 +jacket 1 +read 1 +ratherthan 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..a79df3af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,69 @@ +comput 4 +support 3 +applic 3 +zahorjan 2 +univers 2 +receiv 2 +research 2 +schedul 2 +runtim 2 +parallel 2 +polici 2 +system 2 +program 2 +titl 1 +john 1 +professor 1 +graduat 1 +frombrown 1 +oftoronto 1 +presidenti 1 +young 1 +investigatoraward 1 +primari 1 +interest 1 +area 1 +parallelsystem 1 +supportfor 1 +mobil 1 +current 1 +focu 1 +continu 1 +mediaappl 1 +involv 1 +real 1 +time 1 +audio 1 +video 1 +thegoal 1 +provid 1 +interfac 1 +allow 1 +torespond 1 +easili 1 +chang 1 +load 1 +activ 1 +topic 1 +includ 1 +techniqu 1 +parallelizationof 1 +code 1 +written 1 +sequenti 1 +languag 1 +exhibit 1 +bothcontrol 1 +data 1 +construct 1 +develop 1 +intend 1 +formobil 1 +platform 1 +editori 1 +board 1 +ieee 1 +transactionson 1 +softwar 1 +engin 1 +survei 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..af7bad98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,175 @@ +imag 9 +project 8 +student 7 +metip 6 +learn 6 +process 5 +program 5 +mathemat 4 +us 4 +activ 4 +current 4 +experi 3 +comput 3 +click 3 +order 3 +free 3 +copi 3 +todai 3 +softwar 3 +tanimoto 2 +scienc 2 +encourag 2 +discuss 2 +digit 2 +develop 2 +allow 2 +materi 2 +part 2 +number 2 +pixel 2 +calcul 2 +transform 2 +applic 2 +list 2 +peopl 2 +work 2 +relat 2 +educ 2 +pleas 2 +link 2 +xform 2 +washington 2 +director 1 +steven 1 +depart 1 +emphas 1 +practic 1 +group 1 +exploratori 1 +open 1 +end 1 +goal 1 +help 1 +meet 1 +theseobject 1 +particular 1 +seri 1 +applicationsdesign 1 +manipul 1 +choic 1 +intend 1 +enrich 1 +rather 1 +astandard 1 +classroom 1 +curriculum 1 +teacher 1 +plai 1 +variou 1 +role 1 +withthes 1 +exampl 1 +catalyz 1 +bylead 1 +theconcept 1 +explor 1 +toexplor 1 +warper 1 +programm 1 +primarili 1 +pentium 1 +base 1 +srun 1 +microsoft 1 +window 1 +alsoavail 1 +appl 1 +macintosh 1 +volunt 1 +mathematicsteach 1 +particip 1 +test 1 +experiment 1 +transcriptproject 1 +design 1 +record 1 +keep 1 +framework 1 +willfacilit 1 +storag 1 +person 1 +academicinform 1 +hard 1 +disk 1 +floppi 1 +own 1 +themetip 1 +close 1 +involv 1 +studi 1 +ofmultiplay 1 +tointegr 1 +idea 1 +describ 1 +prospect 1 +forth 1 +direct 1 +distribut 1 +databas 1 +collect 1 +user 1 +itsxform 1 +done 1 +somethingfun 1 +know 1 +put 1 +version 1 +document 1 +onlin 1 +littl 1 +demonstr 1 +beenput 1 +togeth 1 +graduat 1 +took 1 +seminar 1 +winter 1 +environ 1 +integr 1 +witha 1 +subset 1 +common 1 +lisp 1 +offer 1 +technic 1 +essenti 1 +newapproach 1 +teach 1 +fundamentalattract 1 +approach 1 +thecomput 1 +pursuit 1 +creat 1 +neat 1 +visual 1 +effect 1 +digitalimag 1 +portrai 1 +thing 1 +interest 1 +successfulli 1 +instal 1 +would 1 +like 1 +discussteach 1 +contact 1 +support 1 +nation 1 +foundat 1 +undergr 1 +bricker 1 +last 1 +modifi 1 +tuesdai 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..d55caaf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,53 @@ +mobil 5 +comput 4 +univers 2 +washington 2 +system 2 +infrastructur 2 +paper 2 +research 1 +ubiquit 1 +washingtonher 1 +overview 1 +computingresearch 1 +project 1 +mobisa 1 +inform 1 +wireless 1 +environ 1 +handheld 1 +task 1 +graph 1 +manag 1 +applic 1 +cope 1 +resourc 1 +variabl 1 +survei 1 +describ 1 +fundament 1 +challeng 1 +field 1 +program 1 +methodolog 1 +disconnect 1 +operationdistribut 1 +transact 1 +mobilecomput 1 +systemcontact 1 +prof 1 +brian 1 +bershadprof 1 +gaetano 1 +borriellomarc 1 +fiuczynskigeorg 1 +formanprof 1 +hank 1 +levygeoff 1 +voelkerterri 1 +watsonprof 1 +john 1 +zahorjan 1 +last 1 +updat 1 +forman 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..59bbc2e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,138 @@ +cecil 10 +project 10 +vortex 6 +languag 6 +object 5 +orient 5 +research 3 +implement 3 +support 3 +static 3 +like 3 +class 3 +list 3 +avail 3 +page 2 +high 2 +type 2 +system 2 +optim 2 +level 2 +current 2 +profil 2 +intraprocedur 2 +messag 2 +analys 2 +also 2 +elimin 2 +written 2 +releas 2 +suno 2 +subscrib 2 +inform 2 +overview 2 +member 2 +paper 2 +sampl 2 +peopl 2 +intern 2 +document 2 +projectuw 1 +projectwelcom 1 +home 1 +conduct 1 +program 1 +design 1 +emphasi 1 +issu 1 +pure 1 +intend 1 +rapidconstruct 1 +qualiti 1 +extens 1 +softwar 1 +incorporatesmulti 1 +method 1 +simpl 1 +prototyp 1 +base 1 +model 1 +mechan 1 +tosupport 1 +structur 1 +form 1 +comput 1 +inherit 1 +modul 1 +basedencapsul 1 +flexibl 1 +allowsstat 1 +dynam 1 +code 1 +freeli 1 +compil 1 +infrastructur 1 +forobject 1 +target 1 +pureobject 1 +hybrid 1 +orientedlanguag 1 +modula 1 +java 1 +incorporateshigh 1 +analysi 1 +hierachyanalysi 1 +guid 1 +receiv 1 +predict 1 +guidedselect 1 +procedur 1 +special 1 +split 1 +automat 1 +inlin 1 +closur 1 +includ 1 +acollect 1 +standard 1 +commonsubexpress 1 +dead 1 +assign 1 +vortexcompil 1 +entir 1 +initi 1 +beta 1 +currentlyavail 1 +sparc 1 +run 1 +either 1 +solari 1 +send 1 +mail 1 +interest 1 +request 1 +bodi 1 +ofinterest 1 +parti 1 +obtain 1 +thebeta 1 +recent 1 +finish 1 +technic 1 +report 1 +describ 1 +much 1 +detail 1 +goal 1 +direct 1 +postscript 1 +version 1 +past 1 +uwcs 1 +sourc 1 +relat 1 +pointer 1 +projectslast 1 +updat 1 +august 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..4da3d89e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,34 @@ +grail 3 +imag 3 +washington 3 +graphic 2 +depart 2 +comput 2 +scienc 2 +engin 2 +laboratorywelcom 1 +home 1 +page 1 +laboratori 1 +theunivers 1 +inform 1 +peopl 1 +cours 1 +research 1 +project 1 +public 1 +these 1 +softwar 1 +data 1 +cool 1 +neighborhood 1 +univers 1 +seattl 1 +local 1 +interest 1 +disk 1 +usag 1 +polici 1 +comment 1 +mtwong 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..826a91c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,71 @@ +rout 8 +chao 5 +research 5 +simul 5 +chaotic 4 +project 4 +washington 4 +router 3 +algorithm 3 +result 3 +pcrcw 3 +comput 2 +univers 2 +seattl 2 +technic 2 +report 2 +avail 2 +chip 2 +micron 2 +network 2 +present 2 +group 2 +scienc 1 +engin 1 +depart 1 +friend 1 +mine 1 +dylan 1 +peopleal 1 +sort 1 +peopl 1 +work 1 +paper 1 +repositori 1 +papersand 1 +chaoticrout 1 +implement 1 +hardwar 1 +built 1 +test 1 +cmo 1 +redesign 1 +process 1 +better 1 +perform 1 +power 1 +allsort 1 +includ 1 +nice 1 +graphicalfront 1 +standard 1 +discuss 1 +presentationof 1 +abl 1 +come 1 +upwith 1 +guidelin 1 +build 1 +list 1 +thathav 1 +web 1 +describ 1 +interconnect 1 +parallel 1 +commun 1 +workshop 1 +held 1 +univeristi 1 +proceed 1 +home 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..c7816f47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,201 @@ +chinook 8 +synthesi 6 +design 6 +softwar 5 +hardwar 4 +time 4 +embed 3 +system 3 +architectur 3 +rather 3 +gener 3 +processor 3 +version 3 +includ 3 +scienc 3 +fellowship 3 +project 2 +salmon 2 +warm 2 +wind 2 +rocki 2 +mountain 2 +real 2 +constraint 2 +map 2 +detail 2 +enabl 2 +make 2 +level 2 +work 2 +retarget 2 +activ 2 +simul 2 +partit 2 +june 2 +connect 2 +code 2 +compon 2 +chou 2 +nation 2 +graduat 2 +shinook 1 +larg 1 +oncorhynchu 1 +tshawytscha 1 +pacif 1 +amer 1 +name 1 +tribe 1 +blow 1 +east 1 +southerli 1 +west 1 +rare 1 +american 1 +sled 1 +doga 1 +toolfor 1 +cadtool 1 +control 1 +domin 1 +reactivesystem 1 +behavior 1 +descriptionto 1 +user 1 +target 1 +fill 1 +neededto 1 +build 1 +complet 1 +inform 1 +designdecis 1 +high 1 +earli 1 +cycl 1 +reiterateaft 1 +willnot 1 +maintain 1 +also 1 +designerto 1 +take 1 +advantag 1 +technolog 1 +instead 1 +ti 1 +legacycod 1 +current 1 +develop 1 +currentlyw 1 +interprocessorcommun 1 +effici 1 +accur 1 +moredetail 1 +becom 1 +avail 1 +shortli 1 +character 1 +follow 1 +meet 1 +ratherthan 1 +try 1 +maxim 1 +averag 1 +perform 1 +util 1 +assumesmanu 1 +believ 1 +issu 1 +intricateand 1 +sometim 1 +even 1 +technic 1 +want 1 +evalu 1 +differentarchitectur 1 +forc 1 +singl 1 +fix 1 +asicarchitectur 1 +synthes 1 +reli 1 +onoff 1 +shelf 1 +kernel 1 +discourag 1 +first 1 +tool 1 +oper 1 +innovemb 1 +shownat 1 +autom 1 +confer 1 +diego 1 +mainfeatur 1 +automat 1 +peripheraldevic 1 +sequenti 1 +concurr 1 +descript 1 +andsynthes 1 +devic 1 +driver 1 +input 1 +verilog 1 +output 1 +hardwarenetlist 1 +need 1 +togeth 1 +softwareprogram 1 +main 1 +topic 1 +interfacingproblem 1 +schedul 1 +timingconstraint 1 +function 1 +improv 1 +demonstr 1 +nato 1 +summer 1 +school 1 +swcodedesign 1 +tremezzo 1 +itali 1 +incorpor 1 +severalmor 1 +interfac 1 +techniqu 1 +memori 1 +moreeffici 1 +chinookersfacultygaetano 1 +borriellogradu 1 +student 1 +ross 1 +ortegaken 1 +hinesian 1 +macduff 1 +recent 1 +selizabeth 1 +walkupscott 1 +hauck 1 +henrik 1 +hulgaardstafflarri 1 +mcmurchielist 1 +paperschinook 1 +sponsorsarpa 1 +contract 1 +foundat 1 +grant 1 +walkup 1 +patricia 1 +robert 1 +harri 1 +ortega 1 +link 1 +depart 1 +comput 1 +engin 1 +universityof 1 +washington 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..fcebee6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,135 @@ +emerald 8 +fpga 6 +architectur 6 +tool 5 +driven 3 +rout 3 +work 3 +copyright 3 +mcmurchi 3 +develop 2 +map 2 +design 2 +time 2 +need 2 +system 2 +provid 2 +logic 2 +placement 2 +author 2 +darren 2 +cronquist 2 +larri 2 +proceed 2 +intern 2 +symposium 2 +field 2 +programm 2 +gate 2 +februari 2 +router 2 +carl 2 +ebel 2 +projectid 1 +phase 1 +would 1 +makeus 1 +reliabl 1 +produc 1 +accur 1 +performanceevalu 1 +propos 1 +unfortun 1 +given 1 +quickproduct 1 +frame 1 +face 1 +construct 1 +isoften 1 +postpon 1 +mani 1 +featur 1 +beenfrozen 1 +satisfi 1 +fast 1 +prototyp 1 +havedesign 1 +power 1 +quickdevelop 1 +heart 1 +basicfeatur 1 +block 1 +analysi 1 +synthesisand 1 +technolog 1 +global 1 +partit 1 +anddetail 1 +environ 1 +aneffici 1 +thoroughli 1 +specifi 1 +blockarchitectur 1 +well 1 +specif 1 +metric 1 +tailorplac 1 +moreov 1 +parameter 1 +schematicspecif 1 +allow 1 +variat 1 +quickli 1 +capturedand 1 +evalu 1 +public 1 +document 1 +contain 1 +page 1 +includ 1 +byth 1 +contribut 1 +mean 1 +ensur 1 +dissemin 1 +ofscholarli 1 +technic 1 +commerci 1 +basi 1 +andal 1 +right 1 +therein 1 +maintain 1 +copyrighthold 1 +notwithstand 1 +offer 1 +hereelectron 1 +understood 1 +person 1 +copi 1 +thisinform 1 +adher 1 +term 1 +constraint 1 +invok 1 +eachauthor 1 +repost 1 +without 1 +theexplicit 1 +permiss 1 +holder 1 +definit 1 +paper 1 +compil 1 +appear 1 +sigda 1 +fourth 1 +arrai 1 +us 1 +emeraldlarri 1 +pathfind 1 +negoti 1 +basedperform 1 +third 1 +arraysaid 1 +research 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..26982705 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,133 @@ +system 9 +circuit 9 +fpga 8 +architectur 7 +asynchron 5 +integr 4 +design 4 +research 4 +embed 4 +project 4 +improv 4 +perform 4 +northwest 3 +laboratori 3 +comput 3 +develop 3 +current 3 +specif 3 +rapid 3 +level 3 +depart 2 +scienc 2 +engin 2 +univers 2 +washington 2 +vlsi 2 +synthesi 2 +well 2 +montag 2 +multi 2 +prototyp 2 +rout 2 +gener 2 +clock 2 +retim 2 +cost 2 +tune 2 +seattl 1 +usath 1 +engag 1 +larg 1 +scale 1 +aid 1 +educ 1 +sinc 1 +late 1 +todai 1 +focu 1 +wide 1 +varieti 1 +sytem 1 +verificationtim 1 +separ 1 +event 1 +verif 1 +time 1 +survei 1 +methodolog 1 +first 1 +prototypingtriptych 1 +triptych 1 +densiti 1 +commerci 1 +springbok 1 +board 1 +partit 1 +assign 1 +topolog 1 +work 1 +emerald 1 +adapt 1 +toolset 1 +complet 1 +map 1 +placement 1 +toolscan 1 +automat 1 +descript 1 +fpgaarchitectur 1 +metric 1 +incorporatedinto 1 +variou 1 +tool 1 +result 1 +systemsth 1 +chinook 1 +hardwar 1 +softwar 1 +simul 1 +applic 1 +optim 1 +synchron 1 +circuitsretim 1 +effici 1 +algorithm 1 +uselevel 1 +sensit 1 +latch 1 +reduc 1 +andincreas 1 +toler 1 +skew 1 +method 1 +synchronouscircuit 1 +latenc 1 +feedback 1 +contraint 1 +network 1 +routerth 1 +chaoticrout 1 +self 1 +systemsself 1 +direct 1 +kehlprevi 1 +gemini 1 +valid 1 +layout 1 +compar 1 +implement 1 +mactest 1 +digit 1 +function 1 +tester 1 +chip 1 +cmo 1 +voltag 1 +arpa 1 +reportsarpa 1 +bluebook 1 +paragraph 1 +overview 1 +accomplish 1 +high 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..85d49e87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,61 @@ +safeti 9 +system 7 +softwar 4 +univers 3 +washington 3 +techniqu 3 +comput 3 +foundat 3 +methodolog 3 +critic 2 +us 2 +engin 2 +analysi 2 +plai 1 +increasingli 1 +import 1 +role 1 +nuclear 1 +reactor 1 +aircraft 1 +defenc 1 +space 1 +chemic 1 +plant 1 +medic 1 +equip 1 +consequ 1 +malfunct 1 +must 1 +pass 1 +rigor 1 +test 1 +review 1 +although 1 +exist 1 +decad 1 +appli 1 +contain 1 +digit 1 +goal 1 +project 1 +develop 1 +theoret 1 +build 1 +built 1 +upon 1 +safewar 1 +nanci 1 +leveson 1 +summar 1 +issu 1 +involv 1 +lai 1 +work 1 +support 1 +prototyp 1 +tool 1 +valid 1 +specif 1 +scienc 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..8f3ea19c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,169 @@ +softbot 16 +internet 6 +user 4 +softwar 3 +system 3 +plan 3 +goal 3 +interfac 3 +etzioni 3 +inform 3 +washington 3 +found 3 +control 3 +build 2 +agent 2 +world 2 +research 2 +develop 2 +robot 2 +intellig 2 +high 2 +level 2 +enabl 2 +wide 2 +resourc 2 +oren 2 +access 2 +project 2 +base 2 +magazin 2 +decemb 2 +graphic 2 +keith 2 +golden 2 +search 2 +reactiv 2 +kwok 2 +back 2 +home 2 +page 2 +softbotinternet 1 +softbotth 1 +finalist 1 +discoveraward 1 +technolog 1 +innov 1 +comput 1 +autonom 1 +interact 1 +real 1 +softwareenviron 1 +oper 1 +databas 1 +pragmaticallyconveni 1 +intellectu 1 +challeng 1 +substrat 1 +support 1 +claim 1 +util 1 +machin 1 +learningtechniqu 1 +acustomiz 1 +moder 1 +assist 1 +internetaccess 1 +accept 1 +languag 1 +generatesand 1 +execut 1 +achiev 1 +learn 1 +itsexperi 1 +human 1 +state 1 +want 1 +accomplish 1 +disambigu 1 +requestand 1 +dynam 1 +determin 1 +satisfyit 1 +us 1 +unix 1 +shell 1 +interactwith 1 +rang 1 +take 1 +tour 1 +sgraphic 1 +princip 1 +investig 1 +daniel 1 +weld 1 +also 1 +check 1 +metacrawl 1 +field 1 +servic 1 +tosearch 1 +multipl 1 +indic 1 +parallel 1 +provid 1 +sophisticatedprun 1 +option 1 +contact 1 +introduct 1 +cacm 1 +juli 1 +methodolog 1 +motiv 1 +without 1 +repli 1 +brook 1 +technic 1 +paper 1 +cartoonrepresent 1 +taken 1 +blanchard 1 +articl 1 +appear 1 +issu 1 +ofcolumn 1 +univers 1 +alumni 1 +group 1 +current 1 +toth 1 +allow 1 +easili 1 +specifi 1 +extend 1 +maintain 1 +xiiplann 1 +work 1 +specif 1 +implement 1 +advanc 1 +space 1 +browser 1 +debug 1 +planner 1 +dave 1 +christianson 1 +compar 1 +rule 1 +versu 1 +procedur 1 +sujai 1 +parekh 1 +ilalearn 1 +design 1 +protocol 1 +multi 1 +collabor 1 +negoti 1 +ying 1 +experi 1 +domain 1 +goan 1 +optim 1 +ingram 1 +gather 1 +hacker 1 +info 1 +local 1 +mike 1 +perkowitz 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..524d828e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,213 @@ +system 17 +extens 15 +spin 10 +oper 10 +kernel 10 +project 10 +code 8 +dynam 7 +applic 7 +paper 6 +languag 5 +modula 5 +microsecond 5 +take 5 +perform 5 +describ 5 +interfac 4 +us 4 +implement 4 +page 4 +run 3 +servic 3 +gener 3 +network 3 +support 3 +time 3 +report 3 +appear 3 +washington 2 +control 2 +integr 2 +allow 2 +load 2 +basic 2 +call 2 +data 2 +collect 2 +address 2 +thread 2 +alpha 2 +unix 2 +program 2 +result 2 +creat 2 +termin 2 +benchmark 2 +execut 2 +design 2 +shortcom 2 +safe 2 +intern 2 +inform 2 +compil 2 +member 2 +sai 2 +peopl 2 +credit 2 +bershad 2 +systemspin 1 +thatsupport 1 +adapt 1 +andimplement 1 +direct 1 +stillmaintain 1 +inter 1 +isol 1 +atruntim 1 +accesshardwar 1 +almost 1 +nooverhead 1 +procedur 1 +pass 1 +byrefer 1 +rather 1 +copi 1 +providesa 1 +core 1 +capabl 1 +resourc 1 +thesear 1 +systemservic 1 +space 1 +allextens 1 +written 1 +typesaf 1 +properti 1 +oftypesafeti 1 +prevent 1 +crash 1 +attemptingto 1 +manipul 1 +arbitrari 1 +piec 1 +workstat 1 +writeboth 1 +special 1 +video 1 +wella 1 +purpos 1 +connect 1 +machinerun 1 +server 1 +quick 1 +structur 1 +extend 1 +withlow 1 +overhead 1 +exampl 1 +handl 1 +recov 1 +fault 1 +executeit 1 +synchron 1 +fork 1 +join 1 +protectedprocedur 1 +anoth 1 +function 1 +cross 1 +machin 1 +overethernet 1 +realli 1 +oldadapt 1 +less 1 +operationsund 1 +mach 1 +longer 1 +samehardwar 1 +saveyourself 1 +effort 1 +recent 1 +bind 1 +invoc 1 +mechan 1 +provid 1 +flexibl 1 +effici 1 +andsimpl 1 +osdi 1 +safeti 1 +sosp 1 +protocol 1 +architectur 1 +forappl 1 +specif 1 +usenix 1 +winter 1 +confer 1 +write 1 +experi 1 +build 1 +high 1 +make 1 +clear 1 +distinct 1 +pretti 1 +happi 1 +deal 1 +order 1 +link 1 +linker 1 +point 1 +abil 1 +manag 1 +linkabl 1 +namespac 1 +runtim 1 +interposit 1 +commun 1 +facil 1 +show 1 +improv 1 +critic 1 +wait 1 +trail 1 +talk 1 +interest 1 +bottom 1 +line 1 +arpa 1 +overview 1 +summari 1 +regular 1 +friend 1 +gotten 1 +assist 1 +academia 1 +industri 1 +involv 1 +relat 1 +pointer 1 +barb 1 +arrow 1 +document 1 +latest 1 +statu 1 +avail 1 +could 1 +qualif 1 +master 1 +degre 1 +fund 1 +raship 1 +posit 1 +undergradu 1 +mascot 1 +encourag 1 +mani 1 +decid 1 +adopt 1 +ourmascot 1 +maintain 1 +brian 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..a401d6d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,117 @@ +compil 14 +dynam 12 +time 5 +code 5 +constant 4 +optim 3 +valu 3 +us 3 +project 2 +paper 2 +comput 2 +howev 2 +produc 2 +machin 2 +gener 2 +program 2 +system 2 +projectth 1 +projectmor 1 +inform 1 +member 1 +relat 1 +projectsuw 1 +section 1 +student 1 +group 1 +webdynam 1 +enabl 1 +base 1 +ofinvari 1 +data 1 +theserun 1 +elimin 1 +memoryload 1 +perform 1 +propag 1 +fold 1 +remov 1 +branch 1 +theydetermin 1 +fulli 1 +unrol 1 +loop 1 +bound 1 +performancebenefit 1 +effici 1 +offsetbi 1 +cost 1 +approach 1 +dynamiccompil 1 +strive 1 +fast 1 +high 1 +qualitydynam 1 +programm 1 +annot 1 +region 1 +theprogram 1 +static 1 +optimizingcompil 1 +automat 1 +templat 1 +pair 1 +dataflow 1 +analys 1 +identifi 1 +variabl 1 +willb 1 +simpl 1 +copi 1 +thetempl 1 +patch 1 +execut 1 +work 1 +target 1 +purpos 1 +imper 1 +languag 1 +initi 1 +initialexperi 1 +appli 1 +producedspeedup 1 +rang 1 +part 1 +spinproject 1 +eventu 1 +dynamicallycompil 1 +spin 1 +kernel 1 +exampl 1 +spinev 1 +dispatch 1 +also 1 +activ 1 +explor 1 +otherposs 1 +applic 1 +invirtu 1 +interpret 1 +prototyp 1 +systemi 1 +describ 1 +pldi 1 +arenow 1 +start 1 +design 1 +build 1 +second 1 +wewil 1 +releas 1 +detail 1 +soon 1 +last 1 +updat 1 +august 1 +grant 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..efa6ac54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,4 @@ +move 2 +permanentlymov 1 +permanentlyth 1 +document 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..42719638 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,93 @@ +program 11 +project 5 +comput 4 +washington 3 +languag 3 +direct 3 +compil 3 +engin 2 +info 2 +arrai 2 +base 2 +written 2 +without 2 +machin 2 +level 2 +concept 2 +border 2 +easili 2 +scientif 2 +walk 2 +overview 2 +paper 2 +depart 1 +scienc 1 +univers 1 +seattl 1 +eduzpl 1 +suitabl 1 +would 1 +previous 1 +fortran 1 +fast 1 +sequenti 1 +parallel 1 +modif 1 +special 1 +independ 1 +recompil 1 +necessari 1 +higher 1 +like 1 +elimin 1 +error 1 +prone 1 +index 1 +tediou 1 +loop 1 +typic 1 +shorter 1 +understood 1 +modifi 1 +scientist 1 +find 1 +region 1 +shatter 1 +control 1 +flow 1 +conclus 1 +ideal 1 +small 1 +write 1 +yourmachin 1 +programm 1 +area 1 +shouldconsid 1 +enrol 1 +zpthi 1 +autumn 1 +check 1 +recent 1 +chang 1 +high 1 +minut 1 +introduct 1 +browser 1 +right 1 +manual 1 +relat 1 +detail 1 +line 1 +inform 1 +sampl 1 +peopl 1 +member 1 +horizon 1 +descript 1 +group 1 +futur 1 +acknowledg 1 +list 1 +help 1 +support 1 +work 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..673b8f04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,149 @@ +multithread 11 +processor 10 +simultan 9 +instruct 7 +parallel 6 +issu 5 +cycl 5 +egger 5 +architectur 4 +level 4 +postscript 4 +levi 4 +tullsen 4 +multipl 3 +superscalar 3 +singl 3 +thread 3 +compil 3 +student 3 +emer 3 +stamm 3 +abstract 3 +proceed 3 +problem 2 +face 2 +high 2 +maintain 2 +util 2 +latenc 2 +modern 2 +techniqu 2 +avail 2 +resourc 2 +enabl 2 +perform 2 +research 2 +jack 2 +public 2 +andd 2 +annual 2 +intern 2 +symposium 2 +comput 2 +page 2 +home 1 +pagesimultan 1 +projectoverviewpeoplepubl 1 +overviewth 1 +crucial 1 +todai 1 +speed 1 +microprocessor 1 +long 1 +memori 1 +allevi 1 +interleav 1 +execut 1 +differentthread 1 +differ 1 +ultim 1 +though 1 +limit 1 +amount 1 +within 1 +permit 1 +independ 1 +function 1 +unit 1 +combin 1 +issuefeatur 1 +wide 1 +hide 1 +abilityof 1 +hardwar 1 +contextsar 1 +activ 1 +compet 1 +dynam 1 +share 1 +exploitthread 1 +interchang 1 +formsof 1 +effect 1 +us 1 +increas 1 +studi 1 +havedemonstr 1 +significantli 1 +improvesprocessor 1 +throughput 1 +multiprogram 1 +parallelworkload 1 +shown 1 +gain 1 +achievedin 1 +minim 1 +extens 1 +ordersuperscalar 1 +current 1 +futur 1 +work 1 +includ 1 +investig 1 +fast 1 +synchronizationtechniqu 1 +also 1 +conduct 1 +otherarchitectur 1 +peoplefaculti 1 +susan 1 +hank 1 +levygradu 1 +dean 1 +tullsenindustri 1 +collabor 1 +digit 1 +equip 1 +corpor 1 +joel 1 +rebecca 1 +convert 1 +submit 1 +juli 1 +exploit 1 +choic 1 +fetch 1 +implement 1 +philadelphia 1 +first 1 +suif 1 +workshop 1 +stanford 1 +januari 1 +maxim 1 +chip 1 +andh 1 +santa 1 +margherita 1 +ligur 1 +itali 1 +june 1 +check 1 +list 1 +project 1 +still 1 +doon 1 +affair 1 +lojlo 1 +washington 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..665f3d23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,73 @@ +scienc 8 +comput 7 +depart 5 +award 5 +faculti 3 +three 2 +inform 2 +report 2 +guidebook 2 +statist 2 +wisc 2 +home 1 +pagecomput 1 +departmentabout 1 +departmentour 1 +form 1 +consist 1 +rank 1 +countri 1 +member 1 +receiv 1 +fourteen 1 +presidenti 1 +young 1 +investig 1 +packard 1 +fellowship 1 +women 1 +scientist 1 +engin 1 +incent 1 +excel 1 +doctor 1 +dissert 1 +develop 1 +area 1 +research 1 +project 1 +peopl 1 +cours 1 +offer 1 +fall 1 +class 1 +futur 1 +timet 1 +technic 1 +system 1 +answer 1 +frequent 1 +ask 1 +question 1 +alumni 1 +graduat 1 +undergradu 1 +annual 1 +onlin 1 +util 1 +madison 1 +local 1 +servic 1 +relat 1 +organ 1 +colophon 1 +server 1 +us 1 +infocomput 1 +departmentunivers 1 +wisconsin 1 +madisona 1 +west 1 +dayton 1 +streetmadison 1 +voic 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..8ef6bacb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,114 @@ +task 8 +control 6 +observ 6 +point 5 +object 4 +explor 4 +behavior 4 +viewpoint 3 +scene 3 +provabl 3 +local 3 +achiev 3 +smoothli 3 +global 3 +surfac 3 +purpos 2 +recoveri 2 +simpl 2 +navig 2 +abil 2 +correct 2 +base 2 +consid 2 +approach 2 +show 2 +reconstruct 2 +kyro 1 +kutulako 1 +chuck 1 +dyer 1 +consider 1 +interest 1 +recent 1 +emploi 1 +simpleobserv 1 +either 1 +make 1 +propertieseasi 1 +fixat 1 +combin 1 +order 1 +toperform 1 +complex 1 +obstacl 1 +avoid 1 +ourwork 1 +focus 1 +activ 1 +pointof 1 +perform 1 +involv 1 +develop 1 +makesimpl 1 +motion 1 +decis 1 +geometryof 1 +requir 1 +minim 1 +process 1 +imag 1 +first 1 +recov 1 +shape 1 +thesurfac 1 +select 1 +generalobserv 1 +posit 1 +provid 1 +inform 1 +objectthan 1 +other 1 +exist 1 +special 1 +beexploit 1 +mobil 1 +effici 1 +anddeterminist 1 +strategi 1 +reach 1 +localshap 1 +us 1 +qualitativestrategi 1 +viewingdirect 1 +align 1 +princip 1 +direct 1 +selectedpoint 1 +second 1 +deriv 1 +descriptionof 1 +formul 1 +thequalit 1 +observationso 1 +visibl 1 +slide 1 +maxim 1 +connect 1 +region 1 +arbitrari 1 +smooth 1 +attempt 1 +maintain 1 +well 1 +defin 1 +geometr 1 +relationship 1 +observationand 1 +view 1 +suggest 1 +lead 1 +also 1 +simplifi 1 +frame 1 +comput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..f9b2d644 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,99 @@ +contour 9 +model 8 +deform 5 +extract 5 +detect 5 +classif 3 +formul 3 +integr 2 +noisi 2 +imag 2 +case 2 +regular 2 +snake 2 +deriv 2 +energi 2 +yield 2 +consid 2 +arbitrari 2 +turn 2 +fung 1 +roland 1 +chin 1 +develop 1 +approach 1 +classifi 1 +directli 1 +conduct 1 +studi 1 +initi 1 +ofact 1 +us 1 +minimax 1 +principl 1 +criterion 1 +wherebi 1 +valu 1 +automat 1 +implicitli 1 +determin 1 +along 1 +furthermor 1 +function 1 +contain 1 +hough 1 +transform 1 +special 1 +subsequ 1 +problem 1 +combin 1 +stabl 1 +invari 1 +anduniqu 1 +markov 1 +random 1 +field 1 +priordistribut 1 +exert 1 +influenc 1 +global 1 +allow 1 +bayesian 1 +framework 1 +posterior 1 +estim 1 +equival 1 +minim 1 +gener 1 +activ 1 +final 1 +lower 1 +level 1 +visual 1 +task 1 +withpattern 1 +recognit 1 +process 1 +base 1 +nearman 1 +pearson 1 +lemma 1 +optim 1 +classificationtest 1 +summat 1 +peak 1 +practic 1 +applic 1 +small 1 +region 1 +need 1 +margin 1 +distribut 1 +valid 1 +confirm 1 +extens 1 +rigor 1 +experiment 1 +gsnake 1 +softwar 1 +avail 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..92468893 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,198 @@ +data 30 +object 22 +displai 21 +lattic 13 +defin 13 +visual 9 +base 9 +function 6 +model 6 +system 6 +program 6 +scalar 6 +languag 5 +time 5 +type 5 +call 4 +mathemat 4 +contain 4 +comput 4 +approxim 4 +finit 4 +particular 4 +expressivenesscondit 4 +temperatur 4 +primit 4 +implement 3 +develop 3 +precis 3 +thu 3 +purpos 3 +map 3 +condit 3 +order 3 +relat 3 +arrai 3 +voxel 3 +us 3 +foundat 2 +process 2 +ofdata 2 +scientist 2 +infinit 2 +number 2 +amount 2 +inform 2 +pixel 2 +color 2 +ideal 2 +appropri 2 +fact 2 +show 2 +satisfi 2 +scientif 2 +specifi 2 +class 2 +variabl 2 +graphic 2 +design 2 +along 2 +control 2 +abstract 2 +bill 1 +hibbard 1 +brian 1 +paul 1 +chuck 1 +dyer 1 +theidea 1 +calleda 1 +prototyp 1 +adha 1 +upon 1 +idea 1 +guid 1 +follow 1 +principl 1 +natur 1 +objectsrepres 1 +objectsfrequ 1 +real 1 +functionswith 1 +domain 1 +wherea 1 +containfinit 1 +themathemat 1 +repres 1 +chosenfrom 1 +palett 1 +anim 1 +sequenc 1 +numbersof 1 +frame 1 +close 1 +relationship 1 +computationalmodel 1 +commun 1 +informationcont 1 +thatdisplai 1 +encod 1 +onlythos 1 +complet 1 +sens 1 +itimpl 1 +satisfyingth 1 +expressivenss 1 +onhow 1 +wecan 1 +howprecis 1 +voxelresolut 1 +therefor 1 +visualizationprocess 1 +objectsto 1 +interpret 1 +context 1 +theexpress 1 +isomorph 1 +scientificdata 1 +built 1 +primitivevari 1 +latitud 1 +radianc 1 +tupl 1 +constructor 1 +appropriatefor 1 +seri 1 +containsth 1 +canalso 1 +displayi 1 +graphicsprimit 1 +locationand 1 +size 1 +volum 1 +place 1 +animationsequ 1 +fromth 1 +thedisplai 1 +ofcours 1 +alreadi 1 +assum 1 +exampl 1 +given 1 +isnatur 1 +graph 1 +axi 1 +andtemperatur 1 +anoth 1 +remark 1 +thing 1 +wedo 1 +take 1 +assumpt 1 +consequ 1 +fundament 1 +calledvi 1 +adthat 1 +allow 1 +experi 1 +algorithm 1 +steer 1 +theircomput 1 +creat 1 +theirprogram 1 +thevi 1 +vvof 1 +thatsatisfi 1 +express 1 +howev 1 +implementationi 1 +quit 1 +flow 1 +auser 1 +interfac 1 +abstractionof 1 +render 1 +pipelin 1 +user 1 +interfacefor 1 +ofmap 1 +possibl 1 +recurs 1 +defineddata 1 +complex 1 +link 1 +tree 1 +ingener 1 +datatyp 1 +orient 1 +provid 1 +rigor 1 +help 1 +analyt 1 +altern 1 +usualapproach 1 +construct 1 +bywrit 1 +special 1 +fora 1 +specif 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..da903ea1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,52 @@ +condor 7 +comput 5 +project 3 +throughput 3 +high 2 +page 2 +homepag 1 +object 1 +goal 1 +develop 1 +implement 1 +deploi 1 +evaluatemechan 1 +polici 1 +support 1 +larg 1 +collect 1 +distribut 1 +own 1 +resourc 1 +guid 1 +technologicaland 1 +sociolog 1 +challeng 1 +environ 1 +team 1 +build 1 +softwar 1 +tool 1 +enabl 1 +scientist 1 +engin 1 +increas 1 +introduct 1 +start 1 +research 1 +system 1 +pool 1 +univers 1 +wisconsin 1 +madison 1 +help 1 +home 1 +world 1 +mail 1 +list 1 +comment 1 +suggestionscondor 1 +admin 1 +wisc 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..87690599 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,9 @@ +next 3 +homepag 1 +peopl 1 +project 1 +last 1 +modifi 1 +septemb 1 +miron 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..ced5785f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,170 @@ +coral 21 +includ 7 +binari 7 +releas 6 +also 5 +system 4 +support 4 +declar 4 +provid 4 +inform 4 +databas 3 +program 3 +version 3 +instal 3 +relat 2 +project 2 +develop 2 +group 2 +languag 2 +interfac 2 +combin 2 +imper 2 +queri 2 +contain 2 +modul 2 +wide 2 +rang 2 +evalu 2 +strategi 2 +user 2 +disk 2 +resid 2 +data 2 +octob 2 +grab 2 +file 2 +nobin 2 +solari 2 +linux 2 +mail 2 +announc 2 +wisc 2 +projectcor 1 +projectdocu 1 +content 1 +objectiveoverviewreleas 1 +informationse 1 +public 1 +coralpeopl 1 +work 1 +coraloth 1 +research 1 +madisonobject 1 +object 1 +robust 1 +efficientdeduct 1 +investig 1 +variou 1 +applic 1 +domain 1 +sever 1 +algorithm 1 +underli 1 +coralsystem 1 +member 1 +durationof 1 +sinc 1 +overview 1 +deduct 1 +rich 1 +allow 1 +declaritiveand 1 +supportsgener 1 +horn 1 +claus 1 +augment 1 +complex 1 +term 1 +aggreg 1 +negat 1 +tupl 1 +univers 1 +quantifi 1 +variabl 1 +canb 1 +organ 1 +collect 1 +interact 1 +coralimplement 1 +andautomat 1 +choos 1 +effici 1 +modulein 1 +addit 1 +permit 1 +guid 1 +optim 1 +desir 1 +select 1 +among 1 +control 1 +choic 1 +atth 1 +level 1 +construct 1 +updat 1 +insertand 1 +delet 1 +rule 1 +canprogram 1 +extend 1 +withcor 1 +primit 1 +high 1 +degre 1 +extens 1 +allowingc 1 +programm 1 +class 1 +structur 1 +enhanc 1 +coralimplemen 1 +main 1 +memori 1 +us 1 +theexodusstorag 1 +manang 1 +transact 1 +manag 1 +aclient 1 +server 1 +environ 1 +current 1 +want 1 +sourc 1 +code 1 +requiringy 1 +compil 1 +made 1 +forth 1 +indic 1 +machin 1 +type 1 +click 1 +readm 1 +gener 1 +manual 1 +instruct 1 +hpux 1 +seri 1 +suno 1 +stai 1 +announcemnt 1 +listwhich 1 +reciev 1 +relev 1 +newsgroup 1 +comp 1 +lang 1 +misc 1 +submit 1 +question 1 +comment 1 +report 1 +send 1 +edulast 1 +modifi 1 +shawn 1 +flisakowski 1 +flisakow 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..34bb1b49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,232 @@ +complementar 15 +problem 15 +gam 10 +applic 8 +algorithm 7 +solver 7 +newton 7 +system 7 +path 6 +point 6 +us 6 +research 5 +smooth 5 +nonsmooth 5 +page 4 +list 4 +link 4 +mile 4 +approxim 4 +equat 4 +paper 4 +econom 3 +sever 3 +inform 3 +interest 3 +also 3 +access 3 +file 3 +toolbox 3 +give 3 +model 3 +ferri 3 +implement 3 +well 2 +document 2 +theori 2 +softwar 2 +given 2 +directori 2 +relev 2 +mcplib 2 +matlab 2 +avail 2 +consist 2 +functionevalu 2 +detail 2 +help 2 +languag 2 +wisc 2 +method 2 +solut 2 +comput 2 +direct 2 +function 2 +emploi 2 +cannot 2 +linear 2 +subproblem 2 +appli 2 +sequenc 2 +iter 2 +step 2 +construct 2 +search 2 +strategi 2 +base 2 +uponreformul 2 +nation 2 +look 2 +michael 2 +interior 2 +neta 1 +result 1 +three 1 +decad 1 +subject 1 +divers 1 +engin 1 +scienc 1 +becom 1 +establish 1 +fruitfuldisciplin 1 +within 1 +mathemat 1 +program 1 +monograph 1 +survei 1 +basic 1 +role 1 +optim 1 +serv 1 +center 1 +regard 1 +incomplementar 1 +meetingsof 1 +commun 1 +pointer 1 +forcomplementar 1 +area 1 +tabl 1 +content 1 +researcherssoftwar 1 +collect 1 +nonlinear 1 +mix 1 +problemdescript 1 +sourc 1 +forthes 1 +evolv 1 +freeli 1 +andm 1 +allow 1 +frommatlab 1 +without 1 +spars 1 +jacobian 1 +evalu 1 +machin 1 +specificvers 1 +download 1 +interfac 1 +describ 1 +librari 1 +routin 1 +areavail 1 +hook 1 +contact 1 +steve 1 +rutherford 1 +colorado 1 +edufor 1 +extens 1 +classicaljosephi 1 +linearizedsubproblem 1 +lemk 1 +almost 1 +complementari 1 +pivot 1 +defineth 1 +dampedlinesearch 1 +merit 1 +measur 1 +violat 1 +infeas 1 +restartprocedur 1 +case 1 +totermin 1 +secondari 1 +everi 1 +rescal 1 +equilibr 1 +elementsappear 1 +data 1 +run 1 +mcpor 1 +directli 1 +techniqu 1 +similarto 1 +anonsmooth 1 +reformul 1 +algorithmconsist 1 +major 1 +anapproxim 1 +similar 1 +pathto 1 +aposs 1 +exist 1 +thepath 1 +entir 1 +along 1 +partiallycomput 1 +taken 1 +relinear 1 +anonmonoton 1 +watchdog 1 +avoid 1 +converg 1 +local 1 +minima 1 +norm 1 +forth 1 +underli 1 +keep 1 +number 1 +requir 1 +small 1 +possibl 1 +option 1 +robustnessimprov 1 +proxim 1 +perturb 1 +qpcomp 1 +ishandl 1 +deriv 1 +thenapproxim 1 +solv 1 +leadto 1 +zero 1 +origin 1 +form 1 +theaccuraci 1 +determin 1 +residu 1 +thecurr 1 +subsystem 1 +compar 1 +engineeringand 1 +mani 1 +known 1 +mpsge 1 +preprocessor 1 +thatallow 1 +equilibrium 1 +formul 1 +easili 1 +thegam 1 +home 1 +nemsth 1 +energi 1 +relat 1 +algorithmsand 1 +overview 1 +project 1 +trick 1 +oper 1 +pointmethod 1 +argonn 1 +laboratori 1 +archiv 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..255e9a40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,56 @@ +exodu 5 +wisc 4 +project 3 +storag 3 +user 2 +manag 2 +benchmark 2 +zwill 2 +home 1 +pageexodu 1 +extens 1 +object 1 +orient 1 +databas 1 +system 1 +toolkitnot 1 +document 1 +construct 1 +succed 1 +theshor 1 +still 1 +provid 1 +minim 1 +support 1 +theexodu 1 +compil 1 +persistentprogram 1 +languag 1 +avail 1 +licens 1 +requir 1 +inform 1 +need 1 +contact 1 +eduprincip 1 +investig 1 +mike 1 +carei 1 +david 1 +dewittse 1 +also 1 +public 1 +relat 1 +exodusshor 1 +successor 1 +exoduslatest 1 +compilercontribut 1 +softwar 1 +managera 1 +mail 1 +list 1 +exodus_al 1 +oodbsdat 1 +prepar 1 +april 1 +michael 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..84d29189 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,196 @@ +paradis 23 +queri 15 +data 7 +databas 6 +object 6 +client 5 +server 5 +us 4 +inform 4 +support 4 +displai 4 +provid 4 +spatial 4 +attribut 4 +parallel 3 +system 3 +sampl 3 +also 3 +brows 3 +type 3 +graphic 3 +interfac 3 +polygon 3 +front 3 +layer 3 +creat 3 +applic 2 +contact 2 +relat 2 +shore 2 +project 2 +manag 2 +madison 2 +depart 2 +design 2 +implement 2 +geograph 2 +store 2 +manipul 2 +set 2 +subset 2 +issu 2 +model 2 +either 2 +result 2 +method 2 +area 2 +drop 2 +ship 2 +execut 2 +wisc 2 +projectparadis 1 +document 1 +content 1 +frontend 1 +informationse 1 +public 1 +peopl 1 +work 1 +extens 1 +optim 1 +report 1 +examin 1 +sequoia 1 +benchmark 1 +script 1 +vldb 1 +paper 1 +research 1 +group 1 +serverobject 1 +andevalu 1 +scalabl 1 +iscap 1 +massiv 1 +applyingobject 1 +orient 1 +technolog 1 +problem 1 +ofstor 1 +hope 1 +tosignificantli 1 +advanc 1 +size 1 +complex 1 +thatcan 1 +successfulli 1 +databasesystem 1 +aim 1 +handl 1 +providesa 1 +user 1 +andsupport 1 +paradiseprovid 1 +extend 1 +gisappl 1 +addit 1 +base 1 +asinteg 1 +real 1 +string 1 +built 1 +raster 1 +polylin 1 +point 1 +circl 1 +video 1 +mpeg 1 +imag 1 +underli 1 +persist 1 +allow 1 +spatialattribut 1 +foroverlap 1 +correspond 1 +custom 1 +order 1 +selectingcolor 1 +label 1 +withad 1 +issueimplicit 1 +zoom 1 +click 1 +sketch 1 +arubb 1 +band 1 +querycompos 1 +menu 1 +compos 1 +access 1 +databaseschema 1 +assist 1 +composit 1 +beview 1 +bedisplai 1 +tabl 1 +tupl 1 +context 1 +sensit 1 +help 1 +sqlwe 1 +ad 1 +abil 1 +invok 1 +defin 1 +extendedset 1 +exampl 1 +calcul 1 +byus 1 +standarddatabas 1 +oper 1 +includ 1 +anddrop 1 +extent 1 +indic 1 +insert 1 +updat 1 +current 1 +version 1 +emploi 1 +architectur 1 +syntax 1 +paradiseserv 1 +theresult 1 +back 1 +ismulti 1 +thread 1 +multipl 1 +connect 1 +sameserv 1 +sever 1 +carefulattent 1 +paid 1 +insur 1 +could 1 +effici 1 +processqueri 1 +especi 1 +involv 1 +largevolum 1 +frontendeurop 1 +pressher 1 +projectattn 1 +prof 1 +david 1 +dewittunivers 1 +wisconsin 1 +madisoncomput 1 +scienc 1 +west 1 +dayton 1 +streetmadison 1 +email 1 +edumor 1 +come 1 +biswadeep 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..4234e8df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,81 @@ +scout 7 +internet 6 +servic 4 +intern 4 +report 3 +student 3 +project 3 +best 3 +resourc 2 +wisconsin 2 +madison 2 +commun 2 +us 2 +provid 2 +suggest 2 +inform 2 +homepagego 1 +text 1 +versionnewslett 1 +newand 1 +newli 1 +discov 1 +network 1 +toolsinternet 1 +announc 1 +updat 1 +daili 1 +effectiveinternet 1 +tool 1 +availablea 1 +studentssurf 1 +smarter 1 +longer 1 +universityof 1 +show 1 +canchoos 1 +filter 1 +hundr 1 +annoucementseach 1 +week 1 +look 1 +valuabl 1 +onlin 1 +networktool 1 +organ 1 +summar 1 +annot 1 +vefound 1 +offer 1 +sever 1 +format 1 +goal 1 +support 1 +effect 1 +byeduc 1 +research 1 +howev 1 +everyon 1 +welcom 1 +useth 1 +public 1 +site 1 +encouragefeedback 1 +entir 1 +three 1 +primari 1 +includ 1 +happen 1 +thescout 1 +toolkit 1 +ournewest 1 +know 1 +locat 1 +depart 1 +comput 1 +scienc 1 +theunivers 1 +comment 1 +feedbackscout 1 +servicesfor 1 +educ 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..30727e54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,481 @@ +shore 46 +object 36 +system 21 +list 18 +data 17 +file 16 +applic 12 +unix 12 +persist 11 +type 11 +mail 10 +releas 9 +languag 9 +subscrib 9 +project 8 +wisc 8 +shore_al 8 +support 7 +access 7 +scalabl 6 +hardwar 6 +softwar 6 +provid 6 +server 6 +version 5 +inform 5 +beta 5 +design 5 +name 5 +odmg 5 +processor 5 +messag 5 +content 4 +research 4 +program 4 +space 4 +compat 4 +exist 4 +model 4 +first 4 +us 4 +defin 4 +byte 4 +listproc 4 +document 3 +relat 3 +includ 3 +geograph 3 +basic 3 +manag 3 +develop 3 +like 3 +interfac 3 +text 3 +environ 3 +store 3 +singl 3 +orient 3 +featur 3 +major 3 +goal 3 +commerci 3 +oodbm 3 +technolog 3 +uniqu 3 +process 3 +larg 3 +structur 3 +make 3 +term 3 +link 3 +chang 3 +shore_support 3 +user 3 +interest 3 +digest 3 +also 2 +sourc 2 +binari 2 +public 2 +benchmark 2 +madison 2 +databas 2 +depart 2 +need 2 +informationsystem 2 +multipl 2 +programminglanguag 2 +field 2 +eas 2 +transit 2 +abl 2 +either 2 +describ 2 +much 2 +base 2 +effort 2 +concentr 2 +heterogen 2 +focu 2 +architectur 2 +peer 2 +ashor 2 +client 2 +vendor 2 +second 2 +build 2 +exampl 2 +futur 2 +digit 2 +avail 2 +retriev 2 +contain 2 +mechan 2 +differ 2 +bulk 2 +set 2 +close 2 +standard 2 +oodb 2 +inter 2 +share 2 +flatten 2 +time 2 +directli 2 +framework 2 +regist 2 +anonym 2 +legaci 2 +read 2 +string 2 +attribut 2 +question 2 +mani 2 +clutter 2 +weekli 2 +request 2 +help 2 +default 2 +repli 2 +rather 2 +entir 2 +subscript 2 +home 1 +pageshor 1 +high 1 +perform 1 +repositorydocu 1 +objectiveoverviewreleas 1 +informationmail 1 +listsse 1 +line 1 +shorepeopl 1 +work 1 +shorelatest 1 +summari 1 +arpaparadis 1 +built 1 +shoreexodu 1 +predecessor 1 +shoreoo 1 +oodbsshor 1 +photo 1 +albumuw 1 +group 1 +serverobject 1 +implement 1 +andevalu 1 +serv 1 +widevarieti 1 +target 1 +cadsystem 1 +satellit 1 +repositori 1 +multi 1 +media 1 +expand 1 +capabl 1 +wide 1 +usedexodusstorag 1 +wisconsin 1 +fund 1 +arpa 1 +number 1 +ofwai 1 +hierarch 1 +anda 1 +thisinterfac 1 +intend 1 +theunix 1 +tool 1 +viand 1 +withoutmodif 1 +becom 1 +shoreobject 1 +complex 1 +overview 1 +someth 1 +hybrid 1 +natur 1 +inheritingcharacterist 1 +fromfil 1 +section 1 +briefli 1 +ofshor 1 +paper 1 +greater 1 +detail 1 +three 1 +scalabilitysupport 1 +heterogeneitysupport 1 +applicationswhen 1 +began 1 +year 1 +uniqueamong 1 +commun 1 +degre 1 +languageheterogen 1 +turn 1 +facilit 1 +remain 1 +distinguish 1 +supportfor 1 +depend 1 +persistentstorag 1 +furthermor 1 +sinc 1 +basicallycompat 1 +expect 1 +eventu 1 +betransf 1 +sector 1 +architectureshor 1 +sever 1 +wai 1 +symmetr 1 +distributedarchitectur 1 +everi 1 +particip 1 +run 1 +whether 1 +disksattach 1 +network 1 +workstat 1 +parallel 1 +intel 1 +paragon 1 +contrast 1 +architectureus 1 +exodu 1 +serverarchitectur 1 +fine 1 +typicallyus 1 +notionof 1 +valu 1 +ad 1 +runsin 1 +extens 1 +mind 1 +rel 1 +simpl 1 +forus 1 +specif 1 +theparadis 1 +alreadi 1 +nasa 1 +seosdi 1 +feel 1 +piec 1 +plai 1 +aimport 1 +role 1 +varieti 1 +endeavor 1 +librari 1 +almost 1 +certainlydepend 1 +go 1 +manipul 1 +transmitobject 1 +video 1 +pictur 1 +well 1 +whilecurr 1 +product 1 +could 1 +orientedtoward 1 +deal 1 +gigabyt 1 +terabyt 1 +customiz 1 +equal 1 +import 1 +index 1 +queri 1 +libraryar 1 +requir 1 +heterogeneityobject 1 +neutraltyp 1 +embodi 1 +enhanc 1 +databasefeatur 1 +provis 1 +simplifi 1 +task 1 +ofsupport 1 +feasibleto 1 +mention 1 +earlier 1 +quit 1 +neutral 1 +definit 1 +wasrec 1 +propos 1 +consortium 1 +emphasi 1 +howev 1 +onprovid 1 +withina 1 +applicationsa 1 +enabl 1 +currentlyus 1 +untyp 1 +stop 1 +structuredobject 1 +conveni 1 +safe 1 +intra 1 +ultim 1 +hope 1 +displac 1 +orientedfil 1 +servic 1 +standpoint 1 +world 1 +manypersist 1 +flexibl 1 +tree 1 +reachabl 1 +indirectli 1 +give 1 +usersa 1 +familiar 1 +individualpersist 1 +root 1 +oflarg 1 +unnam 1 +realiz 1 +involvessever 1 +kind 1 +includingdirectori 1 +pool 1 +symbol 1 +cross 1 +refer 1 +unixappl 1 +compil 1 +editor 1 +fromtradit 1 +stream 1 +standardunix 1 +open 1 +write 1 +mkdir 1 +chdir 1 +order 1 +callsposs 1 +option 1 +onevari 1 +length 1 +charact 1 +asb 1 +attempt 1 +objectthrough 1 +counterpart 1 +callswil 1 +portion 1 +thatwish 1 +without 1 +possibl 1 +mount 1 +datacontain 1 +feasibl 1 +bothnew 1 +componentof 1 +morestructur 1 +latest 1 +tabl 1 +date 1 +approxim 1 +subject 1 +contact 1 +rleas 1 +sept 1 +improv 1 +completeimplement 1 +fix 1 +port 1 +tosolari 1 +linux 1 +august 1 +gzip 1 +sparc 1 +andpentium 1 +solari 1 +found 1 +atftp 1 +liststher 1 +eduand 1 +eduthi 1 +reach 1 +team 1 +usebi 1 +submit 1 +comment 1 +report 1 +cannot 1 +madisonc 1 +current 1 +unmoder 1 +unlikelyev 1 +get 1 +junk 1 +moder 1 +mailbox 1 +isalreadi 1 +sign 1 +belowfor 1 +sentwhen 1 +purpos 1 +notifi 1 +parti 1 +archiv 1 +sent 1 +sender 1 +beingpost 1 +want 1 +yourrepli 1 +copi 1 +thu 1 +anyon 1 +maysubscrib 1 +post 1 +existenceof 1 +shown 1 +return 1 +whenit 1 +yoursubscript 1 +conceal 1 +subscriberscannot 1 +obtain 1 +membership 1 +must 1 +specialmessag 1 +look 1 +receiv 1 +individu 1 +sendthi 1 +along 1 +send 1 +separ 1 +unsubscrib 1 +messageshould 1 +helplast 1 +modifi 1 +nanci 1 +hall 1 +nhall 1 +footnot 1 +odlshor 1 +concurr 1 +decid 1 +modelidl 1 +start 1 +point 1 +henc 1 +odlar 1 +similar 1 +anoth 1 +stabilizesw 1 +convert 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..ce282345 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,31 @@ +madison 3 +abhinav 2 +page 2 +gupta 2 +wisc 2 +depart 2 +comput 2 +scienc 2 +home 1 +welcom 1 +agupta 1 +construct 1 +graduat 1 +student 1 +univers 1 +wisconsin 1 +contact 1 +residenceoffic 1 +kendal 1 +avenu 1 +dayton 1 +street 1 +interest 1 +link 1 +indian 1 +newspap 1 +stuff 1 +sport 1 +finger 1 +find 1 +whereabout 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..aed25e0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,13 @@ +alain 3 +home 1 +pagealain 1 +click 1 +larger 1 +pictur 1 +largest 1 +carnivor 1 +ever 1 +live 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..261edd97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,49 @@ +wisconsin 7 +univers 6 +biologi 5 +comput 4 +intellig 4 +allex 3 +scienc 3 +system 3 +molecular 3 +ismb 3 +carolyn 2 +machin 2 +learn 2 +group 2 +home 1 +page 1 +graduat 1 +studentbiotechnolog 1 +train 1 +program 1 +traineecomput 1 +departmentunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +mail 1 +wisc 1 +edutelephon 1 +advisor 1 +professor 1 +jude 1 +shavlikinterest 1 +sequenc 1 +protein 1 +fold 1 +artifici 1 +neural 1 +networkseduc 1 +madisonb 1 +purdu 1 +universityb 1 +educ 1 +mankato 1 +state 1 +relat 1 +link 1 +depart 1 +research 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..ce2357b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,182 @@ +amir 5 +page 5 +program 4 +friend 4 +home 3 +interest 3 +super 3 +roth 2 +madison 2 +wisc 2 +group 2 +seminar 2 +arch 2 +compil 2 +us 2 +out 2 +comput 2 +scienc 2 +univers 2 +multiscalar 2 +physic 2 +yale 2 +degre 2 +beauti 2 +marci 2 +go 2 +like 2 +think 2 +analysi 2 +barb 2 +delphi 1 +maven 1 +show 1 +erin 1 +occasionali 1 +updat 1 +copi 1 +resum 1 +cvte 1 +truth 1 +week 1 +research 1 +topic 1 +implement 1 +preprocessor 1 +deleg 1 +work 1 +project 1 +partner 1 +set 1 +airport 1 +metal 1 +detector 1 +existencei 1 +graduat 1 +student 1 +depart 1 +wisconsin 1 +advisor 1 +guri 1 +sohi 1 +look 1 +method 1 +allevi 1 +data 1 +depend 1 +distribut 1 +regist 1 +file 1 +side 1 +curli 1 +fri 1 +advanc 1 +nail 1 +design 1 +vallei 1 +school 1 +much 1 +practic 1 +live 1 +girlfriend 1 +cat 1 +charli 1 +also 1 +went 1 +get 1 +master 1 +public 1 +polici 1 +lafollett 1 +institut 1 +presid 1 +meantim 1 +solv 1 +linear 1 +regress 1 +problem 1 +wacki 1 +recip 1 +find 1 +magazin 1 +watch 1 +parti 1 +five 1 +eggplant 1 +peopl 1 +weird 1 +anywai 1 +promis 1 +subba 1 +officem 1 +daddi 1 +novemb 1 +titanium 1 +screw 1 +desi 1 +relaford 1 +terri 1 +mulholland 1 +oxygen 1 +carbon 1 +dioxid 1 +area 1 +vagu 1 +languag 1 +whack 1 +optim 1 +parallel 1 +algorithm 1 +theori 1 +good 1 +soul 1 +evalu 1 +model 1 +perform 1 +enhanc 1 +three 1 +point 1 +shot 1 +thing 1 +scaryarea 1 +rabid 1 +interestth 1 +love 1 +know 1 +talk 1 +better 1 +leav 1 +never 1 +return 1 +hmmm 1 +handyinformatik 1 +index 1 +journal 1 +author 1 +madcat 1 +architectur 1 +resourc 1 +minut 1 +score 1 +sportslin 1 +philli 1 +everybodi 1 +favorit 1 +engin 1 +ickyth 1 +new 1 +write 1 +articl 1 +gui 1 +read 1 +want 1 +kid 1 +drew 1 +cornel 1 +david 1 +wierd 1 +featur 1 +associ 1 +kemin 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..237c8614 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,2 @@ +page 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..64a72011 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,131 @@ +amo 4 +page 4 +approxim 4 +theori 4 +home 3 +present 3 +download 3 +analysi 3 +version 3 +click 3 +site 3 +file 3 +group 3 +activ 3 +homepag 2 +wisconsin 2 +madison 2 +wisc 2 +us 2 +clickher 2 +compress 2 +vita 2 +list 2 +variou 2 +public 2 +includ 2 +inform 2 +found 2 +wish 2 +view 2 +line 2 +research 2 +word 1 +search 1 +engin 1 +spline 1 +wavelet 1 +boxsplin 1 +radial 1 +basi 1 +function 1 +shift 1 +invari 1 +space 1 +toscatt 1 +data 1 +multiquadr 1 +thin 1 +plate 1 +splinesthi 1 +netscap 1 +enhanc 1 +associ 1 +professordepart 1 +comput 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +telephon 1 +tabl 1 +linksat 1 +item 1 +access 1 +order 1 +paperaffin 1 +system 1 +operatorof 1 +zuowei 1 +shen 1 +choos 1 +follow 1 +unix 1 +otherwis 1 +uncompress 1 +fromher 1 +none 1 +work 1 +server 1 +copi 1 +directlyfrom 1 +accounther 1 +handout 1 +email 1 +want 1 +abstract 1 +select 1 +articlesof 1 +mine 1 +anonym 1 +carl 1 +boor 1 +maintain 1 +containspostscript 1 +postscript 1 +articl 1 +theapproxim 1 +also 1 +technic 1 +filesconcern 1 +gener 1 +recommend 1 +read 1 +provid 1 +avail 1 +student 1 +andpubl 1 +main 1 +area 1 +interest 1 +togeth 1 +short 1 +summari 1 +futur 1 +goal 1 +univeristi 1 +ofwisconsin 1 +numer 1 +link 1 +peopl 1 +commun 1 +miscellan 1 +topic 1 +final 1 +offici 1 +pleas 1 +deposit 1 +comment 1 +mailbox 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..e587fddd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,10 @@ +andi 1 +therber 1 +home 1 +pageandi 1 +therberoffic 1 +sphone 1 +email 1 +andyt 1 +wisc 1 +eduzooresumebookmarksapplet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..94f07f99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,18 @@ +arvind 3 +ranganathan 2 +workplac 2 +ranga 1 +erstwhil 1 +present 1 +indiaworld 1 +fascin 1 +world 1 +escher 1 +collect 1 +classic 1 +paper 1 +comput 1 +scienc 1 +finger 1 +log 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..1d340cf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,35 @@ +ashish 2 +depart 2 +home 1 +page 1 +thusoo 1 +graduat 1 +student 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +come 1 +india 1 +hadmi 1 +undergradu 1 +educ 1 +indianinstitut 1 +technolog 1 +delhi 1 +iitd 1 +fantast 1 +place 1 +worth 1 +visit 1 +like 1 +contact 1 +canfing 1 +find 1 +whereabout 1 +altern 1 +send 1 +email 1 +ashisht 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..c7fb4fe0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,34 @@ +alexandria 4 +ashraf 3 +scienc 3 +univers 3 +aboulnaga 2 +home 2 +madison 2 +comput 2 +egypt 2 +section 2 +view 2 +grade 2 +pageashraf 1 +aboulnagacomput 1 +depart 1 +room 1 +wisconsin 1 +west 1 +dayton 1 +usaphon 1 +mail 1 +wisc 1 +edueduc 1 +juli 1 +june 1 +info 1 +offic 1 +hour 1 +desautel 1 +page 1 +last 1 +modifi 1 +septemb 1 +finger 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..39cde88a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,24 @@ +atkinson 3 +phil 2 +home 2 +infooffic 2 +page 1 +pageucla 1 +bannon 1 +win 1 +ncaa 1 +basketbal 1 +championship 1 +seattl 1 +gener 1 +phone 1 +email 1 +wisc 1 +educurr 1 +researchsailinghors 1 +back 1 +ridingscuba 1 +divingc 1 +hour 1 +tuth 1 +appoint 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..bd810872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,118 @@ +number 7 +comput 6 +bach 4 +interest 4 +theori 4 +algorithm 4 +problem 3 +page 2 +eric 2 +scienc 2 +univers 2 +email 2 +wisc 2 +theoret 2 +complex 2 +us 2 +effici 2 +exampl 2 +prime 2 +larg 2 +test 2 +wit 2 +question 2 +recent 2 +model 2 +proc 2 +home 1 +professor 1 +depart 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +california 1 +berkelei 1 +algebraicalgorithm 1 +cryptographi 1 +string 1 +automata 1 +research 1 +summari 1 +solvealgebra 1 +onetel 1 +digit 1 +without 1 +examin 1 +possiblefactor 1 +intrins 1 +mathemat 1 +well 1 +applic 1 +random 1 +gener 1 +code 1 +forreli 1 +secur 1 +inform 1 +transmiss 1 +algebra 1 +area 1 +also 1 +appli 1 +probabl 1 +designand 1 +analysi 1 +iscomposit 1 +prove 1 +simpl 1 +auxiliarynumb 1 +call 1 +practic 1 +usual 1 +find 1 +witnessbi 1 +direct 1 +search 1 +among 1 +small 1 +lead 1 +followingnatur 1 +least 1 +functionof 1 +work 1 +given 1 +accurateheurist 1 +base 1 +probabilist 1 +assumpt 1 +allowsthi 1 +similar 1 +answer 1 +public 1 +improv 1 +approxim 1 +euler 1 +product 1 +cnta 1 +canadian 1 +math 1 +proceed 1 +complet 1 +condon 1 +glaser 1 +tanguai 1 +annual 1 +conf 1 +volum 1 +shallit 1 +press 1 +info 1 +click 1 +curriculum 1 +vita 1 +creat 1 +juli 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..03bf9f21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,58 @@ +system 3 +bart 2 +miller 2 +home 2 +page 2 +wisc 2 +project 2 +parallel 2 +tool 2 +oper 2 +distribut 2 +barton 1 +professorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usath 1 +follow 1 +list 1 +thing 1 +research 1 +paradyn 1 +perform 1 +fuzz 1 +random 1 +softwar 1 +testingteach 1 +introduct 1 +spring 1 +honor 1 +internet 1 +seminar 1 +advanc 1 +fall 1 +director 1 +undergradu 1 +graduatesprofession 1 +symposium 1 +monona 1 +terrac 1 +frank 1 +lloyd 1 +wright 1 +convent 1 +center 1 +technic 1 +advisori 1 +groupperson 1 +offici 1 +depart 1 +famili 1 +photosbart 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..4cbd707f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,44 @@ +madison 2 +offic 2 +hour 2 +hyper 1 +home 1 +page 1 +benjamin 1 +teitelbaum 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +usaben 1 +wisc 1 +edursumquinc 1 +internet 1 +ultim 1 +word 1 +gamezillion 1 +bookmarksspr 1 +schedul 1 +browser 1 +support 1 +tabl 1 +look 1 +like 1 +garbag 1 +click 1 +someth 1 +readabl 1 +mondai 1 +tuesdai 1 +wednesdai 1 +thursdai 1 +fridai 1 +dbseminar 1 +osseminar 1 +condormeet 1 +miron 1 +plseminar 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..c5730d44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,206 @@ +comput 15 +program 11 +project 10 +problem 9 +bestor 8 +scienc 7 +home 6 +techniqu 6 +section 6 +fortran 6 +univers 5 +madison 5 +structur 5 +solv 5 +model 5 +student 5 +languag 5 +wisc 4 +research 4 +invers 4 +vision 4 +scene 4 +posit 4 +imag 4 +us 4 +projector 4 +robot 4 +cours 4 +engin 4 +page 4 +gareth 3 +wisconsin 3 +world 3 +zealand 3 +postscript 3 +observ 3 +base 3 +algorithm 3 +point 3 +explor 3 +group 3 +intend 3 +dissert 2 +teach 2 +depart 2 +telephon 2 +mail 2 +wide 2 +massei 2 +motion 2 +examin 2 +rigid 2 +concurr 2 +assumpt 2 +transform 2 +dimens 2 +requir 2 +navig 2 +environ 2 +interest 2 +machin 2 +cover 2 +basic 2 +prepar 2 +elementari 2 +experi 2 +instruct 2 +high 2 +school 2 +taught 2 +entir 2 +primarili 2 +major 2 +advanc 2 +mathemat 2 +pagewelcom 1 +pagegareth 1 +assist 1 +west 1 +dayton 1 +street 1 +click 1 +finger 1 +http 1 +system 1 +administr 1 +data 1 +librari 1 +servic 1 +observatori 1 +drive 1 +dpl 1 +dacc 1 +edueduc 1 +honor 1 +curriculum 1 +vita 1 +resum 1 +graduat 1 +coursework 1 +titl 1 +abstract 1 +import 1 +recov 1 +within 1 +essenti 1 +exist 1 +multipl 1 +howev 1 +extens 1 +practic 1 +sensit 1 +nois 1 +accur 1 +optic 1 +restrict 1 +call 1 +make 1 +instead 1 +camera 1 +tradition 1 +result 1 +defin 1 +geometr 1 +perspect 1 +given 1 +identifi 1 +constrain 1 +specifi 1 +minimum 1 +number 1 +also 1 +addit 1 +minim 1 +type 1 +error 1 +occur 1 +real 1 +applic 1 +allow 1 +approxim 1 +intersect 1 +current 1 +appli 1 +determin 1 +unknown 1 +time 1 +advisor 1 +prof 1 +charl 1 +dyer 1 +graphic 1 +virtual 1 +realiti 1 +artifici 1 +intellig 1 +learn 1 +duti 1 +spring 1 +introduct 1 +credit 1 +need 1 +prior 1 +knowledg 1 +assum 1 +materi 1 +enabl 1 +write 1 +simpl 1 +done 1 +receiv 1 +littl 1 +algebra 1 +construct 1 +least 1 +procedur 1 +orient 1 +pascal 1 +survei 1 +prereq 1 +colleg 1 +work 1 +statist 1 +logic 1 +consent 1 +instructor 1 +open 1 +pointer 1 +wiscinfo 1 +inform 1 +hoofer 1 +out 1 +club 1 +nextstep 1 +next 1 +softwar 1 +start 1 +internet 1 +lyco 1 +search 1 +keyword 1 +copyright 1 +copi 1 +last 1 +modifi 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..abbb16b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,33 @@ +beyer 2 +wisc 2 +graduat 2 +project 2 +cours 2 +kevin 1 +home 1 +pagekevin 1 +beyerbey 1 +caution 1 +work 1 +student 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +advisor 1 +raghu 1 +ramakrishnan 1 +area 1 +interest 1 +databas 1 +researchresearch 1 +coral 1 +local 1 +inform 1 +undergradu 1 +coursesinstruct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..dc7ba8a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,62 @@ +bezenek 3 +home 2 +window 2 +pith 2 +phone 2 +wisc 2 +todd 1 +page 1 +toddm 1 +back 1 +introduc 1 +actual 1 +faster 1 +cpu 1 +_great 1 +microprocessor 1 +past 1 +present_ 1 +uregina 1 +bayko 1 +html 1 +express 1 +locomot 1 +squeez 1 +skateboard 1 +size 1 +packag 1 +helen 1 +custer 1 +_insid 1 +microsoft 1 +press 1 +current 1 +cours 1 +advanc 1 +oper 1 +system 1 +bart 1 +miller 1 +pithi 1 +consist 1 +abound 1 +take 1 +yeah 1 +point 1 +skew 1 +associ 1 +cach 1 +access 1 +inform 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +madison 1 +offic 1 +mail 1 +edubezenek 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..371f07ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,54 @@ +bockrath 4 +section 4 +nathan 2 +graduat 2 +student 2 +wisc 2 +viru 2 +info 2 +home 2 +teach 1 +assist 1 +averag 1 +pictur 1 +nate 1 +jpeg 1 +send 1 +email 1 +click 1 +held 1 +grade 1 +quiz 1 +review 1 +word 1 +macro 1 +make 1 +page 1 +offic 1 +hour 1 +anywai 1 +mondai 1 +wednesdai 1 +schedul 1 +distribut 1 +system 1 +simul 1 +model 1 +support 1 +free 1 +speech 1 +onlin 1 +site 1 +anoth 1 +dai 1 +back 1 +pageback 1 +depart 1 +pageoth 1 +neat 1 +stuff 1 +condor 1 +project 1 +internet 1 +oraclesend 1 +comment 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..2b2b2687 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,288 @@ +work 7 +system 6 +thing 6 +scienc 5 +time 5 +bolo 4 +year 4 +comput 4 +new 4 +burger 3 +develop 3 +softwar 3 +engin 3 +scientist 3 +hand 3 +home 3 +uwvax 3 +member 3 +associ 3 +although 2 +parent 2 +josef 2 +wai 2 +call 2 +mani 2 +person 2 +hacker 2 +design 2 +dewitt 2 +implement 2 +beer 2 +sleep 2 +wonder 2 +page 2 +tiger 2 +place 2 +bore 2 +like 2 +parallel 2 +everi 2 +els 2 +follow 2 +world 2 +databas 2 +object 2 +store 2 +shore 2 +wisconsin 2 +madison 2 +also 2 +provid 2 +rather 2 +internet 2 +usenet 2 +uucp 2 +along 2 +line 2 +depart 2 +part 2 +much 2 +try 2 +take 2 +care 2 +aircraft 2 +essen 2 +hau 2 +drink 2 +bolobologreet 1 +christen 1 +thoma 1 +roll 1 +mebolo 1 +everyon 1 +includ 1 +quit 1 +sure 1 +bestexplan 1 +question 1 +defin 1 +bywhat 1 +case 1 +semi 1 +real 1 +timeoper 1 +util 1 +last 1 +kernel 1 +unix 1 +administr 1 +creat 1 +appli 1 +construct 1 +realli 1 +though 1 +acomput 1 +degre 1 +pai 1 +david 1 +shudder 1 +methodolog 1 +right 1 +architect 1 +newoper 1 +type 1 +often 1 +sameto 1 +forth 1 +woodwork 1 +control 1 +draw 1 +brew 1 +complet 1 +relax 1 +enough 1 +myroomm 1 +disagre 1 +pursuit 1 +enjoi 1 +fly 1 +read 1 +fiction 1 +comic 1 +book 1 +railroad 1 +prototyp 1 +model 1 +role 1 +plai 1 +game 1 +notic 1 +imag 1 +sublim 1 +stripe 1 +creatur 1 +thetig 1 +appear 1 +throughout 1 +taken 1 +william 1 +blake 1 +poemtyg 1 +tyger 1 +put 1 +word 1 +road 1 +againin 1 +tremend 1 +leap 1 +insan 1 +purchas 1 +ahous 1 +address 1 +isjosef 1 +east 1 +gate 1 +roadmonona 1 +voic 1 +number 1 +workwork 1 +drive 1 +banana 1 +us 1 +grung 1 +either 1 +matur 1 +job 1 +chang 1 +perhapssom 1 +seem 1 +othermonth 1 +beat 1 +intosubmiss 1 +everyth 1 +moon 1 +andstar 1 +current 1 +project 1 +fordav 1 +famou 1 +gamma 1 +relat 1 +queri 1 +interpret 1 +paradis 1 +geograph 1 +inform 1 +orient 1 +data 1 +wiss 1 +storag 1 +whatev 1 +need 1 +done 1 +whole 1 +occur 1 +thecomput 1 +departmentof 1 +themadison 1 +campusof 1 +univers 1 +campu 1 +locat 1 +peninsula 1 +five 1 +lake 1 +workin 1 +addit 1 +consult 1 +solut 1 +advic 1 +technicalexpertis 1 +help 1 +port 1 +newsystem 1 +reviv 1 +oddbal 1 +tasksar 1 +kind 1 +tell 1 +tovisit 1 +serverbut 1 +haven 1 +anyth 1 +mostlyempti 1 +except 1 +friend 1 +activitiesuwvaxi 1 +oper 1 +site 1 +free 1 +that 1 +print 1 +someth 1 +longer 1 +svolunt 1 +master 1 +goe 1 +run 1 +howev 1 +reader 1 +across 1 +differentarchitectur 1 +task 1 +organizationsi 1 +organ 1 +alwai 1 +agre 1 +oftenhav 1 +good 1 +benefit 1 +usersof 1 +commun 1 +aopa 1 +owner 1 +pilot 1 +experiment 1 +usenix 1 +blitz 1 +drinkingwhen 1 +school 1 +hord 1 +friendsand 1 +visit 1 +local 1 +thursdai 1 +night 1 +import 1 +slowli 1 +entir 1 +select 1 +acquaint 1 +becam 1 +loftili 1 +labelledblitz 1 +societi 1 +divers 1 +meet 1 +ofoctoberfest 1 +weekend 1 +chud 1 +accumulateda 1 +short 1 +histori 1 +whatnotof 1 +charad 1 +pagelast 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..527f07b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,47 @@ +page 8 +home 6 +link 3 +comput 3 +system 3 +brad 2 +oper 2 +search 2 +welcom 1 +thayer 1 +homepag 1 +much 1 +mail 1 +scienc 1 +intro 1 +network 1 +possibl 1 +advanc 1 +would 1 +foolish 1 +neglect 1 +model 1 +interest 1 +thec 1 +us 1 +probabl 1 +bore 1 +check 1 +seminaranywai 1 +aim 1 +beaucoup 1 +boir 1 +pepper 1 +badger 1 +packer 1 +pagesom 1 +altavista 1 +enginefind 1 +email 1 +adress 1 +world 1 +wideth 1 +jazz 1 +duan 1 +mclaughlin 1 +pageuw 1 +athlet 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..500a3581 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,64 @@ +breach 8 +scott 4 +comput 4 +wisc 2 +wisconsin 2 +madison 2 +univers 2 +intern 2 +symposium 2 +gurindar 2 +home 1 +pagescott 1 +addresseseducationresearch 1 +interest 1 +public 1 +recreat 1 +associatesaddressesscott 1 +breachdepart 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usatel 1 +educationph 1 +scienc 1 +engin 1 +carnegi 1 +mellon 1 +advisorguri 1 +sohiresearch 1 +interestscomput 1 +architecturemultiscalarpublicationsmultiscalar 1 +processorsgurindar 1 +sohi 1 +vijaykumarnd 1 +architectur 1 +anatomi 1 +regist 1 +file 1 +multiscalar 1 +processorscott 1 +vijaykumar 1 +sohith 1 +microarchitectur 1 +effici 1 +detect 1 +pointer 1 +arrai 1 +access 1 +errorstodd 1 +austin 1 +sohiconfer 1 +program 1 +languag 1 +design 1 +implement 1 +recreationwingsbeersquidtvassociatestodd 1 +austindoug 1 +burgerbabak 1 +falsafialain 1 +kagit 1 +vijaykumarlast 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..eafdaa54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,83 @@ +page 4 +suni 2 +albani 2 +fall 2 +name 2 +sinc 2 +bleed 1 +nontrivi 1 +waysher 1 +temporarili 1 +underst 1 +unadorn 1 +provid 1 +section 1 +pizza 1 +pool 1 +brief 1 +hobbi 1 +schedul 1 +spring 1 +stinkin 1 +bookmark 1 +poor 1 +unfortun 1 +myclass 1 +hypersensit 1 +rockjock 1 +cretin 1 +brood 1 +glare 1 +clenchesfist 1 +crack 1 +knuckl 1 +tragic 1 +flightyfemm 1 +get 1 +razz 1 +asskick 1 +thirdgrad 1 +perhap 1 +smooth 1 +skin 1 +hardbodi 1 +leatherboi 1 +leer 1 +atm 1 +whenev 1 +call 1 +roll 1 +differ 1 +make 1 +todayi 1 +giggl 1 +said 1 +becam 1 +aprostitut 1 +societi 1 +bigotri 1 +pedagodi 1 +isaac 1 +theblack 1 +goat 1 +refus 1 +stai 1 +claw 1 +hand 1 +sssuuuhhh 1 +mmuuuhhhh 1 +dddduuuuuhhhhh 1 +mmmmuuuhhhh 1 +maaaahhhjaaaaaahhhhh 1 +fffuuuhhhhh 1 +yyyyyyyuuuuuhhhhh 1 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 1 +uuuhhh 1 +uuummmm 1 +uuuhhhh 1 +wwwwwhhhhuuuuuhhhhh 1 +zhang 1 +wouldn 1 +notic 1 +eggleston 1 +smile 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..e64d65a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,154 @@ +system 13 +prefetch 12 +applic 10 +cach 9 +file 8 +research 6 +perform 6 +parallel 6 +control 6 +page 5 +princeton 5 +felten 5 +proceed 5 +univers 4 +oper 4 +integr 4 +edward 4 +wisc 3 +comput 3 +recent 3 +alloc 3 +anna 3 +karlin 3 +disk 3 +polici 3 +educ 2 +interest 2 +cours 2 +high 2 +resourc 2 +depart 2 +tech 2 +report 2 +sigmetr 2 +confer 2 +implement 2 +princetonunivers 2 +osdi 2 +uniprocessor 2 +investig 2 +techniqu 2 +replac 2 +kernel 2 +physic 2 +home 1 +assist 1 +professor 1 +sciencedepart 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usacao 1 +eduphon 1 +department 1 +offic 1 +paper 1 +talk 1 +summari 1 +collect 1 +link 1 +tsinghua 1 +beij 1 +china 1 +memori 1 +project 1 +optim 1 +cachingacf 1 +topic 1 +distribut 1 +fall 1 +advanc 1 +spring 1 +trace 1 +simul 1 +access 1 +tracesrec 1 +papersintegr 1 +cachingtraci 1 +kimbrel 1 +novemb 1 +shorter 1 +version 1 +thesi 1 +also 1 +schedulingpei 1 +appear 1 +toc 1 +studi 1 +strategiespei 1 +peform 1 +first 1 +symposium 1 +slide 1 +present 1 +usenix 1 +summer 1 +technic 1 +tickertaip 1 +raid 1 +architectur 1 +swee 1 +boon 1 +shivakumar 1 +venkataraman 1 +john 1 +wilk 1 +isca 1 +talksslid 1 +postscript 1 +andpostscript 1 +summarymi 1 +focus 1 +storag 1 +manag 1 +andparallel 1 +particular 1 +improvefil 1 +specif 1 +filecach 1 +aggress 1 +data 1 +havedevelop 1 +individualappl 1 +respons 1 +decid 1 +useit 1 +us 1 +fairglob 1 +carefulli 1 +cachereplac 1 +schedul 1 +prototyp 1 +implementationon 1 +demonstratedthat 1 +good 1 +chosen 1 +strategi 1 +informationcan 1 +significantli 1 +improv 1 +mani 1 +current 1 +extend 1 +amdevelop 1 +algorithm 1 +diskarrai 1 +addit 1 +global 1 +managementproblem 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..4d9deb7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,284 @@ +system 13 +proc 11 +conf 8 +databas 7 +data 7 +work 6 +research 5 +manag 5 +perform 5 +project 5 +almaden 4 +object 4 +garlic 4 +comput 3 +madison 3 +area 3 +shore 3 +repositori 3 +heterogen 3 +effort 3 +experi 3 +time 3 +queri 3 +applic 3 +march 3 +persist 3 +franklin 3 +minneapoli 3 +univers 2 +center 2 +altern 2 +carei 2 +evalu 2 +interest 2 +next 2 +gener 2 +orient 2 +design 2 +complex 2 +workload 2 +base 2 +goal 2 +involv 2 +share 2 +build 2 +recent 2 +twelv 2 +student 2 +spent 2 +call 2 +extend 2 +languag 2 +octob 2 +multimedia 2 +codi 2 +haa 2 +niblack 2 +arya 2 +fagin 2 +flickner 2 +petkov 2 +schwarz 2 +thoma 2 +william 2 +wimmer 2 +toward 2 +workshop 2 +oodbm 2 +dewitt 2 +naughton 2 +livni 2 +septemb 2 +real 2 +server 2 +andm 2 +sigmod 2 +mike 1 +careymichael 1 +careyprofessor 1 +leav 1 +scienc 1 +depart 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +staff 1 +member 1 +harri 1 +road 1 +jose 1 +phone 1 +primari 1 +mail 1 +wisc 1 +eduresearch 1 +interestsdatabas 1 +parallel 1 +distribut 1 +appli 1 +main 1 +performanceand 1 +topicsof 1 +current 1 +includ 1 +tradeoff 1 +techniqu 1 +forobject 1 +algorithmsrel 1 +transact 1 +process 1 +schedul 1 +multi 1 +userdatabas 1 +user 1 +specifi 1 +theexodu 1 +extens 1 +dbm 1 +aimedat 1 +develop 1 +scalabl 1 +storag 1 +persistentobject 1 +environ 1 +whichi 1 +upon 1 +exodu 1 +meet 1 +objectmanag 1 +need 1 +replac 1 +unix 1 +file 1 +applicationssuch 1 +case 1 +move 1 +academia 1 +industri 1 +greatyear 1 +part 1 +becom 1 +best 1 +academ 1 +researchgroup 1 +known 1 +come 1 +tackl 1 +anddiffer 1 +challeng 1 +thesourc 1 +number 1 +paper 1 +teach 1 +forth 1 +past 1 +year 1 +relat 1 +signific 1 +fraction 1 +rel 1 +projectther 1 +multimediainform 1 +allow 1 +live 1 +varieti 1 +tobe 1 +manipul 1 +though 1 +resid 1 +homogen 1 +objectdatabas 1 +sabbat 1 +continuedto 1 +focus 1 +graduat 1 +aqueri 1 +browser 1 +front 1 +tool 1 +pesto 1 +thegarl 1 +locat 1 +public 1 +oodb 1 +access 1 +implement 1 +kiernan 1 +orientedprogram 1 +oopsla 1 +austin 1 +appear 1 +multipl 1 +content 1 +tork 1 +roth 1 +ifip 1 +confer 1 +visualdatabas 1 +lausann 1 +switzerland 1 +inform 1 +garlicapproach 1 +luniewski 1 +and 1 +ieee 1 +issu 1 +dataengin 1 +ride 1 +taipei 1 +taiwan 1 +statu 1 +report 1 +benchmark 1 +withd 1 +kant 1 +onobject 1 +program 1 +portland 1 +autom 1 +tune 1 +brown 1 +mehta 1 +thint 1 +larg 1 +santiago 1 +chile 1 +make 1 +initi 1 +smrc 1 +withb 1 +reinwald 1 +desslock 1 +lehman 1 +pirahesh 1 +srinivasan 1 +tarascon 1 +provenc 1 +franc 1 +hall 1 +mcauliff 1 +schuh 1 +solomon 1 +tsatalo 1 +white 1 +zwill 1 +sigmodint 1 +fine 1 +grain 1 +page 1 +zaharioudaki 1 +managementof 1 +memori 1 +pang 1 +accur 1 +model 1 +hybrid 1 +hash 1 +join 1 +algorithm 1 +patel 1 +vernon 1 +sigmetr 1 +measur 1 +modelingof 1 +nashvil 1 +index 1 +multivers 1 +lock 1 +bober 1 +technolog 1 +cambridg 1 +england 1 +client 1 +cach 1 +revisit 1 +indistribut 1 +oszu 1 +dayal 1 +andp 1 +valduriez 1 +morgan 1 +kaufmann 1 +publish 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..dc78c728 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,30 @@ +wisconsin 4 +chin 3 +univers 3 +offic 2 +cchin 2 +wisc 2 +biochemistri 2 +madison 2 +tang 1 +home 1 +pagechin 1 +tanggradu 1 +student 1 +depart 1 +west 1 +dayton 1 +streetmadison 1 +bldg 1 +mail 1 +edutelephon 1 +current 1 +assign 1 +introduct 1 +data 1 +structur 1 +hour 1 +mondai 1 +tuesdai 1 +fridai 1 +ameduc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..0a49693b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,37 @@ +chandra 3 +research 3 +satish 2 +soon 2 +home 1 +page 1 +wisc 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaphon 1 +interest 1 +summari 1 +public 1 +come 1 +real 1 +stuff 1 +wodehous 1 +book 1 +internet 1 +movi 1 +databas 1 +nostalgia 1 +york 1 +time 1 +altavista 1 +italian 1 +languag 1 +cultur 1 +miscellan 1 +linksclick 1 +log 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..1fb5a69f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,91 @@ +chilimbi 6 +parallel 4 +trishul 3 +research 3 +comput 3 +wisconsin 3 +laru 3 +memori 3 +merit 3 +state 3 +page 2 +wisc 2 +click 2 +madison 2 +program 2 +compil 2 +share 2 +visual 2 +indian 2 +tool 2 +jame 2 +examin 2 +home 1 +real 1 +megradu 1 +assistantdepart 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usaadvisor 1 +interest 1 +languag 1 +architectur 1 +integr 1 +messag 1 +pass 1 +perform 1 +analysi 1 +enhanc 1 +designresearch 1 +project 1 +wind 1 +tunneleduc 1 +univers 1 +tech 1 +institut 1 +technolog 1 +bombai 1 +summari 1 +publicationscachi 1 +automat 1 +insert 1 +cico 1 +annot 1 +intern 1 +confer 1 +process 1 +icpp 1 +august 1 +stormwatch 1 +system 1 +protocolstrishul 1 +thoma 1 +ball 1 +stephen 1 +eick 1 +supercomput 1 +appear 1 +decemb 1 +award 1 +honor 1 +certif 1 +mathemat 1 +olympiadpresid 1 +gold 1 +medal 1 +nation 1 +physic 1 +examinationcertif 1 +chemistrycertif 1 +electron 1 +miscellan 1 +movi 1 +dream 1 +curriculum 1 +vita 1 +last 1 +updat 1 +mail 1 +suggest 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..2f9a4ffe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,31 @@ +chandrasekaran 2 +sashikanth 2 +wisconsin 2 +home 1 +page 1 +csashi 1 +wisc 1 +graduat 1 +studentdepart 1 +comput 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaadvisor 1 +mark 1 +hill 1 +project 1 +educ 1 +btech 1 +indian 1 +institut 1 +technolog 1 +madra 1 +june 1 +univeristi 1 +depart 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..53e312e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,106 @@ +databas 5 +page 5 +paradis 4 +comput 4 +site 4 +curt 3 +scienc 3 +madison 3 +relat 3 +home 3 +wiscinfo 3 +ellmann 2 +univers 2 +wisconsin 2 +wisc 2 +item 2 +eosdi 2 +standard 2 +inform 2 +intern 2 +dienst 2 +project 1 +depart 1 +focu 1 +java 1 +develop 1 +webgnat 1 +defect 1 +track 1 +prototyp 1 +index 1 +shore 1 +previou 1 +life 1 +doit 1 +opengi 1 +consortium 1 +global 1 +posit 1 +system 1 +calmit 1 +nebraska 1 +lincoln 1 +feder 1 +approach 1 +object 1 +manag 1 +group 1 +free 1 +list 1 +transact 1 +process 1 +perform 1 +council 1 +illustra 1 +white 1 +papersmiscellan 1 +sitescampu 1 +wyrm 1 +hoard 1 +gopher 1 +librari 1 +wiscnet 1 +netcorpor 1 +appl 1 +microsoft 1 +research 1 +land 1 +paww 1 +commerc 1 +metrowerk 1 +taligentsearch 1 +savvi 1 +search 1 +webcrawl 1 +open 1 +text 1 +worm 1 +network 1 +organ 1 +internet 1 +draft 1 +dilbert 1 +world 1 +onlin 1 +winsock 1 +applic 1 +current 1 +weather 1 +map 1 +implement 1 +geolog 1 +survei 1 +govern 1 +locat 1 +gil 1 +oakridg 1 +nation 1 +center 1 +stock 1 +market 1 +datacurt 1 +ellmanncurt 1 +eduparadis 1 +projectdepart 1 +sciencesunivers 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..5d4b8a83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,22 @@ +chee 2 +yong 2 +madison 2 +home 1 +pagechan 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +email 1 +cychan 1 +wisc 1 +offic 1 +phone 1 +page 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..d3d3b2cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,45 @@ +comput 4 +engin 4 +sara 3 +madison 3 +scienc 3 +home 2 +page 2 +univers 2 +wisconsin 2 +depart 2 +physic 2 +bauman 1 +dailei 1 +baumandailei 1 +wisc 1 +edugradu 1 +program 1 +mathemat 1 +mace 1 +mechan 1 +astronaut 1 +nuclear 1 +educ 1 +math 1 +lewi 1 +clark 1 +colleg 1 +research 1 +work 1 +public 1 +current 1 +schedul 1 +link 1 +friend 1 +pagessend 1 +mail 1 +offic 1 +address 1 +statist 1 +west 1 +dayton 1 +street 1 +last 1 +modifi 1 +daileytu 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..c5c95b9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,313 @@ +comput 19 +memori 18 +wood 15 +david 11 +architectur 11 +share 10 +system 9 +simul 9 +cach 8 +perform 7 +lebeck 7 +research 6 +parallel 6 +reinhardt 6 +mark 6 +user 6 +jame 6 +laru 6 +wisconsin 5 +program 5 +hardwar 5 +anddavid 5 +ieee 5 +intern 5 +symposium 5 +network 5 +interfac 5 +fine 5 +hill 5 +time 5 +design 4 +implement 4 +tool 4 +babak 4 +falsafi 4 +grain 4 +proceed 4 +protocol 4 +level 4 +alvin 4 +steven 4 +control 4 +tempest 4 +typhoon 4 +develop 4 +fast 4 +refer 4 +multiprocessor 3 +evalu 3 +techniqu 3 +wind 3 +tunnel 3 +univers 3 +machin 3 +support 3 +isca 3 +coher 3 +abstract 3 +access 3 +case 3 +messag 3 +transpar 3 +block 3 +page 2 +wisc 2 +juli 2 +especi 2 +includ 2 +project 2 +wart 2 +california 2 +berkelei 2 +current 2 +graduat 2 +student 2 +hyder 2 +pfile 2 +introduct 2 +architecturec 2 +advanc 2 +recent 2 +distribut 2 +supercomput 2 +invalid 2 +overhead 2 +ioanni 2 +schoina 2 +softwar 2 +new 2 +paradigm 2 +integr 2 +pass 2 +exist 2 +mechan 2 +allow 2 +programm 2 +data 2 +read 2 +us 2 +processor 2 +also 2 +specifi 2 +state 2 +rewrit 2 +home 1 +associ 1 +professor 1 +scienceand 1 +electr 1 +engineeringdepart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usadavid 1 +eduphon 1 +secretari 1 +fingerson 1 +thea 1 +sklenar 1 +department 1 +offic 1 +interest 1 +uniprocessor 1 +oper 1 +analysi 1 +vlsi 1 +power 1 +portabl 1 +educ 1 +steve 1 +brian 1 +toonenrec 1 +rahmat 1 +intel 1 +alvi 1 +duke 1 +microsystem 1 +callaghan 1 +informix 1 +cours 1 +teach 1 +fall 1 +organ 1 +programmingc 1 +select 1 +paper 1 +decoupl 1 +memorysteven 1 +robert 1 +communicationshubhendu 1 +mukherje 1 +synchron 1 +workstat 1 +costrahmat 1 +confer 1 +dynam 1 +self 1 +reduc 1 +multiprocessorsalvin 1 +june 1 +activ 1 +simulationalvin 1 +sigmetricsmai 1 +accuraci 1 +interconnect 1 +dougla 1 +burger 1 +process 1 +april 1 +applic 1 +specif 1 +ann 1 +roger 1 +asplo 1 +profil 1 +spec 1 +benchmark 1 +studi 1 +octob 1 +cooper 1 +scalabl 1 +transact 1 +toc 1 +novemb 1 +annot 1 +bibliographi 1 +decemb 1 +line 1 +version 1 +revis 1 +frequent 1 +madhusudhan 1 +talluri 1 +august 1 +summari 1 +main 1 +goal 1 +cost 1 +effect 1 +computerarchitectur 1 +take 1 +advantag 1 +rapidli 1 +chang 1 +technolog 1 +myresearch 1 +major 1 +thrust 1 +feasibl 1 +correct 1 +facilit 1 +focuss 1 +follow 1 +three 1 +area 1 +multi 1 +effici 1 +hybridprogram 1 +virtual 1 +prototyp 1 +exploit 1 +similaritesof 1 +hypothet 1 +understand 1 +tune 1 +result 1 +calledtempest 1 +handler 1 +suppliedmechan 1 +provid 1 +compil 1 +librari 1 +hybrid 1 +combin 1 +tempestmechan 1 +bulk 1 +transfer 1 +virtualmemori 1 +manag 1 +novelmechan 1 +tagblock 1 +byte 1 +write 1 +theloc 1 +remot 1 +explor 1 +altern 1 +wai 1 +first 1 +call 1 +propos 1 +hardwareplatform 1 +fulli 1 +revers 1 +translationt 1 +rtlb 1 +invok 1 +detect 1 +grainaccess 1 +fault 1 +found 1 +thata 1 +run 1 +performscompar 1 +anal 1 +five 1 +memoryprogram 1 +method 1 +thatoptim 1 +common 1 +hit 1 +significantli 1 +reducingsimul 1 +tightli 1 +gener 1 +byprovid 1 +tag 1 +referenceinvok 1 +function 1 +depend 1 +upon 1 +type 1 +andmemori 1 +processedbi 1 +manipul 1 +special 1 +null 1 +functionfor 1 +action 1 +usingbinari 1 +tabl 1 +lookup 1 +memoryrefer 1 +sparcstat 1 +tothre 1 +faster 1 +convent 1 +trace 1 +driven 1 +thatcal 1 +procedur 1 +onlythre 1 +slower 1 +origin 1 +instrument 1 +investig 1 +binari 1 +techniquesto 1 +platform 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..e010081c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,30 @@ +doug 2 +burger 2 +home 2 +comput 2 +page 1 +pageprofession 1 +inform 1 +research 1 +summaryresum 1 +cvtranscriptcours 1 +projectsadvisoraffili 1 +project 1 +galileo 1 +sciwisconsin 1 +wind 1 +tunnelpag 1 +maintain 1 +architectureuw 1 +architecturesimplescalar 1 +tool 1 +setgenericasacmperson 1 +stuff 1 +meus 1 +linksphoto 1 +galleryrid 1 +demonhunt 1 +damn 1 +catsbewar 1 +grad 1 +school 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..f74c7c9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,172 @@ +inform 6 +approxim 5 +boor 4 +click 4 +journal 4 +carl 3 +file 3 +variou 3 +check 3 +publish 3 +paul 3 +home 2 +page 2 +comput 2 +wisconsin 2 +madison 2 +fall 2 +email 2 +wisc 2 +look 2 +recent 2 +theori 2 +read 2 +clickabl 2 +version 2 +list 2 +errata 2 +numer 2 +analysi 2 +spline 2 +ditto 2 +thank 2 +nevai 2 +find 2 +also 2 +us 2 +pinku 2 +last 1 +chang 1 +professor 1 +scienc 1 +mathematicsdepart 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usaoffic 1 +hour 1 +town 1 +schoenberg 1 +work 1 +death 1 +deboor 1 +telephon 1 +schedul 1 +teach 1 +former 1 +present 1 +student 1 +select 1 +articl 1 +written 1 +areavail 1 +anonym 1 +approx 1 +provid 1 +access 1 +individu 1 +theclick 1 +button 1 +small 1 +subset 1 +author 1 +third 1 +edit 1 +cont 1 +elementari 1 +algorithm 1 +approach 1 +print 1 +practic 1 +guid 1 +latest 1 +thevari 1 +program 1 +driver 1 +latter 1 +book 1 +ofapproxim 1 +academ 1 +press 1 +includ 1 +accept 1 +publishedpap 1 +well 1 +postal 1 +address 1 +mani 1 +andmuch 1 +much 1 +forconstruct 1 +springer 1 +verlag 1 +foreast 1 +search 1 +theirtabl 1 +content 1 +singli 1 +combin 1 +thishandi 1 +tool 1 +alsoapproxim 1 +amo 1 +slist 1 +homepag 1 +bibliographi 1 +avail 1 +link 1 +peopl 1 +resourc 1 +ila 1 +center 1 +seek 1 +shall 1 +organ 1 +introduct 1 +joi 1 +seeviva_vi 1 +alsoon 1 +screen 1 +tutori 1 +great 1 +pictur 1 +hermit 1 +place 1 +contain 1 +html 1 +thehtml 1 +primermight 1 +even 1 +better 1 +unusu 1 +ever_chang 1 +david 1 +griffeath 1 +sprimordi 1 +soup 1 +kitchen 1 +interest 1 +seeodd 1 +end 1 +allan 1 +techunix 1 +technion 1 +nevaiif 1 +makehi 1 +mathemat 1 +outputavail 1 +cours 1 +math 1 +hous 1 +next 1 +door 1 +occupi 1 +taki 1 +souganid 1 +andthaleia 1 +zariphopoul 1 +szego 1 +bust 1 +stand 1 +inscript 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..43792f84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,175 @@ +data 16 +devis 9 +visual 7 +queri 4 +record 4 +livni 4 +raghu 4 +ramakrishnan 4 +environ 3 +explor 3 +input 3 +link 3 +confer 3 +graphic 2 +help 2 +user 2 +need 2 +base 2 +scienc 2 +medicin 2 +inform 2 +jussi 2 +myllymaki 2 +proceed 2 +spie 2 +analysi 2 +michael 2 +cheng 2 +miron 2 +releas 2 +support 2 +time 2 +hotlin 2 +home 1 +pagedevis 1 +visualizationt 1 +content 1 +featuresexamplesin 1 +depthpublicationsrel 1 +workreleasecontactsfeaturesthes 1 +featur 1 +distinguish 1 +interfac 1 +construct 1 +oneset 1 +save 1 +appli 1 +larger 1 +memori 1 +effici 1 +handl 1 +map 1 +level 1 +cancontrol 1 +color 1 +shape 1 +individu 1 +abil 1 +us 1 +repres 1 +flexibl 1 +layout 1 +mechan 1 +within 1 +window 1 +group 1 +togeth 1 +comparison 1 +asid 1 +ax 1 +cursor 1 +compar 1 +relationship 1 +differ 1 +viewsof 1 +direct 1 +ascii 1 +file 1 +integ 1 +float 1 +date 1 +string 1 +type 1 +examplescheck 1 +follow 1 +exampl 1 +cool 1 +pictur 1 +quick 1 +introduct 1 +tree 1 +validationmolecular 1 +biologi 1 +cell 1 +imag 1 +soil 1 +birch 1 +clusteringfinanci 1 +explorationfamili 1 +nation 1 +climatedata 1 +centergeograph 1 +systemsoil 1 +sciencefil 1 +serverprogram 1 +tracesclin 1 +mani 1 +moreexampl 1 +viewer 1 +famili 1 +depthfor 1 +detail 1 +descript 1 +model 1 +visualizationvisu 1 +interfaceperform 1 +issuespublicationsmiron 1 +larg 1 +dataset 1 +dataexplor 1 +januari 1 +stream 1 +inproceed 1 +andanalysi 1 +februari 1 +praveenseshadri 1 +next 1 +sequencequeri 1 +intern 1 +themanag 1 +comad 1 +decemb 1 +relat 1 +workth 1 +seqproject 1 +complementari 1 +design 1 +queryrecord 1 +sequenc 1 +output 1 +bevisu 1 +informationw 1 +current 1 +version 1 +executablesfor 1 +solari 1 +platform 1 +dynam 1 +ld_library_path 1 +variabl 1 +appropri 1 +rundevis 1 +architectur 1 +execut 1 +arestat 1 +requir 1 +shareabl 1 +librari 1 +download 1 +click 1 +contactsfor 1 +research 1 +project 1 +contactmiron 1 +guangshun 1 +chen 1 +kent 1 +wenger 1 +send 1 +mail 1 +usersupport 1 +page 1 +access 1 +sinc 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..410b078b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,200 @@ +shore 10 +object 9 +system 9 +data 9 +paradis 5 +unix 5 +dewitt 4 +geograph 4 +project 4 +univers 3 +wisconsin 3 +inform 3 +wide 3 +target 3 +hardwar 3 +store 3 +larg 3 +naughton 3 +page 2 +david 2 +email 2 +wisc 2 +orient 2 +databas 2 +parallel 2 +research 2 +persist 2 +need 2 +applic 2 +basic 2 +manag 2 +develop 2 +text 2 +field 2 +file 2 +complex 2 +client 2 +server 2 +carei 2 +gamma 2 +set 2 +current 2 +relat 2 +match 2 +emploi 2 +recent 2 +proceed 2 +sigmod 2 +confer 2 +talk 2 +home 1 +professor 1 +romn 1 +fellow 1 +comput 1 +scienc 1 +depart 1 +dayton 1 +madison 1 +telephon 1 +michigan 1 +interest 1 +databasebenchmark 1 +summari 1 +main 1 +objectiveof 1 +design 1 +implement 1 +evalu 1 +objectsystem 1 +serv 1 +varieti 1 +applicationsinclud 1 +softwar 1 +programminglanguag 1 +satellit 1 +repositori 1 +multimedia 1 +expand 1 +capabilitiesof 1 +us 1 +exodu 1 +storag 1 +fund 1 +arpa 1 +number 1 +wai 1 +includ 1 +support 1 +typedobject 1 +multipl 1 +program 1 +languag 1 +like 1 +hierarchicalnam 1 +space 1 +name 1 +compat 1 +interfaceto 1 +interfac 1 +intend 1 +toeas 1 +transit 1 +systemenviron 1 +exist 1 +tool 1 +ccwill 1 +abl 1 +without 1 +modif 1 +becom 1 +either 1 +singl 1 +orth 1 +rang 1 +environ 1 +scale 1 +fromindividu 1 +workstat 1 +heterogen 1 +networksto 1 +multiprocessor 1 +intel 1 +paragon 1 +ajoint 1 +prof 1 +solomon 1 +attempt 1 +appli 1 +technolog 1 +developeda 1 +part 1 +relationaldatabas 1 +thetask 1 +manipul 1 +mani 1 +databasesystem 1 +hold 1 +excel 1 +formanag 1 +busi 1 +poor 1 +modelingne 1 +must 1 +capabl 1 +manipulatingmuch 1 +polygon 1 +polylin 1 +instead 1 +model 1 +provid 1 +muchbett 1 +type 1 +anoth 1 +signific 1 +differencefrom 1 +parallelismto 1 +facilit 1 +execut 1 +process 1 +assatellit 1 +imag 1 +platform 1 +projecti 1 +cluster 1 +sparc 1 +connect 1 +sampl 1 +public 1 +benchmark 1 +withm 1 +washington 1 +persistentappl 1 +franklin 1 +hall 1 +mcauliff 1 +chuh 1 +tsatalo 1 +white 1 +zwill 1 +intern 1 +conferenceon 1 +minneapoli 1 +kabra 1 +patel 1 +proceedingsof 1 +base 1 +santiego 1 +chile 1 +august 1 +vldb 1 +invit 1 +summit 1 +present 1 +automat 1 +creat 1 +januari 1 +pub 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..aded4d99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,326 @@ +dyer 18 +vision 17 +comput 16 +imag 11 +control 11 +view 10 +motion 10 +proc 10 +object 9 +seitz 9 +model 8 +visual 8 +scene 7 +algorithm 7 +base 6 +surfac 6 +us 6 +kutulako 6 +three 5 +shape 5 +appear 5 +interact 5 +global 5 +data 5 +recognit 5 +conf 5 +wisconsin 4 +develop 4 +real 4 +camera 4 +user 4 +recent 4 +viewpoint 4 +displai 4 +purpos 4 +dimension 3 +represent 3 +activ 3 +environ 3 +move 3 +morph 3 +order 3 +behavior 3 +explor 3 +observ 3 +defin 3 +reconstruct 3 +analysi 3 +cyclic 3 +invari 3 +pattern 3 +workshop 3 +seal 3 +detect 3 +hibbard 3 +charl 2 +wisc 2 +area 2 +interest 2 +synthesi 2 +research 2 +time 2 +virtual 2 +input 2 +path 2 +combin 2 +without 2 +interpol 2 +continu 2 +correspond 2 +center 2 +orient 2 +import 2 +unknown 2 +mark 2 +build 2 +need 2 +gener 2 +provabl 2 +recov 2 +understand 2 +scientif 2 +system 2 +public 2 +period 2 +toward 2 +contour 2 +ieee 2 +artifici 2 +intellig 2 +point 2 +affin 2 +paul 2 +spring 2 +fall 2 +allmen 2 +stewart 2 +kjell 2 +home 1 +pagecharl 1 +dyerprofessordepart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +edutelephon 1 +finger 1 +infoph 1 +univers 1 +maryland 1 +curriculum 1 +vita 1 +visualizationgroup 1 +leader 1 +groupprogram 1 +chair 1 +cvpr 1 +synthesisth 1 +goal 1 +work 1 +basic 1 +tool 1 +controllingin 1 +either 1 +autonom 1 +cameraof 1 +videostream 1 +acquir 1 +fix 1 +mobil 1 +around 1 +site 1 +output 1 +panoram 1 +whicha 1 +technolog 1 +could 1 +navig 1 +througha 1 +custom 1 +thesit 1 +predetermin 1 +main 1 +researchquest 1 +adapt 1 +basi 1 +synthesizenew 1 +reconstructiona 1 +intermedi 1 +step 1 +innovativetechniqu 1 +callview 1 +take 1 +basisimag 1 +rang 1 +linear 1 +connect 1 +explorationcomput 1 +start 1 +investig 1 +howto 1 +acquisit 1 +process 1 +controllingcamera 1 +paramet 1 +studi 1 +purposefulli 1 +theposit 1 +dynam 1 +adjustviewpoint 1 +theus 1 +forsolv 1 +task 1 +findspecif 1 +unknownshap 1 +recogn 1 +coordin 1 +simpl 1 +chang 1 +appearanceof 1 +well 1 +simplifi 1 +computationsrequir 1 +make 1 +precis 1 +progress 1 +andelimin 1 +accur 1 +differenti 1 +measur 1 +thecamera 1 +believ 1 +approach 1 +towardsviewpoint 1 +close 1 +relat 1 +geometri 1 +viewedobject 1 +thisapproach 1 +correct 1 +asid 1 +revolut 1 +smooth 1 +arbitrarili 1 +visualizationin 1 +map 1 +techniquescap 1 +possibl 1 +type 1 +specificgraph 1 +procedur 1 +capabl 1 +displayingarbitrari 1 +commonfram 1 +refer 1 +coupl 1 +algorithmexecut 1 +provid 1 +power 1 +especi 1 +experi 1 +dataanalysi 1 +implement 1 +call 1 +forexperi 1 +techniqu 1 +visualizingintermedi 1 +final 1 +result 1 +forproblem 1 +discrimin 1 +cloud 1 +satellit 1 +trace 1 +shah 1 +jain 1 +kluwer 1 +boston 1 +siggraph 1 +track 1 +recoveri 1 +stationari 1 +advanc 1 +festschrift 1 +azriel 1 +rosenfeld 1 +societi 1 +press 1 +alamito 1 +complet 1 +four 1 +physic 1 +valid 1 +adjust 1 +lumelski 1 +strategi 1 +guid 1 +dimens 1 +robot 1 +autom 1 +occlud 1 +irregular 1 +rigid 1 +articul 1 +battaiola 1 +santek 1 +voidrot 1 +martinez 1 +earth 1 +space 1 +scienc 1 +juli 1 +lattic 1 +includ 1 +abstract 1 +groupcours 1 +taught 1 +introduct 1 +current 1 +student 1 +gareth 1 +bestor 1 +brian 1 +morgan 1 +steve 1 +liangyin 1 +yuph 1 +graduat 1 +bill 1 +whibbard 1 +macc 1 +onlattic 1 +structur 1 +kiriako 1 +kyro 1 +rochest 1 +ofobserv 1 +iutech 1 +sequenc 1 +descript 1 +spatiotempor 1 +flow 1 +curv 1 +brent 1 +dimensionalshap 1 +machin 1 +graphic 1 +harri 1 +plantinga 1 +wheaton 1 +viewer 1 +representationfor 1 +connectionist 1 +stereo 1 +bradlei 1 +ccsua 1 +ctstateu 1 +edg 1 +separ 1 +textur 1 +measureslink 1 +interestmi 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..b420aa8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,19 @@ +home 3 +page 3 +welcom 2 +friend 1 +machinew 1 +appreci 1 +patienc 1 +long 1 +arduou 1 +task 1 +bring 1 +better 1 +check 1 +educ 1 +curriculum 1 +vitaecheck 1 +class 1 +teach 1 +section 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..44019150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,165 @@ +scout 6 +page 6 +check 4 +comput 3 +scienc 3 +servic 3 +email 3 +molecular 3 +biologi 3 +time 3 +video 3 +make 3 +madison 2 +intern 2 +wait 2 +thing 2 +depart 2 +univers 2 +explain 2 +also 2 +assist 2 +support 2 +mac 2 +digit 2 +purpos 2 +multimedia 2 +complet 2 +link 2 +eric 1 +hazen 1 +home 1 +pageer 1 +hazennon 1 +professorroom 1 +west 1 +dayton 1 +current 1 +work 1 +fornet 1 +project 1 +part 1 +registr 1 +couldn 1 +help 1 +domain 1 +name 1 +problem 1 +even 1 +want 1 +neat 1 +monei 1 +locat 1 +ofwisconsin 1 +see 1 +could 1 +never 1 +elegantli 1 +fearless 1 +leader 1 +susan 1 +calcari 1 +offici 1 +explan 1 +design 1 +mainten 1 +site 1 +list 1 +report 1 +pete 1 +devri 1 +withtech 1 +unix 1 +machin 1 +come 1 +spent 1 +half 1 +year 1 +laboratori 1 +full 1 +student 1 +well 1 +call 1 +technic 1 +specialist 1 +meant 1 +around 1 +fix 1 +peopl 1 +broken 1 +mice 1 +answer 1 +question 1 +lucki 1 +cool 1 +interest 1 +us 1 +instruct 1 +graduat 1 +wisconsin 1 +philosophi 1 +program 1 +philosoph 1 +real 1 +commod 1 +capitalist 1 +societi 1 +requir 1 +curriculum 1 +shameless 1 +pragmatist 1 +talk 1 +sinc 1 +good 1 +metaphys 1 +discuss 1 +serv 1 +practic 1 +makethi 1 +look 1 +standard 1 +resum 1 +date 1 +made 1 +anim 1 +shown 1 +world 1 +among 1 +drosophila 1 +geneticist 1 +told 1 +wonder 1 +girlfriend 1 +salon 1 +magazin 1 +entertain 1 +inform 1 +ezin 1 +creat 1 +lauri 1 +anderson 1 +green 1 +room 1 +shockwav 1 +kudon 1 +know 1 +wit 1 +quicktimevr 1 +documentari 1 +plight 1 +bosnia 1 +uproot 1 +popul 1 +billi 1 +holidai 1 +homepag 1 +nation 1 +secur 1 +archiv 1 +nixon 1 +preslei 1 +meetingsejhazen 1 +facstaff 1 +wisc 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..dab6b0e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,13 @@ +eliassi 3 +univers 2 +tina 1 +home 1 +pagetina 1 +illinoi 1 +urbana 1 +champaign 1 +wisconsin 1 +madison 1 +offic 1 +bldgphone 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..e69de29b diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..09f76c5f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,72 @@ +madison 5 +eric 4 +instruct 4 +comput 3 +univers 3 +wisconsin 3 +high 3 +bandwidth 3 +fetch 3 +branch 3 +rotenberg 3 +jame 3 +smith 3 +depart 2 +predict 2 +confid 2 +trace 2 +cach 2 +latenc 2 +approach 2 +steve 2 +bennett 2 +appear 2 +proceed 2 +annual 2 +intern 2 +symposium 2 +microarchitectur 2 +decemb 2 +home 1 +page 1 +passsth 1 +anoth 1 +cold 1 +budweisth 1 +address 1 +scienc 1 +west 1 +dayton 1 +street 1 +electr 1 +engin 1 +johnson 1 +drive 1 +offic 1 +phone 1 +mail 1 +ericro 1 +wisc 1 +research 1 +area 1 +architectur 1 +advisor 1 +professor 1 +smithresearch 1 +topic 1 +kestrel 1 +multiscalar 1 +project 1 +level 1 +parallel 1 +mechan 1 +mispredict 1 +tolerancepubl 1 +assign 1 +condit 1 +erik 1 +jacobsen 1 +technic 1 +report 1 +april 1 +resum 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..5574a7f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,69 @@ +comput 8 +would 4 +rather 4 +babak 3 +falsafi 3 +wisconsin 2 +madison 2 +wisc 2 +parallel 2 +scienc 2 +suni 2 +buffalo 2 +june 2 +like 2 +home 1 +page 1 +research 1 +assistantdepart 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usatel 1 +email 1 +work 1 +peopl 1 +mentorcultresearch 1 +interest 1 +architectur 1 +perform 1 +evalu 1 +measur 1 +system 1 +program 1 +modelseduc 1 +univers 1 +decemb 1 +electr 1 +engin 1 +miscellan 1 +public 1 +drink 1 +read 1 +listen 1 +us 1 +high 1 +school 1 +idea 1 +fail 1 +morf 1 +shubu 1 +dionosi 1 +hillari 1 +profan 1 +phone 1 +convers 1 +check 1 +american 1 +french 1 +queen 1 +architect 1 +look 1 +hack 1 +partner 1 +crime 1 +next 1 +gener 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..54f400c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,117 @@ +problem 4 +ferri 3 +scienc 3 +mathemat 3 +applic 3 +structur 3 +optim 3 +parallel 3 +home 2 +page 2 +michael 2 +comput 2 +univers 2 +program 2 +larg 2 +scale 2 +nonlinear 2 +econom 2 +investig 2 +emphasi 2 +model 2 +effect 2 +system 2 +complementar 2 +associ 1 +professor 1 +industri 1 +engineeringand 1 +member 1 +center 1 +depart 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +madison 1 +telephon 1 +email 1 +wisc 1 +cambridg 1 +interest 1 +theori 1 +algorithm 1 +research 1 +summari 1 +look 1 +robust 1 +method 1 +solv 1 +variationalinequ 1 +toproblem 1 +engin 1 +pivot 1 +path 1 +followingtechniqu 1 +base 1 +success 1 +linear 1 +numer 1 +properti 1 +andinterfac 1 +languag 1 +particular 1 +beingconsid 1 +includ 1 +equilibria 1 +taxat 1 +oncarbon 1 +emiss 1 +traffic 1 +congest 1 +toll 1 +contact 1 +chemic 1 +process 1 +design 1 +consid 1 +architectur 1 +solvingproblem 1 +graph 1 +partitioningtechniqu 1 +determin 1 +underli 1 +tool 1 +gener 1 +purpos 1 +techniqu 1 +forexploit 1 +machin 1 +directli 1 +within 1 +arealso 1 +consider 1 +prototyp 1 +us 1 +condor 1 +extens 1 +framework 1 +also 1 +beinginvestig 1 +identifi 1 +exploit 1 +underlyingmodel 1 +public 1 +complet 1 +list 1 +paper 1 +mostli 1 +electron 1 +avail 1 +relev 1 +link 1 +cpnet 1 +prgram 1 +pagec 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..6748d135 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,254 @@ +learn 7 +page 6 +madison 5 +finton 4 +wisconsin 4 +current 4 +system 4 +wisc 3 +comput 3 +univers 3 +artifici 3 +intellig 3 +goal 3 +reinforc 3 +action 3 +environ 3 +input 3 +also 3 +need 3 +import 3 +world 3 +browser 3 +star 3 +trek 3 +david 2 +home 2 +michigan 2 +show 2 +develop 2 +comment 2 +understand 2 +make 2 +problem 2 +output 2 +instead 2 +thumb 2 +work 2 +explor 2 +base 2 +inform 2 +avail 2 +openstep 2 +daili 2 +head 2 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +welcom 1 +grad 1 +student 1 +research 1 +nerdin 1 +intelligenceher 1 +grew 1 +grand 1 +rapid 1 +late 1 +offic 1 +earn 1 +degre 1 +math 1 +state 1 +master 1 +scienceher 1 +dissert 1 +institut 1 +take 1 +littl 1 +year 1 +traffic 1 +measur 1 +softwarefor 1 +first 1 +thesi 1 +advisor 1 +left 1 +trusti 1 +nextstationor 1 +librari 1 +enjoyplai 1 +trumpet 1 +piano 1 +listen 1 +longhair 1 +music 1 +plai 1 +volleybal 1 +intervarsityfolk 1 +contribut 1 +supersoak 1 +arm 1 +race 1 +feel 1 +free 1 +form 1 +send 1 +mail 1 +finger 1 +accountto 1 +plan 1 +whether 1 +gain 1 +employ 1 +introduct 1 +project 1 +smart 1 +machin 1 +intelligenti 1 +essenc 1 +intelligencei 1 +abil 1 +adapt 1 +actappropri 1 +order 1 +reach 1 +treat 1 +gener 1 +case 1 +control 1 +chang 1 +sens 1 +weak 1 +kind 1 +feedback 1 +express 1 +posit 1 +neg 1 +number 1 +teacher 1 +present 1 +thesystem 1 +pair 1 +receiv 1 +irregular 1 +interv 1 +focuss 1 +todistinguish 1 +good 1 +on 1 +direct 1 +process 1 +build 1 +agood 1 +represent 1 +term 1 +relev 1 +orimport 1 +featur 1 +note 1 +basedfeatur 1 +extract 1 +appli 1 +notion 1 +balanc 1 +perform 1 +optim 1 +exploit 1 +investig 1 +wai 1 +us 1 +learningprocess 1 +effici 1 +allow 1 +specifi 1 +start 1 +point 1 +experi 1 +activ 1 +better 1 +intelligentadapt 1 +hope 1 +provid 1 +basi 1 +whichwil 1 +benefit 1 +knowledg 1 +task 1 +realli 1 +date 1 +sorri 1 +pagefor 1 +hotlistthi 1 +independ 1 +hotlist 1 +keep 1 +copi 1 +access 1 +platform 1 +combin 1 +actual 1 +bookmark 1 +file 1 +omniweb 1 +eleg 1 +function 1 +netscap 1 +opinion 1 +omniwebi 1 +nextstep 1 +foral 1 +variant 1 +releas 1 +editori 1 +responseto 1 +jehovah 1 +wit 1 +deiti 1 +christwisconsin 1 +site 1 +intervars 1 +graduat 1 +fellowship 1 +check 1 +weatherin 1 +citi 1 +program 1 +link 1 +isthmu 1 +pagesom 1 +favorit 1 +place 1 +visit 1 +nebula 1 +nasa 1 +pictur 1 +wide 1 +studi 1 +bibl 1 +crosssearch 1 +minor 1 +glenn 1 +gould 1 +homepag 1 +farsid 1 +voyagerent 1 +dilbert 1 +zoneroam 1 +virtual 1 +tourist 1 +stereogram 1 +tell 1 +blow 1 +true 1 +next 1 +think 1 +bill 1 +gate 1 +word 1 +sponsor 1 +last 1 +modifi 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..02c777ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,242 @@ +nbsp 15 +program 13 +fischer 8 +regist 8 +alloc 8 +attribut 7 +charl 6 +compil 6 +code 5 +cost 5 +languag 5 +context 5 +august 5 +gener 4 +error 4 +kurland 4 +comput 3 +implement 3 +schedul 3 +model 3 +interprocedur 3 +time 3 +approach 3 +pointer 3 +arrai 3 +check 3 +us 3 +steven 3 +januari 3 +todd 3 +proebst 3 +transact 3 +system 3 +syntact 3 +analysi 3 +grammar 3 +scienc 2 +wisconsin 2 +univers 2 +teachingc 2 +cours 2 +research 2 +interest 2 +design 2 +recent 2 +best 2 +architectur 2 +issu 2 +avoid 2 +unnecessari 2 +delai 2 +optim 2 +procedur 2 +practic 2 +monitor 2 +execut 2 +sigplan 2 +harish 2 +patil 2 +appear 2 +effici 2 +june 2 +complet 2 +free 2 +parallel 2 +environ 2 +william 2 +specif 2 +juli 2 +evalu 2 +least 2 +decemb 2 +correct 2 +techniqu 2 +home 1 +page 1 +nbspcharl 1 +nbspprofessor 1 +nbspunivers 1 +depart 1 +dayton 1 +madison 1 +telephon 1 +messag 1 +email 1 +wisc 1 +teach 1 +semest 1 +graduat 1 +spring 1 +undergradu 1 +focu 1 +exploit 1 +enormouscap 1 +provid 1 +modern 1 +student 1 +investig 1 +includ 1 +import 1 +pipelin 1 +haveinvestig 1 +arithmet 1 +express 1 +domin 1 +global 1 +level 1 +graph 1 +color 1 +mayb 1 +explicitli 1 +quantifi 1 +likelihood 1 +benefit 1 +registerresid 1 +attract 1 +care 1 +vital 1 +loadsand 1 +store 1 +must 1 +ultim 1 +theprocedur 1 +analyz 1 +studiedinterprocedur 1 +modelsthat 1 +optimallyalloc 1 +among 1 +polynomi 1 +seem 1 +effect 1 +anyon 1 +know 1 +easi 1 +make 1 +mistak 1 +involv 1 +indic 1 +especi 1 +common 1 +studi 1 +wai 1 +toautomat 1 +oper 1 +share 1 +memori 1 +multiprocessor 1 +workstat 1 +processor 1 +anoth 1 +possibl 1 +routin 1 +littl 1 +orno 1 +appar 1 +slowdown 1 +public 1 +minimum 1 +popl 1 +sigact 1 +symposium 1 +principl 1 +programminglanguag 1 +concurr 1 +access 1 +insoftwar 1 +experi 1 +demand 1 +driven 1 +inacm 1 +instruct 1 +load 1 +zero 1 +rang 1 +split 1 +confer 1 +activitiesa 1 +revis 1 +second 1 +edit 1 +craft 1 +author 1 +cytronand 1 +richard 1 +leblanc 1 +almost 1 +publish 1 +benjamin 1 +cum 1 +look 1 +soon 1 +better 1 +bookstor 1 +everywher 1 +short 1 +commun 1 +editor 1 +topla 1 +educationph 1 +cornel 1 +pars 1 +supervis 1 +john 1 +studentsdonn 1 +milton 1 +bruce 1 +rowland 1 +semant 1 +stephen 1 +skedzielewski 1 +definit 1 +reevalu 1 +septemb 1 +bernard 1 +dion 1 +local 1 +corrector 1 +sensitivepars 1 +mahadevan 1 +ganapathi 1 +retarget 1 +novemb 1 +vimal 1 +begwami 1 +maunei 1 +extend 1 +right 1 +gregori 1 +johnson 1 +sensit 1 +flow 1 +anil 1 +facil 1 +integr 1 +winsborough 1 +automat 1 +transpar 1 +logic 1 +venkatesh 1 +framework 1 +algorithm 1 +steve 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..f46faeee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,259 @@ +comput 15 +wisconsin 14 +memori 14 +univers 13 +madison 13 +goodman 12 +scienc 12 +depart 12 +technic 11 +report 11 +share 10 +jame 9 +architectur 7 +burger 7 +project 6 +chip 6 +synchron 6 +integr 5 +base 5 +intern 5 +also 5 +scalabl 5 +processor 4 +system 4 +specif 4 +bandwidth 4 +design 4 +current 4 +coher 4 +standard 4 +galileo 3 +studi 3 +main 3 +research 3 +perform 3 +larg 3 +latenc 3 +point 3 +interconnect 3 +appear 3 +confer 3 +stefano 3 +kaxira 3 +juli 3 +alain 3 +cach 3 +relat 3 +transport 3 +layer 3 +hardwar 3 +includ 3 +support 3 +proceed 3 +johnson 3 +woest 3 +focus 2 +process 2 +capabl 2 +arrow 2 +differ 2 +iram 2 +follow 2 +futur 2 +model 2 +execut 2 +datascalar 2 +public 2 +optic 2 +us 2 +link 2 +list 2 +protocol 2 +effici 2 +primit 2 +high 2 +extend 2 +mechan 2 +march 2 +interfac 2 +scale 2 +februari 2 +nagi 2 +philip 2 +novemb 2 +analysi 2 +mari 2 +vernon 2 +doug 2 +home 1 +page 1 +wisconsint 1 +contentsgalileoproject 1 +descriptionpublicationsrel 1 +projectssci 1 +wisconsinproject 1 +descriptionpublicationsproject 1 +membersgalileo 1 +wisconsingalileo 1 +conduct 1 +groupat 1 +medium 1 +long 1 +term 1 +evolut 1 +emphasison 1 +therelationship 1 +futuresystem 1 +complet 1 +separ 1 +todai 1 +extent 1 +storag 1 +merg 1 +least 1 +wai 1 +increas 1 +penalti 1 +issuabl 1 +instruct 1 +orlimit 1 +place 1 +capacityon 1 +modul 1 +eventu 1 +sizabl 1 +fractionof 1 +resid 1 +repres 1 +label 1 +mopin 1 +diagram 1 +possibl 1 +migrat 1 +ofprocessor 1 +onto 1 +dram 1 +eventuallyobvi 1 +central 1 +area 1 +examin 1 +impact 1 +andlimit 1 +microprocessor 1 +systemsperform 1 +variou 1 +along 1 +theprocessor 1 +spectrumcach 1 +hierarchi 1 +systemsdesign 1 +bank 1 +systemprogram 1 +multipl 1 +exploit 1 +elimin 1 +serial 1 +bottlenecksdoug 1 +massiv 1 +parallel 1 +octob 1 +spsd 1 +modeldoug 1 +quantifi 1 +limit 1 +microprocessorsdoug 1 +symposium 1 +declin 1 +effect 1 +dynam 1 +gener 1 +purpos 1 +microprocessorsdougla 1 +januari 1 +berkeleyppram 1 +kyushu 1 +univeristi 1 +japansci 1 +wisconsinour 1 +group 1 +close 1 +involv 1 +coherentshar 1 +multiprocessor 1 +coherentinterfac 1 +ieee 1 +platform 1 +explor 1 +idea 1 +specifi 1 +queue 1 +lock 1 +qolb 1 +aswel 1 +optim 1 +pattern 1 +pairwis 1 +fresh 1 +read 1 +definitionfor 1 +extrem 1 +betweenprocess 1 +element 1 +individu 1 +cluster 1 +topic 1 +logarithm 1 +grow 1 +structureseffici 1 +multiprocessorsa 1 +extensionsaggress 1 +consist 1 +multiprocessorswisconsin 1 +minim 1 +overhead 1 +applic 1 +best 1 +paper 1 +supercomput 1 +simul 1 +wind 1 +tunneldougla 1 +second 1 +workshop 1 +cost 1 +hierarch 1 +extens 1 +scijam 1 +memoryross 1 +evan 1 +aboulenein 1 +stein 1 +gjess 1 +topolog 1 +ringsross 1 +decemb 1 +ringsteven 1 +scott 1 +lower 1 +bound 1 +coherenceross 1 +june 1 +multiprocessorsphilip 1 +multiprocessorjam 1 +third 1 +program 1 +languag 1 +oper 1 +april 1 +particip 1 +faculti 1 +graduat 1 +student 1 +alumni 1 +abouleneinross 1 +johnsonstev 1 +scottlast 1 +modifi 1 +dburger 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..25cad9b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,34 @@ +geeri 3 +andrew 2 +madison 2 +home 1 +page 1 +wisc 1 +west 1 +dayton 1 +street 1 +regent 1 +madisonin 1 +comput 1 +scienc 1 +current 1 +work 1 +compsci 1 +grade 1 +schedul 1 +pontif 1 +peopl 1 +interest 1 +jacqu 1 +derrida 1 +post 1 +structur 1 +martin 1 +heidegg 1 +albert 1 +camu 1 +jean 1 +paul 1 +sartr 1 +friedrich 1 +nietzsch 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..d373ca57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,148 @@ +time 4 +pleas 3 +gideon 2 +glass 2 +homepag 2 +tweak 2 +find 2 +sampl 2 +follow 2 +send 2 +read 2 +index 2 +machin 2 +netscap 2 +unix 2 +mayb 2 +continu 1 +thank 1 +stop 1 +collect 1 +imag 1 +deposit 1 +directori 1 +pictur 1 +gui 1 +claim 1 +cooler 1 +accord 1 +toonion 1 +seethi 1 +movi 1 +usual 1 +suspect 1 +walk 1 +nearest 1 +blockbust 1 +note 1 +recent 1 +move 1 +none 1 +roommat 1 +dutch 1 +cheap 1 +either 1 +given 1 +predica 1 +dismal 1 +prospect 1 +improv 1 +withno 1 +outsid 1 +influenc 1 +consid 1 +make 1 +small 1 +donationto 1 +help 1 +defrai 1 +cost 1 +purchas 1 +check 1 +monei 1 +orderscan 1 +sent 1 +address 1 +cash 1 +monro 1 +floor 1 +madison 1 +usathank 1 +support 1 +grad 1 +student 1 +sometim 1 +paper 1 +eventhough 1 +shelf 1 +feet 1 +unread 1 +book 1 +wait 1 +anyhow 1 +look 1 +someth 1 +christian 1 +achil 1 +huge 1 +might 1 +also 1 +unifi 1 +cstechreport 1 +class 1 +project 1 +report 1 +otherstuff 1 +avail 1 +program 1 +load 1 +averagewil 1 +grow 1 +fast 1 +main 1 +fork 1 +doofu 1 +actual 1 +share 1 +back 1 +calvin 1 +great 1 +killer 1 +zippi 1 +pinheadha 1 +reload 1 +sever 1 +justtri 1 +last 1 +fall 1 +kill 1 +noth 1 +think 1 +work 1 +mozilla 1 +higher 1 +well 1 +dabbl 1 +object 1 +orient 1 +programmingin 1 +mostli 1 +exercis 1 +suppos 1 +buttonher 1 +thing 1 +right 1 +suffic 1 +case 1 +told 1 +somethingin 1 +bookmark 1 +denni 1 +ritchi 1 +creator 1 +wrote 1 +anti 1 +forward 1 +hater 1 +handbook 1 +mailand 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..4ac5a6f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,406 @@ +meet 13 +comput 12 +calendar 9 +andi 8 +glew 7 +first 7 +intel 6 +page 5 +unix 5 +system 5 +us 5 +schedul 5 +email 5 +wisconsin 4 +home 4 +arrang 4 +approach 4 +architectur 4 +manag 4 +time 4 +advoc 4 +also 4 +pilot 4 +synchron 4 +univers 3 +thing 3 +public 3 +html 3 +work 3 +configur 3 +beef 3 +montreal 3 +peopl 3 +code 3 +although 3 +like 3 +optimist 3 +lock 3 +version 3 +creat 3 +least 3 +possibl 3 +person 3 +krazi 2 +organ 2 +touch 2 +filesystem 2 +access 2 +wisc 2 +read 2 +research 2 +group 2 +interest 2 +wannab 2 +author 2 +thought 2 +suggest 2 +miscellan 2 +info 2 +stuff 2 +self 2 +imag 2 +resum 2 +trust 2 +keep 2 +warm 2 +enough 2 +alwai 2 +architect 2 +someth 2 +busi 2 +sinc 2 +start 2 +learn 2 +hacker 2 +gould 2 +real 2 +futur 2 +cannot 2 +tool 2 +paper 2 +softwar 2 +concurr 2 +control 2 +get 2 +test 2 +requir 2 +even 2 +commit 2 +download 2 +softwareto 2 +user 2 +oper 2 +urgent 2 +teresa 2 +pageandi 1 +pagethi 1 +largelyform 1 +snippet 1 +stylishor 1 +summarycontact 1 +ship 1 +addressescalendar 1 +http 1 +anyon 1 +file 1 +unless 1 +specif 1 +open 1 +default 1 +scribe 1 +minut 1 +taker 1 +sohi 1 +weekli 1 +rather 1 +gener 1 +form 1 +adapt 1 +applic 1 +dynam 1 +versu 1 +static 1 +high 1 +level 1 +edit 1 +higher 1 +educ 1 +effect 1 +patent 1 +claim 1 +fame 1 +favorit 1 +quot 1 +sai 1 +care 1 +pope 1 +priest 1 +parson 1 +king 1 +william 1 +boyn 1 +want 1 +coin 1 +summer 1 +harm 1 +see 1 +frost 1 +snow 1 +poor 1 +almighti 1 +dollar 1 +good 1 +mapl 1 +wood 1 +bellow 1 +church 1 +chapel 1 +ranter 1 +preacher 1 +beecher 1 +alreadi 1 +keeper 1 +harbour 1 +deplor 1 +churchmen 1 +notori 1 +atheist 1 +less 1 +well 1 +known 1 +chariti 1 +strand 1 +sailor 1 +knew 1 +could 1 +exchang 1 +chop 1 +chord 1 +firewood 1 +meal 1 +place 1 +sleep 1 +print 1 +manifesto 1 +handbil 1 +advertis 1 +hackeralthough 1 +aspir 1 +formerlyhad 1 +fake 1 +motorola 1 +card 1 +ever 1 +assembl 1 +redesign 1 +chip 1 +kernel 1 +andstil 1 +think 1 +wistfulli 1 +beard 1 +frequent 1 +wear 1 +suspend 1 +bald 1 +architectureonc 1 +involv 1 +microarchitectur 1 +pentium 1 +adopt 1 +architecturei 1 +constantli 1 +verg 1 +write 1 +book 1 +entitl 1 +grabbag 1 +trick 1 +techniqu 1 +sort 1 +antidot 1 +hennessi 1 +patterson 1 +afford 1 +diskspac 1 +internet 1 +servic 1 +provideror 1 +charg 1 +connect 1 +appreci 1 +piec 1 +architectureon 1 +best 1 +wai 1 +textbook 1 +datasheet 1 +instruct 1 +refer 1 +wander 1 +netscapebookmarksstockscod 1 +standardsroi 1 +wilkinson 1 +standardsi 1 +disagre 1 +mani 1 +perhap 1 +quit 1 +quickli 1 +defunct 1 +startup 1 +compani 1 +call 1 +enfopris 1 +build 1 +workstat 1 +chang 1 +assign 1 +driver 1 +writingto 1 +integr 1 +longstand 1 +love 1 +hate 1 +relationship 1 +configurationmanag 1 +scc 1 +publish 1 +box 1 +link 1 +parallel 1 +tree 1 +element 1 +usenix 1 +workshop 1 +describ 1 +central 1 +databas 1 +multipl 1 +view 1 +hardlink 1 +clone 1 +save 1 +space 1 +divis 1 +team 1 +brian 1 +berlin 1 +deprec 1 +mainli 1 +wherea 1 +actual 1 +case 1 +livelock 1 +usual 1 +insist 1 +singl 1 +identifi 1 +serial 1 +sourc 1 +checkinsso 1 +proce 1 +linear 1 +manner 1 +programm 1 +previou 1 +fix 1 +appli 1 +recogn 1 +relax 1 +often 1 +strip 1 +approachin 1 +apolog 1 +never 1 +truli 1 +portabl 1 +accomplish 1 +similar 1 +mike 1 +fetterman 1 +mark 1 +aitken 1 +deserv 1 +credit 1 +enhanc 1 +sever 1 +featur 1 +went 1 +notabl 1 +number 1 +becam 1 +overal 1 +suffici 1 +everyth 1 +includ 1 +cshrc 1 +login 1 +wisconsinhow 1 +seem 1 +ubiquit 1 +programat 1 +depart 1 +variou 1 +cmtool 1 +domain 1 +ical 1 +plan 1 +critic 1 +mass 1 +anyof 1 +isol 1 +associ 1 +mean 1 +record 1 +voic 1 +therefor 1 +must 1 +prefer 1 +phone 1 +manuallyadd 1 +microsoft 1 +watch 1 +intelat 1 +devout 1 +program 1 +last 1 +ontim 1 +past 1 +weak 1 +disconnect 1 +allow 1 +major 1 +meetingswith 1 +without 1 +manual 1 +intervent 1 +algorithm 1 +tell 1 +reserveth 1 +right 1 +blindli 1 +invit 1 +make 1 +admin 1 +check 1 +proposeif 1 +week 1 +avoid 1 +bother 1 +send 1 +realiz 1 +miss 1 +sent 1 +advanc 1 +overallschedul 1 +topic 1 +fascin 1 +bring 1 +effici 1 +advantag 1 +secretariesand 1 +aid 1 +camp 1 +header 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..edc4e42d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,24 @@ +goodman 3 +comput 2 +wisconsin 2 +home 1 +page 1 +jame 1 +wisc 1 +professor 1 +sciencesdepart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaresearch 1 +interest 1 +lot 1 +good 1 +stuff 1 +current 1 +project 1 +galileo 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..7223a172 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,338 @@ +imag 26 +databas 16 +comput 15 +home 8 +retriev 8 +architectur 7 +link 7 +pic 7 +page 7 +greg 6 +inform 6 +mirror 4 +softwar 4 +index 4 +japanes 4 +relat 4 +wisc 3 +fall 3 +system 3 +learn 3 +librari 3 +info 3 +project 3 +vision 3 +cool 3 +japan 3 +nice 3 +stuff 3 +site 3 +recognit 3 +process 3 +graphic 3 +text 3 +group 3 +cours 3 +sharp 2 +offic 2 +section 2 +lectur 2 +spring 2 +manag 2 +engin 2 +html 2 +refer 2 +string 2 +attach 2 +introduct 2 +geometri 2 +machin 2 +freewar 2 +internet 2 +hyper 2 +rang 2 +shape 2 +shade 2 +pretti 2 +idea 2 +data 2 +planet 2 +wyom 2 +satelit 2 +line 2 +histori 2 +base 2 +list 2 +handwrit 2 +washington 2 +state 2 +tracer 2 +archiv 2 +trace 2 +simul 2 +univ 2 +includ 2 +english 2 +robot 2 +thoma 2 +tool 2 +languag 2 +invest 2 +fund 2 +schwab 2 +pagegreg 1 +pagenam 1 +sharpemail 1 +eduoffic 1 +phone 1 +hour 1 +appt 1 +tgif 1 +everi 1 +fridai 1 +dept 1 +instructor 1 +note 1 +notesclass 1 +topic 1 +find 1 +aboutsearch 1 +altavista 1 +dejanew 1 +excit 1 +infoseek 1 +lyco 1 +metacrawl 1 +yahoo 1 +usenet 1 +format 1 +ohioc 1 +program 1 +todai 1 +draft 1 +standard 1 +april 1 +stanford 1 +cygnu 1 +mumit 1 +newbi 1 +guideplatform 1 +independ 1 +portabl 1 +develop 1 +kit 1 +amulet 1 +dclap 1 +requir 1 +motif 1 +suit 1 +wxwindow 1 +yacl 1 +class 1 +projectclass 1 +numer 1 +linear 1 +algebra 1 +theoret 1 +scienc 1 +homework 1 +projectmisc 1 +sharewar 1 +cygwin 1 +directori 1 +gimp 1 +harmonai 1 +harmoni 1 +unix 1 +client 1 +browser 1 +vasc 1 +altern 1 +video 1 +research 1 +give 1 +specifi 1 +night 1 +jaida 1 +year 1 +worth 1 +atmospher 1 +multiresolut 1 +seamless 1 +click 1 +zoom 1 +resolut 1 +view 1 +solar 1 +moon 1 +comet 1 +meteor 1 +version 1 +also 1 +lot 1 +misc 1 +like 1 +overhead 1 +shot 1 +antarctica 1 +catalogu 1 +niae 1 +gothic 1 +electr 1 +postcard 1 +card 1 +rack 1 +select 1 +thank 1 +todd 1 +vistex 1 +textur 1 +databaseartifici 1 +gener 1 +primoridi 1 +soup 1 +kitchen 1 +math 1 +depart 1 +awesom 1 +medic 1 +medicin 1 +document 1 +pictur 1 +diagon 1 +dermatolog 1 +onlin 1 +atla 1 +erlang 1 +germani 1 +enter 1 +diagnosi 1 +back 1 +orthopaed 1 +ecvnet 1 +optic 1 +charact 1 +nici 1 +groupimag 1 +univers 1 +raytrac 1 +rayshad 1 +utah 1 +raster 1 +toolkit 1 +radianc 1 +radios 1 +packag 1 +avalon 1 +object 1 +grimstead 1 +massiv 1 +dsite 1 +hardwar 1 +board 1 +intergraph 1 +lockhe 1 +glint 1 +chipset 1 +nvidia 1 +chipsetcomput 1 +geometeri 1 +center 1 +applic 1 +challeng 1 +geometrylispuseless 1 +pagescomput 1 +hennessi 1 +patterson 1 +resourc 1 +superdlx 1 +parallel 1 +parl 1 +mexico 1 +washingt 1 +georgia 1 +tech 1 +groupjapanes 1 +guid 1 +unvers 1 +monash 1 +infowav 1 +edict 1 +window 1 +dictionari 1 +shodouka 1 +asiasoftinform 1 +retrev 1 +peregrin 1 +travers 1 +written 1 +perl 1 +trec 1 +infomin 1 +gigabyt 1 +search 1 +textual 1 +provid 1 +experi 1 +feedback 1 +linguist 1 +util 1 +repositori 1 +survei 1 +natur 1 +nist 1 +other_sw 1 +info_retriev 1 +world 1 +wide 1 +wander 1 +spider 1 +jedi 1 +might 1 +strictli 1 +hartlib 1 +paper 1 +latin 1 +stemmer 1 +multimedia 1 +academ 1 +storag 1 +new 1 +pointcast 1 +check 1 +custom 1 +portfolio 1 +automat 1 +updat 1 +literatur 1 +mark 1 +twainhumor 1 +apolog 1 +citizen 1 +offens 1 +threw 1 +garbag 1 +belong 1 +investorweb 1 +networth 1 +fundscap 1 +brill 1 +editori 1 +servic 1 +stockmastermutu 1 +brokerag 1 +hous 1 +fidel 1 +vanguard 1 +row 1 +price 1 +jack 1 +white 1 +compani 1 +charl 1 +gabelli 1 +mutualsmisc 1 +psnuplast 1 +modifi 1 +sharpgreg 1 +http 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..40ce5899 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,10 @@ +sridhar 1 +gopalsridhar 1 +gopalgsri 1 +wisc 1 +edubon 1 +marrow 1 +pageresumest 1 +wisconsin 1 +pagecalvin 1 +hobbesbookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..cecd35fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,58 @@ +univers 4 +madison 4 +home 3 +visitor 3 +sinc 3 +guangshun 2 +page 2 +depart 2 +comput 2 +wisconsin 2 +interest 2 +project 2 +data 2 +relat 2 +send 2 +email 2 +number 2 +welcom 1 +graduat 1 +student 1 +scienc 1 +dayton 1 +phone 1 +offic 1 +educ 1 +california 1 +state 1 +angel 1 +peke 1 +physic 1 +grade 1 +research 1 +databas 1 +manag 1 +system 1 +advis 1 +raghu 1 +ramakrishnan 1 +miron 1 +livni 1 +analysi 1 +famili 1 +medicin 1 +devis 1 +explor 1 +visual 1 +environ 1 +class 1 +link 1 +stuff 1 +career 1 +plan 1 +chines 1 +miscellani 1 +around 1 +weather 1 +forecast 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..b37bf9e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,44 @@ +data 3 +guhan 2 +viswanathan 2 +thesi 2 +research 2 +design 2 +implement 2 +parallellanguag 2 +home 1 +page 1 +gviswana 1 +wisc 1 +graduat 1 +studentdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaadvisor 1 +laru 1 +focus 1 +involv 1 +base 1 +develop 1 +local 1 +compil 1 +target 1 +investig 1 +parallelappl 1 +execut 1 +effici 1 +hand 1 +code 1 +parallelprogram 1 +amor 1 +detail 1 +summari 1 +list 1 +public 1 +us 1 +link 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..8b5447df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,73 @@ +page 4 +harit 3 +univers 3 +comput 3 +prof 3 +home 2 +student 2 +cours 2 +take 2 +advanc 2 +architectur 2 +link 2 +access 2 +time 2 +sinc 2 +sept 2 +counter 2 +mail 2 +graduat 1 +wisconsin 1 +madison 1 +depart 1 +scienc 1 +would 1 +like 1 +list 1 +classmat 1 +fall 1 +databas 1 +manag 1 +system 1 +raghu 1 +ramakrishnan 1 +mark 1 +hill 1 +spring 1 +jame 1 +goodman 1 +undergradu 1 +world 1 +famou 1 +mvsr 1 +engin 1 +colleg 1 +osmania 1 +hyderabad 1 +india 1 +meet 1 +draw 1 +line 1 +thing 1 +interest 1 +indian 1 +newspap 1 +stuff 1 +sport 1 +sastri 1 +roommat 1 +saeed 1 +mirza 1 +murthi 1 +zubber 1 +dust 1 +photo 1 +photograph 1 +warn 1 +click 1 +year 1 +folk 1 +courtesi 1 +electron 1 +address 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..aae8a348 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,43 @@ +madison 4 +hasti 3 +wisconsin 3 +rebecca 2 +scienc 2 +java 2 +univers 2 +mathemat 2 +home 1 +page 1 +graduat 1 +student 1 +research 1 +assistantcomput 1 +departmentunivers 1 +dayton 1 +offic 1 +mail 1 +wisc 1 +edutelephon 1 +telephon 1 +dept 1 +first 1 +applet 1 +click 1 +fall 1 +schedul 1 +engr 1 +noland 1 +seminar 1 +comput 1 +carleton 1 +colleg 1 +interest 1 +program 1 +languag 1 +basketbal 1 +volleybal 1 +softbal 1 +linkag 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..7b964b61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,108 @@ +comput 4 +univers 4 +lane 3 +stuff 3 +scienc 3 +madison 3 +phone 3 +offic 3 +inform 3 +home 2 +page 2 +chad 2 +academ 2 +person 2 +neat 2 +dept 2 +wisconsin 2 +research 2 +program 2 +state 2 +claud 2 +info 1 +west 1 +dayton 1 +mail 1 +address 1 +hour 1 +wisc 1 +welcomethank 1 +stop 1 +hope 1 +enjoi 1 +bestbet 1 +link 1 +section 1 +biggest 1 +new 1 +life 1 +right 1 +get 1 +marri 1 +onmai 1 +nichol 1 +final 1 +want 1 +tell 1 +good 1 +luck 1 +count 1 +fall 1 +cours 1 +retriev 1 +technolog 1 +seek 1 +databas 1 +manag 1 +system 1 +ling 1 +audit 1 +advanc 1 +semant 1 +interest 1 +linguist 1 +discours 1 +process 1 +us 1 +advic 1 +barwis 1 +epigram 1 +alan 1 +perli 1 +educ 1 +mathemat 1 +minor 1 +philosophi 1 +laud 1 +truman 1 +formerli 1 +northeast 1 +missouri 1 +expect 1 +stand 1 +accord 1 +truli 1 +click 1 +imag 1 +cyber 1 +poop 1 +creation 1 +unabash 1 +brother 1 +bart 1 +arthur 1 +download 1 +psychot 1 +talk 1 +rais 1 +plant 1 +internet 1 +deep 1 +thought 1 +jack 1 +handi 1 +reload 1 +differ 1 +on 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..68ec78e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,103 @@ +robot 12 +hert 9 +susan 8 +comput 7 +vladimir 6 +lumelski 6 +motion 5 +plan 5 +interest 4 +geometri 4 +algorithm 4 +tether 4 +page 3 +research 3 +madison 3 +link 3 +multipl 3 +appear 3 +confer 3 +intern 3 +autonom 3 +scienc 2 +univers 2 +wisconsin 2 +public 2 +curv 2 +paper 2 +august 2 +journal 2 +system 2 +version 2 +proc 2 +librari 2 +book 2 +home 1 +wisc 1 +assist 1 +depart 1 +dayton 1 +telephon 1 +curriculum 1 +vita 1 +postscript 1 +softwar 1 +appli 1 +experiment 1 +analysi 1 +design 1 +graphic 1 +geometr 1 +advisor 1 +current 1 +work 1 +develop 1 +alogirthm 1 +common 1 +environ 1 +select 1 +deform 1 +plane 1 +extend 1 +abstract 1 +proceed 1 +canadian 1 +planar 1 +rout 1 +applic 1 +ti 1 +bind 1 +publish 1 +ieee 1 +autom 1 +sanjai 1 +tiwari 1 +terrain 1 +cover 1 +special 1 +issu 1 +underwat 1 +move 1 +arbitrari 1 +configur 1 +intellig 1 +reznik 1 +simul 1 +basi 1 +anim 1 +program 1 +technic 1 +report 1 +laboratori 1 +juli 1 +educ 1 +refer 1 +shelf 1 +congress 1 +line 1 +travel 1 +samantha 1 +cook 1 +epicuri 1 +veggi 1 +unit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..124cb1c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,49 @@ +madison 4 +kirk 3 +hogenson 3 +offic 3 +mail 3 +graduat 2 +wisconsin 2 +student 2 +hour 2 +studentcomput 1 +scienc 1 +departmentunivers 1 +dayton 1 +wisc 1 +edutelephon 1 +depart 1 +section 1 +tue 1 +also 1 +look 1 +myschedul 1 +none 1 +workout 1 +tryto 1 +appoint 1 +time 1 +finger 1 +send 1 +visit 1 +ghana 1 +countri 1 +serv 1 +peac 1 +corp 1 +usernam 1 +check 1 +pnhp 1 +group 1 +page 1 +maintain 1 +wife 1 +eilun 1 +experi 1 +counter 1 +sai 1 +accessedtim 1 +sinc 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..16be66b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,12 @@ +horn 3 +jeffrei 2 +swanton 1 +road 1 +madison 1 +wisconsin 1 +phone 1 +email 1 +wisc 1 +wise 1 +linear 1 +familyemploymenteducationresearchgenealog 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..5db309b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,148 @@ +program 15 +horwitz 8 +analysi 7 +languag 5 +dataflow 5 +work 5 +rep 5 +confer 5 +problem 4 +graph 4 +interprocedur 4 +sagiv 4 +symposium 4 +proceed 4 +depend 3 +algorithm 3 +precis 3 +effici 3 +record 3 +januari 3 +softwar 3 +base 2 +slice 2 +mainli 2 +involv 2 +understand 2 +textual 2 +semant 2 +differ 2 +version 2 +call 2 +class 2 +develop 2 +implement 2 +twenti 2 +intern 2 +principlesof 2 +engin 2 +susan 1 +horwitzsusan 1 +horwitzprofessorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +wisc 1 +telephon 1 +secretari 1 +depart 1 +cornel 1 +univers 1 +research 1 +interest 1 +environmentsprogram 1 +differenc 1 +mergingstat 1 +programsinterprocedur 1 +analysisresearch 1 +summarymi 1 +design 1 +implementationof 1 +tool 1 +help 1 +programm 1 +like 1 +exist 1 +would 1 +affectedbi 1 +propos 1 +modif 1 +structur 1 +betweentwo 1 +retest 1 +chang 1 +combin 1 +piec 1 +produc 1 +certainsemant 1 +guarante 1 +represent 1 +theprogram 1 +oper 1 +also 1 +interproceduraldataflow 1 +previou 1 +concentratedeith 1 +specif 1 +individu 1 +necessarili 1 +gener 1 +thoma 1 +mooli 1 +newalgorithm 1 +appli 1 +larg 1 +recent 1 +publicationsm 1 +shapiro 1 +fast 1 +accur 1 +flow 1 +insensit 1 +point 1 +appear 1 +fourth 1 +onprincipl 1 +pari 1 +franc 1 +demand 1 +sigsoft 1 +foundat 1 +softwareengin 1 +washington 1 +octob 1 +applic 1 +constantpropag 1 +sixth 1 +joint 1 +theoryand 1 +practic 1 +aarhu 1 +denmark 1 +reachabl 1 +second 1 +francisco 1 +bate 1 +increment 1 +test 1 +us 1 +twentieth 1 +charleston 1 +fourteenth 1 +conferenceon 1 +melbourn 1 +australia 1 +identifi 1 +aprogram 1 +sigplan 1 +languagedesign 1 +white 1 +plain 1 +june 1 +teach 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..32008726 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,16 @@ +pagesid 1 +pagenam 1 +sidnei 1 +hummertoffic 1 +phone 1 +offic 1 +email 1 +hummert 1 +wisc 1 +edua 1 +postscript 1 +version 1 +resum 1 +pictur 1 +click 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..a1cf2cfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,46 @@ +home 2 +alien 2 +construct 2 +mail 2 +univers 2 +wisconsin 2 +phone 2 +wisc 2 +igor 1 +ivanisev 1 +pageigorivanisev 1 +work 1 +newest 1 +project 1 +disclaim 1 +speak 1 +particular 1 +needless 1 +page 1 +ever 1 +feel 1 +like 1 +actual 1 +alreadi 1 +link 1 +research 1 +interest 1 +robot 1 +vision 1 +stuff 1 +generalgradu 1 +slave 1 +departmentwa 1 +undergrad 1 +drake 1 +math 1 +departmentaddress 1 +comput 1 +scienc 1 +departmentunivers 1 +west 1 +dayton 1 +streetmadison 1 +offic 1 +iigor 1 +eduiigor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..e9f8abef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,2 @@ +pageireland 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..270d67d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,37 @@ +sharenow 4 +comput 3 +meet 3 +offic 2 +section 2 +tuesdai 2 +home 1 +page 1 +wisc 1 +teach 1 +assist 1 +peterson 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +madisonmadison 1 +sciencestelephon 1 +hour 1 +thursdai 1 +pmsection 1 +pmboth 1 +class 1 +room 1 +sciencesc 1 +announcementshandoutsmoth 1 +jone 1 +profil 1 +recreat 1 +site 1 +pleas 1 +send 1 +email 1 +comment 1 +last 1 +modifi 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..d592a165 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,44 @@ +scienc 3 +research 2 +madison 2 +comput 2 +basneyjim 1 +basneygradu 1 +student 1 +assistantcomput 1 +departmentunivers 1 +wisconsin 1 +dayton 1 +email 1 +jbasnei 1 +wisc 1 +eduoffic 1 +statisticsoffic 1 +phone 1 +interest 1 +area 1 +oper 1 +system 1 +andnetwork 1 +current 1 +work 1 +condor 1 +directionof 1 +prof 1 +miron 1 +livni 1 +receiv 1 +fromoberlin 1 +colleg 1 +english 1 +webpag 1 +oberlin 1 +resum 1 +codefrom 1 +previou 1 +project 1 +avail 1 +onlin 1 +last 1 +modifi 1 +basnei 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..69021677 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,83 @@ +jerel 3 +mackai 3 +comput 3 +work 3 +scienc 2 +databas 2 +system 2 +plai 2 +also 2 +home 1 +pagejerel 1 +assist 1 +research 1 +special 1 +oper 1 +fulltim 1 +univers 1 +wisconsin 1 +madison 1 +depart 1 +respons 1 +includ 1 +develop 1 +support 1 +sybas 1 +ingr 1 +instal 1 +backup 1 +softwar 1 +train 1 +student 1 +hourli 1 +electr 1 +guitar 1 +thrash 1 +metal 1 +specialti 1 +violin 1 +classic 1 +baroqu 1 +mainli 1 +seen 1 +error 1 +evil 1 +wai 1 +click 1 +shock 1 +case 1 +didn 1 +believ 1 +ey 1 +first 1 +time 1 +like 1 +record 1 +mostli 1 +funni 1 +cover 1 +stuff 1 +abba 1 +metallica 1 +origin 1 +soon 1 +abl 1 +sampl 1 +hit 1 +watch 1 +favorit 1 +show 1 +raquetbal 1 +golf 1 +shoot 1 +pool 1 +stand 1 +around 1 +towel 1 +yeah 1 +know 1 +much 1 +finger 1 +jerellast 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..ceb03a0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,6 @@ +home 1 +page 1 +johan 1 +larson 1 +homepag 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..77df0706 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,131 @@ +page 6 +link 6 +cool 5 +pictur 5 +home 4 +frame 4 +year 3 +realli 2 +would 2 +default 2 +number 2 +paus 2 +imag 2 +stuff 2 +time 2 +friend 2 +homepag 2 +roomat 2 +notr 2 +dame 2 +search 2 +engin 2 +cano 2 +relief 1 +happi 1 +java 1 +good 1 +censorship 1 +mail 1 +jherro 1 +wisc 1 +note 1 +class 1 +made 1 +relax 1 +let 1 +thing 1 +apictur 1 +girlfriend 1 +half 1 +afraid 1 +date 1 +though 1 +almost 1 +mani 1 +tortur 1 +disembody 1 +becam 1 +float 1 +head 1 +death 1 +directori 1 +anim 1 +seri 1 +jpeg 1 +format 1 +name 1 +start 1 +millisecond 1 +overriden 1 +repeat 1 +sequenc 1 +explicit 1 +order 1 +vital 1 +inform 1 +aquir 1 +nicknam 1 +like 1 +take 1 +apolog 1 +lame 1 +pleas 1 +bear 1 +pretti 1 +jack 1 +skellington 1 +kermit 1 +frog 1 +interest 1 +someth 1 +els 1 +neat 1 +write 1 +haiku 1 +said 1 +thath 1 +go 1 +click 1 +mine 1 +grad 1 +memori 1 +forgotten 1 +cult 1 +hippothi 1 +exploratori 1 +intervent 1 +chaotic 1 +exist 1 +realiti 1 +follow 1 +enjoi 1 +benefit 1 +matriarch 1 +societi 1 +join 1 +todai 1 +exclus 1 +club 1 +hierarchi 1 +rule 1 +semi 1 +yahooooooooooooo 1 +work 1 +contain 1 +free 1 +softwar 1 +shack 1 +bazillion 1 +mpeg 1 +movi 1 +archiv 1 +great 1 +muppet 1 +sound 1 +rachel 1 +want 1 +select 1 +trip 1 +look 1 +bout 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..55e3d8b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,130 @@ +storag 4 +dewitt 4 +bing 3 +research 3 +public 3 +madison 3 +dbm 3 +databas 3 +system 3 +paradis 3 +queri 3 +data 3 +page 2 +gener 2 +inform 2 +advisor 2 +interest 2 +project 2 +pointer 2 +comput 2 +wisconsin 2 +jieb 2 +wisc 2 +tertiari 2 +manag 2 +object 2 +technolog 2 +patel 2 +kabra 2 +naughton 2 +submit 2 +octob 2 +process 2 +size 2 +appear 2 +septemb 2 +constraint 2 +februari 2 +client 2 +server 2 +proceed 2 +confer 2 +tenni 2 +pictur 2 +home 1 +index 1 +educ 1 +hobbi 1 +informationresearch 1 +assistantdepart 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +edueduc 1 +scienc 1 +univers 1 +prof 1 +david 1 +dewittresearch 1 +support 1 +parallel 1 +orient 1 +geograph 1 +systemsresearch 1 +shorepublicationsbuild 1 +scaleabl 1 +spatial 1 +implment 1 +evalu 1 +tuft 1 +burger 1 +hall 1 +ramasami 1 +lueder 1 +ellman 1 +kupsch 1 +execut 1 +batch 1 +prong 1 +approach 1 +effici 1 +tape 1 +resid 1 +set 1 +satellit 1 +imag 1 +studi 1 +impact 1 +tile 1 +perform 1 +nasa 1 +goddard 1 +conferenceon 1 +mass 1 +us 1 +tree 1 +goldstein 1 +ramakrishnan 1 +shaft 1 +shorter 1 +version 1 +workshop 1 +larg 1 +base 1 +santiago 1 +chile 1 +reclam 1 +reorgan 1 +serverpersist 1 +store 1 +yong 1 +ieee 1 +engin 1 +houston 1 +eosdi 1 +sigmod 1 +grouphobbi 1 +volleybal 1 +volleyballweb 1 +white 1 +water 1 +raft 1 +whitewat 1 +find 1 +click 1 +full 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..3cafa53e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,58 @@ +jignesh 3 +paradis 3 +publish 3 +home 2 +page 2 +research 2 +madison 2 +system 2 +databas 2 +relat 2 +public 2 +paper 2 +join 2 +patel 1 +wisc 1 +welcom 1 +assist 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +telephon 1 +advisor 1 +david 1 +dewitt 1 +interest 1 +parallel 1 +object 1 +current 1 +work 1 +project 1 +client 1 +server 1 +vldb 1 +partit 1 +base 1 +spatial 1 +merg 1 +sigmod 1 +accur 1 +model 1 +hybrid 1 +hash 1 +algorithm 1 +sigmetr 1 +miscellan 1 +stuff 1 +virtual 1 +tourist 1 +inlin 1 +skate 1 +madhuri 1 +kashmir 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..42c52833 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,10 @@ +page 2 +georg 1 +varghes 1 +peopl 1 +download 1 +netscap 1 +click 1 +warn 1 +pretti 1 +lame 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..98ddd1bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,76 @@ +gehrk 4 +madison 4 +johann 3 +comput 3 +system 3 +time 3 +intern 2 +univers 2 +area 2 +interest 2 +inform 2 +public 2 +wisconsin 2 +scienc 2 +depart 2 +baruah 2 +plaxton 2 +share 2 +resourc 2 +real 2 +ieee 2 +version 2 +homepagejohann 1 +gehrkewelcom 1 +graduat 1 +studentat 1 +sciencesdepart 1 +ofwisconsin 1 +databasemanag 1 +work 1 +data 1 +mine 1 +underprofessor 1 +raghuramakrishnan 1 +page 1 +construct 1 +contact 1 +linkscontact 1 +email 1 +utexa 1 +offic 1 +west 1 +dayton 1 +street 1 +room 1 +home 1 +eagl 1 +height 1 +stoica 1 +abdel 1 +wahab 1 +jeffai 1 +proport 1 +alloc 1 +algorithmfor 1 +proceed 1 +symposium 1 +washington 1 +decemb 1 +appear 1 +anexpand 1 +fastschedul 1 +period 1 +task 1 +multipl 1 +inproceed 1 +parallel 1 +processingsymposium 1 +april 1 +expand 1 +avail 1 +technicalreport 1 +universityof 1 +texa 1 +austin 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..1107a838 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,71 @@ +madison 4 +home 3 +page 3 +comput 3 +student 2 +list 2 +appl 2 +site 2 +pagewelcom 1 +first 1 +year 1 +graduat 1 +univers 1 +wisconsin 1 +studi 1 +scienc 1 +us 1 +also 1 +maintain 1 +frequent 1 +ask 1 +question 1 +latest 1 +powerbook 1 +model 1 +releas 1 +thing 1 +look 1 +section 1 +click 1 +herei 1 +amass 1 +good 1 +number 1 +catagori 1 +check 1 +depart 1 +alma 1 +mater 1 +visit 1 +often 1 +needsth 1 +nando 1 +time 1 +great 1 +new 1 +coverageth 1 +spot 1 +mind 1 +numb 1 +soap 1 +operaish 1 +drivelziffnet 1 +industri 1 +newsc 1 +databas 1 +manag 1 +system 1 +construct 1 +compil 1 +keep 1 +classworktodai 1 +dilbert 1 +chucklejon 1 +bodner 1 +jonb 1 +wisc 1 +mound 1 +last 1 +modifi 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..819799cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,85 @@ +depart 5 +scienc 4 +home 3 +madison 3 +comput 3 +student 2 +chines 2 +academi 2 +china 2 +nanj 2 +univers 2 +advanc 2 +network 2 +spring 2 +welcom 1 +page 1 +first 1 +year 1 +graduat 1 +came 1 +frombeij 1 +hometown 1 +capitol 1 +jiangsu 1 +provinc 1 +degre 1 +wisconsin 1 +institut 1 +autom 1 +beij 1 +specil 1 +pattern 1 +recognit 1 +imag 1 +process 1 +biomed 1 +engin 1 +southeast 1 +chinacurr 1 +activ 1 +cours 1 +topic 1 +databas 1 +manag 1 +oper 1 +system 1 +teach 1 +assist 1 +data 1 +structur 1 +current 1 +address 1 +work 1 +west 1 +dayton 1 +street 1 +tele 1 +offic 1 +could 1 +finger 1 +wisc 1 +refer 1 +inform 1 +class 1 +technic 1 +stuffjava 1 +placeshor 1 +tutorialchina 1 +affairchina 1 +democracybeij 1 +place 1 +interest 1 +stanford 1 +groupstanford 1 +medic 1 +informaticsmit 1 +commun 1 +control 1 +signal 1 +processingjob 1 +site 1 +newsyou 1 +visitor 1 +number 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..36fd7782 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,79 @@ +jose 6 +comput 4 +mercuri 4 +new 4 +home 3 +andnando 3 +jeff 2 +shabel 2 +offic 2 +scienc 2 +graduat 2 +view 2 +engin 2 +cupertino 2 +high 2 +school 2 +favorit 2 +columbia 2 +hous 2 +join 2 +pagech 1 +welcom 1 +page 1 +wisconsinch 1 +theme 1 +song 1 +hour 1 +tue 1 +thur 1 +appoint 1 +person 1 +informationmajor 1 +architectur 1 +emphasi 1 +statu 1 +second 1 +year 1 +student 1 +fall 1 +schedul 1 +academ 1 +background 1 +receiv 1 +diego 1 +electr 1 +depart 1 +town 1 +monta 1 +vista 1 +plan 1 +sport 1 +team 1 +golden 1 +state 1 +warrior 1 +basketbal 1 +shark 1 +hockei 1 +francisco 1 +footbal 1 +oakland 1 +link 1 +newsmus 1 +find 1 +deal 1 +also 1 +tip 1 +info 1 +music 1 +club 1 +miscellan 1 +print 1 +postscript 1 +document 1 +window 1 +send 1 +mail 1 +jshabel 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..51f1634c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,193 @@ +data 9 +disk 8 +report 8 +tape 7 +livni 7 +perform 5 +technolog 5 +larg 5 +proceed 5 +confer 5 +jussi 4 +comput 4 +depart 4 +univers 4 +tertiari 4 +miron 4 +join 4 +home 4 +myllymaki 3 +wisconsin 3 +explor 3 +structur 3 +visual 3 +submit 3 +research 2 +wisc 2 +analysi 2 +includ 2 +buffer 2 +dataset 2 +storageto 2 +recent 2 +relat 2 +appli 2 +organ 2 +set 2 +andtap 2 +intern 2 +integr 2 +parallel 2 +theintern 2 +engin 2 +ramakrishnan 2 +synchron 2 +technic 2 +master 2 +yoav 2 +weiss 2 +class 2 +link 2 +scsi 2 +digit 2 +myllymakijussi 1 +assist 1 +scienc 1 +west 1 +dayton 1 +street 1 +madison 1 +telephon 1 +email 1 +eduresearch 1 +summaryi 1 +interest 1 +dbm 1 +oper 1 +onadvanc 1 +arrai 1 +mcurrent 1 +studi 1 +memori 1 +us 1 +andvisu 1 +deviseproject 1 +advisor 1 +prof 1 +mironlivni 1 +work 1 +improv 1 +joinsof 1 +volum 1 +resid 1 +public 1 +listbelow 1 +solv 1 +problem 1 +associ 1 +divers 1 +characterist 1 +andfunct 1 +limit 1 +media 1 +paper 1 +datavisu 1 +discuss 1 +metadata 1 +managementissu 1 +complex 1 +involv 1 +refere 1 +publicationseffici 1 +concurr 1 +theori 1 +measur 1 +evalu 1 +commun 1 +system 1 +octob 1 +programperform 1 +karen 1 +karavan 1 +bartonp 1 +miller 1 +third 1 +workshop 1 +environ 1 +andtool 1 +scientif 1 +august 1 +tertiarystorag 1 +daniel 1 +ford 1 +februari 1 +alsoavail 1 +almaden 1 +withmiron 1 +raghu 1 +spie 1 +societi 1 +optic 1 +januari 1 +access 1 +acmsigmetr 1 +publicationdevis 1 +queri 1 +beyer 1 +chen 1 +donjerkov 1 +lawand 1 +wenger 1 +sigmod 1 +storag 1 +andmiron 1 +dataengin 1 +publicationsdisk 1 +tapeaccess 1 +project 1 +degreeproject 1 +client 1 +server 1 +model 1 +networkarchitectur 1 +thesi 1 +helsinki 1 +industri 1 +manag 1 +finnish 1 +documentsimplement 1 +treealgorithm 1 +jeff 1 +schwarz 1 +experi 1 +implement 1 +filesystem 1 +trishul 1 +chilimbi 1 +overview 1 +current 1 +productsoverview 1 +raid 1 +supplier 1 +productssom 1 +frequent 1 +need 1 +unifi 1 +search 1 +adaptec 1 +adapt 1 +alpha 1 +workstationsandpcsandtechn 1 +journaland 1 +whitepap 1 +researchandcyberjourn 1 +quantum 1 +linear 1 +tapeanddlt 1 +faqandwhitepap 1 +solarisandsparcstationsandtechn 1 +faqandstorag 1 +faqand 1 +otherusenet 1 +faqsmani 1 +found 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..e8e7b2b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,14 @@ +homepag 1 +jyothithi 1 +page 1 +construct 1 +info 1 +student 1 +cours 1 +grade 1 +other 1 +sorri 1 +dissappoint 1 +email 1 +jyothi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..ee38ec81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,88 @@ +comput 5 +school 4 +madison 4 +scienc 4 +high 3 +karen 2 +parallel 2 +perform 2 +support 2 +ship 2 +karavaniceveryth 1 +need 1 +know 1 +learn 1 +public 1 +karavanicresearch 1 +assist 1 +paradyn 1 +tool 1 +project 1 +univers 1 +wisconsin 1 +depart 1 +west 1 +dayton 1 +street 1 +karavan 1 +wisc 1 +current 1 +pursu 1 +research 1 +interest 1 +includ 1 +environ 1 +autom 1 +tune 1 +process 1 +oper 1 +system 1 +databasesask 1 +women 1 +wic 1 +frontier 1 +cool 1 +program 1 +dane 1 +counti 1 +studentstrio 1 +student 1 +servic 1 +free 1 +tutor 1 +undergradu 1 +miss 1 +site 1 +page 1 +could 1 +save 1 +life 1 +safer 1 +pagefor 1 +chocol 1 +lover 1 +onlystuyves 1 +alumni 1 +associationstuyves 1 +class 1 +thoma 1 +legisl 1 +inform 1 +internetth 1 +constitut 1 +cure 1 +anyth 1 +salt 1 +water 1 +sweat 1 +tear 1 +isak 1 +dinesen 1 +port 1 +safe 1 +sail 1 +thing 1 +admir 1 +grace 1 +hopper 1 +pioneer 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..3ef6d921 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,109 @@ +extens 8 +stefano 7 +kaxira 6 +jame 5 +kiloprocessor 4 +intern 4 +wisconsin 3 +memori 3 +coher 3 +cach 3 +parallel 3 +glow 3 +hierarch 3 +goodman 3 +perform 3 +comput 3 +papakonstantin 3 +research 2 +interest 2 +summari 2 +recent 2 +share 2 +design 2 +processor 2 +architectur 2 +appear 2 +proceed 2 +workshop 2 +base 2 +high 2 +cost 2 +softwar 2 +synthesi 2 +us 2 +prolog 2 +tsanaka 2 +home 1 +page 1 +wisc 1 +editor 1 +ieee 1 +sciresearch 1 +assist 1 +univers 1 +publicationsresearch 1 +multiprocess 1 +scalabl 1 +interfac 1 +aspect 1 +galileo 1 +introduc 1 +collaborationwith 1 +work 1 +examin 1 +depth 1 +option 1 +develop 1 +upcom 1 +standard 1 +incolabor 1 +david 1 +stein 1 +gjess 1 +public 1 +protocol 1 +wide 1 +data 1 +goodmanto 1 +confer 1 +supercomput 1 +also 1 +technic 1 +report 1 +kaxirasto 1 +process 1 +symposium 1 +april 1 +implement 1 +wind 1 +tunnel 1 +goodmannd 1 +march 1 +goodmanst 1 +august 1 +kaxirasunivers 1 +scienc 1 +dept 1 +juli 1 +tool 1 +simul 1 +prototyp 1 +monitor 1 +multiprocessor 1 +system 1 +stafylopati 1 +kaxirasinform 1 +technolog 1 +autom 1 +dedic 1 +specif 1 +pekmestzi 1 +kaxirasp 1 +greec 1 +hardwar 1 +methodolog 1 +kaxirasmicroprocess 1 +microprogram 1 +north 1 +holland 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..dbcb71f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,251 @@ +page 7 +caltech 6 +inform 5 +probabl 4 +comput 4 +slide 4 +like 3 +world 3 +make 3 +us 3 +would 3 +databas 3 +talk 3 +gave 3 +index 3 +quot 3 +keeper 2 +home 2 +steven 2 +everyth 2 +made 2 +note 2 +markup 2 +look 2 +peopl 2 +allow 2 +start 2 +year 2 +graduat 2 +student 2 +ever 2 +leav 2 +surpris 2 +aliv 2 +want 2 +hedgehog 2 +contact 2 +pager 2 +write 2 +number 2 +list 2 +project 2 +insid 2 +user 2 +foughtthei 1 +heaven 1 +perfect 1 +littl 1 +doesn 1 +realli 1 +need 1 +light 1 +lauri 1 +anderson 1 +strang 1 +angel 1 +possibl 1 +structur 1 +indic 1 +piec 1 +rather 1 +access 1 +bitmap 1 +displai 1 +includ 1 +theblind 1 +standard 1 +tag 1 +whateverbrows 1 +wish 1 +also 1 +literari 1 +convent 1 +ironi 1 +satir 1 +sarcasm 1 +butnoth 1 +contain 1 +herein 1 +meant 1 +offens 1 +areoffend 1 +stupid 1 +third 1 +scienc 1 +depart 1 +firsttwo 1 +week 1 +sinc 1 +support 1 +take 1 +care 1 +varieti 1 +machin 1 +vari 1 +degre 1 +success 1 +side 1 +never 1 +publish 1 +done 1 +anyth 1 +impress 1 +miracl 1 +pass 1 +prelim 1 +research 1 +addup 1 +hill 1 +bean 1 +fizzl 1 +result 1 +areobtain 1 +junior 1 +level 1 +programm 1 +creatingkiosk 1 +front 1 +end 1 +visual 1 +primit 1 +system 1 +perman 1 +skill 1 +free 1 +discov 1 +especi 1 +wasn 1 +convinc 1 +thosewho 1 +know 1 +well 1 +argu 1 +proof 1 +mybe 1 +anywai 1 +might 1 +read 1 +thoughtson 1 +electron 1 +mail 1 +reliabl 1 +specif 1 +locat 1 +often 1 +work 1 +ifyou 1 +person 1 +someth 1 +address 1 +wisc 1 +becom 1 +clear 1 +wantto 1 +short 1 +notic 1 +give 1 +variou 1 +creation 1 +thought 1 +todo 1 +updat 1 +sporad 1 +associ 1 +rsum 1 +postscript 1 +document 1 +html 1 +section 1 +long 1 +outof 1 +date 1 +unfortun 1 +danenet 1 +dilhr 1 +jobnet 1 +soon 1 +defunct 1 +institut 1 +archiv 1 +photonet 1 +personnel 1 +directori 1 +much 1 +better 1 +anyon 1 +enter 1 +databaseus 1 +form 1 +interfac 1 +distribut 1 +object 1 +call 1 +java 1 +danger 1 +love 1 +come 1 +hell 1 +freez 1 +rate 1 +break 1 +hierarchi 1 +consult 1 +somewher 1 +fought 1 +unifi 1 +attribut 1 +sfuai 1 +informationag 1 +intellectu 1 +properti 1 +assigna 1 +uniqu 1 +serial 1 +refer 1 +atth 1 +provid 1 +sourc 1 +contextu 1 +pointer 1 +relev 1 +bui 1 +adob 1 +distil 1 +translat 1 +rsuminto 1 +chanc 1 +ofread 1 +suppos 1 +print 1 +pinch 1 +certaintruth 1 +psycholog 1 +softwar 1 +eventuallypick 1 +aren 1 +taught 1 +explicitli 1 +think 1 +possibleto 1 +easier 1 +cheap 1 +shot 1 +thing 1 +hate 1 +idea 1 +mull 1 +accessibleto 1 +small 1 +subset 1 +tough 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..647ad520 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,24 @@ +perform 2 +krishna 1 +kunchithapadamkrishna 1 +kunchithapadamgreet 1 +welcom 1 +page 1 +interest 1 +read 1 +languag 1 +indian 1 +classic 1 +music 1 +miscellaneouspubl 1 +data 1 +distribut 1 +steer 1 +toolsresum 1 +gzip 1 +postscript 1 +contact 1 +search 1 +last 1 +modifi 1 +bykk 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..c1f76358 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,31 @@ +tuft 4 +madison 3 +kristin 2 +home 2 +research 2 +scienc 2 +wisc 2 +inform 2 +pagekristin 1 +assist 1 +comput 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +eduadvisor 1 +david 1 +dewitt 1 +miscellani 1 +serveruw 1 +dbm 1 +groupacm 1 +sigmod 1 +server 1 +pageeo 1 +project 1 +officelast 1 +modifi 1 +tuftekristin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..4e1ffffa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,47 @@ +krung 3 +comput 3 +page 2 +inform 2 +follow 2 +cours 2 +work 2 +scienc 2 +depart 2 +person 2 +madison 2 +wisconsin 2 +homepageupd 1 +novemb 1 +homepag 1 +underconstructioni 1 +keep 1 +short 1 +good 1 +serf 1 +year 1 +cometh 1 +relat 1 +topic 1 +research 1 +mathemat 1 +program 1 +project 1 +pursu 1 +compani 1 +favorit 1 +hobbi 1 +opinion 1 +life 1 +linkedth 1 +import 1 +link 1 +univers 1 +whole 1 +uniqu 1 +entiti 1 +electron 1 +librari 1 +system 1 +sinapiromsaran 1 +emailkrung 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..b29c945a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,137 @@ +kunen 17 +autom 7 +mathemat 7 +logic 6 +comput 5 +theori 5 +reason 5 +appear 5 +univers 4 +hart 4 +wisconsin 3 +deduct 3 +program 3 +applic 3 +semant 3 +measur 3 +axiom 3 +wisc 2 +interest 2 +topolog 2 +research 2 +work 2 +theorem 2 +singl 2 +group 2 +expon 2 +fundamenta 2 +mathematica 2 +technic 2 +report 2 +quasigroup 2 +algebra 2 +loop 2 +preprint 2 +review 2 +math 2 +home 1 +page 1 +kenneth 1 +professormath 1 +scienc 1 +dayton 1 +madison 1 +mail 1 +edutelephon 1 +stanford 1 +summari 1 +involv 1 +typic 1 +tool 1 +like 1 +resolutionto 1 +prove 1 +studi 1 +languag 1 +likeprolog 1 +specif 1 +topic 1 +consid 1 +prologus 1 +negat 1 +failur 1 +incompat 1 +betweenleast 1 +fix 1 +point 1 +prolog 1 +style 1 +backtrack 1 +axiomat 1 +besid 1 +right 1 +thissubject 1 +relat 1 +variou 1 +abstract 1 +area 1 +theoret 1 +mani 1 +basic 1 +question 1 +turn 1 +independ 1 +usualaxiom 1 +select 1 +recent 1 +public 1 +follow 1 +postscript 1 +file 1 +shortest 1 +ramsei 1 +boyer 1 +moor 1 +mill 1 +corson 1 +compact 1 +space 1 +local 1 +constant 1 +function 1 +answer 1 +liter 1 +construct 1 +moufang 1 +associ 1 +law 1 +structur 1 +conjugaci 1 +close 1 +complet 1 +result 1 +link 1 +resolut 1 +press 1 +weak 1 +extens 1 +rough 1 +draft 1 +book 1 +note 1 +moschovaki 1 +american 1 +monthli 1 +cours 1 +taught 1 +fall 1 +geometr 1 +infer 1 +foundat 1 +spring 1 +comp 1 +artifici 1 +intellig 1 +last 1 +chang 1 +octob 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..2a3b692e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,245 @@ +laru 20 +program 18 +jame 17 +parallel 12 +languag 11 +research 9 +memori 9 +compil 8 +support 7 +system 7 +comput 6 +softwar 6 +share 6 +david 6 +wood 6 +mark 5 +hill 5 +confer 5 +intern 5 +architectur 5 +tool 5 +august 5 +wisconsin 4 +project 4 +profil 4 +execut 4 +edit 4 +data 4 +richard 4 +control 4 +employ 4 +wisc 3 +interest 3 +machin 3 +wind 3 +tunnel 3 +grain 3 +ball 3 +brad 3 +guhan 3 +viswanathan 3 +sigplan 3 +implement 3 +novemb 3 +alvin 3 +lebeck 3 +steven 3 +reinhardt 3 +sixth 3 +forprogram 3 +oper 3 +asplo 3 +octob 3 +first 3 +develop 3 +juli 2 +educ 2 +cours 2 +recent 2 +univers 2 +california 2 +berkelei 2 +colleg 2 +design 2 +larg 2 +path 2 +thoma 2 +appear 2 +micro 2 +protocol 2 +satish 2 +chandra 2 +pldi 2 +eric 2 +schnarr 2 +effici 2 +applic 2 +distribut 2 +symposium 2 +user 2 +level 2 +babak 2 +falsafi 2 +ioanni 2 +schoina 2 +ann 2 +roger 2 +annot 2 +hardwar 2 +lorenz 2 +bell 2 +lab 2 +flow 2 +perform 2 +home 1 +page 1 +associ 1 +professor 1 +sciencedepart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usalaru 1 +eduphon 1 +secretari 1 +fingerson 1 +thea 1 +sklenar 1 +department 1 +offic 1 +upcom 1 +paper 1 +graduat 1 +summari 1 +harvard 1 +languagesand 1 +particular 1 +symbol 1 +trace 1 +librari 1 +structuresc 1 +java 1 +spim 1 +wartsrec 1 +paperseffici 1 +decemb 1 +programminglanguag 1 +gregori 1 +wilson 1 +us 1 +press 1 +teapot 1 +write 1 +coher 1 +instruct 1 +schedul 1 +andjam 1 +workshop 1 +wcsss 1 +februari 1 +irregular 1 +shubhendu 1 +mukherje 1 +shamik 1 +sharma 1 +annerog 1 +joel 1 +saltz 1 +fifth 1 +principl 1 +practiceof 1 +ppopp 1 +independ 1 +languagesdesign 1 +june 1 +tempest 1 +substrat 1 +portabl 1 +compcon 1 +spring 1 +march 1 +static 1 +branch 1 +frequenc 1 +analysi 1 +youfeng 1 +annual 1 +ieee 1 +microarchitectur 1 +specif 1 +markhil 1 +supercomput 1 +time 1 +spent 1 +messag 1 +pass 1 +fine 1 +access 1 +jameslaru 1 +cachier 1 +automat 1 +insert 1 +cico 1 +trishul 1 +chilimbi 1 +icpp 1 +bibliographi 1 +unpublish 1 +manuscript 1 +revis 1 +frequent 1 +cooper 1 +scalabl 1 +multiprocessor 1 +transact 1 +toc 1 +wart 1 +madhusudhan 1 +talluri 1 +new 1 +graduatesbrad 1 +vassar 1 +septemb 1 +techniqu 1 +languagesfirst 1 +oracl 1 +huelsbergen 1 +dynam 1 +depend 1 +tball 1 +summarymi 1 +focus 1 +problem 1 +part 1 +thewisconsin 1 +havehelp 1 +hybrid 1 +computerarchitectur 1 +facilit 1 +parallelmachin 1 +current 1 +student 1 +demonstr 1 +exploit 1 +power 1 +coherencepolici 1 +also 1 +evalu 1 +help 1 +programmersunderstand 1 +improv 1 +andi 1 +algorithm 1 +provid 1 +moredetail 1 +understand 1 +within 1 +routin 1 +hasidentifi 1 +possibl 1 +better 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..68106d8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,11 @@ +nick 1 +pagenick 1 +pageoffic 1 +phone 1 +email 1 +leavi 1 +wisc 1 +eduoffic 1 +hour 1 +tuesdai 1 +wednessdai 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..5adcc736 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,77 @@ +lederman 6 +steven 3 +huss 3 +research 3 +madison 3 +forum 3 +home 2 +page 2 +interest 2 +univ 2 +wisconsin 2 +also 2 +mpistandard 2 +book 2 +inform 2 +complet 2 +sourc 2 +file 2 +comput 2 +scienc 2 +wisc 2 +includ 1 +relat 1 +thewisconsin 1 +wind 1 +tunnel 1 +project 1 +area 1 +parallel 1 +linear 1 +algebra 1 +iscov 1 +prismproject 1 +heavili 1 +invol 1 +sever 1 +other 1 +recent 1 +publish 1 +origin 1 +order 1 +press 1 +isbn 1 +look 1 +refer 1 +editor 1 +current 1 +draft 1 +pleas 1 +keep 1 +mind 1 +work 1 +ongo 1 +andit 1 +document 1 +intend 1 +ongoingwork 1 +committe 1 +member 1 +compress 1 +postscript 1 +compressedtar 1 +individu 1 +avail 1 +would 1 +finger 1 +dept 1 +dayton 1 +phone 1 +messag 1 +desper 1 +mail 1 +http 1 +html 1 +offic 1 +statist 1 +build 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..c1fdcfca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,84 @@ +network 6 +design 3 +control 3 +virtual 3 +proceed 3 +lawrenc 2 +landweb 2 +comput 2 +univers 2 +wisconsin 2 +protocol 2 +high 2 +speed 2 +research 2 +project 2 +implement 2 +congest 2 +admiss 2 +infocom 2 +confer 2 +dynam 2 +time 2 +window 2 +faber 2 +mukherje 2 +loop 2 +home 1 +page 1 +professor 1 +scienc 1 +depart 1 +dayton 1 +madison 1 +telephon 1 +email 1 +wisc 1 +purdu 1 +interest 1 +electronicmail 1 +summari 1 +program 1 +focus 1 +participatingin 1 +gigabit 1 +darpa 1 +nation 1 +involvesth 1 +testb 1 +oper 1 +atgigabit 1 +second 1 +data 1 +rate 1 +work 1 +onissu 1 +visualizationof 1 +atmospher 1 +phenomena 1 +conferenc 1 +sampl 1 +recent 1 +public 1 +fast 1 +circuit 1 +establishmentmethod 1 +olsen 1 +theieee 1 +francisco 1 +april 1 +packet 1 +feedback 1 +witht 1 +sigcommconfer 1 +baltimor 1 +august 1 +gener 1 +clock 1 +combin 1 +close 1 +open 1 +ieee 1 +florenc 1 +coursesconnect 1 +tabl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..e25f0af5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,49 @@ +comput 7 +scienc 4 +lloyd 3 +univers 3 +depart 3 +linguist 3 +shannon 2 +madison 2 +comp 2 +utah 2 +languag 2 +home 1 +page 1 +work 1 +address 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +wisc 1 +respons 1 +us 1 +lectur 1 +section 1 +offic 1 +hour 1 +wednesdai 1 +thursdai 1 +appoint 1 +fall 1 +cours 1 +construct 1 +compil 1 +variou 1 +link 1 +women 1 +chemistri 1 +person 1 +engin 1 +career 1 +servic 1 +archiv 1 +natur 1 +process 1 +artifici 1 +intellig 1 +cognit 1 +xsoft 1 +lexdemo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..9f8db313 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,193 @@ +page 4 +option 4 +cool 3 +fill 3 +luka 2 +fall 2 +also 2 +mail 2 +list 2 +click 2 +entertain 2 +check 2 +thing 2 +lone 2 +meet 2 +women 2 +interact 2 +realli 2 +like 2 +know 2 +feel 2 +search 2 +checkbox 2 +includ 2 +christoph 1 +home 1 +pagechristoph 1 +lukasrelev 1 +inform 1 +offic 1 +phone 1 +email 1 +wisc 1 +edui 1 +appar 1 +coordin 1 +mspl 1 +workshipi 1 +defeat 1 +rival 1 +quest 1 +becom 1 +semest 1 +sunivers 1 +wisconsin 1 +program 1 +languag 1 +seminar 1 +czar 1 +cours 1 +go 1 +take 1 +festiv 1 +java 1 +taught 1 +advisor 1 +tuft 1 +univers 1 +site 1 +save 1 +tiger 1 +number 1 +free 1 +prisonerthi 1 +stock 1 +quoteserv 1 +maintain 1 +fabul 1 +wealth 1 +todd 1 +amus 1 +friend 1 +pagebet 1 +polit 1 +candid 1 +legal 1 +iowa 1 +electron 1 +market 1 +identitycaptain 1 +kirk 1 +sing 1 +gui 1 +troubl 1 +throughamaz 1 +technolog 1 +longer 1 +need 1 +concern 1 +withtri 1 +real 1 +virtual 1 +girlfriend 1 +traci 1 +teri 1 +wait 1 +wife 1 +incred 1 +jump 1 +catthi 1 +anywai 1 +well 1 +pleas 1 +send 1 +case 1 +someth 1 +current 1 +name 1 +address 1 +favorit 1 +appli 1 +killer 1 +buttmunchextrem 1 +dudemichael 1 +nesmith 1 +fanfoolmyth 1 +figurewick 1 +good 1 +basketbal 1 +playervalu 1 +studentment 1 +defectivea 1 +wkrp 1 +cincinatti 1 +tragic 1 +figuregeek 1 +tradesgonzo 1 +admirernetscap 1 +junki 1 +child 1 +pornpersonifi 1 +organ 1 +condom 1 +stretch 1 +much 1 +readi 1 +blowflam 1 +testicl 1 +outer 1 +space 1 +tast 1 +goodpoetri 1 +guruhogwildthi 1 +kick 1 +assman 1 +manbig 1 +dudeuh 1 +ohprofession 1 +muff 1 +diverregress 1 +higher 1 +lifeformherald 1 +alien 1 +invas 1 +forcechri 1 +html 1 +formsalienherpetophiletodd 1 +turnidg 1 +hatth 1 +mancreepi 1 +laugh 1 +headsmal 1 +planetdr 1 +companioneast 1 +bunnycyberweenietcl 1 +hellbeast 1 +simpli 1 +submit 1 +reload 1 +mayb 1 +figur 1 +automat 1 +keyword 1 +interest 1 +superhighwai 1 +drug 1 +cosmo 1 +irrit 1 +gross 1 +nake 1 +scatolog 1 +pervert 1 +offspr 1 +food 1 +etymolog 1 +phat 1 +gnarli 1 +bogu 1 +wierd 1 +cybermuffin 1 +pictur 1 +erotica 1 +chees 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..a58a49f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,84 @@ +comput 6 +univers 6 +scienc 5 +home 4 +page 4 +want 3 +wuhan 3 +dept 3 +ling 2 +zheng 2 +madison 2 +mail 2 +know 2 +click 2 +research 2 +paradyn 2 +take 2 +look 2 +alumni 2 +best 2 +graduat 2 +welcom 1 +depart 1 +sheboygan 1 +dayton 1 +offic 1 +phone 1 +lzheng 1 +wisc 1 +shameless 1 +self 1 +promot 1 +resum 1 +text 1 +version 1 +side 1 +interest 1 +assist 1 +group 1 +current 1 +hack 1 +onto 1 +hpux 1 +port 1 +boss 1 +barton 1 +miller 1 +also 1 +charg 1 +chinaand 1 +girlfriend 1 +pictur 1 +temporarili 1 +architectur 1 +educ 1 +prese 1 +winsconsin 1 +iowa 1 +officem 1 +marcelo 1 +goncalv 1 +ignor 1 +china 1 +place 1 +surf 1 +compani 1 +hereif 1 +school 1 +sthe 1 +infom 1 +could 1 +america 1 +schoolssend 1 +suggest 1 +homepag 1 +bother 1 +thank 1 +last 1 +updat 1 +march 1 +visitor 1 +number 1 +sinc 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..b1700aea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,44 @@ +know 3 +manuvir 2 +look 2 +like 2 +golf 2 +home 1 +pagemanuvir 1 +dasnow 1 +name 1 +andwhat 1 +hello 1 +feelfre 1 +around 1 +need 1 +inform 1 +somethingsend 1 +email 1 +passion 1 +anact 1 +photo 1 +later 1 +manuvirwhat 1 +gener 1 +start 1 +advisor 1 +better 1 +thisto 1 +keep 1 +monei 1 +come 1 +turn 1 +theorigin 1 +america 1 +team 1 +cours 1 +leagu 1 +plai 1 +dai 1 +sundai 1 +round 1 +final 1 +consin 1 +said 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..378b8364 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,394 @@ +comput 40 +hill 36 +mark 35 +architectur 17 +memori 16 +system 16 +wisconsin 15 +david 13 +wood 13 +parallel 12 +page 12 +research 10 +jame 10 +perform 9 +share 9 +laru 9 +intern 9 +sarita 8 +adv 8 +scienc 7 +hardwar 7 +june 7 +symposium 7 +cach 7 +wind 6 +tunnel 6 +isca 6 +home 5 +univers 5 +address 5 +project 5 +juli 5 +implement 5 +support 5 +confer 5 +talluri 5 +ieee 5 +transact 5 +august 5 +current 4 +inform 4 +recent 4 +engin 4 +program 4 +tabl 4 +oper 4 +shubhendu 4 +mukherje 4 +distribut 4 +madhusudhan 4 +alvin 4 +lebeck 4 +steven 4 +reinhardt 4 +kessler 4 +model 4 +wisc 3 +offic 3 +like 3 +data 3 +advanc 3 +languag 3 +machin 3 +design 3 +level 3 +space 3 +interfac 3 +softwar 3 +supercomput 3 +subblock 3 +babak 3 +falsafi 3 +simul 3 +novemb 3 +sigmetr 3 +consist 3 +first 3 +employ 3 +email 3 +markhil 2 +associ 2 +professor 2 +content 2 +hour 2 +interest 2 +sampler 2 +us 2 +group 2 +tool 2 +wart 2 +patterson 2 +spec 2 +benchmark 2 +suit 2 +california 2 +berkelei 2 +high 2 +larg 2 +analysi 2 +requir 2 +work 2 +expect 2 +workstat 2 +process 2 +compil 2 +tempest 2 +madhu 2 +translat 2 +cluster 2 +base 2 +tlb 2 +experiment 2 +coher 2 +applic 2 +ann 2 +roger 2 +protocol 2 +superpag 2 +comparison 2 +trace 2 +sampl 2 +multi 2 +megabyt 2 +cooper 2 +multiprocessor 2 +lewi 2 +weak 2 +rice 2 +richard 2 +crai 2 +pagemark 1 +andelectr 1 +engineeringat 1 +wisconsint 1 +teach 1 +catalog 1 +educ 1 +andsummari 1 +paper 1 +graduateslink 1 +world 1 +wide 1 +stuff 1 +oralpresent 1 +advic 1 +includ 1 +show 1 +give 1 +talk 1 +onlin 1 +forcach 1 +proof 1 +sound 1 +depart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usamarkhil 1 +eduphon 1 +secretari 1 +fingerson 1 +thea 1 +sklenar 1 +department 1 +fall 1 +mondai 1 +wednesdai 1 +appoint 1 +educurr 1 +teachingfal 1 +ifal 1 +topic 1 +java 1 +icatalog 1 +cours 1 +teachc 1 +organ 1 +programmingc 1 +introduct 1 +architecturec 1 +iieduc 1 +michigan 1 +evaluationresearch 1 +summarymi 1 +target 1 +multiprocessorsand 1 +uniprocessor 1 +import 1 +determin 1 +sustain 1 +mywork 1 +emphas 1 +quantit 1 +often 1 +evaluationtechniqu 1 +much 1 +part 1 +windtunnel 1 +projectwith 1 +prof 1 +manystud 1 +futur 1 +massiv 1 +computerswil 1 +built 1 +node 1 +levelparallel 1 +inwhich 1 +uniformli 1 +refer 1 +seek 1 +todevelop 1 +consensu 1 +middl 1 +languagesand 1 +recentlypropos 1 +enabl 1 +programm 1 +librari 1 +messag 1 +pass 1 +transpar 1 +hybrid 1 +combin 1 +aredevelop 1 +think 1 +aclust 1 +hypothet 1 +platform 1 +name 1 +toolsto 1 +cull 1 +manner 1 +similarto 1 +aeronaut 1 +convent 1 +designairplan 1 +talluritarget 1 +improv 1 +lookasid 1 +buffer 1 +align 1 +option 1 +chang 1 +complet 1 +superpagesand 1 +partial 1 +asplosandsosppap 1 +papersth 1 +annot 1 +bibliographi 1 +unpublish 1 +manuscript 1 +revis 1 +frequent 1 +bidirect 1 +technolog 1 +transfer 1 +sabbat 1 +industri 1 +network 1 +fine 1 +grain 1 +commun 1 +anddavid 1 +optimist 1 +execut 1 +sashikanth 1 +chandrasekaran 1 +workshop 1 +pad 1 +yousef 1 +khalidi 1 +princip 1 +sosp 1 +decemb 1 +presidenti 1 +young 1 +investig 1 +award 1 +final 1 +report 1 +effici 1 +irregular 1 +shamik 1 +sharma 1 +joel 1 +saltz 1 +ppopp 1 +cost 1 +effect 1 +februari 1 +solv 1 +microstructur 1 +electrostat 1 +propos 1 +frank 1 +traenkl 1 +sangta 1 +chemic 1 +specif 1 +user 1 +ioanni 1 +schoina 1 +surpass 1 +less 1 +forprogram 1 +asplo 1 +octob 1 +evalu 1 +directori 1 +medium 1 +scale 1 +memorymultiprocessor 1 +techniqu 1 +scalabl 1 +toc 1 +new 1 +jeffrei 1 +dionisio 1 +pnevmatikato 1 +alan 1 +smith 1 +micro 1 +unifi 1 +formal 1 +four 1 +tpd 1 +implic 1 +toler 1 +fault 1 +andrea 1 +farid 1 +pour 1 +march 1 +mechan 1 +satish 1 +chandra 1 +subbarao 1 +palacharla 1 +virtual 1 +prototyp 1 +placement 1 +algorithm 1 +real 1 +index 1 +differ 1 +kourosh 1 +gharachorloo 1 +anoop 1 +gupta 1 +john 1 +hennessi 1 +journal 1 +tradeoff 1 +size 1 +shing 1 +kong 1 +detect 1 +race 1 +barton 1 +miller 1 +robert 1 +netzer 1 +scheme 1 +vikram 1 +mari 1 +vernon 1 +estim 1 +miss 1 +ratio 1 +kessleracm 1 +stack 1 +highli 1 +extend 1 +abstract 1 +sequenti 1 +order 1 +definit 1 +graduatesmadhusudhan 1 +hierarchi 1 +microsystem 1 +assist 1 +secondari 1 +click 1 +last 1 +updatedw 1 +keyword 1 +help 1 +search 1 +rank 1 +higher 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..0ace692e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,18 @@ +marko 3 +page 2 +wisc 2 +home 1 +zaharioudaki 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaphon 1 +mail 1 +note 1 +construct 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..17f889a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,13 @@ +home 4 +page 4 +michael 2 +birk 2 +section 2 +project 1 +list 1 +program 1 +languag 1 +link 1 +alltraxx 1 +mbirk 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..6ce999eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,55 @@ +mcauliff 6 +mark 4 +solomon 3 +madison 2 +carei 2 +sigmod 2 +marvin 2 +proceed 2 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +wisc 1 +research 1 +interest 1 +design 1 +implement 1 +object 1 +orient 1 +databas 1 +system 1 +public 1 +dewitt 1 +franklin 1 +hall 1 +naughton 1 +schuh 1 +tsatalo 1 +white 1 +zwill 1 +shoringup 1 +persist 1 +applic 1 +proc 1 +atrac 1 +base 1 +simul 1 +pointer 1 +swizzl 1 +techniqu 1 +ieee 1 +data 1 +engin 1 +march 1 +michael 1 +towardseffect 1 +effici 1 +free 1 +space 1 +manag 1 +appear 1 +confer 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..09f0de4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,116 @@ +marc 6 +shapiro 5 +wisc 4 +page 3 +tautolog 3 +languag 3 +fast 2 +pointer 2 +think 2 +program 2 +html 2 +madison 2 +hous 2 +believ 1 +current 1 +obsess 1 +fond 1 +disappear 1 +fear 1 +repuls 1 +ponder 1 +analys 1 +watch 1 +lot 1 +jacki 1 +chan 1 +movi 1 +design 1 +read 1 +much 1 +try 1 +teach 1 +elementari 1 +school 1 +student 1 +term 1 +recurs 1 +hope 1 +interrupt 1 +hoar 1 +wrote 1 +introduct 1 +high 1 +level 1 +step 1 +backward 1 +never 1 +recov 1 +home 1 +schedul 1 +todd 1 +automat 1 +accid 1 +gener 1 +elain 1 +dimasi 1 +twisti 1 +littl 1 +amanda 1 +peet 1 +retreather 1 +hyper 1 +mode 1 +emac 1 +thepul 1 +menu 1 +doesn 1 +cool 1 +tag 1 +submiss 1 +softwarei 1 +cobbl 1 +togeth 1 +pldi 1 +abl 1 +work 1 +nowinclud 1 +previous 1 +mostli 1 +miss 1 +file 1 +submit 1 +popl 1 +paper 1 +accur 1 +flow 1 +insensit 1 +point 1 +analysi 1 +shapiroand 1 +susan 1 +horwitz 1 +appear 1 +symposium 1 +principl 1 +variou 1 +address 1 +dept 1 +dayton 1 +mail 1 +talk 1 +finger 1 +marion 1 +list 1 +peopl 1 +know 1 +realli 1 +meet 1 +jonathan 1 +goldstein 1 +paul 1 +ferguson 1 +lawrenc 1 +brown 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..789294fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,22 @@ +offic 3 +home 2 +mellen 2 +wisc 2 +pagerob 1 +minimalist 1 +page 1 +last 1 +modifi 1 +august 1 +mellencamp 1 +taship 1 +introduct 1 +oper 1 +system 1 +email 1 +comput 1 +scienc 1 +build 1 +phone 1 +hour 1 +appoint 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..bd8b589a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,82 @@ +melski 3 +page 3 +also 3 +david 2 +person 2 +comput 2 +scienc 2 +madison 2 +work 2 +russian 2 +somedai 2 +info 1 +pagedavid 1 +melskicurr 1 +depart 1 +mill 1 +statisticsmadison 1 +dayton 1 +permen 1 +west 1 +ivesmarshfield 1 +michel 1 +awesom 1 +current 1 +construct 1 +sister 1 +kasei 1 +great 1 +home 1 +brother 1 +eric 1 +semest 1 +teach 1 +coupl 1 +section 1 +rep 1 +program 1 +languag 1 +myexact 1 +schedul 1 +still 1 +need 1 +determin 1 +undergrad 1 +major 1 +studiesher 1 +univers 1 +wisconsin 1 +even 1 +spent 1 +fall 1 +semesterof 1 +russia 1 +chanc 1 +often 1 +miss 1 +make 1 +back 1 +interest 1 +includ 1 +chess 1 +soccer 1 +recent 1 +beenbik 1 +distract 1 +numerousbook 1 +hasti 1 +rewrit 1 +want 1 +link 1 +tomapquest 1 +plan 1 +steal 1 +alot 1 +map 1 +second 1 +give 1 +direct 1 +marshfield 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..2c2e8329 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,203 @@ +milo 6 +martin 6 +comput 6 +interest 6 +scienc 5 +player 5 +java 4 +wisc 3 +gustavu 3 +adolphu 3 +colleg 3 +compil 3 +architectur 3 +program 3 +year 3 +system 3 +mani 3 +fink 3 +footbal 3 +plai 3 +game 3 +atlanti 3 +ultim 3 +home 2 +student 2 +offic 2 +charl 2 +advanc 2 +mark 2 +hill 2 +technolog 2 +oper 2 +publicationsresearch 2 +perform 2 +advis 2 +humm 2 +micklich 2 +evalu 2 +illicitsubst 2 +detect 2 +fast 2 +neutron 2 +hailperin 2 +next 2 +softwar 2 +direct 2 +quot 2 +page 2 +live 2 +minnesota 2 +land 2 +explor 2 +rule 2 +everyon 2 +pagemilo 1 +graduat 1 +teach 1 +assistantcomput 1 +departmentunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaemail 1 +eduoffic 1 +phone 1 +hour 1 +tuesdai 1 +thursdai 1 +byappointmentba 1 +class 1 +construct 1 +fischer 1 +sit 1 +jame 1 +larusteach 1 +algebra 1 +languag 1 +section 1 +research 1 +interestsi 1 +first 1 +programminglanguag 1 +specif 1 +optim 1 +beinfluenc 1 +hardwar 1 +mobil 1 +addit 1 +challeng 1 +present 1 +design 1 +thing 1 +even 1 +know 1 +summer 1 +argonn 1 +nation 1 +laboratori 1 +develop 1 +divis 1 +view 1 +reconstruct 1 +paramet 1 +us 1 +transmiss 1 +spectroscopi 1 +ieee 1 +nuclear 1 +symposium 1 +medic 1 +imag 1 +confer 1 +yule 1 +sagalovski 1 +techniqu 1 +nucl 1 +inst 1 +meth 1 +school 1 +languageflex 1 +determinist 1 +dynam 1 +parallel 1 +senior 1 +honor 1 +thesi 1 +mathemat 1 +depart 1 +postscript 1 +resourc 1 +compani 1 +found 1 +anintern 1 +scientif 1 +educ 1 +organ 1 +dedic 1 +toadvanc 1 +engin 1 +applic 1 +informationtechnolog 1 +serv 1 +profession 1 +public 1 +fosteringth 1 +open 1 +interchang 1 +inform 1 +promot 1 +highestprofession 1 +ethic 1 +standard 1 +person 1 +bignfl 1 +sinc 1 +myfavorit 1 +team 1 +vike 1 +eventhough 1 +chees 1 +head 1 +colon 1 +conquer 1 +multi 1 +mail 1 +space 1 +combat 1 +wrote 1 +babylon 1 +best 1 +show 1 +imho 1 +email 1 +mythic 1 +world 1 +build 1 +armi 1 +engaug 1 +trade 1 +fight 1 +wonder 1 +monster 1 +train 1 +wizard 1 +discov 1 +underworld 1 +right 1 +current 1 +list 1 +frisbe 1 +associ 1 +combin 1 +element 1 +ofsocc 1 +basketbal 1 +pace 1 +afrisbe 1 +quarterback 1 +receiv 1 +ultimatein 1 +simpl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..c25317ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,108 @@ +comput 7 +mino 6 +home 6 +page 6 +madison 4 +databas 4 +garofalaki 4 +research 3 +resourc 3 +parallel 3 +queri 3 +scienc 3 +schedul 3 +yanni 3 +wisconsin 2 +interest 2 +multimedia 2 +system 2 +optim 2 +univers 2 +dept 2 +decemb 2 +patra 2 +june 2 +public 2 +ioannidi 2 +sigmod 2 +paper 2 +postscript 2 +technic 2 +report 2 +garofalakismino 1 +wisc 1 +eduphd 1 +candid 1 +assist 1 +depart 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usaoffic 1 +stat 1 +phone 1 +workresearch 1 +effect 1 +manag 1 +complex 1 +process 1 +algorithm 1 +theoryeduc 1 +engin 1 +informat 1 +refere 1 +multi 1 +dimension 1 +proceed 1 +confer 1 +montreal 1 +canada 1 +abstract 1 +issu 1 +survei 1 +enhanc 1 +view 1 +continu 1 +media 1 +banu 1 +ozden 1 +silberschatz 1 +submit 1 +octob 1 +model 1 +check 1 +sequenti 1 +probabilist 1 +real 1 +time 1 +technolog 1 +institut 1 +februari 1 +advisor 1 +ioannidismor 1 +feel 1 +free 1 +peek 1 +resum 1 +pointer 1 +stuff 1 +dbm 1 +reasearch 1 +hellen 1 +societi 1 +vldb 1 +almaden 1 +center 1 +watson 1 +centerdr 1 +michael 1 +bibliograpi 1 +server 1 +logic 1 +program 1 +perpetu 1 +construct 1 +last 1 +updat 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..3f638390 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,21 @@ +madison 3 +marcelo 2 +gonalv 2 +comput 2 +depart 2 +phone 2 +mjrg 1 +wisc 1 +associ 1 +research 1 +paradyn 1 +project 1 +addresswork 1 +home 1 +scienc 1 +sheboygan 1 +west 1 +dayton 1 +street 1 +sciencesunivers 1 +wisconsin 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..08fbe1a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,35 @@ +brian 2 +morgan 2 +wisconsin 2 +conferenc 2 +home 1 +page 1 +morgangradu 1 +studentcomput 1 +scienc 1 +depart 1 +univers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +mail 1 +wisc 1 +telephon 1 +advisor 1 +chuck 1 +dyerresearch 1 +interestsvirtu 1 +system 1 +imag 1 +compress 1 +video 1 +high 1 +bandwidth 1 +network 1 +relat 1 +link 1 +interest 1 +comput 1 +vision 1 +group 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..2a53abdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,121 @@ +comput 5 +wisconsin 3 +work 3 +processor 3 +support 3 +univers 3 +andrea 2 +page 2 +multiscalar 2 +architectur 2 +data 2 +specul 2 +download 2 +postscript 2 +interest 2 +scienc 2 +crete 2 +greec 2 +greek 2 +mani 2 +moshovo 1 +home 1 +moshovosresearch 1 +assist 1 +depart 1 +sciencesunivers 1 +madisonadvisor 1 +guri 1 +sohigroup 1 +kestrel 1 +address 1 +leav 1 +notese 1 +aroundw 1 +peek 1 +futur 1 +clickheremi 1 +brother 1 +write 1 +poetri 1 +click 1 +herefor 1 +sampl 1 +current 1 +depend 1 +technic 1 +report 1 +compress 1 +uncompress 1 +talk 1 +slide 1 +load 1 +balanc 1 +gener 1 +instruct 1 +level 1 +parallel 1 +compil 1 +explot 1 +vlsi 1 +fall 1 +spring 1 +graduat 1 +student 1 +thecour 1 +instituteof 1 +york 1 +earn 1 +degre 1 +sinc 1 +transfer 1 +howev 1 +theopportun 1 +excel 1 +peopl 1 +meet 1 +wife 1 +implement 1 +numer 1 +algorithm 1 +access 1 +decoupl 1 +architecturethat 1 +softwar 1 +pipelin 1 +advisor 1 +kateveni 1 +short 1 +descript 1 +found 1 +viha 1 +like 1 +editor 1 +edit 1 +link 1 +hellen 1 +resouc 1 +network 1 +sure 1 +visit 1 +obtain 1 +instal 1 +font 1 +local 1 +copi 1 +resid 1 +atwww 1 +hyper 1 +devil 1 +dictionari 1 +bookmark 1 +mess 1 +nation 1 +fraud 1 +inform 1 +centerusenet 1 +chang 1 +want 1 +send 1 +afax 1 +free 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..f49bd1ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,38 @@ +page 5 +updat 3 +toni 2 +chang 2 +contact 2 +home 1 +pagewhat 1 +newoctob 1 +back 1 +inmadison 1 +sever 1 +background 1 +black 1 +better 1 +contrast 1 +inform 1 +minor 1 +variou 1 +list 1 +older 1 +prefer 1 +keep 1 +main 1 +brief 1 +herear 1 +link 1 +second 1 +level 1 +navig 1 +index 1 +friend 1 +favorit 1 +interest 1 +informationlast 1 +modifi 1 +octob 1 +wisc 1 +educopyright 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..2a0e32fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,151 @@ +coke 5 +wisconsin 4 +martin 3 +ream 3 +page 3 +poobah 3 +comput 3 +scienc 3 +interest 3 +databas 3 +design 3 +student 2 +madison 2 +telephon 2 +dept 2 +mream 2 +wisc 2 +compil 2 +exam 2 +year 2 +gone 2 +want 2 +even 2 +thing 2 +might 2 +home 1 +graduat 1 +teach 1 +assist 1 +also 1 +finger 1 +machin 1 +departmentunivers 1 +dayton 1 +offic 1 +email 1 +edufal 1 +scheduleresearch 1 +particular 1 +digit 1 +terrain 1 +model 1 +tin 1 +program 1 +languag 1 +logic 1 +logicprogram 1 +qualifi 1 +spring 1 +previou 1 +softwar 1 +develop 1 +product 1 +orientedenviron 1 +exploit 1 +educ 1 +interestsin 1 +resum 1 +postscriptand 1 +html 1 +distribut 1 +affili 1 +mathemat 1 +wesleyan 1 +univers 1 +faint 1 +heart 1 +section 1 +alink 1 +senior 1 +honorsthesi 1 +gener 1 +unif 1 +poobahlook 1 +work 1 +mighti 1 +afraid 1 +dear 1 +tomi 1 +head 1 +usual 1 +realli 1 +talk 1 +tosomeon 1 +better 1 +adjust 1 +crucial 1 +role 1 +life 1 +youshould 1 +probabl 1 +elton 1 +doesn 1 +mention 1 +imaginethat 1 +besid 1 +aforement 1 +poobahship 1 +mental 1 +ill 1 +afew 1 +know 1 +third 1 +yeargradu 1 +depart 1 +concentr 1 +indatabas 1 +current 1 +studi 1 +qual 1 +sometim 1 +inearli 1 +februari 1 +exercis 1 +relax 1 +plai 1 +squash 1 +reason 1 +well 1 +round 1 +ultim 1 +frisbe 1 +summer 1 +basketbal 1 +poorli 1 +andinfrequ 1 +notic 1 +rapidlyrid 1 +mountain 1 +bike 1 +around 1 +campu 1 +chilliest 1 +weather 1 +alwai 1 +helmet 1 +wish 1 +learn 1 +feel 1 +free 1 +examin 1 +mynot 1 +often 1 +updat 1 +hierarchi 1 +stuff 1 +ilik 1 +enjoi 1 +line 1 +librarylast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..0371b963 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,34 @@ +wisconsin 3 +multiscalar 3 +project 3 +comput 3 +architectur 2 +inform 2 +sohi 2 +home 1 +pagewisconsin 1 +technic 1 +paper 1 +talk 1 +given 1 +peopl 1 +contributor 1 +fund 1 +sourc 1 +relat 1 +avail 1 +softwar 1 +group 1 +scienc 1 +departmentat 1 +univers 1 +world 1 +wide 1 +interest 1 +local 1 +user 1 +last 1 +updat 1 +februari 1 +guri 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..f29c9d44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,134 @@ +depart 4 +comput 4 +scienc 4 +steel 3 +maryland 3 +oper 3 +system 3 +univers 3 +come 3 +mail 3 +wisc 2 +class 2 +wisconsin 2 +around 2 +introduct 2 +friend 2 +favorit 2 +usenet 2 +frequent 2 +ask 2 +question 2 +list 2 +mike 1 +homepagemik 1 +homepagemsteel 1 +eduoffic 1 +comp 1 +stat 1 +build 1 +sit 1 +univ 1 +struggl 1 +undergradu 1 +sometimearound 1 +april 1 +note 1 +time 1 +stamp 1 +lower 1 +right 1 +corner 1 +sai 1 +folk 1 +graduat 1 +student 1 +madison 1 +school 1 +motto 1 +freezein 1 +land 1 +chees 1 +research 1 +studi 1 +interest 1 +center 1 +artificialintellig 1 +vision 1 +hope 1 +narrow 1 +year 1 +semest 1 +graduateinstructor 1 +section 1 +algebra 1 +languag 1 +program 1 +receiv 1 +bachelor 1 +degre 1 +theunivers 1 +scomput 1 +publicationsgrindston 1 +test 1 +suit 1 +parallel 1 +perform 1 +tool 1 +jefferyk 1 +hollingsworth 1 +michael 1 +technic 1 +reportc 1 +gzip 1 +postscriptfil 1 +semesterc 1 +mari 1 +vernonc 1 +artifici 1 +intellig 1 +chuck 1 +dyermi 1 +pagesinform 1 +gettingin 1 +touch 1 +back 1 +marylandwhom 1 +forgot 1 +address 1 +linksmi 1 +sport 1 +teamssom 1 +dave 1 +barri 1 +listth 1 +billi 1 +joel 1 +listi 1 +also 1 +administr 1 +thefruit 1 +still 1 +host 1 +ofmaryland 1 +start 1 +insidejok 1 +offic 1 +hand 1 +sometim 1 +andnow 1 +member 1 +world 1 +wide 1 +predat 1 +kill 1 +someinfrar 1 +photo 1 +know 1 +looklik 1 +infrar 1 +pictur 1 +memik 1 +steelemsteel 1 +eduunivers 1 +madisoncomput 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..f0d3792b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,16 @@ +home 2 +visit 2 +univers 2 +maria 1 +pagemaria 1 +pagehow 1 +maryland 1 +colleg 1 +park 1 +mayb 1 +wisconsin 1 +madison 1 +section 1 +might 1 +want 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..7bd892d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,79 @@ +venezuela 5 +barquisimeto 3 +like 3 +naim 2 +work 2 +research 2 +guitar 2 +oscar 1 +home 1 +page 1 +bienvenido 1 +staff 1 +member 1 +paradyn 1 +project 1 +finish 1 +myph 1 +univers 1 +southampton 1 +england 1 +undergrad 1 +universidad 1 +simon 1 +bolivar 1 +caraca 1 +born 1 +beauti 1 +citi 1 +barquisimetoi 1 +locat 1 +central 1 +western 1 +part 1 +popul 1 +ofabout 1 +million 1 +peopl 1 +also 1 +known 1 +music 1 +capit 1 +main 1 +area 1 +perform 1 +analysi 1 +visual 1 +parallel 1 +program 1 +howev 1 +apart 1 +playclass 1 +fact 1 +studi 1 +year 1 +excellentmaestro 1 +rodrigo 1 +riera 1 +antonio 1 +lauro 1 +spend 1 +time 1 +plai 1 +read 1 +good 1 +book 1 +sherlock 1 +holm 1 +stori 1 +cook 1 +watch 1 +basebal 1 +beati 1 +pictur 1 +pleaseclick 1 +finger 1 +wisc 1 +check 1 +around 1 +mundo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..550554a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,39 @@ +home 3 +anastassia 2 +ailamaki 2 +madison 2 +realli 2 +welcom 1 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +street 1 +phone 1 +want 1 +pictur 1 +import 1 +notic 1 +find 1 +time 1 +make 1 +decent 1 +page 1 +nice 1 +link 1 +georg 1 +rochest 1 +alex 1 +guid 1 +greek 1 +islandsar 1 +worth 1 +visit 1 +send 1 +mail 1 +natassa 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..6e253e81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,99 @@ +databas 5 +object 4 +system 4 +parallel 3 +algorithm 3 +relat 3 +dimension 2 +data 2 +analysi 2 +current 2 +techniqu 2 +perform 2 +includ 2 +storag 2 +benchmark 2 +dbm 2 +public 2 +multidimension 2 +prasad 2 +deshpand 2 +intern 2 +confer 2 +larg 2 +mumbai 2 +bombai 2 +jeffrei 1 +naughtonjeffrei 1 +naughtonnaughton 1 +wisc 1 +eduresearch 1 +interestsolap 1 +multi 1 +relationaldbm 1 +overal 1 +goal 1 +research 1 +develop 1 +ofdatabas 1 +surpass 1 +inperform 1 +eas 1 +three 1 +main 1 +area 1 +ofinterest 1 +improv 1 +ofmulti 1 +arrai 1 +base 1 +andprocess 1 +indic 1 +computingth 1 +cube 1 +valuedattribut 1 +workload 1 +spatial 1 +inform 1 +recent 1 +comput 1 +aggreg 1 +withsameet 1 +agarw 1 +rakesh 1 +agraw 1 +ashish 1 +gupta 1 +raghu 1 +ramakrishnan 1 +sunita 1 +sarawagi 1 +proceed 1 +thend 1 +estim 1 +aggregatesin 1 +presenc 1 +hierarchi 1 +amit 1 +shukla 1 +karthikeyan 1 +ramasami 1 +bucki 1 +michael 1 +carei 1 +david 1 +dewitt 1 +johann 1 +gerhk 1 +dhaval 1 +shah 1 +moham 1 +asgarian 1 +prepar 1 +toward 1 +molap 1 +withyihong 1 +zhao 1 +kristin 1 +tuft 1 +submit 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..579cde07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,39 @@ +navin 2 +kabranavin 1 +kabragradu 1 +student 1 +depart 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madisonadvisor 1 +david 1 +dewittresearch 1 +area 1 +databas 1 +research 1 +interest 1 +customiz 1 +queri 1 +optim 1 +paradis 1 +project 1 +plan 1 +address 1 +noth 1 +better 1 +explor 1 +bookmark 1 +could 1 +look 1 +indian 1 +stuff 1 +includ 1 +among 1 +thing 1 +archiv 1 +hindi 1 +song 1 +wisc 1 +public 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..da841f3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,28 @@ +newhal 2 +research 2 +perform 2 +newhalltia 1 +wisc 1 +graduat 1 +student 1 +dayton 1 +madison 1 +telephon 1 +interest 1 +parallel 1 +distribut 1 +system 1 +tool 1 +scalabl 1 +analysi 1 +predict 1 +java 1 +group 1 +paradynadvisor 1 +bart 1 +millermummi 1 +pictur 1 +guanajuato 1 +last 1 +chang 1 +august 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..621b9d8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,16 @@ +madison 2 +nanci 1 +hallcomput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +project 1 +shore 1 +scalabl 1 +heterogen 1 +object 1 +repositori 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..0f2313e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,207 @@ +mathemat 28 +program 24 +report 22 +mangasarian 20 +technic 20 +comput 10 +optim 9 +revis 8 +problem 7 +august 7 +scienc 6 +complementar 6 +juli 6 +machin 5 +learn 5 +linear 5 +street 5 +minim 5 +novemb 5 +olvi 4 +decemb 4 +page 3 +univers 3 +wisconsin 3 +parallel 3 +research 3 +converg 3 +smooth 3 +nonlinear 3 +cancer 3 +system 3 +bradlei 3 +method 3 +octob 3 +submit 3 +neural 3 +inform 3 +journal 3 +chunhui 3 +chen 3 +view 3 +group 3 +home 2 +wisc 2 +solv 2 +aspect 2 +bound 2 +inequ 2 +gener 2 +breast 2 +current 2 +public 2 +function 2 +constraint 2 +convex 2 +concav 2 +process 2 +wolberg 2 +januari 2 +editor 2 +proceed 2 +nick 2 +april 2 +appear 2 +applic 2 +june 2 +misclassif 2 +februari 2 +advanc 2 +septemb 2 +global 2 +solodov 2 +backpropag 2 +perturb 2 +paper 2 +john 1 +neumann 1 +professor 1 +member 1 +center 1 +depart 1 +dayton 1 +madison 1 +telephon 1 +email 1 +harvard 1 +interest 1 +summari 1 +theori 1 +rich 1 +effectivecomputation 1 +mani 1 +real 1 +life 1 +interestsin 1 +topic 1 +rang 1 +broad 1 +spectrum 1 +encompassestheoret 1 +error 1 +programsand 1 +variat 1 +proof 1 +parallelgradi 1 +variabl 1 +distribut 1 +algorithm 1 +techniqu 1 +constrain 1 +problemsa 1 +differenti 1 +equat 1 +well 1 +applicationsto 1 +specif 1 +context 1 +animport 1 +programmingtechniqu 1 +diagnos 1 +result 1 +ahighli 1 +accur 1 +computer 1 +diagnost 1 +useat 1 +hospit 1 +student 1 +paul 1 +recent 1 +solodova 1 +linearli 1 +descent 1 +strongli 1 +monotonecomplementar 1 +jong 1 +pangexact 1 +penalti 1 +programswith 1 +mangasarianmathemat 1 +data 1 +miningmathemat 1 +mangasarianerror 1 +nondifferenti 1 +strong 1 +slater 1 +qualif 1 +cluster 1 +individu 1 +collect 1 +prognost 1 +predict 1 +featur 1 +select 1 +polyhedr 1 +appli 1 +festschrift 1 +klau 1 +ritter 1 +fischer 1 +riedmuel 1 +schaeffler 1 +physica 1 +verlag 1 +germani 1 +pose 1 +siam 1 +internationalsymposium 1 +baltimor 1 +improv 1 +toler 1 +train 1 +workshop 1 +eric 1 +plenum 1 +press 1 +hybrid 1 +siag 1 +new 1 +class 1 +mix 1 +diagnosi 1 +prognosi 1 +oper 1 +separ 1 +bilinear 1 +determinist 1 +nonmonoton 1 +cowan 1 +tesauro 1 +alspector 1 +morgan 1 +kaufmann 1 +publish 1 +francisco 1 +california 1 +inequalitiesand 1 +serial 1 +net 1 +vianonmonoton 1 +minimn 1 +softwar 1 +chronolog 1 +bibliographi 1 +download 1 +period 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..d112fc33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,421 @@ +cancer 17 +diagnosi 14 +comput 13 +breast 13 +mangasarian 11 +wolberg 11 +us 10 +page 9 +program 9 +imag 9 +abstract 9 +prognosi 8 +research 8 +street 8 +predict 8 +learn 7 +case 7 +malign 7 +time 7 +machin 6 +result 6 +sampl 6 +probabl 6 +patient 6 +new 6 +linear 5 +featur 5 +approxim 5 +valu 5 +recurr 5 +surviv 5 +medic 5 +march 5 +base 4 +work 4 +relat 4 +diagnos 4 +data 4 +analysi 4 +slide 4 +nuclei 4 +nuclear 4 +benign 4 +diseas 4 +free 4 +prognost 4 +cytolog 4 +biopsi 4 +wisconsin 3 +madison 3 +scienc 3 +depart 3 +surgeri 3 +oncolog 3 +provid 3 +link 3 +needl 3 +aspir 3 +construct 3 +separ 3 +known 3 +xcyt 3 +extrem 3 +year 3 +curv 3 +april 3 +report 3 +univers 3 +januari 3 +variou 2 +appli 2 +collabor 2 +human 2 +american 2 +citat 2 +popular 2 +press 2 +local 2 +mass 2 +fine 2 +nine 2 +characterist 2 +setiono 2 +method 2 +pattern 2 +small 2 +system 2 +process 2 +differenti 2 +user 2 +isol 2 +individu 2 +boundari 2 +ofeach 2 +approach 2 +textur 2 +mean 2 +train 2 +plane 2 +allow 2 +shown 2 +segment 2 +recognit 2 +algorithm 2 +pleas 2 +wisc 2 +problem 2 +term 2 +surfac 2 +plot 2 +ofdiseas 2 +repres 2 +factor 2 +lymph 2 +node 2 +could 2 +paper 2 +postscript 2 +proceed 2 +siam 2 +analyt 2 +quantit 2 +histolog 2 +heisei 2 +deriv 2 +august 2 +mathemat 2 +technic 2 +intern 2 +confer 2 +aid 2 +submit 2 +medicin 2 +milwauke 2 +hunt 2 +journal 2 +interpret 2 +group 2 +biomed 2 +prognosismachin 1 +prognosisthi 1 +describ 1 +learningapproach 1 +ofbreast 1 +theunivers 1 +betweenprof 1 +olvi 1 +anddr 1 +william 1 +wolbergof 1 +copi 1 +thepress 1 +releas 1 +distribut 1 +societi 1 +writer 1 +seminar 1 +inmarch 1 +good 1 +overview 1 +tabl 1 +content 1 +bibliographi 1 +linksdiagnosisthi 1 +grew 1 +desir 1 +accur 1 +diagnosebreast 1 +sole 1 +heidentifi 1 +visual 1 +assess 1 +consideredrelev 1 +prof 1 +andtwo 1 +graduat 1 +student 1 +rudi 1 +kristin 1 +bennett 1 +aclassifi 1 +multisurfac 1 +thatsuccessfulli 1 +iswel 1 +began 1 +addit 1 +nick 1 +streetto 1 +team 1 +goal 1 +adigit 1 +section 1 +ofthi 1 +consolid 1 +softwar 1 +current 1 +clinicalpractic 1 +perform 1 +follow 1 +taken 1 +materi 1 +thenmount 1 +microscop 1 +stain 1 +highlight 1 +cellularnuclei 1 +portion 1 +cell 1 +arewel 1 +scan 1 +digit 1 +camera 1 +afram 1 +grabber 1 +board 1 +mous 1 +pointer 1 +draw 1 +nucleu 1 +vision 1 +snake 1 +converg 1 +exact 1 +interact 1 +take 1 +five 1 +minut 1 +showingxcyt 1 +thisfas 1 +measur 1 +size 1 +shape 1 +standarderror 1 +total 1 +classifi 1 +wasconstruct 1 +thisclassifi 1 +consist 1 +singl 1 +space 1 +threeof 1 +area 1 +smooth 1 +project 1 +onto 1 +thenorm 1 +densiti 1 +ofth 1 +point 1 +simpl 1 +bayesiancomput 1 +thesedens 1 +judg 1 +confid 1 +comparison 1 +hundr 1 +previou 1 +date 1 +correctli 1 +consecut 1 +newpati 1 +eight 1 +didxcyt 1 +return 1 +suspici 1 +estimatedprob 1 +subset 1 +sourc 1 +found 1 +goodtest 1 +object 1 +petsegment 1 +automat 1 +identifi 1 +inthes 1 +email 1 +togeth 1 +prognosisth 1 +second 1 +consid 1 +long 1 +behavior 1 +haveapproach 1 +function 1 +inputfeatur 1 +includ 1 +atim 1 +right 1 +censor 1 +solut 1 +util 1 +linearprogram 1 +fornew 1 +examin 1 +actual 1 +caseswith 1 +similar 1 +anindividu 1 +capabl 1 +incorpor 1 +intoxcyt 1 +exampl 1 +versu 1 +black 1 +ourorigin 1 +studi 1 +particular 1 +thereforeha 1 +averag 1 +freeafter 1 +equal 1 +procedur 1 +also 1 +compar 1 +power 1 +ofvari 1 +indic 1 +precis 1 +detail 1 +inform 1 +type 1 +xcytgiv 1 +better 1 +accuraci 1 +tradit 1 +tumors 1 +statu 1 +corrobor 1 +remov 1 +need 1 +often 1 +pain 1 +axillari 1 +chronolog 1 +bibliographylink 1 +format 1 +viewer 1 +download 1 +file 1 +shift 1 +click 1 +netscap 1 +print 1 +ascii 1 +text 1 +obtain 1 +notlink 1 +contact 1 +first 1 +author 1 +theori 1 +applic 1 +workshop 1 +larg 1 +scale 1 +numer 1 +optim 1 +philadelphia 1 +distinguish 1 +patholog 1 +grade 1 +oper 1 +juli 1 +avail 1 +induct 1 +twelfth 1 +priediti 1 +russel 1 +morgan 1 +kaufmann 1 +teagu 1 +call 1 +indetermin 1 +collect 1 +icml 1 +aaai 1 +prime 1 +without 1 +friend 1 +todai 1 +detect 1 +imit 1 +prospect 1 +man 1 +sentinel 1 +analyz 1 +detroit 1 +high 1 +tech 1 +marilynn 1 +marchion 1 +computer 1 +progress 1 +ruth 1 +sorel 1 +houston 1 +chronicl 1 +improv 1 +suggest 1 +replac 1 +surgic 1 +associ 1 +perspect 1 +column 1 +june 1 +cope 1 +septemb 1 +octob 1 +seek 1 +capit 1 +angel 1 +schooloth 1 +nation 1 +librari 1 +nevada 1 +center 1 +model 1 +oncolink 1 +washington 1 +institut 1 +paulb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..bef62568 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,160 @@ +point 16 +separ 14 +plane 13 +linear 10 +program 10 +method 8 +set 7 +page 6 +region 6 +pattern 5 +dimension 5 +parallel 5 +mathemat 4 +optim 4 +us 4 +surfac 4 +also 4 +multisurfac 4 +contain 4 +tree 4 +implement 4 +mangasarian 4 +linearli 3 +euclidean 3 +space 3 +follow 3 +misclassifi 3 +assign 3 +node 3 +neural 3 +network 3 +bennett 3 +describ 2 +comput 2 +scienc 2 +approach 2 +construct 2 +nonlinear 2 +paramet 2 +disjoint 2 +finit 2 +togeth 2 +euclideanspac 2 +sequenc 2 +minim 2 +li 2 +side 2 +mostli 2 +gener 2 +view 2 +decis 2 +split 2 +artifici 2 +packag 2 +matlab 2 +paul 2 +bradlei 2 +programmingpattern 1 +programmingthi 1 +work 1 +section 1 +univers 1 +wisconsin 1 +madison 1 +depart 1 +brief 1 +histori 1 +outlinemathemat 1 +particular 1 +long 1 +problem 1 +whenev 1 +appear 1 +quadrat 1 +polynomi 1 +formul 1 +howev 1 +could 1 +failon 1 +avoid 1 +difficulti 1 +choos 1 +close 1 +discard 1 +repeat 1 +process 1 +variant 1 +develop 1 +goal 1 +todetermin 1 +determin 1 +averag 1 +distanc 1 +similarli 1 +stop 1 +otherwis 1 +anoth 1 +error 1 +eachnod 1 +best 1 +reach 1 +found 1 +solv 1 +branch 1 +thesam 1 +procedur 1 +appli 1 +oneset 1 +astrain 1 +hidden 1 +layer 1 +shown 1 +learn 1 +concept 1 +well 1 +better 1 +traditionallearn 1 +cart 1 +advantag 1 +backpropag 1 +inthat 1 +train 1 +proce 1 +much 1 +faster 1 +mino 1 +numer 1 +nick 1 +street 1 +kristin 1 +descript 1 +file 1 +requir 1 +chronolog 1 +bibliographi 1 +oper 1 +research 1 +june 1 +ieee 1 +transact 1 +inform 1 +theori 1 +novemb 1 +proceed 1 +midwest 1 +intellig 1 +cognit 1 +societi 1 +confer 1 +robust 1 +discrimin 1 +insepar 1 +softwar 1 +orsa 1 +journal 1 +fall 1 +last 1 +modifi 1 +paulb 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..8c3362ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,84 @@ +paradyn 8 +project 7 +tool 5 +parallel 4 +arpa 4 +page 3 +perform 3 +meet 3 +csto 3 +super 3 +comput 3 +program 2 +presentationthi 2 +present 2 +made 2 +wisc 2 +home 1 +releas 1 +informationthi 1 +contain 1 +describ 1 +copi 1 +ofreleas 1 +goalsth 1 +explor 1 +newapproach 1 +build 1 +scalabl 1 +technic 1 +paper 1 +manualsstatu 1 +reporta 1 +recent 1 +statu 1 +report 1 +blizzard 1 +panel 1 +insan 1 +antonio 1 +symbol 1 +tabl 1 +inflorida 1 +effort 1 +develop 1 +common 1 +access 1 +routin 1 +tocompil 1 +gener 1 +inform 1 +us 1 +high 1 +level 1 +parallellanguag 1 +staff 1 +postera 1 +hypertext 1 +version 1 +poster 1 +relat 1 +elsewher 1 +spdt 1 +sigmetr 1 +symposium 1 +distribut 1 +toolsyou 1 +also 1 +restaur 1 +includ 1 +temporari 1 +placehold 1 +contact 1 +informationparadyn 1 +projectdepart 1 +sciencesunivers 1 +wisconsin 1 +west 1 +dayton 1 +streetmadison 1 +email 1 +edufax 1 +last 1 +modifi 1 +bart 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..44c8132a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,32 @@ +parker 3 +home 3 +page 3 +steven 2 +wisconsin 2 +madison 2 +prism 2 +graduat 1 +student 1 +comput 1 +scienc 1 +depart 1 +univers 1 +west 1 +dayton 1 +street 1 +offic 1 +wisc 1 +depth 1 +area 1 +numer 1 +analysi 1 +employ 1 +projectfal 1 +schedul 1 +math 1 +relat 1 +link 1 +send 1 +mail 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..8ba2f8fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,90 @@ +wisconsin 7 +program 6 +comput 4 +univers 4 +mathemat 4 +street 4 +page 3 +paul 3 +scienc 3 +depart 3 +madison 3 +abstract 3 +bradlei 3 +paulb 2 +wisc 2 +fish 2 +us 2 +work 2 +nick 2 +postscript 2 +mangasarian 2 +technic 2 +report 2 +inform 2 +home 1 +bradleygradu 1 +student 1 +eduoffic 1 +csphone 1 +advisor 1 +mangasarianinterestsmathemat 1 +programmingmachin 1 +learningfli 1 +interest 1 +techniqu 1 +specif 1 +nonlinear 1 +linear 1 +induct 1 +learn 1 +summari 1 +currentlyb 1 +done 1 +area 1 +pleas 1 +madisonmathemat 1 +thiswork 1 +guid 1 +professor 1 +olvimangasarian 1 +publicationsal 1 +paper 1 +store 1 +format 1 +ascii 1 +text 1 +viewer 1 +download 1 +file 1 +shift 1 +click 1 +netscap 1 +print 1 +featur 1 +select 1 +decemb 1 +revis 1 +march 1 +submit 1 +journal 1 +cluster 1 +concav 1 +minim 1 +accept 1 +present 1 +neural 1 +process 1 +system 1 +picksthes 1 +site 1 +backcountri 1 +grate 1 +dead 1 +nasa 1 +frog 1 +espnet 1 +timesfax 1 +uroullett 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..4bd31712 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,117 @@ +develop 6 +scout 5 +internet 4 +devri 2 +tool 2 +specialist 2 +comput 2 +scienc 2 +intern 2 +everyth 2 +articl 2 +work 2 +molecularbiolog 2 +resourc 2 +fortun 2 +prof 2 +imag 2 +embryo 2 +journal 2 +site 2 +microscop 2 +pete 1 +home 1 +page 1 +peter 1 +room 1 +westdayton 1 +madison 1 +pdevri 1 +wisc 1 +mean 1 +isthat 1 +read 1 +technolog 1 +make 1 +sens 1 +andthen 1 +write 1 +overview 1 +toolkit 1 +great 1 +thing 1 +iread 1 +think 1 +anywai 1 +rather 1 +foolish 1 +topai 1 +tell 1 +eric 1 +hazen 1 +alsoprovid 1 +technic 1 +webmast 1 +servic 1 +group 1 +although 1 +excel 1 +help 1 +system 1 +folksat 1 +recent 1 +join 1 +team 1 +laboratori 1 +integr 1 +microscopi 1 +biomed 1 +nearli 1 +eight 1 +year 1 +seancarrol 1 +techniqu 1 +creat 1 +multipl 1 +label 1 +confoc 1 +basic 1 +cool 1 +look 1 +lotof 1 +book 1 +cover 1 +also 1 +molecular 1 +biologi 1 +johnwhit 1 +rest 1 +imrstaff 1 +receiv 1 +star 1 +inth 1 +magellan 1 +guid 1 +last 1 +major 1 +project 1 +involv 1 +dimension 1 +studi 1 +isdescrib 1 +appear 1 +augustnd 1 +issu 1 +photo 1 +guest 1 +lab 1 +standard 1 +info 1 +resum 1 +relat 1 +experi 1 +public 1 +present 1 +updat 1 +tuesdai 1 +decemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..e2953e80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,99 @@ +comput 5 +madison 5 +page 4 +scienc 4 +home 3 +plakal 3 +graduat 3 +kanpur 3 +slave 2 +calcutta 2 +bosco 2 +univers 2 +wisconsin 2 +stuff 2 +class 2 +link 2 +friend 2 +wisc 2 +yumpe 1 +manoj 1 +dept 1 +universityofwisconsin 1 +blah 1 +countri 1 +india 1 +though 1 +origin 1 +state 1 +kerala 1 +stai 1 +life 1 +studi 1 +school 1 +salesian 1 +undergrad 1 +major 1 +engin 1 +current 1 +first 1 +year 1 +student 1 +support 1 +teach 1 +assistantship 1 +depart 1 +stare 1 +barrel 1 +either 1 +architectur 1 +program 1 +languag 1 +interest 1 +music 1 +rock 1 +metal 1 +altern 1 +blue 1 +movi 1 +book 1 +acad 1 +hack 1 +industri 1 +geeki 1 +nerdi 1 +featur 1 +chat 1 +gatewai 1 +seealso 1 +iitk 1 +snap 1 +pinup 1 +galleri 1 +everi 1 +nerd 1 +need 1 +check 1 +bookmark 1 +access 1 +log 1 +visit 1 +contact 1 +north 1 +randal 1 +avenu 1 +dayton 1 +street 1 +acknowledg 1 +suresh 1 +venkat 1 +nifti 1 +tabl 1 +igor 1 +ivanisev 1 +wisecrack 1 +icon 1 +variou 1 +corner 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..58eda21a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,70 @@ +madison 4 +page 3 +comput 3 +theori 3 +prasad 2 +home 2 +scienc 2 +univers 2 +wisconsin 2 +address 2 +interest 2 +databas 2 +music 2 +vldb 2 +time 2 +constuct 1 +meanwhil 1 +deshpand 1 +graduat 1 +student 1 +depar 1 +princeton 1 +offic 1 +build 1 +dayton 1 +academ 1 +system 1 +research 1 +area 1 +current 1 +schedul 1 +invest 1 +manageri 1 +econom 1 +meet 1 +prof 1 +jeff 1 +naughton 1 +introduct 1 +public 1 +multidimension 1 +aggreg 1 +storag 1 +estim 1 +multidimensionalaggreg 1 +presenc 1 +hierarchi 1 +cours 1 +project 1 +packag 1 +java 1 +download 1 +want 1 +spend 1 +timex 1 +world 1 +find 1 +india 1 +dilbert 1 +comix 1 +explor 1 +bookmark 1 +random 1 +link 1 +finger 1 +sinc 1 +hakuna 1 +matata 1 +info 1 +creat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..4419f47e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,38 @@ +inform 3 +home 2 +page 2 +poosala 2 +research 2 +madison 2 +vishi 1 +viswanath 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +wisc 1 +reseach 1 +summari 1 +resum 1 +html 1 +postscript 1 +relat 1 +databas 1 +advisor 1 +prof 1 +yanni 1 +ioannidi 1 +asha 1 +voluntari 1 +organ 1 +help 1 +improv 1 +basic 1 +educ 1 +india 1 +interestsuw 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..11cc0cab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,353 @@ +sequenc 41 +queri 38 +data 23 +databas 13 +relat 13 +oper 13 +model 11 +record 11 +system 10 +optim 10 +project 8 +order 7 +sequin 7 +us 7 +time 6 +manag 6 +effici 6 +languag 6 +posit 6 +praveen 6 +seshadri 6 +express 5 +evalu 5 +variou 5 +support 5 +process 5 +miron 5 +livni 5 +ramakrishnan 5 +object 4 +import 4 +requir 4 +includ 4 +scienc 4 +kind 4 +techniqu 4 +implement 4 +nest 4 +complex 4 +storag 4 +earthquak 4 +similar 4 +click 4 +raghu 4 +wisc 4 +domain 3 +exist 3 +allow 3 +need 3 +like 3 +next 3 +built 3 +defin 3 +also 3 +propos 3 +shore 3 +extens 3 +volcano 3 +recent 3 +plan 3 +scan 3 +singl 3 +mani 3 +view 3 +orient 3 +zoom 3 +group 3 +involv 3 +server 3 +proceed 3 +confer 3 +madison 3 +document 2 +collect 2 +set 2 +declar 2 +manner 2 +advantag 2 +user 2 +tempor 2 +previou 2 +demonstr 2 +feasibl 2 +form 2 +embed 2 +base 2 +exampl 2 +inform 2 +meteorolog 2 +phenomena 2 +sequenti 2 +strength 2 +greater 2 +would 2 +sort 2 +join 2 +store 2 +buffer 2 +gener 2 +answer 2 +detail 2 +aredescrib 2 +publish 2 +paper 2 +postscript 2 +version 2 +map 2 +call 2 +could 2 +flavor 2 +explor 2 +collaps 2 +expand 2 +last 2 +work 2 +probabl 2 +devis 2 +result 2 +client 2 +insid 2 +provid 2 +depart 2 +home 1 +pageth 1 +construct 1 +content 1 +objectivescurr 1 +statusmotiv 1 +exampleseq 1 +languageoptim 1 +techniquesseq 1 +developmentpublicationsrel 1 +workcontact 1 +informationproject 1 +number 1 +applic 1 +processingof 1 +larg 1 +amount 1 +theseappl 1 +financi 1 +histor 1 +analysi 1 +econom 1 +social 1 +metereolog 1 +medic 1 +andbiolog 1 +inadequ 1 +regard 1 +treat 1 +consequ 1 +tediou 1 +ineffici 1 +abstract 1 +util 1 +semanticstak 1 +uniqu 1 +opportun 1 +avail 1 +evaluationintegr 1 +canstor 1 +combin 1 +sequencesthes 1 +serv 1 +goal 1 +themost 1 +notion 1 +natur 1 +consid 1 +issu 1 +studi 1 +theori 1 +theoret 1 +idea 1 +statusth 1 +current 1 +statu 1 +algebraicqueri 1 +compos 1 +analogousto 1 +composit 1 +algebra 1 +describ 1 +identifi 1 +candeclar 1 +likesql 1 +vice 1 +versa 1 +build 1 +disk 1 +architectur 1 +sever 1 +megabyt 1 +integr 1 +motiv 1 +querya 1 +weather 1 +monitor 1 +occurr 1 +event 1 +scientist 1 +ask 1 +erupt 1 +didth 1 +richter 1 +scale 1 +featur 1 +groupbi 1 +claus 1 +correl 1 +subqueri 1 +aggregatefunct 1 +convent 1 +find 1 +execut 1 +even 1 +given 1 +knowledg 1 +howev 1 +sequencesord 1 +lock 1 +step 1 +merg 1 +temporari 1 +whenev 1 +valu 1 +check 1 +possibl 1 +therefor 1 +littl 1 +memori 1 +modelth 1 +present 1 +gist 1 +basic 1 +ordereddomain 1 +relationship 1 +andposit 1 +dual 1 +distinct 1 +wai 1 +recordsmap 1 +respect 1 +give 1 +rise 1 +either 1 +relationaloper 1 +overlap 1 +contain 1 +andaggreg 1 +researchersin 1 +commun 1 +offset 1 +movingaggreg 1 +mean 1 +associ 1 +instanc 1 +daili 1 +weekli 1 +hourli 1 +part 1 +deal 1 +make 1 +easi 1 +case 1 +real 1 +worldsitu 1 +extend 1 +instead 1 +extensionof 1 +indic 1 +practic 1 +ofseq 1 +languagew 1 +usingwhich 1 +specifi 1 +languagei 1 +except 1 +input 1 +queriesa 1 +well 1 +descript 1 +techniquesw 1 +thathav 1 +transform 1 +meta 1 +cach 1 +intermedi 1 +algorithm 1 +reli 1 +cost 1 +estim 1 +observ 1 +access 1 +stream 1 +strategi 1 +take 1 +account 1 +developmentth 1 +serverarchitectur 1 +multipl 1 +viaa 1 +multi 1 +thread 1 +ontop 1 +subset 1 +languageswhich 1 +mode 1 +arbitrarylevel 1 +viceversa 1 +supportfor 1 +type 1 +function 1 +detailson 1 +publicationssequ 1 +sigmod 1 +framework 1 +datapraveen 1 +ieee 1 +engin 1 +march 1 +design 1 +systempraveen 1 +submit 1 +vldb 1 +queriesraghu 1 +michael 1 +cheng 1 +intern 1 +comad 1 +decemb 1 +workthedevis 1 +complementari 1 +visualizationenviron 1 +front 1 +pose 1 +examin 1 +graphic 1 +peopl 1 +research 1 +servercontact 1 +informationfor 1 +contact 1 +eduraghu 1 +edumiron 1 +educomput 1 +univers 1 +wisconsin 1 +dayton 1 +street 1 +modifi 1 +seshadripraveen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..ced9eb09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,20 @@ +andrew 2 +prock 2 +school 2 +class 2 +alta 2 +vista 2 +home 1 +page 1 +clemen 1 +hockert 1 +prockoffic 1 +hour 1 +person 1 +histori 1 +madison 1 +bookmark 1 +resum 1 +doonesburi 1 +trot 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..0babcfc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,121 @@ +comput 6 +theori 6 +system 6 +parallel 4 +problem 4 +condon 3 +scienc 3 +interest 3 +interact 3 +proof 3 +us 3 +work 3 +algorithm 3 +page 2 +ann 2 +univers 2 +email 2 +wisc 2 +complex 2 +random 2 +model 2 +recent 2 +hard 2 +approxim 2 +result 2 +graph 2 +automata 2 +develop 2 +journal 2 +home 1 +associ 1 +professor 1 +depart 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +washington 1 +complexityclass 1 +research 1 +summari 1 +interactiveproof 1 +combin 1 +nondetermin 1 +suchmodel 1 +proven 1 +surprisingli 1 +solv 1 +classicproblem 1 +exampl 1 +although 1 +theoryof 1 +complet 1 +long 1 +identifi 1 +computationalproblem 1 +much 1 +progress 1 +understand 1 +whichhard 1 +solut 1 +easi 1 +recentresult 1 +novel 1 +modelsof 1 +turn 1 +prove 1 +approximabilityresult 1 +sever 1 +developingboth 1 +posit 1 +neg 1 +hardcombinatori 1 +aris 1 +game 1 +theoryand 1 +also 1 +design 1 +analysi 1 +current 1 +forsort 1 +minimum 1 +span 1 +tree 1 +goal 1 +well 1 +practic 1 +commun 1 +synchron 1 +costscan 1 +expens 1 +sampl 1 +public 1 +polynomi 1 +bound 1 +strategi 1 +ladner 1 +finit 1 +state 1 +nondeterminist 1 +probabilisticst 1 +hellerstein 1 +pottl 1 +wigderson 1 +proceedingsof 1 +annual 1 +symposium 1 +pspace 1 +provabl 1 +prover 1 +round 1 +caiand 1 +lipton 1 +februari 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..b2bc3902 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,137 @@ +comput 8 +structur 5 +research 5 +scienc 4 +complex 4 +class 4 +sequenc 4 +joseph 3 +properti 3 +algorithm 3 +page 2 +deborah 2 +mathemat 2 +univers 2 +email 2 +wisc 2 +interest 2 +biologi 2 +geometri 2 +concern 2 +area 2 +set 2 +time 2 +recent 2 +question 2 +proof 2 +techniqu 2 +method 2 +genom 2 +develop 2 +proceed 2 +similar 2 +home 1 +associ 1 +professor 1 +depart 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +purdu 1 +appli 1 +theori 1 +logic 1 +summari 1 +theoret 1 +studi 1 +design 1 +analysi 1 +biolog 1 +problem 1 +last 1 +twenti 1 +year 1 +great 1 +deal 1 +work 1 +gone 1 +studyingth 1 +decid 1 +determinist 1 +andnondeterminist 1 +polynomi 1 +despit 1 +effort 1 +stillknow 1 +littl 1 +fact 1 +computerscientist 1 +adequaci 1 +known 1 +techniquesfor 1 +resolv 1 +whether 1 +investigatesth 1 +exploresin 1 +formal 1 +type 1 +necessari 1 +resolveproblem 1 +primarili 1 +inth 1 +theseinclud 1 +dynam 1 +data 1 +algorithmsfor 1 +fragment 1 +assembl 1 +larg 1 +scale 1 +project 1 +specif 1 +handlingrepetit 1 +addit 1 +util 1 +graphtheoret 1 +rapid 1 +homolog 1 +detect 1 +analysisof 1 +anonym 1 +sampl 1 +public 1 +collaps 1 +degre 1 +subexponenti 1 +pruim 1 +young 1 +ninth 1 +theoryconfer 1 +spars 1 +spanner 1 +weight 1 +graph 1 +althof 1 +dobkin 1 +soar 1 +discret 1 +obtain 1 +global 1 +local 1 +meidanisand 1 +tiwari 1 +fourth 1 +scandinavianworkshop 1 +springer 1 +verlag 1 +lectur 1 +note 1 +incomput 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..5dffde59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,108 @@ +simul 5 +manag 4 +polici 4 +miron 3 +resourc 3 +perform 3 +process 3 +page 2 +livni 2 +scienc 2 +email 2 +wisc 2 +interest 2 +algorithm 2 +discret 2 +event 2 +research 2 +data 2 +system 2 +laboratori 2 +disk 2 +tape 2 +proceed 2 +confer 2 +home 1 +professor 1 +comput 1 +depart 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +weizmann 1 +institut 1 +rehovot 1 +israel 1 +model 1 +analysi 1 +summari 1 +major 1 +emphasi 1 +design 1 +evaluationof 1 +involv 1 +developmentof 1 +managementsystem 1 +type 1 +gener 1 +purpos 1 +well 1 +asreal 1 +time 1 +schedul 1 +consid 1 +researchinvolv 1 +studi 1 +differ 1 +specialemphasi 1 +interplai 1 +properti 1 +systemand 1 +sinc 1 +performancestudi 1 +emploi 1 +modelingand 1 +techniqu 1 +current 1 +implementinga 1 +base 1 +novel 1 +languag 1 +includ 1 +util 1 +visualizationtool 1 +graphic 1 +interfac 1 +sampl 1 +recent 1 +public 1 +join 1 +synchron 1 +access 1 +myllymaki 1 +sigmetr 1 +sequenc 1 +queri 1 +sashadri 1 +ramakrishnan 1 +sigmod 1 +foundat 1 +visual 1 +metaphor 1 +schema 1 +displai 1 +haberand 1 +ioannidi 1 +journal 1 +intellig 1 +inform 1 +juli 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..900f3eb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,102 @@ +method 6 +equat 4 +precondit 4 +parter 3 +numer 3 +differenti 3 +ellipt 3 +condit 3 +page 2 +seymour 2 +comput 2 +scienc 2 +univers 2 +email 2 +wisc 2 +partial 2 +research 2 +work 2 +indefinit 2 +discret 2 +system 2 +posit 2 +definit 2 +effect 2 +siam 2 +analysi 2 +distribut 2 +singular 2 +valu 2 +journal 2 +home 1 +professor 1 +mathemat 1 +depart 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +york 1 +interest 1 +summari 1 +time 1 +major 1 +emphasi 1 +solutionof 1 +classicalit 1 +multigrid 1 +effectivelywhen 1 +also 1 +bemad 1 +real 1 +symmetr 1 +part 1 +operatori 1 +hand 1 +casedirect 1 +attempt 1 +preserv 1 +spars 1 +thesystem 1 +encount 1 +small 1 +pivot 1 +thu 1 +challengingproblem 1 +mix 1 +concept 1 +procedur 1 +linearalgebra 1 +nowinvolv 1 +sever 1 +project 1 +attack 1 +class 1 +problem 1 +includ 1 +studi 1 +specialmultigrid 1 +sampl 1 +recent 1 +public 1 +chebyshev 1 +collact 1 +ellipticparti 1 +appear 1 +journalon 1 +boundari 1 +without 1 +estim 1 +number 1 +second 1 +order 1 +oper 1 +numbersand 1 +scientificcomput 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..a46be976 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,128 @@ +parallel 8 +system 6 +comput 4 +techniqu 4 +analysi 4 +vernon 3 +applic 3 +perform 3 +model 3 +issu 3 +processor 3 +polici 3 +proceed 3 +page 2 +mari 2 +scienc 2 +univers 2 +email 2 +wisc 2 +interest 2 +research 2 +repres 2 +also 2 +recent 2 +schedul 2 +network 2 +confer 2 +june 2 +home 1 +professor 1 +industri 1 +engin 1 +depart 1 +wisconsin 1 +dayton 1 +madison 1 +telephon 1 +california 1 +angel 1 +architectur 1 +operatingsystem 1 +summari 1 +analyt 1 +applicationto 1 +emphasi 1 +paralleland 1 +distribut 1 +design 1 +techniquesi 1 +develop 1 +togeth 1 +graduat 1 +student 1 +colleaguesinclud 1 +gener 1 +time 1 +petri 1 +customizedmean 1 +valu 1 +gtpn 1 +systemfeatur 1 +synchron 1 +prioriti 1 +servic 1 +precis 1 +custom 1 +yield 1 +intuit 1 +equationsthat 1 +featur 1 +approxim 1 +butcan 1 +solv 1 +effici 1 +proposedth 1 +call 1 +interpol 1 +approximationsfor 1 +alloc 1 +techniquemai 1 +broader 1 +performanceanalysi 1 +current 1 +project 1 +includ 1 +character 1 +high 1 +performanceparallel 1 +workload 1 +schedulingpolici 1 +multimedia 1 +server 1 +memorymanag 1 +workstat 1 +sampl 1 +public 1 +fair 1 +dqdb 1 +slot 1 +reus 1 +brewster 1 +ieee 1 +infocom 1 +august 1 +accur 1 +hybrid 1 +hash 1 +join 1 +algorithm 1 +pateland 1 +carei 1 +sigmetr 1 +characterist 1 +limit 1 +preemption 1 +forrun 1 +complet 1 +with 1 +chiang 1 +mansharamani 1 +sigmetricsconfer 1 +automat 1 +creat 1 +octob 1 +pub 1 +eduto 1 +report 1 +error 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..47e29d8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,9 @@ +qinqin 2 +wang 2 +home 2 +page 1 +welcom 1 +pageqw 1 +wisc 1 +edulast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..6fad6d0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,198 @@ +data 17 +project 9 +databas 9 +research 8 +queri 8 +system 6 +interest 5 +develop 5 +imag 5 +work 5 +hill 4 +first 4 +us 4 +base 4 +sequenc 4 +languag 4 +next 4 +august 4 +employ 4 +raghu 3 +comput 3 +depart 3 +educ 3 +graduat 3 +text 3 +relat 3 +logic 3 +integr 3 +content 3 +set 3 +access 3 +joint 3 +prof 3 +livni 3 +program 3 +evalu 3 +bell 3 +lab 3 +murrai 3 +ramakrishnan 2 +scienc 2 +madison 2 +teach 2 +activ 2 +univers 2 +cours 2 +manag 2 +second 2 +level 2 +dbm 2 +coral 2 +deal 2 +sever 2 +heterogen 2 +sourc 2 +analysi 2 +larg 2 +constraint 2 +techniqu 2 +support 2 +visual 2 +appli 2 +design 2 +part 2 +gener 2 +explor 2 +algorithm 2 +call 2 +bottom 2 +optim 2 +cornel 2 +home 1 +page 1 +associ 1 +professor 1 +wisc 1 +sciencesunivers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +usaphon 1 +texa 1 +austin 1 +tech 1 +indian 1 +institut 1 +technolog 1 +madra 1 +softwar 1 +minibaseand 1 +coralth 1 +publish 1 +mcgraw 1 +aimedat 1 +undergraduateand 1 +minibas 1 +inconjunct 1 +also 1 +coursesthat 1 +school 1 +index 1 +exploratori 1 +mine 1 +extend 1 +deductiona 1 +grow 1 +diversifi 1 +increasinglyimport 1 +abl 1 +dispers 1 +independ 1 +easili 1 +rodin 1 +successor 1 +investig 1 +severalissu 1 +formal 1 +practic 1 +toolkit 1 +forsemant 1 +multipl 1 +serviceand 1 +networkedclust 1 +machin 1 +ioannidi 1 +recent 1 +result 1 +explorationfrom 1 +area 1 +complex 1 +assequ 1 +seqsystem 1 +focus 1 +optimizationissu 1 +import 1 +aspect 1 +identifyingtrend 1 +identifi 1 +pattern 1 +ofinform 1 +goal 1 +retriev 1 +fromlarg 1 +focu 1 +implementingan 1 +express 1 +definit 1 +customizea 1 +take 1 +advantag 1 +specializedinform 1 +given 1 +collect 1 +indexedand 1 +cover 1 +andmin 1 +power 1 +cluster 1 +birchfor 1 +dataset 1 +tool 1 +devisea 1 +long 1 +stand 1 +extens 1 +databasequeri 1 +featuressuch 1 +structur 1 +term 1 +recurs 1 +ofarithmet 1 +specifi 1 +morecompactli 1 +effici 1 +ongo 1 +involv 1 +continu 1 +coraldeduct 1 +upon 1 +fixpointevalu 1 +make 1 +efficientacross 1 +broad 1 +rang 1 +sudarshan 1 +time 1 +sudarsha 1 +srivastava 1 +deduct 1 +object 1 +orient 1 +divesh 1 +transit 1 +closur 1 +seshadri 1 +managementfirst 1 +praveen 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..981c2ae9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,163 @@ +madison 4 +know 3 +fall 3 +india 3 +love 3 +rahul 2 +home 2 +long 2 +littl 2 +offici 2 +final 2 +semest 2 +comput 2 +came 2 +kanpur 2 +interest 2 +kind 2 +livabl 2 +think 2 +summer 2 +california 2 +great 2 +place 2 +comedi 2 +show 2 +contact 2 +page 1 +kapoorhello 1 +internet 1 +surfer 1 +welcom 1 +cyber 1 +hope 1 +stai 1 +enough 1 +third 1 +master 1 +student 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +get 1 +bachelor 1 +degre 1 +indianinstitut 1 +technolog 1 +employ 1 +pleas 1 +check 1 +resum 1 +cours 1 +schedulemydepartmentmyuniversityiitkanpuriitkclass 1 +relatedlink 1 +menow 1 +gone 1 +want 1 +person 1 +well 1 +normal 1 +born 1 +andrais 1 +small 1 +famili 1 +compris 1 +parent 1 +elder 1 +sister 1 +nice 1 +town 1 +call 1 +state 1 +good 1 +fortun 1 +live 1 +moneymagazin 1 +rate 1 +citi 1 +year 1 +editormust 1 +come 1 +greenland 1 +winter 1 +guess 1 +shouldn 1 +complain 1 +spring 1 +isawesom 1 +jose 1 +work 1 +almaden 1 +research 1 +centr 1 +cannot 1 +much 1 +suppos 1 +anywai 1 +regret 1 +time 1 +area 1 +whatev 1 +monei 1 +magazin 1 +sai 1 +northern 1 +like 1 +music 1 +take 1 +look 1 +collect 1 +movi 1 +almost 1 +genr 1 +though 1 +prefer 1 +romanc 1 +file 1 +star 1 +trek 1 +read 1 +stuff 1 +novel 1 +philosophi 1 +surf 1 +sport 1 +concern 1 +watch 1 +cricket 1 +plai 1 +win 1 +tenni 1 +figur 1 +skate 1 +gymnast 1 +try 1 +learn 1 +swim 1 +bridg 1 +natur 1 +lover 1 +enjoi 1 +go 1 +walk 1 +hike 1 +cloudi 1 +slightli 1 +breezi 1 +wish 1 +could 1 +job 1 +televis 1 +travel 1 +youget 1 +world 1 +paid 1 +musicstuffmovi 1 +televisioninternettravelotherbookmark 1 +meget 1 +form 1 +rest 1 +guestbookrahul 1 +wisc 1 +eduh 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..0f2195ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,76 @@ +raman 6 +rajesh 4 +comput 4 +scienc 3 +univers 3 +current 3 +page 2 +depart 2 +madison 2 +wisc 2 +system 2 +home 1 +old 1 +homm 1 +off 1 +west 1 +dayton 1 +street 1 +email 1 +telephon 1 +ohio 1 +wesleyan 1 +major 1 +mathemat 1 +minor 1 +music 1 +first 1 +year 1 +graduat 1 +student 1 +winsonsin 1 +person 1 +curriculum 1 +vita 1 +postscript 1 +specif 1 +cours 1 +architectur 1 +saluja 1 +perform 1 +evalu 1 +model 1 +livni 1 +distribut 1 +inform 1 +work 1 +team 1 +member 1 +condor 1 +project 1 +integr 1 +part 1 +committe 1 +bookmark 1 +chimera 1 +novelti 1 +monster 1 +chao 1 +subject 1 +contradict 1 +prodigi 1 +judg 1 +thing 1 +feebleworm 1 +earth 1 +depositari 1 +truth 1 +cloaca 1 +uncertainti 1 +error 1 +theglori 1 +shame 1 +blais 1 +pascal 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..6ecd48af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,78 @@ +databas 3 +jeffrei 3 +naughton 3 +comput 2 +scienc 2 +depart 2 +wisconsin 2 +karthik 1 +pagekarthikeyan 1 +ramasamyabouti 1 +student 1 +univers 1 +research 1 +interest 1 +mostli 1 +oper 1 +system 1 +work 1 +paradis 1 +project 1 +projectshack 1 +david 1 +dewitt 1 +connectivityparadis 1 +parallel 1 +pthread 1 +wrapperspublicationsstorag 1 +estim 1 +multidimension 1 +aggreg 1 +presenc 1 +hierarchi 1 +amit 1 +shukla 1 +prasad 1 +deshpand 1 +karthikeyan 1 +ramasami 1 +intern 1 +confer 1 +larg 1 +mumbai 1 +bombai 1 +avail 1 +presentationsweb 1 +picturearchitectur 1 +altern 1 +scalabl 1 +serversphoto 1 +albumencount 1 +leafperson 1 +inforesum 1 +financemonei 1 +wall 1 +street 1 +journal 1 +person 1 +interestshack 1 +photographycontact 1 +informationstreet 1 +address 1 +dayton 1 +madison 1 +electron 1 +mail 1 +addresskarthik 1 +wisc 1 +eduoffic 1 +phone 1 +number 1 +comment 1 +suggestionspleas 1 +tell 1 +think 1 +home 1 +page 1 +might 1 +improv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..60dcb5ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,21 @@ +kelli 3 +page 2 +home 1 +ratliffoffic 1 +phone 1 +email 1 +wisc 1 +edulast 1 +login 1 +offic 1 +hour 1 +inform 1 +student 1 +genealog 1 +interest 1 +place 1 +visit 1 +space 1 +construct 1 +stai 1 +tune 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..f9ef76a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,45 @@ +compress 5 +ratnakar 3 +madison 3 +viresh 2 +page 2 +research 2 +comput 2 +scienc 2 +depart 2 +univers 2 +wisconsin 2 +imag 2 +invok 2 +avail 2 +assist 1 +area 1 +digit 1 +video 1 +advisor 1 +miron 1 +livni 1 +main 1 +interest 1 +base 1 +vector 1 +quantiz 1 +fractal 1 +qualiti 1 +control 1 +lossi 1 +product 1 +mode 1 +public 1 +home 1 +qclicauthor 1 +qclic 1 +qclicbrows 1 +thing 1 +rever 1 +reveal 1 +click 1 +west 1 +dayton 1 +street 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..802dce2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,78 @@ +offic 4 +omin 3 +haven 3 +address 3 +number 3 +monasteriu 2 +doominu 2 +brother 2 +richard 2 +madison 2 +comput 2 +mondai 2 +wednesdai 2 +welcom 1 +without 1 +beard 1 +person 1 +univers 1 +wisconsin 1 +scienc 1 +depart 1 +west 1 +dayton 1 +street 1 +rcarl 1 +wisc 1 +hour 1 +thur 1 +home 1 +page 1 +current 1 +cours 1 +load 1 +distribut 1 +oper 1 +system 1 +underwat 1 +fire 1 +prevent 1 +saturdai 1 +advanc 1 +architectur 1 +tuesdai 1 +thursdai 1 +math 1 +introduct 1 +whole 1 +emphasi 1 +sundai 1 +subsurfac 1 +depositori 1 +engin 1 +grave 1 +dig 1 +fridai 1 +mani 1 +shade 1 +profession 1 +doom 1 +polit 1 +goofi 1 +solitari 1 +innebri 1 +vampir 1 +seriou 1 +nostalg 1 +funki 1 +monk 1 +fellow 1 +order 1 +ever 1 +need 1 +graphic 1 +artist 1 +desktop 1 +publish 1 +check 1 +best 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..1cff896c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,558 @@ +program 91 +rep 77 +comput 48 +languag 33 +scienc 31 +softwar 31 +slice 30 +analysi 28 +horwitz 28 +proceed 24 +york 22 +wisconsin 20 +univers 18 +depart 17 +interprocedur 17 +also 17 +engin 17 +note 17 +popl 17 +confer 17 +graph 16 +integr 16 +madison 15 +system 15 +algorithm 15 +increment 14 +problem 14 +symposium 14 +base 13 +sigsoft 13 +semant 13 +sigplan 13 +juli 13 +environ 12 +dagstuhl 12 +binklei 12 +synthes 12 +gener 12 +teitelbaum 12 +topla 11 +paper 11 +springer 11 +verlag 11 +januari 11 +depend 11 +sagiv 11 +click 11 +ramalingam 10 +record 10 +access 10 +dataflow 9 +develop 9 +us 9 +transact 9 +ieee 9 +lectur 9 +june 9 +shape 8 +yang 8 +editor 8 +reprint 8 +principl 8 +merg 7 +attribut 7 +acta 7 +version 7 +evalu 7 +intern 7 +april 7 +cornel 6 +research 6 +public 6 +manipul 6 +solv 6 +reachabl 6 +thesi 6 +applic 6 +prin 6 +novemb 6 +societi 6 +symposiumon 6 +germani 6 +differenc 5 +grammar 5 +implement 5 +precis 5 +demand 5 +manual 5 +journal 5 +appear 5 +second 5 +notic 5 +foundat 5 +decemb 5 +august 5 +project 4 +associ 4 +tool 4 +algebra 4 +current 4 +idfa 4 +book 4 +third 4 +publish 4 +procedur 4 +interf 4 +modif 4 +direct 4 +alamito 4 +colloquium 4 +washington 4 +portland 4 +octob 4 +wilhelm 4 +updat 4 +releas 4 +page 3 +pointer 3 +categor 3 +post 3 +support 3 +special 3 +chop 3 +tosem 3 +licens 3 +pfeiffer 3 +logic 3 +compcon 3 +refer 3 +informatica 3 +dynam 3 +methodolog 3 +inform 3 +process 3 +demer 3 +syntax 3 +septemb 3 +march 3 +fromacm 3 +berzin 3 +sigsoftsymposium 3 +francisco 3 +seminar 3 +schloss 3 +wadern 3 +destruct 3 +twenti 3 +rosai 3 +denmark 3 +parallel 3 +copenhagen 3 +ofprogram 3 +submit 3 +center 3 +dissert 3 +tech 3 +thoma 2 +home 2 +interest 2 +alia 2 +index 2 +list 2 +visitor 2 +doctor 2 +aim 2 +complex 2 +particular 2 +work 2 +oper 2 +element 2 +instanc 2 +reus 2 +technolog 2 +larg 2 +solut 2 +ics 2 +david 2 +fseb 2 +thesiswuu 2 +esop 2 +amast 2 +poplb 2 +pepma 2 +handl 2 +distribut 2 +transform 2 +fsea 2 +diku 2 +fase 2 +pepmb 2 +lape 2 +psde 2 +spaa 2 +world 2 +toconst 2 +propag 2 +theoret 2 +path 2 +accommod 2 +properti 2 +tree 2 +commun 2 +european 2 +databas 2 +theori 2 +combin 2 +paradigmsfor 2 +brighton 2 +abramski 2 +maibaum 2 +scientif 2 +interact 2 +hill 2 +wherefor 2 +sigoa 2 +text 2 +fourth 2 +petersburg 2 +time 2 +pepm 2 +onparti 2 +jolla 2 +california 2 +fifth 2 +iowa 2 +citi 2 +bricker 2 +workshop 2 +variabl 2 +fifteenth 2 +diego 2 +principlesof 2 +latest 2 +report 2 +ibfi 2 +twentieth 2 +charleston 2 +tutori 2 +represent 2 +multi 2 +univ 2 +professor 2 +repsprofessorcomput 1 +departmentunivers 1 +west 1 +dayton 1 +streetmadison 1 +usa 1 +mail 1 +wisc 1 +telephon 1 +secretari 1 +curriculum 1 +vita 1 +thehom 1 +content 1 +summari 1 +student 1 +summarymi 1 +creat 1 +thedevelop 1 +object 1 +createtool 1 +provid 1 +power 1 +specif 1 +manipulationoper 1 +explor 1 +slicingcan 1 +serv 1 +basi 1 +respect 1 +elementss 1 +includ 1 +thatmight 1 +affect 1 +either 1 +directli 1 +transit 1 +valu 1 +thevari 1 +member 1 +allow 1 +findsemant 1 +meaning 1 +decomposit 1 +thedecomposit 1 +consist 1 +textual 1 +contigu 1 +fundament 1 +solvingmani 1 +applicationsin 1 +understand 1 +mainten 1 +debug 1 +test 1 +worker 1 +carri 1 +atimprov 1 +underli 1 +relatedoper 1 +slicer 1 +method 1 +andbuild 1 +clickherefor 1 +recent 1 +establish 1 +unexpect 1 +connect 1 +betweeninterprocedur 1 +previou 1 +oninterprocedur 1 +show 1 +class 1 +interproceduraldataflow 1 +transformingthem 1 +kind 1 +polynomi 1 +timebi 1 +origin 1 +subject 1 +mean 1 +make 1 +probleminst 1 +find 1 +nearbi 1 +publicationsprogram 1 +overview 1 +slicing_pat 1 +pldi 1 +thesismerg 1 +iwscm 1 +popla 1 +iwsvcc 1 +ccpsd 1 +npfo_submiss 1 +ccipl 1 +prog_integration_system 1 +prog_integration_manu 1 +describ 1 +small 1 +subsetof 1 +pascal 1 +obtain 1 +clickingher 1 +retarget 1 +andexpect 1 +anddifferenc 1 +probabl 1 +miscellan 1 +thesesdavid 1 +thesisphil 1 +thesisinterprocedur 1 +analysisdemand 1 +bottom 1 +magic 1 +set 1 +exhaust 1 +tcs_ide_pap 1 +ptime 1 +complet 1 +acta_pap 1 +pfeiffer_thesi 1 +cacm 1 +jalg_pap 1 +popl_not 1 +publicationsbooksrep 1 +constructinglanguag 1 +edit 1 +chines 1 +corpor 1 +beij 1 +china 1 +press 1 +cambridg 1 +publicationssagiv 1 +sequenti 1 +natur 1 +shortest 1 +j_alg 1 +call 1 +competit 1 +line 1 +prioriti 1 +order 1 +letter 1 +preservingtransform 1 +effici 1 +comparison 1 +grammarswith 1 +unrestrict 1 +movement 1 +sublinear 1 +space 1 +context 1 +invit 1 +papershorwitz 1 +fourteenth 1 +conferenceon 1 +melbourn 1 +australia 1 +nanci 1 +franc 1 +ganzing 1 +chaptersrep 1 +ramakrishnan 1 +kluwer 1 +academ 1 +boston 1 +chang 1 +impact 1 +bohner 1 +arnold 1 +fromproceed 1 +architectur 1 +ichikawa 1 +tsubotani 1 +compani 1 +singapor 1 +barstow 1 +sandewal 1 +shrobe 1 +mcgraw 1 +wasserman 1 +publicationssiff 1 +turnidg 1 +partial 1 +danvi 1 +glueck 1 +thiemann 1 +hentenryck 1 +bind 1 +imper 1 +formalapproach 1 +aarhu 1 +moss 1 +nielsen 1 +schwartzbach 1 +tapsoft 1 +speed 1 +onth 1 +orlean 1 +compilerconstruct 1 +edinburgh 1 +scotland 1 +fritzson 1 +maintain 1 +domin 1 +reducibleflowgraph 1 +first 1 +scan 1 +data 1 +andarchitectur 1 +velen 1 +onalgebra 1 +preserv 1 +irvin 1 +jone 1 +illustr 1 +interfer 1 +softwareconfigur 1 +manag 1 +princeton 1 +languagedesign 1 +issuesin 1 +barcelona 1 +spain 1 +diaz 1 +oreja 1 +programminglanguag 1 +design 1 +atlanta 1 +variant 1 +forprogram 1 +versionand 1 +configur 1 +control 1 +grassau 1 +bericht 1 +german 1 +chapter 1 +winkler 1 +teubner 1 +stuttgart 1 +adequaci 1 +repres 1 +marceau 1 +remot 1 +thirteenth 1 +engineeringsymposium 1 +practic 1 +pittsburgh 1 +alpern 1 +proof 1 +check 1 +eleventh 1 +onprincipl 1 +salt 1 +lake 1 +utah 1 +static 1 +digest 1 +spring 1 +optim 1 +ninth 1 +albuquerqu 1 +tosyntax 1 +eighth 1 +williamsburg 1 +softwarerep 1 +site 1 +herefor 1 +patentsrep 1 +patent 1 +number 1 +pend 1 +submissionsrep 1 +reportsrep 1 +abstract 1 +analys 1 +leeuwen 1 +mehlhorn 1 +datalogisk 1 +institut 1 +psramalingam 1 +bibliographi 1 +unpublish 1 +present 1 +klint 1 +snelt 1 +identifi 1 +differ 1 +extendedabstract 1 +ball 1 +correct 1 +reconstitut 1 +equival 1 +theorem 1 +demonstr 1 +prototyp 1 +doc 1 +studentsvisitor 1 +mooli 1 +israel 1 +jiazhen 1 +robert 1 +paig 1 +nation 1 +chiao 1 +tung 1 +taiwan 1 +north 1 +carolina 1 +chapel 1 +studentsramalingam 1 +bound 1 +programintegr 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..971e4205 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,69 @@ +saeed 4 +madison 4 +home 3 +window 3 +depart 3 +contact 3 +function 2 +statusclock 2 +comput 2 +scienc 2 +indian 2 +univ 2 +wisconsin 2 +time 2 +love 2 +beauti 2 +right 2 +pagespe 1 +statu 1 +date 1 +settimeout 1 +speed 1 +clearid 1 +cleartimeout 1 +mirza 1 +tech 1 +engin 1 +institut 1 +technolog 1 +kanpur 1 +graduat 1 +student 1 +lucknow 1 +india 1 +like 1 +spend 1 +listn 1 +film 1 +song 1 +netsurf 1 +read 1 +comic 1 +hero 1 +calvin 1 +peopl 1 +seem 1 +offic 1 +randal 1 +apart 1 +comp 1 +dayton 1 +street 1 +best 1 +email 1 +wisc 1 +friend 1 +pictur 1 +wismad 1 +suggest 1 +send 1 +check 1 +guestbook 1 +page 1 +access 1 +sinc 1 +last 1 +updat 1 +copi 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..c667393f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,32 @@ +salli 4 +peterson 2 +wisc 2 +comput 2 +home 1 +page 1 +goodwin 1 +lecturercomput 1 +scienc 1 +univers 1 +wisconsin 1 +dayton 1 +madison 1 +mail 1 +edutelephon 1 +interest 1 +desktop 1 +real 1 +time 1 +oper 1 +system 1 +program 1 +languag 1 +cours 1 +taught 1 +fall 1 +comp 1 +lectur 1 +us 1 +last 1 +chang 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..3c315429 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,129 @@ +databas 7 +marathon 6 +madison 5 +scienc 4 +amit 3 +home 3 +page 3 +comput 3 +univers 3 +interest 3 +process 3 +confer 3 +protocol 2 +wisconsin 2 +educ 2 +naughton 2 +technolog 2 +research 2 +onlin 2 +intern 2 +larg 2 +vldb 2 +server 2 +sort 2 +data 2 +inform 2 +transact 2 +olap 2 +warn 1 +reach 1 +offic 1 +email 1 +wisc 1 +snail 1 +mail 1 +dept 1 +princeton 1 +phone 1 +work 1 +toward 1 +guidanc 1 +jeff 1 +master 1 +bachelor 1 +engin 1 +indian 1 +institut 1 +madra 1 +analyt 1 +queri 1 +perform 1 +evalu 1 +public 1 +storag 1 +estim 1 +multidimension 1 +aggreg 1 +presenc 1 +hierarchi 1 +shukla 1 +prasad 1 +deshpand 1 +jeffrei 1 +karthikeyan 1 +ramasami 1 +mumbai 1 +bombai 1 +paper 1 +postscript 1 +slide 1 +present 1 +link 1 +run 1 +boston 1 +chicago 1 +york 1 +seattl 1 +georg 1 +utah 1 +relat 1 +pointer 1 +network 1 +activ 1 +bibliographi 1 +logic 1 +program 1 +competit 1 +profil 1 +spec 1 +idea 1 +name 1 +trier 1 +date 1 +sigmod 1 +base 1 +endow 1 +articl 1 +archiv 1 +massiv 1 +digit 1 +system 1 +mdd 1 +initi 1 +multimedia 1 +sourc 1 +nation 1 +industri 1 +infrastructur 1 +niiip 1 +consortium 1 +council 1 +transcoop 1 +manag 1 +support 1 +cooper 1 +applic 1 +introduct 1 +pilot 1 +softwar 1 +help 1 +needi 1 +children 1 +look 1 +asha 1 +person 1 +pageand 1 +bookmarksar 1 +also 1 +garfield 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..663bc48c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,38 @@ +ashwin 2 +page 2 +depart 2 +home 1 +construct 1 +name 1 +graduat 1 +student 1 +comput 1 +scienc 1 +univers 1 +wisconsin 1 +madison 1 +come 1 +india 1 +hadmi 1 +undergradu 1 +educ 1 +indianinstitut 1 +technolog 1 +bombai 1 +iitb 1 +fantast 1 +place 1 +worth 1 +visit 1 +like 1 +contact 1 +canfing 1 +meto 1 +find 1 +whereabout 1 +altern 1 +send 1 +email 1 +sashwin 1 +wisc 1 +bookmark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..f4dba446 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,166 @@ +like 9 +also 5 +photo 4 +iitk 4 +much 4 +sastri 3 +year 3 +album 3 +scienc 3 +time 3 +cricket 3 +someth 3 +read 3 +subramanya 2 +home 2 +mine 2 +come 2 +hospet 2 +awai 2 +tungabhadra 2 +place 2 +dont 2 +long 2 +kanpur 2 +depart 2 +comput 2 +graduat 2 +wisconsin 2 +madison 2 +interest 2 +field 2 +favourit 2 +anyth 2 +compani 2 +friend 2 +pleasant 2 +watch 2 +back 2 +pagei 1 +mugshot 1 +town 1 +karnataka 1 +india 1 +school 1 +near 1 +hampi 1 +ruin 1 +vijayanagara 1 +empir 1 +built 1 +across 1 +river 1 +beauti 1 +unfortun 1 +photograph 1 +would 1 +scan 1 +undergradu 1 +indian 1 +institut 1 +technolog 1 +engin 1 +wonder 1 +fewphotograph 1 +gokul 1 +maintain 1 +contain 1 +mani 1 +class 1 +homepag 1 +inform 1 +classmatesat 1 +presentcurr 1 +student 1 +univers 1 +plan 1 +cours 1 +registeredfor 1 +spring 1 +interestsmi 1 +academ 1 +architectur 1 +program 1 +languag 1 +compil 1 +hope 1 +sport 1 +us 1 +playphatta 1 +tenni 1 +ball 1 +version 1 +thati 1 +champ 1 +provid 1 +entertainmentin 1 +bookmark 1 +link 1 +site 1 +enjoi 1 +listen 1 +music 1 +must 1 +consid 1 +hard 1 +rock 1 +metal 1 +realli 1 +donot 1 +whatev 1 +seinfeld 1 +sshow 1 +goe 1 +voraci 1 +reader 1 +rather 1 +whati 1 +better 1 +horror 1 +fantasi 1 +neither 1 +fiction 1 +unsuccesfulli 1 +tri 1 +grip 1 +earth 1 +know 1 +mean 1 +hint 1 +romanc 1 +ifposs 1 +jeffrei 1 +archer 1 +author 1 +date 1 +jane 1 +austen 1 +pride 1 +prejudic 1 +talk 1 +ramesh 1 +mahadeven 1 +sarticl 1 +make 1 +plai 1 +bridg 1 +pick 1 +wonderfulgam 1 +itagain 1 +solv 1 +crossword 1 +cryptic 1 +kind 1 +inth 1 +past 1 +devot 1 +hobbi 1 +last 1 +updat 1 +januari 1 +send 1 +comment 1 +suggest 1 +wisc 1 +eduunivers 1 +page 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..0507d101 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,219 @@ +scout 18 +servic 11 +internet 9 +project 6 +inform 6 +report 6 +scienc 5 +madison 5 +research 5 +wisconsin 4 +nation 4 +includ 4 +staff 4 +susan 3 +page 3 +support 3 +time 3 +join 3 +year 3 +intern 3 +open 3 +posit 3 +departmentunivers 2 +wisc 2 +manag 2 +comput 2 +depart 2 +sciencefound 2 +user 2 +educ 2 +commun 2 +provid 2 +tool 2 +goal 2 +andeduc 2 +work 2 +week 2 +organ 2 +toolkit 2 +kid 2 +peopl 2 +receiv 2 +email 2 +read 2 +resourc 2 +public 2 +devot 2 +merit 2 +also 2 +network 2 +base 2 +diego 2 +agreement 2 +reloc 2 +speciallibrarian 2 +systemadministr 2 +addit 2 +specif 2 +calcari 1 +home 1 +calcarimanag 1 +servicescomput 1 +madisonsc 1 +edumi 1 +titl 1 +univers 1 +scoutservic 1 +internicand 1 +thehigh 1 +bestresourc 1 +help 1 +effect 1 +filter 1 +hundr 1 +item 1 +edit 1 +import 1 +present 1 +multipl 1 +usabl 1 +format 1 +happen 1 +come 1 +soonth 1 +sprout 1 +newslett 1 +written 1 +andthousand 1 +annotatedlist 1 +best 1 +newli 1 +discov 1 +kind 1 +select 1 +itemsinclud 1 +issu 1 +happeningspost 1 +everi 1 +weekdai 1 +thousand 1 +orth 1 +newsgroup 1 +moreinform 1 +profession 1 +background 1 +involv 1 +wide 1 +sinc 1 +wheni 1 +arbor 1 +thensfnet 1 +backbon 1 +informationservic 1 +divis 1 +spent 1 +three 1 +speak 1 +tonat 1 +higher 1 +audienc 1 +internetand 1 +develop 1 +produc 1 +seminarseri 1 +first 1 +seminar 1 +seri 1 +focus 1 +need 1 +internetend 1 +later 1 +becam 1 +director 1 +forcerfnet 1 +respect 1 +wrote 1 +propos 1 +result 1 +award 1 +part 1 +internicproject 1 +cooper 1 +third 1 +termin 1 +theport 1 +chose 1 +continu 1 +workof 1 +elect 1 +andrequest 1 +approv 1 +thecomput 1 +heartilyagre 1 +futur 1 +plan 1 +servicesat 1 +jack 1 +solock 1 +expand 1 +theaddit 1 +june 1 +matthew 1 +livesei 1 +aproject 1 +assist 1 +expans 1 +thescout 1 +disciplin 1 +asscout 1 +area 1 +studi 1 +branch 1 +researcharea 1 +collabor 1 +group 1 +campus 1 +potenti 1 +topic 1 +includenetwork 1 +discoveri 1 +retriev 1 +nidr 1 +anddisciplin 1 +orient 1 +gather 1 +depend 1 +onth 1 +natur 1 +hire 1 +willincludecomput 1 +graduat 1 +undergradu 1 +level 1 +visit 1 +site 1 +find 1 +ofour 1 +mail 1 +list 1 +ifyou 1 +interest 1 +appli 1 +theonlin 1 +descript 1 +special 1 +librarian 1 +send 1 +aresum 1 +write 1 +sampl 1 +address 1 +feel 1 +free 1 +contactm 1 +telephon 1 +calcariinternet 1 +dayton 1 +street 1 +scal 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..5e9c83a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,53 @@ +chandrasekar 3 +comput 3 +scienc 3 +page 2 +coimbator 2 +school 2 +educ 2 +madison 2 +home 1 +welcom 1 +homepag 1 +worri 1 +happi 1 +present 1 +graduat 1 +student 1 +depart 1 +past 1 +born 1 +june 1 +southern 1 +state 1 +tamilnadu 1 +inindia 1 +high 1 +higher 1 +secondari 1 +undergradu 1 +theindian 1 +institut 1 +technolog 1 +kharagpur 1 +major 1 +dept 1 +engin 1 +person 1 +stuff 1 +resid 1 +kendal 1 +avenu 1 +officedept 1 +dayton 1 +sivasankaran 1 +schandra 1 +wisc 1 +last 1 +updat 1 +finger 1 +find 1 +whereabout 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..9a2c27bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,31 @@ +schnarr 4 +wisc 2 +wisconsin 2 +eric 1 +home 1 +pageer 1 +research 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usaphon 1 +advisor 1 +larusresearch 1 +interest 1 +architectur 1 +descript 1 +languagesfunct 1 +languag 1 +designinterest 1 +link 1 +wind 1 +tunnel 1 +sacm 1 +hockei 1 +club 1 +dragon 1 +byte 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..3e44f617 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,66 @@ +schoina 5 +yanni 2 +page 2 +research 2 +mark 2 +hill 2 +share 2 +memori 2 +ioanni 2 +babak 2 +falsafi 2 +alvin 2 +lebeck 2 +steven 2 +reinhardt 2 +jame 2 +laru 2 +david 2 +wood 2 +univers 2 +crete 2 +iraklio 2 +home 1 +wisc 1 +assistantdepart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +note 1 +construct 1 +advisor 1 +interest 1 +parallel 1 +systemspubl 1 +fine 1 +grain 1 +access 1 +control 1 +distribut 1 +sixth 1 +intern 1 +confer 1 +architectur 1 +support 1 +programminglanguag 1 +oper 1 +system 1 +asplo 1 +applic 1 +specif 1 +protocol 1 +user 1 +level 1 +ann 1 +roger 1 +supercomput 1 +educ 1 +last 1 +updat 1 +juli 1 +cretan 1 +cook 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..2a74c2b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,37 @@ +comput 3 +beverli 2 +seavei 2 +danc 2 +ramayana 2 +india 2 +southeast 2 +asia 2 +finger 2 +home 1 +page 1 +current 1 +regist 1 +grad 1 +student 1 +scienc 1 +switch 1 +special 1 +degre 1 +biologi 1 +interest 1 +mine 1 +includ 1 +asian 1 +classic 1 +differ 1 +version 1 +drama 1 +ramakien 1 +wish 1 +could 1 +give 1 +account 1 +hairbal 1 +keyboard 1 +keeper 1 +instead 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..647b6c64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,62 @@ +home 6 +page 5 +madison 3 +colvil 2 +wisc 2 +welcom 2 +school 2 +uwisc 2 +world 2 +scott 1 +pagescott 1 +mail 1 +eduoffic 1 +address 1 +comput 1 +scienc 1 +offic 1 +dayton 1 +franc 1 +well 1 +back 1 +univers 1 +wisconsin 1 +seen 1 +largest 1 +ball 1 +chees 1 +want 1 +link 1 +pagein 1 +addit 1 +list 1 +find 1 +interest 1 +hopefulli 1 +also 1 +enjoi 1 +beer 1 +drink 1 +game 1 +absolut 1 +add 1 +caffein 1 +rate 1 +soda 1 +guid 1 +lock 1 +pickingand 1 +educ 1 +artsi 1 +fact 1 +book 1 +constitut 1 +english 1 +dictionari 1 +roget 1 +thesauru 1 +poetri 1 +databas 1 +last 1 +updat 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..7c6feb1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,59 @@ +motion 5 +anim 4 +steve 2 +seitz 2 +imag 2 +analysi 2 +problem 2 +graphic 2 +model 2 +virtual 2 +writeup 1 +charact 1 +control 1 +chuck 1 +dyerour 1 +research 1 +motiv 1 +teachinga 1 +perform 1 +realist 1 +hasit 1 +root 1 +cartoon 1 +modern 1 +applic 1 +tocomput 1 +realiti 1 +teleconferenc 1 +robot 1 +task 1 +endow 1 +knowledg 1 +performa 1 +repertoir 1 +interest 1 +learn 1 +beinvok 1 +directli 1 +high 1 +level 1 +cue 1 +smile 1 +walk 1 +infer 1 +anabstract 1 +goal 1 +store 1 +cu 1 +levelev 1 +input 1 +devic 1 +sequenc 1 +project 1 +includ 1 +period 1 +track 1 +rigid 1 +nonrigid 1 +object 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..d5020d01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,85 @@ +view 9 +interpol 9 +imag 9 +scene 5 +seitz 3 +synthesi 3 +techniqu 3 +morph 3 +work 3 +origin 3 +steve 2 +autom 2 +produc 2 +physic 2 +valid 2 +describ 2 +us 2 +investig 1 +chuck 1 +dyerw 1 +devis 1 +provabl 1 +correct 1 +creat 1 +basi 1 +reli 1 +geometr 1 +known 1 +graphicscommun 1 +intermedi 1 +although 1 +techniquescurr 1 +enjoi 1 +widespread 1 +theoret 1 +validityha 1 +establish 1 +particular 1 +viewsof 1 +sequenc 1 +ofthat 1 +surprisingli 1 +answer 1 +provid 1 +first 1 +undergo 1 +simplerectif 1 +procedur 1 +certain 1 +assumpt 1 +visibl 1 +theproject 1 +process 1 +satisfi 1 +stereo 1 +todetermin 1 +correspond 1 +recent 1 +consid 1 +user 1 +interact 1 +guid 1 +comput 1 +three 1 +differ 1 +pair 1 +therectifi 1 +shown 1 +left 1 +right 1 +click 1 +theinterpol 1 +center 1 +mpeg 1 +movi 1 +show 1 +computedinterpol 1 +dyer 1 +proc 1 +workshop 1 +represent 1 +visual 1 +last 1 +chang 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..4a9f3220 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,94 @@ +morph 11 +view 8 +imag 7 +mpeg 6 +movi 6 +interpol 4 +scene 3 +techniqu 3 +differ 3 +viewpoint 3 +click 3 +resolut 3 +steve 2 +seitz 2 +chuck 2 +dyer 2 +appear 2 +us 2 +object 2 +pose 2 +simpl 2 +camera 2 +shape 2 +chang 2 +produc 2 +mona 2 +lisa 2 +frame 2 +investig 1 +relat 1 +public 1 +siggraph 1 +toward 1 +base 1 +represent 1 +icpr 1 +gener 1 +compel 1 +transit 1 +betweenimag 1 +howev 1 +often 1 +causeunnatur 1 +distort 1 +difficult 1 +correct 1 +manual 1 +basic 1 +principl 1 +projectivegeometri 1 +paper 1 +introduc 1 +extens 1 +morphingthat 1 +correctli 1 +handl 1 +project 1 +transform 1 +call 1 +work 1 +prewarp 1 +imagesprior 1 +comput 1 +postwarp 1 +knowledg 1 +requir 1 +appliedto 1 +photograph 1 +draw 1 +well 1 +render 1 +abil 1 +synthes 1 +structureafford 1 +wide 1 +varieti 1 +interest 1 +effect 1 +imagetransform 1 +taken 1 +illus 1 +physic 1 +move 1 +virtual 1 +face 1 +simultan 1 +facial 1 +color 1 +reflect 1 +high 1 +jude 1 +shavlik 1 +last 1 +septemb 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..9d0f75fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,127 @@ +period 16 +imag 9 +motion 8 +trace 6 +inform 4 +refer 4 +cycl 4 +cyclic 3 +frame 3 +determin 3 +correspond 3 +tempor 3 +composit 3 +real 2 +move 2 +sequenc 2 +object 2 +poscript 2 +paper 2 +recov 2 +turntabl 2 +scene 2 +pure 2 +us 2 +enhanc 2 +invari 2 +analysi 1 +steve 1 +seitz 1 +chuck 1 +dyermani 1 +life 1 +instanc 1 +human 1 +locomotori 1 +walk 1 +run 1 +skip 1 +shuffl 1 +areperiod 1 +person 1 +havedevelop 1 +approach 1 +could 1 +beenproduc 1 +whose 1 +unlik 1 +previou 1 +attempt 1 +ourapproach 1 +allow 1 +camera 1 +film 1 +click 1 +tracethi 1 +show 1 +line 1 +imagesequ 1 +phonograph 1 +ramp 1 +moment 1 +timewher 1 +momentarili 1 +slow 1 +shownsuperimpos 1 +error 1 +surfac 1 +repeat 1 +tend 1 +perfectli 1 +even 1 +variesslightli 1 +next 1 +physic 1 +import 1 +changesin 1 +gener 1 +defin 1 +motionsthat 1 +make 1 +variat 1 +explicit 1 +represent 1 +call 1 +compact 1 +describ 1 +evolutionof 1 +without 1 +spatial 1 +quantiti 1 +asposit 1 +veloc 1 +delimit 1 +identifi 1 +correspondencesacross 1 +provid 1 +mean 1 +parsinga 1 +addit 1 +sever 1 +featur 1 +canb 1 +deriv 1 +relat 1 +natur 1 +locat 1 +irregular 1 +tracecan 1 +also 1 +medic 1 +fromdiffer 1 +furthermor 1 +reliabl 1 +recoveredfrom 1 +view 1 +fashion 1 +theori 1 +affin 1 +clickher 1 +heart 1 +angiograph 1 +bottom 1 +note 1 +additionalstructur 1 +visibl 1 +appar 1 +singl 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..34d15ddc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,47 @@ +seitz 4 +imag 3 +comput 3 +steve 2 +motion 2 +analysi 2 +vision 2 +view 2 +mpeg 2 +movi 2 +click 2 +home 1 +page 1 +wisc 1 +graduat 1 +student 1 +berkelei 1 +math 1 +area 1 +interest 1 +base 1 +render 1 +machin 1 +graphic 1 +research 1 +project 1 +morph 1 +synthesi 1 +show 1 +interpol 1 +left 1 +exampl 1 +cyclic 1 +recent 1 +public 1 +stuff 1 +frequent 1 +link 1 +wisconsin 1 +group 1 +surreal 1 +cach 1 +closer 1 +look 1 +last 1 +chang 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..d289075e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,102 @@ +madison 13 +group 8 +mlrg 8 +learn 7 +machin 6 +page 5 +schedul 5 +graduat 5 +home 4 +archiv 4 +recent 4 +paper 4 +local 4 +link 4 +contain 3 +relev 3 +read 3 +seminar 3 +shavlik 3 +also 3 +comput 3 +research 2 +wisconsin 2 +tabl 2 +content 2 +member 2 +dataset 2 +domain 2 +cours 2 +august 2 +jude 2 +access 2 +current 2 +dept 2 +librari 2 +wisc 2 +agent 2 +inform 1 +thememb 1 +univers 1 +theori 1 +us 1 +extern 1 +carolyn 1 +allex 1 +jonathon 1 +bodner 1 +kevin 1 +cherkauer 1 +mark 1 +craven 1 +tina 1 +eliassi 1 +richard 1 +maclin 1 +david 1 +opitz 1 +papersvisit 1 +describ 1 +public 1 +ascii 1 +file 1 +list 1 +recentabstractsi 1 +avail 1 +theoriesy 1 +directori 1 +severalml 1 +testb 1 +breast 1 +cancer 1 +databas 1 +prof 1 +olvi 1 +mangasarian 1 +sgroup 1 +line 1 +math 1 +program 1 +biologi 1 +neurosci 1 +vision 1 +robot 1 +doit 1 +center 1 +mathemat 1 +scienc 1 +gopher 1 +school 1 +abstract 1 +select 1 +journal 1 +mostli 1 +wendt 1 +readabl 1 +proc 1 +workshop 1 +held 1 +intern 1 +confer 1 +last 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..5093d4ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,96 @@ +wisconsin 5 +shubu 4 +mukherje 4 +madison 4 +research 3 +comput 3 +isca 3 +home 2 +page 2 +wind 2 +tunnel 2 +public 2 +architect 2 +interest 2 +univers 2 +coher 2 +share 2 +memori 2 +cach 2 +protocol 2 +ppopp 2 +simul 2 +wisc 1 +fiance 1 +mimi 1 +nephew 1 +avirup 1 +month 1 +graduat 1 +assist 1 +scienc 1 +departmentunivers 1 +west 1 +dayton 1 +street 1 +usaphon 1 +shubhendu 1 +click 1 +button 1 +jump 1 +correspond 1 +articl 1 +advisor 1 +mark 1 +hill 1 +project 1 +world 1 +wide 1 +badger 1 +ballroom 1 +danc 1 +team 1 +person 1 +hobbi 1 +morph 1 +dionisio 1 +courtesi 1 +steve 1 +seitz 1 +random 1 +linkseducationph 1 +spring 1 +expect 1 +tech 1 +indian 1 +institut 1 +technolog 1 +kanpur 1 +india 1 +summari 1 +network 1 +interfac 1 +dissert 1 +cachabl 1 +queue 1 +design 1 +space 1 +progress 1 +distribut 1 +mechan 1 +cooper 1 +commod 1 +workstat 1 +submit 1 +custom 1 +irregular 1 +applic 1 +grai 1 +softwar 1 +dirsw 1 +parallel 1 +tutori 1 +copyright 1 +copi 1 +right 1 +reserv 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..f94364d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,25 @@ +michael 2 +siff 2 +home 1 +page 1 +philosophi 1 +research 1 +academ 1 +interest 1 +run 1 +club 1 +fall 1 +midwest 1 +seminar 1 +wonder 1 +wai 1 +wast 1 +time 1 +new 1 +inform 1 +resourc 1 +book 1 +movi 1 +televis 1 +sport 1 +humor 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..7b42fb92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,24 @@ +skrentni 3 +wisconsin 3 +madison 2 +wisc 2 +univers 2 +home 1 +page 1 +lecturerc 1 +coordinatorgradu 1 +studentcomput 1 +scienc 1 +departmentunivers 1 +dayton 1 +offic 1 +comput 1 +sciencesemail 1 +edutelephon 1 +relat 1 +link 1 +depart 1 +groupskrentni 1 +last 1 +updat 1 +februari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..7bbca3da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,53 @@ +wisconsin 5 +univers 5 +madison 4 +scienc 3 +comput 3 +system 3 +unix 3 +bryan 2 +intellig 2 +help 2 +knowledg 2 +represent 2 +travi 2 +util 2 +home 1 +page 1 +graduat 1 +studentcomput 1 +departmentunivers 1 +dayton 1 +offic 1 +mail 1 +wisc 1 +edutelephon 1 +purdu 1 +interest 1 +human 1 +interact 1 +oper 1 +activ 1 +select 1 +recent 1 +public 1 +step 1 +toward 1 +technic 1 +report 1 +april 1 +miller 1 +fredriksen 1 +empir 1 +studi 1 +reliabl 1 +commun 1 +relat 1 +link 1 +depart 1 +group 1 +professor 1 +larri 1 +advisor 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..2b4d38e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,128 @@ +system 7 +intellig 7 +artifici 5 +wisc 4 +expert 4 +comput 4 +data 3 +databas 3 +larri 2 +travi 2 +interest 2 +procedur 2 +control 2 +deduct 2 +support 2 +social 2 +research 2 +recent 2 +larg 2 +inform 2 +integr 2 +develop 2 +project 2 +altern 2 +metaphor 2 +west 2 +magazin 2 +home 1 +page 1 +travisprofessorcomput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +dayton 1 +madison 1 +mail 1 +edutelephon 1 +univers 1 +californa 1 +angel 1 +automat 1 +understand 1 +complex 1 +philosoph 1 +foundat 1 +ofartifici 1 +manag 1 +implic 1 +summari 1 +center 1 +around 1 +us 1 +logic 1 +basi 1 +knowledg 1 +formal 1 +augment 1 +databasesystem 1 +work 1 +focus 1 +automaticdeduct 1 +design 1 +contruct 1 +displai 1 +test 1 +high 1 +level 1 +abstract 1 +pattern 1 +form 1 +informationcontain 1 +heterogen 1 +special 1 +attent 1 +beingdevot 1 +represent 1 +geograph 1 +waysthat 1 +enhanc 1 +visualiz 1 +map 1 +activ 1 +involv 1 +sever 1 +andwith 1 +incorpor 1 +model 1 +visual 1 +aid 1 +singl 1 +organiz 1 +issu 1 +associ 1 +introduct 1 +technolog 1 +analysi 1 +suppositionsunderli 1 +approach 1 +current 1 +student 1 +chuck 1 +ohar 1 +bryan 1 +scott 1 +swanson 1 +andi 1 +whitsitt 1 +derek 1 +zahn 1 +public 1 +could 1 +failur 1 +implement 1 +oravec 1 +appear 1 +journal 1 +softwar 1 +reflex 1 +examin 1 +falsework 1 +societi 1 +landscap 1 +link 1 +dept 1 +group 1 +last 1 +chang 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..1ab04ef0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,63 @@ +comput 4 +sodani 3 +depart 3 +scienc 3 +madison 3 +india 3 +link 3 +avinash 2 +page 2 +univers 2 +wisconsin 2 +interest 2 +project 2 +packag 2 +relat 2 +home 1 +wisc 1 +graduat 1 +student 1 +west 1 +dayton 1 +street 1 +educ 1 +tech 1 +hon 1 +indian 1 +institut 1 +technolog 1 +kharagpur 1 +juli 1 +academ 1 +architectur 1 +multiscalar 1 +kestrel 1 +program 1 +languag 1 +compil 1 +cours 1 +java 1 +download 1 +meet 1 +batch 1 +mate 1 +info 1 +center 1 +rank 1 +new 1 +hindu 1 +onlin 1 +edit 1 +random 1 +look 1 +kgpite 1 +follow 1 +toll 1 +free 1 +directori 1 +will 1 +world 1 +cricket 1 +access 1 +time 1 +sinc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..c0e468f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,252 @@ +sohi 22 +comput 15 +symposium 11 +austin 10 +intern 10 +architectur 9 +processor 9 +multiscalar 8 +scienc 7 +parallel 7 +research 6 +current 6 +univers 6 +instruct 6 +memori 6 +program 6 +microarchitectur 6 +recent 5 +depart 5 +wisconsin 5 +madison 5 +execut 5 +breach 5 +annual 5 +decemb 5 +dynam 5 +franklin 5 +pnevmatikato 5 +engin 4 +system 4 +gener 4 +micro 4 +technic 4 +report 4 +address 3 +level 3 +multiprocessor 3 +design 3 +perform 3 +cycl 3 +character 3 +exploit 3 +novemb 3 +file 3 +load 3 +ieee 3 +vijaykumar 3 +access 3 +guard 3 +predict 3 +fine 3 +grain 3 +wisc 2 +educ 2 +interest 2 +summari 2 +graduat 2 +student 2 +talk 2 +illinoi 2 +urbana 2 +electr 2 +process 2 +compil 2 +investig 2 +ordinari 2 +group 2 +sever 2 +issu 2 +studi 2 +avail 2 +paradigm 2 +given 2 +compress 2 +postscript 2 +framemak 2 +appear 2 +zero 2 +reduc 2 +hardwar 2 +mechan 2 +transact 2 +streamlin 2 +regist 2 +effici 2 +detect 2 +pointer 2 +arrai 2 +branch 2 +analysi 2 +april 2 +guri 1 +home 1 +page 1 +gurindar 1 +associ 1 +professor 1 +andelectr 1 +public 1 +graduatesaddress 1 +sciencesunivers 1 +west 1 +dayton 1 +streetmadison 1 +usasohi 1 +eduphon 1 +department 1 +offic 1 +elect 1 +electron 1 +birla 1 +institut 1 +technolog 1 +pilani 1 +india 1 +share 1 +focus 1 +thehighest 1 +uniprocessor 1 +circa 1 +plenti 1 +transistor 1 +availableon 1 +chip 1 +challeng 1 +resourc 1 +getth 1 +highest 1 +possibl 1 +sequenti 1 +target 1 +sustain 1 +ofov 1 +numer 1 +applic 1 +needto 1 +resolv 1 +goal 1 +achiev 1 +thenatur 1 +numericappl 1 +order 1 +understand 1 +andhow 1 +could 1 +bulk 1 +effort 1 +expend 1 +continu 1 +thedevelop 1 +model 1 +novel 1 +develop 1 +andcarri 1 +detail 1 +simul 1 +assessth 1 +potenti 1 +concept 1 +todd 1 +scott 1 +andrea 1 +moshovo 1 +vijaykumarrec 1 +talkswil 1 +set 1 +import 1 +futur 1 +risc 1 +held 1 +watson 1 +researchcent 1 +yorktown 1 +height 1 +place 1 +publicationshigh 1 +bandwidth 1 +translat 1 +multipl 1 +inrd 1 +appendix 1 +ofdetail 1 +resultsi 1 +also 1 +support 1 +latencyt 1 +superscalar 1 +processorsj 1 +smith 1 +proceed 1 +reorder 1 +referencesm 1 +data 1 +cach 1 +fast 1 +calcul 1 +anatomi 1 +request 1 +combin 1 +arbitrari 1 +interconnect 1 +network 1 +lebeck 1 +distribut 1 +error 1 +sigplan 1 +confer 1 +languag 1 +implement 1 +goodman 1 +handbook 1 +press 1 +control 1 +flow 1 +traffic 1 +inter 1 +oper 1 +communicationin 1 +expand 1 +split 1 +window 1 +depend 1 +errorst 1 +processorsd 1 +knapsack 1 +hierarchi 1 +componentt 1 +tetra 1 +evalu 1 +serial 1 +processorst 1 +juli 1 +gradstodd 1 +softwar 1 +latencydionisio 1 +incorpor 1 +exist 1 +setsmanoj 1 +architecturemark 1 +friedman 1 +januari 1 +prolog 1 +executionsriram 1 +vajapeyam 1 +crai 1 +processormen 1 +chow 1 +chiang 1 +septemb 1 +base 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..8b87fa51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,118 @@ +solomon 8 +comput 4 +manag 4 +home 3 +page 3 +marvin 3 +scienc 3 +softwar 3 +program 3 +proc 3 +conf 3 +data 3 +michael 3 +andmarvin 3 +astech 3 +report 3 +chair 2 +depart 2 +wisconsin 2 +madison 2 +interest 2 +object 2 +databas 2 +system 2 +develop 2 +environ 2 +languag 2 +sigmod 2 +june 2 +mark 2 +mcauliff 2 +carei 2 +odyssea 2 +tsatalo 2 +version 2 +appear 2 +persist 2 +paul 2 +adam 2 +avail 2 +shore 2 +project 2 +professor 1 +former 1 +goodman 1 +univers 1 +west 1 +dayton 1 +street 1 +phone 1 +wisc 1 +research 1 +orient 1 +support 1 +distribut 1 +oper 1 +network 1 +design 1 +implement 1 +theori 1 +recent 1 +publicationstoward 1 +effect 1 +effici 1 +free 1 +space 1 +abstractpostscriptth 1 +gmap 1 +versatil 1 +tool 1 +physic 1 +independ 1 +larg 1 +septemb 1 +andyanni 1 +ioannidi 1 +abstractpostscriptexpand 1 +inth 1 +vldb 1 +journalv 1 +april 1 +abstractpostscriptshor 1 +applic 1 +david 1 +dewitt 1 +franklin 1 +nanci 1 +hall 1 +jeffrei 1 +naughton 1 +daniel 1 +schuh 1 +seth 1 +white 1 +andmichael 1 +zwillingavail 1 +overview 1 +capitl 1 +fourth 1 +intern 1 +workshop 1 +configur 1 +updat 1 +lectur 1 +note 1 +logic 1 +point 1 +graphic 1 +interfac 1 +room 1 +built 1 +us 1 +java 1 +spring 1 +univ 1 +photoalbum 1 +todai 1 +dilbert 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..9f4bee86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,6 @@ +sowmya 3 +home 2 +page 2 +welcom 1 +subramanian 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..fc73f5d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,78 @@ +shilpa 3 +comput 3 +scienc 3 +lawand 2 +home 2 +page 2 +wisconsin 2 +link 2 +stuff 2 +wisc 2 +welcom 1 +pagei 1 +graduat 1 +student 1 +depart 1 +univers 1 +madison 1 +person 1 +stuffa 1 +pastfor 1 +info 1 +schoolher 1 +resum 1 +html 1 +ascii 1 +second 1 +love 1 +us 1 +resours 1 +want 1 +place 1 +syster 1 +women 1 +relat 1 +madisonsurf 1 +madisonst 1 +inform 1 +serverth 1 +hoofer 1 +sail 1 +clubowl 1 +music 1 +book 1 +movi 1 +java 1 +signatur 1 +meet 1 +first 1 +lovesnowi 1 +homepag 1 +cool 1 +linksher 1 +iswher 1 +finger 1 +three 1 +judg 1 +panel 1 +philadelphia 1 +vote 1 +constitut 1 +follow 1 +read 1 +decis 1 +access 1 +time 1 +sinc 1 +june 1 +send 1 +comment 1 +suggest 1 +email 1 +tossl 1 +shilpal 1 +thru 1 +guest 1 +formlast 1 +modifi 1 +juli 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..5cee77e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,32 @@ +home 5 +comput 4 +scienc 4 +page 3 +jeremi 2 +stenglein 2 +depart 2 +madison 2 +section 2 +graduat 1 +student 1 +univers 1 +wisconsin 1 +west 1 +dayton 1 +street 1 +offic 1 +phone 1 +mail 1 +stenglei 1 +wisc 1 +teach 1 +gener 1 +take 1 +construct 1 +compil 1 +link 1 +pageth 1 +simpson 1 +pageespn 1 +sport 1 +hotwir 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..a66a9319 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,57 @@ +reinhardt 2 +page 2 +wisconsin 2 +depart 2 +steve 1 +home 1 +steven 1 +graduat 1 +student 1 +computerarchitectur 1 +work 1 +wind 1 +tunnelgroup 1 +advisor 1 +david 1 +wood 1 +although 1 +project 1 +mark 1 +hill 1 +andjim 1 +laru 1 +often 1 +feel 1 +free 1 +tell 1 +mewhat 1 +well 1 +plan 1 +finish 1 +fall 1 +join 1 +faculti 1 +ofth 1 +univers 1 +michigan 1 +eec 1 +januari 1 +interest 1 +find 1 +publicationsresearch 1 +summari 1 +email 1 +stever 1 +wisc 1 +click 1 +finger 1 +phone 1 +comput 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +usalast 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..bae0821c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,58 @@ +comput 4 +john 3 +home 3 +strikwerda 2 +page 2 +scienc 2 +wisconsin 2 +inform 2 +numer 2 +analysi 2 +chicago 2 +nathan 2 +professor 1 +strikwerdadepart 1 +sciencesunivers 1 +madison 1 +west 1 +dayton 1 +streetmadison 1 +email 1 +strik 1 +wisc 1 +telephon 1 +fall 1 +teach 1 +problem 1 +solv 1 +us 1 +begin 1 +januari 1 +assign 1 +nation 1 +foundat 1 +year 1 +click 1 +qualifi 1 +exam 1 +research 1 +interest 1 +fluid 1 +dynamicsmyoffici 1 +depart 1 +pageoth 1 +stuff 1 +field 1 +museum 1 +point 1 +search 1 +rate 1 +best 1 +tribun 1 +talk 1 +radio 1 +show 1 +car 1 +footballmi 1 +kid 1 +drew 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..ff20009d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,30 @@ +interest 2 +subba 1 +home 1 +page 1 +thing 1 +enjoi 1 +calvin 1 +hobb 1 +late 1 +show 1 +david 1 +letterman 1 +seinfeld 1 +prooocessor 1 +histor 1 +paper 1 +evalu 1 +stream 1 +buffer 1 +secondari 1 +cach 1 +replac 1 +decoupl 1 +integ 1 +execut 1 +superscalar 1 +processor 1 +subbarao 1 +cambridg 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..69bdcb93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,83 @@ +parallel 3 +taiwan 3 +chiang 2 +page 2 +processor 2 +schedul 2 +mari 2 +vernon 2 +home 1 +depart 1 +univers 1 +wisconsin 1 +madisonoffic 1 +stelephon 1 +mail 1 +suhui 1 +wisc 1 +educlick 1 +send 1 +emailoffic 1 +hour 1 +thur 1 +still 1 +construct 1 +ta 1 +fall 1 +public 1 +applic 1 +characterist 1 +limit 1 +preemption 1 +complet 1 +polici 1 +rajesh 1 +mansharamani 1 +proc 1 +sigmetr 1 +conf 1 +measur 1 +model 1 +comput 1 +system 1 +nashvil 1 +dynam 1 +static 1 +quantum 1 +base 1 +alloc 1 +workshop 1 +strategi 1 +process 1 +conjunct 1 +ipp 1 +april 1 +search 1 +engin 1 +yahoo 1 +sourc 1 +resourc 1 +bibliographi 1 +world 1 +wide 1 +virtual 1 +librari 1 +subject 1 +catalogu 1 +link 1 +relat 1 +network 1 +servic 1 +sinanet 1 +shop 1 +magzin 1 +new 1 +job 1 +calendar 1 +seednet 1 +vistor 1 +guid 1 +academia 1 +sinica 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..7d6c98ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,192 @@ +link 10 +page 8 +site 7 +new 5 +madison 5 +weather 4 +scienc 3 +current 3 +info 3 +variou 3 +interest 3 +articl 3 +david 2 +sundaram 2 +stukel 2 +browbeck 2 +feet 2 +class 2 +take 2 +comput 2 +view 2 +specif 2 +philosoph 2 +scientif 2 +onlin 2 +nation 2 +astronomi 2 +channel 2 +homepag 1 +upon 1 +effronteri 1 +push 1 +hand 1 +sever 1 +patient 1 +femor 1 +arteri 1 +blood 1 +spurt 1 +blind 1 +anesthetist 1 +hall 1 +scream 1 +tri 1 +knee 1 +groin 1 +manag 1 +hamstr 1 +scalpel 1 +crawl 1 +floor 1 +stab 1 +leg 1 +voilet 1 +baboon 1 +assist 1 +woman 1 +ever 1 +care 1 +damn 1 +realli 1 +wig 1 +climb 1 +tabl 1 +pois 1 +jump 1 +stomp 1 +cop 1 +rush 1 +william 1 +burrough 1 +nake 1 +lunch 1 +construct 1 +catapult 1 +reader 1 +choos 1 +index 1 +brief 1 +relat 1 +dedic 1 +smart 1 +cloth 1 +also 1 +steve 1 +mann 1 +see 1 +wearabl 1 +camera 1 +inform 1 +artifici 1 +life 1 +santa 1 +institut 1 +project 1 +call 1 +tierra 1 +thoma 1 +recent 1 +dilbert 1 +strip 1 +technic 1 +math 1 +joke 1 +somewher 1 +artist 1 +natur 1 +physic 1 +conscious 1 +surviv 1 +research 1 +laboratori 1 +destruct 1 +show 1 +organ 1 +arcosanti 1 +arcolog 1 +outsid 1 +phoenix 1 +krishnamurti 1 +foundat 1 +tell 1 +centuri 1 +beat 1 +writer 1 +includ 1 +pictur 1 +fill 1 +wait 1 +sourc 1 +packer 1 +scientist 1 +regist 1 +harass 1 +mail 1 +reward 1 +dozen 1 +factoid 1 +astound 1 +friend 1 +american 1 +advantag 1 +hypertext 1 +addit 1 +provid 1 +select 1 +print 1 +edit 1 +publish 1 +weekli 1 +contain 1 +smaller 1 +hindu 1 +newspap 1 +india 1 +onion 1 +local 1 +depart 1 +washburn 1 +observatori 1 +public 1 +univers 1 +len 1 +insignific 1 +piec 1 +histori 1 +obtain 1 +follow 1 +webweath 1 +servic 1 +home 1 +late 1 +timothi 1 +leari 1 +numer 1 +written 1 +note 1 +optimist 1 +noam 1 +chomski 1 +disinform 1 +great 1 +list 1 +conspiraci 1 +theori 1 +buri 1 +within 1 +ultra 1 +trendi 1 +movi 1 +review 1 +back 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..3e1dbf53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,9 @@ +swander 2 +offic 2 +brian 1 +home 1 +pagebrian 1 +think 1 +hour 1 +bookmark 1 +mark 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..bcbe7057 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,107 @@ +system 4 +perform 4 +snowboard 4 +tamch 3 +oper 3 +talk 3 +paper 3 +ariel 2 +comput 2 +scienc 2 +fall 2 +parallel 2 +distribut 2 +differ 2 +municip 2 +bond 2 +share 2 +memori 2 +spring 2 +network 2 +file 2 +version 2 +tamchesari 1 +research 1 +assistantemail 1 +wisc 1 +depart 1 +west 1 +dayton 1 +street 1 +madison 1 +typic 1 +pose 1 +angri 1 +posei 1 +organ 1 +colleg 1 +park 1 +offic 1 +sresearch 1 +paradyn 1 +toolsstatu 1 +search 1 +thesi 1 +topic 1 +els 1 +interest 1 +toolsparallel 1 +systemsbluesth 1 +simpsonsseinfeldskiingskinetkeyston 1 +favorit 1 +area 1 +joke 1 +vacum 1 +cleaner 1 +dirt 1 +attach 1 +greet 1 +peopl 1 +whoa 1 +sorri 1 +dude 1 +eventu 1 +matur 1 +gener 1 +incom 1 +hate 1 +countri 1 +music 1 +fortran 1 +cool 1 +link 1 +yahooespncpu 1 +infoskinetoth 1 +stuff 1 +exokernel 1 +architectur 1 +applic 1 +level 1 +resourc 1 +manag 1 +octob 1 +techniqu 1 +tool 1 +improv 1 +callaghan 1 +supercomput 1 +interconnect 1 +april 1 +zebra 1 +stripe 1 +need 1 +structur 1 +raid 1 +block 1 +wait 1 +free 1 +highli 1 +concurr 1 +object 1 +asynchron 1 +multiprocessor 1 +postscript 1 +analysi 1 +risc 1 +instruct 1 +enhanc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..be81a353 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,102 @@ +pictur 5 +home 3 +page 3 +look 3 +show 3 +lampert 2 +good 2 +like 2 +babi 2 +someon 2 +tick 2 +turn 2 +click 2 +take 2 +work 2 +music 2 +song 2 +sound 2 +jeff 1 +pagejeff 1 +ricardo 1 +montalban 1 +voic 1 +welcom 1 +know 1 +nota 1 +least 1 +still 1 +foron 1 +incrimin 1 +doesn 1 +make 1 +aconvict 1 +high 1 +school 1 +yearbook 1 +lasttim 1 +never 1 +heard 1 +cute 1 +think 1 +well 1 +found 1 +coupl 1 +threaten 1 +intoa 1 +human 1 +dispens 1 +took 1 +henc 1 +befound 1 +separ 1 +anautograph 1 +copi 1 +sign 1 +name 1 +monitor 1 +pictureappear 1 +choos 1 +link 1 +weasel 1 +seek 1 +pace 1 +basic 1 +factswho 1 +person 1 +last 1 +night 1 +academ 1 +relatedwhat 1 +class 1 +dept 1 +resum 1 +entertainmentbook 1 +movi 1 +program 1 +newsgroup 1 +import 1 +subjectsfriendsno 1 +sick 1 +theme 1 +hobbi 1 +club 1 +organizationsgroup 1 +plu 1 +wish 1 +inmi 1 +favorit 1 +linksugh 1 +servo 1 +juli 1 +andrew 1 +fire 1 +crow 1 +mstk 1 +eclect 1 +paraphenaliai 1 +would 1 +miscellan 1 +straight 1 +forward 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..280f992f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,18 @@ +mathematicalprogram 2 +todd 1 +homepagetodd 1 +homepagein 1 +fall 1 +teach 1 +section 1 +sinc 1 +area 1 +mathemat 1 +program 1 +plug 1 +page 1 +contain 1 +wealth 1 +inform 1 +tmunson 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..efcb0523 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,35 @@ +brian 1 +home 1 +pagebrian 1 +toonen 1 +comput 1 +scienc 1 +departmentunivers 1 +wisconsin 1 +dayton 1 +streetmadison 1 +offic 1 +cswhatev 1 +chief 1 +seattleth 1 +ground 1 +tipi 1 +medit 1 +life 1 +itsmean 1 +accept 1 +kinship 1 +creatur 1 +acknowledgingun 1 +univers 1 +thing 1 +infus 1 +thetru 1 +essenc 1 +civil 1 +luther 1 +stand 1 +bear 1 +oglala 1 +siouxlast 1 +modifi 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..78bec837 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,15 @@ +page 3 +home 2 +netscap 2 +thano 1 +tsioli 1 +site 1 +enhanc 1 +read 1 +shouldconsid 1 +upgrad 1 +browser 1 +latest 1 +version 1 +ifthat 1 +option 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..e7313847 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,56 @@ +madison 4 +todd 3 +turnidg 3 +wisconsin 2 +axi 2 +ohio 2 +dougla 1 +turnidgeschoolcomput 1 +scienc 1 +departmentunivers 1 +dayton 1 +homemuppet 1 +babylon 1 +milton 1 +eyesightright 1 +left 1 +graduat 1 +student 1 +depart 1 +comput 1 +sciencesat 1 +univers 1 +year 1 +work 1 +professorthoma 1 +rep 1 +studyingprogram 1 +languag 1 +teach 1 +section 1 +hold 1 +mathematicsand 1 +computersci 1 +case 1 +western 1 +reserveunivers 1 +locat 1 +cleveland 1 +origin 1 +kent 1 +myfamili 1 +live 1 +judg 1 +compani 1 +keep 1 +click 1 +enough 1 +evid 1 +awai 1 +long 1 +time 1 +amus 1 +shortcut 1 +last 1 +modifi 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..f54f418a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,37 @@ +home 4 +page 4 +taxiao 3 +wang 3 +sinc 3 +madison 2 +offic 2 +phone 2 +visitor 2 +number 2 +welcom 1 +heavi 1 +construct 1 +click 1 +finger 1 +contact 1 +inform 1 +graduat 1 +student 1 +teach 1 +assist 1 +comput 1 +scienc 1 +depart 1 +univers 1 +wisconsin 1 +bldg 1 +dayton 1 +street 1 +mail 1 +twang 1 +wisc 1 +visit 1 +time 1 +last 1 +updat 1 +june 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..c6b2ee60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,12 @@ +shaft 2 +home 2 +trek 2 +meet 2 +pageuri 1 +pageemail 1 +wisc 1 +eduinterest 1 +diversionsstart 1 +microsoft 1 +start 1 +window 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..d53b6d60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,60 @@ +madison 4 +india 3 +asha 3 +ganti 2 +graduat 2 +student 2 +madra 2 +godav 2 +homepag 2 +databas 2 +real 2 +time 2 +venkatesh 1 +home 1 +pagevenkatesh 1 +vganti 1 +wisc 1 +studentoffic 1 +comput 1 +scienc 1 +depart 1 +dayton 1 +usaphon 1 +note 1 +page 1 +construct 1 +past 1 +present 1 +univers 1 +wisconsin 1 +fall 1 +earlier 1 +undergradu 1 +nativ 1 +kakinada 1 +andhra 1 +pradesh 1 +info 1 +basic 1 +educ 1 +click 1 +know 1 +hostel 1 +yearbook 1 +hope 1 +onlin 1 +sometim 1 +research 1 +interest 1 +work 1 +till 1 +btech 1 +project 1 +want 1 +look 1 +genesi 1 +group 1 +last 1 +updat 1 +januari 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..74fee9b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,62 @@ +multiscalar 7 +regist 6 +vijaykumar 5 +sohi 5 +commun 4 +vijai 3 +comput 3 +scienc 3 +madison 3 +compil 3 +architectur 3 +processor 3 +breach 3 +intern 3 +symposium 3 +wisc 2 +depart 2 +univers 2 +wisconsin 2 +project 2 +file 2 +annual 2 +microarchitectur 2 +micro 2 +home 1 +page 1 +profession 1 +affili 1 +contact 1 +address 1 +dayton 1 +street 1 +phone 1 +email 1 +advisor 1 +guri 1 +educ 1 +doctor 1 +august 1 +undergradu 1 +birla 1 +institut 1 +technolog 1 +pilani 1 +india 1 +research 1 +dissert 1 +distribut 1 +design 1 +anatomi 1 +strategi 1 +submit 1 +schedul 1 +architecturet 1 +go 1 +work 1 +memori 1 +data 1 +depend 1 +predict 1 +person 1 +side 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..ebc7e301 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,74 @@ +comput 6 +quantum 6 +watrou 5 +john 3 +scienc 3 +link 3 +bibliographi 3 +home 2 +theori 2 +inform 2 +page 2 +laboratori 2 +pagejohn 1 +wisc 1 +departmentunivers 1 +wisconsin 1 +madison 1 +dayton 1 +streetmadison 1 +telephon 1 +public 1 +dimension 1 +cellular 1 +automata 1 +proc 1 +symp 1 +foundat 1 +polynomi 1 +time 1 +algorithm 1 +artin 1 +whapl 1 +approxim 1 +theorem 1 +number 1 +fourth 1 +confer 1 +canadiannumb 1 +associ 1 +assort 1 +archiv 1 +stanford 1 +oxford 1 +particl 1 +beam 1 +physic 1 +ucla 1 +theoret 1 +montreal 1 +lanl 1 +preprint 1 +hypertext 1 +project 1 +hypatia 1 +gener 1 +refer 1 +element 1 +stylehypertext 1 +webster 1 +interfaceroget 1 +thesauru 1 +random 1 +parasol 1 +recordsplayst 1 +linksweath 1 +forecast 1 +madisonth 1 +isthmu 1 +daili 1 +pagemathemat 1 +quotat 1 +servermathematician 1 +biographiesgeek 1 +site 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..0620fe80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,108 @@ +entertain 5 +univers 5 +send 3 +date 3 +food 3 +affect 3 +home 2 +page 2 +recit 2 +plai 2 +ship 2 +world 2 +show 2 +movi 2 +weiru 1 +eiru 1 +email 1 +ppppleas 1 +find 1 +around 1 +sometim 1 +think 1 +english 1 +speaker 1 +commit 1 +asylum 1 +verbal 1 +insan 1 +languag 1 +peopl 1 +truck 1 +cargo 1 +havenos 1 +feet 1 +smell 1 +richard 1 +leder 1 +three 1 +possibl 1 +part 1 +least 1 +must 1 +beoffer 1 +customari 1 +begina 1 +seri 1 +great 1 +deal 1 +moder 1 +amountof 1 +merest 1 +suggest 1 +amount 1 +ofaffect 1 +increas 1 +reduc 1 +proportion 1 +longer 1 +call 1 +circumst 1 +omit 1 +miss 1 +manner 1 +guid 1 +excruciatingli 1 +correct 1 +behaviour 1 +peke 1 +friend 1 +physic 1 +depart 1 +alumni 1 +associ 1 +atmadison 1 +littl 1 +grei 1 +cell 1 +pictur 1 +game 1 +late 1 +david 1 +letterman 1 +studio 1 +wish 1 +postcard 1 +someon 1 +review 1 +favorit 1 +hockei 1 +player 1 +steve 1 +francai 1 +dictionnairefrancai 1 +anglai 1 +dictionnair 1 +softwar 1 +relatif 1 +lafrancophoni 1 +test 1 +degrammair 1 +francais 1 +french 1 +lesson 1 +weather 1 +forecast 1 +madison 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..f2c6154a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,82 @@ +comput 4 +depart 3 +scienc 3 +univers 3 +manag 3 +home 2 +page 2 +student 2 +peopl 2 +republ 2 +jose 2 +technolog 2 +system 2 +platform 2 +includ 2 +us 2 +softwar 2 +corpor 2 +memori 2 +welcom 1 +zhang 1 +first 1 +year 1 +graduat 1 +hometown 1 +shanghai 1 +china 1 +educ 1 +wisconsin 1 +madison 1 +state 1 +california 1 +tsinghua 1 +beij 1 +chinaemail 1 +weiz 1 +wisc 1 +eduwork 1 +experiencecontractor 1 +develop 1 +variou 1 +inform 1 +differ 1 +windowsnt 1 +solari 1 +tuxedo 1 +pathwai 1 +design 1 +tandem 1 +engin 1 +sherpa 1 +oper 1 +nasa 1 +am 1 +research 1 +center 1 +hobbiesma 1 +jiangbridg 1 +card 1 +game 1 +tabl 1 +tenni 1 +pingpong 1 +joggingth 1 +ultim 1 +challengesolv 1 +mine 1 +sweeper 1 +expert 1 +level 1 +puzzl 1 +within 1 +second 1 +without 1 +cheat 1 +quot 1 +dayth 1 +best 1 +ackowledgementthi 1 +written 1 +framework 1 +provid 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..b3733623 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,62 @@ +home 4 +page 4 +kent 3 +wenger 3 +madison 3 +scienc 2 +wisconsin 2 +project 2 +work 2 +data 2 +welcom 1 +note 1 +definit 1 +still 1 +construct 1 +preparedfor 1 +pothol 1 +need 1 +pictur 1 +scan 1 +wengerassoci 1 +researchercomput 1 +departmentunivers 1 +west 1 +dayton 1 +streetmadison 1 +telephon 1 +email 1 +wisc 1 +edufing 1 +workth 1 +main 1 +arecod 1 +cluster 1 +provid 1 +anddevis 1 +explor 1 +andvisu 1 +come 1 +good 1 +acronym 1 +importantpart 1 +wouldn 1 +agre 1 +visualizationproduc 1 +devis 1 +softwar 1 +peopl 1 +yanni 1 +ioannidi 1 +miron 1 +livnyraghu 1 +ramakrishnanmor 1 +inform 1 +univers 1 +dbm 1 +research 1 +groupuw 1 +comput 1 +pagewiscinfo 1 +personallinksimageslast 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..f10743d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,117 @@ +wisconsin 7 +project 5 +comput 4 +tempest 4 +wind 3 +tunnel 3 +share 3 +interfac 3 +hardwar 3 +memori 3 +slide 3 +page 2 +like 2 +level 2 +compil 2 +softwar 2 +implement 2 +paradyn 2 +architectur 2 +home 1 +projectmost 1 +futur 1 +massiv 1 +parallel 1 +built 1 +fromworkst 1 +node 1 +program 1 +high 1 +parallellanguag 1 +support 1 +address 1 +space 1 +whichprocess 1 +uniformli 1 +refer 1 +data 1 +seek 1 +develop 1 +consensu 1 +aboutth 1 +middl 1 +languag 1 +abovesystem 1 +first 1 +propos 1 +wascoop 1 +evolutionari 1 +extens 1 +toconvent 1 +recent 1 +havebeen 1 +work 1 +revolutionari 1 +call 1 +provid 1 +mechan 1 +allow 1 +programm 1 +andprogram 1 +librari 1 +messag 1 +pass 1 +transparentshar 1 +hybrid 1 +combin 1 +developingimplement 1 +think 1 +machin 1 +cluster 1 +ofworkst 1 +wisconsincow 1 +hypothet 1 +platform 1 +approach 1 +cowus 1 +snoop 1 +logic 1 +fpga 1 +sram 1 +collaboratingwith 1 +adapt 1 +perform 1 +tool 1 +overviewand 1 +annot 1 +bibliographi 1 +overview 1 +talk 1 +novemb 1 +pageor 1 +four 1 +complet 1 +technic 1 +paper 1 +contributor 1 +fund 1 +sourc 1 +origin 1 +name 1 +week 1 +articl 1 +relat 1 +group 1 +scienc 1 +departmentat 1 +univers 1 +world 1 +wide 1 +inform 1 +last 1 +updat 1 +juli 1 +mark 1 +hill 1 +markhil 1 +wisc 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..618ef628 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,85 @@ +first 3 +televis 3 +felix 2 +star 2 +trick 2 +make 2 +finger 2 +xuelin 1 +home 1 +page 1 +charact 1 +creat 1 +otto 1 +messmer 1 +base 1 +anim 1 +human 1 +person 1 +featur 1 +save 1 +whichwa 1 +shown 1 +famou 1 +rival 1 +chaplin 1 +keaton 1 +princ 1 +wale 1 +pick 1 +polo 1 +team 1 +mascot 1 +pictur 1 +accompani 1 +charl 1 +lindbergh 1 +across 1 +theatlant 1 +statu 1 +imag 1 +successfulli 1 +transmit 1 +develop 1 +seri 1 +somehow 1 +obtain 1 +magic 1 +didn 1 +oneev 1 +seem 1 +agre 1 +whether 1 +teeth 1 +whisker 1 +like 1 +spend 1 +time 1 +film 1 +program 1 +appear 1 +newspap 1 +comic 1 +strip 1 +advertis 1 +hundr 1 +product 1 +thing 1 +remov 1 +tail 1 +ear 1 +put 1 +back 1 +wish 1 +could 1 +give 1 +account 1 +hairbal 1 +keyboard 1 +keeper 1 +instead 1 +sui 1 +vritabl 1 +chat 1 +pass 1 +partout 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..12b73422 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,329 @@ +ioannidi 27 +proc 16 +system 13 +queri 13 +intern 13 +databas 12 +visual 10 +optim 8 +septemb 8 +confer 8 +complex 6 +histogram 6 +schema 6 +livni 6 +issu 5 +data 5 +ramakrishnan 5 +inform 5 +problem 4 +estim 4 +comput 4 +integr 4 +transact 4 +haber 4 +sigmod 4 +poosala 4 +vldbconfer 4 +manag 3 +scientif 3 +user 3 +interfac 3 +primarili 3 +altern 3 +parallel 3 +investig 3 +simul 3 +special 3 +schedul 3 +error 3 +size 3 +studi 3 +identifi 3 +tool 3 +decemb 3 +tod 3 +miller 3 +translat 3 +result 3 +august 3 +yanni 2 +heterogen 2 +research 2 +focus 2 +support 2 +futur 2 +sever 2 +number 2 +algorithm 2 +well 2 +valu 2 +queryoptim 2 +current 2 +solut 2 +also 2 +aris 2 +multimedia 2 +environ 2 +propag 2 +distribut 2 +disciplin 2 +gener 2 +need 2 +develop 2 +desktop 2 +scientist 2 +concentr 2 +metaphor 2 +object 2 +dynam 2 +base 2 +survei 2 +symposium 2 +march 2 +garofalaki 2 +ieee 2 +engin 2 +ofheterogen 2 +bridg 2 +theori 2 +practic 2 +join 2 +ondatabas 2 +bombai 2 +india 2 +balanc 2 +orient 2 +montreal 2 +canada 2 +workshop 2 +opossum 2 +tsatalo 2 +dublin 2 +ireland 2 +ioannidisyanni 1 +wisc 1 +eduresearch 1 +interestsdatabas 1 +andinform 1 +area 1 +scientificdata 1 +applic 1 +pose 1 +challeng 1 +toqueri 1 +ask 1 +significantli 1 +higher 1 +thanin 1 +tradit 1 +evalu 1 +much 1 +highera 1 +especi 1 +attempt 1 +tooptim 1 +time 1 +paramet 1 +parametr 1 +thu 1 +access 1 +plan 1 +process 1 +querywil 1 +extrem 1 +larg 1 +us 1 +algorithmsfor 1 +find 1 +optimum 1 +among 1 +inadequ 1 +random 1 +algorithmsa 1 +viabl 1 +interest 1 +anneal 1 +genet 1 +take 1 +advantag 1 +propertiesof 1 +look 1 +especiallythos 1 +cost 1 +alsopart 1 +try 1 +appropriateinform 1 +must 1 +maintain 1 +limit 1 +thepropag 1 +properti 1 +ofoptim 1 +approxim 1 +inrel 1 +attribut 1 +mode 1 +expect 1 +part 1 +manyexperi 1 +variou 1 +mani 1 +aspectsthat 1 +technolog 1 +readi 1 +provid 1 +involv 1 +experi 1 +managementenviron 1 +help 1 +throughout 1 +life 1 +cycl 1 +theirexperiment 1 +primari 1 +compon 1 +major 1 +work 1 +address 1 +andsemant 1 +former 1 +right 1 +arefor 1 +repres 1 +scientistsso 1 +natur 1 +power 1 +latter 1 +facilitatetransl 1 +differ 1 +format 1 +although 1 +experimentalscientif 1 +effort 1 +guid 1 +specificproject 1 +associ 1 +particular 1 +basedperform 1 +model 1 +plantgrowth 1 +spectroscopi 1 +sequenc 1 +microscop 1 +imag 1 +recent 1 +publicationsi 1 +issueon 1 +anniversari 1 +multimediasystem 1 +contain 1 +conjunct 1 +beyondrel 1 +set 1 +foundat 1 +forschema 1 +displai 1 +journal 1 +intellig 1 +juli 1 +tsangari 1 +design 1 +implement 1 +performanceevalu 1 +bermuda 1 +knowledg 1 +tkde 1 +februari 1 +januari 1 +christodoulaki 1 +limitingworst 1 +case 1 +winger 1 +transit 1 +closur 1 +algorithmsbas 1 +graph 1 +travers 1 +record 1 +divers 1 +databaseestim 1 +gupta 1 +ponnekanti 1 +experimentmanag 1 +vldb 1 +itsappl 1 +load 1 +anjur 1 +frog 1 +turtl 1 +bridgesbetween 1 +file 1 +conferenceon 1 +statist 1 +stockholm 1 +sweden 1 +june 1 +multi 1 +dimension 1 +resourc 1 +forparallel 1 +haa 1 +shekita 1 +improv 1 +forselect 1 +rang 1 +predic 1 +internationalacm 1 +layoutat 1 +multipl 1 +granular 1 +advancedvisu 1 +gubbio 1 +itali 1 +desk 1 +managementthrough 1 +customiz 1 +zurich 1 +switzerland 1 +practicalityfor 1 +sigmodconfer 1 +jose 1 +solomon 1 +gmap 1 +versatil 1 +forphys 1 +independ 1 +santiago 1 +chile 1 +unifi 1 +framework 1 +index 1 +databasesystem 1 +dexa 1 +athen 1 +greec 1 +lashkari 1 +incomplet 1 +path 1 +express 1 +theirdisambigu 1 +minneapoli 1 +flexibl 1 +schemavisu 1 +edit 1 +boston 1 +april 1 +edbt 1 +cambridg 1 +england 1 +univers 1 +serial 1 +internationalvldb 1 +capacityin 1 +wiener 1 +moos 1 +withdata 1 +program 1 +languag 1 +york 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..1f0ddd68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,3 @@ +kevin 1 +zhongbin 1 +homepag 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..92436a46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,76 @@ +home 4 +good 2 +person 2 +cours 2 +time 2 +madison 2 +last 2 +yinng 1 +pageindexofyinongwei 1 +spagehi 1 +welcom 1 +homepag 1 +pleas 1 +look 1 +info 1 +especi 1 +employ 1 +give 1 +alsolink 1 +classmat 1 +take 1 +telephon 1 +work 1 +address 1 +offic 1 +comp 1 +stat 1 +bldg 1 +univ 1 +inforesumehobbiestravel 1 +usathi 1 +collect 1 +pictur 1 +took 1 +travel 1 +articl 1 +wrote 1 +trip 1 +chicago 1 +seattl 1 +pointersr 1 +computingmacin 1 +learningpattern 1 +recognitioncomputatin 1 +geometrydatabasevisionacadem 1 +diarythi 1 +diari 1 +everi 1 +month 1 +sometim 1 +amaz 1 +mani 1 +littl 1 +read 1 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 1 +linksmi 1 +beida 1 +classmatespek 1 +univers 1 +alumni 1 +page 1 +oversea 1 +chines 1 +organ 1 +ciumi 1 +bookmarkcom 1 +press 1 +client 1 +support 1 +send 1 +comment 1 +visitor 1 +number 1 +access 1 +modifi 1 +yinong 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..5d4f7d6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,44 @@ +home 2 +madison 2 +wisc 2 +convuls 2 +sera 2 +food 2 +poor 2 +call 2 +matt 1 +pageuntil 1 +around 1 +updat 1 +basic 1 +inform 1 +offic 1 +matthew 1 +zeidenbergcent 1 +wisconsin 1 +strategi 1 +observatori 1 +drive 1 +room 1 +voic 1 +gilson 1 +email 1 +zeiden 1 +eduzeidenb 1 +eduwhen 1 +california 1 +parent 1 +hous 1 +coho 1 +huntington 1 +beach 1 +beauti 1 +breton 1 +nadja 1 +beaut 1 +give 1 +saint 1 +whyth 1 +communist 1 +helder 1 +camara 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..47cd3470 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,161 @@ +zhang 20 +tian 10 +data 10 +larg 8 +research 6 +databas 6 +raghu 5 +ramakrishnan 5 +miron 5 +livni 5 +mine 5 +cluster 5 +proc 5 +robot 5 +analysi 4 +dataset 4 +effici 4 +densiti 4 +birch 4 +method 4 +joint 3 +artifici 3 +intellig 3 +comput 3 +algorithm 3 +applic 3 +journal 3 +june 3 +ling 3 +home 2 +prof 2 +scienc 2 +wisconsin 2 +madison 2 +telephon 2 +us 2 +pattern 2 +interest 2 +design 2 +multi 2 +estim 2 +recent 2 +system 2 +sigmod 2 +canada 2 +technic 2 +motion 2 +plan 2 +topolog 2 +confer 2 +jianwei 2 +educ 2 +symposium 2 +beij 2 +page 1 +gener 1 +inform 1 +student 1 +assistantadvisor 1 +major 1 +concentr 1 +compilerminor 1 +financi 1 +invest 1 +bankingoffic 1 +room 1 +dept 1 +univ 1 +mail 1 +wisc 1 +eduoffic 1 +depart 1 +intereststher 1 +grow 1 +need 1 +exploratori 1 +discov 1 +territori 1 +develop 1 +purpos 1 +ortool 1 +integr 1 +techniqu 1 +statist 1 +thesi 1 +topic 1 +densityanalysi 1 +given 1 +dimension 1 +limit 1 +amount 1 +resourc 1 +run 1 +time 1 +memori 1 +implement 1 +accur 1 +identifi 1 +spars 1 +crowd 1 +region 1 +function 1 +overal 1 +distribut 1 +import 1 +practic 1 +branch 1 +appli 1 +mani 1 +domain 1 +dataclassif 1 +imag 1 +compress 1 +recognit 1 +project 1 +select 1 +public 1 +submit 1 +knowledg 1 +discoveri 1 +conf 1 +manag 1 +interact 1 +classif 1 +workshop 1 +issu 1 +knowledgediscoveri 1 +cooper 1 +fast 1 +probabl 1 +kernel 1 +report 1 +juli 1 +dimensionreduct 1 +ijcai 1 +findpath 1 +manipul 1 +finit 1 +divis 1 +configur 1 +space 1 +manufactur 1 +trend 1 +andmanufactur 1 +dimens 1 +reduct 1 +technolog 1 +find 1 +collis 1 +free 1 +path 1 +mobil 1 +young 1 +profession 1 +relev 1 +link 1 +document 1 +organ 1 +china 1 +last 1 +updat 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..d1710df5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,72 @@ +research 3 +data 3 +site 3 +server 3 +yihong 2 +zhao 2 +madison 2 +relat 2 +dbm 2 +mine 2 +financi 2 +pathfind 2 +daili 2 +new 2 +home 1 +page 1 +wisc 1 +assist 1 +depart 1 +comput 1 +sciencesunivers 1 +wisconsin 1 +west 1 +dayton 1 +streetmadison 1 +advis 1 +prof 1 +jeff 1 +naughton 1 +interest 1 +parallel 1 +object 1 +line 1 +analyt 1 +process 1 +olap 1 +benchmark 1 +educationb 1 +univers 1 +north 1 +carolina 1 +chapel 1 +hillm 1 +fall 1 +wiscosin 1 +group 1 +sigmod 1 +maryland 1 +datamin 1 +microstrategi 1 +rolap 1 +arbor 1 +molap 1 +stock 1 +lombard 1 +graph 1 +kiwi 1 +club 1 +todai 1 +monei 1 +chines 1 +taiwan 1 +search 1 +engin 1 +lyco 1 +excit 1 +yahoo 1 +surf 1 +ters 1 +detail 1 +comment 1 +pgmo 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..5af50083 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,19 @@ +home 3 +address 3 +madison 2 +phone 2 +offic 2 +wisc 2 +page 1 +still 1 +construct 1 +wang 1 +homepag 1 +offer 1 +inform 1 +johnson 1 +dayton 1 +street 1 +email 1 +zhewang 1 +student 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..3c1eae68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,102 @@ +comput 6 +perform 5 +languag 4 +area 3 +parallel 3 +system 3 +studi 3 +distribut 3 +zhichen 2 +scienc 2 +research 2 +confer 2 +interest 2 +program 2 +recent 2 +techniqu 2 +environ 2 +high 2 +softwar 2 +univers 2 +fudan 2 +compil 2 +version 2 +home 1 +page 1 +depart 1 +dayton 1 +madison 1 +offic 1 +phone 1 +assist 1 +advisor 1 +professor 1 +jame 1 +larusprofessor 1 +barton 1 +millerawardbest 1 +paper 1 +award 1 +intern 1 +supercomput 1 +press 1 +juli 1 +issu 1 +anddistribut 1 +detect 1 +eliminateperform 1 +bottleneck 1 +share 1 +memori 1 +combin 1 +paradyn 1 +toolwith 1 +blizzard 1 +wisconsinwind 1 +tunnel 1 +think 1 +machin 1 +andth 1 +cluster 1 +workstat 1 +public 1 +field 1 +interestprogram 1 +tool 1 +network 1 +oper 1 +architectur 1 +evalu 1 +benchmark 1 +place 1 +work 1 +laboratori 1 +texa 1 +antonio 1 +publish 1 +ofparallel 1 +predict 1 +model 1 +simul 1 +departmentat 1 +particip 1 +sever 1 +nation 1 +project 1 +china 1 +develop 1 +levelprogram 1 +object 1 +orient 1 +technolog 1 +andimcrement 1 +click 1 +postscript 1 +html 1 +link 1 +asplo 1 +programjourn 1 +researchchines 1 +novel 1 +friend 1 +java 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..0589f625 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,25 @@ +zhang 2 +univers 2 +wisconsin 2 +madison 2 +home 1 +page 1 +hello 1 +name 1 +pictur 1 +taken 1 +invit 1 +supper 1 +theth 1 +restaur 1 +tsinghua 1 +chen 1 +weihai 1 +wang 1 +tong 1 +depart 1 +comput 1 +scienc 1 +west 1 +dayton 1 +street 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..681f51bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,13 @@ +inform 2 +krzysztof 1 +zmudzinskikrzysztof 1 +zmudzinskispin 1 +student 1 +inc 1 +pictur 1 +poland 1 +pole 1 +thank 1 +stop 1 +visitor 1 +number 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..12d2bcf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,131 @@ +test 9 +design 6 +circuit 4 +built 4 +self 4 +perform 4 +engin 3 +comput 3 +research 3 +testabl 3 +vlsi 3 +gener 3 +area 3 +system 3 +investig 3 +saluja 2 +kewal 2 +colleg 2 +mail 2 +engr 2 +wisc 2 +data 2 +compress 2 +fault 2 +digit 2 +make 2 +littl 2 +oper 2 +laboratori 2 +engineeringunivers 1 +wisconsin 1 +madison 1 +salujaprofessor 1 +hall 1 +drivemadison 1 +eduportrait 1 +jpgdepartmentselectr 1 +engineeringcomput 1 +scienc 1 +educ 1 +univers 1 +iowa 1 +interestsdesign 1 +architectur 1 +integr 1 +toler 1 +interest 1 +testableand 1 +reliabl 1 +carri 1 +thisarea 1 +extens 1 +analysi 1 +tool 1 +theresearch 1 +involv 1 +model 1 +testgener 1 +modif 1 +enhanc 1 +inself 1 +fabric 1 +applic 1 +techniqu 1 +andfault 1 +simul 1 +process 1 +effici 1 +combin 1 +andsequenti 1 +compact 1 +methodsapplic 1 +testenviron 1 +concentr 1 +regularstructur 1 +programm 1 +logic 1 +arrai 1 +ram 1 +areinvestig 1 +algorithm 1 +implement 1 +inhardwar 1 +penalti 1 +anoth 1 +projectw 1 +wai 1 +hardwar 1 +asystem 1 +normal 1 +goal 1 +thatth 1 +continu 1 +noimpact 1 +much 1 +work 1 +us 1 +facil 1 +digitalsystem 1 +hous 1 +number 1 +station 1 +withcolor 1 +monitor 1 +termin 1 +program 1 +dept 1 +center 1 +consortia 1 +servic 1 +fountain 1 +index 1 +search 1 +credit 1 +help 1 +last 1 +modifi 1 +fridai 1 +cdtthi 1 +page 1 +best 1 +view 1 +browser 1 +support 1 +tabl 1 +photograph 1 +address 1 +comment 1 +webmast 1 +eduupd 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..54e4088c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,135 @@ +system 14 +control 8 +duffi 7 +manufactur 7 +comput 6 +wisconsin 5 +autom 5 +engin 4 +research 4 +center 4 +engr 3 +univers 3 +develop 3 +perform 3 +neil 2 +colleg 2 +mail 2 +wisc 2 +integr 2 +space 2 +sensor 2 +product 2 +machin 2 +robot 2 +distribut 2 +experiment 2 +feedback 2 +human 2 +telerobot 2 +work 2 +engineeringunivers 1 +madison 1 +professor 1 +build 1 +drivemadison 1 +eduportrait 1 +departmentsmechan 1 +engineeringeducationb 1 +madisonm 1 +madisonphd 1 +madisonresearch 1 +interestsrobot 1 +precis 1 +micromechanismscent 1 +consortiamanufactur 1 +programwisconsin 1 +roboticsprofessor 1 +involv 1 +actuat 1 +data 1 +base 1 +advanc 1 +self 1 +guid 1 +inspect 1 +weld 1 +high 1 +materi 1 +handl 1 +finish 1 +mold 1 +rework 1 +studi 1 +highli 1 +hierarch 1 +architectur 1 +hope 1 +reduc 1 +cost 1 +complex 1 +larg 1 +scale 1 +increas 1 +flexibl 1 +fault 1 +toler 1 +construct 1 +sever 1 +incorpor 1 +real 1 +time 1 +fulli 1 +schedul 1 +optim 1 +theori 1 +explain 1 +properti 1 +associ 1 +director 1 +nasa 1 +fund 1 +emphas 1 +agricultur 1 +tactil 1 +oper 1 +method 1 +evalu 1 +well 1 +factor 1 +sensori 1 +fatigu 1 +test 1 +carri 1 +close 1 +aerospac 1 +industri 1 +teach 1 +cours 1 +automat 1 +author 1 +process 1 +dept 1 +consortia 1 +servic 1 +fountain 1 +index 1 +search 1 +credit 1 +help 1 +last 1 +modifi 1 +tuesdai 1 +cdtthi 1 +page 1 +best 1 +view 1 +browser 1 +support 1 +tabl 1 +photograph 1 +address 1 +comment 1 +webmast 1 +eduupd 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..16e33e1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,203 @@ +model 14 +mechan 10 +engin 10 +design 10 +comput 9 +physic 8 +geometr 6 +system 4 +represent 4 +behavior 4 +research 4 +part 4 +shapiro 3 +wisc 3 +aid 3 +manufactur 3 +analysi 3 +specif 3 +vadim 2 +colleg 2 +professor 2 +univers 2 +mail 2 +engr 2 +cornel 2 +geometri 2 +solid 2 +simul 2 +award 2 +chain 2 +april 2 +function 2 +center 2 +artifact 2 +algorithm 2 +us 2 +current 2 +formal 2 +investig 2 +support 2 +form 2 +process 2 +combinatori 2 +competit 2 +systemat 2 +develop 2 +engineeringunivers 1 +wisconsin 1 +madison 1 +assist 1 +avenuemadison 1 +vshapiro 1 +eduportrait 1 +jpgurl 1 +http 1 +departmentscomput 1 +sciencemechan 1 +engineeringeducationba 1 +york 1 +universitym 1 +california 1 +angelesm 1 +universityphd 1 +univeristyresearch 1 +interestscomput 1 +appli 1 +product 1 +automationcent 1 +consortiamathemat 1 +graduat 1 +programmanufactur 1 +programspati 1 +autom 1 +laboratoryselect 1 +honorsn 1 +scienc 1 +foundat 1 +career 1 +gener 1 +motor 1 +fellow 1 +select 1 +public 1 +mainten 1 +space 1 +decomposit 1 +intern 1 +journal 1 +applic 1 +palmer 1 +real 1 +rigid 1 +separ 1 +boundari 1 +convers 1 +transact 1 +graphic 1 +januari 1 +vossler 1 +interest 1 +relationship 1 +betweengeometri 1 +phenomena 1 +bemodel 1 +repres 1 +analyz 1 +manipul 1 +manufacturedbas 1 +ongo 1 +project 1 +includ 1 +abil 1 +creat 1 +convert 1 +maintain 1 +consist 1 +ofdistinct 1 +major 1 +technologicalbarri 1 +undermin 1 +reliabl 1 +commercialgeometr 1 +effort 1 +focu 1 +eliminatingambigu 1 +commun 1 +ofparametr 1 +famili 1 +novel 1 +methodsand 1 +techniqu 1 +todai 1 +fabric 1 +cannot 1 +bedescrib 1 +term 1 +discret 1 +simpl 1 +interactingprimit 1 +appar 1 +lack 1 +structur 1 +amajor 1 +roadblock 1 +collabor 1 +industri 1 +present 1 +deal 1 +withtheoret 1 +practic 1 +aspect 1 +designand 1 +seek 1 +establish 1 +basi 1 +make 1 +andmanufactur 1 +smoothintegr 1 +activ 1 +contain 1 +inform 1 +need 1 +captur 1 +thedesir 1 +tomanufactur 1 +recent 1 +studi 1 +algebra 1 +topolog 1 +call 1 +suggest 1 +possibl 1 +tounifi 1 +thu 1 +facilit 1 +ofnew 1 +tool 1 +theseand 1 +languagesand 1 +physicalobject 1 +dept 1 +consortia 1 +servic 1 +fountain 1 +index 1 +search 1 +credit 1 +help 1 +last 1 +modifi 1 +thursdai 1 +cdtthi 1 +page 1 +best 1 +view 1 +browser 1 +tabl 1 +photograph 1 +address 1 +comment 1 +webmast 1 +eduupd 1 +profil 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..5b6a3dad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,107 @@ +prover 6 +theorem 4 +prove 4 +incomplet 4 +list 4 +mathemat 3 +tech 3 +autom 2 +groupth 2 +comput 2 +scienc 2 +method 2 +first 2 +order 2 +logic 2 +report 2 +student 2 +woodi 2 +bledso 2 +hine 2 +proof 2 +hein 2 +borel 2 +groupautom 1 +group 1 +part 1 +depart 1 +univers 1 +texa 1 +ataustin 1 +produc 1 +system 1 +intend 1 +higher 1 +intent 1 +appli 1 +systemsand 1 +problem 1 +primarili 1 +also 1 +computersci 1 +technolog 1 +herei 1 +index 1 +electron 1 +avail 1 +site 1 +seri 1 +continu 1 +current 1 +techreport 1 +ad 1 +reportseri 1 +present 1 +grouplarri 1 +hinesmarti 1 +mayberrybenjamin 1 +shultsalumniprevi 1 +previou 1 +robert 1 +boyer 1 +other 1 +relat 1 +late 1 +faculti 1 +profil 1 +robertboyerj 1 +strother 1 +moorethi 1 +past 1 +visitor 1 +collaboratorswhat 1 +done 1 +implyth 1 +natur 1 +deduct 1 +proverstrivelarri 1 +inequ 1 +struvelarri 1 +theori 1 +chou 1 +geometri 1 +proverand 1 +variou 1 +improv 1 +theretoinclud 1 +mcphee 1 +feng 1 +theoryimplement 1 +descript 1 +theoremprecondit 1 +proverbledso 1 +analog 1 +theoremnqthmboy 1 +andmoor 1 +develop 1 +clinc 1 +iprshult 1 +knowledg 1 +us 1 +relatedlinksdo 1 +feedback 1 +want 1 +inform 1 +contact 1 +benjamin 1 +shult 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..ebf68804 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,5 @@ +document 2 +moveddocu 1 +movedthi 1 +perman 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..ebf68804 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tf/tf_raw/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,5 @@ +document 2 +moveddocu 1 +movedthi 1 +perman 1 +move 1 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..93d53008 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +structur 0 106 2.197225 0.000000 105 +solut 1 82 2.484907 2.484907 162 +homework 0 79 2.564949 0.000000 193 +dynam 0 76 2.564949 0.000000 194 +upson 0 71 2.639057 0.000000 218 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +new 0 64 2.772589 0.000000 262 +unix 0 58 2.890372 0.000000 308 +fridai 0 44 3.135494 0.000000 390 +static 0 27 3.637586 0.000000 619 +practicum 0 16 4.174387 0.000000 960 +pagec 0 15 4.248495 0.000000 1011 +prelim 0 12 4.465908 0.000000 1201 +huang 0 12 4.465908 0.000000 1202 +systemsc 0 11 4.553877 0.000000 1293 +cheng 0 10 4.653960 0.000000 1381 +lili 0 5 5.347108 0.000000 2240 +filesystem 0 4 5.568345 0.000000 2587 +groupcours 0 3 5.857933 0.000000 3092 +ychuang 0 3 5.857933 0.000000 3093 +budiu 0 2 6.263398 0.000000 4042 +systemkenneth 0 2 6.263398 0.000000 4043 +birmanc 0 2 6.263398 0.000000 4044 +syllabuslectur 0 2 6.263398 0.000000 4045 +taslili 0 2 6.263398 0.000000 4046 +mihai 0 2 6.263398 0.000000 4047 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..0357c7e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +oper 1 180 1.609438 1.609438 34 +group 0 183 1.609438 0.000000 36 +schedul 0 119 2.079442 0.000000 85 +specif 0 106 2.197225 0.000000 106 +question 0 91 2.397895 0.000000 141 +chang 0 82 2.484907 0.000000 163 +exampl 0 77 2.564949 0.000000 195 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +maintain 0 51 2.995732 0.000000 342 +format 0 48 3.044522 0.000000 356 +principl 0 48 3.044522 0.000000 357 +answer 0 45 3.135494 0.000000 391 +tutori 0 39 3.258097 0.000000 437 +hand 0 37 3.332205 0.000000 475 +symbol 0 27 3.637586 0.000000 620 +displai 0 23 3.806662 0.000000 712 +chip 0 21 3.912023 0.000000 770 +practicum 0 16 4.174387 0.000000 960 +pagec 0 15 4.248495 0.000000 1011 +configur 0 15 4.248495 0.000000 1012 +systemsc 0 11 4.553877 0.000000 1293 +correspond 0 10 4.653960 0.000000 1382 +phase 0 6 5.164786 0.000000 1977 +hoca 0 5 5.347108 0.000000 2241 +lorenzo 0 4 5.568345 0.000000 2588 +penn 0 3 5.857933 0.000000 3094 +alvisi 0 3 5.857933 0.000000 3095 +consol 0 2 6.263398 0.000000 4048 +systemsselect 0 2 6.263398 0.000000 4049 +postcript 0 2 6.263398 0.000000 4050 +postcriptdocu 0 1 6.957497 0.000000 6248 +hocacours 0 1 6.957497 0.000000 6249 +broccoli 0 1 6.957497 0.000000 6250 +fileth 0 1 6.957497 0.000000 6251 +systemth 0 1 6.957497 0.000000 6252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..e6bddbc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,241 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +file 0 132 1.945910 0.000000 70 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +databas 0 122 2.079442 0.000000 86 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +pleas 0 113 2.197225 0.000000 114 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +control 0 82 2.484907 0.000000 164 +member 0 84 2.484907 0.000000 165 +solut 0 82 2.484907 0.000000 162 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +exam 0 86 2.484907 0.000000 169 +learn 0 86 2.484907 0.000000 170 +homework 1 79 2.564949 2.564949 193 +april 0 77 2.564949 0.000000 196 +optim 0 79 2.564949 0.000000 197 +know 0 80 2.564949 0.000000 198 +exampl 0 77 2.564949 0.000000 195 +want 0 79 2.564949 0.000000 199 +tuesdai 0 73 2.639057 0.000000 219 +name 0 72 2.639057 0.000000 220 +upson 0 71 2.639057 0.000000 218 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +receiv 0 66 2.708050 0.000000 244 +integr 0 67 2.708050 0.000000 245 +januari 0 62 2.772589 0.000000 264 +organ 0 65 2.772589 0.000000 265 +evalu 0 64 2.772589 0.000000 266 +wednesdai 0 64 2.772589 0.000000 261 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +type 0 61 2.833213 0.000000 296 +back 0 60 2.833213 0.000000 297 +index 0 56 2.890372 0.000000 309 +space 0 57 2.890372 0.000000 310 +summer 0 56 2.890372 0.000000 311 +semest 0 58 2.890372 0.000000 312 +februari 0 54 2.944439 0.000000 328 +cover 0 55 2.944439 0.000000 329 +three 0 54 2.944439 0.000000 330 +week 0 52 2.995732 0.000000 343 +date 0 51 2.995732 0.000000 344 +appoint 0 49 3.044522 0.000000 358 +principl 0 48 3.044522 0.000000 357 +physic 0 47 3.091042 0.000000 377 +possibl 0 47 3.091042 0.000000 378 +midterm 0 45 3.135494 0.000000 392 +even 0 45 3.135494 0.000000 393 +algebra 0 45 3.135494 0.000000 394 +term 0 43 3.178054 0.000000 411 +third 0 43 3.178054 0.000000 412 +edit 0 42 3.218876 0.000000 418 +press 0 42 3.218876 0.000000 419 +transact 0 39 3.258097 0.000000 438 +late 0 40 3.258097 0.000000 439 +submit 0 39 3.258097 0.000000 440 +credit 0 38 3.295837 0.000000 460 +concurr 0 34 3.401197 0.000000 501 +return 0 34 3.401197 0.000000 502 +queri 0 33 3.433987 0.000000 524 +within 0 33 3.433987 0.000000 525 +chapter 0 32 3.465736 0.000000 536 +concept 0 32 3.465736 0.000000 537 +computersci 0 30 3.555348 0.000000 562 +hard 0 30 3.555348 0.000000 563 +domain 0 30 3.555348 0.000000 564 +retriev 0 27 3.637586 0.000000 621 +though 0 27 3.637586 0.000000 622 +request 0 26 3.688879 0.000000 635 +constraint 0 26 3.688879 0.000000 636 +relev 0 26 3.688879 0.000000 637 +fundament 0 25 3.737670 0.000000 661 +deal 0 22 3.850148 0.000000 736 +recommend 0 22 3.850148 0.000000 737 +sort 0 22 3.850148 0.000000 738 +tent 0 22 3.850148 0.000000 739 +similar 0 21 3.912023 0.000000 771 +reserv 0 20 3.951244 0.000000 808 +nice 0 20 3.951244 0.000000 809 +minut 0 20 3.951244 0.000000 810 +entir 0 20 3.951244 0.000000 811 +break 0 20 3.951244 0.000000 812 +five 0 19 4.007333 0.000000 841 +along 0 18 4.060443 0.000000 878 +accept 0 18 4.060443 0.000000 879 +encourag 0 18 4.060443 0.000000 880 +vector 0 16 4.174387 0.000000 961 +permit 0 16 4.174387 0.000000 962 +alreadi 0 16 4.174387 0.000000 963 +pagec 0 15 4.248495 0.000000 1011 +indic 0 15 4.248495 0.000000 1013 +rank 0 14 4.317488 0.000000 1063 +latex 0 14 4.317488 0.000000 1064 +calculu 0 12 4.465908 0.000000 1203 +prelim 0 12 4.465908 0.000000 1201 +weight 0 12 4.465908 0.000000 1204 +worth 0 11 4.553877 0.000000 1294 +summar 0 11 4.553877 0.000000 1295 +benjamin 0 11 4.553877 0.000000 1296 +bill 0 11 4.553877 0.000000 1297 +relationship 0 10 4.653960 0.000000 1383 +forc 0 10 4.653960 0.000000 1384 +recoveri 0 9 4.753590 0.000000 1474 +crash 0 8 4.875197 0.000000 1616 +databasesystem 0 8 4.875197 0.000000 1617 +hash 0 8 4.875197 0.000000 1618 +cum 0 8 4.875197 0.000000 1619 +attach 0 7 5.010635 0.000000 1785 +silberschatz 0 6 5.164786 0.000000 1978 +textual 0 6 5.164786 0.000000 1979 +alphabet 0 6 5.164786 0.000000 1980 +phrase 0 5 5.347108 0.000000 2242 +ross 0 5 5.347108 0.000000 2243 +tupl 0 5 5.347108 0.000000 2244 +marco 0 4 5.568345 0.000000 2589 +clearli 0 4 5.568345 0.000000 2590 +entiti 0 3 5.857933 0.000000 3096 +roughli 0 3 5.857933 0.000000 3097 +singhal 0 3 5.857933 0.000000 3098 +sendmail 0 3 5.857933 0.000000 3099 +korth 0 2 6.263398 0.000000 4051 +aguilera 0 2 6.263398 0.000000 4052 +amith 0 2 6.263398 0.000000 4053 +thegroup 0 2 6.263398 0.000000 4054 +universityspr 0 2 6.263398 0.000000 4055 +introductionthi 0 2 6.263398 0.000000 4056 +queryoptim 0 2 6.263398 0.000000 4057 +prerequisitesc 0 2 6.263398 0.000000 4058 +elmasri 0 2 6.263398 0.000000 4059 +salton 0 2 6.263398 0.000000 4060 +amitsingh 0 2 6.263398 0.000000 4061 +yamasani 0 2 6.263398 0.000000 4062 +ofyour 0 2 6.263398 0.000000 4063 +ofcours 0 2 6.263398 0.000000 4064 +throughth 0 2 6.263398 0.000000 4065 +iti 0 2 6.263398 0.000000 4066 +dole 0 2 6.263398 0.000000 4067 +schedulethi 0 2 6.263398 0.000000 4068 +availablethursdai 0 1 6.957497 0.000000 6253 +duetuesdai 0 1 6.957497 0.000000 6254 +regrad 0 1 6.957497 0.000000 6255 +retrievalthursdai 0 1 6.957497 0.000000 6256 +retrievaldepart 0 1 6.957497 0.000000 6257 +gradeshav 0 1 6.957497 0.000000 6258 +twothird 0 1 6.957497 0.000000 6259 +systemsinclud 0 1 6.957497 0.000000 6260 +transactionprocess 0 1 6.957497 0.000000 6261 +usefulinform 0 1 6.957497 0.000000 6262 +willcov 0 1 6.957497 0.000000 6263 +invert 0 1 6.957497 0.000000 6264 +smartsystem 0 1 6.957497 0.000000 6265 +relevancefeedback 0 1 6.957497 0.000000 6266 +thesaurusconstruct 0 1 6.957497 0.000000 6267 +automatictext 0 1 6.957497 0.000000 6268 +placetuesdai 0 1 6.957497 0.000000 6269 +thurston 0 1 6.957497 0.000000 6270 +booksdatabas 0 1 6.957497 0.000000 6271 +mcgrawhil 0 1 6.957497 0.000000 6272 +andnavath 0 1 6.957497 0.000000 6273 +byullman 0 1 6.957497 0.000000 6274 +photocopiedmateri 0 1 6.957497 0.000000 6275 +sophia 0 1 6.957497 0.000000 6276 +georgiakaki 0 1 6.957497 0.000000 6277 +officehour 0 1 6.957497 0.000000 6278 +gradingexam 0 1 6.957497 0.000000 6279 +yourfin 0 1 6.957497 0.000000 6280 +policiesy 0 1 6.957497 0.000000 6281 +samegrad 0 1 6.957497 0.000000 6282 +tuesdayand 0 1 6.957497 0.000000 6283 +illeg 0 1 6.957497 0.000000 6284 +latexif 0 1 6.957497 0.000000 6285 +goodopportun 0 1 6.957497 0.000000 6286 +submissionpleas 0 1 6.957497 0.000000 6287 +clinton 0 1 6.957497 0.000000 6288 +perot 0 1 6.957497 0.000000 6289 +homeworksgrad 0 1 6.957497 0.000000 6290 +sortedalphabet 0 1 6.957497 0.000000 6291 +thecov 0 1 6.957497 0.000000 6292 +pagefollow 0 1 6.957497 0.000000 6293 +policyal 0 1 6.957497 0.000000 6294 +inwrit 0 1 6.957497 0.000000 6295 +referto 0 1 6.957497 0.000000 6296 +modelhomework 0 1 6.957497 0.000000 6297 +weightingthursdai 0 1 6.957497 0.000000 6298 +indexinghomework 0 1 6.957497 0.000000 6299 +evaluationtuesdai 0 1 6.957497 0.000000 6300 +feedbackthursdai 0 1 6.957497 0.000000 6301 +clusteringhomework 0 1 6.957497 0.000000 6302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..76f92d22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +hour 0 165 1.791759 0.000000 46 +file 1 132 1.945910 1.945910 70 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +text 0 98 2.302585 0.000000 133 +peopl 0 96 2.302585 0.000000 132 +stuff 0 87 2.484907 0.000000 171 +resourc 0 81 2.484907 0.000000 172 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +come 0 78 2.564949 0.000000 202 +handout 0 64 2.772589 0.000000 263 +evalu 0 64 2.772589 0.000000 266 +collect 0 65 2.772589 0.000000 268 +new 0 64 2.772589 0.000000 262 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +suggest 0 53 2.944439 0.000000 331 +profession 0 51 2.995732 0.000000 345 +give 0 50 3.044522 0.000000 359 +electron 0 47 3.091042 0.000000 379 +submit 0 39 3.258097 0.000000 440 +respons 0 37 3.332205 0.000000 476 +procedur 0 36 3.367296 0.000000 488 +print 0 34 3.401197 0.000000 503 +quot 0 29 3.583519 0.000000 582 +ask 0 28 3.610918 0.000000 597 +wai 0 25 3.737670 0.000000 662 +leav 0 21 3.912023 0.000000 772 +break 0 20 3.951244 0.000000 812 +nice 0 20 3.951244 0.000000 809 +modif 0 17 4.110874 0.000000 913 +mayb 0 15 4.248495 0.000000 1014 +convert 0 13 4.382027 0.000000 1122 +social 0 13 4.382027 0.000000 1123 +misc 0 13 4.382027 0.000000 1124 +submiss 0 11 4.553877 0.000000 1298 +recit 0 9 4.753590 0.000000 1475 +admin 0 9 4.753590 0.000000 1476 +joke 0 8 4.875197 0.000000 1620 +printer 0 8 4.875197 0.000000 1621 +ethic 0 7 5.010635 0.000000 1786 +header 0 7 5.010635 0.000000 1787 +pfile 0 3 5.857933 0.000000 3100 +sumedh 0 3 5.857933 0.000000 3101 +enscript 0 2 6.263398 0.000000 4069 +incl 0 2 6.263398 0.000000 4070 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..b6bf01f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +real 0 93 2.397895 0.000000 144 +present 0 91 2.397895 0.000000 145 +comment 0 93 2.397895 0.000000 146 +practic 0 70 2.708050 0.000000 246 +complex 0 64 2.772589 0.000000 269 +interact 0 62 2.772589 0.000000 270 +descript 0 64 2.772589 0.000000 271 +plan 0 65 2.772589 0.000000 272 +simpl 0 60 2.833213 0.000000 298 +semest 0 58 2.890372 0.000000 312 +instruct 0 53 2.944439 0.000000 332 +tabl 0 51 2.995732 0.000000 346 +basic 0 50 3.044522 0.000000 360 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +rang 0 30 3.555348 0.000000 565 +depend 0 29 3.583519 0.000000 583 +progress 0 28 3.610918 0.000000 598 +aspect 0 25 3.737670 0.000000 663 +size 0 23 3.806662 0.000000 713 +varieti 0 22 3.850148 0.000000 740 +practicum 0 16 4.174387 0.000000 960 +choos 0 16 4.174387 0.000000 964 +signific 0 13 4.382027 0.000000 1125 +werner 0 10 4.653960 0.000000 1385 +vogel 0 8 4.875197 0.000000 1622 +earn 0 7 5.010635 0.000000 1788 +theywil 0 3 5.857933 0.000000 3102 +contentspag 0 3 5.857933 0.000000 3103 +offersa 0 2 6.263398 0.000000 4071 +systemsor 0 1 6.957497 0.000000 6303 +dirti 0 1 6.957497 0.000000 6304 +internetworkingto 0 1 6.957497 0.000000 6305 +teamsof 0 1 6.957497 0.000000 6306 +trough 0 1 6.957497 0.000000 6307 +complexityof 0 1 6.957497 0.000000 6308 +offcial 0 1 6.957497 0.000000 6309 +pageslink 0 1 6.957497 0.000000 6310 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..4a893cdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +avail 1 169 1.791759 1.791759 48 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +problem 1 147 1.945910 1.945910 75 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +part 0 98 2.302585 0.000000 129 +need 0 98 2.302585 0.000000 135 +graphic 0 90 2.397895 0.000000 147 +follow 0 92 2.397895 0.000000 143 +homepag 0 93 2.397895 0.000000 148 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +solut 1 82 2.484907 2.484907 162 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +start 0 83 2.484907 0.000000 173 +refer 0 78 2.564949 0.000000 203 +homework 0 79 2.564949 0.000000 193 +server 0 76 2.564949 0.000000 204 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +septemb 0 65 2.772589 0.000000 274 +semest 0 58 2.890372 0.000000 312 +get 0 46 3.091042 0.000000 380 +announc 0 40 3.258097 0.000000 441 +electr 0 38 3.295837 0.000000 461 +manual 0 35 3.401197 0.000000 504 +intend 0 28 3.610918 0.000000 599 +outlin 0 17 4.110874 0.000000 914 +intro 0 17 4.110874 0.000000 915 +anyon 0 17 4.110874 0.000000 916 +conduct 0 14 4.317488 0.000000 1065 +train 0 14 4.317488 0.000000 1066 +valid 0 11 4.553877 0.000000 1299 +literatur 0 11 4.553877 0.000000 1300 +surf 0 11 4.553877 0.000000 1301 +andcomput 0 8 4.875197 0.000000 1623 +architect 0 8 4.875197 0.000000 1624 +theproject 0 6 5.164786 0.000000 1981 +gopher 0 6 5.164786 0.000000 1982 +mentor 0 4 5.568345 0.000000 2591 +sole 0 4 5.568345 0.000000 2592 +addition 0 4 5.568345 0.000000 2593 +saluja 0 3 5.857933 0.000000 3104 +duedat 0 3 5.857933 0.000000 3105 +wiscinfo 0 3 5.857933 0.000000 3106 +kewal 0 2 6.263398 0.000000 4072 +studentsenrol 0 2 6.263398 0.000000 4073 +sorin 0 1 6.957497 0.000000 6311 +generalinform 0 1 6.957497 0.000000 6312 +midtermsyllabu 0 1 6.957497 0.000000 6313 +midtermi 0 1 6.957497 0.000000 6314 +caeworkst 0 1 6.957497 0.000000 6315 +whomai 0 1 6.957497 0.000000 6316 +throughbold_brows 0 1 6.957497 0.000000 6317 +gettingstart 0 1 6.957497 0.000000 6318 +workbook 0 1 6.957497 0.000000 6319 +quicksim 0 1 6.957497 0.000000 6320 +trainingworkbook 0 1 6.957497 0.000000 6321 +exersis 0 1 6.957497 0.000000 6322 +thesedocu 0 1 6.957497 0.000000 6323 +uwengin 0 1 6.957497 0.000000 6324 +pmcst 0 1 6.957497 0.000000 6325 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..da4ea9e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +click 0 142 1.945910 0.000000 78 +schedul 0 119 2.079442 0.000000 85 +pleas 0 113 2.197225 0.000000 114 +grade 0 90 2.397895 0.000000 142 +homepag 0 93 2.397895 0.000000 148 +section 0 94 2.397895 0.000000 149 +info 1 85 2.484907 2.484907 176 +exam 0 86 2.484907 0.000000 169 +stuff 0 87 2.484907 0.000000 171 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +homework 0 79 2.564949 0.000000 193 +refer 0 78 2.564949 0.000000 203 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +run 0 51 2.995732 0.000000 347 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +abl 0 30 3.555348 0.000000 566 +frame 0 24 3.761200 0.000000 684 +handl 0 24 3.761200 0.000000 685 +individu 0 13 4.382027 0.000000 1126 +criteria 0 9 4.753590 0.000000 1477 +preced 0 3 5.857933 0.000000 3107 +goofi 0 2 6.263398 0.000000 4074 +herelink 0 1 6.957497 0.000000 6326 +motw 0 1 6.957497 0.000000 6327 +stuffnot 0 1 6.957497 0.000000 6328 +edupag 0 1 6.957497 0.000000 6329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..747267bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +cours 1 273 1.098612 1.098612 15 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +chang 0 82 2.484907 0.000000 163 +requir 0 81 2.484907 0.000000 167 +info 0 85 2.484907 0.000000 176 +materi 1 75 2.639057 2.639057 221 +window 0 68 2.708050 0.000000 242 +java 0 70 2.708050 0.000000 248 +browser 0 56 2.890372 0.000000 313 +netscap 0 44 3.135494 0.000000 395 +directori 0 45 3.135494 0.000000 396 +announc 0 40 3.258097 0.000000 441 +request 0 26 3.688879 0.000000 635 +interpret 1 24 3.761200 3.761200 686 +thu 0 21 3.912023 0.000000 773 +util 0 21 3.912023 0.000000 774 +behavior 0 18 4.060443 0.000000 881 +attempt 0 17 4.110874 0.000000 917 +pagec 0 15 4.248495 0.000000 1011 +correctli 0 9 4.753590 0.000000 1478 +dylan 0 8 4.875197 0.000000 1625 +password 0 4 5.568345 0.000000 2594 +parter 0 2 6.263398 0.000000 4075 +noodll 0 1 6.957497 0.000000 6330 +inconsist 0 1 6.957497 0.000000 6331 +partnerjoin 0 1 6.957497 0.000000 6332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..3d403f14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,360 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +lectur 0 135 1.945910 0.000000 73 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +site 0 106 2.197225 0.000000 119 +topic 0 114 2.197225 0.000000 110 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +well 0 109 2.197225 0.000000 121 +version 0 113 2.197225 0.000000 122 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +mathemat 0 108 2.197225 0.000000 123 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +structur 0 106 2.197225 0.000000 105 +techniqu 0 99 2.302585 0.000000 138 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +peopl 0 96 2.302585 0.000000 132 +memori 0 101 2.302585 0.000000 139 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +exam 0 86 2.484907 0.000000 169 +help 0 83 2.484907 0.000000 175 +info 0 85 2.484907 0.000000 176 +contain 0 81 2.484907 0.000000 174 +chang 0 82 2.484907 0.000000 163 +learn 0 86 2.484907 0.000000 170 +solut 0 82 2.484907 0.000000 162 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +mondai 0 77 2.564949 0.000000 206 +exampl 0 77 2.564949 0.000000 195 +state 0 76 2.564949 0.000000 207 +dynam 0 76 2.564949 0.000000 194 +good 0 77 2.564949 0.000000 200 +want 0 79 2.564949 0.000000 199 +complet 0 77 2.564949 0.000000 208 +come 0 78 2.564949 0.000000 202 +server 0 76 2.564949 0.000000 204 +optim 0 79 2.564949 0.000000 197 +tuesdai 0 73 2.639057 0.000000 219 +materi 0 75 2.639057 0.000000 221 +upson 0 71 2.639057 0.000000 218 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +thursdai 0 70 2.708050 0.000000 241 +goal 0 66 2.708050 0.000000 250 +window 0 68 2.708050 0.000000 242 +would 0 67 2.708050 0.000000 251 +function 0 62 2.772589 0.000000 275 +evalu 0 64 2.772589 0.000000 266 +abstract 0 62 2.772589 0.000000 276 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +creat 0 63 2.772589 0.000000 277 +written 0 63 2.772589 0.000000 278 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +colleg 0 61 2.833213 0.000000 300 +type 0 61 2.833213 0.000000 296 +think 0 57 2.890372 0.000000 314 +browser 0 56 2.890372 0.000000 313 +major 0 56 2.890372 0.000000 315 +direct 0 57 2.890372 0.000000 316 +variou 0 56 2.890372 0.000000 317 +reason 0 57 2.890372 0.000000 318 +cover 0 55 2.944439 0.000000 329 +allow 0 53 2.944439 0.000000 333 +instruct 0 53 2.944439 0.000000 332 +local 0 55 2.944439 0.000000 334 +date 0 51 2.995732 0.000000 344 +week 0 52 2.995732 0.000000 343 +run 0 51 2.995732 0.000000 347 +set 0 50 3.044522 0.000000 361 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +electron 0 47 3.091042 0.000000 379 +even 0 45 3.135494 0.000000 393 +textbook 0 44 3.135494 0.000000 397 +netscap 0 44 3.135494 0.000000 395 +long 0 43 3.178054 0.000000 413 +http 0 41 3.218876 0.000000 420 +combin 0 42 3.218876 0.000000 421 +howev 0 41 3.218876 0.000000 422 +late 0 40 3.258097 0.000000 439 +must 0 40 3.258097 0.000000 442 +submit 0 39 3.258097 0.000000 440 +correct 0 38 3.295837 0.000000 462 +credit 0 38 3.295837 0.000000 460 +respons 0 37 3.332205 0.000000 476 +hand 0 37 3.332205 0.000000 475 +staff 0 36 3.367296 0.000000 490 +procedur 0 36 3.367296 0.000000 488 +ofth 0 36 3.367296 0.000000 491 +download 0 36 3.367296 0.000000 489 +tree 0 36 3.367296 0.000000 492 +post 0 35 3.401197 0.000000 505 +either 0 35 3.401197 0.000000 506 +jame 0 35 3.401197 0.000000 507 +word 0 34 3.401197 0.000000 508 +approxim 0 35 3.401197 0.000000 509 +return 0 34 3.401197 0.000000 502 +singl 0 34 3.401197 0.000000 510 +random 0 34 3.401197 0.000000 511 +concept 0 32 3.465736 0.000000 537 +given 0 32 3.465736 0.000000 538 +extend 0 32 3.465736 0.000000 539 +express 0 32 3.465736 0.000000 540 +often 0 31 3.496508 0.000000 551 +rang 0 30 3.555348 0.000000 565 +computersci 0 30 3.555348 0.000000 562 +robert 0 30 3.555348 0.000000 567 +focus 0 29 3.583519 0.000000 584 +ask 0 28 3.610918 0.000000 597 +held 0 28 3.610918 0.000000 600 +campu 0 27 3.637586 0.000000 623 +symbol 0 27 3.637586 0.000000 620 +request 0 26 3.688879 0.000000 635 +rule 0 26 3.688879 0.000000 638 +background 0 25 3.737670 0.000000 664 +valu 0 25 3.737670 0.000000 665 +consult 0 24 3.761200 0.000000 687 +reach 0 24 3.761200 0.000000 688 +pattern 0 24 3.761200 0.000000 689 +interpret 0 24 3.761200 0.000000 686 +higher 0 24 3.761200 0.000000 690 +togeth 0 23 3.806662 0.000000 714 +variabl 0 23 3.806662 0.000000 715 +inth 0 22 3.850148 0.000000 741 +almost 0 22 3.850148 0.000000 742 +period 0 22 3.850148 0.000000 743 +thu 0 21 3.912023 0.000000 773 +annot 0 21 3.912023 0.000000 775 +half 0 21 3.912023 0.000000 776 +tell 0 21 3.912023 0.000000 777 +sure 0 20 3.951244 0.000000 813 +facil 0 20 3.951244 0.000000 814 +wonder 0 20 3.951244 0.000000 815 +break 0 20 3.951244 0.000000 812 +exercis 0 19 4.007333 0.000000 842 +els 0 19 4.007333 0.000000 843 +accept 0 18 4.060443 0.000000 879 +encourag 0 18 4.060443 0.000000 880 +account 0 18 4.060443 0.000000 882 +whether 0 17 4.110874 0.000000 918 +weekli 0 17 4.110874 0.000000 919 +macintosh 0 17 4.110874 0.000000 920 +outlin 0 17 4.110874 0.000000 914 +differenti 0 17 4.110874 0.000000 921 +match 0 16 4.174387 0.000000 965 +modern 0 16 4.174387 0.000000 966 +transfer 0 16 4.174387 0.000000 967 +earli 0 16 4.174387 0.000000 968 +easi 0 16 4.174387 0.000000 969 +stream 0 15 4.248495 0.000000 1015 +capabl 0 15 4.248495 0.000000 1016 +score 0 15 4.248495 0.000000 1017 +hierarch 0 15 4.248495 0.000000 1018 +borland 0 14 4.317488 0.000000 1067 +warn 0 14 4.317488 0.000000 1068 +polynomi 0 14 4.317488 0.000000 1069 +recurs 0 13 4.382027 0.000000 1127 +someon 0 13 4.382027 0.000000 1128 +suit 0 13 4.382027 0.000000 1129 +joint 0 13 4.382027 0.000000 1130 +skill 0 12 4.465908 0.000000 1205 +prelim 0 12 4.465908 0.000000 1201 +iter 0 12 4.465908 0.000000 1206 +broad 0 11 4.553877 0.000000 1302 +appl 0 11 4.553877 0.000000 1303 +induct 0 11 4.553877 0.000000 1304 +queue 0 10 4.653960 0.000000 1386 +sundai 0 10 4.653960 0.000000 1387 +arithmet 0 10 4.653960 0.000000 1388 +stack 0 10 4.653960 0.000000 1389 +introductori 0 9 4.753590 0.000000 1479 +recit 0 9 4.753590 0.000000 1475 +preliminari 0 9 4.753590 0.000000 1480 +andth 0 9 4.753590 0.000000 1481 +dylan 0 8 4.875197 0.000000 1625 +simpli 0 8 4.875197 0.000000 1626 +matter 0 8 4.875197 0.000000 1627 +on 0 8 4.875197 0.000000 1628 +justin 0 7 5.010635 0.000000 1789 +happen 0 7 5.010635 0.000000 1790 +dispatch 0 7 5.010635 0.000000 1791 +prioriti 0 7 5.010635 0.000000 1792 +huttenloch 0 6 5.164786 0.000000 1983 +chosen 0 6 5.164786 0.000000 1984 +contract 0 6 5.164786 0.000000 1985 +garbag 0 6 5.164786 0.000000 1986 +conot 0 5 5.347108 0.000000 2245 +hardcopi 0 5 5.347108 0.000000 2246 +substitut 0 5 5.347108 0.000000 2247 +variat 0 5 5.347108 0.000000 2248 +password 0 4 5.568345 0.000000 2594 +toth 0 4 5.568345 0.000000 2595 +infinit 0 4 5.568345 0.000000 2596 +clearli 0 4 5.568345 0.000000 2590 +wherea 0 4 5.568345 0.000000 2597 +exposur 0 4 5.568345 0.000000 2598 +midnight 0 4 5.568345 0.000000 2599 +amaz 0 4 5.568345 0.000000 2600 +thiscours 0 4 5.568345 0.000000 2601 +catch 0 4 5.568345 0.000000 2602 +illus 0 4 5.568345 0.000000 2603 +szewczyk 0 3 5.857933 0.000000 3108 +voskuhl 0 3 5.857933 0.000000 3109 +useth 0 3 5.857933 0.000000 3110 +programsand 0 3 5.857933 0.000000 3111 +toolbox 0 3 5.857933 0.000000 3112 +programmingtechniqu 0 3 5.857933 0.000000 3113 +kimbal 0 3 5.857933 0.000000 3114 +andon 0 3 5.857933 0.000000 3115 +requirementsstud 0 3 5.857933 0.000000 3116 +immedi 0 3 5.857933 0.000000 3117 +jointli 0 3 5.857933 0.000000 3118 +doubt 0 3 5.857933 0.000000 3119 +argument 0 3 5.857933 0.000000 3120 +quotat 0 3 5.857933 0.000000 3121 +inherit 0 3 5.857933 0.000000 3122 +heap 0 3 5.857933 0.000000 3123 +exit 0 3 5.857933 0.000000 3124 +rangeof 0 2 6.263398 0.000000 4076 +standalon 0 2 6.263398 0.000000 4077 +developedat 0 2 6.263398 0.000000 4078 +orientedlanguag 0 2 6.263398 0.000000 4079 +therewil 0 2 6.263398 0.000000 4080 +combinationof 0 2 6.263398 0.000000 4081 +programmingproblem 0 2 6.263398 0.000000 4082 +youwork 0 2 6.263398 0.000000 4083 +growth 0 2 6.263398 0.000000 4084 +ugrad 0 1 6.957497 0.000000 6333 +idand 0 1 6.957497 0.000000 6334 +tobia 0 1 6.957497 0.000000 6335 +mayr 0 1 6.957497 0.000000 6336 +hamblin 0 1 6.957497 0.000000 6337 +mutabl 0 1 6.957497 0.000000 6338 +informationaugust 0 1 6.957497 0.000000 6339 +courseabout 0 1 6.957497 0.000000 6340 +notationthat 0 1 6.957497 0.000000 6341 +takec 0 1 6.957497 0.000000 6342 +programmingparadigm 0 1 6.957497 0.000000 6343 +imperativeprogram 0 1 6.957497 0.000000 6344 +goodform 0 1 6.957497 0.000000 6345 +probablytak 0 1 6.957497 0.000000 6346 +questionsor 0 1 6.957497 0.000000 6347 +serverwhich 0 1 6.957497 0.000000 6348 +answersa 0 1 6.957497 0.000000 6349 +thisweek 0 1 6.957497 0.000000 6350 +edubut 0 1 6.957497 0.000000 6351 +aboutproblem 0 1 6.957497 0.000000 6352 +upsonjam 0 1 6.957497 0.000000 6353 +tarobert 0 1 6.957497 0.000000 6354 +tajustin 0 1 6.957497 0.000000 6355 +taandra 0 1 6.957497 0.000000 6356 +ferencz 0 1 6.957497 0.000000 6357 +melissa 0 1 6.957497 0.000000 6358 +consultantwhen 0 1 6.957497 0.000000 6359 +meetlectur 0 1 6.957497 0.000000 6360 +andrecit 0 1 6.957497 0.000000 6361 +recitationsexpand 0 1 6.957497 0.000000 6362 +opportunityto 0 1 6.957497 0.000000 6363 +eachproblem 0 1 6.957497 0.000000 6364 +setsdu 0 1 6.957497 0.000000 6365 +mondayeven 0 1 6.957497 0.000000 6366 +consultinghour 0 1 6.957497 0.000000 6367 +voskuhltba 0 1 6.957497 0.000000 6368 +materialsther 0 1 6.957497 0.000000 6369 +handoutsand 0 1 6.957497 0.000000 6370 +implementedin 0 1 6.957497 0.000000 6371 +downloadonto 0 1 6.957497 0.000000 6372 +ontoyour 0 1 6.957497 0.000000 6373 +recentvers 0 1 6.957497 0.000000 6374 +gradeswil 0 1 6.957497 0.000000 6375 +thetot 0 1 6.957497 0.000000 6376 +willgener 0 1 6.957497 0.000000 6377 +followingclass 0 1 6.957497 0.000000 6378 +sittingdown 0 1 6.957497 0.000000 6379 +sink 0 1 6.957497 0.000000 6380 +beforesit 0 1 6.957497 0.000000 6381 +workmuch 0 1 6.957497 0.000000 6382 +jointassign 0 1 6.957497 0.000000 6383 +circumstancesmai 0 1 6.957497 0.000000 6384 +yourown 0 1 6.957497 0.000000 6385 +yougot 0 1 6.957497 0.000000 6386 +whenpeopl 0 1 6.957497 0.000000 6387 +lifeunpleas 0 1 6.957497 0.000000 6388 +facilitiescit 0 1 6.957497 0.000000 6389 +andpc 0 1 6.957497 0.000000 6390 +upsonmac 0 1 6.957497 0.000000 6391 +datesal 0 1 6.957497 0.000000 6392 +mondaynight 0 1 6.957497 0.000000 6393 +submityour 0 1 6.957497 0.000000 6394 +multimethod 0 1 6.957497 0.000000 6395 +heapsort 0 1 6.957497 0.000000 6396 +metacircular 0 1 6.957497 0.000000 6397 +nonloc 0 1 6.957497 0.000000 6398 +throw 0 1 6.957497 0.000000 6399 +quicksort 0 1 6.957497 0.000000 6400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..1ee5af67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +document 0 121 2.079442 0.000000 89 +send 0 114 2.197225 0.000000 109 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +memori 0 101 2.302585 0.000000 139 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +help 0 83 2.484907 0.000000 175 +control 0 82 2.484907 0.000000 164 +start 0 83 2.484907 0.000000 173 +homework 0 79 2.564949 0.000000 193 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +materi 0 75 2.639057 0.000000 221 +organ 0 65 2.772589 0.000000 265 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +made 0 44 3.135494 0.000000 398 +staff 0 36 3.367296 0.000000 490 +represent 0 35 3.401197 0.000000 512 +post 0 35 3.401197 0.000000 505 +bookmark 0 26 3.688879 0.000000 639 +request 0 26 3.688879 0.000000 635 +consult 0 24 3.761200 0.000000 687 +hierarchi 0 22 3.850148 0.000000 744 +sequenti 0 22 3.850148 0.000000 745 +annot 0 21 3.912023 0.000000 775 +path 0 21 3.912023 0.000000 778 +unit 0 21 3.912023 0.000000 779 +separ 0 19 4.007333 0.000000 844 +appropri 0 18 4.060443 0.000000 883 +account 0 18 4.060443 0.000000 882 +otherwis 0 17 4.110874 0.000000 922 +sign 0 16 4.174387 0.000000 970 +circuit 0 13 4.382027 0.000000 1131 +difficulti 0 13 4.382027 0.000000 1132 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +assembl 0 12 4.465908 0.000000 1207 +combinatori 0 8 4.875197 0.000000 1629 +interrupt 0 7 5.010635 0.000000 1793 +saturdai 0 7 5.010635 0.000000 1794 +hidden 0 6 5.164786 0.000000 1987 +conot 0 5 5.347108 0.000000 2245 +registr 0 5 5.347108 0.000000 2249 +microprogram 0 4 5.568345 0.000000 2604 +eickenfal 0 3 5.857933 0.000000 3125 +kimbal 0 3 5.857933 0.000000 3114 +helpif 0 3 5.857933 0.000000 3126 +mate 0 3 5.857933 0.000000 3127 +encount 0 3 5.857933 0.000000 3128 +btopic 0 2 6.263398 0.000000 4085 +organizationthorsten 0 1 6.957497 0.000000 6401 +materialsal 0 1 6.957497 0.000000 6402 +listlist 0 1 6.957497 0.000000 6403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..0840f896 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +hour 0 165 1.791759 0.000000 46 +file 1 132 1.945910 1.945910 70 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +text 0 98 2.302585 0.000000 133 +peopl 0 96 2.302585 0.000000 132 +stuff 0 87 2.484907 0.000000 171 +resourc 0 81 2.484907 0.000000 172 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +mondai 0 77 2.564949 0.000000 206 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +handout 0 64 2.772589 0.000000 263 +evalu 0 64 2.772589 0.000000 266 +collect 0 65 2.772589 0.000000 268 +new 0 64 2.772589 0.000000 262 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +suggest 0 53 2.944439 0.000000 331 +profession 0 51 2.995732 0.000000 345 +give 0 50 3.044522 0.000000 359 +electron 0 47 3.091042 0.000000 379 +submit 0 39 3.258097 0.000000 440 +respons 0 37 3.332205 0.000000 476 +procedur 0 36 3.367296 0.000000 488 +print 0 34 3.401197 0.000000 503 +quot 0 29 3.583519 0.000000 582 +held 0 28 3.610918 0.000000 600 +ask 0 28 3.610918 0.000000 597 +wai 0 25 3.737670 0.000000 662 +leav 0 21 3.912023 0.000000 772 +break 0 20 3.951244 0.000000 812 +nice 0 20 3.951244 0.000000 809 +modif 0 17 4.110874 0.000000 913 +mayb 0 15 4.248495 0.000000 1014 +floor 0 14 4.317488 0.000000 1070 +convert 0 13 4.382027 0.000000 1122 +social 0 13 4.382027 0.000000 1123 +misc 0 13 4.382027 0.000000 1124 +submiss 0 11 4.553877 0.000000 1298 +recit 0 9 4.753590 0.000000 1475 +admin 0 9 4.753590 0.000000 1476 +joke 0 8 4.875197 0.000000 1620 +printer 0 8 4.875197 0.000000 1621 +ethic 0 7 5.010635 0.000000 1786 +header 0 7 5.010635 0.000000 1787 +pfile 0 3 5.857933 0.000000 3100 +sumedh 0 3 5.857933 0.000000 3101 +enscript 0 2 6.263398 0.000000 4069 +incl 0 2 6.263398 0.000000 4070 +csuglab 0 1 6.957497 0.000000 6404 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..e4efe862 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +hall 0 146 1.945910 0.000000 65 +note 0 142 1.945910 0.000000 67 +structur 0 106 2.197225 0.000000 105 +solut 1 82 2.484907 2.484907 162 +dynam 0 76 2.564949 0.000000 194 +upson 0 71 2.639057 0.000000 218 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +new 0 64 2.772589 0.000000 262 +unix 0 58 2.890372 0.000000 308 +fridai 0 44 3.135494 0.000000 390 +static 0 27 3.637586 0.000000 619 +practicum 0 16 4.174387 0.000000 960 +pagec 0 15 4.248495 0.000000 1011 +prelim 0 12 4.465908 0.000000 1201 +huang 0 12 4.465908 0.000000 1202 +systemsc 0 11 4.553877 0.000000 1293 +cheng 0 10 4.653960 0.000000 1381 +lili 0 5 5.347108 0.000000 2240 +filesystem 0 4 5.568345 0.000000 2587 +groupcours 0 3 5.857933 0.000000 3092 +ychuang 0 3 5.857933 0.000000 3093 +budiu 0 2 6.263398 0.000000 4042 +systemkenneth 0 2 6.263398 0.000000 4043 +birmanc 0 2 6.263398 0.000000 4044 +syllabuslectur 0 2 6.263398 0.000000 4045 +taslili 0 2 6.263398 0.000000 4046 +mihai 0 2 6.263398 0.000000 4047 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..fd1c1992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +includ 0 208 1.609438 0.000000 42 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +assign 1 135 1.945910 1.945910 66 +file 0 132 1.945910 0.000000 70 +problem 0 147 1.945910 0.000000 75 +postscript 0 131 2.079442 0.000000 90 +welcom 0 122 2.079442 0.000000 99 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +code 0 108 2.197225 0.000000 116 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +need 0 98 2.302585 0.000000 135 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +solut 1 82 2.484907 2.484907 162 +academ 0 82 2.484907 0.000000 178 +chang 0 82 2.484907 0.000000 163 +start 0 83 2.484907 0.000000 173 +novemb 0 81 2.484907 0.000000 179 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +complet 0 77 2.564949 0.000000 208 +materi 0 75 2.639057 0.000000 221 +intellig 0 72 2.639057 0.000000 225 +appli 0 71 2.639057 0.000000 226 +tuesdai 0 73 2.639057 0.000000 219 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +result 0 65 2.772589 0.000000 281 +special 0 56 2.890372 0.000000 320 +found 0 53 2.944439 0.000000 337 +date 0 51 2.995732 0.000000 344 +still 0 50 3.044522 0.000000 362 +right 0 48 3.044522 0.000000 363 +move 0 47 3.091042 0.000000 382 +get 0 46 3.091042 0.000000 380 +netscap 0 44 3.135494 0.000000 395 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +third 0 43 3.178054 0.000000 412 +announc 0 40 3.258097 0.000000 441 +either 0 35 3.401197 0.000000 506 +queri 0 33 3.433987 0.000000 524 +posit 0 31 3.496508 0.000000 552 +abl 0 30 3.555348 0.000000 566 +specifi 0 30 3.555348 0.000000 568 +exist 0 30 3.555348 0.000000 569 +limit 0 29 3.583519 0.000000 585 +load 0 28 3.610918 0.000000 601 +concern 0 25 3.737670 0.000000 666 +begin 0 23 3.806662 0.000000 716 +variabl 0 23 3.806662 0.000000 715 +defin 0 22 3.850148 0.000000 746 +fact 0 21 3.912023 0.000000 780 +longer 0 20 3.951244 0.000000 816 +assum 0 19 4.007333 0.000000 845 +scott 0 18 4.060443 0.000000 884 +account 0 18 4.060443 0.000000 882 +statu 0 18 4.060443 0.000000 885 +previous 0 17 4.110874 0.000000 923 +otherwis 0 17 4.110874 0.000000 922 +alreadi 0 16 4.174387 0.000000 963 +portion 0 16 4.174387 0.000000 971 +atth 0 15 4.248495 0.000000 1019 +success 0 10 4.653960 0.000000 1390 +guarante 0 10 4.653960 0.000000 1391 +kevin 1 9 4.753590 4.753590 1482 +slightli 0 7 5.010635 0.000000 1795 +paramet 0 7 5.010635 0.000000 1796 +planner 0 7 5.010635 0.000000 1797 +hunt 0 7 5.010635 0.000000 1798 +remind 0 7 5.010635 0.000000 1799 +schema 0 6 5.164786 0.000000 1988 +bind 0 5 5.347108 0.000000 2250 +constant 0 5 5.347108 0.000000 2251 +seriou 0 5 5.347108 0.000000 2252 +clarif 0 5 5.347108 0.000000 2253 +clair 1 4 5.568345 5.568345 2605 +shouldn 0 4 5.568345 0.000000 2606 +unless 0 4 5.568345 0.000000 2607 +meanwhil 0 3 5.857933 0.000000 3129 +worri 0 3 5.857933 0.000000 3130 +add 0 3 5.857933 0.000000 3131 +thec 0 3 5.857933 0.000000 3132 +pagesc 0 3 5.857933 0.000000 3133 +pagecsfound 0 2 6.263398 0.000000 4086 +dodg 0 1 6.957497 0.000000 6405 +notethat 0 1 6.957497 0.000000 6406 +rubix 0 1 6.957497 0.000000 6407 +thefunct 0 1 6.957497 0.000000 6408 +rearrang 0 1 6.957497 0.000000 6409 +appeas 0 1 6.957497 0.000000 6410 +var 0 1 6.957497 0.000000 6411 +bracket 0 1 6.957497 0.000000 6412 +youus 0 1 6.957497 0.000000 6413 +machinesshould 0 1 6.957497 0.000000 6414 +sbin 0 1 6.957497 0.000000 6415 +ksaunder 0 1 6.957497 0.000000 6416 +sbinfor 0 1 6.957497 0.000000 6417 +gremlin 0 1 6.957497 0.000000 6418 +codefor 0 1 6.957497 0.000000 6419 +andget 0 1 6.957497 0.000000 6420 +uponcomplet 0 1 6.957497 0.000000 6421 +thoseus 0 1 6.957497 0.000000 6422 +zeroon 0 1 6.957497 0.000000 6423 +asspecifi 0 1 6.957497 0.000000 6424 +oneassign 0 1 6.957497 0.000000 6425 +vanto 0 1 6.957497 0.000000 6426 +thisclarif 0 1 6.957497 0.000000 6427 +newhomework 0 1 6.957497 0.000000 6428 +coursemateri 0 1 6.957497 0.000000 6429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..d070c105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +fall 0 181 1.609438 0.000000 40 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +note 0 142 1.945910 0.000000 67 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +studi 0 120 2.079442 0.000000 91 +pleas 0 113 2.197225 0.000000 114 +exam 0 86 2.484907 0.000000 169 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +homework 1 79 2.564949 2.564949 193 +integr 0 67 2.708050 0.000000 245 +room 0 59 2.833213 0.000000 301 +date 0 51 2.995732 0.000000 344 +set 0 50 3.044522 0.000000 361 +revis 0 26 3.688879 0.000000 640 +pagec 0 15 4.248495 0.000000 1011 +automata 0 13 4.382027 0.000000 1135 +prelim 0 12 4.465908 0.000000 1201 +hardcopi 0 5 5.347108 0.000000 2246 +incorrect 0 3 5.857933 0.000000 3134 +nikolai 0 2 6.263398 0.000000 4087 +theorywelcom 0 1 6.957497 0.000000 6430 +guideannounc 0 1 6.957497 0.000000 6431 +erratum 0 1 6.957497 0.000000 6432 +hourscod 0 1 6.957497 0.000000 6433 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..e4a4b927 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +click 0 142 1.945910 0.000000 78 +assign 0 135 1.945910 0.000000 66 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +databas 0 122 2.079442 0.000000 86 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +schedul 0 119 2.079442 0.000000 85 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +specif 0 106 2.197225 0.000000 106 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +associ 0 93 2.397895 0.000000 151 +proceed 0 93 2.397895 0.000000 152 +homepag 0 93 2.397895 0.000000 148 +question 0 91 2.397895 0.000000 141 +exam 0 86 2.484907 0.000000 169 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +larg 0 82 2.484907 0.000000 168 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +activ 0 84 2.484907 0.000000 182 +journal 0 83 2.484907 0.000000 183 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +control 0 82 2.484907 0.000000 164 +resourc 0 81 2.484907 0.000000 172 +refer 0 78 2.564949 0.000000 203 +complet 0 77 2.564949 0.000000 208 +want 0 79 2.564949 0.000000 199 +interfac 0 79 2.564949 0.000000 209 +homework 0 79 2.564949 0.000000 193 +appear 0 78 2.564949 0.000000 210 +come 0 78 2.564949 0.000000 202 +optim 0 79 2.564949 0.000000 197 +know 0 80 2.564949 0.000000 198 +materi 0 75 2.639057 0.000000 221 +upson 0 71 2.639057 0.000000 218 +involv 0 71 2.639057 0.000000 227 +write 0 72 2.639057 0.000000 222 +tuesdai 0 73 2.639057 0.000000 219 +addit 0 74 2.639057 0.000000 228 +meet 0 72 2.639057 0.000000 229 +free 0 73 2.639057 0.000000 224 +thursdai 0 70 2.708050 0.000000 241 +test 0 66 2.708050 0.000000 252 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +import 0 65 2.772589 0.000000 282 +result 0 65 2.772589 0.000000 281 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +evalu 0 64 2.772589 0.000000 266 +handout 0 64 2.772589 0.000000 263 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +interact 0 62 2.772589 0.000000 270 +collect 0 65 2.772589 0.000000 268 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +type 0 61 2.833213 0.000000 296 +detail 0 57 2.890372 0.000000 321 +variou 0 56 2.890372 0.000000 317 +semest 0 58 2.890372 0.000000 312 +think 0 57 2.890372 0.000000 314 +reason 0 57 2.890372 0.000000 318 +cover 0 55 2.944439 0.000000 329 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +much 0 52 2.995732 0.000000 349 +date 0 51 2.995732 0.000000 344 +basic 0 50 3.044522 0.000000 360 +archiv 0 49 3.044522 0.000000 364 +give 0 50 3.044522 0.000000 359 +principl 0 48 3.044522 0.000000 357 +standard 0 48 3.044522 0.000000 365 +possibl 0 47 3.091042 0.000000 378 +could 0 46 3.091042 0.000000 383 +understand 0 47 3.091042 0.000000 384 +textbook 0 44 3.135494 0.000000 397 +discuss 0 45 3.135494 0.000000 399 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +made 0 44 3.135494 0.000000 398 +even 0 45 3.135494 0.000000 393 +term 0 43 3.178054 0.000000 411 +offer 0 43 3.178054 0.000000 414 +around 0 43 3.178054 0.000000 415 +long 0 43 3.178054 0.000000 413 +edit 0 42 3.218876 0.000000 418 +compani 0 41 3.218876 0.000000 423 +examin 0 42 3.218876 0.000000 424 +howev 0 41 3.218876 0.000000 422 +review 0 42 3.218876 0.000000 425 +might 0 41 3.218876 0.000000 426 +transact 0 39 3.258097 0.000000 438 +form 0 39 3.258097 0.000000 443 +realli 0 40 3.258097 0.000000 444 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +prototyp 0 38 3.295837 0.000000 463 +industri 0 38 3.295837 0.000000 464 +close 0 38 3.295837 0.000000 465 +mean 0 37 3.332205 0.000000 477 +hand 0 37 3.332205 0.000000 475 +survei 0 35 3.401197 0.000000 513 +singl 0 34 3.401197 0.000000 510 +michael 0 35 3.401197 0.000000 514 +concurr 0 34 3.401197 0.000000 501 +queri 0 33 3.433987 0.000000 524 +taught 0 33 3.433987 0.000000 526 +concept 0 32 3.465736 0.000000 537 +storag 0 31 3.496508 0.000000 553 +someth 0 31 3.496508 0.000000 554 +compon 0 30 3.555348 0.000000 570 +domain 0 30 3.555348 0.000000 564 +exist 0 30 3.555348 0.000000 569 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +produc 0 30 3.555348 0.000000 572 +turn 0 29 3.583519 0.000000 586 +depend 0 29 3.583519 0.000000 583 +semant 0 29 3.583519 0.000000 587 +propos 0 28 3.610918 0.000000 602 +becom 0 28 3.610918 0.000000 603 +intend 0 28 3.610918 0.000000 599 +actual 0 28 3.610918 0.000000 604 +progress 0 28 3.610918 0.000000 598 +manipul 0 27 3.637586 0.000000 624 +team 0 27 3.637586 0.000000 625 +altern 0 26 3.688879 0.000000 641 +background 0 25 3.737670 0.000000 664 +fundament 0 25 3.737670 0.000000 661 +although 0 25 3.737670 0.000000 667 +toward 0 25 3.737670 0.000000 668 +primari 0 25 3.737670 0.000000 669 +hill 0 25 3.737670 0.000000 670 +aspect 0 25 3.737670 0.000000 663 +alwai 0 24 3.761200 0.000000 691 +wish 0 24 3.761200 0.000000 692 +higher 0 24 3.761200 0.000000 690 +store 0 24 3.761200 0.000000 693 +begin 0 23 3.806662 0.000000 716 +initi 0 23 3.806662 0.000000 717 +lead 0 23 3.806662 0.000000 718 +tent 0 22 3.850148 0.000000 739 +disk 0 22 3.850148 0.000000 747 +recommend 0 22 3.850148 0.000000 737 +finish 0 22 3.850148 0.000000 748 +period 0 22 3.850148 0.000000 743 +among 0 21 3.912023 0.000000 781 +half 0 21 3.912023 0.000000 776 +prerequisit 0 19 4.007333 0.000000 846 +thur 0 19 4.007333 0.000000 847 +lower 0 18 4.060443 0.000000 886 +minim 0 18 4.060443 0.000000 887 +demo 0 18 4.060443 0.000000 888 +stat 0 17 4.110874 0.000000 924 +whether 0 17 4.110874 0.000000 918 +anyon 0 17 4.110874 0.000000 916 +choos 0 16 4.174387 0.000000 964 +ramakrishnan 0 16 4.174387 0.000000 972 +piec 0 15 4.248495 0.000000 1020 +contribut 0 15 4.248495 0.000000 1021 +hopefulli 0 14 4.317488 0.000000 1071 +convent 0 14 4.317488 0.000000 1072 +demand 0 14 4.317488 0.000000 1073 +manner 0 14 4.317488 0.000000 1074 +decid 0 14 4.317488 0.000000 1075 +dbm 0 13 4.382027 0.000000 1136 +essenti 0 13 4.382027 0.000000 1137 +signific 0 13 4.382027 0.000000 1125 +step 0 13 4.382027 0.000000 1138 +introduc 0 13 4.382027 0.000000 1139 +earlier 0 13 4.382027 0.000000 1140 +prelim 0 12 4.465908 0.000000 1201 +amount 0 12 4.465908 0.000000 1208 +grow 0 12 4.465908 0.000000 1209 +workload 0 12 4.465908 0.000000 1210 +buffer 0 12 4.465908 0.000000 1211 +raghu 0 12 4.465908 0.000000 1212 +sens 0 11 4.553877 0.000000 1305 +submiss 0 11 4.553877 0.000000 1298 +benjamin 0 11 4.553877 0.000000 1296 +modular 0 10 4.653960 0.000000 1392 +forc 0 10 4.653960 0.000000 1384 +certain 0 10 4.653960 0.000000 1393 +introductori 0 9 4.753590 0.000000 1479 +herefor 0 9 4.753590 0.000000 1483 +morgan 0 9 4.753590 0.000000 1484 +familiar 0 9 4.753590 0.000000 1485 +suitabl 0 9 4.753590 0.000000 1486 +rel 0 9 4.753590 0.000000 1487 +clear 0 9 4.753590 0.000000 1488 +recoveri 0 9 4.753590 0.000000 1474 +databasesystem 0 8 4.875197 0.000000 1617 +cum 0 8 4.875197 0.000000 1619 +awar 0 7 5.010635 0.000000 1800 +bug 0 7 5.010635 0.000000 1801 +fromth 0 7 5.010635 0.000000 1802 +seshadri 0 7 5.010635 0.000000 1803 +noon 0 7 5.010635 0.000000 1804 +consequ 0 6 5.164786 0.000000 1989 +neither 0 6 5.164786 0.000000 1990 +huge 0 6 5.164786 0.000000 1991 +ifyou 0 6 5.164786 0.000000 1992 +beta 0 6 5.164786 0.000000 1993 +silberschatz 0 6 5.164786 0.000000 1978 +lack 0 6 5.164786 0.000000 1994 +tobe 0 6 5.164786 0.000000 1995 +praveen 0 6 5.164786 0.000000 1996 +kaufmann 0 5 5.347108 0.000000 2254 +solid 0 5 5.347108 0.000000 2255 +valuabl 0 5 5.347108 0.000000 2256 +thrive 0 5 5.347108 0.000000 2257 +greater 0 5 5.347108 0.000000 2258 +fraction 0 5 5.347108 0.000000 2259 +interestedin 0 5 5.347108 0.000000 2260 +categori 0 5 5.347108 0.000000 2261 +mcgraw 0 5 5.347108 0.000000 2262 +minibas 0 4 5.568345 0.000000 2608 +surprisingli 0 4 5.568345 0.000000 2609 +behind 0 4 5.568345 0.000000 2610 +suppli 0 4 5.568345 0.000000 2611 +thiscours 0 4 5.568345 0.000000 2601 +asystem 0 4 5.568345 0.000000 2612 +enrol 0 4 5.568345 0.000000 2613 +twice 0 4 5.568345 0.000000 2614 +fold 0 4 5.568345 0.000000 2615 +thati 0 4 5.568345 0.000000 2616 +predat 0 3 5.857933 0.000000 3135 +comfort 0 3 5.857933 0.000000 3136 +giant 0 3 5.857933 0.000000 3137 +explos 0 3 5.857933 0.000000 3138 +alon 0 3 5.857933 0.000000 3139 +scratch 0 3 5.857933 0.000000 3140 +parser 0 3 5.857933 0.000000 3141 +aproject 0 3 5.857933 0.000000 3142 +bibl 0 3 5.857933 0.000000 3143 +confus 0 3 5.857933 0.000000 3144 +weitsang 0 2 6.263398 0.000000 4088 +databasemanag 0 2 6.263398 0.000000 4089 +certainli 0 2 6.263398 0.000000 4090 +proportion 0 2 6.263398 0.000000 4091 +thefirst 0 2 6.263398 0.000000 4092 +youto 0 2 6.263398 0.000000 4093 +builton 0 2 6.263398 0.000000 4094 +thehigh 0 2 6.263398 0.000000 4095 +korth 0 2 6.263398 0.000000 4051 +secondedit 0 2 6.263398 0.000000 4096 +ingr 0 2 6.263398 0.000000 4097 +elmasri 0 2 6.263398 0.000000 4059 +grai 0 2 6.263398 0.000000 4098 +reuter 0 2 6.263398 0.000000 4099 +likewis 0 2 6.263398 0.000000 4100 +confirm 0 2 6.263398 0.000000 4101 +outsidefirewal 0 1 6.957497 0.000000 6434 +stonebrak 0 1 6.957497 0.000000 6435 +samplequest 0 1 6.957497 0.000000 6436 +predatordbm 0 1 6.957497 0.000000 6437 +currentproject 0 1 6.957497 0.000000 6438 +coursedescript 0 1 6.957497 0.000000 6439 +intendedto 0 1 6.957497 0.000000 6440 +slargest 0 1 6.957497 0.000000 6441 +piecesof 0 1 6.957497 0.000000 6442 +knowledgeabledatabas 0 1 6.957497 0.000000 6443 +researchcommun 0 1 6.957497 0.000000 6444 +addressedbecaus 0 1 6.957497 0.000000 6445 +informedus 0 1 6.957497 0.000000 6446 +teller 0 1 6.957497 0.000000 6447 +newcours 0 1 6.957497 0.000000 6448 +quickreview 0 1 6.957497 0.000000 6449 +abreadth 0 1 6.957497 0.000000 6450 +advancedtop 0 1 6.957497 0.000000 6451 +thepurpos 0 1 6.957497 0.000000 6452 +coursei 0 1 6.957497 0.000000 6453 +weeksaft 0 1 6.957497 0.000000 6454 +requireread 0 1 6.957497 0.000000 6455 +engineeringlibrari 0 1 6.957497 0.000000 6456 +pursueaddit 0 1 6.957497 0.000000 6457 +forinform 0 1 6.957497 0.000000 6458 +examtim 0 1 6.957497 0.000000 6459 +developmentproject 0 1 6.957497 0.000000 6460 +involvea 0 1 6.957497 0.000000 6461 +wishto 0 1 6.957497 0.000000 6462 +willinvolv 0 1 6.957497 0.000000 6463 +andmodifi 0 1 6.957497 0.000000 6464 +andrar 0 1 6.957497 0.000000 6465 +luxuri 0 1 6.957497 0.000000 6466 +thediffer 0 1 6.957497 0.000000 6467 +inevit 0 1 6.957497 0.000000 6468 +varioussystem 0 1 6.957497 0.000000 6469 +buffermanag 0 1 6.957497 0.000000 6470 +enginethat 0 1 6.957497 0.000000 6471 +possibleproject 0 1 6.957497 0.000000 6472 +likecomplex 0 1 6.957497 0.000000 6473 +becauseth 0 1 6.957497 0.000000 6474 +betweenminibas 0 1 6.957497 0.000000 6475 +somegener 0 1 6.957497 0.000000 6476 +ideaon 0 1 6.957497 0.000000 6477 +advanceof 0 1 6.957497 0.000000 6478 +submitan 0 1 6.957497 0.000000 6479 +discussth 0 1 6.957497 0.000000 6480 +particularsystem 0 1 6.957497 0.000000 6481 +documentwil 0 1 6.957497 0.000000 6482 +picki 0 1 6.957497 0.000000 6483 +geton 0 1 6.957497 0.000000 6484 +oftest 0 1 6.957497 0.000000 6485 +coursetextbook 0 1 6.957497 0.000000 6486 +bookcontain 0 1 6.957497 0.000000 6487 +databasebook 0 1 6.957497 0.000000 6488 +thecampu 0 1 6.957497 0.000000 6489 +collectedand 0 1 6.957497 0.000000 6490 +postgr 0 1 6.957497 0.000000 6491 +andillustra 0 1 6.957497 0.000000 6492 +corearea 0 1 6.957497 0.000000 6493 +navath 0 1 6.957497 0.000000 6494 +tellsyou 0 1 6.957497 0.000000 6495 +wonderfulrefer 0 1 6.957497 0.000000 6496 +debuggingwith 0 1 6.957497 0.000000 6497 +gradingpolici 0 1 6.957497 0.000000 6498 +percentag 0 1 6.957497 0.000000 6499 +anextra 0 1 6.957497 0.000000 6500 +thefin 0 1 6.957497 0.000000 6501 +willfocu 0 1 6.957497 0.000000 6502 +coveredin 0 1 6.957497 0.000000 6503 +professorpraveen 0 1 6.957497 0.000000 6504 +teachingassist 0 1 6.957497 0.000000 6505 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..a9ac1b87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +implement 0 152 1.791759 0.000000 52 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +base 0 165 1.791759 0.000000 50 +lectur 0 135 1.945910 0.000000 73 +like 0 132 1.945910 0.000000 81 +relat 0 139 1.945910 0.000000 68 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +instructor 0 108 2.197225 0.000000 107 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +assist 0 112 2.197225 0.000000 113 +text 0 98 2.302585 0.000000 133 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +call 0 91 2.397895 0.000000 153 +environ 0 84 2.484907 0.000000 177 +learn 0 86 2.484907 0.000000 170 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +logic 0 71 2.639057 0.000000 230 +line 0 75 2.639057 0.000000 231 +knowledg 0 67 2.708050 0.000000 243 +java 0 70 2.708050 0.000000 248 +goal 0 66 2.708050 0.000000 250 +descript 0 64 2.772589 0.000000 271 +experi 0 64 2.772589 0.000000 283 +function 0 62 2.772589 0.000000 275 +abstract 0 62 2.772589 0.000000 276 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +undergradu 0 54 2.944439 0.000000 338 +allow 0 53 2.944439 0.000000 333 +talk 0 53 2.944439 0.000000 336 +tabl 0 51 2.995732 0.000000 346 +basic 0 50 3.044522 0.000000 360 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +understand 0 47 3.091042 0.000000 384 +describ 0 45 3.135494 0.000000 400 +better 0 45 3.135494 0.000000 401 +textbook 0 44 3.135494 0.000000 397 +math 0 44 3.135494 0.000000 402 +mark 0 44 3.135494 0.000000 403 +mechan 0 43 3.178054 0.000000 416 +edit 0 42 3.218876 0.000000 418 +programm 0 39 3.258097 0.000000 445 +theoret 0 39 3.258097 0.000000 446 +must 0 40 3.258097 0.000000 442 +correct 0 38 3.295837 0.000000 462 +formal 0 37 3.332205 0.000000 478 +survei 0 35 3.401197 0.000000 513 +tech 0 35 3.401197 0.000000 515 +least 0 35 3.401197 0.000000 516 +concept 0 32 3.465736 0.000000 537 +specifi 0 30 3.555348 0.000000 568 +semant 0 29 3.583519 0.000000 587 +turn 0 29 3.583519 0.000000 586 +though 0 27 3.637586 0.000000 622 +rather 0 26 3.688879 0.000000 642 +relev 0 26 3.688879 0.000000 637 +concern 0 25 3.737670 0.000000 666 +demonstr 0 24 3.761200 0.000000 694 +greg 0 24 3.761200 0.000000 695 +compress 0 23 3.806662 0.000000 719 +lead 0 23 3.806662 0.000000 718 +proof 0 23 3.806662 0.000000 720 +properti 0 22 3.850148 0.000000 749 +programminglanguag 0 21 3.912023 0.000000 782 +newsgroup 0 21 3.912023 0.000000 783 +safeti 0 20 3.951244 0.000000 817 +scheme 0 20 3.951244 0.000000 818 +prove 0 19 4.007333 0.000000 848 +assum 0 19 4.007333 0.000000 845 +prerequisit 0 19 4.007333 0.000000 846 +andrew 0 19 4.007333 0.000000 849 +analyz 0 17 4.110874 0.000000 925 +side 0 15 4.248495 0.000000 1022 +precis 0 15 4.248495 0.000000 1023 +carl 0 15 4.248495 0.000000 1024 +conduct 0 14 4.317488 0.000000 1065 +directli 0 13 4.382027 0.000000 1141 +larri 0 13 4.382027 0.000000 1142 +recurs 0 13 4.382027 0.000000 1127 +emac 0 13 4.382027 0.000000 1143 +pascal 0 12 4.465908 0.000000 1213 +calculu 0 12 4.465908 0.000000 1203 +meng 0 12 4.465908 0.000000 1214 +broad 0 11 4.553877 0.000000 1302 +induct 0 11 4.553877 0.000000 1304 +linda 0 10 4.653960 0.000000 1394 +notat 0 9 4.753590 0.000000 1489 +entitl 0 9 4.753590 0.000000 1490 +prefer 0 9 4.753590 0.000000 1491 +suitabl 0 9 4.753590 0.000000 1486 +admin 0 9 4.753590 0.000000 1476 +mode 0 9 4.753590 0.000000 1492 +ideal 0 8 4.875197 0.000000 1630 +leon 0 8 4.875197 0.000000 1631 +cum 0 8 4.875197 0.000000 1619 +dispatch 0 7 5.010635 0.000000 1791 +henc 0 7 5.010635 0.000000 1805 +predic 0 7 5.010635 0.000000 1806 +ture 0 6 5.164786 0.000000 1997 +morrisett 0 5 5.347108 0.000000 2263 +gentl 0 5 5.347108 0.000000 2264 +witha 0 4 5.568345 0.000000 2617 +haskel 0 4 5.568345 0.000000 2618 +principlesof 0 3 5.857933 0.000000 3145 +deeper 0 3 5.857933 0.000000 3146 +denot 0 3 5.857933 0.000000 3147 +noteshomework 0 2 6.263398 0.000000 4102 +profici 0 2 6.263398 0.000000 4103 +andlog 0 2 6.263398 0.000000 4104 +competillo 0 2 6.263398 0.000000 4105 +lfar 0 2 6.263398 0.000000 4106 +erlingsson 0 2 6.263398 0.000000 4107 +indexdocument 0 2 6.263398 0.000000 4108 +toolsa 0 2 6.263398 0.000000 4109 +prerequisiteshandoutsscrib 0 1 6.957497 0.000000 6506 +assignmentscontact 0 1 6.957497 0.000000 6507 +informationrelev 0 1 6.957497 0.000000 6508 +goalof 0 1 6.957497 0.000000 6509 +multipleinherit 0 1 6.957497 0.000000 6510 +subsum 0 1 6.957497 0.000000 6511 +thestudi 0 1 6.957497 0.000000 6512 +abstractli 0 1 6.957497 0.000000 6513 +howprogram 0 1 6.957497 0.000000 6514 +asnot 0 1 6.957497 0.000000 6515 +preciser 0 1 6.957497 0.000000 6516 +forform 0 1 6.957497 0.000000 6517 +somethingabout 0 1 6.957497 0.000000 6518 +tomanipul 0 1 6.957497 0.000000 6519 +gunter 0 1 6.957497 0.000000 6520 +paulson 0 1 6.957497 0.000000 6521 +undergraduatemathemat 0 1 6.957497 0.000000 6522 +mathematicalmatur 0 1 6.957497 0.000000 6523 +anmeng 0 1 6.957497 0.000000 6524 +ifth 0 1 6.957497 0.000000 6525 +ulfar 0 1 6.957497 0.000000 6526 +pmrelev 0 1 6.957497 0.000000 6527 +comint 0 1 6.957497 0.000000 6528 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..f300f9a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +schedul 0 119 2.079442 0.000000 85 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +info 0 85 2.484907 0.000000 176 +materi 0 75 2.639057 0.000000 221 +staff 0 36 3.367296 0.000000 490 +newsgroup 0 21 3.912023 0.000000 783 +rivl 0 8 4.875197 0.000000 1632 +systemscomput 0 3 5.857933 0.000000 3148 +janosi 0 3 5.857933 0.000000 3149 +pagecsmultimedia 0 2 6.263398 0.000000 4110 +anounc 0 2 6.263398 0.000000 4111 +bugcom 0 2 6.263398 0.000000 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..42deefbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +question 0 91 2.397895 0.000000 141 +info 0 85 2.484907 0.000000 176 +materi 0 75 2.639057 0.000000 221 +staff 0 36 3.367296 0.000000 490 +newsgroup 0 21 3.912023 0.000000 783 +rivl 0 8 4.875197 0.000000 1632 +systemscomput 0 3 5.857933 0.000000 3148 +janosi 0 3 5.857933 0.000000 3149 +pagecsmultimedia 0 2 6.263398 0.000000 4110 +anounc 0 2 6.263398 0.000000 4111 +bugcom 0 2 6.263398 0.000000 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..b6a88a4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +modifi 0 178 1.609438 0.000000 35 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +analysi 0 124 2.079442 0.000000 98 +instructor 0 108 2.197225 0.000000 107 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +homepag 0 93 2.397895 0.000000 148 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +homework 1 79 2.564949 2.564949 193 +refer 0 78 2.564949 0.000000 203 +upson 0 71 2.639057 0.000000 218 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +copi 0 63 2.772589 0.000000 284 +locat 0 59 2.833213 0.000000 303 +talk 0 53 2.944439 0.000000 336 +announc 0 40 3.258097 0.000000 441 +approxim 0 35 3.401197 0.000000 509 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +sheet 0 16 4.174387 0.000000 973 +cannot 0 13 4.382027 0.000000 1144 +cheat 0 10 4.653960 0.000000 1395 +evan 0 8 4.875197 0.000000 1633 +rubinfeld 0 6 5.164786 0.000000 1998 +ronitt 0 5 5.347108 0.000000 2265 +kozen 0 4 5.568345 0.000000 2619 +addendum 0 3 5.857933 0.000000 3150 +moran 0 3 5.857933 0.000000 3151 +rajeev 0 3 5.857933 0.000000 3152 +inupson 0 1 6.957497 0.000000 6529 +tome 0 1 6.957497 0.000000 6530 +reschedul 0 1 6.957497 0.000000 6531 +motwani 0 1 6.957497 0.000000 6532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..05d2cb8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +parallel 1 169 1.791759 1.791759 60 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +specif 0 106 2.197225 0.000000 106 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +issu 0 78 2.564949 0.000000 211 +complet 0 77 2.564949 0.000000 208 +upson 0 71 2.639057 0.000000 218 +line 0 75 2.639057 0.000000 231 +integr 0 67 2.708050 0.000000 245 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +locat 0 59 2.833213 0.000000 303 +share 0 59 2.833213 0.000000 304 +point 0 58 2.890372 0.000000 319 +processor 0 54 2.944439 0.000000 335 +allow 0 53 2.944439 0.000000 333 +much 0 52 2.995732 0.000000 349 +week 0 52 2.995732 0.000000 343 +hardwar 0 51 2.995732 0.000000 350 +approach 0 48 3.044522 0.000000 366 +set 0 50 3.044522 0.000000 361 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +execut 0 45 3.135494 0.000000 404 +offer 0 43 3.178054 0.000000 414 +term 0 43 3.178054 0.000000 411 +howev 0 41 3.218876 0.000000 422 +futur 0 41 3.218876 0.000000 427 +past 0 42 3.218876 0.000000 428 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +workstat 0 37 3.332205 0.000000 479 +cost 0 37 3.332205 0.000000 480 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +product 0 33 3.433987 0.000000 527 +taken 0 31 3.496508 0.000000 555 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +multiprocessor 0 28 3.610918 0.000000 605 +altern 0 26 3.688879 0.000000 641 +although 0 25 3.737670 0.000000 667 +aspect 0 25 3.737670 0.000000 663 +almost 0 22 3.850148 0.000000 742 +sequenti 0 22 3.850148 0.000000 745 +fact 0 21 3.912023 0.000000 780 +busi 0 21 3.912023 0.000000 784 +latest 0 21 3.912023 0.000000 785 +portabl 0 20 3.951244 0.000000 819 +spend 0 19 4.007333 0.000000 850 +layer 0 17 4.110874 0.000000 926 +across 0 16 4.174387 0.000000 974 +month 0 15 4.248495 0.000000 1025 +massiv 0 15 4.248495 0.000000 1026 +consider 0 14 4.317488 0.000000 1076 +easili 0 14 4.317488 0.000000 1077 +split 0 14 4.317488 0.000000 1078 +thorsten 0 13 4.382027 0.000000 1133 +cannot 0 13 4.382027 0.000000 1144 +stai 0 12 4.465908 0.000000 1215 +workload 0 12 4.465908 0.000000 1210 +grant 0 12 4.465908 0.000000 1216 +host 0 11 4.553877 0.000000 1306 +debugg 0 9 4.753590 0.000000 1493 +manufactur 0 8 4.875197 0.000000 1634 +competit 0 8 4.875197 0.000000 1635 +depth 0 8 4.875197 0.000000 1636 +contrast 0 8 4.875197 0.000000 1637 +smile 0 7 5.010635 0.000000 1807 +microprocessor 0 7 5.010635 0.000000 1808 +core 0 7 5.010635 0.000000 1809 +usabl 0 7 5.010635 0.000000 1810 +price 0 6 5.164786 0.000000 1999 +quickli 0 6 5.164786 0.000000 2000 +vari 0 6 5.164786 0.000000 2001 +feder 0 5 5.347108 0.000000 2266 +eas 0 5 5.347108 0.000000 2267 +suffer 0 5 5.347108 0.000000 2268 +matur 0 5 5.347108 0.000000 2269 +vertic 0 5 5.347108 0.000000 2270 +focuss 0 5 5.347108 0.000000 2271 +cut 0 4 5.568345 0.000000 2620 +shelf 0 4 5.568345 0.000000 2621 +slice 0 4 5.568345 0.000000 2622 +eickenfal 0 3 5.857933 0.000000 3125 +leverag 0 3 5.857933 0.000000 3153 +heat 0 2 6.263398 0.000000 4113 +glorifi 0 2 6.263398 0.000000 4114 +farm 0 2 6.263398 0.000000 4115 +adequ 0 2 6.263398 0.000000 4116 +horizont 0 2 6.263398 0.000000 4117 +debat 0 1 6.957497 0.000000 6533 +pagefronti 0 1 6.957497 0.000000 6534 +pmoffic 0 1 6.957497 0.000000 6535 +pmcours 0 1 6.957497 0.000000 6536 +descriptionparallel 0 1 6.957497 0.000000 6537 +underscor 0 1 6.957497 0.000000 6538 +erad 0 1 6.957497 0.000000 6539 +competitor 0 1 6.957497 0.000000 6540 +dash 0 1 6.957497 0.000000 6541 +materialscours 0 1 6.957497 0.000000 6542 +formatlectur 0 1 6.957497 0.000000 6543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..91ed9172 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +assign 0 135 1.945910 0.000000 66 +like 0 132 1.945910 0.000000 81 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +question 0 91 2.397895 0.000000 141 +search 0 95 2.397895 0.000000 155 +start 0 83 2.484907 0.000000 173 +control 0 82 2.484907 0.000000 164 +help 0 83 2.484907 0.000000 175 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +exampl 0 77 2.564949 0.000000 195 +homework 0 79 2.564949 0.000000 193 +know 0 80 2.564949 0.000000 198 +refer 0 78 2.564949 0.000000 203 +come 0 78 2.564949 0.000000 202 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +david 0 71 2.639057 0.000000 232 +organ 0 65 2.772589 0.000000 265 +guid 0 63 2.772589 0.000000 267 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +sever 0 56 2.890372 0.000000 322 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +talk 0 53 2.944439 0.000000 336 +digit 0 52 2.995732 0.000000 348 +case 0 51 2.995732 0.000000 351 +maintain 0 51 2.995732 0.000000 342 +frequent 0 49 3.044522 0.000000 367 +get 0 46 3.091042 0.000000 380 +video 0 44 3.135494 0.000000 405 +tutori 0 39 3.258097 0.000000 437 +small 0 39 3.258097 0.000000 447 +staff 0 36 3.367296 0.000000 490 +procedur 0 36 3.367296 0.000000 488 +common 0 30 3.555348 0.000000 574 +ask 0 28 3.610918 0.000000 597 +great 0 27 3.637586 0.000000 626 +never 0 25 3.737670 0.000000 671 +todai 0 25 3.737670 0.000000 672 +consult 0 24 3.761200 0.000000 687 +wish 0 24 3.761200 0.000000 692 +hierarchi 0 22 3.850148 0.000000 744 +annot 0 21 3.912023 0.000000 775 +path 0 21 3.912023 0.000000 778 +unit 0 21 3.912023 0.000000 779 +newsgroup 0 21 3.912023 0.000000 783 +lot 0 18 4.060443 0.000000 889 +otherwis 0 17 4.110874 0.000000 922 +fortran 0 15 4.248495 0.000000 1027 +thorsten 0 13 4.382027 0.000000 1133 +assembl 0 12 4.465908 0.000000 1207 +pascal 0 12 4.465908 0.000000 1213 +surf 0 11 4.553877 0.000000 1301 +combinatori 0 8 4.875197 0.000000 1629 +brain 0 8 4.875197 0.000000 1638 +interrupt 0 7 5.010635 0.000000 1793 +conot 0 5 5.347108 0.000000 2245 +eickenfal 0 3 5.857933 0.000000 3125 +kimbal 0 3 5.857933 0.000000 3114 +helpif 0 3 5.857933 0.000000 3126 +aproject 0 3 5.857933 0.000000 3142 +mate 0 3 5.857933 0.000000 3127 +cardiff 0 3 5.857933 0.000000 3154 +programsand 0 3 5.857933 0.000000 3111 +marshal 0 2 6.263398 0.000000 4118 +btopic 0 2 6.263398 0.000000 4085 +representationof 0 2 6.263398 0.000000 4119 +toon 0 2 6.263398 0.000000 4120 +pageintroduct 0 1 6.957497 0.000000 6544 +sequentialcircuit 0 1 6.957497 0.000000 6545 +andmicroprogram 0 1 6.957497 0.000000 6546 +theappropri 0 1 6.957497 0.000000 6547 +gethelp 0 1 6.957497 0.000000 6548 +informationcoursemateri 0 1 6.957497 0.000000 6549 +announcementsannounc 0 1 6.957497 0.000000 6550 +onlinean 0 1 6.957497 0.000000 6551 +forpeopl 0 1 6.957497 0.000000 6552 +cclass 0 1 6.957497 0.000000 6553 +learnc 0 1 6.957497 0.000000 6554 +theyahoo 0 1 6.957497 0.000000 6555 +ofmor 0 1 6.957497 0.000000 6556 +inansw 0 1 6.957497 0.000000 6557 +voneicken 0 1 6.957497 0.000000 6558 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..ce771a36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +welcom 0 122 2.079442 0.000000 99 +final 0 116 2.197225 0.000000 108 +code 0 108 2.197225 0.000000 116 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +grade 0 90 2.397895 0.000000 142 +academ 0 82 2.484907 0.000000 178 +exam 0 86 2.484907 0.000000 169 +upson 0 71 2.639057 0.000000 218 +intellig 0 72 2.639057 0.000000 225 +materi 0 75 2.639057 0.000000 221 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +date 0 51 2.995732 0.000000 344 +announc 0 40 3.258097 0.000000 441 +request 0 26 3.688879 0.000000 635 +altern 0 26 3.688879 0.000000 641 +sometim 0 24 3.761200 0.000000 696 +saturdai 0 7 5.010635 0.000000 1794 +clair 0 4 5.568345 0.000000 2605 +pagesc 0 3 5.857933 0.000000 3133 +pagecsfound 0 2 6.263398 0.000000 4086 +yourgrad 0 2 6.263398 0.000000 4121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..337cfc27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +note 0 142 1.945910 0.000000 67 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +technolog 0 131 2.079442 0.000000 102 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +resourc 0 81 2.484907 0.000000 172 +stuff 0 87 2.484907 0.000000 171 +upson 0 71 2.639057 0.000000 218 +materi 0 75 2.639057 0.000000 221 +line 0 75 2.639057 0.000000 231 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +overview 0 56 2.890372 0.000000 323 +frequent 0 49 3.044522 0.000000 367 +staff 0 36 3.367296 0.000000 490 +ask 0 28 3.610918 0.000000 597 +edulast 0 17 4.110874 0.000000 927 +pagec 0 15 4.248495 0.000000 1011 +borland 0 14 4.317488 0.000000 1067 +recit 0 9 4.753590 0.000000 1475 +vineet 0 8 4.875197 0.000000 1639 +none 0 7 5.010635 0.000000 1811 +buch 0 5 5.347108 0.000000 2272 +samuel 0 3 5.857933 0.000000 3155 +weber 0 3 5.857933 0.000000 3156 +yaron 0 2 6.263398 0.000000 4122 +minski 0 2 6.263398 0.000000 4123 +remark 0 2 6.263398 0.000000 4124 +techniquescomput 0 1 6.957497 0.000000 6559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..a40de881 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +introduct 0 126 2.079442 0.000000 87 +postscript 0 131 2.079442 0.000000 90 +homework 0 79 2.564949 0.000000 193 +logic 0 71 2.639057 0.000000 230 +guid 0 63 2.772589 0.000000 267 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +tabl 0 51 2.995732 0.000000 346 +brian 0 38 3.295837 0.000000 466 +slide 0 38 3.295837 0.000000 467 +procedur 0 36 3.367296 0.000000 488 +smith 0 20 3.951244 0.000000 820 +recurs 0 13 4.382027 0.000000 1127 +assembl 0 12 4.465908 0.000000 1207 +tour 0 11 4.553877 0.000000 1307 +stack 0 10 4.653960 0.000000 1389 +spec 0 8 4.875197 0.000000 1640 +interrupt 0 7 5.010635 0.000000 1793 +linker 0 3 5.857933 0.000000 3157 +loader 0 1 6.957497 0.000000 6560 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..112e510c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +lectur 0 135 1.945910 0.000000 73 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +exam 0 86 2.484907 0.000000 169 +wide 0 84 2.484907 0.000000 185 +april 0 77 2.564949 0.000000 196 +messag 0 76 2.564949 0.000000 212 +tuesdai 0 73 2.639057 0.000000 219 +materi 0 75 2.639057 0.000000 221 +import 0 65 2.772589 0.000000 282 +march 0 61 2.833213 0.000000 295 +februari 0 54 2.944439 0.000000 328 +get 0 46 3.091042 0.000000 380 +review 0 42 3.218876 0.000000 425 +held 0 28 3.610918 0.000000 600 +session 0 26 3.688879 0.000000 643 +thur 0 19 4.007333 0.000000 847 +prelim 0 12 4.465908 0.000000 1201 +rememb 0 12 4.465908 0.000000 1217 +tue 0 11 4.553877 0.000000 1308 +regard 0 11 4.553877 0.000000 1309 +baker 0 7 5.010635 0.000000 1812 +pierc 0 4 5.568345 0.000000 2623 +theworld 0 3 5.857933 0.000000 3158 +codewarrior 0 2 6.263398 0.000000 4125 +frequentlyfor 0 1 6.957497 0.000000 6561 +onsundai 0 1 6.957497 0.000000 6562 +personalmac 0 1 6.957497 0.000000 6563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..a7135f2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,261 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +lectur 1 135 1.945910 1.945910 73 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +note 0 142 1.945910 0.000000 67 +architectur 0 139 1.945910 0.000000 77 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +structur 0 106 2.197225 0.000000 105 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +site 0 106 2.197225 0.000000 119 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +question 0 91 2.397895 0.000000 141 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +exam 0 86 2.484907 0.000000 169 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +server 0 76 2.564949 0.000000 204 +dynam 0 76 2.564949 0.000000 194 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +orient 0 80 2.564949 0.000000 205 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +sourc 0 77 2.564949 0.000000 201 +upson 0 71 2.639057 0.000000 218 +david 0 71 2.639057 0.000000 232 +tuesdai 0 73 2.639057 0.000000 219 +name 0 72 2.639057 0.000000 220 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +thursdai 0 70 2.708050 0.000000 241 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +function 0 62 2.772589 0.000000 275 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +complex 0 64 2.772589 0.000000 269 +type 0 61 2.833213 0.000000 296 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +summer 0 56 2.890372 0.000000 311 +unix 0 58 2.890372 0.000000 308 +think 0 57 2.890372 0.000000 314 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +date 0 51 2.995732 0.000000 344 +tabl 0 51 2.995732 0.000000 346 +format 0 48 3.044522 0.000000 356 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +appoint 0 49 3.044522 0.000000 358 +set 0 50 3.044522 0.000000 361 +effect 0 46 3.091042 0.000000 385 +netscap 0 44 3.135494 0.000000 395 +fridai 0 44 3.135494 0.000000 390 +http 0 41 3.218876 0.000000 420 +correct 0 38 3.295837 0.000000 462 +industri 0 38 3.295837 0.000000 464 +credit 0 38 3.295837 0.000000 460 +microsoft 0 38 3.295837 0.000000 468 +tree 0 36 3.367296 0.000000 492 +staff 0 36 3.367296 0.000000 490 +soon 0 36 3.367296 0.000000 494 +word 0 34 3.401197 0.000000 508 +next 0 34 3.401197 0.000000 517 +print 0 34 3.401197 0.000000 503 +manual 0 35 3.401197 0.000000 504 +given 0 32 3.465736 0.000000 538 +storag 0 31 3.496508 0.000000 553 +framework 0 28 3.610918 0.000000 606 +held 0 28 3.610918 0.000000 600 +arrai 0 27 3.637586 0.000000 627 +session 0 26 3.688879 0.000000 643 +enhanc 0 26 3.688879 0.000000 644 +jeff 0 25 3.737670 0.000000 673 +consult 0 24 3.761200 0.000000 687 +other 0 24 3.761200 0.000000 697 +lab 0 24 3.761200 0.000000 698 +thank 0 23 3.806662 0.000000 721 +proof 0 23 3.806662 0.000000 720 +almost 0 22 3.850148 0.000000 742 +inth 0 22 3.850148 0.000000 741 +alloc 0 20 3.951244 0.000000 821 +fine 0 20 3.951244 0.000000 822 +binari 0 20 3.951244 0.000000 823 +prove 0 19 4.007333 0.000000 848 +prerequisit 0 19 4.007333 0.000000 846 +expand 0 17 4.110874 0.000000 928 +regular 0 17 4.110874 0.000000 929 +macintosh 0 17 4.110874 0.000000 920 +intro 0 17 4.110874 0.000000 915 +condit 0 16 4.174387 0.000000 975 +jose 0 16 4.174387 0.000000 976 +charact 0 15 4.248495 0.000000 1028 +piec 0 15 4.248495 0.000000 1020 +recurs 0 13 4.382027 0.000000 1127 +deriv 0 13 4.382027 0.000000 1145 +alan 0 13 4.382027 0.000000 1146 +prelim 0 12 4.465908 0.000000 1201 +iter 0 12 4.465908 0.000000 1206 +grant 0 12 4.465908 0.000000 1216 +onth 0 12 4.465908 0.000000 1218 +loop 0 11 4.553877 0.000000 1310 +chri 0 11 4.553877 0.000000 1311 +rich 0 10 4.653960 0.000000 1396 +princip 0 10 4.653960 0.000000 1397 +sundai 0 10 4.653960 0.000000 1387 +preliminari 0 9 4.753590 0.000000 1480 +notat 0 9 4.753590 0.000000 1489 +strength 0 9 4.753590 0.000000 1494 +plain 0 9 4.753590 0.000000 1495 +equival 0 9 4.753590 0.000000 1496 +intermedi 0 9 4.753590 0.000000 1497 +filter 0 8 4.875197 0.000000 1641 +printer 0 8 4.875197 0.000000 1621 +dictionari 0 8 4.875197 0.000000 1642 +none 0 7 5.010635 0.000000 1811 +noon 0 7 5.010635 0.000000 1804 +troubl 0 6 5.164786 0.000000 2002 +yale 0 6 5.164786 0.000000 2003 +contest 0 5 5.347108 0.000000 2273 +cell 0 5 5.347108 0.000000 2274 +ofprogram 0 4 5.568345 0.000000 2624 +flavor 0 4 5.568345 0.000000 2625 +haskel 0 4 5.568345 0.000000 2618 +cuinfo 0 4 5.568345 0.000000 2626 +foster 0 3 5.857933 0.000000 3159 +tripl 0 3 5.857933 0.000000 3160 +walker 0 3 5.857933 0.000000 3161 +tocomput 0 3 5.857933 0.000000 3162 +ghostview 0 3 5.857933 0.000000 3163 +maker 0 3 5.857933 0.000000 3164 +kwan 0 2 6.263398 0.000000 4126 +stuffit 0 2 6.263398 0.000000 4127 +codewarrior 0 2 6.263398 0.000000 4125 +thesecond 0 2 6.263398 0.000000 4128 +datatyp 0 2 6.263398 0.000000 4129 +csdepart 0 2 6.263398 0.000000 4130 +metrowerk 0 2 6.263398 0.000000 4131 +gofer 0 1 6.957497 0.000000 6564 +macgof 0 1 6.957497 0.000000 6565 +jfoster 0 1 6.957497 0.000000 6566 +hollist 0 1 6.957497 0.000000 6567 +binhqx 0 1 6.957497 0.000000 6568 +dynamicdata 0 1 6.957497 0.000000 6569 +curri 0 1 6.957497 0.000000 6570 +olin 0 1 6.957497 0.000000 6571 +ahal 0 1 6.957497 0.000000 6572 +walkerwednesdai 0 1 6.957497 0.000000 6573 +kaykylesteveericvasantha 0 1 6.957497 0.000000 6574 +danerickaychrisdan 0 1 6.957497 0.000000 6575 +earlyvers 0 1 6.957497 0.000000 6576 +announcetim 0 1 6.957497 0.000000 6577 +theprelim 0 1 6.957497 0.000000 6578 +wereannounc 0 1 6.957497 0.000000 6579 +lastnam 0 1 6.957497 0.000000 6580 +covereveryth 0 1 6.957497 0.000000 6581 +topicsconv 0 1 6.957497 0.000000 6582 +daywhenwherewhomondai 0 1 6.957497 0.000000 6583 +davetuesdai 0 1 6.957497 0.000000 6584 +jeffwednesdai 0 1 6.957497 0.000000 6585 +davethursdai 0 1 6.957497 0.000000 6586 +halfridai 0 1 6.957497 0.000000 6587 +halsaturdai 0 1 6.957497 0.000000 6588 +breview 0 1 6.957497 0.000000 6589 +chrisand 0 1 6.957497 0.000000 6590 +engrd 0 1 6.957497 0.000000 6591 +bothcom 0 1 6.957497 0.000000 6592 +programmingexperi 0 1 6.957497 0.000000 6593 +ofalgorithm 0 1 6.957497 0.000000 6594 +perkin 0 1 6.957497 0.000000 6595 +sectionsdaytimeroominstructortuesdai 0 1 6.957497 0.000000 6596 +perkinstuesdai 0 1 6.957497 0.000000 6597 +perkinswednesdai 0 1 6.957497 0.000000 6598 +walkerthursdai 0 1 6.957497 0.000000 6599 +fosterfridai 0 1 6.957497 0.000000 6600 +ofclass 0 1 6.957497 0.000000 6601 +consultingsundaymondaytuesdaywednesdaythursdayfridai 0 1 6.957497 0.000000 6602 +steveerickylechrisjpkyl 0 1 6.957497 0.000000 6603 +steveerickylechrisjpvasantha 0 1 6.957497 0.000000 6604 +josejosekayjosejpvasantha 0 1 6.957497 0.000000 6605 +josejosekayjosejp 0 1 6.957497 0.000000 6606 +macbinari 0 1 6.957497 0.000000 6607 +parseabl 0 1 6.957497 0.000000 6608 +waspost 0 1 6.957497 0.000000 6609 +foraladdin 0 1 6.957497 0.000000 6610 +armandonunez 0 1 6.957497 0.000000 6611 +anylas 0 1 6.957497 0.000000 6612 +applicationlik 0 1 6.957497 0.000000 6613 +ishaskel 0 1 6.957497 0.000000 6614 +systemsz 0 1 6.957497 0.000000 6615 +ofgof 0 1 6.957497 0.000000 6616 +itavail 0 1 6.957497 0.000000 6617 +enhance_assign 0 1 6.957497 0.000000 6618 +aladdin 0 1 6.957497 0.000000 6619 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..6ac73797 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +modifi 0 178 1.609438 0.000000 35 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +make 0 111 2.197225 0.000000 120 +section 0 94 2.397895 0.000000 149 +info 0 85 2.484907 0.000000 176 +chang 0 82 2.484907 0.000000 163 +exam 0 86 2.484907 0.000000 169 +materi 0 75 2.639057 0.000000 221 +room 0 59 2.833213 0.000000 301 +extens 0 53 2.944439 0.000000 340 +date 0 51 2.995732 0.000000 344 +announc 0 40 3.258097 0.000000 441 +staff 0 36 3.367296 0.000000 490 +demo 0 18 4.060443 0.000000 888 +pagec 0 15 4.248495 0.000000 1011 +happi 0 14 4.317488 0.000000 1079 +emac 0 13 4.382027 0.000000 1143 +prelim 0 12 4.465908 0.000000 1201 +departmentcornel 0 5 5.347108 0.000000 2275 +grader 0 3 5.857933 0.000000 3165 +universityspr 0 2 6.263398 0.000000 4055 +interpretationof 0 1 6.957497 0.000000 6620 +programscomput 0 1 6.957497 0.000000 6621 +macmarlai 0 1 6.957497 0.000000 6622 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..fbb94366 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +implement 0 152 1.791759 0.000000 52 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +make 0 111 2.197225 0.000000 120 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +advanc 0 99 2.302585 0.000000 130 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +resourc 0 81 2.484907 0.000000 172 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +refer 0 78 2.564949 0.000000 203 +orient 0 80 2.564949 0.000000 205 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +tuesdai 0 73 2.639057 0.000000 219 +line 0 75 2.639057 0.000000 231 +java 0 70 2.708050 0.000000 248 +thursdai 0 70 2.708050 0.000000 241 +descript 0 64 2.772589 0.000000 271 +handout 0 64 2.772589 0.000000 263 +abstract 0 62 2.772589 0.000000 276 +collect 0 65 2.772589 0.000000 268 +function 0 62 2.772589 0.000000 275 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +featur 0 46 3.091042 0.000000 386 +mark 0 44 3.135494 0.000000 403 +continu 0 39 3.258097 0.000000 448 +survei 0 35 3.401197 0.000000 513 +kind 0 32 3.465736 0.000000 541 +focu 0 30 3.555348 0.000000 571 +synchron 0 29 3.583519 0.000000 588 +except 0 28 3.610918 0.000000 607 +greg 0 24 3.761200 0.000000 695 +thread 0 23 3.806662 0.000000 722 +separ 0 19 4.007333 0.000000 844 +andrew 0 19 4.007333 0.000000 849 +modern 0 16 4.174387 0.000000 966 +linda 0 10 4.653960 0.000000 1394 +admin 0 9 4.753590 0.000000 1476 +evan 0 8 4.875197 0.000000 1633 +dylan 0 8 4.875197 0.000000 1625 +closur 0 8 4.875197 0.000000 1643 +leon 0 8 4.875197 0.000000 1631 +cum 0 8 4.875197 0.000000 1619 +implementationof 0 7 5.010635 0.000000 1813 +garbag 0 6 5.164786 0.000000 1986 +notabl 0 5 5.347108 0.000000 2276 +morrisett 0 5 5.347108 0.000000 2263 +gentl 0 5 5.347108 0.000000 2264 +haskel 0 4 5.568345 0.000000 2618 +polymorph 0 4 5.568345 0.000000 2627 +administrivia 0 3 5.857933 0.000000 3166 +moran 0 3 5.857933 0.000000 3151 +competillo 0 2 6.263398 0.000000 4105 +indexdocument 0 2 6.263398 0.000000 4108 +toolsa 0 2 6.263398 0.000000 4109 +descriptionhandoutsadministriviaweb 0 1 6.957497 0.000000 6623 +ofmodern 0 1 6.957497 0.000000 6624 +connectionsto 0 1 6.957497 0.000000 6625 +pmweb 0 1 6.957497 0.000000 6626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..5d2d0ef8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +postscript 1 131 2.079442 2.079442 90 +final 0 116 2.197225 0.000000 108 +homework 0 79 2.564949 0.000000 193 +practic 0 70 2.708050 0.000000 246 +overview 0 56 2.890372 0.000000 323 +examin 0 42 3.218876 0.000000 424 +bibliographi 0 34 3.401197 0.000000 518 +annot 0 21 3.912023 0.000000 775 +prepar 0 20 3.951244 0.000000 824 +necessari 0 13 4.382027 0.000000 1147 +registr 0 5 5.347108 0.000000 2249 +informationcours 0 3 5.857933 0.000000 3167 +systemspract 0 1 6.957497 0.000000 6627 +takingc 0 1 6.957497 0.000000 6628 +logist 0 1 6.957497 0.000000 6629 +homeworkshomework 0 1 6.957497 0.000000 6630 +amexaminationsmidterm 0 1 6.957497 0.000000 6631 +bibliographiesselect 0 1 6.957497 0.000000 6632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..d1173585 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +problem 0 147 1.945910 0.000000 75 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +report 0 131 2.079442 0.000000 92 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +pleas 0 113 2.197225 0.000000 114 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +need 0 98 2.302585 0.000000 135 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +follow 0 92 2.397895 0.000000 143 +start 0 83 2.484907 0.000000 173 +solut 0 82 2.484907 0.000000 162 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +homework 0 79 2.564949 0.000000 193 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +server 0 76 2.564949 0.000000 204 +know 0 80 2.564949 0.000000 198 +upson 0 71 2.639057 0.000000 218 +solv 0 73 2.639057 0.000000 234 +order 0 69 2.708050 0.000000 249 +import 0 65 2.772589 0.000000 282 +result 0 65 2.772589 0.000000 281 +think 0 57 2.890372 0.000000 314 +instruct 0 53 2.944439 0.000000 332 +sampl 0 53 2.944439 0.000000 339 +give 0 50 3.044522 0.000000 359 +discuss 0 45 3.135494 0.000000 399 +futur 0 41 3.218876 0.000000 427 +might 0 41 3.218876 0.000000 426 +late 0 40 3.258097 0.000000 439 +must 0 40 3.258097 0.000000 442 +open 0 38 3.295837 0.000000 469 +everi 0 34 3.401197 0.000000 519 +board 0 33 3.433987 0.000000 528 +taken 0 31 3.496508 0.000000 555 +option 0 30 3.555348 0.000000 575 +limit 0 29 3.583519 0.000000 585 +particip 0 29 3.583519 0.000000 589 +consid 0 29 3.583519 0.000000 590 +usual 0 28 3.610918 0.000000 608 +measur 0 28 3.610918 0.000000 609 +session 0 26 3.688879 0.000000 643 +experiment 0 26 3.688879 0.000000 645 +begin 0 23 3.806662 0.000000 716 +minut 0 20 3.951244 0.000000 810 +left 0 19 4.007333 0.000000 851 +sign 0 16 4.174387 0.000000 970 +across 0 16 4.174387 0.000000 974 +contribut 0 15 4.248495 0.000000 1021 +split 0 14 4.317488 0.000000 1078 +consider 0 14 4.317488 0.000000 1076 +shown 0 14 4.317488 0.000000 1080 +thorsten 0 13 4.382027 0.000000 1133 +everyon 0 13 4.382027 0.000000 1148 +difficulti 0 13 4.382027 0.000000 1132 +eicken 0 13 4.382027 0.000000 1134 +outsid 0 12 4.465908 0.000000 1219 +pick 0 9 4.753590 0.000000 1498 +hang 0 9 4.753590 0.000000 1499 +andth 0 9 4.753590 0.000000 1481 +judg 0 8 4.875197 0.000000 1644 +hold 0 8 4.875197 0.000000 1645 +absolut 0 8 4.875197 0.000000 1646 +poster 1 7 5.010635 5.010635 1814 +noon 0 7 5.010635 0.000000 1804 +trade 0 7 5.010635 0.000000 1815 +explain 0 7 5.010635 0.000000 1816 +exactli 0 7 5.010635 0.000000 1817 +plu 0 6 5.164786 0.000000 2004 +willb 0 5 5.347108 0.000000 2277 +remain 0 5 5.347108 0.000000 2278 +gotten 0 4 5.568345 0.000000 2628 +chose 0 4 5.568345 0.000000 2629 +cuc 0 4 5.568345 0.000000 2630 +attack 0 3 5.857933 0.000000 3168 +memberof 0 3 5.857933 0.000000 3169 +off 0 3 5.857933 0.000000 3170 +arriv 0 2 6.263398 0.000000 4132 +subdirectori 0 2 6.263398 0.000000 4133 +thorough 0 2 6.263398 0.000000 4134 +programmingin 0 2 6.263398 0.000000 4135 +pagehigh 0 1 6.957497 0.000000 6633 +eickenspr 0 1 6.957497 0.000000 6634 +sessionthu 0 1 6.957497 0.000000 6635 +tbdpleas 0 1 6.957497 0.000000 6636 +willdetermin 0 1 6.957497 0.000000 6637 +postersess 0 1 6.957497 0.000000 6638 +cindywilliam 0 1 6.957497 0.000000 6639 +ithorizont 0 1 6.957497 0.000000 6640 +corridor 0 1 6.957497 0.000000 6641 +presentyour 0 1 6.957497 0.000000 6642 +asens 0 1 6.957497 0.000000 6643 +contempl 0 1 6.957497 0.000000 6644 +presentationswil 0 1 6.957497 0.000000 6645 +nativespeak 0 1 6.957497 0.000000 6646 +thelongest 0 1 6.957497 0.000000 6647 +tocom 0 1 6.957497 0.000000 6648 +finalreport 0 1 6.957497 0.000000 6649 +aretri 0 1 6.957497 0.000000 6650 +thesolut 0 1 6.957497 0.000000 6651 +youreject 0 1 6.957497 0.000000 6652 +webread 0 1 6.957497 0.000000 6653 +convic 0 1 6.957497 0.000000 6654 +bestsolut 0 1 6.957497 0.000000 6655 +showcas 0 1 6.957497 0.000000 6656 +ampl 0 1 6.957497 0.000000 6657 +goodexplan 0 1 6.957497 0.000000 6658 +whatyou 0 1 6.957497 0.000000 6659 +projectsproject 0 1 6.957497 0.000000 6660 +reportsproject 0 1 6.957497 0.000000 6661 +proposalsiniti 0 1 6.957497 0.000000 6662 +ideascours 0 1 6.957497 0.000000 6663 +materialshomework 0 1 6.957497 0.000000 6664 +pagebefor 0 1 6.957497 0.000000 6665 +introc 0 1 6.957497 0.000000 6666 +casec 0 1 6.957497 0.000000 6667 +technologyc 0 1 6.957497 0.000000 6668 +cachesc 0 1 6.957497 0.000000 6669 +netsc 0 1 6.957497 0.000000 6670 +spc 0 1 6.957497 0.000000 6671 +cyou 0 1 6.957497 0.000000 6672 +emdc 0 1 6.957497 0.000000 6673 +sortingc 0 1 6.957497 0.000000 6674 +spamc 0 1 6.957497 0.000000 6675 +msgpassc 0 1 6.957497 0.000000 6676 +mpic 0 1 6.957497 0.000000 6677 +cachecohc 0 1 6.957497 0.000000 6678 +locksc 0 1 6.957497 0.000000 6679 +threadsc 0 1 6.957497 0.000000 6680 +atmc 0 1 6.957497 0.000000 6681 +netc 0 1 6.957497 0.000000 6682 +scoreboardc 0 1 6.957497 0.000000 6683 +tomasuloc 0 1 6.957497 0.000000 6684 +predc 0 1 6.957497 0.000000 6685 +superscalarc 0 1 6.957497 0.000000 6686 +busesc 0 1 6.957497 0.000000 6687 +pentiummaintain 0 1 6.957497 0.000000 6688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..1092dca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +class 0 199 1.609438 0.000000 37 +base 0 165 1.791759 0.000000 50 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +april 1 77 2.564949 2.564949 196 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +simul 0 66 2.708050 0.000000 255 +januari 0 62 2.772589 0.000000 264 +march 0 61 2.833213 0.000000 295 +februari 0 54 2.944439 0.000000 328 +week 0 52 2.995732 0.000000 343 +set 0 50 3.044522 0.000000 361 +vision 0 41 3.218876 0.000000 430 +continu 0 39 3.258097 0.000000 448 +field 0 37 3.332205 0.000000 482 +staff 0 36 3.367296 0.000000 490 +random 0 34 3.401197 0.000000 511 +transform 0 32 3.465736 0.000000 542 +detect 0 26 3.688879 0.000000 646 +constraint 0 26 3.688879 0.000000 636 +motion 0 24 3.761200 0.000000 699 +flow 0 24 3.761200 0.000000 700 +recognit 0 23 3.806662 0.000000 723 +geometri 0 22 3.850148 0.000000 752 +geometr 0 19 4.007333 0.000000 852 +histori 0 19 4.007333 0.000000 853 +regular 0 17 4.110874 0.000000 929 +estim 0 17 4.110874 0.000000 930 +segment 0 17 4.110874 0.000000 931 +track 0 15 4.248495 0.000000 1029 +guest 0 12 4.465908 0.000000 1220 +calculu 0 12 4.465908 0.000000 1203 +optic 0 12 4.465908 0.000000 1221 +distanc 0 9 4.753590 0.000000 1500 +face 0 9 4.753590 0.000000 1501 +edg 0 8 4.875197 0.000000 1647 +stereo 0 7 5.010635 0.000000 1818 +parametr 0 7 5.010635 0.000000 1819 +ramin 0 7 5.010635 0.000000 1820 +justin 0 7 5.010635 0.000000 1789 +correl 0 5 5.347108 0.000000 2279 +variat 0 5 5.347108 0.000000 2248 +markov 0 5 5.347108 0.000000 2280 +snake 0 5 5.347108 0.000000 2281 +scribe 0 4 5.568345 0.000000 2631 +maximum 0 4 5.568345 0.000000 2632 +hausdorff 0 4 5.568345 0.000000 2633 +cont 0 3 5.857933 0.000000 3171 +likelihood 0 3 5.857933 0.000000 3172 +anneal 0 2 6.263398 0.000000 4136 +zabihteach 0 1 6.957497 0.000000 6689 +millerclass 0 1 6.957497 0.000000 6690 +phillip 0 1 6.957497 0.000000 6691 +suggestionsproblem 0 1 6.957497 0.000000 6692 +mestim 0 1 6.957497 0.000000 6693 +censu 0 1 6.957497 0.000000 6694 +eigenhausdorff 0 1 6.957497 0.000000 6695 +recognitionsect 0 1 6.957497 0.000000 6696 +equationoth 0 1 6.957497 0.000000 6697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..be5e03de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +code 0 108 2.197225 0.000000 116 +pleas 0 113 2.197225 0.000000 114 +final 0 116 2.197225 0.000000 108 +site 0 106 2.197225 0.000000 119 +part 0 98 2.302585 0.000000 129 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +contain 0 81 2.484907 0.000000 174 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +environ 0 84 2.484907 0.000000 177 +materi 0 75 2.639057 0.000000 221 +integr 0 67 2.708050 0.000000 245 +descript 0 64 2.772589 0.000000 271 +content 0 59 2.833213 0.000000 302 +variou 0 56 2.890372 0.000000 317 +talk 0 53 2.944439 0.000000 336 +archiv 0 49 3.044522 0.000000 364 +pointer 0 48 3.044522 0.000000 368 +directori 0 45 3.135494 0.000000 396 +natur 0 44 3.135494 0.000000 406 +execut 0 45 3.135494 0.000000 404 +small 0 39 3.258097 0.000000 447 +ofth 0 36 3.367296 0.000000 491 +compon 0 30 3.555348 0.000000 570 +turn 0 29 3.583519 0.000000 586 +variabl 0 23 3.806662 0.000000 715 +annot 0 21 3.912023 0.000000 775 +sure 0 20 3.951244 0.000000 813 +repositori 0 17 4.110874 0.000000 932 +brown 0 16 4.174387 0.000000 977 +speech 0 12 4.465908 0.000000 1222 +tag 0 7 5.010635 0.000000 1821 +corpu 0 5 5.347108 0.000000 2282 +penn 0 3 5.857933 0.000000 3094 +pagesc 0 3 5.857933 0.000000 3133 +brill 0 2 6.263398 0.000000 4137 +treebank 0 2 6.263398 0.000000 4138 +schedulewhat 0 2 6.263398 0.000000 4139 +wordnet 0 1 6.957497 0.000000 6698 +pagecsintroduct 0 1 6.957497 0.000000 6699 +understandingcomput 0 1 6.957497 0.000000 6700 +announcementsher 0 1 6.957497 0.000000 6701 +taggerbrown 0 1 6.957497 0.000000 6702 +withpart 0 1 6.957497 0.000000 6703 +wnsearchdir 0 1 6.957497 0.000000 6704 +dict 0 1 6.957497 0.000000 6705 +iicollect 0 1 6.957497 0.000000 6706 +canus 0 1 6.957497 0.000000 6707 +francisabout 0 1 6.957497 0.000000 6708 +computationallinguist 0 1 6.957497 0.000000 6709 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..862bf1a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +cours 0 273 1.098612 0.000000 15 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +info 0 85 2.484907 0.000000 176 +exam 0 86 2.484907 0.000000 169 +upson 0 71 2.639057 0.000000 218 +tuesdai 0 73 2.639057 0.000000 219 +meet 0 72 2.639057 0.000000 229 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +room 0 59 2.833213 0.000000 301 +cover 0 55 2.944439 0.000000 329 +still 0 50 3.044522 0.000000 362 +close 0 38 3.295837 0.000000 465 +usual 0 28 3.610918 0.000000 608 +lab 0 24 3.761200 0.000000 698 +prelim 0 12 4.465908 0.000000 1201 +therefor 0 7 5.010635 0.000000 1822 +philip 0 6 5.164786 0.000000 2005 +circumst 0 5 5.347108 0.000000 2283 +materialcov 0 2 6.263398 0.000000 4140 +announcementsroom 0 1 6.957497 0.000000 6710 +unforseen 0 1 6.957497 0.000000 6711 +unableto 0 1 6.957497 0.000000 6712 +maclab 0 1 6.957497 0.000000 6713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..a45c72e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +lectur 1 135 1.945910 1.945910 73 +professor 0 137 1.945910 0.000000 76 +find 0 111 2.197225 0.000000 111 +solut 0 82 2.484907 0.000000 162 +dynam 0 76 2.564949 0.000000 194 +explor 0 58 2.890372 0.000000 324 +tree 0 36 3.367296 0.000000 492 +graph 0 30 3.555348 0.000000 576 +theorem 0 21 3.912023 0.000000 786 +matrix 0 17 4.110874 0.000000 933 +closur 0 8 4.875197 0.000000 1643 +karp 0 5 5.347108 0.000000 2284 +union 0 4 5.568345 0.000000 2634 +push 0 4 5.568345 0.000000 2635 +ford 0 4 5.568345 0.000000 2636 +heap 0 3 5.857933 0.000000 3123 +dijkstra 0 3 5.857933 0.000000 3173 +monika 0 2 6.263398 0.000000 4141 +rauch 0 2 6.263398 0.000000 4142 +greedi 0 2 6.263398 0.000000 4143 +edmond 0 2 6.263398 0.000000 4144 +maxflow 0 1 6.957497 0.000000 6714 +matroid 0 1 6.957497 0.000000 6715 +binomi 0 1 6.957497 0.000000 6716 +preflow 0 1 6.957497 0.000000 6717 +henzingeremail 0 1 6.957497 0.000000 6718 +informationhomework 0 1 6.957497 0.000000 6719 +bellman 0 1 6.957497 0.000000 6720 +fibonacci 0 1 6.957497 0.000000 6721 +treap 0 1 6.957497 0.000000 6722 +randomizedsearch 0 1 6.957497 0.000000 6723 +mincut 0 1 6.957497 0.000000 6724 +dinitz 0 1 6.957497 0.000000 6725 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..db2fec5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,213 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +file 0 132 1.945910 0.000000 70 +lectur 0 135 1.945910 0.000000 73 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +final 0 116 2.197225 0.000000 108 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +need 0 98 2.302585 0.000000 135 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +exam 0 86 2.484907 0.000000 169 +chang 0 82 2.484907 0.000000 163 +environ 0 84 2.484907 0.000000 177 +june 0 79 2.564949 0.000000 214 +sourc 0 77 2.564949 0.000000 201 +method 0 80 2.564949 0.000000 213 +exampl 0 77 2.564949 0.000000 195 +upson 0 71 2.639057 0.000000 218 +solv 0 73 2.639057 0.000000 234 +name 0 72 2.639057 0.000000 220 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +syllabu 0 67 2.708050 0.000000 247 +integr 0 67 2.708050 0.000000 245 +order 0 69 2.708050 0.000000 249 +handout 0 64 2.772589 0.000000 263 +function 0 62 2.772589 0.000000 275 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +plan 0 65 2.772589 0.000000 272 +juli 1 60 2.833213 2.833213 305 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +unix 0 58 2.890372 0.000000 308 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +numer 0 49 3.044522 0.000000 369 +appoint 0 49 3.044522 0.000000 358 +set 0 50 3.044522 0.000000 361 +done 0 47 3.091042 0.000000 381 +adapt 0 46 3.091042 0.000000 387 +midterm 0 45 3.135494 0.000000 392 +math 0 44 3.135494 0.000000 402 +linear 0 41 3.218876 0.000000 431 +review 0 42 3.218876 0.000000 425 +late 0 40 3.258097 0.000000 439 +error 0 40 3.258097 0.000000 449 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +least 0 35 3.401197 0.000000 516 +everi 0 34 3.401197 0.000000 519 +return 0 34 3.401197 0.000000 502 +given 0 32 3.465736 0.000000 538 +chapter 0 32 3.465736 0.000000 536 +administr 0 27 3.637586 0.000000 628 +though 0 27 3.637586 0.000000 622 +rule 0 26 3.688879 0.000000 638 +session 0 26 3.688879 0.000000 643 +reliabl 0 25 3.737670 0.000000 674 +concern 0 25 3.737670 0.000000 666 +valu 0 25 3.737670 0.000000 665 +lab 0 24 3.761200 0.000000 698 +equat 0 23 3.806662 0.000000 724 +begin 0 23 3.806662 0.000000 716 +variabl 0 23 3.806662 0.000000 715 +initi 0 23 3.806662 0.000000 717 +highli 0 23 3.806662 0.000000 725 +brows 0 23 3.806662 0.000000 726 +dai 0 22 3.850148 0.000000 753 +recommend 0 22 3.850148 0.000000 737 +instal 0 22 3.850148 0.000000 754 +viewer 0 21 3.912023 0.000000 787 +minim 0 18 4.060443 0.000000 887 +record 0 18 4.060443 0.000000 890 +account 0 18 4.060443 0.000000 882 +accept 0 18 4.060443 0.000000 879 +stand 0 18 4.060443 0.000000 891 +matrix 0 17 4.110874 0.000000 933 +macintosh 0 17 4.110874 0.000000 920 +adam 0 17 4.110874 0.000000 934 +vector 0 16 4.174387 0.000000 961 +score 0 15 4.248495 0.000000 1017 +purchas 0 15 4.248495 0.000000 1030 +matlab 0 14 4.317488 0.000000 1081 +squar 0 14 4.317488 0.000000 1082 +rank 0 14 4.317488 0.000000 1063 +polynomi 0 14 4.317488 0.000000 1069 +command 0 14 4.317488 0.000000 1083 +charl 0 13 4.382027 0.000000 1149 +composit 0 13 4.382027 0.000000 1150 +outsid 0 12 4.465908 0.000000 1219 +extra 0 11 4.553877 0.000000 1312 +total 0 10 4.653960 0.000000 1398 +matric 0 10 4.653960 0.000000 1399 +length 0 10 4.653960 0.000000 1400 +deadlin 0 9 4.753590 0.000000 1502 +pair 0 9 4.753590 0.000000 1503 +rel 0 9 4.753590 0.000000 1487 +float 0 9 4.753590 0.000000 1504 +partner 0 8 4.875197 0.000000 1648 +calendar 0 8 4.875197 0.000000 1649 +root 0 8 4.875197 0.000000 1650 +on 0 8 4.875197 0.000000 1628 +interpol 0 7 5.010635 0.000000 1823 +newton 0 7 5.010635 0.000000 1824 +elementari 0 7 5.010635 0.000000 1825 +accord 0 7 5.010635 0.000000 1826 +classroom 0 6 5.164786 0.000000 2006 +spline 0 6 5.164786 0.000000 2007 +drop 0 6 5.164786 0.000000 2008 +otherthan 0 6 5.164786 0.000000 2009 +fit 0 5 5.347108 0.000000 2285 +stabil 0 5 5.347108 0.000000 2286 +worst 0 5 5.347108 0.000000 2287 +ignor 0 5 5.347108 0.000000 2288 +registr 0 5 5.347108 0.000000 2249 +niko 0 4 5.568345 0.000000 2637 +backward 0 4 5.568345 0.000000 2638 +alon 0 3 5.857933 0.000000 3139 +euler 0 3 5.857933 0.000000 3174 +pitsiani 0 3 5.857933 0.000000 3175 +rack 0 3 5.857933 0.000000 3176 +duedat 0 3 5.857933 0.000000 3105 +uncompress 0 3 5.857933 0.000000 3177 +scientificcomput 0 2 6.263398 0.000000 4145 +stress 0 2 6.263398 0.000000 4146 +prerequisitesc 0 2 6.263398 0.000000 4058 +loan 0 2 6.263398 0.000000 4147 +renssela 0 2 6.263398 0.000000 4148 +examsther 0 2 6.263398 0.000000 4149 +hermit 0 2 6.263398 0.000000 4150 +multivari 0 2 6.263398 0.000000 4151 +folder 0 2 6.263398 0.000000 4152 +scmv 0 1 6.957497 0.000000 6726 +quadratur 0 1 6.957497 0.000000 6727 +ozan 0 1 6.957497 0.000000 6728 +siblei 0 1 6.957497 0.000000 6729 +martha 0 1 6.957497 0.000000 6730 +cubic 0 1 6.957497 0.000000 6731 +zcat 0 1 6.957497 0.000000 6732 +computationsumm 0 1 6.957497 0.000000 6733 +setsan 0 1 6.957497 0.000000 6734 +andnonlinear 0 1 6.957497 0.000000 6735 +ordinarydifferenti 0 1 6.957497 0.000000 6736 +informationstaff 0 1 6.957497 0.000000 6737 +hafizogullari 0 1 6.957497 0.000000 6738 +lecturesclass 0 1 6.957497 0.000000 6739 +administrationlauri 0 1 6.957497 0.000000 6740 +buck 0 1 6.957497 0.000000 6741 +addressedto 0 1 6.957497 0.000000 6742 +corequisit 0 1 6.957497 0.000000 6743 +materialstext 0 1 6.957497 0.000000 6744 +approachus 0 1 6.957497 0.000000 6745 +eitherth 0 1 6.957497 0.000000 6746 +labsthi 0 1 6.957497 0.000000 6747 +setsther 0 1 6.957497 0.000000 6748 +orfrom 0 1 6.957497 0.000000 6749 +computingproblem 0 1 6.957497 0.000000 6750 +behandl 0 1 6.957497 0.000000 6751 +gradefrom 0 1 6.957497 0.000000 6752 +printyour 0 1 6.957497 0.000000 6753 +firstpag 0 1 6.957497 0.000000 6754 +partnernam 0 1 6.957497 0.000000 6755 +gradingyour 0 1 6.957497 0.000000 6756 +beassign 0 1 6.957497 0.000000 6757 +onyour 0 1 6.957497 0.000000 6758 +vandermond 0 1 6.957497 0.000000 6759 +piecewis 0 1 6.957497 0.000000 6760 +cote 0 1 6.957497 0.000000 6761 +choleski 0 1 6.957497 0.000000 6762 +rung 0 1 6.957497 0.000000 6763 +kutta 0 1 6.957497 0.000000 6764 +computingat 0 1 6.957497 0.000000 6765 +rennselaerhal 0 1 6.957497 0.000000 6766 +untar 0 1 6.957497 0.000000 6767 +randperm 0 1 6.957497 0.000000 6768 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..894a62a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +file 0 132 1.945910 0.000000 70 +first 0 140 1.945910 0.000000 71 +architectur 0 139 1.945910 0.000000 77 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +schedul 0 119 2.079442 0.000000 85 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +advanc 0 99 2.302585 0.000000 130 +text 0 98 2.302585 0.000000 133 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +comment 0 93 2.397895 0.000000 146 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +requir 0 81 2.484907 0.000000 167 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +chang 0 82 2.484907 0.000000 163 +wide 0 84 2.484907 0.000000 185 +second 0 81 2.484907 0.000000 166 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +homework 0 79 2.564949 0.000000 193 +mondai 0 77 2.564949 0.000000 206 +materi 0 75 2.639057 0.000000 221 +logic 0 71 2.639057 0.000000 230 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +tuesdai 0 73 2.639057 0.000000 219 +upson 0 71 2.639057 0.000000 218 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +descript 0 64 2.772589 0.000000 271 +polici 0 64 2.772589 0.000000 279 +virtual 0 62 2.772589 0.000000 285 +organ 0 65 2.772589 0.000000 265 +copi 0 63 2.772589 0.000000 284 +wednesdai 0 64 2.772589 0.000000 261 +share 0 59 2.833213 0.000000 304 +content 0 59 2.833213 0.000000 302 +summer 0 56 2.890372 0.000000 311 +overview 0 56 2.890372 0.000000 323 +variou 0 56 2.890372 0.000000 317 +cover 0 55 2.944439 0.000000 329 +particular 0 51 2.995732 0.000000 352 +week 0 52 2.995732 0.000000 343 +maintain 0 51 2.995732 0.000000 342 +archiv 0 49 3.044522 0.000000 364 +understand 0 47 3.091042 0.000000 384 +discuss 0 45 3.135494 0.000000 399 +textbook 0 44 3.135494 0.000000 397 +fast 0 42 3.218876 0.000000 429 +form 0 39 3.258097 0.000000 443 +submit 0 39 3.258097 0.000000 440 +close 0 38 3.295837 0.000000 465 +purpos 0 37 3.332205 0.000000 481 +hand 0 37 3.332205 0.000000 475 +next 0 34 3.401197 0.000000 517 +concurr 0 34 3.401197 0.000000 501 +collabor 0 32 3.465736 0.000000 543 +secur 0 30 3.555348 0.000000 577 +depend 0 29 3.583519 0.000000 583 +synchron 0 29 3.583519 0.000000 588 +multiprocessor 0 28 3.610918 0.000000 605 +subject 0 26 3.688879 0.000000 647 +detect 0 26 3.688879 0.000000 646 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +programminglanguag 0 21 3.912023 0.000000 782 +kernel 0 20 3.951244 0.000000 825 +assum 0 19 4.007333 0.000000 845 +feedback 0 19 4.007333 0.000000 854 +outlin 0 17 4.110874 0.000000 914 +protect 0 17 4.110874 0.000000 935 +segment 0 17 4.110874 0.000000 931 +weekli 0 17 4.110874 0.000000 919 +micro 0 15 4.248495 0.000000 1031 +quizz 0 13 4.382027 0.000000 1151 +carri 0 13 4.382027 0.000000 1152 +assembl 0 12 4.465908 0.000000 1207 +statement 0 11 4.553877 0.000000 1313 +worth 0 11 4.553877 0.000000 1294 +evolut 0 11 4.553877 0.000000 1314 +multithread 0 11 4.553877 0.000000 1315 +peter 0 11 4.553877 0.000000 1316 +operatingsystem 0 10 4.653960 0.000000 1401 +princip 0 10 4.653960 0.000000 1397 +familiar 0 9 4.753590 0.000000 1485 +attent 0 8 4.875197 0.000000 1651 +remind 0 7 5.010635 0.000000 1799 +prevent 0 7 5.010635 0.000000 1827 +surpris 0 7 5.010635 0.000000 1828 +multiprogram 0 6 5.164786 0.000000 2010 +pace 0 6 5.164786 0.000000 2011 +ensur 0 6 5.164786 0.000000 2012 +silberschatz 0 6 5.164786 0.000000 1978 +indupraka 0 4 5.568345 0.000000 2639 +kodukula 0 4 5.568345 0.000000 2640 +deadlock 0 4 5.568345 0.000000 2641 +permiss 0 4 5.568345 0.000000 2642 +usedto 0 4 5.568345 0.000000 2643 +abraham 0 4 5.568345 0.000000 2644 +roughli 0 3 5.857933 0.000000 3097 +prereq 0 3 5.857933 0.000000 3178 +theimpact 0 3 5.857933 0.000000 3179 +audienc 0 3 5.857933 0.000000 3180 +serverless 0 3 5.857933 0.000000 3181 +todetermin 0 3 5.857933 0.000000 3182 +nawaaz 0 2 6.263398 0.000000 4153 +ahm 0 2 6.263398 0.000000 4154 +praka 0 2 6.263398 0.000000 4155 +anintroduct 0 2 6.263398 0.000000 4156 +emphasison 0 2 6.263398 0.000000 4157 +memorymanag 0 2 6.263398 0.000000 4158 +thetradit 0 2 6.263398 0.000000 4159 +galvin 0 2 6.263398 0.000000 4160 +motd 0 1 6.957497 0.000000 6769 +lldiscuss 0 1 6.957497 0.000000 6770 +prerequsit 0 1 6.957497 0.000000 6771 +processsynchron 0 1 6.957497 0.000000 6772 +requiringconst 0 1 6.957497 0.000000 6773 +prerequsitescomplet 0 1 6.957497 0.000000 6774 +inparticular 0 1 6.957497 0.000000 6775 +theintroductori 0 1 6.957497 0.000000 6776 +thatwil 0 1 6.957497 0.000000 6777 +outlineth 0 1 6.957497 0.000000 6778 +theorder 0 1 6.957497 0.000000 6779 +mutualexclus 0 1 6.957497 0.000000 6780 +timepermit 0 1 6.957497 0.000000 6781 +textbooksth 0 1 6.957497 0.000000 6782 +conceptsbook 0 1 6.957497 0.000000 6783 +distributeclass 0 1 6.957497 0.000000 6784 +noteswil 0 1 6.957497 0.000000 6785 +pageat 0 1 6.957497 0.000000 6786 +mondaythru 0 1 6.957497 0.000000 6787 +thesewil 0 1 6.957497 0.000000 6788 +thursdayat 0 1 6.957497 0.000000 6789 +gradingeach 0 1 6.957497 0.000000 6790 +weightag 0 1 6.957497 0.000000 6791 +combinedweightag 0 1 6.957497 0.000000 6792 +twomidterm 0 1 6.957497 0.000000 6793 +collaborationat 0 1 6.957497 0.000000 6794 +eachhomework 0 1 6.957497 0.000000 6795 +thehomework 0 1 6.957497 0.000000 6796 +closednot 0 1 6.957497 0.000000 6797 +induprakaskodukula 0 1 6.957497 0.000000 6798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..2867f5b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +hour 0 165 1.791759 0.000000 46 +lectur 1 135 1.945910 1.945910 73 +note 0 142 1.945910 0.000000 67 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +advanc 0 99 2.302585 0.000000 130 +octob 0 89 2.397895 0.000000 156 +novemb 0 81 2.484907 0.000000 179 +solut 0 82 2.484907 0.000000 162 +start 0 83 2.484907 0.000000 173 +decemb 0 80 2.564949 0.000000 215 +homework 0 79 2.564949 0.000000 193 +upson 0 71 2.639057 0.000000 218 +septemb 0 65 2.772589 0.000000 274 +appoint 0 49 3.044522 0.000000 358 +get 0 46 3.091042 0.000000 380 +midterm 0 45 3.135494 0.000000 392 +meta 0 9 4.753590 0.000000 1505 +scribe 0 4 5.568345 0.000000 2631 +csc 0 3 5.857933 0.000000 3183 +neal 0 3 5.857933 0.000000 3184 +languagesfal 0 2 6.263398 0.000000 4161 +glew 0 2 6.263398 0.000000 4162 +informationhandout 0 2 6.263398 0.000000 4163 +henzingerupson 0 1 6.957497 0.000000 6799 +glewupson 0 1 6.957497 0.000000 6800 +handoutshandout 0 1 6.957497 0.000000 6801 +mlhandout 0 1 6.957497 0.000000 6802 +lambdahomeworkshomework 0 1 6.957497 0.000000 6803 +grieshomework 0 1 6.957497 0.000000 6804 +notesraw 0 1 6.957497 0.000000 6805 +noteslectur 0 1 6.957497 0.000000 6806 +mllectur 0 1 6.957497 0.000000 6807 +grieslectur 0 1 6.957497 0.000000 6808 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..69d84d47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +fall 1 181 1.609438 1.609438 40 +hour 0 165 1.791759 0.000000 46 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +pleas 0 113 2.197225 0.000000 114 +advanc 0 99 2.302585 0.000000 130 +comment 0 93 2.397895 0.000000 146 +question 0 91 2.397895 0.000000 141 +mondai 0 77 2.564949 0.000000 206 +upson 0 71 2.639057 0.000000 218 +prof 0 64 2.772589 0.000000 273 +faculti 0 56 2.890372 0.000000 325 +suggest 0 53 2.944439 0.000000 331 +robert 0 30 3.555348 0.000000 567 +thur 0 19 4.007333 0.000000 847 +classic 0 14 4.317488 0.000000 1084 +nuprl 0 10 4.653960 0.000000 1402 +kumar 0 9 4.753590 0.000000 1506 +ravi 0 3 5.857933 0.000000 3185 +constabl 0 3 5.857933 0.000000 3186 +pavel 0 2 6.263398 0.000000 4164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..d95519e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +spring 0 131 2.079442 0.000000 88 +member 0 84 2.484907 0.000000 165 +addit 1 74 2.639057 2.639057 228 +maintain 0 51 2.995732 0.000000 342 +consult 0 24 3.761200 0.000000 687 +coursesc 0 4 5.568345 0.000000 2692 +individualfaculti 0 1 6.957497 0.000000 7418 +contactgloria 0 1 6.957497 0.000000 7419 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..4a032a10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +analysi 0 124 2.079442 0.000000 98 +welcom 0 122 2.079442 0.000000 99 +instructor 0 108 2.197225 0.000000 107 +send 0 114 2.197225 0.000000 109 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +homework 0 79 2.564949 0.000000 193 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +new 0 64 2.772589 0.000000 262 +suggest 0 53 2.944439 0.000000 331 +announc 0 40 3.258097 0.000000 441 +tutori 0 39 3.258097 0.000000 437 +prepar 0 20 3.951244 0.000000 824 +yang 0 8 4.875197 0.000000 1652 +vicki 0 3 5.857933 0.000000 3187 +almstrum 1 2 6.263398 6.263398 4165 +linyuan 0 1 6.957497 0.000000 6809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..97da01e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +tool 0 117 2.079442 0.000000 93 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +schedul 0 119 2.079442 0.000000 85 +theori 0 111 2.197225 0.000000 127 +version 0 113 2.197225 0.000000 122 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +instructor 0 108 2.197225 0.000000 107 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +grade 0 90 2.397895 0.000000 142 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +help 0 83 2.484907 0.000000 175 +april 0 77 2.564949 0.000000 196 +method 0 80 2.564949 0.000000 213 +exampl 0 77 2.564949 0.000000 195 +good 0 77 2.564949 0.000000 200 +logic 0 71 2.639057 0.000000 230 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +test 0 66 2.708050 0.000000 252 +foundat 0 62 2.772589 0.000000 286 +taylor 0 63 2.772589 0.000000 287 +interact 0 62 2.772589 0.000000 270 +copi 0 63 2.772589 0.000000 284 +type 0 61 2.833213 0.000000 296 +reason 0 57 2.890372 0.000000 318 +approach 0 48 3.044522 0.000000 366 +numer 0 49 3.044522 0.000000 369 +algebra 0 45 3.135494 0.000000 394 +mechan 0 43 3.178054 0.000000 416 +examin 0 42 3.218876 0.000000 424 +continu 0 39 3.258097 0.000000 448 +formal 1 37 3.332205 3.332205 478 +either 0 35 3.401197 0.000000 506 +specifi 0 30 3.555348 0.000000 568 +hope 0 28 3.610918 0.000000 610 +higher 0 24 3.761200 0.000000 690 +consult 0 24 3.761200 0.000000 687 +equat 0 23 3.806662 0.000000 724 +tent 0 22 3.850148 0.000000 739 +moor 0 17 4.110874 0.000000 936 +upon 0 16 4.174387 0.000000 978 +choos 0 16 4.174387 0.000000 964 +choic 0 16 4.174387 0.000000 979 +squar 0 14 4.317488 0.000000 1082 +recurs 0 13 4.382027 0.000000 1127 +guest 0 12 4.465908 0.000000 1220 +primit 0 11 4.553877 0.000000 1317 +arithmet 0 10 4.653960 0.000000 1388 +nuprl 0 10 4.653960 0.000000 1402 +pair 0 9 4.753590 0.000000 1503 +russel 0 9 4.753590 0.000000 1507 +prover 0 8 4.875197 0.000000 1653 +root 0 8 4.875197 0.000000 1650 +chief 0 7 5.010635 0.000000 1829 +boyer 0 6 5.164786 0.000000 2013 +freeli 0 6 5.164786 0.000000 2014 +backup 0 4 5.568345 0.000000 2645 +rick 0 4 5.568345 0.000000 2646 +lego 0 3 5.857933 0.000000 3188 +oral 0 3 5.857933 0.000000 3189 +samuel 0 3 5.857933 0.000000 3155 +sawada 0 3 5.857933 0.000000 3190 +otter 0 2 6.263398 0.000000 4166 +ofmathemat 0 2 6.263398 0.000000 4167 +nelson 0 2 6.263398 0.000000 4168 +ortool 0 2 6.263398 0.000000 4169 +bowen 0 2 6.263398 0.000000 4170 +guyer 0 2 6.263398 0.000000 4171 +blurb 0 1 6.957497 0.000000 6810 +theobject 0 1 6.957497 0.000000 6811 +formalizationof 0 1 6.957497 0.000000 6812 +creationof 0 1 6.957497 0.000000 6813 +systemsfor 0 1 6.957497 0.000000 6814 +formalmethod 0 1 6.957497 0.000000 6815 +suchsystem 0 1 6.957497 0.000000 6816 +imp 0 1 6.957497 0.000000 6817 +mizar 0 1 6.957497 0.000000 6818 +quaif 0 1 6.957497 0.000000 6819 +coqstud 0 1 6.957497 0.000000 6820 +aboutthes 0 1 6.957497 0.000000 6821 +projecthtml 0 1 6.957497 0.000000 6822 +theqe 0 1 6.957497 0.000000 6823 +manifestoplain 0 1 6.957497 0.000000 6824 +qedmanifestobowen 0 1 6.957497 0.000000 6825 +localform 0 1 6.957497 0.000000 6826 +tannei 0 1 6.957497 0.000000 6827 +trevor 0 1 6.957497 0.000000 6828 +hick 0 1 6.957497 0.000000 6829 +ruben 0 1 6.957497 0.000000 6830 +gamboa 0 1 6.957497 0.000000 6831 +circal 0 1 6.957497 0.000000 6832 +turpin 0 1 6.957497 0.000000 6833 +galoi 0 1 6.957497 0.000000 6834 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..368497f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +note 0 142 1.945910 0.000000 67 +analysi 0 124 2.079442 0.000000 98 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +techniqu 0 99 2.302585 0.000000 138 +select 0 91 2.397895 0.000000 154 +grade 0 90 2.397895 0.000000 142 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +exam 0 86 2.484907 0.000000 169 +orient 0 80 2.564949 0.000000 205 +homework 0 79 2.564949 0.000000 193 +dynam 0 76 2.564949 0.000000 194 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +differ 0 66 2.708050 0.000000 253 +written 0 63 2.772589 0.000000 278 +major 0 56 2.890372 0.000000 315 +variou 0 56 2.890372 0.000000 317 +space 0 57 2.890372 0.000000 310 +instruct 0 53 2.944439 0.000000 332 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +pointer 0 48 3.044522 0.000000 368 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +edit 0 42 3.218876 0.000000 418 +review 0 42 3.218876 0.000000 425 +cach 0 41 3.218876 0.000000 432 +compani 0 41 3.218876 0.000000 423 +cost 0 37 3.332205 0.000000 480 +tech 0 35 3.401197 0.000000 515 +bibliographi 0 34 3.401197 0.000000 518 +product 0 33 3.433987 0.000000 527 +compon 0 30 3.555348 0.000000 570 +focus 0 29 3.583519 0.000000 584 +particip 0 29 3.583519 0.000000 589 +limit 0 29 3.583519 0.000000 585 +administr 0 27 3.637586 0.000000 628 +static 0 27 3.637586 0.000000 619 +compar 0 26 3.688879 0.000000 648 +altern 0 26 3.688879 0.000000 641 +aspect 0 25 3.737670 0.000000 663 +input 0 23 3.806662 0.000000 727 +emphasi 0 22 3.850148 0.000000 755 +recommend 0 22 3.850148 0.000000 737 +disk 0 22 3.850148 0.000000 747 +output 0 21 3.912023 0.000000 788 +watch 0 21 3.912023 0.000000 789 +predict 0 19 4.007333 0.000000 855 +appropri 0 18 4.060443 0.000000 883 +interconnect 0 17 4.110874 0.000000 937 +attempt 0 17 4.110874 0.000000 917 +intro 0 17 4.110874 0.000000 915 +modern 0 16 4.174387 0.000000 966 +choic 0 16 4.174387 0.000000 979 +choos 0 16 4.174387 0.000000 964 +vector 0 16 4.174387 0.000000 961 +branch 0 11 4.553877 0.000000 1318 +errata 0 10 4.653960 0.000000 1403 +pair 0 9 4.753590 0.000000 1503 +significantli 0 9 4.753590 0.000000 1508 +admin 0 9 4.753590 0.000000 1476 +quantit 0 8 4.875197 0.000000 1654 +pipelin 0 7 5.010635 0.000000 1830 +metric 0 7 5.010635 0.000000 1831 +subsystem 0 6 5.164786 0.000000 2015 +hennessi 0 5 5.347108 0.000000 2289 +computerarchitectur 0 5 5.347108 0.000000 2290 +reveal 0 4 5.568345 0.000000 2647 +queu 0 4 5.568345 0.000000 2648 +buss 0 4 5.568345 0.000000 2649 +hazard 0 3 5.857933 0.000000 3191 +evaluationof 0 3 5.857933 0.000000 3192 +tertiari 0 3 5.857933 0.000000 3193 +mpp 0 3 5.857933 0.000000 3194 +insystem 0 2 6.263398 0.000000 4172 +dram 0 2 6.263398 0.000000 4173 +stedit 0 1 6.957497 0.000000 6835 +predictionch 0 1 6.957497 0.000000 6836 +revieww 0 1 6.957497 0.000000 6837 +architecturethi 0 1 6.957497 0.000000 6838 +benchmarksto 0 1 6.957497 0.000000 6839 +highperform 0 1 6.957497 0.000000 6840 +memoryhierarchi 0 1 6.957497 0.000000 6841 +studentswil 0 1 6.957497 0.000000 6842 +undertak 0 1 6.957497 0.000000 6843 +oftheir 0 1 6.957497 0.000000 6844 +informationuniqu 0 1 6.957497 0.000000 6845 +mikedahlinoffic 0 1 6.957497 0.000000 6846 +tbdtaoffic 0 1 6.957497 0.000000 6847 +tbdreadingstextbook 0 1 6.957497 0.000000 6848 +patteson 0 1 6.957497 0.000000 6849 +sheetfor 0 1 6.957497 0.000000 6850 +pattersonin 0 1 6.957497 0.000000 6851 +currentcomput 0 1 6.957497 0.000000 6852 +readinglist 0 1 6.957497 0.000000 6853 +scheduleweekdatetopicreadingduejan 0 1 6.957497 0.000000 6854 +perf 0 1 6.957497 0.000000 6855 +amdahl 0 1 6.957497 0.000000 6856 +trendsch 0 1 6.957497 0.000000 6857 +isa 0 1 6.957497 0.000000 6858 +mlkholidayf 0 1 6.957497 0.000000 6859 +proposalfeb 0 1 6.957497 0.000000 6860 +scoreboard 0 1 6.957497 0.000000 6861 +tomasulu 0 1 6.957497 0.000000 6862 +speculationch 0 1 6.957497 0.000000 6863 +processorsch 0 1 6.957497 0.000000 6864 +dfeb 0 1 6.957497 0.000000 6865 +hierarchych 0 1 6.957497 0.000000 6866 +surveyfeb 0 1 6.957497 0.000000 6867 +banksf 0 1 6.957497 0.000000 6868 +breakm 0 1 6.957497 0.000000 6869 +breakmar 0 1 6.957497 0.000000 6870 +raidch 0 1 6.957497 0.000000 6871 +networksf 0 1 6.957497 0.000000 6872 +networksch 0 1 6.957497 0.000000 6873 +checkpointapr 0 1 6.957497 0.000000 6874 +architecturesf 0 1 6.957497 0.000000 6875 +mppsch 0 1 6.957497 0.000000 6876 +preseantationsm 0 1 6.957497 0.000000 6877 +presentationsfri 0 1 6.957497 0.000000 6878 +classesm 0 1 6.957497 0.000000 6879 +reportaddit 0 1 6.957497 0.000000 6880 +resourcescours 0 1 6.957497 0.000000 6881 +reportsyahoo 0 1 6.957497 0.000000 6882 +businessand 0 1 6.957497 0.000000 6883 +economi 0 1 6.957497 0.000000 6884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..3cb07ca6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +problem 0 147 1.945910 0.000000 75 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +report 0 131 2.079442 0.000000 92 +world 0 115 2.197225 0.000000 126 +final 0 116 2.197225 0.000000 108 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +resourc 0 81 2.484907 0.000000 172 +state 0 76 2.564949 0.000000 207 +refer 0 78 2.564949 0.000000 203 +solv 0 73 2.639057 0.000000 234 +goal 0 66 2.708050 0.000000 250 +syllabu 0 67 2.708050 0.000000 247 +organ 0 65 2.772589 0.000000 265 +talk 0 53 2.944439 0.000000 336 +protocol 0 45 3.135494 0.000000 407 +must 0 40 3.258097 0.000000 442 +purpos 0 37 3.332205 0.000000 481 +secur 0 30 3.555348 0.000000 577 +concern 0 25 3.737670 0.000000 666 +alloc 0 20 3.951244 0.000000 821 +longer 0 20 3.951244 0.000000 816 +verif 0 20 3.951244 0.000000 826 +less 0 18 4.060443 0.000000 892 +context 0 13 4.382027 0.000000 1153 +evolv 0 12 4.465908 0.000000 1223 +operatingsystem 0 10 4.653960 0.000000 1401 +tradit 0 10 4.653960 0.000000 1404 +guidelin 0 7 5.010635 0.000000 1832 +matur 0 5 5.347108 0.000000 2269 +hypothesi 0 4 5.568345 0.000000 2650 +behind 0 4 5.568345 0.000000 2610 +bear 0 4 5.568345 0.000000 2651 +explos 0 3 5.857933 0.000000 3138 +thetradit 0 2 6.263398 0.000000 4159 +interprocess 0 2 6.263398 0.000000 4174 +systemsuniqu 0 1 6.957497 0.000000 6885 +resultedin 0 1 6.957497 0.000000 6886 +contextof 0 1 6.957497 0.000000 6887 +understandingof 0 1 6.957497 0.000000 6888 +addressproblem 0 1 6.957497 0.000000 6889 +theissu 0 1 6.957497 0.000000 6890 +addressedin 0 1 6.957497 0.000000 6891 +occasionallyread 0 1 6.957497 0.000000 6892 +understandingcurr 0 1 6.957497 0.000000 6893 +reportspoint 0 1 6.957497 0.000000 6894 +rosterhandout 0 1 6.957497 0.000000 6895 +sslprotocol 0 1 6.957497 0.000000 6896 +proofsketch 0 1 6.957497 0.000000 6897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..aa67c4c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +last 0 314 1.098612 0.000000 14 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +austin 0 168 1.791759 0.000000 63 +object 0 138 1.945910 0.000000 79 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +professor 0 137 1.945910 0.000000 76 +assign 0 135 1.945910 0.000000 66 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +final 0 116 2.197225 0.000000 108 +check 0 115 2.197225 0.000000 118 +place 0 106 2.197225 0.000000 124 +version 0 113 2.197225 0.000000 122 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +real 0 93 2.397895 0.000000 144 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +start 0 83 2.484907 0.000000 173 +info 0 85 2.484907 0.000000 176 +academ 0 82 2.484907 0.000000 178 +chang 0 82 2.484907 0.000000 163 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +meet 0 72 2.639057 0.000000 229 +appli 0 71 2.639057 0.000000 226 +free 0 73 2.639057 0.000000 224 +test 0 66 2.708050 0.000000 252 +practic 0 70 2.708050 0.000000 246 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +handout 1 64 2.772589 2.772589 263 +wednesdai 0 64 2.772589 0.000000 261 +import 0 65 2.772589 0.000000 282 +organ 0 65 2.772589 0.000000 265 +new 0 64 2.772589 0.000000 262 +visit 0 63 2.772589 0.000000 288 +prof 0 64 2.772589 0.000000 273 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +polici 0 64 2.772589 0.000000 279 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +locat 0 59 2.833213 0.000000 303 +automat 0 61 2.833213 0.000000 306 +reason 0 57 2.890372 0.000000 318 +overview 0 56 2.890372 0.000000 323 +instruct 0 53 2.944439 0.000000 332 +date 0 51 2.995732 0.000000 344 +maintain 0 51 2.995732 0.000000 342 +electron 0 47 3.091042 0.000000 379 +discuss 0 45 3.135494 0.000000 399 +fridai 0 44 3.135494 0.000000 390 +might 0 41 3.218876 0.000000 426 +late 0 40 3.258097 0.000000 439 +announc 0 40 3.258097 0.000000 441 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +slide 0 38 3.295837 0.000000 467 +correct 0 38 3.295837 0.000000 462 +procedur 0 36 3.367296 0.000000 488 +manual 0 35 3.401197 0.000000 504 +least 0 35 3.401197 0.000000 516 +post 0 35 3.401197 0.000000 505 +singl 0 34 3.401197 0.000000 510 +next 0 34 3.401197 0.000000 517 +jame 0 35 3.401197 0.000000 507 +go 0 33 3.433987 0.000000 529 +titl 0 31 3.496508 0.000000 556 +turn 0 29 3.583519 0.000000 586 +pass 0 28 3.610918 0.000000 611 +except 0 28 3.610918 0.000000 607 +held 0 28 3.610918 0.000000 600 +utc 0 27 3.637586 0.000000 629 +session 0 26 3.688879 0.000000 643 +proc 0 26 3.688879 0.000000 649 +valu 0 25 3.737670 0.000000 665 +toward 0 25 3.737670 0.000000 668 +begin 0 23 3.806662 0.000000 716 +thank 0 23 3.806662 0.000000 721 +disk 0 22 3.850148 0.000000 747 +period 0 22 3.850148 0.000000 743 +hierarchi 0 22 3.850148 0.000000 744 +output 0 21 3.912023 0.000000 788 +newsgroup 0 21 3.912023 0.000000 783 +thur 0 19 4.007333 0.000000 847 +prerequisit 0 19 4.007333 0.000000 846 +attend 0 18 4.060443 0.000000 893 +offici 0 18 4.060443 0.000000 894 +regist 0 17 4.110874 0.000000 938 +earli 0 16 4.174387 0.000000 968 +zhang 0 16 4.174387 0.000000 980 +letter 0 16 4.174387 0.000000 981 +ascii 0 15 4.248495 0.000000 1032 +charact 0 15 4.248495 0.000000 1028 +conduct 0 14 4.317488 0.000000 1065 +front 0 13 4.382027 0.000000 1154 +holidai 0 12 4.465908 0.000000 1224 +remov 0 12 4.465908 0.000000 1225 +pascal 0 12 4.465908 0.000000 1213 +tue 0 11 4.553877 0.000000 1308 +chri 0 11 4.553877 0.000000 1311 +extra 0 11 4.553877 0.000000 1312 +night 0 11 4.553877 0.000000 1319 +penalti 0 10 4.653960 0.000000 1405 +stack 0 10 4.653960 0.000000 1389 +cheat 0 10 4.653960 0.000000 1395 +pick 0 9 4.753590 0.000000 1498 +deadlin 0 9 4.753590 0.000000 1502 +calendar 0 8 4.875197 0.000000 1649 +fail 0 8 4.875197 0.000000 1655 +spec 0 8 4.875197 0.000000 1640 +noon 0 7 5.010635 0.000000 1804 +bit 0 7 5.010635 0.000000 1833 +paramet 0 7 5.010635 0.000000 1796 +saturdai 0 7 5.010635 0.000000 1794 +drop 0 6 5.164786 0.000000 2008 +risc 0 6 5.164786 0.000000 2016 +door 0 5 5.347108 0.000000 2291 +mac 0 5 5.347108 0.000000 2292 +circumst 0 5 5.347108 0.000000 2283 +registr 0 5 5.347108 0.000000 2249 +glanc 0 4 5.568345 0.000000 2652 +chart 0 4 5.568345 0.000000 2653 +turnin 0 4 5.568345 0.000000 2654 +labor 0 3 5.857933 0.000000 3195 +obsolet 0 3 5.857933 0.000000 3196 +yurkanan 0 2 6.263398 0.000000 4175 +dragon 0 2 6.263398 0.000000 4176 +yoonsuck 0 2 6.263398 0.000000 4177 +choe 0 2 6.263398 0.000000 4178 +yschoe 0 2 6.263398 0.000000 4179 +typo 0 2 6.263398 0.000000 4180 +folder 0 2 6.263398 0.000000 4152 +constantli 0 2 6.263398 0.000000 4181 +edmondson 0 2 6.263398 0.000000 4182 +gzhang 0 2 6.263398 0.000000 4183 +rare 0 2 6.263398 0.000000 4184 +thanksgiv 0 2 6.263398 0.000000 4185 +appeal 0 2 6.263398 0.000000 4186 +fantasm 0 1 6.957497 0.000000 6898 +p_global 0 1 6.957497 0.000000 6899 +bonu 0 1 6.957497 0.000000 6900 +electronc 0 1 6.957497 0.000000 6901 +macsbug 0 1 6.957497 0.000000 6902 +electoron 0 1 6.957497 0.000000 6903 +edum 0 1 6.957497 0.000000 6904 +withdraw 0 1 6.957497 0.000000 6905 +rightmost 0 1 6.957497 0.000000 6906 +procudur 0 1 6.957497 0.000000 6907 +powermac 0 1 6.957497 0.000000 6908 +quadra 0 1 6.957497 0.000000 6909 +onmon 0 1 6.957497 0.000000 6910 +cynthia 0 1 6.957497 0.000000 6911 +deepa 0 1 6.957497 0.000000 6912 +ramani 0 1 6.957497 0.000000 6913 +dparam 0 1 6.957497 0.000000 6914 +eduw 0 1 6.957497 0.000000 6915 +eduf 0 1 6.957497 0.000000 6916 +refund 0 1 6.957497 0.000000 6917 +extenu 0 1 6.957497 0.000000 6918 +boxin 0 1 6.957497 0.000000 6919 +endia 0 1 6.957497 0.000000 6920 +func 0 1 6.957497 0.000000 6921 +practiv 0 1 6.957497 0.000000 6922 +questionair 0 1 6.957497 0.000000 6923 +brett 0 1 6.957497 0.000000 6924 +subroutine_fil 0 1 6.957497 0.000000 6925 +exception_fil 0 1 6.957497 0.000000 6926 +avali 0 1 6.957497 0.000000 6927 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..9752b99f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +avail 0 169 1.791759 0.000000 48 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +relat 0 139 1.945910 0.000000 68 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +postscript 0 131 2.079442 0.000000 90 +studi 0 120 2.079442 0.000000 91 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +structur 0 106 2.197225 0.000000 105 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +technic 0 100 2.302585 0.000000 140 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +question 0 91 2.397895 0.000000 141 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +section 0 94 2.397895 0.000000 149 +comment 0 93 2.397895 0.000000 146 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +activ 0 84 2.484907 0.000000 182 +good 0 77 2.564949 0.000000 200 +decemb 0 80 2.564949 0.000000 215 +come 0 78 2.564949 0.000000 202 +mondai 0 77 2.564949 0.000000 206 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +new 0 64 2.772589 0.000000 262 +import 0 65 2.772589 0.000000 282 +guid 0 63 2.772589 0.000000 267 +complex 0 64 2.772589 0.000000 269 +descript 0 64 2.772589 0.000000 271 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +semest 0 58 2.890372 0.000000 312 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +suggest 0 53 2.944439 0.000000 331 +week 0 52 2.995732 0.000000 343 +maintain 0 51 2.995732 0.000000 342 +right 0 48 3.044522 0.000000 363 +format 0 48 3.044522 0.000000 356 +frequent 0 49 3.044522 0.000000 367 +discuss 0 45 3.135494 0.000000 399 +midterm 0 45 3.135494 0.000000 392 +review 0 42 3.218876 0.000000 425 +howev 0 41 3.218876 0.000000 422 +might 0 41 3.218876 0.000000 426 +announc 0 40 3.258097 0.000000 441 +tutori 0 39 3.258097 0.000000 437 +programm 0 39 3.258097 0.000000 445 +slide 0 38 3.295837 0.000000 467 +feel 0 37 3.332205 0.000000 483 +soon 0 36 3.367296 0.000000 494 +download 0 36 3.367296 0.000000 489 +post 0 35 3.401197 0.000000 505 +next 0 34 3.401197 0.000000 517 +articl 0 33 3.433987 0.000000 530 +ad 0 32 3.465736 0.000000 544 +concept 0 32 3.465736 0.000000 537 +option 0 30 3.555348 0.000000 575 +becom 0 28 3.610918 0.000000 603 +progress 0 28 3.610918 0.000000 598 +held 0 28 3.610918 0.000000 600 +hope 0 28 3.610918 0.000000 610 +ask 0 28 3.610918 0.000000 597 +usual 0 28 3.610918 0.000000 608 +session 0 26 3.688879 0.000000 643 +relev 0 26 3.688879 0.000000 637 +comp 0 26 3.688879 0.000000 650 +todai 0 25 3.737670 0.000000 672 +lead 0 23 3.806662 0.000000 718 +dai 0 22 3.850148 0.000000 753 +almost 0 22 3.850148 0.000000 742 +newsgroup 0 21 3.912023 0.000000 783 +reserv 0 20 3.951244 0.000000 808 +item 0 19 4.007333 0.000000 856 +coupl 0 17 4.110874 0.000000 939 +whole 0 17 4.110874 0.000000 940 +sheet 0 16 4.174387 0.000000 973 +critic 0 16 4.174387 0.000000 982 +warn 0 14 4.317488 0.000000 1068 +someon 0 13 4.382027 0.000000 1128 +circuit 0 13 4.382027 0.000000 1131 +difficulti 0 13 4.382027 0.000000 1132 +prolog 0 13 4.382027 0.000000 1155 +menu 0 13 4.382027 0.000000 1156 +pascal 1 12 4.465908 4.465908 1213 +bruce 0 12 4.465908 0.000000 1226 +tune 0 12 4.465908 0.000000 1227 +uniqu 0 12 4.465908 0.000000 1228 +rememb 0 12 4.465908 0.000000 1217 +regard 0 11 4.553877 0.000000 1309 +summar 0 11 4.553877 0.000000 1295 +total 0 10 4.653960 0.000000 1398 +length 0 10 4.653960 0.000000 1400 +exact 0 9 4.753590 0.000000 1509 +prefer 0 9 4.753590 0.000000 1491 +bit 0 7 5.010635 0.000000 1833 +beyond 0 7 5.010635 0.000000 1834 +put 0 6 5.164786 0.000000 2017 +banerje 0 6 5.164786 0.000000 2018 +assignmentsprogram 0 6 5.164786 0.000000 2019 +porter 0 5 5.347108 0.000000 2293 +lang 0 5 5.347108 0.000000 2294 +door 0 5 5.347108 0.000000 2291 +rotat 0 5 5.347108 0.000000 2295 +scope 0 5 5.347108 0.000000 2296 +desk 0 5 5.347108 0.000000 2297 +caus 0 5 5.347108 0.000000 2298 +welch 0 4 5.568345 0.000000 2655 +coverag 0 4 5.568345 0.000000 2656 +glad 0 4 5.568345 0.000000 2657 +arora 0 4 5.568345 0.000000 2658 +somewhat 0 4 5.568345 0.000000 2659 +webpag 0 4 5.568345 0.000000 2660 +dwip 0 3 5.857933 0.000000 3197 +addendum 0 3 5.857933 0.000000 3150 +ansi 0 3 5.857933 0.000000 3198 +forthes 0 3 5.857933 0.000000 3199 +moreov 0 3 5.857933 0.000000 3200 +luck 0 3 5.857933 0.000000 3201 +boolean 0 3 5.857933 0.000000 3202 +experienc 0 3 5.857933 0.000000 3203 +painter 0 2 6.263398 0.000000 4187 +therewil 0 2 6.263398 0.000000 4080 +nimar 0 2 6.263398 0.000000 4188 +disregard 0 2 6.263398 0.000000 4189 +schedulec 0 2 6.263398 0.000000 4190 +newgroup 0 2 6.263398 0.000000 4191 +delphi 0 2 6.263398 0.000000 4192 +dell 0 2 6.263398 0.000000 4193 +turbo 0 1 6.957497 0.000000 6928 +andther 0 1 6.957497 0.000000 6929 +unabl 0 1 6.957497 0.000000 6930 +luckfor 0 1 6.957497 0.000000 6931 +dependon 0 1 6.957497 0.000000 6932 +availib 0 1 6.957497 0.000000 6933 +uptoth 0 1 6.957497 0.000000 6934 +resolutio 0 1 6.957497 0.000000 6935 +porterquest 0 1 6.957497 0.000000 6936 +thecont 0 1 6.957497 0.000000 6937 +atugl 0 1 6.957497 0.000000 6938 +sostai 0 1 6.957497 0.000000 6939 +iinstructorbruc 0 1 6.957497 0.000000 6940 +tasoffic 0 1 6.957497 0.000000 6941 +hourslab 0 1 6.957497 0.000000 6942 +descriptionclass 0 1 6.957497 0.000000 6943 +scheduleclass 0 1 6.957497 0.000000 6944 +articlesclass 0 1 6.957497 0.000000 6945 +newsgroupprogram 0 1 6.957497 0.000000 6946 +pascaltutori 0 1 6.957497 0.000000 6947 +faqyou 0 1 6.957497 0.000000 6948 +zipe 0 1 6.957497 0.000000 6949 +isocomp 0 1 6.957497 0.000000 6950 +maccomp 0 1 6.957497 0.000000 6951 +borlandcomp 0 1 6.957497 0.000000 6952 +misccomp 0 1 6.957497 0.000000 6953 +miscfj 0 1 6.957497 0.000000 6954 +serverto 0 1 6.957497 0.000000 6955 +importantstuff 0 1 6.957497 0.000000 6956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..93dde04a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +assign 1 135 1.945910 1.945910 66 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +machin 0 129 2.079442 0.000000 95 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +specif 0 106 2.197225 0.000000 106 +book 0 99 2.302585 0.000000 131 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +librari 0 87 2.484907 0.000000 181 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +exam 0 86 2.484907 0.000000 169 +contain 0 81 2.484907 0.000000 174 +exampl 0 77 2.564949 0.000000 195 +refer 0 78 2.564949 0.000000 203 +syllabu 0 67 2.708050 0.000000 247 +descript 0 64 2.772589 0.000000 271 +copi 0 63 2.772589 0.000000 284 +instruct 0 53 2.944439 0.000000 332 +sampl 0 53 2.944439 0.000000 339 +directori 0 45 3.135494 0.000000 396 +show 0 43 3.178054 0.000000 417 +submit 0 39 3.258097 0.000000 440 +workstat 0 37 3.332205 0.000000 479 +manual 0 35 3.401197 0.000000 504 +option 0 30 3.555348 0.000000 575 +platform 0 29 3.583519 0.000000 591 +turn 0 29 3.583519 0.000000 586 +utc 0 27 3.637586 0.000000 629 +wish 0 24 3.761200 0.000000 692 +higher 0 24 3.761200 0.000000 690 +instal 0 22 3.850148 0.000000 754 +score 0 15 4.248495 0.000000 1017 +donald 0 9 4.753590 0.000000 1510 +curv 0 8 4.875197 0.000000 1656 +driver 0 8 4.875197 0.000000 1657 +sciencesdepart 0 6 5.164786 0.000000 2020 +slate 0 6 5.164786 0.000000 2021 +opengl 0 5 5.347108 0.000000 2299 +fussel 0 5 5.347108 0.000000 2300 +ousterhout 0 5 5.347108 0.000000 2301 +hasbeen 0 4 5.568345 0.000000 2661 +makefil 0 4 5.568345 0.000000 2662 +welch 0 4 5.568345 0.000000 2655 +xlib 0 3 5.857933 0.000000 3204 +mesa 0 2 6.263398 0.000000 4194 +cscomput 0 2 6.263398 0.000000 4195 +anopengl 0 2 6.263398 0.000000 4196 +billthecat 0 2 6.263398 0.000000 4197 +graphicsspr 0 1 6.957497 0.000000 6957 +oneor 0 1 6.957497 0.000000 6958 +examwil 0 1 6.957497 0.000000 6959 +bothmai 0 1 6.957497 0.000000 6960 +willcount 0 1 6.957497 0.000000 6961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..5c98f719 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +assign 1 135 1.945910 1.945910 66 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +specif 0 106 2.197225 0.000000 106 +book 0 99 2.302585 0.000000 131 +graphic 1 90 2.397895 2.397895 147 +center 0 88 2.397895 0.000000 158 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +exampl 1 77 2.564949 2.564949 195 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +syllabu 0 67 2.708050 0.000000 247 +descript 0 64 2.772589 0.000000 271 +copi 0 63 2.772589 0.000000 284 +new 0 64 2.772589 0.000000 262 +instruct 0 53 2.944439 0.000000 332 +sampl 0 53 2.944439 0.000000 339 +directori 0 45 3.135494 0.000000 396 +workstat 0 37 3.332205 0.000000 479 +manual 0 35 3.401197 0.000000 504 +platform 0 29 3.583519 0.000000 591 +turn 0 29 3.583519 0.000000 586 +utc 0 27 3.637586 0.000000 629 +wish 0 24 3.761200 0.000000 692 +instal 0 22 3.850148 0.000000 754 +demo 0 18 4.060443 0.000000 888 +donald 0 9 4.753590 0.000000 1510 +driver 0 8 4.875197 0.000000 1657 +sciencesdepart 0 6 5.164786 0.000000 2020 +slate 0 6 5.164786 0.000000 2021 +opengl 0 5 5.347108 0.000000 2299 +fussel 0 5 5.347108 0.000000 2300 +ousterhout 0 5 5.347108 0.000000 2301 +welch 0 4 5.568345 0.000000 2655 +hasbeen 0 4 5.568345 0.000000 2661 +makefil 0 4 5.568345 0.000000 2662 +turnin 0 4 5.568345 0.000000 2654 +walker 0 3 5.857933 0.000000 3161 +xlib 0 3 5.857933 0.000000 3204 +mesa 1 2 6.263398 6.263398 4194 +anopengl 0 2 6.263398 0.000000 4196 +billthecat 0 2 6.263398 0.000000 4197 +repair 0 2 6.263398 0.000000 4198 +reinstal 0 1 6.957497 0.000000 6962 +gcomput 0 1 6.957497 0.000000 6963 +graphicsfal 0 1 6.957497 0.000000 6964 +libtcl 0 1 6.957497 0.000000 6965 +libtk 0 1 6.957497 0.000000 6966 +tclsh 0 1 6.957497 0.000000 6967 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..bdd16038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +mathemat 0 108 2.197225 0.000000 123 +send 0 114 2.197225 0.000000 109 +comment 0 93 2.397895 0.000000 146 +chang 0 82 2.484907 0.000000 163 +homework 0 79 2.564949 0.000000 193 +appear 0 78 2.564949 0.000000 210 +david 0 71 2.639057 0.000000 232 +syllabu 0 67 2.708050 0.000000 247 +taylor 0 63 2.772589 0.000000 287 +septemb 0 65 2.772589 0.000000 274 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +howev 0 41 3.218876 0.000000 422 +word 0 34 3.401197 0.000000 508 +abl 0 30 3.555348 0.000000 566 +common 0 30 3.555348 0.000000 574 +background 0 25 3.737670 0.000000 664 +notic 0 25 3.737670 0.000000 675 +letter 0 16 4.174387 0.000000 981 +english 0 15 4.248495 0.000000 1033 +station 0 13 4.382027 0.000000 1157 +canada 0 13 4.382027 0.000000 1158 +french 1 9 4.753590 4.753590 1511 +cryptographi 0 9 4.753590 0.000000 1512 +recogn 0 5 5.347108 0.000000 2302 +basement 0 4 5.568345 0.000000 2663 +zuckerman 0 3 5.857933 0.000000 3205 +frequenc 0 3 5.857933 0.000000 3206 +hqliu 0 2 6.263398 0.000000 4199 +huiqun 0 2 6.263398 0.000000 4200 +drastic 0 2 6.263398 0.000000 4201 +ciphertext 0 1 6.957497 0.000000 6968 +digram 0 1 6.957497 0.000000 6969 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..8dea727f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,315 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +class 1 199 1.609438 1.609438 37 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +algorithm 0 162 1.791759 0.000000 57 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +problem 1 147 1.945910 1.945910 75 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +machin 0 129 2.079442 0.000000 95 +studi 0 120 2.079442 0.000000 91 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +send 0 114 2.197225 0.000000 109 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +take 0 97 2.302585 0.000000 134 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +grade 0 90 2.397895 0.000000 142 +real 0 93 2.397895 0.000000 144 +follow 0 92 2.397895 0.000000 143 +question 0 91 2.397895 0.000000 141 +solut 0 82 2.484907 0.000000 162 +requir 0 81 2.484907 0.000000 167 +exam 0 86 2.484907 0.000000 169 +second 0 81 2.484907 0.000000 166 +build 0 85 2.484907 0.000000 184 +wide 0 84 2.484907 0.000000 185 +academ 0 82 2.484907 0.000000 178 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +librari 0 87 2.484907 0.000000 181 +mondai 0 77 2.564949 0.000000 206 +state 0 76 2.564949 0.000000 207 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +come 0 78 2.564949 0.000000 202 +solv 0 73 2.639057 0.000000 234 +tuesdai 0 73 2.639057 0.000000 219 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +logic 0 71 2.639057 0.000000 230 +servic 0 72 2.639057 0.000000 236 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +effici 0 73 2.639057 0.000000 233 +integr 0 67 2.708050 0.000000 245 +would 0 67 2.708050 0.000000 251 +thursdai 0 70 2.708050 0.000000 241 +order 0 69 2.708050 0.000000 249 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +improv 0 62 2.772589 0.000000 289 +taylor 0 63 2.772589 0.000000 287 +abstract 0 62 2.772589 0.000000 276 +visit 0 63 2.772589 0.000000 288 +locat 0 59 2.833213 0.000000 303 +content 0 59 2.833213 0.000000 302 +share 0 59 2.833213 0.000000 304 +point 0 58 2.890372 0.000000 319 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +suggest 0 53 2.944439 0.000000 331 +cover 0 55 2.944439 0.000000 329 +allow 0 53 2.944439 0.000000 333 +instruct 0 53 2.944439 0.000000 332 +three 0 54 2.944439 0.000000 330 +case 0 51 2.995732 0.000000 351 +week 0 52 2.995732 0.000000 343 +particular 0 51 2.995732 0.000000 352 +give 0 50 3.044522 0.000000 359 +set 0 50 3.044522 0.000000 361 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +right 0 48 3.044522 0.000000 363 +electron 0 47 3.091042 0.000000 379 +protocol 0 45 3.135494 0.000000 407 +textbook 0 44 3.135494 0.000000 397 +discuss 0 45 3.135494 0.000000 399 +midterm 0 45 3.135494 0.000000 392 +better 0 45 3.135494 0.000000 401 +describ 0 45 3.135494 0.000000 400 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +howev 0 41 3.218876 0.000000 422 +author 0 39 3.258097 0.000000 450 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +correct 0 38 3.295837 0.000000 462 +expect 0 37 3.332205 0.000000 484 +cost 0 37 3.332205 0.000000 480 +feel 0 37 3.332205 0.000000 483 +ofth 0 36 3.367296 0.000000 491 +global 0 34 3.401197 0.000000 520 +survei 0 35 3.401197 0.000000 513 +singl 0 34 3.401197 0.000000 510 +collabor 0 32 3.465736 0.000000 543 +given 0 32 3.465736 0.000000 538 +idea 0 32 3.465736 0.000000 545 +someth 0 31 3.496508 0.000000 554 +often 0 31 3.496508 0.000000 551 +secur 0 30 3.555348 0.000000 577 +robert 0 30 3.555348 0.000000 567 +option 0 30 3.555348 0.000000 575 +produc 0 30 3.555348 0.000000 572 +depend 0 29 3.583519 0.000000 583 +synchron 0 29 3.583519 0.000000 588 +consid 0 29 3.583519 0.000000 590 +ask 0 28 3.610918 0.000000 597 +team 0 27 3.637586 0.000000 625 +detect 0 26 3.688879 0.000000 646 +consist 0 26 3.688879 0.000000 651 +effort 0 26 3.688879 0.000000 652 +primari 0 25 3.737670 0.000000 669 +demonstr 0 24 3.761200 0.000000 694 +proof 0 23 3.806662 0.000000 720 +size 0 23 3.806662 0.000000 713 +properti 0 22 3.850148 0.000000 749 +newsgroup 0 21 3.912023 0.000000 783 +kernel 0 20 3.951244 0.000000 825 +prepar 0 20 3.951244 0.000000 824 +prove 0 19 4.007333 0.000000 848 +assum 0 19 4.007333 0.000000 845 +log 0 19 4.007333 0.000000 857 +encourag 0 18 4.060443 0.000000 880 +appropri 0 18 4.060443 0.000000 883 +moor 0 17 4.110874 0.000000 936 +whether 0 17 4.110874 0.000000 918 +previous 0 17 4.110874 0.000000 923 +otherwis 0 17 4.110874 0.000000 922 +monitor 0 17 4.110874 0.000000 941 +weslei 0 16 4.174387 0.000000 983 +vector 0 16 4.174387 0.000000 961 +precis 0 15 4.248495 0.000000 1023 +deriv 0 13 4.382027 0.000000 1145 +weak 0 13 4.382027 0.000000 1159 +cannot 0 13 4.382027 0.000000 1144 +central 0 13 4.382027 0.000000 1160 +asynchron 0 12 4.465908 0.000000 1229 +addison 0 12 4.465908 0.000000 1230 +replic 0 12 4.465908 0.000000 1231 +skill 0 12 4.465908 0.000000 1205 +onth 0 12 4.465908 0.000000 1218 +clock 0 11 4.553877 0.000000 1320 +imposs 0 9 4.753590 0.000000 1513 +clear 0 9 4.753590 0.000000 1488 +assumpt 0 9 4.753590 0.000000 1514 +matter 0 8 4.875197 0.000000 1627 +told 0 8 4.875197 0.000000 1658 +ideal 0 8 4.875197 0.000000 1630 +predic 0 7 5.010635 0.000000 1806 +encrypt 0 7 5.010635 0.000000 1835 +henc 0 7 5.010635 0.000000 1805 +guidelin 0 7 5.010635 0.000000 1832 +channel 0 7 5.010635 0.000000 1836 +distributedsystem 0 6 5.164786 0.000000 2022 +arrang 0 6 5.164786 0.000000 2023 +tobe 0 6 5.164786 0.000000 1995 +causal 0 6 5.164786 0.000000 2024 +wrong 0 6 5.164786 0.000000 2025 +constitut 0 6 5.164786 0.000000 2026 +snapshot 0 5 5.347108 0.000000 2303 +colleagu 0 5 5.347108 0.000000 2304 +multicast 0 5 5.347108 0.000000 2305 +authent 0 5 5.347108 0.000000 2306 +volunt 0 5 5.347108 0.000000 2307 +explicitli 0 5 5.347108 0.000000 2308 +stabl 0 5 5.347108 0.000000 2309 +exchang 0 5 5.347108 0.000000 2310 +lorenzo 0 4 5.568345 0.000000 2588 +deadlock 0 4 5.568345 0.000000 2641 +cut 0 4 5.568345 0.000000 2620 +disconnect 0 4 5.568345 0.000000 2664 +subsequ 0 4 5.568345 0.000000 2665 +accompani 0 4 5.568345 0.000000 2666 +unless 0 4 5.568345 0.000000 2607 +rajeev 0 3 5.857933 0.000000 3152 +agreement 0 3 5.857933 0.000000 3207 +pertain 0 3 5.857933 0.000000 3208 +alvisi 0 3 5.857933 0.000000 3095 +commerc 0 3 5.857933 0.000000 3209 +credibl 0 3 5.857933 0.000000 3210 +violat 0 3 5.857933 0.000000 3211 +urg 0 3 5.857933 0.000000 3212 +agener 0 3 5.857933 0.000000 3213 +conceptu 0 3 5.857933 0.000000 3214 +joshi 0 2 6.263398 0.000000 4202 +byzantin 0 2 6.263398 0.000000 4203 +requiredtextbook 0 2 6.263398 0.000000 4204 +checkpoint 0 2 6.263398 0.000000 4205 +replica 0 2 6.263398 0.000000 4206 +towrit 0 2 6.263398 0.000000 4207 +algorithmi 0 2 6.263398 0.000000 4208 +moreeffici 0 2 6.263398 0.000000 4209 +simpler 0 2 6.263398 0.000000 4210 +fifo 0 1 6.957497 0.000000 6970 +ispr 0 1 6.957497 0.000000 6971 +alvisiteach 0 1 6.957497 0.000000 6972 +joshicont 0 1 6.957497 0.000000 6973 +stafflorenzo 0 1 6.957497 0.000000 6974 +mechanicsi 0 1 6.957497 0.000000 6975 +remaind 0 1 6.957497 0.000000 6976 +classat 0 1 6.957497 0.000000 6977 +isutexa 0 1 6.957497 0.000000 6978 +mullend 0 1 6.957497 0.000000 6979 +acmpress 0 1 6.957497 0.000000 6980 +contentc 0 1 6.957497 0.000000 6981 +tomorrow 0 1 6.957497 0.000000 6982 +messagedeliveri 0 1 6.957497 0.000000 6983 +backupapproach 0 1 6.957497 0.000000 6984 +thepresent 0 1 6.957497 0.000000 6985 +exemplifi 0 1 6.957497 0.000000 6986 +principleshav 0 1 6.957497 0.000000 6987 +meor 0 1 6.957497 0.000000 6988 +apresent 0 1 6.957497 0.000000 6989 +networksgradingther 0 1 6.957497 0.000000 6990 +begrad 0 1 6.957497 0.000000 6991 +onbehalf 0 1 6.957497 0.000000 6992 +willrec 0 1 6.957497 0.000000 6993 +ispermit 0 1 6.957497 0.000000 6994 +acollabor 0 1 6.957497 0.000000 6995 +forgrad 0 1 6.957497 0.000000 6996 +collaborationswil 0 1 6.957497 0.000000 6997 +nocollabor 0 1 6.957497 0.000000 6998 +issuesthat 0 1 6.957497 0.000000 6999 +bedistribut 0 1 6.957497 0.000000 7000 +tocomplet 0 1 6.957497 0.000000 7001 +twolectur 0 1 6.957497 0.000000 7002 +choosethi 0 1 6.957497 0.000000 7003 +asingl 0 1 6.957497 0.000000 7004 +warmli 0 1 6.957497 0.000000 7005 +toconsid 0 1 6.957497 0.000000 7006 +excellentopportun 0 1 6.957497 0.000000 7007 +setsin 0 1 6.957497 0.000000 7008 +shouldconform 0 1 6.957497 0.000000 7009 +synonym 0 1 6.957497 0.000000 7010 +isrequir 0 1 6.957497 0.000000 7011 +thatmak 0 1 6.957497 0.000000 7012 +insuffici 0 1 6.957497 0.000000 7013 +ofcorrect 0 1 6.957497 0.000000 7014 +thetextbook 0 1 6.957497 0.000000 7015 +asnapshot 0 1 6.957497 0.000000 7016 +theprotocol 0 1 6.957497 0.000000 7017 +atmost 0 1 6.957497 0.000000 7018 +mattern 0 1 6.957497 0.000000 7019 +thatcontain 0 1 6.957497 0.000000 7020 +resist 0 1 6.957497 0.000000 7021 +thetempt 0 1 6.957497 0.000000 7022 +monitorprocess 0 1 6.957497 0.000000 7023 +basedsnapshot 0 1 6.957497 0.000000 7024 +nowonlin 0 1 6.957497 0.000000 7025 +filedescrib 0 1 6.957497 0.000000 7026 +examth 0 1 6.957497 0.000000 7027 +fridaymai 0 1 6.957497 0.000000 7028 +thepostscript 0 1 6.957497 0.000000 7029 +freeto 0 1 6.957497 0.000000 7030 +yoursuggest 0 1 6.957497 0.000000 7031 +edurajeev 0 1 6.957497 0.000000 7032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..d5d28d51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +fall 0 181 1.609438 0.000000 40 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +austin 0 168 1.791759 0.000000 63 +note 0 142 1.945910 0.000000 67 +welcom 0 122 2.079442 0.000000 99 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +version 0 113 2.197225 0.000000 122 +look 0 107 2.197225 0.000000 115 +find 0 111 2.197225 0.000000 111 +technic 0 100 2.302585 0.000000 140 +homepag 0 93 2.397895 0.000000 148 +solut 0 82 2.484907 0.000000 162 +requir 0 81 2.484907 0.000000 167 +homework 0 79 2.564949 0.000000 193 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +differ 0 66 2.708050 0.000000 253 +practic 0 70 2.708050 0.000000 246 +syllabu 0 67 2.708050 0.000000 247 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +electron 0 47 3.091042 0.000000 379 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +taught 0 33 3.433987 0.000000 526 +chapter 0 32 3.465736 0.000000 536 +turn 0 29 3.583519 0.000000 586 +adam 0 17 4.110874 0.000000 934 +reflect 0 15 4.248495 0.000000 1034 +overhead 0 15 4.248495 0.000000 1035 +correspond 0 10 4.653960 0.000000 1382 +informationabout 0 9 4.753590 0.000000 1515 +scope 0 5 5.347108 0.000000 2296 +jacob 0 4 5.568345 0.000000 2667 +kornerup 0 3 5.857933 0.000000 3215 +bywil 0 1 6.957497 0.000000 7033 +linea 0 1 6.957497 0.000000 7034 +inhomework 0 1 6.957497 0.000000 7035 +crude 0 1 6.957497 0.000000 7036 +newsgrouputexa 0 1 6.957497 0.000000 7037 +takesplac 0 1 6.957497 0.000000 7038 +pascalprogramm 0 1 6.957497 0.000000 7039 +viewinginform 0 1 6.957497 0.000000 7040 +projecthow 0 1 6.957497 0.000000 7041 +examand 0 1 6.957497 0.000000 7042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..53f0c91f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +professor 0 137 1.945910 0.000000 76 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +hall 0 146 1.945910 0.000000 65 +postscript 0 131 2.079442 0.000000 90 +compil 0 122 2.079442 0.000000 96 +introduct 0 126 2.079442 0.000000 87 +document 0 121 2.079442 0.000000 89 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +text 0 98 2.302585 0.000000 133 +advanc 0 99 2.302585 0.000000 130 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +librari 0 87 2.484907 0.000000 181 +solut 0 82 2.484907 0.000000 162 +internet 0 83 2.484907 0.000000 186 +institut 0 84 2.484907 0.000000 187 +info 0 85 2.484907 0.000000 176 +orient 0 80 2.564949 0.000000 205 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +mondai 0 77 2.564949 0.000000 206 +master 0 76 2.564949 0.000000 216 +refer 0 78 2.564949 0.000000 203 +know 0 80 2.564949 0.000000 198 +good 0 77 2.564949 0.000000 200 +write 0 72 2.639057 0.000000 222 +materi 0 75 2.639057 0.000000 221 +summari 0 73 2.639057 0.000000 237 +java 1 70 2.708050 2.708050 248 +wednesdai 0 64 2.772589 0.000000 261 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +think 0 57 2.890372 0.000000 314 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +standard 0 48 3.044522 0.000000 365 +archiv 0 49 3.044522 0.000000 364 +give 0 50 3.044522 0.000000 359 +discuss 0 45 3.135494 0.000000 399 +offer 0 43 3.178054 0.000000 414 +edit 0 42 3.218876 0.000000 418 +programm 0 39 3.258097 0.000000 445 +open 0 38 3.295837 0.000000 469 +manual 0 35 3.401197 0.000000 504 +tech 0 35 3.401197 0.000000 515 +eduoffic 0 33 3.433987 0.000000 531 +john 0 33 3.433987 0.000000 532 +product 0 33 3.433987 0.000000 527 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +intend 0 28 3.610918 0.000000 599 +comp 0 26 3.688879 0.000000 650 +challeng 0 26 3.688879 0.000000 653 +greg 0 24 3.761200 0.000000 695 +pattern 0 24 3.761200 0.000000 689 +fellow 0 24 3.761200 0.000000 701 +lab 0 24 3.761200 0.000000 698 +newsgroup 0 21 3.912023 0.000000 783 +annot 0 21 3.912023 0.000000 775 +applet 0 20 3.951244 0.000000 827 +element 0 18 4.060443 0.000000 895 +encourag 0 18 4.060443 0.000000 880 +event 0 18 4.060443 0.000000 896 +weslei 0 16 4.174387 0.000000 983 +alreadi 0 16 4.174387 0.000000 963 +style 0 15 4.248495 0.000000 1036 +pagec 0 15 4.248495 0.000000 1011 +rate 0 15 4.248495 0.000000 1037 +draft 0 14 4.317488 0.000000 1085 +manner 0 14 4.317488 0.000000 1074 +opportun 0 13 4.382027 0.000000 1161 +station 0 13 4.382027 0.000000 1157 +johnson 0 13 4.382027 0.000000 1162 +misc 0 13 4.382027 0.000000 1124 +addison 0 12 4.465908 0.000000 1230 +captur 0 12 4.465908 0.000000 1232 +evolut 0 11 4.553877 0.000000 1314 +denni 0 11 4.553877 0.000000 1321 +strongli 0 10 4.653960 0.000000 1406 +classmat 0 9 4.753590 0.000000 1516 +doug 0 9 4.753590 0.000000 1517 +virginia 0 8 4.875197 0.000000 1659 +irvin 0 8 4.875197 0.000000 1660 +bookstor 0 7 5.010635 0.000000 1837 +prentic 0 7 5.010635 0.000000 1838 +usenet 0 7 5.010635 0.000000 1839 +dead 0 7 5.010635 0.000000 1840 +forum 0 6 5.164786 0.000000 2027 +mirror 0 6 5.164786 0.000000 2028 +huge 0 6 5.164786 0.000000 1991 +lang 0 5 5.347108 0.000000 2294 +templat 0 5 5.347108 0.000000 2311 +appt 0 5 5.347108 0.000000 2312 +gokul 0 4 5.568345 0.000000 2668 +polymorph 0 4 5.568345 0.000000 2627 +wilei 0 4 5.568345 0.000000 2669 +faq 0 3 5.857933 0.000000 3216 +lavend 0 3 5.857933 0.000000 3217 +cline 0 3 5.857933 0.000000 3218 +gamma 0 3 5.857933 0.000000 3219 +hotjava 0 3 5.857933 0.000000 3220 +javascript 0 3 5.857933 0.000000 3221 +ansi 0 3 5.857933 0.000000 3198 +polytechn 0 3 5.857933 0.000000 3222 +jar 0 3 5.857933 0.000000 3223 +setup 0 2 6.263398 0.000000 4211 +infocours 0 2 6.263398 0.000000 4212 +noteshomework 0 2 6.263398 0.000000 4102 +profici 0 2 6.263398 0.000000 4103 +coop 0 2 6.263398 0.000000 4213 +materiali 0 2 6.263398 0.000000 4214 +drawn 0 2 6.263398 0.000000 4215 +elli 0 2 6.263398 0.000000 4216 +helm 0 2 6.263398 0.000000 4217 +reusabl 0 2 6.263398 0.000000 4218 +renssela 0 2 6.263398 0.000000 4148 +sourcesth 0 2 6.263398 0.000000 4219 +javasoft 0 2 6.263398 0.000000 4220 +gamelan 0 2 6.263398 0.000000 4221 +centr 0 2 6.263398 0.000000 4222 +stroustrup 0 1 6.957497 0.000000 7043 +libg 0 1 6.957497 0.000000 7044 +descriptionc 0 1 6.957497 0.000000 7045 +programminglast 0 1 6.957497 0.000000 7046 +rajaram 0 1 6.957497 0.000000 7047 +lavendercours 0 1 6.957497 0.000000 7048 +syllabusannouncementslectur 0 1 6.957497 0.000000 7049 +solutionsprogram 0 1 6.957497 0.000000 7050 +assignmentsgnu 0 1 6.957497 0.000000 7051 +manualsstandard 0 1 6.957497 0.000000 7052 +codesocket 0 1 6.957497 0.000000 7053 +manualdescript 0 1 6.957497 0.000000 7054 +anintroductori 0 1 6.957497 0.000000 7055 +reusablepattern 0 1 6.957497 0.000000 7056 +typehierarchi 0 1 6.957497 0.000000 7057 +professionallyus 0 1 6.957497 0.000000 7058 +horstmann 0 1 6.957497 0.000000 7059 +cargil 0 1 6.957497 0.000000 7060 +lomow 0 1 6.957497 0.000000 7061 +coplien 0 1 6.957497 0.000000 7062 +idiom 0 1 6.957497 0.000000 7063 +plauger 0 1 6.957497 0.000000 7064 +vlissid 0 1 6.957497 0.000000 7065 +announcementsabout 0 1 6.957497 0.000000 7066 +linediscuss 0 1 6.957497 0.000000 7067 +lavendery 0 1 6.957497 0.000000 7068 +helpjava 0 1 6.957497 0.000000 7069 +advocaci 0 1 6.957497 0.000000 7070 +oopth 0 1 6.957497 0.000000 7071 +objectspac 0 1 6.957497 0.000000 7072 +libstdc 0 1 6.957497 0.000000 7073 +mitgnu 0 1 6.957497 0.000000 7074 +cygnusgnu 0 1 6.957497 0.000000 7075 +ftpobject 0 1 6.957497 0.000000 7076 +developmentindex 0 1 6.957497 0.000000 7077 +librariesth 0 1 6.957497 0.000000 7078 +libraryindex 0 1 6.957497 0.000000 7079 +talig 0 1 6.957497 0.000000 7080 +frameworkjava 0 1 6.957497 0.000000 7081 +registri 0 1 6.957497 0.000000 7082 +espresso 0 1 6.957497 0.000000 7083 +kafura 0 1 6.957497 0.000000 7084 +techdoug 0 1 6.957497 0.000000 7085 +schmidt 0 1 6.957497 0.000000 7086 +universitydoug 0 1 6.957497 0.000000 7087 +sunyintroductori 0 1 6.957497 0.000000 7088 +groningen 0 1 6.957497 0.000000 7089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..e2e53aa0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +world 0 115 2.197225 0.000000 126 +memori 0 101 2.302585 0.000000 139 +commun 0 95 2.397895 0.000000 157 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +decemb 0 80 2.564949 0.000000 215 +tuesdai 0 73 2.639057 0.000000 219 +onlin 0 75 2.639057 0.000000 223 +thursdai 0 70 2.708050 0.000000 241 +practic 0 70 2.708050 0.000000 246 +taylor 0 63 2.772589 0.000000 287 +handout 0 64 2.772589 0.000000 263 +foundat 0 62 2.772589 0.000000 286 +share 0 59 2.833213 0.000000 304 +case 0 51 2.995732 0.000000 351 +adapt 0 46 3.091042 0.000000 387 +tutori 0 39 3.258097 0.000000 437 +copyright 0 36 3.367296 0.000000 495 +manual 0 35 3.401197 0.000000 504 +pass 0 28 3.610918 0.000000 611 +spent 0 25 3.737670 0.000000 676 +thread 0 23 3.806662 0.000000 722 +partit 0 16 4.174387 0.000000 984 +hello 0 10 4.653960 0.000000 1407 +calvin 0 9 4.753590 0.000000 1518 +compilersfal 0 2 6.263398 0.000000 4223 +tera 0 2 6.263398 0.000000 4224 +skeleton 0 2 6.263398 0.000000 4225 +ironman 0 2 6.263398 0.000000 4226 +logp 0 2 6.263398 0.000000 4227 +grid 0 2 6.263398 0.000000 4228 +compilerscst 0 1 6.957497 0.000000 7090 +posix 0 1 6.957497 0.000000 7091 +hierarchieslast 0 1 6.957497 0.000000 7092 +linlin 0 1 6.957497 0.000000 7093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..6382eb39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +mail 1 238 1.386294 1.386294 22 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +send 1 114 2.197225 2.197225 109 +handout 0 64 2.772589 0.000000 263 +prof 0 64 2.772589 0.000000 273 +newsgroup 0 21 3.912023 0.000000 783 +csintroduct 0 1 6.957497 0.000000 7094 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..9f75234a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +topic 0 114 2.197225 0.000000 110 +thot 0 1 6.957497 0.000000 7095 +systemsfil 0 1 6.957497 0.000000 7096 +systemstopolog 0 1 6.957497 0.000000 7097 +systemselectron 0 1 6.957497 0.000000 7098 +commenrcefailur 0 1 6.957497 0.000000 7099 +detectorsdistribut 0 1 6.957497 0.000000 7100 +objectsconsistencysecuregroup 0 1 6.957497 0.000000 7101 +communicationlanguag 0 1 6.957497 0.000000 7102 +dsmmobil 0 1 6.957497 0.000000 7103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..4cc65402 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +project 0 340 1.098612 0.000000 18 +list 0 201 1.609438 0.000000 39 +databas 0 122 2.079442 0.000000 86 +materi 0 75 2.639057 0.000000 221 +term 0 43 3.178054 0.000000 411 +mine 1 26 3.688879 3.688879 654 +monitor 1 17 4.110874 4.110874 941 +daniel 0 12 4.465908 0.000000 1233 +databasesprof 0 1 6.957497 0.000000 7104 +mirankernew 0 1 6.957497 0.000000 7105 +seminarschedul 0 1 6.957497 0.000000 7106 +overviewtentativeread 0 1 6.957497 0.000000 7107 +homeworkproject 0 1 6.957497 0.000000 7108 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..0b9b9ff9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +updat 0 191 1.609438 0.000000 41 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +introduct 0 126 2.079442 0.000000 87 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +intellig 0 72 2.639057 0.000000 225 +syllabu 0 67 2.708050 0.000000 247 +artifici 0 63 2.772589 0.000000 280 +taylor 0 63 2.772589 0.000000 287 +trace 0 25 3.737670 0.000000 677 +tuth 0 9 4.753590 0.000000 1519 +moonei 0 9 4.753590 0.000000 1520 +raymond 0 5 5.347108 0.000000 2313 +informationclick 0 3 5.857933 0.000000 3224 +networkfor 0 3 5.857933 0.000000 3225 +mooneytim 0 2 6.263398 0.000000 4229 +sheetand 0 2 6.263398 0.000000 4230 +placespr 0 1 6.957497 0.000000 7109 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..e1e8a53c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +file 0 132 1.945910 0.000000 70 +studi 0 120 2.079442 0.000000 91 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +info 0 85 2.484907 0.000000 176 +homework 1 79 2.564949 2.564949 193 +intellig 0 72 2.639057 0.000000 225 +test 0 66 2.708050 0.000000 252 +syllabu 0 67 2.708050 0.000000 247 +artifici 0 63 2.772589 0.000000 280 +case 0 51 2.995732 0.000000 351 +common 0 30 3.555348 0.000000 574 +symbol 0 27 3.637586 0.000000 620 +trace 0 25 3.737670 0.000000 677 +lisp 0 18 4.060443 0.000000 897 +sheet 0 16 4.174387 0.000000 973 +moonei 0 9 4.753590 0.000000 1520 +raymond 0 5 5.347108 0.000000 2313 +allegro 0 5 5.347108 0.000000 2314 +sowmya 0 4 5.568345 0.000000 2670 +informationclick 0 3 5.857933 0.000000 3224 +networkfor 0 3 5.857933 0.000000 3225 +placetu 0 2 6.263398 0.000000 4231 +informationon 0 2 6.263398 0.000000 4232 +mooneyteach 0 1 6.957497 0.000000 7110 +ramachandrantim 0 1 6.957497 0.000000 7111 +alsout 0 1 6.957497 0.000000 7112 +textparadigm 0 1 6.957497 0.000000 7113 +lispassignmentsse 0 1 6.957497 0.000000 7114 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..f481a00a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +file 0 132 1.945910 0.000000 70 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +theori 0 111 2.197225 0.000000 127 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +learn 1 86 2.484907 2.484907 170 +homework 0 79 2.564949 0.000000 193 +logic 0 71 2.639057 0.000000 230 +syllabu 0 67 2.708050 0.000000 247 +order 0 69 2.708050 0.000000 249 +evalu 0 64 2.772589 0.000000 266 +suggest 0 53 2.944439 0.000000 331 +talk 0 53 2.944439 0.000000 336 +format 0 48 3.044522 0.000000 356 +slide 0 38 3.295837 0.000000 467 +tree 0 36 3.367296 0.000000 492 +concept 0 32 3.465736 0.000000 537 +neural 0 30 3.555348 0.000000 578 +cluster 0 28 3.610918 0.000000 612 +experiment 0 26 3.688879 0.000000 645 +rule 0 26 3.688879 0.000000 638 +trace 0 25 3.737670 0.000000 677 +decis 0 23 3.806662 0.000000 728 +outlin 0 17 4.110874 0.000000 914 +explan 0 16 4.174387 0.000000 985 +induct 0 11 4.553877 0.000000 1304 +instanc 0 11 4.553877 0.000000 1322 +moonei 0 9 4.753590 0.000000 1520 +raymond 0 5 5.347108 0.000000 2313 +bayesian 0 4 5.568345 0.000000 2671 +informationclick 0 3 5.857933 0.000000 3224 +networkfor 0 3 5.857933 0.000000 3225 +mooneytim 0 2 6.263398 0.000000 4229 +placetu 0 2 6.263398 0.000000 4231 +sheetand 0 2 6.263398 0.000000 4230 +unsupervis 0 2 6.263398 0.000000 4233 +textmachinelearninglectur 0 1 6.957497 0.000000 7115 +learningassignmentsse 0 1 6.957497 0.000000 7116 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..b2ef4245 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +list 0 201 1.609438 0.000000 39 +implement 0 152 1.791759 0.000000 52 +data 0 170 1.791759 0.000000 49 +assign 0 135 1.945910 0.000000 66 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +studi 0 120 2.079442 0.000000 91 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +exam 0 86 2.484907 0.000000 169 +school 0 84 2.484907 0.000000 188 +chang 0 82 2.484907 0.000000 163 +requir 0 81 2.484907 0.000000 167 +syllabu 0 67 2.708050 0.000000 247 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +plai 0 60 2.833213 0.000000 307 +major 0 56 2.890372 0.000000 315 +semest 0 58 2.890372 0.000000 312 +run 0 51 2.995732 0.000000 347 +basic 0 50 3.044522 0.000000 360 +move 0 47 3.091042 0.000000 382 +math 0 44 3.135494 0.000000 402 +directori 0 45 3.135494 0.000000 396 +next 0 34 3.401197 0.000000 517 +least 0 35 3.401197 0.000000 516 +concept 0 32 3.465736 0.000000 537 +express 0 32 3.465736 0.000000 540 +taken 0 31 3.496508 0.000000 555 +hard 0 30 3.555348 0.000000 563 +intend 0 28 3.610918 0.000000 599 +symbol 0 27 3.637586 0.000000 620 +never 0 25 3.737670 0.000000 671 +background 0 25 3.737670 0.000000 664 +instead 0 22 3.850148 0.000000 756 +scheme 0 20 3.951244 0.000000 818 +thur 0 19 4.007333 0.000000 847 +lisp 0 18 4.060443 0.000000 897 +hopefulli 0 14 4.317488 0.000000 1071 +draw 0 14 4.317488 0.000000 1086 +faster 0 11 4.553877 0.000000 1323 +surf 0 11 4.553877 0.000000 1301 +submiss 0 11 4.553877 0.000000 1298 +novak 0 9 4.753590 0.000000 1521 +strong 0 6 5.164786 0.000000 2029 +syntax 0 6 5.164786 0.000000 2030 +snow 0 6 5.164786 0.000000 2031 +gordon 0 6 5.164786 0.000000 2032 +assignmentsprogram 0 6 5.164786 0.000000 2019 +willb 0 5 5.347108 0.000000 2277 +porter 0 5 5.347108 0.000000 2293 +emphas 0 4 5.568345 0.000000 2672 +dialect 0 3 5.857933 0.000000 3226 +gambit 0 3 5.857933 0.000000 3227 +macintoshcomput 0 3 5.857933 0.000000 3228 +treasur 0 3 5.857933 0.000000 3229 +gradingmidterm 0 3 5.857933 0.000000 3230 +guidefin 0 3 5.857933 0.000000 3231 +peano 0 2 6.263398 0.000000 4234 +turtl 0 2 6.263398 0.000000 4235 +plot 0 2 6.263398 0.000000 4236 +sciencec 0 1 6.957497 0.000000 7117 +treesassign 0 1 6.957497 0.000000 7118 +atleast 0 1 6.957497 0.000000 7119 +programmingcours 0 1 6.957497 0.000000 7120 +precalculu 0 1 6.957497 0.000000 7121 +theschem 0 1 6.957497 0.000000 7122 +coursesand 0 1 6.957497 0.000000 7123 +learninga 0 1 6.957497 0.000000 7124 +tutorcopi 0 1 6.957497 0.000000 7125 +pcassign 0 1 6.957497 0.000000 7126 +simulationassign 0 1 6.957497 0.000000 7127 +webassign 0 1 6.957497 0.000000 7128 +schemeassign 0 1 6.957497 0.000000 7129 +gamblingassign 0 1 6.957497 0.000000 7130 +graphicsassign 0 1 6.957497 0.000000 7131 +manipulationstudi 0 1 6.957497 0.000000 7132 +vocabulari 0 1 6.957497 0.000000 7133 +thickensassign 0 1 6.957497 0.000000 7134 +huntassign 0 1 6.957497 0.000000 7135 +algebraassign 0 1 6.957497 0.000000 7136 +matricesstudi 0 1 6.957497 0.000000 7137 +unparsingassign 0 1 6.957497 0.000000 7138 +translationstudi 0 1 6.957497 0.000000 7139 +descriptionsprogram 0 1 6.957497 0.000000 7140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..d0d414a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +take 0 97 2.302585 0.000000 134 +exam 0 86 2.484907 0.000000 169 +server 0 76 2.564949 0.000000 204 +write 0 72 2.639057 0.000000 222 +plan 0 65 2.772589 0.000000 272 +descript 0 64 2.772589 0.000000 271 +summer 0 56 2.890372 0.000000 311 +cover 0 55 2.944439 0.000000 329 +processor 0 54 2.944439 0.000000 335 +week 0 52 2.995732 0.000000 343 +directori 0 45 3.135494 0.000000 396 +live 0 40 3.258097 0.000000 451 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +programminglanguag 0 21 3.912023 0.000000 782 +chip 0 21 3.912023 0.000000 770 +five 0 19 4.007333 0.000000 841 +incorpor 0 13 4.382027 0.000000 1163 +pascal 0 12 4.465908 0.000000 1213 +workload 0 12 4.465908 0.000000 1210 +submiss 0 11 4.553877 0.000000 1298 +novak 0 9 4.753590 0.000000 1521 +heavi 0 7 5.010635 0.000000 1841 +assignmentsprogram 0 6 5.164786 0.000000 2019 +gradingmidterm 0 3 5.857933 0.000000 3230 +guidefin 0 3 5.857933 0.000000 3231 +compilersc 0 2 6.263398 0.000000 4237 +powerpc 0 2 6.263398 0.000000 4238 +syllabusprogram 0 2 6.263398 0.000000 4239 +codei 0 1 6.957497 0.000000 7141 +dedicatetheir 0 1 6.957497 0.000000 7142 +guidegordon 0 1 6.957497 0.000000 7143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..1730171e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +studi 1 120 2.079442 2.079442 91 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +requir 0 81 2.484907 0.000000 167 +environ 0 84 2.484907 0.000000 177 +exam 0 86 2.484907 0.000000 169 +intellig 1 72 2.639057 2.639057 225 +logic 0 71 2.639057 0.000000 230 +artifici 0 63 2.772589 0.000000 280 +major 0 56 2.890372 0.000000 315 +natur 0 44 3.135494 0.000000 406 +vision 0 41 3.218876 0.000000 430 +survei 0 35 3.401197 0.000000 513 +represent 0 35 3.401197 0.000000 512 +bibliographi 0 34 3.401197 0.000000 518 +human 0 32 3.465736 0.000000 546 +defin 0 22 3.850148 0.000000 746 +behavior 0 18 4.060443 0.000000 881 +appropri 0 18 4.060443 0.000000 883 +attempt 0 17 4.110874 0.000000 917 +action 0 15 4.248495 0.000000 1038 +stori 0 14 4.317488 0.000000 1087 +achiev 0 14 4.317488 0.000000 1088 +calculu 0 12 4.465908 0.000000 1203 +thecomput 0 10 4.653960 0.000000 1408 +novak 0 9 4.753590 0.000000 1521 +brain 0 8 4.875197 0.000000 1638 +thegoal 0 6 5.164786 0.000000 2033 +assignmentsprogram 0 6 5.164786 0.000000 2019 +intelligencec 0 4 5.568345 0.000000 2673 +coverag 0 4 5.568345 0.000000 2656 +guidefin 0 3 5.857933 0.000000 3231 +actor 0 2 6.263398 0.000000 4240 +syllabusprogram 0 2 6.263398 0.000000 4239 +problemssolut 0 2 6.263398 0.000000 4241 +intelligenceartifici 0 1 6.957497 0.000000 7144 +todupl 0 1 6.957497 0.000000 7145 +connectspercept 0 1 6.957497 0.000000 7146 +andknowledg 0 1 6.957497 0.000000 7147 +withbrief 0 1 6.957497 0.000000 7148 +descriptionsmidterm 0 1 6.957497 0.000000 7149 +guidepred 0 1 6.957497 0.000000 7150 +problemsnot 0 1 6.957497 0.000000 7151 +braingordon 0 1 6.957497 0.000000 7152 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..aa51b2cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +compil 0 122 2.079442 0.000000 96 +specif 0 106 2.197225 0.000000 106 +present 0 91 2.397895 0.000000 145 +graphic 0 90 2.397895 0.000000 147 +level 0 87 2.484907 0.000000 180 +homework 0 79 2.564949 0.000000 193 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +materi 0 75 2.639057 0.000000 221 +automat 0 61 2.833213 0.000000 306 +semest 0 58 2.890372 0.000000 312 +sever 0 56 2.890372 0.000000 322 +cover 0 55 2.944439 0.000000 329 +done 0 47 3.091042 0.000000 381 +execut 0 45 3.135494 0.000000 404 +third 0 43 3.178054 0.000000 412 +long 0 43 3.178054 0.000000 413 +expect 0 37 3.332205 0.000000 484 +given 0 32 3.465736 0.000000 538 +kind 0 32 3.465736 0.000000 541 +consist 0 26 3.688879 0.000000 651 +higher 0 24 3.761200 0.000000 690 +literatur 0 11 4.553877 0.000000 1300 +latter 0 9 4.753590 0.000000 1522 +novak 0 9 4.753590 0.000000 1521 +programmingc 0 3 5.857933 0.000000 3232 +ordinari 0 3 5.857933 0.000000 3233 +programmingautomat 0 1 6.957497 0.000000 7153 +programsfrom 0 1 6.957497 0.000000 7154 +illustrateth 0 1 6.957497 0.000000 7155 +requirelearn 0 1 6.957497 0.000000 7156 +partof 0 1 6.957497 0.000000 7157 +syllabusbibliographyassign 0 1 6.957497 0.000000 7158 +handpattern 0 1 6.957497 0.000000 7159 +matchingobject 0 1 6.957497 0.000000 7160 +programmingintroduct 0 1 6.957497 0.000000 7161 +glispview 0 1 6.957497 0.000000 7162 +programminggordon 0 1 6.957497 0.000000 7163 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..2cd381aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,676 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +recent 0 167 1.791759 0.000000 58 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +lectur 0 135 1.945910 0.000000 73 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +introduct 0 126 2.079442 0.000000 87 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +well 0 109 2.197225 0.000000 121 +mathemat 0 108 2.197225 0.000000 123 +make 0 111 2.197225 0.000000 120 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +site 0 106 2.197225 0.000000 119 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +need 0 98 2.302585 0.000000 135 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +present 0 91 2.397895 0.000000 145 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +graphic 0 90 2.397895 0.000000 147 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +level 0 87 2.484907 0.000000 180 +librari 0 87 2.484907 0.000000 181 +learn 0 86 2.484907 0.000000 170 +wide 0 84 2.484907 0.000000 185 +environ 0 84 2.484907 0.000000 177 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +solut 0 82 2.484907 0.000000 162 +chang 0 82 2.484907 0.000000 163 +resourc 0 81 2.484907 0.000000 172 +contain 0 81 2.484907 0.000000 174 +control 0 82 2.484907 0.000000 164 +larg 0 82 2.484907 0.000000 168 +thing 0 84 2.484907 0.000000 189 +novemb 0 81 2.484907 0.000000 179 +interfac 0 79 2.564949 0.000000 209 +state 0 76 2.564949 0.000000 207 +sourc 0 77 2.564949 0.000000 201 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +server 0 76 2.564949 0.000000 204 +exampl 0 77 2.564949 0.000000 195 +mondai 0 77 2.564949 0.000000 206 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +orient 0 80 2.564949 0.000000 205 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +come 0 78 2.564949 0.000000 202 +logic 0 71 2.639057 0.000000 230 +effici 0 73 2.639057 0.000000 233 +free 0 73 2.639057 0.000000 224 +appli 0 71 2.639057 0.000000 226 +addit 0 74 2.639057 0.000000 228 +solv 0 73 2.639057 0.000000 234 +write 0 72 2.639057 0.000000 222 +differ 0 66 2.708050 0.000000 253 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +view 0 70 2.708050 0.000000 254 +main 0 67 2.708050 0.000000 256 +thursdai 0 70 2.708050 0.000000 241 +receiv 0 66 2.708050 0.000000 244 +knowledg 0 67 2.708050 0.000000 243 +order 0 69 2.708050 0.000000 249 +test 0 66 2.708050 0.000000 252 +practic 0 70 2.708050 0.000000 246 +abstract 0 62 2.772589 0.000000 276 +result 0 65 2.772589 0.000000 281 +evalu 0 64 2.772589 0.000000 266 +experi 0 64 2.772589 0.000000 283 +taylor 0 63 2.772589 0.000000 287 +organ 0 65 2.772589 0.000000 265 +foundat 0 62 2.772589 0.000000 286 +plan 0 65 2.772589 0.000000 272 +virtual 0 62 2.772589 0.000000 285 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +artifici 0 63 2.772589 0.000000 280 +previou 0 62 2.772589 0.000000 290 +written 0 63 2.772589 0.000000 278 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +plai 0 60 2.833213 0.000000 307 +type 0 61 2.833213 0.000000 296 +special 0 56 2.890372 0.000000 320 +explor 0 58 2.890372 0.000000 324 +sever 0 56 2.890372 0.000000 322 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +reason 0 57 2.890372 0.000000 318 +direct 0 57 2.890372 0.000000 316 +major 0 56 2.890372 0.000000 315 +allow 0 53 2.944439 0.000000 333 +talk 0 53 2.944439 0.000000 336 +undergradu 0 54 2.944439 0.000000 338 +extens 0 53 2.944439 0.000000 340 +local 0 55 2.944439 0.000000 334 +instruct 0 53 2.944439 0.000000 332 +three 0 54 2.944439 0.000000 330 +scientif 0 53 2.944439 0.000000 341 +run 0 51 2.995732 0.000000 347 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +case 0 51 2.995732 0.000000 351 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +give 0 50 3.044522 0.000000 359 +without 0 50 3.044522 0.000000 370 +standard 0 48 3.044522 0.000000 365 +adapt 0 46 3.091042 0.000000 387 +featur 0 46 3.091042 0.000000 386 +effect 0 46 3.091042 0.000000 385 +move 0 47 3.091042 0.000000 382 +describ 0 45 3.135494 0.000000 400 +natur 0 44 3.135494 0.000000 406 +execut 0 45 3.135494 0.000000 404 +even 0 45 3.135494 0.000000 393 +discuss 0 45 3.135494 0.000000 399 +better 0 45 3.135494 0.000000 401 +offer 0 43 3.178054 0.000000 414 +fast 0 42 3.218876 0.000000 429 +autom 0 41 3.218876 0.000000 434 +past 0 42 3.218876 0.000000 428 +linear 0 41 3.218876 0.000000 431 +examin 0 42 3.218876 0.000000 424 +continu 0 39 3.258097 0.000000 448 +form 0 39 3.258097 0.000000 443 +must 0 40 3.258097 0.000000 442 +programm 0 39 3.258097 0.000000 445 +map 0 39 3.258097 0.000000 452 +theoret 0 39 3.258097 0.000000 446 +open 0 38 3.295837 0.000000 469 +close 0 38 3.295837 0.000000 465 +seminar 0 38 3.295837 0.000000 470 +credit 0 38 3.295837 0.000000 460 +paul 0 38 3.295837 0.000000 471 +formal 0 37 3.332205 0.000000 478 +hand 0 37 3.332205 0.000000 475 +workstat 0 37 3.332205 0.000000 479 +feel 0 37 3.332205 0.000000 483 +cost 0 37 3.332205 0.000000 480 +mean 0 37 3.332205 0.000000 477 +robot 0 36 3.367296 0.000000 497 +procedur 0 36 3.367296 0.000000 488 +game 0 36 3.367296 0.000000 498 +represent 0 35 3.401197 0.000000 512 +singl 0 34 3.401197 0.000000 510 +least 0 35 3.401197 0.000000 516 +random 0 34 3.401197 0.000000 511 +toler 0 33 3.433987 0.000000 533 +queri 0 33 3.433987 0.000000 524 +within 0 33 3.433987 0.000000 525 +obtain 0 33 3.433987 0.000000 534 +fault 0 32 3.465736 0.000000 547 +idea 0 32 3.465736 0.000000 545 +human 0 32 3.465736 0.000000 546 +given 0 32 3.465736 0.000000 538 +ad 0 32 3.465736 0.000000 544 +taken 0 31 3.496508 0.000000 555 +often 0 31 3.496508 0.000000 551 +neural 0 30 3.555348 0.000000 578 +robert 0 30 3.555348 0.000000 567 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +abl 0 30 3.555348 0.000000 566 +graph 0 30 3.555348 0.000000 576 +domain 0 30 3.555348 0.000000 564 +rang 0 30 3.555348 0.000000 565 +common 0 30 3.555348 0.000000 574 +compon 0 30 3.555348 0.000000 570 +focu 0 30 3.555348 0.000000 571 +semant 0 29 3.583519 0.000000 587 +depend 0 29 3.583519 0.000000 583 +built 0 29 3.583519 0.000000 592 +limit 0 29 3.583519 0.000000 585 +turn 0 29 3.583519 0.000000 586 +platform 0 29 3.583519 0.000000 591 +consid 0 29 3.583519 0.000000 590 +cluster 0 28 3.610918 0.000000 612 +scale 0 28 3.610918 0.000000 613 +propos 0 28 3.610918 0.000000 602 +usual 0 28 3.610918 0.000000 608 +framework 0 28 3.610918 0.000000 606 +enabl 0 26 3.688879 0.000000 655 +consist 0 26 3.688879 0.000000 651 +detect 0 26 3.688879 0.000000 646 +request 0 26 3.688879 0.000000 635 +challeng 0 26 3.688879 0.000000 653 +effort 0 26 3.688879 0.000000 652 +mine 0 26 3.688879 0.000000 654 +repres 0 26 3.688879 0.000000 656 +task 0 25 3.737670 0.000000 678 +reliabl 0 25 3.737670 0.000000 674 +concern 0 25 3.737670 0.000000 666 +client 0 25 3.737670 0.000000 679 +known 0 24 3.761200 0.000000 702 +higher 0 24 3.761200 0.000000 690 +mike 0 24 3.761200 0.000000 703 +greg 0 24 3.761200 0.000000 695 +lab 0 24 3.761200 0.000000 698 +decis 0 23 3.806662 0.000000 728 +highli 0 23 3.806662 0.000000 725 +honor 0 23 3.806662 0.000000 729 +cooper 0 22 3.850148 0.000000 757 +hierarchi 0 22 3.850148 0.000000 744 +sequenti 0 22 3.850148 0.000000 745 +serv 0 22 3.850148 0.000000 758 +emphasi 0 22 3.850148 0.000000 755 +instead 0 22 3.850148 0.000000 756 +inth 0 22 3.850148 0.000000 741 +deal 0 22 3.850148 0.000000 736 +reduc 0 22 3.850148 0.000000 759 +among 0 21 3.912023 0.000000 781 +path 0 21 3.912023 0.000000 778 +busi 0 21 3.912023 0.000000 784 +fact 0 21 3.912023 0.000000 780 +output 0 21 3.912023 0.000000 788 +facil 0 20 3.951244 0.000000 814 +portabl 0 20 3.951244 0.000000 819 +basi 0 20 3.951244 0.000000 828 +increas 0 20 3.951244 0.000000 829 +runtim 0 19 4.007333 0.000000 858 +separ 0 19 4.007333 0.000000 844 +failur 0 18 4.060443 0.000000 898 +attend 0 18 4.060443 0.000000 893 +encourag 0 18 4.060443 0.000000 880 +seem 0 18 4.060443 0.000000 899 +appropri 0 18 4.060443 0.000000 883 +stop 0 17 4.110874 0.000000 942 +ultim 0 17 4.110874 0.000000 943 +previous 0 17 4.110874 0.000000 923 +modif 0 17 4.110874 0.000000 913 +cognit 0 16 4.174387 0.000000 986 +advantag 0 16 4.174387 0.000000 987 +spatial 0 16 4.174387 0.000000 988 +easi 0 16 4.174387 0.000000 969 +spars 0 16 4.174387 0.000000 989 +vector 0 16 4.174387 0.000000 961 +across 0 16 4.174387 0.000000 974 +action 0 15 4.248495 0.000000 1038 +novel 0 15 4.248495 0.000000 1039 +english 0 15 4.248495 0.000000 1033 +side 0 15 4.248495 0.000000 1022 +massiv 0 15 4.248495 0.000000 1026 +reflect 0 15 4.248495 0.000000 1034 +topolog 0 14 4.317488 0.000000 1089 +heterogen 0 14 4.317488 0.000000 1090 +convent 0 14 4.317488 0.000000 1072 +translat 0 13 4.382027 0.000000 1164 +discret 0 13 4.382027 0.000000 1165 +prolog 0 13 4.382027 0.000000 1155 +whose 0 13 4.382027 0.000000 1166 +central 0 13 4.382027 0.000000 1160 +signific 0 13 4.382027 0.000000 1125 +difficulti 0 13 4.382027 0.000000 1132 +infrastructur 0 12 4.465908 0.000000 1234 +grow 0 12 4.465908 0.000000 1209 +onth 0 12 4.465908 0.000000 1218 +amount 0 12 4.465908 0.000000 1208 +evolv 0 12 4.465908 0.000000 1223 +promot 0 12 4.465908 0.000000 1235 +buffer 0 12 4.465908 0.000000 1211 +deduct 0 12 4.465908 0.000000 1236 +minor 0 12 4.465908 0.000000 1237 +evolut 0 11 4.553877 0.000000 1314 +broad 0 11 4.553877 0.000000 1302 +vladimir 0 11 4.553877 0.000000 1324 +transpar 0 11 4.553877 0.000000 1325 +multithread 0 11 4.553877 0.000000 1315 +node 0 11 4.553877 0.000000 1326 +induct 0 11 4.553877 0.000000 1304 +fix 0 11 4.553877 0.000000 1327 +noth 0 11 4.553877 0.000000 1328 +genet 0 10 4.653960 0.000000 1409 +underli 0 10 4.653960 0.000000 1410 +guarante 0 10 4.653960 0.000000 1391 +reli 0 10 4.653960 0.000000 1411 +facilit 0 10 4.653960 0.000000 1412 +length 0 10 4.653960 0.000000 1400 +sentenc 0 10 4.653960 0.000000 1413 +custom 0 10 4.653960 0.000000 1414 +certain 0 10 4.653960 0.000000 1393 +tradit 0 10 4.653960 0.000000 1404 +modular 0 10 4.653960 0.000000 1392 +notat 0 9 4.753590 0.000000 1489 +risto 0 9 4.753590 0.000000 1523 +calvin 0 9 4.753590 0.000000 1518 +sister 0 9 4.753590 0.000000 1524 +explicit 0 9 4.753590 0.000000 1525 +declar 0 9 4.753590 0.000000 1526 +compos 0 9 4.753590 0.000000 1527 +meta 0 9 4.753590 0.000000 1505 +novak 0 9 4.753590 0.000000 1521 +assumpt 0 9 4.753590 0.000000 1514 +significantli 0 9 4.753590 0.000000 1508 +reus 0 8 4.875197 0.000000 1661 +simpli 0 8 4.875197 0.000000 1626 +fail 0 8 4.875197 0.000000 1655 +paradigm 0 8 4.875197 0.000000 1662 +isol 0 8 4.875197 0.000000 1663 +claim 0 8 4.875197 0.000000 1664 +realist 0 8 4.875197 0.000000 1665 +analys 0 8 4.875197 0.000000 1666 +beyond 0 7 5.010635 0.000000 1834 +trend 0 7 5.010635 0.000000 1842 +dedic 0 7 5.010635 0.000000 1843 +metric 0 7 5.010635 0.000000 1831 +usabl 0 7 5.010635 0.000000 1810 +converg 0 7 5.010635 0.000000 1844 +uniform 0 7 5.010635 0.000000 1845 +determinist 0 6 5.164786 0.000000 2034 +difficult 0 6 5.164786 0.000000 2035 +gordon 0 6 5.164786 0.000000 2032 +academia 0 6 5.164786 0.000000 2036 +promis 0 6 5.164786 0.000000 2037 +pace 0 6 5.164786 0.000000 2011 +emerg 0 6 5.164786 0.000000 2038 +causal 0 6 5.164786 0.000000 2024 +prefetch 0 6 5.164786 0.000000 2039 +infer 0 6 5.164786 0.000000 2040 +conflict 0 6 5.164786 0.000000 2041 +constrain 0 6 5.164786 0.000000 2042 +variant 0 6 5.164786 0.000000 2043 +affect 0 6 5.164786 0.000000 2044 +corpu 0 5 5.347108 0.000000 2282 +revolut 0 5 5.347108 0.000000 2315 +ofdistribut 0 5 5.347108 0.000000 2316 +scope 0 5 5.347108 0.000000 2296 +despit 0 5 5.347108 0.000000 2317 +unknown 0 5 5.347108 0.000000 2318 +stabl 0 5 5.347108 0.000000 2309 +distinct 0 5 5.347108 0.000000 2319 +corba 0 5 5.347108 0.000000 2320 +pars 0 5 5.347108 0.000000 2321 +fairli 0 5 5.347108 0.000000 2322 +ofinterest 0 5 5.347108 0.000000 2323 +blumoferdb 0 5 5.347108 0.000000 2324 +reinforc 0 4 5.568345 0.000000 2674 +resolv 0 4 5.568345 0.000000 2675 +asymptot 0 4 5.568345 0.000000 2676 +vijaya 0 4 5.568345 0.000000 2677 +lorenzo 0 4 5.568345 0.000000 2588 +expens 0 4 5.568345 0.000000 2678 +ofworkst 0 4 5.568345 0.000000 2679 +illus 0 4 5.568345 0.000000 2603 +algorithmsand 0 4 5.568345 0.000000 2680 +havedevelop 0 4 5.568345 0.000000 2681 +clearli 0 4 5.568345 0.000000 2590 +surprisingli 0 4 5.568345 0.000000 2609 +floyd 0 4 5.568345 0.000000 2682 +queu 0 4 5.568345 0.000000 2648 +conceptu 0 3 5.857933 0.000000 3214 +lightweight 0 3 5.857933 0.000000 3234 +popul 0 3 5.857933 0.000000 3235 +embodi 0 3 5.857933 0.000000 3236 +blumof 0 3 5.857933 0.000000 3237 +citizen 0 3 5.857933 0.000000 3238 +dramat 0 3 5.857933 0.000000 3239 +aggress 0 3 5.857933 0.000000 3240 +similarli 0 3 5.857933 0.000000 3241 +neighborhood 0 3 5.857933 0.000000 3242 +intra 0 3 5.857933 0.000000 3243 +enumer 0 3 5.857933 0.000000 3244 +compliant 0 3 5.857933 0.000000 3245 +andsemant 0 3 5.857933 0.000000 3246 +parser 0 3 5.857933 0.000000 3141 +surpass 0 3 5.857933 0.000000 3247 +agener 0 3 5.857933 0.000000 3213 +theworld 0 3 5.857933 0.000000 3158 +rivest 0 3 5.857933 0.000000 3248 +parallelalgorithm 0 3 5.857933 0.000000 3249 +rscheme 0 3 5.857933 0.000000 3250 +cilk 0 2 6.263398 0.000000 4242 +alamo 0 2 6.263398 0.000000 4243 +chill 0 2 6.263398 0.000000 4244 +theform 0 2 6.263398 0.000000 4245 +sciencefal 0 2 6.263398 0.000000 4246 +andresearch 0 2 6.263398 0.000000 4247 +government 0 2 6.263398 0.000000 4248 +andcollect 0 2 6.263398 0.000000 4249 +todramat 0 2 6.263398 0.000000 4250 +thedesign 0 2 6.263398 0.000000 4251 +har 0 2 6.263398 0.000000 4252 +undergo 0 2 6.263398 0.000000 4253 +applicationsto 0 2 6.263398 0.000000 4254 +offailur 0 2 6.263398 0.000000 4255 +idl 0 2 6.263398 0.000000 4256 +indistribut 0 2 6.263398 0.000000 4257 +andmap 0 2 6.263398 0.000000 4258 +equilibrium 0 2 6.263398 0.000000 4259 +trajectori 0 2 6.263398 0.000000 4260 +sufficientto 0 2 6.263398 0.000000 4261 +logicprogram 0 2 6.263398 0.000000 4262 +thesetechniqu 0 2 6.263398 0.000000 4263 +ofneur 0 2 6.263398 0.000000 4264 +neuro 0 2 6.263398 0.000000 4265 +resourcemanag 0 2 6.263398 0.000000 4266 +anobject 0 2 6.263398 0.000000 4267 +anticip 0 2 6.263398 0.000000 4268 +corpora 0 2 6.263398 0.000000 4269 +foidl 0 2 6.263398 0.000000 4270 +andanalysi 0 2 6.263398 0.000000 4271 +straightforward 0 2 6.263398 0.000000 4272 +lengthi 0 2 6.263398 0.000000 4273 +andform 0 2 6.263398 0.000000 4274 +succinctli 0 2 6.263398 0.000000 4275 +concret 0 2 6.263398 0.000000 4276 +analysisof 0 2 6.263398 0.000000 4277 +tarjan 0 2 6.263398 0.000000 4278 +maspar 0 2 6.263398 0.000000 4279 +workon 0 2 6.263398 0.000000 4280 +symbiot 0 1 6.957497 0.000000 7164 +pram 0 1 6.957497 0.000000 7165 +programmingoctob 0 1 6.957497 0.000000 7166 +wilsonextens 0 1 6.957497 0.000000 7167 +ramachandranth 0 1 6.957497 0.000000 7168 +theapplic 0 1 6.957497 0.000000 7169 +sane 0 1 6.957497 0.000000 7170 +datasourc 0 1 6.957497 0.000000 7171 +theabstract 0 1 6.957497 0.000000 7172 +sciencecst 0 1 6.957497 0.000000 7173 +apass 0 1 6.957497 0.000000 7174 +beregist 0 1 6.957497 0.000000 7175 +schedulespeakertitleseptemb 0 1 6.957497 0.000000 7176 +mirankeralamo 0 1 6.957497 0.000000 7177 +warehouseseptemb 0 1 6.957497 0.000000 7178 +kuipersth 0 1 6.957497 0.000000 7179 +humanand 0 1 6.957497 0.000000 7180 +mapsseptemb 0 1 6.957497 0.000000 7181 +blumofecilk 0 1 6.957497 0.000000 7182 +reliableparallel 0 1 6.957497 0.000000 7183 +workstationsseptemb 0 1 6.957497 0.000000 7184 +miikkulainenlearn 0 1 6.957497 0.000000 7185 +throughsymbiot 0 1 6.957497 0.000000 7186 +networksoctob 0 1 6.957497 0.000000 7187 +lifschitzmathemat 0 1 6.957497 0.000000 7188 +reflectionoctob 0 1 6.957497 0.000000 7189 +mooneylearn 0 1 6.957497 0.000000 7190 +usinginduct 0 1 6.957497 0.000000 7191 +dahlindistribut 0 1 6.957497 0.000000 7192 +internetsnovemb 0 1 6.957497 0.000000 7193 +novaksoftwar 0 1 6.957497 0.000000 7194 +genericprocedur 0 1 6.957497 0.000000 7195 +viewsnovemb 0 1 6.957497 0.000000 7196 +parallelalgorithmsnovemb 0 1 6.957497 0.000000 7197 +alvisilighweight 0 1 6.957497 0.000000 7198 +tolerancenovemb 0 1 6.957497 0.000000 7199 +linadapt 0 1 6.957497 0.000000 7200 +optimizationdecemb 0 1 6.957497 0.000000 7201 +plaxtonanalysi 0 1 6.957497 0.000000 7202 +algorithmslighweight 0 1 6.957497 0.000000 7203 +tolerancelorenzo 0 1 6.957497 0.000000 7204 +alvisidistribut 0 1 6.957497 0.000000 7205 +confin 0 1 6.957497 0.000000 7206 +revolution 0 1 6.957497 0.000000 7207 +beyondth 0 1 6.957497 0.000000 7208 +toleranttechniqu 0 1 6.957497 0.000000 7209 +willceas 0 1 6.957497 0.000000 7210 +exot 0 1 6.957497 0.000000 7211 +distributedinform 0 1 6.957497 0.000000 7212 +acompetit 0 1 6.957497 0.000000 7213 +criticalinform 0 1 6.957497 0.000000 7214 +engineerfault 0 1 6.957497 0.000000 7215 +negligibleimpact 0 1 6.957497 0.000000 7216 +communicatethrough 0 1 6.957497 0.000000 7217 +onnetwork 0 1 6.957497 0.000000 7218 +workstationsrobert 0 1 6.957497 0.000000 7219 +blumofethi 0 1 6.957497 0.000000 7220 +pronouncedsilk 0 1 6.957497 0.000000 7221 +andcilk 0 1 6.957497 0.000000 7222 +functionalsubset 0 1 6.957497 0.000000 7223 +providesadapt 0 1 6.957497 0.000000 7224 +tranpar 0 1 6.957497 0.000000 7225 +touser 0 1 6.957497 0.000000 7226 +shrinkdynam 0 1 6.957497 0.000000 7227 +cilkprogram 0 1 6.957497 0.000000 7228 +workstationscrash 0 1 6.957497 0.000000 7229 +andrecov 0 1 6.957497 0.000000 7230 +livedemonstr 0 1 6.957497 0.000000 7231 +internetsmik 0 1 6.957497 0.000000 7232 +dahlinthi 0 1 6.957497 0.000000 7233 +applicationsmotiv 0 1 6.957497 0.000000 7234 +inclust 0 1 6.957497 0.000000 7235 +servicei 0 1 6.957497 0.000000 7236 +nodesto 0 1 6.957497 0.000000 7237 +centralserv 0 1 6.957497 0.000000 7238 +goodperform 0 1 6.957497 0.000000 7239 +networkperform 0 1 6.957497 0.000000 7240 +projectwil 0 1 6.957497 0.000000 7241 +mapsbenjamin 0 1 6.957497 0.000000 7242 +kuipershuman 0 1 6.957497 0.000000 7243 +forlarg 0 1 6.957497 0.000000 7244 +ontolog 0 1 6.957497 0.000000 7245 +varietyof 0 1 6.957497 0.000000 7246 +cast 0 1 6.957497 0.000000 7247 +diverserepresent 0 1 6.957497 0.000000 7248 +spatialsemant 0 1 6.957497 0.000000 7249 +andassumpt 0 1 6.957497 0.000000 7250 +thecontrol 0 1 6.957497 0.000000 7251 +beabstract 0 1 6.957497 0.000000 7252 +givinga 0 1 6.957497 0.000000 7253 +causalgraph 0 1 6.957497 0.000000 7254 +topologicalnetwork 0 1 6.957497 0.000000 7255 +occupancygrid 0 1 6.957497 0.000000 7256 +theframework 0 1 6.957497 0.000000 7257 +ofglob 0 1 6.957497 0.000000 7258 +programmingvladimir 0 1 6.957497 0.000000 7259 +lifschitzlog 0 1 6.957497 0.000000 7260 +functionalprogram 0 1 6.957497 0.000000 7261 +notne 0 1 6.957497 0.000000 7262 +itcan 0 1 6.957497 0.000000 7263 +executedus 0 1 6.957497 0.000000 7264 +withdefin 0 1 6.957497 0.000000 7265 +thereason 0 1 6.957497 0.000000 7266 +thesound 0 1 6.957497 0.000000 7267 +optimizationcalvin 0 1 6.957497 0.000000 7268 +linthi 0 1 6.957497 0.000000 7269 +andtheir 0 1 6.957497 0.000000 7270 +differenthardwar 0 1 6.957497 0.000000 7271 +efficientand 0 1 6.957497 0.000000 7272 +suchlibrari 0 1 6.957497 0.000000 7273 +weexplain 0 1 6.957497 0.000000 7274 +networksristo 0 1 6.957497 0.000000 7275 +miikkulainena 0 1 6.957497 0.000000 7276 +neuronsthrough 0 1 6.957497 0.000000 7277 +anddiscourag 0 1 6.957497 0.000000 7278 +suboptim 0 1 6.957497 0.000000 7279 +toextract 0 1 6.957497 0.000000 7280 +sequentialdecis 0 1 6.957497 0.000000 7281 +warehousedan 0 1 6.957497 0.000000 7282 +mirankerth 0 1 6.957497 0.000000 7283 +andint 0 1 6.957497 0.000000 7284 +theuser 0 1 6.957497 0.000000 7285 +byqueri 0 1 6.957497 0.000000 7286 +interfacethat 0 1 6.957497 0.000000 7287 +ofabstract 0 1 6.957497 0.000000 7288 +clever 0 1 6.957497 0.000000 7289 +anddata 0 1 6.957497 0.000000 7290 +activedatabas 0 1 6.957497 0.000000 7291 +constructedus 0 1 6.957497 0.000000 7292 +databasefacil 0 1 6.957497 0.000000 7293 +thealamo 0 1 6.957497 0.000000 7294 +dataintegr 0 1 6.957497 0.000000 7295 +elementsof 0 1 6.957497 0.000000 7296 +furthercomposit 0 1 6.957497 0.000000 7297 +answerhigh 0 1 6.957497 0.000000 7298 +logicprogrammingraymond 0 1 6.957497 0.000000 7299 +mooneyinduct 0 1 6.957497 0.000000 7300 +learningprolog 0 1 6.957497 0.000000 7301 +offirst 0 1 6.957497 0.000000 7302 +learningmethod 0 1 6.957497 0.000000 7303 +areappli 0 1 6.957497 0.000000 7304 +believethi 0 1 6.957497 0.000000 7305 +richer 0 1 6.957497 0.000000 7306 +parsersfrom 0 1 6.957497 0.000000 7307 +superior 0 1 6.957497 0.000000 7308 +onsever 0 1 6.957497 0.000000 7309 +networkmethod 0 1 6.957497 0.000000 7310 +ati 0 1 6.957497 0.000000 7311 +ofairlin 0 1 6.957497 0.000000 7312 +automaticallydevelop 0 1 6.957497 0.000000 7313 +englishdatabas 0 1 6.957497 0.000000 7314 +moreaccur 0 1 6.957497 0.000000 7315 +smallgeograph 0 1 6.957497 0.000000 7316 +tens 0 1 6.957497 0.000000 7317 +treemethod 0 1 6.957497 0.000000 7318 +throughviewsgordon 0 1 6.957497 0.000000 7319 +toachiev 0 1 6.957497 0.000000 7320 +thesoftwar 0 1 6.957497 0.000000 7321 +typesus 0 1 6.957497 0.000000 7322 +specifyview 0 1 6.957497 0.000000 7323 +adesir 0 1 6.957497 0.000000 7324 +algorithmsgreg 0 1 6.957497 0.000000 7325 +plaxtona 0 1 6.957497 0.000000 7326 +forspecif 0 1 6.957497 0.000000 7327 +notuncommon 0 1 6.957497 0.000000 7328 +havelittl 0 1 6.957497 0.000000 7329 +suchpap 0 1 6.957497 0.000000 7330 +gapsinher 0 1 6.957497 0.000000 7331 +inadequatefor 0 1 6.957497 0.000000 7332 +straightforwardalgorithm 0 1 6.957497 0.000000 7333 +theconceptu 0 1 6.957497 0.000000 7334 +trivialclass 0 1 6.957497 0.000000 7335 +blum 0 1 6.957497 0.000000 7336 +pratt 0 1 6.957497 0.000000 7337 +algorithmsvijaya 0 1 6.957497 0.000000 7338 +forcombinatori 0 1 6.957497 0.000000 7339 +recentyear 0 1 6.957497 0.000000 7340 +willdescrib 0 1 6.957497 0.000000 7341 +thesealgorithm 0 1 6.957497 0.000000 7342 +thendescrib 0 1 6.957497 0.000000 7343 +wepropos 0 1 6.957497 0.000000 7344 +parallelshar 0 1 6.957497 0.000000 7345 +reflectionpaul 0 1 6.957497 0.000000 7346 +addnew 0 1 6.957497 0.000000 7347 +structureaccordingli 0 1 6.957497 0.000000 7348 +adapat 0 1 6.957497 0.000000 7349 +extensiblelanguag 0 1 6.957497 0.000000 7350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..702fc921 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +utexa 0 189 1.609438 0.000000 44 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +implement 0 152 1.791759 0.000000 52 +problem 1 147 1.945910 1.945910 75 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +support 0 132 1.945910 0.000000 83 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +book 0 99 2.302585 0.000000 131 +octob 0 89 2.397895 0.000000 156 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +novemb 0 81 2.484907 0.000000 179 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +mondai 0 77 2.564949 0.000000 206 +free 0 73 2.639057 0.000000 224 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +test 0 66 2.708050 0.000000 252 +septemb 0 65 2.772589 0.000000 274 +wednesdai 0 64 2.772589 0.000000 261 +taylor 0 63 2.772589 0.000000 287 +handout 0 64 2.772589 0.000000 263 +found 0 53 2.944439 0.000000 337 +cover 0 55 2.944439 0.000000 329 +date 0 51 2.995732 0.000000 344 +midterm 0 45 3.135494 0.000000 392 +long 0 43 3.178054 0.000000 413 +map 0 39 3.258097 0.000000 452 +feel 0 37 3.332205 0.000000 483 +least 0 35 3.401197 0.000000 516 +chapter 0 32 3.465736 0.000000 536 +robert 0 30 3.555348 0.000000 567 +except 0 28 3.610918 0.000000 607 +assum 0 19 4.007333 0.000000 845 +stop 0 17 4.110874 0.000000 942 +station 0 13 4.382027 0.000000 1157 +solari 0 12 4.465908 0.000000 1238 +systemsc 0 11 4.553877 0.000000 1293 +canb 0 7 5.010635 0.000000 1846 +encrypt 0 7 5.010635 0.000000 1835 +blumoferdb 0 5 5.347108 0.000000 2324 +systemsfal 0 4 5.568345 0.000000 2683 +blumof 0 3 5.857933 0.000000 3237 +gooti 0 2 6.263398 0.000000 4281 +subramanyam 0 2 6.263398 0.000000 4282 +crypt 0 1 6.957497 0.000000 7351 +multiplemap 0 1 6.957497 0.000000 7352 +themap 0 1 6.957497 0.000000 7353 +decrypt 0 1 6.957497 0.000000 7354 +solutionsread 0 1 6.957497 0.000000 7355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..6a5229a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +network 1 168 1.791759 1.791759 61 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +text 0 98 2.302585 0.000000 133 +select 0 91 2.397895 0.000000 154 +grade 0 90 2.397895 0.000000 142 +homework 0 79 2.564949 0.000000 193 +copi 0 63 2.772589 0.000000 284 +detail 0 57 2.890372 0.000000 321 +midterm 0 45 3.135494 0.000000 392 +slide 0 38 3.295837 0.000000 467 +neural 1 30 3.555348 3.555348 578 +fundament 0 25 3.737670 0.000000 661 +station 0 13 4.382027 0.000000 1157 +uniqu 0 12 4.465908 0.000000 1228 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +lauren 0 3 5.857933 0.000000 3251 +bednar 0 2 6.263398 0.000000 4283 +jbednar 0 2 6.263398 0.000000 4284 +ofneur 0 2 6.263398 0.000000 4264 +cliff 0 2 6.263398 0.000000 4285 +edusun 0 2 6.263398 0.000000 4286 +networksfal 0 1 6.957497 0.000000 7356 +fausett 0 1 6.957497 0.000000 7357 +englewood 0 1 6.957497 0.000000 7358 +prenticehal 0 1 6.957497 0.000000 7359 +schedulehomework 0 1 6.957497 0.000000 7360 +assignmentsexamsclass 0 1 6.957497 0.000000 7361 +resourcesa 0 1 6.957497 0.000000 7362 +versionof 0 1 6.957497 0.000000 7363 +syllabusristo 0 1 6.957497 0.000000 7364 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..b0aeae6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +note 0 142 1.945910 0.000000 67 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +final 0 116 2.197225 0.000000 108 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +center 0 88 2.397895 0.000000 158 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +write 0 72 2.639057 0.000000 222 +dept 0 64 2.772589 0.000000 291 +taylor 0 63 2.772589 0.000000 287 +foundat 0 62 2.772589 0.000000 286 +detail 0 57 2.890372 0.000000 321 +pointer 0 48 3.044522 0.000000 368 +discuss 0 45 3.135494 0.000000 399 +submit 0 39 3.258097 0.000000 440 +short 0 36 3.367296 0.000000 499 +approxim 0 35 3.401197 0.000000 509 +collabor 0 32 3.465736 0.000000 543 +particip 0 29 3.583519 0.000000 589 +toward 0 25 3.737670 0.000000 668 +attend 0 18 4.060443 0.000000 893 +regular 0 17 4.110874 0.000000 929 +cognit 1 16 4.174387 4.174387 986 +critic 0 16 4.174387 0.000000 982 +philosophi 0 13 4.382027 0.000000 1167 +signific 0 13 4.382027 0.000000 1125 +count 0 12 4.465908 0.000000 1239 +packet 0 10 4.653960 0.000000 1415 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +appt 0 5 5.347108 0.000000 2312 +ofinterest 0 5 5.347108 0.000000 2323 +nichola 0 3 5.857933 0.000000 3252 +interv 0 3 5.857933 0.000000 3253 +thepap 0 3 5.857933 0.000000 3254 +sciencefal 0 2 6.263398 0.000000 4246 +commentari 0 2 6.263398 0.000000 4287 +edusun 0 2 6.263398 0.000000 4286 +asher 0 1 6.957497 0.000000 7365 +waggen 0 1 6.957497 0.000000 7366 +nasher 0 1 6.957497 0.000000 7367 +berti 0 1 6.957497 0.000000 7368 +posner 0 1 6.957497 0.000000 7369 +mitpress 0 1 6.957497 0.000000 7370 +withanoth 0 1 6.957497 0.000000 7371 +alsorequir 0 1 6.957497 0.000000 7372 +descriptioncours 0 1 6.957497 0.000000 7373 +schedulediscuss 0 1 6.957497 0.000000 7374 +notesperson 0 1 6.957497 0.000000 7375 +adscollabor 0 1 6.957497 0.000000 7376 +paperclass 0 1 6.957497 0.000000 7377 +resourcesstud 0 1 6.957497 0.000000 7378 +questionnaireus 0 1 6.957497 0.000000 7379 +sciencefaculti 0 1 6.957497 0.000000 7380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..28164473 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,191 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +group 0 183 1.609438 0.000000 36 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +assist 0 112 2.197225 0.000000 113 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +exam 0 86 2.484907 0.000000 169 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +mondai 0 77 2.564949 0.000000 206 +exampl 0 77 2.564949 0.000000 195 +summari 0 73 2.639057 0.000000 237 +line 0 75 2.639057 0.000000 231 +materi 0 75 2.639057 0.000000 221 +syllabu 0 67 2.708050 0.000000 247 +foundat 0 62 2.772589 0.000000 286 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +copi 0 63 2.772589 0.000000 284 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +laboratori 0 63 2.772589 0.000000 292 +room 0 59 2.833213 0.000000 301 +detail 0 57 2.890372 0.000000 321 +semest 0 58 2.890372 0.000000 312 +sever 0 56 2.890372 0.000000 322 +three 0 54 2.944439 0.000000 330 +cover 0 55 2.944439 0.000000 329 +date 0 51 2.995732 0.000000 344 +give 0 50 3.044522 0.000000 359 +without 0 50 3.044522 0.000000 370 +frequent 0 49 3.044522 0.000000 367 +still 0 50 3.044522 0.000000 362 +get 0 46 3.091042 0.000000 380 +possibl 0 47 3.091042 0.000000 378 +could 0 46 3.091042 0.000000 383 +discuss 0 45 3.135494 0.000000 399 +even 0 45 3.135494 0.000000 393 +long 0 43 3.178054 0.000000 413 +howev 0 41 3.218876 0.000000 422 +late 0 40 3.258097 0.000000 439 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +form 0 39 3.258097 0.000000 443 +credit 0 38 3.295837 0.000000 460 +respons 0 37 3.332205 0.000000 476 +expect 0 37 3.332205 0.000000 484 +feel 0 37 3.332205 0.000000 483 +procedur 0 36 3.367296 0.000000 488 +ofth 0 36 3.367296 0.000000 491 +soon 0 36 3.367296 0.000000 494 +staff 0 36 3.367296 0.000000 490 +everi 0 34 3.401197 0.000000 519 +within 0 33 3.433987 0.000000 525 +articl 0 33 3.433987 0.000000 530 +chapter 0 32 3.465736 0.000000 536 +often 0 31 3.496508 0.000000 551 +limit 0 29 3.583519 0.000000 585 +depend 0 29 3.583519 0.000000 583 +turn 0 29 3.583519 0.000000 586 +becom 0 28 3.610918 0.000000 603 +challeng 0 26 3.688879 0.000000 653 +effort 0 26 3.688879 0.000000 652 +background 0 25 3.737670 0.000000 664 +highli 0 23 3.806662 0.000000 725 +begin 0 23 3.806662 0.000000 716 +identifi 0 22 3.850148 0.000000 760 +self 0 22 3.850148 0.000000 761 +half 0 21 3.912023 0.000000 776 +prepar 0 20 3.951244 0.000000 824 +break 0 20 3.951244 0.000000 812 +event 0 18 4.060443 0.000000 896 +attend 0 18 4.060443 0.000000 893 +less 0 18 4.060443 0.000000 892 +partial 0 18 4.060443 0.000000 900 +monitor 0 17 4.110874 0.000000 941 +debug 0 17 4.110874 0.000000 944 +thought 0 17 4.110874 0.000000 945 +quiz 0 16 4.174387 0.000000 990 +earli 0 16 4.174387 0.000000 968 +enough 0 15 4.248495 0.000000 1040 +warn 0 14 4.317488 0.000000 1068 +near 0 14 4.317488 0.000000 1091 +consider 0 14 4.317488 0.000000 1076 +wait 0 13 4.382027 0.000000 1168 +quizz 0 13 4.382027 0.000000 1151 +everyth 0 13 4.382027 0.000000 1169 +unfortun 0 13 4.382027 0.000000 1170 +everyon 0 13 4.382027 0.000000 1148 +individu 0 13 4.382027 0.000000 1126 +pascal 0 12 4.465908 0.000000 1213 +uniqu 0 12 4.465908 0.000000 1228 +excit 0 11 4.553877 0.000000 1329 +extrem 0 11 4.553877 0.000000 1330 +eight 0 11 4.553877 0.000000 1331 +deadlin 0 9 4.753590 0.000000 1502 +hundr 0 9 4.753590 0.000000 1528 +intellectu 0 7 5.010635 0.000000 1847 +delai 0 7 5.010635 0.000000 1848 +carefulli 0 6 5.164786 0.000000 2045 +difficult 0 6 5.164786 0.000000 2035 +apolog 0 6 5.164786 0.000000 2046 +nine 0 6 5.164786 0.000000 2047 +vari 0 6 5.164786 0.000000 2001 +pace 0 6 5.164786 0.000000 2011 +firm 0 4 5.568345 0.000000 2684 +thecours 0 4 5.568345 0.000000 2685 +behind 0 4 5.568345 0.000000 2610 +welch 0 4 5.568345 0.000000 2655 +theprogram 0 4 5.568345 0.000000 2686 +dale 0 4 5.568345 0.000000 2687 +wewil 0 4 5.568345 0.000000 2688 +thiscours 0 4 5.568345 0.000000 2601 +repli 0 4 5.568345 0.000000 2689 +guadalup 0 3 5.857933 0.000000 3255 +andyou 0 3 5.857933 0.000000 3256 +gripe 0 3 5.857933 0.000000 3257 +suzi 0 2 6.263398 0.000000 4288 +wella 0 2 6.263398 0.000000 4289 +foral 0 2 6.263398 0.000000 4290 +riski 0 2 6.263398 0.000000 4291 +nowher 0 2 6.263398 0.000000 4292 +gallagh 0 2 6.263398 0.000000 4293 +requiredtextbook 0 2 6.263398 0.000000 4204 +elicit 0 2 6.263398 0.000000 4294 +thenewsgroup 0 1 6.957497 0.000000 7381 +thetest 0 1 6.957497 0.000000 7382 +programmingcsp 0 1 6.957497 0.000000 7383 +pascalintroductori 0 1 6.957497 0.000000 7384 +programminginstructor 0 1 6.957497 0.000000 7385 +gallagherwelcom 0 1 6.957497 0.000000 7386 +cspi 0 1 6.957497 0.000000 7387 +andso 0 1 6.957497 0.000000 7388 +otherdeadlin 0 1 6.957497 0.000000 7389 +thesyllabu 0 1 6.957497 0.000000 7390 +jenn 0 1 6.957497 0.000000 7391 +takethi 0 1 6.957497 0.000000 7392 +courseeach 0 1 6.957497 0.000000 7393 +howwel 0 1 6.957497 0.000000 7394 +deadlineto 0 1 6.957497 0.000000 7395 +thursdayeven 0 1 6.957497 0.000000 7396 +intosmal 0 1 6.957497 0.000000 7397 +ateach 0 1 6.957497 0.000000 7398 +thatlaboratori 0 1 6.957497 0.000000 7399 +thatgrad 0 1 6.957497 0.000000 7400 +thattest 0 1 6.957497 0.000000 7401 +limitedand 0 1 6.957497 0.000000 7402 +foravail 0 1 6.957497 0.000000 7403 +proctor 0 1 6.957497 0.000000 7404 +hoursbefor 0 1 6.957497 0.000000 7405 +andquizz 0 1 6.957497 0.000000 7406 +betaken 0 1 6.957497 0.000000 7407 +prescrib 0 1 6.957497 0.000000 7408 +openedfor 0 1 6.957497 0.000000 7409 +yourstud 0 1 6.957497 0.000000 7410 +orsak 0 1 6.957497 0.000000 7411 +weem 0 1 6.957497 0.000000 7412 +liabl 0 1 6.957497 0.000000 7413 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..2324ea2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +data 1 170 1.791759 1.791759 49 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +file 0 132 1.945910 0.000000 70 +problem 0 147 1.945910 0.000000 75 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +databas 0 122 2.079442 0.000000 86 +pleas 0 113 2.197225 0.000000 114 +sinc 0 90 2.397895 0.000000 159 +comment 0 93 2.397895 0.000000 146 +solut 0 82 2.484907 0.000000 162 +homework 0 79 2.564949 0.000000 193 +test 0 66 2.708050 0.000000 252 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +syllabu 0 67 2.708050 0.000000 247 +dept 0 64 2.772589 0.000000 291 +reason 0 57 2.890372 0.000000 318 +sampl 0 53 2.944439 0.000000 339 +suggest 0 53 2.944439 0.000000 331 +run 0 51 2.995732 0.000000 347 +without 0 50 3.044522 0.000000 370 +anoth 0 45 3.135494 0.000000 408 +error 0 40 3.258097 0.000000 449 +transform 0 32 3.465736 0.000000 542 +turn 0 29 3.583519 0.000000 586 +pass 0 28 3.610918 0.000000 611 +measur 0 28 3.610918 0.000000 609 +retriev 0 27 3.637586 0.000000 621 +compar 0 26 3.688879 0.000000 648 +output 1 21 3.912023 3.912023 788 +wang 0 21 3.912023 0.000000 790 +wrote 0 20 3.951244 0.000000 830 +benchmark 0 19 4.007333 0.000000 859 +attribut 0 14 4.317488 0.000000 1092 +script 0 13 4.382027 0.000000 1171 +perl 0 11 4.553877 0.000000 1332 +recoveri 0 9 4.753590 0.000000 1474 +replac 0 8 4.875197 0.000000 1668 +fail 0 8 4.875197 0.000000 1655 +contest 0 5 5.347108 0.000000 2273 +tupl 0 5 5.347108 0.000000 2244 +batori 0 4 5.568345 0.000000 2690 +delet 0 4 5.568345 0.000000 2691 +tong 0 3 5.857933 0.000000 3258 +append 0 2 6.263398 0.000000 4295 +ret_into 0 1 6.957497 0.000000 7414 +mdb 0 1 6.957497 0.000000 7415 +diff 0 1 6.957497 0.000000 7416 +medec 0 1 6.957497 0.000000 7417 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..85f5fc3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,303 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +schedul 0 119 2.079442 0.000000 85 +confer 0 126 2.079442 0.000000 100 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +intern 0 108 2.197225 0.000000 128 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +call 0 91 2.397895 0.000000 153 +grade 0 90 2.397895 0.000000 142 +present 0 91 2.397895 0.000000 145 +octob 0 89 2.397895 0.000000 156 +ieee 0 86 2.484907 0.000000 190 +environ 0 84 2.484907 0.000000 177 +requir 0 81 2.484907 0.000000 167 +novemb 0 81 2.484907 0.000000 179 +control 0 82 2.484907 0.000000 164 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +decemb 0 80 2.564949 0.000000 215 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +august 0 66 2.708050 0.000000 257 +practic 0 70 2.708050 0.000000 246 +januari 0 62 2.772589 0.000000 264 +descript 0 64 2.772589 0.000000 271 +collect 0 65 2.772589 0.000000 268 +septemb 0 65 2.772589 0.000000 274 +foundat 0 62 2.772589 0.000000 286 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +march 0 61 2.833213 0.000000 295 +content 0 59 2.833213 0.000000 302 +unix 0 58 2.890372 0.000000 308 +summer 0 56 2.890372 0.000000 311 +overview 0 56 2.890372 0.000000 323 +februari 0 54 2.944439 0.000000 328 +undergradu 0 54 2.944439 0.000000 338 +cover 0 55 2.944439 0.000000 329 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +principl 0 48 3.044522 0.000000 357 +basic 0 50 3.044522 0.000000 360 +textbook 0 44 3.135494 0.000000 397 +made 0 44 3.135494 0.000000 398 +execut 0 45 3.135494 0.000000 404 +offer 0 43 3.178054 0.000000 414 +review 0 42 3.218876 0.000000 425 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +fast 0 42 3.218876 0.000000 429 +transact 0 39 3.258097 0.000000 438 +theoret 0 39 3.258097 0.000000 446 +programm 0 39 3.258097 0.000000 445 +origin 0 38 3.295837 0.000000 472 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +procedur 0 36 3.367296 0.000000 488 +survei 0 35 3.401197 0.000000 513 +concurr 0 34 3.401197 0.000000 501 +articl 0 33 3.433987 0.000000 530 +toler 0 33 3.433987 0.000000 533 +concept 0 32 3.465736 0.000000 537 +idea 0 32 3.465736 0.000000 545 +fault 0 32 3.465736 0.000000 547 +chapter 0 32 3.465736 0.000000 536 +titl 0 31 3.496508 0.000000 556 +robert 0 30 3.555348 0.000000 567 +hard 0 30 3.555348 0.000000 563 +art 0 29 3.583519 0.000000 593 +multiprocessor 0 28 3.610918 0.000000 605 +determin 0 27 3.637586 0.000000 630 +altern 0 26 3.688879 0.000000 641 +proc 0 26 3.688879 0.000000 649 +constraint 0 26 3.688879 0.000000 636 +reliabl 0 25 3.737670 0.000000 674 +mobil 0 23 3.806662 0.000000 730 +thread 0 23 3.806662 0.000000 722 +highli 0 23 3.806662 0.000000 725 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +inth 0 22 3.850148 0.000000 741 +kernel 0 20 3.951244 0.000000 825 +andrew 0 19 4.007333 0.000000 849 +anderson 0 19 4.007333 0.000000 860 +prerequisit 0 19 4.007333 0.000000 846 +stand 0 18 4.060443 0.000000 891 +thoma 0 18 4.060443 0.000000 901 +bershad 0 18 4.060443 0.000000 902 +event 0 18 4.060443 0.000000 896 +germani 0 17 4.110874 0.000000 946 +protect 0 17 4.110874 0.000000 935 +earli 0 16 4.174387 0.000000 968 +young 0 16 4.174387 0.000000 991 +remot 0 15 4.248495 0.000000 1041 +princeton 0 15 4.248495 0.000000 1042 +levi 0 14 4.317488 0.000000 1093 +joint 0 13 4.382027 0.000000 1130 +usenix 0 12 4.465908 0.000000 1240 +gupta 0 12 4.465908 0.000000 1241 +bruce 0 12 4.465908 0.000000 1226 +denni 0 11 4.553877 0.000000 1321 +reness 0 11 4.553877 0.000000 1333 +impact 0 11 4.553877 0.000000 1334 +sosp 0 10 4.653960 0.000000 1416 +operatingsystem 0 10 4.653960 0.000000 1401 +henri 0 10 4.653960 0.000000 1417 +black 0 10 4.653960 0.000000 1418 +hint 0 10 4.653960 0.000000 1419 +familiar 0 9 4.753590 0.000000 1485 +robbert 0 9 4.753590 0.000000 1529 +inter 0 9 4.753590 0.000000 1530 +birman 0 9 4.753590 0.000000 1531 +kumar 0 9 4.753590 0.000000 1506 +mach 0 8 4.875197 0.000000 1669 +inproceed 0 8 4.875197 0.000000 1670 +presenc 0 8 4.875197 0.000000 1671 +harrick 0 7 5.010635 0.000000 1849 +beyond 0 7 5.010635 0.000000 1834 +peterson 0 7 5.010635 0.000000 1850 +migrat 0 7 5.010635 0.000000 1851 +encrypt 0 7 5.010635 0.000000 1835 +onoper 0 6 5.164786 0.000000 2048 +multiprogram 0 6 5.164786 0.000000 2010 +thompson 0 6 5.164786 0.000000 2049 +silberschatz 0 6 5.164786 0.000000 1978 +edward 0 6 5.164786 0.000000 2050 +internationalconfer 0 6 5.164786 0.000000 2051 +distributedsystem 0 6 5.164786 0.000000 2022 +versu 0 6 5.164786 0.000000 2052 +ousterhout 0 5 5.347108 0.000000 2301 +ofdistribut 0 5 5.347108 0.000000 2316 +theth 0 5 5.347108 0.000000 2325 +oncomput 0 5 5.347108 0.000000 2326 +joseph 0 5 5.347108 0.000000 2327 +authent 0 5 5.347108 0.000000 2306 +steer 0 5 5.347108 0.000000 2328 +wireless 0 4 5.568345 0.000000 2693 +lazowska 0 4 5.568345 0.000000 2694 +breadth 0 4 5.568345 0.000000 2695 +implic 0 4 5.568345 0.000000 2696 +fora 0 4 5.568345 0.000000 2697 +theacm 0 4 5.568345 0.000000 2698 +zhao 0 4 5.568345 0.000000 2699 +cheriton 0 3 5.857933 0.000000 3259 +synopsi 0 3 5.857933 0.000000 3260 +formobil 0 3 5.857933 0.000000 3261 +requirementsstud 0 3 5.857933 0.000000 3116 +theperform 0 3 5.857933 0.000000 3262 +berlin 0 3 5.857933 0.000000 3263 +terri 0 3 5.857933 0.000000 3264 +golub 0 3 5.857933 0.000000 3265 +tokuda 0 3 5.857933 0.000000 3266 +kistler 0 3 5.857933 0.000000 3267 +synopsisc 0 2 6.263398 0.000000 4296 +systemdesign 0 2 6.263398 0.000000 4297 +theinstructor 0 2 6.263398 0.000000 4298 +anexperiment 0 2 6.263398 0.000000 4299 +afip 0 2 6.263398 0.000000 4300 +hansen 0 2 6.263398 0.000000 4301 +nucleu 0 2 6.263398 0.000000 4302 +bensoussan 0 2 6.263398 0.000000 4303 +multic 0 2 6.263398 0.000000 4304 +virtualmemori 0 2 6.263398 0.000000 4305 +ritchi 0 2 6.263398 0.000000 4306 +tucker 0 2 6.263398 0.000000 4307 +bunt 0 2 6.263398 0.000000 4308 +barrera 0 2 6.263398 0.000000 4309 +acmtransact 0 2 6.263398 0.000000 4310 +nelson 0 2 6.263398 0.000000 4168 +cristian 0 2 6.263398 0.000000 4311 +offailur 0 2 6.263398 0.000000 4255 +systemsr 0 2 6.263398 0.000000 4312 +goldberg 0 2 6.263398 0.000000 4313 +rosenblum 0 2 6.263398 0.000000 4314 +ieeetransact 0 2 6.263398 0.000000 4315 +oninform 0 2 6.263398 0.000000 4316 +baron 0 2 6.263398 0.000000 4317 +rashid 0 2 6.263398 0.000000 4318 +preemptiv 0 2 6.263398 0.000000 4319 +ondistribut 0 2 6.263398 0.000000 4320 +dalei 0 1 6.957497 0.000000 7420 +formultiprogram 0 1 6.957497 0.000000 7421 +karshmer 0 1 6.957497 0.000000 7422 +nehmer 0 1 6.957497 0.000000 7423 +schroeder 0 1 6.957497 0.000000 7424 +needham 0 1 6.957497 0.000000 7425 +trigger 0 1 6.957497 0.000000 7426 +prerequisitegradu 0 1 6.957497 0.000000 7427 +systemssuch 0 1 6.957497 0.000000 7428 +materialin 0 1 6.957497 0.000000 7429 +andsilberschatz 0 1 6.957497 0.000000 7430 +coveringboth 0 1 6.957497 0.000000 7431 +anemphasi 0 1 6.957497 0.000000 7432 +anddiscuss 0 1 6.957497 0.000000 7433 +aterm 0 1 6.957497 0.000000 7434 +systemsfernando 0 1 6.957497 0.000000 7435 +corbato 0 1 6.957497 0.000000 7436 +marjori 0 1 6.957497 0.000000 7437 +merwin 0 1 6.957497 0.000000 7438 +daggett 0 1 6.957497 0.000000 7439 +brinch 0 1 6.957497 0.000000 7440 +clingen 0 1 6.957497 0.000000 7441 +tannenbaum 0 1 6.957497 0.000000 7442 +andexampl 0 1 6.957497 0.000000 7443 +managementa 0 1 6.957497 0.000000 7444 +forshar 0 1 6.957497 0.000000 7445 +schedulingr 0 1 6.957497 0.000000 7446 +communicationj 0 1 6.957497 0.000000 7447 +birel 0 1 6.957497 0.000000 7448 +rpc 0 1 6.957497 0.000000 7449 +lightweightremot 0 1 6.957497 0.000000 7450 +migrationf 0 1 6.957497 0.000000 7451 +dougli 0 1 6.957497 0.000000 7452 +spriteoper 0 1 6.957497 0.000000 7453 +theimer 0 1 6.957497 0.000000 7454 +lantz 0 1 6.957497 0.000000 7455 +preemptabl 0 1 6.957497 0.000000 7456 +tolerancef 0 1 6.957497 0.000000 7457 +sand 0 1 6.957497 0.000000 7458 +sandberg 0 1 6.957497 0.000000 7459 +kleiman 0 1 6.957497 0.000000 7460 +ofsun 0 1 6.957497 0.000000 7461 +mckusick 0 1 6.957497 0.000000 7462 +leffler 0 1 6.957497 0.000000 7463 +fabri 0 1 6.957497 0.000000 7464 +fastfil 0 1 6.957497 0.000000 7465 +alog 0 1 6.957497 0.000000 7466 +systemsm 0 1 6.957497 0.000000 7467 +gifford 0 1 6.957497 0.000000 7468 +securityr 0 1 6.957497 0.000000 7469 +inlarg 0 1 6.957497 0.000000 7470 +butler 0 1 6.957497 0.000000 7471 +lampson 0 1 6.957497 0.000000 7472 +accetta 0 1 6.957497 0.000000 7473 +boloski 0 1 6.957497 0.000000 7474 +tevanian 0 1 6.957497 0.000000 7475 +systemsh 0 1 6.957497 0.000000 7476 +kopetz 0 1 6.957497 0.000000 7477 +timesystem 0 1 6.957497 0.000000 7478 +layland 0 1 6.957497 0.000000 7479 +ramamritham 0 1 6.957497 0.000000 7480 +stankov 0 1 6.957497 0.000000 7481 +schedulingund 0 1 6.957497 0.000000 7482 +mercer 0 1 6.957497 0.000000 7483 +computingb 0 1 6.957497 0.000000 7484 +badrinath 0 1 6.957497 0.000000 7485 +acharya 0 1 6.957497 0.000000 7486 +imielinski 0 1 6.957497 0.000000 7487 +satyanarayanan 0 1 6.957497 0.000000 7488 +okasaki 0 1 6.957497 0.000000 7489 +siegel 0 1 6.957497 0.000000 7490 +coda 0 1 6.957497 0.000000 7491 +distributedworkst 0 1 6.957497 0.000000 7492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..be5915bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +phone 0 175 1.791759 0.000000 45 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +austin 0 168 1.791759 0.000000 63 +data 0 170 1.791759 0.000000 49 +support 0 132 1.945910 0.000000 83 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +databas 0 122 2.079442 0.000000 86 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +teach 0 108 2.197225 0.000000 112 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +assist 0 112 2.197225 0.000000 113 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +proceed 0 93 2.397895 0.000000 152 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +follow 0 92 2.397895 0.000000 143 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +control 0 82 2.484907 0.000000 164 +requir 0 81 2.484907 0.000000 167 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +resourc 0 81 2.484907 0.000000 172 +second 0 81 2.484907 0.000000 166 +environ 0 84 2.484907 0.000000 177 +server 0 76 2.564949 0.000000 204 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +optim 0 79 2.564949 0.000000 197 +june 0 79 2.564949 0.000000 214 +dynam 0 76 2.564949 0.000000 194 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +multimedia 1 68 2.708050 2.708050 258 +august 0 66 2.708050 0.000000 257 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +septemb 0 65 2.772589 0.000000 274 +descript 0 64 2.772589 0.000000 271 +copi 0 63 2.772589 0.000000 284 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +taylor 0 63 2.772589 0.000000 287 +collect 0 65 2.772589 0.000000 268 +januari 0 62 2.772589 0.000000 264 +content 0 59 2.833213 0.000000 302 +locat 0 59 2.833213 0.000000 303 +march 0 61 2.833213 0.000000 295 +simpl 0 60 2.833213 0.000000 298 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +summer 0 56 2.890372 0.000000 311 +space 0 57 2.890372 0.000000 310 +cover 0 55 2.944439 0.000000 329 +processor 0 54 2.944439 0.000000 335 +digit 0 52 2.995732 0.000000 348 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +format 0 48 3.044522 0.000000 356 +basic 0 50 3.044522 0.000000 360 +appoint 0 49 3.044522 0.000000 358 +principl 0 48 3.044522 0.000000 357 +standard 0 48 3.044522 0.000000 365 +still 0 50 3.044522 0.000000 362 +possibl 0 47 3.091042 0.000000 378 +california 0 46 3.091042 0.000000 388 +adapt 0 46 3.091042 0.000000 387 +video 0 44 3.135494 0.000000 405 +protocol 0 45 3.135494 0.000000 407 +textbook 0 44 3.135494 0.000000 397 +discuss 0 45 3.135494 0.000000 399 +answer 0 45 3.135494 0.000000 391 +describ 0 45 3.135494 0.000000 400 +offer 0 43 3.178054 0.000000 414 +long 0 43 3.178054 0.000000 413 +third 0 43 3.178054 0.000000 412 +mechan 0 43 3.178054 0.000000 416 +review 0 42 3.218876 0.000000 425 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +futur 0 41 3.218876 0.000000 427 +tutori 0 39 3.258097 0.000000 437 +multipl 0 39 3.258097 0.000000 453 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +multi 0 36 3.367296 0.000000 493 +ofth 0 36 3.367296 0.000000 491 +survei 0 35 3.401197 0.000000 513 +represent 0 35 3.401197 0.000000 512 +statist 0 35 3.401197 0.000000 521 +articl 0 33 3.433987 0.000000 530 +toler 0 33 3.433987 0.000000 533 +concept 0 32 3.465736 0.000000 537 +fault 0 32 3.465736 0.000000 547 +storag 0 31 3.496508 0.000000 553 +titl 0 31 3.496508 0.000000 556 +particip 0 29 3.583519 0.000000 589 +synchron 0 29 3.583519 0.000000 588 +scale 0 28 3.610918 0.000000 613 +framework 0 28 3.610918 0.000000 606 +packag 0 28 3.610918 0.000000 614 +retriev 0 27 3.637586 0.000000 621 +determin 0 27 3.637586 0.000000 630 +arrai 0 27 3.637586 0.000000 627 +session 0 26 3.688879 0.000000 643 +fundament 0 25 3.737670 0.000000 661 +magazin 0 24 3.761200 0.000000 704 +scalabl 0 24 3.761200 0.000000 705 +compress 0 23 3.806662 0.000000 719 +variabl 0 23 3.806662 0.000000 715 +disk 0 22 3.850148 0.000000 747 +emphasi 0 22 3.850148 0.000000 755 +reduc 0 22 3.850148 0.000000 759 +chen 0 21 3.912023 0.000000 791 +flexibl 0 21 3.912023 0.000000 792 +mpeg 0 20 3.951244 0.000000 831 +scheme 0 20 3.951244 0.000000 818 +sure 0 20 3.951244 0.000000 813 +qualiti 0 20 3.951244 0.000000 832 +media 0 19 4.007333 0.000000 861 +boston 0 19 4.007333 0.000000 862 +prerequisit 0 19 4.007333 0.000000 846 +comparison 0 19 4.007333 0.000000 863 +anderson 0 19 4.007333 0.000000 860 +stand 0 18 4.060443 0.000000 891 +failur 0 18 4.060443 0.000000 898 +layer 0 17 4.110874 0.000000 926 +zhang 0 16 4.174387 0.000000 980 +diego 0 16 4.174387 0.000000 992 +transfer 0 16 4.174387 0.000000 967 +hierarch 0 15 4.248495 0.000000 1018 +rate 0 15 4.248495 0.000000 1037 +contribut 0 15 4.248495 0.000000 1021 +stream 0 15 4.248495 0.000000 1015 +audio 0 14 4.317488 0.000000 1094 +francisco 0 14 4.317488 0.000000 1095 +incomput 0 14 4.317488 0.000000 1096 +demand 0 14 4.317488 0.000000 1073 +heterogen 0 14 4.317488 0.000000 1090 +consider 0 14 4.317488 0.000000 1076 +introduc 0 13 4.382027 0.000000 1139 +carri 0 13 4.382027 0.000000 1152 +resolut 0 13 4.382027 0.000000 1172 +sigmetr 0 13 4.382027 0.000000 1173 +buffer 0 12 4.465908 0.000000 1211 +uniqu 0 12 4.465908 0.000000 1228 +readi 0 12 4.465908 0.000000 1242 +scan 0 12 4.465908 0.000000 1243 +philadelphia 0 12 4.465908 0.000000 1244 +weight 0 12 4.465908 0.000000 1204 +placement 0 10 4.653960 0.000000 1420 +packet 0 10 4.653960 0.000000 1415 +traffic 0 10 4.653960 0.000000 1421 +operatingsystem 0 10 4.653960 0.000000 1401 +establish 0 9 4.753590 0.000000 1532 +familiar 0 9 4.753590 0.000000 1485 +recoveri 0 9 4.753590 0.000000 1474 +light 0 9 4.753590 0.000000 1533 +osdi 0 9 4.753590 0.000000 1534 +transport 0 8 4.875197 0.000000 1672 +convers 0 8 4.875197 0.000000 1673 +pacif 0 8 4.875197 0.000000 1674 +grove 0 8 4.875197 0.000000 1675 +harrick 0 7 5.010635 0.000000 1849 +trend 0 7 5.010635 0.000000 1842 +channel 0 7 5.010635 0.000000 1836 +keshav 0 7 5.010635 0.000000 1852 +peterson 0 7 5.010635 0.000000 1850 +chiang 0 7 5.010635 0.000000 1853 +supportfor 0 7 5.010635 0.000000 1854 +smooth 0 7 5.010635 0.000000 1855 +misra 0 7 5.010635 0.000000 1856 +conferenc 0 7 5.010635 0.000000 1857 +jpeg 0 6 5.164786 0.000000 2053 +internationalconfer 0 6 5.164786 0.000000 2051 +subsystem 0 6 5.164786 0.000000 2015 +symposiumon 0 6 5.164786 0.000000 2054 +sigcomm 0 5 5.347108 0.000000 2329 +row 0 5 5.347108 0.000000 2330 +proceedingsof 0 5 5.347108 0.000000 2331 +batch 0 4 5.568345 0.000000 2700 +multimediasystem 0 4 5.568345 0.000000 2701 +venkat 0 4 5.568345 0.000000 2702 +addition 0 4 5.568345 0.000000 2593 +forparallel 0 4 5.568345 0.000000 2703 +admiss 0 4 5.568345 0.000000 2704 +clark 0 4 5.568345 0.000000 2705 +floyd 0 4 5.568345 0.000000 2682 +buss 0 4 5.568345 0.000000 2649 +goyal 0 3 5.857933 0.000000 3268 +shenoi 0 3 5.857933 0.000000 3269 +rangan 0 3 5.857933 0.000000 3270 +anaheim 0 3 5.857933 0.000000 3271 +campbel 0 3 5.857933 0.000000 3272 +mccann 0 3 5.857933 0.000000 3273 +synopsi 0 3 5.857933 0.000000 3260 +informationcours 0 3 5.857933 0.000000 3167 +guadalup 0 3 5.857933 0.000000 3255 +multimediaappl 0 3 5.857933 0.000000 3274 +ftc 0 3 5.857933 0.000000 3275 +katz 0 3 5.857933 0.000000 3276 +reddi 0 3 5.857933 0.000000 3277 +deliveri 0 3 5.857933 0.000000 3278 +durham 0 3 5.857933 0.000000 3279 +hampshir 0 3 5.857933 0.000000 3280 +chow 0 3 5.857933 0.000000 3281 +london 0 3 5.857933 0.000000 3282 +kandlur 0 2 6.263398 0.000000 4321 +ofmultimedia 0 2 6.263398 0.000000 4322 +icmc 0 2 6.263398 0.000000 4323 +jacobson 0 2 6.263398 0.000000 4324 +prerequisitesgradu 0 2 6.263398 0.000000 4325 +synopsisc 0 2 6.263398 0.000000 4296 +madeavail 0 2 6.263398 0.000000 4326 +thetop 0 2 6.263398 0.000000 4327 +critiqu 0 2 6.263398 0.000000 4328 +ofpap 0 2 6.263398 0.000000 4329 +andclass 0 2 6.263398 0.000000 4330 +prashant 0 2 6.263398 0.000000 4331 +gemmel 0 2 6.263398 0.000000 4332 +ieeeintern 0 2 6.263398 0.000000 4333 +inmulti 0 2 6.263398 0.000000 4334 +annualintern 0 2 6.263398 0.000000 4335 +pasadena 0 2 6.263398 0.000000 4336 +multimediai 0 2 6.263398 0.000000 4337 +acmmultimedia 0 2 6.263398 0.000000 4338 +sanfrancisco 0 2 6.263398 0.000000 4339 +shenker 0 2 6.263398 0.000000 4340 +verma 0 2 6.263398 0.000000 4341 +delaybound 0 2 6.263398 0.000000 4342 +toappear 0 2 6.263398 0.000000 4343 +nossdav 0 2 6.263398 0.000000 4344 +acmsigcomm 0 2 6.263398 0.000000 4345 +andd 0 2 6.263398 0.000000 4346 +shepherd 0 2 6.263398 0.000000 4347 +basedcommun 0 2 6.263398 0.000000 4348 +incommun 0 2 6.263398 0.000000 4349 +govindan 0 2 6.263398 0.000000 4350 +forcontinu 0 2 6.263398 0.000000 4351 +formultimedia 0 2 6.263398 0.000000 4352 +zellweg 0 2 6.263398 0.000000 4353 +swinehart 0 2 6.263398 0.000000 4354 +etherphon 0 2 6.263398 0.000000 4355 +steinmetz 0 1 6.957497 0.000000 7493 +sitaram 0 1 6.957497 0.000000 7494 +coulson 0 1 6.957497 0.000000 7495 +descriptiongener 0 1 6.957497 0.000000 7496 +boththeoret 0 1 6.957497 0.000000 7497 +systemsupport 0 1 6.957497 0.000000 7498 +transportprotocol 0 1 6.957497 0.000000 7499 +designissu 0 1 6.957497 0.000000 7500 +textbooka 0 1 6.957497 0.000000 7501 +requirementsth 0 1 6.957497 0.000000 7502 +relatedpap 0 1 6.957497 0.000000 7503 +tounderstand 0 1 6.957497 0.000000 7504 +asemest 0 1 6.957497 0.000000 7505 +vintuesdai 0 1 6.957497 0.000000 7506 +assistantmr 0 1 6.957497 0.000000 7507 +eduread 0 1 6.957497 0.000000 7508 +cntain 0 1 6.957497 0.000000 7509 +theread 0 1 6.957497 0.000000 7510 +speedwai 0 1 6.957497 0.000000 7511 +dobi 0 1 6.957497 0.000000 7512 +mall 0 1 6.957497 0.000000 7513 +callthem 0 1 6.957497 0.000000 7514 +compressionr 0 1 6.957497 0.000000 7515 +wallac 0 1 6.957497 0.000000 7516 +gall 0 1 6.957497 0.000000 7517 +anastassi 0 1 6.957497 0.000000 7518 +digitaltelevis 0 1 6.957497 0.000000 7519 +serversoverview 0 1 6.957497 0.000000 7520 +serverdesign 0 1 6.957497 0.000000 7521 +chiueh 0 1 6.957497 0.000000 7522 +groupedsweep 0 1 6.957497 0.000000 7523 +ofthird 0 1 6.957497 0.000000 7524 +narasimha 0 1 6.957497 0.000000 7525 +wylli 0 1 6.957497 0.000000 7526 +admissioncontrol 0 1 6.957497 0.000000 7527 +designinglarg 0 1 6.957497 0.000000 7528 +inmultimedia 0 1 6.957497 0.000000 7529 +interactivevideo 0 1 6.957497 0.000000 7530 +playout 0 1 6.957497 0.000000 7531 +shahabuddin 0 1 6.957497 0.000000 7532 +foran 0 1 6.957497 0.000000 7533 +demandvideo 0 1 6.957497 0.000000 7534 +papadimitri 0 1 6.957497 0.000000 7535 +ramanathan 0 1 6.957497 0.000000 7536 +informationcach 0 1 6.957497 0.000000 7537 +homeentertain 0 1 6.957497 0.000000 7538 +multimedianetwork 0 1 6.957497 0.000000 7539 +ferrari 0 1 6.957497 0.000000 7540 +channelestablish 0 1 6.957497 0.000000 7541 +areasin 0 1 6.957497 0.000000 7542 +servicedisciplin 0 1 6.957497 0.000000 7543 +workshopon 0 1 6.957497 0.000000 7544 +losslesssmooth 0 1 6.957497 0.000000 7545 +salehi 0 1 6.957497 0.000000 7546 +kuros 0 1 6.957497 0.000000 7547 +towslei 0 1 6.957497 0.000000 7548 +storedvideo 0 1 6.957497 0.000000 7549 +requirementsthrough 0 1 6.957497 0.000000 7550 +grossglaus 0 1 6.957497 0.000000 7551 +rcbr 0 1 6.957497 0.000000 7552 +efficientservic 0 1 6.957497 0.000000 7553 +kanakia 0 1 6.957497 0.000000 7554 +reibman 0 1 6.957497 0.000000 7555 +congestioncontrol 0 1 6.957497 0.000000 7556 +tennenhous 0 1 6.957497 0.000000 7557 +newgener 0 1 6.957497 0.000000 7558 +hutchison 0 1 6.957497 0.000000 7559 +servicearchitectur 0 1 6.957497 0.000000 7560 +turner 0 1 6.957497 0.000000 7561 +reliablemulticast 0 1 6.957497 0.000000 7562 +levelfram 0 1 6.957497 0.000000 7563 +deffner 0 1 6.957497 0.000000 7564 +schulzrinn 0 1 6.957497 0.000000 7565 +blakowski 0 1 6.957497 0.000000 7566 +onselect 0 1 6.957497 0.000000 7567 +januaryoper 0 1 6.957497 0.000000 7568 +multimediag 0 1 6.957497 0.000000 7569 +robin 0 1 6.957497 0.000000 7570 +blair 0 1 6.957497 0.000000 7571 +papathoma 0 1 6.957497 0.000000 7572 +choru 0 1 6.957497 0.000000 7573 +druschel 0 1 6.957497 0.000000 7574 +abbott 0 1 6.957497 0.000000 7575 +pagel 0 1 6.957497 0.000000 7576 +systemssupport 0 1 6.957497 0.000000 7577 +conferencingh 0 1 6.957497 0.000000 7578 +venkatrangan 0 1 6.957497 0.000000 7579 +packetvideo 0 1 6.957497 0.000000 7580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..4a8a879e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,297 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +hour 0 165 1.791759 0.000000 46 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +data 0 170 1.791759 0.000000 49 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +databas 0 122 2.079442 0.000000 86 +schedul 0 119 2.079442 0.000000 85 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +high 0 130 2.079442 0.000000 101 +seattl 0 120 2.079442 0.000000 103 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +proceed 1 93 2.397895 2.397895 152 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +pictur 0 89 2.397895 0.000000 160 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +imag 0 91 2.397895 0.000000 161 +octob 0 89 2.397895 0.000000 156 +requir 0 81 2.484907 0.000000 167 +internet 0 83 2.484907 0.000000 186 +novemb 0 81 2.484907 0.000000 179 +resourc 0 81 2.484907 0.000000 172 +build 0 85 2.484907 0.000000 184 +control 0 82 2.484907 0.000000 164 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +larg 0 82 2.484907 0.000000 168 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +symposium 0 72 2.639057 0.000000 238 +servic 0 72 2.639057 0.000000 236 +meet 0 72 2.639057 0.000000 229 +effici 0 73 2.639057 0.000000 233 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +multimedia 0 68 2.708050 0.000000 258 +receiv 0 66 2.708050 0.000000 244 +simul 0 66 2.708050 0.000000 255 +august 0 66 2.708050 0.000000 257 +descript 0 64 2.772589 0.000000 271 +interact 0 62 2.772589 0.000000 270 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +share 0 59 2.833213 0.000000 304 +content 0 59 2.833213 0.000000 302 +locat 0 59 2.833213 0.000000 303 +semest 0 58 2.890372 0.000000 312 +processor 0 54 2.944439 0.000000 335 +februari 0 54 2.944439 0.000000 328 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +basic 0 50 3.044522 0.000000 360 +appoint 0 49 3.044522 0.000000 358 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +textbook 0 44 3.135494 0.000000 397 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +fridai 0 44 3.135494 0.000000 390 +made 0 44 3.135494 0.000000 398 +discuss 0 45 3.135494 0.000000 399 +mechan 0 43 3.178054 0.000000 416 +http 0 41 3.218876 0.000000 420 +transact 0 39 3.258097 0.000000 438 +submit 0 39 3.258097 0.000000 440 +paul 0 38 3.295837 0.000000 471 +cost 0 37 3.332205 0.000000 480 +connect 0 37 3.332205 0.000000 485 +tree 0 36 3.367296 0.000000 492 +multi 0 36 3.367296 0.000000 493 +ofth 0 36 3.367296 0.000000 491 +queri 0 33 3.433987 0.000000 524 +articl 0 33 3.433987 0.000000 530 +concept 0 32 3.465736 0.000000 537 +collabor 0 32 3.465736 0.000000 543 +extend 0 32 3.465736 0.000000 539 +titl 0 31 3.496508 0.000000 556 +domain 0 30 3.555348 0.000000 564 +particip 0 29 3.583519 0.000000 589 +semant 0 29 3.583519 0.000000 587 +framework 0 28 3.610918 0.000000 606 +retriev 0 27 3.637586 0.000000 621 +berkelei 0 26 3.688879 0.000000 657 +reliabl 0 25 3.737670 0.000000 674 +scalabl 0 24 3.761200 0.000000 705 +frame 0 24 3.761200 0.000000 684 +mobil 0 23 3.806662 0.000000 730 +emphasi 0 22 3.850148 0.000000 755 +self 0 22 3.850148 0.000000 761 +color 0 22 3.850148 0.000000 762 +rout 0 21 3.912023 0.000000 793 +scheme 0 20 3.951244 0.000000 818 +boston 0 19 4.007333 0.000000 862 +prerequisit 0 19 4.007333 0.000000 846 +anderson 0 19 4.007333 0.000000 860 +media 0 19 4.007333 0.000000 861 +predict 0 19 4.007333 0.000000 855 +stand 0 18 4.060443 0.000000 891 +letter 0 16 4.174387 0.000000 981 +vector 0 16 4.174387 0.000000 961 +diego 0 16 4.174387 0.000000 992 +zhang 0 16 4.174387 0.000000 980 +latenc 0 16 4.174387 0.000000 993 +hierarch 0 15 4.248495 0.000000 1018 +francisco 0 14 4.317488 0.000000 1095 +audio 0 14 4.317488 0.000000 1094 +gupta 0 12 4.465908 0.000000 1241 +replic 0 12 4.465908 0.000000 1231 +shape 0 12 4.465908 0.000000 1245 +clock 0 11 4.553877 0.000000 1320 +packet 0 10 4.653960 0.000000 1415 +queue 0 10 4.653960 0.000000 1386 +sosp 0 10 4.653960 0.000000 1416 +familiar 0 9 4.753590 0.000000 1485 +inter 0 9 4.753590 0.000000 1530 +distanc 0 9 4.753590 0.000000 1500 +light 0 9 4.753590 0.000000 1533 +osdi 0 9 4.753590 0.000000 1534 +face 0 9 4.753590 0.000000 1501 +paradigm 0 8 4.875197 0.000000 1662 +transport 0 8 4.875197 0.000000 1672 +parti 0 8 4.875197 0.000000 1676 +textur 0 8 4.875197 0.000000 1677 +conferenc 0 7 5.010635 0.000000 1857 +harrick 0 7 5.010635 0.000000 1849 +core 0 7 5.010635 0.000000 1809 +channel 0 7 5.010635 0.000000 1836 +determinist 0 6 5.164786 0.000000 2034 +onoper 0 6 5.164786 0.000000 2048 +multicast 0 5 5.347108 0.000000 2305 +sigcomm 0 5 5.347108 0.000000 2329 +jain 0 5 5.347108 0.000000 2332 +fair 0 5 5.347108 0.000000 2333 +consum 0 5 5.347108 0.000000 2334 +multimediasystem 0 4 5.568345 0.000000 2701 +venkat 0 4 5.568345 0.000000 2702 +andevalu 0 4 5.568345 0.000000 2706 +floyd 0 4 5.568345 0.000000 2682 +theacm 0 4 5.568345 0.000000 2698 +ofinform 0 4 5.568345 0.000000 2707 +bach 0 4 5.568345 0.000000 2708 +mccann 0 3 5.857933 0.000000 3273 +cheriton 0 3 5.857933 0.000000 3259 +infocom 0 3 5.857933 0.000000 3283 +weihl 0 3 5.857933 0.000000 3284 +synopsi 0 3 5.857933 0.000000 3260 +networkprotocol 0 3 5.857933 0.000000 3285 +requirementsstud 0 3 5.857933 0.000000 3116 +aswel 0 3 5.857933 0.000000 3286 +kistler 0 3 5.857933 0.000000 3267 +rangan 0 3 5.857933 0.000000 3270 +franci 0 3 5.857933 0.000000 3287 +singhal 0 3 5.857933 0.000000 3098 +axiomat 0 3 5.857933 0.000000 3288 +how 0 3 5.857933 0.000000 3289 +moran 0 3 5.857933 0.000000 3151 +nguyen 0 3 5.857933 0.000000 3290 +nearbi 0 3 5.857933 0.000000 3291 +ofoper 0 3 5.857933 0.000000 3292 +proport 0 3 5.857933 0.000000 3293 +qbic 0 3 5.857933 0.000000 3294 +acmmultimedia 0 2 6.263398 0.000000 4338 +jacobson 0 2 6.263398 0.000000 4324 +deer 0 2 6.263398 0.000000 4356 +jeffai 0 2 6.263398 0.000000 4357 +prerequisitesgradu 0 2 6.263398 0.000000 4325 +formultimedia 0 2 6.263398 0.000000 4352 +andresearch 0 2 6.263398 0.000000 4247 +theinstructor 0 2 6.263398 0.000000 4298 +studentsenrol 0 2 6.263398 0.000000 4073 +redel 0 2 6.263398 0.000000 4358 +zellweg 0 2 6.263398 0.000000 4353 +swinehart 0 2 6.263398 0.000000 4354 +etherphon 0 2 6.263398 0.000000 4355 +lan 0 2 6.263398 0.000000 4359 +computersystem 0 2 6.263398 0.000000 4360 +mbone 0 2 6.263398 0.000000 4361 +shenker 0 2 6.263398 0.000000 4340 +acmsigcomm 0 2 6.263398 0.000000 4345 +resourcemanag 0 2 6.263398 0.000000 4266 +govindan 0 2 6.263398 0.000000 4350 +forcontinu 0 2 6.263398 0.000000 4351 +monterei 0 2 6.263398 0.000000 4362 +timeoper 0 2 6.263398 0.000000 4363 +niblack 0 2 6.263398 0.000000 4364 +managementsystem 0 2 6.263398 0.000000 4365 +knowledgeand 0 2 6.263398 0.000000 4366 +onveri 0 2 6.263398 0.000000 4367 +crowcroft 0 1 6.957497 0.000000 7581 +warldersburg 0 1 6.957497 0.000000 7582 +synopsisthi 0 1 6.957497 0.000000 7583 +bediscuss 0 1 6.957497 0.000000 7584 +andmultimedia 0 1 6.957497 0.000000 7585 +multimediadatabas 0 1 6.957497 0.000000 7586 +determinedbas 0 1 6.957497 0.000000 7587 +orcarri 0 1 6.957497 0.000000 7588 +hoursfridai 0 1 6.957497 0.000000 7589 +flexibleframework 0 1 6.957497 0.000000 7590 +handlei 0 1 6.957497 0.000000 7591 +wakeman 0 1 6.957497 0.000000 7592 +controlchannel 0 1 6.957497 0.000000 7593 +cccp 0 1 6.957497 0.000000 7594 +conferencecontrol 0 1 6.957497 0.000000 7595 +gajewska 0 1 6.957497 0.000000 7596 +manass 0 1 6.957497 0.000000 7597 +argo 0 1 6.957497 0.000000 7598 +systemfor 0 1 6.957497 0.000000 7599 +gong 0 1 6.957497 0.000000 7600 +multipoint 0 1 6.957497 0.000000 7601 +basedmultimedia 0 1 6.957497 0.000000 7602 +ieeecomput 0 1 6.957497 0.000000 7603 +datagraminternetwork 0 1 6.957497 0.000000 7604 +ballardi 0 1 6.957497 0.000000 7605 +thyagarajan 0 1 6.957497 0.000000 7606 +widyono 0 1 6.957497 0.000000 7607 +msthesi 0 1 6.957497 0.000000 7608 +kompella 0 1 6.957497 0.000000 7609 +pasqual 0 1 6.957497 0.000000 7610 +polyzo 0 1 6.957497 0.000000 7611 +multimediacommun 0 1 6.957497 0.000000 7612 +weightsess 0 1 6.957497 0.000000 7613 +ofacm 0 1 6.957497 0.000000 7614 +holbrook 0 1 6.957497 0.000000 7615 +fordistribut 0 1 6.957497 0.000000 7616 +herzog 0 1 6.957497 0.000000 7617 +estrin 0 1 6.957497 0.000000 7618 +timecommun 0 1 6.957497 0.000000 7619 +servicesj 0 1 6.957497 0.000000 7620 +guyton 0 1 6.957497 0.000000 7621 +schwartz 0 1 6.957497 0.000000 7622 +mogul 0 1 6.957497 0.000000 7623 +forpersist 0 1 6.957497 0.000000 7624 +supportc 0 1 6.957497 0.000000 7625 +lotteri 0 1 6.957497 0.000000 7626 +flexibleproport 0 1 6.957497 0.000000 7627 +mangement 0 1 6.957497 0.000000 7628 +strideschedul 0 1 6.957497 0.000000 7629 +golestani 0 1 6.957497 0.000000 7630 +speedappl 0 1 6.957497 0.000000 7631 +timeproduc 0 1 6.957497 0.000000 7632 +ofeffici 0 1 6.957497 0.000000 7633 +sigapp 0 1 6.957497 0.000000 7634 +intim 0 1 6.957497 0.000000 7635 +databasesw 0 1 6.957497 0.000000 7636 +contentus 0 1 6.957497 0.000000 7637 +cawkel 0 1 6.957497 0.000000 7638 +weymouth 0 1 6.957497 0.000000 7639 +vimsi 0 1 6.957497 0.000000 7640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..91edcaf1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +updat 0 191 1.609438 0.000000 41 +fall 0 181 1.609438 0.000000 40 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +note 0 142 1.945910 0.000000 67 +number 0 130 2.079442 0.000000 97 +final 0 116 2.197225 0.000000 108 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +well 0 109 2.197225 0.000000 121 +structur 0 106 2.197225 0.000000 105 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +question 1 91 2.397895 2.397895 141 +exam 0 86 2.484907 0.000000 169 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +decemb 0 80 2.564949 0.000000 215 +refer 0 78 2.564949 0.000000 203 +mondai 0 77 2.564949 0.000000 206 +receiv 0 66 2.708050 0.000000 244 +differ 0 66 2.708050 0.000000 253 +handout 0 64 2.772589 0.000000 263 +descript 0 64 2.772589 0.000000 271 +think 0 57 2.890372 0.000000 314 +instruct 0 53 2.944439 0.000000 332 +case 0 51 2.995732 0.000000 351 +right 0 48 3.044522 0.000000 363 +set 0 50 3.044522 0.000000 361 +answer 0 45 3.135494 0.000000 391 +fridai 0 44 3.135494 0.000000 390 +littl 0 39 3.258097 0.000000 454 +respons 0 37 3.332205 0.000000 476 +word 0 34 3.401197 0.000000 508 +either 0 35 3.401197 0.000000 506 +chapter 0 32 3.465736 0.000000 536 +quot 0 29 3.583519 0.000000 582 +known 0 24 3.761200 0.000000 702 +size 0 23 3.806662 0.000000 713 +inth 0 22 3.850148 0.000000 741 +sent 0 22 3.850148 0.000000 763 +cycl 0 11 4.553877 0.000000 1335 +true 0 10 4.653960 0.000000 1422 +sentenc 0 10 4.653960 0.000000 1413 +total 0 10 4.653960 0.000000 1398 +label 0 10 4.653960 0.000000 1423 +equal 0 10 4.653960 0.000000 1424 +pose 0 9 4.753590 0.000000 1535 +face 0 9 4.753590 0.000000 1501 +entri 0 8 4.875197 0.000000 1678 +largest 0 7 5.010635 0.000000 1858 +meant 0 6 5.164786 0.000000 2055 +vertic 0 5 5.347108 0.000000 2270 +worst 0 5 5.347108 0.000000 2287 +vijaya 0 4 5.568345 0.000000 2677 +disjoint 0 4 5.568345 0.000000 2709 +denot 0 3 5.857933 0.000000 3147 +omega 0 2 6.263398 0.000000 4368 +sigma 0 2 6.263398 0.000000 4369 +amort 0 2 6.263398 0.000000 4370 +submatrix 0 1 6.957497 0.000000 7641 +ramachandranuniqu 0 1 6.957497 0.000000 7642 +onsigma 0 1 6.957497 0.000000 7643 +oroth 0 1 6.957497 0.000000 7644 +isther 0 1 6.957497 0.000000 7645 +paragraphof 0 1 6.957497 0.000000 7646 +containdistinct 0 1 6.957497 0.000000 7647 +cancontain 0 1 6.957497 0.000000 7648 +unclear 0 1 6.957497 0.000000 7649 +somek 0 1 6.957497 0.000000 7650 +bepost 0 1 6.957497 0.000000 7651 +youhav 0 1 6.957497 0.000000 7652 +yourbest 0 1 6.957497 0.000000 7653 +judgment 0 1 6.957497 0.000000 7654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..fbfa9241 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,228 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +object 0 138 1.945910 0.000000 79 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +machin 0 129 2.079442 0.000000 95 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +code 0 108 2.197225 0.000000 116 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +place 0 106 2.197225 0.000000 124 +look 0 107 2.197225 0.000000 115 +well 0 109 2.197225 0.000000 121 +structur 0 106 2.197225 0.000000 105 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +question 0 91 2.397895 0.000000 141 +section 0 94 2.397895 0.000000 149 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +thing 0 84 2.484907 0.000000 189 +chang 0 82 2.484907 0.000000 163 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +help 0 83 2.484907 0.000000 175 +second 0 81 2.484907 0.000000 166 +solut 0 82 2.484907 0.000000 162 +homework 0 79 2.564949 0.000000 193 +orient 0 80 2.564949 0.000000 205 +come 0 78 2.564949 0.000000 202 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +free 0 73 2.639057 0.000000 224 +html 0 75 2.639057 0.000000 235 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +logic 0 71 2.639057 0.000000 230 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +syllabu 0 67 2.708050 0.000000 247 +interact 0 62 2.772589 0.000000 270 +simpl 0 60 2.833213 0.000000 298 +plai 0 60 2.833213 0.000000 307 +type 0 61 2.833213 0.000000 296 +unix 0 58 2.890372 0.000000 308 +browser 0 56 2.890372 0.000000 313 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +cover 0 55 2.944439 0.000000 329 +suggest 0 53 2.944439 0.000000 331 +allow 0 53 2.944439 0.000000 333 +three 0 54 2.944439 0.000000 330 +run 0 51 2.995732 0.000000 347 +standard 0 48 3.044522 0.000000 365 +format 0 48 3.044522 0.000000 356 +get 0 46 3.091042 0.000000 380 +featur 0 46 3.091042 0.000000 386 +answer 0 45 3.135494 0.000000 391 +describ 0 45 3.135494 0.000000 400 +mark 0 44 3.135494 0.000000 403 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +show 0 43 3.178054 0.000000 417 +might 0 41 3.218876 0.000000 426 +past 0 42 3.218876 0.000000 428 +error 0 40 3.258097 0.000000 449 +tutori 0 39 3.258097 0.000000 437 +author 0 39 3.258097 0.000000 450 +littl 0 39 3.258097 0.000000 454 +correct 0 38 3.295837 0.000000 462 +especi 0 36 3.367296 0.000000 496 +procedur 0 36 3.367296 0.000000 488 +chapter 0 32 3.465736 0.000000 536 +express 0 32 3.465736 0.000000 540 +kind 0 32 3.465736 0.000000 541 +someth 0 31 3.496508 0.000000 554 +anim 0 31 3.496508 0.000000 557 +abl 0 30 3.555348 0.000000 566 +actual 0 28 3.610918 0.000000 604 +linux 0 27 3.637586 0.000000 631 +comp 0 26 3.688879 0.000000 650 +subject 0 26 3.688879 0.000000 647 +rather 0 26 3.688879 0.000000 642 +brows 0 23 3.806662 0.000000 726 +recommend 0 22 3.850148 0.000000 737 +instal 0 22 3.850148 0.000000 754 +sort 0 22 3.850148 0.000000 738 +self 0 22 3.850148 0.000000 761 +newsgroup 0 21 3.912023 0.000000 783 +theorem 0 21 3.912023 0.000000 786 +scheme 1 20 3.951244 3.951244 818 +sure 0 20 3.951244 0.000000 813 +definit 0 19 4.007333 0.000000 864 +lot 0 18 4.060443 0.000000 889 +along 0 18 4.060443 0.000000 878 +repositori 0 17 4.110874 0.000000 932 +regular 0 17 4.110874 0.000000 929 +quiz 0 16 4.174387 0.000000 990 +explan 0 16 4.174387 0.000000 985 +advantag 0 16 4.174387 0.000000 987 +later 0 15 4.248495 0.000000 1043 +pagec 0 15 4.248495 0.000000 1011 +goe 0 15 4.248495 0.000000 1044 +command 0 14 4.317488 0.000000 1083 +convent 0 14 4.317488 0.000000 1072 +draw 0 14 4.317488 0.000000 1086 +essenti 0 13 4.382027 0.000000 1137 +prolog 0 13 4.382027 0.000000 1155 +solari 0 12 4.465908 0.000000 1238 +reader 0 12 4.465908 0.000000 1246 +calculu 0 12 4.465908 0.000000 1203 +rice 0 11 4.553877 0.000000 1336 +see 0 11 4.553877 0.000000 1337 +instanc 0 11 4.553877 0.000000 1322 +subset 0 10 4.653960 0.000000 1425 +wilson 0 9 4.753590 0.000000 1536 +classifi 0 9 4.753590 0.000000 1537 +latter 0 9 4.753590 0.000000 1522 +illustr 0 8 4.875197 0.000000 1679 +marc 0 8 4.875197 0.000000 1680 +besid 0 8 4.875197 0.000000 1681 +reload 0 8 4.875197 0.000000 1682 +prover 0 8 4.875197 0.000000 1653 +feelei 0 7 5.010635 0.000000 1859 +sparc 0 7 5.010635 0.000000 1860 +bunch 0 7 5.010635 0.000000 1861 +exactli 0 7 5.010635 0.000000 1817 +merg 0 7 5.010635 0.000000 1862 +whichi 0 6 5.164786 0.000000 2056 +indiana 0 6 5.164786 0.000000 2057 +grammar 0 6 5.164786 0.000000 2058 +lang 0 5 5.347108 0.000000 2294 +default 0 5 5.347108 0.000000 2335 +pagethi 0 5 5.347108 0.000000 2336 +button 0 5 5.347108 0.000000 2337 +ahead 0 5 5.347108 0.000000 2338 +hardcopi 0 5 5.347108 0.000000 2246 +proposit 0 5 5.347108 0.000000 2339 +patch 0 4 5.568345 0.000000 2710 +devot 0 4 5.568345 0.000000 2711 +backward 0 4 5.568345 0.000000 2638 +chain 0 4 5.568345 0.000000 2712 +rscheme 0 3 5.857933 0.000000 3250 +inherit 0 3 5.857933 0.000000 3122 +qing 0 3 5.857933 0.000000 3295 +gambit 0 3 5.857933 0.000000 3227 +outof 0 3 5.857933 0.000000 3296 +ters 0 3 5.857933 0.000000 3297 +theoremprov 0 3 5.857933 0.000000 3298 +donovan 0 2 6.263398 0.000000 4371 +kolbl 0 2 6.263398 0.000000 4372 +youcan 0 2 6.263398 0.000000 4373 +indent 0 2 6.263398 0.000000 4374 +subtyp 0 2 6.263398 0.000000 4375 +meroon 0 1 6.957497 0.000000 7655 +runschem 0 1 6.957497 0.000000 7656 +orani 0 1 6.957497 0.000000 7657 +andinstal 0 1 6.957497 0.000000 7658 +itfrom 0 1 6.957497 0.000000 7659 +friendlier 0 1 6.957497 0.000000 7660 +fornewbi 0 1 6.957497 0.000000 7661 +gettinggambit 0 1 6.957497 0.000000 7662 +bestschem 0 1 6.957497 0.000000 7663 +guil 0 1 6.957497 0.000000 7664 +mzscheme 0 1 6.957497 0.000000 7665 +doingobject 0 1 6.957497 0.000000 7666 +tous 0 1 6.957497 0.000000 7667 +freeimplement 0 1 6.957497 0.000000 7668 +getinterest 0 1 6.957497 0.000000 7669 +paulwilson 0 1 6.957497 0.000000 7670 +yourbrows 0 1 6.957497 0.000000 7671 +mostrec 0 1 6.957497 0.000000 7672 +ondeclar 0 1 6.957497 0.000000 7673 +arereason 0 1 6.957497 0.000000 7674 +willchang 0 1 6.957497 0.000000 7675 +islik 0 1 6.957497 0.000000 7676 +adventur 0 1 6.957497 0.000000 7677 +usinga 0 1 6.957497 0.000000 7678 +throughchapt 0 1 6.957497 0.000000 7679 +sanoth 0 1 6.957497 0.000000 7680 +thanprint 0 1 6.957497 0.000000 7681 +weget 0 1 6.957497 0.000000 7682 +onlinebrows 0 1 6.957497 0.000000 7683 +coursenot 0 1 6.957497 0.000000 7684 +miscellanousfunct 0 1 6.957497 0.000000 7685 +shouldconsult 0 1 6.957497 0.000000 7686 +itsens 0 1 6.957497 0.000000 7687 +andnot 0 1 6.957497 0.000000 7688 +setofrul 0 1 6.957497 0.000000 7689 +ofanim 0 1 6.957497 0.000000 7690 +simpleobject 0 1 6.957497 0.000000 7691 +metaclass 0 1 6.957497 0.000000 7692 +circular 0 1 6.957497 0.000000 7693 +onclass 0 1 6.957497 0.000000 7694 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..353dc6a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +texa 0 160 1.791759 0.000000 64 +file 0 132 1.945910 0.000000 70 +note 0 142 1.945910 0.000000 67 +model 0 145 1.945910 0.000000 69 +like 0 132 1.945910 0.000000 81 +assign 0 135 1.945910 0.000000 66 +postscript 0 131 2.079442 0.000000 90 +welcom 0 122 2.079442 0.000000 99 +provid 0 121 2.079442 0.000000 94 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +pleas 0 113 2.197225 0.000000 114 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +homepag 0 93 2.397895 0.000000 148 +solut 0 82 2.484907 0.000000 162 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +homework 1 79 2.564949 2.564949 193 +sourc 0 77 2.564949 0.000000 201 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +view 0 70 2.708050 0.000000 254 +new 0 64 2.772589 0.000000 262 +guid 0 63 2.772589 0.000000 267 +locat 0 59 2.833213 0.000000 303 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +found 0 53 2.944439 0.000000 337 +tabl 0 51 2.995732 0.000000 346 +maintain 0 51 2.995732 0.000000 342 +format 0 48 3.044522 0.000000 356 +possibl 0 47 3.091042 0.000000 378 +midterm 0 45 3.135494 0.000000 392 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +made 0 44 3.135494 0.000000 398 +review 0 42 3.218876 0.000000 425 +announc 0 40 3.258097 0.000000 441 +realli 0 40 3.258097 0.000000 444 +slide 0 38 3.295837 0.000000 467 +sciencesunivers 0 37 3.332205 0.000000 486 +download 0 36 3.367296 0.000000 489 +print 0 34 3.401197 0.000000 503 +taught 0 33 3.433987 0.000000 526 +linux 0 27 3.637586 0.000000 631 +session 0 26 3.688879 0.000000 643 +half 0 21 3.912023 0.000000 776 +wang 0 21 3.912023 0.000000 790 +exercis 0 19 4.007333 0.000000 842 +weekli 0 17 4.110874 0.000000 919 +adam 0 17 4.110874 0.000000 934 +modif 0 17 4.110874 0.000000 913 +georg 0 16 4.174387 0.000000 994 +score 0 15 4.248495 0.000000 1017 +station 0 13 4.382027 0.000000 1157 +kumar 0 9 4.753590 0.000000 1506 +surpris 0 7 5.010635 0.000000 1828 +tip 0 7 5.010635 0.000000 1863 +ajit 0 3 5.857933 0.000000 3299 +feng 0 3 5.857933 0.000000 3300 +warren 0 3 5.857933 0.000000 3301 +edudepart 0 3 5.857933 0.000000 3302 +xfeng 0 2 6.263398 0.000000 4376 +natarajan 0 2 6.263398 0.000000 4377 +decimalinteg 0 1 6.957497 0.000000 7695 +hexinteg 0 1 6.957497 0.000000 7696 +octalinteg 0 1 6.957497 0.000000 7697 +xunnow 0 1 6.957497 0.000000 7698 +homeworksreview 0 1 6.957497 0.000000 7699 +slidesth 0 1 6.957497 0.000000 7700 +onlineif 0 1 6.957497 0.000000 7701 +updatedhomework 0 1 6.957497 0.000000 7702 +filemidterm 0 1 6.957497 0.000000 7703 +webta 0 1 6.957497 0.000000 7704 +timetableta 0 1 6.957497 0.000000 7705 +guana 0 1 6.957497 0.000000 7706 +eduxun 0 1 6.957497 0.000000 7707 +wordlist 0 1 6.957497 0.000000 7708 +wwang 0 1 6.957497 0.000000 7709 +afternoon 0 1 6.957497 0.000000 7710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..7143cb84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +hour 0 165 1.791759 0.000000 46 +contact 0 153 1.791759 0.000000 59 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +architectur 0 139 1.945910 0.000000 77 +welcom 0 122 2.079442 0.000000 99 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +section 0 94 2.397895 0.000000 149 +comment 0 93 2.397895 0.000000 146 +solut 1 82 2.484907 2.484907 162 +resourc 0 81 2.484907 0.000000 172 +syllabu 0 67 2.708050 0.000000 247 +august 0 66 2.708050 0.000000 257 +new 0 64 2.772589 0.000000 262 +creat 0 63 2.772589 0.000000 277 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +station 0 13 4.382027 0.000000 1157 +classmat 0 9 4.753590 0.000000 1516 +yang 0 8 4.875197 0.000000 1652 +pagei 0 8 4.875197 0.000000 1683 +schwetman 0 1 6.957497 0.000000 7711 +mesquit 0 1 6.957497 0.000000 7712 +yangyang 0 1 6.957497 0.000000 7713 +herb 0 1 6.957497 0.000000 7714 +appointmentcontact 0 1 6.957497 0.000000 7715 +statisticsassign 0 1 6.957497 0.000000 7716 +asga 0 1 6.957497 0.000000 7717 +statisticsyour 0 1 6.957497 0.000000 7718 +gradesect 0 1 6.957497 0.000000 7719 +microsparc 0 1 6.957497 0.000000 7720 +datasheetonlin 0 1 6.957497 0.000000 7721 +ruiliu 0 1 6.957497 0.000000 7722 +postmessag 0 1 6.957497 0.000000 7723 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..ea45e96f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +file 0 132 1.945910 0.000000 70 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +grade 0 90 2.397895 0.000000 142 +present 0 91 2.397895 0.000000 145 +internet 0 83 2.484907 0.000000 186 +solut 0 82 2.484907 0.000000 162 +info 0 85 2.484907 0.000000 176 +homework 0 79 2.564949 0.000000 193 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +java 0 70 2.708050 0.000000 248 +handout 1 64 2.772589 2.772589 263 +descript 0 64 2.772589 0.000000 271 +sampl 0 53 2.944439 0.000000 339 +protocol 0 45 3.135494 0.000000 407 +http 0 41 3.218876 0.000000 420 +tutori 0 39 3.258097 0.000000 437 +platform 0 29 3.583519 0.000000 591 +turn 0 29 3.583519 0.000000 586 +comp 0 26 3.688879 0.000000 650 +background 0 25 3.737670 0.000000 664 +mobil 0 23 3.806662 0.000000 730 +newsgroup 0 21 3.912023 0.000000 783 +rout 0 21 3.912023 0.000000 793 +alloc 0 20 3.951244 0.000000 821 +prerequisit 0 19 4.007333 0.000000 846 +configur 0 15 4.248495 0.000000 1012 +draft 0 14 4.317488 0.000000 1085 +station 0 13 4.382027 0.000000 1157 +individu 0 13 4.382027 0.000000 1126 +ring 0 8 4.875197 0.000000 1684 +digest 0 7 5.010635 0.000000 1864 +multicast 0 5 5.347108 0.000000 2305 +authent 0 5 5.347108 0.000000 2306 +edufing 0 4 5.568345 0.000000 2713 +csnet 0 1 6.957497 0.000000 7724 +wensdai 0 1 6.957497 0.000000 7725 +netsim 0 1 6.957497 0.000000 7726 +corejava 0 1 6.957497 0.000000 7727 +fengyufeng 0 1 6.957497 0.000000 7728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..0544cef1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +document 0 121 2.079442 0.000000 89 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +topic 0 114 2.197225 0.000000 110 +techniqu 0 99 2.302585 0.000000 138 +follow 0 92 2.397895 0.000000 143 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +chang 0 82 2.484907 0.000000 163 +resourc 0 81 2.484907 0.000000 172 +homework 0 79 2.564949 0.000000 193 +sourc 0 77 2.564949 0.000000 201 +html 0 75 2.639057 0.000000 235 +degre 0 69 2.708050 0.000000 259 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +colleg 0 61 2.833213 0.000000 300 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +visitor 0 49 3.044522 0.000000 371 +basic 0 50 3.044522 0.000000 360 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +offer 0 43 3.178054 0.000000 414 +announc 0 40 3.258097 0.000000 441 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +art 0 29 3.583519 0.000000 593 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +accur 0 25 3.737670 0.000000 680 +item 0 19 4.007333 0.000000 856 +hypertext 0 19 4.007333 0.000000 865 +offici 0 18 4.060443 0.000000 894 +charact 0 15 4.248495 0.000000 1028 +reprint 0 14 4.317488 0.000000 1097 +hypermedia 0 12 4.465908 0.000000 1247 +duli 0 12 4.465908 0.000000 1248 +regularli 0 11 4.553877 0.000000 1338 +nonprofit 0 11 4.553877 0.000000 1339 +mosaic 0 10 4.653960 0.000000 1426 +engr 0 10 4.653960 0.000000 1427 +weld 0 9 4.753590 0.000000 1538 +departmentof 0 9 4.753590 0.000000 1539 +uniform 0 7 5.010635 0.000000 1845 +markup 0 6 5.164786 0.000000 2059 +highlight 0 5 5.347108 0.000000 2340 +foracadem 0 5 5.347108 0.000000 2341 +whichcontain 0 4 5.568345 0.000000 2714 +bounti 0 4 5.568345 0.000000 2715 +mathematica 0 3 5.857933 0.000000 3303 +quotedand 0 3 5.857933 0.000000 3304 +quarterwelcom 0 2 6.263398 0.000000 4378 +thatthi 0 2 6.263398 0.000000 4379 +addedfrequ 0 2 6.263398 0.000000 4380 +personnel 0 2 6.263398 0.000000 4381 +mvi 0 2 6.263398 0.000000 4382 +usinglynx 0 2 6.263398 0.000000 4383 +pageclick 0 1 6.957497 0.000000 7729 +gradesoth 0 1 6.957497 0.000000 7730 +browserport 0 1 6.957497 0.000000 7731 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..ddb23ada --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +relat 0 139 1.945910 0.000000 68 +document 0 121 2.079442 0.000000 89 +studi 0 120 2.079442 0.000000 91 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +send 0 114 2.197225 0.000000 109 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +place 0 106 2.197225 0.000000 124 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +take 0 97 2.302585 0.000000 134 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +help 0 83 2.484907 0.000000 175 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +info 0 85 2.484907 0.000000 176 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +onlin 0 75 2.639057 0.000000 223 +degre 0 69 2.708050 0.000000 259 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +guid 0 63 2.772589 0.000000 267 +previou 0 62 2.772589 0.000000 290 +colleg 0 61 2.833213 0.000000 300 +think 0 57 2.890372 0.000000 314 +special 0 56 2.890372 0.000000 320 +summer 0 56 2.890372 0.000000 311 +major 0 56 2.890372 0.000000 315 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +particular 0 51 2.995732 0.000000 352 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +netscap 0 44 3.135494 0.000000 395 +keep 0 44 3.135494 0.000000 409 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +offer 0 43 3.178054 0.000000 414 +might 0 41 3.218876 0.000000 426 +slide 0 38 3.295837 0.000000 467 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +staff 0 36 3.367296 0.000000 490 +winter 0 36 3.367296 0.000000 500 +copyright 0 36 3.367296 0.000000 495 +word 0 34 3.401197 0.000000 508 +board 0 33 3.433987 0.000000 528 +ad 0 32 3.465736 0.000000 544 +autumn 0 31 3.496508 0.000000 558 +art 0 29 3.583519 0.000000 593 +consid 0 29 3.583519 0.000000 590 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +notic 0 25 3.737670 0.000000 675 +accur 0 25 3.737670 0.000000 680 +miscellan 0 23 3.806662 0.000000 731 +martin 0 21 3.912023 0.000000 794 +item 0 19 4.007333 0.000000 856 +demo 0 18 4.060443 0.000000 888 +less 0 18 4.060443 0.000000 892 +previous 0 17 4.110874 0.000000 923 +whole 0 17 4.110874 0.000000 940 +match 0 16 4.174387 0.000000 965 +portion 0 16 4.174387 0.000000 971 +webmast 0 15 4.248495 0.000000 1045 +reprint 0 14 4.317488 0.000000 1097 +earlier 0 13 4.382027 0.000000 1140 +menu 0 13 4.382027 0.000000 1156 +hypermedia 0 12 4.465908 0.000000 1247 +web 0 12 4.465908 0.000000 1249 +duli 0 12 4.465908 0.000000 1248 +regularli 0 11 4.553877 0.000000 1338 +instanc 0 11 4.553877 0.000000 1322 +nonprofit 0 11 4.553877 0.000000 1339 +engr 0 10 4.653960 0.000000 1427 +debugg 0 9 4.753590 0.000000 1493 +departmentof 0 9 4.753590 0.000000 1539 +tip 0 7 5.010635 0.000000 1863 +documentfor 0 7 5.010635 0.000000 1865 +theclass 0 6 5.164786 0.000000 2060 +handbook 0 6 5.164786 0.000000 2061 +newinform 0 5 5.347108 0.000000 2342 +highlight 0 5 5.347108 0.000000 2340 +mac 0 5 5.347108 0.000000 2292 +bulletin 0 5 5.347108 0.000000 2343 +foracadem 0 5 5.347108 0.000000 2341 +bounti 0 4 5.568345 0.000000 2715 +insensit 0 4 5.568345 0.000000 2716 +tompa 0 3 5.857933 0.000000 3305 +preview 0 3 5.857933 0.000000 3306 +quotedand 0 3 5.857933 0.000000 3304 +raini 0 2 6.263398 0.000000 4384 +intact 0 2 6.263398 0.000000 4385 +nonmajor 0 2 6.263398 0.000000 4386 +itemsund 0 2 6.263398 0.000000 4387 +balloon 0 2 6.263398 0.000000 4388 +dugan 0 1 6.957497 0.000000 7732 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..2b6a72fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +lectur 0 135 1.945910 0.000000 73 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +spring 0 131 2.079442 0.000000 88 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +code 0 108 2.197225 0.000000 116 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +look 0 107 2.197225 0.000000 115 +user 0 104 2.302585 0.000000 137 +comment 0 93 2.397895 0.000000 146 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +refer 0 78 2.564949 0.000000 203 +test 0 66 2.708050 0.000000 252 +degre 0 69 2.708050 0.000000 259 +syllabu 0 67 2.708050 0.000000 247 +guid 0 63 2.772589 0.000000 267 +colleg 0 61 2.833213 0.000000 300 +summer 0 56 2.890372 0.000000 311 +special 0 56 2.890372 0.000000 320 +major 0 56 2.890372 0.000000 315 +week 0 52 2.995732 0.000000 343 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +textbook 0 44 3.135494 0.000000 397 +examin 0 42 3.218876 0.000000 424 +might 0 41 3.218876 0.000000 426 +tutori 0 39 3.258097 0.000000 437 +slide 0 38 3.295837 0.000000 467 +short 0 36 3.367296 0.000000 499 +staff 0 36 3.367296 0.000000 490 +winter 0 36 3.367296 0.000000 500 +kind 0 32 3.465736 0.000000 541 +autumn 0 31 3.496508 0.000000 558 +richard 0 31 3.496508 0.000000 559 +art 0 29 3.583519 0.000000 593 +notic 0 25 3.737670 0.000000 675 +lab 0 24 3.761200 0.000000 698 +brows 0 23 3.806662 0.000000 726 +sort 0 22 3.850148 0.000000 738 +martin 0 21 3.912023 0.000000 794 +watch 0 21 3.912023 0.000000 789 +item 0 19 4.007333 0.000000 856 +demo 0 18 4.060443 0.000000 888 +less 0 18 4.060443 0.000000 892 +macintosh 0 17 4.110874 0.000000 920 +previous 0 17 4.110874 0.000000 923 +webmast 0 15 4.248495 0.000000 1045 +earlier 0 13 4.382027 0.000000 1140 +web 0 12 4.465908 0.000000 1249 +engr 0 10 4.653960 0.000000 1427 +invit 0 10 4.653960 0.000000 1428 +informationabout 0 9 4.753590 0.000000 1515 +andth 0 9 4.753590 0.000000 1481 +tip 0 7 5.010635 0.000000 1863 +ladner 0 6 5.164786 0.000000 2062 +highlight 0 5 5.347108 0.000000 2340 +bounti 0 4 5.568345 0.000000 2715 +moreinform 0 3 5.857933 0.000000 3307 +dickei 0 2 6.263398 0.000000 4389 +nonmajor 0 2 6.263398 0.000000 4386 +hypermediadocu 0 1 6.957497 0.000000 7733 +schedulesth 0 1 6.957497 0.000000 7734 +glanceweek 0 1 6.957497 0.000000 7735 +schedulecomput 0 1 6.957497 0.000000 7736 +includinglab 0 1 6.957497 0.000000 7737 +andta 0 1 6.957497 0.000000 7738 +audiofrom 0 1 6.957497 0.000000 7739 +midtermand 0 1 6.957497 0.000000 7740 +originallyschedul 0 1 6.957497 0.000000 7741 +andtim 0 1 6.957497 0.000000 7742 +usingth 0 1 6.957497 0.000000 7743 +intactand 0 1 6.957497 0.000000 7744 +forinst 0 1 6.957497 0.000000 7745 +andrel 0 1 6.957497 0.000000 7746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..ac4531b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +assign 1 135 1.945910 1.945910 66 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +seattl 0 120 2.079442 0.000000 103 +structur 0 106 2.197225 0.000000 105 +teach 0 108 2.197225 0.000000 112 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +question 0 91 2.397895 0.000000 141 +novemb 0 81 2.484907 0.000000 179 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +previou 0 62 2.772589 0.000000 290 +browser 0 56 2.890372 0.000000 313 +sampl 0 53 2.944439 0.000000 339 +appoint 0 49 3.044522 0.000000 358 +set 0 50 3.044522 0.000000 361 +midterm 0 45 3.135494 0.000000 392 +winter 0 36 3.367296 0.000000 500 +autumn 0 31 3.496508 0.000000 558 +defin 0 22 3.850148 0.000000 746 +edulast 0 17 4.110874 0.000000 927 +discret 0 13 4.382027 0.000000 1165 +johnson 0 13 4.382027 0.000000 1162 +recurs 0 13 4.382027 0.000000 1127 +loew 0 12 4.465908 0.000000 1252 +reader 0 12 4.465908 0.000000 1246 +web 0 12 4.465908 0.000000 1249 +induct 0 11 4.553877 0.000000 1304 +leveson 0 9 4.753590 0.000000 1540 +acrobat 0 6 5.164786 0.000000 2063 +beam 0 5 5.347108 0.000000 2344 +karp 0 5 5.347108 0.000000 2284 +ruzzo 0 5 5.347108 0.000000 2345 +nowitz 0 2 6.263398 0.000000 4390 +ofyour 0 2 6.263398 0.000000 4063 +instructorpaul 0 1 6.957497 0.000000 7747 +edulectur 0 1 6.957497 0.000000 7748 +assistantjonathan 0 1 6.957497 0.000000 7749 +edusect 0 1 6.957497 0.000000 7750 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..5300b420 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +updat 0 191 1.609438 0.000000 41 +includ 0 208 1.609438 0.000000 42 +read 0 154 1.791759 0.000000 47 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +postscript 0 131 2.079442 0.000000 90 +final 0 116 2.197225 0.000000 108 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +text 0 98 2.302585 0.000000 133 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +mani 0 92 2.397895 0.000000 150 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +homework 0 79 2.564949 0.000000 193 +state 0 76 2.564949 0.000000 207 +messag 0 76 2.564949 0.000000 212 +html 0 75 2.639057 0.000000 235 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +syllabu 0 67 2.708050 0.000000 247 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +previou 0 62 2.772589 0.000000 290 +januari 0 62 2.772589 0.000000 264 +simpl 0 60 2.833213 0.000000 298 +browser 0 56 2.890372 0.000000 313 +three 0 54 2.944439 0.000000 330 +format 0 48 3.044522 0.000000 356 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +midterm 0 45 3.135494 0.000000 392 +math 0 44 3.135494 0.000000 402 +review 0 42 3.218876 0.000000 425 +origin 0 38 3.295837 0.000000 472 +formal 0 37 3.332205 0.000000 478 +winter 0 36 3.367296 0.000000 500 +express 0 32 3.465736 0.000000 540 +autumn 0 31 3.496508 0.000000 558 +richard 0 31 3.496508 0.000000 559 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +handl 0 24 3.761200 0.000000 685 +proof 0 23 3.806662 0.000000 720 +fact 0 21 3.912023 0.000000 780 +viewer 0 21 3.912023 0.000000 787 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +regular 0 17 4.110874 0.000000 929 +intro 0 17 4.110874 0.000000 915 +ascii 0 15 4.248495 0.000000 1032 +latex 1 14 4.317488 4.317488 1064 +command 0 14 4.317488 0.000000 1083 +convert 0 13 4.382027 0.000000 1122 +web 0 12 4.465908 0.000000 1249 +extra 0 11 4.553877 0.000000 1312 +regard 0 11 4.553877 0.000000 1309 +notat 0 9 4.753590 0.000000 1489 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +legibl 0 7 5.010635 0.000000 1866 +ghostscript 0 7 5.010635 0.000000 1867 +grammar 0 6 5.164786 0.000000 2058 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +ladner 0 6 5.164786 0.000000 2062 +diagram 0 5 5.347108 0.000000 2346 +pars 0 5 5.347108 0.000000 2321 +latexhtml 0 5 5.347108 0.000000 2347 +rambl 0 3 5.857933 0.000000 3308 +ladnerclass 0 1 6.957497 0.000000 7751 +construc 0 1 6.957497 0.000000 7752 +halt 0 1 6.957497 0.000000 7753 +undecidableexam 0 1 6.957497 0.000000 7754 +edufix 0 1 6.957497 0.000000 7755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..979a7d8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +list 1 201 1.609438 1.609438 39 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +includ 0 208 1.609438 0.000000 42 +lectur 1 135 1.945910 1.945910 73 +model 0 145 1.945910 0.000000 69 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +messag 1 76 2.564949 2.564949 212 +homework 0 79 2.564949 0.000000 193 +previou 0 62 2.772589 0.000000 290 +handout 0 64 2.772589 0.000000 263 +septemb 0 65 2.772589 0.000000 274 +content 0 59 2.833213 0.000000 302 +set 0 50 3.044522 0.000000 361 +pointer 0 48 3.044522 0.000000 368 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +formal 0 37 3.332205 0.000000 478 +winter 0 36 3.367296 0.000000 500 +autumn 1 31 3.496508 3.496508 558 +sent 0 22 3.850148 0.000000 763 +log 0 19 4.007333 0.000000 857 +intro 0 17 4.110874 0.000000 915 +web 0 12 4.465908 0.000000 1249 +regularli 0 11 4.553877 0.000000 1338 +subscrib 0 9 4.753590 0.000000 1541 +upcom 0 8 4.875197 0.000000 1685 +ann 0 6 5.164786 0.000000 2065 +majordomo 0 6 5.164786 0.000000 2066 +willb 0 5 5.347108 0.000000 2277 +condon 0 3 5.857933 0.000000 3309 +findhomework 0 1 6.957497 0.000000 7756 +userid 0 1 6.957497 0.000000 7757 +edukaye 0 1 6.957497 0.000000 7758 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..c54e7a53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +spring 0 131 2.079442 0.000000 88 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +messag 0 76 2.564949 0.000000 212 +previou 0 62 2.772589 0.000000 290 +unix 0 58 2.890372 0.000000 308 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +winter 0 36 3.367296 0.000000 500 +autumn 1 31 3.496508 3.496508 558 +request 0 26 3.688879 0.000000 635 +martin 0 21 3.912023 0.000000 794 +web 0 12 4.465908 0.000000 1249 +tompaclass 0 3 5.857933 0.000000 3310 +informationlab 0 1 6.957497 0.000000 7759 +technot 0 1 6.957497 0.000000 7760 +questionnaireloc 0 1 6.957497 0.000000 7761 +cdeletemin 0 1 6.957497 0.000000 7762 +treeshomework 0 1 6.957497 0.000000 7763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..853d0ab9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +spring 1 131 2.079442 2.079442 88 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +seattl 0 120 2.079442 0.000000 103 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +exam 0 86 2.484907 0.000000 169 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +sourc 0 77 2.564949 0.000000 201 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +copyright 0 36 3.367296 0.000000 495 +taught 0 33 3.433987 0.000000 526 +ad 0 32 3.465736 0.000000 544 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +reprint 0 14 4.317488 0.000000 1097 +hypermedia 0 12 4.465908 0.000000 1247 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +departmentof 0 9 4.753590 0.000000 1539 +documentfor 0 7 5.010635 0.000000 1865 +ladner 0 6 5.164786 0.000000 2062 +theclass 0 6 5.164786 0.000000 2060 +newinform 0 5 5.347108 0.000000 2342 +foracadem 0 5 5.347108 0.000000 2341 +quotedand 0 3 5.857933 0.000000 3304 +fasulo 0 2 6.263398 0.000000 4391 +structuresrichard 0 1 6.957497 0.000000 7764 +instructordan 0 1 6.957497 0.000000 7765 +assistantthi 0 1 6.957497 0.000000 7766 +overheadsport 0 1 6.957497 0.000000 7767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..0a84afae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +sourc 0 77 2.564949 0.000000 201 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +offer 0 43 3.178054 0.000000 414 +purpos 0 37 3.332205 0.000000 481 +winter 0 36 3.367296 0.000000 500 +copyright 0 36 3.367296 0.000000 495 +everi 0 34 3.401197 0.000000 519 +autumn 0 31 3.496508 0.000000 558 +computersci 0 30 3.555348 0.000000 562 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +webmast 0 15 4.248495 0.000000 1045 +reprint 0 14 4.317488 0.000000 1097 +scienceand 0 5 5.347108 0.000000 2348 +ofcs 0 2 6.263398 0.000000 4392 +languagesfal 0 2 6.263398 0.000000 4161 +informationth 0 2 6.263398 0.000000 4393 +listinfo 0 2 6.263398 0.000000 4394 +pagehom 0 2 6.263398 0.000000 4395 +engineeringport 0 2 6.263398 0.000000 4396 +academicnonprofit 0 2 6.263398 0.000000 4397 +dulycredit 0 2 6.263398 0.000000 4398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..b0664ea3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +note 1 142 1.945910 1.945910 67 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +relat 0 139 1.945910 0.000000 68 +postscript 0 131 2.079442 0.000000 90 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +section 0 94 2.397895 0.000000 149 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +build 0 85 2.484907 0.000000 184 +april 0 77 2.564949 0.000000 196 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +interfac 0 79 2.564949 0.000000 209 +sourc 0 77 2.564949 0.000000 201 +html 0 75 2.639057 0.000000 235 +tuesdai 0 73 2.639057 0.000000 219 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +new 0 64 2.772589 0.000000 262 +march 0 61 2.833213 0.000000 295 +unix 0 58 2.890372 0.000000 308 +reason 0 57 2.890372 0.000000 318 +suggest 0 53 2.944439 0.000000 331 +sampl 0 53 2.944439 0.000000 339 +quarter 0 47 3.091042 0.000000 389 +electron 0 47 3.091042 0.000000 379 +done 0 47 3.091042 0.000000 381 +adapt 0 46 3.091042 0.000000 387 +netscap 0 44 3.135494 0.000000 395 +offer 0 43 3.178054 0.000000 414 +review 0 42 3.218876 0.000000 425 +hand 0 37 3.332205 0.000000 475 +purpos 0 37 3.332205 0.000000 481 +winter 0 36 3.367296 0.000000 500 +copyright 0 36 3.367296 0.000000 495 +everi 0 34 3.401197 0.000000 519 +eduoffic 0 33 3.433987 0.000000 531 +autumn 0 31 3.496508 0.000000 558 +computersci 0 30 3.555348 0.000000 562 +steve 0 29 3.583519 0.000000 594 +quot 0 29 3.583519 0.000000 582 +full 0 28 3.610918 0.000000 615 +administr 0 27 3.637586 0.000000 628 +pagecs 0 26 3.688879 0.000000 658 +session 0 26 3.688879 0.000000 643 +accur 0 25 3.737670 0.000000 680 +daili 0 24 3.761200 0.000000 706 +newsgroup 0 21 3.912023 0.000000 783 +miss 0 19 4.007333 0.000000 866 +lisp 0 18 4.060443 0.000000 897 +figur 0 18 4.060443 0.000000 903 +partial 0 18 4.060443 0.000000 900 +quiz 0 16 4.174387 0.000000 990 +webmast 0 15 4.248495 0.000000 1045 +dave 0 14 4.317488 0.000000 1098 +save 0 14 4.317488 0.000000 1099 +reprint 0 14 4.317488 0.000000 1097 +prolog 0 13 4.382027 0.000000 1155 +emac 0 13 4.382027 0.000000 1143 +hank 0 12 4.465908 0.000000 1253 +submiss 0 11 4.553877 0.000000 1298 +grove 0 8 4.875197 0.000000 1675 +clip 0 7 5.010635 0.000000 1868 +transcript 0 6 5.164786 0.000000 2067 +scienceand 0 5 5.347108 0.000000 2348 +turnin 0 4 5.568345 0.000000 2654 +employe 0 4 5.568345 0.000000 2717 +ofcs 0 2 6.263398 0.000000 4392 +overviewcours 0 2 6.263398 0.000000 4399 +informationth 0 2 6.263398 0.000000 4393 +listinfo 0 2 6.263398 0.000000 4394 +pagehom 0 2 6.263398 0.000000 4395 +engineeringport 0 2 6.263398 0.000000 4396 +academicnonprofit 0 2 6.263398 0.000000 4397 +dulycredit 0 2 6.263398 0.000000 4398 +smalltalk 0 1 6.957497 0.000000 7768 +transcipt 0 1 6.957497 0.000000 7769 +htmlpostscript 0 1 6.957497 0.000000 7770 +languagesspr 0 1 6.957497 0.000000 7771 +hanks 0 1 6.957497 0.000000 7772 +documentsgeneralintroduct 0 1 6.957497 0.000000 7773 +relatedrun 0 1 6.957497 0.000000 7774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..cca55942 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +comment 0 93 2.397895 0.000000 146 +academ 0 82 2.484907 0.000000 178 +sourc 0 77 2.564949 0.000000 201 +degre 0 69 2.708050 0.000000 259 +previou 0 62 2.772589 0.000000 290 +colleg 0 61 2.833213 0.000000 300 +index 0 56 2.890372 0.000000 309 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +offer 0 43 3.178054 0.000000 414 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +art 0 29 3.583519 0.000000 593 +quot 0 29 3.583519 0.000000 582 +accur 0 25 3.737670 0.000000 680 +less 0 18 4.060443 0.000000 892 +portion 0 16 4.174387 0.000000 971 +webmast 0 15 4.248495 0.000000 1045 +reprint 0 14 4.317488 0.000000 1097 +earlier 0 13 4.382027 0.000000 1140 +web 0 12 4.465908 0.000000 1249 +duli 0 12 4.465908 0.000000 1248 +instanc 0 11 4.553877 0.000000 1322 +nonprofit 0 11 4.553877 0.000000 1339 +intact 0 2 6.263398 0.000000 4385 +pagecurr 0 1 6.957497 0.000000 7775 +quarterth 0 1 6.957497 0.000000 7776 +quarterscours 0 1 6.957497 0.000000 7777 +younotic 0 1 6.957497 0.000000 7778 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..d648c30f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +document 0 121 2.079442 0.000000 89 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +instructor 0 108 2.197225 0.000000 107 +send 0 114 2.197225 0.000000 109 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +version 0 113 2.197225 0.000000 122 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +exam 0 86 2.484907 0.000000 169 +contain 0 81 2.484907 0.000000 174 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +sourc 0 77 2.564949 0.000000 201 +logic 0 71 2.639057 0.000000 230 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +sieg 0 69 2.708050 0.000000 260 +goal 0 66 2.708050 0.000000 250 +syllabu 0 67 2.708050 0.000000 247 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +previou 0 62 2.772589 0.000000 290 +publish 0 57 2.890372 0.000000 326 +think 0 57 2.890372 0.000000 314 +maintain 0 51 2.995732 0.000000 342 +digit 0 52 2.995732 0.000000 348 +frequent 0 49 3.044522 0.000000 367 +archiv 0 49 3.044522 0.000000 364 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +textbook 0 44 3.135494 0.000000 397 +even 0 45 3.135494 0.000000 393 +announc 0 40 3.258097 0.000000 441 +author 0 39 3.258097 0.000000 450 +slide 0 38 3.295837 0.000000 467 +credit 0 38 3.295837 0.000000 460 +expect 0 37 3.332205 0.000000 484 +purpos 0 37 3.332205 0.000000 481 +especi 0 36 3.367296 0.000000 496 +copyright 0 36 3.367296 0.000000 495 +go 0 33 3.433987 0.000000 529 +ad 0 32 3.465736 0.000000 544 +collabor 0 32 3.465736 0.000000 543 +autumn 0 31 3.496508 0.000000 558 +quot 0 29 3.583519 0.000000 582 +administr 0 27 3.637586 0.000000 628 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +notic 0 25 3.737670 0.000000 675 +accur 0 25 3.737670 0.000000 680 +tell 0 21 3.912023 0.000000 777 +anderson 0 19 4.007333 0.000000 860 +feedback 0 19 4.007333 0.000000 854 +aid 0 18 4.060443 0.000000 904 +whole 0 17 4.110874 0.000000 940 +weekli 0 17 4.110874 0.000000 919 +weslei 0 16 4.174387 0.000000 983 +portion 0 16 4.174387 0.000000 971 +webmast 0 15 4.248495 0.000000 1045 +anonym 0 14 4.317488 0.000000 1100 +reprint 0 14 4.317488 0.000000 1097 +everyon 0 13 4.382027 0.000000 1148 +quizz 0 13 4.382027 0.000000 1151 +addison 0 12 4.465908 0.000000 1230 +workload 0 12 4.465908 0.000000 1210 +overal 0 12 4.465908 0.000000 1254 +duli 0 12 4.465908 0.000000 1248 +benjamin 0 11 4.553877 0.000000 1296 +evolut 0 11 4.553877 0.000000 1314 +nonprofit 0 11 4.553877 0.000000 1339 +cheat 0 10 4.653960 0.000000 1395 +desir 0 9 4.753590 0.000000 1542 +cum 0 8 4.875197 0.000000 1619 +bunch 0 7 5.010635 0.000000 1861 +gaetano 0 6 5.164786 0.000000 2068 +borriello 0 5 5.347108 0.000000 2349 +corei 0 4 5.568345 0.000000 2718 +contemporari 0 4 5.568345 0.000000 2719 +katz 0 3 5.857933 0.000000 3276 +corin 0 3 5.857933 0.000000 3311 +aweekli 0 3 5.857933 0.000000 3312 +andersonwelcom 0 2 6.263398 0.000000 4400 +tocs 0 2 6.263398 0.000000 4401 +messagess 0 2 6.263398 0.000000 4402 +synario 0 2 6.263398 0.000000 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..3a5ba3c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +assign 1 135 1.945910 1.945910 66 +hall 0 146 1.945910 0.000000 65 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +grade 0 90 2.397895 0.000000 142 +requir 0 81 2.484907 0.000000 167 +sieg 0 69 2.708050 0.000000 260 +practic 0 70 2.708050 0.000000 246 +polici 0 64 2.772589 0.000000 279 +previou 0 62 2.772589 0.000000 290 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +publish 0 57 2.890372 0.000000 326 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +basic 0 50 3.044522 0.000000 360 +appoint 0 49 3.044522 0.000000 358 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +keep 0 44 3.135494 0.000000 409 +offer 0 43 3.178054 0.000000 414 +late 0 40 3.258097 0.000000 439 +announc 0 40 3.258097 0.000000 441 +winter 0 36 3.367296 0.000000 500 +autumn 0 31 3.496508 0.000000 558 +option 0 30 3.555348 0.000000 575 +steve 0 29 3.583519 0.000000 594 +pagecs 0 26 3.688879 0.000000 658 +dai 0 22 3.850148 0.000000 753 +tent 0 22 3.850148 0.000000 739 +smith 0 20 3.951244 0.000000 820 +facil 0 20 3.951244 0.000000 814 +account 0 18 4.060443 0.000000 882 +lisp 0 18 4.060443 0.000000 897 +encourag 0 18 4.060443 0.000000 880 +deduct 0 12 4.465908 0.000000 1236 +tanimoto 0 10 4.653960 0.000000 1429 +penalti 0 10 4.653960 0.000000 1405 +prentic 0 7 5.010635 0.000000 1838 +aboutth 0 4 5.568345 0.000000 2720 +punctual 0 3 5.857933 0.000000 3313 +anhai 0 2 6.263398 0.000000 4404 +doan 0 2 6.263398 0.000000 4405 +mscc 0 2 6.263398 0.000000 4406 +breakdown 0 2 6.263398 0.000000 4407 +algorithmsautumn 0 1 6.957497 0.000000 7779 +shaffer 0 1 6.957497 0.000000 7780 +examinform 0 1 6.957497 0.000000 7781 +exambas 0 1 6.957497 0.000000 7782 +compilerassignmentssolut 0 1 6.957497 0.000000 7783 +assignmentsteach 0 1 6.957497 0.000000 7784 +informationscheduleweb 0 1 6.957497 0.000000 7785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..de42d2cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +relat 0 139 1.945910 0.000000 68 +structur 0 106 2.197225 0.000000 105 +instructor 0 108 2.197225 0.000000 107 +exam 0 86 2.484907 0.000000 169 +help 0 83 2.484907 0.000000 175 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +materi 0 75 2.639057 0.000000 221 +degre 0 69 2.708050 0.000000 259 +syllabu 0 67 2.708050 0.000000 247 +colleg 0 61 2.833213 0.000000 300 +major 0 56 2.890372 0.000000 315 +offer 0 43 3.178054 0.000000 414 +art 0 29 3.583519 0.000000 593 +pagecs 0 26 3.688879 0.000000 658 +demo 0 18 4.060443 0.000000 888 +jonathan 0 13 4.382027 0.000000 1174 +mosaic 0 10 4.653960 0.000000 1426 +holden 0 3 5.857933 0.000000 3314 +alistair 0 3 5.857933 0.000000 3315 +nowitz 0 2 6.263398 0.000000 4390 +raini 0 2 6.263398 0.000000 4384 +algorithmsspr 0 1 6.957497 0.000000 7786 +funnowitz 0 1 6.957497 0.000000 7787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..52421ade --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +file 1 132 1.945910 1.945910 70 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +search 0 95 2.397895 0.000000 155 +follow 0 92 2.397895 0.000000 143 +homework 0 79 2.564949 0.000000 193 +state 0 76 2.564949 0.000000 207 +come 0 78 2.564949 0.000000 202 +meet 0 72 2.639057 0.000000 229 +line 0 75 2.639057 0.000000 231 +test 1 66 2.708050 2.708050 252 +sieg 0 69 2.708050 0.000000 260 +set 0 50 3.044522 0.000000 361 +answer 0 45 3.135494 0.000000 391 +long 0 43 3.178054 0.000000 413 +tree 0 36 3.367296 0.000000 492 +next 0 34 3.401197 0.000000 517 +eduoffic 0 33 3.433987 0.000000 531 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +valu 0 25 3.737670 0.000000 665 +begin 0 23 3.806662 0.000000 716 +sheet 0 16 4.174387 0.000000 973 +indic 0 15 4.248495 0.000000 1013 +charact 0 15 4.248495 0.000000 1028 +denis 0 12 4.465908 0.000000 1255 +string 0 11 4.553877 0.000000 1340 +linda 0 10 4.653960 0.000000 1394 +length 0 10 4.653960 0.000000 1400 +shapiro 0 8 4.875197 0.000000 1686 +insert 0 8 4.875197 0.000000 1687 +integ 0 8 4.875197 0.000000 1688 +assignmentsprogram 0 6 5.164786 0.000000 2019 +assignmentshomework 0 4 5.568345 0.000000 2721 +enclos 0 1 6.957497 0.000000 7788 +siegtelephon 0 1 6.957497 0.000000 7789 +algorithmswint 0 1 6.957497 0.000000 7790 +shapirooffic 0 1 6.957497 0.000000 7791 +pinneloffic 0 1 6.957497 0.000000 7792 +denisep 0 1 6.957497 0.000000 7793 +syllabustransparencieshomework 0 1 6.957497 0.000000 7794 +inquot 0 1 6.957497 0.000000 7795 +associatedvalu 0 1 6.957497 0.000000 7796 +linebegin 0 1 6.957497 0.000000 7797 +isfollow 0 1 6.957497 0.000000 7798 +graphimag 0 1 6.957497 0.000000 7799 +graphreview 0 1 6.957497 0.000000 7800 +listsfin 0 1 6.957497 0.000000 7801 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..589ee86b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +modifi 0 178 1.609438 0.000000 35 +assign 0 135 1.945910 0.000000 66 +compil 0 122 2.079442 0.000000 96 +document 0 121 2.079442 0.000000 89 +world 0 115 2.197225 0.000000 126 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +frequent 0 49 3.044522 0.000000 367 +keep 0 44 3.135494 0.000000 409 +announc 0 40 3.258097 0.000000 441 +ad 0 32 3.465736 0.000000 544 +static 0 27 3.637586 0.000000 619 +hypermedia 0 12 4.465908 0.000000 1247 +admin 0 9 4.753590 0.000000 1476 +documentfor 0 7 5.010635 0.000000 1865 +willb 0 5 5.347108 0.000000 2277 +urgent 0 3 5.857933 0.000000 3316 +classhomethi 0 1 6.957497 0.000000 7802 +inmind 0 1 6.957497 0.000000 7803 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..0282f1fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,235 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +implement 0 152 1.791759 0.000000 52 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +make 0 111 2.197225 0.000000 120 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +user 0 104 2.302585 0.000000 137 +technic 0 100 2.302585 0.000000 140 +part 0 98 2.302585 0.000000 129 +take 0 97 2.302585 0.000000 134 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +search 0 95 2.397895 0.000000 155 +present 0 91 2.397895 0.000000 145 +learn 0 86 2.484907 0.000000 170 +requir 0 81 2.484907 0.000000 167 +control 0 82 2.484907 0.000000 164 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +larg 0 82 2.484907 0.000000 168 +member 0 84 2.484907 0.000000 165 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +interfac 0 79 2.564949 0.000000 209 +method 0 80 2.564949 0.000000 213 +master 0 76 2.564949 0.000000 216 +appear 0 78 2.564949 0.000000 210 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +write 0 72 2.639057 0.000000 222 +test 0 66 2.708050 0.000000 252 +sieg 0 69 2.708050 0.000000 260 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +organ 0 65 2.772589 0.000000 265 +written 0 63 2.772589 0.000000 278 +interact 0 62 2.772589 0.000000 270 +evalu 0 64 2.772589 0.000000 266 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +creat 0 63 2.772589 0.000000 277 +guid 0 63 2.772589 0.000000 267 +sever 0 56 2.890372 0.000000 322 +reason 0 57 2.890372 0.000000 318 +allow 0 53 2.944439 0.000000 333 +profession 0 51 2.995732 0.000000 345 +hardwar 0 51 2.995732 0.000000 350 +set 0 50 3.044522 0.000000 361 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +quarter 0 47 3.091042 0.000000 389 +done 0 47 3.091042 0.000000 381 +possibl 0 47 3.091042 0.000000 378 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +discuss 0 45 3.135494 0.000000 399 +natur 0 44 3.135494 0.000000 406 +review 0 42 3.218876 0.000000 425 +howev 0 41 3.218876 0.000000 422 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +correct 0 38 3.295837 0.000000 462 +respons 0 37 3.332205 0.000000 476 +cost 0 37 3.332205 0.000000 480 +short 0 36 3.367296 0.000000 499 +ofth 0 36 3.367296 0.000000 491 +survei 0 35 3.401197 0.000000 513 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +concept 0 32 3.465736 0.000000 537 +human 0 32 3.465736 0.000000 546 +often 0 31 3.496508 0.000000 551 +posit 0 31 3.496508 0.000000 552 +secur 0 30 3.555348 0.000000 577 +produc 0 30 3.555348 0.000000 572 +hard 0 30 3.555348 0.000000 563 +exist 0 30 3.555348 0.000000 569 +particip 0 29 3.583519 0.000000 589 +platform 0 29 3.583519 0.000000 591 +usual 0 28 3.610918 0.000000 608 +held 0 28 3.610918 0.000000 600 +releas 0 28 3.610918 0.000000 616 +team 0 27 3.637586 0.000000 625 +administr 0 27 3.637586 0.000000 628 +determin 0 27 3.637586 0.000000 630 +experiment 0 26 3.688879 0.000000 645 +pagecs 0 26 3.688879 0.000000 658 +session 0 26 3.688879 0.000000 643 +consist 0 26 3.688879 0.000000 651 +primari 0 25 3.737670 0.000000 669 +reliabl 0 25 3.737670 0.000000 674 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +handl 0 24 3.761200 0.000000 685 +head 0 23 3.806662 0.000000 732 +try 0 22 3.850148 0.000000 764 +qualiti 0 20 3.951244 0.000000 832 +verif 0 20 3.951244 0.000000 826 +safeti 0 20 3.951244 0.000000 817 +sure 0 20 3.951244 0.000000 813 +feedback 0 19 4.007333 0.000000 854 +statu 0 18 4.060443 0.000000 885 +regular 0 17 4.110874 0.000000 929 +outlin 0 17 4.110874 0.000000 914 +estim 0 17 4.110874 0.000000 930 +normal 0 16 4.174387 0.000000 995 +configur 0 15 4.248495 0.000000 1012 +enough 0 15 4.248495 0.000000 1040 +track 0 15 4.248495 0.000000 1029 +transit 0 15 4.248495 0.000000 1046 +role 0 14 4.317488 0.000000 1101 +conduct 0 14 4.317488 0.000000 1065 +embed 0 14 4.317488 0.000000 1102 +everyon 0 13 4.382027 0.000000 1148 +essenti 0 13 4.382027 0.000000 1137 +sai 0 13 4.382027 0.000000 1175 +necessari 0 13 4.382027 0.000000 1147 +overal 0 12 4.465908 0.000000 1254 +nanci 0 12 4.465908 0.000000 1256 +skill 0 12 4.465908 0.000000 1205 +characterist 0 12 4.465908 0.000000 1257 +readabl 0 12 4.465908 0.000000 1258 +valid 0 11 4.553877 0.000000 1299 +evolut 0 11 4.553877 0.000000 1314 +princip 0 10 4.653960 0.000000 1397 +mainten 0 9 4.753590 0.000000 1543 +leveson 0 9 4.753590 0.000000 1540 +latter 0 9 4.753590 0.000000 1522 +factor 0 9 4.753590 0.000000 1544 +respect 0 9 4.753590 0.000000 1545 +realist 0 8 4.875197 0.000000 1665 +risk 0 8 4.875197 0.000000 1689 +reus 0 8 4.875197 0.000000 1661 +architect 0 8 4.875197 0.000000 1624 +successfulli 0 7 5.010635 0.000000 1869 +attach 0 7 5.010635 0.000000 1785 +metric 0 7 5.010635 0.000000 1831 +ethic 0 7 5.010635 0.000000 1786 +creation 0 6 5.164786 0.000000 2069 +ensur 0 6 5.164786 0.000000 2012 +lack 0 6 5.164786 0.000000 1994 +phase 0 6 5.164786 0.000000 1977 +theproject 0 6 5.164786 0.000000 1981 +deliv 0 6 5.164786 0.000000 2070 +augment 0 5 5.347108 0.000000 2350 +assur 0 4 5.568345 0.000000 2722 +isthat 0 4 5.568345 0.000000 2723 +assess 0 4 5.568345 0.000000 2724 +employe 0 4 5.568345 0.000000 2717 +duti 0 3 5.857933 0.000000 3317 +boe 0 3 5.857933 0.000000 3318 +specialist 0 3 5.857933 0.000000 3319 +leadership 0 3 5.857933 0.000000 3320 +expertis 0 3 5.857933 0.000000 3321 +oral 0 3 5.857933 0.000000 3189 +listof 0 3 5.857933 0.000000 3322 +proper 0 3 5.857933 0.000000 3323 +interview 0 3 5.857933 0.000000 3324 +portfolio 0 2 6.263398 0.000000 4408 +educours 0 2 6.263398 0.000000 4409 +terminolog 0 2 6.263398 0.000000 4410 +thegroup 0 2 6.263398 0.000000 4054 +beavoid 0 2 6.263398 0.000000 4411 +thenorm 0 2 6.263398 0.000000 4412 +clariti 0 2 6.263398 0.000000 4413 +deliver 0 1 6.957497 0.000000 7804 +mockup 0 1 6.957497 0.000000 7805 +descriptioninstruct 0 1 6.957497 0.000000 7806 +softwaresystem 0 1 6.957497 0.000000 7807 +tocreat 0 1 6.957497 0.000000 7808 +effectiveor 0 1 6.957497 0.000000 7809 +topicsar 0 1 6.957497 0.000000 7810 +employersand 0 1 6.957497 0.000000 7811 +realbo 0 1 6.957497 0.000000 7812 +largegroup 0 1 6.957497 0.000000 7813 +cannotlearn 0 1 6.957497 0.000000 7814 +devotedto 0 1 6.957497 0.000000 7815 +isto 0 1 6.957497 0.000000 7816 +effectivelytogeth 0 1 6.957497 0.000000 7817 +disast 0 1 6.957497 0.000000 7818 +worktogeth 0 1 6.957497 0.000000 7819 +requirementsanalysi 0 1 6.957497 0.000000 7820 +areal 0 1 6.957497 0.000000 7821 +engineeringinstitut 0 1 6.957497 0.000000 7822 +providedat 0 1 6.957497 0.000000 7823 +playthat 0 1 6.957497 0.000000 7824 +projectso 0 1 6.957497 0.000000 7825 +softwaredevelop 0 1 6.957497 0.000000 7826 +responsiblefor 0 1 6.957497 0.000000 7827 +duri 0 1 6.957497 0.000000 7828 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..81139049 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +professor 0 137 1.945910 0.000000 76 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +question 0 91 2.397895 0.000000 141 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +mondai 0 77 2.564949 0.000000 206 +method 0 80 2.564949 0.000000 213 +sieg 0 69 2.708050 0.000000 260 +test 0 66 2.708050 0.000000 252 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +creat 0 63 2.772589 0.000000 277 +complex 0 64 2.772589 0.000000 269 +new 0 64 2.772589 0.000000 262 +locat 0 59 2.833213 0.000000 303 +sampl 0 53 2.944439 0.000000 339 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +fridai 0 44 3.135494 0.000000 390 +winter 0 36 3.367296 0.000000 500 +concept 0 32 3.465736 0.000000 537 +produc 0 30 3.555348 0.000000 572 +pagecs 0 26 3.688879 0.000000 658 +comp 0 26 3.688879 0.000000 650 +request 0 26 3.688879 0.000000 635 +fundament 0 25 3.737670 0.000000 661 +newsgroup 0 21 3.912023 0.000000 783 +feedback 0 19 4.007333 0.000000 854 +adam 0 17 4.110874 0.000000 934 +coupl 0 17 4.110874 0.000000 939 +essenti 0 13 4.382027 0.000000 1137 +loew 0 12 4.465908 0.000000 1252 +nanci 0 12 4.465908 0.000000 1256 +leveson 0 9 4.753590 0.000000 1540 +risk 0 8 4.875197 0.000000 1689 +successfulli 0 7 5.010635 0.000000 1869 +prentic 0 7 5.010635 0.000000 1838 +carlson 0 5 5.347108 0.000000 2351 +consum 0 5 5.347108 0.000000 2334 +interview 0 3 5.857933 0.000000 3324 +axiomat 0 3 5.857933 0.000000 3288 +mailinglist 0 3 5.857933 0.000000 3325 +militari 0 3 5.857933 0.000000 3326 +defens 0 3 5.857933 0.000000 3327 +educours 0 2 6.263398 0.000000 4409 +petri 0 2 6.263398 0.000000 4414 +engineeringmeet 0 1 6.957497 0.000000 7829 +eduta 0 1 6.957497 0.000000 7830 +descriptionthi 0 1 6.957497 0.000000 7831 +textbookghezzi 0 1 6.957497 0.000000 7832 +jazayeri 0 1 6.957497 0.000000 7833 +mandrioli 0 1 6.957497 0.000000 7834 +cohes 0 1 6.957497 0.000000 7835 +departmentsuggest 0 1 6.957497 0.000000 7836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..ece839b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,347 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +hour 0 165 1.791759 0.000000 46 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +assign 0 135 1.945910 0.000000 66 +hall 0 146 1.945910 0.000000 65 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +note 0 142 1.945910 0.000000 67 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +number 0 130 2.079442 0.000000 97 +seattl 0 120 2.079442 0.000000 103 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +final 0 116 2.197225 0.000000 108 +pleas 0 113 2.197225 0.000000 114 +topic 0 114 2.197225 0.000000 110 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +mani 0 92 2.397895 0.000000 150 +center 0 88 2.397895 0.000000 158 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +select 0 91 2.397895 0.000000 154 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +pictur 0 89 2.397895 0.000000 160 +exam 0 86 2.484907 0.000000 169 +info 0 85 2.484907 0.000000 176 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +build 0 85 2.484907 0.000000 184 +internet 0 83 2.484907 0.000000 186 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +issu 0 78 2.564949 0.000000 211 +homework 0 79 2.564949 0.000000 193 +messag 0 76 2.564949 0.000000 212 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +complet 0 77 2.564949 0.000000 208 +tuesdai 0 73 2.639057 0.000000 219 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +intellig 0 72 2.639057 0.000000 225 +meet 0 72 2.639057 0.000000 229 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +test 0 66 2.708050 0.000000 252 +receiv 0 66 2.708050 0.000000 244 +januari 0 62 2.772589 0.000000 264 +copi 0 63 2.772589 0.000000 284 +artifici 0 63 2.772589 0.000000 280 +wednesdai 0 64 2.772589 0.000000 261 +descript 0 64 2.772589 0.000000 271 +guid 0 63 2.772589 0.000000 267 +new 0 64 2.772589 0.000000 262 +import 0 65 2.772589 0.000000 282 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +sever 0 56 2.890372 0.000000 322 +unix 0 58 2.890372 0.000000 308 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +point 0 58 2.890372 0.000000 319 +detail 0 57 2.890372 0.000000 321 +februari 0 54 2.944439 0.000000 328 +cover 0 55 2.944439 0.000000 329 +extens 0 53 2.944439 0.000000 340 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +run 0 51 2.995732 0.000000 347 +week 0 52 2.995732 0.000000 343 +archiv 0 49 3.044522 0.000000 364 +standard 0 48 3.044522 0.000000 365 +visual 0 48 3.044522 0.000000 372 +format 0 48 3.044522 0.000000 356 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +quarter 0 47 3.091042 0.000000 389 +move 0 47 3.091042 0.000000 382 +midterm 0 45 3.135494 0.000000 392 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +anoth 0 45 3.135494 0.000000 408 +even 0 45 3.135494 0.000000 393 +mark 0 44 3.135494 0.000000 403 +offer 0 43 3.178054 0.000000 414 +show 0 43 3.178054 0.000000 417 +examin 0 42 3.218876 0.000000 424 +review 0 42 3.218876 0.000000 425 +edit 0 42 3.218876 0.000000 418 +past 0 42 3.218876 0.000000 428 +combin 0 42 3.218876 0.000000 421 +compani 0 41 3.218876 0.000000 423 +editor 0 41 3.218876 0.000000 433 +howev 0 41 3.218876 0.000000 422 +might 0 41 3.218876 0.000000 426 +tutori 0 39 3.258097 0.000000 437 +announc 0 40 3.258097 0.000000 441 +must 0 40 3.258097 0.000000 442 +multipl 0 39 3.258097 0.000000 453 +form 0 39 3.258097 0.000000 443 +close 0 38 3.295837 0.000000 465 +credit 0 38 3.295837 0.000000 460 +cost 0 37 3.332205 0.000000 480 +winter 0 36 3.367296 0.000000 500 +download 0 36 3.367296 0.000000 489 +post 0 35 3.401197 0.000000 505 +either 0 35 3.401197 0.000000 506 +approxim 0 35 3.401197 0.000000 509 +go 0 33 3.433987 0.000000 529 +given 0 32 3.465736 0.000000 538 +chapter 0 32 3.465736 0.000000 536 +common 0 30 3.555348 0.000000 574 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +hard 0 30 3.555348 0.000000 563 +option 0 30 3.555348 0.000000 575 +turn 0 29 3.583519 0.000000 586 +steve 0 29 3.583519 0.000000 594 +particip 0 29 3.583519 0.000000 589 +limit 0 29 3.583519 0.000000 585 +except 0 28 3.610918 0.000000 607 +full 0 28 3.610918 0.000000 615 +packag 0 28 3.610918 0.000000 614 +held 0 28 3.610918 0.000000 600 +symbol 0 27 3.637586 0.000000 620 +session 0 26 3.688879 0.000000 643 +pagecs 0 26 3.688879 0.000000 658 +rather 0 26 3.688879 0.000000 642 +relev 0 26 3.688879 0.000000 637 +altern 0 26 3.688879 0.000000 641 +although 0 25 3.737670 0.000000 667 +todai 0 25 3.737670 0.000000 672 +alwai 0 24 3.761200 0.000000 691 +interpret 0 24 3.761200 0.000000 686 +wish 0 24 3.761200 0.000000 692 +demonstr 0 24 3.761200 0.000000 694 +displai 0 23 3.806662 0.000000 712 +tent 0 22 3.850148 0.000000 739 +try 0 22 3.850148 0.000000 764 +varieti 0 22 3.850148 0.000000 740 +instead 0 22 3.850148 0.000000 756 +sent 0 22 3.850148 0.000000 763 +programminglanguag 0 21 3.912023 0.000000 782 +path 0 21 3.912023 0.000000 778 +viewer 0 21 3.912023 0.000000 787 +facil 0 20 3.951244 0.000000 814 +expert 0 20 3.951244 0.000000 833 +entir 0 20 3.951244 0.000000 811 +particularli 0 19 4.007333 0.000000 867 +lisp 1 18 4.060443 4.060443 897 +element 0 18 4.060443 0.000000 895 +seem 0 18 4.060443 0.000000 899 +macintosh 0 17 4.110874 0.000000 920 +stat 0 17 4.110874 0.000000 924 +whole 0 17 4.110874 0.000000 940 +regular 0 17 4.110874 0.000000 929 +modif 0 17 4.110874 0.000000 913 +transfer 0 16 4.174387 0.000000 967 +normal 0 16 4.174387 0.000000 995 +choic 0 16 4.174387 0.000000 979 +purchas 0 15 4.248495 0.000000 1030 +micro 0 15 4.248495 0.000000 1031 +easili 0 14 4.317488 0.000000 1077 +prolog 0 13 4.382027 0.000000 1155 +difficulti 0 13 4.382027 0.000000 1132 +emac 0 13 4.382027 0.000000 1143 +introduc 0 13 4.382027 0.000000 1139 +convert 0 13 4.382027 0.000000 1122 +host 0 11 4.553877 0.000000 1306 +transpar 0 11 4.553877 0.000000 1325 +abil 0 11 4.553877 0.000000 1341 +string 0 11 4.553877 0.000000 1340 +sens 0 11 4.553877 0.000000 1305 +tanimoto 0 10 4.653960 0.000000 1429 +bring 0 10 4.653960 0.000000 1430 +catalog 0 10 4.653960 0.000000 1431 +mainli 0 10 4.653960 0.000000 1432 +tradit 0 10 4.653960 0.000000 1404 +entitl 0 9 4.753590 0.000000 1490 +deadlin 0 9 4.753590 0.000000 1502 +attent 0 8 4.875197 0.000000 1651 +ruth 0 7 5.010635 0.000000 1870 +throughout 0 7 5.010635 0.000000 1871 +bookstor 0 7 5.010635 0.000000 1837 +fromth 0 7 5.010635 0.000000 1802 +fortun 0 7 5.010635 0.000000 1872 +remind 0 7 5.010635 0.000000 1799 +thompson 0 6 5.164786 0.000000 2049 +grammar 0 6 5.164786 0.000000 2058 +trail 0 6 5.164786 0.000000 2071 +fred 0 6 5.164786 0.000000 2072 +classroom 0 6 5.164786 0.000000 2006 +plu 0 6 5.164786 0.000000 2004 +mac 0 5 5.347108 0.000000 2292 +writeup 0 5 5.347108 0.000000 2352 +hardcopi 0 5 5.347108 0.000000 2246 +shell 0 5 5.347108 0.000000 2353 +respond 0 5 5.347108 0.000000 2354 +supplement 0 5 5.347108 0.000000 2355 +allegro 0 5 5.347108 0.000000 2314 +attract 0 5 5.347108 0.000000 2356 +freeman 0 4 5.568345 0.000000 2725 +basement 0 4 5.568345 0.000000 2663 +websit 0 4 5.568345 0.000000 2726 +andit 0 3 5.857933 0.000000 3328 +contentspag 0 3 5.857933 0.000000 3103 +orpostscript 0 3 5.857933 0.000000 3329 +mathematica 0 3 5.857933 0.000000 3303 +beginn 0 3 5.857933 0.000000 3330 +insieg 0 3 5.857933 0.000000 3331 +mscc 0 2 6.263398 0.000000 4406 +token 0 2 6.263398 0.000000 4415 +mileston 0 2 6.263398 0.000000 4416 +ofread 0 2 6.263398 0.000000 4417 +glossari 0 2 6.263398 0.000000 4418 +referenceon 0 2 6.263398 0.000000 4419 +usingcommon 0 2 6.263398 0.000000 4420 +themathemat 0 2 6.263398 0.000000 4421 +yacc 0 2 6.263398 0.000000 4422 +franz 0 2 6.263398 0.000000 4423 +thelaboratori 0 2 6.263398 0.000000 4424 +onthursdai 0 2 6.263398 0.000000 4425 +pencil 0 2 6.263398 0.000000 4426 +inthompson 0 1 6.957497 0.000000 7837 +koch 0 1 6.957497 0.000000 7838 +andersonmeet 0 1 6.957497 0.000000 7839 +andpars 0 1 6.957497 0.000000 7840 +incommon 0 1 6.957497 0.000000 7841 +purchasedsepar 0 1 6.957497 0.000000 7842 +fordigitool 0 1 6.957497 0.000000 7843 +thatmaintain 0 1 6.957497 0.000000 7844 +currentinform 0 1 6.957497 0.000000 7845 +introductionto 0 1 6.957497 0.000000 7846 +thatdoesn 0 1 6.957497 0.000000 7847 +promptli 0 1 6.957497 0.000000 7848 +theirimplement 0 1 6.957497 0.000000 7849 +buildingprogram 0 1 6.957497 0.000000 7850 +tointepret 0 1 6.957497 0.000000 7851 +alsolook 0 1 6.957497 0.000000 7852 +programmingfacil 0 1 6.957497 0.000000 7853 +thebas 0 1 6.957497 0.000000 7854 +allegrocommon 0 1 6.957497 0.000000 7855 +powerfulenviron 0 1 6.957497 0.000000 7856 +graphicsand 0 1 6.957497 0.000000 7857 +machinesof 0 1 6.957497 0.000000 7858 +theirown 0 1 6.957497 0.000000 7859 +xlisp 0 1 6.957497 0.000000 7860 +theseresourc 0 1 6.957497 0.000000 7861 +thatxlisp 0 1 6.957497 0.000000 7862 +bare 0 1 6.957497 0.000000 7863 +bone 0 1 6.957497 0.000000 7864 +nothav 0 1 6.957497 0.000000 7865 +disadvantag 0 1 6.957497 0.000000 7866 +labunless 0 1 6.957497 0.000000 7867 +fromdigitool 0 1 6.957497 0.000000 7868 +dealallow 0 1 6.957497 0.000000 7869 +lispfor 0 1 6.957497 0.000000 7870 +thistim 0 1 6.957497 0.000000 7871 +regardingread 0 1 6.957497 0.000000 7872 +printout 0 1 6.957497 0.000000 7873 +becov 0 1 6.957497 0.000000 7874 +announcedearli 0 1 6.957497 0.000000 7875 +snowflak 0 1 6.957497 0.000000 7876 +projectgener 0 1 6.957497 0.000000 7877 +aboutdemonstr 0 1 6.957497 0.000000 7878 +onmondai 0 1 6.957497 0.000000 7879 +exercisestokenizerassign 0 1 6.957497 0.000000 7880 +andpart 0 1 6.957497 0.000000 7881 +parsertokenizerpart 0 1 6.957497 0.000000 7882 +snowflakeassign 0 1 6.957497 0.000000 7883 +ondemonstr 0 1 6.957497 0.000000 7884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..35f5ace4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +hour 0 165 1.791759 0.000000 46 +assign 1 135 1.945910 1.945910 66 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +note 0 142 1.945910 0.000000 67 +introduct 0 126 2.079442 0.000000 87 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +final 0 116 2.197225 0.000000 108 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +solut 0 82 2.484907 0.000000 162 +second 0 81 2.484907 0.000000 166 +exam 0 86 2.484907 0.000000 169 +april 0 77 2.564949 0.000000 196 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +june 0 79 2.564949 0.000000 214 +interfac 0 79 2.564949 0.000000 209 +intellig 0 72 2.639057 0.000000 225 +thursdai 0 70 2.708050 0.000000 241 +artifici 0 63 2.772589 0.000000 280 +type 0 61 2.833213 0.000000 296 +instruct 0 53 2.944439 0.000000 332 +sampl 0 53 2.944439 0.000000 339 +basic 0 50 3.044522 0.000000 360 +third 0 43 3.178054 0.000000 412 +download 0 36 3.367296 0.000000 489 +common 0 30 3.555348 0.000000 574 +lisp 0 18 4.060443 0.000000 897 +record 0 18 4.060443 0.000000 890 +outlin 0 17 4.110874 0.000000 914 +macintosh 0 17 4.110874 0.000000 920 +emac 0 13 4.382027 0.000000 1143 +rich 0 10 4.653960 0.000000 1396 +noon 0 7 5.010635 0.000000 1804 +thompson 0 6 5.164786 0.000000 2049 +allegro 0 5 5.347108 0.000000 2314 +gentl 0 5 5.347108 0.000000 2264 +csoffic 0 4 5.568345 0.000000 2727 +knight 0 4 5.568345 0.000000 2728 +turnin 0 4 5.568345 0.000000 2654 +holden 0 3 5.857933 0.000000 3314 +redston 0 3 5.857933 0.000000 3332 +alistair 0 3 5.857933 0.000000 3315 +joshua 0 3 5.857933 0.000000 3333 +noonta 0 2 6.263398 0.000000 4427 +secondedit 0 2 6.263398 0.000000 4096 +touretzki 0 2 6.263398 0.000000 4428 +refcard 0 1 6.957497 0.000000 7885 +intelligencecs 0 1 6.957497 0.000000 7886 +msoffic 0 1 6.957497 0.000000 7887 +symboliccomput 0 1 6.957497 0.000000 7888 +emacsinterfac 0 1 6.957497 0.000000 7889 +standalonelisp 0 1 6.957497 0.000000 7890 +gradesredston 0 1 6.957497 0.000000 7891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..9128e5f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +read 0 154 1.791759 0.000000 47 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +support 0 132 1.945910 0.000000 83 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +solut 0 82 2.484907 0.000000 162 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +free 0 73 2.639057 0.000000 224 +materi 0 75 2.639057 0.000000 221 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +previou 0 62 2.772589 0.000000 290 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +three 0 54 2.944439 0.000000 330 +format 0 48 3.044522 0.000000 356 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +math 0 44 3.135494 0.000000 402 +winter 0 36 3.367296 0.000000 500 +print 0 34 3.401197 0.000000 503 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +viewer 0 21 3.912023 0.000000 787 +martin 0 21 3.912023 0.000000 794 +latest 0 21 3.912023 0.000000 785 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +render 0 17 4.110874 0.000000 947 +ascii 0 15 4.248495 0.000000 1032 +latex 0 14 4.317488 0.000000 1064 +command 0 14 4.317488 0.000000 1083 +larri 0 13 4.382027 0.000000 1142 +karlin 0 13 4.382027 0.000000 1176 +web 0 12 4.465908 0.000000 1249 +errata 0 10 4.653960 0.000000 1403 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +legibl 0 7 5.010635 0.000000 1866 +adob 0 7 5.010635 0.000000 1873 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 0 5 5.347108 0.000000 2345 +thecours 0 4 5.568345 0.000000 2685 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +tompaclass 0 3 5.857933 0.000000 3310 +tompa 0 3 5.857933 0.000000 3305 +aberman 0 2 6.263398 0.000000 4429 +midtem 0 1 6.957497 0.000000 7892 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..34ff8ffe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +support 0 132 1.945910 0.000000 83 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +final 1 116 2.197225 2.197225 108 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +solut 0 82 2.484907 0.000000 162 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +sourc 0 77 2.564949 0.000000 201 +exampl 0 77 2.564949 0.000000 195 +free 0 73 2.639057 0.000000 224 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +syllabu 0 67 2.708050 0.000000 247 +sieg 0 69 2.708050 0.000000 260 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +three 0 54 2.944439 0.000000 330 +format 0 48 3.044522 0.000000 356 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +math 0 44 3.135494 0.000000 402 +print 0 34 3.401197 0.000000 503 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +tent 0 22 3.850148 0.000000 739 +sent 0 22 3.850148 0.000000 763 +viewer 0 21 3.912023 0.000000 787 +latest 0 21 3.912023 0.000000 785 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +render 0 17 4.110874 0.000000 947 +ascii 0 15 4.248495 0.000000 1032 +latex 0 14 4.317488 0.000000 1064 +command 0 14 4.317488 0.000000 1083 +everyth 0 13 4.382027 0.000000 1169 +larri 0 13 4.382027 0.000000 1142 +web 0 12 4.465908 0.000000 1249 +errata 0 10 4.653960 0.000000 1403 +admin 0 9 4.753590 0.000000 1476 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +legibl 0 7 5.010635 0.000000 1866 +adob 0 7 5.010635 0.000000 1873 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +acrobat 0 6 5.164786 0.000000 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 0 5 5.347108 0.000000 2345 +thecours 0 4 5.568345 0.000000 2685 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +sendmail 0 3 5.857933 0.000000 3099 +jayram 0 1 6.957497 0.000000 7893 +thathachar 0 1 6.957497 0.000000 7894 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..6715e73c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +link 1 247 1.386294 1.386294 24 +washington 0 236 1.386294 0.000000 32 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +databas 1 122 2.079442 2.079442 86 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +imag 0 91 2.397895 0.000000 161 +homework 0 79 2.564949 0.000000 193 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +sieg 0 69 2.708050 0.000000 260 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +prof 0 64 2.772589 0.000000 273 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +telephon 0 50 3.044522 0.000000 373 +quarter 0 47 3.091042 0.000000 389 +announc 0 40 3.258097 0.000000 441 +probabl 0 40 3.258097 0.000000 455 +word 0 34 3.401197 0.000000 508 +queri 0 33 3.433987 0.000000 524 +request 0 26 3.688879 0.000000 635 +left 0 19 4.007333 0.000000 851 +save 0 14 4.317488 0.000000 1099 +dbm 0 13 4.382027 0.000000 1136 +linda 0 10 4.653960 0.000000 1394 +shapiro 0 8 4.875197 0.000000 1686 +potenti 0 8 4.875197 0.000000 1690 +shift 0 5 5.347108 0.000000 2357 +systemsfal 0 4 5.568345 0.000000 2683 +patrick 0 3 5.857933 0.000000 3334 +qbic 0 3 5.857933 0.000000 3294 +systemscs 0 1 6.957497 0.000000 7895 +crowlei 0 1 6.957497 0.000000 7896 +pcrowlei 0 1 6.957497 0.000000 7897 +unisql 0 1 6.957497 0.000000 7898 +webcs 0 1 6.957497 0.000000 7899 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..b8ed6a47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +note 0 142 1.945910 0.000000 67 +relat 0 139 1.945910 0.000000 68 +click 0 142 1.945910 0.000000 78 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +instructor 0 108 2.197225 0.000000 107 +person 0 111 2.197225 0.000000 117 +send 0 114 2.197225 0.000000 109 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +solut 0 82 2.484907 0.000000 162 +info 0 85 2.484907 0.000000 176 +messag 0 76 2.564949 0.000000 212 +appear 0 78 2.564949 0.000000 210 +materi 0 75 2.639057 0.000000 221 +sieg 0 69 2.708050 0.000000 260 +receiv 0 66 2.708050 0.000000 244 +handout 0 64 2.772589 0.000000 263 +space 0 57 2.890372 0.000000 310 +cover 0 55 2.944439 0.000000 329 +appoint 0 49 3.044522 0.000000 358 +archiv 0 49 3.044522 0.000000 364 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +brian 0 38 3.295837 0.000000 466 +slide 0 38 3.295837 0.000000 467 +word 0 34 3.401197 0.000000 508 +autumn 0 31 3.496508 0.000000 558 +scale 0 28 3.610918 0.000000 613 +actual 0 28 3.610918 0.000000 604 +administr 0 27 3.637586 0.000000 628 +wish 0 24 3.761200 0.000000 692 +sent 0 22 3.850148 0.000000 763 +feedback 0 19 4.007333 0.000000 854 +bershad 0 18 4.060443 0.000000 902 +intro 0 17 4.110874 0.000000 915 +outlin 0 17 4.110874 0.000000 914 +reflect 0 15 4.248495 0.000000 1034 +webmast 0 15 4.248495 0.000000 1045 +anonym 0 14 4.317488 0.000000 1100 +regularli 0 11 4.553877 0.000000 1338 +hint 0 10 4.653960 0.000000 1419 +sung 0 6 5.164786 0.000000 2075 +pace 0 6 5.164786 0.000000 2011 +carefulli 0 6 5.164786 0.000000 2045 +lost 0 5 5.347108 0.000000 2358 +choi 0 4 5.568345 0.000000 2732 +vital 0 4 5.568345 0.000000 2733 +surviv 0 4 5.568345 0.000000 2734 +aggress 0 3 5.857933 0.000000 3240 +andwil 0 3 5.857933 0.000000 3335 +wisdom 0 2 6.263398 0.000000 4430 +schedulewhat 0 2 6.263398 0.000000 4139 +adminth 0 1 6.957497 0.000000 7900 +andoth 0 1 6.957497 0.000000 7901 +projectsdescript 0 1 6.957497 0.000000 7902 +solutionsto 0 1 6.957497 0.000000 7903 +notesnot 0 1 6.957497 0.000000 7904 +watchthi 0 1 6.957497 0.000000 7905 +andgrad 0 1 6.957497 0.000000 7906 +onproject 0 1 6.957497 0.000000 7907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..e7c113b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +relat 0 139 1.945910 0.000000 68 +document 0 121 2.079442 0.000000 89 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +grade 0 90 2.397895 0.000000 142 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +exampl 0 77 2.564949 0.000000 195 +write 0 72 2.639057 0.000000 222 +degre 0 69 2.708050 0.000000 259 +guid 0 63 2.772589 0.000000 267 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +instruct 0 53 2.944439 0.000000 332 +frequent 0 49 3.044522 0.000000 367 +visitor 0 49 3.044522 0.000000 371 +quarter 0 47 3.091042 0.000000 389 +netscap 0 44 3.135494 0.000000 395 +mean 0 37 3.332205 0.000000 477 +winter 0 36 3.367296 0.000000 500 +ad 0 32 3.465736 0.000000 544 +autumn 0 31 3.496508 0.000000 558 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +hypermedia 0 12 4.465908 0.000000 1247 +baker 0 7 5.010635 0.000000 1812 +silicon 0 6 5.164786 0.000000 2076 +opengl 0 5 5.347108 0.000000 2299 +bounti 0 4 5.568345 0.000000 2715 +pighin 0 4 5.568345 0.000000 2735 +assignmentshomework 0 4 5.568345 0.000000 2721 +thisdocu 0 3 5.857933 0.000000 3336 +quarterwelcom 0 2 6.263398 0.000000 4378 +indi 0 2 6.263398 0.000000 4431 +keepin 0 1 6.957497 0.000000 7908 +informationwil 0 1 6.957497 0.000000 7909 +classpersonnel 0 1 6.957497 0.000000 7910 +syllabuscours 0 1 6.957497 0.000000 7911 +calendarta 0 1 6.957497 0.000000 7912 +hourshandout 0 1 6.957497 0.000000 7913 +assignmentslectur 0 1 6.957497 0.000000 7914 +notesread 0 1 6.957497 0.000000 7915 +assignmentsprojectsproject 0 1 6.957497 0.000000 7916 +handoutsproject 0 1 6.957497 0.000000 7917 +artifactsproject 0 1 6.957497 0.000000 7918 +sessionsproject 0 1 6.957497 0.000000 7919 +policyproject 0 1 6.957497 0.000000 7920 +upslibui 0 1 6.957497 0.000000 7921 +documentationoth 0 1 6.957497 0.000000 7922 +informationget 0 1 6.957497 0.000000 7923 +classhearn 0 1 6.957497 0.000000 7924 +erratath 0 1 6.957497 0.000000 7925 +labus 0 1 6.957497 0.000000 7926 +pagegraph 0 1 6.957497 0.000000 7927 +linkssgi 0 1 6.957497 0.000000 7928 +surfgrafica 0 1 6.957497 0.000000 7929 +obscurasiggraphgrailgraph 0 1 6.957497 0.000000 7930 +indexoth 0 1 6.957497 0.000000 7931 +linksmvi 0 1 6.957497 0.000000 7932 +departmentth 0 1 6.957497 0.000000 7933 +programth 0 1 6.957497 0.000000 7934 +programweb 0 1 6.957497 0.000000 7935 +helpbas 0 1 6.957497 0.000000 7936 +helpmosa 0 1 6.957497 0.000000 7937 +lynxus 0 1 6.957497 0.000000 7938 +indyspighin 0 1 6.957497 0.000000 7939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..daadd032 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +commun 0 95 2.397895 0.000000 157 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +html 0 75 2.639057 0.000000 235 +servic 0 72 2.639057 0.000000 236 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +sieg 0 69 2.708050 0.000000 260 +would 0 67 2.708050 0.000000 251 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +prof 0 64 2.772589 0.000000 273 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +browser 0 56 2.890372 0.000000 313 +cover 0 55 2.944439 0.000000 329 +three 0 54 2.944439 0.000000 330 +week 0 52 2.995732 0.000000 343 +format 1 48 3.044522 3.044522 356 +frequent 0 49 3.044522 0.000000 367 +discuss 0 45 3.135494 0.000000 399 +math 0 44 3.135494 0.000000 402 +might 0 41 3.218876 0.000000 426 +origin 0 38 3.295837 0.000000 472 +everi 0 34 3.401197 0.000000 519 +autumn 0 31 3.496508 0.000000 558 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +request 0 26 3.688879 0.000000 635 +consult 0 24 3.761200 0.000000 687 +handl 0 24 3.761200 0.000000 685 +thu 0 21 3.912023 0.000000 773 +fact 0 21 3.912023 0.000000 780 +viewer 0 21 3.912023 0.000000 787 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +letter 0 16 4.174387 0.000000 981 +indic 0 15 4.248495 0.000000 1013 +ascii 0 15 4.248495 0.000000 1032 +latex 0 14 4.317488 0.000000 1064 +command 0 14 4.317488 0.000000 1083 +convert 0 13 4.382027 0.000000 1122 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +larger 0 7 5.010635 0.000000 1875 +legibl 0 7 5.010635 0.000000 1866 +ghostscript 0 7 5.010635 0.000000 1867 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +latexhtml 0 5 5.347108 0.000000 2347 +arun 0 4 5.568345 0.000000 2736 +csoffic 0 4 5.568345 0.000000 2727 +accommod 0 3 5.857933 0.000000 3337 +somani 0 2 6.263398 0.000000 4432 +cslectur 0 2 6.263398 0.000000 4433 +havea 0 2 6.263398 0.000000 4434 +disabl 0 1 6.957497 0.000000 7940 +jari 0 1 6.957497 0.000000 7941 +networksautumn 0 1 6.957497 0.000000 7942 +eebphon 0 1 6.957497 0.000000 7943 +kristensen 0 1 6.957497 0.000000 7944 +tomatch 0 1 6.957497 0.000000 7945 +andprovid 0 1 6.957497 0.000000 7946 +timewindow 0 1 6.957497 0.000000 7947 +overheadshomeworksprojectsinterest 0 1 6.957497 0.000000 7948 +stuffattentionif 0 1 6.957497 0.000000 7949 +pleasecontact 0 1 6.957497 0.000000 7950 +schmitz 0 1 6.957497 0.000000 7951 +requiresacadem 0 1 6.957497 0.000000 7952 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..35bbad95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +text 0 98 2.302585 0.000000 133 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +html 0 75 2.639057 0.000000 235 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +sieg 0 69 2.708050 0.000000 260 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +browser 0 56 2.890372 0.000000 313 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +frequent 0 49 3.044522 0.000000 367 +math 0 44 3.135494 0.000000 402 +origin 0 38 3.295837 0.000000 472 +richard 0 31 3.496508 0.000000 559 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +handl 0 24 3.761200 0.000000 685 +william 0 22 3.850148 0.000000 765 +fact 0 21 3.912023 0.000000 780 +viewer 0 21 3.912023 0.000000 787 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +ascii 0 15 4.248495 0.000000 1032 +latex 0 14 4.317488 0.000000 1064 +command 0 14 4.317488 0.000000 1083 +convert 0 13 4.382027 0.000000 1122 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +chan 0 7 5.010635 0.000000 1876 +legibl 0 7 5.010635 0.000000 1866 +ghostscript 0 7 5.010635 0.000000 1867 +ladner 0 6 5.164786 0.000000 2062 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +latexhtml 0 5 5.347108 0.000000 2347 +csoffic 0 4 5.568345 0.000000 2727 +wchan 0 3 5.857933 0.000000 3338 +cslectur 0 2 6.263398 0.000000 4433 +noonta 0 2 6.263398 0.000000 4427 +eduwchan 0 2 6.263398 0.000000 4435 +networksspr 0 1 6.957497 0.000000 7953 +overheadshomeworksprojectsabout 0 1 6.957497 0.000000 7954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..fda8a46f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +washington 0 236 1.386294 0.000000 32 +fall 1 181 1.609438 1.609438 40 +advanc 0 99 2.302585 0.000000 130 +found 0 53 2.944439 0.000000 337 +digit 0 52 2.995732 0.000000 348 +pagecs 0 26 3.688879 0.000000 658 +designt 0 2 6.263398 0.000000 4436 +kehl 0 2 6.263398 0.000000 4437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..7257e791 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +data 1 170 1.791759 1.791759 49 +hour 0 165 1.791759 0.000000 46 +recent 0 167 1.791759 0.000000 58 +assign 0 135 1.945910 0.000000 66 +welcom 0 122 2.079442 0.000000 99 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +final 0 116 2.197225 0.000000 108 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +book 0 99 2.302585 0.000000 131 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +chang 0 82 2.484907 0.000000 163 +exam 0 86 2.484907 0.000000 169 +resourc 0 81 2.484907 0.000000 172 +academ 0 82 2.484907 0.000000 178 +homework 0 79 2.564949 0.000000 193 +server 0 76 2.564949 0.000000 204 +state 0 76 2.564949 0.000000 207 +sourc 0 77 2.564949 0.000000 201 +logic 1 71 2.639057 2.639057 230 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +test 0 66 2.708050 0.000000 252 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +laboratori 0 63 2.772589 0.000000 292 +written 0 63 2.772589 0.000000 278 +handout 0 64 2.772589 0.000000 263 +collect 0 65 2.772589 0.000000 268 +march 0 61 2.833213 0.000000 295 +februari 0 54 2.944439 0.000000 328 +sampl 0 53 2.944439 0.000000 339 +cover 0 55 2.944439 0.000000 329 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +adapt 0 46 3.091042 0.000000 387 +fridai 0 44 3.135494 0.000000 390 +mark 0 44 3.135494 0.000000 403 +combin 0 42 3.218876 0.000000 421 +review 0 42 3.218876 0.000000 425 +announc 0 40 3.258097 0.000000 441 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +staff 0 36 3.367296 0.000000 490 +copyright 0 36 3.367296 0.000000 495 +richard 0 31 3.496508 0.000000 559 +option 0 30 3.555348 0.000000 575 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +sequenti 0 22 3.850148 0.000000 745 +vlsi 0 21 3.912023 0.000000 795 +thur 0 19 4.007333 0.000000 847 +scott 0 18 4.060443 0.000000 884 +dilbert 0 16 4.174387 0.000000 996 +sheet 0 16 4.174387 0.000000 973 +portion 0 16 4.174387 0.000000 971 +comic 0 14 4.317488 0.000000 1103 +reprint 0 14 4.317488 0.000000 1097 +johnson 0 13 4.382027 0.000000 1162 +care 0 13 4.382027 0.000000 1177 +duli 0 12 4.465908 0.000000 1248 +tue 0 11 4.553877 0.000000 1308 +stephen 0 11 4.553877 0.000000 1342 +nonprofit 0 11 4.553877 0.000000 1339 +fpga 0 10 4.653960 0.000000 1433 +motorola 0 9 4.753590 0.000000 1546 +gaetano 0 6 5.164786 0.000000 2068 +philip 0 6 5.164786 0.000000 2005 +writeup 0 5 5.347108 0.000000 2352 +borriello 0 5 5.347108 0.000000 2349 +midnight 0 4 5.568345 0.000000 2599 +murphi 0 4 5.568345 0.000000 2737 +semiconductor 0 3 5.857933 0.000000 3339 +semiconduct 0 3 5.857933 0.000000 3340 +micron 0 3 5.857933 0.000000 3341 +kehl 0 2 6.263398 0.000000 4437 +designt 0 2 6.263398 0.000000 4436 +aaron 0 2 6.263398 0.000000 4438 +comprehensivelist 0 2 6.263398 0.000000 4439 +icmanufactur 0 2 6.263398 0.000000 4440 +optionlab 0 1 6.957497 0.000000 7955 +syllabusschedul 0 1 6.957497 0.000000 7956 +savoi 0 1 6.957497 0.000000 7957 +savac 0 1 6.957497 0.000000 7958 +chinn 0 1 6.957497 0.000000 7959 +richin 0 1 6.957497 0.000000 7960 +howard 0 1 6.957497 0.000000 7961 +shchang 0 1 6.957497 0.000000 7962 +csjason 0 1 6.957497 0.000000 7963 +quarterhomework 0 1 6.957497 0.000000 7964 +assignmentsweb 0 1 6.957497 0.000000 7965 +duehomework 0 1 6.957497 0.000000 7966 +abel 0 1 6.957497 0.000000 7967 +fixtur 0 1 6.957497 0.000000 7968 +communicationoth 0 1 6.957497 0.000000 7969 +sheetsth 0 1 6.957497 0.000000 7970 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..58848a36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +model 0 145 1.945910 0.000000 69 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +postscript 1 131 2.079442 2.079442 90 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +memori 0 101 2.302585 0.000000 139 +follow 0 92 2.397895 0.000000 143 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +html 0 75 2.639057 0.000000 235 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +free 0 73 2.639057 0.000000 224 +sieg 0 69 2.708050 0.000000 260 +simul 0 66 2.708050 0.000000 255 +test 0 66 2.708050 0.000000 252 +wednesdai 0 64 2.772589 0.000000 261 +organ 0 65 2.772589 0.000000 265 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +previou 0 62 2.772589 0.000000 290 +simpl 0 60 2.833213 0.000000 298 +sampl 0 53 2.944439 0.000000 339 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +appoint 0 49 3.044522 0.000000 358 +quarter 0 47 3.091042 0.000000 389 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +midterm 0 45 3.135494 0.000000 392 +review 0 42 3.218876 0.000000 425 +fast 0 42 3.218876 0.000000 429 +form 0 39 3.258097 0.000000 443 +robert 0 30 3.555348 0.000000 567 +compon 0 30 3.555348 0.000000 570 +common 0 30 3.555348 0.000000 574 +revis 0 26 3.688879 0.000000 640 +hierarchi 0 22 3.850148 0.000000 744 +color 0 22 3.850148 0.000000 762 +unit 0 21 3.912023 0.000000 779 +binari 0 20 3.951244 0.000000 823 +prerequisit 0 19 4.007333 0.000000 846 +segment 0 17 4.110874 0.000000 931 +regist 0 17 4.110874 0.000000 938 +interconnect 0 17 4.110874 0.000000 937 +sheet 0 16 4.174387 0.000000 973 +transfer 0 16 4.174387 0.000000 967 +larri 0 13 4.382027 0.000000 1142 +assembl 0 12 4.465908 0.000000 1207 +holidai 0 12 4.465908 0.000000 1224 +loew 0 12 4.465908 0.000000 1252 +catalog 0 10 4.653960 0.000000 1431 +arithmet 0 10 4.653960 0.000000 1388 +modul 0 10 4.653960 0.000000 1434 +card 0 10 4.653960 0.000000 1435 +watson 0 8 4.875197 0.000000 1691 +pipelin 0 7 5.010635 0.000000 1830 +snyder 0 5 5.347108 0.000000 2359 +mip 0 4 5.568345 0.000000 2738 +microprogram 0 4 5.568345 0.000000 2604 +appendix 0 4 5.568345 0.000000 2739 +prog 0 4 5.568345 0.000000 2740 +verilog 0 2 6.263398 0.000000 4441 +judi 0 2 6.263398 0.000000 4442 +andorgan 0 2 6.263398 0.000000 4443 +skim 0 1 6.957497 0.000000 7971 +jwatson 0 1 6.957497 0.000000 7972 +chenoffic 0 1 6.957497 0.000000 7973 +thursdays 0 1 6.957497 0.000000 7974 +chensg 0 1 6.957497 0.000000 7975 +laboratoryproject 0 1 6.957497 0.000000 7976 +setprocessor 0 1 6.957497 0.000000 7977 +chap 0 1 6.957497 0.000000 7978 +referencesthi 0 1 6.957497 0.000000 7979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..a483ec34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +hour 0 165 1.791759 0.000000 46 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +assign 0 135 1.945910 0.000000 66 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +welcom 0 122 2.079442 0.000000 99 +report 0 131 2.079442 0.000000 92 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +send 0 114 2.197225 0.000000 109 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +world 0 115 2.197225 0.000000 126 +version 0 113 2.197225 0.000000 122 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +part 0 98 2.302585 0.000000 129 +text 0 98 2.302585 0.000000 133 +take 0 97 2.302585 0.000000 134 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +search 0 95 2.397895 0.000000 155 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +exam 0 86 2.484907 0.000000 169 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +info 0 85 2.484907 0.000000 176 +wide 0 84 2.484907 0.000000 185 +learn 0 86 2.484907 0.000000 170 +solut 0 82 2.484907 0.000000 162 +april 0 77 2.564949 0.000000 196 +mondai 0 77 2.564949 0.000000 206 +interfac 0 79 2.564949 0.000000 209 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +know 0 80 2.564949 0.000000 198 +state 0 76 2.564949 0.000000 207 +june 0 79 2.564949 0.000000 214 +intellig 0 72 2.639057 0.000000 225 +tuesdai 0 73 2.639057 0.000000 219 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +meet 0 72 2.639057 0.000000 229 +sieg 0 69 2.708050 0.000000 260 +test 0 66 2.708050 0.000000 252 +window 0 68 2.708050 0.000000 242 +thursdai 0 70 2.708050 0.000000 241 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +wednesdai 0 64 2.772589 0.000000 261 +artifici 0 63 2.772589 0.000000 280 +creat 0 63 2.772589 0.000000 277 +new 0 64 2.772589 0.000000 262 +laboratori 0 63 2.772589 0.000000 292 +plan 0 65 2.772589 0.000000 272 +descript 0 64 2.772589 0.000000 271 +evalu 0 64 2.772589 0.000000 266 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +best 0 59 2.833213 0.000000 299 +reason 0 57 2.890372 0.000000 318 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +archiv 0 49 3.044522 0.000000 364 +standard 0 48 3.044522 0.000000 365 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +featur 0 46 3.091042 0.000000 386 +understand 0 47 3.091042 0.000000 384 +move 0 47 3.091042 0.000000 382 +fridai 0 44 3.135494 0.000000 390 +midterm 0 45 3.135494 0.000000 392 +mark 0 44 3.135494 0.000000 403 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +examin 0 42 3.218876 0.000000 424 +review 0 42 3.218876 0.000000 425 +vision 0 41 3.218876 0.000000 430 +form 0 39 3.258097 0.000000 443 +multipl 0 39 3.258097 0.000000 453 +announc 0 40 3.258097 0.000000 441 +continu 0 39 3.258097 0.000000 448 +credit 0 38 3.295837 0.000000 460 +close 0 38 3.295837 0.000000 465 +download 0 36 3.367296 0.000000 489 +staff 0 36 3.367296 0.000000 490 +post 0 35 3.401197 0.000000 505 +represent 0 35 3.401197 0.000000 512 +least 0 35 3.401197 0.000000 516 +either 0 35 3.401197 0.000000 506 +go 0 33 3.433987 0.000000 529 +kind 0 32 3.465736 0.000000 541 +given 0 32 3.465736 0.000000 538 +common 0 30 3.555348 0.000000 574 +neural 0 30 3.555348 0.000000 578 +hard 0 30 3.555348 0.000000 563 +option 0 30 3.555348 0.000000 575 +steve 0 29 3.583519 0.000000 594 +particip 0 29 3.583519 0.000000 589 +turn 0 29 3.583519 0.000000 586 +held 0 28 3.610918 0.000000 600 +propos 0 28 3.610918 0.000000 602 +progress 0 28 3.610918 0.000000 598 +session 0 26 3.688879 0.000000 643 +pagecs 0 26 3.688879 0.000000 658 +rather 0 26 3.688879 0.000000 642 +demonstr 0 24 3.761200 0.000000 694 +tent 0 22 3.850148 0.000000 739 +self 0 22 3.850148 0.000000 761 +try 0 22 3.850148 0.000000 764 +newsgroup 0 21 3.912023 0.000000 783 +expert 0 20 3.951244 0.000000 833 +entir 0 20 3.951244 0.000000 811 +facil 0 20 3.951244 0.000000 814 +separ 0 19 4.007333 0.000000 844 +excel 0 19 4.007333 0.000000 868 +exercis 0 19 4.007333 0.000000 842 +lisp 1 18 4.060443 4.060443 897 +element 0 18 4.060443 0.000000 895 +demo 0 18 4.060443 0.000000 888 +seem 0 18 4.060443 0.000000 899 +statu 0 18 4.060443 0.000000 885 +regular 0 17 4.110874 0.000000 929 +advantag 0 16 4.174387 0.000000 987 +choic 0 16 4.174387 0.000000 979 +explan 0 16 4.174387 0.000000 985 +portion 0 16 4.174387 0.000000 971 +purchas 0 15 4.248495 0.000000 1030 +workload 0 12 4.465908 0.000000 1210 +rest 0 12 4.465908 0.000000 1259 +holidai 0 12 4.465908 0.000000 1224 +sens 0 11 4.553877 0.000000 1305 +probabilist 0 11 4.553877 0.000000 1343 +tanimoto 0 10 4.653960 0.000000 1429 +bring 0 10 4.653960 0.000000 1430 +preliminari 0 9 4.753590 0.000000 1480 +implementationof 0 7 5.010635 0.000000 1813 +reduct 0 7 5.010635 0.000000 1877 +pentium 0 6 5.164786 0.000000 2077 +approv 0 6 5.164786 0.000000 2078 +jeremi 0 5 5.347108 0.000000 2360 +allegro 0 5 5.347108 0.000000 2314 +hardcopi 0 5 5.347108 0.000000 2246 +forprogram 0 5 5.347108 0.000000 2361 +attract 0 5 5.347108 0.000000 2356 +net 0 4 5.568345 0.000000 2741 +peer 0 4 5.568345 0.000000 2742 +freeman 0 4 5.568345 0.000000 2725 +screenshot 0 4 5.568345 0.000000 2743 +andit 0 3 5.857933 0.000000 3328 +contentspag 0 3 5.857933 0.000000 3103 +orpostscript 0 3 5.857933 0.000000 3329 +programmingtechniqu 0 3 5.857933 0.000000 3113 +insieg 0 3 5.857933 0.000000 3331 +evaluationof 0 3 5.857933 0.000000 3192 +assignmentsassign 0 3 5.857933 0.000000 3342 +youdon 0 2 6.263398 0.000000 4444 +referenceon 0 2 6.263398 0.000000 4419 +usingcommon 0 2 6.263398 0.000000 4420 +franz 0 2 6.263398 0.000000 4423 +inour 0 2 6.263398 0.000000 4445 +ofproject 0 2 6.263398 0.000000 4446 +csor 0 1 6.957497 0.000000 7980 +pnew 0 1 6.957497 0.000000 7981 +baermeet 0 1 6.957497 0.000000 7982 +windowsimplement 0 1 6.957497 0.000000 7983 +programdevelop 0 1 6.957497 0.000000 7984 +theintel 0 1 6.957497 0.000000 7985 +isfor 0 1 6.957497 0.000000 7986 +bedownload 0 1 6.957497 0.000000 7987 +givenaccord 0 1 6.957497 0.000000 7988 +alist 0 1 6.957497 0.000000 7989 +coversboth 0 1 6.957497 0.000000 7990 +logicalreason 0 1 6.957497 0.000000 7991 +clo 0 1 6.957497 0.000000 7992 +programmingpart 0 1 6.957497 0.000000 7993 +ofhow 0 1 6.957497 0.000000 7994 +circul 0 1 6.957497 0.000000 7995 +orturn 0 1 6.957497 0.000000 7996 +wrap 0 1 6.957497 0.000000 7997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..098505af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +data 1 170 1.791759 1.791759 49 +hour 0 165 1.791759 0.000000 46 +recent 0 167 1.791759 0.000000 58 +assign 0 135 1.945910 0.000000 66 +area 0 144 1.945910 0.000000 80 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +seattl 0 120 2.079442 0.000000 103 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +book 0 99 2.302585 0.000000 131 +info 0 85 2.484907 0.000000 176 +resourc 0 81 2.484907 0.000000 172 +academ 0 82 2.484907 0.000000 178 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +syllabu 0 67 2.708050 0.000000 247 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +laboratori 0 63 2.772589 0.000000 292 +collect 0 65 2.772589 0.000000 268 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +adapt 0 46 3.091042 0.000000 387 +societi 0 40 3.258097 0.000000 456 +announc 0 40 3.258097 0.000000 441 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +robot 0 36 3.367296 0.000000 497 +staff 0 36 3.367296 0.000000 490 +copyright 0 36 3.367296 0.000000 495 +steve 0 29 3.583519 0.000000 594 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +smith 0 20 3.951244 0.000000 820 +nice 0 20 3.951244 0.000000 809 +anderson 0 19 4.007333 0.000000 860 +dilbert 0 16 4.174387 0.000000 996 +sheet 0 16 4.174387 0.000000 973 +portion 0 16 4.174387 0.000000 971 +comic 0 14 4.317488 0.000000 1103 +reprint 0 14 4.317488 0.000000 1097 +care 0 13 4.382027 0.000000 1177 +loew 0 12 4.465908 0.000000 1252 +duli 0 12 4.465908 0.000000 1248 +stephen 0 11 4.553877 0.000000 1342 +nonprofit 0 11 4.553877 0.000000 1339 +motorola 0 9 4.753590 0.000000 1546 +portland 0 7 5.010635 0.000000 1878 +fred 0 6 5.164786 0.000000 2072 +gaetano 0 6 5.164786 0.000000 2068 +philip 0 6 5.164786 0.000000 2005 +borriello 0 5 5.347108 0.000000 2349 +kent 0 4 5.568345 0.000000 2744 +murphi 0 4 5.568345 0.000000 2737 +comprehens 0 4 5.568345 0.000000 2745 +semiconductor 0 3 5.857933 0.000000 3339 +semiconduct 0 3 5.857933 0.000000 3340 +burn 1 2 6.263398 6.263398 4447 +serverth 0 2 6.263398 0.000000 4448 +designstev 0 1 6.957497 0.000000 7998 +casei 0 1 6.957497 0.000000 7999 +studentslab 0 1 6.957497 0.000000 8000 +mchc 0 1 6.957497 0.000000 8001 +martinrobot 0 1 6.957497 0.000000 8002 +societyoth 0 1 6.957497 0.000000 8003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..0267274d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +document 0 121 2.079442 0.000000 89 +schedul 0 119 2.079442 0.000000 85 +send 0 114 2.197225 0.000000 109 +final 0 116 2.197225 0.000000 108 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +handout 0 64 2.772589 0.000000 263 +march 0 61 2.833213 0.000000 295 +usual 0 28 3.610918 0.000000 608 +session 0 26 3.688879 0.000000 643 +pagewelcom 0 11 4.553877 0.000000 1344 +bug 0 7 5.010635 0.000000 1801 +encount 0 3 5.857933 0.000000 3128 +bevi 0 1 6.957497 0.000000 8004 +relatingto 0 1 6.957497 0.000000 8005 +frequentlychang 0 1 6.957497 0.000000 8006 +bswest 0 1 6.957497 0.000000 8007 +csif 0 1 6.957497 0.000000 8008 +classpersonnelsyllabuslectur 0 1 6.957497 0.000000 8009 +scheduleguest 0 1 6.957497 0.000000 8010 +scheduleoffic 0 1 6.957497 0.000000 8011 +hoursproject 0 1 6.957497 0.000000 8012 +projectoth 0 1 6.957497 0.000000 8013 +erratarefer 0 1 6.957497 0.000000 8014 +pagesmidterm 0 1 6.957497 0.000000 8015 +questionnairebswest 0 1 6.957497 0.000000 8016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..bf51bb4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +year 0 148 1.945910 0.000000 84 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +compil 0 122 2.079442 0.000000 96 +postscript 0 131 2.079442 0.000000 90 +version 0 113 2.197225 0.000000 122 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +question 0 91 2.397895 0.000000 141 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +refer 0 78 2.564949 0.000000 203 +homework 0 79 2.564949 0.000000 193 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +html 0 75 2.639057 0.000000 235 +sieg 0 69 2.708050 0.000000 260 +test 0 66 2.708050 0.000000 252 +import 0 65 2.772589 0.000000 282 +handout 0 64 2.772589 0.000000 263 +descript 0 64 2.772589 0.000000 271 +previou 0 62 2.772589 0.000000 290 +simpl 0 60 2.833213 0.000000 298 +sampl 0 53 2.944439 0.000000 339 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +format 0 48 3.044522 0.000000 356 +standard 0 48 3.044522 0.000000 365 +quarter 0 47 3.091042 0.000000 389 +answer 0 45 3.135494 0.000000 391 +midterm 0 45 3.135494 0.000000 392 +slide 0 38 3.295837 0.000000 467 +manual 0 35 3.401197 0.000000 504 +kind 0 32 3.465736 0.000000 541 +full 0 28 3.610918 0.000000 615 +pagecs 0 26 3.688879 0.000000 658 +sent 0 22 3.850148 0.000000 763 +floor 0 14 4.317488 0.000000 1070 +front 0 13 4.382027 0.000000 1154 +cecil 0 9 4.753590 0.000000 1547 +chamber 0 8 4.875197 0.000000 1692 +leon 0 8 4.875197 0.000000 1631 +affect 0 6 5.164786 0.000000 2044 +textual 0 6 5.164786 0.000000 1979 +vortex 0 5 5.347108 0.000000 2362 +travers 0 5 5.347108 0.000000 2363 +litvinov 0 3 5.857933 0.000000 3343 +vass 0 2 6.263398 0.000000 4449 +informationmeet 0 2 6.263398 0.000000 4450 +cubicl 0 2 6.263398 0.000000 4451 +archivesslid 0 2 6.263398 0.000000 4452 +informationhandout 0 2 6.263398 0.000000 4163 +tutorialth 0 2 6.263398 0.000000 4453 +onmark 0 2 6.263398 0.000000 4454 +languageswint 0 1 6.957497 0.000000 8017 +craigchamb 0 1 6.957497 0.000000 8018 +archivedher 0 1 6.957497 0.000000 8019 +closedbook 0 1 6.957497 0.000000 8020 +wereask 0 1 6.957497 0.000000 8021 +tutorialsth 0 1 6.957497 0.000000 8022 +tutorialhow 0 1 6.957497 0.000000 8023 +enda 0 1 6.957497 0.000000 8024 +interestdead 0 1 6.957497 0.000000 8025 +elim 0 1 6.957497 0.000000 8026 +idfacfg 0 1 6.957497 0.000000 8027 +frameworkvortex 0 1 6.957497 0.000000 8028 +grammarcecil 0 1 6.957497 0.000000 8029 +documentationdocument 0 1 6.957497 0.000000 8030 +resourcesth 0 1 6.957497 0.000000 8031 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..2db641a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +avail 1 169 1.791759 1.791759 48 +implement 0 152 1.791759 0.000000 52 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +object 0 138 1.945910 0.000000 79 +compil 0 122 2.079442 0.000000 96 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +instructor 0 108 2.197225 0.000000 107 +find 0 111 2.197225 0.000000 111 +come 0 78 2.564949 0.000000 202 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +sieg 0 69 2.708050 0.000000 260 +handout 0 64 2.772589 0.000000 263 +written 0 63 2.772589 0.000000 278 +room 0 59 2.833213 0.000000 301 +found 0 53 2.944439 0.000000 337 +archiv 0 49 3.044522 0.000000 364 +slide 0 38 3.295837 0.000000 467 +manual 0 35 3.401197 0.000000 504 +full 0 28 3.610918 0.000000 615 +pagecs 0 26 3.688879 0.000000 658 +jeff 0 25 3.737670 0.000000 673 +sent 0 22 3.850148 0.000000 763 +sort 0 22 3.850148 0.000000 738 +chateau 0 16 4.174387 0.000000 997 +dean 0 14 4.317488 0.000000 1104 +dave 0 14 4.317488 0.000000 1098 +cecil 0 9 4.753590 0.000000 1547 +chamber 0 8 4.875197 0.000000 1692 +grove 0 8 4.875197 0.000000 1675 +leon 0 8 4.875197 0.000000 1631 +craig 0 7 5.010635 0.000000 1879 +vortex 0 5 5.347108 0.000000 2362 +projectth 0 3 5.857933 0.000000 3344 +jdean 0 2 6.263398 0.000000 4455 +informationmeet 0 2 6.263398 0.000000 4450 +archivesslid 0 2 6.263398 0.000000 4452 +optimizingcompil 0 2 6.263398 0.000000 4456 +cecilproject 0 2 6.263398 0.000000 4457 +onmark 0 2 6.263398 0.000000 4454 +languagesimport 0 1 6.957497 0.000000 8032 +turori 0 1 6.957497 0.000000 8033 +andtransform 0 1 6.957497 0.000000 8034 +resourcesmor 0 1 6.957497 0.000000 8035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..f6b3df66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +assign 1 135 1.945910 1.945910 66 +spring 0 131 2.079442 0.000000 88 +handout 0 64 2.772589 0.000000 263 +sampl 0 53 2.944439 0.000000 339 +pagecs 0 26 3.688879 0.000000 658 +introductori 0 9 4.753590 0.000000 1479 +notkin 0 3 5.857933 0.000000 3345 +engineeringdavid 0 1 6.957497 0.000000 8036 +kwic 0 1 6.957497 0.000000 8037 +projectsnotkin 0 1 6.957497 0.000000 8038 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..2d87e9a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +object 0 138 1.945910 0.000000 79 +postscript 0 131 2.079442 0.000000 90 +studi 0 120 2.079442 0.000000 91 +introduct 0 126 2.079442 0.000000 87 +send 0 114 2.197225 0.000000 109 +instructor 0 108 2.197225 0.000000 107 +question 0 91 2.397895 0.000000 141 +resourc 0 81 2.484907 0.000000 172 +info 0 85 2.484907 0.000000 176 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +refer 0 78 2.564949 0.000000 203 +david 0 71 2.639057 0.000000 232 +html 0 75 2.639057 0.000000 235 +line 0 75 2.639057 0.000000 231 +sieg 0 69 2.708050 0.000000 260 +handout 0 64 2.772589 0.000000 263 +archiv 0 49 3.044522 0.000000 364 +standard 0 48 3.044522 0.000000 365 +mark 0 44 3.135494 0.000000 403 +singl 0 34 3.401197 0.000000 510 +concept 0 32 3.465736 0.000000 537 +administr 0 27 3.637586 0.000000 628 +pagecs 0 26 3.688879 0.000000 658 +subject 0 26 3.688879 0.000000 647 +wish 0 24 3.761200 0.000000 692 +yahoo 0 24 3.761200 0.000000 707 +thread 0 23 3.806662 0.000000 722 +sent 0 22 3.850148 0.000000 763 +self 0 22 3.850148 0.000000 761 +scheme 0 20 3.951244 0.000000 818 +excel 0 19 4.007333 0.000000 868 +previous 0 17 4.110874 0.000000 923 +floor 0 14 4.317488 0.000000 1070 +bodi 0 13 4.382027 0.000000 1178 +mellon 0 13 4.382027 0.000000 1179 +calculu 0 12 4.465908 0.000000 1203 +loew 0 12 4.465908 0.000000 1252 +carnegi 0 12 4.465908 0.000000 1260 +appl 0 11 4.553877 0.000000 1303 +subscrib 0 9 4.753590 0.000000 1541 +kurt 0 9 4.753590 0.000000 1548 +introductori 0 9 4.753590 0.000000 1479 +cecil 0 9 4.753590 0.000000 1547 +leon 0 8 4.875197 0.000000 1631 +dylan 0 8 4.875197 0.000000 1625 +majordomo 0 6 5.164786 0.000000 2066 +gentl 0 5 5.347108 0.000000 2264 +notkin 0 3 5.857933 0.000000 3345 +partridg 0 3 5.857933 0.000000 3346 +lambda 0 2 6.263398 0.000000 4458 +kepart 0 2 6.263398 0.000000 4459 +monash 0 2 6.263398 0.000000 4460 +languagesautumn 0 1 6.957497 0.000000 8039 +byappoint 0 1 6.957497 0.000000 8040 +cubiclescours 0 1 6.957497 0.000000 8041 +readingsmail 0 1 6.957497 0.000000 8042 +archivesw 0 1 6.957497 0.000000 8043 +instructionalpurpos 0 1 6.957497 0.000000 8044 +emailto 0 1 6.957497 0.000000 8045 +csegener 0 1 6.957497 0.000000 8046 +pagesprogram 0 1 6.957497 0.000000 8047 +critiquesgari 0 1 6.957497 0.000000 8048 +leaven 0 1 6.957497 0.000000 8049 +pagefunct 0 1 6.957497 0.000000 8050 +resourcesmit 0 1 6.957497 0.000000 8051 +pagecmu 0 1 6.957497 0.000000 8052 +pagea 0 1 6.957497 0.000000 8053 +mlhaskel 0 1 6.957497 0.000000 8054 +universityobject 0 1 6.957497 0.000000 8055 +geneva 0 1 6.957497 0.000000 8056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..663d6180 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +theori 0 111 2.197225 0.000000 127 +book 0 99 2.302585 0.000000 131 +question 0 91 2.397895 0.000000 141 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +learn 0 86 2.484907 0.000000 170 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +know 0 80 2.564949 0.000000 198 +write 0 72 2.639057 0.000000 222 +tuesdai 0 73 2.639057 0.000000 219 +materi 0 75 2.639057 0.000000 221 +solv 0 73 2.639057 0.000000 234 +sieg 0 69 2.708050 0.000000 260 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +written 0 63 2.772589 0.000000 278 +room 0 59 2.833213 0.000000 301 +march 0 61 2.833213 0.000000 295 +undergradu 0 54 2.944439 0.000000 338 +suggest 0 53 2.944439 0.000000 331 +cover 0 55 2.944439 0.000000 329 +set 0 50 3.044522 0.000000 361 +appoint 0 49 3.044522 0.000000 358 +possibl 0 47 3.091042 0.000000 378 +could 0 46 3.091042 0.000000 383 +discuss 0 45 3.135494 0.000000 399 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +realli 0 40 3.258097 0.000000 444 +probabl 0 40 3.258097 0.000000 455 +must 0 40 3.258097 0.000000 442 +close 0 38 3.295837 0.000000 465 +winter 0 36 3.367296 0.000000 500 +soon 0 36 3.367296 0.000000 494 +short 0 36 3.367296 0.000000 499 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +richard 0 31 3.496508 0.000000 559 +particip 0 29 3.583519 0.000000 589 +rule 0 26 3.688879 0.000000 638 +background 0 25 3.737670 0.000000 664 +togeth 0 23 3.806662 0.000000 714 +william 0 22 3.850148 0.000000 765 +half 0 21 3.912023 0.000000 776 +anderson 0 19 4.007333 0.000000 860 +els 0 19 4.007333 0.000000 843 +prerequisit 0 19 4.007333 0.000000 846 +assum 0 19 4.007333 0.000000 845 +chateau 0 16 4.174387 0.000000 997 +alreadi 0 16 4.174387 0.000000 963 +quiz 0 16 4.174387 0.000000 990 +upon 0 16 4.174387 0.000000 978 +anyth 0 16 4.174387 0.000000 998 +floor 0 14 4.317488 0.000000 1070 +script 0 13 4.382027 0.000000 1171 +verifi 0 12 4.465908 0.000000 1261 +island 0 11 4.553877 0.000000 1345 +errata 0 10 4.653960 0.000000 1403 +classmat 0 9 4.753590 0.000000 1516 +equival 0 9 4.753590 0.000000 1496 +told 0 8 4.875197 0.000000 1658 +chan 0 7 5.010635 0.000000 1876 +wrong 0 6 5.164786 0.000000 2025 +lack 0 6 5.164786 0.000000 1994 +invok 0 6 5.164786 0.000000 2079 +understood 0 5 5.347108 0.000000 2364 +cancel 0 4 5.568345 0.000000 2746 +episod 0 4 5.568345 0.000000 2747 +wchan 0 3 5.857933 0.000000 3338 +preview 0 3 5.857933 0.000000 3306 +algorithmscs 0 2 6.263398 0.000000 4461 +seig 0 2 6.263398 0.000000 4462 +cubicl 0 2 6.263398 0.000000 4451 +somebodi 0 2 6.263398 0.000000 4463 +outer 0 2 6.263398 0.000000 4464 +okai 0 2 6.263398 0.000000 4465 +eduwchan 0 2 6.263398 0.000000 4435 +gilligan 0 1 6.957497 0.000000 8057 +readingtextbook 0 1 6.957497 0.000000 8058 +sapplet 0 1 6.957497 0.000000 8059 +willconsist 0 1 6.957497 0.000000 8060 +bureaucrat 0 1 6.957497 0.000000 8061 +stuffgrad 0 1 6.957497 0.000000 8062 +homeworkproblem 0 1 6.957497 0.000000 8063 +upindepend 0 1 6.957497 0.000000 8064 +betweenani 0 1 6.957497 0.000000 8065 +mustwatch 0 1 6.957497 0.000000 8066 +thatan 0 1 6.957497 0.000000 8067 +reboot 0 1 6.957497 0.000000 8068 +thatsurv 0 1 6.957497 0.000000 8069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..d848083c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +model 0 145 1.945910 0.000000 69 +year 0 148 1.945910 0.000000 84 +assign 0 135 1.945910 0.000000 66 +relat 0 139 1.945910 0.000000 68 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +introduct 0 126 2.079442 0.000000 87 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +code 0 108 2.197225 0.000000 116 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +specif 0 106 2.197225 0.000000 106 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +text 0 98 2.302585 0.000000 133 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +section 0 94 2.397895 0.000000 149 +present 0 91 2.397895 0.000000 145 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +requir 0 81 2.484907 0.000000 167 +homework 0 79 2.564949 0.000000 193 +april 0 77 2.564949 0.000000 196 +refer 0 78 2.564949 0.000000 203 +come 0 78 2.564949 0.000000 202 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +tuesdai 0 73 2.639057 0.000000 219 +write 0 72 2.639057 0.000000 222 +thursdai 0 70 2.708050 0.000000 241 +sieg 0 69 2.708050 0.000000 260 +syllabu 0 67 2.708050 0.000000 247 +would 0 67 2.708050 0.000000 251 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +descript 0 64 2.772589 0.000000 271 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +plan 0 65 2.772589 0.000000 272 +result 0 65 2.772589 0.000000 281 +share 0 59 2.833213 0.000000 304 +content 0 59 2.833213 0.000000 302 +major 0 56 2.890372 0.000000 315 +special 0 56 2.890372 0.000000 320 +think 0 57 2.890372 0.000000 314 +cover 0 55 2.944439 0.000000 329 +three 0 54 2.944439 0.000000 330 +particular 0 51 2.995732 0.000000 352 +pointer 0 48 3.044522 0.000000 368 +approach 0 48 3.044522 0.000000 366 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +could 0 46 3.091042 0.000000 383 +term 0 43 3.178054 0.000000 411 +howev 0 41 3.218876 0.000000 422 +fast 0 42 3.218876 0.000000 429 +probabl 0 40 3.258097 0.000000 455 +correct 0 38 3.295837 0.000000 462 +close 0 38 3.295837 0.000000 465 +origin 0 38 3.295837 0.000000 472 +open 0 38 3.295837 0.000000 469 +connect 0 37 3.332205 0.000000 485 +expect 0 37 3.332205 0.000000 484 +cost 0 37 3.332205 0.000000 480 +feel 0 37 3.332205 0.000000 483 +next 0 34 3.401197 0.000000 517 +go 0 33 3.433987 0.000000 529 +taught 0 33 3.433987 0.000000 526 +richard 0 31 3.496508 0.000000 559 +titl 0 31 3.496508 0.000000 556 +graph 0 30 3.555348 0.000000 576 +compon 0 30 3.555348 0.000000 570 +exist 0 30 3.555348 0.000000 569 +consid 0 29 3.583519 0.000000 590 +limit 0 29 3.583519 0.000000 585 +progress 0 28 3.610918 0.000000 598 +quit 0 27 3.637586 0.000000 633 +mind 0 27 3.637586 0.000000 632 +challeng 0 26 3.688879 0.000000 653 +although 0 25 3.737670 0.000000 667 +fundament 0 25 3.737670 0.000000 661 +supercomput 0 25 3.737670 0.000000 681 +other 0 24 3.761200 0.000000 697 +sort 0 22 3.850148 0.000000 738 +emphasi 0 22 3.850148 0.000000 755 +instead 0 22 3.850148 0.000000 756 +theorem 0 21 3.912023 0.000000 786 +flexibl 0 21 3.912023 0.000000 792 +half 0 21 3.912023 0.000000 776 +nice 0 20 3.951244 0.000000 809 +anderson 0 19 4.007333 0.000000 860 +prerequisit 0 19 4.007333 0.000000 846 +spend 0 19 4.007333 0.000000 850 +prove 0 19 4.007333 0.000000 848 +four 0 18 4.060443 0.000000 905 +matrix 0 17 4.110874 0.000000 933 +interconnect 0 17 4.110874 0.000000 937 +upon 0 16 4.174387 0.000000 978 +choic 0 16 4.174387 0.000000 979 +mayb 0 15 4.248495 0.000000 1014 +indic 0 15 4.248495 0.000000 1013 +purchas 0 15 4.248495 0.000000 1030 +rank 0 14 4.317488 0.000000 1063 +latex 0 14 4.317488 0.000000 1064 +topolog 0 14 4.317488 0.000000 1089 +consider 0 14 4.317488 0.000000 1076 +happi 0 14 4.317488 0.000000 1079 +insid 0 12 4.465908 0.000000 1262 +asynchron 0 12 4.465908 0.000000 1229 +transpar 0 11 4.553877 0.000000 1325 +sens 0 11 4.553877 0.000000 1305 +motiv 0 11 4.553877 0.000000 1346 +volum 0 11 4.553877 0.000000 1347 +catalog 0 10 4.653960 0.000000 1431 +arithmet 0 10 4.653960 0.000000 1388 +tradit 0 10 4.653960 0.000000 1404 +equival 0 9 4.753590 0.000000 1496 +routin 0 9 4.753590 0.000000 1549 +foc 0 7 5.010635 0.000000 1880 +uniform 0 7 5.010635 0.000000 1845 +plu 0 6 5.164786 0.000000 2004 +consensu 0 6 5.164786 0.000000 2080 +situat 0 5 5.347108 0.000000 2365 +volunt 0 5 5.347108 0.000000 2307 +algorithmsfor 0 4 5.568345 0.000000 2748 +ullman 0 4 5.568345 0.000000 2749 +union 0 4 5.568345 0.000000 2634 +buss 0 4 5.568345 0.000000 2649 +manuscript 0 4 5.568345 0.000000 2750 +wewil 0 4 5.568345 0.000000 2688 +cheap 0 4 5.568345 0.000000 2751 +chose 0 4 5.568345 0.000000 2629 +rambl 0 3 5.857933 0.000000 3308 +crew 0 3 5.857933 0.000000 3347 +impli 0 3 5.857933 0.000000 3348 +pertain 0 3 5.857933 0.000000 3208 +andyou 0 3 5.857933 0.000000 3256 +parallelalgorithm 0 3 5.857933 0.000000 3249 +influenc 0 3 5.857933 0.000000 3349 +algorithmscs 0 2 6.263398 0.000000 4461 +algorithmi 0 2 6.263398 0.000000 4208 +simpler 0 2 6.263398 0.000000 4210 +swap 0 2 6.263398 0.000000 4466 +exception 0 2 6.263398 0.000000 4467 +bake 0 2 6.263398 0.000000 4468 +ideason 0 2 6.263398 0.000000 4469 +appointment 0 1 6.957497 0.000000 8070 +developingfast 0 1 6.957497 0.000000 8071 +theirefficaci 0 1 6.957497 0.000000 8072 +commentsabout 0 1 6.957497 0.000000 8073 +analysisfor 0 1 6.957497 0.000000 8074 +referencesfor 0 1 6.957497 0.000000 8075 +erew 0 1 6.957497 0.000000 8076 +yannakaki 0 1 6.957497 0.000000 8077 +certifi 0 1 6.957497 0.000000 8078 +likelysometh 0 1 6.957497 0.000000 8079 +martel 0 1 6.957497 0.000000 8080 +whim 0 1 6.957497 0.000000 8081 +smpc 0 1 6.957497 0.000000 8082 +lookingat 0 1 6.957497 0.000000 8083 +isnon 0 1 6.957497 0.000000 8084 +notconsid 0 1 6.957497 0.000000 8085 +indevelop 0 1 6.957497 0.000000 8086 +algorithmswhich 0 1 6.957497 0.000000 8087 +conceiv 0 1 6.957497 0.000000 8088 +goingto 0 1 6.957497 0.000000 8089 +outsidework 0 1 6.957497 0.000000 8090 +befollow 0 1 6.957497 0.000000 8091 +youcould 0 1 6.957497 0.000000 8092 +textwould 0 1 6.957497 0.000000 8093 +artof 0 1 6.957497 0.000000 8094 +mychoic 0 1 6.957497 0.000000 8095 +interestingor 0 1 6.957497 0.000000 8096 +uninterest 0 1 6.957497 0.000000 8097 +aseith 0 1 6.957497 0.000000 8098 +researchcont 0 1 6.957497 0.000000 8099 +turninto 0 1 6.957497 0.000000 8100 +andenergi 0 1 6.957497 0.000000 8101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..b48f741a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +provid 0 121 2.079442 0.000000 94 +comment 0 93 2.397895 0.000000 146 +sourc 0 77 2.564949 0.000000 201 +complex 0 64 2.772589 0.000000 269 +move 0 47 3.091042 0.000000 382 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +autumn 1 31 3.496508 3.496508 558 +ofwashington 0 22 3.850148 0.000000 766 +portion 0 16 4.174387 0.000000 971 +webmast 0 15 4.248495 0.000000 1045 +reprint 0 14 4.317488 0.000000 1097 +automata 0 13 4.382027 0.000000 1135 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +foracadem 0 5 5.347108 0.000000 2341 +accuratelyquot 0 2 6.263398 0.000000 4470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..70a7bf8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +washington 0 236 1.386294 0.000000 32 +fall 0 181 1.609438 0.000000 40 +postscript 1 131 2.079442 2.079442 90 +welcom 0 122 2.079442 0.000000 99 +instructor 0 108 2.197225 0.000000 107 +world 0 115 2.197225 0.000000 126 +final 0 116 2.197225 0.000000 108 +wide 0 84 2.484907 0.000000 185 +exam 0 86 2.484907 0.000000 169 +paul 0 38 3.295837 0.000000 471 +short 0 36 3.367296 0.000000 499 +quiz 0 16 4.174387 0.000000 990 +latex 0 14 4.317488 0.000000 1064 +hypermedia 0 12 4.465908 0.000000 1247 +documentfor 0 7 5.010635 0.000000 1865 +beam 0 5 5.347108 0.000000 2344 +automataautumn 0 1 6.957497 0.000000 8102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..25993ed3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +support 0 132 1.945910 0.000000 83 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +comment 0 93 2.397895 0.000000 146 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +sourc 0 77 2.564949 0.000000 201 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +free 0 73 2.639057 0.000000 224 +materi 0 75 2.639057 0.000000 221 +sieg 0 69 2.708050 0.000000 260 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +organ 0 65 2.772589 0.000000 265 +complex 0 64 2.772589 0.000000 269 +handout 0 64 2.772589 0.000000 263 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +adapt 0 46 3.091042 0.000000 387 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +math 0 44 3.135494 0.000000 402 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +print 0 34 3.401197 0.000000 503 +collabor 0 32 3.465736 0.000000 543 +autumn 0 31 3.496508 0.000000 558 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +sent 0 22 3.850148 0.000000 763 +ofwashington 0 22 3.850148 0.000000 766 +viewer 0 21 3.912023 0.000000 787 +latest 0 21 3.912023 0.000000 785 +figur 0 18 4.060443 0.000000 903 +render 0 17 4.110874 0.000000 947 +portion 0 16 4.174387 0.000000 971 +ascii 0 15 4.248495 0.000000 1032 +webmast 0 15 4.248495 0.000000 1045 +latex 0 14 4.317488 0.000000 1064 +command 0 14 4.317488 0.000000 1083 +reprint 0 14 4.317488 0.000000 1097 +larri 0 13 4.382027 0.000000 1142 +automata 0 13 4.382027 0.000000 1135 +web 0 12 4.465908 0.000000 1249 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +errata 0 10 4.653960 0.000000 1403 +tuth 0 9 4.753590 0.000000 1519 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +perhap 0 8 4.875197 0.000000 1693 +legibl 0 7 5.010635 0.000000 1866 +adob 0 7 5.010635 0.000000 1873 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 0 5 5.347108 0.000000 2345 +foracadem 0 5 5.347108 0.000000 2341 +sharma 0 4 5.568345 0.000000 2752 +thecours 0 4 5.568345 0.000000 2685 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +administrivia 0 3 5.857933 0.000000 3166 +ghostview 0 3 5.857933 0.000000 3163 +accuratelyquot 0 2 6.263398 0.000000 4470 +nitin 0 1 6.957497 0.000000 8103 +staffnameemailphoneoffic 0 1 6.957497 0.000000 8104 +csmw 0 1 6.957497 0.000000 8105 +acroread 0 1 6.957497 0.000000 8106 +aavail 0 1 6.957497 0.000000 8107 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..fc896ce0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +well 0 109 2.197225 0.000000 121 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +search 0 95 2.397895 0.000000 155 +thing 0 84 2.484907 0.000000 189 +issu 0 78 2.564949 0.000000 211 +good 0 77 2.564949 0.000000 200 +june 0 79 2.564949 0.000000 214 +logic 0 71 2.639057 0.000000 230 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +involv 0 71 2.639057 0.000000 227 +test 0 66 2.708050 0.000000 252 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +thursdai 0 70 2.708050 0.000000 241 +complex 1 64 2.772589 2.772589 269 +talk 0 53 2.944439 0.000000 336 +give 0 50 3.044522 0.000000 359 +even 0 45 3.135494 0.000000 393 +autom 0 41 3.218876 0.000000 434 +examin 0 42 3.218876 0.000000 424 +theoret 0 39 3.258097 0.000000 446 +paul 0 38 3.295837 0.000000 471 +slide 0 38 3.295837 0.000000 467 +survei 0 35 3.401197 0.000000 513 +within 0 33 3.433987 0.000000 525 +autumn 0 31 3.496508 0.000000 558 +often 0 31 3.496508 0.000000 551 +consid 0 29 3.583519 0.000000 590 +compar 0 26 3.688879 0.000000 648 +strategi 0 25 3.737670 0.000000 682 +higher 0 24 3.761200 0.000000 690 +interpret 0 24 3.761200 0.000000 686 +proof 0 23 3.806662 0.000000 720 +instal 0 22 3.850148 0.000000 754 +varieti 0 22 3.850148 0.000000 740 +theorem 0 21 3.912023 0.000000 786 +vlsi 0 21 3.912023 0.000000 795 +verif 0 20 3.951244 0.000000 826 +prove 0 19 4.007333 0.000000 848 +concentr 0 18 4.060443 0.000000 906 +aid 0 18 4.060443 0.000000 904 +attempt 0 17 4.110874 0.000000 917 +moor 0 17 4.110874 0.000000 936 +choic 0 16 4.174387 0.000000 979 +side 0 15 4.248495 0.000000 1022 +anywai 0 15 4.248495 0.000000 1047 +decid 0 14 4.317488 0.000000 1075 +consider 0 14 4.317488 0.000000 1076 +loew 0 12 4.465908 0.000000 1252 +statement 0 11 4.553877 0.000000 1313 +rel 0 9 4.753590 0.000000 1487 +satisfi 0 8 4.875197 0.000000 1694 +prover 0 8 4.875197 0.000000 1653 +proposit 1 5 5.347108 5.347108 2339 +beam 0 5 5.347108 0.000000 2344 +amus 0 5 5.347108 0.000000 2366 +andsoftwar 0 4 5.568345 0.000000 2753 +tester 0 4 5.568345 0.000000 2754 +theoremprov 0 3 5.857933 0.000000 3298 +theoryand 0 3 5.857933 0.000000 3350 +scatter 0 3 5.857933 0.000000 3351 +truthof 0 1 6.957497 0.000000 8108 +casea 0 1 6.957497 0.000000 8109 +flip 0 1 6.957497 0.000000 8110 +oftheorem 0 1 6.957497 0.000000 8111 +finitedomain 0 1 6.957497 0.000000 8112 +thesequest 0 1 6.957497 0.000000 8113 +complexityand 0 1 6.957497 0.000000 8114 +anumb 0 1 6.957497 0.000000 8115 +urquhart 0 1 6.957497 0.000000 8116 +sato 0 1 6.957497 0.000000 8117 +andboy 0 1 6.957497 0.000000 8118 +gsat 0 1 6.957497 0.000000 8119 +thedirectori 0 1 6.957497 0.000000 8120 +proversther 0 1 6.957497 0.000000 8121 +ofinstal 0 1 6.957497 0.000000 8122 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..7bbbd226 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +confer 0 126 2.079442 0.000000 100 +topic 0 114 2.197225 0.000000 110 +solut 0 82 2.484907 0.000000 162 +mondai 0 77 2.564949 0.000000 206 +meet 0 72 2.639057 0.000000 229 +wednesdai 0 64 2.772589 0.000000 261 +fridai 0 44 3.135494 0.000000 390 +packag 0 28 3.610918 0.000000 614 +measur 0 28 3.610918 0.000000 609 +pagecs 0 26 3.688879 0.000000 658 +sigmetr 0 13 4.382027 0.000000 1173 +loew 0 12 4.465908 0.000000 1252 +host 0 11 4.553877 0.000000 1306 +queue 0 10 4.653960 0.000000 1386 +systemperform 0 1 6.957497 0.000000 8123 +modelingspr 0 1 6.957497 0.000000 8124 +lazowskaandmaryvernonwelcom 0 1 6.957497 0.000000 8125 +performancemodel 0 1 6.957497 0.000000 8126 +hourstent 0 1 6.957497 0.000000 8127 +schedulecom 0 1 6.957497 0.000000 8128 +goingsassignmentsproject 0 1 6.957497 0.000000 8129 +informationmap 0 1 6.957497 0.000000 8130 +emailoth 0 1 6.957497 0.000000 8131 +computersystemsuw 0 1 6.957497 0.000000 8132 +engineeringlazowska 0 1 6.957497 0.000000 8133 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..48f2cb00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +postscript 1 131 2.079442 2.079442 90 +tool 0 117 2.079442 0.000000 93 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +code 0 108 2.197225 0.000000 116 +user 0 104 2.302585 0.000000 137 +center 0 88 2.397895 0.000000 158 +info 0 85 2.484907 0.000000 176 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +simul 1 66 2.708050 2.708050 255 +sieg 0 69 2.708050 0.000000 260 +test 0 66 2.708050 0.000000 252 +previou 0 62 2.772589 0.000000 290 +overview 0 56 2.890372 0.000000 323 +instruct 0 53 2.944439 0.000000 332 +local 0 55 2.944439 0.000000 334 +processor 0 54 2.944439 0.000000 335 +hardwar 0 51 2.995732 0.000000 350 +set 0 50 3.044522 0.000000 361 +execut 0 45 3.135494 0.000000 404 +cach 0 41 3.218876 0.000000 432 +futur 0 41 3.218876 0.000000 427 +continu 0 39 3.258097 0.000000 448 +close 0 38 3.295837 0.000000 465 +manual 0 35 3.401197 0.000000 504 +multiprocessor 0 28 3.610918 0.000000 605 +binari 0 20 3.951244 0.000000 823 +histori 0 19 4.007333 0.000000 853 +benchmark 0 19 4.007333 0.000000 859 +analyz 0 17 4.110874 0.000000 925 +monitor 0 17 4.110874 0.000000 941 +rate 0 15 4.248495 0.000000 1037 +driven 0 15 4.248495 0.000000 1048 +neat 0 12 4.465908 0.000000 1263 +alpha 0 11 4.553877 0.000000 1348 +tuth 0 9 4.753590 0.000000 1519 +egger 0 8 4.875197 0.000000 1695 +uniprocessor 0 8 4.875197 0.000000 1696 +spec 0 8 4.875197 0.000000 1640 +sparc 0 7 5.010635 0.000000 1860 +shade 0 7 5.010635 0.000000 1881 +pentium 0 6 5.164786 0.000000 2077 +tullsen 0 6 5.164786 0.000000 2081 +superscalar 0 6 5.164786 0.000000 2082 +rewrit 0 5 5.347108 0.000000 2367 +etch 0 4 5.568345 0.000000 2755 +redston 0 3 5.857933 0.000000 3332 +specmark 0 2 6.263398 0.000000 4471 +atom 0 2 6.263398 0.000000 4472 +multiflow 0 2 6.263398 0.000000 4473 +powerpc 0 2 6.263398 0.000000 4238 +architecturewint 0 1 6.957497 0.000000 8134 +instructorsusan 0 1 6.957497 0.000000 8135 +tajoshua 0 1 6.957497 0.000000 8136 +instuct 0 1 6.957497 0.000000 8137 +pixi 0 1 6.957497 0.000000 8138 +dinero 0 1 6.957497 0.000000 8139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..3066c0c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +spring 0 131 2.079442 0.000000 88 +confer 0 126 2.079442 0.000000 100 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +document 0 121 2.079442 0.000000 89 +instructor 0 108 2.197225 0.000000 107 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +april 0 77 2.564949 0.000000 196 +meet 0 72 2.639057 0.000000 229 +room 0 59 2.833213 0.000000 301 +frequent 0 49 3.044522 0.000000 367 +keep 0 44 3.135494 0.000000 409 +announc 0 40 3.258097 0.000000 441 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +ad 0 32 3.465736 0.000000 544 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +pagecs 0 26 3.688879 0.000000 658 +unit 0 21 3.912023 0.000000 779 +chateau 0 16 4.174387 0.000000 997 +levi 0 14 4.317488 0.000000 1093 +hank 0 12 4.465908 0.000000 1253 +hypermedia 0 12 4.465908 0.000000 1247 +readi 0 12 4.465908 0.000000 1242 +pighin 0 4 5.568345 0.000000 2735 +thisdocu 0 3 5.857933 0.000000 3336 +freder 0 3 5.857933 0.000000 3352 +iti 0 2 6.263398 0.000000 4066 +forcs 0 1 6.957497 0.000000 8140 +classmessag 0 1 6.957497 0.000000 8141 +projectlevi 0 1 6.957497 0.000000 8142 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..7fd3186c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +class 0 199 1.609438 0.000000 37 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +document 0 121 2.079442 0.000000 89 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +grade 0 90 2.397895 0.000000 142 +imag 0 91 2.397895 0.000000 161 +follow 0 92 2.397895 0.000000 143 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +solut 0 82 2.484907 0.000000 162 +resourc 0 81 2.484907 0.000000 172 +homework 0 79 2.564949 0.000000 193 +addit 0 74 2.639057 0.000000 228 +html 0 75 2.639057 0.000000 235 +degre 0 69 2.708050 0.000000 259 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +written 0 63 2.772589 0.000000 278 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +colleg 0 61 2.833213 0.000000 300 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +browser 0 56 2.890372 0.000000 313 +instruct 0 53 2.944439 0.000000 332 +cool 0 49 3.044522 0.000000 374 +visitor 0 49 3.044522 0.000000 371 +basic 0 50 3.044522 0.000000 360 +quarter 0 47 3.091042 0.000000 389 +get 0 46 3.091042 0.000000 380 +keep 0 44 3.135494 0.000000 409 +offer 0 43 3.178054 0.000000 414 +art 0 29 3.583519 0.000000 593 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +pagecs 0 26 3.688879 0.000000 658 +hypertext 0 19 4.007333 0.000000 865 +charact 0 15 4.248495 0.000000 1028 +hypermedia 0 12 4.465908 0.000000 1247 +mosaic 0 10 4.653960 0.000000 1426 +uniform 0 7 5.010635 0.000000 1845 +markup 0 6 5.164786 0.000000 2059 +whichcontain 0 4 5.568345 0.000000 2714 +wealth 0 3 5.857933 0.000000 3353 +thatthi 0 2 6.263398 0.000000 4379 +addedfrequ 0 2 6.263398 0.000000 4380 +deros 0 2 6.263398 0.000000 4474 +indi 0 2 6.263398 0.000000 4431 +mvi 0 2 6.263398 0.000000 4382 +usinglynx 0 2 6.263398 0.000000 4383 +graphicsautumn 0 1 6.957497 0.000000 8143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..11c5fafe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +recent 0 167 1.791759 0.000000 58 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +site 0 106 2.197225 0.000000 119 +book 0 99 2.302585 0.000000 131 +text 0 98 2.302585 0.000000 133 +memori 0 101 2.302585 0.000000 139 +commun 0 95 2.397895 0.000000 157 +exam 0 86 2.484907 0.000000 169 +academ 0 82 2.484907 0.000000 178 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +server 0 76 2.564949 0.000000 204 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +logic 0 71 2.639057 0.000000 230 +summari 0 73 2.639057 0.000000 237 +tuesdai 0 73 2.639057 0.000000 219 +nation 0 74 2.639057 0.000000 240 +sieg 0 69 2.708050 0.000000 260 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +simul 0 66 2.708050 0.000000 255 +wednesdai 0 64 2.772589 0.000000 261 +import 0 65 2.772589 0.000000 282 +handout 0 64 2.772589 0.000000 263 +cover 0 55 2.944439 0.000000 329 +digit 0 52 2.995732 0.000000 348 +principl 0 48 3.044522 0.000000 357 +still 0 50 3.044522 0.000000 362 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +fridai 0 44 3.135494 0.000000 390 +combin 0 42 3.218876 0.000000 421 +review 0 42 3.218876 0.000000 425 +announc 0 40 3.258097 0.000000 441 +paul 0 38 3.295837 0.000000 471 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +purpos 0 37 3.332205 0.000000 481 +staff 0 36 3.367296 0.000000 490 +copyright 0 36 3.367296 0.000000 495 +board 0 33 3.433987 0.000000 528 +quot 0 29 3.583519 0.000000 582 +accur 0 25 3.737670 0.000000 680 +begin 0 23 3.806662 0.000000 716 +sequenti 0 22 3.850148 0.000000 745 +vlsi 0 21 3.912023 0.000000 795 +synthesi 0 20 3.951244 0.000000 834 +mostli 0 19 4.007333 0.000000 869 +sheet 0 16 4.174387 0.000000 973 +dilbert 0 16 4.174387 0.000000 996 +portion 0 16 4.174387 0.000000 971 +carl 0 15 4.248495 0.000000 1024 +comic 0 14 4.317488 0.000000 1103 +reprint 0 14 4.317488 0.000000 1097 +larri 0 13 4.382027 0.000000 1142 +loew 0 12 4.465908 0.000000 1252 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +fpga 0 10 4.653960 0.000000 1433 +franklin 0 10 4.653960 0.000000 1436 +motorola 0 9 4.753590 0.000000 1546 +mother 0 6 5.164786 0.000000 2083 +philip 0 6 5.164786 0.000000 2005 +ebel 0 4 5.568345 0.000000 2756 +mcmurchi 0 4 5.568345 0.000000 2757 +murphi 0 4 5.568345 0.000000 2737 +semiconductor 0 3 5.857933 0.000000 3339 +semiconduct 0 3 5.857933 0.000000 3340 +micron 0 3 5.857933 0.000000 3341 +hine 0 2 6.263398 0.000000 4475 +guru 0 2 6.263398 0.000000 4476 +comprehensivelist 0 2 6.263398 0.000000 4439 +icmanufactur 0 2 6.263398 0.000000 4440 +hineskj 0 1 6.957497 0.000000 8144 +pamett 0 1 6.957497 0.000000 8145 +groupsfin 0 1 6.957497 0.000000 8146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..efe02e48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +address 0 170 1.791759 0.000000 62 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +topic 0 114 2.197225 0.000000 110 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +search 0 95 2.397895 0.000000 155 +build 0 85 2.484907 0.000000 184 +messag 0 76 2.564949 0.000000 212 +intellig 0 72 2.639057 0.000000 225 +sieg 1 69 2.708050 2.708050 260 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +plan 0 65 2.772589 0.000000 272 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +archiv 0 49 3.044522 0.000000 364 +quarter 0 47 3.091042 0.000000 389 +past 0 42 3.218876 0.000000 428 +staff 0 36 3.367296 0.000000 490 +represent 0 35 3.401197 0.000000 512 +pagecs 0 26 3.688879 0.000000 658 +fundament 0 25 3.737670 0.000000 661 +methodolog 0 23 3.806662 0.000000 733 +outlin 0 17 4.110874 0.000000 914 +nick 0 13 4.382027 0.000000 1180 +pose 0 9 4.753590 0.000000 1535 +depth 0 8 4.875197 0.000000 1636 +marc 0 8 4.875197 0.000000 1680 +uncertainti 0 7 5.010635 0.000000 1882 +machinelearn 0 6 5.164786 0.000000 2084 +anin 0 3 5.857933 0.000000 3354 +assignmentsassign 0 3 5.857933 0.000000 3342 +mailinglist 0 3 5.857933 0.000000 3325 +intelligencefal 0 2 6.263398 0.000000 4477 +andchalleng 0 2 6.263398 0.000000 4478 +intelligentmachin 0 2 6.263398 0.000000 4479 +agentarchitectur 0 2 6.263398 0.000000 4480 +weldweld 0 2 6.263398 0.000000 4481 +friedmanfriedman 0 2 6.263398 0.000000 4482 +kushmericknick 0 2 6.263398 0.000000 4483 +examsgradingresourcesth 0 2 6.263398 0.000000 4484 +topicsread 0 1 6.957497 0.000000 8147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..bc8ed029 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +read 1 154 1.791759 1.791759 47 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +postscript 0 131 2.079442 0.000000 90 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +book 0 99 2.302585 0.000000 131 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +graphic 0 90 2.397895 0.000000 147 +librari 0 87 2.484907 0.000000 181 +member 0 84 2.484907 0.000000 165 +requir 0 81 2.484907 0.000000 167 +refer 0 78 2.564949 0.000000 203 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +intellig 0 72 2.639057 0.000000 225 +summari 0 73 2.639057 0.000000 237 +materi 0 75 2.639057 0.000000 221 +logic 0 71 2.639057 0.000000 230 +html 0 75 2.639057 0.000000 235 +sieg 0 69 2.708050 0.000000 260 +artifici 0 63 2.772589 0.000000 280 +collect 0 65 2.772589 0.000000 268 +foundat 0 62 2.772589 0.000000 286 +copi 0 63 2.772589 0.000000 284 +written 0 63 2.772589 0.000000 278 +juli 0 60 2.833213 0.000000 305 +reason 0 57 2.890372 0.000000 318 +sever 0 56 2.890372 0.000000 322 +cover 0 55 2.944439 0.000000 329 +appoint 0 49 3.044522 0.000000 358 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +around 0 43 3.178054 0.000000 415 +edit 0 42 3.218876 0.000000 418 +probabl 0 40 3.258097 0.000000 455 +winter 0 36 3.367296 0.000000 500 +statist 0 35 3.401197 0.000000 521 +go 0 33 3.433987 0.000000 529 +chapter 0 32 3.465736 0.000000 536 +steve 0 29 3.583519 0.000000 594 +though 0 27 3.637586 0.000000 622 +request 0 26 3.688879 0.000000 635 +decis 0 23 3.806662 0.000000 728 +nice 0 20 3.951244 0.000000 809 +expert 0 20 3.951244 0.000000 833 +definit 0 19 4.007333 0.000000 864 +goe 0 15 4.248495 0.000000 1044 +signific 0 13 4.382027 0.000000 1125 +hank 0 12 4.465908 0.000000 1253 +probabilist 0 11 4.553877 0.000000 1343 +extrem 0 11 4.553877 0.000000 1330 +worth 0 11 4.553877 0.000000 1294 +perspect 0 10 4.653960 0.000000 1437 +uncertainti 0 7 5.010635 0.000000 1882 +whenev 0 7 5.010635 0.000000 1883 +heavi 0 7 5.010635 0.000000 1841 +secondari 0 7 5.010635 0.000000 1884 +histor 0 6 5.164786 0.000000 2085 +arrang 0 6 5.164786 0.000000 2023 +overlap 0 5 5.347108 0.000000 2368 +uncertain 0 4 5.568345 0.000000 2758 +cash 0 3 5.857933 0.000000 3355 +grail 0 3 5.857933 0.000000 3356 +alon 0 3 5.857933 0.000000 3139 +pearl 0 2 6.263398 0.000000 4485 +bui 0 2 6.263398 0.000000 4486 +algorithmsa 0 2 6.263398 0.000000 4487 +systemsthi 0 1 6.957497 0.000000 8148 +strappedfor 0 1 6.957497 0.000000 8149 +shafer 0 1 6.957497 0.000000 8150 +reasoningthi 0 1 6.957497 0.000000 8151 +jayn 0 1 6.957497 0.000000 8152 +fragmentari 0 1 6.957497 0.000000 8153 +foundationsof 0 1 6.957497 0.000000 8154 +beautifulli 0 1 6.957497 0.000000 8155 +neapolitan 0 1 6.957497 0.000000 8156 +propagationalgorithm 0 1 6.957497 0.000000 8157 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..82b2bdf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,272 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +cours 1 273 1.098612 1.098612 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +topic 0 114 2.197225 0.000000 110 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +imag 0 91 2.397895 0.000000 161 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +environ 0 84 2.484907 0.000000 177 +help 0 83 2.484907 0.000000 175 +contain 0 81 2.484907 0.000000 174 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +wide 0 84 2.484907 0.000000 185 +second 0 81 2.484907 0.000000 166 +educ 0 86 2.484907 0.000000 191 +level 0 87 2.484907 0.000000 180 +learn 0 86 2.484907 0.000000 170 +requir 0 81 2.484907 0.000000 167 +librari 0 87 2.484907 0.000000 181 +resourc 0 81 2.484907 0.000000 172 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +messag 0 76 2.564949 0.000000 212 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +name 0 72 2.639057 0.000000 220 +sieg 0 69 2.708050 0.000000 260 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +wednesdai 0 64 2.772589 0.000000 261 +copi 0 63 2.772589 0.000000 284 +experi 0 64 2.772589 0.000000 283 +plan 0 65 2.772589 0.000000 272 +import 0 65 2.772589 0.000000 282 +laboratori 0 63 2.772589 0.000000 292 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +back 0 60 2.833213 0.000000 297 +unix 0 58 2.890372 0.000000 308 +index 0 56 2.890372 0.000000 309 +local 0 55 2.944439 0.000000 334 +undergradu 0 54 2.944439 0.000000 338 +three 0 54 2.944439 0.000000 330 +cover 0 55 2.944439 0.000000 329 +week 0 52 2.995732 0.000000 343 +run 0 51 2.995732 0.000000 347 +frequent 0 49 3.044522 0.000000 367 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +fridai 0 44 3.135494 0.000000 390 +midterm 0 45 3.135494 0.000000 392 +keep 0 44 3.135494 0.000000 409 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +review 0 42 3.218876 0.000000 425 +vision 0 41 3.218876 0.000000 430 +tutori 0 39 3.258097 0.000000 437 +announc 0 40 3.258097 0.000000 441 +form 0 39 3.258097 0.000000 443 +correct 0 38 3.295837 0.000000 462 +slide 0 38 3.295837 0.000000 467 +workstat 0 37 3.332205 0.000000 479 +copyright 0 36 3.367296 0.000000 495 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +ofth 0 36 3.367296 0.000000 491 +next 0 34 3.401197 0.000000 517 +least 0 35 3.401197 0.000000 516 +post 0 35 3.401197 0.000000 505 +either 0 35 3.401197 0.000000 506 +articl 0 33 3.433987 0.000000 530 +chapter 0 32 3.465736 0.000000 536 +ad 0 32 3.465736 0.000000 544 +transform 0 32 3.465736 0.000000 542 +turn 0 29 3.583519 0.000000 586 +univ 0 28 3.610918 0.000000 617 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +determin 0 27 3.637586 0.000000 630 +pagecs 0 26 3.688879 0.000000 658 +effort 0 26 3.688879 0.000000 652 +notic 0 25 3.737670 0.000000 675 +pattern 0 24 3.761200 0.000000 689 +store 0 24 3.761200 0.000000 693 +displai 0 23 3.806662 0.000000 712 +variabl 0 23 3.806662 0.000000 715 +recognit 0 23 3.806662 0.000000 723 +instal 0 22 3.850148 0.000000 754 +period 0 22 3.850148 0.000000 743 +path 0 21 3.912023 0.000000 778 +navig 0 21 3.912023 0.000000 796 +exercis 0 19 4.007333 0.000000 842 +comparison 0 19 4.007333 0.000000 863 +account 0 18 4.060443 0.000000 882 +appropri 0 18 4.060443 0.000000 883 +statu 0 18 4.060443 0.000000 885 +outlin 0 17 4.110874 0.000000 914 +regular 0 17 4.110874 0.000000 929 +intro 0 17 4.110874 0.000000 915 +sign 0 16 4.174387 0.000000 970 +georg 0 16 4.174387 0.000000 994 +spatial 0 16 4.174387 0.000000 988 +fourth 0 16 4.174387 0.000000 999 +permit 0 16 4.174387 0.000000 962 +alreadi 0 16 4.174387 0.000000 963 +earli 0 16 4.174387 0.000000 968 +overhead 0 15 4.248495 0.000000 1035 +floor 0 14 4.317488 0.000000 1070 +menu 0 13 4.382027 0.000000 1156 +resolut 0 13 4.382027 0.000000 1172 +introduc 0 13 4.382027 0.000000 1139 +care 0 13 4.382027 0.000000 1177 +hypermedia 0 12 4.465908 0.000000 1247 +noth 0 11 4.553877 0.000000 1328 +transpar 0 11 4.553877 0.000000 1325 +fill 0 11 4.553877 0.000000 1349 +thedepart 0 11 4.553877 0.000000 1350 +sundai 0 10 4.653960 0.000000 1387 +packet 0 10 4.653960 0.000000 1415 +prior 0 10 4.653960 0.000000 1438 +card 0 10 4.653960 0.000000 1435 +mosaic 0 10 4.653960 0.000000 1426 +login 0 9 4.753590 0.000000 1550 +classifi 0 9 4.753590 0.000000 1537 +pick 0 9 4.753590 0.000000 1498 +lock 0 9 4.753590 0.000000 1551 +ideal 0 8 4.875197 0.000000 1630 +evan 0 8 4.875197 0.000000 1633 +documentfor 0 7 5.010635 0.000000 1865 +remind 0 7 5.010635 0.000000 1799 +reed 0 6 5.164786 0.000000 2086 +arrang 0 6 5.164786 0.000000 2023 +theclass 0 6 5.164786 0.000000 2060 +mock 0 6 5.164786 0.000000 2087 +otherthan 0 6 5.164786 0.000000 2009 +conveni 0 6 5.164786 0.000000 2088 +onto 0 6 5.164786 0.000000 2089 +approv 0 6 5.164786 0.000000 2078 +temporari 0 6 5.164786 0.000000 2090 +contract 0 6 5.164786 0.000000 1985 +pentium 0 6 5.164786 0.000000 2077 +newinform 0 5 5.347108 0.000000 2342 +subjectto 0 5 5.347108 0.000000 2369 +ahead 0 5 5.347108 0.000000 2338 +cshrc 0 4 5.568345 0.000000 2759 +assignmentsand 0 4 5.568345 0.000000 2760 +cvpr 0 4 5.568345 0.000000 2761 +net 0 4 5.568345 0.000000 2741 +accompani 0 4 5.568345 0.000000 2666 +password 0 4 5.568345 0.000000 2594 +kept 0 4 5.568345 0.000000 2762 +insieg 0 3 5.857933 0.000000 3331 +weekend 0 3 5.857933 0.000000 3357 +khoro 0 2 6.263398 0.000000 4488 +cantata 0 2 6.263398 0.000000 4489 +sun 0 2 6.263398 0.000000 4490 +setenv 0 2 6.263398 0.000000 4491 +pmin 0 2 6.263398 0.000000 4492 +combinationof 0 2 6.263398 0.000000 4081 +includingth 0 2 6.263398 0.000000 4493 +onthursdai 0 2 6.263398 0.000000 4425 +itemsund 0 2 6.263398 0.000000 4387 +balloon 0 2 6.263398 0.000000 4388 +khoros_hom 0 1 6.957497 0.000000 8158 +msvc 0 1 6.957497 0.000000 8159 +rene 0 1 6.957497 0.000000 8160 +understandingwelcom 0 1 6.957497 0.000000 8161 +doexercis 0 1 6.957497 0.000000 8162 +torun 0 1 6.957497 0.000000 8163 +aslillith 0 1 6.957497 0.000000 8164 +containxhost 0 1 6.957497 0.000000 8165 +lilliththen 0 1 6.957497 0.000000 8166 +manpath 0 1 6.957497 0.000000 8167 +rlogin 0 1 6.957497 0.000000 8168 +lillith 0 1 6.957497 0.000000 8169 +rhost 0 1 6.957497 0.000000 8170 +typecantata 0 1 6.957497 0.000000 8171 +prompt 0 1 6.957497 0.000000 8172 +haskhoro 0 1 6.957497 0.000000 8173 +wwwhttp 0 1 6.957497 0.000000 8174 +htmland 0 1 6.957497 0.000000 8175 +itscours 0 1 6.957497 0.000000 8176 +twotop 0 1 6.957497 0.000000 8177 +pagesand 0 1 6.957497 0.000000 8178 +huerta 0 1 6.957497 0.000000 8179 +andnevatia 0 1 6.957497 0.000000 8180 +tolook 0 1 6.957497 0.000000 8181 +wolff 0 1 6.957497 0.000000 8182 +onneur 0 1 6.957497 0.000000 8183 +trainabl 0 1 6.957497 0.000000 8184 +ofmatlab 0 1 6.957497 0.000000 8185 +requirethat 0 1 6.957497 0.000000 8186 +mclain 0 1 6.957497 0.000000 8187 +documentexplain 0 1 6.957497 0.000000 8188 +withkhoro 0 1 6.957497 0.000000 8189 +accesskhoro 0 1 6.957497 0.000000 8190 +youraccount 0 1 6.957497 0.000000 8191 +itov 0 1 6.957497 0.000000 8192 +arelimit 0 1 6.957497 0.000000 8193 +andsh 0 1 6.957497 0.000000 8194 +knock 0 1 6.957497 0.000000 8195 +orhav 0 1 6.957497 0.000000 8196 +willhav 0 1 6.957497 0.000000 8197 +delft 0 1 6.957497 0.000000 8198 +brochur 0 1 6.957497 0.000000 8199 +brochuremosa 0 1 6.957497 0.000000 8200 +macmosa 0 1 6.957497 0.000000 8201 +itemund 0 1 6.957497 0.000000 8202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..a4711310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +schedul 0 119 2.079442 0.000000 85 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +octob 0 89 2.397895 0.000000 156 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +center 0 88 2.397895 0.000000 158 +novemb 1 81 2.484907 2.484907 179 +start 0 83 2.484907 0.000000 173 +exam 0 86 2.484907 0.000000 169 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +resourc 0 81 2.484907 0.000000 172 +decemb 0 80 2.564949 0.000000 215 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +orient 0 80 2.564949 0.000000 205 +tuesdai 0 73 2.639057 0.000000 219 +meet 0 72 2.639057 0.000000 229 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +thursdai 0 70 2.708050 0.000000 241 +view 0 70 2.708050 0.000000 254 +sieg 0 69 2.708050 0.000000 260 +prof 0 64 2.772589 0.000000 273 +virtual 0 62 2.772589 0.000000 285 +written 0 63 2.772589 0.000000 278 +descript 0 64 2.772589 0.000000 271 +collect 0 65 2.772589 0.000000 268 +import 0 65 2.772589 0.000000 282 +wednesdai 0 64 2.772589 0.000000 261 +room 0 59 2.833213 0.000000 301 +overview 0 56 2.890372 0.000000 323 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +digit 0 52 2.995732 0.000000 348 +approach 0 48 3.044522 0.000000 366 +keep 0 44 3.135494 0.000000 409 +discuss 0 45 3.135494 0.000000 399 +made 0 44 3.135494 0.000000 398 +fridai 0 44 3.135494 0.000000 390 +term 0 43 3.178054 0.000000 411 +review 0 42 3.218876 0.000000 425 +error 0 40 3.258097 0.000000 449 +close 0 38 3.295837 0.000000 465 +hand 0 37 3.332205 0.000000 475 +copyright 0 36 3.367296 0.000000 495 +short 0 36 3.367296 0.000000 499 +ofth 0 36 3.367296 0.000000 491 +ad 0 32 3.465736 0.000000 544 +titl 0 31 3.496508 0.000000 556 +neural 0 30 3.555348 0.000000 578 +scale 0 28 3.610918 0.000000 613 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +symbol 0 27 3.637586 0.000000 620 +arrai 0 27 3.637586 0.000000 627 +pagecs 0 26 3.688879 0.000000 658 +session 0 26 3.688879 0.000000 643 +supercomput 0 25 3.737670 0.000000 681 +notic 0 25 3.737670 0.000000 675 +begin 0 23 3.806662 0.000000 716 +recognit 0 23 3.806662 0.000000 723 +dai 0 22 3.850148 0.000000 753 +instal 0 22 3.850148 0.000000 754 +half 0 21 3.912023 0.000000 776 +theunivers 0 21 3.912023 0.000000 797 +demo 0 18 4.060443 0.000000 888 +segment 0 17 4.110874 0.000000 931 +intel 0 16 4.174387 0.000000 1000 +normal 0 16 4.174387 0.000000 995 +brief 0 16 4.174387 0.000000 1001 +permit 0 16 4.174387 0.000000 962 +embed 0 14 4.317488 0.000000 1102 +hong 0 14 4.317488 0.000000 1105 +heterogen 0 14 4.317488 0.000000 1090 +canada 0 13 4.382027 0.000000 1158 +guest 0 12 4.465908 0.000000 1220 +hypermedia 0 12 4.465908 0.000000 1247 +onth 0 12 4.465908 0.000000 1218 +mesh 0 11 4.553877 0.000000 1351 +simon 0 8 4.875197 0.000000 1697 +documentfor 0 7 5.010635 0.000000 1865 +sweden 0 7 5.010635 0.000000 1885 +friedman 0 7 5.010635 0.000000 1886 +theclass 0 6 5.164786 0.000000 2060 +conveni 0 6 5.164786 0.000000 2088 +otherthan 0 6 5.164786 0.000000 2009 +speaker 0 5 5.347108 0.000000 2370 +newinform 0 5 5.347108 0.000000 2342 +templat 0 5 5.347108 0.000000 2311 +subjectto 0 5 5.347108 0.000000 2369 +tennesse 0 4 5.568345 0.000000 2763 +pyramid 0 3 5.857933 0.000000 3358 +paragon 0 3 5.857933 0.000000 3359 +simd 0 3 5.857933 0.000000 3360 +mimd 0 3 5.857933 0.000000 3361 +icon 0 3 5.857933 0.000000 3362 +neal 0 3 5.857933 0.000000 3184 +maspar 0 2 6.263398 0.000000 4279 +informationon 0 2 6.263398 0.000000 4232 +burt 0 2 6.263398 0.000000 4494 +rosenfeld 0 2 6.263398 0.000000 4495 +inon 0 2 6.263398 0.000000 4496 +processingwelcom 0 1 6.957497 0.000000 8203 +hourearli 0 1 6.957497 0.000000 8204 +nian 0 1 6.957497 0.000000 8205 +fraser 0 1 6.957497 0.000000 8206 +burnabi 0 1 6.957497 0.000000 8207 +bharath 0 1 6.957497 0.000000 8208 +modayur 0 1 6.957497 0.000000 8209 +invariantoper 0 1 6.957497 0.000000 8210 +hierarchicalrelax 0 1 6.957497 0.000000 8211 +isodata 0 1 6.957497 0.000000 8212 +treatment 0 1 6.957497 0.000000 8213 +topicsdur 0 1 6.957497 0.000000 8214 +activelyexplor 0 1 6.957497 0.000000 8215 +writeupsi 0 1 6.957497 0.000000 8216 +resourcespvm 0 1 6.957497 0.000000 8217 +virtualmachin 0 1 6.957497 0.000000 8218 +layear 0 1 6.957497 0.000000 8219 +aviru 0 1 6.957497 0.000000 8220 +moreworkst 0 1 6.957497 0.000000 8221 +studydistribut 0 1 6.957497 0.000000 8222 +technicalpubl 0 1 6.957497 0.000000 8223 +paragonparallel 0 1 6.957497 0.000000 8224 +variousvendor 0 1 6.957497 0.000000 8225 +correctionsto 0 1 6.957497 0.000000 8226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..ef14b797 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +find 0 111 2.197225 0.000000 111 +graphic 0 90 2.397895 0.000000 147 +method 0 80 2.564949 0.000000 213 +april 0 77 2.564949 0.000000 196 +optim 0 79 2.564949 0.000000 197 +exampl 0 77 2.564949 0.000000 195 +solv 0 73 2.639057 0.000000 234 +differ 0 66 2.708050 0.000000 253 +numer 0 49 3.044522 0.000000 369 +linear 1 41 3.218876 3.218876 431 +global 0 34 3.401197 0.000000 520 +valu 0 25 3.737670 0.000000 665 +mike 0 24 3.761200 0.000000 703 +equat 0 23 3.806662 0.000000 724 +properti 0 22 3.850148 0.000000 749 +definit 0 19 4.007333 0.000000 864 +eric 0 19 4.007333 0.000000 870 +element 0 18 4.060443 0.000000 895 +intro 0 17 4.110874 0.000000 915 +matrix 0 17 4.110874 0.000000 933 +differenti 0 17 4.110874 0.000000 921 +adam 0 17 4.110874 0.000000 934 +finit 0 14 4.317488 0.000000 1106 +nonlinear 0 14 4.317488 0.000000 1107 +chuck 0 14 4.317488 0.000000 1108 +discret 0 13 4.382027 0.000000 1165 +jonathan 0 13 4.382027 0.000000 1174 +brad 0 12 4.465908 0.000000 1264 +daniel 0 12 4.465908 0.000000 1233 +decomposit 0 10 4.653960 0.000000 1439 +arithmet 0 10 4.653960 0.000000 1388 +kevin 0 9 4.753590 0.000000 1482 +joel 0 8 4.875197 0.000000 1698 +root 0 8 4.875197 0.000000 1650 +constrain 0 6 5.164786 0.000000 2042 +fred 0 6 5.164786 0.000000 2072 +fit 0 5 5.347108 0.000000 2285 +invers 0 4 5.568345 0.000000 2764 +corei 0 4 5.568345 0.000000 2718 +eigenvalu 0 3 5.857933 0.000000 3364 +eigenvector 0 3 5.857933 0.000000 3365 +singular 0 3 5.857933 0.000000 3366 +conclus 0 3 5.857933 0.000000 3367 +ordinari 0 3 5.857933 0.000000 3233 +interv 0 3 5.857933 0.000000 3253 +quadrat 0 2 6.263398 0.000000 4497 +shuichi 0 2 6.263398 0.000000 4498 +unconstrain 0 2 6.263398 0.000000 4499 +kari 0 2 6.263398 0.000000 4500 +regress 0 2 6.263398 0.000000 4501 +calibr 0 2 6.263398 0.000000 4502 +joanna 0 2 6.263398 0.000000 4503 +radios 0 2 6.263398 0.000000 4504 +pde 0 2 6.263398 0.000000 4505 +seminarc 0 1 6.957497 0.000000 8228 +rspring 0 1 6.957497 0.000000 8229 +ronen 0 1 6.957497 0.000000 8230 +troi 0 1 6.957497 0.000000 8231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..b1c5a0c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +process 1 142 1.945910 1.945910 72 +relat 0 139 1.945910 0.000000 68 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +site 0 106 2.197225 0.000000 119 +specif 0 106 2.197225 0.000000 106 +imag 1 91 2.397895 2.397895 161 +educ 1 86 2.484907 2.484907 191 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +materi 0 75 2.639057 0.000000 221 +view 0 70 2.708050 0.000000 254 +experi 1 64 2.772589 2.772589 283 +septemb 0 65 2.772589 0.000000 274 +special 0 56 2.890372 0.000000 320 +explor 0 58 2.890372 0.000000 324 +quarter 1 47 3.091042 3.091042 389 +seminar 0 38 3.295837 0.000000 470 +copyright 1 36 3.367296 3.367296 495 +winter 0 36 3.367296 0.000000 500 +collabor 0 32 3.465736 0.000000 543 +autumn 1 31 3.496508 3.496508 558 +notic 0 25 3.737670 0.000000 675 +instal 0 22 3.850148 0.000000 754 +varieti 0 22 3.850148 0.000000 740 +tanimoto 1 10 4.653960 4.653960 1429 +transcript 0 6 5.164786 0.000000 2067 +otherthan 0 6 5.164786 0.000000 2009 +subjectto 0 5 5.347108 0.000000 2369 +useof 0 3 5.857933 0.000000 3368 +quarterscs 0 1 6.957497 0.000000 8232 +topicssteven 0 1 6.957497 0.000000 8233 +instructorcs 0 1 6.957497 0.000000 8234 +varyfrom 0 1 6.957497 0.000000 8235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..9662e37c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +paper 1 205 1.609438 1.609438 38 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +problem 0 147 1.945910 0.000000 75 +document 0 121 2.079442 0.000000 89 +technolog 0 131 2.079442 0.000000 102 +world 0 115 2.197225 0.000000 126 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +octob 0 89 2.397895 0.000000 156 +proceed 0 93 2.397895 0.000000 152 +present 0 91 2.397895 0.000000 145 +comment 0 93 2.397895 0.000000 146 +novemb 0 81 2.484907 0.000000 179 +educ 0 86 2.484907 0.000000 191 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +environ 0 84 2.484907 0.000000 177 +second 0 81 2.484907 0.000000 166 +internet 0 83 2.484907 0.000000 186 +state 0 76 2.564949 0.000000 207 +html 0 75 2.639057 0.000000 235 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +onlin 0 75 2.639057 0.000000 223 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +view 0 70 2.708050 0.000000 254 +descript 0 64 2.772589 0.000000 271 +import 0 65 2.772589 0.000000 282 +share 0 59 2.833213 0.000000 304 +possibl 0 47 3.091042 0.000000 378 +discuss 0 45 3.135494 0.000000 399 +keep 0 44 3.135494 0.000000 409 +describ 0 45 3.135494 0.000000 400 +netscap 0 44 3.135494 0.000000 395 +protocol 0 45 3.135494 0.000000 407 +third 0 43 3.178054 0.000000 412 +term 0 43 3.178054 0.000000 411 +combin 0 42 3.218876 0.000000 421 +vision 0 41 3.218876 0.000000 430 +futur 0 41 3.218876 0.000000 427 +map 0 39 3.258097 0.000000 452 +littl 0 39 3.258097 0.000000 454 +tutori 0 39 3.258097 0.000000 437 +paul 0 38 3.295837 0.000000 471 +respons 0 37 3.332205 0.000000 476 +copyright 0 36 3.367296 0.000000 495 +short 0 36 3.367296 0.000000 499 +either 0 35 3.401197 0.000000 506 +michael 0 35 3.401197 0.000000 514 +john 0 33 3.433987 0.000000 532 +concept 0 32 3.465736 0.000000 537 +ad 0 32 3.465736 0.000000 544 +autumn 0 31 3.496508 0.000000 558 +someth 0 31 3.496508 0.000000 554 +option 0 30 3.555348 0.000000 575 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +enhanc 0 26 3.688879 0.000000 644 +notic 0 25 3.737670 0.000000 675 +brows 0 23 3.806662 0.000000 726 +instal 0 22 3.850148 0.000000 754 +geometri 0 22 3.850148 0.000000 752 +annot 0 21 3.912023 0.000000 775 +navig 0 21 3.912023 0.000000 796 +toolkit 0 20 3.951244 0.000000 835 +anderson 0 19 4.007333 0.000000 860 +particularli 0 19 4.007333 0.000000 867 +lower 0 18 4.060443 0.000000 886 +layer 0 17 4.110874 0.000000 926 +adam 0 17 4.110874 0.000000 934 +choic 0 16 4.174387 0.000000 979 +piec 0 15 4.248495 0.000000 1020 +achiev 0 14 4.317488 0.000000 1088 +hong 0 14 4.317488 0.000000 1105 +central 0 13 4.382027 0.000000 1160 +promot 0 12 4.465908 0.000000 1235 +hypermedia 0 12 4.465908 0.000000 1247 +infrastructur 0 12 4.465908 0.000000 1234 +noth 0 11 4.553877 0.000000 1328 +smart 0 11 4.553877 0.000000 1352 +baer 0 11 4.553877 0.000000 1353 +mosaic 0 10 4.653960 0.000000 1426 +tutor 0 9 4.753590 0.000000 1552 +beyond 0 7 5.010635 0.000000 1834 +documentfor 0 7 5.010635 0.000000 1865 +davi 0 7 5.010635 0.000000 1888 +baker 0 7 5.010635 0.000000 1812 +transcript 0 6 5.164786 0.000000 2067 +theclass 0 6 5.164786 0.000000 2060 +otherthan 0 6 5.164786 0.000000 2009 +plu 0 6 5.164786 0.000000 2004 +trail 0 6 5.164786 0.000000 2071 +newinform 0 5 5.347108 0.000000 2342 +subjectto 0 5 5.347108 0.000000 2369 +barton 0 5 5.347108 0.000000 2371 +jeremi 0 5 5.347108 0.000000 2360 +carlson 0 5 5.347108 0.000000 2351 +sandi 0 4 5.568345 0.000000 2765 +increasingli 0 4 5.568345 0.000000 2766 +ncsa 0 4 5.568345 0.000000 2767 +ward 0 2 6.263398 0.000000 4506 +tessa 0 2 6.263398 0.000000 4507 +learner 0 2 6.263398 0.000000 4508 +uiuc 0 2 6.263398 0.000000 4509 +marla 0 2 6.263398 0.000000 4510 +soap 0 2 6.263398 0.000000 4511 +presentor 0 1 6.957497 0.000000 8236 +labord 0 1 6.957497 0.000000 8237 +wwwwelcom 0 1 6.957497 0.000000 8238 +mccalla 0 1 6.957497 0.000000 8239 +importanceof 0 1 6.957497 0.000000 8240 +youngquist 0 1 6.957497 0.000000 8241 +aboutinternet 0 1 6.957497 0.000000 8242 +microworld 0 1 6.957497 0.000000 8243 +tointellig 0 1 6.957497 0.000000 8244 +bartel 0 1 6.957497 0.000000 8245 +mathematicsconnect 0 1 6.957497 0.000000 8246 +gari 0 1 6.957497 0.000000 8247 +ambiti 0 1 6.957497 0.000000 8248 +thethem 0 1 6.957497 0.000000 8249 +moresophist 0 1 6.957497 0.000000 8250 +elabor 0 1 6.957497 0.000000 8251 +ofwww 0 1 6.957497 0.000000 8252 +intechn 0 1 6.957497 0.000000 8253 +couldmak 0 1 6.957497 0.000000 8254 +applicationsthat 0 1 6.957497 0.000000 8255 +webhttp 0 1 6.957497 0.000000 8256 +empow 0 1 6.957497 0.000000 8257 +agehttp 0 1 6.957497 0.000000 8258 +communitieshttp 0 1 6.957497 0.000000 8259 +dietz 0 1 6.957497 0.000000 8260 +serviceshttp 0 1 6.957497 0.000000 8261 +dcewebkit 0 1 6.957497 0.000000 8262 +zhumeet 0 1 6.957497 0.000000 8263 +aboutcurriculum 0 1 6.957497 0.000000 8264 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..06932d66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +technolog 1 131 2.079442 2.079442 102 +schedul 1 119 2.079442 2.079442 85 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +site 0 106 2.197225 0.000000 119 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +present 0 91 2.397895 0.000000 145 +learn 1 86 2.484907 2.484907 170 +internet 0 83 2.484907 0.000000 186 +school 0 84 2.484907 0.000000 188 +come 0 78 2.564949 0.000000 202 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +view 0 70 2.708050 0.000000 254 +descript 0 64 2.772589 0.000000 271 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +explor 0 58 2.890372 0.000000 324 +cover 0 55 2.944439 0.000000 329 +visual 0 48 3.044522 0.000000 372 +possibl 0 47 3.091042 0.000000 378 +move 0 47 3.091042 0.000000 382 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +seminar 0 38 3.295837 0.000000 470 +respons 0 37 3.332205 0.000000 476 +copyright 0 36 3.367296 0.000000 495 +collabor 0 32 3.465736 0.000000 543 +autumn 1 31 3.496508 3.496508 558 +particip 0 29 3.583519 0.000000 589 +depend 0 29 3.583519 0.000000 583 +notic 0 25 3.737670 0.000000 675 +wai 0 25 3.737670 0.000000 662 +togeth 0 23 3.806662 0.000000 714 +instal 0 22 3.850148 0.000000 754 +decid 0 14 4.317488 0.000000 1075 +context 0 13 4.382027 0.000000 1153 +subset 0 10 4.653960 0.000000 1425 +tanimoto 0 10 4.653960 0.000000 1429 +otherthan 0 6 5.164786 0.000000 2009 +middl 0 5 5.347108 0.000000 2372 +subjectto 0 5 5.347108 0.000000 2369 +evid 0 4 5.568345 0.000000 2768 +innew 0 2 6.263398 0.000000 4512 +learningwelcom 0 1 6.957497 0.000000 8265 +methodologiesfor 0 1 6.957497 0.000000 8266 +forcollabor 0 1 6.957497 0.000000 8267 +willtak 0 1 6.957497 0.000000 8268 +ofthes 0 1 6.957497 0.000000 8269 +ofai 0 1 6.957497 0.000000 8270 +ofstud 0 1 6.957497 0.000000 8271 +intopeopl 0 1 6.957497 0.000000 8272 +meani 0 1 6.957497 0.000000 8273 +schoolmai 0 1 6.957497 0.000000 8274 +participatingstud 0 1 6.957497 0.000000 8275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..134aad82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +schedul 1 119 2.079442 2.079442 85 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +memori 0 101 2.302585 0.000000 139 +techniqu 0 99 2.302585 0.000000 138 +technic 0 100 2.302585 0.000000 140 +follow 0 92 2.397895 0.000000 143 +octob 0 89 2.397895 0.000000 156 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +ieee 0 86 2.484907 0.000000 190 +environ 0 84 2.484907 0.000000 177 +requir 0 81 2.484907 0.000000 167 +activ 0 84 2.484907 0.000000 182 +dynam 0 76 2.564949 0.000000 194 +april 0 77 2.564949 0.000000 196 +server 0 76 2.564949 0.000000 204 +june 0 79 2.564949 0.000000 214 +appear 0 78 2.564949 0.000000 210 +workshop 0 71 2.639057 0.000000 239 +polici 0 64 2.772589 0.000000 279 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +share 0 59 2.833213 0.000000 304 +processor 0 54 2.944439 0.000000 335 +scientif 0 53 2.944439 0.000000 341 +cover 0 55 2.944439 0.000000 329 +principl 0 48 3.044522 0.000000 357 +set 0 50 3.044522 0.000000 361 +adapt 0 46 3.091042 0.000000 387 +discuss 0 45 3.135494 0.000000 399 +tutori 0 39 3.258097 0.000000 437 +transact 0 39 3.258097 0.000000 438 +theoret 0 39 3.258097 0.000000 446 +open 0 38 3.295837 0.000000 469 +workstat 0 37 3.332205 0.000000 479 +ofth 0 36 3.367296 0.000000 491 +global 0 34 3.401197 0.000000 520 +concurr 0 34 3.401197 0.000000 501 +product 0 33 3.433987 0.000000 527 +richard 0 31 3.496508 0.000000 559 +multiprocessor 0 28 3.610918 0.000000 605 +measur 0 28 3.610918 0.000000 609 +univ 0 28 3.610918 0.000000 617 +proc 0 26 3.688879 0.000000 649 +strategi 0 25 3.737670 0.000000 682 +supercomput 0 25 3.737670 0.000000 681 +ofwashington 0 22 3.850148 0.000000 766 +william 0 22 3.850148 0.000000 765 +programminglanguag 0 21 3.912023 0.000000 782 +alloc 0 20 3.951244 0.000000 821 +smith 0 20 3.951244 0.000000 820 +runtim 0 19 4.007333 0.000000 858 +eric 0 19 4.007333 0.000000 870 +anderson 0 19 4.007333 0.000000 860 +thoma 0 18 4.060443 0.000000 901 +scott 0 18 4.060443 0.000000 884 +expand 0 17 4.110874 0.000000 928 +miller 0 17 4.110874 0.000000 949 +asplo 0 17 4.110874 0.000000 948 +partit 0 16 4.174387 0.000000 984 +jose 0 16 4.174387 0.000000 976 +demand 0 14 4.317488 0.000000 1073 +conf 0 13 4.382027 0.000000 1181 +sigmetr 0 13 4.382027 0.000000 1173 +coordin 0 13 4.382027 0.000000 1182 +karlin 0 13 4.382027 0.000000 1176 +workload 0 12 4.465908 0.000000 1210 +mari 0 12 4.465908 0.000000 1266 +gupta 0 12 4.465908 0.000000 1241 +kenneth 0 12 4.465908 0.000000 1265 +characterist 0 12 4.465908 0.000000 1257 +philadelphia 0 12 4.465908 0.000000 1244 +bill 0 11 4.553877 0.000000 1297 +impact 0 11 4.553877 0.000000 1334 +wood 0 11 4.553877 0.000000 1355 +santa 0 10 4.653960 0.000000 1441 +ofcomput 0 10 4.653960 0.000000 1442 +patterson 0 9 4.753590 0.000000 1554 +vernon 0 9 4.753590 0.000000 1556 +job 0 8 4.875197 0.000000 1702 +migrat 0 7 5.010635 0.000000 1851 +burger 0 7 5.010635 0.000000 1889 +multiprogram 0 6 5.164786 0.000000 2010 +chandra 0 6 5.164786 0.000000 2091 +symp 0 5 5.347108 0.000000 2376 +ousterhout 0 5 5.347108 0.000000 2301 +leblanc 0 5 5.347108 0.000000 2377 +affin 0 5 5.347108 0.000000 2378 +parallelprogram 0 5 5.347108 0.000000 2379 +ofparallel 0 5 5.347108 0.000000 2380 +culler 0 5 5.347108 0.000000 2381 +hyder 0 4 5.568345 0.000000 2772 +anoop 0 4 5.568345 0.000000 2770 +identif 0 4 5.568345 0.000000 2773 +barbara 0 3 5.857933 0.000000 3380 +ipp 0 3 5.857933 0.000000 3381 +dusseau 0 3 5.857933 0.000000 3382 +nguyen 0 3 5.857933 0.000000 3290 +zahorjan 0 3 5.857933 0.000000 3383 +mccann 0 3 5.857933 0.000000 3273 +tran 0 3 5.857933 0.000000 3384 +saltz 0 3 5.857933 0.000000 3385 +am 0 3 5.857933 0.000000 3386 +patrick 0 3 5.857933 0.000000 3334 +weihl 0 3 5.857933 0.000000 3284 +tradeoff 0 3 5.857933 0.000000 3387 +parson 0 2 6.263398 0.000000 4528 +memorymultiprocessor 0 2 6.263398 0.000000 4529 +gang 0 2 6.263398 0.000000 4530 +inrd 0 2 6.263398 0.000000 4531 +andsequenti 0 2 6.263398 0.000000 4532 +tucker 0 2 6.263398 0.000000 4307 +shun 0 2 6.263398 0.000000 4533 +leung 0 2 6.263398 0.000000 4534 +han 0 2 6.263398 0.000000 4535 +agraw 0 2 6.263398 0.000000 4536 +derek 0 2 6.263398 0.000000 4537 +bunt 0 2 6.263398 0.000000 4308 +rosenblum 0 2 6.263398 0.000000 4314 +tera 0 2 6.263398 0.000000 4224 +computersystem 0 2 6.263398 0.000000 4360 +arpaci 0 1 6.957497 0.000000 8345 +vaswani 0 1 6.957497 0.000000 8346 +sevcik 0 1 6.957497 0.000000 8347 +feitelson 0 1 6.957497 0.000000 8348 +coschedul 0 1 6.957497 0.000000 8349 +mvmv 0 1 6.957497 0.000000 8350 +systemsprofessor 0 1 6.957497 0.000000 8351 +vernontim 0 1 6.957497 0.000000 8352 +pmlocat 0 1 6.957497 0.000000 8353 +now 0 1 6.957497 0.000000 8354 +vahdat 0 1 6.957497 0.000000 8355 +equi 0 1 6.957497 0.000000 8356 +issuesfor 0 1 6.957497 0.000000 8357 +workloadcharacterist 0 1 6.957497 0.000000 8358 +evangelo 0 1 6.957497 0.000000 8359 +markato 0 1 6.957497 0.000000 8360 +loopschedul 0 1 6.957497 0.000000 8361 +iniee 0 1 6.957497 0.000000 8362 +zima 0 1 6.957497 0.000000 8363 +chapman 0 1 6.957497 0.000000 8364 +edjlali 0 1 6.957497 0.000000 8365 +sussman 0 1 6.957497 0.000000 8366 +comparisonsshikharesh 0 1 6.957497 0.000000 8367 +majumdar 0 1 6.957497 0.000000 8368 +eager 0 1 6.957497 0.000000 8369 +variabilityservic 0 1 6.957497 0.000000 8370 +dror 0 1 6.957497 0.000000 8371 +nitzberg 0 1 6.957497 0.000000 8372 +thenasa 0 1 6.957497 0.000000 8373 +ipsc 0 1 6.957497 0.000000 8374 +leutenegg 0 1 6.957497 0.000000 8375 +sobalvarro 0 1 6.957497 0.000000 8376 +rohit 0 1 6.957497 0.000000 8377 +devin 0 1 6.957497 0.000000 8378 +verghes 0 1 6.957497 0.000000 8379 +mendel 0 1 6.957497 0.000000 8380 +multiprocessorcomput 0 1 6.957497 0.000000 8381 +alverson 0 1 6.957497 0.000000 8382 +kahan 0 1 6.957497 0.000000 8383 +korri 0 1 6.957497 0.000000 8384 +effectivedistribut 0 1 6.957497 0.000000 8385 +rudolph 0 1 6.957497 0.000000 8386 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..214aba50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +algorithm 0 162 1.791759 0.000000 57 +file 0 132 1.945910 0.000000 70 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +materi 0 75 2.639057 0.000000 221 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +automat 0 61 2.833213 0.000000 306 +sever 0 56 2.890372 0.000000 322 +format 1 48 3.044522 3.044522 356 +fast 0 42 3.218876 0.000000 429 +slide 0 38 3.295837 0.000000 467 +origin 0 38 3.295837 0.000000 472 +winter 0 36 3.367296 0.000000 500 +print 0 34 3.401197 0.000000 503 +titl 0 31 3.496508 0.000000 556 +richard 0 31 3.496508 0.000000 559 +usual 0 28 3.610918 0.000000 608 +load 0 28 3.610918 0.000000 601 +administr 0 27 3.637586 0.000000 628 +linux 0 27 3.637586 0.000000 631 +viewer 0 21 3.912023 0.000000 787 +martin 0 21 3.912023 0.000000 794 +latest 0 21 3.912023 0.000000 785 +render 0 17 4.110874 0.000000 947 +biologi 0 15 4.248495 0.000000 1049 +draft 0 14 4.317488 0.000000 1085 +latex 0 14 4.317488 0.000000 1064 +translat 0 13 4.382027 0.000000 1164 +larri 0 13 4.382027 0.000000 1142 +readabl 0 12 4.465908 0.000000 1258 +adob 0 7 5.010635 0.000000 1873 +molecular 0 7 5.010635 0.000000 1887 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +legibl 0 7 5.010635 0.000000 1866 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 0 5 5.347108 0.000000 2345 +karp 0 5 5.347108 0.000000 2284 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +tompaclass 0 3 5.857933 0.000000 3310 +faith 0 3 5.857933 0.000000 3363 +bboard 0 1 6.957497 0.000000 8227 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..e2fd91cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +paper 1 205 1.609438 1.609438 38 +list 0 201 1.609438 0.000000 39 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +machin 0 129 2.079442 0.000000 95 +report 0 131 2.079442 0.000000 92 +welcom 0 122 2.079442 0.000000 99 +send 0 114 2.197225 0.000000 109 +memori 0 101 2.302585 0.000000 139 +technic 0 100 2.302585 0.000000 140 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +octob 0 89 2.397895 0.000000 156 +member 0 84 2.484907 0.000000 165 +start 0 83 2.484907 0.000000 173 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +tuesdai 0 73 2.639057 0.000000 219 +line 0 75 2.639057 0.000000 231 +meet 0 72 2.639057 0.000000 229 +workshop 0 71 2.639057 0.000000 239 +summari 0 73 2.639057 0.000000 237 +intellig 0 72 2.639057 0.000000 225 +differ 0 66 2.708050 0.000000 253 +integr 0 67 2.708050 0.000000 245 +receiv 0 66 2.708050 0.000000 244 +organ 0 65 2.772589 0.000000 265 +previou 0 62 2.772589 0.000000 290 +copi 0 63 2.772589 0.000000 284 +prof 0 64 2.772589 0.000000 273 +juli 0 60 2.833213 0.000000 305 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +processor 0 54 2.944439 0.000000 335 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +week 0 52 2.995732 0.000000 343 +case 0 51 2.995732 0.000000 351 +format 0 48 3.044522 0.000000 356 +quarter 0 47 3.091042 0.000000 389 +discuss 0 45 3.135494 0.000000 399 +execut 0 45 3.135494 0.000000 404 +might 0 41 3.218876 0.000000 426 +continu 0 39 3.258097 0.000000 448 +author 0 39 3.258097 0.000000 450 +credit 0 38 3.295837 0.000000 460 +slide 0 38 3.295837 0.000000 467 +formal 0 37 3.332205 0.000000 478 +ofth 0 36 3.367296 0.000000 491 +short 0 36 3.367296 0.000000 499 +either 0 35 3.401197 0.000000 506 +bibliographi 0 34 3.401197 0.000000 518 +jame 0 35 3.401197 0.000000 507 +posit 0 31 3.496508 0.000000 552 +progress 0 28 3.610918 0.000000 598 +usual 0 28 3.610918 0.000000 608 +load 0 28 3.610918 0.000000 601 +subject 0 26 3.688879 0.000000 647 +valu 0 25 3.737670 0.000000 665 +lead 0 23 3.806662 0.000000 718 +begin 0 23 3.806662 0.000000 716 +thank 0 23 3.806662 0.000000 721 +variabl 0 23 3.806662 0.000000 715 +chip 0 21 3.912023 0.000000 770 +leav 0 21 3.912023 0.000000 772 +anderson 0 19 4.007333 0.000000 860 +predict 0 19 4.007333 0.000000 855 +miss 0 19 4.007333 0.000000 866 +asplo 0 17 4.110874 0.000000 948 +brown 0 16 4.174387 0.000000 977 +micro 0 15 4.248495 0.000000 1031 +hopefulli 0 14 4.317488 0.000000 1071 +sai 0 13 4.382027 0.000000 1175 +rememb 0 12 4.465908 0.000000 1217 +literatur 0 11 4.553877 0.000000 1300 +isca 0 11 4.553877 0.000000 1354 +baer 0 11 4.553877 0.000000 1353 +jean 0 10 4.653960 0.000000 1440 +subscrib 0 9 4.753590 0.000000 1541 +herefor 0 9 4.753590 0.000000 1483 +wall 0 9 4.753590 0.000000 1553 +patterson 0 9 4.753590 0.000000 1554 +readm 0 8 4.875197 0.000000 1699 +burger 0 7 5.010635 0.000000 1889 +ruth 0 7 5.010635 0.000000 1870 +multicomput 0 7 5.010635 0.000000 1890 +goodman 0 7 5.010635 0.000000 1891 +fortun 0 7 5.010635 0.000000 1872 +tobe 0 6 5.164786 0.000000 1995 +ifyou 0 6 5.164786 0.000000 1992 +majordomo 0 6 5.164786 0.000000 2066 +philipos 0 5 5.347108 0.000000 2373 +appreci 0 5 5.347108 0.000000 2374 +volunt 0 5 5.347108 0.000000 2307 +andrea 0 5 5.347108 0.000000 2375 +lunch 0 3 5.857933 0.000000 3369 +shen 0 3 5.857933 0.000000 3370 +pong 0 3 5.857933 0.000000 3371 +stefano 0 3 5.857933 0.000000 3372 +kaxira 0 3 5.857933 0.000000 3373 +yelick 0 3 5.857933 0.000000 3374 +shortli 0 3 5.857933 0.000000 3375 +heat 0 2 6.263398 0.000000 4113 +gershoni 0 2 6.263398 0.000000 4513 +matthai 0 2 6.263398 0.000000 4514 +tabular 0 2 6.263398 0.000000 4515 +guru 0 2 6.263398 0.000000 4476 +wilkerson 0 2 6.263398 0.000000 4516 +dalli 0 2 6.263398 0.000000 4517 +datascalar 0 2 6.263398 0.000000 4518 +spsd 0 2 6.263398 0.000000 4519 +iram 0 2 6.263398 0.000000 4520 +cseg 0 1 6.957497 0.000000 8276 +lunchcs 0 1 6.957497 0.000000 8277 +lunchcours 0 1 6.957497 0.000000 8278 +loupbaermeet 0 1 6.957497 0.000000 8279 +withalmost 0 1 6.957497 0.000000 8280 +discussedat 0 1 6.957497 0.000000 8281 +byesteem 0 1 6.957497 0.000000 8282 +mostlyw 0 1 6.957497 0.000000 8283 +discussionson 0 1 6.957497 0.000000 8284 +quartersi 0 1 6.957497 0.000000 8285 +fromparticip 0 1 6.957497 0.000000 8286 +oncrit 0 1 6.957497 0.000000 8287 +hereread 0 1 6.957497 0.000000 8288 +morethem 0 1 6.957497 0.000000 8289 +molli 0 1 6.957497 0.000000 8290 +thestud 0 1 6.957497 0.000000 8291 +informallyor 0 1 6.957497 0.000000 8292 +lipasti 0 1 6.957497 0.000000 8293 +advanceprogrami 0 1 6.957497 0.000000 8294 +thesaulsburi 0 1 6.957497 0.000000 8295 +readashlei 0 1 6.957497 0.000000 8296 +saulsburi 0 1 6.957497 0.000000 8297 +fong 0 1 6.957497 0.000000 8298 +nowatzyk 0 1 6.957497 0.000000 8299 +fillo 0 1 6.957497 0.000000 8300 +keckler 0 1 6.957497 0.000000 8301 +machinelink 0 1 6.957497 0.000000 8302 +readdoug 0 1 6.957497 0.000000 8303 +neton 0 1 6.957497 0.000000 8304 +cardwel 0 1 6.957497 0.000000 8305 +fromm 0 1 6.957497 0.000000 8306 +keeton 0 1 6.957497 0.000000 8307 +kozyraki 0 1 6.957497 0.000000 8308 +thomasand 0 1 6.957497 0.000000 8309 +availableher 0 1 6.957497 0.000000 8310 +themajordomo 0 1 6.957497 0.000000 8311 +shouldinclud 0 1 6.957497 0.000000 8312 +lineblank 0 1 6.957497 0.000000 8313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..635ef698 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +cours 0 273 1.098612 0.000000 15 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +spring 0 131 2.079442 0.000000 88 +interact 0 62 2.772589 0.000000 270 +offer 0 43 3.178054 0.000000 414 +human 0 32 3.465736 0.000000 546 +pagecs 0 26 3.688879 0.000000 658 +experiment 0 26 3.688879 0.000000 645 +born 0 21 3.912023 0.000000 798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..09e7d4ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +welcom 0 122 2.079442 0.000000 99 +specif 0 106 2.197225 0.000000 106 +send 0 114 2.197225 0.000000 109 +memori 0 101 2.302585 0.000000 139 +second 0 81 2.484907 0.000000 166 +optim 0 79 2.564949 0.000000 197 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +line 0 75 2.639057 0.000000 231 +meet 0 72 2.639057 0.000000 229 +practic 0 70 2.708050 0.000000 246 +organ 0 65 2.772589 0.000000 265 +wednesdai 0 64 2.772589 0.000000 261 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +special 0 56 2.890372 0.000000 320 +week 1 52 2.995732 2.995732 343 +approach 0 48 3.044522 0.000000 366 +without 0 50 3.044522 0.000000 370 +cost 0 37 3.332205 0.000000 480 +represent 0 35 3.401197 0.000000 512 +michael 0 35 3.401197 0.000000 514 +graph 0 30 3.555348 0.000000 576 +depend 0 29 3.583519 0.000000 583 +framework 0 28 3.610918 0.000000 606 +arrai 0 27 3.637586 0.000000 627 +subject 0 26 3.688879 0.000000 647 +valu 0 25 3.737670 0.000000 665 +flow 0 24 3.761200 0.000000 700 +leav 0 21 3.912023 0.000000 772 +alloc 0 20 3.951244 0.000000 821 +anderson 0 19 4.007333 0.000000 860 +andrew 0 19 4.007333 0.000000 849 +offici 0 18 4.060443 0.000000 894 +thoma 0 18 4.060443 0.000000 901 +regist 0 17 4.110874 0.000000 938 +analyz 0 17 4.110874 0.000000 925 +georg 0 16 4.174387 0.000000 994 +susan 0 15 4.248495 0.000000 1050 +todd 0 15 4.248495 0.000000 1051 +floor 0 14 4.317488 0.000000 1070 +dean 0 14 4.317488 0.000000 1104 +charl 0 13 4.382027 0.000000 1149 +sai 0 13 4.382027 0.000000 1175 +gupta 0 12 4.465908 0.000000 1241 +loew 0 12 4.465908 0.000000 1252 +iter 0 12 4.465908 0.000000 1206 +kenneth 0 12 4.465908 0.000000 1265 +grant 0 12 4.465908 0.000000 1216 +minimum 0 9 4.753590 0.000000 1555 +strength 0 9 4.753590 0.000000 1494 +subscrib 0 9 4.753590 0.000000 1541 +paradigm 0 8 4.875197 0.000000 1662 +lewi 0 8 4.875197 0.000000 1700 +erik 0 8 4.875197 0.000000 1701 +grove 0 8 4.875197 0.000000 1675 +roger 0 7 5.010635 0.000000 1892 +multicomput 0 7 5.010635 0.000000 1890 +fischer 0 7 5.010635 0.000000 1893 +reduct 0 7 5.010635 0.000000 1877 +ruth 0 7 5.010635 0.000000 1870 +banerje 0 6 5.164786 0.000000 2018 +mock 0 6 5.164786 0.000000 2087 +tullsen 0 6 5.164786 0.000000 2081 +majordomo 0 6 5.164786 0.000000 2066 +bottleneck 0 4 5.568345 0.000000 2769 +anoop 0 4 5.568345 0.000000 2770 +interprocedur 0 4 5.568345 0.000000 2771 +noel 0 3 5.857933 0.000000 3376 +crew 0 3 5.857933 0.000000 3347 +litvinov 0 3 5.857933 0.000000 3343 +garrett 0 3 5.857933 0.000000 3377 +jen 0 3 5.857933 0.000000 3378 +blank 0 3 5.857933 0.000000 3379 +seminarcs 0 2 6.263398 0.000000 4521 +eggersand 0 2 6.263398 0.000000 4522 +francoi 0 2 6.263398 0.000000 4523 +taxat 0 2 6.263398 0.000000 4524 +ernst 0 2 6.263398 0.000000 4525 +secoski 0 2 6.263398 0.000000 4526 +lazi 0 2 6.263398 0.000000 4527 +seminarcours 0 1 6.957497 0.000000 8314 +craigchambersmeet 0 1 6.957497 0.000000 8315 +butreal 0 1 6.957497 0.000000 8316 +atrium 0 1 6.957497 0.000000 8317 +scheduleweek 0 1 6.957497 0.000000 8318 +memspi 0 1 6.957497 0.000000 8319 +margaretmartonosi 0 1 6.957497 0.000000 8320 +consel 0 1 6.957497 0.000000 8321 +itsus 0 1 6.957497 0.000000 8322 +evelyn 0 1 6.957497 0.000000 8323 +duesterwald 0 1 6.957497 0.000000 8324 +rajiv 0 1 6.957497 0.000000 8325 +maryl 0 1 6.957497 0.000000 8326 +soffa 0 1 6.957497 0.000000 8327 +danielweis 0 1 6.957497 0.000000 8328 +bjarn 0 1 6.957497 0.000000 8329 +steensgaard 0 1 6.957497 0.000000 8330 +coalesc 0 1 6.957497 0.000000 8331 +appel 0 1 6.957497 0.000000 8332 +hooverand 0 1 6.957497 0.000000 8333 +zadeck 0 1 6.957497 0.000000 8334 +byprivthviraj 0 1 6.957497 0.000000 8335 +stevenkurland 0 1 6.957497 0.000000 8336 +knoblock 0 1 6.957497 0.000000 8337 +knoop 0 1 6.957497 0.000000 8338 +oliv 0 1 6.957497 0.000000 8339 +andbernhard 0 1 6.957497 0.000000 8340 +steffen 0 1 6.957497 0.000000 8341 +subscribecsek 0 1 6.957497 0.000000 8342 +shortlyrec 0 1 6.957497 0.000000 8343 +melodi 0 1 6.957497 0.000000 8344 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..f80f6b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +tool 0 117 2.079442 0.000000 93 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +mathemat 0 108 2.197225 0.000000 123 +access 0 102 2.302585 0.000000 136 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +environ 0 84 2.484907 0.000000 177 +librari 0 87 2.484907 0.000000 181 +larg 0 82 2.484907 0.000000 168 +optim 0 79 2.564949 0.000000 197 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +orient 0 80 2.564949 0.000000 205 +effici 0 73 2.639057 0.000000 233 +tuesdai 0 73 2.639057 0.000000 219 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +septemb 0 65 2.772589 0.000000 274 +local 0 55 2.944439 0.000000 334 +still 0 50 3.044522 0.000000 362 +numer 0 49 3.044522 0.000000 369 +quarter 0 47 3.091042 0.000000 389 +discuss 0 45 3.135494 0.000000 399 +cach 0 41 3.218876 0.000000 432 +live 0 40 3.258097 0.000000 451 +seminar 0 38 3.295837 0.000000 470 +open 0 38 3.295837 0.000000 469 +expect 0 37 3.332205 0.000000 484 +global 0 34 3.401197 0.000000 520 +task 0 25 3.737670 0.000000 678 +strategi 0 25 3.737670 0.000000 682 +supercomput 0 25 3.737670 0.000000 681 +scalabl 0 24 3.761200 0.000000 705 +input 0 23 3.806662 0.000000 727 +thread 0 23 3.806662 0.000000 722 +cooper 0 22 3.850148 0.000000 757 +output 0 21 3.912023 0.000000 788 +util 0 21 3.912023 0.000000 774 +exploit 0 20 3.951244 0.000000 836 +portabl 0 20 3.951244 0.000000 819 +eric 0 19 4.007333 0.000000 870 +runtim 0 19 4.007333 0.000000 858 +attend 0 18 4.060443 0.000000 893 +debug 0 17 4.110874 0.000000 944 +sign 0 16 4.174387 0.000000 970 +driven 0 15 4.248495 0.000000 1048 +stream 0 15 4.248495 0.000000 1015 +matlab 0 14 4.317488 0.000000 1081 +hopefulli 0 14 4.317488 0.000000 1071 +block 0 13 4.382027 0.000000 1183 +everyon 0 13 4.382027 0.000000 1148 +bodi 0 13 4.382027 0.000000 1178 +gupta 0 12 4.465908 0.000000 1241 +characterist 0 12 4.465908 0.000000 1257 +iter 0 12 4.465908 0.000000 1206 +loew 0 12 4.465908 0.000000 1252 +holidai 0 12 4.465908 0.000000 1224 +brad 0 12 4.465908 0.000000 1264 +loop 0 11 4.553877 0.000000 1310 +multithread 0 11 4.553877 0.000000 1315 +subscrib 0 9 4.753590 0.000000 1541 +rel 0 9 4.753590 0.000000 1487 +cross 0 8 4.875197 0.000000 1703 +reus 0 8 4.875197 0.000000 1661 +pldi 0 8 4.875197 0.000000 1704 +sean 0 8 4.875197 0.000000 1705 +core 0 7 5.010635 0.000000 1809 +spot 0 7 5.010635 0.000000 1894 +ruth 0 7 5.010635 0.000000 1870 +banerje 0 6 5.164786 0.000000 2018 +sung 0 6 5.164786 0.000000 2075 +reed 0 6 5.164786 0.000000 2086 +majordomo 0 6 5.164786 0.000000 2066 +zhou 0 6 5.164786 0.000000 2092 +icpp 0 5 5.347108 0.000000 2382 +cyclic 0 5 5.347108 0.000000 2383 +ppopp 0 4 5.568345 0.000000 2774 +choi 0 4 5.568345 0.000000 2732 +restructur 0 4 5.568345 0.000000 2775 +randal 0 4 5.568345 0.000000 2776 +ipp 0 3 5.857933 0.000000 3381 +atmospher 0 3 5.857933 0.000000 3388 +andwil 0 3 5.857933 0.000000 3335 +thepap 0 3 5.857933 0.000000 3254 +jason 0 3 5.857933 0.000000 3389 +blumof 0 3 5.857933 0.000000 3237 +foster 0 3 5.857933 0.000000 3159 +lcpc 0 2 6.263398 0.000000 4538 +kennedi 0 2 6.263398 0.000000 4539 +adv 0 2 6.263398 0.000000 4540 +chien 0 2 6.263398 0.000000 4541 +casual 0 2 6.263398 0.000000 4542 +subscribeto 0 2 6.263398 0.000000 4543 +deros 0 2 6.263398 0.000000 4474 +padua 0 2 6.263398 0.000000 4544 +kale 0 2 6.263398 0.000000 4545 +cilk 0 2 6.263398 0.000000 4242 +fritzson 0 2 6.263398 0.000000 4546 +potpourri 0 2 6.263398 0.000000 4547 +scalapack 0 1 6.957497 0.000000 8387 +ramaswami 0 1 6.957497 0.000000 8388 +hodg 0 1 6.957497 0.000000 8389 +mcintosh 0 1 6.957497 0.000000 8390 +chakarabarti 0 1 6.957497 0.000000 8391 +integer 0 1 6.957497 0.000000 8392 +crandal 0 1 6.957497 0.000000 8393 +aydt 0 1 6.957497 0.000000 8394 +bordawekar 0 1 6.957497 0.000000 8395 +choudahari 0 1 6.957497 0.000000 8396 +koelbel 0 1 6.957497 0.000000 8397 +paleczni 0 1 6.957497 0.000000 8398 +midkiff 0 1 6.957497 0.000000 8399 +fahring 0 1 6.957497 0.000000 8400 +hain 0 1 6.957497 0.000000 8401 +mehrotra 0 1 6.957497 0.000000 8402 +environmentslarri 0 1 6.957497 0.000000 8403 +snyderautumn 0 1 6.957497 0.000000 8404 +ten 0 1 6.957497 0.000000 8405 +ignit 0 1 6.957497 0.000000 8406 +hurri 0 1 6.957497 0.000000 8407 +cseo 0 1 6.957497 0.000000 8408 +datepaperpresentor 0 1 6.957497 0.000000 8409 +falcon 0 1 6.957497 0.000000 8410 +gallivan 0 1 6.957497 0.000000 8411 +gallopoulo 0 1 6.957497 0.000000 8412 +marsolf 0 1 6.957497 0.000000 8413 +ramkumar 0 1 6.957497 0.000000 8414 +forb 0 1 6.957497 0.000000 8415 +gotwal 0 1 6.957497 0.000000 8416 +sriniva 0 1 6.957497 0.000000 8417 +gannon 0 1 6.957497 0.000000 8418 +joerg 0 1 6.957497 0.000000 8419 +kuszmaul 0 1 6.957497 0.000000 8420 +leiserson 0 1 6.957497 0.000000 8421 +andersson 0 1 6.957497 0.000000 8422 +realign 0 1 6.957497 0.000000 8423 +kamachi 0 1 6.957497 0.000000 8424 +kusano 0 1 6.957497 0.000000 8425 +suehiro 0 1 6.957497 0.000000 8426 +tamura 0 1 6.957497 0.000000 8427 +sakon 0 1 6.957497 0.000000 8428 +rinard 0 1 6.957497 0.000000 8429 +abramson 0 1 6.957497 0.000000 8430 +michalak 0 1 6.957497 0.000000 8431 +sosic 0 1 6.957497 0.000000 8432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..81453212 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +mail 0 238 1.386294 0.000000 22 +list 0 201 1.609438 0.000000 39 +spring 0 131 2.079442 0.000000 88 +send 0 114 2.197225 0.000000 109 +need 0 98 2.302585 0.000000 135 +messag 0 76 2.564949 0.000000 212 +line 0 75 2.639057 0.000000 231 +variou 0 56 2.890372 0.000000 317 +summer 0 56 2.890372 0.000000 311 +week 0 52 2.995732 0.000000 343 +seminar 0 38 3.295837 0.000000 470 +winter 0 36 3.367296 0.000000 500 +autumn 0 31 3.496508 0.000000 558 +request 0 26 3.688879 0.000000 635 +alreadi 0 16 4.174387 0.000000 963 +bodi 0 13 4.382027 0.000000 1178 +web 0 12 4.465908 0.000000 1249 +subscrib 0 9 4.753590 0.000000 1541 +bit 0 7 5.010635 0.000000 1833 +crucial 0 5 5.347108 0.000000 2384 +ofinform 0 4 5.568345 0.000000 2707 +cancel 0 4 5.568345 0.000000 2746 +preliminariesif 0 1 6.957497 0.000000 8433 +besent 0 1 6.957497 0.000000 8434 +systemsin 0 1 6.957497 0.000000 8435 +quarterli 0 1 6.957497 0.000000 8436 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..d29da919 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +file 0 132 1.945910 0.000000 70 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +final 0 116 2.197225 0.000000 108 +pleas 0 113 2.197225 0.000000 114 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +present 1 91 2.397895 2.397895 145 +appear 0 78 2.564949 0.000000 210 +meet 0 72 2.639057 0.000000 229 +symposium 0 72 2.639057 0.000000 238 +interact 0 62 2.772589 0.000000 270 +virtual 0 62 2.772589 0.000000 285 +summer 0 56 2.890372 0.000000 311 +principl 0 48 3.044522 0.000000 357 +quarter 0 47 3.091042 0.000000 389 +fridai 0 44 3.135494 0.000000 390 +discuss 0 45 3.135494 0.000000 399 +cach 0 41 3.218876 0.000000 432 +workstat 0 37 3.332205 0.000000 479 +connect 0 37 3.332205 0.000000 485 +global 0 34 3.401197 0.000000 520 +return 0 34 3.401197 0.000000 502 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +storag 0 31 3.496508 0.000000 553 +cluster 0 28 3.610918 0.000000 612 +mobil 0 23 3.806662 0.000000 730 +exploit 0 20 3.951244 0.000000 836 +log 0 19 4.007333 0.000000 857 +anderson 0 19 4.007333 0.000000 860 +hierarch 0 15 4.248495 0.000000 1018 +coher 0 14 4.317488 0.000000 1109 +weak 0 13 4.382027 0.000000 1159 +loew 0 12 4.465908 0.000000 1252 +impact 0 11 4.553877 0.000000 1334 +sosp 0 10 4.653960 0.000000 1416 +franklin 0 10 4.653960 0.000000 1436 +voelker 0 9 4.753590 0.000000 1557 +romer 0 8 4.875197 0.000000 1706 +feelei 0 7 5.010635 0.000000 1859 +chan 0 7 5.010635 0.000000 1876 +trend 0 7 5.010635 0.000000 1842 +wolman 0 6 5.164786 0.000000 2093 +tiwari 0 5 5.347108 0.000000 2385 +philipos 0 5 5.347108 0.000000 2373 +wewil 0 4 5.568345 0.000000 2688 +savag 0 4 5.568345 0.000000 2777 +serverless 0 3 5.857933 0.000000 3181 +litvinov 0 3 5.857933 0.000000 3343 +fiuczynski 0 3 5.857933 0.000000 3390 +wilk 0 2 6.263398 0.000000 4548 +hypervisor 0 2 6.263398 0.000000 4549 +sriram 0 2 6.263398 0.000000 4550 +quarterw 0 1 6.957497 0.000000 8437 +upcomingacm 0 1 6.957497 0.000000 8438 +havean 0 1 6.957497 0.000000 8439 +scheduleoct 0 1 6.957497 0.000000 8440 +autoraid 0 1 6.957497 0.000000 8441 +montgomeri 0 1 6.957497 0.000000 8442 +stackabl 0 1 6.957497 0.000000 8443 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..dbde7849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +high 0 130 2.079442 0.000000 101 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +welcom 0 122 2.079442 0.000000 99 +document 0 121 2.079442 0.000000 89 +send 0 114 2.197225 0.000000 109 +pleas 0 113 2.197225 0.000000 114 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +version 0 113 2.197225 0.000000 122 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +follow 0 92 2.397895 0.000000 143 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +learn 0 86 2.484907 0.000000 170 +messag 0 76 2.564949 0.000000 212 +want 0 79 2.564949 0.000000 199 +state 0 76 2.564949 0.000000 207 +write 0 72 2.639057 0.000000 222 +materi 0 75 2.639057 0.000000 221 +appli 0 71 2.639057 0.000000 226 +sieg 0 69 2.708050 0.000000 260 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +guid 0 63 2.772589 0.000000 267 +import 0 65 2.772589 0.000000 282 +index 0 56 2.890372 0.000000 309 +unix 0 58 2.890372 0.000000 308 +faculti 0 56 2.890372 0.000000 325 +scientif 0 53 2.944439 0.000000 341 +allow 0 53 2.944439 0.000000 333 +cover 0 55 2.944439 0.000000 329 +found 0 53 2.944439 0.000000 337 +run 0 51 2.995732 0.000000 347 +quarter 0 47 3.091042 0.000000 389 +effect 0 46 3.091042 0.000000 385 +fast 0 42 3.218876 0.000000 429 +join 0 39 3.258097 0.000000 457 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +close 0 38 3.295837 0.000000 465 +credit 0 38 3.295837 0.000000 460 +workstat 0 37 3.332205 0.000000 479 +staff 0 36 3.367296 0.000000 490 +ad 0 32 3.465736 0.000000 544 +scientist 0 31 3.496508 0.000000 560 +rang 0 30 3.555348 0.000000 565 +semant 0 29 3.583519 0.000000 587 +platform 0 29 3.583519 0.000000 591 +releas 0 28 3.610918 0.000000 616 +arrai 0 27 3.637586 0.000000 627 +supercomput 0 25 3.737670 0.000000 681 +variabl 0 23 3.806662 0.000000 715 +dai 0 22 3.850148 0.000000 753 +exploit 0 20 3.951244 0.000000 836 +kernel 0 20 3.951244 0.000000 825 +assum 0 19 4.007333 0.000000 845 +account 0 18 4.060443 0.000000 882 +previous 0 17 4.110874 0.000000 923 +debug 0 17 4.110874 0.000000 944 +whole 0 17 4.110874 0.000000 940 +modern 0 16 4.174387 0.000000 966 +fortran 0 15 4.248495 0.000000 1027 +easili 0 14 4.317488 0.000000 1077 +matlab 0 14 4.317488 0.000000 1081 +bodi 0 13 4.382027 0.000000 1178 +block 0 13 4.382027 0.000000 1183 +loew 0 12 4.465908 0.000000 1252 +grant 0 12 4.465908 0.000000 1216 +loop 0 11 4.553877 0.000000 1310 +faster 0 11 4.553877 0.000000 1323 +reli 0 10 4.653960 0.000000 1411 +subscrib 0 9 4.753590 0.000000 1541 +suitabl 0 9 4.753590 0.000000 1486 +informationabout 0 9 4.753590 0.000000 1515 +elimin 0 9 4.753590 0.000000 1558 +simpli 0 8 4.875197 0.000000 1626 +migrat 0 7 5.010635 0.000000 1851 +largest 0 7 5.010635 0.000000 1858 +majordomo 0 6 5.164786 0.000000 2066 +syntax 0 6 5.164786 0.000000 2030 +sung 0 6 5.164786 0.000000 2075 +snyder 0 5 5.347108 0.000000 2359 +toth 0 4 5.568345 0.000000 2595 +ncsa 0 4 5.568345 0.000000 2767 +choi 0 4 5.568345 0.000000 2732 +audit 0 3 5.857933 0.000000 3391 +disciplin 0 3 5.857933 0.000000 3392 +informationcours 0 3 5.857933 0.000000 3167 +subscribeto 0 2 6.263398 0.000000 4543 +inner 0 2 6.263398 0.000000 4551 +zphigh 0 1 6.957497 0.000000 8444 +zpllarri 0 1 6.957497 0.000000 8445 +teamautumn 0 1 6.957497 0.000000 8446 +csezpl 0 1 6.957497 0.000000 8447 +usersmail 0 1 6.957497 0.000000 8448 +librarai 0 1 6.957497 0.000000 8449 +relatedinform 0 1 6.957497 0.000000 8450 +descriptionzpl 0 1 6.957497 0.000000 8451 +scientificprogram 0 1 6.957497 0.000000 8452 +infortran 0 1 6.957497 0.000000 8453 +dramaticallysimplifi 0 1 6.957497 0.000000 8454 +nuisanc 0 1 6.957497 0.000000 8455 +andtrivi 0 1 6.957497 0.000000 8456 +byrecompil 0 1 6.957497 0.000000 8457 +wysiwyg 0 1 6.957497 0.000000 8458 +booknon 0 1 6.957497 0.000000 8459 +onin 0 1 6.957497 0.000000 8460 +zplprogram 0 1 6.957497 0.000000 8461 +prerequisitesfamiliar 0 1 6.957497 0.000000 8462 +ormatlab 0 1 6.957497 0.000000 8463 +remotezpl 0 1 6.957497 0.000000 8464 +compileroth 0 1 6.957497 0.000000 8465 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..cf9a1646 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +document 0 121 2.079442 0.000000 89 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +instructor 0 108 2.197225 0.000000 107 +send 0 114 2.197225 0.000000 109 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +version 0 113 2.197225 0.000000 122 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +exam 0 86 2.484907 0.000000 169 +contain 0 81 2.484907 0.000000 174 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +logic 0 71 2.639057 0.000000 230 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +sieg 0 69 2.708050 0.000000 260 +goal 0 66 2.708050 0.000000 250 +syllabu 0 67 2.708050 0.000000 247 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +evalu 0 64 2.772589 0.000000 266 +previou 0 62 2.772589 0.000000 290 +publish 0 57 2.890372 0.000000 326 +think 0 57 2.890372 0.000000 314 +maintain 0 51 2.995732 0.000000 342 +digit 0 52 2.995732 0.000000 348 +frequent 0 49 3.044522 0.000000 367 +archiv 0 49 3.044522 0.000000 364 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +textbook 0 44 3.135494 0.000000 397 +even 0 45 3.135494 0.000000 393 +announc 0 40 3.258097 0.000000 441 +author 0 39 3.258097 0.000000 450 +slide 0 38 3.295837 0.000000 467 +credit 0 38 3.295837 0.000000 460 +expect 0 37 3.332205 0.000000 484 +purpos 0 37 3.332205 0.000000 481 +especi 0 36 3.367296 0.000000 496 +copyright 0 36 3.367296 0.000000 495 +go 0 33 3.433987 0.000000 529 +ad 0 32 3.465736 0.000000 544 +collabor 0 32 3.465736 0.000000 543 +autumn 0 31 3.496508 0.000000 558 +quot 0 29 3.583519 0.000000 582 +administr 0 27 3.637586 0.000000 628 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +notic 0 25 3.737670 0.000000 675 +accur 0 25 3.737670 0.000000 680 +tell 0 21 3.912023 0.000000 777 +anderson 0 19 4.007333 0.000000 860 +feedback 0 19 4.007333 0.000000 854 +aid 0 18 4.060443 0.000000 904 +whole 0 17 4.110874 0.000000 940 +weekli 0 17 4.110874 0.000000 919 +weslei 0 16 4.174387 0.000000 983 +portion 0 16 4.174387 0.000000 971 +webmast 0 15 4.248495 0.000000 1045 +anonym 0 14 4.317488 0.000000 1100 +reprint 0 14 4.317488 0.000000 1097 +everyon 0 13 4.382027 0.000000 1148 +quizz 0 13 4.382027 0.000000 1151 +addison 0 12 4.465908 0.000000 1230 +workload 0 12 4.465908 0.000000 1210 +overal 0 12 4.465908 0.000000 1254 +duli 0 12 4.465908 0.000000 1248 +benjamin 0 11 4.553877 0.000000 1296 +evolut 0 11 4.553877 0.000000 1314 +nonprofit 0 11 4.553877 0.000000 1339 +cheat 0 10 4.653960 0.000000 1395 +desir 0 9 4.753590 0.000000 1542 +cum 0 8 4.875197 0.000000 1619 +bunch 0 7 5.010635 0.000000 1861 +gaetano 0 6 5.164786 0.000000 2068 +borriello 0 5 5.347108 0.000000 2349 +corei 0 4 5.568345 0.000000 2718 +contemporari 0 4 5.568345 0.000000 2719 +katz 0 3 5.857933 0.000000 3276 +corin 0 3 5.857933 0.000000 3311 +aweekli 0 3 5.857933 0.000000 3312 +andersonwelcom 0 2 6.263398 0.000000 4400 +tocs 0 2 6.263398 0.000000 4401 +messagess 0 2 6.263398 0.000000 4402 +synario 0 2 6.263398 0.000000 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..4e824017 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +class 0 199 1.609438 0.000000 37 +avail 0 169 1.791759 0.000000 48 +instructor 0 108 2.197225 0.000000 107 +present 0 91 2.397895 0.000000 145 +internet 0 83 2.484907 0.000000 186 +journal 0 83 2.484907 0.000000 183 +nation 0 74 2.639057 0.000000 240 +sieg 0 69 2.708050 0.000000 260 +futur 0 41 3.218876 0.000000 427 +societi 0 40 3.258097 0.000000 456 +focu 0 30 3.555348 0.000000 571 +relev 0 26 3.688879 0.000000 637 +born 0 21 3.912023 0.000000 798 +thur 0 19 4.007333 0.000000 847 +social 0 13 4.382027 0.000000 1123 +econom 0 13 4.382027 0.000000 1184 +alan 0 13 4.382027 0.000000 1146 +tue 0 11 4.553877 0.000000 1308 +ethic 0 7 5.010635 0.000000 1786 +legal 0 6 5.164786 0.000000 2094 +highwai 0 6 5.164786 0.000000 2095 +implic 0 4 5.568345 0.000000 2696 +societycs 0 1 6.957497 0.000000 8466 +societywelcom 0 1 6.957497 0.000000 8467 +wintercs 0 1 6.957497 0.000000 8468 +andglob 0 1 6.957497 0.000000 8469 +syllabusclass 0 1 6.957497 0.000000 8470 +schedulelink 0 1 6.957497 0.000000 8471 +sitesbook 0 1 6.957497 0.000000 8472 +referenceassignmentsassign 0 1 6.957497 0.000000 8473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..08934ad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +address 0 170 1.791759 0.000000 62 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +topic 0 114 2.197225 0.000000 110 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +search 0 95 2.397895 0.000000 155 +build 0 85 2.484907 0.000000 184 +messag 0 76 2.564949 0.000000 212 +intellig 0 72 2.639057 0.000000 225 +sieg 1 69 2.708050 2.708050 260 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +plan 0 65 2.772589 0.000000 272 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +archiv 0 49 3.044522 0.000000 364 +quarter 0 47 3.091042 0.000000 389 +past 0 42 3.218876 0.000000 428 +staff 0 36 3.367296 0.000000 490 +represent 0 35 3.401197 0.000000 512 +pagecs 0 26 3.688879 0.000000 658 +fundament 0 25 3.737670 0.000000 661 +methodolog 0 23 3.806662 0.000000 733 +outlin 0 17 4.110874 0.000000 914 +nick 0 13 4.382027 0.000000 1180 +pose 0 9 4.753590 0.000000 1535 +depth 0 8 4.875197 0.000000 1636 +marc 0 8 4.875197 0.000000 1680 +uncertainti 0 7 5.010635 0.000000 1882 +machinelearn 0 6 5.164786 0.000000 2084 +anin 0 3 5.857933 0.000000 3354 +assignmentsassign 0 3 5.857933 0.000000 3342 +mailinglist 0 3 5.857933 0.000000 3325 +intelligencefal 0 2 6.263398 0.000000 4477 +andchalleng 0 2 6.263398 0.000000 4478 +intelligentmachin 0 2 6.263398 0.000000 4479 +agentarchitectur 0 2 6.263398 0.000000 4480 +weldweld 0 2 6.263398 0.000000 4481 +friedmanfriedman 0 2 6.263398 0.000000 4482 +kushmericknick 0 2 6.263398 0.000000 4483 +examsgradingresourcesth 0 2 6.263398 0.000000 4484 +topicsprojectread 0 1 6.957497 0.000000 8474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..e6b0580f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +paper 0 205 1.609438 0.000000 38 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +file 0 132 1.945910 0.000000 70 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +provid 0 121 2.079442 0.000000 94 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +check 1 115 2.197225 2.197225 118 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +user 0 104 2.302585 0.000000 137 +peopl 0 96 2.302585 0.000000 132 +comment 0 93 2.397895 0.000000 146 +follow 0 92 2.397895 0.000000 143 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +info 0 85 2.484907 0.000000 176 +chang 0 82 2.484907 0.000000 163 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +interfac 0 79 2.564949 0.000000 209 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +decemb 0 80 2.564949 0.000000 215 +html 0 75 2.639057 0.000000 235 +intellig 0 72 2.639057 0.000000 225 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +summari 0 73 2.639057 0.000000 237 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +back 0 60 2.833213 0.000000 297 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +index 0 56 2.890372 0.000000 309 +detail 0 57 2.890372 0.000000 321 +anoth 0 45 3.135494 0.000000 408 +execut 0 45 3.135494 0.000000 404 +discuss 0 45 3.135494 0.000000 399 +show 0 43 3.178054 0.000000 417 +futur 0 41 3.218876 0.000000 427 +review 0 42 3.218876 0.000000 425 +paul 0 38 3.295837 0.000000 471 +winter 0 36 3.367296 0.000000 500 +short 0 36 3.367296 0.000000 499 +manual 0 35 3.401197 0.000000 504 +articl 0 33 3.433987 0.000000 530 +ad 0 32 3.465736 0.000000 544 +idea 0 32 3.465736 0.000000 545 +releas 0 28 3.610918 0.000000 616 +except 0 28 3.610918 0.000000 607 +rather 0 26 3.688879 0.000000 642 +wai 0 25 3.737670 0.000000 662 +mike 0 24 3.761200 0.000000 703 +displai 0 23 3.806662 0.000000 712 +miscellan 0 23 3.806662 0.000000 731 +instal 0 22 3.850148 0.000000 754 +sent 0 22 3.850148 0.000000 763 +offici 0 18 4.060443 0.000000 894 +statu 0 18 4.060443 0.000000 885 +regist 0 17 4.110874 0.000000 938 +anyon 0 17 4.110874 0.000000 916 +side 0 15 4.248495 0.000000 1022 +someon 0 13 4.382027 0.000000 1128 +nick 0 13 4.382027 0.000000 1180 +rememb 0 12 4.465908 0.000000 1217 +usenix 0 12 4.465908 0.000000 1240 +extra 0 11 4.553877 0.000000 1312 +guess 0 10 4.653960 0.000000 1443 +mosaic 0 10 4.653960 0.000000 1426 +bring 0 10 4.653960 0.000000 1430 +perspect 0 10 4.653960 0.000000 1437 +kurt 0 9 4.753590 0.000000 1548 +filter 0 8 4.875197 0.000000 1641 +on 0 8 4.875197 0.000000 1628 +guidelin 0 7 5.010635 0.000000 1832 +impress 0 6 5.164786 0.000000 2096 +begun 0 5 5.347108 0.000000 2386 +older 0 5 5.347108 0.000000 2387 +cacm 0 5 5.347108 0.000000 2388 +glimps 0 4 5.568345 0.000000 2778 +ics 0 4 5.568345 0.000000 2779 +rambl 0 3 5.857933 0.000000 3308 +vagu 0 3 5.857933 0.000000 3393 +towrit 0 2 6.263398 0.000000 4207 +phoenix 0 2 6.263398 0.000000 4552 +belief 0 2 6.263398 0.000000 4553 +zephyr 0 1 6.957497 0.000000 8475 +siegcreat 0 1 6.957497 0.000000 8476 +scriptspleas 0 1 6.957497 0.000000 8477 +tothem 0 1 6.957497 0.000000 8478 +zwhere 0 1 6.957497 0.000000 8479 +mosiac 0 1 6.957497 0.000000 8480 +znol 0 1 6.957497 0.000000 8481 +zwatch 0 1 6.957497 0.000000 8482 +zlocat 0 1 6.957497 0.000000 8483 +releg 0 1 6.957497 0.000000 8484 +grumbl 0 1 6.957497 0.000000 8485 +luddit 0 1 6.957497 0.000000 8486 +itout 0 1 6.957497 0.000000 8487 +withci 0 1 6.957497 0.000000 8488 +theentir 0 1 6.957497 0.000000 8489 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..0ac92380 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +instructor 0 108 2.197225 0.000000 107 +memori 0 101 2.302585 0.000000 139 +commun 0 95 2.397895 0.000000 157 +control 0 82 2.484907 0.000000 164 +info 0 85 2.484907 0.000000 176 +involv 0 71 2.639057 0.000000 227 +organ 0 65 2.772589 0.000000 265 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +small 0 39 3.258097 0.000000 447 +open 1 38 3.295837 3.295837 469 +credit 0 38 3.295837 0.000000 460 +taken 0 31 3.496508 0.000000 555 +prerequisit 0 19 4.007333 0.000000 846 +devic 0 16 4.174387 0.000000 1002 +consent 0 5 5.347108 0.000000 2389 +semesterli 0 4 5.568345 0.000000 2780 +freshmen 0 2 6.263398 0.000000 4554 +computerhardwar 0 1 6.957497 0.000000 8490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..16609284 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +design 1 213 1.386294 1.386294 25 +fall 0 181 1.609438 0.000000 40 +address 0 170 1.791759 0.000000 62 +architectur 0 139 1.945910 0.000000 77 +introduct 0 126 2.079442 0.000000 87 +structur 0 106 2.197225 0.000000 105 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +control 0 82 2.484907 0.000000 164 +info 0 85 2.484907 0.000000 176 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +cach 0 41 3.218876 0.000000 432 +credit 0 38 3.295837 0.000000 460 +compon 0 30 3.555348 0.000000 570 +hierarchi 0 22 3.850148 0.000000 744 +prerequisit 0 19 4.007333 0.000000 846 +interrupt 0 7 5.010635 0.000000 1793 +microprogram 0 4 5.568345 0.000000 2604 +semesterli 0 4 5.568345 0.000000 2780 +andc 0 1 6.957497 0.000000 8491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..4f1f6f07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +architectur 0 139 1.945910 0.000000 77 +process 0 142 1.945910 0.000000 72 +high 0 130 2.079442 0.000000 101 +advanc 0 99 2.302585 0.000000 130 +techniqu 0 99 2.302585 0.000000 138 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +multiprocessor 0 28 3.610918 0.000000 605 +flow 0 24 3.761200 0.000000 700 +prerequisit 0 19 4.007333 0.000000 846 +semesterli 0 4 5.568345 0.000000 2780 +andpipelin 0 1 6.957497 0.000000 8492 +performancemachin 0 1 6.957497 0.000000 8493 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..5d7bda2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +fall 0 181 1.609438 0.000000 40 +parallel 1 169 1.791759 1.791759 60 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +architectur 0 139 1.945910 0.000000 77 +machin 1 129 2.079442 2.079442 95 +instructor 0 108 2.197225 0.000000 107 +advanc 0 99 2.302585 0.000000 130 +info 0 85 2.484907 0.000000 176 +special 0 56 2.890372 0.000000 320 +principl 0 48 3.044522 0.000000 357 +credit 0 38 3.295837 0.000000 460 +multi 0 36 3.367296 0.000000 493 +detect 0 26 3.688879 0.000000 646 +prerequisit 0 19 4.007333 0.000000 846 +interconnect 0 17 4.110874 0.000000 937 +coher 0 14 4.317488 0.000000 1109 +dataflow 0 5 5.347108 0.000000 2390 +consent 0 5 5.347108 0.000000 2389 +semesterli 0 4 5.568345 0.000000 2780 +simd 0 3 5.857933 0.000000 3360 +mimd 0 3 5.857933 0.000000 3361 +vectorizingcompil 0 1 6.957497 0.000000 8494 +processorsynchron 0 1 6.957497 0.000000 8495 +purposeprocessor 0 1 6.957497 0.000000 8496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..0fa19ea8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,244 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +wisconsin 0 169 1.791759 0.000000 54 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +assign 1 135 1.945910 1.945910 66 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +final 0 116 2.197225 0.000000 108 +topic 0 114 2.197225 0.000000 110 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +book 0 99 2.302585 0.000000 131 +advanc 0 99 2.302585 0.000000 130 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +real 0 93 2.397895 0.000000 144 +question 0 91 2.397895 0.000000 141 +learn 0 86 2.484907 0.000000 170 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +activ 0 84 2.484907 0.000000 182 +solut 0 82 2.484907 0.000000 162 +chang 0 82 2.484907 0.000000 163 +april 0 77 2.564949 0.000000 196 +mondai 0 77 2.564949 0.000000 206 +orient 0 80 2.564949 0.000000 205 +messag 0 76 2.564949 0.000000 212 +come 0 78 2.564949 0.000000 202 +homework 0 79 2.564949 0.000000 193 +dynam 0 76 2.564949 0.000000 194 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +goal 0 66 2.708050 0.000000 250 +thursdai 0 70 2.708050 0.000000 241 +simul 0 66 2.708050 0.000000 255 +differ 0 66 2.708050 0.000000 253 +receiv 0 66 2.708050 0.000000 244 +window 0 68 2.708050 0.000000 242 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +organ 0 65 2.772589 0.000000 265 +copi 0 63 2.772589 0.000000 284 +import 0 65 2.772589 0.000000 282 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +handout 0 64 2.772589 0.000000 263 +virtual 0 62 2.772589 0.000000 285 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +variou 0 56 2.890372 0.000000 317 +overview 0 56 2.890372 0.000000 323 +februari 0 54 2.944439 0.000000 328 +three 0 54 2.944439 0.000000 330 +week 0 52 2.995732 0.000000 343 +hardwar 0 51 2.995732 0.000000 350 +date 0 51 2.995732 0.000000 344 +run 0 51 2.995732 0.000000 347 +set 0 50 3.044522 0.000000 361 +appoint 0 49 3.044522 0.000000 358 +done 0 47 3.091042 0.000000 381 +could 0 46 3.091042 0.000000 383 +discuss 0 45 3.135494 0.000000 399 +answer 0 45 3.135494 0.000000 391 +fridai 0 44 3.135494 0.000000 390 +favorit 0 44 3.135494 0.000000 410 +midterm 0 45 3.135494 0.000000 392 +mark 0 44 3.135494 0.000000 403 +around 0 43 3.178054 0.000000 415 +past 0 42 3.218876 0.000000 428 +late 0 40 3.258097 0.000000 439 +probabl 0 40 3.258097 0.000000 455 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +workstat 0 37 3.332205 0.000000 479 +ofth 0 36 3.367296 0.000000 491 +post 0 35 3.401197 0.000000 505 +print 0 34 3.401197 0.000000 503 +concurr 0 34 3.401197 0.000000 501 +eduoffic 0 33 3.433987 0.000000 531 +taught 0 33 3.433987 0.000000 526 +independ 0 32 3.465736 0.000000 548 +option 0 30 3.555348 0.000000 575 +rang 0 30 3.555348 0.000000 565 +secur 0 30 3.555348 0.000000 577 +turn 0 29 3.583519 0.000000 586 +though 0 27 3.637586 0.000000 622 +trace 0 25 3.737670 0.000000 677 +strategi 0 25 3.737670 0.000000 682 +store 0 24 3.761200 0.000000 693 +dai 0 22 3.850148 0.000000 753 +inth 0 22 3.850148 0.000000 741 +tent 0 22 3.850148 0.000000 739 +cooper 0 22 3.850148 0.000000 757 +disk 0 22 3.850148 0.000000 747 +leav 0 21 3.912023 0.000000 772 +alloc 0 20 3.951244 0.000000 821 +sure 0 20 3.951244 0.000000 813 +minut 0 20 3.951244 0.000000 810 +entir 0 20 3.951244 0.000000 811 +break 0 20 3.951244 0.000000 812 +attend 0 18 4.060443 0.000000 893 +accept 0 18 4.060443 0.000000 879 +account 0 18 4.060443 0.000000 882 +weekli 0 17 4.110874 0.000000 919 +monitor 0 17 4.110874 0.000000 941 +analyz 0 17 4.110874 0.000000 925 +regist 0 17 4.110874 0.000000 938 +debug 0 17 4.110874 0.000000 944 +segment 0 17 4.110874 0.000000 931 +quiz 0 16 4.174387 0.000000 990 +modern 0 16 4.174387 0.000000 966 +devic 0 16 4.174387 0.000000 1002 +purchas 0 15 4.248495 0.000000 1030 +drive 0 15 4.248495 0.000000 1052 +driven 0 15 4.248495 0.000000 1048 +happi 0 14 4.317488 0.000000 1079 +quizz 0 13 4.382027 0.000000 1151 +jonathan 0 13 4.382027 0.000000 1174 +cannot 0 13 4.382027 0.000000 1144 +solari 0 12 4.465908 0.000000 1238 +readi 0 12 4.465908 0.000000 1242 +count 0 12 4.465908 0.000000 1239 +extra 0 11 4.553877 0.000000 1312 +mainli 0 10 4.653960 0.000000 1432 +penalti 0 10 4.653960 0.000000 1405 +bart 0 9 4.753590 0.000000 1559 +recit 0 9 4.753590 0.000000 1475 +quantit 0 8 4.875197 0.000000 1654 +absolut 0 8 4.875197 0.000000 1646 +partner 0 8 4.875197 0.000000 1648 +replac 0 8 4.875197 0.000000 1668 +dispatch 0 7 5.010635 0.000000 1791 +whatev 0 6 5.164786 0.000000 2097 +transcript 0 6 5.164786 0.000000 2067 +drop 0 6 5.164786 0.000000 2008 +averag 0 6 5.164786 0.000000 2098 +madisoncomput 0 5 5.347108 0.000000 2391 +poorli 0 4 5.568345 0.000000 2781 +maximum 0 4 5.568345 0.000000 2632 +csphone 0 3 5.857933 0.000000 3394 +departmentc 0 3 5.857933 0.000000 3395 +millerc 0 3 5.857933 0.000000 3396 +tanenbaum 0 3 5.857933 0.000000 3397 +programmingassign 0 3 5.857933 0.000000 3398 +ofobject 0 3 5.857933 0.000000 3399 +thrash 0 3 5.857933 0.000000 3400 +bybart 0 3 5.857933 0.000000 3401 +semaphor 0 2 6.263398 0.000000 4555 +milleremail 0 2 6.263398 0.000000 4556 +noonor 0 2 6.263398 0.000000 4557 +weyer 0 2 6.263398 0.000000 4558 +notesar 0 2 6.263398 0.000000 4559 +materialcov 0 2 6.263398 0.000000 4140 +youdon 0 2 6.263398 0.000000 4444 +problemssolut 0 2 6.263398 0.000000 4241 +theproblem 0 2 6.263398 0.000000 4560 +andlook 0 2 6.263398 0.000000 4561 +havethre 0 2 6.263398 0.000000 4562 +daysof 0 2 6.263398 0.000000 4563 +eachof 0 2 6.263398 0.000000 4564 +lowest 0 2 6.263398 0.000000 4565 +breakweek 0 2 6.263398 0.000000 4566 +systemsnew 0 1 6.957497 0.000000 8497 +stufffin 0 1 6.957497 0.000000 8498 +staffinstructor 0 1 6.957497 0.000000 8499 +karuna 0 1 6.957497 0.000000 8500 +muthiahemail 0 1 6.957497 0.000000 8501 +muthiah 0 1 6.957497 0.000000 8502 +weyersemail 0 1 6.957497 0.000000 8503 +materialsth 0 1 6.957497 0.000000 8504 +thelectur 0 1 6.957497 0.000000 8505 +textbookmodern 0 1 6.957497 0.000000 8506 +pohl 0 1 6.957497 0.000000 8507 +systemsandobject 0 1 6.957497 0.000000 8508 +sectionslectur 0 1 6.957497 0.000000 8509 +sciencesdiscuss 0 1 6.957497 0.000000 8510 +nolandnot 0 1 6.957497 0.000000 8511 +occas 0 1 6.957497 0.000000 8512 +quizzesther 0 1 6.957497 0.000000 8513 +thediscuss 0 1 6.957497 0.000000 8514 +usetrac 0 1 6.957497 0.000000 8515 +setsdur 0 1 6.957497 0.000000 8516 +severalwritten 0 1 6.957497 0.000000 8517 +synchronizationprimit 0 1 6.957497 0.000000 8518 +workassign 0 1 6.957497 0.000000 8519 +assignmentthat 0 1 6.957497 0.000000 8520 +weekof 0 1 6.957497 0.000000 8521 +cheatingprogram 0 1 6.957497 0.000000 8522 +cheater 0 1 6.957497 0.000000 8523 +receivingan 0 1 6.957497 0.000000 8524 +facilitiesw 0 1 6.957497 0.000000 8525 +policyif 0 1 6.957497 0.000000 8526 +beno 0 1 6.957497 0.000000 8527 +scheduleth 0 1 6.957497 0.000000 8528 +processesweek 0 1 6.957497 0.000000 8529 +creationweek 0 1 6.957497 0.000000 8530 +synchronizationweek 0 1 6.957497 0.000000 8531 +semaphoresweek 0 1 6.957497 0.000000 8532 +monitorsweek 0 1 6.957497 0.000000 8533 +deadlocksweek 0 1 6.957497 0.000000 8534 +relocationweek 0 1 6.957497 0.000000 8535 +tlbsweek 0 1 6.957497 0.000000 8536 +filesweek 0 1 6.957497 0.000000 8537 +directoriesweek 0 1 6.957497 0.000000 8538 +protectionweek 0 1 6.957497 0.000000 8539 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..b105f639 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +file 0 132 1.945910 0.000000 70 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +follow 0 92 2.397895 0.000000 143 +imag 0 91 2.397895 0.000000 161 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +requir 0 81 2.484907 0.000000 167 +chang 0 82 2.484907 0.000000 163 +april 0 77 2.564949 0.000000 196 +mondai 0 77 2.564949 0.000000 206 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +januari 0 62 2.772589 0.000000 264 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +march 0 61 2.833213 0.000000 295 +februari 0 54 2.944439 0.000000 328 +week 0 52 2.995732 0.000000 343 +appoint 0 49 3.044522 0.000000 358 +right 0 48 3.044522 0.000000 363 +could 0 46 3.091042 0.000000 383 +fridai 0 44 3.135494 0.000000 390 +discuss 0 45 3.135494 0.000000 399 +procedur 0 36 3.367296 0.000000 488 +eduoffic 0 33 3.433987 0.000000 531 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +client 0 25 3.737670 0.000000 679 +honor 0 23 3.806662 0.000000 729 +mostli 0 19 4.007333 0.000000 869 +eric 0 19 4.007333 0.000000 870 +miller 0 17 4.110874 0.000000 949 +remot 0 15 4.248495 0.000000 1041 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +larri 0 13 4.382027 0.000000 1142 +wood 0 11 4.553877 0.000000 1355 +bart 1 9 4.753590 4.753590 1559 +laru 0 9 4.753590 0.000000 1560 +madisoncomput 0 5 5.347108 0.000000 2391 +bach 0 4 5.568345 0.000000 2708 +departmentc 0 3 5.857933 0.000000 3395 +millerc 0 3 5.857933 0.000000 3396 +csphone 0 3 5.857933 0.000000 3394 +landweb 0 3 5.857933 0.000000 3402 +bybart 0 3 5.857933 0.000000 3401 +milleremail 0 2 6.263398 0.000000 4556 +noonor 0 2 6.263398 0.000000 4557 +breakweek 0 2 6.263398 0.000000 4566 +seminarunivers 0 1 6.957497 0.000000 8540 +seminarinstructor 0 1 6.957497 0.000000 8541 +lectureslectur 0 1 6.957497 0.000000 8542 +sciencesclass 0 1 6.957497 0.000000 8543 +schedulether 0 1 6.957497 0.000000 8544 +attendal 0 1 6.957497 0.000000 8545 +overviewweek 0 1 6.957497 0.000000 8546 +protocolsweek 0 1 6.957497 0.000000 8547 +callsweek 0 1 6.957497 0.000000 8548 +securityweek 0 1 6.957497 0.000000 8549 +encryptionweek 0 1 6.957497 0.000000 8550 +netweek 0 1 6.957497 0.000000 8551 +systemsweek 0 1 6.957497 0.000000 8552 +supercomputerweek 0 1 6.957497 0.000000 8553 +javaweek 0 1 6.957497 0.000000 8554 +discussionslast 0 1 6.957497 0.000000 8555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..3f5a9eb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +read 1 154 1.791759 1.791759 47 +wisconsin 0 169 1.791759 0.000000 54 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +schedul 0 119 2.079442 0.000000 85 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +structur 0 106 2.197225 0.000000 105 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +text 0 98 2.302585 0.000000 133 +part 0 98 2.302585 0.000000 129 +comment 0 93 2.397895 0.000000 146 +grade 0 90 2.397895 0.000000 142 +proceed 0 93 2.397895 0.000000 152 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +activ 0 84 2.484907 0.000000 182 +exam 0 86 2.484907 0.000000 169 +issu 0 78 2.564949 0.000000 211 +good 0 77 2.564949 0.000000 200 +write 0 72 2.639057 0.000000 222 +tuesdai 0 73 2.639057 0.000000 219 +name 0 72 2.639057 0.000000 220 +meet 0 72 2.639057 0.000000 229 +involv 0 71 2.639057 0.000000 227 +summari 0 73 2.639057 0.000000 237 +thursdai 0 70 2.708050 0.000000 241 +import 0 65 2.772589 0.000000 282 +handout 0 64 2.772589 0.000000 263 +detail 0 57 2.890372 0.000000 321 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +talk 0 53 2.944439 0.000000 336 +extens 0 53 2.944439 0.000000 340 +week 0 52 2.995732 0.000000 343 +give 0 50 3.044522 0.000000 359 +discuss 0 45 3.135494 0.000000 399 +textbook 0 44 3.135494 0.000000 397 +anoth 0 45 3.135494 0.000000 408 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +review 0 42 3.218876 0.000000 425 +form 0 39 3.258097 0.000000 443 +realli 0 40 3.258097 0.000000 444 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +short 0 36 3.367296 0.000000 499 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +articl 0 33 3.433987 0.000000 530 +idea 0 32 3.465736 0.000000 545 +secur 0 30 3.555348 0.000000 577 +abl 0 30 3.555348 0.000000 566 +particip 0 29 3.583519 0.000000 589 +synchron 0 29 3.583519 0.000000 588 +intend 0 28 3.610918 0.000000 599 +pass 0 28 3.610918 0.000000 611 +relev 0 26 3.688879 0.000000 637 +revis 0 26 3.688879 0.000000 640 +daili 0 24 3.761200 0.000000 706 +try 0 22 3.850148 0.000000 764 +busi 0 21 3.912023 0.000000 784 +kernel 0 20 3.951244 0.000000 825 +longer 0 20 3.951244 0.000000 816 +els 0 19 4.007333 0.000000 843 +listen 0 18 4.060443 0.000000 907 +protect 0 17 4.110874 0.000000 935 +sheet 0 16 4.174387 0.000000 973 +choos 0 16 4.174387 0.000000 964 +critic 0 16 4.174387 0.000000 982 +purchas 0 15 4.248495 0.000000 1030 +score 0 15 4.248495 0.000000 1017 +doit 0 14 4.317488 0.000000 1111 +someon 0 13 4.382027 0.000000 1128 +rest 0 12 4.465908 0.000000 1259 +reader 0 12 4.465908 0.000000 1246 +broad 0 11 4.553877 0.000000 1302 +literatur 0 11 4.553877 0.000000 1300 +success 0 10 4.653960 0.000000 1390 +bart 0 9 4.753590 0.000000 1559 +classmat 0 9 4.753590 0.000000 1516 +theme 0 8 4.875197 0.000000 1707 +opinion 0 8 4.875197 0.000000 1708 +refere 0 7 5.010635 0.000000 1895 +fromth 0 7 5.010635 0.000000 1802 +carefulli 0 6 5.164786 0.000000 2045 +madisoncomput 0 5 5.347108 0.000000 2391 +understood 0 5 5.347108 0.000000 2364 +twice 0 4 5.568345 0.000000 2614 +exposur 0 4 5.568345 0.000000 2598 +will 0 4 5.568345 0.000000 2782 +writer 0 4 5.568345 0.000000 2783 +csoffic 0 4 5.568345 0.000000 2727 +departmentc 0 3 5.857933 0.000000 3395 +millerc 0 3 5.857933 0.000000 3396 +advancedoper 0 3 5.857933 0.000000 3403 +focal 0 3 5.857933 0.000000 3404 +formula 0 3 5.857933 0.000000 3405 +thepap 0 3 5.857933 0.000000 3254 +andon 0 3 5.857933 0.000000 3115 +bybart 0 3 5.857933 0.000000 3401 +satisfactori 0 2 6.263398 0.000000 4567 +andconfer 0 2 6.263398 0.000000 4568 +willinstead 0 2 6.263398 0.000000 4569 +adiscuss 0 2 6.263398 0.000000 4570 +geta 0 2 6.263398 0.000000 4571 +quietli 0 2 6.263398 0.000000 4572 +examsther 0 2 6.263398 0.000000 4149 +assignmenti 0 2 6.263398 0.000000 4573 +availbl 0 1 6.957497 0.000000 8556 +systemssummarythi 0 1 6.957497 0.000000 8557 +textther 0 1 6.957497 0.000000 8558 +operatingsystemsclass 0 1 6.957497 0.000000 8559 +meetonc 0 1 6.957497 0.000000 8560 +listaccord 0 1 6.957497 0.000000 8561 +papersindepend 0 1 6.957497 0.000000 8562 +identifyth 0 1 6.957497 0.000000 8563 +discussionsclass 0 1 6.957497 0.000000 8564 +besupport 0 1 6.957497 0.000000 8565 +beveri 0 1 6.957497 0.000000 8566 +unhappi 0 1 6.957497 0.000000 8567 +papersdur 0 1 6.957497 0.000000 8568 +paperwil 0 1 6.957497 0.000000 8569 +facilityand 0 1 6.957497 0.000000 8570 +summaryof 0 1 6.957497 0.000000 8571 +aselect 0 1 6.957497 0.000000 8572 +topicsfrom 0 1 6.957497 0.000000 8573 +fellowstud 0 1 6.957497 0.000000 8574 +giveth 0 1 6.957497 0.000000 8575 +gradesscor 0 1 6.957497 0.000000 8576 +proposalsi 0 1 6.957497 0.000000 8577 +gradesar 0 1 6.957497 0.000000 8578 +detailstim 0 1 6.957497 0.000000 8579 +noonlast 0 1 6.957497 0.000000 8580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..1c795a32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +compil 0 122 2.079442 0.000000 96 +structur 0 106 2.197225 0.000000 105 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +question 0 91 2.397895 0.000000 141 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +search 0 95 2.397895 0.000000 155 +solut 0 82 2.484907 0.000000 162 +requir 0 81 2.484907 0.000000 167 +school 0 84 2.484907 0.000000 188 +contain 0 81 2.484907 0.000000 174 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +write 0 72 2.639057 0.000000 222 +solv 0 73 2.639057 0.000000 234 +materi 0 75 2.639057 0.000000 221 +window 0 68 2.708050 0.000000 242 +knowledg 0 67 2.708050 0.000000 243 +receiv 0 66 2.708050 0.000000 244 +syllabu 0 67 2.708050 0.000000 247 +copi 0 63 2.772589 0.000000 284 +experi 0 64 2.772589 0.000000 283 +descript 0 64 2.772589 0.000000 271 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +function 0 62 2.772589 0.000000 275 +simpl 0 60 2.833213 0.000000 298 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +cover 0 55 2.944439 0.000000 329 +instruct 0 53 2.944439 0.000000 332 +week 0 52 2.995732 0.000000 343 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +even 0 45 3.135494 0.000000 393 +textbook 0 44 3.135494 0.000000 397 +netscap 0 44 3.135494 0.000000 395 +long 0 43 3.178054 0.000000 413 +howev 0 41 3.218876 0.000000 422 +littl 0 39 3.258097 0.000000 454 +announc 0 40 3.258097 0.000000 441 +probabl 0 40 3.258097 0.000000 455 +microsoft 0 38 3.295837 0.000000 468 +credit 0 38 3.295837 0.000000 460 +open 0 38 3.295837 0.000000 469 +copyright 0 36 3.367296 0.000000 495 +statist 0 35 3.401197 0.000000 521 +taught 0 33 3.433987 0.000000 526 +depend 0 29 3.583519 0.000000 583 +intend 0 28 3.610918 0.000000 599 +except 0 28 3.610918 0.000000 607 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +enabl 0 26 3.688879 0.000000 655 +lab 0 24 3.761200 0.000000 698 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +prepar 0 20 3.951244 0.000000 824 +entir 0 20 3.951244 0.000000 811 +exercis 0 19 4.007333 0.000000 842 +assum 0 19 4.007333 0.000000 845 +lyco 0 19 4.007333 0.000000 871 +across 0 16 4.174387 0.000000 974 +dilbert 0 16 4.174387 0.000000 996 +fortran 0 15 4.248495 0.000000 1027 +pagec 0 15 4.248495 0.000000 1011 +psycholog 0 15 4.248495 0.000000 1054 +purchas 0 15 4.248495 0.000000 1030 +comic 0 14 4.317488 0.000000 1103 +primarili 0 13 4.382027 0.000000 1185 +menu 0 13 4.382027 0.000000 1156 +vectra 0 12 4.465908 0.000000 1267 +overal 0 12 4.465908 0.000000 1254 +holidai 0 12 4.465908 0.000000 1224 +insid 0 12 4.465908 0.000000 1262 +keyword 0 11 4.553877 0.000000 1356 +night 0 11 4.553877 0.000000 1319 +prior 0 10 4.653960 0.000000 1438 +packard 0 10 4.653960 0.000000 1444 +certain 0 10 4.653960 0.000000 1393 +seven 0 9 4.753590 0.000000 1561 +hewlett 0 8 4.875197 0.000000 1709 +printer 0 8 4.875197 0.000000 1621 +elementari 0 7 5.010635 0.000000 1825 +bestor 0 6 5.164786 0.000000 2099 +gareth 0 5 5.347108 0.000000 2392 +relief 0 4 5.568345 0.000000 2784 +punctual 0 3 5.857933 0.000000 3313 +labyou 0 3 5.857933 0.000000 3406 +dorm 0 3 5.857933 0.000000 3407 +lahei 0 3 5.857933 0.000000 3408 +programmingsect 0 2 6.263398 0.000000 4574 +disturb 0 2 6.263398 0.000000 4575 +subroutin 0 2 6.263398 0.000000 4576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..439a9884 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +algorithm 0 162 1.791759 0.000000 57 +click 0 142 1.945910 0.000000 78 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +hall 0 146 1.945910 0.000000 65 +compil 0 122 2.079442 0.000000 96 +final 0 116 2.197225 0.000000 108 +pleas 0 113 2.197225 0.000000 114 +instructor 0 108 2.197225 0.000000 107 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +topic 0 114 2.197225 0.000000 110 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +text 0 98 2.302585 0.000000 133 +need 0 98 2.302585 0.000000 135 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +learn 0 86 2.484907 0.000000 170 +academ 0 82 2.484907 0.000000 178 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +come 0 78 2.564949 0.000000 202 +want 0 79 2.564949 0.000000 199 +exampl 0 77 2.564949 0.000000 195 +messag 0 76 2.564949 0.000000 212 +solv 0 73 2.639057 0.000000 234 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +name 0 72 2.639057 0.000000 220 +receiv 0 66 2.708050 0.000000 244 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +would 0 67 2.708050 0.000000 251 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +copi 0 63 2.772589 0.000000 284 +wednesdai 0 64 2.772589 0.000000 261 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +major 0 56 2.890372 0.000000 315 +semest 0 58 2.890372 0.000000 312 +think 0 57 2.890372 0.000000 314 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +cover 0 55 2.944439 0.000000 329 +februari 0 54 2.944439 0.000000 328 +three 0 54 2.944439 0.000000 330 +week 1 52 2.995732 2.995732 343 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +appoint 0 49 3.044522 0.000000 358 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +understand 0 47 3.091042 0.000000 384 +fridai 0 44 3.135494 0.000000 390 +even 0 45 3.135494 0.000000 393 +answer 0 45 3.135494 0.000000 391 +netscap 0 44 3.135494 0.000000 395 +algebra 0 45 3.135494 0.000000 394 +directori 0 45 3.135494 0.000000 396 +textbook 0 44 3.135494 0.000000 397 +term 0 43 3.178054 0.000000 411 +long 0 43 3.178054 0.000000 413 +show 0 43 3.178054 0.000000 417 +howev 0 41 3.218876 0.000000 422 +edit 0 42 3.218876 0.000000 418 +must 0 40 3.258097 0.000000 442 +error 0 40 3.258097 0.000000 449 +announc 0 40 3.258097 0.000000 441 +realli 0 40 3.258097 0.000000 444 +late 0 40 3.258097 0.000000 439 +programm 0 39 3.258097 0.000000 445 +small 0 39 3.258097 0.000000 447 +probabl 0 40 3.258097 0.000000 455 +microsoft 0 38 3.295837 0.000000 468 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +hand 0 37 3.332205 0.000000 475 +mean 0 37 3.332205 0.000000 477 +respons 0 37 3.332205 0.000000 476 +short 0 36 3.367296 0.000000 499 +copyright 0 36 3.367296 0.000000 495 +approxim 0 35 3.401197 0.000000 509 +everi 0 34 3.401197 0.000000 519 +statist 0 35 3.401197 0.000000 521 +taught 0 33 3.433987 0.000000 526 +board 0 33 3.433987 0.000000 528 +dissert 0 32 3.465736 0.000000 549 +someth 0 31 3.496508 0.000000 554 +abl 0 30 3.555348 0.000000 566 +rang 0 30 3.555348 0.000000 565 +depend 0 29 3.583519 0.000000 583 +pass 0 28 3.610918 0.000000 611 +except 0 28 3.610918 0.000000 607 +intend 0 28 3.610918 0.000000 599 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +relev 0 26 3.688879 0.000000 637 +consult 0 24 3.761200 0.000000 687 +lab 0 24 3.761200 0.000000 698 +tent 0 22 3.850148 0.000000 739 +instead 0 22 3.850148 0.000000 756 +try 0 22 3.850148 0.000000 764 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +entir 0 20 3.951244 0.000000 811 +longer 0 20 3.951244 0.000000 816 +exercis 0 19 4.007333 0.000000 842 +thur 0 19 4.007333 0.000000 847 +lyco 0 19 4.007333 0.000000 871 +attempt 0 17 4.110874 0.000000 917 +weekli 0 17 4.110874 0.000000 919 +anyth 0 16 4.174387 0.000000 998 +misconduct 0 16 4.174387 0.000000 1003 +explan 0 16 4.174387 0.000000 985 +normal 0 16 4.174387 0.000000 995 +across 0 16 4.174387 0.000000 974 +dilbert 0 16 4.174387 0.000000 996 +fortran 0 15 4.248495 0.000000 1027 +contribut 0 15 4.248495 0.000000 1021 +pagec 0 15 4.248495 0.000000 1011 +psycholog 0 15 4.248495 0.000000 1054 +score 0 15 4.248495 0.000000 1017 +overhead 0 15 4.248495 0.000000 1035 +doesn 0 15 4.248495 0.000000 1055 +purchas 0 15 4.248495 0.000000 1030 +easili 0 14 4.317488 0.000000 1077 +comic 0 14 4.317488 0.000000 1103 +primarili 0 13 4.382027 0.000000 1185 +menu 0 13 4.382027 0.000000 1156 +wait 0 13 4.382027 0.000000 1168 +necessari 0 13 4.382027 0.000000 1147 +step 0 13 4.382027 0.000000 1138 +skill 0 12 4.465908 0.000000 1205 +vectra 0 12 4.465908 0.000000 1267 +stai 0 12 4.465908 0.000000 1215 +calcul 0 12 4.465908 0.000000 1268 +outsid 0 12 4.465908 0.000000 1219 +overal 0 12 4.465908 0.000000 1254 +holidai 0 12 4.465908 0.000000 1224 +insid 0 12 4.465908 0.000000 1262 +extra 0 11 4.553877 0.000000 1312 +regularli 0 11 4.553877 0.000000 1338 +distinguish 0 11 4.553877 0.000000 1357 +keyword 0 11 4.553877 0.000000 1356 +night 0 11 4.553877 0.000000 1319 +bring 0 10 4.653960 0.000000 1430 +cheat 0 10 4.653960 0.000000 1395 +packard 0 10 4.653960 0.000000 1444 +certain 0 10 4.653960 0.000000 1393 +seven 0 9 4.753590 0.000000 1561 +login 0 9 4.753590 0.000000 1550 +discov 0 9 4.753590 0.000000 1562 +didn 0 9 4.753590 0.000000 1563 +familiar 0 9 4.753590 0.000000 1485 +curv 0 8 4.875197 0.000000 1656 +printer 0 8 4.875197 0.000000 1621 +risk 0 8 4.875197 0.000000 1689 +fail 0 8 4.875197 0.000000 1655 +matter 0 8 4.875197 0.000000 1627 +hewlett 0 8 4.875197 0.000000 1709 +friedman 0 7 5.010635 0.000000 1886 +zero 0 7 5.010635 0.000000 1896 +tag 0 7 5.010635 0.000000 1821 +therefor 0 7 5.010635 0.000000 1822 +bestor 0 6 5.164786 0.000000 2099 +constitut 0 6 5.164786 0.000000 2026 +ensur 0 6 5.164786 0.000000 2012 +syntax 0 6 5.164786 0.000000 2030 +gareth 0 5 5.347108 0.000000 2392 +substitut 0 5 5.347108 0.000000 2247 +handin 0 5 5.347108 0.000000 2393 +identif 0 4 5.568345 0.000000 2773 +wear 0 4 5.568345 0.000000 2785 +trivial 0 4 5.568345 0.000000 2786 +relief 0 4 5.568345 0.000000 2784 +punctual 0 3 5.857933 0.000000 3313 +projector 0 3 5.857933 0.000000 3409 +duti 0 3 5.857933 0.000000 3317 +labyou 0 3 5.857933 0.000000 3406 +dorm 0 3 5.857933 0.000000 3407 +lahei 0 3 5.857933 0.000000 3408 +programmingsect 0 2 6.263398 0.000000 4574 +disturb 0 2 6.263398 0.000000 4575 +lowest 0 2 6.263398 0.000000 4565 +regardless 0 2 6.263398 0.000000 4577 +pencil 0 2 6.263398 0.000000 4426 +subroutin 0 2 6.263398 0.000000 4576 +elig 0 1 6.957497 0.000000 8581 +amclick 0 1 6.957497 0.000000 8582 +unsur 0 1 6.957497 0.000000 8583 +notestext 0 1 6.957497 0.000000 8584 +koffman 0 1 6.957497 0.000000 8585 +assignmentsther 0 1 6.957497 0.000000 8586 +gradesheet 0 1 6.957497 0.000000 8587 +pmhow 0 1 6.957497 0.000000 8588 +modem 0 1 6.957497 0.000000 8589 +exerciseson 0 1 6.957497 0.000000 8590 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..f75ee19e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +note 0 142 1.945910 0.000000 67 +object 0 138 1.945910 0.000000 79 +section 0 94 2.397895 0.000000 149 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +name 0 72 2.639057 0.000000 220 +solv 0 73 2.639057 0.000000 234 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +publish 0 57 2.890372 0.000000 326 +week 0 52 2.995732 0.000000 343 +algebra 0 45 3.135494 0.000000 394 +compani 0 41 3.218876 0.000000 423 +announc 0 40 3.258097 0.000000 441 +error 0 40 3.258097 0.000000 449 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +eduoffic 0 33 3.433987 0.000000 531 +ask 0 28 3.610918 0.000000 597 +valu 0 25 3.737670 0.000000 665 +known 0 24 3.761200 0.000000 702 +greg 0 24 3.761200 0.000000 695 +dai 0 22 3.850148 0.000000 753 +output 0 21 3.912023 0.000000 788 +walter 0 17 4.110874 0.000000 950 +quiz 0 16 4.174387 0.000000 990 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +dave 0 14 4.317488 0.000000 1098 +scan 0 12 4.465908 0.000000 1243 +informationemail 0 9 4.753590 0.000000 1564 +sharp 0 6 5.164786 0.000000 2100 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +prog 0 4 5.568345 0.000000 2740 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +objectivesvectra 0 3 5.857933 0.000000 3410 +homeclass 0 3 5.857933 0.000000 3411 +policyl 0 3 5.857933 0.000000 3412 +policyacadem 0 3 5.857933 0.000000 3413 +burnett 0 2 6.263398 0.000000 4578 +consultantssyllabuswork 0 2 6.263398 0.000000 4579 +archivepolici 0 2 6.263398 0.000000 4580 +eggleston 0 2 6.263398 0.000000 4581 +egglestonemail 0 1 6.957497 0.000000 8591 +hourlywork 0 1 6.957497 0.000000 8592 +classread 0 1 6.957497 0.000000 8593 +gradeshomeworkexam 0 1 6.957497 0.000000 8594 +quizzesmiscellan 0 1 6.957497 0.000000 8595 +policytextproblem 0 1 6.957497 0.000000 8596 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..4afd3e4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +distribut 0 162 1.791759 0.000000 51 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +file 0 132 1.945910 0.000000 70 +problem 0 147 1.945910 0.000000 75 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +text 0 98 2.302585 0.000000 133 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +need 0 98 2.302585 0.000000 135 +peopl 0 96 2.302585 0.000000 132 +grade 0 90 2.397895 0.000000 142 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +requir 0 81 2.484907 0.000000 167 +solut 0 82 2.484907 0.000000 162 +info 0 85 2.484907 0.000000 176 +activ 0 84 2.484907 0.000000 182 +exam 0 86 2.484907 0.000000 169 +second 0 81 2.484907 0.000000 166 +come 0 78 2.564949 0.000000 202 +involv 0 71 2.639057 0.000000 227 +tuesdai 0 73 2.639057 0.000000 219 +summari 0 73 2.639057 0.000000 237 +thursdai 0 70 2.708050 0.000000 241 +differ 0 66 2.708050 0.000000 253 +window 0 68 2.708050 0.000000 242 +import 0 65 2.772589 0.000000 282 +previou 0 62 2.772589 0.000000 290 +copi 0 63 2.772589 0.000000 284 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +cover 0 55 2.944439 0.000000 329 +suggest 0 53 2.944439 0.000000 331 +allow 0 53 2.944439 0.000000 333 +case 0 51 2.995732 0.000000 351 +give 0 50 3.044522 0.000000 359 +appoint 0 49 3.044522 0.000000 358 +discuss 0 45 3.135494 0.000000 399 +review 0 42 3.218876 0.000000 425 +slide 0 38 3.295837 0.000000 467 +prototyp 0 38 3.295837 0.000000 463 +hand 0 37 3.332205 0.000000 475 +either 0 35 3.401197 0.000000 506 +compon 0 30 3.555348 0.000000 570 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +synchron 0 29 3.583519 0.000000 588 +propos 0 28 3.610918 0.000000 602 +intend 0 28 3.610918 0.000000 599 +measur 0 28 3.610918 0.000000 609 +linux 0 27 3.637586 0.000000 631 +team 0 27 3.637586 0.000000 625 +relev 0 26 3.688879 0.000000 637 +instead 0 22 3.850148 0.000000 756 +tent 0 22 3.850148 0.000000 739 +benchmark 0 19 4.007333 0.000000 859 +encourag 0 18 4.060443 0.000000 880 +protect 0 17 4.110874 0.000000 935 +choos 0 16 4.174387 0.000000 964 +purchas 0 15 4.248495 0.000000 1030 +classic 0 14 4.317488 0.000000 1084 +doit 0 14 4.317488 0.000000 1111 +suit 0 13 4.382027 0.000000 1129 +count 0 12 4.465908 0.000000 1239 +solari 0 12 4.465908 0.000000 1238 +broad 0 11 4.553877 0.000000 1302 +strongli 0 10 4.653960 0.000000 1406 +total 0 10 4.653960 0.000000 1398 +theme 0 8 4.875197 0.000000 1707 +formerli 0 5 5.347108 0.000000 2397 +exposur 0 4 5.568345 0.000000 2598 +suno 0 4 5.568345 0.000000 2790 +advancedoper 0 3 5.857933 0.000000 3403 +focal 0 3 5.857933 0.000000 3404 +macc 0 3 5.857933 0.000000 3414 +ofvari 0 2 6.263398 0.000000 4582 +anexperiment 0 2 6.263398 0.000000 4299 +halloffic 0 2 6.263398 0.000000 4583 +deskfor 0 2 6.263398 0.000000 4584 +performanceof 0 2 6.263398 0.000000 4585 +topicsinclud 0 1 6.957497 0.000000 8597 +tochoos 0 1 6.957497 0.000000 8598 +rathera 0 1 6.957497 0.000000 8599 +manya 0 1 6.957497 0.000000 8600 +assig 0 1 6.957497 0.000000 8601 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..182a5ee9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +lectur 1 135 1.945910 1.945910 73 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +introduct 0 126 2.079442 0.000000 87 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +solv 0 73 2.639057 0.000000 234 +simpl 0 60 2.833213 0.000000 298 +cover 0 55 2.944439 0.000000 329 +basic 0 50 3.044522 0.000000 360 +credit 0 38 3.295837 0.000000 460 +michael 0 35 3.401197 0.000000 514 +jeff 0 25 3.737670 0.000000 673 +half 0 21 3.912023 0.000000 776 +martin 0 21 3.912023 0.000000 794 +prepar 0 20 3.951244 0.000000 824 +fortran 0 15 4.248495 0.000000 1027 +essenti 0 13 4.382027 0.000000 1137 +russel 0 9 4.753590 0.000000 1507 +suffici 0 7 5.010635 0.000000 1897 +lampert 0 5 5.347108 0.000000 2398 +birk 0 4 5.568345 0.000000 2791 +anthoni 0 4 5.568345 0.000000 2792 +toni 0 3 5.857933 0.000000 3415 +hummert 0 3 5.857933 0.000000 3416 +man 0 3 5.857933 0.000000 3417 +silva 0 2 6.263398 0.000000 4586 +sidnei 0 2 6.263398 0.000000 4587 +programmingstructur 0 1 6.957497 0.000000 8602 +elementaryengin 0 1 6.957497 0.000000 8603 +enableth 0 1 6.957497 0.000000 8604 +inelementari 0 1 6.957497 0.000000 8605 +reameslast 0 1 6.957497 0.000000 8606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..b81df35e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +databas 0 122 2.079442 0.000000 86 +site 0 106 2.197225 0.000000 119 +world 0 115 2.197225 0.000000 126 +teach 0 108 2.197225 0.000000 112 +find 0 111 2.197225 0.000000 111 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +mondai 0 77 2.564949 0.000000 206 +name 0 72 2.639057 0.000000 220 +wednesdai 0 64 2.772589 0.000000 261 +dept 0 64 2.772589 0.000000 291 +virtual 0 62 2.772589 0.000000 285 +creat 0 63 2.772589 0.000000 277 +explor 0 58 2.890372 0.000000 324 +maintain 0 51 2.995732 0.000000 342 +hardwar 0 51 2.995732 0.000000 350 +appoint 0 49 3.044522 0.000000 358 +cool 0 49 3.044522 0.000000 374 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +microsoft 0 38 3.295837 0.000000 468 +origin 0 38 3.295837 0.000000 472 +especi 0 36 3.367296 0.000000 496 +jame 0 35 3.401197 0.000000 507 +yahoo 0 24 3.761200 0.000000 707 +andrew 0 19 4.007333 0.000000 849 +lyco 0 19 4.007333 0.000000 871 +excel 0 19 4.007333 0.000000 868 +intel 0 16 4.174387 0.000000 1000 +novel 0 15 4.248495 0.000000 1039 +borland 0 14 4.317488 0.000000 1067 +whose 0 13 4.382027 0.000000 1166 +gupta 0 12 4.465908 0.000000 1241 +packard 0 10 4.653960 0.000000 1444 +classifi 0 9 4.753590 0.000000 1537 +hewlett 0 8 4.875197 0.000000 1709 +tourist 0 8 4.875197 0.000000 1710 +chiang 0 7 5.010635 0.000000 1853 +sciencesoffic 0 6 5.164786 0.000000 2101 +mother 0 6 5.164786 0.000000 2083 +alphabet 0 6 5.164786 0.000000 1980 +teitelbaum 0 6 5.164786 0.000000 2102 +categori 0 5 5.347108 0.000000 2261 +lookup 0 5 5.347108 0.000000 2399 +kelli 0 4 5.568345 0.000000 2793 +nathan 0 4 5.568345 0.000000 2794 +tsioli 0 3 5.857933 0.000000 3418 +ratliff 0 3 5.857933 0.000000 3419 +bockrath 0 3 5.857933 0.000000 3420 +ashraf 0 3 5.857933 0.000000 3421 +geeri 0 3 5.857933 0.000000 3422 +jyothi 0 3 5.857933 0.000000 3423 +thano 0 3 5.857933 0.000000 3424 +fink 0 3 5.857933 0.000000 3425 +aboulnaga 0 3 5.857933 0.000000 3426 +jherro 0 3 5.857933 0.000000 3427 +abhinav 0 3 5.857933 0.000000 3428 +agupta 0 3 5.857933 0.000000 3429 +suhui 0 3 5.857933 0.000000 3430 +enorm 0 3 5.857933 0.000000 3431 +rehnuma 0 2 6.263398 0.000000 4588 +keyinstructorprofessor 0 2 6.263398 0.000000 4589 +desautelsoffic 0 2 6.263398 0.000000 4590 +assistantsfollow 0 2 6.263398 0.000000 4591 +rahman 0 2 6.263398 0.000000 4592 +jaim 0 2 6.263398 0.000000 4593 +jfink 0 2 6.263398 0.000000 4594 +herro 0 2 6.263398 0.000000 4595 +krothap 0 2 6.263398 0.000000 4596 +gradesexplor 0 2 6.263398 0.000000 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..b81df35e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +databas 0 122 2.079442 0.000000 86 +site 0 106 2.197225 0.000000 119 +world 0 115 2.197225 0.000000 126 +teach 0 108 2.197225 0.000000 112 +find 0 111 2.197225 0.000000 111 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +mondai 0 77 2.564949 0.000000 206 +name 0 72 2.639057 0.000000 220 +wednesdai 0 64 2.772589 0.000000 261 +dept 0 64 2.772589 0.000000 291 +virtual 0 62 2.772589 0.000000 285 +creat 0 63 2.772589 0.000000 277 +explor 0 58 2.890372 0.000000 324 +maintain 0 51 2.995732 0.000000 342 +hardwar 0 51 2.995732 0.000000 350 +appoint 0 49 3.044522 0.000000 358 +cool 0 49 3.044522 0.000000 374 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +microsoft 0 38 3.295837 0.000000 468 +origin 0 38 3.295837 0.000000 472 +especi 0 36 3.367296 0.000000 496 +jame 0 35 3.401197 0.000000 507 +yahoo 0 24 3.761200 0.000000 707 +andrew 0 19 4.007333 0.000000 849 +lyco 0 19 4.007333 0.000000 871 +excel 0 19 4.007333 0.000000 868 +intel 0 16 4.174387 0.000000 1000 +novel 0 15 4.248495 0.000000 1039 +borland 0 14 4.317488 0.000000 1067 +whose 0 13 4.382027 0.000000 1166 +gupta 0 12 4.465908 0.000000 1241 +packard 0 10 4.653960 0.000000 1444 +classifi 0 9 4.753590 0.000000 1537 +hewlett 0 8 4.875197 0.000000 1709 +tourist 0 8 4.875197 0.000000 1710 +chiang 0 7 5.010635 0.000000 1853 +sciencesoffic 0 6 5.164786 0.000000 2101 +mother 0 6 5.164786 0.000000 2083 +alphabet 0 6 5.164786 0.000000 1980 +teitelbaum 0 6 5.164786 0.000000 2102 +categori 0 5 5.347108 0.000000 2261 +lookup 0 5 5.347108 0.000000 2399 +kelli 0 4 5.568345 0.000000 2793 +nathan 0 4 5.568345 0.000000 2794 +tsioli 0 3 5.857933 0.000000 3418 +ratliff 0 3 5.857933 0.000000 3419 +bockrath 0 3 5.857933 0.000000 3420 +ashraf 0 3 5.857933 0.000000 3421 +geeri 0 3 5.857933 0.000000 3422 +jyothi 0 3 5.857933 0.000000 3423 +thano 0 3 5.857933 0.000000 3424 +fink 0 3 5.857933 0.000000 3425 +aboulnaga 0 3 5.857933 0.000000 3426 +jherro 0 3 5.857933 0.000000 3427 +abhinav 0 3 5.857933 0.000000 3428 +agupta 0 3 5.857933 0.000000 3429 +suhui 0 3 5.857933 0.000000 3430 +enorm 0 3 5.857933 0.000000 3431 +rehnuma 0 2 6.263398 0.000000 4588 +keyinstructorprofessor 0 2 6.263398 0.000000 4589 +desautelsoffic 0 2 6.263398 0.000000 4590 +assistantsfollow 0 2 6.263398 0.000000 4591 +rahman 0 2 6.263398 0.000000 4592 +jaim 0 2 6.263398 0.000000 4593 +jfink 0 2 6.263398 0.000000 4594 +herro 0 2 6.263398 0.000000 4595 +krothap 0 2 6.263398 0.000000 4596 +gradesexplor 0 2 6.263398 0.000000 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..d33c6892 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +lectur 1 135 1.945910 1.945910 73 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +teach 0 108 2.197225 0.000000 112 +part 0 98 2.302585 0.000000 129 +text 0 98 2.302585 0.000000 133 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +info 0 85 2.484907 0.000000 176 +exam 0 86 2.484907 0.000000 169 +wide 0 84 2.484907 0.000000 185 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +issu 0 78 2.564949 0.000000 211 +tuesdai 0 73 2.639057 0.000000 219 +intellig 0 72 2.639057 0.000000 225 +addit 0 74 2.639057 0.000000 228 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +point 0 58 2.890372 0.000000 319 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +electron 0 47 3.091042 0.000000 379 +discuss 0 45 3.135494 0.000000 399 +netscap 0 44 3.135494 0.000000 395 +term 0 43 3.178054 0.000000 411 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +word 0 34 3.401197 0.000000 508 +eduoffic 0 33 3.433987 0.000000 531 +taught 0 33 3.433987 0.000000 526 +storag 0 31 3.496508 0.000000 553 +compon 0 30 3.555348 0.000000 570 +held 0 28 3.610918 0.000000 600 +background 0 25 3.737670 0.000000 664 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +newsgroup 0 21 3.912023 0.000000 783 +expert 0 20 3.951244 0.000000 833 +qualiti 0 20 3.951244 0.000000 832 +excel 0 19 4.007333 0.000000 868 +macintosh 0 17 4.110874 0.000000 920 +regular 0 17 4.110874 0.000000 929 +devic 0 16 4.174387 0.000000 1002 +draw 0 14 4.317488 0.000000 1086 +quizz 0 13 4.382027 0.000000 1151 +social 0 13 4.382027 0.000000 1123 +necessari 0 13 4.382027 0.000000 1147 +skill 0 12 4.465908 0.000000 1205 +desktop 0 10 4.653960 0.000000 1445 +rich 0 10 4.653960 0.000000 1396 +telecommun 0 9 4.753590 0.000000 1565 +zero 0 7 5.010635 0.000000 1896 +shot 0 7 5.010635 0.000000 1898 +necessarili 0 7 5.010635 0.000000 1899 +thegoal 0 6 5.164786 0.000000 2033 +lloyd 0 6 5.164786 0.000000 2103 +paint 0 5 5.347108 0.000000 2400 +bodner 0 5 5.347108 0.000000 2401 +chart 0 4 5.568345 0.000000 2653 +assignmentsand 0 4 5.568345 0.000000 2760 +glanc 0 4 5.568345 0.000000 2652 +salli 0 3 5.857933 0.000000 3432 +facstaff 0 3 5.857933 0.000000 3433 +drag 0 3 5.857933 0.000000 3434 +crack 0 3 5.857933 0.000000 3435 +macintoshcomput 0 3 5.857933 0.000000 3228 +iici 0 3 5.857933 0.000000 3436 +scanner 0 3 5.857933 0.000000 3437 +leavi 0 3 5.857933 0.000000 3438 +sharenow 0 3 5.857933 0.000000 3439 +swander 0 3 5.857933 0.000000 3440 +thayer 0 3 5.857933 0.000000 3441 +varghes 0 3 5.857933 0.000000 3442 +weinberg 0 3 5.857933 0.000000 3443 +spreadsheet 0 2 6.263398 0.000000 4598 +aldu 0 2 6.263398 0.000000 4599 +computersinstructor 0 2 6.263398 0.000000 4600 +petersonoffic 0 2 6.263398 0.000000 4601 +sciencephon 0 2 6.263398 0.000000 4602 +slpeter 0 2 6.263398 0.000000 4603 +appointmentvit 0 2 6.263398 0.000000 4604 +halllectur 0 2 6.263398 0.000000 4605 +laudon 0 2 6.263398 0.000000 4606 +traver 0 2 6.263398 0.000000 4607 +laudonlab 0 2 6.263398 0.000000 4608 +petersoncours 0 2 6.263398 0.000000 4609 +computersto 0 2 6.263398 0.000000 4610 +throughcolleg 0 2 6.263398 0.000000 4611 +arena 0 2 6.263398 0.000000 4612 +csuse 0 2 6.263398 0.000000 4613 +experienceon 0 2 6.263398 0.000000 4614 +eudora 0 2 6.263398 0.000000 4615 +superpaint 0 2 6.263398 0.000000 4616 +filemak 0 2 6.263398 0.000000 4617 +hypercard 0 2 6.263398 0.000000 4618 +pagemak 0 2 6.263398 0.000000 4619 +educationalexperi 0 2 6.263398 0.000000 4620 +namesectiontimedai 0 2 6.263398 0.000000 4621 +mwnick 0 2 6.263398 0.000000 4622 +mwtrshannon 0 2 6.263398 0.000000 4623 +trtrjeff 0 2 6.263398 0.000000 4624 +reminga 0 2 6.263398 0.000000 4625 +mwfmwira 0 2 6.263398 0.000000 4626 +trtrbrian 0 2 6.263398 0.000000 4627 +mwfmwfbrad 0 2 6.263398 0.000000 4628 +mwfmwfjoe 0 2 6.263398 0.000000 4629 +trtrgeoff 0 2 6.263398 0.000000 4630 +mwftrmaria 0 2 6.263398 0.000000 4631 +yuin 0 2 6.263398 0.000000 4632 +mwfmwrecommend 0 2 6.263398 0.000000 4633 +nitti 0 2 6.263398 0.000000 4634 +gritti 0 2 6.263398 0.000000 4635 +superpaintassign 0 2 6.263398 0.000000 4636 +excellast 0 2 6.263398 0.000000 4637 +jonbodn 0 2 6.263398 0.000000 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..d33c6892 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +lectur 1 135 1.945910 1.945910 73 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +teach 0 108 2.197225 0.000000 112 +part 0 98 2.302585 0.000000 129 +text 0 98 2.302585 0.000000 133 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +info 0 85 2.484907 0.000000 176 +exam 0 86 2.484907 0.000000 169 +wide 0 84 2.484907 0.000000 185 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +issu 0 78 2.564949 0.000000 211 +tuesdai 0 73 2.639057 0.000000 219 +intellig 0 72 2.639057 0.000000 225 +addit 0 74 2.639057 0.000000 228 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +point 0 58 2.890372 0.000000 319 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +electron 0 47 3.091042 0.000000 379 +discuss 0 45 3.135494 0.000000 399 +netscap 0 44 3.135494 0.000000 395 +term 0 43 3.178054 0.000000 411 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +word 0 34 3.401197 0.000000 508 +eduoffic 0 33 3.433987 0.000000 531 +taught 0 33 3.433987 0.000000 526 +storag 0 31 3.496508 0.000000 553 +compon 0 30 3.555348 0.000000 570 +held 0 28 3.610918 0.000000 600 +background 0 25 3.737670 0.000000 664 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +newsgroup 0 21 3.912023 0.000000 783 +expert 0 20 3.951244 0.000000 833 +qualiti 0 20 3.951244 0.000000 832 +excel 0 19 4.007333 0.000000 868 +macintosh 0 17 4.110874 0.000000 920 +regular 0 17 4.110874 0.000000 929 +devic 0 16 4.174387 0.000000 1002 +draw 0 14 4.317488 0.000000 1086 +quizz 0 13 4.382027 0.000000 1151 +social 0 13 4.382027 0.000000 1123 +necessari 0 13 4.382027 0.000000 1147 +skill 0 12 4.465908 0.000000 1205 +desktop 0 10 4.653960 0.000000 1445 +rich 0 10 4.653960 0.000000 1396 +telecommun 0 9 4.753590 0.000000 1565 +zero 0 7 5.010635 0.000000 1896 +shot 0 7 5.010635 0.000000 1898 +necessarili 0 7 5.010635 0.000000 1899 +thegoal 0 6 5.164786 0.000000 2033 +lloyd 0 6 5.164786 0.000000 2103 +paint 0 5 5.347108 0.000000 2400 +bodner 0 5 5.347108 0.000000 2401 +chart 0 4 5.568345 0.000000 2653 +assignmentsand 0 4 5.568345 0.000000 2760 +glanc 0 4 5.568345 0.000000 2652 +salli 0 3 5.857933 0.000000 3432 +facstaff 0 3 5.857933 0.000000 3433 +drag 0 3 5.857933 0.000000 3434 +crack 0 3 5.857933 0.000000 3435 +macintoshcomput 0 3 5.857933 0.000000 3228 +iici 0 3 5.857933 0.000000 3436 +scanner 0 3 5.857933 0.000000 3437 +leavi 0 3 5.857933 0.000000 3438 +sharenow 0 3 5.857933 0.000000 3439 +swander 0 3 5.857933 0.000000 3440 +thayer 0 3 5.857933 0.000000 3441 +varghes 0 3 5.857933 0.000000 3442 +weinberg 0 3 5.857933 0.000000 3443 +spreadsheet 0 2 6.263398 0.000000 4598 +aldu 0 2 6.263398 0.000000 4599 +computersinstructor 0 2 6.263398 0.000000 4600 +petersonoffic 0 2 6.263398 0.000000 4601 +sciencephon 0 2 6.263398 0.000000 4602 +slpeter 0 2 6.263398 0.000000 4603 +appointmentvit 0 2 6.263398 0.000000 4604 +halllectur 0 2 6.263398 0.000000 4605 +laudon 0 2 6.263398 0.000000 4606 +traver 0 2 6.263398 0.000000 4607 +laudonlab 0 2 6.263398 0.000000 4608 +petersoncours 0 2 6.263398 0.000000 4609 +computersto 0 2 6.263398 0.000000 4610 +throughcolleg 0 2 6.263398 0.000000 4611 +arena 0 2 6.263398 0.000000 4612 +csuse 0 2 6.263398 0.000000 4613 +experienceon 0 2 6.263398 0.000000 4614 +eudora 0 2 6.263398 0.000000 4615 +superpaint 0 2 6.263398 0.000000 4616 +filemak 0 2 6.263398 0.000000 4617 +hypercard 0 2 6.263398 0.000000 4618 +pagemak 0 2 6.263398 0.000000 4619 +educationalexperi 0 2 6.263398 0.000000 4620 +namesectiontimedai 0 2 6.263398 0.000000 4621 +mwnick 0 2 6.263398 0.000000 4622 +mwtrshannon 0 2 6.263398 0.000000 4623 +trtrjeff 0 2 6.263398 0.000000 4624 +reminga 0 2 6.263398 0.000000 4625 +mwfmwira 0 2 6.263398 0.000000 4626 +trtrbrian 0 2 6.263398 0.000000 4627 +mwfmwfbrad 0 2 6.263398 0.000000 4628 +mwfmwfjoe 0 2 6.263398 0.000000 4629 +trtrgeoff 0 2 6.263398 0.000000 4630 +mwftrmaria 0 2 6.263398 0.000000 4631 +yuin 0 2 6.263398 0.000000 4632 +mwfmwrecommend 0 2 6.263398 0.000000 4633 +nitti 0 2 6.263398 0.000000 4634 +gritti 0 2 6.263398 0.000000 4635 +superpaintassign 0 2 6.263398 0.000000 4636 +excellast 0 2 6.263398 0.000000 4637 +jonbodn 0 2 6.263398 0.000000 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..6b61d48f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +introduct 0 126 2.079442 0.000000 87 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +section 1 94 2.397895 2.397895 149 +comment 0 93 2.397895 0.000000 146 +question 0 91 2.397895 0.000000 141 +academ 0 82 2.484907 0.000000 178 +window 0 68 2.708050 0.000000 242 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +creat 0 63 2.772589 0.000000 277 +overview 0 56 2.890372 0.000000 323 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +algebra 0 45 3.135494 0.000000 394 +netscap 0 44 3.135494 0.000000 395 +offer 0 43 3.178054 0.000000 414 +microsoft 0 38 3.295837 0.000000 468 +ask 0 28 3.610918 0.000000 597 +jeff 0 25 3.737670 0.000000 673 +consult 0 24 3.761200 0.000000 687 +feedback 0 19 4.007333 0.000000 854 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +coordin 0 13 4.382027 0.000000 1182 +savitch 0 12 4.465908 0.000000 1269 +mainli 0 10 4.653960 0.000000 1432 +hint 0 10 4.653960 0.000000 1419 +tutor 0 9 4.753590 0.000000 1552 +pagecomput 0 7 5.010635 0.000000 1900 +skrentni 1 6 5.164786 5.164786 2104 +lampert 0 5 5.347108 0.000000 2398 +complaint 0 4 5.568345 0.000000 2795 +microcomput 0 3 5.857933 0.000000 3444 +instructorsw 0 2 6.263398 0.000000 4639 +csinform 0 2 6.263398 0.000000 4640 +subdirectoriesc 0 2 6.263398 0.000000 4641 +environmentfortran 0 2 6.263398 0.000000 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..431141b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +languag 1 227 1.386294 1.386294 26 +algorithm 0 162 1.791759 0.000000 57 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +high 0 130 2.079442 0.000000 101 +mathemat 1 108 2.197225 2.197225 123 +instructor 0 108 2.197225 0.000000 107 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +school 0 84 2.484907 0.000000 188 +orient 0 80 2.564949 0.000000 205 +solv 0 73 2.639057 0.000000 234 +logic 0 71 2.639057 0.000000 230 +experi 0 64 2.772589 0.000000 283 +colleg 0 61 2.833213 0.000000 300 +undergradu 0 54 2.944439 0.000000 338 +instruct 0 53 2.944439 0.000000 332 +open 0 38 3.295837 0.000000 469 +procedur 0 36 3.367296 0.000000 488 +least 0 35 3.401197 0.000000 516 +survei 0 35 3.401197 0.000000 513 +statist 0 35 3.401197 0.000000 521 +prepar 0 20 3.951244 0.000000 824 +fortran 0 15 4.248495 0.000000 1027 +pascal 0 12 4.465908 0.000000 1213 +consent 0 5 5.347108 0.000000 2389 +prereq 0 3 5.857933 0.000000 3178 +infocours 0 2 6.263398 0.000000 4212 +guidebook 0 2 6.263398 0.000000 4643 +cscours 0 1 6.957497 0.000000 8607 +descriptionfrom 0 1 6.957497 0.000000 8608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..6b61d48f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +introduct 0 126 2.079442 0.000000 87 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +section 1 94 2.397895 2.397895 149 +comment 0 93 2.397895 0.000000 146 +question 0 91 2.397895 0.000000 141 +academ 0 82 2.484907 0.000000 178 +window 0 68 2.708050 0.000000 242 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +creat 0 63 2.772589 0.000000 277 +overview 0 56 2.890372 0.000000 323 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +algebra 0 45 3.135494 0.000000 394 +netscap 0 44 3.135494 0.000000 395 +offer 0 43 3.178054 0.000000 414 +microsoft 0 38 3.295837 0.000000 468 +ask 0 28 3.610918 0.000000 597 +jeff 0 25 3.737670 0.000000 673 +consult 0 24 3.761200 0.000000 687 +feedback 0 19 4.007333 0.000000 854 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +coordin 0 13 4.382027 0.000000 1182 +savitch 0 12 4.465908 0.000000 1269 +mainli 0 10 4.653960 0.000000 1432 +hint 0 10 4.653960 0.000000 1419 +tutor 0 9 4.753590 0.000000 1552 +pagecomput 0 7 5.010635 0.000000 1900 +skrentni 1 6 5.164786 5.164786 2104 +lampert 0 5 5.347108 0.000000 2398 +complaint 0 4 5.568345 0.000000 2795 +microcomput 0 3 5.857933 0.000000 3444 +instructorsw 0 2 6.263398 0.000000 4639 +csinform 0 2 6.263398 0.000000 4640 +subdirectoriesc 0 2 6.263398 0.000000 4641 +environmentfortran 0 2 6.263398 0.000000 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..f0987d51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +exam 0 86 2.484907 0.000000 169 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +syllabu 0 67 2.708050 0.000000 247 +copi 0 63 2.772589 0.000000 284 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +direct 0 57 2.890372 0.000000 316 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +directori 0 45 3.135494 0.000000 396 +examin 0 42 3.218876 0.000000 424 +past 0 42 3.218876 0.000000 428 +obtain 0 33 3.433987 0.000000 534 +lab 0 24 3.761200 0.000000 698 +viewer 0 21 3.912023 0.000000 787 +explan 0 16 4.174387 0.000000 985 +readm 0 8 4.875197 0.000000 1699 +ghost 0 2 6.263398 0.000000 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..f0987d51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +exam 0 86 2.484907 0.000000 169 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +syllabu 0 67 2.708050 0.000000 247 +copi 0 63 2.772589 0.000000 284 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +direct 0 57 2.890372 0.000000 316 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +directori 0 45 3.135494 0.000000 396 +examin 0 42 3.218876 0.000000 424 +past 0 42 3.218876 0.000000 428 +obtain 0 33 3.433987 0.000000 534 +lab 0 24 3.761200 0.000000 698 +viewer 0 21 3.912023 0.000000 787 +explan 0 16 4.174387 0.000000 985 +readm 0 8 4.875197 0.000000 1699 +ghost 0 2 6.263398 0.000000 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..7220f229 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +data 0 170 1.791759 0.000000 49 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +book 0 99 2.302585 0.000000 131 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +question 0 91 2.397895 0.000000 141 +graphic 0 90 2.397895 0.000000 147 +exam 0 86 2.484907 0.000000 169 +novemb 0 81 2.484907 0.000000 179 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +homework 0 79 2.564949 0.000000 193 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +interfac 0 79 2.564949 0.000000 209 +html 0 75 2.639057 0.000000 235 +simul 0 66 2.708050 0.000000 255 +syllabu 0 67 2.708050 0.000000 247 +septemb 0 65 2.772589 0.000000 274 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +previou 0 62 2.772589 0.000000 290 +overview 0 56 2.890372 0.000000 323 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +case 0 51 2.995732 0.000000 351 +frequent 0 49 3.044522 0.000000 367 +format 0 48 3.044522 0.000000 356 +featur 0 46 3.091042 0.000000 386 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +midterm 0 45 3.135494 0.000000 392 +programm 0 39 3.258097 0.000000 445 +probabl 0 40 3.258097 0.000000 455 +open 0 38 3.295837 0.000000 469 +procedur 0 36 3.367296 0.000000 488 +manual 0 35 3.401197 0.000000 504 +represent 0 35 3.401197 0.000000 512 +chapter 0 32 3.465736 0.000000 536 +ask 0 28 3.610918 0.000000 597 +except 0 28 3.610918 0.000000 607 +revis 0 26 3.688879 0.000000 640 +miller 0 17 4.110874 0.000000 949 +regist 0 17 4.110874 0.000000 938 +quiz 0 16 4.174387 0.000000 990 +psycholog 0 15 4.248495 0.000000 1054 +quizz 0 13 4.382027 0.000000 1151 +calcul 0 12 4.465908 0.000000 1268 +assembl 0 12 4.465908 0.000000 1207 +arithmet 0 10 4.653960 0.000000 1388 +tuth 0 9 4.753590 0.000000 1519 +float 0 9 4.753590 0.000000 1504 +integ 0 8 4.875197 0.000000 1688 +lookup 0 5 5.347108 0.000000 2399 +karen 0 4 5.568345 0.000000 2796 +cancel 0 4 5.568345 0.000000 2746 +jerri 0 3 5.857933 0.000000 3445 +suen 0 3 5.857933 0.000000 3446 +asgarian 0 3 5.857933 0.000000 3447 +architecur 0 3 5.857933 0.000000 3448 +tusch 0 2 6.263398 0.000000 4645 +tutsch 0 2 6.263398 0.000000 4646 +execpc 0 2 6.263398 0.000000 4647 +nolandsect 0 2 6.263398 0.000000 4648 +smoler 0 2 6.263398 0.000000 4649 +sunlung 0 2 6.263398 0.000000 4650 +ssuen 0 2 6.263398 0.000000 4651 +edusridevi 0 2 6.263398 0.000000 4652 +bhamidipati 0 2 6.263398 0.000000 4653 +bsri 0 2 6.263398 0.000000 4654 +edumohammad 0 2 6.263398 0.000000 4655 +programs 0 2 6.263398 0.000000 4656 +examsal 0 2 6.263398 0.000000 4657 +noteskaren 0 2 6.263398 0.000000 4658 +updatedmondai 0 2 6.263398 0.000000 4659 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..24afa453 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +data 0 170 1.791759 0.000000 49 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +structur 0 106 2.197225 0.000000 105 +book 0 99 2.302585 0.000000 131 +advanc 0 99 2.302585 0.000000 130 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +question 0 91 2.397895 0.000000 141 +graphic 0 90 2.397895 0.000000 147 +exam 0 86 2.484907 0.000000 169 +novemb 0 81 2.484907 0.000000 179 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +homework 0 79 2.564949 0.000000 193 +decemb 0 80 2.564949 0.000000 215 +mondai 0 77 2.564949 0.000000 206 +interfac 0 79 2.564949 0.000000 209 +html 0 75 2.639057 0.000000 235 +simul 0 66 2.708050 0.000000 255 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +septemb 0 65 2.772589 0.000000 274 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +previou 0 62 2.772589 0.000000 290 +overview 0 56 2.890372 0.000000 323 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +case 0 51 2.995732 0.000000 351 +frequent 0 49 3.044522 0.000000 367 +format 0 48 3.044522 0.000000 356 +featur 0 46 3.091042 0.000000 386 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +programm 0 39 3.258097 0.000000 445 +probabl 0 40 3.258097 0.000000 455 +open 0 38 3.295837 0.000000 469 +procedur 0 36 3.367296 0.000000 488 +manual 0 35 3.401197 0.000000 504 +represent 0 35 3.401197 0.000000 512 +chapter 0 32 3.465736 0.000000 536 +option 0 30 3.555348 0.000000 575 +ask 0 28 3.610918 0.000000 597 +except 0 28 3.610918 0.000000 607 +revis 0 26 3.688879 0.000000 640 +miller 0 17 4.110874 0.000000 949 +regist 0 17 4.110874 0.000000 938 +quiz 0 16 4.174387 0.000000 990 +sign 0 16 4.174387 0.000000 970 +psycholog 0 15 4.248495 0.000000 1054 +quizz 0 13 4.382027 0.000000 1151 +calcul 0 12 4.465908 0.000000 1268 +assembl 0 12 4.465908 0.000000 1207 +arithmet 0 10 4.653960 0.000000 1388 +tuth 0 9 4.753590 0.000000 1519 +float 0 9 4.753590 0.000000 1504 +rais 0 8 4.875197 0.000000 1711 +integ 0 8 4.875197 0.000000 1688 +difficult 0 6 5.164786 0.000000 2035 +lookup 0 5 5.347108 0.000000 2399 +karen 0 4 5.568345 0.000000 2796 +cancel 0 4 5.568345 0.000000 2746 +jerri 0 3 5.857933 0.000000 3445 +suen 0 3 5.857933 0.000000 3446 +asgarian 0 3 5.857933 0.000000 3447 +architecur 0 3 5.857933 0.000000 3448 +tusch 0 2 6.263398 0.000000 4645 +tutsch 0 2 6.263398 0.000000 4646 +execpc 0 2 6.263398 0.000000 4647 +nolandsect 0 2 6.263398 0.000000 4648 +smoler 0 2 6.263398 0.000000 4649 +sunlung 0 2 6.263398 0.000000 4650 +ssuen 0 2 6.263398 0.000000 4651 +edusridevi 0 2 6.263398 0.000000 4652 +bhamidipati 0 2 6.263398 0.000000 4653 +bsri 0 2 6.263398 0.000000 4654 +edumohammad 0 2 6.263398 0.000000 4655 +programs 0 2 6.263398 0.000000 4656 +examsal 0 2 6.263398 0.000000 4657 +noteskaren 0 2 6.263398 0.000000 4658 +updatedmondai 0 2 6.263398 0.000000 4659 +programa 0 1 6.957497 0.000000 8609 +programb 0 1 6.957497 0.000000 8610 +cumul 0 1 6.957497 0.000000 8611 +desperateto 0 1 6.957497 0.000000 8612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..d8bbc3b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +implement 0 152 1.791759 0.000000 52 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +code 0 108 2.197225 0.000000 116 +place 0 106 2.197225 0.000000 124 +structur 0 106 2.197225 0.000000 105 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +need 0 98 2.302585 0.000000 135 +memori 0 101 2.302585 0.000000 139 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +dynam 0 76 2.564949 0.000000 194 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +simul 0 66 2.708050 0.000000 255 +copi 0 63 2.772589 0.000000 284 +creat 0 63 2.772589 0.000000 277 +wednesdai 0 64 2.772589 0.000000 261 +locat 0 59 2.833213 0.000000 303 +browser 0 56 2.890372 0.000000 313 +unix 0 58 2.890372 0.000000 308 +sampl 0 53 2.944439 0.000000 339 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +get 0 46 3.091042 0.000000 380 +done 0 47 3.091042 0.000000 381 +directori 0 45 3.135494 0.000000 396 +discuss 0 45 3.135494 0.000000 399 +futur 0 41 3.218876 0.000000 427 +cach 0 41 3.218876 0.000000 432 +announc 0 40 3.258097 0.000000 441 +error 0 40 3.258097 0.000000 449 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +tree 0 36 3.367296 0.000000 492 +either 0 35 3.401197 0.000000 506 +jame 0 35 3.401197 0.000000 507 +chapter 0 32 3.465736 0.000000 536 +common 0 30 3.555348 0.000000 574 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +becom 0 28 3.610918 0.000000 603 +lab 0 24 3.761200 0.000000 698 +sort 0 22 3.850148 0.000000 738 +binari 0 20 3.951244 0.000000 823 +reserv 0 20 3.951244 0.000000 808 +alloc 0 20 3.951244 0.000000 821 +thur 0 19 4.007333 0.000000 847 +comparison 0 19 4.007333 0.000000 863 +attend 0 18 4.060443 0.000000 893 +otherwis 0 17 4.110874 0.000000 922 +balanc 0 14 4.317488 0.000000 1112 +recurs 0 13 4.382027 0.000000 1127 +magic 0 11 4.553877 0.000000 1358 +queue 0 10 4.653960 0.000000 1386 +stack 0 10 4.653960 0.000000 1389 +wendt 0 10 4.653960 0.000000 1446 +cheng 0 10 4.653960 0.000000 1381 +kurt 0 9 4.753590 0.000000 1548 +unusu 0 9 4.753590 0.000000 1566 +forget 0 8 4.875197 0.000000 1712 +reload 0 8 4.875197 0.000000 1682 +hash 0 8 4.875197 0.000000 1618 +skrentni 0 6 5.164786 0.000000 2104 +skip 0 5 5.347108 0.000000 2402 +handin 0 5 5.347108 0.000000 2393 +overload 0 5 5.347108 0.000000 2403 +billi 0 5 5.347108 0.000000 2404 +outdat 0 4 5.568345 0.000000 2797 +appendix 0 4 5.568345 0.000000 2739 +makeup 0 3 5.857933 0.000000 3449 +vega 0 3 5.857933 0.000000 3450 +stale 0 2 6.263398 0.000000 4660 +lec 0 2 6.263398 0.000000 4661 +structureslectur 0 2 6.263398 0.000000 4662 +psychologylectur 0 2 6.263398 0.000000 4663 +psychologycours 0 2 6.263398 0.000000 4664 +baicheng 0 2 6.263398 0.000000 4665 +liao 0 2 6.263398 0.000000 4666 +bail 0 2 6.263398 0.000000 4667 +jiacheng 0 2 6.263398 0.000000 4668 +pmcopyright 0 2 6.263398 0.000000 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..a30ef799 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,549 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +phone 0 175 1.791759 0.000000 45 +base 0 165 1.791759 0.000000 50 +madison 0 165 1.791759 0.000000 55 +contact 0 153 1.791759 0.000000 59 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +first 0 140 1.945910 0.000000 71 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +dayton 0 119 2.079442 0.000000 104 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +welcom 0 122 2.079442 0.000000 99 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +topic 0 114 2.197225 0.000000 110 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +send 0 114 2.197225 0.000000 109 +pleas 0 113 2.197225 0.000000 114 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +text 0 98 2.302585 0.000000 133 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +book 0 99 2.302585 0.000000 131 +user 0 104 2.302585 0.000000 137 +peopl 0 96 2.302585 0.000000 132 +comment 0 93 2.397895 0.000000 146 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +exam 0 86 2.484907 0.000000 169 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +requir 0 81 2.484907 0.000000 167 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +contain 0 81 2.484907 0.000000 174 +stuff 0 87 2.484907 0.000000 171 +thing 0 84 2.484907 0.000000 189 +learn 0 86 2.484907 0.000000 170 +level 0 87 2.484907 0.000000 180 +environ 0 84 2.484907 0.000000 177 +want 0 79 2.564949 0.000000 199 +complet 0 77 2.564949 0.000000 208 +know 0 80 2.564949 0.000000 198 +sourc 0 77 2.564949 0.000000 201 +decemb 0 80 2.564949 0.000000 215 +good 0 77 2.564949 0.000000 200 +state 0 76 2.564949 0.000000 207 +appear 0 78 2.564949 0.000000 210 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +name 0 72 2.639057 0.000000 220 +materi 0 75 2.639057 0.000000 221 +tuesdai 0 73 2.639057 0.000000 219 +effici 0 73 2.639057 0.000000 233 +line 0 75 2.639057 0.000000 231 +write 0 72 2.639057 0.000000 222 +addit 0 74 2.639057 0.000000 228 +html 0 75 2.639057 0.000000 235 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +appli 0 71 2.639057 0.000000 226 +would 0 67 2.708050 0.000000 251 +test 0 66 2.708050 0.000000 252 +main 0 67 2.708050 0.000000 256 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +function 0 62 2.772589 0.000000 275 +copi 0 63 2.772589 0.000000 284 +experi 0 64 2.772589 0.000000 283 +descript 0 64 2.772589 0.000000 271 +new 0 64 2.772589 0.000000 262 +polici 0 64 2.772589 0.000000 279 +abstract 0 62 2.772589 0.000000 276 +written 0 63 2.772589 0.000000 278 +street 0 63 2.772589 0.000000 293 +creat 0 63 2.772589 0.000000 277 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +previou 0 62 2.772589 0.000000 290 +result 0 65 2.772589 0.000000 281 +room 0 59 2.833213 0.000000 301 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +share 0 59 2.833213 0.000000 304 +best 0 59 2.833213 0.000000 299 +type 0 61 2.833213 0.000000 296 +unix 0 58 2.890372 0.000000 308 +semest 0 58 2.890372 0.000000 312 +major 0 56 2.890372 0.000000 315 +faculti 0 56 2.890372 0.000000 325 +detail 0 57 2.890372 0.000000 321 +sever 0 56 2.890372 0.000000 322 +reason 0 57 2.890372 0.000000 318 +special 0 56 2.890372 0.000000 320 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +cover 0 55 2.944439 0.000000 329 +suggest 0 53 2.944439 0.000000 331 +sampl 0 53 2.944439 0.000000 339 +undergradu 0 54 2.944439 0.000000 338 +talk 0 53 2.944439 0.000000 336 +local 0 55 2.944439 0.000000 334 +instruct 0 53 2.944439 0.000000 332 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +week 0 52 2.995732 0.000000 343 +much 0 52 2.995732 0.000000 349 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +appoint 0 49 3.044522 0.000000 358 +basic 0 50 3.044522 0.000000 360 +right 0 48 3.044522 0.000000 363 +numer 0 49 3.044522 0.000000 369 +format 0 48 3.044522 0.000000 356 +possibl 0 47 3.091042 0.000000 378 +done 0 47 3.091042 0.000000 381 +understand 0 47 3.091042 0.000000 384 +electron 0 47 3.091042 0.000000 379 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +describ 0 45 3.135494 0.000000 400 +midterm 0 45 3.135494 0.000000 392 +anoth 0 45 3.135494 0.000000 408 +even 0 45 3.135494 0.000000 393 +answer 0 45 3.135494 0.000000 391 +made 0 44 3.135494 0.000000 398 +long 0 43 3.178054 0.000000 413 +show 0 43 3.178054 0.000000 417 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +howev 0 41 3.218876 0.000000 422 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +continu 0 39 3.258097 0.000000 448 +error 0 40 3.258097 0.000000 449 +littl 0 39 3.258097 0.000000 454 +multipl 0 39 3.258097 0.000000 453 +tutori 0 39 3.258097 0.000000 437 +correct 0 38 3.295837 0.000000 462 +open 0 38 3.295837 0.000000 469 +mean 0 37 3.332205 0.000000 477 +field 0 37 3.332205 0.000000 482 +respons 0 37 3.332205 0.000000 476 +expect 0 37 3.332205 0.000000 484 +purpos 0 37 3.332205 0.000000 481 +short 0 36 3.367296 0.000000 499 +download 0 36 3.367296 0.000000 489 +soon 0 36 3.367296 0.000000 494 +print 0 34 3.401197 0.000000 503 +statist 0 35 3.401197 0.000000 521 +either 0 35 3.401197 0.000000 506 +approxim 0 35 3.401197 0.000000 509 +everi 0 34 3.401197 0.000000 519 +singl 0 34 3.401197 0.000000 510 +next 0 34 3.401197 0.000000 517 +product 0 33 3.433987 0.000000 527 +within 0 33 3.433987 0.000000 525 +go 0 33 3.433987 0.000000 529 +express 0 32 3.465736 0.000000 540 +ad 0 32 3.465736 0.000000 544 +given 0 32 3.465736 0.000000 538 +often 0 31 3.496508 0.000000 551 +posit 0 31 3.496508 0.000000 552 +computersci 0 30 3.555348 0.000000 562 +abl 0 30 3.555348 0.000000 566 +hard 0 30 3.555348 0.000000 563 +turn 0 29 3.583519 0.000000 586 +limit 0 29 3.583519 0.000000 585 +actual 0 28 3.610918 0.000000 604 +load 0 28 3.610918 0.000000 601 +except 0 28 3.610918 0.000000 607 +intend 0 28 3.610918 0.000000 599 +full 0 28 3.610918 0.000000 615 +becom 0 28 3.610918 0.000000 603 +held 0 28 3.610918 0.000000 600 +manipul 0 27 3.637586 0.000000 624 +determin 0 27 3.637586 0.000000 630 +administr 0 27 3.637586 0.000000 628 +quit 0 27 3.637586 0.000000 633 +consist 0 26 3.688879 0.000000 651 +subject 0 26 3.688879 0.000000 647 +bound 0 26 3.688879 0.000000 659 +comp 0 26 3.688879 0.000000 650 +session 0 26 3.688879 0.000000 643 +valu 0 25 3.737670 0.000000 665 +reliabl 0 25 3.737670 0.000000 674 +never 0 25 3.737670 0.000000 671 +aspect 0 25 3.737670 0.000000 663 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +handl 0 24 3.761200 0.000000 685 +sometim 0 24 3.761200 0.000000 696 +wish 0 24 3.761200 0.000000 692 +variabl 0 23 3.806662 0.000000 715 +begin 0 23 3.806662 0.000000 716 +size 0 23 3.806662 0.000000 713 +input 0 23 3.806662 0.000000 727 +identifi 0 22 3.850148 0.000000 760 +period 0 22 3.850148 0.000000 743 +recommend 0 22 3.850148 0.000000 737 +almost 0 22 3.850148 0.000000 742 +sort 0 22 3.850148 0.000000 738 +inth 0 22 3.850148 0.000000 741 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +tell 0 21 3.912023 0.000000 777 +output 0 21 3.912023 0.000000 788 +binari 0 20 3.951244 0.000000 823 +sure 0 20 3.951244 0.000000 813 +prepar 0 20 3.951244 0.000000 824 +grad 0 20 3.951244 0.000000 837 +minut 0 20 3.951244 0.000000 810 +qualiti 0 20 3.951244 0.000000 832 +scheme 0 20 3.951244 0.000000 818 +break 0 20 3.951244 0.000000 812 +assum 0 19 4.007333 0.000000 845 +item 0 19 4.007333 0.000000 856 +ever 0 19 4.007333 0.000000 872 +separ 0 19 4.007333 0.000000 844 +exercis 0 19 4.007333 0.000000 842 +log 0 19 4.007333 0.000000 857 +five 0 19 4.007333 0.000000 841 +particularli 0 19 4.007333 0.000000 867 +account 0 18 4.060443 0.000000 882 +encourag 0 18 4.060443 0.000000 880 +record 0 18 4.060443 0.000000 890 +accept 0 18 4.060443 0.000000 879 +along 0 18 4.060443 0.000000 878 +behavior 0 18 4.060443 0.000000 881 +four 0 18 4.060443 0.000000 905 +element 0 18 4.060443 0.000000 895 +attend 0 18 4.060443 0.000000 893 +debug 0 17 4.110874 0.000000 944 +segment 0 17 4.110874 0.000000 931 +outlin 0 17 4.110874 0.000000 914 +macintosh 0 17 4.110874 0.000000 920 +women 0 16 4.174387 0.000000 1004 +earli 0 16 4.174387 0.000000 968 +normal 0 16 4.174387 0.000000 995 +easi 0 16 4.174387 0.000000 969 +style 0 15 4.248495 0.000000 1036 +later 0 15 4.248495 0.000000 1043 +purchas 0 15 4.248495 0.000000 1030 +capabl 0 15 4.248495 0.000000 1016 +doit 0 14 4.317488 0.000000 1111 +near 0 14 4.317488 0.000000 1091 +incomput 0 14 4.317488 0.000000 1096 +consider 0 14 4.317488 0.000000 1076 +floor 0 14 4.317488 0.000000 1070 +manner 0 14 4.317488 0.000000 1074 +easili 0 14 4.317488 0.000000 1077 +convent 0 14 4.317488 0.000000 1072 +necessari 0 13 4.382027 0.000000 1147 +menu 0 13 4.382027 0.000000 1156 +someon 0 13 4.382027 0.000000 1128 +wait 0 13 4.382027 0.000000 1168 +incorpor 0 13 4.382027 0.000000 1163 +dewitt 0 12 4.465908 0.000000 1270 +readi 0 12 4.465908 0.000000 1242 +pascal 0 12 4.465908 0.000000 1213 +grow 0 12 4.465908 0.000000 1209 +onth 0 12 4.465908 0.000000 1218 +weight 0 12 4.465908 0.000000 1204 +count 0 12 4.465908 0.000000 1239 +robust 0 12 4.465908 0.000000 1271 +reader 0 12 4.465908 0.000000 1246 +cycl 0 11 4.553877 0.000000 1335 +extra 0 11 4.553877 0.000000 1312 +statement 0 11 4.553877 0.000000 1313 +extrem 0 11 4.553877 0.000000 1330 +arbitrari 0 11 4.553877 0.000000 1359 +loop 0 11 4.553877 0.000000 1310 +typic 0 11 4.553877 0.000000 1360 +summar 0 11 4.553877 0.000000 1295 +submiss 0 11 4.553877 0.000000 1298 +true 0 10 4.653960 0.000000 1422 +cheat 0 10 4.653960 0.000000 1395 +modular 0 10 4.653960 0.000000 1392 +equal 0 10 4.653960 0.000000 1424 +hint 0 10 4.653960 0.000000 1419 +awai 0 10 4.653960 0.000000 1447 +certain 0 10 4.653960 0.000000 1393 +thecomput 0 10 4.653960 0.000000 1408 +label 0 10 4.653960 0.000000 1423 +strongli 0 10 4.653960 0.000000 1406 +wall 0 9 4.753590 0.000000 1553 +notat 0 9 4.753590 0.000000 1489 +assumpt 0 9 4.753590 0.000000 1514 +declar 0 9 4.753590 0.000000 1526 +end 0 9 4.753590 0.000000 1567 +frank 0 9 4.753590 0.000000 1568 +mention 0 9 4.753590 0.000000 1569 +andth 0 9 4.753590 0.000000 1481 +familiar 0 9 4.753590 0.000000 1485 +equival 0 9 4.753590 0.000000 1496 +prefer 0 9 4.753590 0.000000 1491 +criteria 0 9 4.753590 0.000000 1477 +correctli 0 9 4.753590 0.000000 1478 +informationabout 0 9 4.753590 0.000000 1515 +unusu 0 9 4.753590 0.000000 1566 +clear 0 9 4.753590 0.000000 1488 +pick 0 9 4.753590 0.000000 1498 +yanni 0 8 4.875197 0.000000 1713 +simpli 0 8 4.875197 0.000000 1626 +ioannidi 0 8 4.875197 0.000000 1714 +paramet 0 7 5.010635 0.000000 1796 +explain 0 7 5.010635 0.000000 1816 +header 0 7 5.010635 0.000000 1787 +isbn 0 7 5.010635 0.000000 1901 +exactli 0 7 5.010635 0.000000 1817 +pursu 0 7 5.010635 0.000000 1902 +whenev 0 7 5.010635 0.000000 1883 +bug 0 7 5.010635 0.000000 1801 +extern 0 6 5.164786 0.000000 2105 +mirror 0 6 5.164786 0.000000 2028 +sciencesoffic 0 6 5.164786 0.000000 2101 +notifi 0 6 5.164786 0.000000 2106 +wrong 0 6 5.164786 0.000000 2025 +approv 0 6 5.164786 0.000000 2078 +troubl 0 6 5.164786 0.000000 2002 +desk 0 5 5.347108 0.000000 2297 +situat 0 5 5.347108 0.000000 2365 +supplement 0 5 5.347108 0.000000 2355 +chemistri 0 5 5.347108 0.000000 2405 +sparcstat 0 5 5.347108 0.000000 2406 +caus 0 5 5.347108 0.000000 2298 +respond 0 5 5.347108 0.000000 2354 +blow 0 5 5.347108 0.000000 2407 +skip 0 5 5.347108 0.000000 2402 +thiscours 0 4 5.568345 0.000000 2601 +expens 0 4 5.568345 0.000000 2678 +repeat 0 4 5.568345 0.000000 2798 +suppli 0 4 5.568345 0.000000 2611 +tire 0 4 5.568345 0.000000 2799 +thec 0 3 5.857933 0.000000 3132 +neg 0 3 5.857933 0.000000 3451 +ghostview 0 3 5.857933 0.000000 3163 +eduand 0 3 5.857933 0.000000 3452 +tremend 0 3 5.857933 0.000000 3453 +narr 0 3 5.857933 0.000000 3454 +gradingther 0 3 5.857933 0.000000 3455 +programmingassign 0 3 5.857933 0.000000 3398 +thesear 0 3 5.857933 0.000000 3456 +duedat 0 3 5.857933 0.000000 3105 +helpif 0 3 5.857933 0.000000 3126 +confus 0 3 5.857933 0.000000 3144 +ineffici 0 3 5.857933 0.000000 3457 +useof 0 3 5.857933 0.000000 3368 +meaning 0 3 5.857933 0.000000 3458 +argument 0 3 5.857933 0.000000 3120 +briefli 0 3 5.857933 0.000000 3459 +urg 0 3 5.857933 0.000000 3212 +comfort 0 3 5.857933 0.000000 3136 +pain 0 3 5.857933 0.000000 3460 +clariti 0 2 6.263398 0.000000 4413 +behav 0 2 6.263398 0.000000 4670 +indent 0 2 6.263398 0.000000 4374 +amoffic 0 2 6.263398 0.000000 4671 +femal 0 2 6.263398 0.000000 4672 +wic 0 2 6.263398 0.000000 4673 +oneof 0 2 6.263398 0.000000 4674 +tomak 0 2 6.263398 0.000000 4675 +startup 0 2 6.263398 0.000000 4676 +textth 0 2 6.263398 0.000000 4677 +carrano 0 2 6.263398 0.000000 4678 +lecturenot 0 2 6.263398 0.000000 4679 +notesar 0 2 6.263398 0.000000 4559 +invalu 0 2 6.263398 0.000000 4680 +nonetheless 0 2 6.263398 0.000000 4681 +thatyou 0 2 6.263398 0.000000 4682 +youwork 0 2 6.263398 0.000000 4083 +provis 0 2 6.263398 0.000000 4683 +excus 0 2 6.263398 0.000000 4684 +datastructur 0 2 6.263398 0.000000 4685 +tovisit 0 2 6.263398 0.000000 4686 +andlog 0 2 6.263398 0.000000 4104 +facet 0 2 6.263398 0.000000 4687 +unnecessarili 0 2 6.263398 0.000000 4688 +liter 0 2 6.263398 0.000000 4689 +convei 0 2 6.263398 0.000000 4690 +beavoid 0 2 6.263398 0.000000 4411 +thefirst 0 2 6.263398 0.000000 4092 +outputfil 0 1 6.957497 0.000000 8613 +suzan 0 1 6.957497 0.000000 8614 +inputfil 0 1 6.957497 0.000000 8615 +structuresfal 0 1 6.957497 0.000000 8616 +htmlinstructor 0 1 6.957497 0.000000 8617 +newsassign 0 1 6.957497 0.000000 8618 +statisticssom 0 1 6.957497 0.000000 8619 +median 0 1 6.957497 0.000000 8620 +midterma 0 1 6.957497 0.000000 8621 +oldmidterm 0 1 6.957497 0.000000 8622 +ownmidterm 0 1 6.957497 0.000000 8623 +searchth 0 1 6.957497 0.000000 8624 +filemenu 0 1 6.957497 0.000000 8625 +andchoos 0 1 6.957497 0.000000 8626 +sciencesom 0 1 6.957497 0.000000 8627 +haveform 0 1 6.957497 0.000000 8628 +becomecomput 0 1 6.957497 0.000000 8629 +thisclass 0 1 6.957497 0.000000 8630 +withtheir 0 1 6.957497 0.000000 8631 +classwork 0 1 6.957497 0.000000 8632 +stodder 0 1 6.957497 0.000000 8633 +theodd 0 1 6.957497 0.000000 8634 +statementi 0 1 6.957497 0.000000 8635 +aniniti 0 1 6.957497 0.000000 8636 +exceptionsy 0 1 6.957497 0.000000 8637 +isdata 0 1 6.957497 0.000000 8638 +notnecessari 0 1 6.957497 0.000000 8639 +isveri 0 1 6.957497 0.000000 8640 +whichar 0 1 6.957497 0.000000 8641 +entranceof 0 1 6.957497 0.000000 8642 +needsom 0 1 6.957497 0.000000 8643 +handoutc 0 1 6.957497 0.000000 8644 +althoughi 0 1 6.957497 0.000000 8645 +courseof 0 1 6.957497 0.000000 8646 +apoint 0 1 6.957497 0.000000 8647 +prerequisitecours 0 1 6.957497 0.000000 8648 +certainrestrict 0 1 6.957497 0.000000 8649 +emailand 0 1 6.957497 0.000000 8650 +toyour 0 1 6.957497 0.000000 8651 +runwith 0 1 6.957497 0.000000 8652 +inassign 0 1 6.957497 0.000000 8653 +allelectron 0 1 6.957497 0.000000 8654 +policyno 0 1 6.957497 0.000000 8655 +coincid 0 1 6.957497 0.000000 8656 +oneach 0 1 6.957497 0.000000 8657 +thelast 0 1 6.957497 0.000000 8658 +cheatingth 0 1 6.957497 0.000000 8659 +linest 0 1 6.957497 0.000000 8660 +tocommun 0 1 6.957497 0.000000 8661 +butther 0 1 6.957497 0.000000 8662 +obei 0 1 6.957497 0.000000 8663 +policiesgovern 0 1 6.957497 0.000000 8664 +policiesif 0 1 6.957497 0.000000 8665 +currenthard 0 1 6.957497 0.000000 8666 +conceptsthat 0 1 6.957497 0.000000 8667 +emailsever 0 1 6.957497 0.000000 8668 +gradingprogram 0 1 6.957497 0.000000 8669 +typicalinput 0 1 6.957497 0.000000 8670 +projectspecif 0 1 6.957497 0.000000 8671 +shoulddemonstr 0 1 6.957497 0.000000 8672 +includingunusu 0 1 6.957497 0.000000 8673 +considerationof 0 1 6.957497 0.000000 8674 +orcomplex 0 1 6.957497 0.000000 8675 +definedconst 0 1 6.957497 0.000000 8676 +thosevalu 0 1 6.957497 0.000000 8677 +styleus 0 1 6.957497 0.000000 8678 +variable_nam 0 1 6.957497 0.000000 8679 +function_nam 0 1 6.957497 0.000000 8680 +const 0 1 6.957497 0.000000 8681 +defined_const 0 1 6.957497 0.000000 8682 +enum 0 1 6.957497 0.000000 8683 +enumtyp 0 1 6.957497 0.000000 8684 +classnam 0 1 6.957497 0.000000 8685 +notesfor 0 1 6.957497 0.000000 8686 +meaningfulli 0 1 6.957497 0.000000 8687 +documentationthi 0 1 6.957497 0.000000 8688 +yourprogram 0 1 6.957497 0.000000 8689 +someonewho 0 1 6.957497 0.000000 8690 +superfici 0 1 6.957497 0.000000 8691 +unawar 0 1 6.957497 0.000000 8692 +descriptionne 0 1 6.957497 0.000000 8693 +thensuffici 0 1 6.957497 0.000000 8694 +documentationther 0 1 6.957497 0.000000 8695 +structuresshould 0 1 6.957497 0.000000 8696 +membershould 0 1 6.957497 0.000000 8697 +sname 0 1 6.957497 0.000000 8698 +withoutmak 0 1 6.957497 0.000000 8699 +stackyou 0 1 6.957497 0.000000 8700 +tricki 0 1 6.957497 0.000000 8701 +opaqu 0 1 6.957497 0.000000 8702 +commentcan 0 1 6.957497 0.000000 8703 +clarifi 0 1 6.957497 0.000000 8704 +outlineof 0 1 6.957497 0.000000 8705 +vimani 0 1 6.957497 0.000000 8706 +becomecomfort 0 1 6.957497 0.000000 8707 +youronli 0 1 6.957497 0.000000 8708 +macpasc 0 1 6.957497 0.000000 8709 +withunix 0 1 6.957497 0.000000 8710 +wellspent 0 1 6.957497 0.000000 8711 +thefollow 0 1 6.957497 0.000000 8712 +tbayou 0 1 6.957497 0.000000 8713 +goto 0 1 6.957497 0.000000 8714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..db70e74b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +provid 0 121 2.079442 0.000000 94 +structur 0 106 2.197225 0.000000 105 +teach 0 108 2.197225 0.000000 112 +final 0 116 2.197225 0.000000 108 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +assist 0 112 2.197225 0.000000 113 +well 0 109 2.197225 0.000000 121 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +advanc 0 99 2.302585 0.000000 130 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +question 0 91 2.397895 0.000000 141 +search 0 95 2.397895 0.000000 155 +pictur 0 89 2.397895 0.000000 160 +exam 0 86 2.484907 0.000000 169 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +second 0 81 2.484907 0.000000 166 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +larg 0 82 2.484907 0.000000 168 +learn 0 86 2.484907 0.000000 170 +stuff 0 87 2.484907 0.000000 171 +school 0 84 2.484907 0.000000 188 +homework 0 79 2.564949 0.000000 193 +mondai 0 77 2.564949 0.000000 206 +complet 0 77 2.564949 0.000000 208 +want 0 79 2.564949 0.000000 199 +appear 0 78 2.564949 0.000000 210 +know 0 80 2.564949 0.000000 198 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +materi 0 75 2.639057 0.000000 221 +tuesdai 0 73 2.639057 0.000000 219 +write 0 72 2.639057 0.000000 222 +html 0 75 2.639057 0.000000 235 +line 0 75 2.639057 0.000000 231 +addit 0 74 2.639057 0.000000 228 +appli 0 71 2.639057 0.000000 226 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +free 0 73 2.639057 0.000000 224 +name 0 72 2.639057 0.000000 220 +thursdai 0 70 2.708050 0.000000 241 +abstract 0 62 2.772589 0.000000 276 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +street 0 63 2.772589 0.000000 293 +creat 0 63 2.772589 0.000000 277 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +function 0 62 2.772589 0.000000 275 +previou 0 62 2.772589 0.000000 290 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +detail 0 57 2.890372 0.000000 321 +unix 0 58 2.890372 0.000000 308 +semest 0 58 2.890372 0.000000 312 +index 0 56 2.890372 0.000000 309 +summer 0 56 2.890372 0.000000 311 +cover 0 55 2.944439 0.000000 329 +maintain 0 51 2.995732 0.000000 342 +run 0 51 2.995732 0.000000 347 +tabl 0 51 2.995732 0.000000 346 +principl 0 48 3.044522 0.000000 357 +give 0 50 3.044522 0.000000 359 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +electron 0 47 3.091042 0.000000 379 +even 0 45 3.135494 0.000000 393 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +textbook 0 44 3.135494 0.000000 397 +anoth 0 45 3.135494 0.000000 408 +describ 0 45 3.135494 0.000000 400 +long 0 43 3.178054 0.000000 413 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +littl 0 39 3.258097 0.000000 454 +must 0 40 3.258097 0.000000 442 +correct 0 38 3.295837 0.000000 462 +close 0 38 3.295837 0.000000 465 +connect 0 37 3.332205 0.000000 485 +feel 0 37 3.332205 0.000000 483 +respons 0 37 3.332205 0.000000 476 +short 0 36 3.367296 0.000000 499 +tree 0 36 3.367296 0.000000 492 +jame 0 35 3.401197 0.000000 507 +approxim 0 35 3.401197 0.000000 509 +taught 0 33 3.433987 0.000000 526 +concept 0 32 3.465736 0.000000 537 +given 0 32 3.465736 0.000000 538 +often 0 31 3.496508 0.000000 551 +photo 0 31 3.496508 0.000000 561 +abl 0 30 3.555348 0.000000 566 +produc 0 30 3.555348 0.000000 572 +turn 0 29 3.583519 0.000000 586 +determin 0 27 3.637586 0.000000 630 +manipul 0 27 3.637586 0.000000 624 +revis 0 26 3.688879 0.000000 640 +bound 0 26 3.688879 0.000000 659 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +background 0 25 3.737670 0.000000 664 +alwai 0 24 3.761200 0.000000 691 +size 0 23 3.806662 0.000000 713 +sequenc 0 23 3.806662 0.000000 734 +identifi 0 22 3.850148 0.000000 760 +emphasi 0 22 3.850148 0.000000 755 +recommend 0 22 3.850148 0.000000 737 +sort 0 22 3.850148 0.000000 738 +color 0 22 3.850148 0.000000 762 +basi 0 20 3.951244 0.000000 828 +binari 0 20 3.951244 0.000000 823 +tenni 0 20 3.951244 0.000000 838 +exercis 0 19 4.007333 0.000000 842 +separ 0 19 4.007333 0.000000 844 +log 0 19 4.007333 0.000000 857 +assum 0 19 4.007333 0.000000 845 +five 0 19 4.007333 0.000000 841 +appropri 0 18 4.060443 0.000000 883 +wind 0 18 4.060443 0.000000 908 +account 0 18 4.060443 0.000000 882 +attend 0 18 4.060443 0.000000 893 +record 0 18 4.060443 0.000000 890 +debug 0 17 4.110874 0.000000 944 +outlin 0 17 4.110874 0.000000 914 +white 0 17 4.110874 0.000000 951 +zhang 0 16 4.174387 0.000000 980 +portion 0 16 4.174387 0.000000 971 +purchas 0 15 4.248495 0.000000 1030 +psycholog 0 15 4.248495 0.000000 1054 +later 0 15 4.248495 0.000000 1043 +photograph 0 15 4.248495 0.000000 1056 +score 0 15 4.248495 0.000000 1017 +doit 0 14 4.317488 0.000000 1111 +happi 0 14 4.317488 0.000000 1079 +trip 0 14 4.317488 0.000000 1113 +necessari 0 13 4.382027 0.000000 1147 +block 0 13 4.382027 0.000000 1183 +introduc 0 13 4.382027 0.000000 1139 +essenti 0 13 4.382027 0.000000 1137 +dewitt 0 12 4.465908 0.000000 1270 +weight 0 12 4.465908 0.000000 1204 +count 0 12 4.465908 0.000000 1239 +regularli 0 11 4.553877 0.000000 1338 +modular 0 10 4.653960 0.000000 1392 +sundai 0 10 4.653960 0.000000 1387 +true 0 10 4.653960 0.000000 1422 +strongli 0 10 4.653960 0.000000 1406 +hint 0 10 4.653960 0.000000 1419 +equal 0 10 4.653960 0.000000 1424 +card 0 10 4.653960 0.000000 1435 +black 0 10 4.653960 0.000000 1418 +laru 0 9 4.753590 0.000000 1560 +clear 0 9 4.753590 0.000000 1488 +wall 0 9 4.753590 0.000000 1553 +frank 0 9 4.753590 0.000000 1568 +mention 0 9 4.753590 0.000000 1569 +debugg 0 9 4.753590 0.000000 1493 +notat 0 9 4.753590 0.000000 1489 +login 0 9 4.753590 0.000000 1550 +absolut 0 8 4.875197 0.000000 1646 +integ 0 8 4.875197 0.000000 1688 +hash 0 8 4.875197 0.000000 1618 +isbn 0 7 5.010635 0.000000 1901 +scout 0 7 5.010635 0.000000 1903 +sciencesoffic 0 6 5.164786 0.000000 2101 +strong 0 6 5.164786 0.000000 2029 +troubl 0 6 5.164786 0.000000 2002 +mirror 0 6 5.164786 0.000000 2028 +notifi 0 6 5.164786 0.000000 2106 +skrentni 0 6 5.164786 0.000000 2104 +difficult 0 6 5.164786 0.000000 2035 +rough 0 6 5.164786 0.000000 2107 +byte 0 6 5.164786 0.000000 2108 +chin 0 5 5.347108 0.000000 2408 +tang 0 5 5.347108 0.000000 2409 +desk 0 5 5.347108 0.000000 2297 +situat 0 5 5.347108 0.000000 2365 +skip 0 5 5.347108 0.000000 2402 +crucial 0 5 5.347108 0.000000 2384 +chemistri 0 5 5.347108 0.000000 2405 +girl 0 5 5.347108 0.000000 2410 +assignmentsand 0 4 5.568345 0.000000 2760 +unless 0 4 5.568345 0.000000 2607 +birthdai 0 4 5.568345 0.000000 2800 +fora 0 4 5.568345 0.000000 2697 +reiter 0 3 5.857933 0.000000 3461 +narr 0 3 5.857933 0.000000 3454 +gradingther 0 3 5.857933 0.000000 3455 +freshman 0 3 5.857933 0.000000 3462 +cchin 0 2 6.263398 0.000000 4691 +compuer 0 2 6.263398 0.000000 4692 +weiz 0 2 6.263398 0.000000 4693 +amoffic 0 2 6.263398 0.000000 4671 +textth 0 2 6.263398 0.000000 4677 +carrano 0 2 6.263398 0.000000 4678 +needless 0 2 6.263398 0.000000 4694 +sophomor 0 2 6.263398 0.000000 4695 +databaseof 0 2 6.263398 0.000000 4696 +larusinstructor 0 1 6.957497 0.000000 8715 +laruslaru 0 1 6.957497 0.000000 8716 +amcontentsteach 0 1 6.957497 0.000000 8717 +assistantstextlectur 0 1 6.957497 0.000000 8718 +informationelectron 0 1 6.957497 0.000000 8719 +mailth 0 1 6.957497 0.000000 8720 +languagegradingexamscours 0 1 6.957497 0.000000 8721 +scheduleassign 0 1 6.957497 0.000000 8722 +assignmentscours 0 1 6.957497 0.000000 8723 +objectivesc 0 1 6.957497 0.000000 8724 +assistantswei 0 1 6.957497 0.000000 8725 +forthi 0 1 6.957497 0.000000 8726 +theassign 0 1 6.957497 0.000000 8727 +zhangoffic 0 1 6.957497 0.000000 8728 +entranc 0 1 6.957497 0.000000 8729 +maili 0 1 6.957497 0.000000 8730 +gdbthere 0 1 6.957497 0.000000 8731 +administrationbas 0 1 6.957497 0.000000 8732 +storagelectur 0 1 6.957497 0.000000 8733 +listslectur 0 1 6.957497 0.000000 8734 +stackslectur 0 1 6.957497 0.000000 8735 +queueslectur 0 1 6.957497 0.000000 8736 +hashinglectur 0 1 6.957497 0.000000 8737 +recursionlectur 0 1 6.957497 0.000000 8738 +treesbinari 0 1 6.957497 0.000000 8739 +searchlectur 0 1 6.957497 0.000000 8740 +treesgraphslectur 0 1 6.957497 0.000000 8741 +sortinglectur 0 1 6.957497 0.000000 8742 +tbaassign 0 1 6.957497 0.000000 8743 +nameyear 0 1 6.957497 0.000000 8744 +coursesprevi 0 1 6.957497 0.000000 8745 +experiencerec 0 1 6.957497 0.000000 8746 +tournament 0 1 6.957497 0.000000 8747 +aconcord 0 1 6.957497 0.000000 8748 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..f34a18f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +code 0 108 2.197225 0.000000 116 +place 0 106 2.197225 0.000000 124 +structur 0 106 2.197225 0.000000 105 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +need 0 98 2.302585 0.000000 135 +memori 0 101 2.302585 0.000000 139 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +dynam 0 76 2.564949 0.000000 194 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +simul 0 66 2.708050 0.000000 255 +copi 0 63 2.772589 0.000000 284 +creat 0 63 2.772589 0.000000 277 +wednesdai 0 64 2.772589 0.000000 261 +locat 0 59 2.833213 0.000000 303 +browser 0 56 2.890372 0.000000 313 +unix 0 58 2.890372 0.000000 308 +sampl 0 53 2.944439 0.000000 339 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +get 0 46 3.091042 0.000000 380 +done 0 47 3.091042 0.000000 381 +directori 0 45 3.135494 0.000000 396 +cach 0 41 3.218876 0.000000 432 +futur 0 41 3.218876 0.000000 427 +announc 0 40 3.258097 0.000000 441 +error 0 40 3.258097 0.000000 449 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +tree 0 36 3.367296 0.000000 492 +either 0 35 3.401197 0.000000 506 +jame 0 35 3.401197 0.000000 507 +chapter 0 32 3.465736 0.000000 536 +common 0 30 3.555348 0.000000 574 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +becom 0 28 3.610918 0.000000 603 +lab 0 24 3.761200 0.000000 698 +sort 0 22 3.850148 0.000000 738 +reserv 0 20 3.951244 0.000000 808 +alloc 0 20 3.951244 0.000000 821 +thur 0 19 4.007333 0.000000 847 +attend 0 18 4.060443 0.000000 893 +otherwis 0 17 4.110874 0.000000 922 +recurs 0 13 4.382027 0.000000 1127 +magic 0 11 4.553877 0.000000 1358 +queue 0 10 4.653960 0.000000 1386 +stack 0 10 4.653960 0.000000 1389 +wendt 0 10 4.653960 0.000000 1446 +cheng 0 10 4.653960 0.000000 1381 +kurt 0 9 4.753590 0.000000 1548 +unusu 0 9 4.753590 0.000000 1566 +forget 0 8 4.875197 0.000000 1712 +reload 0 8 4.875197 0.000000 1682 +hash 0 8 4.875197 0.000000 1618 +skrentni 0 6 5.164786 0.000000 2104 +skip 0 5 5.347108 0.000000 2402 +handin 0 5 5.347108 0.000000 2393 +overload 0 5 5.347108 0.000000 2403 +billi 0 5 5.347108 0.000000 2404 +outdat 0 4 5.568345 0.000000 2797 +appendix 0 4 5.568345 0.000000 2739 +makeup 0 3 5.857933 0.000000 3449 +vega 0 3 5.857933 0.000000 3450 +stale 0 2 6.263398 0.000000 4660 +lec 0 2 6.263398 0.000000 4661 +structureslectur 0 2 6.263398 0.000000 4662 +psychologylectur 0 2 6.263398 0.000000 4663 +psychologycours 0 2 6.263398 0.000000 4664 +baicheng 0 2 6.263398 0.000000 4665 +liao 0 2 6.263398 0.000000 4666 +bail 0 2 6.263398 0.000000 4667 +jiacheng 0 2 6.263398 0.000000 4668 +pmcopyright 0 2 6.263398 0.000000 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..db7e0180 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +hour 0 165 1.791759 0.000000 46 +madison 0 165 1.791759 0.000000 55 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +final 0 116 2.197225 0.000000 108 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +mondai 0 77 2.564949 0.000000 206 +state 0 76 2.564949 0.000000 207 +david 0 71 2.639057 0.000000 232 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +written 0 63 2.772589 0.000000 278 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +archiv 0 49 3.044522 0.000000 364 +fridai 0 44 3.135494 0.000000 390 +term 0 43 3.178054 0.000000 411 +examin 0 42 3.218876 0.000000 424 +theoret 0 39 3.258097 0.000000 446 +brian 0 38 3.295837 0.000000 466 +john 0 33 3.433987 0.000000 532 +hill 0 25 3.737670 0.000000 670 +tent 0 22 3.850148 0.000000 739 +martin 0 21 3.912023 0.000000 794 +north 0 19 4.007333 0.000000 873 +isbn 0 7 5.010635 0.000000 1901 +mcgraw 0 5 5.347108 0.000000 2262 +clarif 0 5 5.347108 0.000000 2253 +sundaram 0 3 5.857933 0.000000 3463 +cole 0 2 6.263398 0.000000 4697 +stukel 0 2 6.263398 0.000000 4698 +dakota 0 1 6.957497 0.000000 8749 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..15d39ab2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +madison 0 165 1.791759 0.000000 55 +lectur 0 135 1.945910 0.000000 73 +schedul 0 119 2.079442 0.000000 85 +databas 0 122 2.079442 0.000000 86 +final 0 116 2.197225 0.000000 108 +site 0 106 2.197225 0.000000 119 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +mathemat 0 108 2.197225 0.000000 123 +book 0 99 2.302585 0.000000 131 +octob 0 89 2.397895 0.000000 156 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +homework 1 79 2.564949 2.564949 193 +decemb 0 80 2.564949 0.000000 215 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +septemb 0 65 2.772589 0.000000 274 +wednesdai 0 64 2.772589 0.000000 261 +march 0 61 2.833213 0.000000 295 +locat 0 59 2.833213 0.000000 303 +semest 0 58 2.890372 0.000000 312 +overview 0 56 2.890372 0.000000 323 +variou 0 56 2.890372 0.000000 317 +sampl 0 53 2.944439 0.000000 339 +date 0 51 2.995732 0.000000 344 +telephon 0 50 3.044522 0.000000 373 +midterm 0 45 3.135494 0.000000 392 +math 0 44 3.135494 0.000000 402 +textbook 0 44 3.135494 0.000000 397 +mechan 0 43 3.178054 0.000000 416 +linear 0 41 3.218876 0.000000 431 +open 0 38 3.295837 0.000000 469 +comp 0 26 3.688879 0.000000 650 +relev 0 26 3.688879 0.000000 637 +period 0 22 3.850148 0.000000 743 +reserv 0 20 3.951244 0.000000 808 +item 0 19 4.007333 0.000000 856 +stat 0 17 4.110874 0.000000 924 +matlab 0 14 4.317488 0.000000 1081 +doit 0 14 4.317488 0.000000 1111 +wendt 0 10 4.653960 0.000000 1446 +mangasarian 0 9 4.753590 0.000000 1570 +preliminari 0 9 4.753590 0.000000 1480 +kurt 0 9 4.753590 0.000000 1548 +ferri 0 8 4.875197 0.000000 1715 +olvi 0 6 5.164786 0.000000 2109 +setup 0 2 6.263398 0.000000 4211 +bibliograph 0 2 6.263398 0.000000 4699 +programmingfal 0 1 6.957497 0.000000 8750 +pphone 0 1 6.957497 0.000000 8751 +searchabl 0 1 6.957497 0.000000 8752 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..5cfc750c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +techniqu 0 99 2.302585 0.000000 138 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +librari 0 87 2.484907 0.000000 181 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +overview 0 56 2.890372 0.000000 323 +date 0 51 2.995732 0.000000 344 +telephon 0 50 3.044522 0.000000 373 +appoint 0 49 3.044522 0.000000 358 +principl 0 48 3.044522 0.000000 357 +get 0 46 3.091042 0.000000 380 +fridai 0 44 3.135494 0.000000 390 +examin 0 42 3.218876 0.000000 424 +late 0 40 3.258097 0.000000 439 +comp 0 26 3.688879 0.000000 650 +reserv 0 20 3.951244 0.000000 808 +stat 0 17 4.110874 0.000000 924 +month 0 15 4.248495 0.000000 1025 +psycholog 0 15 4.248495 0.000000 1054 +susan 0 15 4.248495 0.000000 1050 +stori 0 14 4.317488 0.000000 1087 +regularli 0 11 4.553877 0.000000 1338 +wendt 0 10 4.653960 0.000000 1446 +tuth 0 9 4.753590 0.000000 1519 +recit 0 9 4.753590 0.000000 1475 +fischer 0 7 5.010635 0.000000 1893 +horwitz 0 5 5.347108 0.000000 2411 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +ullman 0 4 5.568345 0.000000 2749 +rahul 0 3 5.857933 0.000000 3464 +compilersspr 0 2 6.263398 0.000000 4700 +kapoor 0 2 6.263398 0.000000 4701 +sethi 0 2 6.263398 0.000000 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..66cac1e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,527 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +provid 0 121 2.079442 0.000000 94 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +final 0 116 2.197225 0.000000 108 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +make 0 111 2.197225 0.000000 120 +structur 0 106 2.197225 0.000000 105 +look 0 107 2.197225 0.000000 115 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +memori 0 101 2.302585 0.000000 139 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +call 0 91 2.397895 0.000000 153 +follow 0 92 2.397895 0.000000 143 +octob 0 89 2.397895 0.000000 156 +comment 0 93 2.397895 0.000000 146 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +contain 0 81 2.484907 0.000000 174 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +chang 0 82 2.484907 0.000000 163 +control 0 82 2.484907 0.000000 164 +member 0 84 2.484907 0.000000 165 +wide 0 84 2.484907 0.000000 185 +level 0 87 2.484907 0.000000 180 +librari 0 87 2.484907 0.000000 181 +learn 0 86 2.484907 0.000000 170 +messag 0 76 2.564949 0.000000 212 +refer 0 78 2.564949 0.000000 203 +issu 0 78 2.564949 0.000000 211 +sourc 0 77 2.564949 0.000000 201 +mondai 0 77 2.564949 0.000000 206 +exampl 0 77 2.564949 0.000000 195 +orient 0 80 2.564949 0.000000 205 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +decemb 0 80 2.564949 0.000000 215 +state 0 76 2.564949 0.000000 207 +come 0 78 2.564949 0.000000 202 +summari 0 73 2.639057 0.000000 237 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +name 0 72 2.639057 0.000000 220 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +write 0 72 2.639057 0.000000 222 +java 1 70 2.708050 2.708050 248 +differ 0 66 2.708050 0.000000 253 +receiv 0 66 2.708050 0.000000 244 +thursdai 0 70 2.708050 0.000000 241 +main 0 67 2.708050 0.000000 256 +view 0 70 2.708050 0.000000 254 +order 0 69 2.708050 0.000000 249 +test 0 66 2.708050 0.000000 252 +simul 0 66 2.708050 0.000000 255 +knowledg 0 67 2.708050 0.000000 243 +creat 0 63 2.772589 0.000000 277 +wednesdai 0 64 2.772589 0.000000 261 +collect 0 65 2.772589 0.000000 268 +new 0 64 2.772589 0.000000 262 +import 0 65 2.772589 0.000000 282 +virtual 0 62 2.772589 0.000000 285 +copi 0 63 2.772589 0.000000 284 +room 0 59 2.833213 0.000000 301 +type 0 61 2.833213 0.000000 296 +back 0 60 2.833213 0.000000 297 +content 0 59 2.833213 0.000000 302 +share 0 59 2.833213 0.000000 304 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +detail 0 57 2.890372 0.000000 321 +unix 0 58 2.890372 0.000000 308 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +think 0 57 2.890372 0.000000 314 +sever 0 56 2.890372 0.000000 322 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +processor 0 54 2.944439 0.000000 335 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +local 0 55 2.944439 0.000000 334 +run 0 51 2.995732 0.000000 347 +date 0 51 2.995732 0.000000 344 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +archiv 0 49 3.044522 0.000000 364 +set 0 50 3.044522 0.000000 361 +pointer 0 48 3.044522 0.000000 368 +standard 0 48 3.044522 0.000000 365 +frequent 0 49 3.044522 0.000000 367 +right 0 48 3.044522 0.000000 363 +get 0 46 3.091042 0.000000 380 +move 0 47 3.091042 0.000000 382 +electron 0 47 3.091042 0.000000 379 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +midterm 0 45 3.135494 0.000000 392 +discuss 0 45 3.135494 0.000000 399 +directori 0 45 3.135494 0.000000 396 +answer 0 45 3.135494 0.000000 391 +describ 0 45 3.135494 0.000000 400 +even 0 45 3.135494 0.000000 393 +long 0 43 3.178054 0.000000 413 +show 0 43 3.178054 0.000000 417 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +fast 0 42 3.218876 0.000000 429 +howev 0 41 3.218876 0.000000 422 +late 0 40 3.258097 0.000000 439 +error 0 40 3.258097 0.000000 449 +tutori 0 39 3.258097 0.000000 437 +must 0 40 3.258097 0.000000 442 +littl 0 39 3.258097 0.000000 454 +author 0 39 3.258097 0.000000 450 +programm 0 39 3.258097 0.000000 445 +correct 0 38 3.295837 0.000000 462 +seminar 0 38 3.295837 0.000000 470 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +workstat 0 37 3.332205 0.000000 479 +respons 0 37 3.332205 0.000000 476 +feel 0 37 3.332205 0.000000 483 +procedur 0 36 3.367296 0.000000 488 +short 0 36 3.367296 0.000000 499 +copyright 0 36 3.367296 0.000000 495 +post 0 35 3.401197 0.000000 505 +either 0 35 3.401197 0.000000 506 +least 0 35 3.401197 0.000000 516 +jame 0 35 3.401197 0.000000 507 +random 0 34 3.401197 0.000000 511 +concurr 0 34 3.401197 0.000000 501 +manual 0 35 3.401197 0.000000 504 +statist 0 35 3.401197 0.000000 521 +go 0 33 3.433987 0.000000 529 +chapter 0 32 3.465736 0.000000 536 +ad 0 32 3.465736 0.000000 544 +kind 0 32 3.465736 0.000000 541 +storag 0 31 3.496508 0.000000 553 +graph 0 30 3.555348 0.000000 576 +secur 0 30 3.555348 0.000000 577 +specifi 0 30 3.555348 0.000000 568 +option 0 30 3.555348 0.000000 575 +focu 0 30 3.555348 0.000000 571 +synchron 0 29 3.583519 0.000000 588 +built 0 29 3.583519 0.000000 592 +becom 0 28 3.610918 0.000000 603 +intend 0 28 3.610918 0.000000 599 +packag 0 28 3.610918 0.000000 614 +except 0 28 3.610918 0.000000 607 +quit 0 27 3.637586 0.000000 633 +determin 0 27 3.637586 0.000000 630 +arrai 0 27 3.637586 0.000000 627 +comp 0 26 3.688879 0.000000 650 +request 0 26 3.688879 0.000000 635 +session 0 26 3.688879 0.000000 643 +bound 0 26 3.688879 0.000000 659 +detect 0 26 3.688879 0.000000 646 +primari 0 25 3.737670 0.000000 669 +although 0 25 3.737670 0.000000 667 +other 0 24 3.761200 0.000000 697 +thank 0 23 3.806662 0.000000 721 +initi 0 23 3.806662 0.000000 717 +begin 0 23 3.806662 0.000000 716 +input 0 23 3.806662 0.000000 727 +variabl 0 23 3.806662 0.000000 715 +thread 0 23 3.806662 0.000000 722 +togeth 0 23 3.806662 0.000000 714 +disk 0 22 3.850148 0.000000 747 +dai 0 22 3.850148 0.000000 753 +finish 0 22 3.850148 0.000000 748 +sent 0 22 3.850148 0.000000 763 +recommend 0 22 3.850148 0.000000 737 +varieti 0 22 3.850148 0.000000 740 +tent 0 22 3.850148 0.000000 739 +avoid 0 21 3.912023 0.000000 799 +path 0 21 3.912023 0.000000 778 +watch 0 21 3.912023 0.000000 789 +latest 0 21 3.912023 0.000000 785 +theunivers 0 21 3.912023 0.000000 797 +among 0 21 3.912023 0.000000 781 +output 0 21 3.912023 0.000000 788 +programminglanguag 0 21 3.912023 0.000000 782 +entir 0 20 3.951244 0.000000 811 +alloc 0 20 3.951244 0.000000 821 +reserv 0 20 3.951244 0.000000 808 +thur 0 19 4.007333 0.000000 847 +histori 0 19 4.007333 0.000000 853 +definit 0 19 4.007333 0.000000 864 +separ 0 19 4.007333 0.000000 844 +andrew 0 19 4.007333 0.000000 849 +five 0 19 4.007333 0.000000 841 +exercis 0 19 4.007333 0.000000 842 +runtim 0 19 4.007333 0.000000 858 +excel 0 19 4.007333 0.000000 868 +assum 0 19 4.007333 0.000000 845 +lot 0 18 4.060443 0.000000 889 +behavior 0 18 4.060443 0.000000 881 +encourag 0 18 4.060443 0.000000 880 +four 0 18 4.060443 0.000000 905 +sept 0 17 4.110874 0.000000 952 +monitor 0 17 4.110874 0.000000 941 +protect 0 17 4.110874 0.000000 935 +anyon 0 17 4.110874 0.000000 916 +regist 0 17 4.110874 0.000000 938 +weekli 0 17 4.110874 0.000000 919 +segment 0 17 4.110874 0.000000 931 +outlin 0 17 4.110874 0.000000 914 +devic 0 16 4.174387 0.000000 1002 +zhang 0 16 4.174387 0.000000 980 +modern 0 16 4.174387 0.000000 966 +weslei 0 16 4.174387 0.000000 983 +transfer 0 16 4.174387 0.000000 967 +easi 0 16 4.174387 0.000000 969 +choos 0 16 4.174387 0.000000 964 +condit 0 16 4.174387 0.000000 975 +critic 0 16 4.174387 0.000000 982 +later 0 15 4.248495 0.000000 1043 +todd 0 15 4.248495 0.000000 1051 +score 0 15 4.248495 0.000000 1017 +enough 0 15 4.248495 0.000000 1040 +demand 0 14 4.317488 0.000000 1073 +warn 0 14 4.317488 0.000000 1068 +shown 0 14 4.317488 0.000000 1080 +topolog 0 14 4.317488 0.000000 1089 +central 0 13 4.382027 0.000000 1160 +directli 0 13 4.382027 0.000000 1141 +forth 0 13 4.382027 0.000000 1186 +everyth 0 13 4.382027 0.000000 1169 +count 0 12 4.465908 0.000000 1239 +solari 0 12 4.465908 0.000000 1238 +minor 0 12 4.465908 0.000000 1237 +rememb 0 12 4.465908 0.000000 1217 +readi 0 12 4.465908 0.000000 1242 +addison 0 12 4.465908 0.000000 1230 +grow 0 12 4.465908 0.000000 1209 +readabl 0 12 4.465908 0.000000 1258 +buffer 0 12 4.465908 0.000000 1211 +string 0 11 4.553877 0.000000 1340 +fix 0 11 4.553877 0.000000 1327 +tue 0 11 4.553877 0.000000 1308 +regard 0 11 4.553877 0.000000 1309 +extrem 0 11 4.553877 0.000000 1330 +faster 0 11 4.553877 0.000000 1323 +market 0 11 4.553877 0.000000 1361 +placement 0 10 4.653960 0.000000 1420 +strongli 0 10 4.653960 0.000000 1406 +grain 0 10 4.653960 0.000000 1448 +paragraph 0 10 4.653960 0.000000 1449 +hint 0 10 4.653960 0.000000 1419 +cheat 0 10 4.653960 0.000000 1395 +recoveri 0 9 4.753590 0.000000 1474 +familiar 0 9 4.753590 0.000000 1485 +pair 0 9 4.753590 0.000000 1503 +correctli 0 9 4.753590 0.000000 1478 +mention 0 9 4.753590 0.000000 1569 +said 0 9 4.753590 0.000000 1571 +introductori 0 9 4.753590 0.000000 1479 +solomon 0 8 4.875197 0.000000 1716 +star 0 8 4.875197 0.000000 1717 +replac 0 8 4.875197 0.000000 1668 +simpli 0 8 4.875197 0.000000 1626 +rais 0 8 4.875197 0.000000 1711 +partner 0 8 4.875197 0.000000 1648 +crash 0 8 4.875197 0.000000 1616 +switch 0 8 4.875197 0.000000 1718 +gather 0 8 4.875197 0.000000 1719 +peterson 0 7 5.010635 0.000000 1850 +philosoph 0 7 5.010635 0.000000 1904 +bookstor 0 7 5.010635 0.000000 1837 +prevent 0 7 5.010635 0.000000 1827 +bug 0 7 5.010635 0.000000 1801 +slightli 0 7 5.010635 0.000000 1795 +chan 0 7 5.010635 0.000000 1876 +occasion 0 7 5.010635 0.000000 1905 +awar 0 7 5.010635 0.000000 1800 +prentic 0 7 5.010635 0.000000 1838 +spot 0 7 5.010635 0.000000 1894 +fortun 0 7 5.010635 0.000000 1872 +bottom 0 7 5.010635 0.000000 1906 +compact 0 7 5.010635 0.000000 1907 +theproject 0 6 5.164786 0.000000 1981 +sciencesoffic 0 6 5.164786 0.000000 2101 +garbag 0 6 5.164786 0.000000 1986 +notifi 0 6 5.164786 0.000000 2106 +nine 0 6 5.164786 0.000000 2047 +mistak 0 6 5.164786 0.000000 2110 +creation 0 6 5.164786 0.000000 2069 +handi 0 6 5.164786 0.000000 2111 +neither 0 6 5.164786 0.000000 1990 +caus 0 5 5.347108 0.000000 2298 +salt 0 5 5.347108 0.000000 2413 +forprogram 0 5 5.347108 0.000000 2361 +sparcstat 0 5 5.347108 0.000000 2406 +favor 0 5 5.347108 0.000000 2414 +commod 0 5 5.347108 0.000000 2415 +eas 0 5 5.347108 0.000000 2267 +anda 0 5 5.347108 0.000000 2416 +remain 0 5 5.347108 0.000000 2278 +race 0 5 5.347108 0.000000 2417 +deadlock 0 4 5.568345 0.000000 2641 +fork 0 4 5.568345 0.000000 2801 +makefil 0 4 5.568345 0.000000 2662 +popular 0 4 5.568345 0.000000 2802 +cshrc 0 4 5.568345 0.000000 2759 +theprogram 0 4 5.568345 0.000000 2686 +multitask 0 4 5.568345 0.000000 2803 +systemsand 0 4 5.568345 0.000000 2804 +usedto 0 4 5.568345 0.000000 2643 +subsequ 0 4 5.568345 0.000000 2665 +withth 0 4 5.568345 0.000000 2805 +marvin 0 4 5.568345 0.000000 2806 +argument 0 3 5.857933 0.000000 3120 +caught 0 3 5.857933 0.000000 3465 +omit 0 3 5.857933 0.000000 3466 +offset 0 3 5.857933 0.000000 3467 +urgent 0 3 5.857933 0.000000 3316 +listof 0 3 5.857933 0.000000 3322 +sendmail 0 3 5.857933 0.000000 3099 +tanenbaum 0 3 5.857933 0.000000 3397 +dialect 0 3 5.857933 0.000000 3226 +acquaint 0 3 5.857933 0.000000 3468 +subscript 0 3 5.857933 0.000000 3469 +easier 0 3 5.857933 0.000000 3470 +timet 0 3 5.857933 0.000000 3471 +dine 0 3 5.857933 0.000000 3472 +avaiabl 0 2 6.263398 0.000000 4703 +thejava 0 2 6.263398 0.000000 4704 +swap 0 2 6.263398 0.000000 4466 +arnold 0 2 6.263398 0.000000 4705 +semaphor 0 2 6.263398 0.000000 4555 +troffic 0 2 6.263398 0.000000 4706 +mellencamp 0 2 6.263398 0.000000 4707 +mellen 0 2 6.263398 0.000000 4708 +breakdown 0 2 6.263398 0.000000 4407 +typo 0 2 6.263398 0.000000 4180 +tung 0 2 6.263398 0.000000 4709 +preemptiv 0 2 6.263398 0.000000 4319 +colloquia 0 2 6.263398 0.000000 4710 +sciencesand 0 2 6.263398 0.000000 4711 +tutorialth 0 2 6.263398 0.000000 4453 +designedto 0 2 6.263398 0.000000 4712 +havethre 0 2 6.263398 0.000000 4562 +daysof 0 2 6.263398 0.000000 4563 +eachof 0 2 6.263398 0.000000 4564 +congeni 0 2 6.263398 0.000000 4713 +null 0 2 6.263398 0.000000 4714 +mysteri 0 2 6.263398 0.000000 4715 +char 0 2 6.263398 0.000000 4716 +trendi 0 2 6.263398 0.000000 4717 +coursewil 0 2 6.263398 0.000000 4718 +primer 0 2 6.263398 0.000000 4719 +manualfor 0 2 6.263398 0.000000 4720 +yourgrad 0 2 6.263398 0.000000 4121 +terminolog 0 2 6.263398 0.000000 4410 +eduthu 0 2 6.263398 0.000000 4721 +threadschedul 0 1 6.957497 0.000000 8753 +forproject 0 1 6.957497 0.000000 8754 +graphcontain 0 1 6.957497 0.000000 8755 +sched 0 1 6.957497 0.000000 8756 +substr 0 1 6.957497 0.000000 8757 +thejavaprogram 0 1 6.957497 0.000000 8758 +languagebi 0 1 6.957497 0.000000 8759 +gosl 0 1 6.957497 0.000000 8760 +systemssect 0 1 6.957497 0.000000 8761 +instructormarvin 0 1 6.957497 0.000000 8762 +tarob 0 1 6.957497 0.000000 8763 +mwfoffic 0 1 6.957497 0.000000 8764 +distributioni 0 1 6.957497 0.000000 8765 +typograph 0 1 6.957497 0.000000 8766 +importantli 0 1 6.957497 0.000000 8767 +arraywa 0 1 6.957497 0.000000 8768 +isavail 0 1 6.957497 0.000000 8769 +courseus 0 1 6.957497 0.000000 8770 +likelyb 0 1 6.957497 0.000000 8771 +presentedin 0 1 6.957497 0.000000 8772 +givefork 0 1 6.957497 0.000000 8773 +specificationshould 0 1 6.957497 0.000000 8774 +garbl 0 1 6.957497 0.000000 8775 +jake 0 1 6.957497 0.000000 8776 +dawlei 0 1 6.957497 0.000000 8777 +carr 0 1 6.957497 0.000000 8778 +detailssect 0 1 6.957497 0.000000 8779 +lipe 0 1 6.957497 0.000000 8780 +srccontain 0 1 6.957497 0.000000 8781 +javacontain 0 1 6.957497 0.000000 8782 +classgraphdescrib 0 1 6.957497 0.000000 8783 +petersoncycl 0 1 6.957497 0.000000 8784 +notacycl 0 1 6.957497 0.000000 8785 +petersonacycl 0 1 6.957497 0.000000 8786 +acycl 0 1 6.957497 0.000000 8787 +sharingfork 0 1 6.957497 0.000000 8788 +jenner 0 1 6.957497 0.000000 8789 +maxthink 0 1 6.957497 0.000000 8790 +maxeat 0 1 6.957497 0.000000 8791 +versionha 0 1 6.957497 0.000000 8792 +argumenti 0 1 6.957497 0.000000 8793 +charactersin 0 1 6.957497 0.000000 8794 +franco 0 1 6.957497 0.000000 8795 +maketo 0 1 6.957497 0.000000 8796 +compilewithout 0 1 6.957497 0.000000 8797 +computershav 0 1 6.957497 0.000000 8798 +tutoriali 0 1 6.957497 0.000000 8799 +onthread 0 1 6.957497 0.000000 8800 +checkth 0 1 6.957497 0.000000 8801 +ajava 0 1 6.957497 0.000000 8802 +afil 0 1 6.957497 0.000000 8803 +onelin 0 1 6.957497 0.000000 8804 +localor 0 1 6.957497 0.000000 8805 +csmon 0 1 6.957497 0.000000 8806 +cslast 0 1 6.957497 0.000000 8807 +beprocess 0 1 6.957497 0.000000 8808 +replacementalgorithm 0 1 6.957497 0.000000 8809 +statisticsdiscuss 0 1 6.957497 0.000000 8810 +psychologyth 0 1 6.957497 0.000000 8811 +anyquest 0 1 6.957497 0.000000 8812 +thetext 0 1 6.957497 0.000000 8813 +systemsbi 0 1 6.957497 0.000000 8814 +specificationjava 0 1 6.957497 0.000000 8815 +documentationwatch 0 1 6.957497 0.000000 8816 +unixoper 0 1 6.957497 0.000000 8817 +anycomput 0 1 6.957497 0.000000 8818 +requireddata 0 1 6.957497 0.000000 8819 +involveprocess 0 1 6.957497 0.000000 8820 +butyou 0 1 6.957497 0.000000 8821 +vigor 0 1 6.957497 0.000000 8822 +punish 0 1 6.957497 0.000000 8823 +dateind 0 1 6.957497 0.000000 8824 +uniniti 0 1 6.957497 0.000000 8825 +runtimerath 0 1 6.957497 0.000000 8826 +byproduct 0 1 6.957497 0.000000 8827 +withlanguag 0 1 6.957497 0.000000 8828 +alwaysa 0 1 6.957497 0.000000 8829 +disloc 0 1 6.957497 0.000000 8830 +thetransit 0 1 6.957497 0.000000 8831 +amazingli 0 1 6.957497 0.000000 8832 +youalreadi 0 1 6.957497 0.000000 8833 +arefer 0 1 6.957497 0.000000 8834 +manuali 0 1 6.957497 0.000000 8835 +wayfrom 0 1 6.957497 0.000000 8836 +sophisticatedprogram 0 1 6.957497 0.000000 8837 +ofoth 0 1 6.957497 0.000000 8838 +niceonlin 0 1 6.957497 0.000000 8839 +tutorialabout 0 1 6.957497 0.000000 8840 +javaoct 0 1 6.957497 0.000000 8841 +synchronizationoct 0 1 6.957497 0.000000 8842 +schedulingoct 0 1 6.957497 0.000000 8843 +schedulingdec 0 1 6.957497 0.000000 8844 +systemsdec 0 1 6.957497 0.000000 8845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..c7d5d4ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +file 0 132 1.945910 0.000000 70 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +object 0 138 1.945910 0.000000 79 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +manag 0 114 2.197225 0.000000 125 +instructor 0 108 2.197225 0.000000 107 +memori 0 101 2.302585 0.000000 139 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +call 0 91 2.397895 0.000000 153 +solut 0 82 2.484907 0.000000 162 +tuesdai 0 73 2.639057 0.000000 219 +name 0 72 2.639057 0.000000 220 +thursdai 0 70 2.708050 0.000000 241 +java 0 70 2.708050 0.000000 248 +virtual 0 62 2.772589 0.000000 285 +space 0 57 2.890372 0.000000 310 +date 0 51 2.995732 0.000000 344 +discuss 0 45 3.135494 0.000000 399 +fridai 0 44 3.135494 0.000000 390 +textbook 0 44 3.135494 0.000000 397 +cach 0 41 3.218876 0.000000 432 +review 0 42 3.218876 0.000000 425 +procedur 0 36 3.367296 0.000000 488 +concurr 0 34 3.401197 0.000000 501 +survei 0 35 3.401197 0.000000 513 +global 0 34 3.401197 0.000000 520 +chapter 0 32 3.465736 0.000000 536 +secur 0 30 3.555348 0.000000 577 +synchron 0 29 3.583519 0.000000 588 +thread 0 23 3.806662 0.000000 722 +cooper 0 22 3.850148 0.000000 757 +protect 0 17 4.110874 0.000000 935 +monitor 0 17 4.110874 0.000000 941 +quiz 0 16 4.174387 0.000000 990 +pagec 0 15 4.248495 0.000000 1011 +remot 0 15 4.248495 0.000000 1041 +demand 0 14 4.317488 0.000000 1073 +quizz 0 13 4.382027 0.000000 1151 +translat 0 13 4.382027 0.000000 1164 +host 0 11 4.553877 0.000000 1306 +vernon 0 9 4.753590 0.000000 1556 +core 0 7 5.010635 0.000000 1809 +mutual 0 5 5.347108 0.000000 2418 +systemsfal 0 4 5.568345 0.000000 2683 +deadlock 0 4 5.568345 0.000000 2641 +thanksgiv 0 2 6.263398 0.000000 4185 +maryvernon 0 1 6.957497 0.000000 8846 +andkarunamuthiah 0 1 6.957497 0.000000 8847 +beinterchang 0 1 6.957497 0.000000 8848 +archiveapproxim 0 1 6.957497 0.000000 8849 +topicsweek 0 1 6.957497 0.000000 8850 +oftopicsreadingsep 0 1 6.957497 0.000000 8851 +processeschapt 0 1 6.957497 0.000000 8852 +threadschapt 0 1 6.957497 0.000000 8853 +exclusioncont 0 1 6.957497 0.000000 8854 +semaphorescont 0 1 6.957497 0.000000 8855 +summarycont 0 1 6.957497 0.000000 8856 +doct 0 1 6.957497 0.000000 8857 +schedulingchapt 0 1 6.957497 0.000000 8858 +tlbschapter 0 1 6.957497 0.000000 8859 +memorycont 0 1 6.957497 0.000000 8860 +systemschapt 0 1 6.957497 0.000000 8861 +directorieschapt 0 1 6.957497 0.000000 8862 +methodstbanov 0 1 6.957497 0.000000 8863 +reviewchapt 0 1 6.957497 0.000000 8864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..11360764 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +spring 0 131 2.079442 0.000000 88 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +select 0 91 2.397895 0.000000 154 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +section 0 94 2.397895 0.000000 149 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +decemb 0 80 2.564949 0.000000 215 +tuesdai 0 73 2.639057 0.000000 219 +david 0 71 2.639057 0.000000 232 +thursdai 0 70 2.708050 0.000000 241 +simul 0 66 2.708050 0.000000 255 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +descript 0 64 2.772589 0.000000 271 +previou 0 62 2.772589 0.000000 290 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +sampl 0 53 2.944439 0.000000 339 +appoint 0 49 3.044522 0.000000 358 +get 0 46 3.091042 0.000000 380 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +error 0 40 3.258097 0.000000 449 +correct 0 38 3.295837 0.000000 462 +demonstr 0 24 3.761200 0.000000 694 +wood 0 11 4.553877 0.000000 1355 +deadlin 0 9 4.753590 0.000000 1502 +phil 0 5 5.347108 0.000000 2419 +mentor 0 4 5.568345 0.000000 2591 +atkinson 0 2 6.263398 0.000000 4722 +vhdl 0 1 6.957497 0.000000 8865 +mentorassign 0 1 6.957497 0.000000 8866 +projectthi 0 1 6.957497 0.000000 8867 +examsth 0 1 6.957497 0.000000 8868 +endterm 0 1 6.957497 0.000000 8869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..d2e675b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +assign 1 135 1.945910 1.945910 66 +year 0 148 1.945910 0.000000 84 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +postscript 0 131 2.079442 0.000000 90 +databas 0 122 2.079442 0.000000 86 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +pleas 0 113 2.197225 0.000000 114 +topic 0 114 2.197225 0.000000 110 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +info 0 85 2.484907 0.000000 176 +solut 0 82 2.484907 0.000000 162 +chang 0 82 2.484907 0.000000 163 +help 0 83 2.484907 0.000000 175 +resourc 0 81 2.484907 0.000000 172 +issu 0 78 2.564949 0.000000 211 +html 0 75 2.639057 0.000000 235 +handout 0 64 2.772589 0.000000 263 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +experi 0 64 2.772589 0.000000 283 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +date 0 51 2.995732 0.000000 344 +fridai 0 44 3.135494 0.000000 390 +midterm 0 45 3.135494 0.000000 392 +discuss 0 45 3.135494 0.000000 399 +tutori 0 39 3.258097 0.000000 437 +tree 0 36 3.367296 0.000000 492 +print 0 34 3.401197 0.000000 503 +chapter 0 32 3.465736 0.000000 536 +yahoo 0 24 3.761200 0.000000 707 +instead 0 22 3.850148 0.000000 756 +exercis 0 19 4.007333 0.000000 842 +prerequisit 0 19 4.007333 0.000000 846 +thur 0 19 4.007333 0.000000 847 +sept 0 17 4.110874 0.000000 952 +ramakrishnan 0 16 4.174387 0.000000 972 +convent 0 14 4.317488 0.000000 1072 +raghu 0 12 4.465908 0.000000 1212 +tue 0 11 4.553877 0.000000 1308 +debugg 0 9 4.753590 0.000000 1493 +entri 0 8 4.875197 0.000000 1678 +minibas 0 4 5.568345 0.000000 2608 +dont 0 3 5.857933 0.000000 3473 +sybas 0 2 6.263398 0.000000 4723 +xbao 0 1 6.957497 0.000000 8870 +implementationc 0 1 6.957497 0.000000 8871 +implementationcours 0 1 6.957497 0.000000 8872 +assignmentoth 0 1 6.957497 0.000000 8873 +ingraham 0 1 6.957497 0.000000 8874 +xuemei 0 1 6.957497 0.000000 8875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..7ca90157 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +lectur 1 135 1.945910 1.945910 73 +assign 0 135 1.945910 0.000000 66 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +thing 0 84 2.484907 0.000000 189 +chang 0 82 2.484907 0.000000 163 +exam 0 86 2.484907 0.000000 169 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +import 0 65 2.772589 0.000000 282 +wednesdai 0 64 2.772589 0.000000 261 +semest 0 58 2.890372 0.000000 312 +particular 0 51 2.995732 0.000000 352 +still 0 50 3.044522 0.000000 362 +discuss 0 45 3.135494 0.000000 399 +fridai 0 44 3.135494 0.000000 390 +probabl 0 40 3.258097 0.000000 455 +close 0 38 3.295837 0.000000 465 +taught 0 33 3.433987 0.000000 526 +option 0 30 3.555348 0.000000 575 +progress 0 28 3.610918 0.000000 598 +jeff 0 25 3.737670 0.000000 673 +lab 0 24 3.761200 0.000000 698 +cooper 0 22 3.850148 0.000000 757 +fact 0 21 3.912023 0.000000 780 +psycholog 0 15 4.248495 0.000000 1054 +naughton 0 10 4.653960 0.000000 1450 +russel 0 9 4.753590 0.000000 1507 +minibas 0 4 5.568345 0.000000 2608 +obvious 0 3 5.857933 0.000000 3474 +addinginform 0 1 6.957497 0.000000 8876 +meetingroom 0 1 6.957497 0.000000 8877 +labsfor 0 1 6.957497 0.000000 8878 +beenmov 0 1 6.957497 0.000000 8879 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..a30c4185 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +introduct 0 126 2.079442 0.000000 87 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +instructor 0 108 2.197225 0.000000 107 +book 0 99 2.302585 0.000000 131 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +homework 1 79 2.564949 2.564949 193 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +descript 0 64 2.772589 0.000000 271 +organ 0 65 2.772589 0.000000 265 +archiv 0 49 3.044522 0.000000 364 +midterm 0 45 3.135494 0.000000 392 +graph 0 30 3.555348 0.000000 576 +reserv 0 20 3.951244 0.000000 808 +eric 0 19 4.007333 0.000000 870 +bill 0 11 4.553877 0.000000 1297 +appt 0 5 5.347108 0.000000 2312 +bach 0 4 5.568345 0.000000 2708 +fractal 0 3 5.857933 0.000000 3475 +behaviour 0 2 6.263398 0.000000 4724 +raji 0 1 6.957497 0.000000 8880 +donaldson 0 1 6.957497 0.000000 8881 +gopalakrishnan 0 1 6.957497 0.000000 8882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..4ac27981 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +network 1 168 1.791759 1.791759 61 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +implement 0 152 1.791759 0.000000 52 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +advanc 0 99 2.302585 0.000000 130 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +select 0 91 2.397895 0.000000 154 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +refer 0 78 2.564949 0.000000 203 +interfac 0 79 2.564949 0.000000 209 +mondai 0 77 2.564949 0.000000 206 +complet 0 77 2.564949 0.000000 208 +free 0 73 2.639057 0.000000 224 +tuesdai 0 73 2.639057 0.000000 219 +html 0 75 2.639057 0.000000 235 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +evalu 0 64 2.772589 0.000000 266 +content 0 59 2.833213 0.000000 302 +overview 0 56 2.890372 0.000000 323 +unix 0 58 2.890372 0.000000 308 +tabl 0 51 2.995732 0.000000 346 +archiv 0 49 3.044522 0.000000 364 +appoint 0 49 3.044522 0.000000 358 +adapt 0 46 3.091042 0.000000 387 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +term 0 43 3.178054 0.000000 411 +review 0 42 3.218876 0.000000 425 +form 0 39 3.258097 0.000000 443 +error 0 40 3.258097 0.000000 449 +slide 0 38 3.295837 0.000000 467 +feel 0 37 3.332205 0.000000 483 +connect 0 37 3.332205 0.000000 485 +eduoffic 0 33 3.433987 0.000000 531 +richard 0 31 3.496508 0.000000 559 +option 0 30 3.555348 0.000000 575 +packag 0 28 3.610918 0.000000 614 +comp 0 26 3.688879 0.000000 650 +reliabl 0 25 3.737670 0.000000 674 +latest 0 21 3.912023 0.000000 785 +annot 0 21 3.912023 0.000000 775 +partial 0 18 4.060443 0.000000 900 +layer 0 17 4.110874 0.000000 926 +steven 0 17 4.110874 0.000000 953 +warn 0 14 4.317488 0.000000 1068 +prior 0 10 4.653960 0.000000 1438 +criteria 0 9 4.753590 0.000000 1477 +lawrenc 0 7 5.010635 0.000000 1908 +prentic 0 7 5.010635 0.000000 1838 +isbn 0 7 5.010635 0.000000 1901 +conveni 0 6 5.164786 0.000000 2088 +moder 0 6 5.164786 0.000000 2112 +landweb 0 3 5.857933 0.000000 3402 +hereto 0 3 5.857933 0.000000 3476 +gradingmidterm 0 3 5.857933 0.000000 3230 +socket 0 2 6.263398 0.000000 4725 +statphon 0 2 6.263398 0.000000 4726 +ipng 0 2 6.263398 0.000000 4727 +powerpoint 0 1 6.957497 0.000000 8883 +networksintroduct 0 1 6.957497 0.000000 8884 +readingsclick 0 1 6.957497 0.000000 8885 +networkingcours 0 1 6.957497 0.000000 8886 +madisoncours 0 1 6.957497 0.000000 8887 +informationlecturetim 0 1 6.957497 0.000000 8888 +mwfplace 0 1 6.957497 0.000000 8889 +statclass 0 1 6.957497 0.000000 8890 +listinstructor 0 1 6.957497 0.000000 8891 +landweberoffic 0 1 6.957497 0.000000 8892 +srinivasa 0 1 6.957497 0.000000 8893 +narayananoffic 0 1 6.957497 0.000000 8894 +teitelbaumoffic 0 1 6.957497 0.000000 8895 +naemail 0 1 6.957497 0.000000 8896 +garbler 0 1 6.957497 0.000000 8897 +bibliographyread 0 1 6.957497 0.000000 8898 +icmp 0 1 6.957497 0.000000 8899 +ospf 0 1 6.957497 0.000000 8900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..d021c2a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +note 0 142 1.945910 0.000000 67 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +part 0 98 2.302585 0.000000 129 +solut 0 82 2.484907 0.000000 162 +homework 1 79 2.564949 2.564949 193 +descript 0 64 2.772589 0.000000 271 +robert 0 30 3.555348 0.000000 567 +option 0 30 3.555348 0.000000 575 +comp 0 26 3.688879 0.000000 650 +meyer 0 2 6.263398 0.000000 4728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..001930a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +grade 0 90 2.397895 0.000000 142 +second 0 81 2.484907 0.000000 166 +librari 0 87 2.484907 0.000000 181 +novemb 0 81 2.484907 0.000000 179 +homework 1 79 2.564949 2.564949 193 +optim 0 79 2.564949 0.000000 197 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +order 0 69 2.708050 0.000000 249 +thursdai 0 70 2.708050 0.000000 241 +function 0 62 2.772589 0.000000 275 +wednesdai 0 64 2.772589 0.000000 261 +publish 0 57 2.890372 0.000000 326 +overview 0 56 2.890372 0.000000 323 +semest 0 58 2.890372 0.000000 312 +scientif 0 53 2.944439 0.000000 341 +week 0 52 2.995732 0.000000 343 +telephon 0 50 3.044522 0.000000 373 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +midterm 0 45 3.135494 0.000000 392 +examin 0 42 3.218876 0.000000 424 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +york 0 41 3.218876 0.000000 435 +linear 0 41 3.218876 0.000000 431 +michael 0 35 3.401197 0.000000 514 +altern 0 26 3.688879 0.000000 641 +period 0 22 3.850148 0.000000 743 +siam 0 21 3.912023 0.000000 800 +theorem 0 21 3.912023 0.000000 786 +reserv 0 20 3.951244 0.000000 808 +differenti 0 17 4.110874 0.000000 921 +stat 0 17 4.110874 0.000000 924 +condit 0 16 4.174387 0.000000 975 +nonlinear 0 14 4.317488 0.000000 1107 +philadelphia 0 12 4.465908 0.000000 1244 +penalti 0 10 4.653960 0.000000 1405 +wendt 0 10 4.653960 0.000000 1446 +criteria 0 9 4.753590 0.000000 1477 +mangasarian 0 9 4.753590 0.000000 1570 +exact 0 9 4.753590 0.000000 1509 +kurt 0 9 4.753590 0.000000 1548 +ferri 0 8 4.875197 0.000000 1715 +olvi 0 6 5.164786 0.000000 2109 +inequ 0 6 5.164786 0.000000 2113 +augment 0 5 5.347108 0.000000 2350 +convex 0 4 5.568345 0.000000 2807 +concav 0 4 5.568345 0.000000 2808 +wilei 0 4 5.568345 0.000000 2669 +bertseka 0 3 5.857933 0.000000 3477 +lagrangian 0 3 5.857933 0.000000 3478 +gradient 0 3 5.857933 0.000000 3479 +applicationsfal 0 2 6.263398 0.000000 4729 +bazaraa 0 2 6.263398 0.000000 4730 +sherali 0 2 6.263398 0.000000 4731 +shetti 0 2 6.263398 0.000000 4732 +athena 0 2 6.263398 0.000000 4733 +saddlepoint 0 2 6.263398 0.000000 4734 +dualiti 0 2 6.263398 0.000000 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..001930a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +grade 0 90 2.397895 0.000000 142 +second 0 81 2.484907 0.000000 166 +librari 0 87 2.484907 0.000000 181 +novemb 0 81 2.484907 0.000000 179 +homework 1 79 2.564949 2.564949 193 +optim 0 79 2.564949 0.000000 197 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +order 0 69 2.708050 0.000000 249 +thursdai 0 70 2.708050 0.000000 241 +function 0 62 2.772589 0.000000 275 +wednesdai 0 64 2.772589 0.000000 261 +publish 0 57 2.890372 0.000000 326 +overview 0 56 2.890372 0.000000 323 +semest 0 58 2.890372 0.000000 312 +scientif 0 53 2.944439 0.000000 341 +week 0 52 2.995732 0.000000 343 +telephon 0 50 3.044522 0.000000 373 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +midterm 0 45 3.135494 0.000000 392 +examin 0 42 3.218876 0.000000 424 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +york 0 41 3.218876 0.000000 435 +linear 0 41 3.218876 0.000000 431 +michael 0 35 3.401197 0.000000 514 +altern 0 26 3.688879 0.000000 641 +period 0 22 3.850148 0.000000 743 +siam 0 21 3.912023 0.000000 800 +theorem 0 21 3.912023 0.000000 786 +reserv 0 20 3.951244 0.000000 808 +differenti 0 17 4.110874 0.000000 921 +stat 0 17 4.110874 0.000000 924 +condit 0 16 4.174387 0.000000 975 +nonlinear 0 14 4.317488 0.000000 1107 +philadelphia 0 12 4.465908 0.000000 1244 +penalti 0 10 4.653960 0.000000 1405 +wendt 0 10 4.653960 0.000000 1446 +criteria 0 9 4.753590 0.000000 1477 +mangasarian 0 9 4.753590 0.000000 1570 +exact 0 9 4.753590 0.000000 1509 +kurt 0 9 4.753590 0.000000 1548 +ferri 0 8 4.875197 0.000000 1715 +olvi 0 6 5.164786 0.000000 2109 +inequ 0 6 5.164786 0.000000 2113 +augment 0 5 5.347108 0.000000 2350 +convex 0 4 5.568345 0.000000 2807 +concav 0 4 5.568345 0.000000 2808 +wilei 0 4 5.568345 0.000000 2669 +bertseka 0 3 5.857933 0.000000 3477 +lagrangian 0 3 5.857933 0.000000 3478 +gradient 0 3 5.857933 0.000000 3479 +applicationsfal 0 2 6.263398 0.000000 4729 +bazaraa 0 2 6.263398 0.000000 4730 +sherali 0 2 6.263398 0.000000 4731 +shetti 0 2 6.263398 0.000000 4732 +athena 0 2 6.263398 0.000000 4733 +saddlepoint 0 2 6.263398 0.000000 4734 +dualiti 0 2 6.263398 0.000000 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..458dc7e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +professor 0 137 1.945910 0.000000 76 +postscript 0 131 2.079442 0.000000 90 +pleas 0 113 2.197225 0.000000 114 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +text 0 98 2.302585 0.000000 133 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +comment 0 93 2.397895 0.000000 146 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +exampl 0 77 2.564949 0.000000 195 +html 1 75 2.639057 2.639057 235 +onlin 0 75 2.639057 0.000000 223 +evalu 0 64 2.772589 0.000000 266 +new 0 64 2.772589 0.000000 262 +instruct 0 53 2.944439 0.000000 332 +suggest 0 53 2.944439 0.000000 331 +tutori 0 39 3.258097 0.000000 437 +manual 0 35 3.401197 0.000000 504 +print 0 34 3.401197 0.000000 503 +least 0 35 3.401197 0.000000 516 +initi 0 23 3.806662 0.000000 717 +half 0 21 3.912023 0.000000 776 +sept 0 17 4.110874 0.000000 952 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +devis 0 10 4.653960 0.000000 1451 +chan 0 7 5.010635 0.000000 1876 +yong 0 4 5.568345 0.000000 2809 +chee 0 3 5.857933 0.000000 3480 +mimic 0 2 6.263398 0.000000 4736 +cychan 0 2 6.263398 0.000000 4737 +qnet 0 1 6.957497 0.000000 8901 +devc 0 1 6.957497 0.000000 8902 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..75b0c00b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +relat 0 139 1.945910 0.000000 68 +assign 0 135 1.945910 0.000000 66 +postscript 1 131 2.079442 2.079442 90 +technolog 0 131 2.079442 0.000000 102 +schedul 0 119 2.079442 0.000000 85 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +topic 0 114 2.197225 0.000000 110 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +resourc 0 81 2.484907 0.000000 172 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +complet 0 77 2.564949 0.000000 208 +knowledg 0 67 2.708050 0.000000 243 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +room 0 59 2.833213 0.000000 301 +overview 0 56 2.890372 0.000000 323 +major 0 56 2.890372 0.000000 315 +suggest 0 53 2.944439 0.000000 331 +much 0 52 2.995732 0.000000 349 +understand 0 47 3.091042 0.000000 384 +represent 0 35 3.401197 0.000000 512 +taken 0 31 3.496508 0.000000 555 +consid 0 29 3.583519 0.000000 590 +univ 0 28 3.610918 0.000000 617 +retriev 0 27 3.637586 0.000000 621 +tent 0 22 3.850148 0.000000 739 +minut 0 20 3.951244 0.000000 810 +thur 0 19 4.007333 0.000000 847 +seek 0 17 4.110874 0.000000 954 +sept 0 17 4.110874 0.000000 952 +onth 0 12 4.465908 0.000000 1218 +excit 0 11 4.553877 0.000000 1329 +underli 0 10 4.653960 0.000000 1410 +occur 0 9 4.753590 0.000000 1572 +compos 0 9 4.753590 0.000000 1527 +digest 0 7 5.010635 0.000000 1864 +machinelearn 0 6 5.164786 0.000000 2084 +proce 0 6 5.164786 0.000000 2114 +polit 0 6 5.164786 0.000000 2115 +anda 0 5 5.347108 0.000000 2416 +aboutth 0 4 5.568345 0.000000 2720 +thesear 0 3 5.857933 0.000000 3456 +uwisc 0 2 6.263398 0.000000 4738 +belew 0 2 6.263398 0.000000 4739 +knowledgerichard 0 1 6.957497 0.000000 8903 +belewvisit 0 1 6.957497 0.000000 8904 +professorc 0 1 6.957497 0.000000 8905 +departmentfal 0 1 6.957497 0.000000 8906 +acal 0 1 6.957497 0.000000 8907 +engrthi 0 1 6.957497 0.000000 8908 +coures 0 1 6.957497 0.000000 8909 +canse 0 1 6.957497 0.000000 8910 +asyllabu 0 1 6.957497 0.000000 8911 +mapof 0 1 6.957497 0.000000 8912 +semesterwil 0 1 6.957497 0.000000 8913 +infidel 0 1 6.957497 0.000000 8914 +hypermai 0 1 6.957497 0.000000 8915 +classrel 0 1 6.957497 0.000000 8916 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..d852a6ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +relat 0 139 1.945910 0.000000 68 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +text 0 98 2.302585 0.000000 133 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +question 0 91 2.397895 0.000000 141 +sinc 0 90 2.397895 0.000000 159 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +chang 1 82 2.484907 2.484907 163 +contain 0 81 2.484907 0.000000 174 +solut 0 82 2.484907 0.000000 162 +method 0 80 2.564949 0.000000 213 +orient 0 80 2.564949 0.000000 205 +good 0 77 2.564949 0.000000 200 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +name 0 72 2.639057 0.000000 220 +syllabu 0 67 2.708050 0.000000 247 +order 0 69 2.708050 0.000000 249 +complex 0 64 2.772589 0.000000 269 +organ 0 65 2.772589 0.000000 265 +plan 0 65 2.772589 0.000000 272 +handout 0 64 2.772589 0.000000 263 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +numer 0 49 3.044522 0.000000 369 +frequent 0 49 3.044522 0.000000 367 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +edit 0 42 3.218876 0.000000 418 +might 0 41 3.218876 0.000000 426 +error 0 40 3.258097 0.000000 449 +tutori 0 39 3.258097 0.000000 437 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +approxim 0 35 3.401197 0.000000 509 +word 0 34 3.401197 0.000000 508 +chapter 0 32 3.465736 0.000000 536 +ask 0 28 3.610918 0.000000 597 +mind 0 27 3.637586 0.000000 632 +though 0 27 3.637586 0.000000 622 +session 0 26 3.688879 0.000000 643 +subject 0 26 3.688879 0.000000 647 +rather 0 26 3.688879 0.000000 642 +concern 0 25 3.737670 0.000000 666 +todai 0 25 3.737670 0.000000 672 +sometim 0 24 3.761200 0.000000 696 +wish 0 24 3.761200 0.000000 692 +begin 0 23 3.806662 0.000000 716 +tent 0 22 3.850148 0.000000 739 +cooper 0 22 3.850148 0.000000 757 +lower 0 18 4.060443 0.000000 886 +four 0 18 4.060443 0.000000 905 +condit 0 16 4.174387 0.000000 975 +score 0 15 4.248495 0.000000 1017 +carl 0 15 4.248495 0.000000 1024 +fortran 0 15 4.248495 0.000000 1027 +matlab 0 14 4.317488 0.000000 1081 +squar 0 14 4.317488 0.000000 1082 +doit 0 14 4.317488 0.000000 1111 +conduct 0 14 4.317488 0.000000 1065 +total 0 10 4.653960 0.000000 1398 +errata 0 10 4.653960 0.000000 1403 +mention 0 9 4.753590 0.000000 1569 +smile 0 7 5.010635 0.000000 1807 +slightli 0 7 5.010635 0.000000 1795 +awar 0 7 5.010635 0.000000 1800 +supplement 0 5 5.347108 0.000000 2355 +rick 0 4 5.568345 0.000000 2646 +areavail 0 4 5.568345 0.000000 2810 +andp 0 4 5.568345 0.000000 2811 +preprint 0 3 5.857933 0.000000 3481 +diari 0 2 6.263398 0.000000 4740 +residu 0 2 6.263398 0.000000 4741 +kermit 0 2 6.263398 0.000000 4742 +primer 0 2 6.263398 0.000000 4719 +overviewcours 0 2 6.263398 0.000000 4399 +linksyou 0 2 6.263398 0.000000 4743 +csdepart 0 2 6.263398 0.000000 4130 +telnet 0 1 6.957497 0.000000 8917 +methodsthi 0 1 6.957497 0.000000 8918 +orderli 0 1 6.957497 0.000000 8919 +assignmentson 0 1 6.957497 0.000000 8920 +numericalanalysi 0 1 6.957497 0.000000 8921 +foremostmathematician 0 1 6.957497 0.000000 8922 +trickytop 0 1 6.957497 0.000000 8923 +textmai 0 1 6.957497 0.000000 8924 +byaddit 0 1 6.957497 0.000000 8925 +capitallett 0 1 6.957497 0.000000 8926 +caselett 0 1 6.957497 0.000000 8927 +sigmon 0 1 6.957497 0.000000 8928 +reaction 0 1 6.957497 0.000000 8929 +winor 0 1 6.957497 0.000000 8930 +referenceviva 0 1 6.957497 0.000000 8931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..a848e8f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +email 1 220 1.386294 1.386294 29 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +fall 0 181 1.609438 0.000000 40 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +hour 0 165 1.791759 0.000000 46 +relat 1 139 1.945910 1.945910 68 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +analysi 0 124 2.079442 0.000000 98 +version 0 113 2.197225 0.000000 122 +well 0 109 2.197225 0.000000 121 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +chang 0 82 2.484907 0.000000 163 +contain 0 81 2.484907 0.000000 174 +homework 0 79 2.564949 0.000000 193 +line 0 75 2.639057 0.000000 231 +function 1 62 2.772589 2.772589 275 +copi 0 63 2.772589 0.000000 284 +locat 0 59 2.833213 0.000000 303 +index 0 56 2.890372 0.000000 309 +explor 0 58 2.890372 0.000000 324 +numer 1 49 3.044522 3.044522 369 +math 0 44 3.135494 0.000000 402 +directori 0 45 3.135494 0.000000 396 +might 0 41 3.218876 0.000000 426 +announc 0 40 3.258097 0.000000 441 +post 0 35 3.401197 0.000000 505 +hard 0 30 3.555348 0.000000 563 +concern 0 25 3.737670 0.000000 666 +wish 0 24 3.761200 0.000000 692 +stat 0 17 4.110874 0.000000 924 +carl 0 15 4.248495 0.000000 1024 +doit 0 14 4.317488 0.000000 1111 +none 0 7 5.010635 0.000000 1811 +boor 0 3 5.857933 0.000000 3482 +deboor 0 2 6.263398 0.000000 4744 +linksyou 0 2 6.263398 0.000000 4743 +analysisthi 0 1 6.957497 0.000000 8932 +statlectur 0 1 6.957497 0.000000 8933 +classnot 0 1 6.957497 0.000000 8934 +viii 0 1 6.957497 0.000000 8935 +courseoff 0 1 6.957497 0.000000 8936 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..64fa3e63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +note 0 142 1.945910 0.000000 67 +spring 0 131 2.079442 0.000000 88 +theori 0 111 2.197225 0.000000 127 +version 0 113 2.197225 0.000000 122 +chang 0 82 2.484907 0.000000 163 +contain 0 81 2.484907 0.000000 174 +math 0 44 3.135494 0.000000 402 +approxim 1 35 3.401197 3.401197 509 +theorythi 0 1 6.957497 0.000000 8937 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..1ba74b22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +solut 0 82 2.484907 0.000000 162 +academ 0 82 2.484907 0.000000 178 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +window 0 68 2.708050 0.000000 242 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +semest 0 58 2.890372 0.000000 312 +appoint 0 49 3.044522 0.000000 358 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +statist 0 35 3.401197 0.000000 521 +specifi 0 30 3.555348 0.000000 568 +comp 0 26 3.688879 0.000000 650 +tent 0 22 3.850148 0.000000 739 +sept 0 17 4.110874 0.000000 952 +walter 0 17 4.110874 0.000000 950 +stat 0 17 4.110874 0.000000 924 +quiz 0 16 4.174387 0.000000 990 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +quizz 0 13 4.382027 0.000000 1151 +savitch 0 12 4.465908 0.000000 1269 +criteria 0 9 4.753590 0.000000 1477 +noland 0 5 5.347108 0.000000 2420 +anthoni 0 4 5.568345 0.000000 2792 +toni 0 3 5.857933 0.000000 3415 +textbookproblem 0 3 5.857933 0.000000 3483 +timet 0 3 5.857933 0.000000 3471 +windowshint 0 3 5.857933 0.000000 3484 +compilersth 0 3 5.857933 0.000000 3485 +systememailmosaicnetscap 0 3 5.857933 0.000000 3486 +languageth 0 3 5.857933 0.000000 3487 +silva 0 2 6.263398 0.000000 4586 +chamberlin 0 2 6.263398 0.000000 4745 +dsilva 0 1 6.957497 0.000000 8938 +sectioncsm 0 1 6.957497 0.000000 8939 +firstdai 0 1 6.957497 0.000000 8940 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..d0b126c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,433 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +oper 0 180 1.609438 0.000000 34 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +process 0 142 1.945910 0.000000 72 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +model 0 145 1.945910 0.000000 69 +file 0 132 1.945910 0.000000 70 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +relat 0 139 1.945910 0.000000 68 +tool 0 117 2.079442 0.000000 93 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +databas 0 122 2.079442 0.000000 86 +schedul 0 119 2.079442 0.000000 85 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +send 0 114 2.197225 0.000000 109 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +well 0 109 2.197225 0.000000 121 +topic 0 114 2.197225 0.000000 110 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +part 0 98 2.302585 0.000000 129 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +grade 0 90 2.397895 0.000000 142 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +follow 0 92 2.397895 0.000000 143 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +exam 0 86 2.484907 0.000000 169 +environ 0 84 2.484907 0.000000 177 +activ 0 84 2.484907 0.000000 182 +novemb 0 81 2.484907 0.000000 179 +start 0 83 2.484907 0.000000 173 +larg 0 82 2.484907 0.000000 168 +requir 0 81 2.484907 0.000000 167 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +journal 0 83 2.484907 0.000000 183 +thing 0 84 2.484907 0.000000 189 +learn 0 86 2.484907 0.000000 170 +chang 0 82 2.484907 0.000000 163 +contain 0 81 2.484907 0.000000 174 +ieee 0 86 2.484907 0.000000 190 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +method 0 80 2.564949 0.000000 213 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +good 0 77 2.564949 0.000000 200 +decemb 0 80 2.564949 0.000000 215 +interfac 0 79 2.564949 0.000000 209 +line 0 75 2.639057 0.000000 231 +onlin 0 75 2.639057 0.000000 223 +free 0 73 2.639057 0.000000 224 +appli 0 71 2.639057 0.000000 226 +addit 0 74 2.639057 0.000000 228 +tuesdai 0 73 2.639057 0.000000 219 +html 0 75 2.639057 0.000000 235 +name 0 72 2.639057 0.000000 220 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +differ 0 66 2.708050 0.000000 253 +syllabu 0 67 2.708050 0.000000 247 +order 0 69 2.708050 0.000000 249 +main 0 67 2.708050 0.000000 256 +result 0 65 2.772589 0.000000 281 +handout 0 64 2.772589 0.000000 263 +interact 0 62 2.772589 0.000000 270 +collect 0 65 2.772589 0.000000 268 +wednesdai 0 64 2.772589 0.000000 261 +copi 0 63 2.772589 0.000000 284 +function 0 62 2.772589 0.000000 275 +improv 0 62 2.772589 0.000000 289 +evalu 0 64 2.772589 0.000000 266 +experi 0 64 2.772589 0.000000 283 +abstract 0 62 2.772589 0.000000 276 +virtual 0 62 2.772589 0.000000 285 +locat 0 59 2.833213 0.000000 303 +type 0 61 2.833213 0.000000 296 +room 0 59 2.833213 0.000000 301 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +unix 0 58 2.890372 0.000000 308 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +cover 0 55 2.944439 0.000000 329 +talk 0 53 2.944439 0.000000 336 +digit 0 52 2.995732 0.000000 348 +date 0 51 2.995732 0.000000 344 +case 0 51 2.995732 0.000000 351 +finger 0 52 2.995732 0.000000 354 +format 0 48 3.044522 0.000000 356 +basic 0 50 3.044522 0.000000 360 +numer 0 49 3.044522 0.000000 369 +principl 0 48 3.044522 0.000000 357 +right 0 48 3.044522 0.000000 363 +approach 0 48 3.044522 0.000000 366 +visual 0 48 3.044522 0.000000 372 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +done 0 47 3.091042 0.000000 381 +directori 0 45 3.135494 0.000000 396 +execut 0 45 3.135494 0.000000 404 +fridai 0 44 3.135494 0.000000 390 +algebra 0 45 3.135494 0.000000 394 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +video 0 44 3.135494 0.000000 405 +around 0 43 3.178054 0.000000 415 +long 0 43 3.178054 0.000000 413 +vision 0 41 3.218876 0.000000 430 +might 0 41 3.218876 0.000000 426 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +linear 0 41 3.218876 0.000000 431 +york 0 41 3.218876 0.000000 435 +editor 0 41 3.218876 0.000000 433 +edit 0 42 3.218876 0.000000 418 +howev 0 41 3.218876 0.000000 422 +probabl 0 40 3.258097 0.000000 455 +small 0 39 3.258097 0.000000 447 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +prototyp 0 38 3.295837 0.000000 463 +hand 0 37 3.332205 0.000000 475 +feel 0 37 3.332205 0.000000 483 +especi 0 36 3.367296 0.000000 496 +print 0 34 3.401197 0.000000 503 +least 0 35 3.401197 0.000000 516 +manual 0 35 3.401197 0.000000 504 +committe 0 34 3.401197 0.000000 522 +eduoffic 0 33 3.433987 0.000000 531 +board 0 33 3.433987 0.000000 528 +product 0 33 3.433987 0.000000 527 +chapter 0 32 3.465736 0.000000 536 +transform 0 32 3.465736 0.000000 542 +idea 0 32 3.465736 0.000000 545 +photo 0 31 3.496508 0.000000 561 +posit 0 31 3.496508 0.000000 552 +titl 0 31 3.496508 0.000000 556 +focu 0 30 3.555348 0.000000 571 +option 0 30 3.555348 0.000000 575 +produc 0 30 3.555348 0.000000 572 +particip 0 29 3.583519 0.000000 589 +except 0 28 3.610918 0.000000 607 +packag 0 28 3.610918 0.000000 614 +held 0 28 3.610918 0.000000 600 +ask 0 28 3.610918 0.000000 597 +determin 0 27 3.637586 0.000000 630 +detect 0 26 3.688879 0.000000 646 +enhanc 0 26 3.688879 0.000000 644 +altern 0 26 3.688879 0.000000 641 +relev 0 26 3.688879 0.000000 637 +fundament 0 25 3.737670 0.000000 661 +hill 0 25 3.737670 0.000000 670 +although 0 25 3.737670 0.000000 667 +pattern 0 24 3.761200 0.000000 689 +motion 0 24 3.761200 0.000000 699 +wish 0 24 3.761200 0.000000 692 +store 0 24 3.761200 0.000000 693 +other 0 24 3.761200 0.000000 697 +displai 0 23 3.806662 0.000000 712 +recognit 0 23 3.806662 0.000000 723 +head 0 23 3.806662 0.000000 732 +compress 0 23 3.806662 0.000000 719 +proof 0 23 3.806662 0.000000 720 +highli 0 23 3.806662 0.000000 725 +miscellan 0 23 3.806662 0.000000 731 +color 0 22 3.850148 0.000000 762 +varieti 0 22 3.850148 0.000000 740 +defin 0 22 3.850148 0.000000 746 +instead 0 22 3.850148 0.000000 756 +william 0 22 3.850148 0.000000 765 +disk 0 22 3.850148 0.000000 747 +sent 0 22 3.850148 0.000000 763 +recommend 0 22 3.850148 0.000000 737 +output 0 21 3.912023 0.000000 788 +tell 0 21 3.912023 0.000000 777 +fact 0 21 3.912023 0.000000 780 +wang 0 21 3.912023 0.000000 790 +sure 0 20 3.951244 0.000000 813 +entir 0 20 3.951244 0.000000 811 +toolkit 0 20 3.951244 0.000000 835 +prerequisit 0 19 4.007333 0.000000 846 +citi 0 19 4.007333 0.000000 874 +comparison 0 19 4.007333 0.000000 863 +boston 0 19 4.007333 0.000000 862 +account 0 18 4.060443 0.000000 882 +dimension 0 18 4.060443 0.000000 909 +four 0 18 4.060443 0.000000 905 +demo 0 18 4.060443 0.000000 888 +segment 0 17 4.110874 0.000000 931 +modif 0 17 4.110874 0.000000 913 +matrix 0 17 4.110874 0.000000 933 +stanford 0 17 4.110874 0.000000 955 +estim 0 17 4.110874 0.000000 930 +regular 0 17 4.110874 0.000000 929 +condit 0 16 4.174387 0.000000 975 +zhang 0 16 4.174387 0.000000 980 +earli 0 16 4.174387 0.000000 968 +sheet 0 16 4.174387 0.000000 973 +track 0 15 4.248495 0.000000 1029 +score 0 15 4.248495 0.000000 1017 +transit 0 15 4.248495 0.000000 1046 +goe 0 15 4.248495 0.000000 1044 +side 0 15 4.248495 0.000000 1022 +rate 0 15 4.248495 0.000000 1037 +doit 0 14 4.317488 0.000000 1111 +chuck 0 14 4.317488 0.000000 1108 +scene 0 14 4.317488 0.000000 1114 +matlab 0 14 4.317488 0.000000 1081 +save 0 14 4.317488 0.000000 1099 +manner 0 14 4.317488 0.000000 1074 +consider 0 14 4.317488 0.000000 1076 +command 0 14 4.317488 0.000000 1083 +block 0 13 4.382027 0.000000 1183 +primarili 0 13 4.382027 0.000000 1185 +convert 0 13 4.382027 0.000000 1122 +emac 0 13 4.382027 0.000000 1143 +everyon 0 13 4.382027 0.000000 1148 +shape 0 12 4.465908 0.000000 1245 +calculu 0 12 4.465908 0.000000 1203 +overal 0 12 4.465908 0.000000 1254 +count 0 12 4.465908 0.000000 1239 +optic 0 12 4.465908 0.000000 1221 +realiti 0 12 4.465908 0.000000 1272 +qualit 0 11 4.553877 0.000000 1362 +appl 0 11 4.553877 0.000000 1303 +vista 0 10 4.653960 0.000000 1452 +mosaic 0 10 4.653960 0.000000 1426 +modul 0 10 4.653960 0.000000 1434 +hint 0 10 4.653960 0.000000 1419 +queue 0 10 4.653960 0.000000 1386 +rapid 0 10 4.653960 0.000000 1453 +bring 0 10 4.653960 0.000000 1430 +dyer 0 9 4.753590 0.000000 1573 +face 0 9 4.753590 0.000000 1501 +distanc 0 9 4.753590 0.000000 1500 +recoveri 0 9 4.753590 0.000000 1474 +printer 0 8 4.875197 0.000000 1621 +depth 0 8 4.875197 0.000000 1636 +convers 0 8 4.875197 0.000000 1673 +edg 0 8 4.875197 0.000000 1647 +contrast 0 8 4.875197 0.000000 1637 +job 0 8 4.875197 0.000000 1702 +virginia 0 8 4.875197 0.000000 1659 +shade 0 7 5.010635 0.000000 1881 +stereo 0 7 5.010635 0.000000 1818 +prevent 0 7 5.010635 0.000000 1827 +corner 0 7 5.010635 0.000000 1909 +header 0 7 5.010635 0.000000 1787 +compact 0 7 5.010635 0.000000 1907 +signal 0 7 5.010635 0.000000 1910 +sweden 0 7 5.010635 0.000000 1885 +spline 0 6 5.164786 0.000000 2007 +viewpoint 0 6 5.164786 0.000000 2116 +gzip 0 6 5.164786 0.000000 2117 +invok 0 6 5.164786 0.000000 2079 +classroom 0 6 5.164786 0.000000 2006 +televis 0 6 5.164786 0.000000 2118 +spie 0 6 5.164786 0.000000 2119 +rotat 0 5 5.347108 0.000000 2295 +snake 0 5 5.347108 0.000000 2281 +bryan 0 5 5.347108 0.000000 2421 +jain 0 5 5.347108 0.000000 2332 +mcgraw 0 5 5.347108 0.000000 2262 +adjust 0 5 5.347108 0.000000 2422 +button 0 5 5.347108 0.000000 2337 +constant 0 5 5.347108 0.000000 2251 +multiresolut 0 5 5.347108 0.000000 2423 +sparcstat 0 5 5.347108 0.000000 2406 +shortest 0 5 5.347108 0.000000 2424 +grand 0 5 5.347108 0.000000 2425 +contour 0 4 5.568345 0.000000 2812 +sold 0 4 5.568345 0.000000 2813 +delet 0 4 5.568345 0.000000 2691 +assignmentshomework 0 4 5.568345 0.000000 2721 +shah 0 4 5.568345 0.000000 2814 +thin 0 3 5.857933 0.000000 3488 +pyramid 0 3 5.857933 0.000000 3358 +tran 0 3 5.857933 0.000000 3384 +faq 0 3 5.857933 0.000000 3216 +visionc 0 3 5.857933 0.000000 3489 +histogram 0 3 5.857933 0.000000 3490 +portrait 0 3 5.857933 0.000000 3491 +gradient 0 3 5.857933 0.000000 3479 +surround 0 3 5.857933 0.000000 3492 +suen 0 3 5.857933 0.000000 3446 +toolbox 0 3 5.857933 0.000000 3112 +quicktim 0 3 5.857933 0.000000 3493 +qbic 0 3 5.857933 0.000000 3294 +cardiff 0 3 5.857933 0.000000 3154 +khoro 0 2 6.263398 0.000000 4488 +comm 0 2 6.263398 0.000000 4746 +laser 0 2 6.263398 0.000000 4747 +skeleton 0 2 6.263398 0.000000 4225 +disappear 0 2 6.263398 0.000000 4748 +burt 0 2 6.263398 0.000000 4494 +visionfal 0 2 6.263398 0.000000 4749 +shoulder 0 2 6.263398 0.000000 4750 +altogeth 0 2 6.263398 0.000000 4751 +supplementari 0 2 6.263398 0.000000 4752 +quota 0 2 6.263398 0.000000 4753 +caution 0 2 6.263398 0.000000 4754 +cantata 0 2 6.263398 0.000000 4489 +panoram 0 2 6.263398 0.000000 4755 +royal 0 2 6.263398 0.000000 4756 +adelson 0 1 6.957497 0.000000 8941 +csstelephon 0 1 6.957497 0.000000 8942 +ubyt 0 1 6.957497 0.000000 8943 +imgstar 0 1 6.957497 0.000000 8944 +hdtv 0 1 6.957497 0.000000 8945 +atsc 0 1 6.957497 0.000000 8946 +dyeroffic 0 1 6.957497 0.000000 8947 +appointmentteach 0 1 6.957497 0.000000 8948 +sooffic 0 1 6.957497 0.000000 8949 +appointmentstud 0 1 6.957497 0.000000 8950 +informationfundament 0 1 6.957497 0.000000 8951 +featuredetect 0 1 6.957497 0.000000 8952 +forreconstruct 0 1 6.957497 0.000000 8953 +usingtechniqu 0 1 6.957497 0.000000 8954 +asshap 0 1 6.957497 0.000000 8955 +andocclud 0 1 6.957497 0.000000 8956 +kasturi 0 1 6.957497 0.000000 8957 +schunck 0 1 6.957497 0.000000 8958 +readingsfrom 0 1 6.957497 0.000000 8959 +batchessupplementari 0 1 6.957497 0.000000 8960 +sourcesonlin 0 1 6.957497 0.000000 8961 +informationmost 0 1 6.957497 0.000000 8962 +urlhttp 0 1 6.957497 0.000000 8963 +byfirst 0 1 6.957497 0.000000 8964 +crop 0 1 6.957497 0.000000 8965 +theintens 0 1 6.957497 0.000000 8966 +thewindow 0 1 6.957497 0.000000 8967 +colorif 0 1 6.957497 0.000000 8968 +grayscal 0 1 6.957497 0.000000 8969 +transformationsav 0 1 6.957497 0.000000 8970 +andput 0 1 6.957497 0.000000 8971 +whereth 0 1 6.957497 0.000000 8972 +whatintens 0 1 6.957497 0.000000 8973 +qualityof 0 1 6.957497 0.000000 8974 +ownweb 0 1 6.957497 0.000000 8975 +infin 0 1 6.957497 0.000000 8976 +chessboard 0 1 6.957497 0.000000 8977 +vconvert 0 1 6.957497 0.000000 8978 +clean 0 1 6.957497 0.000000 8979 +repn 0 1 6.957497 0.000000 8980 +component_interp 0 1 6.957497 0.000000 8981 +low_threshold 0 1 6.957497 0.000000 8982 +high_threshold 0 1 6.957497 0.000000 8983 +vlink 0 1 6.957497 0.000000 8984 +vsegedg 0 1 6.957497 0.000000 8985 +laplacian 0 1 6.957497 0.000000 8986 +kass 0 1 6.957497 0.000000 8987 +witkin 0 1 6.957497 0.000000 8988 +terzopoulo 0 1 6.957497 0.000000 8989 +curvatur 0 1 6.957497 0.000000 8990 +laserprint 0 1 6.957497 0.000000 8991 +netpbm 0 1 6.957497 0.000000 8992 +pbmplu 0 1 6.957497 0.000000 8993 +wandel 0 1 6.957497 0.000000 8994 +allianc 0 1 6.957497 0.000000 8995 +panoramix 0 1 6.957497 0.000000 8996 +decfac 0 1 6.957497 0.000000 8997 +synthet 0 1 6.957497 0.000000 8998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..bb798380 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +base 0 165 1.791759 0.000000 50 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +number 0 130 2.079442 0.000000 97 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +section 0 94 2.397895 0.000000 149 +octob 0 89 2.397895 0.000000 156 +novemb 0 81 2.484907 0.000000 179 +meet 0 72 2.639057 0.000000 229 +solv 0 73 2.639057 0.000000 234 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +publish 0 57 2.890372 0.000000 326 +digit 0 52 2.995732 0.000000 348 +algebra 0 45 3.135494 0.000000 394 +fridai 0 44 3.135494 0.000000 390 +compani 0 41 3.218876 0.000000 423 +known 0 24 3.761200 0.000000 702 +greg 0 24 3.761200 0.000000 695 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +style 0 15 4.248495 0.000000 1036 +dave 0 14 4.317488 0.000000 1098 +readi 0 12 4.465908 0.000000 1242 +informationemail 0 9 4.753590 0.000000 1564 +sharp 0 6 5.164786 0.000000 2100 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +objectivesvectra 0 3 5.857933 0.000000 3410 +homeclass 0 3 5.857933 0.000000 3411 +policyl 0 3 5.857933 0.000000 3412 +policyacadem 0 3 5.857933 0.000000 3413 +consultantssyllabuswork 0 2 6.263398 0.000000 4579 +referenc 0 2 6.263398 0.000000 4757 +dzimm 0 1 6.957497 0.000000 8999 +zimmermannemail 0 1 6.957497 0.000000 9000 +educlass 0 1 6.957497 0.000000 9001 +nolandoffic 0 1 6.957497 0.000000 9002 +announcementsprogram 0 1 6.957497 0.000000 9003 +handoutsprogramsexam 0 1 6.957497 0.000000 9004 +quizzeslectur 0 1 6.957497 0.000000 9005 +notesgreg 0 1 6.957497 0.000000 9006 +guidegrad 0 1 6.957497 0.000000 9007 +quizzesprogramsexam 0 1 6.957497 0.000000 9008 +policytext 0 1 6.957497 0.000000 9009 +zimmermann 0 1 6.957497 0.000000 9010 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..d7861b09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +spring 0 131 2.079442 0.000000 88 +instructor 0 108 2.197225 0.000000 107 +theori 0 111 2.197225 0.000000 127 +larg 0 82 2.484907 0.000000 168 +method 0 80 2.564949 0.000000 213 +januari 0 62 2.772589 0.000000 264 +semest 0 58 2.890372 0.000000 312 +variou 0 56 2.890372 0.000000 317 +offer 0 43 3.178054 0.000000 414 +linear 0 41 3.218876 0.000000 431 +michael 0 35 3.401197 0.000000 514 +flow 0 24 3.761200 0.000000 700 +spars 0 16 4.174387 0.000000 989 +nonlinear 0 14 4.317488 0.000000 1107 +mangasarian 0 9 4.753590 0.000000 1570 +ferri 0 8 4.875197 0.000000 1715 +integ 0 8 4.875197 0.000000 1688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..fe0f2c39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +network 0 168 1.791759 0.000000 61 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +text 0 98 2.302585 0.000000 133 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +overview 0 56 2.890372 0.000000 323 +local 0 55 2.944439 0.000000 334 +februari 0 54 2.944439 0.000000 328 +allow 0 53 2.944439 0.000000 333 +week 0 52 2.995732 0.000000 343 +telephon 0 50 3.044522 0.000000 373 +set 0 50 3.044522 0.000000 361 +fridai 0 44 3.135494 0.000000 390 +directori 0 45 3.135494 0.000000 396 +linear 0 41 3.218876 0.000000 431 +press 0 42 3.218876 0.000000 419 +examin 0 42 3.218876 0.000000 424 +close 0 38 3.295837 0.000000 465 +cost 0 37 3.332205 0.000000 480 +workstat 0 37 3.332205 0.000000 479 +tree 0 36 3.367296 0.000000 492 +michael 0 35 3.401197 0.000000 514 +jame 0 35 3.401197 0.000000 507 +except 0 28 3.610918 0.000000 607 +held 0 28 3.610918 0.000000 600 +session 0 26 3.688879 0.000000 643 +repres 0 26 3.688879 0.000000 656 +flow 0 24 3.761200 0.000000 700 +path 0 21 3.912023 0.000000 778 +minut 0 20 3.951244 0.000000 810 +prerequisit 0 19 4.007333 0.000000 846 +thoma 0 18 4.060443 0.000000 901 +appropri 0 18 4.060443 0.000000 883 +previous 0 17 4.110874 0.000000 923 +sheet 0 16 4.174387 0.000000 973 +solari 0 12 4.465908 0.000000 1238 +cycl 0 11 4.553877 0.000000 1335 +login 0 9 4.753590 0.000000 1550 +ferri 0 8 4.875197 0.000000 1715 +prentic 0 7 5.010635 0.000000 1838 +relax 0 6 5.164786 0.000000 2120 +shortest 0 5 5.347108 0.000000 2424 +cshrc 0 4 5.568345 0.000000 2759 +freeman 0 4 5.568345 0.000000 2725 +convex 0 4 5.568345 0.000000 2807 +novic 0 4 5.568345 0.000000 2815 +ahuja 0 3 5.857933 0.000000 3494 +bertseka 0 3 5.857933 0.000000 3477 +lagrangian 0 3 5.857933 0.000000 3478 +gam 0 2 6.263398 0.000000 4758 +leei 0 2 6.263398 0.000000 4759 +equilibria 0 2 6.263398 0.000000 4760 +multicommod 0 2 6.263398 0.000000 4761 +flowsspr 0 1 6.957497 0.000000 9011 +ravindra 0 1 6.957497 0.000000 9012 +magnanti 0 1 6.957497 0.000000 9013 +orlin 0 1 6.957497 0.000000 9014 +chvatal 0 1 6.957497 0.000000 9015 +simplex 0 1 6.957497 0.000000 9016 +alter 0 1 6.957497 0.000000 9017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..dce0a024 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +book 0 99 2.302585 0.000000 131 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +second 0 81 2.484907 0.000000 166 +larg 0 82 2.484907 0.000000 168 +ieee 0 86 2.484907 0.000000 190 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +method 0 80 2.564949 0.000000 213 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +direct 0 57 2.890372 0.000000 316 +space 0 57 2.890372 0.000000 310 +overview 0 56 2.890372 0.000000 323 +februari 0 54 2.944439 0.000000 328 +local 0 55 2.944439 0.000000 334 +allow 0 53 2.944439 0.000000 333 +instruct 0 53 2.944439 0.000000 332 +week 0 52 2.995732 0.000000 343 +telephon 0 50 3.044522 0.000000 373 +fridai 0 44 3.135494 0.000000 390 +math 0 44 3.135494 0.000000 402 +textbook 0 44 3.135494 0.000000 397 +press 0 42 3.218876 0.000000 419 +edit 0 42 3.218876 0.000000 418 +linear 0 41 3.218876 0.000000 431 +examin 0 42 3.218876 0.000000 424 +error 0 40 3.258097 0.000000 449 +close 0 38 3.295837 0.000000 465 +workstat 0 37 3.332205 0.000000 479 +michael 0 35 3.401197 0.000000 514 +least 0 35 3.401197 0.000000 516 +john 0 33 3.433987 0.000000 532 +storag 0 31 3.496508 0.000000 553 +except 0 28 3.610918 0.000000 607 +held 0 28 3.610918 0.000000 600 +session 0 26 3.688879 0.000000 643 +repres 0 26 3.688879 0.000000 656 +strategi 0 25 3.737670 0.000000 682 +equat 0 23 3.806662 0.000000 724 +recommend 0 22 3.850148 0.000000 737 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +scheme 0 20 3.951244 0.000000 818 +minut 0 20 3.951244 0.000000 810 +prerequisit 0 19 4.007333 0.000000 846 +dimension 0 18 4.060443 0.000000 909 +matrix 0 17 4.110874 0.000000 933 +modif 0 17 4.110874 0.000000 913 +previous 0 17 4.110874 0.000000 923 +spars 0 16 4.174387 0.000000 989 +vector 0 16 4.174387 0.000000 961 +sheet 0 16 4.174387 0.000000 973 +finit 0 14 4.317488 0.000000 1106 +squar 0 14 4.317488 0.000000 1082 +nonlinear 0 14 4.317488 0.000000 1107 +matlab 0 14 4.317488 0.000000 1081 +iter 0 12 4.465908 0.000000 1206 +matric 0 10 4.653960 0.000000 1399 +arithmet 0 10 4.653960 0.000000 1388 +elimin 0 9 4.753590 0.000000 1558 +ferri 0 8 4.875197 0.000000 1715 +solver 0 7 5.010635 0.000000 1911 +oxford 0 6 5.164786 0.000000 2121 +dens 0 6 5.164786 0.000000 2122 +pivot 0 5 5.347108 0.000000 2426 +consent 0 5 5.347108 0.000000 2389 +novic 0 4 5.568345 0.000000 2815 +golub 0 3 5.857933 0.000000 3265 +eigenvalu 0 3 5.857933 0.000000 3364 +eigenvector 0 3 5.857933 0.000000 3365 +systemsspr 0 2 6.263398 0.000000 4762 +leei 0 2 6.263398 0.000000 4759 +loan 0 2 6.263398 0.000000 4147 +gaussian 0 2 6.263398 0.000000 4763 +hopkinsunivers 0 1 6.957497 0.000000 9018 +duff 0 1 6.957497 0.000000 9019 +erisman 0 1 6.957497 0.000000 9020 +reid 0 1 6.957497 0.000000 9021 +halmo 0 1 6.957497 0.000000 9022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..bd0596f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +start 0 83 2.484907 0.000000 173 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +overview 0 56 2.890372 0.000000 323 +date 0 51 2.995732 0.000000 344 +telephon 0 50 3.044522 0.000000 373 +appoint 0 49 3.044522 0.000000 358 +get 0 46 3.091042 0.000000 380 +fridai 0 44 3.135494 0.000000 390 +examin 0 42 3.218876 0.000000 424 +richard 0 31 3.496508 0.000000 559 +charl 0 13 4.382027 0.000000 1149 +benjamin 0 11 4.553877 0.000000 1296 +regularli 0 11 4.553877 0.000000 1338 +tuth 0 9 4.753590 0.000000 1519 +cum 0 8 4.875197 0.000000 1619 +fischer 0 7 5.010635 0.000000 1893 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +krishna 0 3 5.857933 0.000000 3495 +weyer 0 2 6.263398 0.000000 4558 +compilersfal 0 2 6.263398 0.000000 4223 +csst 0 2 6.263398 0.000000 4764 +krisna 0 2 6.263398 0.000000 4765 +kunchithapadam 0 1 6.957497 0.000000 9023 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..07cb82f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +object 0 138 1.945910 0.000000 79 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +section 0 94 2.397895 0.000000 149 +solv 0 73 2.639057 0.000000 234 +html 0 75 2.639057 0.000000 235 +polici 0 64 2.772589 0.000000 279 +publish 0 57 2.890372 0.000000 326 +algebra 0 45 3.135494 0.000000 394 +compani 0 41 3.218876 0.000000 423 +http 0 41 3.218876 0.000000 420 +must 0 40 3.258097 0.000000 442 +eduoffic 0 33 3.433987 0.000000 531 +greg 1 24 3.761200 3.761200 695 +known 0 24 3.761200 0.000000 702 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +informationemail 0 9 4.753590 0.000000 1564 +appt 0 5 5.347108 0.000000 2312 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +grader 0 3 5.857933 0.000000 3165 +krishna 0 3 5.857933 0.000000 3495 +objectivesvectra 0 3 5.857933 0.000000 3410 +policyl 0 3 5.857933 0.000000 3412 +policyacadem 0 3 5.857933 0.000000 3413 +sharpemail 0 2 6.263398 0.000000 4766 +krisna 0 2 6.263398 0.000000 4765 +archivepolici 0 2 6.263398 0.000000 4580 +sharpgreg 0 2 6.263398 0.000000 4767 +kunchithapadamemail 0 1 6.957497 0.000000 9024 +edugener 0 1 6.957497 0.000000 9025 +consultantssyllabuscours 0 1 6.957497 0.000000 9026 +difficultywork 0 1 6.957497 0.000000 9027 +homenewsstartup 0 1 6.957497 0.000000 9028 +informationclass 0 1 6.957497 0.000000 9029 +noteshomeworkexam 0 1 6.957497 0.000000 9030 +quizzesstyl 0 1 6.957497 0.000000 9031 +guideemail 0 1 6.957497 0.000000 9032 +textproblem 0 1 6.957497 0.000000 9033 +porgrammingwalt 0 1 6.957497 0.000000 9034 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..bb915c3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,160 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +compil 0 122 2.079442 0.000000 96 +introduct 0 126 2.079442 0.000000 87 +version 0 113 2.197225 0.000000 122 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +code 0 108 2.197225 0.000000 116 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +mani 0 92 2.397895 0.000000 150 +chang 0 82 2.484907 0.000000 163 +help 0 83 2.484907 0.000000 175 +start 0 83 2.484907 0.000000 173 +requir 0 81 2.484907 0.000000 167 +stuff 0 87 2.484907 0.000000 171 +academ 0 82 2.484907 0.000000 178 +second 0 81 2.484907 0.000000 166 +want 0 79 2.564949 0.000000 199 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +name 0 72 2.639057 0.000000 220 +line 0 75 2.639057 0.000000 231 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +test 0 66 2.708050 0.000000 252 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +copi 0 63 2.772589 0.000000 284 +result 0 65 2.772589 0.000000 281 +function 0 62 2.772589 0.000000 275 +descript 0 64 2.772589 0.000000 271 +handout 0 64 2.772589 0.000000 263 +type 0 61 2.833213 0.000000 296 +semest 0 58 2.890372 0.000000 312 +publish 0 57 2.890372 0.000000 326 +sampl 0 53 2.944439 0.000000 339 +suggest 0 53 2.944439 0.000000 331 +basic 0 50 3.044522 0.000000 360 +understand 0 47 3.091042 0.000000 384 +directori 0 45 3.135494 0.000000 396 +algebra 0 45 3.135494 0.000000 394 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +might 0 41 3.218876 0.000000 426 +compani 0 41 3.218876 0.000000 423 +announc 0 40 3.258097 0.000000 441 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +mean 0 37 3.332205 0.000000 477 +either 0 35 3.401197 0.000000 506 +within 0 33 3.433987 0.000000 525 +ad 0 32 3.465736 0.000000 544 +someth 0 31 3.496508 0.000000 554 +hard 0 30 3.555348 0.000000 563 +great 0 27 3.637586 0.000000 626 +administr 0 27 3.637586 0.000000 628 +consist 0 26 3.688879 0.000000 651 +valu 0 25 3.737670 0.000000 665 +consult 0 24 3.761200 0.000000 687 +togeth 0 23 3.806662 0.000000 714 +defin 0 22 3.850148 0.000000 746 +tent 0 22 3.850148 0.000000 739 +sure 0 20 3.951244 0.000000 813 +entir 0 20 3.951244 0.000000 811 +fine 0 20 3.951244 0.000000 822 +prepar 0 20 3.951244 0.000000 824 +definit 0 19 4.007333 0.000000 864 +attend 0 18 4.060443 0.000000 893 +attempt 0 17 4.110874 0.000000 917 +walter 0 17 4.110874 0.000000 950 +earli 0 16 4.174387 0.000000 968 +weslei 0 16 4.174387 0.000000 983 +misconduct 0 16 4.174387 0.000000 1003 +piec 0 15 4.248495 0.000000 1020 +style 0 15 4.248495 0.000000 1036 +shown 0 14 4.317488 0.000000 1080 +borland 0 14 4.317488 0.000000 1067 +everyth 0 13 4.382027 0.000000 1169 +quizz 0 13 4.382027 0.000000 1151 +bodi 0 13 4.382027 0.000000 1178 +outsid 0 12 4.465908 0.000000 1219 +insid 0 12 4.465908 0.000000 1262 +readi 0 12 4.465908 0.000000 1242 +savitch 0 12 4.465908 0.000000 1269 +addison 0 12 4.465908 0.000000 1230 +vectra 0 12 4.465908 0.000000 1267 +tue 0 11 4.553877 0.000000 1308 +chri 0 11 4.553877 0.000000 1311 +noth 0 11 4.553877 0.000000 1328 +extra 0 11 4.553877 0.000000 1312 +correspond 0 10 4.653960 0.000000 1382 +bring 0 10 4.653960 0.000000 1430 +declar 0 9 4.753590 0.000000 1526 +lane 0 8 4.875197 0.000000 1720 +matter 0 8 4.875197 0.000000 1627 +bottom 0 7 5.010635 0.000000 1906 +throughout 0 7 5.010635 0.000000 1871 +sharp 0 6 5.164786 0.000000 2100 +recogn 0 5 5.347108 0.000000 2302 +crucial 0 5 5.347108 0.000000 2384 +prog 0 4 5.568345 0.000000 2740 +shouldn 0 4 5.568345 0.000000 2606 +thumb 0 4 5.568345 0.000000 2816 +enumer 0 3 5.857933 0.000000 3244 +privat 0 3 5.857933 0.000000 3496 +bump 0 3 5.857933 0.000000 3497 +obsolet 0 3 5.857933 0.000000 3196 +freshman 0 3 5.857933 0.000000 3462 +chad 0 2 6.263398 0.000000 4768 +forgot 0 2 6.263398 0.000000 4769 +weaver 0 2 6.263398 0.000000 4770 +freshmen 0 2 6.263398 0.000000 4554 +disregard 0 2 6.263398 0.000000 4189 +tribbl 0 1 6.957497 0.000000 9035 +randomintinrang 0 1 6.957497 0.000000 9036 +uppercas 0 1 6.957497 0.000000 9037 +overwrit 0 1 6.957497 0.000000 9038 +discrep 0 1 6.957497 0.000000 9039 +solutionscours 0 1 6.957497 0.000000 9040 +vleck 0 1 6.957497 0.000000 9041 +guidelast 0 1 6.957497 0.000000 9042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..5cfc750c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +techniqu 0 99 2.302585 0.000000 138 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +librari 0 87 2.484907 0.000000 181 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +overview 0 56 2.890372 0.000000 323 +date 0 51 2.995732 0.000000 344 +telephon 0 50 3.044522 0.000000 373 +appoint 0 49 3.044522 0.000000 358 +principl 0 48 3.044522 0.000000 357 +get 0 46 3.091042 0.000000 380 +fridai 0 44 3.135494 0.000000 390 +examin 0 42 3.218876 0.000000 424 +late 0 40 3.258097 0.000000 439 +comp 0 26 3.688879 0.000000 650 +reserv 0 20 3.951244 0.000000 808 +stat 0 17 4.110874 0.000000 924 +month 0 15 4.248495 0.000000 1025 +psycholog 0 15 4.248495 0.000000 1054 +susan 0 15 4.248495 0.000000 1050 +stori 0 14 4.317488 0.000000 1087 +regularli 0 11 4.553877 0.000000 1338 +wendt 0 10 4.653960 0.000000 1446 +tuth 0 9 4.753590 0.000000 1519 +recit 0 9 4.753590 0.000000 1475 +fischer 0 7 5.010635 0.000000 1893 +horwitz 0 5 5.347108 0.000000 2411 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +ullman 0 4 5.568345 0.000000 2749 +rahul 0 3 5.857933 0.000000 3464 +compilersspr 0 2 6.263398 0.000000 4700 +kapoor 0 2 6.263398 0.000000 4701 +sethi 0 2 6.263398 0.000000 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..91d6a4e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +phone 0 175 1.791759 0.000000 45 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +academ 0 82 2.484907 0.000000 178 +mondai 0 77 2.564949 0.000000 206 +solv 0 73 2.639057 0.000000 234 +window 0 68 2.708050 0.000000 242 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +semest 0 58 2.890372 0.000000 312 +announc 0 40 3.258097 0.000000 441 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +statist 0 35 3.401197 0.000000 521 +tent 0 22 3.850148 0.000000 739 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +savitch 0 12 4.465908 0.000000 1269 +criteria 0 9 4.753590 0.000000 1477 +hummert 1 3 5.857933 5.857933 3416 +psych 0 3 5.857933 0.000000 3498 +textbookproblem 0 3 5.857933 0.000000 3483 +windowshint 0 3 5.857933 0.000000 3484 +compilersth 0 3 5.857933 0.000000 3485 +systememailmosaicnetscap 0 3 5.857933 0.000000 3486 +languageth 0 3 5.857933 0.000000 3487 +sectionsc 0 1 6.957497 0.000000 9043 +viewgraph 0 1 6.957497 0.000000 9044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..0baa9546 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +madison 0 165 1.791759 0.000000 55 +click 1 142 1.945910 1.945910 78 +hall 0 146 1.945910 0.000000 65 +file 0 132 1.945910 0.000000 70 +number 0 130 2.079442 0.000000 97 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +need 0 98 2.302585 0.000000 135 +section 1 94 2.397895 2.397895 149 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +info 0 85 2.484907 0.000000 176 +thing 0 84 2.484907 0.000000 189 +mondai 0 77 2.564949 0.000000 206 +name 0 72 2.639057 0.000000 220 +thursdai 0 70 2.708050 0.000000 241 +copi 0 63 2.772589 0.000000 284 +guid 0 63 2.772589 0.000000 267 +septemb 0 65 2.772589 0.000000 274 +keep 0 44 3.135494 0.000000 409 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +mind 0 27 3.637586 0.000000 632 +stop 0 17 4.110874 0.000000 942 +choos 0 16 4.174387 0.000000 964 +doit 0 14 4.317488 0.000000 1111 +menu 0 13 4.382027 0.000000 1156 +bodner 0 5 5.347108 0.000000 2401 +eduher 0 3 5.857933 0.000000 3499 +jonb 0 2 6.263398 0.000000 4771 +infoc 0 2 6.263398 0.000000 4772 +mound 0 2 6.263398 0.000000 4773 +bodnersect 0 1 6.957497 0.000000 9045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..7187bfd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +network 0 168 1.791759 0.000000 61 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +code 0 108 2.197225 0.000000 116 +teach 0 108 2.197225 0.000000 112 +book 0 99 2.302585 0.000000 131 +section 0 94 2.397895 0.000000 149 +search 0 95 2.397895 0.000000 155 +sinc 0 90 2.397895 0.000000 159 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +exam 0 86 2.484907 0.000000 169 +learn 0 86 2.484907 0.000000 170 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +refer 0 78 2.564949 0.000000 203 +appear 0 78 2.564949 0.000000 210 +line 0 75 2.639057 0.000000 231 +intellig 0 72 2.639057 0.000000 225 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +artifici 0 63 2.772589 0.000000 280 +previou 0 62 2.772589 0.000000 290 +plan 0 65 2.772589 0.000000 272 +plai 0 60 2.833213 0.000000 307 +best 0 59 2.833213 0.000000 299 +detail 0 57 2.890372 0.000000 321 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +cover 0 55 2.944439 0.000000 329 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +give 0 50 3.044522 0.000000 359 +still 0 50 3.044522 0.000000 362 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +directori 0 45 3.135494 0.000000 396 +discuss 0 45 3.135494 0.000000 399 +natur 0 44 3.135494 0.000000 406 +anoth 0 45 3.135494 0.000000 408 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +edit 0 42 3.218876 0.000000 418 +review 0 42 3.218876 0.000000 425 +late 0 40 3.258097 0.000000 439 +probabl 0 40 3.258097 0.000000 455 +game 0 36 3.367296 0.000000 498 +eduoffic 0 33 3.433987 0.000000 531 +within 0 33 3.433987 0.000000 525 +common 0 30 3.555348 0.000000 574 +neural 0 30 3.555348 0.000000 578 +turn 0 29 3.583519 0.000000 586 +usual 0 28 3.610918 0.000000 608 +session 0 26 3.688879 0.000000 643 +notic 0 25 3.737670 0.000000 675 +begin 0 23 3.806662 0.000000 716 +entir 0 20 3.951244 0.000000 811 +minut 0 20 3.951244 0.000000 810 +lisp 1 18 4.060443 4.060443 897 +four 0 18 4.060443 0.000000 905 +attend 0 18 4.060443 0.000000 893 +intro 0 17 4.110874 0.000000 915 +stat 0 17 4.110874 0.000000 924 +ultim 0 17 4.110874 0.000000 943 +modern 0 16 4.174387 0.000000 966 +later 0 15 4.248495 0.000000 1043 +prolog 0 13 4.382027 0.000000 1155 +count 0 12 4.465908 0.000000 1239 +deduct 0 12 4.465908 0.000000 1236 +alpha 0 11 4.553877 0.000000 1348 +engr 0 10 4.653960 0.000000 1427 +hint 0 10 4.653960 0.000000 1419 +recit 0 9 4.753590 0.000000 1475 +russel 0 9 4.753590 0.000000 1507 +beta 0 6 5.164786 0.000000 1993 +supplement 0 5 5.347108 0.000000 2355 +older 0 5 5.347108 0.000000 2387 +midnight 0 4 5.568345 0.000000 2599 +uncertain 0 4 5.568345 0.000000 2758 +graham 0 4 5.568345 0.000000 2817 +steel 0 4 5.568345 0.000000 2818 +kunen 0 3 5.857933 0.000000 3500 +ansi 0 3 5.857933 0.000000 3198 +psych 0 3 5.857933 0.000000 3498 +coursewil 0 2 6.263398 0.000000 4718 +loos 0 2 6.263398 0.000000 4774 +buti 0 2 6.263398 0.000000 4775 +assignmenti 0 2 6.263398 0.000000 4573 +sun 0 2 6.263398 0.000000 4490 +kunenoffic 0 1 6.957497 0.000000 9046 +buildingtelephon 0 1 6.957497 0.000000 9047 +thirdexam 0 1 6.957497 0.000000 9048 +thedai 0 1 6.957497 0.000000 9049 +usefulto 0 1 6.957497 0.000000 9050 +lecturesand 0 1 6.957497 0.000000 9051 +manypaperback 0 1 6.957497 0.000000 9052 +lispcraft 0 1 6.957497 0.000000 9053 +wilenski 0 1 6.957497 0.000000 9054 +norvig 0 1 6.957497 0.000000 9055 +essentiallli 0 1 6.957497 0.000000 9056 +alpha_beta 0 1 6.957497 0.000000 9057 +astar 0 1 6.957497 0.000000 9058 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..fd33e82d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +hour 0 165 1.791759 0.000000 46 +wisconsin 0 169 1.791759 0.000000 54 +assign 0 135 1.945910 0.000000 66 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +report 0 131 2.079442 0.000000 92 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +question 0 91 2.397895 0.000000 141 +solut 0 82 2.484907 0.000000 162 +level 0 87 2.484907 0.000000 180 +novemb 0 81 2.484907 0.000000 179 +exam 0 86 2.484907 0.000000 169 +wide 0 84 2.484907 0.000000 185 +homework 0 79 2.564949 0.000000 193 +decemb 0 80 2.564949 0.000000 215 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +content 0 59 2.833213 0.000000 302 +instruct 0 53 2.944439 0.000000 332 +talk 0 53 2.944439 0.000000 336 +tabl 0 51 2.995732 0.000000 346 +appoint 0 49 3.044522 0.000000 358 +give 0 50 3.044522 0.000000 359 +basic 0 50 3.044522 0.000000 360 +set 0 50 3.044522 0.000000 361 +mark 0 44 3.135494 0.000000 403 +fridai 0 44 3.135494 0.000000 390 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +cach 0 41 3.218876 0.000000 432 +edit 0 42 3.218876 0.000000 418 +seminar 0 38 3.295837 0.000000 470 +cost 0 37 3.332205 0.000000 480 +eduoffic 0 33 3.433987 0.000000 531 +chapter 1 32 3.465736 3.465736 536 +hard 0 30 3.555348 0.000000 563 +full 0 28 3.610918 0.000000 615 +propos 0 28 3.610918 0.000000 602 +comp 0 26 3.688879 0.000000 650 +hill 0 25 3.737670 0.000000 670 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +interconnect 0 17 4.110874 0.000000 937 +doit 0 14 4.317488 0.000000 1111 +reader 0 12 4.465908 0.000000 1246 +patterson 0 9 4.753590 0.000000 1554 +qualifi 0 8 4.875197 0.000000 1721 +pipelin 0 7 5.010635 0.000000 1830 +hennessi 0 5 5.347108 0.000000 2289 +markhil 0 4 5.568345 0.000000 2819 +talluri 0 4 5.568345 0.000000 2820 +ifal 0 2 6.263398 0.000000 4776 +statphon 0 2 6.263398 0.000000 4726 +hilloffic 0 1 6.957497 0.000000 9059 +statemail 0 1 6.957497 0.000000 9060 +shenoffic 0 1 6.957497 0.000000 9061 +mshen 0 1 6.957497 0.000000 9062 +miscellaneawhat 0 1 6.957497 0.000000 9063 +talksread 0 1 6.957497 0.000000 9064 +solutionproject 0 1 6.957497 0.000000 9065 +noonmiscellanea 0 1 6.957497 0.000000 9066 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..a90aedcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +introduct 0 126 2.079442 0.000000 87 +number 0 130 2.079442 0.000000 97 +instructor 0 108 2.197225 0.000000 107 +structur 0 106 2.197225 0.000000 105 +text 0 98 2.302585 0.000000 133 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +academ 0 82 2.484907 0.000000 178 +exampl 0 77 2.564949 0.000000 195 +dynam 0 76 2.564949 0.000000 194 +solv 0 73 2.639057 0.000000 234 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +complex 0 64 2.772589 0.000000 269 +polici 0 64 2.772589 0.000000 279 +room 0 59 2.833213 0.000000 301 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +anoth 0 45 3.135494 0.000000 408 +late 0 40 3.258097 0.000000 439 +tutori 0 39 3.258097 0.000000 437 +microsoft 0 38 3.295837 0.000000 468 +procedur 0 36 3.367296 0.000000 488 +michael 0 35 3.401197 0.000000 514 +administr 0 27 3.637586 0.000000 628 +comp 0 26 3.688879 0.000000 650 +repres 0 26 3.688879 0.000000 656 +consult 0 24 3.761200 0.000000 687 +size 0 23 3.806662 0.000000 713 +alloc 0 20 3.951244 0.000000 821 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +psycholog 0 15 4.248495 0.000000 1054 +borland 0 14 4.317488 0.000000 1067 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +string 0 11 4.553877 0.000000 1340 +cheat 0 10 4.653960 0.000000 1395 +float 0 9 4.753590 0.000000 1504 +debugg 0 9 4.753590 0.000000 1493 +overload 0 5 5.347108 0.000000 2403 +handin 0 5 5.347108 0.000000 2393 +ration 0 5 5.347108 0.000000 2427 +birk 0 4 5.568345 0.000000 2791 +struct 0 4 5.568345 0.000000 2821 +mbirk 0 3 5.857933 0.000000 3501 +intstack 0 2 6.263398 0.000000 4777 +unlimit 0 2 6.263398 0.000000 4778 +classinfo 0 2 6.263398 0.000000 4779 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..9608b869 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +text 0 98 2.302585 0.000000 133 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +exam 0 86 2.484907 0.000000 169 +second 0 81 2.484907 0.000000 166 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +exampl 0 77 2.564949 0.000000 195 +dynam 0 76 2.564949 0.000000 194 +mondai 0 77 2.564949 0.000000 206 +come 0 78 2.564949 0.000000 202 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +solv 0 73 2.639057 0.000000 234 +tuesdai 0 73 2.639057 0.000000 219 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +polici 0 64 2.772589 0.000000 279 +complex 0 64 2.772589 0.000000 269 +room 0 59 2.833213 0.000000 301 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +instruct 0 53 2.944439 0.000000 332 +week 0 52 2.995732 0.000000 343 +case 0 51 2.995732 0.000000 351 +digit 0 52 2.995732 0.000000 348 +appoint 0 49 3.044522 0.000000 358 +format 0 48 3.044522 0.000000 356 +standard 0 48 3.044522 0.000000 365 +discuss 0 45 3.135494 0.000000 399 +anoth 0 45 3.135494 0.000000 408 +past 0 42 3.218876 0.000000 428 +late 0 40 3.258097 0.000000 439 +announc 0 40 3.258097 0.000000 441 +tutori 0 39 3.258097 0.000000 437 +microsoft 0 38 3.295837 0.000000 468 +soon 0 36 3.367296 0.000000 494 +procedur 0 36 3.367296 0.000000 488 +michael 0 35 3.401197 0.000000 514 +print 0 34 3.401197 0.000000 503 +administr 0 27 3.637586 0.000000 628 +comp 0 26 3.688879 0.000000 650 +repres 0 26 3.688879 0.000000 656 +consult 0 24 3.761200 0.000000 687 +size 0 23 3.806662 0.000000 713 +output 0 21 3.912023 0.000000 788 +alloc 0 20 3.951244 0.000000 821 +four 0 18 4.060443 0.000000 905 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +rank 0 14 4.317488 0.000000 1063 +borland 0 14 4.317488 0.000000 1067 +outsid 0 12 4.465908 0.000000 1219 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +eight 0 11 4.553877 0.000000 1331 +string 0 11 4.553877 0.000000 1340 +cheat 0 10 4.653960 0.000000 1395 +float 0 9 4.753590 0.000000 1504 +debugg 0 9 4.753590 0.000000 1493 +overload 0 5 5.347108 0.000000 2403 +handin 0 5 5.347108 0.000000 2393 +ration 0 5 5.347108 0.000000 2427 +birk 0 4 5.568345 0.000000 2791 +struct 0 4 5.568345 0.000000 2821 +mbirk 0 3 5.857933 0.000000 3501 +intstack 0 2 6.263398 0.000000 4777 +unlimit 0 2 6.263398 0.000000 4778 +classinfo 0 2 6.263398 0.000000 4779 +dice 0 1 6.957497 0.000000 9067 +hangman 0 1 6.957497 0.000000 9068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..925b7dd2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +start 0 83 2.484907 0.000000 173 +academ 0 82 2.484907 0.000000 178 +solut 0 82 2.484907 0.000000 162 +refer 0 78 2.564949 0.000000 203 +david 0 71 2.639057 0.000000 232 +meet 0 72 2.639057 0.000000 229 +solv 0 73 2.639057 0.000000 234 +materi 0 75 2.639057 0.000000 221 +window 0 68 2.708050 0.000000 242 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +share 0 59 2.833213 0.000000 304 +archiv 0 49 3.044522 0.000000 364 +frequent 0 49 3.044522 0.000000 367 +get 0 46 3.091042 0.000000 380 +press 0 42 3.218876 0.000000 419 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +statist 0 35 3.401197 0.000000 521 +often 0 31 3.496508 0.000000 551 +rule 0 26 3.688879 0.000000 638 +daili 0 24 3.761200 0.000000 706 +consult 0 24 3.761200 0.000000 687 +walter 0 17 4.110874 0.000000 950 +alreadi 0 16 4.174387 0.000000 963 +misconduct 0 16 4.174387 0.000000 1003 +psycholog 0 15 4.248495 0.000000 1054 +dave 0 14 4.317488 0.000000 1098 +floor 0 14 4.317488 0.000000 1070 +essenti 0 13 4.382027 0.000000 1137 +savitch 0 12 4.465908 0.000000 1269 +addison 0 12 4.465908 0.000000 1230 +andth 0 9 4.753590 0.000000 1481 +tutor 0 9 4.753590 0.000000 1552 +reload 0 8 4.875197 0.000000 1682 +attach 0 7 5.010635 0.000000 1785 +rough 0 6 5.164786 0.000000 2107 +button 0 5 5.347108 0.000000 2337 +noland 0 5 5.347108 0.000000 2420 +thumb 0 4 5.568345 0.000000 2816 +melski 0 2 6.263398 0.000000 4780 +melskithes 0 1 6.957497 0.000000 9069 +uncomfort 0 1 6.957497 0.000000 9070 +wesleypublish 0 1 6.957497 0.000000 9071 +usingborland 0 1 6.957497 0.000000 9072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..c84304ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +introduct 0 126 2.079442 0.000000 87 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +version 0 113 2.197225 0.000000 122 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +octob 0 89 2.397895 0.000000 156 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +grade 0 90 2.397895 0.000000 142 +novemb 0 81 2.484907 0.000000 179 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +requir 0 81 2.484907 0.000000 167 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +good 0 77 2.564949 0.000000 200 +html 0 75 2.639057 0.000000 235 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +thursdai 0 70 2.708050 0.000000 241 +test 0 66 2.708050 0.000000 252 +syllabu 0 67 2.708050 0.000000 247 +main 0 67 2.708050 0.000000 256 +wednesdai 0 64 2.772589 0.000000 261 +septemb 0 65 2.772589 0.000000 274 +guid 0 63 2.772589 0.000000 267 +function 0 62 2.772589 0.000000 275 +creat 0 63 2.772589 0.000000 277 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +publish 0 57 2.890372 0.000000 326 +case 0 51 2.995732 0.000000 351 +standard 0 48 3.044522 0.000000 365 +appoint 0 49 3.044522 0.000000 358 +frequent 0 49 3.044522 0.000000 367 +fridai 0 44 3.135494 0.000000 390 +algebra 0 45 3.135494 0.000000 394 +textbook 0 44 3.135494 0.000000 397 +compani 0 41 3.218876 0.000000 423 +form 0 39 3.258097 0.000000 443 +respons 0 37 3.332205 0.000000 476 +survei 0 35 3.401197 0.000000 513 +eduoffic 0 33 3.433987 0.000000 531 +ad 0 32 3.465736 0.000000 544 +given 0 32 3.465736 0.000000 538 +often 0 31 3.496508 0.000000 551 +titl 0 31 3.496508 0.000000 556 +specifi 0 30 3.555348 0.000000 568 +turn 0 29 3.583519 0.000000 586 +releas 0 28 3.610918 0.000000 616 +todai 0 25 3.737670 0.000000 672 +valu 0 25 3.737670 0.000000 665 +consult 0 24 3.761200 0.000000 687 +known 0 24 3.761200 0.000000 702 +input 0 23 3.806662 0.000000 727 +martin 0 21 3.912023 0.000000 794 +latest 0 21 3.912023 0.000000 785 +sure 0 20 3.951244 0.000000 813 +coupl 0 17 4.110874 0.000000 939 +walter 0 17 4.110874 0.000000 950 +quiz 0 16 4.174387 0.000000 990 +sheet 0 16 4.174387 0.000000 973 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +score 0 15 4.248495 0.000000 1017 +style 0 15 4.248495 0.000000 1036 +introduc 0 13 4.382027 0.000000 1139 +onth 0 12 4.465908 0.000000 1218 +vectra 0 12 4.465908 0.000000 1267 +errata 0 10 4.653960 0.000000 1403 +minimum 0 9 4.753590 0.000000 1555 +remind 0 7 5.010635 0.000000 1799 +ethic 0 7 5.010635 0.000000 1786 +savitchaddison 0 5 5.347108 0.000000 2396 +struct 0 4 5.568345 0.000000 2821 +maximum 0 4 5.568345 0.000000 2632 +toth 0 4 5.568345 0.000000 2595 +beginn 0 3 5.857933 0.000000 3330 +milo 0 2 6.263398 0.000000 4781 +viru 0 2 6.263398 0.000000 4782 +bankaccount 0 1 6.957497 0.000000 9073 +withprompt 0 1 6.957497 0.000000 9074 +psychologyinstructor 0 1 6.957497 0.000000 9075 +announcementsthi 0 1 6.957497 0.000000 9076 +scheduledfor 0 1 6.957497 0.000000 9077 +haseveryth 0 1 6.957497 0.000000 9078 +isaccur 0 1 6.957497 0.000000 9079 +withinform 0 1 6.957497 0.000000 9080 +linksar 0 1 6.957497 0.000000 9081 +onfridai 0 1 6.957497 0.000000 9082 +policyclass 0 1 6.957497 0.000000 9083 +minmax 0 1 6.957497 0.000000 9084 +findth 0 1 6.957497 0.000000 9085 +enteredfrom 0 1 6.957497 0.000000 9086 +stdin 0 1 6.957497 0.000000 9087 +formlett 0 1 6.957497 0.000000 9088 +theopen_fil 0 1 6.957497 0.000000 9089 +hoax 0 1 6.957497 0.000000 9090 +andprofession 0 1 6.957497 0.000000 9091 +conductassign 0 1 6.957497 0.000000 9092 +questionar 0 1 6.957497 0.000000 9093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..b0dc32f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +contain 0 81 2.484907 0.000000 174 +april 0 77 2.564949 0.000000 196 +messag 0 76 2.564949 0.000000 212 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +syllabu 0 67 2.708050 0.000000 247 +januari 0 62 2.772589 0.000000 264 +polici 0 64 2.772589 0.000000 279 +march 0 61 2.833213 0.000000 295 +share 0 59 2.833213 0.000000 304 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +februari 0 54 2.944439 0.000000 328 +talk 0 53 2.944439 0.000000 336 +run 0 51 2.995732 0.000000 347 +appoint 0 49 3.044522 0.000000 358 +algebra 0 45 3.135494 0.000000 394 +made 0 44 3.135494 0.000000 398 +late 0 40 3.258097 0.000000 439 +statist 0 35 3.401197 0.000000 521 +rule 0 26 3.688879 0.000000 638 +sent 0 22 3.850148 0.000000 763 +tent 0 22 3.850148 0.000000 739 +martin 0 21 3.912023 0.000000 794 +walter 0 17 4.110874 0.000000 950 +former 0 17 4.110874 0.000000 956 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +extra 0 11 4.553877 0.000000 1312 +criteria 0 9 4.753590 0.000000 1477 +absolut 0 8 4.875197 0.000000 1646 +calendar 0 8 4.875197 0.000000 1649 +noland 0 5 5.347108 0.000000 2420 +informationc 0 5 5.347108 0.000000 2394 +thumb 0 4 5.568345 0.000000 2816 +textbookproblem 0 3 5.857933 0.000000 3483 +ream 0 2 6.263398 0.000000 4783 +mream 0 2 6.263398 0.000000 4784 +csst 0 2 6.263398 0.000000 4764 +classc 0 1 6.957497 0.000000 9094 +programmingspr 0 1 6.957497 0.000000 9095 +nothingeverydai 0 1 6.957497 0.000000 9096 +pagescommon 0 1 6.957497 0.000000 9097 +programmingmistakesarch 0 1 6.957497 0.000000 9098 +placeto 0 1 6.957497 0.000000 9099 +announcedcours 0 1 6.957497 0.000000 9100 +andborland 0 1 6.957497 0.000000 9101 +academicmisconduct 0 1 6.957497 0.000000 9102 +anyform 0 1 6.957497 0.000000 9103 +bigtodd 0 1 6.957497 0.000000 9104 +thielwendi 0 1 6.957497 0.000000 9105 +staatsabout 0 1 6.957497 0.000000 9106 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..c2578504 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +note 0 142 1.945910 0.000000 67 +object 0 138 1.945910 0.000000 79 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +novemb 0 81 2.484907 0.000000 179 +stuff 0 87 2.484907 0.000000 171 +academ 0 82 2.484907 0.000000 178 +exampl 0 77 2.564949 0.000000 195 +refer 0 78 2.564949 0.000000 203 +tuesdai 0 73 2.639057 0.000000 219 +line 0 75 2.639057 0.000000 231 +solv 0 73 2.639057 0.000000 234 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +function 0 62 2.772589 0.000000 275 +publish 0 57 2.890372 0.000000 326 +sampl 0 53 2.944439 0.000000 339 +week 0 52 2.995732 0.000000 343 +appoint 0 49 3.044522 0.000000 358 +understand 0 47 3.091042 0.000000 384 +algebra 0 45 3.135494 0.000000 394 +even 0 45 3.135494 0.000000 393 +compani 0 41 3.218876 0.000000 423 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +hand 0 37 3.332205 0.000000 475 +eduoffic 0 33 3.433987 0.000000 531 +extend 0 32 3.465736 0.000000 539 +taken 0 31 3.496508 0.000000 555 +comp 0 26 3.688879 0.000000 650 +mike 0 24 3.761200 0.000000 703 +greg 0 24 3.761200 0.000000 695 +known 0 24 3.761200 0.000000 702 +miss 0 19 4.007333 0.000000 866 +stat 0 17 4.110874 0.000000 924 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +near 0 14 4.317488 0.000000 1091 +borland 0 14 4.317488 0.000000 1067 +everyth 0 13 4.382027 0.000000 1169 +rememb 0 12 4.465908 0.000000 1217 +vectra 0 12 4.465908 0.000000 1267 +fill 0 11 4.553877 0.000000 1349 +deadlin 0 9 4.753590 0.000000 1502 +didn 0 9 4.753590 0.000000 1563 +informationemail 0 9 4.753590 0.000000 1564 +bottom 0 7 5.010635 0.000000 1906 +ifyou 0 6 5.164786 0.000000 1992 +sharp 0 6 5.164786 0.000000 2100 +clarif 0 5 5.347108 0.000000 2253 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +steel 0 4 5.568345 0.000000 2818 +programmingassign 0 3 5.857933 0.000000 3398 +homeclass 0 3 5.857933 0.000000 3411 +nolandinstructor 0 2 6.263398 0.000000 4785 +msteel 0 1 6.957497 0.000000 9107 +steeleemail 0 1 6.957497 0.000000 9108 +buildingoffic 0 1 6.957497 0.000000 9109 +soffic 0 1 6.957497 0.000000 9110 +announcementsi 0 1 6.957497 0.000000 9111 +thenew 0 1 6.957497 0.000000 9112 +informationmidterm 0 1 6.957497 0.000000 9113 +pastfew 0 1 6.957497 0.000000 9114 +gloss 0 1 6.957497 0.000000 9115 +makefulli 0 1 6.957497 0.000000 9116 +objectivesabout 0 1 6.957497 0.000000 9117 +consultantscours 0 1 6.957497 0.000000 9118 +assignmentsnot 0 1 6.957497 0.000000 9119 +handoutsprogram 0 1 6.957497 0.000000 9120 +assignmentsexam 0 1 6.957497 0.000000 9121 +quizzessom 0 1 6.957497 0.000000 9122 +examplespolici 0 1 6.957497 0.000000 9123 +policyus 0 1 6.957497 0.000000 9124 +pagesintroduct 0 1 6.957497 0.000000 9125 +windowsintroduct 0 1 6.957497 0.000000 9126 +styleguid 0 1 6.957497 0.000000 9127 +codetextproblem 0 1 6.957497 0.000000 9128 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..11ed4dd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +like 0 132 1.945910 0.000000 81 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +topic 0 114 2.197225 0.000000 110 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +code 0 108 2.197225 0.000000 116 +text 0 98 2.302585 0.000000 133 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +info 0 85 2.484907 0.000000 176 +exam 0 86 2.484907 0.000000 169 +level 0 87 2.484907 0.000000 180 +requir 0 81 2.484907 0.000000 167 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +second 0 81 2.484907 0.000000 166 +good 0 77 2.564949 0.000000 200 +decemb 0 80 2.564949 0.000000 215 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +test 0 66 2.708050 0.000000 252 +knowledg 0 67 2.708050 0.000000 243 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +polici 0 64 2.772589 0.000000 279 +copi 0 63 2.772589 0.000000 284 +result 0 65 2.772589 0.000000 281 +import 0 65 2.772589 0.000000 282 +guid 0 63 2.772589 0.000000 267 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +think 0 57 2.890372 0.000000 314 +sampl 0 53 2.944439 0.000000 339 +archiv 0 49 3.044522 0.000000 364 +give 0 50 3.044522 0.000000 359 +done 0 47 3.091042 0.000000 381 +made 0 44 3.135494 0.000000 398 +anoth 0 45 3.135494 0.000000 408 +mark 0 44 3.135494 0.000000 403 +answer 0 45 3.135494 0.000000 391 +error 0 40 3.258097 0.000000 449 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +feel 0 37 3.332205 0.000000 483 +statist 0 35 3.401197 0.000000 521 +eduoffic 0 33 3.433987 0.000000 531 +ad 0 32 3.465736 0.000000 544 +idea 0 32 3.465736 0.000000 545 +rang 0 30 3.555348 0.000000 565 +administr 0 27 3.637586 0.000000 628 +session 0 26 3.688879 0.000000 643 +todai 0 25 3.737670 0.000000 672 +notic 0 25 3.737670 0.000000 675 +consult 0 24 3.761200 0.000000 687 +tent 0 22 3.850148 0.000000 739 +sure 0 20 3.951244 0.000000 813 +andrew 0 19 4.007333 0.000000 849 +five 0 19 4.007333 0.000000 841 +seem 0 18 4.060443 0.000000 899 +modif 0 17 4.110874 0.000000 913 +walter 0 17 4.110874 0.000000 950 +quiz 0 16 4.174387 0.000000 990 +alreadi 0 16 4.174387 0.000000 963 +score 0 15 4.248495 0.000000 1017 +style 0 15 4.248495 0.000000 1036 +borland 0 14 4.317488 0.000000 1067 +everyon 0 13 4.382027 0.000000 1148 +verifi 0 12 4.465908 0.000000 1261 +minor 0 12 4.465908 0.000000 1237 +rememb 0 12 4.465908 0.000000 1217 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +extra 0 11 4.553877 0.000000 1312 +calendar 0 8 4.875197 0.000000 1649 +carefulli 0 6 5.164786 0.000000 2045 +ahead 0 5 5.347108 0.000000 2338 +crazi 0 4 5.568345 0.000000 2822 +webpag 0 4 5.568345 0.000000 2660 +prock 0 2 6.263398 0.000000 4786 +infoc 0 2 6.263398 0.000000 4772 +sessionalgebra 0 1 6.957497 0.000000 9129 +prockemail 0 1 6.957497 0.000000 9130 +thgrader 0 1 6.957497 0.000000 9131 +haihong 0 1 6.957497 0.000000 9132 +wangemail 0 1 6.957497 0.000000 9133 +mtwrannounc 0 1 6.957497 0.000000 9134 +gotton 0 1 6.957497 0.000000 9135 +perus 0 1 6.957497 0.000000 9136 +assing 0 1 6.957497 0.000000 9137 +misconductcours 0 1 6.957497 0.000000 9138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..4e1d4c71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +avail 0 169 1.791759 0.000000 48 +file 0 132 1.945910 0.000000 70 +site 0 106 2.197225 0.000000 119 +world 0 115 2.197225 0.000000 126 +need 0 98 2.302585 0.000000 135 +grade 0 90 2.397895 0.000000 142 +search 0 95 2.397895 0.000000 155 +info 1 85 2.484907 2.484907 176 +wide 0 84 2.484907 0.000000 185 +stuff 0 87 2.484907 0.000000 171 +resourc 0 81 2.484907 0.000000 172 +internet 0 83 2.484907 0.000000 186 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +addit 0 74 2.639057 0.000000 228 +function 0 62 2.772589 0.000000 275 +copi 0 63 2.772589 0.000000 284 +virtual 0 62 2.772589 0.000000 285 +visit 0 63 2.772589 0.000000 288 +type 0 61 2.833213 0.000000 296 +best 0 59 2.833213 0.000000 299 +semest 0 58 2.890372 0.000000 312 +index 0 56 2.890372 0.000000 309 +space 0 57 2.890372 0.000000 310 +maintain 0 51 2.995732 0.000000 342 +archiv 0 49 3.044522 0.000000 364 +without 0 50 3.044522 0.000000 370 +might 0 41 3.218876 0.000000 426 +everi 0 34 3.401197 0.000000 519 +word 0 34 3.401197 0.000000 508 +post 0 35 3.401197 0.000000 505 +quot 0 29 3.583519 0.000000 582 +usual 0 28 3.610918 0.000000 608 +compress 0 23 3.806662 0.000000 719 +sent 0 22 3.850148 0.000000 763 +disk 0 22 3.850148 0.000000 747 +try 0 22 3.850148 0.000000 764 +command 0 14 4.317488 0.000000 1083 +keyword 0 11 4.553877 0.000000 1356 +enter 0 10 4.653960 0.000000 1454 +paragraph 0 10 4.653960 0.000000 1449 +tourist 0 8 4.875197 0.000000 1710 +usenet 0 7 5.010635 0.000000 1839 +nine 0 6 5.164786 0.000000 2047 +ignor 0 5 5.347108 0.000000 2288 +clickher 0 5 5.347108 0.000000 2428 +kelli 0 4 5.568345 0.000000 2793 +backup 0 4 5.568345 0.000000 2645 +oracl 0 4 5.568345 0.000000 2823 +ratliff 0 3 5.857933 0.000000 3419 +badger 0 3 5.857933 0.000000 3502 +sharewar 0 3 5.857933 0.000000 3503 +freewar 0 3 5.857933 0.000000 3504 +shuttl 0 2 6.263398 0.000000 4787 +clickabl 0 2 6.263398 0.000000 4788 +herald 0 2 6.263398 0.000000 4789 +biggest 0 2 6.263398 0.000000 4790 +desautel 0 2 6.263398 0.000000 4791 +simtel 0 1 6.957497 0.000000 9139 +wildcard 0 1 6.957497 0.000000 9140 +filesviru 0 1 6.957497 0.000000 9141 +faqfun 0 1 6.957497 0.000000 9142 +mapth 0 1 6.957497 0.000000 9143 +comicshumor 0 1 6.957497 0.000000 9144 +abort 0 1 6.957497 0.000000 9145 +retri 0 1 6.957497 0.000000 9146 +usersfin 0 1 6.957497 0.000000 9147 +weeklab 0 1 6.957497 0.000000 9148 +jokesget 0 1 6.957497 0.000000 9149 +tryingsom 0 1 6.957497 0.000000 9150 +somecompress 0 1 6.957497 0.000000 9151 +unpack 0 1 6.957497 0.000000 9152 +reviewsom 0 1 6.957497 0.000000 9153 +minclud 0 1 6.957497 0.000000 9154 +infocompress 0 1 6.957497 0.000000 9155 +infofavorit 0 1 6.957497 0.000000 9156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..66dd50ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +like 0 132 1.945910 0.000000 81 +lectur 0 135 1.945910 0.000000 73 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +find 0 111 2.197225 0.000000 111 +final 0 116 2.197225 0.000000 108 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +novemb 0 81 2.484907 0.000000 179 +build 0 85 2.484907 0.000000 184 +academ 0 82 2.484907 0.000000 178 +come 0 78 2.564949 0.000000 202 +mondai 0 77 2.564949 0.000000 206 +solv 0 73 2.639057 0.000000 234 +window 0 68 2.708050 0.000000 242 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +room 0 59 2.833213 0.000000 301 +semest 0 58 2.890372 0.000000 312 +keep 0 44 3.135494 0.000000 409 +textbook 0 44 3.135494 0.000000 397 +compani 0 41 3.218876 0.000000 423 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +game 0 36 3.367296 0.000000 498 +statist 0 35 3.401197 0.000000 521 +eduoffic 0 33 3.433987 0.000000 531 +except 0 28 3.610918 0.000000 607 +although 0 25 3.737670 0.000000 667 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +quizz 0 13 4.382027 0.000000 1151 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +sundai 0 10 4.653960 0.000000 1387 +russel 0 9 4.753590 0.000000 1507 +saturdai 0 7 5.010635 0.000000 1794 +footbal 0 7 5.010635 0.000000 1912 +prioriti 0 7 5.010635 0.000000 1792 +none 0 7 5.010635 0.000000 1811 +basement 0 4 5.568345 0.000000 2663 +man 0 3 5.857933 0.000000 3417 +csc 0 3 5.857933 0.000000 3183 +windowshint 0 3 5.857933 0.000000 3484 +compilersth 0 3 5.857933 0.000000 3485 +systememailmosaicnetscap 0 3 5.857933 0.000000 3486 +languageth 0 3 5.857933 0.000000 3487 +russ 0 1 6.957497 0.000000 9157 +manningemail 0 1 6.957497 0.000000 9158 +rman 0 1 6.957497 0.000000 9159 +scienceoffic 0 1 6.957497 0.000000 9160 +rotc 0 1 6.957497 0.000000 9161 +textold 0 1 6.957497 0.000000 9162 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..4092bd22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +techniqu 0 99 2.302585 0.000000 138 +search 0 95 2.397895 0.000000 155 +section 0 94 2.397895 0.000000 149 +learn 0 86 2.484907 0.000000 170 +academ 0 82 2.484907 0.000000 178 +exam 0 86 2.484907 0.000000 169 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +intellig 0 72 2.639057 0.000000 225 +solv 0 73 2.639057 0.000000 234 +logic 0 71 2.639057 0.000000 230 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +plan 0 65 2.772589 0.000000 272 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +best 0 59 2.833213 0.000000 299 +plai 0 60 2.833213 0.000000 307 +content 0 59 2.833213 0.000000 302 +semest 0 58 2.890372 0.000000 312 +variou 0 56 2.890372 0.000000 317 +cover 0 55 2.944439 0.000000 329 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +principl 0 48 3.044522 0.000000 357 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +natur 0 44 3.135494 0.000000 406 +offer 0 43 3.178054 0.000000 414 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +seminar 0 38 3.295837 0.000000 470 +robot 0 36 3.367296 0.000000 497 +game 0 36 3.367296 0.000000 498 +represent 0 35 3.401197 0.000000 512 +semant 0 29 3.583519 0.000000 587 +rule 0 26 3.688879 0.000000 638 +frame 0 24 3.761200 0.000000 684 +motion 0 24 3.761200 0.000000 699 +expert 0 20 3.951244 0.000000 833 +mostli 0 19 4.007333 0.000000 869 +lisp 0 18 4.060443 0.000000 897 +biologi 0 15 4.248495 0.000000 1049 +chuck 0 14 4.317488 0.000000 1108 +prolog 0 13 4.382027 0.000000 1155 +deduct 0 12 4.465908 0.000000 1236 +readabl 0 12 4.465908 0.000000 1258 +alpha 0 11 4.553877 0.000000 1348 +wendt 0 10 4.653960 0.000000 1446 +dyer 0 9 4.753590 0.000000 1573 +qualifi 0 8 4.875197 0.000000 1721 +predic 0 7 5.010635 0.000000 1806 +jude 0 6 5.164786 0.000000 2123 +beta 0 6 5.164786 0.000000 1993 +extern 0 6 5.164786 0.000000 2105 +shavlik 0 5 5.347108 0.000000 2429 +connectionist 0 5 5.347108 0.000000 2430 +bryan 0 5 5.347108 0.000000 2421 +sabbat 0 4 5.568345 0.000000 2824 +kunen 0 3 5.857933 0.000000 3500 +thefal 0 1 6.957497 0.000000 9163 +assumedprerequisit 0 1 6.957497 0.000000 9164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..d4b22052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,217 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +assign 1 135 1.945910 1.945910 66 +relat 0 139 1.945910 0.000000 68 +lectur 0 135 1.945910 0.000000 73 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +databas 0 122 2.079442 0.000000 86 +theori 0 111 2.197225 0.000000 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +person 0 111 2.197225 0.000000 117 +make 0 111 2.197225 0.000000 120 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +code 0 108 2.197225 0.000000 116 +peopl 0 96 2.302585 0.000000 132 +question 0 91 2.397895 0.000000 141 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +learn 0 86 2.484907 0.000000 170 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +resourc 0 81 2.484907 0.000000 172 +institut 0 84 2.484907 0.000000 187 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +start 0 83 2.484907 0.000000 173 +exam 0 86 2.484907 0.000000 169 +stuff 0 87 2.484907 0.000000 171 +ieee 0 86 2.484907 0.000000 190 +homework 0 79 2.564949 0.000000 193 +april 0 77 2.564949 0.000000 196 +mondai 0 77 2.564949 0.000000 206 +server 0 76 2.564949 0.000000 204 +refer 0 78 2.564949 0.000000 203 +free 0 73 2.639057 0.000000 224 +logic 0 71 2.639057 0.000000 230 +summari 0 73 2.639057 0.000000 237 +materi 0 75 2.639057 0.000000 221 +line 0 75 2.639057 0.000000 231 +write 0 72 2.639057 0.000000 222 +knowledg 0 67 2.708050 0.000000 243 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +polici 0 64 2.772589 0.000000 279 +artifici 0 63 2.772589 0.000000 280 +creat 0 63 2.772589 0.000000 277 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +march 0 61 2.833213 0.000000 295 +best 0 59 2.833213 0.000000 299 +content 0 59 2.833213 0.000000 302 +space 0 57 2.890372 0.000000 310 +semest 0 58 2.890372 0.000000 312 +overview 0 56 2.890372 0.000000 323 +sever 0 56 2.890372 0.000000 322 +februari 0 54 2.944439 0.000000 328 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +cover 0 55 2.944439 0.000000 329 +week 0 52 2.995732 0.000000 343 +tabl 0 51 2.995732 0.000000 346 +archiv 0 49 3.044522 0.000000 364 +frequent 0 49 3.044522 0.000000 367 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +adapt 0 46 3.091042 0.000000 387 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +math 0 44 3.135494 0.000000 402 +edit 0 42 3.218876 0.000000 418 +author 0 39 3.258097 0.000000 450 +late 0 40 3.258097 0.000000 439 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +connect 0 37 3.332205 0.000000 485 +workstat 0 37 3.332205 0.000000 479 +tree 0 36 3.367296 0.000000 492 +bibliographi 0 34 3.401197 0.000000 518 +print 0 34 3.401197 0.000000 503 +next 0 34 3.401197 0.000000 517 +manual 0 35 3.401197 0.000000 504 +articl 0 33 3.433987 0.000000 530 +chapter 0 32 3.465736 0.000000 536 +concept 0 32 3.465736 0.000000 537 +idea 0 32 3.465736 0.000000 545 +neural 0 30 3.555348 0.000000 578 +common 0 30 3.555348 0.000000 574 +ask 0 28 3.610918 0.000000 597 +measur 0 28 3.610918 0.000000 609 +progress 0 28 3.610918 0.000000 598 +compar 0 26 3.688879 0.000000 648 +experiment 0 26 3.688879 0.000000 645 +comp 0 26 3.688879 0.000000 650 +lab 0 24 3.761200 0.000000 698 +decis 0 23 3.806662 0.000000 728 +lead 0 23 3.806662 0.000000 718 +methodolog 0 23 3.806662 0.000000 733 +instead 0 22 3.850148 0.000000 756 +dai 0 22 3.850148 0.000000 753 +sure 0 20 3.951244 0.000000 813 +department 0 20 3.951244 0.000000 839 +feedback 0 19 4.007333 0.000000 854 +definit 0 19 4.007333 0.000000 864 +five 0 19 4.007333 0.000000 841 +mostli 0 19 4.007333 0.000000 869 +benchmark 0 19 4.007333 0.000000 859 +lisp 0 18 4.060443 0.000000 897 +accept 0 18 4.060443 0.000000 879 +behavior 0 18 4.060443 0.000000 881 +stat 0 17 4.110874 0.000000 924 +analyz 0 17 4.110874 0.000000 925 +previous 0 17 4.110874 0.000000 923 +sheet 0 16 4.174387 0.000000 973 +explan 0 16 4.174387 0.000000 985 +biologi 0 15 4.248495 0.000000 1049 +train 0 14 4.317488 0.000000 1066 +emac 0 13 4.382027 0.000000 1143 +readabl 0 12 4.465908 0.000000 1258 +refin 0 11 4.553877 0.000000 1363 +summar 0 11 4.553877 0.000000 1295 +council 0 11 4.553877 0.000000 1364 +genet 0 10 4.653960 0.000000 1409 +sentenc 0 10 4.653960 0.000000 1413 +penalti 0 10 4.653960 0.000000 1405 +wendt 0 10 4.653960 0.000000 1446 +moonei 0 9 4.753590 0.000000 1520 +debugg 0 9 4.753590 0.000000 1493 +empir 0 8 4.875197 0.000000 1722 +irvin 0 8 4.875197 0.000000 1660 +printer 0 8 4.875197 0.000000 1621 +noon 0 7 5.010635 0.000000 1804 +tip 0 7 5.010635 0.000000 1863 +analyt 0 7 5.010635 0.000000 1913 +migrat 0 7 5.010635 0.000000 1851 +dataset 0 7 5.010635 0.000000 1914 +discoveri 0 7 5.010635 0.000000 1915 +jude 0 6 5.164786 0.000000 2123 +geoff 0 6 5.164786 0.000000 2124 +highwai 0 6 5.164786 0.000000 2095 +heurist 0 6 5.164786 0.000000 2125 +extern 0 6 5.164786 0.000000 2105 +shavlik 0 5 5.347108 0.000000 2429 +reinforc 0 4 5.568345 0.000000 2674 +basement 0 4 5.568345 0.000000 2663 +exhaust 0 4 5.568345 0.000000 2825 +novic 0 4 5.568345 0.000000 2815 +steel 0 4 5.568345 0.000000 2818 +weinberg 0 3 5.857933 0.000000 3443 +geoffrei 0 3 5.857933 0.000000 3505 +soar 0 3 5.857933 0.000000 3506 +backpropag 0 3 5.857933 0.000000 3507 +weekend 0 3 5.857933 0.000000 3357 +canadian 0 3 5.857933 0.000000 3508 +mitchel 0 2 6.263398 0.000000 4792 +towel 0 2 6.263398 0.000000 4793 +fisher 0 2 6.263398 0.000000 4794 +induc 0 2 6.263398 0.000000 4795 +akcl 0 2 6.263398 0.000000 4796 +quinlan 0 2 6.263398 0.000000 4797 +unsupervis 0 2 6.263398 0.000000 4233 +cogsci 0 2 6.263398 0.000000 4798 +tractabl 0 2 6.263398 0.000000 4799 +rumelhart 0 1 6.957497 0.000000 9165 +backprop 0 1 6.957497 0.000000 9166 +cobweb 0 1 6.957497 0.000000 9167 +austrian 0 1 6.957497 0.000000 9168 +chunk 0 1 6.957497 0.000000 9169 +laird 0 1 6.957497 0.000000 9170 +rosenbloom 0 1 6.957497 0.000000 9171 +newel 0 1 6.957497 0.000000 9172 +dietterich 0 1 6.957497 0.000000 9173 +zipser 0 1 6.957497 0.000000 9174 +lenat 0 1 6.957497 0.000000 9175 +kibler 0 1 6.957497 0.000000 9176 +kbann 0 1 6.957497 0.000000 9177 +brr 0 1 6.957497 0.000000 9178 +ineedagoodicon 0 1 6.957497 0.000000 9179 +nip 0 1 6.957497 0.000000 9180 +premier 0 1 6.957497 0.000000 9181 +shavlikshavlik 0 1 6.957497 0.000000 9182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..e1818d67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,248 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +schedul 0 119 2.079442 0.000000 85 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +manag 0 114 2.197225 0.000000 125 +place 0 106 2.197225 0.000000 124 +version 0 113 2.197225 0.000000 122 +structur 0 106 2.197225 0.000000 105 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +memori 0 101 2.302585 0.000000 139 +text 0 98 2.302585 0.000000 133 +advanc 0 99 2.302585 0.000000 130 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +exam 0 86 2.484907 0.000000 169 +activ 0 84 2.484907 0.000000 182 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +requir 0 81 2.484907 0.000000 167 +mondai 0 77 2.564949 0.000000 206 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +good 0 77 2.564949 0.000000 200 +complet 0 77 2.564949 0.000000 208 +summari 0 73 2.639057 0.000000 237 +write 0 72 2.639057 0.000000 222 +tuesdai 0 73 2.639057 0.000000 219 +involv 0 71 2.639057 0.000000 227 +meet 0 72 2.639057 0.000000 229 +thursdai 0 70 2.708050 0.000000 241 +simul 0 66 2.708050 0.000000 255 +new 0 64 2.772589 0.000000 262 +copi 0 63 2.772589 0.000000 284 +wednesdai 0 64 2.772589 0.000000 261 +previou 0 62 2.772589 0.000000 290 +result 0 65 2.772589 0.000000 281 +room 0 59 2.833213 0.000000 301 +content 0 59 2.833213 0.000000 302 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +space 0 57 2.890372 0.000000 310 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +suggest 0 53 2.944439 0.000000 331 +sampl 0 53 2.944439 0.000000 339 +cover 0 55 2.944439 0.000000 329 +week 0 52 2.995732 0.000000 343 +much 0 52 2.995732 0.000000 349 +case 0 51 2.995732 0.000000 351 +give 0 50 3.044522 0.000000 359 +standard 0 48 3.044522 0.000000 365 +done 0 47 3.091042 0.000000 381 +midterm 0 45 3.135494 0.000000 392 +discuss 0 45 3.135494 0.000000 399 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +textbook 0 44 3.135494 0.000000 397 +term 0 43 3.178054 0.000000 411 +around 0 43 3.178054 0.000000 415 +past 0 42 3.218876 0.000000 428 +review 0 42 3.218876 0.000000 425 +must 0 40 3.258097 0.000000 442 +realli 0 40 3.258097 0.000000 444 +expect 0 37 3.332205 0.000000 484 +statist 0 35 3.401197 0.000000 521 +survei 0 35 3.401197 0.000000 513 +approxim 0 35 3.401197 0.000000 509 +jame 0 35 3.401197 0.000000 507 +michael 0 35 3.401197 0.000000 514 +articl 0 33 3.433987 0.000000 530 +richard 0 31 3.496508 0.000000 559 +secur 0 30 3.555348 0.000000 577 +compon 0 30 3.555348 0.000000 570 +particip 0 29 3.583519 0.000000 589 +synchron 0 29 3.583519 0.000000 588 +intend 0 28 3.610918 0.000000 599 +measur 0 28 3.610918 0.000000 609 +experiment 0 26 3.688879 0.000000 645 +relev 0 26 3.688879 0.000000 637 +daili 0 24 3.761200 0.000000 706 +tent 0 22 3.850148 0.000000 739 +wang 0 21 3.912023 0.000000 790 +chen 0 21 3.912023 0.000000 791 +watch 0 21 3.912023 0.000000 789 +latest 0 21 3.912023 0.000000 785 +similar 0 21 3.912023 0.000000 771 +basi 0 20 3.951244 0.000000 828 +qualiti 0 20 3.951244 0.000000 832 +assum 0 19 4.007333 0.000000 845 +andrew 0 19 4.007333 0.000000 849 +eric 0 19 4.007333 0.000000 870 +encourag 0 18 4.060443 0.000000 880 +figur 0 18 4.060443 0.000000 903 +listen 0 18 4.060443 0.000000 907 +protect 0 17 4.110874 0.000000 935 +normal 0 16 4.174387 0.000000 995 +zhang 0 16 4.174387 0.000000 980 +purchas 0 15 4.248495 0.000000 1030 +todd 0 15 4.248495 0.000000 1051 +classic 0 14 4.317488 0.000000 1084 +doit 0 14 4.317488 0.000000 1111 +individu 0 13 4.382027 0.000000 1126 +readabl 0 12 4.465908 0.000000 1258 +verifi 0 12 4.465908 0.000000 1261 +literatur 0 11 4.553877 0.000000 1300 +broad 0 11 4.553877 0.000000 1302 +worth 0 11 4.553877 0.000000 1294 +summar 0 11 4.553877 0.000000 1295 +strongli 0 10 4.653960 0.000000 1406 +operatingsystem 0 10 4.653960 0.000000 1401 +total 0 10 4.653960 0.000000 1398 +exact 0 9 4.753590 0.000000 1509 +informationabout 0 9 4.753590 0.000000 1515 +equival 0 9 4.753590 0.000000 1496 +herefor 0 9 4.753590 0.000000 1483 +solomon 0 8 4.875197 0.000000 1716 +theme 0 8 4.875197 0.000000 1707 +noon 0 7 5.010635 0.000000 1804 +suffici 0 7 5.010635 0.000000 1897 +larger 0 7 5.010635 0.000000 1875 +smaller 0 7 5.010635 0.000000 1874 +craig 0 7 5.010635 0.000000 1879 +sciencesoffic 0 6 5.164786 0.000000 2101 +onoper 0 6 5.164786 0.000000 2048 +carefulli 0 6 5.164786 0.000000 2045 +approv 0 6 5.164786 0.000000 2078 +prasad 0 6 5.164786 0.000000 2126 +formerli 0 5 5.347108 0.000000 2397 +deshpand 0 5 5.347108 0.000000 2431 +systemsfal 0 4 5.568345 0.000000 2683 +marvin 0 4 5.568345 0.000000 2806 +exposur 0 4 5.568345 0.000000 2598 +ident 0 4 5.568345 0.000000 2826 +will 0 4 5.568345 0.000000 2782 +raman 0 4 5.568345 0.000000 2827 +advancedoper 0 3 5.857933 0.000000 3403 +macc 0 3 5.857933 0.000000 3414 +focal 0 3 5.857933 0.000000 3404 +gradingther 0 3 5.857933 0.000000 3455 +franci 0 3 5.857933 0.000000 3287 +pang 0 3 5.857933 0.000000 3509 +avinash 0 3 5.857933 0.000000 3510 +rajesh 0 3 5.857933 0.000000 3511 +troffic 0 2 6.263398 0.000000 4706 +pmin 0 2 6.263398 0.000000 4492 +avaiabl 0 2 6.263398 0.000000 4703 +multic 0 2 6.263398 0.000000 4304 +interprocess 0 2 6.263398 0.000000 4174 +satisfactori 0 2 6.263398 0.000000 4567 +usea 0 2 6.263398 0.000000 4800 +andconfer 0 2 6.263398 0.000000 4568 +deskfor 0 2 6.263398 0.000000 4584 +youto 0 2 6.263398 0.000000 4093 +willinstead 0 2 6.263398 0.000000 4569 +adiscuss 0 2 6.263398 0.000000 4570 +geta 0 2 6.263398 0.000000 4571 +quietli 0 2 6.263398 0.000000 4572 +thoroughli 0 2 6.263398 0.000000 4801 +salmon 0 2 6.263398 0.000000 4802 +chien 0 2 6.263398 0.000000 4541 +sodani 0 2 6.263398 0.000000 4803 +basnei 0 2 6.263398 0.000000 4804 +biswadeep 0 2 6.263398 0.000000 4805 +taxiao 0 2 6.263398 0.000000 4806 +sridhar 0 2 6.263398 0.000000 4807 +eduthu 0 2 6.263398 0.000000 4721 +inroom 0 1 6.957497 0.000000 9183 +examtogeth 0 1 6.957497 0.000000 9184 +bedetermin 0 1 6.957497 0.000000 9185 +inconsider 0 1 6.957497 0.000000 9186 +sciencestextther 0 1 6.957497 0.000000 9187 +papersa 0 1 6.957497 0.000000 9188 +thoseof 0 1 6.957497 0.000000 9189 +lessout 0 1 6.957497 0.000000 9190 +projecty 0 1 6.957497 0.000000 9191 +implementationsof 0 1 6.957497 0.000000 9192 +unvalid 0 1 6.957497 0.000000 9193 +ashort 0 1 6.957497 0.000000 9194 +presentationabout 0 1 6.957497 0.000000 9195 +presentationsher 0 1 6.957497 0.000000 9196 +presen 0 1 6.957497 0.000000 9197 +manyan 0 1 6.957497 0.000000 9198 +stubb 0 1 6.957497 0.000000 9199 +bigg 0 1 6.957497 0.000000 9200 +gunawan 0 1 6.957497 0.000000 9201 +agu 0 1 6.957497 0.000000 9202 +qingmin 0 1 6.957497 0.000000 9203 +larsen 0 1 6.957497 0.000000 9204 +conroi 0 1 6.957497 0.000000 9205 +fritz 0 1 6.957497 0.000000 9206 +jordan 0 1 6.957497 0.000000 9207 +yanm 0 1 6.957497 0.000000 9208 +xinyu 0 1 6.957497 0.000000 9209 +munson 0 1 6.957497 0.000000 9210 +wenjun 0 1 6.957497 0.000000 9211 +xinyi 0 1 6.957497 0.000000 9212 +yufei 0 1 6.957497 0.000000 9213 +zeyu 0 1 6.957497 0.000000 9214 +gopal 0 1 6.957497 0.000000 9215 +leesolomon 0 1 6.957497 0.000000 9216 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..59a9f4be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +time 0 293 1.098612 0.000000 17 +homepag 1 93 2.397895 2.397895 148 +chiang 0 7 5.010635 0.000000 1853 +gradesgo 0 1 6.957497 0.000000 9217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..04bb6964 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +solut 0 82 2.484907 0.000000 162 +contain 0 81 2.484907 0.000000 174 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +write 0 72 2.639057 0.000000 222 +solv 0 73 2.639057 0.000000 234 +window 0 68 2.708050 0.000000 242 +copi 0 63 2.772589 0.000000 284 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +cover 0 55 2.944439 0.000000 329 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +week 0 52 2.995732 0.000000 343 +pointer 0 48 3.044522 0.000000 368 +even 0 45 3.135494 0.000000 393 +textbook 0 44 3.135494 0.000000 397 +netscap 0 44 3.135494 0.000000 395 +long 0 43 3.178054 0.000000 413 +howev 0 41 3.218876 0.000000 422 +must 0 40 3.258097 0.000000 442 +probabl 0 40 3.258097 0.000000 455 +microsoft 0 38 3.295837 0.000000 468 +open 0 38 3.295837 0.000000 469 +hand 0 37 3.332205 0.000000 475 +copyright 0 36 3.367296 0.000000 495 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +either 0 35 3.401197 0.000000 506 +go 0 33 3.433987 0.000000 529 +depend 0 29 3.583519 0.000000 583 +except 0 28 3.610918 0.000000 607 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +toward 0 25 3.737670 0.000000 668 +jeff 0 25 3.737670 0.000000 673 +lab 0 24 3.761200 0.000000 698 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +increas 0 20 3.951244 0.000000 829 +exercis 0 19 4.007333 0.000000 842 +left 0 19 4.007333 0.000000 851 +lyco 0 19 4.007333 0.000000 871 +along 0 18 4.060443 0.000000 878 +across 0 16 4.174387 0.000000 974 +dilbert 0 16 4.174387 0.000000 996 +fortran 1 15 4.248495 4.248495 1027 +configur 0 15 4.248495 0.000000 1012 +purchas 0 15 4.248495 0.000000 1030 +comic 0 14 4.317488 0.000000 1103 +primarili 0 13 4.382027 0.000000 1185 +vectra 0 12 4.465908 0.000000 1267 +overal 0 12 4.465908 0.000000 1254 +holidai 0 12 4.465908 0.000000 1224 +insid 0 12 4.465908 0.000000 1262 +keyword 0 11 4.553877 0.000000 1356 +night 0 11 4.553877 0.000000 1319 +packard 0 10 4.653960 0.000000 1444 +certain 0 10 4.653960 0.000000 1393 +seven 0 9 4.753590 0.000000 1561 +prefer 0 9 4.753590 0.000000 1491 +correctli 0 9 4.753590 0.000000 1478 +wall 0 9 4.753590 0.000000 1553 +hewlett 0 8 4.875197 0.000000 1709 +printer 0 8 4.875197 0.000000 1621 +bestor 0 6 5.164786 0.000000 2099 +lampert 0 5 5.347108 0.000000 2398 +gareth 0 5 5.347108 0.000000 2392 +closest 0 4 5.568345 0.000000 2828 +relief 0 4 5.568345 0.000000 2784 +labyou 0 3 5.857933 0.000000 3406 +aren 0 3 5.857933 0.000000 3512 +dorm 0 3 5.857933 0.000000 3407 +lahei 0 3 5.857933 0.000000 3408 +quota 0 2 6.263398 0.000000 4753 +exce 0 1 6.957497 0.000000 9218 +bewar 0 1 6.957497 0.000000 9219 +outsidehallwai 0 1 6.957497 0.000000 9220 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..345e6a48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +document 0 121 2.079442 0.000000 89 +check 0 115 2.197225 0.000000 118 +homepag 1 93 2.397895 2.397895 148 +section 0 94 2.397895 0.000000 149 +sinc 0 90 2.397895 0.000000 159 +chang 0 82 2.484907 0.000000 163 +solv 0 73 2.639057 0.000000 234 +frequent 0 49 3.044522 0.000000 367 +textbook 0 44 3.135494 0.000000 397 +purpos 0 37 3.332205 0.000000 481 +eduoffic 0 33 3.433987 0.000000 531 +often 0 31 3.496508 0.000000 551 +walter 0 17 4.110874 0.000000 950 +todd 0 15 4.248495 0.000000 1051 +assignmentsprogram 0 6 5.164786 0.000000 2019 +assignmentshomework 0 4 5.568345 0.000000 2721 +pertain 0 3 5.857933 0.000000 3208 +homepagewelcom 0 2 6.263398 0.000000 4808 +tmunson 0 2 6.263398 0.000000 4809 +statisticsoffic 0 2 6.263398 0.000000 4810 +homepagec 0 1 6.957497 0.000000 9221 +responsibilityto 0 1 6.957497 0.000000 9222 +informationinstructor 0 1 6.957497 0.000000 9223 +munsonemail 0 1 6.957497 0.000000 9224 +appointmentsect 0 1 6.957497 0.000000 9225 +savitchclass 0 1 6.957497 0.000000 9226 +informationexpectationssyllabusexam 0 1 6.957497 0.000000 9227 +schedule 0 1 6.957497 0.000000 9228 +mailgradingl 0 1 6.957497 0.000000 9229 +assignmentsextra 0 1 6.957497 0.000000 9230 +creditpoliciesconsult 0 1 6.957497 0.000000 9231 +responsibilitiesacadem 0 1 6.957497 0.000000 9232 +misconductoth 0 1 6.957497 0.000000 9233 +informationdaili 0 1 6.957497 0.000000 9234 +classoth 0 1 6.957497 0.000000 9235 +resourcesc 0 1 6.957497 0.000000 9236 +homepagetmunson 0 1 6.957497 0.000000 9237 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..929f9fac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +start 0 83 2.484907 0.000000 173 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +date 0 51 2.995732 0.000000 344 +get 0 46 3.091042 0.000000 380 +netscap 0 44 3.135494 0.000000 395 +consult 0 24 3.761200 0.000000 687 +tent 0 22 3.850148 0.000000 739 +facil 0 20 3.951244 0.000000 814 +whole 0 17 4.110874 0.000000 940 +todd 0 15 4.248495 0.000000 1051 +difficulti 0 13 4.382027 0.000000 1132 +tutor 0 9 4.753590 0.000000 1552 +turnidg 0 4 5.568345 0.000000 2829 +struct 0 4 5.568345 0.000000 2821 +nolandinstructor 0 2 6.263398 0.000000 4785 +muchinform 0 2 6.263398 0.000000 4811 +turnidgeoffic 0 1 6.957497 0.000000 9238 +tbalab 0 1 6.957497 0.000000 9239 +tbaannouncementsclass 0 1 6.957497 0.000000 9240 +classa 0 1 6.957497 0.000000 9241 +byother 0 1 6.957497 0.000000 9242 +gregorysharp 0 1 6.957497 0.000000 9243 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..a6b91947 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +text 0 98 2.302585 0.000000 133 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +info 0 85 2.484907 0.000000 176 +chang 0 82 2.484907 0.000000 163 +sourc 0 77 2.564949 0.000000 201 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +solv 0 73 2.639057 0.000000 234 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +locat 0 59 2.833213 0.000000 303 +week 0 52 2.995732 0.000000 343 +appoint 0 49 3.044522 0.000000 358 +still 0 50 3.044522 0.000000 362 +algebra 0 45 3.135494 0.000000 394 +announc 0 40 3.258097 0.000000 441 +late 0 40 3.258097 0.000000 439 +statist 0 35 3.401197 0.000000 521 +everi 0 34 3.401197 0.000000 519 +print 0 34 3.401197 0.000000 503 +consult 0 24 3.761200 0.000000 687 +dai 0 22 3.850148 0.000000 753 +walter 0 17 4.110874 0.000000 950 +zhang 0 16 4.174387 0.000000 980 +weslei 0 16 4.174387 0.000000 983 +misconduct 0 16 4.174387 0.000000 1003 +quiz 0 16 4.174387 0.000000 990 +style 0 15 4.248495 0.000000 1036 +vectra 0 12 4.465908 0.000000 1267 +savitch 0 12 4.465908 0.000000 1269 +addison 0 12 4.465908 0.000000 1230 +chri 0 11 4.553877 0.000000 1311 +statement 0 11 4.553877 0.000000 1313 +errata 0 10 4.653960 0.000000 1403 +login 0 9 4.753590 0.000000 1550 +seven 0 9 4.753590 0.000000 1561 +reload 0 8 4.875197 0.000000 1682 +isbn 0 7 5.010635 0.000000 1901 +guidelin 0 7 5.010635 0.000000 1832 +rough 0 6 5.164786 0.000000 2107 +noland 0 5 5.347108 0.000000 2420 +psych 0 3 5.857933 0.000000 3498 +grader 0 3 5.857933 0.000000 3165 +weaver 0 2 6.263398 0.000000 4770 +kei 0 2 6.263398 0.000000 4812 +staf 0 1 6.957497 0.000000 9244 +policyassign 0 1 6.957497 0.000000 9245 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..e12d2e08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +architectur 0 139 1.945910 0.000000 77 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +final 0 116 2.197225 0.000000 108 +instructor 0 108 2.197225 0.000000 107 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +homework 1 79 2.564949 2.564949 193 +summari 0 73 2.639057 0.000000 237 +main 0 67 2.708050 0.000000 256 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +content 0 59 2.833213 0.000000 302 +special 0 56 2.890372 0.000000 320 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +instruct 0 53 2.944439 0.000000 332 +tabl 0 51 2.995732 0.000000 346 +set 0 50 3.044522 0.000000 361 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +cach 0 41 3.218876 0.000000 432 +review 0 42 3.218876 0.000000 425 +cost 0 37 3.332205 0.000000 480 +jame 0 35 3.401197 0.000000 507 +limit 0 29 3.583519 0.000000 585 +consid 0 29 3.583519 0.000000 590 +full 0 28 3.610918 0.000000 615 +multiprocessor 0 28 3.610918 0.000000 605 +arrai 0 27 3.637586 0.000000 627 +trace 0 25 3.737670 0.000000 677 +miscellan 0 23 3.806662 0.000000 731 +disk 0 22 3.850148 0.000000 747 +rout 0 21 3.912023 0.000000 793 +smith 0 20 3.951244 0.000000 820 +reserv 0 20 3.951244 0.000000 808 +thur 0 19 4.007333 0.000000 847 +interconnect 0 17 4.110874 0.000000 937 +vector 0 16 4.174387 0.000000 961 +doit 0 14 4.317488 0.000000 1111 +station 0 13 4.382027 0.000000 1157 +tue 0 11 4.553877 0.000000 1308 +clock 0 11 4.553877 0.000000 1320 +pipelin 0 7 5.010635 0.000000 1830 +biochemistri 0 3 5.857933 0.000000 3513 +vliw 0 3 5.857933 0.000000 3514 +harm 0 3 5.857933 0.000000 3515 +princ 0 2 6.263398 0.000000 4813 +specmark 0 2 6.263398 0.000000 4471 +princeoffic 0 1 6.957497 0.000000 9246 +miscellaneousnew 0 1 6.957497 0.000000 9247 +soln 0 1 6.957497 0.000000 9248 +pmread 0 1 6.957497 0.000000 9249 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..1a3f7ff4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,245 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +make 0 111 2.197225 0.000000 120 +instructor 0 108 2.197225 0.000000 107 +part 0 98 2.302585 0.000000 129 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +imag 0 91 2.397895 0.000000 161 +exam 0 86 2.484907 0.000000 169 +level 0 87 2.484907 0.000000 180 +requir 0 81 2.484907 0.000000 167 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +homework 0 79 2.564949 0.000000 193 +dynam 0 76 2.564949 0.000000 194 +method 0 80 2.564949 0.000000 213 +april 0 77 2.564949 0.000000 196 +state 0 76 2.564949 0.000000 207 +upson 0 71 2.639057 0.000000 218 +summari 0 73 2.639057 0.000000 237 +line 0 75 2.639057 0.000000 231 +tuesdai 0 73 2.639057 0.000000 219 +onlin 0 75 2.639057 0.000000 223 +view 0 70 2.708050 0.000000 254 +test 0 66 2.708050 0.000000 252 +main 0 67 2.708050 0.000000 256 +practic 0 70 2.708050 0.000000 246 +simul 0 66 2.708050 0.000000 255 +order 0 69 2.708050 0.000000 249 +complex 0 64 2.772589 0.000000 269 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +colleg 0 61 2.833213 0.000000 300 +space 0 57 2.890372 0.000000 310 +faculti 0 56 2.890372 0.000000 325 +scientif 0 53 2.944439 0.000000 341 +cover 0 55 2.944439 0.000000 329 +visual 0 48 3.044522 0.000000 372 +principl 0 48 3.044522 0.000000 357 +standard 0 48 3.044522 0.000000 365 +california 0 46 3.091042 0.000000 388 +math 0 44 3.135494 0.000000 402 +textbook 0 44 3.135494 0.000000 397 +vision 0 41 3.218876 0.000000 430 +combin 0 42 3.218876 0.000000 421 +examin 0 42 3.218876 0.000000 424 +error 0 40 3.258097 0.000000 449 +map 0 39 3.258097 0.000000 452 +form 0 39 3.258097 0.000000 443 +prototyp 0 38 3.295837 0.000000 463 +field 0 37 3.332205 0.000000 482 +connect 0 37 3.332205 0.000000 485 +mean 0 37 3.332205 0.000000 477 +staff 0 36 3.367296 0.000000 490 +copyright 0 36 3.367296 0.000000 495 +taught 0 33 3.433987 0.000000 526 +product 0 33 3.433987 0.000000 527 +transform 0 32 3.465736 0.000000 542 +human 0 32 3.465736 0.000000 546 +anim 0 31 3.496508 0.000000 557 +focus 0 29 3.583519 0.000000 584 +limit 0 29 3.583519 0.000000 585 +relev 0 26 3.688879 0.000000 637 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +other 0 24 3.761200 0.000000 697 +begin 0 23 3.806662 0.000000 716 +equat 0 23 3.806662 0.000000 724 +color 0 22 3.850148 0.000000 762 +deal 0 22 3.850148 0.000000 736 +properti 0 22 3.850148 0.000000 749 +serv 0 22 3.850148 0.000000 758 +period 0 22 3.850148 0.000000 743 +sent 0 22 3.850148 0.000000 763 +among 0 21 3.912023 0.000000 781 +viewer 0 21 3.912023 0.000000 787 +break 0 20 3.951244 0.000000 812 +prepar 0 20 3.951244 0.000000 824 +geometr 0 19 4.007333 0.000000 852 +miss 0 19 4.007333 0.000000 866 +dimension 0 18 4.060443 0.000000 909 +figur 0 18 4.060443 0.000000 903 +render 0 17 4.110874 0.000000 947 +differenti 0 17 4.110874 0.000000 921 +modif 0 17 4.110874 0.000000 913 +vector 0 16 4.174387 0.000000 961 +normal 0 16 4.174387 0.000000 995 +devic 0 16 4.174387 0.000000 1002 +atth 0 15 4.248495 0.000000 1019 +hierarch 0 15 4.248495 0.000000 1018 +style 0 15 4.248495 0.000000 1036 +camera 0 14 4.317488 0.000000 1115 +hopefulli 0 14 4.317488 0.000000 1071 +scene 0 14 4.317488 0.000000 1114 +alan 0 13 4.382027 0.000000 1146 +composit 0 13 4.382027 0.000000 1150 +coordin 0 13 4.382027 0.000000 1182 +automata 0 13 4.382027 0.000000 1135 +difficulti 0 13 4.382027 0.000000 1132 +opportun 0 13 4.382027 0.000000 1161 +prelim 0 12 4.465908 0.000000 1201 +skill 0 12 4.465908 0.000000 1205 +holidai 0 12 4.465908 0.000000 1224 +bruce 0 12 4.465908 0.000000 1226 +land 0 12 4.465908 0.000000 1273 +huang 0 12 4.465908 0.000000 1202 +evolv 0 12 4.465908 0.000000 1223 +optic 0 12 4.465908 0.000000 1221 +remov 0 12 4.465908 0.000000 1225 +buffer 0 12 4.465908 0.000000 1211 +scan 0 12 4.465908 0.000000 1243 +volum 0 11 4.553877 0.000000 1347 +transpar 0 11 4.553877 0.000000 1325 +statement 0 11 4.553877 0.000000 1313 +forc 0 10 4.653960 0.000000 1384 +perspect 0 10 4.653960 0.000000 1437 +facilit 0 10 4.653960 0.000000 1412 +surfac 1 9 4.753590 4.753590 1574 +light 0 9 4.753590 0.000000 1533 +incomplet 0 9 4.753590 0.000000 1575 +leader 0 9 4.753590 0.000000 1576 +explicit 0 9 4.753590 0.000000 1525 +screen 0 9 4.753590 0.000000 1577 +wall 0 9 4.753590 0.000000 1553 +observ 0 9 4.753590 0.000000 1578 +rhode 0 9 4.753590 0.000000 1579 +polygon 0 8 4.875197 0.000000 1723 +textur 0 8 4.875197 0.000000 1677 +convers 0 8 4.875197 0.000000 1673 +parametr 0 7 5.010635 0.000000 1819 +clip 0 7 5.010635 0.000000 1868 +shade 0 7 5.010635 0.000000 1881 +pagecomput 0 7 5.010635 0.000000 1900 +stereo 0 7 5.010635 0.000000 1818 +channel 0 7 5.010635 0.000000 1836 +justin 0 7 5.010635 0.000000 1789 +davi 0 7 5.010635 0.000000 1888 +artist 0 6 5.164786 0.000000 2127 +onto 0 6 5.164786 0.000000 2089 +hidden 0 6 5.164786 0.000000 1987 +notifi 0 6 5.164786 0.000000 2106 +rotat 0 5 5.347108 0.000000 2295 +rigid 0 5 5.347108 0.000000 2432 +cellular 0 5 5.347108 0.000000 2433 +anti 0 5 5.347108 0.000000 2434 +hyper 0 5 5.347108 0.000000 2435 +particl 0 5 5.347108 0.000000 2436 +oregon 0 5 5.347108 0.000000 2437 +implicit 0 4 5.568345 0.000000 2830 +invers 0 4 5.568345 0.000000 2764 +pixel 0 4 5.568345 0.000000 2831 +contour 0 4 5.568345 0.000000 2812 +administrivia 0 3 5.857933 0.000000 3166 +kinemat 0 3 5.857933 0.000000 3516 +computergraph 0 3 5.857933 0.000000 3517 +wave 0 3 5.857933 0.000000 3518 +shadow 0 3 5.857933 0.000000 3519 +bump 0 3 5.857933 0.000000 3497 +arrow 0 3 5.857933 0.000000 3520 +makeup 0 3 5.857933 0.000000 3449 +jing 0 3 5.857933 0.000000 3521 +mccune 0 3 5.857933 0.000000 3522 +waterloo 0 3 5.857933 0.000000 3523 +cardiff 0 3 5.857933 0.000000 3154 +watt 0 2 6.263398 0.000000 4814 +scalar 0 2 6.263398 0.000000 4815 +religi 0 2 6.263398 0.000000 4816 +folei 0 2 6.263398 0.000000 4817 +bruceland 0 2 6.263398 0.000000 4818 +designedto 0 2 6.263398 0.000000 4712 +illumin 0 2 6.263398 0.000000 4819 +blobbi 0 2 6.263398 0.000000 4820 +homogen 0 2 6.263398 0.000000 4821 +mimic 0 2 6.263398 0.000000 4736 +phong 0 2 6.263398 0.000000 4822 +alias 0 2 6.263398 0.000000 4823 +tomak 0 2 6.263398 0.000000 4675 +belief 0 2 6.263398 0.000000 4553 +inord 0 2 6.263398 0.000000 4824 +absent 0 2 6.263398 0.000000 4825 +deviat 0 2 6.263398 0.000000 4826 +wale 0 2 6.263398 0.000000 4827 +manchest 0 2 6.263398 0.000000 4828 +todoc 0 2 6.263398 0.000000 4829 +quadric 0 1 6.957497 0.000000 9250 +swept 0 1 6.957497 0.000000 9251 +tensor 0 1 6.957497 0.000000 9252 +tessel 0 1 6.957497 0.000000 9253 +gourand 0 1 6.957497 0.000000 9254 +vernier 0 1 6.957497 0.000000 9255 +acuiti 0 1 6.957497 0.000000 9256 +mispercept 0 1 6.957497 0.000000 9257 +advect 0 1 6.957497 0.000000 9258 +multiparamet 0 1 6.957497 0.000000 9259 +educationlaw 0 1 6.957497 0.000000 9260 +mandat 0 1 6.957497 0.000000 9261 +intendingto 0 1 6.957497 0.000000 9262 +requestedto 0 1 6.957497 0.000000 9263 +jmccune 0 1 6.957497 0.000000 9264 +csrelev 0 1 6.957497 0.000000 9265 +universityrel 0 1 6.957497 0.000000 9266 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..f3993b8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +base 0 165 1.791759 0.000000 50 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +note 0 142 1.945910 0.000000 67 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +site 0 106 2.197225 0.000000 119 +place 0 106 2.197225 0.000000 124 +final 0 116 2.197225 0.000000 108 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +contain 0 81 2.484907 0.000000 174 +level 0 87 2.484907 0.000000 180 +start 0 83 2.484907 0.000000 173 +build 0 85 2.484907 0.000000 184 +refer 0 78 2.564949 0.000000 203 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +main 0 67 2.708050 0.000000 256 +practic 0 70 2.708050 0.000000 246 +order 0 69 2.708050 0.000000 249 +laboratori 0 63 2.772589 0.000000 292 +result 0 65 2.772589 0.000000 281 +virtual 0 62 2.772589 0.000000 285 +scientif 0 53 2.944439 0.000000 341 +visual 1 48 3.044522 3.044522 372 +principl 0 48 3.044522 0.000000 357 +get 0 46 3.091042 0.000000 380 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +mark 0 44 3.135494 0.000000 403 +map 0 39 3.258097 0.000000 452 +procedur 0 36 3.367296 0.000000 488 +copyright 0 36 3.367296 0.000000 495 +taught 0 33 3.433987 0.000000 526 +transform 0 32 3.465736 0.000000 542 +anim 0 31 3.496508 0.000000 557 +deal 0 22 3.850148 0.000000 736 +sent 0 22 3.850148 0.000000 763 +facil 0 20 3.951244 0.000000 814 +exercis 1 19 4.007333 4.007333 842 +atth 0 15 4.248495 0.000000 1019 +camera 0 14 4.317488 0.000000 1115 +train 0 14 4.317488 0.000000 1066 +land 0 12 4.465908 0.000000 1273 +statement 0 11 4.553877 0.000000 1313 +perspect 0 10 4.653960 0.000000 1437 +surfac 0 9 4.753590 0.000000 1574 +leader 0 9 4.753590 0.000000 1576 +light 0 9 4.753590 0.000000 1533 +textur 0 8 4.875197 0.000000 1677 +competit 0 8 4.875197 0.000000 1635 +polygon 0 8 4.875197 0.000000 1723 +pagecomput 0 7 5.010635 0.000000 1900 +parametr 0 7 5.010635 0.000000 1819 +chat 0 6 5.164786 0.000000 2128 +restrict 0 6 5.164786 0.000000 2129 +implicit 0 4 5.568345 0.000000 2830 +enrol 0 4 5.568345 0.000000 2613 +computergraph 0 3 5.857933 0.000000 3517 +bump 0 3 5.857933 0.000000 3497 +folei 0 2 6.263398 0.000000 4817 +watt 0 2 6.263398 0.000000 4814 +bruceland 0 2 6.263398 0.000000 4818 +todoc 0 2 6.263398 0.000000 4829 +exercisesthi 0 1 6.957497 0.000000 9267 +universityundergradu 0 1 6.957497 0.000000 9268 +dcomput 0 1 6.957497 0.000000 9269 +sigucc 0 1 6.957497 0.000000 9270 +basededuc 0 1 6.957497 0.000000 9271 +areinclud 0 1 6.957497 0.000000 9272 +aboutc 0 1 6.957497 0.000000 9273 +semesteraccess 0 1 6.957497 0.000000 9274 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..55b39f0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +model 0 145 1.945910 0.000000 69 +lectur 0 135 1.945910 0.000000 73 +document 0 121 2.079442 0.000000 89 +topic 1 114 2.197225 2.197225 110 +code 0 108 2.197225 0.000000 116 +imag 0 91 2.397895 0.000000 161 +graphic 0 90 2.397895 0.000000 147 +window 0 68 2.708050 0.000000 242 +content 0 59 2.833213 0.000000 302 +sampl 0 53 2.944439 0.000000 339 +visual 0 48 3.044522 0.000000 372 +video 0 44 3.135494 0.000000 405 +michael 0 35 3.401197 0.000000 514 +human 0 32 3.465736 0.000000 546 +express 0 32 3.465736 0.000000 540 +retriev 0 27 3.637586 0.000000 621 +appropri 0 18 4.060443 0.000000 883 +spatial 0 16 4.174387 0.000000 988 +huang 0 12 4.465908 0.000000 1202 +explicit 0 9 4.753590 0.000000 1525 +sean 0 8 4.875197 0.000000 1705 +justin 0 7 5.010635 0.000000 1789 +deliv 0 6 5.164786 0.000000 2070 +chosen 0 6 5.164786 0.000000 1984 +alex 0 6 5.164786 0.000000 2130 +facial 0 5 5.347108 0.000000 2438 +interior 0 5 5.347108 0.000000 2439 +particl 0 5 5.347108 0.000000 2436 +chose 0 4 5.568345 0.000000 2629 +arun 0 4 5.568345 0.000000 2736 +computergraph 0 3 5.857933 0.000000 3517 +hung 0 3 5.857933 0.000000 3524 +mccune 0 3 5.857933 0.000000 3522 +landscap 0 3 5.857933 0.000000 3525 +landi 0 2 6.263398 0.000000 4830 +tsai 0 2 6.263398 0.000000 4831 +stochast 0 2 6.263398 0.000000 4832 +semestereach 0 1 6.957497 0.000000 9275 +anddocu 0 1 6.957497 0.000000 9276 +metabal 0 1 6.957497 0.000000 9277 +arcuri 0 1 6.957497 0.000000 9278 +benton 0 1 6.957497 0.000000 9279 +interdepend 0 1 6.957497 0.000000 9280 +diffus 0 1 6.957497 0.000000 9281 +pollut 0 1 6.957497 0.000000 9282 +modelsfu 0 1 6.957497 0.000000 9283 +antialias 0 1 6.957497 0.000000 9284 +vermach 0 1 6.957497 0.000000 9285 +hsun 0 1 6.957497 0.000000 9286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..35c27805 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +applic 0 170 1.791759 0.000000 56 +hall 0 146 1.945910 0.000000 65 +area 0 144 1.945910 0.000000 80 +problem 0 147 1.945910 0.000000 75 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +center 0 88 2.397895 0.000000 158 +member 0 84 2.484907 0.000000 165 +solut 0 82 2.484907 0.000000 162 +resum 0 79 2.564949 0.000000 217 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +nation 0 74 2.639057 0.000000 240 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +goal 0 66 2.708050 0.000000 250 +laboratori 0 63 2.772589 0.000000 292 +thesi 1 57 2.890372 2.890372 327 +scientif 1 53 2.944439 2.944439 341 +advisor 0 51 2.995732 0.000000 355 +numer 0 49 3.044522 0.000000 369 +least 1 35 3.401197 3.401197 516 +committe 0 34 3.401197 0.000000 522 +tech 0 35 3.401197 0.000000 515 +steve 0 29 3.583519 0.000000 594 +hous 0 21 3.912023 0.000000 801 +siam 0 21 3.912023 0.000000 800 +matrix 0 17 4.110874 0.000000 933 +squar 1 14 4.317488 4.317488 1082 +weight 0 12 4.465908 0.000000 1204 +statement 0 11 4.553877 0.000000 1313 +decomposit 0 10 4.653960 0.000000 1439 +rhode 0 9 4.753590 0.000000 1579 +juan 0 9 4.753590 0.000000 1580 +postdoc 0 8 4.875197 0.000000 1724 +cornellunivers 0 7 5.010635 0.000000 1916 +whichi 0 6 5.164786 0.000000 2056 +stabl 0 5 5.347108 0.000000 2309 +interior 0 5 5.347108 0.000000 2439 +orthogon 0 4 5.568345 0.000000 2832 +vavasi 1 3 5.857933 5.857933 3526 +hough 0 3 5.857933 0.000000 3527 +linearalgebra 0 2 6.263398 0.000000 4833 +anal 0 2 6.263398 0.000000 4834 +pointmethod 0 2 6.263398 0.000000 4835 +sandia 1 1 6.957497 6.957497 9287 +livermor 1 1 6.957497 6.957497 9288 +patti 0 1 6.957497 0.000000 9289 +houghpatti 0 1 6.957497 0.000000 9290 +frankh 0 1 6.957497 0.000000 9291 +nicktrefethen 0 1 6.957497 0.000000 9292 +schatz 0 1 6.957497 0.000000 9293 +optimizationi 0 1 6.957497 0.000000 9294 +meza 0 1 6.957497 0.000000 9295 +nationallaboratori 0 1 6.957497 0.000000 9296 +ofweight 0 1 6.957497 0.000000 9297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..d4e67206 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +applic 0 170 1.791759 0.000000 56 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +postscript 1 131 2.079442 2.079442 90 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +center 0 88 2.397895 0.000000 158 +activ 0 84 2.484907 0.000000 182 +dynam 0 76 2.564949 0.000000 194 +april 0 77 2.564949 0.000000 196 +exampl 0 77 2.564949 0.000000 195 +appli 0 71 2.639057 0.000000 226 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +ithaca 0 65 2.772589 0.000000 294 +type 0 61 2.833213 0.000000 296 +thesi 0 57 2.890372 0.000000 327 +summer 0 56 2.890372 0.000000 311 +detail 0 57 2.890372 0.000000 321 +scientif 0 53 2.944439 0.000000 341 +physic 0 47 3.091042 0.000000 377 +mechan 0 43 3.178054 0.000000 416 +linear 0 41 3.218876 0.000000 431 +submit 0 39 3.258097 0.000000 440 +continu 0 39 3.258097 0.000000 448 +vita 0 38 3.295837 0.000000 473 +expect 0 37 3.332205 0.000000 484 +next 0 34 3.401197 0.000000 517 +curriculum 0 33 3.433987 0.000000 535 +titl 0 31 3.496508 0.000000 556 +posit 0 31 3.496508 0.000000 552 +propos 0 28 3.610918 0.000000 602 +jeff 0 25 3.737670 0.000000 673 +background 0 25 3.737670 0.000000 664 +hill 0 25 3.737670 0.000000 670 +equat 0 23 3.806662 0.000000 724 +finish 0 22 3.850148 0.000000 748 +supervis 0 20 3.951244 0.000000 840 +mostli 0 19 4.007333 0.000000 869 +dimension 0 18 4.060443 0.000000 909 +seek 0 17 4.110874 0.000000 954 +outlin 0 17 4.110874 0.000000 914 +coupl 0 17 4.110874 0.000000 939 +normal 0 16 4.174387 0.000000 995 +transit 0 15 4.248495 0.000000 1046 +nick 0 13 4.382027 0.000000 1180 +misc 0 13 4.382027 0.000000 1124 +frank 0 9 4.753590 0.000000 1568 +rhode 0 9 4.753590 0.000000 1579 +unusu 0 9 4.753590 0.000000 1566 +sixth 0 7 5.010635 0.000000 1917 +atcornel 0 6 5.164786 0.000000 2131 +versu 0 6 5.164786 0.000000 2052 +fluid 0 5 5.347108 0.000000 2440 +stabil 0 5 5.347108 0.000000 2286 +satish 0 4 5.568345 0.000000 2833 +trefethen 0 3 5.857933 0.000000 3528 +exponenti 0 3 5.857933 0.000000 3529 +driscol 0 2 6.263398 0.000000 4836 +spectral 0 2 6.263398 0.000000 4837 +baggett 0 1 6.957497 0.000000 9298 +turbul 0 1 6.957497 0.000000 9299 +baggettjeff 0 1 6.957497 0.000000 9300 +hydrodynam 0 1 6.957497 0.000000 9301 +blend 0 1 6.957497 0.000000 9302 +iwould 0 1 6.957497 0.000000 9303 +abscissa 0 1 6.957497 0.000000 9304 +andphillip 0 1 6.957497 0.000000 9305 +subcrit 0 1 6.957497 0.000000 9306 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..a04a0d5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +report 0 131 2.079442 0.000000 92 +document 0 121 2.079442 0.000000 89 +specif 0 106 2.197225 0.000000 106 +send 0 114 2.197225 0.000000 109 +technic 0 100 2.302585 0.000000 140 +search 0 95 2.397895 0.000000 155 +institut 0 84 2.484907 0.000000 187 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +want 0 79 2.564949 0.000000 199 +goal 0 66 2.708050 0.000000 250 +collect 0 65 2.772589 0.000000 268 +laboratori 0 63 2.772589 0.000000 292 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +sever 0 56 2.890372 0.000000 322 +allow 0 53 2.944439 0.000000 333 +made 0 44 3.135494 0.000000 398 +form 0 39 3.258097 0.000000 443 +author 0 39 3.258097 0.000000 450 +join 0 39 3.258097 0.000000 457 +industri 0 38 3.295837 0.000000 464 +field 0 37 3.332205 0.000000 482 +word 0 34 3.401197 0.000000 508 +titl 0 31 3.496508 0.000000 556 +limit 0 29 3.583519 0.000000 585 +particip 0 29 3.583519 0.000000 589 +packag 0 28 3.610918 0.000000 614 +background 0 25 3.737670 0.000000 664 +brows 0 23 3.806662 0.000000 726 +among 0 21 3.912023 0.000000 781 +tell 0 21 3.912023 0.000000 777 +offici 0 18 4.060443 0.000000 894 +commerci 0 16 4.174387 0.000000 1005 +whose 0 13 4.382027 0.000000 1166 +enter 0 10 4.653960 0.000000 1454 +govern 0 9 4.753590 0.000000 1581 +pronounc 0 7 5.010635 0.000000 1918 +ncstrl 1 3 5.857933 5.857933 3530 +interoper 0 2 6.263398 0.000000 4838 +andorgan 0 2 6.263398 0.000000 4443 +bibliograph 0 2 6.263398 0.000000 4699 +libraryncstrl 0 1 6.957497 0.000000 9307 +ancestr 0 1 6.957497 0.000000 9308 +internationalcollect 0 1 6.957497 0.000000 9309 +departmentsand 0 1 6.957497 0.000000 9310 +availablefor 0 1 6.957497 0.000000 9311 +eduat 0 1 6.957497 0.000000 9312 +ncstrlcollect 0 1 6.957497 0.000000 9313 +serversoper 0 1 6.957497 0.000000 9314 +participatinginstitut 0 1 6.957497 0.000000 9315 +ncstrlpress 0 1 6.957497 0.000000 9316 +theparticip 0 1 6.957497 0.000000 9317 +moreread 0 1 6.957497 0.000000 9318 +forinstitut 0 1 6.957497 0.000000 9319 +informationfind 0 1 6.957497 0.000000 9320 +snew 0 1 6.957497 0.000000 9321 +totech 0 1 6.957497 0.000000 9322 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..39c6ff48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^dri.cornell.edu @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +design 1 213 1.386294 1.386294 25 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +technic 1 100 2.302585 2.302585 140 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +institut 1 84 2.484907 2.484907 187 +server 0 76 2.564949 0.000000 204 +integr 0 67 2.708050 0.000000 245 +anoth 0 45 3.135494 0.000000 408 +mike 0 24 3.761200 0.000000 703 +brows 0 23 3.806662 0.000000 726 +enterpris 0 2 6.263398 0.000000 4839 +informationand 0 2 6.263398 0.000000 4840 +instituteabout 0 1 6.957497 0.000000 9323 +researchersat 0 1 6.957497 0.000000 9324 +searchal 0 1 6.957497 0.000000 9325 +reportssearch 0 1 6.957497 0.000000 9326 +ipic 0 1 6.957497 0.000000 9327 +itisingapor 0 1 6.957497 0.000000 9328 +altavistaforum 0 1 6.957497 0.000000 9329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..e4adcec6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +report 0 131 2.079442 0.000000 92 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +memori 0 101 2.302585 0.000000 139 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +question 0 91 2.397895 0.000000 141 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +resourc 0 81 2.484907 0.000000 172 +institut 0 84 2.484907 0.000000 187 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +learn 0 86 2.484907 0.000000 170 +messag 0 76 2.564949 0.000000 212 +server 0 76 2.564949 0.000000 204 +sourc 0 77 2.564949 0.000000 201 +resum 0 79 2.564949 0.000000 217 +write 0 72 2.639057 0.000000 222 +onlin 0 75 2.639057 0.000000 223 +knowledg 0 67 2.708050 0.000000 243 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +improv 0 62 2.772589 0.000000 289 +copi 0 63 2.772589 0.000000 284 +collect 0 65 2.772589 0.000000 268 +share 0 59 2.833213 0.000000 304 +content 0 59 2.833213 0.000000 302 +explor 0 58 2.890372 0.000000 324 +run 0 51 2.995732 0.000000 347 +investig 0 51 2.995732 0.000000 353 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +understand 0 47 3.091042 0.000000 384 +answer 0 45 3.135494 0.000000 391 +even 0 45 3.135494 0.000000 393 +made 0 44 3.135494 0.000000 398 +natur 0 44 3.135494 0.000000 406 +futur 0 41 3.218876 0.000000 427 +might 0 41 3.218876 0.000000 426 +author 0 39 3.258097 0.000000 450 +mean 0 37 3.332205 0.000000 477 +staff 0 36 3.367296 0.000000 490 +ofth 0 36 3.367296 0.000000 491 +especi 0 36 3.367296 0.000000 496 +product 0 33 3.433987 0.000000 527 +obtain 0 33 3.433987 0.000000 534 +collabor 0 32 3.465736 0.000000 543 +often 0 31 3.496508 0.000000 551 +produc 0 30 3.555348 0.000000 572 +abl 0 30 3.555348 0.000000 566 +ask 0 28 3.610918 0.000000 597 +manipul 0 27 3.637586 0.000000 624 +effort 0 26 3.688879 0.000000 652 +sport 0 25 3.737670 0.000000 683 +decis 0 23 3.806662 0.000000 728 +initi 0 23 3.806662 0.000000 717 +thank 0 23 3.806662 0.000000 721 +reduc 0 22 3.850148 0.000000 759 +corpor 0 21 3.912023 0.000000 802 +among 0 21 3.912023 0.000000 781 +annot 0 21 3.912023 0.000000 775 +increas 0 20 3.951244 0.000000 829 +qualiti 0 20 3.951244 0.000000 832 +fine 0 20 3.951244 0.000000 822 +media 0 19 4.007333 0.000000 861 +feedback 0 19 4.007333 0.000000 854 +seem 0 18 4.060443 0.000000 899 +agent 0 18 4.060443 0.000000 910 +whether 0 17 4.110874 0.000000 918 +remot 0 15 4.248495 0.000000 1041 +believ 0 13 4.382027 0.000000 1187 +captur 0 12 4.465908 0.000000 1232 +safe 0 12 4.465908 0.000000 1274 +market 0 11 4.553877 0.000000 1361 +end 0 9 4.753590 0.000000 1567 +correctli 0 9 4.753590 0.000000 1478 +risk 0 8 4.875197 0.000000 1689 +xerox 0 8 4.875197 0.000000 1725 +davi 0 7 5.010635 0.000000 1888 +intellectu 0 7 5.010635 0.000000 1847 +dead 0 7 5.010635 0.000000 1840 +foreign 0 7 5.010635 0.000000 1919 +edumi 0 6 5.164786 0.000000 2132 +sponsor 0 6 5.164786 0.000000 2133 +whichi 0 6 5.164786 0.000000 2056 +contract 0 6 5.164786 0.000000 1985 +huttenloch 0 6 5.164786 0.000000 1983 +begun 0 5 5.347108 0.000000 2386 +clarif 0 5 5.347108 0.000000 2253 +medium 0 4 5.568345 0.000000 2834 +transmit 0 4 5.568345 0.000000 2835 +lawyer 0 4 5.568345 0.000000 2836 +evid 0 4 5.568345 0.000000 2768 +isthat 0 4 5.568345 0.000000 2723 +owner 0 3 5.857933 0.000000 3531 +narr 0 3 5.857933 0.000000 3454 +worker 0 2 6.263398 0.000000 4841 +institutejim 0 1 6.957497 0.000000 9330 +davisxerox 0 1 6.957497 0.000000 9331 +corporationphd 0 1 6.957497 0.000000 9332 +improvecommun 0 1 6.957497 0.000000 9333 +andcont 0 1 6.957497 0.000000 9334 +reformat 0 1 6.957497 0.000000 9335 +inhypertext 0 1 6.957497 0.000000 9336 +thecstr 0 1 6.957497 0.000000 9337 +anarpa 0 1 6.957497 0.000000 9338 +moreeasili 0 1 6.957497 0.000000 9339 +electronicsystem 0 1 6.957497 0.000000 9340 +ofor 0 1 6.957497 0.000000 9341 +memoryinclud 0 1 6.957497 0.000000 9342 +sscreenplai 0 1 6.957497 0.000000 9343 +producedth 0 1 6.957497 0.000000 9344 +andjustif 0 1 6.957497 0.000000 9345 +developingcorpor 0 1 6.957497 0.000000 9346 +sharedannot 0 1 6.957497 0.000000 9347 +howpeopl 0 1 6.957497 0.000000 9348 +inelectron 0 1 6.957497 0.000000 9349 +prototypeimplement 0 1 6.957497 0.000000 9350 +shareddocu 0 1 6.957497 0.000000 9351 +nnotat 0 1 6.957497 0.000000 9352 +berequest 0 1 6.957497 0.000000 9353 +orcorrect 0 1 6.957497 0.000000 9354 +aus 0 1 6.957497 0.000000 9355 +willfind 0 1 6.957497 0.000000 9356 +whetherstud 0 1 6.957497 0.000000 9357 +usefulmean 0 1 6.957497 0.000000 9358 +designof 0 1 6.957497 0.000000 9359 +proxi 0 1 6.957497 0.000000 9360 +reliablycarri 0 1 6.957497 0.000000 9361 +toeither 0 1 6.957497 0.000000 9362 +alsopap 0 1 6.957497 0.000000 9363 +publicatiion 0 1 6.957497 0.000000 9364 +thedrimi 0 1 6.957497 0.000000 9365 +meprofession 0 1 6.957497 0.000000 9366 +historythi 0 1 6.957497 0.000000 9367 +improvisationi 0 1 6.957497 0.000000 9368 +resumeno 0 1 6.957497 0.000000 9369 +likeit 0 1 6.957497 0.000000 9370 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..613f3fd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +person 0 111 2.197225 0.000000 117 +search 1 95 2.397895 2.397895 155 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +result 0 65 2.772589 0.000000 281 +right 0 48 3.044522 0.000000 363 +word 0 34 3.401197 0.000000 508 +relev 0 26 3.688879 0.000000 637 +greg 0 24 3.761200 0.000000 695 +reserv 0 20 3.951244 0.000000 808 +configur 0 15 4.248495 0.000000 1012 +metacrawl 0 10 4.653960 0.000000 1455 +erik 0 8 4.875197 0.000000 1701 +oren 0 6 5.164786 0.000000 2134 +etzioni 0 6 5.164786 0.000000 2135 +selberg 0 5 5.347108 0.000000 2441 +phrase 0 5 5.347108 0.000000 2242 +ahoi 0 3 5.857933 0.000000 3532 +searchingmetacrawlerbi 0 1 6.957497 0.000000 9371 +lauckhartand 0 1 6.957497 0.000000 9372 +etzioniif 0 1 6.957497 0.000000 9373 +wordssort 0 1 6.957497 0.000000 9374 +locationcontrol 0 1 6.957497 0.000000 9375 +problemswebmast 0 1 6.957497 0.000000 9376 +comcopyright 0 1 6.957497 0.000000 9377 +lauckhart 0 1 6.957497 0.000000 9378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..db3a2449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +wisconsin 0 169 1.791759 0.000000 54 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +peopl 0 96 2.302585 0.000000 132 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +ieee 0 86 2.484907 0.000000 190 +activ 0 84 2.484907 0.000000 182 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +dynam 0 76 2.564949 0.000000 194 +server 0 76 2.564949 0.000000 204 +intellig 0 72 2.639057 0.000000 225 +simul 0 66 2.708050 0.000000 255 +plan 0 65 2.772589 0.000000 272 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +special 0 56 2.890372 0.000000 320 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +effect 0 46 3.091042 0.000000 385 +mechan 0 43 3.178054 0.000000 416 +review 0 42 3.218876 0.000000 425 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +societi 0 40 3.258097 0.000000 456 +seminar 0 38 3.295837 0.000000 470 +robot 1 36 3.367296 3.367296 497 +tech 0 35 3.401197 0.000000 515 +committe 0 34 3.401197 0.000000 522 +human 0 32 3.465736 0.000000 546 +robert 0 30 3.555348 0.000000 567 +ask 0 28 3.610918 0.000000 597 +motion 0 24 3.761200 0.000000 699 +compress 0 23 3.806662 0.000000 719 +director 0 22 3.850148 0.000000 767 +geometri 0 22 3.850148 0.000000 752 +path 0 21 3.912023 0.000000 778 +brief 0 16 4.174387 0.000000 1001 +chuck 0 14 4.317488 0.000000 1108 +nasa 0 13 4.382027 0.000000 1188 +vladimir 0 11 4.553877 0.000000 1324 +sens 0 11 4.553877 0.000000 1305 +errata 0 10 4.653960 0.000000 1403 +dyer 0 9 4.753590 0.000000 1573 +sensit 0 8 4.875197 0.000000 1726 +manufactur 0 8 4.875197 0.000000 1634 +sensor 0 7 5.010635 0.000000 1920 +jude 0 6 5.164786 0.000000 2123 +actuat 0 5 5.347108 0.000000 2442 +shavlik 0 5 5.347108 0.000000 2429 +lumelski 0 4 5.568345 0.000000 2837 +underwat 0 4 5.568345 0.000000 2838 +redund 0 4 5.568345 0.000000 2839 +skin 0 4 5.568345 0.000000 2840 +neil 0 4 5.568345 0.000000 2841 +kinemat 0 3 5.857933 0.000000 3516 +avenuemadison 0 2 6.263398 0.000000 4842 +maze 0 2 6.263398 0.000000 4843 +tether 0 2 6.263398 0.000000 4844 +duffi 0 2 6.263398 0.000000 4845 +lorenz 0 2 6.263398 0.000000 4846 +telerobot 0 2 6.263398 0.000000 4847 +hert 0 2 6.263398 0.000000 4848 +jogger 0 1 6.957497 0.000000 9379 +decentr 0 1 6.957497 0.000000 9380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..d9f450db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +graduat 0 215 1.386294 0.000000 31 +wisconsin 0 169 1.791759 0.000000 54 +mathemat 0 108 2.197225 0.000000 123 +center 0 88 2.397895 0.000000 158 +ieee 0 86 2.484907 0.000000 190 +institut 0 84 2.484907 0.000000 187 +dept 0 64 2.772589 0.000000 291 +plan 0 65 2.772589 0.000000 272 +colleg 0 61 2.833213 0.000000 300 +mechan 0 43 3.178054 0.000000 416 +autom 0 41 3.218876 0.000000 434 +societi 0 40 3.258097 0.000000 456 +electr 0 38 3.295837 0.000000 461 +robot 0 36 3.367296 0.000000 497 +global 0 34 3.401197 0.000000 520 +tech 0 35 3.401197 0.000000 515 +committe 0 34 3.401197 0.000000 522 +human 0 32 3.465736 0.000000 546 +motion 0 24 3.761200 0.000000 699 +geometri 0 22 3.850148 0.000000 752 +path 0 21 3.912023 0.000000 778 +grant 0 12 4.465908 0.000000 1216 +vladimir 0 11 4.553877 0.000000 1324 +sensit 0 8 4.875197 0.000000 1726 +lumelski 0 4 5.568345 0.000000 2837 +underwat 0 4 5.568345 0.000000 2838 +redund 0 4 5.568345 0.000000 2839 +skin 0 4 5.568345 0.000000 2840 +kinemat 0 3 5.857933 0.000000 3516 +mace 0 2 6.263398 0.000000 4849 +lumelskyprofessormechan 0 1 6.957497 0.000000 9381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..1dd0da89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +phone 0 175 1.791759 0.000000 45 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +find 0 111 2.197225 0.000000 111 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +structur 0 106 2.197225 0.000000 105 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +level 0 87 2.484907 0.000000 180 +environ 0 84 2.484907 0.000000 177 +requir 0 81 2.484907 0.000000 167 +thing 0 84 2.484907 0.000000 189 +wide 0 84 2.484907 0.000000 185 +ieee 0 86 2.484907 0.000000 190 +messag 0 76 2.564949 0.000000 212 +issu 0 78 2.564949 0.000000 211 +appear 0 78 2.564949 0.000000 210 +method 0 80 2.564949 0.000000 213 +want 0 79 2.564949 0.000000 199 +decemb 0 80 2.564949 0.000000 215 +upson 0 71 2.639057 0.000000 218 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +integr 0 67 2.708050 0.000000 245 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +septemb 0 65 2.772589 0.000000 274 +ithaca 0 65 2.772589 0.000000 294 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +function 0 62 2.772589 0.000000 275 +virtual 0 62 2.772589 0.000000 285 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +think 0 57 2.890372 0.000000 314 +major 0 56 2.890372 0.000000 315 +reason 0 57 2.890372 0.000000 318 +explor 0 58 2.890372 0.000000 324 +adapt 0 46 3.091042 0.000000 387 +done 0 47 3.091042 0.000000 381 +protocol 0 45 3.135494 0.000000 407 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +video 0 44 3.135494 0.000000 405 +mark 0 44 3.135494 0.000000 403 +mechan 0 43 3.178054 0.000000 416 +third 0 43 3.178054 0.000000 412 +small 0 39 3.258097 0.000000 447 +brian 0 38 3.295837 0.000000 466 +respons 0 37 3.332205 0.000000 476 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +cluster 0 28 3.610918 0.000000 612 +pass 0 28 3.610918 0.000000 611 +packag 0 28 3.610918 0.000000 614 +detect 0 26 3.688879 0.000000 646 +bound 0 26 3.688879 0.000000 659 +reliabl 0 25 3.737670 0.000000 674 +reach 0 24 3.761200 0.000000 688 +pattern 0 24 3.761200 0.000000 689 +highli 0 23 3.806662 0.000000 725 +lead 0 23 3.806662 0.000000 718 +sciencecornel 0 22 3.850148 0.000000 768 +deal 0 22 3.850148 0.000000 736 +cooper 0 22 3.850148 0.000000 757 +tell 0 21 3.912023 0.000000 777 +flexibl 0 21 3.912023 0.000000 792 +exploit 0 20 3.951244 0.000000 836 +smith 0 20 3.951244 0.000000 820 +left 0 19 4.007333 0.000000 851 +predict 0 19 4.007333 0.000000 855 +miss 0 19 4.007333 0.000000 866 +speed 0 18 4.060443 0.000000 911 +failur 0 18 4.060443 0.000000 898 +anyon 0 17 4.110874 0.000000 916 +latenc 0 16 4.174387 0.000000 993 +transfer 0 16 4.174387 0.000000 967 +devic 0 16 4.174387 0.000000 1002 +practicum 0 16 4.174387 0.000000 960 +horu 0 14 4.317488 0.000000 1116 +achiev 0 14 4.317488 0.000000 1088 +demand 0 14 4.317488 0.000000 1073 +eicken 0 13 4.382027 0.000000 1134 +thorsten 0 13 4.382027 0.000000 1133 +kenneth 0 12 4.465908 0.000000 1265 +brad 0 12 4.465908 0.000000 1264 +reness 0 11 4.553877 0.000000 1333 +noth 0 11 4.553877 0.000000 1328 +bandwidth 0 11 4.553877 0.000000 1365 +node 0 11 4.553877 0.000000 1326 +werner 0 10 4.653960 0.000000 1385 +guarante 0 10 4.653960 0.000000 1391 +awai 0 10 4.653960 0.000000 1447 +mountain 0 10 4.653960 0.000000 1456 +robbert 0 9 4.753590 0.000000 1529 +birman 0 9 4.753590 0.000000 1531 +desir 0 9 4.753590 0.000000 1542 +deadlin 0 9 4.753590 0.000000 1502 +vogel 0 8 4.875197 0.000000 1622 +sigop 0 8 4.875197 0.000000 1727 +extract 0 8 4.875197 0.000000 1728 +vineet 0 8 4.875197 0.000000 1639 +perfect 0 7 5.010635 0.000000 1921 +gave 0 7 5.010635 0.000000 1922 +synchroni 0 7 5.010635 0.000000 1923 +implementationof 0 7 5.010635 0.000000 1813 +deliv 0 6 5.164786 0.000000 2070 +subsystem 0 6 5.164786 0.000000 2015 +alex 0 6 5.164786 0.000000 2130 +situat 0 5 5.347108 0.000000 2365 +scope 0 5 5.347108 0.000000 2296 +buch 0 5 5.347108 0.000000 2272 +myresearch 0 4 5.568345 0.000000 2842 +behind 0 4 5.568345 0.000000 2610 +basu 0 4 5.568345 0.000000 2843 +hayden 0 4 5.568345 0.000000 2844 +hickei 0 4 5.568345 0.000000 2845 +vaysburd 0 4 5.568345 0.000000 2846 +concert 0 3 5.857933 0.000000 3533 +interfacefor 0 3 5.857933 0.000000 3534 +anindya 0 3 5.857933 0.000000 3535 +copper 0 3 5.857933 0.000000 3536 +glade 0 3 5.857933 0.000000 3537 +takako 0 3 5.857933 0.000000 3538 +amwork 0 2 6.263398 0.000000 4850 +regardless 0 2 6.263398 0.000000 4577 +katherin 0 2 6.263398 0.000000 4851 +dalia 0 2 6.263398 0.000000 4852 +malki 0 2 6.263398 0.000000 4853 +workshopconnamoran 0 1 6.957497 0.000000 9382 +ierland 0 1 6.957497 0.000000 9383 +researchera 0 1 6.957497 0.000000 9384 +halldept 0 1 6.957497 0.000000 9385 +thehorusand 0 1 6.957497 0.000000 9386 +bandwith 0 1 6.957497 0.000000 9387 +horuswith 0 1 6.957497 0.000000 9388 +fallen 0 1 6.957497 0.000000 9389 +latencyfor 0 1 6.957497 0.000000 9390 +protocolsar 0 1 6.957497 0.000000 9391 +structureand 0 1 6.957497 0.000000 9392 +guarant 0 1 6.957497 0.000000 9393 +acur 0 1 6.957497 0.000000 9394 +aglob 0 1 6.957497 0.000000 9395 +supportfailur 0 1 6.957497 0.000000 9396 +suspis 0 1 6.957497 0.000000 9397 +workwith 0 1 6.957497 0.000000 9398 +middlewar 0 1 6.957497 0.000000 9399 +brainchild 0 1 6.957497 0.000000 9400 +andken 0 1 6.957497 0.000000 9401 +withthorsten 0 1 6.957497 0.000000 9402 +horusexperi 0 1 6.957497 0.000000 9403 +lectureson 0 1 6.957497 0.000000 9404 +virtuallysynchron 0 1 6.957497 0.000000 9405 +princpl 0 1 6.957497 0.000000 9406 +hpc 0 1 6.957497 0.000000 9407 +kati 0 1 6.957497 0.000000 9408 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..178d1da2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +provid 0 121 2.079442 0.000000 94 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +version 0 113 2.197225 0.000000 122 +manag 0 114 2.197225 0.000000 125 +theori 0 111 2.197225 0.000000 127 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +final 0 116 2.197225 0.000000 108 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +commun 0 95 2.397895 0.000000 157 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +comment 0 93 2.397895 0.000000 146 +requir 0 81 2.484907 0.000000 167 +activ 0 84 2.484907 0.000000 182 +environ 0 84 2.484907 0.000000 177 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +practic 0 70 2.708050 0.000000 246 +java 0 70 2.708050 0.000000 248 +multimedia 0 68 2.708050 0.000000 258 +collect 0 65 2.772589 0.000000 268 +virtual 0 62 2.772589 0.000000 285 +written 0 63 2.772589 0.000000 278 +type 0 61 2.833213 0.000000 296 +special 0 56 2.890372 0.000000 320 +overview 0 56 2.890372 0.000000 323 +publish 0 57 2.890372 0.000000 326 +found 0 53 2.944439 0.000000 337 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +scientif 0 53 2.944439 0.000000 341 +much 0 52 2.995732 0.000000 349 +life 0 50 3.044522 0.000000 375 +set 0 50 3.044522 0.000000 361 +standard 0 48 3.044522 0.000000 365 +effect 0 46 3.091042 0.000000 385 +made 0 44 3.135494 0.000000 398 +describ 0 45 3.135494 0.000000 400 +better 0 45 3.135494 0.000000 401 +compani 0 41 3.218876 0.000000 423 +cach 0 41 3.218876 0.000000 432 +origin 0 38 3.295837 0.000000 472 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +purpos 0 37 3.332205 0.000000 481 +cost 0 37 3.332205 0.000000 480 +workstat 0 37 3.332205 0.000000 479 +ofth 0 36 3.367296 0.000000 491 +everi 0 34 3.401197 0.000000 519 +word 0 34 3.401197 0.000000 508 +toler 0 33 3.433987 0.000000 533 +within 0 33 3.433987 0.000000 525 +articl 0 33 3.433987 0.000000 530 +fault 0 32 3.465736 0.000000 547 +independ 0 32 3.465736 0.000000 548 +collabor 0 32 3.465736 0.000000 543 +idea 0 32 3.465736 0.000000 545 +secur 0 30 3.555348 0.000000 577 +exist 0 30 3.555348 0.000000 569 +power 0 30 3.555348 0.000000 573 +platform 0 29 3.583519 0.000000 591 +framework 0 28 3.610918 0.000000 606 +cluster 0 28 3.610918 0.000000 612 +american 0 27 3.637586 0.000000 634 +effort 0 26 3.688879 0.000000 652 +toward 0 25 3.737670 0.000000 668 +never 0 25 3.737670 0.000000 671 +reliabl 0 25 3.737670 0.000000 674 +wish 0 24 3.761200 0.000000 692 +seri 0 24 3.761200 0.000000 708 +initi 0 23 3.806662 0.000000 717 +varieti 0 22 3.850148 0.000000 740 +unit 0 21 3.912023 0.000000 779 +thu 0 21 3.912023 0.000000 773 +born 0 21 3.912023 0.000000 798 +runtim 0 19 4.007333 0.000000 858 +minim 0 18 4.060443 0.000000 887 +earli 0 16 4.174387 0.000000 968 +style 0 15 4.248495 0.000000 1036 +contribut 0 15 4.248495 0.000000 1021 +horu 1 14 4.317488 4.317488 1116 +attribut 0 14 4.317488 0.000000 1092 +coher 0 14 4.317488 0.000000 1109 +becam 0 14 4.317488 0.000000 1117 +whose 0 13 4.382027 0.000000 1166 +bodi 0 13 4.382027 0.000000 1178 +brother 0 13 4.382027 0.000000 1189 +replic 0 12 4.465908 0.000000 1231 +kenneth 0 12 4.465908 0.000000 1265 +robust 0 12 4.465908 0.000000 1271 +evolv 0 12 4.465908 0.000000 1223 +weight 0 12 4.465908 0.000000 1204 +rest 0 12 4.465908 0.000000 1259 +reness 0 11 4.553877 0.000000 1333 +faster 0 11 4.553877 0.000000 1323 +volum 0 11 4.553877 0.000000 1347 +death 0 10 4.653960 0.000000 1457 +modular 0 10 4.653960 0.000000 1392 +modul 0 10 4.653960 0.000000 1434 +length 0 10 4.653960 0.000000 1400 +sentenc 0 10 4.653960 0.000000 1413 +werner 0 10 4.653960 0.000000 1385 +birman 0 9 4.753590 0.000000 1531 +robbert 0 9 4.753590 0.000000 1529 +light 0 9 4.753590 0.000000 1533 +heart 0 8 4.875197 0.000000 1729 +gain 0 8 4.875197 0.000000 1730 +vogel 0 8 4.875197 0.000000 1622 +dead 0 7 5.010635 0.000000 1840 +aris 0 7 5.010635 0.000000 1924 +exactli 0 7 5.010635 0.000000 1817 +synchroni 0 7 5.010635 0.000000 1923 +usabl 0 7 5.010635 0.000000 1810 +conferenc 0 7 5.010635 0.000000 1857 +brought 0 7 5.010635 0.000000 1925 +restrict 0 6 5.164786 0.000000 2129 +outstand 0 6 5.164786 0.000000 2136 +mother 0 6 5.164786 0.000000 2083 +greatest 0 6 5.164786 0.000000 2073 +isi 0 5 5.347108 0.000000 2443 +elsewher 0 5 5.347108 0.000000 2444 +circumst 0 5 5.347108 0.000000 2283 +knew 0 5 5.347108 0.000000 2445 +hair 0 5 5.347108 0.000000 2446 +firm 0 4 5.568345 0.000000 2684 +areavail 0 4 5.568345 0.000000 2810 +projectth 0 3 5.857933 0.000000 3344 +woman 0 3 5.857933 0.000000 3539 +redesign 0 3 5.857933 0.000000 3540 +greatli 0 3 5.857933 0.000000 3541 +child 0 3 5.857933 0.000000 3542 +london 0 3 5.857933 0.000000 3282 +ensembl 0 2 6.263398 0.000000 4854 +oppos 0 2 6.263398 0.000000 4855 +egypt 0 2 6.263398 0.000000 4856 +groupwar 0 2 6.263398 0.000000 4857 +toconstruct 0 2 6.263398 0.000000 4858 +communicationarchitectur 0 2 6.263398 0.000000 4859 +ofreleas 0 2 6.263398 0.000000 4860 +transi 0 2 6.263398 0.000000 4861 +froma 0 2 6.263398 0.000000 4862 +mighti 0 2 6.263398 0.000000 4863 +wing 0 2 6.263398 0.000000 4864 +stir 0 2 6.263398 0.000000 4865 +lament 0 2 6.263398 0.000000 4866 +papersand 0 2 6.263398 0.000000 4867 +silvano 0 2 6.263398 0.000000 4868 +mytholog 0 2 6.263398 0.000000 4869 +court 0 2 6.263398 0.000000 4870 +osiri 0 1 6.957497 0.000000 9409 +egyptian 0 1 6.957497 0.000000 9410 +god 0 1 6.957497 0.000000 9411 +rejoic 0 1 6.957497 0.000000 9412 +groupcommun 0 1 6.957497 0.000000 9413 +triumphant 0 1 6.957497 0.000000 9414 +ofisi 0 1 6.957497 0.000000 9415 +heir 0 1 6.957497 0.000000 9416 +appealedstrongli 0 1 6.957497 0.000000 9417 +becausein 0 1 6.957497 0.000000 9418 +possess 0 1 6.957497 0.000000 9419 +renew 0 1 6.957497 0.000000 9420 +movementa 0 1 6.957497 0.000000 9421 +inact 0 1 6.957497 0.000000 9422 +applicationsbas 0 1 6.957497 0.000000 9423 +infault 0 1 6.957497 0.000000 9424 +thatexploit 0 1 6.957497 0.000000 9425 +theoveral 0 1 6.957497 0.000000 9426 +applicationprotocol 0 1 6.957497 0.000000 9427 +applicationrequir 0 1 6.957497 0.000000 9428 +launch 0 1 6.957497 0.000000 9429 +theisi 0 1 6.957497 0.000000 9430 +robustdistribut 0 1 6.957497 0.000000 9431 +unsuit 0 1 6.957497 0.000000 9432 +asappl 0 1 6.957497 0.000000 9433 +besidesth 0 1 6.957497 0.000000 9434 +usedfor 0 1 6.957497 0.000000 9435 +sametim 0 1 6.957497 0.000000 9436 +lighter 0 1 6.957497 0.000000 9437 +beus 0 1 6.957497 0.000000 9438 +commericalright 0 1 6.957497 0.000000 9439 +manyoth 0 1 6.957497 0.000000 9440 +nofe 0 1 6.957497 0.000000 9441 +ensemblewil 0 1 6.957497 0.000000 9442 +groupwareappl 0 1 6.957497 0.000000 9443 +differentclass 0 1 6.957497 0.000000 9444 +onnext 0 1 6.957497 0.000000 9445 +speedcommun 0 1 6.957497 0.000000 9446 +systemsproject 0 1 6.957497 0.000000 9447 +navtech 0 1 6.957497 0.000000 9448 +stormcast 0 1 6.957497 0.000000 9449 +tacomaproject 0 1 6.957497 0.000000 9450 +thesepag 0 1 6.957497 0.000000 9451 +begotten 0 1 6.957497 0.000000 9452 +sorrow 0 1 6.957497 0.000000 9453 +herhusband 0 1 6.957497 0.000000 9454 +goddess 0 1 6.957497 0.000000 9455 +distress 0 1 6.957497 0.000000 9456 +equippedwith 0 1 6.957497 0.000000 9457 +utter 0 1 6.957497 0.000000 9458 +mighthav 0 1 6.957497 0.000000 9459 +secret 0 1 6.957497 0.000000 9460 +suckl 0 1 6.957497 0.000000 9461 +rear 0 1 6.957497 0.000000 9462 +horusvisit 0 1 6.957497 0.000000 9463 +abstractpag 0 1 6.957497 0.000000 9464 +relatedto 0 1 6.957497 0.000000 9465 +maffei 0 1 6.957497 0.000000 9466 +flexiblegroup 0 1 6.957497 0.000000 9467 +hyme 0 1 6.957497 0.000000 9468 +osirisfrom 0 1 6.957497 0.000000 9469 +papyru 0 1 6.957497 0.000000 9470 +walli 0 1 6.957497 0.000000 9471 +budg 0 1 6.957497 0.000000 9472 +studiesin 0 1 6.957497 0.000000 9473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..5ee369a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +read 0 154 1.791759 0.000000 47 +report 0 131 2.079442 0.000000 92 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +person 0 111 2.197225 0.000000 117 +peopl 0 96 2.302585 0.000000 132 +technic 0 100 2.302585 0.000000 140 +comment 0 93 2.397895 0.000000 146 +info 0 85 2.484907 0.000000 176 +academ 0 82 2.484907 0.000000 178 +activ 0 84 2.484907 0.000000 182 +know 0 80 2.564949 0.000000 198 +server 0 76 2.564949 0.000000 204 +master 0 76 2.564949 0.000000 216 +appear 0 78 2.564949 0.000000 210 +free 0 73 2.639057 0.000000 224 +degre 0 69 2.708050 0.000000 259 +locat 0 59 2.833213 0.000000 303 +faculti 0 56 2.890372 0.000000 325 +semest 0 58 2.890372 0.000000 312 +direct 0 57 2.890372 0.000000 316 +undergradu 0 54 2.944439 0.000000 338 +standard 0 48 3.044522 0.000000 365 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +annual 0 40 3.258097 0.000000 458 +feel 0 37 3.332205 0.000000 483 +staff 0 36 3.367296 0.000000 490 +tech 0 35 3.401197 0.000000 515 +go 0 33 3.433987 0.000000 529 +taught 0 33 3.433987 0.000000 526 +collabor 0 32 3.465736 0.000000 543 +team 0 27 3.637586 0.000000 625 +doctor 0 24 3.761200 0.000000 709 +brows 0 23 3.806662 0.000000 726 +size 0 23 3.806662 0.000000 713 +offici 0 18 4.060443 0.000000 894 +anonym 0 14 4.317488 0.000000 1100 +outstand 0 6 5.164786 0.000000 2136 +disclaim 0 4 5.568345 0.000000 2847 +projector 0 3 5.857933 0.000000 3409 +universitydepart 0 2 6.263398 0.000000 4871 +infoget 0 1 6.957497 0.000000 9474 +contactswithin 0 1 6.957497 0.000000 9475 +facultyfind 0 1 6.957497 0.000000 9476 +ortheir 0 1 6.957497 0.000000 9477 +researchcheck 0 1 6.957497 0.000000 9478 +aboutour 0 1 6.957497 0.000000 9479 +publicationsfind 0 1 6.957497 0.000000 9480 +researcherseith 0 1 6.957497 0.000000 9481 +degreeslook 0 1 6.957497 0.000000 9482 +orundergradu 0 1 6.957497 0.000000 9483 +academicsrefer 0 1 6.957497 0.000000 9484 +webfor 0 1 6.957497 0.000000 9485 +generalcoursedescript 0 1 6.957497 0.000000 9486 +peopleget 0 1 6.957497 0.000000 9487 +directorylist 0 1 6.957497 0.000000 9488 +activitiesfind 0 1 6.957497 0.000000 9489 +theassoci 0 1 6.957497 0.000000 9490 +excellenthockei 0 1 6.957497 0.000000 9491 +serverscheck 0 1 6.957497 0.000000 9492 +gopherserv 0 1 6.957497 0.000000 9493 +ftpserver 0 1 6.957497 0.000000 9494 +sitesquest 0 1 6.957497 0.000000 9495 +informationpres 0 1 6.957497 0.000000 9496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..c467a7f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +model 0 145 1.945910 0.000000 69 +analysi 0 124 2.079442 0.000000 98 +document 0 121 2.079442 0.000000 89 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +seattl 0 120 2.079442 0.000000 103 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +structur 0 106 2.197225 0.000000 105 +version 0 113 2.197225 0.000000 122 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +pleas 0 113 2.197225 0.000000 114 +text 1 98 2.302585 2.302585 133 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +search 0 95 2.397895 0.000000 155 +proceed 0 93 2.397895 0.000000 152 +octob 0 89 2.397895 0.000000 156 +associ 0 93 2.397895 0.000000 151 +section 0 94 2.397895 0.000000 149 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +larg 0 82 2.484907 0.000000 168 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +environ 0 84 2.484907 0.000000 177 +institut 0 84 2.484907 0.000000 187 +june 0 79 2.564949 0.000000 214 +method 0 80 2.564949 0.000000 213 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +nation 0 74 2.639057 0.000000 240 +addit 0 74 2.639057 0.000000 228 +servic 0 72 2.639057 0.000000 236 +meet 0 72 2.639057 0.000000 229 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +free 0 73 2.639057 0.000000 224 +main 0 67 2.708050 0.000000 256 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +would 0 67 2.708050 0.000000 251 +degre 0 69 2.708050 0.000000 259 +test 0 66 2.708050 0.000000 252 +multimedia 0 68 2.708050 0.000000 258 +collect 0 65 2.772589 0.000000 268 +import 0 65 2.772589 0.000000 282 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +copi 0 63 2.772589 0.000000 284 +automat 0 61 2.833213 0.000000 306 +colleg 0 61 2.833213 0.000000 300 +unix 0 58 2.890372 0.000000 308 +publish 0 57 2.890372 0.000000 326 +sever 0 56 2.890372 0.000000 322 +faculti 0 56 2.890372 0.000000 325 +extens 0 53 2.944439 0.000000 340 +cover 0 55 2.944439 0.000000 329 +februari 0 54 2.944439 0.000000 328 +without 0 50 3.044522 0.000000 370 +approach 0 48 3.044522 0.000000 366 +done 0 47 3.091042 0.000000 381 +electron 0 47 3.091042 0.000000 379 +natur 0 44 3.135494 0.000000 406 +made 0 44 3.135494 0.000000 398 +third 0 43 3.178054 0.000000 412 +around 0 43 3.178054 0.000000 415 +york 0 41 3.218876 0.000000 435 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +annual 0 40 3.258097 0.000000 458 +continu 0 39 3.258097 0.000000 448 +transact 0 39 3.258097 0.000000 438 +microsoft 0 38 3.295837 0.000000 468 +purpos 0 37 3.332205 0.000000 481 +field 0 37 3.332205 0.000000 482 +mean 0 37 3.332205 0.000000 477 +respons 0 37 3.332205 0.000000 476 +word 0 34 3.401197 0.000000 508 +statist 0 35 3.401197 0.000000 521 +represent 0 35 3.401197 0.000000 512 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +queri 0 33 3.433987 0.000000 524 +articl 0 33 3.433987 0.000000 530 +express 0 32 3.465736 0.000000 540 +storag 0 31 3.496508 0.000000 553 +exist 0 30 3.555348 0.000000 569 +abl 0 30 3.555348 0.000000 566 +semant 0 29 3.583519 0.000000 587 +full 0 28 3.610918 0.000000 615 +progress 0 28 3.610918 0.000000 598 +retriev 0 27 3.637586 0.000000 621 +determin 0 27 3.637586 0.000000 630 +subject 0 26 3.688879 0.000000 647 +consist 0 26 3.688879 0.000000 651 +store 0 24 3.761200 0.000000 693 +handl 0 24 3.761200 0.000000 685 +size 0 23 3.806662 0.000000 713 +brows 0 23 3.806662 0.000000 726 +util 0 21 3.912023 0.000000 774 +similar 0 21 3.912023 0.000000 771 +flexibl 0 21 3.912023 0.000000 792 +corpor 0 21 3.912023 0.000000 802 +department 0 20 3.951244 0.000000 839 +hypertext 0 19 4.007333 0.000000 865 +media 0 19 4.007333 0.000000 861 +item 0 19 4.007333 0.000000 856 +appropri 0 18 4.060443 0.000000 883 +germani 0 17 4.110874 0.000000 946 +expand 0 17 4.110874 0.000000 928 +analyz 0 17 4.110874 0.000000 925 +moor 0 17 4.110874 0.000000 936 +vector 0 16 4.174387 0.000000 961 +capabl 0 15 4.248495 0.000000 1016 +piec 0 15 4.248495 0.000000 1020 +demand 0 14 4.317488 0.000000 1073 +conduct 0 14 4.317488 0.000000 1065 +rank 0 14 4.317488 0.000000 1063 +context 0 13 4.382027 0.000000 1153 +station 0 13 4.382027 0.000000 1157 +denis 0 12 4.465908 0.000000 1255 +readabl 0 12 4.465908 0.000000 1258 +optic 0 12 4.465908 0.000000 1221 +hypermedia 0 12 4.465908 0.000000 1247 +realiti 0 12 4.465908 0.000000 1272 +pageif 0 12 4.465908 0.000000 1275 +smart 0 11 4.553877 0.000000 1352 +refin 0 11 4.553877 0.000000 1363 +probabilist 0 11 4.553877 0.000000 1343 +rapid 0 10 4.653960 0.000000 1453 +paragraph 0 10 4.653960 0.000000 1449 +sentenc 0 10 4.653960 0.000000 1413 +equip 0 10 4.653960 0.000000 1459 +criteria 0 9 4.753590 0.000000 1477 +charg 0 9 4.753590 0.000000 1582 +hundr 0 9 4.753590 0.000000 1528 +mass 0 8 4.875197 0.000000 1732 +formul 0 8 4.875197 0.000000 1733 +matter 0 8 4.875197 0.000000 1627 +colloquium 0 8 4.875197 0.000000 1734 +harvard 0 7 5.010635 0.000000 1926 +densiti 0 7 5.010635 0.000000 1927 +vehicl 0 7 5.010635 0.000000 1928 +sparc 0 7 5.010635 0.000000 1860 +maryland 0 6 5.164786 0.000000 2140 +furthermor 0 6 5.164786 0.000000 2141 +restrict 0 6 5.164786 0.000000 2129 +ohio 0 5 5.347108 0.000000 2447 +corpu 0 5 5.347108 0.000000 2282 +medicin 0 5 5.347108 0.000000 2448 +eduph 0 5 5.347108 0.000000 2449 +accuraci 0 5 5.347108 0.000000 2450 +feder 0 5 5.347108 0.000000 2266 +travers 0 5 5.347108 0.000000 2363 +allan 0 4 5.568345 0.000000 2849 +rapidli 0 4 5.568345 0.000000 2850 +machineri 0 4 5.568345 0.000000 2851 +termin 0 4 5.568345 0.000000 2852 +ireland 0 4 5.568345 0.000000 2853 +sophist 0 3 5.857933 0.000000 3545 +genom 0 3 5.857933 0.000000 3546 +trec 0 3 5.857933 0.000000 3547 +gigabyt 0 3 5.857933 0.000000 3548 +activitiesmemb 0 3 5.857933 0.000000 3549 +zurich 0 3 5.857933 0.000000 3550 +switzerland 0 3 5.857933 0.000000 3551 +vega 0 3 5.857933 0.000000 3450 +softwareth 0 3 5.857933 0.000000 3552 +sigir 0 2 6.263398 0.000000 4873 +bucklei 0 2 6.263398 0.000000 4874 +nevada 0 2 6.263398 0.000000 4875 +gerard 0 2 6.263398 0.000000 4876 +decreas 0 2 6.263398 0.000000 4877 +absenc 0 2 6.263398 0.000000 4878 +unrestrict 0 2 6.263398 0.000000 4879 +excerpt 0 2 6.263398 0.000000 4880 +activitiesassoci 0 2 6.263398 0.000000 4881 +systemsprogram 0 2 6.263398 0.000000 4882 +dublin 0 2 6.263398 0.000000 4883 +moscow 0 2 6.263398 0.000000 4884 +encyclopedia 0 1 6.957497 0.000000 9505 +bethesda 0 1 6.957497 0.000000 9506 +columbu 0 1 6.957497 0.000000 9507 +saltongerard 0 1 6.957497 0.000000 9508 +saltonprofessorg 0 1 6.957497 0.000000 9509 +cheapli 0 1 6.957497 0.000000 9510 +funk 0 1 6.957497 0.000000 9511 +wagnal 0 1 6.957497 0.000000 9512 +committeeprofession 0 1 6.957497 0.000000 9513 +seventeenth 0 1 6.957497 0.000000 9514 +darmstadt 0 1 6.957497 0.000000 9515 +lecturesautomat 0 1 6.957497 0.000000 9516 +konstanz 0 1 6.957497 0.000000 9517 +asi 0 1 6.957497 0.000000 9518 +publicationsapproach 0 1 6.957497 0.000000 9519 +passag 0 1 6.957497 0.000000 9520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..8f505edc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +problem 0 147 1.945910 0.000000 75 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +world 0 115 2.197225 0.000000 126 +theori 0 111 2.197225 0.000000 127 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +proceed 0 93 2.397895 0.000000 152 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +journal 0 83 2.484907 0.000000 183 +novemb 0 81 2.484907 0.000000 179 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +solv 0 73 2.639057 0.000000 234 +appli 0 71 2.639057 0.000000 226 +august 0 66 2.708050 0.000000 257 +function 0 62 2.772589 0.000000 275 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +numer 0 49 3.044522 0.000000 369 +approach 0 48 3.044522 0.000000 366 +annual 0 40 3.258097 0.000000 458 +china 0 37 3.332205 0.000000 487 +global 0 34 3.401197 0.000000 520 +return 0 34 3.401197 0.000000 502 +scientist 0 31 3.496508 0.000000 560 +exist 0 30 3.555348 0.000000 569 +chines 0 29 3.583519 0.000000 595 +consid 0 29 3.583519 0.000000 590 +scale 0 28 3.610918 0.000000 613 +subject 0 26 3.688879 0.000000 647 +bound 0 26 3.688879 0.000000 659 +enhanc 0 26 3.688879 0.000000 644 +siam 0 21 3.912023 0.000000 800 +department 0 20 3.951244 0.000000 839 +region 0 19 4.007333 0.000000 875 +beij 0 19 4.007333 0.000000 876 +minim 1 18 4.060443 4.060443 887 +accept 0 18 4.060443 0.000000 879 +moor 0 17 4.110874 0.000000 936 +young 0 16 4.174387 0.000000 991 +condit 0 16 4.174387 0.000000 975 +reflect 0 15 4.248495 0.000000 1034 +nonlinear 0 14 4.317488 0.000000 1107 +denis 0 12 4.465908 0.000000 1255 +pageif 0 12 4.465908 0.000000 1275 +penalti 0 10 4.653960 0.000000 1405 +trust 0 9 4.753590 0.000000 1583 +exact 0 9 4.753590 0.000000 1509 +converg 0 7 5.010635 0.000000 1844 +constrain 0 6 5.164786 0.000000 2042 +eduph 0 5 5.347108 0.000000 2449 +affin 0 5 5.347108 0.000000 2378 +interior 0 5 5.347108 0.000000 2439 +waterloo 0 3 5.857933 0.000000 3523 +unconstrain 0 2 6.263398 0.000000 4499 +publicationsa 0 2 6.263398 0.000000 4885 +nonlinearli 0 1 6.957497 0.000000 9521 +yui 0 1 6.957497 0.000000 9522 +liyui 0 1 6.957497 0.000000 9523 +liresearch 0 1 6.957497 0.000000 9524 +associateyui 0 1 6.957497 0.000000 9525 +lecturesan 0 1 6.957497 0.000000 9526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..03af62fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +studi 0 120 2.079442 0.000000 91 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +code 0 108 2.197225 0.000000 116 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +present 0 91 2.397895 0.000000 145 +call 0 91 2.397895 0.000000 153 +center 0 88 2.397895 0.000000 158 +proceed 0 93 2.397895 0.000000 152 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +journal 0 83 2.484907 0.000000 183 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +novemb 0 81 2.484907 0.000000 179 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +orient 0 80 2.564949 0.000000 205 +method 0 80 2.564949 0.000000 213 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +workshop 0 71 2.639057 0.000000 239 +meet 0 72 2.639057 0.000000 229 +symposium 0 72 2.639057 0.000000 238 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +januari 0 62 2.772589 0.000000 264 +function 0 62 2.772589 0.000000 275 +septemb 0 65 2.772589 0.000000 274 +ithaca 0 65 2.772589 0.000000 294 +type 0 61 2.833213 0.000000 296 +special 0 56 2.890372 0.000000 320 +space 0 57 2.890372 0.000000 310 +reason 0 57 2.890372 0.000000 318 +publish 0 57 2.890372 0.000000 326 +scientif 0 53 2.944439 0.000000 341 +allow 0 53 2.944439 0.000000 333 +profession 0 51 2.995732 0.000000 345 +numer 0 49 3.044522 0.000000 369 +principl 0 48 3.044522 0.000000 357 +done 0 47 3.091042 0.000000 381 +effect 0 46 3.091042 0.000000 385 +algebra 0 45 3.135494 0.000000 394 +mechan 0 43 3.178054 0.000000 416 +york 0 41 3.218876 0.000000 435 +autom 0 41 3.218876 0.000000 434 +review 0 42 3.218876 0.000000 425 +societi 0 40 3.258097 0.000000 456 +transact 0 39 3.258097 0.000000 438 +error 0 40 3.258097 0.000000 449 +annual 0 40 3.258097 0.000000 458 +electr 0 38 3.295837 0.000000 461 +correct 0 38 3.295837 0.000000 462 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +extend 0 32 3.465736 0.000000 539 +richard 0 31 3.496508 0.000000 559 +scientist 0 31 3.496508 0.000000 560 +specifi 0 30 3.555348 0.000000 568 +compon 0 30 3.555348 0.000000 570 +common 0 30 3.555348 0.000000 574 +focus 0 29 3.583519 0.000000 584 +symbol 0 27 3.637586 0.000000 620 +american 0 27 3.637586 0.000000 634 +challeng 0 26 3.688879 0.000000 653 +constraint 0 26 3.688879 0.000000 636 +equat 0 23 3.806662 0.000000 724 +toolkit 0 20 3.951244 0.000000 835 +department 0 20 3.951244 0.000000 839 +region 0 19 4.007333 0.000000 875 +north 0 19 4.007333 0.000000 873 +boston 0 19 4.007333 0.000000 862 +lisp 0 18 4.060443 0.000000 897 +differenti 0 17 4.110874 0.000000 921 +layer 0 17 4.110874 0.000000 926 +moor 0 17 4.110874 0.000000 936 +modern 0 16 4.174387 0.000000 966 +vector 0 16 4.174387 0.000000 961 +susan 0 15 4.248495 0.000000 1050 +polynomi 0 14 4.317488 0.000000 1069 +dean 0 14 4.317488 0.000000 1104 +massachusett 0 14 4.317488 0.000000 1118 +joint 0 13 4.382027 0.000000 1130 +convert 0 13 4.382027 0.000000 1122 +opportun 0 13 4.382027 0.000000 1161 +denis 0 12 4.465908 0.000000 1255 +calcul 0 12 4.465908 0.000000 1268 +deduct 0 12 4.465908 0.000000 1236 +pageif 0 12 4.465908 0.000000 1275 +israel 0 11 4.553877 0.000000 1366 +vista 0 10 4.653960 0.000000 1452 +matric 0 10 4.653960 0.000000 1399 +decomposit 0 10 4.653960 0.000000 1439 +modular 0 10 4.653960 0.000000 1392 +factor 0 9 4.753590 0.000000 1544 +ring 0 8 4.875197 0.000000 1684 +ideal 0 8 4.875197 0.000000 1630 +aris 0 7 5.010635 0.000000 1924 +boundari 0 7 5.010635 0.000000 1929 +pursu 0 7 5.010635 0.000000 1902 +refere 0 7 5.010635 0.000000 1895 +interpol 0 7 5.010635 0.000000 1823 +rubinfeld 0 6 5.164786 0.000000 1998 +carolina 0 6 5.164786 0.000000 2142 +kluwer 0 6 5.164786 0.000000 2143 +fluid 0 5 5.347108 0.000000 2440 +ronitt 0 5 5.347108 0.000000 2265 +eduph 0 5 5.347108 0.000000 2449 +colleagu 0 5 5.347108 0.000000 2304 +ration 0 5 5.347108 0.000000 2427 +synthes 0 5 5.347108 0.000000 2451 +suni 0 5 5.347108 0.000000 2452 +weyl 0 4 5.568345 0.000000 2854 +dexter 0 4 5.568345 0.000000 2855 +kozen 0 4 5.568345 0.000000 2619 +technion 0 4 5.568345 0.000000 2856 +suppli 0 4 5.568345 0.000000 2611 +substrat 0 4 5.568345 0.000000 2857 +weizmann 0 4 5.568345 0.000000 2858 +syracus 0 3 5.857933 0.000000 3553 +haifa 0 3 5.857933 0.000000 3554 +aerospac 0 3 5.857933 0.000000 3555 +durham 0 3 5.857933 0.000000 3279 +dawson 0 2 6.263398 0.000000 4886 +microstorag 0 2 6.263398 0.000000 4887 +activitieseditori 0 2 6.263398 0.000000 4888 +softwareprogram 0 2 6.263398 0.000000 4889 +irreduc 0 2 6.263398 0.000000 4890 +rehovot 0 2 6.263398 0.000000 4891 +albani 0 2 6.263398 0.000000 4892 +multivari 0 2 6.263398 0.000000 4151 +landau 0 1 6.957497 0.000000 9527 +zippelrichard 0 1 6.957497 0.000000 9528 +zippelsenior 0 1 6.957497 0.000000 9529 +associaterz 0 1 6.957497 0.000000 9530 +computationlecturesalgebra 0 1 6.957497 0.000000 9531 +publicationseffect 0 1 6.957497 0.000000 9532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..3375ff7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +provid 0 121 2.079442 0.000000 94 +report 0 131 2.079442 0.000000 92 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +start 0 83 2.484907 0.000000 173 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +academ 0 82 2.484907 0.000000 178 +journal 0 83 2.484907 0.000000 183 +novemb 0 81 2.484907 0.000000 179 +dynam 0 76 2.564949 0.000000 194 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +refer 0 78 2.564949 0.000000 203 +orient 0 80 2.564949 0.000000 205 +master 0 76 2.564949 0.000000 216 +decemb 0 80 2.564949 0.000000 215 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +nation 0 74 2.639057 0.000000 240 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +multimedia 0 68 2.708050 0.000000 258 +integr 0 67 2.708050 0.000000 245 +virtual 0 62 2.772589 0.000000 285 +import 0 65 2.772589 0.000000 282 +collect 0 65 2.772589 0.000000 268 +polici 0 64 2.772589 0.000000 279 +reason 0 57 2.890372 0.000000 318 +special 0 56 2.890372 0.000000 320 +major 0 56 2.890372 0.000000 315 +faculti 0 56 2.890372 0.000000 325 +extens 0 53 2.944439 0.000000 340 +much 0 52 2.995732 0.000000 349 +profession 0 51 2.995732 0.000000 345 +set 0 50 3.044522 0.000000 361 +basic 0 50 3.044522 0.000000 360 +approach 0 48 3.044522 0.000000 366 +california 0 46 3.091042 0.000000 388 +featur 0 46 3.091042 0.000000 386 +done 0 47 3.091042 0.000000 381 +execut 0 45 3.135494 0.000000 404 +mark 0 44 3.135494 0.000000 403 +protocol 0 45 3.135494 0.000000 407 +combin 0 42 3.218876 0.000000 421 +press 0 42 3.218876 0.000000 419 +theoret 0 39 3.258097 0.000000 446 +transact 0 39 3.258097 0.000000 438 +societi 0 40 3.258097 0.000000 456 +annual 0 40 3.258097 0.000000 458 +origin 0 38 3.295837 0.000000 472 +brian 0 38 3.295837 0.000000 466 +purpos 0 37 3.332205 0.000000 481 +committe 0 34 3.401197 0.000000 522 +singl 0 34 3.401197 0.000000 510 +return 0 34 3.401197 0.000000 502 +toler 0 33 3.433987 0.000000 533 +within 0 33 3.433987 0.000000 525 +fault 0 32 3.465736 0.000000 547 +collabor 0 32 3.465736 0.000000 543 +idea 0 32 3.465736 0.000000 545 +ad 0 32 3.465736 0.000000 544 +scientist 0 31 3.496508 0.000000 560 +secur 0 30 3.555348 0.000000 577 +focu 0 30 3.555348 0.000000 571 +option 0 30 3.555348 0.000000 575 +specifi 0 30 3.555348 0.000000 568 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +univ 0 28 3.610918 0.000000 617 +intend 0 28 3.610918 0.000000 599 +actual 0 28 3.610918 0.000000 604 +packag 0 28 3.610918 0.000000 614 +effort 0 26 3.688879 0.000000 652 +consist 0 26 3.688879 0.000000 651 +berkelei 0 26 3.688879 0.000000 657 +reliabl 0 25 3.737670 0.000000 674 +concern 0 25 3.737670 0.000000 666 +although 0 25 3.737670 0.000000 667 +supercomput 0 25 3.737670 0.000000 681 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +mike 0 24 3.761200 0.000000 703 +head 0 23 3.806662 0.000000 732 +cooper 0 22 3.850148 0.000000 757 +properti 0 22 3.850148 0.000000 749 +flexibl 0 21 3.912023 0.000000 792 +toolkit 0 20 3.951244 0.000000 835 +exploit 0 20 3.951244 0.000000 836 +smith 0 20 3.951244 0.000000 820 +department 0 20 3.951244 0.000000 839 +prove 0 19 4.007333 0.000000 848 +runtim 0 19 4.007333 0.000000 858 +failur 0 18 4.060443 0.000000 898 +speed 0 18 4.060443 0.000000 911 +element 0 18 4.060443 0.000000 895 +layer 0 17 4.110874 0.000000 926 +seek 0 17 4.110874 0.000000 954 +former 0 17 4.110874 0.000000 956 +moor 0 17 4.110874 0.000000 936 +upon 0 16 4.174387 0.000000 978 +permit 0 16 4.174387 0.000000 962 +critic 0 16 4.174387 0.000000 982 +side 0 15 4.248495 0.000000 1022 +action 0 15 4.248495 0.000000 1038 +horu 1 14 4.317488 4.317488 1116 +becam 0 14 4.317488 0.000000 1117 +coordin 0 13 4.382027 0.000000 1182 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +denis 0 12 4.465908 0.000000 1255 +kenneth 0 12 4.465908 0.000000 1265 +replic 0 12 4.465908 0.000000 1231 +robust 0 12 4.465908 0.000000 1271 +infrastructur 0 12 4.465908 0.000000 1234 +pageif 0 12 4.465908 0.000000 1275 +reness 0 11 4.553877 0.000000 1333 +broad 0 11 4.553877 0.000000 1302 +probabilist 0 11 4.553877 0.000000 1343 +primit 0 11 4.553877 0.000000 1317 +certain 0 10 4.653960 0.000000 1393 +nuprl 0 10 4.653960 0.000000 1402 +guarante 0 10 4.653960 0.000000 1391 +werner 0 10 4.653960 0.000000 1385 +unusu 0 9 4.753590 0.000000 1566 +latter 0 9 4.753590 0.000000 1522 +correctli 0 9 4.753590 0.000000 1478 +robbert 0 9 4.753590 0.000000 1529 +birman 0 9 4.753590 0.000000 1531 +vogel 0 8 4.875197 0.000000 1622 +synchroni 0 7 5.010635 0.000000 1923 +friedman 0 7 5.010635 0.000000 1886 +chief 0 7 5.010635 0.000000 1829 +privaci 0 6 5.164786 0.000000 2144 +emerg 0 6 5.164786 0.000000 2038 +recruit 0 6 5.164786 0.000000 2145 +isi 0 5 5.347108 0.000000 2443 +notabl 0 5 5.347108 0.000000 2276 +broadcast 0 5 5.347108 0.000000 2453 +activitieseditor 0 5 5.347108 0.000000 2454 +popular 0 4 5.568345 0.000000 2802 +hayden 0 4 5.568345 0.000000 2844 +publicationsth 0 4 5.568345 0.000000 2859 +reconfigur 0 3 5.857933 0.000000 3556 +leverag 0 3 5.857933 0.000000 3153 +embodi 0 3 5.857933 0.000000 3236 +reiter 0 3 5.857933 0.000000 3461 +constabl 0 3 5.857933 0.000000 3186 +act 0 3 5.857933 0.000000 3557 +leadership 0 3 5.857933 0.000000 3320 +alamito 0 3 5.857933 0.000000 3558 +glade 0 3 5.857933 0.000000 3537 +benign 0 2 6.263398 0.000000 4893 +activitieschair 0 2 6.263398 0.000000 4894 +isat 0 2 6.263398 0.000000 4895 +birmankenneth 0 1 6.957497 0.000000 9533 +birmanprofessorphd 0 1 6.957497 0.000000 9534 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..152e1afb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +model 0 145 1.945910 0.000000 69 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +technolog 0 131 2.079442 0.000000 102 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +graphic 1 90 2.397895 2.397895 147 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +environ 0 84 2.484907 0.000000 177 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +control 0 82 2.484907 0.000000 164 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +journal 0 83 2.484907 0.000000 183 +novemb 0 81 2.484907 0.000000 179 +method 0 80 2.564949 0.000000 213 +dynam 0 76 2.564949 0.000000 194 +nation 0 74 2.639057 0.000000 240 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +differ 0 66 2.708050 0.000000 253 +complex 0 64 2.772589 0.000000 269 +foundat 0 62 2.772589 0.000000 286 +prof 0 64 2.772589 0.000000 273 +descript 0 64 2.772589 0.000000 271 +evalu 0 64 2.772589 0.000000 266 +creat 0 63 2.772589 0.000000 277 +laboratori 0 63 2.772589 0.000000 292 +interact 0 62 2.772589 0.000000 270 +improv 0 62 2.772589 0.000000 289 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +visual 0 48 3.044522 0.000000 372 +california 0 46 3.091042 0.000000 388 +made 0 44 3.135494 0.000000 398 +past 0 42 3.218876 0.000000 428 +annual 0 40 3.258097 0.000000 458 +multi 0 36 3.367296 0.000000 493 +within 0 33 3.433987 0.000000 525 +board 0 33 3.433987 0.000000 528 +anim 0 31 3.496508 0.000000 557 +rang 0 30 3.555348 0.000000 565 +focu 0 30 3.555348 0.000000 571 +particip 0 29 3.583519 0.000000 589 +progress 0 28 3.610918 0.000000 598 +determin 0 27 3.637586 0.000000 630 +american 0 27 3.637586 0.000000 634 +constraint 0 26 3.688879 0.000000 636 +strategi 0 25 3.737670 0.000000 682 +trace 0 25 3.737670 0.000000 677 +hill 0 25 3.737670 0.000000 670 +fellow 0 24 3.761200 0.000000 701 +motion 0 24 3.761200 0.000000 699 +displai 0 23 3.806662 0.000000 712 +input 0 23 3.806662 0.000000 727 +director 0 22 3.850148 0.000000 767 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +geometri 0 22 3.850148 0.000000 752 +flexibl 0 21 3.912023 0.000000 792 +synthesi 0 20 3.951244 0.000000 834 +facil 0 20 3.951244 0.000000 814 +department 0 20 3.951244 0.000000 839 +geometr 0 19 4.007333 0.000000 852 +media 0 19 4.007333 0.000000 861 +north 0 19 4.007333 0.000000 873 +dimension 0 18 4.060443 0.000000 909 +medic 0 17 4.110874 0.000000 958 +previous 0 17 4.110874 0.000000 923 +render 0 17 4.110874 0.000000 947 +moor 0 17 4.110874 0.000000 936 +spatial 0 16 4.174387 0.000000 988 +brown 0 16 4.174387 0.000000 977 +reflect 0 15 4.248495 0.000000 1034 +micro 0 15 4.248495 0.000000 1031 +conduct 0 14 4.317488 0.000000 1065 +denis 0 12 4.465908 0.000000 1255 +pageif 0 12 4.465908 0.000000 1275 +host 0 11 4.553877 0.000000 1306 +volum 0 11 4.553877 0.000000 1347 +modular 0 10 4.653960 0.000000 1392 +surfac 0 9 4.753590 0.000000 1574 +donald 0 9 4.753590 0.000000 1510 +routin 0 9 4.753590 0.000000 1549 +tempor 0 9 4.753590 0.000000 1584 +light 0 9 4.753590 0.000000 1533 +utah 0 9 4.753590 0.000000 1585 +realist 0 8 4.875197 0.000000 1665 +polygon 0 8 4.875197 0.000000 1723 +textur 0 8 4.875197 0.000000 1677 +academi 0 8 4.875197 0.000000 1735 +clip 0 7 5.010635 0.000000 1868 +parametr 0 7 5.010635 0.000000 1819 +suffici 0 7 5.010635 0.000000 1897 +core 0 7 5.010635 0.000000 1809 +hidden 0 6 5.164786 0.000000 1987 +photographi 0 6 5.164786 0.000000 2146 +carolina 0 6 5.164786 0.000000 2142 +biolog 0 6 5.164786 0.000000 2147 +decad 0 5 5.347108 0.000000 2455 +testb 0 5 5.347108 0.000000 2456 +anti 0 5 5.347108 0.000000 2434 +chapel 0 5 5.347108 0.000000 2457 +jacob 0 4 5.568345 0.000000 2667 +gould 0 3 5.857933 0.000000 3559 +alias 0 2 6.263398 0.000000 4823 +activitieseditori 0 2 6.263398 0.000000 4888 +greenbergdonald 0 1 6.957497 0.000000 9535 +greenberg 0 1 6.957497 0.000000 9536 +schurman 0 1 6.957497 0.000000 9537 +perceptu 0 1 6.957497 0.000000 9538 +activitiesdirector 0 1 6.957497 0.000000 9539 +visualizationprofession 0 1 6.957497 0.000000 9540 +acmreturn 0 1 6.957497 0.000000 9541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..a1fca9d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +professor 0 137 1.945910 0.000000 76 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +structur 0 106 2.197225 0.000000 105 +world 0 115 2.197225 0.000000 126 +pleas 0 113 2.197225 0.000000 114 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +member 0 84 2.484907 0.000000 165 +novemb 0 81 2.484907 0.000000 179 +institut 0 84 2.484907 0.000000 187 +requir 0 81 2.484907 0.000000 167 +school 0 84 2.484907 0.000000 188 +chang 0 82 2.484907 0.000000 163 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +solv 0 73 2.639057 0.000000 234 +nation 0 74 2.639057 0.000000 240 +logic 0 71 2.639057 0.000000 230 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +degre 0 69 2.708050 0.000000 259 +august 0 66 2.708050 0.000000 257 +complex 0 64 2.772589 0.000000 269 +foundat 0 62 2.772589 0.000000 286 +guid 0 63 2.772589 0.000000 267 +interact 0 62 2.772589 0.000000 270 +visit 0 63 2.772589 0.000000 288 +variou 0 56 2.890372 0.000000 317 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +februari 0 54 2.944439 0.000000 328 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +profession 0 51 2.995732 0.000000 345 +advisor 0 51 2.995732 0.000000 355 +electron 0 47 3.091042 0.000000 379 +california 0 46 3.091042 0.000000 388 +understand 0 47 3.091042 0.000000 384 +physic 0 47 3.091042 0.000000 377 +natur 0 44 3.135494 0.000000 406 +york 0 41 3.218876 0.000000 435 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +futur 0 41 3.218876 0.000000 427 +theoret 0 39 3.258097 0.000000 446 +annual 0 40 3.258097 0.000000 458 +committe 0 34 3.401197 0.000000 522 +random 0 34 3.401197 0.000000 511 +award 0 34 3.401197 0.000000 523 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +within 0 33 3.433987 0.000000 525 +given 0 32 3.465736 0.000000 538 +richard 0 31 3.496508 0.000000 559 +hard 0 30 3.555348 0.000000 563 +art 0 29 3.583519 0.000000 593 +american 0 27 3.637586 0.000000 634 +consist 0 26 3.688879 0.000000 651 +bound 0 26 3.688879 0.000000 659 +fellow 0 24 3.761200 0.000000 701 +seri 0 24 3.761200 0.000000 708 +doctor 0 24 3.761200 0.000000 709 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +sequenti 0 22 3.850148 0.000000 745 +director 0 22 3.850148 0.000000 767 +siam 0 21 3.912023 0.000000 800 +divis 0 21 3.912023 0.000000 803 +exploit 0 20 3.951244 0.000000 836 +department 0 20 3.951244 0.000000 839 +walter 0 17 4.110874 0.000000 950 +germani 0 17 4.110874 0.000000 946 +moor 0 17 4.110874 0.000000 936 +georg 0 16 4.174387 0.000000 994 +brown 0 16 4.174387 0.000000 977 +contribut 0 15 4.248495 0.000000 1021 +topolog 0 14 4.317488 0.000000 1089 +essenti 0 13 4.382027 0.000000 1137 +denis 0 12 4.465908 0.000000 1255 +grow 0 12 4.465908 0.000000 1209 +amount 0 12 4.465908 0.000000 1208 +speech 0 12 4.465908 0.000000 1222 +weight 0 12 4.465908 0.000000 1204 +pageif 0 12 4.465908 0.000000 1275 +council 0 11 4.553877 0.000000 1364 +distinguish 0 11 4.553877 0.000000 1357 +rice 0 11 4.553877 0.000000 1336 +govern 0 9 4.753590 0.000000 1581 +classifi 0 9 4.753590 0.000000 1537 +classif 0 9 4.753590 0.000000 1586 +telecommun 0 9 4.753590 0.000000 1565 +observ 0 9 4.753590 0.000000 1578 +academi 0 8 4.875197 0.000000 1735 +quantit 0 8 4.875197 0.000000 1654 +gain 0 8 4.875197 0.000000 1730 +attent 0 8 4.875197 0.000000 1651 +virginia 0 8 4.875197 0.000000 1659 +trade 0 7 5.010635 0.000000 1815 +foreign 0 7 5.010635 0.000000 1919 +dimens 0 7 5.010635 0.000000 1930 +ture 0 6 5.164786 0.000000 1997 +advisori 0 6 5.164786 0.000000 2148 +chicago 0 6 5.164786 0.000000 2149 +recruit 0 6 5.164786 0.000000 2145 +prize 0 6 5.164786 0.000000 2150 +yield 0 5 5.347108 0.000000 2458 +activitieseditor 0 5 5.347108 0.000000 2454 +ifip 0 5 5.347108 0.000000 2459 +houston 0 5 5.347108 0.000000 2460 +scope 0 5 5.347108 0.000000 2296 +bulletin 0 5 5.347108 0.000000 2343 +comprehens 0 4 5.568345 0.000000 2745 +monograph 0 4 5.568345 0.000000 2860 +peer 0 4 5.568345 0.000000 2742 +tennesse 0 4 5.568345 0.000000 2763 +oracl 0 4 5.568345 0.000000 2823 +hypothesi 0 4 5.568345 0.000000 2650 +fals 0 4 5.568345 0.000000 2861 +hausdorff 0 4 5.568345 0.000000 2633 +explos 0 3 5.857933 0.000000 3138 +deeper 0 3 5.857933 0.000000 3146 +off 0 3 5.857933 0.000000 3170 +nondeterminist 0 3 5.857933 0.000000 3560 +activitiesmemb 0 3 5.857933 0.000000 3549 +law 0 2 6.263398 0.000000 4896 +har 0 2 6.263398 0.000000 4252 +aaa 0 2 6.263398 0.000000 4897 +banquet 0 2 6.263398 0.000000 4898 +publicationson 0 2 6.263398 0.000000 4899 +johan 0 2 6.263398 0.000000 4900 +eatc 0 1 6.957497 0.000000 9542 +juri 0 1 6.957497 0.000000 9543 +hartmanisjuri 0 1 6.957497 0.000000 9544 +hartmani 0 1 6.957497 0.000000 9545 +strateg 0 1 6.957497 0.000000 9546 +representativeschair 0 1 6.957497 0.000000 9547 +committeehonorsacm 0 1 6.957497 0.000000 9548 +stearn 0 1 6.957497 0.000000 9549 +latvian 0 1 6.957497 0.000000 9550 +charter 0 1 6.957497 0.000000 9551 +presseditori 0 1 6.957497 0.000000 9552 +sciencegoedel 0 1 6.957497 0.000000 9553 +awardshonorari 0 1 6.957497 0.000000 9554 +dortmund 0 1 6.957497 0.000000 9555 +lecturessom 0 1 6.957497 0.000000 9556 +benni 0 1 6.957497 0.000000 9557 +chor 0 1 6.957497 0.000000 9558 +od 0 1 6.957497 0.000000 9559 +goldreich 0 1 6.957497 0.000000 9560 +hastad 0 1 6.957497 0.000000 9561 +desh 0 1 6.957497 0.000000 9562 +ranjan 0 1 6.957497 0.000000 9563 +pankaj 0 1 6.957497 0.000000 9564 +rohatgi 0 1 6.957497 0.000000 9565 +kolmogorov 0 1 6.957497 0.000000 9566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..5bb2dbab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +modifi 0 178 1.609438 0.000000 35 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +manag 0 114 2.197225 0.000000 125 +mathemat 0 108 2.197225 0.000000 123 +pleas 0 113 2.197225 0.000000 114 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +associ 0 93 2.397895 0.000000 151 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +institut 0 84 2.484907 0.000000 187 +journal 0 83 2.484907 0.000000 183 +academ 0 82 2.484907 0.000000 178 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +interfac 0 79 2.564949 0.000000 209 +state 0 76 2.564949 0.000000 207 +nation 0 74 2.639057 0.000000 240 +involv 0 71 2.639057 0.000000 227 +david 0 71 2.639057 0.000000 232 +simul 0 66 2.708050 0.000000 255 +multimedia 0 68 2.708050 0.000000 258 +knowledg 0 67 2.708050 0.000000 243 +januari 0 62 2.772589 0.000000 264 +organ 0 65 2.772589 0.000000 265 +colleg 0 61 2.833213 0.000000 300 +variou 0 56 2.890372 0.000000 317 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +appoint 0 49 3.044522 0.000000 358 +physic 0 47 3.091042 0.000000 377 +electron 0 47 3.091042 0.000000 379 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +continu 0 39 3.258097 0.000000 448 +annual 0 40 3.258097 0.000000 458 +electr 0 38 3.295837 0.000000 461 +represent 0 35 3.401197 0.000000 512 +committe 0 34 3.401197 0.000000 522 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +within 0 33 3.433987 0.000000 525 +collabor 0 32 3.465736 0.000000 543 +storag 0 31 3.496508 0.000000 553 +art 0 29 3.583519 0.000000 593 +american 0 27 3.637586 0.000000 634 +supercomput 0 25 3.737670 0.000000 681 +fellow 0 24 3.761200 0.000000 701 +seri 0 24 3.761200 0.000000 708 +geometri 0 22 3.850148 0.000000 752 +unit 0 21 3.912023 0.000000 779 +among 0 21 3.912023 0.000000 781 +siam 0 21 3.912023 0.000000 800 +department 0 20 3.951244 0.000000 839 +geometr 0 19 4.007333 0.000000 852 +stanford 0 17 4.110874 0.000000 955 +moor 0 17 4.110874 0.000000 936 +remot 0 15 4.248495 0.000000 1041 +dean 0 14 4.317488 0.000000 1104 +heterogen 0 14 4.317488 0.000000 1090 +discret 0 13 4.382027 0.000000 1165 +captur 0 12 4.465908 0.000000 1232 +denis 0 12 4.465908 0.000000 1255 +robust 0 12 4.465908 0.000000 1271 +pageif 0 12 4.465908 0.000000 1275 +persist 0 11 4.553877 0.000000 1367 +council 0 11 4.553877 0.000000 1364 +facilit 0 10 4.653960 0.000000 1412 +packard 0 10 4.653960 0.000000 1444 +fellowship 0 10 4.653960 0.000000 1460 +academi 0 8 4.875197 0.000000 1735 +advisori 0 6 5.164786 0.000000 2148 +oxford 0 6 5.164786 0.000000 2121 +compris 0 4 5.568345 0.000000 2862 +activitiesmemb 0 3 5.857933 0.000000 3549 +defens 0 3 5.857933 0.000000 3327 +algorithmica 0 3 5.857933 0.000000 3561 +commiss 0 2 6.263398 0.000000 4901 +engineeringfellow 0 2 6.263398 0.000000 4902 +sciencesfellow 0 2 6.263398 0.000000 4903 +aaa 0 2 6.263398 0.000000 4897 +hopcroftjohn 0 1 6.957497 0.000000 9567 +hopcroftjoseph 0 1 6.957497 0.000000 9568 +silbert 0 1 6.957497 0.000000 9569 +engineeringprofessor 0 1 6.957497 0.000000 9570 +sciencephd 0 1 6.957497 0.000000 9571 +overse 0 1 6.957497 0.000000 9572 +applicationsmemb 0 1 6.957497 0.000000 9573 +boardmemb 0 1 6.957497 0.000000 9574 +forcememb 0 1 6.957497 0.000000 9575 +machinerychairman 0 1 6.957497 0.000000 9576 +trusteesmemb 0 1 6.957497 0.000000 9577 +lucil 0 1 6.957497 0.000000 9578 +foundationmemb 0 1 6.957497 0.000000 9579 +sloan 0 1 6.957497 0.000000 9580 +committeeadvisori 0 1 6.957497 0.000000 9581 +analysiseditor 0 1 6.957497 0.000000 9582 +geometryassoci 0 1 6.957497 0.000000 9583 +sciencesreturn 0 1 6.957497 0.000000 9584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..48a5f65d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +code 0 108 2.197225 0.000000 116 +structur 0 106 2.197225 0.000000 105 +intern 0 108 2.197225 0.000000 128 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +call 0 91 2.397895 0.000000 153 +proceed 0 93 2.397895 0.000000 152 +comment 0 93 2.397895 0.000000 146 +control 0 82 2.484907 0.000000 164 +journal 0 83 2.484907 0.000000 183 +larg 0 82 2.484907 0.000000 168 +librari 0 87 2.484907 0.000000 181 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +solut 0 82 2.484907 0.000000 162 +requir 0 81 2.484907 0.000000 167 +build 0 85 2.484907 0.000000 184 +institut 0 84 2.484907 0.000000 187 +novemb 0 81 2.484907 0.000000 179 +optim 0 79 2.564949 0.000000 197 +refer 0 78 2.564949 0.000000 203 +june 0 79 2.564949 0.000000 214 +method 0 80 2.564949 0.000000 213 +good 0 77 2.564949 0.000000 200 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +state 0 76 2.564949 0.000000 207 +involv 0 71 2.639057 0.000000 227 +solv 0 73 2.639057 0.000000 234 +line 0 75 2.639057 0.000000 231 +symposium 0 72 2.639057 0.000000 238 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +david 0 71 2.639057 0.000000 232 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +test 0 66 2.708050 0.000000 252 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +result 0 65 2.772589 0.000000 281 +foundat 0 62 2.772589 0.000000 286 +experi 0 64 2.772589 0.000000 283 +organ 0 65 2.772589 0.000000 265 +januari 0 62 2.772589 0.000000 264 +laboratori 0 63 2.772589 0.000000 292 +ithaca 0 65 2.772589 0.000000 294 +best 0 59 2.833213 0.000000 299 +automat 0 61 2.833213 0.000000 306 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +faculti 0 56 2.890372 0.000000 325 +local 0 55 2.944439 0.000000 334 +scientif 0 53 2.944439 0.000000 341 +processor 0 54 2.944439 0.000000 335 +februari 0 54 2.944439 0.000000 328 +investig 0 51 2.995732 0.000000 353 +profession 0 51 2.995732 0.000000 345 +approach 0 48 3.044522 0.000000 366 +numer 0 49 3.044522 0.000000 369 +principl 0 48 3.044522 0.000000 357 +adapt 0 46 3.091042 0.000000 387 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +made 0 44 3.135494 0.000000 398 +better 0 45 3.135494 0.000000 401 +answer 0 45 3.135494 0.000000 391 +math 0 44 3.135494 0.000000 402 +show 0 43 3.178054 0.000000 417 +linear 0 41 3.218876 0.000000 431 +review 0 42 3.218876 0.000000 425 +must 0 40 3.258097 0.000000 442 +annual 0 40 3.258097 0.000000 458 +small 0 39 3.258097 0.000000 447 +form 0 39 3.258097 0.000000 443 +transact 0 39 3.258097 0.000000 438 +microsoft 0 38 3.295837 0.000000 468 +open 0 38 3.295837 0.000000 469 +paul 0 38 3.295837 0.000000 471 +hand 0 37 3.332205 0.000000 475 +award 0 34 3.401197 0.000000 523 +singl 0 34 3.401197 0.000000 510 +return 0 34 3.401197 0.000000 502 +product 0 33 3.433987 0.000000 527 +board 0 33 3.433987 0.000000 528 +transform 0 32 3.465736 0.000000 542 +extend 0 32 3.465736 0.000000 539 +independ 0 32 3.465736 0.000000 548 +produc 0 30 3.555348 0.000000 572 +exist 0 30 3.555348 0.000000 569 +graph 0 30 3.555348 0.000000 576 +depend 0 29 3.583519 0.000000 583 +multiprocessor 0 28 3.610918 0.000000 605 +framework 0 28 3.610918 0.000000 606 +static 0 27 3.637586 0.000000 619 +enabl 0 26 3.688879 0.000000 655 +supercomput 0 25 3.737670 0.000000 681 +lab 0 24 3.761200 0.000000 698 +alwai 0 24 3.761200 0.000000 691 +known 0 24 3.761200 0.000000 702 +flow 0 24 3.761200 0.000000 700 +consult 0 24 3.761200 0.000000 687 +equat 0 23 3.806662 0.000000 724 +togeth 0 23 3.806662 0.000000 714 +almost 0 22 3.850148 0.000000 742 +deal 0 22 3.850148 0.000000 736 +sequenti 0 22 3.850148 0.000000 745 +corpor 0 21 3.912023 0.000000 802 +increas 0 20 3.951244 0.000000 829 +toolkit 0 20 3.951244 0.000000 835 +department 0 20 3.951244 0.000000 839 +partial 0 18 4.060443 0.000000 900 +matrix 0 17 4.110874 0.000000 933 +asplo 0 17 4.110874 0.000000 948 +differenti 0 17 4.110874 0.000000 921 +moor 0 17 4.110874 0.000000 936 +spars 0 16 4.174387 0.000000 989 +transfer 0 16 4.174387 0.000000 967 +young 0 16 4.174387 0.000000 991 +match 0 16 4.174387 0.000000 965 +normal 0 16 4.174387 0.000000 995 +permit 0 16 4.174387 0.000000 962 +intel 0 16 4.174387 0.000000 1000 +novel 0 15 4.248495 0.000000 1039 +fortran 0 15 4.248495 0.000000 1027 +massachusett 0 14 4.317488 0.000000 1118 +earlier 0 13 4.382027 0.000000 1140 +block 0 13 4.382027 0.000000 1183 +incorpor 0 13 4.382027 0.000000 1163 +unfortun 0 13 4.382027 0.000000 1170 +sigplan 0 13 4.382027 0.000000 1190 +denis 0 12 4.465908 0.000000 1255 +pageif 0 12 4.465908 0.000000 1275 +loop 0 11 4.553877 0.000000 1310 +mesh 0 11 4.553877 0.000000 1351 +refin 0 11 4.553877 0.000000 1363 +faster 0 11 4.553877 0.000000 1323 +summar 0 11 4.553877 0.000000 1295 +michigan 0 11 4.553877 0.000000 1368 +vladimir 0 11 4.553877 0.000000 1324 +matric 0 10 4.653960 0.000000 1399 +packard 0 10 4.653960 0.000000 1444 +preliminari 0 9 4.753590 0.000000 1480 +prefer 0 9 4.753590 0.000000 1491 +jersei 0 9 4.753590 0.000000 1587 +hewlett 0 8 4.875197 0.000000 1709 +poor 0 8 4.875197 0.000000 1736 +competit 0 8 4.875197 0.000000 1635 +potenti 0 8 4.875197 0.000000 1690 +uniprocessor 0 8 4.875197 0.000000 1696 +presidenti 0 8 4.875197 0.000000 1737 +wayn 0 8 4.875197 0.000000 1738 +pldi 0 8 4.875197 0.000000 1704 +keshav 0 7 5.010635 0.000000 1852 +solver 0 7 5.010635 0.000000 1911 +uniform 0 7 5.010635 0.000000 1845 +refere 0 7 5.010635 0.000000 1895 +elementari 0 7 5.010635 0.000000 1825 +dens 0 6 5.164786 0.000000 2122 +prize 0 6 5.164786 0.000000 2150 +ensur 0 6 5.164786 0.000000 2012 +nest 0 6 5.164786 0.000000 2151 +argonn 0 5 5.347108 0.000000 2461 +compet 0 5 5.347108 0.000000 2462 +decad 0 5 5.347108 0.000000 2455 +panel 0 5 5.347108 0.000000 2463 +seventh 0 5 5.347108 0.000000 2464 +restructur 0 4 5.568345 0.000000 2775 +align 0 4 5.568345 0.000000 2863 +flavor 0 4 5.568345 0.000000 2625 +indupraka 0 4 5.568345 0.000000 2639 +kodukula 0 4 5.568345 0.000000 2640 +stodghil 0 4 5.568345 0.000000 2864 +armi 0 3 5.857933 0.000000 3562 +topla 0 3 5.857933 0.000000 3563 +chelmsford 0 3 5.857933 0.000000 3564 +detroit 0 3 5.857933 0.000000 3565 +rutger 0 3 5.857933 0.000000 3566 +brunswick 0 3 5.857933 0.000000 3567 +redmond 0 3 5.857933 0.000000 3568 +professorphd 0 2 6.263398 0.000000 4904 +numa 0 2 6.263398 0.000000 4905 +lambda 0 2 6.263398 0.000000 4458 +knit 0 2 6.263398 0.000000 4906 +lcpc 0 2 6.263398 0.000000 4538 +kotlyar 0 2 6.263398 0.000000 4907 +pingalikeshav 0 1 6.957497 0.000000 9585 +pingaliassoci 0 1 6.957497 0.000000 9586 +radic 0 1 6.957497 0.000000 9587 +sparsiti 0 1 6.957497 0.000000 9588 +krylov 0 1 6.957497 0.000000 9589 +petsc 0 1 6.957497 0.000000 9590 +activitiespanel 0 1 6.957497 0.000000 9591 +ballist 0 1 6.957497 0.000000 9592 +odyssei 0 1 6.957497 0.000000 9593 +computereditori 0 1 6.957497 0.000000 9594 +awardsn 0 1 6.957497 0.000000 9595 +lecturesfast 0 1 6.957497 0.000000 9596 +publicationssolv 0 1 6.957497 0.000000 9597 +gianfranco 0 1 6.957497 0.000000 9598 +bilardi 0 1 6.957497 0.000000 9599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..87b9c59b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,367 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +lectur 0 135 1.945910 0.000000 73 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +confer 0 126 2.079442 0.000000 100 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +site 0 106 2.197225 0.000000 119 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +pleas 0 113 2.197225 0.000000 114 +techniqu 0 99 2.302585 0.000000 138 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +text 0 98 2.302585 0.000000 133 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +member 0 84 2.484907 0.000000 165 +environ 0 84 2.484907 0.000000 177 +school 0 84 2.484907 0.000000 188 +build 0 85 2.484907 0.000000 184 +ieee 0 86 2.484907 0.000000 190 +educ 0 86 2.484907 0.000000 191 +second 0 81 2.484907 0.000000 166 +start 0 83 2.484907 0.000000 173 +control 0 82 2.484907 0.000000 164 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +novemb 0 81 2.484907 0.000000 179 +state 0 76 2.564949 0.000000 207 +complet 0 77 2.564949 0.000000 208 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +messag 0 76 2.564949 0.000000 212 +workshop 0 71 2.639057 0.000000 239 +logic 0 71 2.639057 0.000000 230 +symposium 0 72 2.639057 0.000000 238 +meet 0 72 2.639057 0.000000 229 +involv 0 71 2.639057 0.000000 227 +appli 0 71 2.639057 0.000000 226 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +write 0 72 2.639057 0.000000 222 +materi 0 75 2.639057 0.000000 221 +summari 0 73 2.639057 0.000000 237 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +differ 0 66 2.708050 0.000000 253 +view 0 70 2.708050 0.000000 254 +virtual 0 62 2.772589 0.000000 285 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +evalu 0 64 2.772589 0.000000 266 +function 0 62 2.772589 0.000000 275 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +septemb 0 65 2.772589 0.000000 274 +foundat 0 62 2.772589 0.000000 286 +march 0 61 2.833213 0.000000 295 +juli 0 60 2.833213 0.000000 305 +reason 0 57 2.890372 0.000000 318 +summer 0 56 2.890372 0.000000 311 +publish 0 57 2.890372 0.000000 326 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +allow 0 53 2.944439 0.000000 333 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +investig 0 51 2.995732 0.000000 353 +run 0 51 2.995732 0.000000 347 +hardwar 0 51 2.995732 0.000000 350 +particular 0 51 2.995732 0.000000 352 +profession 0 51 2.995732 0.000000 345 +standard 0 48 3.044522 0.000000 365 +approach 0 48 3.044522 0.000000 366 +understand 0 47 3.091042 0.000000 384 +physic 0 47 3.091042 0.000000 377 +move 0 47 3.091042 0.000000 382 +could 0 46 3.091042 0.000000 383 +possibl 0 47 3.091042 0.000000 378 +protocol 0 45 3.135494 0.000000 407 +execut 0 45 3.135494 0.000000 404 +textbook 0 44 3.135494 0.000000 397 +term 0 43 3.178054 0.000000 411 +third 0 43 3.178054 0.000000 412 +past 0 42 3.218876 0.000000 428 +york 0 41 3.218876 0.000000 435 +editor 0 41 3.218876 0.000000 433 +continu 0 39 3.258097 0.000000 448 +programm 0 39 3.258097 0.000000 445 +transact 0 39 3.258097 0.000000 438 +annual 0 40 3.258097 0.000000 458 +prototyp 0 38 3.295837 0.000000 463 +origin 0 38 3.295837 0.000000 472 +formal 0 37 3.332205 0.000000 478 +respons 0 37 3.332205 0.000000 476 +hand 0 37 3.332205 0.000000 475 +committe 0 34 3.401197 0.000000 522 +concurr 0 34 3.401197 0.000000 501 +least 0 35 3.401197 0.000000 516 +singl 0 34 3.401197 0.000000 510 +everi 0 34 3.401197 0.000000 519 +global 0 34 3.401197 0.000000 520 +return 0 34 3.401197 0.000000 502 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +given 0 32 3.465736 0.000000 538 +ad 0 32 3.465736 0.000000 544 +exist 0 30 3.555348 0.000000 569 +depend 0 29 3.583519 0.000000 583 +synchron 0 29 3.583519 0.000000 588 +focus 0 29 3.583519 0.000000 584 +art 0 29 3.583519 0.000000 593 +becom 0 28 3.610918 0.000000 603 +pass 0 28 3.610918 0.000000 611 +univ 0 28 3.610918 0.000000 617 +american 0 27 3.637586 0.000000 634 +detect 0 26 3.688879 0.000000 646 +subject 0 26 3.688879 0.000000 647 +rule 0 26 3.688879 0.000000 638 +relev 0 26 3.688879 0.000000 637 +compar 0 26 3.688879 0.000000 648 +hill 0 25 3.737670 0.000000 670 +concern 0 25 3.737670 0.000000 666 +task 0 25 3.737670 0.000000 678 +handl 0 24 3.761200 0.000000 685 +seri 0 24 3.761200 0.000000 708 +proof 0 23 3.806662 0.000000 720 +equat 0 23 3.806662 0.000000 724 +mobil 0 23 3.806662 0.000000 730 +sequenc 0 23 3.806662 0.000000 734 +methodolog 0 23 3.806662 0.000000 733 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +reduc 0 22 3.850148 0.000000 759 +leav 0 21 3.912023 0.000000 772 +avoid 0 21 3.912023 0.000000 799 +exploit 0 20 3.951244 0.000000 836 +verif 0 20 3.951244 0.000000 826 +safeti 0 20 3.951244 0.000000 817 +department 0 20 3.951244 0.000000 839 +north 0 19 4.007333 0.000000 873 +agent 0 18 4.060443 0.000000 910 +partial 0 18 4.060443 0.000000 900 +along 0 18 4.060443 0.000000 878 +thoma 0 18 4.060443 0.000000 901 +scott 0 18 4.060443 0.000000 884 +germani 0 17 4.110874 0.000000 946 +sept 0 17 4.110874 0.000000 952 +outlin 0 17 4.110874 0.000000 914 +analyz 0 17 4.110874 0.000000 925 +otherwis 0 17 4.110874 0.000000 922 +whether 0 17 4.110874 0.000000 918 +moor 0 17 4.110874 0.000000 936 +critic 0 16 4.174387 0.000000 982 +georg 0 16 4.174387 0.000000 994 +letter 0 16 4.174387 0.000000 981 +hybrid 0 15 4.248495 0.000000 1057 +coordin 0 13 4.382027 0.000000 1182 +carri 0 13 4.382027 0.000000 1152 +nasa 0 13 4.382027 0.000000 1188 +discret 0 13 4.382027 0.000000 1165 +asynchron 0 12 4.465908 0.000000 1229 +verifi 0 12 4.465908 0.000000 1261 +denis 0 12 4.465908 0.000000 1255 +replic 0 12 4.465908 0.000000 1231 +speech 0 12 4.465908 0.000000 1222 +pageif 0 12 4.465908 0.000000 1275 +reness 0 11 4.553877 0.000000 1333 +refin 0 11 4.553877 0.000000 1363 +israel 0 11 4.553877 0.000000 1366 +bandwidth 0 11 4.553877 0.000000 1365 +typic 0 11 4.553877 0.000000 1360 +distinguish 0 11 4.553877 0.000000 1357 +arpa 0 11 4.553877 0.000000 1369 +volum 0 11 4.553877 0.000000 1347 +island 0 11 4.553877 0.000000 1345 +forc 0 10 4.653960 0.000000 1384 +resid 0 10 4.653960 0.000000 1461 +tradit 0 10 4.653960 0.000000 1404 +invit 0 10 4.653960 0.000000 1428 +traffic 0 10 4.653960 0.000000 1421 +robbert 0 9 4.753590 0.000000 1529 +compos 0 9 4.753590 0.000000 1527 +realiz 0 8 4.875197 0.000000 1739 +filter 0 8 4.875197 0.000000 1641 +satisfi 0 8 4.875197 0.000000 1694 +virginia 0 8 4.875197 0.000000 1659 +colloquium 0 8 4.875197 0.000000 1734 +paradigm 0 8 4.875197 0.000000 1662 +fifth 0 7 5.010635 0.000000 1931 +predic 0 7 5.010635 0.000000 1806 +chief 0 7 5.010635 0.000000 1829 +merg 0 7 5.010635 0.000000 1862 +carolina 0 6 5.164786 0.000000 2142 +moder 0 6 5.164786 0.000000 2112 +fred 0 6 5.164786 0.000000 2072 +brook 0 6 5.164786 0.000000 2152 +infer 0 6 5.164786 0.000000 2040 +risc 0 6 5.164786 0.000000 2016 +ensur 0 6 5.164786 0.000000 2012 +mistak 0 6 5.164786 0.000000 2110 +kluwer 0 6 5.164786 0.000000 2143 +causal 0 6 5.164786 0.000000 2024 +mission 0 5 5.347108 0.000000 2465 +ifip 0 5 5.347108 0.000000 2459 +chapel 0 5 5.347108 0.000000 2457 +attract 0 5 5.347108 0.000000 2356 +activitieseditor 0 5 5.347108 0.000000 2454 +merit 0 5 5.347108 0.000000 2466 +speaker 0 5 5.347108 0.000000 2370 +panel 0 5 5.347108 0.000000 2463 +proposit 0 5 5.347108 0.000000 2339 +bulletin 0 5 5.347108 0.000000 2343 +sigcs 0 4 5.568345 0.000000 2865 +stoller 0 4 5.568345 0.000000 2866 +nashvil 0 4 5.568345 0.000000 2867 +tennesse 0 4 5.568345 0.000000 2763 +schneider 0 4 5.568345 0.000000 2868 +increasingli 0 4 5.568345 0.000000 2766 +ident 0 4 5.568345 0.000000 2826 +suffic 0 4 5.568345 0.000000 2869 +conserv 0 4 5.568345 0.000000 2870 +monograph 0 4 5.568345 0.000000 2860 +assur 0 4 5.568345 0.000000 2722 +dagstuhl 0 4 5.568345 0.000000 2871 +technion 0 4 5.568345 0.000000 2856 +aircraft 0 4 5.568345 0.000000 2872 +newslett 0 4 5.568345 0.000000 2873 +gri 0 3 5.857933 0.000000 3569 +defens 0 3 5.857933 0.000000 3327 +munich 0 3 5.857933 0.000000 3570 +stoni 0 3 5.857933 0.000000 3571 +heavili 0 3 5.857933 0.000000 3572 +streamlin 0 3 5.857933 0.000000 3573 +jointli 0 3 5.857933 0.000000 3118 +dimac 0 3 5.857933 0.000000 3574 +reactiv 0 3 5.857933 0.000000 3575 +am 0 3 5.857933 0.000000 3386 +haifa 0 3 5.857933 0.000000 3554 +successor 0 3 5.857933 0.000000 3576 +hoto 0 3 5.857933 0.000000 3577 +orca 0 3 5.857933 0.000000 3578 +hypervisor 0 2 6.263398 0.000000 4549 +replica 0 2 6.263398 0.000000 4206 +norwai 0 2 6.263398 0.000000 4908 +systemsprogram 0 2 6.263398 0.000000 4882 +tacoma 0 2 6.263398 0.000000 4909 +isat 0 2 6.263398 0.000000 4895 +warfar 0 2 6.263398 0.000000 4910 +professorphd 0 2 6.263398 0.000000 4904 +widespread 0 2 6.263398 0.000000 4911 +annal 0 2 6.263398 0.000000 4912 +hebrew 0 2 6.263398 0.000000 4913 +banquet 0 2 6.263398 0.000000 4898 +grante 0 2 6.263398 0.000000 4914 +contractor 0 2 6.263398 0.000000 4915 +mason 0 2 6.263398 0.000000 4916 +airplan 0 2 6.263398 0.000000 4917 +jerusalem 0 2 6.263398 0.000000 4918 +marzullo 0 2 6.263398 0.000000 4919 +trom 0 1 6.957497 0.000000 9600 +marktoberdorf 0 1 6.957497 0.000000 9601 +lubeck 0 1 6.957497 0.000000 9602 +johansen 0 1 6.957497 0.000000 9603 +engineeringeditor 0 1 6.957497 0.000000 9604 +schneiderfr 0 1 6.957497 0.000000 9605 +assert 0 1 6.957497 0.000000 9606 +undefin 0 1 6.957497 0.000000 9607 +bressoud 0 1 6.957497 0.000000 9608 +instantli 0 1 6.957497 0.000000 9609 +freed 0 1 6.957497 0.000000 9610 +roam 0 1 6.957497 0.000000 9611 +activitiessabbat 0 1 6.957497 0.000000 9612 +computingeditor 0 1 6.957497 0.000000 9613 +letterseditor 0 1 6.957497 0.000000 9614 +systemseditor 0 1 6.957497 0.000000 9615 +surveysco 0 1 6.957497 0.000000 9616 +verlagprogram 0 1 6.957497 0.000000 9617 +constructionprogram 0 1 6.957497 0.000000 9618 +resili 0 1 6.957497 0.000000 9619 +applicationsprogram 0 1 6.957497 0.000000 9620 +sixteenth 0 1 6.957497 0.000000 9621 +symposiumprogram 0 1 6.957497 0.000000 9622 +systemsst 0 1 6.957497 0.000000 9623 +chissa 0 1 6.957497 0.000000 9624 +technologymemb 0 1 6.957497 0.000000 9625 +agencyreview 0 1 6.957497 0.000000 9626 +leibniz 0 1 6.957497 0.000000 9627 +universitymemb 0 1 6.957497 0.000000 9628 +awardsfellow 0 1 6.957497 0.000000 9629 +sciencefellow 0 1 6.957497 0.000000 9630 +machinerylecturesproof 0 1 6.957497 0.000000 9631 +afosr 0 1 6.957497 0.000000 9632 +panelist 0 1 6.957497 0.000000 9633 +publicationsreason 0 1 6.957497 0.000000 9634 +icalp 0 1 6.957497 0.000000 9635 +boll 0 1 6.957497 0.000000 9636 +limor 0 1 6.957497 0.000000 9637 +ultradepend 0 1 6.957497 0.000000 9638 +dehn 0 1 6.957497 0.000000 9639 +primu 0 1 6.957497 0.000000 9640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..f0901278 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +pleas 0 113 2.197225 0.000000 114 +text 0 98 2.302585 0.000000 133 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +member 0 84 2.484907 0.000000 165 +journal 0 83 2.484907 0.000000 183 +build 0 85 2.484907 0.000000 184 +control 0 82 2.484907 0.000000 164 +novemb 0 81 2.484907 0.000000 179 +method 0 80 2.564949 0.000000 213 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +solv 0 73 2.639057 0.000000 234 +januari 0 62 2.772589 0.000000 264 +descript 0 64 2.772589 0.000000 271 +result 0 65 2.772589 0.000000 281 +import 0 65 2.772589 0.000000 282 +organ 0 65 2.772589 0.000000 265 +plai 0 60 2.833213 0.000000 307 +variou 0 56 2.890372 0.000000 317 +semest 0 58 2.890372 0.000000 312 +undergradu 0 54 2.944439 0.000000 338 +particular 0 51 2.995732 0.000000 352 +profession 0 51 2.995732 0.000000 345 +adapt 0 46 3.091042 0.000000 387 +fast 0 42 3.218876 0.000000 429 +continu 0 39 3.258097 0.000000 448 +multipl 0 39 3.258097 0.000000 453 +societi 0 40 3.258097 0.000000 456 +annual 0 40 3.258097 0.000000 458 +close 0 38 3.295837 0.000000 465 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +product 0 33 3.433987 0.000000 527 +curriculum 0 33 3.433987 0.000000 535 +transform 0 32 3.465736 0.000000 542 +art 0 29 3.583519 0.000000 593 +chair 0 29 3.583519 0.000000 596 +subject 0 26 3.688879 0.000000 647 +constraint 0 26 3.688879 0.000000 636 +repres 0 26 3.688879 0.000000 656 +siam 0 21 3.912023 0.000000 800 +department 0 20 3.951244 0.000000 839 +four 0 18 4.060443 0.000000 905 +matrix 0 17 4.110874 0.000000 933 +moor 0 17 4.110874 0.000000 936 +role 0 14 4.317488 0.000000 1101 +charl 0 13 4.382027 0.000000 1149 +deriv 0 13 4.382027 0.000000 1145 +translat 0 13 4.382027 0.000000 1164 +optic 0 12 4.465908 0.000000 1221 +denis 0 12 4.465908 0.000000 1255 +reader 0 12 4.465908 0.000000 1246 +pageif 0 12 4.465908 0.000000 1275 +michigan 0 11 4.553877 0.000000 1368 +loop 0 11 4.553877 0.000000 1310 +bandwidth 0 11 4.553877 0.000000 1365 +america 0 11 4.553877 0.000000 1370 +matric 0 10 4.653960 0.000000 1399 +factor 0 9 4.753590 0.000000 1544 +sweden 0 7 5.010635 0.000000 1885 +signal 0 7 5.010635 0.000000 1910 +prize 0 6 5.164786 0.000000 2150 +proce 0 6 5.164786 0.000000 2114 +ohio 0 5 5.347108 0.000000 2447 +markov 0 5 5.347108 0.000000 2280 +activitieseditor 0 5 5.347108 0.000000 2454 +wavelet 0 4 5.568345 0.000000 2874 +analog 0 4 5.568345 0.000000 2875 +admiss 0 4 5.568345 0.000000 2704 +wilkinson 0 3 5.857933 0.000000 3579 +pitsiani 0 3 5.857933 0.000000 3175 +household 0 2 6.263398 0.000000 4920 +intuit 0 2 6.263398 0.000000 4921 +nearest 0 2 6.263398 0.000000 4922 +anticip 0 2 6.263398 0.000000 4268 +activitiescomput 0 2 6.263398 0.000000 4923 +kroneck 0 1 6.957497 0.000000 9641 +umea 0 1 6.957497 0.000000 9642 +loancharl 0 1 6.957497 0.000000 9643 +loanprofessorphd 0 1 6.957497 0.000000 9644 +inhomogen 0 1 6.957497 0.000000 9645 +committeedepart 0 1 6.957497 0.000000 9646 +meetingfreshman 0 1 6.957497 0.000000 9647 +analysismemb 0 1 6.957497 0.000000 9648 +diprima 0 1 6.957497 0.000000 9649 +lecturesappl 0 1 6.957497 0.000000 9650 +linkop 0 1 6.957497 0.000000 9651 +publicationsoptim 0 1 6.957497 0.000000 9652 +ellerbroek 0 1 6.957497 0.000000 9653 +plemmon 0 1 6.957497 0.000000 9654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..fcb979bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +report 0 131 2.079442 0.000000 92 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +mathemat 0 108 2.197225 0.000000 123 +pleas 0 113 2.197225 0.000000 114 +associ 0 93 2.397895 0.000000 151 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +level 0 87 2.484907 0.000000 180 +institut 0 84 2.484907 0.000000 187 +novemb 0 81 2.484907 0.000000 179 +logic 0 71 2.639057 0.000000 230 +undergradu 0 54 2.944439 0.000000 338 +profession 0 51 2.995732 0.000000 345 +electron 0 47 3.091042 0.000000 379 +annual 0 40 3.258097 0.000000 458 +electr 0 38 3.295837 0.000000 461 +respons 0 37 3.332205 0.000000 476 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +curriculum 0 33 3.433987 0.000000 535 +symbol 0 27 3.637586 0.000000 620 +revis 0 26 3.688879 0.000000 640 +primari 0 25 3.737670 0.000000 669 +other 0 24 3.761200 0.000000 697 +prepar 0 20 3.951244 0.000000 824 +department 0 20 3.951244 0.000000 839 +lower 0 18 4.060443 0.000000 886 +moor 0 17 4.110874 0.000000 936 +women 0 16 4.174387 0.000000 1004 +denis 0 12 4.465908 0.000000 1255 +pageif 0 12 4.465908 0.000000 1275 +introductori 0 9 4.753590 0.000000 1479 +machineri 0 4 5.568345 0.000000 2851 +activitiescomput 0 2 6.263398 0.000000 4923 +activitiesassoci 0 2 6.263398 0.000000 4881 +catherin 0 1 6.957497 0.000000 9655 +wagnercatherin 0 1 6.957497 0.000000 9656 +wagnersenior 0 1 6.957497 0.000000 9657 +lecturerphd 0 1 6.957497 0.000000 9658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..48cf402f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,351 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +support 0 132 1.945910 0.000000 83 +file 0 132 1.945910 0.000000 70 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +tool 0 117 2.079442 0.000000 93 +databas 0 122 2.079442 0.000000 86 +intern 0 108 2.197225 0.000000 128 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +version 0 113 2.197225 0.000000 122 +teach 0 108 2.197225 0.000000 112 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +commun 0 95 2.397895 0.000000 157 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +proceed 0 93 2.397895 0.000000 152 +environ 0 84 2.484907 0.000000 177 +build 0 85 2.484907 0.000000 184 +resourc 0 81 2.484907 0.000000 172 +novemb 0 81 2.484907 0.000000 179 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +second 0 81 2.484907 0.000000 166 +ieee 0 86 2.484907 0.000000 190 +server 0 76 2.564949 0.000000 204 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +sourc 0 77 2.564949 0.000000 201 +onlin 0 75 2.639057 0.000000 223 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +html 0 75 2.639057 0.000000 235 +upson 0 71 2.639057 0.000000 218 +nation 0 74 2.639057 0.000000 240 +materi 0 75 2.639057 0.000000 221 +line 0 75 2.639057 0.000000 231 +david 0 71 2.639057 0.000000 232 +name 0 72 2.639057 0.000000 220 +symposium 0 72 2.639057 0.000000 238 +multimedia 0 68 2.708050 0.000000 258 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +august 0 66 2.708050 0.000000 257 +laboratori 0 63 2.772589 0.000000 292 +experi 0 64 2.772589 0.000000 283 +previou 0 62 2.772589 0.000000 290 +copi 0 63 2.772589 0.000000 284 +septemb 0 65 2.772589 0.000000 274 +best 0 59 2.833213 0.000000 299 +type 0 61 2.833213 0.000000 296 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +locat 0 59 2.833213 0.000000 303 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +faculti 0 56 2.890372 0.000000 325 +semest 0 58 2.890372 0.000000 312 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +sever 0 56 2.890372 0.000000 322 +explor 0 58 2.890372 0.000000 324 +talk 0 53 2.944439 0.000000 336 +local 0 55 2.944439 0.000000 334 +found 0 53 2.944439 0.000000 337 +undergradu 0 54 2.944439 0.000000 338 +februari 0 54 2.944439 0.000000 328 +hardwar 0 51 2.995732 0.000000 350 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +digit 0 52 2.995732 0.000000 348 +approach 0 48 3.044522 0.000000 366 +format 0 48 3.044522 0.000000 356 +without 0 50 3.044522 0.000000 370 +california 0 46 3.091042 0.000000 388 +electron 0 47 3.091042 0.000000 379 +possibl 0 47 3.091042 0.000000 378 +effect 0 46 3.091042 0.000000 385 +video 1 44 3.135494 3.135494 405 +protocol 0 45 3.135494 0.000000 407 +describ 0 45 3.135494 0.000000 400 +anoth 0 45 3.135494 0.000000 408 +made 0 44 3.135494 0.000000 398 +third 0 43 3.178054 0.000000 412 +around 0 43 3.178054 0.000000 415 +review 0 42 3.218876 0.000000 425 +music 0 42 3.218876 0.000000 436 +fast 0 42 3.218876 0.000000 429 +continu 0 39 3.258097 0.000000 448 +must 0 40 3.258097 0.000000 442 +brian 0 38 3.295837 0.000000 466 +slide 0 38 3.295837 0.000000 467 +workstat 0 37 3.332205 0.000000 479 +connect 0 37 3.332205 0.000000 485 +represent 0 35 3.401197 0.000000 512 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +queri 0 33 3.433987 0.000000 524 +independ 0 32 3.465736 0.000000 548 +ad 0 32 3.465736 0.000000 544 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +common 0 30 3.555348 0.000000 574 +exist 0 30 3.555348 0.000000 569 +specifi 0 30 3.555348 0.000000 568 +domain 0 30 3.555348 0.000000 564 +built 0 29 3.583519 0.000000 592 +load 0 28 3.610918 0.000000 601 +retriev 0 27 3.637586 0.000000 621 +manipul 0 27 3.637586 0.000000 624 +proc 0 26 3.688879 0.000000 649 +effort 0 26 3.688879 0.000000 652 +berkelei 0 26 3.688879 0.000000 657 +compar 0 26 3.688879 0.000000 648 +client 0 25 3.737670 0.000000 679 +toward 0 25 3.737670 0.000000 668 +store 0 24 3.761200 0.000000 693 +scalabl 0 24 3.761200 0.000000 705 +motion 0 24 3.761200 0.000000 699 +compress 0 23 3.806662 0.000000 719 +initi 0 23 3.806662 0.000000 717 +lead 0 23 3.806662 0.000000 718 +decis 0 23 3.806662 0.000000 728 +famili 0 23 3.806662 0.000000 735 +serv 0 22 3.850148 0.000000 758 +almost 0 22 3.850148 0.000000 742 +sent 0 22 3.850148 0.000000 763 +util 0 21 3.912023 0.000000 774 +programminglanguag 0 21 3.912023 0.000000 782 +thu 0 21 3.912023 0.000000 773 +smith 0 20 3.951244 0.000000 820 +qualiti 0 20 3.951244 0.000000 832 +mpeg 0 20 3.951244 0.000000 831 +reserv 0 20 3.951244 0.000000 808 +basi 0 20 3.951244 0.000000 828 +media 0 19 4.007333 0.000000 861 +thur 0 19 4.007333 0.000000 847 +appropri 0 18 4.060443 0.000000 883 +speed 0 18 4.060443 0.000000 911 +across 0 16 4.174387 0.000000 974 +advantag 0 16 4.174387 0.000000 987 +earli 0 16 4.174387 0.000000 968 +jose 0 16 4.174387 0.000000 976 +diego 0 16 4.174387 0.000000 992 +driven 0 15 4.248495 0.000000 1048 +audio 0 14 4.317488 0.000000 1094 +francisco 0 14 4.317488 0.000000 1095 +balanc 0 14 4.317488 0.000000 1112 +heterogen 0 14 4.317488 0.000000 1090 +camera 0 14 4.317488 0.000000 1115 +suit 0 13 4.382027 0.000000 1129 +jonathan 0 13 4.382027 0.000000 1174 +resolut 0 13 4.382027 0.000000 1172 +central 0 13 4.382027 0.000000 1160 +translat 0 13 4.382027 0.000000 1164 +canada 0 13 4.382027 0.000000 1158 +misc 0 13 4.382027 0.000000 1124 +verifi 0 12 4.465908 0.000000 1261 +infrastructur 0 12 4.465908 0.000000 1234 +promot 0 12 4.465908 0.000000 1235 +remov 0 12 4.465908 0.000000 1225 +amount 0 12 4.465908 0.000000 1208 +readi 0 12 4.465908 0.000000 1242 +tue 0 11 4.553877 0.000000 1308 +peter 0 11 4.553877 0.000000 1316 +stephen 0 11 4.553877 0.000000 1342 +player 0 11 4.553877 0.000000 1371 +interestsmi 0 10 4.653960 0.000000 1462 +operatingsystem 0 10 4.653960 0.000000 1401 +equal 0 10 4.653960 0.000000 1424 +transmiss 0 9 4.753590 0.000000 1588 +establish 0 9 4.753590 0.000000 1532 +charg 0 9 4.753590 0.000000 1582 +xerox 0 8 4.875197 0.000000 1725 +contrast 0 8 4.875197 0.000000 1637 +transport 0 8 4.875197 0.000000 1672 +rivl 0 8 4.875197 0.000000 1632 +theme 0 8 4.875197 0.000000 1707 +colloquium 0 8 4.875197 0.000000 1734 +lawrenc 0 7 5.010635 0.000000 1908 +usabl 0 7 5.010635 0.000000 1810 +suffici 0 7 5.010635 0.000000 1897 +pronounc 0 7 5.010635 0.000000 1918 +prioriti 0 7 5.010635 0.000000 1792 +supportfor 0 7 5.010635 0.000000 1854 +invest 0 6 5.164786 0.000000 2153 +jpeg 0 6 5.164786 0.000000 2053 +patel 0 6 5.164786 0.000000 2154 +simultan 0 6 5.164786 0.000000 2155 +quickli 0 6 5.164786 0.000000 2000 +toronto 0 6 5.164786 0.000000 2156 +spie 0 6 5.164786 0.000000 2119 +internationalconfer 0 6 5.164786 0.000000 2051 +row 0 5 5.347108 0.000000 2330 +cyclic 0 5 5.347108 0.000000 2383 +adopt 0 5 5.347108 0.000000 2467 +consum 0 5 5.347108 0.000000 2334 +computerarchitectur 0 5 5.347108 0.000000 2290 +webster 0 5 5.347108 0.000000 2468 +minnesota 0 5 5.347108 0.000000 2469 +ofworkst 0 4 5.568345 0.000000 2679 +publicationsresearch 0 4 5.568345 0.000000 2876 +isthat 0 4 5.568345 0.000000 2723 +hypothesi 0 4 5.568345 0.000000 2650 +poorli 0 4 5.568345 0.000000 2781 +commonli 0 4 5.568345 0.000000 2877 +fold 0 4 5.568345 0.000000 2615 +swartz 0 4 5.568345 0.000000 2878 +zeno 0 3 5.857933 0.000000 3580 +networkprotocol 0 3 5.857933 0.000000 3285 +thetim 0 3 5.857933 0.000000 3581 +magnitud 0 3 5.857933 0.000000 3582 +rival 0 3 5.857933 0.000000 3583 +quicktim 0 3 5.857933 0.000000 3493 +anaheim 0 3 5.857933 0.000000 3271 +intereststeachingselect 0 2 6.263398 0.000000 4924 +andprocess 0 2 6.263398 0.000000 4925 +withlarg 0 2 6.263398 0.000000 4926 +needto 0 2 6.263398 0.000000 4927 +thecommun 0 2 6.263398 0.000000 4928 +builton 0 2 6.263398 0.000000 4094 +availableonlin 0 2 6.263398 0.000000 4929 +aredevelop 0 2 6.263398 0.000000 4930 +todramat 0 2 6.263398 0.000000 4250 +animplement 0 2 6.263398 0.000000 4931 +insoftwar 0 2 6.263398 0.000000 4932 +asif 0 2 6.263398 0.000000 4933 +ghia 0 2 6.263398 0.000000 4934 +chamberlin 0 2 6.263398 0.000000 4745 +hum 0 2 6.263398 0.000000 4935 +sanfrancisco 0 2 6.263398 0.000000 4339 +decod 0 2 6.263398 0.000000 4936 +fileserv 0 1 6.957497 0.000000 9659 +playback 0 1 6.957497 0.000000 9660 +decompress 0 1 6.957497 0.000000 9661 +transcod 0 1 6.957497 0.000000 9662 +ketan 0 1 6.957497 0.000000 9663 +bsmith 0 1 6.957497 0.000000 9664 +talksmisc 0 1 6.957497 0.000000 9665 +linksresearch 0 1 6.957497 0.000000 9666 +ourcomput 0 1 6.957497 0.000000 9667 +commercialand 0 1 6.957497 0.000000 9668 +aredesign 0 1 6.957497 0.000000 9669 +premis 0 1 6.957497 0.000000 9670 +infrastructurei 0 1 6.957497 0.000000 9671 +andappl 0 1 6.957497 0.000000 9672 +workingsystem 0 1 6.957497 0.000000 9673 +zenodistribut 0 1 6.957497 0.000000 9674 +anethernet 0 1 6.957497 0.000000 9675 +serverof 0 1 6.957497 0.000000 9676 +videostor 0 1 6.957497 0.000000 9677 +severalserv 0 1 6.957497 0.000000 9678 +effortdeliveri 0 1 6.957497 0.000000 9679 +resourcereserv 0 1 6.957497 0.000000 9680 +communicationinfrastructur 0 1 6.957497 0.000000 9681 +forbandwidth 0 1 6.957497 0.000000 9682 +networkenviron 0 1 6.957497 0.000000 9683 +accessiblebi 0 1 6.957497 0.000000 9684 +latterenviron 0 1 6.957497 0.000000 9685 +datagram 0 1 6.957497 0.000000 9686 +audioand 0 1 6.957497 0.000000 9687 +metropolitan 0 1 6.957497 0.000000 9688 +andwid 0 1 6.957497 0.000000 9689 +todeliv 0 1 6.957497 0.000000 9690 +compressedrepresent 0 1 6.957497 0.000000 9691 +reducesth 0 1 6.957497 0.000000 9692 +indicatesthat 0 1 6.957497 0.000000 9693 +fasterthan 0 1 6.957497 0.000000 9694 +currentlyextend 0 1 6.957497 0.000000 9695 +onecompress 0 1 6.957497 0.000000 9696 +compresseddomain 0 1 6.957497 0.000000 9697 +simplifyexperiment 0 1 6.957497 0.000000 9698 +calledrivl 0 1 6.957497 0.000000 9699 +allowsvideo 0 1 6.957497 0.000000 9700 +resolutionand 0 1 6.957497 0.000000 9701 +whatpostscript 0 1 6.957497 0.000000 9702 +resolutionindepend 0 1 6.957497 0.000000 9703 +sameprogram 0 1 6.957497 0.000000 9704 +whileedit 0 1 6.957497 0.000000 9705 +qualityfinish 0 1 6.957497 0.000000 9706 +bepreview 0 1 6.957497 0.000000 9707 +dpiprint 0 1 6.957497 0.000000 9708 +onvideo 0 1 6.957497 0.000000 9709 +videous 0 1 6.957497 0.000000 9710 +teachingat 0 1 6.957497 0.000000 9711 +logan 0 1 6.957497 0.000000 9712 +ontario 0 1 6.957497 0.000000 9713 +annett 0 1 6.957497 0.000000 9714 +hanna 0 1 6.957497 0.000000 9715 +mmcn 0 1 6.957497 0.000000 9716 +documentationth 0 1 6.957497 0.000000 9717 +priceweb 0 1 6.957497 0.000000 9718 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..7a6a3f7e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,280 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +studi 0 120 2.079442 0.000000 91 +provid 0 121 2.079442 0.000000 94 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +make 0 111 2.197225 0.000000 120 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +final 0 116 2.197225 0.000000 108 +intern 0 108 2.197225 0.000000 128 +user 0 104 2.302585 0.000000 137 +associ 0 93 2.397895 0.000000 151 +build 0 85 2.484907 0.000000 184 +wide 0 84 2.484907 0.000000 185 +journal 0 83 2.484907 0.000000 183 +second 0 81 2.484907 0.000000 166 +academ 0 82 2.484907 0.000000 178 +control 0 82 2.484907 0.000000 164 +method 0 80 2.564949 0.000000 213 +decemb 0 80 2.564949 0.000000 215 +state 0 76 2.564949 0.000000 207 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +line 0 75 2.639057 0.000000 231 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +write 0 72 2.639057 0.000000 222 +symposium 0 72 2.639057 0.000000 238 +meet 0 72 2.639057 0.000000 229 +appli 0 71 2.639057 0.000000 226 +differ 0 66 2.708050 0.000000 253 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +descript 0 64 2.772589 0.000000 271 +experi 0 64 2.772589 0.000000 283 +type 0 61 2.833213 0.000000 296 +march 0 61 2.833213 0.000000 295 +content 0 59 2.833213 0.000000 302 +major 0 56 2.890372 0.000000 315 +explor 0 58 2.890372 0.000000 324 +three 0 54 2.944439 0.000000 330 +scientif 0 53 2.944439 0.000000 341 +hardwar 0 51 2.995732 0.000000 350 +profession 0 51 2.995732 0.000000 345 +principl 0 48 3.044522 0.000000 357 +effect 0 46 3.091042 0.000000 385 +possibl 0 47 3.091042 0.000000 378 +natur 0 44 3.135494 0.000000 406 +mark 0 44 3.135494 0.000000 403 +algebra 0 45 3.135494 0.000000 394 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +mechan 0 43 3.178054 0.000000 416 +futur 0 41 3.218876 0.000000 427 +york 0 41 3.218876 0.000000 435 +past 0 42 3.218876 0.000000 428 +review 0 42 3.218876 0.000000 425 +theoret 0 39 3.258097 0.000000 446 +continu 0 39 3.258097 0.000000 448 +annual 0 40 3.258097 0.000000 458 +paul 0 38 3.295837 0.000000 471 +electr 0 38 3.295837 0.000000 461 +formal 0 37 3.332205 0.000000 478 +connect 0 37 3.332205 0.000000 485 +hand 0 37 3.332205 0.000000 475 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +soon 0 36 3.367296 0.000000 494 +committe 0 34 3.401197 0.000000 522 +transform 0 32 3.465736 0.000000 542 +collabor 0 32 3.465736 0.000000 543 +richard 0 31 3.496508 0.000000 559 +robert 0 30 3.555348 0.000000 567 +power 0 30 3.555348 0.000000 573 +compon 0 30 3.555348 0.000000 570 +domain 0 30 3.555348 0.000000 564 +chair 0 29 3.583519 0.000000 596 +built 0 29 3.583519 0.000000 592 +hope 0 28 3.610918 0.000000 610 +packag 0 28 3.610918 0.000000 614 +symbol 0 27 3.637586 0.000000 620 +great 0 27 3.637586 0.000000 626 +american 0 27 3.637586 0.000000 634 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +effort 0 26 3.688879 0.000000 652 +fundament 0 25 3.737670 0.000000 661 +wai 0 25 3.737670 0.000000 662 +aspect 0 25 3.737670 0.000000 663 +seri 0 24 3.761200 0.000000 708 +proof 0 23 3.806662 0.000000 720 +togeth 0 23 3.806662 0.000000 714 +varieti 0 22 3.850148 0.000000 740 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +william 0 22 3.850148 0.000000 765 +deal 0 22 3.850148 0.000000 736 +inth 0 22 3.850148 0.000000 741 +theorem 0 21 3.912023 0.000000 786 +divis 0 21 3.912023 0.000000 803 +entir 0 20 3.951244 0.000000 811 +synthesi 0 20 3.951244 0.000000 834 +verif 0 20 3.951244 0.000000 826 +expert 0 20 3.951244 0.000000 833 +facil 0 20 3.951244 0.000000 814 +prove 0 19 4.007333 0.000000 848 +definit 0 19 4.007333 0.000000 864 +north 0 19 4.007333 0.000000 873 +minim 0 18 4.060443 0.000000 887 +explan 0 16 4.174387 0.000000 985 +stream 0 15 4.248495 0.000000 1015 +contribut 0 15 4.248495 0.000000 1021 +style 0 15 4.248495 0.000000 1036 +consider 0 14 4.317488 0.000000 1076 +incomput 0 14 4.317488 0.000000 1096 +near 0 14 4.317488 0.000000 1091 +circuit 0 13 4.382027 0.000000 1131 +joint 0 13 4.382027 0.000000 1130 +weak 0 13 4.382027 0.000000 1159 +canada 0 13 4.382027 0.000000 1158 +captur 0 12 4.465908 0.000000 1232 +franc 0 12 4.465908 0.000000 1276 +philadelphia 0 12 4.465908 0.000000 1244 +israel 0 11 4.553877 0.000000 1366 +excit 0 11 4.553877 0.000000 1329 +refin 0 11 4.553877 0.000000 1363 +nuprl 0 10 4.653960 0.000000 1402 +modular 0 10 4.653960 0.000000 1392 +devis 0 10 4.653960 0.000000 1451 +suitabl 0 9 4.753590 0.000000 1486 +colloquium 0 8 4.875197 0.000000 1734 +pennsylvania 0 7 5.010635 0.000000 1932 +beyond 0 7 5.010635 0.000000 1834 +feasibl 0 6 5.164786 0.000000 2157 +handbook 0 6 5.164786 0.000000 2061 +oxford 0 6 5.164786 0.000000 2121 +recruit 0 6 5.164786 0.000000 2145 +pari 0 6 5.164786 0.000000 2158 +indiana 0 6 5.164786 0.000000 2057 +allen 0 5 5.347108 0.000000 2470 +eduph 0 5 5.347108 0.000000 2449 +dougla 0 5 5.347108 0.000000 2471 +begun 0 5 5.347108 0.000000 2386 +activitieseditor 0 5 5.347108 0.000000 2454 +weyl 0 4 5.568345 0.000000 2854 +zippel 0 4 5.568345 0.000000 2879 +notr 0 4 5.568345 0.000000 2880 +dame 0 4 5.568345 0.000000 2881 +stuart 0 3 5.857933 0.000000 3584 +how 0 3 5.857933 0.000000 3289 +gri 0 3 5.857933 0.000000 3569 +theoremprov 0 3 5.857933 0.000000 3298 +moreov 0 3 5.857933 0.000000 3200 +predecessor 0 3 5.857933 0.000000 3585 +jackson 0 3 5.857933 0.000000 3586 +boolean 0 3 5.857933 0.000000 3202 +nato 0 3 5.857933 0.000000 3587 +engag 0 2 6.263398 0.000000 4937 +ventur 0 2 6.263398 0.000000 4938 +polya 0 2 6.263398 0.000000 4939 +programmingand 0 2 6.263398 0.000000 4940 +theform 0 2 6.263398 0.000000 4245 +aitken 0 2 6.263398 0.000000 4941 +possibleto 0 2 6.263398 0.000000 4942 +aprogram 0 2 6.263398 0.000000 4943 +thiswil 0 2 6.263398 0.000000 4944 +activitieschair 0 2 6.263398 0.000000 4894 +anniversari 0 2 6.263398 0.000000 4945 +celebr 0 2 6.263398 0.000000 4946 +buffalo 0 2 6.263398 0.000000 4947 +bensoussan 0 2 6.263398 0.000000 4303 +andmathemat 0 2 6.263398 0.000000 4948 +manfr 0 2 6.263398 0.000000 4949 +leeser 0 1 6.957497 0.000000 9719 +eaton 0 1 6.957497 0.000000 9720 +computationeditor 0 1 6.957497 0.000000 9721 +betweencomput 0 1 6.957497 0.000000 9722 +constablerobert 0 1 6.957497 0.000000 9723 +constabledepart 0 1 6.957497 0.000000 9724 +professorrc 0 1 6.957497 0.000000 9725 +researchw 0 1 6.957497 0.000000 9726 +providemechan 0 1 6.957497 0.000000 9727 +implementedthre 0 1 6.957497 0.000000 9728 +lispprogram 0 1 6.957497 0.000000 9729 +sucha 0 1 6.957497 0.000000 9730 +canexpress 0 1 6.957497 0.000000 9731 +asmetalevel 0 1 6.957497 0.000000 9732 +canevalu 0 1 6.957497 0.000000 9733 +nuprli 0 1 6.957497 0.000000 9734 +fomal 0 1 6.957497 0.000000 9735 +iscal 0 1 6.957497 0.000000 9736 +termeditor 0 1 6.957497 0.000000 9737 +itsintern 0 1 6.957497 0.000000 9738 +hedefinit 0 1 6.957497 0.000000 9739 +inconstruct 0 1 6.957497 0.000000 9740 +mechanismha 0 1 6.957497 0.000000 9741 +rebuilt 0 1 6.957497 0.000000 9742 +thework 0 1 6.957497 0.000000 9743 +isan 0 1 6.957497 0.000000 9744 +builtprincip 0 1 6.957497 0.000000 9745 +internaldescript 0 1 6.957497 0.000000 9746 +withmiriam 0 1 6.957497 0.000000 9747 +davidgri 0 1 6.957497 0.000000 9748 +richardzippel 0 1 6.957497 0.000000 9749 +withless 0 1 6.957497 0.000000 9750 +aagard 0 1 6.957497 0.000000 9751 +thecorrect 0 1 6.957497 0.000000 9752 +bedrocsystem 0 1 6.957497 0.000000 9753 +widelyus 0 1 6.957497 0.000000 9754 +efforttaught 0 1 6.957497 0.000000 9755 +themann 0 1 6.957497 0.000000 9756 +programmingprocess 0 1 6.957497 0.000000 9757 +givn 0 1 6.957497 0.000000 9758 +ofpolya 0 1 6.957497 0.000000 9759 +tryingto 0 1 6.957497 0.000000 9760 +thepolya 0 1 6.957497 0.000000 9761 +conal 0 1 6.957497 0.000000 9762 +mannion 0 1 6.957497 0.000000 9763 +ofus 0 1 6.957497 0.000000 9764 +discussingproblem 0 1 6.957497 0.000000 9765 +ssymbol 0 1 6.957497 0.000000 9766 +computingsoftwar 0 1 6.957497 0.000000 9767 +isbuild 0 1 6.957497 0.000000 9768 +presseditor 0 1 6.957497 0.000000 9769 +pressgener 0 1 6.957497 0.000000 9770 +licsprogram 0 1 6.957497 0.000000 9771 +jumelageprogram 0 1 6.957497 0.000000 9772 +softwarerefere 0 1 6.957497 0.000000 9773 +nserc 0 1 6.957497 0.000000 9774 +scienceunivers 0 1 6.957497 0.000000 9775 +committeecomput 0 1 6.957497 0.000000 9776 +committeeprovost 0 1 6.957497 0.000000 9777 +mathematicslecturesform 0 1 6.957497 0.000000 9778 +inria 0 1 6.957497 0.000000 9779 +bengurion 0 1 6.957497 0.000000 9780 +sheva 0 1 6.957497 0.000000 9781 +aviv 0 1 6.957497 0.000000 9782 +metaprogram 0 1 6.957497 0.000000 9783 +engineeringworkshop 0 1 6.957497 0.000000 9784 +publicationsform 0 1 6.957497 0.000000 9785 +tendenc 0 1 6.957497 0.000000 9786 +verju 0 1 6.957497 0.000000 9787 +metalevel 0 1 6.957497 0.000000 9788 +broi 0 1 6.957497 0.000000 9789 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..6c219357 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +analysi 0 124 2.079442 0.000000 98 +find 0 111 2.197225 0.000000 111 +follow 0 92 2.397895 0.000000 143 +comment 0 93 2.397895 0.000000 146 +upson 0 71 2.639057 0.000000 218 +simul 0 66 2.708050 0.000000 255 +ithaca 0 65 2.772589 0.000000 294 +numer 0 49 3.044522 0.000000 369 +correct 0 38 3.295837 0.000000 462 +adam 1 17 4.110874 4.110874 934 +sept 0 17 4.110874 0.000000 952 +incomput 0 14 4.317488 0.000000 1096 +athlet 0 7 5.010635 0.000000 1933 +atcornel 0 6 5.164786 0.000000 2131 +florenc 1 2 6.263398 6.263398 4950 +aflorenc 0 1 6.957497 0.000000 9790 +professionalinterest 0 1 6.957497 0.000000 9791 +academicsresearchworkinterest 0 1 6.957497 0.000000 9792 +mewith 0 1 6.957497 0.000000 9793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..c99c9444 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +check 0 115 2.197225 0.000000 118 +second 0 81 2.484907 0.000000 166 +solv 0 73 2.639057 0.000000 234 +ithaca 0 65 2.772589 0.000000 294 +suggest 0 53 2.944439 0.000000 331 +approach 0 48 3.044522 0.000000 366 +detect 0 26 3.688879 0.000000 646 +failur 0 18 4.060443 0.000000 898 +hybrid 0 15 4.248495 0.000000 1057 +warn 0 14 4.317488 0.000000 1068 +tour 0 11 4.553877 0.000000 1307 +perman 0 11 4.553877 0.000000 1372 +marco 0 4 5.568345 0.000000 2589 +aguilera 1 2 6.263398 6.263398 4052 +kawazo 0 1 6.957497 0.000000 9794 +algorithmsrandom 0 1 6.957497 0.000000 9795 +consensusgo 0 1 6.957497 0.000000 9796 +brazil 0 1 6.957497 0.000000 9797 +constructionmarco 0 1 6.957497 0.000000 9798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..70dc2287 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +site 0 106 2.197225 0.000000 119 +check 0 115 2.197225 0.000000 118 +requir 0 81 2.484907 0.000000 167 +stuff 0 87 2.484907 0.000000 171 +upson 0 71 2.639057 0.000000 218 +new 1 64 2.772589 2.772589 262 +ithaca 0 65 2.772589 0.000000 294 +get 0 46 3.091042 0.000000 380 +chines 1 29 3.583519 3.583519 595 +annot 0 21 3.912023 0.000000 775 +taiwan 0 16 4.174387 0.000000 1006 +hong 1 14 4.317488 4.317488 1105 +essenti 0 13 4.382027 0.000000 1137 +usaoffic 0 6 5.164786 0.000000 2159 +corba 0 5 5.347108 0.000000 2320 +alfr 1 4 5.568345 5.568345 2882 +sinanet 0 4 5.568345 0.000000 2883 +worthwhil 0 2 6.263398 0.000000 4951 +dizzi 0 1 6.957497 0.000000 9799 +nandonet 0 1 6.957497 0.000000 9800 +sunworld 0 1 6.957497 0.000000 9801 +javaworldcours 0 1 6.957497 0.000000 9802 +bibliographyc 0 1 6.957497 0.000000 9803 +reportalfr 0 1 6.957497 0.000000 9804 +ahong 0 1 6.957497 0.000000 9805 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..ec9de517 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +object 0 138 1.945910 0.000000 79 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +final 0 116 2.197225 0.000000 108 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +academ 0 82 2.484907 0.000000 178 +orient 0 80 2.564949 0.000000 205 +resum 0 79 2.564949 0.000000 217 +html 0 75 2.639057 0.000000 235 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +automata 0 13 4.382027 0.000000 1135 +vineet 0 8 4.875197 0.000000 1639 +capac 0 8 4.875197 0.000000 1740 +engg 0 4 5.568345 0.000000 2884 +ahuja 0 3 5.857933 0.000000 3494 +coursework 0 3 5.857933 0.000000 3588 +hasbrouck 0 2 6.263398 0.000000 4952 +pagevineet 0 1 6.957497 0.000000 9806 +ahujam 0 1 6.957497 0.000000 9807 +apt 0 1 6.957497 0.000000 9808 +reportfal 0 1 6.957497 0.000000 9809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..27e98809 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +address 0 170 1.791759 0.000000 62 +construct 0 139 1.945910 0.000000 82 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +telephon 0 50 3.044522 0.000000 373 +still 0 50 3.044522 0.000000 362 +pair 0 9 4.753590 0.000000 1503 +ching 1 1 6.957497 6.957497 9810 +chinglan 0 1 6.957497 0.000000 9811 +edumast 0 1 6.957497 0.000000 9812 +beau 0 1 6.957497 0.000000 9813 +seneca 0 1 6.957497 0.000000 9814 +examplegraph 0 1 6.957497 0.000000 9815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..8e96dbbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +report 0 131 2.079442 0.000000 92 +tool 0 117 2.079442 0.000000 93 +machin 0 129 2.079442 0.000000 95 +technic 0 100 2.302585 0.000000 140 +commun 0 95 2.397895 0.000000 157 +search 0 95 2.397895 0.000000 155 +larg 0 82 2.484907 0.000000 168 +wide 0 84 2.484907 0.000000 185 +orient 0 80 2.564949 0.000000 205 +state 0 76 2.564949 0.000000 207 +new 0 64 2.772589 0.000000 262 +ithaca 0 65 2.772589 0.000000 294 +direct 0 57 2.890372 0.000000 316 +directori 0 45 3.135494 0.000000 396 +music 0 42 3.218876 0.000000 436 +staff 0 36 3.367296 0.000000 490 +within 0 33 3.433987 0.000000 525 +art 0 29 3.583519 0.000000 593 +weather 0 28 3.610918 0.000000 618 +scale 0 28 3.610918 0.000000 613 +seri 0 24 3.761200 0.000000 708 +lead 0 23 3.806662 0.000000 718 +lyco 0 19 4.007333 0.000000 871 +event 0 18 4.060443 0.000000 896 +condit 0 16 4.174387 0.000000 975 +brief 0 16 4.174387 0.000000 1001 +horu 0 14 4.317488 0.000000 1116 +replic 0 12 4.465908 0.000000 1231 +reness 0 11 4.553877 0.000000 1333 +birman 0 9 4.753590 0.000000 1531 +friedman 0 7 5.010635 0.000000 1886 +gopher 0 6 5.164786 0.000000 1982 +broadcast 0 5 5.347108 0.000000 2453 +vaysburd 0 4 5.568345 0.000000 2846 +cuinfo 0 4 5.568345 0.000000 2626 +androbbert 0 2 6.263398 0.000000 4953 +partition 0 2 6.263398 0.000000 4954 +lausann 0 2 6.263398 0.000000 4955 +hebrew 0 2 6.263398 0.000000 4913 +transi 0 2 6.263398 0.000000 4861 +alexei 0 1 6.957497 0.000000 9816 +pagealexei 0 1 6.957497 0.000000 9817 +vaysburdalexei 0 1 6.957497 0.000000 9818 +andobject 0 1 6.957497 0.000000 9819 +ecol 0 1 6.957497 0.000000 9820 +polytechniqu 0 1 6.957497 0.000000 9821 +federal 0 1 6.957497 0.000000 9822 +cornellcornel 0 1 6.957497 0.000000 9823 +directorycornel 0 1 6.957497 0.000000 9824 +directorycours 0 1 6.957497 0.000000 9825 +examscornel 0 1 6.957497 0.000000 9826 +calendarcornel 0 1 6.957497 0.000000 9827 +musicbailei 0 1 6.957497 0.000000 9828 +concertscornel 0 1 6.957497 0.000000 9829 +ithacaworld 0 1 6.957497 0.000000 9830 +odessa 0 1 6.957497 0.000000 9831 +odessaweb 0 1 6.957497 0.000000 9832 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..e15393fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +look 0 107 2.197225 0.000000 115 +memori 0 101 2.302585 0.000000 139 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +environ 0 84 2.484907 0.000000 177 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +second 0 81 2.484907 0.000000 166 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +involv 0 71 2.639057 0.000000 227 +simul 1 66 2.708050 2.708050 255 +view 0 70 2.708050 0.000000 254 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +descript 0 64 2.772589 0.000000 271 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +written 0 63 2.772589 0.000000 278 +content 0 59 2.833213 0.000000 302 +processor 0 54 2.944439 0.000000 335 +sampl 0 53 2.944439 0.000000 339 +give 0 50 3.044522 0.000000 359 +california 0 46 3.091042 0.000000 388 +done 0 47 3.091042 0.000000 381 +better 0 45 3.135494 0.000000 401 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +might 0 41 3.218876 0.000000 426 +past 0 42 3.218876 0.000000 428 +futur 0 41 3.218876 0.000000 427 +game 0 36 3.367296 0.000000 498 +ofth 0 36 3.367296 0.000000 491 +kind 0 32 3.465736 0.000000 541 +propos 0 28 3.610918 0.000000 602 +hope 0 28 3.610918 0.000000 610 +though 0 27 3.637586 0.000000 622 +bookmark 0 26 3.688879 0.000000 639 +trace 0 25 3.737670 0.000000 677 +pattern 0 24 3.761200 0.000000 689 +store 0 24 3.761200 0.000000 693 +compress 1 23 3.806662 3.806662 719 +chip 1 21 3.912023 3.912023 770 +watch 0 21 3.912023 0.000000 789 +vlsi 0 21 3.912023 0.000000 795 +mpeg 0 20 3.951244 0.000000 831 +speed 0 18 4.060443 0.000000 911 +render 0 17 4.110874 0.000000 947 +track 0 15 4.248495 0.000000 1029 +stream 0 15 4.248495 0.000000 1015 +rate 0 15 4.248495 0.000000 1037 +transit 0 15 4.248495 0.000000 1046 +train 0 14 4.317488 0.000000 1066 +station 0 13 4.382027 0.000000 1157 +suit 0 13 4.382027 0.000000 1129 +avenu 0 12 4.465908 0.000000 1277 +realiti 0 12 4.465908 0.000000 1272 +death 0 10 4.653960 0.000000 1457 +earth 0 10 4.653960 0.000000 1463 +mountain 0 10 4.653960 0.000000 1456 +santa 0 10 4.653960 0.000000 1441 +wall 0 9 4.753590 0.000000 1553 +occur 0 9 4.753590 0.000000 1572 +routin 0 9 4.753590 0.000000 1549 +screen 0 9 4.753590 0.000000 1577 +ride 0 8 4.875197 0.000000 1741 +switch 0 8 4.875197 0.000000 1718 +accord 0 7 5.010635 0.000000 1826 +monei 0 7 5.010635 0.000000 1934 +stereo 0 7 5.010635 0.000000 1818 +microsystem 0 6 5.164786 0.000000 2160 +railroad 0 6 5.164786 0.000000 2161 +silicon 0 6 5.164786 0.000000 2076 +extern 0 6 5.164786 0.000000 2105 +byte 0 6 5.164786 0.000000 2108 +snapshot 0 5 5.347108 0.000000 2303 +hell 0 4 5.568345 0.000000 2885 +mess 0 4 5.568345 0.000000 2886 +engg 0 4 5.568345 0.000000 2884 +heaven 0 3 5.857933 0.000000 3589 +landscap 0 3 5.857933 0.000000 3525 +hindu 0 3 5.857933 0.000000 3590 +xlib 0 3 5.857933 0.000000 3204 +fernandez 0 3 5.857933 0.000000 3591 +cupertino 0 2 6.263398 0.000000 4956 +javasoft 0 2 6.263398 0.000000 4220 +pyramania 0 2 6.263398 0.000000 4957 +mytholog 0 2 6.263398 0.000000 4869 +clara 0 2 6.263398 0.000000 4958 +amith 0 2 6.263398 0.000000 4053 +yamasani 0 2 6.263398 0.000000 4062 +cscomput 0 2 6.263398 0.000000 4195 +cave 0 2 6.263398 0.000000 4959 +softwarei 0 2 6.263398 0.000000 4960 +nano 0 2 6.263398 0.000000 4961 +snap 0 2 6.263398 0.000000 4962 +thed 0 2 6.263398 0.000000 4963 +pal 0 2 6.263398 0.000000 4964 +joselui 0 2 6.263398 0.000000 4965 +yama 0 1 6.957497 0.000000 9833 +coursesvlsi 0 1 6.957497 0.000000 9834 +downto 0 1 6.957497 0.000000 9835 +imparti 0 1 6.957497 0.000000 9836 +amithyamasanim 0 1 6.957497 0.000000 9837 +yorki 0 1 6.957497 0.000000 9838 +garcia 0 1 6.957497 0.000000 9839 +mailstop 0 1 6.957497 0.000000 9840 +ucup 0 1 6.957497 0.000000 9841 +warburton 0 1 6.957497 0.000000 9842 +comi 0 1 6.957497 0.000000 9843 +currentlyemploi 0 1 6.957497 0.000000 9844 +workingin 0 1 6.957497 0.000000 9845 +javamedia 0 1 6.957497 0.000000 9846 +groupeducationfal 0 1 6.957497 0.000000 9847 +cssoftwar 0 1 6.957497 0.000000 9848 +csspring 0 1 6.957497 0.000000 9849 +csproject 0 1 6.957497 0.000000 9850 +railroadsystem 0 1 6.957497 0.000000 9851 +onyx 0 1 6.957497 0.000000 9852 +openinventord 0 1 6.957497 0.000000 9853 +through 0 1 6.957497 0.000000 9854 +documentimag 0 1 6.957497 0.000000 9855 +chipdevelop 0 1 6.957497 0.000000 9856 +basicallycompress 0 1 6.957497 0.000000 9857 +thisalgorithm 0 1 6.957497 0.000000 9858 +cachecam 0 1 6.957497 0.000000 9859 +inputstream 0 1 6.957497 0.000000 9860 +capableof 0 1 6.957497 0.000000 9861 +nowai 0 1 6.957497 0.000000 9862 +rivlproposalpresentationand 0 1 6.957497 0.000000 9863 +dthi 0 1 6.957497 0.000000 9864 +itswritten 0 1 6.957497 0.000000 9865 +parallelomania 0 1 6.957497 0.000000 9866 +resumehtmlpostscript 0 1 6.957497 0.000000 9867 +satyaprasad 0 1 6.957497 0.000000 9868 +avinashgupta 0 1 6.957497 0.000000 9869 +kartikh 0 1 6.957497 0.000000 9870 +kapadia 0 1 6.957497 0.000000 9871 +hrishikeshdixit 0 1 6.957497 0.000000 9872 +vineetahuja 0 1 6.957497 0.000000 9873 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..96eaff61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +person 0 111 2.197225 0.000000 117 +assist 0 112 2.197225 0.000000 113 +homepag 0 93 2.397895 0.000000 148 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +real 0 93 2.397895 0.000000 144 +school 0 84 2.484907 0.000000 188 +wide 0 84 2.484907 0.000000 185 +state 0 76 2.564949 0.000000 207 +david 0 71 2.639057 0.000000 232 +multimedia 1 68 2.708050 2.708050 258 +order 0 69 2.708050 0.000000 249 +virtual 1 62 2.772589 2.772589 285 +street 0 63 2.772589 0.000000 293 +ithaca 0 65 2.772589 0.000000 294 +prof 0 64 2.772589 0.000000 273 +interact 0 62 2.772589 0.000000 270 +summer 0 56 2.890372 0.000000 311 +understand 0 47 3.091042 0.000000 384 +video 0 44 3.135494 0.000000 405 +vita 0 38 3.295837 0.000000 473 +brian 0 38 3.295837 0.000000 466 +industri 0 38 3.295837 0.000000 464 +bibliographi 0 34 3.401197 0.000000 518 +curriculum 0 33 3.433987 0.000000 535 +independ 0 32 3.465736 0.000000 548 +photo 0 31 3.496508 0.000000 561 +common 0 30 3.555348 0.000000 574 +platform 0 29 3.583519 0.000000 591 +limit 0 29 3.583519 0.000000 585 +request 0 26 3.688879 0.000000 635 +annot 0 21 3.912023 0.000000 775 +smith 0 20 3.951244 0.000000 820 +toolkit 0 20 3.951244 0.000000 835 +kernel 0 20 3.951244 0.000000 825 +mellon 0 13 4.382027 0.000000 1179 +realiti 1 12 4.465908 4.465908 1272 +carnegi 0 12 4.465908 0.000000 1260 +total 0 10 4.653960 0.000000 1398 +patel 1 6 5.164786 5.164786 2154 +causal 0 6 5.164786 0.000000 2024 +east 0 5 5.347108 0.000000 2472 +corba 0 5 5.347108 0.000000 2320 +dale 0 4 5.568345 0.000000 2687 +zeno 0 3 5.857933 0.000000 3580 +cheriton 0 3 5.857933 0.000000 3259 +ankit 1 2 6.263398 6.263398 4966 +endpoint 0 2 6.263398 0.000000 4967 +broker 0 2 6.263398 0.000000 4968 +critiqu 0 2 6.263398 0.000000 4328 +apatel 0 1 6.957497 0.000000 9874 +galleria 0 1 6.957497 0.000000 9875 +chronologia 0 1 6.957497 0.000000 9876 +universityresumedepart 0 1 6.957497 0.000000 9877 +enrolledgradu 0 1 6.957497 0.000000 9878 +canvasd 0 1 6.957497 0.000000 9879 +conferencingmultimedia 0 1 6.957497 0.000000 9880 +assignmentsproject 0 1 6.957497 0.000000 9881 +netan 0 1 6.957497 0.000000 9882 +skeen 0 1 6.957497 0.000000 9883 +scienceworld 0 1 6.957497 0.000000 9884 +multimediamaharaja 0 1 6.957497 0.000000 9885 +sayajirao 0 1 6.957497 0.000000 9886 +academicsfriend 0 1 6.957497 0.000000 9887 +techoreli 0 1 6.957497 0.000000 9888 +limitedjob 0 1 6.957497 0.000000 9889 +profilelif 0 1 6.957497 0.000000 9890 +relianc 0 1 6.957497 0.000000 9891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..f7d481ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +languag 0 227 1.386294 0.000000 26 +databas 0 122 2.079442 0.000000 86 +softwareengin 0 6 5.164786 0.000000 2162 +ashish 0 5 5.347108 0.000000 2473 +sciencemast 0 2 6.263398 0.000000 4969 +jhaveriashish 0 1 6.957497 0.000000 9892 +jhaveridepart 0 1 6.957497 0.000000 9893 +engineeringresumehtmlpost 0 1 6.957497 0.000000 9894 +scriptcourseworkadvanc 0 1 6.957497 0.000000 9895 +systemscsmultimedia 0 1 6.957497 0.000000 9896 +systemscsengin 0 1 6.957497 0.000000 9897 +networkscsprogram 0 1 6.957497 0.000000 9898 +csashish 0 1 6.957497 0.000000 9899 +jhaveri 0 1 6.957497 0.000000 9900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..4fe86875 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +server 0 76 2.564949 0.000000 204 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +guid 0 63 2.772589 0.000000 267 +life 0 50 3.044522 0.000000 375 +eduoffic 0 33 3.433987 0.000000 531 +transform 0 32 3.465736 0.000000 542 +famili 0 23 3.806662 0.000000 735 +record 0 18 4.060443 0.000000 890 +drive 0 15 4.248495 0.000000 1052 +club 0 15 4.248495 0.000000 1058 +danc 0 12 4.465908 0.000000 1278 +jean 0 10 4.653960 0.000000 1440 +atcornel 0 6 5.164786 0.000000 2131 +swing 1 4 5.568345 5.568345 2887 +album 0 4 5.568345 0.000000 2888 +dutch 0 3 5.857933 0.000000 3592 +berg 0 2 6.263398 0.000000 4970 +aswin 1 1 6.957497 6.957497 9901 +skyacr 0 1 6.957497 0.000000 9902 +systemmi 0 1 6.957497 0.000000 9903 +annek 0 1 6.957497 0.000000 9904 +deejay 0 1 6.957497 0.000000 9905 +isdn 0 1 6.957497 0.000000 9906 +hop 0 1 6.957497 0.000000 9907 +nederlands 0 1 6.957497 0.000000 9908 +clubi 0 1 6.957497 0.000000 9909 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..49aca0fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,123 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +make 0 111 2.197225 0.000000 120 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +requir 0 81 2.484907 0.000000 167 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +interfac 0 79 2.564949 0.000000 209 +appli 0 71 2.639057 0.000000 226 +multimedia 0 68 2.708050 0.000000 258 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +virtual 0 62 2.772589 0.000000 285 +guid 0 63 2.772589 0.000000 267 +point 0 58 2.890372 0.000000 319 +space 0 57 2.890372 0.000000 310 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +still 0 50 3.044522 0.000000 362 +friend 0 48 3.044522 0.000000 376 +featur 0 46 3.091042 0.000000 386 +effect 0 46 3.091042 0.000000 385 +even 0 45 3.135494 0.000000 393 +protocol 0 45 3.135494 0.000000 407 +multipl 0 39 3.258097 0.000000 453 +mean 0 37 3.332205 0.000000 477 +game 0 36 3.367296 0.000000 498 +next 0 34 3.401197 0.000000 517 +either 0 35 3.401197 0.000000 506 +everi 0 34 3.401197 0.000000 519 +board 0 33 3.433987 0.000000 528 +abl 0 30 3.555348 0.000000 566 +full 0 28 3.610918 0.000000 615 +subject 0 26 3.688879 0.000000 647 +universityithaca 0 24 3.761200 0.000000 710 +magazin 0 24 3.761200 0.000000 704 +reach 0 24 3.761200 0.000000 688 +brows 0 23 3.806662 0.000000 726 +sciencecornel 0 22 3.850148 0.000000 768 +almost 0 22 3.850148 0.000000 742 +chip 0 21 3.912023 0.000000 770 +five 0 19 4.007333 0.000000 841 +sign 0 16 4.174387 0.000000 970 +piec 0 15 4.248495 0.000000 1020 +transit 0 15 4.248495 0.000000 1046 +stream 0 15 4.248495 0.000000 1015 +hierarch 0 15 4.248495 0.000000 1018 +scene 0 14 4.317488 0.000000 1114 +skill 0 12 4.465908 0.000000 1205 +player 0 11 4.553877 0.000000 1371 +earth 0 10 4.653960 0.000000 1463 +pair 0 9 4.753590 0.000000 1503 +rivl 0 8 4.875197 0.000000 1632 +pronounc 0 7 5.010635 0.000000 1918 +earn 0 7 5.010635 0.000000 1788 +hoca 0 5 5.347108 0.000000 2241 +hypothet 0 5 5.347108 0.000000 2474 +vertic 0 5 5.347108 0.000000 2270 +guestbook 0 5 5.347108 0.000000 2475 +engg 0 4 5.568345 0.000000 2884 +multitask 0 4 5.568345 0.000000 2803 +screenshot 0 4 5.568345 0.000000 2743 +avinash 0 3 5.857933 0.000000 3510 +win 0 3 5.857933 0.000000 3593 +ipng 0 2 6.263398 0.000000 4727 +resumemi 0 2 6.263398 0.000000 4971 +hodja 0 2 6.263398 0.000000 4972 +fledg 0 2 6.263398 0.000000 4973 +horizont 0 2 6.263398 0.000000 4117 +diagon 0 2 6.263398 0.000000 4974 +caveat 0 2 6.263398 0.000000 4975 +trap 0 1 6.957497 0.000000 9910 +welcomeavinash 0 1 6.957497 0.000000 9911 +guptam 0 1 6.957497 0.000000 9912 +streetcambridg 0 1 6.957497 0.000000 9913 +thecia 0 1 6.957497 0.000000 9914 +presentationpent 0 1 6.957497 0.000000 9915 +skillpent 0 1 6.957497 0.000000 9916 +oppon 0 1 6.957497 0.000000 9917 +gamedownload 0 1 6.957497 0.000000 9918 +ipvimpl 0 1 6.957497 0.000000 9919 +proposalprogress 0 1 6.957497 0.000000 9920 +reportsam 0 1 6.957497 0.000000 9921 +pageon 0 1 6.957497 0.000000 9922 +internethytelnetth 0 1 6.957497 0.000000 9923 +catalogeinet 0 1 6.957497 0.000000 9924 +galaxyplanet 0 1 6.957497 0.000000 9925 +pagejoel 0 1 6.957497 0.000000 9926 +indexyahoo 0 1 6.957497 0.000000 9927 +wwwwebcrawlerlycosmi 0 1 6.957497 0.000000 9928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..7a51005c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +peopl 1 96 2.302585 2.302585 132 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +chang 0 82 2.484907 0.000000 163 +school 0 84 2.484907 0.000000 188 +good 0 77 2.564949 0.000000 200 +master 0 76 2.564949 0.000000 216 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +name 0 72 2.639057 0.000000 220 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +would 0 67 2.708050 0.000000 251 +window 0 68 2.708050 0.000000 242 +prof 0 64 2.772589 0.000000 273 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +previou 0 62 2.772589 0.000000 290 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +share 0 59 2.833213 0.000000 304 +room 0 59 2.833213 0.000000 301 +summer 0 56 2.890372 0.000000 311 +semest 0 58 2.890372 0.000000 312 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +digit 0 52 2.995732 0.000000 348 +friend 0 48 3.044522 0.000000 376 +cool 0 49 3.044522 0.000000 374 +still 0 50 3.044522 0.000000 362 +electron 0 47 3.091042 0.000000 379 +favorit 0 44 3.135494 0.000000 410 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +keep 0 44 3.135494 0.000000 409 +algebra 0 45 3.135494 0.000000 394 +futur 0 41 3.218876 0.000000 427 +past 0 42 3.218876 0.000000 428 +linear 0 41 3.218876 0.000000 431 +live 0 40 3.258097 0.000000 451 +continu 0 39 3.258097 0.000000 448 +form 0 39 3.258097 0.000000 443 +realli 0 40 3.258097 0.000000 444 +hand 0 37 3.332205 0.000000 475 +feel 0 37 3.332205 0.000000 483 +soon 0 36 3.367296 0.000000 494 +least 0 35 3.401197 0.000000 516 +product 0 33 3.433987 0.000000 527 +taught 0 33 3.433987 0.000000 526 +kind 0 32 3.465736 0.000000 541 +concept 0 32 3.465736 0.000000 537 +independ 0 32 3.465736 0.000000 548 +taken 0 31 3.496508 0.000000 555 +computersci 0 30 3.555348 0.000000 562 +hard 0 30 3.555348 0.000000 563 +depend 0 29 3.583519 0.000000 583 +consid 0 29 3.583519 0.000000 590 +usual 0 28 3.610918 0.000000 608 +framework 0 28 3.610918 0.000000 606 +actual 0 28 3.610918 0.000000 604 +campu 0 27 3.637586 0.000000 623 +session 0 26 3.688879 0.000000 643 +spent 0 25 3.737670 0.000000 676 +fundament 0 25 3.737670 0.000000 661 +greg 0 24 3.761200 0.000000 695 +frame 0 24 3.761200 0.000000 684 +brows 0 23 3.806662 0.000000 726 +finish 0 22 3.850148 0.000000 748 +dai 0 22 3.850148 0.000000 753 +hous 0 21 3.912023 0.000000 801 +half 0 21 3.912023 0.000000 776 +mpeg 0 20 3.951244 0.000000 831 +nice 0 20 3.951244 0.000000 809 +media 0 19 4.007333 0.000000 861 +offici 0 18 4.060443 0.000000 894 +coupl 0 17 4.110874 0.000000 939 +thought 0 17 4.110874 0.000000 945 +otherwis 0 17 4.110874 0.000000 922 +georg 0 16 4.174387 0.000000 994 +intel 0 16 4.174387 0.000000 1000 +goe 0 15 4.248495 0.000000 1044 +fortran 0 15 4.248495 0.000000 1027 +stream 0 15 4.248495 0.000000 1015 +trip 0 14 4.317488 0.000000 1113 +hopefulli 0 14 4.317488 0.000000 1071 +camera 0 14 4.317488 0.000000 1115 +decid 0 14 4.317488 0.000000 1075 +dave 0 14 4.317488 0.000000 1098 +jonathan 0 13 4.382027 0.000000 1174 +believ 0 13 4.382027 0.000000 1187 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +resolut 0 13 4.382027 0.000000 1172 +unfortun 0 13 4.382027 0.000000 1170 +pretti 0 13 4.382027 0.000000 1191 +went 0 12 4.465908 0.000000 1279 +pageif 0 12 4.465908 0.000000 1275 +lake 0 11 4.553877 0.000000 1373 +bandwidth 0 11 4.553877 0.000000 1365 +smart 0 11 4.553877 0.000000 1352 +perman 0 11 4.553877 0.000000 1372 +road 0 11 4.553877 0.000000 1374 +sentenc 0 10 4.653960 0.000000 1413 +town 0 10 4.653960 0.000000 1458 +packet 0 10 4.653960 0.000000 1415 +bring 0 10 4.653960 0.000000 1430 +forc 0 10 4.653960 0.000000 1384 +undergrad 0 9 4.753590 0.000000 1589 +jersei 0 9 4.753590 0.000000 1587 +introductori 0 9 4.753590 0.000000 1479 +trust 0 9 4.753590 0.000000 1583 +grew 0 8 4.875197 0.000000 1742 +mile 0 8 4.875197 0.000000 1743 +filter 0 8 4.875197 0.000000 1641 +rivl 0 8 4.875197 0.000000 1632 +encrypt 0 7 5.010635 0.000000 1835 +cornellunivers 0 7 5.010635 0.000000 1916 +portland 0 7 5.010635 0.000000 1878 +conveni 0 6 5.164786 0.000000 2088 +corp 0 6 5.164786 0.000000 2139 +lucki 0 6 5.164786 0.000000 2163 +oregon 0 5 5.347108 0.000000 2437 +ahead 0 5 5.347108 0.000000 2338 +clarif 0 5 5.347108 0.000000 2253 +fork 0 4 5.568345 0.000000 2801 +skin 0 4 5.568345 0.000000 2840 +cheap 0 4 5.568345 0.000000 2751 +ultra 0 4 5.568345 0.000000 2889 +height 0 4 5.568345 0.000000 2890 +gear 0 4 5.568345 0.000000 2891 +birth 0 3 5.857933 0.000000 3594 +greek 0 3 5.857933 0.000000 3595 +labor 0 3 5.857933 0.000000 3195 +weber 0 3 5.857933 0.000000 3156 +bright 0 3 5.857933 0.000000 3596 +pack 0 3 5.857933 0.000000 3597 +urg 0 3 5.857933 0.000000 3212 +sugata 0 2 6.263398 0.000000 4976 +dude 0 2 6.263398 0.000000 4977 +felt 0 2 6.263398 0.000000 4978 +fratern 0 2 6.263398 0.000000 4979 +border 0 2 6.263398 0.000000 4980 +mukhopadhyai 0 2 6.263398 0.000000 4981 +surfer 0 2 6.263398 0.000000 4982 +captain 0 2 6.263398 0.000000 4983 +barber 0 1 6.957497 0.000000 9929 +bulli 0 1 6.957497 0.000000 9930 +ponch 0 1 6.957497 0.000000 9931 +inde 0 1 6.957497 0.000000 9932 +bush 0 1 6.957497 0.000000 9933 +fleshpooooooooooooooch 0 1 6.957497 0.000000 9934 +inclin 0 1 6.957497 0.000000 9935 +callm 0 1 6.957497 0.000000 9936 +orpooch 0 1 6.957497 0.000000 9937 +guppi 0 1 6.957497 0.000000 9938 +mama 0 1 6.957497 0.000000 9939 +phin 0 1 6.957497 0.000000 9940 +attendedmontgomeri 0 1 6.957497 0.000000 9941 +collegetown 0 1 6.957497 0.000000 9942 +adjac 0 1 6.957497 0.000000 9943 +sublet 0 1 6.957497 0.000000 9944 +oncolleg 0 1 6.957497 0.000000 9945 +radiu 0 1 6.957497 0.000000 9946 +epsilon 0 1 6.957497 0.000000 9947 +cayuga 0 1 6.957497 0.000000 9948 +thefilt 0 1 6.957497 0.000000 9949 +ofc 0 1 6.957497 0.000000 9950 +intereststhi 0 1 6.957497 0.000000 9951 +rivlan 0 1 6.957497 0.000000 9952 +tracker 0 1 6.957497 0.000000 9953 +rivli 0 1 6.957497 0.000000 9954 +smpd 0 1 6.957497 0.000000 9955 +generatorfor 0 1 6.957497 0.000000 9956 +webar 0 1 6.957497 0.000000 9957 +buddi 0 1 6.957497 0.000000 9958 +resourceful 0 1 6.957497 0.000000 9959 +pipe 0 1 6.957497 0.000000 9960 +meanth 0 1 6.957497 0.000000 9961 +comrad 0 1 6.957497 0.000000 9962 +ofhi 0 1 6.957497 0.000000 9963 +swirl 0 1 6.957497 0.000000 9964 +nefari 0 1 6.957497 0.000000 9965 +toilet 0 1 6.957497 0.000000 9966 +mukhopadyai 0 1 6.957497 0.000000 9967 +bonei 0 1 6.957497 0.000000 9968 +magoo 0 1 6.957497 0.000000 9969 +fletop 0 1 6.957497 0.000000 9970 +bigro 0 1 6.957497 0.000000 9971 +koster 0 1 6.957497 0.000000 9972 +bot 0 1 6.957497 0.000000 9973 +tffl 0 1 6.957497 0.000000 9974 +pageuuencod 0 1 6.957497 0.000000 9975 +pagetar 0 1 6.957497 0.000000 9976 +zip 0 1 6.957497 0.000000 9977 +downloadsgraphicsbarb 0 1 6.957497 0.000000 9978 +gifponch 0 1 6.957497 0.000000 9979 +htmlres_htmlres_curemmittemmitt 0 1 6.957497 0.000000 9980 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..e0bfce8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +look 0 107 2.197225 0.000000 115 +final 0 116 2.197225 0.000000 108 +user 0 104 2.302585 0.000000 137 +commun 0 95 2.397895 0.000000 157 +homepag 0 93 2.397895 0.000000 148 +proceed 0 93 2.397895 0.000000 152 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +activ 0 84 2.484907 0.000000 182 +stuff 0 87 2.484907 0.000000 171 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +level 0 87 2.484907 0.000000 180 +messag 0 76 2.564949 0.000000 212 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +good 0 77 2.564949 0.000000 200 +state 0 76 2.564949 0.000000 207 +interfac 0 79 2.564949 0.000000 209 +upson 0 71 2.639057 0.000000 218 +onlin 0 75 2.639057 0.000000 223 +addit 0 74 2.639057 0.000000 228 +david 0 71 2.639057 0.000000 232 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +integr 0 67 2.708050 0.000000 245 +ithaca 0 65 2.772589 0.000000 294 +locat 0 59 2.833213 0.000000 303 +back 0 60 2.833213 0.000000 297 +think 0 57 2.890372 0.000000 314 +summer 0 56 2.890372 0.000000 311 +februari 0 54 2.944439 0.000000 328 +advisor 0 51 2.995732 0.000000 355 +run 0 51 2.995732 0.000000 347 +cool 0 49 3.044522 0.000000 374 +archiv 0 49 3.044522 0.000000 364 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +fast 0 42 3.218876 0.000000 429 +review 0 42 3.218876 0.000000 425 +realli 0 40 3.258097 0.000000 444 +live 0 40 3.258097 0.000000 451 +especi 0 36 3.367296 0.000000 496 +singl 0 34 3.401197 0.000000 510 +collabor 0 32 3.465736 0.000000 543 +someth 0 31 3.496508 0.000000 554 +turn 0 29 3.583519 0.000000 586 +cluster 0 28 3.610918 0.000000 612 +hope 0 28 3.610918 0.000000 610 +team 0 27 3.637586 0.000000 625 +compar 0 26 3.688879 0.000000 648 +berkelei 0 26 3.688879 0.000000 657 +enabl 0 26 3.688879 0.000000 655 +known 0 24 3.761200 0.000000 702 +size 0 23 3.806662 0.000000 713 +indian 0 22 3.850148 0.000000 769 +love 0 21 3.912023 0.000000 804 +listen 0 18 4.060443 0.000000 907 +layer 0 17 4.110874 0.000000 926 +segment 0 17 4.110874 0.000000 931 +interconnect 0 17 4.110874 0.000000 937 +latenc 0 16 4.174387 0.000000 993 +photograph 0 15 4.248495 0.000000 1056 +micro 0 15 4.248495 0.000000 1031 +split 0 14 4.317488 0.000000 1078 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +avenu 0 12 4.465908 0.000000 1277 +went 0 12 4.465908 0.000000 1279 +philadelphia 0 12 4.465908 0.000000 1244 +scienceat 0 11 4.553877 0.000000 1375 +see 0 11 4.553877 0.000000 1337 +motiv 0 11 4.553877 0.000000 1346 +cook 0 10 4.653960 0.000000 1464 +werner 0 10 4.653960 0.000000 1385 +sosp 0 10 4.653960 0.000000 1416 +calvin 0 9 4.753590 0.000000 1518 +trust 0 9 4.753590 0.000000 1583 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +vineet 0 8 4.875197 0.000000 1639 +kanpur 0 8 4.875197 0.000000 1744 +realiz 0 8 4.875197 0.000000 1739 +gold 0 8 4.875197 0.000000 1745 +coast 0 8 4.875197 0.000000 1746 +vogel 0 8 4.875197 0.000000 1622 +centuri 0 7 5.010635 0.000000 1935 +happen 0 7 5.010635 0.000000 1790 +rock 0 6 5.164786 0.000000 2164 +dream 0 6 5.164786 0.000000 2165 +tri 0 6 5.164786 0.000000 2166 +south 0 6 5.164786 0.000000 2167 +goldstein 0 6 5.164786 0.000000 2168 +buch 0 5 5.347108 0.000000 2272 +truli 0 5 5.347108 0.000000 2476 +aim 0 5 5.347108 0.000000 2477 +culler 0 5 5.347108 0.000000 2381 +symp 0 5 5.347108 0.000000 2376 +australia 0 5 5.347108 0.000000 2478 +basu 0 4 5.568345 0.000000 2843 +thecornel 0 4 5.568345 0.000000 2892 +hobb 0 4 5.568345 0.000000 2893 +writer 0 4 5.568345 0.000000 2783 +cuinfo 0 4 5.568345 0.000000 2626 +ofworkst 0 4 5.568345 0.000000 2679 +withth 0 4 5.568345 0.000000 2805 +anindya 1 3 5.857933 5.857933 3535 +experienc 0 3 5.857933 0.000000 3203 +asian 0 3 5.857933 0.000000 3598 +mpp 0 3 5.857933 0.000000 3194 +schauser 0 3 5.857933 0.000000 3599 +avula 0 3 5.857933 0.000000 3600 +mugshot 0 2 6.263398 0.000000 4984 +goof 0 2 6.263398 0.000000 4985 +projectwith 0 2 6.263398 0.000000 4986 +thegreat 0 2 6.263398 0.000000 4987 +pelham 0 2 6.263398 0.000000 4988 +grenvil 0 2 6.263398 0.000000 4989 +wodehous 0 2 6.263398 0.000000 4990 +metallica 0 2 6.263398 0.000000 4991 +fanci 0 2 6.263398 0.000000 4992 +monti 0 2 6.263398 0.000000 4993 +python 0 2 6.263398 0.000000 4994 +beavi 0 2 6.263398 0.000000 4995 +meiko 0 2 6.263398 0.000000 4996 +untrust 0 2 6.263398 0.000000 4997 +seth 0 2 6.263398 0.000000 4998 +klau 0 2 6.263398 0.000000 4999 +veena 0 2 6.263398 0.000000 5000 +homepagelast 0 2 6.263398 0.000000 5001 +delawar 0 1 6.957497 0.000000 9981 +eduwhat 0 1 6.957497 0.000000 9982 +musicor 0 1 6.957497 0.000000 9983 +coollik 0 1 6.957497 0.000000 9984 +indiawho 0 1 6.957497 0.000000 9985 +hardpink 0 1 6.957497 0.000000 9986 +floydfanat 0 1 6.957497 0.000000 9987 +childhood 0 1 6.957497 0.000000 9988 +livelast 0 1 6.957497 0.000000 9989 +plum 0 1 6.957497 0.000000 9990 +unwash 0 1 6.957497 0.000000 9991 +attendedwoodstock 0 1 6.957497 0.000000 9992 +onlinewoodstock 0 1 6.957497 0.000000 9993 +woodstock 0 1 6.957497 0.000000 9994 +thesocc 0 1 6.957497 0.000000 9995 +worldcup 0 1 6.957497 0.000000 9996 +butunfortun 0 1 6.957497 0.000000 9997 +putsomerecip 0 1 6.957497 0.000000 9998 +connoisseurof 0 1 6.957497 0.000000 9999 +whiski 0 1 6.957497 0.000000 10000 +malt 0 1 6.957497 0.000000 10001 +cheer 0 1 6.957497 0.000000 10002 +buttheadoth 0 1 6.957497 0.000000 10003 +gopherand 0 1 6.957497 0.000000 10004 +projectwhich 0 1 6.957497 0.000000 10005 +acheiv 0 1 6.957497 0.000000 10006 +passinglay 0 1 6.957497 0.000000 10007 +thatshow 0 1 6.957497 0.000000 10008 +saturateth 0 1 6.957497 0.000000 10009 +fibr 0 1 6.957497 0.000000 10010 +specificationfor 0 1 6.957497 0.000000 10011 +processesboth 0 1 6.957497 0.000000 10012 +abridgedvers 0 1 6.957497 0.000000 10013 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..06272d74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +gener 0 220 1.386294 0.000000 27 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +studi 0 120 2.079442 0.000000 91 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +intern 0 108 2.197225 0.000000 128 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +upson 0 71 2.639057 0.000000 218 +line 0 75 2.639057 0.000000 231 +view 0 70 2.708050 0.000000 254 +interact 0 62 2.772589 0.000000 270 +ithaca 0 65 2.772589 0.000000 294 +dept 0 64 2.772589 0.000000 291 +simpl 0 60 2.833213 0.000000 298 +overview 0 56 2.890372 0.000000 323 +processor 0 54 2.944439 0.000000 335 +local 0 55 2.944439 0.000000 334 +scientif 0 53 2.944439 0.000000 341 +advisor 0 51 2.995732 0.000000 355 +life 0 50 3.044522 0.000000 375 +music 0 42 3.218876 0.000000 436 +movi 0 40 3.258097 0.000000 459 +map 0 39 3.258097 0.000000 452 +tutori 0 39 3.258097 0.000000 437 +seminar 0 38 3.295837 0.000000 470 +global 0 34 3.401197 0.000000 520 +richard 0 31 3.496508 0.000000 559 +weather 0 28 3.610918 0.000000 618 +magazin 0 24 3.761200 0.000000 704 +famili 0 23 3.806662 0.000000 735 +geometri 0 22 3.850148 0.000000 752 +navig 0 21 3.912023 0.000000 796 +synthesi 0 20 3.951244 0.000000 834 +georg 0 16 4.174387 0.000000 994 +stock 0 16 4.174387 0.000000 1007 +massiv 0 15 4.248495 0.000000 1026 +incomput 0 14 4.317488 0.000000 1096 +francisco 0 14 4.317488 0.000000 1095 +levi 0 14 4.317488 0.000000 1093 +forth 0 13 4.382027 0.000000 1186 +galleri 0 13 4.382027 0.000000 1192 +insid 0 12 4.465908 0.000000 1262 +newspap 0 12 4.465908 0.000000 1280 +neat 0 12 4.465908 0.000000 1263 +congress 0 9 4.753590 0.000000 1592 +forget 0 8 4.875197 0.000000 1712 +wire 0 8 4.875197 0.000000 1747 +microsystem 0 6 5.164786 0.000000 2160 +peek 0 6 5.164786 0.000000 2169 +frog 0 5 5.347108 0.000000 2479 +hallithaca 0 4 5.568345 0.000000 2894 +zippel 0 4 5.568345 0.000000 2879 +heard 0 4 5.568345 0.000000 2895 +aboutth 0 4 5.568345 0.000000 2720 +wander 0 4 5.568345 0.000000 2896 +educornel 0 3 5.857933 0.000000 3601 +universitydept 0 3 5.857933 0.000000 3602 +galaxi 0 3 5.857933 0.000000 3603 +underground 0 3 5.857933 0.000000 3604 +spider 0 3 5.857933 0.000000 3605 +intertext 0 2 6.263398 0.000000 5002 +solar 0 2 6.263398 0.000000 5003 +martial 0 2 6.263398 0.000000 5004 +jpop 0 1 6.957497 0.000000 10014 +homepageben 0 1 6.957497 0.000000 10015 +haogradu 0 1 6.957497 0.000000 10016 +studentbhao 0 1 6.957497 0.000000 10017 +flea 0 1 6.957497 0.000000 10018 +taylorwhen 0 1 6.957497 0.000000 10019 +itsgorg 0 1 6.957497 0.000000 10020 +cornellwhat 0 1 6.957497 0.000000 10021 +dissectionmagazin 0 1 6.957497 0.000000 10022 +magazinea 0 1 6.957497 0.000000 10023 +shoemak 0 1 6.957497 0.000000 10024 +weblouvr 0 1 6.957497 0.000000 10025 +xmorphia 0 1 6.957497 0.000000 10026 +kaleidospac 0 1 6.957497 0.000000 10027 +bonsai 0 1 6.957497 0.000000 10028 +seiyuu 0 1 6.957497 0.000000 10029 +archivenetwork 0 1 6.957497 0.000000 10030 +edgelibrari 0 1 6.957497 0.000000 10031 +infonih 0 1 6.957497 0.000000 10032 +courseth 0 1 6.957497 0.000000 10033 +guidecern 0 1 6.957497 0.000000 10034 +bhao 0 1 6.957497 0.000000 10035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..a918f0e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +network 1 168 1.791759 1.791759 61 +ithaca 1 65 2.772589 2.772589 294 +york 1 41 3.218876 3.218876 435 +apart 1 7 5.010635 5.010635 1936 +aastha 1 2 6.263398 6.263398 5005 +sciencemast 1 2 6.263398 6.263398 4969 +hasbrouck 1 2 6.263398 6.263398 4952 +pageaastha 1 1 6.957497 6.957497 10036 +bhardwajdepart 1 1 6.957497 6.957497 10037 +ofengineeeringresumehtmlpost 1 1 6.957497 6.957497 10038 +scriptcourseworkadvanceddatabas 1 1 6.957497 6.957497 10039 +csmultimediasystem 1 1 6.957497 6.957497 10040 +csengineeringcomput 1 1 6.957497 6.957497 10041 +cssoftwareengin 1 1 6.957497 6.957497 10042 +cscontact 1 1 6.957497 6.957497 10043 +bhardwaj 1 1 6.957497 6.957497 10044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..2262a359 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,284 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +tool 0 117 2.079442 0.000000 93 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +search 0 95 2.397895 0.000000 155 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +librari 0 87 2.484907 0.000000 181 +stuff 0 87 2.484907 0.000000 171 +level 0 87 2.484907 0.000000 180 +april 0 77 2.564949 0.000000 196 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +workshop 0 71 2.639057 0.000000 239 +intellig 0 72 2.639057 0.000000 225 +nation 0 74 2.639057 0.000000 240 +write 0 72 2.639057 0.000000 222 +symposium 0 72 2.639057 0.000000 238 +onlin 0 75 2.639057 0.000000 223 +view 0 70 2.708050 0.000000 254 +plan 0 65 2.772589 0.000000 272 +laboratori 0 63 2.772589 0.000000 292 +foundat 0 62 2.772589 0.000000 286 +artifici 0 63 2.772589 0.000000 280 +experi 0 64 2.772589 0.000000 283 +copi 0 63 2.772589 0.000000 284 +ithaca 0 65 2.772589 0.000000 294 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +found 0 53 2.944439 0.000000 337 +februari 0 54 2.944439 0.000000 328 +without 0 50 3.044522 0.000000 370 +visual 0 48 3.044522 0.000000 372 +california 0 46 3.091042 0.000000 388 +could 0 46 3.091042 0.000000 383 +move 0 47 3.091042 0.000000 382 +video 0 44 3.135494 0.000000 405 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +around 0 43 3.178054 0.000000 415 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +press 0 42 3.218876 0.000000 419 +programm 0 39 3.258097 0.000000 445 +small 0 39 3.258097 0.000000 447 +societi 0 40 3.258097 0.000000 456 +open 0 38 3.295837 0.000000 469 +field 0 37 3.332205 0.000000 482 +robot 1 36 3.367296 3.367296 497 +approxim 0 35 3.401197 0.000000 509 +singl 0 34 3.401197 0.000000 510 +post 0 35 3.401197 0.000000 505 +tech 0 35 3.401197 0.000000 515 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +built 0 29 3.583519 0.000000 592 +weather 0 28 3.610918 0.000000 618 +scale 0 28 3.610918 0.000000 613 +arrai 0 27 3.637586 0.000000 627 +manipul 0 27 3.637586 0.000000 624 +team 0 27 3.637586 0.000000 625 +bound 0 26 3.688879 0.000000 659 +proc 0 26 3.688879 0.000000 649 +experiment 0 26 3.688879 0.000000 645 +detect 0 26 3.688879 0.000000 646 +revis 0 26 3.688879 0.000000 640 +task 0 25 3.737670 0.000000 678 +motion 0 24 3.761200 0.000000 699 +sometim 0 24 3.761200 0.000000 696 +mobil 0 23 3.806662 0.000000 730 +famili 0 23 3.806662 0.000000 735 +lead 0 23 3.806662 0.000000 718 +cooper 0 22 3.850148 0.000000 757 +chip 0 21 3.912023 0.000000 770 +vlsi 0 21 3.912023 0.000000 795 +department 0 20 3.951244 0.000000 839 +mpeg 0 20 3.951244 0.000000 831 +scheme 0 20 3.951244 0.000000 818 +boston 0 19 4.007333 0.000000 862 +demo 0 18 4.060443 0.000000 888 +offici 0 18 4.060443 0.000000 894 +lower 0 18 4.060443 0.000000 886 +minim 0 18 4.060443 0.000000 887 +agent 0 18 4.060443 0.000000 910 +stanford 0 17 4.110874 0.000000 955 +vector 0 16 4.174387 0.000000 961 +diego 0 16 4.174387 0.000000 992 +explan 0 16 4.174387 0.000000 985 +micro 0 15 4.248495 0.000000 1031 +massiv 0 15 4.248495 0.000000 1026 +track 0 15 4.248495 0.000000 1029 +configur 0 15 4.248495 0.000000 1012 +researchmi 0 14 4.317488 0.000000 1119 +draft 0 14 4.317488 0.000000 1085 +train 0 14 4.317488 0.000000 1066 +anonym 0 14 4.317488 0.000000 1100 +cannot 0 13 4.382027 0.000000 1144 +jonathan 0 13 4.382027 0.000000 1174 +franc 0 12 4.465908 0.000000 1276 +bruce 0 12 4.465908 0.000000 1226 +walk 0 12 4.465908 0.000000 1281 +target 0 12 4.465908 0.000000 1282 +peter 0 11 4.553877 0.000000 1316 +donald 0 9 4.753590 0.000000 1510 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +face 0 9 4.753590 0.000000 1501 +classif 0 9 4.753590 0.000000 1586 +entitl 0 9 4.753590 0.000000 1490 +wilson 0 9 4.753590 0.000000 1536 +herefor 0 9 4.753590 0.000000 1483 +invari 0 8 4.875197 0.000000 1748 +autonom 0 8 4.875197 0.000000 1749 +sensor 0 7 5.010635 0.000000 1920 +morph 0 7 5.010635 0.000000 1937 +pittsburgh 0 7 5.010635 0.000000 1938 +beyond 0 7 5.010635 0.000000 1834 +huttenloch 0 6 5.164786 0.000000 1983 +silicon 0 6 5.164786 0.000000 2076 +lili 0 5 5.347108 0.000000 2240 +actuat 0 5 5.347108 0.000000 2442 +minneapoli 0 5 5.347108 0.000000 2480 +minnesota 0 5 5.347108 0.000000 2469 +eduph 0 5 5.347108 0.000000 2449 +upper 0 5 5.347108 0.000000 2481 +these 0 5 5.347108 0.000000 2482 +rotat 0 5 5.347108 0.000000 2295 +poem 0 5 5.347108 0.000000 2483 +clickher 0 5 5.347108 0.000000 2428 +randal 0 4 5.568345 0.000000 2776 +thecornel 0 4 5.568345 0.000000 2892 +chase 0 4 5.568345 0.000000 2897 +decoupl 0 4 5.568345 0.000000 2898 +chain 0 4 5.568345 0.000000 2712 +push 0 4 5.568345 0.000000 2635 +alfr 0 4 5.568345 0.000000 2882 +bhringer 0 3 5.857933 0.000000 3606 +jen 0 3 5.857933 0.000000 3378 +hereto 0 3 5.857933 0.000000 3476 +noel 0 3 5.857933 0.000000 3376 +fabric 0 3 5.857933 0.000000 3607 +algorithmica 0 3 5.857933 0.000000 3561 +artificialintellig 0 3 5.857933 0.000000 3608 +scream 0 3 5.857933 0.000000 3609 +microfabr 0 3 5.857933 0.000000 3610 +daniela 0 3 5.857933 0.000000 3611 +portrait 0 3 5.857933 0.000000 3491 +harm 0 3 5.857933 0.000000 3515 +macdonald 0 2 6.263398 0.000000 5006 +mem 0 2 6.263398 0.000000 5007 +brigg 0 2 6.263398 0.000000 5008 +ree 0 2 6.263398 0.000000 5009 +nanofabr 0 2 6.263398 0.000000 5010 +toconstruct 0 2 6.263398 0.000000 4858 +ofmobil 0 2 6.263398 0.000000 5011 +internationalworkshop 0 2 6.263398 0.000000 5012 +crystal 0 2 6.263398 0.000000 5013 +electro 0 2 6.263398 0.000000 5014 +reif 0 2 6.263398 0.000000 5015 +furnitur 0 2 6.263398 0.000000 5016 +actuatorarrai 0 2 6.263398 0.000000 5017 +mihailovich 0 2 6.263398 0.000000 5018 +automationnic 0 2 6.263398 0.000000 5019 +andj 0 2 6.263398 0.000000 5020 +latomb 0 2 6.263398 0.000000 5021 +doc 0 2 6.263398 0.000000 5022 +catalogc 0 2 6.263398 0.000000 5023 +apictur 0 2 6.263398 0.000000 5024 +drawn 0 2 6.263398 0.000000 4215 +swallow 0 2 6.263398 0.000000 5025 +tommi 0 1 6.957497 0.000000 10045 +feeder 0 1 6.957497 0.000000 10046 +vibratori 0 1 6.957497 0.000000 10047 +kinodynam 0 1 6.957497 0.000000 10048 +xavier 0 1 6.957497 0.000000 10049 +ourlab 0 1 6.957497 0.000000 10050 +toulous 0 1 6.957497 0.000000 10051 +icra 0 1 6.957497 0.000000 10052 +provablygood 0 1 6.957497 0.000000 10053 +couch 0 1 6.957497 0.000000 10054 +donaldbruc 0 1 6.957497 0.000000 10055 +donaldassoci 0 1 6.957497 0.000000 10056 +professorbrd 0 1 6.957497 0.000000 10057 +laboratorydan 0 1 6.957497 0.000000 10058 +microactu 0 1 6.957497 0.000000 10059 +arrayi 0 1 6.957497 0.000000 10060 +squarecentemet 0 1 6.957497 0.000000 10061 +sensoryfeedback 0 1 6.957497 0.000000 10062 +buildself 0 1 6.957497 0.000000 10063 +propel 0 1 6.957497 0.000000 10064 +amybrigg 0 1 6.957497 0.000000 10065 +surveil 0 1 6.957497 0.000000 10066 +andintercept 0 1 6.957497 0.000000 10067 +developedbi 0 1 6.957497 0.000000 10068 +informationalon 0 1 6.957497 0.000000 10069 +andlow 0 1 6.957497 0.000000 10070 +memsand 0 1 6.957497 0.000000 10071 +thealgorithm 0 1 6.957497 0.000000 10072 +robustgeometr 0 1 6.957497 0.000000 10073 +andimprov 0 1 6.957497 0.000000 10074 +partsfeed 0 1 6.957497 0.000000 10075 +partii 0 1 6.957497 0.000000 10076 +robotswith 0 1 6.957497 0.000000 10077 +forcartesian 0 1 6.957497 0.000000 10078 +canni 0 1 6.957497 0.000000 10079 +inpress 0 1 6.957497 0.000000 10080 +supermodular 0 1 6.957497 0.000000 10081 +andtheoret 0 1 6.957497 0.000000 10082 +jetai 0 1 6.957497 0.000000 10083 +firstquart 0 1 6.957497 0.000000 10084 +inminim 0 1 6.957497 0.000000 10085 +iser 0 1 6.957497 0.000000 10086 +automon 0 1 6.957497 0.000000 10087 +ofjapan 0 1 6.957497 0.000000 10088 +iro 0 1 6.957497 0.000000 10089 +sensorlessmanipul 0 1 6.957497 0.000000 10090 +andautom 0 1 6.957497 0.000000 10091 +ofrobot 0 1 6.957497 0.000000 10092 +otherpubl 0 1 6.957497 0.000000 10093 +dinesh 0 1 6.957497 0.000000 10094 +aval 0 1 6.957497 0.000000 10095 +indexobtain 0 1 6.957497 0.000000 10096 +paperscopi 0 1 6.957497 0.000000 10097 +teamof 0 1 6.957497 0.000000 10098 +movefurnitur 0 1 6.957497 0.000000 10099 +mobot 0 1 6.957497 0.000000 10100 +loretta 0 1 6.957497 0.000000 10101 +pompilio 0 1 6.957497 0.000000 10102 +discoverychannel 0 1 6.957497 0.000000 10103 +funa 0 1 6.957497 0.000000 10104 +moreoth 0 1 6.957497 0.000000 10105 +tallest 0 1 6.957497 0.000000 10106 +darkest 0 1 6.957497 0.000000 10107 +hollywood 0 1 6.957497 0.000000 10108 +merian 0 1 6.957497 0.000000 10109 +wrai 0 1 6.957497 0.000000 10110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..2ff8c126 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +perform 0 143 1.945910 0.000000 74 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +databas 0 122 2.079442 0.000000 86 +specif 0 106 2.197225 0.000000 106 +intern 0 108 2.197225 0.000000 128 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +text 0 98 2.302585 0.000000 133 +user 0 104 2.302585 0.000000 137 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +associ 0 93 2.397895 0.000000 151 +present 0 91 2.397895 0.000000 145 +learn 1 86 2.484907 2.484907 170 +contain 0 81 2.484907 0.000000 174 +build 0 85 2.484907 0.000000 184 +activ 0 84 2.484907 0.000000 182 +resourc 0 81 2.484907 0.000000 172 +method 0 80 2.564949 0.000000 213 +intellig 0 72 2.639057 0.000000 225 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +addit 0 74 2.639057 0.000000 228 +knowledg 0 67 2.708050 0.000000 243 +goal 0 66 2.708050 0.000000 250 +artifici 0 63 2.772589 0.000000 280 +guid 0 63 2.772589 0.000000 267 +import 0 65 2.772589 0.000000 282 +evalu 0 64 2.772589 0.000000 266 +foundat 0 62 2.772589 0.000000 286 +improv 0 62 2.772589 0.000000 289 +content 0 59 2.833213 0.000000 302 +thesi 0 57 2.890372 0.000000 327 +reason 0 57 2.890372 0.000000 318 +variou 0 56 2.890372 0.000000 317 +case 0 51 2.995732 0.000000 351 +finger 0 52 2.995732 0.000000 354 +investig 0 51 2.995732 0.000000 353 +approach 0 48 3.044522 0.000000 366 +archiv 0 49 3.044522 0.000000 364 +pointer 0 48 3.044522 0.000000 368 +understand 0 47 3.091042 0.000000 384 +featur 0 46 3.091042 0.000000 386 +effect 0 46 3.091042 0.000000 385 +natur 0 44 3.135494 0.000000 406 +mechan 0 43 3.178054 0.000000 416 +offer 0 43 3.178054 0.000000 414 +press 0 42 3.218876 0.000000 419 +autom 0 41 3.218876 0.000000 434 +combin 0 42 3.218876 0.000000 421 +annual 0 40 3.258097 0.000000 458 +societi 0 40 3.258097 0.000000 456 +seminar 0 38 3.295837 0.000000 470 +origin 0 38 3.295837 0.000000 472 +expect 0 37 3.332205 0.000000 484 +cost 0 37 3.332205 0.000000 480 +tree 0 36 3.367296 0.000000 492 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +within 0 33 3.433987 0.000000 525 +chapter 0 32 3.465736 0.000000 536 +extend 0 32 3.465736 0.000000 539 +domain 0 30 3.555348 0.000000 564 +focu 0 30 3.555348 0.000000 571 +compon 0 30 3.555348 0.000000 570 +focus 0 29 3.583519 0.000000 584 +semant 0 29 3.583519 0.000000 587 +framework 0 28 3.610918 0.000000 606 +symbol 0 27 3.637586 0.000000 620 +determin 0 27 3.637586 0.000000 630 +task 0 25 3.737670 0.000000 678 +handl 0 24 3.761200 0.000000 685 +brows 0 23 3.806662 0.000000 726 +decis 0 23 3.806662 0.000000 728 +springer 0 22 3.850148 0.000000 750 +stat 0 17 4.110874 0.000000 924 +analyz 0 17 4.110874 0.000000 925 +repositori 0 17 4.110874 0.000000 932 +cognit 0 16 4.174387 0.000000 986 +jose 0 16 4.174387 0.000000 976 +condit 0 16 4.174387 0.000000 975 +practicum 0 16 4.174387 0.000000 960 +cambridg 0 16 4.174387 0.000000 1008 +massachusett 0 14 4.317488 0.000000 1118 +train 0 14 4.317488 0.000000 1066 +embed 0 14 4.317488 0.000000 1102 +primarili 0 13 4.382027 0.000000 1185 +context 0 13 4.382027 0.000000 1153 +robust 0 12 4.465908 0.000000 1271 +speech 0 12 4.465908 0.000000 1222 +lake 0 11 4.553877 0.000000 1373 +tour 0 11 4.553877 0.000000 1307 +acquisit 0 10 4.653960 0.000000 1465 +sentenc 0 10 4.653960 0.000000 1413 +underli 0 10 4.653960 0.000000 1410 +prior 0 10 4.653960 0.000000 1438 +linguist 0 9 4.753590 0.000000 1593 +rel 0 9 4.753590 0.000000 1487 +softbal 0 9 4.753590 0.000000 1594 +conferenceon 0 9 4.753590 0.000000 1595 +introductori 0 9 4.753590 0.000000 1479 +morgan 0 9 4.753590 0.000000 1484 +aaai 0 8 4.875197 0.000000 1750 +extract 0 8 4.875197 0.000000 1728 +entri 0 8 4.875197 0.000000 1678 +span 0 8 4.875197 0.000000 1751 +empir 0 8 4.875197 0.000000 1722 +tag 0 7 5.010635 0.000000 1821 +larger 0 7 5.010635 0.000000 1875 +lawrenc 0 7 5.010635 0.000000 1908 +pennsylvania 0 7 5.010635 0.000000 1932 +machinelearn 0 6 5.164786 0.000000 2084 +heurist 0 6 5.164786 0.000000 2125 +syntax 0 6 5.164786 0.000000 2030 +amherst 0 5 5.347108 0.000000 2484 +connectionist 0 5 5.347108 0.000000 2430 +kaufmann 0 5 5.347108 0.000000 2254 +corpu 0 5 5.347108 0.000000 2282 +disambigu 0 4 5.568345 0.000000 2899 +clair 0 4 5.568345 0.000000 2605 +hallphon 0 4 5.568345 0.000000 2900 +compris 0 4 5.568345 0.000000 2862 +ijcai 0 4 5.568345 0.000000 2901 +complic 0 4 5.568345 0.000000 2902 +educlick 0 3 5.857933 0.000000 3612 +tosupport 0 3 5.857933 0.000000 3613 +teachingc 0 3 5.857933 0.000000 3614 +agener 0 3 5.857933 0.000000 3213 +conceptu 0 3 5.857933 0.000000 3214 +fourteenth 0 3 5.857933 0.000000 3615 +ninth 0 3 5.857933 0.000000 3616 +anaheim 0 3 5.857933 0.000000 3271 +citat 0 3 5.857933 0.000000 3617 +penn 0 3 5.857933 0.000000 3094 +corpora 0 2 6.263398 0.000000 4269 +interestscours 0 2 6.263398 0.000000 5026 +tandem 0 2 6.263398 0.000000 5027 +learningtechniqu 0 2 6.263398 0.000000 5028 +gabriel 0 2 6.263398 0.000000 5029 +jointconfer 0 2 6.263398 0.000000 5030 +eleventh 0 2 6.263398 0.000000 5031 +newark 0 2 6.263398 0.000000 5032 +bias 0 2 6.263398 0.000000 5033 +bloomington 0 2 6.263398 0.000000 5034 +twelfth 0 2 6.263398 0.000000 5035 +treebank 0 2 6.263398 0.000000 4138 +cardi 1 1 6.957497 6.957497 10111 +kenmor 0 1 6.957497 0.000000 10112 +pronoun 0 1 6.957497 0.000000 10113 +naturallanguag 0 1 6.957497 0.000000 10114 +knowledgeacquisit 0 1 6.957497 0.000000 10115 +riloff 0 1 6.957497 0.000000 10116 +tenth 0 1 6.957497 0.000000 10117 +erlbaumassoci 0 1 6.957497 0.000000 10118 +lehnert 0 1 6.957497 0.000000 10119 +cardieclair 0 1 6.957497 0.000000 10120 +teachselect 0 1 6.957497 0.000000 10121 +publicationsnlp 0 1 6.957497 0.000000 10122 +amalgam 0 1 6.957497 0.000000 10123 +westi 0 1 6.957497 0.000000 10124 +interestsalthough 0 1 6.957497 0.000000 10125 +subfield 0 1 6.957497 0.000000 10126 +cognitivemodel 0 1 6.957497 0.000000 10127 +forexplor 0 1 6.957497 0.000000 10128 +tworel 0 1 6.957497 0.000000 10129 +reliablyextract 0 1 6.957497 0.000000 10130 +cstr 0 1 6.957497 0.000000 10131 +kenmoreacquir 0 1 6.957497 0.000000 10132 +tworeal 0 1 6.957497 0.000000 10133 +andconcept 0 1 6.957497 0.000000 10134 +anteced 0 1 6.957497 0.000000 10135 +disambiguationtask 0 1 6.957497 0.000000 10136 +learningcompon 0 1 6.957497 0.000000 10137 +isembed 0 1 6.957497 0.000000 10138 +inartifici 0 1 6.957497 0.000000 10139 +understandingselect 0 1 6.957497 0.000000 10140 +publicationsautom 0 1 6.957497 0.000000 10141 +wermter 0 1 6.957497 0.000000 10142 +scheler 0 1 6.957497 0.000000 10143 +andsymbol 0 1 6.957497 0.000000 10144 +tolearn 0 1 6.957497 0.000000 10145 +conceptualsent 0 1 6.957497 0.000000 10146 +cmpsci 0 1 6.957497 0.000000 10147 +onconstrain 0 1 6.957497 0.000000 10148 +plausibl 0 1 6.957497 0.000000 10149 +linkscomput 0 1 6.957497 0.000000 10150 +linguistics 0 1 6.957497 0.000000 10151 +aclspeci 0 1 6.957497 0.000000 10152 +learningmachin 0 1 6.957497 0.000000 10153 +digestmachinelearn 0 1 6.957497 0.000000 10154 +researchersmachin 0 1 6.957497 0.000000 10155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..e41c8273 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +gener 1 220 1.386294 1.386294 27 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +high 0 130 2.079442 0.000000 101 +mathemat 1 108 2.197225 2.197225 123 +version 0 113 2.197225 0.000000 122 +make 0 111 2.197225 0.000000 120 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +level 0 87 2.484907 0.000000 180 +environ 0 84 2.484907 0.000000 177 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +java 0 70 2.708050 0.000000 248 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +creat 0 63 2.772589 0.000000 277 +plan 0 65 2.772589 0.000000 272 +ithaca 0 65 2.772589 0.000000 294 +automat 0 61 2.833213 0.000000 306 +browser 0 56 2.890372 0.000000 313 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +natur 0 44 3.135494 0.000000 406 +vision 0 41 3.218876 0.000000 430 +paul 0 38 3.295837 0.000000 471 +tech 0 35 3.401197 0.000000 515 +concept 0 32 3.465736 0.000000 537 +transform 0 32 3.465736 0.000000 542 +express 0 32 3.465736 0.000000 540 +specifi 0 30 3.555348 0.000000 568 +symbol 0 27 3.637586 0.000000 620 +primari 0 25 3.737670 0.000000 669 +motion 0 24 3.761200 0.000000 699 +equat 0 23 3.806662 0.000000 724 +emphasi 0 22 3.850148 0.000000 755 +thu 0 21 3.912023 0.000000 773 +applet 0 20 3.951244 0.000000 827 +geometr 1 19 4.007333 4.007333 852 +comparison 0 19 4.007333 0.000000 863 +senior 0 14 4.317488 0.000000 1120 +shape 0 12 4.465908 0.000000 1245 +mesh 0 11 4.553877 0.000000 1351 +sens 0 11 4.553877 0.000000 1305 +purdu 0 10 4.653960 0.000000 1466 +rhode 0 9 4.753590 0.000000 1579 +rais 0 8 4.875197 0.000000 1711 +canb 0 7 5.010635 0.000000 1846 +beta 0 6 5.164786 0.000000 1993 +compat 0 5 5.347108 0.000000 2485 +diagram 0 5 5.347108 0.000000 2346 +triangul 0 4 5.568345 0.000000 2903 +chew 1 3 5.857933 5.857933 3618 +delaunai 0 3 5.857933 0.000000 3619 +implicitli 0 3 5.857933 0.000000 3620 +voronoi 0 2 6.263398 0.000000 5036 +agenda 0 2 6.263398 0.000000 5037 +scientificsoftwar 0 2 6.263398 0.000000 5038 +acollect 0 2 6.263398 0.000000 5039 +associatephd 0 1 6.957497 0.000000 10156 +eduappletsy 0 1 6.957497 0.000000 10157 +asnetscap 0 1 6.957497 0.000000 10158 +avoronoi 0 1 6.957497 0.000000 10159 +onpract 0 1 6.957497 0.000000 10160 +includedplac 0 1 6.957497 0.000000 10161 +thataris 0 1 6.957497 0.000000 10162 +isspecifi 0 1 6.957497 0.000000 10163 +ofphys 0 1 6.957497 0.000000 10164 +techniquesar 0 1 6.957497 0.000000 10165 +effectiveprogram 0 1 6.957497 0.000000 10166 +myonlin 0 1 6.957497 0.000000 10167 +reportscornel 0 1 6.957497 0.000000 10168 +computerscienceth 0 1 6.957497 0.000000 10169 +simlabprojectaddress 0 1 6.957497 0.000000 10170 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..bc9debf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +network 0 168 1.791759 0.000000 61 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +perform 0 143 1.945910 0.000000 74 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +world 0 115 2.197225 0.000000 126 +commun 0 95 2.397895 0.000000 157 +chang 0 82 2.484907 0.000000 163 +activ 0 84 2.484907 0.000000 182 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +server 0 76 2.564949 0.000000 204 +effici 0 73 2.639057 0.000000 233 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +guid 0 63 2.772589 0.000000 267 +faculti 0 56 2.890372 0.000000 325 +summer 0 56 2.890372 0.000000 311 +processor 0 54 2.944439 0.000000 335 +advisor 0 51 2.995732 0.000000 355 +cool 0 49 3.044522 0.000000 374 +multipl 0 39 3.258097 0.000000 453 +microsoft 0 38 3.295837 0.000000 468 +concurr 0 34 3.401197 0.000000 501 +toward 0 25 3.737670 0.000000 668 +sport 0 25 3.737670 0.000000 683 +latest 0 21 3.912023 0.000000 785 +runtim 0 19 4.007333 0.000000 858 +anyon 0 17 4.110874 0.000000 916 +latenc 0 16 4.174387 0.000000 993 +matlab 0 14 4.317488 0.000000 1081 +edui 0 13 4.382027 0.000000 1193 +eicken 0 13 4.382027 0.000000 1134 +composit 0 13 4.382027 0.000000 1150 +thedepart 0 11 4.553877 0.000000 1350 +scienceat 0 11 4.553877 0.000000 1375 +soccer 0 8 4.875197 0.000000 1752 +chao 0 8 4.875197 0.000000 1753 +risc 0 6 5.164786 0.000000 2016 +chess 0 5 5.347108 0.000000 2486 +andoper 0 3 5.857933 0.000000 3621 +messageslow 0 2 6.263398 0.000000 5040 +homepagelast 0 2 6.263398 0.000000 5001 +chichao 0 1 6.957497 0.000000 10171 +isthorsten 0 1 6.957497 0.000000 10172 +addressesand 0 1 6.957497 0.000000 10173 +overheterogen 0 1 6.957497 0.000000 10174 +tham 0 1 6.957497 0.000000 10175 +multimatlab 0 1 6.957497 0.000000 10176 +newsbraziliansocc 0 1 6.957497 0.000000 10177 +portugues 0 1 6.957497 0.000000 10178 +andhomepagesoliv 0 1 6.957497 0.000000 10179 +lubrasa 0 1 6.957497 0.000000 10180 +luso 0 1 6.957497 0.000000 10181 +brazilian 0 1 6.957497 0.000000 10182 +associationu 0 1 6.957497 0.000000 10183 +centerjorn 0 1 6.957497 0.000000 10184 +brasilmi 0 1 6.957497 0.000000 10185 +carstockmasterjayhawk 0 1 6.957497 0.000000 10186 +basketballwww 0 1 6.957497 0.000000 10187 +tennisserverback 0 1 6.957497 0.000000 10188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..0d55c4a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +avail 0 169 1.791759 0.000000 48 +construct 0 139 1.945910 0.000000 82 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +find 0 111 2.197225 0.000000 111 +advanc 0 99 2.302585 0.000000 130 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +master 0 76 2.564949 0.000000 216 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +ithaca 1 65 2.772589 2.772589 294 +improv 0 62 2.772589 0.000000 289 +foundat 0 62 2.772589 0.000000 286 +artifici 0 63 2.772589 0.000000 280 +programm 0 39 3.258097 0.000000 445 +small 0 39 3.258097 0.000000 447 +soon 0 36 3.367296 0.000000 494 +edulast 0 17 4.110874 0.000000 927 +countri 0 15 4.248495 0.000000 1059 +hopefulli 0 14 4.317488 0.000000 1071 +forth 0 13 4.382027 0.000000 1186 +avenu 0 12 4.465908 0.000000 1277 +mapl 0 11 4.553877 0.000000 1376 +apolog 0 6 5.164786 0.000000 2046 +singapor 1 5 5.347108 5.347108 2487 +intelligencec 0 4 5.568345 0.000000 2673 +engineeringc 0 4 5.568345 0.000000 2904 +chee 1 3 5.857933 5.857933 3480 +tokyo 1 3 5.857933 5.857933 3622 +keong 1 1 6.957497 6.957497 10189 +liau 0 1 6.957497 0.000000 10190 +liauwelcom 0 1 6.957497 0.000000 10191 +networksc 0 1 6.957497 0.000000 10192 +systemsbaccalaur 0 1 6.957497 0.000000 10193 +japanhomei 0 1 6.957497 0.000000 10194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..ed6df6a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +tool 0 117 2.079442 0.000000 93 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +final 0 116 2.197225 0.000000 108 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +center 0 88 2.397895 0.000000 158 +imag 0 91 2.397895 0.000000 161 +associ 0 93 2.397895 0.000000 151 +larg 0 82 2.484907 0.000000 168 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +activ 0 84 2.484907 0.000000 182 +optim 0 79 2.564949 0.000000 197 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +numer 0 49 3.044522 0.000000 369 +understand 0 47 3.091042 0.000000 384 +linear 0 41 3.218876 0.000000 431 +http 0 41 3.218876 0.000000 420 +york 0 41 3.218876 0.000000 435 +continu 0 39 3.258097 0.000000 448 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +scale 0 28 3.610918 0.000000 613 +constraint 0 26 3.688879 0.000000 636 +concern 0 25 3.737670 0.000000 666 +primari 0 25 3.737670 0.000000 669 +director 0 22 3.850148 0.000000 767 +thoma 0 18 4.060443 0.000000 901 +minim 0 18 4.060443 0.000000 887 +differenti 0 17 4.110874 0.000000 921 +former 0 17 4.110874 0.000000 956 +match 0 16 4.174387 0.000000 965 +nonlinear 0 14 4.317488 0.000000 1107 +affili 0 13 4.382027 0.000000 1194 +discret 0 13 4.382027 0.000000 1165 +equal 0 10 4.653960 0.000000 1424 +rhode 0 9 4.753590 0.000000 1579 +postdoc 0 8 4.875197 0.000000 1724 +strong 0 6 5.164786 0.000000 2029 +reconstruct 0 6 5.164786 0.000000 2170 +inequ 0 6 5.164786 0.000000 2113 +biomed 0 4 5.568345 0.000000 2905 +coleman 0 2 6.263398 0.000000 5041 +colemanthoma 0 1 6.957497 0.000000 10195 +colemancornel 0 1 6.957497 0.000000 10196 +universityi 0 1 6.957497 0.000000 10197 +professcp 0 1 6.957497 0.000000 10198 +ccop 0 1 6.957497 0.000000 10199 +broadfield 0 1 6.957497 0.000000 10200 +programmi 0 1 6.957497 0.000000 10201 +computationalmethod 0 1 6.957497 0.000000 10202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..9b06b1b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +resourc 0 81 2.484907 0.000000 172 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +name 0 72 2.639057 0.000000 220 +logic 0 71 2.639057 0.000000 230 +onlin 0 75 2.639057 0.000000 223 +view 0 70 2.708050 0.000000 254 +practic 0 70 2.708050 0.000000 246 +import 0 65 2.772589 0.000000 282 +function 0 62 2.772589 0.000000 275 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +collect 0 65 2.772589 0.000000 268 +guid 0 63 2.772589 0.000000 267 +type 1 61 2.833213 2.833213 296 +maintain 0 51 2.995732 0.000000 342 +set 0 50 3.044522 0.000000 361 +standard 0 48 3.044522 0.000000 365 +life 0 50 3.044522 0.000000 375 +featur 0 46 3.091042 0.000000 386 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +answer 0 45 3.135494 0.000000 391 +mark 0 44 3.135494 0.000000 403 +combin 0 42 3.218876 0.000000 421 +theoret 0 39 3.258097 0.000000 446 +map 0 39 3.258097 0.000000 452 +form 0 39 3.258097 0.000000 443 +origin 0 38 3.295837 0.000000 472 +close 0 38 3.295837 0.000000 465 +formal 0 37 3.332205 0.000000 478 +approxim 0 35 3.401197 0.000000 509 +committe 0 34 3.401197 0.000000 522 +eduoffic 0 33 3.433987 0.000000 531 +often 0 31 3.496508 0.000000 551 +semant 0 29 3.583519 0.000000 587 +mind 0 27 3.637586 0.000000 632 +consist 0 26 3.688879 0.000000 651 +primari 0 25 3.737670 0.000000 669 +strategi 0 25 3.737670 0.000000 682 +seri 0 24 3.761200 0.000000 708 +interpret 0 24 3.761200 0.000000 686 +greg 0 24 3.761200 0.000000 695 +emphasi 0 22 3.850148 0.000000 755 +programminglanguag 0 21 3.912023 0.000000 782 +love 0 21 3.912023 0.000000 804 +martin 0 21 3.912023 0.000000 794 +safeti 0 20 3.951244 0.000000 817 +grad 0 20 3.951244 0.000000 837 +particularli 0 19 4.007333 0.000000 867 +lower 0 18 4.060443 0.000000 886 +whole 0 17 4.110874 0.000000 940 +embed 0 14 4.317488 0.000000 1102 +command 0 14 4.317488 0.000000 1083 +translat 0 13 4.382027 0.000000 1164 +care 0 13 4.382027 0.000000 1177 +speak 0 12 4.465908 0.000000 1283 +calculu 0 12 4.465908 0.000000 1203 +israel 0 11 4.553877 0.000000 1366 +modular 0 10 4.653960 0.000000 1392 +relationship 0 10 4.653960 0.000000 1383 +correspond 0 10 4.653960 0.000000 1382 +guarante 0 10 4.653960 0.000000 1391 +nuprl 0 10 4.653960 0.000000 1402 +intermedi 0 9 4.753590 0.000000 1497 +andth 0 9 4.753590 0.000000 1481 +strength 0 9 4.753590 0.000000 1494 +formul 0 8 4.875197 0.000000 1733 +convers 0 8 4.875197 0.000000 1673 +paradigm 0 8 4.875197 0.000000 1662 +invari 0 8 4.875197 0.000000 1748 +leon 0 8 4.875197 0.000000 1631 +babylon 0 8 4.875197 0.000000 1731 +heart 0 8 4.875197 0.000000 1729 +pageth 0 7 5.010635 0.000000 1939 +hear 0 7 5.010635 0.000000 1940 +understood 0 5 5.347108 0.000000 2364 +stage 0 5 5.347108 0.000000 2488 +morrisett 0 5 5.347108 0.000000 2263 +lord 0 4 5.568345 0.000000 2906 +dexter 0 4 5.568345 0.000000 2855 +kozen 0 4 5.568345 0.000000 2619 +ofprogram 0 4 5.568345 0.000000 2624 +soul 0 4 5.568345 0.000000 2907 +karl 0 3 5.857933 0.000000 3623 +constabl 0 3 5.857933 0.000000 3186 +jesu 0 3 5.857933 0.000000 3624 +atyp 0 2 6.263398 0.000000 5042 +tractabl 0 2 6.263398 0.000000 4799 +pagekarl 0 2 6.263398 0.000000 5043 +halloffic 0 2 6.263398 0.000000 4583 +subtyp 0 2 6.263398 0.000000 4375 +intract 0 2 6.263398 0.000000 5044 +anapproxim 0 2 6.263398 0.000000 5045 +unavail 0 2 6.263398 0.000000 5046 +thenuprl 0 2 6.263398 0.000000 5047 +hereat 0 2 6.263398 0.000000 5048 +papersoth 0 2 6.263398 0.000000 5049 +lurker 0 2 6.263398 0.000000 5050 +andwith 0 2 6.263398 0.000000 5051 +thesecond 0 2 6.263398 0.000000 4128 +pagedepart 0 2 6.263398 0.000000 5052 +calculi 0 1 6.957497 0.000000 10203 +crari 0 1 6.957497 0.000000 10204 +crarycrari 0 1 6.957497 0.000000 10205 +researchbroadli 0 1 6.957497 0.000000 10206 +implementationand 0 1 6.957497 0.000000 10207 +kmlwhich 0 1 6.957497 0.000000 10208 +richworld 0 1 6.957497 0.000000 10209 +newprogram 0 1 6.957497 0.000000 10210 +aminterest 0 1 6.957497 0.000000 10211 +deepen 0 1 6.957497 0.000000 10212 +mitig 0 1 6.957497 0.000000 10213 +modelallow 0 1 6.957497 0.000000 10214 +allowsth 0 1 6.957497 0.000000 10215 +andcorrect 0 1 6.957497 0.000000 10216 +additionaloptim 0 1 6.957497 0.000000 10217 +automatedreason 0 1 6.957497 0.000000 10218 +ofrobert 0 1 6.957497 0.000000 10219 +jasonhickei 0 1 6.957497 0.000000 10220 +linksmark 0 1 6.957497 0.000000 10221 +cansearch 0 1 6.957497 0.000000 10222 +biblestudi 0 1 6.957497 0.000000 10223 +thelord 0 1 6.957497 0.000000 10224 +neighbor 0 1 6.957497 0.000000 10225 +commandmentgreat 0 1 6.957497 0.000000 10226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..7cd8d1dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +phone 0 175 1.791759 0.000000 45 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +center 0 88 2.397895 0.000000 158 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +select 0 91 2.397895 0.000000 154 +solut 0 82 2.484907 0.000000 162 +institut 0 84 2.484907 0.000000 187 +journal 0 83 2.484907 0.000000 183 +larg 0 82 2.484907 0.000000 168 +contain 0 81 2.484907 0.000000 174 +second 0 81 2.484907 0.000000 166 +optim 0 79 2.564949 0.000000 197 +state 0 76 2.564949 0.000000 207 +decemb 0 80 2.564949 0.000000 215 +septemb 0 65 2.772589 0.000000 274 +ithaca 0 65 2.772589 0.000000 294 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +numer 0 49 3.044522 0.000000 369 +algebra 0 45 3.135494 0.000000 394 +linear 0 41 3.218876 0.000000 431 +map 0 39 3.258097 0.000000 452 +close 0 38 3.295837 0.000000 465 +tree 0 36 3.367296 0.000000 492 +least 0 35 3.401197 0.000000 516 +posit 0 31 3.496508 0.000000 552 +multiprocessor 0 28 3.610918 0.000000 605 +packag 0 28 3.610918 0.000000 614 +scale 0 28 3.610918 0.000000 613 +bound 0 26 3.688879 0.000000 659 +deal 0 22 3.850148 0.000000 736 +siam 0 21 3.912023 0.000000 800 +definit 0 19 4.007333 0.000000 864 +matrix 0 17 4.110874 0.000000 933 +spars 1 16 4.174387 4.174387 989 +squar 0 14 4.317488 0.000000 1082 +rank 0 14 4.317488 0.000000 1063 +francisco 0 14 4.317488 0.000000 1095 +affili 0 13 4.382027 0.000000 1194 +philadelphia 0 12 4.465908 0.000000 1244 +matric 0 10 4.653960 0.000000 1399 +factor 0 9 4.753590 0.000000 1544 +conferenceon 0 9 4.753590 0.000000 1595 +simon 0 8 4.875197 0.000000 1697 +watson 0 8 4.875197 0.000000 1691 +univeristi 0 8 4.875197 0.000000 1754 +pennsylvania 0 7 5.010635 0.000000 1932 +cornellunivers 0 7 5.010635 0.000000 1916 +sixth 0 7 5.010635 0.000000 1917 +fifth 0 7 5.010635 0.000000 1931 +compact 0 7 5.010635 0.000000 1907 +dens 0 6 5.164786 0.000000 2122 +reed 0 6 5.164786 0.000000 2086 +row 0 5 5.347108 0.000000 2330 +seventh 0 5 5.347108 0.000000 2464 +orthogon 0 4 5.568345 0.000000 2832 +thecornel 0 4 5.568345 0.000000 2892 +symmetr 0 4 5.568345 0.000000 2908 +ctctr 0 3 5.857933 0.000000 3625 +parallelprocess 0 3 5.857933 0.000000 3626 +coleman 0 2 6.263398 0.000000 5041 +professorthoma 0 2 6.263398 0.000000 5053 +defici 0 2 6.263398 0.000000 5054 +idaho 0 2 6.263398 0.000000 5055 +ondistribut 0 2 6.263398 0.000000 4320 +solutionof 0 2 6.263398 0.000000 5056 +key 0 2 6.263398 0.000000 5057 +dongarra 0 2 6.263398 0.000000 5058 +kennedi 0 2 6.263398 0.000000 4539 +multifront 0 1 6.957497 0.000000 10227 +pothen 0 1 6.957497 0.000000 10228 +chunguang 0 1 6.957497 0.000000 10229 +processingfor 0 1 6.957497 0.000000 10230 +cliqu 0 1 6.957497 0.000000 10231 +sunchunguang 0 1 6.957497 0.000000 10232 +sunphd 0 1 6.957497 0.000000 10233 +ppcx 0 1 6.957497 0.000000 10234 +pssl 0 1 6.957497 0.000000 10235 +psspd 0 1 6.957497 0.000000 10236 +systemsrec 0 1 6.957497 0.000000 10237 +lecturesparallel 0 1 6.957497 0.000000 10238 +coeur 0 1 6.957497 0.000000 10239 +alen 0 1 6.957497 0.000000 10240 +bailei 0 1 6.957497 0.000000 10241 +bjorstad 0 1 6.957497 0.000000 10242 +gilbert 0 1 6.957497 0.000000 10243 +mascagni 0 1 6.957497 0.000000 10244 +schreiber 0 1 6.957497 0.000000 10245 +torczon 0 1 6.957497 0.000000 10246 +choleskyfactor 0 1 6.957497 0.000000 10247 +matriceson 0 1 6.957497 0.000000 10248 +sinovec 0 1 6.957497 0.000000 10249 +leuz 0 1 6.957497 0.000000 10250 +petzold 0 1 6.957497 0.000000 10251 +messina 0 1 6.957497 0.000000 10252 +sorensen 0 1 6.957497 0.000000 10253 +voigt 0 1 6.957497 0.000000 10254 +structuresin 0 1 6.957497 0.000000 10255 +csun 0 1 6.957497 0.000000 10256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..1b52aa39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +problem 0 147 1.945910 0.000000 75 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +send 0 114 2.197225 0.000000 109 +make 0 111 2.197225 0.000000 120 +version 0 113 2.197225 0.000000 122 +user 0 104 2.302585 0.000000 137 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +proceed 0 93 2.397895 0.000000 152 +member 0 84 2.484907 0.000000 165 +solut 0 82 2.484907 0.000000 162 +ieee 0 86 2.484907 0.000000 190 +messag 0 76 2.564949 0.000000 212 +server 0 76 2.564949 0.000000 204 +exampl 0 77 2.564949 0.000000 195 +david 0 71 2.639057 0.000000 232 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +upson 0 71 2.639057 0.000000 218 +addit 0 74 2.639057 0.000000 228 +symposium 0 72 2.639057 0.000000 238 +would 0 67 2.708050 0.000000 251 +order 0 69 2.708050 0.000000 249 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +august 0 66 2.708050 0.000000 257 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +improv 0 62 2.772589 0.000000 289 +type 0 61 2.833213 0.000000 296 +locat 0 59 2.833213 0.000000 303 +thesi 0 57 2.890372 0.000000 327 +allow 0 53 2.944439 0.000000 333 +februari 0 54 2.944439 0.000000 328 +maintain 0 51 2.995732 0.000000 342 +set 0 50 3.044522 0.000000 361 +basic 0 50 3.044522 0.000000 360 +keep 0 44 3.135494 0.000000 409 +made 0 44 3.135494 0.000000 398 +protocol 0 45 3.135494 0.000000 407 +howev 0 41 3.218876 0.000000 422 +might 0 41 3.218876 0.000000 426 +join 0 39 3.258097 0.000000 457 +origin 0 38 3.295837 0.000000 472 +respons 0 37 3.332205 0.000000 476 +within 0 33 3.433987 0.000000 525 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +dissert 0 32 3.465736 0.000000 549 +secur 0 30 3.555348 0.000000 577 +semant 0 29 3.583519 0.000000 587 +limit 0 29 3.583519 0.000000 585 +propos 0 28 3.610918 0.000000 602 +static 0 27 3.637586 0.000000 619 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +client 0 25 3.737670 0.000000 679 +wish 0 24 3.761200 0.000000 692 +mike 0 24 3.761200 0.000000 703 +higher 0 24 3.761200 0.000000 690 +mobil 0 23 3.806662 0.000000 730 +cooper 0 22 3.850148 0.000000 757 +scheme 0 20 3.951244 0.000000 818 +accept 0 18 4.060443 0.000000 879 +along 0 18 4.060443 0.000000 878 +layer 0 17 4.110874 0.000000 926 +devic 0 16 4.174387 0.000000 1002 +permit 0 16 4.174387 0.000000 962 +overhead 0 15 4.248495 0.000000 1035 +horu 0 14 4.317488 0.000000 1116 +command 0 14 4.317488 0.000000 1083 +necessari 0 13 4.382027 0.000000 1147 +whose 0 13 4.382027 0.000000 1166 +carri 0 13 4.382027 0.000000 1152 +kenneth 0 12 4.465908 0.000000 1265 +arbitrari 0 11 4.553877 0.000000 1359 +ofcomput 0 10 4.653960 0.000000 1442 +trust 0 9 4.753590 0.000000 1583 +birman 0 9 4.753590 0.000000 1531 +desir 0 9 4.753590 0.000000 1542 +assumpt 0 9 4.753590 0.000000 1514 +screen 0 9 4.753590 0.000000 1577 +informationabout 0 9 4.753590 0.000000 1515 +crash 0 8 4.875197 0.000000 1616 +accomplish 0 8 4.875197 0.000000 1755 +synchroni 0 7 5.010635 0.000000 1923 +prevent 0 7 5.010635 0.000000 1827 +fromth 0 7 5.010635 0.000000 1802 +slightli 0 7 5.010635 0.000000 1795 +encrypt 0 7 5.010635 0.000000 1835 +privaci 0 6 5.164786 0.000000 2144 +extern 0 6 5.164786 0.000000 2105 +authent 0 5 5.347108 0.000000 2306 +trivial 0 4 5.568345 0.000000 2786 +witha 0 4 5.568345 0.000000 2617 +complic 0 4 5.568345 0.000000 2902 +ofinform 0 4 5.568345 0.000000 2707 +reveal 0 4 5.568345 0.000000 2647 +wireless 0 4 5.568345 0.000000 2693 +anthoni 0 4 5.568345 0.000000 2792 +privat 0 3 5.857933 0.000000 3496 +reiter 0 3 5.857933 0.000000 3461 +proper 0 3 5.857933 0.000000 3323 +recipi 0 3 5.857933 0.000000 3627 +owner 0 3 5.857933 0.000000 3531 +attack 0 3 5.857933 0.000000 3168 +preserv 0 3 5.857933 0.000000 3628 +mobilecomput 0 3 5.857933 0.000000 3629 +untrust 0 2 6.263398 0.000000 4997 +postdoctor 0 2 6.263398 0.000000 5059 +honest 0 2 6.263398 0.000000 5060 +ofvirtu 0 2 6.263398 0.000000 5061 +communicatewith 0 2 6.263398 0.000000 5062 +unlik 0 2 6.263398 0.000000 5063 +sender 0 2 6.263398 0.000000 5064 +adversari 0 2 6.263398 0.000000 5065 +ofmobil 0 2 6.263398 0.000000 5011 +securityand 0 2 6.263398 0.000000 5066 +relationshipsamong 0 1 6.957497 0.000000 10257 +dcooper 0 1 6.957497 0.000000 10258 +securityarchitectur 0 1 6.957497 0.000000 10259 +horuswhich 0 1 6.957497 0.000000 10260 +kerberosnetwork 0 1 6.957497 0.000000 10261 +cryptograph 0 1 6.957497 0.000000 10262 +toprovid 0 1 6.957497 0.000000 10263 +originalimplement 0 1 6.957497 0.000000 10264 +failuremodel 0 1 6.957497 0.000000 10265 +anyprocess 0 1 6.957497 0.000000 10266 +isposs 0 1 6.957497 0.000000 10267 +weaker 0 1 6.957497 0.000000 10268 +untrustedprocess 0 1 6.957497 0.000000 10269 +clientsto 0 1 6.957497 0.000000 10270 +horussecur 0 1 6.957497 0.000000 10271 +keymanag 0 1 6.957497 0.000000 10272 +impersonateanoth 0 1 6.957497 0.000000 10273 +achieveth 0 1 6.957497 0.000000 10274 +asclient 0 1 6.957497 0.000000 10275 +inherentin 0 1 6.957497 0.000000 10276 +contentsof 0 1 6.957497 0.000000 10277 +hiddenwith 0 1 6.957497 0.000000 10278 +outsidersfrom 0 1 6.957497 0.000000 10279 +maintainingth 0 1 6.957497 0.000000 10280 +unlink 0 1 6.957497 0.000000 10281 +chaum 0 1 6.957497 0.000000 10282 +severaloth 0 1 6.957497 0.000000 10283 +staticnetwork 0 1 6.957497 0.000000 10284 +mobilecommun 0 1 6.957497 0.000000 10285 +themessag 0 1 6.957497 0.000000 10286 +advisorken 0 1 6.957497 0.000000 10287 +internaland 0 1 6.957497 0.000000 10288 +apriv 0 1 6.957497 0.000000 10289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..1ccb5024 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +project 1 340 1.098612 1.098612 18 +model 0 145 1.945910 0.000000 69 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +resum 0 79 2.564949 0.000000 217 +would 0 67 2.708050 0.000000 251 +type 0 61 2.833213 0.000000 296 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +digit 0 52 2.995732 0.000000 348 +hand 0 37 3.332205 0.000000 475 +field 0 37 3.332205 0.000000 482 +game 0 36 3.367296 0.000000 498 +word 0 34 3.401197 0.000000 508 +express 0 32 3.465736 0.000000 540 +fault 0 32 3.465736 0.000000 547 +human 0 32 3.465736 0.000000 546 +scientist 0 31 3.496508 0.000000 560 +travel 0 30 3.555348 0.000000 579 +hope 1 28 3.610918 3.610918 610 +releas 0 28 3.610918 0.000000 616 +mine 0 26 3.688879 0.000000 654 +never 0 25 3.737670 0.000000 671 +reach 0 24 3.761200 0.000000 688 +instead 0 22 3.850148 0.000000 756 +love 1 21 3.912023 3.912023 804 +born 0 21 3.912023 0.000000 798 +ever 1 19 4.007333 4.007333 872 +brief 0 16 4.174387 0.000000 1001 +role 0 14 4.317488 0.000000 1101 +sai 0 13 4.382027 0.000000 1175 +pretti 0 13 4.382027 0.000000 1191 +cannot 0 13 4.382027 0.000000 1144 +rest 0 12 4.465908 0.000000 1259 +surf 0 11 4.553877 0.000000 1301 +shore 0 11 4.553877 0.000000 1377 +night 0 11 4.553877 0.000000 1319 +road 0 11 4.553877 0.000000 1374 +true 0 10 4.653960 0.000000 1422 +poetri 0 9 4.753590 0.000000 1596 +told 0 8 4.875197 0.000000 1658 +heart 0 8 4.875197 0.000000 1729 +hold 0 8 4.875197 0.000000 1645 +prize 0 6 5.164786 0.000000 2150 +gentl 0 5 5.347108 0.000000 2264 +facial 0 5 5.347108 0.000000 2438 +unknown 0 5 5.347108 0.000000 2318 +favor 0 5 5.347108 0.000000 2414 +suffer 0 5 5.347108 0.000000 2268 +cyber 0 4 5.568345 0.000000 2909 +lawyer 0 4 5.568345 0.000000 2836 +uncertain 0 4 5.568345 0.000000 2758 +fals 0 4 5.568345 0.000000 2861 +dark 0 4 5.568345 0.000000 2910 +soul 0 4 5.568345 0.000000 2907 +fear 0 4 5.568345 0.000000 2911 +faith 0 3 5.857933 0.000000 3363 +dread 0 3 5.857933 0.000000 3630 +wise 0 3 5.857933 0.000000 3631 +romanc 0 3 5.857933 0.000000 3632 +passion 0 3 5.857933 0.000000 3633 +tortur 0 3 5.857933 0.000000 3634 +diseas 0 3 5.857933 0.000000 3635 +pain 0 3 5.857933 0.000000 3460 +blame 0 3 5.857933 0.000000 3636 +cold 0 3 5.857933 0.000000 3637 +burn 0 2 6.263398 0.000000 4447 +blink 0 2 6.263398 0.000000 5067 +ey 0 2 6.263398 0.000000 5068 +kei 0 2 6.263398 0.000000 4812 +mice 0 2 6.263398 0.000000 5069 +autobiographi 0 2 6.263398 0.000000 5070 +concret 0 2 6.263398 0.000000 4276 +ear 0 2 6.263398 0.000000 5071 +soft 0 2 6.263398 0.000000 5072 +belov 0 2 6.263398 0.000000 5073 +broken 0 2 6.263398 0.000000 5074 +horror 0 2 6.263398 0.000000 5075 +tear 0 2 6.263398 0.000000 5076 +deed 0 2 6.263398 0.000000 5077 +frozen 0 2 6.263398 0.000000 5078 +deidr 0 1 6.957497 0.000000 10290 +pandora 0 1 6.957497 0.000000 10291 +abodedan 0 1 6.957497 0.000000 10292 +abodegreet 0 1 6.957497 0.000000 10293 +humbl 0 1 6.957497 0.000000 10294 +prithe 0 1 6.957497 0.000000 10295 +teari 0 1 6.957497 0.000000 10296 +weari 0 1 6.957497 0.000000 10297 +thyselv 0 1 6.957497 0.000000 10298 +abod 0 1 6.957497 0.000000 10299 +emot 0 1 6.957497 0.000000 10300 +simnet 0 1 6.957497 0.000000 10301 +builder 0 1 6.957497 0.000000 10302 +faiththei 0 1 6.957497 0.000000 10303 +hardli 0 1 6.957497 0.000000 10304 +ferro 0 1 6.957497 0.000000 10305 +scorn 0 1 6.957497 0.000000 10306 +bend 0 1 6.957497 0.000000 10307 +tone 0 1 6.957497 0.000000 10308 +unseen 0 1 6.957497 0.000000 10309 +unheard 0 1 6.957497 0.000000 10310 +untouch 0 1 6.957497 0.000000 10311 +silenc 0 1 6.957497 0.000000 10312 +yearn 0 1 6.957497 0.000000 10313 +lordlovewarm 0 1 6.957497 0.000000 10314 +friendship 0 1 6.957497 0.000000 10315 +mindless 0 1 6.957497 0.000000 10316 +infatu 0 1 6.957497 0.000000 10317 +sensual 0 1 6.957497 0.000000 10318 +sigh 0 1 6.957497 0.000000 10319 +hopemyth 0 1 6.957497 0.000000 10320 +beauteou 0 1 6.957497 0.000000 10321 +demon 0 1 6.957497 0.000000 10322 +astrai 0 1 6.957497 0.000000 10323 +glimmer 0 1 6.957497 0.000000 10324 +tread 0 1 6.957497 0.000000 10325 +amidst 0 1 6.957497 0.000000 10326 +thorn 0 1 6.957497 0.000000 10327 +filthi 0 1 6.957497 0.000000 10328 +miseri 0 1 6.957497 0.000000 10329 +etern 0 1 6.957497 0.000000 10330 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..59030d21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +site 0 106 2.197225 0.000000 119 +intern 0 108 2.197225 0.000000 128 +access 0 102 2.302585 0.000000 136 +part 0 98 2.302585 0.000000 129 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +imag 0 91 2.397895 0.000000 161 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +server 1 76 2.564949 2.564949 204 +refer 0 78 2.564949 0.000000 203 +collect 0 65 2.772589 0.000000 268 +ithaca 0 65 2.772589 0.000000 294 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +digit 0 52 2.995732 0.000000 348 +understand 0 47 3.091042 0.000000 384 +directori 0 45 3.135494 0.000000 396 +music 0 42 3.218876 0.000000 436 +global 0 34 3.401197 0.000000 520 +secur 0 30 3.555348 0.000000 577 +weather 0 28 3.610918 0.000000 618 +berkelei 0 26 3.688879 0.000000 657 +recognit 0 23 3.806662 0.000000 723 +togeth 0 23 3.806662 0.000000 714 +head 0 23 3.806662 0.000000 732 +siam 0 21 3.912023 0.000000 800 +navig 0 21 3.912023 0.000000 796 +lyco 0 19 4.007333 0.000000 871 +excel 0 19 4.007333 0.000000 868 +stanford 0 17 4.110874 0.000000 955 +whole 0 17 4.110874 0.000000 940 +charact 0 15 4.248495 0.000000 1028 +dean 0 14 4.317488 0.000000 1104 +anonym 0 14 4.317488 0.000000 1100 +audio 0 14 4.317488 0.000000 1094 +captur 0 12 4.465908 0.000000 1232 +michigan 0 11 4.553877 0.000000 1368 +earth 0 10 4.653960 0.000000 1463 +catalog 0 10 4.653960 0.000000 1431 +folk 0 9 4.753590 0.000000 1597 +illinoi 0 7 5.010635 0.000000 1941 +gatewai 0 7 5.010635 0.000000 1942 +scout 0 7 5.010635 0.000000 1903 +gopher 0 6 5.164786 0.000000 1982 +legal 0 6 5.164786 0.000000 2094 +forecast 0 6 5.164786 0.000000 2171 +elsewher 0 5 5.347108 0.000000 2444 +cuinfo 0 4 5.568345 0.000000 2626 +planet 0 4 5.568345 0.000000 2912 +gear 0 4 5.568345 0.000000 2891 +krafft 0 3 5.857933 0.000000 3638 +archi 0 3 5.857933 0.000000 3639 +cern 0 2 6.263398 0.000000 5079 +urlsdean 0 1 6.957497 0.000000 10331 +interestcornel 0 1 6.957497 0.000000 10332 +dimund 0 1 6.957497 0.000000 10333 +librarysearch 0 1 6.957497 0.000000 10334 +veronica 0 1 6.957497 0.000000 10335 +faqsvari 0 1 6.957497 0.000000 10336 +folkbook 0 1 6.957497 0.000000 10337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..85113840 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +report 0 131 2.079442 0.000000 92 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +technic 0 100 2.302585 0.000000 140 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +select 0 91 2.397895 0.000000 154 +build 0 85 2.484907 0.000000 184 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +member 0 84 2.484907 0.000000 165 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +issu 0 78 2.564949 0.000000 211 +upson 0 71 2.639057 0.000000 218 +servic 0 72 2.639057 0.000000 236 +nation 0 74 2.639057 0.000000 240 +line 0 75 2.639057 0.000000 231 +materi 0 75 2.639057 0.000000 221 +investig 0 51 2.995732 0.000000 353 +protocol 0 45 3.135494 0.000000 407 +secur 0 30 3.555348 0.000000 577 +intend 0 28 3.610918 0.000000 599 +administr 0 27 3.637586 0.000000 628 +universityithaca 0 24 3.761200 0.000000 710 +initi 0 23 3.806662 0.000000 717 +director 0 22 3.850148 0.000000 767 +sciencecornel 0 22 3.850148 0.000000 768 +serv 0 22 3.850148 0.000000 758 +inth 0 22 3.850148 0.000000 741 +fund 0 21 3.912023 0.000000 805 +similar 0 21 3.912023 0.000000 771 +facil 0 20 3.951244 0.000000 814 +break 0 20 3.951244 0.000000 812 +five 0 19 4.007333 0.000000 841 +side 0 15 4.248495 0.000000 1022 +carl 0 15 4.248495 0.000000 1024 +dean 0 14 4.317488 0.000000 1104 +emploi 0 12 4.465908 0.000000 1284 +arpa 0 11 4.553877 0.000000 1369 +eight 0 11 4.553877 0.000000 1331 +consortium 0 10 4.653960 0.000000 1467 +princip 0 10 4.653960 0.000000 1397 +rapid 0 10 4.653960 0.000000 1453 +researchi 0 8 4.875197 0.000000 1756 +xerox 0 8 4.875197 0.000000 1725 +davi 0 7 5.010635 0.000000 1888 +sciencedepart 0 6 5.164786 0.000000 2172 +interestedin 0 5 5.347108 0.000000 2260 +employe 0 4 5.568345 0.000000 2717 +krafft 0 3 5.857933 0.000000 3638 +dienst 0 3 5.857933 0.000000 3640 +halldepart 0 3 5.857933 0.000000 3641 +dissemin 0 2 6.263398 0.000000 5080 +thedesign 0 2 6.263398 0.000000 4251 +lagoz 0 2 6.263398 0.000000 5081 +facilitiesaddress 0 1 6.957497 0.000000 10338 +guis 0 1 6.957497 0.000000 10339 +anadministr 0 1 6.957497 0.000000 10340 +andworri 0 1 6.957497 0.000000 10341 +spart 0 1 6.957497 0.000000 10342 +thecorpor 0 1 6.957497 0.000000 10343 +cnri 0 1 6.957497 0.000000 10344 +technicalresearch 0 1 6.957497 0.000000 10345 +theexist 0 1 6.957497 0.000000 10346 +disseminationov 0 1 6.957497 0.000000 10347 +atechn 0 1 6.957497 0.000000 10348 +ondienst 0 1 6.957497 0.000000 10349 +togethera 0 1 6.957497 0.000000 10350 +url 0 1 6.957497 0.000000 10351 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..163f2f5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +address 0 170 1.791759 0.000000 62 +hall 0 146 1.945910 0.000000 65 +area 0 144 1.945910 0.000000 80 +analysi 0 124 2.079442 0.000000 98 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +good 0 77 2.564949 0.000000 200 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +numer 0 49 3.044522 0.000000 369 +advis 0 6 5.164786 0.000000 2173 +divakar 0 1 6.957497 0.000000 10352 +pagedivakar 0 1 6.957497 0.000000 10353 +viswanathdivakar 0 1 6.957497 0.000000 10354 +isnumer 0 1 6.957497 0.000000 10355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..fe4b9e05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +advanc 0 99 2.302585 0.000000 130 +institut 0 84 2.484907 0.000000 187 +know 0 80 2.564949 0.000000 198 +master 0 76 2.564949 0.000000 216 +want 0 79 2.564949 0.000000 199 +appli 0 71 2.639057 0.000000 226 +html 0 75 2.639057 0.000000 235 +receiv 0 66 2.708050 0.000000 244 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +locat 0 59 2.833213 0.000000 303 +friend 0 48 3.044522 0.000000 376 +physic 0 47 3.091042 0.000000 377 +york 0 41 3.218876 0.000000 435 +littl 0 39 3.258097 0.000000 454 +live 0 40 3.258097 0.000000 451 +china 0 37 3.332205 0.000000 487 +winter 0 36 3.367296 0.000000 500 +except 0 28 3.610918 0.000000 607 +campu 0 27 3.637586 0.000000 623 +beij 0 19 4.007333 0.000000 876 +miss 0 19 4.007333 0.000000 866 +beauti 0 18 4.060443 0.000000 912 +miller 0 17 4.110874 0.000000 949 +practicum 0 16 4.174387 0.000000 960 +mayb 0 15 4.248495 0.000000 1014 +anywai 0 15 4.248495 0.000000 1047 +translat 0 13 4.382027 0.000000 1164 +central 0 13 4.382027 0.000000 1160 +tsinghua 0 13 4.382027 0.000000 1195 +realiti 0 12 4.465908 0.000000 1272 +jersei 0 9 4.753590 0.000000 1587 +brought 0 7 5.010635 0.000000 1925 +railroad 0 6 5.164786 0.000000 2161 +coursesc 0 4 5.568345 0.000000 2692 +engineeringc 0 4 5.568345 0.000000 2904 +doubt 0 3 5.857933 0.000000 3119 +gorgeou 0 2 6.263398 0.000000 5082 +newark 0 2 6.263398 0.000000 5032 +diyu 0 1 6.957497 0.000000 10356 +pagediyu 0 1 6.957497 0.000000 10357 +daisi 0 1 6.957497 0.000000 10358 +translatorsfal 0 1 6.957497 0.000000 10359 +systemc 0 1 6.957497 0.000000 10360 +systemsel 0 1 6.957497 0.000000 10361 +telecommunicationsm 0 1 6.957497 0.000000 10362 +projectorigin 0 1 6.957497 0.000000 10363 +projectsinc 0 1 6.957497 0.000000 10364 +unviers 0 1 6.957497 0.000000 10365 +linksjava 0 1 6.957497 0.000000 10366 +tkfavorit 0 1 6.957497 0.000000 10367 +sitestimecnnlondon 0 1 6.957497 0.000000 10368 +timeswashington 0 1 6.957497 0.000000 10369 +postchines 0 1 6.957497 0.000000 10370 +digestchina 0 1 6.957497 0.000000 10371 +digestfeng 0 1 6.957497 0.000000 10372 +yuanxin 0 1 6.957497 0.000000 10373 +siart 0 1 6.957497 0.000000 10374 +chinaloc 0 1 6.957497 0.000000 10375 +connectionsctc 0 1 6.957497 0.000000 10376 +sunlabweathermovi 0 1 6.957497 0.000000 10377 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..b24a141b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +document 1 121 2.079442 2.079442 89 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +teach 0 108 2.197225 0.000000 112 +structur 0 106 2.197225 0.000000 105 +techniqu 0 99 2.302585 0.000000 138 +imag 0 91 2.397895 0.000000 161 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +main 0 67 2.708050 0.000000 256 +view 0 70 2.708050 0.000000 254 +evalu 0 64 2.772589 0.000000 266 +type 0 61 2.833213 0.000000 296 +share 0 59 2.833213 0.000000 304 +index 0 56 2.890372 0.000000 309 +profession 0 51 2.995732 0.000000 345 +investig 0 51 2.995732 0.000000 353 +visual 0 48 3.044522 0.000000 372 +format 0 48 3.044522 0.000000 356 +without 0 50 3.044522 0.000000 370 +cool 0 49 3.044522 0.000000 374 +electron 0 47 3.091042 0.000000 379 +video 0 44 3.135494 0.000000 405 +favorit 0 44 3.135494 0.000000 410 +offer 0 43 3.178054 0.000000 414 +vision 0 41 3.218876 0.000000 430 +fast 0 42 3.218876 0.000000 429 +theoret 0 39 3.258097 0.000000 446 +author 0 39 3.258097 0.000000 450 +small 0 39 3.258097 0.000000 447 +brian 0 38 3.295837 0.000000 466 +approxim 0 35 3.401197 0.000000 509 +collabor 0 32 3.465736 0.000000 543 +rang 0 30 3.555348 0.000000 565 +chair 0 29 3.583519 0.000000 596 +held 0 28 3.610918 0.000000 600 +compar 0 26 3.688879 0.000000 648 +sport 0 25 3.737670 0.000000 683 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +highli 0 23 3.806662 0.000000 725 +geometri 0 22 3.850148 0.000000 752 +smith 0 20 3.951244 0.000000 820 +geometr 0 19 4.007333 0.000000 852 +monitor 0 17 4.110874 0.000000 941 +match 0 16 4.174387 0.000000 965 +remot 0 15 4.248495 0.000000 1041 +track 0 15 4.248495 0.000000 1029 +matlab 0 14 4.317488 0.000000 1081 +daniel 0 12 4.465908 0.000000 1233 +target 0 12 4.465908 0.000000 1282 +extrem 0 11 4.553877 0.000000 1330 +mountain 0 10 4.653960 0.000000 1456 +bike 0 10 4.653960 0.000000 1468 +juan 0 9 4.753590 0.000000 1580 +xerox 0 8 4.875197 0.000000 1725 +compact 0 7 5.010635 0.000000 1907 +huttenloch 0 6 5.164786 0.000000 1983 +fraction 0 5 5.347108 0.000000 2259 +conot 0 5 5.347108 0.000000 2245 +stupid 0 5 5.347108 0.000000 2489 +hausdorff 0 4 5.568345 0.000000 2633 +identif 0 4 5.568345 0.000000 2773 +cvpr 0 4 5.568345 0.000000 2761 +geek 0 2 6.263398 0.000000 5083 +snowboard 0 2 6.263398 0.000000 5084 +professordph 0 1 6.957497 0.000000 10378 +eigenspac 0 1 6.957497 0.000000 10379 +digipap 0 1 6.957497 0.000000 10380 +viewabl 0 1 6.957497 0.000000 10381 +parc 0 1 6.957497 0.000000 10382 +attitud 0 1 6.957497 0.000000 10383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..4b7becd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +process 1 142 1.945910 1.945910 72 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +technolog 0 131 2.079442 0.000000 102 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +mani 0 92 2.397895 0.000000 150 +present 0 91 2.397895 0.000000 145 +wide 0 84 2.484907 0.000000 185 +method 0 80 2.564949 0.000000 213 +exampl 0 77 2.564949 0.000000 195 +june 0 79 2.564949 0.000000 214 +addit 0 74 2.639057 0.000000 228 +order 0 69 2.708050 0.000000 249 +written 0 63 2.772589 0.000000 278 +result 0 65 2.772589 0.000000 281 +foundat 0 62 2.772589 0.000000 286 +simpl 0 60 2.833213 0.000000 298 +thesi 0 57 2.890372 0.000000 327 +investig 0 51 2.995732 0.000000 353 +basic 0 50 3.044522 0.000000 360 +understand 0 47 3.091042 0.000000 384 +algebra 0 45 3.135494 0.000000 394 +better 0 45 3.135494 0.000000 401 +form 0 39 3.258097 0.000000 443 +theoret 0 39 3.258097 0.000000 446 +prototyp 0 38 3.295837 0.000000 463 +concurr 0 34 3.401197 0.000000 501 +express 0 32 3.465736 0.000000 540 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +computersci 0 30 3.555348 0.000000 562 +semant 0 29 3.583519 0.000000 587 +becom 0 28 3.610918 0.000000 603 +full 0 28 3.610918 0.000000 615 +effort 0 26 3.688879 0.000000 652 +compar 0 26 3.688879 0.000000 648 +input 0 23 3.806662 0.000000 727 +verif 0 20 3.951244 0.000000 826 +aid 0 18 4.060443 0.000000 904 +former 0 17 4.110874 0.000000 956 +edui 0 13 4.382027 0.000000 1193 +calculu 0 12 4.465908 0.000000 1203 +verifi 0 12 4.465908 0.000000 1261 +custom 0 10 4.653960 0.000000 1414 +latter 0 9 4.753590 0.000000 1522 +bloom 0 4 5.568345 0.000000 2913 +commonli 0 4 5.568345 0.000000 2877 +metatheori 0 3 5.857933 0.000000 3642 +allevi 0 3 5.857933 0.000000 3643 +checker 0 3 5.857933 0.000000 3644 +lnc 0 2 6.263398 0.000000 5085 +theproblem 0 2 6.263398 0.000000 4560 +inher 0 2 6.263398 0.000000 5086 +dsouza 0 1 6.957497 0.000000 10384 +ashvin 0 1 6.957497 0.000000 10385 +bard 0 1 6.957497 0.000000 10386 +oftool 0 1 6.957497 0.000000 10387 +andverif 0 1 6.957497 0.000000 10388 +withrespect 0 1 6.957497 0.000000 10389 +immediatelyavail 0 1 6.957497 0.000000 10390 +duplic 0 1 6.957497 0.000000 10391 +gso 0 1 6.957497 0.000000 10392 +loto 0 1 6.957497 0.000000 10393 +exploringappl 0 1 6.957497 0.000000 10394 +bdd 0 1 6.957497 0.000000 10395 +algebraterm 0 1 6.957497 0.000000 10396 +postscipt 0 1 6.957497 0.000000 10397 +lite 0 1 6.957497 0.000000 10398 +presentedth 0 1 6.957497 0.000000 10399 +compass 0 1 6.957497 0.000000 10400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..8653c978 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +cornel 0 215 1.386294 0.000000 23 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +sinc 0 90 2.397895 0.000000 159 +stuff 0 87 2.484907 0.000000 171 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +improv 0 62 2.772589 0.000000 289 +dept 0 64 2.772589 0.000000 291 +prof 0 64 2.772589 0.000000 273 +written 0 63 2.772589 0.000000 278 +visit 0 63 2.772589 0.000000 288 +might 0 41 3.218876 0.000000 426 +origin 0 38 3.295837 0.000000 472 +art 0 29 3.583519 0.000000 593 +alwai 0 24 3.761200 0.000000 691 +fine 0 20 3.951244 0.000000 822 +ever 0 19 4.007333 0.000000 872 +warn 0 14 4.317488 0.000000 1068 +minor 0 12 4.465908 0.000000 1237 +undergrad 0 9 4.753590 0.000000 1589 +risk 0 8 4.875197 0.000000 1689 +heavi 0 7 5.010635 0.000000 1841 +rubinfeld 0 6 5.164786 0.000000 1998 +ronitt 0 5 5.347108 0.000000 2265 +paint 0 5 5.347108 0.000000 2400 +turkei 0 4 5.568345 0.000000 2914 +funda 0 3 5.857933 0.000000 3645 +ergun 0 2 6.263398 0.000000 5087 +angri 0 2 6.263398 0.000000 5088 +dog 0 2 6.263398 0.000000 5089 +pagefunda 0 1 6.957497 0.000000 10401 +ergn 0 1 6.957497 0.000000 10402 +eduhi 0 1 6.957497 0.000000 10403 +studentin 0 1 6.957497 0.000000 10404 +programcheck 0 1 6.957497 0.000000 10405 +researchpag 0 1 6.957497 0.000000 10406 +izmir 0 1 6.957497 0.000000 10407 +bilkentunivers 0 1 6.957497 0.000000 10408 +ankara 0 1 6.957497 0.000000 10409 +encounterpag 0 1 6.957497 0.000000 10410 +turkish 0 1 6.957497 0.000000 10411 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..03c24f9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +problem 0 147 1.945910 0.000000 75 +professor 0 137 1.945910 0.000000 76 +click 0 142 1.945910 0.000000 78 +assign 0 135 1.945910 0.000000 66 +high 0 130 2.079442 0.000000 101 +theori 0 111 2.197225 0.000000 127 +version 0 113 2.197225 0.000000 122 +mathemat 0 108 2.197225 0.000000 123 +find 0 111 2.197225 0.000000 111 +intern 0 108 2.197225 0.000000 128 +proceed 0 93 2.397895 0.000000 152 +mani 0 92 2.397895 0.000000 150 +activ 0 84 2.484907 0.000000 182 +school 0 84 2.484907 0.000000 188 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +appear 0 78 2.564949 0.000000 210 +optim 0 79 2.564949 0.000000 197 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +upson 0 71 2.639057 0.000000 218 +practic 0 70 2.708050 0.000000 246 +januari 0 62 2.772589 0.000000 264 +complex 0 64 2.772589 0.000000 269 +improv 0 62 2.772589 0.000000 289 +foundat 0 62 2.772589 0.000000 286 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +summer 0 56 2.890372 0.000000 311 +cover 0 55 2.944439 0.000000 329 +particular 0 51 2.995732 0.000000 352 +fast 0 42 3.218876 0.000000 429 +linear 0 41 3.218876 0.000000 431 +annual 0 40 3.258097 0.000000 458 +industri 0 38 3.295837 0.000000 464 +approxim 0 35 3.401197 0.000000 509 +concurr 0 34 3.401197 0.000000 501 +survei 0 35 3.401197 0.000000 513 +bibliographi 0 34 3.401197 0.000000 518 +graph 0 30 3.555348 0.000000 576 +computersci 0 30 3.555348 0.000000 562 +bound 0 26 3.688879 0.000000 659 +proc 0 26 3.688879 0.000000 649 +aspect 0 25 3.737670 0.000000 663 +flow 0 24 3.761200 0.000000 700 +universityithaca 0 24 3.761200 0.000000 710 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +siam 0 21 3.912023 0.000000 800 +path 0 21 3.912023 0.000000 778 +theorem 0 21 3.912023 0.000000 786 +unit 0 21 3.912023 0.000000 779 +rout 0 21 3.912023 0.000000 793 +vlsi 0 21 3.912023 0.000000 795 +annot 0 21 3.912023 0.000000 775 +north 0 19 4.007333 0.000000 873 +separ 0 19 4.007333 0.000000 844 +sept 0 17 4.110874 0.000000 952 +spars 0 16 4.174387 0.000000 989 +polynomi 0 14 4.317488 0.000000 1069 +embed 0 14 4.317488 0.000000 1102 +discret 0 13 4.382027 0.000000 1165 +speak 0 12 4.465908 0.000000 1283 +arbitrari 0 11 4.553877 0.000000 1359 +itali 0 11 4.553877 0.000000 1378 +packet 0 10 4.653960 0.000000 1415 +strongli 0 10 4.653960 0.000000 1406 +preliminari 0 9 4.753590 0.000000 1480 +congress 0 9 4.753590 0.000000 1592 +combinatori 0 8 4.875197 0.000000 1629 +hallcornel 0 8 4.875197 0.000000 1757 +integ 0 8 4.875197 0.000000 1688 +capac 0 8 4.875197 0.000000 1740 +switch 0 8 4.875197 0.000000 1718 +daughter 0 7 5.010635 0.000000 1943 +maxim 0 7 5.010635 0.000000 1944 +handbook 0 6 5.164786 0.000000 2061 +rebecca 0 6 5.164786 0.000000 2174 +dens 0 6 5.164786 0.000000 2122 +inequ 0 6 5.164786 0.000000 2113 +holland 0 5 5.347108 0.000000 2490 +oncomput 0 5 5.347108 0.000000 2326 +stoc 0 5 5.347108 0.000000 2491 +fraction 0 5 5.347108 0.000000 2259 +proceedingsof 0 5 5.347108 0.000000 2331 +combinator 0 4 5.568345 0.000000 2915 +disjoint 0 4 5.568345 0.000000 2709 +graham 0 4 5.568345 0.000000 2817 +cut 0 4 5.568345 0.000000 2620 +stein 0 3 5.857933 0.000000 3646 +planar 0 3 5.857933 0.000000 3647 +thegener 0 3 5.857933 0.000000 3648 +combinatorica 0 3 5.857933 0.000000 3649 +pack 0 3 5.857933 0.000000 3597 +violat 0 3 5.857933 0.000000 3211 +tokyo 0 3 5.857933 0.000000 3622 +netherland 0 3 5.857933 0.000000 3650 +tardo 1 2 6.263398 6.263398 5090 +multicommod 0 2 6.263398 0.000000 4761 +lovasz 0 2 6.263398 0.000000 5091 +goldberg 0 2 6.263398 0.000000 4313 +hopp 0 2 6.263398 0.000000 5092 +kleinberg 0 2 6.263398 0.000000 5093 +julia 0 2 6.263398 0.000000 5094 +broadli 0 2 6.263398 0.000000 5095 +programmingproblem 0 2 6.263398 0.000000 4082 +appearedin 0 2 6.263398 0.000000 5096 +leighton 0 2 6.263398 0.000000 5097 +inmathemat 0 2 6.263398 0.000000 5098 +hasappear 0 2 6.263398 0.000000 5099 +goeman 0 2 6.263398 0.000000 5100 +williamson 0 2 6.263398 0.000000 5101 +diamet 0 2 6.263398 0.000000 5102 +tarjan 0 2 6.263398 0.000000 4278 +ori 0 1 6.957497 0.000000 10412 +shmoi 0 1 6.957497 0.000000 10413 +plotkin 0 1 6.957497 0.000000 10414 +approximationalgorithm 0 1 6.957497 0.000000 10415 +klein 0 1 6.957497 0.000000 10416 +grotschel 0 1 6.957497 0.000000 10417 +tardosassoci 0 1 6.957497 0.000000 10418 +engineeringphon 0 1 6.957497 0.000000 10419 +researchrec 0 1 6.957497 0.000000 10420 +mostlywork 0 1 6.957497 0.000000 10421 +networkproblem 0 1 6.957497 0.000000 10422 +paperssurvei 0 1 6.957497 0.000000 10423 +cutratio 0 1 6.957497 0.000000 10424 +fasterapproxim 0 1 6.957497 0.000000 10425 +problemwith 0 1 6.957497 0.000000 10426 +makedon 0 1 6.957497 0.000000 10427 +tragouda 0 1 6.957497 0.000000 10428 +flowproblem 0 1 6.957497 0.000000 10429 +annualacm 0 1 6.957497 0.000000 10430 +thefound 0 1 6.957497 0.000000 10431 +designproblem 0 1 6.957497 0.000000 10432 +discretealgorithm 0 1 6.957497 0.000000 10433 +someevacu 0 1 6.957497 0.000000 10434 +ondiscret 0 1 6.957497 0.000000 10435 +quickest 0 1 6.957497 0.000000 10436 +transship 0 1 6.957497 0.000000 10437 +theproceed 0 1 6.957497 0.000000 10438 +steiner 0 1 6.957497 0.000000 10439 +multicut 0 1 6.957497 0.000000 10440 +pathsproblem 0 1 6.957497 0.000000 10441 +annualiee 0 1 6.957497 0.000000 10442 +rabani 0 1 6.957497 0.000000 10443 +fleischer 0 1 6.957497 0.000000 10444 +comb 0 1 6.957497 0.000000 10445 +ipco 0 1 6.957497 0.000000 10446 +kort 0 1 6.957497 0.000000 10447 +lovaszand 0 1 6.957497 0.000000 10448 +schrijver 0 1 6.957497 0.000000 10449 +inoptim 0 1 6.957497 0.000000 10450 +ofmathematician 0 1 6.957497 0.000000 10451 +kyoto 0 1 6.957497 0.000000 10452 +inproc 0 1 6.957497 0.000000 10453 +maastricht 0 1 6.957497 0.000000 10454 +networkoptim 0 1 6.957497 0.000000 10455 +netflow 0 1 6.957497 0.000000 10456 +miniato 0 1 6.957497 0.000000 10457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..c3898e70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +cornel 0 215 1.386294 0.000000 23 +california 0 46 3.091042 0.000000 388 +departmentcornel 0 5 5.347108 0.000000 2275 +franci 1 3 5.857933 5.857933 3287 +universitycomput 0 3 5.857933 0.000000 3651 +berkeleymathemat 0 1 6.957497 0.000000 10458 +departmentcomput 0 1 6.957497 0.000000 10459 +departmenthumorfcc 0 1 6.957497 0.000000 10460 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..592c7022 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +world 1 115 2.197225 2.197225 126 +felix 1 2 6.263398 6.263398 5103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..d06dab64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +click 0 142 1.945910 0.000000 78 +report 0 131 2.079442 0.000000 92 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +person 0 111 2.197225 0.000000 117 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +homepag 0 93 2.397895 0.000000 148 +complet 0 77 2.564949 0.000000 208 +april 0 77 2.564949 0.000000 196 +upson 0 71 2.639057 0.000000 218 +test 0 66 2.708050 0.000000 252 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +complex 0 64 2.772589 0.000000 269 +juli 0 60 2.833213 0.000000 305 +algebra 0 45 3.135494 0.000000 394 +math 0 44 3.135494 0.000000 402 +semant 0 29 3.583519 0.000000 587 +universityithaca 0 24 3.761200 0.000000 710 +greg 0 24 3.761200 0.000000 695 +sciencecornel 0 22 3.850148 0.000000 768 +smith 1 20 3.951244 3.951244 820 +grad 0 20 3.951244 0.000000 837 +decid 0 14 4.317488 0.000000 1075 +food 0 12 4.465908 0.000000 1285 +sundai 0 10 4.653960 0.000000 1387 +morrisett 0 5 5.347108 0.000000 2263 +dexter 0 4 5.568345 0.000000 2855 +kozen 0 4 5.568345 0.000000 2619 +catch 0 4 5.568345 0.000000 2602 +halldepart 0 3 5.857933 0.000000 3641 +cohen 0 3 5.857933 0.000000 3652 +erni 0 2 6.263398 0.000000 5104 +epicuri 0 2 6.263398 0.000000 5105 +frederick 0 1 6.957497 0.000000 10461 +kleen 0 1 6.957497 0.000000 10462 +homepagefrederick 0 1 6.957497 0.000000 10463 +zine 0 1 6.957497 0.000000 10464 +cartalk 0 1 6.957497 0.000000 10465 +clack 0 1 6.957497 0.000000 10466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..31730a51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +phone 0 175 1.791759 0.000000 45 +associ 0 93 2.397895 0.000000 151 +institut 0 84 2.484907 0.000000 187 +improv 0 62 2.772589 0.000000 289 +electron 0 47 3.091042 0.000000 379 +either 0 35 3.401197 0.000000 506 +post 0 35 3.401197 0.000000 505 +actual 0 28 3.610918 0.000000 604 +doctor 0 24 3.761200 0.000000 709 +happi 0 14 4.317488 0.000000 1079 +frank 1 9 4.753590 4.753590 1568 +matter 0 8 4.875197 0.000000 1627 +xerox 0 8 4.875197 0.000000 1725 +planet 0 4 5.568345 0.000000 2912 +adelstein 0 1 6.957497 0.000000 10467 +checkout 0 1 6.957497 0.000000 10468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..af16ce62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +model 0 145 1.945910 0.000000 69 +year 0 148 1.945910 0.000000 84 +postscript 0 131 2.079442 0.000000 90 +theori 0 111 2.197225 0.000000 127 +version 0 113 2.197225 0.000000 122 +peopl 0 96 2.302585 0.000000 132 +center 0 88 2.397895 0.000000 158 +section 0 94 2.397895 0.000000 149 +resum 0 79 2.564949 0.000000 217 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +previou 0 62 2.772589 0.000000 290 +visual 0 48 3.044522 0.000000 372 +http 0 41 3.218876 0.000000 420 +soon 0 36 3.367296 0.000000 494 +next 0 34 3.401197 0.000000 517 +anim 0 31 3.496508 0.000000 557 +berkelei 0 26 3.688879 0.000000 657 +other 0 24 3.761200 0.000000 697 +sciencecornel 0 22 3.850148 0.000000 768 +hobbi 0 16 4.174387 0.000000 1009 +mayb 0 15 4.248495 0.000000 1014 +drive 0 15 4.248495 0.000000 1052 +shown 0 14 4.317488 0.000000 1080 +massachusett 0 14 4.317488 0.000000 1118 +affili 0 13 4.382027 0.000000 1194 +hewlett 0 8 4.875197 0.000000 1709 +guitar 0 8 4.875197 0.000000 1758 +lawrenc 0 7 5.010635 0.000000 1908 +fred 1 6 5.164786 5.164786 2072 +photographi 0 6 5.164786 0.000000 2146 +feet 0 5 5.347108 0.000000 2492 +snail 0 4 5.568345 0.000000 2916 +yuan 0 3 5.857933 0.000000 3653 +chelmsford 0 3 5.857933 0.000000 3564 +binghamton 0 3 5.857933 0.000000 3544 +apollo 0 1 6.957497 0.000000 10469 +scramo 0 1 6.957497 0.000000 10470 +midi 0 1 6.957497 0.000000 10471 +choreograph 0 1 6.957497 0.000000 10472 +vpla 0 1 6.957497 0.000000 10473 +animationlink 0 1 6.957497 0.000000 10474 +packardlink 0 1 6.957497 0.000000 10475 +laboratoryinterest 0 1 6.957497 0.000000 10476 +cello 0 1 6.957497 0.000000 10477 +aquarium 0 1 6.957497 0.000000 10478 +burl 0 1 6.957497 0.000000 10479 +fredhsu 0 1 6.957497 0.000000 10480 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..c167fdbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +postscript 0 131 2.079442 0.000000 90 +compil 0 122 2.079442 0.000000 96 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +final 0 116 2.197225 0.000000 108 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +school 0 84 2.484907 0.000000 188 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +novemb 0 81 2.484907 0.000000 179 +chang 0 82 2.484907 0.000000 163 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +complet 0 77 2.564949 0.000000 208 +june 0 79 2.564949 0.000000 214 +want 0 79 2.564949 0.000000 199 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +intellig 0 72 2.639057 0.000000 225 +involv 0 71 2.639057 0.000000 227 +name 0 72 2.639057 0.000000 220 +nation 0 74 2.639057 0.000000 240 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +degre 0 69 2.708050 0.000000 259 +prof 0 64 2.772589 0.000000 273 +artifici 0 63 2.772589 0.000000 280 +back 0 60 2.833213 0.000000 297 +colleg 0 61 2.833213 0.000000 300 +major 0 56 2.890372 0.000000 315 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +undergradu 0 54 2.944439 0.000000 338 +still 0 50 3.044522 0.000000 362 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +get 0 46 3.091042 0.000000 380 +better 0 45 3.135494 0.000000 401 +anoth 0 45 3.135494 0.000000 408 +long 0 43 3.178054 0.000000 413 +littl 0 39 3.258097 0.000000 454 +probabl 0 40 3.258097 0.000000 455 +brian 0 38 3.295837 0.000000 466 +mean 0 37 3.332205 0.000000 477 +michael 0 35 3.401197 0.000000 514 +approxim 0 35 3.401197 0.000000 509 +india 0 32 3.465736 0.000000 550 +taken 0 31 3.496508 0.000000 555 +actual 0 28 3.610918 0.000000 604 +hope 0 28 3.610918 0.000000 610 +never 0 25 3.737670 0.000000 671 +sciencecornel 0 22 3.850148 0.000000 768 +deal 0 22 3.850148 0.000000 736 +born 0 21 3.912023 0.000000 798 +leav 0 21 3.912023 0.000000 772 +smith 0 20 3.951244 0.000000 820 +wonder 0 20 3.951244 0.000000 815 +region 0 19 4.007333 0.000000 875 +miss 0 19 4.007333 0.000000 866 +four 0 18 4.060443 0.000000 905 +upon 0 16 4.174387 0.000000 978 +took 0 16 4.174387 0.000000 1010 +match 0 16 4.174387 0.000000 965 +goe 0 15 4.248495 0.000000 1044 +precis 0 15 4.248495 0.000000 1023 +earlier 0 13 4.382027 0.000000 1140 +someon 0 13 4.382027 0.000000 1128 +land 0 12 4.465908 0.000000 1273 +outsid 0 12 4.465908 0.000000 1219 +went 0 12 4.465908 0.000000 1279 +holidai 0 12 4.465908 0.000000 1224 +noth 0 11 4.553877 0.000000 1328 +light 0 9 4.753590 0.000000 1533 +vineet 0 8 4.875197 0.000000 1639 +pursu 0 7 5.010635 0.000000 1902 +seshadri 0 7 5.010635 0.000000 1803 +keshav 0 7 5.010635 0.000000 1852 +lucki 0 6 5.164786 0.000000 2163 +praveen 0 6 5.164786 0.000000 1996 +srinivasan 0 6 5.164786 0.000000 2175 +somewher 0 6 5.164786 0.000000 2176 +babi 0 5 5.347108 0.000000 2493 +interfer 0 5 5.347108 0.000000 2494 +greater 0 5 5.347108 0.000000 2258 +ashish 0 5 5.347108 0.000000 2473 +engineeringdepart 0 4 5.568345 0.000000 2917 +dive 0 3 5.857933 0.000000 3654 +straight 0 3 5.857933 0.000000 3655 +indira 0 3 5.857933 0.000000 3656 +karnataka 0 2 6.263398 0.000000 5106 +bharat 0 2 6.263398 0.000000 5107 +cute 0 2 6.263398 0.000000 5108 +incident 0 2 6.263398 0.000000 5109 +bangalor 0 2 6.263398 0.000000 5110 +that 0 2 6.263398 0.000000 5111 +conquer 0 2 6.263398 0.000000 5112 +aastha 0 2 6.263398 0.000000 5005 +ankit 0 2 6.263398 0.000000 4966 +deepak 0 1 6.957497 0.000000 10481 +balakrishna 0 1 6.957497 0.000000 10482 +balakrishnamast 0 1 6.957497 0.000000 10483 +resumeeducationcoursesperson 0 1 6.957497 0.000000 10484 +surathk 0 1 6.957497 0.000000 10485 +specialis 0 1 6.957497 0.000000 10486 +godfrei 0 1 6.957497 0.000000 10487 +chubbi 0 1 6.957497 0.000000 10488 +weigh 0 1 6.957497 0.000000 10489 +pound 0 1 6.957497 0.000000 10490 +divin 0 1 6.957497 0.000000 10491 +aishwarya 0 1 6.957497 0.000000 10492 +miniscul 0 1 6.957497 0.000000 10493 +krec 0 1 6.957497 0.000000 10494 +here 0 1 6.957497 0.000000 10495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..2227bcbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +includ 0 208 1.609438 0.000000 42 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +sinc 0 90 2.397895 0.000000 159 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +solut 0 82 2.484907 0.000000 162 +educ 0 86 2.484907 0.000000 191 +master 0 76 2.564949 0.000000 216 +good 0 77 2.564949 0.000000 200 +onlin 0 75 2.639057 0.000000 223 +multimedia 0 68 2.708050 0.000000 258 +degre 0 69 2.708050 0.000000 259 +plan 0 65 2.772589 0.000000 272 +unix 0 58 2.890372 0.000000 308 +music 0 42 3.218876 0.000000 436 +respons 0 37 3.332205 0.000000 476 +global 0 34 3.401197 0.000000 520 +administr 0 27 3.637586 0.000000 628 +leav 0 21 3.912023 0.000000 772 +bachelor 0 17 4.110874 0.000000 957 +warn 0 14 4.317488 0.000000 1068 +hopefulli 0 14 4.317488 0.000000 1071 +cricket 0 7 5.010635 0.000000 1945 +publicationsth 0 4 5.568345 0.000000 2859 +ghia 0 2 6.263398 0.000000 4934 +asif 0 2 6.263398 0.000000 4933 +mywww 0 2 6.263398 0.000000 5113 +uddin 0 1 6.957497 0.000000 10496 +ghiasasif 0 1 6.957497 0.000000 10497 +constructioni 0 1 6.957497 0.000000 10498 +karachi 0 1 6.957497 0.000000 10499 +pakistan 0 1 6.957497 0.000000 10500 +installationso 0 1 6.957497 0.000000 10501 +astronomyasif 0 1 6.957497 0.000000 10502 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..9423795f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +visit 0 63 2.772589 0.000000 288 +move 0 47 3.091042 0.000000 382 +http 0 41 3.218876 0.000000 420 +berkelei 0 26 3.688879 0.000000 657 +million 0 5 5.347108 0.000000 2495 +dglaser 0 1 6.957497 0.000000 10503 +htmlpleas 0 1 6.957497 0.000000 10504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..6d06c7bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,334 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +year 0 148 1.945910 0.000000 84 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +topic 0 114 2.197225 0.000000 110 +assist 0 112 2.197225 0.000000 113 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +sinc 0 90 2.397895 0.000000 159 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +call 0 91 2.397895 0.000000 153 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +help 0 83 2.484907 0.000000 175 +level 0 87 2.484907 0.000000 180 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +know 0 80 2.564949 0.000000 198 +master 0 76 2.564949 0.000000 216 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +good 0 77 2.564949 0.000000 200 +complet 0 77 2.564949 0.000000 208 +logic 0 71 2.639057 0.000000 230 +david 0 71 2.639057 0.000000 232 +symposium 0 72 2.639057 0.000000 238 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +effici 0 73 2.639057 0.000000 233 +servic 0 72 2.639057 0.000000 236 +receiv 0 66 2.708050 0.000000 244 +goal 0 66 2.708050 0.000000 250 +written 0 63 2.772589 0.000000 278 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +foundat 0 62 2.772589 0.000000 286 +import 0 65 2.772589 0.000000 282 +colleg 0 61 2.833213 0.000000 300 +best 0 59 2.833213 0.000000 299 +plai 0 60 2.833213 0.000000 307 +content 0 59 2.833213 0.000000 302 +thesi 0 57 2.890372 0.000000 327 +three 0 54 2.944439 0.000000 330 +tabl 0 51 2.995732 0.000000 346 +particular 0 51 2.995732 0.000000 352 +numer 0 49 3.044522 0.000000 369 +telephon 0 50 3.044522 0.000000 373 +give 0 50 3.044522 0.000000 359 +move 0 47 3.091042 0.000000 382 +understand 0 47 3.091042 0.000000 384 +effect 0 46 3.091042 0.000000 385 +math 0 44 3.135494 0.000000 402 +made 0 44 3.135494 0.000000 398 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +editor 0 41 3.218876 0.000000 433 +york 0 41 3.218876 0.000000 435 +edit 0 42 3.218876 0.000000 418 +howev 0 41 3.218876 0.000000 422 +announc 0 40 3.258097 0.000000 441 +programm 0 39 3.258097 0.000000 445 +societi 0 40 3.258097 0.000000 456 +author 0 39 3.258097 0.000000 450 +late 0 40 3.258097 0.000000 439 +paul 0 38 3.295837 0.000000 471 +vita 0 38 3.295837 0.000000 473 +correct 0 38 3.295837 0.000000 462 +open 0 38 3.295837 0.000000 469 +formal 0 37 3.332205 0.000000 478 +respons 0 37 3.332205 0.000000 476 +china 0 37 3.332205 0.000000 487 +short 0 36 3.367296 0.000000 499 +award 0 34 3.401197 0.000000 523 +survei 0 35 3.401197 0.000000 513 +return 0 34 3.401197 0.000000 502 +curriculum 0 33 3.433987 0.000000 535 +go 0 33 3.433987 0.000000 529 +articl 0 33 3.433987 0.000000 530 +board 0 33 3.433987 0.000000 528 +obtain 0 33 3.433987 0.000000 534 +dissert 0 32 3.465736 0.000000 549 +concept 0 32 3.465736 0.000000 537 +taken 0 31 3.496508 0.000000 555 +abl 0 30 3.555348 0.000000 566 +produc 0 30 3.555348 0.000000 572 +chair 0 29 3.583519 0.000000 596 +semant 0 29 3.583519 0.000000 587 +art 0 29 3.583519 0.000000 593 +weather 0 28 3.610918 0.000000 618 +full 0 28 3.610918 0.000000 615 +usual 0 28 3.610918 0.000000 608 +mine 0 26 3.688879 0.000000 654 +repres 0 26 3.688879 0.000000 656 +compar 0 26 3.688879 0.000000 648 +enjoi 0 26 3.688879 0.000000 660 +concern 0 25 3.737670 0.000000 666 +spent 0 25 3.737670 0.000000 676 +toward 0 25 3.737670 0.000000 668 +aspect 0 25 3.737670 0.000000 663 +sport 0 25 3.737670 0.000000 683 +universityithaca 0 24 3.761200 0.000000 710 +doctor 0 24 3.761200 0.000000 709 +known 0 24 3.761200 0.000000 702 +interpret 0 24 3.761200 0.000000 686 +methodolog 0 23 3.806662 0.000000 733 +proof 0 23 3.806662 0.000000 720 +serv 0 22 3.850148 0.000000 758 +period 0 22 3.850148 0.000000 743 +almost 0 22 3.850148 0.000000 742 +william 0 22 3.850148 0.000000 765 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +born 0 21 3.912023 0.000000 798 +programminglanguag 0 21 3.912023 0.000000 782 +fact 0 21 3.912023 0.000000 780 +busi 0 21 3.912023 0.000000 784 +hous 0 21 3.912023 0.000000 801 +tenni 0 20 3.951244 0.000000 838 +item 0 19 4.007333 0.000000 856 +left 0 19 4.007333 0.000000 851 +ever 0 19 4.007333 0.000000 872 +figur 0 18 4.060443 0.000000 903 +four 0 18 4.060443 0.000000 905 +stand 0 18 4.060443 0.000000 891 +stanford 0 17 4.110874 0.000000 955 +germani 0 17 4.110874 0.000000 946 +whether 0 17 4.110874 0.000000 918 +alreadi 0 16 4.174387 0.000000 963 +later 0 15 4.248495 0.000000 1043 +susan 0 15 4.248495 0.000000 1050 +contribut 0 15 4.248495 0.000000 1021 +rate 0 15 4.248495 0.000000 1037 +becam 0 14 4.317488 0.000000 1117 +latex 0 14 4.317488 0.000000 1064 +manner 0 14 4.317488 0.000000 1074 +conduct 0 14 4.317488 0.000000 1065 +wife 0 13 4.382027 0.000000 1196 +essenti 0 13 4.382027 0.000000 1137 +individu 0 13 4.382027 0.000000 1126 +believ 0 13 4.382027 0.000000 1187 +went 0 12 4.465908 0.000000 1279 +speak 0 12 4.465908 0.000000 1283 +grant 0 12 4.465908 0.000000 1216 +excit 0 11 4.553877 0.000000 1329 +fellowship 0 10 4.653960 0.000000 1460 +thecomput 0 10 4.653960 0.000000 1408 +end 0 9 4.753590 0.000000 1567 +respect 0 9 4.753590 0.000000 1545 +softbal 0 9 4.753590 0.000000 1594 +volleybal 0 9 4.753590 0.000000 1598 +swim 0 9 4.753590 0.000000 1599 +mention 0 9 4.753590 0.000000 1569 +lewi 0 8 4.875197 0.000000 1700 +hallcornel 0 8 4.875197 0.000000 1757 +guggenheim 0 8 4.875197 0.000000 1759 +told 0 8 4.875197 0.000000 1658 +joke 0 8 4.875197 0.000000 1620 +wire 0 8 4.875197 0.000000 1747 +illinoi 0 7 5.010635 0.000000 1941 +pagecomput 0 7 5.010635 0.000000 1900 +marri 0 7 5.010635 0.000000 1946 +notion 0 7 5.010635 0.000000 1947 +explain 0 7 5.010635 0.000000 1816 +snow 0 6 5.164786 0.000000 2031 +outstand 0 6 5.164786 0.000000 2136 +spare 0 6 5.164786 0.000000 2177 +golf 0 6 5.164786 0.000000 2178 +truth 0 6 5.164786 0.000000 2179 +elain 0 5 5.347108 0.000000 2496 +joseph 0 5 5.347108 0.000000 2327 +these 0 5 5.347108 0.000000 2482 +plant 0 5 5.347108 0.000000 2497 +feder 0 5 5.347108 0.000000 2266 +ofparallel 0 5 5.347108 0.000000 2380 +interfer 0 5 5.347108 0.000000 2494 +began 0 5 5.347108 0.000000 2498 +rewrit 0 5 5.347108 0.000000 2367 +sing 0 5 5.347108 0.000000 2499 +yield 0 5 5.347108 0.000000 2458 +proud 0 4 5.568345 0.000000 2918 +raman 0 4 5.568345 0.000000 2827 +queen 0 4 5.568345 0.000000 2919 +naval 0 4 5.568345 0.000000 2920 +birthdai 0 4 5.568345 0.000000 2800 +bloom 0 4 5.568345 0.000000 2913 +substanti 0 4 5.568345 0.000000 2921 +schneider 0 4 5.568345 0.000000 2868 +ping 0 4 5.568345 0.000000 2922 +gri 0 3 5.857933 0.000000 3569 +munich 0 3 5.857933 0.000000 3570 +twin 0 3 5.857933 0.000000 3657 +biographi 0 3 5.857933 0.000000 3658 +laugh 0 3 5.857933 0.000000 3659 +freshman 0 3 5.857933 0.000000 3462 +dimac 0 3 5.857933 0.000000 3574 +assistantship 0 3 5.857933 0.000000 3660 +langaug 0 3 5.857933 0.000000 3661 +blind 0 3 5.857933 0.000000 3662 +serious 0 3 5.857933 0.000000 3663 +researchassoci 0 3 5.857933 0.000000 3664 +pong 0 3 5.857933 0.000000 3371 +audienc 0 3 5.857933 0.000000 3180 +pagedavid 0 2 6.263398 0.000000 5114 +sophomor 0 2 6.263398 0.000000 4695 +polya 0 2 6.263398 0.000000 4939 +weapon 0 2 6.263398 0.000000 5115 +degreein 0 2 6.263398 0.000000 5116 +manfr 0 2 6.263398 0.000000 4949 +bauer 0 2 6.263398 0.000000 5117 +cake 0 2 6.263398 0.000000 5118 +booth 0 2 6.263398 0.000000 5119 +theamerican 0 2 6.263398 0.000000 5120 +afip 0 2 6.263398 0.000000 4300 +andt 0 2 6.263398 0.000000 5121 +spoken 0 2 6.263398 0.000000 5122 +researchinterest 0 2 6.263398 0.000000 5123 +acta 0 2 6.263398 0.000000 5124 +informatica 0 2 6.263398 0.000000 5125 +andtool 0 2 6.263398 0.000000 5126 +grieswilliam 0 1 6.957497 0.000000 10505 +engineeringdr 0 1 6.957497 0.000000 10506 +formaldevelop 0 1 6.957497 0.000000 10507 +asinterest 0 1 6.957497 0.000000 10508 +researchin 0 1 6.957497 0.000000 10509 +taughta 0 1 6.957497 0.000000 10510 +anoverrid 0 1 6.957497 0.000000 10511 +edushort 0 1 6.957497 0.000000 10512 +griesi 0 1 6.957497 0.000000 10513 +flush 0 1 6.957497 0.000000 10514 +iescap 0 1 6.957497 0.000000 10515 +workfor 0 1 6.957497 0.000000 10516 +civilian 0 1 6.957497 0.000000 10517 +amathematician 0 1 6.957497 0.000000 10518 +fewmonth 0 1 6.957497 0.000000 10519 +twogerman 0 1 6.957497 0.000000 10520 +ruedig 0 1 6.957497 0.000000 10521 +wiehl 0 1 6.957497 0.000000 10522 +algol 0 1 6.957497 0.000000 10523 +compilerfor 0 1 6.957497 0.000000 10524 +implementrecurs 0 1 6.957497 0.000000 10525 +stoer 0 1 6.957497 0.000000 10526 +wasin 0 1 6.957497 0.000000 10527 +notyet 0 1 6.957497 0.000000 10528 +kosher 0 1 6.957497 0.000000 10529 +thebirthdai 0 1 6.957497 0.000000 10530 +intown 0 1 6.957497 0.000000 10531 +whichha 0 1 6.957497 0.000000 10532 +wasdepart 0 1 6.957497 0.000000 10533 +lewisprofessor 0 1 6.957497 0.000000 10534 +contentsi 0 1 6.957497 0.000000 10535 +mytext 0 1 6.957497 0.000000 10536 +writingand 0 1 6.957497 0.000000 10537 +thewond 0 1 6.957497 0.000000 10538 +wherey 0 1 6.957497 0.000000 10539 +contributionsto 0 1 6.957497 0.000000 10540 +sigcseaward 0 1 6.957497 0.000000 10541 +clarkaward 0 1 6.957497 0.000000 10542 +advise 0 1 6.957497 0.000000 10543 +susanowicki 0 1 6.957497 0.000000 10544 +laid 0 1 6.957497 0.000000 10545 +freeness 0 1 6.957497 0.000000 10546 +bestpap 0 1 6.957497 0.000000 10547 +sthesi 0 1 6.957497 0.000000 10548 +designedand 0 1 6.957497 0.000000 10549 +printedor 0 1 6.957497 0.000000 10550 +speakmathemat 0 1 6.957497 0.000000 10551 +audiocassett 0 1 6.957497 0.000000 10552 +officein 0 1 6.957497 0.000000 10553 +taulbe 0 1 6.957497 0.000000 10554 +responsesfrom 0 1 6.957497 0.000000 10555 +noother 0 1 6.957497 0.000000 10556 +itrequir 0 1 6.957497 0.000000 10557 +sendin 0 1 6.957497 0.000000 10558 +questionnair 0 1 6.957497 0.000000 10559 +forchair 0 1 6.957497 0.000000 10560 +andrespons 0 1 6.957497 0.000000 10561 +takean 0 1 6.957497 0.000000 10562 +willsuggest 0 1 6.957497 0.000000 10563 +servewher 0 1 6.957497 0.000000 10564 +fredb 0 1 6.957497 0.000000 10565 +andmonograph 0 1 6.957497 0.000000 10566 +isplit 0 1 6.957497 0.000000 10567 +pant 0 1 6.957497 0.000000 10568 +alectur 0 1 6.957497 0.000000 10569 +turnedaround 0 1 6.957497 0.000000 10570 +spoke 0 1 6.957497 0.000000 10571 +everyonelaugh 0 1 6.957497 0.000000 10572 +justsaid 0 1 6.957497 0.000000 10573 +barbershop 0 1 6.957497 0.000000 10574 +andgilbert 0 1 6.957497 0.000000 10575 +sullivan 0 1 6.957497 0.000000 10576 +carpentri 0 1 6.957497 0.000000 10577 +remodel 0 1 6.957497 0.000000 10578 +considerablesatisfact 0 1 6.957497 0.000000 10579 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..5778679b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +first 0 140 1.945910 0.000000 71 +learn 0 86 2.484907 0.000000 170 +html 0 75 2.639057 0.000000 235 +week 0 52 2.995732 0.000000 343 +express 0 32 3.465736 0.000000 540 +sciencecornel 0 22 3.850148 0.000000 768 +shop 0 10 4.653960 0.000000 1469 +bore 0 7 5.010635 0.000000 1948 +alex 0 6 5.164786 0.000000 2130 +grinzayd 1 1 6.957497 6.957497 10580 +homepagealex 0 1 6.957497 0.000000 10581 +grinzaydm 0 1 6.957497 0.000000 10582 +universitytel 0 1 6.957497 0.000000 10583 +necx 0 1 6.957497 0.000000 10584 +directinternet 0 1 6.957497 0.000000 10585 +networkcomput 0 1 6.957497 0.000000 10586 +damarkwarn 0 1 6.957497 0.000000 10587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..61f05e0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +homepag 0 93 2.397895 0.000000 148 +second 0 81 2.484907 0.000000 166 +activ 0 84 2.484907 0.000000 182 +novemb 0 81 2.484907 0.000000 179 +complet 0 77 2.564949 0.000000 208 +master 0 76 2.564949 0.000000 216 +messag 0 76 2.564949 0.000000 212 +involv 0 71 2.639057 0.000000 227 +degre 0 69 2.708050 0.000000 259 +ithaca 0 65 2.772589 0.000000 294 +sever 0 56 2.890372 0.000000 322 +advisor 0 51 2.995732 0.000000 355 +york 0 41 3.218876 0.000000 435 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +split 0 14 4.317488 0.000000 1078 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +scienceat 0 11 4.553877 0.000000 1375 +charg 0 9 4.753590 0.000000 1582 +grzegorz 0 4 5.568345 0.000000 2923 +czajkowski 0 4 5.568345 0.000000 2924 +cuc 0 4 5.568345 0.000000 2630 +poland 0 3 5.857933 0.000000 3665 +grze 0 1 6.957497 0.000000 10588 +czajkowskidepart 0 1 6.957497 0.000000 10589 +krakow 0 1 6.957497 0.000000 10590 +administ 0 1 6.957497 0.000000 10591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..f2e4b311 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +like 0 132 1.945910 0.000000 81 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +teach 0 108 2.197225 0.000000 112 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +mathemat 0 108 2.197225 0.000000 123 +check 0 115 2.197225 0.000000 118 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +activ 0 84 2.484907 0.000000 182 +resum 0 79 2.564949 0.000000 217 +upson 0 71 2.639057 0.000000 218 +knowledg 0 67 2.708050 0.000000 243 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +septemb 0 65 2.772589 0.000000 274 +best 0 59 2.833213 0.000000 299 +reason 0 57 2.890372 0.000000 318 +detail 0 57 2.890372 0.000000 321 +talk 0 53 2.944439 0.000000 336 +case 0 51 2.995732 0.000000 351 +give 0 50 3.044522 0.000000 359 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +describ 0 45 3.135494 0.000000 400 +continu 0 39 3.258097 0.000000 448 +probabl 0 40 3.258097 0.000000 455 +field 0 37 3.332205 0.000000 482 +game 0 36 3.367296 0.000000 498 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +scientist 0 31 3.496508 0.000000 560 +focus 0 29 3.583519 0.000000 584 +semant 0 29 3.583519 0.000000 587 +subject 0 26 3.688879 0.000000 647 +although 0 25 3.737670 0.000000 667 +princeton 0 15 4.248495 0.000000 1042 +econom 0 13 4.382027 0.000000 1184 +someon 0 13 4.382027 0.000000 1128 +mainli 0 10 4.653960 0.000000 1432 +sentenc 0 10 4.653960 0.000000 1413 +uncertainti 0 7 5.010635 0.000000 1882 +boundari 0 7 5.010635 0.000000 1929 +gave 0 7 5.010635 0.000000 1922 +philosoph 0 7 5.010635 0.000000 1904 +li 0 5 5.347108 0.000000 2500 +hallithaca 0 4 5.568345 0.000000 2894 +universitycomput 0 3 5.857933 0.000000 3651 +halpern 1 1 6.957497 6.957497 10592 +pagejoseph 0 1 6.957497 0.000000 10593 +professorcornel 0 1 6.957497 0.000000 10594 +economist 0 1 6.957497 0.000000 10595 +abouta 0 1 6.957497 0.000000 10596 +sequel 0 1 6.957497 0.000000 10597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..0b1bf2a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +cornel 0 215 1.386294 0.000000 23 +construct 0 139 1.945910 0.000000 82 +tsuneshi 1 1 6.957497 6.957497 10598 +hashimoto 1 1 6.957497 6.957497 10599 +hashimototsuneshi 0 1 6.957497 0.000000 10600 +hashimotothi 0 1 6.957497 0.000000 10601 +cstsuneshi 0 1 6.957497 0.000000 10602 +hashi 0 1 6.957497 0.000000 10603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..f2d54f9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +teach 0 108 2.197225 0.000000 112 +commun 0 95 2.397895 0.000000 157 +novemb 0 81 2.484907 0.000000 179 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +unix 0 58 2.890372 0.000000 308 +mark 0 44 3.135494 0.000000 403 +proof 0 23 3.806662 0.000000 720 +horu 0 14 4.317488 0.000000 1116 +nuprl 0 10 4.653960 0.000000 1402 +hockei 0 8 4.875197 0.000000 1760 +hayden 1 4 5.568345 5.568345 2844 +tast 0 3 5.857933 0.000000 3666 +ensembl 0 2 6.263398 0.000000 4854 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..8100a368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +improv 0 62 2.772589 0.000000 289 +cyber 0 4 5.568345 0.000000 2909 +pond 0 2 6.263398 0.000000 5127 +heji 1 1 6.957497 6.957497 10604 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..0b8424be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +year 0 148 1.945910 0.000000 84 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +faculti 0 56 2.890372 0.000000 325 +undergradu 0 54 2.944439 0.000000 338 +advisor 0 51 2.995732 0.000000 355 +third 0 43 3.178054 0.000000 412 +china 0 37 3.332205 0.000000 487 +berkelei 0 26 3.688879 0.000000 657 +born 0 21 3.912023 0.000000 798 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +hallithaca 0 4 5.568345 0.000000 2894 +shanghai 0 4 5.568345 0.000000 2925 +universitydept 0 3 5.857933 0.000000 3602 +deyu 0 1 6.957497 0.000000 10606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..caf9e7c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +machin 0 129 2.079442 0.000000 95 +mathemat 0 108 2.197225 0.000000 123 +check 0 115 2.197225 0.000000 118 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +homepag 0 93 2.397895 0.000000 148 +academ 0 82 2.484907 0.000000 178 +learn 0 86 2.484907 0.000000 170 +resourc 0 81 2.484907 0.000000 172 +master 0 76 2.564949 0.000000 216 +optim 0 79 2.564949 0.000000 197 +upson 0 71 2.639057 0.000000 218 +appli 0 71 2.639057 0.000000 226 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +back 0 60 2.833213 0.000000 297 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +bibliographi 0 34 3.401197 0.000000 518 +chines 0 29 3.583519 0.000000 595 +retriev 0 27 3.637586 0.000000 621 +motion 0 24 3.761200 0.000000 699 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +annot 0 21 3.912023 0.000000 775 +beij 0 19 4.007333 0.000000 876 +track 0 15 4.248495 0.000000 1029 +tsinghua 0 13 4.382027 0.000000 1195 +huang 0 12 4.465908 0.000000 1202 +thedepart 0 11 4.553877 0.000000 1350 +scienceat 0 11 4.553877 0.000000 1375 +fellowship 0 10 4.653960 0.000000 1460 +christian 0 7 5.010635 0.000000 1949 +ramin 0 7 5.010635 0.000000 1820 +zabih 0 6 5.164786 0.000000 2138 +mission 0 5 5.347108 0.000000 2465 +jing 0 3 5.857933 0.000000 3521 +bachelorand 0 2 6.263398 0.000000 5128 +chinami 0 2 6.263398 0.000000 5129 +evangel 0 1 6.957497 0.000000 10605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..027baa23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +databas 0 122 2.079442 0.000000 86 +manag 0 114 2.197225 0.000000 125 +graphic 0 90 2.397895 0.000000 147 +resum 0 79 2.564949 0.000000 217 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +new 0 64 2.772589 0.000000 262 +china 0 37 3.332205 0.000000 487 +chen 0 21 3.912023 0.000000 791 +taiwan 0 16 4.174387 0.000000 1006 +practicum 0 16 4.174387 0.000000 960 +mapl 0 11 4.553877 0.000000 1376 +perman 0 11 4.553877 0.000000 1372 +sung 0 6 5.164786 0.000000 2075 +chin 0 5 5.347108 0.000000 2408 +taipei 0 4 5.568345 0.000000 2926 +album 0 4 5.568345 0.000000 2888 +icchen 0 1 6.957497 0.000000 10607 +nctu 0 1 6.957497 0.000000 10608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..2c008575 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +network 0 168 1.791759 0.000000 61 +construct 0 139 1.945910 0.000000 82 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +advanc 0 99 2.302585 0.000000 130 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +multimedia 0 68 2.708050 0.000000 258 +visit 0 63 2.772589 0.000000 288 +post 0 35 3.401197 0.000000 505 +script 0 13 4.382027 0.000000 1171 +indira 0 3 5.857933 0.000000 3656 +malik 0 1 6.957497 0.000000 10609 +imalik 0 1 6.957497 0.000000 10610 +tap 0 1 6.957497 0.000000 10611 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..73239a21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +graphic 0 90 2.397895 0.000000 147 +homepag 0 93 2.397895 0.000000 148 +java 0 70 2.708050 0.000000 248 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +dept 0 64 2.772589 0.000000 291 +ithaca 0 65 2.772589 0.000000 294 +polici 0 64 2.772589 0.000000 279 +colleg 0 61 2.833213 0.000000 300 +cool 0 49 3.044522 0.000000 374 +directori 0 45 3.135494 0.000000 396 +india 0 32 3.465736 0.000000 550 +independ 0 32 3.465736 0.000000 548 +taken 0 31 3.496508 0.000000 555 +anim 0 31 3.496508 0.000000 557 +sciencecornel 0 22 3.850148 0.000000 768 +indian 0 22 3.850148 0.000000 769 +applet 0 20 3.951244 0.000000 827 +toolkit 0 20 3.951244 0.000000 835 +practicum 0 16 4.174387 0.000000 960 +drive 0 15 4.248495 0.000000 1052 +camera 0 14 4.317488 0.000000 1115 +audio 0 14 4.317488 0.000000 1094 +galleri 0 13 4.382027 0.000000 1192 +magic 0 11 4.553877 0.000000 1358 +wood 0 11 4.553877 0.000000 1355 +perspect 0 10 4.653960 0.000000 1437 +hoca 0 5 5.347108 0.000000 2241 +hobb 0 4 5.568345 0.000000 2893 +nashvil 0 4 5.568345 0.000000 2867 +tennesse 0 4 5.568345 0.000000 2763 +indira 0 3 5.857933 0.000000 3656 +engineeringclass 0 3 5.857933 0.000000 3667 +recip 0 3 5.857933 0.000000 3668 +coimbator 0 2 6.263398 0.000000 5130 +cornelluniversityfal 0 2 6.263398 0.000000 5131 +cspracticum 0 2 6.263398 0.000000 5132 +carpet 0 2 6.263398 0.000000 5133 +colloqium 0 2 6.263398 0.000000 5134 +manageri 0 2 6.263398 0.000000 5135 +vidyaprakash 0 1 6.957497 0.000000 10612 +vidyaprakashmast 0 1 6.957497 0.000000 10613 +universitywelcom 0 1 6.957497 0.000000 10614 +financesumm 0 1 6.957497 0.000000 10615 +tracingin 0 1 6.957497 0.000000 10616 +perspectivetransform 0 1 6.957497 0.000000 10617 +myresumeclick 0 1 6.957497 0.000000 10618 +transformssom 0 1 6.957497 0.000000 10619 +sgamelan 0 1 6.957497 0.000000 10620 +calvinand 0 1 6.957497 0.000000 10621 +gif 0 1 6.957497 0.000000 10622 +chicker 0 1 6.957497 0.000000 10623 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..19eb6927 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +includ 0 208 1.609438 0.000000 42 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +file 0 132 1.945910 0.000000 70 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +code 0 108 2.197225 0.000000 116 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +section 0 94 2.397895 0.000000 149 +environ 0 84 2.484907 0.000000 177 +start 0 83 2.484907 0.000000 173 +solut 0 82 2.484907 0.000000 162 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +homework 0 79 2.564949 0.000000 193 +write 1 72 2.639057 2.639057 222 +test 0 66 2.708050 0.000000 252 +knowledg 0 67 2.708050 0.000000 243 +prof 0 64 2.772589 0.000000 273 +descript 0 64 2.772589 0.000000 271 +virtual 0 62 2.772589 0.000000 285 +detail 0 57 2.890372 0.000000 321 +index 0 56 2.890372 0.000000 309 +instruct 0 53 2.944439 0.000000 332 +extens 0 53 2.944439 0.000000 340 +cover 0 55 2.944439 0.000000 329 +much 0 52 2.995732 0.000000 349 +week 0 52 2.995732 0.000000 343 +set 0 50 3.044522 0.000000 361 +right 0 48 3.044522 0.000000 363 +get 0 46 3.091042 0.000000 380 +directori 0 45 3.135494 0.000000 396 +video 0 44 3.135494 0.000000 405 +protocol 0 45 3.135494 0.000000 407 +http 0 41 3.218876 0.000000 420 +programm 0 39 3.258097 0.000000 445 +tutori 0 39 3.258097 0.000000 437 +brian 0 38 3.295837 0.000000 466 +prototyp 0 38 3.295837 0.000000 463 +short 0 36 3.367296 0.000000 499 +multi 0 36 3.367296 0.000000 493 +manual 1 35 3.401197 3.401197 504 +packag 0 28 3.610918 0.000000 614 +togeth 0 23 3.806662 0.000000 714 +try 0 22 3.850148 0.000000 764 +smith 0 20 3.951244 0.000000 820 +mpeg 0 20 3.951244 0.000000 831 +media 0 19 4.007333 0.000000 861 +repositori 0 17 4.110874 0.000000 932 +doesn 0 15 4.248495 0.000000 1055 +remot 0 15 4.248495 0.000000 1041 +script 0 13 4.382027 0.000000 1171 +suit 0 13 4.382027 0.000000 1129 +realiti 0 12 4.465908 0.000000 1272 +guidelin 0 7 5.010635 0.000000 1832 +conferenc 0 7 5.010635 0.000000 1857 +put 0 6 5.164786 0.000000 2017 +valuabl 0 5 5.347108 0.000000 2256 +templat 0 5 5.347108 0.000000 2311 +spam 0 4 5.568345 0.000000 2927 +knowledgebas 0 2 6.263398 0.000000 5136 +pageioi 0 1 6.957497 0.000000 10624 +homeless 0 1 6.957497 0.000000 10625 +lamioi 0 1 6.957497 0.000000 10626 +multim 0 1 6.957497 0.000000 10627 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..1ca1dd33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +relat 0 139 1.945910 0.000000 68 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +world 0 115 2.197225 0.000000 126 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +proceed 0 93 2.397895 0.000000 152 +commun 0 95 2.397895 0.000000 157 +info 0 85 2.484907 0.000000 176 +environ 0 84 2.484907 0.000000 177 +wide 0 84 2.484907 0.000000 185 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +upson 0 71 2.639057 0.000000 218 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +workshop 0 71 2.639057 0.000000 239 +summari 0 73 2.639057 0.000000 237 +abstract 0 62 2.772589 0.000000 276 +ithaca 0 65 2.772589 0.000000 294 +collect 0 65 2.772589 0.000000 268 +type 0 61 2.833213 0.000000 296 +thesi 0 57 2.890372 0.000000 327 +explor 0 58 2.890372 0.000000 324 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +hardwar 0 51 2.995732 0.000000 350 +format 0 48 3.044522 0.000000 356 +basic 0 50 3.044522 0.000000 360 +still 0 50 3.044522 0.000000 362 +algebra 0 45 3.135494 0.000000 394 +editor 0 41 3.218876 0.000000 433 +http 0 41 3.218876 0.000000 420 +live 0 40 3.258097 0.000000 451 +paul 0 38 3.295837 0.000000 471 +correct 0 38 3.295837 0.000000 462 +formal 0 37 3.332205 0.000000 478 +next 0 34 3.401197 0.000000 517 +post 0 35 3.401197 0.000000 505 +full 0 28 3.610918 0.000000 615 +load 0 28 3.610918 0.000000 601 +enhanc 0 26 3.688879 0.000000 644 +session 0 26 3.688879 0.000000 643 +doctor 0 24 3.761200 0.000000 709 +sometim 0 24 3.761200 0.000000 696 +proof 0 23 3.806662 0.000000 720 +methodolog 0 23 3.806662 0.000000 733 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +dai 0 22 3.850148 0.000000 753 +theorem 0 21 3.912023 0.000000 786 +synthesi 0 20 3.951244 0.000000 834 +toolkit 0 20 3.951244 0.000000 835 +prove 0 19 4.007333 0.000000 848 +north 0 19 4.007333 0.000000 873 +els 0 19 4.007333 0.000000 843 +hypertext 0 19 4.007333 0.000000 865 +definit 0 19 4.007333 0.000000 864 +coupl 0 17 4.110874 0.000000 939 +month 0 15 4.248495 0.000000 1025 +circuit 0 13 4.382027 0.000000 1131 +someon 0 13 4.382027 0.000000 1128 +moment 0 11 4.553877 0.000000 1379 +nuprl 0 10 4.653960 0.000000 1402 +usaphon 0 9 4.753590 0.000000 1600 +entitl 0 9 4.753590 0.000000 1490 +inter 0 9 4.753590 0.000000 1530 +float 0 9 4.753590 0.000000 1504 +prover 0 8 4.875197 0.000000 1653 +attent 0 8 4.875197 0.000000 1651 +ifip 0 5 5.347108 0.000000 2459 +holland 0 5 5.347108 0.000000 2490 +jackson 1 3 5.857933 5.857933 3586 +pagepaul 0 3 5.857933 0.000000 3669 +bout 0 3 5.857933 0.000000 3670 +elsevi 0 3 5.857933 0.000000 3671 +pai 0 3 5.857933 0.000000 3672 +shouldb 0 3 5.857933 0.000000 3673 +associatecornel 0 2 6.263398 0.000000 5137 +eduwww 0 2 6.263398 0.000000 5138 +linkag 0 2 6.263398 0.000000 5139 +thenuprl 0 2 6.263398 0.000000 5047 +workon 0 2 6.263398 0.000000 4280 +htmladdress 0 1 6.957497 0.000000 10628 +intereststheorem 0 1 6.957497 0.000000 10629 +andhardwar 0 1 6.957497 0.000000 10630 +informationmi 0 1 6.957497 0.000000 10631 +developmentsystem 0 1 6.957497 0.000000 10632 +bundi 0 1 6.957497 0.000000 10633 +automateddeduct 0 1 6.957497 0.000000 10634 +artif 0 1 6.957497 0.000000 10635 +stavrid 0 1 6.957497 0.000000 10636 +melham 0 1 6.957497 0.000000 10637 +transactionsa 0 1 6.957497 0.000000 10638 +theadvanc 0 1 6.957497 0.000000 10639 +nuprlth 0 1 6.957497 0.000000 10640 +getround 0 1 6.957497 0.000000 10641 +thetheori 0 1 6.957497 0.000000 10642 +foreach 0 1 6.957497 0.000000 10643 +andtheorem 0 1 6.957497 0.000000 10644 +thepolynomi 0 1 6.957497 0.000000 10645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..25bda0fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +hani 1 2 6.263398 6.263398 5140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..49f1919c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +site 0 106 2.197225 0.000000 119 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +perman 0 11 4.553877 0.000000 1372 +usaoffic 0 6 5.164786 0.000000 2159 +janosi 0 3 5.857933 0.000000 3149 +mywww 0 2 6.263398 0.000000 5113 +tibor 0 1 6.957497 0.000000 10646 +jnositibor 0 1 6.957497 0.000000 10647 +jnosiwelcom 0 1 6.957497 0.000000 10648 +constructionoffic 0 1 6.957497 0.000000 10649 +zenotibor 0 1 6.957497 0.000000 10650 +jnosi 0 1 6.957497 0.000000 10651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..8c91e64b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +address 0 170 1.791759 0.000000 62 +master 0 76 2.564949 0.000000 216 +ithaca 0 65 2.772589 0.000000 294 +telephon 0 50 3.044522 0.000000 373 +avenu 0 12 4.465908 0.000000 1277 +mapl 0 11 4.553877 0.000000 1376 +janwun 1 1 6.957497 6.957497 10652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..ad76b99b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +implement 0 152 1.791759 0.000000 52 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +construct 0 139 1.945910 0.000000 82 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +version 0 113 2.197225 0.000000 122 +code 0 108 2.197225 0.000000 116 +manag 0 114 2.197225 0.000000 125 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +technic 0 100 2.302585 0.000000 140 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +level 0 87 2.484907 0.000000 180 +resourc 0 81 2.484907 0.000000 172 +member 0 84 2.484907 0.000000 165 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +state 0 76 2.564949 0.000000 207 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +upson 0 71 2.639057 0.000000 218 +line 0 75 2.639057 0.000000 231 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +abstract 0 62 2.772589 0.000000 276 +evalu 0 64 2.772589 0.000000 266 +septemb 0 65 2.772589 0.000000 274 +function 0 62 2.772589 0.000000 275 +januari 0 62 2.772589 0.000000 264 +type 0 61 2.833213 0.000000 296 +content 0 59 2.833213 0.000000 302 +juli 0 60 2.833213 0.000000 305 +publish 0 57 2.890372 0.000000 326 +direct 0 57 2.890372 0.000000 316 +faculti 0 56 2.890372 0.000000 325 +thesi 0 57 2.890372 0.000000 327 +tabl 0 51 2.995732 0.000000 346 +standard 0 48 3.044522 0.000000 365 +principl 0 48 3.044522 0.000000 357 +mark 0 44 3.135494 0.000000 403 +late 0 40 3.258097 0.000000 439 +annual 0 40 3.258097 0.000000 458 +bibliographi 0 34 3.401197 0.000000 518 +extend 0 32 3.465736 0.000000 539 +ad 0 32 3.465736 0.000000 544 +robert 0 30 3.555348 0.000000 567 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +semant 0 29 3.583519 0.000000 587 +focus 0 29 3.583519 0.000000 584 +platform 0 29 3.583519 0.000000 591 +proc 0 26 3.688879 0.000000 649 +primari 0 25 3.737670 0.000000 669 +greg 0 24 3.761200 0.000000 695 +interpret 0 24 3.761200 0.000000 686 +store 0 24 3.761200 0.000000 693 +thread 0 23 3.806662 0.000000 722 +cooper 0 22 3.850148 0.000000 757 +portabl 0 20 3.951244 0.000000 819 +andrew 0 19 4.007333 0.000000 849 +particularli 0 19 4.007333 0.000000 867 +eric 0 19 4.007333 0.000000 870 +concentr 0 18 4.060443 0.000000 906 +less 0 18 4.060443 0.000000 892 +partial 0 18 4.060443 0.000000 900 +fourth 0 16 4.174387 0.000000 999 +diego 0 16 4.174387 0.000000 992 +princeton 0 15 4.248495 0.000000 1042 +francisco 0 14 4.317488 0.000000 1095 +sigplan 0 13 4.382027 0.000000 1190 +conf 0 13 4.382027 0.000000 1181 +mellon 0 13 4.382027 0.000000 1179 +onth 0 12 4.465908 0.000000 1218 +carnegi 0 12 4.465908 0.000000 1260 +faster 0 11 4.553877 0.000000 1323 +refin 0 11 4.553877 0.000000 1363 +road 0 11 4.553877 0.000000 1374 +cheng 0 10 4.653960 0.000000 1381 +interestsmi 0 10 4.653960 0.000000 1462 +operatingsystem 0 10 4.653960 0.000000 1401 +bring 0 10 4.653960 0.000000 1430 +jersei 0 9 4.753590 0.000000 1587 +lock 0 9 4.753590 0.000000 1551 +closur 0 8 4.875197 0.000000 1643 +convers 0 8 4.875197 0.000000 1673 +andcomput 0 8 4.875197 0.000000 1623 +leon 0 8 4.875197 0.000000 1631 +hack 0 7 5.010635 0.000000 1950 +bit 0 7 5.010635 0.000000 1833 +gzip 0 6 5.164786 0.000000 2117 +morrisett 0 5 5.347108 0.000000 2263 +interestedin 0 5 5.347108 0.000000 2260 +consum 0 5 5.347108 0.000000 2334 +optimist 0 5 5.347108 0.000000 2501 +gregori 0 4 5.568345 0.000000 2928 +polymorph 0 4 5.568345 0.000000 2627 +kept 0 4 5.568345 0.000000 2762 +stone 0 3 5.857933 0.000000 3674 +informationresearch 0 3 5.857933 0.000000 3675 +teachingc 0 3 5.857933 0.000000 3614 +denmark 0 3 5.857933 0.000000 3676 +warren 0 3 5.857933 0.000000 3301 +harper 0 2 6.263398 0.000000 5141 +multiprocess 0 2 6.263398 0.000000 5142 +intereststeachingselect 0 2 6.263398 0.000000 4924 +linksperson 0 2 6.263398 0.000000 5143 +herlihi 0 2 6.263398 0.000000 5144 +copenhagen 0 2 6.263398 0.000000 5145 +tarditi 0 1 6.957497 0.000000 10653 +tolmach 0 1 6.957497 0.000000 10654 +papersrel 0 1 6.957497 0.000000 10655 +ofadvanc 0 1 6.957497 0.000000 10656 +forbuild 0 1 6.957497 0.000000 10657 +safelanguag 0 1 6.957497 0.000000 10658 +toolsfrom 0 1 6.957497 0.000000 10659 +systemssoftwar 0 1 6.957497 0.000000 10660 +paperssemant 0 1 6.957497 0.000000 10661 +safetythrough 0 1 6.957497 0.000000 10662 +yasuhiko 0 1 6.957497 0.000000 10663 +minamid 0 1 6.957497 0.000000 10664 +matthia 0 1 6.957497 0.000000 10665 +felleisen 0 1 6.957497 0.000000 10666 +reportcmu 0 1 6.957497 0.000000 10667 +notecmu 0 1 6.957497 0.000000 10668 +intensionaltyp 0 1 6.957497 0.000000 10669 +parallelizationgreg 0 1 6.957497 0.000000 10670 +mauric 0 1 6.957497 0.000000 10671 +scienceperson 0 1 6.957497 0.000000 10672 +informationhom 0 1 6.957497 0.000000 10673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..51d88e87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +current 0 284 1.098612 0.000000 21 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +address 1 170 1.791759 1.791759 62 +resum 0 79 2.564949 0.000000 217 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +taiwan 0 16 4.174387 0.000000 1006 +avenu 0 12 4.465908 0.000000 1277 +mapl 0 11 4.553877 0.000000 1376 +perman 0 11 4.553877 0.000000 1372 +taipei 0 4 5.568345 0.000000 2926 +shing 0 2 6.263398 0.000000 5146 +jiun 1 1 6.957497 6.957497 10674 +jhlin 0 1 6.957497 0.000000 10675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..d7707cfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +homepag 0 93 2.397895 0.000000 148 +jerri 1 3 5.857933 5.857933 3445 +edujerri 0 1 6.957497 0.000000 10676 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..0d45118d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +phone 0 175 1.791759 0.000000 45 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +mathemat 0 108 2.197225 0.000000 123 +site 0 106 2.197225 0.000000 119 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +educ 0 86 2.484907 0.000000 191 +school 0 84 2.484907 0.000000 188 +ieee 0 86 2.484907 0.000000 190 +state 0 76 2.564949 0.000000 207 +nation 0 74 2.639057 0.000000 240 +servic 0 72 2.639057 0.000000 236 +upson 0 71 2.639057 0.000000 218 +degre 0 69 2.708050 0.000000 259 +window 0 68 2.708050 0.000000 242 +new 0 64 2.772589 0.000000 262 +foundat 0 62 2.772589 0.000000 286 +guid 0 63 2.772589 0.000000 267 +virtual 0 62 2.772589 0.000000 285 +best 0 59 2.833213 0.000000 299 +cool 0 49 3.044522 0.000000 374 +directori 0 45 3.135494 0.000000 396 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +review 0 42 3.218876 0.000000 425 +futur 0 41 3.218876 0.000000 427 +societi 0 40 3.258097 0.000000 456 +china 1 37 3.332205 3.332205 487 +award 0 34 3.401197 0.000000 523 +chines 0 29 3.583519 0.000000 595 +art 0 29 3.583519 0.000000 593 +weather 0 28 3.610918 0.000000 618 +magazin 0 24 3.761200 0.000000 704 +daili 0 24 3.761200 0.000000 706 +yahoo 0 24 3.761200 0.000000 707 +honor 0 23 3.806662 0.000000 729 +famili 0 23 3.806662 0.000000 735 +wang 0 21 3.912023 0.000000 790 +beij 0 19 4.007333 0.000000 876 +histori 0 19 4.007333 0.000000 853 +taiwan 0 16 4.174387 0.000000 1006 +transfer 0 16 4.174387 0.000000 967 +rank 0 14 4.317488 0.000000 1063 +incomput 0 14 4.317488 0.000000 1096 +entertain 0 12 4.465908 0.000000 1286 +tour 0 11 4.553877 0.000000 1307 +america 0 11 4.553877 0.000000 1370 +fellowship 0 10 4.653960 0.000000 1460 +sister 0 9 4.753590 0.000000 1524 +film 0 8 4.875197 0.000000 1761 +tourist 0 8 4.875197 0.000000 1710 +digest 0 7 5.010635 0.000000 1864 +cultur 0 7 5.010635 0.000000 1951 +monei 0 7 5.010635 0.000000 1934 +peterson 0 7 5.010635 0.000000 1850 +christian 0 7 5.010635 0.000000 1949 +scholar 0 6 5.164786 0.000000 2180 +forum 0 6 5.164786 0.000000 2027 +postcard 0 6 5.164786 0.000000 2181 +brook 0 6 5.164786 0.000000 2152 +suni 0 5 5.347108 0.000000 2452 +ucla 0 5 5.347108 0.000000 2502 +hallithaca 0 4 5.568345 0.000000 2894 +binghamton 0 3 5.857933 0.000000 3544 +twin 0 3 5.857933 0.000000 3657 +hongkong 0 3 5.857933 0.000000 3677 +stamp 0 3 5.857933 0.000000 3678 +ryan 0 3 5.857933 0.000000 3679 +tian 0 3 5.857933 0.000000 3680 +stoni 0 3 5.857933 0.000000 3571 +nankai 0 2 6.263398 0.000000 5147 +tianjin 0 2 6.263398 0.000000 5148 +barri 0 2 6.263398 0.000000 5149 +sciencefound 0 2 6.263398 0.000000 5150 +chinaand 0 2 6.263398 0.000000 5151 +sceneri 0 2 6.263398 0.000000 5152 +sheng 0 2 6.263398 0.000000 5153 +liber 0 2 6.263398 0.000000 5154 +wangphd 0 1 6.957497 0.000000 10677 +jiawang 0 1 6.957497 0.000000 10678 +goldwat 0 1 6.957497 0.000000 10679 +cbnet 0 1 6.957497 0.000000 10680 +chinanet 0 1 6.957497 0.000000 10681 +chinesecalendar 0 1 6.957497 0.000000 10682 +mediainform 0 1 6.957497 0.000000 10683 +hongkonglaserdisccent 0 1 6.957497 0.000000 10684 +internetdistribut 0 1 6.957497 0.000000 10685 +multilingu 0 1 6.957497 0.000000 10686 +smovieplex 0 1 6.957497 0.000000 10687 +diwww 0 1 6.957497 0.000000 10688 +thesenior 0 1 6.957497 0.000000 10689 +worldmap 0 1 6.957497 0.000000 10690 +mandarin 0 1 6.957497 0.000000 10691 +cssa 0 1 6.957497 0.000000 10692 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..dd6bcf79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +process 0 142 1.945910 0.000000 72 +machin 0 129 2.079442 0.000000 95 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +level 0 87 2.484907 0.000000 180 +info 0 85 2.484907 0.000000 176 +master 0 76 2.564949 0.000000 216 +prof 0 64 2.772589 0.000000 273 +colleg 0 61 2.833213 0.000000 300 +semest 0 58 2.890372 0.000000 312 +found 0 53 2.944439 0.000000 337 +long 0 43 3.178054 0.000000 413 +vision 1 41 3.218876 3.218876 430 +robot 0 36 3.367296 0.000000 497 +primari 0 25 3.737670 0.000000 669 +particularli 0 19 4.007333 0.000000 867 +miller 0 17 4.110874 0.000000 949 +night 0 11 4.553877 0.000000 1319 +justin 0 7 5.010635 0.000000 1789 +uniform 0 7 5.010635 0.000000 1845 +ramin 0 7 5.010635 0.000000 1820 +zabih 0 6 5.164786 0.000000 2138 +csrvl 0 3 5.857933 0.000000 3543 +navi 0 2 6.263398 0.000000 5155 +com 0 2 6.263398 0.000000 5156 +ofengin 0 1 6.957497 0.000000 10693 +assistantwork 0 1 6.957497 0.000000 10694 +ismachin 0 1 6.957497 0.000000 10695 +informationsom 0 1 6.957497 0.000000 10696 +rant 0 1 6.957497 0.000000 10697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..375dc350 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +high 0 130 2.079442 0.000000 101 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +present 0 91 2.397895 0.000000 145 +sinc 0 90 2.397895 0.000000 159 +school 0 84 2.484907 0.000000 188 +west 0 83 2.484907 0.000000 192 +internet 0 83 2.484907 0.000000 186 +resum 0 79 2.564949 0.000000 217 +optim 0 79 2.564949 0.000000 197 +april 0 77 2.564949 0.000000 196 +free 0 73 2.639057 0.000000 224 +receiv 0 66 2.708050 0.000000 244 +practic 0 70 2.708050 0.000000 246 +multimedia 0 68 2.708050 0.000000 258 +main 0 67 2.708050 0.000000 256 +new 0 64 2.772589 0.000000 262 +creat 0 63 2.772589 0.000000 277 +street 0 63 2.772589 0.000000 293 +januari 0 62 2.772589 0.000000 264 +cool 0 49 3.044522 0.000000 374 +york 0 41 3.218876 0.000000 435 +press 0 42 3.218876 0.000000 419 +seminar 0 38 3.295837 0.000000 470 +formal 0 37 3.332205 0.000000 478 +game 0 36 3.367296 0.000000 498 +product 0 33 3.433987 0.000000 527 +campu 0 27 3.637586 0.000000 623 +jeff 0 25 3.737670 0.000000 673 +daili 0 24 3.761200 0.000000 706 +divis 0 21 3.912023 0.000000 803 +mpeg 0 20 3.951244 0.000000 831 +anyon 0 17 4.110874 0.000000 916 +intel 0 16 4.174387 0.000000 1000 +practicum 0 16 4.174387 0.000000 960 +jose 0 16 4.174387 0.000000 976 +francisco 0 14 4.317488 0.000000 1095 +went 0 12 4.465908 0.000000 1279 +entertain 0 12 4.465908 0.000000 1286 +newspap 0 12 4.465908 0.000000 1280 +systemsc 0 11 4.553877 0.000000 1293 +mapl 0 11 4.553877 0.000000 1376 +purdu 0 10 4.653960 0.000000 1466 +sundai 0 10 4.653960 0.000000 1387 +leader 0 9 4.753590 0.000000 1576 +sister 0 9 4.753590 0.000000 1524 +portland 0 7 5.010635 0.000000 1878 +chronicl 0 7 5.010635 0.000000 1952 +indiana 0 6 5.164786 0.000000 2057 +oregon 0 5 5.347108 0.000000 2437 +thrive 0 5 5.347108 0.000000 2257 +revolut 0 5 5.347108 0.000000 2315 +encod 0 4 5.568345 0.000000 2929 +dalla 0 4 5.568345 0.000000 2930 +classesc 0 3 5.857933 0.000000 3681 +detroit 0 3 5.857933 0.000000 3565 +counti 0 3 5.857933 0.000000 3682 +cornellopoli 0 2 6.263398 0.000000 5157 +techniquec 0 2 6.263398 0.000000 5158 +methodsc 0 2 6.263398 0.000000 5159 +colloquiumc 0 2 6.263398 0.000000 5160 +magazinepc 0 2 6.263398 0.000000 5161 +morn 0 2 6.263398 0.000000 5162 +orang 0 2 6.263398 0.000000 5163 +herald 0 2 6.263398 0.000000 4789 +hillsboro 0 1 6.957497 0.000000 10698 +moorejeff 0 1 6.957497 0.000000 10699 +moorewel 0 1 6.957497 0.000000 10700 +mastersof 0 1 6.957497 0.000000 10701 +lafayett 0 1 6.957497 0.000000 10702 +suburb 0 1 6.957497 0.000000 10703 +employmentmi 0 1 6.957497 0.000000 10704 +classesnba 0 1 6.957497 0.000000 10705 +sectorc 0 1 6.957497 0.000000 10706 +researchfal 0 1 6.957497 0.000000 10707 +paperc 0 1 6.957497 0.000000 10708 +opendoc 0 1 6.957497 0.000000 10709 +mfcoptim 0 1 6.957497 0.000000 10710 +researchsoftwar 0 1 6.957497 0.000000 10711 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 0 1 6.957497 0.000000 10712 +companiesintelsilicon 0 1 6.957497 0.000000 10713 +graphicsibmsunapplemagazinespc 0 1 6.957497 0.000000 10714 +weekpc 0 1 6.957497 0.000000 10715 +computingcomput 0 1 6.957497 0.000000 10716 +shopperwindow 0 1 6.957497 0.000000 10717 +sourcescomput 0 1 6.957497 0.000000 10718 +lifemacusermacweekinteract 0 1 6.957497 0.000000 10719 +weekfamili 0 1 6.957497 0.000000 10720 +pccomput 0 1 6.957497 0.000000 10721 +worldelectron 0 1 6.957497 0.000000 10722 +newspapersusa 0 1 6.957497 0.000000 10723 +todaywal 0 1 6.957497 0.000000 10724 +journalnew 0 1 6.957497 0.000000 10725 +timesphiladelphia 0 1 6.957497 0.000000 10726 +onlineth 0 1 6.957497 0.000000 10727 +worldwideth 0 1 6.957497 0.000000 10728 +opinionsth 0 1 6.957497 0.000000 10729 +gopherth 0 1 6.957497 0.000000 10730 +knoxvil 0 1 6.957497 0.000000 10731 +sentinelth 0 1 6.957497 0.000000 10732 +onlinelat 0 1 6.957497 0.000000 10733 +serviceth 0 1 6.957497 0.000000 10734 +nugget 0 1 6.957497 0.000000 10735 +oregonrworld 0 1 6.957497 0.000000 10736 +registerth 0 1 6.957497 0.000000 10737 +examinersan 0 1 6.957497 0.000000 10738 +mercuryth 0 1 6.957497 0.000000 10739 +timesnando 0 1 6.957497 0.000000 10740 +netusa 0 1 6.957497 0.000000 10741 +todayboston 0 1 6.957497 0.000000 10742 +globeportland 0 1 6.957497 0.000000 10743 +telegramvisitor 0 1 6.957497 0.000000 10744 +fdithaca 0 1 6.957497 0.000000 10745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..bb963644 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +click 0 142 1.945910 0.000000 78 +report 0 131 2.079442 0.000000 92 +number 0 130 2.079442 0.000000 97 +code 0 108 2.197225 0.000000 116 +text 0 98 2.302585 0.000000 133 +memori 0 101 2.302585 0.000000 139 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +resum 0 79 2.564949 0.000000 217 +exampl 0 77 2.564949 0.000000 195 +java 0 70 2.708050 0.000000 248 +would 0 67 2.708050 0.000000 251 +view 0 70 2.708050 0.000000 254 +multimedia 0 68 2.708050 0.000000 258 +simul 0 66 2.708050 0.000000 255 +virtual 0 62 2.772589 0.000000 285 +browser 0 56 2.890372 0.000000 313 +direct 0 57 2.890372 0.000000 316 +friend 0 48 3.044522 0.000000 376 +visitor 0 49 3.044522 0.000000 371 +video 0 44 3.135494 0.000000 405 +better 0 45 3.135494 0.000000 401 +movi 0 40 3.258097 0.000000 459 +vita 0 38 3.295837 0.000000 473 +game 0 36 3.367296 0.000000 498 +curriculum 0 33 3.433987 0.000000 535 +anim 0 31 3.496508 0.000000 557 +enabl 0 26 3.688879 0.000000 655 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +jose 0 16 4.174387 0.000000 976 +sign 0 16 4.174387 0.000000 970 +hobbi 0 16 4.174387 0.000000 1009 +transit 0 15 4.248495 0.000000 1046 +avenu 0 12 4.465908 0.000000 1277 +meng 0 12 4.465908 0.000000 1214 +clock 0 11 4.553877 0.000000 1320 +mapl 0 11 4.553877 0.000000 1376 +bill 0 11 4.553877 0.000000 1297 +rivl 0 8 4.875197 0.000000 1632 +autonom 0 8 4.875197 0.000000 1749 +vehicl 0 7 5.010635 0.000000 1928 +courtesi 0 7 5.010635 0.000000 1953 +photographi 0 6 5.164786 0.000000 2146 +recruit 0 6 5.164786 0.000000 2145 +hoca 0 5 5.347108 0.000000 2241 +multitask 0 4 5.568345 0.000000 2803 +crazi 0 4 5.568345 0.000000 2822 +fernandez 0 3 5.857933 0.000000 3591 +lui 0 2 6.263398 0.000000 5164 +joselui 0 2 6.263398 0.000000 4965 +pyramania 0 2 6.263398 0.000000 4957 +actor 0 2 6.263398 0.000000 4240 +pagejos 0 1 6.957497 0.000000 10746 +fernandezjos 0 1 6.957497 0.000000 10747 +fernandezmast 0 1 6.957497 0.000000 10748 +ebithaca 0 1 6.957497 0.000000 10749 +scroll 0 1 6.957497 0.000000 10750 +presentationc 0 1 6.957497 0.000000 10751 +spaceship 0 1 6.957497 0.000000 10752 +battl 0 1 6.957497 0.000000 10753 +picturesmusiccomputerswrit 0 1 6.957497 0.000000 10754 +giel 0 1 6.957497 0.000000 10755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..ee1f15c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +distribut 0 162 1.791759 0.000000 51 +click 0 142 1.945910 0.000000 78 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +simul 0 66 2.708050 0.000000 255 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +administr 0 27 3.637586 0.000000 628 +busi 0 21 3.912023 0.000000 784 +johnson 0 13 4.382027 0.000000 1162 +linda 0 10 4.653960 0.000000 1394 +autonom 0 8 4.875197 0.000000 1749 +vehicl 0 7 5.010635 0.000000 1928 +hurtado 1 1 6.957497 6.957497 10756 +julin 0 1 6.957497 0.000000 10757 +pagejulin 0 1 6.957497 0.000000 10758 +universitymast 0 1 6.957497 0.000000 10759 +managementmast 0 1 6.957497 0.000000 10760 +science 0 1 6.957497 0.000000 10761 +colombia 0 1 6.957497 0.000000 10762 +er 0 1 6.957497 0.000000 10763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..98432308 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +texa 1 160 1.791759 1.791759 64 +welcom 0 122 2.079442 0.000000 99 +send 0 114 2.197225 0.000000 109 +thing 0 84 2.484907 0.000000 189 +complet 0 77 2.564949 0.000000 208 +resum 0 79 2.564949 0.000000 217 +august 0 66 2.708050 0.000000 257 +septemb 0 65 2.772589 0.000000 274 +favorit 0 44 3.135494 0.000000 410 +join 0 39 3.258097 0.000000 457 +ad 0 32 3.465736 0.000000 544 +instrument 0 7 5.010635 0.000000 1954 +edumi 0 6 5.164786 0.000000 2132 +dalla 0 4 5.568345 0.000000 2930 +janeen 0 1 6.957497 0.000000 10764 +homepagejaneen 0 1 6.957497 0.000000 10765 +reich 0 1 6.957497 0.000000 10766 +jreich 0 1 6.957497 0.000000 10767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..ce689bc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +high 0 130 2.079442 0.000000 101 +machin 0 129 2.079442 0.000000 95 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +master 0 76 2.564949 0.000000 216 +come 0 78 2.564949 0.000000 202 +resum 0 79 2.564949 0.000000 217 +dynam 0 76 2.564949 0.000000 194 +receiv 0 66 2.708050 0.000000 244 +multimedia 0 68 2.708050 0.000000 258 +window 0 68 2.708050 0.000000 242 +main 0 67 2.708050 0.000000 256 +septemb 0 65 2.772589 0.000000 274 +januari 0 62 2.772589 0.000000 264 +visit 0 63 2.772589 0.000000 288 +still 0 50 3.044522 0.000000 362 +get 0 46 3.091042 0.000000 380 +cost 0 37 3.332205 0.000000 480 +synchron 0 29 3.583519 0.000000 588 +particip 0 29 3.583519 0.000000 589 +although 0 25 3.737670 0.000000 667 +recognit 0 23 3.806662 0.000000 723 +chip 0 21 3.912023 0.000000 770 +portabl 0 20 3.951244 0.000000 819 +speed 0 18 4.060443 0.000000 911 +stock 0 16 4.174387 0.000000 1007 +massachusett 0 14 4.317488 0.000000 1118 +bodi 0 13 4.382027 0.000000 1178 +meng 0 12 4.465908 0.000000 1214 +speech 0 12 4.465908 0.000000 1222 +grant 0 12 4.465908 0.000000 1216 +systemsc 0 11 4.553877 0.000000 1293 +desktop 0 10 4.653960 0.000000 1445 +capac 0 8 4.875197 0.000000 1740 +filter 0 8 4.875197 0.000000 1641 +mile 0 8 4.875197 0.000000 1743 +ground 0 7 5.010635 0.000000 1955 +facial 0 5 5.347108 0.000000 2438 +amherst 0 5 5.347108 0.000000 2484 +thrive 0 5 5.347108 0.000000 2257 +stage 0 5 5.347108 0.000000 2488 +steer 0 5 5.347108 0.000000 2328 +car 0 4 5.568345 0.000000 2931 +ford 0 4 5.568345 0.000000 2636 +sold 0 4 5.568345 0.000000 2813 +exhaust 0 4 5.568345 0.000000 2825 +gear 0 4 5.568345 0.000000 2891 +visionc 0 3 5.857933 0.000000 3489 +obvious 0 3 5.857933 0.000000 3474 +memberof 0 3 5.857933 0.000000 3169 +bought 0 2 6.263398 0.000000 5165 +accel 0 2 6.263398 0.000000 5166 +plug 0 2 6.263398 0.000000 5167 +camaro 0 1 6.957497 0.000000 10768 +chevi 0 1 6.957497 0.000000 10769 +jodi 0 1 6.957497 0.000000 10770 +shapirojodi 0 1 6.957497 0.000000 10771 +shapiroeduc 0 1 6.957497 0.000000 10772 +engineeringe 0 1 6.957497 0.000000 10773 +telecommunicationc 0 1 6.957497 0.000000 10774 +researchspr 0 1 6.957497 0.000000 10775 +systemse 0 1 6.957497 0.000000 10776 +networksnba 0 1 6.957497 0.000000 10777 +revolutionc 0 1 6.957497 0.000000 10778 +researchma 0 1 6.957497 0.000000 10779 +automot 0 1 6.957497 0.000000 10780 +engineeringinterest 0 1 6.957497 0.000000 10781 +animationlow 0 1 6.957497 0.000000 10782 +videoconferenc 0 1 6.957497 0.000000 10783 +recognitioninterest 0 1 6.957497 0.000000 10784 +firebird 0 1 6.957497 0.000000 10785 +yourselfelectron 0 1 6.957497 0.000000 10786 +fuel 0 1 6.957497 0.000000 10787 +inject 0 1 6.957497 0.000000 10788 +alwayshav 0 1 6.957497 0.000000 10789 +designingan 0 1 6.957497 0.000000 10790 +pageefi 0 1 6.957497 0.000000 10791 +pagethes 0 1 6.957497 0.000000 10792 +gearsmodif 0 1 6.957497 0.000000 10793 +hypertech 0 1 6.957497 0.000000 10794 +flowmast 0 1 6.957497 0.000000 10795 +hurst 0 1 6.957497 0.000000 10796 +shifter 0 1 6.957497 0.000000 10797 +wheel 0 1 6.957497 0.000000 10798 +mustang 0 1 6.957497 0.000000 10799 +speedmodif 0 1 6.957497 0.000000 10800 +motorsport 0 1 6.957497 0.000000 10801 +wiresbest 0 1 6.957497 0.000000 10802 +mphbest 0 1 6.957497 0.000000 10803 +pagenumb 0 1 6.957497 0.000000 10804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..85cab4e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +cornel 1 215 1.386294 1.386294 23 +continu 1 39 3.258097 3.258097 448 +eduto 1 7 5.010635 5.010635 1956 +julia 1 2 6.263398 6.263398 5094 +pagejulia 1 1 6.957497 6.957497 10805 +komissarchik 1 1 6.957497 6.957497 10806 +juliak 1 1 6.957497 6.957497 10807 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..783470a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +paper 0 205 1.609438 0.000000 38 +network 1 168 1.791759 1.791759 61 +avail 0 169 1.791759 0.000000 48 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +document 0 121 2.079442 0.000000 89 +high 0 130 2.079442 0.000000 101 +theori 0 111 2.197225 0.000000 127 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +center 0 88 2.397895 0.000000 158 +pictur 0 89 2.397895 0.000000 160 +search 0 95 2.397895 0.000000 155 +wide 0 84 2.484907 0.000000 185 +master 0 76 2.564949 0.000000 216 +complet 0 77 2.564949 0.000000 208 +server 0 76 2.564949 0.000000 204 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +addit 0 74 2.639057 0.000000 228 +ithaca 0 65 2.772589 0.000000 294 +hardwar 0 51 2.995732 0.000000 350 +pointer 0 48 3.044522 0.000000 368 +video 0 44 3.135494 0.000000 405 +better 0 45 3.135494 0.000000 401 +describ 0 45 3.135494 0.000000 400 +fast 0 42 3.218876 0.000000 429 +http 0 41 3.218876 0.000000 420 +realli 0 40 3.258097 0.000000 444 +workstat 0 37 3.332205 0.000000 479 +global 0 34 3.401197 0.000000 520 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +independ 0 32 3.465736 0.000000 548 +produc 0 30 3.555348 0.000000 572 +platform 0 29 3.583519 0.000000 591 +full 0 28 3.610918 0.000000 615 +becom 0 28 3.610918 0.000000 603 +cluster 0 28 3.610918 0.000000 612 +campu 0 27 3.637586 0.000000 623 +administr 0 27 3.637586 0.000000 628 +compress 0 23 3.806662 0.000000 719 +color 0 22 3.850148 0.000000 762 +toolkit 0 20 3.951244 0.000000 835 +increas 0 20 3.951244 0.000000 829 +commerci 0 16 4.174387 0.000000 1005 +critic 0 16 4.174387 0.000000 982 +topolog 0 14 4.317488 0.000000 1089 +demand 0 14 4.317488 0.000000 1073 +horu 0 14 4.317488 0.000000 1116 +achiev 0 14 4.317488 0.000000 1088 +grow 0 12 4.465908 0.000000 1209 +faster 0 11 4.553877 0.000000 1323 +screen 0 9 4.753590 0.000000 1577 +hallcornel 0 8 4.875197 0.000000 1757 +capit 0 7 5.010635 0.000000 1957 +thegoal 0 6 5.164786 0.000000 2033 +sparcstat 0 5 5.347108 0.000000 2406 +fulfil 0 4 5.568345 0.000000 2932 +innov 0 4 5.568345 0.000000 2933 +emilio 0 3 5.857933 0.000000 3683 +summit 0 3 5.857933 0.000000 3684 +adress 0 2 6.263398 0.000000 5168 +occup 0 2 6.263398 0.000000 5169 +fulltim 0 2 6.263398 0.000000 5170 +ethernet 0 2 6.263398 0.000000 5171 +blast 0 2 6.263398 0.000000 5172 +julian 0 1 6.957497 0.000000 10808 +pelenur 0 1 6.957497 0.000000 10809 +centerithaca 0 1 6.957497 0.000000 10810 +wfinger 0 1 6.957497 0.000000 10811 +cyberserv 0 1 6.957497 0.000000 10812 +httpserver 0 1 6.957497 0.000000 10813 +prvf 0 1 6.957497 0.000000 10814 +poss 0 1 6.957497 0.000000 10815 +screenmot 0 1 6.957497 0.000000 10816 +showthat 0 1 6.957497 0.000000 10817 +snarf 0 1 6.957497 0.000000 10818 +transferwith 0 1 6.957497 0.000000 10819 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..95c6cfaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +relat 0 139 1.945910 0.000000 68 +perform 0 143 1.945910 0.000000 74 +tool 0 117 2.079442 0.000000 93 +schedul 0 119 2.079442 0.000000 85 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +center 0 88 2.397895 0.000000 158 +resourc 0 81 2.484907 0.000000 172 +info 0 85 2.484907 0.000000 176 +want 0 79 2.564949 0.000000 199 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +practic 0 70 2.708050 0.000000 246 +abstract 0 62 2.772589 0.000000 276 +type 0 61 2.833213 0.000000 296 +back 0 60 2.833213 0.000000 297 +publish 0 57 2.890372 0.000000 326 +overview 0 56 2.890372 0.000000 323 +talk 0 53 2.944439 0.000000 336 +done 0 47 3.091042 0.000000 381 +slide 0 38 3.295837 0.000000 467 +seminar 0 38 3.295837 0.000000 470 +especi 0 36 3.367296 0.000000 496 +bibliographi 0 34 3.401197 0.000000 518 +art 0 29 3.583519 0.000000 593 +great 0 27 3.637586 0.000000 626 +mine 0 26 3.688879 0.000000 654 +higher 0 24 3.761200 0.000000 690 +sequenc 0 23 3.806662 0.000000 734 +fine 0 20 3.951244 0.000000 822 +supervis 0 20 3.951244 0.000000 840 +verif 0 20 3.951244 0.000000 826 +statu 0 18 4.060443 0.000000 885 +pretti 0 13 4.382027 0.000000 1191 +forth 0 13 4.382027 0.000000 1186 +nuprl 0 10 4.653960 0.000000 1402 +mainli 0 10 4.653960 0.000000 1432 +equip 0 10 4.653960 0.000000 1459 +hockei 0 8 4.875197 0.000000 1760 +forum 0 6 5.164786 0.000000 2027 +czar 0 5 5.347108 0.000000 2503 +hickei 0 4 5.568345 0.000000 2845 +identif 0 4 5.568345 0.000000 2773 +jason 0 3 5.857933 0.000000 3389 +orth 0 3 5.857933 0.000000 3685 +backcountri 0 3 5.857933 0.000000 3686 +publicli 0 3 5.857933 0.000000 3687 +theatr 0 2 6.263398 0.000000 5173 +universitydepart 0 2 6.263398 0.000000 4871 +bellcor 0 2 6.263398 0.000000 5174 +robertconst 0 1 6.957497 0.000000 10820 +thefox 0 1 6.957497 0.000000 10821 +markleon 0 1 6.957497 0.000000 10822 +formalsystem 0 1 6.957497 0.000000 10823 +levelmodul 0 1 6.957497 0.000000 10824 +cornella 0 1 6.957497 0.000000 10825 +publishedat 0 1 6.957497 0.000000 10826 +havegiven 0 1 6.957497 0.000000 10827 +theygiv 0 1 6.957497 0.000000 10828 +galleryof 0 1 6.957497 0.000000 10829 +tryth 0 1 6.957497 0.000000 10830 +fineart 0 1 6.957497 0.000000 10831 +cucshockei 0 1 6.957497 0.000000 10832 +thebackcountri 0 1 6.957497 0.000000 10833 +maintainedsoftwar 0 1 6.957497 0.000000 10834 +hockeyfor 0 1 6.957497 0.000000 10835 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..617a5ccf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +thing 0 84 2.484907 0.000000 189 +ithaca 0 65 2.772589 0.000000 294 +juli 0 60 2.833213 0.000000 305 +back 0 60 2.833213 0.000000 297 +go 0 33 3.433987 0.000000 529 +reach 0 24 3.761200 0.000000 688 +accept 0 18 4.060443 0.000000 879 +english 0 15 4.248495 0.000000 1033 +japan 0 8 4.875197 0.000000 1762 +japanes 0 4 5.568345 0.000000 2934 +sell 0 4 5.568345 0.000000 2935 +sold 0 4 5.568345 0.000000 2813 +sale 0 3 5.857933 0.000000 3688 +kamijo 1 1 6.957497 6.957497 10836 +koichi 0 1 6.957497 0.000000 10837 +kamijokoichi 0 1 6.957497 0.000000 10838 +papershometownseduc 0 1 6.957497 0.000000 10839 +experienceskoichi 0 1 6.957497 0.000000 10840 +muriel 0 1 6.957497 0.000000 10841 +kkamijoh 0 1 6.957497 0.000000 10842 +vnet 0 1 6.957497 0.000000 10843 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..33c2d8ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +document 0 121 2.079442 0.000000 89 +confer 0 126 2.079442 0.000000 100 +structur 0 106 2.197225 0.000000 105 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +upson 0 71 2.639057 0.000000 218 +laboratori 0 63 2.772589 0.000000 292 +dept 0 64 2.772589 0.000000 291 +ithaca 0 65 2.772589 0.000000 294 +previou 0 62 2.772589 0.000000 290 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +investig 0 51 2.995732 0.000000 353 +advisor 0 51 2.995732 0.000000 355 +better 0 45 3.135494 0.000000 401 +video 0 44 3.135494 0.000000 405 +autom 0 41 3.218876 0.000000 434 +vision 0 41 3.218876 0.000000 430 +york 0 41 3.218876 0.000000 435 +announc 0 40 3.258097 0.000000 441 +close 0 38 3.295837 0.000000 465 +field 0 37 3.332205 0.000000 482 +robot 0 36 3.367296 0.000000 497 +next 0 34 3.401197 0.000000 517 +articl 0 33 3.433987 0.000000 530 +collabor 0 32 3.465736 0.000000 543 +anim 0 31 3.496508 0.000000 557 +graph 0 30 3.555348 0.000000 576 +manipul 0 27 3.637586 0.000000 624 +arrai 0 27 3.637586 0.000000 627 +strategi 0 25 3.737670 0.000000 682 +handl 0 24 3.761200 0.000000 685 +magazin 0 24 3.761200 0.000000 704 +higher 0 24 3.761200 0.000000 690 +director 0 22 3.850148 0.000000 767 +navig 0 21 3.912023 0.000000 796 +facil 0 20 3.951244 0.000000 814 +stanford 0 17 4.110874 0.000000 955 +germani 0 17 4.110874 0.000000 946 +devic 0 16 4.174387 0.000000 1002 +vector 0 16 4.174387 0.000000 961 +micro 0 15 4.248495 0.000000 1031 +club 0 15 4.248495 0.000000 1058 +earlier 0 13 4.382027 0.000000 1140 +forc 0 10 4.653960 0.000000 1384 +donald 0 9 4.753590 0.000000 1510 +frank 0 9 4.753590 0.000000 1568 +wall 0 9 4.753590 0.000000 1553 +wire 0 8 4.875197 0.000000 1747 +gate 0 6 5.164786 0.000000 2182 +layout 0 6 5.164786 0.000000 2183 +lloyd 0 6 5.164786 0.000000 2103 +educurr 0 5 5.347108 0.000000 2504 +actuat 0 5 5.347108 0.000000 2442 +climb 0 4 5.568345 0.000000 2936 +karl 0 3 5.857933 0.000000 3623 +bhringer 0 3 5.857933 0.000000 3606 +karlsruh 0 3 5.857933 0.000000 3689 +microfabr 0 3 5.857933 0.000000 3610 +noel 0 3 5.857933 0.000000 3376 +kwon 0 3 5.857933 0.000000 3690 +deeper 0 3 5.857933 0.000000 3146 +friedrich 0 2 6.263398 0.000000 5175 +nanofabr 0 2 6.263398 0.000000 5010 +innew 0 2 6.263398 0.000000 4512 +sculptur 0 2 6.263398 0.000000 5176 +wright 0 2 6.263398 0.000000 5177 +nano 0 2 6.263398 0.000000 4961 +pagekarl 0 2 6.263398 0.000000 5043 +andassembl 0 1 6.957497 0.000000 10844 +dipl 0 1 6.957497 0.000000 10845 +implementmicro 0 1 6.957497 0.000000 10846 +withprogramm 0 1 6.957497 0.000000 10847 +professorbruc 0 1 6.957497 0.000000 10848 +founder 0 1 6.957497 0.000000 10849 +macdonaldand 0 1 6.957497 0.000000 10850 +hisresearch 0 1 6.957497 0.000000 10851 +invis 0 1 6.957497 0.000000 10852 +cantilev 0 1 6.957497 0.000000 10853 +fallingwat 0 1 6.957497 0.000000 10854 +outin 0 1 6.957497 0.000000 10855 +lindseth 0 1 6.957497 0.000000 10856 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..33fadf5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +mathemat 0 108 2.197225 0.000000 123 +site 0 106 2.197225 0.000000 119 +look 0 107 2.197225 0.000000 115 +make 0 111 2.197225 0.000000 120 +world 0 115 2.197225 0.000000 126 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +commun 0 95 2.397895 0.000000 157 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +wide 0 84 2.484907 0.000000 185 +help 0 83 2.484907 0.000000 175 +control 0 82 2.484907 0.000000 164 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +activ 0 84 2.484907 0.000000 182 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +messag 0 76 2.564949 0.000000 212 +method 0 80 2.564949 0.000000 213 +appear 0 78 2.564949 0.000000 210 +exampl 0 77 2.564949 0.000000 195 +june 0 79 2.564949 0.000000 214 +david 0 71 2.639057 0.000000 232 +html 0 75 2.639057 0.000000 235 +upson 0 71 2.639057 0.000000 218 +logic 0 71 2.639057 0.000000 230 +write 0 72 2.639057 0.000000 222 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +ithaca 0 65 2.772589 0.000000 294 +import 0 65 2.772589 0.000000 282 +experi 0 64 2.772589 0.000000 283 +simpl 0 60 2.833213 0.000000 298 +explor 0 58 2.890372 0.000000 324 +browser 0 56 2.890372 0.000000 313 +profession 0 51 2.995732 0.000000 345 +give 0 50 3.044522 0.000000 359 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +protocol 0 45 3.135494 0.000000 407 +even 0 45 3.135494 0.000000 393 +execut 0 45 3.135494 0.000000 404 +offer 0 43 3.178054 0.000000 414 +might 0 41 3.218876 0.000000 426 +programm 0 39 3.258097 0.000000 445 +multipl 0 39 3.258097 0.000000 453 +correct 0 38 3.295837 0.000000 462 +field 0 37 3.332205 0.000000 482 +cost 0 37 3.332205 0.000000 480 +feel 0 37 3.332205 0.000000 483 +respons 0 37 3.332205 0.000000 476 +download 0 36 3.367296 0.000000 489 +statist 0 35 3.401197 0.000000 521 +concurr 0 34 3.401197 0.000000 501 +random 0 34 3.401197 0.000000 511 +dissert 0 32 3.465736 0.000000 549 +kind 0 32 3.465736 0.000000 541 +specifi 0 30 3.555348 0.000000 568 +abl 0 30 3.555348 0.000000 566 +depend 0 29 3.583519 0.000000 583 +intend 0 28 3.610918 0.000000 599 +propos 0 28 3.610918 0.000000 602 +becom 0 28 3.610918 0.000000 603 +progress 0 28 3.610918 0.000000 598 +measur 0 28 3.610918 0.000000 609 +load 0 28 3.610918 0.000000 601 +except 0 28 3.610918 0.000000 607 +though 0 27 3.637586 0.000000 622 +consist 0 26 3.688879 0.000000 651 +revis 0 26 3.688879 0.000000 640 +reliabl 0 25 3.737670 0.000000 674 +fundament 0 25 3.737670 0.000000 661 +wai 0 25 3.737670 0.000000 662 +demonstr 0 24 3.761200 0.000000 694 +initi 0 23 3.806662 0.000000 717 +properti 0 22 3.850148 0.000000 749 +sciencecornel 0 22 3.850148 0.000000 768 +avoid 0 21 3.912023 0.000000 799 +verif 0 20 3.951244 0.000000 826 +applet 0 20 3.951244 0.000000 827 +basi 0 20 3.951244 0.000000 828 +assum 0 19 4.007333 0.000000 845 +hypertext 0 19 4.007333 0.000000 865 +concentr 0 18 4.060443 0.000000 906 +failur 0 18 4.060443 0.000000 898 +behavior 0 18 4.060443 0.000000 881 +encourag 0 18 4.060443 0.000000 880 +layer 1 17 4.110874 4.110874 926 +ultim 0 17 4.110874 0.000000 943 +condit 0 16 4.174387 0.000000 975 +portion 0 16 4.174387 0.000000 971 +action 0 15 4.248495 0.000000 1038 +horu 0 14 4.317488 0.000000 1116 +weak 0 13 4.382027 0.000000 1159 +whose 0 13 4.382027 0.000000 1166 +edui 0 13 4.382027 0.000000 1193 +suit 0 13 4.382027 0.000000 1129 +verifi 0 12 4.465908 0.000000 1261 +kenneth 0 12 4.465908 0.000000 1265 +minor 0 12 4.465908 0.000000 1237 +emploi 0 12 4.465908 0.000000 1284 +calcul 0 12 4.465908 0.000000 1268 +scienceat 0 11 4.553877 0.000000 1375 +reness 0 11 4.553877 0.000000 1333 +host 0 11 4.553877 0.000000 1306 +stack 0 10 4.653960 0.000000 1389 +guarante 0 10 4.653960 0.000000 1391 +certain 0 10 4.653960 0.000000 1393 +equal 0 10 4.653960 0.000000 1424 +desir 0 9 4.753590 0.000000 1542 +robbert 0 9 4.753590 0.000000 1529 +tempor 0 9 4.753590 0.000000 1584 +crash 0 8 4.875197 0.000000 1616 +notion 0 7 5.010635 0.000000 1947 +appar 0 7 5.010635 0.000000 1958 +dedic 0 7 5.010635 0.000000 1843 +hack 0 7 5.010635 0.000000 1950 +furthermor 0 6 5.164786 0.000000 2141 +rough 0 6 5.164786 0.000000 2107 +studentdepart 0 5 5.347108 0.000000 2505 +unnecessari 0 5 5.347108 0.000000 2506 +lost 0 5 5.347108 0.000000 2358 +ofdistribut 0 5 5.347108 0.000000 2316 +notabl 0 5 5.347108 0.000000 2276 +puzzl 0 5 5.347108 0.000000 2507 +disconnect 0 4 5.568345 0.000000 2664 +clearli 0 4 5.568345 0.000000 2590 +formula 0 3 5.857933 0.000000 3405 +omit 0 3 5.857933 0.000000 3466 +confid 0 3 5.857933 0.000000 3691 +temporarili 0 3 5.857933 0.000000 3692 +parallelmachin 0 3 5.857933 0.000000 3693 +lego 0 3 5.857933 0.000000 3188 +prone 0 2 6.263398 0.000000 5178 +thehoru 0 2 6.263398 0.000000 5179 +withprofessor 0 2 6.263398 0.000000 5180 +ofhoru 0 2 6.263398 0.000000 5181 +straightforward 0 2 6.263398 0.000000 4272 +thetop 0 2 6.263398 0.000000 4327 +atyp 0 2 6.263398 0.000000 5042 +sufficientto 0 2 6.263398 0.000000 4261 +haswork 0 2 6.263398 0.000000 5182 +distributedenviron 0 2 6.263398 0.000000 5183 +toi 0 2 6.263398 0.000000 5184 +linksfor 0 2 6.263398 0.000000 5185 +karr 0 1 6.957497 0.000000 10857 +karrdavid 0 1 6.957497 0.000000 10858 +karrphd 0 1 6.957497 0.000000 10859 +birmananddr 0 1 6.957497 0.000000 10860 +protocolsmi 0 1 6.957497 0.000000 10861 +formalspecif 0 1 6.957497 0.000000 10862 +variousinterest 0 1 6.957497 0.000000 10863 +usedin 0 1 6.957497 0.000000 10864 +stylefor 0 1 6.957497 0.000000 10865 +itsinterfac 0 1 6.957497 0.000000 10866 +andbelow 0 1 6.957497 0.000000 10867 +agiven 0 1 6.957497 0.000000 10868 +unusualcombin 0 1 6.957497 0.000000 10869 +systemsshould 0 1 6.957497 0.000000 10870 +constructcustom 0 1 6.957497 0.000000 10871 +theirassoci 0 1 6.957497 0.000000 10872 +thesecur 0 1 6.957497 0.000000 10873 +harden 0 1 6.957497 0.000000 10874 +ofverifi 0 1 6.957497 0.000000 10875 +stem 0 1 6.957497 0.000000 10876 +thepromis 0 1 6.957497 0.000000 10877 +variousguarante 0 1 6.957497 0.000000 10878 +passingenviron 0 1 6.957497 0.000000 10879 +delayedor 0 1 6.957497 0.000000 10880 +componentswer 0 1 6.957497 0.000000 10881 +considerablepromis 0 1 6.957497 0.000000 10882 +consistencywhil 0 1 6.957497 0.000000 10883 +filesin 0 1 6.957497 0.000000 10884 +partitionedinto 0 1 6.957497 0.000000 10885 +wouldallow 0 1 6.957497 0.000000 10886 +performancemi 0 1 6.957497 0.000000 10887 +andeffici 0 1 6.957497 0.000000 10888 +ofsystem 0 1 6.957497 0.000000 10889 +suscept 0 1 6.957497 0.000000 10890 +javath 0 1 6.957497 0.000000 10891 +applicationwith 0 1 6.957497 0.000000 10892 +tonavig 0 1 6.957497 0.000000 10893 +myweb 0 1 6.957497 0.000000 10894 +anetscap 0 1 6.957497 0.000000 10895 +abirthdai 0 1 6.957497 0.000000 10896 +forverifi 0 1 6.957497 0.000000 10897 +affiliationsi 0 1 6.957497 0.000000 10898 +andmaa 0 1 6.957497 0.000000 10899 +informationseemi 0 1 6.957497 0.000000 10900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..75ec5f92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +click 0 142 1.945910 0.000000 78 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +postscript 0 131 2.079442 0.000000 90 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +memori 0 101 2.302585 0.000000 139 +take 0 97 2.302585 0.000000 134 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +help 0 83 2.484907 0.000000 175 +interfac 0 79 2.564949 0.000000 209 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +master 0 76 2.564949 0.000000 216 +dynam 0 76 2.564949 0.000000 194 +multimedia 0 68 2.708050 0.000000 258 +view 0 70 2.708050 0.000000 254 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +simul 0 66 2.708050 0.000000 255 +virtual 0 62 2.772589 0.000000 285 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +scientif 0 53 2.944439 0.000000 341 +cool 0 49 3.044522 0.000000 374 +visual 0 48 3.044522 0.000000 372 +effect 0 46 3.091042 0.000000 385 +california 0 46 3.091042 0.000000 388 +featur 0 46 3.091042 0.000000 386 +favorit 0 44 3.135494 0.000000 410 +video 0 44 3.135494 0.000000 405 +combin 0 42 3.218876 0.000000 421 +music 0 42 3.218876 0.000000 436 +brian 0 38 3.295837 0.000000 466 +open 0 38 3.295837 0.000000 469 +game 0 36 3.367296 0.000000 498 +singl 0 34 3.401197 0.000000 510 +independ 0 32 3.465736 0.000000 548 +platform 0 29 3.583519 0.000000 591 +full 0 28 3.610918 0.000000 615 +enhanc 0 26 3.688879 0.000000 644 +aspect 0 25 3.737670 0.000000 663 +universityithaca 0 24 3.761200 0.000000 710 +motion 0 24 3.761200 0.000000 699 +sciencecornel 0 22 3.850148 0.000000 768 +chip 0 21 3.912023 0.000000 770 +break 0 20 3.951244 0.000000 812 +smith 0 20 3.951244 0.000000 820 +facil 0 20 3.951244 0.000000 814 +mpeg 0 20 3.951244 0.000000 831 +excel 0 19 4.007333 0.000000 868 +lot 0 18 4.060443 0.000000 889 +stand 0 18 4.060443 0.000000 891 +took 0 16 4.174387 0.000000 1010 +diego 0 16 4.174387 0.000000 992 +transit 0 15 4.248495 0.000000 1046 +drive 0 15 4.248495 0.000000 1052 +track 0 15 4.248495 0.000000 1029 +scene 0 14 4.317488 0.000000 1114 +resolut 0 13 4.382027 0.000000 1172 +jonathan 0 13 4.382027 0.000000 1174 +incorpor 0 13 4.382027 0.000000 1163 +entertain 0 12 4.465908 0.000000 1286 +captur 0 12 4.465908 0.000000 1232 +realiti 0 12 4.465908 0.000000 1272 +player 0 11 4.553877 0.000000 1371 +primit 0 11 4.553877 0.000000 1317 +screen 0 9 4.753590 0.000000 1577 +rivl 0 8 4.875197 0.000000 1632 +capac 0 8 4.875197 0.000000 1740 +star 0 8 4.875197 0.000000 1717 +pronounc 0 7 5.010635 0.000000 1918 +vehicl 0 7 5.010635 0.000000 1928 +clip 0 7 5.010635 0.000000 1868 +quick 0 6 5.164786 0.000000 2184 +railroad 0 6 5.164786 0.000000 2161 +hypothet 0 5 5.347108 0.000000 2474 +opengl 0 5 5.347108 0.000000 2299 +screenshot 0 4 5.568345 0.000000 2743 +multitask 0 4 5.568345 0.000000 2803 +enjoy 0 4 5.568345 0.000000 2937 +swartz 0 4 5.568345 0.000000 2878 +engineeringclass 0 3 5.857933 0.000000 3667 +lai 0 3 5.857933 0.000000 3694 +inventor 0 3 5.857933 0.000000 3695 +hodja 0 2 6.263398 0.000000 4972 +fledg 0 2 6.263398 0.000000 4973 +resumesom 0 2 6.263398 0.000000 5186 +hogman 0 1 6.957497 0.000000 10901 +qualcomm 0 1 6.957497 0.000000 10902 +pagekartik 0 1 6.957497 0.000000 10903 +kapadiamast 0 1 6.957497 0.000000 10904 +dabnei 0 1 6.957497 0.000000 10905 +kkapadia 0 1 6.957497 0.000000 10906 +comcurr 0 1 6.957497 0.000000 10907 +incorporatedmi 0 1 6.957497 0.000000 10908 +projectshoca 0 1 6.957497 0.000000 10909 +chiphoca 0 1 6.957497 0.000000 10910 +gameboard 0 1 6.957497 0.000000 10911 +rivlrivl 0 1 6.957497 0.000000 10912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..90eafe75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +high 0 130 2.079442 0.000000 101 +sinc 0 90 2.397895 0.000000 159 +center 0 88 2.397895 0.000000 158 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +wide 0 84 2.484907 0.000000 185 +write 0 72 2.639057 0.000000 222 +onlin 0 75 2.639057 0.000000 223 +written 0 63 2.772589 0.000000 278 +back 0 60 2.833213 0.000000 297 +publish 1 57 2.890372 2.890372 326 +think 0 57 2.890372 0.000000 314 +much 0 52 2.995732 0.000000 349 +keep 0 44 3.135494 0.000000 409 +press 0 42 3.218876 0.000000 419 +howev 0 41 3.218876 0.000000 422 +editor 0 41 3.218876 0.000000 433 +go 0 33 3.433987 0.000000 529 +mine 0 26 3.688879 0.000000 654 +rather 0 26 3.688879 0.000000 642 +other 0 24 3.761200 0.000000 697 +wrote 0 20 3.951244 0.000000 830 +wonder 0 20 3.951244 0.000000 815 +letter 1 16 4.174387 4.174387 981 +went 0 12 4.465908 0.000000 1279 +newspap 0 12 4.465908 0.000000 1280 +true 0 10 4.653960 0.000000 1422 +rapid 0 10 4.653960 0.000000 1453 +poetri 0 9 4.753590 0.000000 1596 +mile 0 8 4.875197 0.000000 1743 +conflict 0 6 5.164786 0.000000 2041 +famou 0 6 5.164786 0.000000 2185 +grand 0 5 5.347108 0.000000 2425 +mess 0 4 5.568345 0.000000 2886 +flame 0 3 5.857933 0.000000 3696 +arm 0 3 5.857933 0.000000 3697 +argu 0 3 5.857933 0.000000 3698 +dread 0 3 5.857933 0.000000 3630 +suspect 0 2 6.263398 0.000000 5187 +anyhow 0 2 6.263398 0.000000 5188 +mathematician 0 2 6.263398 0.000000 5189 +terrorist 0 2 6.263398 0.000000 5190 +writingsi 0 1 6.957497 0.000000 10913 +proudof 0 1 6.957497 0.000000 10914 +morethought 0 1 6.957497 0.000000 10915 +conscienti 0 1 6.957497 0.000000 10916 +objector 0 1 6.957497 0.000000 10917 +myfirst 0 1 6.957497 0.000000 10918 +fewyear 0 1 6.957497 0.000000 10919 +gulf 0 1 6.957497 0.000000 10920 +vestart 0 1 6.957497 0.000000 10921 +lest 0 1 6.957497 0.000000 10922 +dprobabl 0 1 6.957497 0.000000 10923 +essayist 0 1 6.957497 0.000000 10924 +byron 0 1 6.957497 0.000000 10925 +asuburb 0 1 6.957497 0.000000 10926 +unabomb 0 1 6.957497 0.000000 10927 +mathematiciansar 0 1 6.957497 0.000000 10928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..73b0ae17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,269 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +austin 0 168 1.791759 0.000000 63 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +hall 0 146 1.945910 0.000000 65 +relat 0 139 1.945910 0.000000 68 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +report 0 131 2.079442 0.000000 92 +databas 0 122 2.079442 0.000000 86 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +confer 0 126 2.079442 0.000000 100 +place 0 106 2.197225 0.000000 124 +structur 0 106 2.197225 0.000000 105 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +imag 0 91 2.397895 0.000000 161 +octob 0 89 2.397895 0.000000 156 +search 0 95 2.397895 0.000000 155 +info 0 85 2.484907 0.000000 176 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +librari 0 87 2.484907 0.000000 181 +internet 0 83 2.484907 0.000000 186 +appear 0 78 2.564949 0.000000 210 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +refer 0 78 2.564949 0.000000 203 +html 0 75 2.639057 0.000000 235 +upson 0 71 2.639057 0.000000 218 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +david 0 71 2.639057 0.000000 232 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +ithaca 0 65 2.772589 0.000000 294 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +dept 0 64 2.772589 0.000000 291 +plan 0 65 2.772589 0.000000 272 +new 0 64 2.772589 0.000000 262 +street 0 63 2.772589 0.000000 293 +march 0 61 2.833213 0.000000 295 +simpl 0 60 2.833213 0.000000 298 +content 0 59 2.833213 0.000000 302 +direct 0 57 2.890372 0.000000 316 +explor 0 58 2.890372 0.000000 324 +overview 0 56 2.890372 0.000000 323 +index 0 56 2.890372 0.000000 309 +tabl 0 51 2.995732 0.000000 346 +cool 0 49 3.044522 0.000000 374 +pointer 0 48 3.044522 0.000000 368 +life 0 50 3.044522 0.000000 375 +electron 0 47 3.091042 0.000000 379 +mark 0 44 3.135494 0.000000 403 +netscap 0 44 3.135494 0.000000 395 +music 0 42 3.218876 0.000000 436 +map 0 39 3.258097 0.000000 452 +industri 0 38 3.295837 0.000000 464 +open 0 38 3.295837 0.000000 469 +china 0 37 3.332205 0.000000 487 +global 0 34 3.401197 0.000000 520 +common 0 30 3.555348 0.000000 574 +synchron 0 29 3.583519 0.000000 588 +weather 0 28 3.610918 0.000000 618 +bound 0 26 3.688879 0.000000 659 +reliabl 0 25 3.737670 0.000000 674 +sport 0 25 3.737670 0.000000 683 +other 0 24 3.761200 0.000000 697 +scalabl 0 24 3.761200 0.000000 705 +yahoo 0 24 3.761200 0.000000 707 +flexibl 0 21 3.912023 0.000000 792 +navig 0 21 3.912023 0.000000 796 +kernel 0 20 3.951244 0.000000 825 +binari 0 20 3.951244 0.000000 823 +lyco 0 19 4.007333 0.000000 871 +women 0 16 4.174387 0.000000 1004 +commerci 0 16 4.174387 0.000000 1005 +brief 0 16 4.174387 0.000000 1001 +stock 0 16 4.174387 0.000000 1007 +horu 0 14 4.317488 0.000000 1116 +canada 0 13 4.382027 0.000000 1158 +alan 0 13 4.382027 0.000000 1146 +brad 0 12 4.465908 0.000000 1264 +weight 0 12 4.465908 0.000000 1204 +kenneth 0 12 4.465908 0.000000 1265 +reness 0 11 4.553877 0.000000 1333 +transpar 0 11 4.553877 0.000000 1325 +lake 0 11 4.553877 0.000000 1373 +itali 0 11 4.553877 0.000000 1378 +werner 0 10 4.653960 0.000000 1385 +cook 0 10 4.653960 0.000000 1464 +consortium 0 10 4.653960 0.000000 1467 +mosaic 0 10 4.653960 0.000000 1426 +sosp 0 10 4.653960 0.000000 1416 +mountain 0 10 4.653960 0.000000 1456 +jean 0 10 4.653960 0.000000 1440 +cheng 0 10 4.653960 0.000000 1381 +robbert 0 9 4.753590 0.000000 1529 +birman 0 9 4.753590 0.000000 1531 +wall 0 9 4.753590 0.000000 1553 +light 0 9 4.753590 0.000000 1533 +motorola 0 9 4.753590 0.000000 1546 +vogel 0 8 4.875197 0.000000 1622 +sigop 0 8 4.875197 0.000000 1727 +european 0 8 4.875197 0.000000 1763 +heart 0 8 4.875197 0.000000 1729 +grove 0 8 4.875197 0.000000 1675 +wire 0 8 4.875197 0.000000 1747 +edg 0 8 4.875197 0.000000 1647 +synchroni 0 7 5.010635 0.000000 1923 +gatewai 0 7 5.010635 0.000000 1942 +discoveri 0 7 5.010635 0.000000 1915 +hunt 0 7 5.010635 0.000000 1798 +scout 0 7 5.010635 0.000000 1903 +antonio 0 6 5.164786 0.000000 2186 +symposiumon 0 6 5.164786 0.000000 2054 +alex 0 6 5.164786 0.000000 2130 +theproject 0 6 5.164786 0.000000 1981 +postcard 0 6 5.164786 0.000000 2181 +atlant 0 5 5.347108 0.000000 2508 +advic 0 5 5.347108 0.000000 2509 +colorado 0 4 5.568345 0.000000 2938 +ncsa 0 4 5.568345 0.000000 2767 +ireland 0 4 5.568345 0.000000 2853 +hayden 0 4 5.568345 0.000000 2844 +hickei 0 4 5.568345 0.000000 2845 +vaysburd 0 4 5.568345 0.000000 2846 +insur 0 4 5.568345 0.000000 2939 +glade 0 3 5.857933 0.000000 3537 +fashion 0 3 5.857933 0.000000 3699 +educornel 0 3 5.857933 0.000000 3601 +universitydept 0 3 5.857933 0.000000 3602 +takako 0 3 5.857933 0.000000 3538 +systemscomput 0 3 5.857933 0.000000 3148 +arizona 0 3 5.857933 0.000000 3700 +beginn 0 3 5.857933 0.000000 3330 +hotjava 0 3 5.857933 0.000000 3220 +ftc 0 3 5.857933 0.000000 3275 +elsevi 0 3 5.857933 0.000000 3671 +copper 0 3 5.857933 0.000000 3536 +summit 0 3 5.857933 0.000000 3684 +counti 0 3 5.857933 0.000000 3682 +crew 0 3 5.857933 0.000000 3347 +underground 0 3 5.857933 0.000000 3604 +spider 0 3 5.857933 0.000000 3605 +katherin 0 2 6.263398 0.000000 4851 +ofvirtu 0 2 6.263398 0.000000 5061 +lui 0 2 6.263398 0.000000 5164 +dalia 0 2 6.263398 0.000000 4852 +malki 0 2 6.263398 0.000000 4853 +uiuc 0 2 6.263398 0.000000 4509 +cern 0 2 6.263398 0.000000 5079 +icdc 0 2 6.263398 0.000000 5191 +ucsd 0 2 6.263398 0.000000 5192 +amazon 0 2 6.263398 0.000000 5193 +dessert 0 2 6.263398 0.000000 5194 +intertext 0 2 6.263398 0.000000 5002 +infoth 0 2 6.263398 0.000000 5195 +infodistribut 0 1 6.957497 0.000000 10929 +infocompani 0 1 6.957497 0.000000 10930 +lisboa 0 1 6.957497 0.000000 10931 +httpd 0 1 6.957497 0.000000 10932 +xmosaic 0 1 6.957497 0.000000 10933 +guokguo 0 1 6.957497 0.000000 10934 +multicastprotocol 0 1 6.957497 0.000000 10935 +publicationskatherin 0 1 6.957497 0.000000 10936 +connemara 0 1 6.957497 0.000000 10937 +rodrigu 0 1 6.957497 0.000000 10938 +sargento 0 1 6.957497 0.000000 10939 +paulo 0 1 6.957497 0.000000 10940 +verisimo 0 1 6.957497 0.000000 10941 +niagara 0 1 6.957497 0.000000 10942 +networkscool 0 1 6.957497 0.000000 10943 +toolsbibliographyconferencesjournalsacademia 0 1 6.957497 0.000000 10944 +infoschool 0 1 6.957497 0.000000 10945 +infojob 0 1 6.957497 0.000000 10946 +searchinterest 0 1 6.957497 0.000000 10947 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 0 1 6.957497 0.000000 10948 +inforesearch 0 1 6.957497 0.000000 10949 +systempointershoru 0 1 6.957497 0.000000 10950 +productspringtotemtransisx 0 1 6.957497 0.000000 10951 +microsystemslab 0 1 6.957497 0.000000 10952 +networksmulticast 0 1 6.957497 0.000000 10953 +protocolsn 0 1 6.957497 0.000000 10954 +fromlblgun 0 1 6.957497 0.000000 10955 +sguid 0 1 6.957497 0.000000 10956 +quickrefer 0 1 6.957497 0.000000 10957 +htmldocument 0 1 6.957497 0.000000 10958 +finder 0 1 6.957497 0.000000 10959 +bibliographybibliographi 0 1 6.957497 0.000000 10960 +oldindex 0 1 6.957497 0.000000 10961 +hpdc 0 1 6.957497 0.000000 10962 +srd 0 1 6.957497 0.000000 10963 +jsac 0 1 6.957497 0.000000 10964 +scienceacademia 0 1 6.957497 0.000000 10965 +openingsibmdelltandemtiapplebel 0 1 6.957497 0.000000 10966 +gradschool 0 1 6.957497 0.000000 10967 +gradjob 0 1 6.957497 0.000000 10968 +ukinterest 0 1 6.957497 0.000000 10969 +moviesbailei 0 1 6.957497 0.000000 10970 +concertslibrari 0 1 6.957497 0.000000 10971 +hightechin 0 1 6.957497 0.000000 10972 +inesc 0 1 6.957497 0.000000 10973 +resort 0 1 6.957497 0.000000 10974 +coloradooth 0 1 6.957497 0.000000 10975 +infoart 0 1 6.957497 0.000000 10976 +weblouvreth 0 1 6.957497 0.000000 10977 +linebook 0 1 6.957497 0.000000 10978 +calvinhobb 0 1 6.957497 0.000000 10979 +archivecardsmagicchinaart 0 1 6.957497 0.000000 10980 +gourmetl 0 1 6.957497 0.000000 10981 +cordonbleu 0 1 6.957497 0.000000 10982 +fashional 0 1 6.957497 0.000000 10983 +linksa 0 1 6.957497 0.000000 10984 +cjlutz 0 1 6.957497 0.000000 10985 +wwweb 0 1 6.957497 0.000000 10986 +pagewith 0 1 6.957497 0.000000 10987 +tmexpressfirst 0 1 6.957497 0.000000 10988 +wireirc 0 1 6.957497 0.000000 10989 +faqfashion 0 1 6.957497 0.000000 10990 +nethair 0 1 6.957497 0.000000 10991 +diesel 0 1 6.957497 0.000000 10992 +guessfriend 0 1 6.957497 0.000000 10993 +deng 0 1 6.957497 0.000000 10994 +shiji 0 1 6.957497 0.000000 10995 +edulibrari 0 1 6.957497 0.000000 10996 +congressmagazin 0 1 6.957497 0.000000 10997 +timegeorg 0 1 6.957497 0.000000 10998 +gilder 0 1 6.957497 0.000000 10999 +archivesinanet 0 1 6.957497 0.000000 11000 +newsworld 0 1 6.957497 0.000000 11001 +olymp 0 1 6.957497 0.000000 11002 +streetheadlin 0 1 6.957497 0.000000 11003 +weatherhunt 0 1 6.957497 0.000000 11004 +informationglob 0 1 6.957497 0.000000 11005 +navigatorhom 0 1 6.957497 0.000000 11006 +wanderersand 0 1 6.957497 0.000000 11007 +kguo 0 1 6.957497 0.000000 11008 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..508fcdbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +object 0 138 1.945910 0.000000 79 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +user 0 104 2.302585 0.000000 137 +graphic 0 90 2.397895 0.000000 147 +learn 0 86 2.484907 0.000000 170 +environ 0 84 2.484907 0.000000 177 +chang 0 82 2.484907 0.000000 163 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +java 1 70 2.708050 2.708050 248 +creat 0 63 2.772589 0.000000 277 +written 0 63 2.772589 0.000000 278 +interact 0 62 2.772589 0.000000 270 +simpl 0 60 2.833213 0.000000 298 +understand 0 47 3.091042 0.000000 384 +form 0 39 3.258097 0.000000 443 +multi 0 36 3.367296 0.000000 493 +power 0 30 3.555348 0.000000 573 +platform 0 29 3.583519 0.000000 591 +input 0 23 3.806662 0.000000 727 +portabl 0 20 3.951244 0.000000 819 +applet 0 20 3.951244 0.000000 827 +safe 0 12 4.465908 0.000000 1274 +polygon 0 8 4.875197 0.000000 1723 +rotat 0 5 5.347108 0.000000 2295 +vertic 0 5 5.347108 0.000000 2270 +cube 0 4 5.568345 0.000000 2940 +introductionthi 0 2 6.263398 0.000000 4056 +tetra 0 2 6.263398 0.000000 5196 +wirefram 0 1 6.957497 0.000000 11009 +desgin 0 1 6.957497 0.000000 11010 +threader 0 1 6.957497 0.000000 11011 +speific 0 1 6.957497 0.000000 11012 +react 0 1 6.957497 0.000000 11013 +cone 0 1 6.957497 0.000000 11014 +cylind 0 1 6.957497 0.000000 11015 +toru 0 1 6.957497 0.000000 11016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..54af4eb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +tool 0 117 2.079442 0.000000 93 +theori 0 111 2.197225 0.000000 127 +assist 0 112 2.197225 0.000000 113 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +associ 0 93 2.397895 0.000000 151 +homepag 0 93 2.397895 0.000000 148 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +ieee 0 86 2.484907 0.000000 190 +resourc 0 81 2.484907 0.000000 172 +academ 0 82 2.484907 0.000000 178 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +master 0 76 2.564949 0.000000 216 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +david 0 71 2.639057 0.000000 232 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +foundat 0 62 2.772589 0.000000 286 +ithaca 0 65 2.772589 0.000000 294 +visit 0 63 2.772589 0.000000 288 +result 0 65 2.772589 0.000000 281 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +content 0 59 2.833213 0.000000 302 +juli 0 60 2.833213 0.000000 305 +point 0 58 2.890372 0.000000 319 +thesi 0 57 2.890372 0.000000 327 +cover 0 55 2.944439 0.000000 329 +local 0 55 2.944439 0.000000 334 +talk 0 53 2.944439 0.000000 336 +approach 0 48 3.044522 0.000000 366 +without 0 50 3.044522 0.000000 370 +set 0 50 3.044522 0.000000 361 +protocol 0 45 3.135494 0.000000 407 +math 0 44 3.135494 0.000000 402 +combin 0 42 3.218876 0.000000 421 +error 0 40 3.258097 0.000000 449 +connect 0 37 3.332205 0.000000 485 +short 0 36 3.367296 0.000000 499 +robot 0 36 3.367296 0.000000 497 +approxim 0 35 3.401197 0.000000 509 +bibliographi 0 34 3.401197 0.000000 518 +singl 0 34 3.401197 0.000000 510 +posit 0 31 3.496508 0.000000 552 +graph 0 30 3.555348 0.000000 576 +secur 0 30 3.555348 0.000000 577 +computersci 0 30 3.555348 0.000000 562 +proc 0 26 3.688879 0.000000 649 +bound 0 26 3.688879 0.000000 659 +compar 0 26 3.688879 0.000000 648 +berkelei 0 26 3.688879 0.000000 657 +jeff 0 25 3.737670 0.000000 673 +flow 0 24 3.761200 0.000000 700 +motion 0 24 3.761200 0.000000 699 +yahoo 0 24 3.761200 0.000000 707 +universityithaca 0 24 3.761200 0.000000 710 +geometri 0 22 3.850148 0.000000 752 +emphasi 0 22 3.850148 0.000000 755 +siam 0 21 3.912023 0.000000 800 +path 0 21 3.912023 0.000000 778 +rout 0 21 3.912023 0.000000 793 +vlsi 0 21 3.912023 0.000000 795 +tenni 0 20 3.951244 0.000000 838 +geometr 0 19 4.007333 0.000000 852 +definit 0 19 4.007333 0.000000 864 +spend 0 19 4.007333 0.000000 850 +andrew 0 19 4.007333 0.000000 849 +hypertext 0 19 4.007333 0.000000 865 +lower 0 18 4.060443 0.000000 886 +dimension 0 18 4.060443 0.000000 909 +analyz 0 17 4.110874 0.000000 925 +expand 0 17 4.110874 0.000000 928 +segment 0 17 4.110874 0.000000 931 +stanford 0 17 4.110874 0.000000 955 +letter 0 16 4.174387 0.000000 981 +latenc 0 16 4.174387 0.000000 993 +biologi 0 15 4.248495 0.000000 1049 +princeton 0 15 4.248495 0.000000 1042 +embed 0 14 4.317488 0.000000 1102 +discret 0 13 4.382027 0.000000 1165 +resolut 0 13 4.382027 0.000000 1172 +safe 0 12 4.465908 0.000000 1274 +probabilist 0 11 4.553877 0.000000 1343 +node 0 11 4.553877 0.000000 1326 +mesh 0 11 4.553877 0.000000 1351 +israel 0 11 4.553877 0.000000 1366 +arbitrari 0 11 4.553877 0.000000 1359 +excit 0 11 4.553877 0.000000 1329 +queue 0 10 4.653960 0.000000 1386 +minimum 0 9 4.753590 0.000000 1555 +assumpt 0 9 4.753590 0.000000 1514 +distanc 0 9 4.753590 0.000000 1500 +yellow 0 9 4.753590 0.000000 1601 +cryptographi 0 9 4.753590 0.000000 1512 +combinatori 0 8 4.875197 0.000000 1629 +hallcornel 0 8 4.875197 0.000000 1757 +molecular 0 7 5.010635 0.000000 1887 +trade 0 7 5.010635 0.000000 1815 +foc 0 7 5.010635 0.000000 1880 +huttenloch 0 6 5.164786 0.000000 1983 +rubinfeld 0 6 5.164786 0.000000 1998 +dens 0 6 5.164786 0.000000 2122 +layout 0 6 5.164786 0.000000 2183 +relax 0 6 5.164786 0.000000 2120 +consensu 0 6 5.164786 0.000000 2080 +reconstruct 0 6 5.164786 0.000000 2170 +plane 0 6 5.164786 0.000000 2187 +symposiumon 0 6 5.164786 0.000000 2054 +infoseek 0 6 5.164786 0.000000 2188 +soda 0 6 5.164786 0.000000 2189 +corp 0 6 5.164786 0.000000 2139 +semi 0 5 5.347108 0.000000 2510 +almaden 0 5 5.347108 0.000000 2511 +stabil 0 5 5.347108 0.000000 2286 +diagram 0 5 5.347108 0.000000 2346 +stoc 0 5 5.347108 0.000000 2491 +chess 0 5 5.347108 0.000000 2486 +conform 0 4 5.568345 0.000000 2941 +disjoint 0 4 5.568345 0.000000 2709 +ratio 0 4 5.568345 0.000000 2942 +hausdorff 0 4 5.568345 0.000000 2633 +glimps 0 4 5.568345 0.000000 2778 +planar 0 3 5.857933 0.000000 3647 +formobil 0 3 5.857933 0.000000 3261 +fernandez 0 3 5.857933 0.000000 3591 +deliveri 0 3 5.857933 0.000000 3278 +onprincipl 0 3 5.857933 0.000000 3701 +berger 0 3 5.857933 0.000000 3702 +universitycomput 0 3 5.857933 0.000000 3651 +ncstrl 0 3 5.857933 0.000000 3530 +jone 0 3 5.857933 0.000000 3703 +rivest 0 3 5.857933 0.000000 3248 +kleinberg 0 2 6.263398 0.000000 5093 +adversari 0 2 6.263398 0.000000 5065 +tardo 0 2 6.263398 0.000000 5090 +williamson 0 2 6.263398 0.000000 5101 +goeman 0 2 6.263398 0.000000 5100 +leighton 0 2 6.263398 0.000000 5097 +diamet 0 2 6.263398 0.000000 5102 +lovasz 0 2 6.263398 0.000000 5091 +greedi 0 2 6.263398 0.000000 4143 +attiya 0 2 6.263398 0.000000 5197 +voronoi 0 2 6.263398 0.000000 5036 +euclidean 0 2 6.263398 0.000000 5198 +sdsc 0 2 6.263398 0.000000 5199 +kleinber 0 1 6.957497 0.000000 11017 +disjointpath 0 1 6.957497 0.000000 11018 +anddisjoint 0 1 6.957497 0.000000 11019 +stabilityof 0 1 6.957497 0.000000 11020 +particularlyth 0 1 6.957497 0.000000 11021 +seeselect 0 1 6.957497 0.000000 11022 +publicationsmiscellan 0 1 6.957497 0.000000 11023 +linkspapersapproxim 0 1 6.957497 0.000000 11024 +unsplitt 0 1 6.957497 0.000000 11025 +aggarw 0 1 6.957497 0.000000 11026 +improvedapproxim 0 1 6.957497 0.000000 11027 +thetafunct 0 1 6.957497 0.000000 11028 +vertex 0 1 6.957497 0.000000 11029 +simplepolygon 0 1 6.957497 0.000000 11030 +serverbalanc 0 1 6.957497 0.000000 11031 +yaniv 0 1 6.957497 0.000000 11032 +serveralgorithm 0 1 6.957497 0.000000 11033 +robotnavig 0 1 6.957497 0.000000 11034 +awerbuch 0 1 6.957497 0.000000 11035 +borodin 0 1 6.957497 0.000000 11036 +raghavan 0 1 6.957497 0.000000 11037 +sudan 0 1 6.957497 0.000000 11038 +lynch 0 1 6.957497 0.000000 11039 +offsbetween 0 1 6.957497 0.000000 11040 +quiesc 0 1 6.957497 0.000000 11041 +managementprotocol 0 1 6.957497 0.000000 11042 +mullainathan 0 1 6.957497 0.000000 11043 +boundsand 0 1 6.957497 0.000000 11044 +athre 0 1 6.957497 0.000000 11045 +kedem 0 1 6.957497 0.000000 11046 +pointset 0 1 6.957497 0.000000 11047 +invariantsof 0 1 6.957497 0.000000 11048 +linkssearch 0 1 6.957497 0.000000 11049 +bibliographiesaltavista 0 1 6.957497 0.000000 11050 +nynex 0 1 6.957497 0.000000 11051 +sitescornel 0 1 6.957497 0.000000 11052 +computingtc 0 1 6.957497 0.000000 11053 +crescenzi 0 1 6.957497 0.000000 11054 +kann 0 1 6.957497 0.000000 11055 +compendium 0 1 6.957497 0.000000 11056 +biologycomput 0 1 6.957497 0.000000 11057 +carb 0 1 6.957497 0.000000 11058 +biocomput 0 1 6.957497 0.000000 11059 +geometrydavid 0 1 6.957497 0.000000 11060 +eppstein 0 1 6.957497 0.000000 11061 +junkyard 0 1 6.957497 0.000000 11062 +erickson 0 1 6.957497 0.000000 11063 +securitymitr 0 1 6.957497 0.000000 11064 +miscellaneousnetscap 0 1 6.957497 0.000000 11065 +intellicast 0 1 6.957497 0.000000 11066 +kleinbergdepart 0 1 6.957497 0.000000 11067 +scienceupson 0 1 6.957497 0.000000 11068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..e46977a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +homepag 1 93 2.397895 2.397895 148 +pictur 0 89 2.397895 0.000000 160 +start 0 83 2.484907 0.000000 173 +thing 0 84 2.484907 0.000000 189 +master 0 76 2.564949 0.000000 216 +come 0 78 2.564949 0.000000 202 +want 0 79 2.564949 0.000000 199 +html 0 75 2.639057 0.000000 235 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +back 0 60 2.833213 0.000000 297 +march 0 61 2.833213 0.000000 295 +think 0 57 2.890372 0.000000 314 +februari 0 54 2.944439 0.000000 328 +move 0 47 3.091042 0.000000 382 +music 0 42 3.218876 0.000000 436 +electr 0 38 3.295837 0.000000 461 +return 0 34 3.401197 0.000000 502 +corpor 0 21 3.912023 0.000000 802 +worth 0 11 4.553877 0.000000 1294 +forc 0 10 4.653960 0.000000 1384 +japan 0 8 4.875197 0.000000 1762 +cornellunivers 0 7 5.010635 0.000000 1916 +superhighwai 0 4 5.568345 0.000000 2943 +sell 0 4 5.568345 0.000000 2935 +tokyo 0 3 5.857933 0.000000 3622 +acquaint 0 3 5.857933 0.000000 3468 +sale 0 3 5.857933 0.000000 3688 +melco 0 2 6.263398 0.000000 5200 +advert 0 2 6.263398 0.000000 5201 +kazushi 0 1 6.957497 0.000000 11069 +otakota 0 1 6.957497 0.000000 11070 +edukazushi 0 1 6.957497 0.000000 11071 +mitusbishi 0 1 6.957497 0.000000 11072 +isund 0 1 6.957497 0.000000 11073 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..ac5bb91e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +technic 0 100 2.302585 0.000000 140 +pictur 0 89 2.397895 0.000000 160 +logic 0 71 2.639057 0.000000 230 +complex 0 64 2.772589 0.000000 269 +type 0 61 2.833213 0.000000 296 +faculti 0 56 2.890372 0.000000 325 +algebra 0 45 3.135494 0.000000 394 +york 0 41 3.218876 0.000000 435 +constraint 0 26 3.688879 0.000000 636 +interpret 0 24 3.761200 0.000000 686 +universityithaca 0 24 3.761200 0.000000 710 +decis 0 23 3.806662 0.000000 728 +automata 0 13 4.382027 0.000000 1135 +interestsmi 0 10 4.653960 0.000000 1462 +hallcornel 0 8 4.875197 0.000000 1757 +newton 0 7 5.010635 0.000000 1824 +infer 0 6 5.164786 0.000000 2040 +dexter 0 4 5.568345 0.000000 2855 +andsemant 0 3 5.857933 0.000000 3246 +kozendext 0 1 6.957497 0.000000 11074 +kozenjoseph 0 1 6.957497 0.000000 11075 +engineeringphd 0 1 6.957497 0.000000 11076 +especiallycomplex 0 1 6.957497 0.000000 11077 +onlinekleen 0 1 6.957497 0.000000 11078 +algebraautomata 0 1 6.957497 0.000000 11079 +logicbibliographylist 0 1 6.957497 0.000000 11080 +reportscours 0 1 6.957497 0.000000 11081 +notesc 0 1 6.957497 0.000000 11082 +programsc 0 1 6.957497 0.000000 11083 +theoryfun 0 1 6.957497 0.000000 11084 +stufffamili 0 1 6.957497 0.000000 11085 +rugbi 0 1 6.957497 0.000000 11086 +effectcomput 0 1 6.957497 0.000000 11087 +departmentupson 0 1 6.957497 0.000000 11088 +usakozen 0 1 6.957497 0.000000 11089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..685fa849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +teach 0 108 2.197225 0.000000 112 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +associ 0 93 2.397895 0.000000 151 +learn 0 86 2.484907 0.000000 170 +novemb 0 81 2.484907 0.000000 179 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +type 0 61 2.833213 0.000000 296 +autom 0 41 3.218876 0.000000 434 +soon 0 36 3.367296 0.000000 494 +synthesi 0 20 3.951244 0.000000 834 +deduct 0 12 4.465908 0.000000 1236 +german 0 6 5.164786 0.000000 2190 +christoph 0 5 5.347108 0.000000 2512 +kreitz 1 1 6.957497 6.957497 11090 +lehr 0 1 6.957497 0.000000 11091 +lernen 0 1 6.957497 0.000000 11092 +vorlesungsskript 0 1 6.957497 0.000000 11093 +medienunterst 0 1 6.957497 0.000000 11094 +uumltzt 0 1 6.957497 0.000000 11095 +lehren 0 1 6.957497 0.000000 11096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..5c9511c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +place 0 106 2.197225 0.000000 124 +master 0 76 2.564949 0.000000 216 +multimedia 0 68 2.708050 0.000000 258 +would 0 67 2.708050 0.000000 251 +street 0 63 2.772589 0.000000 293 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +telephon 0 50 3.044522 0.000000 373 +still 0 50 3.044522 0.000000 362 +enjoi 0 26 3.688879 0.000000 660 +daili 0 24 3.761200 0.000000 706 +taiwan 0 16 4.174387 0.000000 1006 +countri 0 15 4.248495 0.000000 1059 +grove 0 8 4.875197 0.000000 1675 +newton 0 7 5.010635 0.000000 1824 +isi 0 5 5.347108 0.000000 2443 +heng 0 2 6.263398 0.000000 5202 +kuen 1 1 6.957497 6.957497 11097 +myproject 0 1 6.957497 0.000000 11098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..d808c70a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +contact 0 153 1.791759 0.000000 59 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +report 0 131 2.079442 0.000000 92 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +final 0 116 2.197225 0.000000 108 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +find 0 111 2.197225 0.000000 111 +site 0 106 2.197225 0.000000 119 +technic 0 100 2.302585 0.000000 140 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +pictur 0 89 2.397895 0.000000 160 +librari 1 87 2.484907 2.484907 181 +wide 0 84 2.484907 0.000000 185 +internet 0 83 2.484907 0.000000 186 +build 0 85 2.484907 0.000000 184 +member 0 84 2.484907 0.000000 165 +server 0 76 2.564949 0.000000 204 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +know 0 80 2.564949 0.000000 198 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +meet 0 72 2.639057 0.000000 229 +upson 0 71 2.639057 0.000000 218 +involv 0 71 2.639057 0.000000 227 +nation 0 74 2.639057 0.000000 240 +publish 0 57 2.890372 0.000000 326 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +digit 1 52 2.995732 2.995732 348 +maintain 0 51 2.995732 0.000000 342 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +life 0 50 3.044522 0.000000 375 +give 0 50 3.044522 0.000000 359 +protocol 0 45 3.135494 0.000000 407 +fast 0 42 3.218876 0.000000 429 +author 0 39 3.258097 0.000000 450 +littl 0 39 3.258097 0.000000 454 +open 0 38 3.295837 0.000000 469 +mean 0 37 3.332205 0.000000 477 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +extend 0 32 3.465736 0.000000 539 +chapter 0 32 3.465736 0.000000 536 +collabor 0 32 3.465736 0.000000 543 +posit 0 31 3.496508 0.000000 552 +secur 0 30 3.555348 0.000000 577 +framework 0 28 3.610918 0.000000 606 +releas 0 28 3.610918 0.000000 616 +hope 0 28 3.610918 0.000000 610 +enabl 0 26 3.688879 0.000000 655 +rule 0 26 3.688879 0.000000 638 +challeng 0 26 3.688879 0.000000 653 +never 0 25 3.737670 0.000000 671 +primari 0 25 3.737670 0.000000 669 +universityithaca 0 24 3.761200 0.000000 710 +store 0 24 3.761200 0.000000 693 +magazin 0 24 3.761200 0.000000 704 +sometim 0 24 3.761200 0.000000 696 +lead 0 23 3.806662 0.000000 718 +mobil 0 23 3.806662 0.000000 730 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +defin 0 22 3.850148 0.000000 746 +corpor 0 21 3.912023 0.000000 802 +fund 0 21 3.912023 0.000000 805 +tell 0 21 3.912023 0.000000 777 +spend 0 19 4.007333 0.000000 850 +ever 0 19 4.007333 0.000000 872 +beauti 0 18 4.060443 0.000000 912 +repositori 0 17 4.110874 0.000000 932 +carl 0 15 4.248495 0.000000 1024 +charact 0 15 4.248495 0.000000 1028 +edui 0 13 4.382027 0.000000 1193 +joint 0 13 4.382027 0.000000 1130 +infrastructur 0 12 4.465908 0.000000 1234 +road 0 11 4.553877 0.000000 1374 +consortium 0 10 4.653960 0.000000 1467 +bike 0 10 4.653960 0.000000 1468 +leader 0 9 4.753590 0.000000 1576 +desir 0 9 4.753590 0.000000 1542 +poor 0 8 4.875197 0.000000 1736 +pagei 0 8 4.875197 0.000000 1683 +davi 0 7 5.010635 0.000000 1888 +daughter 0 7 5.010635 0.000000 1943 +drop 0 6 5.164786 0.000000 2008 +trail 0 6 5.164786 0.000000 2071 +substitut 0 5 5.347108 0.000000 2247 +departmentat 0 5 5.347108 0.000000 2513 +constant 0 5 5.347108 0.000000 2251 +outdoor 0 5 5.347108 0.000000 2514 +interfer 0 5 5.347108 0.000000 2494 +darpa 0 4 5.568345 0.000000 2944 +metadata 0 4 5.568345 0.000000 2945 +breath 0 4 5.568345 0.000000 2946 +dienst 0 3 5.857933 0.000000 3640 +ncstrl 0 3 5.857933 0.000000 3530 +worldwid 0 3 5.857933 0.000000 3704 +luci 0 3 5.857933 0.000000 3705 +fresh 0 3 5.857933 0.000000 3706 +lagoz 0 2 6.263398 0.000000 5081 +interoper 0 2 6.263398 0.000000 4838 +quiet 0 2 6.263398 0.000000 5203 +protocolsfor 0 2 6.263398 0.000000 5204 +developeda 0 2 6.263398 0.000000 5205 +interfacesand 0 2 6.263398 0.000000 5206 +cano 0 2 6.263398 0.000000 5207 +joi 0 2 6.263398 0.000000 5208 +fight 0 2 6.263398 0.000000 5209 +groupin 0 1 6.957497 0.000000 11099 +ourgroup 0 1 6.957497 0.000000 11100 +adistribut 0 1 6.957497 0.000000 11101 +collaborateson 0 1 6.957497 0.000000 11102 +thedienstsoftwar 0 1 6.957497 0.000000 11103 +providesdistribut 0 1 6.957497 0.000000 11104 +initiativesto 0 1 6.957497 0.000000 11105 +iso 0 1 6.957497 0.000000 11106 +dlib 0 1 6.957497 0.000000 11107 +dlibwork 0 1 6.957497 0.000000 11108 +iiin 0 1 6.957497 0.000000 11109 +warwick 0 1 6.957497 0.000000 11110 +amveri 0 1 6.957497 0.000000 11111 +distributedobject 0 1 6.957497 0.000000 11112 +paperfor 0 1 6.957497 0.000000 11113 +codeworkshop 0 1 6.957497 0.000000 11114 +meetm 0 1 6.957497 0.000000 11115 +moreabout 0 1 6.957497 0.000000 11116 +outsideof 0 1 6.957497 0.000000 11117 +toddler 0 1 6.957497 0.000000 11118 +lucyg 0 1 6.957497 0.000000 11119 +avid 0 1 6.957497 0.000000 11120 +movingwat 0 1 6.957497 0.000000 11121 +lakeand 0 1 6.957497 0.000000 11122 +itch 0 1 6.957497 0.000000 11123 +ridingalong 0 1 6.957497 0.000000 11124 +backwood 0 1 6.957497 0.000000 11125 +sparehour 0 1 6.957497 0.000000 11126 +shoe 0 1 6.957497 0.000000 11127 +deeplyth 0 1 6.957497 0.000000 11128 +physicalnor 0 1 6.957497 0.000000 11129 +itspreserv 0 1 6.957497 0.000000 11130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..e3ef555f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +report 0 131 2.079442 0.000000 92 +document 0 121 2.079442 0.000000 89 +tool 0 117 2.079442 0.000000 93 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +homepag 1 93 2.397895 2.397895 148 +center 0 88 2.397895 0.000000 158 +internet 0 83 2.484907 0.000000 186 +level 0 87 2.484907 0.000000 180 +school 0 84 2.484907 0.000000 188 +materi 0 75 2.639057 0.000000 221 +servic 0 72 2.639057 0.000000 236 +onlin 0 75 2.639057 0.000000 223 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +guid 0 63 2.772589 0.000000 267 +colleg 0 61 2.833213 0.000000 300 +back 0 60 2.833213 0.000000 297 +local 0 55 2.944439 0.000000 334 +friend 0 48 3.044522 0.000000 376 +standard 0 48 3.044522 0.000000 365 +basic 0 50 3.044522 0.000000 360 +author 0 39 3.258097 0.000000 450 +tutori 0 39 3.258097 0.000000 437 +open 0 38 3.295837 0.000000 469 +robot 0 36 3.367296 0.000000 497 +concept 0 32 3.465736 0.000000 537 +secur 0 30 3.555348 0.000000 577 +travel 0 30 3.555348 0.000000 579 +chines 0 29 3.583519 0.000000 595 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +grad 0 20 3.951244 0.000000 837 +ultim 0 17 4.110874 0.000000 943 +cambridg 0 16 4.174387 0.000000 1008 +massiv 0 15 4.248495 0.000000 1026 +rank 0 14 4.317488 0.000000 1063 +opportun 0 13 4.382027 0.000000 1161 +career 0 12 4.465908 0.000000 1287 +safe 0 12 4.465908 0.000000 1274 +classmat 0 9 4.753590 0.000000 1516 +yellow 0 9 4.753590 0.000000 1601 +sigop 0 8 4.875197 0.000000 1727 +soccer 0 8 4.875197 0.000000 1752 +zhou 0 6 5.164786 0.000000 2092 +legal 0 6 5.164786 0.000000 2094 +authent 0 5 5.347108 0.000000 2306 +exclus 0 4 5.568345 0.000000 2947 +insur 0 4 5.568345 0.000000 2939 +surviv 0 4 5.568345 0.000000 2734 +fudan 0 3 5.857933 0.000000 3707 +legion 0 3 5.857933 0.000000 3708 +automobil 0 3 5.857933 0.000000 3709 +buyer 0 2 6.263398 0.000000 5210 +resours 0 2 6.263398 0.000000 5211 +sunris 0 2 6.263398 0.000000 5212 +edmund 0 2 6.263398 0.000000 5213 +succe 0 2 6.263398 0.000000 5214 +lidong 0 1 6.957497 0.000000 11131 +auto 0 1 6.957497 0.000000 11132 +oasi 0 1 6.957497 0.000000 11133 +adag 0 1 6.957497 0.000000 11134 +sirac 0 1 6.957497 0.000000 11135 +kerbero 0 1 6.957497 0.000000 11136 +ocaml 0 1 6.957497 0.000000 11137 +jobtrak 0 1 6.957497 0.000000 11138 +hunter 0 1 6.957497 0.000000 11139 +careermosa 0 1 6.957497 0.000000 11140 +jobweb 0 1 6.957497 0.000000 11141 +xjob 0 1 6.957497 0.000000 11142 +yingjun 0 1 6.957497 0.000000 11143 +isso 0 1 6.957497 0.000000 11144 +autosit 0 1 6.957497 0.000000 11145 +agenc 0 1 6.957497 0.000000 11146 +indexlast 0 1 6.957497 0.000000 11147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..5e87050c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +link 0 247 1.386294 0.000000 24 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +final 0 116 2.197225 0.000000 108 +site 0 106 2.197225 0.000000 119 +world 0 115 2.197225 0.000000 126 +look 0 107 2.197225 0.000000 115 +book 0 99 2.302585 0.000000 131 +take 0 97 2.302585 0.000000 134 +octob 0 89 2.397895 0.000000 156 +info 0 85 2.484907 0.000000 176 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +publish 0 57 2.890372 0.000000 326 +digit 0 52 2.995732 0.000000 348 +advisor 0 51 2.995732 0.000000 355 +cool 0 49 3.044522 0.000000 374 +still 0 50 3.044522 0.000000 362 +visual 0 48 3.044522 0.000000 372 +move 0 47 3.091042 0.000000 382 +video 0 44 3.135494 0.000000 405 +natur 0 44 3.135494 0.000000 406 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +york 0 41 3.218876 0.000000 435 +small 0 39 3.258097 0.000000 447 +annual 0 40 3.258097 0.000000 458 +movi 0 40 3.258097 0.000000 459 +connect 0 37 3.332205 0.000000 485 +chapter 0 32 3.465736 0.000000 536 +anim 0 31 3.496508 0.000000 557 +quot 0 29 3.583519 0.000000 582 +chines 0 29 3.583519 0.000000 595 +releas 0 28 3.610918 0.000000 616 +linux 0 27 3.637586 0.000000 631 +berkelei 0 26 3.688879 0.000000 657 +wang 0 21 3.912023 0.000000 790 +born 0 21 3.912023 0.000000 798 +annot 0 21 3.912023 0.000000 775 +wind 0 18 4.060443 0.000000 908 +edulast 0 17 4.110874 0.000000 927 +taiwan 0 16 4.174387 0.000000 1006 +practicum 0 16 4.174387 0.000000 960 +track 0 15 4.248495 0.000000 1029 +scene 0 14 4.317488 0.000000 1114 +classic 0 14 4.317488 0.000000 1084 +hong 0 14 4.317488 0.000000 1105 +galleri 0 13 4.382027 0.000000 1192 +remov 0 12 4.465908 0.000000 1225 +scienceat 0 11 4.553877 0.000000 1375 +moment 0 11 4.553877 0.000000 1379 +alpha 0 11 4.553877 0.000000 1348 +earth 0 10 4.653960 0.000000 1463 +kong 0 9 4.753590 0.000000 1602 +jump 0 9 4.753590 0.000000 1603 +coast 0 8 4.875197 0.000000 1746 +edg 0 8 4.875197 0.000000 1647 +bridg 0 8 4.875197 0.000000 1764 +ramin 0 7 5.010635 0.000000 1820 +bookstor 0 7 5.010635 0.000000 1837 +vallei 0 7 5.010635 0.000000 1959 +southern 0 6 5.164786 0.000000 2191 +ohio 0 5 5.347108 0.000000 2447 +carlo 0 5 5.347108 0.000000 2515 +swartz 0 4 5.568345 0.000000 2878 +sinanet 0 4 5.568345 0.000000 2883 +sell 0 4 5.568345 0.000000 2935 +japanes 0 4 5.568345 0.000000 2934 +classesc 0 3 5.857933 0.000000 3681 +visionc 0 3 5.857933 0.000000 3489 +audit 0 3 5.857933 0.000000 3391 +headlin 0 3 5.857933 0.000000 3710 +hongkong 0 3 5.857933 0.000000 3677 +hero 0 3 5.857933 0.000000 3711 +ming 0 3 5.857933 0.000000 3712 +villag 0 2 6.263398 0.000000 5215 +computingc 0 2 6.263398 0.000000 5216 +linksfor 0 2 6.263398 0.000000 5185 +castl 0 2 6.263398 0.000000 5217 +nausicaa 0 2 6.263398 0.000000 5218 +galact 0 2 6.263398 0.000000 5219 +hsian 0 1 6.957497 0.000000 11148 +orwel 0 1 6.957497 0.000000 11149 +totoro 0 1 6.957497 0.000000 11150 +wangthi 0 1 6.957497 0.000000 11151 +constructionlin 0 1 6.957497 0.000000 11152 +fangliao 0 1 6.957497 0.000000 11153 +videoe 0 1 6.957497 0.000000 11154 +networkse 0 1 6.957497 0.000000 11155 +amidonc 0 1 6.957497 0.000000 11156 +transcrib 0 1 6.957497 0.000000 11157 +zabihspr 0 1 6.957497 0.000000 11158 +processingc 0 1 6.957497 0.000000 11159 +managementc 0 1 6.957497 0.000000 11160 +colloquimc 0 1 6.957497 0.000000 11161 +webspac 0 1 6.957497 0.000000 11162 +stuffscornel 0 1 6.957497 0.000000 11163 +reportiee 0 1 6.957497 0.000000 11164 +societytaiwan 0 1 6.957497 0.000000 11165 +comth 0 1 6.957497 0.000000 11166 +musicmovi 0 1 6.957497 0.000000 11167 +movieweb 0 1 6.957497 0.000000 11168 +moviemania 0 1 6.957497 0.000000 11169 +picturesth 0 1 6.957497 0.000000 11170 +linkstcl 0 1 6.957497 0.000000 11171 +hacksth 0 1 6.957497 0.000000 11172 +pagemiscellan 0 1 6.957497 0.000000 11173 +cja 0 1 6.957497 0.000000 11174 +calanimag 0 1 6.957497 0.000000 11175 +pagelaputa 0 1 6.957497 0.000000 11176 +conan 0 1 6.957497 0.000000 11177 +slump 0 1 6.957497 0.000000 11178 +kiki 0 1 6.957497 0.000000 11179 +legend 0 1 6.957497 0.000000 11180 +pagecampu 0 1 6.957497 0.000000 11181 +uptown 0 1 6.957497 0.000000 11182 +eithaca 0 1 6.957497 0.000000 11183 +linhsian 0 1 6.957497 0.000000 11184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..3f0b8030 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +look 0 107 2.197225 0.000000 115 +check 0 115 2.197225 0.000000 118 +take 0 97 2.302585 0.000000 134 +thing 0 84 2.484907 0.000000 189 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +collect 0 65 2.772589 0.000000 268 +septemb 0 65 2.772589 0.000000 274 +digit 0 52 2.995732 0.000000 348 +video 0 44 3.135494 0.000000 405 +show 0 43 3.178054 0.000000 417 +http 0 41 3.218876 0.000000 420 +littl 0 39 3.258097 0.000000 454 +download 0 36 3.367296 0.000000 489 +thought 1 17 4.110874 4.110874 945 +sept 0 17 4.110874 0.000000 952 +pagewelcom 0 11 4.553877 0.000000 1344 +song 0 11 4.553877 0.000000 1380 +theme 0 8 4.875197 0.000000 1707 +counter 0 8 4.875197 0.000000 1765 +clip 0 7 5.010635 0.000000 1868 +courtesi 0 7 5.010635 0.000000 1953 +essai 0 4 5.568345 0.000000 2948 +libbi 0 1 6.957497 0.000000 11185 +lista 0 1 6.957497 0.000000 11186 +projectemail 0 1 6.957497 0.000000 11187 +mehit 0 1 6.957497 0.000000 11188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..b4b1d7e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +take 0 97 2.302585 0.000000 134 +name 0 72 2.639057 0.000000 220 +upson 0 71 2.639057 0.000000 218 +lili 0 5 5.347108 0.000000 2240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..a1017ab0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +analysi 0 124 2.079442 0.000000 98 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +center 0 88 2.397895 0.000000 158 +method 0 80 2.564949 0.000000 213 +appli 0 71 2.639057 0.000000 226 +view 0 70 2.708050 0.000000 254 +differ 0 66 2.708050 0.000000 253 +scientif 0 53 2.944439 0.000000 341 +much 0 52 2.995732 0.000000 349 +numer 1 49 3.044522 3.044522 369 +appoint 0 49 3.044522 0.000000 358 +algebra 0 45 3.135494 0.000000 394 +textbook 0 44 3.135494 0.000000 397 +mechan 0 43 3.178054 0.000000 416 +linear 0 41 3.218876 0.000000 431 +map 0 39 3.258097 0.000000 452 +multipl 0 39 3.258097 0.000000 453 +field 0 37 3.332205 0.000000 482 +mean 0 37 3.332205 0.000000 477 +approxim 0 35 3.401197 0.000000 509 +bibliographi 0 34 3.401197 0.000000 518 +jeff 0 25 3.737670 0.000000 673 +siam 0 21 3.912023 0.000000 800 +walter 0 17 4.110874 0.000000 950 +normal 0 16 4.174387 0.000000 995 +matlab 0 14 4.317488 0.000000 1081 +affili 0 13 4.382027 0.000000 1194 +whose 0 13 4.382027 0.000000 1166 +nick 0 13 4.382027 0.000000 1180 +iter 0 12 4.465908 0.000000 1206 +peter 0 11 4.553877 0.000000 1316 +matric 0 10 4.653960 0.000000 1399 +thecomput 0 10 4.653960 0.000000 1408 +lloyd 0 6 5.164786 0.000000 2103 +edumi 0 6 5.164786 0.000000 2132 +fluid 0 5 5.347108 0.000000 2440 +thecornel 0 4 5.568345 0.000000 2892 +conform 0 4 5.568345 0.000000 2941 +hasbeen 0 4 5.568345 0.000000 2661 +trefethen 0 3 5.857933 0.000000 3528 +eigenvector 0 3 5.857933 0.000000 3365 +vicki 0 3 5.857933 0.000000 3187 +reddi 0 3 5.857933 0.000000 3277 +havea 0 2 6.263398 0.000000 4434 +spectral 0 2 6.263398 0.000000 4837 +papersoth 0 2 6.263398 0.000000 5049 +loui 0 2 6.263398 0.000000 5220 +trefethenprofessorlnt 0 1 6.957497 0.000000 11189 +thecent 0 1 6.957497 0.000000 11190 +numericalsolut 0 1 6.957497 0.000000 11191 +notorthogon 0 1 6.957497 0.000000 11192 +textbooksfinit 0 1 6.957497 0.000000 11193 +papersmultimatlab 0 1 6.957497 0.000000 11194 +processorsmatrix 0 1 6.957497 0.000000 11195 +gap 0 1 6.957497 0.000000 11196 +betweenpotenti 0 1 6.957497 0.000000 11197 +convergencepseudospectra 0 1 6.957497 0.000000 11198 +operatorssom 0 1 6.957497 0.000000 11199 +itemsclass 0 1 6.957497 0.000000 11200 +analysiscurriculum 0 1 6.957497 0.000000 11201 +vitaepseudospectra 0 1 6.957497 0.000000 11202 +alfeldcurr 0 1 6.957497 0.000000 11203 +howlegubjrn 0 1 6.957497 0.000000 11204 +jnsson 0 1 6.957497 0.000000 11205 +yohan 0 1 6.957497 0.000000 11206 +kimdivakar 0 1 6.957497 0.000000 11207 +viswanathprevi 0 1 6.957497 0.000000 11208 +baggetttobi 0 1 6.957497 0.000000 11209 +driscollalan 0 1 6.957497 0.000000 11210 +edelman 0 1 6.957497 0.000000 11211 +howel 0 1 6.957497 0.000000 11212 +mascarenhasnoel 0 1 6.957497 0.000000 11213 +nachtigalsatish 0 1 6.957497 0.000000 11214 +chuan 0 1 6.957497 0.000000 11215 +tohsom 0 1 6.957497 0.000000 11216 +colleaguesjim 0 1 6.957497 0.000000 11217 +demmelann 0 1 6.957497 0.000000 11218 +greenbaummartin 0 1 6.957497 0.000000 11219 +gutknechtd 0 1 6.957497 0.000000 11220 +highamann 0 1 6.957497 0.000000 11221 +trefethenandr 0 1 6.957497 0.000000 11222 +weideman 0 1 6.957497 0.000000 11223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..de8887df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +manag 0 114 2.197225 0.000000 125 +site 0 106 2.197225 0.000000 119 +topic 0 114 2.197225 0.000000 110 +person 0 111 2.197225 0.000000 117 +search 0 95 2.397895 0.000000 155 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +librari 0 87 2.484907 0.000000 181 +resum 0 79 2.564949 0.000000 217 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +new 0 64 2.772589 0.000000 262 +archiv 0 49 3.044522 0.000000 364 +favorit 0 44 3.135494 0.000000 410 +netscap 0 44 3.135494 0.000000 395 +music 0 42 3.218876 0.000000 436 +compani 0 41 3.218876 0.000000 423 +microsoft 0 38 3.295837 0.000000 468 +china 0 37 3.332205 0.000000 487 +connect 0 37 3.332205 0.000000 485 +product 0 33 3.433987 0.000000 527 +travel 0 30 3.555348 0.000000 579 +chines 0 29 3.583519 0.000000 595 +programminglanguag 0 21 3.912023 0.000000 782 +busi 0 21 3.912023 0.000000 784 +hobbi 0 16 4.174387 0.000000 1009 +stock 0 16 4.174387 0.000000 1007 +practicum 0 16 4.174387 0.000000 960 +photograph 0 15 4.248495 0.000000 1056 +novel 0 15 4.248495 0.000000 1039 +misc 0 13 4.382027 0.000000 1124 +galleri 0 13 4.382027 0.000000 1192 +tune 0 12 4.465908 0.000000 1227 +catalog 0 10 4.653960 0.000000 1431 +swim 0 9 4.753590 0.000000 1599 +corba 0 5 5.347108 0.000000 2320 +ping 0 4 5.568345 0.000000 2922 +vrml 0 4 5.568345 0.000000 2949 +cube 0 4 5.568345 0.000000 2940 +luci 0 3 5.857933 0.000000 3705 +pong 0 3 5.857933 0.000000 3371 +underground 0 3 5.857933 0.000000 3604 +badminton 0 2 6.263398 0.000000 5221 +silvano 0 2 6.263398 0.000000 4868 +sunlab 0 2 6.263398 0.000000 5222 +caltech 0 2 6.263398 0.000000 5223 +whiz 0 1 6.957497 0.000000 11224 +systemscontact 0 1 6.957497 0.000000 11225 +yuwu 0 1 6.957497 0.000000 11226 +tkcgi 0 1 6.957497 0.000000 11227 +securitypc 0 1 6.957497 0.000000 11228 +lube 0 1 6.957497 0.000000 11229 +ipngip_atmcomput 0 1 6.957497 0.000000 11230 +sapient 0 1 6.957497 0.000000 11231 +jobtrack 0 1 6.957497 0.000000 11232 +artvark 0 1 6.957497 0.000000 11233 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..334ab118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +network 0 168 1.791759 0.000000 61 +click 0 142 1.945910 0.000000 78 +sinc 0 90 2.397895 0.000000 159 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +receiv 0 66 2.708050 0.000000 244 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +multimedia 0 68 2.708050 0.000000 258 +digit 0 52 2.995732 0.000000 348 +electron 0 47 3.091042 0.000000 379 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +layer 0 17 4.110874 0.000000 926 +massachusett 0 14 4.317488 0.000000 1118 +linda 0 10 4.653960 0.000000 1394 +equip 0 10 4.653960 0.000000 1459 +stack 0 10 4.653960 0.000000 1389 +corp 0 6 5.164786 0.000000 2139 +nativ 0 6 5.164786 0.000000 2192 +multicast 0 5 5.347108 0.000000 2305 +commerc 0 3 5.857933 0.000000 3209 +lowel 0 2 6.263398 0.000000 5224 +coursesfal 0 2 6.263398 0.000000 5225 +universitylinda 0 1 6.957497 0.000000 11234 +lxwu 0 1 6.957497 0.000000 11235 +univsers 0 1 6.957497 0.000000 11236 +banyan 0 1 6.957497 0.000000 11237 +mulitimedia 0 1 6.957497 0.000000 11238 +kramer 0 1 6.957497 0.000000 11239 +mart 0 1 6.957497 0.000000 11240 +photoesus 0 1 6.957497 0.000000 11241 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..f4b962ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +upson 1 71 2.639057 2.639057 218 +juli 1 60 2.833213 2.833213 305 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +halldepart 1 3 5.857933 5.857933 3641 +nikolai 1 2 6.263398 6.263398 4087 +mateevnikolai 1 1 6.957497 6.957497 11242 +mateevgradu 1 1 6.957497 6.957497 11243 +studentmateev 1 1 6.957497 6.957497 11244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..516bf4e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +year 0 148 1.945910 0.000000 84 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +theori 0 111 2.197225 0.000000 127 +manag 0 114 2.197225 0.000000 125 +select 0 91 2.397895 0.000000 154 +academ 0 82 2.484907 0.000000 178 +come 0 78 2.564949 0.000000 202 +multimedia 0 68 2.708050 0.000000 258 +visit 0 63 2.772589 0.000000 288 +laboratori 0 63 2.772589 0.000000 292 +summer 0 56 2.890372 0.000000 311 +soon 0 36 3.367296 0.000000 494 +scientist 0 31 3.496508 0.000000 560 +universityithaca 0 24 3.761200 0.000000 710 +fellow 0 24 3.761200 0.000000 701 +fund 0 21 3.912023 0.000000 805 +supervis 0 20 3.951244 0.000000 840 +stop 0 17 4.110874 0.000000 942 +heterogen 0 14 4.317488 0.000000 1090 +arpa 0 11 4.553877 0.000000 1369 +princip 0 10 4.653960 0.000000 1397 +xerox 0 8 4.875197 0.000000 1725 +matthew 0 6 5.164786 0.000000 2193 +chat 0 6 5.164786 0.000000 2128 +metadata 0 4 5.568345 0.000000 2945 +morgenstern 0 1 6.957497 0.000000 11245 +pagematthew 0 1 6.957497 0.000000 11246 +morgensternresearch 0 1 6.957497 0.000000 11247 +leaderaddress 0 1 6.957497 0.000000 11248 +centerxerox 0 1 6.957497 0.000000 11249 +institutecornel 0 1 6.957497 0.000000 11250 +edustatu 0 1 6.957497 0.000000 11251 +scienceproject 0 1 6.957497 0.000000 11252 +fundedresearch 0 1 6.957497 0.000000 11253 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..0121c4d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +homepag 0 93 2.397895 0.000000 148 +dynam 0 76 2.564949 0.000000 194 +digit 0 52 2.995732 0.000000 348 +especi 0 36 3.367296 0.000000 496 +random 0 34 3.401197 0.000000 511 +committe 0 34 3.401197 0.000000 522 +graph 1 30 3.555348 3.555348 576 +bound 0 26 3.688879 0.000000 659 +universityithaca 0 24 3.761200 0.000000 710 +leav 0 21 3.912023 0.000000 772 +corpor 0 21 3.912023 0.000000 802 +lower 0 18 4.060443 0.000000 886 +eduphon 0 15 4.248495 0.000000 1060 +equip 0 10 4.653960 0.000000 1459 +soda 0 6 5.164786 0.000000 2189 +departmentcornel 0 5 5.347108 0.000000 2275 +stoc 0 5 5.347108 0.000000 2491 +henzing 0 3 5.857933 0.000000 3713 +professorcomput 0 3 5.857933 0.000000 3714 +monika 0 2 6.263398 0.000000 4141 +rauch 0 2 6.263398 0.000000 4142 +homepagemonika 0 1 6.957497 0.000000 11254 +henzingerassist 0 1 6.957497 0.000000 11255 +centerhomepageresearch 0 1 6.957497 0.000000 11256 +interestscombinatori 0 1 6.957497 0.000000 11257 +pageprogram 0 1 6.957497 0.000000 11258 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..586caaa9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +paper 1 205 1.609438 1.609438 38 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +final 0 116 2.197225 0.000000 108 +user 0 104 2.302585 0.000000 137 +technic 0 100 2.302585 0.000000 140 +exam 0 86 2.484907 0.000000 169 +level 0 87 2.484907 0.000000 180 +master 0 76 2.564949 0.000000 216 +interfac 0 79 2.564949 0.000000 209 +integr 0 67 2.708050 0.000000 245 +plan 0 65 2.772589 0.000000 272 +ithaca 0 65 2.772589 0.000000 294 +collect 0 65 2.772589 0.000000 268 +share 0 59 2.833213 0.000000 304 +movi 0 40 3.258097 0.000000 459 +field 0 37 3.332205 0.000000 482 +travel 0 30 3.555348 0.000000 579 +tenni 0 20 3.951244 0.000000 838 +bachelor 0 17 4.110874 0.000000 957 +horu 0 14 4.317488 0.000000 1116 +huang 0 12 4.465908 0.000000 1202 +reness 0 11 4.553877 0.000000 1333 +werner 0 10 4.653960 0.000000 1385 +ride 0 8 4.875197 0.000000 1741 +vogel 0 8 4.875197 0.000000 1622 +sheldon 0 2 6.263398 0.000000 5226 +stanlei 1 1 6.957497 6.957497 11259 +kentucki 0 1 6.957497 0.000000 11260 +huangmast 0 1 6.957497 0.000000 11261 +studentmhuang 0 1 6.957497 0.000000 11262 +courtcornel 0 1 6.957497 0.000000 11263 +systemsdistribut 0 1 6.957497 0.000000 11264 +systemsdatabas 0 1 6.957497 0.000000 11265 +retrievalgraph 0 1 6.957497 0.000000 11266 +interfacesoth 0 1 6.957497 0.000000 11267 +horse_back 0 1 6.957497 0.000000 11268 +myadvisor 0 1 6.957497 0.000000 11269 +robbertvan 0 1 6.957497 0.000000 11270 +planplan 0 1 6.957497 0.000000 11271 +distributionplan 0 1 6.957497 0.000000 11272 +updateplan 0 1 6.957497 0.000000 11273 +faqhorusc 0 1 6.957497 0.000000 11274 +memorydistribut 0 1 6.957497 0.000000 11275 +memorysom 0 1 6.957497 0.000000 11276 +communicationsnapshotu 0 1 6.957497 0.000000 11277 +architecturejobscar 0 1 6.957497 0.000000 11278 +pathbai 0 1 6.957497 0.000000 11279 +jobscyberezumescar 0 1 6.957497 0.000000 11280 +opportunitiesus 0 1 6.957497 0.000000 11281 +stufftechn 0 1 6.957497 0.000000 11282 +searchbel 0 1 6.957497 0.000000 11283 +labsspbsd 0 1 6.957497 0.000000 11284 +sourcesjavarfclast 0 1 6.957497 0.000000 11285 +mhuang 0 1 6.957497 0.000000 11286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..a8d2a4cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +develop 0 174 1.791759 0.000000 53 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +comment 0 93 2.397895 0.000000 146 +second 0 81 2.484907 0.000000 166 +west 0 83 2.484907 0.000000 192 +know 0 80 2.564949 0.000000 198 +copi 0 63 2.772589 0.000000 284 +copyright 0 36 3.367296 0.000000 495 +express 0 32 3.465736 0.000000 540 +abl 0 30 3.555348 0.000000 566 +challeng 0 26 3.688879 0.000000 653 +never 0 25 3.737670 0.000000 671 +universityithaca 0 24 3.761200 0.000000 710 +decis 0 23 3.806662 0.000000 728 +sciencecornel 0 22 3.850148 0.000000 768 +protect 0 17 4.110874 0.000000 935 +differenti 0 17 4.110874 0.000000 921 +precis 0 15 4.248495 0.000000 1023 +script 0 13 4.382027 0.000000 1171 +speech 0 12 4.465908 0.000000 1222 +holidai 0 12 4.465908 0.000000 1224 +mass 0 8 4.875197 0.000000 1732 +parti 0 8 4.875197 0.000000 1676 +cat 0 6 5.164786 0.000000 2194 +rebecca 0 6 5.164786 0.000000 2174 +highest 0 4 5.568345 0.000000 2950 +cuc 0 4 5.568345 0.000000 2630 +government 0 2 6.263398 0.000000 4248 +aclu 0 2 6.263398 0.000000 5227 +reno 0 2 6.263398 0.000000 5228 +millett 1 1 6.957497 6.957497 11287 +lynett 0 1 6.957497 0.000000 11288 +millettdepart 0 1 6.957497 0.000000 11289 +participatoryform 0 1 6.957497 0.000000 11290 +internetdeserv 0 1 6.957497 0.000000 11291 +intrus 0 1 6.957497 0.000000 11292 +skit 0 1 6.957497 0.000000 11293 +femin 0 1 6.957497 0.000000 11294 +feminist 0 1 6.957497 0.000000 11295 +whenver 0 1 6.957497 0.000000 11296 +sentiment 0 1 6.957497 0.000000 11297 +doormat 0 1 6.957497 0.000000 11298 +prostitut 0 1 6.957497 0.000000 11299 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..1f198c84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +machin 0 129 2.079442 0.000000 95 +databas 0 122 2.079442 0.000000 86 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +check 0 115 2.197225 0.000000 118 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +involv 0 71 2.639057 0.000000 227 +servic 0 72 2.639057 0.000000 236 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +new 0 64 2.772589 0.000000 262 +major 0 56 2.890372 0.000000 315 +reason 0 57 2.890372 0.000000 318 +extens 0 53 2.944439 0.000000 340 +cool 0 49 3.044522 0.000000 374 +video 0 44 3.135494 0.000000 405 +offer 0 43 3.178054 0.000000 414 +vision 0 41 3.218876 0.000000 430 +electr 0 38 3.295837 0.000000 461 +soon 0 36 3.367296 0.000000 494 +computersci 0 30 3.555348 0.000000 562 +hope 0 28 3.610918 0.000000 610 +weather 0 28 3.610918 0.000000 618 +latest 0 21 3.912023 0.000000 785 +sure 0 20 3.951244 0.000000 813 +ever 0 19 4.007333 0.000000 872 +practicum 0 16 4.174387 0.000000 960 +stock 0 16 4.174387 0.000000 1007 +menu 0 13 4.382027 0.000000 1156 +everyth 0 13 4.382027 0.000000 1169 +meng 0 12 4.465908 0.000000 1214 +earth 0 10 4.653960 0.000000 1463 +transmiss 0 9 4.753590 0.000000 1588 +andcomput 0 8 4.875197 0.000000 1623 +capac 0 8 4.875197 0.000000 1740 +film 0 8 4.875197 0.000000 1761 +temporari 0 6 5.164786 0.000000 2090 +wrong 0 6 5.164786 0.000000 2025 +conot 0 5 5.347108 0.000000 2245 +doubl 0 4 5.568345 0.000000 2951 +festiv 0 4 5.568345 0.000000 2952 +polytechn 0 3 5.857933 0.000000 3222 +educornel 0 3 5.857933 0.000000 3601 +coolest 0 2 6.263398 0.000000 5229 +newgroup 0 2 6.263398 0.000000 4191 +pagemi 0 2 6.263398 0.000000 5230 +nerd 0 2 6.263398 0.000000 5231 +newgroupc 0 1 6.957497 0.000000 11300 +mishaal 0 1 6.957497 0.000000 11301 +pagemisha 0 1 6.957497 0.000000 11302 +kuwaiti 0 1 6.957497 0.000000 11303 +mengc 0 1 6.957497 0.000000 11304 +worcest 0 1 6.957497 0.000000 11305 +inworcest 0 1 6.957497 0.000000 11306 +bearaccess 0 1 6.957497 0.000000 11307 +newgroupnba 0 1 6.957497 0.000000 11308 +newgroupoptim 0 1 6.957497 0.000000 11309 +kuwait 0 1 6.957497 0.000000 11310 +quotescool 0 1 6.957497 0.000000 11311 +cann 0 1 6.957497 0.000000 11312 +accus 0 1 6.957497 0.000000 11313 +almashanmisha 0 1 6.957497 0.000000 11314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..beb06203 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +relat 0 139 1.945910 0.000000 68 +number 0 130 2.079442 0.000000 97 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +version 0 113 2.197225 0.000000 122 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +site 0 106 2.197225 0.000000 119 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +call 0 91 2.397895 0.000000 153 +pictur 0 89 2.397895 0.000000 160 +graphic 0 90 2.397895 0.000000 147 +homepag 0 93 2.397895 0.000000 148 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +thing 0 84 2.484907 0.000000 189 +member 0 84 2.484907 0.000000 165 +internet 0 83 2.484907 0.000000 186 +know 0 80 2.564949 0.000000 198 +complet 0 77 2.564949 0.000000 208 +want 0 79 2.564949 0.000000 199 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +onlin 0 75 2.639057 0.000000 223 +line 0 75 2.639057 0.000000 231 +would 0 67 2.708050 0.000000 251 +result 0 65 2.772589 0.000000 281 +plai 0 60 2.833213 0.000000 307 +browser 0 56 2.890372 0.000000 313 +date 0 51 2.995732 0.000000 344 +much 0 52 2.995732 0.000000 349 +cool 0 49 3.044522 0.000000 374 +right 0 48 3.044522 0.000000 363 +visitor 0 49 3.044522 0.000000 371 +better 0 45 3.135494 0.000000 401 +mark 0 44 3.135494 0.000000 403 +made 0 44 3.135494 0.000000 398 +keep 0 44 3.135494 0.000000 409 +realli 0 40 3.258097 0.000000 444 +movi 0 40 3.258097 0.000000 459 +join 0 39 3.258097 0.000000 457 +connect 0 37 3.332205 0.000000 485 +hand 0 37 3.332205 0.000000 475 +statist 0 35 3.401197 0.000000 521 +random 0 34 3.401197 0.000000 511 +idea 0 32 3.465736 0.000000 545 +anim 0 31 3.496508 0.000000 557 +someth 0 31 3.496508 0.000000 554 +quit 0 27 3.637586 0.000000 633 +mike 0 24 3.761200 0.000000 703 +other 0 24 3.761200 0.000000 697 +togeth 0 23 3.806662 0.000000 714 +thank 0 23 3.806662 0.000000 721 +love 0 21 3.912023 0.000000 804 +mpeg 0 20 3.951244 0.000000 831 +andrew 0 19 4.007333 0.000000 849 +coupl 0 17 4.110874 0.000000 939 +stop 0 17 4.110874 0.000000 942 +expand 0 17 4.110874 0.000000 928 +whole 0 17 4.110874 0.000000 940 +sign 0 16 4.174387 0.000000 970 +anyth 0 16 4.174387 0.000000 998 +pretti 0 13 4.382027 0.000000 1191 +count 0 12 4.465908 0.000000 1239 +danc 0 12 4.465908 0.000000 1278 +guess 0 10 4.653960 0.000000 1443 +card 0 10 4.653960 0.000000 1435 +kevin 0 9 4.753590 0.000000 1482 +pick 0 9 4.753590 0.000000 1498 +opinion 0 8 4.875197 0.000000 1708 +attent 0 8 4.875197 0.000000 1651 +chanc 0 7 5.010635 0.000000 1960 +brought 0 7 5.010635 0.000000 1925 +bunch 0 7 5.010635 0.000000 1861 +golf 0 6 5.164786 0.000000 2178 +yeah 0 6 5.164786 0.000000 2195 +kid 0 5 5.347108 0.000000 2516 +pagethi 0 5 5.347108 0.000000 2336 +frog 0 5 5.347108 0.000000 2479 +exchang 0 5 5.347108 0.000000 2310 +sing 0 5 5.347108 0.000000 2499 +everybodi 0 5 5.347108 0.000000 2517 +dark 0 4 5.568345 0.000000 2910 +vote 0 4 5.568345 0.000000 2953 +maria 0 4 5.568345 0.000000 2954 +amaz 0 4 5.568345 0.000000 2600 +stockholm 0 3 5.857933 0.000000 3715 +ryan 0 3 5.857933 0.000000 3679 +nicknam 0 3 5.857933 0.000000 3716 +lame 0 3 5.857933 0.000000 3717 +beavi 0 2 6.263398 0.000000 4995 +suck 0 2 6.263398 0.000000 5232 +donnel 0 2 6.263398 0.000000 5233 +spirit 0 2 6.263398 0.000000 5234 +harmoni 0 2 6.263398 0.000000 5235 +reset 0 2 6.263398 0.000000 5236 +korbi 0 1 6.957497 0.000000 11315 +myguestbook 0 1 6.957497 0.000000 11316 +poll 0 1 6.957497 0.000000 11317 +vitya 0 1 6.957497 0.000000 11318 +corbett 0 1 6.957497 0.000000 11319 +eryn 0 1 6.957497 0.000000 11320 +crave 0 1 6.957497 0.000000 11321 +guttermouth 0 1 6.957497 0.000000 11322 +byjust 0 1 6.957497 0.000000 11323 +peic 0 1 6.957497 0.000000 11324 +accuar 0 1 6.957497 0.000000 11325 +atmak 0 1 6.957497 0.000000 11326 +edubas 0 1 6.957497 0.000000 11327 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..279726f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +fall 0 181 1.609438 0.000000 40 +construct 0 139 1.945910 0.000000 82 +spring 0 131 2.079442 0.000000 88 +graphic 0 90 2.397895 0.000000 147 +multimedia 0 68 2.708050 0.000000 258 +semest 1 58 2.890372 2.890372 312 +effect 0 46 3.091042 0.000000 385 +made 0 44 3.135494 0.000000 398 +anim 0 31 3.496508 0.000000 557 +titl 0 31 3.496508 0.000000 556 +jpeg 0 6 5.164786 0.000000 2053 +nobuhiko 1 1 6.957497 6.957497 11328 +mukai 1 1 6.957497 6.957497 11329 +mukainobuhiko 0 1 6.957497 0.000000 11330 +compressionon 0 1 6.957497 0.000000 11331 +magicon 0 1 6.957497 0.000000 11332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..e25416ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +person 0 111 2.197225 0.000000 117 +homepag 0 93 2.397895 0.000000 148 +imag 0 91 2.397895 0.000000 161 +info 0 85 2.484907 0.000000 176 +resum 0 79 2.564949 0.000000 217 +refer 0 78 2.564949 0.000000 203 +upson 0 71 2.639057 0.000000 218 +eduoffic 0 33 3.433987 0.000000 531 +photo 0 31 3.496508 0.000000 561 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +studentdepart 0 5 5.347108 0.000000 2505 +nichola 1 3 5.857933 5.857933 3252 +how 1 3 5.857933 5.857933 3289 +nihow 0 1 6.957497 0.000000 11333 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..c9f598b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +sinc 0 90 2.397895 0.000000 159 +upson 0 71 2.639057 0.000000 218 +java 0 70 2.708050 0.000000 248 +dept 0 64 2.772589 0.000000 291 +ithaca 0 65 2.772589 0.000000 294 +pointer 0 48 3.044522 0.000000 368 +visitor 0 49 3.044522 0.000000 371 +vita 0 38 3.295837 0.000000 473 +niko 1 4 5.568345 5.568345 2637 +pitsiani 0 3 5.857933 0.000000 3175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..8ad661f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +parallel 1 169 1.791759 1.791759 60 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +phone 0 175 1.791759 0.000000 45 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +hall 0 146 1.945910 0.000000 65 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +machin 0 129 2.079442 0.000000 95 +intern 0 108 2.197225 0.000000 128 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +proceed 0 93 2.397895 0.000000 152 +mani 0 92 2.397895 0.000000 150 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +environ 0 84 2.484907 0.000000 177 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +method 0 80 2.564949 0.000000 213 +master 0 76 2.564949 0.000000 216 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +state 0 76 2.564949 0.000000 207 +solv 0 73 2.639057 0.000000 234 +workshop 0 71 2.639057 0.000000 239 +appli 0 71 2.639057 0.000000 226 +nation 0 74 2.639057 0.000000 240 +symposium 0 72 2.639057 0.000000 238 +simul 0 66 2.708050 0.000000 255 +foundat 0 62 2.772589 0.000000 286 +ithaca 0 65 2.772589 0.000000 294 +back 0 60 2.833213 0.000000 297 +room 0 59 2.833213 0.000000 301 +special 0 56 2.890372 0.000000 320 +undergradu 0 54 2.944439 0.000000 338 +scientif 0 53 2.944439 0.000000 341 +numer 0 49 3.044522 0.000000 369 +adapt 0 46 3.091042 0.000000 387 +map 0 39 3.258097 0.000000 452 +field 0 37 3.332205 0.000000 482 +domain 0 30 3.555348 0.000000 564 +compon 0 30 3.555348 0.000000 570 +load 0 28 3.610918 0.000000 601 +challeng 0 26 3.688879 0.000000 653 +altern 0 26 3.688879 0.000000 641 +task 0 25 3.737670 0.000000 678 +known 0 24 3.761200 0.000000 702 +scalabl 0 24 3.761200 0.000000 705 +equat 0 23 3.806662 0.000000 724 +geometri 0 22 3.850148 0.000000 752 +siam 0 21 3.912023 0.000000 800 +wang 0 21 3.912023 0.000000 790 +portabl 0 20 3.951244 0.000000 819 +binari 0 20 3.951244 0.000000 823 +toolkit 0 20 3.951244 0.000000 835 +runtim 0 19 4.007333 0.000000 858 +partial 0 18 4.060443 0.000000 900 +former 0 17 4.110874 0.000000 956 +differenti 0 17 4.110874 0.000000 921 +partit 0 16 4.174387 0.000000 984 +balanc 0 14 4.317488 0.000000 1112 +incomput 0 14 4.317488 0.000000 1096 +menu 0 13 4.382027 0.000000 1156 +iter 0 12 4.465908 0.000000 1206 +touch 0 12 4.465908 0.000000 1288 +characterist 0 12 4.465908 0.000000 1257 +rice 0 11 4.553877 0.000000 1336 +multithread 0 11 4.553877 0.000000 1315 +black 0 10 4.653960 0.000000 1418 +purdu 0 10 4.653960 0.000000 1466 +decomposit 0 10 4.653960 0.000000 1439 +rhode 0 9 4.753590 0.000000 1579 +sensit 0 8 4.875197 0.000000 1726 +vineet 0 8 4.875197 0.000000 1639 +watson 0 8 4.875197 0.000000 1691 +yang 0 8 4.875197 0.000000 1652 +univeristi 0 8 4.875197 0.000000 1754 +multicomput 0 7 5.010635 0.000000 1890 +solver 0 7 5.010635 0.000000 1911 +thompson 0 6 5.164786 0.000000 2049 +heurist 0 6 5.164786 0.000000 2125 +hole 0 5 5.347108 0.000000 2518 +fluid 0 5 5.347108 0.000000 2440 +grand 0 5 5.347108 0.000000 2425 +niko 0 4 5.568345 0.000000 2637 +bernoulli 0 4 5.568345 0.000000 2955 +kodukula 0 4 5.568345 0.000000 2640 +indupraka 0 4 5.568345 0.000000 2639 +pingali 0 4 5.568345 0.000000 2956 +contemporari 0 4 5.568345 0.000000 2719 +colorado 0 4 5.568345 0.000000 2938 +knight 0 4 5.568345 0.000000 2728 +richter 0 4 5.568345 0.000000 2957 +architecur 0 3 5.857933 0.000000 3448 +ahuja 0 3 5.857933 0.000000 3494 +ctctr 0 3 5.857933 0.000000 3625 +imac 0 3 5.857933 0.000000 3718 +brunswick 0 3 5.857933 0.000000 3567 +mimd 0 3 5.857933 0.000000 3361 +chrisochoid 0 2 6.263398 0.000000 5237 +grid 0 2 6.263398 0.000000 4228 +facet 0 2 6.263398 0.000000 4687 +prema 0 2 6.263398 0.000000 5238 +kale 0 2 6.263398 0.000000 4545 +key 0 2 6.263398 0.000000 5057 +aiaa 0 2 6.263398 0.000000 5239 +moscow 0 2 6.263398 0.000000 4884 +programmingenviron 0 2 6.263398 0.000000 5240 +and 0 2 6.263398 0.000000 5241 +nikosc 0 2 6.263398 0.000000 5242 +housti 0 1 6.957497 0.000000 11334 +ellpack 0 1 6.957497 0.000000 11335 +sukup 0 1 6.957497 0.000000 11336 +mississippi 0 1 6.957497 0.000000 11337 +papachi 0 1 6.957497 0.000000 11338 +florian 0 1 6.957497 0.000000 11339 +reza 0 1 6.957497 0.000000 11340 +behforooz 0 1 6.957497 0.000000 11341 +animesh 0 1 6.957497 0.000000 11342 +chatterje 0 1 6.957497 0.000000 11343 +rajani 0 1 6.957497 0.000000 11344 +vaidyanathan 0 1 6.957497 0.000000 11345 +bowyer 0 1 6.957497 0.000000 11346 +offifth 0 1 6.957497 0.000000 11347 +kohl 0 1 6.957497 0.000000 11348 +yellick 0 1 6.957497 0.000000 11349 +unstructur 0 1 6.957497 0.000000 11350 +collid 0 1 6.957497 0.000000 11351 +haupt 0 1 6.957497 0.000000 11352 +scalableparallel 0 1 6.957497 0.000000 11353 +engineeringresearch 0 1 6.957497 0.000000 11354 +parallelhardwar 0 1 6.957497 0.000000 11355 +differentialequ 0 1 6.957497 0.000000 11356 +vichnevetski 0 1 6.957497 0.000000 11357 +decompos 0 1 6.957497 0.000000 11358 +kortesi 0 1 6.957497 0.000000 11359 +domaindecomposit 0 1 6.957497 0.000000 11360 +ussr 0 1 6.957497 0.000000 11361 +glowinski 0 1 6.957497 0.000000 11362 +karathanas 0 1 6.957497 0.000000 11363 +samartzi 0 1 6.957497 0.000000 11364 +vavali 0 1 6.957497 0.000000 11365 +weerawarana 0 1 6.957497 0.000000 11366 +onsupercomput 0 1 6.957497 0.000000 11367 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..b61c4c14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +center 0 88 2.397895 0.000000 158 +environ 0 84 2.484907 0.000000 177 +institut 0 84 2.484907 0.000000 187 +build 0 85 2.484907 0.000000 184 +journal 0 83 2.484907 0.000000 183 +dynam 0 76 2.564949 0.000000 194 +solv 0 73 2.639057 0.000000 234 +appli 0 71 2.639057 0.000000 226 +copi 0 63 2.772589 0.000000 284 +automat 0 61 2.833213 0.000000 306 +share 0 59 2.833213 0.000000 304 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +scientif 0 53 2.944439 0.000000 341 +numer 0 49 3.044522 0.000000 369 +adapt 0 46 3.091042 0.000000 387 +execut 0 45 3.135494 0.000000 404 +mechan 0 43 3.178054 0.000000 416 +multi 0 36 3.367296 0.000000 493 +copyright 0 36 3.367296 0.000000 495 +global 0 34 3.401197 0.000000 520 +load 0 28 3.610918 0.000000 601 +task 0 25 3.737670 0.000000 678 +thread 0 23 3.806662 0.000000 722 +varieti 0 22 3.850148 0.000000 740 +portabl 0 20 3.951244 0.000000 819 +runtim 1 19 4.007333 4.007333 858 +style 0 15 4.248495 0.000000 1036 +balanc 0 14 4.317488 0.000000 1112 +target 0 12 4.465908 0.000000 1282 +multithread 0 11 4.553877 0.000000 1315 +consortium 0 10 4.653960 0.000000 1467 +port 0 8 4.875197 0.000000 1766 +multicomput 0 7 5.010635 0.000000 1890 +niko 0 4 5.568345 0.000000 2637 +ctctr 0 3 5.857933 0.000000 3625 +prema 1 2 6.263398 6.263398 5238 +chrisochoid 0 2 6.263398 0.000000 5237 +suppot 0 2 6.263398 0.000000 5243 +nikosc 0 2 6.263398 0.000000 5242 +andproblem 0 1 6.957497 0.000000 11368 +computingappl 0 1 6.957497 0.000000 11369 +pdecomput 0 1 6.957497 0.000000 11370 +pcrc 0 1 6.957497 0.000000 11371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..d507e68c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +welcom 0 122 2.079442 0.000000 99 +look 1 107 2.197225 2.197225 115 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +real 0 93 2.397895 0.000000 144 +learn 0 86 2.484907 0.000000 170 +java 0 70 2.708050 0.000000 248 +visit 0 63 2.772589 0.000000 288 +written 0 63 2.772589 0.000000 278 +locat 0 59 2.833213 0.000000 303 +game 0 36 3.367296 0.000000 498 +photo 0 31 3.496508 0.000000 561 +galleri 0 13 4.382027 0.000000 1192 +rest 0 12 4.465908 0.000000 1259 +invit 0 10 4.653960 0.000000 1428 +nuprl 0 10 4.653960 0.000000 1402 +sundai 0 10 4.653960 0.000000 1387 +tire 0 4 5.568345 0.000000 2799 +cyberspac 0 3 5.857933 0.000000 3719 +pavel 0 2 6.263398 0.000000 4164 +cinema 0 2 6.263398 0.000000 5244 +naumov 0 1 6.957497 0.000000 11372 +orplai 0 1 6.957497 0.000000 11373 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..dda06d6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,195 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +algorithm 0 162 1.791759 0.000000 57 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +architectur 0 139 1.945910 0.000000 77 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +intern 0 108 2.197225 0.000000 128 +find 0 111 2.197225 0.000000 111 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +need 0 98 2.302585 0.000000 135 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +control 0 82 2.484907 0.000000 164 +requir 0 81 2.484907 0.000000 167 +wide 0 84 2.484907 0.000000 185 +thing 0 84 2.484907 0.000000 189 +chang 0 82 2.484907 0.000000 163 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +materi 0 75 2.639057 0.000000 221 +workshop 0 71 2.639057 0.000000 239 +upson 0 71 2.639057 0.000000 218 +goal 0 66 2.708050 0.000000 250 +practic 0 70 2.708050 0.000000 246 +simul 0 66 2.708050 0.000000 255 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +simpl 0 60 2.833213 0.000000 298 +publish 0 57 2.890372 0.000000 326 +thesi 0 57 2.890372 0.000000 327 +direct 0 57 2.890372 0.000000 316 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +investig 0 51 2.995732 0.000000 353 +hardwar 0 51 2.995732 0.000000 350 +set 0 50 3.044522 0.000000 361 +physic 0 47 3.091042 0.000000 377 +could 0 46 3.091042 0.000000 383 +video 0 44 3.135494 0.000000 405 +long 0 43 3.178054 0.000000 413 +offer 0 43 3.178054 0.000000 414 +vision 0 41 3.218876 0.000000 430 +fast 0 42 3.218876 0.000000 429 +linear 0 41 3.218876 0.000000 431 +futur 0 41 3.218876 0.000000 427 +press 0 42 3.218876 0.000000 419 +york 0 41 3.218876 0.000000 435 +theoret 0 39 3.258097 0.000000 446 +realli 0 40 3.258097 0.000000 444 +must 0 40 3.258097 0.000000 442 +societi 0 40 3.258097 0.000000 456 +transact 0 39 3.258097 0.000000 438 +connect 0 37 3.332205 0.000000 485 +cost 0 37 3.332205 0.000000 480 +purpos 0 37 3.332205 0.000000 481 +game 0 36 3.367296 0.000000 498 +tree 0 36 3.367296 0.000000 492 +singl 0 34 3.401197 0.000000 510 +tech 0 35 3.401197 0.000000 515 +global 0 34 3.401197 0.000000 520 +taken 0 31 3.496508 0.000000 555 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +multiprocessor 0 28 3.610918 0.000000 605 +becom 0 28 3.610918 0.000000 603 +campu 0 27 3.637586 0.000000 623 +challeng 0 26 3.688879 0.000000 653 +notic 0 25 3.737670 0.000000 675 +scalabl 0 24 3.761200 0.000000 705 +universityithaca 0 24 3.761200 0.000000 710 +highli 0 23 3.806662 0.000000 725 +sequenti 0 22 3.850148 0.000000 745 +chip 0 21 3.912023 0.000000 770 +vlsi 0 21 3.912023 0.000000 795 +alloc 0 20 3.951244 0.000000 821 +prepar 0 20 3.951244 0.000000 824 +exploit 0 20 3.951244 0.000000 836 +region 0 19 4.007333 0.000000 875 +dimension 0 18 4.060443 0.000000 909 +speed 0 18 4.060443 0.000000 911 +element 0 18 4.060443 0.000000 895 +regist 0 17 4.110874 0.000000 938 +estim 0 17 4.110874 0.000000 930 +sept 0 17 4.110874 0.000000 952 +spatial 0 16 4.174387 0.000000 988 +reflect 0 15 4.248495 0.000000 1034 +near 0 14 4.317488 0.000000 1091 +polynomi 0 14 4.317488 0.000000 1069 +believ 0 13 4.382027 0.000000 1187 +johnson 0 13 4.382027 0.000000 1162 +sigplan 0 13 4.382027 0.000000 1190 +mesh 0 11 4.553877 0.000000 1351 +desktop 0 10 4.653960 0.000000 1445 +placement 0 10 4.653960 0.000000 1420 +cryptographi 0 9 4.753590 0.000000 1512 +realiz 0 8 4.875197 0.000000 1739 +perhap 0 8 4.875197 0.000000 1693 +attent 0 8 4.875197 0.000000 1651 +character 0 8 4.875197 0.000000 1767 +entri 0 8 4.875197 0.000000 1678 +pldi 0 8 4.875197 0.000000 1704 +irregular 0 8 4.875197 0.000000 1768 +hallcornel 0 8 4.875197 0.000000 1757 +maxim 0 7 5.010635 0.000000 1944 +henc 0 7 5.010635 0.000000 1805 +pursu 0 7 5.010635 0.000000 1902 +sensor 0 7 5.010635 0.000000 1920 +hidden 0 6 5.164786 0.000000 1987 +feasibl 0 6 5.164786 0.000000 2157 +cellular 0 5 5.347108 0.000000 2433 +grand 0 5 5.347108 0.000000 2425 +pingali 0 4 5.568345 0.000000 2956 +sold 0 4 5.568345 0.000000 2813 +compcon 0 4 5.568345 0.000000 2958 +zippel 0 4 5.568345 0.000000 2879 +neumann 0 3 5.857933 0.000000 3720 +simd 0 3 5.857933 0.000000 3360 +lattic 0 3 5.857933 0.000000 3721 +parallelmachin 0 3 5.857933 0.000000 3693 +arm 0 3 5.857933 0.000000 3697 +exit 0 3 5.857933 0.000000 3124 +usaemail 0 3 5.857933 0.000000 3722 +pearson 1 2 6.263398 6.263398 5245 +crystal 0 2 6.263398 0.000000 5013 +molecul 0 2 6.263398 0.000000 5246 +succe 0 2 6.263398 0.000000 5214 +vazirani 0 1 6.957497 0.000000 11374 +bipartit 0 1 6.957497 0.000000 11375 +consistingof 0 1 6.957497 0.000000 11376 +theubiquit 0 1 6.957497 0.000000 11377 +heed 0 1 6.957497 0.000000 11378 +lawsof 0 1 6.957497 0.000000 11379 +layoutand 0 1 6.957497 0.000000 11380 +accomplishedbi 0 1 6.957497 0.000000 11381 +ihav 0 1 6.957497 0.000000 11382 +couldb 0 1 6.957497 0.000000 11383 +thisarchitectur 0 1 6.957497 0.000000 11384 +designfor 0 1 6.957497 0.000000 11385 +proteinstructur 0 1 6.957497 0.000000 11386 +parallelcomput 0 1 6.957497 0.000000 11387 +commodityand 0 1 6.957497 0.000000 11388 +architectureand 0 1 6.957497 0.000000 11389 +hideth 0 1 6.957497 0.000000 11390 +underlyingvon 0 1 6.957497 0.000000 11391 +architectureha 0 1 6.957497 0.000000 11392 +easyto 0 1 6.957497 0.000000 11393 +dunten 0 1 6.957497 0.000000 11394 +kiewit 0 1 6.957497 0.000000 11395 +pillai 0 1 6.957497 0.000000 11396 +irregularli 0 1 6.957497 0.000000 11397 +allerton 0 1 6.957497 0.000000 11398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..ed61ca14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +contact 0 153 1.791759 0.000000 59 +develop 0 174 1.791759 0.000000 53 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +site 0 106 2.197225 0.000000 119 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +part 0 98 2.302585 0.000000 129 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +school 0 84 2.484907 0.000000 188 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +master 0 76 2.564949 0.000000 216 +server 0 76 2.564949 0.000000 204 +resum 0 79 2.564949 0.000000 217 +complet 0 77 2.564949 0.000000 208 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +java 0 70 2.708050 0.000000 248 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +knowledg 0 67 2.708050 0.000000 243 +practic 0 70 2.708050 0.000000 246 +window 0 68 2.708050 0.000000 242 +experi 0 64 2.772589 0.000000 283 +creat 0 63 2.772589 0.000000 277 +content 0 59 2.833213 0.000000 302 +colleg 0 61 2.833213 0.000000 300 +best 0 59 2.833213 0.000000 299 +semest 0 58 2.890372 0.000000 312 +found 0 53 2.944439 0.000000 337 +februari 0 54 2.944439 0.000000 328 +run 0 51 2.995732 0.000000 347 +case 0 51 2.995732 0.000000 351 +visitor 0 49 3.044522 0.000000 371 +still 0 50 3.044522 0.000000 362 +possibl 0 47 3.091042 0.000000 378 +favorit 0 44 3.135494 0.000000 410 +offer 0 43 3.178054 0.000000 414 +compani 0 41 3.218876 0.000000 423 +soon 0 36 3.367296 0.000000 494 +chapter 0 32 3.465736 0.000000 536 +taken 0 31 3.496508 0.000000 555 +abl 0 30 3.555348 0.000000 566 +quot 0 29 3.583519 0.000000 582 +becom 0 28 3.610918 0.000000 603 +hope 0 28 3.610918 0.000000 610 +administr 0 27 3.637586 0.000000 628 +request 0 26 3.688879 0.000000 635 +enabl 0 26 3.688879 0.000000 655 +enhanc 0 26 3.688879 0.000000 644 +client 0 25 3.737670 0.000000 679 +reach 0 24 3.761200 0.000000 688 +alwai 0 24 3.761200 0.000000 691 +cooper 0 22 3.850148 0.000000 757 +busi 0 21 3.912023 0.000000 784 +applet 0 20 3.951244 0.000000 827 +mpeg 0 20 3.951244 0.000000 831 +wonder 0 20 3.951244 0.000000 815 +log 0 19 4.007333 0.000000 857 +stock 0 16 4.174387 0.000000 1007 +todd 0 15 4.248495 0.000000 1051 +joint 0 13 4.382027 0.000000 1130 +johnson 0 13 4.382027 0.000000 1162 +brother 0 13 4.382027 0.000000 1189 +meng 0 12 4.465908 0.000000 1214 +round 0 8 4.875197 0.000000 1769 +presid 0 6 5.164786 0.000000 2196 +quickli 0 6 5.164786 0.000000 2000 +classroom 0 6 5.164786 0.000000 2006 +microsystem 0 6 5.164786 0.000000 2160 +junior 0 5 5.347108 0.000000 2519 +supplement 0 5 5.347108 0.000000 2355 +suppli 0 4 5.568345 0.000000 2611 +tape 0 4 5.568345 0.000000 2959 +permiss 0 4 5.568345 0.000000 2642 +jointli 0 3 5.857933 0.000000 3118 +eduand 0 3 5.857933 0.000000 3452 +roll 0 3 5.857933 0.000000 3723 +espn 0 3 5.857933 0.000000 3724 +borrow 0 3 5.857933 0.000000 3725 +fratern 0 2 6.263398 0.000000 4979 +throughth 0 2 6.263398 0.000000 4065 +ticker 0 2 6.263398 0.000000 5247 +peskin 0 1 6.957497 0.000000 11399 +acacia 0 1 6.957497 0.000000 11400 +andyour 0 1 6.957497 0.000000 11401 +workeda 0 1 6.957497 0.000000 11402 +cornellundergradu 0 1 6.957497 0.000000 11403 +theirfield 0 1 6.957497 0.000000 11404 +isrun 0 1 6.957497 0.000000 11405 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..618d9618 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +cornel 0 215 1.386294 0.000000 23 +list 0 201 1.609438 0.000000 39 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +mani 0 92 2.397895 0.000000 150 +stuff 1 87 2.484907 2.484907 171 +second 0 81 2.484907 0.000000 166 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +david 0 71 2.639057 0.000000 232 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +without 0 50 3.044522 0.000000 370 +favorit 0 44 3.135494 0.000000 410 +howev 0 41 3.218876 0.000000 422 +approxim 0 35 3.401197 0.000000 509 +go 0 33 3.433987 0.000000 529 +quot 0 29 3.583519 0.000000 582 +great 0 27 3.637586 0.000000 626 +although 0 25 3.737670 0.000000 667 +citi 0 19 4.007333 0.000000 874 +otherwis 0 17 4.110874 0.000000 922 +alreadi 0 16 4.174387 0.000000 963 +month 0 15 4.248495 0.000000 1025 +dave 0 14 4.317488 0.000000 1098 +philadelphia 0 12 4.465908 0.000000 1244 +resid 0 10 4.653960 0.000000 1461 +shop 0 10 4.653960 0.000000 1469 +imposs 0 9 4.753590 0.000000 1513 +pittsburgh 0 7 5.010635 0.000000 1938 +pennsylvania 0 7 5.010635 0.000000 1932 +famou 0 6 5.164786 0.000000 2185 +pierc 1 4 5.568345 5.568345 2623 +outlet 0 2 6.263398 0.000000 5248 +valentin 0 1 6.957497 0.000000 11406 +familycurr 0 1 6.957497 0.000000 11407 +halfwai 0 1 6.957497 0.000000 11408 +andharrisburg 0 1 6.957497 0.000000 11409 +younev 0 1 6.957497 0.000000 11410 +sinceit 0 1 6.957497 0.000000 11411 +throughpittsburgh 0 1 6.957497 0.000000 11412 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..9f4ce584 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +compil 0 122 2.079442 0.000000 96 +technolog 0 131 2.079442 0.000000 102 +theori 0 111 2.197225 0.000000 127 +well 0 109 2.197225 0.000000 121 +code 0 108 2.197225 0.000000 116 +teach 0 108 2.197225 0.000000 112 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +center 0 88 2.397895 0.000000 158 +present 0 91 2.397895 0.000000 145 +imag 0 91 2.397895 0.000000 161 +octob 0 89 2.397895 0.000000 156 +member 0 84 2.484907 0.000000 165 +institut 0 84 2.484907 0.000000 187 +level 0 87 2.484907 0.000000 180 +info 0 85 2.484907 0.000000 176 +april 0 77 2.564949 0.000000 196 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +prof 0 64 2.772589 0.000000 273 +summer 0 56 2.890372 0.000000 311 +talk 1 53 2.944439 2.944439 336 +undergradu 0 54 2.944439 0.000000 338 +extens 0 53 2.944439 0.000000 340 +maintain 0 51 2.995732 0.000000 342 +paul 0 38 3.295837 0.000000 471 +seminar 0 38 3.295837 0.000000 470 +multi 0 36 3.367296 0.000000 493 +random 0 34 3.401197 0.000000 511 +taught 0 33 3.433987 0.000000 526 +transform 0 32 3.465736 0.000000 542 +given 0 32 3.465736 0.000000 538 +abl 0 30 3.555348 0.000000 566 +packag 0 28 3.610918 0.000000 614 +framework 0 28 3.610918 0.000000 606 +seri 0 24 3.761200 0.000000 708 +handl 0 24 3.761200 0.000000 685 +lab 0 24 3.761200 0.000000 698 +deal 0 22 3.850148 0.000000 736 +instal 0 22 3.850148 0.000000 754 +runtim 0 19 4.007333 0.000000 858 +affili 0 13 4.382027 0.000000 1194 +deriv 0 13 4.382027 0.000000 1145 +block 0 13 4.382027 0.000000 1183 +loop 0 11 4.553877 0.000000 1310 +vladimir 0 11 4.553877 0.000000 1324 +regard 0 11 4.553877 0.000000 1309 +prior 0 10 4.653960 0.000000 1438 +tradit 0 10 4.653960 0.000000 1404 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +madra 0 8 4.875197 0.000000 1770 +watson 0 8 4.875197 0.000000 1691 +keshav 0 7 5.010635 0.000000 1852 +tip 0 7 5.010635 0.000000 1863 +nest 0 6 5.164786 0.000000 2151 +dens 0 6 5.164786 0.000000 2122 +handi 0 6 5.164786 0.000000 2111 +czar 0 5 5.347108 0.000000 2503 +licens 0 5 5.347108 0.000000 2520 +indupraka 0 4 5.568345 0.000000 2639 +kodukula 0 4 5.568345 0.000000 2640 +bernoulli 0 4 5.568345 0.000000 2955 +pingali 0 4 5.568345 0.000000 2956 +vijai 0 4 5.568345 0.000000 2960 +stodghil 0 4 5.568345 0.000000 2864 +trivial 0 4 5.568345 0.000000 2786 +dagstuhl 0 4 5.568345 0.000000 2871 +vliw 0 3 5.857933 0.000000 3514 +interplai 0 3 5.857933 0.000000 3726 +chelmsford 0 3 5.857933 0.000000 3564 +schloss 0 3 5.857933 0.000000 3727 +useof 0 3 5.857933 0.000000 3368 +andoper 0 3 5.857933 0.000000 3621 +praka 0 2 6.263398 0.000000 4155 +nawaaz 0 2 6.263398 0.000000 4153 +ahm 0 2 6.263398 0.000000 4154 +kotlyar 0 2 6.263398 0.000000 4907 +menon 0 2 6.263398 0.000000 5249 +imperfectli 0 1 6.957497 0.000000 11413 +tothat 0 1 6.957497 0.000000 11414 +andmultiprocessor 0 1 6.957497 0.000000 11415 +fromscientif 0 1 6.957497 0.000000 11416 +withibm 0 1 6.957497 0.000000 11417 +hasinterest 0 1 6.957497 0.000000 11418 +athp 0 1 6.957497 0.000000 11419 +wasabout 0 1 6.957497 0.000000 11420 +necess 0 1 6.957497 0.000000 11421 +looptransform 0 1 6.957497 0.000000 11422 +loopparallel 0 1 6.957497 0.000000 11423 +regardingdata 0 1 6.957497 0.000000 11424 +centric 0 1 6.957497 0.000000 11425 +availableund 0 1 6.957497 0.000000 11426 +departmentmachin 0 1 6.957497 0.000000 11427 +andfind 0 1 6.957497 0.000000 11428 +alsofind 0 1 6.957497 0.000000 11429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..188dd901 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +data 1 170 1.791759 1.791759 49 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +databas 1 122 2.079442 2.079442 86 +postscript 0 131 2.079442 0.000000 90 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +know 0 80 2.564949 0.000000 198 +upson 0 71 2.639057 0.000000 218 +order 0 69 2.708050 0.000000 249 +ithaca 1 65 2.772589 2.772589 294 +abstract 0 62 2.772589 0.000000 276 +type 0 61 2.833213 0.000000 296 +thesi 0 57 2.890372 0.000000 327 +case 0 51 2.995732 0.000000 351 +profession 0 51 2.995732 0.000000 345 +format 0 48 3.044522 0.000000 356 +tree 0 36 3.367296 0.000000 492 +enhanc 0 26 3.688879 0.000000 644 +sequenc 0 23 3.806662 0.000000 734 +sigmod 0 19 4.007333 0.000000 877 +save 0 14 4.317488 0.000000 1099 +dbm 0 13 4.382027 0.000000 1136 +submiss 0 11 4.553877 0.000000 1298 +road 0 11 4.553877 0.000000 1374 +seshadri 1 7 5.010635 5.010635 1803 +praveen 1 6 5.164786 5.164786 1996 +green 0 4 5.568345 0.000000 2848 +predat 0 3 5.857933 0.000000 3135 +warren 0 3 5.857933 0.000000 3301 +packer 0 3 5.857933 0.000000 3728 +adt 0 1 6.957497 0.000000 11430 +ranjani 0 1 6.957497 0.000000 11431 +ramamurthi 0 1 6.957497 0.000000 11432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..8c7c4c97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +mani 0 92 2.397895 0.000000 150 +proceed 0 93 2.397895 0.000000 152 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +ieee 0 86 2.484907 0.000000 190 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +order 0 69 2.708050 0.000000 249 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +knowledg 0 67 2.708050 0.000000 243 +practic 0 70 2.708050 0.000000 246 +evalu 0 64 2.772589 0.000000 266 +import 0 65 2.772589 0.000000 282 +complex 0 64 2.772589 0.000000 269 +plan 0 65 2.772589 0.000000 272 +collect 0 65 2.772589 0.000000 268 +previou 0 62 2.772589 0.000000 290 +result 0 65 2.772589 0.000000 281 +abstract 0 62 2.772589 0.000000 276 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +street 0 63 2.772589 0.000000 293 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +march 0 61 2.833213 0.000000 295 +variou 0 56 2.890372 0.000000 317 +detail 0 57 2.890372 0.000000 321 +publish 0 57 2.890372 0.000000 326 +explor 0 58 2.890372 0.000000 324 +sever 0 56 2.890372 0.000000 322 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +case 0 51 2.995732 0.000000 351 +set 0 50 3.044522 0.000000 361 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +could 0 46 3.091042 0.000000 383 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +answer 0 45 3.135494 0.000000 391 +natur 0 44 3.135494 0.000000 406 +algebra 0 45 3.135494 0.000000 394 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +even 0 45 3.135494 0.000000 393 +combin 0 42 3.218876 0.000000 421 +howev 0 41 3.218876 0.000000 422 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +form 0 39 3.258097 0.000000 443 +join 0 39 3.258097 0.000000 457 +map 0 39 3.258097 0.000000 452 +probabl 0 40 3.258097 0.000000 455 +theoret 0 39 3.258097 0.000000 446 +littl 0 39 3.258097 0.000000 454 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +mean 0 37 3.332205 0.000000 477 +cost 0 37 3.332205 0.000000 480 +multi 0 36 3.367296 0.000000 493 +next 0 34 3.401197 0.000000 517 +singl 0 34 3.401197 0.000000 510 +either 0 35 3.401197 0.000000 506 +michael 0 35 3.401197 0.000000 514 +queri 0 33 3.433987 0.000000 524 +express 0 32 3.465736 0.000000 540 +kind 0 32 3.465736 0.000000 541 +idea 0 32 3.465736 0.000000 545 +given 0 32 3.465736 0.000000 538 +extend 0 32 3.465736 0.000000 539 +transform 0 32 3.465736 0.000000 542 +posit 0 31 3.496508 0.000000 552 +storag 0 31 3.496508 0.000000 553 +scientist 0 31 3.496508 0.000000 560 +domain 0 30 3.555348 0.000000 564 +exist 0 30 3.555348 0.000000 569 +specifi 0 30 3.555348 0.000000 568 +built 0 29 3.583519 0.000000 592 +consid 0 29 3.583519 0.000000 590 +propos 0 28 3.610918 0.000000 602 +weather 0 28 3.610918 0.000000 618 +ask 0 28 3.610918 0.000000 597 +scale 0 28 3.610918 0.000000 613 +except 0 28 3.610918 0.000000 607 +framework 0 28 3.610918 0.000000 606 +client 0 25 3.737670 0.000000 679 +valu 0 25 3.737670 0.000000 665 +wai 0 25 3.737670 0.000000 662 +strategi 0 25 3.737670 0.000000 682 +demonstr 0 24 3.761200 0.000000 694 +store 0 24 3.761200 0.000000 693 +daili 0 24 3.761200 0.000000 706 +sequenc 1 23 3.806662 3.806662 734 +input 0 23 3.806662 0.000000 727 +thread 0 23 3.806662 0.000000 722 +defin 0 22 3.850148 0.000000 746 +sequenti 0 22 3.850148 0.000000 745 +sort 0 22 3.850148 0.000000 738 +serv 0 22 3.850148 0.000000 758 +identifi 0 22 3.850148 0.000000 760 +disk 0 22 3.850148 0.000000 747 +deal 0 22 3.850148 0.000000 736 +instead 0 22 3.850148 0.000000 756 +similar 0 21 3.912023 0.000000 771 +util 0 21 3.912023 0.000000 774 +sigmod 0 19 4.007333 0.000000 877 +record 0 18 4.060443 0.000000 890 +statu 0 18 4.060443 0.000000 885 +event 0 18 4.060443 0.000000 896 +account 0 18 4.060443 0.000000 882 +expand 0 17 4.110874 0.000000 928 +medic 0 17 4.110874 0.000000 958 +monitor 0 17 4.110874 0.000000 941 +weekli 0 17 4.110874 0.000000 919 +estim 0 17 4.110874 0.000000 930 +ramakrishnan 0 16 4.174387 0.000000 972 +advantag 0 16 4.174387 0.000000 987 +easi 0 16 4.174387 0.000000 969 +livni 0 15 4.248495 0.000000 1053 +indic 0 15 4.248495 0.000000 1013 +stream 0 15 4.248495 0.000000 1015 +miron 0 14 4.317488 0.000000 1110 +manner 0 14 4.317488 0.000000 1074 +embed 0 14 4.317488 0.000000 1102 +convent 0 14 4.317488 0.000000 1072 +econom 0 13 4.382027 0.000000 1184 +social 0 13 4.382027 0.000000 1123 +opportun 0 13 4.382027 0.000000 1161 +composit 0 13 4.382027 0.000000 1150 +step 0 13 4.382027 0.000000 1138 +front 0 13 4.382027 0.000000 1154 +raghu 0 12 4.465908 0.000000 1212 +scan 0 12 4.465908 0.000000 1243 +buffer 0 12 4.465908 0.000000 1211 +insid 0 12 4.465908 0.000000 1262 +amount 0 12 4.465908 0.000000 1208 +uniqu 0 12 4.465908 0.000000 1228 +shore 0 11 4.553877 0.000000 1377 +regard 0 11 4.553877 0.000000 1309 +motiv 0 11 4.553877 0.000000 1346 +instanc 0 11 4.553877 0.000000 1322 +devis 0 10 4.653960 0.000000 1451 +relationship 0 10 4.653960 0.000000 1383 +reli 0 10 4.653960 0.000000 1411 +subset 0 10 4.653960 0.000000 1425 +vldb 0 10 4.653960 0.000000 1470 +cheng 0 10 4.653960 0.000000 1381 +declar 0 9 4.753590 0.000000 1526 +tempor 0 9 4.753590 0.000000 1584 +strength 0 9 4.753590 0.000000 1494 +compos 0 9 4.753590 0.000000 1527 +vice 0 9 4.753590 0.000000 1604 +lock 0 9 4.753590 0.000000 1551 +respect 0 9 4.753590 0.000000 1545 +meta 0 9 4.753590 0.000000 1505 +intermedi 0 9 4.753590 0.000000 1497 +observ 0 9 4.753590 0.000000 1578 +mode 0 9 4.753590 0.000000 1492 +pose 0 9 4.753590 0.000000 1535 +seshadri 0 7 5.010635 0.000000 1803 +pageth 0 7 5.010635 0.000000 1939 +notion 0 7 5.010635 0.000000 1947 +merg 0 7 5.010635 0.000000 1862 +whenev 0 7 5.010635 0.000000 1883 +therefor 0 7 5.010635 0.000000 1822 +supportfor 0 7 5.010635 0.000000 1854 +praveen 0 6 5.164786 0.000000 1996 +nest 0 6 5.164786 0.000000 2151 +feasibl 0 6 5.164786 0.000000 2157 +financi 0 6 5.164786 0.000000 2197 +histor 0 6 5.164786 0.000000 2085 +consequ 0 6 5.164786 0.000000 1989 +temporari 0 6 5.164786 0.000000 2090 +greater 0 5 5.347108 0.000000 2258 +treat 0 5 5.347108 0.000000 2521 +correl 0 5 5.347108 0.000000 2279 +dual 0 5 5.347108 0.000000 2522 +distinct 0 5 5.347108 0.000000 2319 +overlap 0 5 5.347108 0.000000 2368 +complementari 0 5 5.347108 0.000000 2523 +educomput 0 5 5.347108 0.000000 2524 +zoom 0 4 5.568345 0.000000 2961 +phenomena 0 4 5.568345 0.000000 2962 +flavor 0 4 5.568345 0.000000 2625 +richter 0 4 5.568345 0.000000 2957 +collaps 0 3 5.857933 0.000000 3729 +inadequ 0 3 5.857933 0.000000 3730 +tediou 0 3 5.857933 0.000000 3731 +ineffici 0 3 5.857933 0.000000 3457 +megabyt 0 3 5.857933 0.000000 3732 +claus 0 3 5.857933 0.000000 3733 +offset 0 3 5.857933 0.000000 3467 +hourli 0 3 5.857933 0.000000 3734 +thathav 0 3 5.857933 0.000000 3735 +serverarchitectur 0 3 5.857933 0.000000 3736 +comad 0 3 5.857933 0.000000 3737 +informationfor 0 3 5.857933 0.000000 3738 +sequin 0 2 6.263398 0.000000 5250 +earthquak 0 2 6.263398 0.000000 5251 +volcano 0 2 6.263398 0.000000 5252 +meteorolog 0 2 6.263398 0.000000 5253 +aredescrib 0 2 6.263398 0.000000 5254 +objectivescurr 0 2 6.263398 0.000000 5255 +statusmotiv 0 2 6.263398 0.000000 5256 +exampleseq 0 2 6.263398 0.000000 5257 +languageoptim 0 2 6.263398 0.000000 5258 +techniquesseq 0 2 6.263398 0.000000 5259 +developmentpublicationsrel 0 2 6.263398 0.000000 5260 +workcontact 0 2 6.263398 0.000000 5261 +informationproject 0 2 6.263398 0.000000 5262 +processingof 0 2 6.263398 0.000000 5263 +theseappl 0 2 6.263398 0.000000 5264 +metereolog 0 2 6.263398 0.000000 5265 +andbiolog 0 2 6.263398 0.000000 5266 +semanticstak 0 2 6.263398 0.000000 5267 +evaluationintegr 0 2 6.263398 0.000000 5268 +canstor 0 2 6.263398 0.000000 5269 +sequencesthes 0 2 6.263398 0.000000 5270 +themost 0 2 6.263398 0.000000 5271 +statusth 0 2 6.263398 0.000000 5272 +algebraicqueri 0 2 6.263398 0.000000 5273 +analogousto 0 2 6.263398 0.000000 5274 +candeclar 0 2 6.263398 0.000000 5275 +likesql 0 2 6.263398 0.000000 5276 +versa 0 2 6.263398 0.000000 5277 +querya 0 2 6.263398 0.000000 5278 +occurr 0 2 6.263398 0.000000 5279 +erupt 0 2 6.263398 0.000000 5280 +didth 0 2 6.263398 0.000000 5281 +groupbi 0 2 6.263398 0.000000 5282 +subqueri 0 2 6.263398 0.000000 5283 +aggregatefunct 0 2 6.263398 0.000000 5284 +sequencesord 0 2 6.263398 0.000000 5285 +modelth 0 2 6.263398 0.000000 5286 +gist 0 2 6.263398 0.000000 5287 +ordereddomain 0 2 6.263398 0.000000 5288 +andposit 0 2 6.263398 0.000000 5289 +recordsmap 0 2 6.263398 0.000000 5290 +rise 0 2 6.263398 0.000000 5291 +relationaloper 0 2 6.263398 0.000000 5292 +andaggreg 0 2 6.263398 0.000000 5293 +researchersin 0 2 6.263398 0.000000 5294 +movingaggreg 0 2 6.263398 0.000000 5295 +worldsitu 0 2 6.263398 0.000000 5296 +extensionof 0 2 6.263398 0.000000 5297 +ofseq 0 2 6.263398 0.000000 5298 +languagew 0 2 6.263398 0.000000 5299 +usingwhich 0 2 6.263398 0.000000 5300 +languagei 0 2 6.263398 0.000000 5301 +queriesa 0 2 6.263398 0.000000 5302 +techniquesw 0 2 6.263398 0.000000 5303 +developmentth 0 2 6.263398 0.000000 5304 +viaa 0 2 6.263398 0.000000 5305 +ontop 0 2 6.263398 0.000000 5306 +languageswhich 0 2 6.263398 0.000000 5307 +arbitrarylevel 0 2 6.263398 0.000000 5308 +viceversa 0 2 6.263398 0.000000 5309 +detailson 0 2 6.263398 0.000000 5310 +publicationssequ 0 2 6.263398 0.000000 5311 +datapraveen 0 2 6.263398 0.000000 5312 +systempraveen 0 2 6.263398 0.000000 5313 +queriesraghu 0 2 6.263398 0.000000 5314 +workthedevis 0 2 6.263398 0.000000 5315 +visualizationenviron 0 2 6.263398 0.000000 5316 +servercontact 0 2 6.263398 0.000000 5317 +eduraghu 0 2 6.263398 0.000000 5318 +edumiron 0 2 6.263398 0.000000 5319 +seshadripraveen 0 2 6.263398 0.000000 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..37f0139b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +analysi 0 124 2.079442 0.000000 98 +advanc 0 99 2.302585 0.000000 130 +member 0 84 2.484907 0.000000 165 +upson 0 71 2.639057 0.000000 218 +august 0 66 2.708050 0.000000 257 +knowledg 0 67 2.708050 0.000000 243 +reason 0 57 2.890372 0.000000 318 +taken 0 31 3.496508 0.000000 555 +fellow 1 24 3.761200 3.761200 701 +germani 0 17 4.110874 0.000000 946 +stori 0 14 4.317488 0.000000 1087 +german 0 6 5.164786 0.000000 2190 +exchang 0 5 5.347108 0.000000 2310 +fulbright 0 4 5.568345 0.000000 2963 +karlsruh 0 3 5.857933 0.000000 3689 +ralph 1 1 6.957497 6.957497 11433 +benzingerralph 0 1 6.957497 0.000000 11434 +benzingerw 0 1 6.957497 0.000000 11435 +sich 0 1 6.957497 0.000000 11436 +seinen 0 1 6.957497 0.000000 11437 +lorbeeren 0 1 6.957497 0.000000 11438 +ausruht 0 1 6.957497 0.000000 11439 +trgt 0 1 6.957497 0.000000 11440 +derfalschen 0 1 6.957497 0.000000 11441 +stell 0 1 6.957497 0.000000 11442 +studienstiftung 0 1 6.957497 0.000000 11443 +deutschen 0 1 6.957497 0.000000 11444 +volk 0 1 6.957497 0.000000 11445 +siemen 0 1 6.957497 0.000000 11446 +international 0 1 6.957497 0.000000 11447 +studentenkrei 0 1 6.957497 0.000000 11448 +alumnusat 0 1 6.957497 0.000000 11449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..ba1e72ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,292 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +first 0 140 1.945910 0.000000 71 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +construct 0 139 1.945910 0.000000 82 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +structur 0 106 2.197225 0.000000 105 +mathemat 0 108 2.197225 0.000000 123 +version 0 113 2.197225 0.000000 122 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +well 0 109 2.197225 0.000000 121 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +peopl 0 96 2.302585 0.000000 132 +take 0 97 2.302585 0.000000 134 +section 0 94 2.397895 0.000000 149 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +imag 0 91 2.397895 0.000000 161 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +contain 0 81 2.484907 0.000000 174 +school 0 84 2.484907 0.000000 188 +exampl 0 77 2.564949 0.000000 195 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +state 0 76 2.564949 0.000000 207 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +interfac 0 79 2.564949 0.000000 209 +name 0 72 2.639057 0.000000 220 +integr 0 67 2.708050 0.000000 245 +would 0 67 2.708050 0.000000 251 +complex 0 64 2.772589 0.000000 269 +written 0 63 2.772589 0.000000 278 +guid 0 63 2.772589 0.000000 267 +abstract 0 62 2.772589 0.000000 276 +interact 0 62 2.772589 0.000000 270 +simpl 0 60 2.833213 0.000000 298 +space 0 57 2.890372 0.000000 310 +thesi 0 57 2.890372 0.000000 327 +overview 0 56 2.890372 0.000000 323 +sever 0 56 2.890372 0.000000 322 +talk 0 53 2.944439 0.000000 336 +allow 0 53 2.944439 0.000000 333 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +digit 0 52 2.995732 0.000000 348 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +format 0 48 3.044522 0.000000 356 +visual 0 48 3.044522 0.000000 372 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +still 0 50 3.044522 0.000000 362 +numer 0 49 3.044522 0.000000 369 +effect 0 46 3.091042 0.000000 385 +move 0 47 3.091042 0.000000 382 +featur 0 46 3.091042 0.000000 386 +made 0 44 3.135494 0.000000 398 +algebra 0 45 3.135494 0.000000 394 +even 0 45 3.135494 0.000000 393 +show 0 43 3.178054 0.000000 417 +long 0 43 3.178054 0.000000 413 +continu 0 39 3.258097 0.000000 448 +probabl 0 40 3.258097 0.000000 455 +error 0 40 3.258097 0.000000 449 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +short 0 36 3.367296 0.000000 499 +tree 0 36 3.367296 0.000000 492 +soon 0 36 3.367296 0.000000 494 +approxim 0 35 3.401197 0.000000 509 +print 0 34 3.401197 0.000000 503 +singl 0 34 3.401197 0.000000 510 +express 0 32 3.465736 0.000000 540 +independ 0 32 3.465736 0.000000 548 +concept 0 32 3.465736 0.000000 537 +human 0 32 3.465736 0.000000 546 +taken 0 31 3.496508 0.000000 555 +titl 0 31 3.496508 0.000000 556 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +hard 0 30 3.555348 0.000000 563 +compon 0 30 3.555348 0.000000 570 +limit 0 29 3.583519 0.000000 585 +progress 0 28 3.610918 0.000000 598 +load 0 28 3.610918 0.000000 601 +full 0 28 3.610918 0.000000 615 +symbol 0 27 3.637586 0.000000 620 +determin 0 27 3.637586 0.000000 630 +enhanc 0 26 3.688879 0.000000 644 +rule 0 26 3.688879 0.000000 638 +enabl 0 26 3.688879 0.000000 655 +challeng 0 26 3.688879 0.000000 653 +request 0 26 3.688879 0.000000 635 +constraint 0 26 3.688879 0.000000 636 +notic 0 25 3.737670 0.000000 675 +demonstr 0 24 3.761200 0.000000 694 +interpret 0 24 3.761200 0.000000 686 +seri 0 24 3.761200 0.000000 708 +variabl 0 23 3.806662 0.000000 715 +input 0 23 3.806662 0.000000 727 +initi 0 23 3.806662 0.000000 717 +brows 0 23 3.806662 0.000000 726 +equat 0 23 3.806662 0.000000 724 +size 0 23 3.806662 0.000000 713 +head 0 23 3.806662 0.000000 732 +begin 0 23 3.806662 0.000000 716 +sequenti 0 22 3.850148 0.000000 745 +defin 0 22 3.850148 0.000000 746 +reduc 0 22 3.850148 0.000000 759 +identifi 0 22 3.850148 0.000000 760 +output 0 21 3.912023 0.000000 788 +voic 0 21 3.912023 0.000000 806 +util 0 21 3.912023 0.000000 774 +hypertext 0 19 4.007333 0.000000 865 +left 0 19 4.007333 0.000000 851 +separ 0 19 4.007333 0.000000 844 +listen 0 18 4.060443 0.000000 907 +demo 0 18 4.060443 0.000000 888 +along 0 18 4.060443 0.000000 878 +record 0 18 4.060443 0.000000 890 +dimension 0 18 4.060443 0.000000 909 +element 0 18 4.060443 0.000000 895 +lower 0 18 4.060443 0.000000 886 +render 0 17 4.110874 0.000000 947 +matrix 0 17 4.110874 0.000000 933 +choic 0 16 4.174387 0.000000 979 +cognit 0 16 4.174387 0.000000 986 +took 0 16 4.174387 0.000000 1010 +upon 0 16 4.174387 0.000000 978 +piec 0 15 4.248495 0.000000 1020 +later 0 15 4.248495 0.000000 1043 +audio 1 14 4.317488 4.317488 1094 +latex 0 14 4.317488 0.000000 1064 +attribut 0 14 4.317488 0.000000 1092 +squar 0 14 4.317488 0.000000 1082 +shown 0 14 4.317488 0.000000 1080 +context 0 13 4.382027 0.000000 1153 +emac 0 13 4.382027 0.000000 1143 +directli 0 13 4.382027 0.000000 1141 +reader 0 12 4.465908 0.000000 1246 +speak 0 12 4.465908 0.000000 1283 +calcul 0 12 4.465908 0.000000 1268 +speech 0 12 4.465908 0.000000 1222 +shape 0 12 4.465908 0.000000 1245 +typic 0 11 4.553877 0.000000 1360 +ofcomput 0 10 4.653960 0.000000 1442 +donald 0 9 4.753590 0.000000 1510 +notat 0 9 4.753590 0.000000 1489 +imposs 0 9 4.753590 0.000000 1513 +distanc 0 9 4.753590 0.000000 1500 +equival 0 9 4.753590 0.000000 1496 +cross 0 8 4.875197 0.000000 1703 +root 0 8 4.875197 0.000000 1650 +wire 0 8 4.875197 0.000000 1747 +illustr 0 8 4.875197 0.000000 1679 +forget 0 8 4.875197 0.000000 1712 +replac 0 8 4.875197 0.000000 1668 +dimens 0 7 5.010635 0.000000 1930 +stereo 0 7 5.010635 0.000000 1818 +hear 0 7 5.010635 0.000000 1940 +dedic 0 7 5.010635 0.000000 1843 +notion 0 7 5.010635 0.000000 1947 +nest 0 6 5.164786 0.000000 2151 +vari 0 6 5.164786 0.000000 2001 +difficult 0 6 5.164786 0.000000 2035 +quick 0 6 5.164786 0.000000 2184 +heurist 0 6 5.164786 0.000000 2125 +chosen 0 6 5.164786 0.000000 1984 +meant 0 6 5.164786 0.000000 2055 +fraction 0 5 5.347108 0.000000 2259 +recogn 0 5 5.347108 0.000000 2302 +quantifi 0 5 5.347108 0.000000 2525 +substitut 0 5 5.347108 0.000000 2247 +mutual 0 5 5.347108 0.000000 2418 +raman 0 4 5.568345 0.000000 2827 +inlin 0 4 5.568345 0.000000 2964 +encod 0 4 5.568345 0.000000 2929 +paus 0 4 5.568345 0.000000 2965 +orthogon 0 4 5.568345 0.000000 2832 +compris 0 4 5.568345 0.000000 2862 +vital 0 4 5.568345 0.000000 2733 +ident 0 4 5.568345 0.000000 2826 +customiz 0 4 5.568345 0.000000 2966 +trick 0 4 5.568345 0.000000 2967 +thati 0 4 5.568345 0.000000 2616 +heard 0 4 5.568345 0.000000 2895 +formula 0 3 5.857933 0.000000 3405 +percept 0 3 5.857933 0.000000 3739 +subscript 0 3 5.857933 0.000000 3469 +tripl 0 3 5.857933 0.000000 3160 +meaning 0 3 5.857933 0.000000 3458 +blind 0 3 5.857933 0.000000 3662 +forthes 0 3 5.857933 0.000000 3199 +experienc 0 3 5.857933 0.000000 3203 +exponenti 0 3 5.857933 0.000000 3529 +thetim 0 3 5.857933 0.000000 3581 +proper 0 3 5.857933 0.000000 3323 +orpostscript 0 3 5.857933 0.000000 3329 +convei 0 2 6.263398 0.000000 4690 +spoken 0 2 6.263398 0.000000 5122 +succinctli 0 2 6.263398 0.000000 4275 +monoton 0 2 6.263398 0.000000 5321 +logarithm 0 2 6.263398 0.000000 5322 +expon 0 2 6.263398 0.000000 5323 +absenc 0 2 6.263398 0.000000 4878 +oppos 0 2 6.263398 0.000000 4855 +ofintegr 0 2 6.263398 0.000000 5324 +summat 0 2 6.263398 0.000000 5325 +referenc 0 2 6.263398 0.000000 4757 +justa 0 2 6.263398 0.000000 5326 +glori 0 2 6.263398 0.000000 5327 +aster 0 1 6.957497 0.000000 11450 +bruno 0 1 6.957497 0.000000 11451 +superscript 0 1 6.957497 0.000000 11452 +knuth 0 1 6.957497 0.000000 11453 +unambigu 0 1 6.957497 0.000000 11454 +inton 0 1 6.957497 0.000000 11455 +intermix 0 1 6.957497 0.000000 11456 +demonstrationi 0 1 6.957497 0.000000 11457 +forrend 0 1 6.957497 0.000000 11458 +myphd 0 1 6.957497 0.000000 11459 +dectalk 0 1 6.957497 0.000000 11460 +mulaw 0 1 6.957497 0.000000 11461 +mono 0 1 6.957497 0.000000 11462 +dvip 0 1 6.957497 0.000000 11463 +andround 0 1 6.957497 0.000000 11464 +faad 0 1 6.957497 0.000000 11465 +casey 0 1 6.957497 0.000000 11466 +examplessinc 0 1 6.957497 0.000000 11467 +inflect 0 1 6.957497 0.000000 11468 +toconvei 0 1 6.957497 0.000000 11469 +renderingsub 0 1 6.957497 0.000000 11470 +audiost 0 1 6.957497 0.000000 11471 +dimensionus 0 1 6.957497 0.000000 11472 +verbatim 0 1 6.957497 0.000000 11473 +layoutoper 0 1 6.957497 0.000000 11474 +verydiffer 0 1 6.957497 0.000000 11475 +monotonicchang 0 1 6.957497 0.000000 11476 +trigonometr 0 1 6.957497 0.000000 11477 +ambigu 0 1 6.957497 0.000000 11478 +parenthesi 0 1 6.957497 0.000000 11479 +asexpon 0 1 6.957497 0.000000 11480 +isfulli 0 1 6.957497 0.000000 11481 +innocu 0 1 6.957497 0.000000 11482 +mostdifficult 0 1 6.957497 0.000000 11483 +theintegr 0 1 6.957497 0.000000 11484 +ofhuman 0 1 6.957497 0.000000 11485 +ofcross 0 1 6.957497 0.000000 11486 +referenceableobject 0 1 6.957497 0.000000 11487 +latercross 0 1 6.957497 0.000000 11488 +followingdeepli 0 1 6.957497 0.000000 11489 +fledgedsymbol 0 1 6.957497 0.000000 11490 +thematrix 0 1 6.957497 0.000000 11491 +commenc 0 1 6.957497 0.000000 11492 +aseach 0 1 6.957497 0.000000 11493 +secondsto 0 1 6.957497 0.000000 11494 +spacenot 0 1 6.957497 0.000000 11495 +changeth 0 1 6.957497 0.000000 11496 +techniquefor 0 1 6.957497 0.000000 11497 +renderingsconvei 0 1 6.957497 0.000000 11498 +thesub 0 1 6.957497 0.000000 11499 +denomin 0 1 6.957497 0.000000 11500 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..96fff47f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +parallel 0 169 1.791759 0.000000 60 +process 0 142 1.945910 0.000000 72 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +check 0 115 2.197225 0.000000 118 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +octob 0 89 2.397895 0.000000 156 +learn 0 86 2.484907 0.000000 170 +ieee 0 86 2.484907 0.000000 190 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +effici 0 73 2.639057 0.000000 233 +test 0 66 2.708050 0.000000 252 +august 0 66 2.708050 0.000000 257 +foundat 0 62 2.772589 0.000000 286 +ithaca 0 65 2.772589 0.000000 294 +function 0 62 2.772589 0.000000 275 +juli 0 60 2.833213 0.000000 305 +extens 0 53 2.944439 0.000000 340 +without 0 50 3.044522 0.000000 370 +linear 0 41 3.218876 0.000000 431 +theoret 0 39 3.258097 0.000000 446 +correct 0 38 3.295837 0.000000 462 +approxim 0 35 3.401197 0.000000 509 +bound 0 26 3.688879 0.000000 659 +scalabl 0 24 3.761200 0.000000 705 +equat 0 23 3.806662 0.000000 724 +self 0 22 3.850148 0.000000 761 +polynomi 0 14 4.317488 0.000000 1069 +squar 0 14 4.317488 0.000000 1082 +branch 0 11 4.553877 0.000000 1318 +kumar 0 9 4.753590 0.000000 1506 +russel 0 9 4.753590 0.000000 1507 +rubinfeld 0 6 5.164786 0.000000 1998 +ronitt 0 5 5.347108 0.000000 2265 +combinator 0 4 5.568345 0.000000 2915 +bottleneck 0 4 5.568345 0.000000 2769 +shah 0 4 5.568345 0.000000 2814 +ravi 0 3 5.857933 0.000000 3185 +funda 0 3 5.857933 0.000000 3645 +recurr 0 3 5.857933 0.000000 3740 +latin 0 3 5.857933 0.000000 3741 +sundaram 0 3 5.857933 0.000000 3463 +ramachandran 0 3 5.857933 0.000000 3742 +lnc 0 2 6.263398 0.000000 5085 +width 0 2 6.263398 0.000000 5328 +alexand 0 2 6.263398 0.000000 5329 +uumln 0 1 6.957497 0.000000 11501 +sivakumar 0 1 6.957497 0.000000 11502 +jeyakumar 0 1 6.957497 0.000000 11503 +muthukumarasami 0 1 6.957497 0.000000 11504 +umakishor 0 1 6.957497 0.000000 11505 +gautam 0 1 6.957497 0.000000 11506 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..8a537ea3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +confer 0 126 2.079442 0.000000 100 +introduct 0 126 2.079442 0.000000 87 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +teach 0 108 2.197225 0.000000 112 +access 0 102 2.302585 0.000000 136 +imag 0 91 2.397895 0.000000 161 +search 0 95 2.397895 0.000000 155 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +method 0 80 2.564949 0.000000 213 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +multimedia 0 68 2.708050 0.000000 258 +differ 0 66 2.708050 0.000000 253 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +march 0 61 2.833213 0.000000 295 +think 0 57 2.890372 0.000000 314 +variou 0 56 2.890372 0.000000 317 +undergradu 0 54 2.944439 0.000000 338 +local 0 55 2.944439 0.000000 334 +profession 0 51 2.995732 0.000000 345 +format 0 48 3.044522 0.000000 356 +visual 0 48 3.044522 0.000000 372 +electron 0 47 3.091042 0.000000 379 +featur 0 46 3.091042 0.000000 386 +california 0 46 3.091042 0.000000 388 +video 0 44 3.135494 0.000000 405 +third 0 43 3.178054 0.000000 412 +vision 0 41 3.218876 0.000000 430 +committe 0 34 3.401197 0.000000 522 +john 0 33 3.433987 0.000000 532 +taught 0 33 3.433987 0.000000 526 +transform 0 32 3.465736 0.000000 542 +pass 0 28 3.610918 0.000000 611 +held 0 28 3.610918 0.000000 600 +retriev 0 27 3.637586 0.000000 621 +subject 0 26 3.688879 0.000000 647 +compar 0 26 3.688879 0.000000 648 +detect 0 26 3.688879 0.000000 646 +greg 0 24 3.761200 0.000000 695 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +break 0 20 3.951244 0.000000 812 +spend 0 19 4.007333 0.000000 850 +boston 0 19 4.007333 0.000000 862 +scott 0 18 4.060443 0.000000 884 +miller 0 17 4.110874 0.000000 949 +vector 0 16 4.174387 0.000000 961 +fourth 0 16 4.174387 0.000000 999 +researchmi 0 14 4.317488 0.000000 1119 +coher 0 14 4.317488 0.000000 1109 +massachusett 0 14 4.317488 0.000000 1118 +scene 0 14 4.317488 0.000000 1114 +francisco 0 14 4.317488 0.000000 1095 +econom 0 13 4.382027 0.000000 1184 +huang 0 12 4.465908 0.000000 1202 +amount 0 12 4.465908 0.000000 1208 +reader 0 12 4.465908 0.000000 1246 +impact 0 11 4.553877 0.000000 1334 +refin 0 11 4.553877 0.000000 1363 +princip 0 10 4.653960 0.000000 1397 +correspond 0 10 4.653960 0.000000 1382 +observ 0 9 4.753590 0.000000 1578 +classifi 0 9 4.753590 0.000000 1537 +kevin 0 9 4.753590 0.000000 1482 +juan 0 9 4.753590 0.000000 1580 +european 0 8 4.875197 0.000000 1763 +ramin 0 7 5.010635 0.000000 1820 +justin 0 7 5.010635 0.000000 1789 +adob 0 7 5.010635 0.000000 1873 +parametr 0 7 5.010635 0.000000 1819 +sweden 0 7 5.010635 0.000000 1885 +courtesi 0 7 5.010635 0.000000 1953 +zabih 0 6 5.164786 0.000000 2138 +freeli 0 6 5.164786 0.000000 2014 +price 0 6 5.164786 0.000000 1999 +acrobat 0 6 5.164786 0.000000 2063 +phil 0 5 5.347108 0.000000 2419 +fair 0 5 5.347108 0.000000 2333 +florida 0 5 5.347108 0.000000 2526 +cvpr 0 4 5.568345 0.000000 2761 +essai 0 4 5.568345 0.000000 2948 +newslett 0 4 5.568345 0.000000 2873 +scribe 0 4 5.568345 0.000000 2631 +jing 0 3 5.857933 0.000000 3521 +voskuhl 0 3 5.857933 0.000000 3109 +szewczyk 0 3 5.857933 0.000000 3108 +histogram 0 3 5.857933 0.000000 3490 +stockholm 0 3 5.857933 0.000000 3715 +conjunct 0 3 5.857933 0.000000 3743 +cytacki 0 2 6.263398 0.000000 5330 +pageramin 0 1 6.957497 0.000000 11507 +zabihassist 0 1 6.957497 0.000000 11508 +professorrdz 0 1 6.957497 0.000000 11509 +agr 0 1 6.957497 0.000000 11510 +studentsi 0 1 6.957497 0.000000 11511 +vera 0 1 6.957497 0.000000 11512 +kettnak 0 1 6.957497 0.000000 11513 +olga 0 1 6.957497 0.000000 11514 +veksler 0 1 6.957497 0.000000 11515 +publicationsmost 0 1 6.957497 0.000000 11516 +sarasota 0 1 6.957497 0.000000 11517 +woodfil 0 1 6.957497 0.000000 11518 +teachingi 0 1 6.957497 0.000000 11519 +activitiesi 0 1 6.957497 0.000000 11520 +comitte 0 1 6.957497 0.000000 11521 +acknowledgementsthi 0 1 6.957497 0.000000 11522 +huttenlocherlast 0 1 6.957497 0.000000 11523 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..a6e08e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +roderick 1 1 6.957497 6.957497 11524 +moten 1 1 6.957497 6.957497 11525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..4f40ba42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +homepag 0 93 2.397895 0.000000 148 +upson 0 71 2.639057 0.000000 218 +result 0 65 2.772589 0.000000 281 +talk 0 53 2.944439 0.000000 336 +telephon 0 50 3.044522 0.000000 373 +describ 0 45 3.135494 0.000000 400 +york 0 41 3.218876 0.000000 435 +random 0 34 3.401197 0.000000 511 +universityithaca 0 24 3.761200 0.000000 710 +kumar 0 9 4.753590 0.000000 1506 +hallcornel 0 8 4.875197 0.000000 1757 +rubinfeld 0 6 5.164786 0.000000 1998 +ronitt 1 5 5.347108 5.347108 2265 +fair 0 5 5.347108 0.000000 2333 +funda 0 3 5.857933 0.000000 3645 +ravi 0 3 5.857933 0.000000 3185 +ergun 0 2 6.263398 0.000000 5087 +wasserman 0 2 6.263398 0.000000 5331 +nephew 0 2 6.263398 0.000000 5332 +homepageronitt 0 1 6.957497 0.000000 11526 +rubinfeldi 0 1 6.957497 0.000000 11527 +rubinfeldcomput 0 1 6.957497 0.000000 11528 +edupictur 0 1 6.957497 0.000000 11529 +eitan 0 1 6.957497 0.000000 11530 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..48e2dec6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +technic 0 100 2.302585 0.000000 140 +memori 0 101 2.302585 0.000000 139 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +institut 0 84 2.484907 0.000000 187 +appear 0 78 2.564949 0.000000 210 +state 0 76 2.564949 0.000000 207 +involv 0 71 2.639057 0.000000 227 +receiv 0 66 2.708050 0.000000 244 +share 0 59 2.833213 0.000000 304 +thesi 0 57 2.890372 0.000000 327 +advisor 0 51 2.995732 0.000000 355 +post 0 35 3.401197 0.000000 505 +titl 0 31 3.496508 0.000000 556 +full 0 28 3.610918 0.000000 615 +consist 0 26 3.688879 0.000000 651 +reliabl 0 25 3.737670 0.000000 674 +doctor 0 24 3.761200 0.000000 709 +condit 0 16 4.174387 0.000000 975 +edui 0 13 4.382027 0.000000 1193 +replic 0 12 4.465908 0.000000 1231 +thedepart 0 11 4.553877 0.000000 1350 +israel 0 11 4.553877 0.000000 1366 +mainli 0 10 4.653960 0.000000 1432 +birman 0 9 4.753590 0.000000 1531 +friedman 0 7 5.010635 0.000000 1886 +cornellunivers 0 7 5.010635 0.000000 1916 +trade 0 7 5.010635 0.000000 1815 +clickher 0 5 5.347108 0.000000 2428 +vaysburd 0 4 5.568345 0.000000 2846 +tina 0 3 5.857933 0.000000 3744 +scienceatcornel 0 2 6.263398 0.000000 5333 +withken 0 2 6.263398 0.000000 5334 +androbbert 0 2 6.263398 0.000000 4953 +thehoru 0 2 6.263398 0.000000 5179 +attiya 0 2 6.263398 0.000000 5197 +partition 0 2 6.263398 0.000000 4954 +thetechnion 0 1 6.957497 0.000000 11531 +friedmanroi 0 1 6.957497 0.000000 11532 +friedmanpost 0 1 6.957497 0.000000 11533 +universityroi 0 1 6.957497 0.000000 11534 +rennessein 0 1 6.957497 0.000000 11535 +washagit 0 1 6.957497 0.000000 11536 +wasconsist 0 1 6.957497 0.000000 11537 +themilliped 0 1 6.957497 0.000000 11538 +withassaf 0 1 6.957497 0.000000 11539 +schuster 0 1 6.957497 0.000000 11540 +papersr 0 1 6.957497 0.000000 11541 +scalabledistribut 0 1 6.957497 0.000000 11542 +coprocessor 0 1 6.957497 0.000000 11543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..2d764c7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +cornel 1 215 1.386294 1.386294 23 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +version 0 113 2.197225 0.000000 122 +associ 0 93 2.397895 0.000000 151 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +upson 0 71 2.639057 0.000000 218 +onlin 0 75 2.639057 0.000000 223 +simul 0 66 2.708050 0.000000 255 +ithaca 0 65 2.772589 0.000000 294 +dept 0 64 2.772589 0.000000 291 +tech 0 35 3.401197 0.000000 515 +photograph 0 15 4.248495 0.000000 1056 +daniela 0 3 5.857933 0.000000 3611 +catalogc 0 2 6.263398 0.000000 5023 +infodesign 0 1 6.957497 0.000000 11544 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..063fa068 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +version 0 113 2.197225 0.000000 122 +place 0 106 2.197225 0.000000 124 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +contain 0 81 2.484907 0.000000 174 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +html 0 75 2.639057 0.000000 235 +ithaca 0 65 2.772589 0.000000 294 +virtual 0 62 2.772589 0.000000 285 +complex 0 64 2.772589 0.000000 269 +scientif 0 53 2.944439 0.000000 341 +advisor 0 51 2.995732 0.000000 355 +protocol 0 45 3.135494 0.000000 407 +multi 0 36 3.367296 0.000000 493 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +secur 0 30 3.555348 0.000000 577 +framework 0 28 3.610918 0.000000 606 +american 0 27 3.637586 0.000000 634 +reliabl 0 25 3.737670 0.000000 674 +flow 0 24 3.761200 0.000000 700 +mobil 0 23 3.806662 0.000000 730 +hous 0 21 3.912023 0.000000 801 +flexibl 0 21 3.912023 0.000000 792 +applet 0 20 3.951244 0.000000 827 +media 0 19 4.007333 0.000000 861 +agent 0 18 4.060443 0.000000 910 +club 0 15 4.248495 0.000000 1058 +horu 1 14 4.317488 4.317488 1116 +senior 0 14 4.317488 0.000000 1120 +composit 0 13 4.382027 0.000000 1150 +edui 0 13 4.382027 0.000000 1193 +incorpor 0 13 4.382027 0.000000 1163 +weak 0 13 4.382027 0.000000 1159 +danc 0 12 4.465908 0.000000 1278 +market 0 11 4.553877 0.000000 1361 +interestsmi 0 10 4.653960 0.000000 1462 +robbert 0 9 4.753590 0.000000 1529 +guitar 0 8 4.875197 0.000000 1758 +synchroni 0 7 5.010635 0.000000 1923 +band 0 6 5.164786 0.000000 2198 +strong 0 6 5.164786 0.000000 2029 +jazz 0 5 5.347108 0.000000 2527 +babi 0 5 5.347108 0.000000 2493 +girl 0 5 5.347108 0.000000 2410 +swing 0 4 5.568345 0.000000 2887 +dutch 0 3 5.857933 0.000000 3592 +lightweight 0 3 5.857933 0.000000 3234 +tanenbaum 0 3 5.857933 0.000000 3397 +sharewar 0 3 5.857933 0.000000 3503 +netherland 0 3 5.857933 0.000000 3650 +associatecornel 0 2 6.263398 0.000000 5137 +scienceatcornel 0 2 6.263398 0.000000 5333 +withken 0 2 6.263398 0.000000 5334 +tacoma 0 2 6.263398 0.000000 4909 +brand 0 1 6.957497 0.000000 11545 +renesserobbert 0 1 6.957497 0.000000 11546 +renessesenior 0 1 6.957497 0.000000 11547 +universityrvr 0 1 6.957497 0.000000 11548 +universityinithaca 0 1 6.957497 0.000000 11549 +birmanin 0 1 6.957497 0.000000 11550 +wasandi 0 1 6.957497 0.000000 11551 +caml 0 1 6.957497 0.000000 11552 +nynetth 0 1 6.957497 0.000000 11553 +ageless 0 1 6.957497 0.000000 11554 +accordion 0 1 6.957497 0.000000 11555 +stuffcornel 0 1 6.957497 0.000000 11556 +ithacaithacanet 0 1 6.957497 0.000000 11557 +spinner 0 1 6.957497 0.000000 11558 +paperssoftwar 0 1 6.957497 0.000000 11559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..12409237 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +version 0 113 2.197225 0.000000 122 +final 0 116 2.197225 0.000000 108 +intern 0 108 2.197225 0.000000 128 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +octob 0 89 2.397895 0.000000 156 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +august 0 66 2.708050 0.000000 257 +copi 0 63 2.772589 0.000000 284 +experi 0 64 2.772589 0.000000 283 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +februari 0 54 2.944439 0.000000 328 +sampl 0 53 2.944439 0.000000 339 +profession 0 51 2.995732 0.000000 345 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +electron 0 47 3.091042 0.000000 379 +answer 0 45 3.135494 0.000000 391 +submit 0 39 3.258097 0.000000 440 +annual 0 40 3.258097 0.000000 458 +especi 0 36 3.367296 0.000000 496 +approxim 0 35 3.401197 0.000000 509 +award 0 34 3.401197 0.000000 523 +survei 0 35 3.401197 0.000000 513 +detect 0 26 3.688879 0.000000 646 +revis 0 26 3.688879 0.000000 640 +consist 0 26 3.688879 0.000000 651 +reliabl 0 25 3.737670 0.000000 674 +doctor 0 24 3.761200 0.000000 709 +seri 0 24 3.761200 0.000000 708 +finish 0 22 3.850148 0.000000 748 +properti 0 22 3.850148 0.000000 749 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +failur 0 18 4.060443 0.000000 898 +expand 0 17 4.110874 0.000000 928 +diego 0 16 4.174387 0.000000 992 +letter 0 16 4.174387 0.000000 981 +asynchron 0 12 4.465908 0.000000 1229 +elect 0 8 4.875197 0.000000 1771 +perfect 0 7 5.010635 0.000000 1921 +chandra 0 6 5.164786 0.000000 2091 +consensu 0 6 5.164786 0.000000 2080 +prize 0 6 5.164786 0.000000 2150 +keith 0 5 5.347108 0.000000 2528 +stabl 0 5 5.347108 0.000000 2309 +blow 0 5 5.347108 0.000000 2407 +spam 0 4 5.568345 0.000000 2927 +bean 0 4 5.568345 0.000000 2968 +detector 0 3 5.857933 0.000000 3745 +horizon 0 3 5.857933 0.000000 3746 +zone 0 3 5.857933 0.000000 3747 +cash 0 3 5.857933 0.000000 3355 +marzullo 0 2 6.263398 0.000000 4919 +formor 0 2 6.263398 0.000000 5335 +distributedcomput 0 2 6.263398 0.000000 5336 +lecturenot 0 2 6.263398 0.000000 4679 +sabel 1 1 6.957497 6.957497 11560 +laura 0 1 6.957497 0.000000 11561 +asynchronousdistribut 0 1 6.957497 0.000000 11562 +jelli 0 1 6.957497 0.000000 11563 +bingo 0 1 6.957497 0.000000 11564 +professorkeith 0 1 6.957497 0.000000 11565 +tushar 0 1 6.957497 0.000000 11566 +sfailur 0 1 6.957497 0.000000 11567 +subcut 0 1 6.957497 0.000000 11568 +wdag 0 1 6.957497 0.000000 11569 +cow 0 1 6.957497 0.000000 11570 +strawberri 0 1 6.957497 0.000000 11571 +tart 0 1 6.957497 0.000000 11572 +torch 0 1 6.957497 0.000000 11573 +alpacanet 0 1 6.957497 0.000000 11574 +gourmet 0 1 6.957497 0.000000 11575 +thebobbi 0 1 6.957497 0.000000 11576 +belli 0 1 6.957497 0.000000 11577 +canplai 0 1 6.957497 0.000000 11578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..f5c93653 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +well 0 109 2.197225 0.000000 121 +memori 0 101 2.302585 0.000000 139 +need 0 98 2.302585 0.000000 135 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +journal 0 83 2.484907 0.000000 183 +level 0 87 2.484907 0.000000 180 +ieee 0 86 2.484907 0.000000 190 +messag 0 76 2.564949 0.000000 212 +optim 0 79 2.564949 0.000000 197 +free 0 73 2.639057 0.000000 224 +solv 0 73 2.639057 0.000000 234 +symposium 0 72 2.639057 0.000000 238 +effici 0 73 2.639057 0.000000 233 +august 0 66 2.708050 0.000000 257 +goal 0 66 2.708050 0.000000 250 +simul 0 66 2.708050 0.000000 255 +knowledg 0 67 2.708050 0.000000 243 +result 0 65 2.772589 0.000000 281 +foundat 0 62 2.772589 0.000000 286 +share 0 59 2.833213 0.000000 304 +type 0 61 2.833213 0.000000 296 +automat 0 61 2.833213 0.000000 306 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +major 0 56 2.890372 0.000000 315 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +maintain 0 51 2.995732 0.000000 342 +principl 0 48 3.044522 0.000000 357 +even 0 45 3.135494 0.000000 393 +protocol 0 45 3.135494 0.000000 407 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +show 0 43 3.178054 0.000000 417 +theoret 0 39 3.258097 0.000000 446 +continu 0 39 3.258097 0.000000 448 +transact 0 39 3.258097 0.000000 438 +correct 0 38 3.295837 0.000000 462 +respons 0 37 3.332205 0.000000 476 +least 0 35 3.401197 0.000000 516 +concurr 0 34 3.401197 0.000000 501 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +collabor 0 32 3.465736 0.000000 543 +abl 0 30 3.555348 0.000000 566 +exist 0 30 3.555348 0.000000 569 +common 0 30 3.555348 0.000000 574 +synchron 0 29 3.583519 0.000000 588 +pass 0 28 3.610918 0.000000 611 +determin 0 27 3.637586 0.000000 630 +consist 0 26 3.688879 0.000000 651 +fundament 0 25 3.737670 0.000000 661 +known 0 24 3.761200 0.000000 702 +methodolog 0 23 3.806662 0.000000 733 +hierarchi 0 22 3.850148 0.000000 744 +thu 0 21 3.912023 0.000000 773 +increas 0 20 3.951244 0.000000 829 +prove 0 19 4.007333 0.000000 848 +failur 0 18 4.060443 0.000000 898 +whether 0 17 4.110874 0.000000 918 +partit 0 16 4.174387 0.000000 984 +princeton 0 15 4.248495 0.000000 1042 +shown 0 14 4.317488 0.000000 1080 +wait 0 13 4.382027 0.000000 1168 +canada 0 13 4.382027 0.000000 1158 +cannot 0 13 4.382027 0.000000 1144 +difficulti 0 13 4.382027 0.000000 1132 +necessari 0 13 4.382027 0.000000 1147 +asynchron 0 12 4.465908 0.000000 1229 +robust 0 12 4.465908 0.000000 1271 +replic 0 12 4.465908 0.000000 1231 +clock 0 11 4.553877 0.000000 1320 +abil 0 11 4.553877 0.000000 1341 +interestsmi 0 10 4.653960 0.000000 1462 +reli 0 10 4.653960 0.000000 1411 +guarante 0 10 4.653960 0.000000 1391 +informationabout 0 9 4.753590 0.000000 1515 +crash 0 8 4.875197 0.000000 1616 +paradigm 0 8 4.875197 0.000000 1662 +bridg 0 8 4.875197 0.000000 1764 +exactli 0 7 5.010635 0.000000 1817 +suffici 0 7 5.010635 0.000000 1897 +montreal 0 7 5.010635 0.000000 1961 +pittsburgh 0 7 5.010635 0.000000 1938 +pennsylvania 0 7 5.010635 0.000000 1932 +consensu 0 6 5.164786 0.000000 2080 +chandra 0 6 5.164786 0.000000 2091 +prasad 0 6 5.164786 0.000000 2126 +mistak 0 6 5.164786 0.000000 2110 +broadcast 0 5 5.347108 0.000000 2453 +infinit 0 4 5.568345 0.000000 2596 +detector 0 3 5.857933 0.000000 3745 +forfault 0 3 5.857933 0.000000 3748 +ofobject 0 3 5.857933 0.000000 3399 +toueg 0 2 6.263398 0.000000 5339 +inher 0 2 6.263398 0.000000 5086 +mere 0 2 6.263398 0.000000 5340 +slow 0 2 6.263398 0.000000 5341 +inour 0 2 6.263398 0.000000 4445 +systemswith 0 2 6.263398 0.000000 5342 +muchinform 0 2 6.263398 0.000000 4811 +amajor 0 2 6.263398 0.000000 5343 +herlihi 0 2 6.263398 0.000000 5144 +anobject 0 2 6.263398 0.000000 4267 +hadzilaco 0 2 6.263398 0.000000 5338 +jayanti 0 1 6.957497 0.000000 11583 +failuredetector 0 1 6.957497 0.000000 11584 +unreli 0 1 6.957497 0.000000 11585 +weakest 0 1 6.957497 0.000000 11586 +neiger 0 1 6.957497 0.000000 11587 +professorph 0 1 6.957497 0.000000 11588 +toleranceand 0 1 6.957497 0.000000 11589 +andshar 0 1 6.957497 0.000000 11590 +gapbetween 0 1 6.957497 0.000000 11591 +practicalsolut 0 1 6.957497 0.000000 11592 +withtushar 0 1 6.957497 0.000000 11593 +chandraand 0 1 6.957497 0.000000 11594 +onunreli 0 1 6.957497 0.000000 11595 +computingst 0 1 6.957497 0.000000 11596 +adeterminist 0 1 6.957497 0.000000 11597 +impossibilityresult 0 1 6.957497 0.000000 11598 +aprocess 0 1 6.957497 0.000000 11599 +wefirst 0 1 6.957497 0.000000 11600 +canmak 0 1 6.957497 0.000000 11601 +solveconsensu 0 1 6.957497 0.000000 11602 +practicalityof 0 1 6.957497 0.000000 11603 +theircorrect 0 1 6.957497 0.000000 11604 +sharedobject 0 1 6.957497 0.000000 11605 +accessesthi 0 1 6.957497 0.000000 11606 +otherprocess 0 1 6.957497 0.000000 11607 +thatcorrespond 0 1 6.957497 0.000000 11608 +atani 0 1 6.957497 0.000000 11609 +whetherrobust 0 1 6.957497 0.000000 11610 +bracha 0 1 6.957497 0.000000 11611 +srikanth 0 1 6.957497 0.000000 11612 +abbadi 0 1 6.957497 0.000000 11613 +detectorfor 0 1 6.957497 0.000000 11614 +vancouv 0 1 6.957497 0.000000 11615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..66bfd59a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +compil 0 122 2.079442 0.000000 96 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +technic 0 100 2.302585 0.000000 140 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +proceed 0 93 2.397895 0.000000 152 +journal 0 83 2.484907 0.000000 183 +master 0 76 2.564949 0.000000 216 +messag 0 76 2.564949 0.000000 212 +upson 0 71 2.639057 0.000000 218 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +complex 0 64 2.772589 0.000000 269 +foundat 0 62 2.772589 0.000000 286 +septemb 0 65 2.772589 0.000000 274 +thesi 0 57 2.890372 0.000000 327 +algebra 0 45 3.135494 0.000000 394 +editor 0 41 3.218876 0.000000 433 +submit 0 39 3.258097 0.000000 440 +small 0 39 3.258097 0.000000 447 +formal 0 37 3.332205 0.000000 478 +concurr 0 34 3.401197 0.000000 501 +semant 0 29 3.583519 0.000000 587 +bound 0 26 3.688879 0.000000 659 +director 0 22 3.850148 0.000000 767 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +vlsi 0 21 3.912023 0.000000 795 +verif 0 20 3.951244 0.000000 826 +scheme 0 20 3.951244 0.000000 818 +binari 0 20 3.951244 0.000000 823 +exercis 0 19 4.007333 0.000000 842 +failur 0 18 4.060443 0.000000 898 +brown 0 16 4.174387 0.000000 977 +circuit 0 13 4.382027 0.000000 1131 +sigplan 0 13 4.382027 0.000000 1190 +verifi 0 12 4.465908 0.000000 1261 +calculu 0 12 4.465908 0.000000 1203 +meta 0 9 4.753590 0.000000 1505 +crash 0 8 4.875197 0.000000 1616 +delai 0 7 5.010635 0.000000 1848 +cornellunivers 0 7 5.010635 0.000000 1916 +seshadri 0 7 5.010635 0.000000 1803 +silicon 0 6 5.164786 0.000000 2076 +toronto 0 6 5.164786 0.000000 2156 +educurr 0 5 5.347108 0.000000 2504 +bloom 0 4 5.568345 0.000000 2913 +hallphon 0 4 5.568345 0.000000 2900 +insensit 0 4 5.568345 0.000000 2716 +knight 0 4 5.568345 0.000000 2728 +savag 0 4 5.568345 0.000000 2777 +weber 1 3 5.857933 5.857933 3156 +samuel 0 3 5.857933 0.000000 3155 +act 0 3 5.857933 0.000000 3557 +agreement 0 3 5.857933 0.000000 3207 +metatheori 0 3 5.857933 0.000000 3642 +byzantin 0 2 6.263398 0.000000 4203 +bakker 0 2 6.263398 0.000000 5337 +hadzilaco 0 2 6.263398 0.000000 5338 +roever 0 1 6.957497 0.000000 11579 +rozenberg 0 1 6.957497 0.000000 11580 +amdur 0 1 6.957497 0.000000 11581 +wortman 0 1 6.957497 0.000000 11582 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..6add0231 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +modifi 0 178 1.609438 0.000000 35 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +object 0 138 1.945910 0.000000 79 +databas 0 122 2.079442 0.000000 86 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +technolog 0 131 2.079442 0.000000 102 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +topic 0 114 2.197225 0.000000 110 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +educ 0 86 2.484907 0.000000 191 +master 0 76 2.564949 0.000000 216 +orient 0 80 2.564949 0.000000 205 +resum 0 79 2.564949 0.000000 217 +window 0 68 2.708050 0.000000 242 +content 0 59 2.833213 0.000000 302 +plai 0 60 2.833213 0.000000 307 +sampl 0 53 2.944439 0.000000 339 +favorit 0 44 3.135494 0.000000 410 +past 0 42 3.218876 0.000000 428 +combin 0 42 3.218876 0.000000 421 +product 0 33 3.433987 0.000000 527 +releas 0 28 3.610918 0.000000 616 +team 0 27 3.637586 0.000000 625 +retriev 0 27 3.637586 0.000000 621 +request 0 26 3.688879 0.000000 635 +reach 0 24 3.761200 0.000000 688 +lead 0 23 3.806662 0.000000 718 +color 0 22 3.850148 0.000000 762 +divis 0 21 3.912023 0.000000 803 +render 0 17 4.110874 0.000000 947 +analyz 0 17 4.110874 0.000000 925 +sheet 0 16 4.174387 0.000000 973 +draw 0 14 4.317488 0.000000 1086 +systemsc 0 11 4.553877 0.000000 1293 +ski 0 10 4.653960 0.000000 1471 +card 0 10 4.653960 0.000000 1435 +softbal 0 9 4.753590 0.000000 1594 +sean 0 8 4.875197 0.000000 1705 +golf 0 6 5.164786 0.000000 2178 +isi 0 5 5.347108 0.000000 2443 +interior 0 5 5.347108 0.000000 2439 +basebal 0 4 5.568345 0.000000 2969 +percept 0 3 5.857933 0.000000 3739 +compliant 0 3 5.857933 0.000000 3245 +landi 0 2 6.263398 0.000000 4830 +clickherefor 0 2 6.263398 0.000000 5344 +stratu 0 2 6.263398 0.000000 5345 +broker 0 2 6.263398 0.000000 4968 +orbix 0 1 6.957497 0.000000 11616 +landissean 0 1 6.957497 0.000000 11617 +sciencewelcom 0 1 6.957497 0.000000 11618 +weanalyz 0 1 6.957497 0.000000 11619 +patternsprofession 0 1 6.957497 0.000000 11620 +acorba 0 1 6.957497 0.000000 11621 +iona 0 1 6.957497 0.000000 11622 +alpin 0 1 6.957497 0.000000 11623 +collectingi 0 1 6.957497 0.000000 11624 +comeduc 0 1 6.957497 0.000000 11625 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..984c867b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +model 0 145 1.945910 0.000000 69 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +version 0 113 2.197225 0.000000 122 +graphic 0 90 2.397895 0.000000 147 +june 0 79 2.564949 0.000000 214 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +summer 0 56 2.890372 0.000000 311 +york 0 41 3.218876 0.000000 435 +tech 0 35 3.401197 0.000000 515 +post 0 35 3.401197 0.000000 505 +india 0 32 3.465736 0.000000 550 +taken 0 31 3.496508 0.000000 555 +anim 0 31 3.496508 0.000000 557 +equat 0 23 3.806662 0.000000 724 +sciencecornel 0 22 3.850148 0.000000 768 +viewer 0 21 3.912023 0.000000 787 +practicum 0 16 4.174387 0.000000 960 +magic 0 11 4.553877 0.000000 1358 +mapl 0 11 4.553877 0.000000 1376 +reduct 0 7 5.010635 0.000000 1877 +parametr 0 7 5.010635 0.000000 1819 +jpeg 0 6 5.164786 0.000000 2053 +myresum 0 6 5.164786 0.000000 2199 +hoca 0 5 5.347108 0.000000 2241 +engineeringclass 0 3 5.857933 0.000000 3667 +kerala 0 3 5.857933 0.000000 3749 +cornelluniversityfal 0 2 6.263398 0.000000 5131 +artifact 0 2 6.263398 0.000000 5346 +cspracticum 0 2 6.263398 0.000000 5132 +carpet 0 2 6.263398 0.000000 5133 +colloqium 0 2 6.263398 0.000000 5134 +seena 0 1 6.957497 0.000000 11626 +cherangara 0 1 6.957497 0.000000 11627 +cherangaramast 0 1 6.957497 0.000000 11628 +homepagecurr 0 1 6.957497 0.000000 11629 +trivandrum 0 1 6.957497 0.000000 11630 +processingalgorithm 0 1 6.957497 0.000000 11631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..262e30b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,239 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +make 0 111 2.197225 0.000000 120 +site 0 106 2.197225 0.000000 119 +structur 0 106 2.197225 0.000000 105 +intern 0 108 2.197225 0.000000 128 +look 0 107 2.197225 0.000000 115 +part 0 98 2.302585 0.000000 129 +text 0 98 2.302585 0.000000 133 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +imag 1 91 2.397895 2.397895 161 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +start 0 83 2.484907 0.000000 173 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +multimedia 0 68 2.708050 0.000000 258 +test 0 66 2.708050 0.000000 252 +window 0 68 2.708050 0.000000 242 +written 0 63 2.772589 0.000000 278 +virtual 0 62 2.772589 0.000000 285 +dept 0 64 2.772589 0.000000 291 +creat 0 63 2.772589 0.000000 277 +type 0 61 2.833213 0.000000 296 +sever 0 56 2.890372 0.000000 322 +thesi 0 57 2.890372 0.000000 327 +instruct 0 53 2.944439 0.000000 332 +allow 0 53 2.944439 0.000000 333 +cool 0 49 3.044522 0.000000 374 +give 0 50 3.044522 0.000000 359 +format 0 48 3.044522 0.000000 356 +friend 0 48 3.044522 0.000000 376 +quarter 0 47 3.091042 0.000000 389 +video 0 44 3.135494 0.000000 405 +protocol 0 45 3.135494 0.000000 407 +directori 0 45 3.135494 0.000000 396 +term 0 43 3.178054 0.000000 411 +compani 0 41 3.218876 0.000000 423 +fast 0 42 3.218876 0.000000 429 +live 0 40 3.258097 0.000000 451 +small 0 39 3.258097 0.000000 447 +prototyp 0 38 3.295837 0.000000 463 +slide 0 38 3.295837 0.000000 467 +microsoft 0 38 3.295837 0.000000 468 +mean 0 37 3.332205 0.000000 477 +statist 0 35 3.401197 0.000000 521 +transform 0 32 3.465736 0.000000 542 +given 0 32 3.465736 0.000000 538 +someth 0 31 3.496508 0.000000 554 +often 0 31 3.496508 0.000000 551 +steve 0 29 3.583519 0.000000 594 +limit 0 29 3.583519 0.000000 585 +consid 0 29 3.583519 0.000000 590 +semant 0 29 3.583519 0.000000 587 +chines 0 29 3.583519 0.000000 595 +releas 0 28 3.610918 0.000000 616 +ask 0 28 3.610918 0.000000 597 +manipul 0 27 3.637586 0.000000 624 +altern 0 26 3.688879 0.000000 641 +enhanc 0 26 3.688879 0.000000 644 +spent 0 25 3.737670 0.000000 676 +client 0 25 3.737670 0.000000 679 +interpret 0 24 3.761200 0.000000 686 +motion 0 24 3.761200 0.000000 699 +initi 0 23 3.806662 0.000000 717 +recognit 0 23 3.806662 0.000000 723 +variabl 0 23 3.806662 0.000000 715 +sequenc 0 23 3.806662 0.000000 734 +displai 0 23 3.806662 0.000000 712 +indian 0 22 3.850148 0.000000 769 +fact 0 21 3.912023 0.000000 780 +similar 0 21 3.912023 0.000000 771 +viewer 0 21 3.912023 0.000000 787 +wonder 0 20 3.951244 0.000000 815 +media 0 19 4.007333 0.000000 861 +ever 0 19 4.007333 0.000000 872 +record 0 18 4.060443 0.000000 890 +event 0 18 4.060443 0.000000 896 +lot 0 18 4.060443 0.000000 889 +stanford 0 17 4.110874 0.000000 955 +coupl 0 17 4.110874 0.000000 939 +layer 0 17 4.110874 0.000000 926 +segment 0 17 4.110874 0.000000 931 +bachelor 0 17 4.110874 0.000000 957 +upon 0 16 4.174387 0.000000 978 +portion 0 16 4.174387 0.000000 971 +stream 0 15 4.248495 0.000000 1015 +remot 0 15 4.248495 0.000000 1041 +charact 0 15 4.248495 0.000000 1028 +rate 0 15 4.248495 0.000000 1037 +audio 0 14 4.317488 0.000000 1094 +script 0 13 4.382027 0.000000 1171 +came 0 13 4.382027 0.000000 1197 +asynchron 0 12 4.465908 0.000000 1229 +gupta 0 12 4.465908 0.000000 1241 +optic 0 12 4.465908 0.000000 1221 +bill 0 11 4.553877 0.000000 1297 +player 0 11 4.553877 0.000000 1371 +fix 0 11 4.553877 0.000000 1327 +smart 0 11 4.553877 0.000000 1352 +mode 0 9 4.753590 0.000000 1492 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +leader 0 9 4.753590 0.000000 1576 +claim 0 8 4.875197 0.000000 1664 +character 0 8 4.875197 0.000000 1767 +vallei 0 7 5.010635 0.000000 1959 +signal 0 7 5.010635 0.000000 1910 +keshav 0 7 5.010635 0.000000 1852 +conferenc 0 7 5.010635 0.000000 1857 +nativ 0 6 5.164786 0.000000 2192 +classroom 0 6 5.164786 0.000000 2006 +televis 0 6 5.164786 0.000000 2118 +silicon 0 6 5.164786 0.000000 2076 +mix 0 6 5.164786 0.000000 2200 +multicast 0 5 5.347108 0.000000 2305 +shell 0 5 5.347108 0.000000 2353 +sigcomm 0 5 5.347108 0.000000 2329 +affin 0 5 5.347108 0.000000 2378 +hate 0 5 5.347108 0.000000 2529 +hole 0 5 5.347108 0.000000 2518 +stupid 0 5 5.347108 0.000000 2489 +sharma 0 4 5.568345 0.000000 2752 +anoop 0 4 5.568345 0.000000 2770 +height 0 4 5.568345 0.000000 2890 +deploi 0 3 5.857933 0.000000 3750 +greatli 0 3 5.857933 0.000000 3541 +deliveri 0 3 5.857933 0.000000 3278 +membership 0 3 5.857933 0.000000 3751 +predecessor 0 3 5.857933 0.000000 3585 +motif 0 3 5.857933 0.000000 3752 +fractal 0 3 5.857933 0.000000 3475 +hindi 0 3 5.857933 0.000000 3753 +nicknam 0 3 5.857933 0.000000 3716 +widget 0 2 6.263398 0.000000 5347 +hors 0 2 6.263398 0.000000 5348 +stumbl 0 2 6.263398 0.000000 5349 +leadto 0 2 6.263398 0.000000 5350 +navin 0 2 6.263398 0.000000 5351 +agarw 0 2 6.263398 0.000000 5352 +deer 0 2 6.263398 0.000000 4356 +width 0 2 6.263398 0.000000 5328 +yacc 0 2 6.263398 0.000000 4422 +coolest 0 2 6.263398 0.000000 5229 +fool 0 2 6.263398 0.000000 5353 +frozen 0 2 6.263398 0.000000 5078 +rosen 0 1 6.957497 0.000000 11632 +sharmila 0 1 6.957497 0.000000 11633 +vxtreme 0 1 6.957497 0.000000 11634 +imagefram 0 1 6.957497 0.000000 11635 +modifiedigmp 0 1 6.957497 0.000000 11636 +unicast 0 1 6.957497 0.000000 11637 +sitn 0 1 6.957497 0.000000 11638 +microwav 0 1 6.957497 0.000000 11639 +chaddha 0 1 6.957497 0.000000 11640 +avneesh 0 1 6.957497 0.000000 11641 +asilomar 0 1 6.957497 0.000000 11642 +igmp 0 1 6.957497 0.000000 11643 +internetdraft 0 1 6.957497 0.000000 11644 +fenner 0 1 6.957497 0.000000 11645 +niten 0 1 6.957497 0.000000 11646 +malhan 0 1 6.957497 0.000000 11647 +delhiunpublish 0 1 6.957497 0.000000 11648 +preform 0 1 6.957497 0.000000 11649 +blur 0 1 6.957497 0.000000 11650 +speckl 0 1 6.957497 0.000000 11651 +subband 0 1 6.957497 0.000000 11652 +estmat 0 1 6.957497 0.000000 11653 +writen 0 1 6.957497 0.000000 11654 +flavour 0 1 6.957497 0.000000 11655 +ifram 0 1 6.957497 0.000000 11656 +nodisplai 0 1 6.957497 0.000000 11657 +filenam 0 1 6.957497 0.000000 11658 +putimageincanva 0 1 6.957497 0.000000 11659 +dummi 0 1 6.957497 0.000000 11660 +snooper 0 1 6.957497 0.000000 11661 +doesnt 0 1 6.957497 0.000000 11662 +replai 0 1 6.957497 0.000000 11663 +kludg 0 1 6.957497 0.000000 11664 +dissalow 0 1 6.957497 0.000000 11665 +gaveth 0 1 6.957497 0.000000 11666 +tongu 0 1 6.957497 0.000000 11667 +sharm 0 1 6.957497 0.000000 11668 +shyness 0 1 6.957497 0.000000 11669 +actress 0 1 6.957497 0.000000 11670 +tagor 0 1 6.957497 0.000000 11671 +ealri 0 1 6.957497 0.000000 11672 +jewish 0 1 6.957497 0.000000 11673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..7d776992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +version 0 113 2.197225 0.000000 122 +final 0 116 2.197225 0.000000 108 +check 0 115 2.197225 0.000000 118 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +graphic 0 90 2.397895 0.000000 147 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +homepag 0 93 2.397895 0.000000 148 +school 0 84 2.484907 0.000000 188 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +view 0 70 2.708050 0.000000 254 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +abstract 0 62 2.772589 0.000000 276 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +friend 0 48 3.044522 0.000000 376 +still 0 50 3.044522 0.000000 362 +california 0 46 3.091042 0.000000 388 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +futur 0 41 3.218876 0.000000 427 +movi 0 40 3.258097 0.000000 459 +soon 0 36 3.367296 0.000000 494 +john 0 33 3.433987 0.000000 532 +transform 0 32 3.465736 0.000000 542 +sciencecornel 0 22 3.850148 0.000000 768 +love 0 21 3.912023 0.000000 804 +watch 0 21 3.912023 0.000000 789 +eric 0 19 4.007333 0.000000 870 +citi 0 19 4.007333 0.000000 874 +miss 0 19 4.007333 0.000000 866 +listen 0 18 4.060443 0.000000 907 +young 0 16 4.174387 0.000000 991 +camera 0 14 4.317488 0.000000 1115 +hong 0 14 4.317488 0.000000 1105 +near 0 14 4.317488 0.000000 1091 +meng 0 12 4.465908 0.000000 1214 +went 0 12 4.465908 0.000000 1279 +pagewelcom 0 11 4.553877 0.000000 1344 +french 0 9 4.753590 0.000000 1511 +guitar 0 8 4.875197 0.000000 1758 +mile 0 8 4.875197 0.000000 1743 +instrument 0 7 5.010635 0.000000 1954 +davi 0 7 5.010635 0.000000 1888 +piano 0 6 5.164786 0.000000 2201 +antonio 0 6 5.164786 0.000000 2186 +jazz 0 5 5.347108 0.000000 2527 +carlo 0 5 5.347108 0.000000 2515 +middl 0 5 5.347108 0.000000 2372 +keyboard 0 4 5.568345 0.000000 2970 +korea 0 4 5.568345 0.000000 2971 +cyberspac 0 3 5.857933 0.000000 3719 +korean 0 2 6.263398 0.000000 5354 +acoust 0 2 6.263398 0.000000 5355 +kwan 0 2 6.263398 0.000000 4126 +sang 0 2 6.263398 0.000000 5356 +onthi 0 2 6.263398 0.000000 5357 +chopin 0 2 6.263398 0.000000 5358 +cinema 0 2 6.263398 0.000000 5244 +miser 0 2 6.263398 0.000000 5359 +melco 0 2 6.263398 0.000000 5200 +kang 0 2 6.263398 0.000000 5360 +shim 0 1 6.957497 0.000000 11674 +shimmast 0 1 6.957497 0.000000 11675 +dryden 0 1 6.957497 0.000000 11676 +irvinestudi 0 1 6.957497 0.000000 11677 +classi 0 1 6.957497 0.000000 11678 +stan 0 1 6.957497 0.000000 11679 +getz 0 1 6.957497 0.000000 11680 +jobim 0 1 6.957497 0.000000 11681 +coltran 0 1 6.957497 0.000000 11682 +earl 0 1 6.957497 0.000000 11683 +klugh 0 1 6.957497 0.000000 11684 +metheni 0 1 6.957497 0.000000 11685 +archemi 0 1 6.957497 0.000000 11686 +paradiso 0 1 6.957497 0.000000 11687 +kiss 0 1 6.957497 0.000000 11688 +saigon 0 1 6.957497 0.000000 11689 +newswant 0 1 6.957497 0.000000 11690 +anybodi 0 1 6.957497 0.000000 11691 +hana 0 1 6.957497 0.000000 11692 +jung 0 1 6.957497 0.000000 11693 +hwan 0 1 6.957497 0.000000 11694 +victor 0 1 6.957497 0.000000 11695 +jiyang 0 1 6.957497 0.000000 11696 +timessinc 0 1 6.957497 0.000000 11697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..37f15ae2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +process 0 142 1.945910 0.000000 72 +area 0 144 1.945910 0.000000 80 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +document 0 121 2.079442 0.000000 89 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +analysi 0 124 2.079442 0.000000 98 +postscript 0 131 2.079442 0.000000 90 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +structur 0 106 2.197225 0.000000 105 +text 1 98 2.302585 2.302585 133 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +sinc 0 90 2.397895 0.000000 159 +help 0 83 2.484907 0.000000 175 +appear 0 78 2.564949 0.000000 210 +resum 0 79 2.564949 0.000000 217 +come 0 78 2.564949 0.000000 202 +june 0 79 2.564949 0.000000 214 +master 0 76 2.564949 0.000000 216 +involv 0 71 2.639057 0.000000 227 +david 0 71 2.639057 0.000000 232 +prof 0 64 2.772589 0.000000 273 +copi 0 63 2.772589 0.000000 284 +improv 0 62 2.772589 0.000000 289 +evalu 0 64 2.772589 0.000000 266 +collect 0 65 2.772589 0.000000 268 +visit 0 63 2.772589 0.000000 288 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +thesi 0 57 2.890372 0.000000 327 +variou 0 56 2.890372 0.000000 317 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +advisor 0 51 2.995732 0.000000 355 +approach 0 48 3.044522 0.000000 366 +visitor 0 49 3.044522 0.000000 371 +effect 0 46 3.091042 0.000000 385 +term 0 43 3.178054 0.000000 411 +show 0 43 3.178054 0.000000 417 +third 0 43 3.178054 0.000000 412 +late 0 40 3.258097 0.000000 439 +field 0 37 3.332205 0.000000 482 +soon 0 36 3.367296 0.000000 494 +ofth 0 36 3.367296 0.000000 491 +jame 0 35 3.401197 0.000000 507 +queri 0 33 3.433987 0.000000 524 +independ 0 32 3.465736 0.000000 548 +exist 0 30 3.555348 0.000000 569 +propos 0 28 3.610918 0.000000 602 +usual 0 28 3.610918 0.000000 608 +retriev 0 27 3.637586 0.000000 621 +relev 0 26 3.688879 0.000000 637 +other 0 24 3.761200 0.000000 697 +size 0 23 3.806662 0.000000 713 +brows 0 23 3.806662 0.000000 726 +thank 0 23 3.806662 0.000000 721 +hypertext 0 19 4.007333 0.000000 865 +segment 0 17 4.110874 0.000000 931 +normal 0 16 4.174387 0.000000 995 +eduphon 0 15 4.248495 0.000000 1060 +senior 0 14 4.317488 0.000000 1120 +weight 0 12 4.465908 0.000000 1204 +reader 0 12 4.465908 0.000000 1246 +chri 0 11 4.553877 0.000000 1311 +smart 0 11 4.553877 0.000000 1352 +arpa 0 11 4.553877 0.000000 1369 +summar 0 11 4.553877 0.000000 1295 +length 0 10 4.653960 0.000000 1400 +decomposit 0 10 4.653960 0.000000 1439 +theme 0 8 4.875197 0.000000 1707 +counter 0 8 4.875197 0.000000 1765 +vari 0 6 5.164786 0.000000 2001 +pivot 0 5 5.347108 0.000000 2426 +fairli 0 5 5.347108 0.000000 2322 +yield 0 5 5.347108 0.000000 2458 +testb 0 5 5.347108 0.000000 2456 +circumst 0 5 5.347108 0.000000 2283 +proceedingsof 0 5 5.347108 0.000000 2331 +dual 0 5 5.347108 0.000000 2522 +travers 0 5 5.347108 0.000000 2363 +amit 0 4 5.568345 0.000000 2972 +allan 0 4 5.568345 0.000000 2849 +nist 0 4 5.568345 0.000000 2973 +commonli 0 4 5.568345 0.000000 2877 +substanti 0 4 5.568345 0.000000 2921 +singhal 0 3 5.857933 0.000000 3098 +trec 0 3 5.857933 0.000000 3547 +supervisor 0 3 5.857933 0.000000 3754 +likelihood 0 3 5.857933 0.000000 3172 +expans 0 3 5.857933 0.000000 3755 +gerard 0 2 6.263398 0.000000 4876 +salton 0 2 6.263398 0.000000 4060 +bucklei 0 2 6.263398 0.000000 4874 +foremost 0 2 6.263398 0.000000 5361 +excerpt 0 2 6.263398 0.000000 4880 +degrad 0 2 6.263398 0.000000 5362 +amitsingh 0 2 6.263398 0.000000 4061 +slowli 0 2 6.263398 0.000000 5363 +mandar 0 1 6.957497 0.000000 11698 +gerardsalton 0 1 6.957497 0.000000 11699 +lengthnorm 0 1 6.957497 0.000000 11700 +mandarmitra 0 1 6.957497 0.000000 11701 +mitra 0 1 6.957497 0.000000 11702 +pageamit 0 1 6.957497 0.000000 11703 +singhaldepart 0 1 6.957497 0.000000 11704 +universitysingh 0 1 6.957497 0.000000 11705 +andtext 0 1 6.957497 0.000000 11706 +clairecardieher 0 1 6.957497 0.000000 11707 +beenon 0 1 6.957497 0.000000 11708 +informationretriev 0 1 6.957497 0.000000 11709 +thirti 0 1 6.957497 0.000000 11710 +thateffect 0 1 6.957497 0.000000 11711 +chancessimilar 0 1 6.957497 0.000000 11712 +normalizationfunct 0 1 6.957497 0.000000 11713 +retrievaleffect 0 1 6.957497 0.000000 11714 +normalizationtechniqu 0 1 6.957497 0.000000 11715 +trecparticipationtext 0 1 6.957497 0.000000 11716 +sponsoredeffort 0 1 6.957497 0.000000 11717 +retrievaltechniqu 0 1 6.957497 0.000000 11718 +hasconsist 0 1 6.957497 0.000000 11719 +somepap 0 1 6.957497 0.000000 11720 +summarizationnon 0 1 6.957497 0.000000 11721 +expositori 0 1 6.957497 0.000000 11722 +tocov 0 1 6.957497 0.000000 11723 +selectiveaccess 0 1 6.957497 0.000000 11724 +toanalyz 0 1 6.957497 0.000000 11725 +texttravers 0 1 6.957497 0.000000 11726 +papersnorm 0 1 6.957497 0.000000 11727 +documentlength 0 1 6.957497 0.000000 11728 +mitraand 0 1 6.957497 0.000000 11729 +usingsmart 0 1 6.957497 0.000000 11730 +textthem 0 1 6.957497 0.000000 11731 +andmanag 0 1 6.957497 0.000000 11732 +vectorspac 0 1 6.957497 0.000000 11733 +machineread 0 1 6.957497 0.000000 11734 +groupmemb 0 1 6.957497 0.000000 11735 +fluctuat 0 1 6.957497 0.000000 11736 +iinstal 0 1 6.957497 0.000000 11737 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..8422a2ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,154 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +sinc 0 90 2.397895 0.000000 159 +real 0 93 2.397895 0.000000 144 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +build 0 85 2.484907 0.000000 184 +wide 0 84 2.484907 0.000000 185 +internet 0 83 2.484907 0.000000 186 +level 0 87 2.484907 0.000000 180 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +name 1 72 2.639057 2.639057 220 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +simul 0 66 2.708050 0.000000 255 +ithaca 0 65 2.772589 0.000000 294 +virtual 0 62 2.772589 0.000000 285 +reason 0 57 2.890372 0.000000 318 +talk 0 53 2.944439 0.000000 336 +maintain 0 51 2.995732 0.000000 342 +format 0 48 3.044522 0.000000 356 +still 0 50 3.044522 0.000000 362 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +littl 0 39 3.258097 0.000000 454 +everi 0 34 3.401197 0.000000 519 +least 0 35 3.401197 0.000000 516 +word 0 34 3.401197 0.000000 508 +independ 0 32 3.465736 0.000000 548 +idea 0 32 3.465736 0.000000 545 +india 0 32 3.465736 0.000000 550 +anim 0 31 3.496508 0.000000 557 +domain 0 30 3.555348 0.000000 564 +built 0 29 3.583519 0.000000 592 +quot 0 29 3.583519 0.000000 582 +actual 0 28 3.610918 0.000000 604 +releas 0 28 3.610918 0.000000 616 +sometim 0 24 3.761200 0.000000 696 +head 0 23 3.806662 0.000000 732 +instal 0 22 3.850148 0.000000 754 +latest 0 21 3.912023 0.000000 785 +thu 0 21 3.912023 0.000000 773 +driven 0 15 4.248495 0.000000 1048 +remot 0 15 4.248495 0.000000 1041 +goe 0 15 4.248495 0.000000 1044 +squar 0 14 4.317488 0.000000 1082 +edui 0 13 4.382027 0.000000 1193 +directli 0 13 4.382027 0.000000 1141 +circuit 0 13 4.382027 0.000000 1131 +unfortun 0 13 4.382027 0.000000 1170 +speak 0 12 4.465908 0.000000 1283 +song 0 11 4.553877 0.000000 1380 +equip 0 10 4.653960 0.000000 1459 +stack 0 10 4.653960 0.000000 1389 +packet 0 10 4.653960 0.000000 1415 +custom 0 10 4.653960 0.000000 1414 +mode 0 9 4.753590 0.000000 1492 +lane 0 8 4.875197 0.000000 1720 +router 0 8 4.875197 0.000000 1772 +switch 0 8 4.875197 0.000000 1718 +span 0 8 4.875197 0.000000 1751 +round 0 8 4.875197 0.000000 1769 +keshav 0 7 5.010635 0.000000 1852 +signal 0 7 5.010635 0.000000 1910 +canb 0 7 5.010635 0.000000 1846 +hear 0 7 5.010635 0.000000 1940 +nativ 0 6 5.164786 0.000000 2192 +south 0 6 5.164786 0.000000 2167 +srinivasan 0 6 5.164786 0.000000 2175 +christoph 0 5 5.347108 0.000000 2512 +delhi 0 5 5.347108 0.000000 2530 +facial 0 5 5.347108 0.000000 2438 +hole 0 5 5.347108 0.000000 2518 +poem 0 5 5.347108 0.000000 2483 +systemsand 0 4 5.568345 0.000000 2804 +scratch 0 3 5.857933 0.000000 3140 +compliant 0 3 5.857933 0.000000 3245 +district 0 3 5.857933 0.000000 3756 +father 0 3 5.857933 0.000000 3757 +johann 0 3 5.857933 0.000000 3758 +goof 0 2 6.263398 0.000000 4985 +snoop 0 2 6.263398 0.000000 5364 +mbone 0 2 6.263398 0.000000 4361 +pagemi 0 2 6.263398 0.000000 5230 +villag 0 2 6.263398 0.000000 5215 +ought 0 2 6.263398 0.000000 5365 +goeth 0 2 6.263398 0.000000 5366 +skeshav 0 1 6.957497 0.000000 11738 +idlinet 0 1 6.957497 0.000000 11739 +keshavemail 0 1 6.957497 0.000000 11740 +spentfiv 0 1 6.957497 0.000000 11741 +xunet 0 1 6.957497 0.000000 11742 +incollabor 0 1 6.957497 0.000000 11743 +fore 0 1 6.957497 0.000000 11744 +zeitnet 0 1 6.957497 0.000000 11745 +idlinetsourc 0 1 6.957497 0.000000 11746 +applicationget 0 1 6.957497 0.000000 11747 +linkspapersher 0 1 6.957497 0.000000 11748 +linkto 0 1 6.957497 0.000000 11749 +reali 0 1 6.957497 0.000000 11750 +beout 0 1 6.957497 0.000000 11751 +native_mod 0 1 6.957497 0.000000 11752 +namein 0 1 6.957497 0.000000 11753 +thanjavur 0 1 6.957497 0.000000 11754 +beprecis 0 1 6.957497 0.000000 11755 +prefix 0 1 6.957497 0.000000 11756 +sonli 0 1 6.957497 0.000000 11757 +surnam 0 1 6.957497 0.000000 11758 +myfath 0 1 6.957497 0.000000 11759 +intoth 0 1 6.957497 0.000000 11760 +beconfus 0 1 6.957497 0.000000 11761 +quotabl 0 1 6.957497 0.000000 11762 +wolfgang 0 1 6.957497 0.000000 11763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..64a437fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +construct 0 139 1.945910 0.000000 82 +place 0 106 2.197225 0.000000 124 +sinc 0 90 2.397895 0.000000 159 +prof 0 64 2.772589 0.000000 273 +electr 0 38 3.295837 0.000000 461 +alwai 0 24 3.761200 0.000000 691 +came 0 13 4.382027 0.000000 1197 +kenneth 0 12 4.465908 0.000000 1265 +meng 0 12 4.465908 0.000000 1214 +road 0 11 4.553877 0.000000 1374 +success 0 10 4.653960 0.000000 1390 +zabih 0 6 5.164786 0.000000 2138 +sunlab 0 2 6.263398 0.000000 5222 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..c77ce05c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +high 0 130 2.079442 0.000000 101 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +world 0 115 2.197225 0.000000 126 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +pictur 0 89 2.397895 0.000000 160 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +commun 0 95 2.397895 0.000000 157 +start 0 83 2.484907 0.000000 173 +school 0 84 2.484907 0.000000 188 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +thing 0 84 2.484907 0.000000 189 +novemb 0 81 2.484907 0.000000 179 +come 0 78 2.564949 0.000000 202 +want 0 79 2.564949 0.000000 199 +complet 0 77 2.564949 0.000000 208 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +june 0 79 2.564949 0.000000 214 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +visit 0 63 2.772589 0.000000 288 +plan 0 65 2.772589 0.000000 272 +written 0 63 2.772589 0.000000 278 +dept 0 64 2.772589 0.000000 291 +best 0 59 2.833213 0.000000 299 +summer 0 56 2.890372 0.000000 311 +direct 0 57 2.890372 0.000000 316 +point 0 58 2.890372 0.000000 319 +think 0 57 2.890372 0.000000 314 +reason 0 57 2.890372 0.000000 318 +talk 0 53 2.944439 0.000000 336 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +week 0 52 2.995732 0.000000 343 +friend 0 48 3.044522 0.000000 376 +right 0 48 3.044522 0.000000 363 +still 0 50 3.044522 0.000000 362 +could 0 46 3.091042 0.000000 383 +featur 0 46 3.091042 0.000000 386 +better 0 45 3.135494 0.000000 401 +even 0 45 3.135494 0.000000 393 +around 0 43 3.178054 0.000000 415 +music 0 42 3.218876 0.000000 436 +small 0 39 3.258097 0.000000 447 +probabl 0 40 3.258097 0.000000 455 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +theoret 0 39 3.258097 0.000000 446 +live 0 40 3.258097 0.000000 451 +brian 0 38 3.295837 0.000000 466 +connect 0 37 3.332205 0.000000 485 +tree 0 36 3.367296 0.000000 492 +especi 0 36 3.367296 0.000000 496 +winter 0 36 3.367296 0.000000 500 +next 0 34 3.401197 0.000000 517 +random 0 34 3.401197 0.000000 511 +either 0 35 3.401197 0.000000 506 +go 0 33 3.433987 0.000000 529 +kind 0 32 3.465736 0.000000 541 +power 0 30 3.555348 0.000000 573 +actual 0 28 3.610918 0.000000 604 +quit 0 27 3.637586 0.000000 633 +great 0 27 3.637586 0.000000 626 +rather 0 26 3.688879 0.000000 642 +never 0 25 3.737670 0.000000 671 +spent 0 25 3.737670 0.000000 676 +magazin 0 24 3.761200 0.000000 704 +alwai 0 24 3.761200 0.000000 691 +pattern 0 24 3.761200 0.000000 689 +sometim 0 24 3.761200 0.000000 696 +togeth 0 23 3.806662 0.000000 714 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +inth 0 22 3.850148 0.000000 741 +instead 0 22 3.850148 0.000000 756 +leav 0 21 3.912023 0.000000 772 +half 0 21 3.912023 0.000000 776 +busi 0 21 3.912023 0.000000 784 +path 0 21 3.912023 0.000000 778 +hous 0 21 3.912023 0.000000 801 +tell 0 21 3.912023 0.000000 777 +theorem 0 21 3.912023 0.000000 786 +nice 0 20 3.951244 0.000000 809 +left 0 19 4.007333 0.000000 851 +mostli 0 19 4.007333 0.000000 869 +seem 1 18 4.060443 4.060443 899 +figur 0 18 4.060443 0.000000 903 +listen 0 18 4.060443 0.000000 907 +coupl 0 17 4.110874 0.000000 939 +stop 0 17 4.110874 0.000000 942 +sign 0 16 4.174387 0.000000 970 +modern 0 16 4.174387 0.000000 966 +brown 0 16 4.174387 0.000000 977 +side 0 15 4.248495 0.000000 1022 +score 0 15 4.248495 0.000000 1017 +english 0 15 4.248495 0.000000 1033 +indic 0 15 4.248495 0.000000 1013 +near 0 14 4.317488 0.000000 1091 +warn 0 14 4.317488 0.000000 1068 +stori 0 14 4.317488 0.000000 1087 +sai 0 13 4.382027 0.000000 1175 +someon 0 13 4.382027 0.000000 1128 +step 0 13 4.382027 0.000000 1138 +front 0 13 4.382027 0.000000 1154 +everyth 0 13 4.382027 0.000000 1169 +nick 0 13 4.382027 0.000000 1180 +insid 0 12 4.465908 0.000000 1262 +walk 0 12 4.465908 0.000000 1281 +tune 0 12 4.465908 0.000000 1227 +went 0 12 4.465908 0.000000 1279 +daniel 0 12 4.465908 0.000000 1233 +neat 0 12 4.465908 0.000000 1263 +america 0 11 4.553877 0.000000 1370 +label 0 10 4.653960 0.000000 1423 +mountain 0 10 4.653960 0.000000 1456 +traffic 0 10 4.653960 0.000000 1421 +hello 0 10 4.653960 0.000000 1407 +paragraph 0 10 4.653960 0.000000 1449 +awai 0 10 4.653960 0.000000 1447 +clear 0 9 4.753590 0.000000 1488 +sound 0 9 4.753590 0.000000 1605 +distanc 0 9 4.753590 0.000000 1500 +ring 0 8 4.875197 0.000000 1684 +realiz 0 8 4.875197 0.000000 1739 +perhap 0 8 4.875197 0.000000 1693 +contrast 0 8 4.875197 0.000000 1637 +gold 0 8 4.875197 0.000000 1745 +switch 0 8 4.875197 0.000000 1718 +manufactur 0 8 4.875197 0.000000 1634 +japan 0 8 4.875197 0.000000 1762 +ideal 0 8 4.875197 0.000000 1630 +hear 0 7 5.010635 0.000000 1940 +throughout 0 7 5.010635 0.000000 1871 +corner 0 7 5.010635 0.000000 1909 +smile 0 7 5.010635 0.000000 1807 +golden 0 7 5.010635 0.000000 1962 +centuri 0 7 5.010635 0.000000 1935 +intellectu 0 7 5.010635 0.000000 1847 +occasion 0 7 5.010635 0.000000 1905 +pronounc 0 7 5.010635 0.000000 1918 +largest 0 7 5.010635 0.000000 1858 +happen 0 7 5.010635 0.000000 1790 +snow 0 6 5.164786 0.000000 2031 +rain 0 6 5.164786 0.000000 2137 +strang 0 6 5.164786 0.000000 2064 +mother 0 6 5.164786 0.000000 2083 +seen 0 6 5.164786 0.000000 2202 +ifyou 0 6 5.164786 0.000000 1992 +polit 0 6 5.164786 0.000000 2115 +hair 0 5 5.347108 0.000000 2446 +fairli 0 5 5.347108 0.000000 2322 +respond 0 5 5.347108 0.000000 2354 +door 0 5 5.347108 0.000000 2291 +own 0 5 5.347108 0.000000 2531 +isth 0 5 5.347108 0.000000 2532 +sing 0 5 5.347108 0.000000 2499 +wear 0 4 5.568345 0.000000 2785 +somewhat 0 4 5.568345 0.000000 2659 +somehow 0 4 5.568345 0.000000 2974 +hell 0 4 5.568345 0.000000 2885 +glanc 0 4 5.568345 0.000000 2652 +tick 0 4 5.568345 0.000000 2975 +heard 0 4 5.568345 0.000000 2895 +paus 0 4 5.568345 0.000000 2965 +dark 0 4 5.568345 0.000000 2910 +usedto 0 4 5.568345 0.000000 2643 +gotten 0 4 5.568345 0.000000 2628 +hire 0 4 5.568345 0.000000 2976 +couldn 0 4 5.568345 0.000000 2977 +glass 0 3 5.857933 0.000000 3759 +vagu 0 3 5.857933 0.000000 3393 +surround 0 3 5.857933 0.000000 3492 +stone 0 3 5.857933 0.000000 3674 +scatter 0 3 5.857933 0.000000 3351 +worri 0 3 5.857933 0.000000 3130 +thin 0 3 5.857933 0.000000 3488 +bright 0 3 5.857933 0.000000 3596 +twentieth 0 3 5.857933 0.000000 3760 +wave 0 3 5.857933 0.000000 3518 +europ 0 3 5.857933 0.000000 3761 +child 0 3 5.857933 0.000000 3542 +blame 0 3 5.857933 0.000000 3636 +dread 0 3 5.857933 0.000000 3630 +wise 0 3 5.857933 0.000000 3631 +pyramid 0 3 5.857933 0.000000 3358 +maker 0 3 5.857933 0.000000 3164 +obviou 0 2 6.263398 0.000000 5367 +forest 0 2 6.263398 0.000000 5368 +shack 0 2 6.263398 0.000000 5369 +withno 0 2 6.263398 0.000000 5370 +pile 0 2 6.263398 0.000000 5371 +hum 0 2 6.263398 0.000000 4935 +purpl 0 2 6.263398 0.000000 5372 +silk 0 2 6.263398 0.000000 5373 +silver 0 2 6.263398 0.000000 5374 +ocean 0 2 6.263398 0.000000 5375 +andlook 0 2 6.263398 0.000000 4561 +altogeth 0 2 6.263398 0.000000 4751 +nowadai 0 2 6.263398 0.000000 5376 +pointcast 0 2 6.263398 0.000000 5377 +inner 0 2 6.263398 0.000000 4551 +furnitur 0 2 6.263398 0.000000 5016 +haveth 0 2 6.263398 0.000000 5378 +hani 0 2 6.263398 0.000000 5140 +neededto 0 2 6.263398 0.000000 5379 +agood 0 2 6.263398 0.000000 5380 +presum 0 1 6.957497 0.000000 11764 +heha 0 1 6.957497 0.000000 11765 +beaver 0 1 6.957497 0.000000 11766 +perri 0 1 6.957497 0.000000 11767 +clearinglook 0 1 6.957497 0.000000 11768 +thanyou 0 1 6.957497 0.000000 11769 +onal 0 1 6.957497 0.000000 11770 +theweath 0 1 6.957497 0.000000 11771 +overcast 0 1 6.957497 0.000000 11772 +ifit 0 1 6.957497 0.000000 11773 +quiteclear 0 1 6.957497 0.000000 11774 +bird 0 1 6.957497 0.000000 11775 +chirp 0 1 6.957497 0.000000 11776 +theymai 0 1 6.957497 0.000000 11777 +louder 0 1 6.957497 0.000000 11778 +nearbywaterfal 0 1 6.957497 0.000000 11779 +gotta 0 1 6.957497 0.000000 11780 +apath 0 1 6.957497 0.000000 11781 +asign 0 1 6.957497 0.000000 11782 +hillschool 0 1 6.957497 0.000000 11783 +wormhol 0 1 6.957497 0.000000 11784 +nearbyhous 0 1 6.957497 0.000000 11785 +clearinginsid 0 1 6.957497 0.000000 11786 +offand 0 1 6.957497 0.000000 11787 +theclear 0 1 6.957497 0.000000 11788 +rhyme 0 1 6.957497 0.000000 11789 +reasonto 0 1 6.957497 0.000000 11790 +sortsof 0 1 6.957497 0.000000 11791 +betteridea 0 1 6.957497 0.000000 11792 +itseem 0 1 6.957497 0.000000 11793 +importantth 0 1 6.957497 0.000000 11794 +clearingh 0 1 6.957497 0.000000 11795 +oftendescrib 0 1 6.957497 0.000000 11796 +mostdistinct 0 1 6.957497 0.000000 11797 +quitelong 0 1 6.957497 0.000000 11798 +elfin 0 1 6.957497 0.000000 11799 +peoplebefor 0 1 6.957497 0.000000 11800 +theresoon 0 1 6.957497 0.000000 11801 +startstel 0 1 6.957497 0.000000 11802 +whynichola 0 1 6.957497 0.000000 11803 +negropont 0 1 6.957497 0.000000 11804 +moron 0 1 6.957497 0.000000 11805 +thenh 0 1 6.957497 0.000000 11806 +obscur 0 1 6.957497 0.000000 11807 +hetend 0 1 6.957497 0.000000 11808 +appearanceinstead 0 1 6.957497 0.000000 11809 +turquois 0 1 6.957497 0.000000 11810 +linen 0 1 6.957497 0.000000 11811 +imageof 0 1 6.957497 0.000000 11812 +fromhim 0 1 6.957497 0.000000 11813 +pewter 0 1 6.957497 0.000000 11814 +pentacl 0 1 6.957497 0.000000 11815 +neck 0 1 6.957497 0.000000 11816 +hippi 0 1 6.957497 0.000000 11817 +asclass 0 1 6.957497 0.000000 11818 +clearingdan 0 1 6.957497 0.000000 11819 +briani 0 1 6.957497 0.000000 11820 +anundergrad 0 1 6.957497 0.000000 11821 +newsprovid 0 1 6.957497 0.000000 11822 +ancamosoiu 0 1 6.957497 0.000000 11823 +schwa 0 1 6.957497 0.000000 11824 +backwhen 0 1 6.957497 0.000000 11825 +twoand 0 1 6.957497 0.000000 11826 +wegradu 0 1 6.957497 0.000000 11827 +shejust 0 1 6.957497 0.000000 11828 +onewav 0 1 6.957497 0.000000 11829 +issomeon 0 1 6.957497 0.000000 11830 +severalmonth 0 1 6.957497 0.000000 11831 +becamemuch 0 1 6.957497 0.000000 11832 +eedepart 0 1 6.957497 0.000000 11833 +multimediastud 0 1 6.957497 0.000000 11834 +friendof 0 1 6.957497 0.000000 11835 +finlei 0 1 6.957497 0.000000 11836 +notanymor 0 1 6.957497 0.000000 11837 +steelcas 0 1 6.957497 0.000000 11838 +dserver 0 1 6.957497 0.000000 11839 +kinda 0 1 6.957497 0.000000 11840 +cheesi 0 1 6.957497 0.000000 11841 +thebuild 0 1 6.957497 0.000000 11842 +ius 0 1 6.957497 0.000000 11843 +programcal 0 1 6.957497 0.000000 11844 +graduatedfrom 0 1 6.957497 0.000000 11845 +cuter 0 1 6.957497 0.000000 11846 +thanth 0 1 6.957497 0.000000 11847 +blurri 0 1 6.957497 0.000000 11848 +dreamer 0 1 6.957497 0.000000 11849 +ofdream 0 1 6.957497 0.000000 11850 +aphex 0 1 6.957497 0.000000 11851 +twindan 0 1 6.957497 0.000000 11852 +snowman 0 1 6.957497 0.000000 11853 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..6046a2c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +cornel 0 215 1.386294 0.000000 23 +take 0 97 2.302585 0.000000 134 +cool 0 49 3.044522 0.000000 374 +keep 0 44 3.135494 0.000000 409 +long 0 43 3.178054 0.000000 413 +soon 0 36 3.367296 0.000000 494 +lot 0 18 4.060443 0.000000 889 +rate 0 15 4.248495 0.000000 1037 +ashish 0 5 5.347108 0.000000 2473 +ultra 0 4 5.568345 0.000000 2889 +autobiographi 0 2 6.263398 0.000000 5070 +lookin 1 1 6.957497 6.957497 11854 +upkeep 0 1 6.957497 0.000000 11855 +setuup 0 1 6.957497 0.000000 11856 +doingajaymanishanujmom 0 1 6.957497 0.000000 11857 +daddepart 0 1 6.957497 0.000000 11858 +sciencesearch 0 1 6.957497 0.000000 11859 +netentertain 0 1 6.957497 0.000000 11860 +weeklycricket 0 1 6.957497 0.000000 11861 +soni 0 1 6.957497 0.000000 11862 +sonia 0 1 6.957497 0.000000 11863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..ea8c79c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +would 1 67 2.708050 2.708050 251 +browser 1 56 2.890372 2.890372 313 +better 1 45 3.135494 3.135494 401 +form 1 39 3.258097 3.258097 443 +scott 1 18 4.060443 4.060443 884 +capabl 1 15 4.248495 4.248495 1016 +dawson 1 2 6.263398 6.263398 4886 +padif 1 1 6.957497 6.957497 11864 +scottdawson 1 1 6.957497 6.957497 11865 +shomebas 1 1 6.957497 6.957497 11866 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..bb680f65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +cornel 1 215 1.386294 1.386294 23 +hall 0 146 1.945910 0.000000 65 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +center 0 88 2.397895 0.000000 158 +institut 0 84 2.484907 0.000000 187 +paul 0 38 3.295837 0.000000 471 +scheme 0 20 3.951244 0.000000 818 +ultim 0 17 4.110874 0.000000 943 +affili 0 13 4.382027 0.000000 1194 +rhode 0 9 4.753590 0.000000 1579 +hockei 0 8 4.875197 0.000000 1760 +atcornel 0 6 5.164786 0.000000 2131 +stodghil 0 4 5.568345 0.000000 2864 +bernoulli 0 4 5.568345 0.000000 2955 +pagepaul 0 3 5.857933 0.000000 3669 +stodghillstodghil 0 1 6.957497 0.000000 11867 +acri 0 1 6.957497 0.000000 11868 +projectinterest 0 1 6.957497 0.000000 11869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..e7274eb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +updat 0 191 1.609438 0.000000 41 +august 0 66 2.708050 0.000000 257 +move 0 47 3.091042 0.000000 382 +http 0 41 3.218876 0.000000 420 +scott 0 18 4.060443 0.000000 884 +former 0 17 4.110874 0.000000 956 +indiana 0 6 5.164786 0.000000 2057 +stoller 1 4 5.568345 5.568345 2866 +pagescott 0 4 5.568345 0.000000 2978 +hyplan 0 1 6.957497 0.000000 11870 +htmllast 0 1 6.957497 0.000000 11871 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..b163abfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +phone 0 175 1.791759 0.000000 45 +contact 0 153 1.791759 0.000000 59 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +high 0 130 2.079442 0.000000 101 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +compil 0 122 2.079442 0.000000 96 +person 0 111 2.197225 0.000000 117 +theori 0 111 2.197225 0.000000 127 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +multimedia 0 68 2.708050 0.000000 258 +prof 0 64 2.772589 0.000000 273 +previou 0 62 2.772589 0.000000 290 +semest 0 58 2.890372 0.000000 312 +brian 0 38 3.295837 0.000000 466 +seminar 0 38 3.295837 0.000000 470 +option 0 30 3.555348 0.000000 575 +progress 0 28 3.610918 0.000000 598 +smith 0 20 3.951244 0.000000 820 +wonder 0 20 3.951244 0.000000 815 +ofcomput 0 10 4.653960 0.000000 1442 +earth 0 10 4.653960 0.000000 1463 +marri 0 7 5.010635 0.000000 1946 +price 0 6 5.164786 0.000000 1999 +czar 0 5 5.347108 0.000000 2503 +mehom 0 4 5.568345 0.000000 2979 +eduaddress 0 3 5.857933 0.000000 3762 +sugata 1 2 6.263398 6.263398 4976 +mukhopadhyai 0 2 6.263398 0.000000 4981 +ritu 0 1 6.957497 0.000000 11872 +mailsugata 0 1 6.957497 0.000000 11873 +hichori 0 1 6.957497 0.000000 11874 +estat 0 1 6.957497 0.000000 11875 +owego 0 1 6.957497 0.000000 11876 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..373ba025 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +need 0 98 2.302585 0.000000 135 +present 0 91 2.397895 0.000000 145 +resum 0 79 2.564949 0.000000 217 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +ithaca 0 65 2.772589 0.000000 294 +browser 0 56 2.890372 0.000000 313 +life 0 50 3.044522 0.000000 375 +physic 0 47 3.091042 0.000000 377 +paul 0 38 3.295837 0.000000 471 +anim 0 31 3.496508 0.000000 557 +background 0 25 3.737670 0.000000 664 +rout 0 21 3.912023 0.000000 793 +capabl 0 15 4.248495 0.000000 1016 +philosophi 0 13 4.382027 0.000000 1167 +clock 0 11 4.553877 0.000000 1320 +tick 0 4 5.568345 0.000000 2975 +carleton 0 2 6.263398 0.000000 5381 +sukhpal 1 1 6.957497 6.957497 11877 +sanghera 0 1 6.957497 0.000000 11878 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..8d232d9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +read 0 154 1.791759 0.000000 47 +first 0 140 1.945910 0.000000 71 +high 0 130 2.079442 0.000000 101 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +mani 0 92 2.397895 0.000000 150 +want 0 79 2.564949 0.000000 199 +orient 0 80 2.564949 0.000000 205 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +summer 0 56 2.890372 0.000000 311 +space 0 57 2.890372 0.000000 310 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +run 0 51 2.995732 0.000000 347 +understand 0 47 3.091042 0.000000 384 +video 0 44 3.135494 0.000000 405 +show 0 43 3.178054 0.000000 417 +long 0 43 3.178054 0.000000 413 +compani 0 41 3.218876 0.000000 423 +realli 0 40 3.258097 0.000000 444 +microsoft 0 38 3.295837 0.000000 468 +industri 0 38 3.295837 0.000000 464 +feel 0 37 3.332205 0.000000 483 +hand 0 37 3.332205 0.000000 475 +post 0 35 3.401197 0.000000 505 +print 0 34 3.401197 0.000000 503 +everi 0 34 3.401197 0.000000 519 +within 0 33 3.433987 0.000000 525 +kind 0 32 3.465736 0.000000 541 +progress 0 28 3.610918 0.000000 598 +enjoi 0 26 3.688879 0.000000 660 +session 0 26 3.688879 0.000000 643 +notic 0 25 3.737670 0.000000 675 +never 0 25 3.737670 0.000000 671 +toward 0 25 3.737670 0.000000 668 +seri 0 24 3.761200 0.000000 708 +recommend 0 22 3.850148 0.000000 737 +corpor 0 21 3.912023 0.000000 802 +stand 0 18 4.060443 0.000000 891 +weekli 0 17 4.110874 0.000000 919 +whether 0 17 4.110874 0.000000 918 +attempt 0 17 4.110874 0.000000 917 +sign 0 16 4.174387 0.000000 970 +anywai 0 15 4.248495 0.000000 1047 +comic 0 14 4.317488 0.000000 1103 +social 0 13 4.382027 0.000000 1123 +everyon 0 13 4.382027 0.000000 1148 +regularli 0 11 4.553877 0.000000 1338 +fill 0 11 4.553877 0.000000 1349 +strongli 0 10 4.653960 0.000000 1406 +didn 0 9 4.753590 0.000000 1563 +parti 0 8 4.875197 0.000000 1676 +empir 0 8 4.875197 0.000000 1722 +illustr 0 8 4.875197 0.000000 1679 +told 0 8 4.875197 0.000000 1658 +strip 0 6 5.164786 0.000000 2203 +tri 0 6 5.164786 0.000000 2166 +viewpoint 0 6 5.164786 0.000000 2116 +gate 0 6 5.164786 0.000000 2182 +begun 0 5 5.347108 0.000000 2386 +suffer 0 5 5.347108 0.000000 2268 +fit 0 5 5.347108 0.000000 2285 +episod 0 4 5.568345 0.000000 2747 +lord 0 4 5.568345 0.000000 2906 +employe 0 4 5.568345 0.000000 2717 +drew 0 4 5.568345 0.000000 2980 +theintern 0 4 5.568345 0.000000 2981 +newslett 0 4 5.568345 0.000000 2873 +sumedh 0 3 5.857933 0.000000 3101 +thesumm 0 3 5.857933 0.000000 3763 +redmond 0 3 5.857933 0.000000 3568 +internship 0 3 5.857933 0.000000 3764 +flame 0 3 5.857933 0.000000 3696 +galact 0 2 6.263398 0.000000 5219 +eduthi 0 2 6.263398 0.000000 5382 +alia 0 2 6.263398 0.000000 5383 +arriv 0 2 6.263398 0.000000 4132 +persuad 0 2 6.263398 0.000000 5384 +declin 0 2 6.263398 0.000000 5385 +portrai 0 2 6.263398 0.000000 5386 +bitter 0 2 6.263398 0.000000 5387 +rebel 0 2 6.263398 0.000000 5388 +imperi 0 2 6.263398 0.000000 5389 +gater 1 1 6.957497 6.957497 11879 +bilth 0 1 6.957497 0.000000 11880 +empirewritten 0 1 6.957497 0.000000 11881 +kanetkaremail 0 1 6.957497 0.000000 11882 +kanetkar 0 1 6.957497 0.000000 11883 +atmicrosoft 0 1 6.957497 0.000000 11884 +artwork 0 1 6.957497 0.000000 11885 +leak 0 1 6.957497 0.000000 11886 +theful 0 1 6.957497 0.000000 11887 +stripi 0 1 6.957497 0.000000 11888 +themicrosoft 0 1 6.957497 0.000000 11889 +perceiv 0 1 6.957497 0.000000 11890 +problemand 0 1 6.957497 0.000000 11891 +evilempir 0 1 6.957497 0.000000 11892 +comicstrip 0 1 6.957497 0.000000 11893 +theoutsid 0 1 6.957497 0.000000 11894 +eitherbil 0 1 6.957497 0.000000 11895 +heck 0 1 6.957497 0.000000 11896 +summersof 0 1 6.957497 0.000000 11897 +anyoneinterest 0 1 6.957497 0.000000 11898 +thateveri 0 1 6.957497 0.000000 11899 +theyshow 0 1 6.957497 0.000000 11900 +trivia 0 1 6.957497 0.000000 11901 +makey 0 1 6.957497 0.000000 11902 +disclosur 0 1 6.957497 0.000000 11903 +agreeement 0 1 6.957497 0.000000 11904 +theymad 0 1 6.957497 0.000000 11905 +theirheart 0 1 6.957497 0.000000 11906 +pledg 0 1 6.957497 0.000000 11907 +alleig 0 1 6.957497 0.000000 11908 +everydesk 0 1 6.957497 0.000000 11909 +roosterepisod 0 1 6.957497 0.000000 11910 +threatepisod 0 1 6.957497 0.000000 11911 +insigniaepisod 0 1 6.957497 0.000000 11912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..969dff3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +address 0 170 1.791759 0.000000 62 +algorithm 0 162 1.791759 0.000000 57 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +document 1 121 2.079442 2.079442 89 +report 0 131 2.079442 0.000000 92 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +structur 0 106 2.197225 0.000000 105 +version 0 113 2.197225 0.000000 122 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +proceed 0 93 2.397895 0.000000 152 +section 0 94 2.397895 0.000000 149 +question 0 91 2.397895 0.000000 141 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +issu 0 78 2.564949 0.000000 211 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +logic 0 71 2.639057 0.000000 230 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +workshop 0 71 2.639057 0.000000 239 +goal 0 66 2.708050 0.000000 250 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +august 0 66 2.708050 0.000000 257 +evalu 0 64 2.772589 0.000000 266 +previou 0 62 2.772589 0.000000 290 +descript 0 64 2.772589 0.000000 271 +experi 0 64 2.772589 0.000000 283 +type 0 61 2.833213 0.000000 296 +best 0 59 2.833213 0.000000 299 +summer 0 56 2.890372 0.000000 311 +index 0 56 2.890372 0.000000 309 +space 0 57 2.890372 0.000000 310 +publish 0 57 2.890372 0.000000 326 +digit 0 52 2.995732 0.000000 348 +principl 0 48 3.044522 0.000000 357 +electron 0 47 3.091042 0.000000 379 +term 0 43 3.178054 0.000000 411 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +theoret 0 39 3.258097 0.000000 446 +author 0 39 3.258097 0.000000 450 +close 0 38 3.295837 0.000000 465 +correct 0 38 3.295837 0.000000 462 +return 0 34 3.401197 0.000000 502 +given 0 32 3.465736 0.000000 538 +chapter 0 32 3.465736 0.000000 536 +abl 0 30 3.555348 0.000000 566 +compon 0 30 3.555348 0.000000 570 +limit 0 29 3.583519 0.000000 585 +retriev 0 27 3.637586 0.000000 621 +relev 0 26 3.688879 0.000000 637 +enabl 0 26 3.688879 0.000000 655 +primari 0 25 3.737670 0.000000 669 +task 0 25 3.737670 0.000000 678 +toward 0 25 3.737670 0.000000 668 +magazin 0 24 3.761200 0.000000 704 +handl 0 24 3.761200 0.000000 685 +seri 0 24 3.761200 0.000000 708 +brows 0 23 3.806662 0.000000 726 +input 0 23 3.806662 0.000000 727 +head 0 23 3.806662 0.000000 732 +recognit 0 23 3.806662 0.000000 723 +identifi 0 22 3.850148 0.000000 760 +hierarchi 0 22 3.850148 0.000000 744 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +flexibl 0 21 3.912023 0.000000 792 +output 0 21 3.912023 0.000000 788 +busi 0 21 3.912023 0.000000 784 +geometr 0 19 4.007333 0.000000 852 +boston 0 19 4.007333 0.000000 862 +segment 0 17 4.110874 0.000000 931 +stop 0 17 4.110874 0.000000 942 +adam 0 17 4.110874 0.000000 934 +white 0 17 4.110874 0.000000 951 +letter 0 16 4.174387 0.000000 981 +piec 0 15 4.248495 0.000000 1020 +hierarch 0 15 4.248495 0.000000 1018 +near 0 14 4.317488 0.000000 1091 +bodi 0 13 4.382027 0.000000 1178 +johnson 0 13 4.382027 0.000000 1162 +captur 0 12 4.465908 0.000000 1232 +scan 0 12 4.465908 0.000000 1243 +bruce 0 12 4.465908 0.000000 1226 +arbitrari 0 11 4.553877 0.000000 1359 +paragraph 0 10 4.653960 0.000000 1449 +discov 0 9 4.753590 0.000000 1562 +donald 0 9 4.753590 0.000000 1510 +classif 0 9 4.753590 0.000000 1586 +rais 0 8 4.875197 0.000000 1711 +tobe 0 6 5.164786 0.000000 1995 +textual 0 6 5.164786 0.000000 1979 +superhighwai 0 4 5.568345 0.000000 2943 +similarli 0 3 5.857933 0.000000 3241 +categor 0 3 5.857933 0.000000 3765 +daniela 0 3 5.857933 0.000000 3611 +recipi 0 3 5.857933 0.000000 3627 +andclassif 0 2 6.263398 0.000000 5390 +cue 0 2 6.263398 0.000000 5391 +bharat 0 2 6.263398 0.000000 5107 +forthcom 0 2 6.263398 0.000000 5392 +dartmouth 0 2 6.263398 0.000000 5393 +montral 0 2 6.263398 0.000000 5394 +kristen 0 1 6.957497 0.000000 11913 +divid 0 1 6.957497 0.000000 11914 +accessresearch 0 1 6.957497 0.000000 11915 +mylong 0 1 6.957497 0.000000 11916 +forsophist 0 1 6.957497 0.000000 11917 +manipulationtool 0 1 6.957497 0.000000 11918 +logicalstructur 0 1 6.957497 0.000000 11919 +documentrepresent 0 1 6.957497 0.000000 11920 +hierarchyof 0 1 6.957497 0.000000 11921 +postscriptvers 0 1 6.957497 0.000000 11922 +croft 0 1 6.957497 0.000000 11923 +determininglog 0 1 6.957497 0.000000 11924 +soin 0 1 6.957497 0.000000 11925 +ofmultipl 0 1 6.957497 0.000000 11926 +browsingco 0 1 6.957497 0.000000 11927 +nabil 0 1 6.957497 0.000000 11928 +bhargava 0 1 6.957497 0.000000 11929 +yelena 0 1 6.957497 0.000000 11930 +yesha 0 1 6.957497 0.000000 11931 +seeheim 0 1 6.957497 0.000000 11932 +podp 0 1 6.957497 0.000000 11933 +taxonomi 0 1 6.957497 0.000000 11934 +structureselectron 0 1 6.957497 0.000000 11935 +dag 0 1 6.957497 0.000000 11936 +scholaraward 0 1 6.957497 0.000000 11937 +wordless 0 1 6.957497 0.000000 11938 +analysisand 0 1 6.957497 0.000000 11939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..2320d614 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +model 0 145 1.945910 0.000000 69 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +databas 0 122 2.079442 0.000000 86 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +structur 0 106 2.197225 0.000000 105 +site 0 106 2.197225 0.000000 119 +manag 0 114 2.197225 0.000000 125 +techniqu 0 99 2.302585 0.000000 138 +graphic 0 90 2.397895 0.000000 147 +optim 0 79 2.564949 0.000000 197 +resum 0 79 2.564949 0.000000 217 +java 0 70 2.708050 0.000000 248 +would 0 67 2.708050 0.000000 251 +simul 0 66 2.708050 0.000000 255 +multimedia 0 68 2.708050 0.000000 258 +polici 0 64 2.772589 0.000000 279 +browser 0 56 2.890372 0.000000 313 +summer 0 56 2.890372 0.000000 311 +case 0 51 2.995732 0.000000 351 +probabl 0 40 3.258097 0.000000 455 +prototyp 0 38 3.295837 0.000000 463 +statist 0 35 3.401197 0.000000 521 +independ 0 32 3.465736 0.000000 548 +displai 0 23 3.806662 0.000000 712 +applet 0 20 3.951244 0.000000 827 +telecommun 0 9 4.753590 0.000000 1565 +polygon 0 8 4.875197 0.000000 1723 +thrive 0 5 5.347108 0.000000 2257 +revolut 0 5 5.347108 0.000000 2315 +sector 0 3 5.857933 0.000000 3766 +suppot 0 2 6.263398 0.000000 5243 +stochast 0 2 6.263398 0.000000 4832 +masafumi 0 1 6.957497 0.000000 11940 +suzukither 0 1 6.957497 0.000000 11941 +suzukisuzuki 0 1 6.957497 0.000000 11942 +educlassesfal 0 1 6.957497 0.000000 11943 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..d2515102 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +process 0 142 1.945910 0.000000 72 +number 0 130 2.079442 0.000000 97 +multimedia 0 68 2.708050 0.000000 258 +littl 0 39 3.258097 0.000000 454 +movi 0 40 3.258097 0.000000 459 +spend 0 19 4.007333 0.000000 850 +jonathan 0 13 4.382027 0.000000 1174 +edui 0 13 4.382027 0.000000 1193 +departmentof 0 9 4.753590 0.000000 1539 +humor 0 5 5.347108 0.000000 2533 +swartz 1 4 5.568345 5.568345 2878 +heredevelopingrivl 0 1 6.957497 0.000000 11944 +myaddress 0 1 6.957497 0.000000 11945 +brighten 0 1 6.957497 0.000000 11946 +dayjon 0 1 6.957497 0.000000 11947 +connectioncool 0 1 6.957497 0.000000 11948 +siteslast 0 1 6.957497 0.000000 11949 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..74e3aa21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +class 1 199 1.609438 1.609438 37 +person 0 111 2.197225 0.000000 117 +send 0 114 2.197225 0.000000 109 +question 0 91 2.397895 0.000000 141 +academ 0 82 2.484907 0.000000 178 +pagewelcom 0 11 4.553877 0.000000 1344 +studentcomput 0 7 5.010635 0.000000 1963 +departmentcornel 0 5 5.347108 0.000000 2275 +srivastava 0 2 6.263398 0.000000 5395 +sunil 1 1 6.957497 6.957497 11950 +srivastavamast 0 1 6.957497 0.000000 11951 +linkscom 0 1 6.957497 0.000000 11952 +sxsriva 0 1 6.957497 0.000000 11953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..b4467d5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +like 0 132 1.945910 0.000000 81 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +find 0 111 2.197225 0.000000 111 +version 0 113 2.197225 0.000000 122 +check 0 115 2.197225 0.000000 118 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +graphic 0 90 2.397895 0.000000 147 +octob 0 89 2.397895 0.000000 156 +build 0 85 2.484907 0.000000 184 +second 0 81 2.484907 0.000000 166 +internet 0 83 2.484907 0.000000 186 +thing 0 84 2.484907 0.000000 189 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +orient 0 80 2.564949 0.000000 205 +master 0 76 2.564949 0.000000 216 +involv 0 71 2.639057 0.000000 227 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +practic 0 70 2.708050 0.000000 246 +import 0 65 2.772589 0.000000 282 +septemb 0 65 2.772589 0.000000 274 +street 0 63 2.772589 0.000000 293 +colleg 0 61 2.833213 0.000000 300 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +unix 0 58 2.890372 0.000000 308 +found 0 53 2.944439 0.000000 337 +three 0 54 2.944439 0.000000 330 +run 0 51 2.995732 0.000000 347 +life 0 50 3.044522 0.000000 375 +effect 0 46 3.091042 0.000000 385 +made 0 44 3.135494 0.000000 398 +natur 0 44 3.135494 0.000000 406 +anoth 0 45 3.135494 0.000000 408 +around 0 43 3.178054 0.000000 415 +long 0 43 3.178054 0.000000 413 +music 0 42 3.218876 0.000000 436 +editor 0 41 3.218876 0.000000 433 +live 0 40 3.258097 0.000000 451 +mean 0 37 3.332205 0.000000 477 +field 0 37 3.332205 0.000000 482 +survei 0 35 3.401197 0.000000 513 +human 0 32 3.465736 0.000000 546 +independ 0 32 3.465736 0.000000 548 +becom 0 28 3.610918 0.000000 603 +quit 0 27 3.637586 0.000000 633 +though 0 27 3.637586 0.000000 622 +linux 0 27 3.637586 0.000000 631 +rather 0 26 3.688879 0.000000 642 +higher 0 24 3.761200 0.000000 690 +other 0 24 3.761200 0.000000 697 +wish 0 24 3.761200 0.000000 692 +almost 0 22 3.850148 0.000000 742 +self 0 22 3.850148 0.000000 761 +thu 0 21 3.912023 0.000000 773 +love 0 21 3.912023 0.000000 804 +watch 0 21 3.912023 0.000000 789 +nice 0 20 3.951244 0.000000 809 +supervis 0 20 3.951244 0.000000 840 +ever 0 19 4.007333 0.000000 872 +four 0 18 4.060443 0.000000 905 +listen 0 18 4.060443 0.000000 907 +concentr 0 18 4.060443 0.000000 906 +steven 0 17 4.110874 0.000000 953 +taiwan 0 16 4.174387 0.000000 1006 +hobbi 0 16 4.174387 0.000000 1009 +anyth 0 16 4.174387 0.000000 998 +intel 0 16 4.174387 0.000000 1000 +later 0 15 4.248495 0.000000 1043 +enough 0 15 4.248495 0.000000 1040 +becam 0 14 4.317488 0.000000 1117 +stori 0 14 4.317488 0.000000 1087 +draw 0 14 4.317488 0.000000 1086 +happi 0 14 4.317488 0.000000 1079 +audio 0 14 4.317488 0.000000 1094 +everyth 0 13 4.382027 0.000000 1169 +resolut 0 13 4.382027 0.000000 1172 +huang 0 12 4.465908 0.000000 1202 +career 0 12 4.465908 0.000000 1287 +promot 0 12 4.465908 0.000000 1235 +bruce 0 12 4.465908 0.000000 1226 +land 0 12 4.465908 0.000000 1273 +island 0 11 4.553877 0.000000 1345 +night 0 11 4.553877 0.000000 1319 +host 0 11 4.553877 0.000000 1306 +literatur 0 11 4.553877 0.000000 1300 +broad 0 11 4.553877 0.000000 1302 +ofcomput 0 10 4.653960 0.000000 1442 +rich 0 10 4.653960 0.000000 1396 +seven 0 9 4.753590 0.000000 1561 +calvin 0 9 4.753590 0.000000 1518 +face 0 9 4.753590 0.000000 1501 +ideal 0 8 4.875197 0.000000 1630 +film 0 8 4.875197 0.000000 1761 +exactli 0 7 5.010635 0.000000 1817 +hunt 0 7 5.010635 0.000000 1798 +brought 0 7 5.010635 0.000000 1925 +migrat 0 7 5.010635 0.000000 1851 +discoveri 0 7 5.010635 0.000000 1915 +channel 0 7 5.010635 0.000000 1836 +truth 0 6 5.164786 0.000000 2179 +parent 0 6 5.164786 0.000000 2204 +south 0 6 5.164786 0.000000 2167 +lucki 0 6 5.164786 0.000000 2163 +chat 0 6 5.164786 0.000000 2128 +railroad 0 6 5.164786 0.000000 2161 +myresum 0 6 5.164786 0.000000 2199 +freeli 0 6 5.164786 0.000000 2014 +ship 0 5 5.347108 0.000000 2534 +tang 0 5 5.347108 0.000000 2409 +knew 0 5 5.347108 0.000000 2445 +ofinterest 0 5 5.347108 0.000000 2323 +champion 0 4 5.568345 0.000000 2982 +proud 0 4 5.568345 0.000000 2918 +assur 0 4 5.568345 0.000000 2722 +somewhat 0 4 5.568345 0.000000 2659 +fulfil 0 4 5.568345 0.000000 2932 +hobb 0 4 5.568345 0.000000 2893 +children 0 3 5.857933 0.000000 3767 +talent 0 3 5.857933 0.000000 3768 +peac 0 3 5.857933 0.000000 3769 +pai 0 3 5.857933 0.000000 3672 +asid 0 3 5.857933 0.000000 3770 +lego 0 3 5.857933 0.000000 3188 +dick 0 2 6.263398 0.000000 5396 +tender 0 2 6.263398 0.000000 5397 +tropic 0 2 6.263398 0.000000 5398 +fifteen 0 2 6.263398 0.000000 5399 +marvel 0 2 6.263398 0.000000 5400 +defeat 0 2 6.263398 0.000000 5401 +reward 0 2 6.263398 0.000000 5402 +andwork 0 2 6.263398 0.000000 5403 +relai 0 2 6.263398 0.000000 5404 +weapon 0 2 6.263398 0.000000 5115 +spectrum 0 2 6.263398 0.000000 5405 +blobbi 0 2 6.263398 0.000000 4820 +huangszu 0 1 6.957497 0.000000 11954 +defend 0 1 6.957497 0.000000 11955 +justic 0 1 6.957497 0.000000 11956 +nevermind 0 1 6.957497 0.000000 11957 +iarriv 0 1 6.957497 0.000000 11958 +soundslik 0 1 6.957497 0.000000 11959 +mobi 0 1 6.957497 0.000000 11960 +nointent 0 1 6.957497 0.000000 11961 +whale 0 1 6.957497 0.000000 11962 +digress 0 1 6.957497 0.000000 11963 +wholefamili 0 1 6.957497 0.000000 11964 +philippin 0 1 6.957497 0.000000 11965 +aroundsix 0 1 6.957497 0.000000 11966 +fluentli 0 1 6.957497 0.000000 11967 +bilingu 0 1 6.957497 0.000000 11968 +thepoetri 0 1 6.957497 0.000000 11969 +dynasti 0 1 6.957497 0.000000 11970 +arabian 0 1 6.957497 0.000000 11971 +doveright 0 1 6.957497 0.000000 11972 +ienter 0 1 6.957497 0.000000 11973 +philippineswith 0 1 6.957497 0.000000 11974 +unabashedli 0 1 6.957497 0.000000 11975 +alsoin 0 1 6.957497 0.000000 11976 +whirlwind 0 1 6.957497 0.000000 11977 +awoman 0 1 6.957497 0.000000 11978 +effortlessli 0 1 6.957497 0.000000 11979 +eek 0 1 6.957497 0.000000 11980 +blunt 0 1 6.957497 0.000000 11981 +ienrol 0 1 6.957497 0.000000 11982 +segreg 0 1 6.957497 0.000000 11983 +everydaygeek 0 1 6.957497 0.000000 11984 +fromactu 0 1 6.957497 0.000000 11985 +happili 0 1 6.957497 0.000000 11986 +myspar 0 1 6.957497 0.000000 11987 +linuxnet 0 1 6.957497 0.000000 11988 +plastic 0 1 6.957497 0.000000 11989 +suspens 0 1 6.957497 0.000000 11990 +thriller 0 1 6.957497 0.000000 11991 +sting 0 1 6.957497 0.000000 11992 +sesam 0 1 6.957497 0.000000 11993 +offend 0 1 6.957497 0.000000 11994 +bysom 0 1 6.957497 0.000000 11995 +blatant 0 1 6.957497 0.000000 11996 +highlyinterest 0 1 6.957497 0.000000 11997 +compatiblecomput 0 1 6.957497 0.000000 11998 +metaballsund 0 1 6.957497 0.000000 11999 +techniquesin 0 1 6.957497 0.000000 12000 +andport 0 1 6.957497 0.000000 12001 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..2d5b37e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +technolog 1 131 2.079442 2.079442 102 +move 1 47 3.091042 3.091042 382 +hybrid 1 15 4.248495 4.248495 1057 +henzing 1 3 5.857933 5.857933 3713 +hytechhytech 1 1 6.957497 6.957497 12002 +toolw 1 1 6.957497 6.957497 12003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..3c40758d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +analysi 0 124 2.079442 0.000000 98 +advanc 0 99 2.302585 0.000000 130 +real 0 93 2.397895 0.000000 144 +control 0 82 2.484907 0.000000 164 +logic 0 71 2.639057 0.000000 230 +septemb 0 65 2.772589 0.000000 274 +linear 0 41 3.218876 0.000000 431 +formal 0 37 3.332205 0.000000 478 +concurr 0 34 3.401197 0.000000 501 +symbol 0 27 3.637586 0.000000 620 +universityithaca 0 24 3.761200 0.000000 710 +methodolog 0 23 3.806662 0.000000 733 +verif 0 20 3.951244 0.000000 826 +aid 0 18 4.060443 0.000000 904 +hybrid 0 15 4.248495 0.000000 1057 +eduphon 0 15 4.248495 0.000000 1060 +transit 0 15 4.248495 0.000000 1046 +embed 0 14 4.317488 0.000000 1102 +automata 0 13 4.382027 0.000000 1135 +modul 0 10 4.653960 0.000000 1434 +departmentcornel 0 5 5.347108 0.000000 2275 +henzing 0 3 5.857933 0.000000 3713 +professorcomput 0 3 5.857933 0.000000 3714 +worldwid 0 3 5.857933 0.000000 3704 +checker 0 3 5.857933 0.000000 3644 +systemsr 0 2 6.263398 0.000000 4312 +bibtex 0 2 6.263398 0.000000 5406 +henzingerthoma 0 1 6.957497 0.000000 12004 +movedassist 0 1 6.957497 0.000000 12005 +researchform 0 1 6.957497 0.000000 12006 +researchat 0 1 6.957497 0.000000 12007 +cornelland 0 1 6.957497 0.000000 12008 +resumepublicationsreact 0 1 6.957497 0.000000 12009 +systemsclock 0 1 6.957497 0.000000 12010 +systemshybrid 0 1 6.957497 0.000000 12011 +systemsbibliographi 0 1 6.957497 0.000000 12012 +publicationstoolshytech 0 1 6.957497 0.000000 12013 +systemscoursesc 0 1 6.957497 0.000000 12014 +languagesconferenceshybrid 0 1 6.957497 0.000000 12015 +systemscav 0 1 6.957497 0.000000 12016 +verificationlast 0 1 6.957497 0.000000 12017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..13bca711 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +manag 0 114 2.197225 0.000000 125 +homepag 0 93 2.397895 0.000000 148 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +upson 0 71 2.639057 0.000000 218 +previou 0 62 2.772589 0.000000 290 +life 0 50 3.044522 0.000000 375 +eduoffic 0 33 3.433987 0.000000 531 +quot 0 29 3.583519 0.000000 582 +psycholog 0 15 4.248495 0.000000 1054 +horu 0 14 4.317488 0.000000 1116 +social 0 13 4.382027 0.000000 1123 +reness 0 11 4.553877 0.000000 1333 +hockei 0 8 4.875197 0.000000 1760 +atcornel 0 6 5.164786 0.000000 2131 +advis 0 6 5.164786 0.000000 2173 +hickei 0 4 5.568345 0.000000 2845 +hallphon 0 4 5.568345 0.000000 2900 +schneider 0 4 5.568345 0.000000 2868 +takako 1 3 5.857933 5.857933 3538 +backcountri 0 3 5.857933 0.000000 3686 +byrobbert 0 1 6.957497 0.000000 12018 +andfr 0 1 6.957497 0.000000 12019 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..d665555f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +professor 0 137 1.945910 0.000000 76 +compil 0 122 2.079442 0.000000 96 +environ 0 84 2.484907 0.000000 177 +editor 0 41 3.218876 0.000000 433 +transform 0 32 3.465736 0.000000 542 +attribut 0 14 4.317488 0.000000 1092 +eduresearch 0 6 5.164786 0.000000 2205 +increment 0 6 5.164786 0.000000 2206 +grammar 0 6 5.164786 0.000000 2058 +tim_teitelbaum 0 1 6.957497 0.000000 12020 +teitelbaumassoci 0 1 6.957497 0.000000 12021 +adavita 0 1 6.957497 0.000000 12022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..3d932856 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +lectur 0 135 1.945910 0.000000 73 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +version 0 113 2.197225 0.000000 122 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +commun 0 95 2.397895 0.000000 157 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +activ 0 84 2.484907 0.000000 182 +level 0 87 2.484907 0.000000 180 +novemb 0 81 2.484907 0.000000 179 +control 0 82 2.484907 0.000000 164 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +messag 0 76 2.564949 0.000000 212 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +abstract 0 62 2.772589 0.000000 276 +evalu 0 64 2.772589 0.000000 266 +function 0 62 2.772589 0.000000 275 +simpl 0 60 2.833213 0.000000 298 +share 0 59 2.833213 0.000000 304 +automat 0 61 2.833213 0.000000 306 +juli 0 60 2.833213 0.000000 305 +sever 0 56 2.890372 0.000000 322 +think 0 57 2.890372 0.000000 314 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +talk 0 53 2.944439 0.000000 336 +run 0 51 2.995732 0.000000 347 +digit 0 52 2.995732 0.000000 348 +without 0 50 3.044522 0.000000 370 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +mechan 0 43 3.178054 0.000000 416 +offer 0 43 3.178054 0.000000 414 +annual 0 40 3.258097 0.000000 458 +slide 0 38 3.295837 0.000000 467 +workstat 0 37 3.332205 0.000000 479 +extend 0 32 3.465736 0.000000 539 +platform 0 29 3.583519 0.000000 591 +limit 0 29 3.583519 0.000000 585 +cluster 0 28 3.610918 0.000000 612 +multiprocessor 0 28 3.610918 0.000000 605 +proc 0 26 3.688879 0.000000 649 +berkelei 0 26 3.688879 0.000000 657 +supercomput 0 25 3.737670 0.000000 681 +toward 0 25 3.737670 0.000000 668 +fundament 0 25 3.737670 0.000000 661 +magazin 0 24 3.761200 0.000000 704 +lead 0 23 3.806662 0.000000 718 +thread 0 23 3.806662 0.000000 722 +fine 0 20 3.951244 0.000000 822 +department 0 20 3.951244 0.000000 839 +speed 0 18 4.060443 0.000000 911 +interconnect 0 17 4.110874 0.000000 937 +diego 0 16 4.174387 0.000000 992 +latenc 0 16 4.174387 0.000000 993 +fourth 0 16 4.174387 0.000000 999 +cambridg 0 16 4.174387 0.000000 1008 +month 0 15 4.248495 0.000000 1025 +micro 0 15 4.248495 0.000000 1031 +split 0 14 4.317488 0.000000 1078 +eicken 1 13 4.382027 4.382027 1134 +thorsten 0 13 4.382027 0.000000 1133 +conf 0 13 4.382027 0.000000 1181 +sigplan 0 13 4.382027 0.000000 1190 +guest 0 12 4.465908 0.000000 1220 +multithread 0 11 4.553877 0.000000 1315 +bandwidth 0 11 4.553877 0.000000 1365 +grain 0 10 4.653960 0.000000 1448 +werner 0 10 4.653960 0.000000 1385 +santa 0 10 4.653960 0.000000 1441 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +patterson 0 9 4.753590 0.000000 1554 +port 0 8 4.875197 0.000000 1766 +entri 0 8 4.875197 0.000000 1678 +gold 0 8 4.875197 0.000000 1745 +coast 0 8 4.875197 0.000000 1746 +vineet 0 8 4.875197 0.000000 1639 +vogel 0 8 4.875197 0.000000 1622 +andcomput 0 8 4.875197 0.000000 1623 +realist 0 8 4.875197 0.000000 1665 +goldstein 0 6 5.164786 0.000000 2168 +forum 0 6 5.164786 0.000000 2027 +fish 0 6 5.164786 0.000000 2207 +greec 0 6 5.164786 0.000000 2208 +culler 0 5 5.347108 0.000000 2381 +symp 0 5 5.347108 0.000000 2376 +buch 0 5 5.347108 0.000000 2272 +australia 0 5 5.347108 0.000000 2478 +water 0 5 5.347108 0.000000 2535 +plant 0 5 5.347108 0.000000 2497 +dataflow 0 5 5.347108 0.000000 2390 +karp 0 5 5.347108 0.000000 2284 +ifip 0 5 5.347108 0.000000 2459 +basu 0 4 5.568345 0.000000 2843 +hallphon 0 4 5.568345 0.000000 2900 +coursesc 0 4 5.568345 0.000000 2692 +tire 0 4 5.568345 0.000000 2799 +password 0 4 5.568345 0.000000 2594 +medium 0 4 5.568345 0.000000 2834 +schauser 0 3 5.857933 0.000000 3599 +interfacefor 0 3 5.857933 0.000000 3534 +frontier 0 3 5.857933 0.000000 3771 +anindya 0 3 5.857933 0.000000 3535 +avula 0 3 5.857933 0.000000 3600 +abridg 0 3 5.857933 0.000000 3772 +dusseau 0 3 5.857933 0.000000 3382 +yelick 0 3 5.857933 0.000000 3374 +crete 0 3 5.857933 0.000000 3773 +lan 0 2 6.263398 0.000000 4359 +includingth 0 2 6.263398 0.000000 4493 +pond 0 2 6.263398 0.000000 5127 +firewal 0 2 6.263398 0.000000 5407 +distributedcomput 0 2 6.263398 0.000000 5336 +communicationarchitectur 0 2 6.263398 0.000000 4859 +krishnamurthi 0 2 6.263398 0.000000 5408 +lumetta 0 2 6.263398 0.000000 5409 +dalli 0 2 6.263398 0.000000 4517 +logp 0 2 6.263398 0.000000 4227 +orlando 0 2 6.263398 0.000000 5410 +clara 0 2 6.263398 0.000000 4958 +barrera 0 2 6.263398 0.000000 4309 +departement 0 1 6.957497 0.000000 12023 +eickenassist 0 1 6.957497 0.000000 12024 +eduprojectsth 0 1 6.957497 0.000000 12025 +architectureprovid 0 1 6.957497 0.000000 12026 +latencyand 0 1 6.957497 0.000000 12027 +currentimplement 0 1 6.957497 0.000000 12028 +tonon 0 1 6.957497 0.000000 12029 +spmd 0 1 6.957497 0.000000 12030 +extensionto 0 1 6.957497 0.000000 12031 +newplatform 0 1 6.957497 0.000000 12032 +multprocessor 0 1 6.957497 0.000000 12033 +computerorgan 0 1 6.957497 0.000000 12034 +maynd 0 1 6.957497 0.000000 12035 +pagestv 0 1 6.957497 0.000000 12036 +macpppwhich 0 1 6.957497 0.000000 12037 +everhav 0 1 6.957497 0.000000 12038 +passwordssuddenli 0 1 6.957497 0.000000 12039 +installationinstruct 0 1 6.957497 0.000000 12040 +publicationsu 0 1 6.957497 0.000000 12041 +atmnetwork 0 1 6.957497 0.000000 12042 +controlledthread 0 1 6.957497 0.000000 12043 +spertu 0 1 6.957497 0.000000 12044 +modelof 0 1 6.957497 0.000000 12045 +sahai 0 1 6.957497 0.000000 12046 +santo 0 1 6.957497 0.000000 12047 +subramonian 0 1 6.957497 0.000000 12048 +dataflowmultiprocess 0 1 6.957497 0.000000 12049 +forintegr 0 1 6.957497 0.000000 12050 +forleni 0 1 6.957497 0.000000 12051 +minimalhardwar 0 1 6.957497 0.000000 12052 +wawrzynek 0 1 6.957497 0.000000 12053 +architecturesfor 0 1 6.957497 0.000000 12054 +saavedra 0 1 6.957497 0.000000 12055 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..5b63b592 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +contact 0 153 1.791759 0.000000 59 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +pleas 0 113 2.197225 0.000000 114 +real 0 93 2.397895 0.000000 144 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +appli 0 71 2.639057 0.000000 226 +date 0 51 2.995732 0.000000 344 +done 0 47 3.091042 0.000000 381 +often 0 31 3.496508 0.000000 551 +actual 0 28 3.610918 0.000000 604 +enjoi 0 26 3.688879 0.000000 660 +assum 0 19 4.007333 0.000000 845 +incomput 0 14 4.317488 0.000000 1096 +touch 0 12 4.465908 0.000000 1288 +moment 0 11 4.553877 0.000000 1379 +apart 0 7 5.010635 0.000000 1936 +somewhat 0 4 5.568345 0.000000 2659 +disclaim 0 4 5.568345 0.000000 2847 +erlingsson 1 2 6.263398 6.263398 4107 +lfar 0 2 6.263398 0.000000 4106 +pagelfar 0 1 6.957497 0.000000 12056 +specificationi 0 1 6.957497 0.000000 12057 +incongruousiceland 0 1 6.957497 0.000000 12058 +implementationbackgroundwher 0 1 6.957497 0.000000 12059 +activitieswhat 0 1 6.957497 0.000000 12060 +schedulewher 0 1 6.957497 0.000000 12061 +researchwhat 0 1 6.957497 0.000000 12062 +interestswhat 0 1 6.957497 0.000000 12063 +acquaintancesthos 0 1 6.957497 0.000000 12064 +infohow 0 1 6.957497 0.000000 12065 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..2a471f36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +center 0 88 2.397895 0.000000 158 +novemb 0 81 2.484907 0.000000 179 +resourc 0 81 2.484907 0.000000 172 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +ithaca 0 65 2.772589 0.000000 294 +back 0 60 2.833213 0.000000 297 +unix 0 58 2.890372 0.000000 308 +run 0 51 2.995732 0.000000 347 +robert 0 30 3.555348 0.000000 567 +packag 0 28 3.610918 0.000000 614 +releas 0 28 3.610918 0.000000 616 +geometri 0 22 3.850148 0.000000 752 +geometr 0 19 4.007333 0.000000 852 +element 0 18 4.060443 0.000000 895 +finit 0 14 4.317488 0.000000 1106 +jonathan 0 13 4.382027 0.000000 1174 +mesh 0 11 4.553877 0.000000 1351 +stephen 0 11 4.553877 0.000000 1342 +solver 0 7 5.010635 0.000000 1911 +minnesota 0 5 5.347108 0.000000 2469 +websit 0 4 5.568345 0.000000 2726 +schneider 0 4 5.568345 0.000000 2868 +vavasi 0 3 5.857933 0.000000 3526 +threedimens 0 1 6.957497 0.000000 12066 +themesh 0 1 6.957497 0.000000 12067 +softwaredownload 0 1 6.957497 0.000000 12068 +andqmg 0 1 6.957497 0.000000 12069 +mcphedran 0 1 6.957497 0.000000 12070 +offinit 0 1 6.957497 0.000000 12071 +ofsoftwar 0 1 6.957497 0.000000 12072 +computationalgeometri 0 1 6.957497 0.000000 12073 +shewchuk 0 1 6.957497 0.000000 12074 +triangl 0 1 6.957497 0.000000 12075 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..93e1c107 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +note 0 142 1.945910 0.000000 67 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +object 0 138 1.945910 0.000000 79 +analysi 0 124 2.079442 0.000000 98 +document 0 121 2.079442 0.000000 89 +code 0 108 2.197225 0.000000 116 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +specif 0 106 2.197225 0.000000 106 +intern 0 108 2.197225 0.000000 128 +associ 0 93 2.397895 0.000000 151 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +level 0 87 2.484907 0.000000 180 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +optim 0 79 2.564949 0.000000 197 +line 0 75 2.639057 0.000000 231 +nation 0 74 2.639057 0.000000 240 +solv 0 73 2.639057 0.000000 234 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +ithaca 0 65 2.772589 0.000000 294 +laboratori 0 63 2.772589 0.000000 292 +complex 0 64 2.772589 0.000000 269 +creat 0 63 2.772589 0.000000 277 +unix 0 58 2.890372 0.000000 308 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +numer 0 49 3.044522 0.000000 369 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +map 0 39 3.258097 0.000000 452 +annual 0 40 3.258097 0.000000 458 +microsoft 0 38 3.295837 0.000000 468 +least 0 35 3.401197 0.000000 516 +domain 0 30 3.555348 0.000000 564 +depend 0 29 3.583519 0.000000 583 +packag 0 28 3.610918 0.000000 614 +releas 0 28 3.610918 0.000000 616 +bound 0 26 3.688879 0.000000 659 +valu 0 25 3.737670 0.000000 665 +aspect 0 25 3.737670 0.000000 663 +period 0 22 3.850148 0.000000 743 +divis 0 21 3.912023 0.000000 803 +grad 0 20 3.951244 0.000000 837 +geometr 0 19 4.007333 0.000000 852 +element 0 18 4.060443 0.000000 895 +scott 0 18 4.060443 0.000000 884 +matrix 0 17 4.110874 0.000000 933 +anonym 0 14 4.317488 0.000000 1100 +matlab 0 14 4.317488 0.000000 1081 +squar 0 14 4.317488 0.000000 1082 +finit 0 14 4.317488 0.000000 1106 +topolog 0 14 4.317488 0.000000 1089 +whose 0 13 4.382027 0.000000 1166 +forth 0 13 4.382027 0.000000 1186 +weight 0 12 4.465908 0.000000 1204 +mesh 0 11 4.553877 0.000000 1351 +stephen 0 11 4.553877 0.000000 1342 +faster 0 11 4.553877 0.000000 1323 +rhode 0 9 4.753590 0.000000 1579 +cross 0 8 4.875197 0.000000 1703 +boundari 0 7 5.010635 0.000000 1929 +aris 0 7 5.010635 0.000000 1924 +dimens 0 7 5.010635 0.000000 1930 +argonn 0 5 5.347108 0.000000 2461 +colleagu 0 5 5.347108 0.000000 2304 +dual 0 5 5.347108 0.000000 2522 +hole 0 5 5.347108 0.000000 2518 +compat 0 5 5.347108 0.000000 2485 +ratio 0 4 5.568345 0.000000 2942 +triangul 0 4 5.568345 0.000000 2903 +bldg 0 4 5.568345 0.000000 2983 +manuscript 0 4 5.568345 0.000000 2750 +orthogon 0 4 5.568345 0.000000 2832 +conform 0 4 5.568345 0.000000 2941 +vrml 0 4 5.568345 0.000000 2949 +vavasi 1 3 5.857933 5.857933 3526 +aren 0 3 5.857933 0.000000 3512 +trefethen 0 3 5.857933 0.000000 3528 +hough 0 3 5.857933 0.000000 3527 +delaunai 0 3 5.857933 0.000000 3619 +ellipt 0 3 5.857933 0.000000 3774 +cleaner 0 3 5.857933 0.000000 3775 +mitchel 0 2 6.263398 0.000000 4792 +acceler 0 2 6.263398 0.000000 5411 +driscol 0 2 6.263398 0.000000 4836 +polyhedr 0 2 6.263398 0.000000 5412 +andautomat 0 2 6.263398 0.000000 5413 +onsabbat 0 1 6.957497 0.000000 12076 +cass 0 1 6.957497 0.000000 12077 +tsure 0 1 6.957497 0.000000 12078 +essaybi 0 1 6.957497 0.000000 12079 +issuesnumer 0 1 6.957497 0.000000 12080 +problemsgeometr 0 1 6.957497 0.000000 12081 +computingspars 0 1 6.957497 0.000000 12082 +computationsi 0 1 6.957497 0.000000 12083 +primal 0 1 6.957497 0.000000 12084 +interiorpoint 0 1 6.957497 0.000000 12085 +decompositionfor 0 1 6.957497 0.000000 12086 +gridcut 0 1 6.957497 0.000000 12087 +hyperplan 0 1 6.957497 0.000000 12088 +packagei 0 1 6.957497 0.000000 12089 +verycompl 0 1 6.957497 0.000000 12090 +unstructuredtetrahedr 0 1 6.957497 0.000000 12091 +boundaryvalu 0 1 6.957497 0.000000 12092 +iswritten 0 1 6.957497 0.000000 12093 +distributedfor 0 1 6.957497 0.000000 12094 +distributionbegan 0 1 6.957497 0.000000 12095 +manyimprov 0 1 6.957497 0.000000 12096 +compatibilitywith 0 1 6.957497 0.000000 12097 +pleasese 0 1 6.957497 0.000000 12098 +reportback 0 1 6.957497 0.000000 12099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..26ed3678 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +support 1 132 1.945910 1.945910 83 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +browser 1 56 2.890372 2.890372 313 +netscap 1 44 3.135494 3.135494 395 +frame 1 24 3.761200 3.761200 684 +higher 1 24 3.761200 3.761200 690 +arun 1 4 5.568345 5.568345 2736 +verma 1 2 6.263398 6.263398 4341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..08df42ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +read 1 154 1.791759 1.791759 47 +browser 1 56 2.890372 2.890372 313 +date 1 51 2.995732 2.995732 344 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +thank 1 23 3.806662 3.806662 721 +vinc 1 2 6.263398 6.263398 5414 +suck 1 2 6.263398 6.263398 5232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..0afaee38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +engin 0 297 1.098612 0.000000 20 +cornel 0 215 1.386294 0.000000 23 +databas 0 122 2.079442 0.000000 86 +advanc 0 99 2.302585 0.000000 130 +start 0 83 2.484907 0.000000 173 +thing 0 84 2.484907 0.000000 189 +internet 0 83 2.484907 0.000000 186 +multimedia 0 68 2.708050 0.000000 258 +give 0 50 3.044522 0.000000 359 +break 0 20 3.951244 0.000000 812 +pagec 0 15 4.248495 0.000000 1011 +pageer 0 3 5.857933 0.000000 3776 +vitrano 1 1 6.957497 6.957497 12100 +pagehei 0 1 6.957497 0.000000 12101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..3089dbf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +file 0 132 1.945910 0.000000 70 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +specif 0 106 2.197225 0.000000 106 +send 0 114 2.197225 0.000000 109 +world 0 115 2.197225 0.000000 126 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +info 0 85 2.484907 0.000000 176 +larg 0 82 2.484907 0.000000 168 +know 0 80 2.564949 0.000000 198 +good 0 77 2.564949 0.000000 200 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +function 0 62 2.772589 0.000000 275 +collect 0 65 2.772589 0.000000 268 +visit 0 63 2.772589 0.000000 288 +back 0 60 2.833213 0.000000 297 +browser 0 56 2.890372 0.000000 313 +think 0 57 2.890372 0.000000 314 +maintain 0 51 2.995732 0.000000 342 +right 0 48 3.044522 0.000000 363 +visitor 0 49 3.044522 0.000000 371 +life 0 50 3.044522 0.000000 375 +effect 0 46 3.091042 0.000000 385 +favorit 0 44 3.135494 0.000000 410 +made 0 44 3.135494 0.000000 398 +around 0 43 3.178054 0.000000 415 +might 0 41 3.218876 0.000000 426 +small 0 39 3.258097 0.000000 447 +respons 0 37 3.332205 0.000000 476 +expect 0 37 3.332205 0.000000 484 +copyright 0 36 3.367296 0.000000 495 +word 0 34 3.401197 0.000000 508 +given 0 32 3.465736 0.000000 538 +anim 0 31 3.496508 0.000000 557 +someth 0 31 3.496508 0.000000 554 +held 0 28 3.610918 0.000000 600 +relev 0 26 3.688879 0.000000 637 +reach 0 24 3.761200 0.000000 688 +yahoo 0 24 3.761200 0.000000 707 +wonder 0 20 3.951244 0.000000 815 +reserv 0 20 3.951244 0.000000 808 +applet 0 20 3.951244 0.000000 827 +qualiti 0 20 3.951244 0.000000 832 +els 0 19 4.007333 0.000000 843 +capabl 0 15 4.248495 0.000000 1016 +happi 0 14 4.317488 0.000000 1079 +deriv 0 13 4.382027 0.000000 1145 +clock 0 11 4.553877 0.000000 1320 +regard 0 11 4.553877 0.000000 1309 +vista 0 10 4.653960 0.000000 1452 +ground 0 7 5.010635 0.000000 1955 +usabl 0 7 5.010635 0.000000 1810 +heavi 0 7 5.010635 0.000000 1841 +usag 0 6 5.164786 0.000000 2209 +vivek 0 6 5.164786 0.000000 2210 +promis 0 6 5.164786 0.000000 2037 +million 0 5 5.347108 0.000000 2495 +settimeout 0 5 5.347108 0.000000 2536 +wast 0 5 5.347108 0.000000 2537 +seed 1 4 5.568345 5.568345 2984 +timertwo 0 4 5.568345 0.000000 2985 +transmit 0 4 5.568345 0.000000 2835 +fulli 0 4 5.568345 0.000000 2986 +dont 0 3 5.857933 0.000000 3473 +impli 0 3 5.857933 0.000000 3348 +kolla 0 1 6.957497 0.000000 12102 +scrollit 0 1 6.957497 0.000000 12103 +unwant 0 1 6.957497 0.000000 12104 +warrante 0 1 6.957497 0.000000 12105 +zillion 0 1 6.957497 0.000000 12106 +thoughtsfriend 0 1 6.957497 0.000000 12107 +foeslinksa 0 1 6.957497 0.000000 12108 +tryalta 0 1 6.957497 0.000000 12109 +theinktomiresumein 0 1 6.957497 0.000000 12110 +htmlin 0 1 6.957497 0.000000 12111 +postscriptin 0 1 6.957497 0.000000 12112 +perfectin 0 1 6.957497 0.000000 12113 +asciith 0 1 6.957497 0.000000 12114 +wanna 0 1 6.957497 0.000000 12115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..76ca3c81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +professor 1 137 1.945910 1.945910 76 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +part 0 98 2.302585 0.000000 129 +comment 0 93 2.397895 0.000000 146 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +member 0 84 2.484907 0.000000 165 +solut 0 82 2.484907 0.000000 162 +david 0 71 2.639057 0.000000 232 +differ 0 66 2.708050 0.000000 253 +prof 0 64 2.772589 0.000000 273 +function 0 62 2.772589 0.000000 275 +simpl 0 60 2.833213 0.000000 298 +particular 0 51 2.995732 0.000000 352 +friend 0 48 3.044522 0.000000 376 +might 0 41 3.218876 0.000000 426 +paul 0 38 3.295837 0.000000 471 +everi 0 34 3.401197 0.000000 519 +either 0 35 3.401197 0.000000 506 +seem 0 18 4.060443 0.000000 899 +matrix 0 17 4.110874 0.000000 933 +spars 0 16 4.174387 0.000000 989 +difficulti 0 13 4.382027 0.000000 1132 +guess 0 10 4.653960 0.000000 1443 +henri 0 10 4.653960 0.000000 1417 +said 0 9 4.753590 0.000000 1571 +govern 0 9 4.753590 0.000000 1581 +judg 0 8 4.875197 0.000000 1644 +cornellunivers 0 7 5.010635 0.000000 1916 +keshav 0 7 5.010635 0.000000 1852 +remind 0 7 5.010635 0.000000 1799 +encrypt 0 7 5.010635 0.000000 1835 +legal 0 6 5.164786 0.000000 2094 +privaci 0 6 5.164786 0.000000 2144 +strong 0 6 5.164786 0.000000 2029 +lawyer 1 4 5.568345 5.568345 2836 +pingali 0 4 5.568345 0.000000 2956 +bernoulli 0 4 5.568345 0.000000 2955 +stodghil 0 4 5.568345 0.000000 2864 +lord 0 4 5.568345 0.000000 2906 +wherea 0 4 5.568345 0.000000 2597 +functionof 0 2 6.263398 0.000000 5415 +todayth 0 2 6.263398 0.000000 5416 +vlad 0 1 6.957497 0.000000 12116 +pagevladimir 0 1 6.957497 0.000000 12117 +kotlyarvladimir 0 1 6.957497 0.000000 12118 +wereteach 0 1 6.957497 0.000000 12119 +andindu 0 1 6.957497 0.000000 12120 +kodukulapubl 0 1 6.957497 0.000000 12121 +kissing 0 1 6.957497 0.000000 12122 +profess 0 1 6.957497 0.000000 12123 +abritish 0 1 6.957497 0.000000 12124 +sveri 0 1 6.957497 0.000000 12125 +den 0 1 6.957497 0.000000 12126 +asolut 0 1 6.957497 0.000000 12127 +outpac 0 1 6.957497 0.000000 12128 +ofsolut 0 1 6.957497 0.000000 12129 +hardenough 0 1 6.957497 0.000000 12130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..7bdb0581 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +address 1 170 1.791759 1.791759 62 +hall 0 146 1.945910 0.000000 65 +august 0 66 2.708050 0.000000 257 +ithaca 1 65 2.772589 2.772589 294 +mapl 0 11 4.553877 0.000000 1376 +rhode 0 9 4.753590 0.000000 1579 +vijai 0 4 5.568345 0.000000 2960 +menon 0 2 6.263398 0.000000 5249 +menonvijai 0 1 6.957497 0.000000 12131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..bf199491 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +professor 0 137 1.945910 0.000000 76 +academ 0 82 2.484907 0.000000 178 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +upson 0 71 2.639057 0.000000 218 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +august 0 66 2.708050 0.000000 257 +third 0 43 3.178054 0.000000 412 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +computersci 0 30 3.555348 0.000000 562 +detect 0 26 3.688879 0.000000 646 +bookmark 0 26 3.688879 0.000000 639 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +chen 0 21 3.912023 0.000000 791 +beij 0 19 4.007333 0.000000 876 +failur 0 18 4.060443 0.000000 898 +tsinghua 0 13 4.382027 0.000000 1195 +soccer 0 8 4.875197 0.000000 1752 +spare 0 6 5.164786 0.000000 2177 +distributedsystem 0 6 5.164786 0.000000 2022 +membership 0 3 5.857933 0.000000 3751 +bachelorand 0 2 6.263398 0.000000 5128 +chinami 0 2 6.263398 0.000000 5129 +toueg 0 2 6.263398 0.000000 5339 +pagewei 0 1 6.957497 0.000000 12132 +weichen 0 1 6.957497 0.000000 12133 +inpartition 0 1 6.957497 0.000000 12134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..2db41f63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +site 0 106 2.197225 0.000000 119 +homepag 0 93 2.397895 0.000000 148 +window 0 68 2.708050 0.000000 242 +movi 0 40 3.258097 0.000000 459 +newspap 0 12 4.465908 0.000000 1280 +weitsang 0 2 6.263398 0.000000 4088 +lwhere 0 1 6.957497 0.000000 12135 +fromwhat 0 1 6.957497 0.000000 12136 +watchwhat 0 1 6.957497 0.000000 12137 +likec 0 1 6.957497 0.000000 12138 +wrotepictur 0 1 6.957497 0.000000 12139 +drawa 0 1 6.957497 0.000000 12140 +motifcomput 0 1 6.957497 0.000000 12141 +theoryhom 0 1 6.957497 0.000000 12142 +vimi 0 1 6.957497 0.000000 12143 +tsearch 0 1 6.957497 0.000000 12144 +webcoolest 0 1 6.957497 0.000000 12145 +sitessharewar 0 1 6.957497 0.000000 12146 +archivem 0 1 6.957497 0.000000 12147 +onlineunivers 0 1 6.957497 0.000000 12148 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..c7e12502 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +final 0 116 2.197225 0.000000 108 +technic 0 100 2.302585 0.000000 140 +graphic 0 90 2.397895 0.000000 147 +academ 0 82 2.484907 0.000000 178 +resum 0 79 2.564949 0.000000 217 +nation 0 74 2.639057 0.000000 240 +multimedia 0 68 2.708050 0.000000 258 +window 0 68 2.708050 0.000000 242 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +plan 0 65 2.772589 0.000000 272 +result 0 65 2.772589 0.000000 281 +extens 0 53 2.944439 0.000000 340 +tabl 0 51 2.995732 0.000000 346 +telephon 0 50 3.044522 0.000000 373 +still 0 50 3.044522 0.000000 362 +understand 0 47 3.091042 0.000000 384 +favorit 0 44 3.135494 0.000000 410 +vision 0 41 3.218876 0.000000 430 +field 0 37 3.332205 0.000000 482 +photo 0 31 3.496508 0.000000 561 +except 0 28 3.610918 0.000000 607 +team 0 27 3.637586 0.000000 625 +background 0 25 3.737670 0.000000 664 +sport 0 25 3.737670 0.000000 683 +other 0 24 3.761200 0.000000 697 +tenni 0 20 3.951244 0.000000 838 +partial 0 18 4.060443 0.000000 900 +taiwan 0 16 4.174387 0.000000 1006 +drive 0 15 4.248495 0.000000 1052 +avenu 0 12 4.465908 0.000000 1277 +basketbal 0 12 4.465908 0.000000 1289 +danc 0 12 4.465908 0.000000 1278 +skill 0 12 4.465908 0.000000 1205 +calcul 0 12 4.465908 0.000000 1268 +meng 0 12 4.465908 0.000000 1214 +mapl 0 11 4.553877 0.000000 1376 +magic 0 11 4.553877 0.000000 1358 +player 0 11 4.553877 0.000000 1371 +market 0 11 4.553877 0.000000 1361 +swim 0 9 4.753590 0.000000 1599 +volleybal 0 9 4.753590 0.000000 1598 +rivl 0 8 4.875197 0.000000 1632 +job 0 8 4.875197 0.000000 1702 +morph 0 7 5.010635 0.000000 1937 +financi 0 6 5.164786 0.000000 2197 +sing 0 5 5.347108 0.000000 2499 +hung 0 3 5.857933 0.000000 3524 +habit 0 3 5.857933 0.000000 3777 +atlanta 0 3 5.857933 0.000000 3778 +bowl 0 2 6.263398 0.000000 5417 +orlando 0 2 6.263398 0.000000 5410 +glavin 0 1 6.957497 0.000000 12149 +billiard 0 1 6.957497 0.000000 12150 +brave 0 1 6.957497 0.000000 12151 +anferne 0 1 6.957497 0.000000 12152 +hardawai 0 1 6.957497 0.000000 12153 +warp 0 1 6.957497 0.000000 12154 +webpaint 0 1 6.957497 0.000000 12155 +whkao 0 1 6.957497 0.000000 12156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..379b935e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +mathemat 0 108 2.197225 0.000000 123 +place 0 106 2.197225 0.000000 124 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +present 0 91 2.397895 0.000000 145 +search 0 95 2.397895 0.000000 155 +school 0 84 2.484907 0.000000 188 +west 0 83 2.484907 0.000000 192 +optim 0 79 2.564949 0.000000 197 +master 0 76 2.564949 0.000000 216 +write 0 72 2.639057 0.000000 222 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +plai 0 60 2.833213 0.000000 307 +type 0 61 2.833213 0.000000 296 +reason 0 57 2.890372 0.000000 318 +week 0 52 2.995732 0.000000 343 +visitor 0 49 3.044522 0.000000 371 +cool 0 49 3.044522 0.000000 374 +could 0 46 3.091042 0.000000 383 +get 0 46 3.091042 0.000000 380 +even 0 45 3.135494 0.000000 393 +york 0 41 3.218876 0.000000 435 +probabl 0 40 3.258097 0.000000 455 +live 0 40 3.258097 0.000000 451 +seminar 0 38 3.295837 0.000000 470 +formal 0 37 3.332205 0.000000 478 +robot 0 36 3.367296 0.000000 497 +campu 0 27 3.637586 0.000000 623 +enjoi 0 26 3.688879 0.000000 660 +spent 0 25 3.737670 0.000000 676 +never 0 25 3.737670 0.000000 671 +william 0 22 3.850148 0.000000 765 +mpeg 0 20 3.951244 0.000000 831 +tenni 0 20 3.951244 0.000000 838 +minut 0 20 3.951244 0.000000 810 +wrote 0 20 3.951244 0.000000 830 +beauti 0 18 4.060443 0.000000 912 +weekli 0 17 4.110874 0.000000 919 +match 0 16 4.174387 0.000000 965 +practicum 0 16 4.174387 0.000000 960 +princeton 0 15 4.248495 0.000000 1042 +mellon 0 13 4.382027 0.000000 1179 +scienceat 0 11 4.553877 0.000000 1375 +systemsc 0 11 4.553877 0.000000 1293 +mapl 0 11 4.553877 0.000000 1376 +awai 0 10 4.653960 0.000000 1447 +guess 0 10 4.653960 0.000000 1443 +jersei 0 9 4.753590 0.000000 1587 +motorola 0 9 4.753590 0.000000 1546 +besid 0 8 4.875197 0.000000 1681 +partner 0 8 4.875197 0.000000 1648 +parti 0 8 4.875197 0.000000 1676 +on 0 8 4.875197 0.000000 1628 +south 0 6 5.164786 0.000000 2167 +piano 0 6 5.164786 0.000000 2201 +sleep 0 6 5.164786 0.000000 2211 +florida 0 5 5.347108 0.000000 2526 +compet 0 5 5.347108 0.000000 2462 +coral 0 5 5.347108 0.000000 2538 +quantifi 0 5 5.347108 0.000000 2525 +thrive 0 5 5.347108 0.000000 2257 +revolut 0 5 5.347108 0.000000 2315 +encod 0 4 5.568345 0.000000 2929 +somehow 0 4 5.568345 0.000000 2974 +essai 0 4 5.568345 0.000000 2948 +wart 0 4 5.568345 0.000000 2987 +classesc 0 3 5.857933 0.000000 3681 +exit 0 3 5.857933 0.000000 3124 +engineeringand 0 3 5.857933 0.000000 3779 +hpux 0 3 5.857933 0.000000 3780 +sector 0 3 5.857933 0.000000 3766 +cornellopoli 0 2 6.263398 0.000000 5157 +chopin 0 2 6.263398 0.000000 5358 +reject 0 2 6.263398 0.000000 5418 +techniquec 0 2 6.263398 0.000000 5158 +methodsc 0 2 6.263398 0.000000 5159 +colloquiumc 0 2 6.263398 0.000000 5160 +computingc 0 2 6.263398 0.000000 5216 +concerto 0 1 6.957497 0.000000 12157 +sciencefrom 0 1 6.957497 0.000000 12158 +carneig 0 1 6.957497 0.000000 12159 +didresearch 0 1 6.957497 0.000000 12160 +institu 0 1 6.957497 0.000000 12161 +xsro 0 1 6.957497 0.000000 12162 +atft 0 1 6.957497 0.000000 12163 +lauderdal 0 1 6.957497 0.000000 12164 +usta 0 1 6.957497 0.000000 12165 +tournment 0 1 6.957497 0.000000 12166 +faviorit 0 1 6.957497 0.000000 12167 +boca 0 1 6.957497 0.000000 12168 +ratonkei 0 1 6.957497 0.000000 12169 +beethoven 0 1 6.957497 0.000000 12170 +gershwin 0 1 6.957497 0.000000 12171 +liszt 0 1 6.957497 0.000000 12172 +mendelssohn 0 1 6.957497 0.000000 12173 +mozart 0 1 6.957497 0.000000 12174 +rachmaninoff 0 1 6.957497 0.000000 12175 +ravel 0 1 6.957497 0.000000 12176 +tchaikovski 0 1 6.957497 0.000000 12177 +violinconcerto 0 1 6.957497 0.000000 12178 +purifi 0 1 6.957497 0.000000 12179 +computerc 0 1 6.957497 0.000000 12180 +sectorcool 0 1 6.957497 0.000000 12181 +links_leap 0 1 6.957497 0.000000 12182 +frogski 0 1 6.957497 0.000000 12183 +serverident 0 1 6.957497 0.000000 12184 +crisi 0 1 6.957497 0.000000 12185 +testweath 0 1 6.957497 0.000000 12186 +undergroundinktomi 0 1 6.957497 0.000000 12187 +enginequest 0 1 6.957497 0.000000 12188 +archiveslast 0 1 6.957497 0.000000 12189 +ecithaca 0 1 6.957497 0.000000 12190 +wwlee 0 1 6.957497 0.000000 12191 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..30c4bd79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +network 0 168 1.791759 0.000000 61 +hall 0 146 1.945910 0.000000 65 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +high 0 130 2.079442 0.000000 101 +databas 0 122 2.079442 0.000000 86 +site 0 106 2.197225 0.000000 119 +manag 0 114 2.197225 0.000000 125 +graphic 0 90 2.397895 0.000000 147 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +upson 0 71 2.639057 0.000000 218 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +china 0 37 3.332205 0.000000 487 +taken 0 31 3.496508 0.000000 555 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +alumni 0 21 3.912023 0.000000 807 +bachelor 0 17 4.110874 0.000000 957 +edui 0 13 4.382027 0.000000 1193 +systemsc 0 11 4.553877 0.000000 1293 +capac 0 8 4.875197 0.000000 1740 +shade 0 7 5.010635 0.000000 1881 +atcornel 0 6 5.164786 0.000000 2131 +engineeringc 0 4 5.568345 0.000000 2904 +phong 0 2 6.263398 0.000000 4822 +xichun 0 1 6.957497 0.000000 12192 +zhejiang 0 1 6.957497 0.000000 12193 +jennif 0 1 6.957497 0.000000 12194 +hangzhou 0 1 6.957497 0.000000 12195 +javaworldsunhigh 0 1 6.957497 0.000000 12196 +alumnimeng 0 1 6.957497 0.000000 12197 +gouraud 0 1 6.957497 0.000000 12198 +systeme 0 1 6.957497 0.000000 12199 +communicationby 0 1 6.957497 0.000000 12200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..22cb19b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +topic 1 114 2.197225 2.197225 110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..422c85d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +relat 0 139 1.945910 0.000000 68 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +june 0 79 2.564949 0.000000 214 +optim 0 79 2.564949 0.000000 197 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +knowledg 0 67 2.708050 0.000000 243 +improv 0 62 2.772589 0.000000 289 +interact 0 62 2.772589 0.000000 270 +written 0 63 2.772589 0.000000 278 +function 0 62 2.772589 0.000000 275 +result 0 65 2.772589 0.000000 281 +evalu 0 64 2.772589 0.000000 266 +januari 0 62 2.772589 0.000000 264 +juli 0 60 2.833213 0.000000 305 +februari 0 54 2.944439 0.000000 328 +principl 0 48 3.044522 0.000000 357 +approach 0 48 3.044522 0.000000 366 +california 0 46 3.091042 0.000000 388 +cach 0 41 3.218876 0.000000 432 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +societi 0 40 3.258097 0.000000 456 +transform 0 32 3.465736 0.000000 542 +semant 0 29 3.583519 0.000000 587 +manipul 0 27 3.637586 0.000000 624 +boston 0 19 4.007333 0.000000 862 +partial 0 18 4.060443 0.000000 900 +attribut 0 14 4.317488 0.000000 1092 +massachusett 0 14 4.317488 0.000000 1118 +deriv 0 13 4.382027 0.000000 1145 +sigplan 0 13 4.382027 0.000000 1190 +intermedi 0 9 4.753590 0.000000 1497 +discov 0 9 4.753590 0.000000 1562 +strength 0 9 4.753590 0.000000 1494 +reduct 0 7 5.010635 0.000000 1877 +increment 0 6 5.164786 0.000000 2206 +teitelbaum 0 6 5.164786 0.000000 2102 +sigact 0 6 5.164786 0.000000 2212 +florida 0 5 5.347108 0.000000 2526 +jolla 0 4 5.568345 0.000000 2988 +stoller 0 4 5.568345 0.000000 2866 +petersburg 0 4 5.568345 0.000000 2989 +systemat 0 3 5.857933 0.000000 3781 +beach 0 3 5.857933 0.000000 3782 +cachet 0 2 6.263398 0.000000 5419 +anni 0 2 6.263398 0.000000 5420 +auxiliari 0 2 6.263398 0.000000 5421 +yanhong 0 2 6.263398 0.000000 5422 +computationderiv 0 1 6.957497 0.000000 12201 +programsa 0 1 6.957497 0.000000 12202 +themeprogram 0 1 6.957497 0.000000 12203 +usessystemat 0 1 6.957497 0.000000 12204 +deriveincrement 0 1 6.957497 0.000000 12205 +peoplei 0 1 6.957497 0.000000 12206 +liutim 0 1 6.957497 0.000000 12207 +teitelbaumkeyword 0 1 6.957497 0.000000 12208 +cacheti 0 1 6.957497 0.000000 12209 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..8cae0e9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +problem 0 147 1.945910 0.000000 75 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +document 0 121 2.079442 0.000000 89 +intern 0 108 2.197225 0.000000 128 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +techniqu 0 99 2.302585 0.000000 138 +peopl 0 96 2.302585 0.000000 132 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +center 0 88 2.397895 0.000000 158 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +present 0 91 2.397895 0.000000 145 +novemb 0 81 2.484907 0.000000 179 +institut 0 84 2.484907 0.000000 187 +ieee 0 86 2.484907 0.000000 190 +build 0 85 2.484907 0.000000 184 +optim 0 79 2.564949 0.000000 197 +june 0 79 2.564949 0.000000 214 +appear 0 78 2.564949 0.000000 210 +dynam 0 76 2.564949 0.000000 194 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +upson 0 71 2.639057 0.000000 218 +html 0 75 2.639057 0.000000 235 +august 0 66 2.708050 0.000000 257 +knowledg 0 67 2.708050 0.000000 243 +test 0 66 2.708050 0.000000 252 +improv 0 62 2.772589 0.000000 289 +ithaca 0 65 2.772589 0.000000 294 +result 0 65 2.772589 0.000000 281 +januari 0 62 2.772589 0.000000 264 +evalu 0 64 2.772589 0.000000 266 +interact 0 62 2.772589 0.000000 270 +descript 0 64 2.772589 0.000000 271 +septemb 0 65 2.772589 0.000000 274 +laboratori 0 63 2.772589 0.000000 292 +juli 0 60 2.833213 0.000000 305 +march 0 61 2.833213 0.000000 295 +automat 0 61 2.833213 0.000000 306 +reason 0 57 2.890372 0.000000 318 +publish 0 57 2.890372 0.000000 326 +explor 0 58 2.890372 0.000000 324 +februari 0 54 2.944439 0.000000 328 +talk 0 53 2.944439 0.000000 336 +profession 0 51 2.995732 0.000000 345 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +move 0 47 3.091042 0.000000 382 +york 0 41 3.218876 0.000000 435 +cach 0 41 3.218876 0.000000 432 +press 0 42 3.218876 0.000000 419 +combin 0 42 3.218876 0.000000 421 +annual 0 40 3.258097 0.000000 458 +societi 0 40 3.258097 0.000000 456 +map 0 39 3.258097 0.000000 452 +author 0 39 3.258097 0.000000 450 +seminar 0 38 3.295837 0.000000 470 +china 0 37 3.332205 0.000000 487 +formal 0 37 3.332205 0.000000 478 +multi 0 36 3.367296 0.000000 493 +post 0 35 3.401197 0.000000 505 +concurr 0 34 3.401197 0.000000 501 +survei 0 35 3.401197 0.000000 513 +manual 0 35 3.401197 0.000000 504 +obtain 0 33 3.433987 0.000000 534 +transform 0 32 3.465736 0.000000 542 +dissert 0 32 3.465736 0.000000 549 +scientist 0 31 3.496508 0.000000 560 +compon 0 30 3.555348 0.000000 570 +semant 0 29 3.583519 0.000000 587 +manipul 0 27 3.637586 0.000000 624 +revis 0 26 3.688879 0.000000 640 +doctor 0 24 3.761200 0.000000 709 +universityithaca 0 24 3.761200 0.000000 710 +wang 0 21 3.912023 0.000000 790 +hous 0 21 3.912023 0.000000 801 +expert 0 20 3.951244 0.000000 833 +break 0 20 3.951244 0.000000 812 +department 0 20 3.951244 0.000000 839 +beij 0 19 4.007333 0.000000 876 +boston 0 19 4.007333 0.000000 862 +partial 0 18 4.060443 0.000000 900 +germani 0 17 4.110874 0.000000 946 +young 0 16 4.174387 0.000000 991 +zhang 0 16 4.174387 0.000000 980 +partit 0 16 4.174387 0.000000 984 +attribut 0 14 4.317488 0.000000 1092 +massachusett 0 14 4.317488 0.000000 1118 +deriv 0 13 4.382027 0.000000 1145 +sigplan 0 13 4.382027 0.000000 1190 +tsinghua 0 13 4.382027 0.000000 1195 +huang 0 12 4.465908 0.000000 1202 +qualit 0 11 4.553877 0.000000 1362 +song 0 11 4.553877 0.000000 1380 +ofcomput 0 10 4.653960 0.000000 1442 +intermedi 0 9 4.753590 0.000000 1497 +discov 0 9 4.753590 0.000000 1562 +factor 0 9 4.753590 0.000000 1544 +mainten 0 9 4.753590 0.000000 1543 +congress 0 9 4.753590 0.000000 1592 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +compos 0 9 4.753590 0.000000 1527 +quantit 0 8 4.875197 0.000000 1654 +xerox 0 8 4.875197 0.000000 1725 +hallcornel 0 8 4.875197 0.000000 1757 +refere 0 7 5.010635 0.000000 1895 +uncertainti 0 7 5.010635 0.000000 1882 +increment 0 6 5.164786 0.000000 2206 +teitelbaum 0 6 5.164786 0.000000 2102 +sigact 0 6 5.164786 0.000000 2212 +indiana 0 6 5.164786 0.000000 2057 +usag 0 6 5.164786 0.000000 2209 +webster 0 5 5.347108 0.000000 2468 +florida 0 5 5.347108 0.000000 2526 +peke 0 5 5.347108 0.000000 2539 +petersburg 0 4 5.568345 0.000000 2989 +jolla 0 4 5.568345 0.000000 2988 +dagstuhl 0 4 5.568345 0.000000 2871 +stoller 0 4 5.568345 0.000000 2866 +kestrel 0 4 5.568345 0.000000 2990 +systemat 0 3 5.857933 0.000000 3781 +beach 0 3 5.857933 0.000000 3782 +schloss 0 3 5.857933 0.000000 3727 +tocomput 0 3 5.857933 0.000000 3162 +yanhong 0 2 6.263398 0.000000 5422 +cachet 0 2 6.263398 0.000000 5419 +anni 0 2 6.263398 0.000000 5420 +auxiliari 0 2 6.263398 0.000000 5421 +fuzzi 0 2 6.263398 0.000000 5423 +eduhttp 0 2 6.263398 0.000000 5424 +pageyanhong 0 1 6.957497 0.000000 12210 +forincrement 0 1 6.957497 0.000000 12211 +interactivesystem 0 1 6.957497 0.000000 12212 +systemorgan 0 1 6.957497 0.000000 12213 +talksph 0 1 6.957497 0.000000 12214 +basedsystemat 0 1 6.957497 0.000000 12215 +abstractjourn 0 1 6.957497 0.000000 12216 +inexact 0 1 6.957497 0.000000 12217 +wakayama 0 1 6.957497 0.000000 12218 +oggeb 0 1 6.957497 0.000000 12219 +basin 0 1 6.957497 0.000000 12220 +ri 0 1 6.957497 0.000000 12221 +tshinghua 0 1 6.957497 0.000000 12222 +lindlei 0 1 6.957497 0.000000 12223 +hallindiana 0 1 6.957497 0.000000 12224 +universitybloomington 0 1 6.957497 0.000000 12225 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..94e597c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +hall 0 146 1.945910 0.000000 65 +upson 0 71 2.639057 0.000000 218 +onlin 0 75 2.639057 0.000000 223 +ithaca 0 65 2.772589 0.000000 294 +favorit 0 44 3.135494 0.000000 410 +edui 0 13 4.382027 0.000000 1193 +huang 0 12 4.465908 0.000000 1202 +cheng 0 10 4.653960 0.000000 1381 +ychuang 0 3 5.857933 0.000000 3093 +huangyi 0 1 6.957497 0.000000 12226 +documentscoursesprojectaccess 0 1 6.957497 0.000000 12227 +byvisitorslast 0 1 6.957497 0.000000 12228 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..5827093f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +construct 0 139 1.945910 0.000000 82 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +world 0 115 2.197225 0.000000 126 +version 0 113 2.197225 0.000000 122 +person 0 111 2.197225 0.000000 117 +search 0 95 2.397895 0.000000 155 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +school 1 84 2.484907 2.484907 188 +chang 0 82 2.484907 0.000000 163 +thing 0 84 2.484907 0.000000 189 +wide 0 84 2.484907 0.000000 185 +novemb 0 81 2.484907 0.000000 179 +april 0 77 2.564949 0.000000 196 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +onlin 0 75 2.639057 0.000000 223 +java 0 70 2.708050 0.000000 248 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +browser 0 56 2.890372 0.000000 313 +life 0 50 3.044522 0.000000 375 +still 0 50 3.044522 0.000000 362 +better 0 45 3.135494 0.000000 401 +music 0 42 3.218876 0.000000 436 +programm 0 39 3.258097 0.000000 445 +origin 0 38 3.295837 0.000000 472 +word 0 34 3.401197 0.000000 508 +kind 0 32 3.465736 0.000000 541 +titl 0 31 3.496508 0.000000 556 +actual 0 28 3.610918 0.000000 604 +brows 0 23 3.806662 0.000000 726 +sciencecornel 0 22 3.850148 0.000000 768 +love 0 21 3.912023 0.000000 804 +wonder 0 20 3.951244 0.000000 815 +beauti 0 18 4.060443 0.000000 912 +listen 0 18 4.060443 0.000000 907 +women 0 16 4.174387 0.000000 1004 +georg 0 16 4.174387 0.000000 994 +classic 0 14 4.317488 0.000000 1084 +came 0 13 4.382027 0.000000 1197 +forth 0 13 4.382027 0.000000 1186 +went 0 12 4.465908 0.000000 1279 +meng 0 12 4.465908 0.000000 1214 +america 0 11 4.553877 0.000000 1370 +pagewelcom 0 11 4.553877 0.000000 1344 +moment 0 11 4.553877 0.000000 1379 +virginia 0 8 4.875197 0.000000 1659 +chung 0 7 5.010635 0.000000 1964 +elementari 0 7 5.010635 0.000000 1825 +marri 0 7 5.010635 0.000000 1946 +perfect 0 7 5.010635 0.000000 1921 +piano 0 6 5.164786 0.000000 2201 +sung 0 6 5.164786 0.000000 2075 +emerg 0 6 5.164786 0.000000 2038 +junior 0 5 5.347108 0.000000 2519 +everybodi 0 5 5.347108 0.000000 2517 +korea 0 4 5.568345 0.000000 2971 +keyboard 0 4 5.568345 0.000000 2970 +moon 0 4 5.568345 0.000000 2991 +hire 0 4 5.568345 0.000000 2976 +seoul 0 3 5.857933 0.000000 3783 +forward 0 3 5.857933 0.000000 3784 +korean 0 2 6.263398 0.000000 5354 +sang 0 2 6.263398 0.000000 5356 +kang 0 2 6.263398 0.000000 5360 +mason 0 2 6.263398 0.000000 4916 +infom 0 2 6.263398 0.000000 5425 +ilbo 0 1 6.957497 0.000000 12229 +myoung 0 1 6.957497 0.000000 12230 +husband 0 1 6.957497 0.000000 12231 +chungyou 0 1 6.957497 0.000000 12232 +thvisitor 0 1 6.957497 0.000000 12233 +universitywher 0 1 6.957497 0.000000 12234 +kindergarten 0 1 6.957497 0.000000 12235 +universityin 0 1 6.957497 0.000000 12236 +happiest 0 1 6.957497 0.000000 12237 +forsaic 0 1 6.957497 0.000000 12238 +shin 0 1 6.957497 0.000000 12239 +seung 0 1 6.957497 0.000000 12240 +hoon 0 1 6.957497 0.000000 12241 +newpap 0 1 6.957497 0.000000 12242 +hangook 0 1 6.957497 0.000000 12243 +chosun 0 1 6.957497 0.000000 12244 +joongang 0 1 6.957497 0.000000 12245 +appletyoosun 0 1 6.957497 0.000000 12246 +triphamm 0 1 6.957497 0.000000 12247 +sbithaca 0 1 6.957497 0.000000 12248 +ychung 0 1 6.957497 0.000000 12249 +yooschung 0 1 6.957497 0.000000 12250 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..ce641f86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,222 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +version 0 113 2.197225 0.000000 122 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +follow 0 92 2.397895 0.000000 143 +comment 0 93 2.397895 0.000000 146 +school 0 84 2.484907 0.000000 188 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +west 0 83 2.484907 0.000000 192 +thing 0 84 2.484907 0.000000 189 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +want 0 79 2.564949 0.000000 199 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +free 0 73 2.639057 0.000000 224 +order 0 69 2.708050 0.000000 249 +ithaca 0 65 2.772589 0.000000 294 +virtual 0 62 2.772589 0.000000 285 +plai 0 60 2.833213 0.000000 307 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +direct 0 57 2.890372 0.000000 316 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +date 0 51 2.995732 0.000000 344 +run 0 51 2.995732 0.000000 347 +maintain 0 51 2.995732 0.000000 342 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +effect 0 46 3.091042 0.000000 385 +better 0 45 3.135494 0.000000 401 +execut 0 45 3.135494 0.000000 404 +howev 0 41 3.218876 0.000000 422 +york 0 41 3.218876 0.000000 435 +compani 0 41 3.218876 0.000000 423 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +microsoft 0 38 3.295837 0.000000 468 +expect 0 37 3.332205 0.000000 484 +game 0 36 3.367296 0.000000 498 +download 0 36 3.367296 0.000000 489 +everi 0 34 3.401197 0.000000 519 +michael 0 35 3.401197 0.000000 514 +toler 0 33 3.433987 0.000000 533 +go 0 33 3.433987 0.000000 529 +within 0 33 3.433987 0.000000 525 +often 0 31 3.496508 0.000000 551 +someth 0 31 3.496508 0.000000 554 +computersci 0 30 3.555348 0.000000 562 +focus 0 29 3.583519 0.000000 584 +hope 0 28 3.610918 0.000000 610 +great 1 27 3.637586 3.637586 626 +linux 0 27 3.637586 0.000000 631 +though 0 27 3.637586 0.000000 622 +rule 0 26 3.688879 0.000000 638 +compar 0 26 3.688879 0.000000 648 +notic 0 25 3.737670 0.000000 675 +strategi 0 25 3.737670 0.000000 682 +other 0 24 3.761200 0.000000 697 +yahoo 0 24 3.761200 0.000000 707 +magazin 0 24 3.761200 0.000000 704 +highli 0 23 3.806662 0.000000 725 +begin 0 23 3.806662 0.000000 716 +recommend 0 22 3.850148 0.000000 737 +almost 0 22 3.850148 0.000000 742 +instal 0 22 3.850148 0.000000 754 +love 0 21 3.912023 0.000000 804 +longer 0 20 3.951244 0.000000 816 +agent 0 18 4.060443 0.000000 910 +medic 0 17 4.110874 0.000000 958 +attempt 0 17 4.110874 0.000000 917 +intro 0 17 4.110874 0.000000 915 +thought 0 17 4.110874 0.000000 945 +easi 0 16 4.174387 0.000000 969 +critic 0 16 4.174387 0.000000 982 +rate 0 15 4.248495 0.000000 1037 +save 0 14 4.317488 0.000000 1099 +block 0 13 4.382027 0.000000 1183 +front 0 13 4.382027 0.000000 1154 +wife 0 13 4.382027 0.000000 1196 +brother 0 13 4.382027 0.000000 1189 +emac 0 13 4.382027 0.000000 1143 +uniqu 0 12 4.465908 0.000000 1228 +town 0 10 4.653960 0.000000 1458 +yellow 0 9 4.753590 0.000000 1601 +seven 0 9 4.753590 0.000000 1561 +crash 0 8 4.875197 0.000000 1616 +satisfi 0 8 4.875197 0.000000 1694 +qualifi 0 8 4.875197 0.000000 1721 +contrast 0 8 4.875197 0.000000 1637 +fail 0 8 4.875197 0.000000 1655 +slightli 0 7 5.010635 0.000000 1795 +marri 0 7 5.010635 0.000000 1946 +perfect 0 7 5.010635 0.000000 1921 +bookstor 0 7 5.010635 0.000000 1837 +tri 0 6 5.164786 0.000000 2166 +seen 0 6 5.164786 0.000000 2202 +impress 0 6 5.164786 0.000000 2096 +plu 0 6 5.164786 0.000000 2004 +slate 0 6 5.164786 0.000000 2021 +suni 0 5 5.347108 0.000000 2452 +twenti 0 5 5.347108 0.000000 2540 +fairli 0 5 5.347108 0.000000 2322 +hate 0 5 5.347108 0.000000 2529 +solid 0 5 5.347108 0.000000 2255 +webpag 0 4 5.568345 0.000000 2660 +complic 0 4 5.568345 0.000000 2902 +trivial 0 4 5.568345 0.000000 2786 +closest 0 4 5.568345 0.000000 2828 +syracus 0 3 5.857933 0.000000 3553 +edudepart 0 3 5.857933 0.000000 3302 +forfault 0 3 5.857933 0.000000 3748 +outof 0 3 5.857933 0.000000 3296 +nota 0 3 5.857933 0.000000 3785 +newli 0 3 5.857933 0.000000 3786 +health 0 3 5.857933 0.000000 3787 +advertis 0 3 5.857933 0.000000 3788 +yaron 0 2 6.263398 0.000000 4122 +minski 0 2 6.263398 0.000000 4123 +veggi 0 2 6.263398 0.000000 5426 +coop 0 2 6.263398 0.000000 4213 +nowadai 0 2 6.263398 0.000000 5376 +lisa 0 2 6.263398 0.000000 5427 +theidea 0 2 6.263398 0.000000 5428 +resumesom 0 2 6.263398 0.000000 5186 +miser 0 2 6.263398 0.000000 5359 +admit 0 2 6.263398 0.000000 5429 +amazon 0 2 6.263398 0.000000 5193 +flapdragon 0 1 6.957497 0.000000 12251 +yminski 0 1 6.957497 0.000000 12252 +comstock 0 1 6.957497 0.000000 12253 +onfault 0 1 6.957497 0.000000 12254 +thetacoma 0 1 6.957497 0.000000 12255 +livether 0 1 6.957497 0.000000 12256 +anopen 0 1 6.957497 0.000000 12257 +recommendit 0 1 6.957497 0.000000 12258 +ancientchines 0 1 6.957497 0.000000 12259 +extremlysimpl 0 1 6.957497 0.000000 12260 +thannoth 0 1 6.957497 0.000000 12261 +cgoban 0 1 6.957497 0.000000 12262 +nicest 0 1 6.957497 0.000000 12263 +goboard 0 1 6.957497 0.000000 12264 +thenet 0 1 6.957497 0.000000 12265 +minutesof 0 1 6.957497 0.000000 12266 +favoritepoem 0 1 6.957497 0.000000 12267 +lafiglia 0 1 6.957497 0.000000 12268 +piang 0 1 6.957497 0.000000 12269 +advicefor 0 1 6.957497 0.000000 12270 +interestinglink 0 1 6.957497 0.000000 12271 +alarmingli 0 1 6.957497 0.000000 12272 +firefli 0 1 6.957497 0.000000 12273 +bakeri 0 1 6.957497 0.000000 12274 +bigbook 0 1 6.957497 0.000000 12275 +bigyellow 0 1 6.957497 0.000000 12276 +kinslei 0 1 6.957497 0.000000 12277 +discount 0 1 6.957497 0.000000 12278 +booksel 0 1 6.957497 0.000000 12279 +mailcrypt 0 1 6.957497 0.000000 12280 +interfacemqbtazgjohoaaaedalfhlgjmdg 0 1 6.957497 0.000000 12281 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 0 1 6.957497 0.000000 12282 +rbylf 0 1 6.957497 0.000000 12283 +zwqujcioczoecv 0 1 6.957497 0.000000 12284 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 0 1 6.957497 0.000000 12285 +gkgarsokrinnoazihja 0 1 6.957497 0.000000 12286 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 0 1 6.957497 0.000000 12287 +wumjgzsnvispwkrvzgdrojswmc 0 1 6.957497 0.000000 12288 +eigsqsb 0 1 6.957497 0.000000 12289 +bsbpw 0 1 6.957497 0.000000 12290 +jcwz 0 1 6.957497 0.000000 12291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..143996ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +address 0 170 1.791759 0.000000 62 +introduct 0 126 2.079442 0.000000 87 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +info 0 85 2.484907 0.000000 176 +novemb 0 81 2.484907 0.000000 179 +state 0 76 2.564949 0.000000 207 +resum 0 79 2.564949 0.000000 217 +html 0 75 2.639057 0.000000 235 +artifici 0 63 2.772589 0.000000 280 +foundat 0 62 2.772589 0.000000 286 +semest 0 58 2.890372 0.000000 312 +format 0 48 3.044522 0.000000 356 +http 0 41 3.218876 0.000000 420 +unit 0 21 3.912023 0.000000 779 +modif 0 17 4.110874 0.000000 913 +practicum 0 16 4.174387 0.000000 960 +systemsc 0 11 4.553877 0.000000 1293 +pagecours 0 5 5.347108 0.000000 2395 +intelligencec 0 4 5.568345 0.000000 2673 +visionfal 0 2 6.263398 0.000000 4749 +eduhttp 0 2 6.263398 0.000000 5424 +yuichi 1 1 6.957497 6.957497 12292 +tsuchimoto 0 1 6.957497 0.000000 12293 +translatorsc 0 1 6.957497 0.000000 12294 +pageyuichi 0 1 6.957497 0.000000 12295 +workfal 0 1 6.957497 0.000000 12296 +engineeringspr 0 1 6.957497 0.000000 12297 +computingi 0 1 6.957497 0.000000 12298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..75a0cd08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +document 0 121 2.079442 0.000000 89 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +version 0 113 2.197225 0.000000 122 +pleas 0 113 2.197225 0.000000 114 +specif 0 106 2.197225 0.000000 106 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +proceed 0 93 2.397895 0.000000 152 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +build 0 85 2.484907 0.000000 184 +ieee 0 86 2.484907 0.000000 190 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +larg 0 82 2.484907 0.000000 168 +messag 1 76 2.564949 2.564949 212 +know 0 80 2.564949 0.000000 198 +appear 0 78 2.564949 0.000000 210 +interfac 0 79 2.564949 0.000000 209 +sourc 0 77 2.564949 0.000000 201 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +state 0 76 2.564949 0.000000 207 +dynam 0 76 2.564949 0.000000 194 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +integr 0 67 2.708050 0.000000 245 +abstract 0 62 2.772589 0.000000 276 +evalu 0 64 2.772589 0.000000 266 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +share 0 59 2.833213 0.000000 304 +detail 0 57 2.890372 0.000000 321 +major 0 56 2.890372 0.000000 315 +direct 0 57 2.890372 0.000000 316 +think 0 57 2.890372 0.000000 314 +thesi 0 57 2.890372 0.000000 327 +processor 0 54 2.944439 0.000000 335 +allow 0 53 2.944439 0.000000 333 +februari 0 54 2.944439 0.000000 328 +talk 0 53 2.944439 0.000000 336 +investig 0 51 2.995732 0.000000 353 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +without 0 50 3.044522 0.000000 370 +adapt 0 46 3.091042 0.000000 387 +effect 0 46 3.091042 0.000000 385 +california 0 46 3.091042 0.000000 388 +describ 0 45 3.135494 0.000000 400 +made 0 44 3.135494 0.000000 398 +even 0 45 3.135494 0.000000 393 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +offer 0 43 3.178054 0.000000 414 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +slide 0 38 3.295837 0.000000 467 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +workstat 0 37 3.332205 0.000000 479 +connect 0 37 3.332205 0.000000 485 +word 0 34 3.401197 0.000000 508 +toler 0 33 3.433987 0.000000 533 +concept 0 32 3.465736 0.000000 537 +someth 0 31 3.496508 0.000000 554 +power 0 30 3.555348 0.000000 573 +compon 0 30 3.555348 0.000000 570 +exist 0 30 3.555348 0.000000 569 +rang 0 30 3.555348 0.000000 565 +releas 0 28 3.610918 0.000000 616 +cluster 0 28 3.610918 0.000000 612 +multiprocessor 0 28 3.610918 0.000000 605 +pass 0 28 3.610918 0.000000 611 +packag 0 28 3.610918 0.000000 614 +intend 0 28 3.610918 0.000000 599 +progress 0 28 3.610918 0.000000 598 +propos 0 28 3.610918 0.000000 602 +measur 0 28 3.610918 0.000000 609 +scale 0 28 3.610918 0.000000 613 +becom 0 28 3.610918 0.000000 603 +though 0 27 3.637586 0.000000 622 +compar 0 26 3.688879 0.000000 648 +berkelei 0 26 3.688879 0.000000 657 +altern 0 26 3.688879 0.000000 641 +challeng 0 26 3.688879 0.000000 653 +reliabl 0 25 3.737670 0.000000 674 +concern 0 25 3.737670 0.000000 666 +demonstr 0 24 3.761200 0.000000 694 +higher 0 24 3.761200 0.000000 690 +magazin 0 24 3.761200 0.000000 704 +flow 0 24 3.761200 0.000000 700 +reduc 0 22 3.850148 0.000000 759 +instal 0 22 3.850148 0.000000 754 +defin 0 22 3.850148 0.000000 746 +varieti 0 22 3.850148 0.000000 740 +flexibl 0 21 3.912023 0.000000 792 +portabl 0 20 3.951244 0.000000 819 +benchmark 0 19 4.007333 0.000000 859 +comparison 0 19 4.007333 0.000000 863 +lower 0 18 4.060443 0.000000 886 +layer 0 17 4.110874 0.000000 926 +interconnect 0 17 4.110874 0.000000 937 +whether 0 17 4.110874 0.000000 918 +protect 0 17 4.110874 0.000000 935 +outlin 0 17 4.110874 0.000000 914 +latenc 0 16 4.174387 0.000000 993 +commerci 0 16 4.174387 0.000000 1005 +across 0 16 4.174387 0.000000 974 +overhead 0 15 4.248495 0.000000 1035 +driven 0 15 4.248495 0.000000 1048 +micro 0 15 4.248495 0.000000 1031 +split 0 14 4.317488 0.000000 1078 +trip 0 14 4.317488 0.000000 1113 +achiev 0 14 4.317488 0.000000 1088 +eicken 0 13 4.382027 0.000000 1134 +thorsten 0 13 4.382027 0.000000 1133 +block 0 13 4.382027 0.000000 1183 +directli 0 13 4.382027 0.000000 1141 +signific 0 13 4.382027 0.000000 1125 +earlier 0 13 4.382027 0.000000 1140 +carri 0 13 4.382027 0.000000 1152 +coordin 0 13 4.382027 0.000000 1182 +introduc 0 13 4.382027 0.000000 1139 +characterist 0 12 4.465908 0.000000 1257 +onth 0 12 4.465908 0.000000 1218 +buffer 0 12 4.465908 0.000000 1211 +bandwidth 0 11 4.553877 0.000000 1365 +chri 0 11 4.553877 0.000000 1311 +primit 0 11 4.553877 0.000000 1317 +underli 0 10 4.653960 0.000000 1410 +equip 0 10 4.653960 0.000000 1459 +equival 0 9 4.753590 0.000000 1496 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +transmiss 0 9 4.753590 0.000000 1588 +significantli 0 9 4.753590 0.000000 1508 +desir 0 9 4.753590 0.000000 1542 +chao 0 8 4.875197 0.000000 1753 +spec 0 8 4.875197 0.000000 1640 +round 0 8 4.875197 0.000000 1769 +readm 0 8 4.875197 0.000000 1699 +ring 0 8 4.875197 0.000000 1684 +vineet 0 8 4.875197 0.000000 1639 +andcomput 0 8 4.875197 0.000000 1623 +gold 0 8 4.875197 0.000000 1745 +coast 0 8 4.875197 0.000000 1746 +poor 0 8 4.875197 0.000000 1736 +pittsburgh 0 7 5.010635 0.000000 1938 +larger 0 7 5.010635 0.000000 1875 +suffici 0 7 5.010635 0.000000 1897 +freeli 0 6 5.164786 0.000000 2014 +lack 0 6 5.164786 0.000000 1994 +affect 0 6 5.164786 0.000000 2044 +goldstein 0 6 5.164786 0.000000 2168 +phase 0 6 5.164786 0.000000 1977 +older 0 5 5.347108 0.000000 2387 +buch 0 5 5.347108 0.000000 2272 +culler 0 5 5.347108 0.000000 2381 +symp 0 5 5.347108 0.000000 2376 +australia 0 5 5.347108 0.000000 2478 +grzegorz 0 4 5.568345 0.000000 2923 +czajkowski 0 4 5.568345 0.000000 2924 +conform 0 4 5.568345 0.000000 2941 +theus 0 4 5.568345 0.000000 2992 +throughput 0 4 5.568345 0.000000 2993 +andevalu 0 4 5.568345 0.000000 2706 +asymptot 0 4 5.568345 0.000000 2676 +basu 0 4 5.568345 0.000000 2843 +forparallel 0 4 5.568345 0.000000 2703 +mpp 0 3 5.857933 0.000000 3194 +neta 0 3 5.857933 0.000000 3789 +thegener 0 3 5.857933 0.000000 3648 +moreinform 0 3 5.857933 0.000000 3307 +let 0 3 5.857933 0.000000 3790 +avula 0 3 5.857933 0.000000 3600 +abridg 0 3 5.857933 0.000000 3772 +magnitud 0 3 5.857933 0.000000 3582 +roughli 0 3 5.857933 0.000000 3097 +schauser 0 3 5.857933 0.000000 3599 +tremend 0 3 5.857933 0.000000 3453 +thegam 0 2 6.263398 0.000000 5430 +differencebetween 0 2 6.263398 0.000000 5431 +pleaseclick 0 2 6.263398 0.000000 5432 +messageslow 0 2 6.263398 0.000000 5040 +meiko 0 2 6.263398 0.000000 4996 +focuseson 0 2 6.263398 0.000000 5433 +veena 0 2 6.263398 0.000000 5000 +thecommun 0 2 6.263398 0.000000 4928 +thesetechniqu 0 2 6.263398 0.000000 4263 +thenetwork 0 2 6.263398 0.000000 5434 +incommun 0 2 6.263398 0.000000 4349 +microsecond 0 2 6.263398 0.000000 5435 +tominim 0 2 6.263398 0.000000 5436 +unnecessarili 0 2 6.263398 0.000000 4688 +mainstream 0 2 6.263398 0.000000 5437 +contactthorsten 0 2 6.263398 0.000000 5438 +activemessag 0 1 6.957497 0.000000 12299 +secondpart 0 1 6.957497 0.000000 12300 +messagescornel 0 1 6.957497 0.000000 12301 +implementationsact 0 1 6.957497 0.000000 12302 +codereleas 0 1 6.957497 0.000000 12303 +instructionson 0 1 6.957497 0.000000 12304 +releasenot 0 1 6.957497 0.000000 12305 +fileto 0 1 6.957497 0.000000 12306 +currentvers 0 1 6.957497 0.000000 12307 +libmpci 0 1 6.957497 0.000000 12308 +thedistribut 0 1 6.957497 0.000000 12309 +fordetail 0 1 6.957497 0.000000 12310 +briefnot 0 1 6.957497 0.000000 12311 +ibmrisc 0 1 6.957497 0.000000 12312 +hawblitzel 0 1 6.957497 0.000000 12313 +ieeesupercomput 0 1 6.957497 0.000000 12314 +spiteof 0 1 6.957497 0.000000 12315 +scommun 0 1 6.957497 0.000000 12316 +inferior 0 1 6.957497 0.000000 12317 +tmccm 0 1 6.957497 0.000000 12318 +standardmessag 0 1 6.957497 0.000000 12319 +tooffer 0 1 6.957497 0.000000 12320 +networkadapt 0 1 6.957497 0.000000 12321 +yieldsa 0 1 6.957497 0.000000 12322 +communicationsubstr 0 1 6.957497 0.000000 12323 +cbenchmark 0 1 6.957497 0.000000 12324 +lowmessag 0 1 6.957497 0.000000 12325 +compens 0 1 6.957497 0.000000 12326 +networklat 0 1 6.957497 0.000000 12327 +availablempich 0 1 6.957497 0.000000 12328 +implementationbenchmark 0 1 6.957497 0.000000 12329 +firmwar 0 1 6.957497 0.000000 12330 +butdo 0 1 6.957497 0.000000 12331 +assumefamiliar 0 1 6.957497 0.000000 12332 +mainperform 0 1 6.957497 0.000000 12333 +timeof 0 1 6.957497 0.000000 12334 +smessag 0 1 6.957497 0.000000 12335 +theu 0 1 6.957497 0.000000 12336 +themeiko 0 1 6.957497 0.000000 12337 +thehpam 0 1 6.957497 0.000000 12338 +fddi 0 1 6.957497 0.000000 12339 +theparagon 0 1 6.957497 0.000000 12340 +thesp 0 1 6.957497 0.000000 12341 +networksus 0 1 6.957497 0.000000 12342 +anyndia 0 1 6.957497 0.000000 12343 +ascompar 0 1 6.957497 0.000000 12344 +anatm 0 1 6.957497 0.000000 12345 +systemsoftwar 0 1 6.957497 0.000000 12346 +streamcommun 0 1 6.957497 0.000000 12347 +flowcontrol 0 1 6.957497 0.000000 12348 +builtfrom 0 1 6.957497 0.000000 12349 +artmultiprocessor 0 1 6.957497 0.000000 12350 +systemcoordin 0 1 6.957497 0.000000 12351 +andrequir 0 1 6.957497 0.000000 12352 +clusterinterconnect 0 1 6.957497 0.000000 12353 +showappl 0 1 6.957497 0.000000 12354 +smallmessag 0 1 6.957497 0.000000 12355 +messagesimplement 0 1 6.957497 0.000000 12356 +abstractth 0 1 6.957497 0.000000 12357 +overlapcomput 0 1 6.957497 0.000000 12358 +sacrificingprocessor 0 1 6.957497 0.000000 12359 +passingmultiprocessor 0 1 6.957497 0.000000 12360 +researchprototyp 0 1 6.957497 0.000000 12361 +communicationoverhead 0 1 6.957497 0.000000 12362 +simplecommun 0 1 6.957497 0.000000 12363 +isintrins 0 1 6.957497 0.000000 12364 +thehardwar 0 1 6.957497 0.000000 12365 +ncube 0 1 6.957497 0.000000 12366 +memoryextens 0 1 6.957497 0.000000 12367 +messagesar 0 1 6.957497 0.000000 12368 +forwhich 0 1 6.957497 0.000000 12369 +hardwaresupport 0 1 6.957497 0.000000 12370 +ofenhanc 0 1 6.957497 0.000000 12371 +efficientcommun 0 1 6.957497 0.000000 12372 +sitesact 0 1 6.957497 0.000000 12373 +messagesin 0 1 6.957497 0.000000 12374 +projectfor 0 1 6.957497 0.000000 12375 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..7f37bc85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +provid 0 121 2.079442 0.000000 94 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +commun 0 95 2.397895 0.000000 157 +control 0 82 2.484907 0.000000 164 +environ 0 84 2.484907 0.000000 177 +wide 0 84 2.484907 0.000000 185 +chang 0 82 2.484907 0.000000 163 +resourc 0 81 2.484907 0.000000 172 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +plan 0 65 2.772589 0.000000 272 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +creat 0 63 2.772589 0.000000 277 +dept 0 64 2.772589 0.000000 291 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +explor 0 58 2.890372 0.000000 324 +direct 0 57 2.890372 0.000000 316 +detail 0 57 2.890372 0.000000 321 +variou 0 56 2.890372 0.000000 317 +approach 0 48 3.044522 0.000000 366 +featur 0 46 3.091042 0.000000 386 +electron 0 47 3.091042 0.000000 379 +possibl 0 47 3.091042 0.000000 378 +offer 0 43 3.178054 0.000000 414 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +might 0 41 3.218876 0.000000 426 +autom 0 41 3.218876 0.000000 434 +howev 0 41 3.218876 0.000000 422 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +expect 0 37 3.332205 0.000000 484 +multi 0 36 3.367296 0.000000 493 +manual 0 35 3.401197 0.000000 504 +next 0 34 3.401197 0.000000 517 +toler 0 33 3.433987 0.000000 533 +within 0 33 3.433987 0.000000 525 +fault 0 32 3.465736 0.000000 547 +extend 0 32 3.465736 0.000000 539 +rang 0 30 3.555348 0.000000 565 +exist 0 30 3.555348 0.000000 569 +option 0 30 3.555348 0.000000 575 +compon 0 30 3.555348 0.000000 570 +secur 0 30 3.555348 0.000000 577 +becom 0 28 3.610918 0.000000 603 +hope 0 28 3.610918 0.000000 610 +effort 0 26 3.688879 0.000000 652 +enhanc 0 26 3.688879 0.000000 644 +reliabl 0 25 3.737670 0.000000 674 +higher 0 24 3.761200 0.000000 690 +demonstr 0 24 3.761200 0.000000 694 +initi 0 23 3.806662 0.000000 717 +varieti 0 22 3.850148 0.000000 740 +thu 0 21 3.912023 0.000000 773 +similar 0 21 3.912023 0.000000 771 +flexibl 0 21 3.912023 0.000000 792 +among 0 21 3.912023 0.000000 781 +toolkit 0 20 3.951244 0.000000 835 +media 0 19 4.007333 0.000000 861 +element 0 18 4.060443 0.000000 895 +speed 0 18 4.060443 0.000000 911 +failur 0 18 4.060443 0.000000 898 +commerci 0 16 4.174387 0.000000 1005 +latenc 0 16 4.174387 0.000000 993 +stock 0 16 4.174387 0.000000 1007 +permit 0 16 4.174387 0.000000 962 +remot 0 15 4.248495 0.000000 1041 +novel 0 15 4.248495 0.000000 1039 +capabl 0 15 4.248495 0.000000 1016 +transit 0 15 4.248495 0.000000 1046 +action 0 15 4.248495 0.000000 1038 +horu 0 14 4.317488 0.000000 1116 +demand 0 14 4.317488 0.000000 1073 +floor 0 14 4.317488 0.000000 1070 +signific 0 13 4.382027 0.000000 1125 +coordin 0 13 4.382027 0.000000 1182 +kenneth 0 12 4.465908 0.000000 1265 +branch 0 11 4.553877 0.000000 1318 +reness 0 11 4.553877 0.000000 1333 +extrem 0 11 4.553877 0.000000 1330 +market 0 11 4.553877 0.000000 1361 +impact 0 11 4.553877 0.000000 1334 +prior 0 10 4.653960 0.000000 1438 +success 0 10 4.653960 0.000000 1390 +traffic 0 10 4.653960 0.000000 1421 +birman 0 9 4.753590 0.000000 1531 +robbert 0 9 4.753590 0.000000 1529 +telecommun 0 9 4.753590 0.000000 1565 +govern 0 9 4.753590 0.000000 1581 +occur 0 9 4.753590 0.000000 1572 +manufactur 0 8 4.875197 0.000000 1634 +illustr 0 8 4.875197 0.000000 1679 +synchroni 0 7 5.010635 0.000000 1923 +ground 0 7 5.010635 0.000000 1955 +privaci 0 6 5.164786 0.000000 2144 +benefit 0 6 5.164786 0.000000 2213 +isi 0 5 5.347108 0.000000 2443 +matur 0 5 5.347108 0.000000 2269 +isth 0 5 5.347108 0.000000 2532 +licens 0 5 5.347108 0.000000 2520 +mission 0 5 5.347108 0.000000 2465 +respond 0 5 5.347108 0.000000 2354 +substanti 0 4 5.568345 0.000000 2921 +visibl 0 4 5.568345 0.000000 2994 +naval 0 4 5.568345 0.000000 2920 +rapidli 0 4 5.568345 0.000000 2850 +militari 0 3 5.857933 0.000000 3326 +reconfigur 0 3 5.857933 0.000000 3556 +ofhoru 0 2 6.263398 0.000000 5181 +offersa 0 2 6.263398 0.000000 4071 +securityand 0 2 6.263398 0.000000 5066 +retain 0 2 6.263398 0.000000 5443 +basedcommun 0 2 6.263398 0.000000 4348 +stratu 0 2 6.263398 0.000000 5345 +isdescrib 0 2 6.263398 0.000000 5444 +groupwar 0 2 6.263398 0.000000 4857 +theatr 0 2 6.263398 0.000000 5173 +environmenthoru 0 1 6.957497 0.000000 12408 +shoru 0 1 6.957497 0.000000 12409 +reliabledistribut 0 1 6.957497 0.000000 12410 +demonstrategroupwar 0 1 6.957497 0.000000 12411 +foundto 0 1 6.957497 0.000000 12412 +synchronousprocess 0 1 6.957497 0.000000 12413 +importantresearch 0 1 6.957497 0.000000 12414 +performancer 0 1 6.957497 0.000000 12415 +calledact 0 1 6.957497 0.000000 12416 +messageswith 0 1 6.957497 0.000000 12417 +playbacksystem 0 1 6.957497 0.000000 12418 +calledcontinu 0 1 6.957497 0.000000 12419 +multimediaserv 0 1 6.957497 0.000000 12420 +telemedicin 0 1 6.957497 0.000000 12421 +videoon 0 1 6.957497 0.000000 12422 +andsecur 0 1 6.957497 0.000000 12423 +expectrapid 0 1 6.957497 0.000000 12424 +uptak 0 1 6.957497 0.000000 12425 +spana 0 1 6.957497 0.000000 12426 +financialtrad 0 1 6.957497 0.000000 12427 +factori 0 1 6.957497 0.000000 12428 +fordiscret 0 1 6.957497 0.000000 12429 +beingexplor 0 1 6.957497 0.000000 12430 +othernon 0 1 6.957497 0.000000 12431 +hiper 0 1 6.957497 0.000000 12432 +systemthat 0 1 6.957497 0.000000 12433 +aegi 0 1 6.957497 0.000000 12434 +battleradar 0 1 6.957497 0.000000 12435 +benefitfrom 0 1 6.957497 0.000000 12436 +migrateisi 0 1 6.957497 0.000000 12437 +communityin 0 1 6.957497 0.000000 12438 +agreementswith 0 1 6.957497 0.000000 12439 +subsidiari 0 1 6.957497 0.000000 12440 +mixtur 0 1 6.957497 0.000000 12441 +technologieswil 0 1 6.957497 0.000000 12442 +beseen 0 1 6.957497 0.000000 12443 +belowshow 0 1 6.957497 0.000000 12444 +andus 0 1 6.957497 0.000000 12445 +asset 0 1 6.957497 0.000000 12446 +thissort 0 1 6.957497 0.000000 12447 +utmost 0 1 6.957497 0.000000 12448 +whilealso 0 1 6.957497 0.000000 12449 +civilianand 0 1 6.957497 0.000000 12450 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..6ee82053 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +group 0 183 1.609438 0.000000 36 +applic 1 170 1.791759 1.791759 56 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +commun 0 95 2.397895 0.000000 157 +level 0 87 2.484907 0.000000 180 +build 0 85 2.484907 0.000000 184 +multimedia 1 68 2.708050 2.708050 258 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +improv 0 62 2.772589 0.000000 289 +approach 0 48 3.044522 0.000000 366 +adapt 0 46 3.091042 0.000000 387 +video 0 44 3.135494 0.000000 405 +combin 0 42 3.218876 0.000000 421 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +brian 0 38 3.295837 0.000000 466 +workstat 0 37 3.332205 0.000000 479 +secur 0 30 3.555348 0.000000 577 +platform 0 29 3.583519 0.000000 591 +cluster 0 28 3.610918 0.000000 612 +reliabl 0 25 3.737670 0.000000 674 +store 0 24 3.761200 0.000000 693 +flexibl 0 21 3.912023 0.000000 792 +fund 0 21 3.912023 0.000000 805 +toolkit 0 20 3.951244 0.000000 835 +portabl 0 20 3.951244 0.000000 819 +smith 0 20 3.951244 0.000000 820 +media 0 19 4.007333 0.000000 861 +critic 0 16 4.174387 0.000000 982 +commerci 0 16 4.174387 0.000000 1005 +horu 0 14 4.317488 0.000000 1116 +audio 0 14 4.317488 0.000000 1094 +eicken 0 13 4.382027 0.000000 1134 +primit 0 11 4.553877 0.000000 1317 +facilit 0 10 4.653960 0.000000 1412 +rapid 0 10 4.653960 0.000000 1453 +strength 0 9 4.753590 0.000000 1494 +transport 0 8 4.875197 0.000000 1672 +fromth 0 7 5.010635 0.000000 1802 +contract 0 6 5.164786 0.000000 1985 +testb 0 5 5.347108 0.000000 2456 +darpa 0 4 5.568345 0.000000 2944 +dramat 0 3 5.857933 0.000000 3239 +magnitud 0 3 5.857933 0.000000 3582 +militari 0 3 5.857933 0.000000 3326 +multimediaappl 0 3 5.857933 0.000000 3274 +todevelop 0 2 6.263398 0.000000 5448 +communicationprimit 0 2 6.263398 0.000000 5449 +thorstenvon 0 2 6.263398 0.000000 5450 +medianet 0 1 6.957497 0.000000 12468 +projectmedianet 0 1 6.957497 0.000000 12469 +protocolsth 0 1 6.957497 0.000000 12470 +communicationmak 0 1 6.957497 0.000000 12471 +foradvanc 0 1 6.957497 0.000000 12472 +includeaudio 0 1 6.957497 0.000000 12473 +technologyofficefor 0 1 6.957497 0.000000 12474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..a771fa5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +introduct 0 126 2.079442 0.000000 87 +document 0 121 2.079442 0.000000 89 +theori 0 111 2.197225 0.000000 127 +user 0 104 2.302585 0.000000 137 +mani 0 92 2.397895 0.000000 150 +help 0 83 2.484907 0.000000 175 +main 0 67 2.708050 0.000000 256 +written 0 63 2.772589 0.000000 278 +browser 0 56 2.890372 0.000000 313 +reason 0 57 2.890372 0.000000 318 +index 0 56 2.890372 0.000000 309 +suggest 0 53 2.944439 0.000000 331 +autom 0 41 3.218876 0.000000 434 +announc 0 40 3.258097 0.000000 441 +articl 0 33 3.433987 0.000000 530 +linux 0 27 3.637586 0.000000 631 +theorem 0 21 3.912023 0.000000 786 +feedback 0 19 4.007333 0.000000 854 +nuprl 1 10 4.653960 4.653960 1402 +curiou 0 5 5.347108 0.000000 2541 +vaughn 0 1 6.957497 0.000000 12475 +askaltavista 0 1 6.957497 0.000000 12476 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..bffb66c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +file 0 132 1.945910 0.000000 70 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +specif 0 106 2.197225 0.000000 106 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +commun 0 95 2.397895 0.000000 157 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +center 0 88 2.397895 0.000000 158 +section 0 94 2.397895 0.000000 149 +mani 0 92 2.397895 0.000000 150 +activ 0 84 2.484907 0.000000 182 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +contain 0 81 2.484907 0.000000 174 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +sourc 0 77 2.564949 0.000000 201 +want 0 79 2.564949 0.000000 199 +master 0 76 2.564949 0.000000 216 +interfac 0 79 2.564949 0.000000 209 +addit 0 74 2.639057 0.000000 228 +main 0 67 2.708050 0.000000 256 +experi 0 64 2.772589 0.000000 283 +creat 0 63 2.772589 0.000000 277 +previou 0 62 2.772589 0.000000 290 +locat 0 59 2.833213 0.000000 303 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +space 0 57 2.890372 0.000000 310 +variou 0 56 2.890372 0.000000 317 +local 0 55 2.944439 0.000000 334 +found 0 53 2.944439 0.000000 337 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +sampl 0 53 2.944439 0.000000 339 +processor 0 54 2.944439 0.000000 335 +hardwar 0 51 2.995732 0.000000 350 +run 0 51 2.995732 0.000000 347 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +directori 0 45 3.135494 0.000000 396 +execut 0 45 3.135494 0.000000 404 +describ 0 45 3.135494 0.000000 400 +offer 0 43 3.178054 0.000000 414 +programm 0 39 3.258097 0.000000 445 +must 0 40 3.258097 0.000000 442 +continu 0 39 3.258097 0.000000 448 +open 0 38 3.295837 0.000000 469 +return 0 34 3.401197 0.000000 502 +global 0 34 3.401197 0.000000 520 +word 0 34 3.401197 0.000000 508 +built 0 29 3.583519 0.000000 592 +releas 0 28 3.610918 0.000000 616 +pass 0 28 3.610918 0.000000 611 +though 0 27 3.637586 0.000000 622 +proc 0 26 3.688879 0.000000 649 +frame 0 24 3.761200 0.000000 684 +displai 0 23 3.806662 0.000000 712 +instal 0 22 3.850148 0.000000 754 +instead 0 22 3.850148 0.000000 756 +path 0 21 3.912023 0.000000 778 +portabl 0 20 3.951244 0.000000 819 +sure 0 20 3.951244 0.000000 813 +debug 0 17 4.110874 0.000000 944 +regular 0 17 4.110874 0.000000 929 +stop 0 17 4.110874 0.000000 942 +layer 0 17 4.110874 0.000000 926 +remot 0 15 4.248495 0.000000 1041 +configur 0 15 4.248495 0.000000 1012 +overhead 0 15 4.248495 0.000000 1035 +fortran 0 15 4.248495 0.000000 1027 +split 1 14 4.317488 4.317488 1078 +command 0 14 4.317488 0.000000 1083 +matlab 0 14 4.317488 0.000000 1081 +script 0 13 4.382027 0.000000 1171 +step 0 13 4.382027 0.000000 1138 +emac 0 13 4.382027 0.000000 1143 +difficulti 0 13 4.382027 0.000000 1132 +remov 0 12 4.465908 0.000000 1225 +insid 0 12 4.465908 0.000000 1262 +characterist 0 12 4.465908 0.000000 1257 +replic 0 12 4.465908 0.000000 1231 +node 0 11 4.553877 0.000000 1326 +statement 0 11 4.553877 0.000000 1313 +eight 0 11 4.553877 0.000000 1331 +bandwidth 0 11 4.553877 0.000000 1365 +enter 0 10 4.653960 0.000000 1454 +stack 0 10 4.653960 0.000000 1389 +login 0 9 4.753590 0.000000 1550 +informationabout 0 9 4.753590 0.000000 1515 +readm 0 8 4.875197 0.000000 1699 +job 0 8 4.875197 0.000000 1702 +insert 0 8 4.875197 0.000000 1687 +round 0 8 4.875197 0.000000 1769 +header 0 7 5.010635 0.000000 1787 +hit 0 7 5.010635 0.000000 1965 +attach 0 7 5.010635 0.000000 1785 +exactli 0 7 5.010635 0.000000 1817 +usag 0 6 5.164786 0.000000 2209 +neither 0 6 5.164786 0.000000 1990 +phase 0 6 5.164786 0.000000 1977 +onto 0 6 5.164786 0.000000 2089 +proce 0 6 5.164786 0.000000 2114 +nativ 0 6 5.164786 0.000000 2192 +whichi 0 6 5.164786 0.000000 2056 +shell 0 5 5.347108 0.000000 2353 +overlap 0 5 5.347108 0.000000 2368 +theth 0 5 5.347108 0.000000 2325 +czar 0 5 5.347108 0.000000 2503 +cuc 0 4 5.568345 0.000000 2630 +makefil 0 4 5.568345 0.000000 2662 +spam 0 4 5.568345 0.000000 2927 +arch 0 4 5.568345 0.000000 2995 +forparallel 0 4 5.568345 0.000000 2703 +hide 0 4 5.568345 0.000000 2996 +commonli 0 4 5.568345 0.000000 2877 +asymptot 0 4 5.568345 0.000000 2676 +grzegorz 0 4 5.568345 0.000000 2923 +czajkowski 0 4 5.568345 0.000000 2924 +andyou 0 3 5.857933 0.000000 3256 +setenv 0 2 6.263398 0.000000 4491 +haveth 0 2 6.263398 0.000000 5378 +properli 0 2 6.263398 0.000000 5454 +animplement 0 2 6.263398 0.000000 4931 +granita 0 1 6.957497 0.000000 12493 +bench 0 1 6.957497 0.000000 12494 +am_run 0 1 6.957497 0.000000 12495 +tcsh 0 1 6.957497 0.000000 12496 +bash 0 1 6.957497 0.000000 12497 +gmake 0 1 6.957497 0.000000 12498 +ampicc 0 1 6.957497 0.000000 12499 +granitathrough 0 1 6.957497 0.000000 12500 +asinteract 0 1 6.957497 0.000000 12501 +problemsdur 0 1 6.957497 0.000000 12502 +stufffrom 0 1 6.957497 0.000000 12503 +unam 0 1 6.957497 0.000000 12504 +manyou 0 1 6.957497 0.000000 12505 +infoexplor 0 1 6.957497 0.000000 12506 +commandsand 0 1 6.957497 0.000000 12507 +activemassag 0 1 6.957497 0.000000 12508 +peor 0 1 6.957497 0.000000 12509 +messagesor 0 1 6.957497 0.000000 12510 +homegrown 0 1 6.957497 0.000000 12511 +softwarein 0 1 6.957497 0.000000 12512 +besur 0 1 6.957497 0.000000 12513 +csplit 0 1 6.957497 0.000000 12514 +globalpoint 0 1 6.957497 0.000000 12515 +dereferenc 0 1 6.957497 0.000000 12516 +latencyof 0 1 6.957497 0.000000 12517 +shellsshould 0 1 6.957497 0.000000 12518 +asact 0 1 6.957497 0.000000 12519 +scriptsloc 0 1 6.957497 0.000000 12520 +programfoo 0 1 6.957497 0.000000 12521 +foodebug 0 1 6.957497 0.000000 12522 +splitc_debug 0 1 6.957497 0.000000 12523 +aftersplitc_main 0 1 6.957497 0.000000 12524 +ongranita 0 1 6.957497 0.000000 12525 +youwant 0 1 6.957497 0.000000 12526 +thenattach 0 1 6.957497 0.000000 12527 +breakpoint 0 1 6.957497 0.000000 12528 +messagesact 0 1 6.957497 0.000000 12529 +layerthat 0 1 6.957497 0.000000 12530 +triplat 0 1 6.957497 0.000000 12531 +libspgam 0 1 6.957497 0.000000 12532 +aand 0 1 6.957497 0.000000 12533 +beforerun 0 1 6.957497 0.000000 12534 +runningprgm 0 1 6.957497 0.000000 12535 +mpimpi 0 1 6.957497 0.000000 12536 +popularmessag 0 1 6.957497 0.000000 12537 +mpich 0 1 6.957497 0.000000 12538 +overact 0 1 6.957497 0.000000 12539 +easiest 0 1 6.957497 0.000000 12540 +fooyou 0 1 6.957497 0.000000 12541 +lookat 0 1 6.957497 0.000000 12542 +examplesin 0 1 6.957497 0.000000 12543 +ampi 0 1 6.957497 0.000000 12544 +likeordinari 0 1 6.957497 0.000000 12545 +softwaresoftwar 0 1 6.957497 0.000000 12546 +xpdbx 0 1 6.957497 0.000000 12547 +bison 0 1 6.957497 0.000000 12548 +problemsif 0 1 6.957497 0.000000 12549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..9fa5f82e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +languag 1 227 1.386294 1.386294 26 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +model 1 145 1.945910 1.945910 69 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +version 0 113 2.197225 0.000000 122 +code 0 108 2.197225 0.000000 116 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +peopl 0 96 2.302585 0.000000 132 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +activ 0 84 2.484907 0.000000 182 +environ 0 84 2.484907 0.000000 177 +method 0 80 2.564949 0.000000 213 +simul 0 66 2.708050 0.000000 255 +creat 0 63 2.772589 0.000000 277 +overview 0 56 2.890372 0.000000 323 +direct 0 57 2.890372 0.000000 316 +scientif 0 53 2.944439 0.000000 341 +hardwar 0 51 2.995732 0.000000 350 +numer 0 49 3.044522 0.000000 369 +physic 0 47 3.091042 0.000000 377 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +discuss 0 45 3.135494 0.000000 399 +paul 0 38 3.295837 0.000000 471 +collabor 0 32 3.465736 0.000000 543 +transform 0 32 3.465736 0.000000 542 +richard 0 31 3.496508 0.000000 559 +compon 0 30 3.555348 0.000000 570 +semant 0 29 3.583519 0.000000 587 +propos 0 28 3.610918 0.000000 602 +effort 0 26 3.688879 0.000000 652 +defin 0 22 3.850148 0.000000 746 +geometri 0 22 3.850148 0.000000 752 +reduc 0 22 3.850148 0.000000 759 +qualiti 0 20 3.951244 0.000000 832 +longer 0 20 3.951244 0.000000 816 +particularli 0 19 4.007333 0.000000 867 +geometr 0 19 4.007333 0.000000 852 +exercis 0 19 4.007333 0.000000 842 +brief 0 16 4.174387 0.000000 1001 +topolog 0 14 4.317488 0.000000 1089 +mesh 0 11 4.553877 0.000000 1351 +guarante 0 10 4.653960 0.000000 1391 +rais 0 8 4.875197 0.000000 1711 +manufactur 0 8 4.875197 0.000000 1634 +thegoal 0 6 5.164786 0.000000 2033 +proce 0 6 5.164786 0.000000 2114 +synthes 0 5 5.347108 0.000000 2451 +weyl 0 4 5.568345 0.000000 2854 +substrat 0 4 5.568345 0.000000 2857 +nist 0 4 5.568345 0.000000 2973 +zippel 0 4 5.568345 0.000000 2879 +theus 0 4 5.568345 0.000000 2992 +rick 0 4 5.568345 0.000000 2646 +chew 0 3 5.857933 0.000000 3618 +enorm 0 3 5.857933 0.000000 3431 +expend 0 2 6.263398 0.000000 5451 +scientificsoftwar 0 2 6.263398 0.000000 5038 +andform 0 2 6.263398 0.000000 4274 +levelprogram 0 2 6.263398 0.000000 5452 +insystem 0 2 6.263398 0.000000 4172 +ideason 0 2 6.263398 0.000000 4469 +microstorag 0 2 6.263398 0.000000 4887 +palmer 0 2 6.263398 0.000000 5453 +simlab 0 1 6.957497 0.000000 12477 +oncomplex 0 1 6.957497 0.000000 12478 +bringingtogeth 0 1 6.957497 0.000000 12479 +symbolicmathemat 0 1 6.957497 0.000000 12480 +levelat 0 1 6.957497 0.000000 12481 +softwarepackag 0 1 6.957497 0.000000 12482 +microstoragearchitectur 0 1 6.957497 0.000000 12483 +computeralgebra 0 1 6.957497 0.000000 12484 +thechain 0 1 6.957497 0.000000 12485 +thearpa 0 1 6.957497 0.000000 12486 +madefast 0 1 6.957497 0.000000 12487 +ofnon 0 1 6.957497 0.000000 12488 +contemporan 0 1 6.957497 0.000000 12489 +chainsprogram 0 1 6.957497 0.000000 12490 +complextopolog 0 1 6.957497 0.000000 12491 +numericalalgorithm 0 1 6.957497 0.000000 12492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..24cae583 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +contact 0 153 1.791759 0.000000 59 +code 0 108 2.197225 0.000000 116 +memori 0 101 2.302585 0.000000 139 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +sourc 0 77 2.564949 0.000000 201 +share 0 59 2.833213 0.000000 304 +releas 0 28 3.610918 0.000000 616 +multiprocessor 0 28 3.610918 0.000000 605 +berkelei 0 26 3.688879 0.000000 657 +supercomput 0 25 3.737670 0.000000 681 +prepar 0 20 3.951244 0.000000 824 +split 1 14 4.317488 4.317488 1078 +eicken 0 13 4.382027 0.000000 1134 +thorsten 0 13 4.382027 0.000000 1133 +chao 0 8 4.875197 0.000000 1753 +goldstein 0 6 5.164786 0.000000 2168 +culler 0 5 5.347108 0.000000 2381 +spam 0 4 5.568345 0.000000 2927 +grzegorz 0 4 5.568345 0.000000 2923 +czajkowski 0 4 5.568345 0.000000 2924 +neta 0 3 5.857933 0.000000 3789 +dusseau 0 3 5.857933 0.000000 3382 +yelick 0 3 5.857933 0.000000 3374 +thorstenvon 0 2 6.263398 0.000000 5450 +multiprocessorsa 0 2 6.263398 0.000000 5455 +krishnamurthi 0 2 6.263398 0.000000 5408 +lumetta 0 2 6.263398 0.000000 5409 +contactthorsten 0 2 6.263398 0.000000 5438 +ccornel 0 1 6.957497 0.000000 12550 +implementationssplit 0 1 6.957497 0.000000 12551 +isimpl 0 1 6.957497 0.000000 12552 +messagesfor 0 1 6.957497 0.000000 12553 +ofsplit 0 1 6.957497 0.000000 12554 +distr 0 1 6.957497 0.000000 12555 +implementedon 0 1 6.957497 0.000000 12556 +contactchi 0 1 6.957497 0.000000 12557 +runningsolari 0 1 6.957497 0.000000 12558 +mattwelsh 0 1 6.957497 0.000000 12559 +cparallel 0 1 6.957497 0.000000 12560 +abstractproject 0 1 6.957497 0.000000 12561 +sitessplit 0 1 6.957497 0.000000 12562 +chome 0 1 6.957497 0.000000 12563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..b61312f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +cornel 1 215 1.386294 1.386294 23 +second 1 81 2.484907 2.484907 166 +html 1 75 2.639057 2.639057 235 +browser 1 56 2.890372 2.890372 313 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +default 1 5 5.347108 5.347108 2335 +redirect 1 1 6.957497 6.957497 12564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..dda55510 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,221 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +intern 0 108 2.197225 0.000000 128 +pleas 0 113 2.197225 0.000000 114 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +assist 0 112 2.197225 0.000000 113 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +associ 0 93 2.397895 0.000000 151 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +octob 0 89 2.397895 0.000000 156 +search 0 95 2.397895 0.000000 155 +ieee 0 86 2.484907 0.000000 190 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +control 0 82 2.484907 0.000000 164 +server 0 76 2.564949 0.000000 204 +sourc 0 77 2.564949 0.000000 201 +master 0 76 2.564949 0.000000 216 +dynam 0 76 2.564949 0.000000 194 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +laboratori 0 63 2.772589 0.000000 292 +prof 0 64 2.772589 0.000000 273 +foundat 0 62 2.772589 0.000000 286 +ithaca 0 65 2.772589 0.000000 294 +complex 0 64 2.772589 0.000000 269 +artifici 0 63 2.772589 0.000000 280 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +abstract 0 62 2.772589 0.000000 276 +automat 0 61 2.833213 0.000000 306 +locat 0 59 2.833213 0.000000 303 +type 0 61 2.833213 0.000000 296 +direct 0 57 2.890372 0.000000 316 +browser 0 56 2.890372 0.000000 313 +unix 0 58 2.890372 0.000000 308 +index 0 56 2.890372 0.000000 309 +three 0 54 2.944439 0.000000 330 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +approach 0 48 3.044522 0.000000 366 +done 0 47 3.091042 0.000000 381 +move 0 47 3.091042 0.000000 382 +video 0 44 3.135494 0.000000 405 +discuss 0 45 3.135494 0.000000 399 +mechan 0 43 3.178054 0.000000 416 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +submit 0 39 3.258097 0.000000 440 +author 0 39 3.258097 0.000000 450 +microsoft 0 38 3.295837 0.000000 468 +robot 1 36 3.367296 3.367296 497 +global 0 34 3.401197 0.000000 520 +tech 0 35 3.401197 0.000000 515 +queri 0 33 3.433987 0.000000 524 +extend 0 32 3.465736 0.000000 539 +titl 0 31 3.496508 0.000000 556 +hard 0 30 3.555348 0.000000 563 +full 0 28 3.610918 0.000000 615 +cluster 0 28 3.610918 0.000000 612 +multiprocessor 0 28 3.610918 0.000000 605 +hope 0 28 3.610918 0.000000 610 +pass 0 28 3.610918 0.000000 611 +manipul 0 27 3.637586 0.000000 624 +arrai 0 27 3.637586 0.000000 627 +proc 0 26 3.688879 0.000000 649 +revis 0 26 3.688879 0.000000 640 +detect 0 26 3.688879 0.000000 646 +task 0 25 3.737670 0.000000 678 +greg 0 24 3.761200 0.000000 695 +motion 0 24 3.761200 0.000000 699 +frame 0 24 3.761200 0.000000 684 +thank 0 23 3.806662 0.000000 721 +mobil 0 23 3.806662 0.000000 730 +break 0 20 3.951244 0.000000 812 +supervis 0 20 3.951244 0.000000 840 +mpeg 0 20 3.951244 0.000000 831 +scheme 0 20 3.951244 0.000000 818 +boston 0 19 4.007333 0.000000 862 +scott 0 18 4.060443 0.000000 884 +miller 0 17 4.110874 0.000000 949 +match 0 16 4.174387 0.000000 965 +diego 0 16 4.174387 0.000000 992 +micro 0 15 4.248495 0.000000 1031 +configur 0 15 4.248495 0.000000 1012 +massiv 0 15 4.248495 0.000000 1026 +scene 0 14 4.317488 0.000000 1114 +split 0 14 4.317488 0.000000 1078 +anonym 0 14 4.317488 0.000000 1100 +canada 0 13 4.382027 0.000000 1158 +daniel 0 12 4.465908 0.000000 1233 +bruce 0 12 4.465908 0.000000 1226 +franc 0 12 4.465908 0.000000 1276 +tour 0 11 4.553877 0.000000 1307 +peter 0 11 4.553877 0.000000 1316 +keyword 0 11 4.553877 0.000000 1356 +donald 0 9 4.753590 0.000000 1510 +transmiss 0 9 4.753590 0.000000 1588 +juan 0 9 4.753590 0.000000 1580 +wilson 0 9 4.753590 0.000000 1536 +invari 0 8 4.875197 0.000000 1748 +potenti 0 8 4.875197 0.000000 1690 +japan 0 8 4.875197 0.000000 1762 +siggraph 0 8 4.875197 0.000000 1773 +justin 0 7 5.010635 0.000000 1789 +ramin 0 7 5.010635 0.000000 1820 +sensor 0 7 5.010635 0.000000 1920 +zabih 0 6 5.164786 0.000000 2138 +huttenloch 0 6 5.164786 0.000000 1983 +actuat 0 5 5.347108 0.000000 2442 +solid 0 5 5.347108 0.000000 2255 +symmetr 0 4 5.568345 0.000000 2908 +publicationsth 0 4 5.568345 0.000000 2859 +triangul 0 4 5.568345 0.000000 2903 +csrvl 0 3 5.857933 0.000000 3543 +bhringer 0 3 5.857933 0.000000 3606 +microfabr 0 3 5.857933 0.000000 3610 +jen 0 3 5.857933 0.000000 3378 +ryan 0 3 5.857933 0.000000 3679 +michel 0 3 5.857933 0.000000 3791 +szewczyk 0 3 5.857933 0.000000 3108 +voskuhl 0 3 5.857933 0.000000 3109 +matt 0 3 5.857933 0.000000 3792 +electro 0 2 6.263398 0.000000 5014 +mihailovich 0 2 6.263398 0.000000 5018 +macdonald 0 2 6.263398 0.000000 5006 +laboratorywelcom 0 2 6.263398 0.000000 5439 +mem 0 2 6.263398 0.000000 5007 +andclassif 0 2 6.263398 0.000000 5390 +windowsnt 0 2 6.263398 0.000000 5440 +ree 0 2 6.263398 0.000000 5009 +automationnic 0 2 6.263398 0.000000 5019 +homolog 0 2 6.263398 0.000000 5441 +andj 0 2 6.263398 0.000000 5020 +latomb 0 2 6.263398 0.000000 5021 +brigg 0 2 6.263398 0.000000 5008 +actuatorarrai 0 2 6.263398 0.000000 5017 +hing 0 2 6.263398 0.000000 5442 +montral 0 2 6.263398 0.000000 5394 +cytacki 0 2 6.263398 0.000000 5330 +aaron 0 2 6.263398 0.000000 4438 +csrvlcornel 0 1 6.957497 0.000000 12376 +nich 0 1 6.957497 0.000000 12377 +rrentli 0 1 6.957497 0.000000 12378 +ofresearch 0 1 6.957497 0.000000 12379 +pictor 0 1 6.957497 0.000000 12380 +projectsth 0 1 6.957497 0.000000 12381 +byramin 0 1 6.957497 0.000000 12382 +allowingscen 0 1 6.957497 0.000000 12383 +onplatform 0 1 6.957497 0.000000 12384 +nynet 0 1 6.957497 0.000000 12385 +foru 0 1 6.957497 0.000000 12386 +sproject 0 1 6.957497 0.000000 12387 +currentlyconsid 0 1 6.957497 0.000000 12388 +theissuesher 0 1 6.957497 0.000000 12389 +thecsrvl 0 1 6.957497 0.000000 12390 +serverar 0 1 6.957497 0.000000 12391 +sensorless 0 1 6.957497 0.000000 12392 +oiso 0 1 6.957497 0.000000 12393 +micromechan 0 1 6.957497 0.000000 12394 +quebc 0 1 6.957497 0.000000 12395 +authorthes 0 1 6.957497 0.000000 12396 +pedro 0 1 6.957497 0.000000 12397 +felzenszwalb 0 1 6.957497 0.000000 12398 +lilien 0 1 6.957497 0.000000 12399 +maharbiz 0 1 6.957497 0.000000 12400 +scharstein 0 1 6.957497 0.000000 12401 +stump 0 1 6.957497 0.000000 12402 +fernando 0 1 6.957497 0.000000 12403 +viton 0 1 6.957497 0.000000 12404 +wayt 0 1 6.957497 0.000000 12405 +welsh 0 1 6.957497 0.000000 12406 +whelan 0 1 6.957497 0.000000 12407 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..6e59a655 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +fall 0 181 1.609438 0.000000 40 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +construct 0 139 1.945910 0.000000 82 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +manag 0 114 2.197225 0.000000 125 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +librari 0 87 2.484907 0.000000 181 +internet 0 83 2.484907 0.000000 186 +activ 0 84 2.484907 0.000000 182 +build 0 85 2.484907 0.000000 184 +server 0 76 2.564949 0.000000 204 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +order 0 69 2.708050 0.000000 249 +test 0 66 2.708050 0.000000 252 +knowledg 0 67 2.708050 0.000000 243 +collect 0 65 2.772589 0.000000 268 +locat 0 59 2.833213 0.000000 303 +overview 0 56 2.890372 0.000000 323 +found 0 53 2.944439 0.000000 337 +undergradu 0 54 2.944439 0.000000 338 +digit 0 52 2.995732 0.000000 348 +form 0 39 3.258097 0.000000 443 +john 0 33 3.433987 0.000000 532 +computersci 0 30 3.555348 0.000000 562 +determin 0 27 3.637586 0.000000 630 +consist 0 26 3.688879 0.000000 651 +util 0 21 3.912023 0.000000 774 +increas 0 20 3.951244 0.000000 829 +longer 0 20 3.951244 0.000000 816 +ever 0 19 4.007333 0.000000 872 +dean 0 14 4.317488 0.000000 1104 +captur 0 12 4.465908 0.000000 1232 +volum 0 11 4.553877 0.000000 1347 +explicit 0 9 4.753590 0.000000 1525 +extract 0 8 4.875197 0.000000 1728 +begun 0 5 5.347108 0.000000 2386 +allan 0 4 5.568345 0.000000 2849 +thisdocu 0 3 5.857933 0.000000 3336 +krafft 0 3 5.857933 0.000000 3638 +waysthat 0 2 6.263398 0.000000 5445 +tabular 0 2 6.263398 0.000000 4515 +informationag 0 2 6.263398 0.000000 5446 +hyperlink 0 2 6.263398 0.000000 5447 +oninform 0 2 6.263398 0.000000 4316 +projectinform 0 1 6.957497 0.000000 12451 +accessth 0 1 6.957497 0.000000 12452 +ofonlin 0 1 6.957497 0.000000 12453 +forhuman 0 1 6.957497 0.000000 12454 +hopcroft 0 1 6.957497 0.000000 12455 +davisin 0 1 6.957497 0.000000 12456 +researchextract 0 1 6.957497 0.000000 12457 +thestructur 0 1 6.957497 0.000000 12458 +extractinginform 0 1 6.957497 0.000000 12459 +collectionsof 0 1 6.957497 0.000000 12460 +nationwid 0 1 6.957497 0.000000 12461 +sciencetechn 0 1 6.957497 0.000000 12462 +moreaccess 0 1 6.957497 0.000000 12463 +toit 0 1 6.957497 0.000000 12464 +visitingscientist 0 1 6.957497 0.000000 12465 +jimdavi 0 1 6.957497 0.000000 12466 +jrdpublicationsjam 0 1 6.957497 0.000000 12467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..11b09b5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +develop 0 174 1.791759 0.000000 53 +peopl 0 96 2.302585 0.000000 132 +multimedia 1 68 2.708050 2.708050 258 +direct 0 57 2.890372 0.000000 316 +mission 0 5 5.347108 0.000000 2465 +zeno 0 3 5.857933 0.000000 3580 +potpourri 0 2 6.263398 0.000000 4547 +groupzeno 0 1 6.957497 0.000000 12565 +curricula 0 1 6.957497 0.000000 12566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..53be869f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +topic 0 114 2.197225 0.000000 110 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +control 0 82 2.484907 0.000000 164 +stuff 0 87 2.484907 0.000000 171 +come 0 78 2.564949 0.000000 202 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +master 0 76 2.564949 0.000000 216 +degre 0 69 2.708050 0.000000 259 +receiv 0 66 2.708050 0.000000 244 +ithaca 0 65 2.772589 0.000000 294 +plan 0 65 2.772589 0.000000 272 +prof 0 64 2.772589 0.000000 273 +januari 0 62 2.772589 0.000000 264 +back 0 60 2.833213 0.000000 297 +date 0 51 2.995732 0.000000 344 +still 0 50 3.044522 0.000000 362 +california 0 46 3.091042 0.000000 388 +video 0 44 3.135494 0.000000 405 +long 0 43 3.178054 0.000000 413 +york 0 41 3.218876 0.000000 435 +vision 0 41 3.218876 0.000000 430 +compani 0 41 3.218876 0.000000 423 +robot 0 36 3.367296 0.000000 497 +game 0 36 3.367296 0.000000 498 +actual 0 28 3.610918 0.000000 604 +quit 0 27 3.637586 0.000000 633 +challeng 0 26 3.688879 0.000000 653 +hill 0 25 3.737670 0.000000 670 +frame 0 24 3.761200 0.000000 684 +motion 0 24 3.761200 0.000000 699 +viewer 0 21 3.912023 0.000000 787 +leav 0 21 3.912023 0.000000 772 +martin 0 21 3.912023 0.000000 794 +left 0 19 4.007333 0.000000 851 +miss 0 19 4.007333 0.000000 866 +wind 0 18 4.060443 0.000000 908 +bachelor 0 17 4.110874 0.000000 957 +segment 0 17 4.110874 0.000000 931 +jose 0 16 4.174387 0.000000 976 +month 0 15 4.248495 0.000000 1025 +mayb 0 15 4.248495 0.000000 1014 +enough 0 15 4.248495 0.000000 1040 +anywai 0 15 4.248495 0.000000 1047 +decid 0 14 4.317488 0.000000 1075 +land 0 12 4.465908 0.000000 1273 +meng 0 12 4.465908 0.000000 1214 +pagewelcom 0 11 4.553877 0.000000 1344 +see 0 11 4.553877 0.000000 1337 +island 0 11 4.553877 0.000000 1345 +santa 0 10 4.653960 0.000000 1441 +town 0 10 4.653960 0.000000 1458 +babylon 0 8 4.875197 0.000000 1731 +ramin 0 7 5.010635 0.000000 1820 +rain 0 6 5.164786 0.000000 2137 +snow 0 6 5.164786 0.000000 2031 +zabih 0 6 5.164786 0.000000 2138 +corp 0 6 5.164786 0.000000 2139 +green 0 4 5.568345 0.000000 2848 +barbara 0 3 5.857933 0.000000 3380 +csrvl 0 3 5.857933 0.000000 3543 +binghamton 0 3 5.857933 0.000000 3544 +season 0 2 6.263398 0.000000 4872 +syosset 0 1 6.957497 0.000000 9497 +californialockhe 0 1 6.957497 0.000000 9498 +yorkaltera 0 1 6.957497 0.000000 9499 +californiafun 0 1 6.957497 0.000000 9500 +domainvth 0 1 6.957497 0.000000 9501 +siteoth 0 1 6.957497 0.000000 9502 +worldcareermosaictop 0 1 6.957497 0.000000 9503 +kmai 0 1 6.957497 0.000000 9504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..383ec768 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +link 0 247 1.386294 0.000000 24 +public 1 202 1.609438 1.609438 43 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +relat 0 139 1.945910 0.000000 68 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +peopl 1 96 2.302585 2.302585 132 +academ 0 82 2.484907 0.000000 178 +requir 0 81 2.484907 0.000000 167 +descript 0 64 2.772589 0.000000 271 +organ 0 65 2.772589 0.000000 265 +faculti 1 56 2.890372 2.890372 325 +overview 0 56 2.890372 0.000000 323 +talk 0 53 2.944439 0.000000 336 +visitor 0 49 3.044522 0.000000 371 +seminar 0 38 3.295837 0.000000 470 +staff 0 36 3.367296 0.000000 490 +utc 1 27 3.637586 3.637586 629 +alumni 0 21 3.912023 0.000000 807 +facil 0 20 3.951244 0.000000 814 +event 0 18 4.060443 0.000000 896 +catalog 0 10 4.653960 0.000000 1431 +upcom 0 8 4.875197 0.000000 1685 +calendar 0 8 4.875197 0.000000 1649 +recruit 0 6 5.164786 0.000000 2145 +admiss 0 4 5.568345 0.000000 2704 +pagegener 0 1 6.957497 0.000000 12567 +schedulespag 0 1 6.957497 0.000000 12568 +directoryth 0 1 6.957497 0.000000 12569 +universitywww 0 1 6.957497 0.000000 12570 +informationgrip 0 1 6.957497 0.000000 12571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..5ffb1548 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +professor 0 137 1.945910 0.000000 76 +confer 0 126 2.079442 0.000000 100 +intern 1 108 2.197225 2.197225 128 +mathemat 0 108 2.197225 0.000000 123 +check 0 115 2.197225 0.000000 118 +well 0 109 2.197225 0.000000 121 +associ 0 93 2.397895 0.000000 151 +journal 0 83 2.484907 0.000000 183 +learn 0 86 2.484907 0.000000 170 +exampl 0 77 2.564949 0.000000 195 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +artifici 0 63 2.772589 0.000000 280 +previou 0 62 2.772589 0.000000 290 +automat 0 61 2.833213 0.000000 306 +index 0 56 2.890372 0.000000 309 +profession 0 51 2.995732 0.000000 345 +california 0 46 3.091042 0.000000 388 +third 0 43 3.178054 0.000000 412 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +societi 0 40 3.258097 0.000000 456 +award 0 34 3.401197 0.000000 523 +next 0 34 3.401197 0.000000 517 +board 0 33 3.433987 0.000000 528 +profil 0 30 3.555348 0.000000 581 +chair 0 29 3.583519 0.000000 596 +focus 0 29 3.583519 0.000000 584 +american 0 27 3.637586 0.000000 634 +berkelei 0 26 3.688879 0.000000 657 +higher 0 24 3.761200 0.000000 690 +honor 0 23 3.806662 0.000000 729 +theorem 0 21 3.912023 0.000000 786 +prove 0 19 4.007333 0.000000 848 +citi 0 19 4.007333 0.000000 874 +researchmi 0 14 4.317488 0.000000 1119 +joint 0 13 4.382027 0.000000 1130 +lake 0 11 4.553877 0.000000 1373 +distinguish 0 11 4.553877 0.000000 1357 +utah 0 9 4.753590 0.000000 1585 +centenni 0 7 5.010635 0.000000 1967 +presid 0 6 5.164786 0.000000 2196 +heurist 0 6 5.164786 0.000000 2125 +emeritu 0 5 5.347108 0.000000 2544 +salt 0 5 5.347108 0.000000 2413 +analog 0 4 5.568345 0.000000 2875 +bledso 0 4 5.568345 0.000000 2999 +presentarea 0 4 5.568345 0.000000 3026 +artificialintellig 0 3 5.857933 0.000000 3608 +truste 0 3 5.857933 0.000000 3900 +alsointerest 0 3 5.857933 0.000000 3813 +donnel 0 2 6.263398 0.000000 5233 +mileston 0 2 6.263398 0.000000 4416 +jointconfer 0 2 6.263398 0.000000 5030 +woodrow 0 1 6.957497 0.000000 14287 +bledsoepet 0 1 6.957497 0.000000 14288 +americanmathemat 0 1 6.957497 0.000000 14289 +onartifici 0 1 6.957497 0.000000 14290 +interestautomat 0 1 6.957497 0.000000 14291 +theoremproof 0 1 6.957497 0.000000 14292 +levelplan 0 1 6.957497 0.000000 14293 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..940805e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +oper 0 180 1.609438 0.000000 34 +parallel 1 169 1.791759 1.791759 60 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +lectur 0 135 1.945910 0.000000 73 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +confer 0 126 2.079442 0.000000 100 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +code 0 108 2.197225 0.000000 116 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +educ 0 86 2.484907 0.000000 191 +ieee 0 86 2.484907 0.000000 190 +environ 0 84 2.484907 0.000000 177 +activ 0 84 2.484907 0.000000 182 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +august 0 66 2.708050 0.000000 257 +practic 0 70 2.708050 0.000000 246 +interact 0 62 2.772589 0.000000 270 +previou 0 62 2.772589 0.000000 290 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +local 0 55 2.944439 0.000000 334 +profession 0 51 2.995732 0.000000 345 +set 0 50 3.044522 0.000000 361 +york 0 41 3.218876 0.000000 435 +societi 0 40 3.258097 0.000000 456 +multipl 0 39 3.258097 0.000000 453 +formal 0 37 3.332205 0.000000 478 +committe 0 34 3.401197 0.000000 522 +concurr 0 34 3.401197 0.000000 501 +next 0 34 3.401197 0.000000 517 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +scientist 0 31 3.496508 0.000000 560 +profil 0 30 3.555348 0.000000 581 +computersci 0 30 3.555348 0.000000 562 +chair 0 29 3.583519 0.000000 596 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +debug 0 17 4.110874 0.000000 944 +brown 0 16 4.174387 0.000000 977 +researchmi 0 14 4.317488 0.000000 1119 +incomput 0 14 4.317488 0.000000 1096 +vice 0 9 4.753590 0.000000 1604 +unifi 0 8 4.875197 0.000000 1774 +newton 0 7 5.010635 0.000000 1824 +softwareengin 0 6 5.164786 0.000000 2162 +jain 0 5 5.347108 0.000000 2332 +parallelprogram 0 5 5.347108 0.000000 2379 +werth 1 4 5.568345 5.568345 3004 +andimplement 0 4 5.568345 0.000000 3029 +hyder 0 4 5.568345 0.000000 2772 +interestparallel 0 3 5.857933 0.000000 3806 +publicationss 0 2 6.263398 0.000000 5732 +thirteenth 0 2 6.263398 0.000000 5733 +werthsenior 0 1 6.957497 0.000000 14294 +emori 0 1 6.957497 0.000000 14295 +accredit 0 1 6.957497 0.000000 14296 +sobek 0 1 6.957497 0.000000 14297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..40806b1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +distribut 0 162 1.791759 0.000000 51 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +architectur 1 139 1.945910 1.945910 77 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +databas 0 122 2.079442 0.000000 86 +manag 0 114 2.197225 0.000000 125 +person 0 111 2.197225 0.000000 117 +member 0 84 2.484907 0.000000 165 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +colleg 0 61 2.833213 0.000000 300 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +index 0 56 2.890372 0.000000 309 +algebra 0 45 3.135494 0.000000 394 +multi 0 36 3.367296 0.000000 493 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +strategi 0 25 3.737670 0.000000 682 +researchmi 0 14 4.317488 0.000000 1119 +fromindividu 0 12 4.465908 0.000000 1290 +oxford 0 6 5.164786 0.000000 2121 +regent 0 5 5.347108 0.000000 2551 +emeritu 0 5 5.347108 0.000000 2544 +england 0 5 5.347108 0.000000 2557 +alfr 0 4 5.568345 0.000000 2882 +dale 0 4 5.568345 0.000000 2687 +crow 0 3 5.857933 0.000000 3845 +trammel 0 2 6.263398 0.000000 5562 +andmap 0 2 6.263398 0.000000 4258 +daleno 0 1 6.957497 0.000000 14298 +exet 0 1 6.957497 0.000000 14299 +interestdatabas 0 1 6.957497 0.000000 14300 +stagei 0 1 6.957497 0.000000 14301 +studiedinclud 0 1 6.957497 0.000000 14302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..c93c89b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +model 0 145 1.945910 0.000000 69 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +databas 0 122 2.079442 0.000000 86 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +mathemat 0 108 2.197225 0.000000 123 +teach 0 108 2.197225 0.000000 112 +intern 0 108 2.197225 0.000000 128 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +solut 0 82 2.484907 0.000000 162 +environ 0 84 2.484907 0.000000 177 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +orient 0 80 2.564949 0.000000 205 +decemb 0 80 2.564949 0.000000 215 +symposium 0 72 2.639057 0.000000 238 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +visual 0 48 3.044522 0.000000 372 +math 0 44 3.135494 0.000000 402 +natur 0 44 3.135494 0.000000 406 +respons 0 37 3.332205 0.000000 476 +award 0 34 3.401197 0.000000 523 +concurr 0 34 3.401197 0.000000 501 +obtain 0 33 3.433987 0.000000 534 +exist 0 30 3.555348 0.000000 569 +profil 0 30 3.555348 0.000000 581 +propos 0 28 3.610918 0.000000 602 +repres 0 26 3.688879 0.000000 656 +honor 0 23 3.806662 0.000000 729 +excel 0 19 4.007333 0.000000 868 +miller 0 17 4.110874 0.000000 949 +role 0 14 4.317488 0.000000 1101 +fromindividu 0 12 4.465908 0.000000 1290 +queue 0 10 4.653960 0.000000 1386 +purdu 0 10 4.653960 0.000000 1466 +length 0 10 4.653960 0.000000 1400 +jeffrei 0 9 4.753590 0.000000 1612 +modula 0 9 4.753590 0.000000 1613 +researchi 0 8 4.875197 0.000000 1756 +inproceed 0 8 4.875197 0.000000 1670 +distributedsystem 0 6 5.164786 0.000000 2022 +ofparallel 0 5 5.347108 0.000000 2380 +bulletin 0 5 5.347108 0.000000 2343 +sigcs 0 4 5.568345 0.000000 2865 +throughput 0 4 5.568345 0.000000 2993 +richter 0 4 5.568345 0.000000 2957 +chou 0 4 5.568345 0.000000 3033 +georgia 0 3 5.857933 0.000000 3834 +publicationsj 0 3 5.857933 0.000000 3808 +shen 0 3 5.857933 0.000000 3370 +performanceof 0 2 6.263398 0.000000 4585 +modelingof 0 2 6.263398 0.000000 5734 +loui 0 2 6.263398 0.000000 5220 +brumfield 0 1 6.957497 0.000000 14303 +brumfieldsenior 0 1 6.957497 0.000000 14304 +interestperform 0 1 6.957497 0.000000 14305 +designersof 0 1 6.957497 0.000000 14306 +eachresourc 0 1 6.957497 0.000000 14307 +tasksawait 0 1 6.957497 0.000000 14308 +computationof 0 1 6.957497 0.000000 14309 +graf 0 1 6.957497 0.000000 14310 +verdi 0 1 6.957497 0.000000 14311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..3af14764 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +softwar 0 220 1.386294 0.000000 30 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +schedul 0 119 2.079442 0.000000 85 +mathemat 1 108 2.197225 2.197225 123 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +journal 0 83 2.484907 0.000000 183 +solut 0 82 2.484907 0.000000 162 +member 0 84 2.484907 0.000000 165 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +appli 0 71 2.639057 0.000000 226 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +special 0 56 2.890372 0.000000 320 +explor 0 58 2.890372 0.000000 324 +major 0 56 2.890372 0.000000 315 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +profession 0 51 2.995732 0.000000 345 +particular 0 51 2.995732 0.000000 352 +numer 0 49 3.044522 0.000000 369 +editor 0 41 3.218876 0.000000 433 +transact 0 39 3.258097 0.000000 438 +statist 0 35 3.401197 0.000000 521 +board 0 33 3.433987 0.000000 528 +obtain 0 33 3.433987 0.000000 534 +transform 0 32 3.465736 0.000000 542 +profil 0 30 3.555348 0.000000 581 +packag 0 28 3.610918 0.000000 614 +symbol 0 27 3.637586 0.000000 620 +methodolog 0 23 3.806662 0.000000 733 +director 0 22 3.850148 0.000000 767 +siam 0 21 3.912023 0.000000 800 +rout 0 21 3.912023 0.000000 793 +expert 0 20 3.951244 0.000000 833 +region 0 19 4.007333 0.000000 875 +north 0 19 4.007333 0.000000 873 +dimension 0 18 4.060443 0.000000 909 +estim 0 17 4.110874 0.000000 930 +matrix 0 17 4.110874 0.000000 933 +condit 0 16 4.174387 0.000000 975 +alan 0 13 4.382027 0.000000 1146 +emploi 0 12 4.465908 0.000000 1284 +fromindividu 0 12 4.465908 0.000000 1290 +michigan 0 11 4.553877 0.000000 1368 +node 0 11 4.553877 0.000000 1326 +editori 0 9 4.753590 0.000000 1611 +hundr 0 9 4.753590 0.000000 1528 +researchi 0 8 4.875197 0.000000 1756 +curv 0 8 4.875197 0.000000 1656 +coast 0 8 4.875197 0.000000 1746 +presenc 0 8 4.875197 0.000000 1671 +counter 0 8 4.875197 0.000000 1765 +centenni 0 7 5.010635 0.000000 1967 +constrain 0 6 5.164786 0.000000 2042 +southern 0 6 5.164786 0.000000 2191 +spline 0 6 5.164786 0.000000 2007 +fit 0 5 5.347108 0.000000 2285 +holland 0 5 5.347108 0.000000 2490 +triangul 0 4 5.568345 0.000000 2903 +closest 0 4 5.568345 0.000000 2828 +cline 0 3 5.857933 0.000000 3218 +interestmathemat 0 3 5.857933 0.000000 3860 +scatter 0 3 5.857933 0.000000 3351 +delaunai 0 3 5.857933 0.000000 3619 +imac 0 3 5.857933 0.000000 3718 +wilkinson 0 3 5.857933 0.000000 3579 +subprogram 0 2 6.263398 0.000000 5618 +andsurfac 0 2 6.263398 0.000000 5735 +publicationsr 0 2 6.263398 0.000000 5736 +king 0 2 6.263398 0.000000 5737 +meyer 0 2 6.263398 0.000000 4728 +guard 0 2 6.263398 0.000000 5738 +tender 0 2 6.263398 0.000000 5397 +stewart 0 2 6.263398 0.000000 5739 +renka 0 1 6.957497 0.000000 14312 +clinedavid 0 1 6.957497 0.000000 14313 +bruton 0 1 6.957497 0.000000 14314 +statisticalcomput 0 1 6.957497 0.000000 14315 +socialrespons 0 1 6.957497 0.000000 14316 +whichcan 0 1 6.957497 0.000000 14317 +constructionof 0 1 6.957497 0.000000 14318 +formathemat 0 1 6.957497 0.000000 14319 +developmentha 0 1 6.957497 0.000000 14320 +tension 0 1 6.957497 0.000000 14321 +buoi 0 1 6.957497 0.000000 14322 +barrier 0 1 6.957497 0.000000 14323 +moler 0 1 6.957497 0.000000 14324 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..2810f938 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +area 0 144 1.945910 0.000000 80 +mathemat 1 108 2.197225 2.197225 123 +memori 0 101 2.302585 0.000000 139 +techniqu 0 99 2.302585 0.000000 138 +member 0 84 2.484907 0.000000 165 +good 0 77 2.564949 0.000000 200 +addit 0 74 2.639057 0.000000 228 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +reason 0 57 2.890372 0.000000 318 +particular 0 51 2.995732 0.000000 352 +physic 0 47 3.091042 0.000000 377 +theoret 0 39 3.258097 0.000000 446 +correct 0 38 3.295837 0.000000 462 +award 0 34 3.401197 0.000000 523 +obtain 0 33 3.433987 0.000000 534 +power 0 30 3.555348 0.000000 573 +profil 0 30 3.555348 0.000000 581 +art 0 29 3.583519 0.000000 593 +chair 0 29 3.583519 0.000000 596 +focus 0 29 3.583519 0.000000 584 +american 0 27 3.637586 0.000000 634 +fellow 0 24 3.761200 0.000000 701 +doctor 0 24 3.761200 0.000000 709 +methodolog 0 23 3.806662 0.000000 733 +increas 0 20 3.951244 0.000000 829 +fromindividu 0 12 4.465908 0.000000 1290 +academi 0 8 4.875197 0.000000 1735 +centenni 0 7 5.010635 0.000000 1967 +foreign 0 7 5.010635 0.000000 1919 +ture 0 6 5.164786 0.000000 1997 +british 0 5 5.347108 0.000000 2546 +harri 0 4 5.568345 0.000000 3034 +queen 0 4 5.568345 0.000000 2919 +dijkstra 0 3 5.857933 0.000000 3173 +netherland 0 3 5.857933 0.000000 3650 +streamlin 0 3 5.857933 0.000000 3573 +edsger 0 2 6.263398 0.000000 5740 +honorari 0 2 6.263398 0.000000 5741 +sciencesmemb 0 2 6.263398 0.000000 5742 +royal 0 2 6.263398 0.000000 4756 +wybe 0 1 6.957497 0.000000 14325 +dijkstraschlumberg 0 1 6.957497 0.000000 14326 +sciencesprofessor 0 1 6.957497 0.000000 14327 +mathematicskandidaatsexamen 0 1 6.957497 0.000000 14328 +doctora 0 1 6.957497 0.000000 14329 +examen 0 1 6.957497 0.000000 14330 +leydenph 0 1 6.957497 0.000000 14331 +amsterdamhonor 0 1 6.957497 0.000000 14332 +awardsacm 0 1 6.957497 0.000000 14333 +sciencesdistinguish 0 1 6.957497 0.000000 14334 +societyafip 0 1 6.957497 0.000000 14335 +honori 0 1 6.957497 0.000000 14336 +causa 0 1 6.957497 0.000000 14337 +belfastarea 0 1 6.957497 0.000000 14338 +systemssummari 0 1 6.957497 0.000000 14339 +argumentso 0 1 6.957497 0.000000 14340 +ofform 0 1 6.957497 0.000000 14341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..4263da4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +mathemat 0 108 2.197225 0.000000 123 +manag 0 114 2.197225 0.000000 125 +specif 0 106 2.197225 0.000000 106 +theori 0 111 2.197225 0.000000 127 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +educ 0 86 2.484907 0.000000 191 +larg 0 82 2.484907 0.000000 168 +internet 0 83 2.484907 0.000000 186 +member 0 84 2.484907 0.000000 165 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +protocol 0 45 3.135494 0.000000 407 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +mobil 0 23 3.806662 0.000000 730 +speed 0 18 4.060443 0.000000 911 +researchmi 0 14 4.317488 0.000000 1119 +fromindividu 0 12 4.465908 0.000000 1290 +chri 0 11 4.553877 0.000000 1311 +secretari 0 8 4.875197 0.000000 1775 +inproceed 0 8 4.875197 0.000000 1670 +sigcomm 0 5 5.347108 0.000000 2329 +gouda 0 4 5.568345 0.000000 3021 +treasur 0 3 5.857933 0.000000 3229 +andm 0 3 5.857933 0.000000 3901 +edmondson 0 2 6.263398 0.000000 4182 +yurkanan 0 2 6.263398 0.000000 4175 +interestcomput 0 2 6.263398 0.000000 5743 +yurkananlectur 0 1 6.957497 0.000000 14342 +internetwork 0 1 6.957497 0.000000 14343 +cobb 0 1 6.957497 0.000000 14344 +informaticsconfer 0 1 6.957497 0.000000 14345 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..58abebe4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +list 0 201 1.609438 0.000000 39 +area 1 144 1.945910 1.945910 80 +process 0 142 1.945910 0.000000 72 +confer 1 126 2.079442 2.079442 100 +techniqu 0 99 2.302585 0.000000 138 +educ 0 86 2.484907 0.000000 191 +academ 0 82 2.484907 0.000000 178 +librari 0 87 2.484907 0.000000 181 +school 0 84 2.484907 0.000000 188 +member 0 84 2.484907 0.000000 165 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +improv 0 62 2.772589 0.000000 289 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +local 0 55 2.944439 0.000000 334 +profession 0 51 2.995732 0.000000 345 +committe 1 34 3.401197 3.401197 522 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +retriev 0 27 3.637586 0.000000 621 +women 0 16 4.174387 0.000000 1004 +researchmi 0 14 4.317488 0.000000 1119 +coordin 0 13 4.382027 0.000000 1182 +minor 0 12 4.465908 0.000000 1237 +fromindividu 0 12 4.465908 0.000000 1290 +secondari 0 7 5.010635 0.000000 1884 +recruit 0 6 5.164786 0.000000 2145 +sigcs 0 4 5.568345 0.000000 2865 +louisiana 0 3 5.857933 0.000000 3902 +suzi 0 2 6.263398 0.000000 4288 +gallagh 0 2 6.263398 0.000000 4293 +southwestern 0 2 6.263398 0.000000 5744 +interestcomput 0 2 6.263398 0.000000 5743 +gallagherlectur 0 1 6.957497 0.000000 14346 +loyola 0 1 6.957497 0.000000 14347 +necc 0 1 6.957497 0.000000 14348 +andretent 0 1 6.957497 0.000000 14349 +scienceeduc 0 1 6.957497 0.000000 14350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..85a10625 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +confer 0 126 2.079442 0.000000 100 +machin 0 129 2.079442 0.000000 95 +intern 0 108 2.197225 0.000000 128 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +state 0 76 2.564949 0.000000 207 +june 0 79 2.564949 0.000000 214 +decemb 0 80 2.564949 0.000000 215 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +integr 0 67 2.708050 0.000000 245 +guid 0 63 2.772589 0.000000 267 +januari 0 62 2.772589 0.000000 264 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +press 0 42 3.218876 0.000000 419 +transact 0 39 3.258097 0.000000 438 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +toler 0 33 3.433987 0.000000 533 +obtain 0 33 3.433987 0.000000 534 +fault 0 32 3.465736 0.000000 547 +profil 0 30 3.555348 0.000000 581 +focus 0 29 3.583519 0.000000 584 +scale 0 28 3.610918 0.000000 613 +measur 0 28 3.610918 0.000000 609 +multiprocessor 0 28 3.610918 0.000000 605 +repres 0 26 3.688879 0.000000 656 +strategi 0 25 3.737670 0.000000 682 +reliabl 0 25 3.737670 0.000000 674 +methodolog 0 23 3.806662 0.000000 733 +lead 0 23 3.806662 0.000000 718 +properti 0 22 3.850148 0.000000 749 +scheme 0 20 3.951244 0.000000 818 +qualiti 0 20 3.951244 0.000000 832 +failur 0 18 4.060443 0.000000 898 +appropri 0 18 4.060443 0.000000 883 +interconnect 0 17 4.110874 0.000000 937 +across 0 16 4.174387 0.000000 974 +precis 0 15 4.248495 0.000000 1023 +senior 0 14 4.317488 0.000000 1120 +researchmi 0 14 4.317488 0.000000 1119 +johnson 0 13 4.382027 0.000000 1162 +optic 0 12 4.465908 0.000000 1221 +fromindividu 0 12 4.465908 0.000000 1290 +abil 0 11 4.553877 0.000000 1341 +impact 0 11 4.553877 0.000000 1334 +success 0 10 4.653960 0.000000 1390 +devis 0 10 4.653960 0.000000 1451 +true 0 10 4.653960 0.000000 1422 +contrast 0 8 4.875197 0.000000 1637 +multicomput 0 7 5.010635 0.000000 1890 +predic 0 7 5.010635 0.000000 1806 +nest 0 6 5.164786 0.000000 2151 +chemistri 0 5 5.347108 0.000000 2405 +orlean 0 5 5.347108 0.000000 2550 +buss 0 4 5.568345 0.000000 2649 +louisiana 0 3 5.857933 0.000000 3902 +thedevelop 0 3 5.857933 0.000000 3903 +wave 0 3 5.857933 0.000000 3518 +campbel 0 3 5.857933 0.000000 3272 +laser 0 2 6.263398 0.000000 4747 +beinginvestig 0 2 6.263398 0.000000 5745 +parallelsystem 0 2 6.263398 0.000000 5746 +publicationsr 0 2 6.263398 0.000000 5736 +jenevein 1 1 6.957497 6.957497 14351 +wafer 0 1 6.957497 0.000000 14352 +menez 0 1 6.957497 0.000000 14353 +malek 0 1 6.957497 0.000000 14354 +interestinterconnect 0 1 6.957497 0.000000 14355 +interconnectionnetwork 0 1 6.957497 0.000000 14356 +restsin 0 1 6.957497 0.000000 14357 +interconnectionstructur 0 1 6.957497 0.000000 14358 +kindof 0 1 6.957497 0.000000 14359 +beingappli 0 1 6.957497 0.000000 14360 +communicationswitch 0 1 6.957497 0.000000 14361 +iscontinu 0 1 6.957497 0.000000 14362 +performanceport 0 1 6.957497 0.000000 14363 +tobenchmark 0 1 6.957497 0.000000 14364 +memorysystem 0 1 6.957497 0.000000 14365 +kyklo 0 1 6.957497 0.000000 14366 +laranjeira 0 1 6.957497 0.000000 14367 +ullah 0 1 6.957497 0.000000 14368 +metrix 0 1 6.957497 0.000000 14369 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..445b1e8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +center 0 88 2.397895 0.000000 158 +activ 0 84 2.484907 0.000000 182 +member 0 84 2.484907 0.000000 165 +complet 0 77 2.564949 0.000000 208 +logic 1 71 2.639057 2.639057 230 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +abstract 0 62 2.772589 0.000000 276 +function 0 62 2.772589 0.000000 275 +back 0 60 2.833213 0.000000 297 +space 0 57 2.890372 0.000000 310 +faculti 0 56 2.890372 0.000000 325 +california 0 46 3.091042 0.000000 388 +especi 0 36 3.367296 0.000000 496 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +valu 0 25 3.737670 0.000000 665 +martin 0 21 3.912023 0.000000 794 +concentr 0 18 4.060443 0.000000 906 +track 0 15 4.248495 0.000000 1029 +researchmi 0 14 4.317488 0.000000 1119 +classic 0 14 4.317488 0.000000 1084 +philosophi 0 13 4.382027 0.000000 1167 +deduct 0 12 4.465908 0.000000 1236 +scan 0 12 4.465908 0.000000 1243 +fromindividu 0 12 4.465908 0.000000 1290 +closur 0 8 4.875197 0.000000 1643 +angel 0 8 4.875197 0.000000 1779 +notion 0 7 5.010635 0.000000 1947 +vehicl 0 7 5.010635 0.000000 1928 +delai 0 7 5.010635 0.000000 1848 +chicago 0 6 5.164786 0.000000 2149 +emeritu 0 5 5.347108 0.000000 2544 +interestmathemat 0 3 5.857933 0.000000 3860 +andon 0 3 5.857933 0.000000 3115 +metatheori 0 3 5.857933 0.000000 3642 +norman 0 1 6.957497 0.000000 14370 +martinprofessor 0 1 6.957497 0.000000 14371 +ofphilosophi 0 1 6.957497 0.000000 14372 +asinterpret 0 1 6.957497 0.000000 14373 +whichexploit 0 1 6.957497 0.000000 14374 +intension 0 1 6.957497 0.000000 14375 +significantearli 0 1 6.957497 0.000000 14376 +missil 0 1 6.957497 0.000000 14377 +trackingalgorithm 0 1 6.957497 0.000000 14378 +radar 0 1 6.957497 0.000000 14379 +inmani 0 1 6.957497 0.000000 14380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..890990a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +analysi 0 124 2.079442 0.000000 98 +intern 0 108 2.197225 0.000000 128 +specif 0 106 2.197225 0.000000 106 +technic 0 100 2.302585 0.000000 140 +techniqu 0 99 2.302585 0.000000 138 +real 0 93 2.397895 0.000000 144 +proceed 0 93 2.397895 0.000000 152 +present 0 91 2.397895 0.000000 145 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +ieee 0 86 2.484907 0.000000 190 +control 0 82 2.484907 0.000000 164 +journal 0 83 2.484907 0.000000 183 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +academ 0 82 2.484907 0.000000 178 +member 0 84 2.484907 0.000000 165 +method 0 80 2.564949 0.000000 213 +decemb 0 80 2.564949 0.000000 215 +symposium 0 72 2.639057 0.000000 238 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +addit 0 74 2.639057 0.000000 228 +goal 0 66 2.708050 0.000000 250 +foundat 0 62 2.772589 0.000000 286 +automat 0 61 2.833213 0.000000 306 +juli 0 60 2.833213 0.000000 305 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +publish 0 57 2.890372 0.000000 326 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +understand 0 47 3.091042 0.000000 384 +adapt 0 46 3.091042 0.000000 387 +mechan 0 43 3.178054 0.000000 416 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +electr 0 38 3.295837 0.000000 461 +formal 0 37 3.332205 0.000000 478 +respons 0 37 3.332205 0.000000 476 +robot 0 36 3.367296 0.000000 497 +committe 0 34 3.401197 0.000000 522 +board 0 33 3.433987 0.000000 528 +toler 0 33 3.433987 0.000000 533 +obtain 0 33 3.433987 0.000000 534 +hard 0 30 3.555348 0.000000 563 +profil 0 30 3.555348 0.000000 581 +chair 0 29 3.583519 0.000000 596 +framework 0 28 3.610918 0.000000 606 +load 0 28 3.610918 0.000000 601 +constraint 0 26 3.688879 0.000000 636 +bound 0 26 3.688879 0.000000 659 +fundament 0 25 3.737670 0.000000 661 +primari 0 25 3.737670 0.000000 669 +concern 0 25 3.737670 0.000000 666 +toward 0 25 3.737670 0.000000 668 +fellow 0 24 3.761200 0.000000 701 +highli 0 23 3.806662 0.000000 725 +wang 0 21 3.912023 0.000000 790 +fund 0 21 3.912023 0.000000 805 +synthesi 0 20 3.951244 0.000000 834 +expert 0 20 3.951244 0.000000 833 +aid 0 18 4.060443 0.000000 904 +critic 0 16 4.174387 0.000000 982 +taiwan 0 16 4.174387 0.000000 1006 +brown 0 16 4.174387 0.000000 977 +massachusett 0 14 4.317488 0.000000 1118 +conduct 0 14 4.317488 0.000000 1065 +nasa 0 13 4.382027 0.000000 1188 +robust 0 12 4.465908 0.000000 1271 +asynchron 0 12 4.465908 0.000000 1229 +fromindividu 0 12 4.465908 0.000000 1290 +editori 0 9 4.753590 0.000000 1611 +vice 0 9 4.753590 0.000000 1604 +researchi 0 8 4.875197 0.000000 1756 +fifth 0 7 5.010635 0.000000 1931 +montreal 0 7 5.010635 0.000000 1961 +kluwer 0 6 5.164786 0.000000 2143 +antonio 0 6 5.164786 0.000000 2186 +ofdistribut 0 5 5.347108 0.000000 2316 +emerson 0 5 5.347108 0.000000 2547 +adjust 0 5 5.347108 0.000000 2422 +orlean 0 5 5.347108 0.000000 2550 +presentarea 0 4 5.568345 0.000000 3026 +avion 0 4 5.568345 0.000000 3018 +melbourn 0 4 5.568345 0.000000 3035 +sigsoft 0 4 5.568345 0.000000 3036 +aloysiu 0 3 5.857933 0.000000 3829 +systemdesign 0 2 6.263398 0.000000 4297 +stringent 0 2 6.263398 0.000000 5523 +areasinclud 0 2 6.263398 0.000000 5747 +publicationsa 0 2 6.263398 0.000000 4885 +clement 0 2 6.263398 0.000000 5526 +tsou 0 2 6.263398 0.000000 5525 +mokassoci 0 1 6.957497 0.000000 14381 +professorfaculti 0 1 6.957497 0.000000 14382 +federationof 0 1 6.957497 0.000000 14383 +interestfault 0 1 6.957497 0.000000 14384 +includespecif 0 1 6.957497 0.000000 14385 +forguarante 0 1 6.957497 0.000000 14386 +thetrad 0 1 6.957497 0.000000 14387 +criticalsystem 0 1 6.957497 0.000000 14388 +theanalysi 0 1 6.957497 0.000000 14389 +industrialprocess 0 1 6.957497 0.000000 14390 +ofnav 0 1 6.957497 0.000000 14391 +forreal 0 1 6.957497 0.000000 14392 +tilborg 0 1 6.957497 0.000000 14393 +heitmey 0 1 6.957497 0.000000 14394 +labaw 0 1 6.957497 0.000000 14395 +aptl 0 1 6.957497 0.000000 14396 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..02de3e23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +teach 0 108 2.197225 0.000000 112 +book 0 99 2.302585 0.000000 131 +real 0 93 2.397895 0.000000 144 +member 0 84 2.484907 0.000000 165 +state 0 76 2.564949 0.000000 207 +orient 0 80 2.564949 0.000000 205 +appli 0 71 2.639057 0.000000 226 +addit 0 74 2.639057 0.000000 228 +function 0 62 2.772589 0.000000 275 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +reason 0 57 2.890372 0.000000 318 +undergradu 0 54 2.944439 0.000000 338 +maintain 0 51 2.995732 0.000000 342 +physic 0 47 3.091042 0.000000 377 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +editor 0 41 3.218876 0.000000 433 +formal 0 37 3.332205 0.000000 478 +concurr 0 34 3.401197 0.000000 501 +obtain 0 33 3.433987 0.000000 534 +richard 0 31 3.496508 0.000000 559 +profil 0 30 3.555348 0.000000 581 +seri 0 24 3.761200 0.000000 708 +longer 0 20 3.951244 0.000000 816 +stand 0 18 4.060443 0.000000 891 +stanford 0 17 4.110874 0.000000 955 +weslei 0 16 4.174387 0.000000 983 +permit 0 16 4.174387 0.000000 962 +senior 0 14 4.317488 0.000000 1120 +addison 0 12 4.465908 0.000000 1230 +fromindividu 0 12 4.465908 0.000000 1290 +suitabl 0 9 4.753590 0.000000 1486 +harvard 0 7 5.010635 0.000000 1926 +iowa 0 7 5.010635 0.000000 1971 +implementationof 0 7 5.010635 0.000000 1813 +microcomput 0 3 5.857933 0.000000 3444 +hamilton 0 2 6.263398 0.000000 5719 +collegem 0 2 6.263398 0.000000 5563 +astronaut 0 2 6.263398 0.000000 5748 +universityph 0 2 6.263398 0.000000 5604 +lecturerb 0 1 6.957497 0.000000 14397 +aero 0 1 6.957497 0.000000 14398 +universityprofession 0 1 6.957497 0.000000 14399 +servicecoordin 0 1 6.957497 0.000000 14400 +vol 0 1 6.957497 0.000000 14401 +educationsummari 0 1 6.957497 0.000000 14402 +potentialfor 0 1 6.957497 0.000000 14403 +infal 0 1 6.957497 0.000000 14404 +sectionof 0 1 6.957497 0.000000 14405 +onfunct 0 1 6.957497 0.000000 14406 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..605d5715 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +ieee 0 86 2.484907 0.000000 190 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +server 0 76 2.564949 0.000000 204 +june 0 79 2.564949 0.000000 214 +nation 0 74 2.639057 0.000000 240 +intellig 0 72 2.639057 0.000000 225 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +knowledg 0 67 2.708050 0.000000 243 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +septemb 0 65 2.772589 0.000000 274 +foundat 0 62 2.772589 0.000000 286 +organ 0 65 2.772589 0.000000 265 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +profession 0 51 2.995732 0.000000 345 +principl 0 48 3.044522 0.000000 357 +continu 0 39 3.258097 0.000000 448 +transact 0 39 3.258097 0.000000 438 +societi 0 40 3.258097 0.000000 456 +map 0 39 3.258097 0.000000 452 +movi 0 40 3.258097 0.000000 459 +cost 0 37 3.332205 0.000000 480 +robot 0 36 3.367296 0.000000 497 +multi 0 36 3.367296 0.000000 493 +award 0 34 3.401197 0.000000 523 +committe 0 34 3.401197 0.000000 522 +concurr 0 34 3.401197 0.000000 501 +obtain 0 33 3.433987 0.000000 534 +storag 0 31 3.496508 0.000000 553 +profil 0 30 3.555348 0.000000 581 +chair 0 29 3.583519 0.000000 596 +framework 0 28 3.610918 0.000000 606 +retriev 0 27 3.637586 0.000000 621 +divis 0 21 3.912023 0.000000 803 +media 0 19 4.007333 0.000000 861 +sigmod 0 19 4.007333 0.000000 877 +concentr 0 18 4.060443 0.000000 906 +demand 0 14 4.317488 0.000000 1073 +resolut 0 13 4.382027 0.000000 1172 +fromindividu 0 12 4.465908 0.000000 1290 +invit 0 10 4.653960 0.000000 1428 +conferenceon 0 9 4.753590 0.000000 1595 +databasesystem 0 8 4.875197 0.000000 1617 +silberschatz 0 6 5.164786 0.000000 1978 +outstand 0 6 5.164786 0.000000 2136 +advisori 0 6 5.164786 0.000000 2148 +sigact 0 6 5.164786 0.000000 2212 +symposiumon 0 6 5.164786 0.000000 2054 +distributedsystem 0 6 5.164786 0.000000 2022 +internationalconfer 0 6 5.164786 0.000000 2051 +seventh 0 5 5.347108 0.000000 2464 +multiresolut 0 5 5.347108 0.000000 2423 +fussel 0 5 5.347108 0.000000 2300 +abraham 0 4 5.568345 0.000000 2644 +ullman 0 4 5.568345 0.000000 2749 +stoni 0 3 5.857933 0.000000 3571 +sudarshan 0 3 5.857933 0.000000 3885 +ozden 0 2 6.263398 0.000000 5749 +eighth 0 2 6.263398 0.000000 5750 +publicationss 0 2 6.263398 0.000000 5732 +knowledgeand 0 2 6.263398 0.000000 4366 +onveri 0 2 6.263398 0.000000 4367 +rastogi 0 1 6.957497 0.000000 14407 +pod 0 1 6.957497 0.000000 14408 +silberschatzprofessorship 0 1 6.957497 0.000000 14409 +sciencesm 0 1 6.957497 0.000000 14410 +brookhonor 0 1 6.957497 0.000000 14411 +serviceiee 0 1 6.957497 0.000000 14412 +futureof 0 1 6.957497 0.000000 14413 +basedsystemssummari 0 1 6.957497 0.000000 14414 +recentresearch 0 1 6.957497 0.000000 14415 +multidatabas 0 1 6.957497 0.000000 14416 +transactionmanag 0 1 6.957497 0.000000 14417 +ganguli 0 1 6.957497 0.000000 14418 +tsur 0 1 6.957497 0.000000 14419 +datalog 0 1 6.957497 0.000000 14420 +programexecut 0 1 6.957497 0.000000 14421 +jagadish 0 1 6.957497 0.000000 14422 +lieuwen 0 1 6.957497 0.000000 14423 +dali 0 1 6.957497 0.000000 14424 +biliri 0 1 6.957497 0.000000 14425 +storageserv 0 1 6.957497 0.000000 14426 +storageand 0 1 6.957497 0.000000 14427 +relationaldata 0 1 6.957497 0.000000 14428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..926f1c96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +list 0 201 1.609438 0.000000 39 +professor 0 137 1.945910 0.000000 76 +novemb 0 81 2.484907 0.000000 179 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +robert 1 30 3.555348 3.555348 567 +profil 0 30 3.555348 0.000000 581 +rememb 0 12 4.465908 0.000000 1217 +centenni 0 7 5.010635 0.000000 1967 +emeritu 0 5 5.347108 0.000000 2544 +bledso 0 4 5.568345 0.000000 2999 +simmon 1 2 6.263398 6.263398 5460 +simmonsquinci 0 1 6.957497 0.000000 14429 +professoremeritu 0 1 6.957497 0.000000 14430 +psychologymai 0 1 6.957497 0.000000 14431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..335d37a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +email 1 220 1.386294 1.386294 29 +oper 0 180 1.609438 0.000000 34 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +austin 0 168 1.791759 0.000000 63 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +postscript 0 131 2.079442 0.000000 90 +number 0 130 2.079442 0.000000 97 +check 0 115 2.197225 0.000000 118 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +new 0 64 2.772589 0.000000 262 +type 0 61 2.833213 0.000000 296 +thesi 0 57 2.890372 0.000000 327 +think 0 57 2.890372 0.000000 314 +undergradu 0 54 2.944439 0.000000 338 +could 0 46 3.091042 0.000000 383 +semant 0 29 3.583519 0.000000 587 +progress 0 28 3.610918 0.000000 598 +rule 0 26 3.688879 0.000000 638 +altern 0 26 3.688879 0.000000 641 +yahoo 0 24 3.761200 0.000000 707 +log 0 19 4.007333 0.000000 857 +adam 1 17 4.110874 4.110874 934 +happi 0 14 4.317488 0.000000 1079 +touch 0 12 4.465908 0.000000 1288 +claim 0 8 4.875197 0.000000 1664 +core 0 7 5.010635 0.000000 1809 +gzip 1 6 5.164786 5.164786 2117 +gradual 0 4 5.568345 0.000000 2997 +vrml 0 4 5.568345 0.000000 2949 +aweekli 0 3 5.857933 0.000000 3312 +fame 0 3 5.857933 0.000000 3793 +knowwhat 0 2 6.263398 0.000000 5456 +junki 0 2 6.263398 0.000000 5457 +nando 0 2 6.263398 0.000000 5458 +seligman 1 1 6.957497 6.957497 12572 +pageadam 0 1 6.957497 0.000000 12573 +specifiedth 0 1 6.957497 0.000000 12574 +fileor 0 1 6.957497 0.000000 12575 +pagemart 0 1 6.957497 0.000000 12576 +fromreut 0 1 6.957497 0.000000 12577 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..91026d67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +machin 0 129 2.079442 0.000000 95 +specif 0 106 2.197225 0.000000 106 +learn 0 86 2.484907 0.000000 170 +colleg 0 61 2.833213 0.000000 300 +california 0 46 3.091042 0.000000 388 +eduphon 0 15 4.248495 0.000000 1060 +hopefulli 0 14 4.317488 0.000000 1071 +station 0 13 4.382027 0.000000 1157 +acquisit 0 10 4.653960 0.000000 1465 +santa 0 10 4.653960 0.000000 1441 +commonsens 0 4 5.568345 0.000000 2998 +barbara 0 3 5.857933 0.000000 3380 +chill 0 2 6.263398 0.000000 4244 +agapito 0 1 6.957497 0.000000 12578 +sustaita 0 1 6.957497 0.000000 12579 +austincognit 0 1 6.957497 0.000000 12580 +connection 0 1 6.957497 0.000000 12581 +reasoningschoolingph 0 1 6.957497 0.000000 12582 +miscellaneouspost 0 1 6.957497 0.000000 12583 +addressth 0 1 6.957497 0.000000 12584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..308551ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +base 0 165 1.791759 0.000000 50 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +report 1 131 2.079442 2.079442 92 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +larg 0 82 2.484907 0.000000 168 +learn 0 86 2.484907 0.000000 170 +intellig 0 72 2.639057 0.000000 225 +nation 0 74 2.639057 0.000000 240 +logic 0 71 2.639057 0.000000 230 +solv 0 73 2.639057 0.000000 234 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +new 0 64 2.772589 0.000000 262 +foundat 0 62 2.772589 0.000000 286 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +faculti 0 56 2.890372 0.000000 325 +reason 0 57 2.890372 0.000000 318 +physic 0 47 3.091042 0.000000 377 +directori 0 45 3.135494 0.000000 396 +autom 0 41 3.218876 0.000000 434 +close 0 38 3.295837 0.000000 465 +multi 0 36 3.367296 0.000000 493 +robert 0 30 3.555348 0.000000 567 +neural 0 30 3.555348 0.000000 578 +rule 0 26 3.688879 0.000000 638 +lab 0 24 3.761200 0.000000 698 +theorem 0 21 3.912023 0.000000 786 +fund 0 21 3.912023 0.000000 805 +prove 0 19 4.007333 0.000000 848 +histori 0 19 4.007333 0.000000 853 +excel 0 19 4.007333 0.000000 868 +atth 0 15 4.248495 0.000000 1019 +action 0 15 4.248495 0.000000 1038 +rank 0 14 4.317488 0.000000 1063 +bruce 0 12 4.465908 0.000000 1226 +distinguish 0 11 4.553877 0.000000 1357 +benjamin 0 11 4.553877 0.000000 1296 +qualit 0 11 4.553877 0.000000 1362 +vladimir 0 11 4.553877 0.000000 1324 +peter 0 11 4.553877 0.000000 1316 +novak 0 9 4.753590 0.000000 1521 +moonei 0 9 4.753590 0.000000 1520 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +aaai 0 8 4.875197 0.000000 1750 +presidenti 0 8 4.875197 0.000000 1737 +postdoc 0 8 4.875197 0.000000 1724 +philosoph 0 7 5.010635 0.000000 1904 +boyer 0 6 5.164786 0.000000 2013 +gordon 0 6 5.164786 0.000000 2032 +dream 0 6 5.164786 0.000000 2165 +lifschitz 0 5 5.347108 0.000000 2542 +mirank 0 5 5.347108 0.000000 2543 +porter 0 5 5.347108 0.000000 2293 +emeritu 0 5 5.347108 0.000000 2544 +bledso 0 4 5.568345 0.000000 2999 +clark 0 4 5.568345 0.000000 2705 +kuiper 0 3 5.857933 0.000000 3794 +souther 0 3 5.857933 0.000000 3795 +woodi 0 2 6.263398 0.000000 5459 +simmon 0 2 6.263398 0.000000 5460 +laboratoryut 0 1 6.957497 0.000000 12585 +laboratoryth 0 1 6.957497 0.000000 12586 +austinha 0 1 6.957497 0.000000 12587 +andgradu 0 1 6.957497 0.000000 12588 +causei 0 1 6.957497 0.000000 12589 +deceas 0 1 6.957497 0.000000 12590 +memoriam 0 1 6.957497 0.000000 12591 +porterpoint 0 1 6.957497 0.000000 12592 +agenciescontact 0 1 6.957497 0.000000 12593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..51d6f37c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +compil 0 122 2.079442 0.000000 96 +code 0 108 2.197225 0.000000 116 +want 0 79 2.564949 0.000000 199 +main 0 67 2.708050 0.000000 256 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +advisor 0 51 2.995732 0.000000 355 +john 0 33 3.433987 0.000000 532 +postal 0 30 3.555348 0.000000 580 +constraint 0 26 3.688879 0.000000 636 +translat 0 13 4.382027 0.000000 1164 +usavoic 0 13 4.382027 0.000000 1198 +hello 0 10 4.653960 0.000000 1407 +candid 0 9 4.753590 0.000000 1606 +routin 0 9 4.753590 0.000000 1549 +ajita 0 2 6.263398 0.000000 5461 +papersmi 0 2 6.263398 0.000000 5462 +johnajita 0 1 6.957497 0.000000 12594 +programmingframework 0 1 6.957497 0.000000 12595 +parallelprocedur 0 1 6.957497 0.000000 12596 +brownemi 0 1 6.957497 0.000000 12597 +ajohn 0 1 6.957497 0.000000 12598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..b2992f1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +mathemat 0 108 2.197225 0.000000 123 +teach 0 108 2.197225 0.000000 112 +topic 0 114 2.197225 0.000000 110 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +pictur 0 89 2.397895 0.000000 160 +associ 0 93 2.397895 0.000000 151 +learn 0 86 2.484907 0.000000 170 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +integr 0 67 2.708050 0.000000 245 +main 0 67 2.708050 0.000000 256 +evalu 0 64 2.772589 0.000000 266 +organ 0 65 2.772589 0.000000 265 +special 0 56 2.890372 0.000000 320 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +direct 0 57 2.890372 0.000000 316 +maintain 0 51 2.995732 0.000000 342 +profession 0 51 2.995732 0.000000 345 +understand 0 47 3.091042 0.000000 384 +electron 0 47 3.091042 0.000000 379 +electr 0 38 3.295837 0.000000 461 +formal 0 37 3.332205 0.000000 478 +field 0 37 3.332205 0.000000 482 +connect 0 37 3.332205 0.000000 485 +scientist 0 31 3.496508 0.000000 560 +computersci 0 30 3.555348 0.000000 562 +travel 0 30 3.555348 0.000000 579 +limit 0 29 3.583519 0.000000 585 +utc 0 27 3.637586 0.000000 629 +other 0 24 3.761200 0.000000 697 +alwai 0 24 3.761200 0.000000 691 +leav 0 21 3.912023 0.000000 772 +particularli 0 19 4.007333 0.000000 867 +excel 0 19 4.007333 0.000000 868 +encourag 0 18 4.060443 0.000000 880 +social 0 13 4.382027 0.000000 1123 +jump 0 9 4.753590 0.000000 1603 +sweden 0 7 5.010635 0.000000 1885 +interestedin 0 5 5.347108 0.000000 2260 +craft 0 5 5.347108 0.000000 2412 +camp 0 5 5.347108 0.000000 2545 +suffer 0 5 5.347108 0.000000 2268 +novic 0 4 5.568345 0.000000 2815 +mentor 0 4 5.568345 0.000000 2591 +sigcs 0 4 5.568345 0.000000 2865 +vicki 0 3 5.857933 0.000000 3187 +mathematicallog 0 3 5.857933 0.000000 3796 +belong 0 3 5.857933 0.000000 3797 +almstrum 0 2 6.263398 0.000000 4165 +woodwork 0 2 6.263398 0.000000 5463 +spurt 0 2 6.263398 0.000000 5464 +plenti 0 2 6.263398 0.000000 5465 +uppsala 0 1 6.957497 0.000000 12599 +almstrumabout 0 1 6.957497 0.000000 12600 +doctoralresearch 0 1 6.957497 0.000000 12601 +ispent 0 1 6.957497 0.000000 12602 +pagether 0 1 6.957497 0.000000 12603 +garden 0 1 6.957497 0.000000 12604 +sew 0 1 6.957497 0.000000 12605 +hubbi 0 1 6.957497 0.000000 12606 +torgni 0 1 6.957497 0.000000 12607 +stadler 0 1 6.957497 0.000000 12608 +itics 0 1 6.957497 0.000000 12609 +educationjun 0 1 6.957497 0.000000 12610 +swedenoth 0 1 6.957497 0.000000 12611 +frenzi 0 1 6.957497 0.000000 12612 +educationsigsoft 0 1 6.957497 0.000000 12613 +engineeringacm 0 1 6.957497 0.000000 12614 +machineryieeeth 0 1 6.957497 0.000000 12615 +engineerscpsrcomput 0 1 6.957497 0.000000 12616 +responsibilityconnect 0 1 6.957497 0.000000 12617 +elsewhereto 0 1 6.957497 0.000000 12618 +seldom 0 1 6.957497 0.000000 12619 +forewarn 0 1 6.957497 0.000000 12620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..5c14fed7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +hour 0 165 1.791759 0.000000 46 +compil 0 122 2.079442 0.000000 96 +mondai 0 77 2.564949 0.000000 206 +wednesdai 0 64 2.772589 0.000000 261 +anthoni 1 4 5.568345 5.568345 2792 +pang 0 3 5.857933 0.000000 3509 +hung 0 3 5.857933 0.000000 3524 +hing 0 2 6.263398 0.000000 5442 +pagehung 0 1 6.957497 0.000000 12621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..b199498c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +studi 0 120 2.079442 0.000000 91 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +come 0 78 2.564949 0.000000 202 +taylor 0 63 2.772589 0.000000 287 +experi 0 64 2.772589 0.000000 283 +colleg 0 61 2.833213 0.000000 300 +sciencesunivers 0 37 3.332205 0.000000 486 +bachelor 0 17 4.110874 0.000000 957 +aruna 1 1 6.957497 6.957497 12622 +addalacurr 0 1 6.957497 0.000000 12623 +studentth 0 1 6.957497 0.000000 12624 +sciencess 0 1 6.957497 0.000000 12625 +engineeringmysorework 0 1 6.957497 0.000000 12626 +mysoreindiai 0 1 6.957497 0.000000 12627 +mysor 0 1 6.957497 0.000000 12628 +cityindiato 0 1 6.957497 0.000000 12629 +eduvoic 0 1 6.957497 0.000000 12630 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..f37041e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +pleas 0 113 2.197225 0.000000 114 +main 0 67 2.708050 0.000000 256 +get 0 46 3.091042 0.000000 380 +exist 0 30 3.555348 0.000000 569 +known 0 24 3.761200 0.000000 702 +usavoic 0 13 4.382027 0.000000 1198 +mepost 0 10 4.653960 0.000000 1472 +round 0 8 4.875197 0.000000 1769 +let 0 3 5.857933 0.000000 3790 +patienc 0 2 6.263398 0.000000 5466 +ashi 1 1 6.957497 6.957497 12631 +tarafdarashi 0 1 6.957497 0.000000 12632 +tarafdarabout 0 1 6.957497 0.000000 12633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..6bd8c425 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +activ 0 84 2.484907 0.000000 182 +complet 0 77 2.564949 0.000000 208 +line 0 75 2.639057 0.000000 231 +solv 0 73 2.639057 0.000000 234 +prof 0 64 2.772589 0.000000 273 +dept 0 64 2.772589 0.000000 291 +taylor 0 63 2.772589 0.000000 287 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +date 0 51 2.995732 0.000000 344 +advisor 0 51 2.995732 0.000000 355 +electr 0 38 3.295837 0.000000 461 +expect 0 37 3.332205 0.000000 484 +within 0 33 3.433987 0.000000 525 +queri 0 33 3.433987 0.000000 524 +hard 0 30 3.555348 0.000000 563 +campu 0 27 3.637586 0.000000 623 +mine 0 26 3.688879 0.000000 654 +constraint 0 26 3.688879 0.000000 636 +expert 0 20 3.951244 0.000000 833 +toolkit 0 20 3.951244 0.000000 835 +histori 0 19 4.007333 0.000000 853 +along 0 18 4.060443 0.000000 878 +coordin 0 13 4.382027 0.000000 1182 +daniel 0 12 4.465908 0.000000 1233 +instanc 0 11 4.553877 0.000000 1322 +candid 0 9 4.753590 0.000000 1606 +mirank 0 5 5.347108 0.000000 2543 +bayardo 0 2 6.263398 0.000000 5467 +roberto 0 2 6.263398 0.000000 5468 +exception 0 2 6.263398 0.000000 4467 +pageroberto 0 1 6.957497 0.000000 12634 +infosleuth 0 1 6.957497 0.000000 12635 +satisfactionmi 0 1 6.957497 0.000000 12636 +generatingand 0 1 6.957497 0.000000 12637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..9a426038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +address 0 170 1.791759 0.000000 62 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +abstract 0 62 2.772589 0.000000 276 +taylor 0 63 2.772589 0.000000 287 +reason 0 57 2.890372 0.000000 318 +physic 0 47 3.091042 0.000000 377 +past 0 42 3.218876 0.000000 428 +ofth 0 36 3.367296 0.000000 491 +dissert 0 32 3.465736 0.000000 549 +retriev 0 27 3.637586 0.000000 621 +behavior 0 18 4.060443 0.000000 881 +month 0 15 4.248495 0.000000 1025 +refin 0 11 4.553877 0.000000 1363 +entitl 0 9 4.753590 0.000000 1490 +drink 0 9 4.753590 0.000000 1607 +informationemail 0 9 4.753590 0.000000 1564 +overviewof 0 2 6.263398 0.000000 5469 +bert 1 1 6.957497 6.957497 12638 +imprecis 0 1 6.957497 0.000000 12639 +kayresearch 0 1 6.957497 0.000000 12640 +vitami 0 1 6.957497 0.000000 12641 +stuffsonia 0 1 6.957497 0.000000 12642 +andnina 0 1 6.957497 0.000000 12643 +springbank 0 1 6.957497 0.000000 12644 +scotchdrinksof 0 1 6.957497 0.000000 12645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..9717e5eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +project 0 340 1.098612 0.000000 18 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +architectur 0 139 1.945910 0.000000 77 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +introduct 0 126 2.079442 0.000000 87 +databas 0 122 2.079442 0.000000 86 +mathemat 0 108 2.197225 0.000000 123 +manag 0 114 2.197225 0.000000 125 +homepag 0 93 2.397895 0.000000 148 +school 0 84 2.484907 0.000000 188 +logic 0 71 2.639057 0.000000 230 +multimedia 0 68 2.708050 0.000000 258 +visit 0 63 2.772589 0.000000 288 +semest 0 58 2.890372 0.000000 312 +visitor 0 49 3.044522 0.000000 371 +third 0 43 3.178054 0.000000 412 +term 0 43 3.178054 0.000000 411 +vladimir 0 11 4.553877 0.000000 1324 +harrick 0 7 5.010635 0.000000 1849 +lifschitz 0 5 5.347108 0.000000 2542 +coursesc 0 4 5.568345 0.000000 2692 +vinc 0 2 6.263398 0.000000 5414 +bhanu 1 1 6.957497 6.957497 12646 +homepagethi 0 1 6.957497 0.000000 12647 +akhil 0 1 6.957497 0.000000 12648 +reddythank 0 1 6.957497 0.000000 12649 +austinm 0 1 6.957497 0.000000 12650 +datacommun 0 1 6.957497 0.000000 12651 +anitish 0 1 6.957497 0.000000 12652 +barua 0 1 6.957497 0.000000 12653 +schwetmani 0 1 6.957497 0.000000 12654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..67577bee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,229 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +list 0 201 1.609438 0.000000 39 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +theori 0 111 2.197225 0.000000 127 +send 0 114 2.197225 0.000000 109 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +peopl 1 96 2.302585 2.302585 132 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +pictur 0 89 2.397895 0.000000 160 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +real 0 93 2.397895 0.000000 144 +level 0 87 2.484907 0.000000 180 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +complet 0 77 2.564949 0.000000 208 +involv 0 71 2.639057 0.000000 227 +free 0 73 2.639057 0.000000 224 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +written 0 63 2.772589 0.000000 278 +colleg 0 61 2.833213 0.000000 300 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +telephon 0 50 3.044522 0.000000 373 +physic 0 47 3.091042 0.000000 377 +quarter 0 47 3.091042 0.000000 389 +made 0 44 3.135494 0.000000 398 +answer 0 45 3.135494 0.000000 391 +anoth 0 45 3.135494 0.000000 408 +even 0 45 3.135494 0.000000 393 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +probabl 0 40 3.258097 0.000000 455 +error 0 40 3.258097 0.000000 449 +correct 0 38 3.295837 0.000000 462 +feel 0 37 3.332205 0.000000 483 +expect 0 37 3.332205 0.000000 484 +connect 0 37 3.332205 0.000000 485 +soon 0 36 3.367296 0.000000 494 +michael 0 35 3.401197 0.000000 514 +return 0 34 3.401197 0.000000 502 +either 0 35 3.401197 0.000000 506 +articl 0 33 3.433987 0.000000 530 +human 0 32 3.465736 0.000000 546 +taken 0 31 3.496508 0.000000 555 +hard 0 30 3.555348 0.000000 563 +postal 0 30 3.555348 0.000000 580 +depend 0 29 3.583519 0.000000 583 +built 0 29 3.583519 0.000000 592 +quot 0 29 3.583519 0.000000 582 +intend 0 28 3.610918 0.000000 599 +ask 0 28 3.610918 0.000000 597 +symbol 0 27 3.637586 0.000000 620 +altern 0 26 3.688879 0.000000 641 +although 0 25 3.737670 0.000000 667 +accur 0 25 3.737670 0.000000 680 +sometim 0 24 3.761200 0.000000 696 +frame 0 24 3.761200 0.000000 684 +interpret 0 24 3.761200 0.000000 686 +decis 0 23 3.806662 0.000000 728 +lead 0 23 3.806662 0.000000 718 +almost 0 22 3.850148 0.000000 742 +util 0 21 3.912023 0.000000 774 +theorem 0 21 3.912023 0.000000 786 +prepar 0 20 3.951244 0.000000 824 +assum 0 19 4.007333 0.000000 845 +accept 0 18 4.060443 0.000000 879 +account 0 18 4.060443 0.000000 882 +estim 0 17 4.110874 0.000000 930 +differenti 0 17 4.110874 0.000000 921 +choic 0 16 4.174387 0.000000 979 +transfer 0 16 4.174387 0.000000 967 +cognit 0 16 4.174387 0.000000 986 +psycholog 0 15 4.248495 0.000000 1054 +mayb 0 15 4.248495 0.000000 1014 +save 0 14 4.317488 0.000000 1099 +manner 0 14 4.317488 0.000000 1074 +balanc 0 14 4.317488 0.000000 1112 +stori 0 14 4.317488 0.000000 1087 +econom 0 13 4.382027 0.000000 1184 +rememb 0 12 4.465908 0.000000 1217 +sound 0 9 4.753590 0.000000 1605 +unusu 0 9 4.753590 0.000000 1566 +exact 0 9 4.753590 0.000000 1509 +charg 0 9 4.753590 0.000000 1582 +risk 0 8 4.875197 0.000000 1689 +brain 0 8 4.875197 0.000000 1638 +formul 0 8 4.875197 0.000000 1733 +wire 0 8 4.875197 0.000000 1747 +simpli 0 8 4.875197 0.000000 1626 +prover 0 8 4.875197 0.000000 1653 +insert 0 8 4.875197 0.000000 1687 +filter 0 8 4.875197 0.000000 1641 +explain 0 7 5.010635 0.000000 1816 +maxim 0 7 5.010635 0.000000 1944 +prevent 0 7 5.010635 0.000000 1827 +gave 0 7 5.010635 0.000000 1922 +remind 0 7 5.010635 0.000000 1799 +wrong 0 6 5.164786 0.000000 2025 +consequ 0 6 5.164786 0.000000 1989 +biolog 0 6 5.164786 0.000000 2147 +mistak 0 6 5.164786 0.000000 2110 +postcard 0 6 5.164786 0.000000 2181 +promis 0 6 5.164786 0.000000 2037 +hidden 0 6 5.164786 0.000000 1987 +quantum 0 6 5.164786 0.000000 2214 +adopt 0 5 5.347108 0.000000 2467 +amherst 0 5 5.347108 0.000000 2484 +fair 0 5 5.347108 0.000000 2333 +respond 0 5 5.347108 0.000000 2354 +favor 0 5 5.347108 0.000000 2414 +ahead 0 5 5.347108 0.000000 2338 +puzzl 0 5 5.347108 0.000000 2507 +stupid 0 5 5.347108 0.000000 2489 +analog 0 4 5.568345 0.000000 2875 +kill 0 4 5.568345 0.000000 3000 +fire 0 4 5.568345 0.000000 3001 +hypothesi 0 4 5.568345 0.000000 2650 +suppos 0 4 5.568345 0.000000 3002 +neuron 0 3 5.857933 0.000000 3798 +coin 0 3 5.857933 0.000000 3799 +diseas 0 3 5.857933 0.000000 3635 +scream 0 3 5.857933 0.000000 3609 +wasn 0 3 5.857933 0.000000 3800 +incorrect 0 3 5.857933 0.000000 3134 +cogsci 0 2 6.263398 0.000000 4798 +toss 0 2 6.263398 0.000000 5470 +reject 0 2 6.263398 0.000000 5418 +bogu 0 2 6.263398 0.000000 5471 +advert 0 2 6.263398 0.000000 5201 +belov 0 2 6.263398 0.000000 5073 +imagin 0 2 6.263398 0.000000 5472 +combat 0 2 6.263398 0.000000 5473 +nobodi 0 2 6.263398 0.000000 5474 +voltag 0 2 6.263398 0.000000 5475 +invalid 0 2 6.263398 0.000000 5476 +append 0 2 6.263398 0.000000 4295 +informationthi 0 2 6.263398 0.000000 5477 +empti 0 2 6.263398 0.000000 5478 +bogon 0 1 6.957497 0.000000 12655 +avers 0 1 6.957497 0.000000 12656 +outcom 0 1 6.957497 0.000000 12657 +bogo 0 1 6.957497 0.000000 12658 +bogomolnymichael 0 1 6.957497 0.000000 12659 +bogomolni 0 1 6.957497 0.000000 12660 +interestsnot 0 1 6.957497 0.000000 12661 +jenef 0 1 6.957497 0.000000 12662 +husman 0 1 6.957497 0.000000 12663 +bet 0 1 6.957497 0.000000 12664 +diminish 0 1 6.957497 0.000000 12665 +tverski 0 1 6.957497 0.000000 12666 +kahneman 0 1 6.957497 0.000000 12667 +verbatimfrom 0 1 6.957497 0.000000 12668 +outbreak 0 1 6.957497 0.000000 12669 +beenpropos 0 1 6.957497 0.000000 12670 +programsar 0 1 6.957497 0.000000 12671 +besav 0 1 6.957497 0.000000 12672 +digitalif 0 1 6.957497 0.000000 12673 +electrochem 0 1 6.957497 0.000000 12674 +axon 0 1 6.957497 0.000000 12675 +shaki 0 1 6.957497 0.000000 12676 +inaccur 0 1 6.957497 0.000000 12677 +subtract 0 1 6.957497 0.000000 12678 +checkbook 0 1 6.957497 0.000000 12679 +nevertheless 0 1 6.957497 0.000000 12680 +misfir 0 1 6.957497 0.000000 12681 +italic 0 1 6.957497 0.000000 12682 +researchcognit 0 1 6.957497 0.000000 12683 +sciencearitifici 0 1 6.957497 0.000000 12684 +intelligencemathemat 0 1 6.957497 0.000000 12685 +logictopolog 0 1 6.957497 0.000000 12686 +ghrist 0 1 6.957497 0.000000 12687 +wilshir 0 1 6.957497 0.000000 12688 +parkwai 0 1 6.957497 0.000000 12689 +talentsdefinit 0 1 6.957497 0.000000 12690 +bogodynamicsdefinit 0 1 6.957497 0.000000 12691 +sortwhil 0 1 6.957497 0.000000 12692 +bogos 0 1 6.957497 0.000000 12693 +bogomet 0 1 6.957497 0.000000 12694 +flux 0 1 6.957497 0.000000 12695 +bogotifi 0 1 6.957497 0.000000 12696 +autobogotiphobia 0 1 6.957497 0.000000 12697 +blinkenlight 0 1 6.957497 0.000000 12698 +lasher 0 1 6.957497 0.000000 12699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..a51094e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +note 0 142 1.945910 0.000000 67 +mathemat 0 108 2.197225 0.000000 123 +teach 0 108 2.197225 0.000000 112 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +educ 0 86 2.484907 0.000000 191 +build 0 85 2.484907 0.000000 184 +state 0 76 2.564949 0.000000 207 +method 0 80 2.564949 0.000000 213 +decemb 0 80 2.564949 0.000000 215 +logic 0 71 2.639057 0.000000 230 +view 0 70 2.708050 0.000000 254 +dept 0 64 2.772589 0.000000 291 +result 0 65 2.772589 0.000000 281 +polici 0 64 2.772589 0.000000 279 +share 0 59 2.833213 0.000000 304 +detail 0 57 2.890372 0.000000 321 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +undergradu 0 54 2.944439 0.000000 338 +found 0 53 2.944439 0.000000 337 +much 0 52 2.995732 0.000000 349 +approach 0 48 3.044522 0.000000 366 +set 0 50 3.044522 0.000000 361 +physic 0 47 3.091042 0.000000 377 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +natur 0 44 3.135494 0.000000 406 +press 0 42 3.218876 0.000000 419 +close 0 38 3.295837 0.000000 465 +formal 0 37 3.332205 0.000000 478 +mean 0 37 3.332205 0.000000 477 +short 0 36 3.367296 0.000000 499 +articl 0 33 3.433987 0.000000 530 +john 0 33 3.433987 0.000000 532 +taken 0 31 3.496508 0.000000 555 +photo 0 31 3.496508 0.000000 561 +posit 0 31 3.496508 0.000000 552 +richard 0 31 3.496508 0.000000 559 +scientist 0 31 3.496508 0.000000 560 +robert 0 30 3.555348 0.000000 567 +univ 0 28 3.610918 0.000000 617 +pass 0 28 3.610918 0.000000 611 +symbol 0 27 3.637586 0.000000 620 +repres 0 26 3.688879 0.000000 656 +bound 0 26 3.688879 0.000000 659 +fundament 0 25 3.737670 0.000000 661 +reach 0 24 3.761200 0.000000 688 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +recommend 0 22 3.850148 0.000000 737 +properti 0 22 3.850148 0.000000 749 +divis 0 21 3.912023 0.000000 803 +theorem 0 21 3.912023 0.000000 786 +fund 0 21 3.912023 0.000000 805 +verif 0 20 3.951244 0.000000 826 +wonder 0 20 3.951244 0.000000 815 +wrote 0 20 3.951244 0.000000 830 +offici 0 18 4.060443 0.000000 894 +moor 0 17 4.110874 0.000000 936 +everyth 0 13 4.382027 0.000000 1169 +introduc 0 13 4.382027 0.000000 1139 +shape 0 12 4.465908 0.000000 1245 +stephen 0 11 4.553877 0.000000 1342 +smart 0 11 4.553877 0.000000 1352 +noth 0 11 4.553877 0.000000 1328 +peter 0 11 4.553877 0.000000 1316 +instanc 0 11 4.553877 0.000000 1322 +rice 0 11 4.553877 0.000000 1336 +death 0 10 4.653960 0.000000 1457 +govern 0 9 4.753590 0.000000 1581 +float 0 9 4.753590 0.000000 1504 +end 0 9 4.753590 0.000000 1567 +said 0 9 4.753590 0.000000 1571 +ball 0 9 4.753590 0.000000 1608 +prover 0 8 4.875197 0.000000 1653 +joke 0 8 4.875197 0.000000 1620 +pageth 0 7 5.010635 0.000000 1939 +microprocessor 0 7 5.010635 0.000000 1808 +zero 0 7 5.010635 0.000000 1896 +intellectu 0 7 5.010635 0.000000 1847 +discoveri 0 7 5.010635 0.000000 1915 +boyer 0 6 5.164786 0.000000 2013 +furthermor 0 6 5.164786 0.000000 2141 +licens 0 5 5.347108 0.000000 2520 +own 0 5 5.347108 0.000000 2531 +despit 0 5 5.347108 0.000000 2317 +oftexa 0 4 5.568345 0.000000 3003 +vote 0 4 5.568345 0.000000 2953 +disclaim 0 4 5.568345 0.000000 2847 +fire 0 4 5.568345 0.000000 3001 +subsequ 0 4 5.568345 0.000000 2665 +usaemail 0 3 5.857933 0.000000 3722 +mccune 0 3 5.857933 0.000000 3522 +enumer 0 3 5.857933 0.000000 3244 +tenur 0 3 5.857933 0.000000 3801 +shouldb 0 3 5.857933 0.000000 3673 +deutsch 0 3 5.857933 0.000000 3802 +harold 0 3 5.857933 0.000000 3803 +carbon 0 3 5.857933 0.000000 3804 +loss 0 3 5.857933 0.000000 3805 +edufax 0 2 6.263398 0.000000 5479 +knowna 0 2 6.263398 0.000000 5480 +webth 0 2 6.263398 0.000000 5481 +projectmi 0 2 6.263398 0.000000 5482 +andsom 0 2 6.263398 0.000000 5483 +thegreat 0 2 6.263398 0.000000 4987 +thereof 0 2 6.263398 0.000000 5484 +steal 0 2 6.263398 0.000000 5485 +riski 0 2 6.263398 0.000000 4291 +peano 0 2 6.263398 0.000000 4234 +rebel 0 2 6.263398 0.000000 5388 +amor 0 2 6.263398 0.000000 5486 +congeni 0 2 6.263398 0.000000 4713 +rudi 0 2 6.263398 0.000000 5487 +verg 0 2 6.263398 0.000000 5488 +atom 0 2 6.263398 0.000000 4472 +lament 0 2 6.263398 0.000000 4866 +texan 0 2 6.263398 0.000000 5489 +boyerhom 0 1 6.957497 0.000000 12700 +philosophydepart 0 1 6.957497 0.000000 12701 +austinhow 0 1 6.957497 0.000000 12702 +mepap 0 1 6.957497 0.000000 12703 +locationsclassescurriculum 0 1 6.957497 0.000000 12704 +vitaeperson 0 1 6.957497 0.000000 12705 +dataeducationpublicationshonorsjobsgradu 0 1 6.957497 0.000000 12706 +studentsth 0 1 6.957497 0.000000 12707 +nqthm 0 1 6.957497 0.000000 12708 +mccarthi 0 1 6.957497 0.000000 12709 +moffett 0 1 6.957497 0.000000 12710 +controversyni 0 1 6.957497 0.000000 12711 +robbin 0 1 6.957497 0.000000 12712 +permitsth 0 1 6.957497 0.000000 12713 +administrativeoverhead 0 1 6.957497 0.000000 12714 +howthi 0 1 6.957497 0.000000 12715 +confess 0 1 6.957497 0.000000 12716 +acanon 0 1 6.957497 0.000000 12717 +thumper 0 1 6.957497 0.000000 12718 +universitiesstandard 0 1 6.957497 0.000000 12719 +aweb 0 1 6.957497 0.000000 12720 +anind 0 1 6.957497 0.000000 12721 +endors 0 1 6.957497 0.000000 12722 +habitu 0 1 6.957497 0.000000 12723 +hislectur 0 1 6.957497 0.000000 12724 +militaryacademi 0 1 6.957497 0.000000 12725 +incens 0 1 6.957497 0.000000 12726 +hisformalist 0 1 6.957497 0.000000 12727 +hispromis 0 1 6.957497 0.000000 12728 +turin 0 1 6.957497 0.000000 12729 +sincomplet 0 1 6.957497 0.000000 12730 +rucker 0 1 6.957497 0.000000 12731 +extinct 0 1 6.957497 0.000000 12732 +kroto 0 1 6.957497 0.000000 12733 +britain 0 1 6.957497 0.000000 12734 +sussex 0 1 6.957497 0.000000 12735 +chemistrypr 0 1 6.957497 0.000000 12736 +curl 0 1 6.957497 0.000000 12737 +smallei 0 1 6.957497 0.000000 12738 +inhouston 0 1 6.957497 0.000000 12739 +asocc 0 1 6.957497 0.000000 12740 +upup 0 1 6.957497 0.000000 12741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..5ae72c5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,157 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +parallel 1 169 1.791759 1.791759 60 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +process 0 142 1.945910 0.000000 72 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +file 0 132 1.945910 0.000000 70 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +code 0 108 2.197225 0.000000 116 +intern 0 108 2.197225 0.000000 128 +specif 0 106 2.197225 0.000000 106 +structur 0 106 2.197225 0.000000 105 +version 0 113 2.197225 0.000000 122 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +graphic 0 90 2.397895 0.000000 147 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +ieee 0 86 2.484907 0.000000 190 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +refer 0 78 2.564949 0.000000 203 +april 0 77 2.564949 0.000000 196 +good 0 77 2.564949 0.000000 200 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +logic 0 71 2.639057 0.000000 230 +intellig 0 72 2.639057 0.000000 225 +august 0 66 2.708050 0.000000 257 +integr 0 67 2.708050 0.000000 245 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +interact 0 62 2.772589 0.000000 270 +juli 0 60 2.833213 0.000000 305 +major 0 56 2.890372 0.000000 315 +three 0 54 2.944439 0.000000 330 +visual 0 48 3.044522 0.000000 372 +physic 0 47 3.091042 0.000000 377 +california 0 46 3.091042 0.000000 388 +describ 0 45 3.135494 0.000000 400 +societi 0 40 3.258097 0.000000 456 +electr 0 38 3.295837 0.000000 461 +prototyp 0 38 3.295837 0.000000 463 +formal 0 37 3.332205 0.000000 478 +ofth 0 36 3.367296 0.000000 491 +jame 0 35 3.401197 0.000000 507 +award 0 34 3.401197 0.000000 523 +concurr 0 34 3.401197 0.000000 501 +extend 0 32 3.465736 0.000000 539 +idea 0 32 3.465736 0.000000 545 +focu 0 30 3.555348 0.000000 571 +specifi 0 30 3.555348 0.000000 568 +graph 0 30 3.555348 0.000000 576 +chair 0 29 3.583519 0.000000 596 +univ 0 28 3.610918 0.000000 617 +american 0 27 3.637586 0.000000 634 +compar 0 26 3.688879 0.000000 648 +experiment 0 26 3.688879 0.000000 645 +rule 0 26 3.688879 0.000000 638 +proc 0 26 3.688879 0.000000 649 +supercomput 0 25 3.737670 0.000000 681 +fellow 0 24 3.761200 0.000000 701 +flow 0 24 3.761200 0.000000 700 +displai 0 23 3.806662 0.000000 712 +siam 0 21 3.912023 0.000000 800 +longer 0 20 3.951244 0.000000 816 +debug 0 17 4.110874 0.000000 944 +moor 0 17 4.110874 0.000000 936 +brown 0 16 4.174387 0.000000 977 +partit 0 16 4.174387 0.000000 984 +brief 0 16 4.174387 0.000000 1001 +conf 0 13 4.382027 0.000000 1181 +evolv 0 12 4.465908 0.000000 1223 +robust 0 12 4.465908 0.000000 1271 +volum 0 11 4.553877 0.000000 1347 +broad 0 11 4.553877 0.000000 1302 +declar 0 9 4.753590 0.000000 1526 +notat 0 9 4.753590 0.000000 1489 +researchi 0 8 4.875197 0.000000 1756 +unifi 0 8 4.875197 0.000000 1774 +newton 0 7 5.010635 0.000000 1824 +henc 0 7 5.010635 0.000000 1805 +ongo 0 6 5.164786 0.000000 2215 +british 0 5 5.347108 0.000000 2546 +jain 0 5 5.347108 0.000000 2332 +mirank 0 5 5.347108 0.000000 2543 +remain 0 5 5.347108 0.000000 2278 +hyder 0 4 5.568345 0.000000 2772 +werth 0 4 5.568345 0.000000 3004 +interestparallel 0 3 5.857933 0.000000 3806 +narrow 0 3 5.857933 0.000000 3807 +publicationsj 0 3 5.857933 0.000000 3808 +baltimor 0 3 5.857933 0.000000 3809 +dongarra 0 2 6.263398 0.000000 5058 +hendrix 0 2 6.263398 0.000000 5490 +anabstract 0 2 6.263398 0.000000 5491 +brownereg 0 1 6.957497 0.000000 12742 +collegeph 0 1 6.957497 0.000000 12743 +austinhonor 0 1 6.957497 0.000000 12744 +societyarea 0 1 6.957497 0.000000 12745 +sciencewith 0 1 6.957497 0.000000 12746 +tenyear 0 1 6.957497 0.000000 12747 +computation 0 1 6.957497 0.000000 12748 +includesmethod 0 1 6.957497 0.000000 12749 +highlevel 0 1 6.957497 0.000000 12750 +throughdata 0 1 6.957497 0.000000 12751 +compositionalapproach 0 1 6.957497 0.000000 12752 +intelligenceprocess 0 1 6.957497 0.000000 12753 +fluiddynam 0 1 6.957497 0.000000 12754 +domaincompil 0 1 6.957497 0.000000 12755 +basedlanguag 0 1 6.957497 0.000000 12756 +timedecis 0 1 6.957497 0.000000 12757 +andpract 0 1 6.957497 0.000000 12758 +fourthworkshop 0 1 6.957497 0.000000 12759 +santacruz 0 1 6.957497 0.000000 12760 +theeffect 0 1 6.957497 0.000000 12761 +parallelizingcompil 0 1 6.957497 0.000000 12762 +kleyn 0 1 6.957497 0.000000 12763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..b15f7091 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +recent 0 167 1.791759 0.000000 58 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +peopl 0 96 2.302585 0.000000 132 +comment 0 93 2.397895 0.000000 146 +follow 0 92 2.397895 0.000000 143 +member 0 84 2.484907 0.000000 165 +chang 0 82 2.484907 0.000000 163 +wide 0 84 2.484907 0.000000 185 +ieee 0 86 2.484907 0.000000 190 +institut 0 84 2.484907 0.000000 187 +issu 0 78 2.564949 0.000000 211 +logic 0 71 2.639057 0.000000 230 +prof 0 64 2.772589 0.000000 273 +abstract 0 62 2.772589 0.000000 276 +special 0 56 2.890372 0.000000 320 +found 0 53 2.944439 0.000000 337 +electron 0 47 3.091042 0.000000 379 +autom 0 41 3.218876 0.000000 434 +electr 0 38 3.295837 0.000000 461 +rang 0 30 3.555348 0.000000 565 +utc 0 27 3.637586 0.000000 629 +trace 0 25 3.737670 0.000000 677 +vlsi 0 21 3.912023 0.000000 795 +martin 0 21 3.912023 0.000000 794 +chen 0 21 3.912023 0.000000 791 +rout 0 21 3.912023 0.000000 793 +supervis 0 20 3.951244 0.000000 840 +synthesi 0 20 3.951244 0.000000 834 +partit 0 16 4.174387 0.000000 984 +fpga 0 10 4.653960 0.000000 1433 +placement 0 10 4.653960 0.000000 1420 +wong 0 9 4.753590 0.000000 1609 +classifi 0 9 4.753590 0.000000 1537 +chung 0 7 5.010635 0.000000 1964 +zhou 0 6 5.164786 0.000000 2092 +ping 0 4 5.568345 0.000000 2922 +ming 0 3 5.857933 0.000000 3712 +researchth 0 2 6.263398 0.000000 5492 +broadli 0 2 6.263398 0.000000 5095 +sigda 0 2 6.263398 0.000000 5493 +thakur 0 1 6.957497 0.000000 12764 +addressdepart 0 1 6.957497 0.000000 12765 +chenyao 0 1 6.957497 0.000000 12766 +yung 0 1 6.957497 0.000000 12767 +fang 0 1 6.957497 0.000000 12768 +shashidhar 0 1 6.957497 0.000000 12769 +groupcan 0 1 6.957497 0.000000 12770 +austinclick 0 1 6.957497 0.000000 12771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..882c97cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +professor 0 137 1.945910 0.000000 76 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +resum 0 79 2.564949 0.000000 217 +april 0 77 2.564949 0.000000 196 +prof 0 64 2.772589 0.000000 273 +variou 0 56 2.890372 0.000000 317 +talk 0 53 2.944439 0.000000 336 +hardwar 0 51 2.995732 0.000000 350 +give 0 50 3.044522 0.000000 359 +cool 0 49 3.044522 0.000000 374 +done 0 47 3.091042 0.000000 381 +slide 0 38 3.295837 0.000000 467 +photo 0 31 3.496508 0.000000 561 +travel 0 30 3.555348 0.000000 579 +built 0 29 3.583519 0.000000 592 +effort 0 26 3.688879 0.000000 652 +highli 0 23 3.806662 0.000000 725 +thank 0 23 3.806662 0.000000 721 +divis 0 21 3.912023 0.000000 803 +verif 0 20 3.951244 0.000000 826 +women 0 16 4.174387 0.000000 1004 +todd 0 15 4.248495 0.000000 1051 +wife 0 13 4.382027 0.000000 1196 +land 0 12 4.465908 0.000000 1273 +speak 0 12 4.465908 0.000000 1283 +bill 0 11 4.553877 0.000000 1297 +peter 0 11 4.553877 0.000000 1316 +label 0 10 4.653960 0.000000 1423 +poetri 0 9 4.753590 0.000000 1596 +andth 0 9 4.753590 0.000000 1481 +mach 0 8 4.875197 0.000000 1669 +daughter 0 7 5.010635 0.000000 1943 +ruth 0 7 5.010635 0.000000 1870 +prioriti 0 7 5.010635 0.000000 1792 +foreign 0 7 5.010635 0.000000 1919 +beer 0 6 5.164786 0.000000 2216 +somewher 0 6 5.164786 0.000000 2176 +approv 0 6 5.164786 0.000000 2078 +humor 0 5 5.347108 0.000000 2533 +allen 0 5 5.347108 0.000000 2470 +emerson 0 5 5.347108 0.000000 2547 +li 0 5 5.347108 0.000000 2500 +substitut 0 5 5.347108 0.000000 2247 +ti 0 4 5.568345 0.000000 3005 +clair 0 4 5.568345 0.000000 2605 +enjoy 0 4 5.568345 0.000000 2937 +guangtian 0 3 5.857933 0.000000 3810 +haiku 0 3 5.857933 0.000000 3811 +cristian 0 2 6.263398 0.000000 4311 +sourcesth 0 2 6.263398 0.000000 4219 +disinform 0 2 6.263398 0.000000 5494 +dole 0 2 6.263398 0.000000 4067 +canfieldhom 0 1 6.957497 0.000000 12772 +businessmi 0 1 6.957497 0.000000 12773 +flaviu 0 1 6.957497 0.000000 12774 +ther 0 1 6.957497 0.000000 12775 +pleasuredomest 0 1 6.957497 0.000000 12776 +bliss 0 1 6.957497 0.000000 12777 +carla 0 1 6.957497 0.000000 12778 +newborn 0 1 6.957497 0.000000 12779 +parenthood 0 1 6.957497 0.000000 12780 +struck 0 1 6.957497 0.000000 12781 +peel 0 1 6.957497 0.000000 12782 +bottl 0 1 6.957497 0.000000 12783 +sofaspher 0 1 6.957497 0.000000 12784 +olestra 0 1 6.957497 0.000000 12785 +canfield 0 1 6.957497 0.000000 12786 +peterst 0 1 6.957497 0.000000 12787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..322a4b28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +introduct 0 126 2.079442 0.000000 87 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +question 0 91 2.397895 0.000000 141 +real 0 93 2.397895 0.000000 144 +member 0 84 2.484907 0.000000 165 +wide 0 84 2.484907 0.000000 185 +order 0 69 2.708050 0.000000 249 +function 0 62 2.772589 0.000000 275 +taylor 0 63 2.772589 0.000000 287 +suggest 0 53 2.944439 0.000000 331 +autom 0 41 3.218876 0.000000 434 +extend 0 32 3.465736 0.000000 539 +express 0 32 3.465736 0.000000 540 +computersci 0 30 3.555348 0.000000 562 +usual 0 28 3.610918 0.000000 608 +progress 0 28 3.610918 0.000000 598 +theorem 0 21 3.912023 0.000000 786 +supervis 0 20 3.951244 0.000000 840 +safeti 0 20 3.951244 0.000000 817 +prove 0 19 4.007333 0.000000 848 +partial 0 18 4.060443 0.000000 900 +finit 0 14 4.317488 0.000000 1106 +candid 0 9 4.753590 0.000000 1606 +misra 0 7 5.010635 0.000000 1856 +jayadev 0 4 5.568345 0.000000 3006 +uniti 0 3 5.857933 0.000000 3812 +alsointerest 0 3 5.857933 0.000000 3813 +carruth 1 2 6.263398 6.263398 5495 +mydissert 0 2 6.263398 0.000000 5496 +carruthpleas 0 1 6.957497 0.000000 12788 +boundson 0 1 6.957497 0.000000 12789 +ordersemant 0 1 6.957497 0.000000 12790 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..9322659b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +schedul 0 119 2.079442 0.000000 85 +intern 0 108 2.197225 0.000000 128 +send 0 114 2.197225 0.000000 109 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +homework 0 79 2.564949 0.000000 193 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +new 0 64 2.772589 0.000000 262 +improv 0 62 2.772589 0.000000 289 +locat 0 59 2.833213 0.000000 303 +summer 0 56 2.890372 0.000000 311 +suggest 0 53 2.944439 0.000000 331 +idea 0 32 3.465736 0.000000 545 +chen 0 21 3.912023 0.000000 791 +exercis 0 19 4.007333 0.000000 842 +intel 0 16 4.174387 0.000000 1000 +meng 0 12 4.465908 0.000000 1214 +chung 0 7 5.010635 0.000000 1964 +ping 1 4 5.568345 5.568345 2922 +fiance 0 2 6.263398 0.000000 5497 +tsai 0 2 6.263398 0.000000 4831 +bufferinsert 0 1 6.957497 0.000000 12791 +syllabustopicschung 0 1 6.957497 0.000000 12792 +clen 0 1 6.957497 0.000000 12793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..cd25d3d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +well 0 109 2.197225 0.000000 121 +homepag 0 93 2.397895 0.000000 148 +main 0 67 2.708050 0.000000 256 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +chen 0 21 3.912023 0.000000 791 +break 0 20 3.951244 0.000000 812 +item 0 19 4.007333 0.000000 856 +easi 0 16 4.174387 0.000000 969 +lake 0 11 4.553877 0.000000 1373 +paragraph 0 10 4.653960 0.000000 1449 +mepost 0 10 4.653960 0.000000 1472 +usaphon 0 9 4.753590 0.000000 1600 +forget 0 8 4.875197 0.000000 1712 +shanghai 0 4 5.568345 0.000000 2925 +blvd 0 4 5.568345 0.000000 3007 +deji 0 2 6.263398 0.000000 5498 +chenabout 0 2 6.263398 0.000000 5499 +bullet 0 2 6.263398 0.000000 5500 +mehello 0 1 6.957497 0.000000 12794 +tongji 0 1 6.957497 0.000000 12795 +chinaa 0 1 6.957497 0.000000 12796 +usahom 0 1 6.957497 0.000000 12797 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..27266b8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +peopl 0 96 2.302585 0.000000 132 +educ 0 86 2.484907 0.000000 191 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +school 0 84 2.484907 0.000000 188 +write 0 72 2.639057 0.000000 222 +simul 0 66 2.708050 0.000000 255 +artifici 0 63 2.772589 0.000000 280 +new 0 64 2.772589 0.000000 262 +plai 0 60 2.833213 0.000000 307 +visual 0 48 3.044522 0.000000 372 +life 0 50 3.044522 0.000000 375 +archiv 0 49 3.044522 0.000000 364 +programm 0 39 3.258097 0.000000 445 +movi 0 40 3.258097 0.000000 459 +microsoft 0 38 3.295837 0.000000 468 +mean 0 37 3.332205 0.000000 477 +robot 0 36 3.367296 0.000000 497 +represent 0 35 3.401197 0.000000 512 +common 0 30 3.555348 0.000000 574 +symbol 0 27 3.637586 0.000000 620 +spent 0 25 3.737670 0.000000 676 +client 0 25 3.737670 0.000000 679 +corpor 0 21 3.912023 0.000000 802 +martin 0 21 3.912023 0.000000 794 +watch 0 21 3.912023 0.000000 789 +voic 0 21 3.912023 0.000000 806 +portabl 0 20 3.951244 0.000000 819 +scheme 0 20 3.951244 0.000000 818 +left 0 19 4.007333 0.000000 851 +lisp 0 18 4.060443 0.000000 897 +listen 0 18 4.060443 0.000000 907 +macintosh 0 17 4.110874 0.000000 920 +anywai 0 15 4.248495 0.000000 1047 +emploi 0 12 4.465908 0.000000 1284 +tour 0 11 4.553877 0.000000 1307 +wood 0 11 4.553877 0.000000 1355 +road 0 11 4.553877 0.000000 1374 +bike 0 10 4.653960 0.000000 1468 +hang 0 9 4.753590 0.000000 1499 +brain 0 8 4.875197 0.000000 1638 +ride 0 8 4.875197 0.000000 1741 +sleep 0 6 5.164786 0.000000 2211 +dream 0 6 5.164786 0.000000 2165 +fiction 0 6 5.164786 0.000000 2217 +emerg 0 6 5.164786 0.000000 2038 +hair 0 5 5.347108 0.000000 2446 +worst 0 5 5.347108 0.000000 2287 +webster 0 5 5.347108 0.000000 2468 +phrase 0 5 5.347108 0.000000 2242 +austindepart 0 4 5.568345 0.000000 3008 +catch 0 4 5.568345 0.000000 2602 +sciencestaylor 0 3 5.857933 0.000000 3814 +republican 0 3 5.857933 0.000000 3815 +softwareth 0 3 5.857933 0.000000 3552 +stone 0 3 5.857933 0.000000 3674 +cliff 1 2 6.263398 6.263398 4285 +mstk 0 2 6.263398 0.000000 5501 +northwestern 0 2 6.263398 0.000000 5502 +captain 0 2 6.263398 0.000000 4983 +webth 0 2 6.263398 0.000000 5481 +weird 0 2 6.263398 0.000000 5503 +chaputcliff 0 1 6.957497 0.000000 12798 +chaputth 0 1 6.957497 0.000000 12799 +robotlab 0 1 6.957497 0.000000 12800 +dullchaput 0 1 6.957497 0.000000 12801 +gothimself 0 1 6.957497 0.000000 12802 +anemail 0 1 6.957497 0.000000 12803 +odesta 0 1 6.957497 0.000000 12804 +thelearn 0 1 6.957497 0.000000 12805 +hewrot 0 1 6.957497 0.000000 12806 +trane 0 1 6.957497 0.000000 12807 +thenimpl 0 1 6.957497 0.000000 12808 +studentscal 0 1 6.957497 0.000000 12809 +gamesproject 0 1 6.957497 0.000000 12810 +labannoi 0 1 6.957497 0.000000 12811 +farka 0 1 6.957497 0.000000 12812 +medeski 0 1 6.957497 0.000000 12813 +rerun 0 1 6.957497 0.000000 12814 +korg 0 1 6.957497 0.000000 12815 +turnon 0 1 6.957497 0.000000 12816 +breakfast 0 1 6.957497 0.000000 12817 +raspi 0 1 6.957497 0.000000 12818 +starfleet 0 1 6.957497 0.000000 12819 +turnoff 0 1 6.957497 0.000000 12820 +hangov 0 1 6.957497 0.000000 12821 +fave 0 1 6.957497 0.000000 12822 +eventsdaili 0 1 6.957497 0.000000 12823 +reutersintellicast 0 1 6.957497 0.000000 12824 +weatheraustin 0 1 6.957497 0.000000 12825 +txchicago 0 1 6.957497 0.000000 12826 +ilperiodicalssucksalonmirski 0 1 6.957497 0.000000 12827 +onionmacweekmacuserreferencehypertext 0 1 6.957497 0.000000 12828 +interfaceyahooalta 0 1 6.957497 0.000000 12829 +vistacardiff 0 1 6.957497 0.000000 12830 +databaselyco 0 1 6.957497 0.000000 12831 +mapalt 0 1 6.957497 0.000000 12832 +culturemacintosh 0 1 6.957497 0.000000 12833 +dataappl 0 1 6.957497 0.000000 12834 +computercyberdogquicktimequickdraw 0 1 6.957497 0.000000 12835 +dappl 0 1 6.957497 0.000000 12836 +supportmacintouchmacintosh 0 1 6.957497 0.000000 12837 +resourcecyberdog 0 1 6.957497 0.000000 12838 +poundinfo 0 1 6.957497 0.000000 12839 +rootcool 0 1 6.957497 0.000000 12840 +stufffringewareth 0 1 6.957497 0.000000 12841 +actlabpbsnprnow 0 1 6.957497 0.000000 12842 +catalogpap 0 1 6.957497 0.000000 12843 +rsumsymbol 0 1 6.957497 0.000000 12844 +groundingrobotmap 0 1 6.957497 0.000000 12845 +peopledav 0 1 6.957497 0.000000 12846 +falooncharl 0 1 6.957497 0.000000 12847 +lewisjeff 0 1 6.957497 0.000000 12848 +lindjeff 0 1 6.957497 0.000000 12849 +sherwoodbrian 0 1 6.957497 0.000000 12850 +slatorsandi 0 1 6.957497 0.000000 12851 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..073591e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +place 0 106 2.197225 0.000000 124 +check 0 115 2.197225 0.000000 118 +find 0 111 2.197225 0.000000 111 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +search 0 95 2.397895 0.000000 155 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +chang 0 82 2.484907 0.000000 163 +thing 0 84 2.484907 0.000000 189 +internet 0 83 2.484907 0.000000 186 +journal 0 83 2.484907 0.000000 183 +come 0 78 2.564949 0.000000 202 +orient 0 80 2.564949 0.000000 205 +know 0 80 2.564949 0.000000 198 +complet 0 77 2.564949 0.000000 208 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +detail 0 57 2.890372 0.000000 321 +unix 0 58 2.890372 0.000000 308 +without 0 50 3.044522 0.000000 370 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +origin 0 38 3.295837 0.000000 472 +china 0 37 3.332205 0.000000 487 +word 0 34 3.401197 0.000000 508 +return 0 34 3.401197 0.000000 502 +hard 0 30 3.555348 0.000000 563 +univ 0 28 3.610918 0.000000 617 +mind 0 27 3.637586 0.000000 632 +enjoi 0 26 3.688879 0.000000 660 +magazin 0 24 3.761200 0.000000 704 +wang 0 21 3.912023 0.000000 790 +among 0 21 3.912023 0.000000 781 +break 0 20 3.951244 0.000000 812 +fine 0 20 3.951244 0.000000 822 +thought 0 17 4.110874 0.000000 945 +countri 0 15 4.248495 0.000000 1059 +decid 0 14 4.317488 0.000000 1075 +came 0 13 4.382027 0.000000 1197 +tsinghua 0 13 4.382027 0.000000 1195 +captur 0 12 4.465908 0.000000 1232 +moment 0 11 4.553877 0.000000 1379 +surf 0 11 4.553877 0.000000 1301 +earth 0 10 4.653960 0.000000 1463 +end 0 9 4.753590 0.000000 1567 +jump 0 9 4.753590 0.000000 1603 +unusu 0 9 4.753590 0.000000 1566 +opinion 0 8 4.875197 0.000000 1708 +philosoph 0 7 5.010635 0.000000 1904 +televis 0 6 5.164786 0.000000 2118 +rock 0 6 5.164786 0.000000 2164 +million 0 5 5.347108 0.000000 2495 +provinc 0 4 5.568345 0.000000 3009 +gloriou 0 3 5.857933 0.000000 3816 +hometown 0 3 5.857933 0.000000 3817 +tower 0 3 5.857933 0.000000 3818 +fresh 0 3 5.857933 0.000000 3706 +nifti 0 2 6.263398 0.000000 5504 +numb 0 2 6.263398 0.000000 5505 +pope 0 2 6.263398 0.000000 5506 +chuanjun 0 1 6.957497 0.000000 12852 +diamond 0 1 6.957497 0.000000 12853 +stun 0 1 6.957497 0.000000 12854 +hubei 0 1 6.957497 0.000000 12855 +beautifulunivers 0 1 6.957497 0.000000 12856 +faceless 0 1 6.957497 0.000000 12857 +brilliant 0 1 6.957497 0.000000 12858 +miner 0 1 6.957497 0.000000 12859 +unemploi 0 1 6.957497 0.000000 12860 +dobb 0 1 6.957497 0.000000 12861 +prose 0 1 6.957497 0.000000 12862 +porsch 0 1 6.957497 0.000000 12863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..47494572 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +updat 0 191 1.609438 0.000000 41 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +assist 0 112 2.197225 0.000000 113 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +master 0 76 2.564949 0.000000 216 +interfac 0 79 2.564949 0.000000 209 +nation 0 74 2.639057 0.000000 240 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +knowledg 0 67 2.708050 0.000000 243 +dept 0 64 2.772589 0.000000 291 +improv 0 62 2.772589 0.000000 289 +result 0 65 2.772589 0.000000 281 +automat 0 61 2.833213 0.000000 306 +major 0 56 2.890372 0.000000 315 +allow 0 53 2.944439 0.000000 333 +tabl 0 51 2.995732 0.000000 346 +visitor 0 49 3.044522 0.000000 371 +natur 0 44 3.135494 0.000000 406 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +continu 0 39 3.258097 0.000000 448 +word 0 34 3.401197 0.000000 508 +human 0 32 3.465736 0.000000 546 +chines 0 29 3.583519 0.000000 595 +reach 0 24 3.761200 0.000000 688 +proof 0 23 3.806662 0.000000 720 +tenni 0 20 3.951244 0.000000 838 +ever 0 19 4.007333 0.000000 872 +segment 0 17 4.110874 0.000000 931 +taiwan 0 16 4.174387 0.000000 1006 +capabl 0 15 4.248495 0.000000 1016 +train 0 14 4.317488 0.000000 1066 +huang 0 12 4.465908 0.000000 1202 +basketbal 0 12 4.465908 0.000000 1289 +literatur 0 11 4.553877 0.000000 1300 +tag 0 7 5.010635 0.000000 1821 +academia 0 6 5.164786 0.000000 2036 +chin 0 5 5.347108 0.000000 2408 +categori 0 5 5.347108 0.000000 2261 +markov 0 5 5.347108 0.000000 2280 +accuraci 0 5 5.347108 0.000000 2450 +atlant 0 5 5.347108 0.000000 2508 +taipei 0 4 5.568345 0.000000 2926 +worki 0 4 5.568345 0.000000 3010 +basebal 0 4 5.568345 0.000000 2969 +edufing 0 4 5.568345 0.000000 2713 +sinica 0 3 5.857933 0.000000 3819 +expans 0 3 5.857933 0.000000 3755 +instituteof 0 2 6.263398 0.000000 5507 +pinbal 0 2 6.263398 0.000000 5508 +todayth 0 2 6.263398 0.000000 5416 +tser 0 1 6.957497 0.000000 12864 +systemsexperiencei 0 1 6.957497 0.000000 12865 +usinghidden 0 1 6.957497 0.000000 12866 +friendli 0 1 6.957497 0.000000 12867 +toexecut 0 1 6.957497 0.000000 12868 +automatictag 0 1 6.957497 0.000000 12869 +improvedbecaus 0 1 6.957497 0.000000 12870 +interestsmovi 0 1 6.957497 0.000000 12871 +semiolog 0 1 6.957497 0.000000 12872 +siteschina 0 1 6.957497 0.000000 12873 +timesminsheng 0 1 6.957497 0.000000 12874 +dailyth 0 1 6.957497 0.000000 12875 +timesusa 0 1 6.957497 0.000000 12876 +economistth 0 1 6.957497 0.000000 12877 +monthlymak 0 1 6.957497 0.000000 12878 +chuang 0 1 6.957497 0.000000 12879 +meyou 0 1 6.957497 0.000000 12880 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..09ae684e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +august 0 66 2.708050 0.000000 257 +robert 0 30 3.555348 0.000000 567 +runtim 0 19 4.007333 0.000000 858 +multithread 0 11 4.553877 0.000000 1315 +pronounc 0 7 5.010635 0.000000 1918 +blumoferdb 0 5 5.347108 0.000000 2324 +silk 0 2 6.263398 0.000000 5373 +inthi 0 2 6.263398 0.000000 5509 +cilkcilkcilk 0 1 6.957497 0.000000 12881 +languageand 0 1 6.957497 0.000000 12882 +thecilk 0 1 6.957497 0.000000 12883 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..56706f8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +high 0 130 2.079442 0.000000 101 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +school 0 84 2.484907 0.000000 188 +plan 0 65 2.772589 0.000000 272 +complex 0 64 2.772589 0.000000 269 +thesi 0 57 2.890372 0.000000 327 +theoret 0 39 3.258097 0.000000 446 +sciencesunivers 0 37 3.332205 0.000000 486 +connect 0 37 3.332205 0.000000 485 +hong 0 14 4.317488 0.000000 1105 +kong 0 9 4.753590 0.000000 1602 +chung 1 7 5.010635 5.010635 1964 +austinaustin 0 7 5.010635 0.000000 1966 +edumi 0 6 5.164786 0.000000 2132 +fish 0 6 5.164786 0.000000 2207 +poon 0 3 5.857933 0.000000 3820 +ckpoon 0 2 6.263398 0.000000 5510 +hungri 0 2 6.263398 0.000000 5511 +keung 1 1 6.957497 6.957497 12884 +poondepart 0 1 6.957497 0.000000 12885 +askvinc 0 1 6.957497 0.000000 12886 +gogan 0 1 6.957497 0.000000 12887 +problemsom 0 1 6.957497 0.000000 12888 +harmonica 0 1 6.957497 0.000000 12889 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..b07ec79b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +link 0 247 1.386294 0.000000 24 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +relat 0 139 1.945910 0.000000 68 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +member 0 84 2.484907 0.000000 165 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +dept 0 64 2.772589 0.000000 291 +secur 1 30 3.555348 3.555348 577 +chines 0 29 3.583519 0.000000 595 +linux 0 27 3.637586 0.000000 631 +head 0 23 3.806662 0.000000 732 +divis 0 21 3.912023 0.000000 803 +role 0 14 4.317488 0.000000 1101 +hong 0 14 4.317488 0.000000 1105 +usavoic 0 13 4.382027 0.000000 1198 +thedepart 0 11 4.553877 0.000000 1350 +cryptographi 0 9 4.753590 0.000000 1512 +kong 0 9 4.753590 0.000000 1602 +simon 0 8 4.875197 0.000000 1697 +chung 0 7 5.010635 0.000000 1964 +park 0 6 5.164786 0.000000 2218 +mission 0 5 5.347108 0.000000 2465 +nist 0 4 5.568345 0.000000 2973 +church 0 4 5.568345 0.000000 3011 +rivest 0 3 5.857933 0.000000 3248 +meemail 0 3 5.857933 0.000000 3821 +edupost 0 3 5.857933 0.000000 3822 +thenetwork 0 2 6.263398 0.000000 5434 +byprof 0 2 6.263398 0.000000 5512 +wongchung 0 1 6.957497 0.000000 12890 +wonglast 0 1 6.957497 0.000000 12891 +labwhich 0 1 6.957497 0.000000 12892 +clearinghous 0 1 6.957497 0.000000 12893 +rbac 0 1 6.957497 0.000000 12894 +ckwong 0 1 6.957497 0.000000 12895 +hyde 0 1 6.957497 0.000000 12896 +baptist 0 1 6.957497 0.000000 12897 +netbsd 0 1 6.957497 0.000000 12898 +freebsd 0 1 6.957497 0.000000 12899 +openbsd 0 1 6.957497 0.000000 12900 +tockwong 0 1 6.957497 0.000000 12901 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..56127109 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +austin 0 168 1.791759 0.000000 63 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +techniqu 0 99 2.302585 0.000000 138 +real 0 93 2.397895 0.000000 144 +larg 0 82 2.484907 0.000000 168 +requir 0 81 2.484907 0.000000 167 +build 0 85 2.484907 0.000000 184 +dynam 0 76 2.564949 0.000000 194 +issu 0 78 2.564949 0.000000 211 +appli 0 71 2.639057 0.000000 226 +simul 1 66 2.708050 2.708050 255 +knowledg 0 67 2.708050 0.000000 243 +integr 0 67 2.708050 0.000000 245 +abstract 0 62 2.772589 0.000000 276 +complex 0 64 2.772589 0.000000 269 +result 0 65 2.772589 0.000000 281 +descript 0 64 2.772589 0.000000 271 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +reason 0 57 2.890372 0.000000 318 +detail 0 57 2.890372 0.000000 321 +particular 0 51 2.995732 0.000000 352 +finger 0 52 2.995732 0.000000 354 +possibl 0 47 3.091042 0.000000 378 +netscap 0 44 3.135494 0.000000 395 +vita 0 38 3.295837 0.000000 473 +focu 0 30 3.555348 0.000000 571 +scale 0 28 3.610918 0.000000 613 +retriev 0 27 3.637586 0.000000 621 +constraint 0 26 3.688879 0.000000 636 +variabl 0 23 3.806662 0.000000 715 +behavior 0 18 4.060443 0.000000 881 +hotlist 0 13 4.382027 0.000000 1199 +qualit 0 11 4.553877 0.000000 1362 +facilit 0 10 4.653960 0.000000 1412 +incomplet 0 9 4.753590 0.000000 1575 +elimin 0 9 4.753590 0.000000 1558 +informationemail 0 9 4.753590 0.000000 1564 +aggreg 0 6 5.164786 0.000000 2219 +irrelev 0 3 5.857933 0.000000 3823 +descriptionof 0 2 6.263398 0.000000 5513 +intract 0 2 6.263398 0.000000 5044 +thiswil 0 2 6.263398 0.000000 4944 +withlarg 0 2 6.263398 0.000000 4926 +followingtechniqu 0 2 6.263398 0.000000 5514 +clanci 0 1 6.957497 0.000000 12902 +clancyresearch 0 1 6.957497 0.000000 12903 +containinga 0 1 6.957497 0.000000 12904 +frequentlyi 0 1 6.957497 0.000000 12905 +incomprehens 0 1 6.957497 0.000000 12906 +simulationto 0 1 6.957497 0.000000 12907 +distinctionsof 0 1 6.957497 0.000000 12908 +whichaddress 0 1 6.957497 0.000000 12909 +abstractiontechniqu 0 1 6.957497 0.000000 12910 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..9453d798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +three 1 54 2.944439 2.944439 330 +five 1 19 4.007333 4.007333 841 +four 1 18 4.060443 4.060443 905 +eight 1 11 4.553877 4.553877 1331 +seven 1 9 4.753590 4.753590 1561 +nine 1 6 5.164786 5.164786 2047 +eleven 1 3 5.857933 5.857933 3824 +jimbo 1 1 6.957497 6.957497 12911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..8e56e6b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +call 0 91 2.397895 0.000000 153 +taylor 0 63 2.772589 0.000000 287 +januari 0 62 2.772589 0.000000 264 +author 0 39 3.258097 0.000000 450 +china 0 37 3.332205 0.000000 487 +photo 0 31 3.496508 0.000000 561 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +edulast 0 17 4.110874 0.000000 927 +hong 0 14 4.317488 0.000000 1105 +chri 0 11 4.553877 0.000000 1311 +fellowship 0 10 4.653960 0.000000 1460 +kong 0 9 4.753590 0.000000 1602 +christian 0 7 5.010635 0.000000 1949 +church 0 4 5.568345 0.000000 3011 +chuwelcom 0 1 6.957497 0.000000 12912 +myselfmi 0 1 6.957497 0.000000 12913 +chuemail 0 1 6.957497 0.000000 12914 +cnchu 0 1 6.957497 0.000000 12915 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..e00539d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,180 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +texa 0 160 1.791759 0.000000 64 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +file 0 132 1.945910 0.000000 70 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +postscript 0 131 2.079442 0.000000 90 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +code 1 108 2.197225 2.197225 116 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +user 0 104 2.302585 0.000000 137 +mani 0 92 2.397895 0.000000 150 +comment 0 93 2.397895 0.000000 146 +member 0 84 2.484907 0.000000 165 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +interfac 0 79 2.564949 0.000000 209 +come 0 78 2.564949 0.000000 202 +refer 0 78 2.564949 0.000000 203 +dynam 0 76 2.564949 0.000000 194 +line 0 75 2.639057 0.000000 231 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +previou 0 62 2.772589 0.000000 290 +written 0 63 2.772589 0.000000 278 +improv 0 62 2.772589 0.000000 289 +januari 0 62 2.772589 0.000000 264 +automat 0 61 2.833213 0.000000 306 +direct 0 57 2.890372 0.000000 316 +major 0 56 2.890372 0.000000 315 +browser 0 56 2.890372 0.000000 313 +publish 0 57 2.890372 0.000000 326 +faculti 0 56 2.890372 0.000000 325 +overview 0 56 2.890372 0.000000 323 +allow 0 53 2.944439 0.000000 333 +visual 0 48 3.044522 0.000000 372 +still 0 50 3.044522 0.000000 362 +featur 0 46 3.091042 0.000000 386 +directori 0 45 3.135494 0.000000 396 +made 0 44 3.135494 0.000000 398 +edit 0 42 3.218876 0.000000 418 +tutori 0 39 3.258097 0.000000 437 +announc 0 40 3.258097 0.000000 441 +multipl 0 39 3.258097 0.000000 453 +join 0 39 3.258097 0.000000 457 +form 0 39 3.258097 0.000000 443 +connect 0 37 3.332205 0.000000 485 +download 0 36 3.367296 0.000000 489 +manual 0 35 3.401197 0.000000 504 +singl 0 34 3.401197 0.000000 510 +jame 0 35 3.401197 0.000000 507 +articl 0 33 3.433987 0.000000 530 +john 0 33 3.433987 0.000000 532 +independ 0 32 3.465736 0.000000 548 +kind 0 32 3.465736 0.000000 541 +ad 0 32 3.465736 0.000000 544 +produc 0 30 3.555348 0.000000 572 +graph 0 30 3.555348 0.000000 576 +compon 0 30 3.555348 0.000000 570 +releas 0 28 3.610918 0.000000 616 +quit 0 27 3.637586 0.000000 633 +repres 0 26 3.688879 0.000000 656 +revis 0 26 3.688879 0.000000 640 +constraint 0 26 3.688879 0.000000 636 +flow 0 24 3.761200 0.000000 700 +compress 0 23 3.806662 0.000000 719 +sequenti 0 22 3.850148 0.000000 745 +varieti 0 22 3.850148 0.000000 740 +hierarchi 0 22 3.850148 0.000000 744 +alumni 0 21 3.912023 0.000000 807 +prepar 0 20 3.951244 0.000000 824 +brown 0 16 4.174387 0.000000 977 +partit 0 16 4.174387 0.000000 984 +enough 0 15 4.248495 0.000000 1040 +affili 0 13 4.382027 0.000000 1194 +incorpor 0 13 4.382027 0.000000 1163 +instanc 0 11 4.553877 0.000000 1322 +node 0 11 4.553877 0.000000 1326 +fill 0 11 4.553877 0.000000 1349 +regard 0 11 4.553877 0.000000 1309 +screen 0 9 4.753590 0.000000 1577 +compos 0 9 4.753590 0.000000 1527 +entitl 0 9 4.753590 0.000000 1490 +shot 0 7 5.010635 0.000000 1898 +notifi 0 6 5.164786 0.000000 2106 +banerje 0 6 5.164786 0.000000 2018 +parallelprogram 0 5 5.347108 0.000000 2379 +stage 0 5 5.347108 0.000000 2488 +despit 0 5 5.347108 0.000000 2317 +alfr 0 4 5.568345 0.000000 2882 +lord 0 4 5.568345 0.000000 2906 +crai 0 4 5.568345 0.000000 3012 +prospect 0 4 5.568345 0.000000 3013 +snail 0 4 5.568345 0.000000 2916 +werth 0 4 5.568345 0.000000 3004 +preced 0 3 5.857933 0.000000 3107 +sophist 0 3 5.857933 0.000000 3545 +easier 0 3 5.857933 0.000000 3470 +pleasant 0 3 5.857933 0.000000 3825 +informationfor 0 3 5.857933 0.000000 3738 +berger 0 3 5.857933 0.000000 3702 +dwip 0 3 5.857933 0.000000 3197 +emeri 0 2 6.263398 0.000000 5515 +wilder 0 2 6.263398 0.000000 5516 +symmetri 0 2 6.263398 0.000000 5517 +newest 0 2 6.263398 0.000000 5518 +reproduc 0 2 6.263398 0.000000 5519 +publicationscod 0 2 6.263398 0.000000 5520 +ajita 0 2 6.263398 0.000000 5461 +systemmast 0 1 6.957497 0.000000 12916 +lawless 0 1 6.957497 0.000000 12917 +codeless 0 1 6.957497 0.000000 12918 +myriad 0 1 6.957497 0.000000 12919 +tennysoncod 0 1 6.957497 0.000000 12920 +sequentialprogram 0 1 6.957497 0.000000 12921 +wheredata 0 1 6.957497 0.000000 12922 +arc 0 1 6.957497 0.000000 12923 +thesequenti 0 1 6.957497 0.000000 12924 +sequent 0 1 6.957497 0.000000 12925 +smp 0 1 6.957497 0.000000 12926 +macdraw 0 1 6.957497 0.000000 12927 +subgraph 0 1 6.957497 0.000000 12928 +hpcwire 0 1 6.957497 0.000000 12929 +backend 0 1 6.957497 0.000000 12930 +xcodelib 0 1 6.957497 0.000000 12931 +lieu 0 1 6.957497 0.000000 12932 +groupgroup 0 1 6.957497 0.000000 12933 +leaderprofessor 0 1 6.957497 0.000000 12934 +bergerstud 0 1 6.957497 0.000000 12935 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..eadc5ec2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +site 0 106 2.197225 0.000000 119 +search 1 95 2.397895 2.397895 155 +addit 0 74 2.639057 0.000000 228 +knowledg 0 67 2.708050 0.000000 243 +taylor 0 63 2.772589 0.000000 287 +tech 0 35 3.401197 0.000000 515 +steve 0 29 3.583519 0.000000 594 +hotlist 0 13 4.382027 0.000000 1199 +correl 0 5 5.347108 0.000000 2279 +multifunct 0 3 5.857933 0.000000 3826 +correlstev 0 1 6.957497 0.000000 12936 +correlresearchph 0 1 6.957497 0.000000 12937 +reportcontact 0 1 6.957497 0.000000 12938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..e85c4893 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +hall 0 146 1.945910 0.000000 65 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +part 0 98 2.302585 0.000000 129 +real 0 93 2.397895 0.000000 144 +control 0 82 2.484907 0.000000 164 +method 0 80 2.564949 0.000000 213 +written 0 63 2.772589 0.000000 278 +taylor 0 63 2.772589 0.000000 287 +origin 0 38 3.295837 0.000000 472 +respons 0 37 3.332205 0.000000 476 +formal 0 37 3.332205 0.000000 478 +robot 0 36 3.367296 0.000000 497 +synchron 0 29 3.583519 0.000000 588 +packag 0 28 3.610918 0.000000 614 +utc 0 27 3.637586 0.000000 629 +linux 0 27 3.637586 0.000000 631 +properti 0 22 3.850148 0.000000 749 +inth 0 22 3.850148 0.000000 741 +half 0 21 3.912023 0.000000 776 +latest 0 21 3.912023 0.000000 785 +wrote 0 20 3.951244 0.000000 830 +lot 0 18 4.060443 0.000000 889 +devic 0 16 4.174387 0.000000 1002 +driver 0 8 4.875197 0.000000 1657 +carlo 0 5 5.347108 0.000000 2515 +tempest 0 5 5.347108 0.000000 2548 +theth 0 5 5.347108 0.000000 2325 +toolset 0 4 5.568345 0.000000 3014 +austindepart 0 4 5.568345 0.000000 3008 +provinc 0 4 5.568345 0.000000 3009 +reactiv 0 3 5.857933 0.000000 3575 +publicationsi 0 3 5.857933 0.000000 3827 +softwareth 0 3 5.857933 0.000000 3552 +sciencesaustin 0 3 5.857933 0.000000 3828 +grabber 0 2 6.263398 0.000000 5521 +spain 0 2 6.263398 0.000000 5522 +esterel 0 1 6.957497 0.000000 12939 +pucholcarlo 0 1 6.957497 0.000000 12940 +pucholresearch 0 1 6.957497 0.000000 12941 +mawl 0 1 6.957497 0.000000 12942 +forbrows 0 1 6.957497 0.000000 12943 +verifyingsafeti 0 1 6.957497 0.000000 12944 +thequantavisionfram 0 1 6.957497 0.000000 12945 +thejoystickdevic 0 1 6.957497 0.000000 12946 +informationoffic 0 1 6.957497 0.000000 12947 +dreal 0 1 6.957497 0.000000 12948 +phun 0 1 6.957497 0.000000 12949 +interestsmemb 0 1 6.957497 0.000000 12950 +interesti 0 1 6.957497 0.000000 12951 +fromgandia 0 1 6.957497 0.000000 12952 +valencia 0 1 6.957497 0.000000 12953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..53c7e342 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +base 0 165 1.791759 0.000000 50 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +real 0 93 2.397895 0.000000 144 +follow 0 92 2.397895 0.000000 143 +build 0 85 2.484907 0.000000 184 +member 0 84 2.484907 0.000000 165 +logic 0 71 2.639057 0.000000 230 +simul 0 66 2.708050 0.000000 255 +foundat 0 62 2.772589 0.000000 286 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +past 0 42 3.218876 0.000000 428 +editor 0 41 3.218876 0.000000 433 +theoret 0 39 3.258097 0.000000 446 +paul 0 38 3.295837 0.000000 471 +ofth 0 36 3.367296 0.000000 491 +utc 0 27 3.637586 0.000000 629 +constraint 0 26 3.688879 0.000000 636 +toward 0 25 3.737670 0.000000 668 +head 0 23 3.806662 0.000000 732 +properti 0 22 3.850148 0.000000 749 +wang 0 21 3.912023 0.000000 790 +chen 0 21 3.912023 0.000000 791 +verif 0 20 3.951244 0.000000 826 +synthesi 0 20 3.951244 0.000000 834 +precis 0 15 4.248495 0.000000 1023 +verifi 0 12 4.465908 0.000000 1261 +establish 0 9 4.753590 0.000000 1532 +doug 0 9 4.753590 0.000000 1517 +formul 0 8 4.875197 0.000000 1733 +canb 0 7 5.010635 0.000000 1846 +chung 0 7 5.010635 0.000000 1964 +groupth 0 5 5.347108 0.000000 2549 +carlo 0 5 5.347108 0.000000 2515 +firm 0 4 5.568345 0.000000 2684 +systemsand 0 4 5.568345 0.000000 2804 +toolset 0 4 5.568345 0.000000 3014 +aloysiu 0 3 5.857933 0.000000 3829 +lai 0 3 5.857933 0.000000 3694 +categor 0 3 5.857933 0.000000 3765 +stuart 0 3 5.857933 0.000000 3584 +guangtian 0 3 5.857933 0.000000 3810 +byprof 0 2 6.263398 0.000000 5512 +stringent 0 2 6.263398 0.000000 5523 +scenario 0 2 6.263398 0.000000 5524 +availableonlin 0 2 6.263398 0.000000 4929 +deji 0 2 6.263398 0.000000 5498 +tsou 0 2 6.263398 0.000000 5525 +clement 0 2 6.263398 0.000000 5526 +modechart 0 1 6.957497 0.000000 12954 +groundworkfor 0 1 6.957497 0.000000 12955 +enforc 0 1 6.957497 0.000000 12956 +timetool 0 1 6.957497 0.000000 12957 +languagepublicationsabstract 0 1 6.957497 0.000000 12958 +puchol 0 1 6.957497 0.000000 12959 +yangalumni 0 1 6.957497 0.000000 12960 +chih 0 1 6.957497 0.000000 12961 +farn 0 1 6.957497 0.000000 12962 +supoj 0 1 6.957497 0.000000 12963 +suthandavibul 0 1 6.957497 0.000000 12964 +farnam 0 1 6.957497 0.000000 12965 +jahanian 0 1 6.957497 0.000000 12966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..e418e2bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +specif 0 106 2.197225 0.000000 106 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +learn 0 86 2.484907 0.000000 170 +resourc 0 81 2.484907 0.000000 172 +internet 0 83 2.484907 0.000000 186 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +journal 0 83 2.484907 0.000000 183 +stuff 0 87 2.484907 0.000000 171 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +intellig 0 72 2.639057 0.000000 225 +would 0 67 2.708050 0.000000 251 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +collect 0 65 2.772589 0.000000 268 +taylor 0 63 2.772589 0.000000 287 +laboratori 0 63 2.772589 0.000000 292 +evalu 0 64 2.772589 0.000000 266 +thesi 0 57 2.890372 0.000000 327 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +finger 0 52 2.995732 0.000000 354 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +robot 0 36 3.367296 0.000000 497 +bibliographi 0 34 3.401197 0.000000 518 +tech 0 35 3.401197 0.000000 515 +produc 0 30 3.555348 0.000000 572 +postal 0 30 3.555348 0.000000 580 +particip 0 29 3.583519 0.000000 589 +propos 0 28 3.610918 0.000000 602 +rule 0 26 3.688879 0.000000 638 +task 0 25 3.737670 0.000000 678 +input 0 23 3.806662 0.000000 727 +mobil 0 23 3.806662 0.000000 730 +miscellan 0 23 3.806662 0.000000 731 +increas 0 20 3.951244 0.000000 829 +wrote 0 20 3.951244 0.000000 830 +expert 0 20 3.951244 0.000000 833 +north 0 19 4.007333 0.000000 873 +agent 0 18 4.060443 0.000000 910 +repositori 0 17 4.110874 0.000000 932 +expand 0 17 4.110874 0.000000 928 +women 0 16 4.174387 0.000000 1004 +across 0 16 4.174387 0.000000 974 +cognit 0 16 4.174387 0.000000 986 +researchmi 0 14 4.317488 0.000000 1119 +hotlist 0 13 4.382027 0.000000 1199 +misc 0 13 4.382027 0.000000 1124 +acquisit 0 10 4.653960 0.000000 1465 +sentenc 0 10 4.653960 0.000000 1413 +consortium 0 10 4.653960 0.000000 1467 +suitabl 0 9 4.753590 0.000000 1486 +linguist 0 9 4.753590 0.000000 1593 +folk 0 9 4.753590 0.000000 1597 +awar 0 7 5.010635 0.000000 1800 +carolina 0 6 5.164786 0.000000 2142 +truth 0 6 5.164786 0.000000 2179 +interestedin 0 5 5.347108 0.000000 2260 +corpu 0 5 5.347108 0.000000 2282 +cindi 0 3 5.857933 0.000000 3830 +groupunivers 0 3 5.857933 0.000000 3831 +primarilyin 0 3 5.857933 0.000000 3832 +diagnost 0 3 5.857933 0.000000 3833 +georgia 0 3 5.857933 0.000000 3834 +horizon 0 3 5.857933 0.000000 3746 +austini 0 2 6.263398 0.000000 5527 +deep 0 2 6.263398 0.000000 5528 +exhibit 0 2 6.263398 0.000000 5529 +cthomp 0 2 6.263398 0.000000 5530 +lexic 0 1 6.957497 0.000000 12967 +thompsoncindi 0 1 6.957497 0.000000 12968 +thompsonmachin 0 1 6.957497 0.000000 12969 +candlelight 0 1 6.957497 0.000000 12970 +vigil 0 1 6.957497 0.000000 12971 +internetto 0 1 6.957497 0.000000 12972 +violenc 0 1 6.957497 0.000000 12973 +semanticrepresent 0 1 6.957497 0.000000 12974 +atrobofest 0 1 6.957497 0.000000 12975 +wolv 0 1 6.957497 0.000000 12976 +counsel 0 1 6.957497 0.000000 12977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..e9b74758 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +utexa 0 189 1.609438 0.000000 44 +support 0 132 1.945910 0.000000 83 +welcom 0 122 2.079442 0.000000 99 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +browser 0 56 2.890372 0.000000 313 +friend 0 48 3.044522 0.000000 376 +netscap 0 44 3.135494 0.000000 395 +frame 0 24 3.761200 0.000000 684 +famili 0 23 3.806662 0.000000 735 +latest 0 21 3.912023 0.000000 785 +seem 0 18 4.060443 0.000000 899 +doesn 0 15 4.248495 0.000000 1055 +xingshan 1 1 6.957497 6.957497 12978 +downloadth 0 1 6.957497 0.000000 12979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..466dca94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +file 0 132 1.945910 0.000000 70 +technolog 0 131 2.079442 0.000000 102 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +memori 0 101 2.302585 0.000000 139 +internet 0 83 2.484907 0.000000 186 +want 0 79 2.564949 0.000000 199 +taylor 0 63 2.772589 0.000000 287 +probabl 0 40 3.258097 0.000000 455 +electr 0 38 3.295837 0.000000 461 +postal 0 30 3.555348 0.000000 580 +berkelei 0 26 3.688879 0.000000 657 +experiment 0 26 3.688879 0.000000 645 +mike 0 24 3.761200 0.000000 703 +disk 0 22 3.850148 0.000000 747 +less 0 18 4.060443 0.000000 892 +seem 0 18 4.060443 0.000000 899 +classic 0 14 4.317488 0.000000 1084 +rice 0 11 4.553877 0.000000 1336 +operatingsystem 0 10 4.653960 0.000000 1401 +architect 0 8 4.875197 0.000000 1624 +gather 0 8 4.875197 0.000000 1719 +capac 0 8 4.875197 0.000000 1740 +root 0 8 4.875197 0.000000 1650 +trend 0 7 5.010635 0.000000 1842 +bore 0 7 5.010635 0.000000 1948 +austinaustin 0 7 5.010635 0.000000 1966 +price 0 6 5.164786 0.000000 1999 +pagethi 0 5 5.347108 0.000000 2336 +serverless 0 3 5.857933 0.000000 3181 +systemsth 0 3 5.857933 0.000000 3835 +informationtechnolog 0 3 5.857933 0.000000 3836 +informationassist 0 2 6.263398 0.000000 5531 +teachingfal 0 2 6.263398 0.000000 5532 +systemsspr 0 2 6.263398 0.000000 4762 +dahlin 0 1 6.957497 0.000000 12980 +dahlingener 0 1 6.957497 0.000000 12981 +architectureeveryon 0 1 6.957497 0.000000 12982 +researchxf 0 1 6.957497 0.000000 12983 +systemweb 0 1 6.957497 0.000000 12984 +pagesummar 0 1 6.957497 0.000000 12985 +compter 0 1 6.957497 0.000000 12986 +includinghistor 0 1 6.957497 0.000000 12987 +informationif 0 1 6.957497 0.000000 12988 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..9cb4c2b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +activ 0 84 2.484907 0.000000 182 +servic 0 72 2.639057 0.000000 236 +laboratori 0 63 2.772589 0.000000 292 +dept 0 64 2.772589 0.000000 291 +probabl 0 40 3.258097 0.000000 455 +feel 0 37 3.332205 0.000000 483 +honor 0 23 3.806662 0.000000 729 +busi 0 21 3.912023 0.000000 784 +anyth 0 16 4.174387 0.000000 998 +regularli 0 11 4.553877 0.000000 1338 +tradit 0 10 4.653960 0.000000 1404 +meant 0 6 5.164786 0.000000 2055 +phrase 0 5 5.347108 0.000000 2242 +suffic 0 4 5.568345 0.000000 2869 +crazi 0 4 5.568345 0.000000 2822 +sytem 0 4 5.568345 0.000000 3015 +vijai 0 4 5.568345 0.000000 2960 +mehom 0 4 5.568345 0.000000 2979 +lazi 0 2 6.263398 0.000000 4527 +appeal 0 2 6.263398 0.000000 4186 +garg 0 2 6.263398 0.000000 5533 +damani 1 1 6.957497 6.957497 12989 +howdi 0 1 6.957497 0.000000 12990 +pagal 0 1 6.957497 0.000000 12991 +dekho 0 1 6.957497 0.000000 12992 +updateth 0 1 6.957497 0.000000 12993 +oblig 0 1 6.957497 0.000000 12994 +providesometh 0 1 6.957497 0.000000 12995 +guadulp 0 1 6.957497 0.000000 12996 +austinphon 0 1 6.957497 0.000000 12997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..f3406699 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +machin 0 129 2.079442 0.000000 95 +make 0 111 2.197225 0.000000 120 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +school 0 84 2.484907 0.000000 188 +appli 0 71 2.639057 0.000000 226 +view 0 70 2.708050 0.000000 254 +main 0 67 2.708050 0.000000 256 +complex 0 64 2.772589 0.000000 269 +laboratori 0 63 2.772589 0.000000 292 +autom 0 41 3.218876 0.000000 434 +tree 0 36 3.367296 0.000000 492 +global 0 34 3.401197 0.000000 520 +equat 0 23 3.806662 0.000000 724 +nice 0 20 3.951244 0.000000 809 +render 0 17 4.110874 0.000000 947 +happi 0 14 4.317488 0.000000 1079 +station 0 13 4.382027 0.000000 1157 +mainten 0 9 4.753590 0.000000 1543 +plane 0 6 5.164786 0.000000 2187 +multiresolut 0 5 5.347108 0.000000 2423 +escap 0 4 5.568345 0.000000 3016 +unrel 0 3 5.857933 0.000000 3837 +dane 0 2 6.263398 0.000000 5534 +pinbal 0 2 6.263398 0.000000 5508 +marshal 0 2 6.263398 0.000000 4118 +illumin 0 2 6.263398 0.000000 4819 +probe 0 2 6.263398 0.000000 5535 +marshalldan 0 1 6.957497 0.000000 12998 +electromechan 0 1 6.957497 0.000000 12999 +thelogist 0 1 6.957497 0.000000 13000 +attractor 0 1 6.957497 0.000000 13001 +burnet 0 1 6.957497 0.000000 13002 +pastur 0 1 6.957497 0.000000 13003 +jupit 0 1 6.957497 0.000000 13004 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..2732a15e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +provid 0 121 2.079442 0.000000 94 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +access 0 102 2.302585 0.000000 136 +know 0 80 2.564949 0.000000 198 +good 0 77 2.564949 0.000000 200 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +archiv 0 49 3.044522 0.000000 364 +keep 0 44 3.135494 0.000000 409 +video 0 44 3.135494 0.000000 405 +go 0 33 3.433987 0.000000 529 +idea 0 32 3.465736 0.000000 545 +someth 0 31 3.496508 0.000000 554 +sort 0 22 3.850148 0.000000 738 +sure 0 20 3.951244 0.000000 813 +easi 0 16 4.174387 0.000000 969 +save 0 14 4.317488 0.000000 1099 +manner 0 14 4.317488 0.000000 1074 +pagewelcom 0 11 4.553877 0.000000 1344 +guess 0 10 4.653960 0.000000 1443 +doug 0 9 4.753590 0.000000 1517 +perhap 0 8 4.875197 0.000000 1693 +fiction 0 6 5.164786 0.000000 2217 +latexhtml 0 5 5.347108 0.000000 2347 +orlean 0 5 5.347108 0.000000 2550 +bear 0 4 5.568345 0.000000 2651 +stuart 0 3 5.857933 0.000000 3584 +justa 0 2 6.263398 0.000000 5326 +pagedoug 0 1 6.957497 0.000000 13005 +oflinksto 0 1 6.957497 0.000000 13006 +aboutsport 0 1 6.957497 0.000000 13007 +booksin 0 1 6.957497 0.000000 13008 +fewjok 0 1 6.957497 0.000000 13009 +testof 0 1 6.957497 0.000000 13010 +aweath 0 1 6.957497 0.000000 13011 +mapandcondit 0 1 6.957497 0.000000 13012 +austinandnew 0 1 6.957497 0.000000 13013 +amgraci 0 1 6.957497 0.000000 13014 +puttingit 0 1 6.957497 0.000000 13015 +webbrows 0 1 6.957497 0.000000 13016 +thisi 0 1 6.957497 0.000000 13017 +stuffmom 0 1 6.957497 0.000000 13018 +calendarlink 0 1 6.957497 0.000000 13019 +fictionbooksjokessportsfoodvideout 0 1 6.957497 0.000000 13020 +libraryresumelast 0 1 6.957497 0.000000 13021 +dasdastuart 0 1 6.957497 0.000000 13022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..0c23c3a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +doug 1 9 4.753590 4.753590 1517 +swhich 1 1 6.957497 6.957497 13023 +annoi 1 1 6.957497 6.957497 13024 +thisorthi 1 1 6.957497 6.957497 13025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..6ef09471 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +taylor 0 63 2.772589 0.000000 287 +local 0 55 2.944439 0.000000 334 +archiv 0 49 3.044522 0.000000 364 +robot 0 36 3.367296 0.000000 497 +neural 0 30 3.555348 0.000000 578 +postal 0 30 3.555348 0.000000 580 +art 0 29 3.583519 0.000000 593 +utc 0 27 3.637586 0.000000 629 +symbol 0 27 3.637586 0.000000 620 +fine 0 20 3.951244 0.000000 822 +agent 0 18 4.060443 0.000000 910 +universityof 0 15 4.248495 0.000000 1061 +evolv 0 12 4.465908 0.000000 1223 +literatur 0 11 4.553877 0.000000 1300 +michigan 0 11 4.553877 0.000000 1368 +genet 0 10 4.653960 0.000000 1409 +santa 0 10 4.653960 0.000000 1441 +ground 0 7 5.010635 0.000000 1955 +digest 0 7 5.010635 0.000000 1864 +theus 0 4 5.568345 0.000000 2992 +spanish 0 4 5.568345 0.000000 3017 +intereststh 0 3 5.857933 0.000000 3838 +dian 0 2 6.263398 0.000000 5536 +lawdian 0 1 6.957497 0.000000 13026 +problemnavig 0 1 6.957497 0.000000 13027 +washingtonst 0 1 6.957497 0.000000 13028 +stateunivers 0 1 6.957497 0.000000 13029 +dianelaw 0 1 6.957497 0.000000 13030 +gann 0 1 6.957497 0.000000 13031 +illig 0 1 6.957497 0.000000 13032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..7e73ca27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +resourc 0 81 2.484907 0.000000 172 +homework 0 79 2.564949 0.000000 193 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +undergradu 0 54 2.944439 0.000000 338 +mine 0 26 3.688879 0.000000 654 +alwai 0 24 3.761200 0.000000 691 +unit 0 21 3.912023 0.000000 779 +monitor 0 17 4.110874 0.000000 941 +athlet 0 7 5.010635 0.000000 1933 +greec 0 6 5.164786 0.000000 2208 +informat 0 3 5.857933 0.000000 3839 +patra 0 2 6.263398 0.000000 5537 +reasearch 0 2 6.263398 0.000000 5538 +dionisi 0 1 6.957497 0.000000 13033 +papadopoulosdionisi 0 1 6.957497 0.000000 13034 +papadopoulosabout 0 1 6.957497 0.000000 13035 +medionisi 0 1 6.957497 0.000000 13036 +papadopoulo 0 1 6.957497 0.000000 13037 +panhellen 0 1 6.957497 0.000000 13038 +associationpanathinaiko 0 1 6.957497 0.000000 13039 +clubgreek 0 1 6.957497 0.000000 13040 +newshellen 0 1 6.957497 0.000000 13041 +networkeveryth 0 1 6.957497 0.000000 13042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..ab4c218f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +recent 0 167 1.791759 0.000000 58 +construct 0 139 1.945910 0.000000 82 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +problem 0 147 1.945910 0.000000 75 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +version 0 113 2.197225 0.000000 122 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +appear 0 78 2.564949 0.000000 210 +sourc 0 77 2.564949 0.000000 201 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +simul 0 66 2.708050 0.000000 255 +complex 0 64 2.772589 0.000000 269 +taylor 0 63 2.772589 0.000000 287 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +visit 0 63 2.772589 0.000000 288 +space 0 57 2.890372 0.000000 310 +sampl 0 53 2.944439 0.000000 339 +cover 0 55 2.944439 0.000000 329 +local 0 55 2.944439 0.000000 334 +finger 0 52 2.995732 0.000000 354 +linear 0 41 3.218876 0.000000 431 +annual 0 40 3.258097 0.000000 458 +multipl 0 39 3.258097 0.000000 453 +small 0 39 3.258097 0.000000 447 +correct 0 38 3.295837 0.000000 462 +random 1 34 3.401197 3.401197 511 +product 0 33 3.433987 0.000000 527 +graph 0 30 3.555348 0.000000 576 +postal 0 30 3.555348 0.000000 580 +load 0 28 3.610918 0.000000 601 +utc 0 27 3.637586 0.000000 629 +revis 0 26 3.688879 0.000000 640 +bound 0 26 3.688879 0.000000 659 +lower 0 18 4.060443 0.000000 886 +expand 0 17 4.110874 0.000000 928 +role 0 14 4.317488 0.000000 1101 +balanc 0 14 4.317488 0.000000 1112 +weak 0 13 4.382027 0.000000 1159 +walk 0 12 4.465908 0.000000 1281 +paragraph 0 10 4.653960 0.000000 1449 +preliminari 0 9 4.753590 0.000000 1480 +cryptographi 0 9 4.753590 0.000000 1512 +leader 0 9 4.753590 0.000000 1576 +explicit 0 9 4.753590 0.000000 1525 +insert 0 8 4.875197 0.000000 1687 +elect 0 8 4.875197 0.000000 1771 +analys 0 8 4.875197 0.000000 1666 +combinatori 0 8 4.875197 0.000000 1629 +foc 0 7 5.010635 0.000000 1880 +hit 0 7 5.010635 0.000000 1965 +dimens 0 7 5.010635 0.000000 1930 +soda 0 6 5.164786 0.000000 2189 +determinist 0 6 5.164786 0.000000 2034 +stoc 0 5 5.347108 0.000000 2491 +mutual 0 5 5.347108 0.000000 2418 +asymptot 0 4 5.568345 0.000000 2676 +delet 0 4 5.568345 0.000000 2691 +exclus 0 4 5.568345 0.000000 2947 +combinatorica 0 3 5.857933 0.000000 3649 +intereststh 0 3 5.857933 0.000000 3838 +algorithmica 0 3 5.857933 0.000000 3561 +beat 0 3 5.857933 0.000000 3840 +eigenvalu 0 3 5.857933 0.000000 3364 +sicomp 0 1 6.957497 0.000000 13043 +zuckermandavid 0 1 6.957497 0.000000 13044 +zuckermanassist 0 1 6.957497 0.000000 13045 +cryptographyresearch 0 1 6.957497 0.000000 13046 +myprofil 0 1 6.957497 0.000000 13047 +transposit 0 1 6.957497 0.000000 13048 +extractor 0 1 6.957497 0.000000 13049 +jcss 0 1 6.957497 0.000000 13050 +logspac 0 1 6.957497 0.000000 13051 +tight 0 1 6.957497 0.000000 13052 +derandom 0 1 6.957497 0.000000 13053 +constructionand 0 1 6.957497 0.000000 13054 +setfor 0 1 6.957497 0.000000 13055 +rectangl 0 1 6.957497 0.000000 13056 +unapproxim 0 1 6.957497 0.000000 13057 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..fa3c90f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +distribut 0 162 1.791759 0.000000 51 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +report 0 131 2.079442 0.000000 92 +send 0 114 2.197225 0.000000 109 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +wide 0 84 2.484907 0.000000 185 +member 0 84 2.484907 0.000000 165 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +nation 0 74 2.639057 0.000000 240 +multimedia 1 68 2.708050 2.708050 258 +main 0 67 2.708050 0.000000 256 +would 0 67 2.708050 0.000000 251 +laboratori 0 63 2.772589 0.000000 292 +foundat 0 62 2.772589 0.000000 286 +content 0 59 2.833213 0.000000 302 +variou 0 56 2.890372 0.000000 317 +suggest 0 53 2.944439 0.000000 331 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +tabl 0 51 2.995732 0.000000 346 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +industri 0 38 3.295837 0.000000 464 +microsoft 0 38 3.295837 0.000000 468 +electr 0 38 3.295837 0.000000 461 +storag 0 31 3.496508 0.000000 553 +focus 0 29 3.583519 0.000000 584 +relev 0 26 3.688879 0.000000 637 +intel 0 16 4.174387 0.000000 1000 +audio 0 14 4.317488 0.000000 1094 +carri 0 13 4.382027 0.000000 1152 +nasa 0 13 4.382027 0.000000 1188 +departmentof 0 9 4.753590 0.000000 1539 +transport 0 8 4.875197 0.000000 1672 +hear 0 7 5.010635 0.000000 1940 +sponsor 0 6 5.164786 0.000000 2133 +multimediacomput 0 3 5.857933 0.000000 3841 +mitsubishi 0 3 5.857933 0.000000 3842 +merl 0 3 5.857933 0.000000 3843 +rangeof 0 2 6.263398 0.000000 4076 +federalinstitut 0 2 6.263398 0.000000 5539 +agenda 0 2 6.263398 0.000000 5037 +currentresearch 0 1 6.957497 0.000000 13058 +andmultiresolut 0 1 6.957497 0.000000 13059 +dmcl 0 1 6.957497 0.000000 13060 +microsystemsinc 0 1 6.957497 0.000000 13061 +yourcom 0 1 6.957497 0.000000 13062 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..81944d43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +softwar 1 220 1.386294 1.386294 30 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +austin 0 168 1.791759 0.000000 63 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +orient 0 80 2.564949 0.000000 205 +appli 0 71 2.639057 0.000000 226 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +java 0 70 2.708050 0.000000 248 +would 0 67 2.708050 0.000000 251 +improv 0 62 2.772589 0.000000 289 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +programm 0 39 3.258097 0.000000 445 +microsoft 0 38 3.295837 0.000000 468 +cost 0 37 3.332205 0.000000 480 +product 0 33 3.433987 0.000000 527 +domain 1 30 3.555348 3.555348 564 +compon 0 30 3.555348 0.000000 570 +postal 0 30 3.555348 0.000000 580 +scale 0 28 3.610918 0.000000 613 +framework 0 28 3.610918 0.000000 606 +enhanc 0 26 3.688879 0.000000 644 +subject 0 26 3.688879 0.000000 647 +wai 0 25 3.737670 0.000000 662 +pattern 0 24 3.761200 0.000000 689 +methodolog 0 23 3.806662 0.000000 733 +reduc 0 22 3.850148 0.000000 759 +fund 0 21 3.912023 0.000000 805 +synthesi 0 20 3.951244 0.000000 834 +mainten 0 9 4.753590 0.000000 1543 +realiz 0 8 4.875197 0.000000 1739 +span 0 8 4.875197 0.000000 1751 +avion 0 4 5.568345 0.000000 3018 +darpa 0 4 5.568345 0.000000 2944 +batori 0 4 5.568345 0.000000 2690 +preprocessor 0 3 5.857933 0.000000 3844 +parameter 0 2 6.263398 0.000000 5540 +encapsul 0 2 6.263398 0.000000 5541 +jakarta 0 1 6.957497 0.000000 13063 +batorydon 0 1 6.957497 0.000000 13064 +batorysoftwar 0 1 6.957497 0.000000 13065 +pluggabl 0 1 6.957497 0.000000 13066 +schlumberg 0 1 6.957497 0.000000 13067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..e98843e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +present 0 91 2.397895 0.000000 145 +info 0 85 2.484907 0.000000 176 +know 0 80 2.564949 0.000000 198 +symposium 0 72 2.639057 0.000000 238 +main 0 67 2.708050 0.000000 256 +favorit 0 44 3.135494 0.000000 410 +methodolog 0 23 3.806662 0.000000 733 +partit 0 16 4.174387 0.000000 984 +photograph 0 15 4.248495 0.000000 1056 +usavoic 0 13 4.382027 0.000000 1198 +road 0 11 4.553877 0.000000 1374 +insert 0 8 4.875197 0.000000 1687 +banerje 0 6 5.164786 0.000000 2018 +theintern 0 4 5.568345 0.000000 2981 +dwip 1 3 5.857933 5.857933 3197 +banerjeeabout 0 1 6.957497 0.000000 13068 +methi 0 1 6.957497 0.000000 13069 +programminggroup 0 1 6.957497 0.000000 13070 +graphicalparallel 0 1 6.957497 0.000000 13071 +departmentpost 0 1 6.957497 0.000000 13072 +homepost 0 1 6.957497 0.000000 13073 +enfield 0 1 6.957497 0.000000 13074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..d069438c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +make 0 111 2.197225 0.000000 120 +thing 0 84 2.484907 0.000000 189 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +main 0 67 2.708050 0.000000 256 +previou 0 62 2.772589 0.000000 290 +januari 0 62 2.772589 0.000000 264 +taylor 0 63 2.772589 0.000000 287 +summer 0 56 2.890372 0.000000 311 +think 0 57 2.890372 0.000000 314 +set 0 50 3.044522 0.000000 361 +right 0 48 3.044522 0.000000 363 +anoth 1 45 3.135494 3.135494 408 +third 0 43 3.178054 0.000000 412 +multipl 0 39 3.258097 0.000000 453 +finish 0 22 3.850148 0.000000 748 +break 0 20 3.951244 0.000000 812 +els 0 19 4.007333 0.000000 843 +left 0 19 4.007333 0.000000 851 +item 0 19 4.007333 0.000000 856 +usavoic 0 13 4.382027 0.000000 1198 +paragraph 0 10 4.653960 0.000000 1449 +mepost 0 10 4.653960 0.000000 1472 +deadlin 0 9 4.753590 0.000000 1502 +siggraph 0 8 4.875197 0.000000 1773 +forget 0 8 4.875197 0.000000 1712 +promis 0 6 5.164786 0.000000 2037 +complaint 0 4 5.568345 0.000000 2795 +emilio 0 3 5.857933 0.000000 3683 +bout 0 3 5.857933 0.000000 3670 +credibl 0 3 5.857933 0.000000 3210 +decent 0 2 6.263398 0.000000 5542 +excus 0 2 6.263398 0.000000 4684 +camahort 0 1 6.957497 0.000000 13075 +gurrea 0 1 6.957497 0.000000 13076 +mmmmm 0 1 6.957497 0.000000 13077 +lose 0 1 6.957497 0.000000 13078 +ecamahor 0 1 6.957497 0.000000 13079 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..1b5c5642 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +support 0 132 1.945910 0.000000 83 +summari 0 73 2.639057 0.000000 237 +view 0 70 2.708050 0.000000 254 +multimedia 0 68 2.708050 0.000000 258 +laboratori 0 63 2.772589 0.000000 292 +greg 0 24 3.761200 0.000000 695 +head 0 23 3.806662 0.000000 732 +supervis 0 20 3.951244 0.000000 840 +eduphon 0 15 4.248495 0.000000 1060 +consortium 0 10 4.653960 0.000000 1467 +harrick 0 7 5.010635 0.000000 1849 +multimediacomput 0 3 5.857933 0.000000 3841 +lavend 0 3 5.857933 0.000000 3217 +posnak 1 1 6.957497 6.957497 13080 +isod 0 1 6.957497 0.000000 13081 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..467fd316 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +theori 0 111 2.197225 0.000000 127 +real 0 93 2.397895 0.000000 144 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +method 0 80 2.564949 0.000000 213 +logic 0 71 2.639057 0.000000 230 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +practic 0 70 2.708050 0.000000 246 +taylor 0 63 2.772589 0.000000 287 +foundat 0 62 2.772589 0.000000 286 +direct 0 57 2.890372 0.000000 316 +reason 0 57 2.890372 0.000000 318 +york 0 41 3.218876 0.000000 435 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +theoret 0 39 3.258097 0.000000 446 +formal 0 37 3.332205 0.000000 478 +tree 0 36 3.367296 0.000000 492 +concurr 0 34 3.401197 0.000000 501 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +verif 0 20 3.951244 0.000000 826 +aid 0 18 4.060443 0.000000 904 +cambridg 0 16 4.174387 0.000000 1008 +automata 0 13 4.382027 0.000000 1135 +calculu 0 12 4.465908 0.000000 1203 +tempor 0 9 4.753590 0.000000 1584 +juan 0 9 4.753590 0.000000 1580 +secretari 0 8 4.875197 0.000000 1775 +quantit 0 8 4.875197 0.000000 1654 +satisfi 0 8 4.875197 0.000000 1694 +mass 0 8 4.875197 0.000000 1732 +centenni 0 7 5.010635 0.000000 1967 +foc 0 7 5.010635 0.000000 1880 +srinivasan 0 6 5.164786 0.000000 2175 +handbook 0 6 5.164786 0.000000 2061 +emerson 1 5 5.347108 5.347108 2547 +allen 0 5 5.347108 0.000000 2470 +infinit 0 4 5.568345 0.000000 2596 +elsevi 0 3 5.857933 0.000000 3671 +systemsselect 0 2 6.263398 0.000000 4049 +bakker 0 2 6.263398 0.000000 5337 +leeuwen 0 2 6.263398 0.000000 5543 +emersonbruton 0 1 6.957497 0.000000 13082 +publications 0 1 6.957497 0.000000 13083 +sistla 0 1 6.957497 0.000000 13084 +sadler 0 1 6.957497 0.000000 13085 +jutla 0 1 6.957497 0.000000 13086 +determinaci 0 1 6.957497 0.000000 13087 +modal 0 1 6.957497 0.000000 13088 +amsterdam 0 1 6.957497 0.000000 13089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..9db7c030 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +object 0 138 1.945910 0.000000 79 +problem 0 147 1.945910 0.000000 75 +tool 0 117 2.079442 0.000000 93 +databas 0 122 2.079442 0.000000 86 +postscript 0 131 2.079442 0.000000 90 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +user 0 104 2.302585 0.000000 137 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +graphic 0 90 2.397895 0.000000 147 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +info 0 85 2.484907 0.000000 176 +academ 0 82 2.484907 0.000000 178 +orient 0 80 2.564949 0.000000 205 +materi 0 75 2.639057 0.000000 221 +name 0 72 2.639057 0.000000 220 +html 0 75 2.639057 0.000000 235 +view 0 70 2.708050 0.000000 254 +function 0 62 2.772589 0.000000 275 +dept 0 64 2.772589 0.000000 291 +taylor 0 63 2.772589 0.000000 287 +abstract 0 62 2.772589 0.000000 276 +type 0 61 2.833213 0.000000 296 +unix 0 58 2.890372 0.000000 308 +extens 0 53 2.944439 0.000000 340 +visual 0 48 3.044522 0.000000 372 +describ 0 45 3.135494 0.000000 400 +textbook 0 44 3.135494 0.000000 397 +http 0 41 3.218876 0.000000 420 +join 0 39 3.258097 0.000000 457 +concept 0 32 3.465736 0.000000 537 +framework 0 28 3.610918 0.000000 606 +repres 0 26 3.688879 0.000000 656 +known 0 24 3.761200 0.000000 702 +compress 0 23 3.806662 0.000000 719 +lyco 0 19 4.007333 0.000000 871 +analyz 0 17 4.110874 0.000000 925 +macintosh 0 17 4.110874 0.000000 920 +near 0 14 4.317488 0.000000 1091 +composit 0 13 4.382027 0.000000 1150 +affili 0 13 4.382027 0.000000 1194 +brother 0 13 4.382027 0.000000 1189 +believ 0 13 4.382027 0.000000 1187 +doug 0 9 4.753590 0.000000 1517 +pure 0 8 4.875197 0.000000 1776 +accomplish 0 8 4.875197 0.000000 1755 +mirror 0 6 5.164786 0.000000 2028 +artist 0 6 5.164786 0.000000 2127 +haskel 0 4 5.568345 0.000000 2618 +aspir 0 4 5.568345 0.000000 3019 +berger 0 3 5.857933 0.000000 3702 +add 0 3 5.857933 0.000000 3131 +emeri 0 2 6.263398 0.000000 5515 +groupi 0 2 6.263398 0.000000 5544 +linksth 0 2 6.263398 0.000000 5545 +analyst 0 1 6.957497 0.000000 13090 +ticam 0 1 6.957497 0.000000 13091 +evangelist 0 1 6.957497 0.000000 13092 +pageemeri 0 1 6.957497 0.000000 13093 +randomli 0 1 6.957497 0.000000 13094 +uttr 0 1 6.957497 0.000000 13095 +othermi 0 1 6.957497 0.000000 13096 +youngest 0 1 6.957497 0.000000 13097 +handiwork 0 1 6.957497 0.000000 13098 +systemtexbook 0 1 6.957497 0.000000 13099 +exchangegrac 0 1 6.957497 0.000000 13100 +macaddict 0 1 6.957497 0.000000 13101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..3fe91e6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +find 0 111 2.197225 0.000000 111 +intern 0 108 2.197225 0.000000 128 +part 0 98 2.302585 0.000000 129 +librari 0 87 2.484907 0.000000 181 +solut 0 82 2.484907 0.000000 162 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +come 0 78 2.564949 0.000000 202 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +august 0 66 2.708050 0.000000 257 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +visit 0 63 2.772589 0.000000 288 +januari 0 62 2.772589 0.000000 264 +semest 0 58 2.890372 0.000000 312 +date 0 51 2.995732 0.000000 344 +compani 0 41 3.218876 0.000000 423 +small 0 39 3.258097 0.000000 447 +programm 0 39 3.258097 0.000000 445 +tutori 0 39 3.258097 0.000000 437 +industri 0 38 3.295837 0.000000 464 +china 0 37 3.332205 0.000000 487 +chines 0 29 3.583519 0.000000 595 +repres 0 26 3.688879 0.000000 656 +try 0 22 3.850148 0.000000 764 +alumni 0 21 3.912023 0.000000 807 +love 0 21 3.912023 0.000000 804 +nice 0 20 3.951244 0.000000 809 +media 0 19 4.007333 0.000000 861 +attend 0 18 4.060443 0.000000 893 +lot 0 18 4.060443 0.000000 889 +jose 0 16 4.174387 0.000000 976 +club 0 15 4.248495 0.000000 1058 +fortran 0 15 4.248495 0.000000 1027 +eduphon 0 15 4.248495 0.000000 1060 +becam 0 14 4.317488 0.000000 1117 +francisco 0 14 4.317488 0.000000 1095 +opportun 0 13 4.382027 0.000000 1161 +market 0 11 4.553877 0.000000 1361 +branch 0 11 4.553877 0.000000 1318 +enter 0 10 4.653960 0.000000 1454 +telecommun 0 9 4.753590 0.000000 1565 +surpris 0 7 5.010635 0.000000 1828 +capit 0 7 5.010635 0.000000 1957 +vallei 0 7 5.010635 0.000000 1959 +south 0 6 5.164786 0.000000 2167 +deliv 0 6 5.164786 0.000000 2070 +silicon 0 6 5.164786 0.000000 2076 +girl 0 5 5.347108 0.000000 2410 +orlean 0 5 5.347108 0.000000 2550 +houston 0 5 5.347108 0.000000 2460 +planet 0 4 5.568345 0.000000 2912 +immedi 0 3 5.857933 0.000000 3117 +peac 0 3 5.857933 0.000000 3769 +emma 1 2 6.263398 6.263398 5546 +zhongshan 0 2 6.263398 0.000000 5547 +mini 0 2 6.263398 0.000000 5548 +magazinepc 0 2 6.263398 0.000000 5161 +wuabout 0 1 6.957497 0.000000 13102 +myselfhi 0 1 6.957497 0.000000 13103 +inibm 0 1 6.957497 0.000000 13104 +costom 0 1 6.957497 0.000000 13105 +manyalumni 0 1 6.957497 0.000000 13106 +instrumentsinc 0 1 6.957497 0.000000 13107 +computingmanag 0 1 6.957497 0.000000 13108 +informationautomat 0 1 6.957497 0.000000 13109 +baseyahoogalaxi 0 1 6.957497 0.000000 13110 +universityyellow 0 1 6.957497 0.000000 13111 +infoleisur 0 1 6.957497 0.000000 13112 +timenewspagepeopl 0 1 6.957497 0.000000 13113 +dailyartstim 0 1 6.957497 0.000000 13114 +magazinechines 0 1 6.957497 0.000000 13115 +magazinec 0 1 6.957497 0.000000 13116 +antoniosan 0 1 6.957497 0.000000 13117 +franciscomarina 0 1 6.957497 0.000000 13118 +citysan 0 1 6.957497 0.000000 13119 +pointemail 0 1 6.957497 0.000000 13120 +emmawu 0 1 6.957497 0.000000 13121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..36a37889 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +first 1 140 1.945910 1.945910 71 +well 0 109 2.197225 0.000000 121 +main 0 67 2.708050 0.000000 256 +anoth 1 45 3.135494 3.135494 408 +third 0 43 3.178054 0.000000 412 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +break 0 20 3.951244 0.000000 812 +item 0 19 4.007333 0.000000 856 +easi 0 16 4.174387 0.000000 969 +usavoic 0 13 4.382027 0.000000 1198 +paragraph 1 10 4.653960 4.653960 1449 +mepost 0 10 4.653960 0.000000 1472 +forget 0 8 4.875197 0.000000 1712 +emilio 1 3 5.857933 5.857933 3683 +mehi 0 2 6.263398 0.000000 5549 +bullet 0 2 6.263398 0.000000 5500 +remolinaemilio 0 1 6.957497 0.000000 13122 +remolinaabout 0 1 6.957497 0.000000 13123 +eremolin 0 1 6.957497 0.000000 13124 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..d6c4bfc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +locat 0 59 2.833213 0.000000 303 +http 0 41 3.218876 0.000000 420 +reach 0 24 3.761200 0.000000 688 +thank 0 23 3.806662 0.000000 721 +former 0 17 4.110874 0.000000 956 +universityof 0 15 4.248495 0.000000 1061 +sciencesat 0 7 5.010635 0.000000 1968 +levent 1 1 6.957497 6.957497 13125 +sayfasi 0 1 6.957497 0.000000 13126 +erkok 0 1 6.957497 0.000000 13127 +inturkei 0 1 6.957497 0.000000 13128 +ceng 0 1 6.957497 0.000000 13129 +metu 0 1 6.957497 0.000000 13130 +erkokto 0 1 6.957497 0.000000 13131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..17102ff7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +area 0 144 1.945910 0.000000 80 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +logic 0 71 2.639057 0.000000 230 +reason 0 57 2.890372 0.000000 318 +postal 0 30 3.555348 0.000000 580 +mind 0 27 3.637586 0.000000 632 +voic 0 21 3.912023 0.000000 806 +cognit 0 16 4.174387 0.000000 986 +thedepart 0 11 4.553877 0.000000 1350 +turkei 0 4 5.568345 0.000000 2914 +commonsens 0 4 5.568345 0.000000 2998 +children 0 3 5.857933 0.000000 3767 +monoton 0 2 6.263398 0.000000 5321 +andinform 0 2 6.263398 0.000000 5550 +esra 1 1 6.957497 6.957497 13132 +erdem 0 1 6.957497 0.000000 13133 +bilkent 0 1 6.957497 0.000000 13134 +learninginduct 0 1 6.957497 0.000000 13135 +sciencelearningreason 0 1 6.957497 0.000000 13136 +reasoningknowledg 0 1 6.957497 0.000000 13137 +representationemotionsphilosophi 0 1 6.957497 0.000000 13138 +mindcontact 0 1 6.957497 0.000000 13139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..35a5154c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +perform 0 143 1.945910 0.000000 74 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +thing 0 84 2.484907 0.000000 189 +wide 0 84 2.484907 0.000000 185 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +degre 0 69 2.708050 0.000000 259 +main 0 67 2.708050 0.000000 256 +collect 0 65 2.772589 0.000000 268 +interact 0 62 2.772589 0.000000 270 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +extens 0 53 2.944439 0.000000 340 +life 0 50 3.044522 0.000000 375 +describ 0 45 3.135494 0.000000 400 +random 0 34 3.401197 0.000000 511 +art 0 29 3.583519 0.000000 593 +toward 0 25 3.737670 0.000000 668 +sport 0 25 3.737670 0.000000 683 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +scheme 0 20 3.951244 0.000000 818 +item 0 19 4.007333 0.000000 856 +carl 1 15 4.248495 4.248495 1024 +believ 0 13 4.382027 0.000000 1187 +usavoic 0 13 4.382027 0.000000 1198 +rice 1 11 4.553877 4.553877 1336 +worth 0 11 4.553877 0.000000 1294 +chronicl 0 7 5.010635 0.000000 1952 +athlet 0 7 5.010635 0.000000 1933 +myresum 0 6 5.164786 0.000000 2199 +snow 0 6 5.164786 0.000000 2031 +houston 0 5 5.347108 0.000000 2460 +wasn 0 3 5.857933 0.000000 3800 +dust 0 2 6.263398 0.000000 5551 +syntact 0 2 6.263398 0.000000 5552 +linksth 0 2 6.263398 0.000000 5545 +household 0 2 6.263398 0.000000 4920 +peak 0 2 6.263398 0.000000 5553 +pagestephen 0 1 6.957497 0.000000 13140 +carlpardon 0 1 6.957497 0.000000 13141 +planmi 0 1 6.957497 0.000000 13142 +interestsa 0 1 6.957497 0.000000 13143 +psuedo 0 1 6.957497 0.000000 13144 +dose 0 1 6.957497 0.000000 13145 +pike 0 1 6.957497 0.000000 13146 +bandget 0 1 6.957497 0.000000 13147 +touchpost 0 1 6.957497 0.000000 13148 +esteban 0 1 6.957497 0.000000 13149 +edureturn 0 1 6.957497 0.000000 13150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..005769d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +schedul 0 119 2.079442 0.000000 85 +machin 0 129 2.079442 0.000000 95 +check 0 115 2.197225 0.000000 118 +techniqu 0 99 2.302585 0.000000 138 +learn 0 86 2.484907 0.000000 170 +solut 0 82 2.484907 0.000000 162 +control 0 82 2.484907 0.000000 164 +educ 0 86 2.484907 0.000000 191 +method 0 80 2.564949 0.000000 213 +effici 0 73 2.639057 0.000000 233 +knowledg 0 67 2.708050 0.000000 243 +improv 0 62 2.772589 0.000000 289 +plan 0 65 2.772589 0.000000 272 +descript 0 64 2.772589 0.000000 271 +taylor 0 63 2.772589 0.000000 287 +detail 0 57 2.890372 0.000000 321 +combin 0 42 3.218876 0.000000 421 +vita 0 38 3.295837 0.000000 473 +postal 0 30 3.555348 0.000000 580 +accur 0 25 3.737670 0.000000 680 +induct 0 11 4.553877 0.000000 1304 +solver 0 7 5.010635 0.000000 1911 +analyt 0 7 5.010635 0.000000 1913 +machinelearn 0 6 5.164786 0.000000 2084 +groupth 0 5 5.347108 0.000000 2549 +myresearch 0 4 5.568345 0.000000 2842 +theperform 0 3 5.857933 0.000000 3262 +estlin 0 2 6.263398 0.000000 5554 +tara 0 2 6.263398 0.000000 5555 +researchinvolv 0 2 6.263398 0.000000 5556 +acquir 0 2 6.263398 0.000000 5557 +amparticularli 0 2 6.263398 0.000000 5558 +tulan 0 2 6.263398 0.000000 5559 +estlintara 0 1 6.957497 0.000000 13151 +estlinmachin 0 1 6.957497 0.000000 13152 +austinresearchcontrol 0 1 6.957497 0.000000 13153 +byguid 0 1 6.957497 0.000000 13154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..5e860fe4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +find 0 111 2.197225 0.000000 111 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +theori 0 111 2.197225 0.000000 127 +user 0 104 2.302585 0.000000 137 +real 0 93 2.397895 0.000000 144 +issu 0 78 2.564949 0.000000 211 +would 0 67 2.708050 0.000000 251 +street 0 63 2.772589 0.000000 293 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +suggest 0 53 2.944439 0.000000 331 +directori 0 45 3.135494 0.000000 396 +join 0 39 3.258097 0.000000 457 +chines 0 29 3.583519 0.000000 595 +utc 0 27 3.637586 0.000000 629 +todai 0 25 3.737670 0.000000 672 +watch 0 21 3.912023 0.000000 789 +voic 0 21 3.912023 0.000000 806 +assum 0 19 4.007333 0.000000 845 +stop 0 17 4.110874 0.000000 942 +women 0 16 4.174387 0.000000 1004 +action 0 15 4.248495 0.000000 1038 +trip 0 14 4.317488 0.000000 1113 +central 0 13 4.382027 0.000000 1160 +suit 0 13 4.382027 0.000000 1129 +food 0 12 4.465908 0.000000 1285 +basketbal 0 12 4.465908 0.000000 1289 +market 0 11 4.553877 0.000000 1361 +noth 0 11 4.553877 0.000000 1328 +black 0 10 4.653960 0.000000 1418 +shop 0 10 4.653960 0.000000 1469 +mepost 0 10 4.653960 0.000000 1472 +swim 0 9 4.753590 0.000000 1599 +japan 0 8 4.875197 0.000000 1762 +foreign 0 7 5.010635 0.000000 1919 +troubl 0 6 5.164786 0.000000 2002 +knew 0 5 5.347108 0.000000 2445 +pack 0 3 5.857933 0.000000 3597 +legion 0 3 5.857933 0.000000 3708 +guadalup 0 3 5.857933 0.000000 3255 +francoi 1 2 6.263398 6.263398 4523 +polic 0 2 6.263398 0.000000 5560 +forest 0 2 6.263398 0.000000 5368 +cake 0 2 6.263398 0.000000 5118 +forthcom 0 2 6.263398 0.000000 5392 +crawl 0 2 6.263398 0.000000 5561 +wisdom 0 2 6.263398 0.000000 4430 +barbanson 0 1 6.957497 0.000000 13155 +tank 0 1 6.957497 0.000000 13156 +versionhom 0 1 6.957497 0.000000 13157 +versionthi 0 1 6.957497 0.000000 13158 +spool 0 1 6.957497 0.000000 13159 +francoisabout 0 1 6.957497 0.000000 13160 +mecurr 0 1 6.957497 0.000000 13161 +genuin 0 1 6.957497 0.000000 13162 +pastri 0 1 6.957497 0.000000 13163 +fruit 0 1 6.957497 0.000000 13164 +mouss 0 1 6.957497 0.000000 13165 +groceri 0 1 6.957497 0.000000 13166 +shed 0 1 6.957497 0.000000 13167 +lighton 0 1 6.957497 0.000000 13168 +hyogo 0 1 6.957497 0.000000 13169 +atdominion 0 1 6.957497 0.000000 13170 +hqcheck 0 1 6.957497 0.000000 13171 +dilberti 0 1 6.957497 0.000000 13172 +mentionthat 0 1 6.957497 0.000000 13173 +edufrancoi 0 1 6.957497 0.000000 13174 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..4b31c7f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +member 0 84 2.484907 0.000000 165 +institut 0 84 2.484907 0.000000 187 +journal 0 83 2.484907 0.000000 183 +appli 0 71 2.639057 0.000000 226 +laboratori 0 63 2.772589 0.000000 292 +http 0 41 3.218876 0.000000 420 +autom 0 41 3.218876 0.000000 434 +electr 0 38 3.295837 0.000000 461 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +progress 0 28 3.610918 0.000000 598 +director 0 22 3.850148 0.000000 767 +divis 0 21 3.912023 0.000000 803 +former 0 17 4.110874 0.000000 956 +social 0 13 4.382027 0.000000 1123 +donald 0 9 4.753590 0.000000 1510 +fussel 0 5 5.347108 0.000000 2300 +regent 0 5 5.347108 0.000000 2551 +dalla 0 4 5.568345 0.000000 2930 +crow 0 3 5.857933 0.000000 3845 +trammel 0 2 6.263398 0.000000 5562 +dartmouth 0 2 6.263398 0.000000 5393 +collegem 0 2 6.263398 0.000000 5563 +eduinform 0 1 6.957497 0.000000 13175 +fussellb 0 1 6.957497 0.000000 13176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..c7e599c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +address 0 170 1.791759 0.000000 62 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +file 1 132 1.945910 1.945910 70 +construct 0 139 1.945910 0.000000 82 +document 0 121 2.079442 0.000000 89 +find 0 111 2.197225 0.000000 111 +start 0 83 2.484907 0.000000 173 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +go 0 33 3.433987 0.000000 529 +someth 0 31 3.496508 0.000000 554 +anyth 0 16 4.174387 0.000000 998 +georg 0 16 4.174387 0.000000 994 +lane 0 8 4.875197 0.000000 1720 +pagethi 0 5 5.347108 0.000000 2336 +ajit 1 3 5.857933 5.857933 3299 +eduher 0 3 5.857933 0.000000 3499 +useless 1 2 6.263398 6.263398 5564 +odd 0 2 6.263398 0.000000 5565 +georgemi 0 1 6.957497 0.000000 13177 +youand 0 1 6.957497 0.000000 13178 +goodthat 0 1 6.957497 0.000000 13179 +wickersham 0 1 6.957497 0.000000 13180 +gajit 0 1 6.957497 0.000000 13181 +foundus 0 1 6.957497 0.000000 13182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..4f8afd78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +technolog 0 131 2.079442 0.000000 102 +institut 0 84 2.484907 0.000000 187 +still 0 50 3.044522 0.000000 362 +india 0 32 3.465736 0.000000 550 +art 0 29 3.583519 0.000000 593 +try 0 22 3.850148 0.000000 764 +indian 0 22 3.850148 0.000000 769 +medic 0 17 4.110874 0.000000 958 +undergrad 0 9 4.753590 0.000000 1589 +kanpur 0 8 4.875197 0.000000 1744 +river 0 6 5.164786 0.000000 2220 +arora 0 4 5.568345 0.000000 2658 +mehom 0 4 5.568345 0.000000 2979 +oak 0 2 6.263398 0.000000 5566 +geeta 1 1 6.957497 6.957497 13183 +tofigur 0 1 6.957497 0.000000 13184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..b7ada643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +last 0 314 1.098612 0.000000 14 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +click 0 142 1.945910 0.000000 78 +number 0 130 2.079442 0.000000 97 +send 0 114 2.197225 0.000000 109 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +comment 0 93 2.397895 0.000000 146 +help 0 83 2.484907 0.000000 175 +novemb 0 81 2.484907 0.000000 179 +good 0 77 2.564949 0.000000 200 +want 0 79 2.564949 0.000000 199 +receiv 0 66 2.708050 0.000000 244 +suggest 0 53 2.944439 0.000000 331 +visitor 0 49 3.044522 0.000000 371 +could 0 46 3.091042 0.000000 383 +kind 0 32 3.465736 0.000000 541 +art 0 29 3.583519 0.000000 593 +actual 0 28 3.610918 0.000000 604 +quit 0 27 3.637586 0.000000 633 +wish 0 24 3.761200 0.000000 692 +head 0 23 3.806662 0.000000 732 +medic 0 17 4.110874 0.000000 958 +critic 0 16 4.174387 0.000000 982 +easi 0 16 4.174387 0.000000 969 +mayb 0 15 4.248495 0.000000 1014 +decid 0 14 4.317488 0.000000 1075 +earlier 0 13 4.382027 0.000000 1140 +minimum 0 9 4.753590 0.000000 1555 +mass 0 8 4.875197 0.000000 1732 +risk 0 8 4.875197 0.000000 1689 +sleep 0 6 5.164786 0.000000 2211 +merit 0 5 5.347108 0.000000 2466 +gokul 0 4 5.568345 0.000000 2668 +countless 0 4 5.568345 0.000000 3020 +flame 0 3 5.857933 0.000000 3696 +gripe 0 3 5.857933 0.000000 3257 +democrat 0 2 6.263398 0.000000 5567 +plakal 0 2 6.263398 0.000000 5568 +perfectli 0 2 6.263398 0.000000 5569 +theexcess 0 1 6.957497 0.000000 13185 +verbos 0 1 6.957497 0.000000 13186 +thecollect 0 1 6.957497 0.000000 13187 +putonli 0 1 6.957497 0.000000 13188 +barest 0 1 6.957497 0.000000 13189 +adieu 0 1 6.957497 0.000000 13190 +outpour 0 1 6.957497 0.000000 13191 +hag 0 1 6.957497 0.000000 13192 +hopey 0 1 6.957497 0.000000 13193 +untroubl 0 1 6.957497 0.000000 13194 +conscienc 0 1 6.957497 0.000000 13195 +untim 0 1 6.957497 0.000000 13196 +demis 0 1 6.957497 0.000000 13197 +vitriol 0 1 6.957497 0.000000 13198 +reinstat 0 1 6.957497 0.000000 13199 +signin 0 1 6.957497 0.000000 13200 +lesscrit 0 1 6.957497 0.000000 13201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..31b77512 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +like 1 132 1.945910 1.945910 81 +year 0 148 1.945910 0.000000 84 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +check 0 115 2.197225 0.000000 118 +make 0 111 2.197225 0.000000 120 +pleas 0 113 2.197225 0.000000 114 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +text 0 98 2.302585 0.000000 133 +present 0 91 2.397895 0.000000 145 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +thing 0 84 2.484907 0.000000 189 +start 0 83 2.484907 0.000000 173 +school 0 84 2.484907 0.000000 188 +control 0 82 2.484907 0.000000 164 +know 0 80 2.564949 0.000000 198 +state 0 76 2.564949 0.000000 207 +master 0 76 2.564949 0.000000 216 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +order 0 69 2.708050 0.000000 249 +would 0 67 2.708050 0.000000 251 +plai 0 60 2.833213 0.000000 307 +colleg 0 61 2.833213 0.000000 300 +best 0 59 2.833213 0.000000 299 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +finger 0 52 2.995732 0.000000 354 +friend 0 48 3.044522 0.000000 376 +life 0 50 3.044522 0.000000 375 +visitor 0 49 3.044522 0.000000 371 +cool 0 49 3.044522 0.000000 374 +without 0 50 3.044522 0.000000 370 +made 0 44 3.135494 0.000000 398 +keep 0 44 3.135494 0.000000 409 +anoth 0 45 3.135494 0.000000 408 +around 0 43 3.178054 0.000000 415 +futur 0 41 3.218876 0.000000 427 +past 0 42 3.218876 0.000000 428 +continu 0 39 3.258097 0.000000 448 +join 0 39 3.258097 0.000000 457 +game 0 36 3.367296 0.000000 498 +winter 0 36 3.367296 0.000000 500 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +board 0 33 3.433987 0.000000 528 +india 0 32 3.465736 0.000000 550 +travel 0 30 3.555348 0.000000 579 +option 0 30 3.555348 0.000000 575 +power 0 30 3.555348 0.000000 573 +art 0 29 3.583519 0.000000 593 +except 0 28 3.610918 0.000000 607 +weather 0 28 3.610918 0.000000 618 +usual 0 28 3.610918 0.000000 608 +propos 0 28 3.610918 0.000000 602 +great 0 27 3.637586 0.000000 626 +american 0 27 3.637586 0.000000 634 +background 0 25 3.737670 0.000000 664 +todai 0 25 3.737670 0.000000 672 +alwai 0 24 3.761200 0.000000 691 +famili 0 23 3.806662 0.000000 735 +love 0 21 3.912023 0.000000 804 +born 0 21 3.912023 0.000000 798 +watch 0 21 3.912023 0.000000 789 +tenni 0 20 3.951244 0.000000 838 +els 0 19 4.007333 0.000000 843 +ever 0 19 4.007333 0.000000 872 +spend 0 19 4.007333 0.000000 850 +listen 0 18 4.060443 0.000000 907 +intro 0 17 4.110874 0.000000 915 +sept 0 17 4.110874 0.000000 952 +bachelor 0 17 4.110874 0.000000 957 +thought 0 17 4.110874 0.000000 945 +medic 0 17 4.110874 0.000000 958 +transfer 0 16 4.174387 0.000000 967 +sign 0 16 4.174387 0.000000 970 +goe 0 15 4.248495 0.000000 1044 +hopefulli 0 14 4.317488 0.000000 1071 +becam 0 14 4.317488 0.000000 1117 +attribut 0 14 4.317488 0.000000 1092 +came 0 13 4.382027 0.000000 1197 +care 0 13 4.382027 0.000000 1177 +sai 0 13 4.382027 0.000000 1175 +guest 0 12 4.465908 0.000000 1220 +song 0 11 4.553877 0.000000 1380 +fill 0 11 4.553877 0.000000 1349 +success 0 10 4.653960 0.000000 1390 +hang 0 9 4.753590 0.000000 1499 +kumar 0 9 4.753590 0.000000 1506 +ball 0 9 4.753590 0.000000 1608 +gold 0 8 4.875197 0.000000 1745 +soccer 0 8 4.875197 0.000000 1752 +footbal 0 7 5.010635 0.000000 1912 +cricket 0 7 5.010635 0.000000 1945 +bore 0 7 5.010635 0.000000 1948 +golden 0 7 5.010635 0.000000 1962 +happen 0 7 5.010635 0.000000 1790 +southern 0 6 5.164786 0.000000 2191 +vari 0 6 5.164786 0.000000 2001 +alphabet 0 6 5.164786 0.000000 1980 +chat 0 6 5.164786 0.000000 2128 +curiou 0 5 5.347108 0.000000 2541 +chemic 0 5 5.347108 0.000000 2552 +valuabl 0 5 5.347108 0.000000 2256 +proud 0 4 5.568345 0.000000 2918 +batch 0 4 5.568345 0.000000 2700 +enrol 0 4 5.568345 0.000000 2613 +abraham 0 4 5.568345 0.000000 2644 +gokul 0 4 5.568345 0.000000 2668 +hide 0 4 5.568345 0.000000 2996 +bold 0 3 5.857933 0.000000 3846 +cold 0 3 5.857933 0.000000 3637 +acad 0 3 5.857933 0.000000 3847 +hindi 0 3 5.857933 0.000000 3753 +narrow 0 3 5.857933 0.000000 3807 +gooti 0 2 6.263398 0.000000 4281 +subramanyam 0 2 6.263398 0.000000 4282 +somebodi 0 2 6.263398 0.000000 4463 +hyderabad 0 2 6.263398 0.000000 5570 +andhra 0 2 6.263398 0.000000 5571 +pradesh 0 2 6.263398 0.000000 5572 +osmania 0 2 6.263398 0.000000 5573 +nebraska 0 2 6.263398 0.000000 5574 +lincoln 0 2 6.263398 0.000000 5575 +addict 0 2 6.263398 0.000000 5576 +neeraj 0 2 6.263398 0.000000 5577 +shailesh 0 2 6.263398 0.000000 5578 +vipin 0 2 6.263398 0.000000 5579 +hideout 0 1 6.957497 0.000000 13202 +wont 0 1 6.957497 0.000000 13203 +disappoint 0 1 6.957497 0.000000 13204 +geographi 0 1 6.957497 0.000000 13205 +gala 0 1 6.957497 0.000000 13206 +wasjust 0 1 6.957497 0.000000 13207 +mehul 0 1 6.957497 0.000000 13208 +shantanu 0 1 6.957497 0.000000 13209 +likechess 0 1 6.957497 0.000000 13210 +carrom 0 1 6.957497 0.000000 13211 +racquet 0 1 6.957497 0.000000 13212 +definetli 0 1 6.957497 0.000000 13213 +horoscop 0 1 6.957497 0.000000 13214 +compatabil 0 1 6.957497 0.000000 13215 +destini 0 1 6.957497 0.000000 13216 +hardwork 0 1 6.957497 0.000000 13217 +dispos 0 1 6.957497 0.000000 13218 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..40e71129 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +texa 0 160 1.791759 0.000000 64 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +austin 0 168 1.791759 0.000000 63 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +first 0 140 1.945910 0.000000 71 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +part 0 98 2.302585 0.000000 129 +octob 0 89 2.397895 0.000000 156 +associ 0 93 2.397895 0.000000 151 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +copi 0 63 2.772589 0.000000 284 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +date 0 51 2.995732 0.000000 344 +without 0 50 3.044522 0.000000 370 +protocol 1 45 3.135494 3.135494 407 +made 0 44 3.135494 0.000000 398 +term 0 43 3.178054 0.000000 411 +http 0 41 3.218876 0.000000 420 +must 0 40 3.258097 0.000000 442 +credit 0 38 3.295837 0.000000 460 +formal 0 37 3.332205 0.000000 478 +copyright 0 36 3.367296 0.000000 495 +survei 0 35 3.401197 0.000000 513 +word 0 34 3.401197 0.000000 508 +post 0 35 3.401197 0.000000 505 +posit 0 31 3.496508 0.000000 552 +profil 0 30 3.555348 0.000000 581 +hard 0 30 3.555348 0.000000 563 +full 0 28 3.610918 0.000000 615 +utc 0 27 3.637586 0.000000 629 +revis 0 26 3.688879 0.000000 640 +request 0 26 3.688879 0.000000 635 +notic 0 25 3.737670 0.000000 675 +other 0 24 3.761200 0.000000 697 +honor 0 23 3.806662 0.000000 729 +accept 0 18 4.060443 0.000000 879 +otherwis 0 17 4.110874 0.000000 922 +commerci 0 16 4.174387 0.000000 1005 +advantag 0 16 4.174387 0.000000 987 +permit 0 16 4.174387 0.000000 962 +evolv 0 12 4.465908 0.000000 1223 +grant 0 12 4.465908 0.000000 1216 +statement 0 11 4.553877 0.000000 1313 +submiss 0 11 4.553877 0.000000 1298 +exact 0 9 4.753590 0.000000 1509 +phrase 0 5 5.347108 0.000000 2242 +own 0 5 5.347108 0.000000 2531 +permiss 0 4 5.568345 0.000000 2642 +gouda 0 4 5.568345 0.000000 3021 +machineri 0 4 5.568345 0.000000 2851 +citat 0 3 5.857933 0.000000 3617 +sciencesaustin 0 3 5.857933 0.000000 3828 +argu 0 3 5.857933 0.000000 3698 +networkprotocol 0 3 5.857933 0.000000 3285 +moham 0 3 5.857933 0.000000 3848 +fornetwork 0 2 6.263398 0.000000 5580 +ordistribut 0 2 6.263398 0.000000 5581 +redistribut 0 2 6.263398 0.000000 5582 +pragmat 0 1 6.957497 0.000000 13219 +goudanetwork 0 1 6.957497 0.000000 13220 +goudaacm 0 1 6.957497 0.000000 13221 +surveysa 0 1 6.957497 0.000000 13222 +specificationsand 0 1 6.957497 0.000000 13223 +implementationsmoham 0 1 6.957497 0.000000 13224 +goudath 0 1 6.957497 0.000000 13225 +usagouda 0 1 6.957497 0.000000 13226 +htmlabstract 0 1 6.957497 0.000000 13227 +bridgeth 0 1 6.957497 0.000000 13228 +implementationsaddit 0 1 6.957497 0.000000 13229 +methodologypubl 0 1 6.957497 0.000000 13230 +digitalor 0 1 6.957497 0.000000 13231 +classroomus 0 1 6.957497 0.000000 13232 +profit 0 1 6.957497 0.000000 13233 +bearthi 0 1 6.957497 0.000000 13234 +forcompon 0 1 6.957497 0.000000 13235 +torepublish 0 1 6.957497 0.000000 13236 +requiresprior 0 1 6.957497 0.000000 13237 +frompubl 0 1 6.957497 0.000000 13238 +orpermiss 0 1 6.957497 0.000000 13239 +goudagouda 0 1 6.957497 0.000000 13240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..9f87b441 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +data 0 170 1.791759 0.000000 49 +assign 0 135 1.945910 0.000000 66 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +report 0 131 2.079442 0.000000 92 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +take 0 97 2.302585 0.000000 134 +imag 0 91 2.397895 0.000000 161 +meet 0 72 2.639057 0.000000 229 +would 0 67 2.708050 0.000000 251 +test 0 66 2.708050 0.000000 252 +plan 0 65 2.772589 0.000000 272 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +talk 0 53 2.944439 0.000000 336 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +friend 0 48 3.044522 0.000000 376 +visitor 0 49 3.044522 0.000000 371 +long 0 43 3.178054 0.000000 413 +author 0 39 3.258097 0.000000 450 +connect 0 37 3.332205 0.000000 485 +john 0 33 3.433987 0.000000 532 +product 0 33 3.433987 0.000000 527 +except 0 28 3.610918 0.000000 607 +minut 0 20 3.951244 0.000000 810 +log 0 19 4.007333 0.000000 857 +less 0 18 4.060443 0.000000 892 +matrix 0 17 4.110874 0.000000 933 +spars 0 16 4.174387 0.000000 989 +doesn 0 15 4.248495 0.000000 1055 +central 0 13 4.382027 0.000000 1160 +land 0 12 4.465908 0.000000 1273 +rememb 0 12 4.465908 0.000000 1217 +guess 0 10 4.653960 0.000000 1443 +latter 0 9 4.753590 0.000000 1522 +bore 0 7 5.010635 0.000000 1948 +oregon 0 5 5.347108 0.000000 2437 +glimps 0 4 5.568345 0.000000 2778 +plapack 0 3 5.857933 0.000000 3849 +redmond 0 3 5.857933 0.000000 3568 +mysteri 0 2 6.263398 0.000000 4715 +hail 0 2 6.263398 0.000000 5583 +gunnel 1 1 6.957497 6.957497 13241 +transpos 0 1 6.957497 0.000000 13242 +drank 0 1 6.957497 0.000000 13243 +depict 0 1 6.957497 0.000000 13244 +computationsif 0 1 6.957497 0.000000 13245 +pageam 0 1 6.957497 0.000000 13246 +towni 0 1 6.957497 0.000000 13247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..64b7e2e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +oper 0 180 1.609438 0.000000 34 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +year 0 148 1.945910 0.000000 84 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +undergradu 0 54 2.944439 0.000000 338 +curriculum 0 33 3.433987 0.000000 535 +mellon 0 13 4.382027 0.000000 1179 +frank 0 9 4.753590 0.000000 1568 +junior 0 5 5.347108 0.000000 2519 +tropschuhfrank 0 1 6.957497 0.000000 13248 +tropschuh 0 1 6.957497 0.000000 13249 +gunther 0 1 6.957497 0.000000 13250 +schweiz 0 1 6.957497 0.000000 13251 +clayton 0 1 6.957497 0.000000 13252 +waldhofstrass 0 1 6.957497 0.000000 13253 +rheinfelden 0 1 6.957497 0.000000 13254 +vitaeenglishdeutschlinkscarnegi 0 1 6.957497 0.000000 13255 +universitterlangen 0 1 6.957497 0.000000 13256 +nrnberg 0 1 6.957497 0.000000 13257 +abroad 0 1 6.957497 0.000000 13258 +mathematisch 0 1 6.957497 0.000000 13259 +maschinen 0 1 6.957497 0.000000 13260 +datenverarbeitung 0 1 6.957497 0.000000 13261 +tropschuhgunth 0 1 6.957497 0.000000 13262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..72ffd61b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +place 0 106 2.197225 0.000000 124 +master 0 76 2.564949 0.000000 216 +name 0 72 2.639057 0.000000 220 +servic 0 72 2.639057 0.000000 236 +degre 0 69 2.708050 0.000000 259 +juli 0 60 2.833213 0.000000 305 +tabl 0 51 2.995732 0.000000 346 +directori 0 45 3.135494 0.000000 396 +china 0 37 3.332205 0.000000 487 +mine 0 26 3.688879 0.000000 654 +background 0 25 3.737670 0.000000 664 +hobbi 0 16 4.174387 0.000000 1009 +grant 0 12 4.465908 0.000000 1216 +birthdai 0 4 5.568345 0.000000 2800 +birth 0 3 5.857933 0.000000 3594 +addresspictur 0 2 6.263398 0.000000 5584 +ceremoni 0 2 6.263398 0.000000 5585 +jiangsu 0 2 6.263398 0.000000 5586 +yongxiang 1 1 6.957497 6.957497 13263 +pagemerri 0 1 6.957497 0.000000 13264 +christmashappi 0 1 6.957497 0.000000 13265 +homepagegao 0 1 6.957497 0.000000 13266 +yongxiangsever 0 1 6.957497 0.000000 13267 +pointsto 0 1 6.957497 0.000000 13268 +chinadepart 0 1 6.957497 0.000000 13269 +male 0 1 6.957497 0.000000 13270 +huanan 0 1 6.957497 0.000000 13271 +tenniseduc 0 1 6.957497 0.000000 13272 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..a9f5e717 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +distribut 0 162 1.791759 0.000000 51 +databas 0 122 2.079442 0.000000 86 +manag 0 114 2.197225 0.000000 125 +second 0 81 2.484907 0.000000 166 +term 0 43 3.178054 0.000000 411 +queri 0 33 3.433987 0.000000 524 +stop 0 17 4.110874 0.000000 942 +zhang 0 16 4.174387 0.000000 980 +gzhang 0 2 6.263398 0.000000 4183 +schoolth 0 1 6.957497 0.000000 13273 +semestercoursesc 0 1 6.957497 0.000000 13274 +linc 0 1 6.957497 0.000000 13275 +alvis 0 1 6.957497 0.000000 13276 +mirankerfil 0 1 6.957497 0.000000 13277 +databs 0 1 6.957497 0.000000 13278 +formthank 0 1 6.957497 0.000000 13279 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..211cb270 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +find 0 111 2.197225 0.000000 111 +mathemat 0 108 2.197225 0.000000 123 +techniqu 0 99 2.302585 0.000000 138 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +institut 0 84 2.484907 0.000000 187 +ieee 0 86 2.484907 0.000000 190 +academ 0 82 2.484907 0.000000 178 +internet 0 83 2.484907 0.000000 186 +optim 0 79 2.564949 0.000000 197 +refer 0 78 2.564949 0.000000 203 +new 0 64 2.772589 0.000000 262 +complex 0 64 2.772589 0.000000 269 +prof 0 64 2.772589 0.000000 273 +visit 0 63 2.772589 0.000000 288 +room 0 59 2.833213 0.000000 301 +undergradu 0 54 2.944439 0.000000 338 +even 0 45 3.135494 0.000000 393 +directori 0 45 3.135494 0.000000 396 +theoret 0 39 3.258097 0.000000 446 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +industri 0 38 3.295837 0.000000 464 +china 0 37 3.332205 0.000000 487 +sciencesunivers 0 37 3.332205 0.000000 486 +staff 0 36 3.367296 0.000000 490 +award 0 34 3.401197 0.000000 523 +given 0 32 3.465736 0.000000 538 +focus 0 29 3.583519 0.000000 584 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +constraint 0 26 3.688879 0.000000 636 +head 0 23 3.806662 0.000000 732 +size 0 23 3.806662 0.000000 713 +period 0 22 3.850148 0.000000 743 +vlsi 0 21 3.912023 0.000000 795 +martin 0 21 3.912023 0.000000 794 +rout 0 21 3.912023 0.000000 793 +chen 0 21 3.912023 0.000000 791 +voic 0 21 3.912023 0.000000 806 +aid 0 18 4.060443 0.000000 904 +edulast 0 17 4.110874 0.000000 927 +jose 0 16 4.174387 0.000000 976 +tsinghua 0 13 4.382027 0.000000 1195 +israel 0 11 4.553877 0.000000 1366 +wong 0 9 4.753590 0.000000 1609 +classmat 0 9 4.753590 0.000000 1516 +combinatori 0 8 4.875197 0.000000 1629 +wire 0 8 4.875197 0.000000 1747 +uniform 0 7 5.010635 0.000000 1845 +delai 0 7 5.010635 0.000000 1848 +zhou 0 6 5.164786 0.000000 2092 +ture 0 6 5.164786 0.000000 1997 +alex 0 6 5.164786 0.000000 2130 +internationalconfer 0 6 5.164786 0.000000 2051 +bulletin 0 5 5.347108 0.000000 2343 +weizmann 0 4 5.568345 0.000000 2858 +kept 0 4 5.568345 0.000000 2762 +zhao 0 4 5.568345 0.000000 2699 +headlin 0 3 5.857933 0.000000 3710 +amir 0 3 5.857933 0.000000 3850 +mathematicallog 0 3 5.857933 0.000000 3796 +pagealan 0 2 6.263398 0.000000 5587 +compuer 0 2 6.263398 0.000000 4692 +researchgroup 0 2 6.263398 0.000000 5588 +pnueli 0 1 6.957497 0.000000 13280 +aprofessor 0 1 6.957497 0.000000 13281 +prestig 0 1 6.957497 0.000000 13282 +incompletelist 0 1 6.957497 0.000000 13283 +publicationshai 0 1 6.957497 0.000000 13284 +forriv 0 1 6.957497 0.000000 13285 +crosstalk 0 1 6.957497 0.000000 13286 +optimalnon 0 1 6.957497 0.000000 13287 +elmor 0 1 6.957497 0.000000 13288 +acmintern 0 1 6.957497 0.000000 13289 +austintaylor 0 1 6.957497 0.000000 13290 +staustin 0 1 6.957497 0.000000 13291 +haizhou 0 1 6.957497 0.000000 13292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..ddd25dc6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +educ 0 86 2.484907 0.000000 191 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +nation 0 74 2.639057 0.000000 240 +dept 0 64 2.772589 0.000000 291 +visitor 0 49 3.044522 0.000000 371 +china 1 37 3.332205 3.332205 487 +univ 1 28 3.610918 3.610918 617 +alumni 0 21 3.912023 0.000000 807 +sept 0 17 4.110874 0.000000 952 +universityof 0 15 4.248495 0.000000 1061 +wait 0 13 4.382027 0.000000 1168 +departmentof 0 9 4.753590 0.000000 1539 +hear 0 7 5.010635 0.000000 1940 +wuhan 1 2 6.263398 6.263398 5589 +pal 0 2 6.263398 0.000000 4964 +myselfnow 0 1 6.957497 0.000000 13293 +pre 0 1 6.957497 0.000000 13294 +alumnihom 0 1 6.957497 0.000000 13295 +pagecontact 0 1 6.957497 0.000000 13296 +haosun 0 1 6.957497 0.000000 13297 +edunow 0 1 6.957497 0.000000 13298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..9f4a5755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +first 0 140 1.945910 0.000000 71 +click 0 142 1.945910 0.000000 78 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +mathemat 0 108 2.197225 0.000000 123 +place 0 106 2.197225 0.000000 124 +intern 0 108 2.197225 0.000000 128 +call 0 91 2.397895 0.000000 153 +educ 0 86 2.484907 0.000000 191 +want 0 79 2.564949 0.000000 199 +nation 0 74 2.639057 0.000000 240 +view 0 70 2.708050 0.000000 254 +visit 0 63 2.772589 0.000000 288 +taylor 0 63 2.772589 0.000000 287 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +faculti 0 56 2.890372 0.000000 325 +summer 0 56 2.890372 0.000000 311 +might 0 41 3.218876 0.000000 426 +author 0 39 3.258097 0.000000 450 +open 0 38 3.295837 0.000000 469 +electr 0 38 3.295837 0.000000 461 +utc 0 27 3.637586 0.000000 629 +honor 0 23 3.806662 0.000000 729 +finish 0 22 3.850148 0.000000 748 +divis 0 21 3.912023 0.000000 803 +grad 0 20 3.951244 0.000000 837 +minut 0 20 3.951244 0.000000 810 +thoma 0 18 4.060443 0.000000 901 +edulast 0 17 4.110874 0.000000 927 +fourth 0 16 4.174387 0.000000 999 +universityof 0 15 4.248495 0.000000 1061 +club 0 15 4.248495 0.000000 1058 +central 0 13 4.382027 0.000000 1160 +volleybal 0 9 4.753590 0.000000 1598 +departmentof 0 9 4.753590 0.000000 1539 +motorola 0 9 4.753590 0.000000 1546 +competit 0 8 4.875197 0.000000 1635 +contest 0 5 5.347108 0.000000 2273 +ioanni 0 5 5.347108 0.000000 2553 +champion 1 4 5.568345 5.568345 2982 +lanc 0 4 5.568345 0.000000 3022 +educlick 0 3 5.857933 0.000000 3612 +sawada 0 3 5.857933 0.000000 3190 +smaragdaki 0 3 5.857933 0.000000 3851 +tower 0 3 5.857933 0.000000 3818 +myfavorit 0 3 5.857933 0.000000 3852 +intramur 0 2 6.263398 0.000000 5590 +kansa 0 2 6.263398 0.000000 5591 +bowl 0 2 6.263398 0.000000 5417 +marathon 0 2 6.263398 0.000000 5592 +micheal 0 1 6.957497 0.000000 13299 +hewett 0 1 6.957497 0.000000 13300 +hewetthewett 0 1 6.957497 0.000000 13301 +fingerm 0 1 6.957497 0.000000 13302 +stanfordunivers 0 1 6.957497 0.000000 13303 +washburnunivers 0 1 6.957497 0.000000 13304 +collegiateprogram 0 1 6.957497 0.000000 13305 +wahlutc 0 1 6.957497 0.000000 13306 +hanoi 0 1 6.957497 0.000000 13307 +tokudaut 0 1 6.957497 0.000000 13308 +locatem 0 1 6.957497 0.000000 13309 +learnabout 0 1 6.957497 0.000000 13310 +downloadmi 0 1 6.957497 0.000000 13311 +learnmor 0 1 6.957497 0.000000 13312 +timefax 0 1 6.957497 0.000000 13313 +hewettemail 0 1 6.957497 0.000000 13314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..66d3b2c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +like 0 132 1.945910 0.000000 81 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +real 0 93 2.397895 0.000000 144 +search 0 95 2.397895 0.000000 155 +activ 0 84 2.484907 0.000000 182 +solut 0 82 2.484907 0.000000 162 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +educ 0 86 2.484907 0.000000 191 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +novemb 0 81 2.484907 0.000000 179 +resum 0 79 2.564949 0.000000 217 +state 0 76 2.564949 0.000000 207 +orient 0 80 2.564949 0.000000 205 +interfac 0 79 2.564949 0.000000 209 +onlin 0 75 2.639057 0.000000 223 +write 0 72 2.639057 0.000000 222 +java 0 70 2.708050 0.000000 248 +receiv 0 66 2.708050 0.000000 244 +simul 0 66 2.708050 0.000000 255 +window 0 68 2.708050 0.000000 242 +main 0 67 2.708050 0.000000 256 +function 0 62 2.772589 0.000000 275 +written 0 63 2.772589 0.000000 278 +virtual 0 62 2.772589 0.000000 285 +best 0 59 2.833213 0.000000 299 +detail 0 57 2.890372 0.000000 321 +explor 0 58 2.890372 0.000000 324 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +life 0 50 3.044522 0.000000 375 +video 0 44 3.135494 0.000000 405 +natur 0 44 3.135494 0.000000 406 +http 0 41 3.218876 0.000000 420 +might 0 41 3.218876 0.000000 426 +fast 0 42 3.218876 0.000000 429 +live 0 40 3.258097 0.000000 451 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +robot 0 36 3.367296 0.000000 497 +game 0 36 3.367296 0.000000 498 +especi 0 36 3.367296 0.000000 496 +product 0 33 3.433987 0.000000 527 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +anim 0 31 3.496508 0.000000 557 +common 0 30 3.555348 0.000000 574 +rang 0 30 3.555348 0.000000 565 +platform 0 29 3.583519 0.000000 591 +packag 0 28 3.610918 0.000000 614 +american 0 27 3.637586 0.000000 634 +client 0 25 3.737670 0.000000 679 +flow 0 24 3.761200 0.000000 700 +methodolog 0 23 3.806662 0.000000 733 +born 0 21 3.912023 0.000000 798 +unit 0 21 3.912023 0.000000 779 +busi 0 21 3.912023 0.000000 784 +theunivers 0 21 3.912023 0.000000 797 +five 0 19 4.007333 0.000000 841 +lisp 0 18 4.060443 0.000000 897 +seek 0 17 4.110874 0.000000 954 +edulast 0 17 4.110874 0.000000 927 +easi 0 16 4.174387 0.000000 969 +reflect 0 15 4.248495 0.000000 1034 +came 0 13 4.382027 0.000000 1197 +usavoic 0 13 4.382027 0.000000 1198 +assembl 0 12 4.465908 0.000000 1207 +realiti 0 12 4.465908 0.000000 1272 +resid 0 10 4.653960 0.000000 1461 +placement 0 10 4.653960 0.000000 1420 +mepost 0 10 4.653960 0.000000 1472 +novak 0 9 4.753590 0.000000 1521 +poetri 0 9 4.753590 0.000000 1596 +port 0 8 4.875197 0.000000 1766 +cross 0 8 4.875197 0.000000 1703 +realist 0 8 4.875197 0.000000 1665 +textur 0 8 4.875197 0.000000 1677 +spot 0 7 5.010635 0.000000 1894 +contract 0 6 5.164786 0.000000 1985 +gordon 0 6 5.164786 0.000000 2032 +ousterhout 0 5 5.347108 0.000000 2301 +outdoor 0 5 5.347108 0.000000 2514 +havedevelop 0 4 5.568345 0.000000 2681 +vrml 0 4 5.568345 0.000000 2949 +legion 0 3 5.857933 0.000000 3708 +flat 0 3 5.857933 0.000000 3853 +moredetail 0 3 5.857933 0.000000 3854 +expertis 0 3 5.857933 0.000000 3321 +leverag 0 3 5.857933 0.000000 3153 +vietnames 0 2 6.263398 0.000000 5593 +occup 0 2 6.263398 0.000000 5169 +akcl 0 2 6.263398 0.000000 4796 +standalon 0 2 6.263398 0.000000 4077 +researchwith 0 2 6.263398 0.000000 5594 +sdsc 0 2 6.263398 0.000000 5199 +hiep 0 1 6.957497 0.000000 13315 +xwindow 0 1 6.957497 0.000000 13316 +gunu 0 1 6.957497 0.000000 13317 +netrek 0 1 6.957497 0.000000 13318 +factoryx 0 1 6.957497 0.000000 13319 +nguyenhiep 0 1 6.957497 0.000000 13320 +nguyenabout 0 1 6.957497 0.000000 13321 +meabout 0 1 6.957497 0.000000 13322 +texasfor 0 1 6.957497 0.000000 13323 +providinghigh 0 1 6.957497 0.000000 13324 +hypertextresum 0 1 6.957497 0.000000 13325 +con 0 1 6.957497 0.000000 13326 +nsplace 0 1 6.957497 0.000000 13327 +rexi 0 1 6.957497 0.000000 13328 +emptiv 0 1 6.957497 0.000000 13329 +gdraw 0 1 6.957497 0.000000 13330 +specular 0 1 6.957497 0.000000 13331 +sonar 0 1 6.957497 0.000000 13332 +xgcl 0 1 6.957497 0.000000 13333 +xakcl 0 1 6.957497 0.000000 13334 +anonlin 0 1 6.957497 0.000000 13335 +andmaintain 0 1 6.957497 0.000000 13336 +currentlyact 0 1 6.957497 0.000000 13337 +internetsoftwar 0 1 6.957497 0.000000 13338 +mappingroutin 0 1 6.957497 0.000000 13339 +potteri 0 1 6.957497 0.000000 13340 +vrmlto 0 1 6.957497 0.000000 13341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..d216281e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +tool 0 117 2.079442 0.000000 93 +welcom 0 122 2.079442 0.000000 99 +world 0 115 2.197225 0.000000 126 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +book 0 99 2.302585 0.000000 131 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +internet 0 83 2.484907 0.000000 186 +member 0 84 2.484907 0.000000 165 +stuff 0 87 2.484907 0.000000 171 +ieee 0 86 2.484907 0.000000 190 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +java 0 70 2.708050 0.000000 248 +guid 0 63 2.772589 0.000000 267 +new 0 64 2.772589 0.000000 262 +virtual 0 62 2.772589 0.000000 285 +collect 0 65 2.772589 0.000000 268 +taylor 0 63 2.772589 0.000000 287 +unix 0 58 2.890372 0.000000 308 +directori 0 45 3.135494 0.000000 396 +societi 0 40 3.258097 0.000000 456 +expect 0 37 3.332205 0.000000 484 +manual 0 35 3.401197 0.000000 504 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +bookmark 0 26 3.688879 0.000000 639 +yahoo 0 24 3.761200 0.000000 707 +vlsi 0 21 3.912023 0.000000 795 +martin 0 21 3.912023 0.000000 794 +nice 0 20 3.951244 0.000000 809 +citi 0 19 4.007333 0.000000 874 +career 0 12 4.465908 0.000000 1287 +entertain 0 12 4.465908 0.000000 1286 +tour 0 11 4.553877 0.000000 1307 +perl 0 11 4.553877 0.000000 1332 +mosaic 0 10 4.653960 0.000000 1426 +wong 0 9 4.753590 0.000000 1609 +infoseek 0 6 5.164786 0.000000 2188 +giant 0 3 5.857933 0.000000 3137 +huiqun 0 2 6.263398 0.000000 4200 +rosett 0 2 6.263398 0.000000 5595 +hqliu 0 2 6.263398 0.000000 4199 +sunris 0 2 6.263398 0.000000 5212 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..f7a751ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +postscript 0 131 2.079442 0.000000 90 +librari 0 87 2.484907 0.000000 181 +onlin 0 75 2.639057 0.000000 223 +knowledg 0 67 2.708050 0.000000 243 +main 0 67 2.708050 0.000000 256 +colleg 0 61 2.833213 0.000000 300 +thesi 0 57 2.890372 0.000000 327 +reason 0 57 2.890372 0.000000 318 +advisor 0 51 2.995732 0.000000 355 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +expect 0 37 3.332205 0.000000 484 +represent 0 35 3.401197 0.000000 512 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +rule 0 26 3.688879 0.000000 638 +action 0 15 4.248495 0.000000 1038 +english 0 15 4.248495 0.000000 1033 +draft 0 14 4.317488 0.000000 1085 +philosophi 0 13 4.382027 0.000000 1167 +usavoic 0 13 4.382027 0.000000 1198 +vladimir 0 11 4.553877 0.000000 1324 +mepost 0 10 4.653960 0.000000 1472 +tempor 0 9 4.753590 0.000000 1584 +colloquium 0 8 4.875197 0.000000 1734 +sciencesat 0 7 5.010635 0.000000 1968 +infer 0 6 5.164786 0.000000 2040 +causal 0 6 5.164786 0.000000 2024 +lifschitz 0 5 5.347108 0.000000 2542 +nonmonoton 0 4 5.568345 0.000000 3023 +interestscommonsens 0 2 6.263398 0.000000 5596 +actionlog 0 2 6.263398 0.000000 5597 +reasoningmi 0 2 6.263398 0.000000 5598 +hudson 0 1 6.957497 0.000000 13342 +pagehudson 0 1 6.957497 0.000000 13343 +turnerphd 0 1 6.957497 0.000000 13344 +ofcommonsens 0 1 6.957497 0.000000 13345 +msc 0 1 6.957497 0.000000 13346 +mli 0 1 6.957497 0.000000 13347 +linkseuropean 0 1 6.957497 0.000000 13348 +spatialand 0 1 6.957497 0.000000 13349 +reasoningto 0 1 6.957497 0.000000 13350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..df9c734d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +number 0 130 2.079442 0.000000 97 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +part 0 98 2.302585 0.000000 129 +center 0 88 2.397895 0.000000 158 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +comment 0 93 2.397895 0.000000 146 +ieee 0 86 2.484907 0.000000 190 +help 0 83 2.484907 0.000000 175 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +resum 0 79 2.564949 0.000000 217 +onlin 0 75 2.639057 0.000000 223 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +format 0 48 3.044522 0.000000 356 +compani 0 41 3.218876 0.000000 423 +littl 0 39 3.258097 0.000000 454 +travel 0 30 3.555348 0.000000 579 +full 0 28 3.610918 0.000000 615 +campu 0 27 3.637586 0.000000 623 +left 0 19 4.007333 0.000000 851 +beauti 0 18 4.060443 0.000000 912 +seek 0 17 4.110874 0.000000 954 +zhang 0 16 4.174387 0.000000 980 +career 0 12 4.465908 0.000000 1287 +lake 0 11 4.553877 0.000000 1373 +allen 0 5 5.347108 0.000000 2470 +blvd 0 4 5.568345 0.000000 3007 +yanbin 0 2 6.263398 0.000000 5599 +hyanbin 0 1 6.957497 0.000000 13351 +cutti 0 1 6.957497 0.000000 13352 +webmuseum 0 1 6.957497 0.000000 13353 +homeland 0 1 6.957497 0.000000 13354 +tarlor 0 1 6.957497 0.000000 13355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..79ffccdd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +construct 0 139 1.945910 0.000000 82 +tool 0 117 2.079442 0.000000 93 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +graphic 0 90 2.397895 0.000000 147 +master 0 76 2.564949 0.000000 216 +decemb 0 80 2.564949 0.000000 215 +degre 0 69 2.708050 0.000000 259 +creat 0 63 2.772589 0.000000 277 +summer 0 56 2.890372 0.000000 311 +undergradu 0 54 2.944439 0.000000 338 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +http 0 41 3.218876 0.000000 420 +compani 0 41 3.218876 0.000000 423 +small 0 39 3.258097 0.000000 447 +framework 0 28 3.610918 0.000000 606 +trace 0 25 3.737670 0.000000 677 +geometri 0 22 3.850148 0.000000 752 +concentr 0 18 4.060443 0.000000 906 +modular 0 10 4.653960 0.000000 1392 +univeristi 0 8 4.875197 0.000000 1754 +mass 0 8 4.875197 0.000000 1732 +solid 0 5 5.347108 0.000000 2255 +babi 0 5 5.347108 0.000000 2493 +isaac 0 3 5.857933 0.000000 3855 +coursework 0 3 5.857933 0.000000 3588 +sheldon 0 2 6.263398 0.000000 5226 +reciev 0 2 6.263398 0.000000 5600 +lowel 0 2 6.263398 0.000000 5224 +isheldon 0 1 6.957497 0.000000 13356 +reciv 0 1 6.957497 0.000000 13357 +unives 0 1 6.957497 0.000000 13358 +schlaeor 0 1 6.957497 0.000000 13359 +mellor 0 1 6.957497 0.000000 13360 +bsptree 0 1 6.957497 0.000000 13361 +butt 0 1 6.957497 0.000000 13362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..8234957c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +book 0 99 2.302585 0.000000 131 +homepag 0 93 2.397895 0.000000 148 +internet 0 83 2.484907 0.000000 186 +taylor 1 63 2.772589 2.772589 287 +back 1 60 2.833213 2.833213 297 +friend 0 48 3.044522 0.000000 376 +get 0 46 3.091042 0.000000 380 +live 1 40 3.258097 3.258097 451 +jame 0 35 3.401197 0.000000 507 +john 1 33 3.433987 3.433987 532 +steve 0 29 3.583519 0.000000 594 +consult 0 24 3.761200 0.000000 687 +doctor 0 24 3.761200 0.000000 709 +alumni 0 21 3.912023 0.000000 807 +white 0 17 4.110874 0.000000 951 +carl 0 15 4.248495 0.000000 1024 +wife 0 13 4.382027 0.000000 1196 +rice 0 11 4.553877 0.000000 1336 +evan 0 8 4.875197 0.000000 1633 +matthew 0 6 5.164786 0.000000 2193 +fish 0 6 5.164786 0.000000 2207 +holli 0 2 6.263398 0.000000 5601 +dejanew 0 2 6.263398 0.000000 5602 +adair 1 1 6.957497 6.957497 13363 +crinkum 0 1 6.957497 0.000000 13364 +crankum 0 1 6.957497 0.000000 13365 +compound 0 1 6.957497 0.000000 13366 +eileen 0 1 6.957497 0.000000 13367 +mengerink 0 1 6.957497 0.000000 13368 +fanat 0 1 6.957497 0.000000 13369 +traylen 0 1 6.957497 0.000000 13370 +jadair 0 1 6.957497 0.000000 13371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..f4a6957b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +oper 1 180 1.609438 1.609438 34 +paper 0 205 1.609438 0.000000 38 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +physic 0 47 3.091042 0.000000 377 +vita 0 38 3.295837 0.000000 473 +john 0 33 3.433987 0.000000 532 +chamber 0 8 4.875197 0.000000 1692 +yale 0 6 5.164786 0.000000 2003 +pagejohn 0 2 6.263398 0.000000 5603 +universityph 0 2 6.263398 0.000000 5604 +chamberssenior 0 1 6.957497 0.000000 13372 +specialistb 0 1 6.957497 0.000000 13373 +paso 0 1 6.957497 0.000000 13374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..e05efbf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,165 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +avail 0 169 1.791759 0.000000 48 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +area 0 144 1.945910 0.000000 80 +file 0 132 1.945910 0.000000 70 +postscript 0 131 2.079442 0.000000 90 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +make 0 111 2.197225 0.000000 120 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +advanc 0 99 2.302585 0.000000 130 +call 0 91 2.397895 0.000000 153 +level 0 87 2.484907 0.000000 180 +decemb 0 80 2.564949 0.000000 215 +master 0 76 2.564949 0.000000 216 +complet 0 77 2.564949 0.000000 208 +orient 0 80 2.564949 0.000000 205 +resum 0 79 2.564949 0.000000 217 +line 0 75 2.639057 0.000000 231 +appli 0 71 2.639057 0.000000 226 +simul 0 66 2.708050 0.000000 255 +goal 0 66 2.708050 0.000000 250 +test 0 66 2.708050 0.000000 252 +receiv 0 66 2.708050 0.000000 244 +differ 0 66 2.708050 0.000000 253 +organ 0 65 2.772589 0.000000 265 +result 0 65 2.772589 0.000000 281 +interact 0 62 2.772589 0.000000 270 +function 0 62 2.772589 0.000000 275 +dept 0 64 2.772589 0.000000 291 +artifici 0 63 2.772589 0.000000 280 +abstract 0 62 2.772589 0.000000 276 +juli 0 60 2.833213 0.000000 305 +thesi 0 57 2.890372 0.000000 327 +sever 0 56 2.890372 0.000000 322 +detail 0 57 2.890372 0.000000 321 +extens 0 53 2.944439 0.000000 340 +particular 0 51 2.995732 0.000000 352 +case 0 51 2.995732 0.000000 351 +finger 0 52 2.995732 0.000000 354 +visual 0 48 3.044522 0.000000 372 +principl 0 48 3.044522 0.000000 357 +format 0 48 3.044522 0.000000 356 +understand 0 47 3.091042 0.000000 384 +effect 0 46 3.091042 0.000000 385 +long 0 43 3.178054 0.000000 413 +offer 0 43 3.178054 0.000000 414 +past 0 42 3.218876 0.000000 428 +vision 0 41 3.218876 0.000000 430 +might 0 41 3.218876 0.000000 426 +examin 0 42 3.218876 0.000000 424 +probabl 0 40 3.258097 0.000000 455 +electr 0 38 3.295837 0.000000 461 +soon 0 36 3.367296 0.000000 494 +ofth 0 36 3.367296 0.000000 491 +human 0 32 3.465736 0.000000 546 +neural 0 30 3.555348 0.000000 578 +power 0 30 3.555348 0.000000 573 +domain 0 30 3.555348 0.000000 564 +propos 0 28 3.610918 0.000000 602 +measur 0 28 3.610918 0.000000 609 +mind 0 27 3.637586 0.000000 632 +enabl 0 26 3.688879 0.000000 655 +rather 0 26 3.688879 0.000000 642 +primari 0 25 3.737670 0.000000 669 +demonstr 0 24 3.761200 0.000000 694 +doctor 0 24 3.761200 0.000000 709 +input 0 23 3.806662 0.000000 727 +begin 0 23 3.806662 0.000000 716 +self 0 22 3.850148 0.000000 761 +serv 0 22 3.850148 0.000000 758 +thu 0 21 3.912023 0.000000 773 +similar 0 21 3.912023 0.000000 771 +basi 0 20 3.951244 0.000000 828 +predict 0 19 4.007333 0.000000 855 +log 0 19 4.007333 0.000000 857 +concentr 0 18 4.060443 0.000000 906 +failur 0 18 4.060443 0.000000 898 +figur 0 18 4.060443 0.000000 903 +seek 0 17 4.110874 0.000000 954 +thought 0 17 4.110874 0.000000 945 +explan 0 16 4.174387 0.000000 985 +spatial 0 16 4.174387 0.000000 988 +later 0 15 4.248495 0.000000 1043 +drive 0 15 4.248495 0.000000 1052 +ascii 0 15 4.248495 0.000000 1032 +command 0 14 4.317488 0.000000 1083 +philosophi 0 13 4.382027 0.000000 1167 +necessari 0 13 4.382027 0.000000 1147 +carri 0 13 4.382027 0.000000 1152 +incorpor 0 13 4.382027 0.000000 1163 +overal 0 12 4.465908 0.000000 1254 +qualit 0 11 4.553877 0.000000 1362 +equal 0 10 4.653960 0.000000 1424 +candid 0 9 4.753590 0.000000 1606 +preliminari 0 9 4.753590 0.000000 1480 +brain 0 8 4.875197 0.000000 1638 +realist 0 8 4.875197 0.000000 1665 +empir 0 8 4.875197 0.000000 1722 +miikkulainen 0 8 4.875197 0.000000 1667 +quantit 0 8 4.875197 0.000000 1654 +centuri 0 7 5.010635 0.000000 1935 +appar 0 7 5.010635 0.000000 1958 +aris 0 7 5.010635 0.000000 1924 +biolog 0 6 5.164786 0.000000 2147 +illus 0 4 5.568345 0.000000 2603 +insight 0 4 5.568345 0.000000 3024 +outdat 0 4 5.568345 0.000000 2797 +cortex 0 3 5.857933 0.000000 3856 +dramat 0 3 5.857933 0.000000 3239 +useth 0 3 5.857933 0.000000 3110 +cortic 0 3 5.857933 0.000000 3857 +neuron 0 3 5.857933 0.000000 3798 +frequenc 0 3 5.857933 0.000000 3206 +lissom 0 2 6.263398 0.000000 5605 +jbednar 0 2 6.263398 0.000000 4284 +bednar 0 2 6.263398 0.000000 4283 +testabl 0 2 6.263398 0.000000 5606 +hypothes 0 2 6.263398 0.000000 5607 +nearli 0 2 6.263398 0.000000 5608 +sirosh 0 2 6.263398 0.000000 5609 +aftereffect 0 1 6.957497 0.000000 13375 +tilt 0 1 6.957497 0.000000 13376 +bednarjim 0 1 6.957497 0.000000 13377 +ofcognit 0 1 6.957497 0.000000 13378 +fewdecad 0 1 6.957497 0.000000 13379 +thehuman 0 1 6.957497 0.000000 13380 +beavail 0 1 6.957497 0.000000 13381 +becomingpract 0 1 6.957497 0.000000 13382 +refut 0 1 6.957497 0.000000 13383 +makecognit 0 1 6.957497 0.000000 13384 +purelyphilosoph 0 1 6.957497 0.000000 13385 +psychologist 0 1 6.957497 0.000000 13386 +inhibit 0 1 6.957497 0.000000 13387 +indirect 0 1 6.957497 0.000000 13388 +visualbehavior 0 1 6.957497 0.000000 13389 +departmentmi 0 1 6.957497 0.000000 13390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..bd325cb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +construct 0 139 1.945910 0.000000 82 +professor 0 137 1.945910 0.000000 76 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +like 0 132 1.945910 0.000000 81 +document 0 121 2.079442 0.000000 89 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +number 0 130 2.079442 0.000000 97 +topic 0 114 2.197225 0.000000 110 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +take 0 97 2.302585 0.000000 134 +homepag 1 93 2.397895 2.397895 148 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +help 0 83 2.484907 0.000000 175 +academ 0 82 2.484907 0.000000 178 +stuff 0 87 2.484907 0.000000 171 +learn 0 86 2.484907 0.000000 170 +start 0 83 2.484907 0.000000 173 +resum 0 79 2.564949 0.000000 217 +sourc 0 77 2.564949 0.000000 201 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +html 0 75 2.639057 0.000000 235 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +test 1 66 2.708050 2.708050 252 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +guid 0 63 2.772589 0.000000 267 +special 0 56 2.890372 0.000000 320 +unix 0 58 2.890372 0.000000 308 +summer 0 56 2.890372 0.000000 311 +format 0 48 3.044522 0.000000 356 +visitor 0 49 3.044522 0.000000 371 +tutori 0 39 3.258097 0.000000 437 +domain 0 30 3.555348 0.000000 564 +packag 0 28 3.610918 0.000000 614 +linux 0 27 3.637586 0.000000 631 +utc 0 27 3.637586 0.000000 629 +administr 0 27 3.637586 0.000000 628 +jeff 0 25 3.737670 0.000000 673 +frame 0 24 3.761200 0.000000 684 +applet 0 20 3.951244 0.000000 827 +north 0 19 4.007333 0.000000 873 +excel 0 19 4.007333 0.000000 868 +debug 0 17 4.110874 0.000000 944 +transfer 0 16 4.174387 0.000000 967 +ascii 0 15 4.248495 0.000000 1032 +latex 0 14 4.317488 0.000000 1064 +pretti 0 13 4.382027 0.000000 1191 +america 0 11 4.553877 0.000000 1370 +catalog 0 10 4.653960 0.000000 1431 +novak 0 9 4.753590 0.000000 1521 +largest 0 7 5.010635 0.000000 1858 +privaci 0 6 5.164786 0.000000 2144 +ross 0 5 5.347108 0.000000 2243 +florida 0 5 5.347108 0.000000 2526 +automobil 0 3 5.857933 0.000000 3709 +ethernet 0 2 6.263398 0.000000 5171 +edmund 0 2 6.263398 0.000000 5213 +buyer 0 2 6.263398 0.000000 5210 +sceneri 0 2 6.263398 0.000000 5152 +ofjunfanghi 0 1 6.957497 0.000000 13391 +sysadm 0 1 6.957497 0.000000 13392 +unixish 0 1 6.957497 0.000000 13393 +kristina 0 1 6.957497 0.000000 13394 +jfang 0 1 6.957497 0.000000 13395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..30420b82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +start 0 83 2.484907 0.000000 173 +good 1 77 2.564949 2.564949 200 +resum 0 79 2.564949 0.000000 217 +knowledg 0 67 2.708050 0.000000 243 +long 0 43 3.178054 0.000000 413 +probabl 0 40 3.258097 0.000000 455 +john 0 33 3.433987 0.000000 532 +someth 0 31 3.496508 0.000000 554 +chip 0 21 3.912023 0.000000 770 +enough 0 15 4.248495 0.000000 1040 +beer 0 6 5.164786 0.000000 2216 +sleep 0 6 5.164786 0.000000 2211 +chew 0 3 5.857933 0.000000 3618 +dog 0 2 6.263398 0.000000 5089 +swallow 0 2 6.263398 0.000000 5025 +jprior 0 1 6.957497 0.000000 13396 +priorjohn 0 1 6.957497 0.000000 13397 +priormi 0 1 6.957497 0.000000 13398 +accumul 0 1 6.957497 0.000000 13399 +hurt 0 1 6.957497 0.000000 13400 +nacho 0 1 6.957497 0.000000 13401 +swisher 0 1 6.957497 0.000000 13402 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..048c9a43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +technolog 0 131 2.079442 0.000000 102 +site 0 106 2.197225 0.000000 119 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +internet 0 83 2.484907 0.000000 186 +laboratori 0 63 2.772589 0.000000 292 +jeff 0 25 3.737670 0.000000 673 +thoma 1 18 4.060443 4.060443 901 +ultim 0 17 4.110874 0.000000 943 +keyword 0 11 4.553877 0.000000 1356 +trade 0 7 5.010635 0.000000 1815 +homepagejeff 0 1 6.957497 0.000000 13403 +homepagecontact 0 1 6.957497 0.000000 13404 +informationpublicationssoftwar 0 1 6.957497 0.000000 13405 +groupphoto 0 1 6.957497 0.000000 13406 +albumfavorit 0 1 6.957497 0.000000 13407 +sitesuniversityof 0 1 6.957497 0.000000 13408 +departmentappliedresearch 0 1 6.957497 0.000000 13409 +electricaland 0 1 6.957497 0.000000 13410 +departmentedsfinanci 0 1 6.957497 0.000000 13411 +fttc 0 1 6.957497 0.000000 13412 +utacademiccalendarsut 0 1 6.957497 0.000000 13413 +sportshook 0 1 6.957497 0.000000 13414 +longhorn 0 1 6.957497 0.000000 13415 +utfootbal 0 1 6.957497 0.000000 13416 +scheduleaustintexa 0 1 6.957497 0.000000 13417 +jthoma 0 1 6.957497 0.000000 13418 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..65a2cad3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +octob 0 89 2.397895 0.000000 156 +stuff 0 87 2.484907 0.000000 171 +journal 0 83 2.484907 0.000000 183 +collect 0 65 2.772589 0.000000 268 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +china 0 37 3.332205 0.000000 487 +weather 0 28 3.610918 0.000000 618 +todai 0 25 3.737670 0.000000 672 +highli 0 23 3.806662 0.000000 725 +alumni 0 21 3.912023 0.000000 807 +beij 0 19 4.007333 0.000000 876 +lot 0 18 4.060443 0.000000 889 +excit 0 11 4.553877 0.000000 1329 +perl 0 11 4.553877 0.000000 1332 +ataustin 0 9 4.753590 0.000000 1610 +pagecomput 0 7 5.010635 0.000000 1900 +peke 0 5 5.347108 0.000000 2539 +appreci 0 5 5.347108 0.000000 2374 +meyour 0 3 5.857933 0.000000 3858 +homepagewelcom 0 2 6.263398 0.000000 4808 +novelschines 0 2 6.263398 0.000000 5610 +registrar 0 2 6.263398 0.000000 5611 +gradaut 0 2 6.263398 0.000000 5612 +studiesut 0 2 6.263398 0.000000 5613 +magzin 0 2 6.263398 0.000000 5614 +technicalreport 0 2 6.263398 0.000000 5615 +visitorsinc 0 2 6.263398 0.000000 5616 +jiani 0 1 6.957497 0.000000 13419 +indepart 0 1 6.957497 0.000000 13420 +ofpek 0 1 6.957497 0.000000 13421 +chinesechines 0 1 6.957497 0.000000 13422 +scenerychines 0 1 6.957497 0.000000 13423 +classicschines 0 1 6.957497 0.000000 13424 +magazineschines 0 1 6.957497 0.000000 13425 +newspapersus 0 1 6.957497 0.000000 13426 +libraryut 0 1 6.957497 0.000000 13427 +campusutaccessabout 0 1 6.957497 0.000000 13428 +citylimit 0 1 6.957497 0.000000 13429 +miscellaneousyahoojava 0 1 6.957497 0.000000 13430 +sunjavascript 0 1 6.957497 0.000000 13431 +netscapeth 0 1 6.957497 0.000000 13432 +associationcomput 0 1 6.957497 0.000000 13433 +webnetwork 0 1 6.957497 0.000000 13434 +libraryth 0 1 6.957497 0.000000 13435 +bibliographiesintern 0 1 6.957497 0.000000 13436 +jyluo 0 1 6.957497 0.000000 13437 +suggestionswould 0 1 6.957497 0.000000 13438 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..42949dca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +know 0 80 2.564949 0.000000 198 +logic 0 71 2.639057 0.000000 230 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +street 0 63 2.772589 0.000000 293 +reason 0 57 2.890372 0.000000 318 +advisor 0 51 2.995732 0.000000 355 +concurr 0 34 3.401197 0.000000 501 +semant 0 29 3.583519 0.000000 587 +todai 0 25 3.737670 0.000000 672 +doctor 0 24 3.761200 0.000000 709 +thank 0 23 3.806662 0.000000 721 +indian 0 22 3.850148 0.000000 769 +wonder 0 20 3.951244 0.000000 815 +lot 0 18 4.060443 0.000000 889 +bachelor 0 17 4.110874 0.000000 957 +came 0 13 4.382027 0.000000 1197 +tempor 0 9 4.753590 0.000000 1584 +madra 0 8 4.875197 0.000000 1770 +allen 0 5 5.347108 0.000000 2470 +emerson 0 5 5.347108 0.000000 2547 +mehi 0 2 6.263398 0.000000 5549 +kedar 0 1 6.957497 0.000000 13439 +namjoshiabout 0 1 6.957497 0.000000 13440 +distributedalgorithm 0 1 6.957497 0.000000 13441 +automatatheori 0 1 6.957497 0.000000 13442 +amul 0 1 6.957497 0.000000 13443 +adkedar 0 1 6.957497 0.000000 13444 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..b0368d1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 0 146 1.945910 0.000000 65 +provid 0 121 2.079442 0.000000 94 +world 0 115 2.197225 0.000000 126 +academ 0 82 2.484907 0.000000 178 +stuff 0 87 2.484907 0.000000 171 +wide 0 84 2.484907 0.000000 185 +resum 0 79 2.564949 0.000000 217 +servic 0 72 2.639057 0.000000 236 +view 0 70 2.708050 0.000000 254 +taylor 0 63 2.772589 0.000000 287 +faculti 0 56 2.890372 0.000000 325 +author 0 39 3.258097 0.000000 450 +respons 0 37 3.332205 0.000000 476 +staff 0 36 3.367296 0.000000 490 +board 0 33 3.433987 0.000000 528 +express 0 32 3.465736 0.000000 540 +linux 0 27 3.637586 0.000000 631 +facil 0 20 3.951244 0.000000 814 +reflect 0 15 4.248495 0.000000 1034 +kenneth 0 12 4.465908 0.000000 1265 +guest 0 12 4.465908 0.000000 1220 +opinion 0 8 4.875197 0.000000 1708 +babylon 0 8 4.875197 0.000000 1731 +necessarili 0 7 5.010635 0.000000 1899 +polit 0 6 5.164786 0.000000 2115 +regent 0 5 5.347108 0.000000 2551 +radio 0 4 5.568345 0.000000 3025 +sole 0 4 5.568345 0.000000 2592 +cyberspac 0 3 5.857933 0.000000 3719 +harker 0 1 6.957497 0.000000 13445 +kharker 0 1 6.957497 0.000000 13446 +amateur 0 1 6.957497 0.000000 13447 +rocketri 0 1 6.957497 0.000000 13448 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..a1e4324f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +softwar 0 220 1.386294 0.000000 30 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +analysi 0 124 2.079442 0.000000 98 +high 0 130 2.079442 0.000000 101 +world 0 115 2.197225 0.000000 126 +mathemat 0 108 2.197225 0.000000 123 +topic 0 114 2.197225 0.000000 110 +technic 0 100 2.302585 0.000000 140 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +larg 0 82 2.484907 0.000000 168 +solut 0 82 2.484907 0.000000 162 +second 0 81 2.484907 0.000000 166 +method 0 80 2.564949 0.000000 213 +david 0 71 2.639057 0.000000 232 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +degre 0 69 2.708050 0.000000 259 +organ 0 65 2.772589 0.000000 265 +sever 0 56 2.890372 0.000000 322 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +numer 0 49 3.044522 0.000000 369 +basic 0 50 3.044522 0.000000 360 +algebra 0 45 3.135494 0.000000 394 +anoth 0 45 3.135494 0.000000 408 +linear 1 41 3.218876 3.218876 431 +award 0 34 3.401197 0.000000 523 +committe 0 34 3.401197 0.000000 522 +focus 0 29 3.583519 0.000000 584 +session 0 26 3.688879 0.000000 643 +equat 0 23 3.806662 0.000000 724 +honor 0 23 3.806662 0.000000 729 +recognit 0 23 3.806662 0.000000 723 +variabl 0 23 3.806662 0.000000 715 +director 0 22 3.850148 0.000000 767 +partial 0 18 4.060443 0.000000 900 +differenti 0 17 4.110874 0.000000 921 +young 0 16 4.174387 0.000000 991 +spars 0 16 4.174387 0.000000 989 +senior 0 14 4.317488 0.000000 1120 +researchmi 0 14 4.317488 0.000000 1119 +polynomi 0 14 4.317488 0.000000 1069 +nasa 0 13 4.382027 0.000000 1188 +iter 0 12 4.465908 0.000000 1206 +matric 0 10 4.653960 0.000000 1399 +congress 0 9 4.753590 0.000000 1592 +jersei 0 9 4.753590 0.000000 1587 +creativ 0 8 4.875197 0.000000 1777 +pacif 0 8 4.875197 0.000000 1674 +grove 0 8 4.875197 0.000000 1675 +edg 0 8 4.875197 0.000000 1647 +aris 0 7 5.010635 0.000000 1924 +brook 0 6 5.164786 0.000000 2152 +river 0 6 5.164786 0.000000 2220 +imac 0 3 5.857933 0.000000 3718 +certif 0 3 5.857933 0.000000 3859 +interestmathemat 0 3 5.857933 0.000000 3860 +ellipt 0 3 5.857933 0.000000 3774 +atlanta 0 3 5.857933 0.000000 3778 +stationari 0 3 5.857933 0.000000 3861 +kincaid 1 2 6.263398 6.263398 5617 +subprogram 0 2 6.263398 0.000000 5618 +cole 0 2 6.263398 0.000000 4697 +itpack 0 2 6.263398 0.000000 5619 +rassia 0 2 6.263398 0.000000 5620 +lecturerassoci 0 1 6.957497 0.000000 13449 +lamar 0 1 6.957497 0.000000 13450 +technicalinnov 0 1 6.957497 0.000000 13451 +andappli 0 1 6.957497 0.000000 13452 +coeffici 0 1 6.957497 0.000000 13453 +publicationsw 0 1 6.957497 0.000000 13454 +chenei 0 1 6.957497 0.000000 13455 +hay 0 1 6.957497 0.000000 13456 +coput 0 1 6.957497 0.000000 13457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..db31bcb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +parallel 0 169 1.791759 0.000000 60 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +process 0 142 1.945910 0.000000 72 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +mathemat 0 108 2.197225 0.000000 123 +academ 0 82 2.484907 0.000000 178 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +prof 0 64 2.772589 0.000000 273 +collect 0 65 2.772589 0.000000 268 +juli 0 60 2.833213 0.000000 305 +numer 0 49 3.044522 0.000000 369 +visitor 0 49 3.044522 0.000000 371 +york 0 41 3.218876 0.000000 435 +press 0 42 3.218876 0.000000 419 +linear 0 41 3.218876 0.000000 431 +random 0 34 3.401197 0.000000 511 +administr 0 27 3.637586 0.000000 628 +mike 0 24 3.761200 0.000000 703 +busi 0 21 3.912023 0.000000 784 +particularli 0 19 4.007333 0.000000 867 +commerci 0 16 4.174387 0.000000 1005 +alan 0 13 4.382027 0.000000 1146 +emploi 0 12 4.465908 0.000000 1284 +walk 0 12 4.465908 0.000000 1281 +thedepart 0 11 4.553877 0.000000 1350 +ataustin 0 9 4.753590 0.000000 1610 +interestsi 0 7 5.010635 0.000000 1969 +misra 0 7 5.010635 0.000000 1856 +jayadev 0 4 5.568345 0.000000 3006 +kistler 0 3 5.857933 0.000000 3267 +syracus 0 3 5.857933 0.000000 3553 +cline 0 3 5.857933 0.000000 3218 +coursesfal 0 2 6.263398 0.000000 5225 +theperson 0 1 6.957497 0.000000 13458 +productsdivis 0 1 6.957497 0.000000 13459 +backgroundba 0 1 6.957497 0.000000 13460 +susquehanna 0 1 6.957497 0.000000 13461 +selinsgrov 0 1 6.957497 0.000000 13462 +stern 0 1 6.957497 0.000000 13463 +businessnew 0 1 6.957497 0.000000 13464 +iwith 0 1 6.957497 0.000000 13465 +algebrawith 0 1 6.957497 0.000000 13466 +pflugervil 0 1 6.957497 0.000000 13467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..09e6af53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +time 0 293 1.098612 0.000000 17 +welcom 0 122 2.079442 0.000000 99 +sinc 0 90 2.397895 0.000000 159 +march 0 61 2.833213 0.000000 295 +jacob 1 4 5.568345 5.568345 2667 +kornerup 1 3 5.857933 5.857933 3215 +kornerupjacob 0 1 6.957497 0.000000 13468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..c778d2a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +spring 0 131 2.079442 0.000000 88 +mathemat 0 108 2.197225 0.000000 123 +topic 0 114 2.197225 0.000000 110 +place 0 106 2.197225 0.000000 124 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +build 0 85 2.484907 0.000000 184 +intellig 0 72 2.639057 0.000000 225 +logic 0 71 2.639057 0.000000 230 +knowledg 1 67 2.708050 2.708050 243 +simul 0 66 2.708050 0.000000 255 +plan 0 65 2.772589 0.000000 272 +prof 0 64 2.772589 0.000000 273 +colleg 0 61 2.833213 0.000000 300 +reason 0 57 2.890372 0.000000 318 +detail 0 57 2.890372 0.000000 321 +explor 0 58 2.890372 0.000000 324 +finger 0 52 2.995732 0.000000 354 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +describ 0 45 3.135494 0.000000 400 +press 0 42 3.218876 0.000000 419 +map 0 39 3.258097 0.000000 452 +robot 0 36 3.367296 0.000000 497 +represent 0 35 3.401197 0.000000 512 +limit 0 29 3.583519 0.000000 585 +strategi 0 25 3.737670 0.000000 682 +recognit 0 23 3.806662 0.000000 723 +emphasi 0 22 3.850148 0.000000 755 +expert 0 20 3.951244 0.000000 833 +agent 0 18 4.060443 0.000000 910 +spatial 0 16 4.174387 0.000000 988 +cognit 0 16 4.174387 0.000000 986 +cambridg 0 16 4.174387 0.000000 1008 +consider 0 14 4.317488 0.000000 1076 +benjamin 0 11 4.553877 0.000000 1296 +qualit 0 11 4.553877 0.000000 1362 +tour 0 11 4.553877 0.000000 1307 +incomplet 0 9 4.753590 0.000000 1575 +accomplish 0 8 4.875197 0.000000 1755 +centenni 0 7 5.010635 0.000000 1967 +distinct 0 5 5.347108 0.000000 2319 +commonsens 0 4 5.568345 0.000000 2998 +kuiper 0 3 5.857933 0.000000 3794 +qsim 0 3 5.857933 0.000000 3862 +swarthmor 0 2 6.263398 0.000000 5621 +thequalit 0 2 6.263398 0.000000 5622 +kuipersbenjamin 0 1 6.957497 0.000000 13469 +kuipersbruton 0 1 6.957497 0.000000 13470 +withparticular 0 1 6.957497 0.000000 13471 +grouphom 0 1 6.957497 0.000000 13472 +andavail 0 1 6.957497 0.000000 13473 +qualitativereason 0 1 6.957497 0.000000 13474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..1724f2b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +ieee 0 86 2.484907 0.000000 190 +internet 0 83 2.484907 0.000000 186 +taylor 0 63 2.772589 0.000000 287 +laboratori 0 63 2.772589 0.000000 292 +februari 0 54 2.944439 0.000000 328 +electron 0 47 3.091042 0.000000 379 +transact 0 39 3.258097 0.000000 438 +sciencesunivers 0 37 3.332205 0.000000 486 +photo 0 31 3.496508 0.000000 561 +profil 0 30 3.555348 0.000000 581 +turn 0 29 3.583519 0.000000 586 +campu 0 27 3.637586 0.000000 623 +administr 0 27 3.637586 0.000000 628 +american 0 27 3.637586 0.000000 634 +compress 0 23 3.806662 0.000000 719 +eduphon 0 15 4.248495 0.000000 1060 +front 0 13 4.382027 0.000000 1154 +tune 0 12 4.465908 0.000000 1227 +editori 0 9 4.753590 0.000000 1611 +simon 0 8 4.875197 0.000000 1697 +clip 0 7 5.010635 0.000000 1868 +sciencesdepart 0 6 5.164786 0.000000 2020 +carbon 0 3 5.857933 0.000000 3804 +cont 0 3 5.857933 0.000000 3171 +toss 0 2 6.263398 0.000000 5470 +kata 0 1 6.957497 0.000000 13475 +submissionnew 0 1 6.957497 0.000000 13476 +empt 0 1 6.957497 0.000000 13477 +statesman 0 1 6.957497 0.000000 13478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..47afae4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +network 0 168 1.791759 0.000000 61 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +test 0 66 2.708050 0.000000 252 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +foundat 0 62 2.772589 0.000000 286 +protocol 0 45 3.135494 0.000000 407 +secur 0 30 3.555348 0.000000 577 +fund 0 21 3.912023 0.000000 805 +entir 0 20 3.951244 0.000000 811 +verif 0 20 3.951244 0.000000 826 +supervis 0 20 3.951244 0.000000 840 +tune 0 12 4.465908 0.000000 1227 +cycl 0 11 4.553877 0.000000 1335 +underli 0 10 4.653960 0.000000 1410 +span 0 8 4.875197 0.000000 1751 +simon 0 8 4.875197 0.000000 1697 +lockhe 0 3 5.857933 0.000000 3863 +currentinterest 0 1 6.957497 0.000000 13479 +nsaunivers 0 1 6.957497 0.000000 13480 +videoservic 0 1 6.957497 0.000000 13481 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..439ae459 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +octob 0 89 2.397895 0.000000 156 +ofth 0 36 3.367296 0.000000 491 +robert 0 30 3.555348 0.000000 567 +edulast 0 17 4.110874 0.000000 927 +stori 0 14 4.317488 0.000000 1087 +convent 0 14 4.317488 0.000000 1072 +council 0 11 4.553877 0.000000 1364 +mountain 0 10 4.653960 0.000000 1456 +awai 0 10 4.653960 0.000000 1447 +christian 0 7 5.010635 0.000000 1949 +pageth 0 7 5.010635 0.000000 1939 +gordon 0 6 5.164786 0.000000 2032 +graham 0 4 5.568345 0.000000 2817 +republican 0 3 5.857933 0.000000 3815 +backbon 0 2 6.263398 0.000000 5623 +landrum 0 1 6.957497 0.000000 13482 +viruspictur 0 1 6.957497 0.000000 13483 +empirepch 0 1 6.957497 0.000000 13484 +retreattexa 0 1 6.957497 0.000000 13485 +rockrsumfamilyinterest 0 1 6.957497 0.000000 13486 +councillandrum 0 1 6.957497 0.000000 13487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..ff69ae44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +welcom 0 122 2.079442 0.000000 99 +assist 0 112 2.197225 0.000000 113 +activ 0 84 2.484907 0.000000 182 +improv 0 62 2.772589 0.000000 289 +electr 0 38 3.295837 0.000000 461 +greg 0 24 3.761200 0.000000 695 +recommend 0 22 3.850148 0.000000 737 +lavend 0 3 5.857933 0.000000 3217 +professordepart 0 2 6.263398 0.000000 5624 +biograph 0 2 6.263398 0.000000 5625 +austinr 0 1 6.957497 0.000000 13488 +lavenderadjunct 0 1 6.957497 0.000000 13489 +anddepart 0 1 6.957497 0.000000 13490 +engineeringth 0 1 6.957497 0.000000 13491 +informationsuggest 0 1 6.957497 0.000000 13492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..acfad648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +welcom 0 122 2.079442 0.000000 99 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +internet 0 83 2.484907 0.000000 186 +activ 0 84 2.484907 0.000000 182 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +know 0 80 2.564949 0.000000 198 +nation 0 74 2.639057 0.000000 240 +degre 0 69 2.708050 0.000000 259 +taylor 0 63 2.772589 0.000000 287 +locat 0 59 2.833213 0.000000 303 +much 0 52 2.995732 0.000000 349 +small 0 39 3.258097 0.000000 447 +jame 0 35 3.401197 0.000000 507 +particip 0 29 3.583519 0.000000 589 +campu 0 27 3.637586 0.000000 623 +equat 0 23 3.806662 0.000000 724 +vlsi 0 21 3.912023 0.000000 795 +north 0 19 4.007333 0.000000 873 +bachelor 0 17 4.110874 0.000000 957 +normal 0 16 4.174387 0.000000 995 +atth 0 15 4.248495 0.000000 1019 +month 0 15 4.248495 0.000000 1025 +hong 0 14 4.317488 0.000000 1105 +wife 0 13 4.382027 0.000000 1196 +island 0 11 4.553877 0.000000 1345 +kong 0 9 4.753590 0.000000 1602 +sciencesat 0 7 5.010635 0.000000 1968 +smile 0 7 5.010635 0.000000 1807 +singapor 0 5 5.347108 0.000000 2487 +aliv 0 3 5.857933 0.000000 3864 +disc 0 2 6.263398 0.000000 5626 +tropic 0 2 6.263398 0.000000 5398 +aboutthi 0 2 6.263398 0.000000 5627 +addr 0 2 6.263398 0.000000 5628 +pageyeap 0 1 6.957497 0.000000 13493 +designalgorithm 0 1 6.957497 0.000000 13494 +communityi 0 1 6.957497 0.000000 13495 +lovesto 0 1 6.957497 0.000000 13496 +leekk 0 1 6.957497 0.000000 13497 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..5713c332 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +softwar 1 220 1.386294 1.386294 30 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +parallel 0 169 1.791759 0.000000 60 +austin 0 168 1.791759 0.000000 63 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +decemb 0 80 2.564949 0.000000 215 +main 0 67 2.708050 0.000000 256 +laboratori 0 63 2.772589 0.000000 292 +investig 0 51 2.995732 0.000000 353 +seminar 0 38 3.295837 0.000000 470 +computersci 0 30 3.555348 0.000000 562 +robert 0 30 3.555348 0.000000 567 +experiment 0 26 3.688879 0.000000 645 +wai 0 25 3.737670 0.000000 662 +less 1 18 4.060443 4.060443 892 +apart 0 7 5.010635 0.000000 1936 +distributedsystem 0 6 5.164786 0.000000 2022 +blumoferdb 0 5 5.347108 0.000000 2324 +oftexa 0 4 5.568345 0.000000 3003 +buildreli 0 1 6.957497 0.000000 13498 +projectsmemb 0 1 6.957497 0.000000 13499 +lablessss 0 1 6.957497 0.000000 13500 +seriessponsorslast 0 1 6.957497 0.000000 13501 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..5eed6720 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +parallel 1 169 1.791759 1.791759 60 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +mathemat 0 108 2.197225 0.000000 123 +memori 0 101 2.302585 0.000000 139 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +thing 0 84 2.484907 0.000000 189 +journal 0 83 2.484907 0.000000 183 +april 0 77 2.564949 0.000000 196 +symposium 0 72 2.639057 0.000000 238 +import 0 65 2.772589 0.000000 282 +taylor 0 63 2.772589 0.000000 287 +plai 0 60 2.833213 0.000000 307 +simpl 0 60 2.833213 0.000000 298 +share 0 59 2.833213 0.000000 304 +scientif 0 53 2.944439 0.000000 341 +postal 0 30 3.555348 0.000000 580 +multiprocessor 0 28 3.610918 0.000000 605 +arrai 0 27 3.637586 0.000000 627 +supercomput 0 25 3.737670 0.000000 681 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +portabl 0 20 3.951244 0.000000 819 +comparison 0 19 4.007333 0.000000 863 +novel 0 15 4.248495 0.000000 1039 +conf 0 13 4.382027 0.000000 1181 +decomposit 0 10 4.653960 0.000000 1439 +calvin 0 9 4.753590 0.000000 1518 +austinaustin 0 7 5.010635 0.000000 1966 +banerje 0 6 5.164786 0.000000 2018 +snyder 0 5 5.347108 0.000000 2359 +explicitli 0 5 5.347108 0.000000 2308 +parallelprogram 0 5 5.347108 0.000000 2379 +publicationsth 0 4 5.568345 0.000000 2859 +polymorph 0 4 5.568345 0.000000 2627 +pete 0 3 5.857933 0.000000 3865 +accommod 0 3 5.857933 0.000000 3337 +parallelprocess 0 3 5.857933 0.000000 3626 +performanceanalysi 0 2 6.263398 0.000000 5629 +padua 0 2 6.263398 0.000000 4544 +sciencesth 0 1 6.957497 0.000000 13502 +lincalvin 0 1 6.957497 0.000000 13503 +linassist 0 1 6.957497 0.000000 13504 +iswhat 0 1 6.957497 0.000000 13505 +_study_ 0 1 6.957497 0.000000 13506 +_play_ 0 1 6.957497 0.000000 13507 +carrilresearch 0 1 6.957497 0.000000 13508 +interestscompil 0 1 6.957497 0.000000 13509 +biologyalgorithm 0 1 6.957497 0.000000 13510 +dikaiako 0 1 6.957497 0.000000 13511 +manoussaki 0 1 6.957497 0.000000 13512 +woodward 0 1 6.957497 0.000000 13513 +internationalparallel 0 1 6.957497 0.000000 13514 +sublanguag 0 1 6.957497 0.000000 13515 +compilersfor 0 1 6.957497 0.000000 13516 +gelernt 0 1 6.957497 0.000000 13517 +nicolau 0 1 6.957497 0.000000 13518 +withl 0 1 6.957497 0.000000 13519 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..031465db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +construct 0 139 1.945910 0.000000 82 +professor 0 137 1.945910 0.000000 76 +perform 0 143 1.945910 0.000000 74 +welcom 0 122 2.079442 0.000000 99 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +real 0 93 2.397895 0.000000 144 +comment 0 93 2.397895 0.000000 146 +member 0 84 2.484907 0.000000 165 +good 0 77 2.564949 0.000000 200 +knowledg 0 67 2.708050 0.000000 243 +view 0 70 2.708050 0.000000 254 +result 0 65 2.772589 0.000000 281 +content 0 59 2.833213 0.000000 302 +summer 0 56 2.890372 0.000000 311 +mine 0 26 3.688879 0.000000 654 +theunivers 0 21 3.912023 0.000000 797 +permit 0 16 4.174387 0.000000 962 +replic 0 12 4.465908 0.000000 1231 +incomplet 0 9 4.753590 0.000000 1575 +researchi 0 8 4.875197 0.000000 1756 +apolog 0 6 5.164786 0.000000 2046 +guangtian 0 3 5.857933 0.000000 3810 +inconveni 0 3 5.857933 0.000000 3866 +internship 0 3 5.857933 0.000000 3764 +liugt 0 1 6.957497 0.000000 13520 +homepagehi 0 1 6.957497 0.000000 13521 +timeschedul 0 1 6.957497 0.000000 13522 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..20e1ccb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +phone 0 175 1.791759 0.000000 45 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +topic 0 114 2.197225 0.000000 110 +taylor 0 63 2.772589 0.000000 287 +locat 0 59 2.833213 0.000000 303 +special 0 56 2.890372 0.000000 320 +physic 0 47 3.091042 0.000000 377 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +photo 0 31 3.496508 0.000000 561 +emphasi 0 22 3.850148 0.000000 755 +itali 0 11 4.553877 0.000000 1378 +interestsi 0 7 5.010635 0.000000 1969 +lorenzo 1 4 5.568345 5.568345 2588 +sytem 0 4 5.568345 0.000000 3015 +maria 0 4 5.568345 0.000000 2954 +alvisi 0 3 5.857933 0.000000 3095 +universit 0 2 6.263398 0.000000 5630 +bologna 0 2 6.263398 0.000000 5631 +laurea 0 1 6.957497 0.000000 13523 +agrav 0 1 6.957497 0.000000 13524 +taylorhal 0 1 6.957497 0.000000 13525 +campusshow 0 1 6.957497 0.000000 13526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..456fbd29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +area 0 144 1.945910 0.000000 80 +site 0 106 2.197225 0.000000 119 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +internet 0 83 2.484907 0.000000 186 +ieee 0 86 2.484907 0.000000 190 +come 0 78 2.564949 0.000000 202 +onlin 0 75 2.639057 0.000000 223 +dept 0 64 2.772589 0.000000 291 +undergradu 0 54 2.944439 0.000000 338 +cool 0 49 3.044522 0.000000 374 +compani 0 41 3.218876 0.000000 423 +music 0 42 3.218876 0.000000 436 +live 0 40 3.258097 0.000000 451 +microsoft 0 38 3.295837 0.000000 468 +china 0 37 3.332205 0.000000 487 +chines 0 29 3.583519 0.000000 595 +weather 0 28 3.610918 0.000000 618 +univ 0 28 3.610918 0.000000 617 +campu 0 27 3.637586 0.000000 623 +thank 0 23 3.806662 0.000000 721 +corpor 0 21 3.912023 0.000000 802 +sigmod 0 19 4.007333 0.000000 877 +tsinghua 0 13 4.382027 0.000000 1195 +shop 0 10 4.653960 0.000000 1469 +siggraph 0 8 4.875197 0.000000 1773 +dictionari 0 8 4.875197 0.000000 1642 +hunt 0 7 5.010635 0.000000 1798 +sigcomm 0 5 5.347108 0.000000 2329 +sigir 0 2 6.263398 0.000000 4873 +addr 0 2 6.263398 0.000000 5628 +luxu 0 1 6.957497 0.000000 13527 +networksoth 0 1 6.957497 0.000000 13528 +studyut 0 1 6.957497 0.000000 13529 +universityaustin 0 1 6.957497 0.000000 13530 +siglink 0 1 6.957497 0.000000 13531 +sigmm 0 1 6.957497 0.000000 13532 +newsjob 0 1 6.957497 0.000000 13533 +forcast 0 1 6.957497 0.000000 13534 +xuelu 0 1 6.957497 0.000000 13535 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..ee53fa0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +softwar 1 220 1.386294 1.386294 30 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +texa 0 160 1.791759 0.000000 64 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +technic 0 100 2.302585 0.000000 140 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +educ 0 86 2.484907 0.000000 191 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +interfac 0 79 2.564949 0.000000 209 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +august 0 66 2.708050 0.000000 257 +taylor 0 63 2.772589 0.000000 287 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +direct 0 57 2.890372 0.000000 316 +semest 0 58 2.890372 0.000000 312 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +transact 0 39 3.258097 0.000000 438 +industri 0 38 3.295837 0.000000 464 +committe 0 34 3.401197 0.000000 522 +john 0 33 3.433987 0.000000 532 +human 0 32 3.465736 0.000000 546 +chair 0 29 3.583519 0.000000 596 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +qualiti 0 20 3.951244 0.000000 832 +macintosh 0 17 4.110874 0.000000 920 +cognit 0 16 4.174387 0.000000 986 +researchmi 0 14 4.317488 0.000000 1119 +strength 0 9 4.753590 0.000000 1494 +metric 0 7 5.010635 0.000000 1831 +educurr 0 5 5.347108 0.000000 2504 +werth 0 4 5.568345 0.000000 3004 +engineeringc 0 4 5.568345 0.000000 2904 +contemporari 0 4 5.568345 0.000000 2719 +presentarea 0 4 5.568345 0.000000 3026 +andsoftwar 0 4 5.568345 0.000000 2753 +assur 0 4 5.568345 0.000000 2722 +ics 0 4 5.568345 0.000000 2779 +lauri 0 3 5.857933 0.000000 3867 +honour 0 2 6.263398 0.000000 5632 +werthlauri 0 1 6.957497 0.000000 13536 +werthlectur 0 1 6.957497 0.000000 13537 +lwerth 0 1 6.957497 0.000000 13538 +scienceprofession 0 1 6.957497 0.000000 13539 +servicevic 0 1 6.957497 0.000000 13540 +presentco 0 1 6.957497 0.000000 13541 +interestsoftwar 0 1 6.957497 0.000000 13542 +andenviron 0 1 6.957497 0.000000 13543 +publicationsl 0 1 6.957497 0.000000 13544 +tomayko 0 1 6.957497 0.000000 13545 +pagefaculti 0 1 6.957497 0.000000 13546 +profilesc 0 1 6.957497 0.000000 13547 +classeslast 0 1 6.957497 0.000000 13548 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..98ee0761 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +offic 0 299 1.098612 0.000000 13 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +hall 0 146 1.945910 0.000000 65 +theori 0 111 2.197225 0.000000 127 +site 0 106 2.197225 0.000000 119 +info 0 85 2.484907 0.000000 176 +taylor 0 63 2.772589 0.000000 287 +dept 0 64 2.772589 0.000000 291 +complex 0 64 2.772589 0.000000 269 +interact 0 62 2.772589 0.000000 270 +author 0 39 3.258097 0.000000 450 +india 0 32 3.465736 0.000000 550 +univ 0 28 3.610918 0.000000 617 +comp 0 26 3.688879 0.000000 650 +reach 0 24 3.761200 0.000000 688 +offici 0 18 4.060443 0.000000 894 +avenu 0 12 4.465908 0.000000 1277 +madra 0 8 4.875197 0.000000 1770 +colloquium 0 8 4.875197 0.000000 1734 +cricket 0 7 5.010635 0.000000 1945 +oncomput 0 5 5.347108 0.000000 2326 +reddi 0 3 5.857933 0.000000 3277 +worldwid 0 3 5.857933 0.000000 3704 +madhukar 1 2 6.263398 6.263398 5633 +espnet 0 2 6.263398 0.000000 5634 +korupoluwelcom 0 1 6.957497 0.000000 13549 +ahom 0 1 6.957497 0.000000 13550 +madrashomepag 0 1 6.957497 0.000000 13551 +ganga 0 1 6.957497 0.000000 13552 +alumniclass 0 1 6.957497 0.000000 13553 +utalgorithm 0 1 6.957497 0.000000 13554 +sportszon 0 1 6.957497 0.000000 13555 +batchu 0 1 6.957497 0.000000 13556 +korupoluemail 0 1 6.957497 0.000000 13557 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..2cf07e76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +taylor 1 63 2.772589 2.772589 287 +simpl 1 60 2.833213 2.833213 298 +natur 1 44 3.135494 3.135494 406 +richard 1 31 3.496508 3.496508 559 +produc 1 30 3.555348 3.555348 572 +explan 1 16 4.174387 4.174387 985 +mallori 1 2 6.263398 6.263398 5635 +malloryrichard 1 1 6.957497 6.957497 13558 +malloryresearchthesi 1 1 6.957497 6.957497 13559 +quasi 1 1 6.957497 6.957497 13560 +qsimsimul 1 1 6.957497 6.957497 13561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..b7b8aa80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +memori 0 101 2.302585 0.000000 139 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +octob 0 89 2.397895 0.000000 156 +academ 0 82 2.484907 0.000000 178 +novemb 0 81 2.484907 0.000000 179 +journal 0 83 2.484907 0.000000 183 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +internet 0 83 2.484907 0.000000 186 +state 0 76 2.564949 0.000000 207 +appear 0 78 2.564949 0.000000 210 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +taylor 0 63 2.772589 0.000000 287 +march 0 61 2.833213 0.000000 295 +publish 0 57 2.890372 0.000000 326 +talk 0 53 2.944439 0.000000 336 +particular 0 51 2.995732 0.000000 352 +principl 0 48 3.044522 0.000000 357 +protocol 0 45 3.135494 0.000000 407 +execut 0 45 3.135494 0.000000 404 +third 0 43 3.178054 0.000000 412 +author 0 39 3.258097 0.000000 450 +annual 0 40 3.258097 0.000000 458 +submit 0 39 3.258097 0.000000 440 +correct 0 38 3.295837 0.000000 462 +vita 0 38 3.295837 0.000000 473 +respons 0 37 3.332205 0.000000 476 +tree 0 36 3.367296 0.000000 492 +survei 0 35 3.401197 0.000000 513 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +postal 0 30 3.555348 0.000000 580 +consid 0 29 3.583519 0.000000 590 +intend 0 28 3.610918 0.000000 599 +toward 0 25 3.737670 0.000000 668 +flow 0 24 3.761200 0.000000 700 +initi 0 23 3.806662 0.000000 717 +decis 0 23 3.806662 0.000000 728 +self 0 22 3.850148 0.000000 761 +identifi 0 22 3.850148 0.000000 760 +rout 0 21 3.912023 0.000000 793 +prepar 0 20 3.951244 0.000000 824 +finit 0 14 4.317488 0.000000 1106 +step 0 13 4.382027 0.000000 1138 +earlier 0 13 4.382027 0.000000 1140 +joint 0 13 4.382027 0.000000 1130 +stai 0 12 4.465908 0.000000 1215 +label 0 10 4.653960 0.000000 1423 +guarante 0 10 4.653960 0.000000 1391 +invit 0 10 4.653960 0.000000 1428 +minimum 0 9 4.753590 0.000000 1555 +candid 0 9 4.753590 0.000000 1606 +occur 0 9 4.753590 0.000000 1572 +said 0 9 4.753590 0.000000 1571 +depth 0 8 4.875197 0.000000 1636 +span 0 8 4.875197 0.000000 1751 +converg 0 7 5.010635 0.000000 1844 +kluwer 0 6 5.164786 0.000000 2143 +stabil 1 5 5.347108 5.347108 2286 +gouda 0 4 5.568345 0.000000 3021 +marco 0 4 5.568345 0.000000 2589 +maximum 0 4 5.568345 0.000000 2632 +implicit 0 4 5.568345 0.000000 2830 +arora 0 4 5.568345 0.000000 2658 +moham 0 3 5.857933 0.000000 3848 +fifteenth 0 3 5.857933 0.000000 3868 +forev 0 2 6.263398 0.000000 5636 +legitim 0 1 6.957497 0.000000 13562 +illegitim 0 1 6.957497 0.000000 13563 +schneidermarco 0 1 6.957497 0.000000 13564 +schneiderph 0 1 6.957497 0.000000 13565 +austinresearchth 0 1 6.957497 0.000000 13566 +itsstat 0 1 6.957497 0.000000 13567 +whenregardless 0 1 6.957497 0.000000 13568 +systemwhich 0 1 6.957497 0.000000 13569 +tolerantr 0 1 6.957497 0.000000 13570 +anish 0 1 6.957497 0.000000 13571 +silent 0 1 6.957497 0.000000 13572 +shlomi 0 1 6.957497 0.000000 13573 +dolev 0 1 6.957497 0.000000 13574 +ctaylor 0 1 6.957497 0.000000 13575 +usamarco 0 1 6.957497 0.000000 13576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..d486d095 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +avail 0 169 1.791759 0.000000 48 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +hall 0 146 1.945910 0.000000 65 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +member 0 84 2.484907 0.000000 165 +build 0 85 2.484907 0.000000 184 +librari 0 87 2.484907 0.000000 181 +stuff 0 87 2.484907 0.000000 171 +orient 0 80 2.564949 0.000000 205 +addit 0 74 2.639057 0.000000 228 +taylor 0 63 2.772589 0.000000 287 +copi 0 63 2.772589 0.000000 284 +descript 0 64 2.772589 0.000000 271 +best 0 59 2.833213 0.000000 299 +semest 0 58 2.890372 0.000000 312 +allow 0 53 2.944439 0.000000 333 +finger 0 52 2.995732 0.000000 354 +run 0 51 2.995732 0.000000 347 +mark 0 44 3.135494 0.000000 403 +compani 0 41 3.218876 0.000000 423 +taught 0 33 3.433987 0.000000 526 +postal 0 30 3.555348 0.000000 580 +usual 0 28 3.610918 0.000000 608 +full 0 28 3.610918 0.000000 615 +linux 0 27 3.637586 0.000000 631 +reach 0 24 3.761200 0.000000 688 +alloc 0 20 3.951244 0.000000 821 +along 0 18 4.060443 0.000000 878 +brief 0 16 4.174387 0.000000 1001 +intel 0 16 4.174387 0.000000 1000 +precis 0 15 4.248495 0.000000 1023 +appl 0 11 4.553877 0.000000 1303 +routin 0 9 4.753590 0.000000 1549 +motorola 0 9 4.753590 0.000000 1546 +oop 0 8 4.875197 0.000000 1778 +pentium 0 6 5.164786 0.000000 2077 +glenn 0 3 5.857933 0.000000 3869 +down 0 3 5.857933 0.000000 3870 +informationi 0 3 5.857933 0.000000 3871 +listof 0 3 5.857933 0.000000 3322 +publicli 0 3 5.857933 0.000000 3687 +isvia 0 2 6.263398 0.000000 5637 +johnston 0 2 6.263398 0.000000 5638 +somerset 0 2 6.263398 0.000000 5639 +johnstonemark 0 1 6.957497 0.000000 13577 +johnstonecontact 0 1 6.957497 0.000000 13578 +markj 0 1 6.957497 0.000000 13579 +byrichard 0 1 6.957497 0.000000 13580 +brice 0 1 6.957497 0.000000 13581 +analysisclass 0 1 6.957497 0.000000 13582 +somersetdesign 0 1 6.957497 0.000000 13583 +centerresearch 0 1 6.957497 0.000000 13584 +garbagecollector 0 1 6.957497 0.000000 13585 +ofstudi 0 1 6.957497 0.000000 13586 +dissertationpropos 0 1 6.957497 0.000000 13587 +timingof 0 1 6.957497 0.000000 13588 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..d53d689a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +click 0 142 1.945910 0.000000 78 +visit 0 63 2.772589 0.000000 288 +point 0 58 2.890372 0.000000 319 +friend 0 48 3.044522 0.000000 376 +mark 0 44 3.135494 0.000000 403 +markng 0 1 6.957497 0.000000 13589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..17d455c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +austin 0 168 1.791759 0.000000 63 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +place 0 106 2.197225 0.000000 124 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +member 0 84 2.484907 0.000000 165 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +state 0 76 2.564949 0.000000 207 +logic 0 71 2.639057 0.000000 230 +prof 0 64 2.772589 0.000000 273 +taylor 0 63 2.772589 0.000000 287 +result 0 65 2.772589 0.000000 281 +reason 0 57 2.890372 0.000000 318 +thesi 0 57 2.890372 0.000000 327 +archiv 0 49 3.044522 0.000000 364 +postal 0 30 3.555348 0.000000 580 +macintosh 0 17 4.110874 0.000000 920 +permit 0 16 4.174387 0.000000 962 +finit 0 14 4.317488 0.000000 1106 +verifi 0 12 4.465908 0.000000 1261 +worth 0 11 4.553877 0.000000 1294 +appl 0 11 4.553877 0.000000 1303 +incomplet 0 9 4.753590 0.000000 1575 +entri 0 8 4.875197 0.000000 1678 +researchi 0 8 4.875197 0.000000 1756 +misra 0 7 5.010635 0.000000 1856 +emerson 0 5 5.347108 0.000000 2547 +proposit 0 5 5.347108 0.000000 2339 +comprehens 0 4 5.568345 0.000000 2745 +marku 0 3 5.857933 0.000000 3872 +uniti 0 3 5.857933 0.000000 3812 +andwil 0 3 5.857933 0.000000 3335 +inconveni 0 3 5.857933 0.000000 3866 +groupand 0 3 5.857933 0.000000 3873 +isalso 0 2 6.263398 0.000000 5640 +kaltenbachmarku 0 1 6.957497 0.000000 13590 +kaltenbachintroductionwelcom 0 1 6.957497 0.000000 13591 +iapolog 0 1 6.957497 0.000000 13592 +spsp 0 1 6.957497 0.000000 13593 +stempor 0 1 6.957497 0.000000 13594 +checkerfor 0 1 6.957497 0.000000 13595 +avisit 0 1 6.957497 0.000000 13596 +theut 0 1 6.957497 0.000000 13597 +departmenthom 0 1 6.957497 0.000000 13598 +archivefor 0 1 6.957497 0.000000 13599 +sworld 0 1 6.957497 0.000000 13600 +supporthom 0 1 6.957497 0.000000 13601 +actansit 0 1 6.957497 0.000000 13602 +theatt 0 1 6.957497 0.000000 13603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..e3d9a6f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +click 0 142 1.945910 0.000000 78 +homepag 0 93 2.397895 0.000000 148 +stuff 0 87 2.484907 0.000000 171 +educ 0 86 2.484907 0.000000 191 +resum 0 79 2.564949 0.000000 217 +taylor 0 63 2.772589 0.000000 287 +guid 0 63 2.772589 0.000000 267 +virtual 0 62 2.772589 0.000000 285 +local 0 55 2.944439 0.000000 334 +math 0 44 3.135494 0.000000 402 +kind 0 32 3.465736 0.000000 541 +postal 0 30 3.555348 0.000000 580 +neural 0 30 3.555348 0.000000 578 +utc 0 27 3.637586 0.000000 629 +demonstr 0 24 3.761200 0.000000 694 +daili 0 24 3.761200 0.000000 706 +displai 0 23 3.806662 0.000000 712 +applet 0 20 3.951244 0.000000 827 +hotlist 0 13 4.382027 0.000000 1199 +paus 0 4 5.568345 0.000000 2965 +mayberri 0 2 6.263398 0.000000 5641 +downtown 0 2 6.263398 0.000000 5642 +texan 0 2 6.263398 0.000000 5489 +memarti 0 1 6.957497 0.000000 13604 +researchal 0 1 6.957497 0.000000 13605 +martym 0 1 6.957497 0.000000 13606 +anywher 0 1 6.957497 0.000000 13607 +virtualc 0 1 6.957497 0.000000 13608 +internetrestaur 0 1 6.957497 0.000000 13609 +tnstechnolog 0 1 6.957497 0.000000 13610 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..69d337bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +postscript 0 131 2.079442 0.000000 90 +onlin 0 75 2.639057 0.000000 223 +main 0 67 2.708050 0.000000 256 +colleg 0 61 2.833213 0.000000 300 +reason 0 57 2.890372 0.000000 318 +thesi 0 57 2.890372 0.000000 327 +advisor 0 51 2.995732 0.000000 355 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +expect 0 37 3.332205 0.000000 484 +titl 0 31 3.496508 0.000000 556 +action 0 15 4.248495 0.000000 1038 +philosophi 0 13 4.382027 0.000000 1167 +usavoic 0 13 4.382027 0.000000 1198 +vladimir 0 11 4.553877 0.000000 1324 +mepost 0 10 4.653960 0.000000 1472 +sciencesat 0 7 5.010635 0.000000 1968 +baker 0 7 5.010635 0.000000 1812 +causal 0 6 5.164786 0.000000 2024 +lifschitz 0 5 5.347108 0.000000 2542 +commonsens 0 4 5.568345 0.000000 2998 +nonmonoton 0 4 5.568345 0.000000 3023 +norm 0 2 6.263398 0.000000 5643 +kansa 0 2 6.263398 0.000000 5591 +interestscommonsens 0 2 6.263398 0.000000 5596 +actionlog 0 2 6.263398 0.000000 5597 +reasoningmi 0 2 6.263398 0.000000 5598 +mccain 0 1 6.957497 0.000000 13611 +mccainabout 0 1 6.957497 0.000000 13612 +mephd 0 1 6.957497 0.000000 13613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..5b384767 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +check 0 115 2.197225 0.000000 118 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +educ 0 86 2.484907 0.000000 191 +logic 0 71 2.639057 0.000000 230 +taylor 0 63 2.772589 0.000000 287 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +postal 0 30 3.555348 0.000000 580 +english 0 15 4.248495 0.000000 1033 +mari 0 12 4.465908 0.000000 1266 +acquisit 0 10 4.653960 0.000000 1465 +elain 0 5 5.347108 0.000000 2496 +groupunivers 0 3 5.857933 0.000000 3831 +austinresearchmi 0 2 6.263398 0.000000 5644 +formor 0 2 6.263398 0.000000 5335 +mecaliff 0 2 6.263398 0.000000 5645 +baylor 0 1 6.957497 0.000000 13614 +califfmari 0 1 6.957497 0.000000 13615 +califfmachin 0 1 6.957497 0.000000 13616 +especiallyinduct 0 1 6.957497 0.000000 13617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..ccdb3eb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +austin 0 168 1.791759 0.000000 63 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +technolog 0 131 2.079442 0.000000 102 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +text 0 98 2.302585 0.000000 133 +part 0 98 2.302585 0.000000 129 +question 0 91 2.397895 0.000000 141 +sinc 0 90 2.397895 0.000000 159 +select 0 91 2.397895 0.000000 154 +help 0 83 2.484907 0.000000 175 +larg 0 82 2.484907 0.000000 168 +contain 0 81 2.484907 0.000000 174 +requir 0 81 2.484907 0.000000 167 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +addit 0 74 2.639057 0.000000 228 +knowledg 1 67 2.708050 2.708050 243 +would 0 67 2.708050 0.000000 251 +test 0 66 2.708050 0.000000 252 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +simul 0 66 2.708050 0.000000 255 +result 0 65 2.772589 0.000000 281 +plan 0 65 2.772589 0.000000 272 +improv 0 62 2.772589 0.000000 289 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +collect 0 65 2.772589 0.000000 268 +automat 0 61 2.833213 0.000000 306 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +reason 0 57 2.890372 0.000000 318 +detail 0 57 2.890372 0.000000 321 +found 0 53 2.944439 0.000000 337 +extens 0 53 2.944439 0.000000 340 +numer 0 49 3.044522 0.000000 369 +pointer 0 48 3.044522 0.000000 368 +answer 0 45 3.135494 0.000000 391 +anoth 0 45 3.135494 0.000000 408 +natur 0 44 3.135494 0.000000 406 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +past 0 42 3.218876 0.000000 428 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +multipl 0 39 3.258097 0.000000 453 +littl 0 39 3.258097 0.000000 454 +ofth 0 36 3.367296 0.000000 491 +especi 0 36 3.367296 0.000000 496 +jame 0 35 3.401197 0.000000 507 +represent 0 35 3.401197 0.000000 512 +concept 0 32 3.465736 0.000000 537 +kind 0 32 3.465736 0.000000 541 +express 0 32 3.465736 0.000000 540 +extend 0 32 3.465736 0.000000 539 +domain 0 30 3.555348 0.000000 564 +steve 0 29 3.583519 0.000000 594 +built 0 29 3.583519 0.000000 592 +retriev 0 27 3.637586 0.000000 621 +task 0 25 3.737670 0.000000 678 +concern 0 25 3.737670 0.000000 666 +jeff 0 25 3.737670 0.000000 673 +begin 0 23 3.806662 0.000000 716 +varieti 0 22 3.850148 0.000000 740 +fact 0 21 3.912023 0.000000 780 +similar 0 21 3.912023 0.000000 771 +alumni 0 21 3.912023 0.000000 807 +expert 0 20 3.951244 0.000000 833 +predict 0 19 4.007333 0.000000 855 +encourag 0 18 4.060443 0.000000 880 +event 0 18 4.060443 0.000000 896 +appropri 0 18 4.060443 0.000000 883 +aid 0 18 4.060443 0.000000 904 +expand 0 17 4.110874 0.000000 928 +otherwis 0 17 4.110874 0.000000 922 +explan 0 16 4.174387 0.000000 985 +normal 0 16 4.174387 0.000000 995 +carl 0 15 4.248495 0.000000 1024 +biologi 0 15 4.248495 0.000000 1049 +english 0 15 4.248495 0.000000 1033 +shown 0 14 4.317488 0.000000 1080 +charl 0 13 4.382027 0.000000 1149 +composit 0 13 4.382027 0.000000 1150 +bruce 0 12 4.465908 0.000000 1226 +brad 0 12 4.465908 0.000000 1264 +peter 0 11 4.553877 0.000000 1316 +eight 0 11 4.553877 0.000000 1331 +qualit 0 11 4.553877 0.000000 1362 +rich 0 10 4.653960 0.000000 1396 +custom 0 10 4.653960 0.000000 1414 +significantli 0 9 4.753590 0.000000 1508 +tutor 0 9 4.753590 0.000000 1552 +mainten 0 9 4.753590 0.000000 1543 +herefor 0 9 4.753590 0.000000 1483 +erik 0 8 4.875197 0.000000 1701 +largest 0 7 5.010635 0.000000 1858 +fred 0 6 5.164786 0.000000 2072 +viewpoint 0 6 5.164786 0.000000 2116 +biolog 0 6 5.164786 0.000000 2147 +ongo 0 6 5.164786 0.000000 2215 +porter 0 5 5.347108 0.000000 2293 +correl 0 5 5.347108 0.000000 2279 +desk 0 5 5.347108 0.000000 2297 +oncomput 0 5 5.347108 0.000000 2326 +notabl 0 5 5.347108 0.000000 2276 +colleagu 0 5 5.347108 0.000000 2304 +focuss 0 5 5.347108 0.000000 2271 +clark 0 4 5.568345 0.000000 2705 +knight 0 4 5.568345 0.000000 2728 +souther 0 3 5.857933 0.000000 3795 +karl 0 3 5.857933 0.000000 3623 +multifunct 0 3 5.857933 0.000000 3826 +implicitli 0 3 5.857933 0.000000 3620 +qsim 0 3 5.857933 0.000000 3862 +proport 0 3 5.857933 0.000000 3293 +boe 0 3 5.857933 0.000000 3318 +mallori 0 2 6.263398 0.000000 5635 +bareiss 0 2 6.263398 0.000000 5646 +murrai 0 2 6.263398 0.000000 5647 +rickel 0 2 6.263398 0.000000 5648 +forconstruct 0 2 6.263398 0.000000 5649 +inon 0 2 6.263398 0.000000 4496 +arealso 0 2 6.263398 0.000000 5650 +knowledgebas 0 2 6.263398 0.000000 5136 +adequ 0 2 6.263398 0.000000 4116 +lexicon 0 2 6.263398 0.000000 5651 +brant 0 2 6.263398 0.000000 5652 +aroundth 0 2 6.263398 0.000000 5653 +prado 0 1 6.957497 0.000000 13618 +lester 0 1 6.957497 0.000000 13619 +callawai 0 1 6.957497 0.000000 13620 +andersen 0 1 6.957497 0.000000 13621 +acker 0 1 6.957497 0.000000 13622 +eilert 0 1 6.957497 0.000000 13623 +groupknowledg 0 1 6.957497 0.000000 13624 +overviewour 0 1 6.957497 0.000000 13625 +atuniv 0 1 6.957497 0.000000 13626 +currentexpert 0 1 6.957497 0.000000 13627 +broadknowledg 0 1 6.957497 0.000000 13628 +toexplain 0 1 6.957497 0.000000 13629 +answeringa 0 1 6.957497 0.000000 13630 +formallyrepres 0 1 6.957497 0.000000 13631 +thebiolog 0 1 6.957497 0.000000 13632 +andthos 0 1 6.957497 0.000000 13633 +beanswer 0 1 6.957497 0.000000 13634 +jeffrickel 0 1 6.957497 0.000000 13635 +taskof 0 1 6.957497 0.000000 13636 +thesimplest 0 1 6.957497 0.000000 13637 +dauntingrequir 0 1 6.957497 0.000000 13638 +manymodel 0 1 6.957497 0.000000 13639 +compilerand 0 1 6.957497 0.000000 13640 +bybuild 0 1 6.957497 0.000000 13641 +computingenviron 0 1 6.957497 0.000000 13642 +deskassist 0 1 6.957497 0.000000 13643 +squestion 0 1 6.957497 0.000000 13644 +projectsour 0 1 6.957497 0.000000 13645 +kned 0 1 6.957497 0.000000 13646 +kastl 0 1 6.957497 0.000000 13647 +fare 0 1 6.957497 0.000000 13648 +lex 0 1 6.957497 0.000000 13649 +tripel 0 1 6.957497 0.000000 13650 +theorist 0 1 6.957497 0.000000 13651 +searcher 0 1 6.957497 0.000000 13652 +alumna 0 1 6.957497 0.000000 13653 +lian 0 1 6.957497 0.000000 13654 +blumenth 0 1 6.957497 0.000000 13655 +eolu 0 1 6.957497 0.000000 13656 +uwyo 0 1 6.957497 0.000000 13657 +clarkp 0 1 6.957497 0.000000 13658 +redwood 0 1 6.957497 0.000000 13659 +ncsu 0 1 6.957497 0.000000 13660 +publicationsclick 0 1 6.957497 0.000000 13661 +projectsclick 0 1 6.957497 0.000000 13662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..26ac6e3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +algorithm 0 162 1.791759 0.000000 57 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +text 0 98 2.302585 0.000000 133 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +david 0 71 2.639057 0.000000 232 +goal 0 66 2.708050 0.000000 250 +knowledg 0 67 2.708050 0.000000 243 +execut 0 45 3.135494 0.000000 404 +past 0 42 3.218876 0.000000 428 +soon 0 36 3.367296 0.000000 494 +either 0 35 3.401197 0.000000 506 +bibliographi 0 34 3.401197 0.000000 518 +queri 0 33 3.433987 0.000000 524 +someth 0 31 3.496508 0.000000 554 +robert 0 30 3.555348 0.000000 567 +rule 1 26 3.688879 3.688879 638 +constraint 0 26 3.688879 0.000000 636 +sometim 0 24 3.761200 0.000000 696 +finish 0 22 3.850148 0.000000 748 +basi 0 20 3.951244 0.000000 828 +render 0 17 4.110874 0.000000 947 +match 0 16 4.174387 0.000000 965 +warn 0 14 4.317488 0.000000 1068 +daniel 0 12 4.465908 0.000000 1233 +evolv 0 12 4.465908 0.000000 1223 +candid 0 9 4.753590 0.000000 1606 +presenc 0 8 4.875197 0.000000 1671 +hold 0 8 4.875197 0.000000 1645 +lane 0 8 4.875197 0.000000 1720 +yang 0 8 4.875197 0.000000 1652 +wouldn 0 7 5.010635 0.000000 1970 +srinivasan 0 6 5.164786 0.000000 2175 +mirank 1 5 5.347108 5.347108 2543 +treat 0 5 5.347108 0.000000 2521 +breath 0 4 5.568345 0.000000 2946 +lanc 0 4 5.568345 0.000000 3022 +obsolet 0 3 5.857933 0.000000 3196 +byth 0 3 5.857933 0.000000 3874 +archi 0 3 5.857933 0.000000 3639 +ming 0 3 5.857933 0.000000 3712 +bibtex 0 2 6.263398 0.000000 5406 +leap 0 2 6.263398 0.000000 5654 +venu 0 2 6.263398 0.000000 5655 +usea 0 2 6.263398 0.000000 4800 +satisfact 0 2 6.263398 0.000000 5656 +roberto 0 2 6.263398 0.000000 5468 +bayardo 0 2 6.263398 0.000000 5467 +obermey 0 2 6.263398 0.000000 5657 +vaidyaraman 0 2 6.263398 0.000000 5658 +warshaw 0 2 6.263398 0.000000 5659 +rete 0 1 6.957497 0.000000 13663 +belat 0 1 6.957497 0.000000 13664 +fashionwai 0 1 6.957497 0.000000 13665 +itscomparison 0 1 6.957497 0.000000 13666 +encompass 0 1 6.957497 0.000000 13667 +fundamentalcomput 0 1 6.957497 0.000000 13668 +corollari 0 1 6.957497 0.000000 13669 +thatgoal 0 1 6.957497 0.000000 13670 +gadboi 0 1 6.957497 0.000000 13671 +vasili 0 1 6.957497 0.000000 13672 +samoladi 0 1 6.957497 0.000000 13673 +schrag 0 1 6.957497 0.000000 13674 +andrewsdavid 0 1 6.957497 0.000000 13675 +brantchin 0 1 6.957497 0.000000 13676 +kuoshiow 0 1 6.957497 0.000000 13677 +salvator 0 1 6.957497 0.000000 13678 +stolfo 0 1 6.957497 0.000000 13679 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..0c511506 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +technolog 0 131 2.079442 0.000000 102 +specif 0 106 2.197225 0.000000 106 +structur 0 106 2.197225 0.000000 105 +access 0 102 2.302585 0.000000 136 +select 0 91 2.397895 0.000000 154 +homepag 0 93 2.397895 0.000000 148 +institut 0 84 2.484907 0.000000 187 +ieee 0 86 2.484907 0.000000 190 +method 0 80 2.564949 0.000000 213 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +appli 0 71 2.639057 0.000000 226 +practic 0 70 2.708050 0.000000 246 +januari 0 62 2.772589 0.000000 264 +foundat 0 62 2.772589 0.000000 286 +reason 0 57 2.890372 0.000000 318 +profession 0 51 2.995732 0.000000 345 +electron 0 47 3.091042 0.000000 379 +futur 0 41 3.218876 0.000000 427 +formal 0 37 3.332205 0.000000 478 +tech 0 35 3.401197 0.000000 515 +award 0 34 3.401197 0.000000 523 +john 0 33 3.433987 0.000000 532 +chair 0 29 3.583519 0.000000 596 +synchron 0 29 3.583519 0.000000 588 +mind 0 27 3.637586 0.000000 632 +aspect 0 25 3.737670 0.000000 663 +fellow 0 24 3.761200 0.000000 701 +honor 0 23 3.806662 0.000000 729 +equat 0 23 3.806662 0.000000 724 +indian 0 22 3.850148 0.000000 769 +inth 0 22 3.850148 0.000000 741 +particularli 0 19 4.007333 0.000000 867 +north 0 19 4.007333 0.000000 873 +coupl 0 17 4.110874 0.000000 939 +letter 0 16 4.174387 0.000000 981 +weslei 0 16 4.174387 0.000000 983 +researchmi 0 14 4.317488 0.000000 1119 +classic 0 14 4.317488 0.000000 1084 +recurs 0 13 4.382027 0.000000 1127 +addison 0 12 4.465908 0.000000 1230 +kanpur 0 8 4.875197 0.000000 1744 +simon 0 8 4.875197 0.000000 1697 +guggenheim 0 8 4.875197 0.000000 1759 +misra 1 7 5.010635 5.010635 1856 +prentic 0 7 5.010635 0.000000 1838 +phase 0 6 5.164786 0.000000 1977 +holland 0 5 5.347108 0.000000 2490 +jayadev 0 4 5.568345 0.000000 3006 +essai 0 4 5.568345 0.000000 2948 +interestparallel 0 3 5.857933 0.000000 3806 +publicationsj 0 3 5.857933 0.000000 3808 +hoar 0 3 5.857933 0.000000 3875 +nondeterminist 0 3 5.857933 0.000000 3560 +powerlist 0 2 6.263398 0.000000 5660 +loos 0 2 6.263398 0.000000 4774 +chandi 0 2 6.263398 0.000000 5661 +seuss 0 2 6.263398 0.000000 5662 +misrareg 0 1 6.957497 0.000000 13680 +hopkin 0 1 6.957497 0.000000 13681 +fellowarea 0 1 6.957497 0.000000 13682 +asynchronoussystem 0 1 6.957497 0.000000 13683 +otherpap 0 1 6.957497 0.000000 13684 +anoverview 0 1 6.957497 0.000000 13685 +apostscript 0 1 6.957497 0.000000 13686 +versionaccess 0 1 6.957497 0.000000 13687 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..68fd96b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +base 0 165 1.791759 0.000000 50 +texa 0 160 1.791759 0.000000 64 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +austin 0 168 1.791759 0.000000 63 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +model 0 145 1.945910 0.000000 69 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +click 0 142 1.945910 0.000000 78 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +problem 0 147 1.945910 0.000000 75 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +site 0 106 2.197225 0.000000 119 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +search 0 95 2.397895 0.000000 155 +learn 1 86 2.484907 2.484907 170 +journal 0 83 2.484907 0.000000 183 +control 0 82 2.484907 0.000000 164 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +intellig 0 72 2.639057 0.000000 225 +logic 0 71 2.639057 0.000000 230 +nation 0 74 2.639057 0.000000 240 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +plan 0 65 2.772589 0.000000 272 +special 0 56 2.890372 0.000000 320 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +scientif 0 53 2.944439 0.000000 341 +standard 0 48 3.044522 0.000000 365 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +combin 0 42 3.218876 0.000000 421 +autom 0 41 3.218876 0.000000 434 +form 0 39 3.258097 0.000000 443 +paul 0 38 3.295837 0.000000 471 +john 0 33 3.433987 0.000000 532 +queri 0 33 3.433987 0.000000 524 +ad 0 32 3.465736 0.000000 544 +richard 0 31 3.496508 0.000000 559 +utc 0 27 3.637586 0.000000 629 +american 0 27 3.637586 0.000000 634 +revis 0 26 3.688879 0.000000 640 +rule 0 26 3.688879 0.000000 638 +experiment 0 26 3.688879 0.000000 645 +subject 0 26 3.688879 0.000000 647 +jeff 0 25 3.737670 0.000000 673 +decis 0 23 3.806662 0.000000 728 +alumni 0 21 3.912023 0.000000 807 +supervis 0 20 3.951244 0.000000 840 +comparison 0 19 4.007333 0.000000 863 +partial 0 18 4.060443 0.000000 900 +repositori 0 17 4.110874 0.000000 932 +fourth 0 16 4.174387 0.000000 999 +explan 0 16 4.174387 0.000000 985 +atth 0 15 4.248495 0.000000 1019 +prolog 0 13 4.382027 0.000000 1155 +joint 0 13 4.382027 0.000000 1130 +mellon 0 13 4.382027 0.000000 1179 +mari 0 12 4.465908 0.000000 1266 +carnegi 0 12 4.465908 0.000000 1260 +induct 0 11 4.553877 0.000000 1304 +qualit 0 11 4.553877 0.000000 1362 +refin 0 11 4.553877 0.000000 1363 +acquisit 0 10 4.653960 0.000000 1465 +linguist 0 9 4.753590 0.000000 1593 +moonei 0 9 4.753590 0.000000 1520 +ataustin 0 9 4.753590 0.000000 1610 +tutor 0 9 4.753590 0.000000 1552 +classif 0 9 4.753590 0.000000 1586 +aaai 0 8 4.875197 0.000000 1750 +european 0 8 4.875197 0.000000 1763 +empir 0 8 4.875197 0.000000 1722 +irvin 0 8 4.875197 0.000000 1660 +illinoi 0 7 5.010635 0.000000 1941 +predic 0 7 5.010635 0.000000 1806 +planner 0 7 5.010635 0.000000 1797 +thompson 0 6 5.164786 0.000000 2049 +neither 0 6 5.164786 0.000000 1990 +machinelearn 0 6 5.164786 0.000000 2084 +oxford 0 6 5.164786 0.000000 2121 +bradlei 0 5 5.347108 0.000000 2554 +elain 0 5 5.347108 0.000000 2496 +proposit 0 5 5.347108 0.000000 2339 +sowmya 0 4 5.568345 0.000000 2670 +diagnosi 0 4 5.568345 0.000000 3027 +uncertain 0 4 5.568345 0.000000 2758 +invent 0 4 5.568345 0.000000 3028 +ijcai 0 4 5.568345 0.000000 2901 +hermjakob 0 3 5.857933 0.000000 3876 +ramachandran 0 3 5.857933 0.000000 3742 +cindi 0 3 5.857933 0.000000 3830 +acad 0 3 5.857933 0.000000 3847 +signll 0 3 5.857933 0.000000 3877 +ucpop 0 3 5.857933 0.000000 3878 +estlin 0 2 6.263398 0.000000 5554 +abduct 0 2 6.263398 0.000000 5663 +focuseson 0 2 6.263398 0.000000 5433 +califf 0 2 6.263398 0.000000 5664 +mecaliff 0 2 6.263398 0.000000 5645 +tara 0 2 6.263398 0.000000 5555 +cthomp 0 2 6.263398 0.000000 5530 +dirk 0 2 6.263398 0.000000 5665 +subramanian 0 2 6.263398 0.000000 5666 +georgetown 0 2 6.263398 0.000000 5667 +drake 0 2 6.263398 0.000000 5668 +accel 0 2 6.263398 0.000000 5166 +foidl 0 2 6.263398 0.000000 4270 +icml 0 2 6.263398 0.000000 5669 +quinlan 0 2 6.263398 0.000000 4797 +learner 0 2 6.263398 0.000000 4508 +prodigi 0 2 6.263398 0.000000 5670 +baff 0 1 6.957497 0.000000 13688 +mahonei 0 1 6.957497 0.000000 13689 +speedup 0 1 6.957497 0.000000 13690 +knowledgerefin 0 1 6.957497 0.000000 13691 +scicomp 0 1 6.957497 0.000000 13692 +firstadvisor 0 1 6.957497 0.000000 13693 +hwee 0 1 6.957497 0.000000 13694 +nhweetou 0 1 6.957497 0.000000 13695 +trantor 0 1 6.957497 0.000000 13696 +ourston 0 1 6.957497 0.000000 13697 +dirk_ourston 0 1 6.957497 0.000000 13698 +cpqm 0 1 6.957497 0.000000 13699 +saic 0 1 6.957497 0.000000 13700 +furtwangen 0 1 6.957497 0.000000 13701 +siddarth 0 1 6.957497 0.000000 13702 +zell 0 1 6.957497 0.000000 13703 +reasoningher 0 1 6.957497 0.000000 13704 +fort 0 1 6.957497 0.000000 13705 +chillin 0 1 6.957497 0.000000 13706 +dolphin 0 1 6.957497 0.000000 13707 +ilpnet 0 1 6.957497 0.000000 13708 +sigart 0 1 6.957497 0.000000 13709 +aritfici 0 1 6.957497 0.000000 13710 +biblio 0 1 6.957497 0.000000 13711 +jair 0 1 6.957497 0.000000 13712 +foil 0 1 6.957497 0.000000 13713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..45cdd340 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +theori 0 111 2.197225 0.000000 127 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +control 0 82 2.484907 0.000000 164 +start 0 83 2.484907 0.000000 173 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +complet 0 77 2.564949 0.000000 208 +intellig 0 72 2.639057 0.000000 225 +logic 0 71 2.639057 0.000000 230 +effici 0 73 2.639057 0.000000 233 +knowledg 0 67 2.708050 0.000000 243 +degre 0 69 2.708050 0.000000 259 +artifici 0 63 2.772589 0.000000 280 +improv 0 62 2.772589 0.000000 289 +plan 0 65 2.772589 0.000000 272 +prof 0 64 2.772589 0.000000 273 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +thesi 0 57 2.890372 0.000000 327 +direct 0 57 2.890372 0.000000 316 +finger 0 52 2.995732 0.000000 354 +natur 0 44 3.135494 0.000000 406 +combin 0 42 3.218876 0.000000 421 +small 0 39 3.258097 0.000000 447 +vita 0 38 3.295837 0.000000 473 +word 0 34 3.401197 0.000000 508 +obtain 0 33 3.433987 0.000000 534 +posit 0 31 3.496508 0.000000 552 +computersci 0 30 3.555348 0.000000 562 +neural 0 30 3.555348 0.000000 578 +postal 0 30 3.555348 0.000000 580 +symbol 0 27 3.637586 0.000000 620 +rule 0 26 3.688879 0.000000 638 +compar 0 26 3.688879 0.000000 648 +qualiti 0 20 3.951244 0.000000 832 +lisp 0 18 4.060443 0.000000 897 +attend 0 18 4.060443 0.000000 893 +explan 0 16 4.174387 0.000000 985 +prolog 0 13 4.382027 0.000000 1155 +went 0 12 4.465908 0.000000 1279 +sens 0 11 4.553877 0.000000 1305 +induct 0 11 4.553877 0.000000 1304 +refin 0 11 4.553877 0.000000 1363 +acquisit 0 10 4.653960 0.000000 1465 +interestsmi 0 10 4.653960 0.000000 1462 +town 0 10 4.653960 0.000000 1458 +moonei 0 9 4.753590 0.000000 1520 +extract 0 8 4.875197 0.000000 1728 +empir 0 8 4.875197 0.000000 1722 +grew 0 8 4.875197 0.000000 1742 +illinoi 0 7 5.010635 0.000000 1941 +raymond 0 5 5.347108 0.000000 2313 +began 0 5 5.347108 0.000000 2498 +disambigu 0 4 5.568345 0.000000 2899 +bayesian 0 4 5.568345 0.000000 2671 +urbana 0 3 5.857933 0.000000 3879 +primarilyin 0 3 5.857933 0.000000 3832 +parser 0 3 5.857933 0.000000 3141 +myph 0 3 5.857933 0.000000 3880 +champaign 0 2 6.263398 0.000000 5671 +lexicon 0 2 6.263398 0.000000 5651 +highschool 0 2 6.263398 0.000000 5672 +homepageraymond 0 1 6.957497 0.000000 13714 +mooneyassoci 0 1 6.957497 0.000000 13715 +informationfal 0 1 6.957497 0.000000 13716 +learningspr 0 1 6.957497 0.000000 13717 +iiperson 0 1 6.957497 0.000000 13718 +historyi 0 1 6.957497 0.000000 13719 +fallon 0 1 6.957497 0.000000 13720 +wherestart 0 1 6.957497 0.000000 13721 +fallontownship 0 1 6.957497 0.000000 13722 +urbanato 0 1 6.957497 0.000000 13723 +learninggroup 0 1 6.957497 0.000000 13724 +gerald 0 1 6.957497 0.000000 13725 +dejong 0 1 6.957497 0.000000 13726 +meadowfir 0 1 6.957497 0.000000 13727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..5299002b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +link 1 247 1.386294 1.386294 24 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +world 0 115 2.197225 0.000000 126 +specif 0 106 2.197225 0.000000 106 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +real 0 93 2.397895 0.000000 144 +follow 0 92 2.397895 0.000000 143 +homepag 0 93 2.397895 0.000000 148 +control 0 82 2.484907 0.000000 164 +resourc 0 81 2.484907 0.000000 172 +learn 0 86 2.484907 0.000000 170 +educ 0 86 2.484907 0.000000 191 +appear 0 78 2.564949 0.000000 210 +dynam 0 76 2.564949 0.000000 194 +state 0 76 2.564949 0.000000 207 +intellig 0 72 2.639057 0.000000 225 +practic 0 70 2.708050 0.000000 246 +knowledg 0 67 2.708050 0.000000 243 +taylor 0 63 2.772589 0.000000 287 +plai 0 60 2.833213 0.000000 307 +local 0 55 2.944439 0.000000 334 +visitor 0 49 3.044522 0.000000 371 +must 0 40 3.258097 0.000000 442 +game 0 36 3.367296 0.000000 498 +return 0 34 3.401197 0.000000 502 +obtain 0 33 3.433987 0.000000 534 +neural 0 30 3.555348 0.000000 578 +domain 0 30 3.555348 0.000000 564 +postal 0 30 3.555348 0.000000 580 +utc 0 27 3.637586 0.000000 629 +task 0 25 3.737670 0.000000 678 +sport 0 25 3.737670 0.000000 683 +decis 0 23 3.806662 0.000000 728 +sequenc 0 23 3.806662 0.000000 734 +sequenti 0 22 3.850148 0.000000 745 +rout 0 21 3.912023 0.000000 793 +alloc 0 20 3.951244 0.000000 821 +agent 0 18 4.060443 0.000000 910 +upon 0 16 4.174387 0.000000 978 +action 0 15 4.248495 0.000000 1038 +universityof 0 15 4.248495 0.000000 1061 +dave 0 14 4.317488 0.000000 1098 +finit 0 14 4.317488 0.000000 1106 +misc 0 13 4.382027 0.000000 1124 +evolv 0 12 4.465908 0.000000 1223 +enter 0 10 4.653960 0.000000 1454 +total 0 10 4.653960 0.000000 1398 +observ 0 9 4.753590 0.000000 1578 +character 0 8 4.875197 0.000000 1767 +canb 0 7 5.010635 0.000000 1846 +highest 0 4 5.568345 0.000000 2950 +thesystem 0 3 5.857933 0.000000 3881 +scenario 0 2 6.263398 0.000000 5524 +geneticalgorithm 0 2 6.263398 0.000000 5673 +amparticularli 0 2 6.263398 0.000000 5558 +unavail 0 2 6.263398 0.000000 5046 +tulan 0 2 6.263398 0.000000 5559 +moriarti 0 1 6.957497 0.000000 13728 +moriartydav 0 1 6.957497 0.000000 13729 +researchsequenti 0 1 6.957497 0.000000 13730 +problemsinclud 0 1 6.957497 0.000000 13731 +stateof 0 1 6.957497 0.000000 13732 +selectanoth 0 1 6.957497 0.000000 13733 +payoff 0 1 6.957497 0.000000 13734 +madeor 0 1 6.957497 0.000000 13735 +thesequ 0 1 6.957497 0.000000 13736 +cumulativepayoff 0 1 6.957497 0.000000 13737 +iscurr 0 1 6.957497 0.000000 13738 +costli 0 1 6.957497 0.000000 13739 +havestudi 0 1 6.957497 0.000000 13740 +constraintsatisfact 0 1 6.957497 0.000000 13741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..f46455ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +educ 0 86 2.484907 0.000000 191 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +map 0 39 3.258097 0.000000 452 +campu 0 27 3.637586 0.000000 623 +mine 0 26 3.688879 0.000000 654 +reach 0 24 3.761200 0.000000 688 +edulast 0 17 4.110874 0.000000 927 +utah 0 9 4.753590 0.000000 1585 +eduresearch 0 6 5.164786 0.000000 2205 +trail 0 6 5.164786 0.000000 2071 +mehom 0 4 5.568345 0.000000 2979 +wade 0 1 6.957497 0.000000 13742 +mwbarn 0 1 6.957497 0.000000 13743 +barnesm 0 1 6.957497 0.000000 13744 +barnesmwbarn 0 1 6.957497 0.000000 13745 +workhelp 0 1 6.957497 0.000000 13746 +pagestyp 0 1 6.957497 0.000000 13747 +literatureliteratur 0 1 6.957497 0.000000 13748 +notesclassesbackground 0 1 6.957497 0.000000 13749 +informationph 0 1 6.957497 0.000000 13750 +tanglebriar 0 1 6.957497 0.000000 13751 +yete 0 1 6.957497 0.000000 13752 +eduauthor 0 1 6.957497 0.000000 13753 +barnesemail 0 1 6.957497 0.000000 13754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..f718f8a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +teach 0 108 2.197225 0.000000 112 +pleas 0 113 2.197225 0.000000 114 +person 0 111 2.197225 0.000000 117 +assist 0 112 2.197225 0.000000 113 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +resum 0 79 2.564949 0.000000 217 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +receiv 0 66 2.708050 0.000000 244 +abstract 0 62 2.772589 0.000000 276 +creat 0 63 2.772589 0.000000 277 +room 1 59 2.833213 2.833213 301 +summer 0 56 2.890372 0.000000 311 +faculti 0 56 2.890372 0.000000 325 +direct 0 57 2.890372 0.000000 316 +profession 0 51 2.995732 0.000000 345 +right 0 48 3.044522 0.000000 363 +author 0 39 3.258097 0.000000 450 +vita 0 38 3.295837 0.000000 473 +feel 0 37 3.332205 0.000000 483 +bibliographi 0 34 3.401197 0.000000 518 +curriculum 0 33 3.433987 0.000000 535 +travel 0 30 3.555348 0.000000 579 +chair 0 29 3.583519 0.000000 596 +full 0 28 3.610918 0.000000 615 +load 0 28 3.610918 0.000000 601 +reach 0 24 3.761200 0.000000 688 +brows 0 23 3.806662 0.000000 726 +reserv 0 20 3.951244 0.000000 808 +els 0 19 4.007333 0.000000 843 +spend 0 19 4.007333 0.000000 850 +account 0 18 4.060443 0.000000 882 +senior 0 14 4.317488 0.000000 1120 +settimeout 0 5 5.347108 0.000000 2536 +dale 0 4 5.568345 0.000000 2687 +seed 0 4 5.568345 0.000000 2984 +websit 0 4 5.568345 0.000000 2726 +timertwo 0 4 5.568345 0.000000 2985 +oftexa 0 4 5.568345 0.000000 3003 +whichcontain 0 4 5.568345 0.000000 2714 +scrollit_rl 0 3 5.857933 0.000000 3882 +retir 0 2 6.263398 0.000000 5674 +nell 0 1 6.957497 0.000000 13755 +pagesunivers 0 1 6.957497 0.000000 13756 +departmentwelcom 0 1 6.957497 0.000000 13757 +utaustin 0 1 6.957497 0.000000 13758 +fromful 0 1 6.957497 0.000000 13759 +falland 0 1 6.957497 0.000000 13760 +ofdissert 0 1 6.957497 0.000000 13761 +memento 0 1 6.957497 0.000000 13762 +nontechn 0 1 6.957497 0.000000 13763 +anycorrespond 0 1 6.957497 0.000000 13764 +ndale 0 1 6.957497 0.000000 13765 +profilepublicationsresearch 0 1 6.957497 0.000000 13766 +interestsperson 0 1 6.957497 0.000000 13767 +interestsnel 0 1 6.957497 0.000000 13768 +westlak 0 1 6.957497 0.000000 13769 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..93dfe24c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +design 0 213 1.386294 0.000000 25 +support 0 132 1.945910 0.000000 83 +note 0 142 1.945910 0.000000 67 +pleas 0 113 2.197225 0.000000 114 +view 0 70 2.708050 0.000000 254 +browser 0 56 2.890372 0.000000 313 +without 0 50 3.044522 0.000000 370 +netscap 0 44 3.135494 0.000000 395 +keep 0 44 3.135494 0.000000 409 +download 0 36 3.367296 0.000000 489 +either 0 35 3.401197 0.000000 506 +mind 0 27 3.637586 0.000000 632 +background 0 25 3.737670 0.000000 664 +frame 0 24 3.761200 0.000000 684 +color 0 22 3.850148 0.000000 762 +navig 0 21 3.912023 0.000000 796 +choos 0 16 4.174387 0.000000 964 +pretti 0 13 4.382027 0.000000 1191 +latter 0 9 4.753590 0.000000 1522 +chosen 0 6 5.164786 0.000000 1984 +blame 0 3 5.857933 0.000000 3636 +neeraj 0 2 6.263398 0.000000 5577 +obnoxi 0 1 6.957497 0.000000 13770 +chartreus 0 1 6.957497 0.000000 13771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..a0491a77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +sciencesunivers 0 37 3.332205 0.000000 486 +log 0 19 4.007333 0.000000 857 +kumar 0 9 4.753590 0.000000 1506 +sciencedepart 0 6 5.164786 0.000000 2172 +natarajan 0 2 6.263398 0.000000 4377 +austini 0 2 6.263398 0.000000 5527 +gnan 0 1 6.957497 0.000000 13772 +pagegnana 0 1 6.957497 0.000000 13773 +edufind 0 1 6.957497 0.000000 13774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..796ca574 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +click 0 142 1.945910 0.000000 78 +number 0 130 2.079442 0.000000 97 +look 1 107 2.197225 2.197225 115 +pictur 0 89 2.397895 0.000000 160 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +visitor 0 49 3.044522 0.000000 371 +term 0 43 3.178054 0.000000 411 +queri 0 33 3.433987 0.000000 524 +art 0 29 3.583519 0.000000 593 +quit 0 27 3.637586 0.000000 633 +altern 0 26 3.688879 0.000000 641 +output 0 21 3.912023 0.000000 788 +medic 0 17 4.110874 0.000000 958 +doesn 0 15 4.248495 0.000000 1055 +score 0 15 4.248495 0.000000 1017 +typic 0 11 4.553877 0.000000 1360 +hit 0 7 5.010635 0.000000 1965 +arora 1 4 5.568345 5.568345 2658 +ters 0 3 5.857933 0.000000 3297 +nimar 1 2 6.263398 6.263398 4188 +singh 0 2 6.263398 0.000000 5675 +knowwhat 0 2 6.263398 0.000000 5456 +clearer 0 2 6.263398 0.000000 5676 +bookmarksto 0 1 6.957497 0.000000 13775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..87f9ff93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +languag 1 227 1.386294 1.386294 26 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +machin 0 129 2.079442 0.000000 95 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +associ 0 93 2.397895 0.000000 151 +learn 0 86 2.484907 0.000000 170 +second 0 81 2.484907 0.000000 166 +novemb 0 81 2.484907 0.000000 179 +resum 0 79 2.564949 0.000000 217 +meet 0 72 2.639057 0.000000 229 +prof 0 64 2.772589 0.000000 273 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +week 0 52 2.995732 0.000000 343 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +discuss 0 45 3.135494 0.000000 399 +third 0 43 3.178054 0.000000 412 +probabl 0 40 3.258097 0.000000 455 +close 0 38 3.295837 0.000000 465 +ofth 0 36 3.367296 0.000000 491 +everi 0 34 3.401197 0.000000 519 +print 0 34 3.401197 0.000000 503 +neural 0 30 3.555348 0.000000 578 +particip 0 29 3.583519 0.000000 589 +usual 0 28 3.610918 0.000000 608 +propos 0 28 3.610918 0.000000 602 +utc 0 27 3.637586 0.000000 629 +previous 0 17 4.110874 0.000000 923 +coordin 0 13 4.382027 0.000000 1182 +mari 0 12 4.465908 0.000000 1266 +acquisit 0 10 4.653960 0.000000 1465 +moonei 0 9 4.753590 0.000000 1520 +risto 0 9 4.753590 0.000000 1523 +linguist 0 9 4.753590 0.000000 1593 +miikkulainen 0 8 4.875197 0.000000 1667 +thompson 0 6 5.164786 0.000000 2049 +elain 0 5 5.347108 0.000000 2496 +tang 0 5 5.347108 0.000000 2409 +hermjakob 0 3 5.857933 0.000000 3876 +poon 0 3 5.857933 0.000000 3820 +cindi 0 3 5.857933 0.000000 3830 +signll 0 3 5.857933 0.000000 3877 +groupat 0 2 6.263398 0.000000 5677 +bobbi 0 2 6.263398 0.000000 5678 +califf 0 2 6.263398 0.000000 5664 +marti 0 2 6.263398 0.000000 5679 +mayberri 0 2 6.263398 0.000000 5641 +rupert 0 2 6.263398 0.000000 5680 +acquist 0 1 6.957497 0.000000 13776 +groupnatur 0 1 6.957497 0.000000 13777 +austinw 0 1 6.957497 0.000000 13778 +acquisitionand 0 1 6.957497 0.000000 13779 +havedrawn 0 1 6.957497 0.000000 13780 +bryant 0 1 6.957497 0.000000 13781 +ataustinlast 0 1 6.957497 0.000000 13782 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..40cf4637 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +structur 0 106 2.197225 0.000000 105 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +sourc 0 77 2.564949 0.000000 201 +intellig 0 72 2.639057 0.000000 225 +artifici 0 63 2.772589 0.000000 280 +prof 0 64 2.772589 0.000000 273 +organ 0 65 2.772589 0.000000 265 +interact 0 62 2.772589 0.000000 270 +function 0 62 2.772589 0.000000 275 +detail 0 57 2.890372 0.000000 321 +visitor 0 49 3.044522 0.000000 371 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +neural 1 30 3.555348 3.555348 578 +utc 0 27 3.637586 0.000000 629 +decis 0 23 3.806662 0.000000 728 +self 0 22 3.850148 0.000000 761 +alumni 0 21 3.912023 0.000000 807 +newsgroup 0 21 3.912023 0.000000 783 +supervis 0 20 3.951244 0.000000 840 +hypertext 0 19 4.007333 0.000000 865 +concentr 0 18 4.060443 0.000000 906 +demo 0 18 4.060443 0.000000 888 +later 0 15 4.248495 0.000000 1043 +evolv 0 12 4.465908 0.000000 1223 +genet 0 10 4.653960 0.000000 1409 +ataustin 0 9 4.753590 0.000000 1610 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +poster 0 7 5.010635 0.000000 1814 +schema 0 6 5.164786 0.000000 1988 +groupth 0 5 5.347108 0.000000 2549 +net 0 4 5.568345 0.000000 2741 +episod 0 4 5.568345 0.000000 2747 +cortic 0 3 5.857933 0.000000 3857 +privat 0 3 5.857933 0.000000 3496 +andcognit 0 2 6.263398 0.000000 5681 +ristomiikkulainen 0 1 6.957497 0.000000 13783 +basedvis 0 1 6.957497 0.000000 13784 +mapbelow 0 1 6.957497 0.000000 13785 +thecortex 0 1 6.957497 0.000000 13786 +linkswusagemartym 0 1 6.957497 0.000000 13787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..cc8176ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +problem 0 147 1.945910 0.000000 75 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +graphic 0 90 2.397895 0.000000 147 +associ 0 93 2.397895 0.000000 151 +internet 0 83 2.484907 0.000000 186 +activ 0 84 2.484907 0.000000 182 +server 0 76 2.564949 0.000000 204 +interfac 0 79 2.564949 0.000000 209 +state 0 76 2.564949 0.000000 207 +free 0 73 2.639057 0.000000 224 +write 0 72 2.639057 0.000000 222 +solv 0 73 2.639057 0.000000 234 +interact 0 62 2.772589 0.000000 270 +artifici 0 63 2.772589 0.000000 280 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +physic 0 47 3.091042 0.000000 377 +vita 0 38 3.295837 0.000000 473 +connect 0 37 3.332205 0.000000 485 +common 0 30 3.555348 0.000000 574 +measur 0 28 3.610918 0.000000 609 +univ 0 28 3.610918 0.000000 617 +honor 0 23 3.806662 0.000000 729 +director 0 22 3.850148 0.000000 767 +unit 0 21 3.912023 0.000000 779 +expert 0 20 3.951244 0.000000 833 +lisp 1 18 4.060443 4.060443 897 +demo 0 18 4.060443 0.000000 888 +atth 0 15 4.248495 0.000000 1019 +english 0 15 4.248495 0.000000 1033 +draw 0 14 4.317488 0.000000 1086 +convert 0 13 4.382027 0.000000 1122 +speech 0 12 4.465908 0.000000 1222 +ofcomput 0 10 4.653960 0.000000 1442 +novak 0 9 4.753590 0.000000 1521 +reus 0 8 4.875197 0.000000 1661 +convers 0 8 4.875197 0.000000 1673 +gordon 0 6 5.164786 0.000000 2032 +shell 0 5 5.347108 0.000000 2353 +diagram 0 5 5.347108 0.000000 2346 +highest 0 4 5.568345 0.000000 2950 +intelligencec 0 4 5.568345 0.000000 2673 +isaac 0 3 5.857933 0.000000 3855 +compilersc 0 2 6.263398 0.000000 4237 +intelligencelaboratori 0 1 6.957497 0.000000 13788 +genericalgorithmssolv 0 1 6.957497 0.000000 13789 +specifiedinformallyartifici 0 1 6.957497 0.000000 13790 +intelligencecurriculum 0 1 6.957497 0.000000 13791 +publicationsemploymentgrantsprofession 0 1 6.957497 0.000000 13792 +honorscurriculum 0 1 6.957497 0.000000 13793 +vitaefre 0 1 6.957497 0.000000 13794 +tmycin 0 1 6.957497 0.000000 13795 +emycin 0 1 6.957497 0.000000 13796 +lispconvers 0 1 6.957497 0.000000 13797 +measurementsoftwar 0 1 6.957497 0.000000 13798 +schemec 0 1 6.957497 0.000000 13799 +programmingweb 0 1 6.957497 0.000000 13800 +linksweatheraddress 0 1 6.957497 0.000000 13801 +ctai 0 1 6.957497 0.000000 13802 +austinaustintexa 0 1 6.957497 0.000000 13803 +faxnovak 0 1 6.957497 0.000000 13804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..4f2d4434 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +welcom 0 122 2.079442 0.000000 99 +pleas 0 113 2.197225 0.000000 114 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +come 0 78 2.564949 0.000000 202 +want 0 79 2.564949 0.000000 199 +resum 0 79 2.564949 0.000000 217 +free 0 73 2.639057 0.000000 224 +august 0 66 2.708050 0.000000 257 +date 0 51 2.995732 0.000000 344 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +get 0 46 3.091042 0.000000 380 +paul 0 38 3.295837 0.000000 471 +feel 0 37 3.332205 0.000000 483 +download 0 36 3.367296 0.000000 489 +hope 0 28 3.610918 0.000000 610 +except 0 28 3.610918 0.000000 607 +thank 0 23 3.806662 0.000000 721 +size 0 23 3.806662 0.000000 713 +sent 0 22 3.850148 0.000000 763 +beauti 0 18 4.060443 0.000000 912 +anyon 0 17 4.110874 0.000000 916 +stock 0 16 4.174387 0.000000 1007 +wait 0 13 4.382027 0.000000 1168 +remov 0 12 4.465908 0.000000 1225 +enter 0 10 4.653960 0.000000 1454 +chanc 0 7 5.010635 0.000000 1960 +poster 0 7 5.010635 0.000000 1814 +hear 0 7 5.010635 0.000000 1940 +marri 0 7 5.010635 0.000000 1946 +feet 0 5 5.347108 0.000000 2492 +blow 0 5 5.347108 0.000000 2407 +complaint 0 4 5.568345 0.000000 2795 +queen 0 4 5.568345 0.000000 2919 +laugh 0 3 5.857933 0.000000 3659 +panic 0 2 6.263398 0.000000 5682 +gorgeou 0 2 6.263398 0.000000 5082 +meghan 0 1 6.957497 0.000000 13805 +insult 0 1 6.957497 0.000000 13806 +brienhi 0 1 6.957497 0.000000 13807 +wipe 0 1 6.957497 0.000000 13808 +crappi 0 1 6.957497 0.000000 13809 +obrien 0 1 6.957497 0.000000 13810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..1e1aa9b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +databas 0 122 2.079442 0.000000 86 +theunivers 0 21 3.912023 0.000000 797 +systemsth 0 3 5.857933 0.000000 3835 +oguer 1 1 6.957497 6.957497 13811 +gutierrezogu 0 1 6.957497 0.000000 13812 +gutierrezth 0 1 6.957497 0.000000 13813 +austinprojectsomioswwhlinksconfer 0 1 6.957497 0.000000 13814 +worldemail 0 1 6.957497 0.000000 13815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..b125b902 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +note 0 142 1.945910 0.000000 67 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +like 0 132 1.945910 0.000000 81 +compil 0 122 2.079442 0.000000 96 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +manag 0 114 2.197225 0.000000 125 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +memori 0 101 2.302585 0.000000 139 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +larg 0 82 2.484907 0.000000 168 +info 0 85 2.484907 0.000000 176 +contain 0 81 2.484907 0.000000 174 +thing 0 84 2.484907 0.000000 189 +master 0 76 2.564949 0.000000 216 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +good 0 77 2.564949 0.000000 200 +dynam 0 76 2.564949 0.000000 194 +refer 0 78 2.564949 0.000000 203 +effici 0 73 2.639057 0.000000 233 +write 0 72 2.639057 0.000000 222 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +integr 0 67 2.708050 0.000000 245 +collect 0 65 2.772589 0.000000 268 +virtual 0 62 2.772589 0.000000 285 +descript 0 64 2.772589 0.000000 271 +prof 0 64 2.772589 0.000000 273 +written 0 63 2.772589 0.000000 278 +improv 0 62 2.772589 0.000000 289 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +thesi 0 57 2.890372 0.000000 327 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +extens 0 53 2.944439 0.000000 340 +three 0 54 2.944439 0.000000 330 +local 0 55 2.944439 0.000000 334 +hardwar 0 51 2.995732 0.000000 350 +much 0 52 2.995732 0.000000 349 +pointer 0 48 3.044522 0.000000 368 +standard 0 48 3.044522 0.000000 365 +basic 0 50 3.044522 0.000000 360 +done 0 47 3.091042 0.000000 381 +adapt 0 46 3.091042 0.000000 387 +mark 0 44 3.135494 0.000000 403 +keep 0 44 3.135494 0.000000 409 +anoth 0 45 3.135494 0.000000 408 +cach 0 41 3.218876 0.000000 432 +small 0 39 3.258097 0.000000 447 +form 0 39 3.258097 0.000000 443 +paul 0 38 3.295837 0.000000 471 +open 0 38 3.295837 0.000000 469 +especi 0 36 3.367296 0.000000 496 +survei 0 35 3.401197 0.000000 513 +michael 0 35 3.401197 0.000000 514 +bibliographi 0 34 3.401197 0.000000 518 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +extend 0 32 3.465736 0.000000 539 +storag 0 31 3.496508 0.000000 553 +hard 0 30 3.555348 0.000000 563 +travel 0 30 3.555348 0.000000 579 +releas 0 28 3.610918 0.000000 616 +progress 0 28 3.610918 0.000000 598 +great 0 27 3.637586 0.000000 626 +although 0 25 3.737670 0.000000 667 +store 0 24 3.761200 0.000000 693 +interpret 0 24 3.761200 0.000000 686 +mike 0 24 3.761200 0.000000 703 +highli 0 23 3.806662 0.000000 725 +thread 0 23 3.806662 0.000000 722 +brows 0 23 3.806662 0.000000 726 +hierarchi 0 22 3.850148 0.000000 744 +try 0 22 3.850148 0.000000 764 +scheme 1 20 3.951244 3.951244 818 +alloc 0 20 3.951244 0.000000 821 +supervis 0 20 3.951244 0.000000 840 +portabl 0 20 3.951244 0.000000 819 +mostli 0 19 4.007333 0.000000 869 +scott 0 18 4.060443 0.000000 884 +behavior 0 18 4.060443 0.000000 881 +debug 0 17 4.110874 0.000000 944 +coupl 0 17 4.110874 0.000000 939 +intro 0 17 4.110874 0.000000 915 +georg 0 16 4.174387 0.000000 994 +stock 0 16 4.174387 0.000000 1007 +brief 0 16 4.174387 0.000000 1001 +carl 0 15 4.248495 0.000000 1024 +anywai 0 15 4.248495 0.000000 1047 +ascii 0 15 4.248495 0.000000 1032 +draft 0 14 4.317488 0.000000 1085 +anonym 0 14 4.317488 0.000000 1100 +stephen 0 11 4.553877 0.000000 1342 +persist 0 11 4.553877 0.000000 1367 +smart 0 11 4.553877 0.000000 1352 +alpha 0 11 4.553877 0.000000 1348 +henri 0 10 4.653960 0.000000 1417 +wilson 0 9 4.753590 0.000000 1536 +oop 0 8 4.875197 0.000000 1778 +besid 0 8 4.875197 0.000000 1681 +readm 0 8 4.875197 0.000000 1699 +baker 0 7 5.010635 0.000000 1812 +garbag 0 6 5.164786 0.000000 1986 +oopsla 0 6 5.164786 0.000000 2221 +dougla 0 5 5.347108 0.000000 2471 +decad 0 5 5.347108 0.000000 2455 +overload 0 5 5.347108 0.000000 2403 +whichcontain 0 4 5.568345 0.000000 2714 +ajit 0 3 5.857933 0.000000 3299 +qing 0 3 5.857933 0.000000 3295 +swizzl 0 3 5.857933 0.000000 3883 +andoper 0 3 5.857933 0.000000 3621 +forfault 0 3 5.857933 0.000000 3748 +rscheme 0 3 5.857933 0.000000 3250 +tosupport 0 3 5.857933 0.000000 3613 +providesa 0 3 5.857933 0.000000 3884 +heap 0 3 5.857933 0.000000 3123 +collector 0 2 6.263398 0.000000 5683 +sheetal 0 2 6.263398 0.000000 5684 +kakkad 0 2 6.263398 0.000000 5685 +donovan 0 2 6.263398 0.000000 4371 +kolbl 0 2 6.263398 0.000000 4372 +macro 0 2 6.263398 0.000000 5686 +johnston 0 2 6.263398 0.000000 5638 +repair 0 2 6.263398 0.000000 4198 +damag 0 2 6.263398 0.000000 5687 +checkpoint 0 2 6.263398 0.000000 4205 +programmingsystem 0 2 6.263398 0.000000 5688 +socket 0 2 6.263398 0.000000 4725 +materiali 0 2 6.263398 0.000000 4214 +subdirectori 0 2 6.263398 0.000000 4133 +han 0 2 6.263398 0.000000 4535 +neeli 0 1 6.957497 0.000000 13816 +groupoop 0 1 6.957497 0.000000 13817 +groupthi 0 1 6.957497 0.000000 13818 +studentsin 0 1 6.957497 0.000000 13819 +kaplan 0 1 6.957497 0.000000 13820 +wieren 0 1 6.957497 0.000000 13821 +toimplement 0 1 6.957497 0.000000 13822 +whichattempt 0 1 6.957497 0.000000 13823 +unsoundstudi 0 1 6.957497 0.000000 13824 +generationaland 0 1 6.957497 0.000000 13825 +ongarbag 0 1 6.957497 0.000000 13826 +managementfor 0 1 6.957497 0.000000 13827 +andcompress 0 1 6.957497 0.000000 13828 +noteson 0 1 6.957497 0.000000 13829 +rawascii 0 1 6.957497 0.000000 13830 +andrschemear 0 1 6.957497 0.000000 13831 +thesiscontain 0 1 6.957497 0.000000 13832 +whicharen 0 1 6.957497 0.000000 13833 +sometimesoon 0 1 6.957497 0.000000 13834 +htmlformat 0 1 6.957497 0.000000 13835 +materialfrom 0 1 6.957497 0.000000 13836 +expandedpresent 0 1 6.957497 0.000000 13837 +texinfo 0 1 6.957497 0.000000 13838 +metaobject 0 1 6.957497 0.000000 13839 +backgroundread 0 1 6.957497 0.000000 13840 +fortexa 0 1 6.957497 0.000000 13841 +sftp 0 1 6.957497 0.000000 13842 +notb 0 1 6.957497 0.000000 13843 +boehm 0 1 6.957497 0.000000 13844 +severalgarbag 0 1 6.957497 0.000000 13845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..bdf8ed15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +robert 0 30 3.555348 0.000000 567 +otuomagieaddress 0 1 6.957497 0.000000 13846 +emailotu 0 1 6.957497 0.000000 13847 +eduuniververs 0 1 6.957497 0.000000 13848 +infouniversityth 0 1 6.957497 0.000000 13849 +txa 0 1 6.957497 0.000000 13850 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..1d5cafe0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +file 1 132 1.945910 1.945910 70 +construct 0 139 1.945910 0.000000 82 +professor 0 137 1.945910 0.000000 76 +compil 0 122 2.079442 0.000000 96 +technolog 0 131 2.079442 0.000000 102 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +control 0 82 2.484907 0.000000 164 +environ 0 84 2.484907 0.000000 177 +contain 0 81 2.484907 0.000000 174 +requir 0 81 2.484907 0.000000 167 +interfac 0 79 2.564949 0.000000 209 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +virtual 0 62 2.772589 0.000000 285 +creat 0 63 2.772589 0.000000 277 +variou 0 56 2.890372 0.000000 317 +investig 0 51 2.995732 0.000000 353 +physic 0 47 3.091042 0.000000 377 +featur 0 46 3.091042 0.000000 386 +prototyp 0 38 3.295837 0.000000 463 +microsoft 0 38 3.295837 0.000000 468 +manual 0 35 3.401197 0.000000 504 +dissert 0 32 3.465736 0.000000 549 +compon 0 30 3.555348 0.000000 570 +domain 0 30 3.555348 0.000000 564 +focu 0 30 3.555348 0.000000 571 +specifi 0 30 3.555348 0.000000 568 +effort 0 26 3.688879 0.000000 652 +reduc 0 22 3.850148 0.000000 759 +thu 0 21 3.912023 0.000000 773 +devic 0 16 4.174387 0.000000 1002 +brown 0 16 4.174387 0.000000 977 +draft 0 14 4.317488 0.000000 1085 +transpar 0 11 4.553877 0.000000 1325 +devis 0 10 4.653960 0.000000 1451 +researchi 0 8 4.875197 0.000000 1756 +driver 0 8 4.875197 0.000000 1657 +counter 0 8 4.875197 0.000000 1765 +creation 0 6 5.164786 0.000000 2069 +andimplement 0 4 5.568345 0.000000 3029 +multifunct 0 3 5.857933 0.000000 3826 +meemail 0 3 5.857933 0.000000 3821 +edupost 0 3 5.857933 0.000000 3822 +export 0 2 6.263398 0.000000 5689 +manualfor 0 2 6.263398 0.000000 4720 +padgett 0 1 6.957497 0.000000 13851 +padgettdon 0 1 6.957497 0.000000 13852 +softar 0 1 6.957497 0.000000 13853 +powerpointvers 0 1 6.957497 0.000000 13854 +usafax 0 1 6.957497 0.000000 13855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..02e6d0ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +utexa 0 189 1.609438 0.000000 44 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +model 0 145 1.945910 0.000000 69 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +person 0 111 2.197225 0.000000 117 +access 0 102 2.302585 0.000000 136 +imag 0 91 2.397895 0.000000 161 +graphic 0 90 2.397895 0.000000 147 +librari 0 87 2.484907 0.000000 181 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +server 0 76 2.564949 0.000000 204 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +window 0 68 2.708050 0.000000 242 +main 0 67 2.708050 0.000000 256 +plan 0 65 2.772589 0.000000 272 +automat 0 61 2.833213 0.000000 306 +space 0 57 2.890372 0.000000 310 +standard 0 48 3.044522 0.000000 365 +cool 0 49 3.044522 0.000000 374 +get 0 46 3.091042 0.000000 380 +execut 0 45 3.135494 0.000000 404 +realli 0 40 3.258097 0.000000 444 +game 0 36 3.367296 0.000000 498 +tree 0 36 3.367296 0.000000 492 +go 0 33 3.433987 0.000000 529 +handl 0 24 3.761200 0.000000 685 +inth 0 22 3.850148 0.000000 741 +binari 0 20 3.951244 0.000000 823 +wrote 0 20 3.951244 0.000000 830 +geometr 0 19 4.007333 0.000000 852 +partit 0 16 4.174387 0.000000 984 +unfortun 0 13 4.382027 0.000000 1170 +solari 0 12 4.465908 0.000000 1238 +guess 0 10 4.653960 0.000000 1443 +elimin 0 9 4.753590 0.000000 1558 +fail 0 8 4.875197 0.000000 1655 +port 0 8 4.875197 0.000000 1766 +reus 0 8 4.875197 0.000000 1661 +bug 0 7 5.010635 0.000000 1801 +philip 0 6 5.164786 0.000000 2005 +templat 0 5 5.347108 0.000000 2311 +anda 0 5 5.347108 0.000000 2416 +suno 0 4 5.568345 0.000000 2790 +screenshot 0 4 5.568345 0.000000 2743 +campbel 0 3 5.857933 0.000000 3272 +meemail 0 3 5.857933 0.000000 3821 +edupost 0 3 5.857933 0.000000 3822 +everywher 0 2 6.263398 0.000000 5690 +curli 0 2 6.263398 0.000000 5691 +battlebal 1 1 6.957497 6.957497 13856 +hardinphilip 0 1 6.957497 0.000000 13857 +hardinabout 0 1 6.957497 0.000000 13858 +fallback 0 1 6.957497 0.000000 13859 +multiplay 0 1 6.957497 0.000000 13860 +runsund 0 1 6.957497 0.000000 13861 +graphicssoftwar 0 1 6.957497 0.000000 13862 +programmingto 0 1 6.957497 0.000000 13863 +pahardin 0 1 6.957497 0.000000 13864 +usanetrek 0 1 6.957497 0.000000 13865 +pita 0 1 6.957497 0.000000 13866 +digitaldisast 0 1 6.957497 0.000000 13867 +plaster 0 1 6.957497 0.000000 13868 +congradul 0 1 6.957497 0.000000 13869 +smartest 0 1 6.957497 0.000000 13870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..96764d27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +check 0 115 2.197225 0.000000 118 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +need 0 98 2.302585 0.000000 135 +learn 0 86 2.484907 0.000000 170 +librari 0 87 2.484907 0.000000 181 +exampl 0 77 2.564949 0.000000 195 +addit 0 74 2.639057 0.000000 228 +intellig 0 72 2.639057 0.000000 225 +onlin 0 75 2.639057 0.000000 223 +knowledg 0 67 2.708050 0.000000 243 +interact 0 62 2.772589 0.000000 270 +taylor 0 63 2.772589 0.000000 287 +artifici 0 63 2.772589 0.000000 280 +think 0 57 2.890372 0.000000 314 +pointer 0 48 3.044522 0.000000 368 +natur 0 44 3.135494 0.000000 406 +mechan 0 43 3.178054 0.000000 416 +might 0 41 3.218876 0.000000 426 +paul 0 38 3.295837 0.000000 471 +postal 0 30 3.555348 0.000000 580 +neural 0 30 3.555348 0.000000 578 +usual 0 28 3.610918 0.000000 608 +head 0 23 3.806662 0.000000 732 +reflect 0 15 4.248495 0.000000 1034 +dave 0 14 4.317488 0.000000 1098 +hotlist 0 13 4.382027 0.000000 1199 +pascal 0 12 4.465908 0.000000 1213 +evolut 1 11 4.553877 4.553877 1314 +surf 0 11 4.553877 0.000000 1301 +death 0 10 4.653960 0.000000 1457 +handi 0 6 5.164786 0.000000 2111 +mix 0 6 5.164786 0.000000 2200 +studentdepart 0 5 5.347108 0.000000 2505 +explicitli 0 5 5.347108 0.000000 2308 +seriou 0 5 5.347108 0.000000 2252 +wast 0 5 5.347108 0.000000 2537 +austindepart 0 4 5.568345 0.000000 3008 +websit 0 4 5.568345 0.000000 2726 +neuro 0 2 6.263398 0.000000 4265 +mcquestenpaul 0 1 6.957497 0.000000 13871 +mcquestenphd 0 1 6.957497 0.000000 13872 +bepract 0 1 6.957497 0.000000 13873 +paulmcq 0 1 6.957497 0.000000 13874 +forcsp 0 1 6.957497 0.000000 13875 +programmingmor 0 1 6.957497 0.000000 13876 +inmoriarti 0 1 6.957497 0.000000 13877 +atcnr 0 1 6.957497 0.000000 13878 +rome 0 1 6.957497 0.000000 13879 +tout 0 1 6.957497 0.000000 13880 +winer 0 1 6.957497 0.000000 13881 +cynb 0 1 6.957497 0.000000 13882 +humong 0 1 6.957497 0.000000 13883 +knick 0 1 6.957497 0.000000 13884 +knack 0 1 6.957497 0.000000 13885 +nut 0 1 6.957497 0.000000 13886 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..29bfc79e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +file 0 132 1.945910 0.000000 70 +check 0 115 2.197225 0.000000 118 +comment 0 93 2.397895 0.000000 146 +summari 0 73 2.639057 0.000000 237 +multimedia 1 68 2.708050 2.708050 258 +finger 0 52 2.995732 0.000000 354 +get 0 46 3.091042 0.000000 380 +protocol 0 45 3.135494 0.000000 407 +log 0 19 4.007333 0.000000 857 +affili 0 13 4.382027 0.000000 1194 +touch 0 12 4.465908 0.000000 1288 +goyal 0 3 5.857933 0.000000 3268 +pawang 0 1 6.957497 0.000000 13887 +pawan 0 1 6.957497 0.000000 13888 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..fce6b823 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +theori 0 111 2.197225 0.000000 127 +center 0 88 2.397895 0.000000 158 +journal 0 83 2.484907 0.000000 183 +complet 0 77 2.564949 0.000000 208 +master 0 76 2.564949 0.000000 216 +want 0 79 2.564949 0.000000 199 +main 0 67 2.708050 0.000000 256 +previou 0 62 2.772589 0.000000 290 +explor 0 58 2.890372 0.000000 324 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +advisor 0 51 2.995732 0.000000 355 +numer 0 49 3.044522 0.000000 369 +visitor 0 49 3.044522 0.000000 371 +physic 0 47 3.091042 0.000000 377 +algebra 0 45 3.135494 0.000000 394 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +societi 0 40 3.258097 0.000000 456 +open 0 38 3.295837 0.000000 469 +vita 0 38 3.295837 0.000000 473 +field 0 37 3.332205 0.000000 482 +represent 0 35 3.401197 0.000000 512 +print 0 34 3.401197 0.000000 503 +obtain 0 33 3.433987 0.000000 534 +curriculum 0 33 3.433987 0.000000 535 +dissert 0 32 3.465736 0.000000 549 +posit 0 31 3.496508 0.000000 552 +spent 0 25 3.737670 0.000000 676 +finish 0 22 3.850148 0.000000 748 +sequenti 0 22 3.850148 0.000000 745 +half 0 21 3.912023 0.000000 776 +previous 0 17 4.110874 0.000000 923 +germani 0 17 4.110874 0.000000 946 +jose 0 16 4.174387 0.000000 976 +georg 0 16 4.174387 0.000000 994 +joint 0 13 4.382027 0.000000 1130 +econom 0 13 4.382027 0.000000 1184 +mellon 0 13 4.382027 0.000000 1179 +usavoic 0 13 4.382027 0.000000 1198 +calcul 0 12 4.465908 0.000000 1268 +carnegi 0 12 4.465908 0.000000 1260 +fill 0 11 4.553877 0.000000 1349 +cryptographi 0 9 4.753590 0.000000 1512 +rel 0 9 4.753590 0.000000 1487 +invari 0 8 4.875197 0.000000 1748 +pittsburgh 0 7 5.010635 0.000000 1938 +quantum 0 6 5.164786 0.000000 2214 +particl 0 5 5.347108 0.000000 2436 +invers 0 4 5.568345 0.000000 2764 +sudarshan 0 3 5.857933 0.000000 3885 +supervisor 0 3 5.857933 0.000000 3754 +quantiz 0 2 6.263398 0.000000 5692 +irreduc 0 2 6.263398 0.000000 4890 +lemk 0 2 6.263398 0.000000 5693 +thephys 0 2 6.263398 0.000000 5694 +symmetri 0 2 6.263398 0.000000 5517 +pecina 0 1 6.957497 0.000000 13889 +orpecina 0 1 6.957497 0.000000 13890 +pecinaabout 0 1 6.957497 0.000000 13891 +innuclear 0 1 6.957497 0.000000 13892 +workedinvestig 0 1 6.957497 0.000000 13893 +gaug 0 1 6.957497 0.000000 13894 +graviti 0 1 6.957497 0.000000 13895 +gravit 0 1 6.957497 0.000000 13896 +unitari 0 1 6.957497 0.000000 13897 +yuval 0 1 6.957497 0.000000 13898 +eman 0 1 6.957497 0.000000 13899 +jurgen 0 1 6.957497 0.000000 13900 +fromcologn 0 1 6.957497 0.000000 13901 +bureau 0 1 6.957497 0.000000 13902 +geologi 0 1 6.957497 0.000000 13903 +seismic 0 1 6.957497 0.000000 13904 +tomographi 0 1 6.957497 0.000000 13905 +hardag 0 1 6.957497 0.000000 13906 +geophys 0 1 6.957497 0.000000 13907 +geophysicist 0 1 6.957497 0.000000 13908 +comerci 0 1 6.957497 0.000000 13909 +solutionsin 0 1 6.957497 0.000000 13910 +chromodynamicsmi 0 1 6.957497 0.000000 13911 +defo 0 1 6.957497 0.000000 13912 +phy 0 1 6.957497 0.000000 13913 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..0886de97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +file 0 132 1.945910 0.000000 70 +welcom 0 122 2.079442 0.000000 99 +find 0 111 2.197225 0.000000 111 +make 0 111 2.197225 0.000000 120 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +stuff 0 87 2.484907 0.000000 171 +chang 0 82 2.484907 0.000000 163 +know 1 80 2.564949 2.564949 198 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +window 0 68 2.708050 0.000000 242 +unix 0 58 2.890372 0.000000 308 +think 0 57 2.890372 0.000000 314 +cool 0 49 3.044522 0.000000 374 +could 0 46 3.091042 0.000000 383 +realli 0 40 3.258097 0.000000 444 +mean 0 37 3.332205 0.000000 477 +short 0 36 3.367296 0.000000 499 +staff 0 36 3.367296 0.000000 490 +experiment 0 26 3.688879 0.000000 645 +instal 0 22 3.850148 0.000000 754 +increas 0 20 3.951244 0.000000 829 +anyon 0 17 4.110874 0.000000 916 +protect 0 17 4.110874 0.000000 935 +drive 0 15 4.248495 0.000000 1052 +floor 0 14 4.317488 0.000000 1070 +stori 0 14 4.317488 0.000000 1087 +comic 0 14 4.317488 0.000000 1103 +neat 0 12 4.465908 0.000000 1263 +true 0 10 4.653960 0.000000 1422 +vista 0 10 4.653960 0.000000 1452 +chanc 0 7 5.010635 0.000000 1960 +escap 0 4 5.568345 0.000000 3016 +meyour 0 3 5.857933 0.000000 3858 +blah 0 2 6.263398 0.000000 5695 +drastic 0 2 6.263398 0.000000 4201 +woof 0 1 6.957497 0.000000 13914 +nettl 0 1 6.957497 0.000000 13915 +cornerinfolik 0 1 6.957497 0.000000 13916 +blahblah 0 1 6.957497 0.000000 13917 +eeek 0 1 6.957497 0.000000 13918 +ibm 0 1 6.957497 0.000000 13919 +afteri 0 1 6.957497 0.000000 13920 +theinnoc 0 1 6.957497 0.000000 13921 +buena 0 1 6.957497 0.000000 13922 +movieplex 0 1 6.957497 0.000000 13923 +employan 0 1 6.957497 0.000000 13924 +improb 0 1 6.957497 0.000000 13925 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..bfda7b7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +hall 1 146 1.945910 1.945910 65 +report 0 131 2.079442 0.000000 92 +decemb 0 80 2.564949 0.000000 215 +taylor 0 63 2.772589 0.000000 287 +annual 0 40 3.258097 0.000000 458 +postal 0 30 3.555348 0.000000 580 +greg 1 24 3.761200 3.761200 695 +eduphon 0 15 4.248495 0.000000 1060 +informationemail 0 9 4.753590 0.000000 1564 +austinaustin 0 7 5.010635 0.000000 1966 +plaxton 0 3 5.857933 0.000000 3886 +plaxtongreg 1 1 6.957497 6.957497 13926 +plaxtoncontact 0 1 6.957497 0.000000 13927 +sciencetaylor 0 1 6.957497 0.000000 13928 +profilepubl 0 1 6.957497 0.000000 13929 +plaxtonplaxton 0 1 6.957497 0.000000 13930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..4bf66d1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +theori 0 111 2.197225 0.000000 127 +site 0 106 2.197225 0.000000 119 +search 1 95 2.397895 2.397895 155 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +learn 1 86 2.484907 2.484907 170 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +method 0 80 2.564949 0.000000 213 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +knowledg 0 67 2.708050 0.000000 243 +abstract 0 62 2.772589 0.000000 276 +artifici 0 63 2.772589 0.000000 280 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +investig 0 51 2.995732 0.000000 353 +case 0 51 2.995732 0.000000 351 +california 0 46 3.091042 0.000000 388 +answer 0 45 3.135494 0.000000 391 +editor 0 41 3.218876 0.000000 433 +autom 0 41 3.218876 0.000000 434 +press 0 42 3.218876 0.000000 419 +award 0 34 3.401197 0.000000 523 +tech 0 35 3.401197 0.000000 515 +concept 0 32 3.465736 0.000000 537 +domain 0 30 3.555348 0.000000 564 +scale 0 28 3.610918 0.000000 613 +rule 0 26 3.688879 0.000000 638 +fellow 0 24 3.761200 0.000000 701 +honor 0 23 3.806662 0.000000 729 +predict 0 19 4.007333 0.000000 855 +young 0 16 4.174387 0.000000 991 +cambridg 0 16 4.174387 0.000000 1008 +weak 0 13 4.382027 0.000000 1159 +hotlist 0 13 4.382027 0.000000 1199 +bruce 0 12 4.465908 0.000000 1226 +classif 0 9 4.753590 0.000000 1586 +aaai 0 8 4.875197 0.000000 1750 +irvin 0 8 4.875197 0.000000 1660 +presidenti 0 8 4.875197 0.000000 1737 +boundari 0 7 5.010635 0.000000 1929 +heurist 0 6 5.164786 0.000000 2125 +porter 0 5 5.347108 0.000000 2293 +complementari 0 5 5.347108 0.000000 2523 +presentarea 0 4 5.568345 0.000000 3026 +thetim 0 3 5.857933 0.000000 3581 +preced 0 3 5.857933 0.000000 3107 +researchinterest 0 2 6.263398 0.000000 5123 +rickel 0 2 6.263398 0.000000 5648 +andpostscript 0 2 6.263398 0.000000 5696 +brant 0 2 6.263398 0.000000 5652 +warrant 0 2 6.263398 0.000000 5697 +bareiss 0 2 6.263398 0.000000 5646 +porterassoci 0 1 6.957497 0.000000 13931 +interestartifici 0 1 6.957497 0.000000 13932 +researchhead 0 1 6.957497 0.000000 13933 +basesand 0 1 6.957497 0.000000 13934 +aait 0 1 6.957497 0.000000 13935 +holt 0 1 6.957497 0.000000 13936 +abstractand 0 1 6.957497 0.000000 13937 +reportport 0 1 6.957497 0.000000 13938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..062d9fd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +network 0 168 1.791759 0.000000 61 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +model 0 145 1.945910 0.000000 69 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +introduct 0 126 2.079442 0.000000 87 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +book 0 99 2.302585 0.000000 131 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +member 0 84 2.484907 0.000000 165 +refer 0 78 2.564949 0.000000 203 +state 0 76 2.564949 0.000000 207 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +logic 0 71 2.639057 0.000000 230 +write 0 72 2.639057 0.000000 222 +effici 0 73 2.639057 0.000000 233 +differ 0 66 2.708050 0.000000 253 +written 0 63 2.772589 0.000000 278 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +improv 0 62 2.772589 0.000000 289 +simpl 0 60 2.833213 0.000000 298 +reason 0 57 2.890372 0.000000 318 +variou 0 56 2.890372 0.000000 317 +sever 0 56 2.890372 0.000000 322 +detail 0 57 2.890372 0.000000 321 +overview 0 56 2.890372 0.000000 323 +thesi 0 57 2.890372 0.000000 327 +found 0 53 2.944439 0.000000 337 +give 0 50 3.044522 0.000000 359 +basic 0 50 3.044522 0.000000 360 +possibl 0 47 3.091042 0.000000 378 +electron 0 47 3.091042 0.000000 379 +understand 0 47 3.091042 0.000000 384 +mark 0 44 3.135494 0.000000 403 +made 0 44 3.135494 0.000000 398 +describ 0 45 3.135494 0.000000 400 +fast 0 42 3.218876 0.000000 429 +map 0 39 3.258097 0.000000 452 +correct 0 38 3.295837 0.000000 462 +concurr 0 34 3.401197 0.000000 501 +extend 0 32 3.465736 0.000000 539 +transform 0 32 3.465736 0.000000 542 +chapter 0 32 3.465736 0.000000 536 +specifi 0 30 3.555348 0.000000 568 +power 0 30 3.555348 0.000000 573 +compon 0 30 3.555348 0.000000 570 +synchron 0 29 3.583519 0.000000 588 +aspect 0 25 3.737670 0.000000 663 +seri 0 24 3.761200 0.000000 708 +proof 0 23 3.806662 0.000000 720 +emphasi 0 22 3.850148 0.000000 755 +inth 0 22 3.850148 0.000000 741 +sort 0 22 3.850148 0.000000 738 +divis 0 21 3.912023 0.000000 803 +safeti 0 20 3.951244 0.000000 817 +assum 0 19 4.007333 0.000000 845 +prove 0 19 4.007333 0.000000 848 +stand 0 18 4.060443 0.000000 891 +adam 0 17 4.110874 0.000000 934 +former 0 17 4.110874 0.000000 956 +weslei 0 16 4.174387 0.000000 983 +reflect 0 15 4.248495 0.000000 1034 +hybrid 0 15 4.248495 0.000000 1057 +manner 0 14 4.317488 0.000000 1074 +balanc 0 14 4.317488 0.000000 1112 +circuit 0 13 4.382027 0.000000 1131 +recurs 0 13 4.382027 0.000000 1127 +composit 0 13 4.382027 0.000000 1150 +addison 0 12 4.465908 0.000000 1230 +verifi 0 12 4.465908 0.000000 1261 +summar 0 11 4.553877 0.000000 1295 +length 0 10 4.653960 0.000000 1400 +equal 0 10 4.653960 0.000000 1424 +arithmet 0 10 4.653960 0.000000 1388 +notat 0 9 4.753590 0.000000 1489 +ataustin 0 9 4.753590 0.000000 1610 +tempor 0 9 4.753590 0.000000 1584 +misra 0 7 5.010635 0.000000 1856 +restrict 0 6 5.164786 0.000000 2129 +multiprogram 0 6 5.164786 0.000000 2010 +jayadev 0 4 5.568345 0.000000 3006 +jacob 0 4 5.568345 0.000000 2667 +rigor 0 4 5.568345 0.000000 3030 +anddistribut 0 4 5.568345 0.000000 3031 +ofprogram 0 4 5.568345 0.000000 2624 +monograph 0 4 5.568345 0.000000 2860 +uniti 1 3 5.857933 5.857933 3812 +kornerup 0 3 5.857933 0.000000 3215 +marku 0 3 5.857933 0.000000 3872 +cohen 0 3 5.857933 0.000000 3652 +rajeev 0 3 5.857933 0.000000 3152 +checker 0 3 5.857933 0.000000 3644 +parallelalgorithm 0 3 5.857933 0.000000 3249 +alsoavail 0 3 5.857933 0.000000 3887 +powerlist 0 2 6.263398 0.000000 5660 +seuss 0 2 6.263398 0.000000 5662 +carruth 0 2 6.263398 0.000000 5495 +ofpap 0 2 6.263398 0.000000 4329 +erni 0 2 6.263398 0.000000 5104 +joshi 0 2 6.263398 0.000000 4202 +chandi 0 2 6.263398 0.000000 5661 +fourier 0 2 6.263398 0.000000 5698 +offspr 0 2 6.263398 0.000000 5699 +froma 0 2 6.263398 0.000000 4862 +basedprogram 0 2 6.263398 0.000000 5700 +kaltenbach 0 1 6.957497 0.000000 13939 +thepowerlist 0 1 6.957497 0.000000 13940 +austinpsp 0 1 6.957497 0.000000 13941 +austinthi 0 1 6.957497 0.000000 13942 +deriveparallel 0 1 6.957497 0.000000 13943 +issupervis 0 1 6.957497 0.000000 13944 +groupinclud 0 1 6.957497 0.000000 13945 +edgar 0 1 6.957497 0.000000 13946 +knapp 0 1 6.957497 0.000000 13947 +ingolf 0 1 6.957497 0.000000 13948 +krger 0 1 6.957497 0.000000 13949 +josyula 0 1 6.957497 0.000000 13950 +staskauska 0 1 6.957497 0.000000 13951 +publicationsbelow 0 1 6.957497 0.000000 13952 +wherev 0 1 6.957497 0.000000 13953 +topap 0 1 6.957497 0.000000 13954 +thenot 0 1 6.957497 0.000000 13955 +inchandi 0 1 6.957497 0.000000 13956 +amanuscript 0 1 6.957497 0.000000 13957 +newun 0 1 6.957497 0.000000 13958 +operatorco 0 1 6.957497 0.000000 13959 +forrefer 0 1 6.957497 0.000000 13960 +asymbol 0 1 6.957497 0.000000 13961 +forfinit 0 1 6.957497 0.000000 13962 +unityverifi 0 1 6.957497 0.000000 13963 +toinclud 0 1 6.957497 0.000000 13964 +twodiffer 0 1 6.957497 0.000000 13965 +succinct 0 1 6.957497 0.000000 13966 +givesnumer 0 1 6.957497 0.000000 13967 +batcher 0 1 6.957497 0.000000 13968 +asadd 0 1 6.957497 0.000000 13969 +multipli 0 1 6.957497 0.000000 13970 +addercircuit 0 1 6.957497 0.000000 13971 +programscan 0 1 6.957497 0.000000 13972 +speciallyhypercub 0 1 6.957497 0.000000 13973 +caninterfer 0 1 6.957497 0.000000 13974 +adisciplin 0 1 6.957497 0.000000 13975 +genrat 0 1 6.957497 0.000000 13976 +callsfor 0 1 6.957497 0.000000 13977 +anexperi 0 1 6.957497 0.000000 13978 +ingolfkrg 0 1 6.957497 0.000000 13979 +thepsp 0 1 6.957497 0.000000 13980 +sitejacob 0 1 6.957497 0.000000 13981 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..c3d0ad13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +construct 0 139 1.945910 0.000000 82 +spring 0 131 2.079442 0.000000 88 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +commun 0 95 2.397895 0.000000 157 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +server 0 76 2.564949 0.000000 204 +appli 0 71 2.639057 0.000000 226 +line 0 75 2.639057 0.000000 231 +name 0 72 2.639057 0.000000 220 +visit 0 63 2.772589 0.000000 288 +room 0 59 2.833213 0.000000 301 +march 0 61 2.833213 0.000000 295 +electr 0 38 3.295837 0.000000 461 +sciencesunivers 0 37 3.332205 0.000000 486 +connect 0 37 3.332205 0.000000 485 +client 0 25 3.737670 0.000000 679 +sign 0 16 4.174387 0.000000 970 +guest 1 12 4.465908 4.465908 1220 +huang 0 12 4.465908 0.000000 1202 +card 0 10 4.653960 0.000000 1435 +placement 0 10 4.653960 0.000000 1420 +telecommun 0 9 4.753590 0.000000 1565 +job 0 8 4.875197 0.000000 1702 +postcard 0 6 5.164786 0.000000 2181 +attract 0 5 5.347108 0.000000 2356 +hawaii 0 3 5.857933 0.000000 3888 +csc 0 3 5.857933 0.000000 3183 +nankai 0 2 6.263398 0.000000 5147 +tianjin 0 2 6.263398 0.000000 5148 +junk 0 2 6.263398 0.000000 5701 +qime 1 1 6.957497 6.957497 13982 +edureceiv 0 1 6.957497 0.000000 13983 +univeris 0 1 6.957497 0.000000 13984 +manoa 0 1 6.957497 0.000000 13985 +hawaiiwork 0 1 6.957497 0.000000 13986 +austincours 0 1 6.957497 0.000000 13987 +teamweb 0 1 6.957497 0.000000 13988 +utcssadaili 0 1 6.957497 0.000000 13989 +texanstock 0 1 6.957497 0.000000 13990 +picturesimageschines 0 1 6.957497 0.000000 13991 +popsend 0 1 6.957497 0.000000 13992 +jobtrakut 0 1 6.957497 0.000000 13993 +gopherftp 0 1 6.957497 0.000000 13994 +newstelnet 0 1 6.957497 0.000000 13995 +cschen 0 1 6.957497 0.000000 13996 +staffyour 0 1 6.957497 0.000000 13997 +commentsguest 0 1 6.957497 0.000000 13998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..b2352d0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +applic 0 170 1.791759 0.000000 56 +texa 0 160 1.791759 0.000000 64 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +professor 0 137 1.945910 0.000000 76 +document 0 121 2.079442 0.000000 89 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +logic 0 71 2.639057 0.000000 230 +intellig 0 72 2.639057 0.000000 225 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +abstract 0 62 2.772589 0.000000 276 +visit 0 63 2.772589 0.000000 288 +reason 1 57 2.890372 2.890372 318 +index 0 56 2.890372 0.000000 309 +sever 0 56 2.890372 0.000000 322 +pointer 0 48 3.044522 0.000000 368 +visitor 0 49 3.044522 0.000000 371 +directori 0 45 3.135494 0.000000 396 +robot 0 36 3.367296 0.000000 497 +bibliographi 0 34 3.401197 0.000000 518 +represent 0 35 3.401197 0.000000 512 +dissert 0 32 3.465736 0.000000 549 +limit 0 29 3.583519 0.000000 585 +alumni 0 21 3.912023 0.000000 807 +supervis 0 20 3.951244 0.000000 840 +spatial 0 16 4.174387 0.000000 988 +atth 0 15 4.248495 0.000000 1019 +easili 0 14 4.317488 0.000000 1077 +qualit 0 11 4.553877 0.000000 1362 +tour 0 11 4.553877 0.000000 1307 +ataustin 0 9 4.753590 0.000000 1610 +yellow 0 9 4.753590 0.000000 1601 +qsim 0 3 5.857933 0.000000 3862 +kuiper 0 3 5.857933 0.000000 3794 +thephys 0 2 6.263398 0.000000 5694 +ourresearch 0 1 6.957497 0.000000 13999 +utexasqualit 0 1 6.957497 0.000000 14000 +utexasth 0 1 6.957497 0.000000 14001 +intelligentrobot 0 1 6.957497 0.000000 14002 +knowledgerepresent 0 1 6.957497 0.000000 14003 +algernon 0 1 6.957497 0.000000 14004 +benjaminkuip 0 1 6.957497 0.000000 14005 +areadescript 0 1 6.957497 0.000000 14006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..fae21a70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +introduct 0 126 2.079442 0.000000 87 +pleas 0 113 2.197225 0.000000 114 +technic 0 100 2.302585 0.000000 140 +comment 0 93 2.397895 0.000000 146 +sinc 0 90 2.397895 0.000000 159 +graphic 0 90 2.397895 0.000000 147 +institut 0 84 2.484907 0.000000 187 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +java 0 70 2.708050 0.000000 248 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +back 0 60 2.833213 0.000000 297 +unix 0 58 2.890372 0.000000 308 +suggest 0 53 2.944439 0.000000 331 +still 0 50 3.044522 0.000000 362 +around 0 43 3.178054 0.000000 415 +feel 0 37 3.332205 0.000000 483 +game 0 36 3.367296 0.000000 498 +copyright 0 36 3.367296 0.000000 495 +john 0 33 3.433987 0.000000 532 +linux 0 27 3.637586 0.000000 631 +sport 0 25 3.737670 0.000000 683 +thank 0 23 3.806662 0.000000 721 +brows 0 23 3.806662 0.000000 726 +self 0 22 3.850148 0.000000 761 +leav 0 21 3.912023 0.000000 772 +els 0 19 4.007333 0.000000 843 +modif 0 17 4.110874 0.000000 913 +joke 0 8 4.875197 0.000000 1620 +settimeout 0 5 5.347108 0.000000 2536 +guestbook 0 5 5.347108 0.000000 2475 +seed 0 4 5.568345 0.000000 2984 +timertwo 0 4 5.568345 0.000000 2985 +scrollit_rl 0 3 5.857933 0.000000 3882 +underconstruct 0 3 5.857933 0.000000 3889 +com 0 2 6.263398 0.000000 5156 +qiang 0 1 6.957497 0.000000 14007 +seriousjunk 0 1 6.957497 0.000000 14008 +realjunk 0 1 6.957497 0.000000 14009 +struggleforliv 0 1 6.957497 0.000000 14010 +qzuo 0 1 6.957497 0.000000 14011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..9f6579f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +offic 0 299 1.098612 0.000000 13 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +technolog 0 131 2.079442 0.000000 102 +compil 0 122 2.079442 0.000000 96 +document 0 121 2.079442 0.000000 89 +spring 0 131 2.079442 0.000000 88 +teach 0 108 2.197225 0.000000 112 +institut 0 84 2.484907 0.000000 187 +decemb 0 80 2.564949 0.000000 215 +name 0 72 2.639057 0.000000 220 +laboratori 0 63 2.772589 0.000000 292 +abstract 0 62 2.772589 0.000000 276 +taylor 0 63 2.772589 0.000000 287 +type 0 61 2.833213 0.000000 296 +semest 0 58 2.890372 0.000000 312 +directori 0 45 3.135494 0.000000 396 +robert 0 30 3.555348 0.000000 567 +experiment 0 26 3.688879 0.000000 645 +runtim 0 19 4.007333 0.000000 858 +less 0 18 4.060443 0.000000 892 +brown 0 16 4.174387 0.000000 977 +eduphon 0 15 4.248495 0.000000 1060 +massachusett 0 14 4.317488 0.000000 1118 +ofcomput 0 10 4.653960 0.000000 1442 +informationemail 0 9 4.753590 0.000000 1564 +pronounc 0 7 5.010635 0.000000 1918 +austinaustin 0 7 5.010635 0.000000 1966 +blumoferdb 0 5 5.347108 0.000000 2324 +bloom 0 4 5.568345 0.000000 2913 +sciencestaylor 0 3 5.857933 0.000000 3814 +bobbi 0 2 6.263398 0.000000 5678 +informationassist 0 2 6.263398 0.000000 5531 +blumoferobert 0 1 6.957497 0.000000 14012 +blumofei 0 1 6.957497 0.000000 14013 +cilkmultithread 0 1 6.957497 0.000000 14014 +hallpost 0 1 6.957497 0.000000 14015 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..724cbfd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +construct 0 139 1.945910 0.000000 82 +analysi 0 124 2.079442 0.000000 98 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +homepag 0 93 2.397895 0.000000 148 +real 0 93 2.397895 0.000000 144 +comment 0 93 2.397895 0.000000 146 +intellig 0 72 2.639057 0.000000 225 +artifici 0 63 2.772589 0.000000 280 +content 0 59 2.833213 0.000000 302 +rule 0 26 3.688879 0.000000 638 +brows 0 23 3.806662 0.000000 726 +wang 0 21 3.912023 0.000000 790 +theunivers 0 21 3.912023 0.000000 797 +permit 0 16 4.174387 0.000000 962 +candid 0 9 4.753590 0.000000 1606 +publicationsi 0 3 5.857933 0.000000 3827 +wangwelcom 0 1 6.957497 0.000000 14016 +rhwang 0 1 6.957497 0.000000 14017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..64316ab5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +network 0 168 1.791759 0.000000 61 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +lectur 0 135 1.945910 0.000000 73 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +mathemat 0 108 2.197225 0.000000 123 +memori 0 101 2.302585 0.000000 139 +intellig 0 72 2.639057 0.000000 225 +appli 0 71 2.639057 0.000000 226 +solv 0 73 2.639057 0.000000 234 +goal 0 66 2.708050 0.000000 250 +artifici 0 63 2.772589 0.000000 280 +organ 0 65 2.772589 0.000000 265 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +detail 0 57 2.890372 0.000000 321 +undergradu 0 54 2.944439 0.000000 338 +finger 0 52 2.995732 0.000000 354 +visual 0 48 3.044522 0.000000 372 +vision 0 41 3.218876 0.000000 430 +seminar 0 38 3.295837 0.000000 470 +robot 0 36 3.367296 0.000000 497 +neural 1 30 3.555348 3.555348 578 +computersci 0 30 3.555348 0.000000 562 +postal 0 30 3.555348 0.000000 580 +utc 0 27 3.637586 0.000000 629 +strategi 0 25 3.737670 0.000000 682 +decis 0 23 3.806662 0.000000 728 +self 0 22 3.850148 0.000000 761 +concentr 0 18 4.060443 0.000000 906 +cognit 0 16 4.174387 0.000000 986 +evolv 0 12 4.465908 0.000000 1223 +genet 0 10 4.653960 0.000000 1409 +risto 0 9 4.753590 0.000000 1523 +schema 0 6 5.164786 0.000000 1988 +ucla 0 5 5.347108 0.000000 2502 +oftexa 0 4 5.568345 0.000000 3003 +episod 0 4 5.568345 0.000000 2747 +intereststh 0 3 5.857933 0.000000 3838 +cortex 0 3 5.857933 0.000000 3856 +helsinki 0 2 6.263398 0.000000 5702 +miikkulainenristo 0 1 6.957497 0.000000 14018 +miikkulainenassoci 0 1 6.957497 0.000000 14019 +processeswith 0 1 6.957497 0.000000 14020 +languageacquisit 0 1 6.957497 0.000000 14021 +networkswith 0 1 6.957497 0.000000 14022 +discoversequenti 0 1 6.957497 0.000000 14023 +classessumm 0 1 6.957497 0.000000 14024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..873848b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +rong 1 2 6.263398 6.263398 5703 +bigfoot 1 1 6.957497 6.957497 14025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..22bfcf62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +spring 0 131 2.079442 0.000000 88 +theori 0 111 2.197225 0.000000 127 +member 0 84 2.484907 0.000000 165 +complet 0 77 2.564949 0.000000 208 +onlin 0 75 2.639057 0.000000 223 +plan 0 65 2.772589 0.000000 272 +vita 0 38 3.295837 0.000000 473 +random 0 34 3.401197 0.000000 511 +curriculum 0 33 3.433987 0.000000 535 +postal 0 30 3.555348 0.000000 580 +particularli 0 19 4.007333 0.000000 867 +atth 0 15 4.248495 0.000000 1019 +ataustin 0 9 4.753590 0.000000 1610 +andcomput 0 8 4.875197 0.000000 1623 +supervisor 0 3 5.857933 0.000000 3754 +rajaraman 0 2 6.263398 0.000000 5704 +rraj 0 2 6.263398 0.000000 5705 +rajmohan 0 2 6.263398 0.000000 5706 +mydissert 0 2 6.263398 0.000000 5496 +mypubl 0 2 6.263398 0.000000 5707 +linkscontact 0 2 6.263398 0.000000 5708 +pagerajmohan 0 1 6.957497 0.000000 14026 +gregplaxton 0 1 6.957497 0.000000 14027 +incombinator 0 1 6.957497 0.000000 14028 +sciencemiscellan 0 1 6.957497 0.000000 14029 +ephon 0 1 6.957497 0.000000 14030 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..b3abae72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +technolog 0 131 2.079442 0.000000 102 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +librari 0 87 2.484907 0.000000 181 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +colleg 0 61 2.833213 0.000000 300 +telephon 0 50 3.044522 0.000000 373 +movi 1 40 3.258097 3.258097 459 +live 0 40 3.258097 0.000000 451 +ofth 0 36 3.367296 0.000000 491 +computersci 0 30 3.555348 0.000000 562 +weather 0 28 3.610918 0.000000 618 +spent 0 25 3.737670 0.000000 676 +yahoo 0 24 3.761200 0.000000 707 +beij 0 19 4.007333 0.000000 876 +five 0 19 4.007333 0.000000 841 +citi 0 19 4.007333 0.000000 874 +tsinghua 0 13 4.382027 0.000000 1195 +replic 0 12 4.465908 0.000000 1231 +entertain 0 12 4.465908 0.000000 1286 +catalog 0 10 4.653960 0.000000 1431 +login 0 9 4.753590 0.000000 1550 +film 0 8 4.875197 0.000000 1761 +capit 0 7 5.010635 0.000000 1957 +apart 0 7 5.010635 0.000000 1936 +chronicl 0 7 5.010635 0.000000 1952 +river 0 6 5.164786 0.000000 2220 +provinc 0 4 5.568345 0.000000 3009 +republ 0 4 5.568345 0.000000 3032 +sinanet 0 4 5.568345 0.000000 2883 +rong 0 2 6.263398 0.000000 5703 +zhai 0 2 6.263398 0.000000 5709 +utaccess 0 1 6.957497 0.000000 14031 +homepagea 0 1 6.957497 0.000000 14032 +chinachina 0 1 6.957497 0.000000 14033 +jinan 0 1 6.957497 0.000000 14034 +myhometown 0 1 6.957497 0.000000 14035 +shandong 0 1 6.957497 0.000000 14036 +gotmi 0 1 6.957497 0.000000 14037 +ofchina 0 1 6.957497 0.000000 14038 +texasaustin 0 1 6.957497 0.000000 14039 +rent 0 1 6.957497 0.000000 14040 +utcsth 0 1 6.957497 0.000000 14041 +utnetcat 0 1 6.957497 0.000000 14042 +browsabl 0 1 6.957497 0.000000 14043 +austininform 0 1 6.957497 0.000000 14044 +utcat 0 1 6.957497 0.000000 14045 +systemsdynam 0 1 6.957497 0.000000 14046 +graphicsc 0 1 6.957497 0.000000 14047 +logicc 0 1 6.957497 0.000000 14048 +moviesaustin 0 1 6.957497 0.000000 14049 +filmsmicrosoft 0 1 6.957497 0.000000 14050 +cinemania 0 1 6.957497 0.000000 14051 +onlineal 0 1 6.957497 0.000000 14052 +guidehollywood 0 1 6.957497 0.000000 14053 +onlineinternet 0 1 6.957497 0.000000 14054 +databaserog 0 1 6.957497 0.000000 14055 +ebert 0 1 6.957497 0.000000 14056 +moviesvisit 0 1 6.957497 0.000000 14057 +contactmail 0 1 6.957497 0.000000 14058 +aaustin 0 1 6.957497 0.000000 14059 +emailrtan 0 1 6.957497 0.000000 14060 +fingerclick 0 1 6.957497 0.000000 14061 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..e6caffce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +machin 0 129 2.079442 0.000000 95 +teach 0 108 2.197225 0.000000 112 +need 0 98 2.302585 0.000000 135 +academ 0 82 2.484907 0.000000 178 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +servic 0 72 2.639057 0.000000 236 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +dept 0 64 2.772589 0.000000 291 +think 0 57 2.890372 0.000000 314 +much 0 52 2.995732 0.000000 349 +life 0 50 3.044522 0.000000 375 +cool 0 49 3.044522 0.000000 374 +littl 0 39 3.258097 0.000000 454 +realli 0 40 3.258097 0.000000 444 +actual 0 28 3.610918 0.000000 604 +altern 0 26 3.688879 0.000000 641 +alwai 0 24 3.761200 0.000000 691 +wish 0 24 3.761200 0.000000 692 +almost 0 22 3.850148 0.000000 742 +entir 0 20 3.951244 0.000000 811 +nice 0 20 3.951244 0.000000 809 +modern 0 16 4.174387 0.000000 966 +anyth 0 16 4.174387 0.000000 998 +choos 0 16 4.174387 0.000000 964 +opportun 0 13 4.382027 0.000000 1161 +besid 0 8 4.875197 0.000000 1681 +tang 0 5 5.347108 0.000000 2409 +plant 0 5 5.347108 0.000000 2497 +aspir 0 4 5.568345 0.000000 3019 +fear 0 4 5.568345 0.000000 2911 +freedom 0 3 5.857933 0.000000 3890 +rupert 1 2 6.263398 6.263398 5680 +miracl 0 2 6.263398 0.000000 5710 +holi 0 2 6.263398 0.000000 5711 +stimul 0 2 6.263398 0.000000 5712 +empti 0 2 6.263398 0.000000 5478 +truck 0 2 6.263398 0.000000 5713 +wash 0 2 6.263398 0.000000 5714 +strangl 0 1 6.957497 0.000000 14062 +curious 0 1 6.957497 0.000000 14063 +inquiri 0 1 6.957497 0.000000 14064 +delic 0 1 6.957497 0.000000 14065 +depriv 0 1 6.957497 0.000000 14066 +distast 0 1 6.957497 0.000000 14067 +deni 0 1 6.957497 0.000000 14068 +duress 0 1 6.957497 0.000000 14069 +fate 0 1 6.957497 0.000000 14070 +messi 0 1 6.957497 0.000000 14071 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..08d1be6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +homepag 0 93 2.397895 0.000000 148 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 +ruwei 0 1 6.957497 0.000000 14072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..386cd77f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +parallel 0 169 1.791759 0.000000 60 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +analysi 0 124 2.079442 0.000000 98 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +schedul 0 119 2.079442 0.000000 85 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +journal 0 83 2.484907 0.000000 183 +method 0 80 2.564949 0.000000 213 +april 0 77 2.564949 0.000000 196 +appli 0 71 2.639057 0.000000 226 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +meet 0 72 2.639057 0.000000 229 +evalu 0 64 2.772589 0.000000 266 +colleg 0 61 2.833213 0.000000 300 +variou 0 56 2.890372 0.000000 317 +major 0 56 2.890372 0.000000 315 +scientif 0 53 2.944439 0.000000 341 +case 0 51 2.995732 0.000000 351 +numer 0 49 3.044522 0.000000 369 +better 0 45 3.135494 0.000000 401 +http 0 41 3.218876 0.000000 420 +continu 0 39 3.258097 0.000000 448 +tutori 0 39 3.258097 0.000000 437 +robert 0 30 3.555348 0.000000 567 +effort 0 26 3.688879 0.000000 652 +supercomput 0 25 3.737670 0.000000 681 +famili 0 23 3.806662 0.000000 735 +prove 0 19 4.007333 0.000000 848 +former 0 17 4.110874 0.000000 956 +easili 0 14 4.317488 0.000000 1077 +infrastructur 0 12 4.465908 0.000000 1234 +forc 0 10 4.653960 0.000000 1384 +maryland 0 6 5.164786 0.000000 2140 +park 0 6 5.164786 0.000000 2218 +plapack 0 3 5.857933 0.000000 3849 +geijn 0 2 6.263398 0.000000 5715 +appliedmathemat 0 2 6.263398 0.000000 5716 +interestnumer 0 2 6.263398 0.000000 5717 +researchth 0 2 6.263398 0.000000 5492 +rvdg 0 1 6.957497 0.000000 14073 +geijnassoci 0 1 6.957497 0.000000 14074 +oftradit 0 1 6.957497 0.000000 14075 +sequentialmachin 0 1 6.957497 0.000000 14076 +inoth 0 1 6.957497 0.000000 14077 +researchconcentr 0 1 6.957497 0.000000 14078 +forimpl 0 1 6.957497 0.000000 14079 +allowssuch 0 1 6.957497 0.000000 14080 +parallelprocessor 0 1 6.957497 0.000000 14081 +intercom 0 1 6.957497 0.000000 14082 +sl_librari 0 1 6.957497 0.000000 14083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..c760ebbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +know 0 80 2.564949 0.000000 198 +window 0 68 2.708050 0.000000 242 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +undergradu 0 54 2.944439 0.000000 338 +visitor 0 49 3.044522 0.000000 371 +india 1 32 3.465736 3.465736 550 +computersci 0 30 3.555348 0.000000 562 +els 0 19 4.007333 0.000000 843 +region 0 19 4.007333 0.000000 875 +universityof 0 15 4.248495 0.000000 1061 +countri 0 15 4.248495 0.000000 1059 +settimeout 0 5 5.347108 0.000000 2536 +seed 1 4 5.568345 5.568345 2984 +abraham 0 4 5.568345 0.000000 2644 +timertwo 0 4 5.568345 0.000000 2985 +engg 0 4 5.568345 0.000000 2884 +scrollit_rl 0 3 5.857933 0.000000 3882 +kerala 0 3 5.857933 0.000000 3749 +sciencesand 0 2 6.263398 0.000000 4711 +hail 0 2 6.263398 0.000000 5583 +sundeep 0 1 6.957497 0.000000 14084 +sundeepabraham 0 1 6.957497 0.000000 14085 +calicut 0 1 6.957497 0.000000 14086 +tinkerwith 0 1 6.957497 0.000000 14087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..019e3c22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +world 0 115 2.197225 0.000000 126 +make 0 111 2.197225 0.000000 120 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +peopl 0 96 2.302585 0.000000 132 +advanc 0 99 2.302585 0.000000 130 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +activ 0 84 2.484907 0.000000 182 +info 0 85 2.484907 0.000000 176 +refer 0 78 2.564949 0.000000 203 +state 0 76 2.564949 0.000000 207 +server 0 76 2.564949 0.000000 204 +free 0 73 2.639057 0.000000 224 +intellig 0 72 2.639057 0.000000 225 +html 0 75 2.639057 0.000000 235 +view 0 70 2.708050 0.000000 254 +prof 0 64 2.772589 0.000000 273 +artifici 0 63 2.772589 0.000000 280 +experi 0 64 2.772589 0.000000 283 +virtual 0 62 2.772589 0.000000 285 +guid 0 63 2.772589 0.000000 267 +taylor 0 63 2.772589 0.000000 287 +right 0 48 3.044522 0.000000 363 +friend 0 48 3.044522 0.000000 376 +algebra 0 45 3.135494 0.000000 394 +press 0 42 3.218876 0.000000 419 +linear 0 41 3.218876 0.000000 431 +map 0 39 3.258097 0.000000 452 +express 0 32 3.465736 0.000000 540 +robert 0 30 3.555348 0.000000 567 +weather 0 28 3.610918 0.000000 618 +packag 0 28 3.610918 0.000000 614 +great 0 27 3.637586 0.000000 626 +mine 0 26 3.688879 0.000000 654 +repres 0 26 3.688879 0.000000 656 +jeff 0 25 3.737670 0.000000 673 +hill 0 25 3.737670 0.000000 670 +yahoo 0 24 3.761200 0.000000 707 +famili 0 23 3.806662 0.000000 735 +miscellan 0 23 3.806662 0.000000 731 +unit 0 21 3.912023 0.000000 779 +lyco 0 19 4.007333 0.000000 871 +exercis 0 19 4.007333 0.000000 842 +hypertext 0 19 4.007333 0.000000 865 +boston 0 19 4.007333 0.000000 862 +white 0 17 4.110874 0.000000 951 +sign 0 16 4.174387 0.000000 970 +hotlist 0 13 4.382027 0.000000 1199 +entertain 0 12 4.465908 0.000000 1286 +america 0 11 4.553877 0.000000 1370 +consortium 0 10 4.653960 0.000000 1467 +swim 0 9 4.753590 0.000000 1599 +yellow 0 9 4.753590 0.000000 1601 +congress 0 9 4.753590 0.000000 1592 +respect 0 9 4.753590 0.000000 1545 +establish 0 9 4.753590 0.000000 1532 +govern 0 9 4.753590 0.000000 1581 +calvin 0 9 4.753590 0.000000 1518 +yanni 0 8 4.875197 0.000000 1713 +hockei 0 8 4.875197 0.000000 1760 +opinion 0 8 4.875197 0.000000 1708 +chronicl 0 7 5.010635 0.000000 1952 +necessarili 0 7 5.010635 0.000000 1899 +altavista 0 6 5.164786 0.000000 2222 +constitut 0 6 5.164786 0.000000 2026 +quick 0 6 5.164786 0.000000 2184 +andrea 0 5 5.347108 0.000000 2375 +shall 0 3 5.857933 0.000000 3891 +abridg 0 3 5.857933 0.000000 3772 +freedom 0 3 5.857933 0.000000 3890 +plapack 0 3 5.857933 0.000000 3849 +health 0 3 5.857933 0.000000 3787 +concert 0 3 5.857933 0.000000 3533 +thereof 0 2 6.263398 0.000000 5484 +geijn 0 2 6.263398 0.000000 5715 +musician 0 2 6.263398 0.000000 5718 +hamilton 0 2 6.263398 0.000000 5719 +guyer 0 2 6.263398 0.000000 4171 +northwestern 0 2 6.263398 0.000000 5502 +nate 0 2 6.263398 0.000000 5720 +dell 0 2 6.263398 0.000000 4193 +fring 0 2 6.263398 0.000000 5721 +sammi 0 1 6.957497 0.000000 14088 +startingpoint 0 1 6.957497 0.000000 14089 +religion 0 1 6.957497 0.000000 14090 +orprohibit 0 1 6.957497 0.000000 14091 +ofspeech 0 1 6.957497 0.000000 14092 +peaceabl 0 1 6.957497 0.000000 14093 +toassembl 0 1 6.957497 0.000000 14094 +petit 0 1 6.957497 0.000000 14095 +redress 0 1 6.957497 0.000000 14096 +grievanc 0 1 6.957497 0.000000 14097 +herbarium 0 1 6.957497 0.000000 14098 +anagram 0 1 6.957497 0.000000 14099 +nil 0 1 6.957497 0.000000 14100 +reker 0 1 6.957497 0.000000 14101 +pop 0 1 6.957497 0.000000 14102 +anthropolog 0 1 6.957497 0.000000 14103 +kate 0 1 6.957497 0.000000 14104 +showbiz 0 1 6.957497 0.000000 14105 +pollstar 0 1 6.957497 0.000000 14106 +ryder 0 1 6.957497 0.000000 14107 +laptop 0 1 6.957497 0.000000 14108 +traveloc 0 1 6.957497 0.000000 14109 +eduth 0 1 6.957497 0.000000 14110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..67552e9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +file 0 132 1.945910 0.000000 70 +hall 0 146 1.945910 0.000000 65 +report 0 131 2.079442 0.000000 92 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +method 0 80 2.564949 0.000000 213 +logic 0 71 2.639057 0.000000 230 +main 0 67 2.708050 0.000000 256 +taylor 0 63 2.772589 0.000000 287 +abstract 0 62 2.772589 0.000000 276 +frequent 0 49 3.044522 0.000000 367 +around 0 43 3.178054 0.000000 415 +edit 0 42 3.218876 0.000000 418 +formal 0 37 3.332205 0.000000 478 +dissert 0 32 3.465736 0.000000 549 +common 0 30 3.555348 0.000000 574 +propos 0 28 3.610918 0.000000 602 +univ 0 28 3.610918 0.000000 617 +fellow 0 24 3.761200 0.000000 701 +lisp 0 18 4.060443 0.000000 897 +boyer 0 6 5.164786 0.000000 2013 +sawada 0 3 5.857933 0.000000 3190 +oral 0 3 5.857933 0.000000 3189 +teacher 0 3 5.857933 0.000000 3892 +supplementari 0 2 6.263398 0.000000 4752 +bowen 0 2 6.263398 0.000000 4170 +sawadajun 0 1 6.957497 0.000000 14111 +sawadacontact 0 1 6.957497 0.000000 14112 +wooten 0 1 6.957497 0.000000 14113 +kbresourc 0 1 6.957497 0.000000 14114 +pvsother 0 1 6.957497 0.000000 14115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..2d2e464e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +avail 0 169 1.791759 0.000000 48 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +like 0 132 1.945910 0.000000 81 +object 0 138 1.945910 0.000000 79 +tool 0 117 2.079442 0.000000 93 +databas 0 122 2.079442 0.000000 86 +postscript 0 131 2.079442 0.000000 90 +look 0 107 2.197225 0.000000 115 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +specif 0 106 2.197225 0.000000 106 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +take 0 97 2.302585 0.000000 134 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +decemb 0 80 2.564949 0.000000 215 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +simul 0 66 2.708050 0.000000 255 +complex 0 64 2.772589 0.000000 269 +result 0 65 2.772589 0.000000 281 +improv 0 62 2.772589 0.000000 289 +visit 0 63 2.772589 0.000000 288 +creat 0 63 2.772589 0.000000 277 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +index 0 56 2.890372 0.000000 309 +overview 0 56 2.890372 0.000000 323 +basic 0 50 3.044522 0.000000 360 +approach 0 48 3.044522 0.000000 366 +get 0 46 3.091042 0.000000 380 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +term 0 43 3.178054 0.000000 411 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +feel 0 37 3.332205 0.000000 483 +product 0 33 3.433987 0.000000 527 +independ 0 32 3.465736 0.000000 548 +transform 0 32 3.465736 0.000000 542 +titl 0 31 3.496508 0.000000 556 +compon 0 30 3.555348 0.000000 570 +domain 0 30 3.555348 0.000000 564 +scale 0 28 3.610918 0.000000 613 +releas 0 28 3.610918 0.000000 616 +utc 0 27 3.637586 0.000000 629 +manipul 0 27 3.637586 0.000000 624 +subject 0 26 3.688879 0.000000 647 +pattern 0 24 3.761200 0.000000 689 +demonstr 0 24 3.761200 0.000000 694 +scalabl 0 24 3.761200 0.000000 705 +equat 0 23 3.806662 0.000000 724 +compress 0 23 3.806662 0.000000 719 +defin 0 22 3.850148 0.000000 746 +recommend 0 22 3.850148 0.000000 737 +period 0 22 3.850148 0.000000 743 +thu 0 21 3.912023 0.000000 773 +reflect 0 15 4.248495 0.000000 1034 +goe 0 15 4.248495 0.000000 1044 +composit 0 13 4.382027 0.000000 1150 +dbm 0 13 4.382027 0.000000 1136 +assembl 0 12 4.465908 0.000000 1207 +refin 0 11 4.553877 0.000000 1363 +pagewelcom 0 11 4.553877 0.000000 1344 +typic 0 11 4.553877 0.000000 1360 +valid 0 11 4.553877 0.000000 1299 +evolut 0 11 4.553877 0.000000 1314 +modul 0 10 4.653960 0.000000 1434 +relationship 0 10 4.653960 0.000000 1383 +bart 0 9 4.753590 0.000000 1559 +reus 0 8 4.875197 0.000000 1661 +successfulli 0 7 5.010635 0.000000 1869 +beyond 0 7 5.010635 0.000000 1834 +deliv 0 6 5.164786 0.000000 2070 +batori 0 4 5.568345 0.000000 2690 +avion 0 4 5.568345 0.000000 3018 +substanti 0 4 5.568345 0.000000 2921 +metadata 0 4 5.568345 0.000000 2945 +breadth 0 4 5.568345 0.000000 2695 +interchang 0 3 5.857933 0.000000 3893 +tokuda 0 3 5.857933 0.000000 3266 +smaragdaki 0 3 5.857933 0.000000 3851 +lightweight 0 3 5.857933 0.000000 3234 +encapsul 0 2 6.263398 0.000000 5541 +reusabl 0 2 6.263398 0.000000 4218 +marti 0 2 6.263398 0.000000 5679 +genvoca 0 1 6.957497 0.000000 14116 +ssgrg 0 1 6.957497 0.000000 14117 +professorangela 0 1 6.957497 0.000000 14118 +dappert 0 1 6.957497 0.000000 14119 +studentguillermo 0 1 6.957497 0.000000 14120 +jimenez 0 1 6.957497 0.000000 14121 +perezph 0 1 6.957497 0.000000 14122 +studentjeff 0 1 6.957497 0.000000 14123 +thomasph 0 1 6.957497 0.000000 14124 +studentl 0 1 6.957497 0.000000 14125 +studentyanni 0 1 6.957497 0.000000 14126 +studentk 0 1 6.957497 0.000000 14127 +shepherdresearch 0 1 6.957497 0.000000 14128 +associateform 0 1 6.957497 0.000000 14129 +datesdinesh 0 1 6.957497 0.000000 14130 +dasph 0 1 6.957497 0.000000 14131 +milli 0 1 6.957497 0.000000 14132 +villarrealph 0 1 6.957497 0.000000 14133 +geracipostdoc 0 1 6.957497 0.000000 14134 +sirkinph 0 1 6.957497 0.000000 14135 +sankar 0 1 6.957497 0.000000 14136 +dasarim 0 1 6.957497 0.000000 14137 +starter 0 1 6.957497 0.000000 14138 +reengin 0 1 6.957497 0.000000 14139 +generatorsautom 0 1 6.957497 0.000000 14140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..d8eae9cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +machin 0 129 2.079442 0.000000 95 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +well 0 109 2.197225 0.000000 121 +send 0 114 2.197225 0.000000 109 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +stuff 0 87 2.484907 0.000000 171 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +want 0 79 2.564949 0.000000 199 +orient 0 80 2.564949 0.000000 205 +good 0 77 2.564949 0.000000 200 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +differ 0 66 2.708050 0.000000 253 +would 0 67 2.708050 0.000000 251 +order 0 69 2.708050 0.000000 249 +guid 0 63 2.772589 0.000000 267 +import 0 65 2.772589 0.000000 282 +colleg 0 61 2.833213 0.000000 300 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +think 0 57 2.890372 0.000000 314 +summer 0 56 2.890372 0.000000 311 +publish 0 57 2.890372 0.000000 326 +undergradu 0 54 2.944439 0.000000 338 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +maintain 0 51 2.995732 0.000000 342 +basic 0 50 3.044522 0.000000 360 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +without 0 50 3.044522 0.000000 370 +still 0 50 3.044522 0.000000 362 +could 0 46 3.091042 0.000000 383 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +futur 0 41 3.218876 0.000000 427 +might 0 41 3.218876 0.000000 426 +realli 0 40 3.258097 0.000000 444 +littl 0 39 3.258097 0.000000 454 +probabl 0 40 3.258097 0.000000 455 +mean 0 37 3.332205 0.000000 477 +expect 0 37 3.332205 0.000000 484 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +within 0 33 3.433987 0.000000 525 +kind 0 32 3.465736 0.000000 541 +actual 0 28 3.610918 0.000000 604 +load 0 28 3.610918 0.000000 601 +utc 0 27 3.637586 0.000000 629 +great 0 27 3.637586 0.000000 626 +linux 0 27 3.637586 0.000000 631 +spent 0 25 3.737670 0.000000 676 +miscellan 0 23 3.806662 0.000000 731 +try 0 22 3.850148 0.000000 764 +finish 0 22 3.850148 0.000000 748 +dai 0 22 3.850148 0.000000 753 +longer 0 20 3.951244 0.000000 816 +sure 0 20 3.951244 0.000000 813 +minut 0 20 3.951244 0.000000 810 +spend 0 19 4.007333 0.000000 850 +item 0 19 4.007333 0.000000 856 +five 0 19 4.007333 0.000000 841 +scott 0 18 4.060443 0.000000 884 +hobbi 0 16 4.174387 0.000000 1009 +doesn 0 15 4.248495 0.000000 1055 +wait 0 13 4.382027 0.000000 1168 +neat 0 12 4.465908 0.000000 1263 +appl 0 11 4.553877 0.000000 1303 +noth 0 11 4.553877 0.000000 1328 +santa 0 10 4.653960 0.000000 1441 +didn 0 9 4.753590 0.000000 1563 +oop 0 8 4.875197 0.000000 1778 +forget 0 8 4.875197 0.000000 1712 +perfect 0 7 5.010635 0.000000 1921 +pageth 0 7 5.010635 0.000000 1939 +encrypt 0 7 5.010635 0.000000 1835 +squash 0 6 5.164786 0.000000 2223 +beer 0 6 5.164786 0.000000 2216 +sharp 0 6 5.164786 0.000000 2100 +amherst 0 5 5.347108 0.000000 2484 +humor 0 5 5.347108 0.000000 2533 +amaz 0 4 5.568345 0.000000 2600 +pagescott 0 4 5.568345 0.000000 2978 +slight 0 3 5.857933 0.000000 3894 +glenn 0 3 5.857933 0.000000 3869 +down 0 3 5.857933 0.000000 3870 +wine 0 3 5.857933 0.000000 3895 +maker 0 3 5.857933 0.000000 3164 +dine 0 3 5.857933 0.000000 3472 +citizen 0 3 5.857933 0.000000 3238 +iici 0 3 5.857933 0.000000 3436 +bright 0 3 5.857933 0.000000 3596 +fanci 0 2 6.263398 0.000000 4992 +unpredict 0 2 6.263398 0.000000 5722 +stuffit 0 2 6.263398 0.000000 4127 +invalu 0 2 6.263398 0.000000 4680 +forev 0 2 6.263398 0.000000 5636 +grab 0 2 6.263398 0.000000 5723 +pageokai 0 1 6.957497 0.000000 14141 +overdu 0 1 6.957497 0.000000 14142 +mead 0 1 6.957497 0.000000 14143 +psion 0 1 6.957497 0.000000 14144 +palmtop 0 1 6.957497 0.000000 14145 +anastasi 0 1 6.957497 0.000000 14146 +poke 0 1 6.957497 0.000000 14147 +ala 0 1 6.957497 0.000000 14148 +bebox 0 1 6.957497 0.000000 14149 +sfkaplan 0 1 6.957497 0.000000 14150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..11e0a470 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +comment 0 93 2.397895 0.000000 146 +build 0 85 2.484907 0.000000 184 +onlin 0 75 2.639057 0.000000 223 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +finger 0 52 2.995732 0.000000 354 +get 0 46 3.091042 0.000000 380 +log 0 19 4.007333 0.000000 857 +floor 0 14 4.317488 0.000000 1070 +touch 0 12 4.465908 0.000000 1288 +river 0 6 5.164786 0.000000 2220 +shenoi 1 3 5.857933 5.857933 3269 +tower 0 3 5.857933 0.000000 3818 +prashant 0 2 6.263398 0.000000 4331 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..461d833c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +tabl 0 51 2.995732 0.000000 346 +finger 0 52 2.995732 0.000000 354 +campu 0 27 3.637586 0.000000 623 +vlsi 0 21 3.912023 0.000000 795 +citi 0 19 4.007333 0.000000 874 +vallei 0 7 5.010635 0.000000 1959 +ongo 0 6 5.164786 0.000000 2215 +coffe 0 5 5.347108 0.000000 2556 +pleasant 0 3 5.857933 0.000000 3825 +bookshelf 0 2 6.263398 0.000000 5724 +shaob 0 1 6.957497 0.000000 14151 +cyberhom 0 1 6.957497 0.000000 14152 +hardvar 0 1 6.957497 0.000000 14153 +verifc 0 1 6.957497 0.000000 14154 +shma 0 1 6.957497 0.000000 14155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..07111a5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +object 0 138 1.945910 0.000000 79 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +mathemat 0 108 2.197225 0.000000 123 +proceed 0 93 2.397895 0.000000 152 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +dynam 0 76 2.564949 0.000000 194 +intellig 0 72 2.639057 0.000000 225 +logic 0 71 2.639057 0.000000 230 +line 0 75 2.639057 0.000000 231 +appli 0 71 2.639057 0.000000 226 +symposium 0 72 2.639057 0.000000 238 +artifici 0 63 2.772589 0.000000 280 +processor 0 54 2.944439 0.000000 335 +life 0 50 3.044522 0.000000 375 +adapt 0 46 3.091042 0.000000 387 +music 0 42 3.218876 0.000000 436 +annual 0 40 3.258097 0.000000 458 +india 0 32 3.465736 0.000000 550 +neural 0 30 3.555348 0.000000 578 +qualiti 0 20 3.951244 0.000000 832 +massiv 0 15 4.248495 0.000000 1026 +nonlinear 0 14 4.317488 0.000000 1107 +affili 0 13 4.382027 0.000000 1194 +automata 0 13 4.382027 0.000000 1135 +avenu 0 12 4.465908 0.000000 1277 +itali 0 11 4.553877 0.000000 1378 +evolut 0 11 4.553877 0.000000 1314 +genet 0 10 4.653960 0.000000 1409 +kumar 0 9 4.753590 0.000000 1506 +risto 0 9 4.753590 0.000000 1523 +chao 0 8 4.875197 0.000000 1753 +miikkulainen 0 8 4.875197 0.000000 1667 +signal 0 7 5.010635 0.000000 1910 +edumi 0 6 5.164786 0.000000 2132 +cellular 0 5 5.347108 0.000000 2433 +dual 0 5 5.347108 0.000000 2522 +austindepart 0 4 5.568345 0.000000 3008 +reinforc 0 4 5.568345 0.000000 2674 +snail 0 4 5.568345 0.000000 2916 +sciencestaylor 0 3 5.857933 0.000000 3814 +patrick 0 3 5.857933 0.000000 3334 +shailesh 0 2 6.263398 0.000000 5578 +fuzzi 0 2 6.263398 0.000000 5423 +publicationson 0 2 6.263398 0.000000 4899 +singh 0 2 6.263398 0.000000 5675 +kumarshailesh 0 1 6.957497 0.000000 14156 +kumarth 0 1 6.957497 0.000000 14157 +skumar 0 1 6.957497 0.000000 14158 +resumeresearch 0 1 6.957497 0.000000 14159 +publicationscontact 0 1 6.957497 0.000000 14160 +mesrcm 0 1 6.957497 0.000000 14161 +spiritu 0 1 6.957497 0.000000 14162 +offersom 0 1 6.957497 0.000000 14163 +linkscognit 0 1 6.957497 0.000000 14164 +scienceutc 0 1 6.957497 0.000000 14165 +researchutc 0 1 6.957497 0.000000 14166 +groupresearch 0 1 6.957497 0.000000 14167 +neuroevolut 0 1 6.957497 0.000000 14168 +predistort 0 1 6.957497 0.000000 14169 +goetz 0 1 6.957497 0.000000 14170 +bari 0 1 6.957497 0.000000 14171 +bord 0 1 6.957497 0.000000 14172 +aprl 0 1 6.957497 0.000000 14173 +whiti 0 1 6.957497 0.000000 14174 +offernet 0 1 6.957497 0.000000 14175 +assistancesearch 0 1 6.957497 0.000000 14176 +institutewww 0 1 6.957497 0.000000 14177 +infoindia 0 1 6.957497 0.000000 14178 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..a816b29b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +applic 0 170 1.791759 0.000000 56 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +area 0 144 1.945910 0.000000 80 +make 0 111 2.197225 0.000000 120 +world 0 115 2.197225 0.000000 126 +main 0 67 2.708050 0.000000 256 +plan 0 65 2.772589 0.000000 272 +favorit 0 44 3.135494 0.000000 410 +posit 0 31 3.496508 0.000000 552 +someth 0 31 3.496508 0.000000 554 +photo 0 31 3.496508 0.000000 561 +turn 0 29 3.583519 0.000000 586 +utc 1 27 3.637586 3.637586 629 +lead 0 23 3.806662 0.000000 718 +scheme 0 20 3.951244 0.000000 818 +particularli 0 19 4.007333 0.000000 867 +success 0 10 4.653960 0.000000 1390 +meta 0 9 4.753590 0.000000 1505 +yanni 1 8 4.875197 4.875197 1713 +gold 0 8 4.875197 0.000000 1745 +dictionari 0 8 4.875197 0.000000 1642 +moder 0 6 5.164786 0.000000 2112 +arrang 0 6 5.164786 0.000000 2023 +webster 0 5 5.347108 0.000000 2468 +album 0 4 5.568345 0.000000 2888 +smaragdaki 1 3 5.857933 5.857933 3851 +serious 0 3 5.857933 0.000000 3663 +alchemi 0 1 6.957497 0.000000 14179 +sitessmaragd 0 1 6.957497 0.000000 14180 +eduyanni 0 1 6.957497 0.000000 14181 +smaragdakisunivers 0 1 6.957497 0.000000 14182 +departmenttai 0 1 6.957497 0.000000 14183 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..6b74a05e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 0 384 0.693147 0.000000 11 +link 1 247 1.386294 1.386294 24 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +note 0 142 1.945910 0.000000 67 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +person 0 111 2.197225 0.000000 117 +send 0 114 2.197225 0.000000 109 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +center 0 88 2.397895 0.000000 158 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +addit 0 74 2.639057 0.000000 228 +experi 0 64 2.772589 0.000000 283 +previou 0 62 2.772589 0.000000 290 +life 0 50 3.044522 0.000000 375 +visitor 0 49 3.044522 0.000000 371 +mean 0 37 3.332205 0.000000 477 +field 0 37 3.332205 0.000000 482 +effort 0 26 3.688879 0.000000 652 +tell 0 21 3.912023 0.000000 777 +basi 0 20 3.951244 0.000000 828 +ever 0 19 4.007333 0.000000 872 +wind 0 18 4.060443 0.000000 908 +stream 0 15 4.248495 0.000000 1015 +trip 0 14 4.317488 0.000000 1113 +bodi 0 13 4.382027 0.000000 1178 +danc 0 12 4.465908 0.000000 1278 +duli 0 12 4.465908 0.000000 1248 +absolut 0 8 4.875197 0.000000 1646 +wouldn 0 7 5.010635 0.000000 1970 +edward 0 6 5.164786 0.000000 2050 +greatest 0 6 5.164786 0.000000 2073 +ignor 0 5 5.347108 0.000000 2288 +shadow 0 3 5.857933 0.000000 3519 +haiku 0 3 5.857933 0.000000 3811 +eddi 0 3 5.857933 0.000000 3896 +danger 0 2 6.263398 0.000000 5725 +strictli 0 2 6.263398 0.000000 5726 +stimul 0 2 6.263398 0.000000 5712 +minion 1 1 6.957497 6.957497 14184 +asphalt 0 1 6.957497 0.000000 14185 +moonlight 0 1 6.957497 0.000000 14186 +nerv 0 1 6.957497 0.000000 14187 +dy 0 1 6.957497 0.000000 14188 +pania 0 1 6.957497 0.000000 14189 +leaf 0 1 6.957497 0.000000 14190 +afloat 0 1 6.957497 0.000000 14191 +waterfal 0 1 6.957497 0.000000 14192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..f7014dee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +site 0 106 2.197225 0.000000 119 +search 1 95 2.397895 2.397895 155 +member 0 84 2.484907 0.000000 165 +knowledg 0 67 2.708050 0.000000 243 +tech 0 35 3.401197 0.000000 515 +hotlist 0 13 4.382027 0.000000 1199 +souther 0 3 5.857933 0.000000 3795 +southerart 0 1 6.957497 0.000000 14193 +southerresearchbuild 0 1 6.957497 0.000000 14194 +reportsouth 0 1 6.957497 0.000000 14195 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..037b869d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +address 1 170 1.791759 1.791759 62 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +area 0 144 1.945910 0.000000 80 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +theori 0 111 2.197225 0.000000 127 +techniqu 0 99 2.302585 0.000000 138 +learn 1 86 2.484907 2.484907 170 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +exampl 0 77 2.564949 0.000000 195 +resum 0 79 2.564949 0.000000 217 +intellig 0 72 2.639057 0.000000 225 +appli 0 71 2.639057 0.000000 226 +creat 0 63 2.772589 0.000000 277 +taylor 0 63 2.772589 0.000000 287 +approach 0 48 3.044522 0.000000 366 +field 0 37 3.332205 0.000000 482 +tech 0 35 3.401197 0.000000 515 +india 0 32 3.465736 0.000000 550 +postal 0 30 3.555348 0.000000 580 +symbol 0 27 3.637586 0.000000 620 +challeng 0 26 3.688879 0.000000 653 +revis 0 26 3.688879 0.000000 640 +variabl 0 23 3.806662 0.000000 715 +indian 0 22 3.850148 0.000000 769 +madra 0 8 4.875197 0.000000 1770 +connectionist 0 5 5.347108 0.000000 2430 +sowmya 0 4 5.568345 0.000000 2670 +bayesian 0 4 5.568345 0.000000 2671 +groupunivers 0 3 5.857933 0.000000 3831 +multimediaappl 0 3 5.857933 0.000000 3274 +rutger 0 3 5.857933 0.000000 3566 +austinresearchmi 0 2 6.263398 0.000000 5644 +ramachandransowmya 0 1 6.957497 0.000000 14196 +ramachandranmachin 0 1 6.957497 0.000000 14197 +ofartif 0 1 6.957497 0.000000 14198 +learningbayesian 0 1 6.957497 0.000000 14199 +withhidden 0 1 6.957497 0.000000 14200 +thisproblem 0 1 6.957497 0.000000 14201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..66d658c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +comment 0 93 2.397895 0.000000 146 +involv 0 71 2.639057 0.000000 227 +free 0 73 2.639057 0.000000 224 +multimedia 0 68 2.708050 0.000000 258 +prof 0 64 2.772589 0.000000 273 +locat 0 59 2.833213 0.000000 303 +advisor 0 51 2.995732 0.000000 355 +hill 0 25 3.737670 0.000000 670 +countri 0 15 4.248495 0.000000 1059 +central 0 13 4.382027 0.000000 1160 +herefor 0 9 4.753590 0.000000 1483 +informationabout 0 9 4.753590 0.000000 1515 +austinaustin 0 7 5.010635 0.000000 1966 +capit 0 7 5.010635 0.000000 1957 +sciencesdepart 0 6 5.164786 0.000000 2020 +isth 0 5 5.347108 0.000000 2532 +edudepart 0 3 5.857933 0.000000 3302 +sriram 0 2 6.263398 0.000000 4550 +multimediai 0 2 6.263398 0.000000 4337 +raocurr 0 1 6.957497 0.000000 14202 +systemoper 0 1 6.957497 0.000000 14203 +multimediagroup 0 1 6.957497 0.000000 14204 +harrickvinpublicationsminegroupcontact 0 1 6.957497 0.000000 14205 +informationofficetai 0 1 6.957497 0.000000 14206 +miscellaneousotherinterest 0 1 6.957497 0.000000 14207 +pagespicturesof 0 1 6.957497 0.000000 14208 +toweraustin 0 1 6.957497 0.000000 14209 +kannada 0 1 6.957497 0.000000 14210 +koota 0 1 6.957497 0.000000 14211 +tamil 0 1 6.957497 0.000000 14212 +sangam 0 1 6.957497 0.000000 14213 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..ebcbb818 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +challeng 0 26 3.688879 0.000000 653 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 +tiger 0 3 5.857933 0.000000 3897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..0bb6fe54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +machin 0 129 2.079442 0.000000 95 +assist 0 112 2.197225 0.000000 113 +present 0 91 2.397895 0.000000 145 +real 0 93 2.397895 0.000000 144 +educ 0 86 2.484907 0.000000 191 +master 0 76 2.564949 0.000000 216 +nation 0 74 2.639057 0.000000 240 +august 0 66 2.708050 0.000000 257 +experi 0 64 2.772589 0.000000 283 +prof 0 64 2.772589 0.000000 273 +septemb 0 65 2.772589 0.000000 274 +finger 0 52 2.995732 0.000000 354 +author 0 39 3.258097 0.000000 450 +utc 0 27 3.637586 0.000000 629 +administr 0 27 3.637586 0.000000 628 +log 0 19 4.007333 0.000000 857 +edulast 0 17 4.110874 0.000000 927 +chemic 0 5 5.347108 0.000000 2552 +korea 0 4 5.568345 0.000000 2971 +seoul 0 3 5.857933 0.000000 3783 +aloysiu 0 3 5.857933 0.000000 3829 +choiwelcom 0 2 6.263398 0.000000 5727 +sunghe 0 1 6.957497 0.000000 14214 +choisunghe 0 1 6.957497 0.000000 14215 +nuec 0 1 6.957497 0.000000 14216 +choiemail 0 1 6.957497 0.000000 14217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..f78fd066 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +provid 0 121 2.079442 0.000000 94 +postscript 0 131 2.079442 0.000000 90 +pleas 0 113 2.197225 0.000000 114 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +call 0 91 2.397895 0.000000 153 +center 0 88 2.397895 0.000000 158 +member 0 84 2.484907 0.000000 165 +larg 0 82 2.484907 0.000000 168 +effici 0 73 2.639057 0.000000 233 +taylor 0 63 2.772589 0.000000 287 +descript 0 64 2.772589 0.000000 271 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +best 0 59 2.833213 0.000000 299 +finger 0 52 2.995732 0.000000 354 +hardwar 0 51 2.995732 0.000000 350 +pointer 0 48 3.044522 0.000000 368 +standard 0 48 3.044522 0.000000 365 +postal 0 30 3.555348 0.000000 580 +usual 0 28 3.610918 0.000000 608 +full 0 28 3.610918 0.000000 615 +reach 0 24 3.761200 0.000000 688 +inth 0 22 3.850148 0.000000 741 +along 0 18 4.060443 0.000000 878 +easi 0 16 4.174387 0.000000 969 +brief 0 16 4.174387 0.000000 1001 +novel 0 15 4.248495 0.000000 1039 +persist 0 11 4.553877 0.000000 1367 +motorola 0 9 4.753590 0.000000 1546 +oop 0 8 4.875197 0.000000 1778 +myresum 0 6 5.164786 0.000000 2199 +informationi 0 3 5.857933 0.000000 3871 +swizzl 0 3 5.857933 0.000000 3883 +sheetal 0 2 6.263398 0.000000 5684 +isvia 0 2 6.263398 0.000000 5637 +mypubl 0 2 6.263398 0.000000 5707 +somerset 0 2 6.263398 0.000000 5639 +kakkad 0 2 6.263398 0.000000 5685 +kakkadsheet 0 1 6.957497 0.000000 14218 +kakkadcontact 0 1 6.957497 0.000000 14219 +storagesystem 0 1 6.957497 0.000000 14220 +faulttim 0 1 6.957497 0.000000 14221 +whilefinish 0 1 6.957497 0.000000 14222 +svkakkad 0 1 6.957497 0.000000 14223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..6730f22b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +check 0 115 2.197225 0.000000 118 +comment 0 93 2.397895 0.000000 146 +octob 0 89 2.397895 0.000000 156 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +suggest 0 53 2.944439 0.000000 331 +finger 0 52 2.995732 0.000000 354 +china 0 37 3.332205 0.000000 487 +chines 0 29 3.583519 0.000000 595 +art 0 29 3.583519 0.000000 593 +weather 0 28 3.610918 0.000000 618 +campu 0 27 3.637586 0.000000 623 +todai 0 25 3.737670 0.000000 672 +highli 0 23 3.806662 0.000000 725 +voic 0 21 3.912023 0.000000 806 +item 0 19 4.007333 0.000000 856 +medic 0 17 4.110874 0.000000 958 +academi 0 8 4.875197 0.000000 1735 +scholar 0 6 5.164786 0.000000 2180 +appreci 0 5 5.347108 0.000000 2374 +sale 0 3 5.857933 0.000000 3688 +meyour 0 3 5.857933 0.000000 3858 +registrar 0 2 6.263398 0.000000 5611 +gradaut 0 2 6.263398 0.000000 5612 +studiesut 0 2 6.263398 0.000000 5613 +novelschines 0 2 6.263398 0.000000 5610 +visitorsinc 0 2 6.263398 0.000000 5616 +shengm 0 1 6.957497 0.000000 14224 +homepageabout 0 1 6.957497 0.000000 14225 +classmatesclass 0 1 6.957497 0.000000 14226 +ustc 0 1 6.957497 0.000000 14227 +sciencesus 0 1 6.957497 0.000000 14228 +linksut 0 1 6.957497 0.000000 14229 +libraryutaccesschines 0 1 6.957497 0.000000 14230 +associationchina 0 1 6.957497 0.000000 14231 +chinesechinainternet 0 1 6.957497 0.000000 14232 +magazinestsinghua 0 1 6.957497 0.000000 14233 +bbsncic 0 1 6.957497 0.000000 14234 +bbschines 0 1 6.957497 0.000000 14235 +classicsabout 0 1 6.957497 0.000000 14236 +austinwhat 0 1 6.957497 0.000000 14237 +citylimitsclassifi 0 1 6.957497 0.000000 14238 +austinto 0 1 6.957497 0.000000 14239 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..d37012f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +frame 1 24 3.761200 3.761200 684 +wang 0 21 3.912023 0.000000 790 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..fafc6052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..9552def8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +name 0 72 2.639057 0.000000 220 +multimedia 0 68 2.708050 0.000000 258 +dept 0 64 2.772589 0.000000 291 +plai 0 60 2.833213 0.000000 307 +done 0 47 3.091042 0.000000 381 +option 0 30 3.555348 0.000000 575 +watson 0 8 4.875197 0.000000 1691 +bore 0 7 5.010635 0.000000 1948 +internship 0 3 5.857933 0.000000 3764 +renu 0 1 6.957497 0.000000 14240 +tewarirenu 0 1 6.957497 0.000000 14241 +tewariwhat 0 1 6.957497 0.000000 14242 +addresshom 0 1 6.957497 0.000000 14243 +tewari 0 1 6.957497 0.000000 14244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..63b5bd2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +austin 0 168 1.791759 0.000000 63 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +sinc 0 90 2.397895 0.000000 159 +control 0 82 2.484907 0.000000 164 +novemb 0 81 2.484907 0.000000 179 +stuff 0 87 2.484907 0.000000 171 +resourc 0 81 2.484907 0.000000 172 +come 0 78 2.564949 0.000000 202 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +messag 0 76 2.564949 0.000000 212 +state 0 76 2.564949 0.000000 207 +resum 0 79 2.564949 0.000000 217 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +appli 0 71 2.639057 0.000000 226 +html 0 75 2.639057 0.000000 235 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +investig 0 51 2.995732 0.000000 353 +format 0 48 3.044522 0.000000 356 +done 0 47 3.091042 0.000000 381 +formal 0 37 3.332205 0.000000 478 +mean 0 37 3.332205 0.000000 477 +soon 0 36 3.367296 0.000000 494 +random 0 34 3.401197 0.000000 511 +photo 0 31 3.496508 0.000000 561 +secur 0 30 3.555348 0.000000 577 +postal 0 30 3.555348 0.000000 580 +client 0 25 3.737670 0.000000 679 +synthesi 0 20 3.951244 0.000000 834 +verif 0 20 3.951244 0.000000 826 +analyz 0 17 4.110874 0.000000 925 +draft 0 14 4.317488 0.000000 1085 +finit 0 14 4.317488 0.000000 1106 +stai 0 12 4.465908 0.000000 1215 +tune 0 12 4.465908 0.000000 1227 +genet 0 10 4.653960 0.000000 1409 +metric 0 7 5.010635 0.000000 1831 +photographi 0 6 5.164786 0.000000 2146 +pierc 0 4 5.568345 0.000000 2623 +queu 0 4 5.568345 0.000000 2648 +blvd 0 4 5.568345 0.000000 3007 +evolutionari 0 3 5.857933 0.000000 3898 +amwork 0 2 6.263398 0.000000 4850 +communicatewith 0 2 6.263398 0.000000 5062 +tumlin 1 1 6.957497 6.957497 14245 +brenda 0 1 6.957497 0.000000 14246 +ladd 0 1 6.957497 0.000000 14247 +authenticationprotocol 0 1 6.957497 0.000000 14248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..5b08e585 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 0 168 1.791759 0.000000 63 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +theori 0 111 2.197225 0.000000 127 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +techniqu 0 99 2.302585 0.000000 138 +book 0 99 2.302585 0.000000 131 +associ 0 93 2.397895 0.000000 151 +homepag 0 93 2.397895 0.000000 148 +commun 0 95 2.397895 0.000000 157 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +real 0 93 2.397895 0.000000 144 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +school 0 84 2.484907 0.000000 188 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +resum 0 79 2.564949 0.000000 217 +april 0 77 2.564949 0.000000 196 +know 0 80 2.564949 0.000000 198 +html 0 75 2.639057 0.000000 235 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +multimedia 0 68 2.708050 0.000000 258 +differ 0 66 2.708050 0.000000 253 +prof 1 64 2.772589 2.772589 273 +visit 0 63 2.772589 0.000000 288 +written 0 63 2.772589 0.000000 278 +result 0 65 2.772589 0.000000 281 +plan 0 65 2.772589 0.000000 272 +laboratori 0 63 2.772589 0.000000 292 +creat 0 63 2.772589 0.000000 277 +plai 0 60 2.833213 0.000000 307 +unix 0 58 2.890372 0.000000 308 +semest 0 58 2.890372 0.000000 312 +sampl 0 53 2.944439 0.000000 339 +maintain 0 51 2.995732 0.000000 342 +friend 0 48 3.044522 0.000000 376 +format 0 48 3.044522 0.000000 356 +life 0 50 3.044522 0.000000 375 +standard 0 48 3.044522 0.000000 365 +still 0 50 3.044522 0.000000 362 +visitor 0 49 3.044522 0.000000 371 +netscap 0 44 3.135494 0.000000 395 +anoth 0 45 3.135494 0.000000 408 +protocol 0 45 3.135494 0.000000 407 +made 0 44 3.135494 0.000000 398 +favorit 0 44 3.135494 0.000000 410 +compani 0 41 3.218876 0.000000 423 +music 0 42 3.218876 0.000000 436 +form 0 39 3.258097 0.000000 443 +movi 0 40 3.258097 0.000000 459 +tutori 0 39 3.258097 0.000000 437 +china 0 37 3.332205 0.000000 487 +robot 0 36 3.367296 0.000000 497 +copyright 0 36 3.367296 0.000000 495 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +kind 0 32 3.465736 0.000000 541 +anim 0 31 3.496508 0.000000 557 +compon 0 30 3.555348 0.000000 570 +chines 0 29 3.583519 0.000000 595 +synchron 0 29 3.583519 0.000000 588 +full 0 28 3.610918 0.000000 615 +quit 0 27 3.637586 0.000000 633 +never 0 25 3.737670 0.000000 671 +background 0 25 3.737670 0.000000 664 +reach 0 24 3.761200 0.000000 688 +mobil 0 23 3.806662 0.000000 730 +thank 0 23 3.806662 0.000000 721 +recommend 0 22 3.850148 0.000000 737 +wang 0 21 3.912023 0.000000 790 +viewer 0 21 3.912023 0.000000 787 +chen 0 21 3.912023 0.000000 791 +leav 0 21 3.912023 0.000000 772 +mpeg 0 20 3.951244 0.000000 831 +applet 0 20 3.951244 0.000000 827 +beij 0 19 4.007333 0.000000 876 +demo 0 18 4.060443 0.000000 888 +listen 0 18 4.060443 0.000000 907 +seek 0 17 4.110874 0.000000 954 +normal 0 16 4.174387 0.000000 995 +tsinghua 0 13 4.382027 0.000000 1195 +misc 0 13 4.382027 0.000000 1124 +clock 0 11 4.553877 0.000000 1320 +host 0 11 4.553877 0.000000 1306 +player 0 11 4.553877 0.000000 1371 +perl 0 11 4.553877 0.000000 1332 +lake 0 11 4.553877 0.000000 1373 +song 0 11 4.553877 0.000000 1380 +jersei 0 9 4.753590 0.000000 1587 +sound 0 9 4.753590 0.000000 1605 +trust 0 9 4.753590 0.000000 1583 +pure 0 8 4.875197 0.000000 1776 +univeristi 0 8 4.875197 0.000000 1754 +counter 0 8 4.875197 0.000000 1765 +misra 0 7 5.010635 0.000000 1856 +clip 0 7 5.010635 0.000000 1868 +attach 0 7 5.010635 0.000000 1785 +accord 0 7 5.010635 0.000000 1826 +bell 0 6 5.164786 0.000000 2224 +troubl 0 6 5.164786 0.000000 2002 +fussel 0 5 5.347108 0.000000 2300 +opengl 0 5 5.347108 0.000000 2299 +lang 0 5 5.347108 0.000000 2294 +shanghai 0 4 5.568345 0.000000 2925 +republ 0 4 5.568345 0.000000 3032 +restructur 0 4 5.568345 0.000000 2775 +gouda 0 4 5.568345 0.000000 3021 +batori 0 4 5.568345 0.000000 2690 +blvd 0 4 5.568345 0.000000 3007 +tong 0 3 5.857933 0.000000 3258 +zuckerman 0 3 5.857933 0.000000 3205 +underconstruct 0 3 5.857933 0.000000 3889 +nanj 0 2 6.263398 0.000000 5728 +mini 0 2 6.263398 0.000000 5548 +decod 0 2 6.263398 0.000000 4936 +zodiac 0 2 6.263398 0.000000 5729 +twang 0 2 6.263398 0.000000 5730 +eagl 0 2 6.263398 0.000000 5731 +jiao 0 1 6.957497 0.000000 14249 +lucent 0 1 6.957497 0.000000 14250 +nank 0 1 6.957497 0.000000 14251 +summerluc 0 1 6.957497 0.000000 14252 +thissumm 0 1 6.957497 0.000000 14253 +plexton 0 1 6.957497 0.000000 14254 +libari 0 1 6.957497 0.000000 14255 +glut 0 1 6.957497 0.000000 14256 +mariah 0 1 6.957497 0.000000 14257 +boyz 0 1 6.957497 0.000000 14258 +babyfac 0 1 6.957497 0.000000 14259 +haiq 0 1 6.957497 0.000000 14260 +shenfeng 0 1 6.957497 0.000000 14261 +deskmat 0 1 6.957497 0.000000 14262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..434adafa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +machin 0 129 2.079442 0.000000 95 +welcom 0 122 2.079442 0.000000 99 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +well 0 109 2.197225 0.000000 121 +associ 0 93 2.397895 0.000000 151 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +thing 0 84 2.484907 0.000000 189 +resourc 0 81 2.484907 0.000000 172 +orient 0 80 2.564949 0.000000 205 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +januari 0 62 2.772589 0.000000 264 +special 0 56 2.890372 0.000000 320 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +york 0 41 3.218876 0.000000 435 +print 0 34 3.401197 0.000000 503 +dissert 0 32 3.465736 0.000000 549 +focu 0 30 3.555348 0.000000 571 +consid 0 29 3.583519 0.000000 590 +weather 0 28 3.610918 0.000000 618 +utc 0 27 3.637586 0.000000 629 +yahoo 0 24 3.761200 0.000000 707 +decis 0 23 3.806662 0.000000 728 +voic 0 21 3.912023 0.000000 806 +supervis 0 20 3.951244 0.000000 840 +lyco 0 19 4.007333 0.000000 871 +context 0 13 4.382027 0.000000 1153 +perman 0 11 4.553877 0.000000 1372 +acquisit 0 10 4.653960 0.000000 1465 +moonei 0 9 4.753590 0.000000 1520 +linguist 0 9 4.753590 0.000000 1593 +european 0 8 4.875197 0.000000 1763 +altavista 0 6 5.164786 0.000000 2222 +infoseek 0 6 5.164786 0.000000 2188 +pars 0 5 5.347108 0.000000 2321 +raymond 0 5 5.347108 0.000000 2313 +hermjakob 0 3 5.857933 0.000000 3876 +groupand 0 3 5.857933 0.000000 3873 +signll 0 3 5.857933 0.000000 3877 +galaxi 0 3 5.857933 0.000000 3603 +deutsch 0 3 5.857933 0.000000 3802 +pageulf 0 1 6.957497 0.000000 14263 +hermjakobhello 0 1 6.957497 0.000000 14264 +thedept 0 1 6.957497 0.000000 14265 +austinand 0 1 6.957497 0.000000 14266 +aboutexampl 0 1 6.957497 0.000000 14267 +translationund 0 1 6.957497 0.000000 14268 +einet 0 1 6.957497 0.000000 14269 +dernir 0 1 6.957497 0.000000 14270 +nouvel 0 1 6.957497 0.000000 14271 +alsac 0 1 6.957497 0.000000 14272 +spiegel 0 1 6.957497 0.000000 14273 +svenska 0 1 6.957497 0.000000 14274 +dagbladet 0 1 6.957497 0.000000 14275 +tagesspiegel 0 1 6.957497 0.000000 14276 +vanguardia 0 1 6.957497 0.000000 14277 +welt 0 1 6.957497 0.000000 14278 +zeitplusacm 0 1 6.957497 0.000000 14279 +moltkestr 0 1 6.957497 0.000000 14280 +bnde 0 1 6.957497 0.000000 14281 +germanyphon 0 1 6.957497 0.000000 14282 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..d016ec4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +schedul 0 119 2.079442 0.000000 85 +peopl 0 96 2.302585 0.000000 132 +refer 0 78 2.564949 0.000000 203 +new 0 64 2.772589 0.000000 262 +organ 0 65 2.772589 0.000000 265 +taylor 0 63 2.772589 0.000000 287 +street 0 63 2.772589 0.000000 293 +directori 0 45 3.135494 0.000000 396 +sport 1 25 3.737670 3.737670 683 +entertain 0 12 4.465908 0.000000 1286 +magic 0 11 4.553877 0.000000 1358 +perman 0 11 4.553877 0.000000 1372 +gather 0 8 4.875197 0.000000 1719 +lanc 0 4 5.568345 0.000000 3022 +champion 0 4 5.568345 0.000000 2982 +tokuda 0 3 5.857933 0.000000 3266 +twelv 0 3 5.857933 0.000000 3899 +hawaii 0 3 5.857933 0.000000 3888 +intramur 0 2 6.263398 0.000000 5590 +unicron 0 1 6.957497 0.000000 14283 +financ 0 1 6.957497 0.000000 14284 +heeia 0 1 6.957497 0.000000 14285 +kaneoh 0 1 6.957497 0.000000 14286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..68b4957f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +hall 1 146 1.945910 1.945910 65 +click 0 142 1.945910 0.000000 78 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +taylor 0 63 2.772589 0.000000 287 +finger 0 52 2.995732 0.000000 354 +telephon 0 50 3.044522 0.000000 373 +eduoffic 0 33 3.433987 0.000000 531 +postal 0 30 3.555348 0.000000 580 +log 0 19 4.007333 0.000000 857 +account 0 18 4.060443 0.000000 882 +whether 0 17 4.110874 0.000000 918 +informationemail 0 9 4.753590 0.000000 1564 +painter 0 2 6.263398 0.000000 4187 +balayoghanv 0 1 6.957497 0.000000 14432 +balayoghancontact 0 1 6.957497 0.000000 14433 +ineosdi 0 1 6.957497 0.000000 14434 +bookmarksvbb 0 1 6.957497 0.000000 14435 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..dee8ddb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +confer 0 126 2.079442 0.000000 100 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +intern 0 108 2.197225 0.000000 128 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +advanc 0 99 2.302585 0.000000 130 +proceed 0 93 2.397895 0.000000 152 +commun 0 95 2.397895 0.000000 157 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +ieee 0 86 2.484907 0.000000 190 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +novemb 0 81 2.484907 0.000000 179 +second 0 81 2.484907 0.000000 166 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +server 0 76 2.564949 0.000000 204 +state 0 76 2.564949 0.000000 207 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +optim 0 79 2.564949 0.000000 197 +april 0 77 2.564949 0.000000 196 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +effici 0 73 2.639057 0.000000 233 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +multimedia 1 68 2.708050 2.708050 258 +main 0 67 2.708050 0.000000 256 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +march 0 61 2.833213 0.000000 295 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +profession 0 51 2.995732 0.000000 345 +digit 0 52 2.995732 0.000000 348 +california 0 46 3.091042 0.000000 388 +video 0 44 3.135494 0.000000 405 +third 0 43 3.178054 0.000000 412 +tutori 0 39 3.258097 0.000000 437 +industri 0 38 3.295837 0.000000 464 +china 0 37 3.332205 0.000000 487 +award 0 34 3.401197 0.000000 523 +committe 0 34 3.401197 0.000000 522 +tech 0 35 3.401197 0.000000 515 +board 0 33 3.433987 0.000000 528 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +storag 0 31 3.496508 0.000000 553 +rang 0 30 3.555348 0.000000 565 +chair 0 29 3.583519 0.000000 596 +scale 0 28 3.610918 0.000000 613 +arrai 0 27 3.637586 0.000000 627 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +supercomput 0 25 3.737670 0.000000 681 +initi 0 23 3.806662 0.000000 717 +honor 0 23 3.806662 0.000000 729 +disk 0 22 3.850148 0.000000 747 +director 0 22 3.850148 0.000000 767 +indian 0 22 3.850148 0.000000 769 +beij 0 19 4.007333 0.000000 876 +speed 0 18 4.060443 0.000000 911 +failur 0 18 4.060443 0.000000 898 +germani 0 17 4.110874 0.000000 946 +diego 0 16 4.174387 0.000000 992 +taiwan 0 16 4.174387 0.000000 1006 +intel 0 16 4.174387 0.000000 1000 +atth 0 15 4.248495 0.000000 1019 +audio 0 14 4.317488 0.000000 1094 +heterogen 0 14 4.317488 0.000000 1090 +nasa 0 13 4.382027 0.000000 1188 +career 0 12 4.465908 0.000000 1287 +placement 0 10 4.653960 0.000000 1420 +ataustin 0 9 4.753590 0.000000 1610 +editori 0 9 4.753590 0.000000 1611 +vice 0 9 4.753590 0.000000 1604 +transmiss 0 9 4.753590 0.000000 1588 +recoveri 0 9 4.753590 0.000000 1474 +creativ 0 8 4.875197 0.000000 1777 +harrick 0 7 5.010635 0.000000 1849 +bombai 0 7 5.010635 0.000000 1972 +sponsor 0 6 5.164786 0.000000 2133 +internationalconfer 0 6 5.164786 0.000000 2051 +microsystem 0 6 5.164786 0.000000 2160 +ofdistribut 0 5 5.347108 0.000000 2316 +row 0 5 5.347108 0.000000 2330 +colorado 0 4 5.568345 0.000000 2938 +innov 0 4 5.568345 0.000000 2933 +multimediasystem 0 4 5.568345 0.000000 2701 +venkat 0 4 5.568345 0.000000 2702 +goyal 0 3 5.857933 0.000000 3268 +multimediacomput 0 3 5.857933 0.000000 3841 +mobilecomput 0 3 5.857933 0.000000 3629 +shenoi 0 3 5.857933 0.000000 3269 +ftc 0 3 5.857933 0.000000 3275 +rangan 0 3 5.857933 0.000000 3270 +durham 0 3 5.857933 0.000000 3279 +hampshir 0 3 5.857933 0.000000 3280 +mitsubishi 0 3 5.857933 0.000000 3842 +merl 0 3 5.857933 0.000000 3843 +andnetwork 0 2 6.263398 0.000000 5751 +icdc 0 2 6.263398 0.000000 5191 +protocolsfor 0 2 6.263398 0.000000 5204 +inmulti 0 2 6.263398 0.000000 4334 +annualintern 0 2 6.263398 0.000000 4335 +pasadena 0 2 6.263398 0.000000 4336 +gemmel 0 2 6.263398 0.000000 4332 +kandlur 0 2 6.263398 0.000000 4321 +ofmultimedia 0 2 6.263398 0.000000 4322 +ieeeintern 0 2 6.263398 0.000000 4333 +icmc 0 2 6.263398 0.000000 4323 +delaybound 0 2 6.263398 0.000000 4342 +fordigit 0 2 6.263398 0.000000 5752 +nossdav 0 2 6.263398 0.000000 4344 +federalinstitut 0 2 6.263398 0.000000 5539 +eurograph 0 1 6.957497 0.000000 14436 +vinharrick 0 1 6.957497 0.000000 14437 +electronicimag 0 1 6.957497 0.000000 14438 +kaohsiung 0 1 6.957497 0.000000 14439 +rostock 0 1 6.957497 0.000000 14440 +interestmultimedia 0 1 6.957497 0.000000 14441 +anend 0 1 6.957497 0.000000 14442 +thintern 0 1 6.957497 0.000000 14443 +designingmultimedia 0 1 6.957497 0.000000 14444 +foundationresearch 0 1 6.957497 0.000000 14445 +electricresearch 0 1 6.957497 0.000000 14446 +electrospacesystem 0 1 6.957497 0.000000 14447 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..b8c387a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +number 1 130 2.079442 2.079442 97 +report 0 131 2.079442 0.000000 92 +pleas 1 113 2.197225 2.197225 114 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +comment 0 93 2.397895 0.000000 146 +thing 0 84 2.484907 0.000000 189 +activ 0 84 2.484907 0.000000 182 +academ 0 82 2.484907 0.000000 178 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +street 0 63 2.772589 0.000000 293 +semest 0 58 2.890372 0.000000 312 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +give 0 50 3.044522 0.000000 359 +visitor 0 49 3.044522 0.000000 371 +soon 0 36 3.367296 0.000000 494 +go 0 33 3.433987 0.000000 529 +india 0 32 3.465736 0.000000 550 +art 0 29 3.583519 0.000000 593 +pass 0 28 3.610918 0.000000 611 +though 0 27 3.637586 0.000000 622 +never 0 25 3.737670 0.000000 671 +color 0 22 3.850148 0.000000 762 +increas 0 20 3.951244 0.000000 829 +sure 0 20 3.951244 0.000000 813 +log 0 19 4.007333 0.000000 857 +less 0 18 4.060443 0.000000 892 +medic 0 17 4.110874 0.000000 958 +match 0 16 4.174387 0.000000 965 +sign 0 16 4.174387 0.000000 970 +guest 0 12 4.465908 0.000000 1220 +incomplet 0 9 4.753590 0.000000 1575 +risk 0 8 4.875197 0.000000 1689 +yeah 0 6 5.164786 0.000000 2195 +put 0 6 5.164786 0.000000 2017 +guestbook 1 5 5.347108 5.347108 2475 +delhi 0 5 5.347108 0.000000 2530 +haven 1 4 5.568345 5.568345 3037 +shall 0 3 5.857933 0.000000 3891 +vipin 1 2 6.263398 6.263398 5579 +interestscours 0 2 6.263398 0.000000 5026 +reset 0 2 6.263398 0.000000 5236 +decreas 0 2 6.263398 0.000000 4877 +undergraduatefrom 0 1 6.957497 0.000000 14448 +interestsreportsy 0 1 6.957497 0.000000 14449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..1eae3e04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,156 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +note 0 142 1.945910 0.000000 67 +object 0 138 1.945910 0.000000 79 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +lectur 0 135 1.945910 0.000000 73 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +mathemat 1 108 2.197225 2.197225 123 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +intern 0 108 2.197225 0.000000 128 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +associ 0 93 2.397895 0.000000 151 +institut 0 84 2.484907 0.000000 187 +school 0 84 2.484907 0.000000 188 +state 0 76 2.564949 0.000000 207 +line 0 75 2.639057 0.000000 231 +new 0 64 2.772589 0.000000 262 +import 0 65 2.772589 0.000000 282 +taylor 0 63 2.772589 0.000000 287 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +profession 0 51 2.995732 0.000000 345 +right 0 48 3.044522 0.000000 363 +california 0 46 3.091042 0.000000 388 +better 0 45 3.135494 0.000000 401 +favorit 0 44 3.135494 0.000000 410 +live 0 40 3.258097 0.000000 451 +paul 0 38 3.295837 0.000000 471 +close 0 38 3.295837 0.000000 465 +feel 0 37 3.332205 0.000000 483 +sciencesunivers 0 37 3.332205 0.000000 486 +survei 0 35 3.401197 0.000000 513 +human 0 32 3.465736 0.000000 546 +dissert 0 32 3.465736 0.000000 549 +taken 0 31 3.496508 0.000000 555 +scientist 0 31 3.496508 0.000000 560 +postal 0 30 3.555348 0.000000 580 +turn 0 29 3.583519 0.000000 586 +quot 0 29 3.583519 0.000000 582 +mind 0 27 3.637586 0.000000 632 +fellow 0 24 3.761200 0.000000 701 +famili 0 23 3.806662 0.000000 735 +initi 0 23 3.806662 0.000000 717 +finish 0 22 3.850148 0.000000 748 +wang 0 21 3.912023 0.000000 790 +appropri 0 18 4.060443 0.000000 883 +germani 0 17 4.110874 0.000000 946 +white 0 17 4.110874 0.000000 951 +precis 0 15 4.248495 0.000000 1023 +countri 0 15 4.248495 0.000000 1059 +incomput 0 14 4.317488 0.000000 1096 +becam 0 14 4.317488 0.000000 1117 +stori 0 14 4.317488 0.000000 1087 +daniel 0 12 4.465908 0.000000 1233 +vladimir 0 11 4.553877 0.000000 1324 +america 0 11 4.553877 0.000000 1370 +black 0 10 4.653960 0.000000 1418 +sentenc 0 10 4.653960 0.000000 1413 +gain 0 8 4.875197 0.000000 1730 +secretari 0 8 4.875197 0.000000 1775 +elect 0 8 4.875197 0.000000 1771 +joke 0 8 4.875197 0.000000 1620 +centenni 0 7 5.010635 0.000000 1967 +sciencesat 0 7 5.010635 0.000000 1968 +austinaustin 0 7 5.010635 0.000000 1966 +race 0 5 5.347108 0.000000 2417 +lifschitz 0 5 5.347108 0.000000 2542 +ortega 0 5 5.347108 0.000000 2559 +lost 0 5 5.347108 0.000000 2358 +petersburg 0 4 5.568345 0.000000 2989 +insight 0 4 5.568345 0.000000 3024 +nonmonoton 0 4 5.568345 0.000000 3023 +evid 0 4 5.568345 0.000000 2768 +ratio 0 4 5.568345 0.000000 2942 +silli 0 4 5.568345 0.000000 3038 +dijkstra 0 3 5.857933 0.000000 3173 +armi 0 3 5.857933 0.000000 3562 +prison 0 3 5.857933 0.000000 3907 +tortur 0 3 5.857933 0.000000 3634 +district 0 3 5.857933 0.000000 3756 +civil 0 3 5.857933 0.000000 3908 +russia 0 2 6.263398 0.000000 5756 +spirit 0 2 6.263398 0.000000 5234 +theamerican 0 2 6.263398 0.000000 5120 +programmingand 0 2 6.263398 0.000000 4940 +edsger 0 2 6.263398 0.000000 5740 +convoc 0 2 6.263398 0.000000 5757 +nomin 0 2 6.263398 0.000000 5758 +helm 0 2 6.263398 0.000000 4217 +burton 0 2 6.263398 0.000000 5759 +polic 0 2 6.263398 0.000000 5560 +democrat 0 2 6.263398 0.000000 5567 +admit 0 2 6.263398 0.000000 5429 +neutral 0 2 6.263398 0.000000 5760 +lifschitzwhen 0 1 6.957497 0.000000 14488 +burden 0 1 6.957497 0.000000 14489 +downcast 0 1 6.957497 0.000000 14490 +gladli 0 1 6.957497 0.000000 14491 +therealm 0 1 6.957497 0.000000 14492 +lucid 0 1 6.957497 0.000000 14493 +grasp 0 1 6.957497 0.000000 14494 +isobtain 0 1 6.957497 0.000000 14495 +pleasantli 0 1 6.957497 0.000000 14496 +conceptform 0 1 6.957497 0.000000 14497 +bernai 0 1 6.957497 0.000000 14498 +lifschitzgottesman 0 1 6.957497 0.000000 14499 +texasat 0 1 6.957497 0.000000 14500 +forartifici 0 1 6.957497 0.000000 14501 +intelligenceb 0 1 6.957497 0.000000 14502 +branchof 0 1 6.957497 0.000000 14503 +steklov 0 1 6.957497 0.000000 14504 +interesttempor 0 1 6.957497 0.000000 14505 +reasoningand 0 1 6.957497 0.000000 14506 +aboutactionslog 0 1 6.957497 0.000000 14507 +reasoningteachingoth 0 1 6.957497 0.000000 14508 +activitiespap 0 1 6.957497 0.000000 14509 +bylifschitz 0 1 6.957497 0.000000 14510 +studentsrecommend 0 1 6.957497 0.000000 14511 +speechgood 0 1 6.957497 0.000000 14512 +madelein 0 1 6.957497 0.000000 14513 +albright 0 1 6.957497 0.000000 14514 +regain 0 1 6.957497 0.000000 14515 +soviet 0 1 6.957497 0.000000 14516 +recycl 0 1 6.957497 0.000000 14517 +actbad 0 1 6.957497 0.000000 14518 +sequest 0 1 6.957497 0.000000 14519 +archeolog 0 1 6.957497 0.000000 14520 +societynot 0 1 6.957497 0.000000 14521 +redrawn 0 1 6.957497 0.000000 14522 +basisoth 0 1 6.957497 0.000000 14523 +amnesti 0 1 6.957497 0.000000 14524 +monthcontact 0 1 6.957497 0.000000 14525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..9cada8aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +algorithm 1 162 1.791759 1.791759 57 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +theori 0 111 2.197225 0.000000 127 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +complet 0 77 2.564949 0.000000 208 +april 0 77 2.564949 0.000000 196 +effici 0 73 2.639057 0.000000 233 +evalu 0 64 2.772589 0.000000 266 +copi 0 63 2.772589 0.000000 284 +taylor 0 63 2.772589 0.000000 287 +visit 0 63 2.772589 0.000000 288 +faculti 0 56 2.890372 0.000000 325 +vita 0 38 3.295837 0.000000 473 +profil 0 30 3.555348 0.000000 581 +postal 0 30 3.555348 0.000000 580 +experiment 0 26 3.688879 0.000000 645 +mine 0 26 3.688879 0.000000 654 +sequenti 0 22 3.850148 0.000000 745 +offici 0 18 4.060443 0.000000 894 +princeton 0 15 4.248495 0.000000 1042 +interestsmi 0 10 4.653960 0.000000 1462 +regent 0 5 5.347108 0.000000 2551 +vijaya 0 4 5.568345 0.000000 2677 +primarilyin 0 3 5.857933 0.000000 3832 +ramachandranvijaya 0 1 6.957497 0.000000 14450 +ramachandranblakemor 0 1 6.957497 0.000000 14451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..6e42b7c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +algorithm 0 162 1.791759 0.000000 57 +parallel 0 169 1.791759 0.000000 60 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +confer 0 126 2.079442 0.000000 100 +theori 0 111 2.197225 0.000000 127 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +mani 0 92 2.397895 0.000000 150 +activ 0 84 2.484907 0.000000 182 +ieee 0 86 2.484907 0.000000 190 +resourc 0 81 2.484907 0.000000 172 +novemb 0 81 2.484907 0.000000 179 +member 0 84 2.484907 0.000000 165 +messag 0 76 2.564949 0.000000 212 +state 0 76 2.564949 0.000000 207 +method 0 80 2.564949 0.000000 213 +symposium 0 72 2.639057 0.000000 238 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +differ 0 66 2.708050 0.000000 253 +complex 0 64 2.772589 0.000000 269 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +organ 0 65 2.772589 0.000000 265 +import 0 65 2.772589 0.000000 282 +virtual 0 62 2.772589 0.000000 285 +dept 0 64 2.772589 0.000000 291 +locat 0 59 2.833213 0.000000 303 +faculti 0 56 2.890372 0.000000 325 +sever 0 56 2.890372 0.000000 322 +major 0 56 2.890372 0.000000 315 +space 0 57 2.890372 0.000000 310 +semest 0 58 2.890372 0.000000 312 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +talk 0 53 2.944439 0.000000 336 +pointer 0 48 3.044522 0.000000 368 +electron 0 47 3.091042 0.000000 379 +execut 0 45 3.135494 0.000000 404 +term 0 43 3.178054 0.000000 411 +theoret 0 39 3.258097 0.000000 446 +announc 0 40 3.258097 0.000000 441 +seminar 0 38 3.295837 0.000000 470 +random 0 34 3.401197 0.000000 511 +bibliographi 0 34 3.401197 0.000000 518 +post 0 35 3.401197 0.000000 505 +next 0 34 3.401197 0.000000 517 +committe 0 34 3.401197 0.000000 522 +express 0 32 3.465736 0.000000 540 +ad 0 32 3.465736 0.000000 544 +often 0 31 3.496508 0.000000 551 +scientist 0 31 3.496508 0.000000 560 +graph 0 30 3.555348 0.000000 576 +focu 0 30 3.555348 0.000000 571 +focus 0 29 3.583519 0.000000 584 +measur 0 28 3.610918 0.000000 609 +held 0 28 3.610918 0.000000 600 +becom 0 28 3.610918 0.000000 603 +bound 0 26 3.688879 0.000000 659 +berkelei 0 26 3.688879 0.000000 657 +request 0 26 3.688879 0.000000 635 +consist 0 26 3.688879 0.000000 651 +fundament 0 25 3.737670 0.000000 661 +greg 0 24 3.761200 0.000000 695 +sent 0 22 3.850148 0.000000 763 +serv 0 22 3.850148 0.000000 758 +alumni 0 21 3.912023 0.000000 807 +siam 0 21 3.912023 0.000000 800 +region 0 19 4.007333 0.000000 875 +north 0 19 4.007333 0.000000 873 +hypertext 0 19 4.007333 0.000000 865 +stand 0 18 4.060443 0.000000 891 +lower 0 18 4.060443 0.000000 886 +attend 0 18 4.060443 0.000000 893 +affili 0 13 4.382027 0.000000 1194 +discret 0 13 4.382027 0.000000 1165 +remov 0 12 4.465908 0.000000 1225 +walk 0 12 4.465908 0.000000 1281 +outsid 0 12 4.465908 0.000000 1219 +distinguish 0 11 4.553877 0.000000 1357 +probabilist 0 11 4.553877 0.000000 1343 +typic 0 11 4.553877 0.000000 1360 +regard 0 11 4.553877 0.000000 1309 +cryptographi 0 9 4.753590 0.000000 1512 +folk 0 9 4.753590 0.000000 1597 +postdoc 0 8 4.875197 0.000000 1724 +elect 0 8 4.875197 0.000000 1771 +calendar 0 8 4.875197 0.000000 1649 +colloquium 0 8 4.875197 0.000000 1734 +bit 0 7 5.010635 0.000000 1833 +foc 0 7 5.010635 0.000000 1880 +sigact 0 6 5.164786 0.000000 2212 +zhou 0 6 5.164786 0.000000 2092 +forum 0 6 5.164786 0.000000 2027 +southern 0 6 5.164786 0.000000 2191 +pool 0 6 5.164786 0.000000 2225 +arrang 0 6 5.164786 0.000000 2023 +sponsor 0 6 5.164786 0.000000 2133 +soda 0 6 5.164786 0.000000 2189 +groupth 0 5 5.347108 0.000000 2549 +provabl 0 5 5.347108 0.000000 2558 +phil 0 5 5.347108 0.000000 2419 +speaker 0 5 5.347108 0.000000 2370 +stoc 0 5 5.347108 0.000000 2491 +vijaya 0 4 5.568345 0.000000 2677 +dalla 0 4 5.568345 0.000000 2930 +combinator 0 4 5.568345 0.000000 2915 +twice 0 4 5.568345 0.000000 2614 +algorithmsand 0 4 5.568345 0.000000 2680 +warm 0 3 5.857933 0.000000 3904 +plaxton 0 3 5.857933 0.000000 3886 +ramachandran 0 3 5.857933 0.000000 3742 +louisiana 0 3 5.857933 0.000000 3902 +zuckerman 0 3 5.857933 0.000000 3205 +sinica 0 3 5.857933 0.000000 3819 +poon 0 3 5.857933 0.000000 3820 +dozen 0 3 5.857933 0.000000 3905 +gripe 0 3 5.857933 0.000000 3257 +surround 0 3 5.857933 0.000000 3492 +spaa 0 3 5.857933 0.000000 3906 +madhukar 0 2 6.263398 0.000000 5633 +baruah 0 2 6.263398 0.000000 5753 +sheng 0 2 6.263398 0.000000 5153 +ckpoon 0 2 6.263398 0.000000 5510 +rajmohan 0 2 6.263398 0.000000 5706 +rajaraman 0 2 6.263398 0.000000 5704 +rraj 0 2 6.263398 0.000000 5705 +sinha 0 2 6.263398 0.000000 5754 +southwestern 0 2 6.263398 0.000000 5744 +andarchitectur 0 2 6.263398 0.000000 5755 +sanjoi 0 1 6.957497 0.000000 14452 +kelsen 0 1 6.957497 0.000000 14453 +ramgop 0 1 6.957497 0.000000 14454 +suel 0 1 6.957497 0.000000 14455 +yuke 0 1 6.957497 0.000000 14456 +grouput 0 1 6.957497 0.000000 14457 +emba 0 1 6.957497 0.000000 14458 +tsan 0 1 6.957497 0.000000 14459 +tshsu 0 1 6.957497 0.000000 14460 +pierr 0 1 6.957497 0.000000 14461 +korupolu 0 1 6.957497 0.000000 14462 +mackenzi 0 1 6.957497 0.000000 14463 +philmac 0 1 6.957497 0.000000 14464 +idbsu 0 1 6.957497 0.000000 14465 +mettu 0 1 6.957497 0.000000 14466 +santanu 0 1 6.957497 0.000000 14467 +ssinha 0 1 6.957497 0.000000 14468 +torsten 0 1 6.957497 0.000000 14469 +lowvolum 0 1 6.957497 0.000000 14470 +themidsouth 0 1 6.957497 0.000000 14471 +midsouthwest 0 1 6.957497 0.000000 14472 +keynot 0 1 6.957497 0.000000 14473 +atut 0 1 6.957497 0.000000 14474 +organizedanoth 0 1 6.957497 0.000000 14475 +methodist 0 1 6.957497 0.000000 14476 +oklahoma 0 1 6.957497 0.000000 14477 +beheld 0 1 6.957497 0.000000 14478 +algorithmsmail 0 1 6.957497 0.000000 14479 +usuallytri 0 1 6.957497 0.000000 14480 +ofaustin 0 1 6.957497 0.000000 14481 +thatinclud 0 1 6.957497 0.000000 14482 +sponsorsth 0 1 6.957497 0.000000 14483 +interestar 0 1 6.957497 0.000000 14484 +thesigact 0 1 6.957497 0.000000 14485 +eccc 0 1 6.957497 0.000000 14486 +rolodex 0 1 6.957497 0.000000 14487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..fcebfcc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,8 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +phone 1 175 1.791759 1.791759 45 +srinivasan 1 6 5.164786 5.164786 2175 +vaidyaraman 1 2 6.263398 6.263398 5658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..944ed9ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +problem 0 147 1.945910 0.000000 75 +architectur 0 139 1.945910 0.000000 77 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +memori 0 101 2.302585 0.000000 139 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +intellig 0 72 2.639057 0.000000 225 +solv 0 73 2.639057 0.000000 234 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +visual 0 48 3.044522 0.000000 372 +approach 0 48 3.044522 0.000000 366 +understand 0 47 3.091042 0.000000 384 +term 0 43 3.178054 0.000000 411 +represent 0 35 3.401197 0.000000 512 +neural 0 30 3.555348 0.000000 578 +retriev 0 27 3.637586 0.000000 621 +background 0 25 3.737670 0.000000 664 +cognit 0 16 4.174387 0.000000 986 +usavoic 0 13 4.382027 0.000000 1198 +mepost 0 10 4.653960 0.000000 1472 +attent 0 8 4.875197 0.000000 1651 +interestsi 0 7 5.010635 0.000000 1969 +connectionist 0 5 5.347108 0.000000 2430 +howto 0 2 6.263398 0.000000 5761 +vurgun 1 1 6.957497 6.957497 14526 +sengul 0 1 6.957497 0.000000 14527 +sengulvurgun 0 1 6.957497 0.000000 14528 +ammainli 0 1 6.957497 0.000000 14529 +evolutionaryalgorithm 0 1 6.957497 0.000000 14530 +ofprefer 0 1 6.957497 0.000000 14531 +skillacquisit 0 1 6.957497 0.000000 14532 +mindto 0 1 6.957497 0.000000 14533 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..aa440f75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +person 0 111 2.197225 0.000000 117 +server 0 76 2.564949 0.000000 204 +locat 0 59 2.833213 0.000000 303 +chuck 0 14 4.317488 0.000000 1108 +enterpris 0 2 6.263398 0.000000 4839 +walbourn 0 1 6.957497 0.000000 14534 +walbournmi 0 1 6.957497 0.000000 14535 +charybdi 0 1 6.957497 0.000000 14536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..bcf5ae8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +fall 0 181 1.609438 0.000000 40 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +avail 0 169 1.791759 0.000000 48 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +year 0 148 1.945910 0.000000 84 +mathemat 0 108 2.197225 0.000000 123 +follow 0 92 2.397895 0.000000 143 +member 0 84 2.484907 0.000000 165 +activ 0 84 2.484907 0.000000 182 +academ 0 82 2.484907 0.000000 178 +complet 0 77 2.564949 0.000000 208 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +creat 0 63 2.772589 0.000000 277 +colleg 0 61 2.833213 0.000000 300 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +summer 0 56 2.890372 0.000000 311 +profession 0 51 2.995732 0.000000 345 +appoint 0 49 3.044522 0.000000 358 +math 0 44 3.135494 0.000000 402 +http 0 41 3.218876 0.000000 420 +formal 0 37 3.332205 0.000000 478 +revis 0 26 3.688879 0.000000 640 +period 0 22 3.850148 0.000000 743 +regular 0 17 4.110874 0.000000 929 +photograph 0 15 4.248495 0.000000 1056 +senior 0 14 4.317488 0.000000 1120 +henri 0 10 4.653960 0.000000 1417 +jack 0 8 4.875197 0.000000 1780 +walker 1 3 5.857933 5.857933 3161 +tenur 0 3 5.857933 0.000000 3801 +mackai 0 2 6.263398 0.000000 5762 +grinnel 0 2 6.263398 0.000000 5763 +edua 0 2 6.263398 0.000000 5764 +grin 0 1 6.957497 0.000000 14537 +professorwalk 0 1 6.957497 0.000000 14538 +teachand 0 1 6.957497 0.000000 14539 +atgrinnel 0 1 6.957497 0.000000 14540 +robertson 0 1 6.957497 0.000000 14541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..45998cad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +area 0 144 1.945910 0.000000 80 +first 0 140 1.945910 0.000000 71 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +manag 0 114 2.197225 0.000000 125 +follow 0 92 2.397895 0.000000 143 +school 0 84 2.484907 0.000000 188 +activ 0 84 2.484907 0.000000 182 +appli 0 71 2.639057 0.000000 226 +knowledg 0 67 2.708050 0.000000 243 +laboratori 0 63 2.772589 0.000000 292 +case 0 51 2.995732 0.000000 351 +maintain 0 51 2.995732 0.000000 342 +cool 0 49 3.044522 0.000000 374 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +posit 0 31 3.496508 0.000000 552 +rule 1 26 3.688879 3.688879 638 +mike 0 24 3.761200 0.000000 703 +basi 0 20 3.951244 0.000000 828 +item 0 19 4.007333 0.000000 856 +accept 0 18 4.060443 0.000000 879 +senior 0 14 4.317488 0.000000 1120 +usavoic 0 13 4.382027 0.000000 1198 +modul 0 10 4.653960 0.000000 1434 +mepost 0 10 4.653960 0.000000 1472 +declar 0 9 4.753590 0.000000 1526 +lane 0 8 4.875197 0.000000 1720 +unpublish 0 6 5.164786 0.000000 2226 +mirank 0 5 5.347108 0.000000 2543 +lanc 0 4 5.568345 0.000000 3022 +warshaw 1 2 6.263398 6.263398 5659 +venu 0 2 6.263398 0.000000 5655 +developedat 0 2 6.263398 0.000000 4078 +obermey 0 2 6.263398 0.000000 5657 +warshawlan 0 1 6.957497 0.000000 14542 +laboratoryinvolv 0 1 6.957497 0.000000 14543 +andat 0 1 6.957497 0.000000 14544 +arlut 0 1 6.957497 0.000000 14545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..a2985798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +analysi 0 124 2.079442 0.000000 98 +mathemat 0 108 2.197225 0.000000 123 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +book 0 99 2.302585 0.000000 131 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +master 0 76 2.564949 0.000000 216 +decemb 0 80 2.564949 0.000000 215 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +java 0 70 2.708050 0.000000 248 +august 0 66 2.708050 0.000000 257 +dept 0 64 2.772589 0.000000 291 +juli 0 60 2.833213 0.000000 305 +unix 0 58 2.890372 0.000000 308 +sampl 0 53 2.944439 0.000000 339 +numer 0 49 3.044522 0.000000 369 +math 0 44 3.135494 0.000000 402 +china 0 37 3.332205 0.000000 487 +expect 0 37 3.332205 0.000000 484 +manual 0 35 3.401197 0.000000 504 +common 0 30 3.555348 0.000000 574 +load 0 28 3.610918 0.000000 601 +chen 0 21 3.912023 0.000000 791 +demo 0 18 4.060443 0.000000 888 +perl 0 11 4.553877 0.000000 1332 +gatewai 0 7 5.010635 0.000000 1942 +fudan 0 3 5.857933 0.000000 3707 +rosett 0 2 6.263398 0.000000 5595 +wchen 0 1 6.957497 0.000000 14546 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..3bdef74a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +hall 0 146 1.945910 0.000000 65 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +memori 0 101 2.302585 0.000000 139 +section 0 94 2.397895 0.000000 149 +orient 0 80 2.564949 0.000000 205 +taylor 0 63 2.772589 0.000000 287 +best 0 59 2.833213 0.000000 299 +paul 0 38 3.295837 0.000000 471 +postal 0 30 3.555348 0.000000 580 +usual 0 28 3.610918 0.000000 608 +reach 0 24 3.761200 0.000000 688 +lead 0 23 3.806662 0.000000 718 +thought 0 17 4.110874 0.000000 945 +wilson 0 9 4.753590 0.000000 1536 +oop 0 8 4.875197 0.000000 1778 +cross 0 8 4.875197 0.000000 1703 +informationi 0 3 5.857933 0.000000 3871 +novelti 0 2 6.263398 0.000000 5765 +ltwilson 0 1 6.957497 0.000000 14547 +headshot 0 1 6.957497 0.000000 14548 +workson 0 1 6.957497 0.000000 14549 +teachingin 0 1 6.957497 0.000000 14550 +sciencesnot 0 1 6.957497 0.000000 14551 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..b4bc3d94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +relat 0 139 1.945910 0.000000 68 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +school 0 84 2.484907 0.000000 188 +thing 0 84 2.484907 0.000000 189 +start 0 83 2.484907 0.000000 173 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +come 0 78 2.564949 0.000000 202 +differ 0 66 2.708050 0.000000 253 +think 0 57 2.890372 0.000000 314 +major 0 56 2.890372 0.000000 315 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +without 0 50 3.044522 0.000000 370 +give 0 50 3.044522 0.000000 359 +made 0 44 3.135494 0.000000 398 +howev 0 41 3.218876 0.000000 422 +past 0 42 3.218876 0.000000 428 +live 0 40 3.258097 0.000000 451 +realli 0 40 3.258097 0.000000 444 +mean 0 37 3.332205 0.000000 477 +ofth 0 36 3.367296 0.000000 491 +john 0 33 3.433987 0.000000 532 +abl 0 30 3.555348 0.000000 566 +becom 0 28 3.610918 0.000000 603 +though 0 27 3.637586 0.000000 622 +quit 0 27 3.637586 0.000000 633 +decis 0 23 3.806662 0.000000 728 +lead 0 23 3.806662 0.000000 718 +dai 0 22 3.850148 0.000000 753 +born 0 21 3.912023 0.000000 798 +attend 0 18 4.060443 0.000000 893 +record 0 18 4.060443 0.000000 890 +listen 0 18 4.060443 0.000000 907 +accept 0 18 4.060443 0.000000 879 +thought 0 17 4.110874 0.000000 945 +seek 0 17 4.110874 0.000000 954 +whether 0 17 4.110874 0.000000 918 +whole 0 17 4.110874 0.000000 940 +earli 0 16 4.174387 0.000000 968 +month 0 15 4.248495 0.000000 1025 +enough 0 15 4.248495 0.000000 1040 +becam 0 14 4.317488 0.000000 1117 +hong 0 14 4.317488 0.000000 1105 +decid 0 14 4.317488 0.000000 1075 +believ 0 13 4.382027 0.000000 1187 +came 0 13 4.382027 0.000000 1197 +weak 0 13 4.382027 0.000000 1159 +opportun 0 13 4.382027 0.000000 1161 +count 0 12 4.465908 0.000000 1239 +true 0 10 4.653960 0.000000 1422 +reli 0 10 4.653960 0.000000 1411 +strength 0 9 4.753590 0.000000 1494 +kong 0 9 4.753590 0.000000 1602 +clear 0 9 4.753590 0.000000 1488 +trust 0 9 4.753590 0.000000 1583 +said 0 9 4.753590 0.000000 1571 +matter 0 8 4.875197 0.000000 1627 +realiz 0 8 4.875197 0.000000 1739 +christian 1 7 5.010635 5.010635 1949 +therefor 0 7 5.010635 0.000000 1822 +wrong 0 6 5.164786 0.000000 2025 +matthew 0 6 5.164786 0.000000 2193 +church 0 4 5.568345 0.000000 3011 +jesu 0 3 5.857933 0.000000 3624 +faith 0 3 5.857933 0.000000 3363 +bibl 0 3 5.857933 0.000000 3143 +credibl 0 3 5.857933 0.000000 3210 +shouldb 0 3 5.857933 0.000000 3673 +theywil 0 3 5.857933 0.000000 3102 +doubt 0 3 5.857933 0.000000 3119 +nota 0 3 5.857933 0.000000 3785 +holi 0 2 6.263398 0.000000 5711 +christ 0 2 6.263398 0.000000 5766 +arthur 0 2 6.263398 0.000000 5767 +religi 0 2 6.263398 0.000000 4816 +intent 0 2 6.263398 0.000000 5768 +stumbl 0 2 6.263398 0.000000 5349 +hei 0 2 6.263398 0.000000 5769 +forgiv 0 2 6.263398 0.000000 5770 +andto 0 2 6.263398 0.000000 5771 +differencebetween 0 2 6.263398 0.000000 5431 +deed 0 2 6.263398 0.000000 5077 +wedo 0 2 6.263398 0.000000 5772 +sick 0 2 6.263398 0.000000 5773 +ought 0 2 6.263398 0.000000 5365 +hesit 0 2 6.263398 0.000000 5774 +sin 0 1 6.957497 0.000000 14552 +cent 0 1 6.957497 0.000000 14553 +christiani 0 1 6.957497 0.000000 14554 +alittl 0 1 6.957497 0.000000 14555 +totallyunexpect 0 1 6.957497 0.000000 14556 +compulsori 0 1 6.957497 0.000000 14557 +thechristian 0 1 6.957497 0.000000 14558 +tobecom 0 1 6.957497 0.000000 14559 +slife 0 1 6.957497 0.000000 14560 +deepli 0 1 6.957497 0.000000 14561 +mylif 0 1 6.957497 0.000000 14562 +misconcept 0 1 6.957497 0.000000 14563 +christianwa 0 1 6.957497 0.000000 14564 +christianand 0 1 6.957497 0.000000 14565 +lovedeveri 0 1 6.957497 0.000000 14566 +achristian 0 1 6.957497 0.000000 14567 +virtuou 0 1 6.957497 0.000000 14568 +thefellowship 0 1 6.957497 0.000000 14569 +flesh 0 1 6.957497 0.000000 14570 +sinless 0 1 6.957497 0.000000 14571 +sympath 0 1 6.957497 0.000000 14572 +weconfess 0 1 6.957497 0.000000 14573 +cleans 0 1 6.957497 0.000000 14574 +unright 0 1 6.957497 0.000000 14575 +astheir 0 1 6.957497 0.000000 14576 +saviour 0 1 6.957497 0.000000 14577 +gratefulli 0 1 6.957497 0.000000 14578 +redempt 0 1 6.957497 0.000000 14579 +fortheir 0 1 6.957497 0.000000 14580 +justifi 0 1 6.957497 0.000000 14581 +roman 0 1 6.957497 0.000000 14582 +thecontrari 0 1 6.957497 0.000000 14583 +givesu 0 1 6.957497 0.000000 14584 +physician 0 1 6.957497 0.000000 14585 +onour 0 1 6.957497 0.000000 14586 +thetruth 0 1 6.957497 0.000000 14587 +thankgod 0 1 6.957497 0.000000 14588 +wkmak 0 1 6.957497 0.000000 14589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..cf7ecc58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +network 0 168 1.791759 0.000000 61 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +topic 0 114 2.197225 0.000000 110 +send 0 114 2.197225 0.000000 109 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +well 0 109 2.197225 0.000000 121 +user 0 104 2.302585 0.000000 137 +comment 0 93 2.397895 0.000000 146 +pictur 0 89 2.397895 0.000000 160 +graphic 0 90 2.397895 0.000000 147 +real 0 93 2.397895 0.000000 144 +question 0 91 2.397895 0.000000 141 +homepag 0 93 2.397895 0.000000 148 +associ 0 93 2.397895 0.000000 151 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +educ 0 86 2.484907 0.000000 191 +control 0 82 2.484907 0.000000 164 +member 0 84 2.484907 0.000000 165 +start 0 83 2.484907 0.000000 173 +wide 0 84 2.484907 0.000000 185 +interfac 0 79 2.564949 0.000000 209 +state 0 76 2.564949 0.000000 207 +intellig 0 72 2.639057 0.000000 225 +onlin 0 75 2.639057 0.000000 223 +multimedia 0 68 2.708050 0.000000 258 +artifici 0 63 2.772589 0.000000 280 +organ 0 65 2.772589 0.000000 265 +taylor 0 63 2.772589 0.000000 287 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +explor 0 58 2.890372 0.000000 324 +reason 0 57 2.890372 0.000000 318 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +approach 0 48 3.044522 0.000000 366 +frequent 0 49 3.044522 0.000000 367 +physic 0 47 3.091042 0.000000 377 +vision 0 41 3.218876 0.000000 430 +fast 0 42 3.218876 0.000000 429 +movi 0 40 3.258097 0.000000 459 +robot 1 36 3.367296 3.367296 497 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +rang 0 30 3.555348 0.000000 565 +neural 0 30 3.555348 0.000000 578 +common 0 30 3.555348 0.000000 574 +built 0 29 3.583519 0.000000 592 +semant 0 29 3.583519 0.000000 587 +art 0 29 3.583519 0.000000 593 +ask 0 28 3.610918 0.000000 597 +manipul 0 27 3.637586 0.000000 624 +doctor 0 24 3.761200 0.000000 709 +yahoo 0 24 3.761200 0.000000 707 +mobil 0 23 3.806662 0.000000 730 +miscellan 0 23 3.806662 0.000000 731 +hierarchi 0 22 3.850148 0.000000 744 +navig 0 21 3.912023 0.000000 796 +unit 0 21 3.912023 0.000000 779 +love 0 21 3.912023 0.000000 804 +offici 0 18 4.060443 0.000000 894 +spatial 0 16 4.174387 0.000000 988 +commerci 0 16 4.174387 0.000000 1005 +remot 0 15 4.248495 0.000000 1041 +embed 0 14 4.317488 0.000000 1102 +hotlist 0 13 4.382027 0.000000 1199 +primarili 0 13 4.382027 0.000000 1185 +qualit 0 11 4.553877 0.000000 1362 +player 0 11 4.553877 0.000000 1371 +hello 0 10 4.653960 0.000000 1407 +catalog 0 10 4.653960 0.000000 1431 +meta 0 9 4.753590 0.000000 1505 +ring 0 8 4.875197 0.000000 1684 +guitar 0 8 4.875197 0.000000 1758 +autonom 0 8 4.875197 0.000000 1749 +sensor 0 7 5.010635 0.000000 1920 +spot 0 7 5.010635 0.000000 1894 +usenet 0 7 5.010635 0.000000 1839 +race 0 5 5.347108 0.000000 2417 +car 0 4 5.568345 0.000000 2931 +worki 0 4 5.568345 0.000000 3010 +fora 0 4 5.568345 0.000000 2697 +ncsa 0 4 5.568345 0.000000 2767 +motor 0 3 5.857933 0.000000 3909 +badminton 0 2 6.263398 0.000000 5221 +martial 0 2 6.263398 0.000000 5004 +worm 0 2 6.263398 0.000000 5775 +eduperson 0 2 6.263398 0.000000 5776 +ultrason 0 1 6.957497 0.000000 14590 +rhino 0 1 6.957497 0.000000 14591 +robokreta 0 1 6.957497 0.000000 14592 +wyle 0 1 6.957497 0.000000 14593 +clarinet 0 1 6.957497 0.000000 14594 +mobilerobot 0 1 6.957497 0.000000 14595 +rover 0 1 6.957497 0.000000 14596 +tall 0 1 6.957497 0.000000 14597 +robocac 0 1 6.957497 0.000000 14598 +robofest 0 1 6.957497 0.000000 14599 +besar 0 1 6.957497 0.000000 14600 +kicik 0 1 6.957497 0.000000 14601 +chassi 0 1 6.957497 0.000000 14602 +andqualit 0 1 6.957497 0.000000 14603 +malaysia 0 1 6.957497 0.000000 14604 +interestsavid 0 1 6.957497 0.000000 14605 +usba 0 1 6.957497 0.000000 14606 +miscellaneousinterest 0 1 6.957497 0.000000 14607 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..3b92530e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +utexa 0 189 1.609438 0.000000 44 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +west 0 83 2.484907 0.000000 192 +xfeng 0 2 6.263398 0.000000 4376 +qaustin 0 1 6.957497 0.000000 14608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..2b8b6784 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +search 0 95 2.397895 0.000000 155 +present 0 91 2.397895 0.000000 145 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +nation 0 74 2.639057 0.000000 240 +line 0 75 2.639057 0.000000 231 +multimedia 0 68 2.708050 0.000000 258 +creat 0 63 2.772589 0.000000 277 +major 0 56 2.890372 0.000000 315 +visual 0 48 3.044522 0.000000 372 +right 0 48 3.044522 0.000000 363 +frequent 0 49 3.044522 0.000000 367 +pointer 0 48 3.044522 0.000000 368 +around 0 43 3.178054 0.000000 415 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +realli 0 40 3.258097 0.000000 444 +feel 0 37 3.332205 0.000000 483 +china 0 37 3.332205 0.000000 487 +soon 0 36 3.367296 0.000000 494 +express 0 32 3.465736 0.000000 540 +travel 0 30 3.555348 0.000000 579 +hard 0 30 3.555348 0.000000 563 +american 0 27 3.637586 0.000000 634 +sport 0 25 3.737670 0.000000 683 +head 0 23 3.806662 0.000000 732 +watch 0 21 3.912023 0.000000 789 +unit 0 21 3.912023 0.000000 779 +goe 0 15 4.248495 0.000000 1044 +club 0 15 4.248495 0.000000 1058 +classic 0 14 4.317488 0.000000 1084 +audio 0 14 4.317488 0.000000 1094 +rank 0 14 4.317488 0.000000 1063 +hopefulli 0 14 4.317488 0.000000 1071 +pretti 0 13 4.382027 0.000000 1191 +walk 0 12 4.465908 0.000000 1281 +newspap 0 12 4.465908 0.000000 1280 +string 0 11 4.553877 0.000000 1340 +keyword 0 11 4.553877 0.000000 1356 +vista 0 10 4.653960 0.000000 1452 +card 0 10 4.653960 0.000000 1435 +hang 0 9 4.753590 0.000000 1499 +soccer 0 8 4.875197 0.000000 1752 +surpris 0 7 5.010635 0.000000 1828 +harrick 0 7 5.010635 0.000000 1849 +photographi 0 6 5.164786 0.000000 2146 +infoseek 0 6 5.164786 0.000000 2188 +financi 0 6 5.164786 0.000000 2197 +atlant 0 5 5.347108 0.000000 2508 +alta 0 4 5.568345 0.000000 3039 +leagu 0 4 5.568345 0.000000 3040 +aswel 0 3 5.857933 0.000000 3286 +serious 0 3 5.857933 0.000000 3663 +monthli 0 3 5.857933 0.000000 3910 +imagin 0 2 6.263398 0.000000 5472 +clearer 0 2 6.263398 0.000000 5676 +marvel 0 2 6.263398 0.000000 5400 +morn 0 2 6.263398 0.000000 5162 +xingang 0 1 6.957497 0.000000 14609 +delight 0 1 6.957497 0.000000 14610 +temporaryresort 0 1 6.957497 0.000000 14611 +llgradual 0 1 6.957497 0.000000 14612 +havesometh 0 1 6.957497 0.000000 14613 +foliag 0 1 6.957497 0.000000 14614 +miata 0 1 6.957497 0.000000 14615 +xguo 0 1 6.957497 0.000000 14616 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..dbad7cfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +address 1 170 1.791759 1.791759 62 +geoffrei 1 3 5.857933 5.857933 3505 +pagemov 1 1 6.957497 6.957497 14617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..c0455c5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +challeng 0 26 3.688879 0.000000 653 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +yang 0 8 4.875197 0.000000 1652 +alert 0 5 5.347108 0.000000 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..710dde8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +first 0 140 1.945910 0.000000 71 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +make 0 111 2.197225 0.000000 120 +sinc 0 90 2.397895 0.000000 159 +homepag 0 93 2.397895 0.000000 148 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +david 0 71 2.639057 0.000000 232 +nation 0 74 2.639057 0.000000 240 +street 0 63 2.772589 0.000000 293 +still 0 50 3.044522 0.000000 362 +life 0 50 3.044522 0.000000 375 +get 0 46 3.091042 0.000000 380 +favorit 0 44 3.135494 0.000000 410 +must 0 40 3.258097 0.000000 442 +word 0 34 3.401197 0.000000 508 +quot 0 29 3.583519 0.000000 582 +art 0 29 3.583519 0.000000 593 +hope 0 28 3.610918 0.000000 610 +utc 1 27 3.637586 3.637586 629 +team 0 27 3.637586 0.000000 625 +daili 0 24 3.761200 0.000000 706 +busi 0 21 3.912023 0.000000 784 +tell 0 21 3.912023 0.000000 777 +beauti 0 18 4.060443 0.000000 912 +sept 0 17 4.110874 0.000000 952 +took 0 16 4.174387 0.000000 1010 +classic 0 14 4.317488 0.000000 1084 +hong 0 14 4.317488 0.000000 1105 +stori 0 14 4.317488 0.000000 1087 +introduc 0 13 4.382027 0.000000 1139 +outsid 0 12 4.465908 0.000000 1219 +franc 0 12 4.465908 0.000000 1276 +hello 0 10 4.653960 0.000000 1407 +rich 0 10 4.653960 0.000000 1396 +label 0 10 4.653960 0.000000 1423 +establish 0 9 4.753590 0.000000 1532 +kong 0 9 4.753590 0.000000 1602 +french 0 9 4.753590 0.000000 1511 +guitar 1 8 4.875197 4.875197 1758 +grew 0 8 4.875197 0.000000 1742 +corner 0 7 5.010635 0.000000 1909 +footbal 0 7 5.010635 0.000000 1912 +whatev 0 6 5.164786 0.000000 2097 +artist 0 6 5.164786 0.000000 2127 +seriou 0 5 5.347108 0.000000 2252 +christoph 0 5 5.347108 0.000000 2512 +festiv 0 4 5.568345 0.000000 2952 +tire 0 4 5.568345 0.000000 2799 +align 0 4 5.568345 0.000000 2863 +concert 0 3 5.857933 0.000000 3533 +byth 0 3 5.857933 0.000000 3874 +jesu 0 3 5.857933 0.000000 3624 +passion 0 3 5.857933 0.000000 3633 +michel 0 3 5.857933 0.000000 3791 +medit 0 2 6.263398 0.000000 5777 +retir 0 2 6.263398 0.000000 5674 +christ 0 2 6.263398 0.000000 5766 +wwwdavid 0 1 6.957497 0.000000 14618 +assad 0 1 6.957497 0.000000 14619 +brothersin 0 1 6.957497 0.000000 14620 +parkeningi 0 1 6.957497 0.000000 14621 +guitarist 0 1 6.957497 0.000000 14622 +ofconcert 0 1 6.957497 0.000000 14623 +reconcili 0 1 6.957497 0.000000 14624 +rekindl 0 1 6.957497 0.000000 14625 +theamsterdam 0 1 6.957497 0.000000 14626 +trio 0 1 6.957497 0.000000 14627 +flair 0 1 6.957497 0.000000 14628 +platini 0 1 6.957497 0.000000 14629 +magazinepublish 0 1 6.957497 0.000000 14630 +minist 0 1 6.957497 0.000000 14631 +absmiddl 0 1 6.957497 0.000000 14632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..564184fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +challeng 0 26 3.688879 0.000000 653 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 +yanbin 0 2 6.263398 0.000000 5599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..a2f0f3bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +pleas 1 113 2.197225 2.197225 114 +browser 1 56 2.890372 2.890372 313 +continu 1 39 3.258097 3.258097 448 +oop 1 8 4.875197 4.875197 1778 +yuan 1 3 5.857933 5.857933 3653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..8f59dd68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +number 0 130 2.079442 0.000000 97 +mathemat 0 108 2.197225 0.000000 123 +place 0 106 2.197225 0.000000 124 +sinc 0 90 2.397895 0.000000 159 +state 0 76 2.564949 0.000000 207 +servic 0 72 2.639057 0.000000 236 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +china 0 37 3.332205 0.000000 487 +travel 0 30 3.555348 0.000000 579 +postal 0 30 3.555348 0.000000 580 +unit 0 21 3.912023 0.000000 779 +beij 0 19 4.007333 0.000000 876 +beauti 0 18 4.060443 0.000000 912 +wife 0 13 4.382027 0.000000 1196 +tsinghua 0 13 4.382027 0.000000 1195 +stai 0 12 4.465908 0.000000 1215 +jersei 0 9 4.753590 0.000000 1587 +heavi 0 7 5.010635 0.000000 1841 +river 0 6 5.164786 0.000000 2220 +yong 1 4 5.568345 5.568345 2809 +rutger 0 3 5.857933 0.000000 3566 +brunswick 0 3 5.857933 0.000000 3567 +settl 0 2 6.263398 0.000000 5778 +homepageto 0 1 6.957497 0.000000 14633 +homepagey 0 1 6.957497 0.000000 14634 +milanitalian 0 1 6.957497 0.000000 14635 +soccerk 0 1 6.957497 0.000000 14636 +soccernba 0 1 6.957497 0.000000 14637 +sitefox 0 1 6.957497 0.000000 14638 +sportschicago 0 1 6.957497 0.000000 14639 +bullsmichael 0 1 6.957497 0.000000 14640 +jordannflnhlc 0 1 6.957497 0.000000 14641 +rankingmarri 0 1 6.957497 0.000000 14642 +childrenseinfeldcomput 0 1 6.957497 0.000000 14643 +sciencesutilitieshtml 0 1 6.957497 0.000000 14644 +convertersimag 0 1 6.957497 0.000000 14645 +collectionssystemshtmllatexcgitcl 0 1 6.957497 0.000000 14646 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 0 1 6.957497 0.000000 14647 +tmiscinternet 0 1 6.957497 0.000000 14648 +parcel 0 1 6.957497 0.000000 14649 +usp 0 1 6.957497 0.000000 14650 +fedexus 0 1 6.957497 0.000000 14651 +guidefun 0 1 6.957497 0.000000 14652 +todayu 0 1 6.957497 0.000000 14653 +newsstarwavesupermodel 0 1 6.957497 0.000000 14654 +yonglu 0 1 6.957497 0.000000 14655 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..459276c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +analysi 0 124 2.079442 0.000000 98 +high 0 130 2.079442 0.000000 101 +mathemat 0 108 2.197225 0.000000 123 +topic 0 114 2.197225 0.000000 110 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +associ 0 93 2.397895 0.000000 151 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +larg 0 82 2.484907 0.000000 168 +solut 0 82 2.484907 0.000000 162 +journal 0 83 2.484907 0.000000 183 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +second 0 81 2.484907 0.000000 166 +academ 0 82 2.484907 0.000000 178 +level 0 87 2.484907 0.000000 180 +method 1 80 2.564949 2.564949 213 +david 0 71 2.639057 0.000000 232 +appli 0 71 2.639057 0.000000 226 +solv 0 73 2.639057 0.000000 234 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +differ 0 66 2.708050 0.000000 253 +degre 0 69 2.708050 0.000000 259 +previou 0 62 2.772589 0.000000 290 +share 0 59 2.833213 0.000000 304 +sever 0 56 2.890372 0.000000 322 +special 0 56 2.890372 0.000000 320 +index 0 56 2.890372 0.000000 309 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +numer 0 49 3.044522 0.000000 369 +algebra 0 45 3.135494 0.000000 394 +linear 1 41 3.218876 3.218876 431 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +press 0 42 3.218876 0.000000 419 +procedur 0 36 3.367296 0.000000 488 +award 0 34 3.401197 0.000000 523 +committe 0 34 3.401197 0.000000 522 +next 0 34 3.401197 0.000000 517 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +profil 0 30 3.555348 0.000000 581 +chair 0 29 3.583519 0.000000 596 +focus 0 29 3.583519 0.000000 584 +packag 0 28 3.610918 0.000000 614 +american 0 27 3.637586 0.000000 634 +supercomput 0 25 3.737670 0.000000 681 +fellow 0 24 3.761200 0.000000 701 +equat 0 23 3.806662 0.000000 724 +honor 0 23 3.806662 0.000000 729 +variabl 0 23 3.806662 0.000000 715 +director 0 22 3.850148 0.000000 767 +siam 0 21 3.912023 0.000000 800 +smith 0 20 3.951244 0.000000 820 +partial 0 18 4.060443 0.000000 900 +differenti 0 17 4.110874 0.000000 921 +young 1 16 4.174387 4.174387 991 +spars 0 16 4.174387 0.000000 989 +vector 0 16 4.174387 0.000000 961 +contribut 0 15 4.248495 0.000000 1021 +researchmi 0 14 4.317488 0.000000 1119 +finit 0 14 4.317488 0.000000 1106 +polynomi 0 14 4.317488 0.000000 1069 +iter 1 12 4.465908 4.465908 1206 +matric 0 10 4.653960 0.000000 1399 +suitabl 0 9 4.753590 0.000000 1486 +carei 0 8 4.875197 0.000000 1781 +harvard 0 7 5.010635 0.000000 1926 +converg 0 7 5.010635 0.000000 1844 +solver 0 7 5.010635 0.000000 1911 +outstand 0 6 5.164786 0.000000 2136 +argonn 0 5 5.347108 0.000000 2461 +singapor 0 5 5.347108 0.000000 2487 +ration 0 5 5.347108 0.000000 2427 +minneapoli 0 5 5.347108 0.000000 2480 +crai 0 4 5.568345 0.000000 3012 +naval 0 4 5.568345 0.000000 2920 +rapidli 0 4 5.568345 0.000000 2850 +graham 0 4 5.568345 0.000000 2817 +wilei 0 4 5.568345 0.000000 2669 +truste 0 3 5.857933 0.000000 3900 +stationari 0 3 5.857933 0.000000 3861 +kincaid 0 2 6.263398 0.000000 5617 +ofmathemat 0 2 6.263398 0.000000 4167 +interestnumer 0 2 6.263398 0.000000 5717 +methodsand 0 2 6.263398 0.000000 5779 +itpack 0 2 6.263398 0.000000 5619 +rassia 0 2 6.263398 0.000000 5620 +omega 0 2 6.263398 0.000000 4368 +pde 0 2 6.263398 0.000000 4505 +youngashbel 0 1 6.957497 0.000000 14656 +webb 0 1 6.957497 0.000000 14657 +issueded 0 1 6.957497 0.000000 14658 +mathematicalsocieti 0 1 6.957497 0.000000 14659 +matrixappl 0 1 6.957497 0.000000 14660 +numericallinear 0 1 6.957497 0.000000 14661 +partialdifferenti 0 1 6.957497 0.000000 14662 +oflinear 0 1 6.957497 0.000000 14663 +andspars 0 1 6.957497 0.000000 14664 +basedon 0 1 6.957497 0.000000 14665 +beingextend 0 1 6.957497 0.000000 14666 +distributedmemori 0 1 6.957497 0.000000 14667 +methodsbas 0 1 6.957497 0.000000 14668 +multilevel 0 1 6.957497 0.000000 14669 +beingdevelop 0 1 6.957497 0.000000 14670 +publicationsd 0 1 6.957497 0.000000 14671 +srivasiava 0 1 6.957497 0.000000 14672 +yanushauska 0 1 6.957497 0.000000 14673 +publ 0 1 6.957497 0.000000 14674 +vona 0 1 6.957497 0.000000 14675 +sepehrnoori 0 1 6.957497 0.000000 14676 +son 0 1 6.957497 0.000000 14677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..32683464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +report 0 131 2.079442 0.000000 92 +welcom 0 122 2.079442 0.000000 99 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +find 0 111 2.197225 0.000000 111 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +call 0 91 2.397895 0.000000 153 +comment 0 93 2.397895 0.000000 146 +decemb 0 80 2.564949 0.000000 215 +appear 0 78 2.564949 0.000000 210 +master 0 76 2.564949 0.000000 216 +html 0 75 2.639057 0.000000 235 +summari 0 73 2.639057 0.000000 237 +august 0 66 2.708050 0.000000 257 +differ 0 66 2.708050 0.000000 253 +organ 0 65 2.772589 0.000000 265 +written 0 63 2.772589 0.000000 278 +prof 0 64 2.772589 0.000000 273 +interact 0 62 2.772589 0.000000 270 +function 0 62 2.772589 0.000000 275 +dept 0 64 2.772589 0.000000 291 +septemb 0 65 2.772589 0.000000 274 +thesi 0 57 2.890372 0.000000 327 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +visitor 0 49 3.044522 0.000000 371 +featur 0 46 3.091042 0.000000 386 +electron 0 47 3.091042 0.000000 379 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +press 0 42 3.218876 0.000000 419 +multipl 0 39 3.258097 0.000000 453 +map 0 39 3.258097 0.000000 452 +hand 0 37 3.332205 0.000000 475 +connect 0 37 3.332205 0.000000 485 +within 0 33 3.433987 0.000000 525 +ad 0 32 3.465736 0.000000 544 +extend 0 32 3.465736 0.000000 539 +photo 0 31 3.496508 0.000000 561 +neural 0 30 3.555348 0.000000 578 +synchron 0 29 3.583519 0.000000 588 +utc 0 27 3.637586 0.000000 629 +repres 0 26 3.688879 0.000000 656 +recognit 0 23 3.806662 0.000000 723 +self 0 22 3.850148 0.000000 761 +newsgroup 0 21 3.912023 0.000000 783 +event 0 18 4.060443 0.000000 896 +interconnect 0 17 4.110874 0.000000 937 +segment 0 17 4.110874 0.000000 931 +outlin 0 17 4.110874 0.000000 914 +cambridg 0 16 4.174387 0.000000 1008 +later 0 15 4.248495 0.000000 1043 +total 0 10 4.653960 0.000000 1398 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +isbn 0 7 5.010635 0.000000 1901 +bunch 0 7 5.010635 0.000000 1861 +joseph 0 5 5.347108 0.000000 2327 +korea 0 4 5.568345 0.000000 2971 +cortex 0 3 5.857933 0.000000 3856 +seoul 0 3 5.857933 0.000000 3783 +cortic 0 3 5.857933 0.000000 3857 +neuron 0 3 5.857933 0.000000 3798 +yoonsuck 1 2 6.263398 6.263398 4177 +choe 1 2 6.263398 6.263398 4178 +lissom 0 2 6.263398 0.000000 5605 +sirosh 0 2 6.263398 0.000000 5609 +yschoe 0 2 6.263398 0.000000 4179 +touretzki 0 2 6.263398 0.000000 4428 +spike 0 1 6.957497 0.000000 14678 +yonsei 0 1 6.957497 0.000000 14679 +systembas 0 1 6.957497 0.000000 14680 +laterali 0 1 6.957497 0.000000 14681 +synerget 0 1 6.957497 0.000000 14682 +actualspik 0 1 6.957497 0.000000 14683 +slissom 0 1 6.957497 0.000000 14684 +beself 0 1 6.957497 0.000000 14685 +retinabi 0 1 6.957497 0.000000 14686 +desynchron 0 1 6.957497 0.000000 14687 +mozer 0 1 6.957497 0.000000 14688 +hasselmo 0 1 6.957497 0.000000 14689 +handwritten 0 1 6.957497 0.000000 14690 +techic 0 1 6.957497 0.000000 14691 +unord 0 1 6.957497 0.000000 14692 +interestingcontact 0 1 6.957497 0.000000 14693 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..c1311939 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +object 0 138 1.945910 0.000000 79 +spring 0 131 2.079442 0.000000 88 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +mathemat 0 108 2.197225 0.000000 123 +select 0 91 2.397895 0.000000 154 +associ 0 93 2.397895 0.000000 151 +institut 0 84 2.484907 0.000000 187 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +interfac 0 79 2.564949 0.000000 209 +logic 0 71 2.639057 0.000000 230 +java 0 70 2.708050 0.000000 248 +previou 0 62 2.772589 0.000000 290 +prof 0 64 2.772589 0.000000 273 +semest 0 58 2.890372 0.000000 312 +publish 0 57 2.890372 0.000000 326 +tabl 0 51 2.995732 0.000000 346 +china 1 37 3.332205 3.332205 487 +common 0 30 3.555348 0.000000 574 +chines 0 29 3.583519 0.000000 595 +client 0 25 3.737670 0.000000 679 +yahoo 0 24 3.761200 0.000000 707 +famili 0 23 3.806662 0.000000 735 +alumni 0 21 3.912023 0.000000 807 +wang 0 21 3.912023 0.000000 790 +chen 0 21 3.912023 0.000000 791 +break 0 20 3.951244 0.000000 812 +synthesi 0 20 3.951244 0.000000 834 +beij 0 19 4.007333 0.000000 876 +brown 0 16 4.174387 0.000000 977 +universityof 0 15 4.248495 0.000000 1061 +hong 0 14 4.317488 0.000000 1105 +nick 0 13 4.382027 0.000000 1180 +perl 0 11 4.553877 0.000000 1332 +engr 0 10 4.653960 0.000000 1427 +academi 0 8 4.875197 0.000000 1735 +yang 0 8 4.875197 0.000000 1652 +gatewai 0 7 5.010635 0.000000 1942 +munich 0 3 5.857933 0.000000 3570 +yuan 0 3 5.857933 0.000000 3653 +manchest 0 2 6.263398 0.000000 4828 +addresspictur 0 2 6.263398 0.000000 5584 +schedulec 0 2 6.263398 0.000000 4190 +gang 0 2 6.263398 0.000000 4530 +yuanj 0 1 6.957497 0.000000 14694 +xuwint 0 1 6.957497 0.000000 14695 +aziz 0 1 6.957497 0.000000 14696 +pagechines 0 1 6.957497 0.000000 14697 +hefei 0 1 6.957497 0.000000 14698 +chinaunivers 0 1 6.957497 0.000000 14699 +atmunich 0 1 6.957497 0.000000 14700 +germanyunivers 0 1 6.957497 0.000000 14701 +higham 0 1 6.957497 0.000000 14702 +lifan 0 1 6.957497 0.000000 14703 +guizhongustc 0 1 6.957497 0.000000 14704 +hailiang 0 1 6.957497 0.000000 14705 +yuhongfriend 0 1 6.957497 0.000000 14706 +linsoftwar 0 1 6.957497 0.000000 14707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..ec16bb8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +last 0 314 1.098612 0.000000 14 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +number 0 130 2.079442 0.000000 97 +sinc 0 90 2.397895 0.000000 159 +visitor 0 49 3.044522 0.000000 371 +ring 0 8 4.875197 0.000000 1684 +edufing 0 4 5.568345 0.000000 2713 +feng 1 3 5.857933 5.857933 3300 +yufeng 0 1 6.957497 0.000000 14708 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..58ebe430 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +spring 0 131 2.079442 0.000000 88 +pleas 0 113 2.197225 0.000000 114 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +peopl 0 96 2.302585 0.000000 132 +associ 0 93 2.397895 0.000000 151 +sinc 0 90 2.397895 0.000000 159 +wide 0 84 2.484907 0.000000 185 +info 0 85 2.484907 0.000000 176 +resum 0 79 2.564949 0.000000 217 +master 0 76 2.564949 0.000000 216 +view 0 70 2.708050 0.000000 254 +differ 0 66 2.708050 0.000000 253 +creat 0 63 2.772589 0.000000 277 +maintain 0 51 2.995732 0.000000 342 +format 0 48 3.044522 0.000000 356 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +still 0 50 3.044522 0.000000 362 +visitor 0 49 3.044522 0.000000 371 +china 1 37 3.332205 3.332205 487 +copyright 0 36 3.367296 0.000000 495 +john 0 33 3.433987 0.000000 532 +kind 0 32 3.465736 0.000000 541 +anim 0 31 3.496508 0.000000 557 +chines 0 29 3.583519 0.000000 595 +full 0 28 3.610918 0.000000 615 +thank 0 23 3.806662 0.000000 721 +chen 0 21 3.912023 0.000000 791 +els 0 19 4.007333 0.000000 843 +seek 0 17 4.110874 0.000000 954 +misc 0 13 4.382027 0.000000 1124 +calculu 0 12 4.465908 0.000000 1203 +counter 0 8 4.875197 0.000000 1765 +accord 0 7 5.010635 0.000000 1826 +republ 0 4 5.568345 0.000000 3032 +zhongshan 0 2 6.263398 0.000000 5547 +chenabout 0 2 6.263398 0.000000 5499 +postcript 0 2 6.263398 0.000000 4050 +zodiac 0 2 6.263398 0.000000 5729 +burton 0 2 6.263398 0.000000 5759 +zhii 0 1 6.957497 0.000000 14709 +guangzhou 0 1 6.957497 0.000000 14710 +pagezhii 0 1 6.957497 0.000000 14711 +mefrom 0 1 6.957497 0.000000 14712 +canton 0 1 6.957497 0.000000 14713 +dong 0 1 6.957497 0.000000 14714 +zchen 0 1 6.957497 0.000000 14715 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..dffbadc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +process 0 142 1.945910 0.000000 72 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +world 0 115 2.197225 0.000000 126 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +commun 0 95 2.397895 0.000000 157 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +internet 0 83 2.484907 0.000000 186 +educ 0 86 2.484907 0.000000 191 +ieee 0 86 2.484907 0.000000 190 +librari 0 87 2.484907 0.000000 181 +build 0 85 2.484907 0.000000 184 +onlin 0 75 2.639057 0.000000 223 +html 0 75 2.639057 0.000000 235 +multimedia 0 68 2.708050 0.000000 258 +main 0 67 2.708050 0.000000 256 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +guid 0 63 2.772589 0.000000 267 +taylor 0 63 2.772589 0.000000 287 +room 0 59 2.833213 0.000000 301 +life 0 50 3.044522 0.000000 375 +directori 0 45 3.135494 0.000000 396 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +http 0 41 3.218876 0.000000 420 +movi 0 40 3.258097 0.000000 459 +societi 0 40 3.258097 0.000000 456 +china 0 37 3.332205 0.000000 487 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +magazin 0 24 3.761200 0.000000 704 +daili 0 24 3.761200 0.000000 706 +yahoo 0 24 3.761200 0.000000 707 +beij 0 19 4.007333 0.000000 876 +lyco 0 19 4.007333 0.000000 871 +beauti 0 18 4.060443 0.000000 912 +sept 0 17 4.110874 0.000000 952 +script 0 13 4.382027 0.000000 1171 +land 0 12 4.465908 0.000000 1273 +entertain 0 12 4.465908 0.000000 1286 +career 0 12 4.465908 0.000000 1287 +sciencesat 0 7 5.010635 0.000000 1968 +digest 0 7 5.010635 0.000000 1864 +zhou 0 6 5.164786 0.000000 2092 +assistantship 0 3 5.857933 0.000000 3660 +stamp 0 3 5.857933 0.000000 3678 +giant 0 3 5.857933 0.000000 3137 +galaxi 0 3 5.857933 0.000000 3603 +kaleidoscop 0 2 6.263398 0.000000 5780 +zhai 0 2 6.263398 0.000000 5709 +zhouxiao 0 1 6.957497 0.000000 14716 +maggi 0 1 6.957497 0.000000 14717 +xiao 0 1 6.957497 0.000000 14718 +buaa 0 1 6.957497 0.000000 14719 +newspag 0 1 6.957497 0.000000 14720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..7baf6c31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +document 1 121 2.079442 2.079442 89 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +dynam 1 76 2.564949 2.564949 194 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +html 0 75 2.639057 0.000000 235 +degre 0 69 2.708050 0.000000 259 +best 0 59 2.833213 0.000000 299 +maintain 0 51 2.995732 0.000000 342 +friend 0 48 3.044522 0.000000 376 +visitor 0 49 3.044522 0.000000 371 +netscap 0 44 3.135494 0.000000 395 +anoth 0 45 3.135494 0.000000 408 +live 0 40 3.258097 0.000000 451 +china 0 37 3.332205 0.000000 487 +titl 0 31 3.496508 0.000000 556 +travel 0 30 3.555348 0.000000 579 +enjoi 0 26 3.688879 0.000000 660 +inth 0 22 3.850148 0.000000 741 +born 0 21 3.912023 0.000000 798 +alumni 0 21 3.912023 0.000000 807 +beij 0 19 4.007333 0.000000 876 +citi 0 19 4.007333 0.000000 874 +lot 0 18 4.060443 0.000000 889 +former 0 17 4.110874 0.000000 956 +bachelor 0 17 4.110874 0.000000 957 +danc 0 12 4.465908 0.000000 1278 +classmat 0 9 4.753590 0.000000 1516 +capit 0 7 5.010635 0.000000 1957 +peek 0 6 5.164786 0.000000 2169 +peke 0 5 5.347108 0.000000 2539 +valuabl 0 5 5.347108 0.000000 2256 +qing 0 3 5.857933 0.000000 3295 +ofmi 0 3 5.857933 0.000000 3911 +sciencesaustin 0 3 5.857933 0.000000 3828 +oversea 0 2 6.263398 0.000000 5781 +informationand 0 2 6.263398 0.000000 4840 +clike 0 1 6.957497 0.000000 14721 +qinghi 0 1 6.957497 0.000000 14722 +scinc 0 1 6.957497 0.000000 14723 +findmor 0 1 6.957497 0.000000 14724 +pekingunivers 0 1 6.957497 0.000000 14725 +professionalinternetpc 0 1 6.957497 0.000000 14726 +relatedmac 0 1 6.957497 0.000000 14727 +relatedaft 0 1 6.957497 0.000000 14728 +worknetscap 0 1 6.957497 0.000000 14729 +testtwin 0 1 6.957497 0.000000 14730 +eldertwin 0 1 6.957497 0.000000 14731 +youngernetscap 0 1 6.957497 0.000000 14732 +testanim 0 1 6.957497 0.000000 14733 +titledanc 0 1 6.957497 0.000000 14734 +testanoth 0 1 6.957497 0.000000 14735 +testyet 0 1 6.957497 0.000000 14736 +testfriendsthi 0 1 6.957497 0.000000 14737 +xiaohai 0 1 6.957497 0.000000 14738 +shan 0 1 6.957497 0.000000 14739 +shinan 0 1 6.957497 0.000000 14740 +qingunivers 0 1 6.957497 0.000000 14741 +zhuqe 0 1 6.957497 0.000000 14742 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..c0d7923e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +technolog 0 131 2.079442 0.000000 102 +seattl 0 120 2.079442 0.000000 103 +intern 0 108 2.197225 0.000000 128 +peopl 0 96 2.302585 0.000000 132 +comment 0 93 2.397895 0.000000 146 +educ 0 86 2.484907 0.000000 191 +refer 0 78 2.564949 0.000000 203 +nation 0 74 2.639057 0.000000 240 +receiv 0 66 2.708050 0.000000 244 +organ 0 65 2.772589 0.000000 265 +laboratori 0 63 2.772589 0.000000 292 +faculti 0 56 2.890372 0.000000 325 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +visitor 0 49 3.044522 0.000000 371 +live 0 40 3.258097 0.000000 451 +staff 0 36 3.367296 0.000000 490 +autumn 0 31 3.496508 0.000000 558 +univ 0 28 3.610918 0.000000 617 +progress 0 28 3.610918 0.000000 598 +handl 0 24 3.761200 0.000000 685 +half 0 21 3.912023 0.000000 776 +voic 0 21 3.912023 0.000000 806 +region 0 19 4.007333 0.000000 875 +intel 0 16 4.174387 0.000000 1000 +webmast 0 15 4.248495 0.000000 1045 +desktop 0 10 4.653960 0.000000 1445 +perspect 0 10 4.653960 0.000000 1437 +deadlin 0 9 4.753590 0.000000 1502 +pacif 0 8 4.875197 0.000000 1674 +centuri 0 7 5.010635 0.000000 1935 +elsewher 0 5 5.347108 0.000000 2444 +highlight 0 5 5.347108 0.000000 2340 +medal 0 3 5.857933 0.000000 3912 +theimpact 0 3 5.857933 0.000000 3179 +exponenti 0 3 5.857933 0.000000 3529 +organizationsinclud 0 1 6.957497 0.000000 14743 +ouraffili 0 1 6.957497 0.000000 14744 +regioninclud 0 1 6.957497 0.000000 14745 +spotlightuwwin 0 1 6.957497 0.000000 14746 +programmingcontesttwovideo 0 1 6.957497 0.000000 14747 +initiativesourcolloquia 0 1 6.957497 0.000000 14748 +mbonemajordon 0 1 6.957497 0.000000 14749 +corporationdickkarp 0 1 6.957497 0.000000 14750 +scienceprofessionalmast 0 1 6.957497 0.000000 14751 +departmentoverview 0 1 6.957497 0.000000 14752 +staffposit 0 1 6.957497 0.000000 14753 +newscan 0 1 6.957497 0.000000 14754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..c377336b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +specif 0 106 2.197225 0.000000 106 +structur 0 106 2.197225 0.000000 105 +proceed 0 93 2.397895 0.000000 152 +imag 0 91 2.397895 0.000000 161 +appear 0 78 2.564949 0.000000 210 +effici 0 73 2.639057 0.000000 233 +unix 0 58 2.890372 0.000000 308 +special 0 56 2.890372 0.000000 320 +format 0 48 3.044522 0.000000 356 +fast 0 42 3.218876 0.000000 429 +multipl 0 39 3.258097 0.000000 453 +winter 0 36 3.367296 0.000000 500 +approxim 0 35 3.401197 0.000000 509 +measur 0 28 3.610918 0.000000 609 +retriev 0 27 3.637586 0.000000 621 +bookmark 0 26 3.688879 0.000000 639 +miscellan 0 23 3.806662 0.000000 731 +andrew 0 19 4.007333 0.000000 849 +beauti 0 18 4.060443 0.000000 912 +protect 0 17 4.110874 0.000000 935 +match 0 16 4.174387 0.000000 965 +wife 0 13 4.382027 0.000000 1196 +usenix 0 12 4.465908 0.000000 1240 +linda 0 10 4.653960 0.000000 1394 +distanc 0 9 4.753590 0.000000 1500 +erik 0 8 4.875197 0.000000 1701 +shapiro 0 8 4.875197 0.000000 1686 +daughter 0 7 5.010635 0.000000 1943 +spie 0 6 5.164786 0.000000 2119 +educomput 0 5 5.347108 0.000000 2524 +selberg 0 5 5.347108 0.000000 2441 +stupid 0 5 5.347108 0.000000 2489 +aberman 0 2 6.263398 0.000000 4429 +bourassa 0 2 6.263398 0.000000 5782 +virgil 0 2 6.263398 0.000000 5783 +melani 0 2 6.263398 0.000000 5784 +berman 1 1 6.957497 6.957497 14755 +debbi 0 1 6.957497 0.000000 14756 +pageandrew 0 1 6.957497 0.000000 14757 +tron 0 1 6.957497 0.000000 14758 +bothpostscript 0 1 6.957497 0.000000 14759 +andhtml 0 1 6.957497 0.000000 14760 +poison 0 1 6.957497 0.000000 14761 +donut 0 1 6.957497 0.000000 14762 +stupidmi 0 1 6.957497 0.000000 14763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..436467ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +call 0 91 2.397895 0.000000 153 +pictur 0 89 2.397895 0.000000 160 +start 0 83 2.484907 0.000000 173 +good 0 77 2.564949 0.000000 200 +view 0 70 2.708050 0.000000 254 +visit 0 63 2.772589 0.000000 288 +plan 0 65 2.772589 0.000000 272 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +undergradu 0 54 2.944439 0.000000 338 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +cool 0 49 3.044522 0.000000 374 +quarter 0 47 3.091042 0.000000 389 +physic 0 47 3.091042 0.000000 377 +made 0 44 3.135494 0.000000 398 +video 0 44 3.135494 0.000000 405 +live 0 40 3.258097 0.000000 451 +join 0 39 3.258097 0.000000 457 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +someth 0 31 3.496508 0.000000 554 +photo 0 31 3.496508 0.000000 561 +taken 0 31 3.496508 0.000000 555 +pass 0 28 3.610918 0.000000 611 +team 0 27 3.637586 0.000000 625 +great 0 27 3.637586 0.000000 626 +doctor 0 24 3.761200 0.000000 709 +finish 0 22 3.850148 0.000000 748 +hous 0 21 3.912023 0.000000 801 +wrote 0 20 3.951244 0.000000 830 +excel 0 19 4.007333 0.000000 868 +adam 1 17 4.110874 4.110874 934 +ultim 0 17 4.110874 0.000000 943 +earli 0 16 4.174387 0.000000 968 +stock 0 16 4.174387 0.000000 1007 +across 0 16 4.174387 0.000000 974 +bodi 0 13 4.382027 0.000000 1178 +menu 0 13 4.382027 0.000000 1156 +night 0 11 4.553877 0.000000 1319 +thecomput 0 10 4.653960 0.000000 1408 +drink 0 9 4.753590 0.000000 1607 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +trade 0 7 5.010635 0.000000 1815 +occasion 0 7 5.010635 0.000000 1905 +coffe 0 5 5.347108 0.000000 2556 +hair 0 5 5.347108 0.000000 2446 +formerli 0 5 5.347108 0.000000 2397 +multiresolut 0 5 5.347108 0.000000 2423 +frisbe 0 5 5.347108 0.000000 2560 +glass 0 3 5.857933 0.000000 3759 +swarthmor 0 2 6.263398 0.000000 5621 +comet 0 2 6.263398 0.000000 5785 +sculptur 0 2 6.263398 0.000000 5176 +mona 0 2 6.263398 0.000000 5786 +gothic 0 2 6.263398 0.000000 5787 +finkelstein 0 1 6.957497 0.000000 14764 +photocopi 0 1 6.957497 0.000000 14765 +cup 0 1 6.957497 0.000000 14766 +limp 0 1 6.957497 0.000000 14767 +andlack 0 1 6.957497 0.000000 14768 +atprinceton 0 1 6.957497 0.000000 14769 +tibco 0 1 6.957497 0.000000 14770 +teknekron 0 1 6.957497 0.000000 14771 +alarg 0 1 6.957497 0.000000 14772 +calledumatata 0 1 6.957497 0.000000 14773 +thehilari 0 1 6.957497 0.000000 14774 +caff 0 1 6.957497 0.000000 14775 +lardo 0 1 6.957497 0.000000 14776 +chilli 0 1 6.957497 0.000000 14777 +snoqualmi 0 1 6.957497 0.000000 14778 +hyakutak 0 1 6.957497 0.000000 14779 +marcu 0 1 6.957497 0.000000 14780 +dither 0 1 6.957497 0.000000 14781 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..d91dce93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +data 1 170 1.791759 1.791759 49 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +applic 0 170 1.791759 0.000000 56 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +databas 0 122 2.079442 0.000000 86 +machin 0 129 2.079442 0.000000 95 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +need 0 98 2.302585 0.000000 135 +present 0 91 2.397895 0.000000 145 +proceed 0 93 2.397895 0.000000 152 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +environ 0 84 2.484907 0.000000 177 +second 0 81 2.484907 0.000000 166 +novemb 0 81 2.484907 0.000000 179 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +method 0 80 2.564949 0.000000 213 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +involv 0 71 2.639057 0.000000 227 +august 0 66 2.708050 0.000000 257 +experi 0 64 2.772589 0.000000 283 +laboratori 0 63 2.772589 0.000000 292 +improv 0 62 2.772589 0.000000 289 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +found 0 53 2.944439 0.000000 337 +visual 0 48 3.044522 0.000000 372 +set 0 50 3.044522 0.000000 361 +effect 0 46 3.091042 0.000000 385 +possibl 0 47 3.091042 0.000000 378 +describ 0 45 3.135494 0.000000 400 +show 0 43 3.178054 0.000000 417 +vision 0 41 3.218876 0.000000 430 +fast 0 42 3.218876 0.000000 429 +cost 0 37 3.332205 0.000000 480 +jame 0 35 3.401197 0.000000 507 +post 0 35 3.401197 0.000000 505 +queri 0 33 3.433987 0.000000 524 +scientist 0 31 3.496508 0.000000 560 +load 0 28 3.610918 0.000000 601 +intend 0 28 3.610918 0.000000 599 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +supercomput 0 25 3.737670 0.000000 681 +handl 0 24 3.761200 0.000000 685 +highli 0 23 3.806662 0.000000 725 +displai 0 23 3.806662 0.000000 712 +ofwashington 0 22 3.850148 0.000000 766 +avoid 0 21 3.912023 0.000000 799 +facil 0 20 3.951244 0.000000 814 +toolkit 0 20 3.951244 0.000000 835 +longer 0 20 3.951244 0.000000 816 +aid 0 18 4.060443 0.000000 904 +render 0 17 4.110874 0.000000 947 +steven 0 17 4.110874 0.000000 953 +massiv 0 15 4.248495 0.000000 1026 +balanc 0 14 4.317488 0.000000 1112 +save 0 14 4.317488 0.000000 1099 +charl 0 13 4.382027 0.000000 1149 +directli 0 13 4.382027 0.000000 1141 +promot 0 12 4.465908 0.000000 1235 +target 0 12 4.465908 0.000000 1282 +amount 0 12 4.465908 0.000000 1208 +typic 0 11 4.553877 0.000000 1360 +motiv 0 11 4.553877 0.000000 1346 +extrem 0 11 4.553877 0.000000 1330 +mesh 0 11 4.553877 0.000000 1351 +linda 0 10 4.653960 0.000000 1394 +tanimoto 0 10 4.653960 0.000000 1429 +occur 0 9 4.753590 0.000000 1572 +factor 0 9 4.753590 0.000000 1544 +frank 0 9 4.753590 0.000000 1568 +transmiss 0 9 4.753590 0.000000 1588 +polygon 0 8 4.875197 0.000000 1723 +shapiro 0 8 4.875197 0.000000 1686 +lewi 0 8 4.875197 0.000000 1700 +unifi 0 8 4.875197 0.000000 1774 +dataset 0 7 5.010635 0.000000 1914 +shade 0 7 5.010635 0.000000 1881 +huge 0 6 5.164786 0.000000 1991 +unnecessari 0 5 5.347108 0.000000 2506 +ortega 0 5 5.347108 0.000000 2559 +icpp 0 5 5.347108 0.000000 2382 +loss 0 3 5.857933 0.000000 3805 +jakobovit 0 3 5.857933 0.000000 3913 +lara 0 3 5.857933 0.000000 3914 +ahren 0 2 6.263398 0.000000 5788 +redistribut 0 2 6.263398 0.000000 5582 +hansen 0 2 6.263398 0.000000 4301 +alamo 0 2 6.263398 0.000000 4243 +degrad 0 2 6.263398 0.000000 5362 +brinklei 0 2 6.263398 0.000000 5789 +notebook 0 2 6.263398 0.000000 5790 +arbitrarili 0 2 6.263398 0.000000 5791 +onunbalanc 0 1 6.957497 0.000000 14782 +whichperform 0 1 6.957497 0.000000 14783 +outweighth 0 1 6.957497 0.000000 14784 +polygonrender 0 1 6.957497 0.000000 14785 +percent 0 1 6.957497 0.000000 14786 +onbalanc 0 1 6.957497 0.000000 14787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..b81ba839 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +manag 0 114 2.197225 0.000000 125 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +imag 0 91 2.397895 0.000000 161 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +proceed 0 93 2.397895 0.000000 152 +present 0 91 2.397895 0.000000 145 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +contain 0 81 2.484907 0.000000 174 +interfac 0 79 2.564949 0.000000 209 +symposium 0 72 2.639057 0.000000 238 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +differ 0 66 2.708050 0.000000 253 +experi 0 64 2.772589 0.000000 283 +interact 0 62 2.772589 0.000000 270 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +share 0 59 2.833213 0.000000 304 +overview 0 56 2.890372 0.000000 323 +februari 0 54 2.944439 0.000000 328 +scientif 0 53 2.944439 0.000000 341 +investig 0 51 2.995732 0.000000 353 +visual 0 48 3.044522 0.000000 372 +electron 0 47 3.091042 0.000000 379 +describ 0 45 3.135494 0.000000 400 +vision 1 41 3.218876 3.218876 430 +multi 0 36 3.367296 0.000000 493 +jame 0 35 3.401197 0.000000 507 +queri 0 33 3.433987 0.000000 524 +experiment 0 26 3.688879 0.000000 645 +store 0 24 3.761200 0.000000 693 +highli 0 23 3.806662 0.000000 725 +among 0 21 3.912023 0.000000 781 +definit 0 19 4.007333 0.000000 864 +aid 0 18 4.060443 0.000000 904 +steven 0 17 4.110874 0.000000 953 +hierarch 0 15 4.248495 0.000000 1018 +attribut 0 14 4.317488 0.000000 1092 +motiv 0 11 4.553877 0.000000 1346 +linda 0 10 4.653960 0.000000 1394 +tanimoto 0 10 4.653960 0.000000 1429 +princip 0 10 4.653960 0.000000 1397 +shapiro 0 8 4.875197 0.000000 1686 +lewi 0 8 4.875197 0.000000 1700 +databasesystem 0 8 4.875197 0.000000 1617 +spie 0 6 5.164786 0.000000 2119 +schema 0 6 5.164786 0.000000 1988 +scienceand 0 5 5.347108 0.000000 2348 +jakobovit 0 3 5.857933 0.000000 3913 +lara 0 3 5.857933 0.000000 3914 +entiti 0 3 5.857933 0.000000 3096 +ahren 0 2 6.263398 0.000000 5788 +brinklei 0 2 6.263398 0.000000 5789 +notebook 0 2 6.263398 0.000000 5790 +databaseenviron 0 2 6.263398 0.000000 5792 +datastructur 0 2 6.263398 0.000000 4685 +devr 0 1 6.957497 0.000000 14788 +wasdesign 0 1 6.957497 0.000000 14789 +andintend 0 1 6.957497 0.000000 14790 +unifieddata 0 1 6.957497 0.000000 14791 +queryfacil 0 1 6.957497 0.000000 14792 +andpromot 0 1 6.957497 0.000000 14793 +ofproperti 0 1 6.957497 0.000000 14794 +thepart 0 1 6.957497 0.000000 14795 +buildinst 0 1 6.957497 0.000000 14796 +inmodel 0 1 6.957497 0.000000 14797 +secondcad 0 1 6.957497 0.000000 14798 +flexibledata 0 1 6.957497 0.000000 14799 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..64161683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +construct 0 139 1.945910 0.000000 82 +relat 0 139 1.945910 0.000000 68 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +school 0 84 2.484907 0.000000 188 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +nation 0 74 2.639057 0.000000 240 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +allow 0 53 2.944439 0.000000 333 +undergradu 0 54 2.944439 0.000000 338 +still 0 50 3.044522 0.000000 362 +principl 0 48 3.044522 0.000000 357 +cool 0 49 3.044522 0.000000 374 +quarter 0 47 3.091042 0.000000 389 +math 0 44 3.135494 0.000000 402 +past 0 42 3.218876 0.000000 428 +seminar 0 38 3.295837 0.000000 470 +autumn 0 31 3.496508 0.000000 558 +great 0 27 3.637586 0.000000 626 +william 0 22 3.850148 0.000000 765 +programminglanguag 0 21 3.912023 0.000000 782 +spend 0 19 4.007333 0.000000 850 +scott 0 18 4.060443 0.000000 884 +ultim 0 17 4.110874 0.000000 943 +women 0 16 4.174387 0.000000 1004 +front 0 13 4.382027 0.000000 1154 +automata 0 13 4.382027 0.000000 1135 +opportun 0 13 4.382027 0.000000 1161 +undergrad 0 9 4.753590 0.000000 1589 +christian 0 7 5.010635 0.000000 1949 +peterson 0 7 5.010635 0.000000 1850 +park 0 6 5.164786 0.000000 2218 +outdoor 0 5 5.347108 0.000000 2514 +frisbe 0 5 5.347108 0.000000 2560 +mentor 0 4 5.568345 0.000000 2591 +pile 0 2 6.263398 0.000000 5371 +blast 0 2 6.263398 0.000000 5172 +femal 0 2 6.263398 0.000000 4672 +pagelast 0 2 6.263398 0.000000 5793 +mentorship 0 1 6.957497 0.000000 14800 +bernheim 0 1 6.957497 0.000000 14801 +ofdigit 0 1 6.957497 0.000000 14802 +gorp 0 1 6.957497 0.000000 14803 +guideto 0 1 6.957497 0.000000 14804 +recreationfun 0 1 6.957497 0.000000 14805 +abig 0 1 6.957497 0.000000 14806 +scoobi 0 1 6.957497 0.000000 14807 +dooeduc 0 1 6.957497 0.000000 14808 +summerwork 0 1 6.957497 0.000000 14809 +highlyrecommend 0 1 6.957497 0.000000 14810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..1d384c3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +postscript 0 131 2.079442 0.000000 90 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +part 0 98 2.302585 0.000000 129 +graphic 0 90 2.397895 0.000000 147 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +educ 0 86 2.484907 0.000000 191 +learn 0 86 2.484907 0.000000 170 +master 0 76 2.564949 0.000000 216 +optim 0 79 2.564949 0.000000 197 +write 0 72 2.639057 0.000000 222 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +would 0 67 2.708050 0.000000 251 +experi 0 64 2.772589 0.000000 283 +result 0 65 2.772589 0.000000 281 +simpl 0 60 2.833213 0.000000 298 +thesi 0 57 2.890372 0.000000 327 +explor 0 58 2.890372 0.000000 324 +publish 0 57 2.890372 0.000000 326 +undergradu 0 54 2.944439 0.000000 338 +found 0 53 2.944439 0.000000 337 +particular 0 51 2.995732 0.000000 352 +better 0 45 3.135494 0.000000 401 +might 0 41 3.218876 0.000000 426 +error 0 40 3.258097 0.000000 449 +tree 0 36 3.367296 0.000000 492 +obtain 0 33 3.433987 0.000000 534 +scientist 0 31 3.496508 0.000000 560 +anim 0 31 3.496508 0.000000 557 +often 0 31 3.496508 0.000000 551 +built 0 29 3.583519 0.000000 592 +consid 0 29 3.583519 0.000000 590 +wai 0 25 3.737670 0.000000 662 +proof 1 23 3.806662 3.806662 720 +compress 0 23 3.806662 0.000000 719 +theorem 0 21 3.912023 0.000000 786 +avoid 0 21 3.912023 0.000000 799 +binari 0 20 3.951244 0.000000 823 +wrote 0 20 3.951244 0.000000 830 +seem 0 18 4.060443 0.000000 899 +whether 0 17 4.110874 0.000000 918 +universityof 0 15 4.248495 0.000000 1061 +hierarch 0 15 4.248495 0.000000 1018 +style 0 15 4.248495 0.000000 1036 +balanc 0 14 4.317488 0.000000 1112 +believ 0 13 4.382027 0.000000 1187 +unfortun 0 13 4.382027 0.000000 1170 +care 0 13 4.382027 0.000000 1177 +motiv 0 11 4.553877 0.000000 1346 +scienceat 0 11 4.553877 0.000000 1375 +ring 0 8 4.875197 0.000000 1684 +told 0 8 4.875197 0.000000 1658 +refere 0 7 5.010635 0.000000 1895 +toronto 0 6 5.164786 0.000000 2156 +meant 0 6 5.164786 0.000000 2055 +fewer 0 6 5.164786 0.000000 2074 +mistak 0 6 5.164786 0.000000 2110 +broadcast 0 5 5.347108 0.000000 2453 +shift 0 5 5.347108 0.000000 2357 +tend 0 4 5.568345 0.000000 3041 +conserv 0 4 5.568345 0.000000 2870 +will 0 4 5.568345 0.000000 2782 +glad 0 4 5.568345 0.000000 2657 +fear 0 4 5.568345 0.000000 2911 +reveal 0 4 5.568345 0.000000 2647 +complic 0 4 5.568345 0.000000 2902 +amir 0 3 5.857933 0.000000 3850 +hereto 0 3 5.857933 0.000000 3476 +quotat 0 3 5.857933 0.000000 3121 +theywil 0 3 5.857933 0.000000 3102 +incorrect 0 3 5.857933 0.000000 3134 +caught 0 3 5.857933 0.000000 3465 +obvious 0 3 5.857933 0.000000 3474 +hoar 0 3 5.857933 0.000000 3875 +mathematician 0 2 6.263398 0.000000 5189 +defici 0 2 6.263398 0.000000 5054 +persuad 0 2 6.263398 0.000000 5384 +obviou 0 2 6.263398 0.000000 5367 +michail 0 1 6.957497 0.000000 14811 +michailgradu 0 1 6.957497 0.000000 14812 +studenti 0 1 6.957497 0.000000 14813 +followingarea 0 1 6.957497 0.000000 14814 +summationfor 0 1 6.957497 0.000000 14815 +opsi 0 1 6.957497 0.000000 14816 +appletdesign 0 1 6.957497 0.000000 14817 +combinesprogram 0 1 6.957497 0.000000 14818 +lunar 0 1 6.957497 0.000000 14819 +lander 0 1 6.957497 0.000000 14820 +gamethat 0 1 6.957497 0.000000 14821 +unwillingto 0 1 6.957497 0.000000 14822 +embarrass 0 1 6.957497 0.000000 14823 +publishedincorrect 0 1 6.957497 0.000000 14824 +unconvent 0 1 6.957497 0.000000 14825 +proofstyl 0 1 6.957497 0.000000 14826 +theyhav 0 1 6.957497 0.000000 14827 +wasnot 0 1 6.957497 0.000000 14828 +stylethat 0 1 6.957497 0.000000 14829 +lesli 0 1 6.957497 0.000000 14830 +lamport 0 1 6.957497 0.000000 14831 +wayi 0 1 6.957497 0.000000 14832 +theother 0 1 6.957497 0.000000 14833 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..1ffb9545 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +teach 0 108 2.197225 0.000000 112 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +resum 0 79 2.564949 0.000000 217 +receiv 0 66 2.708050 0.000000 244 +main 0 67 2.708050 0.000000 256 +visit 0 63 2.772589 0.000000 288 +evalu 0 64 2.772589 0.000000 266 +colleg 0 61 2.833213 0.000000 300 +scientif 0 53 2.944439 0.000000 341 +join 0 39 3.258097 0.000000 457 +award 0 34 3.401197 0.000000 523 +india 0 32 3.465736 0.000000 550 +richard 0 31 3.496508 0.000000 559 +travel 0 30 3.555348 0.000000 579 +progress 0 28 3.610918 0.000000 598 +spent 0 25 3.737670 0.000000 676 +indian 0 22 3.850148 0.000000 769 +geometri 0 22 3.850148 0.000000 752 +anderson 0 19 4.007333 0.000000 860 +postdoc 0 8 4.875197 0.000000 1724 +presidenti 0 8 4.875197 0.000000 1737 +qualifi 0 8 4.875197 0.000000 1721 +tourist 0 8 4.875197 0.000000 1710 +implementationof 0 7 5.010635 0.000000 1813 +reed 0 6 5.164786 0.000000 2086 +inmathemat 0 2 6.263398 0.000000 5098 +younginvestig 0 2 6.263398 0.000000 5794 +bangalor 0 2 6.263398 0.000000 5110 +theindian 0 2 6.263398 0.000000 5795 +stanfordin 0 1 6.957497 0.000000 14834 +aon 0 1 6.957497 0.000000 14835 +inberkelei 0 1 6.957497 0.000000 14836 +yeara 0 1 6.957497 0.000000 14837 +talksanderson 0 1 6.957497 0.000000 14838 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..2c4a5c61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +theori 0 111 2.197225 0.000000 127 +teach 0 108 2.197225 0.000000 112 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +librari 0 87 2.484907 0.000000 181 +school 0 84 2.484907 0.000000 188 +start 0 83 2.484907 0.000000 173 +academ 0 82 2.484907 0.000000 178 +thing 0 84 2.484907 0.000000 189 +educ 0 86 2.484907 0.000000 191 +name 0 72 2.639057 0.000000 220 +intellig 0 72 2.639057 0.000000 225 +write 0 72 2.639057 0.000000 222 +receiv 0 66 2.708050 0.000000 244 +knowledg 0 67 2.708050 0.000000 243 +plan 0 65 2.772589 0.000000 272 +artifici 0 63 2.772589 0.000000 280 +content 0 59 2.833213 0.000000 302 +locat 0 59 2.833213 0.000000 303 +investig 0 51 2.995732 0.000000 353 +life 0 50 3.044522 0.000000 375 +made 0 44 3.135494 0.000000 398 +show 0 43 3.178054 0.000000 417 +music 0 42 3.218876 0.000000 436 +combin 0 42 3.218876 0.000000 421 +theoret 0 39 3.258097 0.000000 446 +vita 0 38 3.295837 0.000000 473 +mean 0 37 3.332205 0.000000 477 +purpos 0 37 3.332205 0.000000 481 +soon 0 36 3.367296 0.000000 494 +represent 0 35 3.401197 0.000000 512 +award 0 34 3.401197 0.000000 523 +curriculum 0 33 3.433987 0.000000 535 +travel 0 30 3.555348 0.000000 579 +art 0 29 3.583519 0.000000 593 +decis 0 23 3.806662 0.000000 728 +honor 0 23 3.806662 0.000000 729 +finish 0 22 3.850148 0.000000 748 +ofwashington 0 22 3.850148 0.000000 766 +born 0 21 3.912023 0.000000 798 +mostli 0 19 4.007333 0.000000 869 +histori 0 19 4.007333 0.000000 853 +listen 0 18 4.060443 0.000000 907 +thought 0 17 4.110874 0.000000 945 +brother 0 13 4.382027 0.000000 1189 +employ 0 12 4.465908 0.000000 1291 +qualit 0 11 4.553877 0.000000 1362 +probabilist 0 11 4.553877 0.000000 1343 +literatur 0 11 4.553877 0.000000 1300 +creativ 0 8 4.875197 0.000000 1777 +switch 0 8 4.875197 0.000000 1718 +brought 0 7 5.010635 0.000000 1925 +gave 0 7 5.010635 0.000000 1922 +foreign 0 7 5.010635 0.000000 1919 +reconstruct 0 6 5.164786 0.000000 2170 +blue 0 6 5.164786 0.000000 2227 +mother 0 6 5.164786 0.000000 2083 +markov 0 5 5.347108 0.000000 2280 +jazz 0 5 5.347108 0.000000 2527 +paint 0 5 5.347108 0.000000 2400 +andengin 0 4 5.568345 0.000000 3042 +ofmi 0 3 5.857933 0.000000 3911 +revisit 0 3 5.857933 0.000000 3915 +father 0 3 5.857933 0.000000 3757 +birth 0 3 5.857933 0.000000 3594 +affair 0 3 5.857933 0.000000 3916 +anhai 0 2 6.263398 0.000000 4404 +doan 0 2 6.263398 0.000000 4405 +andscienc 0 2 6.263398 0.000000 5796 +milwauke 0 2 6.263398 0.000000 5797 +amcurr 0 2 6.263398 0.000000 5798 +vietnames 0 2 6.263398 0.000000 5593 +syllabl 0 1 6.957497 0.000000 14839 +hungari 0 1 6.957497 0.000000 14840 +birthplac 0 1 6.957497 0.000000 14841 +homepageanhai 0 1 6.957497 0.000000 14842 +vietnam 0 1 6.957497 0.000000 14843 +iwent 0 1 6.957497 0.000000 14844 +kossuth 0 1 6.957497 0.000000 14845 +lajo 0 1 6.957497 0.000000 14846 +debrecen 0 1 6.957497 0.000000 14847 +underuncertainti 0 1 6.957497 0.000000 14848 +calm 0 1 6.957497 0.000000 14849 +invietnames 0 1 6.957497 0.000000 14850 +nghean 0 1 6.957497 0.000000 14851 +haiphong 0 1 6.957497 0.000000 14852 +folkswer 0 1 6.957497 0.000000 14853 +younger 0 1 6.957497 0.000000 14854 +theysimpli 0 1 6.957497 0.000000 14855 +namehaian 0 1 6.957497 0.000000 14856 +comtemporari 0 1 6.957497 0.000000 14857 +snapshotsanhai 0 1 6.957497 0.000000 14858 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..0ba6a30d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +spring 0 131 2.079442 0.000000 88 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +mathemat 0 108 2.197225 0.000000 123 +version 0 113 2.197225 0.000000 122 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +place 0 106 2.197225 0.000000 124 +graphic 0 90 2.397895 0.000000 147 +pictur 0 89 2.397895 0.000000 160 +follow 0 92 2.397895 0.000000 143 +control 0 82 2.484907 0.000000 164 +dynam 0 76 2.564949 0.000000 194 +complet 0 77 2.564949 0.000000 208 +resum 0 79 2.564949 0.000000 217 +optim 0 79 2.564949 0.000000 197 +workshop 0 71 2.639057 0.000000 239 +sieg 0 69 2.708050 0.000000 260 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +automat 0 61 2.833213 0.000000 306 +februari 0 54 2.944439 0.000000 328 +without 0 50 3.044522 0.000000 370 +quarter 0 47 3.091042 0.000000 389 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +mark 0 44 3.135494 0.000000 403 +better 0 45 3.135494 0.000000 401 +show 0 43 3.178054 0.000000 417 +fast 0 42 3.218876 0.000000 429 +small 0 39 3.258097 0.000000 447 +origin 0 38 3.295837 0.000000 472 +slide 0 38 3.295837 0.000000 467 +seminar 0 38 3.295837 0.000000 470 +short 0 36 3.367296 0.000000 499 +anim 0 31 3.496508 0.000000 557 +autumn 0 31 3.496508 0.000000 558 +hope 0 28 3.610918 0.000000 610 +univ 0 28 3.610918 0.000000 617 +valu 0 25 3.737670 0.000000 665 +motion 0 24 3.761200 0.000000 699 +magazin 0 24 3.761200 0.000000 704 +compress 0 23 3.806662 0.000000 719 +sequenc 0 23 3.806662 0.000000 734 +togeth 0 23 3.806662 0.000000 714 +try 0 22 3.850148 0.000000 764 +mpeg 0 20 3.951244 0.000000 831 +synthesi 0 20 3.951244 0.000000 834 +figur 0 18 4.060443 0.000000 903 +bershad 0 18 4.060443 0.000000 902 +event 0 18 4.060443 0.000000 896 +speed 0 18 4.060443 0.000000 911 +brown 0 16 4.174387 0.000000 977 +took 0 16 4.174387 0.000000 1010 +qual 0 15 4.248495 0.000000 1062 +goe 0 15 4.248495 0.000000 1044 +piec 0 15 4.248495 0.000000 1020 +consider 0 14 4.317488 0.000000 1076 +benjamin 0 11 4.553877 0.000000 1296 +decomposit 0 10 4.653960 0.000000 1439 +jump 0 9 4.753590 0.000000 1603 +joel 0 8 4.875197 0.000000 1698 +chamber 0 8 4.875197 0.000000 1692 +egger 0 8 4.875197 0.000000 1695 +pldi 0 8 4.875197 0.000000 1704 +switch 0 8 4.875197 0.000000 1718 +dispatch 0 7 5.010635 0.000000 1791 +gave 0 7 5.010635 0.000000 1922 +mock 0 6 5.164786 0.000000 2087 +philipos 0 5 5.347108 0.000000 2373 +engineeringat 0 5 5.347108 0.000000 2561 +andp 0 4 5.568345 0.000000 2811 +pardyak 0 4 5.568345 0.000000 3043 +doubl 0 4 5.568345 0.000000 2951 +ausland 1 3 5.857933 5.857933 3917 +super 0 3 5.857933 0.000000 3918 +singular 0 3 5.857933 0.000000 3366 +multiflow 0 2 6.263398 0.000000 4473 +articul 0 2 6.263398 0.000000 5799 +acmtransact 0 2 6.263398 0.000000 4310 +wilkerson 0 2 6.263398 0.000000 4516 +mywork 0 2 6.263398 0.000000 5800 +orang 0 2 6.263398 0.000000 5163 +onit 0 1 6.957497 0.000000 14859 +andb 0 1 6.957497 0.000000 14860 +inextens 0 1 6.957497 0.000000 14861 +compilersupport 0 1 6.957497 0.000000 14862 +synthesisfor 0 1 6.957497 0.000000 14863 +fukunaga 0 1 6.957497 0.000000 14864 +partovi 0 1 6.957497 0.000000 14865 +christensen 0 1 6.957497 0.000000 14866 +reiss 0 1 6.957497 0.000000 14867 +shuman 0 1 6.957497 0.000000 14868 +leapfrog 0 1 6.957497 0.000000 14869 +lossili 0 1 6.957497 0.000000 14870 +animationthat 0 1 6.957497 0.000000 14871 +cartwheel 0 1 6.957497 0.000000 14872 +andshuffl 0 1 6.957497 0.000000 14873 +andcollaps 0 1 6.957497 0.000000 14874 +isjust 0 1 6.957497 0.000000 14875 +tosmooth 0 1 6.957497 0.000000 14876 +thetalk 0 1 6.957497 0.000000 14877 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..4d838997 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +number 0 130 2.079442 0.000000 97 +look 0 107 2.197225 0.000000 115 +site 0 106 2.197225 0.000000 119 +find 0 111 2.197225 0.000000 111 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +search 0 95 2.397895 0.000000 155 +info 1 85 2.484907 2.484907 176 +second 0 81 2.484907 0.000000 166 +institut 0 84 2.484907 0.000000 187 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +refer 0 78 2.564949 0.000000 203 +complet 0 77 2.564949 0.000000 208 +name 0 72 2.639057 0.000000 220 +onlin 0 75 2.639057 0.000000 223 +sieg 0 69 2.708050 0.000000 260 +view 0 70 2.708050 0.000000 254 +test 0 66 2.708050 0.000000 252 +visit 0 63 2.772589 0.000000 288 +dept 0 64 2.772589 0.000000 291 +locat 0 59 2.833213 0.000000 303 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +friend 0 48 3.044522 0.000000 376 +get 0 46 3.091042 0.000000 380 +favorit 0 44 3.135494 0.000000 410 +edit 0 42 3.218876 0.000000 418 +past 0 42 3.218876 0.000000 428 +brian 1 38 3.295837 3.295837 466 +origin 0 38 3.295837 0.000000 472 +random 0 34 3.401197 0.000000 511 +idea 0 32 3.465736 0.000000 545 +quot 0 29 3.583519 0.000000 582 +actual 0 28 3.610918 0.000000 604 +stop 0 17 4.110874 0.000000 942 +alreadi 0 16 4.174387 0.000000 963 +doesn 0 15 4.248495 0.000000 1055 +hotlist 0 13 4.382027 0.000000 1199 +touch 0 12 4.465908 0.000000 1288 +brad 0 12 4.465908 0.000000 1264 +artist 0 6 5.164786 0.000000 2127 +band 0 6 5.164786 0.000000 2198 +girl 0 5 5.347108 0.000000 2410 +worst 0 5 5.347108 0.000000 2287 +poem 0 5 5.347108 0.000000 2483 +guestbook 0 5 5.347108 0.000000 2475 +snapshot 0 5 5.347108 0.000000 2303 +washingtonseattl 0 4 5.568345 0.000000 3044 +ling 0 4 5.568345 0.000000 3045 +confus 0 3 5.857933 0.000000 3144 +thrash 0 3 5.857933 0.000000 3400 +mossi 0 2 6.263398 0.000000 5801 +liber 0 2 6.263398 0.000000 5154 +glorifi 0 2 6.263398 0.000000 4114 +mental 0 2 6.263398 0.000000 5802 +stolen 0 2 6.263398 0.000000 5803 +boinge 0 1 6.957497 0.000000 14878 +michalowskidepart 0 1 6.957497 0.000000 14879 +engineeringmail 0 1 6.957497 0.000000 14880 +bitsthank 0 1 6.957497 0.000000 14881 +headscapewhenev 0 1 6.957497 0.000000 14882 +gradstud 0 1 6.957497 0.000000 14883 +inlinguist 0 1 6.957497 0.000000 14884 +ultrahotlist 0 1 6.957497 0.000000 14885 +ofal 0 1 6.957497 0.000000 14886 +forsometh 0 1 6.957497 0.000000 14887 +thave 0 1 6.957497 0.000000 14888 +urouletteto 0 1 6.957497 0.000000 14889 +ofwhich 0 1 6.957497 0.000000 14890 +songsand 0 1 6.957497 0.000000 14891 +fictiti 0 1 6.957497 0.000000 14892 +puriti 0 1 6.957497 0.000000 14893 +tokeep 0 1 6.957497 0.000000 14894 +pagesfrom 0 1 6.957497 0.000000 14895 +aslfingerspel 0 1 6.957497 0.000000 14896 +blatantli 0 1 6.957497 0.000000 14897 +chamberlain 0 1 6.957497 0.000000 14898 +michalowski 0 1 6.957497 0.000000 14899 +sanityerad 0 1 6.957497 0.000000 14900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..03c005ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +professor 0 137 1.945910 0.000000 76 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +associ 0 93 2.397895 0.000000 151 +receiv 0 66 2.708050 0.000000 244 +complex 0 64 2.772589 0.000000 269 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +join 0 39 3.258097 0.000000 457 +theoret 0 39 3.258097 0.000000 446 +paul 1 38 3.295837 3.295837 471 +connect 0 37 3.332205 0.000000 485 +post 0 35 3.401197 0.000000 505 +award 0 34 3.401197 0.000000 523 +autumn 0 31 3.496508 0.000000 558 +enjoi 0 26 3.688879 0.000000 660 +concern 0 25 3.737670 0.000000 666 +aspect 0 25 3.737670 0.000000 663 +sport 0 25 3.737670 0.000000 683 +doctor 0 24 3.761200 0.000000 709 +proof 0 23 3.806662 0.000000 720 +theunivers 0 21 3.912023 0.000000 797 +concentr 0 18 4.060443 0.000000 906 +qual 0 15 4.248495 0.000000 1062 +primarili 0 13 4.382027 0.000000 1185 +thedepart 0 11 4.553877 0.000000 1350 +softbal 0 9 4.753590 0.000000 1594 +toronto 0 6 5.164786 0.000000 2156 +squash 0 6 5.164786 0.000000 2223 +lack 0 6 5.164786 0.000000 1994 +beam 0 5 5.347108 0.000000 2344 +engineeringat 0 5 5.347108 0.000000 2561 +talent 0 3 5.857933 0.000000 3768 +sciencein 0 2 6.263398 0.000000 5804 +paralleland 0 2 6.263398 0.000000 5805 +beamepaul 0 1 6.957497 0.000000 14901 +computationalcomplex 0 1 6.957497 0.000000 14902 +academicyear 0 1 6.957497 0.000000 14903 +presidentialyoung 0 1 6.957497 0.000000 14904 +inproposit 0 1 6.957497 0.000000 14905 +enthusiasm 0 1 6.957497 0.000000 14906 +cancompens 0 1 6.957497 0.000000 14907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..21468966 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +develop 0 174 1.791759 0.000000 53 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +real 0 93 2.397895 0.000000 144 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +level 0 87 2.484907 0.000000 180 +build 0 85 2.484907 0.000000 184 +environ 0 84 2.484907 0.000000 177 +librari 0 87 2.484907 0.000000 181 +optim 0 79 2.564949 0.000000 197 +david 0 71 2.639057 0.000000 232 +line 0 75 2.639057 0.000000 231 +free 0 73 2.639057 0.000000 224 +function 0 62 2.772589 0.000000 275 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +much 0 52 2.995732 0.000000 349 +done 0 47 3.091042 0.000000 381 +mark 0 44 3.135494 0.000000 403 +keep 0 44 3.135494 0.000000 409 +favorit 0 44 3.135494 0.000000 410 +respons 0 37 3.332205 0.000000 476 +platform 0 29 3.583519 0.000000 591 +measur 0 28 3.610918 0.000000 609 +team 0 27 3.637586 0.000000 625 +sport 1 25 3.737670 3.737670 683 +grad 0 20 3.951244 0.000000 837 +particularli 0 19 4.007333 0.000000 867 +ultim 1 17 4.110874 4.110874 943 +devic 1 16 4.174387 4.174387 1002 +goe 0 15 4.248495 0.000000 1044 +spin 0 14 4.317488 0.000000 1121 +econom 0 13 4.382027 0.000000 1184 +jump 0 9 4.753590 0.000000 1603 +volleybal 0 9 4.753590 0.000000 1598 +trust 0 9 4.753590 0.000000 1583 +spot 0 7 5.010635 0.000000 1894 +bunch 0 7 5.010635 0.000000 1861 +athlet 0 7 5.010635 0.000000 1933 +tri 0 6 5.164786 0.000000 2166 +railroad 0 6 5.164786 0.000000 2161 +frisbe 1 5 5.347108 5.347108 2560 +minnesota 0 5 5.347108 0.000000 2469 +anti 0 5 5.347108 0.000000 2434 +champion 1 4 5.568345 5.568345 2982 +skate 0 4 5.568345 0.000000 3046 +borrow 0 3 5.857933 0.000000 3725 +somedai 0 3 5.857933 0.000000 3919 +tripl 0 3 5.857933 0.000000 3160 +bank 0 3 5.857933 0.000000 3920 +getto 0 2 6.263398 0.000000 5806 +locomot 0 2 6.263398 0.000000 5807 +beckerdavid 0 1 6.957497 0.000000 14908 +beckercontact 0 1 6.957497 0.000000 14909 +makingspina 0 1 6.957497 0.000000 14910 +drvier 0 1 6.957497 0.000000 14911 +bethel 0 1 6.957497 0.000000 14912 +men 0 1 6.957497 0.000000 14913 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 0 1 6.957497 0.000000 14914 +handbal 0 1 6.957497 0.000000 14915 +playracquetballgolftenni 0 1 6.957497 0.000000 14916 +bridgecampingcanoeingdisc 0 1 6.957497 0.000000 14917 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 0 1 6.957497 0.000000 14918 +skiingweightliftingwhitewat 0 1 6.957497 0.000000 14919 +raftinghorseback 0 1 6.957497 0.000000 14920 +ridingmountain 0 1 6.957497 0.000000 14921 +bikingin 0 1 6.957497 0.000000 14922 +currenc 0 1 6.957497 0.000000 14923 +ssto 0 1 6.957497 0.000000 14924 +rlv 0 1 6.957497 0.000000 14925 +theologi 0 1 6.957497 0.000000 14926 +centurai 0 1 6.957497 0.000000 14927 +boot 0 1 6.957497 0.000000 14928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..ef6ed84d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,281 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +implement 0 152 1.791759 0.000000 52 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +professor 0 137 1.945910 0.000000 76 +click 0 142 1.945910 0.000000 78 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +look 0 107 2.197225 0.000000 115 +memori 0 101 2.302585 0.000000 139 +octob 0 89 2.397895 0.000000 156 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +sinc 0 90 2.397895 0.000000 159 +larg 0 82 2.484907 0.000000 168 +ieee 0 86 2.484907 0.000000 190 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +master 0 76 2.564949 0.000000 216 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +street 0 63 2.772589 0.000000 293 +experi 0 64 2.772589 0.000000 283 +polici 0 64 2.772589 0.000000 279 +virtual 0 62 2.772589 0.000000 285 +evalu 0 64 2.772589 0.000000 266 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +share 0 59 2.833213 0.000000 304 +publish 0 57 2.890372 0.000000 326 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +extens 0 53 2.944439 0.000000 340 +februari 0 54 2.944439 0.000000 328 +local 0 55 2.944439 0.000000 334 +hardwar 0 51 2.995732 0.000000 350 +run 0 51 2.995732 0.000000 347 +life 0 50 3.044522 0.000000 375 +standard 0 48 3.044522 0.000000 365 +effect 0 46 3.091042 0.000000 385 +protocol 0 45 3.135494 0.000000 407 +fast 0 42 3.218876 0.000000 429 +cach 0 41 3.218876 0.000000 432 +map 0 39 3.258097 0.000000 452 +multipl 0 39 3.258097 0.000000 453 +continu 0 39 3.258097 0.000000 448 +brian 0 38 3.295837 0.000000 466 +industri 0 38 3.295837 0.000000 464 +winter 0 36 3.367296 0.000000 500 +post 0 35 3.401197 0.000000 505 +return 0 34 3.401197 0.000000 502 +concurr 0 34 3.401197 0.000000 501 +detect 0 26 3.688879 0.000000 646 +consist 0 26 3.688879 0.000000 651 +although 0 25 3.737670 0.000000 667 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +interpret 0 24 3.761200 0.000000 686 +greg 0 24 3.761200 0.000000 695 +mobil 0 23 3.806662 0.000000 730 +thread 0 23 3.806662 0.000000 722 +ofwashington 0 22 3.850148 0.000000 766 +reduc 0 22 3.850148 0.000000 759 +inth 0 22 3.850148 0.000000 741 +chen 0 21 3.912023 0.000000 791 +avoid 0 21 3.912023 0.000000 799 +hous 0 21 3.912023 0.000000 801 +kernel 0 20 3.951244 0.000000 825 +binari 0 20 3.951244 0.000000 823 +safeti 0 20 3.951244 0.000000 817 +increas 0 20 3.951244 0.000000 829 +comparison 0 19 4.007333 0.000000 863 +andrew 0 19 4.007333 0.000000 849 +miss 0 19 4.007333 0.000000 866 +citi 0 19 4.007333 0.000000 874 +bershad 1 18 4.060443 4.060443 902 +seem 0 18 4.060443 0.000000 899 +event 0 18 4.060443 0.000000 896 +asplo 0 17 4.110874 0.000000 948 +stock 0 16 4.174387 0.000000 1007 +took 0 16 4.174387 0.000000 1010 +brief 0 16 4.174387 0.000000 1001 +qual 0 15 4.248495 0.000000 1062 +driven 0 15 4.248495 0.000000 1048 +overhead 0 15 4.248495 0.000000 1035 +micro 0 15 4.248495 0.000000 1031 +spin 0 14 4.317488 0.000000 1121 +save 0 14 4.317488 0.000000 1099 +levi 0 14 4.317488 0.000000 1093 +consider 0 14 4.317488 0.000000 1076 +dean 0 14 4.317488 0.000000 1104 +karlin 0 13 4.382027 0.000000 1176 +mellon 0 13 4.382027 0.000000 1179 +charl 0 13 4.382027 0.000000 1149 +block 0 13 4.382027 0.000000 1183 +usenix 0 12 4.465908 0.000000 1240 +carnegi 0 12 4.465908 0.000000 1260 +anna 0 12 4.465908 0.000000 1292 +mari 0 12 4.465908 0.000000 1266 +safe 0 12 4.465908 0.000000 1274 +promot 0 12 4.465908 0.000000 1235 +isca 0 11 4.553877 0.000000 1354 +denni 0 11 4.553877 0.000000 1321 +baer 0 11 4.553877 0.000000 1353 +systemsc 0 11 4.553877 0.000000 1293 +impact 0 11 4.553877 0.000000 1334 +primit 0 11 4.553877 0.000000 1317 +arpa 0 11 4.553877 0.000000 1369 +sosp 0 10 4.653960 0.000000 1416 +jean 0 10 4.653960 0.000000 1440 +henri 0 10 4.653960 0.000000 1417 +packet 0 10 4.653960 0.000000 1415 +decomposit 0 10 4.653960 0.000000 1439 +osdi 0 9 4.753590 0.000000 1534 +voelker 0 9 4.753590 0.000000 1557 +hang 0 9 4.753590 0.000000 1499 +patterson 0 9 4.753590 0.000000 1554 +wong 0 9 4.753590 0.000000 1609 +modula 0 9 4.753590 0.000000 1613 +wilson 0 9 4.753590 0.000000 1536 +rel 0 9 4.753590 0.000000 1487 +romer 0 8 4.875197 0.000000 1706 +marc 0 8 4.875197 0.000000 1680 +sigop 0 8 4.875197 0.000000 1727 +chamber 0 8 4.875197 0.000000 1692 +egger 0 8 4.875197 0.000000 1695 +mach 0 8 4.875197 0.000000 1669 +besid 0 8 4.875197 0.000000 1681 +wayn 0 8 4.875197 0.000000 1738 +watson 0 8 4.875197 0.000000 1691 +uniprocessor 0 8 4.875197 0.000000 1696 +hash 0 8 4.875197 0.000000 1618 +cultur 0 7 5.010635 0.000000 1951 +northwest 0 7 5.010635 0.000000 1973 +instrument 0 7 5.010635 0.000000 1954 +dispatch 0 7 5.010635 0.000000 1791 +interrupt 0 7 5.010635 0.000000 1793 +prioriti 0 7 5.010635 0.000000 1792 +onoper 0 6 5.164786 0.000000 2048 +squash 0 6 5.164786 0.000000 2223 +prefetch 0 6 5.164786 0.000000 2039 +edward 0 6 5.164786 0.000000 2050 +wolman 0 6 5.164786 0.000000 2093 +loup 0 6 5.164786 0.000000 2228 +mock 0 6 5.164786 0.000000 2087 +conflict 0 6 5.164786 0.000000 2041 +rain 0 6 5.164786 0.000000 2137 +philipos 0 5 5.347108 0.000000 2373 +coffe 0 5 5.347108 0.000000 2556 +toc 0 5 5.347108 0.000000 2562 +bind 0 5 5.347108 0.000000 2250 +alec 0 5 5.347108 0.000000 2563 +ohlrich 0 5 5.347108 0.000000 2564 +mutual 0 5 5.347108 0.000000 2418 +pardyak 0 4 5.568345 0.000000 3043 +microkernel 0 4 5.568345 0.000000 3047 +savag 0 4 5.568345 0.000000 2777 +rocki 0 4 5.568345 0.000000 3048 +etch 0 4 5.568345 0.000000 2755 +compcon 0 4 5.568345 0.000000 2958 +exclus 0 4 5.568345 0.000000 2947 +fiuczynski 0 3 5.857933 0.000000 3390 +ausland 0 3 5.857933 0.000000 3917 +stefan 0 3 5.857933 0.000000 3921 +northeast 0 3 5.857933 0.000000 3922 +cachingtraci 0 3 5.857933 0.000000 3923 +kimbrel 0 3 5.857933 0.000000 3924 +felten 0 3 5.857933 0.000000 3925 +geoffrei 0 3 5.857933 0.000000 3505 +dynamiccompil 0 3 5.857933 0.000000 3926 +garrett 0 3 5.857933 0.000000 3377 +mobisa 0 3 5.857933 0.000000 3927 +mappedcach 0 3 5.857933 0.000000 3928 +forappl 0 3 5.857933 0.000000 3929 +irrelev 0 3 5.857933 0.000000 3823 +golub 0 3 5.857933 0.000000 3265 +alien 0 3 5.857933 0.000000 3930 +przemyslaw 0 2 6.263398 0.000000 5808 +implemen 0 2 6.263398 0.000000 5809 +emin 0 2 6.263398 0.000000 5810 +sirer 0 2 6.263398 0.000000 5811 +wwo 0 2 6.263398 0.000000 5812 +eduwork 0 2 6.263398 0.000000 5813 +hasappear 0 2 6.263398 0.000000 5099 +tomkin 0 2 6.263398 0.000000 5814 +hugo 0 2 6.263398 0.000000 5815 +garth 0 2 6.263398 0.000000 5816 +gibson 0 2 6.263398 0.000000 5817 +hsieh 0 2 6.263398 0.000000 5818 +onlinesuperpag 0 2 6.263398 0.000000 5819 +appearedin 0 2 6.263398 0.000000 5096 +endpoint 0 2 6.263398 0.000000 4967 +moss 0 2 6.263398 0.000000 5820 +redel 0 2 6.263398 0.000000 4358 +elli 0 2 6.263398 0.000000 4216 +baron 0 2 6.263398 0.000000 4317 +microbenchmark 0 2 6.263398 0.000000 5821 +rashid 0 2 6.263398 0.000000 4318 +abduct 0 2 6.263398 0.000000 5663 +maeda 0 1 6.957497 0.000000 14929 +midwai 0 1 6.957497 0.000000 14930 +zekauska 0 1 6.957497 0.000000 14931 +sawdon 0 1 6.957497 0.000000 14932 +machnix 0 1 6.957497 0.000000 14933 +drave 0 1 6.957497 0.000000 14934 +forin 0 1 6.957497 0.000000 14935 +respit 0 1 6.957497 0.000000 14936 +asigmetr 0 1 6.957497 0.000000 14937 +thestairmast 0 1 6.957497 0.000000 14938 +extensibleoper 0 1 6.957497 0.000000 14939 +parallelnetwork 0 1 6.957497 0.000000 14940 +thesequel 0 1 6.957497 0.000000 14941 +optimizationcours 0 1 6.957497 0.000000 14942 +youmight 0 1 6.957497 0.000000 14943 +extensiblesystem 0 1 6.957497 0.000000 14944 +theodor 0 1 6.957497 0.000000 14945 +implementationj 0 1 6.957497 0.000000 14946 +defouw 0 1 6.957497 0.000000 14947 +alapat 0 1 6.957497 0.000000 14948 +becker 0 1 6.957497 0.000000 14949 +sharedmemori 0 1 6.957497 0.000000 14950 +conflictresolut 0 1 6.957497 0.000000 14951 +uwtechn 0 1 6.957497 0.000000 14952 +demultiplex 0 1 6.957497 0.000000 14953 +yuhara 0 1 6.957497 0.000000 14954 +andmostli 0 1 6.957497 0.000000 14955 +moblic 0 1 6.957497 0.000000 14956 +wheeler 0 1 6.957497 0.000000 14957 +ginsburg 0 1 6.957497 0.000000 14958 +inoper 0 1 6.957497 0.000000 14959 +harrier 0 1 6.957497 0.000000 14960 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..1f4ef6e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,153 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +hall 0 146 1.945910 0.000000 65 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +world 0 115 2.197225 0.000000 126 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +person 0 111 2.197225 0.000000 117 +text 0 98 2.302585 0.000000 133 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +good 0 77 2.564949 0.000000 200 +david 0 71 2.639057 0.000000 232 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +sieg 0 69 2.708050 0.000000 260 +view 0 70 2.708050 0.000000 254 +new 0 64 2.772589 0.000000 262 +previou 0 62 2.772589 0.000000 290 +collect 0 65 2.772589 0.000000 268 +guid 0 63 2.772589 0.000000 267 +thesi 0 57 2.890372 0.000000 327 +index 0 56 2.890372 0.000000 309 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +pointer 0 48 3.044522 0.000000 368 +cool 0 49 3.044522 0.000000 374 +telephon 0 50 3.044522 0.000000 373 +life 0 50 3.044522 0.000000 375 +directori 0 45 3.135494 0.000000 396 +past 0 42 3.218876 0.000000 428 +open 0 38 3.295837 0.000000 469 +hand 0 37 3.332205 0.000000 475 +post 0 35 3.401197 0.000000 505 +scientist 0 31 3.496508 0.000000 560 +steve 0 29 3.583519 0.000000 594 +american 0 27 3.637586 0.000000 634 +todai 0 25 3.737670 0.000000 672 +sport 0 25 3.737670 0.000000 683 +yahoo 0 24 3.761200 0.000000 707 +daili 0 24 3.761200 0.000000 706 +magazin 0 24 3.761200 0.000000 704 +miscellan 0 23 3.806662 0.000000 731 +voic 0 21 3.912023 0.000000 806 +hous 0 21 3.912023 0.000000 801 +lyco 0 19 4.007333 0.000000 871 +boston 0 19 4.007333 0.000000 862 +agent 0 18 4.060443 0.000000 910 +white 0 17 4.110874 0.000000 951 +engineeringunivers 0 17 4.110874 0.000000 959 +repositori 0 17 4.110874 0.000000 932 +dilbert 0 16 4.174387 0.000000 996 +hierarch 0 15 4.248495 0.000000 1018 +balanc 0 14 4.317488 0.000000 1112 +washingtonbox 0 13 4.382027 0.000000 1200 +suit 0 13 4.382027 0.000000 1129 +canada 0 13 4.382027 0.000000 1158 +social 0 13 4.382027 0.000000 1123 +hank 0 12 4.465908 0.000000 1253 +excit 0 11 4.553877 0.000000 1329 +arpa 0 11 4.553877 0.000000 1369 +clock 0 11 4.553877 0.000000 1320 +shop 0 10 4.653960 0.000000 1469 +metacrawl 0 10 4.653960 0.000000 1455 +vista 0 10 4.653960 0.000000 1452 +yellow 0 9 4.753590 0.000000 1601 +weld 0 9 4.753590 0.000000 1538 +meta 0 9 4.753590 0.000000 1505 +govern 0 9 4.753590 0.000000 1581 +congress 0 9 4.753590 0.000000 1592 +entitl 0 9 4.753590 0.000000 1490 +postdoc 0 8 4.875197 0.000000 1724 +span 0 8 4.875197 0.000000 1751 +upcom 0 8 4.875197 0.000000 1685 +softbot 0 7 5.010635 0.000000 1974 +pittsburgh 0 7 5.010635 0.000000 1938 +strip 0 6 5.164786 0.000000 2203 +oren 0 6 5.164786 0.000000 2134 +etzioni 0 6 5.164786 0.000000 2135 +gopher 0 6 5.164786 0.000000 1982 +infoseek 0 6 5.164786 0.000000 2188 +slate 0 6 5.164786 0.000000 2021 +atlant 0 5 5.347108 0.000000 2508 +feder 0 5 5.347108 0.000000 2266 +union 0 4 5.568345 0.000000 2634 +alta 0 4 5.568345 0.000000 3039 +birthdai 0 4 5.568345 0.000000 2800 +letterman 0 3 5.857933 0.000000 3931 +shortcut 0 3 5.857933 0.000000 3932 +soar 0 3 5.857933 0.000000 3506 +headlin 0 3 5.857933 0.000000 3710 +monthli 0 3 5.857933 0.000000 3910 +espn 0 3 5.857933 0.000000 3724 +zone 0 3 5.857933 0.000000 3747 +museum 0 3 5.857933 0.000000 3933 +comedi 0 2 6.263398 0.000000 5822 +geeki 0 2 6.263398 0.000000 5823 +shopbot 0 2 6.263398 0.000000 5824 +sigma 0 2 6.263398 0.000000 4369 +magellan 0 2 6.263398 0.000000 5825 +reuter 0 2 6.263398 0.000000 4099 +cafe 0 2 6.263398 0.000000 5826 +salon 0 2 6.263398 0.000000 5827 +harper 0 2 6.263398 0.000000 5141 +commiss 0 2 6.263398 0.000000 4901 +reform 0 2 6.263398 0.000000 5828 +budget 0 1 6.957497 0.000000 14961 +doorenbo 0 1 6.957497 0.000000 14962 +pagebob 0 1 6.957497 0.000000 14963 +bobd 0 1 6.957497 0.000000 14964 +netbot 0 1 6.957497 0.000000 14965 +boffo 0 1 6.957497 0.000000 14966 +zdnet 0 1 6.957497 0.000000 14967 +anchordesk 0 1 6.957497 0.000000 14968 +savvysearch 0 1 6.957497 0.000000 14969 +inktomi 0 1 6.957497 0.000000 14970 +crawler 0 1 6.957497 0.000000 14971 +hotbot 0 1 6.957497 0.000000 14972 +pointcom 0 1 6.957497 0.000000 14973 +switchboard 0 1 6.957497 0.000000 14974 +cnnfn 0 1 6.957497 0.000000 14975 +newshour 0 1 6.957497 0.000000 14976 +globe 0 1 6.957497 0.000000 14977 +feed 0 1 6.957497 0.000000 14978 +fedworld 0 1 6.957497 0.000000 14979 +deficit 0 1 6.957497 0.000000 14980 +debt 0 1 6.957497 0.000000 14981 +concord 0 1 6.957497 0.000000 14982 +coalit 0 1 6.957497 0.000000 14983 +bipartisan 0 1 6.957497 0.000000 14984 +andfun 0 1 6.957497 0.000000 14985 +pagebobd 0 1 6.957497 0.000000 14986 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..000adf7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +seattl 0 120 2.079442 0.000000 103 +code 0 108 2.197225 0.000000 116 +mathemat 0 108 2.197225 0.000000 123 +associ 0 93 2.397895 0.000000 151 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +orient 0 80 2.564949 0.000000 205 +logic 0 71 2.639057 0.000000 230 +intellig 0 72 2.639057 0.000000 225 +solv 0 73 2.639057 0.000000 234 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +interact 0 62 2.772589 0.000000 270 +artifici 0 63 2.772589 0.000000 280 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +done 0 47 3.091042 0.000000 381 +mechan 0 43 3.178054 0.000000 416 +http 0 41 3.218876 0.000000 420 +societi 0 40 3.258097 0.000000 456 +join 0 39 3.258097 0.000000 457 +winter 0 36 3.367296 0.000000 500 +post 0 35 3.401197 0.000000 505 +taught 0 33 3.433987 0.000000 526 +human 0 32 3.465736 0.000000 546 +idea 0 32 3.465736 0.000000 545 +concept 0 32 3.465736 0.000000 537 +dissert 0 32 3.465736 0.000000 549 +autumn 0 31 3.496508 0.000000 558 +constraint 0 26 3.688879 0.000000 636 +spent 0 25 3.737670 0.000000 676 +concern 0 25 3.737670 0.000000 666 +born 0 21 3.912023 0.000000 798 +media 0 19 4.007333 0.000000 861 +cambridg 0 16 4.174387 0.000000 1008 +alan 0 13 4.382027 0.000000 1146 +xerox 0 8 4.875197 0.000000 1725 +pagei 0 8 4.875197 0.000000 1683 +grew 0 8 4.875197 0.000000 1742 +reed 0 6 5.164786 0.000000 2086 +england 0 5 5.347108 0.000000 2557 +scotland 0 4 5.568345 0.000000 3049 +sabbat 0 4 5.568345 0.000000 2824 +groupuw 0 3 5.857933 0.000000 3934 +atstanford 0 3 5.857933 0.000000 3935 +pagealan 0 2 6.263398 0.000000 5587 +computerinteract 0 2 6.263398 0.000000 5829 +satisfact 0 2 6.263398 0.000000 5656 +idaho 0 2 6.263398 0.000000 5055 +havebeen 0 2 6.263398 0.000000 5830 +eduwww 0 2 6.263398 0.000000 5138 +principalresearch 0 1 6.957497 0.000000 14987 +activitiesuwconstraint 0 1 6.957497 0.000000 14988 +domainsourc 0 1 6.957497 0.000000 14989 +democraci 0 1 6.957497 0.000000 14990 +qualsproject 0 1 6.957497 0.000000 14991 +teachingher 0 1 6.957497 0.000000 14992 +informationhistori 0 1 6.957497 0.000000 14993 +paloalto 0 1 6.957497 0.000000 14994 +simulationlaboratori 0 1 6.957497 0.000000 14995 +doctoralfellow 0 1 6.957497 0.000000 14996 +ofedinburgh 0 1 6.957497 0.000000 14997 +symbolicalgebra 0 1 6.957497 0.000000 14998 +andexcept 0 1 6.957497 0.000000 14999 +europarc 0 1 6.957497 0.000000 15000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..45353012 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +address 0 170 1.791759 0.000000 62 +like 0 132 1.945910 0.000000 81 +thing 1 84 2.484907 2.484907 189 +probabl 0 40 3.258097 0.000000 455 +credit 0 38 3.295837 0.000000 460 +ofth 0 36 3.367296 0.000000 491 +ad 0 32 3.465736 0.000000 544 +mike 0 24 3.761200 0.000000 703 +less 0 18 4.060443 0.000000 892 +care 0 13 4.382027 0.000000 1177 +brad 0 12 4.465908 0.000000 1264 +subset 0 10 4.653960 0.000000 1425 +couldn 0 4 5.568345 0.000000 2977 +pagebrad 0 1 6.957497 0.000000 15001 +chamberlainphoto 0 1 6.957497 0.000000 15002 +perkowitzth 0 1 6.957497 0.000000 15003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..65c89595 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +like 0 132 1.945910 0.000000 81 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +look 0 107 2.197225 0.000000 115 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +mathemat 0 108 2.197225 0.000000 123 +version 0 113 2.197225 0.000000 122 +part 0 98 2.302585 0.000000 129 +present 0 91 2.397895 0.000000 145 +call 0 91 2.397895 0.000000 153 +academ 0 82 2.484907 0.000000 178 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +html 1 75 2.639057 2.639057 235 +appli 0 71 2.639057 0.000000 226 +onlin 0 75 2.639057 0.000000 223 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +written 0 63 2.772589 0.000000 278 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +reason 0 57 2.890372 0.000000 318 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +math 0 44 3.135494 0.000000 402 +around 0 43 3.178054 0.000000 415 +map 0 39 3.258097 0.000000 452 +theoret 0 39 3.258097 0.000000 446 +error 0 40 3.258097 0.000000 449 +expect 0 37 3.332205 0.000000 484 +tree 0 36 3.367296 0.000000 492 +survei 0 35 3.401197 0.000000 513 +curriculum 0 33 3.433987 0.000000 535 +scientist 0 31 3.496508 0.000000 560 +photo 0 31 3.496508 0.000000 561 +power 0 30 3.555348 0.000000 573 +produc 0 30 3.555348 0.000000 572 +cluster 0 28 3.610918 0.000000 612 +hope 0 28 3.610918 0.000000 610 +challeng 0 26 3.688879 0.000000 653 +bound 0 26 3.688879 0.000000 659 +bookmark 0 26 3.688879 0.000000 639 +aspect 0 25 3.737670 0.000000 663 +sometim 0 24 3.761200 0.000000 696 +theunivers 0 21 3.912023 0.000000 797 +siam 0 21 3.912023 0.000000 800 +binari 0 20 3.951244 0.000000 823 +lower 0 18 4.060443 0.000000 886 +biologi 0 15 4.248495 0.000000 1049 +incomput 0 14 4.317488 0.000000 1096 +galleri 0 13 4.382027 0.000000 1192 +speak 0 12 4.465908 0.000000 1283 +readabl 0 12 4.465908 0.000000 1258 +scienceat 0 11 4.553877 0.000000 1375 +moment 0 11 4.553877 0.000000 1379 +cycl 0 11 4.553877 0.000000 1335 +enter 0 10 4.653960 0.000000 1454 +ski 0 10 4.653960 0.000000 1471 +drink 0 9 4.753590 0.000000 1607 +bridg 0 8 4.875197 0.000000 1764 +interestsi 0 7 5.010635 0.000000 1969 +poster 0 7 5.010635 0.000000 1814 +rough 0 6 5.164786 0.000000 2107 +quickli 0 6 5.164786 0.000000 2000 +alphabet 0 6 5.164786 0.000000 1980 +soda 0 6 5.164786 0.000000 2189 +overlap 0 5 5.347108 0.000000 2368 +upper 0 5 5.347108 0.000000 2481 +latexhtml 0 5 5.347108 0.000000 2347 +older 0 5 5.347108 0.000000 2387 +fulfil 0 4 5.568345 0.000000 2932 +climb 0 4 5.568345 0.000000 2936 +genom 0 3 5.857933 0.000000 3546 +astrophys 0 3 5.857933 0.000000 3936 +dimac 0 3 5.857933 0.000000 3574 +edufor 0 2 6.263398 0.000000 5831 +hpcc 0 2 6.263398 0.000000 5832 +clone 0 2 6.263398 0.000000 5833 +ismb 0 2 6.263398 0.000000 5834 +probe 0 2 6.263398 0.000000 5535 +mumei 0 1 6.957497 0.000000 15004 +brendan 0 1 6.957497 0.000000 15005 +pagebrendan 0 1 6.957497 0.000000 15006 +mumeyi 0 1 6.957497 0.000000 15007 +information 0 1 6.957497 0.000000 15008 +vitaein 0 1 6.957497 0.000000 15009 +htmlorpostscriptformat 0 1 6.957497 0.000000 15010 +landmark 0 1 6.957497 0.000000 15011 +tosolv 0 1 6.957497 0.000000 15012 +groupher 0 1 6.957497 0.000000 15013 +papersb 0 1 6.957497 0.000000 15014 +candidaci 0 1 6.957497 0.000000 15015 +klaw 0 1 6.957497 0.000000 15016 +ofdiscret 0 1 6.957497 0.000000 15017 +containsom 0 1 6.957497 0.000000 15018 +recreationhik 0 1 6.957497 0.000000 15019 +coffeeto 0 1 6.957497 0.000000 15020 +sailingand 0 1 6.957497 0.000000 15021 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..e3a39f47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +spring 0 131 2.079442 0.000000 88 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +school 0 84 2.484907 0.000000 188 +method 0 80 2.564949 0.000000 213 +interfac 0 79 2.564949 0.000000 209 +mondai 0 77 2.564949 0.000000 206 +june 0 79 2.564949 0.000000 214 +goal 0 66 2.708050 0.000000 250 +interact 0 62 2.772589 0.000000 270 +extens 0 53 2.944439 0.000000 340 +particular 0 51 2.995732 0.000000 352 +quarter 0 47 3.091042 0.000000 389 +even 0 45 3.135494 0.000000 393 +ask 0 28 3.610918 0.000000 597 +rather 0 26 3.688879 0.000000 642 +enjoi 0 26 3.688879 0.000000 660 +although 0 25 3.737670 0.000000 667 +self 0 22 3.850148 0.000000 761 +busi 0 21 3.912023 0.000000 784 +hobbi 0 16 4.174387 0.000000 1009 +excit 0 11 4.553877 0.000000 1329 +junior 0 5 5.347108 0.000000 2519 +kid 0 5 5.347108 0.000000 2516 +fairli 0 5 5.347108 0.000000 2322 +writeup 0 5 5.347108 0.000000 2352 +bricker 0 4 5.568345 0.000000 3050 +asystem 0 4 5.568345 0.000000 2612 +lauren 0 3 5.857933 0.000000 3251 +metip 0 3 5.857933 0.000000 3937 +workin 0 3 5.857933 0.000000 3938 +groupi 0 2 6.263398 0.000000 5544 +stevetanimoto 0 2 6.263398 0.000000 5835 +ofthi 0 2 6.263398 0.000000 5836 +cscl 0 2 6.263398 0.000000 5837 +inthi 0 2 6.263398 0.000000 5509 +studio 0 2 6.263398 0.000000 5838 +brickerlauren 0 1 6.957497 0.000000 15022 +clue 0 1 6.957497 0.000000 15023 +primarli 0 1 6.957497 0.000000 15024 +userinterfac 0 1 6.957497 0.000000 15025 +proclaim 0 1 6.957497 0.000000 15026 +mathematicsexperi 0 1 6.957497 0.000000 15027 +usingexploratori 0 1 6.957497 0.000000 15028 +rote 0 1 6.957497 0.000000 15029 +minterest 0 1 6.957497 0.000000 15030 +supportedcollabor 0 1 6.957497 0.000000 15031 +lawk 0 1 6.957497 0.000000 15032 +dawg 0 1 6.957497 0.000000 15033 +interfacea 0 1 6.957497 0.000000 15034 +resumeschool 0 1 6.957497 0.000000 15035 +dazethi 0 1 6.957497 0.000000 15036 +quarterdoth 0 1 6.957497 0.000000 15037 +quartershuman 0 1 6.957497 0.000000 15038 +writeupwhat 0 1 6.957497 0.000000 15039 +insocieti 0 1 6.957497 0.000000 15040 +lifesportscookingpotteri 0 1 6.957497 0.000000 15041 +garag 0 1 6.957497 0.000000 15042 +stuffbecaus 0 1 6.957497 0.000000 15043 +itaddress 0 1 6.957497 0.000000 15044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..91efb4ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +washington 0 236 1.386294 0.000000 32 +adam 0 17 4.110874 0.000000 934 +carlson 1 5 5.347108 5.347108 2351 +carlsonadam 0 1 6.957497 0.000000 15045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..25fdf11b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +modifi 0 178 1.609438 0.000000 35 +paper 0 205 1.609438 0.000000 38 +model 0 145 1.945910 0.000000 69 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +look 0 107 2.197225 0.000000 115 +follow 0 92 2.397895 0.000000 143 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +direct 0 57 2.890372 0.000000 316 +browser 0 56 2.890372 0.000000 313 +give 0 50 3.044522 0.000000 359 +netscap 0 44 3.135494 0.000000 395 +workstat 0 37 3.332205 0.000000 479 +download 0 36 3.367296 0.000000 489 +viewer 0 21 3.912023 0.000000 787 +similar 0 21 3.912023 0.000000 771 +andrew 0 19 4.007333 0.000000 849 +fix 0 11 4.553877 0.000000 1327 +certain 1 10 4.653960 4.653960 1393 +werner 0 10 4.653960 0.000000 1385 +salesin 0 4 5.568345 0.000000 3051 +tonyderos 0 2 6.263398 0.000000 5839 +stuetzl 0 2 6.263398 0.000000 5840 +duchamp 0 2 6.263398 0.000000 5841 +jovan 0 2 6.263398 0.000000 5842 +theview 0 1 6.957497 0.000000 15046 +popov 0 1 6.957497 0.000000 15047 +scanningproject 0 1 6.957497 0.000000 15048 +sgigraph 0 1 6.957497 0.000000 15049 +shouldalso 0 1 6.957497 0.000000 15050 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..5a3c3e9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +school 0 84 2.484907 0.000000 188 +info 0 85 2.484907 0.000000 176 +requir 0 81 2.484907 0.000000 167 +line 0 75 2.639057 0.000000 231 +workshop 0 71 2.639057 0.000000 239 +java 0 70 2.708050 0.000000 248 +best 0 59 2.833213 0.000000 299 +think 0 57 2.890372 0.000000 314 +browser 0 56 2.890372 0.000000 313 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +quarter 0 47 3.091042 0.000000 389 +favorit 0 44 3.135494 0.000000 410 +netscap 0 44 3.135494 0.000000 395 +open 0 38 3.295837 0.000000 469 +computersci 0 30 3.555348 0.000000 562 +chines 0 29 3.583519 0.000000 595 +though 0 27 3.637586 0.000000 622 +enjoi 0 26 3.688879 0.000000 660 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +instal 0 22 3.850148 0.000000 754 +grad 0 20 3.951244 0.000000 837 +applet 0 20 3.951244 0.000000 827 +taiwan 0 16 4.174387 0.000000 1006 +charact 0 15 4.248495 0.000000 1028 +promot 0 12 4.465908 0.000000 1235 +rice 0 11 4.553877 0.000000 1336 +cook 0 10 4.653960 0.000000 1464 +yellow 0 9 4.753590 0.000000 1601 +ball 0 9 4.753590 0.000000 1608 +absolut 0 8 4.875197 0.000000 1646 +chinook 0 6 5.164786 0.000000 2229 +greec 0 6 5.164786 0.000000 2208 +restaur 0 6 5.164786 0.000000 2230 +ross 0 5 5.347108 0.000000 2243 +chou 1 4 5.568345 5.568345 3033 +recip 0 3 5.857933 0.000000 3668 +infoth 0 2 6.263398 0.000000 5195 +projectmi 0 2 6.263398 0.000000 5482 +schedulemi 0 2 6.263398 0.000000 5843 +publicationscod 0 2 6.263398 0.000000 5520 +stir 0 2 6.263398 0.000000 4865 +fri 0 2 6.263398 0.000000 5844 +geek 0 2 6.263398 0.000000 5083 +toi 0 2 6.263398 0.000000 5184 +font 0 2 6.263398 0.000000 5845 +purpl 0 2 6.263398 0.000000 5372 +scari 0 1 6.957497 0.000000 15051 +codesignpersonalperson 0 1 6.957497 0.000000 15052 +resumefoodi 0 1 6.957497 0.000000 15053 +ofpeopl 0 1 6.957497 0.000000 15054 +dish 0 1 6.957497 0.000000 15055 +noodl 0 1 6.957497 0.000000 15056 +beefskew 0 1 6.957497 0.000000 15057 +toysb 0 1 6.957497 0.000000 15058 +computersand 0 1 6.957497 0.000000 15059 +taiwanesei 0 1 6.957497 0.000000 15060 +taiwaneselanguag 0 1 6.957497 0.000000 15061 +taiwanes 0 1 6.957497 0.000000 15062 +sureto 0 1 6.957497 0.000000 15063 +taiwanesedictionari 0 1 6.957497 0.000000 15064 +viewedif 0 1 6.957497 0.000000 15065 +beabl 0 1 6.957497 0.000000 15066 +bouncingov 0 1 6.957497 0.000000 15067 +barnei 0 1 6.957497 0.000000 15068 +dynosaur 0 1 6.957497 0.000000 15069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..3d0aa8e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +read 0 154 1.791759 0.000000 47 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +search 0 95 2.397895 0.000000 155 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +internet 0 83 2.484907 0.000000 186 +thing 0 84 2.484907 0.000000 189 +info 0 85 2.484907 0.000000 176 +help 0 83 2.484907 0.000000 175 +refer 0 78 2.564949 0.000000 203 +april 0 77 2.564949 0.000000 196 +good 0 77 2.564949 0.000000 200 +mondai 0 77 2.564949 0.000000 206 +html 0 75 2.639057 0.000000 235 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +tuesdai 0 73 2.639057 0.000000 219 +august 0 66 2.708050 0.000000 257 +thursdai 0 70 2.708050 0.000000 241 +result 0 65 2.772589 0.000000 281 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +guid 0 63 2.772589 0.000000 267 +wednesdai 0 64 2.772589 0.000000 261 +automat 0 61 2.833213 0.000000 306 +plai 0 60 2.833213 0.000000 307 +colleg 0 61 2.833213 0.000000 300 +found 0 53 2.944439 0.000000 337 +set 0 50 3.044522 0.000000 361 +archiv 0 49 3.044522 0.000000 364 +done 0 47 3.091042 0.000000 381 +adapt 0 46 3.091042 0.000000 387 +featur 0 46 3.091042 0.000000 386 +math 1 44 3.135494 3.135494 402 +netscap 0 44 3.135494 0.000000 395 +fridai 0 44 3.135494 0.000000 390 +term 0 43 3.178054 0.000000 411 +review 0 42 3.218876 0.000000 425 +late 0 40 3.258097 0.000000 439 +programm 0 39 3.258097 0.000000 445 +correct 0 38 3.295837 0.000000 462 +open 0 38 3.295837 0.000000 469 +statist 0 35 3.401197 0.000000 521 +tech 0 35 3.401197 0.000000 515 +chapter 0 32 3.465736 0.000000 536 +autumn 0 31 3.496508 0.000000 558 +linux 0 27 3.637586 0.000000 631 +lab 0 24 3.761200 0.000000 698 +color 0 22 3.850148 0.000000 762 +self 0 22 3.850148 0.000000 761 +hous 0 21 3.912023 0.000000 801 +region 0 19 4.007333 0.000000 875 +macintosh 0 17 4.110874 0.000000 920 +dilbert 0 16 4.174387 0.000000 996 +month 0 15 4.248495 0.000000 1025 +contribut 0 15 4.248495 0.000000 1021 +brother 0 13 4.382027 0.000000 1189 +tune 0 12 4.465908 0.000000 1227 +insid 0 12 4.465908 0.000000 1262 +fpga 0 10 4.653960 0.000000 1433 +pacif 0 8 4.875197 0.000000 1674 +babylon 0 8 4.875197 0.000000 1731 +duke 0 6 5.164786 0.000000 2231 +usag 0 6 5.164786 0.000000 2209 +peek 0 6 5.164786 0.000000 2169 +contest 0 5 5.347108 0.000000 2273 +compet 0 5 5.347108 0.000000 2462 +corei 0 4 5.568345 0.000000 2718 +wavelet 0 4 5.568345 0.000000 2874 +percept 0 3 5.857933 0.000000 3739 +rsum 0 3 5.857933 0.000000 3939 +zone 0 3 5.857933 0.000000 3747 +corin 0 3 5.857933 0.000000 3311 +induc 0 2 6.263398 0.000000 4795 +lurker 0 2 6.263398 0.000000 5050 +andersoncorei 0 1 6.957497 0.000000 15070 +andersonth 0 1 6.957497 0.000000 15071 +localtalk 0 1 6.957497 0.000000 15072 +collegi 0 1 6.957497 0.000000 15073 +univser 0 1 6.957497 0.000000 15074 +highlin 0 1 6.957497 0.000000 15075 +polli 0 1 6.957497 0.000000 15076 +treasuri 0 1 6.957497 0.000000 15077 +providercool 0 1 6.957497 0.000000 15078 +sunsit 0 1 6.957497 0.000000 15079 +pageus 0 1 6.957497 0.000000 15080 +washinton 0 1 6.957497 0.000000 15081 +uwtv 0 1 6.957497 0.000000 15082 +notesmi 0 1 6.957497 0.000000 15083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..4f943fa1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +address 0 170 1.791759 0.000000 62 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +well 0 109 2.197225 0.000000 121 +text 0 98 2.302585 0.000000 133 +homepag 0 93 2.397895 0.000000 148 +second 0 81 2.484907 0.000000 166 +start 0 83 2.484907 0.000000 173 +know 0 80 2.564949 0.000000 198 +experi 0 64 2.772589 0.000000 283 +locat 0 59 2.833213 0.000000 303 +found 0 53 2.944439 0.000000 337 +visitor 0 49 3.044522 0.000000 371 +express 0 32 3.465736 0.000000 540 +photo 0 31 3.496508 0.000000 561 +repres 0 26 3.688879 0.000000 656 +never 0 25 3.737670 0.000000 671 +grad 0 20 3.951244 0.000000 837 +wonder 0 20 3.951244 0.000000 815 +minut 0 20 3.951244 0.000000 810 +appropri 0 18 4.060443 0.000000 883 +anyon 0 17 4.110874 0.000000 916 +choos 0 16 4.174387 0.000000 964 +universityof 0 15 4.248495 0.000000 1061 +near 0 14 4.317488 0.000000 1091 +cannot 0 13 4.382027 0.000000 1144 +sai 0 13 4.382027 0.000000 1175 +ball 0 9 4.753590 0.000000 1608 +occur 0 9 4.753590 0.000000 1572 +didn 0 9 4.753590 0.000000 1563 +craig 0 7 5.010635 0.000000 1879 +fromth 0 7 5.010635 0.000000 1802 +saturdai 0 7 5.010635 0.000000 1794 +parent 0 6 5.164786 0.000000 2204 +situat 0 5 5.347108 0.000000 2365 +curiou 0 5 5.347108 0.000000 2541 +enjoy 0 4 5.568345 0.000000 2937 +waterloo 0 3 5.857933 0.000000 3523 +tomi 0 2 6.263398 0.000000 5846 +convoc 0 2 6.263398 0.000000 5757 +honour 0 2 6.263398 0.000000 5632 +felt 0 2 6.263398 0.000000 4978 +incident 0 2 6.263398 0.000000 5109 +ceremoni 0 2 6.263398 0.000000 5585 +forgiv 0 2 6.263398 0.000000 5770 +valedictorian 1 1 6.957497 6.957497 15084 +experiencecraig 0 1 6.957497 0.000000 15085 +kaplancurr 0 1 6.957497 0.000000 15086 +copyof 0 1 6.957497 0.000000 15087 +undergraduatewa 0 1 6.957497 0.000000 15088 +cskaplan 0 1 6.957497 0.000000 15089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..7c118c0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +welcom 0 122 2.079442 0.000000 99 +peopl 0 96 2.302585 0.000000 132 +take 0 97 2.302585 0.000000 134 +sinc 0 90 2.397895 0.000000 159 +thing 0 84 2.484907 0.000000 189 +java 0 70 2.708050 0.000000 248 +plan 0 65 2.772589 0.000000 272 +think 0 57 2.890372 0.000000 314 +visitor 0 49 3.044522 0.000000 371 +numer 0 49 3.044522 0.000000 369 +futur 0 41 3.218876 0.000000 427 +vision 0 41 3.218876 0.000000 430 +mean 0 37 3.332205 0.000000 477 +anim 0 31 3.496508 0.000000 557 +load 0 28 3.610918 0.000000 601 +applet 0 20 3.951244 0.000000 827 +agent 0 18 4.060443 0.000000 910 +wind 0 18 4.060443 0.000000 908 +figur 0 18 4.060443 0.000000 903 +thought 0 17 4.110874 0.000000 945 +edui 0 13 4.382027 0.000000 1193 +weld 0 9 4.753590 0.000000 1538 +vallei 0 7 5.010635 0.000000 1959 +chung 0 7 5.010635 0.000000 1964 +softbot 0 7 5.010635 0.000000 1974 +etzioni 0 6 5.164786 0.000000 2135 +andsoftwar 0 4 5.568345 0.000000 2753 +arch 0 4 5.568345 0.000000 2995 +codi 1 3 5.857933 5.857933 3940 +kwok 1 3 5.857933 5.857933 3941 +aliv 0 3 5.857933 0.000000 3864 +nausicaa 0 2 6.263398 0.000000 5218 +ingram 0 2 6.263398 0.000000 5847 +castl 0 2 6.263398 0.000000 5217 +doom 0 2 6.263398 0.000000 5848 +sanctuari 0 1 6.957497 0.000000 15090 +asami 0 1 6.957497 0.000000 15091 +chiaki 0 1 6.957497 0.000000 15092 +ctkwok 0 1 6.957497 0.000000 15093 +andoren 0 1 6.957497 0.000000 15094 +aiuw 0 1 6.957497 0.000000 15095 +informationleisur 0 1 6.957497 0.000000 15096 +windlaputa 0 1 6.957497 0.000000 15097 +skyhyp 0 1 6.957497 0.000000 15098 +gunnm 0 1 6.957497 0.000000 15099 +vile 0 1 6.957497 0.000000 15100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..10c3cc12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +updat 0 191 1.609438 0.000000 41 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +postscript 0 131 2.079442 0.000000 90 +homepag 0 93 2.397895 0.000000 148 +resum 1 79 2.564949 2.564949 217 +complet 0 77 2.564949 0.000000 208 +html 0 75 2.639057 0.000000 235 +plan 0 65 2.772589 0.000000 272 +vita 0 38 3.295837 0.000000 473 +curriculum 1 33 3.433987 3.433987 535 +employ 0 12 4.465908 0.000000 1291 +rest 0 12 4.465908 0.000000 1259 +darren 1 5 5.347108 5.347108 2565 +cronquist 1 3 5.857933 5.857933 3942 +myph 0 3 5.857933 0.000000 3880 +underconstruct 0 3 5.857933 0.000000 3889 +darrenc 0 1 6.957497 0.000000 15101 +vitaperson 0 1 6.957497 0.000000 15102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..845d1e0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +read 0 154 1.791759 0.000000 47 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +find 0 111 2.197225 0.000000 111 +user 0 104 2.302585 0.000000 137 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +graphic 0 90 2.397895 0.000000 147 +second 0 81 2.484907 0.000000 166 +activ 0 84 2.484907 0.000000 182 +build 0 85 2.484907 0.000000 184 +control 0 82 2.484907 0.000000 164 +novemb 0 81 2.484907 0.000000 179 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +librari 0 87 2.484907 0.000000 181 +help 0 83 2.484907 0.000000 175 +interfac 0 79 2.564949 0.000000 209 +appear 0 78 2.564949 0.000000 210 +david 0 71 2.639057 0.000000 232 +intellig 0 72 2.639057 0.000000 225 +line 0 75 2.639057 0.000000 231 +symposium 0 72 2.639057 0.000000 238 +nation 0 74 2.639057 0.000000 240 +knowledg 0 67 2.708050 0.000000 243 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +plan 1 65 2.772589 2.772589 272 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +import 0 65 2.772589 0.000000 282 +automat 0 61 2.833213 0.000000 306 +simpl 0 60 2.833213 0.000000 298 +variou 0 56 2.890372 0.000000 317 +local 0 55 2.944439 0.000000 334 +undergradu 0 54 2.944439 0.000000 338 +week 0 52 2.995732 0.000000 343 +even 0 45 3.135494 0.000000 393 +favorit 0 44 3.135494 0.000000 410 +third 0 43 3.178054 0.000000 412 +fast 0 42 3.218876 0.000000 429 +programm 0 39 3.258097 0.000000 445 +live 0 40 3.258097 0.000000 451 +map 0 39 3.258097 0.000000 452 +realli 0 40 3.258097 0.000000 444 +prototyp 0 38 3.295837 0.000000 463 +feel 0 37 3.332205 0.000000 483 +respons 0 37 3.332205 0.000000 476 +game 0 36 3.367296 0.000000 498 +represent 0 35 3.401197 0.000000 512 +michael 0 35 3.401197 0.000000 514 +everi 0 34 3.401197 0.000000 519 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +human 0 32 3.465736 0.000000 546 +collabor 0 32 3.465736 0.000000 543 +domain 0 30 3.555348 0.000000 564 +built 0 29 3.583519 0.000000 592 +rather 0 26 3.688879 0.000000 642 +client 0 25 3.737670 0.000000 679 +spent 0 25 3.737670 0.000000 676 +store 0 24 3.761200 0.000000 693 +demonstr 0 24 3.761200 0.000000 694 +initi 0 23 3.806662 0.000000 717 +recognit 0 23 3.806662 0.000000 723 +famili 0 23 3.806662 0.000000 735 +william 0 22 3.850148 0.000000 765 +theunivers 0 21 3.912023 0.000000 797 +navig 0 21 3.912023 0.000000 796 +watch 0 21 3.912023 0.000000 789 +anderson 0 19 4.007333 0.000000 860 +boston 0 19 4.007333 0.000000 862 +agent 0 18 4.060443 0.000000 910 +adam 0 17 4.110874 0.000000 934 +match 0 16 4.174387 0.000000 965 +atth 0 15 4.248495 0.000000 1019 +mayb 0 15 4.248495 0.000000 1014 +camera 0 14 4.317488 0.000000 1115 +dave 0 14 4.317488 0.000000 1098 +senior 0 14 4.317488 0.000000 1120 +context 0 13 4.382027 0.000000 1153 +whose 0 13 4.382027 0.000000 1166 +sai 0 13 4.382027 0.000000 1175 +touch 0 12 4.465908 0.000000 1288 +grow 0 12 4.465908 0.000000 1209 +career 0 12 4.465908 0.000000 1287 +appl 0 11 4.553877 0.000000 1303 +michigan 0 11 4.553877 0.000000 1368 +surf 0 11 4.553877 0.000000 1301 +shop 0 10 4.653960 0.000000 1469 +death 0 10 4.653960 0.000000 1457 +weld 0 9 4.753590 0.000000 1538 +russel 0 9 4.753590 0.000000 1507 +debugg 0 9 4.753590 0.000000 1493 +declar 0 9 4.753590 0.000000 1526 +leader 0 9 4.753590 0.000000 1576 +juan 0 9 4.753590 0.000000 1580 +sean 0 8 4.875197 0.000000 1705 +aaai 0 8 4.875197 0.000000 1750 +edg 0 8 4.875197 0.000000 1647 +planner 0 7 5.010635 0.000000 1797 +sensor 0 7 5.010635 0.000000 1920 +fortun 0 7 5.010635 0.000000 1872 +spare 0 6 5.164786 0.000000 2177 +mix 0 6 5.164786 0.000000 2200 +chicago 0 6 5.164786 0.000000 2149 +babi 0 5 5.347108 0.000000 2493 +pars 0 5 5.347108 0.000000 2321 +darren 0 5 5.347108 0.000000 2565 +compet 0 5 5.347108 0.000000 2462 +salesin 0 4 5.568345 0.000000 3051 +midnight 0 4 5.568345 0.000000 2599 +gotten 0 4 5.568345 0.000000 2628 +cut 0 4 5.568345 0.000000 2620 +tick 0 4 5.568345 0.000000 2975 +cohen 0 3 5.857933 0.000000 3652 +workin 0 3 5.857933 0.000000 3938 +harold 0 3 5.857933 0.000000 3803 +gloriou 0 3 5.857933 0.000000 3816 +hero 0 3 5.857933 0.000000 3711 +fame 0 3 5.857933 0.000000 3793 +straight 0 3 5.857933 0.000000 3655 +evil 0 3 5.857933 0.000000 3943 +christianson 0 2 6.263398 0.000000 5849 +till 0 2 6.263398 0.000000 5850 +nowher 0 2 6.263398 0.000000 4292 +shopbot 0 2 6.263398 0.000000 5824 +chicken 0 2 6.263398 0.000000 5851 +theanim 0 2 6.263398 0.000000 5852 +thechateau 0 2 6.263398 0.000000 5853 +cynic 0 2 6.263398 0.000000 5854 +duel 0 2 6.263398 0.000000 5855 +christiansondbc 0 1 6.957497 0.000000 15103 +inaiand 0 1 6.957497 0.000000 15104 +graphicsa 0 1 6.957497 0.000000 15105 +directedbehavior 0 1 6.957497 0.000000 15106 +buzzwordacquisit 0 1 6.957497 0.000000 15107 +bobdoorenbo 0 1 6.957497 0.000000 15108 +somehowintegr 0 1 6.957497 0.000000 15109 +applicationthat 0 1 6.957497 0.000000 15110 +basket 0 1 6.957497 0.000000 15111 +determinewhat 0 1 6.957497 0.000000 15112 +moviethat 0 1 6.957497 0.000000 15113 +technologyinto 0 1 6.957497 0.000000 15114 +perpetr 0 1 6.957497 0.000000 15115 +theucpop 0 1 6.957497 0.000000 15116 +isher 0 1 6.957497 0.000000 15117 +carboload 0 1 6.957497 0.000000 15118 +publicationschristianson 0 1 6.957497 0.000000 15119 +cinematographi 0 1 6.957497 0.000000 15120 +firbi 0 1 6.957497 0.000000 15121 +mcdougal 0 1 6.957497 0.000000 15122 +fusion 0 1 6.957497 0.000000 15123 +withfreder 0 1 6.957497 0.000000 15124 +judo 0 1 6.957497 0.000000 15125 +sibl 0 1 6.957497 0.000000 15126 +sisterjust 0 1 6.957497 0.000000 15127 +supercollid 0 1 6.957497 0.000000 15128 +slack 0 1 6.957497 0.000000 15129 +mirski 0 1 6.957497 0.000000 15130 +youth 0 1 6.957497 0.000000 15131 +wwwf 0 1 6.957497 0.000000 15132 +grudg 0 1 6.957497 0.000000 15133 +doomgat 0 1 6.957497 0.000000 15134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..ad436b2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +modifi 0 178 1.609438 0.000000 35 +seattl 0 120 2.079442 0.000000 103 +assist 0 112 2.197225 0.000000 113 +look 0 107 2.197225 0.000000 115 +take 1 97 2.302585 2.302585 134 +activ 0 84 2.484907 0.000000 182 +mondai 0 77 2.564949 0.000000 206 +david 0 71 2.639057 0.000000 232 +creat 0 63 2.772589 0.000000 277 +give 0 50 3.044522 0.000000 359 +tutori 0 39 3.258097 0.000000 437 +form 0 39 3.258097 0.000000 443 +togeth 0 23 3.806662 0.000000 714 +navig 0 21 3.912023 0.000000 796 +hypertext 0 19 4.007333 0.000000 865 +quiz 1 16 4.174387 4.174387 990 +dave 0 14 4.317488 0.000000 1098 +johnson 1 13 4.382027 4.382027 1162 +script 0 13 4.382027 0.000000 1171 +basketbal 0 12 4.465908 0.000000 1289 +softbal 0 9 4.753590 0.000000 1594 +golf 0 6 5.164786 0.000000 2178 +fit 0 5 5.347108 0.000000 2285 +racquetbal 0 4 5.568345 0.000000 3052 +assess 0 4 5.568345 0.000000 2724 +readersproject 0 1 6.957497 0.000000 15135 +theracquetbal 0 1 6.957497 0.000000 15136 +thecreat 0 1 6.957497 0.000000 15137 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..86295be5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +first 0 140 1.945910 0.000000 71 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +seattl 1 120 2.079442 2.079442 103 +studi 0 120 2.079442 0.000000 91 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +manag 0 114 2.197225 0.000000 125 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +call 0 91 2.397895 0.000000 153 +commun 0 95 2.397895 0.000000 157 +homepag 0 93 2.397895 0.000000 148 +section 0 94 2.397895 0.000000 149 +real 0 93 2.397895 0.000000 144 +level 0 87 2.484907 0.000000 180 +stuff 0 87 2.484907 0.000000 171 +academ 0 82 2.484907 0.000000 178 +environ 0 84 2.484907 0.000000 177 +help 0 83 2.484907 0.000000 175 +institut 0 84 2.484907 0.000000 187 +learn 0 86 2.484907 0.000000 170 +server 0 76 2.564949 0.000000 204 +dynam 0 76 2.564949 0.000000 194 +know 0 80 2.564949 0.000000 198 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +solv 0 73 2.639057 0.000000 234 +line 0 75 2.639057 0.000000 231 +view 0 70 2.708050 0.000000 254 +main 0 67 2.708050 0.000000 256 +visit 0 63 2.772589 0.000000 288 +written 0 63 2.772589 0.000000 278 +previou 0 62 2.772589 0.000000 290 +guid 0 63 2.772589 0.000000 267 +unix 0 58 2.890372 0.000000 308 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +reason 0 57 2.890372 0.000000 318 +extens 0 53 2.944439 0.000000 340 +undergradu 0 54 2.944439 0.000000 338 +maintain 0 51 2.995732 0.000000 342 +run 0 51 2.995732 0.000000 347 +date 0 51 2.995732 0.000000 344 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +featur 0 46 3.091042 0.000000 386 +netscap 0 44 3.135494 0.000000 395 +around 0 43 3.178054 0.000000 415 +past 0 42 3.218876 0.000000 428 +movi 0 40 3.258097 0.000000 459 +littl 0 39 3.258097 0.000000 454 +brian 0 38 3.295837 0.000000 466 +respons 0 37 3.332205 0.000000 476 +soon 0 36 3.367296 0.000000 494 +manual 0 35 3.401197 0.000000 504 +extend 0 32 3.465736 0.000000 539 +weather 0 28 3.610918 0.000000 618 +campu 0 27 3.637586 0.000000 623 +team 0 27 3.637586 0.000000 625 +primari 0 25 3.737670 0.000000 669 +sport 0 25 3.737670 0.000000 683 +todai 0 25 3.737670 0.000000 672 +other 0 24 3.761200 0.000000 697 +dai 0 22 3.850148 0.000000 753 +recommend 0 22 3.850148 0.000000 737 +kernel 0 20 3.951244 0.000000 825 +safeti 0 20 3.951244 0.000000 817 +fine 0 20 3.951244 0.000000 822 +region 0 19 4.007333 0.000000 875 +excel 0 19 4.007333 0.000000 868 +bershad 0 18 4.060443 0.000000 902 +statu 0 18 4.060443 0.000000 885 +protect 0 17 4.110874 0.000000 935 +debug 0 17 4.110874 0.000000 944 +condit 0 16 4.174387 0.000000 975 +anyth 0 16 4.174387 0.000000 998 +dilbert 0 16 4.174387 0.000000 996 +mayb 0 15 4.248495 0.000000 1014 +spin 1 14 4.317488 4.317488 1121 +achiev 0 14 4.317488 0.000000 1088 +stai 0 12 4.465908 0.000000 1215 +touch 0 12 4.465908 0.000000 1288 +rest 0 12 4.465908 0.000000 1259 +surf 0 11 4.553877 0.000000 1301 +traffic 0 10 4.653960 0.000000 1421 +shop 0 10 4.653960 0.000000 1469 +modula 0 9 4.753590 0.000000 1613 +mach 0 8 4.875197 0.000000 1669 +transport 0 8 4.875197 0.000000 1672 +claim 0 8 4.875197 0.000000 1664 +hold 0 8 4.875197 0.000000 1645 +wouldn 0 7 5.010635 0.000000 1970 +occasion 0 7 5.010635 0.000000 1905 +athlet 0 7 5.010635 0.000000 1933 +yeah 0 6 5.164786 0.000000 2195 +impress 0 6 5.164786 0.000000 2096 +variant 0 6 5.164786 0.000000 2043 +subsystem 0 6 5.164786 0.000000 2015 +band 0 6 5.164786 0.000000 2198 +restaur 0 6 5.164786 0.000000 2230 +notr 0 4 5.568345 0.000000 2880 +dame 0 4 5.568345 0.000000 2881 +afraid 0 4 5.568345 0.000000 3053 +breath 0 4 5.568345 0.000000 2946 +countless 0 4 5.568345 0.000000 3020 +racquetbal 0 4 5.568345 0.000000 3052 +leagu 0 4 5.568345 0.000000 3040 +basebal 0 4 5.568345 0.000000 2969 +bean 0 4 5.568345 0.000000 2968 +slight 0 3 5.857933 0.000000 3894 +emul 0 3 5.857933 0.000000 3944 +meanwhil 0 3 5.857933 0.000000 3129 +distract 0 3 5.857933 0.000000 3945 +trumpet 0 3 5.857933 0.000000 3946 +espn 0 3 5.857933 0.000000 3724 +marin 0 3 5.857933 0.000000 3947 +dion 0 2 6.263398 0.000000 5856 +okai 0 2 6.263398 0.000000 4465 +occupi 0 2 6.263398 0.000000 5857 +ladder 0 2 6.263398 0.000000 5858 +outlet 0 2 6.263398 0.000000 5248 +infam 0 2 6.263398 0.000000 5859 +ddion 0 1 6.957497 0.000000 15138 +thespinoper 0 1 6.957497 0.000000 15139 +intercept 0 1 6.957497 0.000000 15140 +havework 0 1 6.957497 0.000000 15141 +ipromis 0 1 6.957497 0.000000 15142 +enhancedthi 0 1 6.957497 0.000000 15143 +vast 0 1 6.957497 0.000000 15144 +sportzon 0 1 6.957497 0.000000 15145 +eateri 0 1 6.957497 0.000000 15146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..39dfc863 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +parallel 1 169 1.791759 1.791759 60 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +address 0 170 1.791759 0.000000 62 +perform 0 143 1.945910 0.000000 74 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +topic 0 114 2.197225 0.000000 110 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +center 0 88 2.397895 0.000000 158 +environ 0 84 2.484907 0.000000 177 +start 0 83 2.484907 0.000000 173 +server 0 76 2.564949 0.000000 204 +issu 0 78 2.564949 0.000000 211 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +would 0 67 2.708050 0.000000 251 +experi 0 64 2.772589 0.000000 283 +collect 0 65 2.772589 0.000000 268 +interact 0 62 2.772589 0.000000 270 +colleg 0 61 2.833213 0.000000 300 +share 0 59 2.833213 0.000000 304 +index 0 56 2.890372 0.000000 309 +summer 0 56 2.890372 0.000000 311 +extens 0 53 2.944439 0.000000 340 +undergradu 0 54 2.944439 0.000000 338 +run 0 51 2.995732 0.000000 347 +protocol 0 45 3.135494 0.000000 407 +offer 0 43 3.178054 0.000000 414 +around 0 43 3.178054 0.000000 415 +howev 0 41 3.218876 0.000000 422 +field 0 37 3.332205 0.000000 482 +award 0 34 3.401197 0.000000 523 +curriculum 0 33 3.433987 0.000000 535 +independ 0 32 3.465736 0.000000 548 +dissert 0 32 3.465736 0.000000 549 +often 0 31 3.496508 0.000000 551 +administr 0 27 3.637586 0.000000 628 +campu 0 27 3.637586 0.000000 623 +arrai 0 27 3.637586 0.000000 627 +challeng 0 26 3.688879 0.000000 653 +task 0 25 3.737670 0.000000 678 +portabl 0 20 3.951244 0.000000 819 +beauti 0 18 4.060443 0.000000 912 +event 0 18 4.060443 0.000000 896 +sheet 0 16 4.174387 0.000000 973 +women 0 16 4.174387 0.000000 1004 +career 0 12 4.465908 0.000000 1287 +skill 0 12 4.465908 0.000000 1205 +asynchron 0 12 4.465908 0.000000 1229 +host 0 11 4.553877 0.000000 1306 +typic 0 11 4.553877 0.000000 1360 +extra 0 11 4.553877 0.000000 1312 +interestsmi 0 10 4.653960 0.000000 1462 +tutor 0 9 4.753590 0.000000 1552 +heart 0 8 4.875197 0.000000 1729 +ring 0 8 4.875197 0.000000 1684 +on 0 8 4.875197 0.000000 1628 +pursu 0 7 5.010635 0.000000 1902 +divers 0 6 5.164786 0.000000 2232 +averag 0 6 5.164786 0.000000 2098 +conveni 0 6 5.164786 0.000000 2088 +li 0 5 5.347108 0.000000 2500 +suffer 0 5 5.347108 0.000000 2268 +spaa 0 3 5.857933 0.000000 3906 +certif 0 3 5.857933 0.000000 3859 +token 0 2 6.263398 0.000000 4415 +foremost 0 2 6.263398 0.000000 5361 +ordistribut 0 2 6.263398 0.000000 5581 +distributedenviron 0 2 6.263398 0.000000 5183 +comm 0 2 6.263398 0.000000 4746 +newslet 0 2 6.263398 0.000000 5860 +derrick 0 1 6.957497 0.000000 15147 +weathersbi 0 1 6.957497 0.000000 15148 +bullssupersonicsi 0 1 6.957497 0.000000 15149 +phdin 0 1 6.957497 0.000000 15150 +ofseattl 0 1 6.957497 0.000000 15151 +prei 0 1 6.957497 0.000000 15152 +therebyextend 0 1 6.957497 0.000000 15153 +interestssignific 0 1 6.957497 0.000000 15154 +securityresearch 0 1 6.957497 0.000000 15155 +challengespres 0 1 6.957497 0.000000 15156 +theseenviron 0 1 6.957497 0.000000 15157 +daunt 0 1 6.957497 0.000000 15158 +projectacadem 0 1 6.957497 0.000000 15159 +achievementsinstructor 0 1 6.957497 0.000000 15160 +collegeinstructor 0 1 6.957497 0.000000 15161 +minoritystud 0 1 6.957497 0.000000 15162 +engineeringoutstand 0 1 6.957497 0.000000 15163 +cnnfinanciala 0 1 6.957497 0.000000 15164 +javaw 0 1 6.957497 0.000000 15165 +weathersbyderrick 0 1 6.957497 0.000000 15166 +edutu 0 1 6.957497 0.000000 15167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..b4dca446 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +develop 0 174 1.791759 0.000000 53 +note 1 142 1.945910 1.945910 67 +like 0 132 1.945910 0.000000 81 +relat 0 139 1.945910 0.000000 68 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +postscript 1 131 2.079442 2.079442 90 +provid 0 121 2.079442 0.000000 94 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +book 0 99 2.302585 0.000000 131 +imag 0 91 2.397895 0.000000 161 +mani 0 92 2.397895 0.000000 150 +octob 0 89 2.397895 0.000000 156 +know 0 80 2.564949 0.000000 198 +june 0 79 2.564949 0.000000 214 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +html 0 75 2.639057 0.000000 235 +tuesdai 0 73 2.639057 0.000000 219 +goal 0 66 2.708050 0.000000 250 +plai 0 60 2.833213 0.000000 307 +think 0 57 2.890372 0.000000 314 +summer 0 56 2.890372 0.000000 311 +finger 0 52 2.995732 0.000000 354 +much 0 52 2.995732 0.000000 349 +format 0 48 3.044522 0.000000 356 +frequent 0 49 3.044522 0.000000 367 +possibl 0 47 3.091042 0.000000 378 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +late 0 40 3.258097 0.000000 439 +brian 0 38 3.295837 0.000000 466 +china 0 37 3.332205 0.000000 487 +feel 0 37 3.332205 0.000000 483 +game 1 36 3.367296 3.367296 498 +soon 0 36 3.367296 0.000000 494 +ofth 0 36 3.367296 0.000000 491 +statist 0 35 3.401197 0.000000 521 +return 0 34 3.401197 0.000000 502 +bibliographi 0 34 3.401197 0.000000 518 +anim 0 31 3.496508 0.000000 557 +progress 0 28 3.610918 0.000000 598 +color 0 22 3.850148 0.000000 762 +period 0 22 3.850148 0.000000 743 +feedback 0 19 4.007333 0.000000 854 +histori 0 19 4.007333 0.000000 853 +encourag 0 18 4.060443 0.000000 880 +edulast 0 17 4.110874 0.000000 927 +letter 0 16 4.174387 0.000000 981 +alreadi 0 16 4.174387 0.000000 963 +draft 0 14 4.317488 0.000000 1085 +hopefulli 0 14 4.317488 0.000000 1071 +trip 0 14 4.317488 0.000000 1113 +near 0 14 4.317488 0.000000 1091 +carri 0 13 4.382027 0.000000 1152 +readabl 0 12 4.465908 0.000000 1258 +bike 0 10 4.653960 0.000000 1468 +death 0 10 4.653960 0.000000 1457 +drink 0 9 4.753590 0.000000 1607 +ride 0 8 4.875197 0.000000 1741 +blue 0 6 5.164786 0.000000 2227 +seen 0 6 5.164786 0.000000 2202 +scienceand 0 5 5.347108 0.000000 2348 +atlant 0 5 5.347108 0.000000 2508 +semi 0 5 5.347108 0.000000 2510 +chaotic 0 5 5.347108 0.000000 2566 +ireland 0 4 5.568345 0.000000 2853 +pagebrian 0 4 5.568345 0.000000 3054 +myfavorit 0 3 5.857933 0.000000 3852 +interview 0 3 5.857933 0.000000 3324 +fascin 0 3 5.857933 0.000000 3948 +northern 1 2 6.263398 6.263398 5861 +terrorist 0 2 6.263398 0.000000 5190 +thorough 0 2 6.263398 0.000000 4134 +ocean 0 2 6.263398 0.000000 5375 +shoulder 0 2 6.263398 0.000000 4750 +thecurr 0 2 6.263398 0.000000 5862 +addict 0 2 6.263398 0.000000 5576 +dewei 0 1 6.957497 0.000000 15168 +deweyabout 0 1 6.957497 0.000000 15169 +doyou 0 1 6.957497 0.000000 15170 +ilov 0 1 6.957497 0.000000 15171 +oldroomm 0 1 6.957497 0.000000 15172 +irelandi 0 1 6.957497 0.000000 15173 +belfast 0 1 6.957497 0.000000 15174 +sixti 0 1 6.957497 0.000000 15175 +pagesof 0 1 6.957497 0.000000 15176 +luggag 0 1 6.957497 0.000000 15177 +getthos 0 1 6.957497 0.000000 15178 +enlighteningformat 0 1 6.957497 0.000000 15179 +thisproject 0 1 6.957497 0.000000 15180 +sinn 0 1 6.957497 0.000000 15181 +fein 0 1 6.957497 0.000000 15182 +injuri 0 1 6.957497 0.000000 15183 +recuri 0 1 6.957497 0.000000 15184 +ancient 0 1 6.957497 0.000000 15185 +imageek 0 1 6.957497 0.000000 15186 +cuni 0 1 6.957497 0.000000 15187 +jansteen 0 1 6.957497 0.000000 15188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..352f6dd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +find 0 111 2.197225 0.000000 111 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +graphic 0 90 2.397895 0.000000 147 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +homepag 0 93 2.397895 0.000000 148 +thing 0 84 2.484907 0.000000 189 +contain 0 81 2.484907 0.000000 174 +academ 0 82 2.484907 0.000000 178 +appli 0 71 2.639057 0.000000 226 +write 0 72 2.639057 0.000000 222 +would 0 67 2.708050 0.000000 251 +order 0 69 2.708050 0.000000 249 +goal 0 66 2.708050 0.000000 250 +written 0 63 2.772589 0.000000 278 +abstract 0 62 2.772589 0.000000 276 +plai 0 60 2.833213 0.000000 307 +publish 0 57 2.890372 0.000000 326 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +friend 0 48 3.044522 0.000000 376 +favorit 0 44 3.135494 0.000000 410 +better 0 45 3.135494 0.000000 401 +third 0 43 3.178054 0.000000 412 +press 0 42 3.218876 0.000000 419 +futur 0 41 3.218876 0.000000 427 +probabl 0 40 3.258097 0.000000 455 +random 0 34 3.401197 0.000000 511 +given 0 32 3.465736 0.000000 538 +taken 0 31 3.496508 0.000000 555 +computersci 0 30 3.555348 0.000000 562 +depend 0 29 3.583519 0.000000 583 +ask 0 28 3.610918 0.000000 597 +mine 0 26 3.688879 0.000000 654 +seri 0 24 3.761200 0.000000 708 +fellow 0 24 3.761200 0.000000 701 +recommend 0 22 3.850148 0.000000 737 +ofwashington 0 22 3.850148 0.000000 766 +identifi 0 22 3.850148 0.000000 760 +william 0 22 3.850148 0.000000 765 +fact 0 21 3.912023 0.000000 780 +tenni 0 20 3.951244 0.000000 838 +grad 0 20 3.951244 0.000000 837 +ever 0 19 4.007333 0.000000 872 +otherwis 0 17 4.110874 0.000000 922 +anyon 0 17 4.110874 0.000000 916 +former 0 17 4.110874 0.000000 956 +whether 0 17 4.110874 0.000000 918 +chateau 0 16 4.174387 0.000000 997 +explan 0 16 4.174387 0.000000 985 +biologi 0 15 4.248495 0.000000 1049 +charact 0 15 4.248495 0.000000 1028 +role 0 14 4.317488 0.000000 1101 +pagewelcom 0 11 4.553877 0.000000 1344 +poetri 0 9 4.753590 0.000000 1596 +distanc 0 9 4.753590 0.000000 1500 +illustr 0 8 4.875197 0.000000 1679 +babylon 0 8 4.875197 0.000000 1731 +creativ 0 8 4.875197 0.000000 1777 +absolut 0 8 4.875197 0.000000 1646 +sean 0 8 4.875197 0.000000 1705 +roger 0 7 5.010635 0.000000 1892 +chronicl 0 7 5.010635 0.000000 1952 +athlet 0 7 5.010635 0.000000 1933 +fiction 0 6 5.164786 0.000000 2217 +cat 0 6 5.164786 0.000000 2194 +fantasi 0 4 5.568345 0.000000 3055 +sandi 0 4 5.568345 0.000000 2765 +portrait 0 3 5.857933 0.000000 3491 +slight 0 3 5.857933 0.000000 3894 +kwon 0 3 5.857933 0.000000 3690 +woman 0 3 5.857933 0.000000 3539 +fasulo 0 2 6.263398 0.000000 4391 +honest 0 2 6.263398 0.000000 5060 +alumnu 0 2 6.263398 0.000000 5863 +wendi 0 2 6.263398 0.000000 5864 +belluomini 0 2 6.263398 0.000000 5865 +worthwhil 0 2 6.263398 0.000000 4951 +dfasulo 0 1 6.957497 0.000000 15189 +amber 0 1 6.957497 0.000000 15190 +williamscolleg 0 1 6.957497 0.000000 15191 +inaccuraci 0 1 6.957497 0.000000 15192 +eastlak 0 1 6.957497 0.000000 15193 +merlin 0 1 6.957497 0.000000 15194 +corwin 0 1 6.957497 0.000000 15195 +zelazni 0 1 6.957497 0.000000 15196 +drpg 0 1 6.957497 0.000000 15197 +phage 0 1 6.957497 0.000000 15198 +dress 0 1 6.957497 0.000000 15199 +dogbert 0 1 6.957497 0.000000 15200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..c336629d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +washington 0 236 1.386294 0.000000 32 +seattl 0 120 2.079442 0.000000 103 +site 0 106 2.197225 0.000000 119 +internet 0 83 2.484907 0.000000 186 +resum 0 79 2.564949 0.000000 217 +tuesdai 0 73 2.639057 0.000000 219 +java 0 70 2.708050 0.000000 248 +septemb 0 65 2.772589 0.000000 274 +plai 0 60 2.833213 0.000000 307 +favorit 0 44 3.135494 0.000000 410 +autumn 0 31 3.496508 0.000000 558 +martin 1 21 3.912023 3.912023 794 +engineeringunivers 0 17 4.110874 0.000000 959 +weekli 0 17 4.110874 0.000000 919 +script 0 13 4.382027 0.000000 1171 +engr 0 10 4.653960 0.000000 1427 +sister 0 9 4.753590 0.000000 1524 +coffe 0 5 5.347108 0.000000 2556 +eduupd 0 4 5.568345 0.000000 3056 +dickei 1 2 6.263398 6.263398 4389 +garg 0 2 6.263398 0.000000 5533 +dickeycomput 0 1 6.957497 0.000000 15201 +washingtonwelcom 0 1 6.957497 0.000000 15202 +schedulenarr 0 1 6.957497 0.000000 15203 +blurbcs 0 1 6.957497 0.000000 15204 +housesfavorit 0 1 6.957497 0.000000 15205 +bookspirograph 0 1 6.957497 0.000000 15206 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..492d4b45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,188 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +year 0 148 1.945910 0.000000 84 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +provid 0 121 2.079442 0.000000 94 +manag 0 114 2.197225 0.000000 125 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +assist 0 112 2.197225 0.000000 113 +peopl 1 96 2.302585 2.302585 132 +search 0 95 2.397895 0.000000 155 +present 0 91 2.397895 0.000000 145 +larg 0 82 2.484907 0.000000 168 +internet 0 83 2.484907 0.000000 186 +thing 0 84 2.484907 0.000000 189 +help 0 83 2.484907 0.000000 175 +issu 0 78 2.564949 0.000000 211 +dynam 0 76 2.564949 0.000000 194 +good 0 77 2.564949 0.000000 200 +come 0 78 2.564949 0.000000 202 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +sieg 0 69 2.708050 0.000000 260 +polici 0 64 2.772589 0.000000 279 +guid 0 63 2.772589 0.000000 267 +plan 0 65 2.772589 0.000000 272 +juli 0 60 2.833213 0.000000 305 +back 0 60 2.833213 0.000000 297 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +pointer 0 48 3.044522 0.000000 368 +appoint 0 49 3.044522 0.000000 358 +still 0 50 3.044522 0.000000 362 +without 0 50 3.044522 0.000000 370 +effect 0 46 3.091042 0.000000 385 +move 0 47 3.091042 0.000000 382 +could 0 46 3.091042 0.000000 383 +execut 0 45 3.135494 0.000000 404 +around 0 43 3.178054 0.000000 415 +show 0 43 3.178054 0.000000 417 +cach 0 41 3.218876 0.000000 432 +map 0 39 3.258097 0.000000 452 +realli 0 40 3.258097 0.000000 444 +live 0 40 3.258097 0.000000 451 +brian 0 38 3.295837 0.000000 466 +next 0 34 3.401197 0.000000 517 +go 0 33 3.433987 0.000000 529 +taught 0 33 3.433987 0.000000 526 +idea 0 32 3.465736 0.000000 545 +consid 0 29 3.583519 0.000000 590 +mind 0 27 3.637586 0.000000 632 +rather 0 26 3.688879 0.000000 642 +enjoi 0 26 3.688879 0.000000 660 +primari 0 25 3.737670 0.000000 669 +concern 0 25 3.737670 0.000000 666 +never 0 25 3.737670 0.000000 671 +alwai 0 24 3.761200 0.000000 691 +yahoo 0 24 3.761200 0.000000 707 +begin 0 23 3.806662 0.000000 716 +thread 0 23 3.806662 0.000000 722 +initi 0 23 3.806662 0.000000 717 +decis 0 23 3.806662 0.000000 728 +almost 0 22 3.850148 0.000000 742 +sort 0 22 3.850148 0.000000 738 +chen 0 21 3.912023 0.000000 791 +avoid 0 21 3.912023 0.000000 799 +among 0 21 3.912023 0.000000 781 +miss 0 19 4.007333 0.000000 866 +lyco 0 19 4.007333 0.000000 871 +definit 0 19 4.007333 0.000000 864 +bershad 0 18 4.060443 0.000000 902 +four 0 18 4.060443 0.000000 905 +element 0 18 4.060443 0.000000 895 +event 0 18 4.060443 0.000000 896 +asplo 0 17 4.110874 0.000000 948 +otherwis 0 17 4.110874 0.000000 922 +whole 0 17 4.110874 0.000000 940 +choic 0 16 4.174387 0.000000 979 +stream 0 15 4.248495 0.000000 1015 +draw 0 14 4.317488 0.000000 1086 +manner 0 14 4.317488 0.000000 1074 +alan 0 13 4.382027 0.000000 1146 +hotlist 0 13 4.382027 0.000000 1199 +brad 0 12 4.465908 0.000000 1264 +denni 0 11 4.553877 0.000000 1321 +baer 0 11 4.553877 0.000000 1353 +smart 0 11 4.553877 0.000000 1352 +isca 0 11 4.553877 0.000000 1354 +moment 0 11 4.553877 0.000000 1379 +magic 0 11 4.553877 0.000000 1358 +occur 0 9 4.753590 0.000000 1572 +osdi 0 9 4.753590 0.000000 1534 +yellow 0 9 4.753590 0.000000 1601 +russel 0 9 4.753590 0.000000 1507 +romer 0 8 4.875197 0.000000 1706 +qualifi 0 8 4.875197 0.000000 1721 +entri 0 8 4.875197 0.000000 1678 +gold 0 8 4.875197 0.000000 1745 +rais 0 8 4.875197 0.000000 1711 +baker 0 7 5.010635 0.000000 1812 +chanc 0 7 5.010635 0.000000 1960 +commit 0 6 5.164786 0.000000 2233 +conflict 0 6 5.164786 0.000000 2041 +dream 0 6 5.164786 0.000000 2165 +loup 0 6 5.164786 0.000000 2228 +presid 0 6 5.164786 0.000000 2196 +truth 0 6 5.164786 0.000000 2179 +whatev 0 6 5.164786 0.000000 2097 +fetch 0 5 5.347108 0.000000 2567 +million 0 5 5.347108 0.000000 2495 +lesson 0 5 5.347108 0.000000 2568 +ignor 0 5 5.347108 0.000000 2288 +favor 0 5 5.347108 0.000000 2414 +vote 0 4 5.568345 0.000000 2953 +ford 0 4 5.568345 0.000000 2636 +kill 0 4 5.568345 0.000000 3000 +countless 0 4 5.568345 0.000000 3020 +dlee 0 3 5.857933 0.000000 3949 +energi 0 3 5.857933 0.000000 3950 +specul 0 3 5.857933 0.000000 3951 +mappedcach 0 3 5.857933 0.000000 3928 +reorder 0 3 5.857933 0.000000 3952 +evil 0 3 5.857933 0.000000 3943 +act 0 3 5.857933 0.000000 3557 +researchwith 0 2 6.263398 0.000000 5594 +eustac 0 2 6.263398 0.000000 5866 +dirk 0 2 6.263398 0.000000 5665 +andt 0 2 6.263398 0.000000 5121 +resolutionon 0 2 6.263398 0.000000 5867 +nixon 0 2 6.263398 0.000000 5868 +court 0 2 6.263398 0.000000 4870 +silver 0 2 6.263398 0.000000 5374 +theblack 0 2 6.263398 0.000000 5869 +hesit 0 2 6.263398 0.000000 5774 +incid 0 2 6.263398 0.000000 5870 +goeth 0 2 6.263398 0.000000 5366 +calder 0 1 6.957497 0.000000 15207 +grunwald 0 1 6.957497 0.000000 15208 +huberthumphrei 0 1 6.957497 0.000000 15209 +likejean 0 1 6.957497 0.000000 15210 +enginefor 0 1 6.957497 0.000000 15211 +sensibl 0 1 6.957497 0.000000 15212 +conced 0 1 6.957497 0.000000 15213 +thatpolit 0 1 6.957497 0.000000 15214 +lesser 0 1 6.957497 0.000000 15215 +tweedledumand 0 1 6.957497 0.000000 15216 +tweedlede 0 1 6.957497 0.000000 15217 +abstain 0 1 6.957497 0.000000 15218 +theyar 0 1 6.957497 0.000000 15219 +torummag 0 1 6.957497 0.000000 15220 +allth 0 1 6.957497 0.000000 15221 +stew 0 1 6.957497 0.000000 15222 +humphrei 0 1 6.957497 0.000000 15223 +suprem 0 1 6.957497 0.000000 15224 +whentricia 0 1 6.957497 0.000000 15225 +flummeri 0 1 6.957497 0.000000 15226 +ineffect 0 1 6.957497 0.000000 15227 +splendid 0 1 6.957497 0.000000 15228 +unforeseen 0 1 6.957497 0.000000 15229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..7a3f31fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +databas 0 122 2.079442 0.000000 86 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +user 0 104 2.302585 0.000000 137 +homepag 0 93 2.397895 0.000000 148 +member 0 84 2.484907 0.000000 165 +novemb 0 81 2.484907 0.000000 179 +state 0 76 2.564949 0.000000 207 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +much 0 52 2.995732 0.000000 349 +date 0 51 2.995732 0.000000 344 +anoth 0 45 3.135494 0.000000 408 +math 0 44 3.135494 0.000000 402 +execut 0 45 3.135494 0.000000 404 +edit 0 42 3.218876 0.000000 418 +realli 0 40 3.258097 0.000000 444 +feel 0 37 3.332205 0.000000 483 +actual 0 28 3.610918 0.000000 604 +usual 0 28 3.610918 0.000000 608 +intend 0 28 3.610918 0.000000 599 +todai 0 25 3.737670 0.000000 672 +background 0 25 3.737670 0.000000 664 +brows 0 23 3.806662 0.000000 726 +ofwashington 0 22 3.850148 0.000000 766 +sure 0 20 3.951244 0.000000 813 +grad 0 20 3.951244 0.000000 837 +anywai 0 15 4.248495 0.000000 1047 +club 0 15 4.248495 0.000000 1058 +player 0 11 4.553877 0.000000 1371 +michigan 0 11 4.553877 0.000000 1368 +strongli 0 10 4.653960 0.000000 1406 +death 0 10 4.653960 0.000000 1457 +doug 0 9 4.753590 0.000000 1517 +mention 0 9 4.753590 0.000000 1569 +unusu 0 9 4.753590 0.000000 1566 +vice 0 9 4.753590 0.000000 1604 +charg 0 9 4.753590 0.000000 1582 +screen 0 9 4.753590 0.000000 1577 +sister 0 9 4.753590 0.000000 1524 +ground 0 7 5.010635 0.000000 1955 +hit 0 7 5.010635 0.000000 1965 +lucki 0 6 5.164786 0.000000 2163 +presid 0 6 5.164786 0.000000 2196 +emerg 0 6 5.164786 0.000000 2038 +highwai 0 6 5.164786 0.000000 2095 +wast 0 5 5.347108 0.000000 2537 +unnecessari 0 5 5.347108 0.000000 2506 +keeper 0 5 5.347108 0.000000 2569 +bryan 0 5 5.347108 0.000000 2421 +worst 0 5 5.347108 0.000000 2287 +exhaust 0 4 5.568345 0.000000 2825 +engineeringdepart 0 4 5.568345 0.000000 2917 +backup 0 4 5.568345 0.000000 2645 +gear 0 4 5.568345 0.000000 2891 +bold 0 3 5.857933 0.000000 3846 +treasur 0 3 5.857933 0.000000 3229 +sit 0 3 5.857933 0.000000 3953 +tast 0 3 5.857933 0.000000 3666 +labor 0 3 5.857933 0.000000 3195 +sarcasm 0 2 6.263398 0.000000 5871 +casual 0 2 6.263398 0.000000 4542 +buti 0 2 6.263398 0.000000 4775 +meth 0 2 6.263398 0.000000 5872 +useless 0 2 6.263398 0.000000 5564 +caveat 0 2 6.263398 0.000000 4975 +apprentic 0 2 6.263398 0.000000 5873 +cart 0 2 6.263398 0.000000 5874 +stolen 0 2 6.263398 0.000000 5803 +zongker 0 1 6.957497 0.000000 15230 +pagececi 0 1 6.957497 0.000000 15231 +noless 0 1 6.957497 0.000000 15232 +classeshow 0 1 6.957497 0.000000 15233 +toxic 0 1 6.957497 0.000000 15234 +custard 0 1 6.957497 0.000000 15235 +filesth 0 1 6.957497 0.000000 15236 +mento 0 1 6.957497 0.000000 15237 +galleryvisit 0 1 6.957497 0.000000 15238 +supercolliderth 0 1 6.957497 0.000000 15239 +cron 0 1 6.957497 0.000000 15240 +avirtu 0 1 6.957497 0.000000 15241 +trove 0 1 6.957497 0.000000 15242 +whichmai 0 1 6.957497 0.000000 15243 +imajor 0 1 6.957497 0.000000 15244 +andminor 0 1 6.957497 0.000000 15245 +dubiou 0 1 6.957497 0.000000 15246 +honorsjunior 0 1 6.957497 0.000000 15247 +brotherhood 0 1 6.957497 0.000000 15248 +crouton 0 1 6.957497 0.000000 15249 +pizzicato 0 1 6.957497 0.000000 15250 +clicker 0 1 6.957497 0.000000 15251 +cruis 0 1 6.957497 0.000000 15252 +inhigh 0 1 6.957497 0.000000 15253 +buttstar 0 1 6.957497 0.000000 15254 +wheremi 0 1 6.957497 0.000000 15255 +dougz 0 1 6.957497 0.000000 15256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..00f1da6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +schedul 0 119 2.079442 0.000000 85 +seattl 0 120 2.079442 0.000000 103 +databas 0 122 2.079442 0.000000 86 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +activ 0 84 2.484907 0.000000 182 +help 0 83 2.484907 0.000000 175 +level 0 87 2.484907 0.000000 180 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +come 0 78 2.564949 0.000000 202 +addit 0 74 2.639057 0.000000 228 +java 0 70 2.708050 0.000000 248 +differ 0 66 2.708050 0.000000 253 +interact 0 62 2.772589 0.000000 270 +polici 0 64 2.772589 0.000000 279 +experi 0 64 2.772589 0.000000 283 +virtual 0 62 2.772589 0.000000 285 +improv 0 62 2.772589 0.000000 289 +thesi 0 57 2.890372 0.000000 327 +allow 0 53 2.944439 0.000000 333 +investig 0 51 2.995732 0.000000 353 +done 0 47 3.091042 0.000000 381 +quarter 0 47 3.091042 0.000000 389 +mechan 0 43 3.178054 0.000000 416 +seminar 0 38 3.295837 0.000000 470 +slide 0 38 3.295837 0.000000 467 +winter 0 36 3.367296 0.000000 500 +jame 0 35 3.401197 0.000000 507 +idea 0 32 3.465736 0.000000 545 +built 0 29 3.583519 0.000000 592 +demonstr 0 24 3.761200 0.000000 694 +thread 0 23 3.806662 0.000000 722 +lead 0 23 3.806662 0.000000 718 +kernel 0 20 3.951244 0.000000 825 +concentr 0 18 4.060443 0.000000 906 +spin 0 14 4.317488 0.000000 1121 +carri 0 13 4.382027 0.000000 1152 +signific 0 13 4.382027 0.000000 1125 +folk 0 9 4.753590 0.000000 1597 +voelker 0 9 4.753590 0.000000 1557 +dylan 0 8 4.875197 0.000000 1625 +slightli 0 7 5.010635 0.000000 1795 +dedic 0 7 5.010635 0.000000 1843 +gave 0 7 5.010635 0.000000 1922 +geoff 0 6 5.164786 0.000000 2124 +caus 0 5 5.347108 0.000000 2298 +commod 0 5 5.347108 0.000000 2415 +poorli 0 4 5.568345 0.000000 2781 +opal 0 4 5.568345 0.000000 3057 +oodb 0 3 5.857933 0.000000 3954 +mcname 0 2 6.263398 0.000000 5875 +properli 0 2 6.263398 0.000000 5454 +architecturethat 0 2 6.263398 0.000000 5876 +applicationsand 0 1 6.957497 0.000000 15257 +mechanismthat 0 1 6.957497 0.000000 15258 +replacementpolici 0 1 6.957497 0.000000 15259 +machoper 0 1 6.957497 0.000000 15260 +thathelp 0 1 6.957497 0.000000 15261 +kernelthread 0 1 6.957497 0.000000 15262 +tailor 0 1 6.957497 0.000000 15263 +betweenobject 0 1 6.957497 0.000000 15264 +achiv 0 1 6.957497 0.000000 15265 +improvementscan 0 1 6.957497 0.000000 15266 +betterserv 0 1 6.957497 0.000000 15267 +paperscv 0 1 6.957497 0.000000 15268 +lectureintroduc 0 1 6.957497 0.000000 15269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..0fa4d5f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +seattl 0 120 2.079442 0.000000 103 +make 0 111 2.197225 0.000000 120 +mondai 0 77 2.564949 0.000000 206 +sieg 0 69 2.708050 0.000000 260 +dept 0 64 2.772589 0.000000 291 +wednesdai 0 64 2.772589 0.000000 261 +septemb 0 65 2.772589 0.000000 274 +could 0 46 3.091042 0.000000 383 +tent 0 22 3.850148 0.000000 739 +thur 0 19 4.007333 0.000000 847 +hello 0 10 4.653960 0.000000 1407 +lewi 1 8 4.875197 4.875197 1700 +christoph 1 5 5.347108 5.347108 2512 +glad 0 4 5.568345 0.000000 2657 +echri 0 1 6.957497 0.000000 15270 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..13e02af1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +blank 1 3 5.857933 5.857933 3379 +ecrock 1 1 6.957497 6.957497 15271 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..f80c3cf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +specif 0 106 2.197225 0.000000 106 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +book 0 99 2.302585 0.000000 131 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +help 0 83 2.484907 0.000000 175 +know 0 80 2.564949 0.000000 198 +want 0 79 2.564949 0.000000 199 +resum 0 79 2.564949 0.000000 217 +dynam 0 76 2.564949 0.000000 194 +come 0 78 2.564949 0.000000 202 +line 0 75 2.639057 0.000000 231 +free 0 73 2.639057 0.000000 224 +name 0 72 2.639057 0.000000 220 +august 0 66 2.708050 0.000000 257 +plan 0 65 2.772589 0.000000 272 +creat 0 63 2.772589 0.000000 277 +visit 0 63 2.772589 0.000000 288 +import 0 65 2.772589 0.000000 282 +type 0 61 2.833213 0.000000 296 +room 0 59 2.833213 0.000000 301 +variou 0 56 2.890372 0.000000 317 +think 0 57 2.890372 0.000000 314 +found 0 53 2.944439 0.000000 337 +give 0 50 3.044522 0.000000 359 +friend 0 48 3.044522 0.000000 376 +anoth 0 45 3.135494 0.000000 408 +keep 0 44 3.135494 0.000000 409 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +littl 0 39 3.258097 0.000000 454 +form 0 39 3.258097 0.000000 443 +theoret 0 39 3.258097 0.000000 446 +industri 0 38 3.295837 0.000000 464 +hand 0 37 3.332205 0.000000 475 +power 0 30 3.555348 0.000000 573 +domain 0 30 3.555348 0.000000 564 +sometim 0 24 3.761200 0.000000 696 +alwai 0 24 3.761200 0.000000 691 +busi 0 21 3.912023 0.000000 784 +fact 0 21 3.912023 0.000000 780 +stand 0 18 4.060443 0.000000 891 +macintosh 0 17 4.110874 0.000000 920 +qual 0 15 4.248495 0.000000 1062 +countri 0 15 4.248495 0.000000 1059 +hong 0 14 4.317488 0.000000 1105 +karlin 0 13 4.382027 0.000000 1176 +anna 0 12 4.465908 0.000000 1292 +appl 0 11 4.553877 0.000000 1303 +guess 0 10 4.653960 0.000000 1443 +traffic 0 10 4.653960 0.000000 1421 +plain 0 9 4.753590 0.000000 1495 +sister 0 9 4.753590 0.000000 1524 +chamber 0 8 4.875197 0.000000 1692 +craig 0 7 5.010635 0.000000 1879 +happen 0 7 5.010635 0.000000 1790 +histor 0 6 5.164786 0.000000 2085 +mac 0 5 5.347108 0.000000 2292 +advic 0 5 5.347108 0.000000 2509 +insight 0 4 5.568345 0.000000 3024 +eddi 0 3 5.857933 0.000000 3896 +studentat 0 2 6.263398 0.000000 5877 +seig 0 2 6.263398 0.000000 4462 +commentari 0 2 6.263398 0.000000 4287 +bias 0 2 6.263398 0.000000 5033 +tosai 0 1 6.957497 0.000000 15272 +hadto 0 1 6.957497 0.000000 15273 +postcriptand 0 1 6.957497 0.000000 15274 +workingon 0 1 6.957497 0.000000 15275 +fordynam 0 1 6.957497 0.000000 15276 +activit 0 1 6.957497 0.000000 15277 +vine 0 1 6.957497 0.000000 15278 +branchesmi 0 1 6.957497 0.000000 15279 +knowof 0 1 6.957497 0.000000 15280 +daveneti 0 1 6.957497 0.000000 15281 +towardslik 0 1 6.957497 0.000000 15282 +eveneasi 0 1 6.957497 0.000000 15283 +freewai 0 1 6.957497 0.000000 15284 +worldher 0 1 6.957497 0.000000 15285 +edhong 0 1 6.957497 0.000000 15286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..108b59b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +architectur 0 139 1.945910 0.000000 77 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +compil 1 122 2.079442 2.079442 96 +schedul 0 119 2.079442 0.000000 85 +seattl 0 120 2.079442 0.000000 103 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +memori 0 101 2.302585 0.000000 139 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +dynam 0 76 2.564949 0.000000 194 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +sieg 0 69 2.708050 0.000000 260 +degre 0 69 2.708050 0.000000 259 +new 0 64 2.772589 0.000000 262 +share 0 59 2.833213 0.000000 304 +back 0 60 2.833213 0.000000 297 +thesi 0 57 2.890372 0.000000 327 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +http 0 41 3.218876 0.000000 420 +cach 0 41 3.218876 0.000000 432 +might 0 41 3.218876 0.000000 426 +committe 0 34 3.401197 0.000000 522 +computersci 0 30 3.555348 0.000000 562 +miscellan 0 23 3.806662 0.000000 731 +begin 0 23 3.806662 0.000000 716 +emphasi 0 22 3.850148 0.000000 755 +reduc 0 22 3.850148 0.000000 759 +voic 0 21 3.912023 0.000000 806 +asplo 0 17 4.110874 0.000000 948 +susan 0 15 4.248495 0.000000 1050 +qual 0 15 4.248495 0.000000 1062 +coher 0 14 4.317488 0.000000 1109 +workload 0 12 4.465908 0.000000 1210 +multithread 0 11 4.553877 0.000000 1315 +egger 0 8 4.875197 0.000000 1695 +prefetch 0 6 5.164786 0.000000 2039 +fals 0 4 5.568345 0.000000 2861 +amast 0 3 5.857933 0.000000 3955 +onexperiment 0 1 6.957497 0.000000 15287 +incompil 0 1 6.957497 0.000000 15288 +optimizationsand 0 1 6.957497 0.000000 15289 +multithreadedarchitectur 0 1 6.957497 0.000000 15290 +spinprevi 0 1 6.957497 0.000000 15291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..e456f6cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +parallel 0 169 1.791759 0.000000 60 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +seattl 0 120 2.079442 0.000000 103 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +code 0 108 2.197225 0.000000 116 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +center 0 88 2.397895 0.000000 158 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +activ 0 84 2.484907 0.000000 182 +educ 0 86 2.484907 0.000000 191 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +want 0 79 2.564949 0.000000 199 +server 0 76 2.564949 0.000000 204 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +servic 0 72 2.639057 0.000000 236 +workshop 0 71 2.639057 0.000000 239 +write 0 72 2.639057 0.000000 222 +simul 0 66 2.708050 0.000000 255 +receiv 0 66 2.708050 0.000000 244 +goal 0 66 2.708050 0.000000 250 +main 0 67 2.708050 0.000000 256 +prof 0 64 2.772589 0.000000 273 +experi 0 64 2.772589 0.000000 283 +share 0 59 2.833213 0.000000 304 +march 0 61 2.833213 0.000000 295 +summer 0 56 2.890372 0.000000 311 +extens 0 53 2.944439 0.000000 340 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +adapt 0 46 3.091042 0.000000 387 +possibl 0 47 3.091042 0.000000 378 +describ 0 45 3.135494 0.000000 400 +mechan 0 43 3.178054 0.000000 416 +third 0 43 3.178054 0.000000 412 +http 0 41 3.218876 0.000000 420 +review 0 42 3.218876 0.000000 425 +brian 0 38 3.295837 0.000000 466 +prototyp 0 38 3.295837 0.000000 463 +slide 0 38 3.295837 0.000000 467 +mean 0 37 3.332205 0.000000 477 +fault 0 32 3.465736 0.000000 547 +independ 0 32 3.465736 0.000000 548 +posit 0 31 3.496508 0.000000 552 +domain 0 30 3.555348 0.000000 564 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +load 0 28 3.610918 0.000000 601 +measur 0 28 3.610918 0.000000 609 +symbol 0 27 3.637586 0.000000 620 +compar 0 26 3.688879 0.000000 648 +aspect 0 25 3.737670 0.000000 663 +toward 0 25 3.737670 0.000000 668 +spent 0 25 3.737670 0.000000 676 +thread 0 23 3.806662 0.000000 722 +displai 0 23 3.806662 0.000000 712 +miscellan 0 23 3.806662 0.000000 731 +reduc 0 22 3.850148 0.000000 759 +path 0 21 3.912023 0.000000 778 +kernel 0 20 3.951244 0.000000 825 +wrote 0 20 3.951244 0.000000 830 +fine 0 20 3.951244 0.000000 822 +safeti 0 20 3.951244 0.000000 817 +benchmark 0 19 4.007333 0.000000 859 +bershad 0 18 4.060443 0.000000 902 +protect 0 17 4.110874 0.000000 935 +coupl 0 17 4.110874 0.000000 939 +latenc 0 16 4.174387 0.000000 993 +princeton 0 15 4.248495 0.000000 1042 +novel 0 15 4.248495 0.000000 1039 +overhead 0 15 4.248495 0.000000 1035 +enough 0 15 4.248495 0.000000 1040 +spin 0 14 4.317488 0.000000 1121 +achiev 0 14 4.317488 0.000000 1088 +senior 0 14 4.317488 0.000000 1120 +opportun 0 13 4.382027 0.000000 1161 +safe 0 12 4.465908 0.000000 1274 +robust 0 12 4.465908 0.000000 1271 +arbitrari 0 11 4.553877 0.000000 1359 +grain 0 10 4.653960 0.000000 1448 +sosp 0 10 4.653960 0.000000 1416 +ski 0 10 4.653960 0.000000 1471 +jersei 0 9 4.753590 0.000000 1587 +modula 0 9 4.753590 0.000000 1613 +grew 0 8 4.875197 0.000000 1742 +mach 0 8 4.875197 0.000000 1669 +isol 0 8 4.875197 0.000000 1663 +spec 0 8 4.875197 0.000000 1640 +sigop 0 8 4.875197 0.000000 1727 +european 0 8 4.875197 0.000000 1763 +dylan 0 8 4.875197 0.000000 1625 +interrupt 0 7 5.010635 0.000000 1793 +bell 0 6 5.164786 0.000000 2224 +subsystem 0 6 5.164786 0.000000 2015 +simultan 0 6 5.164786 0.000000 2155 +spinproject 0 5 5.347108 0.000000 2570 +compat 0 5 5.347108 0.000000 2485 +sail 0 5 5.347108 0.000000 2571 +outdoor 0 5 5.347108 0.000000 2514 +microkernel 0 4 5.568345 0.000000 3047 +turkei 0 4 5.568345 0.000000 2914 +hide 0 4 5.568345 0.000000 2996 +mip 0 4 5.568345 0.000000 2738 +wcsss 0 3 5.857933 0.000000 3956 +thesumm 0 3 5.857933 0.000000 3763 +namespac 0 3 5.857933 0.000000 3957 +hoto 0 3 5.857933 0.000000 3577 +arizona 0 3 5.857933 0.000000 3700 +dive 0 3 5.857933 0.000000 3654 +emin 0 2 6.263398 0.000000 5810 +sirer 0 2 6.263398 0.000000 5811 +backgroundi 0 2 6.263398 0.000000 5878 +andsom 0 2 6.263398 0.000000 5483 +schedulingpolici 0 2 6.263398 0.000000 5879 +strand 0 2 6.263398 0.000000 5880 +ofnew 0 2 6.263398 0.000000 5881 +mipsi 0 2 6.263398 0.000000 5882 +tucson 0 2 6.263398 0.000000 5883 +cloth 0 2 6.263398 0.000000 5884 +sirereg 0 1 6.957497 0.000000 15292 +istanbul 0 1 6.957497 0.000000 15293 +labswork 0 1 6.957497 0.000000 15294 +theplan 0 1 6.957497 0.000000 15295 +thevesta 0 1 6.957497 0.000000 15296 +projectsmi 0 1 6.957497 0.000000 15297 +andprotect 0 1 6.957497 0.000000 15298 +specificaspect 0 1 6.957497 0.000000 15299 +alarm 0 1 6.957497 0.000000 15300 +ofextend 0 1 6.957497 0.000000 15301 +allowsu 0 1 6.957497 0.000000 15302 +isdesign 0 1 6.957497 0.000000 15303 +allowsisol 0 1 6.957497 0.000000 15304 +withconflict 0 1 6.957497 0.000000 15305 +beassur 0 1 6.957497 0.000000 15306 +clincher 0 1 6.957497 0.000000 15307 +extensionsthat 0 1 6.957497 0.000000 15308 +protectionenforc 0 1 6.957497 0.000000 15309 +performanceweb 0 1 6.957497 0.000000 15310 +networkingstack 0 1 6.957497 0.000000 15311 +andminim 0 1 6.957497 0.000000 15312 +calledmipsi 0 1 6.957497 0.000000 15313 +researchplatform 0 1 6.957497 0.000000 15314 +featuresand 0 1 6.957497 0.000000 15315 +talkslanguag 0 1 6.957497 0.000000 15316 +interestswhenev 0 1 6.957497 0.000000 15317 +windsurf 0 1 6.957497 0.000000 15318 +bikingmak 0 1 6.957497 0.000000 15319 +andhik 0 1 6.957497 0.000000 15320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..877a0e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +like 0 132 1.945910 0.000000 81 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +seattl 0 120 2.079442 0.000000 103 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +pictur 0 89 2.397895 0.000000 160 +graphic 0 90 2.397895 0.000000 147 +section 0 94 2.397895 0.000000 149 +solut 0 82 2.484907 0.000000 162 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +solv 0 73 2.639057 0.000000 234 +write 0 72 2.639057 0.000000 222 +involv 0 71 2.639057 0.000000 227 +materi 0 75 2.639057 0.000000 221 +sieg 0 69 2.708050 0.000000 260 +simul 0 66 2.708050 0.000000 255 +prof 0 64 2.772589 0.000000 273 +street 0 63 2.772589 0.000000 293 +automat 0 61 2.833213 0.000000 306 +thesi 0 57 2.890372 0.000000 327 +particular 0 51 2.995732 0.000000 352 +still 0 50 3.044522 0.000000 362 +numer 0 49 3.044522 0.000000 369 +could 0 46 3.091042 0.000000 383 +third 0 43 3.178054 0.000000 412 +music 0 42 3.218876 0.000000 436 +probabl 0 40 3.258097 0.000000 455 +realli 0 40 3.258097 0.000000 444 +small 0 39 3.258097 0.000000 447 +mean 0 37 3.332205 0.000000 477 +idea 0 32 3.465736 0.000000 545 +richard 0 31 3.496508 0.000000 559 +power 0 30 3.555348 0.000000 573 +weather 0 28 3.610918 0.000000 618 +mind 0 27 3.637586 0.000000 632 +honor 0 23 3.806662 0.000000 729 +famili 0 23 3.806662 0.000000 735 +try 0 22 3.850148 0.000000 764 +finish 0 22 3.850148 0.000000 748 +fact 0 21 3.912023 0.000000 780 +longer 0 20 3.951244 0.000000 816 +applet 0 20 3.951244 0.000000 827 +eric 0 19 4.007333 0.000000 870 +els 0 19 4.007333 0.000000 843 +anderson 0 19 4.007333 0.000000 860 +mostli 0 19 4.007333 0.000000 869 +prove 0 19 4.007333 0.000000 848 +matrix 0 17 4.110874 0.000000 933 +sept 0 17 4.110874 0.000000 952 +fourth 0 16 4.174387 0.000000 999 +spatial 0 16 4.174387 0.000000 988 +spars 0 16 4.174387 0.000000 989 +qual 0 15 4.248495 0.000000 1062 +nonlinear 0 14 4.317488 0.000000 1107 +step 0 13 4.382027 0.000000 1138 +care 0 13 4.382027 0.000000 1177 +resolut 0 13 4.382027 0.000000 1172 +necessari 0 13 4.382027 0.000000 1147 +bodi 0 13 4.382027 0.000000 1178 +karlin 0 13 4.382027 0.000000 1176 +brother 0 13 4.382027 0.000000 1189 +speech 0 12 4.465908 0.000000 1222 +anna 0 12 4.465908 0.000000 1292 +black 0 10 4.653960 0.000000 1418 +henri 0 10 4.653960 0.000000 1417 +explicit 0 9 4.753590 0.000000 1525 +signal 0 7 5.010635 0.000000 1910 +newton 0 7 5.010635 0.000000 1824 +commit 0 6 5.164786 0.000000 2233 +duke 0 6 5.164786 0.000000 2231 +restrict 0 6 5.164786 0.000000 2129 +transcript 0 6 5.164786 0.000000 2067 +everybodi 0 5 5.347108 0.000000 2517 +stabil 0 5 5.347108 0.000000 2286 +aim 0 5 5.347108 0.000000 2477 +isth 0 5 5.347108 0.000000 2532 +snapshot 0 5 5.347108 0.000000 2303 +implicit 0 4 5.568345 0.000000 2830 +backward 0 4 5.568345 0.000000 2638 +freedom 0 3 5.857933 0.000000 3890 +euler 0 3 5.857933 0.000000 3174 +interplai 0 3 5.857933 0.000000 3726 +astrophys 0 3 5.857933 0.000000 3936 +aclu 0 2 6.263398 0.000000 5227 +reno 0 2 6.263398 0.000000 5228 +mere 0 2 6.263398 0.000000 5340 +panic 0 2 6.263398 0.000000 5682 +criterion 0 2 6.263398 0.000000 5885 +acoust 0 2 6.263398 0.000000 5355 +musician 0 2 6.263398 0.000000 5718 +preparedfor 0 2 6.263398 0.000000 5886 +meander 0 2 6.263398 0.000000 5887 +andersonwher 0 1 6.957497 0.000000 15321 +decisionin 0 1 6.957497 0.000000 15322 +thedecis 0 1 6.957497 0.000000 15323 +interim 0 1 6.957497 0.000000 15324 +feloni 0 1 6.957497 0.000000 15325 +themarketplac 0 1 6.957497 0.000000 15326 +imostli 0 1 6.957497 0.000000 15327 +greensideof 0 1 6.957497 0.000000 15328 +onsteadi 0 1 6.957497 0.000000 15329 +biharmon 0 1 6.957497 0.000000 15330 +timesteppingmethod 0 1 6.957497 0.000000 15331 +analysisissu 0 1 6.957497 0.000000 15332 +nonlinearequ 0 1 6.957497 0.000000 15333 +newtonstep 0 1 6.957497 0.000000 15334 +spiffi 0 1 6.957497 0.000000 15335 +structuresbi 0 1 6.957497 0.000000 15336 +andersoni 0 1 6.957497 0.000000 15337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..c8d51d28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +wisconsin 0 169 1.791759 0.000000 54 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +machin 0 129 2.079442 0.000000 95 +databas 0 122 2.079442 0.000000 86 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +world 0 115 2.197225 0.000000 126 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +learn 1 86 2.484907 2.484907 170 +internet 0 83 2.484907 0.000000 186 +contain 0 81 2.484907 0.000000 174 +second 0 81 2.484907 0.000000 166 +wide 0 84 2.484907 0.000000 185 +ieee 0 86 2.484907 0.000000 190 +resourc 0 81 2.484907 0.000000 172 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +interfac 0 79 2.564949 0.000000 209 +decemb 0 80 2.564949 0.000000 215 +servic 0 72 2.639057 0.000000 236 +effici 0 73 2.639057 0.000000 233 +intellig 0 72 2.639057 0.000000 225 +html 0 75 2.639057 0.000000 235 +addit 0 74 2.639057 0.000000 228 +knowledg 0 67 2.708050 0.000000 243 +sieg 0 69 2.708050 0.000000 260 +august 0 66 2.708050 0.000000 257 +plan 0 65 2.772589 0.000000 272 +juli 0 60 2.833213 0.000000 305 +room 0 59 2.833213 0.000000 301 +thesi 0 57 2.890372 0.000000 327 +reason 0 57 2.890372 0.000000 318 +unix 0 58 2.890372 0.000000 308 +undergradu 0 54 2.944439 0.000000 338 +without 0 50 3.044522 0.000000 370 +understand 0 47 3.091042 0.000000 384 +move 0 47 3.091042 0.000000 382 +error 0 40 3.258097 0.000000 449 +close 0 38 3.295837 0.000000 465 +field 0 37 3.332205 0.000000 482 +multi 0 36 3.367296 0.000000 493 +robot 0 36 3.367296 0.000000 497 +statist 0 35 3.401197 0.000000 521 +human 0 32 3.465736 0.000000 546 +photo 0 31 3.496508 0.000000 561 +robert 0 30 3.555348 0.000000 567 +neural 0 30 3.555348 0.000000 578 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +relev 0 26 3.688879 0.000000 637 +request 0 26 3.688879 0.000000 635 +magazin 0 24 3.761200 0.000000 704 +greg 0 24 3.761200 0.000000 695 +ofwashington 0 22 3.850148 0.000000 766 +william 0 22 3.850148 0.000000 765 +voic 0 21 3.912023 0.000000 806 +fact 0 21 3.912023 0.000000 780 +expert 0 20 3.951244 0.000000 833 +comparison 0 19 4.007333 0.000000 863 +agent 0 18 4.060443 0.000000 910 +repositori 0 17 4.110874 0.000000 932 +white 0 17 4.110874 0.000000 951 +analyz 0 17 4.110874 0.000000 925 +brief 0 16 4.174387 0.000000 1001 +choic 0 16 4.174387 0.000000 979 +indic 0 15 4.248495 0.000000 1013 +jonathan 0 13 4.382027 0.000000 1174 +food 0 12 4.465908 0.000000 1285 +bruce 0 12 4.465908 0.000000 1226 +count 0 12 4.465908 0.000000 1239 +stephen 0 11 4.553877 0.000000 1342 +induct 0 11 4.553877 0.000000 1304 +metacrawl 0 10 4.653960 0.000000 1455 +packard 0 10 4.653960 0.000000 1444 +shop 0 10 4.653960 0.000000 1469 +sound 0 9 4.753590 0.000000 1605 +incomplet 0 9 4.753590 0.000000 1575 +aaai 0 8 4.875197 0.000000 1750 +hewlett 0 8 4.875197 0.000000 1709 +irvin 0 8 4.875197 0.000000 1660 +accomplish 0 8 4.875197 0.000000 1755 +satisfi 0 8 4.875197 0.000000 1694 +gather 0 8 4.875197 0.000000 1719 +autonom 0 8 4.875197 0.000000 1749 +erik 0 8 4.875197 0.000000 1701 +softbot 1 7 5.010635 5.010635 1974 +discoveri 0 7 5.010635 0.000000 1915 +sparc 0 7 5.010635 0.000000 1860 +foc 0 7 5.010635 0.000000 1880 +planner 0 7 5.010635 0.000000 1797 +golden 0 7 5.010635 0.000000 1962 +hunt 0 7 5.010635 0.000000 1798 +illinoi 0 7 5.010635 0.000000 1941 +usenet 0 7 5.010635 0.000000 1839 +etzioni 0 6 5.164786 0.000000 2135 +oren 0 6 5.164786 0.000000 2134 +advis 0 6 5.164786 0.000000 2173 +fiction 0 6 5.164786 0.000000 2217 +forecast 0 6 5.164786 0.000000 2171 +brook 0 6 5.164786 0.000000 2152 +german 0 6 5.164786 0.000000 2190 +accuraci 0 5 5.347108 0.000000 2450 +cacm 0 5 5.347108 0.000000 2388 +keith 0 5 5.347108 0.000000 2528 +selberg 0 5 5.347108 0.000000 2441 +amherst 0 5 5.347108 0.000000 2484 +disambigu 0 4 5.568345 0.000000 2899 +innov 0 4 5.568345 0.000000 2933 +chain 0 4 5.568345 0.000000 2712 +repli 0 4 5.568345 0.000000 2689 +ijcai 0 4 5.568345 0.000000 2901 +sophist 0 3 5.857933 0.000000 3545 +ahoi 0 3 5.857933 0.000000 3532 +deploi 0 3 5.857933 0.000000 3750 +neal 0 3 5.857933 0.000000 3184 +lockhe 0 3 5.857933 0.000000 3863 +faq 0 3 5.857933 0.000000 3216 +pageoren 0 2 6.263398 0.000000 5888 +pagedepart 0 2 6.263398 0.000000 5052 +anddynam 0 2 6.263398 0.000000 5889 +finalist 0 2 6.263398 0.000000 5890 +discoveraward 0 2 6.263398 0.000000 5891 +brute 0 2 6.263398 0.000000 5892 +hypothes 0 2 6.263398 0.000000 5607 +ascal 0 2 6.263398 0.000000 5893 +toappear 0 2 6.263398 0.000000 4343 +bernard 0 2 6.263398 0.000000 5894 +lesh 0 2 6.263398 0.000000 5895 +goan 0 2 6.263398 0.000000 5896 +zamir 0 2 6.263398 0.000000 5897 +shake 0 2 6.263398 0.000000 5898 +umass 0 2 6.263398 0.000000 5899 +bioand 0 1 6.957497 0.000000 15338 +heor 0 1 6.957497 0.000000 15339 +searchmultipl 0 1 6.957497 0.000000 15340 +pruningopt 0 1 6.957497 0.000000 15341 +netrecommend 0 1 6.957497 0.000000 15342 +locatesindividu 0 1 6.957497 0.000000 15343 +bruteforc 0 1 6.957497 0.000000 15344 +whenrun 0 1 6.957497 0.000000 15345 +theweb 0 1 6.957497 0.000000 15346 +richardseg 0 1 6.957497 0.000000 15347 +fileretriev 0 1 6.957497 0.000000 15348 +universalquantif 0 1 6.957497 0.000000 15349 +terranc 0 1 6.957497 0.000000 15350 +mikeperkowitz 0 1 6.957497 0.000000 15351 +soderland 0 1 6.957497 0.000000 15352 +roomi 0 1 6.957497 0.000000 15353 +lesourd 0 1 6.957497 0.000000 15354 +spiger 0 1 6.957497 0.000000 15355 +alford 0 1 6.957497 0.000000 15356 +fitchenholtz 0 1 6.957497 0.000000 15357 +guido 0 1 6.957497 0.000000 15358 +dymitr 0 1 6.957497 0.000000 15359 +mozdyniewicz 0 1 6.957497 0.000000 15360 +quark 0 1 6.957497 0.000000 15361 +minecontain 0 1 6.957497 0.000000 15362 +neuroprosearch 0 1 6.957497 0.000000 15363 +statlib 0 1 6.957497 0.000000 15364 +learningtoolbox 0 1 6.957497 0.000000 15365 +bonn 0 1 6.957497 0.000000 15366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..e4bd1e4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +juli 0 60 2.833213 0.000000 305 +thesi 0 57 2.890372 0.000000 327 +faculti 0 56 2.890372 0.000000 325 +done 0 47 3.091042 0.000000 381 +join 0 39 3.258097 0.000000 457 +soon 0 36 3.367296 0.000000 494 +global 0 34 3.401197 0.000000 520 +concern 0 25 3.737670 0.000000 666 +mike 0 24 3.761200 0.000000 703 +finish 0 22 3.850148 0.000000 748 +feelei 1 7 5.010635 5.010635 1859 +british 0 5 5.347108 0.000000 2546 +columbia 0 2 6.263398 0.000000 5900 +papersmi 0 2 6.263398 0.000000 5462 +pagemik 0 1 6.957497 0.000000 15367 +workstationclust 0 1 6.957497 0.000000 15368 +opalproject 0 1 6.957497 0.000000 15369 +injanuari 0 1 6.957497 0.000000 15370 +summarycvsoutheast 0 1 6.957497 0.000000 15371 +idaholast 0 1 6.957497 0.000000 15372 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..d43d1d3c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +seattl 0 120 2.079442 0.000000 103 +pictur 0 89 2.397895 0.000000 160 +sieg 0 69 2.708050 0.000000 260 +room 0 59 2.833213 0.000000 301 +return 0 34 3.401197 0.000000 502 +voic 0 21 3.912023 0.000000 806 +chri 0 11 4.553877 0.000000 1311 +fisher 1 2 6.263398 6.263398 4794 +fisherdepart 0 1 6.957497 0.000000 15373 +engineeringbox 0 1 6.957497 0.000000 15374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..ae56b923 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +place 0 106 2.197225 0.000000 124 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +site 0 106 2.197225 0.000000 119 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +exam 0 86 2.484907 0.000000 169 +librari 0 87 2.484907 0.000000 181 +thing 0 84 2.484907 0.000000 189 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +sieg 0 69 2.708050 0.000000 260 +main 0 67 2.708050 0.000000 256 +found 0 53 2.944439 0.000000 337 +quarter 0 47 3.091042 0.000000 389 +around 0 43 3.178054 0.000000 415 +might 0 41 3.218876 0.000000 426 +music 0 42 3.218876 0.000000 436 +paul 0 38 3.295837 0.000000 471 +autumn 0 31 3.496508 0.000000 558 +mind 0 27 3.637586 0.000000 632 +rather 0 26 3.688879 0.000000 642 +geometri 0 22 3.850148 0.000000 752 +fund 0 21 3.912023 0.000000 805 +nice 0 20 3.951244 0.000000 809 +listen 0 18 4.060443 0.000000 907 +engineeringunivers 0 17 4.110874 0.000000 959 +otherwis 0 17 4.110874 0.000000 922 +chateau 0 16 4.174387 0.000000 997 +drive 0 15 4.248495 0.000000 1052 +galleri 0 13 4.382027 0.000000 1192 +captur 0 12 4.465908 0.000000 1232 +denni 0 11 4.553877 0.000000 1321 +moment 0 11 4.553877 0.000000 1379 +vista 0 10 4.653960 0.000000 1452 +seven 0 9 4.753590 0.000000 1561 +somewher 0 6 5.164786 0.000000 2176 +wolman 0 6 5.164786 0.000000 2093 +alec 0 5 5.347108 0.000000 2563 +lost 0 5 5.347108 0.000000 2358 +ta 0 4 5.568345 0.000000 3058 +soul 0 4 5.568345 0.000000 2907 +luci 0 3 5.857933 0.000000 3705 +schedulethi 0 2 6.263398 0.000000 4068 +meander 0 2 6.263398 0.000000 5887 +booth 0 2 6.263398 0.000000 5119 +scheduleto 0 1 6.957497 0.000000 15375 +probablyb 0 1 6.957497 0.000000 15376 +activitiesmi 0 1 6.957497 0.000000 15377 +areasof 0 1 6.957497 0.000000 15378 +outta 0 1 6.957497 0.000000 15379 +pea 0 1 6.957497 0.000000 15380 +mofo 0 1 6.957497 0.000000 15381 +peach 0 1 6.957497 0.000000 15382 +ruel 0 1 6.957497 0.000000 15383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..f0561b8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +public 0 202 1.609438 0.000000 43 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +relat 0 139 1.945910 0.000000 68 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +welcom 0 122 2.079442 0.000000 99 +machin 0 129 2.079442 0.000000 95 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +mathemat 0 108 2.197225 0.000000 123 +user 0 104 2.302585 0.000000 137 +real 0 93 2.397895 0.000000 144 +activ 0 84 2.484907 0.000000 182 +educ 0 86 2.484907 0.000000 191 +interfac 0 79 2.564949 0.000000 209 +onlin 0 75 2.639057 0.000000 223 +summari 0 73 2.639057 0.000000 237 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +sieg 0 69 2.708050 0.000000 260 +main 0 67 2.708050 0.000000 256 +august 0 66 2.708050 0.000000 257 +interact 0 62 2.772589 0.000000 270 +polici 0 64 2.772589 0.000000 279 +previou 0 62 2.772589 0.000000 290 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +profession 0 51 2.995732 0.000000 345 +basic 0 50 3.044522 0.000000 360 +formal 0 37 3.332205 0.000000 478 +concurr 0 34 3.401197 0.000000 501 +human 0 32 3.465736 0.000000 546 +collabor 0 32 3.465736 0.000000 543 +chair 0 29 3.583519 0.000000 596 +demonstr 0 24 3.761200 0.000000 694 +verif 0 20 3.951244 0.000000 826 +safeti 0 20 3.951244 0.000000 817 +engineeringunivers 0 17 4.110874 0.000000 959 +anyth 0 16 4.174387 0.000000 998 +mellon 0 13 4.382027 0.000000 1179 +washingtonbox 0 13 4.382027 0.000000 1200 +opportun 0 13 4.382027 0.000000 1161 +carnegi 0 12 4.465908 0.000000 1260 +ski 0 10 4.653960 0.000000 1471 +cook 0 10 4.653960 0.000000 1464 +cultur 0 7 5.010635 0.000000 1951 +spanish 0 4 5.568345 0.000000 3017 +ofmi 0 3 5.857933 0.000000 3911 +uist 0 2 6.263398 0.000000 5901 +vegetarian 0 2 6.263398 0.000000 5902 +greet 0 2 6.263398 0.000000 5903 +modugno 0 1 6.957497 0.000000 15384 +francesmari 0 1 6.957497 0.000000 15385 +pagefrancesmari 0 1 6.957497 0.000000 15386 +algorthim 0 1 6.957497 0.000000 15387 +includecycl 0 1 6.957497 0.000000 15388 +previouslyitalian 0 1 6.957497 0.000000 15389 +elleri 0 1 6.957497 0.000000 15390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..e006e70a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +compil 0 122 2.079442 0.000000 96 +pictur 0 89 2.397895 0.000000 160 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +optim 0 79 2.564949 0.000000 197 +free 0 73 2.639057 0.000000 224 +receiv 0 66 2.708050 0.000000 244 +written 0 63 2.772589 0.000000 278 +dept 0 64 2.772589 0.000000 291 +finger 0 52 2.995732 0.000000 354 +netscap 0 44 3.135494 0.000000 395 +live 0 40 3.258097 0.000000 451 +word 0 34 3.401197 0.000000 508 +someth 0 31 3.496508 0.000000 554 +anim 0 31 3.496508 0.000000 557 +weather 0 28 3.610918 0.000000 618 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +pattern 0 24 3.761200 0.000000 689 +mobil 0 23 3.806662 0.000000 730 +georg 0 16 4.174387 0.000000 994 +match 0 16 4.174387 0.000000 965 +script 0 13 4.382027 0.000000 1171 +song 0 11 4.553877 0.000000 1380 +debugg 0 9 4.753590 0.000000 1493 +pagei 0 8 4.875197 0.000000 1683 +handi 0 6 5.164786 0.000000 2111 +puzzl 0 5 5.347108 0.000000 2507 +water 0 5 5.347108 0.000000 2535 +forman 1 2 6.263398 6.263398 5904 +hyperlink 0 2 6.263398 0.000000 5447 +pagegeorg 0 1 6.957497 0.000000 15391 +ariadn 0 1 6.957497 0.000000 15392 +gforman 0 1 6.957497 0.000000 15393 +comhom 0 1 6.957497 0.000000 15394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..5ad2ff12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +file 0 132 1.945910 0.000000 70 +tool 0 117 2.079442 0.000000 93 +sinc 0 90 2.397895 0.000000 159 +chang 0 82 2.484907 0.000000 163 +refer 0 78 2.564949 0.000000 203 +intellig 0 72 2.639057 0.000000 225 +artifici 0 63 2.772589 0.000000 280 +life 0 50 3.044522 0.000000 375 +visitor 0 49 3.044522 0.000000 371 +favorit 0 44 3.135494 0.000000 410 +netscap 0 44 3.135494 0.000000 395 +everi 0 34 3.401197 0.000000 519 +collabor 0 32 3.465736 0.000000 543 +quot 0 29 3.583519 0.000000 582 +bookmark 0 26 3.688879 0.000000 639 +applet 0 20 3.951244 0.000000 827 +agent 0 18 4.060443 0.000000 910 +english 0 15 4.248495 0.000000 1033 +trip 0 14 4.317488 0.000000 1113 +bike 0 10 4.653960 0.000000 1468 +poetri 0 9 4.753590 0.000000 1596 +weld 0 9 4.753590 0.000000 1538 +marc 0 8 4.875197 0.000000 1680 +dictionari 0 8 4.875197 0.000000 1642 +gather 0 8 4.875197 0.000000 1719 +friedman 1 7 5.010635 5.010635 1886 +golden 0 7 5.010635 0.000000 1962 +planner 0 7 5.010635 0.000000 1797 +keith 0 5 5.347108 0.000000 2528 +camp 0 5 5.347108 0.000000 2545 +elsewher 0 5 5.347108 0.000000 2444 +spanish 0 4 5.568345 0.000000 3017 +codi 0 3 5.857933 0.000000 3940 +kwok 0 3 5.857933 0.000000 3941 +ucpop 0 3 5.857933 0.000000 3878 +watercolor 0 1 6.957497 0.000000 15395 +checklist 0 1 6.957497 0.000000 15396 +occam 0 1 6.957497 0.000000 15397 +wordbot 0 1 6.957497 0.000000 15398 +nietzschein 0 1 6.957497 0.000000 15399 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..ad63b890 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +file 0 132 1.945910 0.000000 70 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +graphic 0 90 2.397895 0.000000 147 +member 0 84 2.484907 0.000000 165 +line 0 75 2.639057 0.000000 231 +plai 0 60 2.833213 0.000000 307 +game 0 36 3.367296 0.000000 498 +neural 0 30 3.555348 0.000000 578 +ofwashington 0 22 3.850148 0.000000 766 +audio 0 14 4.317488 0.000000 1094 +genet 0 10 4.653960 0.000000 1409 +cecil 0 9 4.753590 0.000000 1547 +garrett 1 3 5.857933 5.857933 3377 +charli 0 2 6.263398 0.000000 5905 +bookshelf 0 2 6.263398 0.000000 5724 +algorithmspap 0 1 6.957497 0.000000 15400 +algorithmsformerli 0 1 6.957497 0.000000 15401 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..84482616 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +seattl 0 120 2.079442 0.000000 103 +techniqu 0 99 2.302585 0.000000 138 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +follow 0 92 2.397895 0.000000 143 +david 0 71 2.639057 0.000000 232 +appli 0 71 2.639057 0.000000 226 +multimedia 0 68 2.708050 0.000000 258 +laboratori 0 63 2.772589 0.000000 292 +thesi 0 57 2.890372 0.000000 327 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +prototyp 0 38 3.295837 0.000000 463 +deal 0 22 3.850148 0.000000 736 +supervis 0 20 3.951244 0.000000 840 +dimension 0 18 4.060443 0.000000 909 +render 0 17 4.110874 0.000000 947 +engineeringunivers 0 17 4.110874 0.000000 959 +georg 0 16 4.174387 0.000000 994 +eduphon 0 15 4.248495 0.000000 1060 +washingtonbox 0 13 4.382027 0.000000 1200 +galleri 0 13 4.382027 0.000000 1192 +wife 0 13 4.382027 0.000000 1196 +tradit 0 10 4.653960 0.000000 1404 +illustr 0 8 4.875197 0.000000 1679 +salesin 0 4 5.568345 0.000000 3051 +grail 0 3 5.857933 0.000000 3356 +winkenbach 0 1 6.957497 0.000000 15402 +winkenbachdepart 0 1 6.957497 0.000000 15403 +georgew 0 1 6.957497 0.000000 15404 +doneund 0 1 6.957497 0.000000 15405 +theautomat 0 1 6.957497 0.000000 15406 +imagescr 0 1 6.957497 0.000000 15407 +taweewan 0 1 6.957497 0.000000 15408 +siwadun 0 1 6.957497 0.000000 15409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..bb97b67e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,212 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +updat 0 191 1.609438 0.000000 41 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +file 0 132 1.945910 0.000000 70 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +commun 0 95 2.397895 0.000000 157 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +contain 0 81 2.484907 0.000000 174 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +complet 0 77 2.564949 0.000000 208 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +nation 0 74 2.639057 0.000000 240 +free 0 73 2.639057 0.000000 224 +write 0 72 2.639057 0.000000 222 +html 0 75 2.639057 0.000000 235 +materi 0 75 2.639057 0.000000 221 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +view 0 70 2.708050 0.000000 254 +foundat 0 62 2.772589 0.000000 286 +creat 0 63 2.772589 0.000000 277 +virtual 0 62 2.772589 0.000000 285 +back 0 60 2.833213 0.000000 297 +locat 0 59 2.833213 0.000000 303 +best 0 59 2.833213 0.000000 299 +simpl 0 60 2.833213 0.000000 298 +plai 0 60 2.833213 0.000000 307 +major 0 56 2.890372 0.000000 315 +variou 0 56 2.890372 0.000000 317 +unix 0 58 2.890372 0.000000 308 +index 0 56 2.890372 0.000000 309 +sampl 0 53 2.944439 0.000000 339 +date 0 51 2.995732 0.000000 344 +archiv 0 49 3.044522 0.000000 364 +numer 0 49 3.044522 0.000000 369 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +keep 0 44 3.135494 0.000000 409 +show 0 43 3.178054 0.000000 417 +compani 0 41 3.218876 0.000000 423 +music 0 42 3.218876 0.000000 436 +small 0 39 3.258097 0.000000 447 +author 0 39 3.258097 0.000000 450 +origin 0 38 3.295837 0.000000 472 +seminar 0 38 3.295837 0.000000 470 +microsoft 0 38 3.295837 0.000000 468 +feel 0 37 3.332205 0.000000 483 +connect 0 37 3.332205 0.000000 485 +especi 0 36 3.367296 0.000000 496 +winter 0 36 3.367296 0.000000 500 +game 0 36 3.367296 0.000000 498 +procedur 0 36 3.367296 0.000000 488 +random 0 34 3.401197 0.000000 511 +global 0 34 3.401197 0.000000 520 +articl 0 33 3.433987 0.000000 530 +express 0 32 3.465736 0.000000 540 +scientist 0 31 3.496508 0.000000 560 +computersci 0 30 3.555348 0.000000 562 +full 0 28 3.610918 0.000000 615 +progress 0 28 3.610918 0.000000 598 +univ 0 28 3.610918 0.000000 617 +linux 0 27 3.637586 0.000000 631 +pagecs 0 26 3.688879 0.000000 658 +greg 0 24 3.761200 0.000000 695 +alwai 0 24 3.761200 0.000000 691 +magazin 0 24 3.761200 0.000000 704 +yahoo 0 24 3.761200 0.000000 707 +daili 0 24 3.761200 0.000000 706 +ofwashington 0 22 3.850148 0.000000 766 +recommend 0 22 3.850148 0.000000 737 +busi 0 21 3.912023 0.000000 784 +corpor 0 21 3.912023 0.000000 802 +navig 0 21 3.912023 0.000000 796 +tenni 0 20 3.951244 0.000000 838 +feedback 0 19 4.007333 0.000000 854 +lyco 0 19 4.007333 0.000000 871 +hobbi 0 16 4.174387 0.000000 1009 +devic 0 16 4.174387 0.000000 1002 +upon 0 16 4.174387 0.000000 978 +configur 0 15 4.248495 0.000000 1012 +reflect 0 15 4.248495 0.000000 1034 +incomput 0 14 4.317488 0.000000 1096 +senior 0 14 4.317488 0.000000 1120 +emac 0 13 4.382027 0.000000 1143 +philosophi 0 13 4.382027 0.000000 1167 +misc 0 13 4.382027 0.000000 1124 +newspap 0 12 4.465908 0.000000 1280 +emploi 0 12 4.465908 0.000000 1284 +basketbal 0 12 4.465908 0.000000 1289 +magic 0 11 4.553877 0.000000 1358 +perl 0 11 4.553877 0.000000 1332 +hello 0 10 4.653960 0.000000 1407 +desktop 0 10 4.653960 0.000000 1445 +ski 0 10 4.653960 0.000000 1471 +fellowship 0 10 4.653960 0.000000 1460 +volleybal 0 9 4.753590 0.000000 1598 +inter 0 9 4.753590 0.000000 1530 +competit 0 8 4.875197 0.000000 1635 +readm 0 8 4.875197 0.000000 1699 +joel 0 8 4.875197 0.000000 1698 +entri 0 8 4.875197 0.000000 1678 +extract 0 8 4.875197 0.000000 1728 +opinion 0 8 4.875197 0.000000 1708 +chronicl 0 7 5.010635 0.000000 1952 +gatewai 0 7 5.010635 0.000000 1942 +necessarili 0 7 5.010635 0.000000 1899 +duke 0 6 5.164786 0.000000 2231 +piano 0 6 5.164786 0.000000 2201 +histor 0 6 5.164786 0.000000 2085 +vertic 0 5 5.347108 0.000000 2270 +hole 0 5 5.347108 0.000000 2518 +billi 0 5 5.347108 0.000000 2404 +doubl 0 4 5.568345 0.000000 2951 +patch 0 4 5.568345 0.000000 2710 +drew 0 4 5.568345 0.000000 2980 +cube 0 4 5.568345 0.000000 2940 +jackson 0 3 5.857933 0.000000 3586 +eduaddress 0 3 5.857933 0.000000 3762 +rsum 0 3 5.857933 0.000000 3939 +freewar 0 3 5.857933 0.000000 3504 +hotjava 0 3 5.857933 0.000000 3220 +seinfeld 0 3 5.857933 0.000000 3958 +conclus 0 3 5.857933 0.000000 3367 +pagegreg 0 2 6.263398 0.000000 5906 +amcurr 0 2 6.263398 0.000000 5798 +bermuda 0 2 6.263398 0.000000 5907 +seminarcs 0 2 6.263398 0.000000 4521 +geneticalgorithm 0 2 6.263398 0.000000 5673 +ncaa 0 2 6.263398 0.000000 5908 +unoffici 0 2 6.263398 0.000000 5909 +unif 0 2 6.263398 0.000000 5910 +badro 0 1 6.957497 0.000000 15410 +zshell 0 1 6.957497 0.000000 15411 +nesbit 0 1 6.957497 0.000000 15412 +isuppos 0 1 6.957497 0.000000 15413 +excitingfeatur 0 1 6.957497 0.000000 15414 +dukeunivers 0 1 6.957497 0.000000 15415 +fortransworld 0 1 6.957497 0.000000 15416 +indurham 0 1 6.957497 0.000000 15417 +headquart 0 1 6.957497 0.000000 15418 +myapart 0 1 6.957497 0.000000 15419 +newer 0 1 6.957497 0.000000 15420 +fvwm 0 1 6.957497 0.000000 15421 +redhat 0 1 6.957497 0.000000 15422 +transworldnumer 0 1 6.957497 0.000000 15423 +ieeenat 0 1 6.957497 0.000000 15424 +victori 0 1 6.957497 0.000000 15425 +bycomput 0 1 6.957497 0.000000 15426 +canterburi 0 1 6.957497 0.000000 15427 +definitelynot 0 1 6.957497 0.000000 15428 +juggl 0 1 6.957497 0.000000 15429 +rubik 0 1 6.957497 0.000000 15430 +sarahmclachlan 0 1 6.957497 0.000000 15431 +parliamentari 0 1 6.957497 0.000000 15432 +sgml 0 1 6.957497 0.000000 15433 +sitcom 0 1 6.957497 0.000000 15434 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..6ab41c36 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,295 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +model 0 145 1.945910 0.000000 69 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +file 0 132 1.945910 0.000000 70 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +intern 0 108 2.197225 0.000000 128 +book 0 99 2.302585 0.000000 131 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +imag 0 91 2.397895 0.000000 161 +search 0 95 2.397895 0.000000 155 +real 0 93 2.397895 0.000000 144 +associ 0 93 2.397895 0.000000 151 +graphic 0 90 2.397895 0.000000 147 +call 0 91 2.397895 0.000000 153 +learn 0 86 2.484907 0.000000 170 +school 0 84 2.484907 0.000000 188 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +requir 0 81 2.484907 0.000000 167 +thing 0 84 2.484907 0.000000 189 +second 0 81 2.484907 0.000000 166 +stuff 0 87 2.484907 0.000000 171 +info 0 85 2.484907 0.000000 176 +complet 0 77 2.564949 0.000000 208 +decemb 0 80 2.564949 0.000000 215 +resum 0 79 2.564949 0.000000 217 +sourc 0 77 2.564949 0.000000 201 +addit 0 74 2.639057 0.000000 228 +servic 0 72 2.639057 0.000000 236 +appli 0 71 2.639057 0.000000 226 +java 1 70 2.708050 2.708050 248 +goal 0 66 2.708050 0.000000 250 +artifici 0 63 2.772589 0.000000 280 +function 0 62 2.772589 0.000000 275 +foundat 0 62 2.772589 0.000000 286 +evalu 0 64 2.772589 0.000000 266 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +best 0 59 2.833213 0.000000 299 +major 0 56 2.890372 0.000000 315 +publish 0 57 2.890372 0.000000 326 +browser 0 56 2.890372 0.000000 313 +summer 0 56 2.890372 0.000000 311 +thesi 0 57 2.890372 0.000000 327 +reason 0 57 2.890372 0.000000 318 +found 0 53 2.944439 0.000000 337 +allow 0 53 2.944439 0.000000 333 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +cool 0 49 3.044522 0.000000 374 +life 0 50 3.044522 0.000000 375 +give 0 50 3.044522 0.000000 359 +archiv 0 49 3.044522 0.000000 364 +california 0 46 3.091042 0.000000 388 +could 0 46 3.091042 0.000000 383 +made 0 44 3.135494 0.000000 398 +even 0 45 3.135494 0.000000 393 +third 0 43 3.178054 0.000000 412 +show 0 43 3.178054 0.000000 417 +autom 0 41 3.218876 0.000000 434 +linear 0 41 3.218876 0.000000 431 +might 0 41 3.218876 0.000000 426 +movi 0 40 3.258097 0.000000 459 +submit 0 39 3.258097 0.000000 440 +multipl 0 39 3.258097 0.000000 453 +prototyp 0 38 3.295837 0.000000 463 +origin 0 38 3.295837 0.000000 472 +expect 0 37 3.332205 0.000000 484 +tree 0 36 3.367296 0.000000 492 +staff 0 36 3.367296 0.000000 490 +ofth 0 36 3.367296 0.000000 491 +return 0 34 3.401197 0.000000 502 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +concept 0 32 3.465736 0.000000 537 +posit 0 31 3.496508 0.000000 552 +scientist 0 31 3.496508 0.000000 560 +anim 0 31 3.496508 0.000000 557 +travel 0 30 3.555348 0.000000 579 +graph 0 30 3.555348 0.000000 576 +neural 0 30 3.555348 0.000000 578 +quit 0 27 3.637586 0.000000 633 +though 0 27 3.637586 0.000000 622 +manipul 0 27 3.637586 0.000000 624 +constraint 0 26 3.688879 0.000000 636 +toward 0 25 3.737670 0.000000 668 +client 0 25 3.737670 0.000000 679 +trace 0 25 3.737670 0.000000 677 +greg 0 24 3.761200 0.000000 695 +demonstr 0 24 3.761200 0.000000 694 +pattern 0 24 3.761200 0.000000 689 +famili 0 23 3.806662 0.000000 735 +displai 0 23 3.806662 0.000000 712 +brows 0 23 3.806662 0.000000 726 +highli 0 23 3.806662 0.000000 725 +thread 0 23 3.806662 0.000000 722 +honor 0 23 3.806662 0.000000 729 +compress 0 23 3.806662 0.000000 719 +ofwashington 0 22 3.850148 0.000000 766 +instead 0 22 3.850148 0.000000 756 +love 0 21 3.912023 0.000000 804 +leav 0 21 3.912023 0.000000 772 +avoid 0 21 3.912023 0.000000 799 +applet 0 20 3.951244 0.000000 827 +qualiti 0 20 3.951244 0.000000 832 +wrote 0 20 3.951244 0.000000 830 +repositori 0 17 4.110874 0.000000 932 +thought 0 17 4.110874 0.000000 945 +adam 0 17 4.110874 0.000000 934 +diego 0 16 4.174387 0.000000 992 +earli 0 16 4.174387 0.000000 968 +spatial 0 16 4.174387 0.000000 988 +anyth 0 16 4.174387 0.000000 998 +dilbert 0 16 4.174387 0.000000 996 +cognit 0 16 4.174387 0.000000 986 +chateau 0 16 4.174387 0.000000 997 +reflect 0 15 4.248495 0.000000 1034 +rate 0 15 4.248495 0.000000 1037 +capabl 0 15 4.248495 0.000000 1016 +enough 0 15 4.248495 0.000000 1040 +doesn 0 15 4.248495 0.000000 1055 +draw 0 14 4.317488 0.000000 1086 +econom 0 13 4.382027 0.000000 1184 +iter 0 12 4.465908 0.000000 1206 +buffer 0 12 4.465908 0.000000 1211 +entertain 0 12 4.465908 0.000000 1286 +hank 0 12 4.465908 0.000000 1253 +assembl 0 12 4.465908 0.000000 1207 +transpar 0 11 4.553877 0.000000 1325 +keyword 0 11 4.553877 0.000000 1356 +enter 0 10 4.653960 0.000000 1454 +thecomput 0 10 4.653960 0.000000 1408 +awai 0 10 4.653960 0.000000 1447 +metacrawl 0 10 4.653960 0.000000 1455 +pick 0 9 4.753590 0.000000 1498 +prefer 0 9 4.753590 0.000000 1491 +congress 0 9 4.753590 0.000000 1592 +oop 0 8 4.875197 0.000000 1778 +realist 0 8 4.875197 0.000000 1665 +judg 0 8 4.875197 0.000000 1644 +guggenheim 0 8 4.875197 0.000000 1759 +hit 0 7 5.010635 0.000000 1965 +occasion 0 7 5.010635 0.000000 1905 +polit 0 6 5.164786 0.000000 2115 +impress 0 6 5.164786 0.000000 2096 +unpublish 0 6 5.164786 0.000000 2226 +stage 0 5 5.347108 0.000000 2488 +contest 0 5 5.347108 0.000000 2273 +particl 0 5 5.347108 0.000000 2436 +carlson 0 5 5.347108 0.000000 2351 +chess 0 5 5.347108 0.000000 2486 +annex 0 5 5.347108 0.000000 2572 +doubl 0 4 5.568345 0.000000 2951 +sorri 0 4 5.568345 0.000000 3059 +gradual 0 4 5.568345 0.000000 2997 +mess 0 4 5.568345 0.000000 2886 +jar 0 3 5.857933 0.000000 3223 +prison 0 3 5.857933 0.000000 3907 +slave 0 3 5.857933 0.000000 3959 +emul 0 3 5.857933 0.000000 3944 +cleaner 0 3 5.857933 0.000000 3775 +civil 0 3 5.857933 0.000000 3908 +evolutionari 0 3 5.857933 0.000000 3898 +boolean 0 3 5.857933 0.000000 3202 +recurr 0 3 5.857933 0.000000 3740 +sujai 0 3 5.857933 0.000000 3960 +parekh 0 3 5.857933 0.000000 3961 +shadow 0 3 5.857933 0.000000 3519 +inventor 0 3 5.857933 0.000000 3695 +quicktim 0 3 5.857933 0.000000 3493 +gamelan 0 2 6.263398 0.000000 4221 +elicit 0 2 6.263398 0.000000 4294 +flight 0 2 6.263398 0.000000 5911 +ucsd 0 2 6.263398 0.000000 5192 +scienceher 0 2 6.263398 0.000000 5912 +thejava 0 2 6.263398 0.000000 4704 +certainli 0 2 6.263398 0.000000 4090 +belew 0 2 6.263398 0.000000 4739 +lesh 0 2 6.263398 0.000000 5895 +tracer 0 2 6.263398 0.000000 5913 +inc 0 2 6.263398 0.000000 5914 +duel 0 2 6.263398 0.000000 5855 +rai 0 2 6.263398 0.000000 5915 +theanim 0 2 6.263398 0.000000 5852 +thed 0 2 6.263398 0.000000 4963 +mbquicktim 0 2 6.263398 0.000000 5916 +linden 0 1 6.957497 0.000000 15435 +webview 0 1 6.957497 0.000000 15436 +wasrat 0 1 6.957497 0.000000 15437 +andwa 0 1 6.957497 0.000000 15438 +neuralnetwork 0 1 6.957497 0.000000 15439 +headless 0 1 6.957497 0.000000 15440 +horseman 0 1 6.957497 0.000000 15441 +lindenmi 0 1 6.957497 0.000000 15442 +wifecorina 0 1 6.957497 0.000000 15443 +lofti 0 1 6.957497 0.000000 15444 +undergraduatedegre 0 1 6.957497 0.000000 15445 +anodd 0 1 6.957497 0.000000 15446 +mactiv 0 1 6.957497 0.000000 15447 +orset 0 1 6.957497 0.000000 15448 +altavistawebviewand 0 1 6.957497 0.000000 15449 +metawebview 0 1 6.957497 0.000000 15450 +foraltavista 0 1 6.957497 0.000000 15451 +searchservic 0 1 6.957497 0.000000 15452 +dialog 0 1 6.957497 0.000000 15453 +travelag 0 1 6.957497 0.000000 15454 +whileallow 0 1 6.957497 0.000000 15455 +andjar 0 1 6.957497 0.000000 15456 +altavistawebview 0 1 6.957497 0.000000 15457 +winner 0 1 6.957497 0.000000 15458 +walsh 0 1 6.957497 0.000000 15459 +meilleur 0 1 6.957497 0.000000 15460 +ballet 0 1 6.957497 0.000000 15461 +flicker 0 1 6.957497 0.000000 15462 +standardsto 0 1 6.957497 0.000000 15463 +mylgramm 0 1 6.957497 0.000000 15464 +lgrammer 0 1 6.957497 0.000000 15465 +theparticletre 0 1 6.957497 0.000000 15466 +thejar 0 1 6.957497 0.000000 15467 +dawn 0 1 6.957497 0.000000 15468 +ademonstr 0 1 6.957497 0.000000 15469 +cansuccessfulli 0 1 6.957497 0.000000 15470 +myriadsoftwar 0 1 6.957497 0.000000 15471 +filippo 0 1 6.957497 0.000000 15472 +menzer 0 1 6.957497 0.000000 15473 +latentenergi 0 1 6.957497 0.000000 15474 +developingartifici 0 1 6.957497 0.000000 15475 +enviro 0 1 6.957497 0.000000 15476 +theautom 0 1 6.957497 0.000000 15477 +assit 0 1 6.957497 0.000000 15478 +majeski 0 1 6.957497 0.000000 15479 +spitzer 0 1 6.957497 0.000000 15480 +localizedinteract 0 1 6.957497 0.000000 15481 +dilemma 0 1 6.957497 0.000000 15482 +krishnamoorthi 0 1 6.957497 0.000000 15483 +paturi 0 1 6.957497 0.000000 15484 +blume 0 1 6.957497 0.000000 15485 +liden 0 1 6.957497 0.000000 15486 +esen 0 1 6.957497 0.000000 15487 +hardwaretradeoff 0 1 6.957497 0.000000 15488 +sdilemma 0 1 6.957497 0.000000 15489 +funrai 0 1 6.957497 0.000000 15490 +closeup 0 1 6.957497 0.000000 15491 +sphere 0 1 6.957497 0.000000 15492 +withreflect 0 1 6.957497 0.000000 15493 +adaptivesampl 0 1 6.957497 0.000000 15494 +thespher 0 1 6.957497 0.000000 15495 +causingth 0 1 6.957497 0.000000 15496 +refract 0 1 6.957497 0.000000 15497 +surfaceand 0 1 6.957497 0.000000 15498 +strike 0 1 6.957497 0.000000 15499 +alow 0 1 6.957497 0.000000 15500 +anyfurth 0 1 6.957497 0.000000 15501 +resembl 0 1 6.957497 0.000000 15502 +glinden 0 1 6.957497 0.000000 15503 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..a90c576c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +octob 0 89 2.397895 0.000000 156 +homework 0 79 2.564949 0.000000 193 +brian 0 38 3.295837 0.000000 466 +computersci 0 30 3.555348 0.000000 562 +trip 0 14 4.317488 0.000000 1113 +grant 1 12 4.465908 4.465908 1216 +awai 0 10 4.653960 0.000000 1447 +daughter 0 7 5.010635 0.000000 1943 +pagebrian 0 4 5.568345 0.000000 3054 +groupuw 0 3 5.857933 0.000000 3934 +kri 0 1 6.957497 0.000000 15504 +infowork 0 1 6.957497 0.000000 15505 +backgrounduwdynam 0 1 6.957497 0.000000 15506 +engineeringperson 0 1 6.957497 0.000000 15507 +stuffperson 0 1 6.957497 0.000000 15508 +backgroundmi 0 1 6.957497 0.000000 15509 +isismi 0 1 6.957497 0.000000 15510 +singaporemi 0 1 6.957497 0.000000 15511 +bookmarksmi 0 1 6.957497 0.000000 15512 +keylast 0 1 6.957497 0.000000 15513 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..8b2404b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +paper 0 205 1.609438 0.000000 38 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +pictur 0 89 2.397895 0.000000 160 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +complet 0 77 2.564949 0.000000 208 +sieg 0 69 2.708050 0.000000 260 +integr 0 67 2.708050 0.000000 245 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +summer 0 56 2.890372 0.000000 311 +much 0 52 2.995732 0.000000 349 +frequent 0 49 3.044522 0.000000 367 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +anoth 0 45 3.135494 0.000000 408 +author 0 39 3.258097 0.000000 450 +littl 0 39 3.258097 0.000000 454 +close 0 38 3.295837 0.000000 465 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +taken 0 31 3.496508 0.000000 555 +someth 0 31 3.496508 0.000000 554 +actual 0 28 3.610918 0.000000 604 +team 0 27 3.637586 0.000000 625 +consult 0 24 3.761200 0.000000 687 +sometim 0 24 3.761200 0.000000 696 +spend 1 19 4.007333 4.007333 850 +less 0 18 4.060443 0.000000 892 +along 0 18 4.060443 0.000000 878 +attempt 0 17 4.110874 0.000000 917 +white 0 17 4.110874 0.000000 951 +chateau 0 16 4.174387 0.000000 997 +took 0 16 4.174387 0.000000 1010 +month 0 15 4.248495 0.000000 1025 +dave 0 14 4.317488 0.000000 1098 +trip 0 14 4.317488 0.000000 1113 +council 0 11 4.553877 0.000000 1364 +cecil 0 9 4.753590 0.000000 1547 +hang 0 9 4.753590 0.000000 1499 +grove 1 8 4.875197 4.875197 1675 +pure 0 8 4.875197 0.000000 1776 +vehicl 0 7 5.010635 0.000000 1928 +wouldn 0 7 5.010635 0.000000 1970 +footbal 0 7 5.010635 0.000000 1912 +strip 0 6 5.164786 0.000000 2203 +toronto 0 6 5.164786 0.000000 2156 +spinproject 0 5 5.347108 0.000000 2570 +gui 0 5 5.347108 0.000000 2573 +water 0 5 5.347108 0.000000 2535 +worki 0 4 5.568345 0.000000 3010 +fantasi 0 4 5.568345 0.000000 3055 +silli 0 4 5.568345 0.000000 3038 +raft 0 4 5.568345 0.000000 3060 +langaug 0 3 5.857933 0.000000 3661 +hampshir 0 3 5.857933 0.000000 3280 +kick 0 3 5.857933 0.000000 3962 +hord 0 2 6.263398 0.000000 5917 +aroundth 0 2 6.263398 0.000000 5653 +fring 0 2 6.263398 0.000000 5721 +boi 0 2 6.263398 0.000000 5918 +toseattl 0 2 6.263398 0.000000 5919 +soonish 0 1 6.957497 0.000000 15514 +dilbertfix 0 1 6.957497 0.000000 15515 +thathit 0 1 6.957497 0.000000 15516 +underacheiv 0 1 6.957497 0.000000 15517 +scoutreserv 0 1 6.957497 0.000000 15518 +greaterlowel 0 1 6.957497 0.000000 15519 +casunset 0 1 6.957497 0.000000 15520 +cabin 0 1 6.957497 0.000000 15521 +drove 0 1 6.957497 0.000000 15522 +detour 0 1 6.957497 0.000000 15523 +somehihglight 0 1 6.957497 0.000000 15524 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..8d66bb4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +public 0 202 1.609438 0.000000 43 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +seattl 0 120 2.079442 0.000000 103 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +level 0 87 2.484907 0.000000 180 +educ 0 86 2.484907 0.000000 191 +june 0 79 2.564949 0.000000 214 +simul 0 66 2.708050 0.000000 255 +experi 0 64 2.772589 0.000000 283 +improv 0 62 2.772589 0.000000 289 +hardwar 0 51 2.995732 0.000000 350 +prototyp 0 38 3.295837 0.000000 463 +multi 0 36 3.367296 0.000000 493 +survei 0 35 3.401197 0.000000 513 +board 0 33 3.433987 0.000000 528 +curriculum 0 33 3.433987 0.000000 535 +methodolog 0 23 3.806662 0.000000 733 +rout 0 21 3.912023 0.000000 793 +synthesi 0 20 3.951244 0.000000 834 +scott 0 18 4.060443 0.000000 884 +commerci 0 16 4.174387 0.000000 1005 +partit 0 16 4.174387 0.000000 984 +topolog 0 14 4.317488 0.000000 1089 +embed 0 14 4.317488 0.000000 1102 +circuit 0 13 4.382027 0.000000 1131 +asynchron 0 12 4.465908 0.000000 1229 +fpga 1 10 4.653960 4.653960 1433 +rapid 0 10 4.653960 0.000000 1453 +densiti 0 7 5.010635 0.000000 1927 +chinook 0 6 5.164786 0.000000 2229 +triptych 0 4 5.568345 0.000000 3061 +biographi 0 3 5.857933 0.000000 3658 +hauck 0 2 6.263398 0.000000 5920 +montag 0 2 6.263398 0.000000 5921 +springbok 0 2 6.263398 0.000000 5922 +thoughi 0 1 6.957497 0.000000 15525 +vitaeresearch 0 1 6.957497 0.000000 15526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..a76f3330 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +kevin 1 9 4.753590 4.753590 1482 +hinshaw 1 1 6.957497 6.957497 15527 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..65026449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +place 0 106 2.197225 0.000000 124 +part 0 98 2.302585 0.000000 129 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +chang 0 82 2.484907 0.000000 163 +decemb 0 80 2.564949 0.000000 215 +master 0 76 2.564949 0.000000 216 +name 0 72 2.639057 0.000000 220 +html 0 75 2.639057 0.000000 235 +view 0 70 2.708050 0.000000 254 +dept 0 64 2.772589 0.000000 291 +back 0 60 2.833213 0.000000 297 +thesi 0 57 2.890372 0.000000 327 +week 0 52 2.995732 0.000000 343 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +pointer 0 48 3.044522 0.000000 368 +possibl 0 47 3.091042 0.000000 378 +around 0 43 3.178054 0.000000 415 +profil 0 30 3.555348 0.000000 581 +pass 0 28 3.610918 0.000000 611 +univ 0 28 3.610918 0.000000 617 +subject 0 26 3.688879 0.000000 647 +notic 0 25 3.737670 0.000000 675 +head 0 23 3.806662 0.000000 732 +brows 0 23 3.806662 0.000000 726 +half 0 21 3.912023 0.000000 776 +busi 0 21 3.912023 0.000000 784 +unfortun 0 13 4.382027 0.000000 1170 +rememb 0 12 4.465908 0.000000 1217 +neat 0 12 4.465908 0.000000 1263 +daughter 0 7 5.010635 0.000000 1943 +chinook 0 6 5.164786 0.000000 2229 +upper 0 5 5.347108 0.000000 2481 +wast 0 5 5.347108 0.000000 2537 +silli 0 4 5.568345 0.000000 3038 +washingtonseattl 0 4 5.568345 0.000000 3044 +macduff 1 2 6.263398 6.263398 5923 +emma 0 2 6.263398 0.000000 5546 +obsess 0 2 6.263398 0.000000 5924 +ultrasound 0 1 6.957497 0.000000 15528 +elspeth 0 1 6.957497 0.000000 15529 +unborn 0 1 6.957497 0.000000 15530 +fromconcept 0 1 6.957497 0.000000 15531 +ripe 0 1 6.957497 0.000000 15532 +inmid 0 1 6.957497 0.000000 15533 +ly 0 1 6.957497 0.000000 15534 +lookingup 0 1 6.957497 0.000000 15535 +torso 0 1 6.957497 0.000000 15536 +theleft 0 1 6.957497 0.000000 15537 +impend 0 1 6.957497 0.000000 15538 +fatherhood 0 1 6.957497 0.000000 15539 +myspam 0 1 6.957497 0.000000 15540 +usingwebcrawl 0 1 6.957497 0.000000 15541 +frogstv 0 1 6.957497 0.000000 15542 +nationpenn 0 1 6.957497 0.000000 15543 +tellermus 0 1 6.957497 0.000000 15544 +lyricsian 0 1 6.957497 0.000000 15545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..61526ec8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,146 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +phone 0 175 1.791759 0.000000 45 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +object 0 138 1.945910 0.000000 79 +hall 0 146 1.945910 0.000000 65 +file 0 132 1.945910 0.000000 70 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +area 0 144 1.945910 0.000000 80 +high 0 130 2.079442 0.000000 101 +provid 0 121 2.079442 0.000000 94 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +postscript 0 131 2.079442 0.000000 90 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +memori 1 101 2.302585 2.302585 139 +access 0 102 2.302585 0.000000 136 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +imag 0 91 2.397895 0.000000 161 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +orient 0 80 2.564949 0.000000 205 +decemb 0 80 2.564949 0.000000 215 +name 0 72 2.639057 0.000000 220 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +sieg 0 69 2.708050 0.000000 260 +main 0 67 2.708050 0.000000 256 +order 0 69 2.708050 0.000000 249 +septemb 0 65 2.772589 0.000000 274 +virtual 0 62 2.772589 0.000000 285 +visit 0 63 2.772589 0.000000 288 +back 0 60 2.833213 0.000000 297 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +local 0 55 2.944439 0.000000 334 +processor 0 54 2.944439 0.000000 335 +cach 0 41 3.218876 0.000000 432 +small 0 39 3.258097 0.000000 447 +map 0 39 3.258097 0.000000 452 +workstat 0 37 3.332205 0.000000 479 +cost 0 37 3.332205 0.000000 480 +mean 0 37 3.332205 0.000000 477 +global 0 34 3.401197 0.000000 520 +cluster 0 28 3.610918 0.000000 612 +load 0 28 3.610918 0.000000 601 +primari 0 25 3.737670 0.000000 669 +fundament 0 25 3.737670 0.000000 661 +mike 0 24 3.761200 0.000000 703 +store 0 24 3.761200 0.000000 693 +size 0 23 3.806662 0.000000 713 +reduc 0 22 3.850148 0.000000 759 +disk 0 22 3.850148 0.000000 747 +hierarchi 0 22 3.850148 0.000000 744 +unit 0 21 3.912023 0.000000 779 +thu 0 21 3.912023 0.000000 773 +increas 0 20 3.951244 0.000000 829 +speed 0 18 4.060443 0.000000 911 +encourag 0 18 4.060443 0.000000 880 +engineeringunivers 0 17 4.110874 0.000000 959 +debug 0 17 4.110874 0.000000 944 +transfer 0 16 4.174387 0.000000 967 +latenc 0 16 4.174387 0.000000 993 +modern 0 16 4.174387 0.000000 966 +remot 0 15 4.248495 0.000000 1041 +levi 0 14 4.317488 0.000000 1093 +karlin 0 13 4.382027 0.000000 1176 +washingtonbox 0 13 4.382027 0.000000 1200 +introduc 0 13 4.382027 0.000000 1139 +unfortun 0 13 4.382027 0.000000 1170 +galleri 0 13 4.382027 0.000000 1192 +hank 0 12 4.465908 0.000000 1253 +mari 0 12 4.465908 0.000000 1266 +anna 0 12 4.465908 0.000000 1292 +franc 0 12 4.465908 0.000000 1276 +node 0 11 4.553877 0.000000 1326 +extrem 0 11 4.553877 0.000000 1330 +vernon 0 9 4.753590 0.000000 1556 +voelker 0 9 4.753590 0.000000 1557 +factor 0 9 4.753590 0.000000 1544 +postdoc 0 8 4.875197 0.000000 1724 +evan 0 8 4.875197 0.000000 1633 +inproceed 0 8 4.875197 0.000000 1670 +feelei 0 7 5.010635 0.000000 1859 +trend 0 7 5.010635 0.000000 1842 +geoff 0 6 5.164786 0.000000 2124 +temporari 0 6 5.164786 0.000000 2090 +li 0 5 5.347108 0.000000 2500 +seventh 0 5 5.347108 0.000000 2464 +joseph 0 5 5.347108 0.000000 2327 +coverag 0 4 5.568345 0.000000 2656 +greatli 0 3 5.857933 0.000000 3541 +europ 0 3 5.857933 0.000000 3761 +jamrozik 0 2 6.263398 0.000000 5925 +subpag 0 2 6.263398 0.000000 5926 +amort 0 2 6.263398 0.000000 4370 +odd 0 2 6.263398 0.000000 5565 +theuniversit 0 2 6.263398 0.000000 5927 +fourier 0 2 6.263398 0.000000 5698 +grenobl 0 2 6.263398 0.000000 5928 +laboratoir 0 2 6.263398 0.000000 5929 +herv 0 1 6.957497 0.000000 15546 +jamrozikherv 0 1 6.957497 0.000000 15547 +memoi 0 1 6.957497 0.000000 15548 +therebi 0 1 6.957497 0.000000 15549 +intens 0 1 6.957497 0.000000 15550 +lightli 0 1 6.957497 0.000000 15551 +guideproject 0 1 6.957497 0.000000 15552 +bull 0 1 6.957497 0.000000 15553 +imaginstitut 0 1 6.957497 0.000000 15554 +snot 0 1 6.957497 0.000000 15555 +louvr 0 1 6.957497 0.000000 15556 +somefamili 0 1 6.957497 0.000000 15557 +somefriend 0 1 6.957497 0.000000 15558 +eduv 0 1 6.957497 0.000000 15559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..cc2d9440 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +modifi 0 178 1.609438 0.000000 35 +hall 0 146 1.945910 0.000000 65 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +frequent 0 49 3.044522 0.000000 367 +weather 0 28 3.610918 0.000000 618 +channel 0 7 5.010635 0.000000 1836 +forecast 0 6 5.164786 0.000000 2171 +jason 1 3 5.857933 5.857933 3389 +eduaddress 0 3 5.857933 0.000000 3762 +secoski 0 2 6.263398 0.000000 4526 +pagejason 0 1 6.957497 0.000000 15560 +cunivers 0 1 6.957497 0.000000 15561 +boxseattl 0 1 6.957497 0.000000 15562 +projectseattl 0 1 6.957497 0.000000 15563 +secoskylast 0 1 6.957497 0.000000 15564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..05439b98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +look 0 107 2.197225 0.000000 115 +question 0 91 2.397895 0.000000 141 +educ 0 86 2.484907 0.000000 191 +stuff 0 87 2.484907 0.000000 171 +school 0 84 2.484907 0.000000 188 +intellig 0 72 2.639057 0.000000 225 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +artifici 0 63 2.772589 0.000000 280 +virtual 0 62 2.772589 0.000000 285 +copi 0 63 2.772589 0.000000 284 +cool 0 49 3.044522 0.000000 374 +effect 0 46 3.091042 0.000000 385 +mark 0 44 3.135494 0.000000 403 +made 0 44 3.135494 0.000000 398 +music 0 42 3.218876 0.000000 436 +realli 0 40 3.258097 0.000000 444 +littl 0 39 3.258097 0.000000 454 +industri 0 38 3.295837 0.000000 464 +feel 0 37 3.332205 0.000000 483 +download 0 36 3.367296 0.000000 489 +human 0 32 3.465736 0.000000 546 +travel 0 30 3.555348 0.000000 579 +progress 0 28 3.610918 0.000000 598 +static 0 27 3.637586 0.000000 619 +mine 0 26 3.688879 0.000000 654 +experiment 0 26 3.688879 0.000000 645 +never 0 25 3.737670 0.000000 671 +william 0 22 3.850148 0.000000 765 +applet 1 20 3.951244 3.951244 827 +wrote 0 20 3.951244 0.000000 830 +spend 0 19 4.007333 0.000000 850 +demo 0 18 4.060443 0.000000 888 +layer 0 17 4.110874 0.000000 926 +macintosh 0 17 4.110874 0.000000 920 +signific 0 13 4.382027 0.000000 1125 +baer 0 11 4.553877 0.000000 1353 +scienceat 0 11 4.553877 0.000000 1375 +eight 0 11 4.553877 0.000000 1331 +metacrawl 0 10 4.653960 0.000000 1455 +creativ 0 8 4.875197 0.000000 1777 +dream 0 6 5.164786 0.000000 2165 +jeremi 0 5 5.347108 0.000000 2360 +interfer 0 5 5.347108 0.000000 2494 +puzzl 0 5 5.347108 0.000000 2507 +silli 0 4 5.568345 0.000000 3038 +thati 0 4 5.568345 0.000000 2616 +museum 0 3 5.857933 0.000000 3933 +computerinteract 0 2 6.263398 0.000000 5829 +stress 0 2 6.263398 0.000000 4146 +baerjeremi 0 1 6.957497 0.000000 15565 +twain 0 1 6.957497 0.000000 15566 +shakespearei 0 1 6.957497 0.000000 15567 +engineeringtool 0 1 6.957497 0.000000 15568 +pierian 0 1 6.957497 0.000000 15569 +softwareoregon 0 1 6.957497 0.000000 15570 +omsi 0 1 6.957497 0.000000 15571 +pomona 0 1 6.957497 0.000000 15572 +collegeher 0 1 6.957497 0.000000 15573 +searchcopyright 0 1 6.957497 0.000000 15574 +jbaer 0 1 6.957497 0.000000 15575 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..adfaa26e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +note 0 142 1.945910 0.000000 67 +postscript 0 131 2.079442 0.000000 90 +control 0 82 2.484907 0.000000 164 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +come 0 78 2.564949 0.000000 202 +sieg 0 69 2.708050 0.000000 260 +import 0 65 2.772589 0.000000 282 +foundat 0 62 2.772589 0.000000 286 +browser 0 56 2.890372 0.000000 313 +undergradu 0 54 2.944439 0.000000 338 +finger 0 52 2.995732 0.000000 354 +electron 0 47 3.091042 0.000000 379 +tree 0 36 3.367296 0.000000 492 +soon 0 36 3.367296 0.000000 494 +return 0 34 3.401197 0.000000 502 +statu 0 18 4.060443 0.000000 885 +attempt 0 17 4.110874 0.000000 917 +latex 0 14 4.317488 0.000000 1064 +rice 0 11 4.553877 0.000000 1336 +transmiss 0 9 4.753590 0.000000 1588 +jeremi 0 5 5.347108 0.000000 2360 +adjust 0 5 5.347108 0.000000 2422 +frontier 0 3 5.857933 0.000000 3771 +alma 0 3 5.857933 0.000000 3963 +schedulemi 0 2 6.263398 0.000000 5843 +mater 0 2 6.263398 0.000000 5930 +buhler 0 1 6.957497 0.000000 15576 +jbuhler 0 1 6.957497 0.000000 15577 +pagejeremi 0 1 6.957497 0.000000 15578 +pagedo 0 1 6.957497 0.000000 15579 +tako 0 1 6.957497 0.000000 15580 +stufflectur 0 1 6.957497 0.000000 15581 +suffix 0 1 6.957497 0.000000 15582 +keycyb 0 1 6.957497 0.000000 15583 +grinsrecommend 0 1 6.957497 0.000000 15584 +readingmi 0 1 6.957497 0.000000 15585 +universityquot 0 1 6.957497 0.000000 15586 +quotesmi 0 1 6.957497 0.000000 15587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..688059fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,271 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +postscript 0 131 2.079442 0.000000 90 +analysi 0 124 2.079442 0.000000 98 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +part 0 98 2.302585 0.000000 129 +real 0 93 2.397895 0.000000 144 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +environ 0 84 2.484907 0.000000 177 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +level 0 87 2.484907 0.000000 180 +contain 0 81 2.484907 0.000000 174 +optim 1 79 2.564949 2.564949 197 +orient 0 80 2.564949 0.000000 205 +exampl 0 77 2.564949 0.000000 195 +dynam 0 76 2.564949 0.000000 194 +line 0 75 2.639057 0.000000 231 +effici 0 73 2.639057 0.000000 233 +appli 0 71 2.639057 0.000000 226 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +meet 0 72 2.639057 0.000000 229 +would 0 67 2.708050 0.000000 251 +view 0 70 2.708050 0.000000 254 +integr 0 67 2.708050 0.000000 245 +sieg 0 69 2.708050 0.000000 260 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +guid 0 63 2.772589 0.000000 267 +plan 0 65 2.772589 0.000000 272 +laboratori 0 63 2.772589 0.000000 292 +experi 0 64 2.772589 0.000000 283 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +back 0 60 2.833213 0.000000 297 +explor 0 58 2.890372 0.000000 324 +summer 0 56 2.890372 0.000000 311 +direct 0 57 2.890372 0.000000 316 +extens 0 53 2.944439 0.000000 340 +three 0 54 2.944439 0.000000 330 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +adapt 0 46 3.091042 0.000000 387 +quarter 0 47 3.091042 0.000000 389 +featur 0 46 3.091042 0.000000 386 +keep 0 44 3.135494 0.000000 409 +made 0 44 3.135494 0.000000 398 +better 0 45 3.135494 0.000000 401 +even 0 45 3.135494 0.000000 393 +around 0 43 3.178054 0.000000 415 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +author 0 39 3.258097 0.000000 450 +join 0 39 3.258097 0.000000 457 +littl 0 39 3.258097 0.000000 454 +probabl 0 40 3.258097 0.000000 455 +realli 0 40 3.258097 0.000000 444 +vita 0 38 3.295837 0.000000 473 +seminar 0 38 3.295837 0.000000 470 +feel 0 37 3.332205 0.000000 483 +singl 0 34 3.401197 0.000000 510 +approxim 0 35 3.401197 0.000000 509 +word 0 34 3.401197 0.000000 508 +curriculum 0 33 3.433987 0.000000 535 +obtain 0 33 3.433987 0.000000 534 +kind 0 32 3.465736 0.000000 541 +independ 0 32 3.465736 0.000000 548 +someth 0 31 3.496508 0.000000 554 +exist 0 30 3.555348 0.000000 569 +profil 0 30 3.555348 0.000000 581 +specifi 0 30 3.555348 0.000000 568 +travel 0 30 3.555348 0.000000 579 +pass 0 28 3.610918 0.000000 611 +scale 0 28 3.610918 0.000000 613 +becom 0 28 3.610918 0.000000 603 +framework 0 28 3.610918 0.000000 606 +effort 0 26 3.688879 0.000000 652 +consist 0 26 3.688879 0.000000 651 +enjoi 0 26 3.688879 0.000000 660 +rather 0 26 3.688879 0.000000 642 +jeff 0 25 3.737670 0.000000 673 +spent 0 25 3.737670 0.000000 676 +concern 0 25 3.737670 0.000000 666 +toward 0 25 3.737670 0.000000 668 +wai 0 25 3.737670 0.000000 662 +client 0 25 3.737670 0.000000 679 +never 0 25 3.737670 0.000000 671 +highli 0 23 3.806662 0.000000 725 +defin 0 22 3.850148 0.000000 746 +recommend 0 22 3.850148 0.000000 737 +hous 0 21 3.912023 0.000000 801 +programminglanguag 0 21 3.912023 0.000000 782 +flexibl 0 21 3.912023 0.000000 792 +love 0 21 3.912023 0.000000 804 +entir 0 20 3.951244 0.000000 811 +nice 0 20 3.951244 0.000000 809 +minut 0 20 3.951244 0.000000 810 +predict 0 19 4.007333 0.000000 855 +four 0 18 4.060443 0.000000 905 +whole 0 17 4.110874 0.000000 940 +permit 0 16 4.174387 0.000000 962 +chateau 0 16 4.174387 0.000000 997 +letter 0 16 4.174387 0.000000 981 +anyth 0 16 4.174387 0.000000 998 +took 0 16 4.174387 0.000000 1010 +track 0 15 4.248495 0.000000 1029 +enough 0 15 4.248495 0.000000 1040 +doesn 0 15 4.248495 0.000000 1055 +dean 0 14 4.317488 0.000000 1104 +spin 0 14 4.317488 0.000000 1121 +split 0 14 4.317488 0.000000 1078 +wife 0 13 4.382027 0.000000 1196 +primarili 0 13 4.382027 0.000000 1185 +composit 0 13 4.382027 0.000000 1150 +hotlist 0 13 4.382027 0.000000 1199 +uniqu 0 12 4.465908 0.000000 1228 +iter 0 12 4.465908 0.000000 1206 +food 0 12 4.465908 0.000000 1285 +walk 0 12 4.465908 0.000000 1281 +valid 0 11 4.553877 0.000000 1299 +moment 0 11 4.553877 0.000000 1379 +true 0 10 4.653960 0.000000 1422 +guess 0 10 4.653960 0.000000 1443 +cecil 0 9 4.753590 0.000000 1547 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +hang 0 9 4.753590 0.000000 1499 +inter 0 9 4.753590 0.000000 1530 +compos 0 9 4.753590 0.000000 1527 +sound 0 9 4.753590 0.000000 1605 +ride 0 8 4.875197 0.000000 1741 +pure 0 8 4.875197 0.000000 1776 +isol 0 8 4.875197 0.000000 1663 +closur 0 8 4.875197 0.000000 1643 +bug 0 7 5.010635 0.000000 1801 +dead 0 7 5.010635 0.000000 1840 +daughter 0 7 5.010635 0.000000 1943 +affect 0 6 5.164786 0.000000 2044 +park 0 6 5.164786 0.000000 2218 +increment 0 6 5.164786 0.000000 2206 +creation 0 6 5.164786 0.000000 2069 +vortex 0 5 5.347108 0.000000 2362 +spinproject 0 5 5.347108 0.000000 2570 +unnecessari 0 5 5.347108 0.000000 2506 +lesson 0 5 5.347108 0.000000 2568 +western 0 4 5.568345 0.000000 3062 +usedto 0 4 5.568345 0.000000 2643 +inlin 0 4 5.568345 0.000000 2964 +enjoy 0 4 5.568345 0.000000 2937 +insur 0 4 5.568345 0.000000 2939 +coverag 0 4 5.568345 0.000000 2656 +nearbi 0 3 5.857933 0.000000 3291 +langaug 0 3 5.857933 0.000000 3661 +stillmaintain 0 3 5.857933 0.000000 3964 +ofobject 0 3 5.857933 0.000000 3399 +forobject 0 3 5.857933 0.000000 3965 +kick 0 3 5.857933 0.000000 3962 +habit 0 3 5.857933 0.000000 3777 +somedai 0 3 5.857933 0.000000 3919 +fantast 0 3 5.857933 0.000000 3966 +flight 0 2 6.263398 0.000000 5911 +bought 0 2 6.263398 0.000000 5165 +projectsi 0 2 6.263398 0.000000 5931 +andto 0 2 6.263398 0.000000 5771 +vortexcompil 0 2 6.263398 0.000000 5932 +interfacesand 0 2 6.263398 0.000000 5206 +andhow 0 2 6.263398 0.000000 5933 +intraprocedur 0 2 6.263398 0.000000 5934 +coke 0 2 6.263398 0.000000 5935 +caffein 0 2 6.263398 0.000000 5936 +galvin 0 2 6.263398 0.000000 4160 +fly 0 2 6.263398 0.000000 5937 +anymor 0 2 6.263398 0.000000 5938 +downtown 0 2 6.263398 0.000000 5642 +wing 0 2 6.263398 0.000000 4864 +lengthi 0 2 6.263398 0.000000 4273 +jdean 0 2 6.263398 0.000000 4455 +biplan 0 1 6.957497 0.000000 15588 +dang 0 1 6.957497 0.000000 15589 +weren 0 1 6.957497 0.000000 15590 +plansi 0 1 6.957497 0.000000 15591 +sunni 0 1 6.957497 0.000000 15592 +menlo 0 1 6.957497 0.000000 15593 +avehicl 0 1 6.957497 0.000000 15594 +weintend 0 1 6.957497 0.000000 15595 +codein 0 1 6.957497 0.000000 15596 +systemmicrokernel 0 1 6.957497 0.000000 15597 +especiallyprofil 0 1 6.957497 0.000000 15598 +howwhol 0 1 6.957497 0.000000 15599 +assumedthat 0 1 6.957497 0.000000 15600 +manycompromis 0 1 6.957497 0.000000 15601 +wholeprogram 0 1 6.957497 0.000000 15602 +underlyingimplement 0 1 6.957497 0.000000 15603 +principaldesign 0 1 6.957497 0.000000 15604 +independentintermedi 0 1 6.957497 0.000000 15605 +ishigh 0 1 6.957497 0.000000 15606 +messagesend 0 1 6.957497 0.000000 15607 +wayof 0 1 6.957497 0.000000 15608 +repeatedli 0 1 6.957497 0.000000 15609 +passessepar 0 1 6.957497 0.000000 15610 +classanalysi 0 1 6.957497 0.000000 15611 +aliasanalysi 0 1 6.957497 0.000000 15612 +structuringoptim 0 1 6.957497 0.000000 15613 +stillallow 0 1 6.957497 0.000000 15614 +eachoth 0 1 6.957497 0.000000 15615 +flowanalys 0 1 6.957497 0.000000 15616 +withrel 0 1 6.957497 0.000000 15617 +assignmentelimin 0 1 6.957497 0.000000 15618 +publicationssom 0 1 6.957497 0.000000 15619 +personali 0 1 6.957497 0.000000 15620 +spici 0 1 6.957497 0.000000 15621 +mild 0 1 6.957497 0.000000 15622 +heidi 0 1 6.957497 0.000000 15623 +victoria 0 1 6.957497 0.000000 15624 +honeymoon 0 1 6.957497 0.000000 15625 +kauai 0 1 6.957497 0.000000 15626 +hurrican 0 1 6.957497 0.000000 15627 +iniki 0 1 6.957497 0.000000 15628 +puget 0 1 6.957497 0.000000 15629 +dare 0 1 6.957497 0.000000 15630 +sadli 0 1 6.957497 0.000000 15631 +passeng 0 1 6.957497 0.000000 15632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..0db3ceaa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +softwar 1 220 1.386294 1.386294 30 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +document 0 121 2.079442 0.000000 89 +high 0 130 2.079442 0.000000 101 +specif 0 106 2.197225 0.000000 106 +place 0 106 2.197225 0.000000 124 +make 0 111 2.197225 0.000000 120 +structur 0 106 2.197225 0.000000 105 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +techniqu 0 99 2.302585 0.000000 138 +commun 0 95 2.397895 0.000000 157 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +requir 0 81 2.484907 0.000000 167 +control 0 82 2.484907 0.000000 164 +ieee 0 86 2.484907 0.000000 190 +academ 0 82 2.484907 0.000000 178 +school 0 84 2.484907 0.000000 188 +state 0 76 2.564949 0.000000 207 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +dynam 0 76 2.564949 0.000000 194 +name 0 72 2.639057 0.000000 220 +html 0 75 2.639057 0.000000 235 +write 0 72 2.639057 0.000000 222 +workshop 0 71 2.639057 0.000000 239 +integr 0 67 2.708050 0.000000 245 +complex 0 64 2.772589 0.000000 269 +improv 0 62 2.772589 0.000000 289 +septemb 0 65 2.772589 0.000000 274 +januari 0 62 2.772589 0.000000 264 +experi 0 64 2.772589 0.000000 283 +best 0 59 2.833213 0.000000 299 +reason 0 57 2.890372 0.000000 318 +thesi 0 57 2.890372 0.000000 327 +three 0 54 2.944439 0.000000 330 +februari 0 54 2.944439 0.000000 328 +hardwar 0 51 2.995732 0.000000 350 +possibl 0 47 3.091042 0.000000 378 +california 0 46 3.091042 0.000000 388 +discuss 0 45 3.135494 0.000000 399 +transact 0 39 3.258097 0.000000 438 +industri 0 38 3.295837 0.000000 464 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +procedur 0 36 3.367296 0.000000 488 +articl 0 33 3.433987 0.000000 530 +dissert 0 32 3.465736 0.000000 549 +concept 0 32 3.465736 0.000000 537 +rang 0 30 3.555348 0.000000 565 +specifi 0 30 3.555348 0.000000 568 +semant 0 29 3.583519 0.000000 587 +becom 0 28 3.610918 0.000000 603 +great 0 27 3.637586 0.000000 626 +doctor 0 24 3.761200 0.000000 709 +interpret 0 24 3.761200 0.000000 686 +displai 0 23 3.806662 0.000000 712 +flexibl 0 21 3.912023 0.000000 792 +safeti 0 20 3.951244 0.000000 817 +histori 0 19 4.007333 0.000000 853 +less 0 18 4.060443 0.000000 892 +behavior 0 18 4.060443 0.000000 881 +concentr 0 18 4.060443 0.000000 906 +engineeringunivers 0 17 4.110874 0.000000 959 +steven 0 17 4.110874 0.000000 953 +critic 0 16 4.174387 0.000000 982 +advantag 0 16 4.174387 0.000000 987 +chateau 0 16 4.174387 0.000000 997 +devic 0 16 4.174387 0.000000 1002 +convent 0 14 4.317488 0.000000 1072 +draft 0 14 4.317488 0.000000 1085 +washingtonbox 0 13 4.382027 0.000000 1200 +difficulti 0 13 4.382027 0.000000 1132 +signific 0 13 4.382027 0.000000 1125 +nanci 0 12 4.465908 0.000000 1256 +island 0 11 4.553877 0.000000 1345 +valid 0 11 4.553877 0.000000 1299 +summar 0 11 4.553877 0.000000 1295 +alpha 0 11 4.553877 0.000000 1348 +rice 0 11 4.553877 0.000000 1336 +itali 0 11 4.553877 0.000000 1378 +success 0 10 4.653960 0.000000 1390 +kurt 0 9 4.753590 0.000000 1548 +leveson 0 9 4.753590 0.000000 1540 +respect 0 9 4.753590 0.000000 1545 +linguist 0 9 4.753590 0.000000 1593 +guggenheim 0 8 4.875197 0.000000 1759 +fail 0 8 4.875197 0.000000 1655 +perhap 0 8 4.875197 0.000000 1693 +mile 0 8 4.875197 0.000000 1743 +sean 0 8 4.875197 0.000000 1705 +irvin 0 8 4.875197 0.000000 1660 +curv 0 8 4.875197 0.000000 1656 +awar 0 7 5.010635 0.000000 1800 +henc 0 7 5.010635 0.000000 1805 +sixth 0 7 5.010635 0.000000 1917 +price 0 6 5.164786 0.000000 1999 +emerg 0 6 5.164786 0.000000 2038 +transcript 0 6 5.164786 0.000000 2067 +variant 0 6 5.164786 0.000000 2043 +annex 0 5 5.347108 0.000000 2572 +caus 0 5 5.347108 0.000000 2298 +stage 0 5 5.347108 0.000000 2488 +colleagu 0 5 5.347108 0.000000 2304 +ortega 0 5 5.347108 0.000000 2559 +expens 0 4 5.568345 0.000000 2678 +avion 0 4 5.568345 0.000000 3018 +invent 0 4 5.568345 0.000000 3028 +sandi 0 4 5.568345 0.000000 2765 +rsml 0 3 5.857933 0.000000 3967 +hazard 0 3 5.857933 0.000000 3191 +partridg 0 3 5.857933 0.000000 3346 +diagnos 0 3 5.857933 0.000000 3968 +borrow 0 3 5.857933 0.000000 3725 +publicli 0 3 5.857933 0.000000 3687 +diagnost 0 3 5.857933 0.000000 3833 +deviat 0 2 6.263398 0.000000 4826 +rees 0 2 6.263398 0.000000 5939 +heimdahl 0 2 6.263398 0.000000 5940 +unpredict 0 2 6.263398 0.000000 5722 +incid 0 2 6.263398 0.000000 5870 +tca 0 2 6.263398 0.000000 5941 +mat 0 2 6.263398 0.000000 5942 +holli 0 2 6.263398 0.000000 5601 +damon 0 1 6.957497 0.000000 15633 +jdrees 0 1 6.957497 0.000000 15634 +hazop 0 1 6.957497 0.000000 15635 +waxahachi 0 1 6.957497 0.000000 15636 +hildreth 0 1 6.957497 0.000000 15637 +pagejon 0 1 6.957497 0.000000 15638 +reesepost 0 1 6.957497 0.000000 15639 +groupdepart 0 1 6.957497 0.000000 15640 +catastroph 0 1 6.957497 0.000000 15641 +wider 0 1 6.957497 0.000000 15642 +siang 0 1 6.957497 0.000000 15643 +dolin 0 1 6.957497 0.000000 15644 +statechart 0 1 6.957497 0.000000 15645 +como 0 1 6.957497 0.000000 15646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..5eb742ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +compil 0 122 2.079442 0.000000 96 +postscript 0 131 2.079442 0.000000 90 +schedul 0 119 2.079442 0.000000 85 +report 0 131 2.079442 0.000000 92 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +proceed 0 93 2.397895 0.000000 152 +pictur 0 89 2.397895 0.000000 160 +level 0 87 2.484907 0.000000 180 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +dynam 0 76 2.564949 0.000000 194 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +abstract 0 62 2.772589 0.000000 276 +written 0 63 2.772589 0.000000 278 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +juli 0 60 2.833213 0.000000 305 +room 0 59 2.833213 0.000000 301 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +california 0 46 3.091042 0.000000 388 +examin 0 42 3.218876 0.000000 424 +submit 0 39 3.258097 0.000000 440 +annual 0 40 3.258097 0.000000 458 +static 0 27 3.637586 0.000000 619 +compar 0 26 3.688879 0.000000 648 +thread 0 23 3.806662 0.000000 722 +exploit 0 20 3.951244 0.000000 836 +increas 0 20 3.951244 0.000000 829 +stanford 0 17 4.110874 0.000000 955 +coupl 0 17 4.110874 0.000000 939 +choic 0 16 4.174387 0.000000 979 +susan 0 15 4.248495 0.000000 1050 +levi 0 14 4.317488 0.000000 1093 +balanc 0 14 4.317488 0.000000 1112 +dean 0 14 4.317488 0.000000 1104 +convert 0 13 4.382027 0.000000 1122 +sigplan 0 13 4.382027 0.000000 1190 +philadelphia 0 12 4.465908 0.000000 1244 +multithread 0 11 4.553877 0.000000 1315 +henri 0 10 4.653960 0.000000 1417 +franklin 0 10 4.653960 0.000000 1436 +jack 1 8 4.875197 4.875197 1780 +egger 0 8 4.875197 0.000000 1695 +joel 0 8 4.875197 0.000000 1698 +qualifi 0 8 4.875197 0.000000 1721 +simultan 0 6 5.164786 0.000000 2155 +tullsen 0 6 5.164786 0.000000 2081 +rebecca 0 6 5.164786 0.000000 2174 +superscalar 0 6 5.164786 0.000000 2082 +fetch 0 5 5.347108 0.000000 2567 +jolla 0 4 5.568345 0.000000 2988 +emer 0 3 5.857933 0.000000 3969 +stamm 0 3 5.857933 0.000000 3970 +vliw 0 3 5.857933 0.000000 3514 +lojlo 0 2 6.263398 0.000000 5943 +suif 0 2 6.263398 0.000000 5944 +anddean 0 1 6.957497 0.000000 15647 +lojack 0 1 6.957497 0.000000 15648 +loph 0 1 6.957497 0.000000 15649 +eseattl 0 1 6.957497 0.000000 15650 +orsieg 0 1 6.957497 0.000000 15651 +paintbal 0 1 6.957497 0.000000 15652 +yahoojlo 0 1 6.957497 0.000000 15653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..fe331e3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +topic 0 114 2.197225 0.000000 110 +send 0 114 2.197225 0.000000 109 +user 1 104 2.302585 2.302585 137 +activ 0 84 2.484907 0.000000 182 +interfac 1 79 2.564949 2.564949 209 +want 0 79 2.564949 0.000000 199 +automat 0 61 2.833213 0.000000 306 +browser 0 56 2.890372 0.000000 313 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +visual 0 48 3.044522 0.000000 372 +directori 0 45 3.135494 0.000000 396 +might 0 41 3.218876 0.000000 426 +survei 0 35 3.401197 0.000000 513 +navig 0 21 3.912023 0.000000 796 +engineeringunivers 0 17 4.110874 0.000000 959 +washingtonbox 0 13 4.382027 0.000000 1200 +impress 0 6 5.164786 0.000000 2096 +sherman 0 1 6.957497 0.000000 15654 +shermanjoebob 0 1 6.957497 0.000000 15655 +usami 0 1 6.957497 0.000000 15656 +designinform 0 1 6.957497 0.000000 15657 +useclass 0 1 6.957497 0.000000 15658 +hcreat 0 1 6.957497 0.000000 15659 +pagequ 0 1 6.957497 0.000000 15660 +sarahsoftballstuff 0 1 6.957497 0.000000 15661 +pagesif 0 1 6.957497 0.000000 15662 +tojoebob 0 1 6.957497 0.000000 15663 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..97583a34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +year 0 148 1.945910 0.000000 84 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +present 0 91 2.397895 0.000000 145 +learn 0 86 2.484907 0.000000 170 +start 0 83 2.484907 0.000000 173 +test 0 66 2.708050 0.000000 252 +abstract 0 62 2.772589 0.000000 276 +organ 0 65 2.772589 0.000000 265 +colleg 0 61 2.833213 0.000000 300 +sever 0 56 2.890372 0.000000 322 +discuss 0 45 3.135494 0.000000 399 +futur 0 41 3.218876 0.000000 427 +expect 0 37 3.332205 0.000000 484 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +neural 0 30 3.555348 0.000000 578 +travel 0 30 3.555348 0.000000 579 +task 0 25 3.737670 0.000000 678 +demonstr 0 24 3.761200 0.000000 694 +theunivers 0 21 3.912023 0.000000 797 +thought 0 17 4.110874 0.000000 945 +cognit 0 16 4.174387 0.000000 986 +achiev 0 14 4.317488 0.000000 1088 +context 0 13 4.382027 0.000000 1153 +accomplish 0 8 4.875197 0.000000 1755 +potenti 0 8 4.875197 0.000000 1690 +creativ 0 8 4.875197 0.000000 1777 +successfulli 0 7 5.010635 0.000000 1869 +earn 0 7 5.010635 0.000000 1788 +biolog 0 6 5.164786 0.000000 2147 +slate 0 6 5.164786 0.000000 2021 +addition 0 4 5.568345 0.000000 2593 +joshua 1 3 5.857933 5.857933 3333 +blank 0 3 5.857933 0.000000 3379 +emul 0 3 5.857933 0.000000 3944 +josh 1 2 6.263398 6.263398 5945 +overviewof 0 2 6.263398 0.000000 5469 +seim 1 1 6.957497 6.957497 15664 +begunin 0 1 6.957497 0.000000 15665 +lockean 0 1 6.957497 0.000000 15666 +observedbehavior 0 1 6.957497 0.000000 15667 +graduatingfrom 0 1 6.957497 0.000000 15668 +volit 0 1 6.957497 0.000000 15669 +taskw 0 1 6.957497 0.000000 15670 +ambulatori 0 1 6.957497 0.000000 15671 +academichierarchi 0 1 6.957497 0.000000 15672 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..6ea08ba6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +move 0 47 3.091042 0.000000 382 +jovan 1 2 6.263398 6.263398 5842 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..21e23b86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +recent 0 167 1.791759 0.000000 58 +site 0 106 2.197225 0.000000 119 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +academ 0 82 2.484907 0.000000 178 +school 0 84 2.484907 0.000000 188 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +david 0 71 2.639057 0.000000 232 +main 0 67 2.708050 0.000000 256 +major 0 56 2.890372 0.000000 315 +york 0 41 3.218876 0.000000 435 +especi 0 36 3.367296 0.000000 496 +power 0 30 3.555348 0.000000 573 +color 0 22 3.850148 0.000000 762 +grad 0 20 3.951244 0.000000 837 +eric 0 19 4.007333 0.000000 870 +women 0 16 4.174387 0.000000 1004 +biologi 0 15 4.248495 0.000000 1049 +comic 0 14 4.317488 0.000000 1103 +jonathan 0 13 4.382027 0.000000 1174 +brad 0 12 4.465908 0.000000 1264 +interestsmi 0 10 4.653960 0.000000 1462 +genet 0 10 4.653960 0.000000 1409 +gain 0 8 4.875197 0.000000 1730 +siggraph 0 8 4.875197 0.000000 1773 +sean 0 8 4.875197 0.000000 1705 +molecular 0 7 5.010635 0.000000 1887 +cat 0 6 5.164786 0.000000 2194 +salesin 0 4 5.568345 0.000000 3051 +alma 0 3 5.857933 0.000000 3963 +joanna 1 2 6.263398 6.263398 4503 +reproduc 0 2 6.263398 0.000000 5519 +powerjoanna 0 1 6.957497 0.000000 15673 +pagehi 0 1 6.957497 0.000000 15674 +uwneat 0 1 6.957497 0.000000 15675 +matercool 0 1 6.957497 0.000000 15676 +shadegraph 0 1 6.957497 0.000000 15677 +uwduoton 0 1 6.957497 0.000000 15678 +reproductionmi 0 1 6.957497 0.000000 15679 +matermost 0 1 6.957497 0.000000 15680 +employmentpubl 0 1 6.957497 0.000000 15681 +stollnitz 0 1 6.957497 0.000000 15682 +duoton 0 1 6.957497 0.000000 15683 +lifepast 0 1 6.957497 0.000000 15684 +homesdiversionsgend 0 1 6.957497 0.000000 15685 +issuesstatu 0 1 6.957497 0.000000 15686 +sciencenow 0 1 6.957497 0.000000 15687 +pagefeminist 0 1 6.957497 0.000000 15688 +onlineultim 0 1 6.957497 0.000000 15689 +frisbeefun 0 1 6.957497 0.000000 15690 +stufffroggi 0 1 6.957497 0.000000 15691 +quotesbrad 0 1 6.957497 0.000000 15692 +musicevan 0 1 6.957497 0.000000 15693 +jokes 0 1 6.957497 0.000000 15694 +pagesmi 0 1 6.957497 0.000000 15695 +herojpow 0 1 6.957497 0.000000 15696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..2138089a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +updat 0 191 1.609438 0.000000 41 +hall 0 146 1.945910 0.000000 65 +seattl 0 120 2.079442 0.000000 103 +homepag 0 93 2.397895 0.000000 148 +sieg 0 69 2.708050 0.000000 260 +august 0 66 2.708050 0.000000 257 +jonathan 1 13 4.382027 4.382027 1174 +ahoi 0 3 5.857933 0.000000 3532 +shake 1 2 6.263398 6.263398 5898 +finderresumlinkslast 0 1 6.957497 0.000000 15697 +jshake 0 1 6.957497 0.000000 15698 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..66e8c83d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +juan 1 9 4.753590 4.753590 1580 +alemanyjuan 1 1 6.957497 6.957497 15699 +alemani 1 1 6.957497 6.957497 15700 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..51a63300 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +report 0 131 2.079442 0.000000 92 +seattl 0 120 2.079442 0.000000 103 +find 0 111 2.197225 0.000000 111 +mathemat 0 108 2.197225 0.000000 123 +topic 0 114 2.197225 0.000000 110 +technic 0 100 2.302585 0.000000 140 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +sieg 0 69 2.708050 0.000000 260 +function 0 62 2.772589 0.000000 275 +understand 0 47 3.091042 0.000000 384 +third 0 43 3.178054 0.000000 412 +vision 0 41 3.218876 0.000000 430 +combin 0 42 3.218876 0.000000 421 +examin 0 42 3.218876 0.000000 424 +multipl 0 39 3.258097 0.000000 453 +slide 0 38 3.295837 0.000000 467 +microsoft 0 38 3.295837 0.000000 468 +statist 0 35 3.401197 0.000000 521 +taught 0 33 3.433987 0.000000 526 +obtain 0 33 3.433987 0.000000 534 +rang 0 30 3.555348 0.000000 565 +actual 0 28 3.610918 0.000000 604 +pass 0 28 3.610918 0.000000 611 +aspect 0 25 3.737670 0.000000 663 +ofwashington 0 22 3.850148 0.000000 766 +try 0 22 3.850148 0.000000 764 +geometri 0 22 3.850148 0.000000 752 +left 0 19 4.007333 0.000000 851 +steven 0 17 4.110874 0.000000 953 +qual 0 15 4.248495 0.000000 1062 +universityof 0 15 4.248495 0.000000 1061 +reflect 0 15 4.248495 0.000000 1034 +remov 0 12 4.465908 0.000000 1225 +werner 0 10 4.653960 0.000000 1385 +linda 0 10 4.653960 0.000000 1394 +tanimoto 0 10 4.653960 0.000000 1429 +surfac 0 9 4.753590 0.000000 1574 +folk 0 9 4.753590 0.000000 1597 +siggraph 0 8 4.875197 0.000000 1773 +theclass 0 6 5.164786 0.000000 2060 +speaker 0 5 5.347108 0.000000 2370 +engineeringdepart 0 4 5.568345 0.000000 2917 +closest 0 4 5.568345 0.000000 2828 +addition 0 4 5.568345 0.000000 2593 +rick 0 4 5.568345 0.000000 2646 +wavelet 0 4 5.568345 0.000000 2874 +union 0 4 5.568345 0.000000 2634 +kari 0 2 6.263398 0.000000 4500 +andmathemat 0 2 6.263398 0.000000 4948 +tonyderos 0 2 6.263398 0.000000 5839 +stuetzl 0 2 6.263398 0.000000 5840 +duchamp 0 2 6.263398 0.000000 5841 +hopp 0 2 6.263398 0.000000 5092 +sketch 0 2 6.263398 0.000000 5946 +getto 0 2 6.263398 0.000000 5806 +herear 0 2 6.263398 0.000000 5947 +pulli 0 1 6.957497 0.000000 15701 +antero 0 1 6.957497 0.000000 15702 +subdivis 0 1 6.957497 0.000000 15703 +pagekari 0 1 6.957497 0.000000 15704 +pullii 0 1 6.957497 0.000000 15705 +thesedisciplin 0 1 6.957497 0.000000 15706 +uwfor 0 1 6.957497 0.000000 15707 +pixar 0 1 6.957497 0.000000 15708 +lindashapiro 0 1 6.957497 0.000000 15709 +andjohn 0 1 6.957497 0.000000 15710 +mcdonald 0 1 6.957497 0.000000 15711 +andhugu 0 1 6.957497 0.000000 15712 +szeliski 0 1 6.957497 0.000000 15713 +tribor 0 1 6.957497 0.000000 15714 +triplet 0 1 6.957497 0.000000 15715 +recognitionsystem 0 1 6.957497 0.000000 15716 +surfacereconstruct 0 1 6.957497 0.000000 15717 +baselin 0 1 6.957497 0.000000 15718 +camerasystem 0 1 6.957497 0.000000 15719 +waveletanalysi 0 1 6.957497 0.000000 15720 +rigidregistr 0 1 6.957497 0.000000 15721 +architecturesystem 0 1 6.957497 0.000000 15722 +susanegg 0 1 6.957497 0.000000 15723 +brianbershad 0 1 6.957497 0.000000 15724 +eacutesum 0 1 6.957497 0.000000 15725 +eacut 0 1 6.957497 0.000000 15726 +kapu 0 1 6.957497 0.000000 15727 +takavainionti 0 1 6.957497 0.000000 15728 +oulu 0 1 6.957497 0.000000 15729 +finland 0 1 6.957497 0.000000 15730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..dd1e0c65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +professor 0 137 1.945910 0.000000 76 +seattl 0 120 2.079442 0.000000 103 +anna 0 12 4.465908 0.000000 1292 +karlinanna 0 1 6.957497 0.000000 15731 +rochel 0 1 6.957497 0.000000 15732 +karlinassoci 0 1 6.957497 0.000000 15733 +sincejuli 0 1 6.957497 0.000000 15734 +paperskarlin 0 1 6.957497 0.000000 15735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..7d423d46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +email 0 220 1.386294 0.000000 29 +back 0 60 2.833213 0.000000 297 +yeunghom 0 1 6.957497 0.000000 15736 +yeungperson 0 1 6.957497 0.000000 15737 +infomi 0 1 6.957497 0.000000 15738 +picturemi 0 1 6.957497 0.000000 15739 +researchtelnet 0 1 6.957497 0.000000 15740 +machinessend 0 1 6.957497 0.000000 15741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..9d9e898c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +area 0 144 1.945910 0.000000 80 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +seattl 0 120 2.079442 0.000000 103 +specif 0 106 2.197225 0.000000 106 +user 0 104 2.302585 0.000000 137 +environ 0 84 2.484907 0.000000 177 +academ 0 82 2.484907 0.000000 178 +start 0 83 2.484907 0.000000 173 +school 0 84 2.484907 0.000000 188 +method 0 80 2.564949 0.000000 213 +interfac 0 79 2.564949 0.000000 209 +complet 0 77 2.564949 0.000000 208 +html 0 75 2.639057 0.000000 235 +name 0 72 2.639057 0.000000 220 +java 0 70 2.708050 0.000000 248 +interact 0 62 2.772589 0.000000 270 +visual 0 48 3.044522 0.000000 372 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +describ 0 45 3.135494 0.000000 400 +live 0 40 3.258097 0.000000 451 +formal 0 37 3.332205 0.000000 478 +human 0 32 3.465736 0.000000 546 +manipul 0 27 3.637586 0.000000 624 +berkelei 0 26 3.688879 0.000000 657 +background 0 25 3.737670 0.000000 664 +other 0 24 3.761200 0.000000 697 +love 0 21 3.912023 0.000000 804 +voic 0 21 3.912023 0.000000 806 +safeti 0 20 3.951244 0.000000 817 +binari 0 20 3.951244 0.000000 823 +qualiti 0 20 3.951244 0.000000 832 +critic 0 16 4.174387 0.000000 982 +nasa 0 13 4.382027 0.000000 1188 +readabl 0 12 4.465908 0.000000 1258 +nanci 0 12 4.465908 0.000000 1256 +kurt 1 9 4.753590 4.753590 1548 +leveson 0 9 4.753590 0.000000 1540 +sister 0 9 4.753590 0.000000 1524 +wayn 0 8 4.875197 0.000000 1738 +poster 0 7 5.010635 0.000000 1814 +usabl 0 7 5.010635 0.000000 1810 +corner 0 7 5.010635 0.000000 1909 +vivek 0 6 5.164786 0.000000 2210 +parent 0 6 5.164786 0.000000 2204 +ohlrich 0 5 5.347108 0.000000 2564 +humor 0 5 5.347108 0.000000 2533 +partridg 0 3 5.857933 0.000000 3346 +dabbl 0 3 5.857933 0.000000 3971 +preview 0 3 5.857933 0.000000 3306 +bauer 0 2 6.263398 0.000000 5117 +mat 0 2 6.263398 0.000000 5942 +heimdahl 0 2 6.263398 0.000000 5940 +ratan 0 2 6.263398 0.000000 5948 +rees 0 2 6.263398 0.000000 5939 +thousand 0 2 6.263398 0.000000 5949 +oak 0 2 6.263398 0.000000 5566 +kepart 0 2 6.263398 0.000000 4459 +bddtcl 0 1 6.957497 0.000000 15742 +decisiondiagram 0 1 6.957497 0.000000 15743 +suburban 0 1 6.957497 0.000000 15744 +oti 0 1 6.957497 0.000000 15745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..59940977 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +stuff 0 87 2.484907 0.000000 171 +complet 0 77 2.564949 0.000000 208 +advisor 0 51 2.995732 0.000000 355 +friend 0 48 3.044522 0.000000 376 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +collabor 0 32 3.465736 0.000000 543 +suit 0 13 4.382027 0.000000 1129 +tour 0 11 4.553877 0.000000 1307 +ofcomput 0 10 4.653960 0.000000 1442 +weld 0 9 4.753590 0.000000 1538 +dictionari 0 8 4.875197 0.000000 1642 +golden 1 7 5.010635 5.010635 1962 +photographi 0 6 5.164786 0.000000 2146 +oren 0 6 5.164786 0.000000 2134 +etzioni 0 6 5.164786 0.000000 2135 +keith 1 5 5.347108 5.347108 2528 +paint 0 5 5.347108 0.000000 2400 +coffe 0 5 5.347108 0.000000 2556 +lawyer 0 4 5.568345 0.000000 2836 +car 0 4 5.568345 0.000000 2931 +bicycl 0 2 6.263398 0.000000 5950 +questa 0 1 6.957497 0.000000 15746 +pagina 0 1 6.957497 0.000000 15747 +anch 0 1 6.957497 0.000000 15748 +italiano 0 1 6.957497 0.000000 15749 +researchsoftbotsplanningkrselect 0 1 6.957497 0.000000 15750 +publicationscurriculum 0 1 6.957497 0.000000 15751 +inpostscriptrandom 0 1 6.957497 0.000000 15752 +hackingwordbot 0 1 6.957497 0.000000 15753 +godless 0 1 6.957497 0.000000 15754 +pinko 0 1 6.957497 0.000000 15755 +dislik 0 1 6.957497 0.000000 15756 +ellenmarcruben 0 1 6.957497 0.000000 15757 +laurennickrich 0 1 6.957497 0.000000 15758 +joannavivek 0 1 6.957497 0.000000 15759 +keithgolden 0 1 6.957497 0.000000 15760 +kgolden 0 1 6.957497 0.000000 15761 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..3de54579 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,144 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +schedul 0 119 2.079442 0.000000 85 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +technolog 0 131 2.079442 0.000000 102 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +site 0 106 2.197225 0.000000 119 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +wide 0 84 2.484907 0.000000 185 +school 0 84 2.484907 0.000000 188 +resum 0 79 2.564949 0.000000 217 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +line 0 75 2.639057 0.000000 231 +workshop 0 71 2.639057 0.000000 239 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +polici 0 64 2.772589 0.000000 279 +new 0 64 2.772589 0.000000 262 +visit 0 63 2.772589 0.000000 288 +content 0 59 2.833213 0.000000 302 +automat 0 61 2.833213 0.000000 306 +march 0 61 2.833213 0.000000 295 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +advisor 0 51 2.995732 0.000000 355 +date 0 51 2.995732 0.000000 344 +format 0 48 3.044522 0.000000 356 +editor 0 41 3.218876 0.000000 433 +movi 0 40 3.258097 0.000000 459 +respons 0 37 3.332205 0.000000 476 +china 0 37 3.332205 0.000000 487 +manual 0 35 3.401197 0.000000 504 +transform 0 32 3.465736 0.000000 542 +dissert 0 32 3.465736 0.000000 549 +specifi 0 30 3.555348 0.000000 568 +quot 0 29 3.583519 0.000000 582 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +revis 0 26 3.688879 0.000000 640 +experiment 0 26 3.688879 0.000000 645 +alwai 0 24 3.761200 0.000000 691 +william 0 22 3.850148 0.000000 765 +wang 0 21 3.912023 0.000000 790 +watch 0 21 3.912023 0.000000 789 +fund 0 21 3.912023 0.000000 805 +qualiti 0 20 3.951244 0.000000 832 +minut 0 20 3.951244 0.000000 810 +citi 0 19 4.007333 0.000000 874 +thoma 0 18 4.060443 0.000000 901 +stock 0 16 4.174387 0.000000 1007 +driven 0 15 4.248495 0.000000 1048 +style 0 15 4.248495 0.000000 1036 +hong 0 14 4.317488 0.000000 1105 +asynchron 0 12 4.465908 0.000000 1229 +readi 0 12 4.465908 0.000000 1242 +evolut 0 11 4.553877 0.000000 1314 +market 0 11 4.553877 0.000000 1361 +kong 0 9 4.753590 0.000000 1602 +mainten 0 9 4.753590 0.000000 1543 +upcom 0 8 4.875197 0.000000 1685 +bridg 0 8 4.875197 0.000000 1764 +pacif 0 8 4.875197 0.000000 1674 +delai 0 7 5.010635 0.000000 1848 +highwai 0 6 5.164786 0.000000 2095 +invest 0 6 5.164786 0.000000 2153 +educomput 0 5 5.347108 0.000000 2524 +semi 0 5 5.347108 0.000000 2510 +these 0 5 5.347108 0.000000 2482 +mutual 0 5 5.347108 0.000000 2418 +commod 0 5 5.347108 0.000000 2415 +ics 0 4 5.568345 0.000000 2779 +chart 0 4 5.568345 0.000000 2653 +chow 0 3 5.857933 0.000000 3281 +notkin 0 3 5.857933 0.000000 3345 +polytechn 0 3 5.857933 0.000000 3222 +usathi 0 2 6.263398 0.000000 5951 +glossari 0 2 6.263398 0.000000 4418 +asia 0 2 6.263398 0.000000 5952 +alumnu 0 2 6.263398 0.000000 5863 +kingsum 0 1 6.957497 0.000000 15762 +pcct 0 1 6.957497 0.000000 15763 +feedbackresearchmi 0 1 6.957497 0.000000 15764 +toolspap 0 1 6.957497 0.000000 15765 +icsm 0 1 6.957497 0.000000 15766 +griswold 0 1 6.957497 0.000000 15767 +sorcererpcct 0 1 6.957497 0.000000 15768 +terrenc 0 1 6.957497 0.000000 15769 +parr 0 1 6.957497 0.000000 15770 +newbiesresumepleasedrop 0 1 6.957497 0.000000 15771 +mailto 0 1 6.957497 0.000000 15772 +kongchines 0 1 6.957497 0.000000 15773 +kongsingapor 0 1 6.957497 0.000000 15774 +sitessingapor 0 1 6.957497 0.000000 15775 +websom 0 1 6.957497 0.000000 15776 +friendstom 0 1 6.957497 0.000000 15777 +liew 0 1 6.957497 0.000000 15778 +fook 0 1 6.957497 0.000000 15779 +jiang 0 1 6.957497 0.000000 15780 +weidongu 0 1 6.957497 0.000000 15781 +relatedunivers 0 1 6.957497 0.000000 15782 +webserv 0 1 6.957497 0.000000 15783 +storeinvestmentsfre 0 1 6.957497 0.000000 15784 +analysismisc 0 1 6.957497 0.000000 15785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..34036255 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +like 0 132 1.945910 0.000000 81 +professor 0 137 1.945910 0.000000 76 +seattl 1 120 2.079442 2.079442 103 +high 0 130 2.079442 0.000000 101 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +assist 0 112 2.197225 0.000000 113 +teach 0 108 2.197225 0.000000 112 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +search 0 95 2.397895 0.000000 155 +real 0 93 2.397895 0.000000 144 +build 0 85 2.484907 0.000000 184 +want 0 79 2.564949 0.000000 199 +previou 0 62 2.772589 0.000000 290 +written 0 63 2.772589 0.000000 278 +visit 0 63 2.772589 0.000000 288 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +life 0 50 3.044522 0.000000 375 +archiv 0 49 3.044522 0.000000 364 +get 0 46 3.091042 0.000000 380 +adapt 0 46 3.091042 0.000000 387 +anoth 0 45 3.135494 0.000000 408 +form 0 39 3.258097 0.000000 443 +electr 0 38 3.295837 0.000000 461 +ofth 0 36 3.367296 0.000000 491 +photo 0 31 3.496508 0.000000 561 +rather 0 26 3.688879 0.000000 642 +task 0 25 3.737670 0.000000 678 +ofwashington 0 22 3.850148 0.000000 766 +rout 0 21 3.912023 0.000000 793 +spend 0 19 4.007333 0.000000 850 +speed 0 18 4.060443 0.000000 911 +minim 0 18 4.060443 0.000000 887 +took 0 16 4.174387 0.000000 1010 +doesn 0 15 4.248495 0.000000 1055 +signific 0 13 4.382027 0.000000 1125 +kevin 0 9 4.753590 0.000000 1482 +suitabl 0 9 4.753590 0.000000 1486 +pacif 0 8 4.875197 0.000000 1674 +root 0 8 4.875197 0.000000 1650 +brain 0 8 4.875197 0.000000 1638 +router 0 8 4.875197 0.000000 1772 +spot 0 7 5.010635 0.000000 1894 +explain 0 7 5.010635 0.000000 1816 +multicomput 0 7 5.010635 0.000000 1890 +rock 0 6 5.164786 0.000000 2164 +chaotic 0 5 5.347108 0.000000 2566 +coral 0 5 5.347108 0.000000 2538 +engineeringat 0 5 5.347108 0.000000 2561 +wander 0 4 5.568345 0.000000 2896 +chaoticrout 0 4 5.568345 0.000000 3063 +bold 0 3 5.857933 0.000000 3846 +tenur 0 3 5.857933 0.000000 3801 +researchassoci 0 3 5.857933 0.000000 3664 +nervou 0 2 6.263398 0.000000 5953 +conscious 0 2 6.263398 0.000000 5954 +boldingkwb 0 1 6.957497 0.000000 15786 +juvenil 0 1 6.957497 0.000000 15787 +squirt 0 1 6.957497 0.000000 15788 +hunk 0 1 6.957497 0.000000 15789 +cling 0 1 6.957497 0.000000 15790 +rudimentari 0 1 6.957497 0.000000 15791 +eat 0 1 6.957497 0.000000 15792 +dennett 0 1 6.957497 0.000000 15793 +latencylan 0 1 6.957497 0.000000 15794 +researchha 0 1 6.957497 0.000000 15795 +formass 0 1 6.957497 0.000000 15796 +comethyakutak 0 1 6.957497 0.000000 15797 +moustach 0 1 6.957497 0.000000 15798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..91b2100b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +seattl 0 120 2.079442 0.000000 103 +person 0 111 2.197225 0.000000 117 +structur 0 106 2.197225 0.000000 105 +commun 0 95 2.397895 0.000000 157 +sieg 0 69 2.708050 0.000000 260 +room 0 59 2.833213 0.000000 301 +quarter 1 47 3.091042 3.091042 389 +formal 0 37 3.332205 0.000000 478 +short 0 36 3.367296 0.000000 499 +winter 0 36 3.367296 0.000000 500 +richard 0 31 3.496508 0.000000 559 +ladner 0 6 5.164786 0.000000 2062 +ladnerrichard 0 1 6.957497 0.000000 15799 +ladnerprofessor 0 1 6.957497 0.000000 15800 +biographyresearch 0 1 6.957497 0.000000 15801 +studentsteachingcomput 0 1 6.957497 0.000000 15802 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..40f1b60d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +code 0 108 2.197225 0.000000 116 +technic 0 100 2.302585 0.000000 140 +sinc 0 90 2.397895 0.000000 159 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +integr 0 67 2.708050 0.000000 245 +sieg 0 69 2.708050 0.000000 260 +test 0 66 2.708050 0.000000 252 +laboratori 0 63 2.772589 0.000000 292 +function 0 62 2.772589 0.000000 275 +evalu 0 64 2.772589 0.000000 266 +creat 0 63 2.772589 0.000000 277 +room 0 59 2.833213 0.000000 301 +allow 0 53 2.944439 0.000000 333 +small 0 39 3.258097 0.000000 447 +join 0 39 3.258097 0.000000 457 +purpos 0 37 3.332205 0.000000 481 +cost 0 37 3.332205 0.000000 480 +staff 0 36 3.367296 0.000000 490 +ofth 0 36 3.367296 0.000000 491 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +articl 0 33 3.433987 0.000000 530 +focu 0 30 3.555348 0.000000 571 +packag 0 28 3.610918 0.000000 614 +symbol 0 27 3.637586 0.000000 620 +primari 0 25 3.737670 0.000000 669 +ofwashington 0 22 3.850148 0.000000 766 +director 0 22 3.850148 0.000000 767 +chip 0 21 3.912023 0.000000 770 +voic 0 21 3.912023 0.000000 806 +vlsi 0 21 3.912023 0.000000 795 +supervis 0 20 3.951244 0.000000 840 +spars 0 16 4.174387 0.000000 989 +later 0 15 4.248495 0.000000 1043 +driven 0 15 4.248495 0.000000 1048 +larri 1 13 4.382027 4.382027 1142 +calcul 0 12 4.465908 0.000000 1268 +captur 0 12 4.465908 0.000000 1232 +fpga 0 10 4.653960 0.000000 1433 +matric 0 10 4.653960 0.000000 1399 +router 0 8 4.875197 0.000000 1772 +upcom 0 8 4.875197 0.000000 1685 +northwest 0 7 5.010635 0.000000 1973 +densiti 0 7 5.010635 0.000000 1927 +quantum 0 6 5.164786 0.000000 2214 +chemistri 0 5 5.347108 0.000000 2405 +mcmurchi 0 4 5.568345 0.000000 2757 +western 0 4 5.568345 0.000000 3062 +comprehens 0 4 5.568345 0.000000 2745 +andengin 0 4 5.568345 0.000000 3042 +coauthor 0 4 5.568345 0.000000 3064 +tester 0 4 5.568345 0.000000 2754 +triptych 0 4 5.568345 0.000000 3061 +mactest 0 3 5.857933 0.000000 3972 +ofintegr 0 2 6.263398 0.000000 5324 +gaussian 0 2 6.263398 0.000000 4763 +molecul 0 2 6.263398 0.000000 5246 +representationof 0 2 6.263398 0.000000 4119 +andha 0 2 6.263398 0.000000 5955 +mcmurchiedepart 0 1 6.957497 0.000000 15803 +integratedsystem 0 1 6.957497 0.000000 15804 +hework 0 1 6.957497 0.000000 15805 +theconstruct 0 1 6.957497 0.000000 15806 +hamiltonian 0 1 6.957497 0.000000 15807 +coauthorof 0 1 6.957497 0.000000 15808 +meld 0 1 6.957497 0.000000 15809 +abinitio 0 1 6.957497 0.000000 15810 +wirec 0 1 6.957497 0.000000 15811 +aschemat 0 1 6.957497 0.000000 15812 +withschemat 0 1 6.957497 0.000000 15813 +concis 0 1 6.957497 0.000000 15814 +parameteriz 0 1 6.957497 0.000000 15815 +andcommerci 0 1 6.957497 0.000000 15816 +hardwareenviron 0 1 6.957497 0.000000 15817 +andsubsystem 0 1 6.957497 0.000000 15818 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..e1f72c20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +softwar 1 220 1.386294 1.386294 30 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +pleas 0 113 2.197225 0.000000 114 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +real 0 93 2.397895 0.000000 144 +search 0 95 2.397895 0.000000 155 +question 0 91 2.397895 0.000000 141 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +member 0 84 2.484907 0.000000 165 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +ieee 0 86 2.484907 0.000000 190 +control 0 82 2.484907 0.000000 164 +interfac 0 79 2.564949 0.000000 209 +come 0 78 2.564949 0.000000 202 +state 0 76 2.564949 0.000000 207 +issu 0 78 2.564949 0.000000 211 +nation 0 74 2.639057 0.000000 240 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +sieg 0 69 2.708050 0.000000 260 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +goal 0 66 2.708050 0.000000 250 +practic 0 70 2.708050 0.000000 246 +result 0 65 2.772589 0.000000 281 +polici 0 64 2.772589 0.000000 279 +evalu 0 64 2.772589 0.000000 266 +copi 0 63 2.772589 0.000000 284 +interact 0 62 2.772589 0.000000 270 +space 0 57 2.890372 0.000000 310 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +publish 0 57 2.890372 0.000000 326 +finger 0 52 2.995732 0.000000 354 +life 0 50 3.044522 0.000000 375 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +favorit 0 44 3.135494 0.000000 410 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +join 0 39 3.258097 0.000000 457 +form 0 39 3.258097 0.000000 443 +late 0 40 3.258097 0.000000 439 +transact 0 39 3.258097 0.000000 438 +field 0 37 3.332205 0.000000 482 +tree 0 36 3.367296 0.000000 492 +committe 0 34 3.401197 0.000000 522 +award 0 34 3.401197 0.000000 523 +toler 0 33 3.433987 0.000000 533 +board 0 33 3.433987 0.000000 528 +human 0 32 3.465736 0.000000 546 +fault 0 32 3.465736 0.000000 547 +express 0 32 3.465736 0.000000 540 +titl 0 31 3.496508 0.000000 556 +specifi 0 30 3.555348 0.000000 568 +produc 0 30 3.555348 0.000000 572 +chair 0 29 3.583519 0.000000 596 +except 0 28 3.610918 0.000000 607 +actual 0 28 3.610918 0.000000 604 +though 0 27 3.637586 0.000000 622 +determin 0 27 3.637586 0.000000 630 +spent 0 25 3.737670 0.000000 676 +concern 0 25 3.737670 0.000000 666 +never 0 25 3.737670 0.000000 671 +wai 0 25 3.737670 0.000000 662 +fellow 0 24 3.761200 0.000000 701 +properti 0 22 3.850148 0.000000 749 +director 0 22 3.850148 0.000000 767 +avoid 0 21 3.912023 0.000000 799 +fact 0 21 3.912023 0.000000 780 +safeti 0 20 3.951244 0.000000 817 +verif 0 20 3.951244 0.000000 826 +citi 0 19 4.007333 0.000000 874 +failur 0 18 4.060443 0.000000 898 +seem 0 18 4.060443 0.000000 899 +behavior 0 18 4.060443 0.000000 881 +engineeringunivers 0 17 4.110874 0.000000 959 +analyz 0 17 4.110874 0.000000 925 +anyth 0 16 4.174387 0.000000 998 +advantag 0 16 4.174387 0.000000 987 +commerci 0 16 4.174387 0.000000 1005 +weslei 0 16 4.174387 0.000000 983 +anywai 0 15 4.248495 0.000000 1047 +contribut 0 15 4.248495 0.000000 1021 +qual 0 15 4.248495 0.000000 1062 +style 0 15 4.248495 0.000000 1036 +train 0 14 4.317488 0.000000 1066 +deriv 0 13 4.382027 0.000000 1145 +washingtonbox 0 13 4.382027 0.000000 1200 +conf 0 13 4.382027 0.000000 1181 +nanci 0 12 4.465908 0.000000 1256 +safe 0 12 4.465908 0.000000 1274 +addison 0 12 4.465908 0.000000 1230 +council 0 11 4.553877 0.000000 1364 +valid 0 11 4.553877 0.000000 1299 +leveson 0 9 4.753590 0.000000 1540 +mode 0 9 4.753590 0.000000 1492 +irvin 0 8 4.875197 0.000000 1660 +matter 0 8 4.875197 0.000000 1627 +claim 0 8 4.875197 0.000000 1664 +elect 0 8 4.875197 0.000000 1771 +analys 0 8 4.875197 0.000000 1666 +perhap 0 8 4.875197 0.000000 1693 +chief 0 7 5.010635 0.000000 1829 +awar 0 7 5.010635 0.000000 1800 +rain 0 6 5.164786 0.000000 2137 +highwai 0 6 5.164786 0.000000 2095 +softwareengin 0 6 5.164786 0.000000 2162 +ucla 0 5 5.347108 0.000000 2502 +lesson 0 5 5.347108 0.000000 2568 +adopt 0 5 5.347108 0.000000 2467 +aircraft 0 4 5.568345 0.000000 2872 +melbourn 0 4 5.568345 0.000000 3035 +rsml 0 3 5.857933 0.000000 3967 +loss 0 3 5.857933 0.000000 3805 +automobil 0 3 5.857933 0.000000 3709 +aerospac 0 3 5.857933 0.000000 3555 +hazard 0 3 5.857933 0.000000 3191 +tca 0 2 6.263398 0.000000 5941 +collis 0 2 6.263398 0.000000 5956 +nobodi 0 2 6.263398 0.000000 5474 +thatyou 0 2 6.263398 0.000000 4682 +computingresearch 0 2 6.263398 0.000000 5957 +shuttl 0 2 6.263398 0.000000 4787 +aiaa 0 2 6.263398 0.000000 5239 +aeronaut 0 2 6.263398 0.000000 5958 +andscienc 0 2 6.263398 0.000000 5796 +safewar 0 2 6.263398 0.000000 5959 +isalso 0 2 6.263398 0.000000 5640 +pressur 0 2 6.263398 0.000000 5960 +accid 0 2 6.263398 0.000000 5961 +airport 0 2 6.263398 0.000000 5962 +levesondepart 0 1 6.957497 0.000000 15819 +mathand 0 1 6.957497 0.000000 15820 +misanthrop 0 1 6.957497 0.000000 15821 +aform 0 1 6.957497 0.000000 15822 +airspac 0 1 6.957497 0.000000 15823 +theiroffici 0 1 6.957497 0.000000 15824 +safetyresearch 0 1 6.957497 0.000000 15825 +subtop 0 1 6.957497 0.000000 15826 +commissionon 0 1 6.957497 0.000000 15827 +levesoni 0 1 6.957497 0.000000 15828 +systemsaward 0 1 6.957497 0.000000 15829 +promotingrespons 0 1 6.957497 0.000000 15830 +propertyar 0 1 6.957497 0.000000 15831 +stake 0 1 6.957497 0.000000 15832 +keynoteaddress 0 1 6.957497 0.000000 15833 +steam 0 1 6.957497 0.000000 15834 +hazardanalysi 0 1 6.957497 0.000000 15835 +writtenin 0 1 6.957497 0.000000 15836 +newrequir 0 1 6.957497 0.000000 15837 +cockpit 0 1 6.957497 0.000000 15838 +problemsand 0 1 6.957497 0.000000 15839 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..299a229c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +postscript 0 131 2.079442 0.000000 90 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +octob 0 89 2.397895 0.000000 156 +select 0 91 2.397895 0.000000 154 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +environ 0 84 2.484907 0.000000 177 +novemb 0 81 2.484907 0.000000 179 +academ 0 82 2.484907 0.000000 178 +help 0 83 2.484907 0.000000 175 +control 0 82 2.484907 0.000000 164 +optim 0 79 2.564949 0.000000 197 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +symposium 0 72 2.639057 0.000000 238 +effici 0 73 2.639057 0.000000 233 +integr 0 67 2.708050 0.000000 245 +plai 0 60 2.833213 0.000000 307 +share 0 59 2.833213 0.000000 304 +space 0 57 2.890372 0.000000 310 +faculti 0 56 2.890372 0.000000 325 +special 0 56 2.890372 0.000000 320 +major 0 56 2.890372 0.000000 315 +sampl 0 53 2.944439 0.000000 339 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +principl 0 48 3.044522 0.000000 357 +numer 0 49 3.044522 0.000000 369 +execut 0 45 3.135494 0.000000 404 +join 0 39 3.258097 0.000000 457 +author 0 39 3.258097 0.000000 450 +annual 0 40 3.258097 0.000000 458 +transact 0 39 3.258097 0.000000 438 +workstat 0 37 3.332205 0.000000 479 +michael 0 35 3.401197 0.000000 514 +singl 0 34 3.401197 0.000000 510 +global 0 34 3.401197 0.000000 520 +award 0 34 3.401197 0.000000 523 +posit 0 31 3.496508 0.000000 552 +focu 0 30 3.555348 0.000000 571 +produc 0 30 3.555348 0.000000 572 +rang 0 30 3.555348 0.000000 565 +chair 0 29 3.583519 0.000000 596 +held 0 28 3.610918 0.000000 600 +usual 0 28 3.610918 0.000000 608 +cluster 0 28 3.610918 0.000000 612 +except 0 28 3.610918 0.000000 607 +team 0 27 3.637586 0.000000 625 +proc 0 26 3.688879 0.000000 649 +consult 0 24 3.761200 0.000000 687 +fellow 0 24 3.761200 0.000000 701 +lab 0 24 3.761200 0.000000 698 +handl 0 24 3.761200 0.000000 685 +lead 0 23 3.806662 0.000000 718 +deal 0 22 3.850148 0.000000 736 +william 0 22 3.850148 0.000000 765 +corpor 0 21 3.912023 0.000000 802 +chip 0 21 3.912023 0.000000 770 +binari 0 20 3.951244 0.000000 823 +supervis 0 20 3.951244 0.000000 840 +tenni 0 20 3.951244 0.000000 838 +exploit 0 20 3.951244 0.000000 836 +particularli 0 19 4.007333 0.000000 867 +separ 0 19 4.007333 0.000000 844 +four 0 18 4.060443 0.000000 905 +asplo 0 17 4.110874 0.000000 948 +former 0 17 4.110874 0.000000 956 +protect 0 17 4.110874 0.000000 935 +latenc 0 16 4.174387 0.000000 993 +choic 0 16 4.174387 0.000000 979 +transfer 0 16 4.174387 0.000000 967 +susan 0 15 4.248495 0.000000 1050 +levi 0 14 4.317488 0.000000 1093 +dean 0 14 4.317488 0.000000 1104 +coher 0 14 4.317488 0.000000 1109 +karlin 0 13 4.382027 0.000000 1176 +conf 0 13 4.382027 0.000000 1181 +mellon 0 13 4.382027 0.000000 1179 +hank 0 12 4.465908 0.000000 1253 +carnegi 0 12 4.465908 0.000000 1260 +anna 0 12 4.465908 0.000000 1292 +multithread 0 11 4.553877 0.000000 1315 +thedepart 0 11 4.553877 0.000000 1350 +henri 0 10 4.653960 0.000000 1417 +equip 0 10 4.653960 0.000000 1459 +ski 0 10 4.653960 0.000000 1471 +bike 0 10 4.653960 0.000000 1468 +death 0 10 4.653960 0.000000 1457 +jeffrei 0 9 4.753590 0.000000 1612 +softbal 0 9 4.753590 0.000000 1594 +voelker 0 9 4.753590 0.000000 1557 +vernon 0 9 4.753590 0.000000 1556 +morgan 0 9 4.753590 0.000000 1484 +egger 0 8 4.875197 0.000000 1695 +sigop 0 8 4.875197 0.000000 1727 +hold 0 8 4.875197 0.000000 1645 +span 0 8 4.875197 0.000000 1751 +evan 0 8 4.875197 0.000000 1633 +inproceed 0 8 4.875197 0.000000 1670 +jack 0 8 4.875197 0.000000 1780 +feelei 0 7 5.010635 0.000000 1859 +instrument 0 7 5.010635 0.000000 1954 +smile 0 7 5.010635 0.000000 1807 +maxim 0 7 5.010635 0.000000 1944 +simultan 0 6 5.164786 0.000000 2155 +tullsen 0 6 5.164786 0.000000 2081 +outstand 0 6 5.164786 0.000000 2136 +onoper 0 6 5.164786 0.000000 2048 +tobe 0 6 5.164786 0.000000 1995 +scholar 0 6 5.164786 0.000000 2180 +nine 0 6 5.164786 0.000000 2047 +rebecca 0 6 5.164786 0.000000 2174 +vivek 0 6 5.164786 0.000000 2210 +theth 0 5 5.347108 0.000000 2325 +seventh 0 5 5.347108 0.000000 2464 +fetch 0 5 5.347108 0.000000 2567 +chase 0 4 5.568345 0.000000 2897 +lazowska 0 4 5.568345 0.000000 2694 +arch 0 4 5.568345 0.000000 2995 +prog 0 4 5.568345 0.000000 2740 +opal 0 4 5.568345 0.000000 3057 +fulbright 0 4 5.568345 0.000000 2963 +escap 0 4 5.568345 0.000000 3016 +pighin 0 4 5.568345 0.000000 2735 +narasayya 0 4 5.568345 0.000000 3065 +thekkath 0 3 5.857933 0.000000 3973 +recipi 0 3 5.857933 0.000000 3627 +eleven 0 3 5.857933 0.000000 3824 +freder 0 3 5.857933 0.000000 3352 +emer 0 3 5.857933 0.000000 3969 +stamm 0 3 5.857933 0.000000 3970 +dessert 0 2 6.263398 0.000000 5194 +projecti 0 2 6.263398 0.000000 5963 +befound 0 2 6.263398 0.000000 5964 +infam 0 2 6.263398 0.000000 5859 +subpag 0 2 6.263398 0.000000 5926 +jamrozik 0 2 6.263398 0.000000 5925 +chandramohan 0 2 6.263398 0.000000 5965 +projectcal 0 1 6.957497 0.000000 15840 +theetch 0 1 6.957497 0.000000 15841 +consecutiveacm 0 1 6.957497 0.000000 15842 +symposia 0 1 6.957497 0.000000 15843 +universityand 0 1 6.957497 0.000000 15844 +machineryand 0 1 6.957497 0.000000 15845 +survivedlevi 0 1 6.957497 0.000000 15846 +haveal 0 1 6.957497 0.000000 15847 +glu 0 1 6.957497 0.000000 15848 +potato 0 1 6.957497 0.000000 15849 +parlor 0 1 6.957497 0.000000 15850 +publicationsreduc 0 1 6.957497 0.000000 15851 +implementablesimultan 0 1 6.957497 0.000000 15852 +joen 0 1 6.957497 0.000000 15853 +edwardd 0 1 6.957497 0.000000 15854 +recover 0 1 6.957497 0.000000 15855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..779acb0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +provid 0 121 2.079442 0.000000 94 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +larg 0 82 2.484907 0.000000 168 +chang 0 82 2.484907 0.000000 163 +member 0 84 2.484907 0.000000 165 +dynam 0 76 2.564949 0.000000 194 +sourc 0 77 2.564949 0.000000 201 +differ 0 66 2.708050 0.000000 253 +complex 0 64 2.772589 0.000000 269 +virtual 0 62 2.772589 0.000000 285 +result 0 65 2.772589 0.000000 281 +organ 0 65 2.772589 0.000000 265 +share 0 59 2.833213 0.000000 304 +space 0 57 2.890372 0.000000 310 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +much 0 52 2.995732 0.000000 349 +right 0 48 3.044522 0.000000 363 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +archiv 0 49 3.044522 0.000000 364 +execut 0 45 3.135494 0.000000 404 +anoth 0 45 3.135494 0.000000 408 +prototyp 0 38 3.295837 0.000000 463 +ofth 0 36 3.367296 0.000000 491 +singl 0 34 3.401197 0.000000 510 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +storag 0 31 3.496508 0.000000 553 +exist 0 30 3.555348 0.000000 569 +option 0 30 3.555348 0.000000 575 +depend 0 29 3.583519 0.000000 583 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +manipul 0 27 3.637586 0.000000 624 +enhanc 0 26 3.688879 0.000000 644 +jeff 0 25 3.737670 0.000000 673 +interpret 0 24 3.761200 0.000000 686 +mike 0 24 3.761200 0.000000 703 +thread 0 23 3.806662 0.000000 722 +cooper 0 22 3.850148 0.000000 757 +defin 0 22 3.850148 0.000000 746 +thu 0 21 3.912023 0.000000 773 +flexibl 0 21 3.912023 0.000000 792 +protect 0 17 4.110874 0.000000 935 +permit 0 16 4.174387 0.000000 962 +easili 0 14 4.317488 0.000000 1077 +levi 0 14 4.317488 0.000000 1093 +directli 0 13 4.382027 0.000000 1141 +translat 0 13 4.382027 0.000000 1164 +believ 0 13 4.382027 0.000000 1187 +uniqu 0 12 4.465908 0.000000 1228 +hank 0 12 4.465908 0.000000 1253 +alpha 0 11 4.553877 0.000000 1348 +persist 0 11 4.553877 0.000000 1367 +trust 0 9 4.753590 0.000000 1583 +parti 0 8 4.875197 0.000000 1676 +mach 0 8 4.875197 0.000000 1669 +dylan 0 8 4.875197 0.000000 1625 +secondari 0 7 5.010635 0.000000 1884 +feelei 0 7 5.010635 0.000000 1859 +huge 0 6 5.164786 0.000000 1991 +bestor 0 6 5.164786 0.000000 2099 +risc 0 6 5.164786 0.000000 2016 +duke 0 6 5.164786 0.000000 2231 +vivek 0 6 5.164786 0.000000 2210 +tiwari 0 5 5.347108 0.000000 2385 +opal 1 4 5.568345 5.568345 3057 +simplifi 0 4 5.568345 0.000000 3066 +mip 0 4 5.568345 0.000000 2738 +transmit 0 4 5.568345 0.000000 2835 +lazowska 0 4 5.568345 0.000000 2694 +chase 0 4 5.568345 0.000000 2897 +narasayya 0 4 5.568345 0.000000 3065 +databaseof 0 2 6.263398 0.000000 4696 +ashutosh 0 2 6.263398 0.000000 5966 +mcname 0 2 6.263398 0.000000 5875 +projectop 0 1 6.957497 0.000000 15856 +tunedto 0 1 6.957497 0.000000 15857 +numberof 0 1 6.957497 0.000000 15858 +andcooper 0 1 6.957497 0.000000 15859 +directlycommun 0 1 6.957497 0.000000 15860 +addressspac 0 1 6.957497 0.000000 15861 +domainthat 0 1 6.957497 0.000000 15862 +oneprocess 0 1 6.957497 0.000000 15863 +protectionstructur 0 1 6.957497 0.000000 15864 +relationshipbetween 0 1 6.957497 0.000000 15865 +canimprov 0 1 6.957497 0.000000 15866 +cooperatingappl 0 1 6.957497 0.000000 15867 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..d6b6405e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +object 0 138 1.945910 0.000000 79 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +octob 0 89 2.397895 0.000000 156 +school 0 84 2.484907 0.000000 188 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +meet 0 72 2.639057 0.000000 229 +sieg 0 69 2.708050 0.000000 260 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +virtual 0 62 2.772589 0.000000 285 +previou 0 62 2.772589 0.000000 290 +juli 0 60 2.833213 0.000000 305 +march 0 61 2.833213 0.000000 295 +direct 0 57 2.890372 0.000000 316 +cover 0 55 2.944439 0.000000 329 +archiv 0 49 3.044522 0.000000 364 +electron 0 47 3.091042 0.000000 379 +california 0 46 3.091042 0.000000 388 +editor 0 41 3.218876 0.000000 433 +past 0 42 3.218876 0.000000 428 +tutori 1 39 3.258097 3.258097 437 +submit 0 39 3.258097 0.000000 440 +vita 0 38 3.295837 0.000000 473 +brian 0 38 3.295837 0.000000 466 +industri 0 38 3.295837 0.000000 464 +respons 0 37 3.332205 0.000000 476 +curriculum 0 33 3.433987 0.000000 535 +chair 0 29 3.583519 0.000000 596 +consid 0 29 3.583519 0.000000 590 +propos 0 28 3.610918 0.000000 602 +constraint 0 26 3.688879 0.000000 636 +request 0 26 3.688879 0.000000 635 +aspect 0 25 3.737670 0.000000 663 +seri 0 24 3.761200 0.000000 708 +known 0 24 3.761200 0.000000 702 +lead 0 23 3.806662 0.000000 718 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +born 0 21 3.912023 0.000000 798 +qualiti 0 20 3.951244 0.000000 832 +accept 0 18 4.060443 0.000000 879 +encourag 0 18 4.060443 0.000000 880 +anyon 0 17 4.110874 0.000000 916 +jose 0 16 4.174387 0.000000 976 +alan 0 13 4.382027 0.000000 1146 +speak 0 12 4.465908 0.000000 1283 +readi 0 12 4.465908 0.000000 1242 +submiss 0 11 4.553877 0.000000 1298 +itali 0 11 4.553877 0.000000 1378 +inproceed 0 8 4.875197 0.000000 1670 +european 0 8 4.875197 0.000000 1763 +upcom 0 8 4.875197 0.000000 1685 +depth 0 8 4.875197 0.000000 1636 +edg 0 8 4.875197 0.000000 1647 +star 0 8 4.875197 0.000000 1717 +portland 0 7 5.010635 0.000000 1878 +oopsla 0 6 5.164786 0.000000 2221 +oregon 0 5 5.347108 0.000000 2437 +imper 0 4 5.568345 0.000000 3067 +freeman 0 4 5.568345 0.000000 2725 +ident 0 4 5.568345 0.000000 2826 +andsoftwar 0 4 5.568345 0.000000 2753 +breadth 0 4 5.568345 0.000000 2695 +green 0 4 5.568345 0.000000 2848 +nato 0 3 5.857933 0.000000 3587 +kaleidoscop 0 2 6.263398 0.000000 5780 +bologna 0 2 6.263398 0.000000 5631 +programmingsystem 0 2 6.263398 0.000000 5688 +hotlin 0 2 6.263398 0.000000 5967 +hendrix 0 2 6.263398 0.000000 5490 +grave 0 2 6.263398 0.000000 5968 +war 0 2 6.263398 0.000000 5969 +collector 0 2 6.263398 0.000000 5683 +lopez 0 1 6.957497 0.000000 15868 +bjorn 0 1 6.957497 0.000000 15869 +benson 0 1 6.957497 0.000000 15870 +lopezgu 0 1 6.957497 0.000000 15871 +lopezlopez 0 1 6.957497 0.000000 15872 +dissertationresearch 0 1 6.957497 0.000000 15873 +publicationsgu 0 1 6.957497 0.000000 15874 +mayoh 0 1 6.957497 0.000000 15875 +tougu 0 1 6.957497 0.000000 15876 +jann 0 1 6.957497 0.000000 15877 +penjam 0 1 6.957497 0.000000 15878 +constraintprogram 0 1 6.957497 0.000000 15879 +instituteseri 0 1 6.957497 0.000000 15880 +publisheda 0 1 6.957497 0.000000 15881 +tutorialsi 0 1 6.957497 0.000000 15882 +conferencein 0 1 6.957497 0.000000 15883 +itsextens 0 1 6.957497 0.000000 15884 +tutorialshav 0 1 6.957497 0.000000 15885 +introductorysurvei 0 1 6.957497 0.000000 15886 +academicresearch 0 1 6.957497 0.000000 15887 +attende 0 1 6.957497 0.000000 15888 +weespeci 0 1 6.957497 0.000000 15889 +requestguidelin 0 1 6.957497 0.000000 15890 +theoopsla 0 1 6.957497 0.000000 15891 +enthusiast 0 1 6.957497 0.000000 15892 +proposalswithout 0 1 6.957497 0.000000 15893 +notif 0 1 6.957497 0.000000 15894 +withcamera 0 1 6.957497 0.000000 15895 +jimi 0 1 6.957497 0.000000 15896 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..ca25e1d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +look 0 107 2.197225 0.000000 115 +graphic 0 90 2.397895 0.000000 147 +academ 0 82 2.484907 0.000000 178 +want 0 79 2.564949 0.000000 199 +best 0 59 2.833213 0.000000 299 +browser 0 56 2.890372 0.000000 313 +life 0 50 3.044522 0.000000 375 +keep 0 44 3.135494 0.000000 409 +citi 0 19 4.007333 0.000000 874 +chateau 0 16 4.174387 0.000000 997 +fourth 0 16 4.174387 0.000000 999 +countri 0 15 4.248495 0.000000 1059 +suit 0 13 4.382027 0.000000 1129 +touch 0 12 4.465908 0.000000 1288 +curiou 0 5 5.347108 0.000000 2541 +areasinclud 0 2 6.263398 0.000000 5747 +omid 1 1 6.957497 6.957497 15897 +madani 1 1 6.957497 6.957497 15898 +bhello 0 1 6.957497 0.000000 15899 +enjoytheori 0 1 6.957497 0.000000 15900 +islamicarchitectur 0 1 6.957497 0.000000 15901 +isfahan 0 1 6.957497 0.000000 15902 +nomine 0 1 6.957497 0.000000 15903 +iran 0 1 6.957497 0.000000 15904 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..32ab57bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +washington 0 236 1.386294 0.000000 32 +area 0 144 1.945910 0.000000 80 +written 0 63 2.772589 0.000000 278 +music 0 42 3.218876 0.000000 436 +mike 1 24 3.761200 3.761200 703 +goe 0 15 4.248495 0.000000 1044 +creativ 0 8 4.875197 0.000000 1777 +academia 0 6 5.164786 0.000000 2036 +perkowitz 0 2 6.263398 0.000000 5970 +perkowitznewsflash 0 1 6.957497 0.000000 15905 +blond 0 1 6.957497 0.000000 15906 +randomfavorit 0 1 6.957497 0.000000 15907 +sheba 0 1 6.957497 0.000000 15908 +voyeur 0 1 6.957497 0.000000 15909 +grooveneedl 0 1 6.957497 0.000000 15910 +espressoresumemik 0 1 6.957497 0.000000 15911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..cb16fd62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +phone 0 175 1.791759 0.000000 45 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +access 0 102 2.302585 0.000000 136 +follow 0 92 2.397895 0.000000 143 +resum 0 79 2.564949 0.000000 217 +sieg 0 69 2.708050 0.000000 260 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +back 0 60 2.833213 0.000000 297 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +http 0 41 3.218876 0.000000 420 +short 0 36 3.367296 0.000000 499 +spent 0 25 3.737670 0.000000 676 +finish 0 22 3.850148 0.000000 748 +theunivers 0 21 3.912023 0.000000 797 +germani 0 17 4.110874 0.000000 946 +marc 0 8 4.875197 0.000000 1680 +german 0 6 5.164786 0.000000 2190 +langheinrich 0 1 6.957497 0.000000 15912 +bielefeld 0 1 6.957497 0.000000 15913 +marclang 0 1 6.957497 0.000000 15914 +homepagemarc 0 1 6.957497 0.000000 15915 +langheinrichuniversitt 0 1 6.957497 0.000000 15916 +washingtontechnisch 0 1 6.957497 0.000000 15917 +fakultt 0 1 6.957497 0.000000 15918 +scienceemail 0 1 6.957497 0.000000 15919 +imlangh 0 1 6.957497 0.000000 15920 +techfak 0 1 6.957497 0.000000 15921 +eduabout 0 1 6.957497 0.000000 15922 +myselfi 0 1 6.957497 0.000000 15923 +thefulbright 0 1 6.957497 0.000000 15924 +depthinform 0 1 6.957497 0.000000 15925 +biopost 0 1 6.957497 0.000000 15926 +addressa 0 1 6.957497 0.000000 15927 +mastersat 0 1 6.957497 0.000000 15928 +homeschoolgermanyringstra 0 1 6.957497 0.000000 15929 +maintalphon 0 1 6.957497 0.000000 15930 +paulusplatz 0 1 6.957497 0.000000 15931 +bielefeldphon 0 1 6.957497 0.000000 15932 +woodlawn 0 1 6.957497 0.000000 15933 +formatmarc 0 1 6.957497 0.000000 15934 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..e0a2f102 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +applic 0 170 1.791759 0.000000 56 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +check 0 115 2.197225 0.000000 118 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +graphic 0 90 2.397895 0.000000 147 +proceed 0 93 2.397895 0.000000 152 +octob 0 89 2.397895 0.000000 156 +imag 0 91 2.397895 0.000000 161 +educ 0 86 2.484907 0.000000 191 +journal 0 83 2.484907 0.000000 183 +larg 0 82 2.484907 0.000000 168 +academ 0 82 2.484907 0.000000 178 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +ieee 0 86 2.484907 0.000000 190 +environ 0 84 2.484907 0.000000 177 +interfac 0 79 2.564949 0.000000 209 +resum 0 79 2.564949 0.000000 217 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +order 0 69 2.708050 0.000000 249 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +sieg 0 69 2.708050 0.000000 260 +interact 0 62 2.772589 0.000000 270 +septemb 0 65 2.772589 0.000000 274 +share 0 59 2.833213 0.000000 304 +march 0 61 2.833213 0.000000 295 +explor 0 58 2.890372 0.000000 324 +space 0 57 2.890372 0.000000 310 +special 0 56 2.890372 0.000000 320 +investig 0 51 2.995732 0.000000 353 +visual 1 48 3.044522 3.044522 372 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +tutori 0 39 3.258097 0.000000 437 +collabor 0 32 3.465736 0.000000 543 +human 0 32 3.465736 0.000000 546 +given 0 32 3.465736 0.000000 538 +transform 0 32 3.465736 0.000000 542 +scale 0 28 3.610918 0.000000 613 +manipul 0 27 3.637586 0.000000 624 +wai 0 25 3.737670 0.000000 662 +displai 0 23 3.806662 0.000000 712 +cooper 0 22 3.850148 0.000000 757 +geometr 0 19 4.007333 0.000000 852 +hierarch 0 15 4.248495 0.000000 1018 +stephen 0 11 4.553877 0.000000 1342 +fill 0 11 4.553877 0.000000 1349 +itali 0 11 4.553877 0.000000 1378 +packard 0 10 4.653960 0.000000 1444 +classif 0 9 4.753590 0.000000 1586 +partner 0 8 4.875197 0.000000 1648 +yang 0 8 4.875197 0.000000 1652 +baker 0 7 5.010635 0.000000 1812 +chief 0 7 5.010635 0.000000 1829 +bell 0 6 5.164786 0.000000 2224 +carlson 0 5 5.347108 0.000000 2351 +patent 0 5 5.347108 0.000000 2574 +bricker 0 4 5.568345 0.000000 3050 +assess 0 4 5.568345 0.000000 2724 +lauren 0 3 5.857933 0.000000 3251 +metip 0 3 5.857933 0.000000 3937 +marla 0 2 6.263398 0.000000 4510 +eick 0 2 6.263398 0.000000 5971 +burnett 0 2 6.263398 0.000000 4578 +crime 0 2 6.263398 0.000000 5972 +cscl 0 2 6.263398 0.000000 5837 +stevetanimoto 0 2 6.263398 0.000000 5835 +bentlei 0 1 6.957497 0.000000 15935 +interestsgraph 0 1 6.957497 0.000000 15936 +coimag 0 1 6.957497 0.000000 15937 +devleop 0 1 6.957497 0.000000 15938 +contol 0 1 6.957497 0.000000 15939 +cansimultan 0 1 6.957497 0.000000 15940 +publicationsbak 0 1 6.957497 0.000000 15941 +bohu 0 1 6.957497 0.000000 15942 +margaret 0 1 6.957497 0.000000 15943 +sorento 0 1 6.957497 0.000000 15944 +apparatu 0 1 6.957497 0.000000 15945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..8d134c56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +phone 0 175 1.791759 0.000000 45 +implement 0 152 1.791759 0.000000 52 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +compil 1 122 2.079442 2.079442 96 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +code 0 108 2.197225 0.000000 116 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +stuff 0 87 2.484907 0.000000 171 +dynam 0 76 2.564949 0.000000 194 +optim 0 79 2.564949 0.000000 197 +good 0 77 2.564949 0.000000 200 +workshop 0 71 2.639057 0.000000 239 +java 0 70 2.708050 0.000000 248 +plai 0 60 2.833213 0.000000 307 +think 0 57 2.890372 0.000000 314 +browser 0 56 2.890372 0.000000 313 +processor 0 54 2.944439 0.000000 335 +extens 0 53 2.944439 0.000000 340 +februari 0 54 2.944439 0.000000 328 +local 0 55 2.944439 0.000000 334 +effect 0 46 3.091042 0.000000 385 +fast 0 42 3.218876 0.000000 429 +past 0 42 3.218876 0.000000 428 +produc 0 30 3.555348 0.000000 572 +constraint 0 26 3.688879 0.000000 636 +bookmark 0 26 3.688879 0.000000 639 +interpret 0 24 3.761200 0.000000 686 +runtim 0 19 4.007333 0.000000 858 +figur 0 18 4.060443 0.000000 903 +bershad 0 18 4.060443 0.000000 902 +event 0 18 4.060443 0.000000 896 +modern 0 16 4.174387 0.000000 966 +side 0 15 4.248495 0.000000 1022 +goe 0 15 4.248495 0.000000 1044 +susan 0 15 4.248495 0.000000 1050 +black 0 10 4.653960 0.000000 1418 +chamber 0 8 4.875197 0.000000 1692 +egger 0 8 4.875197 0.000000 1695 +wire 0 8 4.875197 0.000000 1747 +craig 0 7 5.010635 0.000000 1879 +dispatch 0 7 5.010635 0.000000 1791 +mock 0 6 5.164786 0.000000 2087 +blue 0 6 5.164786 0.000000 2227 +philipos 0 5 5.347108 0.000000 2373 +asystem 0 4 5.568345 0.000000 2612 +andp 0 4 5.568345 0.000000 2811 +pardyak 0 4 5.568345 0.000000 3043 +ausland 0 3 5.857933 0.000000 3917 +matthai 0 2 6.263398 0.000000 4514 +withprofessor 0 2 6.263398 0.000000 5180 +eggersand 0 2 6.263398 0.000000 4522 +ribbon 0 2 6.263398 0.000000 5973 +compileri 0 1 6.957497 0.000000 15946 +beast 0 1 6.957497 0.000000 15947 +shortterm 0 1 6.957497 0.000000 15948 +basedsystem 0 1 6.957497 0.000000 15949 +canbenefit 0 1 6.957497 0.000000 15950 +onprogram 0 1 6.957497 0.000000 15951 +automaticdynam 0 1 6.957497 0.000000 15952 +frequentlymiscellan 0 1 6.957497 0.000000 15953 +importancefrom 0 1 6.957497 0.000000 15954 +abuwhi 0 1 6.957497 0.000000 15955 +campaign 0 1 6.957497 0.000000 15956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..796507ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +contact 0 153 1.791759 0.000000 59 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +phone 0 175 1.791759 0.000000 45 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +parallel 0 169 1.791759 0.000000 60 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +proceed 0 93 2.397895 0.000000 152 +pictur 0 89 2.397895 0.000000 160 +octob 0 89 2.397895 0.000000 156 +contain 0 81 2.484907 0.000000 174 +messag 0 76 2.564949 0.000000 212 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +involv 0 71 2.639057 0.000000 227 +free 0 73 2.639057 0.000000 224 +workshop 0 71 2.639057 0.000000 239 +laboratori 0 63 2.772589 0.000000 292 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +juli 0 60 2.833213 0.000000 305 +index 0 56 2.890372 0.000000 309 +faculti 0 56 2.890372 0.000000 325 +summer 0 56 2.890372 0.000000 311 +advisor 0 51 2.995732 0.000000 355 +maintain 0 51 2.995732 0.000000 342 +adapt 0 46 3.091042 0.000000 387 +answer 0 45 3.135494 0.000000 391 +past 0 42 3.218876 0.000000 428 +futur 0 41 3.218876 0.000000 427 +live 0 40 3.258097 0.000000 451 +electr 0 38 3.295837 0.000000 461 +industri 0 38 3.295837 0.000000 464 +game 0 36 3.367296 0.000000 498 +dissert 0 32 3.465736 0.000000 549 +graph 0 30 3.555348 0.000000 576 +produc 0 30 3.555348 0.000000 572 +pass 0 28 3.610918 0.000000 611 +bookmark 0 26 3.688879 0.000000 639 +head 0 23 3.806662 0.000000 732 +rout 0 21 3.912023 0.000000 793 +chip 0 21 3.912023 0.000000 770 +hous 0 21 3.912023 0.000000 801 +fine 0 20 3.951244 0.000000 822 +render 0 17 4.110874 0.000000 947 +medic 0 17 4.110874 0.000000 958 +cambridg 0 16 4.174387 0.000000 1008 +carl 0 15 4.248495 0.000000 1024 +countri 0 15 4.248495 0.000000 1059 +princeton 0 15 4.248495 0.000000 1042 +floor 0 14 4.317488 0.000000 1070 +massachusett 0 14 4.317488 0.000000 1118 +larri 0 13 4.382027 0.000000 1142 +menu 0 13 4.382027 0.000000 1156 +canada 0 13 4.382027 0.000000 1158 +speak 0 12 4.465908 0.000000 1283 +volum 0 11 4.553877 0.000000 1347 +mesh 0 11 4.553877 0.000000 1351 +packet 0 10 4.653960 0.000000 1415 +label 0 10 4.653960 0.000000 1423 +purdu 0 10 4.653960 0.000000 1466 +coast 0 8 4.875197 0.000000 1746 +angel 0 8 4.875197 0.000000 1779 +mile 0 8 4.875197 0.000000 1743 +creativ 0 8 4.875197 0.000000 1777 +virginia 0 8 4.875197 0.000000 1659 +shot 0 7 5.010635 0.000000 1898 +marri 0 7 5.010635 0.000000 1946 +adob 0 7 5.010635 0.000000 1873 +layout 0 6 5.164786 0.000000 2183 +east 0 5 5.347108 0.000000 2472 +chaotic 0 5 5.347108 0.000000 2566 +snyder 0 5 5.347108 0.000000 2359 +remain 0 5 5.347108 0.000000 2278 +amus 0 5 5.347108 0.000000 2366 +neil 0 4 5.568345 0.000000 2841 +ebel 0 4 5.568345 0.000000 2756 +tester 0 4 5.568345 0.000000 2754 +mcmurchi 0 4 5.568345 0.000000 2757 +gregori 0 4 5.568345 0.000000 2928 +merl 0 3 5.857933 0.000000 3843 +dine 0 3 5.857933 0.000000 3472 +mitsubishi 0 3 5.857933 0.000000 3842 +mactest 0 3 5.857933 0.000000 3972 +fashion 0 3 5.857933 0.000000 3699 +neighborhood 0 3 5.857933 0.000000 3242 +jar 0 3 5.857933 0.000000 3223 +mckenzi 1 2 6.263398 6.263398 5974 +gemini 0 2 6.263398 0.000000 5975 +andwork 0 2 6.263398 0.000000 5403 +projectsi 0 2 6.263398 0.000000 5931 +isomorph 0 2 6.263398 0.000000 5976 +knowna 0 2 6.263398 0.000000 5480 +shirt 0 2 6.263398 0.000000 5977 +farm 0 2 6.263398 0.000000 4115 +broadwai 0 1 6.957497 0.000000 15957 +projectsgonna 0 1 6.957497 0.000000 15958 +teenag 0 1 6.957497 0.000000 15959 +lobotomi 0 1 6.957497 0.000000 15960 +ramonesi 0 1 6.957497 0.000000 15961 +projectconcern 0 1 6.957497 0.000000 15962 +copiou 0 1 6.957497 0.000000 15963 +expatri 0 1 6.957497 0.000000 15964 +onchaot 0 1 6.957497 0.000000 15965 +routingwith 0 1 6.957497 0.000000 15966 +torusnetwork 0 1 6.957497 0.000000 15967 +thecranium 0 1 6.957497 0.000000 15968 +compatiblewith 0 1 6.957497 0.000000 15969 +netlist 0 1 6.957497 0.000000 15970 +calledgemini 0 1 6.957497 0.000000 15971 +schemat 0 1 6.957497 0.000000 15972 +cranium 0 1 6.957497 0.000000 15973 +packetrout 0 1 6.957497 0.000000 15974 +andcommun 0 1 6.957497 0.000000 15975 +tomactest 0 1 6.957497 0.000000 15976 +arlington 0 1 6.957497 0.000000 15977 +livein 0 1 6.957497 0.000000 15978 +ofballard 0 1 6.957497 0.000000 15979 +artworkcr 0 1 6.957497 0.000000 15980 +photoshop 0 1 6.957497 0.000000 15981 +ownedthi 0 1 6.957497 0.000000 15982 +onlyth 0 1 6.957497 0.000000 15983 +correctlyguess 0 1 6.957497 0.000000 15984 +toriddl 0 1 6.957497 0.000000 15985 +jour 0 1 6.957497 0.000000 15986 +honei 0 1 6.957497 0.000000 15987 +myuncl 0 1 6.957497 0.000000 15988 +edmonton 0 1 6.957497 0.000000 15989 +alberta 0 1 6.957497 0.000000 15990 +linkschairman 0 1 6.957497 0.000000 15991 +linksnorm 0 1 6.957497 0.000000 15992 +halcyon 0 1 6.957497 0.000000 15993 +eugen 0 1 6.957497 0.000000 15994 +spafford 0 1 6.957497 0.000000 15995 +randi 0 1 6.957497 0.000000 15996 +pausch 0 1 6.957497 0.000000 15997 +wallach 0 1 6.957497 0.000000 15998 +scool 0 1 6.957497 0.000000 15999 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..63e27a81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +analysi 0 124 2.079442 0.000000 98 +seattl 0 120 2.079442 0.000000 103 +report 0 131 2.079442 0.000000 92 +specif 0 106 2.197225 0.000000 106 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +proceed 0 93 2.397895 0.000000 152 +ieee 0 86 2.484907 0.000000 190 +interfac 0 79 2.564949 0.000000 209 +appear 0 78 2.564949 0.000000 210 +dynam 0 76 2.564949 0.000000 194 +decemb 0 80 2.564949 0.000000 215 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +servic 0 72 2.639057 0.000000 236 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +receiv 0 66 2.708050 0.000000 244 +creat 0 63 2.772589 0.000000 277 +improv 0 62 2.772589 0.000000 289 +sever 0 56 2.890372 0.000000 322 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +extens 0 53 2.944439 0.000000 340 +processor 0 54 2.944439 0.000000 335 +allow 0 53 2.944439 0.000000 333 +run 0 51 2.995732 0.000000 347 +hardwar 0 51 2.995732 0.000000 350 +telephon 0 50 3.044522 0.000000 373 +principl 0 48 3.044522 0.000000 357 +adapt 0 46 3.091042 0.000000 387 +protocol 0 45 3.135494 0.000000 407 +describ 0 45 3.135494 0.000000 400 +mechan 0 43 3.178054 0.000000 416 +http 0 41 3.218876 0.000000 420 +transact 0 39 3.258097 0.000000 438 +winter 0 36 3.367296 0.000000 500 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +posit 0 31 3.496508 0.000000 552 +rang 0 30 3.555348 0.000000 565 +graph 0 30 3.555348 0.000000 576 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +load 0 28 3.610918 0.000000 601 +compar 0 26 3.688879 0.000000 648 +request 0 26 3.688879 0.000000 635 +enabl 0 26 3.688879 0.000000 655 +spent 0 25 3.737670 0.000000 676 +primari 0 25 3.737670 0.000000 669 +demonstr 0 24 3.761200 0.000000 694 +lab 0 24 3.761200 0.000000 698 +flow 0 24 3.761200 0.000000 700 +deal 0 22 3.850148 0.000000 736 +corpor 0 21 3.912023 0.000000 802 +similar 0 21 3.912023 0.000000 771 +kernel 0 20 3.951244 0.000000 825 +safeti 0 20 3.951244 0.000000 817 +spend 0 19 4.007333 0.000000 850 +region 0 19 4.007333 0.000000 875 +protect 0 17 4.110874 0.000000 935 +germani 0 17 4.110874 0.000000 946 +anyon 0 17 4.110874 0.000000 916 +commerci 0 16 4.174387 0.000000 1005 +princeton 0 15 4.248495 0.000000 1042 +contribut 0 15 4.248495 0.000000 1021 +spin 0 14 4.317488 0.000000 1121 +achiev 0 14 4.317488 0.000000 1088 +near 0 14 4.317488 0.000000 1091 +happi 0 14 4.317488 0.000000 1079 +pretti 0 13 4.382027 0.000000 1191 +safe 0 12 4.465908 0.000000 1274 +usenix 0 12 4.465908 0.000000 1240 +abil 0 11 4.553877 0.000000 1341 +custom 0 10 4.653960 0.000000 1414 +elimin 0 9 4.753590 0.000000 1558 +marc 0 8 4.875197 0.000000 1680 +hack 0 7 5.010635 0.000000 1950 +fifth 0 7 5.010635 0.000000 1931 +bell 0 6 5.164786 0.000000 2224 +isth 0 5 5.347108 0.000000 2532 +sole 0 4 5.568345 0.000000 2592 +sell 0 4 5.568345 0.000000 2935 +fiuczynski 0 3 5.857933 0.000000 3390 +forappl 0 3 5.857933 0.000000 3929 +scratch 0 3 5.857933 0.000000 3140 +anin 0 3 5.857933 0.000000 3354 +fifteenth 0 3 5.857933 0.000000 3868 +linker 0 3 5.857933 0.000000 3157 +namespac 0 3 5.857933 0.000000 3957 +shortcom 0 2 6.263398 0.000000 5978 +backgroundi 0 2 6.263398 0.000000 5878 +highschool 0 2 6.263398 0.000000 5672 +ofproject 0 2 6.263398 0.000000 4446 +inord 0 2 6.263398 0.000000 4824 +linkabl 0 2 6.263398 0.000000 5979 +andcollect 0 2 6.263398 0.000000 4249 +contacthttp 0 1 6.957497 0.000000 16000 +grewup 0 1 6.957497 0.000000 16001 +sseldorf 0 1 6.957497 0.000000 16002 +fromrutg 0 1 6.957497 0.000000 16003 +mitr 0 1 6.957497 0.000000 16004 +proprietor 0 1 6.957497 0.000000 16005 +companythat 0 1 6.957497 0.000000 16006 +setof 0 1 6.957497 0.000000 16007 +chasi 0 1 6.957497 0.000000 16008 +univoic 0 1 6.957497 0.000000 16009 +cardsand 0 1 6.957497 0.000000 16010 +vxwork 0 1 6.957497 0.000000 16011 +compellingperform 0 1 6.957497 0.000000 16012 +tosimilar 0 1 6.957497 0.000000 16013 +anextens 0 1 6.957497 0.000000 16014 +betterperform 0 1 6.957497 0.000000 16015 +conventionaloper 0 1 6.957497 0.000000 16016 +technicalconfer 0 1 6.957497 0.000000 16017 +describeshow 0 1 6.957497 0.000000 16018 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..6e836036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +modifi 0 178 1.609438 0.000000 35 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +machin 0 129 2.079442 0.000000 95 +real 0 93 2.397895 0.000000 144 +help 0 83 2.484907 0.000000 175 +requir 0 81 2.484907 0.000000 167 +method 0 80 2.564949 0.000000 213 +practic 0 70 2.708050 0.000000 246 +compar 0 26 3.688879 0.000000 648 +bound 0 26 3.688879 0.000000 659 +sort 0 22 3.850148 0.000000 738 +rout 1 21 3.912023 3.912023 793 +predict 0 19 4.007333 0.000000 855 +minim 0 18 4.060443 0.000000 887 +lower 0 18 4.060443 0.000000 886 +topolog 0 14 4.317488 0.000000 1089 +mesh 0 11 4.553877 0.000000 1351 +router 0 8 4.875197 0.000000 1772 +versu 0 6 5.164786 0.000000 2052 +upper 0 5 5.347108 0.000000 2481 +melani 0 2 6.263398 0.000000 5784 +deflect 0 1 6.957497 0.000000 16019 +fulgham 0 1 6.957497 0.000000 16020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..84a8fb08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +databas 0 122 2.079442 0.000000 86 +provid 0 121 2.079442 0.000000 94 +take 0 97 2.302585 0.000000 134 +imag 0 91 2.397895 0.000000 161 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +stuff 0 87 2.484907 0.000000 171 +exampl 0 77 2.564949 0.000000 195 +nation 0 74 2.639057 0.000000 240 +main 0 67 2.708050 0.000000 256 +guid 0 63 2.772589 0.000000 267 +plan 0 65 2.772589 0.000000 272 +visit 0 63 2.772589 0.000000 288 +done 0 47 3.091042 0.000000 381 +kind 0 32 3.465736 0.000000 541 +transform 0 32 3.465736 0.000000 542 +retriev 0 27 3.637586 0.000000 621 +try 0 22 3.850148 0.000000 764 +similar 0 21 3.912023 0.000000 771 +fact 0 21 3.912023 0.000000 780 +engineeringunivers 0 17 4.110874 0.000000 959 +commerci 0 16 4.174387 0.000000 1005 +english 0 15 4.248495 0.000000 1033 +trip 0 14 4.317488 0.000000 1113 +meng 0 12 4.465908 0.000000 1214 +newspap 0 12 4.465908 0.000000 1280 +scienceat 0 11 4.553877 0.000000 1375 +island 0 11 4.553877 0.000000 1345 +undergrad 0 9 4.753590 0.000000 1589 +charg 0 9 4.753590 0.000000 1582 +pennsylvania 0 7 5.010635 0.000000 1932 +interestsi 0 7 5.010635 0.000000 1969 +huge 0 6 5.164786 0.000000 1991 +singapor 1 5 5.347108 5.347108 2487 +snapshot 0 5 5.347108 0.000000 2303 +washingtonseattl 0 4 5.568345 0.000000 3044 +heng 0 2 6.263398 0.000000 5202 +strait 0 2 6.263398 0.000000 5980 +homepagemenghe 0 1 6.957497 0.000000 16021 +edubox 0 1 6.957497 0.000000 16022 +findimag 0 1 6.957497 0.000000 16023 +virag 0 1 6.957497 0.000000 16024 +andqbicar 0 1 6.957497 0.000000 16025 +singaporesingapor 0 1 6.957497 0.000000 16026 +infomap 0 1 6.957497 0.000000 16027 +andstatist 0 1 6.957497 0.000000 16028 +singaporeonlin 0 1 6.957497 0.000000 16029 +boardi 0 1 6.957497 0.000000 16030 +anintellig 0 1 6.957497 0.000000 16031 +menghe 0 1 6.957497 0.000000 16032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..d3234b49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +code 0 108 2.197225 0.000000 116 +theori 0 111 2.197225 0.000000 127 +manag 0 114 2.197225 0.000000 125 +technic 0 100 2.302585 0.000000 140 +real 0 93 2.397895 0.000000 144 +optim 0 79 2.564949 0.000000 197 +workshop 0 71 2.639057 0.000000 239 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +maintain 0 51 2.995732 0.000000 342 +frequent 0 49 3.044522 0.000000 367 +life 0 50 3.044522 0.000000 375 +microsoft 0 38 3.295837 0.000000 468 +game 0 36 3.367296 0.000000 498 +michael 0 35 3.401197 0.000000 514 +represent 0 35 3.401197 0.000000 512 +chair 0 29 3.583519 0.000000 596 +static 0 27 3.637586 0.000000 619 +properti 0 22 3.850148 0.000000 749 +particularli 0 19 4.007333 0.000000 867 +previous 0 17 4.110874 0.000000 923 +debug 0 17 4.110874 0.000000 944 +philosophi 0 13 4.382027 0.000000 1167 +carri 0 13 4.382027 0.000000 1152 +awai 0 10 4.653960 0.000000 1447 +intermedi 0 9 4.753590 0.000000 1497 +cryptographi 0 9 4.753590 0.000000 1512 +serial 0 7 5.010635 0.000000 1975 +intellectu 0 7 5.010635 0.000000 1847 +occasion 0 7 5.010635 0.000000 1905 +sciencedepart 0 6 5.164786 0.000000 2172 +slice 0 4 5.568345 0.000000 2622 +popl 0 4 5.568345 0.000000 3068 +denot 0 3 5.857933 0.000000 3147 +ernst 0 2 6.263398 0.000000 4525 +eec 0 2 6.263398 0.000000 5981 +pagemichael 0 1 6.957497 0.000000 16033 +ernsti 0 1 6.957497 0.000000 16034 +riceunivers 0 1 6.957497 0.000000 16035 +programanalysi 0 1 6.957497 0.000000 16036 +coloc 0 1 6.957497 0.000000 16037 +semanticsi 0 1 6.957497 0.000000 16038 +resourcesfor 0 1 6.957497 0.000000 16039 +slip 0 1 6.957497 0.000000 16040 +possibleinterest 0 1 6.957497 0.000000 16041 +mernst 0 1 6.957497 0.000000 16042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..4173e0d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +check 0 115 2.197225 0.000000 118 +well 0 109 2.197225 0.000000 121 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +search 0 95 2.397895 0.000000 155 +sinc 0 90 2.397895 0.000000 159 +stuff 0 87 2.484907 0.000000 171 +journal 0 83 2.484907 0.000000 183 +come 0 78 2.564949 0.000000 202 +state 0 76 2.564949 0.000000 207 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +main 0 67 2.708050 0.000000 256 +view 0 70 2.708050 0.000000 254 +still 0 50 3.044522 0.000000 362 +cool 0 49 3.044522 0.000000 374 +even 0 45 3.135494 0.000000 393 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +electr 0 38 3.295837 0.000000 461 +expect 0 37 3.332205 0.000000 484 +field 0 37 3.332205 0.000000 482 +obtain 0 33 3.433987 0.000000 534 +travel 0 30 3.555348 0.000000 579 +quot 0 29 3.583519 0.000000 582 +consid 0 29 3.583519 0.000000 590 +american 0 27 3.637586 0.000000 634 +arrai 0 27 3.637586 0.000000 627 +spent 0 25 3.737670 0.000000 676 +grad 0 20 3.951244 0.000000 837 +left 0 19 4.007333 0.000000 851 +els 0 19 4.007333 0.000000 843 +event 0 18 4.060443 0.000000 896 +squar 0 14 4.317488 0.000000 1082 +spin 0 14 4.317488 0.000000 1121 +danc 0 12 4.465908 0.000000 1278 +grow 0 12 4.465908 0.000000 1209 +metacrawl 0 10 4.653960 0.000000 1455 +grew 0 8 4.875197 0.000000 1742 +solomon 0 8 4.875197 0.000000 1716 +cultur 0 7 5.010635 0.000000 1951 +bit 0 7 5.010635 0.000000 1833 +wouldn 0 7 5.010635 0.000000 1970 +mock 0 6 5.164786 0.000000 2087 +whichi 0 6 5.164786 0.000000 2056 +oopsla 0 6 5.164786 0.000000 2221 +altavista 0 6 5.164786 0.000000 2222 +neither 0 6 5.164786 0.000000 1990 +matthew 0 6 5.164786 0.000000 2193 +chess 0 5 5.347108 0.000000 2486 +upper 0 5 5.347108 0.000000 2481 +volunt 0 5 5.347108 0.000000 2307 +lili 0 5 5.347108 0.000000 2240 +fulbright 0 4 5.568345 0.000000 2963 +spanish 0 4 5.568345 0.000000 3017 +marku 0 3 5.857933 0.000000 3872 +district 0 3 5.857933 0.000000 3756 +karlsruh 0 3 5.857933 0.000000 3689 +latin 0 3 5.857933 0.000000 3741 +deutsch 0 3 5.857933 0.000000 3802 +diplom 0 2 6.263398 0.000000 5982 +umass 0 2 6.263398 0.000000 5899 +grante 0 2 6.263398 0.000000 4914 +labyrinth 0 2 6.263398 0.000000 5983 +mainstream 0 2 6.263398 0.000000 5437 +salsa 0 2 6.263398 0.000000 5984 +colloquia 0 2 6.263398 0.000000 4710 +mossi 0 2 6.263398 0.000000 5801 +rttemberg 0 1 6.957497 0.000000 16043 +anotherpart 0 1 6.957497 0.000000 16044 +biberach 0 1 6.957497 0.000000 16045 +swabia 0 1 6.957497 0.000000 16046 +oberschwaben 0 1 6.957497 0.000000 16047 +solitud 0 1 6.957497 0.000000 16048 +dieangst 0 1 6.957497 0.000000 16049 +torwart 0 1 6.957497 0.000000 16050 +beim 0 1 6.957497 0.000000 16051 +elfmet 0 1 6.957497 0.000000 16052 +handk 0 1 6.957497 0.000000 16053 +merengu 0 1 6.957497 0.000000 16054 +publicationssepar 0 1 6.957497 0.000000 16055 +olympiad 0 1 6.957497 0.000000 16056 +yerewan 0 1 6.957497 0.000000 16057 +csek 0 1 6.957497 0.000000 16058 +csebi 0 1 6.957497 0.000000 16059 +cse 0 1 6.957497 0.000000 16060 +studentsimag 0 1 6.957497 0.000000 16061 +engineeringy 0 1 6.957497 0.000000 16062 +realaudio 0 1 6.957497 0.000000 16063 +linksand 0 1 6.957497 0.000000 16064 +toil 0 1 6.957497 0.000000 16065 +unto 0 1 6.957497 0.000000 16066 +glorywa 0 1 6.957497 0.000000 16067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..3ac8f507 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +person 0 111 2.197225 0.000000 117 +vivek 1 6 5.164786 5.164786 2210 +narasayya 1 4 5.568345 5.568345 3065 +informationresearch 0 3 5.857933 0.000000 3675 +nara 0 1 6.957497 0.000000 16068 +interestspap 0 1 6.957497 0.000000 16069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..23807f48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +seattl 0 120 2.079442 0.000000 103 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +search 0 95 2.397895 0.000000 155 +internet 0 83 2.484907 0.000000 186 +name 0 72 2.639057 0.000000 220 +intellig 0 72 2.639057 0.000000 225 +sieg 0 69 2.708050 0.000000 260 +order 0 69 2.708050 0.000000 249 +artifici 0 63 2.772589 0.000000 280 +collect 0 65 2.772589 0.000000 268 +digit 0 52 2.995732 0.000000 348 +finger 0 52 2.995732 0.000000 354 +much 0 52 2.995732 0.000000 349 +principl 0 48 3.044522 0.000000 357 +friend 0 48 3.044522 0.000000 376 +quarter 0 47 3.091042 0.000000 389 +favorit 0 44 3.135494 0.000000 410 +movi 0 40 3.258097 0.000000 459 +tech 0 35 3.401197 0.000000 515 +india 0 32 3.465736 0.000000 550 +autumn 0 31 3.496508 0.000000 558 +travel 0 30 3.555348 0.000000 579 +finish 0 22 3.850148 0.000000 748 +stop 0 17 4.110874 0.000000 942 +adam 0 17 4.110874 0.000000 934 +cook 0 10 4.653960 0.000000 1464 +sound 0 9 4.753590 0.000000 1605 +heavi 0 7 5.010635 0.000000 1841 +alphabet 0 6 5.164786 0.000000 1980 +dougla 0 5 5.347108 0.000000 2471 +delhi 0 5 5.347108 0.000000 2530 +radio 0 4 5.568345 0.000000 3025 +skate 0 4 5.568345 0.000000 3046 +terri 0 3 5.857933 0.000000 3264 +impli 0 3 5.857933 0.000000 3348 +astronomi 0 3 5.857933 0.000000 3974 +coin 0 3 5.857933 0.000000 3799 +pelham 0 2 6.263398 0.000000 4988 +grenvil 0 2 6.263398 0.000000 4989 +himanshu 1 1 6.957497 6.957497 16070 +nautiy 1 1 6.957497 6.957497 16071 +pagehimanshu 0 1 6.957497 0.000000 16072 +nautiyalthi 0 1 6.957497 0.000000 16073 +nautiyaldept 0 1 6.957497 0.000000 16074 +edugod 0 1 6.957497 0.000000 16075 +gift 0 1 6.957497 0.000000 16076 +personkind 0 1 6.957497 0.000000 16077 +pratchett 0 1 6.957497 0.000000 16078 +wodehouseth 0 1 6.957497 0.000000 16079 +aviat 0 1 6.957497 0.000000 16080 +numismat 0 1 6.957497 0.000000 16081 +profound 0 1 6.957497 0.000000 16082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..27d8dc31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +paper 0 205 1.609438 0.000000 38 +avail 1 169 1.791759 1.791759 48 +contact 0 153 1.791759 0.000000 59 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +seattl 0 120 2.079442 0.000000 103 +version 0 113 2.197225 0.000000 122 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +need 1 98 2.302585 2.302585 135 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +comment 0 93 2.397895 0.000000 146 +resourc 0 81 2.484907 0.000000 172 +stuff 0 87 2.484907 0.000000 171 +know 0 80 2.564949 0.000000 198 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +line 0 75 2.639057 0.000000 231 +involv 0 71 2.639057 0.000000 227 +artifici 0 63 2.772589 0.000000 280 +automat 0 61 2.833213 0.000000 306 +sever 0 56 2.890372 0.000000 322 +browser 0 56 2.890372 0.000000 313 +date 0 51 2.995732 0.000000 344 +week 0 52 2.995732 0.000000 343 +favorit 0 44 3.135494 0.000000 410 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +random 0 34 3.401197 0.000000 511 +return 0 34 3.401197 0.000000 502 +great 0 27 3.637586 0.000000 626 +enabl 0 26 3.688879 0.000000 655 +bookmark 0 26 3.688879 0.000000 639 +alwai 0 24 3.761200 0.000000 691 +color 0 22 3.850148 0.000000 762 +tell 0 21 3.912023 0.000000 777 +anonym 0 14 4.317488 0.000000 1100 +easili 0 14 4.317488 0.000000 1077 +preliminari 0 9 4.753590 0.000000 1480 +wilson 0 9 4.753590 0.000000 1536 +awar 0 7 5.010635 0.000000 1800 +guidelin 0 7 5.010635 0.000000 1832 +divers 0 6 5.164786 0.000000 2232 +handi 0 6 5.164786 0.000000 2111 +lost 0 5 5.347108 0.000000 2358 +alsointerest 0 3 5.857933 0.000000 3813 +shortli 0 3 5.857933 0.000000 3375 +surgeri 0 3 5.857933 0.000000 3975 +javascript 0 3 5.857933 0.000000 3221 +republican 0 3 5.857933 0.000000 3815 +miscellani 0 3 5.857933 0.000000 3976 +nichola 0 3 5.857933 0.000000 3252 +uwcs 0 3 5.857933 0.000000 3977 +labyrinth 0 2 6.263398 0.000000 5983 +cynic 0 2 6.263398 0.000000 5854 +andcognit 0 2 6.263398 0.000000 5681 +temperatur 0 2 6.263398 0.000000 5985 +bitter 0 2 6.263398 0.000000 5387 +ironi 0 2 6.263398 0.000000 5986 +nonetheless 0 2 6.263398 0.000000 4681 +madeavail 0 2 6.263398 0.000000 4326 +mediocr 0 1 6.957497 0.000000 16083 +bemoan 0 1 6.957497 0.000000 16084 +hype 0 1 6.957497 0.000000 16085 +skeptic 0 1 6.957497 0.000000 16086 +automaticconstruct 0 1 6.957497 0.000000 16087 +wrapper 0 1 6.957497 0.000000 16088 +beeninvolv 0 1 6.957497 0.000000 16089 +glbal 0 1 6.957497 0.000000 16090 +infrmatin 0 1 6.957497 0.000000 16091 +sperhighwai 0 1 6.957497 0.000000 16092 +meter 0 1 6.957497 0.000000 16093 +ronald 0 1 6.957497 0.000000 16094 +reagan 0 1 6.957497 0.000000 16095 +wendel 0 1 6.957497 0.000000 16096 +berri 0 1 6.957497 0.000000 16097 +constitutesgood 0 1 6.957497 0.000000 16098 +kushmerick 0 1 6.957497 0.000000 16099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..3bdfb150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +seattl 0 120 2.079442 0.000000 103 +analysi 0 124 2.079442 0.000000 98 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +memori 0 101 2.302585 0.000000 139 +octob 0 89 2.397895 0.000000 156 +contain 0 81 2.484907 0.000000 174 +onlin 0 75 2.639057 0.000000 223 +test 0 66 2.708050 0.000000 252 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +march 0 61 2.833213 0.000000 295 +sever 0 56 2.890372 0.000000 322 +summer 0 56 2.890372 0.000000 311 +local 0 55 2.944439 0.000000 334 +found 0 53 2.944439 0.000000 337 +brian 0 38 3.295837 0.000000 466 +game 0 36 3.367296 0.000000 498 +known 0 24 3.761200 0.000000 702 +reduc 0 22 3.850148 0.000000 759 +sort 0 22 3.850148 0.000000 738 +safeti 0 20 3.951244 0.000000 817 +wonder 0 20 3.951244 0.000000 815 +bershad 0 18 4.060443 0.000000 902 +chateau 0 16 4.174387 0.000000 997 +overhead 0 15 4.248495 0.000000 1035 +karlin 0 13 4.382027 0.000000 1176 +nanci 0 12 4.465908 0.000000 1256 +anna 0 12 4.465908 0.000000 1292 +promot 0 12 4.465908 0.000000 1235 +isca 0 11 4.553877 0.000000 1354 +itali 0 11 4.553877 0.000000 1378 +leveson 0 9 4.753590 0.000000 1540 +wayn 0 8 4.875197 0.000000 1738 +romer 0 8 4.875197 0.000000 1706 +guggenheim 0 8 4.875197 0.000000 1759 +invest 0 6 5.164786 0.000000 2153 +spare 0 6 5.164786 0.000000 2177 +ohlrich 0 5 5.347108 0.000000 2564 +annex 0 5 5.347108 0.000000 2572 +sytem 0 4 5.568345 0.000000 3015 +superpag 0 3 5.857933 0.000000 3978 +damag 0 2 6.263398 0.000000 5687 +debut 0 1 6.957497 0.000000 16100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..6d977601 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +hour 0 165 1.791759 0.000000 46 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +mathemat 0 108 2.197225 0.000000 123 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +resum 0 79 2.564949 0.000000 217 +summari 0 73 2.639057 0.000000 237 +sieg 0 69 2.708050 0.000000 260 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +wednesdai 0 64 2.772589 0.000000 261 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +plai 0 60 2.833213 0.000000 307 +summer 0 56 2.890372 0.000000 311 +three 0 54 2.944439 0.000000 330 +cool 0 49 3.044522 0.000000 374 +california 0 46 3.091042 0.000000 388 +move 0 47 3.091042 0.000000 382 +quarter 0 47 3.091042 0.000000 389 +made 0 44 3.135494 0.000000 398 +york 0 41 3.218876 0.000000 435 +live 0 40 3.258097 0.000000 451 +origin 0 38 3.295837 0.000000 472 +seminar 0 38 3.295837 0.000000 470 +option 0 30 3.555348 0.000000 575 +usual 0 28 3.610918 0.000000 608 +berkelei 0 26 3.688879 0.000000 657 +departmentunivers 0 24 3.761200 0.000000 711 +daili 0 24 3.761200 0.000000 706 +ofwashington 0 22 3.850148 0.000000 766 +whole 0 17 4.110874 0.000000 940 +took 0 16 4.174387 0.000000 1010 +basketbal 0 12 4.465908 0.000000 1289 +employ 0 12 4.465908 0.000000 1291 +realiti 0 12 4.465908 0.000000 1272 +israel 0 11 4.553877 0.000000 1366 +seven 0 9 4.753590 0.000000 1561 +angel 0 8 4.875197 0.000000 1779 +potenti 0 8 4.875197 0.000000 1690 +studentcomput 0 7 5.010635 0.000000 1963 +bunch 0 7 5.010635 0.000000 1861 +hike 0 6 5.164786 0.000000 2234 +northeast 0 3 5.857933 0.000000 3922 +haifa 0 3 5.857933 0.000000 3554 +kwon 0 3 5.857933 0.000000 3690 +gershoni 1 2 6.263398 6.263398 4513 +shirt 0 2 6.263398 0.000000 5977 +washingtonoffic 0 1 6.957497 0.000000 16101 +fouryear 0 1 6.957497 0.000000 16102 +lancast 0 1 6.957497 0.000000 16103 +holon 0 1 6.957497 0.000000 16104 +amta 0 1 6.957497 0.000000 16105 +aremondai 0 1 6.957497 0.000000 16106 +tose 0 1 6.957497 0.000000 16107 +graphicsprogram 0 1 6.957497 0.000000 16108 +riderlink 0 1 6.957497 0.000000 16109 +seattletransport 0 1 6.957497 0.000000 16110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..c9ceebe2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +teach 0 108 2.197225 0.000000 112 +check 0 115 2.197225 0.000000 118 +find 0 111 2.197225 0.000000 111 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +section 0 94 2.397895 0.000000 149 +pictur 0 89 2.397895 0.000000 160 +school 0 84 2.484907 0.000000 188 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +educ 0 86 2.484907 0.000000 191 +would 0 67 2.708050 0.000000 251 +sieg 0 69 2.708050 0.000000 260 +experi 0 64 2.772589 0.000000 283 +advisor 0 51 2.995732 0.000000 355 +profession 0 51 2.995732 0.000000 345 +realli 0 40 3.258097 0.000000 444 +go 0 33 3.433987 0.000000 529 +sometim 0 24 3.761200 0.000000 696 +head 0 23 3.806662 0.000000 732 +color 0 22 3.850148 0.000000 762 +try 0 22 3.850148 0.000000 764 +leav 0 21 3.912023 0.000000 772 +boston 0 19 4.007333 0.000000 862 +offici 0 18 4.060443 0.000000 894 +decid 0 14 4.317488 0.000000 1075 +embed 0 14 4.317488 0.000000 1102 +came 0 13 4.382027 0.000000 1197 +jean 0 10 4.653960 0.000000 1440 +northwest 0 7 5.010635 0.000000 1973 +accord 0 7 5.010635 0.000000 1826 +hack 0 7 5.010635 0.000000 1950 +chinook 0 6 5.164786 0.000000 2229 +gaetano 0 6 5.164786 0.000000 2068 +beer 0 6 5.164786 0.000000 2216 +german 0 6 5.164786 0.000000 2190 +myresum 0 6 5.164786 0.000000 2199 +ortega 0 5 5.347108 0.000000 2559 +ross 0 5 5.347108 0.000000 2243 +knew 0 5 5.347108 0.000000 2445 +borriello 0 5 5.347108 0.000000 2349 +wear 0 4 5.568345 0.000000 2785 +albert 0 2 6.263398 0.000000 5987 +unoffici 0 2 6.263398 0.000000 5909 +brew 0 2 6.263398 0.000000 5988 +shepherd 0 2 6.263398 0.000000 4347 +behav 0 2 6.263398 0.000000 4670 +tequila 1 1 6.957497 6.957497 16111 +einstein 0 1 6.957497 0.000000 16112 +pageaft 0 1 6.957497 0.000000 16113 +puppi 0 1 6.957497 0.000000 16114 +updatedthu 0 1 6.957497 0.000000 16115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..e59d2083 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +relat 0 139 1.945910 0.000000 68 +seattl 1 120 2.079442 2.079442 103 +document 0 121 2.079442 0.000000 89 +tool 0 117 2.079442 0.000000 93 +site 0 106 2.197225 0.000000 119 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +graphic 0 90 2.397895 0.000000 147 +search 0 95 2.397895 0.000000 155 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +free 0 73 2.639057 0.000000 224 +nation 0 74 2.639057 0.000000 240 +simul 0 66 2.708050 0.000000 255 +share 0 59 2.833213 0.000000 304 +think 0 57 2.890372 0.000000 314 +thesi 0 57 2.890372 0.000000 327 +instruct 0 53 2.944439 0.000000 332 +week 0 52 2.995732 0.000000 343 +particular 0 51 2.995732 0.000000 352 +featur 0 46 3.091042 0.000000 386 +show 0 43 3.178054 0.000000 417 +around 0 43 3.178054 0.000000 415 +http 0 41 3.218876 0.000000 420 +movi 0 40 3.258097 0.000000 459 +everi 0 34 3.401197 0.000000 519 +consid 0 29 3.583519 0.000000 590 +weather 0 28 3.610918 0.000000 618 +though 0 27 3.637586 0.000000 622 +background 0 25 3.737670 0.000000 664 +trace 0 25 3.737670 0.000000 677 +sometim 0 24 3.761200 0.000000 696 +disk 0 22 3.850148 0.000000 747 +item 0 19 4.007333 0.000000 856 +particularli 0 19 4.007333 0.000000 867 +runtim 0 19 4.007333 0.000000 858 +log 0 19 4.007333 0.000000 857 +engineeringunivers 0 17 4.110874 0.000000 959 +white 0 17 4.110874 0.000000 951 +regular 0 17 4.110874 0.000000 929 +letter 0 16 4.174387 0.000000 981 +month 0 15 4.248495 0.000000 1025 +doesn 0 15 4.248495 0.000000 1055 +stori 0 14 4.317488 0.000000 1087 +washingtonbox 0 13 4.382027 0.000000 1200 +weak 0 13 4.382027 0.000000 1159 +minor 0 12 4.465908 0.000000 1237 +newspap 0 12 4.465908 0.000000 1280 +bill 0 11 4.553877 0.000000 1297 +black 0 10 4.653960 0.000000 1418 +telecommun 0 9 4.753590 0.000000 1565 +transport 0 8 4.875197 0.000000 1672 +film 0 8 4.875197 0.000000 1761 +ethic 0 7 5.010635 0.000000 1786 +courtesi 0 7 5.010635 0.000000 1953 +blue 0 6 5.164786 0.000000 2227 +quick 0 6 5.164786 0.000000 2184 +famou 0 6 5.164786 0.000000 2185 +legal 0 6 5.164786 0.000000 2094 +privaci 0 6 5.164786 0.000000 2144 +everybodi 0 5 5.347108 0.000000 2517 +oncomput 0 5 5.347108 0.000000 2326 +truli 0 5 5.347108 0.000000 2476 +festiv 0 4 5.568345 0.000000 2952 +flat 0 3 5.857933 0.000000 3853 +icon 0 3 5.857933 0.000000 3362 +unrel 0 3 5.857933 0.000000 3837 +ribbon 0 2 6.263398 0.000000 5973 +quiet 0 2 6.263398 0.000000 5203 +likewis 0 2 6.263398 0.000000 4100 +blink 0 2 6.263398 0.000000 5067 +anymor 0 2 6.263398 0.000000 5938 +gross 0 2 6.263398 0.000000 5989 +beth 0 1 6.957497 0.000000 16116 +pardo 0 1 6.957497 0.000000 16117 +courtesei 0 1 6.957497 0.000000 16118 +untitl 0 1 6.957497 0.000000 16119 +morri 0 1 6.957497 0.000000 16120 +pardodepart 0 1 6.957497 0.000000 16121 +washingtonusapardo 0 1 6.957497 0.000000 16122 +edunot 0 1 6.957497 0.000000 16123 +asimgsrc 0 1 6.957497 0.000000 16124 +blueribbon 0 1 6.957497 0.000000 16125 +rib_trn_plain_sm 0 1 6.957497 0.000000 16126 +opposit 0 1 6.957497 0.000000 16127 +speechprohibit 0 1 6.957497 0.000000 16128 +academicsom 0 1 6.957497 0.000000 16129 +papersi 0 1 6.957497 0.000000 16130 +rtcg 0 1 6.957497 0.000000 16131 +architectureandcompil 0 1 6.957497 0.000000 16132 +otherpeopl 0 1 6.957497 0.000000 16133 +stylenon 0 1 6.957497 0.000000 16134 +academicfeatur 0 1 6.957497 0.000000 16135 +itemsbicyclesbusinessescomputersfoodhumori 0 1 6.957497 0.000000 16136 +weirdnesslinux 0 1 6.957497 0.000000 16137 +journalmusicgoofi 0 1 6.957497 0.000000 16138 +politicssci 0 1 6.957497 0.000000 16139 +dant 0 1 6.957497 0.000000 16140 +trepan 0 1 6.957497 0.000000 16141 +wors 0 1 6.957497 0.000000 16142 +newhous 0 1 6.957497 0.000000 16143 +yesterdai 0 1 6.957497 0.000000 16144 +stuffpardo 0 1 6.957497 0.000000 16145 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..6e917847 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +relat 0 139 1.945910 0.000000 68 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +find 0 111 2.197225 0.000000 111 +book 0 99 2.302585 0.000000 131 +commun 0 95 2.397895 0.000000 157 +school 0 84 2.484907 0.000000 188 +activ 0 84 2.484907 0.000000 182 +resourc 0 81 2.484907 0.000000 172 +resum 0 79 2.564949 0.000000 217 +descript 0 64 2.772589 0.000000 271 +variou 0 56 2.890372 0.000000 317 +extens 0 53 2.944439 0.000000 340 +week 0 52 2.995732 0.000000 343 +life 0 50 3.044522 0.000000 375 +mechan 0 43 3.178054 0.000000 416 +third 0 43 3.178054 0.000000 412 +music 0 42 3.218876 0.000000 436 +brian 0 38 3.295837 0.000000 466 +connect 0 37 3.332205 0.000000 485 +short 0 36 3.367296 0.000000 499 +built 0 29 3.583519 0.000000 592 +enjoi 0 26 3.688879 0.000000 660 +mine 0 26 3.688879 0.000000 654 +miscellan 0 23 3.806662 0.000000 731 +theunivers 0 21 3.912023 0.000000 797 +busi 0 21 3.912023 0.000000 784 +grad 0 20 3.951244 0.000000 837 +bershad 0 18 4.060443 0.000000 902 +coupl 0 17 4.110874 0.000000 939 +later 0 15 4.248495 0.000000 1043 +fill 0 11 4.553877 0.000000 1349 +besid 0 8 4.875197 0.000000 1681 +hike 0 6 5.164786 0.000000 2234 +outdoor 0 5 5.347108 0.000000 2514 +pardyak 0 4 5.568345 0.000000 3043 +withth 0 4 5.568345 0.000000 2805 +outdat 0 4 5.568345 0.000000 2797 +somehow 0 4 5.568345 0.000000 2974 +ofoper 0 3 5.857933 0.000000 3292 +gloriou 0 3 5.857933 0.000000 3816 +leadership 0 3 5.857933 0.000000 3320 +emerald 0 3 5.857933 0.000000 3979 +poland 0 3 5.857933 0.000000 3665 +unrel 0 3 5.857933 0.000000 3837 +przemyslaw 0 2 6.263398 0.000000 5808 +basedprogram 0 2 6.263398 0.000000 5700 +pardi 0 1 6.957497 0.000000 16146 +drizzl 0 1 6.957497 0.000000 16147 +przemek 0 1 6.957497 0.000000 16148 +interast 0 1 6.957497 0.000000 16149 +notbusi 0 1 6.957497 0.000000 16150 +happenswhen 0 1 6.957497 0.000000 16151 +projectsspinan 0 1 6.957497 0.000000 16152 +systemsgroup 0 1 6.957497 0.000000 16153 +polish 0 1 6.957497 0.000000 16154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..c19547cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +contact 0 153 1.791759 0.000000 59 +recent 0 167 1.791759 0.000000 58 +year 1 148 1.945910 1.945910 84 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +document 0 121 2.079442 0.000000 89 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +school 0 84 2.484907 0.000000 188 +second 0 81 2.484907 0.000000 166 +stuff 0 87 2.484907 0.000000 171 +come 0 78 2.564949 0.000000 202 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +know 0 80 2.564949 0.000000 198 +html 0 75 2.639057 0.000000 235 +sieg 0 69 2.708050 0.000000 260 +receiv 0 66 2.708050 0.000000 244 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +variou 0 56 2.890372 0.000000 317 +undergradu 0 54 2.944439 0.000000 338 +friend 0 48 3.044522 0.000000 376 +move 0 47 3.091042 0.000000 382 +done 0 47 3.091042 0.000000 381 +anoth 0 45 3.135494 0.000000 408 +better 0 45 3.135494 0.000000 401 +made 0 44 3.135494 0.000000 398 +around 0 43 3.178054 0.000000 415 +might 0 41 3.218876 0.000000 426 +live 0 40 3.258097 0.000000 451 +annual 0 40 3.258097 0.000000 458 +paul 0 38 3.295837 0.000000 471 +electr 0 38 3.295837 0.000000 461 +jame 0 35 3.401197 0.000000 507 +return 0 34 3.401197 0.000000 502 +everi 0 34 3.401197 0.000000 519 +taught 0 33 3.433987 0.000000 526 +express 0 32 3.465736 0.000000 540 +photo 0 31 3.496508 0.000000 561 +someth 0 31 3.496508 0.000000 554 +travel 0 30 3.555348 0.000000 579 +univ 0 28 3.610918 0.000000 617 +usual 0 28 3.610918 0.000000 608 +hope 0 28 3.610918 0.000000 610 +concern 0 25 3.737670 0.000000 666 +hill 0 25 3.737670 0.000000 670 +never 0 25 3.737670 0.000000 671 +togeth 0 23 3.806662 0.000000 714 +dai 0 22 3.850148 0.000000 753 +inth 0 22 3.850148 0.000000 741 +love 0 21 3.912023 0.000000 804 +tell 0 21 3.912023 0.000000 777 +rout 0 21 3.912023 0.000000 793 +longer 0 20 3.951244 0.000000 816 +north 0 19 4.007333 0.000000 873 +particularli 0 19 4.007333 0.000000 867 +offici 0 18 4.060443 0.000000 894 +lot 0 18 4.060443 0.000000 889 +took 0 16 4.174387 0.000000 1010 +easi 0 16 4.174387 0.000000 969 +prolog 0 13 4.382027 0.000000 1155 +someon 0 13 4.382027 0.000000 1128 +everyon 0 13 4.382027 0.000000 1148 +brother 0 13 4.382027 0.000000 1189 +scan 0 12 4.465908 0.000000 1243 +rememb 0 12 4.465908 0.000000 1217 +rest 0 12 4.465908 0.000000 1259 +danc 0 12 4.465908 0.000000 1278 +chri 0 11 4.553877 0.000000 1311 +bike 0 10 4.653960 0.000000 1468 +franklin 0 10 4.653960 0.000000 1436 +rapid 0 10 4.653960 0.000000 1453 +town 0 10 4.653960 0.000000 1458 +folk 0 9 4.753590 0.000000 1597 +pagei 0 8 4.875197 0.000000 1683 +character 0 8 4.875197 0.000000 1767 +on 0 8 4.875197 0.000000 1628 +andcomput 0 8 4.875197 0.000000 1623 +partner 0 8 4.875197 0.000000 1648 +ride 0 8 4.875197 0.000000 1741 +davi 0 7 5.010635 0.000000 1888 +fortun 0 7 5.010635 0.000000 1872 +necessarili 0 7 5.010635 0.000000 1899 +throughout 0 7 5.010635 0.000000 1871 +portland 0 7 5.010635 0.000000 1878 +header 0 7 5.010635 0.000000 1787 +somewher 0 6 5.164786 0.000000 2176 +restaur 0 6 5.164786 0.000000 2230 +drop 0 6 5.164786 0.000000 2008 +poem 0 5 5.347108 0.000000 2483 +exchang 0 5 5.347108 0.000000 2310 +hire 0 4 5.568345 0.000000 2976 +glad 0 4 5.568345 0.000000 2657 +tend 0 4 5.568345 0.000000 3041 +kept 0 4 5.568345 0.000000 2762 +pagepaul 0 3 5.857933 0.000000 3669 +thesumm 0 3 5.857933 0.000000 3763 +matt 0 3 5.857933 0.000000 3792 +flat 0 3 5.857933 0.000000 3853 +aboutthi 0 2 6.263398 0.000000 5627 +oneof 0 2 6.263398 0.000000 4674 +diploma 0 2 6.263398 0.000000 5990 +bergen 0 2 6.263398 0.000000 5991 +itin 0 2 6.263398 0.000000 5992 +toseattl 0 2 6.263398 0.000000 5919 +myoffic 0 1 6.957497 0.000000 16155 +iliv 0 1 6.957497 0.000000 16156 +norwegian 0 1 6.957497 0.000000 16157 +likea 0 1 6.957497 0.000000 16158 +localchines 0 1 6.957497 0.000000 16159 +mundan 0 1 6.957497 0.000000 16160 +stuffi 0 1 6.957497 0.000000 16161 +hotlink 0 1 6.957497 0.000000 16162 +pagesstuff 0 1 6.957497 0.000000 16163 +maintainmi 0 1 6.957497 0.000000 16164 +mewher 0 1 6.957497 0.000000 16165 +inmorgan 0 1 6.957497 0.000000 16166 +fromuc 0 1 6.957497 0.000000 16167 +andy 0 1 6.957497 0.000000 16168 +ididn 0 1 6.957497 0.000000 16169 +ialso 0 1 6.957497 0.000000 16170 +stuffwhil 0 1 6.957497 0.000000 16171 +relatedact 0 1 6.957497 0.000000 16172 +evengot 0 1 6.957497 0.000000 16173 +marriag 0 1 6.957497 0.000000 16174 +joann 0 1 6.957497 0.000000 16175 +anexcus 0 1 6.957497 0.000000 16176 +ofbergen 0 1 6.957497 0.000000 16177 +hillier 0 1 6.957497 0.000000 16178 +returnedto 0 1 6.957497 0.000000 16179 +rollerblad 0 1 6.957497 0.000000 16180 +wasnow 0 1 6.957497 0.000000 16181 +hewlettpackard 0 1 6.957497 0.000000 16182 +vengeanc 0 1 6.957497 0.000000 16183 +intwo 0 1 6.957497 0.000000 16184 +inseason 0 1 6.957497 0.000000 16185 +justcommut 0 1 6.957497 0.000000 16186 +lindyhop 0 1 6.957497 0.000000 16187 +linethat 0 1 6.957497 0.000000 16188 +doctyp 0 1 6.957497 0.000000 16189 +ietf 0 1 6.957497 0.000000 16190 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..957a26b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +implement 0 152 1.791759 0.000000 52 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +memori 0 101 2.302585 0.000000 139 +take 0 97 2.302585 0.000000 134 +pictur 0 89 2.397895 0.000000 160 +graphic 0 90 2.397895 0.000000 147 +proceed 0 93 2.397895 0.000000 152 +decemb 0 80 2.564949 0.000000 215 +name 0 72 2.639057 0.000000 220 +symposium 0 72 2.639057 0.000000 238 +collect 0 65 2.772589 0.000000 268 +found 0 53 2.944439 0.000000 337 +much 0 52 2.995732 0.000000 349 +principl 0 48 3.044522 0.000000 357 +quarter 0 47 3.091042 0.000000 389 +even 0 45 3.135494 0.000000 393 +might 1 41 3.218876 3.218876 426 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +workstat 0 37 3.332205 0.000000 479 +everi 0 34 3.401197 0.000000 519 +global 0 34 3.401197 0.000000 520 +michael 0 35 3.401197 0.000000 514 +taught 0 33 3.433987 0.000000 526 +often 0 31 3.496508 0.000000 551 +cluster 0 28 3.610918 0.000000 612 +although 0 25 3.737670 0.000000 667 +william 0 22 3.850148 0.000000 765 +minut 0 20 3.951244 0.000000 810 +wonder 0 20 3.951244 0.000000 815 +mpeg 0 20 3.951244 0.000000 831 +supervis 0 20 3.951244 0.000000 840 +citi 0 19 4.007333 0.000000 874 +five 0 19 4.007333 0.000000 841 +otherwis 0 17 4.110874 0.000000 922 +action 0 15 4.248495 0.000000 1038 +qual 0 15 4.248495 0.000000 1062 +comic 0 14 4.317488 0.000000 1103 +levi 0 14 4.317488 0.000000 1093 +squar 0 14 4.317488 0.000000 1082 +karlin 1 13 4.382027 4.382027 1176 +nick 0 13 4.382027 0.000000 1180 +anna 1 12 4.465908 4.465908 1292 +rest 0 12 4.465908 0.000000 1259 +song 0 11 4.553877 0.000000 1380 +henri 0 10 4.653960 0.000000 1417 +swim 0 9 4.753590 0.000000 1599 +french 0 9 4.753590 0.000000 1511 +morgan 0 9 4.753590 0.000000 1484 +juan 0 9 4.753590 0.000000 1580 +tourist 0 8 4.875197 0.000000 1710 +surpris 0 7 5.010635 0.000000 1828 +feelei 0 7 5.010635 0.000000 1859 +rain 0 6 5.164786 0.000000 2137 +cat 0 6 5.164786 0.000000 2194 +pari 0 6 5.164786 0.000000 2158 +lucki 0 6 5.164786 0.000000 2163 +gui 0 5 5.347108 0.000000 2573 +british 0 5 5.347108 0.000000 2546 +formerli 0 5 5.347108 0.000000 2397 +door 0 5 5.347108 0.000000 2291 +darren 0 5 5.347108 0.000000 2565 +pighin 1 4 5.568345 5.568345 2735 +ta 0 4 5.568345 0.000000 3058 +breath 0 4 5.568345 0.000000 2946 +dark 0 4 5.568345 0.000000 2910 +berlin 0 3 5.857933 0.000000 3263 +marin 0 3 5.857933 0.000000 3947 +freder 0 3 5.857933 0.000000 3352 +thekkath 0 3 5.857933 0.000000 3973 +monti 0 2 6.263398 0.000000 4993 +python 0 2 6.263398 0.000000 4994 +cave 0 2 6.263398 0.000000 4959 +italian 0 2 6.263398 0.000000 5993 +simpson 0 2 6.263398 0.000000 5994 +chandramohan 0 2 6.263398 0.000000 5965 +refresh 0 1 6.957497 0.000000 16191 +frdric 0 1 6.957497 0.000000 16192 +lcommun 0 1 6.957497 0.000000 16193 +dani 0 1 6.957497 0.000000 16194 +corto 0 1 6.957497 0.000000 16195 +maltes 0 1 6.957497 0.000000 16196 +venis 0 1 6.957497 0.000000 16197 +traditionn 0 1 6.957497 0.000000 16198 +systemher 0 1 6.957497 0.000000 16199 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..356c3954 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +map 0 39 3.258097 0.000000 452 +anderson 1 19 4.007333 4.007333 860 +brother 0 13 4.382027 0.000000 1189 +ruth 1 7 5.010635 5.010635 1870 +wxyc 0 1 6.957497 0.000000 16200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..57f47b7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +redston 1 3 5.857933 5.857933 3332 +joshua 0 3 5.857933 0.000000 3333 +josh 0 2 6.263398 0.000000 5945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..d7abfbdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,163 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +professor 0 137 1.945910 0.000000 76 +construct 0 139 1.945910 0.000000 82 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +seattl 0 120 2.079442 0.000000 103 +databas 0 122 2.079442 0.000000 86 +manag 0 114 2.197225 0.000000 125 +site 0 106 2.197225 0.000000 119 +make 0 111 2.197225 0.000000 120 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +imag 0 91 2.397895 0.000000 161 +pictur 0 89 2.397895 0.000000 160 +build 0 85 2.484907 0.000000 184 +librari 0 87 2.484907 0.000000 181 +good 0 77 2.564949 0.000000 200 +onlin 0 75 2.639057 0.000000 223 +line 0 75 2.639057 0.000000 231 +write 0 72 2.639057 0.000000 222 +knowledg 0 67 2.708050 0.000000 243 +would 0 67 2.708050 0.000000 251 +java 0 70 2.708050 0.000000 248 +interact 0 62 2.772589 0.000000 270 +visit 0 63 2.772589 0.000000 288 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +local 0 55 2.944439 0.000000 334 +extens 0 53 2.944439 0.000000 340 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +run 0 51 2.995732 0.000000 347 +profession 0 51 2.995732 0.000000 345 +get 0 46 3.091042 0.000000 380 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +live 0 40 3.258097 0.000000 451 +probabl 0 40 3.258097 0.000000 455 +map 0 39 3.258097 0.000000 452 +movi 0 40 3.258097 0.000000 459 +multi 0 36 3.367296 0.000000 493 +everi 0 34 3.401197 0.000000 519 +jame 0 35 3.401197 0.000000 507 +word 0 34 3.401197 0.000000 508 +taught 0 33 3.433987 0.000000 526 +human 0 32 3.465736 0.000000 546 +framework 0 28 3.610918 0.000000 606 +team 0 27 3.637586 0.000000 625 +great 0 27 3.637586 0.000000 626 +todai 0 25 3.737670 0.000000 672 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +famili 0 23 3.806662 0.000000 735 +ofwashington 0 22 3.850148 0.000000 766 +color 0 22 3.850148 0.000000 762 +wonder 0 20 3.951244 0.000000 815 +nice 0 20 3.951244 0.000000 809 +toolkit 0 20 3.951244 0.000000 835 +expert 0 20 3.951244 0.000000 833 +wrote 0 20 3.951244 0.000000 830 +citi 0 19 4.007333 0.000000 874 +media 0 19 4.007333 0.000000 861 +els 0 19 4.007333 0.000000 843 +beauti 0 18 4.060443 0.000000 912 +stand 0 18 4.060443 0.000000 891 +repositori 0 17 4.110874 0.000000 932 +stat 0 17 4.110874 0.000000 924 +stock 0 16 4.174387 0.000000 1007 +psycholog 0 15 4.248495 0.000000 1054 +camera 0 14 4.317488 0.000000 1115 +happi 0 14 4.317488 0.000000 1079 +bodi 0 13 4.382027 0.000000 1178 +script 0 13 4.382027 0.000000 1171 +step 0 13 4.382027 0.000000 1138 +outsid 0 12 4.465908 0.000000 1219 +perl 0 11 4.553877 0.000000 1332 +magic 0 11 4.553877 0.000000 1358 +market 0 11 4.553877 0.000000 1361 +town 0 10 4.653960 0.000000 1458 +traffic 0 10 4.653960 0.000000 1421 +departmentof 0 9 4.753590 0.000000 1539 +clear 0 9 4.753590 0.000000 1488 +brain 0 8 4.875197 0.000000 1638 +rais 0 8 4.875197 0.000000 1711 +leon 0 8 4.875197 0.000000 1631 +chanc 0 7 5.010635 0.000000 1960 +vallei 0 7 5.010635 0.000000 1959 +footbal 0 7 5.010635 0.000000 1912 +rain 0 6 5.164786 0.000000 2137 +peek 0 6 5.164786 0.000000 2169 +camp 0 5 5.347108 0.000000 2545 +poem 0 5 5.347108 0.000000 2483 +fountain 0 4 5.568345 0.000000 3069 +catch 0 4 5.568345 0.000000 2602 +glimps 0 4 5.568345 0.000000 2778 +proud 0 4 5.568345 0.000000 2918 +leagu 0 4 5.568345 0.000000 3040 +fantasi 0 4 5.568345 0.000000 3055 +jakobovit 1 3 5.857933 5.857933 3913 +hawaii 0 3 5.857933 0.000000 3888 +foster 0 3 5.857933 0.000000 3159 +eddi 0 3 5.857933 0.000000 3896 +drag 0 3 5.857933 0.000000 3434 +mount 0 2 6.263398 0.000000 5995 +youcan 0 2 6.263398 0.000000 4373 +glori 0 2 6.263398 0.000000 5327 +consol 0 2 6.263398 0.000000 4048 +atla 0 2 6.263398 0.000000 5996 +databaseenviron 0 2 6.263398 0.000000 5792 +persistentprogram 0 2 6.263398 0.000000 5997 +creator 0 2 6.263398 0.000000 5998 +ladder 0 2 6.263398 0.000000 5858 +newslet 0 2 6.263398 0.000000 5860 +dian 0 2 6.263398 0.000000 5536 +judi 0 2 6.263398 0.000000 4442 +usai 0 1 6.957497 0.000000 16201 +thisup 0 1 6.957497 0.000000 16202 +weatherreport 0 1 6.957497 0.000000 16203 +sneak 0 1 6.957497 0.000000 16204 +drumhel 0 1 6.957497 0.000000 16205 +rainier 0 1 6.957497 0.000000 16206 +cleardai 0 1 6.957497 0.000000 16207 +astructur 0 1 6.957497 0.000000 16208 +anatomist 0 1 6.957497 0.000000 16209 +internetracquetbal 0 1 6.957497 0.000000 16210 +rotisseriebasebal 0 1 6.957497 0.000000 16211 +fromusa 0 1 6.957497 0.000000 16212 +africancichlid 0 1 6.957497 0.000000 16213 +honolulu 0 1 6.957497 0.000000 16214 +kalalau 0 1 6.957497 0.000000 16215 +gambl 0 1 6.957497 0.000000 16216 +darn 0 1 6.957497 0.000000 16217 +javafamili 0 1 6.957497 0.000000 16218 +mydad 0 1 6.957497 0.000000 16219 +whoi 0 1 6.957497 0.000000 16220 +polem 0 1 6.957497 0.000000 16221 +emanuel 0 1 6.957497 0.000000 16222 +swedenborg 0 1 6.957497 0.000000 16223 +nahl 0 1 6.957497 0.000000 16224 +whoprovid 0 1 6.957497 0.000000 16225 +realtor 0 1 6.957497 0.000000 16226 +uncl 0 1 6.957497 0.000000 16227 +bioscienc 0 1 6.957497 0.000000 16228 +bookmarksif 0 1 6.957497 0.000000 16229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..7e2587cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +welcom 1 122 2.079442 2.079442 99 +right 1 48 3.044522 3.044522 363 +robert 1 30 3.555348 3.555348 567 +head 1 23 3.806662 3.806662 732 +reserv 1 20 3.951244 3.951244 808 +five 1 19 4.007333 4.007333 841 +photograph 1 15 4.248495 4.248495 1056 +galleri 1 13 4.382027 4.382027 1192 +twenti 1 5 5.347108 5.347108 2540 +twilight 1 1 6.957497 6.957497 16230 +grimm 1 1 6.957497 6.957497 16231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..32685fbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +perform 0 143 1.945910 0.000000 74 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +memori 0 101 2.302585 0.000000 139 +peopl 0 96 2.302585 0.000000 132 +larg 0 82 2.484907 0.000000 168 +learn 0 86 2.484907 0.000000 170 +journal 0 83 2.484907 0.000000 183 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +addit 0 74 2.639057 0.000000 228 +sieg 0 69 2.708050 0.000000 260 +would 0 67 2.708050 0.000000 251 +polici 0 64 2.772589 0.000000 279 +abstract 0 62 2.772589 0.000000 276 +experi 0 64 2.772589 0.000000 283 +result 0 65 2.772589 0.000000 281 +direct 0 57 2.890372 0.000000 316 +three 0 54 2.944439 0.000000 330 +hardwar 0 51 2.995732 0.000000 350 +friend 0 48 3.044522 0.000000 376 +standard 0 48 3.044522 0.000000 365 +could 0 46 3.091042 0.000000 383 +physic 0 47 3.091042 0.000000 377 +mark 0 44 3.135494 0.000000 403 +cach 0 41 3.218876 0.000000 432 +edit 0 42 3.218876 0.000000 418 +realli 0 40 3.258097 0.000000 444 +map 0 39 3.258097 0.000000 452 +origin 0 38 3.295837 0.000000 472 +bibliographi 0 34 3.401197 0.000000 518 +eduoffic 0 33 3.433987 0.000000 531 +scientist 0 31 3.496508 0.000000 560 +travel 0 30 3.555348 0.000000 579 +american 0 27 3.637586 0.000000 634 +symbol 0 27 3.637586 0.000000 620 +subject 0 26 3.688879 0.000000 647 +hill 0 25 3.737670 0.000000 670 +interpret 0 24 3.761200 0.000000 686 +togeth 0 23 3.806662 0.000000 714 +reduc 0 22 3.850148 0.000000 759 +chen 0 21 3.912023 0.000000 791 +avoid 0 21 3.912023 0.000000 799 +hous 0 21 3.912023 0.000000 801 +wrote 0 20 3.951244 0.000000 830 +miss 0 19 4.007333 0.000000 866 +els 0 19 4.007333 0.000000 843 +bershad 0 18 4.060443 0.000000 902 +attend 0 18 4.060443 0.000000 893 +asplo 0 17 4.110874 0.000000 948 +thought 0 17 4.110874 0.000000 945 +chateau 0 16 4.174387 0.000000 997 +took 0 16 4.174387 0.000000 1010 +overhead 0 15 4.248495 0.000000 1035 +countri 0 15 4.248495 0.000000 1059 +levi 0 14 4.317488 0.000000 1093 +conduct 0 14 4.317488 0.000000 1065 +karlin 0 13 4.382027 0.000000 1176 +alan 0 13 4.382027 0.000000 1146 +brad 0 12 4.465908 0.000000 1264 +anna 0 12 4.465908 0.000000 1292 +promot 0 12 4.465908 0.000000 1235 +food 0 12 4.465908 0.000000 1285 +speak 0 12 4.465908 0.000000 1283 +isca 0 11 4.553877 0.000000 1354 +smart 0 11 4.553877 0.000000 1352 +denni 0 11 4.553877 0.000000 1321 +baer 0 11 4.553877 0.000000 1353 +wong 0 9 4.753590 0.000000 1609 +osdi 0 9 4.753590 0.000000 1534 +voelker 0 9 4.753590 0.000000 1557 +said 0 9 4.753590 0.000000 1571 +didn 0 9 4.753590 0.000000 1563 +romer 1 8 4.875197 4.875197 1706 +wayn 0 8 4.875197 0.000000 1738 +judg 0 8 4.875197 0.000000 1644 +interestsi 0 7 5.010635 0.000000 1969 +supportfor 0 7 5.010635 0.000000 1854 +tip 0 7 5.010635 0.000000 1863 +conflict 0 6 5.164786 0.000000 2041 +theproject 0 6 5.164786 0.000000 1981 +wolman 0 6 5.164786 0.000000 2093 +edward 0 6 5.164786 0.000000 2050 +ohlrich 0 5 5.347108 0.000000 2564 +hair 0 5 5.347108 0.000000 2446 +ration 0 5 5.347108 0.000000 2427 +unknown 0 5 5.347108 0.000000 2318 +tuft 0 5 5.347108 0.000000 2575 +couldn 0 4 5.568345 0.000000 2977 +accompani 0 4 5.568345 0.000000 2666 +mappedcach 0 3 5.857933 0.000000 3928 +knee 0 3 5.857933 0.000000 3980 +surgeri 0 3 5.857933 0.000000 3975 +europ 0 3 5.857933 0.000000 3761 +lunch 0 3 5.857933 0.000000 3369 +father 0 3 5.857933 0.000000 3757 +systemswith 0 2 6.263398 0.000000 5342 +eustac 0 2 6.263398 0.000000 5866 +onlinesuperpag 0 2 6.263398 0.000000 5819 +resolutionon 0 2 6.263398 0.000000 5867 +stuffa 0 2 6.263398 0.000000 5999 +rai 0 2 6.263398 0.000000 5915 +ticker 0 2 6.263398 0.000000 5247 +likebrian 0 1 6.957497 0.000000 16232 +andwayn 0 1 6.957497 0.000000 16233 +ofinterpret 0 1 6.957497 0.000000 16234 +rockyhom 0 1 6.957497 0.000000 16235 +lobo 0 1 6.957497 0.000000 16236 +listrandom 0 1 6.957497 0.000000 16237 +limb 0 1 6.957497 0.000000 16238 +arthroscop 0 1 6.957497 0.000000 16239 +wrist 0 1 6.957497 0.000000 16240 +dylansaid 0 1 6.957497 0.000000 16241 +flowbe 0 1 6.957497 0.000000 16242 +beingexperiment 0 1 6.957497 0.000000 16243 +somepictur 0 1 6.957497 0.000000 16244 +eatsomeon 0 1 6.957497 0.000000 16245 +sincer 0 1 6.957497 0.000000 16246 +forexampl 0 1 6.957497 0.000000 16247 +leftth 0 1 6.957497 0.000000 16248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..42f1de42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,158 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +architectur 0 139 1.945910 0.000000 77 +report 0 131 2.079442 0.000000 92 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +intern 0 108 2.197225 0.000000 128 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +memori 1 101 2.302585 2.302585 139 +techniqu 0 99 2.302585 0.000000 138 +follow 0 92 2.397895 0.000000 143 +larg 0 82 2.484907 0.000000 168 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +good 0 77 2.564949 0.000000 200 +master 0 76 2.564949 0.000000 216 +onlin 0 75 2.639057 0.000000 223 +order 0 69 2.708050 0.000000 249 +simul 0 66 2.708050 0.000000 255 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +polici 0 64 2.772589 0.000000 279 +improv 0 62 2.772589 0.000000 289 +descript 0 64 2.772589 0.000000 271 +collect 0 65 2.772589 0.000000 268 +result 0 65 2.772589 0.000000 281 +copi 0 63 2.772589 0.000000 284 +simpl 0 60 2.833213 0.000000 298 +explor 0 58 2.890372 0.000000 324 +sever 0 56 2.890372 0.000000 322 +space 0 57 2.890372 0.000000 310 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +cover 0 55 2.944439 0.000000 329 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +investig 0 51 2.995732 0.000000 353 +without 0 50 3.044522 0.000000 370 +standard 0 48 3.044522 0.000000 365 +featur 0 46 3.091042 0.000000 386 +adapt 0 46 3.091042 0.000000 387 +effect 0 46 3.091042 0.000000 385 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +combin 0 42 3.218876 0.000000 421 +futur 0 41 3.218876 0.000000 427 +small 0 39 3.258097 0.000000 447 +map 0 39 3.258097 0.000000 452 +brian 0 38 3.295837 0.000000 466 +cost 0 37 3.332205 0.000000 480 +bibliographi 0 34 3.401197 0.000000 518 +compon 0 30 3.555348 0.000000 570 +pattern 0 24 3.761200 0.000000 689 +size 0 23 3.806662 0.000000 713 +identifi 0 22 3.850148 0.000000 760 +reduc 0 22 3.850148 0.000000 759 +chen 0 21 3.912023 0.000000 791 +avoid 0 21 3.912023 0.000000 799 +miss 0 19 4.007333 0.000000 866 +region 0 19 4.007333 0.000000 875 +runtim 0 19 4.007333 0.000000 858 +comparison 0 19 4.007333 0.000000 863 +bershad 0 18 4.060443 0.000000 902 +behavior 0 18 4.060443 0.000000 881 +monitor 0 17 4.110874 0.000000 941 +modif 0 17 4.110874 0.000000 913 +asplo 0 17 4.110874 0.000000 948 +overhead 0 15 4.248495 0.000000 1035 +qual 0 15 4.248495 0.000000 1062 +karlin 0 13 4.382027 0.000000 1176 +whose 0 13 4.382027 0.000000 1166 +someon 0 13 4.382027 0.000000 1128 +resolut 0 13 4.382027 0.000000 1172 +promot 0 12 4.465908 0.000000 1235 +overal 0 12 4.465908 0.000000 1254 +anna 0 12 4.465908 0.000000 1292 +isca 0 11 4.553877 0.000000 1354 +denni 0 11 4.553877 0.000000 1321 +alpha 0 11 4.553877 0.000000 1348 +operatingsystem 0 10 4.653960 0.000000 1401 +reli 0 10 4.653960 0.000000 1411 +wong 0 9 4.753590 0.000000 1609 +significantli 0 9 4.753590 0.000000 1508 +osdi 0 9 4.753590 0.000000 1534 +romer 0 8 4.875197 0.000000 1706 +wayn 0 8 4.875197 0.000000 1738 +poor 0 8 4.875197 0.000000 1736 +potenti 0 8 4.875197 0.000000 1690 +delai 0 7 5.010635 0.000000 1848 +larger 0 7 5.010635 0.000000 1875 +reduct 0 7 5.010635 0.000000 1877 +conflict 0 6 5.164786 0.000000 2041 +benefit 0 6 5.164786 0.000000 2213 +ohlrich 0 5 5.347108 0.000000 2564 +resolv 0 4 5.568345 0.000000 2675 +bottleneck 0 4 5.568345 0.000000 2769 +mip 0 4 5.568345 0.000000 2738 +superpag 0 3 5.857933 0.000000 3978 +peoplefaculti 0 3 5.857933 0.000000 3981 +dlee 0 3 5.857933 0.000000 3949 +waynew 0 3 5.857933 0.000000 3982 +reorder 0 3 5.857933 0.000000 3952 +fragment 0 2 6.263398 0.000000 6000 +contigu 0 2 6.263398 0.000000 6001 +warrant 0 2 6.263398 0.000000 5697 +washingtonmemori 0 1 6.957497 0.000000 16249 +researchdepart 0 1 6.957497 0.000000 16250 +sharesth 0 1 6.957497 0.000000 16251 +incur 0 1 6.957497 0.000000 16252 +monitorappl 0 1 6.957497 0.000000 16253 +resolvetlb 0 1 6.957497 0.000000 16254 +tlbi 0 1 6.957497 0.000000 16255 +severalmodern 0 1 6.957497 0.000000 16256 +amultipl 0 1 6.957497 0.000000 16257 +tlbperform 0 1 6.957497 0.000000 16258 +ofwast 0 1 6.957497 0.000000 16259 +todiffer 0 1 6.957497 0.000000 16260 +constructingsuperpag 0 1 6.957497 0.000000 16261 +ofmemori 0 1 6.957497 0.000000 16262 +balancesth 0 1 6.957497 0.000000 16263 +tlbmiss 0 1 6.957497 0.000000 16264 +memorycopi 0 1 6.957497 0.000000 16265 +misspattern 0 1 6.957497 0.000000 16266 +attain 0 1 6.957497 0.000000 16267 +largepag 0 1 6.957497 0.000000 16268 +makea 0 1 6.957497 0.000000 16269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..1f49ace0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +updat 0 191 1.609438 0.000000 41 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +intern 0 108 2.197225 0.000000 128 +structur 0 106 2.197225 0.000000 105 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +environ 0 84 2.484907 0.000000 177 +build 0 85 2.484907 0.000000 184 +appear 0 78 2.564949 0.000000 210 +sourc 0 77 2.564949 0.000000 201 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +collect 0 65 2.772589 0.000000 268 +descript 0 64 2.772589 0.000000 271 +evalu 0 64 2.772589 0.000000 266 +juli 0 60 2.833213 0.000000 305 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +processor 0 54 2.944439 0.000000 335 +execut 0 45 3.135494 0.000000 404 +examin 0 42 3.218876 0.000000 424 +brian 0 38 3.295837 0.000000 466 +becom 0 28 3.610918 0.000000 603 +measur 0 28 3.610918 0.000000 609 +strategi 0 25 3.737670 0.000000 682 +trace 0 25 3.737670 0.000000 677 +interpret 0 24 3.761200 0.000000 686 +util 0 21 3.912023 0.000000 774 +similar 0 21 3.912023 0.000000 771 +portabl 0 20 3.951244 0.000000 819 +safeti 0 20 3.951244 0.000000 817 +basi 0 20 3.951244 0.000000 828 +binari 0 20 3.951244 0.000000 823 +benchmark 0 19 4.007333 0.000000 859 +bershad 0 18 4.060443 0.000000 902 +asplo 0 17 4.110874 0.000000 948 +levi 0 14 4.317488 0.000000 1093 +demand 0 14 4.317488 0.000000 1073 +characterist 0 12 4.465908 0.000000 1257 +baer 0 11 4.553877 0.000000 1353 +perl 0 11 4.553877 0.000000 1332 +denni 0 11 4.553877 0.000000 1321 +perspect 0 10 4.653960 0.000000 1437 +jean 0 10 4.653960 0.000000 1440 +henri 0 10 4.653960 0.000000 1417 +voelker 0 9 4.753590 0.000000 1557 +wong 0 9 4.753590 0.000000 1609 +romer 0 8 4.875197 0.000000 1706 +gain 0 8 4.875197 0.000000 1730 +wayn 0 8 4.875197 0.000000 1738 +instrument 0 7 5.010635 0.000000 1954 +wolman 0 6 5.164786 0.000000 2093 +loup 0 6 5.164786 0.000000 2228 +geoff 0 6 5.164786 0.000000 2124 +eas 0 5 5.347108 0.000000 2267 +alec 0 5 5.347108 0.000000 2563 +rewrit 0 5 5.347108 0.000000 2367 +rocki 0 4 5.568345 0.000000 3048 +etch 0 4 5.568345 0.000000 2755 +increasingli 0 4 5.568345 0.000000 2766 +popular 0 4 5.568345 0.000000 2802 +insight 0 4 5.568345 0.000000 3024 +peoplefaculti 0 3 5.857933 0.000000 3981 +dlee 0 3 5.857933 0.000000 3949 +waynew 0 3 5.857933 0.000000 3982 +microbenchmark 0 2 6.263398 0.000000 5821 +mipsi 0 2 6.263398 0.000000 5882 +papersrom 0 1 6.957497 0.000000 16270 +abstractpostscriptjava 0 1 6.957497 0.000000 16271 +xjava 0 1 6.957497 0.000000 16272 +benchmarkstoolsto 0 1 6.957497 0.000000 16273 +vebeen 0 1 6.957497 0.000000 16274 +yetpublicli 0 1 6.957497 0.000000 16275 +etchhom 0 1 6.957497 0.000000 16276 +documentationproject 0 1 6.957497 0.000000 16277 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..cb15f5ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +databas 0 122 2.079442 0.000000 86 +imag 0 91 2.397895 0.000000 161 +commun 0 95 2.397895 0.000000 157 +center 0 88 2.397895 0.000000 158 +grade 0 90 2.397895 0.000000 142 +librari 0 87 2.484907 0.000000 181 +help 0 83 2.484907 0.000000 175 +educ 0 86 2.484907 0.000000 191 +intellig 0 72 2.639057 0.000000 225 +nation 0 74 2.639057 0.000000 240 +sieg 0 69 2.708050 0.000000 260 +simul 0 66 2.708050 0.000000 255 +order 0 69 2.708050 0.000000 249 +foundat 0 62 2.772589 0.000000 286 +best 0 59 2.833213 0.000000 299 +summer 0 56 2.890372 0.000000 311 +februari 0 54 2.944439 0.000000 328 +basic 0 50 3.044522 0.000000 360 +offer 0 43 3.178054 0.000000 414 +long 0 43 3.178054 0.000000 413 +obtain 0 33 3.433987 0.000000 534 +richard 0 31 3.496508 0.000000 559 +produc 0 30 3.555348 0.000000 572 +arrai 0 27 3.637586 0.000000 627 +departmentunivers 0 24 3.761200 0.000000 711 +recognit 0 23 3.806662 0.000000 723 +director 0 22 3.850148 0.000000 767 +fund 0 21 3.912023 0.000000 805 +facil 0 20 3.951244 0.000000 814 +increas 0 20 3.951244 0.000000 829 +beauti 0 18 4.060443 0.000000 912 +chateau 0 16 4.174387 0.000000 997 +massiv 0 15 4.248495 0.000000 1026 +charact 0 15 4.248495 0.000000 1028 +optic 0 12 4.465908 0.000000 1221 +minor 0 12 4.465908 0.000000 1237 +island 0 11 4.553877 0.000000 1345 +length 0 10 4.653960 0.000000 1400 +juan 0 9 4.753590 0.000000 1580 +extract 0 8 4.875197 0.000000 1728 +roger 0 7 5.010635 0.000000 1892 +northwest 0 7 5.010635 0.000000 1973 +usaoffic 0 6 5.164786 0.000000 2159 +layout 0 6 5.164786 0.000000 2183 +camp 0 5 5.347108 0.000000 2545 +educomput 0 5 5.347108 0.000000 2524 +cellular 0 5 5.347108 0.000000 2433 +girl 0 5 5.347108 0.000000 2410 +snake 0 5 5.347108 0.000000 2281 +radio 0 4 5.568345 0.000000 3025 +bake 0 2 6.263398 0.000000 4468 +scam 0 1 6.957497 0.000000 16278 +splash 0 1 6.957497 0.000000 16279 +rogersrrog 0 1 6.957497 0.000000 16280 +laboratri 0 1 6.957497 0.000000 16281 +systol 0 1 6.957497 0.000000 16282 +morpholog 0 1 6.957497 0.000000 16283 +groundtruth 0 1 6.957497 0.000000 16284 +environment 0 1 6.957497 0.000000 16285 +ncee 0 1 6.957497 0.000000 16286 +ag 0 1 6.957497 0.000000 16287 +corn 0 1 6.957497 0.000000 16288 +jessica 0 1 6.957497 0.000000 16289 +squishi 0 1 6.957497 0.000000 16290 +kuow 0 1 6.957497 0.000000 16291 +stationi 0 1 6.957497 0.000000 16292 +pecan 0 1 6.957497 0.000000 16293 +seattlelast 0 1 6.957497 0.000000 16294 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..e89a40d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +school 0 84 2.484907 0.000000 188 +stuff 0 87 2.484907 0.000000 171 +sieg 0 69 2.708050 0.000000 260 +friend 0 48 3.044522 0.000000 376 +mike 1 24 3.761200 3.761200 703 +chateau 0 16 4.174387 0.000000 997 +usaoffic 0 6 5.164786 0.000000 2159 +salisburysalisbur 0 1 6.957497 0.000000 16295 +lifehistori 0 1 6.957497 0.000000 16296 +vitacool 0 1 6.957497 0.000000 16297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..5bd75b14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +first 0 140 1.945910 0.000000 71 +support 0 132 1.945910 0.000000 83 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +version 0 113 2.197225 0.000000 122 +specif 0 106 2.197225 0.000000 106 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +need 0 98 2.302585 0.000000 135 +technic 0 100 2.302585 0.000000 140 +user 0 104 2.302585 0.000000 137 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +control 0 82 2.484907 0.000000 164 +thing 0 84 2.484907 0.000000 189 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +issu 0 78 2.564949 0.000000 211 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +decemb 0 80 2.564949 0.000000 215 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +servic 0 72 2.639057 0.000000 236 +write 0 72 2.639057 0.000000 222 +multimedia 0 68 2.708050 0.000000 258 +integr 0 67 2.708050 0.000000 245 +abstract 0 62 2.772589 0.000000 276 +januari 0 62 2.772589 0.000000 264 +march 0 61 2.833213 0.000000 295 +best 0 59 2.833213 0.000000 299 +direct 0 57 2.890372 0.000000 316 +extens 0 53 2.944439 0.000000 340 +talk 0 53 2.944439 0.000000 336 +processor 0 54 2.944439 0.000000 335 +sampl 0 53 2.944439 0.000000 339 +right 0 48 3.044522 0.000000 363 +principl 0 48 3.044522 0.000000 357 +frequent 0 49 3.044522 0.000000 367 +adapt 0 46 3.091042 0.000000 387 +third 0 43 3.178054 0.000000 412 +review 0 42 3.218876 0.000000 425 +music 0 42 3.218876 0.000000 436 +slide 0 38 3.295837 0.000000 467 +industri 0 38 3.295837 0.000000 464 +brian 0 38 3.295837 0.000000 466 +workstat 0 37 3.332205 0.000000 479 +winter 0 36 3.367296 0.000000 500 +tech 0 35 3.401197 0.000000 515 +post 0 35 3.401197 0.000000 505 +independ 0 32 3.465736 0.000000 548 +platform 0 29 3.583519 0.000000 591 +american 0 27 3.637586 0.000000 634 +quit 0 27 3.637586 0.000000 633 +arrai 0 27 3.637586 0.000000 627 +disk 0 22 3.850148 0.000000 747 +similar 0 21 3.912023 0.000000 771 +reserv 0 20 3.951244 0.000000 808 +safeti 0 20 3.951244 0.000000 817 +longer 0 20 3.951244 0.000000 816 +histori 0 19 4.007333 0.000000 853 +boston 0 19 4.007333 0.000000 862 +bershad 0 18 4.060443 0.000000 902 +protect 0 17 4.110874 0.000000 935 +modern 0 16 4.174387 0.000000 966 +match 0 16 4.174387 0.000000 965 +diego 0 16 4.174387 0.000000 992 +fourth 0 16 4.174387 0.000000 999 +spin 0 14 4.317488 0.000000 1121 +rank 0 14 4.317488 0.000000 1063 +mellon 0 13 4.382027 0.000000 1179 +usenix 0 12 4.465908 0.000000 1240 +rest 0 12 4.465908 0.000000 1259 +carnegi 0 12 4.465908 0.000000 1260 +statement 0 11 4.553877 0.000000 1313 +island 0 11 4.553877 0.000000 1345 +rich 0 10 4.653960 0.000000 1396 +sosp 0 10 4.653960 0.000000 1416 +mountain 0 10 4.653960 0.000000 1456 +santa 0 10 4.653960 0.000000 1441 +inter 0 9 4.753590 0.000000 1530 +modula 0 9 4.753590 0.000000 1613 +osdi 0 9 4.753590 0.000000 1534 +capac 0 8 4.875197 0.000000 1740 +mach 0 8 4.875197 0.000000 1669 +ride 0 8 4.875197 0.000000 1741 +isol 0 8 4.875197 0.000000 1663 +sigop 0 8 4.875197 0.000000 1727 +european 0 8 4.875197 0.000000 1763 +cultur 0 7 5.010635 0.000000 1951 +migrat 0 7 5.010635 0.000000 1851 +centuri 0 7 5.010635 0.000000 1935 +microprocessor 0 7 5.010635 0.000000 1808 +trend 0 7 5.010635 0.000000 1842 +fifth 0 7 5.010635 0.000000 1931 +sixth 0 7 5.010635 0.000000 1917 +band 0 6 5.164786 0.000000 2198 +unpublish 0 6 5.164786 0.000000 2226 +usag 0 6 5.164786 0.000000 2209 +favor 0 5 5.347108 0.000000 2414 +panel 0 5 5.347108 0.000000 2463 +savag 0 4 5.568345 0.000000 2777 +microkernel 0 4 5.568345 0.000000 3047 +afraid 0 4 5.568345 0.000000 3053 +gradual 0 4 5.568345 0.000000 2997 +witha 0 4 5.568345 0.000000 2617 +firm 0 4 5.568345 0.000000 2684 +peer 0 4 5.568345 0.000000 2742 +andimplement 0 4 5.568345 0.000000 3029 +redund 0 4 5.568345 0.000000 2839 +stefan 0 3 5.857933 0.000000 3921 +wcsss 0 3 5.857933 0.000000 3956 +caught 0 3 5.857933 0.000000 3465 +irrelev 0 3 5.857933 0.000000 3823 +inconveni 0 3 5.857933 0.000000 3866 +distract 0 3 5.857933 0.000000 3945 +stillmaintain 0 3 5.857933 0.000000 3964 +copper 0 3 5.857933 0.000000 3536 +hoto 0 3 5.857933 0.000000 3577 +orca 0 3 5.857933 0.000000 3578 +tucson 0 2 6.263398 0.000000 5883 +fool 0 2 6.263398 0.000000 5353 +ofappl 0 2 6.263398 0.000000 6002 +whichsupport 0 2 6.263398 0.000000 6003 +monterei 0 2 6.263398 0.000000 4362 +wwo 0 2 6.263398 0.000000 5812 +export 0 2 6.263398 0.000000 5689 +pittsburghfor 0 1 6.957497 0.000000 16298 +mnow 0 1 6.957497 0.000000 16299 +strongbackground 0 1 6.957497 0.000000 16300 +trash 0 1 6.957497 0.000000 16301 +tocqeuvil 0 1 6.957497 0.000000 16302 +tiresom 0 1 6.957497 0.000000 16303 +exercisepolit 0 1 6.957497 0.000000 16304 +tocurr 0 1 6.957497 0.000000 16305 +merri 0 1 6.957497 0.000000 16306 +onan 0 1 6.957497 0.000000 16307 +projectsspinspin 0 1 6.957497 0.000000 16308 +omnifemtokernel 0 1 6.957497 0.000000 16309 +writingspin 0 1 6.957497 0.000000 16310 +napa 0 1 6.957497 0.000000 16311 +timer 0 1 6.957497 0.000000 16312 +hikingthi 0 1 6.957497 0.000000 16313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..cd654326 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +david 0 71 2.639057 0.000000 232 +juli 0 60 2.833213 0.000000 305 +revis 0 26 3.688879 0.000000 640 +sean 1 8 4.875197 4.875197 1705 +sandi 1 4 5.568345 5.568345 2765 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..be243470 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +seattl 0 120 2.079442 0.000000 103 +postscript 0 131 2.079442 0.000000 90 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +overview 0 56 2.890372 0.000000 323 +better 0 45 3.135494 0.000000 401 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +richard 1 31 3.496508 3.496508 559 +famili 0 23 3.806662 0.000000 735 +half 0 21 3.912023 0.000000 776 +washingtonbox 0 13 4.382027 0.000000 1200 +ski 0 10 4.653960 0.000000 1471 +softbal 0 9 4.753590 0.000000 1594 +softbot 0 7 5.010635 0.000000 1974 +amus 0 5 5.347108 0.000000 2366 +racquetbal 0 4 5.568345 0.000000 3052 +biographi 0 3 5.857933 0.000000 3658 +brute 0 2 6.263398 0.000000 5892 +bicycl 0 2 6.263398 0.000000 5950 +segal 1 1 6.957497 6.957497 16314 +segaldepart 0 1 6.957497 0.000000 16315 +archeri 0 1 6.957497 0.000000 16316 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..46324087 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +phone 0 175 1.791759 0.000000 45 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +spring 0 131 2.079442 0.000000 88 +machin 0 129 2.079442 0.000000 95 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +code 0 108 2.197225 0.000000 116 +peopl 0 96 2.302585 0.000000 132 +pictur 0 89 2.397895 0.000000 160 +activ 0 84 2.484907 0.000000 182 +school 0 84 2.484907 0.000000 188 +solut 0 82 2.484907 0.000000 162 +contain 0 81 2.484907 0.000000 174 +complet 0 77 2.564949 0.000000 208 +come 0 78 2.564949 0.000000 202 +sourc 0 77 2.564949 0.000000 201 +resum 0 79 2.564949 0.000000 217 +line 0 75 2.639057 0.000000 231 +receiv 0 66 2.708050 0.000000 244 +main 0 67 2.708050 0.000000 256 +sieg 0 69 2.708050 0.000000 260 +share 0 59 2.833213 0.000000 304 +content 0 59 2.833213 0.000000 302 +hardwar 0 51 2.995732 0.000000 350 +finger 0 52 2.995732 0.000000 354 +date 0 51 2.995732 0.000000 344 +without 0 50 3.044522 0.000000 370 +format 0 48 3.044522 0.000000 356 +done 0 47 3.091042 0.000000 381 +even 0 45 3.135494 0.000000 393 +around 0 43 3.178054 0.000000 415 +past 0 42 3.218876 0.000000 428 +linear 0 41 3.218876 0.000000 431 +field 0 37 3.332205 0.000000 482 +expect 0 37 3.332205 0.000000 484 +print 0 34 3.401197 0.000000 503 +return 0 34 3.401197 0.000000 502 +weather 0 28 3.610918 0.000000 618 +bookmark 0 26 3.688879 0.000000 639 +toward 0 25 3.737670 0.000000 668 +sometim 0 24 3.761200 0.000000 696 +honor 0 23 3.806662 0.000000 729 +sort 0 22 3.850148 0.000000 738 +born 0 21 3.912023 0.000000 798 +unit 0 21 3.912023 0.000000 779 +els 0 19 4.007333 0.000000 843 +comparison 0 19 4.007333 0.000000 863 +north 0 19 4.007333 0.000000 873 +germani 0 17 4.110874 0.000000 946 +bachelor 0 17 4.110874 0.000000 957 +qual 0 15 4.248495 0.000000 1062 +squar 0 14 4.317488 0.000000 1082 +warn 0 14 4.317488 0.000000 1068 +excit 0 11 4.553877 0.000000 1329 +moment 0 11 4.553877 0.000000 1379 +didn 0 9 4.753590 0.000000 1563 +exact 0 9 4.753590 0.000000 1509 +reduct 0 7 5.010635 0.000000 1877 +byte 0 6 5.164786 0.000000 2108 +indiana 0 6 5.164786 0.000000 2057 +trail 0 6 5.164786 0.000000 2071 +fewer 0 6 5.164786 0.000000 2074 +fals 0 4 5.568345 0.000000 2861 +raft 0 4 5.568345 0.000000 3060 +crazi 0 4 5.568345 0.000000 2822 +shouldn 0 4 5.568345 0.000000 2606 +stefan 0 3 5.857933 0.000000 3921 +char 0 2 6.263398 0.000000 4716 +berg 0 2 6.263398 0.000000 4970 +reif 0 2 6.263398 0.000000 5015 +diploma 0 2 6.263398 0.000000 5990 +bloomington 0 2 6.263398 0.000000 5034 +itin 0 2 6.263398 0.000000 5992 +printf 0 1 6.957497 0.000000 16317 +cologn 0 1 6.957497 0.000000 16318 +putchar 0 1 6.957497 0.000000 16319 +bergstefan 0 1 6.957497 0.000000 16320 +sgberg 0 1 6.957497 0.000000 16321 +mittler 0 1 6.957497 0.000000 16322 +thgrade 0 1 6.957497 0.000000 16323 +schillergymnasium 0 1 6.957497 0.000000 16324 +statesto 0 1 6.957497 0.000000 16325 +distinctionin 0 1 6.957497 0.000000 16326 +fromindiana 0 1 6.957497 0.000000 16327 +momenth 0 1 6.957497 0.000000 16328 +thiscenturi 0 1 6.957497 0.000000 16329 +yourselfsometh 0 1 6.957497 0.000000 16330 +particularsolut 0 1 6.957497 0.000000 16331 +sall 0 1 6.957497 0.000000 16332 +carriag 0 1 6.957497 0.000000 16333 +inpostscript 0 1 6.957497 0.000000 16334 +andtex 0 1 6.957497 0.000000 16335 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..dc309b58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +schedul 0 119 2.079442 0.000000 85 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +make 0 111 2.197225 0.000000 120 +pictur 1 89 2.397895 2.397895 160 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +thing 1 84 2.484907 2.484907 189 +info 0 85 2.484907 0.000000 176 +wide 0 84 2.484907 0.000000 185 +differ 0 66 2.708050 0.000000 253 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +laboratori 0 63 2.772589 0.000000 292 +plan 0 65 2.772589 0.000000 272 +browser 0 56 2.890372 0.000000 313 +get 0 46 3.091042 0.000000 380 +done 0 47 3.091042 0.000000 381 +third 0 43 3.178054 0.000000 412 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +anim 0 31 3.496508 0.000000 557 +travel 0 30 3.555348 0.000000 579 +daili 0 24 3.761200 0.000000 706 +grad 0 20 3.951244 0.000000 837 +left 0 19 4.007333 0.000000 851 +lower 0 18 4.060443 0.000000 886 +lot 0 18 4.060443 0.000000 889 +scene 0 14 4.317488 0.000000 1114 +island 0 11 4.553877 0.000000 1345 +siggraph 0 8 4.875197 0.000000 1773 +corner 0 7 5.010635 0.000000 1909 +pixel 0 4 5.568345 0.000000 2831 +shortcut 0 3 5.857933 0.000000 3932 +ward 0 2 6.263398 0.000000 4506 +hereat 0 2 6.263398 0.000000 5048 +shadegreet 0 1 6.957497 0.000000 16336 +salut 0 1 6.957497 0.000000 16337 +dubcs 0 1 6.957497 0.000000 16338 +renderingof 0 1 6.957497 0.000000 16339 +walkthruproject 0 1 6.957497 0.000000 16340 +amonglot 0 1 6.957497 0.000000 16341 +paperdescrib 0 1 6.957497 0.000000 16342 +thepictur 0 1 6.957497 0.000000 16343 +aspectsof 0 1 6.957497 0.000000 16344 +thegraph 0 1 6.957497 0.000000 16345 +scrunch 0 1 6.957497 0.000000 16346 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..a824a83c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +seattl 0 120 2.079442 0.000000 103 +summari 0 73 2.639057 0.000000 237 +prof 0 64 2.772589 0.000000 273 +januari 0 62 2.772589 0.000000 264 +pointer 0 48 3.044522 0.000000 368 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +request 0 26 3.688879 0.000000 635 +ofwashington 0 22 3.850148 0.000000 766 +upon 0 16 4.174387 0.000000 978 +andengin 0 4 5.568345 0.000000 3042 +shun 1 2 6.263398 6.263398 4533 +leung 1 2 6.263398 6.263398 4534 +johnzahorjan 0 2 6.263398 0.000000 6004 +leungshun 0 1 6.957497 0.000000 16347 +shuntak 0 1 6.957497 0.000000 16348 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..82c34df3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +relat 0 139 1.945910 0.000000 68 +studi 0 120 2.079442 0.000000 91 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +mathemat 0 108 2.197225 0.000000 123 +look 0 107 2.197225 0.000000 115 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +start 0 83 2.484907 0.000000 173 +info 0 85 2.484907 0.000000 176 +interfac 0 79 2.564949 0.000000 209 +involv 0 71 2.639057 0.000000 227 +name 0 72 2.639057 0.000000 220 +degre 0 69 2.708050 0.000000 259 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +origin 0 38 3.295837 0.000000 472 +mean 0 37 3.332205 0.000000 477 +dissert 0 32 3.465736 0.000000 549 +quit 0 27 3.637586 0.000000 633 +notic 0 25 3.737670 0.000000 675 +departmentunivers 0 24 3.761200 0.000000 711 +ofwashington 0 22 3.850148 0.000000 766 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +less 0 18 4.060443 0.000000 892 +universityof 0 15 4.248495 0.000000 1061 +anywai 0 15 4.248495 0.000000 1047 +neat 0 12 4.465908 0.000000 1263 +virginia 0 8 4.875197 0.000000 1659 +studentcomput 0 7 5.010635 0.000000 1963 +foreign 0 7 5.010635 0.000000 1919 +smaller 0 7 5.010635 0.000000 1874 +hunt 0 7 5.010635 0.000000 1798 +bug 0 7 5.010635 0.000000 1801 +slate 0 6 5.164786 0.000000 2021 +sciencedepart 0 6 5.164786 0.000000 2172 +haven 0 4 5.568345 0.000000 3037 +asian 0 3 5.857933 0.000000 3598 +heavili 0 3 5.857933 0.000000 3572 +groupand 0 3 5.857933 0.000000 3873 +shuichi 1 2 6.263398 6.263398 4498 +myqual 0 2 6.263398 0.000000 6005 +degreein 0 2 6.263398 0.000000 5116 +koga 0 1 6.957497 0.000000 16349 +skoga 0 1 6.957497 0.000000 16350 +bynow 0 1 6.957497 0.000000 16351 +alsoheavili 0 1 6.957497 0.000000 16352 +andgovern 0 1 6.957497 0.000000 16353 +alic 0 1 6.957497 0.000000 16354 +destroi 0 1 6.957497 0.000000 16355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..17eaa978 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +professor 0 137 1.945910 0.000000 76 +schedul 0 119 2.079442 0.000000 85 +seattl 0 120 2.079442 0.000000 103 +site 0 106 2.197225 0.000000 119 +school 0 84 2.484907 0.000000 188 +educ 0 86 2.484907 0.000000 191 +dept 0 64 2.772589 0.000000 291 +previou 0 62 2.772589 0.000000 290 +experi 0 64 2.772589 0.000000 283 +profession 0 51 2.995732 0.000000 345 +littl 0 39 3.258097 0.000000 454 +photo 0 31 3.496508 0.000000 561 +busi 0 21 3.912023 0.000000 784 +vlsi 0 21 3.912023 0.000000 795 +weekli 0 17 4.110874 0.000000 919 +circuit 0 13 4.382027 0.000000 1131 +galleri 0 13 4.382027 0.000000 1192 +chao 0 8 4.875197 0.000000 1753 +patent 0 5 5.347108 0.000000 2574 +soha 1 2 6.263398 6.263398 6006 +hassoun 0 2 6.263398 0.000000 6007 +retim 0 2 6.263398 0.000000 6008 +hassounit 0 1 6.957497 0.000000 16356 +whoturn 0 1 6.957497 0.000000 16357 +onarchitectur 0 1 6.957497 0.000000 16358 +carlebel 0 1 6.957497 0.000000 16359 +deede 0 1 6.957497 0.000000 16360 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..98cbda84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +contact 0 153 1.791759 0.000000 59 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +hall 0 146 1.945910 0.000000 65 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +report 0 131 2.079442 0.000000 92 +pleas 0 113 2.197225 0.000000 114 +person 0 111 2.197225 0.000000 117 +search 0 95 2.397895 0.000000 155 +associ 0 93 2.397895 0.000000 151 +control 0 82 2.484907 0.000000 164 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +west 0 83 2.484907 0.000000 192 +issu 0 78 2.564949 0.000000 211 +resum 0 79 2.564949 0.000000 217 +sieg 0 69 2.708050 0.000000 260 +evalu 0 64 2.772589 0.000000 266 +particular 0 51 2.995732 0.000000 352 +right 0 48 3.044522 0.000000 363 +quarter 0 47 3.091042 0.000000 389 +done 0 47 3.091042 0.000000 381 +favorit 1 44 3.135494 3.135494 410 +keep 0 44 3.135494 0.000000 409 +long 0 43 3.178054 0.000000 413 +music 0 42 3.218876 0.000000 436 +seminar 0 38 3.295837 0.000000 470 +procedur 0 36 3.367296 0.000000 488 +random 0 34 3.401197 0.000000 511 +bookmark 0 26 3.688879 0.000000 639 +sport 0 25 3.737670 0.000000 683 +sort 0 22 3.850148 0.000000 738 +fund 0 21 3.912023 0.000000 805 +corpor 0 21 3.912023 0.000000 802 +tenni 0 20 3.951244 0.000000 838 +chateau 0 16 4.174387 0.000000 997 +cognit 0 16 4.174387 0.000000 986 +action 0 15 4.248495 0.000000 1038 +track 0 15 4.248495 0.000000 1029 +psycholog 0 15 4.248495 0.000000 1054 +convent 0 14 4.317488 0.000000 1072 +philosophi 0 13 4.382027 0.000000 1167 +danc 0 12 4.465908 0.000000 1278 +food 0 12 4.465908 0.000000 1285 +emploi 0 12 4.465908 0.000000 1284 +remov 0 12 4.465908 0.000000 1225 +island 0 11 4.553877 0.000000 1345 +peter 0 11 4.553877 0.000000 1316 +french 0 9 4.753590 0.000000 1511 +volleybal 0 9 4.753590 0.000000 1598 +soccer 0 8 4.875197 0.000000 1752 +guggenheim 0 8 4.875197 0.000000 1759 +simon 0 8 4.875197 0.000000 1697 +coast 0 8 4.875197 0.000000 1746 +softbot 0 7 5.010635 0.000000 1974 +squash 0 6 5.164786 0.000000 2223 +rock 0 6 5.164786 0.000000 2164 +annex 0 5 5.347108 0.000000 2572 +sail 0 5 5.347108 0.000000 2571 +east 0 5 5.347108 0.000000 2472 +phil 0 5 5.347108 0.000000 2419 +oracl 0 4 5.568345 0.000000 2823 +swing 0 4 5.568345 0.000000 2887 +floyd 0 4 5.568345 0.000000 2682 +sujai 0 3 5.857933 0.000000 3960 +parekh 0 3 5.857933 0.000000 3961 +ballroom 0 3 5.857933 0.000000 3983 +spud 0 2 6.263398 0.000000 6009 +chicken 0 2 6.263398 0.000000 5851 +tango 0 2 6.263398 0.000000 6010 +salsa 0 2 6.263398 0.000000 5984 +strait 0 2 6.263398 0.000000 5980 +genesi 0 2 6.263398 0.000000 6011 +gabriel 0 2 6.263398 0.000000 5029 +simultaneousmultithread 0 1 6.957497 0.000000 16361 +tomultithread 0 1 6.957497 0.000000 16362 +controlsystem 0 1 6.957497 0.000000 16363 +patio 0 1 6.957497 0.000000 16364 +workspac 0 1 6.957497 0.000000 16365 +stottler 0 1 6.957497 0.000000 16366 +henk 0 1 6.957497 0.000000 16367 +oondhiu 0 1 6.957497 0.000000 16368 +mango 0 1 6.957497 0.000000 16369 +phad 0 1 6.957497 0.000000 16370 +thai 0 1 6.957497 0.000000 16371 +kung 0 1 6.957497 0.000000 16372 +beverag 0 1 6.957497 0.000000 16373 +screwdriv 0 1 6.957497 0.000000 16374 +scotch 0 1 6.957497 0.000000 16375 +ic 0 1 6.957497 0.000000 16376 +dire 0 1 6.957497 0.000000 16377 +pink 0 1 6.957497 0.000000 16378 +collin 0 1 6.957497 0.000000 16379 +petti 0 1 6.957497 0.000000 16380 +sparekh 0 1 6.957497 0.000000 16381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..1d7da619 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +address 0 170 1.791759 0.000000 62 +machin 0 129 2.079442 0.000000 95 +peopl 0 96 2.302585 0.000000 132 +academ 0 82 2.484907 0.000000 178 +name 0 72 2.639057 0.000000 220 +improv 0 62 2.772589 0.000000 289 +colleg 0 61 2.833213 0.000000 300 +friend 0 48 3.044522 0.000000 376 +compani 0 41 3.218876 0.000000 423 +live 0 40 3.258097 0.000000 451 +power 0 30 3.555348 0.000000 573 +sport 0 25 3.737670 0.000000 683 +famili 0 23 3.806662 0.000000 735 +almost 0 22 3.850148 0.000000 742 +hous 0 21 3.912023 0.000000 801 +speed 1 18 4.060443 4.060443 911 +ultim 0 17 4.110874 0.000000 943 +drive 0 15 4.248495 0.000000 1052 +comic 0 14 4.317488 0.000000 1103 +food 0 12 4.465908 0.000000 1285 +mari 0 12 4.465908 0.000000 1266 +lake 0 11 4.553877 0.000000 1373 +mountain 0 10 4.653960 0.000000 1456 +bike 0 10 4.653960 0.000000 1468 +ski 0 10 4.653960 0.000000 1471 +drink 0 9 4.753590 0.000000 1607 +softbal 0 9 4.753590 0.000000 1594 +utah 0 9 4.753590 0.000000 1585 +erik 1 8 4.875197 4.875197 1701 +lewi 0 8 4.875197 0.000000 1700 +star 0 8 4.875197 0.000000 1717 +brain 0 8 4.875197 0.000000 1638 +babylon 0 8 4.875197 0.000000 1731 +cultur 0 7 5.010635 0.000000 1951 +fish 0 6 5.164786 0.000000 2207 +selberg 0 5 5.347108 0.000000 2441 +salt 0 5 5.347108 0.000000 2413 +bean 0 4 5.568345 0.000000 2968 +lara 0 3 5.857933 0.000000 3914 +disc 0 2 6.263398 0.000000 5626 +spud 0 2 6.263398 0.000000 6009 +raquetbal 0 2 6.263398 0.000000 6012 +pepper 0 2 6.263398 0.000000 6013 +war 0 2 6.263398 0.000000 5969 +toon 0 2 6.263398 0.000000 4120 +fishcam 0 1 6.957497 0.000000 16382 +memorialhappi 0 1 6.957497 0.000000 16383 +kay 0 1 6.957497 0.000000 16384 +pasti 0 1 6.957497 0.000000 16385 +ur 0 1 6.957497 0.000000 16386 +pro 0 1 6.957497 0.000000 16387 +wedgwood 0 1 6.957497 0.000000 16388 +diet 0 1 6.957497 0.000000 16389 +roast 0 1 6.957497 0.000000 16390 +bagel 0 1 6.957497 0.000000 16391 +racer 0 1 6.957497 0.000000 16392 +tini 0 1 6.957497 0.000000 16393 +pinki 0 1 6.957497 0.000000 16394 +phantom 0 1 6.957497 0.000000 16395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..4dffe01b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +parallel 1 169 1.791759 1.791759 60 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +seattl 0 120 2.079442 0.000000 103 +final 0 116 2.197225 0.000000 108 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +come 0 78 2.564949 0.000000 202 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +simul 1 66 2.708050 2.708050 255 +goal 0 66 2.708050 0.000000 250 +would 0 67 2.708050 0.000000 251 +experi 0 64 2.772589 0.000000 283 +improv 0 62 2.772589 0.000000 289 +result 0 65 2.772589 0.000000 281 +plai 0 60 2.833213 0.000000 307 +colleg 0 61 2.833213 0.000000 300 +think 0 57 2.890372 0.000000 314 +processor 0 54 2.944439 0.000000 335 +visual 0 48 3.044522 0.000000 372 +quarter 0 47 3.091042 0.000000 389 +anoth 0 45 3.135494 0.000000 408 +late 0 40 3.258097 0.000000 439 +littl 0 39 3.258097 0.000000 454 +movi 0 40 3.258097 0.000000 459 +must 0 40 3.258097 0.000000 442 +game 0 36 3.367296 0.000000 498 +independ 0 32 3.465736 0.000000 548 +quit 0 27 3.637586 0.000000 633 +enjoi 0 26 3.688879 0.000000 660 +primari 0 25 3.737670 0.000000 669 +cooper 0 22 3.850148 0.000000 757 +instead 0 22 3.850148 0.000000 756 +try 0 22 3.850148 0.000000 764 +watch 0 21 3.912023 0.000000 789 +divis 0 21 3.912023 0.000000 803 +mostli 0 19 4.007333 0.000000 869 +exercis 0 19 4.007333 0.000000 842 +left 0 19 4.007333 0.000000 851 +listen 0 18 4.060443 0.000000 907 +front 0 13 4.382027 0.000000 1154 +came 0 13 4.382027 0.000000 1197 +unfortun 0 13 4.382027 0.000000 1170 +weight 0 12 4.465908 0.000000 1204 +hang 0 9 4.753590 0.000000 1499 +drink 0 9 4.753590 0.000000 1607 +swim 0 9 4.753590 0.000000 1599 +router 0 8 4.875197 0.000000 1772 +soccer 0 8 4.875197 0.000000 1752 +sung 0 6 5.164786 0.000000 2075 +superscalar 0 6 5.164786 0.000000 2082 +seen 0 6 5.164786 0.000000 2202 +yeah 0 6 5.164786 0.000000 2195 +ta 0 4 5.568345 0.000000 3058 +choi 0 4 5.568345 0.000000 2732 +astronomi 0 3 5.857933 0.000000 3974 +comfort 0 3 5.857933 0.000000 3136 +wine 0 3 5.857933 0.000000 3895 +knee 0 3 5.857933 0.000000 3980 +choiwelcom 0 2 6.263398 0.000000 5727 +ironman 0 2 6.263398 0.000000 4226 +vegetarian 0 2 6.263398 0.000000 5902 +season 0 2 6.263398 0.000000 4872 +scrub 0 1 6.957497 0.000000 16396 +thehomepag 0 1 6.957497 0.000000 16397 +ofsung 0 1 6.957497 0.000000 16398 +eunchoi 0 1 6.957497 0.000000 16399 +myschoollifemi 0 1 6.957497 0.000000 16400 +zplcompil 0 1 6.957497 0.000000 16401 +beenspend 0 1 6.957497 0.000000 16402 +communicationgener 0 1 6.957497 0.000000 16403 +architechtur 0 1 6.957497 0.000000 16404 +communicationlibrari 0 1 6.957497 0.000000 16405 +programson 0 1 6.957497 0.000000 16406 +nodeperform 0 1 6.957497 0.000000 16407 +alsobeen 0 1 6.957497 0.000000 16408 +chaosrout 0 1 6.957497 0.000000 16409 +thatexperi 0 1 6.957497 0.000000 16410 +inzpl 0 1 6.957497 0.000000 16411 +myjunior 0 1 6.957497 0.000000 16412 +dinner 0 1 6.957497 0.000000 16413 +samewithout 0 1 6.957497 0.000000 16414 +twosoccerteam 0 1 6.957497 0.000000 16415 +cousin 0 1 6.957497 0.000000 16416 +recdivis 0 1 6.957497 0.000000 16417 +andcoop 0 1 6.957497 0.000000 16418 +sacrifiedmi 0 1 6.957497 0.000000 16419 +usualstep 0 1 6.957497 0.000000 16420 +aerobicsclass 0 1 6.957497 0.000000 16421 +trainingclass 0 1 6.957497 0.000000 16422 +abit 0 1 6.957497 0.000000 16423 +shakespear 0 1 6.957497 0.000000 16424 +publictelevis 0 1 6.957497 0.000000 16425 +classicalmus 0 1 6.957497 0.000000 16426 +myotherlif 0 1 6.957497 0.000000 16427 +sungeun 0 1 6.957497 0.000000 16428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..051ff9d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +support 0 132 1.945910 0.000000 83 +schedul 0 119 2.079442 0.000000 85 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +world 0 115 2.197225 0.000000 126 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +real 0 93 2.397895 0.000000 144 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +complet 0 77 2.564949 0.000000 208 +advisor 0 51 2.995732 0.000000 355 +run 0 51 2.995732 0.000000 347 +visual 0 48 3.044522 0.000000 372 +execut 0 45 3.135494 0.000000 404 +workstat 0 37 3.332205 0.000000 479 +secur 0 30 3.555348 0.000000 577 +measur 0 28 3.610918 0.000000 609 +decis 0 23 3.806662 0.000000 728 +runtim 0 19 4.007333 0.000000 858 +partial 0 18 4.060443 0.000000 900 +engineeringunivers 0 17 4.110874 0.000000 959 +washingtonbox 0 13 4.382027 0.000000 1200 +characterist 0 12 4.465908 0.000000 1257 +multiprogram 0 6 5.164786 0.000000 2010 +nguyen 0 3 5.857933 0.000000 3290 +andparallel 0 2 6.263398 0.000000 6014 +johnzahorjan 0 2 6.263398 0.000000 6004 +soft 0 2 6.263398 0.000000 5072 +idl 0 2 6.263398 0.000000 4256 +ofappl 0 2 6.263398 0.000000 6002 +tominim 0 2 6.263398 0.000000 5436 +multiprocessorsenviron 0 1 6.957497 0.000000 16429 +frommi 0 1 6.957497 0.000000 16430 +timeappl 0 1 6.957497 0.000000 16431 +innow 0 1 6.957497 0.000000 16432 +uniprogram 0 1 6.957497 0.000000 16433 +goodglob 0 1 6.957497 0.000000 16434 +cvpublic 0 1 6.957497 0.000000 16435 +worldvietnameseresourc 0 1 6.957497 0.000000 16436 +netcyclingplayground 0 1 6.957497 0.000000 16437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..5a33ba8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +click 0 142 1.945910 0.000000 78 +part 0 98 2.302585 0.000000 129 +homepag 0 93 2.397895 0.000000 148 +html 0 75 2.639057 0.000000 235 +browser 0 56 2.890372 0.000000 313 +standard 0 48 3.044522 0.000000 365 +even 0 45 3.135494 0.000000 393 +frame 1 24 3.761200 3.761200 684 +yellow 0 9 4.753590 0.000000 1601 +turkei 0 4 5.568345 0.000000 2914 +tian 0 3 5.857933 0.000000 3680 +homepageyour 0 1 6.957497 0.000000 16438 +rusti 0 1 6.957497 0.000000 16439 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..6d294593 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +distribut 0 162 1.791759 0.000000 51 +address 0 170 1.791759 0.000000 62 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +object 1 138 1.945910 1.945910 79 +area 0 144 1.945910 0.000000 80 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +databas 0 122 2.079442 0.000000 86 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +machin 0 129 2.079442 0.000000 95 +intern 0 108 2.197225 0.000000 128 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +octob 0 89 2.397895 0.000000 156 +follow 0 92 2.397895 0.000000 143 +proceed 0 93 2.397895 0.000000 152 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +organ 0 65 2.772589 0.000000 265 +creat 0 63 2.772589 0.000000 277 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +evalu 0 64 2.772589 0.000000 266 +juli 0 60 2.833213 0.000000 305 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +advisor 0 51 2.995732 0.000000 355 +profession 0 51 2.995732 0.000000 345 +past 0 42 3.218876 0.000000 428 +multipl 0 39 3.258097 0.000000 453 +close 0 38 3.295837 0.000000 465 +singl 0 34 3.401197 0.000000 510 +global 0 34 3.401197 0.000000 520 +measur 0 28 3.610918 0.000000 609 +except 0 28 3.610918 0.000000 607 +full 0 28 3.610918 0.000000 615 +proc 0 26 3.688879 0.000000 649 +jeff 0 25 3.737670 0.000000 673 +primari 0 25 3.737670 0.000000 669 +handl 0 24 3.761200 0.000000 685 +basi 0 20 3.951244 0.000000 828 +benchmark 0 19 4.007333 0.000000 859 +mostli 0 19 4.007333 0.000000 869 +behavior 0 18 4.060443 0.000000 881 +fourth 0 16 4.174387 0.000000 999 +across 0 16 4.174387 0.000000 974 +levi 0 14 4.317488 0.000000 1093 +opportun 0 13 4.382027 0.000000 1161 +workload 0 12 4.465908 0.000000 1210 +infrastructur 0 12 4.465908 0.000000 1234 +hank 0 12 4.465908 0.000000 1253 +career 0 12 4.465908 0.000000 1287 +persist 0 11 4.553877 0.000000 1367 +architect 0 8 4.875197 0.000000 1624 +character 0 8 4.875197 0.000000 1767 +oop 0 8 4.875197 0.000000 1778 +oopsla 0 6 5.164786 0.000000 2221 +spare 0 6 5.164786 0.000000 2177 +tiwari 0 5 5.347108 0.000000 2385 +opal 0 4 5.568345 0.000000 3057 +chase 0 4 5.568345 0.000000 2897 +narasayya 0 4 5.568345 0.000000 3065 +boe 0 3 5.857933 0.000000 3318 +addendum 0 3 5.857933 0.000000 3150 +ashutosh 0 2 6.263398 0.000000 5966 +projectsopali 0 1 6.957497 0.000000 16440 +thisexperi 0 1 6.957497 0.000000 16441 +distrbut 0 1 6.957497 0.000000 16442 +ecoop 0 1 6.957497 0.000000 16443 +bosch 0 1 6.957497 0.000000 16444 +messeng 0 1 6.957497 0.000000 16445 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..007c5c18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +avail 0 169 1.791759 0.000000 48 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +search 0 95 2.397895 0.000000 155 +pictur 0 89 2.397895 0.000000 160 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +onlin 0 75 2.639057 0.000000 223 +java 0 70 2.708050 0.000000 248 +share 0 59 2.833213 0.000000 304 +plai 0 60 2.833213 0.000000 307 +simpl 0 60 2.833213 0.000000 298 +found 0 53 2.944439 0.000000 337 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +still 0 50 3.044522 0.000000 362 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +anoth 0 45 3.135494 0.000000 408 +game 0 36 3.367296 0.000000 498 +copyright 0 36 3.367296 0.000000 495 +platform 0 29 3.583519 0.000000 591 +linux 1 27 3.637586 3.637586 631 +great 0 27 3.637586 0.000000 626 +known 0 24 3.761200 0.000000 702 +honor 0 23 3.806662 0.000000 729 +sort 0 22 3.850148 0.000000 738 +applet 0 20 3.951244 0.000000 827 +histori 0 19 4.007333 0.000000 853 +seek 0 17 4.110874 0.000000 954 +attempt 0 17 4.110874 0.000000 917 +qual 0 15 4.248495 0.000000 1062 +pretti 0 13 4.382027 0.000000 1191 +employ 0 12 4.465908 0.000000 1291 +gain 0 8 4.875197 0.000000 1730 +dead 0 7 5.010635 0.000000 1840 +apart 0 7 5.010635 0.000000 1936 +appar 0 7 5.010635 0.000000 1958 +bookstor 0 7 5.010635 0.000000 1837 +myresum 0 6 5.164786 0.000000 2199 +cat 0 6 5.164786 0.000000 2194 +commit 0 6 5.164786 0.000000 2233 +curiou 0 5 5.347108 0.000000 2541 +superhighwai 0 4 5.568345 0.000000 2943 +scotland 0 4 5.568345 0.000000 3049 +fulfil 0 4 5.568345 0.000000 2932 +breadth 0 4 5.568345 0.000000 2695 +gambit 0 3 5.857933 0.000000 3227 +tessa 0 2 6.263398 0.000000 4507 +yeargradu 0 2 6.263398 0.000000 6015 +maze 0 2 6.263398 0.000000 4843 +knit 0 2 6.263398 0.000000 4906 +relatedgoodi 0 1 6.957497 0.000000 16446 +clio 0 1 6.957497 0.000000 16447 +andbrows 0 1 6.957497 0.000000 16448 +kittyi 0 1 6.957497 0.000000 16449 +siames 0 1 6.957497 0.000000 16450 +memor 0 1 6.957497 0.000000 16451 +therear 0 1 6.957497 0.000000 16452 +tofind 0 1 6.957497 0.000000 16453 +alsor 0 1 6.957497 0.000000 16454 +classesi 0 1 6.957497 0.000000 16455 +ofeight 0 1 6.957497 0.000000 16456 +seminarlinux 0 1 6.957497 0.000000 16457 +gameseverybodi 0 1 6.957497 0.000000 16458 +gametom 0 1 6.957497 0.000000 16459 +coolgam 0 1 6.957497 0.000000 16460 +sleepingi 0 1 6.957497 0.000000 16461 +crochet 0 1 6.957497 0.000000 16462 +tlau 0 1 6.957497 0.000000 16463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..c87010ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +provid 0 121 2.079442 0.000000 94 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +build 0 85 2.484907 0.000000 184 +educ 0 86 2.484907 0.000000 191 +resourc 0 81 2.484907 0.000000 172 +finger 0 52 2.995732 0.000000 354 +electron 0 47 3.091042 0.000000 379 +articl 0 33 3.433987 0.000000 530 +collabor 0 32 3.465736 0.000000 543 +photo 0 31 3.496508 0.000000 561 +martin 1 21 3.912023 3.912023 794 +among 0 21 3.912023 0.000000 781 +across 0 16 4.174387 0.000000 974 +photograph 0 15 4.248495 0.000000 1056 +holidai 0 12 4.465908 0.000000 1224 +lane 0 8 4.875197 0.000000 1720 +courtesi 0 7 5.010635 0.000000 1953 +moon 0 4 5.568345 0.000000 2991 +pierc 0 4 5.568345 0.000000 2623 +tompa 1 3 5.857933 5.857933 3305 +health 0 3 5.857933 0.000000 3787 +trajectori 0 2 6.263398 0.000000 4260 +pearl 0 2 6.263398 0.000000 4485 +wash 0 2 6.263398 0.000000 5714 +receptionist 0 1 6.957497 0.000000 16464 +thelma 0 1 6.957497 0.000000 16465 +louis 0 1 6.957497 0.000000 16466 +oyster 0 1 6.957497 0.000000 16467 +surrealist 0 1 6.957497 0.000000 16468 +propheci 0 1 6.957497 0.000000 16469 +carol 0 1 6.957497 0.000000 16470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..b425a624 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +version 0 113 2.197225 0.000000 122 +associ 0 93 2.397895 0.000000 151 +proceed 0 93 2.397895 0.000000 152 +institut 0 84 2.484907 0.000000 187 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +appear 0 78 2.564949 0.000000 210 +optim 0 79 2.564949 0.000000 197 +free 0 73 2.639057 0.000000 224 +symposium 0 72 2.639057 0.000000 238 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +foundat 0 62 2.772589 0.000000 286 +abstract 0 62 2.772589 0.000000 276 +detail 0 57 2.890372 0.000000 321 +without 0 50 3.044522 0.000000 370 +move 0 47 3.091042 0.000000 382 +done 0 47 3.091042 0.000000 381 +cach 0 41 3.218876 0.000000 432 +vita 0 38 3.295837 0.000000 473 +brian 0 38 3.295837 0.000000 466 +return 0 34 3.401197 0.000000 502 +random 0 34 3.401197 0.000000 511 +curriculum 0 33 3.433987 0.000000 535 +product 0 33 3.433987 0.000000 527 +extend 0 32 3.465736 0.000000 539 +held 0 28 3.610918 0.000000 600 +trace 0 25 3.737670 0.000000 677 +longer 0 20 3.951244 0.000000 816 +histori 0 19 4.007333 0.000000 853 +comparison 0 19 4.007333 0.000000 863 +andrew 0 19 4.007333 0.000000 849 +bershad 0 18 4.060443 0.000000 902 +matrix 0 17 4.110874 0.000000 933 +letter 0 16 4.174387 0.000000 981 +driven 0 15 4.248495 0.000000 1048 +near 0 14 4.317488 0.000000 1091 +squar 0 14 4.317488 0.000000 1082 +karlin 0 13 4.382027 0.000000 1176 +sigmetr 0 13 4.382027 0.000000 1173 +anna 0 12 4.465908 0.000000 1292 +usenix 0 12 4.465908 0.000000 1240 +verifi 0 12 4.465908 0.000000 1261 +statement 0 11 4.553877 0.000000 1313 +probabilist 0 11 4.553877 0.000000 1343 +forc 0 10 4.653960 0.000000 1384 +charg 0 9 4.753590 0.000000 1582 +patterson 0 9 4.753590 0.000000 1554 +kumar 0 9 4.753590 0.000000 1506 +sigop 0 8 4.875197 0.000000 1727 +bit 0 7 5.010635 0.000000 1833 +prefetch 0 6 5.164786 0.000000 2039 +edward 0 6 5.164786 0.000000 2050 +promis 0 6 5.164786 0.000000 2037 +onoper 0 6 5.164786 0.000000 2048 +escap 0 4 5.568345 0.000000 3016 +kimbrel 1 3 5.857933 5.857933 3924 +traci 0 3 5.857933 0.000000 3984 +prison 0 3 5.857933 0.000000 3907 +cachingtraci 0 3 5.857933 0.000000 3923 +felten 0 3 5.857933 0.000000 3925 +eduher 0 3 5.857933 0.000000 3499 +tomanufactur 0 2 6.263398 0.000000 6016 +airplan 0 2 6.263398 0.000000 4917 +tomkin 0 2 6.263398 0.000000 5814 +hugo 0 2 6.263398 0.000000 5815 +garth 0 2 6.263398 0.000000 5816 +gibson 0 2 6.263398 0.000000 5817 +implemen 0 2 6.263398 0.000000 5809 +rakesh 0 2 6.263398 0.000000 6017 +sinha 0 2 6.263398 0.000000 5754 +imprison 0 1 6.957497 0.000000 16471 +captor 0 1 6.957497 0.000000 16472 +washingtonsinc 0 1 6.957497 0.000000 16473 +trial 0 1 6.957497 0.000000 16474 +toanoth 0 1 6.957497 0.000000 16475 +inmat 0 1 6.957497 0.000000 16476 +wasrecaptur 0 1 6.957497 0.000000 16477 +hisplight 0 1 6.957497 0.000000 16478 +rescu 0 1 6.957497 0.000000 16479 +ofwhat 0 1 6.957497 0.000000 16480 +tracyk 0 1 6.957497 0.000000 16481 +ieeesymposium 0 1 6.957497 0.000000 16482 +measurementand 0 1 6.957497 0.000000 16483 +usingo 0 1 6.957497 0.000000 16484 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..4ead1f83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +modifi 0 178 1.609438 0.000000 35 +seattl 0 120 2.079442 0.000000 103 +take 0 97 2.302585 0.000000 134 +real 0 93 2.397895 0.000000 144 +help 0 83 2.484907 0.000000 175 +run 0 51 2.995732 0.000000 347 +quarter 0 47 3.091042 0.000000 389 +understand 0 47 3.091042 0.000000 384 +keep 0 44 3.135494 0.000000 409 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +press 0 42 3.218876 0.000000 419 +dissert 0 32 3.465736 0.000000 549 +half 0 21 3.912023 0.000000 776 +corpor 0 21 3.912023 0.000000 802 +latest 0 21 3.912023 0.000000 785 +predict 0 19 4.007333 0.000000 855 +statu 0 18 4.060443 0.000000 885 +side 0 15 4.248495 0.000000 1022 +spin 0 14 4.317488 0.000000 1121 +lock 0 9 4.753590 0.000000 1551 +craig 0 7 5.010635 0.000000 1879 +pool 0 6 5.164786 0.000000 2225 +consum 0 5 5.347108 0.000000 2334 +queu 0 4 5.568345 0.000000 2648 +travi 1 3 5.857933 5.857933 3985 +motor 0 3 5.857933 0.000000 3909 +submarin 0 2 6.263398 0.000000 6018 +restor 0 1 6.957497 0.000000 16485 +arctic 0 1 6.957497 0.000000 16486 +esca 0 1 6.957497 0.000000 16487 +volvo 0 1 6.957497 0.000000 16488 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..e1dbaa74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +download 0 36 3.367296 0.000000 489 +bibliographi 0 34 3.401197 0.000000 518 +hobbi 0 16 4.174387 0.000000 1009 +dean 1 14 4.317488 4.317488 1104 +tullsen 1 6 5.164786 5.164786 2081 +biograph 0 2 6.263398 0.000000 5625 +resumemi 0 2 6.263398 0.000000 4971 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..d9bbfc06 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,236 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +send 0 114 2.197225 0.000000 109 +look 0 107 2.197225 0.000000 115 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +call 0 91 2.397895 0.000000 153 +imag 0 91 2.397895 0.000000 161 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +thing 0 84 2.484907 0.000000 189 +activ 0 84 2.484907 0.000000 182 +larg 0 82 2.484907 0.000000 168 +come 0 78 2.564949 0.000000 202 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +solv 0 73 2.639057 0.000000 234 +free 0 73 2.639057 0.000000 224 +would 0 67 2.708050 0.000000 251 +degre 0 69 2.708050 0.000000 259 +differ 0 66 2.708050 0.000000 253 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +improv 0 62 2.772589 0.000000 289 +visit 0 63 2.772589 0.000000 288 +locat 0 59 2.833213 0.000000 303 +summer 0 56 2.890372 0.000000 311 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +extens 0 53 2.944439 0.000000 340 +talk 0 53 2.944439 0.000000 336 +hardwar 0 51 2.995732 0.000000 350 +life 0 50 3.044522 0.000000 375 +still 0 50 3.044522 0.000000 362 +quarter 0 47 3.091042 0.000000 389 +could 0 46 3.091042 0.000000 383 +get 0 46 3.091042 0.000000 380 +made 0 44 3.135494 0.000000 398 +anoth 0 45 3.135494 0.000000 408 +fridai 0 44 3.135494 0.000000 390 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +programm 0 39 3.258097 0.000000 445 +continu 0 39 3.258097 0.000000 448 +littl 0 39 3.258097 0.000000 454 +realli 0 40 3.258097 0.000000 444 +small 0 39 3.258097 0.000000 447 +winter 0 36 3.367296 0.000000 500 +staff 0 36 3.367296 0.000000 490 +soon 0 36 3.367296 0.000000 494 +michael 0 35 3.401197 0.000000 514 +post 0 35 3.401197 0.000000 505 +within 0 33 3.433987 0.000000 525 +taught 0 33 3.433987 0.000000 526 +product 0 33 3.433987 0.000000 527 +kind 0 32 3.465736 0.000000 541 +chapter 0 32 3.465736 0.000000 536 +taken 0 31 3.496508 0.000000 555 +photo 0 31 3.496508 0.000000 561 +particip 0 29 3.583519 0.000000 589 +steve 0 29 3.583519 0.000000 594 +enjoi 0 26 3.688879 0.000000 660 +subject 0 26 3.688879 0.000000 647 +valu 0 25 3.737670 0.000000 665 +mike 1 24 3.761200 3.761200 703 +other 0 24 3.761200 0.000000 697 +doctor 0 24 3.761200 0.000000 709 +thank 0 23 3.806662 0.000000 721 +begin 0 23 3.806662 0.000000 716 +finish 0 22 3.850148 0.000000 748 +director 0 22 3.850148 0.000000 767 +born 0 21 3.912023 0.000000 798 +wrote 0 20 3.951244 0.000000 830 +wonder 0 20 3.951244 0.000000 815 +citi 0 19 4.007333 0.000000 874 +eric 0 19 4.007333 0.000000 870 +lot 0 18 4.060443 0.000000 889 +demo 0 18 4.060443 0.000000 888 +debug 0 17 4.110874 0.000000 944 +took 0 16 4.174387 0.000000 1010 +contribut 0 15 4.248495 0.000000 1021 +countri 0 15 4.248495 0.000000 1059 +hopefulli 0 14 4.317488 0.000000 1071 +wife 0 13 4.382027 0.000000 1196 +front 0 13 4.382027 0.000000 1154 +earlier 0 13 4.382027 0.000000 1140 +forth 0 13 4.382027 0.000000 1186 +stai 0 12 4.465908 0.000000 1215 +franc 0 12 4.465908 0.000000 1276 +skill 0 12 4.465908 0.000000 1205 +bill 0 11 4.553877 0.000000 1297 +fix 0 11 4.553877 0.000000 1327 +america 0 11 4.553877 0.000000 1370 +motiv 0 11 4.553877 0.000000 1346 +chri 0 11 4.553877 0.000000 1311 +lake 0 11 4.553877 0.000000 1373 +prior 0 10 4.653960 0.000000 1438 +acquisit 0 10 4.653960 0.000000 1465 +ski 0 10 4.653960 0.000000 1471 +correctli 0 9 4.753590 0.000000 1478 +doug 0 9 4.753590 0.000000 1517 +mention 0 9 4.753590 0.000000 1569 +french 0 9 4.753590 0.000000 1511 +folk 0 9 4.753590 0.000000 1597 +screen 0 9 4.753590 0.000000 1577 +swim 0 9 4.753590 0.000000 1599 +cross 0 8 4.875197 0.000000 1703 +harvard 0 7 5.010635 0.000000 1926 +brought 0 7 5.010635 0.000000 1925 +poster 0 7 5.010635 0.000000 1814 +earn 0 7 5.010635 0.000000 1788 +iowa 0 7 5.010635 0.000000 1971 +oopsla 0 6 5.164786 0.000000 2221 +pari 0 6 5.164786 0.000000 2158 +nativ 0 6 5.164786 0.000000 2192 +south 0 6 5.164786 0.000000 2167 +hike 0 6 5.164786 0.000000 2234 +truli 0 5 5.347108 0.000000 2476 +sail 0 5 5.347108 0.000000 2571 +observatori 0 4 5.568345 0.000000 3070 +countless 0 4 5.568345 0.000000 3020 +theintern 0 4 5.568345 0.000000 2981 +theacm 0 4 5.568345 0.000000 2698 +sigsoft 0 4 5.568345 0.000000 3036 +ti 0 4 5.568345 0.000000 3005 +marco 0 4 5.568345 0.000000 2589 +luck 0 3 5.857933 0.000000 3201 +immedi 0 3 5.857933 0.000000 3117 +motif 0 3 5.857933 0.000000 3752 +astrophys 0 3 5.857933 0.000000 3936 +schwarz 0 3 5.857933 0.000000 3986 +talent 0 3 5.857933 0.000000 3768 +traci 0 3 5.857933 0.000000 3984 +harold 0 3 5.857933 0.000000 3803 +scanner 0 3 5.857933 0.000000 3437 +eduperson 0 2 6.263398 0.000000 5776 +contractor 0 2 6.263398 0.000000 4915 +widget 0 2 6.263398 0.000000 5347 +convinc 0 2 6.263398 0.000000 6019 +calibr 0 2 6.263398 0.000000 4502 +francais 0 2 6.263398 0.000000 6020 +uist 0 2 6.263398 0.000000 5901 +grinnel 0 2 6.263398 0.000000 5763 +alexand 0 2 6.263398 0.000000 5329 +smithsonian 0 1 6.957497 0.000000 16489 +uwin 0 1 6.957497 0.000000 16490 +vanhilst 0 1 6.957497 0.000000 16491 +angela 0 1 6.957497 0.000000 16492 +vanhilstmichael 0 1 6.957497 0.000000 16493 +vanhilstvanhilst 0 1 6.957497 0.000000 16494 +edumvh 0 1 6.957497 0.000000 16495 +usaclick 0 1 6.957497 0.000000 16496 +personalmik 0 1 6.957497 0.000000 16497 +theend 0 1 6.957497 0.000000 16498 +udub 0 1 6.957497 0.000000 16499 +atibm 0 1 6.957497 0.000000 16500 +unterfac 0 1 6.957497 0.000000 16501 +sdata 0 1 6.957497 0.000000 16502 +maintainingcomput 0 1 6.957497 0.000000 16503 +saoimagewhich 0 1 6.957497 0.000000 16504 +astronom 0 1 6.957497 0.000000 16505 +saoimag 0 1 6.957497 0.000000 16506 +gnudistribut 0 1 6.957497 0.000000 16507 +wyatt 0 1 6.957497 0.000000 16508 +mandel 0 1 6.957497 0.000000 16509 +minkfor 0 1 6.957497 0.000000 16510 +seismologistsin 0 1 6.957497 0.000000 16511 +theallianc 0 1 6.957497 0.000000 16512 +colombiain 0 1 6.957497 0.000000 16513 +studentsbrows 0 1 6.957497 0.000000 16514 +pine 0 1 6.957497 0.000000 16515 +shirei 0 1 6.957497 0.000000 16516 +stenvik 0 1 6.957497 0.000000 16517 +frommicrosoft 0 1 6.957497 0.000000 16518 +sacrif 0 1 6.957497 0.000000 16519 +isota 0 1 6.957497 0.000000 16520 +inarchitectur 0 1 6.957497 0.000000 16521 +wooden 0 1 6.957497 0.000000 16522 +planningfrom 0 1 6.957497 0.000000 16523 +mitand 0 1 6.957497 0.000000 16524 +visualdesign 0 1 6.957497 0.000000 16525 +andkayak 0 1 6.957497 0.000000 16526 +bronson 0 1 6.957497 0.000000 16527 +sebastien 0 1 6.957497 0.000000 16528 +hilst 0 1 6.957497 0.000000 16529 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..4f21556a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +databas 0 122 2.079442 0.000000 86 +person 0 111 2.197225 0.000000 117 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +access 0 102 2.302585 0.000000 136 +peopl 0 96 2.302585 0.000000 132 +sinc 0 90 2.397895 0.000000 159 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +come 0 78 2.564949 0.000000 202 +orient 0 80 2.564949 0.000000 205 +appear 0 78 2.564949 0.000000 210 +write 0 72 2.639057 0.000000 222 +html 0 75 2.639057 0.000000 235 +collect 0 65 2.772589 0.000000 268 +written 0 63 2.772589 0.000000 278 +best 0 59 2.833213 0.000000 299 +room 0 59 2.833213 0.000000 301 +overview 0 56 2.890372 0.000000 323 +cool 0 49 3.044522 0.000000 374 +visitor 0 49 3.044522 0.000000 371 +even 0 45 3.135494 0.000000 393 +http 0 41 3.218876 0.000000 420 +live 0 40 3.258097 0.000000 451 +realli 0 40 3.258097 0.000000 444 +join 0 39 3.258097 0.000000 457 +slide 0 38 3.295837 0.000000 467 +staff 0 36 3.367296 0.000000 490 +download 0 36 3.367296 0.000000 489 +short 0 36 3.367296 0.000000 499 +graph 0 30 3.555348 0.000000 576 +quit 0 27 3.637586 0.000000 633 +arrai 0 27 3.637586 0.000000 627 +repres 0 26 3.688879 0.000000 656 +handl 0 24 3.761200 0.000000 685 +thu 0 21 3.912023 0.000000 773 +expand 0 17 4.110874 0.000000 928 +young 0 16 4.174387 0.000000 991 +mayb 0 15 4.248495 0.000000 1014 +qual 0 15 4.248495 0.000000 1062 +infrastructur 0 12 4.465908 0.000000 1234 +hello 0 10 4.653960 0.000000 1407 +mosaic 0 10 4.653960 0.000000 1426 +cecil 0 9 4.753590 0.000000 1547 +pure 0 8 4.875197 0.000000 1776 +irregular 0 8 4.875197 0.000000 1768 +mirror 0 6 5.164786 0.000000 2028 +shortest 0 5 5.347108 0.000000 2424 +writeup 0 5 5.347108 0.000000 2352 +vass 0 2 6.263398 0.000000 4449 +pageuw 0 2 6.263398 0.000000 6021 +pagerec 0 2 6.263398 0.000000 6022 +cecilproject 0 2 6.263398 0.000000 4457 +cooler 0 2 6.263398 0.000000 6023 +anddynam 0 2 6.263398 0.000000 5889 +myqual 0 2 6.263398 0.000000 6005 +closer 0 2 6.263398 0.000000 6024 +vassilylong 0 1 6.957497 0.000000 16530 +linki 0 1 6.957497 0.000000 16531 +fewfil 0 1 6.957497 0.000000 16532 +thisstuff 0 1 6.957497 0.000000 16533 +quotesrussian 0 1 6.957497 0.000000 16534 +pagesvari 0 1 6.957497 0.000000 16535 +linksguid 0 1 6.957497 0.000000 16536 +formsoth 0 1 6.957497 0.000000 16537 +pagencsa 0 1 6.957497 0.000000 16538 +andvortex 0 1 6.957497 0.000000 16539 +befast 0 1 6.957497 0.000000 16540 +themvi 0 1 6.957497 0.000000 16541 +ourdepartment 0 1 6.957497 0.000000 16542 +beenupgrad 0 1 6.957497 0.000000 16543 +thezpl 0 1 6.957497 0.000000 16544 +languageto 0 1 6.957497 0.000000 16545 +repartit 0 1 6.957497 0.000000 16546 +theslidesfrom 0 1 6.957497 0.000000 16547 +toresourc 0 1 6.957497 0.000000 16548 +eduobject 0 1 6.957497 0.000000 16549 +pastor 0 1 6.957497 0.000000 16550 +vybrasyvalsya 0 1 6.957497 0.000000 16551 +okna 0 1 6.957497 0.000000 16552 +pyatyi 0 1 6.957497 0.000000 16553 +deystvov 0 1 6.957497 0.000000 16554 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..083396de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +present 0 91 2.397895 0.000000 145 +educ 0 86 2.484907 0.000000 191 +resum 0 79 2.564949 0.000000 217 +servic 0 72 2.639057 0.000000 236 +receiv 0 66 2.708050 0.000000 244 +profession 0 51 2.995732 0.000000 345 +join 0 39 3.258097 0.000000 457 +electr 0 38 3.295837 0.000000 461 +scientist 0 31 3.496508 0.000000 560 +recommend 0 22 3.850148 0.000000 737 +theunivers 0 21 3.912023 0.000000 797 +divis 0 21 3.912023 0.000000 803 +histori 0 19 4.007333 0.000000 853 +letter 0 16 4.174387 0.000000 981 +achiev 0 14 4.317488 0.000000 1088 +evan 0 8 4.875197 0.000000 1633 +patent 0 5 5.347108 0.000000 2574 +invent 0 4 5.568345 0.000000 3028 +arizona 0 3 5.857933 0.000000 3700 +electricalengin 0 3 5.857933 0.000000 3987 +expertis 0 3 5.857933 0.000000 3321 +virgil 0 2 6.263398 0.000000 5783 +bourassa 0 2 6.263398 0.000000 5782 +uwvirgil 0 1 6.957497 0.000000 16555 +bourassavirgil 0 1 6.957497 0.000000 16556 +interestsinclud 0 1 6.957497 0.000000 16557 +boeingin 0 1 6.957497 0.000000 16558 +scienceorgan 0 1 6.957497 0.000000 16559 +bellevu 0 1 6.957497 0.000000 16560 +arizonast 0 1 6.957497 0.000000 16561 +temp 0 1 6.957497 0.000000 16562 +accesswhat 0 1 6.957497 0.000000 16563 +statusoccasion 0 1 6.957497 0.000000 16564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..716348e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +avail 1 169 1.791759 1.791759 48 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +high 0 130 2.079442 0.000000 101 +seattl 0 120 2.079442 0.000000 103 +provid 0 121 2.079442 0.000000 94 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +look 0 107 2.197225 0.000000 115 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +place 0 106 2.197225 0.000000 124 +intern 0 108 2.197225 0.000000 128 +topic 0 114 2.197225 0.000000 110 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +follow 0 92 2.397895 0.000000 143 +center 0 88 2.397895 0.000000 158 +learn 0 86 2.484907 0.000000 170 +academ 0 82 2.484907 0.000000 178 +activ 0 84 2.484907 0.000000 182 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +state 0 76 2.564949 0.000000 207 +involv 0 71 2.639057 0.000000 227 +nation 0 74 2.639057 0.000000 240 +servic 0 72 2.639057 0.000000 236 +simul 0 66 2.708050 0.000000 255 +receiv 0 66 2.708050 0.000000 244 +written 0 63 2.772589 0.000000 278 +organ 0 65 2.772589 0.000000 265 +collect 0 65 2.772589 0.000000 268 +visit 0 63 2.772589 0.000000 288 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +detail 0 57 2.890372 0.000000 321 +found 0 53 2.944439 0.000000 337 +undergradu 0 54 2.944439 0.000000 338 +particular 0 51 2.995732 0.000000 352 +much 0 52 2.995732 0.000000 349 +standard 0 48 3.044522 0.000000 365 +right 0 48 3.044522 0.000000 363 +physic 0 47 3.091042 0.000000 377 +directori 0 45 3.135494 0.000000 396 +math 0 44 3.135494 0.000000 402 +mechan 0 43 3.178054 0.000000 416 +continu 0 39 3.258097 0.000000 448 +game 0 36 3.367296 0.000000 498 +tree 0 36 3.367296 0.000000 492 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +chapter 0 32 3.465736 0.000000 536 +india 0 32 3.465736 0.000000 550 +taken 0 31 3.496508 0.000000 555 +often 0 31 3.496508 0.000000 551 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +held 0 28 3.610918 0.000000 600 +effort 0 26 3.688879 0.000000 652 +enhanc 0 26 3.688879 0.000000 644 +session 0 26 3.688879 0.000000 643 +highli 0 23 3.806662 0.000000 725 +methodolog 0 23 3.806662 0.000000 733 +head 0 23 3.806662 0.000000 732 +period 0 22 3.850148 0.000000 743 +unit 0 21 3.912023 0.000000 779 +leav 0 21 3.912023 0.000000 772 +born 0 21 3.912023 0.000000 798 +half 0 21 3.912023 0.000000 776 +safeti 0 20 3.951244 0.000000 817 +scheme 0 20 3.951244 0.000000 818 +tenni 0 20 3.951244 0.000000 838 +exploit 0 20 3.951244 0.000000 836 +failur 0 18 4.060443 0.000000 898 +attend 0 18 4.060443 0.000000 893 +english 0 15 4.248495 0.000000 1033 +came 0 13 4.382027 0.000000 1197 +danc 0 12 4.465908 0.000000 1278 +replic 0 12 4.465908 0.000000 1231 +nanci 0 12 4.465908 0.000000 1256 +eight 0 11 4.553877 0.000000 1331 +council 0 11 4.553877 0.000000 1364 +literatur 0 11 4.553877 0.000000 1300 +leveson 0 9 4.753590 0.000000 1540 +poetri 0 9 4.753590 0.000000 1596 +simpli 0 8 4.875197 0.000000 1626 +presenc 0 8 4.875197 0.000000 1671 +coast 0 8 4.875197 0.000000 1746 +cricket 0 7 5.010635 0.000000 1945 +brought 0 7 5.010635 0.000000 1925 +whenev 0 7 5.010635 0.000000 1883 +occasion 0 7 5.010635 0.000000 1905 +saturdai 0 7 5.010635 0.000000 1794 +throughout 0 7 5.010635 0.000000 1871 +vivek 0 6 5.164786 0.000000 2210 +squash 0 6 5.164786 0.000000 2223 +band 0 6 5.164786 0.000000 2198 +corba 0 5 5.347108 0.000000 2320 +focuss 0 5 5.347108 0.000000 2271 +cell 0 5 5.347108 0.000000 2274 +nuclear 0 5 5.347108 0.000000 2576 +toolset 0 4 5.568345 0.000000 3014 +murphi 0 4 5.568345 0.000000 2737 +racquetbal 0 4 5.568345 0.000000 3052 +suffic 0 4 5.568345 0.000000 2869 +ultra 0 4 5.568345 0.000000 2889 +swing 0 4 5.568345 0.000000 2887 +restructur 0 4 5.568345 0.000000 2775 +tend 0 4 5.568345 0.000000 3041 +rsml 0 3 5.857933 0.000000 3967 +wesleyan 0 3 5.857933 0.000000 3988 +marin 0 3 5.857933 0.000000 3947 +ballroom 0 3 5.857933 0.000000 3983 +dabbl 0 3 5.857933 0.000000 3971 +bank 0 3 5.857933 0.000000 3920 +mirza 0 3 5.857933 0.000000 3989 +bellcor 0 2 6.263398 0.000000 5174 +ratan 0 2 6.263398 0.000000 5948 +adher 0 2 6.263398 0.000000 6025 +tango 0 2 6.263398 0.000000 6010 +reform 0 2 6.263398 0.000000 5828 +growth 0 2 6.263398 0.000000 4084 +angelo 0 1 6.957497 0.000000 16565 +scientistat 0 1 6.957497 0.000000 16566 +morristown 0 1 6.957497 0.000000 16567 +researchwork 0 1 6.957497 0.000000 16568 +distributedsoftwar 0 1 6.957497 0.000000 16569 +anatida 0 1 6.957497 0.000000 16570 +indc 0 1 6.957497 0.000000 16571 +foundher 0 1 6.957497 0.000000 16572 +integrationof 0 1 6.957497 0.000000 16573 +bydr 0 1 6.957497 0.000000 16574 +fromrequir 0 1 6.957497 0.000000 16575 +middletown 0 1 6.957497 0.000000 16576 +purus 0 1 6.957497 0.000000 16577 +lesserext 0 1 6.957497 0.000000 16578 +ardent 0 1 6.957497 0.000000 16579 +folow 0 1 6.957497 0.000000 16580 +superson 0 1 6.957497 0.000000 16581 +cowboi 0 1 6.957497 0.000000 16582 +keen 0 1 6.957497 0.000000 16583 +waltz 0 1 6.957497 0.000000 16584 +foxtrot 0 1 6.957497 0.000000 16585 +chacha 0 1 6.957497 0.000000 16586 +rhumba 0 1 6.957497 0.000000 16587 +mambo 0 1 6.957497 0.000000 16588 +ecosoc 0 1 6.957497 0.000000 16589 +rapidpopul 0 1 6.957497 0.000000 16590 +prolifer 0 1 6.957497 0.000000 16591 +ghalib 0 1 6.957497 0.000000 16592 +centuryindian 0 1 6.957497 0.000000 16593 +poet 0 1 6.957497 0.000000 16594 +romant 0 1 6.957497 0.000000 16595 +victorian 0 1 6.957497 0.000000 16596 +obligatori 0 1 6.957497 0.000000 16597 +sitesthat 0 1 6.957497 0.000000 16598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..41b2f319 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +call 0 91 2.397895 0.000000 153 +master 0 76 2.564949 0.000000 216 +window 0 68 2.708050 0.000000 242 +thesi 0 57 2.890372 0.000000 327 +mobil 0 23 3.806662 0.000000 730 +avoid 0 21 3.912023 0.000000 799 +emac 0 13 4.382027 0.000000 1143 +voelker 0 9 4.753590 0.000000 1557 +guggenheim 0 8 4.875197 0.000000 1759 +geoff 0 6 5.164786 0.000000 2124 +annex 0 5 5.347108 0.000000 2572 +wireless 0 4 5.568345 0.000000 2693 +washingtonseattl 0 4 5.568345 0.000000 3044 +mobisa 0 3 5.857933 0.000000 3927 +inseattl 0 2 6.263398 0.000000 6026 +whati 0 2 6.263398 0.000000 6027 +andbuild 0 2 6.263398 0.000000 6028 +settl 0 2 6.263398 0.000000 5778 +skywhoi 0 1 6.957497 0.000000 16599 +wherechateau 0 1 6.957497 0.000000 16600 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..9b68bac7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +list 1 201 1.609438 1.609438 39 +paper 0 205 1.609438 0.000000 38 +perform 0 143 1.945910 0.000000 74 +seattl 0 120 2.079442 0.000000 103 +look 0 107 2.197225 0.000000 115 +version 0 113 2.197225 0.000000 122 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +memori 0 101 2.302585 0.000000 139 +peopl 0 96 2.302585 0.000000 132 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +test 0 66 2.708050 0.000000 252 +organ 0 65 2.772589 0.000000 265 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +cool 0 49 3.044522 0.000000 374 +keep 0 44 3.135494 0.000000 409 +howev 0 41 3.218876 0.000000 422 +littl 0 39 3.258097 0.000000 454 +actual 0 28 3.610918 0.000000 604 +interpret 0 24 3.761200 0.000000 686 +other 0 24 3.761200 0.000000 697 +earli 0 16 4.174387 0.000000 968 +baer 0 11 4.553877 0.000000 1353 +denni 0 11 4.553877 0.000000 1321 +alpha 0 11 4.553877 0.000000 1348 +jean 0 10 4.653960 0.000000 1440 +jump 0 9 4.753590 0.000000 1603 +wong 0 9 4.753590 0.000000 1609 +wayn 0 8 4.875197 0.000000 1738 +loup 0 6 5.164786 0.000000 2228 +geoff 0 6 5.164786 0.000000 2124 +fish 0 6 5.164786 0.000000 2207 +alec 0 5 5.347108 0.000000 2563 +rocki 0 4 5.568345 0.000000 3048 +waynew 0 3 5.857933 0.000000 3982 +differentmemori 0 1 6.957497 0.000000 16601 +beingdon 0 1 6.957497 0.000000 16602 +rightnow 0 1 6.957497 0.000000 16603 +peoplewho 0 1 6.957497 0.000000 16604 +testwayn 0 1 6.957497 0.000000 16605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..d6d45d83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +time 1 293 1.098612 1.098612 17 +washington 0 236 1.386294 0.000000 32 +william 0 22 3.850148 0.000000 765 +spend 0 19 4.007333 0.000000 850 +hang 0 9 4.753590 0.000000 1499 +pagei 0 8 4.875197 0.000000 1683 +chan 1 7 5.010635 5.010635 1876 +spare 0 6 5.164786 0.000000 2177 +hell 0 4 5.568345 0.000000 2885 +heaven 0 3 5.857933 0.000000 3589 +wchan 0 3 5.857933 0.000000 3338 +pagewilliam 0 1 6.957497 0.000000 16606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..7994b57c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +report 0 131 2.079442 0.000000 92 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +world 0 115 2.197225 0.000000 126 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +select 0 91 2.397895 0.000000 154 +journal 0 83 2.484907 0.000000 183 +control 0 82 2.484907 0.000000 164 +wide 0 84 2.484907 0.000000 185 +internet 0 83 2.484907 0.000000 186 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +intellig 0 72 2.639057 0.000000 225 +nation 0 74 2.639057 0.000000 240 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +sieg 0 69 2.708050 0.000000 260 +august 0 66 2.708050 0.000000 257 +plan 0 65 2.772589 0.000000 272 +artifici 0 63 2.772589 0.000000 280 +dept 0 64 2.772589 0.000000 291 +januari 0 62 2.772589 0.000000 264 +foundat 0 62 2.772589 0.000000 286 +visit 0 63 2.772589 0.000000 288 +juli 0 60 2.833213 0.000000 305 +plai 0 60 2.833213 0.000000 307 +publish 0 57 2.890372 0.000000 326 +found 0 53 2.944439 0.000000 337 +investig 0 51 2.995732 0.000000 353 +electron 0 47 3.091042 0.000000 379 +favorit 0 44 3.135494 0.000000 410 +editor 0 41 3.218876 0.000000 433 +edit 0 42 3.218876 0.000000 418 +past 0 42 3.218876 0.000000 428 +winter 0 36 3.367296 0.000000 500 +award 0 34 3.401197 0.000000 523 +least 0 35 3.401197 0.000000 516 +board 0 33 3.433987 0.000000 528 +travel 0 30 3.555348 0.000000 579 +chair 0 29 3.583519 0.000000 596 +repres 0 26 3.688879 0.000000 656 +enjoi 0 26 3.688879 0.000000 660 +reach 0 24 3.761200 0.000000 688 +magazin 0 24 3.761200 0.000000 704 +ofwashington 0 22 3.850148 0.000000 766 +almost 0 22 3.850148 0.000000 742 +comparison 0 19 4.007333 0.000000 863 +agent 0 18 4.060443 0.000000 910 +bachelor 0 17 4.110874 0.000000 957 +adam 0 17 4.110874 0.000000 934 +young 0 16 4.174387 0.000000 991 +action 0 15 4.248495 0.000000 1038 +photograph 0 15 4.248495 0.000000 1056 +role 0 14 4.317488 0.000000 1101 +stori 0 14 4.317488 0.000000 1087 +galleri 0 13 4.382027 0.000000 1192 +daniel 0 12 4.465908 0.000000 1233 +land 0 12 4.465908 0.000000 1273 +guest 0 12 4.465908 0.000000 1220 +infrastructur 0 12 4.465908 0.000000 1234 +sens 0 11 4.553877 0.000000 1305 +shop 0 10 4.653960 0.000000 1469 +invit 0 10 4.653960 0.000000 1428 +weld 1 9 4.753590 4.753590 1538 +hundr 0 9 4.753590 0.000000 1528 +aaai 0 8 4.875197 0.000000 1750 +presidenti 0 8 4.875197 0.000000 1737 +gather 0 8 4.875197 0.000000 1719 +pacif 0 8 4.875197 0.000000 1674 +illustr 0 8 4.875197 0.000000 1679 +planner 0 7 5.010635 0.000000 1797 +ground 0 7 5.010635 0.000000 1955 +softbot 0 7 5.010635 0.000000 1974 +northwest 0 7 5.010635 0.000000 1973 +yale 0 6 5.164786 0.000000 2003 +commit 0 6 5.164786 0.000000 2233 +engineeringat 0 5 5.347108 0.000000 2561 +middl 0 5 5.347108 0.000000 2372 +cacm 0 5 5.347108 0.000000 2388 +allegro 0 5 5.347108 0.000000 2314 +naval 0 4 5.568345 0.000000 2920 +climb 0 4 5.568345 0.000000 2936 +biochemistri 0 3 5.857933 0.000000 3513 +ucpop 0 3 5.857933 0.000000 3878 +revisit 0 3 5.857933 0.000000 3915 +recreat 0 3 5.857933 0.000000 3990 +theworld 0 3 5.857933 0.000000 3158 +twin 0 3 5.857933 0.000000 3657 +younginvestig 0 2 6.263398 0.000000 5794 +ascal 0 2 6.263398 0.000000 5893 +anintroduct 0 2 6.263398 0.000000 4156 +absent 0 2 6.263398 0.000000 4825 +cafe 0 2 6.263398 0.000000 5826 +boi 0 2 6.263398 0.000000 5918 +wilder 0 2 6.263398 0.000000 5516 +theadvisori 0 1 6.957497 0.000000 16607 +airesearch 0 1 6.957497 0.000000 16608 +ofintellig 0 1 6.957497 0.000000 16609 +isco 0 1 6.957497 0.000000 16610 +scad 0 1 6.957497 0.000000 16611 +seattlewa 0 1 6.957497 0.000000 16612 +sitesworldwid 0 1 6.957497 0.000000 16613 +arehi 0 1 6.957497 0.000000 16614 +aip 0 1 6.957497 0.000000 16615 +exhaustivelist 0 1 6.957497 0.000000 16616 +stormymountain 0 1 6.957497 0.000000 16617 +galen 0 1 6.957497 0.000000 16618 +desert 0 1 6.957497 0.000000 16619 +morocco 0 1 6.957497 0.000000 16620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..54985533 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +master 0 76 2.564949 0.000000 216 +univ 0 28 3.610918 0.000000 617 +utah 0 9 4.753590 0.000000 1585 +wendi 1 2 6.263398 6.263398 5864 +belluomini 0 2 6.263398 0.000000 5865 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..0baa922c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +architectur 0 139 1.945910 0.000000 77 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +select 0 91 2.397895 0.000000 154 +member 0 84 2.484907 0.000000 165 +chang 0 82 2.484907 0.000000 163 +receiv 0 66 2.708050 0.000000 244 +sieg 0 69 2.708050 0.000000 260 +interact 0 62 2.772589 0.000000 270 +advisor 0 51 2.995732 0.000000 355 +move 0 47 3.091042 0.000000 382 +electr 0 38 3.295837 0.000000 461 +theunivers 0 21 3.912023 0.000000 797 +among 0 21 3.912023 0.000000 781 +programminglanguag 0 21 3.912023 0.000000 782 +voic 0 21 3.912023 0.000000 806 +runtim 0 19 4.007333 0.000000 858 +thedepart 0 11 4.553877 0.000000 1350 +wilson 0 9 4.753590 0.000000 1536 +postdoc 0 8 4.875197 0.000000 1724 +myresearch 0 4 5.568345 0.000000 2842 +weihl 0 3 5.857933 0.000000 3284 +inseattl 0 2 6.263398 0.000000 6026 +thespin 0 2 6.263398 0.000000 6029 +sciencein 0 2 6.263398 0.000000 5804 +thelaboratori 0 2 6.263398 0.000000 4424 +linksperson 0 2 6.263398 0.000000 5143 +hsieh 0 2 6.263398 0.000000 5818 +hsiehwilson 0 1 6.957497 0.000000 16621 +hsiehi 0 1 6.957497 0.000000 16622 +theschool 0 1 6.957497 0.000000 16623 +engineeringatmit 0 1 6.957497 0.000000 16624 +werefran 0 1 6.957497 0.000000 16625 +kaashoekandbil 0 1 6.957497 0.000000 16626 +publicationsselect 0 1 6.957497 0.000000 16627 +interestswilson 0 1 6.957497 0.000000 16628 +numberha 0 1 6.957497 0.000000 16629 +whsieh 0 1 6.957497 0.000000 16630 +keyoctob 0 1 6.957497 0.000000 16631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..16f39e32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +seattl 0 120 2.079442 0.000000 103 +analysi 0 124 2.079442 0.000000 98 +structur 0 106 2.197225 0.000000 105 +school 0 84 2.484907 0.000000 188 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +line 0 75 2.639057 0.000000 231 +plai 0 60 2.833213 0.000000 307 +summer 0 56 2.890372 0.000000 311 +realli 0 40 3.258097 0.000000 444 +winter 0 36 3.367296 0.000000 500 +idea 0 32 3.465736 0.000000 545 +interpret 0 24 3.761200 0.000000 686 +departmentunivers 0 24 3.761200 0.000000 711 +scalabl 0 24 3.761200 0.000000 705 +binari 0 20 3.951244 0.000000 823 +bershad 0 18 4.060443 0.000000 902 +asplo 0 17 4.110874 0.000000 948 +cambridg 0 16 4.174387 0.000000 1008 +latenc 0 16 4.174387 0.000000 993 +levi 0 14 4.317488 0.000000 1093 +washingtonbox 0 13 4.382027 0.000000 1200 +usenix 0 12 4.465908 0.000000 1240 +baer 0 11 4.553877 0.000000 1353 +thecomput 0 10 4.653960 0.000000 1408 +equip 0 10 4.653960 0.000000 1459 +voelker 0 9 4.753590 0.000000 1557 +wong 0 9 4.753590 0.000000 1609 +romer 0 8 4.875197 0.000000 1706 +guitar 0 8 4.875197 0.000000 1758 +instrument 0 7 5.010635 0.000000 1954 +wolman 1 6 5.164786 5.164786 2093 +corp 0 6 5.164786 0.000000 2139 +strang 0 6 5.164786 0.000000 2064 +alec 0 5 5.347108 0.000000 2563 +departmentat 0 5 5.347108 0.000000 2513 +treat 0 5 5.347108 0.000000 2521 +gradual 0 4 5.568345 0.000000 2997 +etch 0 4 5.568345 0.000000 2755 +thekkath 0 3 5.857933 0.000000 3973 +habit 0 3 5.857933 0.000000 3777 +thechateau 0 2 6.263398 0.000000 5853 +fordigit 0 2 6.263398 0.000000 5752 +firewal 0 2 6.263398 0.000000 5407 +relai 0 2 6.263398 0.000000 5404 +hungri 0 2 6.263398 0.000000 5511 +otter 0 2 6.263398 0.000000 4166 +nervou 0 2 6.263398 0.000000 5953 +pressur 0 2 6.263398 0.000000 5960 +wolmanwolman 0 1 6.957497 0.000000 16632 +eduworkcomput 0 1 6.957497 0.000000 16633 +isroom 0 1 6.957497 0.000000 16634 +executablesrocki 0 1 6.957497 0.000000 16635 +performanceon 0 1 6.957497 0.000000 16636 +trees 0 1 6.957497 0.000000 16637 +fixha 0 1 6.957497 0.000000 16638 +hallwolman 0 1 6.957497 0.000000 16639 +diseasewolman 0 1 6.957497 0.000000 16640 +lumber 0 1 6.957497 0.000000 16641 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..ac833e3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +phone 0 175 1.791759 0.000000 45 +base 0 165 1.791759 0.000000 50 +implement 0 152 1.791759 0.000000 52 +perform 1 143 1.945910 1.945910 74 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +confer 0 126 2.079442 0.000000 100 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +proceed 0 93 2.397895 0.000000 152 +school 0 84 2.484907 0.000000 188 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +stuff 0 87 2.484907 0.000000 171 +method 0 80 2.564949 0.000000 213 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +simul 0 66 2.708050 0.000000 255 +sieg 0 69 2.708050 0.000000 260 +goal 0 66 2.708050 0.000000 250 +evalu 0 64 2.772589 0.000000 266 +dept 0 64 2.772589 0.000000 291 +visual 0 48 3.044522 0.000000 372 +possibl 0 47 3.091042 0.000000 378 +term 0 43 3.178054 0.000000 411 +cach 0 41 3.218876 0.000000 432 +submit 0 39 3.258097 0.000000 440 +short 0 36 3.367296 0.000000 499 +soon 0 36 3.367296 0.000000 494 +award 0 34 3.401197 0.000000 523 +tech 0 35 3.401197 0.000000 515 +photo 0 31 3.496508 0.000000 561 +graph 0 30 3.555348 0.000000 576 +multiprocessor 0 28 3.610918 0.000000 605 +cluster 0 28 3.610918 0.000000 612 +univ 0 28 3.610918 0.000000 617 +compar 0 26 3.688879 0.000000 648 +trace 0 25 3.737670 0.000000 677 +predict 0 19 4.007333 0.000000 855 +monitor 0 17 4.110874 0.000000 941 +zhang 0 16 4.174387 0.000000 980 +driven 0 15 4.248495 0.000000 1048 +coher 0 14 4.317488 0.000000 1109 +baer 1 11 4.553877 4.553877 1353 +jean 0 10 4.653960 0.000000 1440 +explicit 0 9 4.753590 0.000000 1525 +loup 0 6 5.164786 0.000000 2228 +optimist 0 5 5.347108 0.000000 2501 +conserv 0 4 5.568345 0.000000 2870 +tran 0 3 5.857933 0.000000 3384 +communicationprimit 0 2 6.263398 0.000000 5449 +hpca 0 2 6.263398 0.000000 6030 +toolfor 0 2 6.263398 0.000000 6031 +numa 0 2 6.263398 0.000000 4905 +xiaohan 0 1 6.957497 0.000000 16642 +xqin 0 1 6.957497 0.000000 16643 +basedmultiprocessor 0 1 6.957497 0.000000 16644 +nalluri 0 1 6.957497 0.000000 16645 +processingon 0 1 6.957497 0.000000 16646 +chinaread 0 1 6.957497 0.000000 16647 +chinesesearch 0 1 6.957497 0.000000 16648 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..a4804ef8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +seattl 0 120 2.079442 0.000000 103 +document 0 121 2.079442 0.000000 89 +schedul 0 119 2.079442 0.000000 85 +intern 0 108 2.197225 0.000000 128 +text 0 98 2.302585 0.000000 133 +info 1 85 2.484907 2.484907 176 +second 0 81 2.484907 0.000000 166 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +servic 0 72 2.639057 0.000000 236 +type 0 61 2.833213 0.000000 296 +index 0 56 2.890372 0.000000 309 +talk 0 53 2.944439 0.000000 336 +finger 0 52 2.995732 0.000000 354 +transact 0 39 3.258097 0.000000 438 +brian 0 38 3.295837 0.000000 466 +connect 0 37 3.332205 0.000000 485 +tech 0 35 3.401197 0.000000 515 +survei 0 35 3.401197 0.000000 513 +random 0 34 3.401197 0.000000 511 +linux 0 27 3.637586 0.000000 631 +yahoo 0 24 3.761200 0.000000 707 +lyco 0 19 4.007333 0.000000 871 +bershad 0 18 4.060443 0.000000 902 +qual 0 15 4.248495 0.000000 1062 +spin 0 14 4.317488 0.000000 1121 +touch 0 12 4.465908 0.000000 1288 +perl 0 11 4.553877 0.000000 1332 +desktop 0 10 4.653960 0.000000 1445 +metacrawl 0 10 4.653960 0.000000 1455 +vista 0 10 4.653960 0.000000 1452 +meta 0 9 4.753590 0.000000 1505 +modula 0 9 4.753590 0.000000 1613 +japan 0 8 4.875197 0.000000 1762 +gatewai 0 7 5.010635 0.000000 1942 +lesson 0 5 5.347108 0.000000 2568 +alta 0 4 5.568345 0.000000 3039 +japanes 0 4 5.568345 0.000000 2934 +patch 0 4 5.568345 0.000000 2710 +archi 0 3 5.857933 0.000000 3639 +javascript 0 3 5.857933 0.000000 3221 +thespin 0 2 6.263398 0.000000 6029 +apprentic 0 2 6.263398 0.000000 5873 +yasushi 0 1 6.957497 0.000000 16649 +saitoyasushi 0 1 6.957497 0.000000 16650 +saito 0 1 6.957497 0.000000 16651 +atdepart 0 1 6.957497 0.000000 16652 +workingwith 0 1 6.957497 0.000000 16653 +andperson 0 1 6.957497 0.000000 16654 +sightse 0 1 6.957497 0.000000 16655 +trainer 0 1 6.957497 0.000000 16656 +dvorak 0 1 6.957497 0.000000 16657 +trycanva 0 1 6.957497 0.000000 16658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..1aaa3bde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +document 0 121 2.079442 0.000000 89 +mathemat 0 108 2.197225 0.000000 123 +part 0 98 2.302585 0.000000 129 +search 0 95 2.397895 0.000000 155 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +resum 0 79 2.564949 0.000000 217 +intellig 0 72 2.639057 0.000000 225 +line 0 75 2.639057 0.000000 231 +servic 0 72 2.639057 0.000000 236 +degre 0 69 2.708050 0.000000 259 +artifici 0 63 2.772589 0.000000 280 +result 0 65 2.772589 0.000000 281 +basic 0 50 3.044522 0.000000 360 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +field 0 37 3.332205 0.000000 482 +idea 0 32 3.465736 0.000000 545 +computersci 0 30 3.555348 0.000000 562 +cluster 0 28 3.610918 0.000000 612 +retriev 0 27 3.637586 0.000000 621 +along 0 18 4.060443 0.000000 878 +engineeringunivers 0 17 4.110874 0.000000 959 +chateau 0 16 4.174387 0.000000 997 +trip 0 14 4.317488 0.000000 1113 +washingtonbox 0 13 4.382027 0.000000 1200 +edui 0 13 4.382027 0.000000 1193 +israel 0 11 4.553877 0.000000 1366 +metacrawl 0 10 4.653960 0.000000 1455 +ski 0 10 4.653960 0.000000 1471 +hundr 0 9 4.753590 0.000000 1528 +erik 0 8 4.875197 0.000000 1701 +oren 0 6 5.164786 0.000000 2134 +softwareengin 0 6 5.164786 0.000000 2162 +selberg 0 5 5.347108 0.000000 2441 +algorithmsfor 0 4 5.568345 0.000000 2748 +worki 0 4 5.568345 0.000000 3010 +raft 0 4 5.568345 0.000000 3060 +dive 0 3 5.857933 0.000000 3654 +zamir 1 2 6.263398 6.263398 5897 +pageoren 0 2 6.263398 0.000000 5888 +jerusalem 0 2 6.263398 0.000000 4918 +isra 0 1 6.957497 0.000000 16659 +myundergradu 0 1 6.957497 0.000000 16660 +hebrewunivers 0 1 6.957497 0.000000 16661 +userwith 0 1 6.957497 0.000000 16662 +orenetzioni 0 1 6.957497 0.000000 16663 +sinai 0 1 6.957497 0.000000 16664 +jeeptour 0 1 6.957497 0.000000 16665 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..07248edd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +paper 0 205 1.609438 0.000000 38 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +architectur 0 139 1.945910 0.000000 77 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +year 0 148 1.945910 0.000000 84 +perform 0 143 1.945910 0.000000 74 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +look 0 107 2.197225 0.000000 115 +present 0 91 2.397895 0.000000 145 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +nation 0 74 2.639057 0.000000 240 +involv 0 71 2.639057 0.000000 227 +simul 0 66 2.708050 0.000000 255 +receiv 0 66 2.708050 0.000000 244 +improv 0 62 2.772589 0.000000 289 +direct 0 57 2.890372 0.000000 316 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +approach 0 48 3.044522 0.000000 366 +protocol 0 45 3.135494 0.000000 407 +textbook 0 44 3.135494 0.000000 397 +execut 0 45 3.135494 0.000000 404 +cach 0 41 3.218876 0.000000 432 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +author 0 39 3.258097 0.000000 450 +join 0 39 3.258097 0.000000 457 +electr 0 38 3.295837 0.000000 461 +industri 0 38 3.295837 0.000000 464 +ofth 0 36 3.367296 0.000000 491 +singl 0 34 3.401197 0.000000 510 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +chair 0 29 3.583519 0.000000 596 +cluster 0 28 3.610918 0.000000 612 +multiprocessor 0 28 3.610918 0.000000 605 +although 0 25 3.737670 0.000000 667 +trace 0 25 3.737670 0.000000 677 +fellow 0 24 3.761200 0.000000 701 +serv 0 22 3.850148 0.000000 758 +comparison 0 19 4.007333 0.000000 863 +asplo 0 17 4.110874 0.000000 948 +driven 0 15 4.248495 0.000000 1048 +coher 0 14 4.317488 0.000000 1109 +difficulti 0 13 4.382027 0.000000 1132 +block 0 13 4.382027 0.000000 1183 +franc 0 12 4.465908 0.000000 1276 +baer 0 11 4.553877 0.000000 1353 +cycl 0 11 4.553877 0.000000 1335 +primit 0 11 4.553877 0.000000 1317 +isca 0 11 4.553877 0.000000 1354 +impact 0 11 4.553877 0.000000 1334 +denni 0 11 4.553877 0.000000 1321 +jean 0 10 4.653960 0.000000 1440 +prior 0 10 4.653960 0.000000 1438 +french 0 9 4.753590 0.000000 1511 +guggenheim 0 8 4.875197 0.000000 1759 +uniprocessor 0 8 4.875197 0.000000 1696 +prefetch 0 6 5.164786 0.000000 2039 +loup 0 6 5.164786 0.000000 2228 +ucla 0 5 5.347108 0.000000 2502 +icpp 0 5 5.347108 0.000000 2382 +anddistribut 0 4 5.568345 0.000000 3031 +coauthor 0 4 5.568345 0.000000 3064 +conserv 0 4 5.568345 0.000000 2870 +chairman 0 3 5.857933 0.000000 3991 +electricalengin 0 3 5.857933 0.000000 3987 +parallelprocess 0 3 5.857933 0.000000 3626 +twelv 0 3 5.857933 0.000000 3899 +specul 0 3 5.857933 0.000000 3951 +grenobl 0 2 6.263398 0.000000 5928 +internationalsymposium 0 2 6.263398 0.000000 6032 +adjunct 0 2 6.263398 0.000000 6033 +diplom 0 2 6.263398 0.000000 5982 +theuniversit 0 2 6.263398 0.000000 5927 +laboratoir 0 2 6.263398 0.000000 5929 +universit 0 2 6.263398 0.000000 5630 +retain 0 2 6.263398 0.000000 5443 +hpca 0 2 6.263398 0.000000 6030 +professorand 0 1 6.957497 0.000000 16666 +ingnieur 0 1 6.957497 0.000000 16667 +doctorat 0 1 6.957497 0.000000 16668 +decalcul 0 1 6.957497 0.000000 16669 +technologygroup 0 1 6.957497 0.000000 16670 +thesearea 0 1 6.957497 0.000000 16671 +distinguishedvisitor 0 1 6.957497 0.000000 16672 +asprogram 0 1 6.957497 0.000000 16673 +sigarch 0 1 6.957497 0.000000 16674 +eighteen 0 1 6.957497 0.000000 16675 +professorba 0 1 6.957497 0.000000 16676 +laboratoriesand 0 1 6.957497 0.000000 16677 +inacademia 0 1 6.957497 0.000000 16678 +hashad 0 1 6.957497 0.000000 16679 +accent 0 1 6.957497 0.000000 16680 +comparisonwith 0 1 6.957497 0.000000 16681 +andisca 0 1 6.957497 0.000000 16682 +optimisticapproach 0 1 6.957497 0.000000 16683 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..d6b892f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +object 0 138 1.945910 0.000000 79 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +high 0 130 2.079442 0.000000 101 +seattl 0 120 2.079442 0.000000 103 +assist 0 112 2.197225 0.000000 113 +advanc 0 99 2.302585 0.000000 130 +member 0 84 2.484907 0.000000 165 +control 0 82 2.484907 0.000000 164 +level 0 87 2.484907 0.000000 180 +requir 0 81 2.484907 0.000000 167 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +april 0 77 2.564949 0.000000 196 +effici 0 73 2.639057 0.000000 233 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +sieg 0 69 2.708050 0.000000 260 +guid 0 63 2.772589 0.000000 267 +street 0 63 2.772589 0.000000 293 +type 0 61 2.833213 0.000000 296 +room 0 59 2.833213 0.000000 301 +faculti 0 56 2.890372 0.000000 325 +direct 0 57 2.890372 0.000000 316 +extens 0 53 2.944439 0.000000 340 +undergradu 0 54 2.944439 0.000000 338 +investig 0 51 2.995732 0.000000 353 +maintain 0 51 2.995732 0.000000 342 +still 0 50 3.044522 0.000000 362 +pointer 0 48 3.044522 0.000000 368 +featur 0 46 3.091042 0.000000 386 +adapt 0 46 3.091042 0.000000 387 +join 0 39 3.258097 0.000000 457 +multi 0 36 3.367296 0.000000 493 +express 0 32 3.465736 0.000000 540 +profil 0 30 3.555348 0.000000 581 +static 0 27 3.637586 0.000000 619 +lead 0 23 3.806662 0.000000 718 +self 0 22 3.850148 0.000000 761 +programminglanguag 0 21 3.912023 0.000000 782 +util 0 21 3.912023 0.000000 774 +kernel 0 20 3.951244 0.000000 825 +fine 0 20 3.951244 0.000000 822 +stanford 0 17 4.110874 0.000000 955 +previous 0 17 4.110874 0.000000 923 +engineeringunivers 0 17 4.110874 0.000000 959 +spin 0 14 4.317488 0.000000 1121 +achiev 0 14 4.317488 0.000000 1088 +incorpor 0 13 4.382027 0.000000 1163 +washingtonbox 0 13 4.382027 0.000000 1200 +safe 0 12 4.465908 0.000000 1274 +modul 0 10 4.653960 0.000000 1434 +reli 0 10 4.653960 0.000000 1411 +cecil 0 9 4.753590 0.000000 1547 +end 0 9 4.753590 0.000000 1567 +modula 0 9 4.753590 0.000000 1613 +herefor 0 9 4.753590 0.000000 1483 +chamber 1 8 4.875197 4.875197 1692 +pure 0 8 4.875197 0.000000 1776 +analys 0 8 4.875197 0.000000 1666 +isol 0 8 4.875197 0.000000 1663 +craig 0 7 5.010635 0.000000 1879 +implementationof 0 7 5.010635 0.000000 1813 +vehicl 0 7 5.010635 0.000000 1928 +vortex 0 5 5.347108 0.000000 2362 +spinproject 0 5 5.347108 0.000000 2570 +despit 0 5 5.347108 0.000000 2317 +languagesand 0 4 5.568345 0.000000 3071 +microkernel 0 4 5.568345 0.000000 3047 +intra 0 3 5.857933 0.000000 3243 +dialect 0 3 5.857933 0.000000 3226 +dynamiccompil 0 3 5.857933 0.000000 3926 +programmingenviron 0 2 6.263398 0.000000 5240 +whichsupport 0 2 6.263398 0.000000 6003 +ceciland 0 1 6.957497 0.000000 16684 +languageserv 0 1 6.957497 0.000000 16685 +compilersystem 0 1 6.957497 0.000000 16686 +andinterprocedur 0 1 6.957497 0.000000 16687 +withfront 0 1 6.957497 0.000000 16688 +chamberswa 0 1 6.957497 0.000000 16689 +implementationsund 0 1 6.957497 0.000000 16690 +systemintegr 0 1 6.957497 0.000000 16691 +themodula 0 1 6.957497 0.000000 16692 +spinalso 0 1 6.957497 0.000000 16693 +grainedextens 0 1 6.957497 0.000000 16694 +researchproject 0 1 6.957497 0.000000 16695 +informationprof 0 1 6.957497 0.000000 16696 +chambersdepart 0 1 6.957497 0.000000 16697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..cb328c52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +advanc 0 99 2.302585 0.000000 130 +associ 0 93 2.397895 0.000000 151 +graphic 0 90 2.397895 0.000000 147 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +involv 0 71 2.639057 0.000000 227 +logic 0 71 2.639057 0.000000 230 +workshop 0 71 2.639057 0.000000 239 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +room 0 59 2.833213 0.000000 301 +digit 0 52 2.995732 0.000000 348 +physic 0 47 3.091042 0.000000 377 +join 0 39 3.258097 0.000000 457 +programm 0 39 3.258097 0.000000 445 +paul 0 38 3.295837 0.000000 471 +field 0 37 3.332205 0.000000 482 +articl 0 33 3.433987 0.000000 530 +focu 0 30 3.555348 0.000000 571 +travel 0 30 3.555348 0.000000 579 +arrai 0 27 3.637586 0.000000 627 +ofwashington 0 22 3.850148 0.000000 766 +voic 0 21 3.912023 0.000000 806 +vlsi 0 21 3.912023 0.000000 795 +chip 0 21 3.912023 0.000000 770 +rout 0 21 3.912023 0.000000 793 +particularli 0 19 4.007333 0.000000 867 +aid 0 18 4.060443 0.000000 904 +carl 1 15 4.248495 4.248495 1024 +draw 0 14 4.317488 0.000000 1086 +circuit 0 13 4.382027 0.000000 1131 +carnegi 0 12 4.465908 0.000000 1260 +fpga 0 10 4.653960 0.000000 1433 +franklin 0 10 4.653960 0.000000 1436 +chao 0 8 4.875197 0.000000 1753 +curv 0 8 4.875197 0.000000 1656 +sensit 0 8 4.875197 0.000000 1726 +router 0 8 4.875197 0.000000 1772 +multicomput 0 7 5.010635 0.000000 1890 +northwest 0 7 5.010635 0.000000 1973 +densiti 0 7 5.010635 0.000000 1927 +southern 0 6 5.164786 0.000000 2191 +spline 0 6 5.164786 0.000000 2007 +gate 0 6 5.164786 0.000000 2182 +categori 0 5 5.347108 0.000000 2261 +darren 0 5 5.347108 0.000000 2565 +ebel 1 4 5.568345 5.568345 2756 +triptych 0 4 5.568345 0.000000 3061 +neil 0 4 5.568345 0.000000 2841 +theperform 0 3 5.857933 0.000000 3262 +cronquist 0 3 5.857933 0.000000 3942 +haswork 0 2 6.263398 0.000000 5182 +andsurfac 0 2 6.263398 0.000000 5735 +hei 0 2 6.263398 0.000000 5769 +latch 0 2 6.263398 0.000000 6034 +soha 0 2 6.263398 0.000000 6006 +hassoun 0 2 6.263398 0.000000 6007 +mckenzi 0 2 6.263398 0.000000 5974 +ebelingdepart 0 1 6.957497 0.000000 16698 +wheatoncolleg 0 1 6.957497 0.000000 16699 +illinoisunivers 0 1 6.957497 0.000000 16700 +mellonunivers 0 1 6.957497 0.000000 16701 +vlsiarchitectur 0 1 6.957497 0.000000 16702 +hitech 0 1 6.957497 0.000000 16703 +chessmachin 0 1 6.957497 0.000000 16704 +apex 0 1 6.957497 0.000000 16705 +routingnetwork 0 1 6.957497 0.000000 16706 +placementand 0 1 6.957497 0.000000 16707 +teachingspr 0 1 6.957497 0.000000 16708 +designoffic 0 1 6.957497 0.000000 16709 +fccm 0 1 6.957497 0.000000 16710 +napamai 0 1 6.957497 0.000000 16711 +burlington 0 1 6.957497 0.000000 16712 +chicagojun 0 1 6.957497 0.000000 16713 +vegasresearch 0 1 6.957497 0.000000 16714 +amara 0 1 6.957497 0.000000 16715 +galleryelan 0 1 6.957497 0.000000 16716 +galleryebel 0 1 6.957497 0.000000 16717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..5f2a1e61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +seattl 0 120 2.079442 0.000000 103 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +confer 0 126 2.079442 0.000000 100 +server 0 76 2.564949 0.000000 204 +new 0 64 2.772589 0.000000 262 +photo 0 31 3.496508 0.000000 561 +steve 0 29 3.583519 0.000000 594 +magazin 0 24 3.761200 0.000000 704 +tenni 0 20 3.951244 0.000000 838 +agent 0 18 4.060443 0.000000 910 +hank 0 12 4.465908 0.000000 1253 +uncertainti 0 7 5.010635 0.000000 1882 +restaur 0 6 5.164786 0.000000 2230 +seriou 0 5 5.347108 0.000000 2252 +carlo 0 5 5.347108 0.000000 2515 +maria 0 4 5.568345 0.000000 2954 +wine 0 3 5.857933 0.000000 3895 +hanksunivers 0 1 6.957497 0.000000 16718 +washingtondepart 0 1 6.957497 0.000000 16719 +architecturesai 0 1 6.957497 0.000000 16720 +symphoni 0 1 6.957497 0.000000 16721 +opera 0 1 6.957497 0.000000 16722 +edita 0 1 6.957497 0.000000 16723 +gruberova 0 1 6.957497 0.000000 16724 +giulini 0 1 6.957497 0.000000 16725 +discographi 0 1 6.957497 0.000000 16726 +sumac 0 1 6.957497 0.000000 16727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..1a29f68f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +take 0 97 2.302585 0.000000 134 +school 0 84 2.484907 0.000000 188 +intellig 0 72 2.639057 0.000000 225 +degre 0 69 2.708050 0.000000 259 +knowledg 0 67 2.708050 0.000000 243 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +colleg 0 61 2.833213 0.000000 300 +faculti 0 56 2.890372 0.000000 325 +understand 0 47 3.091042 0.000000 384 +math 0 44 3.135494 0.000000 402 +electr 0 38 3.295837 0.000000 461 +origin 0 38 3.295837 0.000000 472 +within 0 33 3.433987 0.000000 525 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +neural 0 30 3.555348 0.000000 578 +symbol 0 27 3.637586 0.000000 620 +spent 1 25 3.737670 3.737670 676 +initi 0 23 3.806662 0.000000 717 +divis 0 21 3.912023 0.000000 803 +corpor 0 21 3.912023 0.000000 802 +verif 0 20 3.951244 0.000000 826 +expert 0 20 3.951244 0.000000 833 +mostli 0 19 4.007333 0.000000 869 +aid 0 18 4.060443 0.000000 904 +speech 0 12 4.465908 0.000000 1222 +fellowship 0 10 4.653960 0.000000 1460 +yale 0 6 5.164786 0.000000 2003 +british 0 5 5.347108 0.000000 2546 +broadcast 0 5 5.347108 0.000000 2453 +began 0 5 5.347108 0.000000 2498 +scotland 0 4 5.568345 0.000000 3049 +withth 0 4 5.568345 0.000000 2805 +alistair 0 3 5.857933 0.000000 3315 +holden 0 3 5.857933 0.000000 3314 +london 0 3 5.857933 0.000000 3282 +imperi 0 2 6.263398 0.000000 5389 +highland 0 1 6.957497 0.000000 16728 +receivedhi 0 1 6.957497 0.000000 16729 +glasgow 0 1 6.957497 0.000000 16730 +graduateapprentic 0 1 6.957497 0.000000 16731 +edison 0 1 6.957497 0.000000 16732 +phddegre 0 1 6.957497 0.000000 16733 +learningin 0 1 6.957497 0.000000 16734 +coursefrom 0 1 6.957497 0.000000 16735 +colin 0 1 6.957497 0.000000 16736 +cherri 0 1 6.957497 0.000000 16737 +thebbc 0 1 6.957497 0.000000 16738 +theuw 0 1 6.957497 0.000000 16739 +departmentsform 0 1 6.957497 0.000000 16740 +netmethodolog 0 1 6.957497 0.000000 16741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..22102ec9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +algorithm 0 162 1.791759 0.000000 57 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +professor 0 137 1.945910 0.000000 76 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +mathemat 0 108 2.197225 0.000000 123 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +advanc 0 99 2.302585 0.000000 130 +part 0 98 2.302585 0.000000 129 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +academ 0 82 2.484907 0.000000 178 +journal 0 83 2.484907 0.000000 183 +nation 0 74 2.639057 0.000000 240 +effici 0 73 2.639057 0.000000 233 +complex 0 64 2.772589 0.000000 269 +improv 0 62 2.772589 0.000000 289 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +press 0 42 3.218876 0.000000 419 +societi 1 40 3.258097 3.258097 456 +theoret 0 39 3.258097 0.000000 446 +industri 0 38 3.295837 0.000000 464 +respons 0 37 3.332205 0.000000 476 +award 0 34 3.401197 0.000000 523 +random 0 34 3.401197 0.000000 511 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +travel 0 30 3.555348 0.000000 579 +art 0 29 3.583519 0.000000 593 +chair 0 29 3.583519 0.000000 596 +held 0 28 3.610918 0.000000 600 +american 0 27 3.637586 0.000000 634 +berkelei 0 26 3.688879 0.000000 657 +fellow 0 24 3.761200 0.000000 701 +doctor 0 24 3.761200 0.000000 709 +flow 0 24 3.761200 0.000000 700 +ofwashington 0 22 3.850148 0.000000 766 +reduc 0 22 3.850148 0.000000 759 +among 0 21 3.912023 0.000000 781 +miller 0 17 4.110874 0.000000 949 +match 0 16 4.174387 0.000000 965 +partit 0 16 4.174387 0.000000 984 +massachusett 0 14 4.317488 0.000000 1118 +america 0 11 4.553877 0.000000 1370 +probabilist 0 11 4.553877 0.000000 1343 +minimum 0 9 4.753590 0.000000 1555 +academi 0 8 4.875197 0.000000 1735 +combinatori 0 8 4.875197 0.000000 1629 +pennsylvania 0 7 5.010635 0.000000 1932 +perfect 0 7 5.010635 0.000000 1921 +prize 0 6 5.164786 0.000000 2150 +ture 0 6 5.164786 0.000000 1997 +advisori 0 6 5.164786 0.000000 2148 +plane 0 6 5.164786 0.000000 2187 +karp 0 5 5.347108 0.000000 2284 +weyl 0 4 5.568345 0.000000 2854 +technion 0 4 5.568345 0.000000 2856 +weizmann 0 4 5.568345 0.000000 2858 +combinator 0 4 5.568345 0.000000 2915 +theacm 0 4 5.568345 0.000000 2698 +neumann 0 3 5.857933 0.000000 3720 +medal 0 3 5.857933 0.000000 3912 +truste 0 3 5.857933 0.000000 3900 +combinatorica 0 3 5.857933 0.000000 3649 +ofoper 0 3 5.857933 0.000000 3292 +dick 0 2 6.263398 0.000000 5396 +sciencesmemb 0 2 6.263398 0.000000 5742 +engineeringfellow 0 2 6.263398 0.000000 4902 +sciencesfellow 0 2 6.263398 0.000000 4903 +appliedmathemat 0 2 6.263398 0.000000 5716 +honorari 0 2 6.263398 0.000000 5741 +georgetown 0 2 6.263398 0.000000 5667 +wigderson 0 2 6.263398 0.000000 6035 +fornetwork 0 2 6.263398 0.000000 5580 +edmond 0 2 6.263398 0.000000 4144 +plenum 0 2 6.263398 0.000000 6036 +presentmemb 0 1 6.957497 0.000000 16742 +salesman 0 1 6.957497 0.000000 16743 +karprichard 0 1 6.957497 0.000000 16744 +karpprofessor 0 1 6.957497 0.000000 16745 +ofcomputersci 0 1 6.957497 0.000000 16746 +andadjunct 0 1 6.957497 0.000000 16747 +ofmolecularbiotechnologyunivers 0 1 6.957497 0.000000 16748 +eduaward 0 1 6.957497 0.000000 16749 +membershipsn 0 1 6.957497 0.000000 16750 +babbag 0 1 6.957497 0.000000 16751 +sciencedistinguish 0 1 6.957497 0.000000 16752 +senat 0 1 6.957497 0.000000 16753 +berkeleylanchest 0 1 6.957497 0.000000 16754 +fulkerson 0 1 6.957497 0.000000 16755 +hermann 0 1 6.957497 0.000000 16756 +forsoci 0 1 6.957497 0.000000 16757 +governor 0 1 6.957497 0.000000 16758 +scienceinstitut 0 1 6.957497 0.000000 16759 +presentselect 0 1 6.957497 0.000000 16760 +turingaward 0 1 6.957497 0.000000 16761 +upfal 0 1 6.957497 0.000000 16762 +spanningtre 0 1 6.957497 0.000000 16763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..c23eb40a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,251 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +lectur 0 135 1.945910 0.000000 73 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +technolog 0 131 2.079442 0.000000 102 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +person 0 111 2.197225 0.000000 117 +version 0 113 2.197225 0.000000 122 +memori 0 101 2.302585 0.000000 139 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +grade 0 90 2.397895 0.000000 142 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +complet 0 77 2.564949 0.000000 208 +master 0 76 2.564949 0.000000 216 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +nation 0 74 2.639057 0.000000 240 +servic 0 72 2.639057 0.000000 236 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +test 0 66 2.708050 0.000000 252 +degre 0 69 2.708050 0.000000 259 +polici 0 64 2.772589 0.000000 279 +foundat 0 62 2.772589 0.000000 286 +colleg 0 61 2.833213 0.000000 300 +faculti 0 56 2.890372 0.000000 325 +reason 0 57 2.890372 0.000000 318 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +talk 0 53 2.944439 0.000000 336 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +profession 0 51 2.995732 0.000000 345 +frequent 0 49 3.044522 0.000000 367 +visitor 0 49 3.044522 0.000000 371 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +execut 0 45 3.135494 0.000000 404 +review 0 42 3.218876 0.000000 425 +examin 0 42 3.218876 0.000000 424 +http 0 41 3.218876 0.000000 420 +transact 0 39 3.258097 0.000000 438 +annual 0 40 3.258097 0.000000 458 +industri 0 38 3.295837 0.000000 464 +electr 0 38 3.295837 0.000000 461 +field 0 37 3.332205 0.000000 482 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +committe 0 34 3.401197 0.000000 522 +award 0 34 3.401197 0.000000 523 +board 0 33 3.433987 0.000000 528 +product 0 33 3.433987 0.000000 527 +chair 0 29 3.583519 0.000000 596 +chines 0 29 3.583519 0.000000 595 +intend 0 28 3.610918 0.000000 599 +berkelei 0 26 3.688879 0.000000 657 +concern 0 25 3.737670 0.000000 666 +doctor 0 24 3.761200 0.000000 709 +sometim 0 24 3.761200 0.000000 696 +miscellan 0 23 3.806662 0.000000 731 +famili 0 23 3.806662 0.000000 735 +serv 0 22 3.850148 0.000000 758 +director 0 22 3.850148 0.000000 767 +corpor 0 21 3.912023 0.000000 802 +fund 0 21 3.912023 0.000000 805 +theunivers 0 21 3.912023 0.000000 797 +hous 0 21 3.912023 0.000000 801 +region 0 19 4.007333 0.000000 875 +seem 0 18 4.060443 0.000000 899 +appropri 0 18 4.060443 0.000000 883 +lot 0 18 4.060443 0.000000 889 +stand 0 18 4.060443 0.000000 891 +record 0 18 4.060443 0.000000 890 +demo 0 18 4.060443 0.000000 888 +universityof 0 15 4.248495 0.000000 1061 +drive 0 15 4.248495 0.000000 1052 +club 0 15 4.248495 0.000000 1058 +trip 0 14 4.317488 0.000000 1113 +essenti 0 13 4.382027 0.000000 1137 +sigmetr 0 13 4.382027 0.000000 1173 +forth 0 13 4.382027 0.000000 1186 +speech 0 12 4.465908 0.000000 1222 +thedepart 0 11 4.553877 0.000000 1350 +council 0 11 4.553877 0.000000 1364 +host 0 11 4.553877 0.000000 1306 +player 0 11 4.553877 0.000000 1371 +cook 0 10 4.653960 0.000000 1464 +perspect 0 10 4.653960 0.000000 1437 +congress 0 9 4.753590 0.000000 1592 +pick 0 9 4.753590 0.000000 1498 +govern 0 9 4.753590 0.000000 1581 +telecommun 0 9 4.753590 0.000000 1565 +andth 0 9 4.753590 0.000000 1481 +vice 0 9 4.753590 0.000000 1604 +lane 0 8 4.875197 0.000000 1720 +mile 0 8 4.875197 0.000000 1743 +virginia 0 8 4.875197 0.000000 1659 +driver 0 8 4.875197 0.000000 1657 +centuri 0 7 5.010635 0.000000 1935 +surpris 0 7 5.010635 0.000000 1828 +molecular 0 7 5.010635 0.000000 1887 +advisori 0 6 5.164786 0.000000 2148 +ture 0 6 5.164786 0.000000 1997 +brook 0 6 5.164786 0.000000 2152 +deliv 0 6 5.164786 0.000000 2070 +highwai 0 6 5.164786 0.000000 2095 +presid 0 6 5.164786 0.000000 2196 +duke 0 6 5.164786 0.000000 2231 +lazowska 0 4 5.568345 0.000000 2694 +invent 0 4 5.568345 0.000000 3028 +machineri 0 4 5.568345 0.000000 2851 +push 0 4 5.568345 0.000000 2635 +andengin 0 4 5.568345 0.000000 3042 +rack 0 3 5.857933 0.000000 3176 +researchassoci 0 3 5.857933 0.000000 3664 +affair 0 3 5.857933 0.000000 3916 +belong 0 3 5.857933 0.000000 3797 +atstanford 0 3 5.857933 0.000000 3935 +hongkong 0 3 5.857933 0.000000 3677 +theimpact 0 3 5.857933 0.000000 3179 +uwcs 0 3 5.857933 0.000000 3977 +informationtechnolog 0 3 5.857933 0.000000 3836 +down 0 3 5.857933 0.000000 3870 +celebr 0 2 6.263398 0.000000 4946 +onthi 0 2 6.263398 0.000000 5357 +mbquicktim 0 2 6.263398 0.000000 5916 +advisorycommitte 0 2 6.263398 0.000000 6037 +ofdata 0 2 6.263398 0.000000 6038 +ventur 0 2 6.263398 0.000000 4938 +andha 0 2 6.263398 0.000000 5955 +theinstitut 0 2 6.263398 0.000000 6039 +hpcc 0 2 6.263398 0.000000 5832 +pagerec 0 2 6.263398 0.000000 6022 +ahalf 0 1 6.957497 0.000000 16764 +exponentialprogress 0 1 6.957497 0.000000 16765 +annualfaculti 0 1 6.957497 0.000000 16766 +vicepresid 0 1 6.957497 0.000000 16767 +gore 0 1 6.957497 0.000000 16768 +eniac 0 1 6.957497 0.000000 16769 +thanniversari 0 1 6.957497 0.000000 16770 +nathanmyhrvold 0 1 6.957497 0.000000 16771 +joinsedlazowska 0 1 6.957497 0.000000 16772 +theuwcs 0 1 6.957497 0.000000 16773 +testimonyto 0 1 6.957497 0.000000 16774 +georgejetson 0 1 6.957497 0.000000 16775 +forfr 0 1 6.957497 0.000000 16776 +flintston 0 1 6.957497 0.000000 16777 +mostlywearsti 0 1 6.957497 0.000000 16778 +flier 0 1 6.957497 0.000000 16779 +healso 0 1 6.957497 0.000000 16780 +havefunnynos 0 1 6.957497 0.000000 16781 +allgradu 0 1 6.957497 0.000000 16782 +laboratoriesin 0 1 6.957497 0.000000 16783 +ofcra 0 1 6.957497 0.000000 16784 +scomputersci 0 1 6.957497 0.000000 16785 +formicrosoft 0 1 6.957497 0.000000 16786 +personnationalsemiconductor 0 1 6.957497 0.000000 16787 +academicadvisori 0 1 6.957497 0.000000 16788 +forcabl 0 1 6.957497 0.000000 16789 +hows 0 1 6.957497 0.000000 16790 +cascadia 0 1 6.957497 0.000000 16791 +committeesfor 0 1 6.957497 0.000000 16792 +eecsat 0 1 6.957497 0.000000 16793 +councilpanel 0 1 6.957497 0.000000 16794 +agencyhigh 0 1 6.957497 0.000000 16795 +computingand 0 1 6.957497 0.000000 16796 +sutherland 0 1 6.957497 0.000000 16797 +examinersfor 0 1 6.957497 0.000000 16798 +sspecial 0 1 6.957497 0.000000 16799 +chairof 0 1 6.957497 0.000000 16800 +andeditor 0 1 6.957497 0.000000 16801 +servinga 0 1 6.957497 0.000000 16802 +onacadem 0 1 6.957497 0.000000 16803 +thecommitte 0 1 6.957497 0.000000 16804 +deanship 0 1 6.957497 0.000000 16805 +artsand 0 1 6.957497 0.000000 16806 +biotechnolog 0 1 6.957497 0.000000 16807 +amemb 0 1 6.957497 0.000000 16808 +deanof 0 1 6.957497 0.000000 16809 +fellowof 0 1 6.957497 0.000000 16810 +associationfor 0 1 6.957497 0.000000 16811 +andelectron 0 1 6.957497 0.000000 16812 +seventeenph 0 1 6.957497 0.000000 16813 +studentshav 0 1 6.957497 0.000000 16814 +integratedoverview 0 1 6.957497 0.000000 16815 +apersuas 0 1 6.957497 0.000000 16816 +forloc 0 1 6.957497 0.000000 16817 +consumpt 0 1 6.957497 0.000000 16818 +persuas 0 1 6.957497 0.000000 16819 +playertopten 0 1 6.957497 0.000000 16820 +csebuild 0 1 6.957497 0.000000 16821 +abbrevi 0 1 6.957497 0.000000 16822 +cvcomputingresearch 0 1 6.957497 0.000000 16823 +forwardmassi 0 1 6.957497 0.000000 16824 +goldmanreport 0 1 6.957497 0.000000 16825 +alleg 0 1 6.957497 0.000000 16826 +cseph 0 1 6.957497 0.000000 16827 +flaw 0 1 6.957497 0.000000 16828 +medianyear 0 1 6.957497 0.000000 16829 +boardstudi 0 1 6.957497 0.000000 16830 +saturdayseminar 0 1 6.957497 0.000000 16831 +houseappropri 0 1 6.957497 0.000000 16832 +interestinghom 0 1 6.957497 0.000000 16833 +odeto 0 1 6.957497 0.000000 16834 +tallman 0 1 6.957497 0.000000 16835 +trask 0 1 6.957497 0.000000 16836 +departsfor 0 1 6.957497 0.000000 16837 +lanelazowska 0 1 6.957497 0.000000 16838 +pagedirect 0 1 6.957497 0.000000 16839 +houseshilshol 0 1 6.957497 0.000000 16840 +aquat 0 1 6.957497 0.000000 16841 +discoveredreview 0 1 6.957497 0.000000 16842 +poetryfing 0 1 6.957497 0.000000 16843 +scheduleinform 0 1 6.957497 0.000000 16844 +reflector 0 1 6.957497 0.000000 16845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..848f9029 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +cornel 0 215 1.386294 0.000000 23 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +professor 1 137 1.945910 1.945910 76 +document 0 121 2.079442 0.000000 89 +mathemat 0 108 2.197225 0.000000 123 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +book 0 99 2.302585 0.000000 131 +text 0 98 2.302585 0.000000 133 +associ 0 93 2.397895 0.000000 151 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +method 0 80 2.564949 0.000000 213 +good 0 77 2.564949 0.000000 200 +addit 0 74 2.639057 0.000000 228 +visit 0 63 2.772589 0.000000 288 +physic 0 47 3.091042 0.000000 377 +textbook 0 44 3.135494 0.000000 397 +linear 0 41 3.218876 0.000000 431 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +transact 0 39 3.258097 0.000000 438 +live 0 40 3.258097 0.000000 451 +committe 0 34 3.401197 0.000000 522 +award 0 34 3.401197 0.000000 523 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +posit 0 31 3.496508 0.000000 552 +serv 0 22 3.850148 0.000000 758 +half 0 21 3.912023 0.000000 776 +corpor 0 21 3.912023 0.000000 802 +among 0 21 3.912023 0.000000 781 +prepar 0 20 3.951244 0.000000 824 +supervis 0 20 3.951244 0.000000 840 +tenni 0 20 3.951244 0.000000 838 +stanford 0 17 4.110874 0.000000 955 +bachelor 0 17 4.110874 0.000000 957 +former 0 17 4.110874 0.000000 956 +hobbi 0 16 4.174387 0.000000 1009 +atth 0 15 4.248495 0.000000 1019 +incomput 0 14 4.317488 0.000000 1096 +alan 0 13 4.382027 0.000000 1146 +guest 0 12 4.465908 0.000000 1220 +food 0 12 4.465908 0.000000 1285 +distinguish 0 11 4.553877 0.000000 1357 +bike 0 10 4.653960 0.000000 1468 +introductori 0 9 4.753590 0.000000 1479 +editori 0 9 4.753590 0.000000 1611 +toronto 0 6 5.164786 0.000000 2156 +scholar 0 6 5.164786 0.000000 2180 +pari 0 6 5.164786 0.000000 2158 +softwareengin 0 6 5.164786 0.000000 2162 +hike 0 6 5.164786 0.000000 2234 +these 0 5 5.347108 0.000000 2482 +fulbright 0 4 5.568345 0.000000 2963 +hasbeen 0 4 5.568345 0.000000 2661 +amast 0 3 5.857933 0.000000 3955 +informat 0 3 5.857933 0.000000 3839 +zurich 0 3 5.857933 0.000000 3550 +memberof 0 3 5.857933 0.000000 3169 +trumpet 0 3 5.857933 0.000000 3946 +sdegre 0 2 6.263398 0.000000 6040 +acceler 0 2 6.263398 0.000000 5411 +fifteen 0 2 6.263398 0.000000 5399 +shaw 0 1 6.957497 0.000000 16846 +facultyappoint 0 1 6.957497 0.000000 16847 +theibm 0 1 6.957497 0.000000 16848 +publicationsinclud 0 1 6.957497 0.000000 16849 +andan 0 1 6.957497 0.000000 16850 +sciencescreen 0 1 6.957497 0.000000 16851 +associateeditor 0 1 6.957497 0.000000 16852 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..1517bfcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +parallel 1 169 1.791759 1.791759 60 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +first 0 140 1.945910 0.000000 71 +mathemat 0 108 2.197225 0.000000 123 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +journal 0 83 2.484907 0.000000 183 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +complet 0 77 2.564949 0.000000 208 +master 0 76 2.564949 0.000000 216 +nation 0 74 2.639057 0.000000 240 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +receiv 0 66 2.708050 0.000000 244 +visit 0 63 2.772589 0.000000 288 +polici 0 64 2.772589 0.000000 279 +guid 0 63 2.772589 0.000000 267 +faculti 0 56 2.890372 0.000000 325 +direct 0 57 2.890372 0.000000 316 +investig 0 51 2.995732 0.000000 353 +numer 0 49 3.044522 0.000000 369 +quarter 0 47 3.091042 0.000000 389 +editor 0 41 3.218876 0.000000 433 +futur 0 41 3.218876 0.000000 427 +join 0 39 3.258097 0.000000 457 +transact 0 39 3.258097 0.000000 438 +ofth 0 36 3.367296 0.000000 491 +committe 0 34 3.401197 0.000000 522 +singl 0 34 3.401197 0.000000 510 +award 0 34 3.401197 0.000000 523 +titl 0 31 3.496508 0.000000 556 +computersci 0 30 3.555348 0.000000 562 +rang 0 30 3.555348 0.000000 565 +chair 0 29 3.583519 0.000000 596 +particip 0 29 3.583519 0.000000 589 +doctor 0 24 3.761200 0.000000 709 +proof 0 23 3.806662 0.000000 720 +highli 0 23 3.806662 0.000000 725 +serv 0 22 3.850148 0.000000 758 +properti 0 22 3.850148 0.000000 749 +chip 0 21 3.912023 0.000000 770 +divis 0 21 3.912023 0.000000 803 +bachelor 0 17 4.110874 0.000000 957 +configur 0 15 4.248495 0.000000 1012 +econom 0 13 4.382027 0.000000 1184 +mellon 0 13 4.382027 0.000000 1179 +carnegi 0 12 4.465908 0.000000 1260 +onth 0 12 4.465908 0.000000 1218 +perman 0 11 4.553877 0.000000 1372 +distinguish 0 11 4.553877 0.000000 1357 +purdu 0 10 4.653960 0.000000 1466 +andcomput 0 8 4.875197 0.000000 1623 +lawrenc 0 7 5.010635 0.000000 1908 +iowa 0 7 5.010635 0.000000 1971 +harvard 0 7 5.010635 0.000000 1926 +microprocessor 0 7 5.010635 0.000000 1808 +scholar 0 6 5.164786 0.000000 2180 +yale 0 6 5.164786 0.000000 2003 +blue 0 6 5.164786 0.000000 2227 +snyder 0 5 5.347108 0.000000 2359 +chaoticrout 0 4 5.568345 0.000000 3063 +anddistribut 0 4 5.568345 0.000000 3031 +algorithmsand 0 4 5.568345 0.000000 2680 +cmo 0 3 5.857933 0.000000 3992 +inventor 0 3 5.857933 0.000000 3695 +orca 0 3 5.857933 0.000000 3578 +dozen 0 3 5.857933 0.000000 3905 +sdegre 0 2 6.263398 0.000000 6040 +developmentof 0 2 6.263398 0.000000 6041 +hors 0 2 6.263398 0.000000 5348 +advisorycommitte 0 2 6.263398 0.000000 6037 +andin 0 1 6.957497 0.000000 16853 +scholarat 0 1 6.957497 0.000000 16854 +theundecid 0 1 6.957497 0.000000 16855 +hecreat 0 1 6.957497 0.000000 16856 +thepok 0 1 6.957497 0.000000 16857 +nowprincip 0 1 6.957497 0.000000 16858 +nwli 0 1 6.957497 0.000000 16859 +computerand 0 1 6.957497 0.000000 16860 +foundationadvisori 0 1 6.957497 0.000000 16861 +doctoraldissert 0 1 6.957497 0.000000 16862 +degreesund 0 1 6.957497 0.000000 16863 +seniorproject 0 1 6.957497 0.000000 16864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..6f3e043e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +design 0 213 1.386294 0.000000 25 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +high 0 130 2.079442 0.000000 101 +tool 0 117 2.079442 0.000000 93 +memori 0 101 2.302585 0.000000 139 +control 0 82 2.484907 0.000000 164 +issu 0 78 2.564949 0.000000 211 +involv 0 71 2.639057 0.000000 227 +degre 0 69 2.708050 0.000000 259 +integr 0 67 2.708050 0.000000 245 +evalu 0 64 2.772589 0.000000 266 +plai 0 60 2.833213 0.000000 307 +major 0 56 2.890372 0.000000 315 +explor 0 58 2.890372 0.000000 324 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +tabl 0 51 2.995732 0.000000 346 +autom 0 41 3.218876 0.000000 434 +electr 0 38 3.295837 0.000000 461 +toler 0 33 3.433987 0.000000 533 +india 0 32 3.465736 0.000000 550 +fault 0 32 3.465736 0.000000 547 +multiprocessor 0 28 3.610918 0.000000 605 +indian 0 22 3.850148 0.000000 769 +period 0 22 3.850148 0.000000 743 +tenni 0 20 3.951244 0.000000 838 +interconnect 0 17 4.110874 0.000000 937 +canada 0 13 4.382027 0.000000 1158 +food 0 12 4.465908 0.000000 1285 +prior 0 10 4.653960 0.000000 1438 +grain 0 10 4.653960 0.000000 1448 +cook 0 10 4.653960 0.000000 1464 +respect 0 9 4.753590 0.000000 1545 +classif 0 9 4.753590 0.000000 1586 +bridg 0 8 4.875197 0.000000 1764 +earn 0 7 5.010635 0.000000 1788 +montreal 0 7 5.010635 0.000000 1961 +hike 0 6 5.164786 0.000000 2234 +delhi 0 5 5.347108 0.000000 2530 +anti 0 5 5.347108 0.000000 2434 +arun 0 4 5.568345 0.000000 2736 +redund 0 4 5.568345 0.000000 2839 +congest 0 3 5.857933 0.000000 3993 +reconfigur 0 3 5.857933 0.000000 3556 +somani 0 2 6.263398 0.000000 4432 +submarin 0 2 6.263398 0.000000 6018 +warfar 0 2 6.263398 0.000000 4910 +navi 0 2 6.263398 0.000000 5155 +proteu 0 1 6.957497 0.000000 16865 +msee 0 1 6.957497 0.000000 16866 +mcgill 0 1 6.957497 0.000000 16867 +govt 0 1 6.957497 0.000000 16868 +offault 0 1 6.957497 0.000000 16869 +tocach 0 1 6.957497 0.000000 16870 +broadband 0 1 6.957497 0.000000 16871 +generalizedenhanc 0 1 6.957497 0.000000 16872 +hypercub 0 1 6.957497 0.000000 16873 +coars 0 1 6.957497 0.000000 16874 +dpcnl 0 1 6.957497 0.000000 16875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..5e8edcbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +mathemat 0 108 2.197225 0.000000 123 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +structur 0 106 2.197225 0.000000 105 +intern 0 108 2.197225 0.000000 128 +take 0 97 2.302585 0.000000 134 +book 0 99 2.302585 0.000000 131 +imag 0 91 2.397895 0.000000 161 +sinc 0 90 2.397895 0.000000 159 +commun 0 95 2.397895 0.000000 157 +grade 0 90 2.397895 0.000000 142 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +educ 0 86 2.484907 0.000000 191 +activ 0 84 2.484907 0.000000 182 +june 0 79 2.564949 0.000000 214 +intellig 0 72 2.639057 0.000000 225 +addit 0 74 2.639057 0.000000 228 +meet 0 72 2.639057 0.000000 229 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +visit 0 63 2.772589 0.000000 288 +artifici 0 63 2.772589 0.000000 280 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +organ 0 65 2.772589 0.000000 265 +polici 0 64 2.772589 0.000000 279 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +faculti 0 56 2.890372 0.000000 325 +think 0 57 2.890372 0.000000 314 +variou 0 56 2.890372 0.000000 317 +publish 0 57 2.890372 0.000000 326 +processor 0 54 2.944439 0.000000 335 +visual 0 48 3.044522 0.000000 372 +numer 0 49 3.044522 0.000000 369 +understand 0 47 3.091042 0.000000 384 +textbook 0 44 3.135494 0.000000 397 +edit 0 42 3.218876 0.000000 418 +vision 0 41 3.218876 0.000000 430 +editor 0 41 3.218876 0.000000 433 +music 0 42 3.218876 0.000000 436 +societi 0 40 3.258097 0.000000 456 +join 0 39 3.258097 0.000000 457 +programm 0 39 3.258097 0.000000 445 +author 0 39 3.258097 0.000000 450 +committe 0 34 3.401197 0.000000 522 +scientist 0 31 3.496508 0.000000 560 +common 0 30 3.555348 0.000000 574 +chair 0 29 3.583519 0.000000 596 +steve 0 29 3.583519 0.000000 594 +held 0 28 3.610918 0.000000 600 +subject 0 26 3.688879 0.000000 647 +enjoi 0 26 3.688879 0.000000 660 +pattern 0 24 3.761200 0.000000 689 +fellow 0 24 3.761200 0.000000 701 +recognit 0 23 3.806662 0.000000 723 +serv 0 22 3.850148 0.000000 758 +theunivers 0 21 3.912023 0.000000 797 +corpor 0 21 3.912023 0.000000 802 +particularli 0 19 4.007333 0.000000 867 +lisp 0 18 4.060443 0.000000 897 +element 0 18 4.060443 0.000000 895 +steven 0 17 4.110874 0.000000 953 +cambridg 0 16 4.174387 0.000000 1008 +princeton 0 15 4.248495 0.000000 1042 +atth 0 15 4.248495 0.000000 1019 +massachusett 0 14 4.317488 0.000000 1118 +whose 0 13 4.382027 0.000000 1166 +franc 0 12 4.465908 0.000000 1276 +outsid 0 12 4.465908 0.000000 1219 +motiv 0 11 4.553877 0.000000 1346 +council 0 11 4.553877 0.000000 1364 +tanimoto 0 10 4.653960 0.000000 1429 +conferenceon 0 9 4.753590 0.000000 1595 +entitl 0 9 4.753590 0.000000 1490 +vice 0 9 4.753590 0.000000 1604 +japan 0 8 4.875197 0.000000 1762 +elect 0 8 4.875197 0.000000 1771 +sweden 0 7 5.010635 0.000000 1885 +chief 0 7 5.010635 0.000000 1829 +pari 0 6 5.164786 0.000000 2158 +scholar 0 6 5.164786 0.000000 2180 +sponsor 0 6 5.164786 0.000000 2133 +piano 0 6 5.164786 0.000000 2201 +anda 0 5 5.347108 0.000000 2416 +ofparallel 0 5 5.347108 0.000000 2380 +steer 0 5 5.347108 0.000000 2328 +jazz 0 5 5.347108 0.000000 2527 +devot 0 4 5.568345 0.000000 2711 +coauthor 0 4 5.568345 0.000000 3064 +electricalengin 0 3 5.857933 0.000000 3987 +chairman 0 3 5.857933 0.000000 3991 +adjunct 0 2 6.263398 0.000000 6033 +theinstitut 0 2 6.263398 0.000000 6039 +internationalworkshop 0 2 6.263398 0.000000 5012 +bergen 0 2 6.263398 0.000000 5991 +norwai 0 2 6.263398 0.000000 4908 +programcommitte 0 2 6.263398 0.000000 6042 +theieee 0 2 6.263398 0.000000 6043 +ieeetransact 0 2 6.263398 0.000000 4315 +andclass 0 2 6.263398 0.000000 4330 +linkp 0 1 6.957497 0.000000 16876 +fromharvard 0 1 6.957497 0.000000 16877 +connecticut 0 1 6.957497 0.000000 16878 +professorat 0 1 6.957497 0.000000 16879 +hasalso 0 1 6.957497 0.000000 16880 +atkob 0 1 6.957497 0.000000 16881 +enseign 0 1 6.957497 0.000000 16882 +superieur 0 1 6.957497 0.000000 16883 +techniquesd 0 1 6.957497 0.000000 16884 +electroniqu 0 1 6.957497 0.000000 16885 +irest 0 1 6.957497 0.000000 16886 +nant 0 1 6.957497 0.000000 16887 +hasrec 0 1 6.957497 0.000000 16888 +forimag 0 1 6.957497 0.000000 16889 +processingand 0 1 6.957497 0.000000 16890 +bordeaux 0 1 6.957497 0.000000 16891 +ofimag 0 1 6.957497 0.000000 16892 +currentlydirect 0 1 6.957497 0.000000 16893 +throughimag 0 1 6.957497 0.000000 16894 +softwarethat 0 1 6.957497 0.000000 16895 +thebook 0 1 6.957497 0.000000 16896 +introductionus 0 1 6.957497 0.000000 16897 +accompanyingsoftwar 0 1 6.957497 0.000000 16898 +serveda 0 1 6.957497 0.000000 16899 +subconfer 0 1 6.957497 0.000000 16900 +patternrecognit 0 1 6.957497 0.000000 16901 +societyworkshop 0 1 6.957497 0.000000 16902 +machineintellig 0 1 6.957497 0.000000 16903 +symposiaon 0 1 6.957497 0.000000 16904 +editorialboard 0 1 6.957497 0.000000 16905 +cvgip 0 1 6.957497 0.000000 16906 +engineeringeduc 0 1 6.957497 0.000000 16907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..b9643111 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +read 0 154 1.791759 0.000000 47 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +confer 0 126 2.079442 0.000000 100 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +structur 0 106 2.197225 0.000000 105 +associ 0 93 2.397895 0.000000 151 +question 0 91 2.397895 0.000000 141 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +control 0 82 2.484907 0.000000 164 +state 0 76 2.564949 0.000000 207 +issu 0 78 2.564949 0.000000 211 +complet 0 77 2.564949 0.000000 208 +nation 0 74 2.639057 0.000000 240 +receiv 0 66 2.708050 0.000000 244 +foundat 0 62 2.772589 0.000000 286 +complex 0 64 2.772589 0.000000 269 +visit 0 63 2.772589 0.000000 288 +colleg 0 61 2.833213 0.000000 300 +faculti 0 56 2.890372 0.000000 325 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +sever 0 56 2.890372 0.000000 322 +three 0 54 2.944439 0.000000 330 +california 0 46 3.091042 0.000000 388 +textbook 0 44 3.135494 0.000000 397 +theoret 0 39 3.258097 0.000000 446 +societi 0 40 3.258097 0.000000 456 +author 0 39 3.258097 0.000000 450 +paul 0 38 3.295837 0.000000 471 +industri 0 38 3.295837 0.000000 464 +connect 0 37 3.332205 0.000000 485 +ofth 0 36 3.367296 0.000000 491 +committe 0 34 3.401197 0.000000 522 +board 0 33 3.433987 0.000000 528 +taught 0 33 3.433987 0.000000 526 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +posit 0 31 3.496508 0.000000 552 +chair 0 29 3.583519 0.000000 596 +berkelei 0 26 3.688879 0.000000 657 +fellow 0 24 3.761200 0.000000 701 +doctor 0 24 3.761200 0.000000 709 +serv 0 22 3.850148 0.000000 758 +emphasi 0 22 3.850148 0.000000 755 +unit 0 21 3.912023 0.000000 779 +theunivers 0 21 3.912023 0.000000 797 +divis 0 21 3.912023 0.000000 803 +facil 0 20 3.951244 0.000000 814 +histori 0 19 4.007333 0.000000 853 +young 0 16 4.174387 0.000000 991 +earli 0 16 4.174387 0.000000 968 +becam 0 14 4.317488 0.000000 1117 +dean 0 14 4.317488 0.000000 1104 +employ 0 12 4.465908 0.000000 1291 +eight 0 11 4.553877 0.000000 1331 +ofcomput 0 10 4.653960 0.000000 1442 +editori 0 9 4.753590 0.000000 1611 +vice 0 9 4.753590 0.000000 1604 +hold 0 8 4.875197 0.000000 1645 +foc 0 7 5.010635 0.000000 1880 +reed 0 6 5.164786 0.000000 2086 +symposiumon 0 6 5.164786 0.000000 2054 +sigact 0 6 5.164786 0.000000 2212 +chosen 0 6 5.164786 0.000000 1984 +twice 0 4 5.568345 0.000000 2614 +coauthor 0 4 5.568345 0.000000 3064 +notr 0 4 5.568345 0.000000 2880 +dame 0 4 5.568345 0.000000 2881 +gone 0 4 5.568345 0.000000 3072 +chairman 0 3 5.857933 0.000000 3991 +atstanford 0 3 5.857933 0.000000 3935 +briefli 0 3 5.857933 0.000000 3459 +thegener 0 3 5.857933 0.000000 3648 +mathematicallog 0 3 5.857933 0.000000 3796 +eleven 0 3 5.857933 0.000000 3824 +postdoctor 0 2 6.263398 0.000000 5059 +mexico 0 2 6.263398 0.000000 6044 +nomin 0 2 6.263398 0.000000 5758 +programcommitte 0 2 6.263398 0.000000 6042 +annal 0 2 6.263398 0.000000 4912 +underprofessor 0 2 6.263398 0.000000 6045 +ratherthan 0 2 6.263398 0.000000 6046 +graduateof 0 1 6.957497 0.000000 16908 +antioch 0 1 6.957497 0.000000 16909 +hejoin 0 1 6.957497 0.000000 16910 +seventeen 0 1 6.957497 0.000000 16911 +atpurdu 0 1 6.957497 0.000000 16912 +inperhap 0 1 6.957497 0.000000 16913 +aschairman 0 1 6.957497 0.000000 16914 +professorin 0 1 6.957497 0.000000 16915 +iscoauthor 0 1 6.957497 0.000000 16916 +executivecommitte 0 1 6.957497 0.000000 16917 +interestgroup 0 1 6.957497 0.000000 16918 +chairmanof 0 1 6.957497 0.000000 16919 +annualsymposium 0 1 6.957497 0.000000 16920 +hasserv 0 1 6.957497 0.000000 16921 +stechnic 0 1 6.957497 0.000000 16922 +advisorysubcommitte 0 1 6.957497 0.000000 16923 +thiscommitte 0 1 6.957497 0.000000 16924 +formallog 0 1 6.957497 0.000000 16925 +dopostdoctor 0 1 6.957497 0.000000 16926 +ofcalifornia 0 1 6.957497 0.000000 16927 +avarieti 0 1 6.957497 0.000000 16928 +leather 0 1 6.957497 0.000000 16929 +motorcycl 0 1 6.957497 0.000000 16930 +jacket 0 1 6.957497 0.000000 16931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..91ed9389 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +includ 0 208 1.609438 0.000000 42 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +techniqu 0 99 2.302585 0.000000 138 +real 0 93 2.397895 0.000000 144 +chang 0 82 2.484907 0.000000 163 +activ 0 84 2.484907 0.000000 182 +ieee 0 86 2.484907 0.000000 190 +interfac 0 79 2.564949 0.000000 209 +involv 0 71 2.639057 0.000000 227 +receiv 0 66 2.708050 0.000000 244 +polici 0 64 2.772589 0.000000 279 +written 0 63 2.772589 0.000000 278 +allow 0 53 2.944439 0.000000 333 +video 0 44 3.135494 0.000000 405 +continu 0 39 3.258097 0.000000 448 +survei 0 35 3.401197 0.000000 513 +john 0 33 3.433987 0.000000 532 +board 0 33 3.433987 0.000000 528 +titl 0 31 3.496508 0.000000 556 +focu 0 30 3.555348 0.000000 571 +platform 0 29 3.583519 0.000000 591 +load 0 28 3.610918 0.000000 601 +intend 0 28 3.610918 0.000000 599 +primari 0 25 3.737670 0.000000 669 +mobil 0 23 3.806662 0.000000 730 +sequenti 0 22 3.850148 0.000000 745 +runtim 0 19 4.007333 0.000000 858 +young 0 16 4.174387 0.000000 991 +audio 0 14 4.317488 0.000000 1094 +easili 0 14 4.317488 0.000000 1077 +editori 0 9 4.753590 0.000000 1611 +presidenti 0 8 4.875197 0.000000 1737 +supportfor 0 7 5.010635 0.000000 1854 +thegoal 0 6 5.164786 0.000000 2033 +zahorjan 0 3 5.857933 0.000000 3383 +formobil 0 3 5.857933 0.000000 3261 +parallelsystem 0 2 6.263398 0.000000 5746 +exhibit 0 2 6.263398 0.000000 5529 +frombrown 0 1 6.957497 0.000000 16932 +oftoronto 0 1 6.957497 0.000000 16933 +investigatoraward 0 1 6.957497 0.000000 16934 +mediaappl 0 1 6.957497 0.000000 16935 +torespond 0 1 6.957497 0.000000 16936 +parallelizationof 0 1 6.957497 0.000000 16937 +bothcontrol 0 1 6.957497 0.000000 16938 +transactionson 0 1 6.957497 0.000000 16939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..ef08d159 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +relat 0 139 1.945910 0.000000 68 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +mathemat 0 108 2.197225 0.000000 123 +pleas 0 113 2.197225 0.000000 114 +person 0 111 2.197225 0.000000 117 +version 0 113 2.197225 0.000000 122 +teach 0 108 2.197225 0.000000 112 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +technic 0 100 2.302585 0.000000 140 +imag 1 91 2.397895 2.397895 161 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +educ 0 86 2.484907 0.000000 191 +help 0 83 2.484907 0.000000 175 +environ 0 84 2.484907 0.000000 177 +thing 0 84 2.484907 0.000000 189 +exampl 0 77 2.564949 0.000000 195 +know 0 80 2.564949 0.000000 198 +free 0 73 2.639057 0.000000 224 +materi 0 75 2.639057 0.000000 221 +meet 0 72 2.639057 0.000000 229 +involv 0 71 2.639057 0.000000 227 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +tuesdai 0 73 2.639057 0.000000 219 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +window 0 68 2.708050 0.000000 242 +test 0 66 2.708050 0.000000 252 +integr 0 67 2.708050 0.000000 245 +would 0 67 2.708050 0.000000 251 +experi 0 64 2.772589 0.000000 283 +copi 0 63 2.772589 0.000000 284 +collect 0 65 2.772589 0.000000 268 +creat 0 63 2.772589 0.000000 277 +foundat 0 62 2.772589 0.000000 286 +plai 0 60 2.833213 0.000000 307 +variou 0 56 2.890372 0.000000 317 +explor 0 58 2.890372 0.000000 324 +direct 0 57 2.890372 0.000000 316 +allow 0 53 2.944439 0.000000 333 +februari 0 54 2.944439 0.000000 328 +digit 0 52 2.995732 0.000000 348 +particular 0 51 2.995732 0.000000 352 +approach 0 48 3.044522 0.000000 366 +visual 0 48 3.044522 0.000000 372 +done 0 47 3.091042 0.000000 381 +effect 0 46 3.091042 0.000000 385 +discuss 0 45 3.135494 0.000000 399 +keep 0 44 3.135494 0.000000 409 +describ 0 45 3.135494 0.000000 400 +offer 0 43 3.178054 0.000000 414 +programm 0 39 3.258097 0.000000 445 +littl 0 39 3.258097 0.000000 454 +open 0 38 3.295837 0.000000 469 +microsoft 0 38 3.295837 0.000000 468 +close 0 38 3.295837 0.000000 465 +seminar 0 38 3.295837 0.000000 470 +winter 0 36 3.367296 0.000000 500 +curriculum 0 33 3.433987 0.000000 535 +transform 0 32 3.465736 0.000000 542 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +hard 0 30 3.555348 0.000000 563 +common 0 30 3.555348 0.000000 574 +particip 0 29 3.583519 0.000000 589 +intend 0 28 3.610918 0.000000 599 +framework 0 28 3.610918 0.000000 606 +manipul 0 27 3.637586 0.000000 624 +rather 0 26 3.688879 0.000000 642 +experiment 0 26 3.688879 0.000000 645 +todai 0 25 3.737670 0.000000 672 +seri 0 24 3.761200 0.000000 708 +demonstr 0 24 3.761200 0.000000 694 +togeth 0 23 3.806662 0.000000 714 +director 0 22 3.850148 0.000000 767 +disk 0 22 3.850148 0.000000 747 +instal 0 22 3.850148 0.000000 754 +encourag 0 18 4.060443 0.000000 880 +record 0 18 4.060443 0.000000 890 +lisp 0 18 4.060443 0.000000 897 +steven 0 17 4.110874 0.000000 953 +macintosh 0 17 4.110874 0.000000 920 +choic 0 16 4.174387 0.000000 979 +took 0 16 4.174387 0.000000 1010 +role 0 14 4.317488 0.000000 1101 +primarili 0 13 4.382027 0.000000 1185 +forth 0 13 4.382027 0.000000 1186 +essenti 0 13 4.382027 0.000000 1137 +calcul 0 12 4.465908 0.000000 1268 +neat 0 12 4.465908 0.000000 1263 +appl 0 11 4.553877 0.000000 1303 +tanimoto 0 10 4.653960 0.000000 1429 +subset 0 10 4.653960 0.000000 1425 +thecomput 0 10 4.653960 0.000000 1408 +end 0 9 4.753590 0.000000 1567 +successfulli 0 7 5.010635 0.000000 1869 +classroom 0 6 5.164786 0.000000 2006 +pentium 0 6 5.164786 0.000000 2077 +put 0 6 5.164786 0.000000 2017 +volunt 0 5 5.347108 0.000000 2307 +own 0 5 5.347108 0.000000 2531 +pixel 0 4 5.568345 0.000000 2831 +emphas 0 4 5.568345 0.000000 2672 +exploratori 0 4 5.568345 0.000000 3073 +prospect 0 4 5.568345 0.000000 3013 +witha 0 4 5.568345 0.000000 2617 +bricker 0 4 5.568345 0.000000 3050 +metip 0 3 5.857933 0.000000 3937 +teacher 0 3 5.857933 0.000000 3892 +alsoavail 0 3 5.857933 0.000000 3887 +newapproach 0 2 6.263398 0.000000 6047 +pursuit 0 2 6.263398 0.000000 6048 +portrai 0 2 6.263398 0.000000 5386 +xform 0 1 6.957497 0.000000 16940 +theseobject 0 1 6.957497 0.000000 16941 +applicationsdesign 0 1 6.957497 0.000000 16942 +enrich 0 1 6.957497 0.000000 16943 +astandard 0 1 6.957497 0.000000 16944 +withthes 0 1 6.957497 0.000000 16945 +catalyz 0 1 6.957497 0.000000 16946 +bylead 0 1 6.957497 0.000000 16947 +theconcept 0 1 6.957497 0.000000 16948 +toexplor 0 1 6.957497 0.000000 16949 +warper 0 1 6.957497 0.000000 16950 +srun 0 1 6.957497 0.000000 16951 +mathematicsteach 0 1 6.957497 0.000000 16952 +transcriptproject 0 1 6.957497 0.000000 16953 +willfacilit 0 1 6.957497 0.000000 16954 +academicinform 0 1 6.957497 0.000000 16955 +floppi 0 1 6.957497 0.000000 16956 +themetip 0 1 6.957497 0.000000 16957 +ofmultiplay 0 1 6.957497 0.000000 16958 +tointegr 0 1 6.957497 0.000000 16959 +itsxform 0 1 6.957497 0.000000 16960 +somethingfun 0 1 6.957497 0.000000 16961 +beenput 0 1 6.957497 0.000000 16962 +fundamentalattract 0 1 6.957497 0.000000 16963 +digitalimag 0 1 6.957497 0.000000 16964 +discussteach 0 1 6.957497 0.000000 16965 +undergr 0 1 6.957497 0.000000 16966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..98f562d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +applic 0 170 1.791759 0.000000 56 +manag 0 114 2.197225 0.000000 125 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +prof 0 64 2.772589 0.000000 273 +overview 0 56 2.890372 0.000000 323 +describ 0 45 3.135494 0.000000 400 +transact 0 39 3.258097 0.000000 438 +brian 0 38 3.295837 0.000000 466 +field 0 37 3.332205 0.000000 482 +survei 0 35 3.401197 0.000000 513 +john 0 33 3.433987 0.000000 532 +graph 0 30 3.555348 0.000000 576 +challeng 0 26 3.688879 0.000000 653 +task 0 25 3.737670 0.000000 678 +fundament 0 25 3.737670 0.000000 661 +mobil 1 23 3.806662 3.806662 730 +variabl 0 23 3.806662 0.000000 715 +methodolog 0 23 3.806662 0.000000 733 +infrastructur 0 12 4.465908 0.000000 1234 +hank 0 12 4.465908 0.000000 1253 +gaetano 0 6 5.164786 0.000000 2068 +wireless 0 4 5.568345 0.000000 2693 +disconnect 0 4 5.568345 0.000000 2664 +mobisa 0 3 5.857933 0.000000 3927 +mobilecomput 0 3 5.857933 0.000000 3629 +zahorjan 0 3 5.857933 0.000000 3383 +ubiquit 0 2 6.263398 0.000000 6049 +computingresearch 0 2 6.263398 0.000000 5957 +cope 0 2 6.263398 0.000000 6050 +forman 0 2 6.263398 0.000000 5904 +washingtonher 0 1 6.957497 0.000000 16967 +handheld 0 1 6.957497 0.000000 16968 +operationdistribut 0 1 6.957497 0.000000 16969 +systemcontact 0 1 6.957497 0.000000 16970 +bershadprof 0 1 6.957497 0.000000 16971 +borriellomarc 0 1 6.957497 0.000000 16972 +fiuczynskigeorg 0 1 6.957497 0.000000 16973 +formanprof 0 1 6.957497 0.000000 16974 +levygeoff 0 1 6.957497 0.000000 16975 +voelkerterri 0 1 6.957497 0.000000 16976 +watsonprof 0 1 6.957497 0.000000 16977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..bab0d402 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +assign 0 135 1.945910 0.000000 66 +relat 0 139 1.945910 0.000000 68 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +intern 0 108 2.197225 0.000000 128 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +version 0 113 2.197225 0.000000 122 +peopl 0 96 2.302585 0.000000 132 +technic 0 100 2.302585 0.000000 140 +level 0 87 2.484907 0.000000 180 +member 0 84 2.484907 0.000000 165 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +messag 0 76 2.564949 0.000000 212 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +dynam 0 76 2.564949 0.000000 194 +sourc 0 77 2.564949 0.000000 201 +java 0 70 2.708050 0.000000 248 +receiv 0 66 2.708050 0.000000 244 +goal 0 66 2.708050 0.000000 250 +august 0 66 2.708050 0.000000 257 +written 0 63 2.772589 0.000000 278 +guid 0 63 2.772589 0.000000 267 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +overview 0 56 2.890372 0.000000 323 +special 0 56 2.890372 0.000000 320 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +sampl 0 53 2.944439 0.000000 339 +extens 0 53 2.944439 0.000000 340 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +standard 0 48 3.044522 0.000000 365 +pointer 0 48 3.044522 0.000000 368 +describ 0 45 3.135494 0.000000 400 +mechan 0 43 3.178054 0.000000 416 +past 0 42 3.218876 0.000000 428 +form 0 39 3.258097 0.000000 443 +prototyp 0 38 3.295837 0.000000 463 +procedur 0 36 3.367296 0.000000 488 +either 0 35 3.401197 0.000000 506 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +releas 0 28 3.610918 0.000000 616 +intend 0 28 3.610918 0.000000 599 +static 0 27 3.637586 0.000000 619 +request 0 26 3.688879 0.000000 635 +initi 0 23 3.806662 0.000000 717 +emphasi 0 22 3.850148 0.000000 755 +finish 0 22 3.850148 0.000000 748 +flexibl 0 21 3.912023 0.000000 792 +qualiti 0 20 3.951244 0.000000 832 +entir 0 20 3.951244 0.000000 811 +predict 0 19 4.007333 0.000000 855 +hybrid 0 15 4.248495 0.000000 1057 +conduct 0 14 4.317488 0.000000 1065 +split 0 14 4.317488 0.000000 1078 +bodi 0 13 4.382027 0.000000 1178 +infrastructur 0 12 4.465908 0.000000 1234 +target 0 12 4.465908 0.000000 1282 +solari 0 12 4.465908 0.000000 1238 +modul 0 10 4.653960 0.000000 1434 +cecil 1 9 4.753590 4.753590 1547 +elimin 0 9 4.753590 0.000000 1558 +subscrib 0 9 4.753590 0.000000 1541 +modula 0 9 4.753590 0.000000 1613 +analys 0 8 4.875197 0.000000 1666 +pure 0 8 4.875197 0.000000 1776 +closur 0 8 4.875197 0.000000 1643 +parti 0 8 4.875197 0.000000 1676 +dead 0 7 5.010635 0.000000 1840 +sparc 0 7 5.010635 0.000000 1860 +freeli 0 6 5.164786 0.000000 2014 +beta 0 6 5.164786 0.000000 1993 +vortex 0 5 5.347108 0.000000 2362 +ofinterest 0 5 5.347108 0.000000 2323 +suno 0 4 5.568345 0.000000 2790 +inlin 0 4 5.568345 0.000000 2964 +tosupport 0 3 5.857933 0.000000 3613 +inherit 0 3 5.857933 0.000000 3122 +forobject 0 3 5.857933 0.000000 3965 +uwcs 0 3 5.857933 0.000000 3977 +intraprocedur 0 2 6.263398 0.000000 5934 +orientedlanguag 0 2 6.263398 0.000000 4079 +acollect 0 2 6.263398 0.000000 5039 +vortexcompil 0 2 6.263398 0.000000 5932 +projectuw 0 1 6.957497 0.000000 16978 +projectwelcom 0 1 6.957497 0.000000 16979 +rapidconstruct 0 1 6.957497 0.000000 16980 +incorporatesmulti 0 1 6.957497 0.000000 16981 +basedencapsul 0 1 6.957497 0.000000 16982 +allowsstat 0 1 6.957497 0.000000 16983 +pureobject 0 1 6.957497 0.000000 16984 +incorporateshigh 0 1 6.957497 0.000000 16985 +hierachyanalysi 0 1 6.957497 0.000000 16986 +guidedselect 0 1 6.957497 0.000000 16987 +commonsubexpress 0 1 6.957497 0.000000 16988 +currentlyavail 0 1 6.957497 0.000000 16989 +thebeta 0 1 6.957497 0.000000 16990 +projectslast 0 1 6.957497 0.000000 16991 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..37a5c643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +softwar 0 220 1.386294 0.000000 30 +public 0 202 1.609438 0.000000 43 +data 0 170 1.791759 0.000000 49 +seattl 0 120 2.079442 0.000000 103 +peopl 0 96 2.302585 0.000000 132 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +comment 0 93 2.397895 0.000000 146 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +juli 0 60 2.833213 0.000000 305 +local 0 55 2.944439 0.000000 334 +cool 0 49 3.044522 0.000000 374 +disk 0 22 3.850148 0.000000 747 +theunivers 0 21 3.912023 0.000000 797 +usag 0 6 5.164786 0.000000 2209 +these 0 5 5.347108 0.000000 2482 +grail 1 3 5.857933 5.857933 3356 +neighborhood 0 3 5.857933 0.000000 3242 +laboratorywelcom 0 2 6.263398 0.000000 5439 +mtwong 0 1 6.957497 0.000000 16992 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..328fa700 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +seattl 0 120 2.079442 0.000000 103 +report 0 131 2.079442 0.000000 92 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +build 0 85 2.484907 0.000000 184 +come 0 78 2.564949 0.000000 202 +workshop 0 71 2.639057 0.000000 239 +simul 0 66 2.708050 0.000000 255 +test 0 66 2.708050 0.000000 252 +result 0 65 2.772589 0.000000 281 +hardwar 0 51 2.995732 0.000000 350 +friend 0 48 3.044522 0.000000 376 +standard 0 48 3.044522 0.000000 365 +better 0 45 3.135494 0.000000 401 +discuss 0 45 3.135494 0.000000 399 +describ 0 45 3.135494 0.000000 400 +power 0 30 3.555348 0.000000 573 +abl 0 30 3.555348 0.000000 566 +built 0 29 3.583519 0.000000 592 +held 0 28 3.610918 0.000000 600 +mine 0 26 3.688879 0.000000 654 +sort 0 22 3.850148 0.000000 738 +rout 1 21 3.912023 3.912023 793 +chip 0 21 3.912023 0.000000 770 +nice 0 20 3.951244 0.000000 809 +repositori 0 17 4.110874 0.000000 932 +interconnect 0 17 4.110874 0.000000 937 +web 0 12 4.465908 0.000000 1249 +chao 0 8 4.875197 0.000000 1753 +router 0 8 4.875197 0.000000 1772 +dylan 0 8 4.875197 0.000000 1625 +univeristi 0 8 4.875197 0.000000 1754 +guidelin 0 7 5.010635 0.000000 1832 +chaotic 0 5 5.347108 0.000000 2566 +chaoticrout 0 4 5.568345 0.000000 3063 +micron 0 3 5.857933 0.000000 3341 +cmo 0 3 5.857933 0.000000 3992 +redesign 0 3 5.857933 0.000000 3540 +thathav 0 3 5.857933 0.000000 3735 +papersand 0 2 6.263398 0.000000 4867 +pcrcw 0 1 6.957497 0.000000 16993 +peopleal 0 1 6.957497 0.000000 16994 +allsort 0 1 6.957497 0.000000 16995 +graphicalfront 0 1 6.957497 0.000000 16996 +presentationof 0 1 6.957497 0.000000 16997 +upwith 0 1 6.957497 0.000000 16998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..66091377 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +schedul 0 119 2.079442 0.000000 85 +version 0 113 2.197225 0.000000 122 +make 0 111 2.197225 0.000000 120 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +need 0 98 2.302585 0.000000 135 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +follow 0 92 2.397895 0.000000 143 +level 0 87 2.484907 0.000000 180 +activ 0 84 2.484907 0.000000 182 +larg 0 82 2.484907 0.000000 168 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +school 0 84 2.484907 0.000000 188 +june 0 79 2.564949 0.000000 214 +complet 0 77 2.564949 0.000000 208 +issu 0 78 2.564949 0.000000 211 +want 0 79 2.564949 0.000000 199 +interfac 0 79 2.564949 0.000000 209 +nation 0 74 2.639057 0.000000 240 +name 0 72 2.639057 0.000000 220 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +simul 0 66 2.708050 0.000000 255 +main 0 67 2.708050 0.000000 256 +evalu 0 64 2.772589 0.000000 266 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +improv 0 62 2.772589 0.000000 289 +foundat 0 62 2.772589 0.000000 286 +automat 0 61 2.833213 0.000000 306 +detail 0 57 2.890372 0.000000 321 +summer 0 56 2.890372 0.000000 311 +processor 0 54 2.944439 0.000000 335 +hardwar 0 51 2.995732 0.000000 350 +maintain 0 51 2.995732 0.000000 342 +even 0 45 3.135494 0.000000 393 +autom 0 41 3.218876 0.000000 434 +map 0 39 3.258097 0.000000 452 +connect 0 37 3.332205 0.000000 485 +singl 0 34 3.401197 0.000000 510 +concurr 0 34 3.401197 0.000000 501 +compon 0 30 3.555348 0.000000 570 +robert 0 30 3.555348 0.000000 567 +becom 0 28 3.610918 0.000000 603 +american 0 27 3.637586 0.000000 634 +rather 0 26 3.688879 0.000000 642 +constraint 0 26 3.688879 0.000000 636 +enabl 0 26 3.688879 0.000000 655 +accur 0 25 3.737670 0.000000 680 +sometim 0 24 3.761200 0.000000 696 +demonstr 0 24 3.761200 0.000000 694 +input 0 23 3.806662 0.000000 727 +togeth 0 23 3.806662 0.000000 714 +instead 0 22 3.850148 0.000000 756 +try 0 22 3.850148 0.000000 764 +sequenti 0 22 3.850148 0.000000 745 +util 0 21 3.912023 0.000000 774 +output 0 21 3.912023 0.000000 788 +synthesi 0 20 3.951244 0.000000 834 +kernel 0 20 3.951244 0.000000 825 +wind 0 18 4.060443 0.000000 908 +behavior 0 18 4.060443 0.000000 881 +partit 0 16 4.174387 0.000000 984 +earli 0 16 4.174387 0.000000 968 +advantag 0 16 4.174387 0.000000 987 +diego 0 16 4.174387 0.000000 992 +devic 0 16 4.174387 0.000000 1002 +universityof 0 15 4.248495 0.000000 1061 +embed 0 14 4.317488 0.000000 1102 +believ 0 13 4.382027 0.000000 1187 +incorpor 0 13 4.382027 0.000000 1163 +target 0 12 4.465908 0.000000 1282 +grant 0 12 4.465908 0.000000 1216 +fill 0 11 4.553877 0.000000 1349 +cycl 0 11 4.553877 0.000000 1335 +fix 0 11 4.553877 0.000000 1327 +itali 0 11 4.553877 0.000000 1378 +fellowship 0 10 4.653960 0.000000 1460 +mountain 0 10 4.653960 0.000000 1456 +forc 0 10 4.653960 0.000000 1384 +reli 0 10 4.653960 0.000000 1411 +pacif 0 8 4.875197 0.000000 1674 +character 0 8 4.875197 0.000000 1767 +driver 0 8 4.875197 0.000000 1657 +maxim 0 7 5.010635 0.000000 1944 +chinook 1 6 5.164786 5.164786 2229 +averag 0 6 5.164786 0.000000 2098 +contract 0 6 5.164786 0.000000 1985 +blow 0 5 5.347108 0.000000 2407 +east 0 5 5.347108 0.000000 2472 +synthes 0 5 5.347108 0.000000 2451 +ross 0 5 5.347108 0.000000 2243 +ortega 0 5 5.347108 0.000000 2559 +rocki 0 4 5.568345 0.000000 3048 +chou 0 4 5.568345 0.000000 3033 +ti 0 4 5.568345 0.000000 3005 +shelf 0 4 5.568345 0.000000 2621 +harri 0 4 5.568345 0.000000 3034 +warm 0 3 5.857933 0.000000 3904 +retarget 0 3 5.857933 0.000000 3994 +domin 0 3 5.857933 0.000000 3995 +moredetail 0 3 5.857933 0.000000 3854 +shortli 0 3 5.857933 0.000000 3375 +nato 0 3 5.857933 0.000000 3587 +salmon 0 2 6.263398 0.000000 4802 +rare 0 2 6.263398 0.000000 4184 +toolfor 0 2 6.263398 0.000000 6031 +neededto 0 2 6.263398 0.000000 5379 +ratherthan 0 2 6.263398 0.000000 6046 +differentarchitectur 0 2 6.263398 0.000000 6051 +verilog 0 2 6.263398 0.000000 4441 +softwareprogram 0 2 6.263398 0.000000 4889 +moreeffici 0 2 6.263398 0.000000 4209 +macduff 0 2 6.263398 0.000000 5923 +hauck 0 2 6.263398 0.000000 5920 +shinook 0 1 6.957497 0.000000 16999 +oncorhynchu 0 1 6.957497 0.000000 17000 +tshawytscha 0 1 6.957497 0.000000 17001 +amer 0 1 6.957497 0.000000 17002 +tribe 0 1 6.957497 0.000000 17003 +southerli 0 1 6.957497 0.000000 17004 +sled 0 1 6.957497 0.000000 17005 +doga 0 1 6.957497 0.000000 17006 +cadtool 0 1 6.957497 0.000000 17007 +reactivesystem 0 1 6.957497 0.000000 17008 +descriptionto 0 1 6.957497 0.000000 17009 +designdecis 0 1 6.957497 0.000000 17010 +reiterateaft 0 1 6.957497 0.000000 17011 +willnot 0 1 6.957497 0.000000 17012 +designerto 0 1 6.957497 0.000000 17013 +legacycod 0 1 6.957497 0.000000 17014 +currentlyw 0 1 6.957497 0.000000 17015 +interprocessorcommun 0 1 6.957497 0.000000 17016 +assumesmanu 0 1 6.957497 0.000000 17017 +intricateand 0 1 6.957497 0.000000 17018 +asicarchitectur 0 1 6.957497 0.000000 17019 +onoff 0 1 6.957497 0.000000 17020 +discourag 0 1 6.957497 0.000000 17021 +innovemb 0 1 6.957497 0.000000 17022 +shownat 0 1 6.957497 0.000000 17023 +mainfeatur 0 1 6.957497 0.000000 17024 +peripheraldevic 0 1 6.957497 0.000000 17025 +andsynthes 0 1 6.957497 0.000000 17026 +hardwarenetlist 0 1 6.957497 0.000000 17027 +interfacingproblem 0 1 6.957497 0.000000 17028 +timingconstraint 0 1 6.957497 0.000000 17029 +swcodedesign 0 1 6.957497 0.000000 17030 +tremezzo 0 1 6.957497 0.000000 17031 +severalmor 0 1 6.957497 0.000000 17032 +chinookersfacultygaetano 0 1 6.957497 0.000000 17033 +borriellogradu 0 1 6.957497 0.000000 17034 +ortegaken 0 1 6.957497 0.000000 17035 +hinesian 0 1 6.957497 0.000000 17036 +selizabeth 0 1 6.957497 0.000000 17037 +walkupscott 0 1 6.957497 0.000000 17038 +henrik 0 1 6.957497 0.000000 17039 +hulgaardstafflarri 0 1 6.957497 0.000000 17040 +mcmurchielist 0 1 6.957497 0.000000 17041 +paperschinook 0 1 6.957497 0.000000 17042 +sponsorsarpa 0 1 6.957497 0.000000 17043 +walkup 0 1 6.957497 0.000000 17044 +patricia 0 1 6.957497 0.000000 17045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..33bb3ff1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +develop 0 174 1.791759 0.000000 53 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +person 0 111 2.197225 0.000000 117 +need 0 98 2.302585 0.000000 135 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +mani 0 92 2.397895 0.000000 150 +environ 0 84 2.484907 0.000000 177 +contain 0 81 2.484907 0.000000 174 +appear 0 78 2.564949 0.000000 210 +logic 0 71 2.639057 0.000000 230 +symposium 0 72 2.639057 0.000000 238 +would 0 67 2.708050 0.000000 251 +evalu 0 64 2.772589 0.000000 266 +copi 0 63 2.772589 0.000000 284 +februari 0 54 2.944439 0.000000 328 +allow 0 53 2.944439 0.000000 333 +maintain 0 51 2.995732 0.000000 342 +right 0 48 3.044522 0.000000 363 +without 0 50 3.044522 0.000000 370 +featur 0 46 3.091042 0.000000 386 +offer 0 43 3.178054 0.000000 414 +term 0 43 3.178054 0.000000 411 +third 0 43 3.178054 0.000000 412 +fast 0 42 3.218876 0.000000 429 +map 0 39 3.258097 0.000000 452 +author 0 39 3.258097 0.000000 450 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +field 0 37 3.332205 0.000000 482 +mean 0 37 3.332205 0.000000 477 +copyright 0 36 3.367296 0.000000 495 +global 0 34 3.401197 0.000000 520 +given 0 32 3.465736 0.000000 538 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +specifi 0 30 3.555348 0.000000 568 +propos 0 28 3.610918 0.000000 602 +arrai 0 27 3.637586 0.000000 627 +constraint 0 26 3.688879 0.000000 636 +reliabl 0 25 3.737670 0.000000 674 +accur 0 25 3.737670 0.000000 680 +frame 0 24 3.761200 0.000000 684 +rout 0 21 3.912023 0.000000 793 +basi 0 20 3.951244 0.000000 828 +definit 0 19 4.007333 0.000000 864 +partit 0 16 4.174387 0.000000 984 +commerci 0 16 4.174387 0.000000 1005 +fourth 0 16 4.174387 0.000000 999 +driven 0 15 4.248495 0.000000 1048 +carl 0 15 4.248495 0.000000 1024 +contribut 0 15 4.248495 0.000000 1021 +larri 0 13 4.382027 0.000000 1142 +unfortun 0 13 4.382027 0.000000 1170 +block 0 13 4.382027 0.000000 1183 +fpga 0 10 4.653960 0.000000 1433 +placement 0 10 4.653960 0.000000 1420 +face 0 9 4.753590 0.000000 1501 +router 0 8 4.875197 0.000000 1772 +satisfi 0 8 4.875197 0.000000 1694 +heart 0 8 4.875197 0.000000 1729 +metric 0 7 5.010635 0.000000 1831 +gate 0 6 5.164786 0.000000 2182 +phase 0 6 5.164786 0.000000 1977 +quickli 0 6 5.164786 0.000000 2000 +ensur 0 6 5.164786 0.000000 2012 +invok 0 6 5.164786 0.000000 2079 +darren 0 5 5.347108 0.000000 2565 +variat 0 5 5.347108 0.000000 2248 +understood 0 5 5.347108 0.000000 2364 +mcmurchi 0 4 5.568345 0.000000 2757 +ebel 0 4 5.568345 0.000000 2756 +permiss 0 4 5.568345 0.000000 2642 +emerald 1 3 5.857933 5.857933 3979 +cronquist 0 3 5.857933 0.000000 3942 +moreov 0 3 5.857933 0.000000 3200 +byth 0 3 5.857933 0.000000 3874 +performanceevalu 0 2 6.263398 0.000000 6052 +thoroughli 0 2 6.263398 0.000000 4801 +parameter 0 2 6.263398 0.000000 5540 +dissemin 0 2 6.263398 0.000000 5080 +adher 0 2 6.263398 0.000000 6025 +sigda 0 2 6.263398 0.000000 5493 +pathfind 0 2 6.263398 0.000000 6053 +negoti 0 2 6.263398 0.000000 6054 +basedperform 0 2 6.263398 0.000000 6055 +projectid 0 1 6.957497 0.000000 17046 +makeus 0 1 6.957497 0.000000 17047 +quickproduct 0 1 6.957497 0.000000 17048 +isoften 0 1 6.957497 0.000000 17049 +postpon 0 1 6.957497 0.000000 17050 +beenfrozen 0 1 6.957497 0.000000 17051 +havedesign 0 1 6.957497 0.000000 17052 +quickdevelop 0 1 6.957497 0.000000 17053 +basicfeatur 0 1 6.957497 0.000000 17054 +synthesisand 0 1 6.957497 0.000000 17055 +anddetail 0 1 6.957497 0.000000 17056 +aneffici 0 1 6.957497 0.000000 17057 +blockarchitectur 0 1 6.957497 0.000000 17058 +tailorplac 0 1 6.957497 0.000000 17059 +schematicspecif 0 1 6.957497 0.000000 17060 +capturedand 0 1 6.957497 0.000000 17061 +ofscholarli 0 1 6.957497 0.000000 17062 +andal 0 1 6.957497 0.000000 17063 +therein 0 1 6.957497 0.000000 17064 +copyrighthold 0 1 6.957497 0.000000 17065 +notwithstand 0 1 6.957497 0.000000 17066 +hereelectron 0 1 6.957497 0.000000 17067 +thisinform 0 1 6.957497 0.000000 17068 +eachauthor 0 1 6.957497 0.000000 17069 +repost 0 1 6.957497 0.000000 17070 +theexplicit 0 1 6.957497 0.000000 17071 +holder 0 1 6.957497 0.000000 17072 +emeraldlarri 0 1 6.957497 0.000000 17073 +arraysaid 0 1 6.957497 0.000000 17074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..492d4114 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +sinc 0 90 2.397895 0.000000 159 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +educ 0 86 2.484907 0.000000 191 +wide 0 84 2.484907 0.000000 185 +complet 0 77 2.564949 0.000000 208 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +effici 0 73 2.639057 0.000000 233 +integr 0 67 2.708050 0.000000 245 +simul 0 66 2.708050 0.000000 255 +improv 0 62 2.772589 0.000000 289 +laboratori 0 63 2.772589 0.000000 292 +descript 0 64 2.772589 0.000000 271 +result 0 65 2.772589 0.000000 281 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +variou 0 56 2.890372 0.000000 317 +direct 0 57 2.890372 0.000000 316 +overview 0 56 2.890372 0.000000 323 +hardwar 0 51 2.995732 0.000000 350 +digit 0 52 2.995732 0.000000 348 +adapt 0 46 3.091042 0.000000 387 +late 0 40 3.258097 0.000000 439 +map 0 39 3.258097 0.000000 452 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +multi 0 36 3.367296 0.000000 493 +survei 0 35 3.401197 0.000000 513 +board 0 33 3.433987 0.000000 528 +toler 0 33 3.433987 0.000000 533 +focu 0 30 3.555348 0.000000 571 +synchron 0 29 3.583519 0.000000 588 +scale 0 28 3.610918 0.000000 613 +compar 0 26 3.688879 0.000000 648 +todai 0 25 3.737670 0.000000 672 +methodolog 0 23 3.806662 0.000000 733 +varieti 0 22 3.850148 0.000000 740 +reduc 0 22 3.850148 0.000000 759 +self 0 22 3.850148 0.000000 761 +vlsi 0 21 3.912023 0.000000 795 +rout 0 21 3.912023 0.000000 793 +chip 0 21 3.912023 0.000000 770 +synthesi 0 20 3.951244 0.000000 834 +verif 0 20 3.951244 0.000000 826 +separ 0 19 4.007333 0.000000 844 +feedback 0 19 4.007333 0.000000 854 +aid 0 18 4.060443 0.000000 904 +event 0 18 4.060443 0.000000 896 +commerci 0 16 4.174387 0.000000 1005 +partit 0 16 4.174387 0.000000 984 +latenc 0 16 4.174387 0.000000 993 +embed 0 14 4.317488 0.000000 1102 +topolog 0 14 4.317488 0.000000 1089 +circuit 1 13 4.382027 4.382027 1131 +asynchron 0 12 4.465908 0.000000 1229 +tune 0 12 4.465908 0.000000 1227 +clock 0 11 4.553877 0.000000 1320 +valid 0 11 4.553877 0.000000 1299 +arpa 0 11 4.553877 0.000000 1369 +fpga 0 10 4.653960 0.000000 1433 +rapid 0 10 4.653960 0.000000 1453 +placement 0 10 4.653960 0.000000 1420 +paragraph 0 10 4.653960 0.000000 1449 +sensit 0 8 4.875197 0.000000 1726 +accomplish 0 8 4.875197 0.000000 1755 +northwest 0 7 5.010635 0.000000 1973 +densiti 0 7 5.010635 0.000000 1927 +metric 0 7 5.010635 0.000000 1831 +chinook 0 6 5.164786 0.000000 2229 +layout 0 6 5.164786 0.000000 2183 +sytem 0 4 5.568345 0.000000 3015 +triptych 0 4 5.568345 0.000000 3061 +toolset 0 4 5.568345 0.000000 3014 +chaoticrout 0 4 5.568345 0.000000 3063 +tester 0 4 5.568345 0.000000 2754 +emerald 0 3 5.857933 0.000000 3979 +systemsth 0 3 5.857933 0.000000 3835 +mactest 0 3 5.857933 0.000000 3972 +cmo 0 3 5.857933 0.000000 3992 +montag 0 2 6.263398 0.000000 5921 +retim 0 2 6.263398 0.000000 6008 +usath 0 2 6.263398 0.000000 6056 +engag 0 2 6.263398 0.000000 4937 +springbok 0 2 6.263398 0.000000 5922 +latch 0 2 6.263398 0.000000 6034 +skew 0 2 6.263398 0.000000 6057 +gemini 0 2 6.263398 0.000000 5975 +voltag 0 2 6.263398 0.000000 5475 +verificationtim 0 1 6.957497 0.000000 17075 +prototypingtriptych 0 1 6.957497 0.000000 17076 +toolscan 0 1 6.957497 0.000000 17077 +fpgaarchitectur 0 1 6.957497 0.000000 17078 +incorporatedinto 0 1 6.957497 0.000000 17079 +circuitsretim 0 1 6.957497 0.000000 17080 +uselevel 0 1 6.957497 0.000000 17081 +andincreas 0 1 6.957497 0.000000 17082 +synchronouscircuit 0 1 6.957497 0.000000 17083 +contraint 0 1 6.957497 0.000000 17084 +routerth 0 1 6.957497 0.000000 17085 +systemsself 0 1 6.957497 0.000000 17086 +kehlprevi 0 1 6.957497 0.000000 17087 +reportsarpa 0 1 6.957497 0.000000 17088 +bluebook 0 1 6.957497 0.000000 17089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..ef26b1ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +specif 0 106 2.197225 0.000000 106 +techniqu 0 99 2.302585 0.000000 138 +contain 0 81 2.484907 0.000000 174 +build 0 85 2.484907 0.000000 184 +issu 0 78 2.564949 0.000000 211 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +test 0 66 2.708050 0.000000 252 +goal 0 66 2.708050 0.000000 250 +foundat 0 62 2.772589 0.000000 286 +import 0 65 2.772589 0.000000 282 +plai 0 60 2.833213 0.000000 307 +space 0 57 2.890372 0.000000 310 +digit 0 52 2.995732 0.000000 348 +review 0 42 3.218876 0.000000 425 +must 0 40 3.258097 0.000000 442 +theoret 0 39 3.258097 0.000000 446 +prototyp 0 38 3.295837 0.000000 463 +exist 0 30 3.555348 0.000000 569 +built 0 29 3.583519 0.000000 592 +pass 0 28 3.610918 0.000000 611 +although 0 25 3.737670 0.000000 667 +methodolog 0 23 3.806662 0.000000 733 +safeti 1 20 3.951244 3.951244 817 +medic 0 17 4.110874 0.000000 958 +critic 0 16 4.174387 0.000000 982 +upon 0 16 4.174387 0.000000 978 +role 0 14 4.317488 0.000000 1101 +nanci 0 12 4.465908 0.000000 1256 +summar 0 11 4.553877 0.000000 1295 +valid 0 11 4.553877 0.000000 1299 +equip 0 10 4.653960 0.000000 1459 +leveson 0 9 4.753590 0.000000 1540 +consequ 0 6 5.164786 0.000000 1989 +nuclear 0 5 5.347108 0.000000 2576 +chemic 0 5 5.347108 0.000000 2552 +plant 0 5 5.347108 0.000000 2497 +decad 0 5 5.347108 0.000000 2455 +increasingli 0 4 5.568345 0.000000 2766 +aircraft 0 4 5.568345 0.000000 2872 +rigor 0 4 5.568345 0.000000 3030 +lai 0 3 5.857933 0.000000 3694 +safewar 0 2 6.263398 0.000000 5959 +reactor 0 1 6.957497 0.000000 17090 +defenc 0 1 6.957497 0.000000 17091 +malfunct 0 1 6.957497 0.000000 17092 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..c9e30d46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +world 0 115 2.197225 0.000000 126 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +graphic 0 90 2.397895 0.000000 147 +search 0 95 2.397895 0.000000 155 +real 0 93 2.397895 0.000000 144 +internet 0 83 2.484907 0.000000 186 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +wide 0 84 2.484907 0.000000 185 +resourc 0 81 2.484907 0.000000 172 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +interfac 0 79 2.564949 0.000000 209 +decemb 0 80 2.564949 0.000000 215 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +goal 0 66 2.708050 0.000000 250 +plan 0 65 2.772589 0.000000 272 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +back 0 60 2.833213 0.000000 297 +juli 0 60 2.833213 0.000000 305 +unix 0 58 2.890372 0.000000 308 +space 0 57 2.890372 0.000000 310 +browser 0 56 2.890372 0.000000 313 +found 0 53 2.944439 0.000000 337 +allow 0 53 2.944439 0.000000 333 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +maintain 0 51 2.995732 0.000000 342 +without 0 50 3.044522 0.000000 370 +execut 0 45 3.135494 0.000000 404 +protocol 0 45 3.135494 0.000000 407 +multipl 0 39 3.258097 0.000000 453 +field 0 37 3.332205 0.000000 482 +robot 0 36 3.367296 0.000000 497 +procedur 0 36 3.367296 0.000000 488 +multi 0 36 3.367296 0.000000 493 +articl 0 33 3.433987 0.000000 530 +human 0 32 3.465736 0.000000 546 +extend 0 32 3.465736 0.000000 539 +collabor 0 32 3.465736 0.000000 543 +taken 0 31 3.496508 0.000000 555 +rang 0 30 3.555348 0.000000 565 +option 0 30 3.555348 0.000000 575 +specifi 0 30 3.555348 0.000000 568 +domain 0 30 3.555348 0.000000 564 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +challeng 0 26 3.688879 0.000000 653 +compar 0 26 3.688879 0.000000 648 +rule 0 26 3.688879 0.000000 638 +magazin 0 24 3.761200 0.000000 704 +mike 0 24 3.761200 0.000000 703 +methodolog 0 23 3.806662 0.000000 733 +util 0 21 3.912023 0.000000 774 +alumni 0 21 3.912023 0.000000 807 +agent 0 18 4.060443 0.000000 910 +accept 0 18 4.060443 0.000000 879 +debug 0 17 4.110874 0.000000 944 +indic 0 15 4.248495 0.000000 1013 +achiev 0 14 4.317488 0.000000 1088 +easili 0 14 4.317488 0.000000 1077 +dave 0 14 4.317488 0.000000 1098 +daniel 0 12 4.465908 0.000000 1233 +tour 0 11 4.553877 0.000000 1307 +motiv 0 11 4.553877 0.000000 1346 +princip 0 10 4.653960 0.000000 1397 +metacrawl 0 10 4.653960 0.000000 1455 +weld 0 9 4.753590 0.000000 1538 +autonom 0 8 4.875197 0.000000 1749 +claim 0 8 4.875197 0.000000 1664 +accomplish 0 8 4.875197 0.000000 1755 +gather 0 8 4.875197 0.000000 1719 +softbot 1 7 5.010635 5.010635 1974 +golden 0 7 5.010635 0.000000 1962 +intellectu 0 7 5.010635 0.000000 1847 +planner 0 7 5.010635 0.000000 1797 +etzioni 0 6 5.164786 0.000000 2135 +oren 0 6 5.164786 0.000000 2134 +moder 0 6 5.164786 0.000000 2112 +brook 0 6 5.164786 0.000000 2152 +versu 0 6 5.164786 0.000000 2052 +keith 0 5 5.347108 0.000000 2528 +shell 0 5 5.347108 0.000000 2353 +cacm 0 5 5.347108 0.000000 2388 +innov 0 4 5.568345 0.000000 2933 +substrat 0 4 5.568345 0.000000 2857 +disambigu 0 4 5.568345 0.000000 2899 +repli 0 4 5.568345 0.000000 2689 +toth 0 4 5.568345 0.000000 2595 +reactiv 0 3 5.857933 0.000000 3575 +kwok 0 3 5.857933 0.000000 3941 +sujai 0 3 5.857933 0.000000 3960 +parekh 0 3 5.857933 0.000000 3961 +hacker 0 3 5.857933 0.000000 3996 +finalist 0 2 6.263398 0.000000 5890 +discoveraward 0 2 6.263398 0.000000 5891 +learningtechniqu 0 2 6.263398 0.000000 5028 +christianson 0 2 6.263398 0.000000 5849 +negoti 0 2 6.263398 0.000000 6054 +goan 0 2 6.263398 0.000000 5896 +ingram 0 2 6.263398 0.000000 5847 +perkowitz 0 2 6.263398 0.000000 5970 +softbotinternet 0 1 6.957497 0.000000 17093 +softbotth 0 1 6.957497 0.000000 17094 +softwareenviron 0 1 6.957497 0.000000 17095 +pragmaticallyconveni 0 1 6.957497 0.000000 17096 +acustomiz 0 1 6.957497 0.000000 17097 +internetaccess 0 1 6.957497 0.000000 17098 +generatesand 0 1 6.957497 0.000000 17099 +itsexperi 0 1 6.957497 0.000000 17100 +requestand 0 1 6.957497 0.000000 17101 +satisfyit 0 1 6.957497 0.000000 17102 +interactwith 0 1 6.957497 0.000000 17103 +sgraphic 0 1 6.957497 0.000000 17104 +tosearch 0 1 6.957497 0.000000 17105 +sophisticatedprun 0 1 6.957497 0.000000 17106 +cartoonrepresent 0 1 6.957497 0.000000 17107 +blanchard 0 1 6.957497 0.000000 17108 +ofcolumn 0 1 6.957497 0.000000 17109 +xiiplann 0 1 6.957497 0.000000 17110 +ilalearn 0 1 6.957497 0.000000 17111 +ying 0 1 6.957497 0.000000 17112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..e8ece73a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,214 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +code 0 108 2.197225 0.000000 116 +intern 0 108 2.197225 0.000000 128 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +manag 0 114 2.197225 0.000000 125 +assist 0 112 2.197225 0.000000 113 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +control 0 82 2.484907 0.000000 164 +member 0 84 2.484907 0.000000 165 +resourc 0 81 2.484907 0.000000 172 +build 0 85 2.484907 0.000000 184 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +exampl 0 77 2.564949 0.000000 195 +master 0 76 2.564949 0.000000 216 +servic 0 72 2.639057 0.000000 236 +effici 0 73 2.639057 0.000000 233 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +integr 0 67 2.708050 0.000000 245 +order 0 69 2.708050 0.000000 249 +degre 0 69 2.708050 0.000000 259 +collect 0 65 2.772589 0.000000 268 +result 0 65 2.772589 0.000000 281 +creat 0 63 2.772589 0.000000 277 +copi 0 63 2.772589 0.000000 284 +written 0 63 2.772589 0.000000 278 +function 0 62 2.772589 0.000000 275 +experi 0 64 2.772589 0.000000 283 +improv 0 62 2.772589 0.000000 289 +unix 0 58 2.890372 0.000000 308 +direct 0 57 2.890372 0.000000 316 +space 0 57 2.890372 0.000000 310 +special 0 56 2.890372 0.000000 320 +point 0 58 2.890372 0.000000 319 +overview 0 56 2.890372 0.000000 323 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +talk 0 53 2.944439 0.000000 336 +undergradu 0 54 2.944439 0.000000 338 +run 0 51 2.995732 0.000000 347 +maintain 0 51 2.995732 0.000000 342 +basic 0 50 3.044522 0.000000 360 +friend 0 48 3.044522 0.000000 376 +pointer 0 48 3.044522 0.000000 368 +adapt 0 46 3.091042 0.000000 387 +could 0 46 3.091042 0.000000 383 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +video 0 44 3.135494 0.000000 405 +anoth 0 45 3.135494 0.000000 408 +protocol 0 45 3.135494 0.000000 407 +mechan 0 43 3.178054 0.000000 416 +show 0 43 3.178054 0.000000 417 +join 0 39 3.258097 0.000000 457 +realli 0 40 3.258097 0.000000 444 +credit 0 38 3.295837 0.000000 460 +industri 0 38 3.295837 0.000000 464 +brian 0 38 3.295837 0.000000 466 +workstat 0 37 3.332205 0.000000 479 +purpos 0 37 3.332205 0.000000 481 +connect 0 37 3.332205 0.000000 485 +procedur 0 36 3.367296 0.000000 488 +winter 0 36 3.367296 0.000000 500 +extend 0 32 3.465736 0.000000 539 +fault 0 32 3.465736 0.000000 547 +posit 0 31 3.496508 0.000000 552 +synchron 0 29 3.583519 0.000000 588 +load 0 28 3.610918 0.000000 601 +pass 0 28 3.610918 0.000000 611 +manipul 0 27 3.637586 0.000000 624 +rather 0 26 3.688879 0.000000 642 +effort 0 26 3.688879 0.000000 652 +handl 0 24 3.761200 0.000000 685 +thread 0 23 3.806662 0.000000 722 +almost 0 22 3.850148 0.000000 742 +properti 0 22 3.850148 0.000000 749 +deal 0 22 3.850148 0.000000 736 +flexibl 0 21 3.912023 0.000000 792 +latest 0 21 3.912023 0.000000 785 +fund 0 21 3.912023 0.000000 805 +kernel 0 20 3.951244 0.000000 825 +longer 0 20 3.951244 0.000000 816 +safeti 0 20 3.951244 0.000000 817 +facil 0 20 3.951244 0.000000 814 +benchmark 0 19 4.007333 0.000000 859 +runtim 0 19 4.007333 0.000000 858 +bershad 0 18 4.060443 0.000000 902 +less 0 18 4.060443 0.000000 892 +statu 0 18 4.060443 0.000000 885 +encourag 0 18 4.060443 0.000000 880 +regular 0 17 4.110874 0.000000 929 +critic 0 16 4.174387 0.000000 982 +capabl 0 15 4.248495 0.000000 1016 +piec 0 15 4.248495 0.000000 1020 +overhead 0 15 4.248495 0.000000 1035 +spin 0 14 4.317488 0.000000 1121 +happi 0 14 4.317488 0.000000 1079 +decid 0 14 4.317488 0.000000 1075 +sai 0 13 4.382027 0.000000 1175 +pretti 0 13 4.382027 0.000000 1191 +wait 0 13 4.382027 0.000000 1168 +safe 0 12 4.465908 0.000000 1274 +usenix 0 12 4.465908 0.000000 1240 +alpha 0 11 4.553877 0.000000 1348 +arbitrari 0 11 4.553877 0.000000 1359 +abil 0 11 4.553877 0.000000 1341 +arpa 0 11 4.553877 0.000000 1369 +sosp 0 10 4.653960 0.000000 1416 +modula 0 9 4.753590 0.000000 1613 +inter 0 9 4.753590 0.000000 1530 +osdi 0 9 4.753590 0.000000 1534 +clear 0 9 4.753590 0.000000 1488 +isol 0 8 4.875197 0.000000 1663 +crash 0 8 4.875197 0.000000 1616 +cross 0 8 4.875197 0.000000 1703 +mach 0 8 4.875197 0.000000 1669 +core 0 7 5.010635 0.000000 1809 +prevent 0 7 5.010635 0.000000 1827 +bottom 0 7 5.010635 0.000000 1906 +quick 0 6 5.164786 0.000000 2184 +recov 0 6 5.164786 0.000000 2235 +trail 0 6 5.164786 0.000000 2071 +academia 0 6 5.164786 0.000000 2036 +bind 0 5 5.347108 0.000000 2250 +distinct 0 5 5.347108 0.000000 2319 +adopt 0 5 5.347108 0.000000 2467 +termin 0 4 5.568345 0.000000 2852 +andimplement 0 4 5.568345 0.000000 3029 +fork 0 4 5.568345 0.000000 2801 +gotten 0 4 5.568345 0.000000 2628 +stillmaintain 0 3 5.857933 0.000000 3964 +providesa 0 3 5.857933 0.000000 3884 +thesear 0 3 5.857933 0.000000 3456 +forappl 0 3 5.857933 0.000000 3929 +linker 0 3 5.857933 0.000000 3157 +namespac 0 3 5.857933 0.000000 3957 +arrow 0 3 5.857933 0.000000 3520 +microsecond 0 2 6.263398 0.000000 5435 +shortcom 0 2 6.263398 0.000000 5978 +wella 0 2 6.263398 0.000000 4289 +linkabl 0 2 6.263398 0.000000 5979 +barb 0 2 6.263398 0.000000 6058 +qualif 0 2 6.263398 0.000000 6059 +mascot 0 2 6.263398 0.000000 6060 +systemspin 0 1 6.957497 0.000000 17113 +thatsupport 0 1 6.957497 0.000000 17114 +atruntim 0 1 6.957497 0.000000 17115 +accesshardwar 0 1 6.957497 0.000000 17116 +nooverhead 0 1 6.957497 0.000000 17117 +byrefer 0 1 6.957497 0.000000 17118 +systemservic 0 1 6.957497 0.000000 17119 +allextens 0 1 6.957497 0.000000 17120 +typesaf 0 1 6.957497 0.000000 17121 +oftypesafeti 0 1 6.957497 0.000000 17122 +attemptingto 0 1 6.957497 0.000000 17123 +writeboth 0 1 6.957497 0.000000 17124 +machinerun 0 1 6.957497 0.000000 17125 +withlow 0 1 6.957497 0.000000 17126 +executeit 0 1 6.957497 0.000000 17127 +protectedprocedur 0 1 6.957497 0.000000 17128 +overethernet 0 1 6.957497 0.000000 17129 +oldadapt 0 1 6.957497 0.000000 17130 +operationsund 0 1 6.957497 0.000000 17131 +samehardwar 0 1 6.957497 0.000000 17132 +saveyourself 0 1 6.957497 0.000000 17133 +invoc 0 1 6.957497 0.000000 17134 +andsimpl 0 1 6.957497 0.000000 17135 +interposit 0 1 6.957497 0.000000 17136 +raship 0 1 6.957497 0.000000 17137 +ourmascot 0 1 6.957497 0.000000 17138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..740de38c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +relat 0 139 1.945910 0.000000 68 +perform 0 143 1.945910 0.000000 74 +compil 1 122 2.079442 2.079442 96 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +code 0 108 2.197225 0.000000 116 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +member 0 84 2.484907 0.000000 165 +activ 0 84 2.484907 0.000000 182 +start 0 83 2.484907 0.000000 173 +build 0 85 2.484907 0.000000 184 +second 0 81 2.484907 0.000000 166 +dynam 0 76 2.564949 0.000000 194 +optim 0 79 2.564949 0.000000 197 +exampl 0 77 2.564949 0.000000 195 +effici 0 73 2.639057 0.000000 233 +appli 0 71 2.639057 0.000000 226 +august 0 66 2.708050 0.000000 257 +copi 0 63 2.772589 0.000000 284 +automat 0 61 2.833213 0.000000 306 +simpl 0 60 2.833213 0.000000 298 +explor 0 58 2.890372 0.000000 324 +detail 0 57 2.890372 0.000000 321 +approach 0 48 3.044522 0.000000 366 +execut 0 45 3.135494 0.000000 404 +describ 0 45 3.135494 0.000000 400 +howev 0 41 3.218876 0.000000 422 +fast 0 42 3.218876 0.000000 429 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +purpos 0 37 3.332205 0.000000 481 +soon 0 36 3.367296 0.000000 494 +produc 0 30 3.555348 0.000000 572 +rang 0 30 3.555348 0.000000 565 +releas 0 28 3.610918 0.000000 616 +static 0 27 3.637586 0.000000 619 +enabl 0 26 3.688879 0.000000 655 +bound 0 26 3.688879 0.000000 659 +valu 0 25 3.737670 0.000000 665 +interpret 0 24 3.761200 0.000000 686 +variabl 0 23 3.806662 0.000000 715 +initi 0 23 3.806662 0.000000 717 +identifi 0 22 3.850148 0.000000 760 +annot 0 21 3.912023 0.000000 775 +kernel 0 20 3.951244 0.000000 825 +region 0 19 4.007333 0.000000 875 +spin 0 14 4.317488 0.000000 1121 +remov 0 12 4.465908 0.000000 1225 +target 0 12 4.465908 0.000000 1282 +grant 0 12 4.465908 0.000000 1216 +branch 0 11 4.553877 0.000000 1318 +loop 0 11 4.553877 0.000000 1310 +elimin 0 9 4.753590 0.000000 1558 +pair 0 9 4.753590 0.000000 1503 +analys 0 8 4.875197 0.000000 1666 +pldi 0 8 4.875197 0.000000 1704 +dispatch 0 7 5.010635 0.000000 1791 +constant 0 5 5.347108 0.000000 2251 +templat 0 5 5.347108 0.000000 2311 +dataflow 0 5 5.347108 0.000000 2390 +willb 0 5 5.347108 0.000000 2277 +spinproject 0 5 5.347108 0.000000 2570 +fold 0 4 5.568345 0.000000 2615 +fulli 0 4 5.568345 0.000000 2986 +theprogram 0 4 5.568345 0.000000 2686 +patch 0 4 5.568345 0.000000 2710 +imper 0 4 5.568345 0.000000 3067 +eventu 0 4 5.568345 0.000000 3074 +wewil 0 4 5.568345 0.000000 2688 +projectth 0 3 5.857933 0.000000 3344 +propag 0 3 5.857933 0.000000 3997 +dynamiccompil 0 3 5.857933 0.000000 3926 +optimizingcompil 0 2 6.263398 0.000000 4456 +projectmor 0 1 6.957497 0.000000 17139 +projectsuw 0 1 6.957497 0.000000 17140 +webdynam 0 1 6.957497 0.000000 17141 +ofinvari 0 1 6.957497 0.000000 17142 +theserun 0 1 6.957497 0.000000 17143 +memoryload 0 1 6.957497 0.000000 17144 +theydetermin 0 1 6.957497 0.000000 17145 +unrol 0 1 6.957497 0.000000 17146 +performancebenefit 0 1 6.957497 0.000000 17147 +offsetbi 0 1 6.957497 0.000000 17148 +strive 0 1 6.957497 0.000000 17149 +qualitydynam 0 1 6.957497 0.000000 17150 +thetempl 0 1 6.957497 0.000000 17151 +initialexperi 0 1 6.957497 0.000000 17152 +producedspeedup 0 1 6.957497 0.000000 17153 +dynamicallycompil 0 1 6.957497 0.000000 17154 +spinev 0 1 6.957497 0.000000 17155 +otherposs 0 1 6.957497 0.000000 17156 +invirtu 0 1 6.957497 0.000000 17157 +systemi 0 1 6.957497 0.000000 17158 +arenow 0 1 6.957497 0.000000 17159 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..f1d85b6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +info 0 85 2.484907 0.000000 176 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +member 0 84 2.484907 0.000000 165 +help 0 83 2.484907 0.000000 175 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +would 0 67 2.708050 0.000000 251 +written 0 63 2.772589 0.000000 278 +descript 0 64 2.772589 0.000000 271 +direct 0 57 2.890372 0.000000 316 +overview 0 56 2.890372 0.000000 323 +special 0 56 2.890372 0.000000 320 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +detail 0 57 2.890372 0.000000 321 +scientif 0 53 2.944439 0.000000 341 +sampl 0 53 2.944439 0.000000 339 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +fast 0 42 3.218876 0.000000 429 +futur 0 41 3.218876 0.000000 427 +error 0 40 3.258097 0.000000 449 +small 0 39 3.258097 0.000000 447 +programm 0 39 3.258097 0.000000 445 +manual 0 35 3.401197 0.000000 504 +concept 0 32 3.465736 0.000000 537 +independ 0 32 3.465736 0.000000 548 +scientist 0 31 3.496508 0.000000 560 +autumn 0 31 3.496508 0.000000 558 +arrai 0 27 3.637586 0.000000 627 +higher 0 24 3.761200 0.000000 690 +flow 0 24 3.761200 0.000000 700 +sequenti 0 22 3.850148 0.000000 745 +minut 0 20 3.951244 0.000000 810 +region 0 19 4.007333 0.000000 875 +previous 0 17 4.110874 0.000000 923 +modif 0 17 4.110874 0.000000 913 +fortran 0 15 4.248495 0.000000 1027 +easili 0 14 4.317488 0.000000 1077 +necessari 0 13 4.382027 0.000000 1147 +walk 0 12 4.465908 0.000000 1281 +loop 0 11 4.553877 0.000000 1310 +typic 0 11 4.553877 0.000000 1360 +suitabl 0 9 4.753590 0.000000 1486 +elimin 0 9 4.753590 0.000000 1558 +ideal 0 8 4.875197 0.000000 1630 +understood 0 5 5.347108 0.000000 2364 +enrol 0 4 5.568345 0.000000 2613 +tediou 0 3 5.857933 0.000000 3731 +shorter 0 3 5.857933 0.000000 3998 +conclus 0 3 5.857933 0.000000 3367 +horizon 0 3 5.857933 0.000000 3746 +border 0 2 6.263398 0.000000 4980 +prone 0 2 6.263398 0.000000 5178 +shouldconsid 0 2 6.263398 0.000000 6061 +acknowledg 0 2 6.263398 0.000000 6062 +eduzpl 0 1 6.957497 0.000000 17160 +recompil 0 1 6.957497 0.000000 17161 +shatter 0 1 6.957497 0.000000 17162 +yourmachin 0 1 6.957497 0.000000 17163 +zpthi 0 1 6.957497 0.000000 17164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..04efb654 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +architectur 0 139 1.945910 0.000000 77 +problem 0 147 1.945910 0.000000 75 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +intern 0 108 2.197225 0.000000 128 +check 0 115 2.197225 0.000000 118 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +proceed 0 93 2.397895 0.000000 152 +level 0 87 2.484907 0.000000 180 +resourc 0 81 2.484907 0.000000 172 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +issu 0 78 2.564949 0.000000 211 +dynam 0 76 2.564949 0.000000 194 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +differ 0 66 2.708050 0.000000 253 +abstract 0 62 2.772589 0.000000 276 +function 0 62 2.772589 0.000000 275 +januari 0 62 2.772589 0.000000 264 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +extens 0 53 2.944439 0.000000 340 +maintain 0 51 2.995732 0.000000 342 +hardwar 0 51 2.995732 0.000000 350 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +still 0 50 3.044522 0.000000 362 +effect 0 46 3.091042 0.000000 385 +execut 0 45 3.135494 0.000000 404 +long 0 43 3.178054 0.000000 413 +combin 0 42 3.218876 0.000000 421 +futur 0 41 3.218876 0.000000 427 +fast 0 42 3.218876 0.000000 429 +multipl 0 39 3.258097 0.000000 453 +annual 0 40 3.258097 0.000000 458 +submit 0 39 3.258097 0.000000 440 +singl 0 34 3.401197 0.000000 510 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +collabor 0 32 3.465736 0.000000 543 +limit 0 29 3.583519 0.000000 585 +though 0 27 3.637586 0.000000 622 +enabl 0 26 3.688879 0.000000 655 +todai 0 25 3.737670 0.000000 672 +thread 0 23 3.806662 0.000000 722 +util 0 21 3.912023 0.000000 774 +unit 0 21 3.912023 0.000000 779 +corpor 0 21 3.912023 0.000000 802 +chip 0 21 3.912023 0.000000 770 +increas 0 20 3.951244 0.000000 829 +exploit 0 20 3.951244 0.000000 836 +speed 0 18 4.060443 0.000000 911 +minim 0 18 4.060443 0.000000 887 +ultim 0 17 4.110874 0.000000 943 +stanford 0 17 4.110874 0.000000 955 +latenc 0 16 4.174387 0.000000 993 +modern 0 16 4.174387 0.000000 966 +permit 0 16 4.174387 0.000000 962 +choic 0 16 4.174387 0.000000 979 +susan 0 15 4.248495 0.000000 1050 +levi 0 14 4.317488 0.000000 1093 +shown 0 14 4.317488 0.000000 1080 +conduct 0 14 4.317488 0.000000 1065 +dean 0 14 4.317488 0.000000 1104 +convert 0 13 4.382027 0.000000 1122 +amount 0 12 4.465908 0.000000 1208 +hank 0 12 4.465908 0.000000 1253 +philadelphia 0 12 4.465908 0.000000 1244 +multithread 1 11 4.553877 4.553877 1315 +cycl 0 11 4.553877 0.000000 1335 +itali 0 11 4.553877 0.000000 1378 +equip 0 10 4.653960 0.000000 1459 +santa 0 10 4.653960 0.000000 1441 +face 0 9 4.753590 0.000000 1501 +significantli 0 9 4.753590 0.000000 1508 +egger 0 8 4.875197 0.000000 1695 +jack 0 8 4.875197 0.000000 1780 +gain 0 8 4.875197 0.000000 1730 +joel 0 8 4.875197 0.000000 1698 +microprocessor 0 7 5.010635 0.000000 1808 +maxim 0 7 5.010635 0.000000 1944 +simultan 0 6 5.164786 0.000000 2155 +tullsen 0 6 5.164786 0.000000 2081 +superscalar 0 6 5.164786 0.000000 2082 +multiprogram 0 6 5.164786 0.000000 2010 +rebecca 0 6 5.164786 0.000000 2174 +crucial 0 5 5.347108 0.000000 2384 +compet 0 5 5.347108 0.000000 2462 +fetch 0 5 5.347108 0.000000 2567 +hide 0 4 5.568345 0.000000 2996 +throughput 0 4 5.568345 0.000000 2993 +emer 0 3 5.857933 0.000000 3969 +stamm 0 3 5.857933 0.000000 3970 +allevi 0 3 5.857933 0.000000 3643 +interchang 0 3 5.857933 0.000000 3893 +peoplefaculti 0 3 5.857933 0.000000 3981 +affair 0 3 5.857933 0.000000 3916 +andd 0 2 6.263398 0.000000 4346 +suif 0 2 6.263398 0.000000 5944 +lojlo 0 2 6.263398 0.000000 5943 +pagesimultan 0 1 6.957497 0.000000 17165 +projectoverviewpeoplepubl 0 1 6.957497 0.000000 17166 +overviewth 0 1 6.957497 0.000000 17167 +interleav 0 1 6.957497 0.000000 17168 +differentthread 0 1 6.957497 0.000000 17169 +issuefeatur 0 1 6.957497 0.000000 17170 +abilityof 0 1 6.957497 0.000000 17171 +contextsar 0 1 6.957497 0.000000 17172 +exploitthread 0 1 6.957497 0.000000 17173 +formsof 0 1 6.957497 0.000000 17174 +havedemonstr 0 1 6.957497 0.000000 17175 +improvesprocessor 0 1 6.957497 0.000000 17176 +parallelworkload 0 1 6.957497 0.000000 17177 +achievedin 0 1 6.957497 0.000000 17178 +ordersuperscalar 0 1 6.957497 0.000000 17179 +synchronizationtechniqu 0 1 6.957497 0.000000 17180 +otherarchitectur 0 1 6.957497 0.000000 17181 +levygradu 0 1 6.957497 0.000000 17182 +tullsenindustri 0 1 6.957497 0.000000 17183 +andh 0 1 6.957497 0.000000 17184 +margherita 0 1 6.957497 0.000000 17185 +ligur 0 1 6.957497 0.000000 17186 +doon 0 1 6.957497 0.000000 17187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..47431b33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +develop 0 174 1.791759 0.000000 53 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +report 0 131 2.079442 0.000000 92 +dayton 0 119 2.079442 0.000000 104 +peopl 0 96 2.302585 0.000000 132 +technic 0 100 2.302585 0.000000 140 +question 0 91 2.397895 0.000000 141 +member 0 84 2.484907 0.000000 165 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +receiv 0 66 2.708050 0.000000 244 +organ 0 65 2.772589 0.000000 265 +faculti 0 56 2.890372 0.000000 325 +three 0 54 2.944439 0.000000 330 +undergradu 0 54 2.944439 0.000000 338 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +frequent 0 49 3.044522 0.000000 367 +answer 0 45 3.135494 0.000000 391 +offer 0 43 3.178054 0.000000 414 +futur 0 41 3.218876 0.000000 427 +form 0 39 3.258097 0.000000 443 +annual 0 40 3.258097 0.000000 458 +streetmadison 0 38 3.295837 0.000000 474 +award 0 34 3.401197 0.000000 523 +statist 0 35 3.401197 0.000000 521 +dissert 0 32 3.465736 0.000000 549 +scientist 0 31 3.496508 0.000000 560 +ask 0 28 3.610918 0.000000 597 +consist 0 26 3.688879 0.000000 651 +doctor 0 24 3.761200 0.000000 709 +departmentunivers 0 24 3.761200 0.000000 711 +alumni 0 21 3.912023 0.000000 807 +util 0 21 3.912023 0.000000 774 +voic 0 21 3.912023 0.000000 806 +excel 0 19 4.007333 0.000000 868 +young 0 16 4.174387 0.000000 991 +women 0 16 4.174387 0.000000 1004 +countri 0 15 4.248495 0.000000 1059 +rank 0 14 4.317488 0.000000 1063 +packard 0 10 4.653960 0.000000 1444 +fellowship 0 10 4.653960 0.000000 1460 +presidenti 0 8 4.875197 0.000000 1737 +pagecomput 0 7 5.010635 0.000000 1900 +timet 0 3 5.857933 0.000000 3471 +guidebook 0 2 6.263398 0.000000 4643 +departmentabout 0 1 6.957497 0.000000 17188 +departmentour 0 1 6.957497 0.000000 17189 +fourteen 0 1 6.957497 0.000000 17190 +incent 0 1 6.957497 0.000000 17191 +colophon 0 1 6.957497 0.000000 17192 +infocomput 0 1 6.957497 0.000000 17193 +madisona 0 1 6.957497 0.000000 17194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..5aee86d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +provid 0 121 2.079442 0.000000 94 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +imag 0 91 2.397895 0.000000 161 +select 0 91 2.397895 0.000000 154 +control 0 82 2.484907 0.000000 164 +activ 0 84 2.484907 0.000000 182 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +order 0 69 2.708050 0.000000 249 +view 0 70 2.708050 0.000000 254 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +maintain 0 51 2.995732 0.000000 342 +approach 0 48 3.044522 0.000000 366 +show 0 43 3.178054 0.000000 417 +combin 0 42 3.218876 0.000000 421 +correct 0 38 3.295837 0.000000 462 +slide 0 38 3.295837 0.000000 467 +purpos 0 37 3.332205 0.000000 481 +connect 0 37 3.332205 0.000000 485 +global 0 34 3.401197 0.000000 520 +either 0 35 3.401197 0.000000 506 +posit 0 31 3.496508 0.000000 552 +exist 0 30 3.555348 0.000000 569 +consid 0 29 3.583519 0.000000 590 +focus 0 29 3.583519 0.000000 584 +task 1 25 3.737670 3.737670 678 +strategi 0 25 3.737670 0.000000 682 +motion 0 24 3.761200 0.000000 699 +other 0 24 3.761200 0.000000 697 +reach 0 24 3.761200 0.000000 688 +frame 0 24 3.761200 0.000000 684 +decis 0 23 3.806662 0.000000 728 +mobil 0 23 3.806662 0.000000 730 +lead 0 23 3.806662 0.000000 718 +defin 0 22 3.850148 0.000000 746 +navig 0 21 3.912023 0.000000 796 +avoid 0 21 3.912023 0.000000 799 +region 0 19 4.007333 0.000000 875 +geometr 0 19 4.007333 0.000000 852 +behavior 0 18 4.060443 0.000000 881 +minim 0 18 4.060443 0.000000 887 +attempt 0 17 4.110874 0.000000 917 +scene 0 14 4.317488 0.000000 1114 +achiev 0 14 4.317488 0.000000 1088 +chuck 0 14 4.317488 0.000000 1108 +consider 0 14 4.317488 0.000000 1076 +deriv 0 13 4.382027 0.000000 1145 +emploi 0 12 4.465908 0.000000 1284 +shape 0 12 4.465908 0.000000 1245 +abil 0 11 4.553877 0.000000 1341 +arbitrari 0 11 4.553877 0.000000 1359 +princip 0 10 4.653960 0.000000 1397 +relationship 0 10 4.653960 0.000000 1383 +observ 0 9 4.753590 0.000000 1578 +surfac 0 9 4.753590 0.000000 1574 +recoveri 0 9 4.753590 0.000000 1474 +dyer 0 9 4.753590 0.000000 1573 +formul 0 8 4.875197 0.000000 1733 +maxim 0 7 5.010635 0.000000 1944 +smooth 0 7 5.010635 0.000000 1855 +viewpoint 0 6 5.164786 0.000000 2116 +reconstruct 0 6 5.164786 0.000000 2170 +recov 0 6 5.164786 0.000000 2235 +provabl 0 5 5.347108 0.000000 2558 +align 0 4 5.568345 0.000000 2863 +visibl 0 4 5.568345 0.000000 2994 +simplifi 0 4 5.568345 0.000000 3066 +kyro 0 2 6.263398 0.000000 6063 +kutulako 0 2 6.263398 0.000000 6064 +descriptionof 0 2 6.263398 0.000000 5513 +thequalit 0 2 6.263398 0.000000 5622 +smoothli 0 1 6.957497 0.000000 17195 +simpleobserv 0 1 6.957497 0.000000 17196 +propertieseasi 0 1 6.957497 0.000000 17197 +fixat 0 1 6.957497 0.000000 17198 +toperform 0 1 6.957497 0.000000 17199 +obstacl 0 1 6.957497 0.000000 17200 +ourwork 0 1 6.957497 0.000000 17201 +pointof 0 1 6.957497 0.000000 17202 +makesimpl 0 1 6.957497 0.000000 17203 +geometryof 0 1 6.957497 0.000000 17204 +thesurfac 0 1 6.957497 0.000000 17205 +generalobserv 0 1 6.957497 0.000000 17206 +objectthan 0 1 6.957497 0.000000 17207 +beexploit 0 1 6.957497 0.000000 17208 +anddeterminist 0 1 6.957497 0.000000 17209 +localshap 0 1 6.957497 0.000000 17210 +qualitativestrategi 0 1 6.957497 0.000000 17211 +viewingdirect 0 1 6.957497 0.000000 17212 +selectedpoint 0 1 6.957497 0.000000 17213 +observationso 0 1 6.957497 0.000000 17214 +observationand 0 1 6.957497 0.000000 17215 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..c87e4a72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +model 0 145 1.945910 0.000000 69 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +studi 0 120 2.079442 0.000000 91 +final 0 116 2.197225 0.000000 108 +need 0 98 2.302585 0.000000 135 +imag 0 91 2.397895 0.000000 161 +contain 0 81 2.484907 0.000000 174 +activ 0 84 2.484907 0.000000 182 +level 0 87 2.484907 0.000000 180 +optim 0 79 2.564949 0.000000 197 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +special 0 56 2.890372 0.000000 320 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +case 0 51 2.995732 0.000000 351 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +visual 0 48 3.044522 0.000000 372 +combin 0 42 3.218876 0.000000 421 +small 0 39 3.258097 0.000000 447 +field 0 37 3.332205 0.000000 482 +random 0 34 3.401197 0.000000 511 +global 0 34 3.401197 0.000000 520 +transform 0 32 3.465736 0.000000 542 +consid 0 29 3.583519 0.000000 590 +turn 0 29 3.583519 0.000000 586 +framework 0 28 3.610918 0.000000 606 +determin 0 27 3.637586 0.000000 630 +detect 0 26 3.688879 0.000000 646 +experiment 0 26 3.688879 0.000000 645 +valu 0 25 3.737670 0.000000 665 +task 0 25 3.737670 0.000000 678 +initi 0 23 3.806662 0.000000 717 +recognit 0 23 3.806662 0.000000 723 +region 0 19 4.007333 0.000000 875 +along 0 18 4.060443 0.000000 878 +minim 0 18 4.060443 0.000000 887 +lower 0 18 4.060443 0.000000 886 +regular 0 17 4.110874 0.000000 929 +estim 0 17 4.110874 0.000000 930 +conduct 0 14 4.317488 0.000000 1065 +deriv 0 13 4.382027 0.000000 1145 +directli 0 13 4.382027 0.000000 1141 +arbitrari 0 11 4.553877 0.000000 1359 +valid 0 11 4.553877 0.000000 1299 +classif 0 9 4.753590 0.000000 1586 +classifi 0 9 4.753590 0.000000 1537 +equival 0 9 4.753590 0.000000 1496 +extract 0 8 4.875197 0.000000 1728 +formul 0 8 4.875197 0.000000 1733 +invari 0 8 4.875197 0.000000 1748 +furthermor 0 6 5.164786 0.000000 2141 +snake 0 5 5.347108 0.000000 2281 +yield 0 5 5.347108 0.000000 2458 +chin 0 5 5.347108 0.000000 2408 +stabl 0 5 5.347108 0.000000 2309 +markov 0 5 5.347108 0.000000 2280 +contour 1 4 5.568345 5.568345 2812 +subsequ 0 4 5.568345 0.000000 2665 +bayesian 0 4 5.568345 0.000000 2671 +rigor 0 4 5.568345 0.000000 3030 +energi 0 3 5.857933 0.000000 3950 +implicitli 0 3 5.857933 0.000000 3620 +hough 0 3 5.857933 0.000000 3527 +influenc 0 3 5.857933 0.000000 3349 +deform 0 2 6.263398 0.000000 6065 +criterion 0 2 6.263398 0.000000 5885 +pearson 0 2 6.263398 0.000000 5245 +summat 0 2 6.263398 0.000000 5325 +peak 0 2 6.263398 0.000000 5553 +confirm 0 2 6.263398 0.000000 4101 +noisi 0 1 6.957497 0.000000 17216 +fung 0 1 6.957497 0.000000 17217 +roland 0 1 6.957497 0.000000 17218 +ofact 0 1 6.957497 0.000000 17219 +minimax 0 1 6.957497 0.000000 17220 +wherebi 0 1 6.957497 0.000000 17221 +anduniqu 0 1 6.957497 0.000000 17222 +priordistribut 0 1 6.957497 0.000000 17223 +exert 0 1 6.957497 0.000000 17224 +posterior 0 1 6.957497 0.000000 17225 +withpattern 0 1 6.957497 0.000000 17226 +nearman 0 1 6.957497 0.000000 17227 +lemma 0 1 6.957497 0.000000 17228 +classificationtest 0 1 6.957497 0.000000 17229 +margin 0 1 6.957497 0.000000 17230 +gsnake 0 1 6.957497 0.000000 17231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..160bae53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +data 1 170 1.791759 1.791759 49 +base 0 165 1.791759 0.000000 50 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +construct 0 139 1.945910 0.000000 82 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +mathemat 0 108 2.197225 0.000000 123 +place 0 106 2.197225 0.000000 124 +specif 0 106 2.197225 0.000000 106 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +follow 0 92 2.397895 0.000000 143 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +contain 0 81 2.484907 0.000000 174 +control 0 82 2.484907 0.000000 164 +thing 0 84 2.484907 0.000000 189 +help 0 83 2.484907 0.000000 175 +complet 0 77 2.564949 0.000000 208 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +order 0 69 2.708050 0.000000 249 +function 0 62 2.772589 0.000000 275 +foundat 0 62 2.772589 0.000000 286 +abstract 0 62 2.772589 0.000000 276 +guid 0 63 2.772589 0.000000 267 +experi 0 64 2.772589 0.000000 283 +creat 0 63 2.772589 0.000000 277 +complex 0 64 2.772589 0.000000 269 +type 0 61 2.833213 0.000000 296 +special 0 56 2.890372 0.000000 320 +scientif 0 53 2.944439 0.000000 341 +allow 0 53 2.944439 0.000000 333 +particular 0 51 2.995732 0.000000 352 +visual 0 48 3.044522 0.000000 372 +principl 0 48 3.044522 0.000000 357 +possibl 0 47 3.091042 0.000000 378 +natur 0 44 3.135494 0.000000 406 +anoth 0 45 3.135494 0.000000 408 +show 0 43 3.178054 0.000000 417 +howev 0 41 3.218876 0.000000 422 +map 0 39 3.258097 0.000000 452 +brian 0 38 3.295837 0.000000 466 +paul 0 38 3.295837 0.000000 471 +prototyp 0 38 3.295837 0.000000 463 +close 0 38 3.295837 0.000000 465 +purpos 0 37 3.332205 0.000000 481 +tree 0 36 3.367296 0.000000 492 +approxim 0 35 3.401197 0.000000 509 +idea 0 32 3.465736 0.000000 545 +given 0 32 3.465736 0.000000 538 +express 0 32 3.465736 0.000000 540 +scientist 0 31 3.496508 0.000000 560 +anim 0 31 3.496508 0.000000 557 +specifi 0 30 3.555348 0.000000 568 +domain 0 30 3.555348 0.000000 564 +graph 0 30 3.555348 0.000000 576 +built 0 29 3.583519 0.000000 592 +arrai 0 27 3.637586 0.000000 627 +quit 0 27 3.637586 0.000000 633 +repres 0 26 3.688879 0.000000 656 +altern 0 26 3.688879 0.000000 641 +fundament 0 25 3.737670 0.000000 661 +frame 0 24 3.761200 0.000000 684 +interpret 0 24 3.761200 0.000000 686 +seri 0 24 3.761200 0.000000 708 +flow 0 24 3.761200 0.000000 700 +displai 0 23 3.806662 0.000000 712 +variabl 0 23 3.806662 0.000000 715 +sequenc 0 23 3.806662 0.000000 734 +size 0 23 3.806662 0.000000 713 +defin 0 22 3.850148 0.000000 746 +color 0 22 3.850148 0.000000 762 +thu 0 21 3.912023 0.000000 773 +fact 0 21 3.912023 0.000000 780 +assum 0 19 4.007333 0.000000 845 +appropri 0 18 4.060443 0.000000 883 +along 0 18 4.060443 0.000000 878 +render 0 17 4.110874 0.000000 947 +condit 0 16 4.174387 0.000000 975 +upon 0 16 4.174387 0.000000 978 +alreadi 0 16 4.174387 0.000000 963 +precis 0 15 4.248495 0.000000 1023 +finit 0 14 4.317488 0.000000 1106 +chuck 0 14 4.317488 0.000000 1108 +context 0 13 4.382027 0.000000 1153 +recurs 0 13 4.382027 0.000000 1127 +amount 0 12 4.465908 0.000000 1208 +primit 0 11 4.553877 0.000000 1317 +bill 0 11 4.553877 0.000000 1297 +sens 0 11 4.553877 0.000000 1305 +volum 0 11 4.553877 0.000000 1347 +relationship 0 10 4.653960 0.000000 1383 +dyer 0 9 4.753590 0.000000 1573 +assumpt 0 9 4.753590 0.000000 1514 +ideal 0 8 4.875197 0.000000 1630 +satisfi 0 8 4.875197 0.000000 1694 +therefor 0 7 5.010635 0.000000 1822 +fromth 0 7 5.010635 0.000000 1802 +pipelin 0 7 5.010635 0.000000 1830 +analyt 0 7 5.010635 0.000000 1913 +consequ 0 6 5.164786 0.000000 1989 +tupl 0 5 5.347108 0.000000 2244 +steer 0 5 5.347108 0.000000 2328 +infinit 0 4 5.568345 0.000000 2596 +pixel 0 4 5.568345 0.000000 2831 +wherea 0 4 5.568345 0.000000 2597 +encod 0 4 5.568345 0.000000 2929 +rigor 0 4 5.568345 0.000000 3030 +fora 0 4 5.568345 0.000000 2697 +lattic 0 3 5.857933 0.000000 3721 +interfacefor 0 3 5.857933 0.000000 3534 +scalar 0 2 6.263398 0.000000 4815 +temperatur 0 2 6.263398 0.000000 5985 +ofdata 0 2 6.263398 0.000000 6038 +hibbard 0 2 6.263398 0.000000 6066 +theidea 0 2 6.263398 0.000000 5428 +themathemat 0 2 6.263398 0.000000 4421 +isomorph 0 2 6.263398 0.000000 5976 +scientificdata 0 2 6.263398 0.000000 6067 +radianc 0 2 6.263398 0.000000 6068 +ofcours 0 2 6.263398 0.000000 4064 +axi 0 2 6.263398 0.000000 6069 +remark 0 2 6.263398 0.000000 4124 +wedo 0 2 6.263398 0.000000 5772 +datatyp 0 2 6.263398 0.000000 4129 +expressivenesscondit 0 1 6.957497 0.000000 17232 +voxel 0 1 6.957497 0.000000 17233 +calleda 0 1 6.957497 0.000000 17234 +adha 0 1 6.957497 0.000000 17235 +objectsrepres 0 1 6.957497 0.000000 17236 +objectsfrequ 0 1 6.957497 0.000000 17237 +functionswith 0 1 6.957497 0.000000 17238 +containfinit 0 1 6.957497 0.000000 17239 +chosenfrom 0 1 6.957497 0.000000 17240 +palett 0 1 6.957497 0.000000 17241 +numbersof 0 1 6.957497 0.000000 17242 +computationalmodel 0 1 6.957497 0.000000 17243 +informationcont 0 1 6.957497 0.000000 17244 +thatdisplai 0 1 6.957497 0.000000 17245 +onlythos 0 1 6.957497 0.000000 17246 +itimpl 0 1 6.957497 0.000000 17247 +satisfyingth 0 1 6.957497 0.000000 17248 +expressivenss 0 1 6.957497 0.000000 17249 +onhow 0 1 6.957497 0.000000 17250 +wecan 0 1 6.957497 0.000000 17251 +howprecis 0 1 6.957497 0.000000 17252 +voxelresolut 0 1 6.957497 0.000000 17253 +visualizationprocess 0 1 6.957497 0.000000 17254 +objectsto 0 1 6.957497 0.000000 17255 +theexpress 0 1 6.957497 0.000000 17256 +primitivevari 0 1 6.957497 0.000000 17257 +latitud 0 1 6.957497 0.000000 17258 +constructor 0 1 6.957497 0.000000 17259 +appropriatefor 0 1 6.957497 0.000000 17260 +containsth 0 1 6.957497 0.000000 17261 +canalso 0 1 6.957497 0.000000 17262 +displayi 0 1 6.957497 0.000000 17263 +graphicsprimit 0 1 6.957497 0.000000 17264 +locationand 0 1 6.957497 0.000000 17265 +animationsequ 0 1 6.957497 0.000000 17266 +thedisplai 0 1 6.957497 0.000000 17267 +isnatur 0 1 6.957497 0.000000 17268 +andtemperatur 0 1 6.957497 0.000000 17269 +calledvi 0 1 6.957497 0.000000 17270 +adthat 0 1 6.957497 0.000000 17271 +theircomput 0 1 6.957497 0.000000 17272 +theirprogram 0 1 6.957497 0.000000 17273 +thevi 0 1 6.957497 0.000000 17274 +vvof 0 1 6.957497 0.000000 17275 +thatsatisfi 0 1 6.957497 0.000000 17276 +implementationi 0 1 6.957497 0.000000 17277 +auser 0 1 6.957497 0.000000 17278 +abstractionof 0 1 6.957497 0.000000 17279 +ofmap 0 1 6.957497 0.000000 17280 +defineddata 0 1 6.957497 0.000000 17281 +ingener 0 1 6.957497 0.000000 17282 +usualapproach 0 1 6.957497 0.000000 17283 +bywrit 0 1 6.957497 0.000000 17284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..4696f90a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +high 0 130 2.079442 0.000000 101 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +world 0 115 2.197225 0.000000 126 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +larg 0 82 2.484907 0.000000 168 +resourc 0 81 2.484907 0.000000 172 +environ 0 84 2.484907 0.000000 177 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +goal 0 66 2.708050 0.000000 250 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +guid 0 63 2.772589 0.000000 267 +scientist 0 31 3.496508 0.000000 560 +team 0 27 3.637586 0.000000 625 +challeng 0 26 3.688879 0.000000 653 +enabl 0 26 3.688879 0.000000 655 +increas 0 20 3.951244 0.000000 829 +edulast 0 17 4.110874 0.000000 927 +admin 0 9 4.753590 0.000000 1476 +pool 0 6 5.164786 0.000000 2225 +condor 1 5 5.347108 5.347108 2577 +own 0 5 5.347108 0.000000 2531 +throughput 0 4 5.568345 0.000000 2993 +deploi 0 3 5.857933 0.000000 3750 +evaluatemechan 0 1 6.957497 0.000000 17285 +technologicaland 0 1 6.957497 0.000000 17286 +sociolog 0 1 6.957497 0.000000 17287 +suggestionscondor 0 1 6.957497 0.000000 17288 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..2024b75e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +septemb 0 65 2.772589 0.000000 274 +next 1 34 3.401197 3.401197 517 +miron 0 14 4.317488 0.000000 1110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..a2fdb9ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +file 0 132 1.945910 0.000000 70 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +provid 0 121 2.079442 0.000000 94 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +report 0 131 2.079442 0.000000 92 +version 0 113 2.197225 0.000000 122 +structur 0 106 2.197225 0.000000 105 +manag 0 114 2.197225 0.000000 125 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +octob 0 89 2.397895 0.000000 156 +sinc 0 90 2.397895 0.000000 159 +select 0 91 2.397895 0.000000 154 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +contain 0 81 2.484907 0.000000 174 +wide 0 84 2.484907 0.000000 185 +member 0 84 2.484907 0.000000 165 +control 0 82 2.484907 0.000000 164 +level 0 87 2.484907 0.000000 180 +environ 0 84 2.484907 0.000000 177 +interfac 0 79 2.564949 0.000000 209 +optim 0 79 2.564949 0.000000 197 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +sourc 0 77 2.564949 0.000000 201 +effici 0 73 2.639057 0.000000 233 +addit 0 74 2.639057 0.000000 228 +degre 0 69 2.708050 0.000000 259 +main 0 67 2.708050 0.000000 256 +evalu 0 64 2.772589 0.000000 266 +complex 0 64 2.772589 0.000000 269 +organ 0 65 2.772589 0.000000 265 +collect 0 65 2.772589 0.000000 268 +interact 0 62 2.772589 0.000000 270 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +variou 0 56 2.890372 0.000000 317 +sever 0 56 2.890372 0.000000 322 +overview 0 56 2.890372 0.000000 323 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +instruct 0 53 2.944439 0.000000 332 +investig 0 51 2.995732 0.000000 353 +made 0 44 3.135494 0.000000 398 +term 0 43 3.178054 0.000000 411 +combin 0 42 3.218876 0.000000 421 +announc 0 40 3.258097 0.000000 441 +programm 0 39 3.258097 0.000000 445 +transact 0 39 3.258097 0.000000 438 +submit 0 39 3.258097 0.000000 440 +manual 0 35 3.401197 0.000000 504 +queri 0 33 3.433987 0.000000 524 +extend 0 32 3.465736 0.000000 539 +rang 0 30 3.555348 0.000000 565 +domain 0 30 3.555348 0.000000 564 +releas 0 28 3.610918 0.000000 616 +linux 0 27 3.637586 0.000000 631 +rule 0 26 3.688879 0.000000 638 +enhanc 0 26 3.688879 0.000000 644 +relev 0 26 3.688879 0.000000 637 +comp 0 26 3.688879 0.000000 650 +strategi 0 25 3.737670 0.000000 682 +seri 0 24 3.761200 0.000000 708 +variabl 0 23 3.806662 0.000000 715 +instal 0 22 3.850148 0.000000 754 +disk 0 22 3.850148 0.000000 747 +among 0 21 3.912023 0.000000 781 +newsgroup 0 21 3.912023 0.000000 783 +binari 0 20 3.951244 0.000000 823 +edulast 0 17 4.110874 0.000000 927 +choos 0 16 4.174387 0.000000 964 +permit 0 16 4.174387 0.000000 962 +choic 0 16 4.174387 0.000000 979 +atth 0 15 4.248495 0.000000 1019 +indic 0 15 4.248495 0.000000 1013 +forth 0 13 4.382027 0.000000 1186 +misc 0 13 4.382027 0.000000 1124 +solari 0 12 4.465908 0.000000 1238 +robust 0 12 4.465908 0.000000 1271 +deduct 0 12 4.465908 0.000000 1236 +stai 0 12 4.465908 0.000000 1215 +primit 0 11 4.553877 0.000000 1317 +modul 0 10 4.653960 0.000000 1434 +resid 0 10 4.653960 0.000000 1461 +underli 0 10 4.653960 0.000000 1410 +rich 0 10 4.653960 0.000000 1396 +declar 0 9 4.753590 0.000000 1526 +desir 0 9 4.753590 0.000000 1542 +readm 0 8 4.875197 0.000000 1699 +canb 0 7 5.010635 0.000000 1846 +aggreg 0 6 5.164786 0.000000 2219 +coral 1 5 5.347108 5.347108 2538 +augment 0 5 5.347108 0.000000 2350 +tupl 0 5 5.347108 0.000000 2244 +quantifi 0 5 5.347108 0.000000 2525 +lang 0 5 5.347108 0.000000 2294 +imper 0 4 5.568345 0.000000 3067 +delet 0 4 5.568345 0.000000 2691 +suno 0 4 5.568345 0.000000 2790 +claus 0 3 5.857933 0.000000 3733 +hpux 0 3 5.857933 0.000000 3780 +grab 0 2 6.263398 0.000000 5723 +objectiveoverviewreleas 0 2 6.263398 0.000000 6070 +informationse 0 2 6.263398 0.000000 6071 +horn 0 2 6.263398 0.000000 6072 +negat 0 2 6.263398 0.000000 6073 +andautomat 0 2 6.263398 0.000000 5413 +reciev 0 2 6.263398 0.000000 5600 +nobin 0 1 6.957497 0.000000 17289 +projectcor 0 1 6.957497 0.000000 17290 +projectdocu 0 1 6.957497 0.000000 17291 +coralpeopl 0 1 6.957497 0.000000 17292 +coraloth 0 1 6.957497 0.000000 17293 +madisonobject 0 1 6.957497 0.000000 17294 +efficientdeduct 0 1 6.957497 0.000000 17295 +coralsystem 0 1 6.957497 0.000000 17296 +durationof 0 1 6.957497 0.000000 17297 +declaritiveand 0 1 6.957497 0.000000 17298 +supportsgener 0 1 6.957497 0.000000 17299 +coralimplement 0 1 6.957497 0.000000 17300 +modulein 0 1 6.957497 0.000000 17301 +insertand 0 1 6.957497 0.000000 17302 +canprogram 0 1 6.957497 0.000000 17303 +withcor 0 1 6.957497 0.000000 17304 +allowingc 0 1 6.957497 0.000000 17305 +coralimplemen 0 1 6.957497 0.000000 17306 +theexodusstorag 0 1 6.957497 0.000000 17307 +manang 0 1 6.957497 0.000000 17308 +aclient 0 1 6.957497 0.000000 17309 +requiringy 0 1 6.957497 0.000000 17310 +announcemnt 0 1 6.957497 0.000000 17311 +listwhich 0 1 6.957497 0.000000 17312 +shawn 0 1 6.957497 0.000000 17313 +flisakowski 0 1 6.957497 0.000000 17314 +flisakow 0 1 6.957497 0.000000 17315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..39b604ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +data 0 170 1.791759 0.000000 49 +problem 1 147 1.945910 1.945910 75 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +well 0 109 2.197225 0.000000 121 +theori 0 111 2.197225 0.000000 127 +look 0 107 2.197225 0.000000 115 +mathemat 0 108 2.197225 0.000000 123 +access 0 102 2.302585 0.000000 136 +techniqu 0 99 2.302585 0.000000 138 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +octob 0 89 2.397895 0.000000 156 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +requir 0 81 2.484907 0.000000 167 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +appli 0 71 2.639057 0.000000 226 +nation 0 74 2.639057 0.000000 240 +solv 0 73 2.639057 0.000000 234 +function 0 62 2.772589 0.000000 275 +result 0 65 2.772589 0.000000 281 +collect 0 65 2.772589 0.000000 268 +evalu 0 64 2.772589 0.000000 266 +laboratori 0 63 2.772589 0.000000 292 +content 0 59 2.833213 0.000000 302 +point 0 58 2.890372 0.000000 319 +sever 0 56 2.890372 0.000000 322 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +major 0 56 2.890372 0.000000 315 +overview 0 56 2.890372 0.000000 323 +three 0 54 2.944439 0.000000 330 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +give 0 50 3.044522 0.000000 359 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +archiv 0 49 3.044522 0.000000 364 +possibl 0 47 3.091042 0.000000 378 +directori 0 45 3.135494 0.000000 396 +describ 0 45 3.135494 0.000000 400 +keep 0 44 3.135494 0.000000 409 +linear 0 41 3.218876 0.000000 431 +small 0 39 3.258097 0.000000 447 +form 0 39 3.258097 0.000000 443 +origin 0 38 3.295837 0.000000 472 +download 0 36 3.367296 0.000000 489 +approxim 0 35 3.401197 0.000000 509 +michael 0 35 3.401197 0.000000 514 +survei 0 35 3.401197 0.000000 513 +everi 0 34 3.401197 0.000000 519 +within 0 33 3.433987 0.000000 525 +given 0 32 3.465736 0.000000 538 +taken 0 31 3.496508 0.000000 555 +exist 0 30 3.555348 0.000000 569 +option 0 30 3.555348 0.000000 575 +steve 0 29 3.583519 0.000000 594 +becom 0 28 3.610918 0.000000 603 +measur 0 28 3.610918 0.000000 609 +determin 0 27 3.637586 0.000000 630 +relev 0 26 3.688879 0.000000 637 +consist 0 26 3.688879 0.000000 651 +subject 0 26 3.688879 0.000000 647 +compar 0 26 3.688879 0.000000 648 +strategi 0 25 3.737670 0.000000 682 +known 0 24 3.761200 0.000000 702 +equat 0 23 3.806662 0.000000 724 +sequenc 0 23 3.806662 0.000000 734 +serv 0 22 3.850148 0.000000 758 +almost 0 22 3.850148 0.000000 742 +path 0 21 3.912023 0.000000 778 +similar 0 21 3.912023 0.000000 771 +avoid 0 21 3.912023 0.000000 799 +entir 0 20 3.951244 0.000000 811 +along 0 18 4.060443 0.000000 878 +spars 0 16 4.174387 0.000000 989 +matlab 0 14 4.317488 0.000000 1081 +role 0 14 4.317488 0.000000 1101 +nonlinear 0 14 4.317488 0.000000 1107 +easili 0 14 4.317488 0.000000 1077 +econom 0 13 4.382027 0.000000 1184 +cannot 0 13 4.382027 0.000000 1144 +step 0 13 4.382027 0.000000 1138 +directli 0 13 4.382027 0.000000 1141 +forth 0 13 4.382027 0.000000 1186 +deriv 0 13 4.382027 0.000000 1145 +emploi 0 12 4.465908 0.000000 1284 +iter 0 12 4.465908 0.000000 1206 +evolv 0 12 4.465908 0.000000 1223 +regard 0 11 4.553877 0.000000 1309 +underli 0 10 4.653960 0.000000 1410 +establish 0 9 4.753590 0.000000 1532 +routin 0 9 4.753590 0.000000 1549 +mile 0 8 4.875197 0.000000 1743 +ferri 0 8 4.875197 0.000000 1715 +formul 0 8 4.875197 0.000000 1733 +solver 0 7 5.010635 0.000000 1911 +newton 0 7 5.010635 0.000000 1824 +smooth 0 7 5.010635 0.000000 1855 +secondari 0 7 5.010635 0.000000 1884 +converg 0 7 5.010635 0.000000 1844 +zero 0 7 5.010635 0.000000 1896 +divers 0 6 5.164786 0.000000 2232 +mix 0 6 5.164786 0.000000 2200 +freeli 0 6 5.164786 0.000000 2014 +subsystem 0 6 5.164786 0.000000 2015 +interior 0 5 5.347108 0.000000 2439 +decad 0 5 5.347108 0.000000 2455 +complementari 0 5 5.347108 0.000000 2523 +pivot 0 5 5.347108 0.000000 2426 +merit 0 5 5.347108 0.000000 2466 +argonn 0 5 5.347108 0.000000 2461 +monograph 0 4 5.568345 0.000000 2860 +areavail 0 4 5.568345 0.000000 2810 +colorado 0 4 5.568345 0.000000 2938 +algorithmsand 0 4 5.568345 0.000000 2680 +trick 0 4 5.568345 0.000000 2967 +complementar 1 3 5.857933 5.857933 3999 +toolbox 0 3 5.857933 0.000000 3112 +neta 0 3 5.857933 0.000000 3789 +forthes 0 3 5.857933 0.000000 3199 +andm 0 3 5.857933 0.000000 3901 +violat 0 3 5.857933 0.000000 3211 +engineeringand 0 3 5.857933 0.000000 3779 +preprocessor 0 3 5.857933 0.000000 3844 +energi 0 3 5.857933 0.000000 3950 +gam 0 2 6.263398 0.000000 4758 +edufor 0 2 6.263398 0.000000 5831 +lemk 0 2 6.263398 0.000000 5693 +similarto 0 2 6.263398 0.000000 6074 +anapproxim 0 2 6.263398 0.000000 5045 +norm 0 2 6.263398 0.000000 5643 +perturb 0 2 6.263398 0.000000 6075 +leadto 0 2 6.263398 0.000000 5350 +residu 0 2 6.263398 0.000000 4741 +thecurr 0 2 6.263398 0.000000 5862 +equilibrium 0 2 6.263398 0.000000 4259 +thegam 0 2 6.263398 0.000000 5430 +pointmethod 0 2 6.263398 0.000000 4835 +nonsmooth 0 1 6.957497 0.000000 17316 +mcplib 0 1 6.957497 0.000000 17317 +functionevalu 0 1 6.957497 0.000000 17318 +subproblem 0 1 6.957497 0.000000 17319 +uponreformul 0 1 6.957497 0.000000 17320 +fruitfuldisciplin 0 1 6.957497 0.000000 17321 +incomplementar 0 1 6.957497 0.000000 17322 +meetingsof 0 1 6.957497 0.000000 17323 +forcomplementar 0 1 6.957497 0.000000 17324 +researcherssoftwar 0 1 6.957497 0.000000 17325 +problemdescript 0 1 6.957497 0.000000 17326 +frommatlab 0 1 6.957497 0.000000 17327 +jacobian 0 1 6.957497 0.000000 17328 +specificvers 0 1 6.957497 0.000000 17329 +hook 0 1 6.957497 0.000000 17330 +rutherford 0 1 6.957497 0.000000 17331 +classicaljosephi 0 1 6.957497 0.000000 17332 +linearizedsubproblem 0 1 6.957497 0.000000 17333 +defineth 0 1 6.957497 0.000000 17334 +dampedlinesearch 0 1 6.957497 0.000000 17335 +infeas 0 1 6.957497 0.000000 17336 +restartprocedur 0 1 6.957497 0.000000 17337 +totermin 0 1 6.957497 0.000000 17338 +rescal 0 1 6.957497 0.000000 17339 +equilibr 0 1 6.957497 0.000000 17340 +elementsappear 0 1 6.957497 0.000000 17341 +mcpor 0 1 6.957497 0.000000 17342 +anonsmooth 0 1 6.957497 0.000000 17343 +reformul 0 1 6.957497 0.000000 17344 +algorithmconsist 0 1 6.957497 0.000000 17345 +pathto 0 1 6.957497 0.000000 17346 +aposs 0 1 6.957497 0.000000 17347 +thepath 0 1 6.957497 0.000000 17348 +partiallycomput 0 1 6.957497 0.000000 17349 +relinear 0 1 6.957497 0.000000 17350 +anonmonoton 0 1 6.957497 0.000000 17351 +watchdog 0 1 6.957497 0.000000 17352 +minima 0 1 6.957497 0.000000 17353 +robustnessimprov 0 1 6.957497 0.000000 17354 +proxim 0 1 6.957497 0.000000 17355 +qpcomp 0 1 6.957497 0.000000 17356 +ishandl 0 1 6.957497 0.000000 17357 +thenapproxim 0 1 6.957497 0.000000 17358 +theaccuraci 0 1 6.957497 0.000000 17359 +mpsge 0 1 6.957497 0.000000 17360 +thatallow 0 1 6.957497 0.000000 17361 +nemsth 0 1 6.957497 0.000000 17362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..6d51fc22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +manag 0 114 2.197225 0.000000 125 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +requir 0 81 2.484907 0.000000 167 +orient 0 80 2.564949 0.000000 205 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +still 0 50 3.044522 0.000000 362 +michael 0 35 3.401197 0.000000 514 +storag 0 31 3.496508 0.000000 553 +mike 0 24 3.761200 0.000000 703 +prepar 0 20 3.951244 0.000000 824 +benchmark 0 19 4.007333 0.000000 859 +minim 0 18 4.060443 0.000000 887 +carei 0 8 4.875197 0.000000 1781 +licens 0 5 5.347108 0.000000 2520 +exodu 1 4 5.568345 5.568345 3075 +zwill 0 4 5.568345 0.000000 3076 +successor 0 3 5.857933 0.000000 3576 +theexodu 0 2 6.263398 0.000000 6076 +persistentprogram 0 2 6.263398 0.000000 5997 +pageexodu 0 1 6.957497 0.000000 17363 +toolkitnot 0 1 6.957497 0.000000 17364 +succed 0 1 6.957497 0.000000 17365 +theshor 0 1 6.957497 0.000000 17366 +eduprincip 0 1 6.957497 0.000000 17367 +dewittse 0 1 6.957497 0.000000 17368 +exodusshor 0 1 6.957497 0.000000 17369 +exoduslatest 0 1 6.957497 0.000000 17370 +compilercontribut 0 1 6.957497 0.000000 17371 +managera 0 1 6.957497 0.000000 17372 +exodus_al 0 1 6.957497 0.000000 17373 +oodbsdat 0 1 6.957497 0.000000 17374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..aaac917d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +wisconsin 0 169 1.791759 0.000000 54 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +databas 0 122 2.079442 0.000000 86 +provid 0 121 2.079442 0.000000 94 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +manag 0 114 2.197225 0.000000 125 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +peopl 0 96 2.302585 0.000000 132 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +graphic 0 90 2.397895 0.000000 147 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +help 0 83 2.484907 0.000000 175 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +addit 0 74 2.639057 0.000000 228 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +david 0 71 2.639057 0.000000 232 +order 0 69 2.708050 0.000000 249 +creat 0 63 2.772589 0.000000 277 +result 0 65 2.772589 0.000000 281 +complex 0 64 2.772589 0.000000 269 +prof 0 64 2.772589 0.000000 273 +type 0 61 2.833213 0.000000 296 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +point 0 58 2.890372 0.000000 319 +sever 0 56 2.890372 0.000000 322 +sampl 0 53 2.944439 0.000000 339 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +tabl 0 51 2.995732 0.000000 346 +set 0 50 3.044522 0.000000 361 +could 0 46 3.091042 0.000000 383 +execut 0 45 3.135494 0.000000 404 +video 0 44 3.135494 0.000000 405 +examin 0 42 3.218876 0.000000 424 +multipl 0 39 3.258097 0.000000 453 +streetmadison 0 38 3.295837 0.000000 474 +connect 0 37 3.332205 0.000000 485 +especi 0 36 3.367296 0.000000 496 +either 0 35 3.401197 0.000000 506 +queri 0 33 3.433987 0.000000 524 +extend 0 32 3.465736 0.000000 539 +ad 0 32 3.465736 0.000000 544 +built 0 29 3.583519 0.000000 592 +hope 0 28 3.610918 0.000000 610 +manipul 0 27 3.637586 0.000000 624 +client 0 25 3.737670 0.000000 679 +store 0 24 3.761200 0.000000 693 +scalabl 0 24 3.761200 0.000000 705 +handl 0 24 3.761200 0.000000 685 +displai 0 23 3.806662 0.000000 712 +brows 0 23 3.806662 0.000000 726 +size 0 23 3.806662 0.000000 713 +thread 0 23 3.806662 0.000000 722 +defin 0 22 3.850148 0.000000 746 +mpeg 0 20 3.951244 0.000000 831 +benchmark 0 19 4.007333 0.000000 859 +layer 0 17 4.110874 0.000000 926 +spatial 0 16 4.174387 0.000000 988 +massiv 0 15 4.248495 0.000000 1026 +indic 0 15 4.248495 0.000000 1013 +attribut 0 14 4.317488 0.000000 1092 +front 0 13 4.382027 0.000000 1154 +script 0 13 4.382027 0.000000 1171 +menu 0 13 4.382027 0.000000 1156 +composit 0 13 4.382027 0.000000 1150 +context 0 13 4.382027 0.000000 1153 +calcul 0 12 4.465908 0.000000 1268 +emploi 0 12 4.465908 0.000000 1284 +shore 0 11 4.553877 0.000000 1377 +string 0 11 4.553877 0.000000 1340 +persist 0 11 4.553877 0.000000 1367 +abil 0 11 4.553877 0.000000 1341 +subset 0 10 4.653960 0.000000 1425 +vldb 0 10 4.653960 0.000000 1470 +underli 0 10 4.653960 0.000000 1410 +correspond 0 10 4.653960 0.000000 1382 +custom 0 10 4.653960 0.000000 1414 +label 0 10 4.653960 0.000000 1423 +compos 0 9 4.753590 0.000000 1527 +paradis 1 8 4.875197 4.875197 1782 +polygon 0 8 4.875197 0.000000 1723 +databasesystem 0 8 4.875197 0.000000 1617 +sensit 0 8 4.875197 0.000000 1726 +insert 0 8 4.875197 0.000000 1687 +successfulli 0 7 5.010635 0.000000 1869 +geograph 0 6 5.164786 0.000000 2236 +drop 0 6 5.164786 0.000000 2008 +band 0 6 5.164786 0.000000 2198 +invok 0 6 5.164786 0.000000 2079 +syntax 0 6 5.164786 0.000000 2030 +ship 0 5 5.347108 0.000000 2534 +aim 0 5 5.347108 0.000000 2477 +tupl 0 5 5.347108 0.000000 2244 +madisoncomput 0 5 5.347108 0.000000 2391 +andevalu 0 4 5.568345 0.000000 2706 +zoom 0 4 5.568345 0.000000 2961 +insur 0 4 5.568345 0.000000 2939 +providesa 0 3 5.857933 0.000000 3884 +informationse 0 2 6.263398 0.000000 6071 +serverobject 0 2 6.263398 0.000000 6077 +raster 0 2 6.263398 0.000000 6078 +polylin 0 2 6.263398 0.000000 6079 +sketch 0 2 6.263398 0.000000 5946 +extent 0 2 6.263398 0.000000 6080 +paid 0 2 6.263398 0.000000 6081 +biswadeep 0 2 6.263398 0.000000 4805 +projectparadis 0 1 6.957497 0.000000 17375 +frontend 0 1 6.957497 0.000000 17376 +sequoia 0 1 6.957497 0.000000 17377 +iscap 0 1 6.957497 0.000000 17378 +applyingobject 0 1 6.957497 0.000000 17379 +ofstor 0 1 6.957497 0.000000 17380 +tosignificantli 0 1 6.957497 0.000000 17381 +thatcan 0 1 6.957497 0.000000 17382 +andsupport 0 1 6.957497 0.000000 17383 +paradiseprovid 0 1 6.957497 0.000000 17384 +gisappl 0 1 6.957497 0.000000 17385 +asinteg 0 1 6.957497 0.000000 17386 +circl 0 1 6.957497 0.000000 17387 +spatialattribut 0 1 6.957497 0.000000 17388 +foroverlap 0 1 6.957497 0.000000 17389 +selectingcolor 0 1 6.957497 0.000000 17390 +withad 0 1 6.957497 0.000000 17391 +issueimplicit 0 1 6.957497 0.000000 17392 +arubb 0 1 6.957497 0.000000 17393 +querycompos 0 1 6.957497 0.000000 17394 +databaseschema 0 1 6.957497 0.000000 17395 +beview 0 1 6.957497 0.000000 17396 +bedisplai 0 1 6.957497 0.000000 17397 +sqlwe 0 1 6.957497 0.000000 17398 +extendedset 0 1 6.957497 0.000000 17399 +byus 0 1 6.957497 0.000000 17400 +standarddatabas 0 1 6.957497 0.000000 17401 +anddrop 0 1 6.957497 0.000000 17402 +paradiseserv 0 1 6.957497 0.000000 17403 +theresult 0 1 6.957497 0.000000 17404 +ismulti 0 1 6.957497 0.000000 17405 +sameserv 0 1 6.957497 0.000000 17406 +carefulattent 0 1 6.957497 0.000000 17407 +processqueri 0 1 6.957497 0.000000 17408 +largevolum 0 1 6.957497 0.000000 17409 +frontendeurop 0 1 6.957497 0.000000 17410 +pressher 0 1 6.957497 0.000000 17411 +projectattn 0 1 6.957497 0.000000 17412 +dewittunivers 0 1 6.957497 0.000000 17413 +edumor 0 1 6.957497 0.000000 17414 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..fcbd021f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +support 0 132 1.945910 0.000000 83 +report 0 131 2.079442 0.000000 92 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +welcom 0 122 2.079442 0.000000 99 +intern 0 108 2.197225 0.000000 128 +look 0 107 2.197225 0.000000 115 +site 0 106 2.197225 0.000000 119 +text 0 98 2.302585 0.000000 133 +commun 0 95 2.397895 0.000000 157 +comment 0 93 2.397895 0.000000 146 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +educ 0 86 2.484907 0.000000 191 +know 0 80 2.564949 0.000000 198 +servic 0 72 2.639057 0.000000 236 +onlin 0 75 2.639057 0.000000 223 +goal 0 66 2.708050 0.000000 250 +organ 0 65 2.772589 0.000000 265 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +sever 0 56 2.890372 0.000000 322 +suggest 0 53 2.944439 0.000000 331 +three 0 54 2.944439 0.000000 330 +week 0 52 2.995732 0.000000 343 +format 0 48 3.044522 0.000000 356 +effect 0 46 3.091042 0.000000 385 +show 0 43 3.178054 0.000000 417 +offer 0 43 3.178054 0.000000 414 +howev 0 41 3.218876 0.000000 422 +announc 0 40 3.258097 0.000000 441 +primari 0 25 3.737670 0.000000 669 +daili 0 24 3.761200 0.000000 706 +annot 0 21 3.912023 0.000000 775 +theunivers 0 21 3.912023 0.000000 797 +longer 0 20 3.951244 0.000000 816 +entir 0 20 3.951244 0.000000 811 +toolkit 0 20 3.951244 0.000000 835 +universityof 0 15 4.248495 0.000000 1061 +everyon 0 13 4.382027 0.000000 1148 +summar 0 11 4.553877 0.000000 1295 +discov 0 9 4.753590 0.000000 1562 +hundr 0 9 4.753590 0.000000 1528 +filter 0 8 4.875197 0.000000 1641 +scout 1 7 5.010635 5.010635 1903 +happen 0 7 5.010635 0.000000 1790 +valuabl 0 5 5.347108 0.000000 2256 +newli 0 3 5.857933 0.000000 3786 +useth 0 3 5.857933 0.000000 3110 +thescout 0 2 6.263398 0.000000 6082 +homepagego 0 1 6.957497 0.000000 17415 +versionnewslett 0 1 6.957497 0.000000 17416 +newand 0 1 6.957497 0.000000 17417 +toolsinternet 0 1 6.957497 0.000000 17418 +effectiveinternet 0 1 6.957497 0.000000 17419 +availablea 0 1 6.957497 0.000000 17420 +studentssurf 0 1 6.957497 0.000000 17421 +smarter 0 1 6.957497 0.000000 17422 +canchoos 0 1 6.957497 0.000000 17423 +annoucementseach 0 1 6.957497 0.000000 17424 +networktool 0 1 6.957497 0.000000 17425 +vefound 0 1 6.957497 0.000000 17426 +byeduc 0 1 6.957497 0.000000 17427 +encouragefeedback 0 1 6.957497 0.000000 17428 +ournewest 0 1 6.957497 0.000000 17429 +feedbackscout 0 1 6.957497 0.000000 17430 +servicesfor 0 1 6.957497 0.000000 17431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..98b25cac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,482 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +like 0 132 1.945910 0.000000 81 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +provid 0 121 2.079442 0.000000 94 +document 0 121 2.079442 0.000000 89 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +version 0 113 2.197225 0.000000 122 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +text 0 98 2.302585 0.000000 133 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +comment 0 93 2.397895 0.000000 146 +environ 0 84 2.484907 0.000000 177 +larg 0 82 2.484907 0.000000 168 +chang 0 82 2.484907 0.000000 163 +second 0 81 2.484907 0.000000 166 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +requir 0 81 2.484907 0.000000 167 +start 0 83 2.484907 0.000000 173 +server 0 76 2.564949 0.000000 204 +messag 0 76 2.564949 0.000000 212 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +sourc 0 77 2.564949 0.000000 201 +exampl 0 77 2.564949 0.000000 195 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +write 0 72 2.639057 0.000000 222 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +degre 0 69 2.708050 0.000000 259 +order 0 69 2.708050 0.000000 249 +august 0 66 2.708050 0.000000 257 +receiv 0 66 2.708050 0.000000 244 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +improv 0 62 2.772589 0.000000 289 +copi 0 63 2.772589 0.000000 284 +type 0 61 2.833213 0.000000 296 +content 0 59 2.833213 0.000000 302 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +plai 0 60 2.833213 0.000000 307 +unix 0 58 2.890372 0.000000 308 +space 0 57 2.890372 0.000000 310 +major 0 56 2.890372 0.000000 315 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +sever 0 56 2.890372 0.000000 322 +index 0 56 2.890372 0.000000 309 +point 0 58 2.890372 0.000000 319 +processor 0 54 2.944439 0.000000 335 +three 0 54 2.944439 0.000000 330 +extens 0 53 2.944439 0.000000 340 +found 0 53 2.944439 0.000000 337 +hardwar 0 51 2.995732 0.000000 350 +much 0 52 2.995732 0.000000 349 +digit 0 52 2.995732 0.000000 348 +run 0 51 2.995732 0.000000 347 +tabl 0 51 2.995732 0.000000 346 +date 0 51 2.995732 0.000000 344 +basic 0 50 3.044522 0.000000 360 +set 0 50 3.044522 0.000000 361 +standard 0 48 3.044522 0.000000 365 +give 0 50 3.044522 0.000000 359 +without 0 50 3.044522 0.000000 370 +archiv 0 49 3.044522 0.000000 364 +featur 0 46 3.091042 0.000000 386 +could 0 46 3.091042 0.000000 383 +possibl 0 47 3.091042 0.000000 378 +get 0 46 3.091042 0.000000 380 +describ 0 45 3.135494 0.000000 400 +natur 0 44 3.135494 0.000000 406 +video 0 44 3.135494 0.000000 405 +anoth 0 45 3.135494 0.000000 408 +term 0 43 3.178054 0.000000 411 +mechan 0 43 3.178054 0.000000 416 +futur 0 41 3.218876 0.000000 427 +howev 0 41 3.218876 0.000000 422 +editor 0 41 3.218876 0.000000 433 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +field 0 37 3.332205 0.000000 482 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +feel 0 37 3.332205 0.000000 483 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +tree 0 36 3.367296 0.000000 492 +singl 0 34 3.401197 0.000000 510 +either 0 35 3.401197 0.000000 506 +everi 0 34 3.401197 0.000000 519 +approxim 0 35 3.401197 0.000000 509 +post 0 35 3.401197 0.000000 505 +return 0 34 3.401197 0.000000 502 +concurr 0 34 3.401197 0.000000 501 +go 0 33 3.433987 0.000000 529 +product 0 33 3.433987 0.000000 527 +queri 0 33 3.433987 0.000000 524 +obtain 0 33 3.433987 0.000000 534 +ad 0 32 3.465736 0.000000 544 +kind 0 32 3.465736 0.000000 541 +photo 0 31 3.496508 0.000000 561 +someth 0 31 3.496508 0.000000 554 +exist 0 30 3.555348 0.000000 569 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +option 0 30 3.555348 0.000000 575 +built 0 29 3.583519 0.000000 592 +turn 0 29 3.583519 0.000000 586 +depend 0 29 3.583519 0.000000 583 +particip 0 29 3.583519 0.000000 589 +releas 0 28 3.610918 0.000000 616 +framework 0 28 3.610918 0.000000 606 +intend 0 28 3.610918 0.000000 599 +becom 0 28 3.610918 0.000000 603 +propos 0 28 3.610918 0.000000 602 +hope 0 28 3.610918 0.000000 610 +retriev 0 27 3.637586 0.000000 621 +mind 0 27 3.637586 0.000000 632 +manipul 0 27 3.637586 0.000000 624 +quit 0 27 3.637586 0.000000 633 +symbol 0 27 3.637586 0.000000 620 +linux 0 27 3.637586 0.000000 631 +team 0 27 3.637586 0.000000 625 +effort 0 26 3.688879 0.000000 652 +request 0 26 3.688879 0.000000 635 +rather 0 26 3.688879 0.000000 642 +enhanc 0 26 3.688879 0.000000 644 +enabl 0 26 3.688879 0.000000 655 +subject 0 26 3.688879 0.000000 647 +client 0 25 3.737670 0.000000 679 +wai 0 25 3.737670 0.000000 662 +valu 0 25 3.737670 0.000000 665 +task 0 25 3.737670 0.000000 678 +scalabl 0 24 3.761200 0.000000 705 +store 0 24 3.761200 0.000000 693 +reach 0 24 3.761200 0.000000 688 +defin 0 22 3.850148 0.000000 746 +serv 0 22 3.850148 0.000000 758 +varieti 0 22 3.850148 0.000000 740 +almost 0 22 3.850148 0.000000 742 +deal 0 22 3.850148 0.000000 736 +emphasi 0 22 3.850148 0.000000 755 +sent 0 22 3.850148 0.000000 763 +programminglanguag 0 21 3.912023 0.000000 782 +fund 0 21 3.912023 0.000000 805 +flexibl 0 21 3.912023 0.000000 792 +latest 0 21 3.912023 0.000000 785 +thu 0 21 3.912023 0.000000 773 +similar 0 21 3.912023 0.000000 771 +binari 0 20 3.951244 0.000000 823 +entir 0 20 3.951244 0.000000 811 +fine 0 20 3.951244 0.000000 822 +benchmark 0 19 4.007333 0.000000 859 +media 0 19 4.007333 0.000000 861 +definit 0 19 4.007333 0.000000 864 +separ 0 19 4.007333 0.000000 844 +concentr 0 18 4.060443 0.000000 906 +along 0 18 4.060443 0.000000 878 +regist 0 17 4.110874 0.000000 938 +weekli 0 17 4.110874 0.000000 919 +repositori 0 17 4.110874 0.000000 932 +expand 0 17 4.110874 0.000000 928 +whether 0 17 4.110874 0.000000 918 +stop 0 17 4.110874 0.000000 942 +ultim 0 17 4.110874 0.000000 943 +attempt 0 17 4.110874 0.000000 917 +sept 0 17 4.110874 0.000000 952 +anyon 0 17 4.110874 0.000000 916 +commerci 0 16 4.174387 0.000000 1005 +intel 0 16 4.174387 0.000000 1000 +alreadi 0 16 4.174387 0.000000 963 +portion 0 16 4.174387 0.000000 971 +sign 0 16 4.174387 0.000000 970 +transit 0 15 4.248495 0.000000 1046 +capabl 0 15 4.248495 0.000000 1016 +hierarch 0 15 4.248495 0.000000 1018 +hybrid 0 15 4.248495 0.000000 1057 +piec 0 15 4.248495 0.000000 1020 +stream 0 15 4.248495 0.000000 1015 +charact 0 15 4.248495 0.000000 1028 +heterogen 0 14 4.317488 0.000000 1090 +anonym 0 14 4.317488 0.000000 1100 +attribut 0 14 4.317488 0.000000 1092 +role 0 14 4.317488 0.000000 1101 +shown 0 14 4.317488 0.000000 1080 +decid 0 14 4.317488 0.000000 1075 +directli 0 13 4.382027 0.000000 1141 +nasa 0 13 4.382027 0.000000 1188 +earlier 0 13 4.382027 0.000000 1140 +cannot 0 13 4.382027 0.000000 1144 +individu 0 13 4.382027 0.000000 1126 +convert 0 13 4.382027 0.000000 1122 +uniqu 0 12 4.465908 0.000000 1228 +target 0 12 4.465908 0.000000 1282 +safe 0 12 4.465908 0.000000 1274 +solari 0 12 4.465908 0.000000 1238 +nanci 0 12 4.465908 0.000000 1256 +shore 1 11 4.553877 4.553877 1377 +persist 0 11 4.553877 0.000000 1367 +string 0 11 4.553877 0.000000 1340 +arpa 0 11 4.553877 0.000000 1369 +distinguish 0 11 4.553877 0.000000 1357 +fix 0 11 4.553877 0.000000 1327 +facilit 0 10 4.653960 0.000000 1412 +equal 0 10 4.653960 0.000000 1424 +consortium 0 10 4.653960 0.000000 1467 +length 0 10 4.653960 0.000000 1400 +subscrib 0 9 4.753590 0.000000 1541 +inter 0 9 4.753590 0.000000 1530 +rel 0 9 4.753590 0.000000 1487 +mention 0 9 4.753590 0.000000 1569 +familiar 0 9 4.753590 0.000000 1485 +contrast 0 8 4.875197 0.000000 1637 +root 0 8 4.875197 0.000000 1650 +realiz 0 8 4.875197 0.000000 1739 +cross 0 8 4.875197 0.000000 1703 +port 0 8 4.875197 0.000000 1766 +parti 0 8 4.875197 0.000000 1676 +digest 0 7 5.010635 0.000000 1864 +supportfor 0 7 5.010635 0.000000 1854 +sparc 0 7 5.010635 0.000000 1860 +henc 0 7 5.010635 0.000000 1805 +beta 0 6 5.164786 0.000000 1993 +byte 0 6 5.164786 0.000000 2108 +geograph 0 6 5.164786 0.000000 2236 +furthermor 0 6 5.164786 0.000000 2141 +conveni 0 6 5.164786 0.000000 2088 +pool 0 6 5.164786 0.000000 2225 +feasibl 0 6 5.164786 0.000000 2157 +gzip 0 6 5.164786 0.000000 2117 +moder 0 6 5.164786 0.000000 2112 +notifi 0 6 5.164786 0.000000 2106 +compat 0 5 5.347108 0.000000 2485 +eas 0 5 5.347108 0.000000 2267 +default 0 5 5.347108 0.000000 2335 +anda 0 5 5.347108 0.000000 2416 +greater 0 5 5.347108 0.000000 2258 +began 0 5 5.347108 0.000000 2498 +remain 0 5 5.347108 0.000000 2278 +peer 0 4 5.568345 0.000000 2742 +repli 0 4 5.568345 0.000000 2689 +andevalu 0 4 5.568345 0.000000 2706 +satellit 0 4 5.568345 0.000000 3077 +eventu 0 4 5.568345 0.000000 3074 +symmetr 0 4 5.568345 0.000000 2908 +exodu 0 4 5.568345 0.000000 3075 +customiz 0 4 5.568345 0.000000 2966 +simplifi 0 4 5.568345 0.000000 3066 +bulk 0 3 5.857933 0.000000 4000 +oodb 0 3 5.857933 0.000000 3954 +subscript 0 3 5.857933 0.000000 3469 +predecessor 0 3 5.857933 0.000000 3585 +briefli 0 3 5.857933 0.000000 3459 +sector 0 3 5.857933 0.000000 3766 +paragon 0 3 5.857933 0.000000 3359 +serverarchitectur 0 3 5.857933 0.000000 3736 +gigabyt 0 3 5.857933 0.000000 3548 +embodi 0 3 5.857933 0.000000 3236 +intra 0 3 5.857933 0.000000 3243 +reachabl 0 3 5.857933 0.000000 4001 +eduand 0 3 5.857933 0.000000 3452 +membership 0 3 5.857933 0.000000 3751 +oodbm 0 2 6.263398 0.000000 6083 +objectiveoverviewreleas 0 2 6.263398 0.000000 6070 +serverobject 0 2 6.263398 0.000000 6077 +provis 0 2 6.263398 0.000000 4683 +neutral 0 2 6.263398 0.000000 5760 +mount 0 2 6.263398 0.000000 5995 +eduthi 0 2 6.263398 0.000000 5382 +junk 0 2 6.263398 0.000000 5701 +mailbox 0 2 6.263398 0.000000 6084 +sender 0 2 6.263398 0.000000 5064 +shore_al 0 1 6.957497 0.000000 17432 +odmg 0 1 6.957497 0.000000 17433 +listproc 0 1 6.957497 0.000000 17434 +shore_support 0 1 6.957497 0.000000 17435 +informationsystem 0 1 6.957497 0.000000 17436 +ashor 0 1 6.957497 0.000000 17437 +vendor 0 1 6.957497 0.000000 17438 +flatten 0 1 6.957497 0.000000 17439 +legaci 0 1 6.957497 0.000000 17440 +clutter 0 1 6.957497 0.000000 17441 +pageshor 0 1 6.957497 0.000000 17442 +repositorydocu 0 1 6.957497 0.000000 17443 +informationmail 0 1 6.957497 0.000000 17444 +listsse 0 1 6.957497 0.000000 17445 +shorepeopl 0 1 6.957497 0.000000 17446 +shorelatest 0 1 6.957497 0.000000 17447 +arpaparadis 0 1 6.957497 0.000000 17448 +shoreexodu 0 1 6.957497 0.000000 17449 +shoreoo 0 1 6.957497 0.000000 17450 +oodbsshor 0 1 6.957497 0.000000 17451 +albumuw 0 1 6.957497 0.000000 17452 +widevarieti 0 1 6.957497 0.000000 17453 +cadsystem 0 1 6.957497 0.000000 17454 +usedexodusstorag 0 1 6.957497 0.000000 17455 +ofwai 0 1 6.957497 0.000000 17456 +thisinterfac 0 1 6.957497 0.000000 17457 +theunix 0 1 6.957497 0.000000 17458 +viand 0 1 6.957497 0.000000 17459 +withoutmodif 0 1 6.957497 0.000000 17460 +shoreobject 0 1 6.957497 0.000000 17461 +inheritingcharacterist 0 1 6.957497 0.000000 17462 +fromfil 0 1 6.957497 0.000000 17463 +ofshor 0 1 6.957497 0.000000 17464 +scalabilitysupport 0 1 6.957497 0.000000 17465 +heterogeneitysupport 0 1 6.957497 0.000000 17466 +applicationswhen 0 1 6.957497 0.000000 17467 +uniqueamong 0 1 6.957497 0.000000 17468 +languageheterogen 0 1 6.957497 0.000000 17469 +persistentstorag 0 1 6.957497 0.000000 17470 +basicallycompat 0 1 6.957497 0.000000 17471 +betransf 0 1 6.957497 0.000000 17472 +architectureshor 0 1 6.957497 0.000000 17473 +distributedarchitectur 0 1 6.957497 0.000000 17474 +disksattach 0 1 6.957497 0.000000 17475 +architectureus 0 1 6.957497 0.000000 17476 +typicallyus 0 1 6.957497 0.000000 17477 +notionof 0 1 6.957497 0.000000 17478 +runsin 0 1 6.957497 0.000000 17479 +forus 0 1 6.957497 0.000000 17480 +theparadis 0 1 6.957497 0.000000 17481 +seosdi 0 1 6.957497 0.000000 17482 +aimport 0 1 6.957497 0.000000 17483 +endeavor 0 1 6.957497 0.000000 17484 +certainlydepend 0 1 6.957497 0.000000 17485 +transmitobject 0 1 6.957497 0.000000 17486 +whilecurr 0 1 6.957497 0.000000 17487 +orientedtoward 0 1 6.957497 0.000000 17488 +terabyt 0 1 6.957497 0.000000 17489 +libraryar 0 1 6.957497 0.000000 17490 +heterogeneityobject 0 1 6.957497 0.000000 17491 +neutraltyp 0 1 6.957497 0.000000 17492 +databasefeatur 0 1 6.957497 0.000000 17493 +ofsupport 0 1 6.957497 0.000000 17494 +feasibleto 0 1 6.957497 0.000000 17495 +wasrec 0 1 6.957497 0.000000 17496 +onprovid 0 1 6.957497 0.000000 17497 +withina 0 1 6.957497 0.000000 17498 +applicationsa 0 1 6.957497 0.000000 17499 +currentlyus 0 1 6.957497 0.000000 17500 +untyp 0 1 6.957497 0.000000 17501 +structuredobject 0 1 6.957497 0.000000 17502 +displac 0 1 6.957497 0.000000 17503 +orientedfil 0 1 6.957497 0.000000 17504 +standpoint 0 1 6.957497 0.000000 17505 +manypersist 0 1 6.957497 0.000000 17506 +indirectli 0 1 6.957497 0.000000 17507 +usersa 0 1 6.957497 0.000000 17508 +individualpersist 0 1 6.957497 0.000000 17509 +oflarg 0 1 6.957497 0.000000 17510 +unnam 0 1 6.957497 0.000000 17511 +involvessever 0 1 6.957497 0.000000 17512 +includingdirectori 0 1 6.957497 0.000000 17513 +unixappl 0 1 6.957497 0.000000 17514 +fromtradit 0 1 6.957497 0.000000 17515 +standardunix 0 1 6.957497 0.000000 17516 +mkdir 0 1 6.957497 0.000000 17517 +chdir 0 1 6.957497 0.000000 17518 +callsposs 0 1 6.957497 0.000000 17519 +onevari 0 1 6.957497 0.000000 17520 +asb 0 1 6.957497 0.000000 17521 +objectthrough 0 1 6.957497 0.000000 17522 +counterpart 0 1 6.957497 0.000000 17523 +callswil 0 1 6.957497 0.000000 17524 +thatwish 0 1 6.957497 0.000000 17525 +datacontain 0 1 6.957497 0.000000 17526 +bothnew 0 1 6.957497 0.000000 17527 +componentof 0 1 6.957497 0.000000 17528 +morestructur 0 1 6.957497 0.000000 17529 +rleas 0 1 6.957497 0.000000 17530 +completeimplement 0 1 6.957497 0.000000 17531 +tosolari 0 1 6.957497 0.000000 17532 +andpentium 0 1 6.957497 0.000000 17533 +atftp 0 1 6.957497 0.000000 17534 +liststher 0 1 6.957497 0.000000 17535 +usebi 0 1 6.957497 0.000000 17536 +madisonc 0 1 6.957497 0.000000 17537 +unmoder 0 1 6.957497 0.000000 17538 +unlikelyev 0 1 6.957497 0.000000 17539 +isalreadi 0 1 6.957497 0.000000 17540 +belowfor 0 1 6.957497 0.000000 17541 +sentwhen 0 1 6.957497 0.000000 17542 +beingpost 0 1 6.957497 0.000000 17543 +yourrepli 0 1 6.957497 0.000000 17544 +maysubscrib 0 1 6.957497 0.000000 17545 +existenceof 0 1 6.957497 0.000000 17546 +whenit 0 1 6.957497 0.000000 17547 +yoursubscript 0 1 6.957497 0.000000 17548 +conceal 0 1 6.957497 0.000000 17549 +subscriberscannot 0 1 6.957497 0.000000 17550 +specialmessag 0 1 6.957497 0.000000 17551 +sendthi 0 1 6.957497 0.000000 17552 +unsubscrib 0 1 6.957497 0.000000 17553 +messageshould 0 1 6.957497 0.000000 17554 +helplast 0 1 6.957497 0.000000 17555 +nhall 0 1 6.957497 0.000000 17556 +footnot 0 1 6.957497 0.000000 17557 +odlshor 0 1 6.957497 0.000000 17558 +modelidl 0 1 6.957497 0.000000 17559 +odlar 0 1 6.957497 0.000000 17560 +stabilizesw 0 1 6.957497 0.000000 17561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..e09aaab1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +find 0 111 2.197225 0.000000 111 +stuff 0 87 2.484907 0.000000 171 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +sport 0 25 3.737670 0.000000 683 +indian 0 22 3.850148 0.000000 769 +gupta 0 12 4.465908 0.000000 1241 +avenu 0 12 4.465908 0.000000 1277 +newspap 0 12 4.465908 0.000000 1280 +whereabout 0 4 5.568345 0.000000 3078 +abhinav 0 3 5.857933 0.000000 3428 +agupta 0 3 5.857933 0.000000 3429 +kendal 0 2 6.263398 0.000000 6085 +residenceoffic 0 1 6.957497 0.000000 17562 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..b643fefd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +click 0 142 1.945910 0.000000 78 +pictur 0 89 2.397895 0.000000 160 +live 0 40 3.258097 0.000000 451 +ever 0 19 4.007333 0.000000 872 +larger 0 7 5.010635 0.000000 1875 +largest 0 7 5.010635 0.000000 1858 +alain 1 2 6.263398 6.263398 6086 +pagealain 0 1 6.957497 0.000000 17563 +carnivor 0 1 6.957497 0.000000 17564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..1a0d5e99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +professor 0 137 1.945910 0.000000 76 +relat 0 139 1.945910 0.000000 68 +machin 0 129 2.079442 0.000000 95 +dayton 0 119 2.079442 0.000000 104 +learn 0 86 2.484907 0.000000 170 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +state 0 76 2.564949 0.000000 207 +intellig 0 72 2.639057 0.000000 225 +artifici 0 63 2.772589 0.000000 280 +advisor 0 51 2.995732 0.000000 355 +streetmadison 0 38 3.295837 0.000000 474 +neural 0 30 3.555348 0.000000 578 +departmentunivers 0 24 3.761200 0.000000 711 +sequenc 0 23 3.806662 0.000000 734 +biologi 0 15 4.248495 0.000000 1049 +train 0 14 4.317488 0.000000 1066 +edutelephon 0 10 4.653960 0.000000 1473 +purdu 0 10 4.653960 0.000000 1466 +molecular 0 7 5.010635 0.000000 1887 +jude 0 6 5.164786 0.000000 2123 +fold 0 4 5.568345 0.000000 2615 +allex 0 2 6.263398 0.000000 6087 +ismb 0 2 6.263398 0.000000 5834 +carolyn 0 2 6.263398 0.000000 6088 +studentbiotechnolog 0 1 6.957497 0.000000 17565 +traineecomput 0 1 6.957497 0.000000 17566 +shavlikinterest 0 1 6.957497 0.000000 17567 +protein 0 1 6.957497 0.000000 17568 +networkseduc 0 1 6.957497 0.000000 17569 +madisonb 0 1 6.957497 0.000000 17570 +universityb 0 1 6.957497 0.000000 17571 +mankato 0 1 6.957497 0.000000 17572 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..6b29cf6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +find 0 111 2.197225 0.000000 111 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +peopl 0 96 2.302585 0.000000 132 +associ 0 93 2.397895 0.000000 151 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +novemb 0 81 2.484907 0.000000 179 +thing 0 84 2.484907 0.000000 189 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +resum 0 79 2.564949 0.000000 217 +method 0 80 2.564949 0.000000 213 +master 0 76 2.564949 0.000000 216 +optim 0 79 2.564949 0.000000 197 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +want 0 79 2.564949 0.000000 199 +solv 0 73 2.639057 0.000000 234 +write 0 72 2.639057 0.000000 222 +david 0 71 2.639057 0.000000 232 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +copi 0 63 2.772589 0.000000 284 +polici 0 64 2.772589 0.000000 279 +evalu 0 64 2.772589 0.000000 266 +new 0 64 2.772589 0.000000 262 +think 0 57 2.890372 0.000000 314 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +three 0 54 2.944439 0.000000 330 +talk 0 53 2.944439 0.000000 336 +week 0 52 2.995732 0.000000 343 +advisor 0 51 2.995732 0.000000 355 +much 0 52 2.995732 0.000000 349 +friend 0 48 3.044522 0.000000 376 +set 0 50 3.044522 0.000000 361 +physic 0 47 3.091042 0.000000 377 +get 0 46 3.091042 0.000000 380 +featur 0 46 3.091042 0.000000 386 +better 0 45 3.135494 0.000000 401 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +linear 0 41 3.218876 0.000000 431 +live 0 40 3.258097 0.000000 451 +author 0 39 3.258097 0.000000 450 +seminar 0 38 3.295837 0.000000 470 +return 0 34 3.401197 0.000000 502 +go 0 33 3.433987 0.000000 529 +articl 0 33 3.433987 0.000000 530 +depend 0 29 3.583519 0.000000 583 +enhanc 0 26 3.688879 0.000000 644 +never 0 25 3.737670 0.000000 671 +magazin 0 24 3.761200 0.000000 704 +watch 0 21 3.912023 0.000000 789 +love 0 21 3.912023 0.000000 804 +leav 0 21 3.912023 0.000000 772 +minut 0 20 3.951244 0.000000 810 +five 0 19 4.007333 0.000000 841 +beauti 0 18 4.060443 0.000000 912 +regist 0 17 4.110874 0.000000 938 +side 0 15 4.248495 0.000000 1022 +anywai 0 15 4.248495 0.000000 1047 +score 0 15 4.248495 0.000000 1017 +went 0 12 4.465908 0.000000 1279 +multiscalar 0 8 4.875197 0.000000 1783 +partner 0 8 4.875197 0.000000 1648 +parti 0 8 4.875197 0.000000 1676 +vallei 0 7 5.010635 0.000000 1959 +shot 0 7 5.010635 0.000000 1898 +yale 0 6 5.164786 0.000000 2003 +truth 0 6 5.164786 0.000000 2179 +sohi 0 6 5.164786 0.000000 2237 +cat 0 6 5.164786 0.000000 2194 +presid 0 6 5.164786 0.000000 2196 +promis 0 6 5.164786 0.000000 2037 +guri 0 5 5.347108 0.000000 2578 +girlfriend 0 5 5.347108 0.000000 2579 +everybodi 0 5 5.347108 0.000000 2517 +gui 0 5 5.347108 0.000000 2573 +kid 0 5 5.347108 0.000000 2516 +arch 0 4 5.568345 0.000000 2995 +metal 0 4 5.568345 0.000000 3079 +soul 0 4 5.568345 0.000000 2907 +drew 0 4 5.568345 0.000000 2980 +amir 1 3 5.857933 5.857933 3850 +super 0 3 5.857933 0.000000 3918 +preprocessor 0 3 5.857933 0.000000 3844 +detector 0 3 5.857933 0.000000 3745 +allevi 0 3 5.857933 0.000000 3643 +recip 0 3 5.857933 0.000000 3668 +terri 0 3 5.857933 0.000000 3264 +carbon 0 3 5.857933 0.000000 3804 +vagu 0 3 5.857933 0.000000 3393 +roth 0 2 6.263398 0.000000 6089 +out 0 2 6.263398 0.000000 6090 +barb 0 2 6.263398 0.000000 6058 +delphi 0 2 6.263398 0.000000 4192 +airport 0 2 6.263398 0.000000 5962 +curli 0 2 6.263398 0.000000 5691 +fri 0 2 6.263398 0.000000 5844 +charli 0 2 6.263398 0.000000 5905 +regress 0 2 6.263398 0.000000 4501 +weird 0 2 6.263398 0.000000 5503 +subba 0 2 6.263398 0.000000 6091 +officem 0 2 6.263398 0.000000 6092 +wierd 0 2 6.263398 0.000000 6093 +marci 0 1 6.957497 0.000000 17573 +maven 0 1 6.957497 0.000000 17574 +erin 0 1 6.957497 0.000000 17575 +occasionali 0 1 6.957497 0.000000 17576 +cvte 0 1 6.957497 0.000000 17577 +deleg 0 1 6.957497 0.000000 17578 +existencei 0 1 6.957497 0.000000 17579 +nail 0 1 6.957497 0.000000 17580 +lafollett 0 1 6.957497 0.000000 17581 +meantim 0 1 6.957497 0.000000 17582 +wacki 0 1 6.957497 0.000000 17583 +eggplant 0 1 6.957497 0.000000 17584 +daddi 0 1 6.957497 0.000000 17585 +titanium 0 1 6.957497 0.000000 17586 +screw 0 1 6.957497 0.000000 17587 +desi 0 1 6.957497 0.000000 17588 +relaford 0 1 6.957497 0.000000 17589 +mulholland 0 1 6.957497 0.000000 17590 +oxygen 0 1 6.957497 0.000000 17591 +dioxid 0 1 6.957497 0.000000 17592 +whack 0 1 6.957497 0.000000 17593 +scaryarea 0 1 6.957497 0.000000 17594 +rabid 0 1 6.957497 0.000000 17595 +interestth 0 1 6.957497 0.000000 17596 +hmmm 0 1 6.957497 0.000000 17597 +handyinformatik 0 1 6.957497 0.000000 17598 +madcat 0 1 6.957497 0.000000 17599 +sportslin 0 1 6.957497 0.000000 17600 +philli 0 1 6.957497 0.000000 17601 +ickyth 0 1 6.957497 0.000000 17602 +kemin 0 1 6.957497 0.000000 17603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..5024bb9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..5612a8dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +theori 1 111 2.197225 2.197225 127 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +pleas 0 113 2.197225 0.000000 114 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +present 0 91 2.397895 0.000000 145 +homepag 0 93 2.397895 0.000000 148 +search 0 95 2.397895 0.000000 155 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +comment 0 93 2.397895 0.000000 146 +activ 0 84 2.484907 0.000000 182 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +view 0 70 2.708050 0.000000 254 +order 0 69 2.708050 0.000000 249 +main 0 67 2.708050 0.000000 256 +goal 0 66 2.708050 0.000000 250 +function 0 62 2.772589 0.000000 275 +copi 0 63 2.772589 0.000000 284 +handout 0 64 2.772589 0.000000 263 +abstract 0 62 2.772589 0.000000 276 +variou 0 56 2.890372 0.000000 317 +space 0 57 2.890372 0.000000 310 +unix 0 58 2.890372 0.000000 308 +found 0 53 2.944439 0.000000 337 +tabl 0 51 2.995732 0.000000 346 +maintain 0 51 2.995732 0.000000 342 +telephon 0 50 3.044522 0.000000 373 +numer 0 49 3.044522 0.000000 369 +netscap 0 44 3.135494 0.000000 395 +futur 0 41 3.218876 0.000000 427 +vita 0 38 3.295837 0.000000 473 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +download 0 36 3.367296 0.000000 489 +short 0 36 3.367296 0.000000 499 +approxim 1 35 3.401197 3.401197 509 +word 0 34 3.401197 0.000000 508 +articl 0 33 3.433987 0.000000 530 +enhanc 0 26 3.688879 0.000000 644 +mine 0 26 3.688879 0.000000 654 +wish 0 24 3.761200 0.000000 692 +compress 0 23 3.806662 0.000000 719 +togeth 0 23 3.806662 0.000000 714 +miscellan 0 23 3.806662 0.000000 731 +recommend 0 22 3.850148 0.000000 737 +basi 0 20 3.951244 0.000000 828 +item 0 19 4.007333 0.000000 856 +offici 0 18 4.060443 0.000000 894 +otherwis 0 17 4.110874 0.000000 922 +choos 0 16 4.174387 0.000000 964 +carl 0 15 4.248495 0.000000 1024 +anonym 0 14 4.317488 0.000000 1100 +invari 0 8 4.875197 0.000000 1748 +univeristi 0 8 4.875197 0.000000 1754 +none 0 7 5.010635 0.000000 1811 +spline 0 6 5.164786 0.000000 2007 +clickher 0 5 5.347108 0.000000 2428 +shift 0 5 5.347108 0.000000 2357 +wavelet 0 4 5.568345 0.000000 2874 +usa 0 4 5.568345 0.000000 3080 +thin 0 3 5.857933 0.000000 3488 +shen 0 3 5.857933 0.000000 3370 +uncompress 0 3 5.857933 0.000000 3177 +boor 0 3 5.857933 0.000000 3482 +ofwisconsin 0 3 5.857933 0.000000 4002 +amo 1 2 6.263398 6.263398 6094 +professordepart 0 2 6.263398 0.000000 5624 +deposit 0 2 6.263398 0.000000 6095 +mailbox 0 2 6.263398 0.000000 6084 +boxsplin 0 1 6.957497 0.000000 17604 +radial 0 1 6.957497 0.000000 17605 +toscatt 0 1 6.957497 0.000000 17606 +multiquadr 0 1 6.957497 0.000000 17607 +plate 0 1 6.957497 0.000000 17608 +splinesthi 0 1 6.957497 0.000000 17609 +linksat 0 1 6.957497 0.000000 17610 +paperaffin 0 1 6.957497 0.000000 17611 +operatorof 0 1 6.957497 0.000000 17612 +zuowei 0 1 6.957497 0.000000 17613 +fromher 0 1 6.957497 0.000000 17614 +directlyfrom 0 1 6.957497 0.000000 17615 +accounther 0 1 6.957497 0.000000 17616 +articlesof 0 1 6.957497 0.000000 17617 +containspostscript 0 1 6.957497 0.000000 17618 +theapproxim 0 1 6.957497 0.000000 17619 +filesconcern 0 1 6.957497 0.000000 17620 +andpubl 0 1 6.957497 0.000000 17621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..eb09bad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +andi 1 4 5.568345 5.568345 3081 +pageandi 1 2 6.263398 6.263398 6096 +therber 1 1 6.957497 6.957497 17622 +therberoffic 1 1 6.957497 6.957497 17623 +sphone 1 1 6.957497 6.957497 17624 +andyt 1 1 6.957497 6.957497 17625 +eduzooresumebookmarksapplet 1 1 6.957497 6.957497 17626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..3acb2f42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +world 0 115 2.197225 0.000000 126 +present 0 91 2.397895 0.000000 145 +collect 0 65 2.772589 0.000000 268 +finger 0 52 2.995732 0.000000 354 +log 0 19 4.007333 0.000000 857 +classic 0 14 4.317488 0.000000 1084 +fascin 0 3 5.857933 0.000000 3948 +arvind 1 1 6.957497 6.957497 17627 +ranganathan 0 1 6.957497 0.000000 17628 +workplac 0 1 6.957497 0.000000 17629 +ranga 0 1 6.957497 0.000000 17630 +erstwhil 0 1 6.957497 0.000000 17631 +indiaworld 0 1 6.957497 0.000000 17632 +escher 0 1 6.957497 0.000000 17633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..8b553f7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +contact 0 153 1.791759 0.000000 59 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +educ 0 86 2.484907 0.000000 191 +come 0 78 2.564949 0.000000 202 +visit 0 63 2.772589 0.000000 288 +undergradu 0 54 2.944439 0.000000 338 +india 0 32 3.465736 0.000000 550 +altern 0 26 3.688879 0.000000 641 +worth 0 11 4.553877 0.000000 1294 +ashish 1 5 5.347108 5.347108 2473 +delhi 0 5 5.347108 0.000000 2530 +whereabout 0 4 5.568345 0.000000 3078 +indianinstitut 0 3 5.857933 0.000000 4003 +fantast 0 3 5.857933 0.000000 3966 +hadmi 0 2 6.263398 0.000000 6097 +canfing 0 2 6.263398 0.000000 6098 +thusoo 0 1 6.957497 0.000000 17634 +iitd 0 1 6.957497 0.000000 17635 +ashisht 0 1 6.957497 0.000000 17636 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..548a51d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +dayton 0 119 2.079442 0.000000 104 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +west 0 83 2.484907 0.000000 192 +info 0 85 2.484907 0.000000 176 +june 0 79 2.564949 0.000000 214 +view 0 70 2.708050 0.000000 254 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +juli 0 60 2.833213 0.000000 305 +finger 0 52 2.995732 0.000000 354 +usaphon 0 9 4.753590 0.000000 1600 +ashraf 0 3 5.857933 0.000000 3421 +aboulnaga 0 3 5.857933 0.000000 3426 +edueduc 0 3 5.857933 0.000000 4004 +egypt 0 2 6.263398 0.000000 4856 +desautel 0 2 6.263398 0.000000 4791 +alexandria 1 1 6.957497 6.957497 17637 +pageashraf 0 1 6.957497 0.000000 17638 +aboulnagacomput 0 1 6.957497 0.000000 17639 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..495f7d11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +seattl 0 120 2.079442 0.000000 103 +back 0 60 2.833213 0.000000 297 +appoint 0 49 3.044522 0.000000 358 +basketbal 0 12 4.465908 0.000000 1289 +tuth 0 9 4.753590 0.000000 1519 +phil 0 5 5.347108 0.000000 2419 +educurr 0 5 5.347108 0.000000 2504 +win 0 3 5.857933 0.000000 3593 +atkinson 1 2 6.263398 6.263398 4722 +ncaa 0 2 6.263398 0.000000 5908 +infooffic 0 1 6.957497 0.000000 17640 +pageucla 0 1 6.957497 0.000000 17641 +bannon 0 1 6.957497 0.000000 17642 +championship 0 1 6.957497 0.000000 17643 +researchsailinghors 0 1 6.957497 0.000000 17644 +ridingscuba 0 1 6.957497 0.000000 17645 +divingc 0 1 6.957497 0.000000 17646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..c9834e83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +click 0 142 1.945910 0.000000 78 +number 1 130 2.079442 2.079442 97 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +question 0 91 2.397895 0.000000 141 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +proceed 0 93 2.397895 0.000000 152 +larg 0 82 2.484907 0.000000 168 +info 0 85 2.484907 0.000000 176 +exampl 0 77 2.564949 0.000000 195 +complet 0 77 2.564949 0.000000 208 +effici 0 73 2.639057 0.000000 233 +summari 0 73 2.639057 0.000000 237 +appli 0 71 2.639057 0.000000 226 +test 0 66 2.708050 0.000000 252 +practic 0 70 2.708050 0.000000 246 +complex 0 64 2.772589 0.000000 269 +improv 0 62 2.772589 0.000000 289 +creat 0 63 2.772589 0.000000 277 +simpl 0 60 2.833213 0.000000 298 +juli 0 60 2.833213 0.000000 305 +direct 0 57 2.890372 0.000000 316 +digit 0 52 2.995732 0.000000 348 +telephon 0 50 3.044522 0.000000 373 +without 0 50 3.044522 0.000000 370 +california 0 46 3.091042 0.000000 388 +algebra 0 45 3.135494 0.000000 394 +answer 0 45 3.135494 0.000000 391 +math 0 44 3.135494 0.000000 402 +examin 0 42 3.218876 0.000000 424 +press 0 42 3.218876 0.000000 419 +theoret 0 39 3.258097 0.000000 446 +probabl 0 40 3.258097 0.000000 455 +small 0 39 3.258097 0.000000 447 +annual 0 40 3.258097 0.000000 458 +error 0 40 3.258097 0.000000 449 +vita 0 38 3.295837 0.000000 473 +random 0 34 3.401197 0.000000 511 +least 0 35 3.401197 0.000000 516 +approxim 0 35 3.401197 0.000000 509 +product 0 33 3.433987 0.000000 527 +curriculum 0 33 3.433987 0.000000 535 +given 0 32 3.465736 0.000000 538 +secur 0 30 3.555348 0.000000 577 +usual 0 28 3.610918 0.000000 608 +proc 0 26 3.688879 0.000000 649 +berkelei 0 26 3.688879 0.000000 657 +lead 0 23 3.806662 0.000000 718 +among 0 21 3.912023 0.000000 781 +similar 0 21 3.912023 0.000000 771 +eric 0 19 4.007333 0.000000 870 +prove 0 19 4.007333 0.000000 848 +automata 0 13 4.382027 0.000000 1135 +conf 0 13 4.382027 0.000000 1181 +string 0 11 4.553877 0.000000 1340 +probabilist 0 11 4.553877 0.000000 1343 +volum 0 11 4.553877 0.000000 1347 +cryptographi 0 9 4.753590 0.000000 1512 +transmiss 0 9 4.753590 0.000000 1588 +assumpt 0 9 4.753590 0.000000 1514 +eduto 0 7 5.010635 0.000000 1956 +bach 0 4 5.568345 0.000000 2708 +wit 0 3 5.857933 0.000000 4005 +euler 0 3 5.857933 0.000000 3174 +canadian 0 3 5.857933 0.000000 3508 +condon 0 3 5.857933 0.000000 3309 +prime 0 2 6.263398 0.000000 6099 +designand 0 2 6.263398 0.000000 6100 +functionof 0 2 6.263398 0.000000 5415 +algebraicalgorithm 0 1 6.957497 0.000000 17647 +solvealgebra 0 1 6.957497 0.000000 17648 +onetel 0 1 6.957497 0.000000 17649 +possiblefactor 0 1 6.957497 0.000000 17650 +intrins 0 1 6.957497 0.000000 17651 +forreli 0 1 6.957497 0.000000 17652 +iscomposit 0 1 6.957497 0.000000 17653 +auxiliarynumb 0 1 6.957497 0.000000 17654 +witnessbi 0 1 6.957497 0.000000 17655 +followingnatur 0 1 6.957497 0.000000 17656 +accurateheurist 0 1 6.957497 0.000000 17657 +allowsthi 0 1 6.957497 0.000000 17658 +cnta 0 1 6.957497 0.000000 17659 +glaser 0 1 6.957497 0.000000 17660 +tanguai 0 1 6.957497 0.000000 17661 +shallit 0 1 6.957497 0.000000 17662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..328717b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +perform 0 143 1.945910 0.000000 74 +tool 0 117 2.079442 0.000000 93 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +follow 0 92 2.397895 0.000000 143 +center 0 88 2.397895 0.000000 158 +west 0 83 2.484907 0.000000 192 +thing 0 84 2.484907 0.000000 189 +internet 0 83 2.484907 0.000000 186 +symposium 0 72 2.639057 0.000000 238 +undergradu 0 54 2.944439 0.000000 338 +streetmadison 0 38 3.295837 0.000000 474 +seminar 0 38 3.295837 0.000000 470 +random 0 34 3.401197 0.000000 511 +departmentunivers 0 24 3.761200 0.000000 711 +honor 0 23 3.806662 0.000000 729 +famili 0 23 3.806662 0.000000 735 +director 0 22 3.850148 0.000000 767 +offici 0 18 4.060443 0.000000 894 +miller 0 17 4.110874 0.000000 949 +convent 0 14 4.317488 0.000000 1072 +bart 0 9 4.753590 0.000000 1559 +paradyn 0 9 4.753590 0.000000 1614 +frank 0 9 4.753590 0.000000 1568 +lloyd 0 6 5.164786 0.000000 2103 +advisori 0 6 5.164786 0.000000 2148 +barton 0 5 5.347108 0.000000 2371 +professorcomput 0 3 5.857933 0.000000 3714 +usath 0 2 6.263398 0.000000 6056 +wright 0 2 6.263398 0.000000 5177 +fuzz 0 1 6.957497 0.000000 17663 +testingteach 0 1 6.957497 0.000000 17664 +graduatesprofession 0 1 6.957497 0.000000 17665 +monona 0 1 6.957497 0.000000 17666 +terrac 0 1 6.957497 0.000000 17667 +groupperson 0 1 6.957497 0.000000 17668 +photosbart 0 1 6.957497 0.000000 17669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..5bd2102c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +wisconsin 0 169 1.791759 0.000000 54 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +look 0 107 2.197225 0.000000 115 +west 0 83 2.484907 0.000000 192 +internet 0 83 2.484907 0.000000 186 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +street 0 63 2.772589 0.000000 293 +wednesdai 0 64 2.772589 0.000000 261 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +fridai 0 44 3.135494 0.000000 390 +word 0 34 3.401197 0.000000 508 +someth 0 31 3.496508 0.000000 554 +ultim 0 17 4.110874 0.000000 943 +miron 0 14 4.317488 0.000000 1110 +readabl 0 12 4.465908 0.000000 1258 +benjamin 0 11 4.553877 0.000000 1296 +teitelbaum 0 6 5.164786 0.000000 2102 +garbag 0 6 5.164786 0.000000 1986 +hyper 0 5 5.347108 0.000000 2435 +usaben 0 1 6.957497 0.000000 17670 +edursumquinc 0 1 6.957497 0.000000 17671 +gamezillion 0 1 6.957497 0.000000 17672 +bookmarksspr 0 1 6.957497 0.000000 17673 +dbseminar 0 1 6.957497 0.000000 17674 +osseminar 0 1 6.957497 0.000000 17675 +condormeet 0 1 6.957497 0.000000 17676 +plseminar 0 1 6.957497 0.000000 17677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..734a8148 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +postscript 0 131 2.079442 0.000000 90 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +structur 0 106 2.197225 0.000000 105 +world 0 115 2.197225 0.000000 126 +teach 0 108 2.197225 0.000000 112 +mathemat 0 108 2.197225 0.000000 123 +assist 0 112 2.197225 0.000000 113 +make 0 111 2.197225 0.000000 120 +instructor 0 108 2.197225 0.000000 107 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +imag 0 91 2.397895 0.000000 161 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +search 0 95 2.397895 0.000000 155 +wide 0 84 2.484907 0.000000 185 +requir 0 81 2.484907 0.000000 167 +environ 0 84 2.484907 0.000000 177 +school 0 84 2.484907 0.000000 188 +west 0 83 2.484907 0.000000 192 +librari 0 87 2.484907 0.000000 181 +learn 0 86 2.484907 0.000000 170 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +resum 0 79 2.564949 0.000000 217 +orient 0 80 2.564949 0.000000 205 +solv 0 73 2.639057 0.000000 234 +servic 0 72 2.639057 0.000000 236 +addit 0 74 2.639057 0.000000 228 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +logic 0 71 2.639057 0.000000 230 +practic 0 70 2.708050 0.000000 246 +knowledg 0 67 2.708050 0.000000 243 +receiv 0 66 2.708050 0.000000 244 +experi 0 64 2.772589 0.000000 283 +street 0 63 2.772589 0.000000 293 +abstract 0 62 2.772589 0.000000 276 +import 0 65 2.772589 0.000000 282 +result 0 65 2.772589 0.000000 281 +prof 0 64 2.772589 0.000000 273 +virtual 0 62 2.772589 0.000000 285 +artifici 0 63 2.772589 0.000000 280 +copi 0 63 2.772589 0.000000 284 +januari 0 62 2.772589 0.000000 264 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +colleg 0 61 2.833213 0.000000 300 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +major 0 56 2.890372 0.000000 315 +cover 0 55 2.944439 0.000000 329 +instruct 0 53 2.944439 0.000000 332 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +finger 0 52 2.995732 0.000000 354 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +algebra 0 45 3.135494 0.000000 394 +vision 0 41 3.218876 0.000000 430 +examin 0 42 3.218876 0.000000 424 +http 0 41 3.218876 0.000000 420 +howev 0 41 3.218876 0.000000 422 +multipl 0 39 3.258097 0.000000 453 +error 0 40 3.258097 0.000000 449 +littl 0 39 3.258097 0.000000 454 +vita 0 38 3.295837 0.000000 473 +credit 0 38 3.295837 0.000000 460 +open 0 38 3.295837 0.000000 469 +robot 0 36 3.367296 0.000000 497 +procedur 0 36 3.367296 0.000000 488 +copyright 0 36 3.367296 0.000000 495 +concurr 0 34 3.401197 0.000000 501 +approxim 0 35 3.401197 0.000000 509 +least 0 35 3.401197 0.000000 516 +survei 0 35 3.401197 0.000000 513 +statist 0 35 3.401197 0.000000 521 +next 0 34 3.401197 0.000000 517 +taught 0 33 3.433987 0.000000 526 +curriculum 0 33 3.433987 0.000000 535 +within 0 33 3.433987 0.000000 525 +dissert 0 32 3.465736 0.000000 549 +transform 0 32 3.465736 0.000000 542 +given 0 32 3.465736 0.000000 538 +posit 0 31 3.496508 0.000000 552 +titl 0 31 3.496508 0.000000 556 +exist 0 30 3.555348 0.000000 569 +specifi 0 30 3.555348 0.000000 568 +intend 0 28 3.610918 0.000000 599 +administr 0 27 3.637586 0.000000 628 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +accur 0 25 3.737670 0.000000 680 +motion 0 24 3.761200 0.000000 699 +honor 0 23 3.806662 0.000000 729 +instead 0 22 3.850148 0.000000 756 +defin 0 22 3.850148 0.000000 746 +identifi 0 22 3.850148 0.000000 760 +navig 0 21 3.912023 0.000000 796 +prepar 0 20 3.951244 0.000000 824 +entir 0 20 3.951244 0.000000 811 +geometr 0 19 4.007333 0.000000 852 +assum 0 19 4.007333 0.000000 845 +lyco 0 19 4.007333 0.000000 871 +minim 0 18 4.060443 0.000000 887 +fortran 0 15 4.248495 0.000000 1027 +drive 0 15 4.248495 0.000000 1052 +club 0 15 4.248495 0.000000 1058 +scene 0 14 4.317488 0.000000 1114 +camera 0 14 4.317488 0.000000 1115 +primarili 0 13 4.382027 0.000000 1185 +essenti 0 13 4.382027 0.000000 1137 +charl 0 13 4.382027 0.000000 1149 +optic 0 12 4.465908 0.000000 1221 +realiti 0 12 4.465908 0.000000 1272 +pascal 0 12 4.465908 0.000000 1213 +pagewelcom 0 11 4.553877 0.000000 1344 +keyword 0 11 4.553877 0.000000 1356 +perspect 0 10 4.653960 0.000000 1437 +prior 0 10 4.653960 0.000000 1438 +observ 0 9 4.753590 0.000000 1578 +assumpt 0 9 4.753590 0.000000 1514 +minimum 0 9 4.753590 0.000000 1555 +occur 0 9 4.753590 0.000000 1572 +dyer 0 9 4.753590 0.000000 1573 +sensit 0 8 4.875197 0.000000 1726 +dimens 0 7 5.010635 0.000000 1930 +elementari 0 7 5.010635 0.000000 1825 +bestor 0 6 5.164786 0.000000 2099 +recov 0 6 5.164786 0.000000 2235 +restrict 0 6 5.164786 0.000000 2129 +constrain 0 6 5.164786 0.000000 2042 +gareth 0 5 5.347108 0.000000 2392 +rigid 0 5 5.347108 0.000000 2432 +unknown 0 5 5.347108 0.000000 2318 +consent 0 5 5.347108 0.000000 2389 +invers 0 4 5.568345 0.000000 2764 +observatori 0 4 5.568345 0.000000 3070 +projector 0 3 5.857933 0.000000 3409 +edueduc 0 3 5.857933 0.000000 4004 +coursework 0 3 5.857933 0.000000 3588 +duti 0 3 5.857933 0.000000 3317 +prereq 0 3 5.857933 0.000000 3178 +wiscinfo 0 3 5.857933 0.000000 3106 +hoofer 0 2 6.263398 0.000000 6101 +out 0 2 6.263398 0.000000 6090 +nextstep 0 2 6.263398 0.000000 6102 +zealand 0 1 6.957497 0.000000 17678 +massei 0 1 6.957497 0.000000 17679 +pagegareth 0 1 6.957497 0.000000 17680 +dpl 0 1 6.957497 0.000000 17681 +dacc 0 1 6.957497 0.000000 17682 +nois 0 1 6.957497 0.000000 17683 +tradition 0 1 6.957497 0.000000 17684 +intersect 0 1 6.957497 0.000000 17685 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..af9a2341 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +area 0 144 1.945910 0.000000 80 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +west 0 83 2.484907 0.000000 192 +local 0 55 2.944439 0.000000 334 +undergradu 0 54 2.944439 0.000000 338 +advisor 0 51 2.995732 0.000000 355 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +ramakrishnan 0 16 4.174387 0.000000 972 +raghu 0 12 4.465908 0.000000 1212 +kevin 0 9 4.753590 0.000000 1482 +assistantdepart 0 8 4.875197 0.000000 1784 +coral 0 5 5.347108 0.000000 2538 +beyer 1 2 6.263398 6.263398 6103 +caution 0 2 6.263398 0.000000 4754 +pagekevin 0 1 6.957497 0.000000 17686 +beyerbey 0 1 6.957497 0.000000 17687 +researchresearch 0 1 6.957497 0.000000 17688 +coursesinstruct 0 1 6.957497 0.000000 17689 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..50d34bab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +advanc 0 99 2.302585 0.000000 130 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +west 0 83 2.484907 0.000000 192 +html 0 75 2.639057 0.000000 235 +window 0 68 2.708050 0.000000 242 +street 0 63 2.772589 0.000000 293 +back 0 60 2.833213 0.000000 297 +point 0 58 2.890372 0.000000 319 +past 0 42 3.218876 0.000000 428 +press 0 42 3.218876 0.000000 419 +cach 0 41 3.218876 0.000000 432 +microsoft 0 38 3.295837 0.000000 468 +express 0 32 3.465736 0.000000 540 +actual 0 28 3.610918 0.000000 604 +packag 0 28 3.610918 0.000000 614 +consist 0 26 3.688879 0.000000 651 +size 0 23 3.806662 0.000000 713 +miller 0 17 4.110874 0.000000 949 +todd 0 15 4.248495 0.000000 1051 +introduc 0 13 4.382027 0.000000 1139 +faster 0 11 4.553877 0.000000 1323 +bart 0 9 4.753590 0.000000 1559 +microprocessor 0 7 5.010635 0.000000 1808 +yeah 0 6 5.164786 0.000000 2195 +locomot 0 2 6.263398 0.000000 5807 +skew 0 2 6.263398 0.000000 6057 +bezenek 1 1 6.957497 6.957497 17690 +pith 0 1 6.957497 0.000000 17691 +toddm 0 1 6.957497 0.000000 17692 +cpu 0 1 6.957497 0.000000 17693 +_great 0 1 6.957497 0.000000 17694 +present_ 0 1 6.957497 0.000000 17695 +uregina 0 1 6.957497 0.000000 17696 +bayko 0 1 6.957497 0.000000 17697 +squeez 0 1 6.957497 0.000000 17698 +skateboard 0 1 6.957497 0.000000 17699 +helen 0 1 6.957497 0.000000 17700 +custer 0 1 6.957497 0.000000 17701 +_insid 0 1 6.957497 0.000000 17702 +pithi 0 1 6.957497 0.000000 17703 +abound 0 1 6.957497 0.000000 17704 +edubezenek 0 1 6.957497 0.000000 17705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..8834c908 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +hour 0 165 1.791759 0.000000 46 +distribut 0 162 1.791759 0.000000 51 +click 0 142 1.945910 0.000000 78 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +schedul 0 119 2.079442 0.000000 85 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +make 0 111 2.197225 0.000000 120 +site 0 106 2.197225 0.000000 119 +section 1 94 2.397895 2.397895 149 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +mondai 0 77 2.564949 0.000000 206 +free 0 73 2.639057 0.000000 224 +onlin 0 75 2.639057 0.000000 223 +simul 0 66 2.708050 0.000000 255 +wednesdai 0 64 2.772589 0.000000 261 +back 0 60 2.833213 0.000000 297 +anoth 0 45 3.135494 0.000000 408 +review 0 42 3.218876 0.000000 425 +word 0 34 3.401197 0.000000 508 +held 0 28 3.610918 0.000000 600 +dai 0 22 3.850148 0.000000 753 +quiz 0 16 4.174387 0.000000 990 +anywai 0 15 4.248495 0.000000 1047 +speech 0 12 4.465908 0.000000 1222 +neat 0 12 4.465908 0.000000 1263 +averag 0 6 5.164786 0.000000 2098 +jpeg 0 6 5.164786 0.000000 2053 +condor 0 5 5.347108 0.000000 2577 +nathan 0 4 5.568345 0.000000 2794 +bockrath 1 3 5.857933 5.857933 3420 +viru 0 2 6.263398 0.000000 4782 +nate 0 2 6.263398 0.000000 5720 +macro 0 2 6.263398 0.000000 5686 +pageoth 0 2 6.263398 0.000000 6104 +pageback 0 1 6.957497 0.000000 17706 +oraclesend 0 1 6.957497 0.000000 17707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..01c5049b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,289 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +databas 0 122 2.079442 0.000000 86 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +site 0 106 2.197225 0.000000 119 +part 0 98 2.302585 0.000000 129 +take 0 97 2.302585 0.000000 134 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +associ 0 93 2.397895 0.000000 151 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +follow 0 92 2.397895 0.000000 143 +question 0 91 2.397895 0.000000 141 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +thing 0 84 2.484907 0.000000 189 +member 0 84 2.484907 0.000000 165 +internet 0 83 2.484907 0.000000 186 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +school 0 84 2.484907 0.000000 188 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +orient 0 80 2.564949 0.000000 205 +master 0 76 2.564949 0.000000 216 +good 0 77 2.564949 0.000000 200 +line 0 75 2.639057 0.000000 231 +appli 0 71 2.639057 0.000000 226 +david 0 71 2.639057 0.000000 232 +addit 0 74 2.639057 0.000000 228 +free 0 73 2.639057 0.000000 224 +meet 0 72 2.639057 0.000000 229 +degre 0 69 2.708050 0.000000 259 +thursdai 0 70 2.708050 0.000000 241 +new 0 64 2.772589 0.000000 262 +creat 0 63 2.772589 0.000000 277 +organ 0 65 2.772589 0.000000 265 +visit 0 63 2.772589 0.000000 288 +import 0 65 2.772589 0.000000 282 +type 0 61 2.833213 0.000000 296 +plai 0 60 2.833213 0.000000 307 +locat 0 59 2.833213 0.000000 303 +unix 0 58 2.890372 0.000000 308 +local 0 55 2.944439 0.000000 334 +much 0 52 2.995732 0.000000 349 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +right 0 48 3.044522 0.000000 363 +friend 0 48 3.044522 0.000000 376 +done 0 47 3.091042 0.000000 381 +howev 0 41 3.218876 0.000000 422 +realli 0 40 3.258097 0.000000 444 +societi 0 40 3.258097 0.000000 456 +prototyp 0 38 3.295837 0.000000 463 +hand 0 37 3.332205 0.000000 475 +game 0 36 3.367296 0.000000 498 +short 0 36 3.367296 0.000000 499 +everi 0 34 3.401197 0.000000 519 +word 0 34 3.401197 0.000000 508 +either 0 35 3.401197 0.000000 506 +print 0 34 3.401197 0.000000 503 +queri 0 33 3.433987 0.000000 524 +kind 0 32 3.465736 0.000000 541 +scientist 0 31 3.496508 0.000000 560 +often 0 31 3.496508 0.000000 551 +taken 0 31 3.496508 0.000000 555 +storag 0 31 3.496508 0.000000 553 +someth 0 31 3.496508 0.000000 554 +except 0 28 3.610918 0.000000 607 +quit 0 27 3.637586 0.000000 633 +administr 0 27 3.637586 0.000000 628 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +rather 0 26 3.688879 0.000000 642 +enjoi 0 26 3.688879 0.000000 660 +experiment 0 26 3.688879 0.000000 645 +although 0 25 3.737670 0.000000 667 +wai 0 25 3.737670 0.000000 662 +notic 0 25 3.737670 0.000000 675 +task 0 25 3.737670 0.000000 678 +store 0 24 3.761200 0.000000 693 +interpret 0 24 3.761200 0.000000 686 +consult 0 24 3.761200 0.000000 687 +alwai 0 24 3.761200 0.000000 691 +methodolog 0 23 3.806662 0.000000 733 +try 0 22 3.850148 0.000000 764 +defin 0 22 3.850148 0.000000 746 +william 0 22 3.850148 0.000000 765 +util 0 21 3.912023 0.000000 774 +voic 0 21 3.912023 0.000000 806 +tell 0 21 3.912023 0.000000 777 +wonder 0 20 3.951244 0.000000 815 +sure 0 20 3.951244 0.000000 813 +kernel 0 20 3.951244 0.000000 825 +longer 0 20 3.951244 0.000000 816 +entir 0 20 3.951244 0.000000 811 +els 0 19 4.007333 0.000000 843 +five 0 19 4.007333 0.000000 841 +histori 0 19 4.007333 0.000000 853 +along 0 18 4.060443 0.000000 878 +thoma 0 18 4.060443 0.000000 901 +seem 0 18 4.060443 0.000000 899 +whole 0 17 4.110874 0.000000 940 +anyth 0 16 4.174387 0.000000 998 +across 0 16 4.174387 0.000000 974 +enough 0 15 4.248495 0.000000 1040 +purchas 0 15 4.248495 0.000000 1030 +drive 0 15 4.248495 0.000000 1052 +goe 0 15 4.248495 0.000000 1044 +draw 0 14 4.317488 0.000000 1086 +comic 0 14 4.317488 0.000000 1103 +role 0 14 4.317488 0.000000 1101 +becam 0 14 4.317488 0.000000 1117 +care 0 13 4.382027 0.000000 1177 +everyon 0 13 4.382027 0.000000 1148 +forth 0 13 4.382027 0.000000 1186 +everyth 0 13 4.382027 0.000000 1169 +dewitt 0 12 4.465908 0.000000 1270 +reader 0 12 4.465908 0.000000 1246 +usenix 0 12 4.465908 0.000000 1240 +shore 0 11 4.553877 0.000000 1377 +road 0 11 4.553877 0.000000 1374 +lake 0 11 4.553877 0.000000 1373 +night 0 11 4.553877 0.000000 1319 +thecomput 0 10 4.653960 0.000000 1408 +drink 0 9 4.753590 0.000000 1607 +occur 0 9 4.753590 0.000000 1572 +departmentof 0 9 4.753590 0.000000 1539 +architect 0 8 4.875197 0.000000 1624 +job 0 8 4.875197 0.000000 1702 +paradis 0 8 4.875197 0.000000 1782 +port 0 8 4.875197 0.000000 1766 +burger 0 7 5.010635 0.000000 1889 +bore 0 7 5.010635 0.000000 1948 +usenet 0 7 5.010635 0.000000 1839 +throughout 0 7 5.010635 0.000000 1871 +parent 0 6 5.164786 0.000000 2204 +beer 0 6 5.164786 0.000000 2216 +sleep 0 6 5.164786 0.000000 2211 +relax 0 6 5.164786 0.000000 2120 +fiction 0 6 5.164786 0.000000 2217 +railroad 0 6 5.164786 0.000000 2161 +put 0 6 5.164786 0.000000 2017 +gate 0 6 5.164786 0.000000 2182 +famou 0 6 5.164786 0.000000 2185 +geograph 0 6 5.164786 0.000000 2236 +whatev 0 6 5.164786 0.000000 2097 +benefit 0 6 5.164786 0.000000 2213 +divers 0 6 5.164786 0.000000 2232 +semi 0 5 5.347108 0.000000 2510 +east 0 5 5.347108 0.000000 2472 +matur 0 5 5.347108 0.000000 2269 +advic 0 5 5.347108 0.000000 2509 +aircraft 0 4 5.568345 0.000000 2872 +moon 0 4 5.568345 0.000000 2991 +haven 0 4 5.568345 0.000000 3037 +hacker 0 3 5.857933 0.000000 3996 +tiger 0 3 5.857933 0.000000 3897 +roll 0 3 5.857933 0.000000 3723 +pai 0 3 5.857933 0.000000 3672 +tremend 0 3 5.857933 0.000000 3453 +insan 0 3 5.857933 0.000000 4006 +beat 0 3 5.857933 0.000000 3840 +gamma 0 3 5.857933 0.000000 3219 +workin 0 3 5.857933 0.000000 3938 +agre 0 3 5.857933 0.000000 4007 +owner 0 3 5.857933 0.000000 3531 +pilot 0 3 5.857933 0.000000 4008 +acquaint 0 3 5.857933 0.000000 3468 +weekend 0 3 5.857933 0.000000 3357 +timeoper 0 2 6.263398 0.000000 4363 +woodwork 0 2 6.263398 0.000000 5463 +brew 0 2 6.263398 0.000000 5988 +disagre 0 2 6.263398 0.000000 6105 +pursuit 0 2 6.263398 0.000000 6048 +fly 0 2 6.263398 0.000000 5937 +stripe 0 2 6.263398 0.000000 6106 +creatur 0 2 6.263398 0.000000 6107 +leap 0 2 6.263398 0.000000 5654 +tovisit 0 2 6.263398 0.000000 4686 +that 0 2 6.263398 0.000000 5111 +differentarchitectur 0 2 6.263398 0.000000 6051 +hord 0 2 6.263398 0.000000 5917 +slowli 0 2 6.263398 0.000000 5363 +pagelast 0 2 6.263398 0.000000 5793 +bolo 0 1 6.957497 0.000000 17708 +uwvax 0 1 6.957497 0.000000 17709 +josef 0 1 6.957497 0.000000 17710 +uucp 0 1 6.957497 0.000000 17711 +essen 0 1 6.957497 0.000000 17712 +hau 0 1 6.957497 0.000000 17713 +bolobologreet 0 1 6.957497 0.000000 17714 +christen 0 1 6.957497 0.000000 17715 +mebolo 0 1 6.957497 0.000000 17716 +bestexplan 0 1 6.957497 0.000000 17717 +bywhat 0 1 6.957497 0.000000 17718 +acomput 0 1 6.957497 0.000000 17719 +shudder 0 1 6.957497 0.000000 17720 +newoper 0 1 6.957497 0.000000 17721 +sameto 0 1 6.957497 0.000000 17722 +myroomm 0 1 6.957497 0.000000 17723 +sublim 0 1 6.957497 0.000000 17724 +thetig 0 1 6.957497 0.000000 17725 +blake 0 1 6.957497 0.000000 17726 +poemtyg 0 1 6.957497 0.000000 17727 +tyger 0 1 6.957497 0.000000 17728 +againin 0 1 6.957497 0.000000 17729 +ahous 0 1 6.957497 0.000000 17730 +isjosef 0 1 6.957497 0.000000 17731 +roadmonona 0 1 6.957497 0.000000 17732 +workwork 0 1 6.957497 0.000000 17733 +banana 0 1 6.957497 0.000000 17734 +grung 0 1 6.957497 0.000000 17735 +perhapssom 0 1 6.957497 0.000000 17736 +othermonth 0 1 6.957497 0.000000 17737 +intosubmiss 0 1 6.957497 0.000000 17738 +andstar 0 1 6.957497 0.000000 17739 +fordav 0 1 6.957497 0.000000 17740 +wiss 0 1 6.957497 0.000000 17741 +themadison 0 1 6.957497 0.000000 17742 +campusof 0 1 6.957497 0.000000 17743 +peninsula 0 1 6.957497 0.000000 17744 +technicalexpertis 0 1 6.957497 0.000000 17745 +newsystem 0 1 6.957497 0.000000 17746 +reviv 0 1 6.957497 0.000000 17747 +oddbal 0 1 6.957497 0.000000 17748 +tasksar 0 1 6.957497 0.000000 17749 +serverbut 0 1 6.957497 0.000000 17750 +mostlyempti 0 1 6.957497 0.000000 17751 +activitiesuwvaxi 0 1 6.957497 0.000000 17752 +svolunt 0 1 6.957497 0.000000 17753 +organizationsi 0 1 6.957497 0.000000 17754 +oftenhav 0 1 6.957497 0.000000 17755 +usersof 0 1 6.957497 0.000000 17756 +aopa 0 1 6.957497 0.000000 17757 +blitz 0 1 6.957497 0.000000 17758 +drinkingwhen 0 1 6.957497 0.000000 17759 +friendsand 0 1 6.957497 0.000000 17760 +loftili 0 1 6.957497 0.000000 17761 +labelledblitz 0 1 6.957497 0.000000 17762 +ofoctoberfest 0 1 6.957497 0.000000 17763 +chud 0 1 6.957497 0.000000 17764 +accumulateda 0 1 6.957497 0.000000 17765 +whatnotof 0 1 6.957497 0.000000 17766 +charad 0 1 6.957497 0.000000 17767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..421287fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +oper 0 180 1.609438 0.000000 34 +network 0 168 1.791759 0.000000 61 +model 0 145 1.945910 0.000000 69 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +search 0 95 2.397895 0.000000 155 +homepag 0 93 2.397895 0.000000 148 +would 0 67 2.708050 0.000000 251 +much 0 52 2.995732 0.000000 349 +possibl 0 47 3.091042 0.000000 378 +probabl 0 40 3.258097 0.000000 455 +intro 0 17 4.110874 0.000000 915 +brad 0 12 4.465908 0.000000 1264 +bore 0 7 5.010635 0.000000 1948 +athlet 0 7 5.010635 0.000000 1933 +altavista 0 6 5.164786 0.000000 2222 +aim 0 5 5.347108 0.000000 2477 +jazz 0 5 5.347108 0.000000 2527 +thayer 0 3 5.857933 0.000000 3441 +thec 0 3 5.857933 0.000000 3132 +badger 0 3 5.857933 0.000000 3502 +packer 0 3 5.857933 0.000000 3728 +foolish 0 2 6.263398 0.000000 6108 +pepper 0 2 6.263398 0.000000 6013 +pagesom 0 2 6.263398 0.000000 6109 +adress 0 2 6.263398 0.000000 5168 +pageuw 0 2 6.263398 0.000000 6021 +neglect 0 1 6.957497 0.000000 17768 +seminaranywai 0 1 6.957497 0.000000 17769 +beaucoup 0 1 6.957497 0.000000 17770 +boir 0 1 6.957497 0.000000 17771 +enginefind 0 1 6.957497 0.000000 17772 +wideth 0 1 6.957497 0.000000 17773 +duan 0 1 6.957497 0.000000 17774 +mclaughlin 0 1 6.957497 0.000000 17775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..edc6b8cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +architectur 0 139 1.945910 0.000000 77 +file 0 132 1.945910 0.000000 70 +dayton 0 119 2.079442 0.000000 104 +intern 0 108 2.197225 0.000000 128 +access 0 102 2.302585 0.000000 136 +west 0 83 2.484907 0.000000 192 +symposium 0 72 2.639057 0.000000 238 +effici 0 73 2.639057 0.000000 233 +septemb 0 65 2.772589 0.000000 274 +pointer 0 48 3.044522 0.000000 368 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +arrai 0 27 3.637586 0.000000 627 +detect 0 26 3.688879 0.000000 646 +scott 0 18 4.060443 0.000000 884 +regist 0 17 4.110874 0.000000 938 +mellon 0 13 4.382027 0.000000 1179 +carnegi 0 12 4.465908 0.000000 1260 +multiscalar 0 8 4.875197 0.000000 1783 +sohi 0 6 5.164786 0.000000 2237 +microarchitectur 0 6 5.164786 0.000000 2238 +pagescott 0 4 5.568345 0.000000 2978 +breach 1 3 5.857933 5.857933 4009 +recreat 0 3 5.857933 0.000000 3990 +anatomi 0 3 5.857933 0.000000 4010 +vijaykumar 0 3 5.857933 0.000000 4011 +gurindar 0 2 6.263398 0.000000 6110 +usatel 0 2 6.263398 0.000000 6111 +educationph 0 2 6.263398 0.000000 6112 +interestscomput 0 2 6.263398 0.000000 6113 +addresseseducationresearch 0 1 6.957497 0.000000 17776 +associatesaddressesscott 0 1 6.957497 0.000000 17777 +breachdepart 0 1 6.957497 0.000000 17778 +advisorguri 0 1 6.957497 0.000000 17779 +sohiresearch 0 1 6.957497 0.000000 17780 +architecturemultiscalarpublicationsmultiscalar 0 1 6.957497 0.000000 17781 +processorsgurindar 0 1 6.957497 0.000000 17782 +vijaykumarnd 0 1 6.957497 0.000000 17783 +processorscott 0 1 6.957497 0.000000 17784 +sohith 0 1 6.957497 0.000000 17785 +errorstodd 0 1 6.957497 0.000000 17786 +sohiconfer 0 1 6.957497 0.000000 17787 +recreationwingsbeersquidtvassociatestodd 0 1 6.957497 0.000000 17788 +austindoug 0 1 6.957497 0.000000 17789 +burgerbabak 0 1 6.957497 0.000000 17790 +falsafialain 0 1 6.957497 0.000000 17791 +kagit 0 1 6.957497 0.000000 17792 +vijaykumarlast 0 1 6.957497 0.000000 17793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..aebfa164 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +fall 0 181 1.609438 0.000000 40 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +make 0 111 2.197225 0.000000 120 +sinc 0 90 2.397895 0.000000 159 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +name 0 72 2.639057 0.000000 220 +differ 0 66 2.708050 0.000000 253 +get 0 46 3.091042 0.000000 380 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +bookmark 0 26 3.688879 0.000000 639 +notic 0 25 3.737670 0.000000 675 +brief 0 16 4.174387 0.000000 1001 +hobbi 0 16 4.174387 0.000000 1009 +zhang 0 16 4.174387 0.000000 980 +becam 0 14 4.317488 0.000000 1117 +unfortun 0 13 4.382027 0.000000 1170 +stai 0 12 4.465908 0.000000 1215 +said 0 9 4.753590 0.000000 1571 +poor 0 8 4.875197 0.000000 1736 +perhap 0 8 4.875197 0.000000 1693 +smooth 0 7 5.010635 0.000000 1855 +whenev 0 7 5.010635 0.000000 1883 +wouldn 0 7 5.010635 0.000000 1970 +smile 0 7 5.010635 0.000000 1807 +pool 0 6 5.164786 0.000000 2225 +suni 0 5 5.347108 0.000000 2452 +skin 0 4 5.568345 0.000000 2840 +temporarili 0 3 5.857933 0.000000 3692 +crack 0 3 5.857933 0.000000 3435 +roll 0 3 5.857933 0.000000 3723 +isaac 0 3 5.857933 0.000000 3855 +albani 0 2 6.263398 0.000000 4892 +tragic 0 2 6.263398 0.000000 6114 +theblack 0 2 6.263398 0.000000 5869 +eggleston 0 2 6.263398 0.000000 4581 +bleed 0 1 6.957497 0.000000 17794 +nontrivi 0 1 6.957497 0.000000 17795 +waysher 0 1 6.957497 0.000000 17796 +underst 0 1 6.957497 0.000000 17797 +unadorn 0 1 6.957497 0.000000 17798 +pizza 0 1 6.957497 0.000000 17799 +stinkin 0 1 6.957497 0.000000 17800 +myclass 0 1 6.957497 0.000000 17801 +hypersensit 0 1 6.957497 0.000000 17802 +rockjock 0 1 6.957497 0.000000 17803 +cretin 0 1 6.957497 0.000000 17804 +brood 0 1 6.957497 0.000000 17805 +glare 0 1 6.957497 0.000000 17806 +clenchesfist 0 1 6.957497 0.000000 17807 +knuckl 0 1 6.957497 0.000000 17808 +flightyfemm 0 1 6.957497 0.000000 17809 +razz 0 1 6.957497 0.000000 17810 +asskick 0 1 6.957497 0.000000 17811 +thirdgrad 0 1 6.957497 0.000000 17812 +hardbodi 0 1 6.957497 0.000000 17813 +leatherboi 0 1 6.957497 0.000000 17814 +leer 0 1 6.957497 0.000000 17815 +atm 0 1 6.957497 0.000000 17816 +todayi 0 1 6.957497 0.000000 17817 +giggl 0 1 6.957497 0.000000 17818 +aprostitut 0 1 6.957497 0.000000 17819 +bigotri 0 1 6.957497 0.000000 17820 +pedagodi 0 1 6.957497 0.000000 17821 +goat 0 1 6.957497 0.000000 17822 +refus 0 1 6.957497 0.000000 17823 +claw 0 1 6.957497 0.000000 17824 +sssuuuhhh 0 1 6.957497 0.000000 17825 +mmuuuhhhh 0 1 6.957497 0.000000 17826 +dddduuuuuhhhhh 0 1 6.957497 0.000000 17827 +mmmmuuuhhhh 0 1 6.957497 0.000000 17828 +maaaahhhjaaaaaahhhhh 0 1 6.957497 0.000000 17829 +fffuuuhhhhh 0 1 6.957497 0.000000 17830 +yyyyyyyuuuuuhhhhh 0 1 6.957497 0.000000 17831 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 0 1 6.957497 0.000000 17832 +uuuhhh 0 1 6.957497 0.000000 17833 +uuummmm 0 1 6.957497 0.000000 17834 +uuuhhhh 0 1 6.957497 0.000000 17835 +wwwwwhhhhuuuuuhhhhh 0 1 6.957497 0.000000 17836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..54616208 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,155 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +file 0 132 1.945910 0.000000 70 +perform 0 143 1.945910 0.000000 74 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +architectur 0 139 1.945910 0.000000 77 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +schedul 0 119 2.079442 0.000000 85 +assist 0 112 2.197225 0.000000 113 +topic 0 114 2.197225 0.000000 110 +version 0 113 2.197225 0.000000 122 +manag 0 114 2.197225 0.000000 125 +specif 0 106 2.197225 0.000000 106 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +control 0 82 2.484907 0.000000 164 +educ 0 86 2.484907 0.000000 191 +resourc 0 81 2.484907 0.000000 172 +west 0 83 2.484907 0.000000 192 +novemb 0 81 2.484907 0.000000 179 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +good 0 77 2.564949 0.000000 200 +summari 0 73 2.639057 0.000000 237 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +integr 0 67 2.708050 0.000000 245 +simul 0 66 2.708050 0.000000 255 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +thesi 0 57 2.890372 0.000000 327 +summer 0 56 2.890372 0.000000 311 +talk 0 53 2.944439 0.000000 336 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +physic 0 47 3.091042 0.000000 377 +cach 0 41 3.218876 0.000000 432 +streetmadison 0 38 3.295837 0.000000 474 +slide 0 38 3.295837 0.000000 467 +prototyp 0 38 3.295837 0.000000 463 +sciencesunivers 0 37 3.332205 0.000000 486 +china 0 37 3.332205 0.000000 487 +respons 0 37 3.332205 0.000000 476 +tech 0 35 3.401197 0.000000 515 +global 0 34 3.401197 0.000000 520 +john 0 33 3.433987 0.000000 532 +extend 0 32 3.465736 0.000000 539 +storag 0 31 3.496508 0.000000 553 +focus 0 29 3.583519 0.000000 584 +trace 0 25 3.737670 0.000000 677 +strategi 0 25 3.737670 0.000000 682 +disk 0 22 3.850148 0.000000 747 +alloc 0 20 3.951244 0.000000 821 +kernel 0 20 3.951244 0.000000 825 +department 0 20 3.951244 0.000000 839 +beij 0 19 4.007333 0.000000 876 +princeton 0 15 4.248495 0.000000 1042 +eduphon 0 15 4.248495 0.000000 1060 +decid 0 14 4.317488 0.000000 1075 +karlin 0 13 4.382027 0.000000 1176 +sigmetr 0 13 4.382027 0.000000 1173 +tsinghua 0 13 4.382027 0.000000 1195 +anna 0 12 4.465908 0.000000 1292 +usenix 0 12 4.465908 0.000000 1240 +isca 0 11 4.553877 0.000000 1354 +osdi 0 9 4.753590 0.000000 1534 +significantli 0 9 4.753590 0.000000 1508 +uniprocessor 0 8 4.875197 0.000000 1696 +replac 0 8 4.875197 0.000000 1668 +prefetch 0 6 5.164786 0.000000 2039 +edward 0 6 5.164786 0.000000 2050 +sciencedepart 0 6 5.164786 0.000000 2172 +carefulli 0 6 5.164786 0.000000 2045 +chosen 0 6 5.164786 0.000000 1984 +toc 0 5 5.347108 0.000000 2562 +summarymi 0 5 5.347108 0.000000 2580 +havedevelop 0 4 5.568345 0.000000 2681 +felten 0 3 5.857933 0.000000 3925 +cachingtraci 0 3 5.857933 0.000000 3923 +kimbrel 0 3 5.857933 0.000000 3924 +shorter 0 3 5.857933 0.000000 3998 +raid 0 3 5.857933 0.000000 4012 +aggress 0 3 5.857933 0.000000 3240 +wilk 0 2 6.263398 0.000000 4548 +andpostscript 0 2 6.263398 0.000000 5696 +andparallel 0 2 6.263398 0.000000 6014 +princetonunivers 0 1 6.957497 0.000000 17837 +usacao 0 1 6.957497 0.000000 17838 +cachingacf 0 1 6.957497 0.000000 17839 +tracesrec 0 1 6.957497 0.000000 17840 +papersintegr 0 1 6.957497 0.000000 17841 +schedulingpei 0 1 6.957497 0.000000 17842 +strategiespei 0 1 6.957497 0.000000 17843 +peform 0 1 6.957497 0.000000 17844 +tickertaip 0 1 6.957497 0.000000 17845 +swee 0 1 6.957497 0.000000 17846 +boon 0 1 6.957497 0.000000 17847 +shivakumar 0 1 6.957497 0.000000 17848 +venkataraman 0 1 6.957497 0.000000 17849 +talksslid 0 1 6.957497 0.000000 17850 +improvefil 0 1 6.957497 0.000000 17851 +filecach 0 1 6.957497 0.000000 17852 +individualappl 0 1 6.957497 0.000000 17853 +useit 0 1 6.957497 0.000000 17854 +fairglob 0 1 6.957497 0.000000 17855 +cachereplac 0 1 6.957497 0.000000 17856 +implementationon 0 1 6.957497 0.000000 17857 +demonstratedthat 0 1 6.957497 0.000000 17858 +informationcan 0 1 6.957497 0.000000 17859 +amdevelop 0 1 6.957497 0.000000 17860 +diskarrai 0 1 6.957497 0.000000 17861 +managementproblem 0 1 6.957497 0.000000 17862 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..e0cb83f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +data 0 170 1.791759 0.000000 49 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +austin 0 168 1.791759 0.000000 63 +algorithm 0 162 1.791759 0.000000 57 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +memori 0 101 2.302585 0.000000 139 +center 0 88 2.397895 0.000000 158 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +build 0 85 2.484907 0.000000 184 +west 0 83 2.484907 0.000000 192 +member 0 84 2.484907 0.000000 165 +environ 0 84 2.484907 0.000000 177 +academ 0 82 2.484907 0.000000 178 +ieee 0 86 2.484907 0.000000 190 +larg 0 82 2.484907 0.000000 168 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +come 0 78 2.564949 0.000000 202 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +involv 0 71 2.639057 0.000000 227 +workshop 0 71 2.639057 0.000000 239 +appli 0 71 2.639057 0.000000 226 +meet 0 72 2.639057 0.000000 229 +goal 0 66 2.708050 0.000000 250 +multimedia 0 68 2.708050 0.000000 258 +main 0 67 2.708050 0.000000 256 +experi 0 64 2.772589 0.000000 283 +evalu 0 64 2.772589 0.000000 266 +complex 0 64 2.772589 0.000000 269 +septemb 0 65 2.772589 0.000000 274 +street 0 63 2.772589 0.000000 293 +march 0 61 2.833213 0.000000 295 +share 0 59 2.833213 0.000000 304 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +content 0 59 2.833213 0.000000 302 +unix 0 58 2.890372 0.000000 308 +browser 0 56 2.890372 0.000000 313 +index 0 56 2.890372 0.000000 309 +publish 0 57 2.890372 0.000000 326 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +case 0 51 2.995732 0.000000 351 +move 0 47 3.091042 0.000000 382 +past 0 42 3.218876 0.000000 428 +autom 0 41 3.218876 0.000000 434 +cach 0 41 3.218876 0.000000 432 +transact 0 39 3.258097 0.000000 438 +live 0 40 3.258097 0.000000 451 +multipl 0 39 3.258097 0.000000 453 +join 0 39 3.258097 0.000000 457 +industri 0 38 3.295837 0.000000 464 +staff 0 36 3.367296 0.000000 490 +multi 0 36 3.367296 0.000000 493 +next 0 34 3.401197 0.000000 517 +queri 0 33 3.433987 0.000000 524 +extend 0 32 3.465736 0.000000 539 +storag 0 31 3.496508 0.000000 553 +specifi 0 30 3.555348 0.000000 568 +focus 0 29 3.583519 0.000000 584 +becom 0 28 3.610918 0.000000 603 +measur 0 28 3.610918 0.000000 609 +manipul 0 27 3.637586 0.000000 624 +though 0 27 3.637586 0.000000 622 +proc 0 26 3.688879 0.000000 649 +effort 0 26 3.688879 0.000000 652 +altern 0 26 3.688879 0.000000 641 +challeng 0 26 3.688879 0.000000 653 +spent 0 25 3.737670 0.000000 676 +toward 0 25 3.737670 0.000000 668 +primari 0 25 3.737670 0.000000 669 +accur 0 25 3.737670 0.000000 680 +client 0 25 3.737670 0.000000 679 +mike 0 24 3.761200 0.000000 703 +scalabl 0 24 3.761200 0.000000 705 +known 0 24 3.761200 0.000000 702 +initi 0 23 3.806662 0.000000 717 +william 0 22 3.850148 0.000000 765 +varieti 0 22 3.850148 0.000000 740 +leav 0 21 3.912023 0.000000 772 +fine 0 20 3.951244 0.000000 822 +sigmod 0 19 4.007333 0.000000 877 +benchmark 0 19 4.007333 0.000000 859 +thoma 0 18 4.060443 0.000000 901 +statu 0 18 4.060443 0.000000 885 +repositori 0 17 4.110874 0.000000 932 +white 0 17 4.110874 0.000000 951 +jose 0 16 4.174387 0.000000 976 +upon 0 16 4.174387 0.000000 978 +taiwan 0 16 4.174387 0.000000 1006 +brown 0 16 4.174387 0.000000 977 +cambridg 0 16 4.174387 0.000000 1008 +livni 0 15 4.248495 0.000000 1053 +hybrid 0 15 4.248495 0.000000 1057 +heterogen 0 14 4.317488 0.000000 1090 +conf 0 13 4.382027 0.000000 1181 +dbm 0 13 4.382027 0.000000 1136 +forth 0 13 4.382027 0.000000 1186 +signific 0 13 4.382027 0.000000 1125 +front 0 13 4.382027 0.000000 1154 +sigmetr 0 13 4.382027 0.000000 1173 +workload 0 12 4.465908 0.000000 1210 +dewitt 0 12 4.465908 0.000000 1270 +tune 0 12 4.465908 0.000000 1227 +franc 0 12 4.465908 0.000000 1276 +shore 0 11 4.553877 0.000000 1377 +persist 0 11 4.553877 0.000000 1367 +road 0 11 4.553877 0.000000 1374 +franklin 0 10 4.653960 0.000000 1436 +naughton 0 10 4.653960 0.000000 1450 +resid 0 10 4.653960 0.000000 1461 +grain 0 10 4.653960 0.000000 1448 +rel 0 9 4.753590 0.000000 1487 +vernon 0 9 4.753590 0.000000 1556 +lock 0 9 4.753590 0.000000 1551 +morgan 0 9 4.753590 0.000000 1484 +carei 0 8 4.875197 0.000000 1781 +replac 0 8 4.875197 0.000000 1668 +ride 0 8 4.875197 0.000000 1741 +solomon 0 8 4.875197 0.000000 1716 +hash 0 8 4.875197 0.000000 1618 +portland 0 7 5.010635 0.000000 1878 +eduresearch 0 6 5.164786 0.000000 2205 +whichi 0 6 5.164786 0.000000 2056 +academia 0 6 5.164786 0.000000 2036 +tobe 0 6 5.164786 0.000000 1995 +oopsla 0 6 5.164786 0.000000 2221 +srinivasan 0 6 5.164786 0.000000 2175 +patel 0 6 5.164786 0.000000 2154 +almaden 0 5 5.347108 0.000000 2511 +minneapoli 0 5 5.347108 0.000000 2480 +fraction 0 5 5.347108 0.000000 2259 +ifip 0 5 5.347108 0.000000 2459 +tsatalo 0 5 5.347108 0.000000 2581 +england 0 5 5.347108 0.000000 2557 +kaufmann 0 5 5.347108 0.000000 2254 +harri 0 4 5.568345 0.000000 3034 +exodu 0 4 5.568345 0.000000 3075 +sabbat 0 4 5.568345 0.000000 2824 +taipei 0 4 5.568345 0.000000 2926 +chile 0 4 5.568345 0.000000 3082 +mcauliff 0 4 5.568345 0.000000 3083 +zwill 0 4 5.568345 0.000000 3076 +nashvil 0 4 5.568345 0.000000 2867 +andp 0 4 5.568345 0.000000 2811 +twelv 0 3 5.857933 0.000000 3899 +codi 0 3 5.857933 0.000000 3940 +schwarz 0 3 5.857933 0.000000 3986 +andm 0 3 5.857933 0.000000 3901 +tradeoff 0 3 5.857933 0.000000 3387 +forobject 0 3 5.857933 0.000000 3965 +oodb 0 3 5.857933 0.000000 3954 +switzerland 0 3 5.857933 0.000000 3551 +santiago 0 3 5.857933 0.000000 4013 +schuh 0 3 5.857933 0.000000 4014 +pang 0 3 5.857933 0.000000 3509 +revisit 0 3 5.857933 0.000000 3915 +haa 0 2 6.263398 0.000000 6115 +niblack 0 2 6.263398 0.000000 4364 +oodbm 0 2 6.263398 0.000000 6083 +interestsdatabas 0 2 6.263398 0.000000 6116 +theexodu 0 2 6.263398 0.000000 6076 +aimedat 0 2 6.263398 0.000000 6117 +researchgroup 0 2 6.263398 0.000000 5588 +homogen 0 2 6.263398 0.000000 4821 +roth 0 2 6.263398 0.000000 6089 +lausann 0 2 6.263398 0.000000 4955 +and 0 2 6.263398 0.000000 5241 +dataengin 0 2 6.263398 0.000000 6118 +zaharioudaki 0 2 6.263398 0.000000 6119 +modelingof 0 2 6.263398 0.000000 5734 +indistribut 0 2 6.263398 0.000000 4257 +garlic 0 1 6.957497 0.000000 17863 +arya 0 1 6.957497 0.000000 17864 +fagin 0 1 6.957497 0.000000 17865 +flickner 0 1 6.957497 0.000000 17866 +petkov 0 1 6.957497 0.000000 17867 +wimmer 0 1 6.957497 0.000000 17868 +careymichael 0 1 6.957497 0.000000 17869 +careyprofessor 0 1 6.957497 0.000000 17870 +performanceand 0 1 6.957497 0.000000 17871 +topicsof 0 1 6.957497 0.000000 17872 +algorithmsrel 0 1 6.957497 0.000000 17873 +userdatabas 0 1 6.957497 0.000000 17874 +persistentobject 0 1 6.957497 0.000000 17875 +objectmanag 0 1 6.957497 0.000000 17876 +applicationssuch 0 1 6.957497 0.000000 17877 +greatyear 0 1 6.957497 0.000000 17878 +tackl 0 1 6.957497 0.000000 17879 +anddiffer 0 1 6.957497 0.000000 17880 +thesourc 0 1 6.957497 0.000000 17881 +projectther 0 1 6.957497 0.000000 17882 +multimediainform 0 1 6.957497 0.000000 17883 +objectdatabas 0 1 6.957497 0.000000 17884 +continuedto 0 1 6.957497 0.000000 17885 +aqueri 0 1 6.957497 0.000000 17886 +pesto 0 1 6.957497 0.000000 17887 +thegarl 0 1 6.957497 0.000000 17888 +kiernan 0 1 6.957497 0.000000 17889 +orientedprogram 0 1 6.957497 0.000000 17890 +tork 0 1 6.957497 0.000000 17891 +visualdatabas 0 1 6.957497 0.000000 17892 +garlicapproach 0 1 6.957497 0.000000 17893 +luniewski 0 1 6.957497 0.000000 17894 +withd 0 1 6.957497 0.000000 17895 +kant 0 1 6.957497 0.000000 17896 +onobject 0 1 6.957497 0.000000 17897 +mehta 0 1 6.957497 0.000000 17898 +thint 0 1 6.957497 0.000000 17899 +smrc 0 1 6.957497 0.000000 17900 +withb 0 1 6.957497 0.000000 17901 +reinwald 0 1 6.957497 0.000000 17902 +desslock 0 1 6.957497 0.000000 17903 +lehman 0 1 6.957497 0.000000 17904 +pirahesh 0 1 6.957497 0.000000 17905 +tarascon 0 1 6.957497 0.000000 17906 +provenc 0 1 6.957497 0.000000 17907 +sigmodint 0 1 6.957497 0.000000 17908 +managementof 0 1 6.957497 0.000000 17909 +multivers 0 1 6.957497 0.000000 17910 +bober 0 1 6.957497 0.000000 17911 +oszu 0 1 6.957497 0.000000 17912 +dayal 0 1 6.957497 0.000000 17913 +valduriez 0 1 6.957497 0.000000 17914 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..842218ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +structur 0 106 2.197225 0.000000 105 +west 0 83 2.484907 0.000000 192 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +fridai 0 44 3.135494 0.000000 390 +streetmadison 0 38 3.295837 0.000000 474 +edutelephon 0 10 4.653960 0.000000 1473 +chin 0 5 5.347108 0.000000 2408 +tang 0 5 5.347108 0.000000 2409 +bldg 0 4 5.568345 0.000000 2983 +biochemistri 0 3 5.857933 0.000000 3513 +cchin 0 2 6.263398 0.000000 4691 +pagechin 0 1 6.957497 0.000000 17915 +tanggradu 0 1 6.957497 0.000000 17916 +ameduc 0 1 6.957497 0.000000 17917 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..3c9ab602 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +book 0 99 2.302585 0.000000 131 +real 0 93 2.397895 0.000000 144 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +come 0 78 2.564949 0.000000 202 +summari 0 73 2.639057 0.000000 237 +york 0 41 3.218876 0.000000 435 +movi 0 40 3.258097 0.000000 459 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +soon 0 36 3.367296 0.000000 494 +miscellan 0 23 3.806662 0.000000 731 +log 0 19 4.007333 0.000000 857 +usaphon 0 9 4.753590 0.000000 1600 +assistantdepart 0 8 4.875197 0.000000 1784 +cultur 0 7 5.010635 0.000000 1951 +chandra 1 6 5.164786 5.164786 2091 +altavista 0 6 5.164786 0.000000 2222 +satish 0 4 5.568345 0.000000 2833 +wodehous 0 2 6.263398 0.000000 4990 +italian 0 2 6.263398 0.000000 5993 +nostalgia 0 1 6.957497 0.000000 17918 +linksclick 0 1 6.957497 0.000000 17919 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..bc6f675c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +mathemat 0 108 2.197225 0.000000 123 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +state 0 76 2.564949 0.000000 207 +messag 0 76 2.564949 0.000000 212 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +integr 0 67 2.708050 0.000000 245 +august 0 66 2.708050 0.000000 257 +share 0 59 2.833213 0.000000 304 +automat 0 61 2.833213 0.000000 306 +suggest 0 53 2.944439 0.000000 331 +visual 0 48 3.044522 0.000000 372 +physic 0 47 3.091042 0.000000 377 +electron 0 47 3.091042 0.000000 379 +examin 0 42 3.218876 0.000000 424 +movi 0 40 3.258097 0.000000 459 +streetmadison 0 38 3.295837 0.000000 474 +vita 0 38 3.295837 0.000000 473 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 0 35 3.401197 0.000000 507 +tech 0 35 3.401197 0.000000 515 +award 0 34 3.401197 0.000000 523 +curriculum 0 33 3.433987 0.000000 535 +pass 0 28 3.610918 0.000000 611 +enhanc 0 26 3.688879 0.000000 644 +supercomput 0 25 3.737670 0.000000 681 +honor 0 23 3.806662 0.000000 729 +miscellan 0 23 3.806662 0.000000 731 +indian 0 22 3.850148 0.000000 769 +annot 0 21 3.912023 0.000000 775 +wind 0 18 4.060443 0.000000 908 +thoma 0 18 4.060443 0.000000 901 +stephen 0 11 4.553877 0.000000 1342 +laru 0 9 4.753590 0.000000 1560 +ball 0 9 4.753590 0.000000 1608 +assistantdepart 0 8 4.875197 0.000000 1784 +insert 0 8 4.875197 0.000000 1687 +gold 0 8 4.875197 0.000000 1745 +bombai 0 7 5.010635 0.000000 1972 +dream 0 6 5.164786 0.000000 2165 +merit 0 5 5.347108 0.000000 2466 +icpp 0 5 5.347108 0.000000 2382 +chilimbi 1 3 5.857933 5.857933 4015 +trishul 0 3 5.857933 0.000000 4016 +usaadvisor 0 3 5.857933 0.000000 4017 +certif 0 3 5.857933 0.000000 3859 +medal 0 3 5.857933 0.000000 3912 +cico 0 2 6.263398 0.000000 6120 +eick 0 2 6.263398 0.000000 5971 +megradu 0 1 6.957497 0.000000 17920 +designresearch 0 1 6.957497 0.000000 17921 +tunneleduc 0 1 6.957497 0.000000 17922 +publicationscachi 0 1 6.957497 0.000000 17923 +stormwatch 0 1 6.957497 0.000000 17924 +protocolstrishul 0 1 6.957497 0.000000 17925 +olympiadpresid 0 1 6.957497 0.000000 17926 +examinationcertif 0 1 6.957497 0.000000 17927 +chemistrycertif 0 1 6.957497 0.000000 17928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..0d783417 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +june 0 79 2.564949 0.000000 214 +septemb 0 65 2.772589 0.000000 274 +mark 0 44 3.135494 0.000000 403 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +hill 0 25 3.737670 0.000000 670 +indian 0 22 3.850148 0.000000 769 +madra 0 8 4.875197 0.000000 1770 +univeristi 0 8 4.875197 0.000000 1754 +studentdepart 0 5 5.347108 0.000000 2505 +usaadvisor 0 3 5.857933 0.000000 4017 +chandrasekaran 1 2 6.263398 6.263398 6121 +sashikanth 1 2 6.263398 6.263398 6122 +btech 0 2 6.263398 0.000000 6123 +csashi 0 1 6.957497 0.000000 17929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..f57728f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +develop 0 174 1.791759 0.000000 53 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +databas 1 122 2.079442 2.079442 86 +site 0 106 2.197225 0.000000 119 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +world 0 115 2.197225 0.000000 126 +text 0 98 2.302585 0.000000 133 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +librari 0 87 2.484907 0.000000 181 +internet 0 83 2.484907 0.000000 186 +free 0 73 2.639057 0.000000 224 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +java 0 70 2.708050 0.000000 248 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +locat 0 59 2.833213 0.000000 303 +index 0 56 2.890372 0.000000 309 +standard 0 48 3.044522 0.000000 365 +life 0 50 3.044522 0.000000 375 +approach 0 48 3.044522 0.000000 366 +transact 0 39 3.258097 0.000000 438 +map 0 39 3.258097 0.000000 452 +prototyp 0 38 3.295837 0.000000 463 +microsoft 0 38 3.295837 0.000000 468 +open 0 38 3.295837 0.000000 469 +sciencesunivers 0 37 3.332205 0.000000 486 +global 0 34 3.401197 0.000000 520 +survei 0 35 3.401197 0.000000 513 +posit 0 31 3.496508 0.000000 552 +focu 0 30 3.555348 0.000000 571 +weather 0 28 3.610918 0.000000 618 +item 0 19 4.007333 0.000000 856 +white 0 17 4.110874 0.000000 951 +dilbert 0 16 4.174387 0.000000 996 +stock 0 16 4.174387 0.000000 1007 +track 0 15 4.248495 0.000000 1029 +doit 0 14 4.317488 0.000000 1111 +draft 0 14 4.317488 0.000000 1085 +land 0 12 4.465908 0.000000 1273 +shore 0 11 4.553877 0.000000 1377 +council 0 11 4.553877 0.000000 1364 +appl 0 11 4.553877 0.000000 1303 +market 0 11 4.553877 0.000000 1361 +consortium 0 10 4.653960 0.000000 1467 +govern 0 9 4.753590 0.000000 1581 +paradis 0 8 4.875197 0.000000 1782 +gopher 0 6 5.164786 0.000000 1982 +feder 0 5 5.347108 0.000000 2266 +wiscinfo 0 3 5.857933 0.000000 3106 +dienst 0 3 5.857933 0.000000 3640 +commerc 0 3 5.857933 0.000000 3209 +eosdi 0 2 6.263398 0.000000 6124 +nebraska 0 2 6.263398 0.000000 5574 +lincoln 0 2 6.263398 0.000000 5575 +metrowerk 0 2 6.263398 0.000000 4131 +worm 0 2 6.263398 0.000000 5775 +projectdepart 0 2 6.263398 0.000000 6125 +curt 0 1 6.957497 0.000000 17930 +ellmann 0 1 6.957497 0.000000 17931 +webgnat 0 1 6.957497 0.000000 17932 +defect 0 1 6.957497 0.000000 17933 +opengi 0 1 6.957497 0.000000 17934 +calmit 0 1 6.957497 0.000000 17935 +illustra 0 1 6.957497 0.000000 17936 +papersmiscellan 0 1 6.957497 0.000000 17937 +sitescampu 0 1 6.957497 0.000000 17938 +wyrm 0 1 6.957497 0.000000 17939 +hoard 0 1 6.957497 0.000000 17940 +wiscnet 0 1 6.957497 0.000000 17941 +netcorpor 0 1 6.957497 0.000000 17942 +paww 0 1 6.957497 0.000000 17943 +taligentsearch 0 1 6.957497 0.000000 17944 +savvi 0 1 6.957497 0.000000 17945 +webcrawl 0 1 6.957497 0.000000 17946 +winsock 0 1 6.957497 0.000000 17947 +geolog 0 1 6.957497 0.000000 17948 +gil 0 1 6.957497 0.000000 17949 +oakridg 0 1 6.957497 0.000000 17950 +datacurt 0 1 6.957497 0.000000 17951 +ellmanncurt 0 1 6.957497 0.000000 17952 +eduparadis 0 1 6.957497 0.000000 17953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..0978c0e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +yong 1 4 5.568345 5.568345 2809 +chee 1 3 5.857933 5.857933 3480 +cychan 0 2 6.263398 0.000000 4737 +pagechan 0 1 6.957497 0.000000 17954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..0bcf140a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +schedul 0 119 2.079442 0.000000 85 +dayton 0 119 2.079442 0.000000 104 +mathemat 0 108 2.197225 0.000000 123 +educ 0 86 2.484907 0.000000 191 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +colleg 0 61 2.833213 0.000000 300 +friend 0 48 3.044522 0.000000 376 +physic 0 47 3.091042 0.000000 377 +math 0 44 3.135494 0.000000 402 +mechan 0 43 3.178054 0.000000 416 +statist 0 35 3.401197 0.000000 521 +lewi 0 8 4.875197 0.000000 1700 +nuclear 0 5 5.347108 0.000000 2576 +clark 0 4 5.568345 0.000000 2705 +mace 0 2 6.263398 0.000000 4849 +astronaut 0 2 6.263398 0.000000 5748 +sara 0 1 6.957497 0.000000 17955 +bauman 0 1 6.957497 0.000000 17956 +dailei 0 1 6.957497 0.000000 17957 +baumandailei 0 1 6.957497 0.000000 17958 +edugradu 0 1 6.957497 0.000000 17959 +pagessend 0 1 6.957497 0.000000 17960 +daileytu 0 1 6.957497 0.000000 17961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..8a80a5cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,314 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +area 0 144 1.945910 0.000000 80 +first 0 140 1.945910 0.000000 71 +tool 0 117 2.079442 0.000000 93 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +intern 0 108 2.197225 0.000000 128 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +version 0 113 2.197225 0.000000 122 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +take 0 97 2.302585 0.000000 134 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +ieee 0 86 2.484907 0.000000 190 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +activ 0 84 2.484907 0.000000 182 +novemb 0 81 2.484907 0.000000 179 +chang 0 82 2.484907 0.000000 163 +librari 0 87 2.484907 0.000000 181 +interfac 0 79 2.564949 0.000000 209 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +state 0 76 2.564949 0.000000 207 +dynam 0 76 2.564949 0.000000 194 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +method 0 80 2.564949 0.000000 213 +david 0 71 2.639057 0.000000 232 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +effici 0 73 2.639057 0.000000 233 +write 0 72 2.639057 0.000000 222 +simul 0 66 2.708050 0.000000 255 +integr 0 67 2.708050 0.000000 245 +august 0 66 2.708050 0.000000 257 +main 0 67 2.708050 0.000000 256 +goal 0 66 2.708050 0.000000 250 +evalu 0 64 2.772589 0.000000 266 +abstract 0 62 2.772589 0.000000 276 +new 0 64 2.772589 0.000000 262 +organ 0 65 2.772589 0.000000 265 +virtual 0 62 2.772589 0.000000 285 +result 0 65 2.772589 0.000000 281 +function 0 62 2.772589 0.000000 275 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +type 0 61 2.833213 0.000000 296 +major 0 56 2.890372 0.000000 315 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +hardwar 0 51 2.995732 0.000000 350 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +tabl 0 51 2.995732 0.000000 346 +investig 0 51 2.995732 0.000000 353 +frequent 0 49 3.044522 0.000000 367 +california 0 46 3.091042 0.000000 388 +effect 0 46 3.091042 0.000000 385 +understand 0 47 3.091042 0.000000 384 +mark 0 44 3.135494 0.000000 403 +protocol 0 45 3.135494 0.000000 407 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +fast 0 42 3.218876 0.000000 429 +combin 0 42 3.218876 0.000000 421 +programm 0 39 3.258097 0.000000 445 +transact 0 39 3.258097 0.000000 438 +electr 0 38 3.295837 0.000000 461 +streetmadison 0 38 3.295837 0.000000 474 +brian 0 38 3.295837 0.000000 466 +correct 0 38 3.295837 0.000000 462 +prototyp 0 38 3.295837 0.000000 463 +origin 0 38 3.295837 0.000000 472 +sciencesunivers 0 37 3.332205 0.000000 486 +workstat 0 37 3.332205 0.000000 479 +cost 0 37 3.332205 0.000000 480 +especi 0 36 3.367296 0.000000 496 +multi 0 36 3.367296 0.000000 493 +procedur 0 36 3.367296 0.000000 488 +jame 0 35 3.401197 0.000000 507 +bibliographi 0 34 3.401197 0.000000 518 +fault 0 32 3.465736 0.000000 547 +exist 0 30 3.555348 0.000000 569 +specifi 0 30 3.555348 0.000000 568 +power 0 30 3.555348 0.000000 573 +robert 0 30 3.555348 0.000000 567 +profil 0 30 3.555348 0.000000 581 +common 0 30 3.555348 0.000000 574 +steve 0 29 3.583519 0.000000 594 +synchron 0 29 3.583519 0.000000 588 +depend 0 29 3.583519 0.000000 583 +platform 0 29 3.583519 0.000000 591 +multiprocessor 0 28 3.610918 0.000000 605 +pass 0 28 3.610918 0.000000 611 +propos 0 28 3.610918 0.000000 602 +manipul 0 27 3.637586 0.000000 624 +berkelei 0 26 3.688879 0.000000 657 +revis 0 26 3.688879 0.000000 640 +altern 0 26 3.688879 0.000000 641 +detect 0 26 3.688879 0.000000 646 +hill 0 25 3.737670 0.000000 670 +supercomput 0 25 3.737670 0.000000 681 +wai 0 25 3.737670 0.000000 662 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +self 0 22 3.850148 0.000000 761 +reduc 0 22 3.850148 0.000000 759 +cooper 0 22 3.850148 0.000000 757 +vlsi 0 21 3.912023 0.000000 795 +annot 0 21 3.912023 0.000000 775 +fine 0 20 3.951244 0.000000 822 +department 0 20 3.951244 0.000000 839 +portabl 0 20 3.951244 0.000000 819 +exploit 0 20 3.951244 0.000000 836 +binari 0 20 3.951244 0.000000 823 +benchmark 0 19 4.007333 0.000000 859 +five 0 19 4.007333 0.000000 841 +wind 0 18 4.060443 0.000000 908 +steven 0 17 4.110874 0.000000 953 +interconnect 0 17 4.110874 0.000000 937 +asplo 0 17 4.110874 0.000000 948 +intel 0 16 4.174387 0.000000 1000 +advantag 0 16 4.174387 0.000000 987 +transfer 0 16 4.174387 0.000000 967 +upon 0 16 4.174387 0.000000 978 +overhead 0 15 4.248495 0.000000 1035 +eduphon 0 15 4.248495 0.000000 1060 +hybrid 0 15 4.248495 0.000000 1057 +remot 0 15 4.248495 0.000000 1041 +action 0 15 4.248495 0.000000 1038 +driven 0 15 4.248495 0.000000 1048 +coher 0 14 4.317488 0.000000 1109 +convent 0 14 4.317488 0.000000 1072 +block 0 13 4.382027 0.000000 1183 +tune 0 12 4.465908 0.000000 1227 +wood 0 11 4.553877 0.000000 1355 +isca 0 11 4.553877 0.000000 1354 +transpar 0 11 4.553877 0.000000 1325 +faster 0 11 4.553877 0.000000 1323 +grain 0 10 4.653960 0.000000 1448 +facilit 0 10 4.653960 0.000000 1412 +laru 0 9 4.753590 0.000000 1560 +tunnel 0 9 4.753590 0.000000 1615 +significantli 0 9 4.753590 0.000000 1508 +paradigm 0 8 4.875197 0.000000 1662 +secretari 0 8 4.875197 0.000000 1775 +uniprocessor 0 8 4.875197 0.000000 1696 +spec 0 8 4.875197 0.000000 1640 +burger 0 7 5.010635 0.000000 1889 +roger 0 7 5.010635 0.000000 1892 +hit 0 7 5.010635 0.000000 1965 +tag 0 7 5.010635 0.000000 1821 +instrument 0 7 5.010635 0.000000 1954 +duke 0 6 5.164786 0.000000 2231 +microsystem 0 6 5.164786 0.000000 2160 +ann 0 6 5.164786 0.000000 2065 +feasibl 0 6 5.164786 0.000000 2157 +byte 0 6 5.164786 0.000000 2108 +invok 0 6 5.164786 0.000000 2079 +lebeck 0 5 5.347108 0.000000 2582 +reinhardt 0 5 5.347108 0.000000 2583 +babak 0 5 5.347108 0.000000 2584 +falsafi 0 5 5.347108 0.000000 2585 +tempest 0 5 5.347108 0.000000 2548 +ioanni 0 5 5.347108 0.000000 2553 +rewrit 0 5 5.347108 0.000000 2367 +scienceand 0 5 5.347108 0.000000 2348 +mukherje 0 5 5.347108 0.000000 2586 +accuraci 0 5 5.347108 0.000000 2450 +dougla 0 5 5.347108 0.000000 2471 +toc 0 5 5.347108 0.000000 2562 +computerarchitectur 0 5 5.347108 0.000000 2290 +focuss 0 5 5.347108 0.000000 2271 +hypothet 0 5 5.347108 0.000000 2474 +lookup 0 5 5.347108 0.000000 2399 +sparcstat 0 5 5.347108 0.000000 2406 +alvin 0 4 5.568345 0.000000 3084 +wart 0 4 5.568345 0.000000 2987 +hyder 0 4 5.568345 0.000000 2772 +schoina 0 4 5.568345 0.000000 3085 +engineeringdepart 0 4 5.568345 0.000000 2917 +decoupl 0 4 5.568345 0.000000 2898 +talluri 0 4 5.568345 0.000000 2820 +rapidli 0 4 5.568345 0.000000 2850 +myresearch 0 4 5.568345 0.000000 2842 +fulli 0 4 5.568345 0.000000 2986 +pfile 0 3 5.857933 0.000000 3100 +fingerson 0 3 5.857933 0.000000 4018 +thea 0 3 5.857933 0.000000 4019 +sklenar 0 3 5.857933 0.000000 4020 +programmingc 0 3 5.857933 0.000000 3232 +madhusudhan 0 3 5.857933 0.000000 4021 +bulk 0 3 5.857933 0.000000 4000 +anddavid 0 2 6.263398 0.000000 6126 +architecturec 0 2 6.263398 0.000000 6127 +invalid 0 2 6.263398 0.000000 5476 +callaghan 0 2 6.263398 0.000000 6128 +virtualmemori 0 2 6.263398 0.000000 4305 +anal 0 2 6.263398 0.000000 4834 +null 0 2 6.263398 0.000000 4714 +typhoon 0 1 6.957497 0.000000 17962 +usadavid 0 1 6.957497 0.000000 17963 +toonenrec 0 1 6.957497 0.000000 17964 +rahmat 0 1 6.957497 0.000000 17965 +alvi 0 1 6.957497 0.000000 17966 +informix 0 1 6.957497 0.000000 17967 +memorysteven 0 1 6.957497 0.000000 17968 +communicationshubhendu 0 1 6.957497 0.000000 17969 +costrahmat 0 1 6.957497 0.000000 17970 +multiprocessorsalvin 0 1 6.957497 0.000000 17971 +simulationalvin 0 1 6.957497 0.000000 17972 +sigmetricsmai 0 1 6.957497 0.000000 17973 +thrust 0 1 6.957497 0.000000 17974 +hybridprogram 0 1 6.957497 0.000000 17975 +similaritesof 0 1 6.957497 0.000000 17976 +calledtempest 0 1 6.957497 0.000000 17977 +handler 0 1 6.957497 0.000000 17978 +suppliedmechan 0 1 6.957497 0.000000 17979 +tempestmechan 0 1 6.957497 0.000000 17980 +novelmechan 0 1 6.957497 0.000000 17981 +tagblock 0 1 6.957497 0.000000 17982 +theloc 0 1 6.957497 0.000000 17983 +hardwareplatform 0 1 6.957497 0.000000 17984 +revers 0 1 6.957497 0.000000 17985 +translationt 0 1 6.957497 0.000000 17986 +rtlb 0 1 6.957497 0.000000 17987 +grainaccess 0 1 6.957497 0.000000 17988 +thata 0 1 6.957497 0.000000 17989 +performscompar 0 1 6.957497 0.000000 17990 +memoryprogram 0 1 6.957497 0.000000 17991 +thatoptim 0 1 6.957497 0.000000 17992 +reducingsimul 0 1 6.957497 0.000000 17993 +tightli 0 1 6.957497 0.000000 17994 +byprovid 0 1 6.957497 0.000000 17995 +referenceinvok 0 1 6.957497 0.000000 17996 +andmemori 0 1 6.957497 0.000000 17997 +processedbi 0 1 6.957497 0.000000 17998 +functionfor 0 1 6.957497 0.000000 17999 +usingbinari 0 1 6.957497 0.000000 18000 +memoryrefer 0 1 6.957497 0.000000 18001 +tothre 0 1 6.957497 0.000000 18002 +thatcal 0 1 6.957497 0.000000 18003 +onlythre 0 1 6.957497 0.000000 18004 +slower 0 1 6.957497 0.000000 18005 +techniquesto 0 1 6.957497 0.000000 18006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..29da9248 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +tool 0 117 2.079442 0.000000 93 +stuff 0 87 2.484907 0.000000 171 +school 0 84 2.484907 0.000000 188 +maintain 0 51 2.995732 0.000000 342 +grad 0 20 3.951244 0.000000 837 +wind 0 18 4.060443 0.000000 908 +doug 1 9 4.753590 4.753590 1517 +burger 1 7 5.010635 5.010635 1889 +galileo 0 4 5.568345 0.000000 3086 +damn 0 2 6.263398 0.000000 6129 +pageprofession 0 1 6.957497 0.000000 18007 +summaryresum 0 1 6.957497 0.000000 18008 +cvtranscriptcours 0 1 6.957497 0.000000 18009 +projectsadvisoraffili 0 1 6.957497 0.000000 18010 +sciwisconsin 0 1 6.957497 0.000000 18011 +tunnelpag 0 1 6.957497 0.000000 18012 +architectureuw 0 1 6.957497 0.000000 18013 +architecturesimplescalar 0 1 6.957497 0.000000 18014 +setgenericasacmperson 0 1 6.957497 0.000000 18015 +meus 0 1 6.957497 0.000000 18016 +linksphoto 0 1 6.957497 0.000000 18017 +galleryrid 0 1 6.957497 0.000000 18018 +demonhunt 0 1 6.957497 0.000000 18019 +catsbewar 0 1 6.957497 0.000000 18020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..6d4fbbbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +professor 0 137 1.945910 0.000000 76 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +theori 0 111 2.197225 0.000000 127 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +teach 0 108 2.197225 0.000000 112 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +mathemat 0 108 2.197225 0.000000 123 +access 0 102 2.302585 0.000000 136 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +homepag 0 93 2.397895 0.000000 148 +center 0 88 2.397895 0.000000 158 +pictur 0 89 2.397895 0.000000 160 +journal 0 83 2.484907 0.000000 183 +chang 0 82 2.484907 0.000000 163 +west 0 83 2.484907 0.000000 192 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +contain 0 81 2.484907 0.000000 174 +html 0 75 2.639057 0.000000 235 +david 0 71 2.639057 0.000000 232 +practic 0 70 2.708050 0.000000 246 +written 0 63 2.772589 0.000000 278 +guid 0 63 2.772589 0.000000 267 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +variou 0 56 2.890372 0.000000 317 +publish 0 57 2.890372 0.000000 326 +much 0 52 2.995732 0.000000 349 +numer 0 49 3.044522 0.000000 369 +telephon 0 50 3.044522 0.000000 373 +approach 0 48 3.044522 0.000000 366 +even 0 45 3.135494 0.000000 393 +better 0 45 3.135494 0.000000 401 +math 0 44 3.135494 0.000000 402 +third 0 43 3.178054 0.000000 412 +edit 0 42 3.218876 0.000000 418 +press 0 42 3.218876 0.000000 419 +combin 0 42 3.218876 0.000000 421 +small 0 39 3.258097 0.000000 447 +author 0 39 3.258097 0.000000 450 +tutori 0 39 3.258097 0.000000 437 +paul 0 38 3.295837 0.000000 471 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +approxim 0 35 3.401197 0.000000 509 +print 0 34 3.401197 0.000000 503 +bibliographi 0 34 3.401197 0.000000 518 +next 0 34 3.401197 0.000000 517 +articl 0 33 3.433987 0.000000 530 +postal 0 30 3.555348 0.000000 580 +great 0 27 3.637586 0.000000 626 +thank 0 23 3.806662 0.000000 721 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +latest 0 21 3.912023 0.000000 785 +hous 0 21 3.912023 0.000000 801 +accept 0 18 4.060443 0.000000 879 +stand 0 18 4.060443 0.000000 891 +former 0 17 4.110874 0.000000 956 +seek 0 17 4.110874 0.000000 954 +carl 0 15 4.248495 0.000000 1024 +anonym 0 14 4.317488 0.000000 1100 +individu 0 13 4.382027 0.000000 1126 +errata 0 10 4.653960 0.000000 1403 +town 0 10 4.653960 0.000000 1458 +death 0 10 4.653960 0.000000 1457 +subset 0 10 4.653960 0.000000 1425 +latter 0 9 4.753590 0.000000 1522 +screen 0 9 4.753590 0.000000 1577 +unusu 0 9 4.753590 0.000000 1566 +end 0 9 4.753590 0.000000 1567 +driver 0 8 4.875197 0.000000 1657 +elementari 0 7 5.010635 0.000000 1825 +spline 0 6 5.164786 0.000000 2007 +usaoffic 0 6 5.164786 0.000000 2159 +button 0 5 5.347108 0.000000 2337 +door 0 5 5.347108 0.000000 2291 +areavail 0 4 5.568345 0.000000 2810 +allan 0 4 5.568345 0.000000 2849 +technion 0 4 5.568345 0.000000 2856 +boor 0 3 5.857933 0.000000 3482 +cont 0 3 5.857933 0.000000 3171 +shall 0 3 5.857933 0.000000 3891 +clickabl 0 2 6.263398 0.000000 4788 +deboor 0 2 6.263398 0.000000 4744 +thevari 0 2 6.263398 0.000000 6130 +forconstruct 0 2 6.263398 0.000000 5649 +amo 0 2 6.263398 0.000000 6094 +joi 0 2 6.263398 0.000000 5208 +hermit 0 2 6.263398 0.000000 4150 +soup 0 2 6.263398 0.000000 6131 +kitchen 0 2 6.263398 0.000000 6132 +occupi 0 2 6.263398 0.000000 5857 +ditto 0 1 6.957497 0.000000 18021 +nevai 0 1 6.957497 0.000000 18022 +pinku 0 1 6.957497 0.000000 18023 +mathematicsdepart 0 1 6.957497 0.000000 18024 +schoenberg 0 1 6.957497 0.000000 18025 +approx 0 1 6.957497 0.000000 18026 +theclick 0 1 6.957497 0.000000 18027 +ofapproxim 0 1 6.957497 0.000000 18028 +publishedpap 0 1 6.957497 0.000000 18029 +andmuch 0 1 6.957497 0.000000 18030 +foreast 0 1 6.957497 0.000000 18031 +theirtabl 0 1 6.957497 0.000000 18032 +singli 0 1 6.957497 0.000000 18033 +thishandi 0 1 6.957497 0.000000 18034 +alsoapproxim 0 1 6.957497 0.000000 18035 +slist 0 1 6.957497 0.000000 18036 +ila 0 1 6.957497 0.000000 18037 +seeviva_vi 0 1 6.957497 0.000000 18038 +alsoon 0 1 6.957497 0.000000 18039 +thehtml 0 1 6.957497 0.000000 18040 +primermight 0 1 6.957497 0.000000 18041 +ever_chang 0 1 6.957497 0.000000 18042 +griffeath 0 1 6.957497 0.000000 18043 +sprimordi 0 1 6.957497 0.000000 18044 +seeodd 0 1 6.957497 0.000000 18045 +techunix 0 1 6.957497 0.000000 18046 +nevaiif 0 1 6.957497 0.000000 18047 +makehi 0 1 6.957497 0.000000 18048 +outputavail 0 1 6.957497 0.000000 18049 +taki 0 1 6.957497 0.000000 18050 +souganid 0 1 6.957497 0.000000 18051 +andthaleia 0 1 6.957497 0.000000 18052 +zariphopoul 0 1 6.957497 0.000000 18053 +szego 0 1 6.957497 0.000000 18054 +bust 0 1 6.957497 0.000000 18055 +inscript 0 1 6.957497 0.000000 18056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..64ca6b1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +data 1 170 1.791759 1.791759 49 +base 0 165 1.791759 0.000000 50 +support 0 132 1.945910 0.000000 83 +construct 0 139 1.945910 0.000000 82 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +introduct 0 126 2.079442 0.000000 87 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +graphic 0 90 2.397895 0.000000 147 +proceed 0 93 2.397895 0.000000 152 +follow 0 92 2.397895 0.000000 143 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +environ 0 84 2.484907 0.000000 177 +help 0 83 2.484907 0.000000 175 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +requir 0 81 2.484907 0.000000 167 +librari 0 87 2.484907 0.000000 181 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +window 0 68 2.708050 0.000000 242 +differ 0 66 2.708050 0.000000 253 +descript 0 64 2.772589 0.000000 271 +januari 0 62 2.772589 0.000000 264 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +explor 0 58 2.890372 0.000000 324 +direct 0 57 2.890372 0.000000 316 +detail 0 57 2.890372 0.000000 321 +februari 0 54 2.944439 0.000000 328 +date 0 51 2.995732 0.000000 344 +visual 0 48 3.044522 0.000000 372 +cool 0 49 3.044522 0.000000 374 +featur 0 46 3.091042 0.000000 386 +execut 0 45 3.135494 0.000000 404 +mechan 0 43 3.178054 0.000000 416 +map 0 39 3.258097 0.000000 452 +tree 0 36 3.367296 0.000000 492 +download 0 36 3.367296 0.000000 489 +michael 0 35 3.401197 0.000000 514 +next 0 34 3.401197 0.000000 517 +queri 0 33 3.433987 0.000000 524 +within 0 33 3.433987 0.000000 525 +platform 0 29 3.583519 0.000000 591 +releas 0 28 3.610918 0.000000 616 +repres 0 26 3.688879 0.000000 656 +compar 0 26 3.688879 0.000000 648 +handl 0 24 3.761200 0.000000 685 +input 0 23 3.806662 0.000000 727 +togeth 0 23 3.806662 0.000000 714 +famili 0 23 3.806662 0.000000 735 +sequenc 0 23 3.806662 0.000000 734 +variabl 0 23 3.806662 0.000000 715 +color 0 22 3.850148 0.000000 762 +flexibl 0 21 3.912023 0.000000 792 +viewer 0 21 3.912023 0.000000 787 +output 0 21 3.912023 0.000000 788 +chen 0 21 3.912023 0.000000 791 +comparison 0 19 4.007333 0.000000 863 +record 0 18 4.060443 0.000000 890 +appropri 0 18 4.060443 0.000000 883 +ramakrishnan 0 16 4.174387 0.000000 972 +livni 0 15 4.248495 0.000000 1053 +ascii 0 15 4.248495 0.000000 1032 +biologi 0 15 4.248495 0.000000 1049 +stream 0 15 4.248495 0.000000 1015 +miron 0 14 4.317488 0.000000 1110 +save 0 14 4.317488 0.000000 1099 +individu 0 13 4.382027 0.000000 1126 +raghu 0 12 4.465908 0.000000 1212 +shape 0 12 4.465908 0.000000 1245 +solari 0 12 4.465908 0.000000 1238 +distinguish 0 11 4.553877 0.000000 1357 +abil 0 11 4.553877 0.000000 1341 +string 0 11 4.553877 0.000000 1340 +devis 0 10 4.653960 0.000000 1451 +cheng 0 10 4.653960 0.000000 1381 +relationship 0 10 4.653960 0.000000 1383 +float 0 9 4.753590 0.000000 1504 +integ 0 8 4.875197 0.000000 1688 +inproceed 0 8 4.875197 0.000000 1670 +larger 0 7 5.010635 0.000000 1875 +dataset 0 7 5.010635 0.000000 1914 +spie 0 6 5.164786 0.000000 2119 +layout 0 6 5.164786 0.000000 2183 +quick 0 6 5.164786 0.000000 2184 +medicin 0 5 5.347108 0.000000 2448 +cell 0 5 5.347108 0.000000 2274 +complementari 0 5 5.347108 0.000000 2523 +kent 0 4 5.568345 0.000000 2744 +myllymaki 0 3 5.857933 0.000000 4022 +asid 0 3 5.857933 0.000000 3770 +comad 0 3 5.857933 0.000000 3737 +wenger 0 3 5.857933 0.000000 4023 +jussi 0 2 6.263398 0.000000 6133 +hotlin 0 2 6.263398 0.000000 5967 +oneset 0 2 6.263398 0.000000 6134 +viewsof 0 2 6.263398 0.000000 6135 +birch 0 2 6.263398 0.000000 6136 +andanalysi 0 2 6.263398 0.000000 4271 +workth 0 2 6.263398 0.000000 6137 +guangshun 0 2 6.263398 0.000000 6138 +pagedevis 0 1 6.957497 0.000000 18057 +visualizationt 0 1 6.957497 0.000000 18058 +featuresexamplesin 0 1 6.957497 0.000000 18059 +depthpublicationsrel 0 1 6.957497 0.000000 18060 +workreleasecontactsfeaturesthes 0 1 6.957497 0.000000 18061 +cancontrol 0 1 6.957497 0.000000 18062 +ax 0 1 6.957497 0.000000 18063 +cursor 0 1 6.957497 0.000000 18064 +examplescheck 0 1 6.957497 0.000000 18065 +validationmolecular 0 1 6.957497 0.000000 18066 +soil 0 1 6.957497 0.000000 18067 +clusteringfinanci 0 1 6.957497 0.000000 18068 +explorationfamili 0 1 6.957497 0.000000 18069 +climatedata 0 1 6.957497 0.000000 18070 +centergeograph 0 1 6.957497 0.000000 18071 +systemsoil 0 1 6.957497 0.000000 18072 +sciencefil 0 1 6.957497 0.000000 18073 +serverprogram 0 1 6.957497 0.000000 18074 +tracesclin 0 1 6.957497 0.000000 18075 +moreexampl 0 1 6.957497 0.000000 18076 +depthfor 0 1 6.957497 0.000000 18077 +visualizationvisu 0 1 6.957497 0.000000 18078 +interfaceperform 0 1 6.957497 0.000000 18079 +issuespublicationsmiron 0 1 6.957497 0.000000 18080 +dataexplor 0 1 6.957497 0.000000 18081 +praveenseshadri 0 1 6.957497 0.000000 18082 +sequencequeri 0 1 6.957497 0.000000 18083 +themanag 0 1 6.957497 0.000000 18084 +seqproject 0 1 6.957497 0.000000 18085 +queryrecord 0 1 6.957497 0.000000 18086 +bevisu 0 1 6.957497 0.000000 18087 +informationw 0 1 6.957497 0.000000 18088 +executablesfor 0 1 6.957497 0.000000 18089 +ld_library_path 0 1 6.957497 0.000000 18090 +rundevis 0 1 6.957497 0.000000 18091 +arestat 0 1 6.957497 0.000000 18092 +shareabl 0 1 6.957497 0.000000 18093 +contactsfor 0 1 6.957497 0.000000 18094 +contactmiron 0 1 6.957497 0.000000 18095 +usersupport 0 1 6.957497 0.000000 18096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..122d99b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +relat 0 139 1.945910 0.000000 68 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +report 0 131 2.079442 0.000000 92 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +part 0 98 2.302585 0.000000 129 +proceed 0 93 2.397895 0.000000 152 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +interfac 0 79 2.564949 0.000000 209 +david 0 71 2.639057 0.000000 232 +summari 0 73 2.639057 0.000000 237 +name 0 72 2.639057 0.000000 220 +appli 0 71 2.639057 0.000000 226 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +complex 0 64 2.772589 0.000000 269 +evalu 0 64 2.772589 0.000000 266 +prof 0 64 2.772589 0.000000 273 +creat 0 63 2.772589 0.000000 277 +januari 0 62 2.772589 0.000000 264 +type 0 61 2.833213 0.000000 296 +automat 0 61 2.833213 0.000000 306 +unix 0 58 2.890372 0.000000 308 +space 0 57 2.890372 0.000000 310 +talk 0 53 2.944439 0.000000 336 +sampl 0 53 2.944439 0.000000 339 +hardwar 0 51 2.995732 0.000000 350 +basic 0 50 3.044522 0.000000 360 +set 0 50 3.044522 0.000000 361 +telephon 0 50 3.044522 0.000000 373 +without 0 50 3.044522 0.000000 370 +anoth 0 45 3.135494 0.000000 408 +execut 0 45 3.135494 0.000000 404 +multipl 0 39 3.258097 0.000000 453 +must 0 40 3.258097 0.000000 442 +error 0 40 3.258097 0.000000 449 +field 0 37 3.332205 0.000000 482 +workstat 0 37 3.332205 0.000000 479 +connect 0 37 3.332205 0.000000 485 +either 0 35 3.401197 0.000000 506 +singl 0 34 3.401197 0.000000 510 +storag 0 31 3.496508 0.000000 553 +exist 0 30 3.555348 0.000000 569 +abl 0 30 3.555348 0.000000 566 +rang 0 30 3.555348 0.000000 565 +platform 0 29 3.583519 0.000000 591 +intend 0 28 3.610918 0.000000 599 +becom 0 28 3.610918 0.000000 603 +scale 0 28 3.610918 0.000000 613 +multiprocessor 0 28 3.610918 0.000000 605 +cluster 0 28 3.610918 0.000000 612 +manipul 0 27 3.637586 0.000000 624 +client 0 25 3.737670 0.000000 679 +wai 0 25 3.737670 0.000000 662 +store 0 24 3.761200 0.000000 693 +fellow 0 24 3.761200 0.000000 701 +serv 0 22 3.850148 0.000000 758 +varieti 0 22 3.850148 0.000000 740 +instead 0 22 3.850148 0.000000 756 +programminglanguag 0 21 3.912023 0.000000 782 +fund 0 21 3.912023 0.000000 805 +busi 0 21 3.912023 0.000000 784 +sigmod 0 19 4.007333 0.000000 877 +excel 0 19 4.007333 0.000000 868 +benchmark 0 19 4.007333 0.000000 859 +repositori 0 17 4.110874 0.000000 932 +expand 0 17 4.110874 0.000000 928 +modif 0 17 4.110874 0.000000 913 +attempt 0 17 4.110874 0.000000 917 +white 0 17 4.110874 0.000000 951 +match 0 16 4.174387 0.000000 965 +intel 0 16 4.174387 0.000000 1000 +transit 0 15 4.248495 0.000000 1046 +capabl 0 15 4.248495 0.000000 1016 +heterogen 0 14 4.317488 0.000000 1090 +signific 0 13 4.382027 0.000000 1125 +dewitt 0 12 4.465908 0.000000 1270 +target 0 12 4.465908 0.000000 1282 +emploi 0 12 4.465908 0.000000 1284 +fromindividu 0 12 4.465908 0.000000 1290 +shore 1 11 4.553877 4.553877 1377 +persist 0 11 4.553877 0.000000 1367 +michigan 0 11 4.553877 0.000000 1368 +arpa 0 11 4.553877 0.000000 1369 +naughton 0 10 4.653960 0.000000 1450 +facilit 0 10 4.653960 0.000000 1412 +franklin 0 10 4.653960 0.000000 1436 +vldb 0 10 4.653960 0.000000 1470 +invit 0 10 4.653960 0.000000 1428 +conferenceon 0 9 4.753590 0.000000 1595 +paradis 0 8 4.875197 0.000000 1782 +carei 0 8 4.875197 0.000000 1781 +solomon 0 8 4.875197 0.000000 1716 +databasesystem 0 8 4.875197 0.000000 1617 +hold 0 8 4.875197 0.000000 1645 +poor 0 8 4.875197 0.000000 1736 +polygon 0 8 4.875197 0.000000 1723 +sparc 0 7 5.010635 0.000000 1860 +geograph 0 6 5.164786 0.000000 2236 +patel 0 6 5.164786 0.000000 2154 +pub 0 6 5.164786 0.000000 2239 +compat 0 5 5.347108 0.000000 2485 +tsatalo 0 5 5.347108 0.000000 2581 +minneapoli 0 5 5.347108 0.000000 2480 +proceedingsof 0 5 5.347108 0.000000 2331 +satellit 0 4 5.568345 0.000000 3077 +exodu 0 4 5.568345 0.000000 3075 +mcauliff 0 4 5.568345 0.000000 3083 +zwill 0 4 5.568345 0.000000 3076 +chile 0 4 5.568345 0.000000 3082 +gamma 0 3 5.857933 0.000000 3219 +orth 0 3 5.857933 0.000000 3685 +paragon 0 3 5.857933 0.000000 3359 +summit 0 3 5.857933 0.000000 3684 +developeda 0 2 6.263398 0.000000 5205 +polylin 0 2 6.263398 0.000000 6079 +projecti 0 2 6.263398 0.000000 5963 +kabra 0 2 6.263398 0.000000 6139 +romn 0 1 6.957497 0.000000 18097 +databasebenchmark 0 1 6.957497 0.000000 18098 +objectiveof 0 1 6.957497 0.000000 18099 +objectsystem 0 1 6.957497 0.000000 18100 +applicationsinclud 0 1 6.957497 0.000000 18101 +capabilitiesof 0 1 6.957497 0.000000 18102 +typedobject 0 1 6.957497 0.000000 18103 +hierarchicalnam 0 1 6.957497 0.000000 18104 +interfaceto 0 1 6.957497 0.000000 18105 +toeas 0 1 6.957497 0.000000 18106 +systemenviron 0 1 6.957497 0.000000 18107 +ccwill 0 1 6.957497 0.000000 18108 +networksto 0 1 6.957497 0.000000 18109 +ajoint 0 1 6.957497 0.000000 18110 +relationaldatabas 0 1 6.957497 0.000000 18111 +thetask 0 1 6.957497 0.000000 18112 +formanag 0 1 6.957497 0.000000 18113 +modelingne 0 1 6.957497 0.000000 18114 +manipulatingmuch 0 1 6.957497 0.000000 18115 +muchbett 0 1 6.957497 0.000000 18116 +differencefrom 0 1 6.957497 0.000000 18117 +parallelismto 0 1 6.957497 0.000000 18118 +assatellit 0 1 6.957497 0.000000 18119 +withm 0 1 6.957497 0.000000 18120 +persistentappl 0 1 6.957497 0.000000 18121 +chuh 0 1 6.957497 0.000000 18122 +santiego 0 1 6.957497 0.000000 18123 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..fd3d2ce3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +analysi 0 124 2.079442 0.000000 98 +spring 0 131 2.079442 0.000000 88 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +imag 0 91 2.397895 0.000000 161 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +control 0 82 2.484907 0.000000 164 +activ 0 84 2.484907 0.000000 182 +environ 0 84 2.484907 0.000000 177 +build 0 85 2.484907 0.000000 184 +ieee 0 86 2.484907 0.000000 190 +west 0 83 2.484907 0.000000 192 +start 0 83 2.484907 0.000000 173 +chang 0 82 2.484907 0.000000 163 +appear 0 78 2.564949 0.000000 210 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +refer 0 78 2.564949 0.000000 203 +complet 0 77 2.564949 0.000000 208 +workshop 0 71 2.639057 0.000000 239 +intellig 0 72 2.639057 0.000000 225 +view 0 70 2.708050 0.000000 254 +order 0 69 2.708050 0.000000 249 +goal 0 66 2.708050 0.000000 250 +main 0 67 2.708050 0.000000 256 +interact 0 62 2.772589 0.000000 270 +virtual 0 62 2.772589 0.000000 285 +import 0 65 2.772589 0.000000 282 +artifici 0 63 2.772589 0.000000 280 +experi 0 64 2.772589 0.000000 283 +result 0 65 2.772589 0.000000 281 +guid 0 63 2.772589 0.000000 267 +abstract 0 62 2.772589 0.000000 276 +descript 0 64 2.772589 0.000000 271 +simpl 0 60 2.833213 0.000000 298 +type 0 61 2.833213 0.000000 296 +juli 0 60 2.833213 0.000000 305 +explor 0 58 2.890372 0.000000 324 +point 0 58 2.890372 0.000000 319 +space 0 57 2.890372 0.000000 310 +three 0 54 2.944439 0.000000 330 +scientif 0 53 2.944439 0.000000 341 +finger 0 52 2.995732 0.000000 354 +investig 0 51 2.995732 0.000000 353 +visual 0 48 3.044522 0.000000 372 +without 0 50 3.044522 0.000000 370 +basic 0 50 3.044522 0.000000 360 +approach 0 48 3.044522 0.000000 366 +move 0 47 3.091042 0.000000 382 +understand 0 47 3.091042 0.000000 384 +could 0 46 3.091042 0.000000 383 +adapt 0 46 3.091042 0.000000 387 +possibl 0 47 3.091042 0.000000 378 +physic 0 47 3.091042 0.000000 377 +mark 0 44 3.135494 0.000000 403 +around 0 43 3.178054 0.000000 415 +vision 0 41 3.218876 0.000000 430 +combin 0 42 3.218876 0.000000 421 +linear 0 41 3.218876 0.000000 431 +press 0 42 3.218876 0.000000 419 +autom 0 41 3.218876 0.000000 434 +continu 0 39 3.258097 0.000000 448 +map 0 39 3.258097 0.000000 452 +societi 0 40 3.258097 0.000000 456 +paul 0 38 3.295837 0.000000 471 +streetmadison 0 38 3.295837 0.000000 474 +vita 0 38 3.295837 0.000000 473 +close 0 38 3.295837 0.000000 465 +correct 0 38 3.295837 0.000000 462 +brian 0 38 3.295837 0.000000 466 +purpos 0 37 3.332205 0.000000 481 +sciencesunivers 0 37 3.332205 0.000000 486 +connect 0 37 3.332205 0.000000 485 +procedur 0 36 3.367296 0.000000 488 +especi 0 36 3.367296 0.000000 496 +robot 0 36 3.367296 0.000000 497 +global 0 34 3.401197 0.000000 520 +represent 0 35 3.401197 0.000000 512 +either 0 35 3.401197 0.000000 506 +curriculum 0 33 3.433987 0.000000 535 +taught 0 33 3.433987 0.000000 526 +rang 0 30 3.555348 0.000000 565 +power 0 30 3.555348 0.000000 573 +chair 0 29 3.583519 0.000000 596 +steve 0 29 3.583519 0.000000 594 +progress 0 28 3.610918 0.000000 598 +measur 0 28 3.610918 0.000000 609 +proc 0 26 3.688879 0.000000 649 +detect 0 26 3.688879 0.000000 646 +bookmark 0 26 3.688879 0.000000 639 +toward 0 25 3.737670 0.000000 668 +task 0 25 3.737670 0.000000 678 +accur 0 25 3.737670 0.000000 680 +trace 0 25 3.737670 0.000000 677 +strategi 0 25 3.737670 0.000000 682 +motion 0 24 3.761200 0.000000 699 +pattern 0 24 3.761200 0.000000 689 +flow 0 24 3.761200 0.000000 700 +recognit 0 23 3.806662 0.000000 723 +displai 0 23 3.806662 0.000000 712 +input 0 23 3.806662 0.000000 727 +mobil 0 23 3.806662 0.000000 730 +sequenc 0 23 3.806662 0.000000 734 +defin 0 22 3.850148 0.000000 746 +period 0 22 3.850148 0.000000 743 +geometri 0 22 3.850148 0.000000 752 +path 0 21 3.912023 0.000000 778 +output 0 21 3.912023 0.000000 788 +navig 0 21 3.912023 0.000000 796 +viewer 0 21 3.912023 0.000000 787 +synthesi 0 20 3.951244 0.000000 834 +basi 0 20 3.951244 0.000000 828 +boston 0 19 4.007333 0.000000 862 +separ 0 19 4.007333 0.000000 844 +dimension 0 18 4.060443 0.000000 909 +behavior 0 18 4.060443 0.000000 881 +four 0 18 4.060443 0.000000 905 +differenti 0 17 4.110874 0.000000 921 +coupl 0 17 4.110874 0.000000 939 +precis 0 15 4.248495 0.000000 1023 +capabl 0 15 4.248495 0.000000 1016 +track 0 15 4.248495 0.000000 1029 +scene 0 14 4.317488 0.000000 1114 +camera 0 14 4.317488 0.000000 1115 +conf 0 13 4.382027 0.000000 1181 +charl 0 13 4.382027 0.000000 1149 +step 0 13 4.382027 0.000000 1138 +coordin 0 13 4.382027 0.000000 1182 +believ 0 13 4.382027 0.000000 1187 +shape 0 12 4.465908 0.000000 1245 +fix 0 11 4.553877 0.000000 1327 +valid 0 11 4.553877 0.000000 1299 +bill 0 11 4.553877 0.000000 1297 +correspond 0 10 4.653960 0.000000 1382 +edutelephon 0 10 4.653960 0.000000 1473 +custom 0 10 4.653960 0.000000 1414 +acquisit 0 10 4.653960 0.000000 1465 +earth 0 10 4.653960 0.000000 1463 +dyer 1 9 4.753590 4.753590 1573 +surfac 0 9 4.753590 0.000000 1574 +observ 0 9 4.753590 0.000000 1578 +leader 0 9 4.753590 0.000000 1576 +intermedi 0 9 4.753590 0.000000 1497 +recoveri 0 9 4.753590 0.000000 1474 +morgan 0 9 4.753590 0.000000 1484 +invari 0 8 4.875197 0.000000 1748 +autonom 0 8 4.875197 0.000000 1749 +siggraph 0 8 4.875197 0.000000 1773 +irregular 0 8 4.875197 0.000000 1768 +curv 0 8 4.875197 0.000000 1656 +edg 0 8 4.875197 0.000000 1647 +textur 0 8 4.875197 0.000000 1677 +seitz 0 7 5.010635 0.000000 1976 +morph 0 7 5.010635 0.000000 1937 +interpol 0 7 5.010635 0.000000 1823 +paramet 0 7 5.010635 0.000000 1796 +smooth 0 7 5.010635 0.000000 1855 +dimens 0 7 5.010635 0.000000 1930 +stereo 0 7 5.010635 0.000000 1818 +viewpoint 0 6 5.164786 0.000000 2116 +reconstruct 0 6 5.164786 0.000000 2170 +recov 0 6 5.164786 0.000000 2235 +maryland 0 6 5.164786 0.000000 2140 +kluwer 0 6 5.164786 0.000000 2143 +bestor 0 6 5.164786 0.000000 2099 +cyclic 0 5 5.347108 0.000000 2383 +unknown 0 5 5.347108 0.000000 2318 +provabl 0 5 5.347108 0.000000 2558 +affin 0 5 5.347108 0.000000 2378 +recogn 0 5 5.347108 0.000000 2302 +revolut 0 5 5.347108 0.000000 2315 +jain 0 5 5.347108 0.000000 2332 +adjust 0 5 5.347108 0.000000 2422 +rigid 0 5 5.347108 0.000000 2432 +gareth 0 5 5.347108 0.000000 2392 +connectionist 0 5 5.347108 0.000000 2430 +bradlei 0 5 5.347108 0.000000 2554 +contour 0 4 5.568345 0.000000 2812 +usa 0 4 5.568345 0.000000 3080 +cvpr 0 4 5.568345 0.000000 2761 +theus 0 4 5.568345 0.000000 2992 +simplifi 0 4 5.568345 0.000000 3066 +satellit 0 4 5.568345 0.000000 3077 +shah 0 4 5.568345 0.000000 2814 +lumelski 0 4 5.568345 0.000000 2837 +harri 0 4 5.568345 0.000000 3034 +asid 0 3 5.857933 0.000000 3770 +stationari 0 3 5.857933 0.000000 3861 +alamito 0 3 5.857933 0.000000 3558 +lattic 0 3 5.857933 0.000000 3721 +groupcours 0 3 5.857933 0.000000 3092 +macc 0 3 5.857933 0.000000 3414 +kutulako 0 2 6.263398 0.000000 6064 +hibbard 0 2 6.263398 0.000000 6066 +stewart 0 2 6.263398 0.000000 5739 +acquir 0 2 6.263398 0.000000 5557 +panoram 0 2 6.263398 0.000000 4755 +howto 0 2 6.263398 0.000000 5761 +arbitrarili 0 2 6.263398 0.000000 5791 +discrimin 0 2 6.263398 0.000000 6140 +festschrift 0 2 6.263398 0.000000 6141 +rosenfeld 0 2 6.263398 0.000000 4495 +articul 0 2 6.263398 0.000000 5799 +kyro 0 2 6.263398 0.000000 6063 +rochest 0 2 6.263398 0.000000 6142 +seal 0 1 6.957497 0.000000 18124 +allmen 0 1 6.957497 0.000000 18125 +kjell 0 1 6.957497 0.000000 18126 +pagecharl 0 1 6.957497 0.000000 18127 +dyerprofessordepart 0 1 6.957497 0.000000 18128 +infoph 0 1 6.957497 0.000000 18129 +visualizationgroup 0 1 6.957497 0.000000 18130 +groupprogram 0 1 6.957497 0.000000 18131 +synthesisth 0 1 6.957497 0.000000 18132 +controllingin 0 1 6.957497 0.000000 18133 +cameraof 0 1 6.957497 0.000000 18134 +videostream 0 1 6.957497 0.000000 18135 +whicha 0 1 6.957497 0.000000 18136 +througha 0 1 6.957497 0.000000 18137 +thesit 0 1 6.957497 0.000000 18138 +predetermin 0 1 6.957497 0.000000 18139 +researchquest 0 1 6.957497 0.000000 18140 +synthesizenew 0 1 6.957497 0.000000 18141 +reconstructiona 0 1 6.957497 0.000000 18142 +innovativetechniqu 0 1 6.957497 0.000000 18143 +callview 0 1 6.957497 0.000000 18144 +basisimag 0 1 6.957497 0.000000 18145 +explorationcomput 0 1 6.957497 0.000000 18146 +controllingcamera 0 1 6.957497 0.000000 18147 +purposefulli 0 1 6.957497 0.000000 18148 +theposit 0 1 6.957497 0.000000 18149 +adjustviewpoint 0 1 6.957497 0.000000 18150 +forsolv 0 1 6.957497 0.000000 18151 +findspecif 0 1 6.957497 0.000000 18152 +unknownshap 0 1 6.957497 0.000000 18153 +appearanceof 0 1 6.957497 0.000000 18154 +computationsrequir 0 1 6.957497 0.000000 18155 +andelimin 0 1 6.957497 0.000000 18156 +thecamera 0 1 6.957497 0.000000 18157 +towardsviewpoint 0 1 6.957497 0.000000 18158 +viewedobject 0 1 6.957497 0.000000 18159 +thisapproach 0 1 6.957497 0.000000 18160 +visualizationin 0 1 6.957497 0.000000 18161 +techniquescap 0 1 6.957497 0.000000 18162 +specificgraph 0 1 6.957497 0.000000 18163 +displayingarbitrari 0 1 6.957497 0.000000 18164 +commonfram 0 1 6.957497 0.000000 18165 +algorithmexecut 0 1 6.957497 0.000000 18166 +dataanalysi 0 1 6.957497 0.000000 18167 +forexperi 0 1 6.957497 0.000000 18168 +visualizingintermedi 0 1 6.957497 0.000000 18169 +forproblem 0 1 6.957497 0.000000 18170 +cloud 0 1 6.957497 0.000000 18171 +azriel 0 1 6.957497 0.000000 18172 +occlud 0 1 6.957497 0.000000 18173 +battaiola 0 1 6.957497 0.000000 18174 +santek 0 1 6.957497 0.000000 18175 +voidrot 0 1 6.957497 0.000000 18176 +martinez 0 1 6.957497 0.000000 18177 +liangyin 0 1 6.957497 0.000000 18178 +yuph 0 1 6.957497 0.000000 18179 +whibbard 0 1 6.957497 0.000000 18180 +onlattic 0 1 6.957497 0.000000 18181 +kiriako 0 1 6.957497 0.000000 18182 +ofobserv 0 1 6.957497 0.000000 18183 +iutech 0 1 6.957497 0.000000 18184 +spatiotempor 0 1 6.957497 0.000000 18185 +brent 0 1 6.957497 0.000000 18186 +dimensionalshap 0 1 6.957497 0.000000 18187 +plantinga 0 1 6.957497 0.000000 18188 +wheaton 0 1 6.957497 0.000000 18189 +representationfor 0 1 6.957497 0.000000 18190 +ccsua 0 1 6.957497 0.000000 18191 +ctstateu 0 1 6.957497 0.000000 18192 +measureslink 0 1 6.957497 0.000000 18193 +interestmi 0 1 6.957497 0.000000 18194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..85a9a2de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +class 0 199 1.609438 0.000000 37 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +teach 0 108 2.197225 0.000000 112 +section 0 94 2.397895 0.000000 149 +educ 0 86 2.484907 0.000000 191 +friend 0 48 3.044522 0.000000 376 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +curriculum 0 33 3.433987 0.000000 535 +task 0 25 3.737670 0.000000 678 +bring 0 10 4.653960 0.000000 1430 +appreci 0 5 5.347108 0.000000 2374 +patienc 0 2 6.263398 0.000000 5466 +machinew 0 1 6.957497 0.000000 18195 +arduou 0 1 6.957497 0.000000 18196 +vitaecheck 0 1 6.957497 0.000000 18197 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..dfc282b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,166 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +dayton 0 119 2.079442 0.000000 104 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +check 0 115 2.197225 0.000000 118 +make 0 111 2.197225 0.000000 120 +intern 0 108 2.197225 0.000000 128 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +real 0 93 2.397895 0.000000 144 +sinc 0 90 2.397895 0.000000 159 +homepag 0 93 2.397895 0.000000 148 +thing 0 84 2.484907 0.000000 189 +west 0 83 2.484907 0.000000 192 +help 0 83 2.484907 0.000000 175 +requir 0 81 2.484907 0.000000 167 +complet 0 77 2.564949 0.000000 208 +want 0 79 2.564949 0.000000 199 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +servic 0 72 2.639057 0.000000 236 +name 0 72 2.639057 0.000000 220 +nation 0 74 2.639057 0.000000 240 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +laboratori 0 63 2.772589 0.000000 292 +creat 0 63 2.772589 0.000000 277 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +instruct 0 53 2.944439 0.000000 332 +talk 0 53 2.944439 0.000000 336 +digit 0 52 2.995732 0.000000 348 +date 0 51 2.995732 0.000000 344 +cool 0 49 3.044522 0.000000 374 +standard 0 48 3.044522 0.000000 365 +archiv 0 49 3.044522 0.000000 364 +could 0 46 3.091042 0.000000 383 +video 0 44 3.135494 0.000000 405 +even 0 45 3.135494 0.000000 393 +answer 0 45 3.135494 0.000000 391 +discuss 0 45 3.135494 0.000000 399 +made 0 44 3.135494 0.000000 398 +around 0 43 3.178054 0.000000 415 +societi 0 40 3.258097 0.000000 456 +purpos 0 37 3.332205 0.000000 481 +curriculum 0 33 3.433987 0.000000 535 +anim 0 31 3.496508 0.000000 557 +domain 0 30 3.555348 0.000000 564 +secur 0 30 3.555348 0.000000 577 +full 0 28 3.610918 0.000000 615 +never 0 25 3.737670 0.000000 671 +spent 0 25 3.737670 0.000000 676 +magazin 0 24 3.761200 0.000000 704 +serv 0 22 3.850148 0.000000 758 +half 0 21 3.912023 0.000000 776 +among 0 21 3.912023 0.000000 781 +wonder 0 20 3.951244 0.000000 815 +eric 0 19 4.007333 0.000000 870 +anderson 0 19 4.007333 0.000000 860 +offici 0 18 4.060443 0.000000 894 +edulast 0 17 4.110874 0.000000 927 +explan 0 16 4.174387 0.000000 985 +biologi 0 15 4.248495 0.000000 1049 +susan 0 15 4.248495 0.000000 1050 +shown 0 14 4.317488 0.000000 1080 +wait 0 13 4.382027 0.000000 1168 +philosophi 0 13 4.382027 0.000000 1167 +neat 0 12 4.465908 0.000000 1263 +entertain 0 12 4.465908 0.000000 1286 +holidai 0 12 4.465908 0.000000 1224 +see 0 11 4.553877 0.000000 1337 +fix 0 11 4.553877 0.000000 1327 +leader 0 9 4.753590 0.000000 1576 +mainten 0 9 4.753590 0.000000 1543 +told 0 8 4.875197 0.000000 1658 +scout 1 7 5.010635 5.010635 1903 +molecular 0 7 5.010635 0.000000 1887 +explain 0 7 5.010635 0.000000 1816 +monei 0 7 5.010635 0.000000 1934 +philosoph 0 7 5.010635 0.000000 1904 +meant 0 6 5.164786 0.000000 2055 +lucki 0 6 5.164786 0.000000 2163 +mac 0 5 5.347108 0.000000 2292 +registr 0 5 5.347108 0.000000 2249 +commod 0 5 5.347108 0.000000 2415 +girlfriend 0 5 5.347108 0.000000 2579 +billi 0 5 5.347108 0.000000 2404 +couldn 0 4 5.568345 0.000000 2977 +green 0 4 5.568345 0.000000 2848 +pageer 0 3 5.857933 0.000000 3776 +ofwisconsin 0 3 5.857933 0.000000 4002 +pete 0 3 5.857933 0.000000 3865 +specialist 0 3 5.857933 0.000000 3319 +lauri 0 3 5.857933 0.000000 3867 +wit 0 3 5.857933 0.000000 4005 +popul 0 3 5.857933 0.000000 3235 +facstaff 0 3 5.857933 0.000000 3433 +hazen 0 2 6.263398 0.000000 6143 +calcari 0 2 6.263398 0.000000 6144 +devri 0 2 6.263398 0.000000 6145 +broken 0 2 6.263398 0.000000 5074 +mice 0 2 6.263398 0.000000 5069 +shameless 0 2 6.263398 0.000000 6146 +salon 0 2 6.263398 0.000000 5827 +nixon 0 2 6.263398 0.000000 5868 +hazennon 0 1 6.957497 0.000000 18198 +professorroom 0 1 6.957497 0.000000 18199 +fornet 0 1 6.957497 0.000000 18200 +elegantli 0 1 6.957497 0.000000 18201 +fearless 0 1 6.957497 0.000000 18202 +withtech 0 1 6.957497 0.000000 18203 +capitalist 0 1 6.957497 0.000000 18204 +pragmatist 0 1 6.957497 0.000000 18205 +metaphys 0 1 6.957497 0.000000 18206 +makethi 0 1 6.957497 0.000000 18207 +drosophila 0 1 6.957497 0.000000 18208 +geneticist 0 1 6.957497 0.000000 18209 +ezin 0 1 6.957497 0.000000 18210 +shockwav 0 1 6.957497 0.000000 18211 +kudon 0 1 6.957497 0.000000 18212 +quicktimevr 0 1 6.957497 0.000000 18213 +documentari 0 1 6.957497 0.000000 18214 +plight 0 1 6.957497 0.000000 18215 +bosnia 0 1 6.957497 0.000000 18216 +uproot 0 1 6.957497 0.000000 18217 +preslei 0 1 6.957497 0.000000 18218 +meetingsejhazen 0 1 6.957497 0.000000 18219 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..3b3f62c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +offic 0 299 1.098612 0.000000 13 +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +illinoi 0 7 5.010635 0.000000 1941 +tina 0 3 5.857933 0.000000 3744 +urbana 0 3 5.857933 0.000000 3879 +eliassi 1 2 6.263398 6.263398 6147 +champaign 0 2 6.263398 0.000000 5671 +pagetina 0 1 6.957497 0.000000 18220 +bldgphone 0 1 6.957497 0.000000 18221 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..0348a464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html @@ -0,0 +1 @@ +term, tf, in documents count, idf, tfidf, wordid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..a74194e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +parallel 0 169 1.791759 0.000000 60 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +professor 0 137 1.945910 0.000000 76 +assign 0 135 1.945910 0.000000 66 +high 0 130 2.079442 0.000000 101 +dayton 0 119 2.079442 0.000000 104 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +topic 0 114 2.197225 0.000000 110 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +west 0 83 2.484907 0.000000 192 +level 0 87 2.484907 0.000000 180 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +april 0 77 2.564949 0.000000 196 +resum 0 79 2.564949 0.000000 217 +symposium 0 72 2.639057 0.000000 238 +street 0 63 2.772589 0.000000 293 +instruct 0 53 2.944439 0.000000 332 +advisor 0 51 2.995732 0.000000 355 +approach 0 48 3.044522 0.000000 366 +anoth 0 45 3.135494 0.000000 408 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +annual 0 40 3.258097 0.000000 458 +electr 0 38 3.295837 0.000000 461 +jame 0 35 3.401197 0.000000 507 +steve 0 29 3.583519 0.000000 594 +trace 0 25 3.737670 0.000000 677 +smith 0 20 3.951244 0.000000 820 +eric 0 19 4.007333 0.000000 870 +predict 0 19 4.007333 0.000000 855 +latenc 0 16 4.174387 0.000000 993 +condit 0 16 4.174387 0.000000 975 +drive 0 15 4.248495 0.000000 1052 +johnson 0 13 4.382027 0.000000 1162 +bandwidth 0 11 4.553877 0.000000 1365 +branch 0 11 4.553877 0.000000 1318 +multiscalar 0 8 4.875197 0.000000 1783 +erik 0 8 4.875197 0.000000 1701 +microarchitectur 0 6 5.164786 0.000000 2238 +fetch 0 5 5.347108 0.000000 2567 +kestrel 0 4 5.568345 0.000000 2990 +confid 0 3 5.857933 0.000000 3691 +bennett 0 3 5.857933 0.000000 4024 +cold 0 3 5.857933 0.000000 3637 +rotenberg 0 1 6.957497 0.000000 18222 +passsth 0 1 6.957497 0.000000 18223 +budweisth 0 1 6.957497 0.000000 18224 +ericro 0 1 6.957497 0.000000 18225 +smithresearch 0 1 6.957497 0.000000 18226 +mispredict 0 1 6.957497 0.000000 18227 +tolerancepubl 0 1 6.957497 0.000000 18228 +jacobsen 0 1 6.957497 0.000000 18229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..abceec48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +phone 0 175 1.791759 0.000000 45 +like 0 132 1.945910 0.000000 81 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +west 0 83 2.484907 0.000000 192 +school 0 84 2.484907 0.000000 188 +june 0 79 2.564949 0.000000 214 +decemb 0 80 2.564949 0.000000 215 +would 0 67 2.708050 0.000000 251 +evalu 0 64 2.772589 0.000000 266 +streetmadison 0 38 3.295837 0.000000 474 +electr 0 38 3.295837 0.000000 461 +sciencesunivers 0 37 3.332205 0.000000 486 +next 0 34 3.401197 0.000000 517 +idea 0 32 3.465736 0.000000 545 +measur 0 28 3.610918 0.000000 609 +american 0 27 3.637586 0.000000 634 +rather 0 26 3.688879 0.000000 642 +miscellan 0 23 3.806662 0.000000 731 +listen 0 18 4.060443 0.000000 907 +drink 0 9 4.753590 0.000000 1607 +french 0 9 4.753590 0.000000 1511 +assistantdepart 0 8 4.875197 0.000000 1784 +fail 0 8 4.875197 0.000000 1655 +convers 0 8 4.875197 0.000000 1673 +architect 0 8 4.875197 0.000000 1624 +partner 0 8 4.875197 0.000000 1648 +hack 0 7 5.010635 0.000000 1950 +babak 0 5 5.347108 0.000000 2584 +falsafi 0 5 5.347108 0.000000 2585 +suni 0 5 5.347108 0.000000 2452 +queen 0 4 5.568345 0.000000 2919 +buffalo 0 2 6.263398 0.000000 4947 +usatel 0 2 6.263398 0.000000 6111 +shubu 0 2 6.263398 0.000000 6148 +crime 0 2 6.263398 0.000000 5972 +mentorcultresearch 0 1 6.957497 0.000000 18230 +modelseduc 0 1 6.957497 0.000000 18231 +morf 0 1 6.957497 0.000000 18232 +dionosi 0 1 6.957497 0.000000 18233 +hillari 0 1 6.957497 0.000000 18234 +profan 0 1 6.957497 0.000000 18235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..84920d16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +avail 0 169 1.791759 0.000000 48 +problem 1 147 1.945910 1.945910 75 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +machin 0 129 2.079442 0.000000 95 +mathemat 0 108 2.197225 0.000000 123 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +look 0 107 2.197225 0.000000 115 +techniqu 0 99 2.302585 0.000000 138 +associ 0 93 2.397895 0.000000 151 +center 0 88 2.397895 0.000000 158 +larg 0 82 2.484907 0.000000 168 +member 0 84 2.484907 0.000000 165 +west 0 83 2.484907 0.000000 192 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +street 0 63 2.772589 0.000000 293 +januari 0 62 2.772589 0.000000 264 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +telephon 0 50 3.044522 0.000000 373 +numer 0 49 3.044522 0.000000 369 +effect 0 46 3.091042 0.000000 385 +electron 0 47 3.091042 0.000000 379 +linear 0 41 3.218876 0.000000 431 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +purpos 0 37 3.332205 0.000000 481 +michael 0 35 3.401197 0.000000 514 +within 0 33 3.433987 0.000000 525 +graph 0 30 3.555348 0.000000 576 +consid 0 29 3.583519 0.000000 590 +scale 0 28 3.610918 0.000000 613 +framework 0 28 3.610918 0.000000 606 +determin 0 27 3.637586 0.000000 630 +relev 0 26 3.688879 0.000000 637 +emphasi 0 22 3.850148 0.000000 755 +properti 0 22 3.850148 0.000000 749 +identifi 0 22 3.850148 0.000000 760 +path 0 21 3.912023 0.000000 778 +exploit 0 20 3.951244 0.000000 836 +mostli 0 19 4.007333 0.000000 869 +cambridg 0 16 4.174387 0.000000 1008 +pagec 0 15 4.248495 0.000000 1011 +nonlinear 0 14 4.317488 0.000000 1107 +consider 0 14 4.317488 0.000000 1076 +econom 0 13 4.382027 0.000000 1184 +directli 0 13 4.382027 0.000000 1141 +robust 0 12 4.465908 0.000000 1271 +success 0 10 4.653960 0.000000 1390 +traffic 0 10 4.653960 0.000000 1421 +underli 0 10 4.653960 0.000000 1410 +ferri 0 8 4.875197 0.000000 1715 +pivot 0 5 5.347108 0.000000 2426 +chemic 0 5 5.347108 0.000000 2552 +condor 0 5 5.347108 0.000000 2577 +complementar 0 3 5.857933 0.000000 3999 +engineeringand 0 3 5.857933 0.000000 3779 +congest 0 3 5.857933 0.000000 3993 +followingtechniqu 0 2 6.263398 0.000000 5514 +equilibria 0 2 6.263398 0.000000 4760 +taxat 0 2 6.263398 0.000000 4524 +toll 0 2 6.263398 0.000000 6149 +arealso 0 2 6.263398 0.000000 5650 +beinginvestig 0 2 6.263398 0.000000 5745 +variationalinequ 0 1 6.957497 0.000000 18236 +toproblem 0 1 6.957497 0.000000 18237 +andinterfac 0 1 6.957497 0.000000 18238 +beingconsid 0 1 6.957497 0.000000 18239 +oncarbon 0 1 6.957497 0.000000 18240 +emiss 0 1 6.957497 0.000000 18241 +solvingproblem 0 1 6.957497 0.000000 18242 +partitioningtechniqu 0 1 6.957497 0.000000 18243 +forexploit 0 1 6.957497 0.000000 18244 +underlyingmodel 0 1 6.957497 0.000000 18245 +cpnet 0 1 6.957497 0.000000 18246 +prgram 0 1 6.957497 0.000000 18247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..ed5216ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,255 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +perform 0 143 1.945910 0.000000 74 +file 0 132 1.945910 0.000000 70 +dayton 0 119 2.079442 0.000000 104 +welcom 0 122 2.079442 0.000000 99 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +world 0 115 2.197225 0.000000 126 +make 0 111 2.197225 0.000000 120 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +check 0 115 2.197225 0.000000 118 +place 0 106 2.197225 0.000000 124 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +comment 0 93 2.397895 0.000000 146 +present 0 91 2.397895 0.000000 145 +pictur 0 89 2.397895 0.000000 160 +homepag 0 93 2.397895 0.000000 148 +octob 0 89 2.397895 0.000000 156 +learn 1 86 2.484907 2.484907 170 +environ 0 84 2.484907 0.000000 177 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +state 0 76 2.564949 0.000000 207 +master 0 76 2.564949 0.000000 216 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +intellig 0 72 2.639057 0.000000 225 +david 0 71 2.639057 0.000000 232 +free 0 73 2.639057 0.000000 224 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +goal 0 66 2.708050 0.000000 250 +degre 0 69 2.708050 0.000000 259 +order 0 69 2.708050 0.000000 249 +receiv 0 66 2.708050 0.000000 244 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +import 0 65 2.772589 0.000000 282 +street 0 63 2.772589 0.000000 293 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +copi 0 63 2.772589 0.000000 284 +function 0 62 2.772589 0.000000 275 +visit 0 63 2.772589 0.000000 288 +virtual 0 62 2.772589 0.000000 285 +plai 0 60 2.833213 0.000000 307 +browser 0 56 2.890372 0.000000 313 +explor 0 58 2.890372 0.000000 324 +thesi 0 57 2.890372 0.000000 327 +direct 0 57 2.890372 0.000000 316 +point 0 58 2.890372 0.000000 319 +think 0 57 2.890372 0.000000 314 +allow 0 53 2.944439 0.000000 333 +advisor 0 51 2.995732 0.000000 355 +finger 0 52 2.995732 0.000000 354 +case 0 51 2.995732 0.000000 351 +investig 0 51 2.995732 0.000000 353 +date 0 51 2.995732 0.000000 344 +understand 0 47 3.091042 0.000000 384 +adapt 0 46 3.091042 0.000000 387 +featur 0 46 3.091042 0.000000 386 +math 0 44 3.135494 0.000000 402 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +netscap 0 44 3.135494 0.000000 395 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +term 0 43 3.178054 0.000000 411 +music 0 42 3.218876 0.000000 436 +combin 0 42 3.218876 0.000000 421 +late 0 40 3.258097 0.000000 439 +littl 0 39 3.258097 0.000000 454 +form 0 39 3.258097 0.000000 443 +realli 0 40 3.258097 0.000000 444 +feel 0 37 3.332205 0.000000 483 +represent 0 35 3.401197 0.000000 512 +next 0 34 3.401197 0.000000 517 +word 0 34 3.401197 0.000000 508 +dissert 0 32 3.465736 0.000000 549 +kind 0 32 3.465736 0.000000 541 +express 0 32 3.465736 0.000000 540 +independ 0 32 3.465736 0.000000 548 +posit 0 31 3.496508 0.000000 552 +specifi 0 30 3.555348 0.000000 568 +platform 0 29 3.583519 0.000000 591 +measur 0 28 3.610918 0.000000 609 +hope 0 28 3.610918 0.000000 610 +actual 0 28 3.610918 0.000000 604 +releas 0 28 3.610918 0.000000 616 +relev 0 26 3.688879 0.000000 637 +bookmark 0 26 3.688879 0.000000 639 +wai 0 25 3.737670 0.000000 662 +task 0 25 3.737670 0.000000 678 +daili 0 24 3.761200 0.000000 706 +reach 0 24 3.761200 0.000000 688 +input 0 23 3.806662 0.000000 727 +head 0 23 3.806662 0.000000 732 +instead 0 22 3.850148 0.000000 756 +output 0 21 3.912023 0.000000 788 +tell 0 21 3.912023 0.000000 777 +grad 0 20 3.951244 0.000000 837 +exploit 0 20 3.951244 0.000000 836 +basi 0 20 3.951244 0.000000 828 +left 0 19 4.007333 0.000000 851 +feedback 0 19 4.007333 0.000000 854 +citi 0 19 4.007333 0.000000 874 +listen 0 18 4.060443 0.000000 907 +whether 0 17 4.110874 0.000000 918 +dilbert 0 16 4.174387 0.000000 996 +action 0 15 4.248495 0.000000 1038 +contribut 0 15 4.248495 0.000000 1021 +balanc 0 14 4.317488 0.000000 1112 +weak 0 13 4.382027 0.000000 1159 +hotlist 0 13 4.382027 0.000000 1199 +nasa 0 13 4.382027 0.000000 1188 +employ 0 12 4.465908 0.000000 1291 +minor 0 12 4.465908 0.000000 1237 +michigan 0 11 4.553877 0.000000 1368 +smart 0 11 4.553877 0.000000 1352 +abil 0 11 4.553877 0.000000 1341 +sens 0 11 4.553877 0.000000 1305 +bill 0 11 4.553877 0.000000 1297 +rapid 0 10 4.653960 0.000000 1453 +traffic 0 10 4.653960 0.000000 1421 +fellowship 0 10 4.653960 0.000000 1460 +true 0 10 4.653960 0.000000 1422 +volleybal 0 9 4.753590 0.000000 1598 +pair 0 9 4.753590 0.000000 1503 +editori 0 9 4.753590 0.000000 1611 +star 0 8 4.875197 0.000000 1717 +grew 0 8 4.875197 0.000000 1742 +gain 0 8 4.875197 0.000000 1730 +irregular 0 8 4.875197 0.000000 1768 +on 0 8 4.875197 0.000000 1628 +extract 0 8 4.875197 0.000000 1728 +opinion 0 8 4.875197 0.000000 1708 +tourist 0 8 4.875197 0.000000 1710 +earn 0 7 5.010635 0.000000 1788 +notion 0 7 5.010635 0.000000 1947 +piano 0 6 5.164786 0.000000 2201 +benefit 0 6 5.164786 0.000000 2213 +variant 0 6 5.164786 0.000000 2043 +gate 0 6 5.164786 0.000000 2182 +sponsor 0 6 5.164786 0.000000 2133 +grand 0 5 5.347108 0.000000 2425 +race 0 5 5.347108 0.000000 2417 +treat 0 5 5.347108 0.000000 2521 +focuss 0 5 5.347108 0.000000 2271 +blow 0 5 5.347108 0.000000 2407 +reinforc 0 4 5.568345 0.000000 2674 +thumb 0 4 5.568345 0.000000 2816 +sorri 0 4 5.568345 0.000000 3059 +trek 0 3 5.857933 0.000000 4025 +trumpet 0 3 5.857933 0.000000 3946 +arm 0 3 5.857933 0.000000 3697 +neg 0 3 5.857933 0.000000 3451 +teacher 0 3 5.857933 0.000000 3892 +thesystem 0 3 5.857933 0.000000 3881 +interv 0 3 5.857933 0.000000 3253 +wit 0 3 5.857933 0.000000 4005 +bibl 0 3 5.857933 0.000000 3143 +glenn 0 3 5.857933 0.000000 3869 +gould 0 3 5.857933 0.000000 3559 +scienceher 0 2 6.263398 0.000000 5912 +essenc 0 2 6.263398 0.000000 6150 +agood 0 2 6.263398 0.000000 5380 +pagefor 0 2 6.263398 0.000000 6151 +nextstep 0 2 6.263398 0.000000 6102 +foral 0 2 6.263398 0.000000 4290 +isthmu 0 2 6.263398 0.000000 6152 +pagesom 0 2 6.263398 0.000000 6109 +finton 0 1 6.957497 0.000000 18248 +openstep 0 1 6.957497 0.000000 18249 +nerdin 0 1 6.957497 0.000000 18250 +intelligenceher 0 1 6.957497 0.000000 18251 +softwarefor 0 1 6.957497 0.000000 18252 +trusti 0 1 6.957497 0.000000 18253 +nextstationor 0 1 6.957497 0.000000 18254 +enjoyplai 0 1 6.957497 0.000000 18255 +longhair 0 1 6.957497 0.000000 18256 +intervarsityfolk 0 1 6.957497 0.000000 18257 +supersoak 0 1 6.957497 0.000000 18258 +accountto 0 1 6.957497 0.000000 18259 +intelligenti 0 1 6.957497 0.000000 18260 +intelligencei 0 1 6.957497 0.000000 18261 +actappropri 0 1 6.957497 0.000000 18262 +todistinguish 0 1 6.957497 0.000000 18263 +orimport 0 1 6.957497 0.000000 18264 +basedfeatur 0 1 6.957497 0.000000 18265 +learningprocess 0 1 6.957497 0.000000 18266 +intelligentadapt 0 1 6.957497 0.000000 18267 +whichwil 0 1 6.957497 0.000000 18268 +hotlistthi 0 1 6.957497 0.000000 18269 +omniweb 0 1 6.957497 0.000000 18270 +eleg 0 1 6.957497 0.000000 18271 +omniwebi 0 1 6.957497 0.000000 18272 +responseto 0 1 6.957497 0.000000 18273 +jehovah 0 1 6.957497 0.000000 18274 +deiti 0 1 6.957497 0.000000 18275 +christwisconsin 0 1 6.957497 0.000000 18276 +intervars 0 1 6.957497 0.000000 18277 +weatherin 0 1 6.957497 0.000000 18278 +nebula 0 1 6.957497 0.000000 18279 +crosssearch 0 1 6.957497 0.000000 18280 +farsid 0 1 6.957497 0.000000 18281 +voyagerent 0 1 6.957497 0.000000 18282 +zoneroam 0 1 6.957497 0.000000 18283 +stereogram 0 1 6.957497 0.000000 18284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..c94ef463 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +cornel 0 215 1.386294 0.000000 23 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +model 0 145 1.945910 0.000000 69 +architectur 0 139 1.945910 0.000000 77 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +commun 0 95 2.397895 0.000000 157 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +novemb 0 81 2.484907 0.000000 179 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +complet 0 77 2.564949 0.000000 208 +decemb 0 80 2.564949 0.000000 215 +messag 0 76 2.564949 0.000000 212 +know 0 80 2.564949 0.000000 198 +effici 0 73 2.639057 0.000000 233 +free 0 73 2.639057 0.000000 224 +involv 0 71 2.639057 0.000000 227 +symposium 0 72 2.639057 0.000000 238 +logic 0 71 2.639057 0.000000 230 +august 0 66 2.708050 0.000000 257 +practic 0 70 2.708050 0.000000 246 +integr 0 67 2.708050 0.000000 245 +januari 0 62 2.772589 0.000000 264 +evalu 0 64 2.772589 0.000000 266 +import 0 65 2.772589 0.000000 282 +experi 0 64 2.772589 0.000000 283 +septemb 0 65 2.772589 0.000000 274 +best 0 59 2.833213 0.000000 299 +juli 0 60 2.833213 0.000000 305 +share 0 59 2.833213 0.000000 304 +automat 0 61 2.833213 0.000000 306 +semest 0 58 2.890372 0.000000 312 +publish 0 57 2.890372 0.000000 326 +undergradu 0 54 2.944439 0.000000 338 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +approach 0 48 3.044522 0.000000 366 +pointer 0 48 3.044522 0.000000 368 +telephon 0 50 3.044522 0.000000 373 +principl 0 48 3.044522 0.000000 357 +right 0 48 3.044522 0.000000 363 +effect 0 46 3.091042 0.000000 385 +possibl 0 47 3.091042 0.000000 378 +execut 0 45 3.135494 0.000000 404 +anoth 0 45 3.135494 0.000000 408 +better 0 45 3.135494 0.000000 401 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +error 0 40 3.258097 0.000000 449 +transact 0 39 3.258097 0.000000 438 +must 0 40 3.258097 0.000000 442 +littl 0 39 3.258097 0.000000 454 +author 0 39 3.258097 0.000000 450 +correct 0 38 3.295837 0.000000 462 +cost 0 37 3.332205 0.000000 480 +workstat 0 37 3.332205 0.000000 479 +procedur 0 36 3.367296 0.000000 488 +especi 0 36 3.367296 0.000000 496 +soon 0 36 3.367296 0.000000 494 +short 0 36 3.367296 0.000000 499 +least 0 35 3.401197 0.000000 516 +global 0 34 3.401197 0.000000 520 +concurr 0 34 3.401197 0.000000 501 +john 0 33 3.433987 0.000000 532 +express 0 32 3.465736 0.000000 540 +extend 0 32 3.465736 0.000000 539 +richard 0 31 3.496508 0.000000 559 +focu 0 30 3.555348 0.000000 571 +graph 0 30 3.555348 0.000000 576 +common 0 30 3.555348 0.000000 574 +rang 0 30 3.555348 0.000000 565 +semant 0 29 3.583519 0.000000 587 +steve 0 29 3.583519 0.000000 594 +multiprocessor 0 28 3.610918 0.000000 605 +load 0 28 3.610918 0.000000 601 +framework 0 28 3.610918 0.000000 606 +arrai 0 27 3.637586 0.000000 627 +revis 0 26 3.688879 0.000000 640 +wai 0 25 3.737670 0.000000 662 +store 0 24 3.761200 0.000000 693 +flow 0 24 3.761200 0.000000 700 +william 0 22 3.850148 0.000000 765 +color 0 22 3.850148 0.000000 762 +almost 0 22 3.850148 0.000000 742 +avoid 0 21 3.912023 0.000000 799 +among 0 21 3.912023 0.000000 781 +programminglanguag 0 21 3.912023 0.000000 782 +alloc 0 20 3.951244 0.000000 821 +exploit 0 20 3.951244 0.000000 836 +supervis 0 20 3.951244 0.000000 840 +facil 0 20 3.951244 0.000000 814 +definit 0 19 4.007333 0.000000 864 +seem 0 18 4.060443 0.000000 899 +regist 0 17 4.110874 0.000000 938 +steven 0 17 4.110874 0.000000 953 +monitor 0 17 4.110874 0.000000 941 +ultim 0 17 4.110874 0.000000 943 +analyz 0 17 4.110874 0.000000 925 +anyon 0 17 4.110874 0.000000 916 +modern 0 16 4.174387 0.000000 966 +easi 0 16 4.174387 0.000000 969 +todd 0 15 4.248495 0.000000 1051 +mayb 0 15 4.248495 0.000000 1014 +indic 0 15 4.248495 0.000000 1013 +driven 0 15 4.248495 0.000000 1048 +attribut 0 14 4.317488 0.000000 1092 +polynomi 0 14 4.317488 0.000000 1069 +demand 0 14 4.317488 0.000000 1073 +split 0 14 4.317488 0.000000 1078 +charl 0 13 4.382027 0.000000 1149 +context 0 13 4.382027 0.000000 1153 +sigplan 0 13 4.382027 0.000000 1190 +care 0 13 4.382027 0.000000 1177 +johnson 0 13 4.382027 0.000000 1162 +bruce 0 12 4.465908 0.000000 1226 +benjamin 0 11 4.553877 0.000000 1296 +stephen 0 11 4.553877 0.000000 1342 +transpar 0 11 4.553877 0.000000 1325 +arithmet 0 10 4.653960 0.000000 1388 +routin 0 9 4.753590 0.000000 1549 +minimum 0 9 4.753590 0.000000 1555 +cum 0 8 4.875197 0.000000 1619 +sensit 0 8 4.875197 0.000000 1726 +fischer 0 7 5.010635 0.000000 1893 +delai 0 7 5.010635 0.000000 1848 +pipelin 0 7 5.010635 0.000000 1830 +appar 0 7 5.010635 0.000000 1958 +zero 0 7 5.010635 0.000000 1896 +bookstor 0 7 5.010635 0.000000 1837 +grammar 0 6 5.164786 0.000000 2058 +benefit 0 6 5.164786 0.000000 2213 +mistak 0 6 5.164786 0.000000 2110 +sigact 0 6 5.164786 0.000000 2212 +unnecessari 0 5 5.347108 0.000000 2506 +explicitli 0 5 5.347108 0.000000 2308 +quantifi 0 5 5.347108 0.000000 2525 +attract 0 5 5.347108 0.000000 2356 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +pars 0 5 5.347108 0.000000 2321 +interprocedur 0 4 5.568345 0.000000 2771 +vital 0 4 5.568345 0.000000 2733 +popl 0 4 5.568345 0.000000 3068 +gregori 0 4 5.568345 0.000000 2928 +teachingc 0 3 5.857933 0.000000 3614 +domin 0 3 5.857933 0.000000 3995 +likelihood 0 3 5.857933 0.000000 3172 +topla 0 3 5.857933 0.000000 3563 +retarget 0 3 5.857933 0.000000 3994 +syntact 0 2 6.263398 0.000000 5552 +insoftwar 0 2 6.263398 0.000000 4932 +everywher 0 2 6.263398 0.000000 5690 +educationph 0 2 6.263398 0.000000 6112 +milton 0 2 6.263398 0.000000 6153 +bernard 0 2 6.263398 0.000000 5894 +dion 0 2 6.263398 0.000000 5856 +venkatesh 0 2 6.263398 0.000000 6154 +nbsp 1 1 6.957497 6.957497 18285 +kurland 0 1 6.957497 0.000000 18286 +proebst 0 1 6.957497 0.000000 18287 +harish 0 1 6.957497 0.000000 18288 +patil 0 1 6.957497 0.000000 18289 +nbspcharl 0 1 6.957497 0.000000 18290 +nbspprofessor 0 1 6.957497 0.000000 18291 +nbspunivers 0 1 6.957497 0.000000 18292 +enormouscap 0 1 6.957497 0.000000 18293 +haveinvestig 0 1 6.957497 0.000000 18294 +registerresid 0 1 6.957497 0.000000 18295 +loadsand 0 1 6.957497 0.000000 18296 +theprocedur 0 1 6.957497 0.000000 18297 +studiedinterprocedur 0 1 6.957497 0.000000 18298 +modelsthat 0 1 6.957497 0.000000 18299 +optimallyalloc 0 1 6.957497 0.000000 18300 +toautomat 0 1 6.957497 0.000000 18301 +orno 0 1 6.957497 0.000000 18302 +slowdown 0 1 6.957497 0.000000 18303 +inacm 0 1 6.957497 0.000000 18304 +activitiesa 0 1 6.957497 0.000000 18305 +cytronand 0 1 6.957497 0.000000 18306 +studentsdonn 0 1 6.957497 0.000000 18307 +rowland 0 1 6.957497 0.000000 18308 +skedzielewski 0 1 6.957497 0.000000 18309 +reevalu 0 1 6.957497 0.000000 18310 +corrector 0 1 6.957497 0.000000 18311 +sensitivepars 0 1 6.957497 0.000000 18312 +mahadevan 0 1 6.957497 0.000000 18313 +ganapathi 0 1 6.957497 0.000000 18314 +vimal 0 1 6.957497 0.000000 18315 +begwami 0 1 6.957497 0.000000 18316 +maunei 0 1 6.957497 0.000000 18317 +anil 0 1 6.957497 0.000000 18318 +winsborough 0 1 6.957497 0.000000 18319 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..96e08961 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,260 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +intern 0 108 2.197225 0.000000 128 +specif 0 106 2.197225 0.000000 106 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +memori 0 101 2.302585 0.000000 139 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +follow 0 92 2.397895 0.000000 143 +octob 0 89 2.397895 0.000000 156 +larg 0 82 2.484907 0.000000 168 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +second 0 81 2.484907 0.000000 166 +appear 0 78 2.564949 0.000000 210 +interfac 0 79 2.564949 0.000000 209 +complet 0 77 2.564949 0.000000 208 +dynam 0 76 2.564949 0.000000 194 +optim 0 79 2.564949 0.000000 197 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +involv 0 71 2.639057 0.000000 227 +workshop 0 71 2.639057 0.000000 239 +integr 0 67 2.708050 0.000000 245 +main 0 67 2.708050 0.000000 256 +differ 0 66 2.708050 0.000000 253 +simul 0 66 2.708050 0.000000 255 +januari 0 62 2.772589 0.000000 264 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +march 0 61 2.833213 0.000000 295 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +processor 0 54 2.944439 0.000000 335 +februari 0 54 2.944439 0.000000 328 +instruct 0 53 2.944439 0.000000 332 +extens 0 53 2.944439 0.000000 340 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +possibl 0 47 3.091042 0.000000 378 +effect 0 46 3.091042 0.000000 385 +execut 0 45 3.135494 0.000000 404 +protocol 0 45 3.135494 0.000000 407 +mechan 0 43 3.178054 0.000000 416 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +third 0 43 3.178054 0.000000 412 +cach 0 41 3.218876 0.000000 432 +futur 0 41 3.218876 0.000000 427 +examin 0 42 3.218876 0.000000 424 +multipl 0 39 3.258097 0.000000 453 +close 0 38 3.295837 0.000000 465 +purpos 0 37 3.332205 0.000000 481 +cost 0 37 3.332205 0.000000 480 +jame 0 35 3.401197 0.000000 507 +least 0 35 3.401197 0.000000 516 +extend 0 32 3.465736 0.000000 539 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +specifi 0 30 3.555348 0.000000 568 +synchron 0 29 3.583519 0.000000 588 +focus 0 29 3.583519 0.000000 584 +limit 0 29 3.583519 0.000000 585 +platform 0 29 3.583519 0.000000 591 +particip 0 29 3.583519 0.000000 589 +scale 0 28 3.610918 0.000000 613 +multiprocessor 0 28 3.610918 0.000000 605 +cluster 0 28 3.610918 0.000000 612 +repres 0 26 3.688879 0.000000 656 +consist 0 26 3.688879 0.000000 651 +bound 0 26 3.688879 0.000000 659 +todai 0 25 3.737670 0.000000 672 +wai 0 25 3.737670 0.000000 662 +supercomput 0 25 3.737670 0.000000 681 +scalabl 0 24 3.761200 0.000000 705 +pattern 0 24 3.761200 0.000000 689 +hierarchi 0 22 3.850148 0.000000 744 +chip 0 21 3.912023 0.000000 770 +alumni 0 21 3.912023 0.000000 807 +increas 0 20 3.951244 0.000000 829 +exploit 0 20 3.951244 0.000000 836 +separ 0 19 4.007333 0.000000 844 +along 0 18 4.060443 0.000000 878 +element 0 18 4.060443 0.000000 895 +minim 0 18 4.060443 0.000000 887 +wind 0 18 4.060443 0.000000 908 +scott 0 18 4.060443 0.000000 884 +lower 0 18 4.060443 0.000000 886 +interconnect 0 17 4.110874 0.000000 937 +layer 0 17 4.110874 0.000000 926 +latenc 0 16 4.174387 0.000000 993 +capabl 0 15 4.248495 0.000000 1016 +massiv 0 15 4.248495 0.000000 1026 +overhead 0 15 4.248495 0.000000 1035 +hierarch 0 15 4.248495 0.000000 1018 +coher 0 14 4.317488 0.000000 1109 +conduct 0 14 4.317488 0.000000 1065 +topolog 0 14 4.317488 0.000000 1089 +johnson 0 13 4.382027 0.000000 1162 +central 0 13 4.382027 0.000000 1160 +individu 0 13 4.382027 0.000000 1126 +optic 0 12 4.465908 0.000000 1221 +mari 0 12 4.465908 0.000000 1266 +grow 0 12 4.465908 0.000000 1209 +bandwidth 0 11 4.553877 0.000000 1365 +primit 0 11 4.553877 0.000000 1317 +evolut 0 11 4.553877 0.000000 1314 +impact 0 11 4.553877 0.000000 1334 +extrem 0 11 4.553877 0.000000 1330 +penalti 0 10 4.653960 0.000000 1405 +modul 0 10 4.653960 0.000000 1434 +resid 0 10 4.653960 0.000000 1461 +label 0 10 4.653960 0.000000 1423 +queue 0 10 4.653960 0.000000 1386 +vernon 0 9 4.753590 0.000000 1556 +doug 0 9 4.753590 0.000000 1517 +elimin 0 9 4.753590 0.000000 1558 +lock 0 9 4.753590 0.000000 1551 +transport 0 8 4.875197 0.000000 1672 +univeristi 0 8 4.875197 0.000000 1754 +evan 0 8 4.875197 0.000000 1633 +goodman 0 7 5.010635 0.000000 1891 +burger 0 7 5.010635 0.000000 1889 +merg 0 7 5.010635 0.000000 1862 +migrat 0 7 5.010635 0.000000 1851 +microprocessor 0 7 5.010635 0.000000 1808 +serial 0 7 5.010635 0.000000 1975 +philip 0 6 5.164786 0.000000 2005 +onto 0 6 5.164786 0.000000 2089 +diagram 0 5 5.347108 0.000000 2346 +quantifi 0 5 5.347108 0.000000 2525 +galileo 0 4 5.568345 0.000000 3086 +medium 0 4 5.568345 0.000000 2834 +eventu 0 4 5.568345 0.000000 3074 +stefano 0 3 5.857933 0.000000 3372 +kaxira 0 3 5.857933 0.000000 3373 +arrow 0 3 5.857933 0.000000 3520 +bank 0 3 5.857933 0.000000 3920 +aswel 0 3 5.857933 0.000000 3286 +fresh 0 3 5.857933 0.000000 3706 +stein 0 3 5.857933 0.000000 3646 +alain 0 2 6.263398 0.000000 6086 +iram 0 2 6.263398 0.000000 4520 +datascalar 0 2 6.263398 0.000000 4518 +wisconsint 0 2 6.263398 0.000000 6155 +groupat 0 2 6.263398 0.000000 5677 +emphasison 0 2 6.263398 0.000000 4157 +extent 0 2 6.263398 0.000000 6080 +dram 0 2 6.263398 0.000000 4173 +spsd 0 2 6.263398 0.000000 4519 +declin 0 2 6.263398 0.000000 5385 +logarithm 0 2 6.263398 0.000000 5322 +multiprocessorsa 0 2 6.263398 0.000000 5455 +gjess 0 2 6.263398 0.000000 6156 +woest 0 1 6.957497 0.000000 18320 +nagi 0 1 6.957497 0.000000 18321 +contentsgalileoproject 0 1 6.957497 0.000000 18322 +descriptionpublicationsrel 0 1 6.957497 0.000000 18323 +projectssci 0 1 6.957497 0.000000 18324 +wisconsinproject 0 1 6.957497 0.000000 18325 +descriptionpublicationsproject 0 1 6.957497 0.000000 18326 +membersgalileo 0 1 6.957497 0.000000 18327 +wisconsingalileo 0 1 6.957497 0.000000 18328 +therelationship 0 1 6.957497 0.000000 18329 +futuresystem 0 1 6.957497 0.000000 18330 +issuabl 0 1 6.957497 0.000000 18331 +orlimit 0 1 6.957497 0.000000 18332 +capacityon 0 1 6.957497 0.000000 18333 +sizabl 0 1 6.957497 0.000000 18334 +fractionof 0 1 6.957497 0.000000 18335 +mopin 0 1 6.957497 0.000000 18336 +ofprocessor 0 1 6.957497 0.000000 18337 +eventuallyobvi 0 1 6.957497 0.000000 18338 +andlimit 0 1 6.957497 0.000000 18339 +systemsperform 0 1 6.957497 0.000000 18340 +theprocessor 0 1 6.957497 0.000000 18341 +spectrumcach 0 1 6.957497 0.000000 18342 +systemsdesign 0 1 6.957497 0.000000 18343 +systemprogram 0 1 6.957497 0.000000 18344 +bottlenecksdoug 0 1 6.957497 0.000000 18345 +modeldoug 0 1 6.957497 0.000000 18346 +microprocessorsdoug 0 1 6.957497 0.000000 18347 +microprocessorsdougla 0 1 6.957497 0.000000 18348 +berkeleyppram 0 1 6.957497 0.000000 18349 +kyushu 0 1 6.957497 0.000000 18350 +japansci 0 1 6.957497 0.000000 18351 +wisconsinour 0 1 6.957497 0.000000 18352 +coherentshar 0 1 6.957497 0.000000 18353 +coherentinterfac 0 1 6.957497 0.000000 18354 +qolb 0 1 6.957497 0.000000 18355 +pairwis 0 1 6.957497 0.000000 18356 +definitionfor 0 1 6.957497 0.000000 18357 +betweenprocess 0 1 6.957497 0.000000 18358 +structureseffici 0 1 6.957497 0.000000 18359 +extensionsaggress 0 1 6.957497 0.000000 18360 +multiprocessorswisconsin 0 1 6.957497 0.000000 18361 +tunneldougla 0 1 6.957497 0.000000 18362 +scijam 0 1 6.957497 0.000000 18363 +memoryross 0 1 6.957497 0.000000 18364 +aboulenein 0 1 6.957497 0.000000 18365 +ringsross 0 1 6.957497 0.000000 18366 +ringsteven 0 1 6.957497 0.000000 18367 +coherenceross 0 1 6.957497 0.000000 18368 +multiprocessorsphilip 0 1 6.957497 0.000000 18369 +multiprocessorjam 0 1 6.957497 0.000000 18370 +abouleneinross 0 1 6.957497 0.000000 18371 +johnsonstev 0 1 6.957497 0.000000 18372 +scottlast 0 1 6.957497 0.000000 18373 +dburger 0 1 6.957497 0.000000 18374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..5e21c7b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +structur 0 106 2.197225 0.000000 105 +peopl 0 96 2.302585 0.000000 132 +grade 0 90 2.397895 0.000000 142 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +paul 0 38 3.295837 0.000000 471 +post 0 35 3.401197 0.000000 505 +martin 0 21 3.912023 0.000000 794 +andrew 0 19 4.007333 0.000000 849 +jean 0 10 4.653960 0.000000 1440 +regent 0 5 5.347108 0.000000 2551 +geeri 1 3 5.857933 5.857933 3422 +albert 0 2 6.263398 0.000000 5987 +friedrich 0 2 6.263398 0.000000 5175 +madisonin 0 1 6.957497 0.000000 18375 +compsci 0 1 6.957497 0.000000 18376 +pontif 0 1 6.957497 0.000000 18377 +jacqu 0 1 6.957497 0.000000 18378 +derrida 0 1 6.957497 0.000000 18379 +heidegg 0 1 6.957497 0.000000 18380 +camu 0 1 6.957497 0.000000 18381 +sartr 0 1 6.957497 0.000000 18382 +nietzsch 0 1 6.957497 0.000000 18383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..07aaf7a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +madison 0 165 1.791759 0.000000 55 +avail 0 169 1.791759 0.000000 48 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +machin 0 129 2.079442 0.000000 95 +report 0 131 2.079442 0.000000 92 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +well 0 109 2.197225 0.000000 121 +book 0 99 2.302585 0.000000 131 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +imag 0 91 2.397895 0.000000 161 +pictur 0 89 2.397895 0.000000 160 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +orient 0 80 2.564949 0.000000 205 +main 0 67 2.708050 0.000000 256 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +share 0 59 2.833213 0.000000 304 +back 0 60 2.833213 0.000000 297 +index 0 56 2.890372 0.000000 309 +unix 0 58 2.890372 0.000000 308 +sever 0 56 2.890372 0.000000 322 +think 0 57 2.890372 0.000000 314 +sampl 0 53 2.944439 0.000000 339 +case 0 51 2.995732 0.000000 351 +right 0 48 3.044522 0.000000 363 +move 0 47 3.091042 0.000000 382 +netscap 0 44 3.135494 0.000000 395 +directori 0 45 3.135494 0.000000 396 +might 0 41 3.218876 0.000000 426 +fast 0 42 3.218876 0.000000 429 +continu 0 39 3.258097 0.000000 448 +movi 0 40 3.258097 0.000000 459 +small 0 39 3.258097 0.000000 447 +cost 0 37 3.332205 0.000000 480 +either 0 35 3.401197 0.000000 506 +given 0 32 3.465736 0.000000 538 +someth 0 31 3.496508 0.000000 554 +consid 0 29 3.583519 0.000000 590 +usual 0 28 3.610918 0.000000 608 +load 0 28 3.610918 0.000000 601 +actual 0 28 3.610918 0.000000 604 +great 0 27 3.637586 0.000000 626 +bookmark 0 26 3.688879 0.000000 639 +sometim 0 24 3.761200 0.000000 696 +higher 0 24 3.761200 0.000000 690 +thank 0 23 3.806662 0.000000 721 +sent 0 22 3.850148 0.000000 763 +grad 0 20 3.951244 0.000000 837 +wrote 0 20 3.951244 0.000000 830 +mostli 0 19 4.007333 0.000000 869 +exercis 0 19 4.007333 0.000000 842 +stop 0 17 4.110874 0.000000 942 +mayb 0 15 4.248495 0.000000 1014 +purchas 0 15 4.248495 0.000000 1030 +floor 0 14 4.317488 0.000000 1070 +wait 0 13 4.382027 0.000000 1168 +walk 0 12 4.465908 0.000000 1281 +outsid 0 12 4.465908 0.000000 1219 +grow 0 12 4.465908 0.000000 1209 +noth 0 11 4.553877 0.000000 1328 +denni 0 11 4.553877 0.000000 1321 +calvin 0 9 4.753590 0.000000 1518 +claim 0 8 4.875197 0.000000 1664 +unifi 0 8 4.875197 0.000000 1774 +reload 0 8 4.875197 0.000000 1682 +told 0 8 4.875197 0.000000 1658 +accord 0 7 5.010635 0.000000 1826 +none 0 7 5.010635 0.000000 1811 +monei 0 7 5.010635 0.000000 1934 +christian 0 7 5.010635 0.000000 1949 +huge 0 6 5.164786 0.000000 1991 +handbook 0 6 5.164786 0.000000 2061 +gui 0 5 5.347108 0.000000 2573 +feet 0 5 5.347108 0.000000 2492 +anti 0 5 5.347108 0.000000 2434 +cheap 0 4 5.568345 0.000000 2751 +prospect 0 4 5.568345 0.000000 3013 +shelf 0 4 5.568345 0.000000 2621 +fork 0 4 5.568345 0.000000 2801 +kill 0 4 5.568345 0.000000 3000 +suppos 0 4 5.568345 0.000000 3002 +suffic 0 4 5.568345 0.000000 2869 +glass 0 3 5.857933 0.000000 3759 +dutch 0 3 5.857933 0.000000 3592 +influenc 0 3 5.857933 0.000000 3349 +cash 0 3 5.857933 0.000000 3355 +dabbl 0 3 5.857933 0.000000 3971 +forward 0 3 5.857933 0.000000 3784 +deposit 0 2 6.263398 0.000000 6095 +cooler 0 2 6.263398 0.000000 6023 +suspect 0 2 6.263398 0.000000 5187 +nearest 0 2 6.263398 0.000000 4922 +roommat 0 2 6.263398 0.000000 6157 +withno 0 2 6.263398 0.000000 5370 +eventhough 0 2 6.263398 0.000000 6158 +anyhow 0 2 6.263398 0.000000 5188 +killer 0 2 6.263398 0.000000 6159 +programmingin 0 2 6.263398 0.000000 4135 +ritchi 0 2 6.263398 0.000000 4306 +creator 0 2 6.263398 0.000000 5998 +gideon 0 1 6.957497 0.000000 18384 +tweak 0 1 6.957497 0.000000 18385 +toonion 0 1 6.957497 0.000000 18386 +seethi 0 1 6.957497 0.000000 18387 +blockbust 0 1 6.957497 0.000000 18388 +predica 0 1 6.957497 0.000000 18389 +dismal 0 1 6.957497 0.000000 18390 +donationto 0 1 6.957497 0.000000 18391 +defrai 0 1 6.957497 0.000000 18392 +orderscan 0 1 6.957497 0.000000 18393 +monro 0 1 6.957497 0.000000 18394 +usathank 0 1 6.957497 0.000000 18395 +unread 0 1 6.957497 0.000000 18396 +achil 0 1 6.957497 0.000000 18397 +cstechreport 0 1 6.957497 0.000000 18398 +otherstuff 0 1 6.957497 0.000000 18399 +averagewil 0 1 6.957497 0.000000 18400 +doofu 0 1 6.957497 0.000000 18401 +zippi 0 1 6.957497 0.000000 18402 +pinheadha 0 1 6.957497 0.000000 18403 +justtri 0 1 6.957497 0.000000 18404 +mozilla 0 1 6.957497 0.000000 18405 +buttonher 0 1 6.957497 0.000000 18406 +somethingin 0 1 6.957497 0.000000 18407 +hater 0 1 6.957497 0.000000 18408 +mailand 0 1 6.957497 0.000000 18409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..85d167c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,407 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +algorithm 0 162 1.791759 0.000000 57 +first 0 140 1.945910 0.000000 71 +architectur 0 139 1.945910 0.000000 77 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +schedul 0 119 2.079442 0.000000 85 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +code 0 108 2.197225 0.000000 116 +version 0 113 2.197225 0.000000 122 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +imag 0 91 2.397895 0.000000 161 +sinc 0 90 2.397895 0.000000 159 +real 0 93 2.397895 0.000000 144 +mani 0 92 2.397895 0.000000 150 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +thing 0 84 2.484907 0.000000 189 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +control 0 82 2.484907 0.000000 164 +requir 0 81 2.484907 0.000000 167 +level 0 87 2.484907 0.000000 180 +educ 0 86 2.484907 0.000000 191 +internet 0 83 2.484907 0.000000 186 +build 0 85 2.484907 0.000000 184 +chang 0 82 2.484907 0.000000 163 +resum 0 79 2.564949 0.000000 217 +dynam 0 76 2.564949 0.000000 194 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +meet 1 72 2.639057 2.639057 229 +html 0 75 2.639057 0.000000 235 +involv 0 71 2.639057 0.000000 227 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +workshop 0 71 2.639057 0.000000 239 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +test 0 66 2.708050 0.000000 252 +integr 0 67 2.708050 0.000000 245 +view 0 70 2.708050 0.000000 254 +creat 0 63 2.772589 0.000000 277 +organ 0 65 2.772589 0.000000 265 +previou 0 62 2.772589 0.000000 290 +plan 0 65 2.772589 0.000000 272 +best 0 59 2.833213 0.000000 299 +unix 0 58 2.890372 0.000000 308 +summer 0 56 2.890372 0.000000 311 +think 0 57 2.890372 0.000000 314 +publish 0 57 2.890372 0.000000 326 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +major 0 56 2.890372 0.000000 315 +suggest 0 53 2.944439 0.000000 331 +instruct 0 53 2.944439 0.000000 332 +allow 0 53 2.944439 0.000000 333 +case 0 51 2.995732 0.000000 351 +week 0 52 2.995732 0.000000 343 +approach 0 48 3.044522 0.000000 366 +frequent 0 49 3.044522 0.000000 367 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +possibl 0 47 3.091042 0.000000 378 +get 0 46 3.091042 0.000000 380 +adapt 0 46 3.091042 0.000000 387 +effect 0 46 3.091042 0.000000 385 +could 0 46 3.091042 0.000000 383 +featur 0 46 3.091042 0.000000 386 +keep 0 44 3.135494 0.000000 409 +even 0 45 3.135494 0.000000 393 +favorit 0 44 3.135494 0.000000 410 +textbook 0 44 3.135494 0.000000 397 +describ 0 45 3.135494 0.000000 400 +mark 0 44 3.135494 0.000000 403 +futur 0 41 3.218876 0.000000 427 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +compani 0 41 3.218876 0.000000 423 +linear 0 41 3.218876 0.000000 431 +past 0 42 3.218876 0.000000 428 +author 0 39 3.258097 0.000000 450 +form 0 39 3.258097 0.000000 443 +multipl 0 39 3.258097 0.000000 453 +programm 0 39 3.258097 0.000000 445 +must 0 40 3.258097 0.000000 442 +open 0 38 3.295837 0.000000 469 +brian 0 38 3.295837 0.000000 466 +credit 0 38 3.295837 0.000000 460 +microsoft 0 38 3.295837 0.000000 468 +connect 0 37 3.332205 0.000000 485 +workstat 0 37 3.332205 0.000000 479 +mean 0 37 3.332205 0.000000 477 +download 0 36 3.367296 0.000000 489 +tree 0 36 3.367296 0.000000 492 +least 0 35 3.401197 0.000000 516 +concurr 0 34 3.401197 0.000000 501 +print 0 34 3.401197 0.000000 503 +singl 0 34 3.401197 0.000000 510 +manual 0 35 3.401197 0.000000 504 +someth 0 31 3.496508 0.000000 554 +often 0 31 3.496508 0.000000 551 +domain 0 30 3.555348 0.000000 564 +synchron 0 29 3.583519 0.000000 588 +quot 0 29 3.583519 0.000000 582 +actual 0 28 3.610918 0.000000 604 +usual 0 28 3.610918 0.000000 608 +static 0 27 3.637586 0.000000 619 +quit 0 27 3.637586 0.000000 633 +team 0 27 3.637586 0.000000 625 +rather 0 26 3.688879 0.000000 642 +enhanc 0 26 3.688879 0.000000 644 +although 0 25 3.737670 0.000000 667 +wai 0 25 3.737670 0.000000 662 +never 0 25 3.737670 0.000000 671 +alwai 0 24 3.761200 0.000000 691 +higher 0 24 3.761200 0.000000 690 +known 0 24 3.761200 0.000000 702 +mike 0 24 3.761200 0.000000 703 +miscellan 0 23 3.806662 0.000000 731 +self 0 22 3.850148 0.000000 761 +william 0 22 3.850148 0.000000 765 +sort 0 22 3.850148 0.000000 738 +identifi 0 22 3.850148 0.000000 760 +sent 0 22 3.850148 0.000000 763 +busi 0 21 3.912023 0.000000 784 +chip 0 21 3.912023 0.000000 770 +love 0 21 3.912023 0.000000 804 +divis 0 21 3.912023 0.000000 803 +similar 0 21 3.912023 0.000000 771 +voic 0 21 3.912023 0.000000 806 +watch 0 21 3.912023 0.000000 789 +tell 0 21 3.912023 0.000000 777 +avoid 0 21 3.912023 0.000000 799 +minut 0 20 3.951244 0.000000 810 +kernel 0 20 3.951244 0.000000 825 +portabl 0 20 3.951244 0.000000 819 +ever 0 19 4.007333 0.000000 872 +miss 0 19 4.007333 0.000000 866 +less 0 18 4.060443 0.000000 892 +element 0 18 4.060443 0.000000 895 +seem 0 18 4.060443 0.000000 899 +record 0 18 4.060443 0.000000 890 +aid 0 18 4.060443 0.000000 904 +thought 0 17 4.110874 0.000000 945 +anyon 0 17 4.110874 0.000000 916 +weekli 0 17 4.110874 0.000000 919 +intel 0 16 4.174387 0.000000 1000 +alreadi 0 16 4.174387 0.000000 963 +critic 0 16 4.174387 0.000000 982 +advantag 0 16 4.174387 0.000000 987 +configur 0 15 4.248495 0.000000 1012 +enough 0 15 4.248495 0.000000 1040 +piec 0 15 4.248495 0.000000 1020 +save 0 14 4.317488 0.000000 1099 +manner 0 14 4.317488 0.000000 1074 +becam 0 14 4.317488 0.000000 1117 +cannot 0 13 4.382027 0.000000 1144 +sai 0 13 4.382027 0.000000 1175 +care 0 13 4.382027 0.000000 1177 +central 0 13 4.382027 0.000000 1160 +everyth 0 13 4.382027 0.000000 1169 +weak 0 13 4.382027 0.000000 1159 +touch 0 12 4.465908 0.000000 1288 +assembl 0 12 4.465908 0.000000 1207 +usenix 0 12 4.465908 0.000000 1240 +went 0 12 4.465908 0.000000 1279 +overal 0 12 4.465908 0.000000 1254 +see 0 11 4.553877 0.000000 1337 +mapl 0 11 4.553877 0.000000 1376 +wood 0 11 4.553877 0.000000 1355 +fix 0 11 4.553877 0.000000 1327 +card 0 10 4.653960 0.000000 1435 +relationship 0 10 4.653960 0.000000 1383 +mainli 0 10 4.653960 0.000000 1432 +invit 0 10 4.653960 0.000000 1428 +bring 0 10 4.653960 0.000000 1430 +lock 0 9 4.753590 0.000000 1551 +trust 0 9 4.753590 0.000000 1583 +motorola 0 9 4.753590 0.000000 1546 +entitl 0 9 4.753590 0.000000 1490 +patterson 0 9 4.753590 0.000000 1554 +charg 0 9 4.753590 0.000000 1582 +login 0 9 4.753590 0.000000 1550 +prefer 0 9 4.753590 0.000000 1491 +admin 0 9 4.753590 0.000000 1476 +calendar 0 8 4.875197 0.000000 1649 +architect 0 8 4.875197 0.000000 1624 +claim 0 8 4.875197 0.000000 1664 +poor 0 8 4.875197 0.000000 1736 +perhap 0 8 4.875197 0.000000 1693 +driver 0 8 4.875197 0.000000 1657 +accomplish 0 8 4.875197 0.000000 1755 +mass 0 8 4.875197 0.000000 1732 +isol 0 8 4.875197 0.000000 1663 +realiz 0 8 4.875197 0.000000 1739 +montreal 0 7 5.010635 0.000000 1961 +serial 0 7 5.010635 0.000000 1975 +suffici 0 7 5.010635 0.000000 1897 +therefor 0 7 5.010635 0.000000 1822 +header 0 7 5.010635 0.000000 1787 +arrang 0 6 5.164786 0.000000 2023 +commit 0 6 5.164786 0.000000 2233 +sohi 0 6 5.164786 0.000000 2237 +versu 0 6 5.164786 0.000000 2052 +snow 0 6 5.164786 0.000000 2031 +sleep 0 6 5.164786 0.000000 2211 +microarchitectur 0 6 5.164786 0.000000 2238 +pentium 0 6 5.164786 0.000000 2077 +quickli 0 6 5.164786 0.000000 2000 +proce 0 6 5.164786 0.000000 2114 +relax 0 6 5.164786 0.000000 2120 +strip 0 6 5.164786 0.000000 2203 +apolog 0 6 5.164786 0.000000 2046 +optimist 0 5 5.347108 0.000000 2501 +pagethi 0 5 5.347108 0.000000 2336 +ship 0 5 5.347108 0.000000 2534 +default 0 5 5.347108 0.000000 2335 +patent 0 5 5.347108 0.000000 2574 +chapel 0 5 5.347108 0.000000 2457 +keeper 0 5 5.347108 0.000000 2569 +knew 0 5 5.347108 0.000000 2445 +exchang 0 5 5.347108 0.000000 2310 +adopt 0 5 5.347108 0.000000 2467 +hennessi 0 5 5.347108 0.000000 2289 +appreci 0 5 5.347108 0.000000 2374 +hate 0 5 5.347108 0.000000 2529 +recogn 0 5 5.347108 0.000000 2302 +truli 0 5 5.347108 0.000000 2476 +notabl 0 5 5.347108 0.000000 2276 +camp 0 5 5.347108 0.000000 2545 +andi 0 4 5.568345 0.000000 3081 +filesystem 0 4 5.568345 0.000000 2587 +unless 0 4 5.568345 0.000000 2607 +scribe 0 4 5.568345 0.000000 2631 +church 0 4 5.568345 0.000000 3011 +aspir 0 4 5.568345 0.000000 3019 +wear 0 4 5.568345 0.000000 2785 +trick 0 4 5.568345 0.000000 2967 +wander 0 4 5.568345 0.000000 2896 +wherea 0 4 5.568345 0.000000 2597 +cshrc 0 4 5.568345 0.000000 2759 +disconnect 0 4 5.568345 0.000000 2664 +pilot 0 3 5.857933 0.000000 4008 +warm 0 3 5.857933 0.000000 3904 +hacker 0 3 5.857933 0.000000 3996 +gould 0 3 5.857933 0.000000 3559 +urgent 0 3 5.857933 0.000000 3316 +fame 0 3 5.857933 0.000000 3793 +coin 0 3 5.857933 0.000000 3799 +harm 0 3 5.857933 0.000000 3515 +advertis 0 3 5.857933 0.000000 3788 +redesign 0 3 5.857933 0.000000 3540 +wilkinson 0 3 5.857933 0.000000 3579 +berlin 0 3 5.857933 0.000000 3263 +fascin 0 3 5.857933 0.000000 3948 +glew 0 2 6.263398 0.000000 4162 +pageandi 0 2 6.263398 0.000000 6096 +pope 0 2 6.263398 0.000000 5506 +parson 0 2 6.263398 0.000000 4528 +king 0 2 6.263398 0.000000 5737 +strand 0 2 6.263398 0.000000 5880 +chop 0 2 6.263398 0.000000 6160 +beard 0 2 6.263398 0.000000 6161 +constantli 0 2 6.263398 0.000000 4181 +verg 0 2 6.263398 0.000000 5488 +disagre 0 2 6.263398 0.000000 6105 +defunct 0 2 6.263398 0.000000 6162 +startup 0 2 6.263398 0.000000 4676 +clone 0 2 6.263398 0.000000 5833 +aitken 0 2 6.263398 0.000000 4941 +ubiquit 0 2 6.263398 0.000000 6049 +intervent 0 2 6.263398 0.000000 6163 +bother 0 2 6.263398 0.000000 6164 +advoc 0 1 6.957497 0.000000 18410 +beef 0 1 6.957497 0.000000 18411 +krazi 0 1 6.957497 0.000000 18412 +wannab 0 1 6.957497 0.000000 18413 +softwareto 0 1 6.957497 0.000000 18414 +teresa 0 1 6.957497 0.000000 18415 +largelyform 0 1 6.957497 0.000000 18416 +snippet 0 1 6.957497 0.000000 18417 +stylishor 0 1 6.957497 0.000000 18418 +summarycontact 0 1 6.957497 0.000000 18419 +addressescalendar 0 1 6.957497 0.000000 18420 +taker 0 1 6.957497 0.000000 18421 +priest 0 1 6.957497 0.000000 18422 +boyn 0 1 6.957497 0.000000 18423 +frost 0 1 6.957497 0.000000 18424 +almighti 0 1 6.957497 0.000000 18425 +dollar 0 1 6.957497 0.000000 18426 +bellow 0 1 6.957497 0.000000 18427 +ranter 0 1 6.957497 0.000000 18428 +preacher 0 1 6.957497 0.000000 18429 +beecher 0 1 6.957497 0.000000 18430 +harbour 0 1 6.957497 0.000000 18431 +deplor 0 1 6.957497 0.000000 18432 +churchmen 0 1 6.957497 0.000000 18433 +notori 0 1 6.957497 0.000000 18434 +atheist 0 1 6.957497 0.000000 18435 +chariti 0 1 6.957497 0.000000 18436 +sailor 0 1 6.957497 0.000000 18437 +chord 0 1 6.957497 0.000000 18438 +firewood 0 1 6.957497 0.000000 18439 +meal 0 1 6.957497 0.000000 18440 +manifesto 0 1 6.957497 0.000000 18441 +handbil 0 1 6.957497 0.000000 18442 +hackeralthough 0 1 6.957497 0.000000 18443 +formerlyhad 0 1 6.957497 0.000000 18444 +fake 0 1 6.957497 0.000000 18445 +andstil 0 1 6.957497 0.000000 18446 +wistfulli 0 1 6.957497 0.000000 18447 +suspend 0 1 6.957497 0.000000 18448 +bald 0 1 6.957497 0.000000 18449 +architectureonc 0 1 6.957497 0.000000 18450 +architecturei 0 1 6.957497 0.000000 18451 +grabbag 0 1 6.957497 0.000000 18452 +antidot 0 1 6.957497 0.000000 18453 +afford 0 1 6.957497 0.000000 18454 +diskspac 0 1 6.957497 0.000000 18455 +provideror 0 1 6.957497 0.000000 18456 +architectureon 0 1 6.957497 0.000000 18457 +datasheet 0 1 6.957497 0.000000 18458 +netscapebookmarksstockscod 0 1 6.957497 0.000000 18459 +standardsroi 0 1 6.957497 0.000000 18460 +standardsi 0 1 6.957497 0.000000 18461 +enfopris 0 1 6.957497 0.000000 18462 +writingto 0 1 6.957497 0.000000 18463 +longstand 0 1 6.957497 0.000000 18464 +configurationmanag 0 1 6.957497 0.000000 18465 +scc 0 1 6.957497 0.000000 18466 +box 0 1 6.957497 0.000000 18467 +hardlink 0 1 6.957497 0.000000 18468 +deprec 0 1 6.957497 0.000000 18469 +livelock 0 1 6.957497 0.000000 18470 +insist 0 1 6.957497 0.000000 18471 +checkinsso 0 1 6.957497 0.000000 18472 +approachin 0 1 6.957497 0.000000 18473 +fetterman 0 1 6.957497 0.000000 18474 +deserv 0 1 6.957497 0.000000 18475 +wisconsinhow 0 1 6.957497 0.000000 18476 +programat 0 1 6.957497 0.000000 18477 +cmtool 0 1 6.957497 0.000000 18478 +ical 0 1 6.957497 0.000000 18479 +anyof 0 1 6.957497 0.000000 18480 +manuallyadd 0 1 6.957497 0.000000 18481 +intelat 0 1 6.957497 0.000000 18482 +devout 0 1 6.957497 0.000000 18483 +ontim 0 1 6.957497 0.000000 18484 +meetingswith 0 1 6.957497 0.000000 18485 +reserveth 0 1 6.957497 0.000000 18486 +blindli 0 1 6.957497 0.000000 18487 +proposeif 0 1 6.957497 0.000000 18488 +overallschedul 0 1 6.957497 0.000000 18489 +secretariesand 0 1 6.957497 0.000000 18490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..0a891b6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +professor 0 137 1.945910 0.000000 76 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +good 0 77 2.564949 0.000000 200 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 0 35 3.401197 0.000000 507 +lot 0 18 4.060443 0.000000 889 +goodman 1 7 5.010635 5.010635 1891 +sciencesdepart 0 6 5.164786 0.000000 2020 +galileo 0 4 5.568345 0.000000 3086 +usaresearch 0 1 6.957497 0.000000 18491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..88be8415 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,339 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +object 0 138 1.945910 0.000000 79 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +site 0 106 2.197225 0.000000 119 +manag 0 114 2.197225 0.000000 125 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +version 0 113 2.197225 0.000000 122 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +section 0 94 2.397895 0.000000 149 +select 0 91 2.397895 0.000000 154 +pictur 0 89 2.397895 0.000000 160 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +wide 0 84 2.484907 0.000000 185 +academ 0 82 2.484907 0.000000 178 +refer 0 78 2.564949 0.000000 203 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +homework 0 79 2.564949 0.000000 193 +html 0 75 2.639057 0.000000 235 +line 0 75 2.639057 0.000000 231 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +simul 0 66 2.708050 0.000000 255 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +multimedia 0 68 2.708050 0.000000 258 +dept 0 64 2.772589 0.000000 291 +guid 0 63 2.772589 0.000000 267 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +new 0 64 2.772589 0.000000 262 +back 0 60 2.833213 0.000000 297 +automat 0 61 2.833213 0.000000 306 +index 0 56 2.890372 0.000000 309 +unix 0 58 2.890372 0.000000 308 +browser 0 56 2.890372 0.000000 313 +hardwar 0 51 2.995732 0.000000 350 +cool 0 49 3.044522 0.000000 374 +archiv 0 49 3.044522 0.000000 364 +format 0 48 3.044522 0.000000 356 +standard 0 48 3.044522 0.000000 365 +numer 0 49 3.044522 0.000000 369 +give 0 50 3.044522 0.000000 359 +fridai 0 44 3.135494 0.000000 390 +algebra 0 45 3.135494 0.000000 394 +directori 0 45 3.135494 0.000000 396 +video 0 44 3.135494 0.000000 405 +math 0 44 3.135494 0.000000 402 +natur 0 44 3.135494 0.000000 406 +mark 0 44 3.135494 0.000000 403 +vision 0 41 3.218876 0.000000 430 +linear 0 41 3.218876 0.000000 431 +might 0 41 3.218876 0.000000 426 +compani 0 41 3.218876 0.000000 423 +http 0 41 3.218876 0.000000 420 +theoret 0 39 3.258097 0.000000 446 +electr 0 38 3.295837 0.000000 461 +robot 0 36 3.367296 0.000000 497 +everi 0 34 3.401197 0.000000 519 +tech 0 35 3.401197 0.000000 515 +survei 0 35 3.401197 0.000000 513 +eduoffic 0 33 3.433987 0.000000 531 +board 0 33 3.433987 0.000000 528 +idea 0 32 3.465736 0.000000 545 +independ 0 32 3.465736 0.000000 548 +storag 0 31 3.496508 0.000000 553 +rang 0 30 3.555348 0.000000 565 +specifi 0 30 3.555348 0.000000 568 +univ 0 28 3.610918 0.000000 617 +packag 0 28 3.610918 0.000000 614 +retriev 0 27 3.637586 0.000000 621 +altern 0 26 3.688879 0.000000 641 +challeng 0 26 3.688879 0.000000 653 +trace 0 25 3.737670 0.000000 677 +todai 0 25 3.737670 0.000000 672 +client 0 25 3.737670 0.000000 679 +greg 0 24 3.761200 0.000000 695 +yahoo 0 24 3.761200 0.000000 707 +recognit 0 23 3.806662 0.000000 723 +thank 0 23 3.806662 0.000000 721 +geometri 0 22 3.850148 0.000000 752 +fund 0 21 3.912023 0.000000 805 +util 0 21 3.912023 0.000000 774 +hous 0 21 3.912023 0.000000 801 +nice 0 20 3.951244 0.000000 809 +portabl 0 20 3.951244 0.000000 819 +toolkit 0 20 3.951244 0.000000 835 +histori 0 19 4.007333 0.000000 853 +lyco 0 19 4.007333 0.000000 871 +feedback 0 19 4.007333 0.000000 854 +thoma 0 18 4.060443 0.000000 901 +lot 0 18 4.060443 0.000000 889 +stanford 0 17 4.110874 0.000000 955 +medic 0 17 4.110874 0.000000 958 +germani 0 17 4.110874 0.000000 946 +repositori 0 17 4.110874 0.000000 932 +white 0 17 4.110874 0.000000 951 +english 0 15 4.248495 0.000000 1033 +overhead 0 15 4.248495 0.000000 1035 +todd 0 15 4.248495 0.000000 1051 +charact 0 15 4.248495 0.000000 1028 +massiv 0 15 4.248495 0.000000 1026 +draft 0 14 4.317488 0.000000 1085 +pretti 0 13 4.382027 0.000000 1191 +suit 0 13 4.382027 0.000000 1129 +resolut 0 13 4.382027 0.000000 1172 +misc 0 13 4.382027 0.000000 1124 +charl 0 13 4.382027 0.000000 1149 +shape 0 12 4.465908 0.000000 1245 +optic 0 12 4.465908 0.000000 1221 +string 0 11 4.553877 0.000000 1340 +excit 0 11 4.553877 0.000000 1329 +night 0 11 4.553877 0.000000 1319 +worth 0 11 4.553877 0.000000 1294 +perl 0 11 4.553877 0.000000 1332 +literatur 0 11 4.553877 0.000000 1300 +metacrawl 0 10 4.653960 0.000000 1455 +card 0 10 4.653960 0.000000 1435 +enter 0 10 4.653960 0.000000 1454 +custom 0 10 4.653960 0.000000 1414 +utah 0 9 4.753590 0.000000 1585 +patterson 0 9 4.753590 0.000000 1554 +linguist 0 9 4.753590 0.000000 1593 +editori 0 9 4.753590 0.000000 1611 +japan 0 8 4.875197 0.000000 1762 +textur 0 8 4.875197 0.000000 1677 +dictionari 0 8 4.875197 0.000000 1642 +jack 0 8 4.875197 0.000000 1780 +attach 0 7 5.010635 0.000000 1785 +shade 0 7 5.010635 0.000000 1881 +usenet 0 7 5.010635 0.000000 1839 +shot 0 7 5.010635 0.000000 1898 +mirror 0 6 5.164786 0.000000 2028 +sharp 0 6 5.164786 0.000000 2100 +invest 0 6 5.164786 0.000000 2153 +altavista 0 6 5.164786 0.000000 2222 +infoseek 0 6 5.164786 0.000000 2188 +postcard 0 6 5.164786 0.000000 2181 +textual 0 6 5.164786 0.000000 1979 +apolog 0 6 5.164786 0.000000 2046 +garbag 0 6 5.164786 0.000000 1986 +price 0 6 5.164786 0.000000 1999 +hyper 0 5 5.347108 0.000000 2435 +appt 0 5 5.347108 0.000000 2312 +multiresolut 0 5 5.347108 0.000000 2423 +medicin 0 5 5.347108 0.000000 2448 +hennessi 0 5 5.347108 0.000000 2289 +travers 0 5 5.347108 0.000000 2363 +row 0 5 5.347108 0.000000 2330 +japanes 0 4 5.568345 0.000000 2934 +planet 0 4 5.568345 0.000000 2912 +zoom 0 4 5.568345 0.000000 2961 +moon 0 4 5.568345 0.000000 2991 +diagnosi 0 4 5.568345 0.000000 3027 +nist 0 4 5.568345 0.000000 2973 +wander 0 4 5.568345 0.000000 2896 +freewar 0 3 5.857933 0.000000 3504 +motif 0 3 5.857933 0.000000 3752 +sharewar 0 3 5.857933 0.000000 3503 +atmospher 0 3 5.857933 0.000000 3388 +rack 0 3 5.857933 0.000000 3176 +lockhe 0 3 5.857933 0.000000 3863 +georgia 0 3 5.857933 0.000000 3834 +trec 0 3 5.857933 0.000000 3547 +gigabyt 0 3 5.857933 0.000000 3548 +spider 0 3 5.857933 0.000000 3605 +latin 0 3 5.857933 0.000000 3741 +citizen 0 3 5.857933 0.000000 3238 +belong 0 3 5.857933 0.000000 3797 +tracer 0 2 6.263398 0.000000 5913 +pagegreg 0 2 6.263398 0.000000 5906 +pagenam 0 2 6.263398 0.000000 6165 +sharpemail 0 2 6.263398 0.000000 4766 +dejanew 0 2 6.263398 0.000000 5602 +harmoni 0 2 6.263398 0.000000 5235 +solar 0 2 6.263398 0.000000 5003 +comet 0 2 6.263398 0.000000 5785 +catalogu 0 2 6.263398 0.000000 6166 +gothic 0 2 6.263398 0.000000 5787 +soup 0 2 6.263398 0.000000 6131 +kitchen 0 2 6.263398 0.000000 6132 +awesom 0 2 6.263398 0.000000 6167 +diagon 0 2 6.263398 0.000000 4974 +atla 0 2 6.263398 0.000000 5996 +raster 0 2 6.263398 0.000000 6078 +radianc 0 2 6.263398 0.000000 6068 +radios 0 2 6.263398 0.000000 4504 +mexico 0 2 6.263398 0.000000 6044 +monash 0 2 6.263398 0.000000 4460 +strictli 0 2 6.263398 0.000000 5726 +pointcast 0 2 6.263398 0.000000 5377 +portfolio 0 2 6.263398 0.000000 4408 +offens 0 2 6.263398 0.000000 6168 +brill 0 2 6.263398 0.000000 4137 +sharpgreg 0 2 6.263398 0.000000 4767 +pic 0 1 6.957497 0.000000 18492 +wyom 0 1 6.957497 0.000000 18493 +satelit 0 1 6.957497 0.000000 18494 +handwrit 0 1 6.957497 0.000000 18495 +schwab 0 1 6.957497 0.000000 18496 +tgif 0 1 6.957497 0.000000 18497 +notesclass 0 1 6.957497 0.000000 18498 +aboutsearch 0 1 6.957497 0.000000 18499 +ohioc 0 1 6.957497 0.000000 18500 +cygnu 0 1 6.957497 0.000000 18501 +mumit 0 1 6.957497 0.000000 18502 +newbi 0 1 6.957497 0.000000 18503 +guideplatform 0 1 6.957497 0.000000 18504 +kit 0 1 6.957497 0.000000 18505 +amulet 0 1 6.957497 0.000000 18506 +dclap 0 1 6.957497 0.000000 18507 +wxwindow 0 1 6.957497 0.000000 18508 +yacl 0 1 6.957497 0.000000 18509 +projectclass 0 1 6.957497 0.000000 18510 +projectmisc 0 1 6.957497 0.000000 18511 +cygwin 0 1 6.957497 0.000000 18512 +gimp 0 1 6.957497 0.000000 18513 +harmonai 0 1 6.957497 0.000000 18514 +vasc 0 1 6.957497 0.000000 18515 +jaida 0 1 6.957497 0.000000 18516 +seamless 0 1 6.957497 0.000000 18517 +meteor 0 1 6.957497 0.000000 18518 +antarctica 0 1 6.957497 0.000000 18519 +niae 0 1 6.957497 0.000000 18520 +vistex 0 1 6.957497 0.000000 18521 +databaseartifici 0 1 6.957497 0.000000 18522 +primoridi 0 1 6.957497 0.000000 18523 +dermatolog 0 1 6.957497 0.000000 18524 +erlang 0 1 6.957497 0.000000 18525 +orthopaed 0 1 6.957497 0.000000 18526 +ecvnet 0 1 6.957497 0.000000 18527 +nici 0 1 6.957497 0.000000 18528 +groupimag 0 1 6.957497 0.000000 18529 +raytrac 0 1 6.957497 0.000000 18530 +rayshad 0 1 6.957497 0.000000 18531 +avalon 0 1 6.957497 0.000000 18532 +grimstead 0 1 6.957497 0.000000 18533 +dsite 0 1 6.957497 0.000000 18534 +intergraph 0 1 6.957497 0.000000 18535 +glint 0 1 6.957497 0.000000 18536 +chipset 0 1 6.957497 0.000000 18537 +nvidia 0 1 6.957497 0.000000 18538 +chipsetcomput 0 1 6.957497 0.000000 18539 +geometeri 0 1 6.957497 0.000000 18540 +geometrylispuseless 0 1 6.957497 0.000000 18541 +pagescomput 0 1 6.957497 0.000000 18542 +superdlx 0 1 6.957497 0.000000 18543 +parl 0 1 6.957497 0.000000 18544 +washingt 0 1 6.957497 0.000000 18545 +groupjapanes 0 1 6.957497 0.000000 18546 +unvers 0 1 6.957497 0.000000 18547 +infowav 0 1 6.957497 0.000000 18548 +edict 0 1 6.957497 0.000000 18549 +shodouka 0 1 6.957497 0.000000 18550 +asiasoftinform 0 1 6.957497 0.000000 18551 +retrev 0 1 6.957497 0.000000 18552 +peregrin 0 1 6.957497 0.000000 18553 +infomin 0 1 6.957497 0.000000 18554 +other_sw 0 1 6.957497 0.000000 18555 +info_retriev 0 1 6.957497 0.000000 18556 +jedi 0 1 6.957497 0.000000 18557 +hartlib 0 1 6.957497 0.000000 18558 +stemmer 0 1 6.957497 0.000000 18559 +twainhumor 0 1 6.957497 0.000000 18560 +threw 0 1 6.957497 0.000000 18561 +investorweb 0 1 6.957497 0.000000 18562 +networth 0 1 6.957497 0.000000 18563 +fundscap 0 1 6.957497 0.000000 18564 +stockmastermutu 0 1 6.957497 0.000000 18565 +brokerag 0 1 6.957497 0.000000 18566 +fidel 0 1 6.957497 0.000000 18567 +vanguard 0 1 6.957497 0.000000 18568 +gabelli 0 1 6.957497 0.000000 18569 +mutualsmisc 0 1 6.957497 0.000000 18570 +psnuplast 0 1 6.957497 0.000000 18571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..d7e668de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +sridhar 1 2 6.263398 6.263398 4807 +gopalsridhar 1 1 6.957497 6.957497 18572 +gopalgsri 1 1 6.957497 6.957497 18573 +edubon 1 1 6.957497 6.957497 18574 +marrow 1 1 6.957497 6.957497 18575 +pageresumest 1 1 6.957497 6.957497 18576 +pagecalvin 1 1 6.957497 6.957497 18577 +hobbesbookmark 1 1 6.957497 6.957497 18578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..b44537f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +phone 0 175 1.791759 0.000000 45 +relat 0 139 1.945910 0.000000 68 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +analysi 0 124 2.079442 0.000000 98 +send 0 114 2.197225 0.000000 109 +manag 0 114 2.197225 0.000000 125 +sinc 0 90 2.397895 0.000000 159 +grade 0 90 2.397895 0.000000 142 +educ 0 86 2.484907 0.000000 191 +environ 0 84 2.484907 0.000000 177 +stuff 0 87 2.484907 0.000000 171 +state 0 76 2.564949 0.000000 207 +june 0 79 2.564949 0.000000 214 +plan 0 65 2.772589 0.000000 272 +explor 0 58 2.890372 0.000000 324 +visitor 0 49 3.044522 0.000000 371 +visual 0 48 3.044522 0.000000 372 +california 0 46 3.091042 0.000000 388 +physic 0 47 3.091042 0.000000 377 +around 0 43 3.178054 0.000000 415 +chines 0 29 3.583519 0.000000 595 +weather 0 28 3.610918 0.000000 618 +famili 0 23 3.806662 0.000000 735 +ramakrishnan 0 16 4.174387 0.000000 972 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +raghu 0 12 4.465908 0.000000 1212 +career 0 12 4.465908 0.000000 1287 +devis 0 10 4.653960 0.000000 1451 +angel 0 8 4.875197 0.000000 1779 +advis 0 6 5.164786 0.000000 2173 +forecast 0 6 5.164786 0.000000 2171 +peke 0 5 5.347108 0.000000 2539 +medicin 0 5 5.347108 0.000000 2448 +miscellani 0 3 5.857933 0.000000 3976 +guangshun 0 2 6.263398 0.000000 6138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..1566eb9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +data 1 170 1.791759 1.791759 49 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +dayton 0 119 2.079442 0.000000 104 +compil 0 122 2.079442 0.000000 96 +code 0 108 2.197225 0.000000 116 +west 0 83 2.484907 0.000000 192 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +summari 0 73 2.639057 0.000000 237 +thesi 0 57 2.890372 0.000000 327 +detail 0 57 2.890372 0.000000 321 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +execut 0 45 3.135494 0.000000 404 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +hand 0 37 3.332205 0.000000 475 +focus 0 29 3.583519 0.000000 584 +target 0 12 4.465908 0.000000 1282 +laru 0 9 4.753590 0.000000 1560 +studentdepart 0 5 5.347108 0.000000 2505 +parallelprogram 0 5 5.347108 0.000000 2379 +parallellanguag 0 3 5.857933 0.000000 4026 +usaadvisor 0 3 5.857933 0.000000 4017 +guhan 0 2 6.263398 0.000000 6169 +viswanathan 0 2 6.263398 0.000000 6170 +amor 0 2 6.263398 0.000000 5486 +gviswana 0 1 6.957497 0.000000 18579 +parallelappl 0 1 6.957497 0.000000 18580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..d687f5e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +address 0 170 1.791759 0.000000 62 +architectur 0 139 1.945910 0.000000 77 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +year 0 148 1.945910 0.000000 84 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +manag 0 114 2.197225 0.000000 125 +world 0 115 2.197225 0.000000 126 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +meet 0 72 2.639057 0.000000 229 +line 0 75 2.639057 0.000000 231 +would 0 67 2.708050 0.000000 251 +prof 0 64 2.772589 0.000000 273 +colleg 0 61 2.833213 0.000000 300 +undergradu 0 54 2.944439 0.000000 338 +electron 0 47 3.091042 0.000000 379 +mark 0 44 3.135494 0.000000 403 +jame 0 35 3.401197 0.000000 507 +india 0 32 3.465736 0.000000 550 +photo 0 31 3.496508 0.000000 561 +hill 0 25 3.737670 0.000000 670 +sport 0 25 3.737670 0.000000 683 +indian 0 22 3.850148 0.000000 769 +sept 0 17 4.110874 0.000000 952 +ramakrishnan 0 16 4.174387 0.000000 972 +photograph 0 15 4.248495 0.000000 1056 +draw 0 14 4.317488 0.000000 1086 +warn 0 14 4.317488 0.000000 1068 +raghu 0 12 4.465908 0.000000 1212 +newspap 0 12 4.465908 0.000000 1280 +classmat 0 9 4.753590 0.000000 1516 +folk 0 9 4.753590 0.000000 1597 +counter 0 8 4.875197 0.000000 1765 +goodman 0 7 5.010635 0.000000 1891 +courtesi 0 7 5.010635 0.000000 1953 +famou 0 6 5.164786 0.000000 2185 +mirza 0 3 5.857933 0.000000 3989 +osmania 0 2 6.263398 0.000000 5573 +hyderabad 0 2 6.263398 0.000000 5570 +sastri 0 2 6.263398 0.000000 6171 +roommat 0 2 6.263398 0.000000 6157 +saeed 0 2 6.263398 0.000000 6172 +dust 0 2 6.263398 0.000000 5551 +harit 0 1 6.957497 0.000000 18581 +mvsr 0 1 6.957497 0.000000 18582 +murthi 0 1 6.957497 0.000000 18583 +zubber 0 1 6.957497 0.000000 18584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..d985151c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +first 0 140 1.945910 0.000000 71 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +mathemat 0 108 2.197225 0.000000 123 +java 0 70 2.708050 0.000000 248 +dept 0 64 2.772589 0.000000 291 +septemb 0 65 2.772589 0.000000 274 +colleg 0 61 2.833213 0.000000 300 +telephon 0 50 3.044522 0.000000 373 +seminar 0 38 3.295837 0.000000 470 +departmentunivers 0 24 3.761200 0.000000 711 +applet 0 20 3.951244 0.000000 827 +basketbal 0 12 4.465908 0.000000 1289 +edutelephon 0 10 4.653960 0.000000 1473 +engr 0 10 4.653960 0.000000 1427 +volleybal 0 9 4.753590 0.000000 1598 +softbal 0 9 4.753590 0.000000 1594 +rebecca 0 6 5.164786 0.000000 2174 +noland 0 5 5.347108 0.000000 2420 +assistantcomput 0 3 5.857933 0.000000 4027 +hasti 0 2 6.263398 0.000000 6173 +carleton 0 2 6.263398 0.000000 5381 +linkag 0 2 6.263398 0.000000 5139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..8c943d54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +hour 0 165 1.791759 0.000000 46 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +person 0 111 2.197225 0.000000 117 +final 0 116 2.197225 0.000000 108 +manag 0 114 2.197225 0.000000 125 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +section 0 94 2.397895 0.000000 149 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +internet 0 83 2.484907 0.000000 186 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +differ 0 66 2.708050 0.000000 253 +dept 0 64 2.772589 0.000000 291 +new 0 64 2.772589 0.000000 262 +talk 0 53 2.944439 0.000000 336 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +get 0 46 3.091042 0.000000 380 +expect 0 37 3.332205 0.000000 484 +download 0 36 3.367296 0.000000 489 +semant 0 29 3.583519 0.000000 587 +hope 0 28 3.610918 0.000000 610 +retriev 0 27 3.637586 0.000000 621 +enjoi 0 26 3.688879 0.000000 660 +tell 0 21 3.912023 0.000000 777 +stand 0 18 4.060443 0.000000 891 +stop 0 17 4.110874 0.000000 942 +seek 0 17 4.110874 0.000000 954 +thought 0 17 4.110874 0.000000 945 +alan 0 13 4.382027 0.000000 1146 +philosophi 0 13 4.382027 0.000000 1167 +brother 0 13 4.382027 0.000000 1189 +neat 0 12 4.465908 0.000000 1263 +count 0 12 4.465908 0.000000 1239 +minor 0 12 4.465908 0.000000 1237 +linguist 0 9 4.753590 0.000000 1593 +bart 0 9 4.753590 0.000000 1559 +lane 0 8 4.875197 0.000000 1720 +rais 0 8 4.875197 0.000000 1711 +jack 0 8 4.875197 0.000000 1780 +reload 0 8 4.875197 0.000000 1682 +on 0 8 4.875197 0.000000 1628 +marri 0 7 5.010635 0.000000 1946 +accord 0 7 5.010635 0.000000 1826 +creation 0 6 5.164786 0.000000 2069 +handi 0 6 5.164786 0.000000 2111 +advic 0 5 5.347108 0.000000 2509 +formerli 0 5 5.347108 0.000000 2397 +truli 0 5 5.347108 0.000000 2476 +plant 0 5 5.347108 0.000000 2497 +ling 0 4 5.568345 0.000000 3045 +cyber 0 4 5.568345 0.000000 2909 +luck 0 3 5.857933 0.000000 3201 +audit 0 3 5.857933 0.000000 3391 +northeast 0 3 5.857933 0.000000 3922 +chad 0 2 6.263398 0.000000 4768 +biggest 0 2 6.263398 0.000000 4790 +arthur 0 2 6.263398 0.000000 5767 +deep 0 2 6.263398 0.000000 5528 +claud 0 1 6.957497 0.000000 18585 +welcomethank 0 1 6.957497 0.000000 18586 +bestbet 0 1 6.957497 0.000000 18587 +onmai 0 1 6.957497 0.000000 18588 +nichol 0 1 6.957497 0.000000 18589 +discours 0 1 6.957497 0.000000 18590 +barwis 0 1 6.957497 0.000000 18591 +epigram 0 1 6.957497 0.000000 18592 +perli 0 1 6.957497 0.000000 18593 +laud 0 1 6.957497 0.000000 18594 +truman 0 1 6.957497 0.000000 18595 +missouri 0 1 6.957497 0.000000 18596 +poop 0 1 6.957497 0.000000 18597 +unabash 0 1 6.957497 0.000000 18598 +psychot 0 1 6.957497 0.000000 18599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..e4fd9a2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +graphic 0 90 2.397895 0.000000 147 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +educ 0 86 2.484907 0.000000 191 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +line 0 75 2.639057 0.000000 231 +august 0 66 2.708050 0.000000 257 +simul 0 66 2.708050 0.000000 255 +plan 0 65 2.772589 0.000000 272 +abstract 0 62 2.772589 0.000000 276 +laboratori 0 63 2.772589 0.000000 292 +juli 0 60 2.833213 0.000000 305 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +move 0 47 3.091042 0.000000 382 +autom 0 41 3.218876 0.000000 434 +multipl 0 39 3.258097 0.000000 453 +vita 0 38 3.295837 0.000000 473 +robot 1 36 3.367296 3.367296 497 +curriculum 0 33 3.433987 0.000000 535 +extend 0 32 3.465736 0.000000 539 +anim 0 31 3.496508 0.000000 557 +common 0 30 3.555348 0.000000 574 +travel 0 30 3.555348 0.000000 579 +proc 0 26 3.688879 0.000000 649 +experiment 0 26 3.688879 0.000000 645 +motion 0 24 3.761200 0.000000 699 +geometri 0 22 3.850148 0.000000 752 +rout 0 21 3.912023 0.000000 793 +unit 0 21 3.912023 0.000000 779 +basi 0 20 3.951244 0.000000 828 +geometr 0 19 4.007333 0.000000 852 +susan 0 15 4.248495 0.000000 1050 +configur 0 15 4.248495 0.000000 1012 +vladimir 0 11 4.553877 0.000000 1324 +arbitrari 0 11 4.553877 0.000000 1359 +cook 0 10 4.653960 0.000000 1464 +congress 0 9 4.753590 0.000000 1592 +autonom 0 8 4.875197 0.000000 1749 +curv 0 8 4.875197 0.000000 1656 +plane 0 6 5.164786 0.000000 2187 +bind 0 5 5.347108 0.000000 2250 +tiwari 0 5 5.347108 0.000000 2385 +lumelski 0 4 5.568345 0.000000 2837 +ti 0 4 5.568345 0.000000 3005 +underwat 0 4 5.568345 0.000000 2838 +shelf 0 4 5.568345 0.000000 2621 +canadian 0 3 5.857933 0.000000 3508 +planar 0 3 5.857933 0.000000 3647 +hert 0 2 6.263398 0.000000 4848 +tether 0 2 6.263398 0.000000 4844 +deform 0 2 6.263398 0.000000 6065 +terrain 0 2 6.263398 0.000000 6174 +epicuri 0 2 6.263398 0.000000 5105 +veggi 0 2 6.263398 0.000000 5426 +alogirthm 0 1 6.957497 0.000000 18600 +sanjai 0 1 6.957497 0.000000 18601 +reznik 0 1 6.957497 0.000000 18602 +samantha 0 1 6.957497 0.000000 18603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..f34d001a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +dayton 0 119 2.079442 0.000000 104 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +section 0 94 2.397895 0.000000 149 +sinc 0 90 2.397895 0.000000 159 +visit 0 63 2.772589 0.000000 288 +experi 0 64 2.772589 0.000000 283 +finger 0 52 2.995732 0.000000 354 +maintain 0 51 2.995732 0.000000 342 +appoint 0 49 3.044522 0.000000 358 +departmentunivers 0 24 3.761200 0.000000 711 +serv 0 22 3.850148 0.000000 758 +countri 0 15 4.248495 0.000000 1059 +wife 0 13 4.382027 0.000000 1196 +sai 0 13 4.382027 0.000000 1175 +tue 0 11 4.553877 0.000000 1308 +edutelephon 0 10 4.653960 0.000000 1473 +counter 0 8 4.875197 0.000000 1765 +studentcomput 0 7 5.010635 0.000000 1963 +none 0 7 5.010635 0.000000 1811 +corp 0 6 5.164786 0.000000 2139 +peac 0 3 5.857933 0.000000 3769 +kirk 0 2 6.263398 0.000000 6175 +hogenson 0 1 6.957497 0.000000 18604 +myschedul 0 1 6.957497 0.000000 18605 +workout 0 1 6.957497 0.000000 18606 +tryto 0 1 6.957497 0.000000 18607 +ghana 0 1 6.957497 0.000000 18608 +usernam 0 1 6.957497 0.000000 18609 +pnhp 0 1 6.957497 0.000000 18610 +eilun 0 1 6.957497 0.000000 18611 +accessedtim 0 1 6.957497 0.000000 18612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..c2e4b588 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +linear 0 41 3.218876 0.000000 431 +road 0 11 4.553877 0.000000 1374 +jeffrei 0 9 4.753590 0.000000 1612 +wise 0 3 5.857933 0.000000 3631 +horn 1 2 6.263398 6.263398 6072 +swanton 0 1 6.957497 0.000000 18613 +familyemploymenteducationresearchgenealog 0 1 6.957497 0.000000 18614 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..febd97e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +teach 0 108 2.197225 0.000000 112 +proceed 0 93 2.397895 0.000000 152 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +west 0 83 2.484907 0.000000 192 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +larg 0 82 2.484907 0.000000 168 +second 0 81 2.484907 0.000000 166 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +appli 0 71 2.639057 0.000000 226 +differ 0 66 2.708050 0.000000 253 +would 0 67 2.708050 0.000000 251 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +januari 0 62 2.772589 0.000000 264 +previou 0 62 2.772589 0.000000 290 +foundat 0 62 2.772589 0.000000 286 +point 0 58 2.890372 0.000000 319 +telephon 0 50 3.044522 0.000000 373 +understand 0 47 3.091042 0.000000 384 +combin 0 42 3.218876 0.000000 421 +fast 0 42 3.218876 0.000000 429 +programm 0 39 3.258097 0.000000 445 +streetmadison 0 38 3.295837 0.000000 474 +represent 0 35 3.401197 0.000000 512 +graph 0 30 3.555348 0.000000 576 +exist 0 30 3.555348 0.000000 569 +produc 0 30 3.555348 0.000000 572 +depend 0 29 3.583519 0.000000 583 +semant 0 29 3.583519 0.000000 587 +propos 0 28 3.610918 0.000000 602 +accur 0 25 3.737670 0.000000 680 +departmentunivers 0 24 3.761200 0.000000 711 +flow 0 24 3.761200 0.000000 700 +identifi 0 22 3.850148 0.000000 760 +record 0 18 4.060443 0.000000 890 +thoma 0 18 4.060443 0.000000 901 +modif 0 17 4.110874 0.000000 913 +white 0 17 4.110874 0.000000 951 +fourth 0 16 4.174387 0.000000 999 +precis 0 15 4.248495 0.000000 1023 +susan 0 15 4.248495 0.000000 1050 +piec 0 15 4.248495 0.000000 1020 +demand 0 14 4.317488 0.000000 1073 +francisco 0 14 4.317488 0.000000 1095 +individu 0 13 4.382027 0.000000 1126 +joint 0 13 4.382027 0.000000 1130 +sigplan 0 13 4.382027 0.000000 1190 +franc 0 12 4.465908 0.000000 1276 +mainli 0 10 4.653960 0.000000 1432 +guarante 0 10 4.653960 0.000000 1391 +conferenceon 0 9 4.753590 0.000000 1595 +plain 0 9 4.753590 0.000000 1495 +secretari 0 8 4.875197 0.000000 1775 +shapiro 0 8 4.875197 0.000000 1686 +implementationof 0 7 5.010635 0.000000 1813 +necessarili 0 7 5.010635 0.000000 1899 +sixth 0 7 5.010635 0.000000 1917 +textual 0 6 5.164786 0.000000 1979 +pari 0 6 5.164786 0.000000 2158 +softwareengin 0 6 5.164786 0.000000 2162 +increment 0 6 5.164786 0.000000 2206 +horwitz 0 5 5.347108 0.000000 2411 +dataflow 0 5 5.347108 0.000000 2390 +twenti 0 5 5.347108 0.000000 2540 +summarymi 0 5 5.347108 0.000000 2580 +australia 0 5 5.347108 0.000000 2478 +rep 0 4 5.568345 0.000000 3087 +interprocedur 0 4 5.568345 0.000000 2771 +slice 0 4 5.568345 0.000000 2622 +usa 0 4 5.568345 0.000000 3080 +theprogram 0 4 5.568345 0.000000 2686 +insensit 0 4 5.568345 0.000000 2716 +sigsoft 0 4 5.568345 0.000000 3036 +melbourn 0 4 5.568345 0.000000 3035 +principlesof 0 3 5.857933 0.000000 3145 +onprincipl 0 3 5.857933 0.000000 3701 +theoryand 0 3 5.857933 0.000000 3350 +denmark 0 3 5.857933 0.000000 3676 +reachabl 0 3 5.857933 0.000000 4001 +twentieth 0 3 5.857933 0.000000 3760 +fourteenth 0 3 5.857933 0.000000 3615 +sagiv 0 2 6.263398 0.000000 6176 +differenc 0 2 6.263398 0.000000 6177 +interproceduraldataflow 0 2 6.263398 0.000000 6178 +mooli 0 2 6.263398 0.000000 6179 +aarhu 0 2 6.263398 0.000000 6180 +charleston 0 2 6.263398 0.000000 6181 +aprogram 0 2 6.263398 0.000000 4943 +languagedesign 0 2 6.263398 0.000000 6182 +horwitzsusan 0 1 6.957497 0.000000 18615 +horwitzprofessorcomput 0 1 6.957497 0.000000 18616 +environmentsprogram 0 1 6.957497 0.000000 18617 +mergingstat 0 1 6.957497 0.000000 18618 +programsinterprocedur 0 1 6.957497 0.000000 18619 +analysisresearch 0 1 6.957497 0.000000 18620 +affectedbi 0 1 6.957497 0.000000 18621 +betweentwo 0 1 6.957497 0.000000 18622 +retest 0 1 6.957497 0.000000 18623 +certainsemant 0 1 6.957497 0.000000 18624 +concentratedeith 0 1 6.957497 0.000000 18625 +newalgorithm 0 1 6.957497 0.000000 18626 +publicationsm 0 1 6.957497 0.000000 18627 +constantpropag 0 1 6.957497 0.000000 18628 +bate 0 1 6.957497 0.000000 18629 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..e7ec5273 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +postscript 1 131 2.079442 2.079442 90 +version 1 113 2.197225 2.197225 122 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +hummert 1 3 5.857933 5.857933 3416 +pagenam 1 2 6.263398 6.263398 6165 +sidnei 1 2 6.263398 6.263398 4587 +edua 1 2 6.263398 6.263398 5764 +pagesid 1 1 6.957497 6.957497 18630 +hummertoffic 1 1 6.957497 6.957497 18631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..c5e9642a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 0 247 1.386294 0.000000 24 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +like 0 132 1.945910 0.000000 81 +dayton 0 119 2.079442 0.000000 104 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +particular 0 51 2.995732 0.000000 352 +math 0 44 3.135494 0.000000 402 +vision 0 41 3.218876 0.000000 430 +streetmadison 0 38 3.295837 0.000000 474 +feel 0 37 3.332205 0.000000 483 +robot 0 36 3.367296 0.000000 497 +actual 0 28 3.610918 0.000000 604 +departmentunivers 0 24 3.761200 0.000000 711 +ever 0 19 4.007333 0.000000 872 +alreadi 0 16 4.174387 0.000000 963 +speak 0 12 4.465908 0.000000 1283 +undergrad 0 9 4.753590 0.000000 1589 +disclaim 0 4 5.568345 0.000000 2847 +alien 1 3 5.857933 5.857933 3930 +slave 0 3 5.857933 0.000000 3959 +igor 0 2 6.263398 0.000000 6183 +ivanisev 0 2 6.263398 0.000000 6184 +newest 0 2 6.263398 0.000000 5518 +needless 0 2 6.263398 0.000000 4694 +drake 0 2 6.263398 0.000000 5668 +pageigorivanisev 0 1 6.957497 0.000000 18632 +generalgradu 0 1 6.957497 0.000000 18633 +departmentwa 0 1 6.957497 0.000000 18634 +departmentaddress 0 1 6.957497 0.000000 18635 +iigor 0 1 6.957497 0.000000 18636 +eduiigor 0 1 6.957497 0.000000 18637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..072bcbd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 1 242 1.386294 1.386294 33 +pageireland 1 1 6.957497 6.957497 18638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..c005f354 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +section 0 94 2.397895 0.000000 149 +comment 0 93 2.397895 0.000000 146 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +profil 0 30 3.555348 0.000000 581 +peterson 0 7 5.010635 0.000000 1850 +sharenow 1 3 5.857933 5.857933 3439 +jone 0 3 5.857933 0.000000 3703 +recreat 0 3 5.857933 0.000000 3990 +madisonmadison 0 1 6.957497 0.000000 18639 +sciencestelephon 0 1 6.957497 0.000000 18640 +pmsection 0 1 6.957497 0.000000 18641 +pmboth 0 1 6.957497 0.000000 18642 +sciencesc 0 1 6.957497 0.000000 18643 +announcementshandoutsmoth 0 1 6.957497 0.000000 18644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..46361aa4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +area 0 144 1.945910 0.000000 80 +dayton 0 119 2.079442 0.000000 104 +resum 0 79 2.564949 0.000000 217 +onlin 0 75 2.639057 0.000000 223 +receiv 0 66 2.708050 0.000000 244 +prof 0 64 2.772589 0.000000 273 +previou 0 62 2.772589 0.000000 290 +colleg 0 61 2.833213 0.000000 300 +eduoffic 0 33 3.433987 0.000000 531 +departmentunivers 0 24 3.761200 0.000000 711 +livni 0 15 4.248495 0.000000 1053 +english 0 15 4.248495 0.000000 1033 +miron 0 14 4.317488 0.000000 1110 +condor 0 5 5.347108 0.000000 2577 +webpag 0 4 5.568345 0.000000 2660 +assistantcomput 0 3 5.857933 0.000000 4027 +statisticsoffic 0 2 6.263398 0.000000 4810 +andnetwork 0 2 6.263398 0.000000 5751 +basnei 0 2 6.263398 0.000000 4804 +basneyjim 0 1 6.957497 0.000000 18645 +basneygradu 0 1 6.957497 0.000000 18646 +jbasnei 0 1 6.957497 0.000000 18647 +directionof 0 1 6.957497 0.000000 18648 +fromoberlin 0 1 6.957497 0.000000 18649 +oberlin 0 1 6.957497 0.000000 18650 +codefrom 0 1 6.957497 0.000000 18651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..73d76a6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +first 0 140 1.945910 0.000000 71 +like 0 132 1.945910 0.000000 81 +databas 0 122 2.079442 0.000000 86 +assist 0 112 2.197225 0.000000 113 +stuff 0 87 2.484907 0.000000 171 +know 0 80 2.564949 0.000000 198 +plai 0 60 2.833213 0.000000 307 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +around 0 43 3.178054 0.000000 415 +error 0 40 3.258097 0.000000 449 +electr 0 38 3.295837 0.000000 461 +origin 0 38 3.295837 0.000000 472 +respons 0 37 3.332205 0.000000 476 +soon 0 36 3.367296 0.000000 494 +abl 0 30 3.555348 0.000000 566 +wai 0 25 3.737670 0.000000 662 +instal 0 22 3.850148 0.000000 754 +watch 0 21 3.912023 0.000000 789 +mostli 0 19 4.007333 0.000000 869 +record 0 18 4.060443 0.000000 890 +stand 0 18 4.060443 0.000000 891 +train 0 14 4.317488 0.000000 1066 +classic 0 14 4.317488 0.000000 1084 +believ 0 13 4.382027 0.000000 1187 +mainli 0 10 4.653960 0.000000 1432 +didn 0 9 4.753590 0.000000 1563 +guitar 0 8 4.875197 0.000000 1758 +hit 0 7 5.010635 0.000000 1965 +seen 0 6 5.164786 0.000000 2202 +golf 0 6 5.164786 0.000000 2178 +pool 0 6 5.164786 0.000000 2225 +yeah 0 6 5.164786 0.000000 2195 +backup 0 4 5.568345 0.000000 2645 +metal 0 4 5.568345 0.000000 3079 +hourli 0 3 5.857933 0.000000 3734 +thrash 0 3 5.857933 0.000000 3400 +evil 0 3 5.857933 0.000000 3943 +mackai 1 2 6.263398 6.263398 5762 +fulltim 0 2 6.263398 0.000000 5170 +sybas 0 2 6.263398 0.000000 4723 +ingr 0 2 6.263398 0.000000 4097 +ey 0 2 6.263398 0.000000 5068 +metallica 0 2 6.263398 0.000000 4991 +raquetbal 0 2 6.263398 0.000000 6012 +towel 0 2 6.263398 0.000000 4793 +jerel 1 1 6.957497 6.957497 18652 +pagejerel 0 1 6.957497 0.000000 18653 +specialti 0 1 6.957497 0.000000 18654 +violin 0 1 6.957497 0.000000 18655 +baroqu 0 1 6.957497 0.000000 18656 +shock 0 1 6.957497 0.000000 18657 +funni 0 1 6.957497 0.000000 18658 +abba 0 1 6.957497 0.000000 18659 +shoot 0 1 6.957497 0.000000 18660 +jerellast 0 1 6.957497 0.000000 18661 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..6ef1426e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +homepag 1 93 2.397895 2.397895 148 +johan 1 2 6.263398 6.263398 4900 +larson 1 1 6.957497 6.957497 18662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..87ea4e9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +number 0 130 2.079442 0.000000 97 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +take 0 97 2.302585 0.000000 134 +memori 0 101 2.302585 0.000000 139 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +homepag 0 93 2.397895 0.000000 148 +search 0 95 2.397895 0.000000 155 +mani 0 92 2.397895 0.000000 150 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +stuff 0 87 2.484907 0.000000 171 +thing 0 84 2.484907 0.000000 189 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +good 0 77 2.564949 0.000000 200 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +would 0 67 2.708050 0.000000 251 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +date 0 51 2.995732 0.000000 344 +cool 0 49 3.044522 0.000000 374 +friend 0 48 3.044522 0.000000 376 +format 0 48 3.044522 0.000000 356 +archiv 0 49 3.044522 0.000000 364 +made 0 44 3.135494 0.000000 398 +directori 0 45 3.135494 0.000000 396 +realli 0 40 3.258097 0.000000 444 +societi 0 40 3.258097 0.000000 456 +join 0 39 3.258097 0.000000 457 +movi 0 40 3.258097 0.000000 459 +go 0 33 3.433987 0.000000 529 +anim 0 31 3.496508 0.000000 557 +someth 0 31 3.496508 0.000000 554 +exist 0 30 3.555348 0.000000 569 +though 0 27 3.637586 0.000000 622 +great 0 27 3.637586 0.000000 626 +mine 0 26 3.688879 0.000000 654 +enjoi 0 26 3.688879 0.000000 660 +rule 0 26 3.688879 0.000000 638 +todai 0 25 3.737670 0.000000 672 +frame 0 24 3.761200 0.000000 684 +seri 0 24 3.761200 0.000000 708 +head 0 23 3.806662 0.000000 732 +sequenc 0 23 3.806662 0.000000 734 +almost 0 22 3.850148 0.000000 742 +hierarchi 0 22 3.850148 0.000000 744 +half 0 21 3.912023 0.000000 776 +grad 0 20 3.951244 0.000000 837 +mpeg 0 20 3.951244 0.000000 831 +els 0 19 4.007333 0.000000 843 +club 0 15 4.248495 0.000000 1058 +happi 0 14 4.317488 0.000000 1079 +becam 0 14 4.317488 0.000000 1117 +trip 0 14 4.317488 0.000000 1113 +pretti 0 13 4.382027 0.000000 1191 +neat 0 12 4.465908 0.000000 1263 +realiti 0 12 4.465908 0.000000 1272 +death 0 10 4.653960 0.000000 1457 +float 0 9 4.753590 0.000000 1504 +explicit 0 9 4.753590 0.000000 1525 +said 0 9 4.753590 0.000000 1571 +sound 0 9 4.753590 0.000000 1605 +jack 0 8 4.875197 0.000000 1780 +relax 0 6 5.164786 0.000000 2120 +jpeg 0 6 5.164786 0.000000 2053 +apolog 0 6 5.164786 0.000000 2046 +benefit 0 6 5.164786 0.000000 2213 +default 0 5 5.347108 0.000000 2335 +girlfriend 0 5 5.347108 0.000000 2579 +frog 0 5 5.347108 0.000000 2479 +chaotic 0 5 5.347108 0.000000 2566 +semi 0 5 5.347108 0.000000 2510 +paus 0 4 5.568345 0.000000 2965 +notr 0 4 5.568345 0.000000 2880 +dame 0 4 5.568345 0.000000 2881 +relief 0 4 5.568345 0.000000 2784 +afraid 0 4 5.568345 0.000000 3053 +repeat 0 4 5.568345 0.000000 2798 +vital 0 4 5.568345 0.000000 2733 +bear 0 4 5.568345 0.000000 2651 +exploratori 0 4 5.568345 0.000000 3073 +exclus 0 4 5.568345 0.000000 2947 +jherro 0 3 5.857933 0.000000 3427 +let 0 3 5.857933 0.000000 3790 +tortur 0 3 5.857933 0.000000 3634 +nicknam 0 3 5.857933 0.000000 3716 +lame 0 3 5.857933 0.000000 3717 +haiku 0 3 5.857933 0.000000 3811 +bout 0 3 5.857933 0.000000 3670 +cano 0 2 6.263398 0.000000 5207 +apictur 0 2 6.263398 0.000000 5024 +kermit 0 2 6.263398 0.000000 4742 +intervent 0 2 6.263398 0.000000 6163 +shack 0 2 6.263398 0.000000 5369 +roomat 0 1 6.957497 0.000000 18663 +censorship 0 1 6.957497 0.000000 18664 +disembody 0 1 6.957497 0.000000 18665 +millisecond 0 1 6.957497 0.000000 18666 +overriden 0 1 6.957497 0.000000 18667 +aquir 0 1 6.957497 0.000000 18668 +skellington 0 1 6.957497 0.000000 18669 +thath 0 1 6.957497 0.000000 18670 +forgotten 0 1 6.957497 0.000000 18671 +cult 0 1 6.957497 0.000000 18672 +hippothi 0 1 6.957497 0.000000 18673 +matriarch 0 1 6.957497 0.000000 18674 +yahooooooooooooo 0 1 6.957497 0.000000 18675 +bazillion 0 1 6.957497 0.000000 18676 +muppet 0 1 6.957497 0.000000 18677 +rachel 0 1 6.957497 0.000000 18678 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..58b68575 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +octob 0 89 2.397895 0.000000 156 +proceed 0 93 2.397895 0.000000 152 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +educ 0 86 2.484907 0.000000 191 +west 0 83 2.484907 0.000000 192 +larg 0 82 2.484907 0.000000 168 +ieee 0 86 2.484907 0.000000 190 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +orient 0 80 2.564949 0.000000 205 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +workshop 0 71 2.639057 0.000000 239 +septemb 0 65 2.772589 0.000000 274 +prof 0 64 2.772589 0.000000 273 +evalu 0 64 2.772589 0.000000 266 +juli 0 60 2.833213 0.000000 305 +index 0 56 2.890372 0.000000 309 +februari 0 54 2.944439 0.000000 328 +advisor 0 51 2.995732 0.000000 355 +pointer 0 48 3.044522 0.000000 368 +approach 0 48 3.044522 0.000000 366 +set 0 50 3.044522 0.000000 361 +execut 0 45 3.135494 0.000000 404 +submit 0 39 3.258097 0.000000 440 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +tree 0 36 3.367296 0.000000 492 +queri 0 33 3.433987 0.000000 524 +storag 1 31 3.496508 3.496508 553 +full 0 28 3.610918 0.000000 615 +constraint 0 26 3.688879 0.000000 636 +client 0 25 3.737670 0.000000 679 +store 0 24 3.761200 0.000000 693 +size 0 23 3.806662 0.000000 713 +tenni 0 20 3.951244 0.000000 838 +sigmod 0 19 4.007333 0.000000 877 +white 0 17 4.110874 0.000000 951 +hobbi 0 16 4.174387 0.000000 1009 +spatial 0 16 4.174387 0.000000 988 +ramakrishnan 0 16 4.174387 0.000000 972 +dbm 0 13 4.382027 0.000000 1136 +nasa 0 13 4.382027 0.000000 1188 +dewitt 1 12 4.465908 4.465908 1270 +impact 0 11 4.553877 0.000000 1334 +naughton 0 10 4.653960 0.000000 1450 +resid 0 10 4.653960 0.000000 1461 +conferenceon 0 9 4.753590 0.000000 1595 +volleybal 0 9 4.753590 0.000000 1598 +paradis 0 8 4.875197 0.000000 1782 +assistantdepart 0 8 4.875197 0.000000 1784 +mass 0 8 4.875197 0.000000 1732 +burger 0 7 5.010635 0.000000 1889 +patel 0 6 5.164786 0.000000 2154 +geograph 0 6 5.164786 0.000000 2236 +goldstein 0 6 5.164786 0.000000 2168 +tuft 0 5 5.347108 0.000000 2575 +houston 0 5 5.347108 0.000000 2460 +water 0 5 5.347108 0.000000 2535 +ramasami 0 4 5.568345 0.000000 3088 +batch 0 4 5.568345 0.000000 2700 +tape 0 4 5.568345 0.000000 2959 +satellit 0 4 5.568345 0.000000 3077 +chile 0 4 5.568345 0.000000 3082 +yong 0 4 5.568345 0.000000 2809 +raft 0 4 5.568345 0.000000 3060 +tertiari 0 3 5.857933 0.000000 3193 +informationresearch 0 3 5.857933 0.000000 3675 +edueduc 0 3 5.857933 0.000000 4004 +shorter 0 3 5.857933 0.000000 3998 +santiago 0 3 5.857933 0.000000 4013 +kabra 0 2 6.263398 0.000000 6139 +dewittresearch 0 2 6.263398 0.000000 6185 +shaft 0 2 6.263398 0.000000 6186 +eosdi 0 2 6.263398 0.000000 6124 +bing 0 1 6.957497 0.000000 18679 +jieb 0 1 6.957497 0.000000 18680 +systemsresearch 0 1 6.957497 0.000000 18681 +shorepublicationsbuild 0 1 6.957497 0.000000 18682 +scaleabl 0 1 6.957497 0.000000 18683 +implment 0 1 6.957497 0.000000 18684 +lueder 0 1 6.957497 0.000000 18685 +ellman 0 1 6.957497 0.000000 18686 +kupsch 0 1 6.957497 0.000000 18687 +prong 0 1 6.957497 0.000000 18688 +tile 0 1 6.957497 0.000000 18689 +goddard 0 1 6.957497 0.000000 18690 +reclam 0 1 6.957497 0.000000 18691 +reorgan 0 1 6.957497 0.000000 18692 +serverpersist 0 1 6.957497 0.000000 18693 +grouphobbi 0 1 6.957497 0.000000 18694 +volleyballweb 0 1 6.957497 0.000000 18695 +whitewat 0 1 6.957497 0.000000 18696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..db29da33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +databas 0 122 2.079442 0.000000 86 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +street 0 63 2.772589 0.000000 293 +virtual 0 62 2.772589 0.000000 285 +publish 1 57 2.890372 2.890372 326 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +join 0 39 3.258097 0.000000 457 +bookmark 0 26 3.688879 0.000000 639 +client 0 25 3.737670 0.000000 679 +accur 0 25 3.737670 0.000000 680 +miscellan 0 23 3.806662 0.000000 731 +sigmod 0 19 4.007333 0.000000 877 +partit 0 16 4.174387 0.000000 984 +spatial 0 16 4.174387 0.000000 988 +hybrid 0 15 4.248495 0.000000 1057 +sigmetr 0 13 4.382027 0.000000 1173 +dewitt 0 12 4.465908 0.000000 1270 +vldb 0 10 4.653960 0.000000 1470 +paradis 1 8 4.875197 4.875197 1782 +hash 0 8 4.875197 0.000000 1618 +tourist 0 8 4.875197 0.000000 1710 +merg 0 7 5.010635 0.000000 1862 +patel 0 6 5.164786 0.000000 2154 +inlin 0 4 5.568345 0.000000 2964 +skate 0 4 5.568345 0.000000 3046 +jignesh 1 1 6.957497 6.957497 18697 +madhuri 0 1 6.957497 0.000000 18698 +kashmir 0 1 6.957497 0.000000 18699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..46dd1b88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +click 0 142 1.945910 0.000000 78 +peopl 0 96 2.302585 0.000000 132 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +georg 0 16 4.174387 0.000000 994 +warn 0 14 4.317488 0.000000 1068 +pretti 0 13 4.382027 0.000000 1191 +varghes 0 3 5.857933 0.000000 3442 +lame 0 3 5.857933 0.000000 3717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..39b29dfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +real 0 93 2.397895 0.000000 144 +proceed 0 93 2.397895 0.000000 152 +resourc 0 81 2.484907 0.000000 172 +ieee 0 86 2.484907 0.000000 190 +west 0 83 2.484907 0.000000 192 +decemb 0 80 2.564949 0.000000 215 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +symposium 0 72 2.639057 0.000000 238 +street 0 63 2.772589 0.000000 293 +share 0 59 2.833213 0.000000 304 +room 0 59 2.833213 0.000000 301 +februari 0 54 2.944439 0.000000 328 +multipl 0 39 3.258097 0.000000 453 +mine 0 26 3.688879 0.000000 654 +task 0 25 3.737670 0.000000 678 +period 0 22 3.850148 0.000000 743 +alloc 0 20 3.951244 0.000000 821 +expand 0 17 4.110874 0.000000 928 +universityof 0 15 4.248495 0.000000 1061 +inproceed 0 8 4.875197 0.000000 1670 +sciencesdepart 0 6 5.164786 0.000000 2020 +height 0 4 5.568345 0.000000 2890 +johann 0 3 5.857933 0.000000 3758 +plaxton 0 3 5.857933 0.000000 3886 +ofwisconsin 0 3 5.857933 0.000000 4002 +proport 0 3 5.857933 0.000000 3293 +baruah 0 2 6.263398 0.000000 5753 +studentat 0 2 6.263398 0.000000 5877 +databasemanag 0 2 6.263398 0.000000 4089 +underprofessor 0 2 6.263398 0.000000 6045 +linkscontact 0 2 6.263398 0.000000 5708 +eagl 0 2 6.263398 0.000000 5731 +jeffai 0 2 6.263398 0.000000 4357 +technicalreport 0 2 6.263398 0.000000 5615 +gehrk 1 1 6.957497 6.957497 18700 +homepagejohann 0 1 6.957497 0.000000 18701 +gehrkewelcom 0 1 6.957497 0.000000 18702 +raghuramakrishnan 0 1 6.957497 0.000000 18703 +stoica 0 1 6.957497 0.000000 18704 +abdel 0 1 6.957497 0.000000 18705 +wahab 0 1 6.957497 0.000000 18706 +algorithmfor 0 1 6.957497 0.000000 18707 +anexpand 0 1 6.957497 0.000000 18708 +fastschedul 0 1 6.957497 0.000000 18709 +processingsymposium 0 1 6.957497 0.000000 18710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..bf6201ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +site 0 106 2.197225 0.000000 119 +look 0 107 2.197225 0.000000 115 +check 0 115 2.197225 0.000000 118 +manag 0 114 2.197225 0.000000 125 +question 0 91 2.397895 0.000000 141 +section 0 94 2.397895 0.000000 149 +thing 0 84 2.484907 0.000000 189 +good 0 77 2.564949 0.000000 200 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +septemb 0 65 2.772589 0.000000 274 +maintain 0 51 2.995732 0.000000 342 +frequent 0 49 3.044522 0.000000 367 +keep 0 44 3.135494 0.000000 409 +industri 0 38 3.295837 0.000000 464 +often 0 31 3.496508 0.000000 551 +ask 0 28 3.610918 0.000000 597 +releas 0 28 3.610918 0.000000 616 +great 0 27 3.637586 0.000000 626 +mind 0 27 3.637586 0.000000 632 +latest 0 21 3.912023 0.000000 785 +dilbert 0 16 4.174387 0.000000 996 +appl 0 11 4.553877 0.000000 1303 +pagewelcom 0 11 4.553877 0.000000 1344 +spot 0 7 5.010635 0.000000 1894 +bodner 0 5 5.347108 0.000000 2401 +alma 0 3 5.857933 0.000000 3963 +herei 0 2 6.263398 0.000000 6187 +mater 0 2 6.263398 0.000000 5930 +nando 0 2 6.263398 0.000000 5458 +numb 0 2 6.263398 0.000000 5505 +soap 0 2 6.263398 0.000000 4511 +jonb 0 2 6.263398 0.000000 4771 +mound 0 2 6.263398 0.000000 4773 +powerbook 0 1 6.957497 0.000000 18711 +amass 0 1 6.957497 0.000000 18712 +catagori 0 1 6.957497 0.000000 18713 +needsth 0 1 6.957497 0.000000 18714 +coverageth 0 1 6.957497 0.000000 18715 +operaish 0 1 6.957497 0.000000 18716 +drivelziffnet 0 1 6.957497 0.000000 18717 +newsc 0 1 6.957497 0.000000 18718 +classworktodai 0 1 6.957497 0.000000 18719 +chucklejon 0 1 6.957497 0.000000 18720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..b98aab3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +process 0 142 1.945910 0.000000 72 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +imag 0 91 2.397895 0.000000 161 +commun 0 95 2.397895 0.000000 157 +sinc 0 90 2.397895 0.000000 159 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +refer 0 78 2.564949 0.000000 203 +degre 0 69 2.708050 0.000000 259 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +could 0 46 3.091042 0.000000 383 +autom 0 41 3.218876 0.000000 434 +china 0 37 3.332205 0.000000 487 +chines 0 29 3.583519 0.000000 595 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +beij 0 19 4.007333 0.000000 876 +stanford 0 17 4.110874 0.000000 955 +medic 0 17 4.110874 0.000000 958 +came 0 13 4.382027 0.000000 1197 +academi 0 8 4.875197 0.000000 1735 +signal 0 7 5.010635 0.000000 1910 +provinc 0 4 5.568345 0.000000 3009 +biomed 0 4 5.568345 0.000000 2905 +hometown 0 3 5.857933 0.000000 3817 +nanj 0 2 6.263398 0.000000 5728 +jiangsu 0 2 6.263398 0.000000 5586 +southeast 0 2 6.263398 0.000000 6188 +frombeij 0 1 6.957497 0.000000 18721 +capitol 0 1 6.957497 0.000000 18722 +specil 0 1 6.957497 0.000000 18723 +chinacurr 0 1 6.957497 0.000000 18724 +tele 0 1 6.957497 0.000000 18725 +stuffjava 0 1 6.957497 0.000000 18726 +placeshor 0 1 6.957497 0.000000 18727 +tutorialchina 0 1 6.957497 0.000000 18728 +affairchina 0 1 6.957497 0.000000 18729 +democracybeij 0 1 6.957497 0.000000 18730 +groupstanford 0 1 6.957497 0.000000 18731 +informaticsmit 0 1 6.957497 0.000000 18732 +processingjob 0 1 6.957497 0.000000 18733 +newsyou 0 1 6.957497 0.000000 18734 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..272df369 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +high 0 130 2.079442 0.000000 101 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +school 0 84 2.484907 0.000000 188 +second 0 81 2.484907 0.000000 166 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +state 0 76 2.564949 0.000000 207 +view 0 70 2.708050 0.000000 254 +receiv 0 66 2.708050 0.000000 244 +window 0 68 2.708050 0.000000 242 +new 0 64 2.772589 0.000000 262 +plan 0 65 2.772589 0.000000 272 +appoint 0 49 3.044522 0.000000 358 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +join 0 39 3.258097 0.000000 457 +electr 0 38 3.295837 0.000000 461 +print 0 34 3.401197 0.000000 503 +team 0 27 3.637586 0.000000 625 +jeff 0 25 3.737670 0.000000 673 +background 0 25 3.737670 0.000000 664 +sport 0 25 3.737670 0.000000 683 +miscellan 0 23 3.806662 0.000000 731 +emphasi 0 22 3.850148 0.000000 755 +deal 0 22 3.850148 0.000000 736 +hous 0 21 3.912023 0.000000 801 +thur 0 19 4.007333 0.000000 847 +statu 0 18 4.060443 0.000000 885 +jose 1 16 4.174387 4.174387 976 +diego 0 16 4.174387 0.000000 992 +club 0 15 4.248495 0.000000 1058 +francisco 0 14 4.317488 0.000000 1095 +basketbal 0 12 4.465908 0.000000 1289 +song 0 11 4.553877 0.000000 1380 +tue 0 11 4.553877 0.000000 1308 +town 0 10 4.653960 0.000000 1458 +vista 0 10 4.653960 0.000000 1452 +theme 0 8 4.875197 0.000000 1707 +hockei 0 8 4.875197 0.000000 1760 +golden 0 7 5.010635 0.000000 1962 +footbal 0 7 5.010635 0.000000 1912 +tip 0 7 5.010635 0.000000 1863 +cupertino 0 2 6.263398 0.000000 4956 +columbia 0 2 6.263398 0.000000 5900 +mercuri 0 1 6.957497 0.000000 18735 +andnando 0 1 6.957497 0.000000 18736 +shabel 0 1 6.957497 0.000000 18737 +pagech 0 1 6.957497 0.000000 18738 +wisconsinch 0 1 6.957497 0.000000 18739 +informationmajor 0 1 6.957497 0.000000 18740 +monta 0 1 6.957497 0.000000 18741 +warrior 0 1 6.957497 0.000000 18742 +shark 0 1 6.957497 0.000000 18743 +oakland 0 1 6.957497 0.000000 18744 +newsmus 0 1 6.957497 0.000000 18745 +jshabel 0 1 6.957497 0.000000 18746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..41f27443 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +data 1 170 1.791759 1.791759 49 +wisconsin 0 169 1.791759 0.000000 54 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +intern 0 108 2.197225 0.000000 128 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +manag 0 114 2.197225 0.000000 125 +technic 0 100 2.302585 0.000000 140 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +need 0 98 2.302585 0.000000 135 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +search 0 95 2.397895 0.000000 155 +larg 0 82 2.484907 0.000000 168 +west 0 83 2.484907 0.000000 192 +environ 0 84 2.484907 0.000000 177 +master 0 76 2.564949 0.000000 216 +server 0 76 2.564949 0.000000 204 +appli 0 71 2.639057 0.000000 226 +solv 0 73 2.639057 0.000000 234 +involv 0 71 2.639057 0.000000 227 +workshop 0 71 2.639057 0.000000 239 +integr 0 67 2.708050 0.000000 245 +august 0 66 2.708050 0.000000 257 +organ 0 65 2.772589 0.000000 265 +street 0 63 2.772589 0.000000 293 +prof 0 64 2.772589 0.000000 273 +improv 0 62 2.772589 0.000000 289 +complex 0 64 2.772589 0.000000 269 +evalu 0 64 2.772589 0.000000 266 +januari 0 62 2.772589 0.000000 264 +experi 0 64 2.772589 0.000000 283 +explor 0 58 2.890372 0.000000 324 +thesi 0 57 2.890372 0.000000 327 +overview 0 56 2.890372 0.000000 323 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +found 0 53 2.944439 0.000000 337 +digit 0 52 2.995732 0.000000 348 +advisor 0 51 2.995732 0.000000 355 +visual 0 48 3.044522 0.000000 372 +set 0 50 3.044522 0.000000 361 +telephon 0 50 3.044522 0.000000 373 +frequent 0 49 3.044522 0.000000 367 +adapt 0 46 3.091042 0.000000 387 +discuss 0 45 3.135494 0.000000 399 +third 0 43 3.178054 0.000000 412 +linear 0 41 3.218876 0.000000 431 +join 0 39 3.258097 0.000000 457 +submit 0 39 3.258097 0.000000 440 +societi 0 40 3.258097 0.000000 456 +industri 0 38 3.295837 0.000000 464 +concurr 0 34 3.401197 0.000000 501 +queri 0 33 3.433987 0.000000 524 +storag 0 31 3.496508 0.000000 553 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +measur 0 28 3.610918 0.000000 609 +arrai 0 27 3.637586 0.000000 627 +client 0 25 3.737670 0.000000 679 +jeff 0 25 3.737670 0.000000 673 +disk 0 22 3.850148 0.000000 747 +chen 0 21 3.912023 0.000000 791 +media 0 19 4.007333 0.000000 861 +sigmod 0 19 4.007333 0.000000 877 +miller 0 17 4.110874 0.000000 949 +ramakrishnan 0 16 4.174387 0.000000 972 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +dbm 0 13 4.382027 0.000000 1136 +buffer 0 12 4.465908 0.000000 1211 +characterist 0 12 4.465908 0.000000 1257 +daniel 0 12 4.465908 0.000000 1233 +raghu 0 12 4.465908 0.000000 1212 +optic 0 12 4.465908 0.000000 1221 +volum 0 11 4.553877 0.000000 1347 +alpha 0 11 4.553877 0.000000 1348 +resid 0 10 4.653960 0.000000 1461 +unifi 0 8 4.875197 0.000000 1774 +dataset 0 7 5.010635 0.000000 1914 +refere 0 7 5.010635 0.000000 1895 +eduresearch 0 6 5.164786 0.000000 2205 +divers 0 6 5.164786 0.000000 2232 +spie 0 6 5.164786 0.000000 2119 +quantum 0 6 5.164786 0.000000 2214 +almaden 0 5 5.347108 0.000000 2511 +tape 0 4 5.568345 0.000000 2959 +theintern 0 4 5.568345 0.000000 2981 +metadata 0 4 5.568345 0.000000 2945 +karen 0 4 5.568345 0.000000 2796 +ford 0 4 5.568345 0.000000 2636 +filesystem 0 4 5.568345 0.000000 2587 +tertiari 0 3 5.857933 0.000000 3193 +myllymaki 0 3 5.857933 0.000000 4022 +alsoavail 0 3 5.857933 0.000000 3887 +wenger 0 3 5.857933 0.000000 4023 +schwarz 0 3 5.857933 0.000000 3986 +trishul 0 3 5.857933 0.000000 4016 +chilimbi 0 3 5.857933 0.000000 4015 +raid 0 3 5.857933 0.000000 4012 +jussi 0 2 6.263398 0.000000 6133 +andvisu 0 2 6.263398 0.000000 6189 +karavan 0 2 6.263398 0.000000 6190 +andtool 0 2 6.263398 0.000000 5126 +beyer 0 2 6.263398 0.000000 6103 +lawand 0 2 6.263398 0.000000 6191 +dataengin 0 2 6.263398 0.000000 6118 +helsinki 0 2 6.263398 0.000000 5702 +storageto 0 1 6.957497 0.000000 18747 +andtap 0 1 6.957497 0.000000 18748 +yoav 0 1 6.957497 0.000000 18749 +weiss 0 1 6.957497 0.000000 18750 +scsi 0 1 6.957497 0.000000 18751 +myllymakijussi 0 1 6.957497 0.000000 18752 +summaryi 0 1 6.957497 0.000000 18753 +onadvanc 0 1 6.957497 0.000000 18754 +mcurrent 0 1 6.957497 0.000000 18755 +deviseproject 0 1 6.957497 0.000000 18756 +mironlivni 0 1 6.957497 0.000000 18757 +joinsof 0 1 6.957497 0.000000 18758 +listbelow 0 1 6.957497 0.000000 18759 +andfunct 0 1 6.957497 0.000000 18760 +datavisu 0 1 6.957497 0.000000 18761 +managementissu 0 1 6.957497 0.000000 18762 +publicationseffici 0 1 6.957497 0.000000 18763 +programperform 0 1 6.957497 0.000000 18764 +bartonp 0 1 6.957497 0.000000 18765 +tertiarystorag 0 1 6.957497 0.000000 18766 +withmiron 0 1 6.957497 0.000000 18767 +acmsigmetr 0 1 6.957497 0.000000 18768 +publicationdevis 0 1 6.957497 0.000000 18769 +donjerkov 0 1 6.957497 0.000000 18770 +andmiron 0 1 6.957497 0.000000 18771 +publicationsdisk 0 1 6.957497 0.000000 18772 +tapeaccess 0 1 6.957497 0.000000 18773 +degreeproject 0 1 6.957497 0.000000 18774 +networkarchitectur 0 1 6.957497 0.000000 18775 +finnish 0 1 6.957497 0.000000 18776 +documentsimplement 0 1 6.957497 0.000000 18777 +treealgorithm 0 1 6.957497 0.000000 18778 +productsoverview 0 1 6.957497 0.000000 18779 +supplier 0 1 6.957497 0.000000 18780 +productssom 0 1 6.957497 0.000000 18781 +adaptec 0 1 6.957497 0.000000 18782 +workstationsandpcsandtechn 0 1 6.957497 0.000000 18783 +journaland 0 1 6.957497 0.000000 18784 +whitepap 0 1 6.957497 0.000000 18785 +researchandcyberjourn 0 1 6.957497 0.000000 18786 +tapeanddlt 0 1 6.957497 0.000000 18787 +faqandwhitepap 0 1 6.957497 0.000000 18788 +solarisandsparcstationsandtechn 0 1 6.957497 0.000000 18789 +faqandstorag 0 1 6.957497 0.000000 18790 +faqand 0 1 6.957497 0.000000 18791 +otherusenet 0 1 6.957497 0.000000 18792 +faqsmani 0 1 6.957497 0.000000 18793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..1d5696e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +construct 1 139 1.945910 1.945910 82 +homepag 1 93 2.397895 2.397895 148 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +other 1 24 3.761200 3.761200 697 +sorri 1 4 5.568345 5.568345 3059 +jyothi 1 3 5.857933 5.857933 3423 +jyothithi 1 1 6.957497 6.957497 18794 +dissappoint 1 1 6.957497 6.957497 18795 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..11e8a713 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +high 0 130 2.079442 0.000000 101 +tool 0 117 2.079442 0.000000 93 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +school 0 84 2.484907 0.000000 188 +learn 0 86 2.484907 0.000000 170 +west 0 83 2.484907 0.000000 192 +environ 0 84 2.484907 0.000000 177 +thing 0 84 2.484907 0.000000 189 +know 0 80 2.564949 0.000000 198 +servic 0 72 2.639057 0.000000 236 +free 0 73 2.639057 0.000000 224 +street 0 63 2.772589 0.000000 293 +undergradu 0 54 2.944439 0.000000 338 +cool 0 49 3.044522 0.000000 374 +life 0 50 3.044522 0.000000 375 +could 0 46 3.091042 0.000000 383 +autom 0 41 3.218876 0.000000 434 +alumni 0 21 3.912023 0.000000 807 +miss 0 19 4.007333 0.000000 866 +thoma 0 18 4.060443 0.000000 901 +women 0 16 4.174387 0.000000 1004 +anyth 0 16 4.174387 0.000000 998 +save 0 14 4.317488 0.000000 1099 +tune 0 12 4.465908 0.000000 1227 +safe 0 12 4.465908 0.000000 1274 +paradyn 0 9 4.753590 0.000000 1614 +tutor 0 9 4.753590 0.000000 1552 +port 0 8 4.875197 0.000000 1766 +pursu 0 7 5.010635 0.000000 1902 +constitut 0 6 5.164786 0.000000 2026 +ship 0 5 5.347108 0.000000 2534 +salt 0 5 5.347108 0.000000 2413 +water 0 5 5.347108 0.000000 2535 +sail 0 5 5.347108 0.000000 2571 +karen 0 4 5.568345 0.000000 2796 +frontier 0 3 5.857933 0.000000 3771 +counti 0 3 5.857933 0.000000 3682 +karavan 0 2 6.263398 0.000000 6190 +wic 0 2 6.263398 0.000000 4673 +dane 0 2 6.263398 0.000000 5534 +pagefor 0 2 6.263398 0.000000 6151 +lover 0 2 6.263398 0.000000 6192 +tear 0 2 6.263398 0.000000 5076 +karavaniceveryth 0 1 6.957497 0.000000 18796 +karavanicresearch 0 1 6.957497 0.000000 18797 +databasesask 0 1 6.957497 0.000000 18798 +studentstrio 0 1 6.957497 0.000000 18799 +safer 0 1 6.957497 0.000000 18800 +chocol 0 1 6.957497 0.000000 18801 +onlystuyves 0 1 6.957497 0.000000 18802 +associationstuyves 0 1 6.957497 0.000000 18803 +legisl 0 1 6.957497 0.000000 18804 +internetth 0 1 6.957497 0.000000 18805 +cure 0 1 6.957497 0.000000 18806 +sweat 0 1 6.957497 0.000000 18807 +isak 0 1 6.957497 0.000000 18808 +dinesen 0 1 6.957497 0.000000 18809 +admir 0 1 6.957497 0.000000 18810 +grace 0 1 6.957497 0.000000 18811 +hopper 0 1 6.957497 0.000000 18812 +pioneer 0 1 6.957497 0.000000 18813 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..83159a61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +us 0 329 1.098612 0.000000 16 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +process 0 142 1.945910 0.000000 72 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +intern 0 108 2.197225 0.000000 128 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +memori 0 101 2.302585 0.000000 139 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +ieee 0 86 2.484907 0.000000 190 +wide 0 84 2.484907 0.000000 185 +appear 0 78 2.564949 0.000000 210 +interfac 0 79 2.564949 0.000000 209 +april 0 77 2.564949 0.000000 196 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +david 0 71 2.639057 0.000000 232 +symposium 0 72 2.639057 0.000000 238 +august 0 66 2.708050 0.000000 257 +simul 0 66 2.708050 0.000000 255 +dept 0 64 2.772589 0.000000 291 +share 0 59 2.833213 0.000000 304 +march 0 61 2.833213 0.000000 295 +juli 0 60 2.833213 0.000000 305 +extens 1 53 2.944439 2.944439 340 +processor 0 54 2.944439 0.000000 335 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +protocol 0 45 3.135494 0.000000 407 +cach 0 41 3.218876 0.000000 432 +editor 0 41 3.218876 0.000000 433 +examin 0 42 3.218876 0.000000 424 +autom 0 41 3.218876 0.000000 434 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +jame 0 35 3.401197 0.000000 507 +option 0 30 3.555348 0.000000 575 +multiprocessor 0 28 3.610918 0.000000 605 +aspect 0 25 3.737670 0.000000 663 +supercomput 0 25 3.737670 0.000000 681 +scalabl 0 24 3.761200 0.000000 705 +methodolog 0 23 3.806662 0.000000 733 +synthesi 0 20 3.951244 0.000000 834 +north 0 19 4.007333 0.000000 873 +wind 0 18 4.060443 0.000000 908 +monitor 0 17 4.110874 0.000000 941 +hierarch 0 15 4.248495 0.000000 1018 +coher 0 14 4.317488 0.000000 1109 +prolog 0 13 4.382027 0.000000 1155 +introduc 0 13 4.382027 0.000000 1139 +tunnel 0 9 4.753590 0.000000 1615 +depth 0 8 4.875197 0.000000 1636 +upcom 0 8 4.875197 0.000000 1685 +goodman 0 7 5.010635 0.000000 1891 +dedic 0 7 5.010635 0.000000 1843 +greec 0 6 5.164786 0.000000 2208 +holland 0 5 5.347108 0.000000 2490 +publicationsresearch 0 4 5.568345 0.000000 2876 +galileo 0 4 5.568345 0.000000 3086 +microprogram 0 4 5.568345 0.000000 2604 +stefano 0 3 5.857933 0.000000 3372 +kaxira 0 3 5.857933 0.000000 3373 +stein 0 3 5.857933 0.000000 3646 +multiprocess 0 2 6.263398 0.000000 5142 +gjess 0 2 6.263398 0.000000 6156 +kiloprocessor 0 1 6.957497 0.000000 18814 +glow 0 1 6.957497 0.000000 18815 +papakonstantin 0 1 6.957497 0.000000 18816 +tsanaka 0 1 6.957497 0.000000 18817 +sciresearch 0 1 6.957497 0.000000 18818 +collaborationwith 0 1 6.957497 0.000000 18819 +incolabor 0 1 6.957497 0.000000 18820 +goodmanto 0 1 6.957497 0.000000 18821 +kaxirasto 0 1 6.957497 0.000000 18822 +goodmannd 0 1 6.957497 0.000000 18823 +goodmanst 0 1 6.957497 0.000000 18824 +kaxirasunivers 0 1 6.957497 0.000000 18825 +stafylopati 0 1 6.957497 0.000000 18826 +kaxirasinform 0 1 6.957497 0.000000 18827 +pekmestzi 0 1 6.957497 0.000000 18828 +kaxirasp 0 1 6.957497 0.000000 18829 +kaxirasmicroprocess 0 1 6.957497 0.000000 18830 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..c661cf3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,252 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +contact 0 153 1.791759 0.000000 59 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +world 0 115 2.197225 0.000000 126 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +person 0 111 2.197225 0.000000 117 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +sinc 0 90 2.397895 0.000000 159 +associ 0 93 2.397895 0.000000 151 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +level 0 87 2.484907 0.000000 180 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +interfac 0 79 2.564949 0.000000 209 +come 0 78 2.564949 0.000000 202 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +html 0 75 2.639057 0.000000 235 +would 0 67 2.708050 0.000000 251 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +result 0 65 2.772589 0.000000 281 +locat 0 59 2.833213 0.000000 303 +index 0 56 2.890372 0.000000 309 +publish 0 57 2.890372 0.000000 326 +variou 0 56 2.890372 0.000000 317 +think 0 57 2.890372 0.000000 314 +talk 0 53 2.944439 0.000000 336 +allow 0 53 2.944439 0.000000 333 +week 0 52 2.995732 0.000000 343 +date 0 51 2.995732 0.000000 344 +much 0 52 2.995732 0.000000 349 +standard 0 48 3.044522 0.000000 365 +visual 0 48 3.044522 0.000000 372 +give 0 50 3.044522 0.000000 359 +archiv 0 49 3.044522 0.000000 364 +pointer 0 48 3.044522 0.000000 368 +possibl 0 47 3.091042 0.000000 378 +done 0 47 3.091042 0.000000 381 +electron 0 47 3.091042 0.000000 379 +made 0 44 3.135494 0.000000 398 +directori 0 45 3.135494 0.000000 396 +better 0 45 3.135494 0.000000 401 +third 0 43 3.178054 0.000000 412 +long 0 43 3.178054 0.000000 413 +might 0 41 3.218876 0.000000 426 +probabl 0 40 3.258097 0.000000 455 +littl 0 39 3.258097 0.000000 454 +realli 0 40 3.258097 0.000000 444 +programm 0 39 3.258097 0.000000 445 +form 0 39 3.258097 0.000000 443 +small 0 39 3.258097 0.000000 447 +slide 0 38 3.295837 0.000000 467 +especi 0 36 3.367296 0.000000 496 +short 0 36 3.367296 0.000000 499 +soon 0 36 3.367296 0.000000 494 +print 0 34 3.401197 0.000000 503 +taught 0 33 3.433987 0.000000 526 +idea 0 32 3.465736 0.000000 545 +often 0 31 3.496508 0.000000 551 +someth 0 31 3.496508 0.000000 554 +quot 0 29 3.583519 0.000000 582 +pass 0 28 3.610918 0.000000 611 +becom 0 28 3.610918 0.000000 603 +rather 0 26 3.688879 0.000000 642 +relev 0 26 3.688879 0.000000 637 +never 0 25 3.737670 0.000000 671 +hill 0 25 3.737670 0.000000 670 +reliabl 0 25 3.737670 0.000000 674 +notic 0 25 3.737670 0.000000 675 +wish 0 24 3.761200 0.000000 692 +consult 0 24 3.761200 0.000000 687 +displai 0 23 3.806662 0.000000 712 +proof 0 23 3.806662 0.000000 720 +varieti 0 22 3.850148 0.000000 740 +hierarchi 0 22 3.850148 0.000000 744 +properti 0 22 3.850148 0.000000 749 +leav 0 21 3.912023 0.000000 772 +love 0 21 3.912023 0.000000 804 +break 0 20 3.951244 0.000000 812 +ever 0 19 4.007333 0.000000 872 +anderson 0 19 4.007333 0.000000 860 +steven 0 17 4.110874 0.000000 953 +thought 0 17 4.110874 0.000000 945 +anyon 0 17 4.110874 0.000000 916 +anyth 0 16 4.174387 0.000000 998 +doesn 0 15 4.248495 0.000000 1055 +indic 0 15 4.248495 0.000000 1013 +piec 0 15 4.248495 0.000000 1020 +side 0 15 4.248495 0.000000 1022 +anywai 0 15 4.248495 0.000000 1047 +rate 0 15 4.248495 0.000000 1037 +atth 0 15 4.248495 0.000000 1019 +psycholog 0 15 4.248495 0.000000 1054 +convent 0 14 4.317488 0.000000 1072 +attribut 0 14 4.317488 0.000000 1092 +everyth 0 13 4.382027 0.000000 1169 +care 0 13 4.382027 0.000000 1177 +front 0 13 4.382027 0.000000 1154 +unfortun 0 13 4.382027 0.000000 1170 +translat 0 13 4.382027 0.000000 1164 +insid 0 12 4.465908 0.000000 1262 +prelim 0 12 4.465908 0.000000 1201 +skill 0 12 4.465908 0.000000 1205 +uniqu 0 12 4.465908 0.000000 1228 +primit 0 11 4.553877 0.000000 1317 +perman 0 11 4.553877 0.000000 1372 +success 0 10 4.653960 0.000000 1390 +enter 0 10 4.653960 0.000000 1454 +subset 0 10 4.653960 0.000000 1425 +light 0 9 4.753590 0.000000 1533 +end 0 9 4.753590 0.000000 1567 +discov 0 9 4.753590 0.000000 1562 +clear 0 9 4.753590 0.000000 1488 +angel 0 8 4.875197 0.000000 1779 +unifi 0 8 4.875197 0.000000 1774 +gave 0 7 5.010635 0.000000 1922 +surpris 0 7 5.010635 0.000000 1828 +perfect 0 7 5.010635 0.000000 1921 +tag 0 7 5.010635 0.000000 1821 +intellectu 0 7 5.010635 0.000000 1847 +serial 0 7 5.010635 0.000000 1975 +adob 0 7 5.010635 0.000000 1873 +chanc 0 7 5.010635 0.000000 1960 +shot 0 7 5.010635 0.000000 1898 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +meant 0 6 5.164786 0.000000 2055 +vari 0 6 5.164786 0.000000 2001 +impress 0 6 5.164786 0.000000 2096 +ifyou 0 6 5.164786 0.000000 1992 +creation 0 6 5.164786 0.000000 2069 +somewher 0 6 5.164786 0.000000 2176 +keeper 0 5 5.347108 0.000000 2569 +stupid 0 5 5.347108 0.000000 2489 +junior 0 5 5.347108 0.000000 2519 +explicitli 0 5 5.347108 0.000000 2308 +hate 0 5 5.347108 0.000000 2529 +bean 0 4 5.568345 0.000000 2968 +hell 0 4 5.568345 0.000000 2885 +suppos 0 4 5.568345 0.000000 3002 +cheap 0 4 5.568345 0.000000 2751 +aliv 0 3 5.857933 0.000000 3864 +heaven 0 3 5.857933 0.000000 3589 +lauri 0 3 5.857933 0.000000 3867 +wasn 0 3 5.857933 0.000000 3800 +argu 0 3 5.857933 0.000000 3698 +rsum 0 3 5.857933 0.000000 3939 +outof 0 3 5.857933 0.000000 3296 +aren 0 3 5.857933 0.000000 3512 +easier 0 3 5.857933 0.000000 3470 +caltech 0 2 6.263398 0.000000 5223 +ironi 0 2 6.263398 0.000000 5986 +sarcasm 0 2 6.263398 0.000000 5871 +offens 0 2 6.263398 0.000000 6168 +miracl 0 2 6.263398 0.000000 5710 +convinc 0 2 6.263398 0.000000 6019 +defunct 0 2 6.263398 0.000000 6162 +personnel 0 2 6.263398 0.000000 4381 +danger 0 2 6.263398 0.000000 5725 +informationag 0 2 6.263398 0.000000 5446 +bui 0 2 6.263398 0.000000 4486 +ofread 0 2 6.263398 0.000000 4417 +possibleto 0 2 6.263398 0.000000 4942 +hedgehog 0 1 6.957497 0.000000 18831 +pager 0 1 6.957497 0.000000 18832 +foughtthei 0 1 6.957497 0.000000 18833 +bitmap 0 1 6.957497 0.000000 18834 +theblind 0 1 6.957497 0.000000 18835 +whateverbrows 0 1 6.957497 0.000000 18836 +literari 0 1 6.957497 0.000000 18837 +satir 0 1 6.957497 0.000000 18838 +butnoth 0 1 6.957497 0.000000 18839 +herein 0 1 6.957497 0.000000 18840 +areoffend 0 1 6.957497 0.000000 18841 +firsttwo 0 1 6.957497 0.000000 18842 +addup 0 1 6.957497 0.000000 18843 +fizzl 0 1 6.957497 0.000000 18844 +areobtain 0 1 6.957497 0.000000 18845 +creatingkiosk 0 1 6.957497 0.000000 18846 +thosewho 0 1 6.957497 0.000000 18847 +mybe 0 1 6.957497 0.000000 18848 +thoughtson 0 1 6.957497 0.000000 18849 +wantto 0 1 6.957497 0.000000 18850 +todo 0 1 6.957497 0.000000 18851 +sporad 0 1 6.957497 0.000000 18852 +danenet 0 1 6.957497 0.000000 18853 +dilhr 0 1 6.957497 0.000000 18854 +jobnet 0 1 6.957497 0.000000 18855 +photonet 0 1 6.957497 0.000000 18856 +databaseus 0 1 6.957497 0.000000 18857 +freez 0 1 6.957497 0.000000 18858 +fought 0 1 6.957497 0.000000 18859 +sfuai 0 1 6.957497 0.000000 18860 +assigna 0 1 6.957497 0.000000 18861 +contextu 0 1 6.957497 0.000000 18862 +distil 0 1 6.957497 0.000000 18863 +rsuminto 0 1 6.957497 0.000000 18864 +pinch 0 1 6.957497 0.000000 18865 +certaintruth 0 1 6.957497 0.000000 18866 +eventuallypick 0 1 6.957497 0.000000 18867 +mull 0 1 6.957497 0.000000 18868 +accessibleto 0 1 6.957497 0.000000 18869 +tough 0 1 6.957497 0.000000 18870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..41d6e73b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +perform 1 143 1.945910 1.945910 74 +welcom 0 122 2.079442 0.000000 99 +postscript 0 131 2.079442 0.000000 90 +search 0 95 2.397895 0.000000 155 +music 0 42 3.218876 0.000000 436 +indian 0 22 3.850148 0.000000 769 +classic 0 14 4.317488 0.000000 1084 +gzip 0 6 5.164786 0.000000 2117 +steer 0 5 5.347108 0.000000 2328 +krishna 0 3 5.857933 0.000000 3495 +kunchithapadamkrishna 0 1 6.957497 0.000000 18871 +kunchithapadamgreet 0 1 6.957497 0.000000 18872 +miscellaneouspubl 0 1 6.957497 0.000000 18873 +toolsresum 0 1 6.957497 0.000000 18874 +bykk 0 1 6.957497 0.000000 18875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..bb9cccec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +street 0 63 2.772589 0.000000 293 +sigmod 0 19 4.007333 0.000000 877 +dbm 0 13 4.382027 0.000000 1136 +dewitt 0 12 4.465908 0.000000 1270 +tuft 1 5 5.347108 5.347108 2575 +kristin 0 4 5.568345 0.000000 3089 +miscellani 0 3 5.857933 0.000000 3976 +pagekristin 0 1 6.957497 0.000000 18876 +eduadvisor 0 1 6.957497 0.000000 18877 +serveruw 0 1 6.957497 0.000000 18878 +groupacm 0 1 6.957497 0.000000 18879 +pageeo 0 1 6.957497 0.000000 18880 +officelast 0 1 6.957497 0.000000 18881 +tuftekristin 0 1 6.957497 0.000000 18882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..8b33d843 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +person 0 111 2.197225 0.000000 117 +topic 0 114 2.197225 0.000000 110 +mathemat 0 108 2.197225 0.000000 123 +follow 0 92 2.397895 0.000000 143 +homepag 0 93 2.397895 0.000000 148 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +good 0 77 2.564949 0.000000 200 +import 0 65 2.772589 0.000000 282 +life 0 50 3.044522 0.000000 375 +electron 0 47 3.091042 0.000000 379 +keep 0 44 3.135494 0.000000 409 +favorit 0 44 3.135494 0.000000 410 +compani 0 41 3.218876 0.000000 423 +short 0 36 3.367296 0.000000 499 +whole 0 17 4.110874 0.000000 940 +hobbi 0 16 4.174387 0.000000 1009 +uniqu 0 12 4.465908 0.000000 1228 +opinion 0 8 4.875197 0.000000 1708 +pursu 0 7 5.010635 0.000000 1902 +entiti 0 3 5.857933 0.000000 3096 +krung 1 1 6.957497 6.957497 18883 +homepageupd 0 1 6.957497 0.000000 18884 +underconstructioni 0 1 6.957497 0.000000 18885 +serf 0 1 6.957497 0.000000 18886 +cometh 0 1 6.957497 0.000000 18887 +linkedth 0 1 6.957497 0.000000 18888 +sinapiromsaran 0 1 6.957497 0.000000 18889 +emailkrung 0 1 6.957497 0.000000 18890 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..a80d72e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +applic 0 170 1.791759 0.000000 56 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +report 0 131 2.079442 0.000000 92 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +structur 0 106 2.197225 0.000000 105 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +chang 0 82 2.484907 0.000000 163 +appear 0 78 2.564949 0.000000 210 +complet 0 77 2.564949 0.000000 208 +logic 0 71 2.639057 0.000000 230 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +abstract 0 62 2.772589 0.000000 276 +function 0 62 2.772589 0.000000 275 +result 0 65 2.772589 0.000000 281 +foundat 0 62 2.772589 0.000000 286 +artifici 0 63 2.772589 0.000000 280 +reason 0 57 2.890372 0.000000 318 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +space 0 57 2.890372 0.000000 310 +local 0 55 2.944439 0.000000 334 +extens 0 53 2.944439 0.000000 340 +right 0 48 3.044522 0.000000 363 +basic 0 50 3.044522 0.000000 360 +algebra 0 45 3.135494 0.000000 394 +math 0 44 3.135494 0.000000 402 +answer 0 45 3.135494 0.000000 391 +autom 0 41 3.218876 0.000000 434 +review 0 42 3.218876 0.000000 425 +press 0 42 3.218876 0.000000 419 +theoret 0 39 3.258097 0.000000 446 +close 0 38 3.295837 0.000000 465 +singl 0 34 3.401197 0.000000 510 +taught 0 33 3.433987 0.000000 526 +independ 0 32 3.465736 0.000000 548 +semant 0 29 3.583519 0.000000 587 +consid 0 29 3.583519 0.000000 590 +turn 0 29 3.583519 0.000000 586 +measur 0 28 3.610918 0.000000 609 +american 0 27 3.637586 0.000000 634 +comp 0 26 3.688879 0.000000 650 +theorem 0 21 3.912023 0.000000 786 +prove 0 19 4.007333 0.000000 848 +geometr 0 19 4.007333 0.000000 852 +failur 0 18 4.060443 0.000000 898 +stanford 0 17 4.110874 0.000000 955 +moor 0 17 4.110874 0.000000 936 +style 0 15 4.248495 0.000000 1036 +topolog 0 14 4.317488 0.000000 1089 +draft 0 14 4.317488 0.000000 1085 +prolog 0 13 4.382027 0.000000 1155 +resolut 0 13 4.382027 0.000000 1172 +weak 0 13 4.382027 0.000000 1159 +deduct 0 12 4.465908 0.000000 1236 +kenneth 0 12 4.465908 0.000000 1265 +loop 0 11 4.553877 0.000000 1310 +typic 0 11 4.553877 0.000000 1360 +fix 0 11 4.553877 0.000000 1327 +edutelephon 0 10 4.653960 0.000000 1473 +besid 0 8 4.875197 0.000000 1681 +compact 0 7 5.010635 0.000000 1907 +boyer 0 6 5.164786 0.000000 2013 +rough 0 6 5.164786 0.000000 2107 +infer 0 6 5.164786 0.000000 2040 +shortest 0 5 5.347108 0.000000 2424 +constant 0 5 5.347108 0.000000 2251 +kunen 1 3 5.857933 5.857933 3500 +mathematica 0 3 5.857933 0.000000 3303 +preprint 0 3 5.857933 0.000000 3481 +axiomat 0 3 5.857933 0.000000 3288 +monthli 0 3 5.857933 0.000000 3910 +expon 0 2 6.263398 0.000000 5323 +negat 0 2 6.263398 0.000000 6073 +mill 0 2 6.263398 0.000000 6193 +liter 0 2 6.263398 0.000000 4689 +law 0 2 6.263398 0.000000 4896 +hart 0 1 6.957497 0.000000 18891 +axiom 0 1 6.957497 0.000000 18892 +fundamenta 0 1 6.957497 0.000000 18893 +quasigroup 0 1 6.957497 0.000000 18894 +professormath 0 1 6.957497 0.000000 18895 +resolutionto 0 1 6.957497 0.000000 18896 +likeprolog 0 1 6.957497 0.000000 18897 +prologus 0 1 6.957497 0.000000 18898 +incompat 0 1 6.957497 0.000000 18899 +betweenleast 0 1 6.957497 0.000000 18900 +backtrack 0 1 6.957497 0.000000 18901 +thissubject 0 1 6.957497 0.000000 18902 +usualaxiom 0 1 6.957497 0.000000 18903 +ramsei 0 1 6.957497 0.000000 18904 +corson 0 1 6.957497 0.000000 18905 +moufang 0 1 6.957497 0.000000 18906 +conjugaci 0 1 6.957497 0.000000 18907 +moschovaki 0 1 6.957497 0.000000 18908 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..43de9150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,246 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +compil 0 122 2.079442 0.000000 96 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +machin 0 129 2.079442 0.000000 95 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +intern 0 108 2.197225 0.000000 128 +specif 0 106 2.197225 0.000000 106 +memori 0 101 2.302585 0.000000 139 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +octob 0 89 2.397895 0.000000 156 +associ 0 93 2.397895 0.000000 151 +control 0 82 2.484907 0.000000 164 +novemb 0 81 2.484907 0.000000 179 +educ 0 86 2.484907 0.000000 191 +larg 0 82 2.484907 0.000000 168 +level 0 87 2.484907 0.000000 180 +west 0 83 2.484907 0.000000 192 +librari 0 87 2.484907 0.000000 181 +ieee 0 86 2.484907 0.000000 190 +help 0 83 2.484907 0.000000 175 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +summari 0 73 2.639057 0.000000 237 +write 0 72 2.639057 0.000000 222 +workshop 0 71 2.639057 0.000000 239 +august 0 66 2.708050 0.000000 257 +java 0 70 2.708050 0.000000 248 +new 0 64 2.772589 0.000000 262 +septemb 0 65 2.772589 0.000000 274 +evalu 0 64 2.772589 0.000000 266 +improv 0 62 2.772589 0.000000 289 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +colleg 0 61 2.833213 0.000000 300 +march 0 61 2.833213 0.000000 295 +automat 0 61 2.833213 0.000000 306 +instruct 0 53 2.944439 0.000000 332 +februari 0 54 2.944439 0.000000 328 +hardwar 0 51 2.995732 0.000000 350 +particular 0 51 2.995732 0.000000 352 +principl 0 48 3.044522 0.000000 357 +frequent 0 49 3.044522 0.000000 367 +california 0 46 3.091042 0.000000 388 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +mark 0 44 3.135494 0.000000 403 +execut 0 45 3.135494 0.000000 404 +protocol 0 45 3.135494 0.000000 407 +better 0 45 3.135494 0.000000 401 +edit 0 42 3.218876 0.000000 418 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +transact 0 39 3.258097 0.000000 438 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 0 35 3.401197 0.000000 507 +bibliographi 0 34 3.401197 0.000000 518 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +richard 0 31 3.496508 0.000000 559 +profil 0 30 3.555348 0.000000 581 +power 0 30 3.555348 0.000000 573 +depend 0 29 3.583519 0.000000 583 +focus 0 29 3.583519 0.000000 584 +pass 0 28 3.610918 0.000000 611 +multiprocessor 0 28 3.610918 0.000000 605 +symbol 0 27 3.637586 0.000000 620 +static 0 27 3.637586 0.000000 619 +berkelei 0 26 3.688879 0.000000 657 +revis 0 26 3.688879 0.000000 640 +hill 0 25 3.737670 0.000000 670 +trace 0 25 3.737670 0.000000 677 +supercomput 0 25 3.737670 0.000000 681 +spent 0 25 3.737670 0.000000 676 +lab 0 24 3.761200 0.000000 698 +flow 0 24 3.761200 0.000000 700 +scalabl 0 24 3.761200 0.000000 705 +demonstr 0 24 3.761200 0.000000 694 +cooper 0 22 3.850148 0.000000 757 +path 0 21 3.912023 0.000000 778 +annot 0 21 3.912023 0.000000 775 +programminglanguag 0 21 3.912023 0.000000 782 +department 0 20 3.951244 0.000000 839 +portabl 0 20 3.951244 0.000000 819 +fine 0 20 3.951244 0.000000 822 +exploit 0 20 3.951244 0.000000 836 +eric 0 19 4.007333 0.000000 870 +wind 0 18 4.060443 0.000000 908 +thoma 0 18 4.060443 0.000000 901 +steven 0 17 4.110874 0.000000 953 +asplo 0 17 4.110874 0.000000 948 +micro 0 15 4.248495 0.000000 1031 +eduphon 0 15 4.248495 0.000000 1060 +hybrid 0 15 4.248495 0.000000 1057 +coher 0 14 4.317488 0.000000 1109 +sigplan 0 13 4.382027 0.000000 1190 +employ 0 12 4.465908 0.000000 1291 +brad 0 12 4.465908 0.000000 1264 +wood 0 11 4.553877 0.000000 1355 +branch 0 11 4.553877 0.000000 1318 +grain 0 10 4.653960 0.000000 1448 +facilit 0 10 4.653960 0.000000 1412 +laru 1 9 4.753590 4.753590 1560 +tunnel 0 9 4.753590 0.000000 1615 +ball 0 9 4.753590 0.000000 1608 +wilson 0 9 4.753590 0.000000 1536 +routin 0 9 4.753590 0.000000 1549 +pldi 0 8 4.875197 0.000000 1704 +secretari 0 8 4.875197 0.000000 1775 +upcom 0 8 4.875197 0.000000 1685 +irregular 0 8 4.875197 0.000000 1768 +joel 0 8 4.875197 0.000000 1698 +insert 0 8 4.875197 0.000000 1687 +sixth 0 7 5.010635 0.000000 1917 +roger 0 7 5.010635 0.000000 1892 +harvard 0 7 5.010635 0.000000 1926 +fifth 0 7 5.010635 0.000000 1931 +chandra 0 6 5.164786 0.000000 2091 +ann 0 6 5.164786 0.000000 2065 +bell 0 6 5.164786 0.000000 2224 +sciencedepart 0 6 5.164786 0.000000 2172 +microarchitectur 0 6 5.164786 0.000000 2238 +unpublish 0 6 5.164786 0.000000 2226 +lebeck 0 5 5.347108 0.000000 2582 +reinhardt 0 5 5.347108 0.000000 2583 +forprogram 0 5 5.347108 0.000000 2361 +babak 0 5 5.347108 0.000000 2584 +falsafi 0 5 5.347108 0.000000 2585 +ioanni 0 5 5.347108 0.000000 2553 +mukherje 0 5 5.347108 0.000000 2586 +tempest 0 5 5.347108 0.000000 2548 +icpp 0 5 5.347108 0.000000 2382 +toc 0 5 5.347108 0.000000 2562 +summarymi 0 5 5.347108 0.000000 2580 +computerarchitectur 0 5 5.347108 0.000000 2290 +alvin 0 4 5.568345 0.000000 3084 +satish 0 4 5.568345 0.000000 2833 +schoina 0 4 5.568345 0.000000 3085 +languagesand 0 4 5.568345 0.000000 3071 +gregori 0 4 5.568345 0.000000 2928 +sharma 0 4 5.568345 0.000000 2752 +ppopp 0 4 5.568345 0.000000 2774 +substrat 0 4 5.568345 0.000000 2857 +compcon 0 4 5.568345 0.000000 2958 +markhil 0 4 5.568345 0.000000 2819 +manuscript 0 4 5.568345 0.000000 2750 +wart 0 4 5.568345 0.000000 2987 +talluri 0 4 5.568345 0.000000 2820 +oracl 0 4 5.568345 0.000000 2823 +andi 0 4 5.568345 0.000000 3081 +fingerson 0 3 5.857933 0.000000 4018 +thea 0 3 5.857933 0.000000 4019 +sklenar 0 3 5.857933 0.000000 4020 +wcsss 0 3 5.857933 0.000000 3956 +shubhendu 0 3 5.857933 0.000000 4028 +saltz 0 3 5.857933 0.000000 3385 +frequenc 0 3 5.857933 0.000000 3206 +trishul 0 3 5.857933 0.000000 4016 +chilimbi 0 3 5.857933 0.000000 4015 +madhusudhan 0 3 5.857933 0.000000 4021 +parallelmachin 0 3 5.857933 0.000000 3693 +moredetail 0 3 5.857933 0.000000 3854 +guhan 0 2 6.263398 0.000000 6169 +viswanathan 0 2 6.263398 0.000000 6170 +schnarr 0 2 6.263398 0.000000 6194 +lorenz 0 2 6.263398 0.000000 4846 +shamik 0 2 6.263398 0.000000 6195 +cico 0 2 6.263398 0.000000 6120 +thewisconsin 0 2 6.263398 0.000000 6196 +usalaru 0 1 6.957497 0.000000 18909 +structuresc 0 1 6.957497 0.000000 18910 +spim 0 1 6.957497 0.000000 18911 +wartsrec 0 1 6.957497 0.000000 18912 +paperseffici 0 1 6.957497 0.000000 18913 +teapot 0 1 6.957497 0.000000 18914 +andjam 0 1 6.957497 0.000000 18915 +annerog 0 1 6.957497 0.000000 18916 +practiceof 0 1 6.957497 0.000000 18917 +languagesdesign 0 1 6.957497 0.000000 18918 +youfeng 0 1 6.957497 0.000000 18919 +jameslaru 0 1 6.957497 0.000000 18920 +cachier 0 1 6.957497 0.000000 18921 +graduatesbrad 0 1 6.957497 0.000000 18922 +vassar 0 1 6.957497 0.000000 18923 +languagesfirst 0 1 6.957497 0.000000 18924 +huelsbergen 0 1 6.957497 0.000000 18925 +tball 0 1 6.957497 0.000000 18926 +havehelp 0 1 6.957497 0.000000 18927 +coherencepolici 0 1 6.957497 0.000000 18928 +programmersunderstand 0 1 6.957497 0.000000 18929 +hasidentifi 0 1 6.957497 0.000000 18930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..e0f9165c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +tuesdai 1 73 2.639057 2.639057 219 +eduoffic 1 33 3.433987 3.433987 531 +nick 1 13 4.382027 4.382027 1180 +leavi 1 3 5.857933 5.857933 3438 +pagenick 1 1 6.957497 6.957497 18931 +pageoffic 1 1 6.957497 6.957497 18932 +wednessdai 1 1 6.957497 6.957497 18933 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..2377a622 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +includ 0 208 1.609438 0.000000 42 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +file 0 132 1.945910 0.000000 70 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +dayton 0 119 2.079442 0.000000 104 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +book 0 99 2.302585 0.000000 131 +member 0 84 2.484907 0.000000 165 +build 0 85 2.484907 0.000000 184 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +html 0 75 2.639057 0.000000 235 +order 0 69 2.708050 0.000000 249 +would 0 67 2.708050 0.000000 251 +dept 0 64 2.772589 0.000000 291 +sever 0 56 2.890372 0.000000 322 +publish 0 57 2.890372 0.000000 326 +finger 0 52 2.995732 0.000000 354 +algebra 0 45 3.135494 0.000000 394 +keep 0 44 3.135494 0.000000 409 +linear 0 41 3.218876 0.000000 431 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +http 0 41 3.218876 0.000000 420 +origin 0 38 3.295837 0.000000 472 +committe 0 34 3.401197 0.000000 522 +statist 0 35 3.401197 0.000000 521 +univ 0 28 3.610918 0.000000 617 +intend 0 28 3.610918 0.000000 599 +mind 0 27 3.637586 0.000000 632 +other 0 24 3.761200 0.000000 697 +compress 0 23 3.806662 0.000000 719 +wind 0 18 4.060443 0.000000 908 +steven 0 17 4.110874 0.000000 953 +draft 0 14 4.317488 0.000000 1085 +individu 0 13 4.382027 0.000000 1126 +tunnel 0 9 4.753590 0.000000 1615 +isbn 0 7 5.010635 0.000000 1901 +forum 0 6 5.164786 0.000000 2027 +ongo 0 6 5.164786 0.000000 2215 +heavili 0 3 5.857933 0.000000 3572 +andit 0 3 5.857933 0.000000 3328 +thewisconsin 0 2 6.263398 0.000000 6196 +lederman 1 1 6.957497 6.957497 18934 +huss 0 1 6.957497 0.000000 18935 +mpistandard 0 1 6.957497 0.000000 18936 +iscov 0 1 6.957497 0.000000 18937 +prismproject 0 1 6.957497 0.000000 18938 +invol 0 1 6.957497 0.000000 18939 +ongoingwork 0 1 6.957497 0.000000 18940 +compressedtar 0 1 6.957497 0.000000 18941 +desper 0 1 6.957497 0.000000 18942 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..203e88d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +network 1 168 1.791759 1.791759 61 +wisconsin 0 169 1.791759 0.000000 54 +implement 0 152 1.791759 0.000000 52 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +proceed 0 93 2.397895 0.000000 152 +control 0 82 2.484907 0.000000 164 +second 0 81 2.484907 0.000000 166 +ieee 0 86 2.484907 0.000000 190 +dynam 0 76 2.564949 0.000000 194 +april 0 77 2.564949 0.000000 196 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +window 0 68 2.708050 0.000000 242 +august 0 66 2.708050 0.000000 257 +virtual 0 62 2.772589 0.000000 285 +sampl 0 53 2.944439 0.000000 339 +tabl 0 51 2.995732 0.000000 346 +telephon 0 50 3.044522 0.000000 373 +protocol 0 45 3.135494 0.000000 407 +fast 0 42 3.218876 0.000000 429 +combin 0 42 3.218876 0.000000 421 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +focus 0 29 3.583519 0.000000 584 +feedback 0 19 4.007333 0.000000 854 +speed 0 18 4.060443 0.000000 911 +rate 0 15 4.248495 0.000000 1037 +francisco 0 14 4.317488 0.000000 1095 +circuit 0 13 4.382027 0.000000 1131 +loop 0 11 4.553877 0.000000 1310 +clock 0 11 4.553877 0.000000 1320 +purdu 0 10 4.653960 0.000000 1466 +packet 0 10 4.653960 0.000000 1415 +lawrenc 0 7 5.010635 0.000000 1908 +conferenc 0 7 5.010635 0.000000 1857 +mukherje 0 5 5.347108 0.000000 2586 +testb 0 5 5.347108 0.000000 2456 +admiss 0 4 5.568345 0.000000 2704 +darpa 0 4 5.568345 0.000000 2944 +phenomena 0 4 5.568345 0.000000 2962 +landweb 0 3 5.857933 0.000000 3402 +congest 0 3 5.857933 0.000000 3993 +infocom 0 3 5.857933 0.000000 3283 +atmospher 0 3 5.857933 0.000000 3388 +baltimor 0 3 5.857933 0.000000 3809 +theieee 0 2 6.263398 0.000000 6043 +florenc 0 2 6.263398 0.000000 4950 +faber 0 1 6.957497 0.000000 18943 +electronicmail 0 1 6.957497 0.000000 18944 +participatingin 0 1 6.957497 0.000000 18945 +gigabit 0 1 6.957497 0.000000 18946 +involvesth 0 1 6.957497 0.000000 18947 +atgigabit 0 1 6.957497 0.000000 18948 +onissu 0 1 6.957497 0.000000 18949 +visualizationof 0 1 6.957497 0.000000 18950 +establishmentmethod 0 1 6.957497 0.000000 18951 +olsen 0 1 6.957497 0.000000 18952 +witht 0 1 6.957497 0.000000 18953 +sigcommconfer 0 1 6.957497 0.000000 18954 +coursesconnect 0 1 6.957497 0.000000 18955 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..3eeb5f24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +madison 0 165 1.791759 0.000000 55 +address 0 170 1.791759 0.000000 62 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +lectur 0 135 1.945910 0.000000 73 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +dayton 0 119 2.079442 0.000000 104 +compil 0 122 2.079442 0.000000 96 +person 0 111 2.197225 0.000000 117 +section 0 94 2.397895 0.000000 149 +west 0 83 2.484907 0.000000 192 +servic 0 72 2.639057 0.000000 236 +intellig 0 72 2.639057 0.000000 225 +thursdai 0 70 2.708050 0.000000 241 +street 0 63 2.772589 0.000000 293 +wednesdai 0 64 2.772589 0.000000 261 +artifici 0 63 2.772589 0.000000 280 +variou 0 56 2.890372 0.000000 317 +appoint 0 49 3.044522 0.000000 358 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +respons 0 37 3.332205 0.000000 476 +comp 0 26 3.688879 0.000000 650 +women 0 16 4.174387 0.000000 1004 +cognit 0 16 4.174387 0.000000 986 +career 0 12 4.465908 0.000000 1287 +linguist 0 9 4.753590 0.000000 1593 +utah 0 9 4.753590 0.000000 1585 +lloyd 0 6 5.164786 0.000000 2103 +chemistri 0 5 5.347108 0.000000 2405 +shannon 0 1 6.957497 0.000000 18956 +xsoft 0 1 6.957497 0.000000 18957 +lexdemo 0 1 6.957497 0.000000 18958 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..7256b1ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +search 0 95 2.397895 0.000000 155 +real 0 93 2.397895 0.000000 144 +pictur 0 89 2.397895 0.000000 160 +thing 0 84 2.484907 0.000000 189 +know 0 80 2.564949 0.000000 198 +good 0 77 2.564949 0.000000 200 +meet 0 72 2.639057 0.000000 229 +free 0 73 2.639057 0.000000 224 +name 0 72 2.639057 0.000000 220 +appli 0 71 2.639057 0.000000 226 +html 0 75 2.639057 0.000000 235 +java 0 70 2.708050 0.000000 248 +interact 0 62 2.772589 0.000000 270 +virtual 0 62 2.772589 0.000000 285 +organ 0 65 2.772589 0.000000 265 +automat 0 61 2.833213 0.000000 306 +semest 0 58 2.890372 0.000000 312 +space 0 57 2.890372 0.000000 310 +advisor 0 51 2.995732 0.000000 355 +maintain 0 51 2.995732 0.000000 342 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +cool 0 49 3.044522 0.000000 374 +friend 0 48 3.044522 0.000000 376 +electron 0 47 3.091042 0.000000 379 +favorit 0 44 3.135494 0.000000 410 +realli 0 40 3.258097 0.000000 444 +submit 0 39 3.258097 0.000000 440 +seminar 0 38 3.295837 0.000000 470 +feel 0 37 3.332205 0.000000 483 +go 0 33 3.433987 0.000000 529 +taught 0 33 3.433987 0.000000 526 +someth 0 31 3.496508 0.000000 554 +option 1 30 3.555348 3.555348 575 +becom 0 28 3.610918 0.000000 603 +concern 0 25 3.737670 0.000000 666 +higher 0 24 3.761200 0.000000 690 +longer 0 20 3.951244 0.000000 816 +figur 0 18 4.060443 0.000000 903 +women 0 16 4.174387 0.000000 1004 +stock 0 16 4.174387 0.000000 1007 +todd 0 15 4.248495 0.000000 1051 +anywai 0 15 4.248495 0.000000 1047 +mayb 0 15 4.248495 0.000000 1014 +save 0 14 4.317488 0.000000 1099 +edui 0 13 4.382027 0.000000 1193 +coordin 0 13 4.382027 0.000000 1182 +wait 0 13 4.382027 0.000000 1168 +wife 0 13 4.382027 0.000000 1196 +entertain 0 12 4.465908 0.000000 1286 +basketbal 0 12 4.465908 0.000000 1289 +readi 0 12 4.465908 0.000000 1242 +food 0 12 4.465908 0.000000 1285 +fill 0 11 4.553877 0.000000 1349 +market 0 11 4.553877 0.000000 1361 +keyword 0 11 4.553877 0.000000 1356 +candid 0 9 4.753590 0.000000 1606 +jump 0 9 4.753590 0.000000 1603 +simpli 0 8 4.875197 0.000000 1626 +reload 0 8 4.875197 0.000000 1682 +appar 0 7 5.010635 0.000000 1958 +iowa 0 7 5.010635 0.000000 1971 +polit 0 6 5.164786 0.000000 2115 +legal 0 6 5.164786 0.000000 2094 +troubl 0 6 5.164786 0.000000 2002 +christoph 0 5 5.347108 0.000000 2512 +czar 0 5 5.347108 0.000000 2503 +tuft 0 5 5.347108 0.000000 2575 +amus 0 5 5.347108 0.000000 2366 +sing 0 5 5.347108 0.000000 2499 +gui 0 5 5.347108 0.000000 2573 +girlfriend 0 5 5.347108 0.000000 2579 +festiv 0 4 5.568345 0.000000 2952 +turnidg 0 4 5.568345 0.000000 2829 +superhighwai 0 4 5.568345 0.000000 2943 +chees 0 4 5.568345 0.000000 3090 +rival 0 3 5.857933 0.000000 3583 +tiger 0 3 5.857933 0.000000 3897 +wealth 0 3 5.857933 0.000000 3353 +traci 0 3 5.857933 0.000000 3984 +child 0 3 5.857933 0.000000 3542 +tast 0 3 5.857933 0.000000 3666 +kick 0 3 5.857933 0.000000 3962 +alien 0 3 5.857933 0.000000 3930 +laugh 0 3 5.857933 0.000000 3659 +defeat 0 2 6.263398 0.000000 5401 +kirk 0 2 6.263398 0.000000 6175 +killer 0 2 6.263398 0.000000 6159 +tragic 0 2 6.263398 0.000000 6114 +junki 0 2 6.263398 0.000000 5457 +outer 0 2 6.263398 0.000000 4464 +gross 0 2 6.263398 0.000000 5989 +nake 0 2 6.263398 0.000000 6197 +offspr 0 2 6.263398 0.000000 5699 +bogu 0 2 6.263398 0.000000 5471 +wierd 0 2 6.263398 0.000000 6093 +luka 0 1 6.957497 0.000000 18959 +lone 0 1 6.957497 0.000000 18960 +checkbox 0 1 6.957497 0.000000 18961 +pagechristoph 0 1 6.957497 0.000000 18962 +lukasrelev 0 1 6.957497 0.000000 18963 +mspl 0 1 6.957497 0.000000 18964 +workshipi 0 1 6.957497 0.000000 18965 +quest 0 1 6.957497 0.000000 18966 +sunivers 0 1 6.957497 0.000000 18967 +prisonerthi 0 1 6.957497 0.000000 18968 +quoteserv 0 1 6.957497 0.000000 18969 +fabul 0 1 6.957497 0.000000 18970 +pagebet 0 1 6.957497 0.000000 18971 +identitycaptain 0 1 6.957497 0.000000 18972 +throughamaz 0 1 6.957497 0.000000 18973 +withtri 0 1 6.957497 0.000000 18974 +teri 0 1 6.957497 0.000000 18975 +incred 0 1 6.957497 0.000000 18976 +catthi 0 1 6.957497 0.000000 18977 +buttmunchextrem 0 1 6.957497 0.000000 18978 +dudemichael 0 1 6.957497 0.000000 18979 +nesmith 0 1 6.957497 0.000000 18980 +fanfoolmyth 0 1 6.957497 0.000000 18981 +figurewick 0 1 6.957497 0.000000 18982 +playervalu 0 1 6.957497 0.000000 18983 +studentment 0 1 6.957497 0.000000 18984 +defectivea 0 1 6.957497 0.000000 18985 +wkrp 0 1 6.957497 0.000000 18986 +cincinatti 0 1 6.957497 0.000000 18987 +figuregeek 0 1 6.957497 0.000000 18988 +tradesgonzo 0 1 6.957497 0.000000 18989 +admirernetscap 0 1 6.957497 0.000000 18990 +pornpersonifi 0 1 6.957497 0.000000 18991 +condom 0 1 6.957497 0.000000 18992 +stretch 0 1 6.957497 0.000000 18993 +blowflam 0 1 6.957497 0.000000 18994 +testicl 0 1 6.957497 0.000000 18995 +goodpoetri 0 1 6.957497 0.000000 18996 +guruhogwildthi 0 1 6.957497 0.000000 18997 +assman 0 1 6.957497 0.000000 18998 +manbig 0 1 6.957497 0.000000 18999 +dudeuh 0 1 6.957497 0.000000 19000 +ohprofession 0 1 6.957497 0.000000 19001 +muff 0 1 6.957497 0.000000 19002 +diverregress 0 1 6.957497 0.000000 19003 +lifeformherald 0 1 6.957497 0.000000 19004 +invas 0 1 6.957497 0.000000 19005 +forcechri 0 1 6.957497 0.000000 19006 +formsalienherpetophiletodd 0 1 6.957497 0.000000 19007 +hatth 0 1 6.957497 0.000000 19008 +mancreepi 0 1 6.957497 0.000000 19009 +headsmal 0 1 6.957497 0.000000 19010 +planetdr 0 1 6.957497 0.000000 19011 +companioneast 0 1 6.957497 0.000000 19012 +bunnycyberweenietcl 0 1 6.957497 0.000000 19013 +hellbeast 0 1 6.957497 0.000000 19014 +drug 0 1 6.957497 0.000000 19015 +cosmo 0 1 6.957497 0.000000 19016 +irrit 0 1 6.957497 0.000000 19017 +scatolog 0 1 6.957497 0.000000 19018 +pervert 0 1 6.957497 0.000000 19019 +etymolog 0 1 6.957497 0.000000 19020 +phat 0 1 6.957497 0.000000 19021 +gnarli 0 1 6.957497 0.000000 19022 +cybermuffin 0 1 6.957497 0.000000 19023 +erotica 0 1 6.957497 0.000000 19024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..c69035b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +look 0 107 2.197225 0.000000 115 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +pictur 0 89 2.397895 0.000000 160 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +educ 0 86 2.484907 0.000000 191 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +dept 0 64 2.772589 0.000000 291 +januari 0 62 2.772589 0.000000 264 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +suggest 0 53 2.944439 0.000000 331 +visitor 0 49 3.044522 0.000000 371 +could 0 46 3.091042 0.000000 383 +compani 0 41 3.218876 0.000000 423 +china 0 37 3.332205 0.000000 487 +thank 0 23 3.806662 0.000000 721 +self 0 22 3.850148 0.000000 761 +alumni 0 21 3.912023 0.000000 807 +miller 0 17 4.110874 0.000000 949 +side 0 15 4.248495 0.000000 1022 +promot 0 12 4.465908 0.000000 1235 +surf 0 11 4.553877 0.000000 1301 +america 0 11 4.553877 0.000000 1370 +paradyn 0 9 4.753590 0.000000 1614 +charg 0 9 4.753590 0.000000 1582 +port 0 8 4.875197 0.000000 1766 +hack 0 7 5.010635 0.000000 1950 +iowa 0 7 5.010635 0.000000 1971 +onto 0 6 5.164786 0.000000 2089 +barton 0 5 5.347108 0.000000 2371 +girlfriend 0 5 5.347108 0.000000 2579 +ignor 0 5 5.347108 0.000000 2288 +ling 0 4 5.568345 0.000000 3045 +hpux 0 3 5.857933 0.000000 3780 +temporarili 0 3 5.857933 0.000000 3692 +wuhan 0 2 6.263398 0.000000 5589 +sheboygan 0 2 6.263398 0.000000 6198 +shameless 0 2 6.263398 0.000000 6146 +chinaand 0 2 6.263398 0.000000 5151 +officem 0 2 6.263398 0.000000 6092 +marcelo 0 2 6.263398 0.000000 6199 +infom 0 2 6.263398 0.000000 5425 +bother 0 2 6.263398 0.000000 6164 +zheng 0 1 6.957497 0.000000 19025 +lzheng 0 1 6.957497 0.000000 19026 +boss 0 1 6.957497 0.000000 19027 +prese 0 1 6.957497 0.000000 19028 +winsconsin 0 1 6.957497 0.000000 19029 +goncalv 0 1 6.957497 0.000000 19030 +hereif 0 1 6.957497 0.000000 19031 +sthe 0 1 6.957497 0.000000 19032 +schoolssend 0 1 6.957497 0.000000 19033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..907eaf10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +wisc 0 242 1.386294 0.000000 33 +like 0 132 1.945910 0.000000 81 +look 0 107 2.197225 0.000000 115 +final 0 116 2.197225 0.000000 108 +need 0 98 2.302585 0.000000 135 +start 0 83 2.484907 0.000000 173 +know 1 80 2.564949 2.564949 198 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +plai 0 60 2.833213 0.000000 307 +advisor 0 51 2.995732 0.000000 355 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +photo 0 31 3.496508 0.000000 561 +turn 0 29 3.583519 0.000000 586 +team 0 27 3.637586 0.000000 625 +dai 0 22 3.850148 0.000000 753 +later 0 15 4.248495 0.000000 1043 +america 0 11 4.553877 0.000000 1370 +hello 0 10 4.653960 0.000000 1407 +sundai 0 10 4.653960 0.000000 1387 +said 0 9 4.753590 0.000000 1571 +round 0 8 4.875197 0.000000 1769 +monei 0 7 5.010635 0.000000 1934 +golf 0 6 5.164786 0.000000 2178 +leagu 0 4 5.568345 0.000000 3040 +passion 0 3 5.857933 0.000000 3633 +manuvir 0 1 6.957497 0.000000 19034 +pagemanuvir 0 1 6.957497 0.000000 19035 +dasnow 0 1 6.957497 0.000000 19036 +andwhat 0 1 6.957497 0.000000 19037 +feelfre 0 1 6.957497 0.000000 19038 +somethingsend 0 1 6.957497 0.000000 19039 +anact 0 1 6.957497 0.000000 19040 +manuvirwhat 0 1 6.957497 0.000000 19041 +thisto 0 1 6.957497 0.000000 19042 +theorigin 0 1 6.957497 0.000000 19043 +consin 0 1 6.957497 0.000000 19044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..94cfa63a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,395 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +like 0 132 1.945910 0.000000 81 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +confer 0 126 2.079442 0.000000 100 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +search 0 95 2.397895 0.000000 155 +ieee 0 86 2.484907 0.000000 190 +level 0 87 2.484907 0.000000 180 +novemb 0 81 2.484907 0.000000 179 +larg 0 82 2.484907 0.000000 168 +requir 0 81 2.484907 0.000000 167 +educ 0 86 2.484907 0.000000 191 +wide 0 84 2.484907 0.000000 185 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +journal 0 83 2.484907 0.000000 183 +help 0 83 2.484907 0.000000 175 +june 0 79 2.564949 0.000000 214 +interfac 0 79 2.564949 0.000000 209 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +complet 0 77 2.564949 0.000000 208 +decemb 0 80 2.564949 0.000000 215 +david 0 71 2.639057 0.000000 232 +symposium 0 72 2.639057 0.000000 238 +onlin 0 75 2.639057 0.000000 223 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +effici 0 73 2.639057 0.000000 233 +solv 0 73 2.639057 0.000000 234 +august 0 66 2.708050 0.000000 257 +simul 0 66 2.708050 0.000000 255 +java 0 70 2.708050 0.000000 248 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +wednesdai 0 64 2.772589 0.000000 261 +organ 0 65 2.772589 0.000000 265 +import 0 65 2.772589 0.000000 282 +prof 0 64 2.772589 0.000000 273 +improv 0 62 2.772589 0.000000 289 +evalu 0 64 2.772589 0.000000 266 +new 0 64 2.772589 0.000000 262 +virtual 0 62 2.772589 0.000000 285 +abstract 0 62 2.772589 0.000000 276 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +content 0 59 2.833213 0.000000 302 +march 0 61 2.833213 0.000000 295 +space 0 57 2.890372 0.000000 310 +think 0 57 2.890372 0.000000 314 +index 0 56 2.890372 0.000000 309 +sampl 0 53 2.944439 0.000000 339 +talk 0 53 2.944439 0.000000 336 +februari 0 54 2.944439 0.000000 328 +hardwar 0 51 2.995732 0.000000 350 +tabl 0 51 2.995732 0.000000 346 +much 0 52 2.995732 0.000000 349 +investig 0 51 2.995732 0.000000 353 +give 0 50 3.044522 0.000000 359 +appoint 0 49 3.044522 0.000000 358 +frequent 0 49 3.044522 0.000000 367 +california 0 46 3.091042 0.000000 388 +effect 0 46 3.091042 0.000000 385 +mark 0 44 3.135494 0.000000 403 +protocol 0 45 3.135494 0.000000 407 +execut 0 45 3.135494 0.000000 404 +directori 0 45 3.135494 0.000000 396 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +transact 0 39 3.258097 0.000000 438 +programm 0 39 3.258097 0.000000 445 +streetmadison 0 38 3.295837 0.000000 474 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +sciencesunivers 0 37 3.332205 0.000000 486 +cost 0 37 3.332205 0.000000 480 +formal 0 37 3.332205 0.000000 478 +multi 0 36 3.367296 0.000000 493 +jame 0 35 3.401197 0.000000 507 +bibliographi 0 34 3.401197 0.000000 518 +award 0 34 3.401197 0.000000 523 +toler 0 33 3.433987 0.000000 533 +john 0 33 3.433987 0.000000 532 +fault 0 32 3.465736 0.000000 547 +extend 0 32 3.465736 0.000000 539 +richard 0 31 3.496508 0.000000 559 +often 0 31 3.496508 0.000000 551 +option 0 30 3.555348 0.000000 575 +robert 0 30 3.555348 0.000000 567 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +cluster 0 28 3.610918 0.000000 612 +multiprocessor 0 28 3.610918 0.000000 605 +pass 0 28 3.610918 0.000000 611 +propos 0 28 3.610918 0.000000 602 +scale 0 28 3.610918 0.000000 613 +determin 0 27 3.637586 0.000000 630 +consist 0 26 3.688879 0.000000 651 +berkelei 0 26 3.688879 0.000000 657 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +revis 0 26 3.688879 0.000000 640 +detect 0 26 3.688879 0.000000 646 +hill 0 25 3.737670 0.000000 670 +supercomput 0 25 3.737670 0.000000 681 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +higher 0 24 3.761200 0.000000 690 +proof 0 23 3.806662 0.000000 720 +size 0 23 3.806662 0.000000 713 +highli 0 23 3.806662 0.000000 725 +cooper 0 22 3.850148 0.000000 757 +sequenti 0 22 3.850148 0.000000 745 +hierarchi 0 22 3.850148 0.000000 744 +annot 0 21 3.912023 0.000000 775 +department 0 20 3.951244 0.000000 839 +fine 0 20 3.951244 0.000000 822 +smith 0 20 3.951244 0.000000 820 +scheme 0 20 3.951244 0.000000 818 +benchmark 0 19 4.007333 0.000000 859 +comparison 0 19 4.007333 0.000000 863 +miss 0 19 4.007333 0.000000 866 +definit 0 19 4.007333 0.000000 864 +wind 0 18 4.060443 0.000000 908 +partial 0 18 4.060443 0.000000 900 +less 0 18 4.060443 0.000000 892 +four 0 18 4.060443 0.000000 905 +steven 0 17 4.110874 0.000000 953 +seek 0 17 4.110874 0.000000 954 +asplo 0 17 4.110874 0.000000 948 +miller 0 17 4.110874 0.000000 949 +estim 0 17 4.110874 0.000000 930 +transfer 0 16 4.174387 0.000000 967 +young 0 16 4.174387 0.000000 991 +eduphon 0 15 4.248495 0.000000 1060 +massiv 0 15 4.248495 0.000000 1026 +hybrid 0 15 4.248495 0.000000 1057 +micro 0 15 4.248495 0.000000 1031 +coher 0 14 4.317488 0.000000 1109 +manner 0 14 4.317488 0.000000 1074 +convent 0 14 4.317488 0.000000 1072 +rank 0 14 4.317488 0.000000 1063 +sigmetr 0 13 4.382027 0.000000 1173 +suit 0 13 4.382027 0.000000 1129 +translat 0 13 4.382027 0.000000 1164 +weak 0 13 4.382027 0.000000 1159 +alan 0 13 4.382027 0.000000 1146 +employ 0 12 4.465908 0.000000 1291 +target 0 12 4.465908 0.000000 1282 +buffer 0 12 4.465908 0.000000 1211 +gupta 0 12 4.465908 0.000000 1241 +mari 0 12 4.465908 0.000000 1266 +wood 0 11 4.553877 0.000000 1355 +isca 0 11 4.553877 0.000000 1354 +rice 0 11 4.553877 0.000000 1336 +michigan 0 11 4.553877 0.000000 1368 +node 0 11 4.553877 0.000000 1326 +transpar 0 11 4.553877 0.000000 1325 +keyword 0 11 4.553877 0.000000 1356 +catalog 0 10 4.653960 0.000000 1431 +grain 0 10 4.653960 0.000000 1448 +princip 0 10 4.653960 0.000000 1397 +sosp 0 10 4.653960 0.000000 1416 +placement 0 10 4.653960 0.000000 1420 +stack 0 10 4.653960 0.000000 1389 +laru 0 9 4.753590 0.000000 1560 +tunnel 0 9 4.753590 0.000000 1615 +patterson 0 9 4.753590 0.000000 1554 +sound 0 9 4.753590 0.000000 1605 +frank 0 9 4.753590 0.000000 1568 +jeffrei 0 9 4.753590 0.000000 1612 +kong 0 9 4.753590 0.000000 1602 +vernon 0 9 4.753590 0.000000 1556 +spec 0 8 4.875197 0.000000 1640 +lewi 0 8 4.875197 0.000000 1700 +secretari 0 8 4.875197 0.000000 1775 +uniprocessor 0 8 4.875197 0.000000 1696 +quantit 0 8 4.875197 0.000000 1654 +presidenti 0 8 4.875197 0.000000 1737 +irregular 0 8 4.875197 0.000000 1768 +joel 0 8 4.875197 0.000000 1698 +unifi 0 8 4.875197 0.000000 1774 +roger 0 7 5.010635 0.000000 1892 +secondari 0 7 5.010635 0.000000 1884 +ann 0 6 5.164786 0.000000 2065 +consensu 0 6 5.164786 0.000000 2080 +unpublish 0 6 5.164786 0.000000 2226 +chandra 0 6 5.164786 0.000000 2091 +microsystem 0 6 5.164786 0.000000 2160 +mukherje 0 5 5.347108 0.000000 2586 +lebeck 0 5 5.347108 0.000000 2582 +reinhardt 0 5 5.347108 0.000000 2583 +babak 0 5 5.347108 0.000000 2584 +falsafi 0 5 5.347108 0.000000 2585 +tempest 0 5 5.347108 0.000000 2548 +engineeringat 0 5 5.347108 0.000000 2561 +advic 0 5 5.347108 0.000000 2509 +educurr 0 5 5.347108 0.000000 2504 +summarymi 0 5 5.347108 0.000000 2580 +middl 0 5 5.347108 0.000000 2372 +hypothet 0 5 5.347108 0.000000 2474 +optimist 0 5 5.347108 0.000000 2501 +chemic 0 5 5.347108 0.000000 2552 +ioanni 0 5 5.347108 0.000000 2553 +forprogram 0 5 5.347108 0.000000 2361 +toc 0 5 5.347108 0.000000 2562 +andrea 0 5 5.347108 0.000000 2375 +hennessi 0 5 5.347108 0.000000 2289 +race 0 5 5.347108 0.000000 2417 +barton 0 5 5.347108 0.000000 2371 +talluri 0 4 5.568345 0.000000 2820 +alvin 0 4 5.568345 0.000000 3084 +markhil 0 4 5.568345 0.000000 2819 +wart 0 4 5.568345 0.000000 2987 +crai 0 4 5.568345 0.000000 3012 +emphas 0 4 5.568345 0.000000 2672 +languagesand 0 4 5.568345 0.000000 3071 +align 0 4 5.568345 0.000000 2863 +manuscript 0 4 5.568345 0.000000 2750 +sabbat 0 4 5.568345 0.000000 2824 +sharma 0 4 5.568345 0.000000 2752 +ppopp 0 4 5.568345 0.000000 2774 +schoina 0 4 5.568345 0.000000 3085 +medium 0 4 5.568345 0.000000 2834 +implic 0 4 5.568345 0.000000 2696 +satish 0 4 5.568345 0.000000 2833 +anoop 0 4 5.568345 0.000000 2770 +ratio 0 4 5.568345 0.000000 2942 +shubhendu 0 3 5.857933 0.000000 4028 +madhusudhan 0 3 5.857933 0.000000 4021 +superpag 0 3 5.857933 0.000000 3978 +megabyt 0 3 5.857933 0.000000 3732 +fingerson 0 3 5.857933 0.000000 4018 +thea 0 3 5.857933 0.000000 4019 +sklenar 0 3 5.857933 0.000000 4020 +programmingc 0 3 5.857933 0.000000 3232 +saltz 0 3 5.857933 0.000000 3385 +surpass 0 3 5.857933 0.000000 3247 +tradeoff 0 3 5.857933 0.000000 3387 +adv 0 2 6.263398 0.000000 4540 +andelectr 0 2 6.263398 0.000000 6200 +wisconsint 0 2 6.263398 0.000000 6155 +teachingfal 0 2 6.263398 0.000000 5532 +ifal 0 2 6.263398 0.000000 4776 +architecturec 0 2 6.263398 0.000000 6127 +sustain 0 2 6.263398 0.000000 6201 +mywork 0 2 6.263398 0.000000 5800 +projectwith 0 2 6.263398 0.000000 4986 +uniformli 0 2 6.263398 0.000000 6202 +todevelop 0 2 6.263398 0.000000 5448 +aredevelop 0 2 6.263398 0.000000 4930 +similarto 0 2 6.263398 0.000000 6074 +aeronaut 0 2 6.263398 0.000000 5958 +anddavid 0 2 6.263398 0.000000 6126 +sashikanth 0 2 6.263398 0.000000 6122 +chandrasekaran 0 2 6.263398 0.000000 6121 +shamik 0 2 6.263398 0.000000 6195 +memorymultiprocessor 0 2 6.263398 0.000000 4529 +dionisio 0 2 6.263398 0.000000 6203 +pnevmatikato 0 2 6.263398 0.000000 6204 +subbarao 0 2 6.263398 0.000000 6205 +shing 0 2 6.263398 0.000000 5146 +sarita 0 1 6.957497 0.000000 19045 +kessler 0 1 6.957497 0.000000 19046 +subblock 0 1 6.957497 0.000000 19047 +sampler 0 1 6.957497 0.000000 19048 +madhu 0 1 6.957497 0.000000 19049 +tlb 0 1 6.957497 0.000000 19050 +pagemark 0 1 6.957497 0.000000 19051 +andsummari 0 1 6.957497 0.000000 19052 +graduateslink 0 1 6.957497 0.000000 19053 +oralpresent 0 1 6.957497 0.000000 19054 +forcach 0 1 6.957497 0.000000 19055 +usamarkhil 0 1 6.957497 0.000000 19056 +icatalog 0 1 6.957497 0.000000 19057 +teachc 0 1 6.957497 0.000000 19058 +iieduc 0 1 6.957497 0.000000 19059 +evaluationresearch 0 1 6.957497 0.000000 19060 +multiprocessorsand 0 1 6.957497 0.000000 19061 +evaluationtechniqu 0 1 6.957497 0.000000 19062 +windtunnel 0 1 6.957497 0.000000 19063 +manystud 0 1 6.957497 0.000000 19064 +computerswil 0 1 6.957497 0.000000 19065 +levelparallel 0 1 6.957497 0.000000 19066 +inwhich 0 1 6.957497 0.000000 19067 +recentlypropos 0 1 6.957497 0.000000 19068 +aclust 0 1 6.957497 0.000000 19069 +toolsto 0 1 6.957497 0.000000 19070 +cull 0 1 6.957497 0.000000 19071 +designairplan 0 1 6.957497 0.000000 19072 +talluritarget 0 1 6.957497 0.000000 19073 +lookasid 0 1 6.957497 0.000000 19074 +superpagesand 0 1 6.957497 0.000000 19075 +asplosandsosppap 0 1 6.957497 0.000000 19076 +papersth 0 1 6.957497 0.000000 19077 +bidirect 0 1 6.957497 0.000000 19078 +pad 0 1 6.957497 0.000000 19079 +yousef 0 1 6.957497 0.000000 19080 +khalidi 0 1 6.957497 0.000000 19081 +microstructur 0 1 6.957497 0.000000 19082 +electrostat 0 1 6.957497 0.000000 19083 +traenkl 0 1 6.957497 0.000000 19084 +sangta 0 1 6.957497 0.000000 19085 +tpd 0 1 6.957497 0.000000 19086 +farid 0 1 6.957497 0.000000 19087 +pour 0 1 6.957497 0.000000 19088 +palacharla 0 1 6.957497 0.000000 19089 +kourosh 0 1 6.957497 0.000000 19090 +gharachorloo 0 1 6.957497 0.000000 19091 +netzer 0 1 6.957497 0.000000 19092 +vikram 0 1 6.957497 0.000000 19093 +kessleracm 0 1 6.957497 0.000000 19094 +graduatesmadhusudhan 0 1 6.957497 0.000000 19095 +updatedw 0 1 6.957497 0.000000 19096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..9b6c9d49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +usaphon 0 9 4.753590 0.000000 1600 +assistantdepart 0 8 4.875197 0.000000 1784 +zaharioudaki 0 2 6.263398 0.000000 6119 +marko 1 1 6.957497 6.957497 19097 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..140ba41f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +section 0 94 2.397895 0.000000 149 +michael 0 35 3.401197 0.000000 514 +birk 0 4 5.568345 0.000000 2791 +mbirk 0 3 5.857933 0.000000 3501 +alltraxx 0 1 6.957497 0.000000 19098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..5293deba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +object 0 138 1.945910 0.000000 79 +hall 0 146 1.945910 0.000000 65 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +manag 0 114 2.197225 0.000000 125 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +ieee 0 86 2.484907 0.000000 190 +orient 0 80 2.564949 0.000000 205 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +effici 0 73 2.639057 0.000000 233 +free 0 73 2.639057 0.000000 224 +simul 0 66 2.708050 0.000000 255 +march 0 61 2.833213 0.000000 295 +space 0 57 2.890372 0.000000 310 +pointer 0 48 3.044522 0.000000 368 +mark 0 44 3.135494 0.000000 403 +michael 0 35 3.401197 0.000000 514 +proc 0 26 3.688879 0.000000 649 +sigmod 0 19 4.007333 0.000000 877 +white 0 17 4.110874 0.000000 951 +dewitt 0 12 4.465908 0.000000 1270 +persist 0 11 4.553877 0.000000 1367 +franklin 0 10 4.653960 0.000000 1436 +naughton 0 10 4.653960 0.000000 1450 +solomon 0 8 4.875197 0.000000 1716 +carei 0 8 4.875197 0.000000 1781 +tsatalo 0 5 5.347108 0.000000 2581 +mcauliff 1 4 5.568345 5.568345 3083 +marvin 0 4 5.568345 0.000000 2806 +zwill 0 4 5.568345 0.000000 3076 +schuh 0 3 5.857933 0.000000 4014 +swizzl 0 3 5.857933 0.000000 3883 +shoringup 0 1 6.957497 0.000000 19099 +atrac 0 1 6.957497 0.000000 19100 +towardseffect 0 1 6.957497 0.000000 19101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..98f78634 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +peopl 0 96 2.302585 0.000000 132 +school 0 84 2.484907 0.000000 188 +level 0 87 2.484907 0.000000 180 +appear 0 78 2.564949 0.000000 210 +know 0 80 2.564949 0.000000 198 +html 0 75 2.639057 0.000000 235 +symposium 0 72 2.639057 0.000000 238 +meet 0 72 2.639057 0.000000 229 +dept 0 64 2.772589 0.000000 291 +automat 0 61 2.833213 0.000000 306 +think 0 57 2.890372 0.000000 314 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +talk 0 53 2.944439 0.000000 336 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +pointer 0 48 3.044522 0.000000 368 +cool 0 49 3.044522 0.000000 374 +principl 0 48 3.044522 0.000000 357 +term 0 43 3.178054 0.000000 411 +fast 0 42 3.218876 0.000000 429 +movi 0 40 3.258097 0.000000 459 +littl 0 39 3.258097 0.000000 454 +submit 0 39 3.258097 0.000000 440 +realli 0 40 3.258097 0.000000 444 +paul 0 38 3.295837 0.000000 471 +abl 0 30 3.555348 0.000000 566 +hope 0 28 3.610918 0.000000 610 +never 0 25 3.737670 0.000000 671 +accur 0 25 3.737670 0.000000 680 +flow 0 24 3.761200 0.000000 700 +togeth 0 23 3.806662 0.000000 714 +try 0 22 3.850148 0.000000 764 +hous 0 21 3.912023 0.000000 801 +watch 0 21 3.912023 0.000000 789 +wrote 0 20 3.951244 0.000000 830 +mostli 0 19 4.007333 0.000000 869 +miss 0 19 4.007333 0.000000 866 +lot 0 18 4.060443 0.000000 889 +previous 0 17 4.110874 0.000000 923 +brown 0 16 4.174387 0.000000 977 +todd 0 15 4.248495 0.000000 1051 +doesn 0 15 4.248495 0.000000 1055 +susan 0 15 4.248495 0.000000 1050 +believ 0 13 4.382027 0.000000 1187 +recurs 0 13 4.382027 0.000000 1127 +step 0 13 4.382027 0.000000 1138 +emac 0 13 4.382027 0.000000 1143 +menu 0 13 4.382027 0.000000 1156 +jonathan 0 13 4.382027 0.000000 1174 +submiss 0 11 4.553877 0.000000 1298 +mode 0 9 4.753590 0.000000 1492 +marc 1 8 4.875197 4.875197 1680 +shapiro 0 8 4.875197 0.000000 1686 +analys 0 8 4.875197 0.000000 1666 +pldi 0 8 4.875197 0.000000 1704 +chan 0 7 5.010635 0.000000 1876 +elementari 0 7 5.010635 0.000000 1825 +interrupt 0 7 5.010635 0.000000 1793 +tag 0 7 5.010635 0.000000 1821 +lawrenc 0 7 5.010635 0.000000 1908 +recov 0 6 5.164786 0.000000 2235 +goldstein 0 6 5.164786 0.000000 2168 +elain 0 5 5.347108 0.000000 2496 +hyper 0 5 5.347108 0.000000 2435 +horwitz 0 5 5.347108 0.000000 2411 +fear 0 4 5.568345 0.000000 2911 +backward 0 4 5.568345 0.000000 2638 +popl 0 4 5.568345 0.000000 3068 +insensit 0 4 5.568345 0.000000 2716 +hoar 0 3 5.857933 0.000000 3875 +obsess 0 2 6.263398 0.000000 5924 +disappear 0 2 6.263398 0.000000 4748 +accid 0 2 6.263398 0.000000 5961 +softwarei 0 2 6.263398 0.000000 4960 +tautolog 0 1 6.957497 0.000000 19102 +fond 0 1 6.957497 0.000000 19103 +repuls 0 1 6.957497 0.000000 19104 +ponder 0 1 6.957497 0.000000 19105 +jacki 0 1 6.957497 0.000000 19106 +dimasi 0 1 6.957497 0.000000 19107 +twisti 0 1 6.957497 0.000000 19108 +amanda 0 1 6.957497 0.000000 19109 +peet 0 1 6.957497 0.000000 19110 +retreather 0 1 6.957497 0.000000 19111 +thepul 0 1 6.957497 0.000000 19112 +cobbl 0 1 6.957497 0.000000 19113 +nowinclud 0 1 6.957497 0.000000 19114 +shapiroand 0 1 6.957497 0.000000 19115 +marion 0 1 6.957497 0.000000 19116 +ferguson 0 1 6.957497 0.000000 19117 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..1a119493 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +modifi 0 178 1.609438 0.000000 35 +oper 0 180 1.609438 0.000000 34 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +introduct 0 126 2.079442 0.000000 87 +build 0 85 2.484907 0.000000 184 +august 0 66 2.708050 0.000000 257 +appoint 0 49 3.044522 0.000000 358 +mellen 0 2 6.263398 0.000000 4708 +mellencamp 0 2 6.263398 0.000000 4707 +pagerob 0 1 6.957497 0.000000 19118 +minimalist 0 1 6.957497 0.000000 19119 +taship 0 1 6.957497 0.000000 19120 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..3454894c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +recent 0 167 1.791759 0.000000 58 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +info 0 85 2.484907 0.000000 176 +west 0 83 2.484907 0.000000 192 +second 0 81 2.484907 0.000000 166 +want 0 79 2.564949 0.000000 199 +david 0 71 2.639057 0.000000 232 +plan 0 65 2.772589 0.000000 272 +back 0 60 2.833213 0.000000 297 +semest 0 58 2.890372 0.000000 312 +major 0 56 2.890372 0.000000 315 +direct 0 57 2.890372 0.000000 316 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +even 0 45 3.135494 0.000000 393 +map 0 39 3.258097 0.000000 452 +often 0 31 3.496508 0.000000 551 +great 0 27 3.637586 0.000000 626 +determin 0 27 3.637586 0.000000 630 +spent 0 25 3.737670 0.000000 676 +eric 0 19 4.007333 0.000000 870 +miss 0 19 4.007333 0.000000 866 +coupl 0 17 4.110874 0.000000 939 +brother 0 13 4.382027 0.000000 1189 +sister 0 9 4.753590 0.000000 1524 +undergrad 0 9 4.753590 0.000000 1589 +soccer 0 8 4.875197 0.000000 1752 +chanc 0 7 5.010635 0.000000 1960 +chess 0 5 5.347108 0.000000 2486 +rewrit 0 5 5.347108 0.000000 2367 +rep 0 4 5.568345 0.000000 3087 +somedai 0 3 5.857933 0.000000 3919 +michel 0 3 5.857933 0.000000 3791 +distract 0 3 5.857933 0.000000 3945 +melski 1 2 6.263398 6.263398 4780 +pagedavid 0 2 6.263398 0.000000 5114 +mill 0 2 6.263398 0.000000 6193 +awesom 0 2 6.263398 0.000000 6167 +russia 0 2 6.263398 0.000000 5756 +hasti 0 2 6.263398 0.000000 6173 +steal 0 2 6.263398 0.000000 5485 +russian 0 1 6.957497 0.000000 19121 +melskicurr 0 1 6.957497 0.000000 19122 +statisticsmadison 0 1 6.957497 0.000000 19123 +permen 0 1 6.957497 0.000000 19124 +ivesmarshfield 0 1 6.957497 0.000000 19125 +kasei 0 1 6.957497 0.000000 19126 +myexact 0 1 6.957497 0.000000 19127 +studiesher 0 1 6.957497 0.000000 19128 +semesterof 0 1 6.957497 0.000000 19129 +beenbik 0 1 6.957497 0.000000 19130 +numerousbook 0 1 6.957497 0.000000 19131 +tomapquest 0 1 6.957497 0.000000 19132 +alot 0 1 6.957497 0.000000 19133 +marshfield 0 1 6.957497 0.000000 19134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..4d8b707b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +compil 0 122 2.079442 0.000000 96 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +confer 0 126 2.079442 0.000000 100 +postscript 0 131 2.079442 0.000000 90 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +mathemat 0 108 2.197225 0.000000 123 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +techniqu 0 99 2.302585 0.000000 138 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +present 0 91 2.397895 0.000000 145 +imag 0 91 2.397895 0.000000 161 +sinc 0 90 2.397895 0.000000 159 +associ 0 93 2.397895 0.000000 151 +west 0 83 2.484907 0.000000 192 +thing 0 84 2.484907 0.000000 189 +ieee 0 86 2.484907 0.000000 190 +school 0 84 2.484907 0.000000 188 +resourc 0 81 2.484907 0.000000 172 +educ 0 86 2.484907 0.000000 191 +build 0 85 2.484907 0.000000 184 +optim 0 79 2.564949 0.000000 197 +know 0 80 2.564949 0.000000 198 +dynam 0 76 2.564949 0.000000 194 +tuesdai 0 73 2.639057 0.000000 219 +addit 0 74 2.639057 0.000000 228 +nation 0 74 2.639057 0.000000 240 +symposium 0 72 2.639057 0.000000 238 +java 0 70 2.708050 0.000000 248 +thursdai 0 70 2.708050 0.000000 241 +view 0 70 2.708050 0.000000 254 +receiv 0 66 2.708050 0.000000 244 +evalu 0 64 2.772589 0.000000 266 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +best 0 59 2.833213 0.000000 299 +simpl 0 60 2.833213 0.000000 298 +direct 0 57 2.890372 0.000000 316 +explor 0 58 2.890372 0.000000 324 +summer 0 56 2.890372 0.000000 311 +thesi 0 57 2.890372 0.000000 327 +space 0 57 2.890372 0.000000 310 +found 0 53 2.944439 0.000000 337 +scientif 0 53 2.944439 0.000000 341 +hardwar 0 51 2.995732 0.000000 350 +profession 0 51 2.995732 0.000000 345 +standard 0 48 3.044522 0.000000 365 +right 0 48 3.044522 0.000000 363 +mark 0 44 3.135494 0.000000 403 +algebra 0 45 3.135494 0.000000 394 +even 0 45 3.135494 0.000000 393 +show 0 43 3.178054 0.000000 417 +fast 0 42 3.218876 0.000000 429 +compani 0 41 3.218876 0.000000 423 +combin 0 42 3.218876 0.000000 421 +live 0 40 3.258097 0.000000 451 +streetmadison 0 38 3.295837 0.000000 474 +open 0 38 3.295837 0.000000 469 +game 0 36 3.367296 0.000000 498 +multi 0 36 3.367296 0.000000 493 +next 0 34 3.401197 0.000000 517 +jame 0 35 3.401197 0.000000 507 +eduoffic 0 33 3.433987 0.000000 531 +quot 0 29 3.583519 0.000000 582 +team 0 27 3.637586 0.000000 625 +detect 0 26 3.688879 0.000000 646 +rule 0 26 3.688879 0.000000 638 +challeng 0 26 3.688879 0.000000 653 +hill 0 25 3.737670 0.000000 670 +departmentunivers 0 24 3.761200 0.000000 711 +mobil 0 23 3.806662 0.000000 730 +honor 0 23 3.806662 0.000000 729 +head 0 23 3.806662 0.000000 732 +serv 0 22 3.850148 0.000000 758 +martin 1 21 3.912023 3.912023 794 +programminglanguag 0 21 3.912023 0.000000 782 +divis 0 21 3.912023 0.000000 803 +wrote 0 20 3.951244 0.000000 830 +wonder 0 20 3.951244 0.000000 815 +element 0 18 4.060443 0.000000 895 +ultim 0 17 4.110874 0.000000 943 +medic 0 17 4.110874 0.000000 958 +senior 0 14 4.317488 0.000000 1120 +train 0 14 4.317488 0.000000 1066 +charl 0 13 4.382027 0.000000 1149 +everyon 0 13 4.382027 0.000000 1148 +land 0 12 4.465908 0.000000 1273 +promot 0 12 4.465908 0.000000 1235 +basketbal 0 12 4.465908 0.000000 1289 +player 0 11 4.553877 0.000000 1371 +transmiss 0 9 4.753590 0.000000 1588 +discov 0 9 4.753590 0.000000 1562 +babylon 0 8 4.875197 0.000000 1731 +footbal 0 7 5.010635 0.000000 1912 +fischer 0 7 5.010635 0.000000 1893 +interestsi 0 7 5.010635 0.000000 1969 +paramet 0 7 5.010635 0.000000 1796 +dedic 0 7 5.010635 0.000000 1843 +ethic 0 7 5.010635 0.000000 1786 +trade 0 7 5.010635 0.000000 1815 +advis 0 6 5.164786 0.000000 2173 +reconstruct 0 6 5.164786 0.000000 2170 +determinist 0 6 5.164786 0.000000 2034 +pace 0 6 5.164786 0.000000 2011 +minnesota 0 5 5.347108 0.000000 2469 +argonn 0 5 5.347108 0.000000 2461 +nuclear 0 5 5.347108 0.000000 2576 +frisbe 0 5 5.347108 0.000000 2560 +publicationsresearch 0 4 5.568345 0.000000 2876 +chees 0 4 5.568345 0.000000 3090 +fink 0 3 5.857933 0.000000 3425 +assistantcomput 0 3 5.857933 0.000000 4027 +usaemail 0 3 5.857933 0.000000 3722 +sit 0 3 5.857933 0.000000 3953 +informationtechnolog 0 3 5.857933 0.000000 3836 +interchang 0 3 5.857933 0.000000 3893 +myfavorit 0 3 5.857933 0.000000 3852 +armi 0 3 5.857933 0.000000 3562 +milo 1 2 6.263398 6.263398 4781 +spectroscopi 0 2 6.263398 0.000000 6206 +meth 0 2 6.263398 0.000000 5872 +eventhough 0 2 6.263398 0.000000 6158 +conquer 0 2 6.263398 0.000000 5112 +combat 0 2 6.263398 0.000000 5473 +fight 0 2 6.263398 0.000000 5209 +monster 0 2 6.263398 0.000000 6207 +gustavu 0 1 6.957497 0.000000 19135 +adolphu 0 1 6.957497 0.000000 19136 +atlanti 0 1 6.957497 0.000000 19137 +humm 0 1 6.957497 0.000000 19138 +micklich 0 1 6.957497 0.000000 19139 +illicitsubst 0 1 6.957497 0.000000 19140 +neutron 0 1 6.957497 0.000000 19141 +hailperin 0 1 6.957497 0.000000 19142 +pagemilo 0 1 6.957497 0.000000 19143 +byappointmentba 0 1 6.957497 0.000000 19144 +larusteach 0 1 6.957497 0.000000 19145 +beinfluenc 0 1 6.957497 0.000000 19146 +yule 0 1 6.957497 0.000000 19147 +sagalovski 0 1 6.957497 0.000000 19148 +nucl 0 1 6.957497 0.000000 19149 +inst 0 1 6.957497 0.000000 19150 +languageflex 0 1 6.957497 0.000000 19151 +anintern 0 1 6.957497 0.000000 19152 +toadvanc 0 1 6.957497 0.000000 19153 +fosteringth 0 1 6.957497 0.000000 19154 +highestprofession 0 1 6.957497 0.000000 19155 +bignfl 0 1 6.957497 0.000000 19156 +vike 0 1 6.957497 0.000000 19157 +colon 0 1 6.957497 0.000000 19158 +imho 0 1 6.957497 0.000000 19159 +mythic 0 1 6.957497 0.000000 19160 +engaug 0 1 6.957497 0.000000 19161 +wizard 0 1 6.957497 0.000000 19162 +underworld 0 1 6.957497 0.000000 19163 +ofsocc 0 1 6.957497 0.000000 19164 +afrisbe 0 1 6.957497 0.000000 19165 +quarterback 0 1 6.957497 0.000000 19166 +ultimatein 0 1 6.957497 0.000000 19167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..2b8a5e97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +algorithm 0 162 1.791759 0.000000 57 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +databas 0 122 2.079442 0.000000 86 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +report 0 131 2.079442 0.000000 92 +dayton 0 119 2.079442 0.000000 104 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +check 0 115 2.197225 0.000000 118 +technic 0 100 2.302585 0.000000 140 +proceed 0 93 2.397895 0.000000 152 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +resourc 0 81 2.484907 0.000000 172 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +optim 0 79 2.564949 0.000000 197 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +resum 0 79 2.564949 0.000000 217 +server 0 76 2.564949 0.000000 204 +free 0 73 2.639057 0.000000 224 +logic 0 71 2.639057 0.000000 230 +multimedia 0 68 2.708050 0.000000 258 +view 0 70 2.708050 0.000000 254 +dept 0 64 2.772589 0.000000 291 +complex 0 64 2.772589 0.000000 269 +abstract 0 62 2.772589 0.000000 276 +juli 0 60 2.833213 0.000000 305 +februari 0 54 2.944439 0.000000 328 +advisor 0 51 2.995732 0.000000 355 +pointer 0 48 3.044522 0.000000 368 +effect 0 46 3.091042 0.000000 385 +continu 0 39 3.258097 0.000000 448 +submit 0 39 3.258097 0.000000 440 +societi 0 40 3.258097 0.000000 456 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +feel 0 37 3.332205 0.000000 483 +multi 0 36 3.367296 0.000000 493 +survei 0 35 3.401197 0.000000 513 +michael 0 35 3.401197 0.000000 514 +queri 0 33 3.433987 0.000000 524 +enhanc 0 26 3.688879 0.000000 644 +sequenti 0 22 3.850148 0.000000 745 +sigmod 0 19 4.007333 0.000000 877 +media 0 19 4.007333 0.000000 861 +dimension 0 18 4.060443 0.000000 909 +stat 0 17 4.110874 0.000000 924 +canada 0 13 4.382027 0.000000 1158 +dbm 0 13 4.382027 0.000000 1136 +probabilist 0 11 4.553877 0.000000 1343 +vldb 0 10 4.653960 0.000000 1470 +candid 0 9 4.753590 0.000000 1606 +yanni 0 8 4.875197 0.000000 1713 +ioannidi 0 8 4.875197 0.000000 1714 +watson 0 8 4.875197 0.000000 1691 +refere 0 7 5.010635 0.000000 1895 +montreal 0 7 5.010635 0.000000 1961 +usaoffic 0 6 5.164786 0.000000 2159 +silberschatz 0 6 5.164786 0.000000 1978 +peek 0 6 5.164786 0.000000 2169 +almaden 0 5 5.347108 0.000000 2511 +informat 0 3 5.857933 0.000000 3839 +mino 0 2 6.263398 0.000000 6208 +garofalaki 0 2 6.263398 0.000000 6209 +patra 0 2 6.263398 0.000000 5537 +ozden 0 2 6.263398 0.000000 5749 +reasearch 0 2 6.263398 0.000000 5538 +hellen 0 2 6.263398 0.000000 6210 +garofalakismino 0 1 6.957497 0.000000 19168 +eduphd 0 1 6.957497 0.000000 19169 +workresearch 0 1 6.957497 0.000000 19170 +theoryeduc 0 1 6.957497 0.000000 19171 +banu 0 1 6.957497 0.000000 19172 +ioannidismor 0 1 6.957497 0.000000 19173 +centerdr 0 1 6.957497 0.000000 19174 +bibliograpi 0 1 6.957497 0.000000 19175 +perpetu 0 1 6.957497 0.000000 19176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..edfead46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +dayton 0 119 2.079442 0.000000 104 +associ 0 93 2.397895 0.000000 151 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +sciencesunivers 0 37 3.332205 0.000000 486 +paradyn 0 9 4.753590 0.000000 1614 +marcelo 0 2 6.263398 0.000000 6199 +sheboygan 0 2 6.263398 0.000000 6198 +gonalv 0 1 6.957497 0.000000 19177 +mjrg 0 1 6.957497 0.000000 19178 +addresswork 0 1 6.957497 0.000000 19179 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..8ddf78a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +imag 0 91 2.397895 0.000000 161 +west 0 83 2.484907 0.000000 192 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +brian 1 38 3.295837 3.295837 466 +streetmadison 0 38 3.295837 0.000000 474 +compress 0 23 3.806662 0.000000 719 +chuck 0 14 4.317488 0.000000 1108 +bandwidth 0 11 4.553877 0.000000 1365 +morgan 1 9 4.753590 4.753590 1484 +conferenc 1 7 5.010635 5.010635 1857 +studentcomput 0 7 5.010635 0.000000 1963 +morgangradu 0 1 6.957497 0.000000 19180 +dyerresearch 0 1 6.957497 0.000000 19181 +interestsvirtu 0 1 6.957497 0.000000 19182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..03b32ccf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +postscript 0 131 2.079442 0.000000 90 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +level 0 87 2.484907 0.000000 180 +chang 0 82 2.484907 0.000000 163 +want 0 79 2.564949 0.000000 199 +write 0 72 2.639057 0.000000 222 +meet 0 72 2.639057 0.000000 229 +nation 0 74 2.639057 0.000000 240 +free 0 73 2.639057 0.000000 224 +degre 0 69 2.708050 0.000000 259 +descript 0 64 2.772589 0.000000 271 +visit 0 63 2.772589 0.000000 288 +copi 0 63 2.772589 0.000000 284 +processor 0 54 2.944439 0.000000 335 +sampl 0 53 2.944439 0.000000 339 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +advisor 0 51 2.995732 0.000000 355 +numer 0 49 3.044522 0.000000 369 +futur 0 41 3.218876 0.000000 427 +york 0 41 3.218876 0.000000 435 +howev 0 41 3.218876 0.000000 422 +editor 0 41 3.218876 0.000000 433 +edit 0 42 3.218876 0.000000 418 +slide 0 38 3.295837 0.000000 467 +sciencesunivers 0 37 3.332205 0.000000 486 +download 0 36 3.367296 0.000000 489 +short 0 36 3.367296 0.000000 499 +obtain 0 33 3.433987 0.000000 534 +depend 0 29 3.583519 0.000000 583 +load 0 28 3.610918 0.000000 601 +bookmark 0 26 3.688879 0.000000 639 +compress 0 23 3.806662 0.000000 719 +instal 0 22 3.850148 0.000000 754 +leav 0 21 3.912023 0.000000 772 +vlsi 0 21 3.912023 0.000000 795 +sure 0 20 3.951244 0.000000 813 +excel 0 19 4.007333 0.000000 868 +transfer 0 16 4.174387 0.000000 967 +balanc 0 14 4.317488 0.000000 1112 +brother 0 13 4.382027 0.000000 1189 +wife 0 13 4.382027 0.000000 1196 +resid 0 10 4.653960 0.000000 1461 +poetri 0 9 4.753590 0.000000 1596 +herefor 0 9 4.753590 0.000000 1483 +multiscalar 0 8 4.875197 0.000000 1783 +dictionari 0 8 4.875197 0.000000 1642 +earn 0 7 5.010635 0.000000 1788 +pipelin 0 7 5.010635 0.000000 1830 +greec 0 6 5.164786 0.000000 2208 +peek 0 6 5.164786 0.000000 2169 +andrea 0 5 5.347108 0.000000 2375 +guri 0 5 5.347108 0.000000 2578 +hyper 0 5 5.347108 0.000000 2435 +kestrel 0 4 5.568345 0.000000 2990 +decoupl 0 4 5.568345 0.000000 2898 +mess 0 4 5.568345 0.000000 2886 +specul 0 3 5.857933 0.000000 3951 +crete 0 3 5.857933 0.000000 3773 +greek 0 3 5.857933 0.000000 3595 +uncompress 0 3 5.857933 0.000000 3177 +moshovo 0 2 6.263398 0.000000 6211 +madisonadvisor 0 2 6.263398 0.000000 6212 +instituteof 0 2 6.263398 0.000000 5507 +architecturethat 0 2 6.263398 0.000000 5876 +hellen 0 2 6.263398 0.000000 6210 +font 0 2 6.263398 0.000000 5845 +moshovosresearch 0 1 6.957497 0.000000 19183 +sohigroup 0 1 6.957497 0.000000 19184 +notese 0 1 6.957497 0.000000 19185 +aroundw 0 1 6.957497 0.000000 19186 +clickheremi 0 1 6.957497 0.000000 19187 +explot 0 1 6.957497 0.000000 19188 +thecour 0 1 6.957497 0.000000 19189 +theopportun 0 1 6.957497 0.000000 19190 +kateveni 0 1 6.957497 0.000000 19191 +viha 0 1 6.957497 0.000000 19192 +resouc 0 1 6.957497 0.000000 19193 +atwww 0 1 6.957497 0.000000 19194 +devil 0 1 6.957497 0.000000 19195 +fraud 0 1 6.957497 0.000000 19196 +centerusenet 0 1 6.957497 0.000000 19197 +afax 0 1 6.957497 0.000000 19198 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..af189065 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +octob 0 89 2.397895 0.000000 156 +chang 0 82 2.484907 0.000000 163 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +main 0 67 2.708050 0.000000 256 +back 0 60 2.833213 0.000000 297 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +index 0 56 2.890372 0.000000 309 +friend 0 48 3.044522 0.000000 376 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +favorit 0 44 3.135494 0.000000 410 +background 0 25 3.737670 0.000000 664 +navig 0 21 3.912023 0.000000 796 +brief 0 16 4.174387 0.000000 1001 +minor 0 12 4.465908 0.000000 1237 +black 0 10 4.653960 0.000000 1418 +prefer 0 9 4.753590 0.000000 1491 +contrast 0 8 4.875197 0.000000 1637 +older 0 5 5.347108 0.000000 2387 +toni 0 3 5.857933 0.000000 3415 +herear 0 2 6.263398 0.000000 5947 +pagewhat 0 1 6.957497 0.000000 19199 +newoctob 0 1 6.957497 0.000000 19200 +inmadison 0 1 6.957497 0.000000 19201 +informationlast 0 1 6.957497 0.000000 19202 +educopyright 0 1 6.957497 0.000000 19203 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..03e78b93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +studi 0 120 2.079442 0.000000 91 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +section 0 94 2.397895 0.000000 149 +exam 0 86 2.484907 0.000000 169 +thing 0 84 2.484907 0.000000 189 +educ 0 86 2.484907 0.000000 191 +learn 0 86 2.484907 0.000000 170 +stuff 0 87 2.484907 0.000000 171 +want 0 79 2.564949 0.000000 199 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +logic 0 71 2.639057 0.000000 230 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +line 0 75 2.639057 0.000000 231 +dept 0 64 2.772589 0.000000 291 +previou 0 62 2.772589 0.000000 290 +plai 0 60 2.833213 0.000000 307 +reason 0 57 2.890372 0.000000 318 +summer 0 56 2.890372 0.000000 311 +talk 0 53 2.944439 0.000000 336 +februari 0 54 2.944439 0.000000 328 +finger 0 52 2.995732 0.000000 354 +particular 0 51 2.995732 0.000000 352 +digit 0 52 2.995732 0.000000 348 +telephon 0 50 3.044522 0.000000 373 +life 0 50 3.044522 0.000000 375 +even 0 45 3.135494 0.000000 393 +better 0 45 3.135494 0.000000 401 +third 0 43 3.178054 0.000000 412 +around 0 43 3.178054 0.000000 415 +might 0 41 3.218876 0.000000 426 +examin 0 42 3.218876 0.000000 424 +realli 0 40 3.258097 0.000000 444 +probabl 0 40 3.258097 0.000000 455 +feel 0 37 3.332205 0.000000 483 +product 0 33 3.433987 0.000000 527 +often 0 31 3.496508 0.000000 551 +usual 0 28 3.610918 0.000000 608 +weather 0 28 3.610918 0.000000 618 +campu 0 27 3.637586 0.000000 623 +enjoi 0 26 3.688879 0.000000 660 +notic 0 25 3.737670 0.000000 675 +departmentunivers 0 24 3.761200 0.000000 711 +sometim 0 24 3.761200 0.000000 696 +alwai 0 24 3.761200 0.000000 691 +wish 0 24 3.761200 0.000000 692 +head 0 23 3.806662 0.000000 732 +hierarchi 0 22 3.850148 0.000000 744 +martin 0 21 3.912023 0.000000 794 +exploit 0 20 3.951244 0.000000 836 +exercis 0 19 4.007333 0.000000 842 +concentr 0 18 4.060443 0.000000 906 +ultim 0 17 4.110874 0.000000 943 +doesn 0 15 4.248495 0.000000 1055 +qual 0 15 4.248495 0.000000 1062 +senior 0 14 4.317488 0.000000 1120 +role 0 14 4.317488 0.000000 1101 +affili 0 13 4.382027 0.000000 1194 +basketbal 0 12 4.465908 0.000000 1289 +mountain 0 10 4.653960 0.000000 1456 +bike 0 10 4.653960 0.000000 1468 +mention 0 9 4.753590 0.000000 1569 +qualifi 0 8 4.875197 0.000000 1721 +heart 0 8 4.875197 0.000000 1729 +besid 0 8 4.875197 0.000000 1681 +round 0 8 4.875197 0.000000 1769 +relax 0 6 5.164786 0.000000 2120 +squash 0 6 5.164786 0.000000 2223 +adjust 0 5 5.347108 0.000000 2422 +crucial 0 5 5.347108 0.000000 2384 +frisbe 0 5 5.347108 0.000000 2560 +gone 0 4 5.568345 0.000000 3072 +afraid 0 4 5.568345 0.000000 3053 +poorli 0 4 5.568345 0.000000 2781 +wesleyan 0 3 5.857933 0.000000 3988 +coke 1 2 6.263398 6.263398 5935 +ream 0 2 6.263398 0.000000 4783 +mream 0 2 6.263398 0.000000 4784 +terrain 0 2 6.263398 0.000000 6174 +logicprogram 0 2 6.263398 0.000000 4262 +interestsin 0 2 6.263398 0.000000 6213 +unif 0 2 6.263398 0.000000 5910 +mighti 0 2 6.263398 0.000000 4863 +tomi 0 2 6.263398 0.000000 5846 +mental 0 2 6.263398 0.000000 5802 +yeargradu 0 2 6.263398 0.000000 6015 +poobah 0 1 6.957497 0.000000 19204 +edufal 0 1 6.957497 0.000000 19205 +scheduleresearch 0 1 6.957497 0.000000 19206 +tin 0 1 6.957497 0.000000 19207 +orientedenviron 0 1 6.957497 0.000000 19208 +postscriptand 0 1 6.957497 0.000000 19209 +faint 0 1 6.957497 0.000000 19210 +alink 0 1 6.957497 0.000000 19211 +honorsthesi 0 1 6.957497 0.000000 19212 +poobahlook 0 1 6.957497 0.000000 19213 +dear 0 1 6.957497 0.000000 19214 +tosomeon 0 1 6.957497 0.000000 19215 +youshould 0 1 6.957497 0.000000 19216 +elton 0 1 6.957497 0.000000 19217 +imaginethat 0 1 6.957497 0.000000 19218 +aforement 0 1 6.957497 0.000000 19219 +poobahship 0 1 6.957497 0.000000 19220 +ill 0 1 6.957497 0.000000 19221 +afew 0 1 6.957497 0.000000 19222 +indatabas 0 1 6.957497 0.000000 19223 +inearli 0 1 6.957497 0.000000 19224 +andinfrequ 0 1 6.957497 0.000000 19225 +rapidlyrid 0 1 6.957497 0.000000 19226 +chilliest 0 1 6.957497 0.000000 19227 +helmet 0 1 6.957497 0.000000 19228 +mynot 0 1 6.957497 0.000000 19229 +ilik 0 1 6.957497 0.000000 19230 +librarylast 0 1 6.957497 0.000000 19231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..f2c0a1d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +avail 0 169 1.791759 0.000000 48 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +wide 0 84 2.484907 0.000000 185 +sourc 0 77 2.564949 0.000000 201 +talk 0 53 2.944439 0.000000 336 +local 0 55 2.944439 0.000000 334 +februari 0 54 2.944439 0.000000 328 +given 0 32 3.465736 0.000000 538 +fund 0 21 3.912023 0.000000 805 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 0 6 5.164786 0.000000 2237 +departmentat 0 5 5.347108 0.000000 2513 +guri 0 5 5.347108 0.000000 2578 +contributor 0 2 6.263398 0.000000 6214 +pagewisconsin 0 1 6.957497 0.000000 19232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..401aa02e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +perform 0 143 1.945910 0.000000 74 +introduct 0 126 2.079442 0.000000 87 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +question 0 91 2.397895 0.000000 141 +center 0 88 2.397895 0.000000 158 +section 0 94 2.397895 0.000000 149 +pictur 0 89 2.397895 0.000000 160 +build 0 85 2.484907 0.000000 184 +school 0 84 2.484907 0.000000 188 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +wide 0 84 2.484907 0.000000 185 +come 0 78 2.564949 0.000000 202 +april 0 77 2.564949 0.000000 196 +know 0 80 2.564949 0.000000 198 +intellig 0 72 2.639057 0.000000 225 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +test 0 66 2.708050 0.000000 252 +artifici 0 63 2.772589 0.000000 280 +back 0 60 2.833213 0.000000 297 +semest 0 58 2.890372 0.000000 312 +undergradu 0 54 2.944439 0.000000 338 +friend 0 48 3.044522 0.000000 376 +frequent 0 49 3.044522 0.000000 367 +right 0 48 3.044522 0.000000 363 +still 0 50 3.044522 0.000000 362 +favorit 0 44 3.135494 0.000000 410 +algebra 0 45 3.135494 0.000000 394 +around 0 43 3.178054 0.000000 415 +vision 0 41 3.218876 0.000000 430 +hand 0 37 3.332205 0.000000 475 +michael 0 35 3.401197 0.000000 514 +eduoffic 0 33 3.433987 0.000000 531 +photo 0 31 3.496508 0.000000 561 +ask 0 28 3.610918 0.000000 597 +univ 0 28 3.610918 0.000000 617 +hope 0 28 3.610918 0.000000 610 +administr 0 27 3.637586 0.000000 628 +comp 0 26 3.688879 0.000000 650 +sport 0 25 3.737670 0.000000 683 +mike 0 24 3.761200 0.000000 703 +sometim 0 24 3.761200 0.000000 696 +theunivers 0 21 3.912023 0.000000 797 +lower 0 18 4.060443 0.000000 886 +stat 0 17 4.110874 0.000000 924 +bachelor 0 17 4.110874 0.000000 957 +chuck 0 14 4.317488 0.000000 1108 +dave 0 14 4.317488 0.000000 1098 +sai 0 13 4.382027 0.000000 1175 +suit 0 13 4.382027 0.000000 1129 +land 0 12 4.465908 0.000000 1273 +mari 0 12 4.465908 0.000000 1266 +touch 0 12 4.465908 0.000000 1288 +host 0 11 4.553877 0.000000 1306 +folk 0 9 4.753590 0.000000 1597 +joel 0 8 4.875197 0.000000 1698 +usenet 0 7 5.010635 0.000000 1839 +corner 0 7 5.010635 0.000000 1909 +maryland 0 6 5.164786 0.000000 2140 +gzip 0 6 5.164786 0.000000 2117 +billi 0 5 5.347108 0.000000 2404 +madisoncomput 0 5 5.347108 0.000000 2391 +steel 0 4 5.568345 0.000000 2818 +chees 0 4 5.568345 0.000000 3090 +kill 0 4 5.568345 0.000000 3000 +sit 0 3 5.857933 0.000000 3953 +stamp 0 3 5.857933 0.000000 3678 +artificialintellig 0 3 5.857933 0.000000 3608 +narrow 0 3 5.857933 0.000000 3807 +predat 0 3 5.857933 0.000000 3135 +forgot 0 2 6.263398 0.000000 4769 +linksmi 0 2 6.263398 0.000000 6215 +barri 0 2 6.263398 0.000000 5149 +eduunivers 0 2 6.263398 0.000000 6216 +homepagemik 0 1 6.957497 0.000000 19233 +homepagemsteel 0 1 6.957497 0.000000 19234 +struggl 0 1 6.957497 0.000000 19235 +sometimearound 0 1 6.957497 0.000000 19236 +motto 0 1 6.957497 0.000000 19237 +freezein 0 1 6.957497 0.000000 19238 +graduateinstructor 0 1 6.957497 0.000000 19239 +scomput 0 1 6.957497 0.000000 19240 +publicationsgrindston 0 1 6.957497 0.000000 19241 +jefferyk 0 1 6.957497 0.000000 19242 +hollingsworth 0 1 6.957497 0.000000 19243 +reportc 0 1 6.957497 0.000000 19244 +postscriptfil 0 1 6.957497 0.000000 19245 +semesterc 0 1 6.957497 0.000000 19246 +vernonc 0 1 6.957497 0.000000 19247 +dyermi 0 1 6.957497 0.000000 19248 +pagesinform 0 1 6.957497 0.000000 19249 +gettingin 0 1 6.957497 0.000000 19250 +marylandwhom 0 1 6.957497 0.000000 19251 +teamssom 0 1 6.957497 0.000000 19252 +listth 0 1 6.957497 0.000000 19253 +listi 0 1 6.957497 0.000000 19254 +thefruit 0 1 6.957497 0.000000 19255 +ofmaryland 0 1 6.957497 0.000000 19256 +insidejok 0 1 6.957497 0.000000 19257 +andnow 0 1 6.957497 0.000000 19258 +someinfrar 0 1 6.957497 0.000000 19259 +looklik 0 1 6.957497 0.000000 19260 +infrar 0 1 6.957497 0.000000 19261 +memik 0 1 6.957497 0.000000 19262 +steelemsteel 0 1 6.957497 0.000000 19263 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..2fb55bb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +section 0 94 2.397895 0.000000 149 +want 0 79 2.564949 0.000000 199 +visit 1 63 2.772589 2.772589 288 +colleg 0 61 2.833213 0.000000 300 +might 0 41 3.218876 0.000000 426 +mayb 0 15 4.248495 0.000000 1014 +maryland 0 6 5.164786 0.000000 2140 +park 0 6 5.164786 0.000000 2218 +maria 0 4 5.568345 0.000000 2954 +pagemaria 0 1 6.957497 0.000000 19264 +pagehow 0 1 6.957497 0.000000 19265 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..0b80fb8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +check 0 115 2.197225 0.000000 118 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +pictur 0 89 2.397895 0.000000 160 +member 0 84 2.484907 0.000000 165 +good 0 77 2.564949 0.000000 200 +main 0 67 2.708050 0.000000 256 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +finger 0 52 2.995732 0.000000 354 +visual 0 48 3.044522 0.000000 372 +around 0 43 3.178054 0.000000 415 +music 0 42 3.218876 0.000000 436 +howev 0 41 3.218876 0.000000 422 +staff 0 36 3.367296 0.000000 490 +known 0 24 3.761200 0.000000 702 +finish 0 22 3.850148 0.000000 748 +born 0 21 3.912023 0.000000 798 +fact 0 21 3.912023 0.000000 780 +watch 0 21 3.912023 0.000000 789 +citi 0 19 4.007333 0.000000 874 +spend 0 19 4.007333 0.000000 850 +beauti 0 18 4.060443 0.000000 912 +stori 0 14 4.317488 0.000000 1087 +central 0 13 4.382027 0.000000 1160 +cook 0 10 4.653960 0.000000 1464 +paradyn 0 9 4.753590 0.000000 1614 +undergrad 0 9 4.753590 0.000000 1589 +guitar 0 8 4.875197 0.000000 1758 +simon 0 8 4.875197 0.000000 1697 +capit 0 7 5.010635 0.000000 1957 +apart 0 7 5.010635 0.000000 1936 +antonio 0 6 5.164786 0.000000 2186 +england 0 5 5.347108 0.000000 2557 +million 0 5 5.347108 0.000000 2495 +western 0 4 5.568345 0.000000 3062 +basebal 0 4 5.568345 0.000000 2969 +myph 0 3 5.857933 0.000000 3880 +popul 0 3 5.857933 0.000000 3235 +pleaseclick 0 2 6.263398 0.000000 5432 +venezuela 1 1 6.957497 6.957497 19266 +barquisimeto 0 1 6.957497 0.000000 19267 +naim 0 1 6.957497 0.000000 19268 +oscar 0 1 6.957497 0.000000 19269 +bienvenido 0 1 6.957497 0.000000 19270 +southampton 0 1 6.957497 0.000000 19271 +universidad 0 1 6.957497 0.000000 19272 +bolivar 0 1 6.957497 0.000000 19273 +caraca 0 1 6.957497 0.000000 19274 +barquisimetoi 0 1 6.957497 0.000000 19275 +ofabout 0 1 6.957497 0.000000 19276 +playclass 0 1 6.957497 0.000000 19277 +excellentmaestro 0 1 6.957497 0.000000 19278 +rodrigo 0 1 6.957497 0.000000 19279 +riera 0 1 6.957497 0.000000 19280 +lauro 0 1 6.957497 0.000000 19281 +sherlock 0 1 6.957497 0.000000 19282 +holm 0 1 6.957497 0.000000 19283 +beati 0 1 6.957497 0.000000 19284 +mundo 0 1 6.957497 0.000000 19285 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..4d1ce34e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +find 0 111 2.197225 0.000000 111 +make 0 111 2.197225 0.000000 120 +send 0 114 2.197225 0.000000 109 +pictur 0 89 2.397895 0.000000 160 +want 0 79 2.564949 0.000000 199 +street 0 63 2.772589 0.000000 293 +import 0 65 2.772589 0.000000 282 +guid 0 63 2.772589 0.000000 267 +visit 0 63 2.772589 0.000000 288 +realli 0 40 3.258097 0.000000 444 +notic 0 25 3.737670 0.000000 675 +nice 0 20 3.951244 0.000000 809 +georg 0 16 4.174387 0.000000 994 +worth 0 11 4.553877 0.000000 1294 +alex 0 6 5.164786 0.000000 2130 +greek 0 3 5.857933 0.000000 3595 +decent 0 2 6.263398 0.000000 5542 +rochest 0 2 6.263398 0.000000 6142 +anastassia 0 1 6.957497 0.000000 19286 +ailamaki 0 1 6.957497 0.000000 19287 +islandsar 0 1 6.957497 0.000000 19288 +natassa 0 1 6.957497 0.000000 19289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..fefc7e30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +perform 0 143 1.945910 0.000000 74 +area 0 144 1.945910 0.000000 80 +databas 1 122 2.079442 2.079442 86 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +larg 0 82 2.484907 0.000000 168 +david 0 71 2.639057 0.000000 232 +goal 0 66 2.708050 0.000000 250 +main 0 67 2.708050 0.000000 256 +improv 0 62 2.772589 0.000000 289 +three 0 54 2.944439 0.000000 330 +submit 0 39 3.258097 0.000000 440 +multi 0 36 3.367296 0.000000 493 +michael 0 35 3.401197 0.000000 514 +storag 0 31 3.496508 0.000000 553 +arrai 0 27 3.637586 0.000000 627 +toward 0 25 3.737670 0.000000 668 +hierarchi 0 22 3.850148 0.000000 744 +prepar 0 20 3.951244 0.000000 824 +benchmark 0 19 4.007333 0.000000 859 +dimension 0 18 4.060443 0.000000 909 +estim 0 17 4.110874 0.000000 930 +spatial 0 16 4.174387 0.000000 988 +ramakrishnan 0 16 4.174387 0.000000 972 +indic 0 15 4.248495 0.000000 1013 +dbm 0 13 4.382027 0.000000 1136 +overal 0 12 4.465908 0.000000 1254 +workload 0 12 4.465908 0.000000 1210 +gupta 0 12 4.465908 0.000000 1241 +raghu 0 12 4.465908 0.000000 1212 +dewitt 0 12 4.465908 0.000000 1270 +jeffrei 0 9 4.753590 0.000000 1612 +presenc 0 8 4.875197 0.000000 1671 +carei 0 8 4.875197 0.000000 1781 +bombai 0 7 5.010635 0.000000 1972 +prasad 0 6 5.164786 0.000000 2126 +eduresearch 0 6 5.164786 0.000000 2205 +aggreg 0 6 5.164786 0.000000 2219 +deshpand 0 5 5.347108 0.000000 2431 +eas 0 5 5.347108 0.000000 2267 +ofinterest 0 5 5.347108 0.000000 2323 +ashish 0 5 5.347108 0.000000 2473 +tuft 0 5 5.347108 0.000000 2575 +multidimension 0 4 5.568345 0.000000 3091 +cube 0 4 5.568345 0.000000 2940 +amit 0 4 5.568345 0.000000 2972 +ramasami 0 4 5.568345 0.000000 3088 +shah 0 4 5.568345 0.000000 2814 +zhao 0 4 5.568345 0.000000 2699 +kristin 0 4 5.568345 0.000000 3089 +mumbai 0 3 5.857933 0.000000 4029 +surpass 0 3 5.857933 0.000000 3247 +shukla 0 3 5.857933 0.000000 4030 +karthikeyan 0 3 5.857933 0.000000 4031 +johann 0 3 5.857933 0.000000 3758 +moham 0 3 5.857933 0.000000 3848 +asgarian 0 3 5.857933 0.000000 3447 +andprocess 0 2 6.263398 0.000000 4925 +agarw 0 2 6.263398 0.000000 5352 +rakesh 0 2 6.263398 0.000000 6017 +agraw 0 2 6.263398 0.000000 4536 +molap 0 2 6.263398 0.000000 6217 +naughtonjeffrei 0 1 6.957497 0.000000 19290 +naughtonnaughton 0 1 6.957497 0.000000 19291 +interestsolap 0 1 6.957497 0.000000 19292 +relationaldbm 0 1 6.957497 0.000000 19293 +ofdatabas 0 1 6.957497 0.000000 19294 +inperform 0 1 6.957497 0.000000 19295 +ofmulti 0 1 6.957497 0.000000 19296 +computingth 0 1 6.957497 0.000000 19297 +valuedattribut 0 1 6.957497 0.000000 19298 +withsameet 0 1 6.957497 0.000000 19299 +sunita 0 1 6.957497 0.000000 19300 +sarawagi 0 1 6.957497 0.000000 19301 +thend 0 1 6.957497 0.000000 19302 +aggregatesin 0 1 6.957497 0.000000 19303 +bucki 0 1 6.957497 0.000000 19304 +gerhk 0 1 6.957497 0.000000 19305 +dhaval 0 1 6.957497 0.000000 19306 +withyihong 0 1 6.957497 0.000000 19307 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..ccc61c52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +databas 0 122 2.079442 0.000000 86 +look 0 107 2.197225 0.000000 115 +stuff 0 87 2.484907 0.000000 171 +thing 0 84 2.484907 0.000000 189 +optim 0 79 2.564949 0.000000 197 +david 0 71 2.639057 0.000000 232 +plan 0 65 2.772589 0.000000 272 +explor 0 58 2.890372 0.000000 324 +archiv 0 49 3.044522 0.000000 364 +could 0 46 3.091042 0.000000 383 +better 0 45 3.135494 0.000000 401 +queri 0 33 3.433987 0.000000 524 +bookmark 0 26 3.688879 0.000000 639 +indian 0 22 3.850148 0.000000 769 +among 0 21 3.912023 0.000000 781 +noth 0 11 4.553877 0.000000 1328 +song 0 11 4.553877 0.000000 1380 +paradis 0 8 4.875197 0.000000 1782 +customiz 0 4 5.568345 0.000000 2966 +hindi 0 3 5.857933 0.000000 3753 +navin 1 2 6.263398 6.263398 5351 +madisonadvisor 0 2 6.263398 0.000000 6212 +dewittresearch 0 2 6.263398 0.000000 6185 +kabranavin 0 1 6.957497 0.000000 19308 +kabragradu 0 1 6.957497 0.000000 19309 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..b873b2ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +perform 1 143 1.945910 1.945910 74 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +pictur 0 89 2.397895 0.000000 160 +chang 0 82 2.484907 0.000000 163 +java 0 70 2.708050 0.000000 248 +august 0 66 2.708050 0.000000 257 +telephon 0 50 3.044522 0.000000 373 +scalabl 0 24 3.761200 0.000000 705 +predict 0 19 4.007333 0.000000 855 +bart 0 9 4.753590 0.000000 1559 +newhal 1 1 6.957497 6.957497 19310 +newhalltia 0 1 6.957497 0.000000 19311 +paradynadvisor 0 1 6.957497 0.000000 19312 +millermummi 0 1 6.957497 0.000000 19313 +guanajuato 0 1 6.957497 0.000000 19314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..67e75bd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +object 0 138 1.945910 0.000000 79 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +scalabl 0 24 3.761200 0.000000 705 +repositori 0 17 4.110874 0.000000 932 +heterogen 0 14 4.317488 0.000000 1090 +nanci 0 12 4.465908 0.000000 1256 +shore 0 11 4.553877 0.000000 1377 +hallcomput 0 1 6.957497 0.000000 19315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..5cf0dc02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +professor 0 137 1.945910 0.000000 76 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +dayton 0 119 2.079442 0.000000 104 +mathemat 1 108 2.197225 2.197225 123 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +techniqu 0 99 2.302585 0.000000 138 +octob 0 89 2.397895 0.000000 156 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +learn 0 86 2.484907 0.000000 170 +novemb 0 81 2.484907 0.000000 179 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +optim 0 79 2.564949 0.000000 197 +decemb 0 80 2.564949 0.000000 215 +method 0 80 2.564949 0.000000 213 +april 0 77 2.564949 0.000000 196 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +solv 0 73 2.639057 0.000000 234 +summari 0 73 2.639057 0.000000 237 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +august 0 66 2.708050 0.000000 257 +view 0 70 2.708050 0.000000 254 +street 0 63 2.772589 0.000000 293 +function 0 62 2.772589 0.000000 275 +januari 0 62 2.772589 0.000000 264 +septemb 0 65 2.772589 0.000000 274 +result 0 65 2.772589 0.000000 281 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +new 0 64 2.772589 0.000000 262 +juli 0 60 2.833213 0.000000 305 +publish 0 57 2.890372 0.000000 326 +februari 0 54 2.944439 0.000000 328 +telephon 0 50 3.044522 0.000000 373 +life 0 50 3.044522 0.000000 375 +featur 0 46 3.091042 0.000000 386 +california 0 46 3.091042 0.000000 388 +linear 0 41 3.218876 0.000000 431 +editor 0 41 3.218876 0.000000 433 +press 0 42 3.218876 0.000000 419 +submit 0 39 3.258097 0.000000 440 +error 0 40 3.258097 0.000000 449 +paul 0 38 3.295837 0.000000 471 +download 0 36 3.367296 0.000000 489 +global 0 34 3.401197 0.000000 520 +bibliographi 0 34 3.401197 0.000000 518 +john 0 33 3.433987 0.000000 532 +toler 0 33 3.433987 0.000000 533 +neural 0 30 3.555348 0.000000 578 +rang 0 30 3.555348 0.000000 565 +cluster 0 28 3.610918 0.000000 612 +revis 0 26 3.688879 0.000000 640 +bound 0 26 3.688879 0.000000 659 +constraint 0 26 3.688879 0.000000 636 +aspect 0 25 3.737670 0.000000 663 +accur 0 25 3.737670 0.000000 680 +proof 0 23 3.806662 0.000000 720 +variabl 0 23 3.806662 0.000000 715 +equat 0 23 3.806662 0.000000 724 +verlag 0 22 3.850148 0.000000 751 +period 0 22 3.850148 0.000000 743 +chen 0 21 3.912023 0.000000 791 +siam 0 21 3.912023 0.000000 800 +predict 0 19 4.007333 0.000000 855 +eric 0 19 4.007333 0.000000 870 +separ 0 19 4.007333 0.000000 844 +minim 0 18 4.060443 0.000000 887 +differenti 0 17 4.110874 0.000000 921 +germani 0 17 4.110874 0.000000 946 +hybrid 0 15 4.248495 0.000000 1057 +nonlinear 0 14 4.317488 0.000000 1107 +train 0 14 4.317488 0.000000 1066 +francisco 0 14 4.317488 0.000000 1095 +nick 0 13 4.382027 0.000000 1180 +context 0 13 4.382027 0.000000 1153 +individu 0 13 4.382027 0.000000 1126 +broad 0 11 4.553877 0.000000 1302 +rich 0 10 4.653960 0.000000 1396 +strongli 0 10 4.653960 0.000000 1406 +penalti 0 10 4.653960 0.000000 1405 +mangasarian 0 9 4.753590 0.000000 1570 +pose 0 9 4.753590 0.000000 1535 +morgan 0 9 4.753590 0.000000 1484 +converg 0 7 5.010635 0.000000 1844 +smooth 0 7 5.010635 0.000000 1855 +harvard 0 7 5.010635 0.000000 1926 +fischer 0 7 5.010635 0.000000 1893 +serial 0 7 5.010635 0.000000 1975 +olvi 0 6 5.164786 0.000000 2109 +inequ 0 6 5.164786 0.000000 2113 +constrain 0 6 5.164786 0.000000 2042 +strong 0 6 5.164786 0.000000 2029 +mix 0 6 5.164786 0.000000 2200 +determinist 0 6 5.164786 0.000000 2034 +bradlei 0 5 5.347108 0.000000 2554 +variat 0 5 5.347108 0.000000 2248 +kaufmann 0 5 5.347108 0.000000 2254 +convex 0 4 5.568345 0.000000 2807 +concav 0 4 5.568345 0.000000 2808 +diagnosi 0 4 5.568345 0.000000 3027 +nonmonoton 0 4 5.568345 0.000000 3023 +net 0 4 5.568345 0.000000 2741 +complementar 0 3 5.857933 0.000000 3999 +cancer 0 3 5.857933 0.000000 4032 +breast 0 3 5.857933 0.000000 4033 +backpropag 0 3 5.857933 0.000000 3507 +neumann 0 3 5.857933 0.000000 3720 +programsand 0 3 5.857933 0.000000 3111 +programmingtechniqu 0 3 5.857933 0.000000 3113 +diagnos 0 3 5.857933 0.000000 3968 +diagnost 0 3 5.857933 0.000000 3833 +baltimor 0 3 5.857933 0.000000 3809 +chronolog 0 3 5.857933 0.000000 4034 +wolberg 0 2 6.263398 0.000000 6218 +perturb 0 2 6.263398 0.000000 6075 +interestsin 0 2 6.263398 0.000000 6213 +spectrum 0 2 6.263398 0.000000 5405 +applicationsto 0 2 6.263398 0.000000 4254 +computer 0 2 6.263398 0.000000 6219 +linearli 0 2 6.263398 0.000000 6220 +qualif 0 2 6.263398 0.000000 6059 +prognost 0 2 6.263398 0.000000 6221 +polyhedr 0 2 6.263398 0.000000 5412 +festschrift 0 2 6.263398 0.000000 6141 +klau 0 2 6.263398 0.000000 4999 +internationalsymposium 0 2 6.263398 0.000000 6032 +plenum 0 2 6.263398 0.000000 6036 +prognosi 0 2 6.263398 0.000000 6222 +chunhui 0 1 6.957497 0.000000 19316 +misclassif 0 1 6.957497 0.000000 19317 +solodov 0 1 6.957497 0.000000 19318 +effectivecomputation 0 1 6.957497 0.000000 19319 +encompassestheoret 0 1 6.957497 0.000000 19320 +parallelgradi 0 1 6.957497 0.000000 19321 +problemsa 0 1 6.957497 0.000000 19322 +animport 0 1 6.957497 0.000000 19323 +ahighli 0 1 6.957497 0.000000 19324 +useat 0 1 6.957497 0.000000 19325 +hospit 0 1 6.957497 0.000000 19326 +solodova 0 1 6.957497 0.000000 19327 +descent 0 1 6.957497 0.000000 19328 +monotonecomplementar 0 1 6.957497 0.000000 19329 +jong 0 1 6.957497 0.000000 19330 +pangexact 0 1 6.957497 0.000000 19331 +programswith 0 1 6.957497 0.000000 19332 +mangasarianmathemat 0 1 6.957497 0.000000 19333 +miningmathemat 0 1 6.957497 0.000000 19334 +mangasarianerror 0 1 6.957497 0.000000 19335 +nondifferenti 0 1 6.957497 0.000000 19336 +slater 0 1 6.957497 0.000000 19337 +ritter 0 1 6.957497 0.000000 19338 +riedmuel 0 1 6.957497 0.000000 19339 +schaeffler 0 1 6.957497 0.000000 19340 +physica 0 1 6.957497 0.000000 19341 +siag 0 1 6.957497 0.000000 19342 +bilinear 0 1 6.957497 0.000000 19343 +cowan 0 1 6.957497 0.000000 19344 +tesauro 0 1 6.957497 0.000000 19345 +alspector 0 1 6.957497 0.000000 19346 +inequalitiesand 0 1 6.957497 0.000000 19347 +vianonmonoton 0 1 6.957497 0.000000 19348 +minimn 0 1 6.957497 0.000000 19349 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..16872ab1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,422 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +year 0 148 1.945910 0.000000 84 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +perform 0 143 1.945910 0.000000 74 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +machin 0 129 2.079442 0.000000 95 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +pleas 0 113 2.197225 0.000000 114 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +theori 0 111 2.197225 0.000000 127 +user 0 104 2.302585 0.000000 137 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +imag 0 91 2.397895 0.000000 161 +proceed 0 93 2.397895 0.000000 152 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +center 0 88 2.397895 0.000000 158 +learn 0 86 2.484907 0.000000 170 +journal 0 83 2.484907 0.000000 183 +second 0 81 2.484907 0.000000 166 +solut 0 82 2.484907 0.000000 162 +larg 0 82 2.484907 0.000000 168 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +april 0 77 2.564949 0.000000 196 +method 0 80 2.564949 0.000000 213 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +exampl 0 77 2.564949 0.000000 195 +optim 0 79 2.564949 0.000000 197 +june 0 79 2.564949 0.000000 214 +free 0 73 2.639057 0.000000 224 +appli 0 71 2.639057 0.000000 226 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +workshop 0 71 2.639057 0.000000 239 +nation 0 74 2.639057 0.000000 240 +august 0 66 2.708050 0.000000 257 +goal 0 66 2.708050 0.000000 250 +abstract 0 62 2.772589 0.000000 276 +street 0 63 2.772589 0.000000 293 +result 0 65 2.772589 0.000000 281 +new 0 64 2.772589 0.000000 262 +januari 0 62 2.772589 0.000000 264 +copi 0 63 2.772589 0.000000 284 +prof 0 64 2.772589 0.000000 273 +interact 0 62 2.772589 0.000000 270 +previou 0 62 2.772589 0.000000 290 +function 0 62 2.772589 0.000000 275 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +septemb 0 65 2.772589 0.000000 274 +march 0 61 2.833213 0.000000 295 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +type 0 61 2.833213 0.000000 296 +juli 0 60 2.833213 0.000000 305 +variou 0 56 2.890372 0.000000 317 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +point 0 58 2.890372 0.000000 319 +detail 0 57 2.890372 0.000000 321 +sampl 0 53 2.944439 0.000000 339 +local 0 55 2.944439 0.000000 334 +allow 0 53 2.944439 0.000000 333 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +case 0 51 2.995732 0.000000 351 +tabl 0 51 2.995732 0.000000 346 +digit 0 52 2.995732 0.000000 348 +date 0 51 2.995732 0.000000 344 +particular 0 51 2.995732 0.000000 352 +approach 0 48 3.044522 0.000000 366 +visual 0 48 3.044522 0.000000 372 +pointer 0 48 3.044522 0.000000 368 +right 0 48 3.044522 0.000000 363 +format 0 48 3.044522 0.000000 356 +numer 0 49 3.044522 0.000000 369 +without 0 50 3.044522 0.000000 370 +friend 0 48 3.044522 0.000000 376 +featur 0 46 3.091042 0.000000 386 +could 0 46 3.091042 0.000000 383 +describ 0 45 3.135494 0.000000 400 +better 0 45 3.135494 0.000000 401 +netscap 0 44 3.135494 0.000000 395 +term 0 43 3.178054 0.000000 411 +long 0 43 3.178054 0.000000 413 +linear 0 41 3.218876 0.000000 431 +press 0 42 3.218876 0.000000 419 +vision 0 41 3.218876 0.000000 430 +examin 0 42 3.218876 0.000000 424 +probabl 0 40 3.258097 0.000000 455 +small 0 39 3.258097 0.000000 447 +submit 0 39 3.258097 0.000000 440 +societi 0 40 3.258097 0.000000 456 +author 0 39 3.258097 0.000000 450 +slide 0 38 3.295837 0.000000 467 +seminar 0 38 3.295837 0.000000 470 +mean 0 37 3.332205 0.000000 477 +ofth 0 36 3.367296 0.000000 491 +procedur 0 36 3.367296 0.000000 488 +download 0 36 3.367296 0.000000 489 +approxim 0 35 3.401197 0.000000 509 +bibliographi 0 34 3.401197 0.000000 518 +singl 0 34 3.401197 0.000000 510 +return 0 34 3.401197 0.000000 502 +print 0 34 3.401197 0.000000 503 +tech 0 35 3.401197 0.000000 515 +board 0 33 3.433987 0.000000 528 +obtain 0 33 3.433987 0.000000 534 +collabor 0 32 3.465736 0.000000 543 +human 0 32 3.465736 0.000000 546 +taken 0 31 3.496508 0.000000 555 +often 0 31 3.496508 0.000000 551 +power 0 30 3.555348 0.000000 573 +consid 0 29 3.583519 0.000000 590 +releas 0 28 3.610918 0.000000 616 +measur 0 28 3.610918 0.000000 609 +actual 0 28 3.610918 0.000000 604 +scale 0 28 3.610918 0.000000 613 +progress 0 28 3.610918 0.000000 598 +american 0 27 3.637586 0.000000 634 +team 0 27 3.637586 0.000000 625 +repres 0 26 3.688879 0.000000 656 +consist 0 26 3.688879 0.000000 651 +compar 0 26 3.688879 0.000000 648 +detect 0 26 3.688879 0.000000 646 +valu 0 25 3.737670 0.000000 665 +accur 0 25 3.737670 0.000000 680 +todai 0 25 3.737670 0.000000 672 +known 0 24 3.761200 0.000000 702 +pattern 0 24 3.761200 0.000000 689 +interpret 0 24 3.761200 0.000000 686 +recognit 0 23 3.806662 0.000000 723 +size 0 23 3.806662 0.000000 713 +togeth 0 23 3.806662 0.000000 714 +william 0 22 3.850148 0.000000 765 +identifi 0 22 3.850148 0.000000 760 +siam 0 21 3.912023 0.000000 800 +theunivers 0 21 3.912023 0.000000 797 +util 0 21 3.912023 0.000000 774 +similar 0 21 3.912023 0.000000 771 +viewer 0 21 3.912023 0.000000 787 +fine 0 20 3.951244 0.000000 822 +minut 0 20 3.951244 0.000000 810 +predict 0 19 4.007333 0.000000 855 +separ 0 19 4.007333 0.000000 844 +five 0 19 4.007333 0.000000 841 +comparison 0 19 4.007333 0.000000 863 +aid 0 18 4.060443 0.000000 904 +behavior 0 18 4.060443 0.000000 881 +statu 0 18 4.060443 0.000000 885 +medic 0 17 4.110874 0.000000 958 +differenti 0 17 4.110874 0.000000 921 +segment 0 17 4.110874 0.000000 931 +analyz 0 17 4.110874 0.000000 925 +seek 0 17 4.110874 0.000000 954 +portion 0 16 4.174387 0.000000 971 +capabl 0 15 4.248495 0.000000 1016 +indic 0 15 4.248495 0.000000 1013 +precis 0 15 4.248495 0.000000 1023 +ascii 0 15 4.248495 0.000000 1032 +train 0 14 4.317488 0.000000 1066 +shown 0 14 4.317488 0.000000 1080 +camera 0 14 4.317488 0.000000 1115 +draw 0 14 4.317488 0.000000 1086 +individu 0 13 4.382027 0.000000 1126 +deriv 0 13 4.382027 0.000000 1145 +nick 0 13 4.382027 0.000000 1180 +incorpor 0 13 4.382027 0.000000 1163 +characterist 0 12 4.465908 0.000000 1257 +scan 0 12 4.465908 0.000000 1243 +shape 0 12 4.465908 0.000000 1245 +remov 0 12 4.465908 0.000000 1225 +philadelphia 0 12 4.465908 0.000000 1244 +extrem 0 11 4.553877 0.000000 1330 +node 0 11 4.553877 0.000000 1326 +eight 0 11 4.553877 0.000000 1331 +distinguish 0 11 4.553877 0.000000 1357 +induct 0 11 4.553877 0.000000 1304 +total 0 10 4.653960 0.000000 1398 +subset 0 10 4.653960 0.000000 1425 +black 0 10 4.653960 0.000000 1418 +equal 0 10 4.653960 0.000000 1424 +tradit 0 10 4.653960 0.000000 1404 +perspect 0 10 4.653960 0.000000 1437 +mangasarian 0 9 4.753590 0.000000 1570 +surfac 0 9 4.753590 0.000000 1574 +factor 0 9 4.753590 0.000000 1544 +desir 0 9 4.753590 0.000000 1542 +exact 0 9 4.753590 0.000000 1509 +classifi 0 9 4.753590 0.000000 1537 +hundr 0 9 4.753590 0.000000 1528 +correctli 0 9 4.753590 0.000000 1478 +russel 0 9 4.753590 0.000000 1507 +morgan 0 9 4.753590 0.000000 1484 +curv 0 8 4.875197 0.000000 1656 +mass 0 8 4.875197 0.000000 1732 +isol 0 8 4.875197 0.000000 1663 +textur 0 8 4.875197 0.000000 1677 +quantit 0 8 4.875197 0.000000 1654 +grew 0 8 4.875197 0.000000 1742 +judg 0 8 4.875197 0.000000 1644 +aaai 0 8 4.875197 0.000000 1750 +replac 0 8 4.875197 0.000000 1668 +angel 0 8 4.875197 0.000000 1779 +boundari 0 7 5.010635 0.000000 1929 +analyt 0 7 5.010635 0.000000 1913 +hunt 0 7 5.010635 0.000000 1798 +converg 0 7 5.010635 0.000000 1844 +smooth 0 7 5.010635 0.000000 1855 +densiti 0 7 5.010635 0.000000 1927 +ruth 0 7 5.010635 0.000000 1870 +chronicl 0 7 5.010635 0.000000 1952 +capit 0 7 5.010635 0.000000 1957 +nine 0 6 5.164786 0.000000 2047 +plane 0 6 5.164786 0.000000 2187 +olvi 0 6 5.164786 0.000000 2109 +onto 0 6 5.164786 0.000000 2089 +versu 0 6 5.164786 0.000000 2052 +averag 0 6 5.164786 0.000000 2098 +nuclear 0 5 5.347108 0.000000 2576 +medicin 0 5 5.347108 0.000000 2448 +began 0 5 5.347108 0.000000 2498 +highlight 0 5 5.347108 0.000000 2340 +cell 0 5 5.347108 0.000000 2274 +snake 0 5 5.347108 0.000000 2281 +accuraci 0 5 5.347108 0.000000 2450 +shift 0 5 5.347108 0.000000 2357 +kaufmann 0 5 5.347108 0.000000 2254 +houston 0 5 5.347108 0.000000 2460 +diagnosi 0 4 5.568345 0.000000 3027 +surviv 0 4 5.568345 0.000000 2734 +aspir 0 4 5.568345 0.000000 3019 +popular 0 4 5.568345 0.000000 2802 +biomed 0 4 5.568345 0.000000 2905 +writer 0 4 5.568345 0.000000 2783 +sole 0 4 5.568345 0.000000 2592 +assess 0 4 5.568345 0.000000 2724 +kristin 0 4 5.568345 0.000000 3089 +prospect 0 4 5.568345 0.000000 3013 +cancer 1 3 5.857933 5.857933 4032 +breast 0 3 5.857933 0.000000 4033 +recurr 0 3 5.857933 0.000000 3740 +diagnos 0 3 5.857933 0.000000 3968 +diseas 0 3 5.857933 0.000000 3635 +surgeri 0 3 5.857933 0.000000 3975 +citat 0 3 5.857933 0.000000 3617 +bennett 0 3 5.857933 0.000000 4024 +microscop 0 3 5.857933 0.000000 4035 +confid 0 3 5.857933 0.000000 3691 +pain 0 3 5.857933 0.000000 3460 +chronolog 0 3 5.857933 0.000000 4034 +man 0 3 5.857933 0.000000 3417 +detroit 0 3 5.857933 0.000000 3565 +paulb 0 3 5.857933 0.000000 4036 +wolberg 0 2 6.263398 0.000000 6218 +prognosi 0 2 6.263398 0.000000 6222 +patient 0 2 6.263398 0.000000 6223 +benign 0 2 6.263398 0.000000 4893 +prognost 0 2 6.263398 0.000000 6221 +plot 0 2 6.263398 0.000000 4236 +milwauke 0 2 6.263398 0.000000 5797 +rudi 0 2 6.263398 0.000000 5487 +multisurfac 0 2 6.263398 0.000000 6224 +ofthi 0 2 6.263398 0.000000 5836 +grabber 0 2 6.263398 0.000000 5521 +nucleu 0 2 6.263398 0.000000 4302 +thenorm 0 2 6.263398 0.000000 4412 +ofvari 0 2 6.263398 0.000000 4582 +twelfth 0 2 6.263398 0.000000 5035 +icml 0 2 6.263398 0.000000 5669 +prime 0 2 6.263398 0.000000 6099 +computer 0 2 6.263398 0.000000 6219 +cope 0 2 6.263398 0.000000 6050 +nevada 0 2 6.263398 0.000000 4875 +malign 0 1 6.957497 0.000000 19350 +nuclei 0 1 6.957497 0.000000 19351 +cytolog 0 1 6.957497 0.000000 19352 +biopsi 0 1 6.957497 0.000000 19353 +oncolog 0 1 6.957497 0.000000 19354 +needl 0 1 6.957497 0.000000 19355 +xcyt 0 1 6.957497 0.000000 19356 +setiono 0 1 6.957497 0.000000 19357 +ofeach 0 1 6.957497 0.000000 19358 +ofdiseas 0 1 6.957497 0.000000 19359 +lymph 0 1 6.957497 0.000000 19360 +histolog 0 1 6.957497 0.000000 19361 +heisei 0 1 6.957497 0.000000 19362 +prognosismachin 0 1 6.957497 0.000000 19363 +prognosisthi 0 1 6.957497 0.000000 19364 +learningapproach 0 1 6.957497 0.000000 19365 +ofbreast 0 1 6.957497 0.000000 19366 +betweenprof 0 1 6.957497 0.000000 19367 +anddr 0 1 6.957497 0.000000 19368 +wolbergof 0 1 6.957497 0.000000 19369 +thepress 0 1 6.957497 0.000000 19370 +inmarch 0 1 6.957497 0.000000 19371 +linksdiagnosisthi 0 1 6.957497 0.000000 19372 +diagnosebreast 0 1 6.957497 0.000000 19373 +heidentifi 0 1 6.957497 0.000000 19374 +consideredrelev 0 1 6.957497 0.000000 19375 +andtwo 0 1 6.957497 0.000000 19376 +aclassifi 0 1 6.957497 0.000000 19377 +thatsuccessfulli 0 1 6.957497 0.000000 19378 +iswel 0 1 6.957497 0.000000 19379 +streetto 0 1 6.957497 0.000000 19380 +adigit 0 1 6.957497 0.000000 19381 +consolid 0 1 6.957497 0.000000 19382 +clinicalpractic 0 1 6.957497 0.000000 19383 +thenmount 0 1 6.957497 0.000000 19384 +stain 0 1 6.957497 0.000000 19385 +cellularnuclei 0 1 6.957497 0.000000 19386 +arewel 0 1 6.957497 0.000000 19387 +afram 0 1 6.957497 0.000000 19388 +mous 0 1 6.957497 0.000000 19389 +showingxcyt 0 1 6.957497 0.000000 19390 +thisfas 0 1 6.957497 0.000000 19391 +standarderror 0 1 6.957497 0.000000 19392 +wasconstruct 0 1 6.957497 0.000000 19393 +thisclassifi 0 1 6.957497 0.000000 19394 +threeof 0 1 6.957497 0.000000 19395 +bayesiancomput 0 1 6.957497 0.000000 19396 +thesedens 0 1 6.957497 0.000000 19397 +consecut 0 1 6.957497 0.000000 19398 +newpati 0 1 6.957497 0.000000 19399 +didxcyt 0 1 6.957497 0.000000 19400 +suspici 0 1 6.957497 0.000000 19401 +estimatedprob 0 1 6.957497 0.000000 19402 +goodtest 0 1 6.957497 0.000000 19403 +petsegment 0 1 6.957497 0.000000 19404 +inthes 0 1 6.957497 0.000000 19405 +prognosisth 0 1 6.957497 0.000000 19406 +haveapproach 0 1 6.957497 0.000000 19407 +inputfeatur 0 1 6.957497 0.000000 19408 +atim 0 1 6.957497 0.000000 19409 +censor 0 1 6.957497 0.000000 19410 +linearprogram 0 1 6.957497 0.000000 19411 +fornew 0 1 6.957497 0.000000 19412 +caseswith 0 1 6.957497 0.000000 19413 +anindividu 0 1 6.957497 0.000000 19414 +intoxcyt 0 1 6.957497 0.000000 19415 +ourorigin 0 1 6.957497 0.000000 19416 +thereforeha 0 1 6.957497 0.000000 19417 +freeafter 0 1 6.957497 0.000000 19418 +xcytgiv 0 1 6.957497 0.000000 19419 +tumors 0 1 6.957497 0.000000 19420 +corrobor 0 1 6.957497 0.000000 19421 +axillari 0 1 6.957497 0.000000 19422 +bibliographylink 0 1 6.957497 0.000000 19423 +notlink 0 1 6.957497 0.000000 19424 +patholog 0 1 6.957497 0.000000 19425 +priediti 0 1 6.957497 0.000000 19426 +teagu 0 1 6.957497 0.000000 19427 +indetermin 0 1 6.957497 0.000000 19428 +imit 0 1 6.957497 0.000000 19429 +sentinel 0 1 6.957497 0.000000 19430 +marilynn 0 1 6.957497 0.000000 19431 +marchion 0 1 6.957497 0.000000 19432 +sorel 0 1 6.957497 0.000000 19433 +surgic 0 1 6.957497 0.000000 19434 +column 0 1 6.957497 0.000000 19435 +schooloth 0 1 6.957497 0.000000 19436 +oncolink 0 1 6.957497 0.000000 19437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..79d3cfe9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +assign 0 135 1.945910 0.000000 66 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +confer 0 126 2.079442 0.000000 100 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +theori 0 111 2.197225 0.000000 127 +follow 0 92 2.397895 0.000000 143 +section 0 94 2.397895 0.000000 149 +proceed 0 93 2.397895 0.000000 152 +contain 0 81 2.484907 0.000000 174 +learn 0 86 2.484907 0.000000 170 +requir 0 81 2.484907 0.000000 167 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +journal 0 83 2.484907 0.000000 183 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +solv 0 73 2.639057 0.000000 234 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +view 0 70 2.708050 0.000000 254 +goal 0 66 2.708050 0.000000 250 +artifici 0 63 2.772589 0.000000 280 +street 0 63 2.772589 0.000000 293 +descript 0 64 2.772589 0.000000 271 +best 0 59 2.833213 0.000000 299 +point 1 58 2.890372 2.890372 319 +space 0 57 2.890372 0.000000 310 +found 0 53 2.944439 0.000000 337 +particular 0 51 2.995732 0.000000 352 +much 0 52 2.995732 0.000000 349 +set 0 50 3.044522 0.000000 361 +approach 0 48 3.044522 0.000000 366 +numer 0 49 3.044522 0.000000 369 +could 0 46 3.091042 0.000000 383 +describ 0 45 3.135494 0.000000 400 +anoth 0 45 3.135494 0.000000 408 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +linear 0 41 3.218876 0.000000 431 +howev 0 41 3.218876 0.000000 422 +error 0 40 3.258097 0.000000 449 +transact 0 39 3.258097 0.000000 438 +societi 0 40 3.258097 0.000000 456 +paul 0 38 3.295837 0.000000 471 +close 0 38 3.295837 0.000000 465 +tree 0 36 3.367296 0.000000 492 +procedur 0 36 3.367296 0.000000 488 +bibliographi 0 34 3.401197 0.000000 518 +concept 0 32 3.465736 0.000000 537 +neural 0 30 3.555348 0.000000 578 +packag 0 28 3.610918 0.000000 614 +determin 0 27 3.637586 0.000000 630 +pattern 0 24 3.761200 0.000000 689 +reach 0 24 3.761200 0.000000 688 +togeth 0 23 3.806662 0.000000 714 +sequenc 0 23 3.806662 0.000000 734 +decis 0 23 3.806662 0.000000 728 +avoid 0 21 3.912023 0.000000 799 +separ 0 19 4.007333 0.000000 844 +region 0 19 4.007333 0.000000 875 +mostli 0 19 4.007333 0.000000 869 +histori 0 19 4.007333 0.000000 853 +dimension 0 18 4.060443 0.000000 909 +minim 0 18 4.060443 0.000000 887 +stop 0 17 4.110874 0.000000 942 +otherwis 0 17 4.110874 0.000000 922 +layer 0 17 4.110874 0.000000 926 +brief 0 16 4.174387 0.000000 1001 +choos 0 16 4.174387 0.000000 964 +advantag 0 16 4.174387 0.000000 987 +cognit 0 16 4.174387 0.000000 986 +side 0 15 4.248495 0.000000 1022 +nonlinear 0 14 4.317488 0.000000 1107 +finit 0 14 4.317488 0.000000 1106 +split 0 14 4.317488 0.000000 1078 +matlab 0 14 4.317488 0.000000 1081 +polynomi 0 14 4.317488 0.000000 1069 +shown 0 14 4.317488 0.000000 1080 +train 0 14 4.317488 0.000000 1066 +difficulti 0 13 4.382027 0.000000 1132 +nick 0 13 4.382027 0.000000 1180 +robust 0 12 4.465908 0.000000 1271 +node 0 11 4.553877 0.000000 1326 +branch 0 11 4.553877 0.000000 1318 +faster 0 11 4.553877 0.000000 1323 +surfac 0 9 4.753590 0.000000 1574 +mangasarian 0 9 4.753590 0.000000 1570 +distanc 0 9 4.753590 0.000000 1500 +formul 0 8 4.875197 0.000000 1733 +paramet 0 7 5.010635 0.000000 1796 +whenev 0 7 5.010635 0.000000 1883 +plane 0 6 5.164786 0.000000 2187 +variant 0 6 5.164786 0.000000 2043 +averag 0 6 5.164786 0.000000 2098 +hidden 0 6 5.164786 0.000000 1987 +proce 0 6 5.164786 0.000000 2114 +li 0 5 5.347108 0.000000 2500 +bradlei 0 5 5.347108 0.000000 2554 +disjoint 0 4 5.568345 0.000000 2709 +repeat 0 4 5.568345 0.000000 2798 +kristin 0 4 5.568345 0.000000 3089 +bennett 0 3 5.857933 0.000000 4024 +todetermin 0 3 5.857933 0.000000 3182 +similarli 0 3 5.857933 0.000000 3241 +backpropag 0 3 5.857933 0.000000 3507 +chronolog 0 3 5.857933 0.000000 4034 +paulb 0 3 5.857933 0.000000 4036 +multisurfac 0 2 6.263398 0.000000 6224 +linearli 0 2 6.263398 0.000000 6220 +euclidean 0 2 6.263398 0.000000 5198 +quadrat 0 2 6.263398 0.000000 4497 +oneset 0 2 6.263398 0.000000 6134 +cart 0 2 6.263398 0.000000 5874 +mino 0 2 6.263398 0.000000 6208 +midwest 0 2 6.263398 0.000000 6225 +discrimin 0 2 6.263398 0.000000 6140 +misclassifi 0 1 6.957497 0.000000 19438 +euclideanspac 0 1 6.957497 0.000000 19439 +programmingpattern 0 1 6.957497 0.000000 19440 +programmingthi 0 1 6.957497 0.000000 19441 +outlinemathemat 0 1 6.957497 0.000000 19442 +failon 0 1 6.957497 0.000000 19443 +discard 0 1 6.957497 0.000000 19444 +eachnod 0 1 6.957497 0.000000 19445 +thesam 0 1 6.957497 0.000000 19446 +astrain 0 1 6.957497 0.000000 19447 +traditionallearn 0 1 6.957497 0.000000 19448 +inthat 0 1 6.957497 0.000000 19449 +insepar 0 1 6.957497 0.000000 19450 +orsa 0 1 6.957497 0.000000 19451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..0570d718 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +dayton 0 119 2.079442 0.000000 104 +version 0 113 2.197225 0.000000 122 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +contain 0 81 2.484907 0.000000 174 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +west 0 83 2.484907 0.000000 192 +meet 0 72 2.639057 0.000000 229 +symposium 0 72 2.639057 0.000000 238 +copi 0 63 2.772589 0.000000 284 +explor 0 58 2.890372 0.000000 324 +tabl 0 51 2.995732 0.000000 346 +made 0 44 3.135494 0.000000 398 +describ 0 45 3.135494 0.000000 400 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +staff 0 36 3.367296 0.000000 490 +common 0 30 3.555348 0.000000 574 +releas 0 28 3.610918 0.000000 616 +symbol 0 27 3.637586 0.000000 620 +effort 0 26 3.688879 0.000000 652 +scalabl 0 24 3.761200 0.000000 705 +hypertext 0 19 4.007333 0.000000 865 +statu 0 18 4.060443 0.000000 885 +sigmetr 0 13 4.382027 0.000000 1173 +arpa 0 11 4.553877 0.000000 1369 +paradyn 1 9 4.753590 4.753590 1614 +routin 0 9 4.753590 0.000000 1549 +bart 0 9 4.753590 0.000000 1559 +poster 0 7 5.010635 0.000000 1814 +antonio 0 6 5.164786 0.000000 2186 +restaur 0 6 5.164786 0.000000 2230 +temporari 0 6 5.164786 0.000000 2090 +panel 0 5 5.347108 0.000000 2463 +elsewher 0 5 5.347108 0.000000 2444 +super 0 3 5.857933 0.000000 3918 +insan 0 3 5.857933 0.000000 4006 +parallellanguag 0 3 5.857933 0.000000 4026 +informationthi 0 2 6.263398 0.000000 5477 +ofreleas 0 2 6.263398 0.000000 4860 +newapproach 0 2 6.263398 0.000000 6047 +blizzard 0 2 6.263398 0.000000 6226 +projectdepart 0 2 6.263398 0.000000 6125 +edufax 0 2 6.263398 0.000000 5479 +csto 0 1 6.957497 0.000000 19452 +presentationthi 0 1 6.957497 0.000000 19453 +goalsth 0 1 6.957497 0.000000 19454 +manualsstatu 0 1 6.957497 0.000000 19455 +reporta 0 1 6.957497 0.000000 19456 +inflorida 0 1 6.957497 0.000000 19457 +tocompil 0 1 6.957497 0.000000 19458 +postera 0 1 6.957497 0.000000 19459 +spdt 0 1 6.957497 0.000000 19460 +toolsyou 0 1 6.957497 0.000000 19461 +placehold 0 1 6.957497 0.000000 19462 +informationparadyn 0 1 6.957497 0.000000 19463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..b9f0a434 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +send 0 114 2.197225 0.000000 109 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +numer 0 49 3.044522 0.000000 369 +math 0 44 3.135494 0.000000 402 +steven 0 17 4.110874 0.000000 953 +employ 0 12 4.465908 0.000000 1291 +depth 0 8 4.875197 0.000000 1636 +parker 1 1 6.957497 6.957497 19464 +prism 0 1 6.957497 0.000000 19465 +projectfal 0 1 6.957497 0.000000 19466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..b3229938 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +postscript 0 131 2.079442 0.000000 90 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +technic 0 100 2.302585 0.000000 140 +techniqu 0 99 2.302585 0.000000 138 +text 0 98 2.302585 0.000000 133 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +learn 0 86 2.484907 0.000000 170 +journal 0 83 2.484907 0.000000 183 +decemb 0 80 2.564949 0.000000 215 +summari 0 73 2.639057 0.000000 237 +street 0 63 2.772589 0.000000 293 +abstract 0 62 2.772589 0.000000 276 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +advisor 0 51 2.995732 0.000000 355 +format 0 48 3.044522 0.000000 356 +done 0 47 3.091042 0.000000 381 +featur 0 46 3.091042 0.000000 386 +netscap 0 44 3.135494 0.000000 395 +linear 0 41 3.218876 0.000000 431 +submit 0 39 3.258097 0.000000 440 +paul 0 38 3.295837 0.000000 471 +download 0 36 3.367296 0.000000 489 +print 0 34 3.401197 0.000000 503 +eduoffic 0 33 3.433987 0.000000 531 +neural 0 30 3.555348 0.000000 578 +cluster 0 28 3.610918 0.000000 612 +revis 0 26 3.688879 0.000000 640 +store 0 24 3.761200 0.000000 693 +viewer 0 21 3.912023 0.000000 787 +minim 0 18 4.060443 0.000000 887 +accept 0 18 4.060443 0.000000 879 +ascii 0 15 4.248495 0.000000 1032 +nonlinear 0 14 4.317488 0.000000 1107 +nick 0 13 4.382027 0.000000 1180 +nasa 0 13 4.382027 0.000000 1188 +induct 0 11 4.553877 0.000000 1304 +mangasarian 0 9 4.753590 0.000000 1570 +dead 0 7 5.010635 0.000000 1840 +fish 0 6 5.164786 0.000000 2207 +bradlei 0 5 5.347108 0.000000 2554 +shift 0 5 5.347108 0.000000 2357 +frog 0 5 5.347108 0.000000 2479 +concav 0 4 5.568345 0.000000 2808 +paulb 0 3 5.857933 0.000000 4036 +csphone 0 3 5.857933 0.000000 3394 +backcountri 0 3 5.857933 0.000000 3686 +espnet 0 2 6.263398 0.000000 5634 +bradleygradu 0 1 6.957497 0.000000 19467 +mangasarianinterestsmathemat 0 1 6.957497 0.000000 19468 +programmingmachin 0 1 6.957497 0.000000 19469 +learningfli 0 1 6.957497 0.000000 19470 +currentlyb 0 1 6.957497 0.000000 19471 +madisonmathemat 0 1 6.957497 0.000000 19472 +thiswork 0 1 6.957497 0.000000 19473 +olvimangasarian 0 1 6.957497 0.000000 19474 +publicationsal 0 1 6.957497 0.000000 19475 +picksthes 0 1 6.957497 0.000000 19476 +grate 0 1 6.957497 0.000000 19477 +timesfax 0 1 6.957497 0.000000 19478 +uroullett 0 1 6.957497 0.000000 19479 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..8d5a1d8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +develop 1 174 1.791759 1.791759 53 +madison 0 165 1.791759 0.000000 55 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +intern 0 108 2.197225 0.000000 128 +site 0 106 2.197225 0.000000 119 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +technic 0 100 2.302585 0.000000 140 +techniqu 0 99 2.302585 0.000000 138 +book 0 99 2.302585 0.000000 131 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +journal 0 83 2.484907 0.000000 183 +thing 0 84 2.484907 0.000000 189 +help 0 83 2.484907 0.000000 175 +info 0 85 2.484907 0.000000 176 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +resum 0 79 2.564949 0.000000 217 +decemb 0 80 2.564949 0.000000 215 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +tuesdai 0 73 2.639057 0.000000 219 +integr 0 67 2.708050 0.000000 245 +receiv 0 66 2.708050 0.000000 244 +prof 0 64 2.772589 0.000000 273 +laboratori 0 63 2.772589 0.000000 292 +creat 0 63 2.772589 0.000000 277 +guid 0 63 2.772589 0.000000 267 +experi 0 64 2.772589 0.000000 283 +room 0 59 2.833213 0.000000 301 +overview 0 56 2.890372 0.000000 323 +think 0 57 2.890372 0.000000 314 +major 0 56 2.890372 0.000000 315 +cover 0 55 2.944439 0.000000 329 +basic 0 50 3.044522 0.000000 360 +cool 0 49 3.044522 0.000000 374 +standard 0 48 3.044522 0.000000 365 +join 0 39 3.258097 0.000000 457 +multipl 0 39 3.258097 0.000000 453 +mean 0 37 3.332205 0.000000 477 +articl 0 33 3.433987 0.000000 530 +photo 0 31 3.496508 0.000000 561 +great 0 27 3.637586 0.000000 626 +team 0 27 3.637586 0.000000 625 +rather 0 26 3.688879 0.000000 642 +although 0 25 3.737670 0.000000 667 +lab 0 24 3.761200 0.000000 698 +inth 0 22 3.850148 0.000000 741 +tell 0 21 3.912023 0.000000 777 +toolkit 0 20 3.951244 0.000000 835 +eric 0 19 4.007333 0.000000 870 +excel 0 19 4.007333 0.000000 868 +dimension 0 18 4.060443 0.000000 909 +anywai 0 15 4.248495 0.000000 1047 +webmast 0 15 4.248495 0.000000 1045 +biologi 0 15 4.248495 0.000000 1049 +everyth 0 13 4.382027 0.000000 1169 +rest 0 12 4.465908 0.000000 1259 +guest 0 12 4.465908 0.000000 1220 +peter 0 11 4.553877 0.000000 1316 +sens 0 11 4.553877 0.000000 1305 +eight 0 11 4.553877 0.000000 1331 +label 0 10 4.653960 0.000000 1423 +star 0 8 4.875197 0.000000 1717 +scout 0 7 5.010635 0.000000 1903 +fortun 0 7 5.010635 0.000000 1872 +molecular 0 7 5.010635 0.000000 1887 +isthat 0 4 5.568345 0.000000 2723 +biomed 0 4 5.568345 0.000000 2905 +specialist 0 3 5.857933 0.000000 3319 +microscop 0 3 5.857933 0.000000 4035 +pete 0 3 5.857933 0.000000 3865 +devri 0 2 6.263398 0.000000 6145 +foolish 0 2 6.263398 0.000000 6108 +hazen 0 2 6.263398 0.000000 6143 +nearli 0 2 6.263398 0.000000 5608 +magellan 0 2 6.263398 0.000000 5825 +isdescrib 0 2 6.263398 0.000000 5444 +molecularbiolog 0 1 6.957497 0.000000 19480 +embryo 0 1 6.957497 0.000000 19481 +westdayton 0 1 6.957497 0.000000 19482 +pdevri 0 1 6.957497 0.000000 19483 +andthen 0 1 6.957497 0.000000 19484 +iread 0 1 6.957497 0.000000 19485 +topai 0 1 6.957497 0.000000 19486 +alsoprovid 0 1 6.957497 0.000000 19487 +folksat 0 1 6.957497 0.000000 19488 +microscopi 0 1 6.957497 0.000000 19489 +seancarrol 0 1 6.957497 0.000000 19490 +confoc 0 1 6.957497 0.000000 19491 +lotof 0 1 6.957497 0.000000 19492 +johnwhit 0 1 6.957497 0.000000 19493 +imrstaff 0 1 6.957497 0.000000 19494 +augustnd 0 1 6.957497 0.000000 19495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..7e24bc85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +check 0 115 2.197225 0.000000 118 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +stuff 0 87 2.484907 0.000000 171 +school 0 84 2.484907 0.000000 188 +state 0 76 2.564949 0.000000 207 +dept 0 64 2.772589 0.000000 291 +visit 0 63 2.772589 0.000000 288 +street 0 63 2.772589 0.000000 293 +major 0 56 2.890372 0.000000 315 +variou 0 56 2.890372 0.000000 317 +tabl 0 51 2.995732 0.000000 346 +friend 0 48 3.044522 0.000000 376 +life 0 50 3.044522 0.000000 375 +featur 0 46 3.091042 0.000000 386 +music 0 42 3.218876 0.000000 436 +movi 0 40 3.258097 0.000000 459 +origin 0 38 3.295837 0.000000 472 +industri 0 38 3.295837 0.000000 464 +either 0 35 3.401197 0.000000 506 +everi 0 34 3.401197 0.000000 519 +india 0 32 3.465736 0.000000 550 +though 0 27 3.637586 0.000000 622 +altern 0 26 3.688879 0.000000 641 +bookmark 0 26 3.688879 0.000000 639 +log 0 19 4.007333 0.000000 857 +north 0 19 4.007333 0.000000 873 +countri 0 15 4.248495 0.000000 1059 +galleri 0 13 4.382027 0.000000 1192 +stai 0 12 4.465908 0.000000 1215 +avenu 0 12 4.465908 0.000000 1277 +undergrad 0 9 4.753590 0.000000 1589 +kanpur 0 8 4.875197 0.000000 1744 +hack 0 7 5.010635 0.000000 1950 +gatewai 0 7 5.010635 0.000000 1942 +corner 0 7 5.010635 0.000000 1909 +rock 0 6 5.164786 0.000000 2164 +blue 0 6 5.164786 0.000000 2227 +chat 0 6 5.164786 0.000000 2128 +metal 0 4 5.568345 0.000000 3079 +randal 0 4 5.568345 0.000000 2776 +venkat 0 4 5.568345 0.000000 2702 +slave 0 3 5.857933 0.000000 3959 +kerala 0 3 5.857933 0.000000 3749 +assistantship 0 3 5.857933 0.000000 3660 +acad 0 3 5.857933 0.000000 3847 +icon 0 3 5.857933 0.000000 3362 +plakal 0 2 6.263398 0.000000 5568 +blah 0 2 6.263398 0.000000 5695 +geeki 0 2 6.263398 0.000000 5823 +iitk 0 2 6.263398 0.000000 6227 +snap 0 2 6.263398 0.000000 4962 +nerd 0 2 6.263398 0.000000 5231 +acknowledg 0 2 6.263398 0.000000 6062 +nifti 0 2 6.263398 0.000000 5504 +igor 0 2 6.263398 0.000000 6183 +ivanisev 0 2 6.263398 0.000000 6184 +calcutta 0 1 6.957497 0.000000 19496 +bosco 0 1 6.957497 0.000000 19497 +yumpe 0 1 6.957497 0.000000 19498 +manoj 0 1 6.957497 0.000000 19499 +universityofwisconsin 0 1 6.957497 0.000000 19500 +salesian 0 1 6.957497 0.000000 19501 +stare 0 1 6.957497 0.000000 19502 +barrel 0 1 6.957497 0.000000 19503 +nerdi 0 1 6.957497 0.000000 19504 +seealso 0 1 6.957497 0.000000 19505 +pinup 0 1 6.957497 0.000000 19506 +suresh 0 1 6.957497 0.000000 19507 +wisecrack 0 1 6.957497 0.000000 19508 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..abb6db5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +theori 0 111 2.197225 0.000000 127 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +sinc 0 90 2.397895 0.000000 159 +build 0 85 2.484907 0.000000 184 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +want 0 79 2.564949 0.000000 199 +meet 0 72 2.639057 0.000000 229 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +creat 0 63 2.772589 0.000000 277 +explor 0 58 2.890372 0.000000 324 +finger 0 52 2.995732 0.000000 354 +music 0 42 3.218876 0.000000 436 +download 0 36 3.367296 0.000000 489 +random 0 34 3.401197 0.000000 511 +india 0 32 3.465736 0.000000 550 +storag 0 31 3.496508 0.000000 553 +packag 0 28 3.610918 0.000000 614 +bookmark 0 26 3.688879 0.000000 639 +jeff 0 25 3.737670 0.000000 673 +hierarchi 0 22 3.850148 0.000000 744 +spend 0 19 4.007333 0.000000 850 +estim 0 17 4.110874 0.000000 930 +dilbert 0 16 4.174387 0.000000 996 +princeton 0 15 4.248495 0.000000 1042 +econom 0 13 4.382027 0.000000 1184 +vldb 0 10 4.653960 0.000000 1470 +naughton 0 10 4.653960 0.000000 1450 +presenc 0 8 4.875197 0.000000 1671 +prasad 0 6 5.164786 0.000000 2126 +invest 0 6 5.164786 0.000000 2153 +aggreg 0 6 5.164786 0.000000 2219 +deshpand 0 5 5.347108 0.000000 2431 +multidimension 0 4 5.568345 0.000000 3091 +meanwhil 0 3 5.857933 0.000000 3129 +manageri 0 2 6.263398 0.000000 5135 +constuct 0 1 6.957497 0.000000 19509 +depar 0 1 6.957497 0.000000 19510 +multidimensionalaggreg 0 1 6.957497 0.000000 19511 +timex 0 1 6.957497 0.000000 19512 +comix 0 1 6.957497 0.000000 19513 +hakuna 0 1 6.957497 0.000000 19514 +matata 0 1 6.957497 0.000000 19515 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..8907934c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +wisc 0 242 1.386294 0.000000 33 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +databas 0 122 2.079442 0.000000 86 +assist 0 112 2.197225 0.000000 113 +west 0 83 2.484907 0.000000 192 +help 0 83 2.484907 0.000000 175 +educ 0 86 2.484907 0.000000 191 +resum 0 79 2.564949 0.000000 217 +summari 0 73 2.639057 0.000000 237 +html 0 75 2.639057 0.000000 235 +street 0 63 2.772589 0.000000 293 +prof 0 64 2.772589 0.000000 273 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +advisor 0 51 2.995732 0.000000 355 +basic 0 50 3.044522 0.000000 360 +india 0 32 3.465736 0.000000 550 +yanni 0 8 4.875197 0.000000 1713 +ioannidi 0 8 4.875197 0.000000 1714 +asha 0 3 5.857933 0.000000 4037 +poosala 0 2 6.263398 0.000000 6228 +vishi 0 1 6.957497 0.000000 19516 +viswanath 0 1 6.957497 0.000000 19517 +reseach 0 1 6.957497 0.000000 19518 +voluntari 0 1 6.957497 0.000000 19519 +interestsuw 0 1 6.957497 0.000000 19520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..8c7c4c97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +mani 0 92 2.397895 0.000000 150 +proceed 0 93 2.397895 0.000000 152 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +ieee 0 86 2.484907 0.000000 190 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +order 0 69 2.708050 0.000000 249 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +knowledg 0 67 2.708050 0.000000 243 +practic 0 70 2.708050 0.000000 246 +evalu 0 64 2.772589 0.000000 266 +import 0 65 2.772589 0.000000 282 +complex 0 64 2.772589 0.000000 269 +plan 0 65 2.772589 0.000000 272 +collect 0 65 2.772589 0.000000 268 +previou 0 62 2.772589 0.000000 290 +result 0 65 2.772589 0.000000 281 +abstract 0 62 2.772589 0.000000 276 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +street 0 63 2.772589 0.000000 293 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +march 0 61 2.833213 0.000000 295 +variou 0 56 2.890372 0.000000 317 +detail 0 57 2.890372 0.000000 321 +publish 0 57 2.890372 0.000000 326 +explor 0 58 2.890372 0.000000 324 +sever 0 56 2.890372 0.000000 322 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +case 0 51 2.995732 0.000000 351 +set 0 50 3.044522 0.000000 361 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +could 0 46 3.091042 0.000000 383 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +answer 0 45 3.135494 0.000000 391 +natur 0 44 3.135494 0.000000 406 +algebra 0 45 3.135494 0.000000 394 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +even 0 45 3.135494 0.000000 393 +combin 0 42 3.218876 0.000000 421 +howev 0 41 3.218876 0.000000 422 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +form 0 39 3.258097 0.000000 443 +join 0 39 3.258097 0.000000 457 +map 0 39 3.258097 0.000000 452 +probabl 0 40 3.258097 0.000000 455 +theoret 0 39 3.258097 0.000000 446 +littl 0 39 3.258097 0.000000 454 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +mean 0 37 3.332205 0.000000 477 +cost 0 37 3.332205 0.000000 480 +multi 0 36 3.367296 0.000000 493 +next 0 34 3.401197 0.000000 517 +singl 0 34 3.401197 0.000000 510 +either 0 35 3.401197 0.000000 506 +michael 0 35 3.401197 0.000000 514 +queri 0 33 3.433987 0.000000 524 +express 0 32 3.465736 0.000000 540 +kind 0 32 3.465736 0.000000 541 +idea 0 32 3.465736 0.000000 545 +given 0 32 3.465736 0.000000 538 +extend 0 32 3.465736 0.000000 539 +transform 0 32 3.465736 0.000000 542 +posit 0 31 3.496508 0.000000 552 +storag 0 31 3.496508 0.000000 553 +scientist 0 31 3.496508 0.000000 560 +domain 0 30 3.555348 0.000000 564 +exist 0 30 3.555348 0.000000 569 +specifi 0 30 3.555348 0.000000 568 +built 0 29 3.583519 0.000000 592 +consid 0 29 3.583519 0.000000 590 +propos 0 28 3.610918 0.000000 602 +weather 0 28 3.610918 0.000000 618 +ask 0 28 3.610918 0.000000 597 +scale 0 28 3.610918 0.000000 613 +except 0 28 3.610918 0.000000 607 +framework 0 28 3.610918 0.000000 606 +client 0 25 3.737670 0.000000 679 +valu 0 25 3.737670 0.000000 665 +wai 0 25 3.737670 0.000000 662 +strategi 0 25 3.737670 0.000000 682 +demonstr 0 24 3.761200 0.000000 694 +store 0 24 3.761200 0.000000 693 +daili 0 24 3.761200 0.000000 706 +sequenc 1 23 3.806662 3.806662 734 +input 0 23 3.806662 0.000000 727 +thread 0 23 3.806662 0.000000 722 +defin 0 22 3.850148 0.000000 746 +sequenti 0 22 3.850148 0.000000 745 +sort 0 22 3.850148 0.000000 738 +serv 0 22 3.850148 0.000000 758 +identifi 0 22 3.850148 0.000000 760 +disk 0 22 3.850148 0.000000 747 +deal 0 22 3.850148 0.000000 736 +instead 0 22 3.850148 0.000000 756 +similar 0 21 3.912023 0.000000 771 +util 0 21 3.912023 0.000000 774 +sigmod 0 19 4.007333 0.000000 877 +record 0 18 4.060443 0.000000 890 +statu 0 18 4.060443 0.000000 885 +event 0 18 4.060443 0.000000 896 +account 0 18 4.060443 0.000000 882 +expand 0 17 4.110874 0.000000 928 +medic 0 17 4.110874 0.000000 958 +monitor 0 17 4.110874 0.000000 941 +weekli 0 17 4.110874 0.000000 919 +estim 0 17 4.110874 0.000000 930 +ramakrishnan 0 16 4.174387 0.000000 972 +advantag 0 16 4.174387 0.000000 987 +easi 0 16 4.174387 0.000000 969 +livni 0 15 4.248495 0.000000 1053 +indic 0 15 4.248495 0.000000 1013 +stream 0 15 4.248495 0.000000 1015 +miron 0 14 4.317488 0.000000 1110 +manner 0 14 4.317488 0.000000 1074 +embed 0 14 4.317488 0.000000 1102 +convent 0 14 4.317488 0.000000 1072 +econom 0 13 4.382027 0.000000 1184 +social 0 13 4.382027 0.000000 1123 +opportun 0 13 4.382027 0.000000 1161 +composit 0 13 4.382027 0.000000 1150 +step 0 13 4.382027 0.000000 1138 +front 0 13 4.382027 0.000000 1154 +raghu 0 12 4.465908 0.000000 1212 +scan 0 12 4.465908 0.000000 1243 +buffer 0 12 4.465908 0.000000 1211 +insid 0 12 4.465908 0.000000 1262 +amount 0 12 4.465908 0.000000 1208 +uniqu 0 12 4.465908 0.000000 1228 +shore 0 11 4.553877 0.000000 1377 +regard 0 11 4.553877 0.000000 1309 +motiv 0 11 4.553877 0.000000 1346 +instanc 0 11 4.553877 0.000000 1322 +devis 0 10 4.653960 0.000000 1451 +relationship 0 10 4.653960 0.000000 1383 +reli 0 10 4.653960 0.000000 1411 +subset 0 10 4.653960 0.000000 1425 +vldb 0 10 4.653960 0.000000 1470 +cheng 0 10 4.653960 0.000000 1381 +declar 0 9 4.753590 0.000000 1526 +tempor 0 9 4.753590 0.000000 1584 +strength 0 9 4.753590 0.000000 1494 +compos 0 9 4.753590 0.000000 1527 +vice 0 9 4.753590 0.000000 1604 +lock 0 9 4.753590 0.000000 1551 +respect 0 9 4.753590 0.000000 1545 +meta 0 9 4.753590 0.000000 1505 +intermedi 0 9 4.753590 0.000000 1497 +observ 0 9 4.753590 0.000000 1578 +mode 0 9 4.753590 0.000000 1492 +pose 0 9 4.753590 0.000000 1535 +seshadri 0 7 5.010635 0.000000 1803 +pageth 0 7 5.010635 0.000000 1939 +notion 0 7 5.010635 0.000000 1947 +merg 0 7 5.010635 0.000000 1862 +whenev 0 7 5.010635 0.000000 1883 +therefor 0 7 5.010635 0.000000 1822 +supportfor 0 7 5.010635 0.000000 1854 +praveen 0 6 5.164786 0.000000 1996 +nest 0 6 5.164786 0.000000 2151 +feasibl 0 6 5.164786 0.000000 2157 +financi 0 6 5.164786 0.000000 2197 +histor 0 6 5.164786 0.000000 2085 +consequ 0 6 5.164786 0.000000 1989 +temporari 0 6 5.164786 0.000000 2090 +greater 0 5 5.347108 0.000000 2258 +treat 0 5 5.347108 0.000000 2521 +correl 0 5 5.347108 0.000000 2279 +dual 0 5 5.347108 0.000000 2522 +distinct 0 5 5.347108 0.000000 2319 +overlap 0 5 5.347108 0.000000 2368 +complementari 0 5 5.347108 0.000000 2523 +educomput 0 5 5.347108 0.000000 2524 +zoom 0 4 5.568345 0.000000 2961 +phenomena 0 4 5.568345 0.000000 2962 +flavor 0 4 5.568345 0.000000 2625 +richter 0 4 5.568345 0.000000 2957 +collaps 0 3 5.857933 0.000000 3729 +inadequ 0 3 5.857933 0.000000 3730 +tediou 0 3 5.857933 0.000000 3731 +ineffici 0 3 5.857933 0.000000 3457 +megabyt 0 3 5.857933 0.000000 3732 +claus 0 3 5.857933 0.000000 3733 +offset 0 3 5.857933 0.000000 3467 +hourli 0 3 5.857933 0.000000 3734 +thathav 0 3 5.857933 0.000000 3735 +serverarchitectur 0 3 5.857933 0.000000 3736 +comad 0 3 5.857933 0.000000 3737 +informationfor 0 3 5.857933 0.000000 3738 +sequin 0 2 6.263398 0.000000 5250 +earthquak 0 2 6.263398 0.000000 5251 +volcano 0 2 6.263398 0.000000 5252 +meteorolog 0 2 6.263398 0.000000 5253 +aredescrib 0 2 6.263398 0.000000 5254 +objectivescurr 0 2 6.263398 0.000000 5255 +statusmotiv 0 2 6.263398 0.000000 5256 +exampleseq 0 2 6.263398 0.000000 5257 +languageoptim 0 2 6.263398 0.000000 5258 +techniquesseq 0 2 6.263398 0.000000 5259 +developmentpublicationsrel 0 2 6.263398 0.000000 5260 +workcontact 0 2 6.263398 0.000000 5261 +informationproject 0 2 6.263398 0.000000 5262 +processingof 0 2 6.263398 0.000000 5263 +theseappl 0 2 6.263398 0.000000 5264 +metereolog 0 2 6.263398 0.000000 5265 +andbiolog 0 2 6.263398 0.000000 5266 +semanticstak 0 2 6.263398 0.000000 5267 +evaluationintegr 0 2 6.263398 0.000000 5268 +canstor 0 2 6.263398 0.000000 5269 +sequencesthes 0 2 6.263398 0.000000 5270 +themost 0 2 6.263398 0.000000 5271 +statusth 0 2 6.263398 0.000000 5272 +algebraicqueri 0 2 6.263398 0.000000 5273 +analogousto 0 2 6.263398 0.000000 5274 +candeclar 0 2 6.263398 0.000000 5275 +likesql 0 2 6.263398 0.000000 5276 +versa 0 2 6.263398 0.000000 5277 +querya 0 2 6.263398 0.000000 5278 +occurr 0 2 6.263398 0.000000 5279 +erupt 0 2 6.263398 0.000000 5280 +didth 0 2 6.263398 0.000000 5281 +groupbi 0 2 6.263398 0.000000 5282 +subqueri 0 2 6.263398 0.000000 5283 +aggregatefunct 0 2 6.263398 0.000000 5284 +sequencesord 0 2 6.263398 0.000000 5285 +modelth 0 2 6.263398 0.000000 5286 +gist 0 2 6.263398 0.000000 5287 +ordereddomain 0 2 6.263398 0.000000 5288 +andposit 0 2 6.263398 0.000000 5289 +recordsmap 0 2 6.263398 0.000000 5290 +rise 0 2 6.263398 0.000000 5291 +relationaloper 0 2 6.263398 0.000000 5292 +andaggreg 0 2 6.263398 0.000000 5293 +researchersin 0 2 6.263398 0.000000 5294 +movingaggreg 0 2 6.263398 0.000000 5295 +worldsitu 0 2 6.263398 0.000000 5296 +extensionof 0 2 6.263398 0.000000 5297 +ofseq 0 2 6.263398 0.000000 5298 +languagew 0 2 6.263398 0.000000 5299 +usingwhich 0 2 6.263398 0.000000 5300 +languagei 0 2 6.263398 0.000000 5301 +queriesa 0 2 6.263398 0.000000 5302 +techniquesw 0 2 6.263398 0.000000 5303 +developmentth 0 2 6.263398 0.000000 5304 +viaa 0 2 6.263398 0.000000 5305 +ontop 0 2 6.263398 0.000000 5306 +languageswhich 0 2 6.263398 0.000000 5307 +arbitrarylevel 0 2 6.263398 0.000000 5308 +viceversa 0 2 6.263398 0.000000 5309 +detailson 0 2 6.263398 0.000000 5310 +publicationssequ 0 2 6.263398 0.000000 5311 +datapraveen 0 2 6.263398 0.000000 5312 +systempraveen 0 2 6.263398 0.000000 5313 +queriesraghu 0 2 6.263398 0.000000 5314 +workthedevis 0 2 6.263398 0.000000 5315 +visualizationenviron 0 2 6.263398 0.000000 5316 +servercontact 0 2 6.263398 0.000000 5317 +eduraghu 0 2 6.263398 0.000000 5318 +edumiron 0 2 6.263398 0.000000 5319 +seshadripraveen 0 2 6.263398 0.000000 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..9086464a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +hour 0 165 1.791759 0.000000 46 +madison 0 165 1.791759 0.000000 55 +person 0 111 2.197225 0.000000 117 +school 1 84 2.484907 2.484907 188 +resum 0 79 2.564949 0.000000 217 +bookmark 0 26 3.688879 0.000000 639 +andrew 1 19 4.007333 4.007333 849 +histori 0 19 4.007333 0.000000 853 +vista 1 10 4.653960 4.653960 1452 +alta 1 4 5.568345 5.568345 3039 +prock 1 2 6.263398 6.263398 4786 +clemen 0 1 6.957497 0.000000 19521 +hockert 0 1 6.957497 0.000000 19522 +prockoffic 0 1 6.957497 0.000000 19523 +doonesburi 0 1 6.957497 0.000000 19524 +trot 0 1 6.957497 0.000000 19525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..dbfb25ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +theori 1 111 2.197225 2.197225 127 +well 0 109 2.197225 0.000000 121 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +journal 0 83 2.484907 0.000000 183 +solut 0 82 2.484907 0.000000 162 +exampl 0 77 2.564949 0.000000 195 +complet 0 77 2.564949 0.000000 208 +state 0 76 2.564949 0.000000 207 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +symposium 0 72 2.639057 0.000000 238 +goal 0 66 2.708050 0.000000 250 +practic 0 70 2.708050 0.000000 246 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +result 0 65 2.772589 0.000000 281 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +sever 0 56 2.890372 0.000000 322 +sampl 0 53 2.944439 0.000000 339 +februari 0 54 2.944439 0.000000 328 +much 0 52 2.995732 0.000000 349 +telephon 0 50 3.044522 0.000000 373 +understand 0 47 3.091042 0.000000 384 +long 0 43 3.178054 0.000000 413 +combin 0 42 3.218876 0.000000 421 +annual 0 40 3.258097 0.000000 458 +error 0 40 3.258097 0.000000 449 +game 0 36 3.367296 0.000000 498 +tree 0 36 3.367296 0.000000 492 +random 0 34 3.401197 0.000000 511 +approxim 0 35 3.401197 0.000000 509 +posit 0 31 3.496508 0.000000 552 +hard 0 30 3.555348 0.000000 563 +graph 0 30 3.555348 0.000000 576 +turn 0 29 3.583519 0.000000 586 +synchron 0 29 3.583519 0.000000 588 +progress 0 28 3.610918 0.000000 598 +bound 0 26 3.688879 0.000000 659 +although 0 25 3.737670 0.000000 667 +strategi 0 25 3.737670 0.000000 682 +proof 0 23 3.806662 0.000000 720 +identifi 0 22 3.850148 0.000000 760 +prove 0 19 4.007333 0.000000 848 +easi 0 16 4.174387 0.000000 969 +novel 0 15 4.248495 0.000000 1039 +polynomi 0 14 4.317488 0.000000 1069 +finit 0 14 4.317488 0.000000 1106 +automata 0 13 4.382027 0.000000 1135 +minimum 0 9 4.753590 0.000000 1555 +span 0 8 4.875197 0.000000 1751 +prover 0 8 4.875197 0.000000 1653 +round 0 8 4.875197 0.000000 1769 +aris 0 7 5.010635 0.000000 1924 +eduto 0 7 5.010635 0.000000 1956 +ann 0 6 5.164786 0.000000 2065 +ladner 0 6 5.164786 0.000000 2062 +pub 0 6 5.164786 0.000000 2239 +proceedingsof 0 5 5.347108 0.000000 2331 +provabl 0 5 5.347108 0.000000 2558 +surprisingli 0 4 5.568345 0.000000 2609 +expens 0 4 5.568345 0.000000 2678 +condon 0 3 5.857933 0.000000 3309 +neg 0 3 5.857933 0.000000 3451 +theoryand 0 3 5.857933 0.000000 3350 +nondeterminist 0 3 5.857933 0.000000 3560 +wigderson 0 2 6.263398 0.000000 6035 +complexityclass 0 1 6.957497 0.000000 19526 +interactiveproof 0 1 6.957497 0.000000 19527 +nondetermin 0 1 6.957497 0.000000 19528 +suchmodel 0 1 6.957497 0.000000 19529 +proven 0 1 6.957497 0.000000 19530 +classicproblem 0 1 6.957497 0.000000 19531 +theoryof 0 1 6.957497 0.000000 19532 +computationalproblem 0 1 6.957497 0.000000 19533 +whichhard 0 1 6.957497 0.000000 19534 +recentresult 0 1 6.957497 0.000000 19535 +modelsof 0 1 6.957497 0.000000 19536 +approximabilityresult 0 1 6.957497 0.000000 19537 +developingboth 0 1 6.957497 0.000000 19538 +hardcombinatori 0 1 6.957497 0.000000 19539 +forsort 0 1 6.957497 0.000000 19540 +costscan 0 1 6.957497 0.000000 19541 +probabilisticst 0 1 6.957497 0.000000 19542 +hellerstein 0 1 6.957497 0.000000 19543 +pottl 0 1 6.957497 0.000000 19544 +pspace 0 1 6.957497 0.000000 19545 +caiand 0 1 6.957497 0.000000 19546 +lipton 0 1 6.957497 0.000000 19547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..cec9db6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +structur 0 106 2.197225 0.000000 105 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +techniqu 0 99 2.302585 0.000000 138 +question 0 91 2.397895 0.000000 141 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +larg 0 82 2.484907 0.000000 168 +method 0 80 2.564949 0.000000 213 +dynam 0 76 2.564949 0.000000 194 +appli 0 71 2.639057 0.000000 226 +logic 0 71 2.639057 0.000000 230 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +degre 0 69 2.708050 0.000000 259 +complex 0 64 2.772589 0.000000 269 +creat 0 63 2.772589 0.000000 277 +type 0 61 2.833213 0.000000 296 +automat 0 61 2.833213 0.000000 306 +sampl 0 53 2.944439 0.000000 339 +local 0 55 2.944439 0.000000 334 +set 0 50 3.044522 0.000000 361 +telephon 0 50 3.044522 0.000000 373 +theoret 0 39 3.258097 0.000000 446 +littl 0 39 3.258097 0.000000 454 +error 0 40 3.258097 0.000000 449 +formal 0 37 3.332205 0.000000 478 +global 0 34 3.401197 0.000000 520 +obtain 0 33 3.433987 0.000000 534 +graph 0 30 3.555348 0.000000 576 +scale 0 28 3.610918 0.000000 613 +great 0 27 3.637586 0.000000 626 +effort 0 26 3.688879 0.000000 652 +detect 0 26 3.688879 0.000000 646 +concern 0 25 3.737670 0.000000 666 +known 0 24 3.761200 0.000000 702 +sequenc 0 23 3.806662 0.000000 734 +proof 0 23 3.806662 0.000000 720 +properti 0 22 3.850148 0.000000 749 +geometri 0 22 3.850148 0.000000 752 +deal 0 22 3.850148 0.000000 736 +inth 0 22 3.850148 0.000000 741 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +similar 0 21 3.912023 0.000000 771 +fact 0 21 3.912023 0.000000 780 +util 0 21 3.912023 0.000000 774 +whether 0 17 4.110874 0.000000 918 +young 0 16 4.174387 0.000000 991 +spars 0 16 4.174387 0.000000 989 +fourth 0 16 4.174387 0.000000 999 +biologi 0 15 4.248495 0.000000 1049 +decid 0 14 4.317488 0.000000 1075 +polynomi 0 14 4.317488 0.000000 1069 +anonym 0 14 4.317488 0.000000 1100 +incomput 0 14 4.317488 0.000000 1096 +necessari 0 13 4.382027 0.000000 1147 +primarili 0 13 4.382027 0.000000 1185 +discret 0 13 4.382027 0.000000 1165 +assembl 0 12 4.465908 0.000000 1207 +weight 0 12 4.465908 0.000000 1204 +purdu 0 10 4.653960 0.000000 1466 +rapid 0 10 4.653960 0.000000 1453 +eduto 0 7 5.010635 0.000000 1956 +biolog 0 6 5.164786 0.000000 2147 +determinist 0 6 5.164786 0.000000 2034 +pub 0 6 5.164786 0.000000 2239 +joseph 0 5 5.347108 0.000000 2327 +twenti 0 5 5.347108 0.000000 2540 +despit 0 5 5.347108 0.000000 2317 +tiwari 0 5 5.347108 0.000000 2385 +gone 0 4 5.568345 0.000000 3072 +resolv 0 4 5.568345 0.000000 2675 +algorithmsfor 0 4 5.568345 0.000000 2748 +genom 0 3 5.857933 0.000000 3546 +collaps 0 3 5.857933 0.000000 3729 +ninth 0 3 5.857933 0.000000 3616 +soar 0 3 5.857933 0.000000 3506 +adequaci 0 2 6.263398 0.000000 6229 +fragment 0 2 6.263398 0.000000 6000 +homolog 0 2 6.263398 0.000000 5441 +analysisof 0 2 6.263398 0.000000 4277 +deborah 0 1 6.957497 0.000000 19548 +studyingth 0 1 6.957497 0.000000 19549 +andnondeterminist 0 1 6.957497 0.000000 19550 +stillknow 0 1 6.957497 0.000000 19551 +computerscientist 0 1 6.957497 0.000000 19552 +techniquesfor 0 1 6.957497 0.000000 19553 +investigatesth 0 1 6.957497 0.000000 19554 +exploresin 0 1 6.957497 0.000000 19555 +resolveproblem 0 1 6.957497 0.000000 19556 +theseinclud 0 1 6.957497 0.000000 19557 +handlingrepetit 0 1 6.957497 0.000000 19558 +graphtheoret 0 1 6.957497 0.000000 19559 +subexponenti 0 1 6.957497 0.000000 19560 +pruim 0 1 6.957497 0.000000 19561 +theoryconfer 0 1 6.957497 0.000000 19562 +spanner 0 1 6.957497 0.000000 19563 +althof 0 1 6.957497 0.000000 19564 +dobkin 0 1 6.957497 0.000000 19565 +meidanisand 0 1 6.957497 0.000000 19566 +scandinavianworkshop 0 1 6.957497 0.000000 19567 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..2659f94f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +proceed 0 93 2.397895 0.000000 152 +sinc 0 90 2.397895 0.000000 159 +graphic 0 90 2.397895 0.000000 147 +octob 0 89 2.397895 0.000000 156 +resourc 0 81 2.484907 0.000000 172 +institut 0 84 2.484907 0.000000 187 +journal 0 83 2.484907 0.000000 183 +interfac 0 79 2.564949 0.000000 209 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +simul 1 66 2.708050 2.708050 255 +differ 0 66 2.708050 0.000000 253 +polici 0 64 2.772589 0.000000 279 +laboratori 0 63 2.772589 0.000000 292 +foundat 0 62 2.772589 0.000000 286 +creat 0 63 2.772589 0.000000 277 +type 0 61 2.833213 0.000000 296 +juli 0 60 2.833213 0.000000 305 +automat 0 61 2.833213 0.000000 306 +major 0 56 2.890372 0.000000 315 +sampl 0 53 2.944439 0.000000 339 +telephon 0 50 3.044522 0.000000 373 +visual 0 48 3.044522 0.000000 372 +join 0 39 3.258097 0.000000 457 +error 0 40 3.258097 0.000000 449 +purpos 0 37 3.332205 0.000000 481 +queri 0 33 3.433987 0.000000 524 +consid 0 29 3.583519 0.000000 590 +synchron 0 29 3.583519 0.000000 588 +sequenc 0 23 3.806662 0.000000 734 +displai 0 23 3.806662 0.000000 712 +disk 0 22 3.850148 0.000000 747 +emphasi 0 22 3.850148 0.000000 755 +properti 0 22 3.850148 0.000000 749 +util 0 21 3.912023 0.000000 774 +sigmod 0 19 4.007333 0.000000 877 +event 0 18 4.060443 0.000000 896 +ramakrishnan 0 16 4.174387 0.000000 972 +livni 0 15 4.248495 0.000000 1053 +novel 0 15 4.248495 0.000000 1039 +miron 0 14 4.317488 0.000000 1110 +discret 0 13 4.382027 0.000000 1165 +sigmetr 0 13 4.382027 0.000000 1173 +emploi 0 12 4.465908 0.000000 1284 +israel 0 11 4.553877 0.000000 1366 +ioannidi 0 8 4.875197 0.000000 1714 +eduto 0 7 5.010635 0.000000 1956 +schema 0 6 5.164786 0.000000 1988 +pub 0 6 5.164786 0.000000 2239 +tape 0 4 5.568345 0.000000 2959 +weizmann 0 4 5.568345 0.000000 2858 +evaluationof 0 3 5.857933 0.000000 3192 +interplai 0 3 5.857933 0.000000 3726 +myllymaki 0 3 5.857933 0.000000 4022 +metaphor 0 3 5.857933 0.000000 4038 +rehovot 0 2 6.263398 0.000000 4891 +developmentof 0 2 6.263398 0.000000 6041 +managementsystem 0 2 6.263398 0.000000 4365 +researchinvolv 0 2 6.263398 0.000000 5556 +asreal 0 1 6.957497 0.000000 19568 +specialemphasi 0 1 6.957497 0.000000 19569 +systemand 0 1 6.957497 0.000000 19570 +performancestudi 0 1 6.957497 0.000000 19571 +modelingand 0 1 6.957497 0.000000 19572 +implementinga 0 1 6.957497 0.000000 19573 +visualizationtool 0 1 6.957497 0.000000 19574 +sashadri 0 1 6.957497 0.000000 19575 +haberand 0 1 6.957497 0.000000 19576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..4226ed55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +part 0 98 2.302585 0.000000 129 +real 0 93 2.397895 0.000000 144 +octob 0 89 2.397895 0.000000 156 +journal 0 83 2.484907 0.000000 183 +second 0 81 2.484907 0.000000 166 +method 1 80 2.564949 2.564949 213 +appear 0 78 2.564949 0.000000 210 +summari 0 73 2.639057 0.000000 237 +order 0 69 2.708050 0.000000 249 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +major 0 56 2.890372 0.000000 315 +sever 0 56 2.890372 0.000000 322 +sampl 0 53 2.944439 0.000000 339 +numer 0 49 3.044522 0.000000 369 +telephon 0 50 3.044522 0.000000 373 +without 0 50 3.044522 0.000000 370 +effect 0 46 3.091042 0.000000 385 +york 0 41 3.218876 0.000000 435 +small 0 39 3.258097 0.000000 447 +error 0 40 3.258097 0.000000 449 +hand 0 37 3.332205 0.000000 475 +procedur 0 36 3.367296 0.000000 488 +concept 0 32 3.465736 0.000000 537 +posit 0 31 3.496508 0.000000 552 +valu 0 25 3.737670 0.000000 665 +equat 0 23 3.806662 0.000000 724 +emphasi 0 22 3.850148 0.000000 755 +siam 0 21 3.912023 0.000000 800 +thu 0 21 3.912023 0.000000 773 +definit 0 19 4.007333 0.000000 864 +partial 0 18 4.060443 0.000000 900 +differenti 0 17 4.110874 0.000000 921 +attempt 0 17 4.110874 0.000000 917 +estim 0 17 4.110874 0.000000 930 +condit 0 16 4.174387 0.000000 975 +spars 0 16 4.174387 0.000000 989 +discret 0 13 4.382027 0.000000 1165 +boundari 0 7 5.010635 0.000000 1929 +eduto 0 7 5.010635 0.000000 1956 +mix 0 6 5.164786 0.000000 2200 +pub 0 6 5.164786 0.000000 2239 +pivot 0 5 5.347108 0.000000 2426 +symmetr 0 4 5.568345 0.000000 2908 +ellipt 0 3 5.857933 0.000000 3774 +singular 0 3 5.857933 0.000000 3366 +preserv 0 3 5.857933 0.000000 3628 +thesystem 0 3 5.857933 0.000000 3881 +encount 0 3 5.857933 0.000000 3128 +attack 0 3 5.857933 0.000000 3168 +parter 0 2 6.263398 0.000000 4075 +solutionof 0 2 6.263398 0.000000 5056 +linearalgebra 0 2 6.263398 0.000000 4833 +scientificcomput 0 2 6.263398 0.000000 4145 +precondit 0 1 6.957497 0.000000 19577 +seymour 0 1 6.957497 0.000000 19578 +indefinit 0 1 6.957497 0.000000 19579 +classicalit 0 1 6.957497 0.000000 19580 +multigrid 0 1 6.957497 0.000000 19581 +effectivelywhen 0 1 6.957497 0.000000 19582 +bemad 0 1 6.957497 0.000000 19583 +operatori 0 1 6.957497 0.000000 19584 +casedirect 0 1 6.957497 0.000000 19585 +challengingproblem 0 1 6.957497 0.000000 19586 +nowinvolv 0 1 6.957497 0.000000 19587 +specialmultigrid 0 1 6.957497 0.000000 19588 +chebyshev 0 1 6.957497 0.000000 19589 +collact 0 1 6.957497 0.000000 19590 +ellipticparti 0 1 6.957497 0.000000 19591 +journalon 0 1 6.957497 0.000000 19592 +numbersand 0 1 6.957497 0.000000 19593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..315c771d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +parallel 1 169 1.791759 1.791759 60 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +techniqu 0 99 2.302585 0.000000 138 +proceed 0 93 2.397895 0.000000 152 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +ieee 0 86 2.484907 0.000000 190 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +server 0 76 2.564949 0.000000 204 +complet 0 77 2.564949 0.000000 208 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +effici 0 73 2.639057 0.000000 233 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +polici 0 64 2.772589 0.000000 279 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +processor 0 54 2.944439 0.000000 335 +sampl 0 53 2.944439 0.000000 339 +telephon 0 50 3.044522 0.000000 373 +california 0 46 3.091042 0.000000 388 +featur 0 46 3.091042 0.000000 386 +join 0 39 3.258097 0.000000 457 +error 0 40 3.258097 0.000000 449 +industri 0 38 3.295837 0.000000 464 +workstat 0 37 3.332205 0.000000 479 +approxim 0 35 3.401197 0.000000 509 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +repres 0 26 3.688879 0.000000 656 +valu 0 25 3.737670 0.000000 665 +accur 0 25 3.737670 0.000000 680 +togeth 0 23 3.806662 0.000000 714 +emphasi 0 22 3.850148 0.000000 755 +alloc 0 20 3.951244 0.000000 821 +precis 0 15 4.248495 0.000000 1023 +hybrid 0 15 4.248495 0.000000 1057 +sigmetr 0 13 4.382027 0.000000 1173 +mari 0 12 4.465908 0.000000 1266 +workload 0 12 4.465908 0.000000 1210 +characterist 0 12 4.465908 0.000000 1257 +operatingsystem 0 10 4.653960 0.000000 1401 +custom 0 10 4.653960 0.000000 1414 +vernon 0 9 4.753590 0.000000 1556 +angel 0 8 4.875197 0.000000 1779 +character 0 8 4.875197 0.000000 1767 +reus 0 8 4.875197 0.000000 1661 +hash 0 8 4.875197 0.000000 1618 +carei 0 8 4.875197 0.000000 1781 +analyt 0 7 5.010635 0.000000 1913 +prioriti 0 7 5.010635 0.000000 1792 +interpol 0 7 5.010635 0.000000 1823 +chiang 0 7 5.010635 0.000000 1853 +eduto 0 7 5.010635 0.000000 1956 +pub 0 6 5.164786 0.000000 2239 +yield 0 5 5.347108 0.000000 2458 +fair 0 5 5.347108 0.000000 2333 +infocom 0 3 5.857933 0.000000 3283 +paralleland 0 2 6.263398 0.000000 5805 +petri 0 2 6.263398 0.000000 4414 +intuit 0 2 6.263398 0.000000 4921 +performanceanalysi 0 2 6.263398 0.000000 5629 +schedulingpolici 0 2 6.263398 0.000000 5879 +memorymanag 0 2 6.263398 0.000000 4158 +preemption 0 2 6.263398 0.000000 6230 +mansharamani 0 2 6.263398 0.000000 6231 +applicationto 0 1 6.957497 0.000000 19594 +techniquesi 0 1 6.957497 0.000000 19595 +colleaguesinclud 0 1 6.957497 0.000000 19596 +customizedmean 0 1 6.957497 0.000000 19597 +gtpn 0 1 6.957497 0.000000 19598 +systemfeatur 0 1 6.957497 0.000000 19599 +equationsthat 0 1 6.957497 0.000000 19600 +butcan 0 1 6.957497 0.000000 19601 +proposedth 0 1 6.957497 0.000000 19602 +approximationsfor 0 1 6.957497 0.000000 19603 +techniquemai 0 1 6.957497 0.000000 19604 +broader 0 1 6.957497 0.000000 19605 +performanceparallel 0 1 6.957497 0.000000 19606 +dqdb 0 1 6.957497 0.000000 19607 +slot 0 1 6.957497 0.000000 19608 +brewster 0 1 6.957497 0.000000 19609 +pateland 0 1 6.957497 0.000000 19610 +forrun 0 1 6.957497 0.000000 19611 +with 0 1 6.957497 0.000000 19612 +sigmetricsconfer 0 1 6.957497 0.000000 19613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..df0ebc8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +welcom 0 122 2.079442 0.000000 99 +wang 1 21 3.912023 3.912023 790 +edulast 0 17 4.110874 0.000000 927 +qinqin 1 1 6.957497 6.957497 19614 +pageqw 0 1 6.957497 0.000000 19615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..a014e373 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +data 1 170 1.791759 1.791759 49 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +wisconsin 0 169 1.791759 0.000000 54 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +databas 0 122 2.079442 0.000000 86 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +teach 0 108 2.197225 0.000000 112 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +take 0 97 2.302585 0.000000 134 +imag 0 91 2.397895 0.000000 161 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +educ 0 86 2.484907 0.000000 191 +activ 0 84 2.484907 0.000000 182 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +school 0 84 2.484907 0.000000 188 +sourc 0 77 2.564949 0.000000 201 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +logic 0 71 2.639057 0.000000 230 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +august 0 66 2.708050 0.000000 257 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +prof 0 64 2.772589 0.000000 273 +evalu 0 64 2.772589 0.000000 266 +street 0 63 2.772589 0.000000 293 +result 0 65 2.772589 0.000000 281 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +collect 0 65 2.772589 0.000000 268 +content 0 59 2.833213 0.000000 302 +sever 0 56 2.890372 0.000000 322 +explor 0 58 2.890372 0.000000 324 +publish 0 57 2.890372 0.000000 326 +index 0 56 2.890372 0.000000 309 +cover 0 55 2.944439 0.000000 329 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +set 0 50 3.044522 0.000000 361 +visual 0 48 3.044522 0.000000 372 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +multipl 0 39 3.258097 0.000000 453 +continu 0 39 3.258097 0.000000 448 +sciencesunivers 0 37 3.332205 0.000000 486 +formal 0 37 3.332205 0.000000 478 +next 0 34 3.401197 0.000000 517 +tech 0 35 3.401197 0.000000 515 +queri 0 33 3.433987 0.000000 524 +extend 0 32 3.465736 0.000000 539 +independ 0 32 3.465736 0.000000 548 +express 0 32 3.465736 0.000000 540 +given 0 32 3.465736 0.000000 538 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +specifi 0 30 3.555348 0.000000 568 +rang 0 30 3.555348 0.000000 565 +focus 0 29 3.583519 0.000000 584 +cluster 0 28 3.610918 0.000000 612 +retriev 0 27 3.637586 0.000000 621 +constraint 0 26 3.688879 0.000000 636 +mine 0 26 3.688879 0.000000 654 +hill 0 25 3.737670 0.000000 670 +aspect 0 25 3.737670 0.000000 663 +lab 0 24 3.761200 0.000000 698 +pattern 0 24 3.761200 0.000000 689 +sequenc 0 23 3.806662 0.000000 734 +deal 0 22 3.850148 0.000000 736 +indian 0 22 3.850148 0.000000 769 +identifi 0 22 3.850148 0.000000 760 +toolkit 0 20 3.951244 0.000000 835 +definit 0 19 4.007333 0.000000 864 +stand 0 18 4.060443 0.000000 891 +ramakrishnan 0 16 4.174387 0.000000 972 +advantag 0 16 4.174387 0.000000 987 +upon 0 16 4.174387 0.000000 978 +livni 0 15 4.248495 0.000000 1053 +transit 0 15 4.248495 0.000000 1046 +heterogen 0 14 4.317488 0.000000 1090 +easili 0 14 4.317488 0.000000 1077 +joint 0 13 4.382027 0.000000 1130 +dbm 0 13 4.382027 0.000000 1136 +recurs 0 13 4.382027 0.000000 1127 +employ 0 12 4.465908 0.000000 1291 +raghu 0 12 4.465908 0.000000 1212 +grow 0 12 4.465908 0.000000 1209 +deduct 0 12 4.465908 0.000000 1236 +broad 0 11 4.553877 0.000000 1302 +usaphon 0 9 4.753590 0.000000 1600 +madra 0 8 4.875197 0.000000 1770 +ioannidi 0 8 4.875197 0.000000 1714 +closur 0 8 4.875197 0.000000 1643 +bottom 0 7 5.010635 0.000000 1906 +dataset 0 7 5.010635 0.000000 1914 +seshadri 0 7 5.010635 0.000000 1803 +bell 0 6 5.164786 0.000000 2224 +ongo 0 6 5.164786 0.000000 2215 +praveen 0 6 5.164786 0.000000 1996 +coral 0 5 5.347108 0.000000 2538 +mcgraw 0 5 5.347108 0.000000 2262 +minibas 0 4 5.568345 0.000000 2608 +exploratori 0 4 5.568345 0.000000 3073 +ofinform 0 4 5.568345 0.000000 2707 +successor 0 3 5.857933 0.000000 3576 +sudarshan 0 3 5.857933 0.000000 3885 +murrai 0 2 6.263398 0.000000 5647 +aimedat 0 2 6.263398 0.000000 6117 +srivastava 0 2 6.263398 0.000000 5395 +minibaseand 0 1 6.957497 0.000000 19616 +coralth 0 1 6.957497 0.000000 19617 +undergraduateand 0 1 6.957497 0.000000 19618 +inconjunct 0 1 6.957497 0.000000 19619 +coursesthat 0 1 6.957497 0.000000 19620 +deductiona 0 1 6.957497 0.000000 19621 +diversifi 0 1 6.957497 0.000000 19622 +increasinglyimport 0 1 6.957497 0.000000 19623 +dispers 0 1 6.957497 0.000000 19624 +rodin 0 1 6.957497 0.000000 19625 +severalissu 0 1 6.957497 0.000000 19626 +forsemant 0 1 6.957497 0.000000 19627 +serviceand 0 1 6.957497 0.000000 19628 +networkedclust 0 1 6.957497 0.000000 19629 +explorationfrom 0 1 6.957497 0.000000 19630 +assequ 0 1 6.957497 0.000000 19631 +seqsystem 0 1 6.957497 0.000000 19632 +optimizationissu 0 1 6.957497 0.000000 19633 +identifyingtrend 0 1 6.957497 0.000000 19634 +fromlarg 0 1 6.957497 0.000000 19635 +implementingan 0 1 6.957497 0.000000 19636 +customizea 0 1 6.957497 0.000000 19637 +specializedinform 0 1 6.957497 0.000000 19638 +indexedand 0 1 6.957497 0.000000 19639 +andmin 0 1 6.957497 0.000000 19640 +birchfor 0 1 6.957497 0.000000 19641 +devisea 0 1 6.957497 0.000000 19642 +databasequeri 0 1 6.957497 0.000000 19643 +featuressuch 0 1 6.957497 0.000000 19644 +ofarithmet 0 1 6.957497 0.000000 19645 +morecompactli 0 1 6.957497 0.000000 19646 +coraldeduct 0 1 6.957497 0.000000 19647 +fixpointevalu 0 1 6.957497 0.000000 19648 +efficientacross 0 1 6.957497 0.000000 19649 +sudarsha 0 1 6.957497 0.000000 19650 +divesh 0 1 6.957497 0.000000 19651 +managementfirst 0 1 6.957497 0.000000 19652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..9d747053 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +madison 1 165 1.791759 1.791759 55 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +final 0 116 2.197225 0.000000 108 +place 0 106 2.197225 0.000000 124 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +take 0 97 2.302585 0.000000 134 +call 0 91 2.397895 0.000000 153 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +learn 0 86 2.484907 0.000000 170 +know 0 80 2.564949 0.000000 198 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +want 0 79 2.564949 0.000000 199 +state 0 76 2.564949 0.000000 207 +good 0 77 2.564949 0.000000 200 +come 0 78 2.564949 0.000000 202 +degre 0 69 2.708050 0.000000 259 +collect 0 65 2.772589 0.000000 268 +plai 0 60 2.833213 0.000000 307 +semest 0 58 2.890372 0.000000 312 +think 0 57 2.890372 0.000000 314 +summer 0 56 2.890372 0.000000 311 +much 0 52 2.995732 0.000000 349 +california 0 46 3.091042 0.000000 388 +get 0 46 3.091042 0.000000 380 +could 0 46 3.091042 0.000000 383 +natur 0 44 3.135494 0.000000 406 +long 0 43 3.178054 0.000000 413 +show 0 43 3.178054 0.000000 417 +third 0 43 3.178054 0.000000 412 +music 0 42 3.218876 0.000000 436 +littl 0 39 3.258097 0.000000 454 +small 0 39 3.258097 0.000000 447 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +form 0 39 3.258097 0.000000 443 +winter 0 36 3.367296 0.000000 500 +go 0 33 3.433987 0.000000 529 +india 0 32 3.465736 0.000000 550 +kind 0 32 3.465736 0.000000 541 +travel 0 30 3.555348 0.000000 579 +hope 0 28 3.610918 0.000000 610 +great 0 27 3.637586 0.000000 626 +though 0 27 3.637586 0.000000 622 +enjoi 0 26 3.688879 0.000000 660 +sport 0 25 3.737670 0.000000 683 +concern 0 25 3.737670 0.000000 666 +magazin 0 24 3.761200 0.000000 704 +wish 0 24 3.761200 0.000000 692 +famili 0 23 3.806662 0.000000 735 +almost 0 22 3.850148 0.000000 742 +try 0 22 3.850148 0.000000 764 +love 0 21 3.912023 0.000000 804 +born 0 21 3.912023 0.000000 798 +watch 0 21 3.912023 0.000000 789 +nice 0 20 3.951244 0.000000 809 +tenni 0 20 3.951244 0.000000 838 +citi 0 19 4.007333 0.000000 874 +offici 0 18 4.060443 0.000000 894 +figur 0 18 4.060443 0.000000 903 +bachelor 0 17 4.110874 0.000000 957 +normal 0 16 4.174387 0.000000 995 +jose 0 16 4.174387 0.000000 976 +enough 0 15 4.248495 0.000000 1040 +rate 0 15 4.248495 0.000000 1037 +anywai 0 15 4.248495 0.000000 1047 +novel 0 15 4.248495 0.000000 1039 +came 0 13 4.382027 0.000000 1197 +cannot 0 13 4.382027 0.000000 1144 +sai 0 13 4.382027 0.000000 1175 +philosophi 0 13 4.382027 0.000000 1167 +stai 0 12 4.465908 0.000000 1215 +employ 0 12 4.465908 0.000000 1291 +walk 0 12 4.465908 0.000000 1281 +rest 0 12 4.465908 0.000000 1259 +surf 0 11 4.553877 0.000000 1301 +town 0 10 4.653960 0.000000 1458 +guess 0 10 4.653960 0.000000 1443 +sister 0 9 4.753590 0.000000 1524 +prefer 0 9 4.753590 0.000000 1491 +swim 0 9 4.753590 0.000000 1599 +kanpur 0 8 4.875197 0.000000 1744 +star 0 8 4.875197 0.000000 1717 +bridg 0 8 4.875197 0.000000 1764 +job 0 8 4.875197 0.000000 1702 +fortun 0 7 5.010635 0.000000 1872 +monei 0 7 5.010635 0.000000 1934 +cricket 0 7 5.010635 0.000000 1945 +slightli 0 7 5.010635 0.000000 1795 +parent 0 6 5.164786 0.000000 2204 +whatev 0 6 5.164786 0.000000 2097 +hike 0 6 5.164786 0.000000 2234 +televis 0 6 5.164786 0.000000 2118 +almaden 0 5 5.347108 0.000000 2511 +cyber 0 4 5.568345 0.000000 2909 +gone 0 4 5.568345 0.000000 3072 +compris 0 4 5.568345 0.000000 2862 +shouldn 0 4 5.568345 0.000000 2606 +suppos 0 4 5.568345 0.000000 3002 +skate 0 4 5.568345 0.000000 3046 +rahul 0 3 5.857933 0.000000 3464 +indianinstitut 0 3 5.857933 0.000000 4003 +romanc 0 3 5.857933 0.000000 3632 +trek 0 3 5.857933 0.000000 4025 +win 0 3 5.857933 0.000000 3593 +comedi 0 2 6.263398 0.000000 5822 +surfer 0 2 6.263398 0.000000 4982 +centr 0 2 6.263398 0.000000 4222 +northern 0 2 6.263398 0.000000 5861 +lover 0 2 6.263398 0.000000 6192 +paid 0 2 6.263398 0.000000 6081 +livabl 0 1 6.957497 0.000000 19653 +kapoorhello 0 1 6.957497 0.000000 19654 +schedulemydepartmentmyuniversityiitkanpuriitkclass 0 1 6.957497 0.000000 19655 +relatedlink 0 1 6.957497 0.000000 19656 +menow 0 1 6.957497 0.000000 19657 +andrais 0 1 6.957497 0.000000 19658 +elder 0 1 6.957497 0.000000 19659 +moneymagazin 0 1 6.957497 0.000000 19660 +editormust 0 1 6.957497 0.000000 19661 +greenland 0 1 6.957497 0.000000 19662 +complain 0 1 6.957497 0.000000 19663 +isawesom 0 1 6.957497 0.000000 19664 +regret 0 1 6.957497 0.000000 19665 +genr 0 1 6.957497 0.000000 19666 +gymnast 0 1 6.957497 0.000000 19667 +cloudi 0 1 6.957497 0.000000 19668 +breezi 0 1 6.957497 0.000000 19669 +youget 0 1 6.957497 0.000000 19670 +musicstuffmovi 0 1 6.957497 0.000000 19671 +televisioninternettravelotherbookmark 0 1 6.957497 0.000000 19672 +meget 0 1 6.957497 0.000000 19673 +guestbookrahul 0 1 6.957497 0.000000 19674 +eduh 0 1 6.957497 0.000000 19675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..3ac8e16c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +mathemat 0 108 2.197225 0.000000 123 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +part 0 98 2.302585 0.000000 129 +west 0 83 2.484907 0.000000 192 +member 0 84 2.484907 0.000000 165 +thing 0 84 2.484907 0.000000 189 +integr 0 67 2.708050 0.000000 245 +street 0 63 2.772589 0.000000 293 +evalu 0 64 2.772589 0.000000 266 +major 0 56 2.890372 0.000000 315 +telephon 0 50 3.044522 0.000000 373 +music 0 42 3.218876 0.000000 436 +error 0 40 3.258097 0.000000 449 +vita 0 38 3.295837 0.000000 473 +committe 0 34 3.401197 0.000000 522 +curriculum 0 33 3.433987 0.000000 535 +team 0 27 3.637586 0.000000 625 +bookmark 0 26 3.688879 0.000000 639 +subject 0 26 3.688879 0.000000 647 +livni 0 15 4.248495 0.000000 1053 +minor 0 12 4.465908 0.000000 1237 +pascal 0 12 4.465908 0.000000 1213 +earth 0 10 4.653960 0.000000 1463 +chao 0 8 4.875197 0.000000 1753 +judg 0 8 4.875197 0.000000 1644 +uncertainti 0 7 5.010635 0.000000 1882 +truth 0 6 5.164786 0.000000 2179 +ohio 0 5 5.347108 0.000000 2447 +condor 0 5 5.347108 0.000000 2577 +raman 1 4 5.568345 5.568345 2827 +rajesh 0 3 5.857933 0.000000 3511 +off 0 3 5.857933 0.000000 3170 +wesleyan 0 3 5.857933 0.000000 3988 +saluja 0 3 5.857933 0.000000 3104 +novelti 0 2 6.263398 0.000000 5765 +monster 0 2 6.263398 0.000000 6207 +prodigi 0 2 6.263398 0.000000 5670 +old 0 1 6.957497 0.000000 19676 +homm 0 1 6.957497 0.000000 19677 +winsonsin 0 1 6.957497 0.000000 19678 +chimera 0 1 6.957497 0.000000 19679 +contradict 0 1 6.957497 0.000000 19680 +feebleworm 0 1 6.957497 0.000000 19681 +depositari 0 1 6.957497 0.000000 19682 +cloaca 0 1 6.957497 0.000000 19683 +theglori 0 1 6.957497 0.000000 19684 +shame 0 1 6.957497 0.000000 19685 +blais 0 1 6.957497 0.000000 19686 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..0a6de1bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +address 0 170 1.791759 0.000000 62 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +databas 1 122 2.079442 2.079442 86 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +comment 0 93 2.397895 0.000000 146 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +david 0 71 2.639057 0.000000 232 +street 0 63 2.772589 0.000000 293 +improv 0 62 2.772589 0.000000 289 +think 0 57 2.890372 0.000000 314 +electron 0 47 3.091042 0.000000 379 +might 0 41 3.218876 0.000000 426 +eduoffic 0 33 3.433987 0.000000 531 +altern 0 26 3.688879 0.000000 641 +scalabl 0 24 3.761200 0.000000 705 +hierarchi 0 22 3.850148 0.000000 744 +tell 0 21 3.912023 0.000000 777 +mostli 0 19 4.007333 0.000000 869 +estim 0 17 4.110874 0.000000 930 +dewitt 0 12 4.465908 0.000000 1270 +naughton 1 10 4.653960 4.653960 1450 +jeffrei 1 9 4.753590 4.753590 1612 +wall 0 9 4.753590 0.000000 1553 +paradis 0 8 4.875197 0.000000 1782 +presenc 0 8 4.875197 0.000000 1671 +bombai 0 7 5.010635 0.000000 1972 +aggreg 0 6 5.164786 0.000000 2219 +prasad 0 6 5.164786 0.000000 2126 +deshpand 0 5 5.347108 0.000000 2431 +multidimension 0 4 5.568345 0.000000 3091 +amit 0 4 5.568345 0.000000 2972 +ramasami 0 4 5.568345 0.000000 3088 +shukla 0 3 5.857933 0.000000 4030 +karthikeyan 0 3 5.857933 0.000000 4031 +mumbai 0 3 5.857933 0.000000 4029 +karthik 0 1 6.957497 0.000000 19687 +pagekarthikeyan 0 1 6.957497 0.000000 19688 +ramasamyabouti 0 1 6.957497 0.000000 19689 +projectshack 0 1 6.957497 0.000000 19690 +connectivityparadis 0 1 6.957497 0.000000 19691 +pthread 0 1 6.957497 0.000000 19692 +wrapperspublicationsstorag 0 1 6.957497 0.000000 19693 +presentationsweb 0 1 6.957497 0.000000 19694 +picturearchitectur 0 1 6.957497 0.000000 19695 +serversphoto 0 1 6.957497 0.000000 19696 +albumencount 0 1 6.957497 0.000000 19697 +leafperson 0 1 6.957497 0.000000 19698 +inforesum 0 1 6.957497 0.000000 19699 +financemonei 0 1 6.957497 0.000000 19700 +interestshack 0 1 6.957497 0.000000 19701 +photographycontact 0 1 6.957497 0.000000 19702 +informationstreet 0 1 6.957497 0.000000 19703 +addresskarthik 0 1 6.957497 0.000000 19704 +suggestionspleas 0 1 6.957497 0.000000 19705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..502589d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +construct 0 139 1.945910 0.000000 82 +place 0 106 2.197225 0.000000 124 +visit 0 63 2.772589 0.000000 288 +space 0 57 2.890372 0.000000 310 +edulast 0 17 4.110874 0.000000 927 +stai 0 12 4.465908 0.000000 1215 +tune 0 12 4.465908 0.000000 1227 +login 0 9 4.753590 0.000000 1550 +kelli 1 4 5.568345 5.568345 2793 +ratliffoffic 0 1 6.957497 0.000000 19706 +genealog 0 1 6.957497 0.000000 19707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..791b128b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +area 0 144 1.945910 0.000000 80 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +imag 0 91 2.397895 0.000000 161 +control 0 82 2.484907 0.000000 164 +thing 0 84 2.484907 0.000000 189 +west 0 83 2.484907 0.000000 192 +main 0 67 2.708050 0.000000 256 +street 0 63 2.772589 0.000000 293 +digit 0 52 2.995732 0.000000 348 +advisor 0 51 2.995732 0.000000 355 +video 0 44 3.135494 0.000000 405 +product 0 33 3.433987 0.000000 527 +compress 1 23 3.806662 3.806662 719 +qualiti 0 20 3.951244 0.000000 832 +vector 0 16 4.174387 0.000000 961 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +mode 0 9 4.753590 0.000000 1492 +invok 0 6 5.164786 0.000000 2079 +reveal 0 4 5.568345 0.000000 2647 +fractal 0 3 5.857933 0.000000 3475 +quantiz 0 2 6.263398 0.000000 5692 +ratnakar 0 1 6.957497 0.000000 19708 +viresh 0 1 6.957497 0.000000 19709 +lossi 0 1 6.957497 0.000000 19710 +qclicauthor 0 1 6.957497 0.000000 19711 +qclic 0 1 6.957497 0.000000 19712 +qclicbrows 0 1 6.957497 0.000000 19713 +rever 0 1 6.957497 0.000000 19714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..377ba424 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +address 0 170 1.791759 0.000000 62 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +distribut 0 162 1.791759 0.000000 51 +architectur 0 139 1.945910 0.000000 77 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +person 0 111 2.197225 0.000000 117 +check 0 115 2.197225 0.000000 118 +advanc 0 99 2.302585 0.000000 130 +need 0 98 2.302585 0.000000 135 +mani 0 92 2.397895 0.000000 150 +graphic 0 90 2.397895 0.000000 147 +west 0 83 2.484907 0.000000 192 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +order 0 69 2.708050 0.000000 249 +wednesdai 0 64 2.772589 0.000000 261 +street 0 63 2.772589 0.000000 293 +best 0 59 2.833213 0.000000 299 +publish 0 57 2.890372 0.000000 326 +profession 0 51 2.995732 0.000000 345 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +fridai 0 44 3.135494 0.000000 390 +richard 0 31 3.496508 0.000000 559 +load 0 28 3.610918 0.000000 601 +fellow 0 24 3.761200 0.000000 701 +emphasi 0 22 3.850148 0.000000 755 +thur 0 19 4.007333 0.000000 847 +ever 0 19 4.007333 0.000000 872 +whole 0 17 4.110874 0.000000 940 +brother 0 13 4.382027 0.000000 1189 +sundai 0 10 4.653960 0.000000 1387 +desktop 0 10 4.653960 0.000000 1445 +prevent 0 7 5.010635 0.000000 1827 +saturdai 0 7 5.010635 0.000000 1794 +shade 0 7 5.010635 0.000000 1881 +polit 0 6 5.164786 0.000000 2115 +artist 0 6 5.164786 0.000000 2127 +seriou 0 5 5.347108 0.000000 2252 +haven 0 4 5.568345 0.000000 3037 +underwat 0 4 5.568345 0.000000 2838 +fire 0 4 5.568345 0.000000 3001 +beard 0 2 6.263398 0.000000 6161 +grave 0 2 6.263398 0.000000 5968 +doom 0 2 6.263398 0.000000 5848 +goofi 0 2 6.263398 0.000000 4074 +omin 0 1 6.957497 0.000000 19715 +monasteriu 0 1 6.957497 0.000000 19716 +doominu 0 1 6.957497 0.000000 19717 +rcarl 0 1 6.957497 0.000000 19718 +subsurfac 0 1 6.957497 0.000000 19719 +depositori 0 1 6.957497 0.000000 19720 +dig 0 1 6.957497 0.000000 19721 +solitari 0 1 6.957497 0.000000 19722 +innebri 0 1 6.957497 0.000000 19723 +vampir 0 1 6.957497 0.000000 19724 +nostalg 0 1 6.957497 0.000000 19725 +funki 0 1 6.957497 0.000000 19726 +monk 0 1 6.957497 0.000000 19727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..d3e180d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,559 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +washington 0 236 1.386294 0.000000 32 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +report 0 131 2.079442 0.000000 92 +dayton 0 119 2.079442 0.000000 104 +provid 0 121 2.079442 0.000000 94 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +world 0 115 2.197225 0.000000 126 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +manag 0 114 2.197225 0.000000 125 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +access 0 102 2.302585 0.000000 136 +book 0 99 2.302585 0.000000 131 +text 0 98 2.302585 0.000000 133 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +center 0 88 2.397895 0.000000 158 +commun 0 95 2.397895 0.000000 157 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +journal 0 83 2.484907 0.000000 183 +second 0 81 2.484907 0.000000 166 +larg 0 82 2.484907 0.000000 168 +solut 0 82 2.484907 0.000000 162 +west 0 83 2.484907 0.000000 192 +member 0 84 2.484907 0.000000 165 +academ 0 82 2.484907 0.000000 178 +chang 0 82 2.484907 0.000000 163 +control 0 82 2.484907 0.000000 164 +institut 0 84 2.484907 0.000000 187 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +refer 0 78 2.564949 0.000000 203 +dynam 0 76 2.564949 0.000000 194 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +optim 0 79 2.564949 0.000000 197 +symposium 0 72 2.639057 0.000000 238 +solv 0 73 2.639057 0.000000 234 +logic 0 71 2.639057 0.000000 230 +david 0 71 2.639057 0.000000 232 +workshop 0 71 2.639057 0.000000 239 +summari 0 73 2.639057 0.000000 237 +line 0 75 2.639057 0.000000 231 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +integr 0 67 2.708050 0.000000 245 +august 0 66 2.708050 0.000000 257 +test 0 66 2.708050 0.000000 252 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +differ 0 66 2.708050 0.000000 253 +januari 0 62 2.772589 0.000000 264 +evalu 0 64 2.772589 0.000000 266 +foundat 0 62 2.772589 0.000000 286 +septemb 0 65 2.772589 0.000000 274 +complex 0 64 2.772589 0.000000 269 +interact 0 62 2.772589 0.000000 270 +creat 0 63 2.772589 0.000000 277 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +juli 0 60 2.833213 0.000000 305 +march 0 61 2.833213 0.000000 295 +content 0 59 2.833213 0.000000 302 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +direct 0 57 2.890372 0.000000 316 +special 0 56 2.890372 0.000000 320 +index 0 56 2.890372 0.000000 309 +explor 0 58 2.890372 0.000000 324 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +scientif 0 53 2.944439 0.000000 341 +allow 0 53 2.944439 0.000000 333 +particular 0 51 2.995732 0.000000 352 +maintain 0 51 2.995732 0.000000 342 +principl 0 48 3.044522 0.000000 357 +pointer 0 48 3.044522 0.000000 368 +visitor 0 49 3.044522 0.000000 371 +telephon 0 50 3.044522 0.000000 373 +set 0 50 3.044522 0.000000 361 +california 0 46 3.091042 0.000000 388 +understand 0 47 3.091042 0.000000 384 +algebra 0 45 3.135494 0.000000 394 +describ 0 45 3.135494 0.000000 400 +natur 0 44 3.135494 0.000000 406 +third 0 43 3.178054 0.000000 412 +show 0 43 3.178054 0.000000 417 +york 0 41 3.218876 0.000000 435 +editor 0 41 3.218876 0.000000 433 +combin 0 42 3.218876 0.000000 421 +edit 0 42 3.218876 0.000000 418 +press 0 42 3.218876 0.000000 419 +compani 0 41 3.218876 0.000000 423 +transact 0 39 3.258097 0.000000 438 +societi 0 40 3.258097 0.000000 456 +submit 0 39 3.258097 0.000000 440 +theoret 0 39 3.258097 0.000000 446 +tutori 0 39 3.258097 0.000000 437 +small 0 39 3.258097 0.000000 447 +probabl 0 40 3.258097 0.000000 455 +seminar 0 38 3.295837 0.000000 470 +streetmadison 0 38 3.295837 0.000000 474 +vita 0 38 3.295837 0.000000 473 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +prototyp 0 38 3.295837 0.000000 463 +connect 0 37 3.332205 0.000000 485 +mean 0 37 3.332205 0.000000 477 +china 0 37 3.332205 0.000000 487 +procedur 0 36 3.367296 0.000000 488 +tree 0 36 3.367296 0.000000 492 +multi 0 36 3.367296 0.000000 493 +manual 0 35 3.401197 0.000000 504 +post 0 35 3.401197 0.000000 505 +tech 0 35 3.401197 0.000000 515 +represent 0 35 3.401197 0.000000 512 +either 0 35 3.401197 0.000000 506 +bibliographi 0 34 3.401197 0.000000 518 +curriculum 0 33 3.433987 0.000000 535 +obtain 0 33 3.433987 0.000000 534 +dissert 0 32 3.465736 0.000000 549 +transform 0 32 3.465736 0.000000 542 +kind 0 32 3.465736 0.000000 541 +chapter 0 32 3.465736 0.000000 536 +graph 0 30 3.555348 0.000000 576 +power 0 30 3.555348 0.000000 573 +robert 0 30 3.555348 0.000000 567 +semant 0 29 3.583519 0.000000 587 +depend 0 29 3.583519 0.000000 583 +chines 0 29 3.583519 0.000000 595 +releas 0 28 3.610918 0.000000 616 +univ 0 28 3.610918 0.000000 617 +manipul 0 27 3.637586 0.000000 624 +static 0 27 3.637586 0.000000 619 +consist 0 26 3.688879 0.000000 651 +subject 0 26 3.688879 0.000000 647 +repres 0 26 3.688879 0.000000 656 +bound 0 26 3.688879 0.000000 659 +notic 0 25 3.737670 0.000000 675 +hill 0 25 3.737670 0.000000 670 +valu 0 25 3.737670 0.000000 665 +fundament 0 25 3.737670 0.000000 661 +doctor 0 24 3.761200 0.000000 709 +handl 0 24 3.761200 0.000000 685 +departmentunivers 0 24 3.761200 0.000000 711 +demonstr 0 24 3.761200 0.000000 694 +methodolog 0 23 3.806662 0.000000 733 +variabl 0 23 3.806662 0.000000 715 +miscellan 0 23 3.806662 0.000000 731 +proof 0 23 3.806662 0.000000 720 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +properti 0 22 3.850148 0.000000 749 +serv 0 22 3.850148 0.000000 758 +sequenti 0 22 3.850148 0.000000 745 +identifi 0 22 3.850148 0.000000 760 +path 0 21 3.912023 0.000000 778 +latest 0 21 3.912023 0.000000 785 +corpor 0 21 3.912023 0.000000 802 +programminglanguag 0 21 3.912023 0.000000 782 +theorem 0 21 3.912023 0.000000 786 +basi 0 20 3.951244 0.000000 828 +citi 0 19 4.007333 0.000000 874 +beij 0 19 4.007333 0.000000 876 +comparison 0 19 4.007333 0.000000 863 +boston 0 19 4.007333 0.000000 862 +north 0 19 4.007333 0.000000 873 +record 0 18 4.060443 0.000000 890 +thoma 0 18 4.060443 0.000000 901 +element 0 18 4.060443 0.000000 895 +partial 0 18 4.060443 0.000000 900 +speed 0 18 4.060443 0.000000 911 +germani 0 17 4.110874 0.000000 946 +modif 0 17 4.110874 0.000000 913 +debug 0 17 4.110874 0.000000 944 +fourth 0 16 4.174387 0.000000 999 +diego 0 16 4.174387 0.000000 992 +cambridg 0 16 4.174387 0.000000 1008 +letter 0 16 4.174387 0.000000 981 +ramakrishnan 0 16 4.174387 0.000000 972 +taiwan 0 16 4.174387 0.000000 1006 +precis 0 15 4.248495 0.000000 1023 +transit 0 15 4.248495 0.000000 1046 +princeton 0 15 4.248495 0.000000 1042 +configur 0 15 4.248495 0.000000 1012 +remot 0 15 4.248495 0.000000 1041 +reprint 0 14 4.317488 0.000000 1097 +attribut 0 14 4.317488 0.000000 1092 +demand 0 14 4.317488 0.000000 1073 +francisco 0 14 4.317488 0.000000 1095 +polynomi 0 14 4.317488 0.000000 1069 +sigplan 0 13 4.382027 0.000000 1190 +directli 0 13 4.382027 0.000000 1141 +carri 0 13 4.382027 0.000000 1152 +context 0 13 4.382027 0.000000 1153 +shape 0 12 4.465908 0.000000 1245 +pascal 0 12 4.465908 0.000000 1213 +nanci 0 12 4.465908 0.000000 1256 +franc 0 12 4.465908 0.000000 1276 +onth 0 12 4.465908 0.000000 1218 +scan 0 12 4.465908 0.000000 1243 +instanc 0 11 4.553877 0.000000 1322 +magic 0 11 4.553877 0.000000 1358 +impact 0 11 4.553877 0.000000 1334 +lake 0 11 4.553877 0.000000 1373 +israel 0 11 4.553877 0.000000 1366 +decomposit 0 10 4.653960 0.000000 1439 +underli 0 10 4.653960 0.000000 1410 +invit 0 10 4.653960 0.000000 1428 +respect 0 9 4.753590 0.000000 1545 +mainten 0 9 4.753590 0.000000 1543 +establish 0 9 4.753590 0.000000 1532 +conferenceon 0 9 4.753590 0.000000 1595 +utah 0 9 4.753590 0.000000 1585 +herefor 0 9 4.753590 0.000000 1483 +ball 0 9 4.753590 0.000000 1608 +equival 0 9 4.753590 0.000000 1496 +yang 0 8 4.875197 0.000000 1652 +colloquium 0 8 4.875197 0.000000 1734 +reus 0 8 4.875197 0.000000 1661 +european 0 8 4.875197 0.000000 1763 +secretari 0 8 4.875197 0.000000 1775 +pldi 0 8 4.875197 0.000000 1704 +competit 0 8 4.875197 0.000000 1635 +irvin 0 8 4.875197 0.000000 1660 +illustr 0 8 4.875197 0.000000 1679 +analys 0 8 4.875197 0.000000 1666 +merg 0 7 5.010635 0.000000 1862 +portland 0 7 5.010635 0.000000 1878 +fifth 0 7 5.010635 0.000000 1931 +iowa 0 7 5.010635 0.000000 1971 +bottom 0 7 5.010635 0.000000 1906 +prioriti 0 7 5.010635 0.000000 1792 +pittsburgh 0 7 5.010635 0.000000 1938 +digest 0 7 5.010635 0.000000 1864 +increment 0 6 5.164786 0.000000 2206 +teitelbaum 0 6 5.164786 0.000000 2102 +symposiumon 0 6 5.164786 0.000000 2054 +grammar 0 6 5.164786 0.000000 2058 +syntax 0 6 5.164786 0.000000 2030 +affect 0 6 5.164786 0.000000 2044 +textual 0 6 5.164786 0.000000 1979 +kluwer 0 6 5.164786 0.000000 2143 +variant 0 6 5.164786 0.000000 2043 +german 0 6 5.164786 0.000000 2190 +unpublish 0 6 5.164786 0.000000 2226 +carolina 0 6 5.164786 0.000000 2142 +horwitz 0 5 5.347108 0.000000 2411 +synthes 0 5 5.347108 0.000000 2451 +dataflow 0 5 5.347108 0.000000 2390 +licens 0 5 5.347108 0.000000 2520 +twenti 0 5 5.347108 0.000000 2540 +aim 0 5 5.347108 0.000000 2477 +summarymi 0 5 5.347108 0.000000 2580 +cacm 0 5 5.347108 0.000000 2388 +shortest 0 5 5.347108 0.000000 2424 +australia 0 5 5.347108 0.000000 2478 +singapor 0 5 5.347108 0.000000 2487 +mcgraw 0 5 5.347108 0.000000 2262 +bind 0 5 5.347108 0.000000 2250 +orlean 0 5 5.347108 0.000000 2550 +interfer 0 5 5.347108 0.000000 2494 +forprogram 0 5 5.347108 0.000000 2361 +salt 0 5 5.347108 0.000000 2413 +patent 0 5 5.347108 0.000000 2574 +chapel 0 5 5.347108 0.000000 2457 +rep 0 4 5.568345 0.000000 3087 +slice 0 4 5.568345 0.000000 2622 +interprocedur 0 4 5.568345 0.000000 2771 +popl 0 4 5.568345 0.000000 3068 +sigsoft 0 4 5.568345 0.000000 3036 +dagstuhl 0 4 5.568345 0.000000 2871 +compcon 0 4 5.568345 0.000000 2958 +ofprogram 0 4 5.568345 0.000000 2624 +ics 0 4 5.568345 0.000000 2779 +petersburg 0 4 5.568345 0.000000 2989 +jolla 0 4 5.568345 0.000000 2988 +bricker 0 4 5.568345 0.000000 3050 +usa 0 4 5.568345 0.000000 3080 +exhaust 0 4 5.568345 0.000000 2825 +melbourn 0 4 5.568345 0.000000 3035 +turnidg 0 4 5.568345 0.000000 2829 +imper 0 4 5.568345 0.000000 3067 +scotland 0 4 5.568345 0.000000 3049 +topla 0 3 5.857933 0.000000 3563 +reachabl 0 3 5.857933 0.000000 4001 +alamito 0 3 5.857933 0.000000 3558 +categor 0 3 5.857933 0.000000 3765 +schloss 0 3 5.857933 0.000000 3727 +denmark 0 3 5.857933 0.000000 3676 +amast 0 3 5.857933 0.000000 3955 +spaa 0 3 5.857933 0.000000 3906 +propag 0 3 5.857933 0.000000 3997 +accommod 0 3 5.857933 0.000000 3337 +fifteenth 0 3 5.857933 0.000000 3868 +principlesof 0 3 5.857933 0.000000 3145 +twentieth 0 3 5.857933 0.000000 3760 +thedevelop 0 3 5.857933 0.000000 3903 +meaning 0 3 5.857933 0.000000 3458 +nearbi 0 3 5.857933 0.000000 3291 +retarget 0 3 5.857933 0.000000 3994 +fourteenth 0 3 5.857933 0.000000 3615 +domin 0 3 5.857933 0.000000 3995 +preserv 0 3 5.857933 0.000000 3628 +jone 0 3 5.857933 0.000000 3703 +atlanta 0 3 5.857933 0.000000 3778 +onprincipl 0 3 5.857933 0.000000 3701 +ninth 0 3 5.857933 0.000000 3616 +sagiv 0 2 6.263398 0.000000 6176 +acta 0 2 6.263398 0.000000 5124 +differenc 0 2 6.263398 0.000000 6177 +chop 0 2 6.263398 0.000000 6160 +informatica 0 2 6.263398 0.000000 5125 +destruct 0 2 6.263398 0.000000 6232 +copenhagen 0 2 6.263398 0.000000 5145 +alia 0 2 6.263398 0.000000 5383 +charleston 0 2 6.263398 0.000000 6181 +thevari 0 2 6.263398 0.000000 6130 +contigu 0 2 6.263398 0.000000 6001 +worker 0 2 6.263398 0.000000 4841 +andbuild 0 2 6.263398 0.000000 6028 +clickherefor 0 2 6.263398 0.000000 5344 +interproceduraldataflow 0 2 6.263398 0.000000 6178 +unrestrict 0 2 6.263398 0.000000 4879 +arnold 0 2 6.263398 0.000000 4705 +wasserman 0 2 6.263398 0.000000 5331 +aarhu 0 2 6.263398 0.000000 6180 +moss 0 2 6.263398 0.000000 5820 +fritzson 0 2 6.263398 0.000000 4546 +andarchitectur 0 2 6.263398 0.000000 5755 +languagedesign 0 2 6.263398 0.000000 6182 +spain 0 2 6.263398 0.000000 5522 +adequaci 0 2 6.263398 0.000000 6229 +thirteenth 0 2 6.263398 0.000000 5733 +eleventh 0 2 6.263398 0.000000 5031 +eighth 0 2 6.263398 0.000000 5750 +leeuwen 0 2 6.263398 0.000000 5543 +doc 0 2 6.263398 0.000000 5022 +mooli 0 2 6.263398 0.000000 6179 +tung 0 2 6.263398 0.000000 4709 +binklei 0 1 6.957497 0.000000 19728 +ramalingam 0 1 6.957497 0.000000 19729 +prin 0 1 6.957497 0.000000 19730 +idfa 0 1 6.957497 0.000000 19731 +interf 0 1 6.957497 0.000000 19732 +wilhelm 0 1 6.957497 0.000000 19733 +tosem 0 1 6.957497 0.000000 19734 +pfeiffer 0 1 6.957497 0.000000 19735 +demer 0 1 6.957497 0.000000 19736 +fromacm 0 1 6.957497 0.000000 19737 +berzin 0 1 6.957497 0.000000 19738 +sigsoftsymposium 0 1 6.957497 0.000000 19739 +wadern 0 1 6.957497 0.000000 19740 +rosai 0 1 6.957497 0.000000 19741 +fseb 0 1 6.957497 0.000000 19742 +thesiswuu 0 1 6.957497 0.000000 19743 +esop 0 1 6.957497 0.000000 19744 +poplb 0 1 6.957497 0.000000 19745 +pepma 0 1 6.957497 0.000000 19746 +fsea 0 1 6.957497 0.000000 19747 +diku 0 1 6.957497 0.000000 19748 +fase 0 1 6.957497 0.000000 19749 +pepmb 0 1 6.957497 0.000000 19750 +lape 0 1 6.957497 0.000000 19751 +psde 0 1 6.957497 0.000000 19752 +toconst 0 1 6.957497 0.000000 19753 +paradigmsfor 0 1 6.957497 0.000000 19754 +brighton 0 1 6.957497 0.000000 19755 +abramski 0 1 6.957497 0.000000 19756 +maibaum 0 1 6.957497 0.000000 19757 +wherefor 0 1 6.957497 0.000000 19758 +sigoa 0 1 6.957497 0.000000 19759 +pepm 0 1 6.957497 0.000000 19760 +onparti 0 1 6.957497 0.000000 19761 +ibfi 0 1 6.957497 0.000000 19762 +repsprofessorcomput 0 1 6.957497 0.000000 19763 +thehom 0 1 6.957497 0.000000 19764 +createtool 0 1 6.957497 0.000000 19765 +manipulationoper 0 1 6.957497 0.000000 19766 +slicingcan 0 1 6.957497 0.000000 19767 +elementss 0 1 6.957497 0.000000 19768 +thatmight 0 1 6.957497 0.000000 19769 +findsemant 0 1 6.957497 0.000000 19770 +thedecomposit 0 1 6.957497 0.000000 19771 +solvingmani 0 1 6.957497 0.000000 19772 +applicationsin 0 1 6.957497 0.000000 19773 +atimprov 0 1 6.957497 0.000000 19774 +relatedoper 0 1 6.957497 0.000000 19775 +slicer 0 1 6.957497 0.000000 19776 +unexpect 0 1 6.957497 0.000000 19777 +betweeninterprocedur 0 1 6.957497 0.000000 19778 +oninterprocedur 0 1 6.957497 0.000000 19779 +transformingthem 0 1 6.957497 0.000000 19780 +timebi 0 1 6.957497 0.000000 19781 +probleminst 0 1 6.957497 0.000000 19782 +publicationsprogram 0 1 6.957497 0.000000 19783 +slicing_pat 0 1 6.957497 0.000000 19784 +thesismerg 0 1 6.957497 0.000000 19785 +iwscm 0 1 6.957497 0.000000 19786 +popla 0 1 6.957497 0.000000 19787 +iwsvcc 0 1 6.957497 0.000000 19788 +ccpsd 0 1 6.957497 0.000000 19789 +npfo_submiss 0 1 6.957497 0.000000 19790 +ccipl 0 1 6.957497 0.000000 19791 +prog_integration_system 0 1 6.957497 0.000000 19792 +prog_integration_manu 0 1 6.957497 0.000000 19793 +subsetof 0 1 6.957497 0.000000 19794 +clickingher 0 1 6.957497 0.000000 19795 +andexpect 0 1 6.957497 0.000000 19796 +anddifferenc 0 1 6.957497 0.000000 19797 +thesesdavid 0 1 6.957497 0.000000 19798 +thesisphil 0 1 6.957497 0.000000 19799 +thesisinterprocedur 0 1 6.957497 0.000000 19800 +analysisdemand 0 1 6.957497 0.000000 19801 +tcs_ide_pap 0 1 6.957497 0.000000 19802 +ptime 0 1 6.957497 0.000000 19803 +acta_pap 0 1 6.957497 0.000000 19804 +pfeiffer_thesi 0 1 6.957497 0.000000 19805 +jalg_pap 0 1 6.957497 0.000000 19806 +popl_not 0 1 6.957497 0.000000 19807 +publicationsbooksrep 0 1 6.957497 0.000000 19808 +constructinglanguag 0 1 6.957497 0.000000 19809 +publicationssagiv 0 1 6.957497 0.000000 19810 +j_alg 0 1 6.957497 0.000000 19811 +preservingtransform 0 1 6.957497 0.000000 19812 +grammarswith 0 1 6.957497 0.000000 19813 +movement 0 1 6.957497 0.000000 19814 +sublinear 0 1 6.957497 0.000000 19815 +papershorwitz 0 1 6.957497 0.000000 19816 +ganzing 0 1 6.957497 0.000000 19817 +chaptersrep 0 1 6.957497 0.000000 19818 +bohner 0 1 6.957497 0.000000 19819 +fromproceed 0 1 6.957497 0.000000 19820 +ichikawa 0 1 6.957497 0.000000 19821 +tsubotani 0 1 6.957497 0.000000 19822 +barstow 0 1 6.957497 0.000000 19823 +sandewal 0 1 6.957497 0.000000 19824 +shrobe 0 1 6.957497 0.000000 19825 +publicationssiff 0 1 6.957497 0.000000 19826 +danvi 0 1 6.957497 0.000000 19827 +glueck 0 1 6.957497 0.000000 19828 +thiemann 0 1 6.957497 0.000000 19829 +hentenryck 0 1 6.957497 0.000000 19830 +formalapproach 0 1 6.957497 0.000000 19831 +nielsen 0 1 6.957497 0.000000 19832 +schwartzbach 0 1 6.957497 0.000000 19833 +tapsoft 0 1 6.957497 0.000000 19834 +compilerconstruct 0 1 6.957497 0.000000 19835 +edinburgh 0 1 6.957497 0.000000 19836 +reducibleflowgraph 0 1 6.957497 0.000000 19837 +velen 0 1 6.957497 0.000000 19838 +onalgebra 0 1 6.957497 0.000000 19839 +softwareconfigur 0 1 6.957497 0.000000 19840 +issuesin 0 1 6.957497 0.000000 19841 +barcelona 0 1 6.957497 0.000000 19842 +diaz 0 1 6.957497 0.000000 19843 +oreja 0 1 6.957497 0.000000 19844 +versionand 0 1 6.957497 0.000000 19845 +grassau 0 1 6.957497 0.000000 19846 +bericht 0 1 6.957497 0.000000 19847 +winkler 0 1 6.957497 0.000000 19848 +teubner 0 1 6.957497 0.000000 19849 +stuttgart 0 1 6.957497 0.000000 19850 +marceau 0 1 6.957497 0.000000 19851 +engineeringsymposium 0 1 6.957497 0.000000 19852 +alpern 0 1 6.957497 0.000000 19853 +albuquerqu 0 1 6.957497 0.000000 19854 +tosyntax 0 1 6.957497 0.000000 19855 +williamsburg 0 1 6.957497 0.000000 19856 +softwarerep 0 1 6.957497 0.000000 19857 +patentsrep 0 1 6.957497 0.000000 19858 +pend 0 1 6.957497 0.000000 19859 +submissionsrep 0 1 6.957497 0.000000 19860 +reportsrep 0 1 6.957497 0.000000 19861 +mehlhorn 0 1 6.957497 0.000000 19862 +datalogisk 0 1 6.957497 0.000000 19863 +psramalingam 0 1 6.957497 0.000000 19864 +klint 0 1 6.957497 0.000000 19865 +snelt 0 1 6.957497 0.000000 19866 +extendedabstract 0 1 6.957497 0.000000 19867 +reconstitut 0 1 6.957497 0.000000 19868 +studentsvisitor 0 1 6.957497 0.000000 19869 +jiazhen 0 1 6.957497 0.000000 19870 +paig 0 1 6.957497 0.000000 19871 +chiao 0 1 6.957497 0.000000 19872 +studentsramalingam 0 1 6.957497 0.000000 19873 +programintegr 0 1 6.957497 0.000000 19874 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..ee22a42f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +institut 0 84 2.484907 0.000000 187 +window 0 68 2.708050 0.000000 242 +function 0 62 2.772589 0.000000 275 +street 0 63 2.772589 0.000000 293 +copi 0 63 2.772589 0.000000 284 +best 0 59 2.833213 0.000000 299 +suggest 0 53 2.944439 0.000000 331 +date 0 51 2.995732 0.000000 344 +right 0 48 3.044522 0.000000 363 +friend 0 48 3.044522 0.000000 376 +tech 0 35 3.401197 0.000000 515 +india 0 32 3.465736 0.000000 550 +univ 0 28 3.610918 0.000000 617 +comp 0 26 3.688879 0.000000 650 +indian 0 22 3.850148 0.000000 769 +love 0 21 3.912023 0.000000 804 +reserv 0 20 3.951244 0.000000 808 +spend 0 19 4.007333 0.000000 850 +beauti 0 18 4.060443 0.000000 912 +statu 0 18 4.060443 0.000000 885 +speed 0 18 4.060443 0.000000 911 +seem 0 18 4.060443 0.000000 899 +comic 0 14 4.317488 0.000000 1103 +song 0 11 4.553877 0.000000 1380 +calvin 0 9 4.753590 0.000000 1518 +kanpur 0 8 4.875197 0.000000 1744 +film 0 8 4.875197 0.000000 1761 +apart 0 7 5.010635 0.000000 1936 +settimeout 0 5 5.347108 0.000000 2536 +guestbook 0 5 5.347108 0.000000 2475 +randal 0 4 5.568345 0.000000 2776 +mirza 0 3 5.857933 0.000000 3989 +hero 0 3 5.857933 0.000000 3711 +saeed 1 2 6.263398 6.263398 6172 +statusclock 0 1 6.957497 0.000000 19875 +pagespe 0 1 6.957497 0.000000 19876 +clearid 0 1 6.957497 0.000000 19877 +cleartimeout 0 1 6.957497 0.000000 19878 +lucknow 0 1 6.957497 0.000000 19879 +listn 0 1 6.957497 0.000000 19880 +netsurf 0 1 6.957497 0.000000 19881 +wismad 0 1 6.957497 0.000000 19882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..8b50beb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +lectur 0 135 1.945910 0.000000 73 +dayton 0 119 2.079442 0.000000 104 +real 0 93 2.397895 0.000000 144 +chang 0 82 2.484907 0.000000 163 +septemb 0 65 2.772589 0.000000 274 +taught 0 33 3.433987 0.000000 526 +comp 0 26 3.688879 0.000000 650 +edutelephon 0 10 4.653960 0.000000 1473 +desktop 0 10 4.653960 0.000000 1445 +peterson 0 7 5.010635 0.000000 1850 +salli 1 3 5.857933 5.857933 3432 +goodwin 0 1 6.957497 0.000000 19883 +lecturercomput 0 1 6.957497 0.000000 19884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..0037f646 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,130 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +databas 1 122 2.079442 2.079442 86 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +postscript 0 131 2.079442 0.000000 90 +seattl 0 120 2.079442 0.000000 103 +introduct 0 126 2.079442 0.000000 87 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +person 0 111 2.197225 0.000000 117 +present 0 91 2.397895 0.000000 145 +educ 0 86 2.484907 0.000000 191 +larg 0 82 2.484907 0.000000 168 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +help 0 83 2.484907 0.000000 175 +server 0 76 2.564949 0.000000 204 +master 0 76 2.564949 0.000000 216 +sourc 0 77 2.564949 0.000000 201 +onlin 0 75 2.639057 0.000000 223 +logic 0 71 2.639057 0.000000 230 +name 0 72 2.639057 0.000000 220 +nation 0 74 2.639057 0.000000 240 +multimedia 0 68 2.708050 0.000000 258 +dept 0 64 2.772589 0.000000 291 +evalu 0 64 2.772589 0.000000 266 +run 0 51 2.995732 0.000000 347 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +pointer 0 48 3.044522 0.000000 368 +archiv 0 49 3.044522 0.000000 364 +protocol 0 45 3.135494 0.000000 407 +york 0 41 3.218876 0.000000 435 +transact 0 39 3.258097 0.000000 438 +slide 0 38 3.295837 0.000000 467 +industri 0 38 3.295837 0.000000 464 +bibliographi 0 34 3.401197 0.000000 518 +queri 0 33 3.433987 0.000000 524 +articl 0 33 3.433987 0.000000 530 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +profil 0 30 3.555348 0.000000 581 +toward 0 25 3.737670 0.000000 668 +jeff 0 25 3.737670 0.000000 673 +reach 0 24 3.761200 0.000000 688 +initi 0 23 3.806662 0.000000 717 +sort 0 22 3.850148 0.000000 738 +indian 0 22 3.850148 0.000000 769 +hierarchi 0 22 3.850148 0.000000 744 +cooper 0 22 3.850148 0.000000 757 +boston 0 19 4.007333 0.000000 862 +sigmod 0 19 4.007333 0.000000 877 +bachelor 0 17 4.110874 0.000000 957 +estim 0 17 4.110874 0.000000 930 +georg 0 16 4.174387 0.000000 994 +princeton 0 15 4.248495 0.000000 1042 +massiv 0 15 4.248495 0.000000 1026 +warn 0 14 4.317488 0.000000 1068 +infrastructur 0 12 4.465908 0.000000 1234 +council 0 11 4.553877 0.000000 1364 +naughton 0 10 4.653960 0.000000 1450 +vldb 0 10 4.653960 0.000000 1470 +consortium 0 10 4.653960 0.000000 1467 +jeffrei 0 9 4.753590 0.000000 1612 +utah 0 9 4.753590 0.000000 1585 +madra 0 8 4.875197 0.000000 1770 +presenc 0 8 4.875197 0.000000 1671 +competit 0 8 4.875197 0.000000 1635 +spec 0 8 4.875197 0.000000 1640 +analyt 0 7 5.010635 0.000000 1913 +bombai 0 7 5.010635 0.000000 1972 +aggreg 0 6 5.164786 0.000000 2219 +prasad 0 6 5.164786 0.000000 2126 +chicago 0 6 5.164786 0.000000 2149 +deshpand 0 5 5.347108 0.000000 2431 +amit 0 4 5.568345 0.000000 2972 +snail 0 4 5.568345 0.000000 2916 +multidimension 0 4 5.568345 0.000000 3091 +ramasami 0 4 5.568345 0.000000 3088 +shukla 0 3 5.857933 0.000000 4030 +karthikeyan 0 3 5.857933 0.000000 4031 +mumbai 0 3 5.857933 0.000000 4029 +pilot 0 3 5.857933 0.000000 4008 +children 0 3 5.857933 0.000000 3767 +asha 0 3 5.857933 0.000000 4037 +marathon 0 2 6.263398 0.000000 5592 +olap 0 2 6.263398 0.000000 6233 +endow 0 2 6.263398 0.000000 6234 +guidanc 0 1 6.957497 0.000000 19885 +trier 0 1 6.957497 0.000000 19886 +mdd 0 1 6.957497 0.000000 19887 +niiip 0 1 6.957497 0.000000 19888 +transcoop 0 1 6.957497 0.000000 19889 +needi 0 1 6.957497 0.000000 19890 +pageand 0 1 6.957497 0.000000 19891 +bookmarksar 0 1 6.957497 0.000000 19892 +garfield 0 1 6.957497 0.000000 19893 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..e3507e19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +educ 0 86 2.484907 0.000000 191 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +visit 0 63 2.772589 0.000000 288 +undergradu 0 54 2.944439 0.000000 338 +india 0 32 3.465736 0.000000 550 +altern 0 26 3.688879 0.000000 641 +bookmark 0 26 3.688879 0.000000 639 +worth 0 11 4.553877 0.000000 1294 +bombai 0 7 5.010635 0.000000 1972 +whereabout 0 4 5.568345 0.000000 3078 +indianinstitut 0 3 5.857933 0.000000 4003 +fantast 0 3 5.857933 0.000000 3966 +hadmi 0 2 6.263398 0.000000 6097 +canfing 0 2 6.263398 0.000000 6098 +ashwin 1 1 6.957497 6.957497 19894 +iitb 0 1 6.957497 0.000000 19895 +meto 0 1 6.957497 0.000000 19896 +sashwin 0 1 6.957497 0.000000 19897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..c19ef69f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +like 1 132 1.945910 1.945910 81 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +compil 0 122 2.079442 0.000000 96 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +make 0 111 2.197225 0.000000 120 +send 0 114 2.197225 0.000000 109 +mani 0 92 2.397895 0.000000 150 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +contain 0 81 2.484907 0.000000 174 +academ 0 82 2.484907 0.000000 178 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +solv 0 73 2.639057 0.000000 234 +would 0 67 2.708050 0.000000 251 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +back 0 60 2.833213 0.000000 297 +plai 0 60 2.833213 0.000000 307 +undergradu 0 54 2.944439 0.000000 338 +talk 0 53 2.944439 0.000000 336 +suggest 0 53 2.944439 0.000000 331 +much 0 52 2.995732 0.000000 349 +maintain 0 51 2.995732 0.000000 342 +date 0 51 2.995732 0.000000 344 +friend 0 48 3.044522 0.000000 376 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +compani 0 41 3.218876 0.000000 423 +music 0 42 3.218876 0.000000 436 +past 0 42 3.218876 0.000000 428 +must 0 40 3.258097 0.000000 442 +realli 0 40 3.258097 0.000000 444 +author 0 39 3.258097 0.000000 450 +field 0 37 3.332205 0.000000 482 +mean 0 37 3.332205 0.000000 477 +india 0 32 3.465736 0.000000 550 +kind 0 32 3.465736 0.000000 541 +photo 0 31 3.496508 0.000000 561 +someth 0 31 3.496508 0.000000 554 +hard 0 30 3.555348 0.000000 563 +built 0 29 3.583519 0.000000 592 +consid 0 29 3.583519 0.000000 590 +hope 0 28 3.610918 0.000000 610 +mine 0 26 3.688879 0.000000 654 +bookmark 0 26 3.688879 0.000000 639 +enjoi 0 26 3.688879 0.000000 660 +rather 0 26 3.688879 0.000000 642 +sport 0 25 3.737670 0.000000 683 +indian 0 22 3.850148 0.000000 769 +inth 0 22 3.850148 0.000000 741 +watch 0 21 3.912023 0.000000 789 +wonder 0 20 3.951244 0.000000 815 +tenni 0 20 3.951244 0.000000 838 +beauti 0 18 4.060443 0.000000 912 +listen 0 18 4.060443 0.000000 907 +anyth 0 16 4.174387 0.000000 998 +across 0 16 4.174387 0.000000 974 +hobbi 0 16 4.174387 0.000000 1009 +photograph 0 15 4.248495 0.000000 1056 +goe 0 15 4.248495 0.000000 1044 +near 0 14 4.317488 0.000000 1091 +unfortun 0 13 4.382027 0.000000 1170 +scan 0 12 4.465908 0.000000 1243 +reader 0 12 4.465908 0.000000 1246 +awai 0 10 4.653960 0.000000 1447 +town 0 10 4.653960 0.000000 1458 +interestsmi 0 10 4.653960 0.000000 1462 +earth 0 10 4.653960 0.000000 1463 +hint 0 10 4.653960 0.000000 1419 +ball 0 9 4.753590 0.000000 1608 +jeffrei 0 9 4.753590 0.000000 1612 +pick 0 9 4.753590 0.000000 1498 +kanpur 0 8 4.875197 0.000000 1744 +pagei 0 8 4.875197 0.000000 1683 +empir 0 8 4.875197 0.000000 1722 +bridg 0 8 4.875197 0.000000 1764 +cricket 0 7 5.010635 0.000000 1945 +river 0 6 5.164786 0.000000 2220 +rock 0 6 5.164786 0.000000 2164 +whatev 0 6 5.164786 0.000000 2097 +neither 0 6 5.164786 0.000000 1990 +fiction 0 6 5.164786 0.000000 2217 +tri 0 6 5.164786 0.000000 2166 +album 0 4 5.568345 0.000000 2888 +gokul 0 4 5.568345 0.000000 2668 +thati 0 4 5.568345 0.000000 2616 +metal 0 4 5.568345 0.000000 3079 +fantasi 0 4 5.568345 0.000000 3055 +devot 0 4 5.568345 0.000000 2711 +dont 0 3 5.857933 0.000000 3473 +pleasant 0 3 5.857933 0.000000 3825 +seinfeld 0 3 5.857933 0.000000 3958 +romanc 0 3 5.857933 0.000000 3632 +iitk 0 2 6.263398 0.000000 6227 +sastri 0 2 6.263398 0.000000 6171 +mugshot 0 2 6.263398 0.000000 4984 +karnataka 0 2 6.263398 0.000000 5106 +whati 0 2 6.263398 0.000000 6027 +horror 0 2 6.263398 0.000000 5075 +eduunivers 0 2 6.263398 0.000000 6216 +subramanya 0 1 6.957497 0.000000 19898 +hospet 0 1 6.957497 0.000000 19899 +tungabhadra 0 1 6.957497 0.000000 19900 +favourit 0 1 6.957497 0.000000 19901 +hampi 0 1 6.957497 0.000000 19902 +ruin 0 1 6.957497 0.000000 19903 +vijayanagara 0 1 6.957497 0.000000 19904 +fewphotograph 0 1 6.957497 0.000000 19905 +classmatesat 0 1 6.957497 0.000000 19906 +presentcurr 0 1 6.957497 0.000000 19907 +registeredfor 0 1 6.957497 0.000000 19908 +playphatta 0 1 6.957497 0.000000 19909 +champ 0 1 6.957497 0.000000 19910 +entertainmentin 0 1 6.957497 0.000000 19911 +donot 0 1 6.957497 0.000000 19912 +sshow 0 1 6.957497 0.000000 19913 +voraci 0 1 6.957497 0.000000 19914 +unsuccesfulli 0 1 6.957497 0.000000 19915 +grip 0 1 6.957497 0.000000 19916 +ifposs 0 1 6.957497 0.000000 19917 +archer 0 1 6.957497 0.000000 19918 +jane 0 1 6.957497 0.000000 19919 +austen 0 1 6.957497 0.000000 19920 +pride 0 1 6.957497 0.000000 19921 +prejudic 0 1 6.957497 0.000000 19922 +ramesh 0 1 6.957497 0.000000 19923 +mahadeven 0 1 6.957497 0.000000 19924 +sarticl 0 1 6.957497 0.000000 19925 +wonderfulgam 0 1 6.957497 0.000000 19926 +itagain 0 1 6.957497 0.000000 19927 +crossword 0 1 6.957497 0.000000 19928 +cryptic 0 1 6.957497 0.000000 19929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..37ddf5e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,220 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +support 0 132 1.945910 0.000000 83 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +report 0 131 2.079442 0.000000 92 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +topic 0 114 2.197225 0.000000 110 +site 0 106 2.197225 0.000000 119 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +user 0 104 2.302585 0.000000 137 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +commun 0 95 2.397895 0.000000 157 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +sinc 0 90 2.397895 0.000000 159 +internet 0 83 2.484907 0.000000 186 +educ 0 86 2.484907 0.000000 191 +resourc 0 81 2.484907 0.000000 172 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +level 0 87 2.484907 0.000000 180 +come 0 78 2.564949 0.000000 202 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +orient 0 80 2.564949 0.000000 205 +servic 0 72 2.639057 0.000000 236 +nation 0 74 2.639057 0.000000 240 +addit 0 74 2.639057 0.000000 228 +involv 0 71 2.639057 0.000000 227 +appli 0 71 2.639057 0.000000 226 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +organ 0 65 2.772589 0.000000 265 +import 0 65 2.772589 0.000000 282 +written 0 63 2.772589 0.000000 278 +result 0 65 2.772589 0.000000 281 +plan 0 65 2.772589 0.000000 272 +visit 0 63 2.772589 0.000000 288 +descript 0 64 2.772589 0.000000 271 +street 0 63 2.772589 0.000000 293 +best 0 59 2.833213 0.000000 299 +special 0 56 2.890372 0.000000 320 +three 0 54 2.944439 0.000000 330 +undergradu 0 54 2.944439 0.000000 338 +sampl 0 53 2.944439 0.000000 339 +week 0 52 2.995732 0.000000 343 +profession 0 51 2.995732 0.000000 345 +format 0 48 3.044522 0.000000 356 +telephon 0 50 3.044522 0.000000 373 +effect 0 46 3.091042 0.000000 385 +natur 0 44 3.135494 0.000000 406 +third 0 43 3.178054 0.000000 412 +edit 0 42 3.218876 0.000000 418 +futur 0 41 3.218876 0.000000 427 +join 0 39 3.258097 0.000000 457 +multipl 0 39 3.258097 0.000000 453 +continu 0 39 3.258097 0.000000 448 +open 0 38 3.295837 0.000000 469 +seminar 0 38 3.295837 0.000000 470 +feel 0 37 3.332205 0.000000 483 +staff 0 36 3.367296 0.000000 490 +everi 0 34 3.401197 0.000000 519 +award 0 34 3.401197 0.000000 523 +kind 0 32 3.465736 0.000000 541 +collabor 0 32 3.465736 0.000000 543 +posit 0 31 3.496508 0.000000 552 +titl 0 31 3.496508 0.000000 556 +produc 0 30 3.555348 0.000000 572 +focus 0 29 3.583519 0.000000 584 +depend 0 29 3.583519 0.000000 583 +propos 0 28 3.610918 0.000000 602 +retriev 0 27 3.637586 0.000000 621 +background 0 25 3.737670 0.000000 664 +spent 0 25 3.737670 0.000000 676 +departmentunivers 0 24 3.761200 0.000000 711 +higher 0 24 3.761200 0.000000 690 +seri 0 24 3.761200 0.000000 708 +director 0 22 3.850148 0.000000 767 +cooper 0 22 3.850148 0.000000 757 +newsgroup 0 21 3.912023 0.000000 783 +divis 0 21 3.912023 0.000000 803 +toolkit 0 20 3.951244 0.000000 835 +wrote 0 20 3.951244 0.000000 830 +item 0 19 4.007333 0.000000 856 +expand 0 17 4.110874 0.000000 928 +diego 0 16 4.174387 0.000000 992 +susan 0 15 4.248495 0.000000 1050 +later 0 15 4.248495 0.000000 1043 +becam 0 14 4.317488 0.000000 1117 +speak 0 12 4.465908 0.000000 1283 +onth 0 12 4.465908 0.000000 1218 +branch 0 11 4.553877 0.000000 1318 +thecomput 0 10 4.653960 0.000000 1408 +hundr 0 9 4.753590 0.000000 1528 +discov 0 9 4.753590 0.000000 1562 +respect 0 9 4.753590 0.000000 1545 +filter 0 8 4.875197 0.000000 1641 +elect 0 8 4.875197 0.000000 1771 +jack 0 8 4.875197 0.000000 1780 +potenti 0 8 4.875197 0.000000 1690 +gather 0 8 4.875197 0.000000 1719 +scout 1 7 5.010635 5.010635 1903 +usabl 0 7 5.010635 0.000000 1810 +happen 0 7 5.010635 0.000000 1790 +discoveri 0 7 5.010635 0.000000 1915 +edumi 0 6 5.164786 0.000000 2132 +approv 0 6 5.164786 0.000000 2078 +matthew 0 6 5.164786 0.000000 2193 +ifyou 0 6 5.164786 0.000000 1992 +kid 0 5 5.347108 0.000000 2516 +merit 0 5 5.347108 0.000000 2466 +devot 0 4 5.568345 0.000000 2711 +newslett 0 4 5.568345 0.000000 2873 +termin 0 4 5.568345 0.000000 2852 +chose 0 4 5.568345 0.000000 2629 +hire 0 4 5.568345 0.000000 2976 +agreement 0 3 5.857933 0.000000 3207 +newli 0 3 5.857933 0.000000 3786 +orth 0 3 5.857933 0.000000 3685 +moreinform 0 3 5.857933 0.000000 3307 +audienc 0 3 5.857933 0.000000 3180 +aproject 0 3 5.857933 0.000000 3142 +expans 0 3 5.857933 0.000000 3755 +disciplin 0 3 5.857933 0.000000 3392 +sciencefound 0 2 6.263398 0.000000 5150 +calcari 0 2 6.263398 0.000000 6144 +thehigh 0 2 6.263398 0.000000 4095 +thousand 0 2 6.263398 0.000000 5949 +arbor 0 2 6.263398 0.000000 6235 +backbon 0 2 6.263398 0.000000 5623 +thescout 0 2 6.263398 0.000000 6082 +andeduc 0 1 6.957497 0.000000 19930 +reloc 0 1 6.957497 0.000000 19931 +speciallibrarian 0 1 6.957497 0.000000 19932 +systemadministr 0 1 6.957497 0.000000 19933 +calcarimanag 0 1 6.957497 0.000000 19934 +servicescomput 0 1 6.957497 0.000000 19935 +madisonsc 0 1 6.957497 0.000000 19936 +scoutservic 0 1 6.957497 0.000000 19937 +internicand 0 1 6.957497 0.000000 19938 +bestresourc 0 1 6.957497 0.000000 19939 +soonth 0 1 6.957497 0.000000 19940 +sprout 0 1 6.957497 0.000000 19941 +andthousand 0 1 6.957497 0.000000 19942 +annotatedlist 0 1 6.957497 0.000000 19943 +itemsinclud 0 1 6.957497 0.000000 19944 +happeningspost 0 1 6.957497 0.000000 19945 +weekdai 0 1 6.957497 0.000000 19946 +wheni 0 1 6.957497 0.000000 19947 +thensfnet 0 1 6.957497 0.000000 19948 +informationservic 0 1 6.957497 0.000000 19949 +tonat 0 1 6.957497 0.000000 19950 +internetand 0 1 6.957497 0.000000 19951 +seminarseri 0 1 6.957497 0.000000 19952 +internetend 0 1 6.957497 0.000000 19953 +forcerfnet 0 1 6.957497 0.000000 19954 +internicproject 0 1 6.957497 0.000000 19955 +theport 0 1 6.957497 0.000000 19956 +workof 0 1 6.957497 0.000000 19957 +andrequest 0 1 6.957497 0.000000 19958 +heartilyagre 0 1 6.957497 0.000000 19959 +servicesat 0 1 6.957497 0.000000 19960 +solock 0 1 6.957497 0.000000 19961 +theaddit 0 1 6.957497 0.000000 19962 +livesei 0 1 6.957497 0.000000 19963 +asscout 0 1 6.957497 0.000000 19964 +researcharea 0 1 6.957497 0.000000 19965 +campus 0 1 6.957497 0.000000 19966 +includenetwork 0 1 6.957497 0.000000 19967 +nidr 0 1 6.957497 0.000000 19968 +anddisciplin 0 1 6.957497 0.000000 19969 +willincludecomput 0 1 6.957497 0.000000 19970 +ofour 0 1 6.957497 0.000000 19971 +theonlin 0 1 6.957497 0.000000 19972 +librarian 0 1 6.957497 0.000000 19973 +aresum 0 1 6.957497 0.000000 19974 +contactm 0 1 6.957497 0.000000 19975 +calcariinternet 0 1 6.957497 0.000000 19976 +scal 0 1 6.957497 0.000000 19977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..7d57fe2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +welcom 0 122 2.079442 0.000000 99 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +access 0 102 2.302585 0.000000 136 +homepag 0 93 2.397895 0.000000 148 +present 0 91 2.397895 0.000000 145 +sinc 0 90 2.397895 0.000000 159 +school 0 84 2.484907 0.000000 188 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +june 0 79 2.564949 0.000000 214 +state 0 76 2.564949 0.000000 207 +dept 0 64 2.772589 0.000000 291 +major 0 56 2.890372 0.000000 315 +undergradu 0 54 2.944439 0.000000 338 +finger 0 52 2.995732 0.000000 354 +past 0 42 3.218876 0.000000 428 +higher 0 24 3.761200 0.000000 690 +born 0 21 3.912023 0.000000 798 +happi 0 14 4.317488 0.000000 1079 +avenu 0 12 4.465908 0.000000 1277 +resid 0 10 4.653960 0.000000 1461 +secondari 0 7 5.010635 0.000000 1884 +southern 0 6 5.164786 0.000000 2191 +whereabout 0 4 5.568345 0.000000 3078 +worri 0 3 5.857933 0.000000 3130 +coimbator 0 2 6.263398 0.000000 5130 +theindian 0 2 6.263398 0.000000 5795 +kharagpur 0 2 6.263398 0.000000 6236 +kendal 0 2 6.263398 0.000000 6085 +chandrasekar 1 1 6.957497 6.957497 19978 +tamilnadu 0 1 6.957497 0.000000 19979 +inindia 0 1 6.957497 0.000000 19980 +officedept 0 1 6.957497 0.000000 19981 +sivasankaran 0 1 6.957497 0.000000 19982 +schandra 0 1 6.957497 0.000000 19983 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..9a9dc701 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +architectur 0 139 1.945910 0.000000 77 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +descript 0 64 2.772589 0.000000 271 +advisor 0 51 2.995732 0.000000 355 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +eric 0 19 4.007333 0.000000 870 +wind 0 18 4.060443 0.000000 908 +club 0 15 4.248495 0.000000 1058 +usaphon 0 9 4.753590 0.000000 1600 +tunnel 0 9 4.753590 0.000000 1615 +assistantdepart 0 8 4.875197 0.000000 1784 +hockei 0 8 4.875197 0.000000 1760 +byte 0 6 5.164786 0.000000 2108 +pageer 0 3 5.857933 0.000000 3776 +schnarr 1 2 6.263398 6.263398 6194 +dragon 0 2 6.263398 0.000000 4176 +larusresearch 0 1 6.957497 0.000000 19984 +languagesfunct 0 1 6.957497 0.000000 19985 +designinterest 0 1 6.957497 0.000000 19986 +sacm 0 1 6.957497 0.000000 19987 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..df667b95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +dayton 0 119 2.079442 0.000000 104 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +specif 0 106 2.197225 0.000000 106 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +level 0 87 2.484907 0.000000 180 +educ 0 86 2.484907 0.000000 191 +david 0 71 2.639057 0.000000 232 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +advisor 0 51 2.995732 0.000000 355 +mark 0 44 3.135494 0.000000 403 +protocol 0 45 3.135494 0.000000 407 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 0 35 3.401197 0.000000 507 +hill 0 25 3.737670 0.000000 670 +supercomput 0 25 3.737670 0.000000 681 +programminglanguag 0 21 3.912023 0.000000 782 +fine 0 20 3.951244 0.000000 822 +steven 0 17 4.110874 0.000000 953 +asplo 0 17 4.110874 0.000000 948 +wood 0 11 4.553877 0.000000 1355 +grain 0 10 4.653960 0.000000 1448 +cook 0 10 4.653960 0.000000 1464 +laru 0 9 4.753590 0.000000 1560 +yanni 0 8 4.875197 0.000000 1713 +assistantdepart 0 8 4.875197 0.000000 1784 +sixth 0 7 5.010635 0.000000 1917 +roger 0 7 5.010635 0.000000 1892 +ann 0 6 5.164786 0.000000 2065 +ioanni 0 5 5.347108 0.000000 2553 +babak 0 5 5.347108 0.000000 2584 +falsafi 0 5 5.347108 0.000000 2585 +lebeck 0 5 5.347108 0.000000 2582 +reinhardt 0 5 5.347108 0.000000 2583 +schoina 1 4 5.568345 5.568345 3085 +alvin 0 4 5.568345 0.000000 3084 +crete 0 3 5.857933 0.000000 3773 +iraklio 0 1 6.957497 0.000000 19988 +systemspubl 0 1 6.957497 0.000000 19989 +cretan 0 1 6.957497 0.000000 19990 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..0cbc9530 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +includ 0 208 1.609438 0.000000 42 +version 0 113 2.197225 0.000000 122 +degre 0 69 2.708050 0.000000 259 +differ 0 66 2.708050 0.000000 253 +special 0 56 2.890372 0.000000 320 +finger 0 52 2.995732 0.000000 354 +give 0 50 3.044522 0.000000 359 +could 0 46 3.091042 0.000000 383 +india 0 32 3.465736 0.000000 550 +mine 0 26 3.688879 0.000000 654 +wish 0 24 3.761200 0.000000 692 +instead 0 22 3.850148 0.000000 756 +grad 0 20 3.951244 0.000000 837 +account 0 18 4.060443 0.000000 882 +regist 0 17 4.110874 0.000000 938 +biologi 0 15 4.248495 0.000000 1049 +classic 0 14 4.317488 0.000000 1084 +danc 0 12 4.465908 0.000000 1278 +switch 0 8 4.875197 0.000000 1718 +keeper 0 5 5.347108 0.000000 2569 +keyboard 0 4 5.568345 0.000000 2970 +asian 0 3 5.857933 0.000000 3598 +southeast 0 2 6.263398 0.000000 6188 +asia 0 2 6.263398 0.000000 5952 +hairbal 0 2 6.263398 0.000000 6237 +beverli 0 1 6.957497 0.000000 19991 +seavei 0 1 6.957497 0.000000 19992 +ramayana 0 1 6.957497 0.000000 19993 +drama 0 1 6.957497 0.000000 19994 +ramakien 0 1 6.957497 0.000000 19995 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..d375e0f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +address 0 170 1.791759 0.000000 62 +wisconsin 0 169 1.791759 0.000000 54 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +world 0 115 2.197225 0.000000 126 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +book 0 99 2.302585 0.000000 131 +school 0 84 2.484907 0.000000 188 +educ 0 86 2.484907 0.000000 191 +want 0 79 2.564949 0.000000 199 +addit 0 74 2.639057 0.000000 228 +guid 0 63 2.772589 0.000000 267 +septemb 0 65 2.772589 0.000000 274 +back 0 60 2.833213 0.000000 297 +game 0 36 3.367296 0.000000 498 +eduoffic 0 33 3.433987 0.000000 531 +enjoi 0 26 3.688879 0.000000 660 +fact 0 21 3.912023 0.000000 780 +scott 0 18 4.060443 0.000000 884 +rate 0 15 4.248495 0.000000 1037 +english 0 15 4.248495 0.000000 1033 +hopefulli 0 14 4.317488 0.000000 1071 +franc 0 12 4.465908 0.000000 1276 +ball 0 9 4.753590 0.000000 1608 +drink 0 9 4.753590 0.000000 1607 +lock 0 9 4.753590 0.000000 1551 +poetri 0 9 4.753590 0.000000 1596 +absolut 0 8 4.875197 0.000000 1646 +dictionari 0 8 4.875197 0.000000 1642 +largest 0 7 5.010635 0.000000 1858 +seen 0 6 5.164786 0.000000 2202 +beer 0 6 5.164786 0.000000 2216 +soda 0 6 5.164786 0.000000 2189 +constitut 0 6 5.164786 0.000000 2026 +pagescott 0 4 5.568345 0.000000 2978 +chees 0 4 5.568345 0.000000 3090 +add 0 3 5.857933 0.000000 3131 +uwisc 0 2 6.263398 0.000000 4738 +caffein 0 2 6.263398 0.000000 5936 +thesauru 0 2 6.263398 0.000000 6238 +colvil 0 1 6.957497 0.000000 19996 +pagein 0 1 6.957497 0.000000 19997 +pickingand 0 1 6.957497 0.000000 19998 +artsi 0 1 6.957497 0.000000 19999 +roget 0 1 6.957497 0.000000 20000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..e738391f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +includ 0 208 1.609438 0.000000 42 +applic 0 170 1.791759 0.000000 56 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +analysi 0 124 2.079442 0.000000 98 +high 0 130 2.079442 0.000000 101 +imag 0 91 2.397895 0.000000 161 +graphic 0 90 2.397895 0.000000 147 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +level 0 87 2.484907 0.000000 180 +knowledg 0 67 2.708050 0.000000 243 +goal 0 66 2.708050 0.000000 250 +virtual 0 62 2.772589 0.000000 285 +robot 0 36 3.367296 0.000000 497 +anim 0 31 3.496508 0.000000 557 +steve 0 29 3.583519 0.000000 594 +task 0 25 3.737670 0.000000 678 +motion 1 24 3.761200 3.761200 699 +store 0 24 3.761200 0.000000 693 +input 0 23 3.806662 0.000000 727 +sequenc 0 23 3.806662 0.000000 734 +period 0 22 3.850148 0.000000 743 +modern 0 16 4.174387 0.000000 966 +devic 0 16 4.174387 0.000000 1002 +charact 0 15 4.248495 0.000000 1028 +track 0 15 4.248495 0.000000 1029 +chuck 0 14 4.317488 0.000000 1108 +directli 0 13 4.382027 0.000000 1141 +realiti 0 12 4.465908 0.000000 1272 +walk 0 12 4.465908 0.000000 1281 +motiv 0 11 4.553877 0.000000 1346 +realist 0 8 4.875197 0.000000 1665 +root 0 8 4.875197 0.000000 1650 +seitz 0 7 5.010635 0.000000 1976 +smile 0 7 5.010635 0.000000 1807 +infer 0 6 5.164786 0.000000 2040 +writeup 0 5 5.347108 0.000000 2352 +rigid 0 5 5.347108 0.000000 2432 +tocomput 0 3 5.857933 0.000000 3162 +endow 0 2 6.263398 0.000000 6234 +cue 0 2 6.263398 0.000000 5391 +anabstract 0 2 6.263398 0.000000 5491 +dyerour 0 1 6.957497 0.000000 20001 +teachinga 0 1 6.957497 0.000000 20002 +hasit 0 1 6.957497 0.000000 20003 +cartoon 0 1 6.957497 0.000000 20004 +teleconferenc 0 1 6.957497 0.000000 20005 +performa 0 1 6.957497 0.000000 20006 +repertoir 0 1 6.957497 0.000000 20007 +beinvok 0 1 6.957497 0.000000 20008 +cu 0 1 6.957497 0.000000 20009 +levelev 0 1 6.957497 0.000000 20010 +nonrigid 0 1 6.957497 0.000000 20011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..4f152b87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +recent 0 167 1.791759 0.000000 58 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +provid 0 121 2.079442 0.000000 94 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +imag 1 91 2.397895 2.397895 161 +center 0 88 2.397895 0.000000 158 +chang 0 82 2.484907 0.000000 163 +june 0 79 2.564949 0.000000 214 +workshop 0 71 2.639057 0.000000 239 +view 1 70 2.708050 2.708050 254 +differ 0 66 2.708050 0.000000 253 +creat 0 63 2.772589 0.000000 277 +interact 0 62 2.772589 0.000000 270 +guid 0 63 2.772589 0.000000 267 +three 0 54 2.944439 0.000000 330 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +right 0 48 3.044522 0.000000 363 +visual 0 48 3.044522 0.000000 372 +physic 0 47 3.091042 0.000000 377 +describ 0 45 3.135494 0.000000 400 +answer 0 45 3.135494 0.000000 391 +show 0 43 3.178054 0.000000 417 +autom 0 41 3.218876 0.000000 434 +theoret 0 39 3.258097 0.000000 446 +movi 0 40 3.258097 0.000000 459 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +procedur 0 36 3.367296 0.000000 488 +represent 0 35 3.401197 0.000000 512 +produc 0 30 3.555348 0.000000 572 +steve 0 29 3.583519 0.000000 594 +consid 0 29 3.583519 0.000000 590 +enjoi 0 26 3.688879 0.000000 660 +proc 0 26 3.688879 0.000000 649 +although 0 25 3.737670 0.000000 667 +known 0 24 3.761200 0.000000 702 +sequenc 0 23 3.806662 0.000000 734 +synthesi 0 20 3.951244 0.000000 834 +basi 0 20 3.951244 0.000000 828 +mpeg 0 20 3.951244 0.000000 831 +geometr 0 19 4.007333 0.000000 852 +left 0 19 4.007333 0.000000 851 +scene 0 14 4.317488 0.000000 1114 +chuck 0 14 4.317488 0.000000 1108 +shown 0 14 4.317488 0.000000 1080 +valid 0 11 4.553877 0.000000 1299 +devis 0 10 4.653960 0.000000 1451 +reli 0 10 4.653960 0.000000 1411 +certain 0 10 4.653960 0.000000 1393 +correspond 0 10 4.653960 0.000000 1382 +intermedi 0 9 4.753590 0.000000 1497 +establish 0 9 4.753590 0.000000 1532 +assumpt 0 9 4.753590 0.000000 1514 +pair 0 9 4.753590 0.000000 1503 +dyer 0 9 4.753590 0.000000 1573 +satisfi 0 8 4.875197 0.000000 1694 +interpol 1 7 5.010635 5.010635 1823 +seitz 0 7 5.010635 0.000000 1976 +morph 0 7 5.010635 0.000000 1937 +stereo 0 7 5.010635 0.000000 1818 +theproject 0 6 5.164786 0.000000 1981 +provabl 0 5 5.347108 0.000000 2558 +surprisingli 0 4 5.568345 0.000000 2609 +visibl 0 4 5.568345 0.000000 2994 +todetermin 0 3 5.857933 0.000000 3182 +widespread 0 2 6.263398 0.000000 4911 +viewsof 0 2 6.263398 0.000000 6135 +undergo 0 2 6.263398 0.000000 4253 +dyerw 0 1 6.957497 0.000000 20012 +graphicscommun 0 1 6.957497 0.000000 20013 +techniquescurr 0 1 6.957497 0.000000 20014 +validityha 0 1 6.957497 0.000000 20015 +ofthat 0 1 6.957497 0.000000 20016 +simplerectif 0 1 6.957497 0.000000 20017 +therectifi 0 1 6.957497 0.000000 20018 +theinterpol 0 1 6.957497 0.000000 20019 +computedinterpol 0 1 6.957497 0.000000 20020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..76b93cec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +base 0 165 1.791759 0.000000 50 +click 0 142 1.945910 0.000000 78 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +high 0 130 2.079442 0.000000 101 +well 0 109 2.197225 0.000000 121 +techniqu 0 99 2.302585 0.000000 138 +imag 0 91 2.397895 0.000000 161 +call 0 91 2.397895 0.000000 153 +chang 0 82 2.484907 0.000000 163 +requir 0 81 2.484907 0.000000 167 +wide 0 84 2.484907 0.000000 185 +appear 0 78 2.564949 0.000000 210 +view 0 70 2.708050 0.000000 254 +differ 0 66 2.708050 0.000000 253 +knowledg 0 67 2.708050 0.000000 243 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +simpl 0 60 2.833213 0.000000 298 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +basic 0 50 3.044522 0.000000 360 +principl 0 48 3.044522 0.000000 357 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +move 0 47 3.091042 0.000000 382 +howev 0 41 3.218876 0.000000 422 +movi 0 40 3.258097 0.000000 459 +correct 0 38 3.295837 0.000000 462 +represent 0 35 3.401197 0.000000 512 +manual 0 35 3.401197 0.000000 504 +transform 0 32 3.465736 0.000000 542 +often 0 31 3.496508 0.000000 551 +taken 0 31 3.496508 0.000000 555 +produc 0 30 3.555348 0.000000 572 +steve 0 29 3.583519 0.000000 594 +toward 0 25 3.737670 0.000000 668 +frame 0 24 3.761200 0.000000 684 +handl 0 24 3.761200 0.000000 685 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +mpeg 0 20 3.951244 0.000000 831 +render 0 17 4.110874 0.000000 947 +transit 0 15 4.248495 0.000000 1046 +photograph 0 15 4.248495 0.000000 1056 +reflect 0 15 4.248495 0.000000 1034 +scene 0 14 4.317488 0.000000 1114 +chuck 0 14 4.317488 0.000000 1108 +camera 0 14 4.317488 0.000000 1115 +draw 0 14 4.317488 0.000000 1086 +resolut 0 13 4.382027 0.000000 1172 +introduc 0 13 4.382027 0.000000 1139 +shape 0 12 4.465908 0.000000 1245 +abil 0 11 4.553877 0.000000 1341 +dyer 0 9 4.753590 0.000000 1573 +pose 0 9 4.753590 0.000000 1535 +correctli 0 9 4.753590 0.000000 1478 +face 0 9 4.753590 0.000000 1501 +siggraph 0 8 4.875197 0.000000 1773 +morph 1 7 5.010635 5.010635 1937 +interpol 0 7 5.010635 0.000000 1823 +seitz 0 7 5.010635 0.000000 1976 +viewpoint 0 6 5.164786 0.000000 2116 +difficult 0 6 5.164786 0.000000 2035 +simultan 0 6 5.164786 0.000000 2155 +jude 0 6 5.164786 0.000000 2123 +synthes 0 5 5.347108 0.000000 2451 +facial 0 5 5.347108 0.000000 2438 +shavlik 0 5 5.347108 0.000000 2429 +illus 0 4 5.568345 0.000000 2603 +mona 0 2 6.263398 0.000000 5786 +lisa 0 2 6.263398 0.000000 5427 +icpr 0 1 6.957497 0.000000 20021 +compel 0 1 6.957497 0.000000 20022 +betweenimag 0 1 6.957497 0.000000 20023 +causeunnatur 0 1 6.957497 0.000000 20024 +distort 0 1 6.957497 0.000000 20025 +projectivegeometri 0 1 6.957497 0.000000 20026 +morphingthat 0 1 6.957497 0.000000 20027 +prewarp 0 1 6.957497 0.000000 20028 +imagesprior 0 1 6.957497 0.000000 20029 +postwarp 0 1 6.957497 0.000000 20030 +appliedto 0 1 6.957497 0.000000 20031 +structureafford 0 1 6.957497 0.000000 20032 +imagetransform 0 1 6.957497 0.000000 20033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..c092e16f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +person 0 111 2.197225 0.000000 117 +make 0 111 2.197225 0.000000 120 +theori 0 111 2.197225 0.000000 127 +imag 0 91 2.397895 0.000000 161 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +refer 0 78 2.564949 0.000000 203 +line 0 75 2.639057 0.000000 231 +addit 0 74 2.639057 0.000000 228 +view 0 70 2.708050 0.000000 254 +previou 0 62 2.772589 0.000000 290 +import 0 65 2.772589 0.000000 282 +locat 0 59 2.833213 0.000000 303 +sever 0 56 2.890372 0.000000 322 +allow 0 53 2.944439 0.000000 333 +run 0 51 2.995732 0.000000 347 +life 0 50 3.044522 0.000000 375 +approach 0 48 3.044522 0.000000 366 +without 0 50 3.044522 0.000000 370 +move 0 47 3.091042 0.000000 382 +could 0 46 3.091042 0.000000 383 +physic 0 47 3.091042 0.000000 377 +featur 0 46 3.091042 0.000000 386 +even 0 45 3.135494 0.000000 393 +describ 0 45 3.135494 0.000000 400 +natur 0 44 3.135494 0.000000 406 +show 0 43 3.178054 0.000000 417 +error 0 40 3.258097 0.000000 449 +mean 0 37 3.332205 0.000000 477 +next 0 34 3.401197 0.000000 517 +represent 0 35 3.401197 0.000000 512 +singl 0 34 3.401197 0.000000 510 +human 0 32 3.465736 0.000000 546 +steve 0 29 3.583519 0.000000 594 +determin 0 27 3.637586 0.000000 630 +enhanc 0 26 3.688879 0.000000 644 +trace 0 25 3.737670 0.000000 677 +reliabl 0 25 3.737670 0.000000 674 +motion 0 24 3.761200 0.000000 699 +frame 0 24 3.761200 0.000000 684 +sequenc 0 23 3.806662 0.000000 734 +period 1 22 3.850148 3.850148 743 +defin 0 22 3.850148 0.000000 746 +identifi 0 22 3.850148 0.000000 760 +attempt 0 17 4.110874 0.000000 917 +medic 0 17 4.110874 0.000000 958 +spatial 0 16 4.174387 0.000000 988 +scene 0 14 4.317488 0.000000 1114 +chuck 0 14 4.317488 0.000000 1108 +camera 0 14 4.317488 0.000000 1115 +composit 0 13 4.382027 0.000000 1150 +whose 0 13 4.382027 0.000000 1166 +deriv 0 13 4.382027 0.000000 1145 +walk 0 12 4.465908 0.000000 1281 +cycl 0 11 4.553877 0.000000 1335 +instanc 0 11 4.553877 0.000000 1322 +moment 0 11 4.553877 0.000000 1379 +correspond 0 10 4.653960 0.000000 1382 +tempor 0 9 4.753590 0.000000 1584 +surfac 0 9 4.753590 0.000000 1574 +explicit 0 9 4.753590 0.000000 1525 +pure 0 8 4.875197 0.000000 1776 +invari 0 8 4.875197 0.000000 1748 +film 0 8 4.875197 0.000000 1761 +irregular 0 8 4.875197 0.000000 1768 +heart 0 8 4.875197 0.000000 1729 +seitz 0 7 5.010635 0.000000 1976 +compact 0 7 5.010635 0.000000 1907 +canb 0 7 5.010635 0.000000 1846 +bottom 0 7 5.010635 0.000000 1906 +appar 0 7 5.010635 0.000000 1958 +recov 0 6 5.164786 0.000000 2235 +furthermor 0 6 5.164786 0.000000 2141 +cyclic 0 5 5.347108 0.000000 2383 +skip 0 5 5.347108 0.000000 2402 +variat 0 5 5.347108 0.000000 2248 +affin 0 5 5.347108 0.000000 2378 +clickher 0 5 5.347108 0.000000 2428 +havedevelop 0 4 5.568345 0.000000 2681 +repeat 0 4 5.568345 0.000000 2798 +tend 0 4 5.568345 0.000000 3041 +visibl 0 4 5.568345 0.000000 2994 +fashion 0 3 5.857933 0.000000 3699 +unlik 0 2 6.263398 0.000000 5063 +slow 0 2 6.263398 0.000000 5341 +perfectli 0 2 6.263398 0.000000 5569 +poscript 0 1 6.957497 0.000000 20034 +turntabl 0 1 6.957497 0.000000 20035 +dyermani 0 1 6.957497 0.000000 20036 +locomotori 0 1 6.957497 0.000000 20037 +shuffl 0 1 6.957497 0.000000 20038 +areperiod 0 1 6.957497 0.000000 20039 +beenproduc 0 1 6.957497 0.000000 20040 +ourapproach 0 1 6.957497 0.000000 20041 +tracethi 0 1 6.957497 0.000000 20042 +imagesequ 0 1 6.957497 0.000000 20043 +phonograph 0 1 6.957497 0.000000 20044 +ramp 0 1 6.957497 0.000000 20045 +timewher 0 1 6.957497 0.000000 20046 +momentarili 0 1 6.957497 0.000000 20047 +shownsuperimpos 0 1 6.957497 0.000000 20048 +variesslightli 0 1 6.957497 0.000000 20049 +changesin 0 1 6.957497 0.000000 20050 +motionsthat 0 1 6.957497 0.000000 20051 +evolutionof 0 1 6.957497 0.000000 20052 +quantiti 0 1 6.957497 0.000000 20053 +asposit 0 1 6.957497 0.000000 20054 +veloc 0 1 6.957497 0.000000 20055 +delimit 0 1 6.957497 0.000000 20056 +correspondencesacross 0 1 6.957497 0.000000 20057 +parsinga 0 1 6.957497 0.000000 20058 +tracecan 0 1 6.957497 0.000000 20059 +fromdiffer 0 1 6.957497 0.000000 20060 +recoveredfrom 0 1 6.957497 0.000000 20061 +angiograph 0 1 6.957497 0.000000 20062 +additionalstructur 0 1 6.957497 0.000000 20063 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..c4d197ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +click 0 142 1.945910 0.000000 78 +area 0 144 1.945910 0.000000 80 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +look 0 107 2.197225 0.000000 115 +imag 0 91 2.397895 0.000000 161 +graphic 0 90 2.397895 0.000000 147 +stuff 0 87 2.484907 0.000000 171 +chang 0 82 2.484907 0.000000 163 +exampl 0 77 2.564949 0.000000 195 +view 0 70 2.708050 0.000000 254 +juli 0 60 2.833213 0.000000 305 +frequent 0 49 3.044522 0.000000 367 +math 0 44 3.135494 0.000000 402 +show 0 43 3.178054 0.000000 417 +vision 0 41 3.218876 0.000000 430 +cach 0 41 3.218876 0.000000 432 +movi 0 40 3.258097 0.000000 459 +steve 0 29 3.583519 0.000000 594 +berkelei 0 26 3.688879 0.000000 657 +motion 0 24 3.761200 0.000000 699 +mpeg 0 20 3.951244 0.000000 831 +synthesi 0 20 3.951244 0.000000 834 +left 0 19 4.007333 0.000000 851 +render 0 17 4.110874 0.000000 947 +seitz 1 7 5.010635 5.010635 1976 +morph 0 7 5.010635 0.000000 1937 +interpol 0 7 5.010635 0.000000 1823 +cyclic 0 5 5.347108 0.000000 2383 +closer 0 2 6.263398 0.000000 6024 +surreal 0 1 6.957497 0.000000 20064 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..d68ac685 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +wisconsin 0 169 1.791759 0.000000 54 +avail 0 169 1.791759 0.000000 48 +file 0 132 1.945910 0.000000 70 +machin 0 129 2.079442 0.000000 95 +schedul 0 119 2.079442 0.000000 85 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +access 0 102 2.302585 0.000000 136 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +member 0 84 2.484907 0.000000 165 +librari 0 87 2.484907 0.000000 181 +school 0 84 2.484907 0.000000 188 +journal 0 83 2.484907 0.000000 183 +david 0 71 2.639057 0.000000 232 +line 0 75 2.639057 0.000000 231 +workshop 0 71 2.639057 0.000000 239 +august 0 66 2.708050 0.000000 257 +dept 0 64 2.772589 0.000000 291 +prof 0 64 2.772589 0.000000 273 +abstract 0 62 2.772589 0.000000 276 +content 0 59 2.833213 0.000000 302 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +archiv 0 49 3.044522 0.000000 364 +mark 0 44 3.135494 0.000000 403 +describ 0 45 3.135494 0.000000 400 +directori 0 45 3.135494 0.000000 396 +math 0 44 3.135494 0.000000 402 +vision 0 41 3.218876 0.000000 430 +seminar 0 38 3.295837 0.000000 470 +robot 0 36 3.367296 0.000000 497 +richard 0 31 3.496508 0.000000 559 +domain 0 30 3.555348 0.000000 564 +held 0 28 3.610918 0.000000 600 +relev 0 26 3.688879 0.000000 637 +proc 0 26 3.688879 0.000000 649 +mostli 0 19 4.007333 0.000000 869 +agent 0 18 4.060443 0.000000 910 +ascii 0 15 4.248495 0.000000 1032 +biologi 0 15 4.248495 0.000000 1049 +doit 0 14 4.317488 0.000000 1111 +readabl 0 12 4.465908 0.000000 1258 +wendt 0 10 4.653960 0.000000 1446 +kevin 0 9 4.753590 0.000000 1482 +mangasarian 0 9 4.753590 0.000000 1570 +dataset 0 7 5.010635 0.000000 1914 +jude 0 6 5.164786 0.000000 2123 +extern 0 6 5.164786 0.000000 2105 +olvi 0 6 5.164786 0.000000 2109 +gopher 0 6 5.164786 0.000000 1982 +shavlik 0 5 5.347108 0.000000 2429 +bodner 0 5 5.347108 0.000000 2401 +testb 0 5 5.347108 0.000000 2456 +tina 0 3 5.857933 0.000000 3744 +breast 0 3 5.857933 0.000000 4033 +cancer 0 3 5.857933 0.000000 4032 +carolyn 0 2 6.263398 0.000000 6088 +allex 0 2 6.263398 0.000000 6087 +eliassi 0 2 6.263398 0.000000 6147 +mlrg 0 1 6.957497 0.000000 20065 +thememb 0 1 6.957497 0.000000 20066 +jonathon 0 1 6.957497 0.000000 20067 +cherkauer 0 1 6.957497 0.000000 20068 +craven 0 1 6.957497 0.000000 20069 +maclin 0 1 6.957497 0.000000 20070 +opitz 0 1 6.957497 0.000000 20071 +papersvisit 0 1 6.957497 0.000000 20072 +recentabstractsi 0 1 6.957497 0.000000 20073 +theoriesy 0 1 6.957497 0.000000 20074 +severalml 0 1 6.957497 0.000000 20075 +sgroup 0 1 6.957497 0.000000 20076 +neurosci 0 1 6.957497 0.000000 20077 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..ed94bc73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +public 0 202 1.609438 0.000000 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +memori 0 101 2.302585 0.000000 139 +west 0 83 2.484907 0.000000 192 +wide 0 84 2.484907 0.000000 185 +institut 0 84 2.484907 0.000000 187 +interfac 0 79 2.564949 0.000000 209 +summari 0 73 2.639057 0.000000 237 +simul 0 66 2.708050 0.000000 255 +street 0 63 2.772589 0.000000 293 +copi 0 63 2.772589 0.000000 284 +share 0 59 2.833213 0.000000 304 +space 0 57 2.890372 0.000000 310 +advisor 0 51 2.995732 0.000000 355 +right 0 48 3.044522 0.000000 363 +protocol 0 45 3.135494 0.000000 407 +mark 0 44 3.135494 0.000000 403 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +submit 0 39 3.258097 0.000000 440 +tutori 0 39 3.258097 0.000000 437 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +copyright 0 36 3.367296 0.000000 495 +random 0 34 3.401197 0.000000 511 +tech 0 35 3.401197 0.000000 515 +articl 0 33 3.433987 0.000000 530 +india 0 32 3.465736 0.000000 550 +dissert 0 32 3.465736 0.000000 549 +steve 0 29 3.583519 0.000000 594 +progress 0 28 3.610918 0.000000 598 +team 0 27 3.637586 0.000000 625 +hill 0 25 3.737670 0.000000 670 +departmentunivers 0 24 3.761200 0.000000 711 +indian 0 22 3.850148 0.000000 769 +cooper 0 22 3.850148 0.000000 757 +reserv 0 20 3.951244 0.000000 808 +wind 0 18 4.060443 0.000000 908 +hobbi 0 16 4.174387 0.000000 1009 +month 0 15 4.248495 0.000000 1025 +coher 0 14 4.317488 0.000000 1109 +danc 0 12 4.465908 0.000000 1278 +isca 0 11 4.553877 0.000000 1354 +correspond 0 10 4.653960 0.000000 1382 +queue 0 10 4.653960 0.000000 1386 +custom 0 10 4.653960 0.000000 1414 +tunnel 0 9 4.753590 0.000000 1615 +usaphon 0 9 4.753590 0.000000 1600 +jump 0 9 4.753590 0.000000 1603 +architect 0 8 4.875197 0.000000 1624 +kanpur 0 8 4.875197 0.000000 1744 +irregular 0 8 4.875197 0.000000 1768 +morph 0 7 5.010635 0.000000 1937 +courtesi 0 7 5.010635 0.000000 1953 +seitz 0 7 5.010635 0.000000 1976 +mukherje 0 5 5.347108 0.000000 2586 +button 0 5 5.347108 0.000000 2337 +commod 0 5 5.347108 0.000000 2415 +ppopp 0 4 5.568345 0.000000 2774 +shubhendu 0 3 5.857933 0.000000 4028 +badger 0 3 5.857933 0.000000 3502 +ballroom 0 3 5.857933 0.000000 3983 +shubu 0 2 6.263398 0.000000 6148 +fiance 0 2 6.263398 0.000000 5497 +nephew 0 2 6.263398 0.000000 5332 +dionisio 0 2 6.263398 0.000000 6203 +grai 0 2 6.263398 0.000000 4098 +mimi 0 1 6.957497 0.000000 20078 +avirup 0 1 6.957497 0.000000 20079 +linkseducationph 0 1 6.957497 0.000000 20080 +cachabl 0 1 6.957497 0.000000 20081 +dirsw 0 1 6.957497 0.000000 20082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..ab8f3f9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +fall 0 181 1.609438 0.000000 40 +book 0 99 2.302585 0.000000 131 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +new 0 64 2.772589 0.000000 262 +run 0 51 2.995732 0.000000 347 +movi 0 40 3.258097 0.000000 459 +seminar 0 38 3.295837 0.000000 470 +michael 1 35 3.401197 3.401197 514 +wai 0 25 3.737670 0.000000 662 +sport 0 25 3.737670 0.000000 683 +wonder 0 20 3.951244 0.000000 815 +club 0 15 4.248495 0.000000 1058 +philosophi 0 13 4.382027 0.000000 1167 +televis 0 6 5.164786 0.000000 2118 +wast 0 5 5.347108 0.000000 2537 +humor 0 5 5.347108 0.000000 2533 +midwest 0 2 6.263398 0.000000 6225 +siff 1 1 6.957497 6.957497 20083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..014ef4b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +februari 0 54 2.944439 0.000000 328 +departmentunivers 0 24 3.761200 0.000000 711 +edutelephon 0 10 4.653960 0.000000 1473 +studentcomput 0 7 5.010635 0.000000 1963 +skrentni 1 6 5.164786 5.164786 2104 +lecturerc 0 1 6.957497 0.000000 20084 +coordinatorgradu 0 1 6.957497 0.000000 20085 +sciencesemail 0 1 6.957497 0.000000 20086 +groupskrentni 0 1 6.957497 0.000000 20087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..32fb1976 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +relat 0 139 1.945910 0.000000 68 +professor 0 137 1.945910 0.000000 76 +dayton 0 119 2.079442 0.000000 104 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +help 0 83 2.484907 0.000000 175 +activ 0 84 2.484907 0.000000 182 +april 0 77 2.564949 0.000000 196 +intellig 0 72 2.639057 0.000000 225 +knowledg 0 67 2.708050 0.000000 243 +interact 0 62 2.772589 0.000000 270 +unix 0 58 2.890372 0.000000 308 +advisor 0 51 2.995732 0.000000 355 +represent 0 35 3.401197 0.000000 512 +human 0 32 3.465736 0.000000 546 +toward 0 25 3.737670 0.000000 668 +reliabl 0 25 3.737670 0.000000 674 +departmentunivers 0 24 3.761200 0.000000 711 +util 0 21 3.912023 0.000000 774 +miller 0 17 4.110874 0.000000 949 +step 0 13 4.382027 0.000000 1138 +larri 0 13 4.382027 0.000000 1142 +edutelephon 0 10 4.653960 0.000000 1473 +purdu 0 10 4.653960 0.000000 1466 +empir 0 8 4.875197 0.000000 1722 +studentcomput 0 7 5.010635 0.000000 1963 +bryan 0 5 5.347108 0.000000 2421 +travi 0 3 5.857933 0.000000 3985 +fredriksen 0 1 6.957497 0.000000 20139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..0a12474d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +technolog 0 131 2.079442 0.000000 102 +analysi 0 124 2.079442 0.000000 98 +manag 0 114 2.197225 0.000000 125 +center 0 88 2.397895 0.000000 158 +associ 0 93 2.397895 0.000000 151 +control 0 82 2.484907 0.000000 164 +larg 0 82 2.484907 0.000000 168 +west 0 83 2.484907 0.000000 192 +level 0 87 2.484907 0.000000 180 +activ 0 84 2.484907 0.000000 182 +journal 0 83 2.484907 0.000000 183 +chang 0 82 2.484907 0.000000 163 +issu 0 78 2.564949 0.000000 211 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +intellig 1 72 2.639057 2.639057 225 +summari 0 73 2.639057 0.000000 237 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +integr 0 67 2.708050 0.000000 245 +knowledg 0 67 2.708050 0.000000 243 +test 0 66 2.708050 0.000000 252 +artifici 0 63 2.772589 0.000000 280 +complex 0 64 2.772589 0.000000 269 +foundat 0 62 2.772589 0.000000 286 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +automat 0 61 2.833213 0.000000 306 +special 0 56 2.890372 0.000000 320 +sever 0 56 2.890372 0.000000 322 +visual 0 48 3.044522 0.000000 372 +approach 0 48 3.044522 0.000000 366 +understand 0 47 3.091042 0.000000 384 +could 0 46 3.091042 0.000000 383 +around 0 43 3.178054 0.000000 415 +examin 0 42 3.218876 0.000000 424 +form 0 39 3.258097 0.000000 443 +map 0 39 3.258097 0.000000 452 +societi 0 40 3.258097 0.000000 456 +formal 0 37 3.332205 0.000000 478 +procedur 0 36 3.367296 0.000000 488 +represent 0 35 3.401197 0.000000 512 +singl 0 34 3.401197 0.000000 510 +focus 0 29 3.583519 0.000000 584 +altern 0 26 3.688879 0.000000 641 +enhanc 0 26 3.688879 0.000000 644 +magazin 0 24 3.761200 0.000000 704 +departmentunivers 0 24 3.761200 0.000000 711 +pattern 0 24 3.761200 0.000000 689 +displai 0 23 3.806662 0.000000 712 +expert 0 20 3.951244 0.000000 833 +basi 0 20 3.951244 0.000000 828 +aid 0 18 4.060443 0.000000 904 +scott 0 18 4.060443 0.000000 884 +failur 0 18 4.060443 0.000000 898 +heterogen 0 14 4.317488 0.000000 1090 +chuck 0 14 4.317488 0.000000 1108 +larri 0 13 4.382027 0.000000 1142 +social 0 13 4.382027 0.000000 1123 +incorpor 0 13 4.382027 0.000000 1163 +deduct 0 12 4.465908 0.000000 1236 +edutelephon 0 10 4.653960 0.000000 1473 +angel 0 8 4.875197 0.000000 1779 +databasesystem 0 8 4.875197 0.000000 1617 +attent 0 8 4.875197 0.000000 1651 +philosoph 0 7 5.010635 0.000000 1904 +geograph 0 6 5.164786 0.000000 2236 +augment 0 5 5.347108 0.000000 2350 +bryan 0 5 5.347108 0.000000 2421 +implic 0 4 5.568345 0.000000 2696 +andi 0 4 5.568345 0.000000 3081 +travi 0 3 5.857933 0.000000 3985 +metaphor 0 3 5.857933 0.000000 4038 +landscap 0 3 5.857933 0.000000 3525 +waysthat 0 2 6.263398 0.000000 5445 +andwith 0 2 6.263398 0.000000 5051 +derek 0 2 6.263398 0.000000 4537 +travisprofessorcomput 0 1 6.957497 0.000000 20140 +californa 0 1 6.957497 0.000000 20141 +ofartifici 0 1 6.957497 0.000000 20142 +automaticdeduct 0 1 6.957497 0.000000 20143 +contruct 0 1 6.957497 0.000000 20144 +informationcontain 0 1 6.957497 0.000000 20145 +beingdevot 0 1 6.957497 0.000000 20146 +visualiz 0 1 6.957497 0.000000 20147 +organiz 0 1 6.957497 0.000000 20148 +suppositionsunderli 0 1 6.957497 0.000000 20149 +ohar 0 1 6.957497 0.000000 20150 +swanson 0 1 6.957497 0.000000 20151 +whitsitt 0 1 6.957497 0.000000 20152 +zahn 0 1 6.957497 0.000000 20153 +oravec 0 1 6.957497 0.000000 20154 +reflex 0 1 6.957497 0.000000 20155 +falsework 0 1 6.957497 0.000000 20156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..0569dec7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +compil 0 122 2.079442 0.000000 96 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +center 0 88 2.397895 0.000000 158 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +free 0 73 2.639057 0.000000 224 +java 0 70 2.708050 0.000000 248 +street 0 63 2.772589 0.000000 293 +new 0 64 2.772589 0.000000 262 +juli 0 60 2.833213 0.000000 305 +directori 0 45 3.135494 0.000000 396 +edit 0 42 3.218876 0.000000 418 +download 0 36 3.367296 0.000000 489 +tech 0 35 3.401197 0.000000 515 +random 0 34 3.401197 0.000000 511 +india 0 32 3.465736 0.000000 550 +packag 0 28 3.610918 0.000000 614 +indian 0 22 3.850148 0.000000 769 +rank 0 14 4.317488 0.000000 1063 +multiscalar 0 8 4.875197 0.000000 1783 +cricket 0 7 5.010635 0.000000 1945 +kestrel 0 4 5.568345 0.000000 2990 +batch 0 4 5.568345 0.000000 2700 +will 0 4 5.568345 0.000000 2782 +avinash 0 3 5.857933 0.000000 3510 +mate 0 3 5.857933 0.000000 3127 +hindu 0 3 5.857933 0.000000 3590 +sodani 0 2 6.263398 0.000000 4803 +kharagpur 0 2 6.263398 0.000000 6236 +toll 0 2 6.263398 0.000000 6149 +hon 0 1 6.957497 0.000000 20088 +kgpite 0 1 6.957497 0.000000 20089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..56f6374c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,253 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +place 0 106 2.197225 0.000000 124 +memori 0 101 2.302585 0.000000 139 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +proceed 0 93 2.397895 0.000000 152 +level 0 87 2.484907 0.000000 180 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +educ 0 86 2.484907 0.000000 191 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +issu 0 78 2.564949 0.000000 211 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +symposium 0 72 2.639057 0.000000 238 +summari 0 73 2.639057 0.000000 237 +effici 0 73 2.639057 0.000000 233 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +simul 0 66 2.708050 0.000000 255 +window 0 68 2.708050 0.000000 242 +import 0 65 2.772589 0.000000 282 +evalu 0 64 2.772589 0.000000 266 +januari 0 62 2.772589 0.000000 264 +septemb 0 65 2.772589 0.000000 274 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +sever 0 56 2.890372 0.000000 322 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +talk 0 53 2.944439 0.000000 336 +investig 0 51 2.995732 0.000000 353 +hardwar 0 51 2.995732 0.000000 350 +pointer 0 48 3.044522 0.000000 368 +numer 0 49 3.044522 0.000000 369 +set 0 50 3.044522 0.000000 361 +electron 0 47 3.091042 0.000000 379 +possibl 0 47 3.091042 0.000000 378 +understand 0 47 3.091042 0.000000 384 +could 0 46 3.091042 0.000000 383 +execut 0 45 3.135494 0.000000 404 +mechan 0 43 3.178054 0.000000 416 +futur 0 41 3.218876 0.000000 427 +cach 0 41 3.218876 0.000000 432 +fast 0 42 3.218876 0.000000 429 +combin 0 42 3.218876 0.000000 421 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +transact 0 39 3.258097 0.000000 438 +continu 0 39 3.258097 0.000000 448 +multipl 0 39 3.258097 0.000000 453 +error 0 40 3.258097 0.000000 449 +electr 0 38 3.295837 0.000000 461 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +given 0 32 3.465736 0.000000 538 +india 0 32 3.465736 0.000000 550 +concept 0 32 3.465736 0.000000 537 +exist 0 30 3.555348 0.000000 569 +focus 0 29 3.583519 0.000000 584 +depend 0 29 3.583519 0.000000 583 +multiprocessor 0 28 3.610918 0.000000 605 +load 0 28 3.610918 0.000000 601 +held 0 28 3.610918 0.000000 600 +arrai 0 27 3.637586 0.000000 627 +detect 0 26 3.688879 0.000000 646 +challeng 0 26 3.688879 0.000000 653 +effort 0 26 3.688879 0.000000 652 +request 0 26 3.688879 0.000000 635 +flow 0 24 3.761200 0.000000 700 +compress 0 23 3.806662 0.000000 719 +reduc 0 22 3.850148 0.000000 759 +sequenti 0 22 3.850148 0.000000 745 +hierarchi 0 22 3.850148 0.000000 744 +chip 0 21 3.912023 0.000000 770 +exploit 0 20 3.951244 0.000000 836 +fine 0 20 3.951244 0.000000 822 +department 0 20 3.951244 0.000000 839 +smith 0 20 3.951244 0.000000 820 +predict 0 19 4.007333 0.000000 855 +scott 0 18 4.060443 0.000000 884 +regist 0 17 4.110874 0.000000 938 +interconnect 0 17 4.110874 0.000000 937 +expand 0 17 4.110874 0.000000 928 +micro 0 15 4.248495 0.000000 1031 +eduphon 0 15 4.248495 0.000000 1060 +novel 0 15 4.248495 0.000000 1039 +todd 0 15 4.248495 0.000000 1051 +achiev 0 14 4.317488 0.000000 1088 +split 0 14 4.317488 0.000000 1078 +translat 0 13 4.382027 0.000000 1164 +sigplan 0 13 4.382027 0.000000 1190 +incorpor 0 13 4.382027 0.000000 1163 +prolog 0 13 4.382027 0.000000 1155 +target 0 12 4.465908 0.000000 1282 +calcul 0 12 4.465908 0.000000 1268 +cycl 0 11 4.553877 0.000000 1335 +branch 0 11 4.553877 0.000000 1318 +bandwidth 0 11 4.553877 0.000000 1365 +arbitrari 0 11 4.553877 0.000000 1359 +franklin 0 10 4.653960 0.000000 1436 +grain 0 10 4.653960 0.000000 1448 +traffic 0 10 4.653960 0.000000 1421 +inter 0 9 4.753590 0.000000 1530 +multiscalar 0 8 4.875197 0.000000 1783 +character 0 8 4.875197 0.000000 1767 +paradigm 0 8 4.875197 0.000000 1662 +elect 0 8 4.875197 0.000000 1771 +uniprocessor 0 8 4.875197 0.000000 1696 +potenti 0 8 4.875197 0.000000 1690 +watson 0 8 4.875197 0.000000 1691 +illinoi 0 7 5.010635 0.000000 1941 +zero 0 7 5.010635 0.000000 1896 +goodman 0 7 5.010635 0.000000 1891 +serial 0 7 5.010635 0.000000 1975 +friedman 0 7 5.010635 0.000000 1886 +chiang 0 7 5.010635 0.000000 1853 +sohi 1 6 5.164786 5.164786 2237 +microarchitectur 0 6 5.164786 0.000000 2238 +risc 0 6 5.164786 0.000000 2016 +superscalar 0 6 5.164786 0.000000 2082 +handbook 0 6 5.164786 0.000000 2061 +guri 0 5 5.347108 0.000000 2578 +andrea 0 5 5.347108 0.000000 2375 +lebeck 0 5 5.347108 0.000000 2582 +highest 0 4 5.568345 0.000000 2950 +resolv 0 4 5.568345 0.000000 2675 +height 0 4 5.568345 0.000000 2890 +appendix 0 4 5.568345 0.000000 2739 +crai 0 4 5.568345 0.000000 3012 +breach 0 3 5.857933 0.000000 4009 +vijaykumar 0 3 5.857933 0.000000 4011 +urbana 0 3 5.857933 0.000000 3879 +ordinari 0 3 5.857933 0.000000 3233 +streamlin 0 3 5.857933 0.000000 3573 +bulk 0 3 5.857933 0.000000 4000 +thedevelop 0 3 5.857933 0.000000 3903 +reorder 0 3 5.857933 0.000000 3952 +anatomi 0 3 5.857933 0.000000 4010 +chow 0 3 5.857933 0.000000 3281 +pnevmatikato 0 2 6.263398 0.000000 6204 +guard 0 2 6.263398 0.000000 5738 +gurindar 0 2 6.263398 0.000000 6110 +andelectr 0 2 6.263398 0.000000 6200 +birla 0 2 6.263398 0.000000 6239 +pilani 0 2 6.263398 0.000000 6240 +plenti 0 2 6.263398 0.000000 5465 +sustain 0 2 6.263398 0.000000 6201 +needto 0 2 6.263398 0.000000 4927 +andhow 0 2 6.263398 0.000000 5933 +expend 0 2 6.263398 0.000000 5451 +moshovo 0 2 6.263398 0.000000 6211 +inrd 0 2 6.263398 0.000000 4531 +tetra 0 2 6.263398 0.000000 5196 +framemak 0 1 6.957497 0.000000 20090 +graduatesaddress 0 1 6.957497 0.000000 20091 +usasohi 0 1 6.957497 0.000000 20092 +thehighest 0 1 6.957497 0.000000 20093 +circa 0 1 6.957497 0.000000 20094 +transistor 0 1 6.957497 0.000000 20095 +availableon 0 1 6.957497 0.000000 20096 +getth 0 1 6.957497 0.000000 20097 +ofov 0 1 6.957497 0.000000 20098 +thenatur 0 1 6.957497 0.000000 20099 +numericappl 0 1 6.957497 0.000000 20100 +andcarri 0 1 6.957497 0.000000 20101 +assessth 0 1 6.957497 0.000000 20102 +vijaykumarrec 0 1 6.957497 0.000000 20103 +talkswil 0 1 6.957497 0.000000 20104 +researchcent 0 1 6.957497 0.000000 20105 +yorktown 0 1 6.957497 0.000000 20106 +publicationshigh 0 1 6.957497 0.000000 20107 +ofdetail 0 1 6.957497 0.000000 20108 +resultsi 0 1 6.957497 0.000000 20109 +latencyt 0 1 6.957497 0.000000 20110 +processorsj 0 1 6.957497 0.000000 20111 +referencesm 0 1 6.957497 0.000000 20112 +communicationin 0 1 6.957497 0.000000 20113 +errorst 0 1 6.957497 0.000000 20114 +processorsd 0 1 6.957497 0.000000 20115 +knapsack 0 1 6.957497 0.000000 20116 +componentt 0 1 6.957497 0.000000 20117 +processorst 0 1 6.957497 0.000000 20118 +gradstodd 0 1 6.957497 0.000000 20119 +latencydionisio 0 1 6.957497 0.000000 20120 +setsmanoj 0 1 6.957497 0.000000 20121 +architecturemark 0 1 6.957497 0.000000 20122 +executionsriram 0 1 6.957497 0.000000 20123 +vajapeyam 0 1 6.957497 0.000000 20124 +processormen 0 1 6.957497 0.000000 20125 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..b1151939 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +object 0 138 1.945910 0.000000 79 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +report 0 131 2.079442 0.000000 92 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +spring 0 131 2.079442 0.000000 88 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +graphic 0 90 2.397895 0.000000 147 +environ 0 84 2.484907 0.000000 177 +west 0 83 2.484907 0.000000 192 +larg 0 82 2.484907 0.000000 168 +june 0 79 2.564949 0.000000 214 +appear 0 78 2.564949 0.000000 210 +orient 0 80 2.564949 0.000000 205 +april 0 77 2.564949 0.000000 196 +interfac 0 79 2.564949 0.000000 209 +effici 0 73 2.639057 0.000000 233 +free 0 73 2.639057 0.000000 224 +david 0 71 2.639057 0.000000 232 +workshop 0 71 2.639057 0.000000 239 +logic 0 71 2.639057 0.000000 230 +java 0 70 2.708050 0.000000 248 +street 0 63 2.772589 0.000000 293 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +space 0 57 2.890372 0.000000 310 +overview 0 56 2.890372 0.000000 323 +point 0 58 2.890372 0.000000 319 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +mark 0 44 3.135494 0.000000 403 +paul 0 38 3.295837 0.000000 471 +michael 0 35 3.401197 0.000000 514 +independ 0 32 3.465736 0.000000 548 +chair 0 29 3.583519 0.000000 596 +built 0 29 3.583519 0.000000 592 +univ 0 28 3.610918 0.000000 617 +proc 0 26 3.688879 0.000000 649 +todai 0 25 3.737670 0.000000 672 +inth 0 22 3.850148 0.000000 741 +sigmod 0 19 4.007333 0.000000 877 +adam 0 17 4.110874 0.000000 934 +former 0 17 4.110874 0.000000 956 +white 0 17 4.110874 0.000000 951 +fourth 0 16 4.174387 0.000000 999 +dilbert 0 16 4.174387 0.000000 996 +configur 0 15 4.248495 0.000000 1012 +conf 0 13 4.382027 0.000000 1181 +dewitt 0 12 4.465908 0.000000 1270 +nanci 0 12 4.465908 0.000000 1256 +daniel 0 12 4.465908 0.000000 1233 +persist 0 11 4.553877 0.000000 1367 +shore 0 11 4.553877 0.000000 1377 +vldb 0 10 4.653960 0.000000 1470 +franklin 0 10 4.653960 0.000000 1436 +naughton 0 10 4.653960 0.000000 1450 +jeffrei 0 9 4.753590 0.000000 1612 +solomon 1 8 4.875197 4.875197 1716 +carei 0 8 4.875197 0.000000 1781 +ioannidi 0 8 4.875197 0.000000 1714 +goodman 0 7 5.010635 0.000000 1891 +tsatalo 0 5 5.347108 0.000000 2581 +marvin 0 4 5.568345 0.000000 2806 +mcauliff 0 4 5.568345 0.000000 3083 +schuh 0 3 5.857933 0.000000 4014 +gmap 0 2 6.263398 0.000000 6241 +versatil 0 2 6.263398 0.000000 6242 +seth 0 2 6.263398 0.000000 4998 +andmarvin 0 1 6.957497 0.000000 20126 +astech 0 1 6.957497 0.000000 20127 +odyssea 0 1 6.957497 0.000000 20128 +publicationstoward 0 1 6.957497 0.000000 20129 +abstractpostscriptth 0 1 6.957497 0.000000 20130 +andyanni 0 1 6.957497 0.000000 20131 +abstractpostscriptexpand 0 1 6.957497 0.000000 20132 +journalv 0 1 6.957497 0.000000 20133 +abstractpostscriptshor 0 1 6.957497 0.000000 20134 +andmichael 0 1 6.957497 0.000000 20135 +zwillingavail 0 1 6.957497 0.000000 20136 +capitl 0 1 6.957497 0.000000 20137 +photoalbum 0 1 6.957497 0.000000 20138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..edd9ec46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +wisc 0 242 1.386294 0.000000 33 +welcom 0 122 2.079442 0.000000 99 +sowmya 1 4 5.568345 5.568345 2670 +subramanian 0 2 6.263398 0.000000 5666 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..97ad9545 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +read 0 154 1.791759 0.000000 47 +relat 0 139 1.945910 0.000000 68 +first 0 140 1.945910 0.000000 71 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +send 0 114 2.197225 0.000000 109 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +comment 0 93 2.397895 0.000000 146 +stuff 0 87 2.484907 0.000000 171 +info 0 85 2.484907 0.000000 176 +second 0 81 2.484907 0.000000 166 +resum 0 79 2.564949 0.000000 217 +want 0 79 2.564949 0.000000 199 +june 0 79 2.564949 0.000000 214 +html 0 75 2.639057 0.000000 235 +meet 0 72 2.639057 0.000000 229 +java 0 70 2.708050 0.000000 248 +juli 0 60 2.833213 0.000000 305 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +finger 0 52 2.995732 0.000000 354 +cool 0 49 3.044522 0.000000 374 +music 0 42 3.218876 0.000000 436 +movi 0 40 3.258097 0.000000 459 +decis 0 23 3.806662 0.000000 728 +love 0 21 3.912023 0.000000 804 +women 0 16 4.174387 0.000000 1004 +ascii 0 15 4.248495 0.000000 1032 +philadelphia 0 12 4.465908 0.000000 1244 +guest 0 12 4.465908 0.000000 1220 +pagei 0 8 4.875197 0.000000 1683 +judg 0 8 4.875197 0.000000 1644 +constitut 0 6 5.164786 0.000000 2026 +sail 0 5 5.347108 0.000000 2571 +panel 0 5 5.347108 0.000000 2463 +vote 0 4 5.568345 0.000000 2953 +lawand 0 2 6.263398 0.000000 6191 +stuffa 0 2 6.263398 0.000000 5999 +resours 0 2 6.263398 0.000000 5211 +serverth 0 2 6.263398 0.000000 4448 +hoofer 0 2 6.263398 0.000000 6101 +shilpa 1 1 6.957497 6.957497 20157 +pastfor 0 1 6.957497 0.000000 20158 +schoolher 0 1 6.957497 0.000000 20159 +syster 0 1 6.957497 0.000000 20160 +madisonsurf 0 1 6.957497 0.000000 20161 +madisonst 0 1 6.957497 0.000000 20162 +clubowl 0 1 6.957497 0.000000 20163 +signatur 0 1 6.957497 0.000000 20164 +lovesnowi 0 1 6.957497 0.000000 20165 +linksher 0 1 6.957497 0.000000 20166 +iswher 0 1 6.957497 0.000000 20167 +tossl 0 1 6.957497 0.000000 20168 +shilpal 0 1 6.957497 0.000000 20169 +thru 0 1 6.957497 0.000000 20170 +formlast 0 1 6.957497 0.000000 20171 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..323cd094 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +compil 0 122 2.079442 0.000000 96 +teach 0 108 2.197225 0.000000 112 +take 0 97 2.302585 0.000000 134 +section 0 94 2.397895 0.000000 149 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +sport 0 25 3.737670 0.000000 683 +pageth 0 7 5.010635 0.000000 1939 +jeremi 0 5 5.347108 0.000000 2360 +simpson 0 2 6.263398 0.000000 5994 +stenglein 0 1 6.957497 0.000000 20172 +stenglei 0 1 6.957497 0.000000 20173 +pageespn 0 1 6.957497 0.000000 20174 +hotwir 0 1 6.957497 0.000000 20175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..a7e6fcf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +phone 0 175 1.791759 0.000000 45 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +west 0 83 2.484907 0.000000 192 +june 0 79 2.564949 0.000000 214 +david 0 71 2.639057 0.000000 232 +free 0 73 2.639057 0.000000 224 +summari 0 73 2.639057 0.000000 237 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +faculti 0 56 2.890372 0.000000 325 +advisor 0 51 2.995732 0.000000 355 +finger 0 52 2.995732 0.000000 354 +mark 0 44 3.135494 0.000000 403 +join 0 39 3.258097 0.000000 457 +streetmadison 0 38 3.295837 0.000000 474 +feel 0 37 3.332205 0.000000 483 +sciencesunivers 0 37 3.332205 0.000000 486 +ofth 0 36 3.367296 0.000000 491 +often 0 31 3.496508 0.000000 551 +steve 0 29 3.583519 0.000000 594 +although 0 25 3.737670 0.000000 667 +hill 0 25 3.737670 0.000000 670 +finish 0 22 3.850148 0.000000 748 +tell 0 21 3.912023 0.000000 777 +wind 0 18 4.060443 0.000000 908 +steven 0 17 4.110874 0.000000 953 +wood 0 11 4.553877 0.000000 1355 +michigan 0 11 4.553877 0.000000 1368 +laru 0 9 4.753590 0.000000 1560 +reinhardt 1 5 5.347108 5.347108 2583 +computerarchitectur 0 5 5.347108 0.000000 2290 +publicationsresearch 0 4 5.568345 0.000000 2876 +eec 0 2 6.263398 0.000000 5981 +tunnelgroup 0 1 6.957497 0.000000 20176 +andjim 0 1 6.957497 0.000000 20177 +mewhat 0 1 6.957497 0.000000 20178 +stever 0 1 6.957497 0.000000 20179 +usalast 0 1 6.957497 0.000000 20180 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..9e38c0a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +search 0 95 2.397895 0.000000 155 +west 0 83 2.484907 0.000000 192 +exam 0 86 2.484907 0.000000 169 +stuff 0 87 2.484907 0.000000 171 +solv 0 73 2.639057 0.000000 234 +nation 0 74 2.639057 0.000000 240 +januari 0 62 2.772589 0.000000 264 +foundat 0 62 2.772589 0.000000 286 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +talk 0 53 2.944439 0.000000 336 +numer 0 49 3.044522 0.000000 369 +telephon 0 50 3.044522 0.000000 373 +show 0 43 3.178054 0.000000 417 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +field 0 37 3.332205 0.000000 482 +john 0 33 3.433987 0.000000 532 +begin 0 23 3.806662 0.000000 716 +rate 0 15 4.248495 0.000000 1037 +qualifi 0 8 4.875197 0.000000 1721 +chicago 0 6 5.164786 0.000000 2149 +fluid 0 5 5.347108 0.000000 2440 +kid 0 5 5.347108 0.000000 2516 +nathan 0 4 5.568345 0.000000 2794 +radio 0 4 5.568345 0.000000 3025 +car 0 4 5.568345 0.000000 2931 +drew 0 4 5.568345 0.000000 2980 +museum 0 3 5.857933 0.000000 3933 +pageoth 0 2 6.263398 0.000000 6104 +strikwerda 0 1 6.957497 0.000000 20181 +strikwerdadepart 0 1 6.957497 0.000000 20182 +strik 0 1 6.957497 0.000000 20183 +dynamicsmyoffici 0 1 6.957497 0.000000 20184 +tribun 0 1 6.957497 0.000000 20185 +footballmi 0 1 6.957497 0.000000 20186 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..45aad437 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +thing 0 84 2.484907 0.000000 189 +david 0 71 2.639057 0.000000 232 +evalu 0 64 2.772589 0.000000 266 +processor 0 54 2.944439 0.000000 335 +execut 0 45 3.135494 0.000000 404 +show 0 43 3.178054 0.000000 417 +cach 0 41 3.218876 0.000000 432 +late 0 40 3.258097 0.000000 439 +enjoi 0 26 3.688879 0.000000 660 +cambridg 0 16 4.174387 0.000000 1008 +stream 0 15 4.248495 0.000000 1015 +buffer 0 12 4.465908 0.000000 1211 +calvin 0 9 4.753590 0.000000 1518 +replac 0 8 4.875197 0.000000 1668 +integ 0 8 4.875197 0.000000 1688 +secondari 0 7 5.010635 0.000000 1884 +histor 0 6 5.164786 0.000000 2085 +superscalar 0 6 5.164786 0.000000 2082 +hobb 0 4 5.568345 0.000000 2893 +decoupl 0 4 5.568345 0.000000 2898 +letterman 0 3 5.857933 0.000000 3931 +seinfeld 0 3 5.857933 0.000000 3958 +subba 0 2 6.263398 0.000000 6091 +subbarao 0 2 6.263398 0.000000 6205 +prooocessor 0 1 6.957497 0.000000 20187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..745c9946 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +parallel 1 169 1.791759 1.791759 60 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +schedul 0 119 2.079442 0.000000 85 +send 0 114 2.197225 0.000000 109 +world 0 115 2.197225 0.000000 126 +search 0 95 2.397895 0.000000 155 +resourc 0 81 2.484907 0.000000 172 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +complet 0 77 2.564949 0.000000 208 +dynam 0 76 2.564949 0.000000 194 +april 0 77 2.564949 0.000000 196 +sourc 0 77 2.564949 0.000000 201 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +polici 0 64 2.772589 0.000000 279 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +guid 0 63 2.772589 0.000000 267 +processor 0 54 2.944439 0.000000 335 +still 0 50 3.044522 0.000000 362 +bibliographi 0 34 3.401197 0.000000 518 +limit 0 29 3.583519 0.000000 585 +measur 0 28 3.610918 0.000000 609 +static 0 27 3.637586 0.000000 619 +proc 0 26 3.688879 0.000000 649 +subject 0 26 3.688879 0.000000 647 +strategi 0 25 3.737670 0.000000 682 +yahoo 0 24 3.761200 0.000000 707 +alloc 0 20 3.951244 0.000000 821 +thur 0 19 4.007333 0.000000 847 +taiwan 1 16 4.174387 4.174387 1006 +sigmetr 0 13 4.382027 0.000000 1173 +conf 0 13 4.382027 0.000000 1181 +mari 0 12 4.465908 0.000000 1266 +characterist 0 12 4.465908 0.000000 1257 +shop 0 10 4.653960 0.000000 1469 +vernon 0 9 4.753590 0.000000 1556 +job 0 8 4.875197 0.000000 1702 +calendar 0 8 4.875197 0.000000 1649 +chiang 0 7 5.010635 0.000000 1853 +quantum 0 6 5.164786 0.000000 2214 +academia 0 6 5.164786 0.000000 2036 +ta 0 4 5.568345 0.000000 3058 +nashvil 0 4 5.568345 0.000000 2867 +sinanet 0 4 5.568345 0.000000 2883 +suhui 0 3 5.857933 0.000000 3430 +educlick 0 3 5.857933 0.000000 3612 +rajesh 0 3 5.857933 0.000000 3511 +conjunct 0 3 5.857933 0.000000 3743 +ipp 0 3 5.857933 0.000000 3381 +sinica 0 3 5.857933 0.000000 3819 +preemption 0 2 6.263398 0.000000 6230 +mansharamani 0 2 6.263398 0.000000 6231 +catalogu 0 2 6.263398 0.000000 6166 +magzin 0 2 6.263398 0.000000 5614 +madisonoffic 0 1 6.957497 0.000000 20188 +stelephon 0 1 6.957497 0.000000 20189 +emailoffic 0 1 6.957497 0.000000 20190 +seednet 0 1 6.957497 0.000000 20191 +vistor 0 1 6.957497 0.000000 20192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..9c606235 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,193 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +link 1 247 1.386294 1.386294 24 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +site 0 106 2.197225 0.000000 119 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +homepag 0 93 2.397895 0.000000 148 +call 0 91 2.397895 0.000000 153 +pictur 0 89 2.397895 0.000000 160 +select 0 91 2.397895 0.000000 154 +follow 0 92 2.397895 0.000000 143 +info 0 85 2.484907 0.000000 176 +institut 0 84 2.484907 0.000000 187 +contain 0 81 2.484907 0.000000 174 +sourc 0 77 2.564949 0.000000 201 +david 0 71 2.639057 0.000000 232 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +addit 0 74 2.639057 0.000000 228 +servic 0 72 2.639057 0.000000 236 +view 0 70 2.708050 0.000000 254 +new 0 64 2.772589 0.000000 262 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +foundat 0 62 2.772589 0.000000 286 +written 0 63 2.772589 0.000000 278 +back 0 60 2.833213 0.000000 297 +variou 0 56 2.890372 0.000000 317 +sever 0 56 2.890372 0.000000 322 +index 0 56 2.890372 0.000000 309 +publish 0 57 2.890372 0.000000 326 +scientif 0 53 2.944439 0.000000 341 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +numer 0 49 3.044522 0.000000 369 +physic 0 47 3.091042 0.000000 377 +math 0 44 3.135494 0.000000 402 +natur 0 44 3.135494 0.000000 406 +show 0 43 3.178054 0.000000 417 +edit 0 42 3.218876 0.000000 418 +review 0 42 3.218876 0.000000 425 +realli 0 40 3.258097 0.000000 444 +late 0 40 3.258097 0.000000 439 +movi 0 40 3.258097 0.000000 459 +hand 0 37 3.332205 0.000000 475 +print 0 34 3.401197 0.000000 503 +articl 0 33 3.433987 0.000000 530 +obtain 0 33 3.433987 0.000000 534 +within 0 33 3.433987 0.000000 525 +india 0 32 3.465736 0.000000 550 +scientist 0 31 3.496508 0.000000 560 +steve 0 29 3.583519 0.000000 594 +weather 0 28 3.610918 0.000000 618 +american 0 27 3.637586 0.000000 634 +great 0 27 3.637586 0.000000 626 +william 0 22 3.850148 0.000000 765 +tell 0 21 3.912023 0.000000 777 +ever 0 19 4.007333 0.000000 872 +hypertext 0 19 4.007333 0.000000 865 +histori 0 19 4.007333 0.000000 853 +thoma 0 18 4.060443 0.000000 901 +regist 0 17 4.110874 0.000000 938 +weekli 0 17 4.110874 0.000000 919 +upon 0 16 4.174387 0.000000 978 +choos 0 16 4.174387 0.000000 964 +brief 0 16 4.174387 0.000000 1001 +dilbert 0 16 4.174387 0.000000 996 +advantag 0 16 4.174387 0.000000 987 +piec 0 15 4.248495 0.000000 1020 +floor 0 14 4.317488 0.000000 1070 +camera 0 14 4.317488 0.000000 1115 +care 0 13 4.382027 0.000000 1177 +wait 0 13 4.382027 0.000000 1168 +reader 0 12 4.465908 0.000000 1246 +outsid 0 12 4.465908 0.000000 1219 +newspap 0 12 4.465908 0.000000 1280 +smart 0 11 4.553877 0.000000 1352 +see 0 11 4.553877 0.000000 1337 +fill 0 11 4.553877 0.000000 1349 +santa 0 10 4.653960 0.000000 1441 +jump 0 9 4.753590 0.000000 1603 +joke 0 8 4.875197 0.000000 1620 +philosoph 0 7 5.010635 0.000000 1904 +channel 0 7 5.010635 0.000000 1836 +dedic 0 7 5.010635 0.000000 1843 +centuri 0 7 5.010635 0.000000 1935 +smaller 0 7 5.010635 0.000000 1874 +tri 0 6 5.164786 0.000000 2166 +strip 0 6 5.164786 0.000000 2203 +somewher 0 6 5.164786 0.000000 2176 +artist 0 6 5.164786 0.000000 2127 +feet 0 5 5.347108 0.000000 2492 +optimist 0 5 5.347108 0.000000 2501 +push 0 4 5.568345 0.000000 2635 +climb 0 4 5.568345 0.000000 2936 +surviv 0 4 5.568345 0.000000 2734 +writer 0 4 5.568345 0.000000 2783 +observatori 0 4 5.568345 0.000000 3070 +ultra 0 4 5.568345 0.000000 2889 +sundaram 0 3 5.857933 0.000000 3463 +astronomi 0 3 5.857933 0.000000 3974 +blind 0 3 5.857933 0.000000 3662 +scream 0 3 5.857933 0.000000 3609 +knee 0 3 5.857933 0.000000 3980 +woman 0 3 5.857933 0.000000 3539 +lunch 0 3 5.857933 0.000000 3369 +beat 0 3 5.857933 0.000000 3840 +packer 0 3 5.857933 0.000000 3728 +dozen 0 3 5.857933 0.000000 3905 +hindu 0 3 5.857933 0.000000 3590 +stukel 0 2 6.263398 0.000000 4698 +patient 0 2 6.263398 0.000000 6223 +spurt 0 2 6.263398 0.000000 5464 +crawl 0 2 6.263398 0.000000 5561 +damn 0 2 6.263398 0.000000 6129 +nake 0 2 6.263398 0.000000 6197 +cloth 0 2 6.263398 0.000000 5884 +conscious 0 2 6.263398 0.000000 5954 +destruct 0 2 6.263398 0.000000 6232 +phoenix 0 2 6.263398 0.000000 4552 +reward 0 2 6.263398 0.000000 5402 +disinform 0 2 6.263398 0.000000 5494 +trendi 0 2 6.263398 0.000000 4717 +browbeck 0 1 6.957497 0.000000 20193 +effronteri 0 1 6.957497 0.000000 20194 +femor 0 1 6.957497 0.000000 20195 +arteri 0 1 6.957497 0.000000 20196 +blood 0 1 6.957497 0.000000 20197 +anesthetist 0 1 6.957497 0.000000 20198 +groin 0 1 6.957497 0.000000 20199 +hamstr 0 1 6.957497 0.000000 20200 +scalpel 0 1 6.957497 0.000000 20201 +stab 0 1 6.957497 0.000000 20202 +leg 0 1 6.957497 0.000000 20203 +voilet 0 1 6.957497 0.000000 20204 +baboon 0 1 6.957497 0.000000 20205 +wig 0 1 6.957497 0.000000 20206 +pois 0 1 6.957497 0.000000 20207 +stomp 0 1 6.957497 0.000000 20208 +cop 0 1 6.957497 0.000000 20209 +rush 0 1 6.957497 0.000000 20210 +burrough 0 1 6.957497 0.000000 20211 +catapult 0 1 6.957497 0.000000 20212 +mann 0 1 6.957497 0.000000 20213 +wearabl 0 1 6.957497 0.000000 20214 +tierra 0 1 6.957497 0.000000 20215 +arcosanti 0 1 6.957497 0.000000 20216 +arcolog 0 1 6.957497 0.000000 20217 +krishnamurti 0 1 6.957497 0.000000 20218 +harass 0 1 6.957497 0.000000 20219 +factoid 0 1 6.957497 0.000000 20220 +astound 0 1 6.957497 0.000000 20221 +onion 0 1 6.957497 0.000000 20222 +washburn 0 1 6.957497 0.000000 20223 +len 0 1 6.957497 0.000000 20224 +insignific 0 1 6.957497 0.000000 20225 +webweath 0 1 6.957497 0.000000 20226 +timothi 0 1 6.957497 0.000000 20227 +leari 0 1 6.957497 0.000000 20228 +noam 0 1 6.957497 0.000000 20229 +chomski 0 1 6.957497 0.000000 20230 +conspiraci 0 1 6.957497 0.000000 20231 +buri 0 1 6.957497 0.000000 20232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..6049dfbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +hour 0 165 1.791759 0.000000 46 +think 0 57 2.890372 0.000000 314 +mark 0 44 3.135494 0.000000 403 +brian 0 38 3.295837 0.000000 466 +bookmark 0 26 3.688879 0.000000 639 +pagebrian 0 4 5.568345 0.000000 3054 +swander 1 3 5.857933 5.857933 3440 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..d8e07bce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +perform 1 143 1.945910 1.945910 74 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +spring 0 131 2.079442 0.000000 88 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +analysi 0 124 2.079442 0.000000 98 +version 0 113 2.197225 0.000000 122 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +memori 0 101 2.302585 0.000000 139 +peopl 0 96 2.302585 0.000000 132 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +level 0 87 2.484907 0.000000 180 +resourc 0 81 2.484907 0.000000 172 +april 0 77 2.564949 0.000000 196 +free 0 73 2.639057 0.000000 224 +differ 0 66 2.708050 0.000000 253 +street 0 63 2.772589 0.000000 293 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +share 0 59 2.833213 0.000000 304 +colleg 0 61 2.833213 0.000000 300 +thesi 0 57 2.890372 0.000000 327 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +cool 0 49 3.044522 0.000000 374 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +concurr 0 34 3.401197 0.000000 501 +multiprocessor 0 28 3.610918 0.000000 605 +enhanc 0 26 3.688879 0.000000 644 +supercomput 0 25 3.737670 0.000000 681 +highli 0 23 3.806662 0.000000 725 +els 0 19 4.007333 0.000000 843 +interconnect 0 17 4.110874 0.000000 937 +countri 0 15 4.248495 0.000000 1059 +fortran 0 15 4.248495 0.000000 1027 +block 0 13 4.382027 0.000000 1183 +wait 0 13 4.382027 0.000000 1168 +asynchron 0 12 4.465908 0.000000 1229 +typic 0 11 4.553877 0.000000 1360 +pose 0 9 4.753590 0.000000 1535 +paradyn 0 9 4.753590 0.000000 1614 +joke 0 8 4.875197 0.000000 1620 +attach 0 7 5.010635 0.000000 1785 +park 0 6 5.164786 0.000000 2218 +risc 0 6 5.164786 0.000000 2016 +matur 0 5 5.347108 0.000000 2269 +hate 0 5 5.347108 0.000000 2529 +sorri 0 4 5.568345 0.000000 3059 +eventu 0 4 5.568345 0.000000 3074 +cleaner 0 3 5.857933 0.000000 3775 +raid 0 3 5.857933 0.000000 4012 +snowboard 1 2 6.263398 6.263398 5084 +angri 0 2 6.263398 0.000000 5088 +greet 0 2 6.263398 0.000000 5903 +dude 0 2 6.263398 0.000000 4977 +callaghan 0 2 6.263398 0.000000 6128 +stripe 0 2 6.263398 0.000000 6106 +tamch 0 1 6.957497 0.000000 20233 +ariel 0 1 6.957497 0.000000 20234 +municip 0 1 6.957497 0.000000 20235 +bond 0 1 6.957497 0.000000 20236 +tamchesari 0 1 6.957497 0.000000 20237 +assistantemail 0 1 6.957497 0.000000 20238 +posei 0 1 6.957497 0.000000 20239 +sresearch 0 1 6.957497 0.000000 20240 +toolsstatu 0 1 6.957497 0.000000 20241 +toolsparallel 0 1 6.957497 0.000000 20242 +systemsbluesth 0 1 6.957497 0.000000 20243 +simpsonsseinfeldskiingskinetkeyston 0 1 6.957497 0.000000 20244 +vacum 0 1 6.957497 0.000000 20245 +dirt 0 1 6.957497 0.000000 20246 +whoa 0 1 6.957497 0.000000 20247 +incom 0 1 6.957497 0.000000 20248 +yahooespncpu 0 1 6.957497 0.000000 20249 +infoskinetoth 0 1 6.957497 0.000000 20250 +exokernel 0 1 6.957497 0.000000 20251 +zebra 0 1 6.957497 0.000000 20252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..dfed1adc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +high 0 130 2.079442 0.000000 101 +look 0 107 2.197225 0.000000 115 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +pictur 1 89 2.397895 2.397895 160 +school 0 84 2.484907 0.000000 188 +academ 0 82 2.484907 0.000000 178 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +copi 0 63 2.772589 0.000000 284 +dept 0 64 2.772589 0.000000 291 +import 0 65 2.772589 0.000000 282 +juli 0 60 2.833213 0.000000 305 +think 0 57 2.890372 0.000000 314 +found 0 53 2.944439 0.000000 337 +still 0 50 3.044522 0.000000 362 +basic 0 50 3.044522 0.000000 360 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +music 0 42 3.218876 0.000000 436 +movi 0 40 3.258097 0.000000 459 +least 0 35 3.401197 0.000000 516 +human 0 32 3.465736 0.000000 546 +turn 0 29 3.583519 0.000000 586 +jeff 0 25 3.737670 0.000000 673 +never 0 25 3.737670 0.000000 671 +wish 0 24 3.761200 0.000000 692 +miscellan 0 23 3.806662 0.000000 731 +voic 0 21 3.912023 0.000000 806 +newsgroup 0 21 3.912023 0.000000 783 +separ 0 19 4.007333 0.000000 844 +andrew 0 19 4.007333 0.000000 849 +coupl 0 17 4.110874 0.000000 939 +monitor 0 17 4.110874 0.000000 941 +seek 0 17 4.110874 0.000000 954 +took 0 16 4.174387 0.000000 1010 +sign 0 16 4.174387 0.000000 970 +choos 0 16 4.174387 0.000000 964 +hobbi 0 16 4.174387 0.000000 1009 +doesn 0 15 4.248495 0.000000 1055 +club 0 15 4.248495 0.000000 1058 +someon 0 13 4.382027 0.000000 1128 +song 0 11 4.553877 0.000000 1380 +night 0 11 4.553877 0.000000 1319 +sound 0 9 4.753590 0.000000 1605 +theme 0 8 4.875197 0.000000 1707 +henc 0 7 5.010635 0.000000 1805 +pace 0 6 5.164786 0.000000 2011 +plu 0 6 5.164786 0.000000 2004 +lampert 0 5 5.347108 0.000000 2398 +babi 0 5 5.347108 0.000000 2493 +tick 0 4 5.568345 0.000000 2975 +heard 0 4 5.568345 0.000000 2895 +fire 0 4 5.568345 0.000000 3001 +nota 0 3 5.857933 0.000000 3785 +crow 0 3 5.857933 0.000000 3845 +straight 0 3 5.857933 0.000000 3655 +forward 0 3 5.857933 0.000000 3784 +yearbook 0 2 6.263398 0.000000 6243 +cute 0 2 6.263398 0.000000 5108 +befound 0 2 6.263398 0.000000 5964 +sick 0 2 6.263398 0.000000 5773 +mstk 0 2 6.263398 0.000000 5501 +pagejeff 0 1 6.957497 0.000000 20253 +ricardo 0 1 6.957497 0.000000 20254 +montalban 0 1 6.957497 0.000000 20255 +foron 0 1 6.957497 0.000000 20256 +incrimin 0 1 6.957497 0.000000 20257 +aconvict 0 1 6.957497 0.000000 20258 +lasttim 0 1 6.957497 0.000000 20259 +threaten 0 1 6.957497 0.000000 20260 +intoa 0 1 6.957497 0.000000 20261 +dispens 0 1 6.957497 0.000000 20262 +anautograph 0 1 6.957497 0.000000 20263 +pictureappear 0 1 6.957497 0.000000 20264 +weasel 0 1 6.957497 0.000000 20265 +factswho 0 1 6.957497 0.000000 20266 +relatedwhat 0 1 6.957497 0.000000 20267 +entertainmentbook 0 1 6.957497 0.000000 20268 +subjectsfriendsno 0 1 6.957497 0.000000 20269 +organizationsgroup 0 1 6.957497 0.000000 20270 +inmi 0 1 6.957497 0.000000 20271 +linksugh 0 1 6.957497 0.000000 20272 +servo 0 1 6.957497 0.000000 20273 +eclect 0 1 6.957497 0.000000 20274 +paraphenaliai 0 1 6.957497 0.000000 20275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..cf50394d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +area 0 144 1.945910 0.000000 80 +teach 0 108 2.197225 0.000000 112 +mathemat 0 108 2.197225 0.000000 123 +section 0 94 2.397895 0.000000 149 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +todd 0 15 4.248495 0.000000 1051 +wealth 0 3 5.857933 0.000000 3353 +plug 0 2 6.263398 0.000000 5167 +tmunson 0 2 6.263398 0.000000 4809 +mathematicalprogram 1 1 6.957497 6.957497 20276 +homepagetodd 0 1 6.957497 0.000000 20277 +homepagein 0 1 6.957497 0.000000 20278 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..9d6b8c55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +thing 1 84 2.484907 2.484907 189 +life 1 50 3.044522 3.044522 375 +brian 1 38 3.295837 3.295837 466 +streetmadison 1 38 3.295837 3.295837 474 +departmentunivers 1 24 3.761200 3.761200 711 +accept 1 18 4.060443 4.060443 879 +stand 1 18 4.060443 4.060443 891 +chief 1 7 5.010635 5.010635 1829 +ground 1 7 5.010635 5.010635 1955 +pagebrian 1 4 5.568345 5.568345 3054 +bear 1 4 5.568345 5.568345 2651 +civil 1 3 5.857933 5.857933 3908 +medit 1 2 6.263398 6.263398 5777 +creatur 1 2 6.263398 6.263398 6107 +essenc 1 2 6.263398 6.263398 6150 +toonen 1 1 6.957497 6.957497 20279 +cswhatev 1 1 6.957497 6.957497 20280 +seattleth 1 1 6.957497 6.957497 20281 +tipi 1 1 6.957497 6.957497 20282 +itsmean 1 1 6.957497 6.957497 20283 +kinship 1 1 6.957497 6.957497 20284 +acknowledgingun 1 1 6.957497 6.957497 20285 +infus 1 1 6.957497 6.957497 20286 +thetru 1 1 6.957497 6.957497 20287 +luther 1 1 6.957497 6.957497 20288 +oglala 1 1 6.957497 6.957497 20289 +siouxlast 1 1 6.957497 6.957497 20290 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..1ca7ba30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +read 0 154 1.791759 0.000000 47 +site 0 106 2.197225 0.000000 119 +version 0 113 2.197225 0.000000 122 +browser 0 56 2.890372 0.000000 313 +netscap 0 44 3.135494 0.000000 395 +option 0 30 3.555348 0.000000 575 +enhanc 0 26 3.688879 0.000000 644 +latest 0 21 3.912023 0.000000 785 +thano 0 3 5.857933 0.000000 3424 +tsioli 0 3 5.857933 0.000000 3418 +shouldconsid 0 2 6.263398 0.000000 6061 +upgrad 0 1 6.957497 0.000000 20291 +ifthat 0 1 6.957497 0.000000 20292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..e51fc43a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +section 0 94 2.397895 0.000000 149 +locat 0 59 2.833213 0.000000 303 +case 0 51 2.995732 0.000000 351 +keep 0 44 3.135494 0.000000 409 +long 0 43 3.178054 0.000000 413 +compani 0 41 3.218876 0.000000 423 +live 0 40 3.258097 0.000000 451 +origin 0 38 3.295837 0.000000 472 +computersci 0 30 3.555348 0.000000 562 +departmentunivers 0 24 3.761200 0.000000 711 +left 0 19 4.007333 0.000000 851 +todd 0 15 4.248495 0.000000 1051 +enough 0 15 4.248495 0.000000 1040 +awai 0 10 4.653960 0.000000 1447 +babylon 0 8 4.875197 0.000000 1731 +hold 0 8 4.875197 0.000000 1645 +judg 0 8 4.875197 0.000000 1644 +sciencesat 0 7 5.010635 0.000000 1968 +ohio 0 5 5.347108 0.000000 2447 +dougla 0 5 5.347108 0.000000 2471 +amus 0 5 5.347108 0.000000 2366 +turnidg 0 4 5.568345 0.000000 2829 +rep 0 4 5.568345 0.000000 3087 +western 0 4 5.568345 0.000000 3062 +kent 0 4 5.568345 0.000000 2744 +evid 0 4 5.568345 0.000000 2768 +shortcut 0 3 5.857933 0.000000 3932 +axi 0 2 6.263398 0.000000 6069 +milton 0 2 6.263398 0.000000 6153 +professorthoma 0 2 6.263398 0.000000 5053 +turnidgeschoolcomput 0 1 6.957497 0.000000 20293 +homemuppet 0 1 6.957497 0.000000 20294 +eyesightright 0 1 6.957497 0.000000 20295 +studyingprogram 0 1 6.957497 0.000000 20296 +mathematicsand 0 1 6.957497 0.000000 20297 +reserveunivers 0 1 6.957497 0.000000 20298 +cleveland 0 1 6.957497 0.000000 20299 +myfamili 0 1 6.957497 0.000000 20300 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..3752736e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +sinc 0 90 2.397895 0.000000 159 +june 0 79 2.564949 0.000000 214 +street 0 63 2.772589 0.000000 293 +visit 0 63 2.772589 0.000000 288 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +wang 0 21 3.912023 0.000000 790 +heavi 0 7 5.010635 0.000000 1841 +bldg 0 4 5.568345 0.000000 2983 +taxiao 0 2 6.263398 0.000000 4806 +twang 0 2 6.263398 0.000000 5730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..23824786 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +wisc 0 242 1.386294 0.000000 33 +start 0 83 2.484907 0.000000 173 +meet 1 72 2.639057 2.639057 229 +window 0 68 2.708050 0.000000 242 +microsoft 0 38 3.295837 0.000000 468 +trek 1 3 5.857933 5.857933 4025 +shaft 1 2 6.263398 6.263398 6186 +pageuri 0 1 6.957497 0.000000 20301 +pageemail 0 1 6.957497 0.000000 20302 +eduinterest 0 1 6.957497 0.000000 20303 +diversionsstart 0 1 6.957497 0.000000 20304 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..77076140 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +look 0 107 2.197225 0.000000 115 +homepag 0 93 2.397895 0.000000 148 +real 0 93 2.397895 0.000000 144 +present 0 91 2.397895 0.000000 145 +info 0 85 2.484907 0.000000 176 +educ 0 86 2.484907 0.000000 191 +know 0 80 2.564949 0.000000 198 +want 0 79 2.564949 0.000000 199 +onlin 0 75 2.639057 0.000000 223 +januari 0 62 2.772589 0.000000 264 +undergradu 0 54 2.944439 0.000000 338 +basic 0 50 3.044522 0.000000 360 +past 0 42 3.218876 0.000000 428 +india 0 32 3.465736 0.000000 550 +hope 0 28 3.610918 0.000000 610 +sometim 0 24 3.761200 0.000000 696 +earlier 0 13 4.382027 0.000000 1140 +usaphon 0 9 4.753590 0.000000 1600 +madra 0 8 4.875197 0.000000 1770 +nativ 0 6 5.164786 0.000000 2192 +asha 0 3 5.857933 0.000000 4037 +venkatesh 0 2 6.263398 0.000000 6154 +andhra 0 2 6.263398 0.000000 5571 +pradesh 0 2 6.263398 0.000000 5572 +yearbook 0 2 6.263398 0.000000 6243 +till 0 2 6.263398 0.000000 5850 +btech 0 2 6.263398 0.000000 6123 +genesi 0 2 6.263398 0.000000 6011 +ganti 0 1 6.957497 0.000000 20305 +godav 0 1 6.957497 0.000000 20306 +pagevenkatesh 0 1 6.957497 0.000000 20307 +vganti 0 1 6.957497 0.000000 20308 +studentoffic 0 1 6.957497 0.000000 20309 +kakinada 0 1 6.957497 0.000000 20310 +hostel 0 1 6.957497 0.000000 20311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..30bfe392 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +architectur 0 139 1.945910 0.000000 77 +file 0 132 1.945910 0.000000 70 +compil 0 122 2.079442 0.000000 96 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +schedul 0 119 2.079442 0.000000 85 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +memori 0 101 2.302585 0.000000 139 +commun 0 95 2.397895 0.000000 157 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +symposium 0 72 2.639057 0.000000 238 +august 0 66 2.708050 0.000000 257 +street 0 63 2.772589 0.000000 293 +processor 0 54 2.944439 0.000000 335 +undergradu 0 54 2.944439 0.000000 338 +profession 0 51 2.995732 0.000000 345 +advisor 0 51 2.995732 0.000000 355 +annual 0 40 3.258097 0.000000 458 +submit 0 39 3.258097 0.000000 440 +go 0 33 3.433987 0.000000 529 +india 0 32 3.465736 0.000000 550 +dissert 0 32 3.465736 0.000000 549 +depend 0 29 3.583519 0.000000 583 +strategi 0 25 3.737670 0.000000 682 +doctor 0 24 3.761200 0.000000 709 +predict 0 19 4.007333 0.000000 855 +regist 0 17 4.110874 0.000000 938 +micro 0 15 4.248495 0.000000 1031 +side 0 15 4.248495 0.000000 1022 +affili 0 13 4.382027 0.000000 1194 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 0 6 5.164786 0.000000 2237 +microarchitectur 0 6 5.164786 0.000000 2238 +guri 0 5 5.347108 0.000000 2578 +vijai 0 4 5.568345 0.000000 2960 +vijaykumar 0 3 5.857933 0.000000 4011 +breach 0 3 5.857933 0.000000 4009 +anatomi 0 3 5.857933 0.000000 4010 +birla 0 2 6.263398 0.000000 6239 +pilani 0 2 6.263398 0.000000 6240 +architecturet 0 1 6.957497 0.000000 20312 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..fbe3eedb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +theori 0 111 2.197225 0.000000 127 +site 0 106 2.197225 0.000000 119 +associ 0 93 2.397895 0.000000 151 +refer 0 78 2.564949 0.000000 203 +laboratori 0 63 2.772589 0.000000 292 +foundat 0 62 2.772589 0.000000 286 +telephon 0 50 3.044522 0.000000 373 +archiv 0 49 3.044522 0.000000 364 +physic 0 47 3.091042 0.000000 377 +theoret 0 39 3.258097 0.000000 446 +streetmadison 0 38 3.295837 0.000000 474 +bibliographi 0 34 3.401197 0.000000 518 +approxim 0 35 3.401197 0.000000 509 +random 0 34 3.401197 0.000000 511 +john 0 33 3.433987 0.000000 532 +proc 0 26 3.688879 0.000000 649 +departmentunivers 0 24 3.761200 0.000000 711 +daili 0 24 3.761200 0.000000 706 +theorem 0 21 3.912023 0.000000 786 +hypertext 0 19 4.007333 0.000000 865 +dimension 0 18 4.060443 0.000000 909 +element 0 18 4.060443 0.000000 895 +stanford 0 17 4.110874 0.000000 955 +fourth 0 16 4.174387 0.000000 999 +polynomi 0 14 4.317488 0.000000 1069 +automata 0 13 4.382027 0.000000 1135 +montreal 0 7 5.010635 0.000000 1961 +quantum 1 6 5.164786 5.164786 2214 +oxford 0 6 5.164786 0.000000 2121 +forecast 0 6 5.164786 0.000000 2171 +cellular 0 5 5.347108 0.000000 2433 +symp 0 5 5.347108 0.000000 2376 +particl 0 5 5.347108 0.000000 2436 +beam 0 5 5.347108 0.000000 2344 +ucla 0 5 5.347108 0.000000 2502 +webster 0 5 5.347108 0.000000 2468 +preprint 0 3 5.857933 0.000000 3481 +quotat 0 3 5.857933 0.000000 3121 +pagejohn 0 2 6.263398 0.000000 5603 +thesauru 0 2 6.263398 0.000000 6238 +isthmu 0 2 6.263398 0.000000 6152 +watrou 0 1 6.957497 0.000000 20313 +artin 0 1 6.957497 0.000000 20314 +whapl 0 1 6.957497 0.000000 20315 +canadiannumb 0 1 6.957497 0.000000 20316 +assort 0 1 6.957497 0.000000 20317 +lanl 0 1 6.957497 0.000000 20318 +hypatia 0 1 6.957497 0.000000 20319 +stylehypertext 0 1 6.957497 0.000000 20320 +interfaceroget 0 1 6.957497 0.000000 20321 +parasol 0 1 6.957497 0.000000 20322 +recordsplayst 0 1 6.957497 0.000000 20323 +linksweath 0 1 6.957497 0.000000 20324 +madisonth 0 1 6.957497 0.000000 20325 +pagemathemat 0 1 6.957497 0.000000 20326 +servermathematician 0 1 6.957497 0.000000 20327 +biographiesgeek 0 1 6.957497 0.000000 20328 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..dda66d84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +send 0 114 2.197225 0.000000 109 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +david 0 71 2.639057 0.000000 232 +test 0 66 2.708050 0.000000 252 +guid 0 63 2.772589 0.000000 267 +plai 0 60 2.833213 0.000000 307 +think 0 57 2.890372 0.000000 314 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +date 0 51 2.995732 0.000000 344 +friend 0 48 3.044522 0.000000 376 +possibl 0 47 3.091042 0.000000 378 +physic 0 47 3.091042 0.000000 377 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +around 0 43 3.178054 0.000000 415 +review 0 42 3.218876 0.000000 425 +movi 0 40 3.258097 0.000000 459 +must 0 40 3.258097 0.000000 442 +littl 0 39 3.258097 0.000000 454 +late 0 40 3.258097 0.000000 439 +correct 0 38 3.295837 0.000000 462 +game 0 36 3.367296 0.000000 498 +least 0 35 3.401197 0.000000 516 +richard 0 31 3.496508 0.000000 559 +steve 0 29 3.583519 0.000000 594 +weather 0 28 3.610918 0.000000 618 +great 0 27 3.637586 0.000000 626 +sometim 0 24 3.761200 0.000000 696 +seri 0 24 3.761200 0.000000 708 +wish 0 24 3.761200 0.000000 692 +deal 0 22 3.850148 0.000000 736 +reduc 0 22 3.850148 0.000000 759 +alumni 0 21 3.912023 0.000000 807 +increas 0 20 3.951244 0.000000 829 +longer 0 20 3.951244 0.000000 816 +miss 0 19 4.007333 0.000000 866 +english 0 15 4.248495 0.000000 1033 +manner 0 14 4.317488 0.000000 1074 +someon 0 13 4.382027 0.000000 1128 +entertain 1 12 4.465908 4.465908 1286 +food 0 12 4.465908 0.000000 1285 +amount 0 12 4.465908 0.000000 1208 +player 0 11 4.553877 0.000000 1371 +recit 0 9 4.753590 0.000000 1475 +french 0 9 4.753590 0.000000 1511 +hockei 0 8 4.875197 0.000000 1760 +affect 0 6 5.164786 0.000000 2044 +commit 0 6 5.164786 0.000000 2233 +moder 0 6 5.164786 0.000000 2112 +postcard 0 6 5.164786 0.000000 2181 +forecast 0 6 5.164786 0.000000 2171 +ship 0 5 5.347108 0.000000 2534 +speaker 0 5 5.347108 0.000000 2370 +feet 0 5 5.347108 0.000000 2492 +circumst 0 5 5.347108 0.000000 2283 +peke 0 5 5.347108 0.000000 2539 +cell 0 5 5.347108 0.000000 2274 +lesson 0 5 5.347108 0.000000 2568 +insan 0 3 5.857933 0.000000 4006 +omit 0 3 5.857933 0.000000 3466 +letterman 0 3 5.857933 0.000000 3931 +truck 0 2 6.263398 0.000000 5713 +proportion 0 2 6.263398 0.000000 4091 +behaviour 0 2 6.263398 0.000000 4724 +studio 0 2 6.263398 0.000000 5838 +francais 0 2 6.263398 0.000000 6020 +weiru 0 1 6.957497 0.000000 20329 +eiru 0 1 6.957497 0.000000 20330 +ppppleas 0 1 6.957497 0.000000 20331 +asylum 0 1 6.957497 0.000000 20332 +verbal 0 1 6.957497 0.000000 20333 +cargo 0 1 6.957497 0.000000 20334 +havenos 0 1 6.957497 0.000000 20335 +smell 0 1 6.957497 0.000000 20336 +leder 0 1 6.957497 0.000000 20337 +beoffer 0 1 6.957497 0.000000 20338 +customari 0 1 6.957497 0.000000 20339 +begina 0 1 6.957497 0.000000 20340 +amountof 0 1 6.957497 0.000000 20341 +merest 0 1 6.957497 0.000000 20342 +ofaffect 0 1 6.957497 0.000000 20343 +excruciatingli 0 1 6.957497 0.000000 20344 +atmadison 0 1 6.957497 0.000000 20345 +grei 0 1 6.957497 0.000000 20346 +francai 0 1 6.957497 0.000000 20347 +dictionnairefrancai 0 1 6.957497 0.000000 20348 +anglai 0 1 6.957497 0.000000 20349 +dictionnair 0 1 6.957497 0.000000 20350 +relatif 0 1 6.957497 0.000000 20351 +lafrancophoni 0 1 6.957497 0.000000 20352 +degrammair 0 1 6.957497 0.000000 20353 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..19a7972d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +technolog 0 131 2.079442 0.000000 102 +welcom 0 122 2.079442 0.000000 99 +provid 0 121 2.079442 0.000000 94 +manag 0 114 2.197225 0.000000 125 +peopl 0 96 2.302585 0.000000 132 +memori 0 101 2.302585 0.000000 139 +center 0 88 2.397895 0.000000 158 +educ 0 86 2.484907 0.000000 191 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +state 0 76 2.564949 0.000000 207 +differ 0 66 2.708050 0.000000 253 +written 0 63 2.772589 0.000000 278 +best 0 59 2.833213 0.000000 299 +variou 0 56 2.890372 0.000000 317 +tabl 0 51 2.995732 0.000000 346 +without 0 50 3.044522 0.000000 370 +california 0 46 3.091042 0.000000 388 +china 0 37 3.332205 0.000000 487 +game 0 36 3.367296 0.000000 498 +within 0 33 3.433987 0.000000 525 +platform 0 29 3.583519 0.000000 591 +quot 0 29 3.583519 0.000000 582 +framework 0 28 3.610918 0.000000 606 +mine 0 26 3.688879 0.000000 654 +corpor 0 21 3.912023 0.000000 802 +tenni 0 20 3.951244 0.000000 838 +expert 0 20 3.951244 0.000000 833 +beij 0 19 4.007333 0.000000 876 +ultim 0 17 4.110874 0.000000 943 +jose 0 16 4.174387 0.000000 976 +zhang 0 16 4.174387 0.000000 980 +tsinghua 0 13 4.382027 0.000000 1195 +nasa 0 13 4.382027 0.000000 1188 +solari 0 12 4.465908 0.000000 1238 +card 0 10 4.653960 0.000000 1435 +cheat 0 10 4.653960 0.000000 1395 +puzzl 0 5 5.347108 0.000000 2507 +republ 0 4 5.568345 0.000000 3032 +shanghai 0 4 5.568345 0.000000 2925 +hometown 0 3 5.857933 0.000000 3817 +am 0 3 5.857933 0.000000 3386 +weiz 0 2 6.263398 0.000000 4693 +eduwork 0 2 6.263398 0.000000 5813 +windowsnt 0 2 6.263398 0.000000 5440 +tandem 0 2 6.263398 0.000000 5027 +chinaemail 0 1 6.957497 0.000000 20354 +experiencecontractor 0 1 6.957497 0.000000 20355 +tuxedo 0 1 6.957497 0.000000 20356 +pathwai 0 1 6.957497 0.000000 20357 +sherpa 0 1 6.957497 0.000000 20358 +hobbiesma 0 1 6.957497 0.000000 20359 +jiangbridg 0 1 6.957497 0.000000 20360 +pingpong 0 1 6.957497 0.000000 20361 +joggingth 0 1 6.957497 0.000000 20362 +challengesolv 0 1 6.957497 0.000000 20363 +sweeper 0 1 6.957497 0.000000 20364 +dayth 0 1 6.957497 0.000000 20365 +ackowledgementthi 0 1 6.957497 0.000000 20366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..81bcd023 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +provid 0 121 2.079442 0.000000 94 +need 0 98 2.302585 0.000000 135 +peopl 0 96 2.302585 0.000000 132 +pictur 0 89 2.397895 0.000000 160 +west 0 83 2.484907 0.000000 192 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +main 0 67 2.708050 0.000000 256 +explor 0 58 2.890372 0.000000 324 +still 0 50 3.044522 0.000000 362 +telephon 0 50 3.044522 0.000000 373 +streetmadison 0 38 3.295837 0.000000 474 +cluster 0 28 3.610918 0.000000 612 +departmentunivers 0 24 3.761200 0.000000 711 +definit 0 19 4.007333 0.000000 864 +miron 0 14 4.317488 0.000000 1110 +dbm 0 13 4.382027 0.000000 1136 +scan 0 12 4.465908 0.000000 1243 +devis 0 10 4.653960 0.000000 1451 +yanni 0 8 4.875197 0.000000 1713 +ioannidi 0 8 4.875197 0.000000 1714 +wouldn 0 7 5.010635 0.000000 1970 +kent 0 4 5.568345 0.000000 2744 +edufing 0 4 5.568345 0.000000 2713 +wenger 0 3 5.857933 0.000000 4023 +agre 0 3 5.857933 0.000000 4007 +groupuw 0 3 5.857933 0.000000 3934 +preparedfor 0 2 6.263398 0.000000 5886 +workth 0 2 6.263398 0.000000 6137 +andvisu 0 2 6.263398 0.000000 6189 +pothol 0 1 6.957497 0.000000 20367 +wengerassoci 0 1 6.957497 0.000000 20368 +researchercomput 0 1 6.957497 0.000000 20369 +arecod 0 1 6.957497 0.000000 20370 +anddevis 0 1 6.957497 0.000000 20371 +acronym 0 1 6.957497 0.000000 20372 +importantpart 0 1 6.957497 0.000000 20373 +visualizationproduc 0 1 6.957497 0.000000 20374 +livnyraghu 0 1 6.957497 0.000000 20375 +ramakrishnanmor 0 1 6.957497 0.000000 20376 +pagewiscinfo 0 1 6.957497 0.000000 20377 +personallinksimageslast 0 1 6.957497 0.000000 20378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..17b81d76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +provid 0 121 2.079442 0.000000 94 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +world 0 115 2.197225 0.000000 126 +memori 0 101 2.302585 0.000000 139 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +level 0 87 2.484907 0.000000 180 +librari 0 87 2.484907 0.000000 181 +novemb 0 81 2.484907 0.000000 179 +wide 0 84 2.484907 0.000000 185 +interfac 0 79 2.564949 0.000000 209 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +logic 0 71 2.639057 0.000000 230 +name 0 72 2.639057 0.000000 220 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +space 0 57 2.890372 0.000000 310 +think 0 57 2.890372 0.000000 314 +overview 0 56 2.890372 0.000000 323 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +talk 0 53 2.944439 0.000000 336 +hardwar 0 51 2.995732 0.000000 350 +week 0 52 2.995732 0.000000 343 +approach 0 48 3.044522 0.000000 366 +adapt 0 46 3.091042 0.000000 387 +mark 0 44 3.135494 0.000000 403 +mechan 0 43 3.178054 0.000000 416 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +programm 0 39 3.258097 0.000000 445 +slide 0 38 3.295837 0.000000 467 +origin 0 38 3.295837 0.000000 472 +bibliographi 0 34 3.401197 0.000000 518 +articl 0 33 3.433987 0.000000 530 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +propos 0 28 3.610918 0.000000 602 +pass 0 28 3.610918 0.000000 611 +cluster 0 28 3.610918 0.000000 612 +hill 0 25 3.737670 0.000000 670 +annot 0 21 3.912023 0.000000 775 +fund 0 21 3.912023 0.000000 805 +wind 0 18 4.060443 0.000000 908 +four 0 18 4.060443 0.000000 905 +seek 0 17 4.110874 0.000000 954 +massiv 0 15 4.248495 0.000000 1026 +hybrid 0 15 4.248495 0.000000 1057 +node 0 11 4.553877 0.000000 1326 +fpga 0 10 4.653960 0.000000 1433 +tunnel 0 9 4.753590 0.000000 1615 +paradyn 0 9 4.753590 0.000000 1614 +consensu 0 6 5.164786 0.000000 2080 +tempest 0 5 5.347108 0.000000 2548 +middl 0 5 5.347108 0.000000 2372 +hypothet 0 5 5.347108 0.000000 2474 +departmentat 0 5 5.347108 0.000000 2513 +aboutth 0 4 5.568345 0.000000 2720 +ofworkst 0 4 5.568345 0.000000 2679 +markhil 0 4 5.568345 0.000000 2819 +parallellanguag 0 3 5.857933 0.000000 4026 +evolutionari 0 3 5.857933 0.000000 3898 +uniformli 0 2 6.263398 0.000000 6202 +havebeen 0 2 6.263398 0.000000 5830 +snoop 0 2 6.263398 0.000000 5364 +contributor 0 2 6.263398 0.000000 6214 +projectmost 0 1 6.957497 0.000000 20379 +fromworkst 0 1 6.957497 0.000000 20380 +whichprocess 0 1 6.957497 0.000000 20381 +abovesystem 0 1 6.957497 0.000000 20382 +wascoop 0 1 6.957497 0.000000 20383 +toconvent 0 1 6.957497 0.000000 20384 +revolutionari 0 1 6.957497 0.000000 20385 +andprogram 0 1 6.957497 0.000000 20386 +transparentshar 0 1 6.957497 0.000000 20387 +developingimplement 0 1 6.957497 0.000000 20388 +wisconsincow 0 1 6.957497 0.000000 20389 +cowus 0 1 6.957497 0.000000 20390 +sram 0 1 6.957497 0.000000 20391 +collaboratingwith 0 1 6.957497 0.000000 20392 +overviewand 0 1 6.957497 0.000000 20393 +pageor 0 1 6.957497 0.000000 20394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..bdd1a497 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +first 1 140 1.945910 1.945910 71 +like 0 132 1.945910 0.000000 81 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +thing 0 84 2.484907 0.000000 189 +appear 0 78 2.564949 0.000000 210 +creat 0 63 2.772589 0.000000 277 +back 0 60 2.833213 0.000000 297 +finger 0 52 2.995732 0.000000 354 +give 0 50 3.044522 0.000000 359 +featur 0 46 3.091042 0.000000 386 +could 0 46 3.091042 0.000000 383 +obtain 0 33 3.433987 0.000000 534 +product 0 33 3.433987 0.000000 527 +human 0 32 3.465736 0.000000 546 +anim 0 31 3.496508 0.000000 557 +pass 0 28 3.610918 0.000000 611 +team 0 27 3.637586 0.000000 625 +seri 0 24 3.761200 0.000000 708 +wish 0 24 3.761200 0.000000 692 +instead 0 22 3.850148 0.000000 756 +spend 0 19 4.007333 0.000000 850 +statu 0 18 4.060443 0.000000 885 +seem 0 18 4.060443 0.000000 899 +account 0 18 4.060443 0.000000 882 +whether 0 17 4.110874 0.000000 918 +across 0 16 4.174387 0.000000 974 +charact 0 15 4.248495 0.000000 1028 +save 0 14 4.317488 0.000000 1099 +shown 0 14 4.317488 0.000000 1080 +comic 0 14 4.317488 0.000000 1103 +charl 0 13 4.382027 0.000000 1149 +newspap 0 12 4.465908 0.000000 1280 +remov 0 12 4.465908 0.000000 1225 +magic 0 11 4.553877 0.000000 1358 +pick 0 9 4.753590 0.000000 1498 +didn 0 9 4.753590 0.000000 1563 +hundr 0 9 4.753590 0.000000 1528 +star 0 8 4.875197 0.000000 1717 +film 0 8 4.875197 0.000000 1761 +successfulli 0 7 5.010635 0.000000 1869 +televis 1 6 5.164786 5.164786 2118 +famou 0 6 5.164786 0.000000 2185 +strip 0 6 5.164786 0.000000 2203 +put 0 6 5.164786 0.000000 2017 +chat 0 6 5.164786 0.000000 2128 +keeper 0 5 5.347108 0.000000 2569 +trick 0 4 5.568345 0.000000 2967 +accompani 0 4 5.568345 0.000000 2666 +transmit 0 4 5.568345 0.000000 2835 +somehow 0 4 5.568345 0.000000 2974 +keyboard 0 4 5.568345 0.000000 2970 +rival 0 3 5.857933 0.000000 3583 +agre 0 3 5.857933 0.000000 4007 +advertis 0 3 5.857933 0.000000 3788 +felix 0 2 6.263398 0.000000 5103 +princ 0 2 6.263398 0.000000 4813 +wale 0 2 6.263398 0.000000 4827 +mascot 0 2 6.263398 0.000000 6060 +ear 0 2 6.263398 0.000000 5071 +hairbal 0 2 6.263398 0.000000 6237 +xuelin 0 1 6.957497 0.000000 20395 +otto 0 1 6.957497 0.000000 20396 +messmer 0 1 6.957497 0.000000 20397 +whichwa 0 1 6.957497 0.000000 20398 +chaplin 0 1 6.957497 0.000000 20399 +keaton 0 1 6.957497 0.000000 20400 +polo 0 1 6.957497 0.000000 20401 +lindbergh 0 1 6.957497 0.000000 20402 +theatlant 0 1 6.957497 0.000000 20403 +oneev 0 1 6.957497 0.000000 20404 +teeth 0 1 6.957497 0.000000 20405 +whisker 0 1 6.957497 0.000000 20406 +tail 0 1 6.957497 0.000000 20407 +sui 0 1 6.957497 0.000000 20408 +vritabl 0 1 6.957497 0.000000 20409 +partout 0 1 6.957497 0.000000 20410 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..49517107 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +implement 0 152 1.791759 0.000000 52 +problem 0 147 1.945910 0.000000 75 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +file 0 132 1.945910 0.000000 70 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +associ 0 93 2.397895 0.000000 151 +imag 0 91 2.397895 0.000000 161 +solut 0 82 2.484907 0.000000 162 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +larg 0 82 2.484907 0.000000 168 +help 0 83 2.484907 0.000000 175 +contain 0 81 2.484907 0.000000 174 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +optim 0 79 2.564949 0.000000 197 +issu 0 78 2.564949 0.000000 211 +interfac 0 79 2.564949 0.000000 209 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +orient 0 80 2.564949 0.000000 205 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +integr 0 67 2.708050 0.000000 245 +simul 0 66 2.708050 0.000000 255 +august 0 66 2.708050 0.000000 257 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +differ 0 66 2.708050 0.000000 253 +knowledg 0 67 2.708050 0.000000 243 +septemb 0 65 2.772589 0.000000 274 +complex 0 64 2.772589 0.000000 269 +result 0 65 2.772589 0.000000 281 +evalu 0 64 2.772589 0.000000 266 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +march 0 61 2.833213 0.000000 295 +juli 0 60 2.833213 0.000000 305 +special 0 56 2.890372 0.000000 320 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +major 0 56 2.890372 0.000000 315 +index 0 56 2.890372 0.000000 309 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +investig 0 51 2.995732 0.000000 353 +much 0 52 2.995732 0.000000 349 +maintain 0 51 2.995732 0.000000 342 +particular 0 51 2.995732 0.000000 352 +case 0 51 2.995732 0.000000 351 +visual 0 48 3.044522 0.000000 372 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +format 0 48 3.044522 0.000000 356 +set 0 50 3.044522 0.000000 361 +natur 0 44 3.135494 0.000000 406 +futur 0 41 3.218876 0.000000 427 +edit 0 42 3.218876 0.000000 418 +york 0 41 3.218876 0.000000 435 +transact 0 39 3.258097 0.000000 438 +error 0 40 3.258097 0.000000 449 +join 0 39 3.258097 0.000000 457 +must 0 40 3.258097 0.000000 442 +multipl 0 39 3.258097 0.000000 453 +cost 0 37 3.332205 0.000000 480 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +multi 0 36 3.367296 0.000000 493 +survei 0 35 3.401197 0.000000 513 +random 0 34 3.401197 0.000000 511 +approxim 0 35 3.401197 0.000000 509 +statist 0 35 3.401197 0.000000 521 +queri 0 33 3.433987 0.000000 524 +india 0 32 3.465736 0.000000 550 +independ 0 32 3.465736 0.000000 548 +express 0 32 3.465736 0.000000 540 +scientist 0 31 3.496508 0.000000 560 +compon 0 30 3.555348 0.000000 570 +power 0 30 3.555348 0.000000 573 +graph 0 30 3.555348 0.000000 576 +rang 0 30 3.555348 0.000000 565 +focus 0 29 3.583519 0.000000 584 +limit 0 29 3.583519 0.000000 585 +ask 0 28 3.610918 0.000000 597 +load 0 28 3.610918 0.000000 601 +framework 0 28 3.610918 0.000000 606 +proc 0 26 3.688879 0.000000 649 +altern 0 26 3.688879 0.000000 641 +challeng 0 26 3.688879 0.000000 653 +repres 0 26 3.688879 0.000000 656 +effort 0 26 3.688879 0.000000 652 +valu 0 25 3.737670 0.000000 665 +primari 0 25 3.737670 0.000000 669 +although 0 25 3.737670 0.000000 667 +higher 0 24 3.761200 0.000000 690 +size 0 23 3.806662 0.000000 713 +sequenc 0 23 3.806662 0.000000 734 +displai 0 23 3.806662 0.000000 712 +identifi 0 22 3.850148 0.000000 760 +try 0 22 3.850148 0.000000 764 +properti 0 22 3.850148 0.000000 749 +thu 0 21 3.912023 0.000000 773 +among 0 21 3.912023 0.000000 781 +path 0 21 3.912023 0.000000 778 +flexibl 0 21 3.912023 0.000000 792 +sigmod 0 19 4.007333 0.000000 877 +boston 0 19 4.007333 0.000000 862 +concentr 0 18 4.060443 0.000000 906 +record 0 18 4.060443 0.000000 890 +dimension 0 18 4.060443 0.000000 909 +estim 0 17 4.110874 0.000000 930 +miller 0 17 4.110874 0.000000 949 +attempt 0 17 4.110874 0.000000 917 +former 0 17 4.110874 0.000000 956 +ramakrishnan 0 16 4.174387 0.000000 972 +advantag 0 16 4.174387 0.000000 987 +jose 0 16 4.174387 0.000000 976 +cambridg 0 16 4.174387 0.000000 1008 +livni 0 15 4.248495 0.000000 1053 +transit 0 15 4.248495 0.000000 1046 +heterogen 0 14 4.317488 0.000000 1090 +balanc 0 14 4.317488 0.000000 1112 +attribut 0 14 4.317488 0.000000 1092 +primarili 0 13 4.382027 0.000000 1185 +translat 0 13 4.382027 0.000000 1164 +canada 0 13 4.382027 0.000000 1158 +readi 0 12 4.465908 0.000000 1242 +gupta 0 12 4.465908 0.000000 1241 +extrem 0 11 4.553877 0.000000 1330 +cycl 0 11 4.553877 0.000000 1335 +itali 0 11 4.553877 0.000000 1378 +desktop 0 10 4.653960 0.000000 1445 +tradit 0 10 4.653960 0.000000 1404 +genet 0 10 4.653960 0.000000 1409 +vldb 0 10 4.653960 0.000000 1470 +pose 0 9 4.753590 0.000000 1535 +significantli 0 9 4.753590 0.000000 1508 +mode 0 9 4.753590 0.000000 1492 +latter 0 9 4.753590 0.000000 1522 +conferenceon 0 9 4.753590 0.000000 1595 +incomplet 0 9 4.753590 0.000000 1575 +ioannidi 1 8 4.875197 4.875197 1714 +yanni 0 8 4.875197 0.000000 1713 +bridg 0 8 4.875197 0.000000 1764 +closur 0 8 4.875197 0.000000 1643 +solomon 0 8 4.875197 0.000000 1716 +unifi 0 8 4.875197 0.000000 1774 +databasesystem 0 8 4.875197 0.000000 1617 +aris 0 7 5.010635 0.000000 1924 +bombai 0 7 5.010635 0.000000 1972 +montreal 0 7 5.010635 0.000000 1961 +paramet 0 7 5.010635 0.000000 1796 +parametr 0 7 5.010635 0.000000 1819 +throughout 0 7 5.010635 0.000000 1871 +sweden 0 7 5.010635 0.000000 1885 +predic 0 7 5.010635 0.000000 1806 +serial 0 7 5.010635 0.000000 1975 +schema 0 6 5.164786 0.000000 1988 +eduresearch 0 6 5.164786 0.000000 2205 +divers 0 6 5.164786 0.000000 2232 +greec 0 6 5.164786 0.000000 2208 +tsatalo 0 5 5.347108 0.000000 2581 +travers 0 5 5.347108 0.000000 2363 +frog 0 5 5.347108 0.000000 2479 +desk 0 5 5.347108 0.000000 2297 +minneapoli 0 5 5.347108 0.000000 2480 +england 0 5 5.347108 0.000000 2557 +ireland 0 4 5.568345 0.000000 2853 +algorithmsfor 0 4 5.568345 0.000000 2748 +multimediasystem 0 4 5.568345 0.000000 2701 +forparallel 0 4 5.568345 0.000000 2703 +customiz 0 4 5.568345 0.000000 2966 +chile 0 4 5.568345 0.000000 3082 +histogram 0 3 5.857933 0.000000 3490 +propag 0 3 5.857933 0.000000 3997 +disciplin 0 3 5.857933 0.000000 3392 +metaphor 0 3 5.857933 0.000000 4038 +inadequ 0 3 5.857933 0.000000 3730 +andsemant 0 3 5.857933 0.000000 3246 +microscop 0 3 5.857933 0.000000 4035 +publicationsi 0 3 5.857933 0.000000 3827 +conjunct 0 3 5.857933 0.000000 3743 +stockholm 0 3 5.857933 0.000000 3715 +zurich 0 3 5.857933 0.000000 3550 +switzerland 0 3 5.857933 0.000000 3551 +santiago 0 3 5.857933 0.000000 4013 +poosala 0 2 6.263398 0.000000 6228 +queryoptim 0 2 6.263398 0.000000 4057 +garofalaki 0 2 6.263398 0.000000 6209 +dublin 0 2 6.263398 0.000000 4883 +interestsdatabas 0 2 6.263398 0.000000 6116 +andinform 0 2 6.263398 0.000000 5550 +scientificdata 0 2 6.263398 0.000000 6067 +algorithmsa 0 2 6.263398 0.000000 4487 +anneal 0 2 6.263398 0.000000 4136 +basedperform 0 2 6.263398 0.000000 6055 +spectroscopi 0 2 6.263398 0.000000 6206 +anniversari 0 2 6.263398 0.000000 4945 +performanceevalu 0 2 6.263398 0.000000 6052 +bermuda 0 2 6.263398 0.000000 5907 +turtl 0 2 6.263398 0.000000 4235 +haa 0 2 6.263398 0.000000 6115 +gmap 0 2 6.263398 0.000000 6241 +versatil 0 2 6.263398 0.000000 6242 +haber 0 1 6.957497 0.000000 20411 +vldbconfer 0 1 6.957497 0.000000 20412 +tod 0 1 6.957497 0.000000 20413 +ofheterogen 0 1 6.957497 0.000000 20414 +ondatabas 0 1 6.957497 0.000000 20415 +opossum 0 1 6.957497 0.000000 20416 +ioannidisyanni 0 1 6.957497 0.000000 20417 +toqueri 0 1 6.957497 0.000000 20418 +thanin 0 1 6.957497 0.000000 20419 +highera 0 1 6.957497 0.000000 20420 +tooptim 0 1 6.957497 0.000000 20421 +querywil 0 1 6.957497 0.000000 20422 +optimum 0 1 6.957497 0.000000 20423 +viabl 0 1 6.957497 0.000000 20424 +propertiesof 0 1 6.957497 0.000000 20425 +especiallythos 0 1 6.957497 0.000000 20426 +alsopart 0 1 6.957497 0.000000 20427 +appropriateinform 0 1 6.957497 0.000000 20428 +thepropag 0 1 6.957497 0.000000 20429 +ofoptim 0 1 6.957497 0.000000 20430 +inrel 0 1 6.957497 0.000000 20431 +manyexperi 0 1 6.957497 0.000000 20432 +aspectsthat 0 1 6.957497 0.000000 20433 +managementenviron 0 1 6.957497 0.000000 20434 +theirexperiment 0 1 6.957497 0.000000 20435 +arefor 0 1 6.957497 0.000000 20436 +scientistsso 0 1 6.957497 0.000000 20437 +facilitatetransl 0 1 6.957497 0.000000 20438 +experimentalscientif 0 1 6.957497 0.000000 20439 +specificproject 0 1 6.957497 0.000000 20440 +plantgrowth 0 1 6.957497 0.000000 20441 +issueon 0 1 6.957497 0.000000 20442 +beyondrel 0 1 6.957497 0.000000 20443 +forschema 0 1 6.957497 0.000000 20444 +tsangari 0 1 6.957497 0.000000 20445 +tkde 0 1 6.957497 0.000000 20446 +christodoulaki 0 1 6.957497 0.000000 20447 +limitingworst 0 1 6.957497 0.000000 20448 +winger 0 1 6.957497 0.000000 20449 +algorithmsbas 0 1 6.957497 0.000000 20450 +databaseestim 0 1 6.957497 0.000000 20451 +ponnekanti 0 1 6.957497 0.000000 20452 +experimentmanag 0 1 6.957497 0.000000 20453 +itsappl 0 1 6.957497 0.000000 20454 +anjur 0 1 6.957497 0.000000 20455 +bridgesbetween 0 1 6.957497 0.000000 20456 +shekita 0 1 6.957497 0.000000 20457 +forselect 0 1 6.957497 0.000000 20458 +internationalacm 0 1 6.957497 0.000000 20459 +layoutat 0 1 6.957497 0.000000 20460 +granular 0 1 6.957497 0.000000 20461 +advancedvisu 0 1 6.957497 0.000000 20462 +gubbio 0 1 6.957497 0.000000 20463 +managementthrough 0 1 6.957497 0.000000 20464 +practicalityfor 0 1 6.957497 0.000000 20465 +sigmodconfer 0 1 6.957497 0.000000 20466 +forphys 0 1 6.957497 0.000000 20467 +dexa 0 1 6.957497 0.000000 20468 +athen 0 1 6.957497 0.000000 20469 +lashkari 0 1 6.957497 0.000000 20470 +theirdisambigu 0 1 6.957497 0.000000 20471 +schemavisu 0 1 6.957497 0.000000 20472 +edbt 0 1 6.957497 0.000000 20473 +internationalvldb 0 1 6.957497 0.000000 20474 +capacityin 0 1 6.957497 0.000000 20475 +wiener 0 1 6.957497 0.000000 20476 +moos 0 1 6.957497 0.000000 20477 +withdata 0 1 6.957497 0.000000 20478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..eb414f0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +homepag 1 93 2.397895 2.397895 148 +kevin 1 9 4.753590 4.753590 1482 +zhongbin 1 1 6.957497 6.957497 20496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..c47b2e7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +offic 0 299 1.098612 0.000000 13 +modifi 0 178 1.609438 0.000000 35 +madison 0 165 1.791759 0.000000 55 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +support 0 132 1.945910 0.000000 83 +welcom 0 122 2.079442 0.000000 99 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +comment 0 93 2.397895 0.000000 146 +info 0 85 2.484907 0.000000 176 +good 0 77 2.564949 0.000000 200 +collect 0 65 2.772589 0.000000 268 +organ 0 65 2.772589 0.000000 265 +give 0 50 3.044522 0.000000 359 +telephon 0 50 3.044522 0.000000 373 +visitor 0 49 3.044522 0.000000 371 +press 0 42 3.218876 0.000000 419 +littl 0 39 3.258097 0.000000 454 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +articl 0 33 3.433987 0.000000 530 +travel 0 30 3.555348 0.000000 579 +chines 0 29 3.583519 0.000000 595 +univ 0 28 3.610918 0.000000 617 +comp 0 26 3.688879 0.000000 650 +client 0 25 3.737670 0.000000 679 +sometim 0 24 3.761200 0.000000 696 +alumni 0 21 3.912023 0.000000 807 +wrote 0 20 3.951244 0.000000 830 +stat 0 17 4.110874 0.000000 924 +took 0 16 4.174387 0.000000 1010 +month 0 15 4.248495 0.000000 1025 +trip 0 14 4.317488 0.000000 1113 +employ 0 12 4.465908 0.000000 1291 +classmat 0 9 4.753590 0.000000 1516 +chicago 0 6 5.164786 0.000000 2149 +bldg 0 4 5.568345 0.000000 2983 +amaz 0 4 5.568345 0.000000 2600 +usathi 0 2 6.263398 0.000000 5951 +diari 0 2 6.263398 0.000000 4740 +linksmi 0 2 6.263398 0.000000 6215 +oversea 0 2 6.263398 0.000000 5781 +yinng 0 1 6.957497 0.000000 20479 +pageindexofyinongwei 0 1 6.957497 0.000000 20480 +spagehi 0 1 6.957497 0.000000 20481 +alsolink 0 1 6.957497 0.000000 20482 +inforesumehobbiestravel 0 1 6.957497 0.000000 20483 +pointersr 0 1 6.957497 0.000000 20484 +computingmacin 0 1 6.957497 0.000000 20485 +learningpattern 0 1 6.957497 0.000000 20486 +recognitioncomputatin 0 1 6.957497 0.000000 20487 +geometrydatabasevisionacadem 0 1 6.957497 0.000000 20488 +diarythi 0 1 6.957497 0.000000 20489 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 0 1 6.957497 0.000000 20490 +beida 0 1 6.957497 0.000000 20491 +classmatespek 0 1 6.957497 0.000000 20492 +ciumi 0 1 6.957497 0.000000 20493 +bookmarkcom 0 1 6.957497 0.000000 20494 +yinong 0 1 6.957497 0.000000 20495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..069a0755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +call 1 91 2.397895 2.397895 153 +room 0 59 2.833213 0.000000 301 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +california 0 46 3.091042 0.000000 388 +around 0 43 3.178054 0.000000 415 +strategi 0 25 3.737670 0.000000 682 +voic 0 21 3.912023 0.000000 806 +hous 0 21 3.912023 0.000000 801 +beauti 0 18 4.060443 0.000000 912 +drive 0 15 4.248495 0.000000 1052 +food 1 12 4.465908 4.465908 1285 +poor 1 8 4.875197 4.875197 1736 +matthew 0 6 5.164786 0.000000 2193 +parent 0 6 5.164786 0.000000 2204 +observatori 0 4 5.568345 0.000000 3070 +matt 0 3 5.857933 0.000000 3792 +beach 0 3 5.857933 0.000000 3782 +convuls 1 1 6.957497 6.957497 20497 +sera 1 1 6.957497 6.957497 20498 +pageuntil 0 1 6.957497 0.000000 20499 +zeidenbergcent 0 1 6.957497 0.000000 20500 +gilson 0 1 6.957497 0.000000 20501 +zeiden 0 1 6.957497 0.000000 20502 +eduzeidenb 0 1 6.957497 0.000000 20503 +eduwhen 0 1 6.957497 0.000000 20504 +coho 0 1 6.957497 0.000000 20505 +huntington 0 1 6.957497 0.000000 20506 +breton 0 1 6.957497 0.000000 20507 +nadja 0 1 6.957497 0.000000 20508 +beaut 0 1 6.957497 0.000000 20509 +saint 0 1 6.957497 0.000000 20510 +whyth 0 1 6.957497 0.000000 20511 +communist 0 1 6.957497 0.000000 20512 +helder 0 1 6.957497 0.000000 20513 +camara 0 1 6.957497 0.000000 20514 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..923426e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +databas 0 122 2.079442 0.000000 86 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +document 0 121 2.079442 0.000000 89 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +find 0 111 2.197225 0.000000 111 +technic 0 100 2.302585 0.000000 140 +need 0 98 2.302585 0.000000 135 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +select 0 91 2.397895 0.000000 154 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +educ 0 86 2.484907 0.000000 191 +resourc 0 81 2.484907 0.000000 172 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +effici 0 73 2.639057 0.000000 233 +intellig 0 72 2.639057 0.000000 225 +symposium 0 72 2.639057 0.000000 238 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +prof 0 64 2.772589 0.000000 273 +plan 0 65 2.772589 0.000000 272 +dept 0 64 2.772589 0.000000 291 +function 0 62 2.772589 0.000000 275 +import 0 65 2.772589 0.000000 282 +interact 0 62 2.772589 0.000000 270 +organ 0 65 2.772589 0.000000 265 +room 0 59 2.833213 0.000000 301 +juli 0 60 2.833213 0.000000 305 +major 0 56 2.890372 0.000000 315 +thesi 0 57 2.890372 0.000000 327 +space 0 57 2.890372 0.000000 310 +run 0 51 2.995732 0.000000 347 +profession 0 51 2.995732 0.000000 345 +telephon 0 50 3.044522 0.000000 373 +fast 0 42 3.218876 0.000000 429 +submit 0 39 3.258097 0.000000 440 +probabl 0 40 3.258097 0.000000 455 +purpos 0 37 3.332205 0.000000 481 +china 0 37 3.332205 0.000000 487 +robot 0 36 3.367296 0.000000 497 +multi 0 36 3.367296 0.000000 493 +statist 0 35 3.401197 0.000000 521 +eduoffic 0 33 3.433987 0.000000 531 +given 0 32 3.465736 0.000000 538 +domain 0 30 3.555348 0.000000 564 +limit 0 29 3.583519 0.000000 585 +cluster 0 28 3.610918 0.000000 612 +univ 0 28 3.610918 0.000000 617 +manipul 0 27 3.637586 0.000000 624 +mine 0 26 3.688879 0.000000 654 +proc 0 26 3.688879 0.000000 649 +relev 0 26 3.688879 0.000000 637 +accur 0 25 3.737670 0.000000 680 +pattern 0 24 3.761200 0.000000 689 +motion 0 24 3.761200 0.000000 699 +compress 0 23 3.806662 0.000000 719 +recognit 0 23 3.806662 0.000000 723 +mobil 0 23 3.806662 0.000000 730 +identifi 0 22 3.850148 0.000000 760 +cooper 0 22 3.850148 0.000000 757 +divis 0 21 3.912023 0.000000 803 +path 0 21 3.912023 0.000000 778 +kernel 0 20 3.951244 0.000000 825 +sigmod 0 19 4.007333 0.000000 877 +beij 0 19 4.007333 0.000000 876 +region 0 19 4.007333 0.000000 875 +concentr 0 18 4.060443 0.000000 906 +dimension 0 18 4.060443 0.000000 909 +estim 0 17 4.110874 0.000000 930 +zhang 1 16 4.174387 4.174387 980 +ramakrishnan 0 16 4.174387 0.000000 972 +spars 0 16 4.174387 0.000000 989 +young 0 16 4.174387 0.000000 991 +livni 0 15 4.248495 0.000000 1053 +configur 0 15 4.248495 0.000000 1012 +miron 0 14 4.317488 0.000000 1110 +topolog 0 14 4.317488 0.000000 1089 +finit 0 14 4.317488 0.000000 1106 +joint 0 13 4.382027 0.000000 1130 +canada 0 13 4.382027 0.000000 1158 +conf 0 13 4.382027 0.000000 1181 +raghu 0 12 4.465908 0.000000 1212 +grow 0 12 4.465908 0.000000 1209 +amount 0 12 4.465908 0.000000 1208 +overal 0 12 4.465908 0.000000 1254 +branch 0 11 4.553877 0.000000 1318 +discov 0 9 4.753590 0.000000 1562 +classif 0 9 4.753590 0.000000 1586 +manufactur 0 8 4.875197 0.000000 1634 +dataset 0 7 5.010635 0.000000 1914 +densiti 0 7 5.010635 0.000000 1927 +discoveri 0 7 5.010635 0.000000 1915 +trend 0 7 5.010635 0.000000 1842 +dimens 0 7 5.010635 0.000000 1930 +reduct 0 7 5.010635 0.000000 1877 +financi 0 6 5.164786 0.000000 2197 +invest 0 6 5.164786 0.000000 2153 +ling 0 4 5.568345 0.000000 3045 +exploratori 0 4 5.568345 0.000000 3073 +ijcai 0 4 5.568345 0.000000 2901 +tian 0 3 5.857933 0.000000 3680 +birch 0 2 6.263398 0.000000 6136 +ortool 0 2 6.263398 0.000000 4169 +andmanufactur 0 2 6.263398 0.000000 6244 +collis 0 2 6.263398 0.000000 5956 +jianwei 0 1 6.957497 0.000000 20515 +assistantadvisor 0 1 6.957497 0.000000 20516 +compilerminor 0 1 6.957497 0.000000 20517 +bankingoffic 0 1 6.957497 0.000000 20518 +intereststher 0 1 6.957497 0.000000 20519 +territori 0 1 6.957497 0.000000 20520 +densityanalysi 0 1 6.957497 0.000000 20521 +crowd 0 1 6.957497 0.000000 20522 +dataclassif 0 1 6.957497 0.000000 20523 +knowledgediscoveri 0 1 6.957497 0.000000 20524 +dimensionreduct 0 1 6.957497 0.000000 20525 +findpath 0 1 6.957497 0.000000 20526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..2d8c1e07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +data 1 170 1.791759 1.791759 49 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +dayton 0 119 2.079442 0.000000 104 +site 1 106 2.197225 2.197225 119 +assist 0 112 2.197225 0.000000 113 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +west 0 83 2.484907 0.000000 192 +server 1 76 2.564949 2.564949 204 +line 0 75 2.639057 0.000000 231 +new 0 64 2.772589 0.000000 262 +prof 0 64 2.772589 0.000000 273 +detail 0 57 2.890372 0.000000 321 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +graph 0 30 3.555348 0.000000 576 +chines 0 29 3.583519 0.000000 595 +mine 0 26 3.688879 0.000000 654 +jeff 0 25 3.737670 0.000000 673 +todai 0 25 3.737670 0.000000 672 +daili 0 24 3.761200 0.000000 706 +yahoo 0 24 3.761200 0.000000 707 +benchmark 0 19 4.007333 0.000000 859 +north 0 19 4.007333 0.000000 873 +sigmod 0 19 4.007333 0.000000 877 +lyco 0 19 4.007333 0.000000 871 +stock 0 16 4.174387 0.000000 1007 +taiwan 0 16 4.174387 0.000000 1006 +club 0 15 4.248495 0.000000 1058 +dbm 0 13 4.382027 0.000000 1136 +excit 0 11 4.553877 0.000000 1329 +surf 0 11 4.553877 0.000000 1301 +naughton 0 10 4.653960 0.000000 1450 +analyt 0 7 5.010635 0.000000 1913 +monei 0 7 5.010635 0.000000 1934 +financi 0 6 5.164786 0.000000 2197 +advis 0 6 5.164786 0.000000 2173 +carolina 0 6 5.164786 0.000000 2142 +maryland 0 6 5.164786 0.000000 2140 +chapel 0 5 5.347108 0.000000 2457 +zhao 0 4 5.568345 0.000000 2699 +ters 0 3 5.857933 0.000000 3297 +pathfind 0 2 6.263398 0.000000 6053 +olap 0 2 6.263398 0.000000 6233 +arbor 0 2 6.263398 0.000000 6235 +molap 0 2 6.263398 0.000000 6217 +yihong 0 1 6.957497 0.000000 20527 +educationb 0 1 6.957497 0.000000 20528 +hillm 0 1 6.957497 0.000000 20529 +wiscosin 0 1 6.957497 0.000000 20530 +datamin 0 1 6.957497 0.000000 20531 +microstrategi 0 1 6.957497 0.000000 20532 +rolap 0 1 6.957497 0.000000 20533 +lombard 0 1 6.957497 0.000000 20534 +kiwi 0 1 6.957497 0.000000 20535 +pgmo 0 1 6.957497 0.000000 20536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..8f8b65fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +address 1 170 1.791759 1.791759 62 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +homepag 0 93 2.397895 0.000000 148 +street 0 63 2.772589 0.000000 293 +still 0 50 3.044522 0.000000 362 +offer 0 43 3.178054 0.000000 414 +wang 0 21 3.912023 0.000000 790 +johnson 0 13 4.382027 0.000000 1162 +zhewang 0 1 6.957497 0.000000 20537 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..be92993d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +perform 0 143 1.945910 0.000000 74 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +compil 0 122 2.079442 0.000000 96 +dayton 0 119 2.079442 0.000000 104 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +postscript 0 131 2.079442 0.000000 90 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +intern 0 108 2.197225 0.000000 128 +place 0 106 2.197225 0.000000 124 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +environ 0 84 2.484907 0.000000 177 +issu 0 78 2.564949 0.000000 211 +orient 0 80 2.564949 0.000000 205 +nation 0 74 2.639057 0.000000 240 +html 0 75 2.639057 0.000000 235 +simul 0 66 2.708050 0.000000 255 +java 0 70 2.708050 0.000000 248 +evalu 0 64 2.772589 0.000000 266 +laboratori 0 63 2.772589 0.000000 292 +juli 0 60 2.833213 0.000000 305 +share 0 59 2.833213 0.000000 304 +think 0 57 2.890372 0.000000 314 +publish 0 57 2.890372 0.000000 326 +sever 0 56 2.890372 0.000000 322 +advisor 0 51 2.995732 0.000000 355 +friend 0 48 3.044522 0.000000 376 +press 0 42 3.218876 0.000000 419 +combin 0 42 3.218876 0.000000 421 +workstat 0 37 3.332205 0.000000 479 +field 0 37 3.332205 0.000000 482 +china 0 37 3.332205 0.000000 487 +jame 0 35 3.401197 0.000000 507 +award 0 34 3.401197 0.000000 523 +particip 0 29 3.583519 0.000000 589 +cluster 0 28 3.610918 0.000000 612 +detect 0 26 3.688879 0.000000 646 +supercomput 0 25 3.737670 0.000000 681 +benchmark 0 19 4.007333 0.000000 859 +predict 0 19 4.007333 0.000000 855 +asplo 0 17 4.110874 0.000000 948 +novel 0 15 4.248495 0.000000 1039 +paradyn 0 9 4.753590 0.000000 1614 +tunnel 0 9 4.753590 0.000000 1615 +andth 0 9 4.753590 0.000000 1481 +antonio 0 6 5.164786 0.000000 2186 +barton 0 5 5.347108 0.000000 2371 +ofparallel 0 5 5.347108 0.000000 2380 +departmentat 0 5 5.347108 0.000000 2513 +anddistribut 0 4 5.568345 0.000000 3031 +bottleneck 0 4 5.568345 0.000000 2769 +fudan 0 3 5.857933 0.000000 3707 +blizzard 0 2 6.263398 0.000000 6226 +levelprogram 0 2 6.263398 0.000000 5452 +zhichen 0 1 6.957497 0.000000 20538 +larusprofessor 0 1 6.957497 0.000000 20539 +millerawardbest 0 1 6.957497 0.000000 20540 +eliminateperform 0 1 6.957497 0.000000 20541 +toolwith 0 1 6.957497 0.000000 20542 +wisconsinwind 0 1 6.957497 0.000000 20543 +interestprogram 0 1 6.957497 0.000000 20544 +andimcrement 0 1 6.957497 0.000000 20545 +programjourn 0 1 6.957497 0.000000 20546 +researchchines 0 1 6.957497 0.000000 20547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..f1445fe0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 0 119 2.079442 0.000000 104 +pictur 0 89 2.397895 0.000000 160 +west 0 83 2.484907 0.000000 192 +name 0 72 2.639057 0.000000 220 +street 0 63 2.772589 0.000000 293 +taken 0 31 3.496508 0.000000 555 +chen 0 21 3.912023 0.000000 791 +wang 0 21 3.912023 0.000000 790 +zhang 1 16 4.174387 4.174387 980 +tsinghua 0 13 4.382027 0.000000 1195 +hello 0 10 4.653960 0.000000 1407 +invit 0 10 4.653960 0.000000 1428 +restaur 0 6 5.164786 0.000000 2230 +theth 0 5 5.347108 0.000000 2325 +tong 0 3 5.857933 0.000000 3258 +supper 0 1 6.957497 0.000000 20548 +weihai 0 1 6.957497 0.000000 20549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..61b49ea9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +student 0 343 1.098612 0.000000 19 +number 0 130 2.079442 0.000000 97 +pictur 0 89 2.397895 0.000000 160 +visitor 0 49 3.044522 0.000000 371 +thank 0 23 3.806662 0.000000 721 +stop 0 17 4.110874 0.000000 942 +poland 0 3 5.857933 0.000000 3665 +inc 0 2 6.263398 0.000000 5914 +krzysztof 0 1 6.957497 0.000000 20550 +zmudzinskikrzysztof 0 1 6.957497 0.000000 20551 +zmudzinskispin 0 1 6.957497 0.000000 20552 +pole 0 1 6.957497 0.000000 20553 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..5114e5af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +perform 0 143 1.945910 0.000000 74 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +number 0 130 2.079442 0.000000 97 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +educ 0 86 2.484907 0.000000 191 +help 0 83 2.484907 0.000000 175 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +logic 0 71 2.639057 0.000000 230 +servic 0 72 2.639057 0.000000 236 +test 1 66 2.708050 2.708050 252 +integr 0 67 2.708050 0.000000 245 +simul 0 66 2.708050 0.000000 255 +goal 0 66 2.708050 0.000000 250 +view 0 70 2.708050 0.000000 254 +laboratori 0 63 2.772589 0.000000 292 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +best 0 59 2.833213 0.000000 299 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +much 0 52 2.995732 0.000000 349 +tabl 0 51 2.995732 0.000000 346 +anoth 0 45 3.135494 0.000000 408 +fridai 0 44 3.135494 0.000000 390 +combin 0 42 3.218876 0.000000 421 +littl 0 39 3.258097 0.000000 454 +programm 0 39 3.258097 0.000000 445 +continu 0 39 3.258097 0.000000 448 +credit 0 38 3.295837 0.000000 460 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +profil 0 30 3.555348 0.000000 581 +built 0 29 3.583519 0.000000 592 +arrai 0 27 3.637586 0.000000 627 +enhanc 0 26 3.688879 0.000000 644 +reliabl 0 25 3.737670 0.000000 674 +wai 0 25 3.737670 0.000000 662 +compress 0 23 3.806662 0.000000 719 +self 0 22 3.850148 0.000000 761 +vlsi 0 21 3.912023 0.000000 795 +hous 0 21 3.912023 0.000000 801 +facil 0 20 3.951244 0.000000 814 +concentr 0 18 4.060443 0.000000 906 +engineeringunivers 0 17 4.110874 0.000000 959 +modif 0 17 4.110874 0.000000 913 +monitor 0 17 4.110874 0.000000 941 +normal 0 16 4.174387 0.000000 995 +photograph 0 15 4.248495 0.000000 1056 +webmast 0 15 4.248495 0.000000 1045 +circuit 0 13 4.382027 0.000000 1131 +carri 0 13 4.382027 0.000000 1152 +station 0 13 4.382027 0.000000 1157 +engr 0 10 4.653960 0.000000 1427 +penalti 0 10 4.653960 0.000000 1405 +iowa 0 7 5.010635 0.000000 1971 +compact 0 7 5.010635 0.000000 1907 +asystem 0 4 5.568345 0.000000 2612 +termin 0 4 5.568345 0.000000 2852 +fountain 0 4 5.568345 0.000000 3069 +eduupd 0 4 5.568345 0.000000 3056 +saluja 0 3 5.857933 0.000000 3104 +eduportrait 0 3 5.857933 0.000000 4039 +fabric 0 3 5.857933 0.000000 3607 +consortia 0 3 5.857933 0.000000 4040 +cdtthi 0 3 5.857933 0.000000 4041 +testabl 0 2 6.263398 0.000000 5606 +kewal 0 2 6.263398 0.000000 4072 +drivemadison 0 2 6.263398 0.000000 6245 +andsequenti 0 2 6.263398 0.000000 4532 +salujaprofessor 0 1 6.957497 0.000000 20554 +jpgdepartmentselectr 0 1 6.957497 0.000000 20555 +engineeringcomput 0 1 6.957497 0.000000 20556 +interestsdesign 0 1 6.957497 0.000000 20557 +testableand 0 1 6.957497 0.000000 20558 +thisarea 0 1 6.957497 0.000000 20559 +theresearch 0 1 6.957497 0.000000 20560 +testgener 0 1 6.957497 0.000000 20561 +inself 0 1 6.957497 0.000000 20562 +andfault 0 1 6.957497 0.000000 20563 +methodsapplic 0 1 6.957497 0.000000 20564 +testenviron 0 1 6.957497 0.000000 20565 +regularstructur 0 1 6.957497 0.000000 20566 +ram 0 1 6.957497 0.000000 20567 +areinvestig 0 1 6.957497 0.000000 20568 +inhardwar 0 1 6.957497 0.000000 20569 +projectw 0 1 6.957497 0.000000 20570 +thatth 0 1 6.957497 0.000000 20571 +noimpact 0 1 6.957497 0.000000 20572 +digitalsystem 0 1 6.957497 0.000000 20573 +withcolor 0 1 6.957497 0.000000 20574 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..1ab2eab0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +perform 0 143 1.945910 0.000000 74 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +schedul 0 119 2.079442 0.000000 85 +theori 0 111 2.197225 0.000000 127 +well 0 109 2.197225 0.000000 121 +teach 0 108 2.197225 0.000000 112 +advanc 0 99 2.302585 0.000000 130 +center 0 88 2.397895 0.000000 158 +real 0 93 2.397895 0.000000 144 +associ 0 93 2.397895 0.000000 151 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +help 0 83 2.484907 0.000000 175 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +involv 0 71 2.639057 0.000000 227 +materi 0 75 2.639057 0.000000 221 +servic 0 72 2.639057 0.000000 236 +tuesdai 0 73 2.639057 0.000000 219 +integr 0 67 2.708050 0.000000 245 +test 0 66 2.708050 0.000000 252 +view 0 70 2.708050 0.000000 254 +guid 0 63 2.772589 0.000000 267 +complex 0 64 2.772589 0.000000 269 +evalu 0 64 2.772589 0.000000 266 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +autom 0 41 3.218876 0.000000 434 +author 0 39 3.258097 0.000000 450 +close 0 38 3.295837 0.000000 465 +industri 0 38 3.295837 0.000000 464 +credit 0 38 3.295837 0.000000 460 +cost 0 37 3.332205 0.000000 480 +robot 0 36 3.367296 0.000000 497 +product 0 33 3.433987 0.000000 527 +toler 0 33 3.433987 0.000000 533 +human 0 32 3.465736 0.000000 546 +fault 0 32 3.465736 0.000000 547 +profil 0 30 3.555348 0.000000 581 +hope 0 28 3.610918 0.000000 610 +scale 0 28 3.610918 0.000000 613 +experiment 0 26 3.688879 0.000000 645 +handl 0 24 3.761200 0.000000 685 +highli 0 23 3.806662 0.000000 725 +self 0 22 3.850148 0.000000 761 +finish 0 22 3.850148 0.000000 748 +reduc 0 22 3.850148 0.000000 759 +properti 0 22 3.850148 0.000000 749 +director 0 22 3.850148 0.000000 767 +flexibl 0 21 3.912023 0.000000 792 +fund 0 21 3.912023 0.000000 805 +increas 0 20 3.951244 0.000000 829 +feedback 0 19 4.007333 0.000000 854 +engineeringunivers 0 17 4.110874 0.000000 959 +precis 0 15 4.248495 0.000000 1023 +hierarch 0 15 4.248495 0.000000 1018 +photograph 0 15 4.248495 0.000000 1056 +webmast 0 15 4.248495 0.000000 1045 +incorpor 0 13 4.382027 0.000000 1163 +nasa 0 13 4.382027 0.000000 1188 +carri 0 13 4.382027 0.000000 1152 +engr 0 10 4.653960 0.000000 1427 +weld 0 9 4.753590 0.000000 1538 +factor 0 9 4.753590 0.000000 1544 +manufactur 0 8 4.875197 0.000000 1634 +sensor 0 7 5.010635 0.000000 1920 +explain 0 7 5.010635 0.000000 1816 +actuat 0 5 5.347108 0.000000 2442 +neil 0 4 5.568345 0.000000 2841 +fulli 0 4 5.568345 0.000000 2986 +emphas 0 4 5.568345 0.000000 2672 +fountain 0 4 5.568345 0.000000 3069 +eduupd 0 4 5.568345 0.000000 3056 +eduportrait 0 3 5.857933 0.000000 4039 +aerospac 0 3 5.857933 0.000000 3555 +consortia 0 3 5.857933 0.000000 4040 +cdtthi 0 3 5.857933 0.000000 4041 +duffi 0 2 6.263398 0.000000 4845 +telerobot 0 2 6.263398 0.000000 4847 +drivemadison 0 2 6.263398 0.000000 6245 +departmentsmechan 0 1 6.957497 0.000000 20575 +engineeringeducationb 0 1 6.957497 0.000000 20576 +madisonm 0 1 6.957497 0.000000 20577 +madisonphd 0 1 6.957497 0.000000 20578 +madisonresearch 0 1 6.957497 0.000000 20579 +interestsrobot 0 1 6.957497 0.000000 20580 +micromechanismscent 0 1 6.957497 0.000000 20581 +consortiamanufactur 0 1 6.957497 0.000000 20582 +programwisconsin 0 1 6.957497 0.000000 20583 +roboticsprofessor 0 1 6.957497 0.000000 20584 +inspect 0 1 6.957497 0.000000 20585 +mold 0 1 6.957497 0.000000 20586 +rework 0 1 6.957497 0.000000 20587 +agricultur 0 1 6.957497 0.000000 20588 +tactil 0 1 6.957497 0.000000 20589 +sensori 0 1 6.957497 0.000000 20590 +fatigu 0 1 6.957497 0.000000 20591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..8483f7dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +intern 0 108 2.197225 0.000000 128 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +present 0 91 2.397895 0.000000 145 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +journal 0 83 2.484907 0.000000 183 +activ 0 84 2.484907 0.000000 182 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +april 0 77 2.564949 0.000000 196 +appli 0 71 2.639057 0.000000 226 +servic 0 72 2.639057 0.000000 236 +simul 0 66 2.708050 0.000000 255 +practic 0 70 2.708050 0.000000 246 +thursdai 0 70 2.708050 0.000000 241 +view 0 70 2.708050 0.000000 254 +function 0 62 2.772589 0.000000 275 +foundat 0 62 2.772589 0.000000 286 +januari 0 62 2.772589 0.000000 264 +creat 0 63 2.772589 0.000000 277 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +simpl 0 60 2.833213 0.000000 298 +best 0 59 2.833213 0.000000 299 +space 0 57 2.890372 0.000000 310 +major 0 56 2.890372 0.000000 315 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +suggest 0 53 2.944439 0.000000 331 +investig 0 51 2.995732 0.000000 353 +maintain 0 51 2.995732 0.000000 342 +tabl 0 51 2.995732 0.000000 346 +physic 0 47 3.091042 0.000000 377 +california 0 46 3.091042 0.000000 388 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +mechan 0 43 3.178054 0.000000 416 +term 0 43 3.178054 0.000000 411 +http 0 41 3.218876 0.000000 420 +york 0 41 3.218876 0.000000 435 +autom 0 41 3.218876 0.000000 434 +form 0 39 3.258097 0.000000 443 +transact 0 39 3.258097 0.000000 438 +industri 0 38 3.295837 0.000000 464 +credit 0 38 3.295837 0.000000 460 +formal 0 37 3.332205 0.000000 478 +represent 0 35 3.401197 0.000000 512 +award 0 34 3.401197 0.000000 523 +product 0 33 3.433987 0.000000 527 +collabor 0 32 3.465736 0.000000 543 +focu 0 30 3.555348 0.000000 571 +profil 0 30 3.555348 0.000000 581 +manipul 0 27 3.637586 0.000000 624 +repres 0 26 3.688879 0.000000 656 +consist 0 26 3.688879 0.000000 651 +effort 0 26 3.688879 0.000000 652 +reliabl 0 25 3.737670 0.000000 674 +todai 0 25 3.737670 0.000000 672 +aspect 0 25 3.737670 0.000000 663 +fellow 0 24 3.761200 0.000000 701 +famili 0 23 3.806662 0.000000 735 +geometri 0 22 3.850148 0.000000 752 +deal 0 22 3.850148 0.000000 736 +thu 0 21 3.912023 0.000000 773 +basi 0 20 3.951244 0.000000 828 +geometr 0 19 4.007333 0.000000 852 +separ 0 19 4.007333 0.000000 844 +behavior 0 18 4.060443 0.000000 881 +aid 0 18 4.060443 0.000000 904 +engineeringunivers 0 17 4.110874 0.000000 959 +analyz 0 17 4.110874 0.000000 925 +seek 0 17 4.110874 0.000000 954 +novel 0 15 4.248495 0.000000 1039 +photograph 0 15 4.248495 0.000000 1056 +webmast 0 15 4.248495 0.000000 1045 +topolog 0 14 4.317488 0.000000 1089 +convert 0 13 4.382027 0.000000 1122 +cannot 0 13 4.382027 0.000000 1144 +discret 0 13 4.382027 0.000000 1165 +career 0 12 4.465908 0.000000 1287 +captur 0 12 4.465908 0.000000 1232 +abil 0 11 4.553877 0.000000 1341 +engr 0 10 4.653960 0.000000 1427 +decomposit 0 10 4.653960 0.000000 1439 +relationship 0 10 4.653960 0.000000 1383 +facilit 0 10 4.653960 0.000000 1412 +mainten 0 9 4.753590 0.000000 1543 +establish 0 9 4.753590 0.000000 1532 +shapiro 0 8 4.875197 0.000000 1686 +manufactur 0 8 4.875197 0.000000 1634 +combinatori 0 8 4.875197 0.000000 1629 +competit 0 8 4.875197 0.000000 1635 +convers 0 8 4.875197 0.000000 1673 +boundari 0 7 5.010635 0.000000 1929 +appar 0 7 5.010635 0.000000 1958 +ongo 0 6 5.164786 0.000000 2215 +lack 0 6 5.164786 0.000000 1994 +solid 0 5 5.347108 0.000000 2255 +rigid 0 5 5.347108 0.000000 2432 +chain 0 4 5.568345 0.000000 2712 +phenomena 0 4 5.568345 0.000000 2962 +languagesand 0 4 5.568345 0.000000 3071 +fountain 0 4 5.568345 0.000000 3069 +eduupd 0 4 5.568345 0.000000 3056 +systemat 0 3 5.857933 0.000000 3781 +eduportrait 0 3 5.857933 0.000000 4039 +motor 0 3 5.857933 0.000000 3909 +fabric 0 3 5.857933 0.000000 3607 +consortia 0 3 5.857933 0.000000 4040 +cdtthi 0 3 5.857933 0.000000 4041 +artifact 0 2 6.263398 0.000000 5346 +avenuemadison 0 2 6.263398 0.000000 4842 +interestscomput 0 2 6.263398 0.000000 6113 +palmer 0 2 6.263398 0.000000 5453 +methodsand 0 2 6.263398 0.000000 5779 +amajor 0 2 6.263398 0.000000 5343 +designand 0 2 6.263398 0.000000 6100 +andmanufactur 0 2 6.263398 0.000000 6244 +tomanufactur 0 2 6.263398 0.000000 6016 +ofnew 0 2 6.263398 0.000000 5881 +vadim 0 1 6.957497 0.000000 20592 +vshapiro 0 1 6.957497 0.000000 20593 +jpgurl 0 1 6.957497 0.000000 20594 +departmentscomput 0 1 6.957497 0.000000 20595 +sciencemechan 0 1 6.957497 0.000000 20596 +engineeringeducationba 0 1 6.957497 0.000000 20597 +universitym 0 1 6.957497 0.000000 20598 +angelesm 0 1 6.957497 0.000000 20599 +universityphd 0 1 6.957497 0.000000 20600 +univeristyresearch 0 1 6.957497 0.000000 20601 +automationcent 0 1 6.957497 0.000000 20602 +consortiamathemat 0 1 6.957497 0.000000 20603 +programmanufactur 0 1 6.957497 0.000000 20604 +programspati 0 1 6.957497 0.000000 20605 +laboratoryselect 0 1 6.957497 0.000000 20606 +honorsn 0 1 6.957497 0.000000 20607 +vossler 0 1 6.957497 0.000000 20608 +betweengeometri 0 1 6.957497 0.000000 20609 +bemodel 0 1 6.957497 0.000000 20610 +manufacturedbas 0 1 6.957497 0.000000 20611 +ofdistinct 0 1 6.957497 0.000000 20612 +technologicalbarri 0 1 6.957497 0.000000 20613 +undermin 0 1 6.957497 0.000000 20614 +commercialgeometr 0 1 6.957497 0.000000 20615 +eliminatingambigu 0 1 6.957497 0.000000 20616 +ofparametr 0 1 6.957497 0.000000 20617 +bedescrib 0 1 6.957497 0.000000 20618 +interactingprimit 0 1 6.957497 0.000000 20619 +roadblock 0 1 6.957497 0.000000 20620 +withtheoret 0 1 6.957497 0.000000 20621 +smoothintegr 0 1 6.957497 0.000000 20622 +thedesir 0 1 6.957497 0.000000 20623 +tounifi 0 1 6.957497 0.000000 20624 +theseand 0 1 6.957497 0.000000 20625 +physicalobject 0 1 6.957497 0.000000 20626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..c5d2b29a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +texa 0 160 1.791759 0.000000 64 +avail 0 169 1.791759 0.000000 48 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +mathemat 0 108 2.197225 0.000000 123 +site 0 106 2.197225 0.000000 119 +theori 0 111 2.197225 0.000000 127 +part 0 98 2.302585 0.000000 129 +present 0 91 2.397895 0.000000 145 +method 0 80 2.564949 0.000000 213 +want 0 79 2.564949 0.000000 199 +logic 0 71 2.639057 0.000000 230 +appli 0 71 2.639057 0.000000 226 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +previou 0 62 2.772589 0.000000 290 +improv 0 62 2.772589 0.000000 289 +descript 0 64 2.772589 0.000000 271 +index 0 56 2.890372 0.000000 309 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +visitor 0 49 3.044522 0.000000 371 +electron 0 47 3.091042 0.000000 379 +done 0 47 3.091042 0.000000 381 +natur 0 44 3.135494 0.000000 406 +autom 0 41 3.218876 0.000000 434 +past 0 42 3.218876 0.000000 428 +continu 0 39 3.258097 0.000000 448 +late 0 40 3.258097 0.000000 439 +tech 0 35 3.401197 0.000000 515 +ad 0 32 3.465736 0.000000 544 +produc 0 30 3.555348 0.000000 572 +computersci 0 30 3.555348 0.000000 562 +robert 0 30 3.555348 0.000000 567 +profil 0 30 3.555348 0.000000 581 +intend 0 28 3.610918 0.000000 599 +higher 0 24 3.761200 0.000000 690 +seri 0 24 3.761200 0.000000 708 +other 0 24 3.761200 0.000000 697 +proof 0 23 3.806662 0.000000 720 +geometri 0 22 3.850148 0.000000 752 +theorem 0 21 3.912023 0.000000 786 +prove 0 19 4.007333 0.000000 848 +feedback 0 19 4.007333 0.000000 854 +primarili 0 13 4.382027 0.000000 1185 +deduct 0 12 4.465908 0.000000 1236 +benjamin 0 11 4.553877 0.000000 1296 +incomplet 0 9 4.753590 0.000000 1575 +ataustin 0 9 4.753590 0.000000 1610 +prover 1 8 4.875197 4.875197 1653 +boyer 0 6 5.164786 0.000000 2013 +inequ 0 6 5.164786 0.000000 2113 +groupth 0 5 5.347108 0.000000 2549 +bledso 0 4 5.568345 0.000000 2999 +systemsand 0 4 5.568345 0.000000 2804 +chou 0 4 5.568345 0.000000 3033 +analog 0 4 5.568345 0.000000 2875 +feng 0 3 5.857933 0.000000 3300 +woodi 0 2 6.263398 0.000000 5459 +hine 0 2 6.263398 0.000000 4475 +intent 0 2 6.263398 0.000000 5768 +herei 0 2 6.263398 0.000000 6187 +hein 0 1 6.957497 0.000000 20627 +borel 0 1 6.957497 0.000000 20628 +groupautom 0 1 6.957497 0.000000 20629 +techreport 0 1 6.957497 0.000000 20630 +reportseri 0 1 6.957497 0.000000 20631 +grouplarri 0 1 6.957497 0.000000 20632 +hinesmarti 0 1 6.957497 0.000000 20633 +mayberrybenjamin 0 1 6.957497 0.000000 20634 +shultsalumniprevi 0 1 6.957497 0.000000 20635 +robertboyerj 0 1 6.957497 0.000000 20636 +strother 0 1 6.957497 0.000000 20637 +moorethi 0 1 6.957497 0.000000 20638 +collaboratorswhat 0 1 6.957497 0.000000 20639 +implyth 0 1 6.957497 0.000000 20640 +proverstrivelarri 0 1 6.957497 0.000000 20641 +struvelarri 0 1 6.957497 0.000000 20642 +proverand 0 1 6.957497 0.000000 20643 +theretoinclud 0 1 6.957497 0.000000 20644 +mcphee 0 1 6.957497 0.000000 20645 +theoryimplement 0 1 6.957497 0.000000 20646 +theoremprecondit 0 1 6.957497 0.000000 20647 +proverbledso 0 1 6.957497 0.000000 20648 +theoremnqthmboy 0 1 6.957497 0.000000 20649 +andmoor 0 1 6.957497 0.000000 20650 +clinc 0 1 6.957497 0.000000 20651 +iprshult 0 1 6.957497 0.000000 20652 +relatedlinksdo 0 1 6.957497 0.000000 20653 +shult 0 1 6.957497 0.000000 20654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..b81f8eea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 0 47 3.091042 0.000000 382 +perman 0 11 4.553877 0.000000 1372 +moveddocu 0 2 6.263398 0.000000 6246 +movedthi 0 2 6.263398 0.000000 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..b81f8eea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_aug/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 0 47 3.091042 0.000000 382 +perman 0 11 4.553877 0.000000 1372 +moveddocu 0 2 6.263398 0.000000 6246 +movedthi 0 2 6.263398 0.000000 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..c0ac730b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +hall 1 146 1.945910 1.945910 65 +structur 1 106 2.197225 2.197225 105 +solut 1 82 2.484907 2.484907 162 +dynam 1 76 2.564949 2.564949 194 +homework 1 79 2.564949 2.564949 193 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +new 1 64 2.772589 2.772589 262 +wednesdai 1 64 2.772589 2.772589 261 +unix 1 58 2.890372 2.890372 308 +fridai 1 44 3.135494 3.135494 390 +static 1 27 3.637586 3.637586 619 +practicum 1 16 4.174387 4.174387 960 +pagec 1 15 4.248495 4.248495 1011 +prelim 1 12 4.465908 4.465908 1201 +huang 1 12 4.465908 4.465908 1202 +systemsc 1 11 4.553877 4.553877 1293 +cheng 1 10 4.653960 4.653960 1381 +lili 1 5 5.347108 5.347108 2240 +filesystem 1 4 5.568345 5.568345 2587 +groupcours 1 3 5.857933 5.857933 3092 +ychuang 1 3 5.857933 5.857933 3093 +systemkenneth 1 2 6.263398 6.263398 4043 +birmanc 1 2 6.263398 6.263398 4044 +syllabuslectur 1 2 6.263398 6.263398 4045 +taslili 1 2 6.263398 6.263398 4046 +mihai 1 2 6.263398 6.263398 4047 +budiu 1 2 6.263398 6.263398 4042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..0c786c35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +schedul 1 119 2.079442 2.079442 85 +specif 1 106 2.197225 2.197225 106 +question 1 91 2.397895 2.397895 141 +chang 1 82 2.484907 2.484907 163 +exampl 1 77 2.564949 2.564949 195 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +principl 1 48 3.044522 3.044522 357 +answer 1 45 3.135494 3.135494 391 +tutori 1 39 3.258097 3.258097 437 +hand 1 37 3.332205 3.332205 475 +symbol 1 27 3.637586 3.637586 620 +displai 1 23 3.806662 3.806662 712 +chip 1 21 3.912023 3.912023 770 +practicum 1 16 4.174387 4.174387 960 +pagec 1 15 4.248495 4.248495 1011 +configur 1 15 4.248495 4.248495 1012 +systemsc 1 11 4.553877 4.553877 1293 +correspond 1 10 4.653960 4.653960 1382 +phase 1 6 5.164786 5.164786 1977 +hoca 1 5 5.347108 5.347108 2241 +lorenzo 1 4 5.568345 5.568345 2588 +penn 1 3 5.857933 5.857933 3094 +alvisi 1 3 5.857933 5.857933 3095 +systemsselect 1 2 6.263398 6.263398 4049 +postcript 1 2 6.263398 6.263398 4050 +consol 1 2 6.263398 6.263398 4048 +postcriptdocu 1 1 6.957497 6.957497 6248 +hocacours 1 1 6.957497 6.957497 6249 +broccoli 1 1 6.957497 6.957497 6250 +fileth 1 1 6.957497 6.957497 6251 +systemth 1 1 6.957497 6.957497 6252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..53684c20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,241 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +advanc 1 99 2.302585 2.302585 130 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +exam 1 86 2.484907 2.484907 169 +member 1 84 2.484907 2.484907 165 +learn 1 86 2.484907 2.484907 170 +homework 1 79 2.564949 2.564949 193 +know 1 80 2.564949 2.564949 198 +exampl 1 77 2.564949 2.564949 195 +want 1 79 2.564949 2.564949 199 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +materi 1 75 2.639057 2.639057 221 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +thursdai 1 70 2.708050 2.708050 241 +knowledg 1 67 2.708050 2.708050 243 +receiv 1 66 2.708050 2.708050 244 +integr 1 67 2.708050 2.708050 245 +organ 1 65 2.772589 2.772589 265 +evalu 1 64 2.772589 2.772589 266 +wednesdai 1 64 2.772589 2.772589 261 +guid 1 63 2.772589 2.772589 267 +januari 1 62 2.772589 2.772589 264 +type 1 61 2.833213 2.833213 296 +back 1 60 2.833213 2.833213 297 +march 1 61 2.833213 2.833213 295 +summer 1 56 2.890372 2.890372 311 +index 1 56 2.890372 2.890372 309 +space 1 57 2.890372 2.890372 310 +semest 1 58 2.890372 2.890372 312 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +februari 1 54 2.944439 2.944439 328 +week 1 52 2.995732 2.995732 343 +date 1 51 2.995732 2.995732 344 +principl 1 48 3.044522 3.044522 357 +appoint 1 49 3.044522 3.044522 358 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +midterm 1 45 3.135494 3.135494 392 +even 1 45 3.135494 3.135494 393 +algebra 1 45 3.135494 3.135494 394 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +press 1 42 3.218876 3.218876 419 +late 1 40 3.258097 3.258097 439 +submit 1 39 3.258097 3.258097 440 +transact 1 39 3.258097 3.258097 438 +credit 1 38 3.295837 3.295837 460 +concurr 1 34 3.401197 3.401197 501 +return 1 34 3.401197 3.401197 502 +queri 1 33 3.433987 3.433987 524 +within 1 33 3.433987 3.433987 525 +concept 1 32 3.465736 3.465736 537 +chapter 1 32 3.465736 3.465736 536 +computersci 1 30 3.555348 3.555348 562 +hard 1 30 3.555348 3.555348 563 +domain 1 30 3.555348 3.555348 564 +retriev 1 27 3.637586 3.637586 621 +though 1 27 3.637586 3.637586 622 +request 1 26 3.688879 3.688879 635 +constraint 1 26 3.688879 3.688879 636 +relev 1 26 3.688879 3.688879 637 +fundament 1 25 3.737670 3.737670 661 +deal 1 22 3.850148 3.850148 736 +recommend 1 22 3.850148 3.850148 737 +sort 1 22 3.850148 3.850148 738 +tent 1 22 3.850148 3.850148 739 +similar 1 21 3.912023 3.912023 771 +nice 1 20 3.951244 3.951244 809 +minut 1 20 3.951244 3.951244 810 +reserv 1 20 3.951244 3.951244 808 +entir 1 20 3.951244 3.951244 811 +break 1 20 3.951244 3.951244 812 +five 1 19 4.007333 4.007333 841 +along 1 18 4.060443 4.060443 878 +accept 1 18 4.060443 4.060443 879 +encourag 1 18 4.060443 4.060443 880 +vector 1 16 4.174387 4.174387 961 +permit 1 16 4.174387 4.174387 962 +alreadi 1 16 4.174387 4.174387 963 +pagec 1 15 4.248495 4.248495 1011 +indic 1 15 4.248495 4.248495 1013 +rank 1 14 4.317488 4.317488 1063 +latex 1 14 4.317488 4.317488 1064 +weight 1 12 4.465908 4.465908 1204 +calculu 1 12 4.465908 4.465908 1203 +prelim 1 12 4.465908 4.465908 1201 +summar 1 11 4.553877 4.553877 1295 +benjamin 1 11 4.553877 4.553877 1296 +worth 1 11 4.553877 4.553877 1294 +bill 1 11 4.553877 4.553877 1297 +relationship 1 10 4.653960 4.653960 1383 +forc 1 10 4.653960 4.653960 1384 +recoveri 1 9 4.753590 4.753590 1474 +databasesystem 1 8 4.875197 4.875197 1617 +hash 1 8 4.875197 4.875197 1618 +crash 1 8 4.875197 4.875197 1616 +cum 1 8 4.875197 4.875197 1619 +attach 1 7 5.010635 5.010635 1785 +textual 1 6 5.164786 5.164786 1979 +silberschatz 1 6 5.164786 5.164786 1978 +alphabet 1 6 5.164786 5.164786 1980 +phrase 1 5 5.347108 5.347108 2242 +ross 1 5 5.347108 5.347108 2243 +tupl 1 5 5.347108 5.347108 2244 +marco 1 4 5.568345 5.568345 2589 +clearli 1 4 5.568345 5.568345 2590 +roughli 1 3 5.857933 5.857933 3097 +entiti 1 3 5.857933 5.857933 3096 +singhal 1 3 5.857933 5.857933 3098 +sendmail 1 3 5.857933 5.857933 3099 +universityspr 1 2 6.263398 6.263398 4055 +introductionthi 1 2 6.263398 6.263398 4056 +queryoptim 1 2 6.263398 6.263398 4057 +prerequisitesc 1 2 6.263398 6.263398 4058 +korth 1 2 6.263398 6.263398 4051 +elmasri 1 2 6.263398 6.263398 4059 +salton 1 2 6.263398 6.263398 4060 +amitsingh 1 2 6.263398 6.263398 4061 +aguilera 1 2 6.263398 6.263398 4052 +amith 1 2 6.263398 6.263398 4053 +yamasani 1 2 6.263398 6.263398 4062 +ofyour 1 2 6.263398 6.263398 4063 +thegroup 1 2 6.263398 6.263398 4054 +ofcours 1 2 6.263398 6.263398 4064 +throughth 1 2 6.263398 6.263398 4065 +iti 1 2 6.263398 6.263398 4066 +dole 1 2 6.263398 6.263398 4067 +schedulethi 1 2 6.263398 6.263398 4068 +retrievaldepart 1 1 6.957497 6.957497 6257 +gradeshav 1 1 6.957497 6.957497 6258 +twothird 1 1 6.957497 6.957497 6259 +systemsinclud 1 1 6.957497 6.957497 6260 +transactionprocess 1 1 6.957497 6.957497 6261 +usefulinform 1 1 6.957497 6.957497 6262 +willcov 1 1 6.957497 6.957497 6263 +invert 1 1 6.957497 6.957497 6264 +smartsystem 1 1 6.957497 6.957497 6265 +relevancefeedback 1 1 6.957497 6.957497 6266 +thesaurusconstruct 1 1 6.957497 6.957497 6267 +automatictext 1 1 6.957497 6.957497 6268 +placetuesdai 1 1 6.957497 6.957497 6269 +thurston 1 1 6.957497 6.957497 6270 +booksdatabas 1 1 6.957497 6.957497 6271 +mcgrawhil 1 1 6.957497 6.957497 6272 +andnavath 1 1 6.957497 6.957497 6273 +byullman 1 1 6.957497 6.957497 6274 +photocopiedmateri 1 1 6.957497 6.957497 6275 +sophia 1 1 6.957497 6.957497 6276 +georgiakaki 1 1 6.957497 6.957497 6277 +officehour 1 1 6.957497 6.957497 6278 +gradingexam 1 1 6.957497 6.957497 6279 +yourfin 1 1 6.957497 6.957497 6280 +policiesy 1 1 6.957497 6.957497 6281 +samegrad 1 1 6.957497 6.957497 6282 +tuesdayand 1 1 6.957497 6.957497 6283 +illeg 1 1 6.957497 6.957497 6284 +latexif 1 1 6.957497 6.957497 6285 +goodopportun 1 1 6.957497 6.957497 6286 +submissionpleas 1 1 6.957497 6.957497 6287 +clinton 1 1 6.957497 6.957497 6288 +perot 1 1 6.957497 6.957497 6289 +homeworksgrad 1 1 6.957497 6.957497 6290 +sortedalphabet 1 1 6.957497 6.957497 6291 +thecov 1 1 6.957497 6.957497 6292 +pagefollow 1 1 6.957497 6.957497 6293 +regrad 1 1 6.957497 6.957497 6255 +policyal 1 1 6.957497 6.957497 6294 +inwrit 1 1 6.957497 6.957497 6295 +referto 1 1 6.957497 6.957497 6296 +availablethursdai 1 1 6.957497 6.957497 6253 +duetuesdai 1 1 6.957497 6.957497 6254 +retrievalthursdai 1 1 6.957497 6.957497 6256 +modelhomework 1 1 6.957497 6.957497 6297 +weightingthursdai 1 1 6.957497 6.957497 6298 +indexinghomework 1 1 6.957497 6.957497 6299 +evaluationtuesdai 1 1 6.957497 6.957497 6300 +feedbackthursdai 1 1 6.957497 6.957497 6301 +clusteringhomework 1 1 6.957497 6.957497 6302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..ddf6e538 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +postscript 1 131 2.079442 2.079442 90 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +text 1 98 2.302585 2.302585 133 +peopl 1 96 2.302585 2.302585 132 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +come 1 78 2.564949 2.564949 202 +handout 1 64 2.772589 2.772589 263 +evalu 1 64 2.772589 2.772589 266 +collect 1 65 2.772589 2.772589 268 +new 1 64 2.772589 2.772589 262 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +suggest 1 53 2.944439 2.944439 331 +profession 1 51 2.995732 2.995732 345 +give 1 50 3.044522 3.044522 359 +electron 1 47 3.091042 3.091042 379 +submit 1 39 3.258097 3.258097 440 +respons 1 37 3.332205 3.332205 476 +procedur 1 36 3.367296 3.367296 488 +print 1 34 3.401197 3.401197 503 +quot 1 29 3.583519 3.583519 582 +ask 1 28 3.610918 3.610918 597 +wai 1 25 3.737670 3.737670 662 +leav 1 21 3.912023 3.912023 772 +break 1 20 3.951244 3.951244 812 +nice 1 20 3.951244 3.951244 809 +modif 1 17 4.110874 4.110874 913 +mayb 1 15 4.248495 4.248495 1014 +social 1 13 4.382027 4.382027 1123 +misc 1 13 4.382027 4.382027 1124 +convert 1 13 4.382027 4.382027 1122 +submiss 1 11 4.553877 4.553877 1298 +admin 1 9 4.753590 4.753590 1476 +recit 1 9 4.753590 4.753590 1475 +joke 1 8 4.875197 4.875197 1620 +printer 1 8 4.875197 4.875197 1621 +ethic 1 7 5.010635 5.010635 1786 +header 1 7 5.010635 5.010635 1787 +sumedh 1 3 5.857933 5.857933 3101 +pfile 1 3 5.857933 5.857933 3100 +incl 1 2 6.263398 6.263398 4070 +enscript 1 2 6.263398 6.263398 4069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..9062803b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +take 1 97 2.302585 2.302585 134 +real 1 93 2.397895 2.397895 144 +present 1 91 2.397895 2.397895 145 +comment 1 93 2.397895 2.397895 146 +practic 1 70 2.708050 2.708050 246 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +descript 1 64 2.772589 2.772589 271 +plan 1 65 2.772589 2.772589 272 +simpl 1 60 2.833213 2.833213 298 +semest 1 58 2.890372 2.890372 312 +instruct 1 53 2.944439 2.944439 332 +tabl 1 51 2.995732 2.995732 346 +basic 1 50 3.044522 3.044522 360 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +rang 1 30 3.555348 3.555348 565 +depend 1 29 3.583519 3.583519 583 +progress 1 28 3.610918 3.610918 598 +aspect 1 25 3.737670 3.737670 663 +size 1 23 3.806662 3.806662 713 +varieti 1 22 3.850148 3.850148 740 +practicum 1 16 4.174387 4.174387 960 +choos 1 16 4.174387 4.174387 964 +signific 1 13 4.382027 4.382027 1125 +werner 1 10 4.653960 4.653960 1385 +vogel 1 8 4.875197 4.875197 1622 +earn 1 7 5.010635 5.010635 1788 +theywil 1 3 5.857933 5.857933 3102 +contentspag 1 3 5.857933 5.857933 3103 +offersa 1 2 6.263398 6.263398 4071 +systemsor 1 1 6.957497 6.957497 6303 +dirti 1 1 6.957497 6.957497 6304 +internetworkingto 1 1 6.957497 6.957497 6305 +teamsof 1 1 6.957497 6.957497 6306 +trough 1 1 6.957497 6.957497 6307 +complexityof 1 1 6.957497 6.957497 6308 +offcial 1 1 6.957497 6.957497 6309 +pageslink 1 1 6.957497 6.957497 6310 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..8c2d50ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +avail 1 169 1.791759 1.791759 48 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +graphic 1 90 2.397895 2.397895 147 +follow 1 92 2.397895 2.397895 143 +homepag 1 93 2.397895 2.397895 148 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +contain 1 81 2.484907 2.484907 174 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +start 1 83 2.484907 2.484907 173 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +server 1 76 2.564949 2.564949 204 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +semest 1 58 2.890372 2.890372 312 +get 1 46 3.091042 3.091042 380 +announc 1 40 3.258097 3.258097 441 +electr 1 38 3.295837 3.295837 461 +manual 1 35 3.401197 3.401197 504 +intend 1 28 3.610918 3.610918 599 +intro 1 17 4.110874 4.110874 915 +outlin 1 17 4.110874 4.110874 914 +anyon 1 17 4.110874 4.110874 916 +conduct 1 14 4.317488 4.317488 1065 +train 1 14 4.317488 4.317488 1066 +valid 1 11 4.553877 4.553877 1299 +literatur 1 11 4.553877 4.553877 1300 +surf 1 11 4.553877 4.553877 1301 +andcomput 1 8 4.875197 4.875197 1623 +architect 1 8 4.875197 4.875197 1624 +theproject 1 6 5.164786 5.164786 1981 +gopher 1 6 5.164786 5.164786 1982 +sole 1 4 5.568345 5.568345 2592 +mentor 1 4 5.568345 5.568345 2591 +addition 1 4 5.568345 5.568345 2593 +saluja 1 3 5.857933 5.857933 3104 +duedat 1 3 5.857933 5.857933 3105 +wiscinfo 1 3 5.857933 5.857933 3106 +studentsenrol 1 2 6.263398 6.263398 4073 +kewal 1 2 6.263398 6.263398 4072 +sorin 1 1 6.957497 6.957497 6311 +generalinform 1 1 6.957497 6.957497 6312 +midtermsyllabu 1 1 6.957497 6.957497 6313 +midtermi 1 1 6.957497 6.957497 6314 +caeworkst 1 1 6.957497 6.957497 6315 +whomai 1 1 6.957497 6.957497 6316 +throughbold_brows 1 1 6.957497 6.957497 6317 +gettingstart 1 1 6.957497 6.957497 6318 +workbook 1 1 6.957497 6.957497 6319 +quicksim 1 1 6.957497 6.957497 6320 +trainingworkbook 1 1 6.957497 6.957497 6321 +exersis 1 1 6.957497 6.957497 6322 +thesedocu 1 1 6.957497 6.957497 6323 +uwengin 1 1 6.957497 6.957497 6324 +pmcst 1 1 6.957497 6.957497 6325 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..6246c336 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +click 1 142 1.945910 1.945910 78 +schedul 1 119 2.079442 2.079442 85 +pleas 1 113 2.197225 2.197225 114 +homepag 1 93 2.397895 2.397895 148 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +exam 1 86 2.484907 2.484907 169 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +browser 1 56 2.890372 2.890372 313 +run 1 51 2.995732 2.995732 347 +tabl 1 51 2.995732 2.995732 346 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +abl 1 30 3.555348 3.555348 566 +frame 1 24 3.761200 3.761200 684 +handl 1 24 3.761200 3.761200 685 +individu 1 13 4.382027 4.382027 1126 +criteria 1 9 4.753590 4.753590 1477 +preced 1 3 5.857933 5.857933 3107 +goofi 1 2 6.263398 6.263398 4074 +herelink 1 1 6.957497 6.957497 6326 +motw 1 1 6.957497 6.957497 6327 +stuffnot 1 1 6.957497 6.957497 6328 +edupag 1 1 6.957497 6.957497 6329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..6450fe14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +requir 1 81 2.484907 2.484907 167 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +materi 1 75 2.639057 2.639057 221 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +browser 1 56 2.890372 2.890372 313 +netscap 1 44 3.135494 3.135494 395 +directori 1 45 3.135494 3.135494 396 +announc 1 40 3.258097 3.258097 441 +request 1 26 3.688879 3.688879 635 +interpret 1 24 3.761200 3.761200 686 +thu 1 21 3.912023 3.912023 773 +util 1 21 3.912023 3.912023 774 +behavior 1 18 4.060443 4.060443 881 +attempt 1 17 4.110874 4.110874 917 +pagec 1 15 4.248495 4.248495 1011 +correctli 1 9 4.753590 4.753590 1478 +dylan 1 8 4.875197 4.875197 1625 +password 1 4 5.568345 5.568345 2594 +parter 1 2 6.263398 6.263398 4075 +noodll 1 1 6.957497 6.957497 6330 +inconsist 1 1 6.957497 6.957497 6331 +partnerjoin 1 1 6.957497 6.957497 6332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..853993ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,360 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +well 1 109 2.197225 2.197225 121 +topic 1 114 2.197225 2.197225 110 +teach 1 108 2.197225 2.197225 112 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +structur 1 106 2.197225 2.197225 105 +techniqu 1 99 2.302585 2.302585 138 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +peopl 1 96 2.302585 2.302585 132 +memori 1 101 2.302585 2.302585 139 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +info 1 85 2.484907 2.484907 176 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +learn 1 86 2.484907 2.484907 170 +solut 1 82 2.484907 2.484907 162 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +good 1 77 2.564949 2.564949 200 +mondai 1 77 2.564949 2.564949 206 +want 1 79 2.564949 2.564949 199 +complet 1 77 2.564949 2.564949 208 +come 1 78 2.564949 2.564949 202 +exampl 1 77 2.564949 2.564949 195 +server 1 76 2.564949 2.564949 204 +state 1 76 2.564949 2.564949 207 +optim 1 79 2.564949 2.564949 197 +write 1 72 2.639057 2.639057 222 +materi 1 75 2.639057 2.639057 221 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +free 1 73 2.639057 2.639057 224 +name 1 72 2.639057 2.639057 220 +goal 1 66 2.708050 2.708050 250 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +would 1 67 2.708050 2.708050 251 +abstract 1 62 2.772589 2.772589 276 +function 1 62 2.772589 2.772589 275 +evalu 1 64 2.772589 2.772589 266 +handout 1 64 2.772589 2.772589 263 +creat 1 63 2.772589 2.772589 277 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +polici 1 64 2.772589 2.772589 279 +collect 1 65 2.772589 2.772589 268 +best 1 59 2.833213 2.833213 299 +colleg 1 61 2.833213 2.833213 300 +type 1 61 2.833213 2.833213 296 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +direct 1 57 2.890372 2.890372 316 +browser 1 56 2.890372 2.890372 313 +variou 1 56 2.890372 2.890372 317 +reason 1 57 2.890372 2.890372 318 +cover 1 55 2.944439 2.944439 329 +allow 1 53 2.944439 2.944439 333 +instruct 1 53 2.944439 2.944439 332 +local 1 55 2.944439 2.944439 334 +week 1 52 2.995732 2.995732 343 +run 1 51 2.995732 2.995732 347 +date 1 51 2.995732 2.995732 344 +set 1 50 3.044522 3.044522 361 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +even 1 45 3.135494 3.135494 393 +textbook 1 44 3.135494 3.135494 397 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +http 1 41 3.218876 3.218876 420 +combin 1 42 3.218876 3.218876 421 +howev 1 41 3.218876 3.218876 422 +late 1 40 3.258097 3.258097 439 +must 1 40 3.258097 3.258097 442 +submit 1 39 3.258097 3.258097 440 +correct 1 38 3.295837 3.295837 462 +credit 1 38 3.295837 3.295837 460 +respons 1 37 3.332205 3.332205 476 +hand 1 37 3.332205 3.332205 475 +ofth 1 36 3.367296 3.367296 491 +staff 1 36 3.367296 3.367296 490 +download 1 36 3.367296 3.367296 489 +procedur 1 36 3.367296 3.367296 488 +tree 1 36 3.367296 3.367296 492 +either 1 35 3.401197 3.401197 506 +post 1 35 3.401197 3.401197 505 +jame 1 35 3.401197 3.401197 507 +word 1 34 3.401197 3.401197 508 +approxim 1 35 3.401197 3.401197 509 +return 1 34 3.401197 3.401197 502 +singl 1 34 3.401197 3.401197 510 +random 1 34 3.401197 3.401197 511 +concept 1 32 3.465736 3.465736 537 +given 1 32 3.465736 3.465736 538 +extend 1 32 3.465736 3.465736 539 +express 1 32 3.465736 3.465736 540 +often 1 31 3.496508 3.496508 551 +rang 1 30 3.555348 3.555348 565 +computersci 1 30 3.555348 3.555348 562 +robert 1 30 3.555348 3.555348 567 +focus 1 29 3.583519 3.583519 584 +ask 1 28 3.610918 3.610918 597 +held 1 28 3.610918 3.610918 600 +campu 1 27 3.637586 3.637586 623 +symbol 1 27 3.637586 3.637586 620 +request 1 26 3.688879 3.688879 635 +rule 1 26 3.688879 3.688879 638 +background 1 25 3.737670 3.737670 664 +valu 1 25 3.737670 3.737670 665 +pattern 1 24 3.761200 3.761200 689 +reach 1 24 3.761200 3.761200 688 +consult 1 24 3.761200 3.761200 687 +interpret 1 24 3.761200 3.761200 686 +higher 1 24 3.761200 3.761200 690 +togeth 1 23 3.806662 3.806662 714 +variabl 1 23 3.806662 3.806662 715 +inth 1 22 3.850148 3.850148 741 +almost 1 22 3.850148 3.850148 742 +period 1 22 3.850148 3.850148 743 +annot 1 21 3.912023 3.912023 775 +thu 1 21 3.912023 3.912023 773 +half 1 21 3.912023 3.912023 776 +tell 1 21 3.912023 3.912023 777 +wonder 1 20 3.951244 3.951244 815 +sure 1 20 3.951244 3.951244 813 +break 1 20 3.951244 3.951244 812 +facil 1 20 3.951244 3.951244 814 +exercis 1 19 4.007333 4.007333 842 +els 1 19 4.007333 4.007333 843 +encourag 1 18 4.060443 4.060443 880 +account 1 18 4.060443 4.060443 882 +accept 1 18 4.060443 4.060443 879 +whether 1 17 4.110874 4.110874 918 +weekli 1 17 4.110874 4.110874 919 +macintosh 1 17 4.110874 4.110874 920 +outlin 1 17 4.110874 4.110874 914 +differenti 1 17 4.110874 4.110874 921 +match 1 16 4.174387 4.174387 965 +modern 1 16 4.174387 4.174387 966 +transfer 1 16 4.174387 4.174387 967 +earli 1 16 4.174387 4.174387 968 +easi 1 16 4.174387 4.174387 969 +capabl 1 15 4.248495 4.248495 1016 +score 1 15 4.248495 4.248495 1017 +hierarch 1 15 4.248495 4.248495 1018 +stream 1 15 4.248495 4.248495 1015 +borland 1 14 4.317488 4.317488 1067 +warn 1 14 4.317488 4.317488 1068 +polynomi 1 14 4.317488 4.317488 1069 +recurs 1 13 4.382027 4.382027 1127 +suit 1 13 4.382027 4.382027 1129 +joint 1 13 4.382027 4.382027 1130 +someon 1 13 4.382027 4.382027 1128 +skill 1 12 4.465908 4.465908 1205 +prelim 1 12 4.465908 4.465908 1201 +iter 1 12 4.465908 4.465908 1206 +broad 1 11 4.553877 4.553877 1302 +appl 1 11 4.553877 4.553877 1303 +induct 1 11 4.553877 4.553877 1304 +sundai 1 10 4.653960 4.653960 1387 +arithmet 1 10 4.653960 4.653960 1388 +stack 1 10 4.653960 4.653960 1389 +queue 1 10 4.653960 4.653960 1386 +introductori 1 9 4.753590 4.753590 1479 +recit 1 9 4.753590 4.753590 1475 +preliminari 1 9 4.753590 4.753590 1480 +andth 1 9 4.753590 4.753590 1481 +dylan 1 8 4.875197 4.875197 1625 +simpli 1 8 4.875197 4.875197 1626 +matter 1 8 4.875197 4.875197 1627 +on 1 8 4.875197 4.875197 1628 +happen 1 7 5.010635 5.010635 1790 +justin 1 7 5.010635 5.010635 1789 +dispatch 1 7 5.010635 5.010635 1791 +prioriti 1 7 5.010635 5.010635 1792 +chosen 1 6 5.164786 5.164786 1984 +huttenloch 1 6 5.164786 5.164786 1983 +contract 1 6 5.164786 5.164786 1985 +garbag 1 6 5.164786 5.164786 1986 +conot 1 5 5.347108 5.347108 2245 +hardcopi 1 5 5.347108 5.347108 2246 +substitut 1 5 5.347108 5.347108 2247 +variat 1 5 5.347108 5.347108 2248 +clearli 1 4 5.568345 5.568345 2590 +wherea 1 4 5.568345 5.568345 2597 +exposur 1 4 5.568345 5.568345 2598 +password 1 4 5.568345 5.568345 2594 +toth 1 4 5.568345 5.568345 2595 +midnight 1 4 5.568345 5.568345 2599 +amaz 1 4 5.568345 5.568345 2600 +thiscours 1 4 5.568345 5.568345 2601 +infinit 1 4 5.568345 5.568345 2596 +catch 1 4 5.568345 5.568345 2602 +illus 1 4 5.568345 5.568345 2603 +useth 1 3 5.857933 5.857933 3110 +programsand 1 3 5.857933 5.857933 3111 +toolbox 1 3 5.857933 5.857933 3112 +programmingtechniqu 1 3 5.857933 5.857933 3113 +szewczyk 1 3 5.857933 5.857933 3108 +voskuhl 1 3 5.857933 5.857933 3109 +kimbal 1 3 5.857933 5.857933 3114 +andon 1 3 5.857933 5.857933 3115 +requirementsstud 1 3 5.857933 5.857933 3116 +immedi 1 3 5.857933 5.857933 3117 +jointli 1 3 5.857933 5.857933 3118 +doubt 1 3 5.857933 5.857933 3119 +argument 1 3 5.857933 5.857933 3120 +quotat 1 3 5.857933 5.857933 3121 +inherit 1 3 5.857933 5.857933 3122 +heap 1 3 5.857933 5.857933 3123 +exit 1 3 5.857933 5.857933 3124 +developedat 1 2 6.263398 6.263398 4078 +rangeof 1 2 6.263398 6.263398 4076 +orientedlanguag 1 2 6.263398 6.263398 4079 +standalon 1 2 6.263398 6.263398 4077 +therewil 1 2 6.263398 6.263398 4080 +combinationof 1 2 6.263398 6.263398 4081 +programmingproblem 1 2 6.263398 6.263398 4082 +youwork 1 2 6.263398 6.263398 4083 +growth 1 2 6.263398 6.263398 4084 +informationaugust 1 1 6.957497 6.957497 6339 +courseabout 1 1 6.957497 6.957497 6340 +notationthat 1 1 6.957497 6.957497 6341 +takec 1 1 6.957497 6.957497 6342 +programmingparadigm 1 1 6.957497 6.957497 6343 +imperativeprogram 1 1 6.957497 6.957497 6344 +goodform 1 1 6.957497 6.957497 6345 +probablytak 1 1 6.957497 6.957497 6346 +questionsor 1 1 6.957497 6.957497 6347 +serverwhich 1 1 6.957497 6.957497 6348 +answersa 1 1 6.957497 6.957497 6349 +idand 1 1 6.957497 6.957497 6334 +thisweek 1 1 6.957497 6.957497 6350 +edubut 1 1 6.957497 6.957497 6351 +aboutproblem 1 1 6.957497 6.957497 6352 +tobia 1 1 6.957497 6.957497 6335 +mayr 1 1 6.957497 6.957497 6336 +upsonjam 1 1 6.957497 6.957497 6353 +hamblin 1 1 6.957497 6.957497 6337 +ugrad 1 1 6.957497 6.957497 6333 +tarobert 1 1 6.957497 6.957497 6354 +tajustin 1 1 6.957497 6.957497 6355 +taandra 1 1 6.957497 6.957497 6356 +ferencz 1 1 6.957497 6.957497 6357 +melissa 1 1 6.957497 6.957497 6358 +consultantwhen 1 1 6.957497 6.957497 6359 +meetlectur 1 1 6.957497 6.957497 6360 +andrecit 1 1 6.957497 6.957497 6361 +recitationsexpand 1 1 6.957497 6.957497 6362 +opportunityto 1 1 6.957497 6.957497 6363 +eachproblem 1 1 6.957497 6.957497 6364 +setsdu 1 1 6.957497 6.957497 6365 +mondayeven 1 1 6.957497 6.957497 6366 +consultinghour 1 1 6.957497 6.957497 6367 +voskuhltba 1 1 6.957497 6.957497 6368 +materialsther 1 1 6.957497 6.957497 6369 +handoutsand 1 1 6.957497 6.957497 6370 +implementedin 1 1 6.957497 6.957497 6371 +downloadonto 1 1 6.957497 6.957497 6372 +ontoyour 1 1 6.957497 6.957497 6373 +recentvers 1 1 6.957497 6.957497 6374 +gradeswil 1 1 6.957497 6.957497 6375 +thetot 1 1 6.957497 6.957497 6376 +willgener 1 1 6.957497 6.957497 6377 +followingclass 1 1 6.957497 6.957497 6378 +sittingdown 1 1 6.957497 6.957497 6379 +sink 1 1 6.957497 6.957497 6380 +beforesit 1 1 6.957497 6.957497 6381 +workmuch 1 1 6.957497 6.957497 6382 +jointassign 1 1 6.957497 6.957497 6383 +circumstancesmai 1 1 6.957497 6.957497 6384 +yourown 1 1 6.957497 6.957497 6385 +yougot 1 1 6.957497 6.957497 6386 +whenpeopl 1 1 6.957497 6.957497 6387 +lifeunpleas 1 1 6.957497 6.957497 6388 +facilitiescit 1 1 6.957497 6.957497 6389 +andpc 1 1 6.957497 6.957497 6390 +upsonmac 1 1 6.957497 6.957497 6391 +datesal 1 1 6.957497 6.957497 6392 +mondaynight 1 1 6.957497 6.957497 6393 +submityour 1 1 6.957497 6.957497 6394 +multimethod 1 1 6.957497 6.957497 6395 +mutabl 1 1 6.957497 6.957497 6338 +heapsort 1 1 6.957497 6.957497 6396 +metacircular 1 1 6.957497 6.957497 6397 +nonloc 1 1 6.957497 6.957497 6398 +throw 1 1 6.957497 6.957497 6399 +quicksort 1 1 6.957497 6.957497 6400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..e3c9cef0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +document 1 121 2.079442 2.079442 89 +send 1 114 2.197225 2.197225 109 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +memori 1 101 2.302585 2.302585 139 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +control 1 82 2.484907 2.484907 164 +help 1 83 2.484907 2.484907 175 +start 1 83 2.484907 2.484907 173 +homework 1 79 2.564949 2.564949 193 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +mondai 1 77 2.564949 2.564949 206 +materi 1 75 2.639057 2.639057 221 +organ 1 65 2.772589 2.772589 265 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +digit 1 52 2.995732 2.995732 348 +date 1 51 2.995732 2.995732 344 +maintain 1 51 2.995732 2.995732 342 +made 1 44 3.135494 3.135494 398 +staff 1 36 3.367296 3.367296 490 +represent 1 35 3.401197 3.401197 512 +post 1 35 3.401197 3.401197 505 +bookmark 1 26 3.688879 3.688879 639 +request 1 26 3.688879 3.688879 635 +consult 1 24 3.761200 3.761200 687 +hierarchi 1 22 3.850148 3.850148 744 +sequenti 1 22 3.850148 3.850148 745 +path 1 21 3.912023 3.912023 778 +unit 1 21 3.912023 3.912023 779 +annot 1 21 3.912023 3.912023 775 +separ 1 19 4.007333 4.007333 844 +appropri 1 18 4.060443 4.060443 883 +account 1 18 4.060443 4.060443 882 +otherwis 1 17 4.110874 4.110874 922 +sign 1 16 4.174387 4.174387 970 +circuit 1 13 4.382027 4.382027 1131 +difficulti 1 13 4.382027 4.382027 1132 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +assembl 1 12 4.465908 4.465908 1207 +combinatori 1 8 4.875197 4.875197 1629 +interrupt 1 7 5.010635 5.010635 1793 +saturdai 1 7 5.010635 5.010635 1794 +hidden 1 6 5.164786 5.164786 1987 +conot 1 5 5.347108 5.347108 2245 +registr 1 5 5.347108 5.347108 2249 +microprogram 1 4 5.568345 5.568345 2604 +eickenfal 1 3 5.857933 5.857933 3125 +kimbal 1 3 5.857933 5.857933 3114 +helpif 1 3 5.857933 5.857933 3126 +mate 1 3 5.857933 5.857933 3127 +encount 1 3 5.857933 5.857933 3128 +btopic 1 2 6.263398 6.263398 4085 +organizationthorsten 1 1 6.957497 6.957497 6401 +materialsal 1 1 6.957497 6.957497 6402 +listlist 1 1 6.957497 6.957497 6403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..38d071a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +postscript 1 131 2.079442 2.079442 90 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +text 1 98 2.302585 2.302585 133 +peopl 1 96 2.302585 2.302585 132 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +mondai 1 77 2.564949 2.564949 206 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +handout 1 64 2.772589 2.772589 263 +evalu 1 64 2.772589 2.772589 266 +collect 1 65 2.772589 2.772589 268 +new 1 64 2.772589 2.772589 262 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +suggest 1 53 2.944439 2.944439 331 +profession 1 51 2.995732 2.995732 345 +give 1 50 3.044522 3.044522 359 +electron 1 47 3.091042 3.091042 379 +submit 1 39 3.258097 3.258097 440 +respons 1 37 3.332205 3.332205 476 +procedur 1 36 3.367296 3.367296 488 +print 1 34 3.401197 3.401197 503 +quot 1 29 3.583519 3.583519 582 +held 1 28 3.610918 3.610918 600 +ask 1 28 3.610918 3.610918 597 +wai 1 25 3.737670 3.737670 662 +leav 1 21 3.912023 3.912023 772 +break 1 20 3.951244 3.951244 812 +nice 1 20 3.951244 3.951244 809 +modif 1 17 4.110874 4.110874 913 +mayb 1 15 4.248495 4.248495 1014 +floor 1 14 4.317488 4.317488 1070 +social 1 13 4.382027 4.382027 1123 +misc 1 13 4.382027 4.382027 1124 +convert 1 13 4.382027 4.382027 1122 +submiss 1 11 4.553877 4.553877 1298 +admin 1 9 4.753590 4.753590 1476 +recit 1 9 4.753590 4.753590 1475 +joke 1 8 4.875197 4.875197 1620 +printer 1 8 4.875197 4.875197 1621 +ethic 1 7 5.010635 5.010635 1786 +header 1 7 5.010635 5.010635 1787 +sumedh 1 3 5.857933 5.857933 3101 +pfile 1 3 5.857933 5.857933 3100 +incl 1 2 6.263398 6.263398 4070 +enscript 1 2 6.263398 6.263398 4069 +csuglab 1 1 6.957497 6.957497 6404 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..b87ef5b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +hall 1 146 1.945910 1.945910 65 +structur 1 106 2.197225 2.197225 105 +solut 1 82 2.484907 2.484907 162 +dynam 1 76 2.564949 2.564949 194 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +new 1 64 2.772589 2.772589 262 +wednesdai 1 64 2.772589 2.772589 261 +unix 1 58 2.890372 2.890372 308 +fridai 1 44 3.135494 3.135494 390 +static 1 27 3.637586 3.637586 619 +practicum 1 16 4.174387 4.174387 960 +pagec 1 15 4.248495 4.248495 1011 +prelim 1 12 4.465908 4.465908 1201 +huang 1 12 4.465908 4.465908 1202 +systemsc 1 11 4.553877 4.553877 1293 +cheng 1 10 4.653960 4.653960 1381 +lili 1 5 5.347108 5.347108 2240 +filesystem 1 4 5.568345 5.568345 2587 +groupcours 1 3 5.857933 5.857933 3092 +ychuang 1 3 5.857933 5.857933 3093 +systemkenneth 1 2 6.263398 6.263398 4043 +birmanc 1 2 6.263398 6.263398 4044 +syllabuslectur 1 2 6.263398 6.263398 4045 +taslili 1 2 6.263398 6.263398 4046 +mihai 1 2 6.263398 6.263398 4047 +budiu 1 2 6.263398 6.263398 4042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..f72a8a1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +includ 1 208 1.609438 1.609438 42 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +academ 1 82 2.484907 2.484907 178 +solut 1 82 2.484907 2.484907 162 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +complet 1 77 2.564949 2.564949 208 +intellig 1 72 2.639057 2.639057 225 +materi 1 75 2.639057 2.639057 221 +appli 1 71 2.639057 2.639057 226 +tuesdai 1 73 2.639057 2.639057 219 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +result 1 65 2.772589 2.772589 281 +special 1 56 2.890372 2.890372 320 +found 1 53 2.944439 2.944439 337 +date 1 51 2.995732 2.995732 344 +still 1 50 3.044522 3.044522 362 +right 1 48 3.044522 3.044522 363 +move 1 47 3.091042 3.091042 382 +get 1 46 3.091042 3.091042 380 +netscap 1 44 3.135494 3.135494 395 +midterm 1 45 3.135494 3.135494 392 +offer 1 43 3.178054 3.178054 414 +third 1 43 3.178054 3.178054 412 +announc 1 40 3.258097 3.258097 441 +either 1 35 3.401197 3.401197 506 +queri 1 33 3.433987 3.433987 524 +posit 1 31 3.496508 3.496508 552 +abl 1 30 3.555348 3.555348 566 +specifi 1 30 3.555348 3.555348 568 +exist 1 30 3.555348 3.555348 569 +limit 1 29 3.583519 3.583519 585 +load 1 28 3.610918 3.610918 601 +concern 1 25 3.737670 3.737670 666 +begin 1 23 3.806662 3.806662 716 +variabl 1 23 3.806662 3.806662 715 +defin 1 22 3.850148 3.850148 746 +fact 1 21 3.912023 3.912023 780 +longer 1 20 3.951244 3.951244 816 +assum 1 19 4.007333 4.007333 845 +scott 1 18 4.060443 4.060443 884 +account 1 18 4.060443 4.060443 882 +statu 1 18 4.060443 4.060443 885 +previous 1 17 4.110874 4.110874 923 +otherwis 1 17 4.110874 4.110874 922 +alreadi 1 16 4.174387 4.174387 963 +portion 1 16 4.174387 4.174387 971 +atth 1 15 4.248495 4.248495 1019 +success 1 10 4.653960 4.653960 1390 +guarante 1 10 4.653960 4.653960 1391 +kevin 1 9 4.753590 4.753590 1482 +slightli 1 7 5.010635 5.010635 1795 +paramet 1 7 5.010635 5.010635 1796 +planner 1 7 5.010635 5.010635 1797 +hunt 1 7 5.010635 5.010635 1798 +remind 1 7 5.010635 5.010635 1799 +schema 1 6 5.164786 5.164786 1988 +constant 1 5 5.347108 5.347108 2251 +bind 1 5 5.347108 5.347108 2250 +seriou 1 5 5.347108 5.347108 2252 +clarif 1 5 5.347108 5.347108 2253 +clair 1 4 5.568345 5.568345 2605 +shouldn 1 4 5.568345 5.568345 2606 +unless 1 4 5.568345 5.568345 2607 +meanwhil 1 3 5.857933 5.857933 3129 +worri 1 3 5.857933 5.857933 3130 +add 1 3 5.857933 5.857933 3131 +thec 1 3 5.857933 5.857933 3132 +pagesc 1 3 5.857933 5.857933 3133 +pagecsfound 1 2 6.263398 6.263398 4086 +notethat 1 1 6.957497 6.957497 6406 +rubix 1 1 6.957497 6.957497 6407 +thefunct 1 1 6.957497 6.957497 6408 +rearrang 1 1 6.957497 6.957497 6409 +appeas 1 1 6.957497 6.957497 6410 +var 1 1 6.957497 6.957497 6411 +bracket 1 1 6.957497 6.957497 6412 +youus 1 1 6.957497 6.957497 6413 +machinesshould 1 1 6.957497 6.957497 6414 +sbin 1 1 6.957497 6.957497 6415 +ksaunder 1 1 6.957497 6.957497 6416 +sbinfor 1 1 6.957497 6.957497 6417 +gremlin 1 1 6.957497 6.957497 6418 +codefor 1 1 6.957497 6.957497 6419 +andget 1 1 6.957497 6.957497 6420 +uponcomplet 1 1 6.957497 6.957497 6421 +thoseus 1 1 6.957497 6.957497 6422 +zeroon 1 1 6.957497 6.957497 6423 +asspecifi 1 1 6.957497 6.957497 6424 +oneassign 1 1 6.957497 6.957497 6425 +dodg 1 1 6.957497 6.957497 6405 +vanto 1 1 6.957497 6.957497 6426 +thisclarif 1 1 6.957497 6.957497 6427 +newhomework 1 1 6.957497 6.957497 6428 +coursemateri 1 1 6.957497 6.957497 6429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..a4058f73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +fall 1 181 1.609438 1.609438 40 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +studi 1 120 2.079442 2.079442 91 +pleas 1 113 2.197225 2.197225 114 +exam 1 86 2.484907 2.484907 169 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +homework 1 79 2.564949 2.564949 193 +integr 1 67 2.708050 2.708050 245 +room 1 59 2.833213 2.833213 301 +date 1 51 2.995732 2.995732 344 +set 1 50 3.044522 3.044522 361 +revis 1 26 3.688879 3.688879 640 +pagec 1 15 4.248495 4.248495 1011 +automata 1 13 4.382027 4.382027 1135 +prelim 1 12 4.465908 4.465908 1201 +hardcopi 1 5 5.347108 5.347108 2246 +incorrect 1 3 5.857933 5.857933 3134 +nikolai 1 2 6.263398 6.263398 4087 +theorywelcom 1 1 6.957497 6.957497 6430 +guideannounc 1 1 6.957497 6.957497 6431 +erratum 1 1 6.957497 6.957497 6432 +hourscod 1 1 6.957497 6.957497 6433 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..84af2de6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +person 1 111 2.197225 2.197225 117 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +advanc 1 99 2.302585 2.302585 130 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +part 1 98 2.302585 2.302585 129 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +associ 1 93 2.397895 2.397895 151 +follow 1 92 2.397895 2.397895 143 +homepag 1 93 2.397895 2.397895 148 +question 1 91 2.397895 2.397895 141 +exam 1 86 2.484907 2.484907 169 +info 1 85 2.484907 2.484907 176 +activ 1 84 2.484907 2.484907 182 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +build 1 85 2.484907 2.484907 184 +second 1 81 2.484907 2.484907 166 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +resourc 1 81 2.484907 2.484907 172 +complet 1 77 2.564949 2.564949 208 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +homework 1 79 2.564949 2.564949 193 +appear 1 78 2.564949 2.564949 210 +interfac 1 79 2.564949 2.564949 209 +come 1 78 2.564949 2.564949 202 +optim 1 79 2.564949 2.564949 197 +know 1 80 2.564949 2.564949 198 +tuesdai 1 73 2.639057 2.639057 219 +upson 1 71 2.639057 2.639057 218 +materi 1 75 2.639057 2.639057 221 +involv 1 71 2.639057 2.639057 227 +write 1 72 2.639057 2.639057 222 +addit 1 74 2.639057 2.639057 228 +meet 1 72 2.639057 2.639057 229 +free 1 73 2.639057 2.639057 224 +thursdai 1 70 2.708050 2.708050 241 +differ 1 66 2.708050 2.708050 253 +test 1 66 2.708050 2.708050 252 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +evalu 1 64 2.772589 2.772589 266 +result 1 65 2.772589 2.772589 281 +handout 1 64 2.772589 2.772589 263 +descript 1 64 2.772589 2.772589 271 +written 1 63 2.772589 2.772589 278 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +collect 1 65 2.772589 2.772589 268 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +type 1 61 2.833213 2.833213 296 +variou 1 56 2.890372 2.890372 317 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +cover 1 55 2.944439 2.944439 329 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +much 1 52 2.995732 2.995732 349 +date 1 51 2.995732 2.995732 344 +archiv 1 49 3.044522 3.044522 364 +give 1 50 3.044522 3.044522 359 +principl 1 48 3.044522 3.044522 357 +basic 1 50 3.044522 3.044522 360 +standard 1 48 3.044522 3.044522 365 +possibl 1 47 3.091042 3.091042 378 +understand 1 47 3.091042 3.091042 384 +could 1 46 3.091042 3.091042 383 +answer 1 45 3.135494 3.135494 391 +textbook 1 44 3.135494 3.135494 397 +discuss 1 45 3.135494 3.135494 399 +midterm 1 45 3.135494 3.135494 392 +made 1 44 3.135494 3.135494 398 +even 1 45 3.135494 3.135494 393 +offer 1 43 3.178054 3.178054 414 +term 1 43 3.178054 3.178054 411 +around 1 43 3.178054 3.178054 415 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +examin 1 42 3.218876 3.218876 424 +howev 1 41 3.218876 3.218876 422 +review 1 42 3.218876 3.218876 425 +edit 1 42 3.218876 3.218876 418 +might 1 41 3.218876 3.218876 426 +form 1 39 3.258097 3.258097 443 +realli 1 40 3.258097 3.258097 444 +must 1 40 3.258097 3.258097 442 +transact 1 39 3.258097 3.258097 438 +tutori 1 39 3.258097 3.258097 437 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +close 1 38 3.295837 3.295837 465 +mean 1 37 3.332205 3.332205 477 +hand 1 37 3.332205 3.332205 475 +survei 1 35 3.401197 3.401197 513 +singl 1 34 3.401197 3.401197 510 +michael 1 35 3.401197 3.401197 514 +concurr 1 34 3.401197 3.401197 501 +taught 1 33 3.433987 3.433987 526 +queri 1 33 3.433987 3.433987 524 +concept 1 32 3.465736 3.465736 537 +storag 1 31 3.496508 3.496508 553 +someth 1 31 3.496508 3.496508 554 +domain 1 30 3.555348 3.555348 564 +compon 1 30 3.555348 3.555348 570 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +produc 1 30 3.555348 3.555348 572 +turn 1 29 3.583519 3.583519 586 +depend 1 29 3.583519 3.583519 583 +semant 1 29 3.583519 3.583519 587 +propos 1 28 3.610918 3.610918 602 +becom 1 28 3.610918 3.610918 603 +intend 1 28 3.610918 3.610918 599 +actual 1 28 3.610918 3.610918 604 +progress 1 28 3.610918 3.610918 598 +manipul 1 27 3.637586 3.637586 624 +team 1 27 3.637586 3.637586 625 +altern 1 26 3.688879 3.688879 641 +background 1 25 3.737670 3.737670 664 +fundament 1 25 3.737670 3.737670 661 +although 1 25 3.737670 3.737670 667 +toward 1 25 3.737670 3.737670 668 +primari 1 25 3.737670 3.737670 669 +hill 1 25 3.737670 3.737670 670 +aspect 1 25 3.737670 3.737670 663 +alwai 1 24 3.761200 3.761200 691 +wish 1 24 3.761200 3.761200 692 +higher 1 24 3.761200 3.761200 690 +store 1 24 3.761200 3.761200 693 +begin 1 23 3.806662 3.806662 716 +initi 1 23 3.806662 3.806662 717 +lead 1 23 3.806662 3.806662 718 +tent 1 22 3.850148 3.850148 739 +disk 1 22 3.850148 3.850148 747 +recommend 1 22 3.850148 3.850148 737 +finish 1 22 3.850148 3.850148 748 +period 1 22 3.850148 3.850148 743 +among 1 21 3.912023 3.912023 781 +half 1 21 3.912023 3.912023 776 +prerequisit 1 19 4.007333 4.007333 846 +thur 1 19 4.007333 4.007333 847 +minim 1 18 4.060443 4.060443 887 +lower 1 18 4.060443 4.060443 886 +demo 1 18 4.060443 4.060443 888 +stat 1 17 4.110874 4.110874 924 +anyon 1 17 4.110874 4.110874 916 +whether 1 17 4.110874 4.110874 918 +choos 1 16 4.174387 4.174387 964 +ramakrishnan 1 16 4.174387 4.174387 972 +piec 1 15 4.248495 4.248495 1020 +contribut 1 15 4.248495 4.248495 1021 +demand 1 14 4.317488 4.317488 1073 +hopefulli 1 14 4.317488 4.317488 1071 +manner 1 14 4.317488 4.317488 1074 +decid 1 14 4.317488 4.317488 1075 +convent 1 14 4.317488 4.317488 1072 +dbm 1 13 4.382027 4.382027 1136 +essenti 1 13 4.382027 4.382027 1137 +signific 1 13 4.382027 4.382027 1125 +step 1 13 4.382027 4.382027 1138 +introduc 1 13 4.382027 4.382027 1139 +earlier 1 13 4.382027 4.382027 1140 +prelim 1 12 4.465908 4.465908 1201 +grow 1 12 4.465908 4.465908 1209 +amount 1 12 4.465908 4.465908 1208 +workload 1 12 4.465908 4.465908 1210 +buffer 1 12 4.465908 4.465908 1211 +raghu 1 12 4.465908 4.465908 1212 +sens 1 11 4.553877 4.553877 1305 +submiss 1 11 4.553877 4.553877 1298 +benjamin 1 11 4.553877 4.553877 1296 +modular 1 10 4.653960 4.653960 1392 +forc 1 10 4.653960 4.653960 1384 +certain 1 10 4.653960 4.653960 1393 +introductori 1 9 4.753590 4.753590 1479 +herefor 1 9 4.753590 4.753590 1483 +familiar 1 9 4.753590 4.753590 1485 +suitabl 1 9 4.753590 4.753590 1486 +morgan 1 9 4.753590 4.753590 1484 +rel 1 9 4.753590 4.753590 1487 +clear 1 9 4.753590 4.753590 1488 +recoveri 1 9 4.753590 4.753590 1474 +databasesystem 1 8 4.875197 4.875197 1617 +cum 1 8 4.875197 4.875197 1619 +awar 1 7 5.010635 5.010635 1800 +bug 1 7 5.010635 5.010635 1801 +fromth 1 7 5.010635 5.010635 1802 +seshadri 1 7 5.010635 5.010635 1803 +noon 1 7 5.010635 5.010635 1804 +consequ 1 6 5.164786 5.164786 1989 +huge 1 6 5.164786 5.164786 1991 +ifyou 1 6 5.164786 5.164786 1992 +neither 1 6 5.164786 5.164786 1990 +beta 1 6 5.164786 5.164786 1993 +silberschatz 1 6 5.164786 5.164786 1978 +lack 1 6 5.164786 5.164786 1994 +tobe 1 6 5.164786 5.164786 1995 +praveen 1 6 5.164786 5.164786 1996 +solid 1 5 5.347108 5.347108 2255 +valuabl 1 5 5.347108 5.347108 2256 +thrive 1 5 5.347108 5.347108 2257 +greater 1 5 5.347108 5.347108 2258 +fraction 1 5 5.347108 5.347108 2259 +interestedin 1 5 5.347108 5.347108 2260 +categori 1 5 5.347108 5.347108 2261 +mcgraw 1 5 5.347108 5.347108 2262 +kaufmann 1 5 5.347108 5.347108 2254 +surprisingli 1 4 5.568345 5.568345 2609 +behind 1 4 5.568345 5.568345 2610 +suppli 1 4 5.568345 5.568345 2611 +thiscours 1 4 5.568345 5.568345 2601 +asystem 1 4 5.568345 5.568345 2612 +enrol 1 4 5.568345 5.568345 2613 +twice 1 4 5.568345 5.568345 2614 +fold 1 4 5.568345 5.568345 2615 +minibas 1 4 5.568345 5.568345 2608 +thati 1 4 5.568345 5.568345 2616 +giant 1 3 5.857933 5.857933 3137 +explos 1 3 5.857933 5.857933 3138 +alon 1 3 5.857933 5.857933 3139 +comfort 1 3 5.857933 5.857933 3136 +scratch 1 3 5.857933 5.857933 3140 +parser 1 3 5.857933 5.857933 3141 +predat 1 3 5.857933 5.857933 3135 +aproject 1 3 5.857933 5.857933 3142 +bibl 1 3 5.857933 5.857933 3143 +confus 1 3 5.857933 5.857933 3144 +databasemanag 1 2 6.263398 6.263398 4089 +certainli 1 2 6.263398 6.263398 4090 +proportion 1 2 6.263398 6.263398 4091 +thefirst 1 2 6.263398 6.263398 4092 +youto 1 2 6.263398 6.263398 4093 +builton 1 2 6.263398 6.263398 4094 +thehigh 1 2 6.263398 6.263398 4095 +korth 1 2 6.263398 6.263398 4051 +secondedit 1 2 6.263398 6.263398 4096 +ingr 1 2 6.263398 6.263398 4097 +elmasri 1 2 6.263398 6.263398 4059 +grai 1 2 6.263398 6.263398 4098 +reuter 1 2 6.263398 6.263398 4099 +likewis 1 2 6.263398 6.263398 4100 +confirm 1 2 6.263398 6.263398 4101 +weitsang 1 2 6.263398 6.263398 4088 +samplequest 1 1 6.957497 6.957497 6436 +outsidefirewal 1 1 6.957497 6.957497 6434 +predatordbm 1 1 6.957497 6.957497 6437 +currentproject 1 1 6.957497 6.957497 6438 +coursedescript 1 1 6.957497 6.957497 6439 +intendedto 1 1 6.957497 6.957497 6440 +slargest 1 1 6.957497 6.957497 6441 +piecesof 1 1 6.957497 6.957497 6442 +knowledgeabledatabas 1 1 6.957497 6.957497 6443 +researchcommun 1 1 6.957497 6.957497 6444 +addressedbecaus 1 1 6.957497 6.957497 6445 +informedus 1 1 6.957497 6.957497 6446 +teller 1 1 6.957497 6.957497 6447 +newcours 1 1 6.957497 6.957497 6448 +quickreview 1 1 6.957497 6.957497 6449 +abreadth 1 1 6.957497 6.957497 6450 +advancedtop 1 1 6.957497 6.957497 6451 +thepurpos 1 1 6.957497 6.957497 6452 +coursei 1 1 6.957497 6.957497 6453 +weeksaft 1 1 6.957497 6.957497 6454 +requireread 1 1 6.957497 6.957497 6455 +engineeringlibrari 1 1 6.957497 6.957497 6456 +pursueaddit 1 1 6.957497 6.957497 6457 +forinform 1 1 6.957497 6.957497 6458 +examtim 1 1 6.957497 6.957497 6459 +developmentproject 1 1 6.957497 6.957497 6460 +involvea 1 1 6.957497 6.957497 6461 +wishto 1 1 6.957497 6.957497 6462 +willinvolv 1 1 6.957497 6.957497 6463 +andmodifi 1 1 6.957497 6.957497 6464 +andrar 1 1 6.957497 6.957497 6465 +luxuri 1 1 6.957497 6.957497 6466 +thediffer 1 1 6.957497 6.957497 6467 +inevit 1 1 6.957497 6.957497 6468 +varioussystem 1 1 6.957497 6.957497 6469 +buffermanag 1 1 6.957497 6.957497 6470 +enginethat 1 1 6.957497 6.957497 6471 +possibleproject 1 1 6.957497 6.957497 6472 +likecomplex 1 1 6.957497 6.957497 6473 +becauseth 1 1 6.957497 6.957497 6474 +betweenminibas 1 1 6.957497 6.957497 6475 +somegener 1 1 6.957497 6.957497 6476 +ideaon 1 1 6.957497 6.957497 6477 +advanceof 1 1 6.957497 6.957497 6478 +submitan 1 1 6.957497 6.957497 6479 +discussth 1 1 6.957497 6.957497 6480 +particularsystem 1 1 6.957497 6.957497 6481 +documentwil 1 1 6.957497 6.957497 6482 +picki 1 1 6.957497 6.957497 6483 +geton 1 1 6.957497 6.957497 6484 +oftest 1 1 6.957497 6.957497 6485 +coursetextbook 1 1 6.957497 6.957497 6486 +bookcontain 1 1 6.957497 6.957497 6487 +databasebook 1 1 6.957497 6.957497 6488 +thecampu 1 1 6.957497 6.957497 6489 +stonebrak 1 1 6.957497 6.957497 6435 +collectedand 1 1 6.957497 6.957497 6490 +postgr 1 1 6.957497 6.957497 6491 +andillustra 1 1 6.957497 6.957497 6492 +corearea 1 1 6.957497 6.957497 6493 +navath 1 1 6.957497 6.957497 6494 +tellsyou 1 1 6.957497 6.957497 6495 +wonderfulrefer 1 1 6.957497 6.957497 6496 +debuggingwith 1 1 6.957497 6.957497 6497 +gradingpolici 1 1 6.957497 6.957497 6498 +percentag 1 1 6.957497 6.957497 6499 +anextra 1 1 6.957497 6.957497 6500 +thefin 1 1 6.957497 6.957497 6501 +willfocu 1 1 6.957497 6.957497 6502 +coveredin 1 1 6.957497 6.957497 6503 +professorpraveen 1 1 6.957497 6.957497 6504 +teachingassist 1 1 6.957497 6.957497 6505 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..ac602839 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +specif 1 106 2.197225 2.197225 106 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +instructor 1 108 2.197225 2.197225 107 +find 1 111 2.197225 2.197225 111 +assist 1 112 2.197225 2.197225 113 +text 1 98 2.302585 2.302585 133 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +call 1 91 2.397895 2.397895 153 +learn 1 86 2.484907 2.484907 170 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +upson 1 71 2.639057 2.639057 218 +logic 1 71 2.639057 2.639057 230 +line 1 75 2.639057 2.639057 231 +java 1 70 2.708050 2.708050 248 +goal 1 66 2.708050 2.708050 250 +knowledg 1 67 2.708050 2.708050 243 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +experi 1 64 2.772589 2.772589 283 +function 1 62 2.772589 2.772589 275 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +allow 1 53 2.944439 2.944439 333 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +tabl 1 51 2.995732 2.995732 346 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +understand 1 47 3.091042 3.091042 384 +better 1 45 3.135494 3.135494 401 +describ 1 45 3.135494 3.135494 400 +textbook 1 44 3.135494 3.135494 397 +math 1 44 3.135494 3.135494 402 +mark 1 44 3.135494 3.135494 403 +mechan 1 43 3.178054 3.178054 416 +edit 1 42 3.218876 3.218876 418 +programm 1 39 3.258097 3.258097 445 +theoret 1 39 3.258097 3.258097 446 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +formal 1 37 3.332205 3.332205 478 +survei 1 35 3.401197 3.401197 513 +tech 1 35 3.401197 3.401197 515 +least 1 35 3.401197 3.401197 516 +concept 1 32 3.465736 3.465736 537 +specifi 1 30 3.555348 3.555348 568 +semant 1 29 3.583519 3.583519 587 +turn 1 29 3.583519 3.583519 586 +though 1 27 3.637586 3.637586 622 +rather 1 26 3.688879 3.688879 642 +relev 1 26 3.688879 3.688879 637 +concern 1 25 3.737670 3.737670 666 +demonstr 1 24 3.761200 3.761200 694 +greg 1 24 3.761200 3.761200 695 +compress 1 23 3.806662 3.806662 719 +lead 1 23 3.806662 3.806662 718 +proof 1 23 3.806662 3.806662 720 +properti 1 22 3.850148 3.850148 749 +programminglanguag 1 21 3.912023 3.912023 782 +newsgroup 1 21 3.912023 3.912023 783 +safeti 1 20 3.951244 3.951244 817 +scheme 1 20 3.951244 3.951244 818 +prove 1 19 4.007333 4.007333 848 +prerequisit 1 19 4.007333 4.007333 846 +assum 1 19 4.007333 4.007333 845 +andrew 1 19 4.007333 4.007333 849 +analyz 1 17 4.110874 4.110874 925 +precis 1 15 4.248495 4.248495 1023 +carl 1 15 4.248495 4.248495 1024 +side 1 15 4.248495 4.248495 1022 +conduct 1 14 4.317488 4.317488 1065 +directli 1 13 4.382027 4.382027 1141 +larri 1 13 4.382027 4.382027 1142 +recurs 1 13 4.382027 4.382027 1127 +emac 1 13 4.382027 4.382027 1143 +pascal 1 12 4.465908 4.465908 1213 +calculu 1 12 4.465908 4.465908 1203 +meng 1 12 4.465908 4.465908 1214 +broad 1 11 4.553877 4.553877 1302 +induct 1 11 4.553877 4.553877 1304 +linda 1 10 4.653960 4.653960 1394 +entitl 1 9 4.753590 4.753590 1490 +notat 1 9 4.753590 4.753590 1489 +prefer 1 9 4.753590 4.753590 1491 +suitabl 1 9 4.753590 4.753590 1486 +admin 1 9 4.753590 4.753590 1476 +mode 1 9 4.753590 4.753590 1492 +ideal 1 8 4.875197 4.875197 1630 +leon 1 8 4.875197 4.875197 1631 +cum 1 8 4.875197 4.875197 1619 +dispatch 1 7 5.010635 5.010635 1791 +henc 1 7 5.010635 5.010635 1805 +predic 1 7 5.010635 5.010635 1806 +ture 1 6 5.164786 5.164786 1997 +morrisett 1 5 5.347108 5.347108 2263 +gentl 1 5 5.347108 5.347108 2264 +witha 1 4 5.568345 5.568345 2617 +haskel 1 4 5.568345 5.568345 2618 +principlesof 1 3 5.857933 5.857933 3145 +deeper 1 3 5.857933 5.857933 3146 +denot 1 3 5.857933 5.857933 3147 +noteshomework 1 2 6.263398 6.263398 4102 +profici 1 2 6.263398 6.263398 4103 +andlog 1 2 6.263398 6.263398 4104 +competillo 1 2 6.263398 6.263398 4105 +lfar 1 2 6.263398 6.263398 4106 +erlingsson 1 2 6.263398 6.263398 4107 +indexdocument 1 2 6.263398 6.263398 4108 +toolsa 1 2 6.263398 6.263398 4109 +prerequisiteshandoutsscrib 1 1 6.957497 6.957497 6506 +assignmentscontact 1 1 6.957497 6.957497 6507 +informationrelev 1 1 6.957497 6.957497 6508 +goalof 1 1 6.957497 6.957497 6509 +multipleinherit 1 1 6.957497 6.957497 6510 +subsum 1 1 6.957497 6.957497 6511 +thestudi 1 1 6.957497 6.957497 6512 +abstractli 1 1 6.957497 6.957497 6513 +howprogram 1 1 6.957497 6.957497 6514 +asnot 1 1 6.957497 6.957497 6515 +preciser 1 1 6.957497 6.957497 6516 +forform 1 1 6.957497 6.957497 6517 +somethingabout 1 1 6.957497 6.957497 6518 +tomanipul 1 1 6.957497 6.957497 6519 +gunter 1 1 6.957497 6.957497 6520 +paulson 1 1 6.957497 6.957497 6521 +undergraduatemathemat 1 1 6.957497 6.957497 6522 +mathematicalmatur 1 1 6.957497 6.957497 6523 +anmeng 1 1 6.957497 6.957497 6524 +ifth 1 1 6.957497 6.957497 6525 +ulfar 1 1 6.957497 6.957497 6526 +pmrelev 1 1 6.957497 6.957497 6527 +comint 1 1 6.957497 6.957497 6528 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..4b52c6fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +schedul 1 119 2.079442 2.079442 85 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +info 1 85 2.484907 2.484907 176 +materi 1 75 2.639057 2.639057 221 +staff 1 36 3.367296 3.367296 490 +newsgroup 1 21 3.912023 3.912023 783 +rivl 1 8 4.875197 4.875197 1632 +systemscomput 1 3 5.857933 5.857933 3148 +janosi 1 3 5.857933 5.857933 3149 +pagecsmultimedia 1 2 6.263398 6.263398 4110 +anounc 1 2 6.263398 6.263398 4111 +bugcom 1 2 6.263398 6.263398 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..b702397d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +send 1 114 2.197225 2.197225 109 +access 1 102 2.302585 2.302585 136 +question 1 91 2.397895 2.397895 141 +info 1 85 2.484907 2.484907 176 +materi 1 75 2.639057 2.639057 221 +staff 1 36 3.367296 3.367296 490 +newsgroup 1 21 3.912023 3.912023 783 +rivl 1 8 4.875197 4.875197 1632 +systemscomput 1 3 5.857933 5.857933 3148 +janosi 1 3 5.857933 5.857933 3149 +pagecsmultimedia 1 2 6.263398 6.263398 4110 +anounc 1 2 6.263398 6.263398 4111 +bugcom 1 2 6.263398 6.263398 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..16ae91bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +modifi 1 178 1.609438 1.609438 35 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +analysi 1 124 2.079442 2.079442 98 +instructor 1 108 2.197225 2.197225 107 +make 1 111 2.197225 2.197225 120 +text 1 98 2.302585 2.302585 133 +homepag 1 93 2.397895 2.397895 148 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +upson 1 71 2.639057 2.639057 218 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +copi 1 63 2.772589 2.772589 284 +locat 1 59 2.833213 2.833213 303 +talk 1 53 2.944439 2.944439 336 +announc 1 40 3.258097 3.258097 441 +approxim 1 35 3.401197 3.401197 509 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +sheet 1 16 4.174387 4.174387 973 +cannot 1 13 4.382027 4.382027 1144 +cheat 1 10 4.653960 4.653960 1395 +evan 1 8 4.875197 4.875197 1633 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +kozen 1 4 5.568345 5.568345 2619 +moran 1 3 5.857933 5.857933 3151 +addendum 1 3 5.857933 5.857933 3150 +rajeev 1 3 5.857933 5.857933 3152 +inupson 1 1 6.957497 6.957497 6529 +tome 1 1 6.957497 6.957497 6530 +reschedul 1 1 6.957497 6.957497 6531 +motwani 1 1 6.957497 6.957497 6532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..490526ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +parallel 1 169 1.791759 1.791759 60 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +architectur 1 139 1.945910 1.945910 77 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +look 1 107 2.197225 2.197225 115 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +issu 1 78 2.564949 2.564949 211 +complet 1 77 2.564949 2.564949 208 +upson 1 71 2.639057 2.639057 218 +line 1 75 2.639057 2.639057 231 +integr 1 67 2.708050 2.708050 245 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +locat 1 59 2.833213 2.833213 303 +share 1 59 2.833213 2.833213 304 +point 1 58 2.890372 2.890372 319 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +much 1 52 2.995732 2.995732 349 +week 1 52 2.995732 2.995732 343 +hardwar 1 51 2.995732 2.995732 350 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +execut 1 45 3.135494 3.135494 404 +offer 1 43 3.178054 3.178054 414 +term 1 43 3.178054 3.178054 411 +howev 1 41 3.218876 3.218876 422 +futur 1 41 3.218876 3.218876 427 +past 1 42 3.218876 3.218876 428 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +product 1 33 3.433987 3.433987 527 +taken 1 31 3.496508 3.496508 555 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +multiprocessor 1 28 3.610918 3.610918 605 +altern 1 26 3.688879 3.688879 641 +although 1 25 3.737670 3.737670 667 +aspect 1 25 3.737670 3.737670 663 +almost 1 22 3.850148 3.850148 742 +sequenti 1 22 3.850148 3.850148 745 +fact 1 21 3.912023 3.912023 780 +busi 1 21 3.912023 3.912023 784 +latest 1 21 3.912023 3.912023 785 +portabl 1 20 3.951244 3.951244 819 +spend 1 19 4.007333 4.007333 850 +layer 1 17 4.110874 4.110874 926 +across 1 16 4.174387 4.174387 974 +month 1 15 4.248495 4.248495 1025 +massiv 1 15 4.248495 4.248495 1026 +consider 1 14 4.317488 4.317488 1076 +easili 1 14 4.317488 4.317488 1077 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +cannot 1 13 4.382027 4.382027 1144 +stai 1 12 4.465908 4.465908 1215 +workload 1 12 4.465908 4.465908 1210 +grant 1 12 4.465908 4.465908 1216 +host 1 11 4.553877 4.553877 1306 +debugg 1 9 4.753590 4.753590 1493 +manufactur 1 8 4.875197 4.875197 1634 +competit 1 8 4.875197 4.875197 1635 +depth 1 8 4.875197 4.875197 1636 +contrast 1 8 4.875197 4.875197 1637 +smile 1 7 5.010635 5.010635 1807 +microprocessor 1 7 5.010635 5.010635 1808 +core 1 7 5.010635 5.010635 1809 +usabl 1 7 5.010635 5.010635 1810 +price 1 6 5.164786 5.164786 1999 +quickli 1 6 5.164786 5.164786 2000 +vari 1 6 5.164786 5.164786 2001 +feder 1 5 5.347108 5.347108 2266 +eas 1 5 5.347108 5.347108 2267 +suffer 1 5 5.347108 5.347108 2268 +matur 1 5 5.347108 5.347108 2269 +vertic 1 5 5.347108 5.347108 2270 +focuss 1 5 5.347108 5.347108 2271 +cut 1 4 5.568345 5.568345 2620 +shelf 1 4 5.568345 5.568345 2621 +slice 1 4 5.568345 5.568345 2622 +eickenfal 1 3 5.857933 5.857933 3125 +leverag 1 3 5.857933 5.857933 3153 +heat 1 2 6.263398 6.263398 4113 +glorifi 1 2 6.263398 6.263398 4114 +farm 1 2 6.263398 6.263398 4115 +adequ 1 2 6.263398 6.263398 4116 +horizont 1 2 6.263398 6.263398 4117 +pagefronti 1 1 6.957497 6.957497 6534 +pmoffic 1 1 6.957497 6.957497 6535 +pmcours 1 1 6.957497 6.957497 6536 +descriptionparallel 1 1 6.957497 6.957497 6537 +underscor 1 1 6.957497 6.957497 6538 +debat 1 1 6.957497 6.957497 6533 +erad 1 1 6.957497 6.957497 6539 +competitor 1 1 6.957497 6.957497 6540 +dash 1 1 6.957497 6.957497 6541 +materialscours 1 1 6.957497 6.957497 6542 +formatlectur 1 1 6.957497 6.957497 6543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..7798c597 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +machin 1 129 2.079442 2.079442 95 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +search 1 95 2.397895 2.397895 155 +question 1 91 2.397895 2.397895 141 +control 1 82 2.484907 2.484907 164 +help 1 83 2.484907 2.484907 175 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +contain 1 81 2.484907 2.484907 174 +homework 1 79 2.564949 2.564949 193 +know 1 80 2.564949 2.564949 198 +exampl 1 77 2.564949 2.564949 195 +refer 1 78 2.564949 2.564949 203 +come 1 78 2.564949 2.564949 202 +materi 1 75 2.639057 2.639057 221 +david 1 71 2.639057 2.639057 232 +onlin 1 75 2.639057 2.639057 223 +organ 1 65 2.772589 2.772589 265 +guid 1 63 2.772589 2.772589 267 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +sever 1 56 2.890372 2.890372 322 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +digit 1 52 2.995732 2.995732 348 +case 1 51 2.995732 2.995732 351 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +get 1 46 3.091042 3.091042 380 +video 1 44 3.135494 3.135494 405 +small 1 39 3.258097 3.258097 447 +tutori 1 39 3.258097 3.258097 437 +staff 1 36 3.367296 3.367296 490 +procedur 1 36 3.367296 3.367296 488 +common 1 30 3.555348 3.555348 574 +ask 1 28 3.610918 3.610918 597 +great 1 27 3.637586 3.637586 626 +never 1 25 3.737670 3.737670 671 +todai 1 25 3.737670 3.737670 672 +consult 1 24 3.761200 3.761200 687 +wish 1 24 3.761200 3.761200 692 +hierarchi 1 22 3.850148 3.850148 744 +path 1 21 3.912023 3.912023 778 +unit 1 21 3.912023 3.912023 779 +annot 1 21 3.912023 3.912023 775 +newsgroup 1 21 3.912023 3.912023 783 +lot 1 18 4.060443 4.060443 889 +otherwis 1 17 4.110874 4.110874 922 +fortran 1 15 4.248495 4.248495 1027 +thorsten 1 13 4.382027 4.382027 1133 +assembl 1 12 4.465908 4.465908 1207 +pascal 1 12 4.465908 4.465908 1213 +surf 1 11 4.553877 4.553877 1301 +combinatori 1 8 4.875197 4.875197 1629 +brain 1 8 4.875197 4.875197 1638 +interrupt 1 7 5.010635 5.010635 1793 +conot 1 5 5.347108 5.347108 2245 +eickenfal 1 3 5.857933 5.857933 3125 +kimbal 1 3 5.857933 5.857933 3114 +helpif 1 3 5.857933 5.857933 3126 +aproject 1 3 5.857933 5.857933 3142 +mate 1 3 5.857933 5.857933 3127 +cardiff 1 3 5.857933 5.857933 3154 +programsand 1 3 5.857933 5.857933 3111 +btopic 1 2 6.263398 6.263398 4085 +representationof 1 2 6.263398 6.263398 4119 +toon 1 2 6.263398 6.263398 4120 +marshal 1 2 6.263398 6.263398 4118 +pageintroduct 1 1 6.957497 6.957497 6544 +sequentialcircuit 1 1 6.957497 6.957497 6545 +andmicroprogram 1 1 6.957497 6.957497 6546 +theappropri 1 1 6.957497 6.957497 6547 +gethelp 1 1 6.957497 6.957497 6548 +informationcoursemateri 1 1 6.957497 6.957497 6549 +announcementsannounc 1 1 6.957497 6.957497 6550 +onlinean 1 1 6.957497 6.957497 6551 +forpeopl 1 1 6.957497 6.957497 6552 +cclass 1 1 6.957497 6.957497 6553 +learnc 1 1 6.957497 6.957497 6554 +theyahoo 1 1 6.957497 6.957497 6555 +ofmor 1 1 6.957497 6.957497 6556 +inansw 1 1 6.957497 6.957497 6557 +voneicken 1 1 6.957497 6.957497 6558 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..e3ae8232 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +welcom 1 122 2.079442 2.079442 99 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +grade 1 90 2.397895 2.397895 142 +academ 1 82 2.484907 2.484907 178 +exam 1 86 2.484907 2.484907 169 +intellig 1 72 2.639057 2.639057 225 +materi 1 75 2.639057 2.639057 221 +upson 1 71 2.639057 2.639057 218 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +date 1 51 2.995732 2.995732 344 +announc 1 40 3.258097 3.258097 441 +request 1 26 3.688879 3.688879 635 +altern 1 26 3.688879 3.688879 641 +sometim 1 24 3.761200 3.761200 696 +saturdai 1 7 5.010635 5.010635 1794 +clair 1 4 5.568345 5.568345 2605 +pagesc 1 3 5.857933 5.857933 3133 +pagecsfound 1 2 6.263398 6.263398 4086 +yourgrad 1 2 6.263398 6.263398 4121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..792f0ac1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +technolog 1 131 2.079442 2.079442 102 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +upson 1 71 2.639057 2.639057 218 +materi 1 75 2.639057 2.639057 221 +line 1 75 2.639057 2.639057 231 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +overview 1 56 2.890372 2.890372 323 +frequent 1 49 3.044522 3.044522 367 +staff 1 36 3.367296 3.367296 490 +ask 1 28 3.610918 3.610918 597 +edulast 1 17 4.110874 4.110874 927 +pagec 1 15 4.248495 4.248495 1011 +borland 1 14 4.317488 4.317488 1067 +recit 1 9 4.753590 4.753590 1475 +vineet 1 8 4.875197 4.875197 1639 +none 1 7 5.010635 5.010635 1811 +buch 1 5 5.347108 5.347108 2272 +samuel 1 3 5.857933 5.857933 3155 +weber 1 3 5.857933 5.857933 3156 +yaron 1 2 6.263398 6.263398 4122 +minski 1 2 6.263398 6.263398 4123 +remark 1 2 6.263398 6.263398 4124 +techniquescomput 1 1 6.957497 6.957497 6559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..9f731249 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +design 1 213 1.386294 1.386294 25 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +homework 1 79 2.564949 2.564949 193 +logic 1 71 2.639057 2.639057 230 +guid 1 63 2.772589 2.772589 267 +organ 1 65 2.772589 2.772589 265 +content 1 59 2.833213 2.833213 302 +tabl 1 51 2.995732 2.995732 346 +brian 1 38 3.295837 3.295837 466 +slide 1 38 3.295837 3.295837 467 +procedur 1 36 3.367296 3.367296 488 +smith 1 20 3.951244 3.951244 820 +recurs 1 13 4.382027 4.382027 1127 +assembl 1 12 4.465908 4.465908 1207 +tour 1 11 4.553877 4.553877 1307 +stack 1 10 4.653960 4.653960 1389 +spec 1 8 4.875197 4.875197 1640 +interrupt 1 7 5.010635 5.010635 1793 +linker 1 3 5.857933 5.857933 3157 +loader 1 1 6.957497 6.957497 6560 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..e4089d05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +wide 1 84 2.484907 2.484907 185 +exam 1 86 2.484907 2.484907 169 +messag 1 76 2.564949 2.564949 212 +april 1 77 2.564949 2.564949 196 +tuesdai 1 73 2.639057 2.639057 219 +materi 1 75 2.639057 2.639057 221 +import 1 65 2.772589 2.772589 282 +march 1 61 2.833213 2.833213 295 +februari 1 54 2.944439 2.944439 328 +get 1 46 3.091042 3.091042 380 +review 1 42 3.218876 3.218876 425 +held 1 28 3.610918 3.610918 600 +session 1 26 3.688879 3.688879 643 +thur 1 19 4.007333 4.007333 847 +rememb 1 12 4.465908 4.465908 1217 +prelim 1 12 4.465908 4.465908 1201 +regard 1 11 4.553877 4.553877 1309 +tue 1 11 4.553877 4.553877 1308 +baker 1 7 5.010635 5.010635 1812 +pierc 1 4 5.568345 5.568345 2623 +theworld 1 3 5.857933 5.857933 3158 +codewarrior 1 2 6.263398 6.263398 4125 +frequentlyfor 1 1 6.957497 6.957497 6561 +onsundai 1 1 6.957497 6.957497 6562 +personalmac 1 1 6.957497 6.957497 6563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..4056c1d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,261 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +address 1 170 1.791759 1.791759 62 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +object 1 138 1.945910 1.945910 79 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +architectur 1 139 1.945910 1.945910 77 +process 1 142 1.945910 1.945910 72 +spring 1 131 2.079442 2.079442 88 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +find 1 111 2.197225 2.197225 111 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +need 1 98 2.302585 2.302585 135 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +level 1 87 2.484907 2.484907 180 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +april 1 77 2.564949 2.564949 196 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +sourc 1 77 2.564949 2.564949 201 +tuesdai 1 73 2.639057 2.639057 219 +name 1 72 2.639057 2.639057 220 +addit 1 74 2.639057 2.639057 228 +upson 1 71 2.639057 2.639057 218 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +view 1 70 2.708050 2.708050 254 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +window 1 68 2.708050 2.708050 242 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +abstract 1 62 2.772589 2.772589 276 +function 1 62 2.772589 2.772589 275 +complex 1 64 2.772589 2.772589 269 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +unix 1 58 2.890372 2.890372 308 +think 1 57 2.890372 2.890372 314 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +date 1 51 2.995732 2.995732 344 +appoint 1 49 3.044522 3.044522 358 +format 1 48 3.044522 3.044522 356 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +set 1 50 3.044522 3.044522 361 +effect 1 46 3.091042 3.091042 385 +netscap 1 44 3.135494 3.135494 395 +fridai 1 44 3.135494 3.135494 390 +http 1 41 3.218876 3.218876 420 +credit 1 38 3.295837 3.295837 460 +correct 1 38 3.295837 3.295837 462 +microsoft 1 38 3.295837 3.295837 468 +industri 1 38 3.295837 3.295837 464 +soon 1 36 3.367296 3.367296 494 +staff 1 36 3.367296 3.367296 490 +tree 1 36 3.367296 3.367296 492 +word 1 34 3.401197 3.401197 508 +next 1 34 3.401197 3.401197 517 +print 1 34 3.401197 3.401197 503 +manual 1 35 3.401197 3.401197 504 +given 1 32 3.465736 3.465736 538 +storag 1 31 3.496508 3.496508 553 +held 1 28 3.610918 3.610918 600 +framework 1 28 3.610918 3.610918 606 +arrai 1 27 3.637586 3.637586 627 +session 1 26 3.688879 3.688879 643 +enhanc 1 26 3.688879 3.688879 644 +jeff 1 25 3.737670 3.737670 673 +consult 1 24 3.761200 3.761200 687 +other 1 24 3.761200 3.761200 697 +lab 1 24 3.761200 3.761200 698 +thank 1 23 3.806662 3.806662 721 +proof 1 23 3.806662 3.806662 720 +almost 1 22 3.850148 3.850148 742 +inth 1 22 3.850148 3.850148 741 +alloc 1 20 3.951244 3.951244 821 +fine 1 20 3.951244 3.951244 822 +binari 1 20 3.951244 3.951244 823 +prerequisit 1 19 4.007333 4.007333 846 +prove 1 19 4.007333 4.007333 848 +regular 1 17 4.110874 4.110874 929 +macintosh 1 17 4.110874 4.110874 920 +expand 1 17 4.110874 4.110874 928 +intro 1 17 4.110874 4.110874 915 +jose 1 16 4.174387 4.174387 976 +condit 1 16 4.174387 4.174387 975 +charact 1 15 4.248495 4.248495 1028 +piec 1 15 4.248495 4.248495 1020 +recurs 1 13 4.382027 4.382027 1127 +alan 1 13 4.382027 4.382027 1146 +deriv 1 13 4.382027 4.382027 1145 +prelim 1 12 4.465908 4.465908 1201 +grant 1 12 4.465908 4.465908 1216 +iter 1 12 4.465908 4.465908 1206 +onth 1 12 4.465908 4.465908 1218 +chri 1 11 4.553877 4.553877 1311 +loop 1 11 4.553877 4.553877 1310 +princip 1 10 4.653960 4.653960 1397 +sundai 1 10 4.653960 4.653960 1387 +rich 1 10 4.653960 4.653960 1396 +equival 1 9 4.753590 4.753590 1496 +intermedi 1 9 4.753590 4.753590 1497 +plain 1 9 4.753590 4.753590 1495 +preliminari 1 9 4.753590 4.753590 1480 +notat 1 9 4.753590 4.753590 1489 +strength 1 9 4.753590 4.753590 1494 +filter 1 8 4.875197 4.875197 1641 +printer 1 8 4.875197 4.875197 1621 +dictionari 1 8 4.875197 4.875197 1642 +noon 1 7 5.010635 5.010635 1804 +none 1 7 5.010635 5.010635 1811 +troubl 1 6 5.164786 5.164786 2002 +yale 1 6 5.164786 5.164786 2003 +contest 1 5 5.347108 5.347108 2273 +cell 1 5 5.347108 5.347108 2274 +ofprogram 1 4 5.568345 5.568345 2624 +flavor 1 4 5.568345 5.568345 2625 +haskel 1 4 5.568345 5.568345 2618 +cuinfo 1 4 5.568345 5.568345 2626 +foster 1 3 5.857933 5.857933 3159 +tocomput 1 3 5.857933 5.857933 3162 +walker 1 3 5.857933 5.857933 3161 +tripl 1 3 5.857933 5.857933 3160 +ghostview 1 3 5.857933 5.857933 3163 +maker 1 3 5.857933 5.857933 3164 +thesecond 1 2 6.263398 6.263398 4128 +datatyp 1 2 6.263398 6.263398 4129 +kwan 1 2 6.263398 6.263398 4126 +stuffit 1 2 6.263398 6.263398 4127 +codewarrior 1 2 6.263398 6.263398 4125 +csdepart 1 2 6.263398 6.263398 4130 +metrowerk 1 2 6.263398 6.263398 4131 +jfoster 1 1 6.957497 6.957497 6566 +earlyvers 1 1 6.957497 6.957497 6576 +announcetim 1 1 6.957497 6.957497 6577 +theprelim 1 1 6.957497 6.957497 6578 +wereannounc 1 1 6.957497 6.957497 6579 +olin 1 1 6.957497 6.957497 6571 +lastnam 1 1 6.957497 6.957497 6580 +covereveryth 1 1 6.957497 6.957497 6581 +topicsconv 1 1 6.957497 6.957497 6582 +daywhenwherewhomondai 1 1 6.957497 6.957497 6583 +davetuesdai 1 1 6.957497 6.957497 6584 +jeffwednesdai 1 1 6.957497 6.957497 6585 +davethursdai 1 1 6.957497 6.957497 6586 +halfridai 1 1 6.957497 6.957497 6587 +halsaturdai 1 1 6.957497 6.957497 6588 +breview 1 1 6.957497 6.957497 6589 +chrisand 1 1 6.957497 6.957497 6590 +gofer 1 1 6.957497 6.957497 6564 +engrd 1 1 6.957497 6.957497 6591 +bothcom 1 1 6.957497 6.957497 6592 +programmingexperi 1 1 6.957497 6.957497 6593 +ofalgorithm 1 1 6.957497 6.957497 6594 +perkin 1 1 6.957497 6.957497 6595 +sectionsdaytimeroominstructortuesdai 1 1 6.957497 6.957497 6596 +ahal 1 1 6.957497 6.957497 6572 +perkinstuesdai 1 1 6.957497 6.957497 6597 +perkinswednesdai 1 1 6.957497 6.957497 6598 +hollist 1 1 6.957497 6.957497 6567 +walkerwednesdai 1 1 6.957497 6.957497 6573 +walkerthursdai 1 1 6.957497 6.957497 6599 +fosterfridai 1 1 6.957497 6.957497 6600 +ofclass 1 1 6.957497 6.957497 6601 +consultingsundaymondaytuesdaywednesdaythursdayfridai 1 1 6.957497 6.957497 6602 +steveerickylechrisjpkyl 1 1 6.957497 6.957497 6603 +steveerickylechrisjpvasantha 1 1 6.957497 6.957497 6604 +josejosekayjosejpvasantha 1 1 6.957497 6.957497 6605 +josejosekayjosejp 1 1 6.957497 6.957497 6606 +kaykylesteveericvasantha 1 1 6.957497 6.957497 6574 +danerickaychrisdan 1 1 6.957497 6.957497 6575 +binhqx 1 1 6.957497 6.957497 6568 +macbinari 1 1 6.957497 6.957497 6607 +parseabl 1 1 6.957497 6.957497 6608 +waspost 1 1 6.957497 6.957497 6609 +dynamicdata 1 1 6.957497 6.957497 6569 +curri 1 1 6.957497 6.957497 6570 +foraladdin 1 1 6.957497 6.957497 6610 +armandonunez 1 1 6.957497 6.957497 6611 +anylas 1 1 6.957497 6.957497 6612 +applicationlik 1 1 6.957497 6.957497 6613 +macgof 1 1 6.957497 6.957497 6565 +ishaskel 1 1 6.957497 6.957497 6614 +systemsz 1 1 6.957497 6.957497 6615 +ofgof 1 1 6.957497 6.957497 6616 +itavail 1 1 6.957497 6.957497 6617 +enhance_assign 1 1 6.957497 6.957497 6618 +aladdin 1 1 6.957497 6.957497 6619 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..6acd035d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +modifi 1 178 1.609438 1.609438 35 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +make 1 111 2.197225 2.197225 120 +section 1 94 2.397895 2.397895 149 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +materi 1 75 2.639057 2.639057 221 +room 1 59 2.833213 2.833213 301 +extens 1 53 2.944439 2.944439 340 +date 1 51 2.995732 2.995732 344 +announc 1 40 3.258097 3.258097 441 +staff 1 36 3.367296 3.367296 490 +demo 1 18 4.060443 4.060443 888 +pagec 1 15 4.248495 4.248495 1011 +happi 1 14 4.317488 4.317488 1079 +emac 1 13 4.382027 4.382027 1143 +prelim 1 12 4.465908 4.465908 1201 +departmentcornel 1 5 5.347108 5.347108 2275 +grader 1 3 5.857933 5.857933 3165 +universityspr 1 2 6.263398 6.263398 4055 +interpretationof 1 1 6.957497 6.957497 6620 +programscomput 1 1 6.957497 6.957497 6621 +macmarlai 1 1 6.957497 6.957497 6622 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..e0cc69ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +make 1 111 2.197225 2.197225 120 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +advanc 1 99 2.302585 2.302585 130 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +orient 1 80 2.564949 2.564949 205 +refer 1 78 2.564949 2.564949 203 +effici 1 73 2.639057 2.639057 233 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +line 1 75 2.639057 2.639057 231 +java 1 70 2.708050 2.708050 248 +thursdai 1 70 2.708050 2.708050 241 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +collect 1 65 2.772589 2.772589 268 +function 1 62 2.772589 2.772589 275 +handout 1 64 2.772589 2.772589 263 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +featur 1 46 3.091042 3.091042 386 +mark 1 44 3.135494 3.135494 403 +continu 1 39 3.258097 3.258097 448 +survei 1 35 3.401197 3.401197 513 +kind 1 32 3.465736 3.465736 541 +focu 1 30 3.555348 3.555348 571 +synchron 1 29 3.583519 3.583519 588 +except 1 28 3.610918 3.610918 607 +greg 1 24 3.761200 3.761200 695 +thread 1 23 3.806662 3.806662 722 +separ 1 19 4.007333 4.007333 844 +andrew 1 19 4.007333 4.007333 849 +modern 1 16 4.174387 4.174387 966 +linda 1 10 4.653960 4.653960 1394 +admin 1 9 4.753590 4.753590 1476 +dylan 1 8 4.875197 4.875197 1625 +closur 1 8 4.875197 4.875197 1643 +evan 1 8 4.875197 4.875197 1633 +leon 1 8 4.875197 4.875197 1631 +cum 1 8 4.875197 4.875197 1619 +implementationof 1 7 5.010635 5.010635 1813 +garbag 1 6 5.164786 5.164786 1986 +notabl 1 5 5.347108 5.347108 2276 +morrisett 1 5 5.347108 5.347108 2263 +gentl 1 5 5.347108 5.347108 2264 +haskel 1 4 5.568345 5.568345 2618 +polymorph 1 4 5.568345 5.568345 2627 +administrivia 1 3 5.857933 5.857933 3166 +moran 1 3 5.857933 5.857933 3151 +competillo 1 2 6.263398 6.263398 4105 +indexdocument 1 2 6.263398 6.263398 4108 +toolsa 1 2 6.263398 6.263398 4109 +descriptionhandoutsadministriviaweb 1 1 6.957497 6.957497 6623 +ofmodern 1 1 6.957497 6.957497 6624 +connectionsto 1 1 6.957497 6.957497 6625 +pmweb 1 1 6.957497 6.957497 6626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..c7d4364f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +postscript 1 131 2.079442 2.079442 90 +final 1 116 2.197225 2.197225 108 +homework 1 79 2.564949 2.564949 193 +practic 1 70 2.708050 2.708050 246 +overview 1 56 2.890372 2.890372 323 +examin 1 42 3.218876 3.218876 424 +bibliographi 1 34 3.401197 3.401197 518 +annot 1 21 3.912023 3.912023 775 +prepar 1 20 3.951244 3.951244 824 +necessari 1 13 4.382027 4.382027 1147 +registr 1 5 5.347108 5.347108 2249 +informationcours 1 3 5.857933 5.857933 3167 +systemspract 1 1 6.957497 6.957497 6627 +takingc 1 1 6.957497 6.957497 6628 +logist 1 1 6.957497 6.957497 6629 +homeworkshomework 1 1 6.957497 6.957497 6630 +amexaminationsmidterm 1 1 6.957497 6.957497 6631 +bibliographiesselect 1 1 6.957497 6.957497 6632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..987e1563 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +report 1 131 2.079442 2.079442 92 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +follow 1 92 2.397895 2.397895 143 +solut 1 82 2.484907 2.484907 162 +learn 1 86 2.484907 2.484907 170 +info 1 85 2.484907 2.484907 176 +start 1 83 2.484907 2.484907 173 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +know 1 80 2.564949 2.564949 198 +homework 1 79 2.564949 2.564949 193 +upson 1 71 2.639057 2.639057 218 +solv 1 73 2.639057 2.639057 234 +order 1 69 2.708050 2.708050 249 +result 1 65 2.772589 2.772589 281 +import 1 65 2.772589 2.772589 282 +think 1 57 2.890372 2.890372 314 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +give 1 50 3.044522 3.044522 359 +discuss 1 45 3.135494 3.135494 399 +futur 1 41 3.218876 3.218876 427 +might 1 41 3.218876 3.218876 426 +late 1 40 3.258097 3.258097 439 +must 1 40 3.258097 3.258097 442 +open 1 38 3.295837 3.295837 469 +everi 1 34 3.401197 3.401197 519 +board 1 33 3.433987 3.433987 528 +taken 1 31 3.496508 3.496508 555 +option 1 30 3.555348 3.555348 575 +limit 1 29 3.583519 3.583519 585 +particip 1 29 3.583519 3.583519 589 +consid 1 29 3.583519 3.583519 590 +usual 1 28 3.610918 3.610918 608 +measur 1 28 3.610918 3.610918 609 +session 1 26 3.688879 3.688879 643 +experiment 1 26 3.688879 3.688879 645 +begin 1 23 3.806662 3.806662 716 +minut 1 20 3.951244 3.951244 810 +left 1 19 4.007333 4.007333 851 +sign 1 16 4.174387 4.174387 970 +across 1 16 4.174387 4.174387 974 +contribut 1 15 4.248495 4.248495 1021 +consider 1 14 4.317488 4.317488 1076 +shown 1 14 4.317488 4.317488 1080 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +everyon 1 13 4.382027 4.382027 1148 +difficulti 1 13 4.382027 4.382027 1132 +eicken 1 13 4.382027 4.382027 1134 +outsid 1 12 4.465908 4.465908 1219 +pick 1 9 4.753590 4.753590 1498 +hang 1 9 4.753590 4.753590 1499 +andth 1 9 4.753590 4.753590 1481 +hold 1 8 4.875197 4.875197 1645 +judg 1 8 4.875197 4.875197 1644 +absolut 1 8 4.875197 4.875197 1646 +poster 1 7 5.010635 5.010635 1814 +noon 1 7 5.010635 5.010635 1804 +trade 1 7 5.010635 5.010635 1815 +explain 1 7 5.010635 5.010635 1816 +exactli 1 7 5.010635 5.010635 1817 +plu 1 6 5.164786 5.164786 2004 +willb 1 5 5.347108 5.347108 2277 +remain 1 5 5.347108 5.347108 2278 +gotten 1 4 5.568345 5.568345 2628 +chose 1 4 5.568345 5.568345 2629 +cuc 1 4 5.568345 5.568345 2630 +attack 1 3 5.857933 5.857933 3168 +memberof 1 3 5.857933 5.857933 3169 +off 1 3 5.857933 5.857933 3170 +arriv 1 2 6.263398 6.263398 4132 +subdirectori 1 2 6.263398 6.263398 4133 +thorough 1 2 6.263398 6.263398 4134 +programmingin 1 2 6.263398 6.263398 4135 +pagehigh 1 1 6.957497 6.957497 6633 +eickenspr 1 1 6.957497 6.957497 6634 +sessionthu 1 1 6.957497 6.957497 6635 +tbdpleas 1 1 6.957497 6.957497 6636 +willdetermin 1 1 6.957497 6.957497 6637 +postersess 1 1 6.957497 6.957497 6638 +cindywilliam 1 1 6.957497 6.957497 6639 +ithorizont 1 1 6.957497 6.957497 6640 +corridor 1 1 6.957497 6.957497 6641 +presentyour 1 1 6.957497 6.957497 6642 +asens 1 1 6.957497 6.957497 6643 +contempl 1 1 6.957497 6.957497 6644 +presentationswil 1 1 6.957497 6.957497 6645 +nativespeak 1 1 6.957497 6.957497 6646 +thelongest 1 1 6.957497 6.957497 6647 +tocom 1 1 6.957497 6.957497 6648 +finalreport 1 1 6.957497 6.957497 6649 +aretri 1 1 6.957497 6.957497 6650 +thesolut 1 1 6.957497 6.957497 6651 +youreject 1 1 6.957497 6.957497 6652 +webread 1 1 6.957497 6.957497 6653 +convic 1 1 6.957497 6.957497 6654 +bestsolut 1 1 6.957497 6.957497 6655 +showcas 1 1 6.957497 6.957497 6656 +ampl 1 1 6.957497 6.957497 6657 +goodexplan 1 1 6.957497 6.957497 6658 +whatyou 1 1 6.957497 6.957497 6659 +projectsproject 1 1 6.957497 6.957497 6660 +reportsproject 1 1 6.957497 6.957497 6661 +proposalsiniti 1 1 6.957497 6.957497 6662 +ideascours 1 1 6.957497 6.957497 6663 +materialshomework 1 1 6.957497 6.957497 6664 +pagebefor 1 1 6.957497 6.957497 6665 +introc 1 1 6.957497 6.957497 6666 +casec 1 1 6.957497 6.957497 6667 +technologyc 1 1 6.957497 6.957497 6668 +cachesc 1 1 6.957497 6.957497 6669 +netsc 1 1 6.957497 6.957497 6670 +spc 1 1 6.957497 6.957497 6671 +cyou 1 1 6.957497 6.957497 6672 +emdc 1 1 6.957497 6.957497 6673 +sortingc 1 1 6.957497 6.957497 6674 +spamc 1 1 6.957497 6.957497 6675 +msgpassc 1 1 6.957497 6.957497 6676 +mpic 1 1 6.957497 6.957497 6677 +cachecohc 1 1 6.957497 6.957497 6678 +locksc 1 1 6.957497 6.957497 6679 +threadsc 1 1 6.957497 6.957497 6680 +atmc 1 1 6.957497 6.957497 6681 +netc 1 1 6.957497 6.957497 6682 +scoreboardc 1 1 6.957497 6.957497 6683 +tomasuloc 1 1 6.957497 6.957497 6684 +predc 1 1 6.957497 6.957497 6685 +superscalarc 1 1 6.957497 6.957497 6686 +busesc 1 1 6.957497 6.957497 6687 +pentiummaintain 1 1 6.957497 6.957497 6688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..728ed73e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +class 1 199 1.609438 1.609438 37 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +sourc 1 77 2.564949 2.564949 201 +simul 1 66 2.708050 2.708050 255 +januari 1 62 2.772589 2.772589 264 +march 1 61 2.833213 2.833213 295 +februari 1 54 2.944439 2.944439 328 +week 1 52 2.995732 2.995732 343 +set 1 50 3.044522 3.044522 361 +vision 1 41 3.218876 3.218876 430 +continu 1 39 3.258097 3.258097 448 +field 1 37 3.332205 3.332205 482 +staff 1 36 3.367296 3.367296 490 +random 1 34 3.401197 3.401197 511 +transform 1 32 3.465736 3.465736 542 +detect 1 26 3.688879 3.688879 646 +constraint 1 26 3.688879 3.688879 636 +motion 1 24 3.761200 3.761200 699 +flow 1 24 3.761200 3.761200 700 +recognit 1 23 3.806662 3.806662 723 +geometri 1 22 3.850148 3.850148 752 +geometr 1 19 4.007333 4.007333 852 +histori 1 19 4.007333 4.007333 853 +regular 1 17 4.110874 4.110874 929 +estim 1 17 4.110874 4.110874 930 +segment 1 17 4.110874 4.110874 931 +track 1 15 4.248495 4.248495 1029 +calculu 1 12 4.465908 4.465908 1203 +guest 1 12 4.465908 4.465908 1220 +optic 1 12 4.465908 4.465908 1221 +distanc 1 9 4.753590 4.753590 1500 +face 1 9 4.753590 4.753590 1501 +edg 1 8 4.875197 4.875197 1647 +ramin 1 7 5.010635 5.010635 1820 +justin 1 7 5.010635 5.010635 1789 +stereo 1 7 5.010635 5.010635 1818 +parametr 1 7 5.010635 5.010635 1819 +variat 1 5 5.347108 5.347108 2248 +markov 1 5 5.347108 5.347108 2280 +snake 1 5 5.347108 5.347108 2281 +correl 1 5 5.347108 5.347108 2279 +scribe 1 4 5.568345 5.568345 2631 +maximum 1 4 5.568345 5.568345 2632 +hausdorff 1 4 5.568345 5.568345 2633 +likelihood 1 3 5.857933 5.857933 3172 +cont 1 3 5.857933 5.857933 3171 +anneal 1 2 6.263398 6.263398 4136 +zabihteach 1 1 6.957497 6.957497 6689 +millerclass 1 1 6.957497 6.957497 6690 +phillip 1 1 6.957497 6.957497 6691 +suggestionsproblem 1 1 6.957497 6.957497 6692 +mestim 1 1 6.957497 6.957497 6693 +censu 1 1 6.957497 6.957497 6694 +eigenhausdorff 1 1 6.957497 6.957497 6695 +recognitionsect 1 1 6.957497 6.957497 6696 +equationoth 1 1 6.957497 6.957497 6697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..e82a1d44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +final 1 116 2.197225 2.197225 108 +site 1 106 2.197225 2.197225 119 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +environ 1 84 2.484907 2.484907 177 +contain 1 81 2.484907 2.484907 174 +materi 1 75 2.639057 2.639057 221 +integr 1 67 2.708050 2.708050 245 +descript 1 64 2.772589 2.772589 271 +content 1 59 2.833213 2.833213 302 +variou 1 56 2.890372 2.890372 317 +talk 1 53 2.944439 2.944439 336 +archiv 1 49 3.044522 3.044522 364 +pointer 1 48 3.044522 3.044522 368 +natur 1 44 3.135494 3.135494 406 +directori 1 45 3.135494 3.135494 396 +execut 1 45 3.135494 3.135494 404 +small 1 39 3.258097 3.258097 447 +ofth 1 36 3.367296 3.367296 491 +compon 1 30 3.555348 3.555348 570 +turn 1 29 3.583519 3.583519 586 +variabl 1 23 3.806662 3.806662 715 +annot 1 21 3.912023 3.912023 775 +sure 1 20 3.951244 3.951244 813 +repositori 1 17 4.110874 4.110874 932 +brown 1 16 4.174387 4.174387 977 +speech 1 12 4.465908 4.465908 1222 +tag 1 7 5.010635 5.010635 1821 +corpu 1 5 5.347108 5.347108 2282 +penn 1 3 5.857933 5.857933 3094 +pagesc 1 3 5.857933 5.857933 3133 +brill 1 2 6.263398 6.263398 4137 +treebank 1 2 6.263398 6.263398 4138 +schedulewhat 1 2 6.263398 6.263398 4139 +pagecsintroduct 1 1 6.957497 6.957497 6699 +understandingcomput 1 1 6.957497 6.957497 6700 +announcementsher 1 1 6.957497 6.957497 6701 +taggerbrown 1 1 6.957497 6.957497 6702 +withpart 1 1 6.957497 6.957497 6703 +wordnet 1 1 6.957497 6.957497 6698 +wnsearchdir 1 1 6.957497 6.957497 6704 +dict 1 1 6.957497 6.957497 6705 +iicollect 1 1 6.957497 6.957497 6706 +canus 1 1 6.957497 6.957497 6707 +francisabout 1 1 6.957497 6.957497 6708 +computationallinguist 1 1 6.957497 6.957497 6709 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..6a1fb13e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +follow 1 92 2.397895 2.397895 143 +section 1 94 2.397895 2.397895 149 +info 1 85 2.484907 2.484907 176 +exam 1 86 2.484907 2.484907 169 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +room 1 59 2.833213 2.833213 301 +cover 1 55 2.944439 2.944439 329 +still 1 50 3.044522 3.044522 362 +close 1 38 3.295837 3.295837 465 +usual 1 28 3.610918 3.610918 608 +lab 1 24 3.761200 3.761200 698 +prelim 1 12 4.465908 4.465908 1201 +therefor 1 7 5.010635 5.010635 1822 +philip 1 6 5.164786 5.164786 2005 +circumst 1 5 5.347108 5.347108 2283 +materialcov 1 2 6.263398 6.263398 4140 +announcementsroom 1 1 6.957497 6.957497 6710 +unforseen 1 1 6.957497 6.957497 6711 +unableto 1 1 6.957497 6.957497 6712 +maclab 1 1 6.957497 6.957497 6713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..26b4b213 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +find 1 111 2.197225 2.197225 111 +solut 1 82 2.484907 2.484907 162 +dynam 1 76 2.564949 2.564949 194 +explor 1 58 2.890372 2.890372 324 +tree 1 36 3.367296 3.367296 492 +graph 1 30 3.555348 3.555348 576 +theorem 1 21 3.912023 3.912023 786 +matrix 1 17 4.110874 4.110874 933 +closur 1 8 4.875197 4.875197 1643 +karp 1 5 5.347108 5.347108 2284 +ford 1 4 5.568345 5.568345 2636 +union 1 4 5.568345 5.568345 2634 +push 1 4 5.568345 5.568345 2635 +dijkstra 1 3 5.857933 5.857933 3173 +heap 1 3 5.857933 5.857933 3123 +monika 1 2 6.263398 6.263398 4141 +rauch 1 2 6.263398 6.263398 4142 +greedi 1 2 6.263398 6.263398 4143 +edmond 1 2 6.263398 6.263398 4144 +henzingeremail 1 1 6.957497 6.957497 6718 +informationhomework 1 1 6.957497 6.957497 6719 +matroid 1 1 6.957497 6.957497 6715 +bellman 1 1 6.957497 6.957497 6720 +binomi 1 1 6.957497 6.957497 6716 +fibonacci 1 1 6.957497 6.957497 6721 +treap 1 1 6.957497 6.957497 6722 +randomizedsearch 1 1 6.957497 6.957497 6723 +maxflow 1 1 6.957497 6.957497 6714 +mincut 1 1 6.957497 6.957497 6724 +dinitz 1 1 6.957497 6.957497 6725 +preflow 1 1 6.957497 6.957497 6717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..cef2c7eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,213 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +hour 1 165 1.791759 1.791759 46 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +need 1 98 2.302585 2.302585 135 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +environ 1 84 2.484907 2.484907 177 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +sourc 1 77 2.564949 2.564949 201 +june 1 79 2.564949 2.564949 214 +method 1 80 2.564949 2.564949 213 +exampl 1 77 2.564949 2.564949 195 +solv 1 73 2.639057 2.639057 234 +effici 1 73 2.639057 2.639057 233 +upson 1 71 2.639057 2.639057 218 +meet 1 72 2.639057 2.639057 229 +name 1 72 2.639057 2.639057 220 +addit 1 74 2.639057 2.639057 228 +syllabu 1 67 2.708050 2.708050 247 +integr 1 67 2.708050 2.708050 245 +order 1 69 2.708050 2.708050 249 +handout 1 64 2.772589 2.772589 263 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +function 1 62 2.772589 2.772589 275 +plan 1 65 2.772589 2.772589 272 +best 1 59 2.833213 2.833213 299 +juli 1 60 2.833213 2.833213 305 +locat 1 59 2.833213 2.833213 303 +point 1 58 2.890372 2.890372 319 +unix 1 58 2.890372 2.890372 308 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +numer 1 49 3.044522 3.044522 369 +appoint 1 49 3.044522 3.044522 358 +set 1 50 3.044522 3.044522 361 +done 1 47 3.091042 3.091042 381 +adapt 1 46 3.091042 3.091042 387 +math 1 44 3.135494 3.135494 402 +midterm 1 45 3.135494 3.135494 392 +linear 1 41 3.218876 3.218876 431 +review 1 42 3.218876 3.218876 425 +late 1 40 3.258097 3.258097 439 +error 1 40 3.258097 3.258097 449 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +least 1 35 3.401197 3.401197 516 +everi 1 34 3.401197 3.401197 519 +return 1 34 3.401197 3.401197 502 +given 1 32 3.465736 3.465736 538 +chapter 1 32 3.465736 3.465736 536 +administr 1 27 3.637586 3.637586 628 +though 1 27 3.637586 3.637586 622 +rule 1 26 3.688879 3.688879 638 +session 1 26 3.688879 3.688879 643 +reliabl 1 25 3.737670 3.737670 674 +concern 1 25 3.737670 3.737670 666 +valu 1 25 3.737670 3.737670 665 +lab 1 24 3.761200 3.761200 698 +equat 1 23 3.806662 3.806662 724 +begin 1 23 3.806662 3.806662 716 +variabl 1 23 3.806662 3.806662 715 +initi 1 23 3.806662 3.806662 717 +highli 1 23 3.806662 3.806662 725 +brows 1 23 3.806662 3.806662 726 +dai 1 22 3.850148 3.850148 753 +recommend 1 22 3.850148 3.850148 737 +instal 1 22 3.850148 3.850148 754 +viewer 1 21 3.912023 3.912023 787 +record 1 18 4.060443 4.060443 890 +account 1 18 4.060443 4.060443 882 +accept 1 18 4.060443 4.060443 879 +minim 1 18 4.060443 4.060443 887 +stand 1 18 4.060443 4.060443 891 +matrix 1 17 4.110874 4.110874 933 +macintosh 1 17 4.110874 4.110874 920 +adam 1 17 4.110874 4.110874 934 +vector 1 16 4.174387 4.174387 961 +purchas 1 15 4.248495 4.248495 1030 +score 1 15 4.248495 4.248495 1017 +squar 1 14 4.317488 4.317488 1082 +matlab 1 14 4.317488 4.317488 1081 +rank 1 14 4.317488 4.317488 1063 +polynomi 1 14 4.317488 4.317488 1069 +command 1 14 4.317488 4.317488 1083 +charl 1 13 4.382027 4.382027 1149 +composit 1 13 4.382027 4.382027 1150 +outsid 1 12 4.465908 4.465908 1219 +extra 1 11 4.553877 4.553877 1312 +total 1 10 4.653960 4.653960 1398 +matric 1 10 4.653960 4.653960 1399 +length 1 10 4.653960 4.653960 1400 +pair 1 9 4.753590 4.753590 1503 +rel 1 9 4.753590 4.753590 1487 +float 1 9 4.753590 4.753590 1504 +deadlin 1 9 4.753590 4.753590 1502 +partner 1 8 4.875197 4.875197 1648 +calendar 1 8 4.875197 4.875197 1649 +root 1 8 4.875197 4.875197 1650 +on 1 8 4.875197 4.875197 1628 +elementari 1 7 5.010635 5.010635 1825 +interpol 1 7 5.010635 5.010635 1823 +accord 1 7 5.010635 5.010635 1826 +newton 1 7 5.010635 5.010635 1824 +spline 1 6 5.164786 5.164786 2007 +drop 1 6 5.164786 5.164786 2008 +classroom 1 6 5.164786 5.164786 2006 +otherthan 1 6 5.164786 5.164786 2009 +fit 1 5 5.347108 5.347108 2285 +stabil 1 5 5.347108 5.347108 2286 +worst 1 5 5.347108 5.347108 2287 +ignor 1 5 5.347108 5.347108 2288 +registr 1 5 5.347108 5.347108 2249 +niko 1 4 5.568345 5.568345 2637 +backward 1 4 5.568345 5.568345 2638 +pitsiani 1 3 5.857933 5.857933 3175 +rack 1 3 5.857933 5.857933 3176 +duedat 1 3 5.857933 5.857933 3105 +alon 1 3 5.857933 5.857933 3139 +euler 1 3 5.857933 5.857933 3174 +uncompress 1 3 5.857933 5.857933 3177 +scientificcomput 1 2 6.263398 6.263398 4145 +stress 1 2 6.263398 6.263398 4146 +prerequisitesc 1 2 6.263398 6.263398 4058 +loan 1 2 6.263398 6.263398 4147 +renssela 1 2 6.263398 6.263398 4148 +examsther 1 2 6.263398 6.263398 4149 +hermit 1 2 6.263398 6.263398 4150 +multivari 1 2 6.263398 6.263398 4151 +folder 1 2 6.263398 6.263398 4152 +computationsumm 1 1 6.957497 6.957497 6733 +setsan 1 1 6.957497 6.957497 6734 +quadratur 1 1 6.957497 6.957497 6727 +andnonlinear 1 1 6.957497 6.957497 6735 +ordinarydifferenti 1 1 6.957497 6.957497 6736 +informationstaff 1 1 6.957497 6.957497 6737 +ozan 1 1 6.957497 6.957497 6728 +hafizogullari 1 1 6.957497 6.957497 6738 +lecturesclass 1 1 6.957497 6.957497 6739 +administrationlauri 1 1 6.957497 6.957497 6740 +buck 1 1 6.957497 6.957497 6741 +addressedto 1 1 6.957497 6.957497 6742 +corequisit 1 1 6.957497 6.957497 6743 +materialstext 1 1 6.957497 6.957497 6744 +approachus 1 1 6.957497 6.957497 6745 +eitherth 1 1 6.957497 6.957497 6746 +labsthi 1 1 6.957497 6.957497 6747 +siblei 1 1 6.957497 6.957497 6729 +martha 1 1 6.957497 6.957497 6730 +setsther 1 1 6.957497 6.957497 6748 +orfrom 1 1 6.957497 6.957497 6749 +computingproblem 1 1 6.957497 6.957497 6750 +behandl 1 1 6.957497 6.957497 6751 +gradefrom 1 1 6.957497 6.957497 6752 +printyour 1 1 6.957497 6.957497 6753 +firstpag 1 1 6.957497 6.957497 6754 +partnernam 1 1 6.957497 6.957497 6755 +gradingyour 1 1 6.957497 6.957497 6756 +beassign 1 1 6.957497 6.957497 6757 +onyour 1 1 6.957497 6.957497 6758 +vandermond 1 1 6.957497 6.957497 6759 +piecewis 1 1 6.957497 6.957497 6760 +cubic 1 1 6.957497 6.957497 6731 +cote 1 1 6.957497 6.957497 6761 +choleski 1 1 6.957497 6.957497 6762 +rung 1 1 6.957497 6.957497 6763 +kutta 1 1 6.957497 6.957497 6764 +computingat 1 1 6.957497 6.957497 6765 +rennselaerhal 1 1 6.957497 6.957497 6766 +scmv 1 1 6.957497 6.957497 6726 +untar 1 1 6.957497 6.957497 6767 +zcat 1 1 6.957497 6.957497 6732 +randperm 1 1 6.957497 6.957497 6768 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..9f2b8c1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +hour 1 165 1.791759 1.791759 46 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +file 1 132 1.945910 1.945910 70 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +schedul 1 119 2.079442 2.079442 85 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +follow 1 92 2.397895 2.397895 143 +section 1 94 2.397895 2.397895 149 +environ 1 84 2.484907 2.484907 177 +requir 1 81 2.484907 2.484907 167 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +wide 1 84 2.484907 2.484907 185 +second 1 81 2.484907 2.484907 166 +method 1 80 2.564949 2.564949 213 +issu 1 78 2.564949 2.564949 211 +complet 1 77 2.564949 2.564949 208 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +materi 1 75 2.639057 2.639057 221 +logic 1 71 2.639057 2.639057 230 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +tuesdai 1 73 2.639057 2.639057 219 +upson 1 71 2.639057 2.639057 218 +knowledg 1 67 2.708050 2.708050 243 +thursdai 1 70 2.708050 2.708050 241 +descript 1 64 2.772589 2.772589 271 +polici 1 64 2.772589 2.772589 279 +organ 1 65 2.772589 2.772589 265 +virtual 1 62 2.772589 2.772589 285 +copi 1 63 2.772589 2.772589 284 +wednesdai 1 64 2.772589 2.772589 261 +share 1 59 2.833213 2.833213 304 +content 1 59 2.833213 2.833213 302 +summer 1 56 2.890372 2.890372 311 +overview 1 56 2.890372 2.890372 323 +variou 1 56 2.890372 2.890372 317 +cover 1 55 2.944439 2.944439 329 +particular 1 51 2.995732 2.995732 352 +week 1 52 2.995732 2.995732 343 +maintain 1 51 2.995732 2.995732 342 +archiv 1 49 3.044522 3.044522 364 +understand 1 47 3.091042 3.091042 384 +textbook 1 44 3.135494 3.135494 397 +discuss 1 45 3.135494 3.135494 399 +fast 1 42 3.218876 3.218876 429 +form 1 39 3.258097 3.258097 443 +submit 1 39 3.258097 3.258097 440 +close 1 38 3.295837 3.295837 465 +purpos 1 37 3.332205 3.332205 481 +hand 1 37 3.332205 3.332205 475 +concurr 1 34 3.401197 3.401197 501 +next 1 34 3.401197 3.401197 517 +collabor 1 32 3.465736 3.465736 543 +secur 1 30 3.555348 3.555348 577 +depend 1 29 3.583519 3.583519 583 +synchron 1 29 3.583519 3.583519 588 +multiprocessor 1 28 3.610918 3.610918 605 +subject 1 26 3.688879 3.688879 647 +detect 1 26 3.688879 3.688879 646 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +programminglanguag 1 21 3.912023 3.912023 782 +kernel 1 20 3.951244 3.951244 825 +assum 1 19 4.007333 4.007333 845 +feedback 1 19 4.007333 4.007333 854 +outlin 1 17 4.110874 4.110874 914 +protect 1 17 4.110874 4.110874 935 +segment 1 17 4.110874 4.110874 931 +weekli 1 17 4.110874 4.110874 919 +micro 1 15 4.248495 4.248495 1031 +quizz 1 13 4.382027 4.382027 1151 +carri 1 13 4.382027 4.382027 1152 +assembl 1 12 4.465908 4.465908 1207 +statement 1 11 4.553877 4.553877 1313 +evolut 1 11 4.553877 4.553877 1314 +multithread 1 11 4.553877 4.553877 1315 +peter 1 11 4.553877 4.553877 1316 +worth 1 11 4.553877 4.553877 1294 +operatingsystem 1 10 4.653960 4.653960 1401 +princip 1 10 4.653960 4.653960 1397 +familiar 1 9 4.753590 4.753590 1485 +attent 1 8 4.875197 4.875197 1651 +remind 1 7 5.010635 5.010635 1799 +prevent 1 7 5.010635 5.010635 1827 +surpris 1 7 5.010635 5.010635 1828 +multiprogram 1 6 5.164786 5.164786 2010 +pace 1 6 5.164786 5.164786 2011 +ensur 1 6 5.164786 5.164786 2012 +silberschatz 1 6 5.164786 5.164786 1978 +permiss 1 4 5.568345 5.568345 2642 +indupraka 1 4 5.568345 5.568345 2639 +kodukula 1 4 5.568345 5.568345 2640 +deadlock 1 4 5.568345 5.568345 2641 +usedto 1 4 5.568345 5.568345 2643 +abraham 1 4 5.568345 5.568345 2644 +prereq 1 3 5.857933 5.857933 3178 +theimpact 1 3 5.857933 5.857933 3179 +audienc 1 3 5.857933 5.857933 3180 +roughli 1 3 5.857933 5.857933 3097 +serverless 1 3 5.857933 5.857933 3181 +todetermin 1 3 5.857933 5.857933 3182 +praka 1 2 6.263398 6.263398 4155 +nawaaz 1 2 6.263398 6.263398 4153 +ahm 1 2 6.263398 6.263398 4154 +anintroduct 1 2 6.263398 6.263398 4156 +emphasison 1 2 6.263398 6.263398 4157 +memorymanag 1 2 6.263398 6.263398 4158 +thetradit 1 2 6.263398 6.263398 4159 +galvin 1 2 6.263398 6.263398 4160 +motd 1 1 6.957497 6.957497 6769 +prerequsit 1 1 6.957497 6.957497 6771 +processsynchron 1 1 6.957497 6.957497 6772 +requiringconst 1 1 6.957497 6.957497 6773 +prerequsitescomplet 1 1 6.957497 6.957497 6774 +inparticular 1 1 6.957497 6.957497 6775 +theintroductori 1 1 6.957497 6.957497 6776 +thatwil 1 1 6.957497 6.957497 6777 +outlineth 1 1 6.957497 6.957497 6778 +theorder 1 1 6.957497 6.957497 6779 +lldiscuss 1 1 6.957497 6.957497 6770 +mutualexclus 1 1 6.957497 6.957497 6780 +timepermit 1 1 6.957497 6.957497 6781 +textbooksth 1 1 6.957497 6.957497 6782 +conceptsbook 1 1 6.957497 6.957497 6783 +distributeclass 1 1 6.957497 6.957497 6784 +noteswil 1 1 6.957497 6.957497 6785 +pageat 1 1 6.957497 6.957497 6786 +mondaythru 1 1 6.957497 6.957497 6787 +thesewil 1 1 6.957497 6.957497 6788 +thursdayat 1 1 6.957497 6.957497 6789 +gradingeach 1 1 6.957497 6.957497 6790 +weightag 1 1 6.957497 6.957497 6791 +combinedweightag 1 1 6.957497 6.957497 6792 +twomidterm 1 1 6.957497 6.957497 6793 +collaborationat 1 1 6.957497 6.957497 6794 +eachhomework 1 1 6.957497 6.957497 6795 +thehomework 1 1 6.957497 6.957497 6796 +closednot 1 1 6.957497 6.957497 6797 +induprakaskodukula 1 1 6.957497 6.957497 6798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..48725a9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +advanc 1 99 2.302585 2.302585 130 +octob 1 89 2.397895 2.397895 156 +start 1 83 2.484907 2.484907 173 +solut 1 82 2.484907 2.484907 162 +novemb 1 81 2.484907 2.484907 179 +homework 1 79 2.564949 2.564949 193 +decemb 1 80 2.564949 2.564949 215 +upson 1 71 2.639057 2.639057 218 +septemb 1 65 2.772589 2.772589 274 +appoint 1 49 3.044522 3.044522 358 +get 1 46 3.091042 3.091042 380 +midterm 1 45 3.135494 3.135494 392 +meta 1 9 4.753590 4.753590 1505 +scribe 1 4 5.568345 5.568345 2631 +csc 1 3 5.857933 5.857933 3183 +neal 1 3 5.857933 5.857933 3184 +languagesfal 1 2 6.263398 6.263398 4161 +glew 1 2 6.263398 6.263398 4162 +informationhandout 1 2 6.263398 6.263398 4163 +henzingerupson 1 1 6.957497 6.957497 6799 +glewupson 1 1 6.957497 6.957497 6800 +handoutshandout 1 1 6.957497 6.957497 6801 +mlhandout 1 1 6.957497 6.957497 6802 +lambdahomeworkshomework 1 1 6.957497 6.957497 6803 +grieshomework 1 1 6.957497 6.957497 6804 +notesraw 1 1 6.957497 6.957497 6805 +noteslectur 1 1 6.957497 6.957497 6806 +mllectur 1 1 6.957497 6.957497 6807 +grieslectur 1 1 6.957497 6.957497 6808 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..98958bf8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +pleas 1 113 2.197225 2.197225 114 +advanc 1 99 2.302585 2.302585 130 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +mondai 1 77 2.564949 2.564949 206 +upson 1 71 2.639057 2.639057 218 +prof 1 64 2.772589 2.772589 273 +faculti 1 56 2.890372 2.890372 325 +suggest 1 53 2.944439 2.944439 331 +robert 1 30 3.555348 3.555348 567 +thur 1 19 4.007333 4.007333 847 +classic 1 14 4.317488 4.317488 1084 +nuprl 1 10 4.653960 4.653960 1402 +kumar 1 9 4.753590 4.753590 1506 +constabl 1 3 5.857933 5.857933 3186 +ravi 1 3 5.857933 5.857933 3185 +pavel 1 2 6.263398 6.263398 4164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..526d8a73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +spring 1 131 2.079442 2.079442 88 +member 1 84 2.484907 2.484907 165 +addit 1 74 2.639057 2.639057 228 +maintain 1 51 2.995732 2.995732 342 +consult 1 24 3.761200 3.761200 687 +coursesc 1 4 5.568345 5.568345 2692 +individualfaculti 1 1 6.957497 6.957497 7418 +contactgloria 1 1 6.957497 6.957497 7419 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..df54ae26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +austin 1 168 1.791759 1.791759 63 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +analysi 1 124 2.079442 2.079442 98 +welcom 1 122 2.079442 2.079442 99 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +homework 1 79 2.564949 2.564949 193 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +new 1 64 2.772589 2.772589 262 +suggest 1 53 2.944439 2.944439 331 +announc 1 40 3.258097 3.258097 441 +tutori 1 39 3.258097 3.258097 437 +prepar 1 20 3.951244 3.951244 824 +yang 1 8 4.875197 4.875197 1652 +vicki 1 3 5.857933 5.857933 3187 +almstrum 1 2 6.263398 6.263398 4165 +linyuan 1 1 6.957497 6.957497 6809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..756edcf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +spring 1 131 2.079442 2.079442 88 +tool 1 117 2.079442 2.079442 93 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +schedul 1 119 2.079442 2.079442 85 +world 1 115 2.197225 2.197225 126 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +mani 1 92 2.397895 2.397895 150 +grade 1 90 2.397895 2.397895 142 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +help 1 83 2.484907 2.484907 175 +exampl 1 77 2.564949 2.564949 195 +method 1 80 2.564949 2.564949 213 +good 1 77 2.564949 2.564949 200 +april 1 77 2.564949 2.564949 196 +logic 1 71 2.639057 2.639057 230 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +foundat 1 62 2.772589 2.772589 286 +taylor 1 63 2.772589 2.772589 287 +interact 1 62 2.772589 2.772589 270 +copi 1 63 2.772589 2.772589 284 +type 1 61 2.833213 2.833213 296 +reason 1 57 2.890372 2.890372 318 +approach 1 48 3.044522 3.044522 366 +numer 1 49 3.044522 3.044522 369 +algebra 1 45 3.135494 3.135494 394 +mechan 1 43 3.178054 3.178054 416 +examin 1 42 3.218876 3.218876 424 +continu 1 39 3.258097 3.258097 448 +formal 1 37 3.332205 3.332205 478 +either 1 35 3.401197 3.401197 506 +specifi 1 30 3.555348 3.555348 568 +hope 1 28 3.610918 3.610918 610 +higher 1 24 3.761200 3.761200 690 +consult 1 24 3.761200 3.761200 687 +equat 1 23 3.806662 3.806662 724 +tent 1 22 3.850148 3.850148 739 +moor 1 17 4.110874 4.110874 936 +choos 1 16 4.174387 4.174387 964 +upon 1 16 4.174387 4.174387 978 +choic 1 16 4.174387 4.174387 979 +squar 1 14 4.317488 4.317488 1082 +recurs 1 13 4.382027 4.382027 1127 +guest 1 12 4.465908 4.465908 1220 +primit 1 11 4.553877 4.553877 1317 +arithmet 1 10 4.653960 4.653960 1388 +nuprl 1 10 4.653960 4.653960 1402 +pair 1 9 4.753590 4.753590 1503 +russel 1 9 4.753590 4.753590 1507 +prover 1 8 4.875197 4.875197 1653 +root 1 8 4.875197 4.875197 1650 +chief 1 7 5.010635 5.010635 1829 +boyer 1 6 5.164786 5.164786 2013 +freeli 1 6 5.164786 5.164786 2014 +backup 1 4 5.568345 5.568345 2645 +rick 1 4 5.568345 5.568345 2646 +lego 1 3 5.857933 5.857933 3188 +oral 1 3 5.857933 5.857933 3189 +samuel 1 3 5.857933 5.857933 3155 +sawada 1 3 5.857933 5.857933 3190 +ofmathemat 1 2 6.263398 6.263398 4167 +otter 1 2 6.263398 6.263398 4166 +nelson 1 2 6.263398 6.263398 4168 +ortool 1 2 6.263398 6.263398 4169 +bowen 1 2 6.263398 6.263398 4170 +guyer 1 2 6.263398 6.263398 4171 +blurb 1 1 6.957497 6.957497 6810 +theobject 1 1 6.957497 6.957497 6811 +formalizationof 1 1 6.957497 6.957497 6812 +creationof 1 1 6.957497 6.957497 6813 +systemsfor 1 1 6.957497 6.957497 6814 +formalmethod 1 1 6.957497 6.957497 6815 +suchsystem 1 1 6.957497 6.957497 6816 +imp 1 1 6.957497 6.957497 6817 +mizar 1 1 6.957497 6.957497 6818 +quaif 1 1 6.957497 6.957497 6819 +coqstud 1 1 6.957497 6.957497 6820 +aboutthes 1 1 6.957497 6.957497 6821 +projecthtml 1 1 6.957497 6.957497 6822 +theqe 1 1 6.957497 6.957497 6823 +manifestoplain 1 1 6.957497 6.957497 6824 +qedmanifestobowen 1 1 6.957497 6.957497 6825 +localform 1 1 6.957497 6.957497 6826 +tannei 1 1 6.957497 6.957497 6827 +trevor 1 1 6.957497 6.957497 6828 +hick 1 1 6.957497 6.957497 6829 +ruben 1 1 6.957497 6.957497 6830 +gamboa 1 1 6.957497 6.957497 6831 +circal 1 1 6.957497 6.957497 6832 +turpin 1 1 6.957497 6.957497 6833 +galoi 1 1 6.957497 6.957497 6834 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..7774e358 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +note 1 142 1.945910 1.945910 67 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +spring 1 131 2.079442 2.079442 88 +confer 1 126 2.079442 2.079442 100 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +advanc 1 99 2.302585 2.302585 130 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +select 1 91 2.397895 2.397895 154 +grade 1 90 2.397895 2.397895 142 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +orient 1 80 2.564949 2.564949 205 +homework 1 79 2.564949 2.564949 193 +dynam 1 76 2.564949 2.564949 194 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +differ 1 66 2.708050 2.708050 253 +written 1 63 2.772589 2.772589 278 +major 1 56 2.890372 2.890372 315 +variou 1 56 2.890372 2.890372 317 +space 1 57 2.890372 2.890372 310 +instruct 1 53 2.944439 2.944439 332 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +pointer 1 48 3.044522 3.044522 368 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +edit 1 42 3.218876 3.218876 418 +review 1 42 3.218876 3.218876 425 +cach 1 41 3.218876 3.218876 432 +compani 1 41 3.218876 3.218876 423 +cost 1 37 3.332205 3.332205 480 +tech 1 35 3.401197 3.401197 515 +bibliographi 1 34 3.401197 3.401197 518 +product 1 33 3.433987 3.433987 527 +compon 1 30 3.555348 3.555348 570 +focus 1 29 3.583519 3.583519 584 +particip 1 29 3.583519 3.583519 589 +limit 1 29 3.583519 3.583519 585 +administr 1 27 3.637586 3.637586 628 +static 1 27 3.637586 3.637586 619 +compar 1 26 3.688879 3.688879 648 +altern 1 26 3.688879 3.688879 641 +aspect 1 25 3.737670 3.737670 663 +input 1 23 3.806662 3.806662 727 +emphasi 1 22 3.850148 3.850148 755 +recommend 1 22 3.850148 3.850148 737 +disk 1 22 3.850148 3.850148 747 +output 1 21 3.912023 3.912023 788 +watch 1 21 3.912023 3.912023 789 +predict 1 19 4.007333 4.007333 855 +appropri 1 18 4.060443 4.060443 883 +interconnect 1 17 4.110874 4.110874 937 +attempt 1 17 4.110874 4.110874 917 +intro 1 17 4.110874 4.110874 915 +modern 1 16 4.174387 4.174387 966 +choic 1 16 4.174387 4.174387 979 +choos 1 16 4.174387 4.174387 964 +vector 1 16 4.174387 4.174387 961 +branch 1 11 4.553877 4.553877 1318 +errata 1 10 4.653960 4.653960 1403 +significantli 1 9 4.753590 4.753590 1508 +pair 1 9 4.753590 4.753590 1503 +admin 1 9 4.753590 4.753590 1476 +quantit 1 8 4.875197 4.875197 1654 +pipelin 1 7 5.010635 5.010635 1830 +metric 1 7 5.010635 5.010635 1831 +subsystem 1 6 5.164786 5.164786 2015 +hennessi 1 5 5.347108 5.347108 2289 +computerarchitectur 1 5 5.347108 5.347108 2290 +reveal 1 4 5.568345 5.568345 2647 +queu 1 4 5.568345 5.568345 2648 +buss 1 4 5.568345 5.568345 2649 +evaluationof 1 3 5.857933 5.857933 3192 +hazard 1 3 5.857933 5.857933 3191 +tertiari 1 3 5.857933 5.857933 3193 +mpp 1 3 5.857933 5.857933 3194 +insystem 1 2 6.263398 6.263398 4172 +dram 1 2 6.263398 6.263398 4173 +architecturethi 1 1 6.957497 6.957497 6838 +benchmarksto 1 1 6.957497 6.957497 6839 +highperform 1 1 6.957497 6.957497 6840 +memoryhierarchi 1 1 6.957497 6.957497 6841 +studentswil 1 1 6.957497 6.957497 6842 +undertak 1 1 6.957497 6.957497 6843 +oftheir 1 1 6.957497 6.957497 6844 +informationuniqu 1 1 6.957497 6.957497 6845 +mikedahlinoffic 1 1 6.957497 6.957497 6846 +tbdtaoffic 1 1 6.957497 6.957497 6847 +tbdreadingstextbook 1 1 6.957497 6.957497 6848 +patteson 1 1 6.957497 6.957497 6849 +stedit 1 1 6.957497 6.957497 6835 +sheetfor 1 1 6.957497 6.957497 6850 +pattersonin 1 1 6.957497 6.957497 6851 +currentcomput 1 1 6.957497 6.957497 6852 +readinglist 1 1 6.957497 6.957497 6853 +scheduleweekdatetopicreadingduejan 1 1 6.957497 6.957497 6854 +perf 1 1 6.957497 6.957497 6855 +amdahl 1 1 6.957497 6.957497 6856 +trendsch 1 1 6.957497 6.957497 6857 +isa 1 1 6.957497 6.957497 6858 +predictionch 1 1 6.957497 6.957497 6836 +mlkholidayf 1 1 6.957497 6.957497 6859 +proposalfeb 1 1 6.957497 6.957497 6860 +scoreboard 1 1 6.957497 6.957497 6861 +tomasulu 1 1 6.957497 6.957497 6862 +speculationch 1 1 6.957497 6.957497 6863 +processorsch 1 1 6.957497 6.957497 6864 +dfeb 1 1 6.957497 6.957497 6865 +hierarchych 1 1 6.957497 6.957497 6866 +surveyfeb 1 1 6.957497 6.957497 6867 +banksf 1 1 6.957497 6.957497 6868 +revieww 1 1 6.957497 6.957497 6837 +breakm 1 1 6.957497 6.957497 6869 +breakmar 1 1 6.957497 6.957497 6870 +raidch 1 1 6.957497 6.957497 6871 +networksf 1 1 6.957497 6.957497 6872 +networksch 1 1 6.957497 6.957497 6873 +checkpointapr 1 1 6.957497 6.957497 6874 +architecturesf 1 1 6.957497 6.957497 6875 +mppsch 1 1 6.957497 6.957497 6876 +preseantationsm 1 1 6.957497 6.957497 6877 +presentationsfri 1 1 6.957497 6.957497 6878 +classesm 1 1 6.957497 6.957497 6879 +reportaddit 1 1 6.957497 6.957497 6880 +resourcescours 1 1 6.957497 6.957497 6881 +reportsyahoo 1 1 6.957497 6.957497 6882 +businessand 1 1 6.957497 6.957497 6883 +economi 1 1 6.957497 6.957497 6884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..c38f2541 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +world 1 115 2.197225 2.197225 126 +final 1 116 2.197225 2.197225 108 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +internet 1 83 2.484907 2.484907 186 +state 1 76 2.564949 2.564949 207 +refer 1 78 2.564949 2.564949 203 +solv 1 73 2.639057 2.639057 234 +goal 1 66 2.708050 2.708050 250 +syllabu 1 67 2.708050 2.708050 247 +organ 1 65 2.772589 2.772589 265 +talk 1 53 2.944439 2.944439 336 +protocol 1 45 3.135494 3.135494 407 +must 1 40 3.258097 3.258097 442 +purpos 1 37 3.332205 3.332205 481 +secur 1 30 3.555348 3.555348 577 +concern 1 25 3.737670 3.737670 666 +alloc 1 20 3.951244 3.951244 821 +longer 1 20 3.951244 3.951244 816 +verif 1 20 3.951244 3.951244 826 +less 1 18 4.060443 4.060443 892 +context 1 13 4.382027 4.382027 1153 +evolv 1 12 4.465908 4.465908 1223 +operatingsystem 1 10 4.653960 4.653960 1401 +tradit 1 10 4.653960 4.653960 1404 +guidelin 1 7 5.010635 5.010635 1832 +matur 1 5 5.347108 5.347108 2269 +hypothesi 1 4 5.568345 5.568345 2650 +behind 1 4 5.568345 5.568345 2610 +bear 1 4 5.568345 5.568345 2651 +explos 1 3 5.857933 5.857933 3138 +thetradit 1 2 6.263398 6.263398 4159 +interprocess 1 2 6.263398 6.263398 4174 +systemsuniqu 1 1 6.957497 6.957497 6885 +resultedin 1 1 6.957497 6.957497 6886 +contextof 1 1 6.957497 6.957497 6887 +understandingof 1 1 6.957497 6.957497 6888 +addressproblem 1 1 6.957497 6.957497 6889 +theissu 1 1 6.957497 6.957497 6890 +addressedin 1 1 6.957497 6.957497 6891 +occasionallyread 1 1 6.957497 6.957497 6892 +understandingcurr 1 1 6.957497 6.957497 6893 +reportspoint 1 1 6.957497 6.957497 6894 +rosterhandout 1 1 6.957497 6.957497 6895 +sslprotocol 1 1 6.957497 6.957497 6896 +proofsketch 1 1 6.957497 6.957497 6897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..43467abf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +austin 1 168 1.791759 1.791759 63 +construct 1 139 1.945910 1.945910 82 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +schedul 1 119 2.079442 2.079442 85 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +final 1 116 2.197225 2.197225 108 +check 1 115 2.197225 2.197225 118 +version 1 113 2.197225 2.197225 122 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +user 1 104 2.302585 2.302585 137 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +real 1 93 2.397895 2.397895 144 +call 1 91 2.397895 2.397895 153 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +info 1 85 2.484907 2.484907 176 +academ 1 82 2.484907 2.484907 178 +start 1 83 2.484907 2.484907 173 +chang 1 82 2.484907 2.484907 163 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +exampl 1 77 2.564949 2.564949 195 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +sourc 1 77 2.564949 2.564949 201 +meet 1 72 2.639057 2.639057 229 +appli 1 71 2.639057 2.639057 226 +free 1 73 2.639057 2.639057 224 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +test 1 66 2.708050 2.708050 252 +practic 1 70 2.708050 2.708050 246 +organ 1 65 2.772589 2.772589 265 +import 1 65 2.772589 2.772589 282 +handout 1 64 2.772589 2.772589 263 +new 1 64 2.772589 2.772589 262 +visit 1 63 2.772589 2.772589 288 +wednesdai 1 64 2.772589 2.772589 261 +prof 1 64 2.772589 2.772589 273 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +polici 1 64 2.772589 2.772589 279 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +locat 1 59 2.833213 2.833213 303 +automat 1 61 2.833213 2.833213 306 +reason 1 57 2.890372 2.890372 318 +overview 1 56 2.890372 2.890372 323 +instruct 1 53 2.944439 2.944439 332 +date 1 51 2.995732 2.995732 344 +maintain 1 51 2.995732 2.995732 342 +electron 1 47 3.091042 3.091042 379 +discuss 1 45 3.135494 3.135494 399 +fridai 1 44 3.135494 3.135494 390 +might 1 41 3.218876 3.218876 426 +late 1 40 3.258097 3.258097 439 +announc 1 40 3.258097 3.258097 441 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +slide 1 38 3.295837 3.295837 467 +procedur 1 36 3.367296 3.367296 488 +least 1 35 3.401197 3.401197 516 +post 1 35 3.401197 3.401197 505 +singl 1 34 3.401197 3.401197 510 +manual 1 35 3.401197 3.401197 504 +next 1 34 3.401197 3.401197 517 +jame 1 35 3.401197 3.401197 507 +go 1 33 3.433987 3.433987 529 +titl 1 31 3.496508 3.496508 556 +turn 1 29 3.583519 3.583519 586 +pass 1 28 3.610918 3.610918 611 +except 1 28 3.610918 3.610918 607 +held 1 28 3.610918 3.610918 600 +utc 1 27 3.637586 3.637586 629 +session 1 26 3.688879 3.688879 643 +proc 1 26 3.688879 3.688879 649 +valu 1 25 3.737670 3.737670 665 +toward 1 25 3.737670 3.737670 668 +begin 1 23 3.806662 3.806662 716 +thank 1 23 3.806662 3.806662 721 +period 1 22 3.850148 3.850148 743 +hierarchi 1 22 3.850148 3.850148 744 +disk 1 22 3.850148 3.850148 747 +newsgroup 1 21 3.912023 3.912023 783 +output 1 21 3.912023 3.912023 788 +prerequisit 1 19 4.007333 4.007333 846 +thur 1 19 4.007333 4.007333 847 +attend 1 18 4.060443 4.060443 893 +offici 1 18 4.060443 4.060443 894 +regist 1 17 4.110874 4.110874 938 +zhang 1 16 4.174387 4.174387 980 +earli 1 16 4.174387 4.174387 968 +letter 1 16 4.174387 4.174387 981 +ascii 1 15 4.248495 4.248495 1032 +charact 1 15 4.248495 4.248495 1028 +conduct 1 14 4.317488 4.317488 1065 +front 1 13 4.382027 4.382027 1154 +holidai 1 12 4.465908 4.465908 1224 +remov 1 12 4.465908 4.465908 1225 +pascal 1 12 4.465908 4.465908 1213 +chri 1 11 4.553877 4.553877 1311 +tue 1 11 4.553877 4.553877 1308 +extra 1 11 4.553877 4.553877 1312 +night 1 11 4.553877 4.553877 1319 +penalti 1 10 4.653960 4.653960 1405 +stack 1 10 4.653960 4.653960 1389 +cheat 1 10 4.653960 4.653960 1395 +deadlin 1 9 4.753590 4.753590 1502 +pick 1 9 4.753590 4.753590 1498 +calendar 1 8 4.875197 4.875197 1649 +fail 1 8 4.875197 4.875197 1655 +spec 1 8 4.875197 4.875197 1640 +noon 1 7 5.010635 5.010635 1804 +bit 1 7 5.010635 5.010635 1833 +saturdai 1 7 5.010635 5.010635 1794 +paramet 1 7 5.010635 5.010635 1796 +drop 1 6 5.164786 5.164786 2008 +risc 1 6 5.164786 5.164786 2016 +circumst 1 5 5.347108 5.347108 2283 +registr 1 5 5.347108 5.347108 2249 +door 1 5 5.347108 5.347108 2291 +mac 1 5 5.347108 5.347108 2292 +glanc 1 4 5.568345 5.568345 2652 +chart 1 4 5.568345 5.568345 2653 +turnin 1 4 5.568345 5.568345 2654 +labor 1 3 5.857933 5.857933 3195 +obsolet 1 3 5.857933 5.857933 3196 +constantli 1 2 6.263398 6.263398 4181 +edmondson 1 2 6.263398 6.263398 4182 +yurkanan 1 2 6.263398 6.263398 4175 +dragon 1 2 6.263398 6.263398 4176 +yoonsuck 1 2 6.263398 6.263398 4177 +choe 1 2 6.263398 6.263398 4178 +yschoe 1 2 6.263398 6.263398 4179 +gzhang 1 2 6.263398 6.263398 4183 +rare 1 2 6.263398 6.263398 4184 +thanksgiv 1 2 6.263398 6.263398 4185 +appeal 1 2 6.263398 6.263398 4186 +typo 1 2 6.263398 6.263398 4180 +folder 1 2 6.263398 6.263398 4152 +onmon 1 1 6.957497 6.957497 6910 +fantasm 1 1 6.957497 6.957497 6898 +edum 1 1 6.957497 6.957497 6904 +cynthia 1 1 6.957497 6.957497 6911 +deepa 1 1 6.957497 6.957497 6912 +ramani 1 1 6.957497 6.957497 6913 +dparam 1 1 6.957497 6.957497 6914 +eduw 1 1 6.957497 6.957497 6915 +eduf 1 1 6.957497 6.957497 6916 +refund 1 1 6.957497 6.957497 6917 +extenu 1 1 6.957497 6.957497 6918 +withdraw 1 1 6.957497 6.957497 6905 +rightmost 1 1 6.957497 6.957497 6906 +bonu 1 1 6.957497 6.957497 6900 +procudur 1 1 6.957497 6.957497 6907 +electronc 1 1 6.957497 6.957497 6901 +boxin 1 1 6.957497 6.957497 6919 +endia 1 1 6.957497 6.957497 6920 +powermac 1 1 6.957497 6.957497 6908 +quadra 1 1 6.957497 6.957497 6909 +p_global 1 1 6.957497 6.957497 6899 +macsbug 1 1 6.957497 6.957497 6902 +func 1 1 6.957497 6.957497 6921 +practiv 1 1 6.957497 6.957497 6922 +electoron 1 1 6.957497 6.957497 6903 +questionair 1 1 6.957497 6.957497 6923 +brett 1 1 6.957497 6.957497 6924 +subroutine_fil 1 1 6.957497 6.957497 6925 +exception_fil 1 1 6.957497 6.957497 6926 +avali 1 1 6.957497 6.957497 6927 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..c8211605 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +assign 1 135 1.945910 1.945910 66 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +theori 1 111 2.197225 2.197225 127 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +version 1 113 2.197225 2.197225 122 +structur 1 106 2.197225 2.197225 105 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +technic 1 100 2.302585 2.302585 140 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +activ 1 84 2.484907 2.484907 182 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +decemb 1 80 2.564949 2.564949 215 +mondai 1 77 2.564949 2.564949 206 +free 1 73 2.639057 2.639057 224 +html 1 75 2.639057 2.639057 235 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +thursdai 1 70 2.708050 2.708050 241 +test 1 66 2.708050 2.708050 252 +import 1 65 2.772589 2.772589 282 +complex 1 64 2.772589 2.772589 269 +new 1 64 2.772589 2.772589 262 +guid 1 63 2.772589 2.772589 267 +descript 1 64 2.772589 2.772589 271 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +semest 1 58 2.890372 2.890372 312 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +suggest 1 53 2.944439 2.944439 331 +maintain 1 51 2.995732 2.995732 342 +week 1 52 2.995732 2.995732 343 +right 1 48 3.044522 3.044522 363 +format 1 48 3.044522 3.044522 356 +frequent 1 49 3.044522 3.044522 367 +discuss 1 45 3.135494 3.135494 399 +midterm 1 45 3.135494 3.135494 392 +howev 1 41 3.218876 3.218876 422 +review 1 42 3.218876 3.218876 425 +might 1 41 3.218876 3.218876 426 +announc 1 40 3.258097 3.258097 441 +tutori 1 39 3.258097 3.258097 437 +programm 1 39 3.258097 3.258097 445 +slide 1 38 3.295837 3.295837 467 +feel 1 37 3.332205 3.332205 483 +soon 1 36 3.367296 3.367296 494 +download 1 36 3.367296 3.367296 489 +next 1 34 3.401197 3.401197 517 +post 1 35 3.401197 3.401197 505 +articl 1 33 3.433987 3.433987 530 +ad 1 32 3.465736 3.465736 544 +concept 1 32 3.465736 3.465736 537 +option 1 30 3.555348 3.555348 575 +becom 1 28 3.610918 3.610918 603 +progress 1 28 3.610918 3.610918 598 +held 1 28 3.610918 3.610918 600 +hope 1 28 3.610918 3.610918 610 +ask 1 28 3.610918 3.610918 597 +usual 1 28 3.610918 3.610918 608 +relev 1 26 3.688879 3.688879 637 +session 1 26 3.688879 3.688879 643 +comp 1 26 3.688879 3.688879 650 +todai 1 25 3.737670 3.737670 672 +lead 1 23 3.806662 3.806662 718 +dai 1 22 3.850148 3.850148 753 +almost 1 22 3.850148 3.850148 742 +newsgroup 1 21 3.912023 3.912023 783 +reserv 1 20 3.951244 3.951244 808 +item 1 19 4.007333 4.007333 856 +coupl 1 17 4.110874 4.110874 939 +whole 1 17 4.110874 4.110874 940 +sheet 1 16 4.174387 4.174387 973 +critic 1 16 4.174387 4.174387 982 +warn 1 14 4.317488 4.317488 1068 +someon 1 13 4.382027 4.382027 1128 +circuit 1 13 4.382027 4.382027 1131 +difficulti 1 13 4.382027 4.382027 1132 +prolog 1 13 4.382027 4.382027 1155 +menu 1 13 4.382027 4.382027 1156 +bruce 1 12 4.465908 4.465908 1226 +tune 1 12 4.465908 4.465908 1227 +uniqu 1 12 4.465908 4.465908 1228 +pascal 1 12 4.465908 4.465908 1213 +rememb 1 12 4.465908 4.465908 1217 +regard 1 11 4.553877 4.553877 1309 +summar 1 11 4.553877 4.553877 1295 +total 1 10 4.653960 4.653960 1398 +length 1 10 4.653960 4.653960 1400 +exact 1 9 4.753590 4.753590 1509 +prefer 1 9 4.753590 4.753590 1491 +bit 1 7 5.010635 5.010635 1833 +beyond 1 7 5.010635 5.010635 1834 +put 1 6 5.164786 5.164786 2017 +banerje 1 6 5.164786 5.164786 2018 +assignmentsprogram 1 6 5.164786 5.164786 2019 +porter 1 5 5.347108 5.347108 2293 +door 1 5 5.347108 5.347108 2291 +rotat 1 5 5.347108 5.347108 2295 +scope 1 5 5.347108 5.347108 2296 +desk 1 5 5.347108 5.347108 2297 +caus 1 5 5.347108 5.347108 2298 +lang 1 5 5.347108 5.347108 2294 +coverag 1 4 5.568345 5.568345 2656 +glad 1 4 5.568345 5.568345 2657 +arora 1 4 5.568345 5.568345 2658 +somewhat 1 4 5.568345 5.568345 2659 +webpag 1 4 5.568345 5.568345 2660 +welch 1 4 5.568345 5.568345 2655 +forthes 1 3 5.857933 5.857933 3199 +moreov 1 3 5.857933 5.857933 3200 +luck 1 3 5.857933 5.857933 3201 +dwip 1 3 5.857933 5.857933 3197 +boolean 1 3 5.857933 5.857933 3202 +experienc 1 3 5.857933 5.857933 3203 +addendum 1 3 5.857933 5.857933 3150 +ansi 1 3 5.857933 5.857933 3198 +painter 1 2 6.263398 6.263398 4187 +therewil 1 2 6.263398 6.263398 4080 +nimar 1 2 6.263398 6.263398 4188 +disregard 1 2 6.263398 6.263398 4189 +schedulec 1 2 6.263398 6.263398 4190 +newgroup 1 2 6.263398 6.263398 4191 +delphi 1 2 6.263398 6.263398 4192 +dell 1 2 6.263398 6.263398 4193 +andther 1 1 6.957497 6.957497 6929 +unabl 1 1 6.957497 6.957497 6930 +luckfor 1 1 6.957497 6.957497 6931 +dependon 1 1 6.957497 6.957497 6932 +availib 1 1 6.957497 6.957497 6933 +uptoth 1 1 6.957497 6.957497 6934 +resolutio 1 1 6.957497 6.957497 6935 +porterquest 1 1 6.957497 6.957497 6936 +thecont 1 1 6.957497 6.957497 6937 +atugl 1 1 6.957497 6.957497 6938 +sostai 1 1 6.957497 6.957497 6939 +iinstructorbruc 1 1 6.957497 6.957497 6940 +tasoffic 1 1 6.957497 6.957497 6941 +hourslab 1 1 6.957497 6.957497 6942 +descriptionclass 1 1 6.957497 6.957497 6943 +scheduleclass 1 1 6.957497 6.957497 6944 +articlesclass 1 1 6.957497 6.957497 6945 +newsgroupprogram 1 1 6.957497 6.957497 6946 +pascaltutori 1 1 6.957497 6.957497 6947 +faqyou 1 1 6.957497 6.957497 6948 +turbo 1 1 6.957497 6.957497 6928 +zipe 1 1 6.957497 6.957497 6949 +isocomp 1 1 6.957497 6.957497 6950 +maccomp 1 1 6.957497 6.957497 6951 +borlandcomp 1 1 6.957497 6.957497 6952 +misccomp 1 1 6.957497 6.957497 6953 +miscfj 1 1 6.957497 6.957497 6954 +serverto 1 1 6.957497 6.957497 6955 +importantstuff 1 1 6.957497 6.957497 6956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..b80cd8ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +instructor 1 108 2.197225 2.197225 107 +specif 1 106 2.197225 2.197225 106 +code 1 108 2.197225 2.197225 116 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +librari 1 87 2.484907 2.484907 181 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +syllabu 1 67 2.708050 2.708050 247 +descript 1 64 2.772589 2.772589 271 +copi 1 63 2.772589 2.772589 284 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +directori 1 45 3.135494 3.135494 396 +show 1 43 3.178054 3.178054 417 +submit 1 39 3.258097 3.258097 440 +workstat 1 37 3.332205 3.332205 479 +manual 1 35 3.401197 3.401197 504 +option 1 30 3.555348 3.555348 575 +platform 1 29 3.583519 3.583519 591 +turn 1 29 3.583519 3.583519 586 +utc 1 27 3.637586 3.637586 629 +wish 1 24 3.761200 3.761200 692 +higher 1 24 3.761200 3.761200 690 +instal 1 22 3.850148 3.850148 754 +score 1 15 4.248495 4.248495 1017 +donald 1 9 4.753590 4.753590 1510 +curv 1 8 4.875197 4.875197 1656 +driver 1 8 4.875197 4.875197 1657 +sciencesdepart 1 6 5.164786 5.164786 2020 +slate 1 6 5.164786 5.164786 2021 +fussel 1 5 5.347108 5.347108 2300 +opengl 1 5 5.347108 5.347108 2299 +ousterhout 1 5 5.347108 5.347108 2301 +hasbeen 1 4 5.568345 5.568345 2661 +makefil 1 4 5.568345 5.568345 2662 +welch 1 4 5.568345 5.568345 2655 +xlib 1 3 5.857933 5.857933 3204 +cscomput 1 2 6.263398 6.263398 4195 +mesa 1 2 6.263398 6.263398 4194 +anopengl 1 2 6.263398 6.263398 4196 +billthecat 1 2 6.263398 6.263398 4197 +graphicsspr 1 1 6.957497 6.957497 6957 +oneor 1 1 6.957497 6.957497 6958 +examwil 1 1 6.957497 6.957497 6959 +bothmai 1 1 6.957497 6.957497 6960 +willcount 1 1 6.957497 6.957497 6961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..7fb0c864 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +instructor 1 108 2.197225 2.197225 107 +specif 1 106 2.197225 2.197225 106 +code 1 108 2.197225 2.197225 116 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +sourc 1 77 2.564949 2.564949 201 +syllabu 1 67 2.708050 2.708050 247 +descript 1 64 2.772589 2.772589 271 +copi 1 63 2.772589 2.772589 284 +new 1 64 2.772589 2.772589 262 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +directori 1 45 3.135494 3.135494 396 +workstat 1 37 3.332205 3.332205 479 +manual 1 35 3.401197 3.401197 504 +platform 1 29 3.583519 3.583519 591 +turn 1 29 3.583519 3.583519 586 +utc 1 27 3.637586 3.637586 629 +wish 1 24 3.761200 3.761200 692 +instal 1 22 3.850148 3.850148 754 +demo 1 18 4.060443 4.060443 888 +donald 1 9 4.753590 4.753590 1510 +driver 1 8 4.875197 4.875197 1657 +sciencesdepart 1 6 5.164786 5.164786 2020 +slate 1 6 5.164786 5.164786 2021 +fussel 1 5 5.347108 5.347108 2300 +opengl 1 5 5.347108 5.347108 2299 +ousterhout 1 5 5.347108 5.347108 2301 +hasbeen 1 4 5.568345 5.568345 2661 +makefil 1 4 5.568345 5.568345 2662 +welch 1 4 5.568345 5.568345 2655 +turnin 1 4 5.568345 5.568345 2654 +xlib 1 3 5.857933 5.857933 3204 +walker 1 3 5.857933 5.857933 3161 +mesa 1 2 6.263398 6.263398 4194 +anopengl 1 2 6.263398 6.263398 4196 +billthecat 1 2 6.263398 6.263398 4197 +repair 1 2 6.263398 6.263398 4198 +gcomput 1 1 6.957497 6.957497 6963 +graphicsfal 1 1 6.957497 6.957497 6964 +libtcl 1 1 6.957497 6.957497 6965 +libtk 1 1 6.957497 6.957497 6966 +reinstal 1 1 6.957497 6.957497 6962 +tclsh 1 1 6.957497 6.957497 6967 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..981eab2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +mathemat 1 108 2.197225 2.197225 123 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +chang 1 82 2.484907 2.484907 163 +homework 1 79 2.564949 2.564949 193 +appear 1 78 2.564949 2.564949 210 +david 1 71 2.639057 2.639057 232 +syllabu 1 67 2.708050 2.708050 247 +taylor 1 63 2.772589 2.772589 287 +septemb 1 65 2.772589 2.772589 274 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +howev 1 41 3.218876 3.218876 422 +word 1 34 3.401197 3.401197 508 +abl 1 30 3.555348 3.555348 566 +common 1 30 3.555348 3.555348 574 +background 1 25 3.737670 3.737670 664 +notic 1 25 3.737670 3.737670 675 +letter 1 16 4.174387 4.174387 981 +english 1 15 4.248495 4.248495 1033 +station 1 13 4.382027 4.382027 1157 +canada 1 13 4.382027 4.382027 1158 +cryptographi 1 9 4.753590 4.753590 1512 +french 1 9 4.753590 4.753590 1511 +recogn 1 5 5.347108 5.347108 2302 +basement 1 4 5.568345 5.568345 2663 +zuckerman 1 3 5.857933 5.857933 3205 +frequenc 1 3 5.857933 5.857933 3206 +huiqun 1 2 6.263398 6.263398 4200 +hqliu 1 2 6.263398 6.263398 4199 +drastic 1 2 6.263398 6.263398 4201 +ciphertext 1 1 6.957497 6.957497 6968 +digram 1 1 6.957497 6.957497 6969 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..330d7ac8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,315 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +area 1 144 1.945910 1.945910 80 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +real 1 93 2.397895 2.397895 144 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +follow 1 92 2.397895 2.397895 143 +question 1 91 2.397895 2.397895 141 +requir 1 81 2.484907 2.484907 167 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +build 1 85 2.484907 2.484907 184 +wide 1 84 2.484907 2.484907 185 +academ 1 82 2.484907 2.484907 178 +start 1 83 2.484907 2.484907 173 +contain 1 81 2.484907 2.484907 174 +librari 1 87 2.484907 2.484907 181 +mondai 1 77 2.564949 2.564949 206 +come 1 78 2.564949 2.564949 202 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +state 1 76 2.564949 2.564949 207 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +logic 1 71 2.639057 2.639057 230 +servic 1 72 2.639057 2.639057 236 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +effici 1 73 2.639057 2.639057 233 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +taylor 1 63 2.772589 2.772589 287 +wednesdai 1 64 2.772589 2.772589 261 +abstract 1 62 2.772589 2.772589 276 +written 1 63 2.772589 2.772589 278 +improv 1 62 2.772589 2.772589 289 +visit 1 63 2.772589 2.772589 288 +locat 1 59 2.833213 2.833213 303 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +publish 1 57 2.890372 2.890372 326 +point 1 58 2.890372 2.890372 319 +special 1 56 2.890372 2.890372 320 +suggest 1 53 2.944439 2.944439 331 +instruct 1 53 2.944439 2.944439 332 +cover 1 55 2.944439 2.944439 329 +allow 1 53 2.944439 2.944439 333 +three 1 54 2.944439 2.944439 330 +case 1 51 2.995732 2.995732 351 +week 1 52 2.995732 2.995732 343 +particular 1 51 2.995732 2.995732 352 +set 1 50 3.044522 3.044522 361 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +electron 1 47 3.091042 3.091042 379 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +protocol 1 45 3.135494 3.135494 407 +discuss 1 45 3.135494 3.135494 399 +better 1 45 3.135494 3.135494 401 +describ 1 45 3.135494 3.135494 400 +mechan 1 43 3.178054 3.178054 416 +show 1 43 3.178054 3.178054 417 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +howev 1 41 3.218876 3.218876 422 +author 1 39 3.258097 3.258097 450 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +expect 1 37 3.332205 3.332205 484 +cost 1 37 3.332205 3.332205 480 +feel 1 37 3.332205 3.332205 483 +ofth 1 36 3.367296 3.367296 491 +global 1 34 3.401197 3.401197 520 +singl 1 34 3.401197 3.401197 510 +survei 1 35 3.401197 3.401197 513 +given 1 32 3.465736 3.465736 538 +collabor 1 32 3.465736 3.465736 543 +idea 1 32 3.465736 3.465736 545 +someth 1 31 3.496508 3.496508 554 +often 1 31 3.496508 3.496508 551 +robert 1 30 3.555348 3.555348 567 +secur 1 30 3.555348 3.555348 577 +option 1 30 3.555348 3.555348 575 +produc 1 30 3.555348 3.555348 572 +synchron 1 29 3.583519 3.583519 588 +depend 1 29 3.583519 3.583519 583 +consid 1 29 3.583519 3.583519 590 +ask 1 28 3.610918 3.610918 597 +team 1 27 3.637586 3.637586 625 +detect 1 26 3.688879 3.688879 646 +consist 1 26 3.688879 3.688879 651 +effort 1 26 3.688879 3.688879 652 +primari 1 25 3.737670 3.737670 669 +demonstr 1 24 3.761200 3.761200 694 +size 1 23 3.806662 3.806662 713 +proof 1 23 3.806662 3.806662 720 +properti 1 22 3.850148 3.850148 749 +newsgroup 1 21 3.912023 3.912023 783 +kernel 1 20 3.951244 3.951244 825 +prepar 1 20 3.951244 3.951244 824 +prove 1 19 4.007333 4.007333 848 +log 1 19 4.007333 4.007333 857 +assum 1 19 4.007333 4.007333 845 +appropri 1 18 4.060443 4.060443 883 +encourag 1 18 4.060443 4.060443 880 +moor 1 17 4.110874 4.110874 936 +whether 1 17 4.110874 4.110874 918 +previous 1 17 4.110874 4.110874 923 +otherwis 1 17 4.110874 4.110874 922 +monitor 1 17 4.110874 4.110874 941 +weslei 1 16 4.174387 4.174387 983 +vector 1 16 4.174387 4.174387 961 +precis 1 15 4.248495 4.248495 1023 +weak 1 13 4.382027 4.382027 1159 +cannot 1 13 4.382027 4.382027 1144 +deriv 1 13 4.382027 4.382027 1145 +central 1 13 4.382027 4.382027 1160 +addison 1 12 4.465908 4.465908 1230 +replic 1 12 4.465908 4.465908 1231 +skill 1 12 4.465908 4.465908 1205 +asynchron 1 12 4.465908 4.465908 1229 +onth 1 12 4.465908 4.465908 1218 +clock 1 11 4.553877 4.553877 1320 +imposs 1 9 4.753590 4.753590 1513 +clear 1 9 4.753590 4.753590 1488 +assumpt 1 9 4.753590 4.753590 1514 +matter 1 8 4.875197 4.875197 1627 +told 1 8 4.875197 4.875197 1658 +ideal 1 8 4.875197 4.875197 1630 +encrypt 1 7 5.010635 5.010635 1835 +henc 1 7 5.010635 5.010635 1805 +guidelin 1 7 5.010635 5.010635 1832 +channel 1 7 5.010635 5.010635 1836 +predic 1 7 5.010635 5.010635 1806 +arrang 1 6 5.164786 5.164786 2023 +tobe 1 6 5.164786 5.164786 1995 +distributedsystem 1 6 5.164786 5.164786 2022 +causal 1 6 5.164786 5.164786 2024 +wrong 1 6 5.164786 5.164786 2025 +constitut 1 6 5.164786 5.164786 2026 +multicast 1 5 5.347108 5.347108 2305 +authent 1 5 5.347108 5.347108 2306 +colleagu 1 5 5.347108 5.347108 2304 +volunt 1 5 5.347108 5.347108 2307 +explicitli 1 5 5.347108 5.347108 2308 +snapshot 1 5 5.347108 5.347108 2303 +stabl 1 5 5.347108 5.347108 2309 +exchang 1 5 5.347108 5.347108 2310 +lorenzo 1 4 5.568345 5.568345 2588 +cut 1 4 5.568345 5.568345 2620 +disconnect 1 4 5.568345 5.568345 2664 +subsequ 1 4 5.568345 5.568345 2665 +accompani 1 4 5.568345 5.568345 2666 +unless 1 4 5.568345 5.568345 2607 +deadlock 1 4 5.568345 5.568345 2641 +rajeev 1 3 5.857933 5.857933 3152 +pertain 1 3 5.857933 5.857933 3208 +alvisi 1 3 5.857933 5.857933 3095 +agreement 1 3 5.857933 5.857933 3207 +commerc 1 3 5.857933 5.857933 3209 +credibl 1 3 5.857933 5.857933 3210 +violat 1 3 5.857933 5.857933 3211 +urg 1 3 5.857933 5.857933 3212 +agener 1 3 5.857933 5.857933 3213 +conceptu 1 3 5.857933 5.857933 3214 +joshi 1 2 6.263398 6.263398 4202 +requiredtextbook 1 2 6.263398 6.263398 4204 +checkpoint 1 2 6.263398 6.263398 4205 +byzantin 1 2 6.263398 6.263398 4203 +replica 1 2 6.263398 6.263398 4206 +towrit 1 2 6.263398 6.263398 4207 +algorithmi 1 2 6.263398 6.263398 4208 +moreeffici 1 2 6.263398 6.263398 4209 +simpler 1 2 6.263398 6.263398 4210 +ispr 1 1 6.957497 6.957497 6971 +alvisiteach 1 1 6.957497 6.957497 6972 +joshicont 1 1 6.957497 6.957497 6973 +stafflorenzo 1 1 6.957497 6.957497 6974 +mechanicsi 1 1 6.957497 6.957497 6975 +remaind 1 1 6.957497 6.957497 6976 +classat 1 1 6.957497 6.957497 6977 +isutexa 1 1 6.957497 6.957497 6978 +mullend 1 1 6.957497 6.957497 6979 +acmpress 1 1 6.957497 6.957497 6980 +contentc 1 1 6.957497 6.957497 6981 +tomorrow 1 1 6.957497 6.957497 6982 +messagedeliveri 1 1 6.957497 6.957497 6983 +backupapproach 1 1 6.957497 6.957497 6984 +thepresent 1 1 6.957497 6.957497 6985 +exemplifi 1 1 6.957497 6.957497 6986 +principleshav 1 1 6.957497 6.957497 6987 +meor 1 1 6.957497 6.957497 6988 +apresent 1 1 6.957497 6.957497 6989 +networksgradingther 1 1 6.957497 6.957497 6990 +begrad 1 1 6.957497 6.957497 6991 +onbehalf 1 1 6.957497 6.957497 6992 +willrec 1 1 6.957497 6.957497 6993 +ispermit 1 1 6.957497 6.957497 6994 +acollabor 1 1 6.957497 6.957497 6995 +forgrad 1 1 6.957497 6.957497 6996 +collaborationswil 1 1 6.957497 6.957497 6997 +nocollabor 1 1 6.957497 6.957497 6998 +issuesthat 1 1 6.957497 6.957497 6999 +bedistribut 1 1 6.957497 6.957497 7000 +tocomplet 1 1 6.957497 6.957497 7001 +twolectur 1 1 6.957497 6.957497 7002 +choosethi 1 1 6.957497 6.957497 7003 +asingl 1 1 6.957497 6.957497 7004 +warmli 1 1 6.957497 6.957497 7005 +toconsid 1 1 6.957497 6.957497 7006 +excellentopportun 1 1 6.957497 6.957497 7007 +setsin 1 1 6.957497 6.957497 7008 +shouldconform 1 1 6.957497 6.957497 7009 +synonym 1 1 6.957497 6.957497 7010 +isrequir 1 1 6.957497 6.957497 7011 +thatmak 1 1 6.957497 6.957497 7012 +insuffici 1 1 6.957497 6.957497 7013 +ofcorrect 1 1 6.957497 6.957497 7014 +thetextbook 1 1 6.957497 6.957497 7015 +fifo 1 1 6.957497 6.957497 6970 +asnapshot 1 1 6.957497 6.957497 7016 +theprotocol 1 1 6.957497 6.957497 7017 +atmost 1 1 6.957497 6.957497 7018 +mattern 1 1 6.957497 6.957497 7019 +thatcontain 1 1 6.957497 6.957497 7020 +resist 1 1 6.957497 6.957497 7021 +thetempt 1 1 6.957497 6.957497 7022 +monitorprocess 1 1 6.957497 6.957497 7023 +basedsnapshot 1 1 6.957497 6.957497 7024 +nowonlin 1 1 6.957497 6.957497 7025 +filedescrib 1 1 6.957497 6.957497 7026 +examth 1 1 6.957497 6.957497 7027 +fridaymai 1 1 6.957497 6.957497 7028 +thepostscript 1 1 6.957497 6.957497 7029 +freeto 1 1 6.957497 6.957497 7030 +yoursuggest 1 1 6.957497 6.957497 7031 +edurajeev 1 1 6.957497 6.957497 7032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..0a31d6a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +note 1 142 1.945910 1.945910 67 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +version 1 113 2.197225 2.197225 122 +look 1 107 2.197225 2.197225 115 +find 1 111 2.197225 2.197225 111 +technic 1 100 2.302585 2.302585 140 +homepag 1 93 2.397895 2.397895 148 +solut 1 82 2.484907 2.484907 162 +requir 1 81 2.484907 2.484907 167 +homework 1 79 2.564949 2.564949 193 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +differ 1 66 2.708050 2.708050 253 +practic 1 70 2.708050 2.708050 246 +syllabu 1 67 2.708050 2.708050 247 +organ 1 65 2.772589 2.772589 265 +content 1 59 2.833213 2.833213 302 +electron 1 47 3.091042 3.091042 379 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +taught 1 33 3.433987 3.433987 526 +chapter 1 32 3.465736 3.465736 536 +turn 1 29 3.583519 3.583519 586 +adam 1 17 4.110874 4.110874 934 +reflect 1 15 4.248495 4.248495 1034 +overhead 1 15 4.248495 4.248495 1035 +correspond 1 10 4.653960 4.653960 1382 +informationabout 1 9 4.753590 4.753590 1515 +scope 1 5 5.347108 5.347108 2296 +jacob 1 4 5.568345 5.568345 2667 +kornerup 1 3 5.857933 5.857933 3215 +bywil 1 1 6.957497 6.957497 7033 +linea 1 1 6.957497 6.957497 7034 +inhomework 1 1 6.957497 6.957497 7035 +crude 1 1 6.957497 6.957497 7036 +newsgrouputexa 1 1 6.957497 6.957497 7037 +takesplac 1 1 6.957497 6.957497 7038 +pascalprogramm 1 1 6.957497 6.957497 7039 +viewinginform 1 1 6.957497 6.957497 7040 +projecthow 1 1 6.957497 6.957497 7041 +examand 1 1 6.957497 6.957497 7042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..ad0e4bda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +object 1 138 1.945910 1.945910 79 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +text 1 98 2.302585 2.302585 133 +advanc 1 99 2.302585 2.302585 130 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +librari 1 87 2.484907 2.484907 181 +solut 1 82 2.484907 2.484907 162 +internet 1 83 2.484907 2.484907 186 +institut 1 84 2.484907 2.484907 187 +info 1 85 2.484907 2.484907 176 +orient 1 80 2.564949 2.564949 205 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +master 1 76 2.564949 2.564949 216 +refer 1 78 2.564949 2.564949 203 +know 1 80 2.564949 2.564949 198 +server 1 76 2.564949 2.564949 204 +good 1 77 2.564949 2.564949 200 +write 1 72 2.639057 2.639057 222 +materi 1 75 2.639057 2.639057 221 +summari 1 73 2.639057 2.639057 237 +java 1 70 2.708050 2.708050 248 +wednesdai 1 64 2.772589 2.772589 261 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +think 1 57 2.890372 2.890372 314 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +give 1 50 3.044522 3.044522 359 +standard 1 48 3.044522 3.044522 365 +archiv 1 49 3.044522 3.044522 364 +discuss 1 45 3.135494 3.135494 399 +offer 1 43 3.178054 3.178054 414 +edit 1 42 3.218876 3.218876 418 +programm 1 39 3.258097 3.258097 445 +open 1 38 3.295837 3.295837 469 +manual 1 35 3.401197 3.401197 504 +tech 1 35 3.401197 3.401197 515 +eduoffic 1 33 3.433987 3.433987 531 +john 1 33 3.433987 3.433987 532 +product 1 33 3.433987 3.433987 527 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +challeng 1 26 3.688879 3.688879 653 +comp 1 26 3.688879 3.688879 650 +greg 1 24 3.761200 3.761200 695 +pattern 1 24 3.761200 3.761200 689 +fellow 1 24 3.761200 3.761200 701 +lab 1 24 3.761200 3.761200 698 +newsgroup 1 21 3.912023 3.912023 783 +annot 1 21 3.912023 3.912023 775 +applet 1 20 3.951244 3.951244 827 +element 1 18 4.060443 4.060443 895 +encourag 1 18 4.060443 4.060443 880 +event 1 18 4.060443 4.060443 896 +alreadi 1 16 4.174387 4.174387 963 +weslei 1 16 4.174387 4.174387 983 +style 1 15 4.248495 4.248495 1036 +pagec 1 15 4.248495 4.248495 1011 +rate 1 15 4.248495 4.248495 1037 +manner 1 14 4.317488 4.317488 1074 +draft 1 14 4.317488 4.317488 1085 +station 1 13 4.382027 4.382027 1157 +opportun 1 13 4.382027 4.382027 1161 +johnson 1 13 4.382027 4.382027 1162 +misc 1 13 4.382027 4.382027 1124 +captur 1 12 4.465908 4.465908 1232 +addison 1 12 4.465908 4.465908 1230 +evolut 1 11 4.553877 4.553877 1314 +denni 1 11 4.553877 4.553877 1321 +strongli 1 10 4.653960 4.653960 1406 +classmat 1 9 4.753590 4.753590 1516 +doug 1 9 4.753590 4.753590 1517 +virginia 1 8 4.875197 4.875197 1659 +irvin 1 8 4.875197 4.875197 1660 +bookstor 1 7 5.010635 5.010635 1837 +prentic 1 7 5.010635 5.010635 1838 +usenet 1 7 5.010635 5.010635 1839 +dead 1 7 5.010635 5.010635 1840 +forum 1 6 5.164786 5.164786 2027 +mirror 1 6 5.164786 5.164786 2028 +huge 1 6 5.164786 5.164786 1991 +appt 1 5 5.347108 5.347108 2312 +templat 1 5 5.347108 5.347108 2311 +lang 1 5 5.347108 5.347108 2294 +gokul 1 4 5.568345 5.568345 2668 +polymorph 1 4 5.568345 5.568345 2627 +wilei 1 4 5.568345 5.568345 2669 +lavend 1 3 5.857933 5.857933 3217 +cline 1 3 5.857933 5.857933 3218 +faq 1 3 5.857933 5.857933 3216 +gamma 1 3 5.857933 5.857933 3219 +hotjava 1 3 5.857933 5.857933 3220 +javascript 1 3 5.857933 5.857933 3221 +ansi 1 3 5.857933 5.857933 3198 +polytechn 1 3 5.857933 5.857933 3222 +jar 1 3 5.857933 5.857933 3223 +infocours 1 2 6.263398 6.263398 4212 +noteshomework 1 2 6.263398 6.263398 4102 +profici 1 2 6.263398 6.263398 4103 +coop 1 2 6.263398 6.263398 4213 +materiali 1 2 6.263398 6.263398 4214 +drawn 1 2 6.263398 6.263398 4215 +elli 1 2 6.263398 6.263398 4216 +helm 1 2 6.263398 6.263398 4217 +reusabl 1 2 6.263398 6.263398 4218 +setup 1 2 6.263398 6.263398 4211 +renssela 1 2 6.263398 6.263398 4148 +sourcesth 1 2 6.263398 6.263398 4219 +javasoft 1 2 6.263398 6.263398 4220 +gamelan 1 2 6.263398 6.263398 4221 +centr 1 2 6.263398 6.263398 4222 +descriptionc 1 1 6.957497 6.957497 7045 +programminglast 1 1 6.957497 6.957497 7046 +rajaram 1 1 6.957497 6.957497 7047 +lavendercours 1 1 6.957497 6.957497 7048 +syllabusannouncementslectur 1 1 6.957497 6.957497 7049 +solutionsprogram 1 1 6.957497 6.957497 7050 +assignmentsgnu 1 1 6.957497 6.957497 7051 +manualsstandard 1 1 6.957497 6.957497 7052 +codesocket 1 1 6.957497 6.957497 7053 +manualdescript 1 1 6.957497 6.957497 7054 +anintroductori 1 1 6.957497 6.957497 7055 +reusablepattern 1 1 6.957497 6.957497 7056 +typehierarchi 1 1 6.957497 6.957497 7057 +professionallyus 1 1 6.957497 6.957497 7058 +horstmann 1 1 6.957497 6.957497 7059 +stroustrup 1 1 6.957497 6.957497 7043 +cargil 1 1 6.957497 6.957497 7060 +lomow 1 1 6.957497 6.957497 7061 +coplien 1 1 6.957497 6.957497 7062 +idiom 1 1 6.957497 6.957497 7063 +plauger 1 1 6.957497 6.957497 7064 +vlissid 1 1 6.957497 6.957497 7065 +announcementsabout 1 1 6.957497 6.957497 7066 +linediscuss 1 1 6.957497 6.957497 7067 +lavendery 1 1 6.957497 6.957497 7068 +helpjava 1 1 6.957497 6.957497 7069 +advocaci 1 1 6.957497 6.957497 7070 +oopth 1 1 6.957497 6.957497 7071 +objectspac 1 1 6.957497 6.957497 7072 +libg 1 1 6.957497 6.957497 7044 +libstdc 1 1 6.957497 6.957497 7073 +mitgnu 1 1 6.957497 6.957497 7074 +cygnusgnu 1 1 6.957497 6.957497 7075 +ftpobject 1 1 6.957497 6.957497 7076 +developmentindex 1 1 6.957497 6.957497 7077 +librariesth 1 1 6.957497 6.957497 7078 +libraryindex 1 1 6.957497 6.957497 7079 +talig 1 1 6.957497 6.957497 7080 +frameworkjava 1 1 6.957497 6.957497 7081 +registri 1 1 6.957497 6.957497 7082 +espresso 1 1 6.957497 6.957497 7083 +kafura 1 1 6.957497 6.957497 7084 +techdoug 1 1 6.957497 6.957497 7085 +schmidt 1 1 6.957497 6.957497 7086 +universitydoug 1 1 6.957497 6.957497 7087 +sunyintroductori 1 1 6.957497 6.957497 7088 +groningen 1 1 6.957497 6.957497 7089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..03b57b7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +world 1 115 2.197225 2.197225 126 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +decemb 1 80 2.564949 2.564949 215 +tuesdai 1 73 2.639057 2.639057 219 +onlin 1 75 2.639057 2.639057 223 +thursdai 1 70 2.708050 2.708050 241 +practic 1 70 2.708050 2.708050 246 +taylor 1 63 2.772589 2.772589 287 +handout 1 64 2.772589 2.772589 263 +foundat 1 62 2.772589 2.772589 286 +share 1 59 2.833213 2.833213 304 +case 1 51 2.995732 2.995732 351 +adapt 1 46 3.091042 3.091042 387 +tutori 1 39 3.258097 3.258097 437 +copyright 1 36 3.367296 3.367296 495 +manual 1 35 3.401197 3.401197 504 +pass 1 28 3.610918 3.610918 611 +spent 1 25 3.737670 3.737670 676 +thread 1 23 3.806662 3.806662 722 +partit 1 16 4.174387 4.174387 984 +hello 1 10 4.653960 4.653960 1407 +calvin 1 9 4.753590 4.753590 1518 +compilersfal 1 2 6.263398 6.263398 4223 +tera 1 2 6.263398 6.263398 4224 +skeleton 1 2 6.263398 6.263398 4225 +ironman 1 2 6.263398 6.263398 4226 +logp 1 2 6.263398 6.263398 4227 +grid 1 2 6.263398 6.263398 4228 +compilerscst 1 1 6.957497 6.957497 7090 +posix 1 1 6.957497 6.957497 7091 +hierarchieslast 1 1 6.957497 6.957497 7092 +linlin 1 1 6.957497 6.957497 7093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..2d9e2b4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +read 1 154 1.791759 1.791759 47 +assign 1 135 1.945910 1.945910 66 +send 1 114 2.197225 2.197225 109 +handout 1 64 2.772589 2.772589 263 +prof 1 64 2.772589 2.772589 273 +newsgroup 1 21 3.912023 3.912023 783 +csintroduct 1 1 6.957497 6.957497 7094 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..1a105497 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +read 1 154 1.791759 1.791759 47 +distribut 1 162 1.791759 1.791759 51 +topic 1 114 2.197225 2.197225 110 +thot 1 1 6.957497 6.957497 7095 +systemsfil 1 1 6.957497 6.957497 7096 +systemstopolog 1 1 6.957497 6.957497 7097 +systemselectron 1 1 6.957497 6.957497 7098 +commenrcefailur 1 1 6.957497 6.957497 7099 +detectorsdistribut 1 1 6.957497 6.957497 7100 +objectsconsistencysecuregroup 1 1 6.957497 6.957497 7101 +communicationlanguag 1 1 6.957497 6.957497 7102 +dsmmobil 1 1 6.957497 6.957497 7103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..3139a014 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +project 1 340 1.098612 1.098612 18 +list 1 201 1.609438 1.609438 39 +databas 1 122 2.079442 2.079442 86 +materi 1 75 2.639057 2.639057 221 +term 1 43 3.178054 3.178054 411 +mine 1 26 3.688879 3.688879 654 +monitor 1 17 4.110874 4.110874 941 +daniel 1 12 4.465908 4.465908 1233 +databasesprof 1 1 6.957497 6.957497 7104 +mirankernew 1 1 6.957497 6.957497 7105 +seminarschedul 1 1 6.957497 6.957497 7106 +overviewtentativeread 1 1 6.957497 6.957497 7107 +homeworkproject 1 1 6.957497 6.957497 7108 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..4b492a28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +updat 1 191 1.609438 1.609438 41 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +intellig 1 72 2.639057 2.639057 225 +syllabu 1 67 2.708050 2.708050 247 +artifici 1 63 2.772589 2.772589 280 +taylor 1 63 2.772589 2.772589 287 +trace 1 25 3.737670 3.737670 677 +tuth 1 9 4.753590 4.753590 1519 +moonei 1 9 4.753590 4.753590 1520 +raymond 1 5 5.347108 5.347108 2313 +informationclick 1 3 5.857933 5.857933 3224 +networkfor 1 3 5.857933 5.857933 3225 +mooneytim 1 2 6.263398 6.263398 4229 +sheetand 1 2 6.263398 6.263398 4230 +placespr 1 1 6.957497 6.957497 7109 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..6f57d86b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +file 1 132 1.945910 1.945910 70 +studi 1 120 2.079442 2.079442 91 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +code 1 108 2.197225 2.197225 116 +info 1 85 2.484907 2.484907 176 +homework 1 79 2.564949 2.564949 193 +intellig 1 72 2.639057 2.639057 225 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +artifici 1 63 2.772589 2.772589 280 +case 1 51 2.995732 2.995732 351 +common 1 30 3.555348 3.555348 574 +symbol 1 27 3.637586 3.637586 620 +trace 1 25 3.737670 3.737670 677 +lisp 1 18 4.060443 4.060443 897 +sheet 1 16 4.174387 4.174387 973 +moonei 1 9 4.753590 4.753590 1520 +raymond 1 5 5.347108 5.347108 2313 +allegro 1 5 5.347108 5.347108 2314 +sowmya 1 4 5.568345 5.568345 2670 +informationclick 1 3 5.857933 5.857933 3224 +networkfor 1 3 5.857933 5.857933 3225 +placetu 1 2 6.263398 6.263398 4231 +informationon 1 2 6.263398 6.263398 4232 +mooneyteach 1 1 6.957497 6.957497 7110 +ramachandrantim 1 1 6.957497 6.957497 7111 +alsout 1 1 6.957497 6.957497 7112 +textparadigm 1 1 6.957497 6.957497 7113 +lispassignmentsse 1 1 6.957497 6.957497 7114 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..00956099 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +gener 1 220 1.386294 1.386294 27 +paper 1 205 1.609438 1.609438 38 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +file 1 132 1.945910 1.945910 70 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +instructor 1 108 2.197225 2.197225 107 +theori 1 111 2.197225 2.197225 127 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +learn 1 86 2.484907 2.484907 170 +homework 1 79 2.564949 2.564949 193 +logic 1 71 2.639057 2.639057 230 +syllabu 1 67 2.708050 2.708050 247 +order 1 69 2.708050 2.708050 249 +evalu 1 64 2.772589 2.772589 266 +suggest 1 53 2.944439 2.944439 331 +talk 1 53 2.944439 2.944439 336 +format 1 48 3.044522 3.044522 356 +slide 1 38 3.295837 3.295837 467 +tree 1 36 3.367296 3.367296 492 +concept 1 32 3.465736 3.465736 537 +neural 1 30 3.555348 3.555348 578 +cluster 1 28 3.610918 3.610918 612 +experiment 1 26 3.688879 3.688879 645 +rule 1 26 3.688879 3.688879 638 +trace 1 25 3.737670 3.737670 677 +decis 1 23 3.806662 3.806662 728 +outlin 1 17 4.110874 4.110874 914 +explan 1 16 4.174387 4.174387 985 +induct 1 11 4.553877 4.553877 1304 +instanc 1 11 4.553877 4.553877 1322 +moonei 1 9 4.753590 4.753590 1520 +raymond 1 5 5.347108 5.347108 2313 +bayesian 1 4 5.568345 5.568345 2671 +informationclick 1 3 5.857933 5.857933 3224 +networkfor 1 3 5.857933 5.857933 3225 +mooneytim 1 2 6.263398 6.263398 4229 +placetu 1 2 6.263398 6.263398 4231 +sheetand 1 2 6.263398 6.263398 4230 +unsupervis 1 2 6.263398 6.263398 4233 +textmachinelearninglectur 1 1 6.957497 6.957497 7115 +learningassignmentsse 1 1 6.957497 6.957497 7116 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..525e4f3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +list 1 201 1.609438 1.609438 39 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +year 1 148 1.945910 1.945910 84 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +final 1 116 2.197225 2.197225 108 +take 1 97 2.302585 2.302585 134 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +school 1 84 2.484907 2.484907 188 +chang 1 82 2.484907 2.484907 163 +requir 1 81 2.484907 2.484907 167 +exam 1 86 2.484907 2.484907 169 +syllabu 1 67 2.708050 2.708050 247 +foundat 1 62 2.772589 2.772589 286 +previou 1 62 2.772589 2.772589 290 +guid 1 63 2.772589 2.772589 267 +abstract 1 62 2.772589 2.772589 276 +plai 1 60 2.833213 2.833213 307 +major 1 56 2.890372 2.890372 315 +semest 1 58 2.890372 2.890372 312 +run 1 51 2.995732 2.995732 347 +basic 1 50 3.044522 3.044522 360 +move 1 47 3.091042 3.091042 382 +math 1 44 3.135494 3.135494 402 +directori 1 45 3.135494 3.135494 396 +next 1 34 3.401197 3.401197 517 +least 1 35 3.401197 3.401197 516 +concept 1 32 3.465736 3.465736 537 +express 1 32 3.465736 3.465736 540 +taken 1 31 3.496508 3.496508 555 +hard 1 30 3.555348 3.555348 563 +intend 1 28 3.610918 3.610918 599 +symbol 1 27 3.637586 3.637586 620 +never 1 25 3.737670 3.737670 671 +background 1 25 3.737670 3.737670 664 +instead 1 22 3.850148 3.850148 756 +scheme 1 20 3.951244 3.951244 818 +thur 1 19 4.007333 4.007333 847 +lisp 1 18 4.060443 4.060443 897 +hopefulli 1 14 4.317488 4.317488 1071 +draw 1 14 4.317488 4.317488 1086 +faster 1 11 4.553877 4.553877 1323 +surf 1 11 4.553877 4.553877 1301 +submiss 1 11 4.553877 4.553877 1298 +novak 1 9 4.753590 4.753590 1521 +strong 1 6 5.164786 5.164786 2029 +syntax 1 6 5.164786 5.164786 2030 +snow 1 6 5.164786 5.164786 2031 +gordon 1 6 5.164786 5.164786 2032 +assignmentsprogram 1 6 5.164786 5.164786 2019 +willb 1 5 5.347108 5.347108 2277 +porter 1 5 5.347108 5.347108 2293 +emphas 1 4 5.568345 5.568345 2672 +dialect 1 3 5.857933 5.857933 3226 +gambit 1 3 5.857933 5.857933 3227 +macintoshcomput 1 3 5.857933 5.857933 3228 +treasur 1 3 5.857933 5.857933 3229 +gradingmidterm 1 3 5.857933 5.857933 3230 +guidefin 1 3 5.857933 5.857933 3231 +peano 1 2 6.263398 6.263398 4234 +turtl 1 2 6.263398 6.263398 4235 +plot 1 2 6.263398 6.263398 4236 +sciencec 1 1 6.957497 6.957497 7117 +atleast 1 1 6.957497 6.957497 7119 +programmingcours 1 1 6.957497 6.957497 7120 +precalculu 1 1 6.957497 6.957497 7121 +theschem 1 1 6.957497 6.957497 7122 +coursesand 1 1 6.957497 6.957497 7123 +learninga 1 1 6.957497 6.957497 7124 +tutorcopi 1 1 6.957497 6.957497 7125 +pcassign 1 1 6.957497 6.957497 7126 +simulationassign 1 1 6.957497 6.957497 7127 +webassign 1 1 6.957497 6.957497 7128 +schemeassign 1 1 6.957497 6.957497 7129 +gamblingassign 1 1 6.957497 6.957497 7130 +graphicsassign 1 1 6.957497 6.957497 7131 +treesassign 1 1 6.957497 6.957497 7118 +manipulationstudi 1 1 6.957497 6.957497 7132 +vocabulari 1 1 6.957497 6.957497 7133 +thickensassign 1 1 6.957497 6.957497 7134 +huntassign 1 1 6.957497 6.957497 7135 +algebraassign 1 1 6.957497 6.957497 7136 +matricesstudi 1 1 6.957497 6.957497 7137 +unparsingassign 1 1 6.957497 6.957497 7138 +translationstudi 1 1 6.957497 6.957497 7139 +descriptionsprogram 1 1 6.957497 6.957497 7140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..d1ff159a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +take 1 97 2.302585 2.302585 134 +exam 1 86 2.484907 2.484907 169 +server 1 76 2.564949 2.564949 204 +write 1 72 2.639057 2.639057 222 +plan 1 65 2.772589 2.772589 272 +descript 1 64 2.772589 2.772589 271 +summer 1 56 2.890372 2.890372 311 +cover 1 55 2.944439 2.944439 329 +processor 1 54 2.944439 2.944439 335 +week 1 52 2.995732 2.995732 343 +directori 1 45 3.135494 3.135494 396 +live 1 40 3.258097 3.258097 451 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +programminglanguag 1 21 3.912023 3.912023 782 +chip 1 21 3.912023 3.912023 770 +five 1 19 4.007333 4.007333 841 +incorpor 1 13 4.382027 4.382027 1163 +pascal 1 12 4.465908 4.465908 1213 +workload 1 12 4.465908 4.465908 1210 +submiss 1 11 4.553877 4.553877 1298 +novak 1 9 4.753590 4.753590 1521 +heavi 1 7 5.010635 5.010635 1841 +assignmentsprogram 1 6 5.164786 5.164786 2019 +gradingmidterm 1 3 5.857933 5.857933 3230 +guidefin 1 3 5.857933 5.857933 3231 +compilersc 1 2 6.263398 6.263398 4237 +powerpc 1 2 6.263398 6.263398 4238 +syllabusprogram 1 2 6.263398 6.263398 4239 +codei 1 1 6.957497 6.957497 7141 +dedicatetheir 1 1 6.957497 6.957497 7142 +guidegordon 1 1 6.957497 6.957497 7143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..762c65d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +studi 1 120 2.079442 2.079442 91 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +exam 1 86 2.484907 2.484907 169 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +artifici 1 63 2.772589 2.772589 280 +major 1 56 2.890372 2.890372 315 +natur 1 44 3.135494 3.135494 406 +vision 1 41 3.218876 3.218876 430 +survei 1 35 3.401197 3.401197 513 +represent 1 35 3.401197 3.401197 512 +bibliographi 1 34 3.401197 3.401197 518 +human 1 32 3.465736 3.465736 546 +defin 1 22 3.850148 3.850148 746 +behavior 1 18 4.060443 4.060443 881 +appropri 1 18 4.060443 4.060443 883 +attempt 1 17 4.110874 4.110874 917 +action 1 15 4.248495 4.248495 1038 +achiev 1 14 4.317488 4.317488 1088 +stori 1 14 4.317488 4.317488 1087 +calculu 1 12 4.465908 4.465908 1203 +thecomput 1 10 4.653960 4.653960 1408 +novak 1 9 4.753590 4.753590 1521 +brain 1 8 4.875197 4.875197 1638 +thegoal 1 6 5.164786 5.164786 2033 +assignmentsprogram 1 6 5.164786 5.164786 2019 +intelligencec 1 4 5.568345 5.568345 2673 +coverag 1 4 5.568345 5.568345 2656 +guidefin 1 3 5.857933 5.857933 3231 +actor 1 2 6.263398 6.263398 4240 +syllabusprogram 1 2 6.263398 6.263398 4239 +problemssolut 1 2 6.263398 6.263398 4241 +intelligenceartifici 1 1 6.957497 6.957497 7144 +todupl 1 1 6.957497 6.957497 7145 +connectspercept 1 1 6.957497 6.957497 7146 +andknowledg 1 1 6.957497 6.957497 7147 +withbrief 1 1 6.957497 6.957497 7148 +descriptionsmidterm 1 1 6.957497 6.957497 7149 +guidepred 1 1 6.957497 6.957497 7150 +problemsnot 1 1 6.957497 6.957497 7151 +braingordon 1 1 6.957497 6.957497 7152 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..275462c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +compil 1 122 2.079442 2.079442 96 +specif 1 106 2.197225 2.197225 106 +present 1 91 2.397895 2.397895 145 +graphic 1 90 2.397895 2.397895 147 +level 1 87 2.484907 2.484907 180 +homework 1 79 2.564949 2.564949 193 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +materi 1 75 2.639057 2.639057 221 +automat 1 61 2.833213 2.833213 306 +semest 1 58 2.890372 2.890372 312 +sever 1 56 2.890372 2.890372 322 +cover 1 55 2.944439 2.944439 329 +done 1 47 3.091042 3.091042 381 +execut 1 45 3.135494 3.135494 404 +third 1 43 3.178054 3.178054 412 +long 1 43 3.178054 3.178054 413 +expect 1 37 3.332205 3.332205 484 +given 1 32 3.465736 3.465736 538 +kind 1 32 3.465736 3.465736 541 +consist 1 26 3.688879 3.688879 651 +higher 1 24 3.761200 3.761200 690 +literatur 1 11 4.553877 4.553877 1300 +latter 1 9 4.753590 4.753590 1522 +novak 1 9 4.753590 4.753590 1521 +programmingc 1 3 5.857933 5.857933 3232 +ordinari 1 3 5.857933 5.857933 3233 +programmingautomat 1 1 6.957497 6.957497 7153 +programsfrom 1 1 6.957497 6.957497 7154 +illustrateth 1 1 6.957497 6.957497 7155 +requirelearn 1 1 6.957497 6.957497 7156 +partof 1 1 6.957497 6.957497 7157 +syllabusbibliographyassign 1 1 6.957497 6.957497 7158 +handpattern 1 1 6.957497 6.957497 7159 +matchingobject 1 1 6.957497 6.957497 7160 +programmingintroduct 1 1 6.957497 6.957497 7161 +glispview 1 1 6.957497 6.957497 7162 +programminggordon 1 1 6.957497 6.957497 7163 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..61bce5a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,676 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +lectur 1 135 1.945910 1.945910 73 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +introduct 1 126 2.079442 2.079442 87 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +structur 1 106 2.197225 2.197225 105 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +specif 1 106 2.197225 2.197225 106 +site 1 106 2.197225 2.197225 119 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +search 1 95 2.397895 2.397895 155 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +graphic 1 90 2.397895 2.397895 147 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +librari 1 87 2.484907 2.484907 181 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +chang 1 82 2.484907 2.484907 163 +requir 1 81 2.484907 2.484907 167 +solut 1 82 2.484907 2.484907 162 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +build 1 85 2.484907 2.484907 184 +learn 1 86 2.484907 2.484907 170 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +thing 1 84 2.484907 2.484907 189 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +messag 1 76 2.564949 2.564949 212 +issu 1 78 2.564949 2.564949 211 +dynam 1 76 2.564949 2.564949 194 +state 1 76 2.564949 2.564949 207 +method 1 80 2.564949 2.564949 213 +interfac 1 79 2.564949 2.564949 209 +optim 1 79 2.564949 2.564949 197 +sourc 1 77 2.564949 2.564949 201 +orient 1 80 2.564949 2.564949 205 +exampl 1 77 2.564949 2.564949 195 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +free 1 73 2.639057 2.639057 224 +logic 1 71 2.639057 2.639057 230 +addit 1 74 2.639057 2.639057 228 +solv 1 73 2.639057 2.639057 234 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +write 1 72 2.639057 2.639057 222 +thursdai 1 70 2.708050 2.708050 241 +receiv 1 66 2.708050 2.708050 244 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +differ 1 66 2.708050 2.708050 253 +view 1 70 2.708050 2.708050 254 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +practic 1 70 2.708050 2.708050 246 +main 1 67 2.708050 2.708050 256 +taylor 1 63 2.772589 2.772589 287 +evalu 1 64 2.772589 2.772589 266 +organ 1 65 2.772589 2.772589 265 +abstract 1 62 2.772589 2.772589 276 +foundat 1 62 2.772589 2.772589 286 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +virtual 1 62 2.772589 2.772589 285 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +artifici 1 63 2.772589 2.772589 280 +previou 1 62 2.772589 2.772589 290 +written 1 63 2.772589 2.772589 278 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +plai 1 60 2.833213 2.833213 307 +type 1 61 2.833213 2.833213 296 +semest 1 58 2.890372 2.890372 312 +special 1 56 2.890372 2.890372 320 +explor 1 58 2.890372 2.890372 324 +sever 1 56 2.890372 2.890372 322 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +point 1 58 2.890372 2.890372 319 +reason 1 57 2.890372 2.890372 318 +direct 1 57 2.890372 2.890372 316 +major 1 56 2.890372 2.890372 315 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +allow 1 53 2.944439 2.944439 333 +local 1 55 2.944439 2.944439 334 +instruct 1 53 2.944439 2.944439 332 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +extens 1 53 2.944439 2.944439 340 +run 1 51 2.995732 2.995732 347 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +case 1 51 2.995732 2.995732 351 +principl 1 48 3.044522 3.044522 357 +give 1 50 3.044522 3.044522 359 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +standard 1 48 3.044522 3.044522 365 +adapt 1 46 3.091042 3.091042 387 +move 1 47 3.091042 3.091042 382 +featur 1 46 3.091042 3.091042 386 +effect 1 46 3.091042 3.091042 385 +natur 1 44 3.135494 3.135494 406 +execut 1 45 3.135494 3.135494 404 +even 1 45 3.135494 3.135494 393 +better 1 45 3.135494 3.135494 401 +describ 1 45 3.135494 3.135494 400 +discuss 1 45 3.135494 3.135494 399 +offer 1 43 3.178054 3.178054 414 +fast 1 42 3.218876 3.218876 429 +autom 1 41 3.218876 3.218876 434 +past 1 42 3.218876 3.218876 428 +linear 1 41 3.218876 3.218876 431 +examin 1 42 3.218876 3.218876 424 +must 1 40 3.258097 3.258097 442 +programm 1 39 3.258097 3.258097 445 +continu 1 39 3.258097 3.258097 448 +map 1 39 3.258097 3.258097 452 +form 1 39 3.258097 3.258097 443 +theoret 1 39 3.258097 3.258097 446 +seminar 1 38 3.295837 3.295837 470 +credit 1 38 3.295837 3.295837 460 +paul 1 38 3.295837 3.295837 471 +open 1 38 3.295837 3.295837 469 +close 1 38 3.295837 3.295837 465 +feel 1 37 3.332205 3.332205 483 +hand 1 37 3.332205 3.332205 475 +cost 1 37 3.332205 3.332205 480 +workstat 1 37 3.332205 3.332205 479 +mean 1 37 3.332205 3.332205 477 +formal 1 37 3.332205 3.332205 478 +robot 1 36 3.367296 3.367296 497 +game 1 36 3.367296 3.367296 498 +procedur 1 36 3.367296 3.367296 488 +least 1 35 3.401197 3.401197 516 +singl 1 34 3.401197 3.401197 510 +represent 1 35 3.401197 3.401197 512 +random 1 34 3.401197 3.401197 511 +toler 1 33 3.433987 3.433987 533 +within 1 33 3.433987 3.433987 525 +queri 1 33 3.433987 3.433987 524 +obtain 1 33 3.433987 3.433987 534 +fault 1 32 3.465736 3.465736 547 +human 1 32 3.465736 3.465736 546 +given 1 32 3.465736 3.465736 538 +idea 1 32 3.465736 3.465736 545 +ad 1 32 3.465736 3.465736 544 +taken 1 31 3.496508 3.496508 555 +often 1 31 3.496508 3.496508 551 +robert 1 30 3.555348 3.555348 567 +neural 1 30 3.555348 3.555348 578 +power 1 30 3.555348 3.555348 573 +graph 1 30 3.555348 3.555348 576 +produc 1 30 3.555348 3.555348 572 +abl 1 30 3.555348 3.555348 566 +domain 1 30 3.555348 3.555348 564 +rang 1 30 3.555348 3.555348 565 +common 1 30 3.555348 3.555348 574 +compon 1 30 3.555348 3.555348 570 +focu 1 30 3.555348 3.555348 571 +semant 1 29 3.583519 3.583519 587 +depend 1 29 3.583519 3.583519 583 +limit 1 29 3.583519 3.583519 585 +turn 1 29 3.583519 3.583519 586 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +consid 1 29 3.583519 3.583519 590 +cluster 1 28 3.610918 3.610918 612 +scale 1 28 3.610918 3.610918 613 +propos 1 28 3.610918 3.610918 602 +usual 1 28 3.610918 3.610918 608 +framework 1 28 3.610918 3.610918 606 +enabl 1 26 3.688879 3.688879 655 +detect 1 26 3.688879 3.688879 646 +request 1 26 3.688879 3.688879 635 +challeng 1 26 3.688879 3.688879 653 +consist 1 26 3.688879 3.688879 651 +effort 1 26 3.688879 3.688879 652 +mine 1 26 3.688879 3.688879 654 +repres 1 26 3.688879 3.688879 656 +task 1 25 3.737670 3.737670 678 +client 1 25 3.737670 3.737670 679 +reliabl 1 25 3.737670 3.737670 674 +concern 1 25 3.737670 3.737670 666 +mike 1 24 3.761200 3.761200 703 +greg 1 24 3.761200 3.761200 695 +lab 1 24 3.761200 3.761200 698 +known 1 24 3.761200 3.761200 702 +higher 1 24 3.761200 3.761200 690 +honor 1 23 3.806662 3.806662 729 +decis 1 23 3.806662 3.806662 728 +highli 1 23 3.806662 3.806662 725 +hierarchi 1 22 3.850148 3.850148 744 +sequenti 1 22 3.850148 3.850148 745 +cooper 1 22 3.850148 3.850148 757 +emphasi 1 22 3.850148 3.850148 755 +instead 1 22 3.850148 3.850148 756 +inth 1 22 3.850148 3.850148 741 +serv 1 22 3.850148 3.850148 758 +deal 1 22 3.850148 3.850148 736 +reduc 1 22 3.850148 3.850148 759 +busi 1 21 3.912023 3.912023 784 +among 1 21 3.912023 3.912023 781 +path 1 21 3.912023 3.912023 778 +fact 1 21 3.912023 3.912023 780 +output 1 21 3.912023 3.912023 788 +basi 1 20 3.951244 3.951244 828 +increas 1 20 3.951244 3.951244 829 +facil 1 20 3.951244 3.951244 814 +portabl 1 20 3.951244 3.951244 819 +runtim 1 19 4.007333 4.007333 858 +separ 1 19 4.007333 4.007333 844 +attend 1 18 4.060443 4.060443 893 +failur 1 18 4.060443 4.060443 898 +encourag 1 18 4.060443 4.060443 880 +seem 1 18 4.060443 4.060443 899 +appropri 1 18 4.060443 4.060443 883 +stop 1 17 4.110874 4.110874 942 +ultim 1 17 4.110874 4.110874 943 +previous 1 17 4.110874 4.110874 923 +modif 1 17 4.110874 4.110874 913 +spatial 1 16 4.174387 4.174387 988 +cognit 1 16 4.174387 4.174387 986 +advantag 1 16 4.174387 4.174387 987 +spars 1 16 4.174387 4.174387 989 +vector 1 16 4.174387 4.174387 961 +easi 1 16 4.174387 4.174387 969 +across 1 16 4.174387 4.174387 974 +action 1 15 4.248495 4.248495 1038 +novel 1 15 4.248495 4.248495 1039 +english 1 15 4.248495 4.248495 1033 +side 1 15 4.248495 4.248495 1022 +massiv 1 15 4.248495 4.248495 1026 +reflect 1 15 4.248495 4.248495 1034 +topolog 1 14 4.317488 4.317488 1089 +heterogen 1 14 4.317488 4.317488 1090 +convent 1 14 4.317488 4.317488 1072 +translat 1 13 4.382027 4.382027 1164 +whose 1 13 4.382027 4.382027 1166 +discret 1 13 4.382027 4.382027 1165 +prolog 1 13 4.382027 4.382027 1155 +central 1 13 4.382027 4.382027 1160 +signific 1 13 4.382027 4.382027 1125 +difficulti 1 13 4.382027 4.382027 1132 +infrastructur 1 12 4.465908 4.465908 1234 +grow 1 12 4.465908 4.465908 1209 +onth 1 12 4.465908 4.465908 1218 +amount 1 12 4.465908 4.465908 1208 +evolv 1 12 4.465908 4.465908 1223 +promot 1 12 4.465908 4.465908 1235 +buffer 1 12 4.465908 4.465908 1211 +deduct 1 12 4.465908 4.465908 1236 +minor 1 12 4.465908 4.465908 1237 +evolut 1 11 4.553877 4.553877 1314 +vladimir 1 11 4.553877 4.553877 1324 +transpar 1 11 4.553877 4.553877 1325 +multithread 1 11 4.553877 4.553877 1315 +node 1 11 4.553877 4.553877 1326 +broad 1 11 4.553877 4.553877 1302 +induct 1 11 4.553877 4.553877 1304 +fix 1 11 4.553877 4.553877 1327 +noth 1 11 4.553877 4.553877 1328 +guarante 1 10 4.653960 4.653960 1391 +reli 1 10 4.653960 4.653960 1411 +facilit 1 10 4.653960 4.653960 1412 +genet 1 10 4.653960 4.653960 1409 +length 1 10 4.653960 4.653960 1400 +sentenc 1 10 4.653960 4.653960 1413 +custom 1 10 4.653960 4.653960 1414 +underli 1 10 4.653960 4.653960 1410 +certain 1 10 4.653960 4.653960 1393 +tradit 1 10 4.653960 4.653960 1404 +modular 1 10 4.653960 4.653960 1392 +risto 1 9 4.753590 4.753590 1523 +calvin 1 9 4.753590 4.753590 1518 +sister 1 9 4.753590 4.753590 1524 +explicit 1 9 4.753590 4.753590 1525 +declar 1 9 4.753590 4.753590 1526 +compos 1 9 4.753590 4.753590 1527 +meta 1 9 4.753590 4.753590 1505 +novak 1 9 4.753590 4.753590 1521 +assumpt 1 9 4.753590 4.753590 1514 +notat 1 9 4.753590 4.753590 1489 +significantli 1 9 4.753590 4.753590 1508 +fail 1 8 4.875197 4.875197 1655 +reus 1 8 4.875197 4.875197 1661 +paradigm 1 8 4.875197 4.875197 1662 +simpli 1 8 4.875197 4.875197 1626 +isol 1 8 4.875197 4.875197 1663 +claim 1 8 4.875197 4.875197 1664 +realist 1 8 4.875197 4.875197 1665 +analys 1 8 4.875197 4.875197 1666 +beyond 1 7 5.010635 5.010635 1834 +trend 1 7 5.010635 5.010635 1842 +dedic 1 7 5.010635 5.010635 1843 +metric 1 7 5.010635 5.010635 1831 +usabl 1 7 5.010635 5.010635 1810 +converg 1 7 5.010635 5.010635 1844 +uniform 1 7 5.010635 5.010635 1845 +gordon 1 6 5.164786 5.164786 2032 +academia 1 6 5.164786 5.164786 2036 +promis 1 6 5.164786 5.164786 2037 +pace 1 6 5.164786 5.164786 2011 +emerg 1 6 5.164786 5.164786 2038 +causal 1 6 5.164786 5.164786 2024 +prefetch 1 6 5.164786 5.164786 2039 +infer 1 6 5.164786 5.164786 2040 +conflict 1 6 5.164786 5.164786 2041 +constrain 1 6 5.164786 5.164786 2042 +determinist 1 6 5.164786 5.164786 2034 +difficult 1 6 5.164786 5.164786 2035 +variant 1 6 5.164786 5.164786 2043 +affect 1 6 5.164786 5.164786 2044 +revolut 1 5 5.347108 5.347108 2315 +ofdistribut 1 5 5.347108 5.347108 2316 +scope 1 5 5.347108 5.347108 2296 +despit 1 5 5.347108 5.347108 2317 +unknown 1 5 5.347108 5.347108 2318 +stabl 1 5 5.347108 5.347108 2309 +distinct 1 5 5.347108 5.347108 2319 +corba 1 5 5.347108 5.347108 2320 +corpu 1 5 5.347108 5.347108 2282 +pars 1 5 5.347108 5.347108 2321 +fairli 1 5 5.347108 5.347108 2322 +ofinterest 1 5 5.347108 5.347108 2323 +blumoferdb 1 5 5.347108 5.347108 2324 +vijaya 1 4 5.568345 5.568345 2677 +lorenzo 1 4 5.568345 5.568345 2588 +expens 1 4 5.568345 5.568345 2678 +ofworkst 1 4 5.568345 5.568345 2679 +reinforc 1 4 5.568345 5.568345 2674 +illus 1 4 5.568345 5.568345 2603 +algorithmsand 1 4 5.568345 5.568345 2680 +resolv 1 4 5.568345 5.568345 2675 +havedevelop 1 4 5.568345 5.568345 2681 +clearli 1 4 5.568345 5.568345 2590 +asymptot 1 4 5.568345 5.568345 2676 +surprisingli 1 4 5.568345 5.568345 2609 +floyd 1 4 5.568345 5.568345 2682 +queu 1 4 5.568345 5.568345 2648 +blumof 1 3 5.857933 5.857933 3237 +citizen 1 3 5.857933 5.857933 3238 +dramat 1 3 5.857933 5.857933 3239 +lightweight 1 3 5.857933 5.857933 3234 +aggress 1 3 5.857933 5.857933 3240 +similarli 1 3 5.857933 5.857933 3241 +neighborhood 1 3 5.857933 5.857933 3242 +popul 1 3 5.857933 5.857933 3235 +intra 1 3 5.857933 5.857933 3243 +enumer 1 3 5.857933 5.857933 3244 +compliant 1 3 5.857933 5.857933 3245 +embodi 1 3 5.857933 5.857933 3236 +andsemant 1 3 5.857933 5.857933 3246 +parser 1 3 5.857933 5.857933 3141 +surpass 1 3 5.857933 5.857933 3247 +agener 1 3 5.857933 5.857933 3213 +theworld 1 3 5.857933 5.857933 3158 +conceptu 1 3 5.857933 5.857933 3214 +rivest 1 3 5.857933 5.857933 3248 +parallelalgorithm 1 3 5.857933 5.857933 3249 +rscheme 1 3 5.857933 5.857933 3250 +sciencefal 1 2 6.263398 6.263398 4246 +cilk 1 2 6.263398 6.263398 4242 +andresearch 1 2 6.263398 6.263398 4247 +government 1 2 6.263398 6.263398 4248 +andcollect 1 2 6.263398 6.263398 4249 +todramat 1 2 6.263398 6.263398 4250 +thedesign 1 2 6.263398 6.263398 4251 +har 1 2 6.263398 6.263398 4252 +undergo 1 2 6.263398 6.263398 4253 +applicationsto 1 2 6.263398 6.263398 4254 +offailur 1 2 6.263398 6.263398 4255 +idl 1 2 6.263398 6.263398 4256 +indistribut 1 2 6.263398 6.263398 4257 +andmap 1 2 6.263398 6.263398 4258 +equilibrium 1 2 6.263398 6.263398 4259 +trajectori 1 2 6.263398 6.263398 4260 +sufficientto 1 2 6.263398 6.263398 4261 +logicprogram 1 2 6.263398 6.263398 4262 +thesetechniqu 1 2 6.263398 6.263398 4263 +ofneur 1 2 6.263398 6.263398 4264 +neuro 1 2 6.263398 6.263398 4265 +resourcemanag 1 2 6.263398 6.263398 4266 +alamo 1 2 6.263398 6.263398 4243 +anobject 1 2 6.263398 6.263398 4267 +anticip 1 2 6.263398 6.263398 4268 +chill 1 2 6.263398 6.263398 4244 +corpora 1 2 6.263398 6.263398 4269 +foidl 1 2 6.263398 6.263398 4270 +andanalysi 1 2 6.263398 6.263398 4271 +straightforward 1 2 6.263398 6.263398 4272 +theform 1 2 6.263398 6.263398 4245 +lengthi 1 2 6.263398 6.263398 4273 +andform 1 2 6.263398 6.263398 4274 +succinctli 1 2 6.263398 6.263398 4275 +concret 1 2 6.263398 6.263398 4276 +analysisof 1 2 6.263398 6.263398 4277 +tarjan 1 2 6.263398 6.263398 4278 +maspar 1 2 6.263398 6.263398 4279 +workon 1 2 6.263398 6.263398 4280 +sciencecst 1 1 6.957497 6.957497 7173 +apass 1 1 6.957497 6.957497 7174 +beregist 1 1 6.957497 6.957497 7175 +schedulespeakertitleseptemb 1 1 6.957497 6.957497 7176 +mirankeralamo 1 1 6.957497 6.957497 7177 +warehouseseptemb 1 1 6.957497 6.957497 7178 +kuipersth 1 1 6.957497 6.957497 7179 +humanand 1 1 6.957497 6.957497 7180 +mapsseptemb 1 1 6.957497 6.957497 7181 +blumofecilk 1 1 6.957497 6.957497 7182 +reliableparallel 1 1 6.957497 6.957497 7183 +workstationsseptemb 1 1 6.957497 6.957497 7184 +miikkulainenlearn 1 1 6.957497 6.957497 7185 +throughsymbiot 1 1 6.957497 6.957497 7186 +networksoctob 1 1 6.957497 6.957497 7187 +lifschitzmathemat 1 1 6.957497 6.957497 7188 +programmingoctob 1 1 6.957497 6.957497 7166 +wilsonextens 1 1 6.957497 6.957497 7167 +reflectionoctob 1 1 6.957497 6.957497 7189 +mooneylearn 1 1 6.957497 6.957497 7190 +usinginduct 1 1 6.957497 6.957497 7191 +dahlindistribut 1 1 6.957497 6.957497 7192 +internetsnovemb 1 1 6.957497 6.957497 7193 +novaksoftwar 1 1 6.957497 6.957497 7194 +genericprocedur 1 1 6.957497 6.957497 7195 +viewsnovemb 1 1 6.957497 6.957497 7196 +ramachandranth 1 1 6.957497 6.957497 7168 +parallelalgorithmsnovemb 1 1 6.957497 6.957497 7197 +alvisilighweight 1 1 6.957497 6.957497 7198 +tolerancenovemb 1 1 6.957497 6.957497 7199 +linadapt 1 1 6.957497 6.957497 7200 +optimizationdecemb 1 1 6.957497 6.957497 7201 +plaxtonanalysi 1 1 6.957497 6.957497 7202 +algorithmslighweight 1 1 6.957497 6.957497 7203 +tolerancelorenzo 1 1 6.957497 6.957497 7204 +alvisidistribut 1 1 6.957497 6.957497 7205 +confin 1 1 6.957497 6.957497 7206 +revolution 1 1 6.957497 6.957497 7207 +beyondth 1 1 6.957497 6.957497 7208 +toleranttechniqu 1 1 6.957497 6.957497 7209 +willceas 1 1 6.957497 6.957497 7210 +exot 1 1 6.957497 6.957497 7211 +distributedinform 1 1 6.957497 6.957497 7212 +acompetit 1 1 6.957497 6.957497 7213 +criticalinform 1 1 6.957497 6.957497 7214 +engineerfault 1 1 6.957497 6.957497 7215 +negligibleimpact 1 1 6.957497 6.957497 7216 +theapplic 1 1 6.957497 6.957497 7169 +communicatethrough 1 1 6.957497 6.957497 7217 +onnetwork 1 1 6.957497 6.957497 7218 +workstationsrobert 1 1 6.957497 6.957497 7219 +blumofethi 1 1 6.957497 6.957497 7220 +pronouncedsilk 1 1 6.957497 6.957497 7221 +andcilk 1 1 6.957497 6.957497 7222 +functionalsubset 1 1 6.957497 6.957497 7223 +providesadapt 1 1 6.957497 6.957497 7224 +tranpar 1 1 6.957497 6.957497 7225 +touser 1 1 6.957497 6.957497 7226 +shrinkdynam 1 1 6.957497 6.957497 7227 +cilkprogram 1 1 6.957497 6.957497 7228 +workstationscrash 1 1 6.957497 6.957497 7229 +andrecov 1 1 6.957497 6.957497 7230 +livedemonstr 1 1 6.957497 6.957497 7231 +internetsmik 1 1 6.957497 6.957497 7232 +dahlinthi 1 1 6.957497 6.957497 7233 +applicationsmotiv 1 1 6.957497 6.957497 7234 +inclust 1 1 6.957497 6.957497 7235 +servicei 1 1 6.957497 6.957497 7236 +nodesto 1 1 6.957497 6.957497 7237 +centralserv 1 1 6.957497 6.957497 7238 +goodperform 1 1 6.957497 6.957497 7239 +networkperform 1 1 6.957497 6.957497 7240 +projectwil 1 1 6.957497 6.957497 7241 +mapsbenjamin 1 1 6.957497 6.957497 7242 +kuipershuman 1 1 6.957497 6.957497 7243 +forlarg 1 1 6.957497 6.957497 7244 +ontolog 1 1 6.957497 6.957497 7245 +varietyof 1 1 6.957497 6.957497 7246 +cast 1 1 6.957497 6.957497 7247 +diverserepresent 1 1 6.957497 6.957497 7248 +spatialsemant 1 1 6.957497 6.957497 7249 +andassumpt 1 1 6.957497 6.957497 7250 +thecontrol 1 1 6.957497 6.957497 7251 +beabstract 1 1 6.957497 6.957497 7252 +givinga 1 1 6.957497 6.957497 7253 +causalgraph 1 1 6.957497 6.957497 7254 +topologicalnetwork 1 1 6.957497 6.957497 7255 +occupancygrid 1 1 6.957497 6.957497 7256 +theframework 1 1 6.957497 6.957497 7257 +ofglob 1 1 6.957497 6.957497 7258 +programmingvladimir 1 1 6.957497 6.957497 7259 +lifschitzlog 1 1 6.957497 6.957497 7260 +functionalprogram 1 1 6.957497 6.957497 7261 +notne 1 1 6.957497 6.957497 7262 +itcan 1 1 6.957497 6.957497 7263 +executedus 1 1 6.957497 6.957497 7264 +withdefin 1 1 6.957497 6.957497 7265 +thereason 1 1 6.957497 6.957497 7266 +thesound 1 1 6.957497 6.957497 7267 +optimizationcalvin 1 1 6.957497 6.957497 7268 +linthi 1 1 6.957497 6.957497 7269 +andtheir 1 1 6.957497 6.957497 7270 +differenthardwar 1 1 6.957497 6.957497 7271 +efficientand 1 1 6.957497 6.957497 7272 +suchlibrari 1 1 6.957497 6.957497 7273 +weexplain 1 1 6.957497 6.957497 7274 +symbiot 1 1 6.957497 6.957497 7164 +networksristo 1 1 6.957497 6.957497 7275 +miikkulainena 1 1 6.957497 6.957497 7276 +sane 1 1 6.957497 6.957497 7170 +neuronsthrough 1 1 6.957497 6.957497 7277 +anddiscourag 1 1 6.957497 6.957497 7278 +suboptim 1 1 6.957497 6.957497 7279 +toextract 1 1 6.957497 6.957497 7280 +sequentialdecis 1 1 6.957497 6.957497 7281 +warehousedan 1 1 6.957497 6.957497 7282 +mirankerth 1 1 6.957497 6.957497 7283 +andint 1 1 6.957497 6.957497 7284 +datasourc 1 1 6.957497 6.957497 7171 +theuser 1 1 6.957497 6.957497 7285 +byqueri 1 1 6.957497 6.957497 7286 +theabstract 1 1 6.957497 6.957497 7172 +interfacethat 1 1 6.957497 6.957497 7287 +ofabstract 1 1 6.957497 6.957497 7288 +clever 1 1 6.957497 6.957497 7289 +anddata 1 1 6.957497 6.957497 7290 +activedatabas 1 1 6.957497 6.957497 7291 +constructedus 1 1 6.957497 6.957497 7292 +databasefacil 1 1 6.957497 6.957497 7293 +thealamo 1 1 6.957497 6.957497 7294 +dataintegr 1 1 6.957497 6.957497 7295 +elementsof 1 1 6.957497 6.957497 7296 +furthercomposit 1 1 6.957497 6.957497 7297 +answerhigh 1 1 6.957497 6.957497 7298 +logicprogrammingraymond 1 1 6.957497 6.957497 7299 +mooneyinduct 1 1 6.957497 6.957497 7300 +learningprolog 1 1 6.957497 6.957497 7301 +offirst 1 1 6.957497 6.957497 7302 +learningmethod 1 1 6.957497 6.957497 7303 +areappli 1 1 6.957497 6.957497 7304 +believethi 1 1 6.957497 6.957497 7305 +richer 1 1 6.957497 6.957497 7306 +parsersfrom 1 1 6.957497 6.957497 7307 +superior 1 1 6.957497 6.957497 7308 +onsever 1 1 6.957497 6.957497 7309 +networkmethod 1 1 6.957497 6.957497 7310 +ati 1 1 6.957497 6.957497 7311 +ofairlin 1 1 6.957497 6.957497 7312 +automaticallydevelop 1 1 6.957497 6.957497 7313 +englishdatabas 1 1 6.957497 6.957497 7314 +moreaccur 1 1 6.957497 6.957497 7315 +smallgeograph 1 1 6.957497 6.957497 7316 +tens 1 1 6.957497 6.957497 7317 +treemethod 1 1 6.957497 6.957497 7318 +throughviewsgordon 1 1 6.957497 6.957497 7319 +toachiev 1 1 6.957497 6.957497 7320 +thesoftwar 1 1 6.957497 6.957497 7321 +typesus 1 1 6.957497 6.957497 7322 +specifyview 1 1 6.957497 6.957497 7323 +adesir 1 1 6.957497 6.957497 7324 +algorithmsgreg 1 1 6.957497 6.957497 7325 +plaxtona 1 1 6.957497 6.957497 7326 +forspecif 1 1 6.957497 6.957497 7327 +notuncommon 1 1 6.957497 6.957497 7328 +havelittl 1 1 6.957497 6.957497 7329 +suchpap 1 1 6.957497 6.957497 7330 +gapsinher 1 1 6.957497 6.957497 7331 +inadequatefor 1 1 6.957497 6.957497 7332 +straightforwardalgorithm 1 1 6.957497 6.957497 7333 +theconceptu 1 1 6.957497 6.957497 7334 +trivialclass 1 1 6.957497 6.957497 7335 +blum 1 1 6.957497 6.957497 7336 +pratt 1 1 6.957497 6.957497 7337 +algorithmsvijaya 1 1 6.957497 6.957497 7338 +forcombinatori 1 1 6.957497 6.957497 7339 +recentyear 1 1 6.957497 6.957497 7340 +pram 1 1 6.957497 6.957497 7165 +willdescrib 1 1 6.957497 6.957497 7341 +thesealgorithm 1 1 6.957497 6.957497 7342 +thendescrib 1 1 6.957497 6.957497 7343 +wepropos 1 1 6.957497 6.957497 7344 +parallelshar 1 1 6.957497 6.957497 7345 +reflectionpaul 1 1 6.957497 6.957497 7346 +addnew 1 1 6.957497 6.957497 7347 +structureaccordingli 1 1 6.957497 6.957497 7348 +adapat 1 1 6.957497 6.957497 7349 +extensiblelanguag 1 1 6.957497 6.957497 7350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..e61ffa12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +implement 1 152 1.791759 1.791759 52 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +book 1 99 2.302585 2.302585 131 +octob 1 89 2.397895 2.397895 156 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +free 1 73 2.639057 2.639057 224 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +test 1 66 2.708050 2.708050 252 +wednesdai 1 64 2.772589 2.772589 261 +taylor 1 63 2.772589 2.772589 287 +handout 1 64 2.772589 2.772589 263 +septemb 1 65 2.772589 2.772589 274 +found 1 53 2.944439 2.944439 337 +cover 1 55 2.944439 2.944439 329 +date 1 51 2.995732 2.995732 344 +midterm 1 45 3.135494 3.135494 392 +long 1 43 3.178054 3.178054 413 +map 1 39 3.258097 3.258097 452 +feel 1 37 3.332205 3.332205 483 +least 1 35 3.401197 3.401197 516 +chapter 1 32 3.465736 3.465736 536 +robert 1 30 3.555348 3.555348 567 +except 1 28 3.610918 3.610918 607 +assum 1 19 4.007333 4.007333 845 +stop 1 17 4.110874 4.110874 942 +station 1 13 4.382027 4.382027 1157 +solari 1 12 4.465908 4.465908 1238 +systemsc 1 11 4.553877 4.553877 1293 +canb 1 7 5.010635 5.010635 1846 +encrypt 1 7 5.010635 5.010635 1835 +blumoferdb 1 5 5.347108 5.347108 2324 +systemsfal 1 4 5.568345 5.568345 2683 +blumof 1 3 5.857933 5.857933 3237 +subramanyam 1 2 6.263398 6.263398 4282 +gooti 1 2 6.263398 6.263398 4281 +crypt 1 1 6.957497 6.957497 7351 +multiplemap 1 1 6.957497 6.957497 7352 +themap 1 1 6.957497 6.957497 7353 +decrypt 1 1 6.957497 6.957497 7354 +solutionsread 1 1 6.957497 6.957497 7355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..2ca98617 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +select 1 91 2.397895 2.397895 154 +grade 1 90 2.397895 2.397895 142 +homework 1 79 2.564949 2.564949 193 +copi 1 63 2.772589 2.772589 284 +detail 1 57 2.890372 2.890372 321 +midterm 1 45 3.135494 3.135494 392 +slide 1 38 3.295837 3.295837 467 +neural 1 30 3.555348 3.555348 578 +fundament 1 25 3.737670 3.737670 661 +station 1 13 4.382027 4.382027 1157 +uniqu 1 12 4.465908 4.465908 1228 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +lauren 1 3 5.857933 5.857933 3251 +bednar 1 2 6.263398 6.263398 4283 +jbednar 1 2 6.263398 6.263398 4284 +ofneur 1 2 6.263398 6.263398 4264 +cliff 1 2 6.263398 6.263398 4285 +edusun 1 2 6.263398 6.263398 4286 +networksfal 1 1 6.957497 6.957497 7356 +fausett 1 1 6.957497 6.957497 7357 +englewood 1 1 6.957497 6.957497 7358 +prenticehal 1 1 6.957497 6.957497 7359 +schedulehomework 1 1 6.957497 6.957497 7360 +assignmentsexamsclass 1 1 6.957497 6.957497 7361 +resourcesa 1 1 6.957497 6.957497 7362 +versionof 1 1 6.957497 6.957497 7363 +syllabusristo 1 1 6.957497 6.957497 7364 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..b9faf37b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +center 1 88 2.397895 2.397895 158 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +write 1 72 2.639057 2.639057 222 +dept 1 64 2.772589 2.772589 291 +taylor 1 63 2.772589 2.772589 287 +foundat 1 62 2.772589 2.772589 286 +detail 1 57 2.890372 2.890372 321 +pointer 1 48 3.044522 3.044522 368 +discuss 1 45 3.135494 3.135494 399 +submit 1 39 3.258097 3.258097 440 +short 1 36 3.367296 3.367296 499 +approxim 1 35 3.401197 3.401197 509 +collabor 1 32 3.465736 3.465736 543 +particip 1 29 3.583519 3.583519 589 +toward 1 25 3.737670 3.737670 668 +attend 1 18 4.060443 4.060443 893 +regular 1 17 4.110874 4.110874 929 +cognit 1 16 4.174387 4.174387 986 +critic 1 16 4.174387 4.174387 982 +philosophi 1 13 4.382027 4.382027 1167 +signific 1 13 4.382027 4.382027 1125 +count 1 12 4.465908 4.465908 1239 +packet 1 10 4.653960 4.653960 1415 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +appt 1 5 5.347108 5.347108 2312 +ofinterest 1 5 5.347108 5.347108 2323 +nichola 1 3 5.857933 5.857933 3252 +interv 1 3 5.857933 5.857933 3253 +thepap 1 3 5.857933 5.857933 3254 +sciencefal 1 2 6.263398 6.263398 4246 +commentari 1 2 6.263398 6.263398 4287 +edusun 1 2 6.263398 6.263398 4286 +asher 1 1 6.957497 6.957497 7365 +waggen 1 1 6.957497 6.957497 7366 +nasher 1 1 6.957497 6.957497 7367 +berti 1 1 6.957497 6.957497 7368 +posner 1 1 6.957497 6.957497 7369 +mitpress 1 1 6.957497 6.957497 7370 +withanoth 1 1 6.957497 6.957497 7371 +alsorequir 1 1 6.957497 6.957497 7372 +descriptioncours 1 1 6.957497 6.957497 7373 +schedulediscuss 1 1 6.957497 6.957497 7374 +notesperson 1 1 6.957497 6.957497 7375 +adscollabor 1 1 6.957497 6.957497 7376 +paperclass 1 1 6.957497 6.957497 7377 +resourcesstud 1 1 6.957497 6.957497 7378 +questionnaireus 1 1 6.957497 6.957497 7379 +sciencefaculti 1 1 6.957497 6.957497 7380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..e78618cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,191 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +assist 1 112 2.197225 2.197225 113 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +section 1 94 2.397895 2.397895 149 +commun 1 95 2.397895 2.397895 157 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +exam 1 86 2.484907 2.484907 169 +mondai 1 77 2.564949 2.564949 206 +exampl 1 77 2.564949 2.564949 195 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +materi 1 75 2.639057 2.639057 221 +syllabu 1 67 2.708050 2.708050 247 +foundat 1 62 2.772589 2.772589 286 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +copi 1 63 2.772589 2.772589 284 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +laboratori 1 63 2.772589 2.772589 292 +room 1 59 2.833213 2.833213 301 +detail 1 57 2.890372 2.890372 321 +semest 1 58 2.890372 2.890372 312 +sever 1 56 2.890372 2.890372 322 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +date 1 51 2.995732 2.995732 344 +give 1 50 3.044522 3.044522 359 +without 1 50 3.044522 3.044522 370 +frequent 1 49 3.044522 3.044522 367 +still 1 50 3.044522 3.044522 362 +get 1 46 3.091042 3.091042 380 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +discuss 1 45 3.135494 3.135494 399 +even 1 45 3.135494 3.135494 393 +long 1 43 3.178054 3.178054 413 +howev 1 41 3.218876 3.218876 422 +late 1 40 3.258097 3.258097 439 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +form 1 39 3.258097 3.258097 443 +credit 1 38 3.295837 3.295837 460 +respons 1 37 3.332205 3.332205 476 +expect 1 37 3.332205 3.332205 484 +feel 1 37 3.332205 3.332205 483 +procedur 1 36 3.367296 3.367296 488 +ofth 1 36 3.367296 3.367296 491 +soon 1 36 3.367296 3.367296 494 +staff 1 36 3.367296 3.367296 490 +everi 1 34 3.401197 3.401197 519 +within 1 33 3.433987 3.433987 525 +articl 1 33 3.433987 3.433987 530 +chapter 1 32 3.465736 3.465736 536 +often 1 31 3.496508 3.496508 551 +depend 1 29 3.583519 3.583519 583 +turn 1 29 3.583519 3.583519 586 +limit 1 29 3.583519 3.583519 585 +becom 1 28 3.610918 3.610918 603 +challeng 1 26 3.688879 3.688879 653 +effort 1 26 3.688879 3.688879 652 +background 1 25 3.737670 3.737670 664 +highli 1 23 3.806662 3.806662 725 +begin 1 23 3.806662 3.806662 716 +identifi 1 22 3.850148 3.850148 760 +self 1 22 3.850148 3.850148 761 +half 1 21 3.912023 3.912023 776 +prepar 1 20 3.951244 3.951244 824 +break 1 20 3.951244 3.951244 812 +event 1 18 4.060443 4.060443 896 +attend 1 18 4.060443 4.060443 893 +less 1 18 4.060443 4.060443 892 +partial 1 18 4.060443 4.060443 900 +monitor 1 17 4.110874 4.110874 941 +debug 1 17 4.110874 4.110874 944 +thought 1 17 4.110874 4.110874 945 +quiz 1 16 4.174387 4.174387 990 +earli 1 16 4.174387 4.174387 968 +enough 1 15 4.248495 4.248495 1040 +near 1 14 4.317488 4.317488 1091 +warn 1 14 4.317488 4.317488 1068 +consider 1 14 4.317488 4.317488 1076 +everyth 1 13 4.382027 4.382027 1169 +wait 1 13 4.382027 4.382027 1168 +unfortun 1 13 4.382027 4.382027 1170 +everyon 1 13 4.382027 4.382027 1148 +quizz 1 13 4.382027 4.382027 1151 +individu 1 13 4.382027 4.382027 1126 +pascal 1 12 4.465908 4.465908 1213 +uniqu 1 12 4.465908 4.465908 1228 +excit 1 11 4.553877 4.553877 1329 +extrem 1 11 4.553877 4.553877 1330 +eight 1 11 4.553877 4.553877 1331 +hundr 1 9 4.753590 4.753590 1528 +deadlin 1 9 4.753590 4.753590 1502 +intellectu 1 7 5.010635 5.010635 1847 +delai 1 7 5.010635 5.010635 1848 +carefulli 1 6 5.164786 5.164786 2045 +difficult 1 6 5.164786 5.164786 2035 +apolog 1 6 5.164786 5.164786 2046 +nine 1 6 5.164786 5.164786 2047 +vari 1 6 5.164786 5.164786 2001 +pace 1 6 5.164786 5.164786 2011 +firm 1 4 5.568345 5.568345 2684 +thecours 1 4 5.568345 5.568345 2685 +behind 1 4 5.568345 5.568345 2610 +welch 1 4 5.568345 5.568345 2655 +theprogram 1 4 5.568345 5.568345 2686 +dale 1 4 5.568345 5.568345 2687 +wewil 1 4 5.568345 5.568345 2688 +thiscours 1 4 5.568345 5.568345 2601 +repli 1 4 5.568345 5.568345 2689 +guadalup 1 3 5.857933 5.857933 3255 +andyou 1 3 5.857933 5.857933 3256 +gripe 1 3 5.857933 5.857933 3257 +suzi 1 2 6.263398 6.263398 4288 +wella 1 2 6.263398 6.263398 4289 +foral 1 2 6.263398 6.263398 4290 +riski 1 2 6.263398 6.263398 4291 +nowher 1 2 6.263398 6.263398 4292 +gallagh 1 2 6.263398 6.263398 4293 +requiredtextbook 1 2 6.263398 6.263398 4204 +elicit 1 2 6.263398 6.263398 4294 +programmingcsp 1 1 6.957497 6.957497 7383 +pascalintroductori 1 1 6.957497 6.957497 7384 +programminginstructor 1 1 6.957497 6.957497 7385 +gallagherwelcom 1 1 6.957497 6.957497 7386 +cspi 1 1 6.957497 6.957497 7387 +andso 1 1 6.957497 6.957497 7388 +otherdeadlin 1 1 6.957497 6.957497 7389 +thesyllabu 1 1 6.957497 6.957497 7390 +jenn 1 1 6.957497 6.957497 7391 +takethi 1 1 6.957497 6.957497 7392 +courseeach 1 1 6.957497 6.957497 7393 +thenewsgroup 1 1 6.957497 6.957497 7381 +howwel 1 1 6.957497 6.957497 7394 +deadlineto 1 1 6.957497 6.957497 7395 +thursdayeven 1 1 6.957497 6.957497 7396 +intosmal 1 1 6.957497 6.957497 7397 +ateach 1 1 6.957497 6.957497 7398 +thatlaboratori 1 1 6.957497 6.957497 7399 +thatgrad 1 1 6.957497 6.957497 7400 +thetest 1 1 6.957497 6.957497 7382 +thattest 1 1 6.957497 6.957497 7401 +limitedand 1 1 6.957497 6.957497 7402 +foravail 1 1 6.957497 6.957497 7403 +proctor 1 1 6.957497 6.957497 7404 +hoursbefor 1 1 6.957497 6.957497 7405 +andquizz 1 1 6.957497 6.957497 7406 +betaken 1 1 6.957497 6.957497 7407 +prescrib 1 1 6.957497 6.957497 7408 +openedfor 1 1 6.957497 6.957497 7409 +yourstud 1 1 6.957497 6.957497 7410 +orsak 1 1 6.957497 6.957497 7411 +weem 1 1 6.957497 6.957497 7412 +liabl 1 1 6.957497 6.957497 7413 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..084e0e8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +databas 1 122 2.079442 2.079442 86 +pleas 1 113 2.197225 2.197225 114 +sinc 1 90 2.397895 2.397895 159 +comment 1 93 2.397895 2.397895 146 +solut 1 82 2.484907 2.484907 162 +homework 1 79 2.564949 2.564949 193 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +dept 1 64 2.772589 2.772589 291 +reason 1 57 2.890372 2.890372 318 +sampl 1 53 2.944439 2.944439 339 +suggest 1 53 2.944439 2.944439 331 +run 1 51 2.995732 2.995732 347 +without 1 50 3.044522 3.044522 370 +anoth 1 45 3.135494 3.135494 408 +error 1 40 3.258097 3.258097 449 +transform 1 32 3.465736 3.465736 542 +turn 1 29 3.583519 3.583519 586 +measur 1 28 3.610918 3.610918 609 +pass 1 28 3.610918 3.610918 611 +retriev 1 27 3.637586 3.637586 621 +compar 1 26 3.688879 3.688879 648 +wang 1 21 3.912023 3.912023 790 +output 1 21 3.912023 3.912023 788 +wrote 1 20 3.951244 3.951244 830 +benchmark 1 19 4.007333 4.007333 859 +attribut 1 14 4.317488 4.317488 1092 +script 1 13 4.382027 4.382027 1171 +perl 1 11 4.553877 4.553877 1332 +recoveri 1 9 4.753590 4.753590 1474 +replac 1 8 4.875197 4.875197 1668 +fail 1 8 4.875197 4.875197 1655 +contest 1 5 5.347108 5.347108 2273 +tupl 1 5 5.347108 5.347108 2244 +batori 1 4 5.568345 5.568345 2690 +delet 1 4 5.568345 5.568345 2691 +tong 1 3 5.857933 5.857933 3258 +append 1 2 6.263398 6.263398 4295 +ret_into 1 1 6.957497 6.957497 7414 +mdb 1 1 6.957497 6.957497 7415 +diff 1 1 6.957497 6.957497 7416 +medec 1 1 6.957497 6.957497 7417 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..9b3cd11e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,303 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +read 1 154 1.791759 1.791759 47 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +spring 1 131 2.079442 2.079442 88 +confer 1 126 2.079442 2.079442 100 +schedul 1 119 2.079442 2.079442 85 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +structur 1 106 2.197225 2.197225 105 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +real 1 93 2.397895 2.397895 144 +grade 1 90 2.397895 2.397895 142 +present 1 91 2.397895 2.397895 145 +proceed 1 93 2.397895 2.397895 152 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +call 1 91 2.397895 2.397895 153 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +control 1 82 2.484907 2.484907 164 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +septemb 1 65 2.772589 2.772589 274 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +unix 1 58 2.890372 2.890372 308 +overview 1 56 2.890372 2.890372 323 +summer 1 56 2.890372 2.890372 311 +undergradu 1 54 2.944439 2.944439 338 +cover 1 55 2.944439 2.944439 329 +februari 1 54 2.944439 2.944439 328 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +principl 1 48 3.044522 3.044522 357 +basic 1 50 3.044522 3.044522 360 +textbook 1 44 3.135494 3.135494 397 +made 1 44 3.135494 3.135494 398 +execut 1 45 3.135494 3.135494 404 +offer 1 43 3.178054 3.178054 414 +examin 1 42 3.218876 3.218876 424 +review 1 42 3.218876 3.218876 425 +fast 1 42 3.218876 3.218876 429 +cach 1 41 3.218876 3.218876 432 +theoret 1 39 3.258097 3.258097 446 +transact 1 39 3.258097 3.258097 438 +programm 1 39 3.258097 3.258097 445 +origin 1 38 3.295837 3.295837 472 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +procedur 1 36 3.367296 3.367296 488 +survei 1 35 3.401197 3.401197 513 +concurr 1 34 3.401197 3.401197 501 +articl 1 33 3.433987 3.433987 530 +toler 1 33 3.433987 3.433987 533 +idea 1 32 3.465736 3.465736 545 +chapter 1 32 3.465736 3.465736 536 +concept 1 32 3.465736 3.465736 537 +fault 1 32 3.465736 3.465736 547 +titl 1 31 3.496508 3.496508 556 +robert 1 30 3.555348 3.555348 567 +hard 1 30 3.555348 3.555348 563 +art 1 29 3.583519 3.583519 593 +multiprocessor 1 28 3.610918 3.610918 605 +determin 1 27 3.637586 3.637586 630 +altern 1 26 3.688879 3.688879 641 +proc 1 26 3.688879 3.688879 649 +constraint 1 26 3.688879 3.688879 636 +reliabl 1 25 3.737670 3.737670 674 +mobil 1 23 3.806662 3.806662 730 +thread 1 23 3.806662 3.806662 722 +highli 1 23 3.806662 3.806662 725 +inth 1 22 3.850148 3.850148 741 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +kernel 1 20 3.951244 3.951244 825 +prerequisit 1 19 4.007333 4.007333 846 +andrew 1 19 4.007333 4.007333 849 +anderson 1 19 4.007333 4.007333 860 +stand 1 18 4.060443 4.060443 891 +thoma 1 18 4.060443 4.060443 901 +bershad 1 18 4.060443 4.060443 902 +event 1 18 4.060443 4.060443 896 +germani 1 17 4.110874 4.110874 946 +protect 1 17 4.110874 4.110874 935 +earli 1 16 4.174387 4.174387 968 +young 1 16 4.174387 4.174387 991 +remot 1 15 4.248495 4.248495 1041 +princeton 1 15 4.248495 4.248495 1042 +levi 1 14 4.317488 4.317488 1093 +joint 1 13 4.382027 4.382027 1130 +gupta 1 12 4.465908 4.465908 1241 +usenix 1 12 4.465908 4.465908 1240 +bruce 1 12 4.465908 4.465908 1226 +denni 1 11 4.553877 4.553877 1321 +reness 1 11 4.553877 4.553877 1333 +impact 1 11 4.553877 4.553877 1334 +operatingsystem 1 10 4.653960 4.653960 1401 +sosp 1 10 4.653960 4.653960 1416 +henri 1 10 4.653960 4.653960 1417 +black 1 10 4.653960 4.653960 1418 +hint 1 10 4.653960 4.653960 1419 +familiar 1 9 4.753590 4.753590 1485 +robbert 1 9 4.753590 4.753590 1529 +inter 1 9 4.753590 4.753590 1530 +birman 1 9 4.753590 4.753590 1531 +kumar 1 9 4.753590 4.753590 1506 +mach 1 8 4.875197 4.875197 1669 +inproceed 1 8 4.875197 4.875197 1670 +presenc 1 8 4.875197 4.875197 1671 +harrick 1 7 5.010635 5.010635 1849 +peterson 1 7 5.010635 5.010635 1850 +migrat 1 7 5.010635 5.010635 1851 +beyond 1 7 5.010635 5.010635 1834 +encrypt 1 7 5.010635 5.010635 1835 +multiprogram 1 6 5.164786 5.164786 2010 +thompson 1 6 5.164786 5.164786 2049 +silberschatz 1 6 5.164786 5.164786 1978 +edward 1 6 5.164786 5.164786 2050 +onoper 1 6 5.164786 5.164786 2048 +internationalconfer 1 6 5.164786 5.164786 2051 +distributedsystem 1 6 5.164786 5.164786 2022 +versu 1 6 5.164786 5.164786 2052 +ofdistribut 1 5 5.347108 5.347108 2316 +theth 1 5 5.347108 5.347108 2325 +oncomput 1 5 5.347108 5.347108 2326 +ousterhout 1 5 5.347108 5.347108 2301 +joseph 1 5 5.347108 5.347108 2327 +authent 1 5 5.347108 5.347108 2306 +steer 1 5 5.347108 5.347108 2328 +wireless 1 4 5.568345 5.568345 2693 +breadth 1 4 5.568345 5.568345 2695 +lazowska 1 4 5.568345 5.568345 2694 +implic 1 4 5.568345 5.568345 2696 +fora 1 4 5.568345 5.568345 2697 +theacm 1 4 5.568345 5.568345 2698 +zhao 1 4 5.568345 5.568345 2699 +synopsi 1 3 5.857933 5.857933 3260 +formobil 1 3 5.857933 5.857933 3261 +requirementsstud 1 3 5.857933 5.857933 3116 +theperform 1 3 5.857933 5.857933 3262 +cheriton 1 3 5.857933 5.857933 3259 +berlin 1 3 5.857933 5.857933 3263 +terri 1 3 5.857933 5.857933 3264 +golub 1 3 5.857933 5.857933 3265 +tokuda 1 3 5.857933 5.857933 3266 +kistler 1 3 5.857933 5.857933 3267 +synopsisc 1 2 6.263398 6.263398 4296 +systemdesign 1 2 6.263398 6.263398 4297 +theinstructor 1 2 6.263398 6.263398 4298 +anexperiment 1 2 6.263398 6.263398 4299 +afip 1 2 6.263398 6.263398 4300 +hansen 1 2 6.263398 6.263398 4301 +nucleu 1 2 6.263398 6.263398 4302 +bensoussan 1 2 6.263398 6.263398 4303 +multic 1 2 6.263398 6.263398 4304 +virtualmemori 1 2 6.263398 6.263398 4305 +ritchi 1 2 6.263398 6.263398 4306 +tucker 1 2 6.263398 6.263398 4307 +bunt 1 2 6.263398 6.263398 4308 +barrera 1 2 6.263398 6.263398 4309 +acmtransact 1 2 6.263398 6.263398 4310 +nelson 1 2 6.263398 6.263398 4168 +cristian 1 2 6.263398 6.263398 4311 +offailur 1 2 6.263398 6.263398 4255 +systemsr 1 2 6.263398 6.263398 4312 +goldberg 1 2 6.263398 6.263398 4313 +rosenblum 1 2 6.263398 6.263398 4314 +ieeetransact 1 2 6.263398 6.263398 4315 +oninform 1 2 6.263398 6.263398 4316 +baron 1 2 6.263398 6.263398 4317 +rashid 1 2 6.263398 6.263398 4318 +preemptiv 1 2 6.263398 6.263398 4319 +ondistribut 1 2 6.263398 6.263398 4320 +prerequisitegradu 1 1 6.957497 6.957497 7427 +systemssuch 1 1 6.957497 6.957497 7428 +materialin 1 1 6.957497 6.957497 7429 +andsilberschatz 1 1 6.957497 6.957497 7430 +coveringboth 1 1 6.957497 6.957497 7431 +anemphasi 1 1 6.957497 6.957497 7432 +anddiscuss 1 1 6.957497 6.957497 7433 +aterm 1 1 6.957497 6.957497 7434 +systemsfernando 1 1 6.957497 6.957497 7435 +corbato 1 1 6.957497 6.957497 7436 +marjori 1 1 6.957497 6.957497 7437 +merwin 1 1 6.957497 6.957497 7438 +daggett 1 1 6.957497 6.957497 7439 +dalei 1 1 6.957497 6.957497 7420 +brinch 1 1 6.957497 6.957497 7440 +clingen 1 1 6.957497 6.957497 7441 +tannenbaum 1 1 6.957497 6.957497 7442 +andexampl 1 1 6.957497 6.957497 7443 +managementa 1 1 6.957497 6.957497 7444 +formultiprogram 1 1 6.957497 6.957497 7421 +forshar 1 1 6.957497 6.957497 7445 +schedulingr 1 1 6.957497 6.957497 7446 +communicationj 1 1 6.957497 6.957497 7447 +birel 1 1 6.957497 6.957497 7448 +rpc 1 1 6.957497 6.957497 7449 +lightweightremot 1 1 6.957497 6.957497 7450 +migrationf 1 1 6.957497 6.957497 7451 +dougli 1 1 6.957497 6.957497 7452 +spriteoper 1 1 6.957497 6.957497 7453 +theimer 1 1 6.957497 6.957497 7454 +lantz 1 1 6.957497 6.957497 7455 +preemptabl 1 1 6.957497 6.957497 7456 +tolerancef 1 1 6.957497 6.957497 7457 +sand 1 1 6.957497 6.957497 7458 +karshmer 1 1 6.957497 6.957497 7422 +nehmer 1 1 6.957497 6.957497 7423 +sandberg 1 1 6.957497 6.957497 7459 +kleiman 1 1 6.957497 6.957497 7460 +ofsun 1 1 6.957497 6.957497 7461 +mckusick 1 1 6.957497 6.957497 7462 +leffler 1 1 6.957497 6.957497 7463 +fabri 1 1 6.957497 6.957497 7464 +fastfil 1 1 6.957497 6.957497 7465 +alog 1 1 6.957497 6.957497 7466 +systemsm 1 1 6.957497 6.957497 7467 +schroeder 1 1 6.957497 6.957497 7424 +gifford 1 1 6.957497 6.957497 7468 +needham 1 1 6.957497 6.957497 7425 +securityr 1 1 6.957497 6.957497 7469 +inlarg 1 1 6.957497 6.957497 7470 +butler 1 1 6.957497 6.957497 7471 +lampson 1 1 6.957497 6.957497 7472 +accetta 1 1 6.957497 6.957497 7473 +boloski 1 1 6.957497 6.957497 7474 +tevanian 1 1 6.957497 6.957497 7475 +systemsh 1 1 6.957497 6.957497 7476 +kopetz 1 1 6.957497 6.957497 7477 +trigger 1 1 6.957497 6.957497 7426 +timesystem 1 1 6.957497 6.957497 7478 +layland 1 1 6.957497 6.957497 7479 +ramamritham 1 1 6.957497 6.957497 7480 +stankov 1 1 6.957497 6.957497 7481 +schedulingund 1 1 6.957497 6.957497 7482 +mercer 1 1 6.957497 6.957497 7483 +computingb 1 1 6.957497 6.957497 7484 +badrinath 1 1 6.957497 6.957497 7485 +acharya 1 1 6.957497 6.957497 7486 +imielinski 1 1 6.957497 6.957497 7487 +satyanarayanan 1 1 6.957497 6.957497 7488 +okasaki 1 1 6.957497 6.957497 7489 +siegel 1 1 6.957497 6.957497 7490 +coda 1 1 6.957497 6.957497 7491 +distributedworkst 1 1 6.957497 6.957497 7492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..c3706f84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +note 1 142 1.945910 1.945910 67 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +follow 1 92 2.397895 2.397895 143 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +pictur 1 89 2.397895 2.397895 160 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +select 1 91 2.397895 2.397895 154 +imag 1 91 2.397895 2.397895 161 +requir 1 81 2.484907 2.484907 167 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +internet 1 83 2.484907 2.484907 186 +journal 1 83 2.484907 2.484907 183 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +second 1 81 2.484907 2.484907 166 +environ 1 84 2.484907 2.484907 177 +server 1 76 2.564949 2.564949 204 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +optim 1 79 2.564949 2.564949 197 +june 1 79 2.564949 2.564949 214 +dynam 1 76 2.564949 2.564949 194 +refer 1 78 2.564949 2.564949 203 +servic 1 72 2.639057 2.639057 236 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +multimedia 1 68 2.708050 2.708050 258 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +septemb 1 65 2.772589 2.772589 274 +taylor 1 63 2.772589 2.772589 287 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +polici 1 64 2.772589 2.772589 279 +januari 1 62 2.772589 2.772589 264 +content 1 59 2.833213 2.833213 302 +locat 1 59 2.833213 2.833213 303 +march 1 61 2.833213 2.833213 295 +simpl 1 60 2.833213 2.833213 298 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +summer 1 56 2.890372 2.890372 311 +space 1 57 2.890372 2.890372 310 +processor 1 54 2.944439 2.944439 335 +cover 1 55 2.944439 2.944439 329 +tabl 1 51 2.995732 2.995732 346 +digit 1 52 2.995732 2.995732 348 +case 1 51 2.995732 2.995732 351 +format 1 48 3.044522 3.044522 356 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +principl 1 48 3.044522 3.044522 357 +still 1 50 3.044522 3.044522 362 +standard 1 48 3.044522 3.044522 365 +possibl 1 47 3.091042 3.091042 378 +california 1 46 3.091042 3.091042 388 +adapt 1 46 3.091042 3.091042 387 +textbook 1 44 3.135494 3.135494 397 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +discuss 1 45 3.135494 3.135494 399 +answer 1 45 3.135494 3.135494 391 +describ 1 45 3.135494 3.135494 400 +offer 1 43 3.178054 3.178054 414 +long 1 43 3.178054 3.178054 413 +third 1 43 3.178054 3.178054 412 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +futur 1 41 3.218876 3.218876 427 +review 1 42 3.218876 3.218876 425 +tutori 1 39 3.258097 3.258097 437 +multipl 1 39 3.258097 3.258097 453 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +multi 1 36 3.367296 3.367296 493 +ofth 1 36 3.367296 3.367296 491 +survei 1 35 3.401197 3.401197 513 +represent 1 35 3.401197 3.401197 512 +statist 1 35 3.401197 3.401197 521 +articl 1 33 3.433987 3.433987 530 +toler 1 33 3.433987 3.433987 533 +concept 1 32 3.465736 3.465736 537 +fault 1 32 3.465736 3.465736 547 +storag 1 31 3.496508 3.496508 553 +titl 1 31 3.496508 3.496508 556 +particip 1 29 3.583519 3.583519 589 +synchron 1 29 3.583519 3.583519 588 +packag 1 28 3.610918 3.610918 614 +scale 1 28 3.610918 3.610918 613 +framework 1 28 3.610918 3.610918 606 +retriev 1 27 3.637586 3.637586 621 +determin 1 27 3.637586 3.637586 630 +arrai 1 27 3.637586 3.637586 627 +session 1 26 3.688879 3.688879 643 +fundament 1 25 3.737670 3.737670 661 +scalabl 1 24 3.761200 3.761200 705 +magazin 1 24 3.761200 3.761200 704 +compress 1 23 3.806662 3.806662 719 +variabl 1 23 3.806662 3.806662 715 +emphasi 1 22 3.850148 3.850148 755 +disk 1 22 3.850148 3.850148 747 +reduc 1 22 3.850148 3.850148 759 +chen 1 21 3.912023 3.912023 791 +flexibl 1 21 3.912023 3.912023 792 +mpeg 1 20 3.951244 3.951244 831 +sure 1 20 3.951244 3.951244 813 +scheme 1 20 3.951244 3.951244 818 +qualiti 1 20 3.951244 3.951244 832 +prerequisit 1 19 4.007333 4.007333 846 +media 1 19 4.007333 4.007333 861 +boston 1 19 4.007333 4.007333 862 +comparison 1 19 4.007333 4.007333 863 +anderson 1 19 4.007333 4.007333 860 +stand 1 18 4.060443 4.060443 891 +failur 1 18 4.060443 4.060443 898 +layer 1 17 4.110874 4.110874 926 +diego 1 16 4.174387 4.174387 992 +zhang 1 16 4.174387 4.174387 980 +transfer 1 16 4.174387 4.174387 967 +contribut 1 15 4.248495 4.248495 1021 +hierarch 1 15 4.248495 4.248495 1018 +stream 1 15 4.248495 4.248495 1015 +rate 1 15 4.248495 4.248495 1037 +incomput 1 14 4.317488 4.317488 1096 +audio 1 14 4.317488 4.317488 1094 +francisco 1 14 4.317488 4.317488 1095 +demand 1 14 4.317488 4.317488 1073 +heterogen 1 14 4.317488 4.317488 1090 +consider 1 14 4.317488 4.317488 1076 +introduc 1 13 4.382027 4.382027 1139 +carri 1 13 4.382027 4.382027 1152 +resolut 1 13 4.382027 4.382027 1172 +sigmetr 1 13 4.382027 4.382027 1173 +uniqu 1 12 4.465908 4.465908 1228 +readi 1 12 4.465908 4.465908 1242 +scan 1 12 4.465908 4.465908 1243 +buffer 1 12 4.465908 4.465908 1211 +philadelphia 1 12 4.465908 4.465908 1244 +weight 1 12 4.465908 4.465908 1204 +placement 1 10 4.653960 4.653960 1420 +packet 1 10 4.653960 4.653960 1415 +traffic 1 10 4.653960 4.653960 1421 +operatingsystem 1 10 4.653960 4.653960 1401 +establish 1 9 4.753590 4.753590 1532 +familiar 1 9 4.753590 4.753590 1485 +recoveri 1 9 4.753590 4.753590 1474 +light 1 9 4.753590 4.753590 1533 +osdi 1 9 4.753590 4.753590 1534 +transport 1 8 4.875197 4.875197 1672 +convers 1 8 4.875197 4.875197 1673 +pacif 1 8 4.875197 4.875197 1674 +grove 1 8 4.875197 4.875197 1675 +harrick 1 7 5.010635 5.010635 1849 +trend 1 7 5.010635 5.010635 1842 +channel 1 7 5.010635 5.010635 1836 +chiang 1 7 5.010635 5.010635 1853 +supportfor 1 7 5.010635 5.010635 1854 +keshav 1 7 5.010635 5.010635 1852 +smooth 1 7 5.010635 5.010635 1855 +misra 1 7 5.010635 5.010635 1856 +peterson 1 7 5.010635 5.010635 1850 +conferenc 1 7 5.010635 5.010635 1857 +jpeg 1 6 5.164786 5.164786 2053 +internationalconfer 1 6 5.164786 5.164786 2051 +subsystem 1 6 5.164786 5.164786 2015 +symposiumon 1 6 5.164786 5.164786 2054 +row 1 5 5.347108 5.347108 2330 +sigcomm 1 5 5.347108 5.347108 2329 +proceedingsof 1 5 5.347108 5.347108 2331 +batch 1 4 5.568345 5.568345 2700 +multimediasystem 1 4 5.568345 5.568345 2701 +addition 1 4 5.568345 5.568345 2593 +venkat 1 4 5.568345 5.568345 2702 +forparallel 1 4 5.568345 5.568345 2703 +admiss 1 4 5.568345 5.568345 2704 +clark 1 4 5.568345 5.568345 2705 +floyd 1 4 5.568345 5.568345 2682 +buss 1 4 5.568345 5.568345 2649 +synopsi 1 3 5.857933 5.857933 3260 +informationcours 1 3 5.857933 5.857933 3167 +shenoi 1 3 5.857933 5.857933 3269 +guadalup 1 3 5.857933 5.857933 3255 +multimediaappl 1 3 5.857933 5.857933 3274 +goyal 1 3 5.857933 5.857933 3268 +rangan 1 3 5.857933 5.857933 3270 +ftc 1 3 5.857933 5.857933 3275 +katz 1 3 5.857933 5.857933 3276 +anaheim 1 3 5.857933 5.857933 3271 +reddi 1 3 5.857933 5.857933 3277 +deliveri 1 3 5.857933 5.857933 3278 +durham 1 3 5.857933 5.857933 3279 +hampshir 1 3 5.857933 5.857933 3280 +chow 1 3 5.857933 5.857933 3281 +london 1 3 5.857933 5.857933 3282 +campbel 1 3 5.857933 5.857933 3272 +mccann 1 3 5.857933 5.857933 3273 +prerequisitesgradu 1 2 6.263398 6.263398 4325 +synopsisc 1 2 6.263398 6.263398 4296 +madeavail 1 2 6.263398 6.263398 4326 +thetop 1 2 6.263398 6.263398 4327 +critiqu 1 2 6.263398 6.263398 4328 +ofpap 1 2 6.263398 6.263398 4329 +andclass 1 2 6.263398 6.263398 4330 +prashant 1 2 6.263398 6.263398 4331 +gemmel 1 2 6.263398 6.263398 4332 +kandlur 1 2 6.263398 6.263398 4321 +ofmultimedia 1 2 6.263398 6.263398 4322 +ieeeintern 1 2 6.263398 6.263398 4333 +icmc 1 2 6.263398 6.263398 4323 +inmulti 1 2 6.263398 6.263398 4334 +annualintern 1 2 6.263398 6.263398 4335 +pasadena 1 2 6.263398 6.263398 4336 +multimediai 1 2 6.263398 6.263398 4337 +acmmultimedia 1 2 6.263398 6.263398 4338 +sanfrancisco 1 2 6.263398 6.263398 4339 +shenker 1 2 6.263398 6.263398 4340 +verma 1 2 6.263398 6.263398 4341 +delaybound 1 2 6.263398 6.263398 4342 +toappear 1 2 6.263398 6.263398 4343 +nossdav 1 2 6.263398 6.263398 4344 +acmsigcomm 1 2 6.263398 6.263398 4345 +jacobson 1 2 6.263398 6.263398 4324 +andd 1 2 6.263398 6.263398 4346 +shepherd 1 2 6.263398 6.263398 4347 +basedcommun 1 2 6.263398 6.263398 4348 +incommun 1 2 6.263398 6.263398 4349 +govindan 1 2 6.263398 6.263398 4350 +forcontinu 1 2 6.263398 6.263398 4351 +formultimedia 1 2 6.263398 6.263398 4352 +zellweg 1 2 6.263398 6.263398 4353 +swinehart 1 2 6.263398 6.263398 4354 +etherphon 1 2 6.263398 6.263398 4355 +descriptiongener 1 1 6.957497 6.957497 7496 +boththeoret 1 1 6.957497 6.957497 7497 +systemsupport 1 1 6.957497 6.957497 7498 +transportprotocol 1 1 6.957497 6.957497 7499 +designissu 1 1 6.957497 6.957497 7500 +textbooka 1 1 6.957497 6.957497 7501 +requirementsth 1 1 6.957497 6.957497 7502 +relatedpap 1 1 6.957497 6.957497 7503 +tounderstand 1 1 6.957497 6.957497 7504 +asemest 1 1 6.957497 6.957497 7505 +vintuesdai 1 1 6.957497 6.957497 7506 +assistantmr 1 1 6.957497 6.957497 7507 +eduread 1 1 6.957497 6.957497 7508 +cntain 1 1 6.957497 6.957497 7509 +theread 1 1 6.957497 6.957497 7510 +speedwai 1 1 6.957497 6.957497 7511 +dobi 1 1 6.957497 6.957497 7512 +mall 1 1 6.957497 6.957497 7513 +callthem 1 1 6.957497 6.957497 7514 +compressionr 1 1 6.957497 6.957497 7515 +steinmetz 1 1 6.957497 6.957497 7493 +wallac 1 1 6.957497 6.957497 7516 +gall 1 1 6.957497 6.957497 7517 +anastassi 1 1 6.957497 6.957497 7518 +digitaltelevis 1 1 6.957497 6.957497 7519 +serversoverview 1 1 6.957497 6.957497 7520 +serverdesign 1 1 6.957497 6.957497 7521 +chiueh 1 1 6.957497 6.957497 7522 +groupedsweep 1 1 6.957497 6.957497 7523 +ofthird 1 1 6.957497 6.957497 7524 +narasimha 1 1 6.957497 6.957497 7525 +wylli 1 1 6.957497 6.957497 7526 +admissioncontrol 1 1 6.957497 6.957497 7527 +designinglarg 1 1 6.957497 6.957497 7528 +inmultimedia 1 1 6.957497 6.957497 7529 +interactivevideo 1 1 6.957497 6.957497 7530 +playout 1 1 6.957497 6.957497 7531 +sitaram 1 1 6.957497 6.957497 7494 +shahabuddin 1 1 6.957497 6.957497 7532 +foran 1 1 6.957497 6.957497 7533 +demandvideo 1 1 6.957497 6.957497 7534 +papadimitri 1 1 6.957497 6.957497 7535 +ramanathan 1 1 6.957497 6.957497 7536 +informationcach 1 1 6.957497 6.957497 7537 +homeentertain 1 1 6.957497 6.957497 7538 +multimedianetwork 1 1 6.957497 6.957497 7539 +ferrari 1 1 6.957497 6.957497 7540 +channelestablish 1 1 6.957497 6.957497 7541 +areasin 1 1 6.957497 6.957497 7542 +servicedisciplin 1 1 6.957497 6.957497 7543 +workshopon 1 1 6.957497 6.957497 7544 +losslesssmooth 1 1 6.957497 6.957497 7545 +salehi 1 1 6.957497 6.957497 7546 +kuros 1 1 6.957497 6.957497 7547 +towslei 1 1 6.957497 6.957497 7548 +storedvideo 1 1 6.957497 6.957497 7549 +requirementsthrough 1 1 6.957497 6.957497 7550 +grossglaus 1 1 6.957497 6.957497 7551 +rcbr 1 1 6.957497 6.957497 7552 +efficientservic 1 1 6.957497 6.957497 7553 +kanakia 1 1 6.957497 6.957497 7554 +reibman 1 1 6.957497 6.957497 7555 +congestioncontrol 1 1 6.957497 6.957497 7556 +tennenhous 1 1 6.957497 6.957497 7557 +newgener 1 1 6.957497 6.957497 7558 +coulson 1 1 6.957497 6.957497 7495 +hutchison 1 1 6.957497 6.957497 7559 +servicearchitectur 1 1 6.957497 6.957497 7560 +turner 1 1 6.957497 6.957497 7561 +reliablemulticast 1 1 6.957497 6.957497 7562 +levelfram 1 1 6.957497 6.957497 7563 +deffner 1 1 6.957497 6.957497 7564 +schulzrinn 1 1 6.957497 6.957497 7565 +blakowski 1 1 6.957497 6.957497 7566 +onselect 1 1 6.957497 6.957497 7567 +januaryoper 1 1 6.957497 6.957497 7568 +multimediag 1 1 6.957497 6.957497 7569 +robin 1 1 6.957497 6.957497 7570 +blair 1 1 6.957497 6.957497 7571 +papathoma 1 1 6.957497 6.957497 7572 +choru 1 1 6.957497 6.957497 7573 +druschel 1 1 6.957497 6.957497 7574 +abbott 1 1 6.957497 6.957497 7575 +pagel 1 1 6.957497 6.957497 7576 +systemssupport 1 1 6.957497 6.957497 7577 +conferencingh 1 1 6.957497 6.957497 7578 +venkatrangan 1 1 6.957497 6.957497 7579 +packetvideo 1 1 6.957497 6.957497 7580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..47c07712 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,297 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +analysi 1 124 2.079442 2.079442 98 +high 1 130 2.079442 2.079442 101 +seattl 1 120 2.079442 2.079442 103 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +novemb 1 81 2.484907 2.484907 179 +build 1 85 2.484907 2.484907 184 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +larg 1 82 2.484907 2.484907 168 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +servic 1 72 2.639057 2.639057 236 +meet 1 72 2.639057 2.639057 229 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +multimedia 1 68 2.708050 2.708050 258 +receiv 1 66 2.708050 2.708050 244 +simul 1 66 2.708050 2.708050 255 +august 1 66 2.708050 2.708050 257 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +interact 1 62 2.772589 2.772589 270 +copi 1 63 2.772589 2.772589 284 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +locat 1 59 2.833213 2.833213 303 +semest 1 58 2.890372 2.890372 312 +processor 1 54 2.944439 2.944439 335 +februari 1 54 2.944439 2.944439 328 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +textbook 1 44 3.135494 3.135494 397 +fridai 1 44 3.135494 3.135494 390 +protocol 1 45 3.135494 3.135494 407 +made 1 44 3.135494 3.135494 398 +discuss 1 45 3.135494 3.135494 399 +video 1 44 3.135494 3.135494 405 +mechan 1 43 3.178054 3.178054 416 +http 1 41 3.218876 3.218876 420 +submit 1 39 3.258097 3.258097 440 +transact 1 39 3.258097 3.258097 438 +paul 1 38 3.295837 3.295837 471 +cost 1 37 3.332205 3.332205 480 +connect 1 37 3.332205 3.332205 485 +tree 1 36 3.367296 3.367296 492 +multi 1 36 3.367296 3.367296 493 +ofth 1 36 3.367296 3.367296 491 +articl 1 33 3.433987 3.433987 530 +queri 1 33 3.433987 3.433987 524 +concept 1 32 3.465736 3.465736 537 +collabor 1 32 3.465736 3.465736 543 +extend 1 32 3.465736 3.465736 539 +titl 1 31 3.496508 3.496508 556 +domain 1 30 3.555348 3.555348 564 +particip 1 29 3.583519 3.583519 589 +semant 1 29 3.583519 3.583519 587 +framework 1 28 3.610918 3.610918 606 +retriev 1 27 3.637586 3.637586 621 +berkelei 1 26 3.688879 3.688879 657 +reliabl 1 25 3.737670 3.737670 674 +scalabl 1 24 3.761200 3.761200 705 +frame 1 24 3.761200 3.761200 684 +mobil 1 23 3.806662 3.806662 730 +emphasi 1 22 3.850148 3.850148 755 +self 1 22 3.850148 3.850148 761 +color 1 22 3.850148 3.850148 762 +rout 1 21 3.912023 3.912023 793 +scheme 1 20 3.951244 3.951244 818 +prerequisit 1 19 4.007333 4.007333 846 +boston 1 19 4.007333 4.007333 862 +anderson 1 19 4.007333 4.007333 860 +media 1 19 4.007333 4.007333 861 +predict 1 19 4.007333 4.007333 855 +stand 1 18 4.060443 4.060443 891 +letter 1 16 4.174387 4.174387 981 +vector 1 16 4.174387 4.174387 961 +diego 1 16 4.174387 4.174387 992 +zhang 1 16 4.174387 4.174387 980 +latenc 1 16 4.174387 4.174387 993 +hierarch 1 15 4.248495 4.248495 1018 +francisco 1 14 4.317488 4.317488 1095 +audio 1 14 4.317488 4.317488 1094 +gupta 1 12 4.465908 4.465908 1241 +replic 1 12 4.465908 4.465908 1231 +shape 1 12 4.465908 4.465908 1245 +clock 1 11 4.553877 4.553877 1320 +packet 1 10 4.653960 4.653960 1415 +queue 1 10 4.653960 4.653960 1386 +sosp 1 10 4.653960 4.653960 1416 +familiar 1 9 4.753590 4.753590 1485 +inter 1 9 4.753590 4.753590 1530 +distanc 1 9 4.753590 4.753590 1500 +light 1 9 4.753590 4.753590 1533 +osdi 1 9 4.753590 4.753590 1534 +face 1 9 4.753590 4.753590 1501 +transport 1 8 4.875197 4.875197 1672 +parti 1 8 4.875197 4.875197 1676 +paradigm 1 8 4.875197 4.875197 1662 +textur 1 8 4.875197 4.875197 1677 +harrick 1 7 5.010635 5.010635 1849 +conferenc 1 7 5.010635 5.010635 1857 +core 1 7 5.010635 5.010635 1809 +channel 1 7 5.010635 5.010635 1836 +determinist 1 6 5.164786 5.164786 2034 +onoper 1 6 5.164786 5.164786 2048 +multicast 1 5 5.347108 5.347108 2305 +sigcomm 1 5 5.347108 5.347108 2329 +fair 1 5 5.347108 5.347108 2333 +consum 1 5 5.347108 5.347108 2334 +jain 1 5 5.347108 5.347108 2332 +multimediasystem 1 4 5.568345 5.568345 2701 +venkat 1 4 5.568345 5.568345 2702 +andevalu 1 4 5.568345 5.568345 2706 +floyd 1 4 5.568345 5.568345 2682 +theacm 1 4 5.568345 5.568345 2698 +ofinform 1 4 5.568345 5.568345 2707 +bach 1 4 5.568345 5.568345 2708 +synopsi 1 3 5.857933 5.857933 3260 +networkprotocol 1 3 5.857933 5.857933 3285 +requirementsstud 1 3 5.857933 5.857933 3116 +aswel 1 3 5.857933 5.857933 3286 +mccann 1 3 5.857933 5.857933 3273 +kistler 1 3 5.857933 5.857933 3267 +rangan 1 3 5.857933 5.857933 3270 +cheriton 1 3 5.857933 5.857933 3259 +franci 1 3 5.857933 5.857933 3287 +singhal 1 3 5.857933 5.857933 3098 +axiomat 1 3 5.857933 5.857933 3288 +how 1 3 5.857933 5.857933 3289 +moran 1 3 5.857933 5.857933 3151 +nguyen 1 3 5.857933 5.857933 3290 +infocom 1 3 5.857933 5.857933 3283 +nearbi 1 3 5.857933 5.857933 3291 +weihl 1 3 5.857933 5.857933 3284 +ofoper 1 3 5.857933 5.857933 3292 +proport 1 3 5.857933 5.857933 3293 +qbic 1 3 5.857933 5.857933 3294 +prerequisitesgradu 1 2 6.263398 6.263398 4325 +formultimedia 1 2 6.263398 6.263398 4352 +andresearch 1 2 6.263398 6.263398 4247 +theinstructor 1 2 6.263398 6.263398 4298 +studentsenrol 1 2 6.263398 6.263398 4073 +jacobson 1 2 6.263398 6.263398 4324 +acmmultimedia 1 2 6.263398 6.263398 4338 +redel 1 2 6.263398 6.263398 4358 +zellweg 1 2 6.263398 6.263398 4353 +swinehart 1 2 6.263398 6.263398 4354 +etherphon 1 2 6.263398 6.263398 4355 +deer 1 2 6.263398 6.263398 4356 +lan 1 2 6.263398 6.263398 4359 +computersystem 1 2 6.263398 6.263398 4360 +mbone 1 2 6.263398 6.263398 4361 +shenker 1 2 6.263398 6.263398 4340 +acmsigcomm 1 2 6.263398 6.263398 4345 +resourcemanag 1 2 6.263398 6.263398 4266 +govindan 1 2 6.263398 6.263398 4350 +forcontinu 1 2 6.263398 6.263398 4351 +monterei 1 2 6.263398 6.263398 4362 +jeffai 1 2 6.263398 6.263398 4357 +timeoper 1 2 6.263398 6.263398 4363 +niblack 1 2 6.263398 6.263398 4364 +managementsystem 1 2 6.263398 6.263398 4365 +knowledgeand 1 2 6.263398 6.263398 4366 +onveri 1 2 6.263398 6.263398 4367 +synopsisthi 1 1 6.957497 6.957497 7583 +bediscuss 1 1 6.957497 6.957497 7584 +andmultimedia 1 1 6.957497 6.957497 7585 +multimediadatabas 1 1 6.957497 6.957497 7586 +determinedbas 1 1 6.957497 6.957497 7587 +orcarri 1 1 6.957497 6.957497 7588 +hoursfridai 1 1 6.957497 6.957497 7589 +flexibleframework 1 1 6.957497 6.957497 7590 +handlei 1 1 6.957497 6.957497 7591 +wakeman 1 1 6.957497 6.957497 7592 +crowcroft 1 1 6.957497 6.957497 7581 +controlchannel 1 1 6.957497 6.957497 7593 +cccp 1 1 6.957497 6.957497 7594 +conferencecontrol 1 1 6.957497 6.957497 7595 +gajewska 1 1 6.957497 6.957497 7596 +manass 1 1 6.957497 6.957497 7597 +argo 1 1 6.957497 6.957497 7598 +systemfor 1 1 6.957497 6.957497 7599 +gong 1 1 6.957497 6.957497 7600 +multipoint 1 1 6.957497 6.957497 7601 +basedmultimedia 1 1 6.957497 6.957497 7602 +ieeecomput 1 1 6.957497 6.957497 7603 +datagraminternetwork 1 1 6.957497 6.957497 7604 +ballardi 1 1 6.957497 6.957497 7605 +thyagarajan 1 1 6.957497 6.957497 7606 +widyono 1 1 6.957497 6.957497 7607 +msthesi 1 1 6.957497 6.957497 7608 +kompella 1 1 6.957497 6.957497 7609 +pasqual 1 1 6.957497 6.957497 7610 +polyzo 1 1 6.957497 6.957497 7611 +multimediacommun 1 1 6.957497 6.957497 7612 +weightsess 1 1 6.957497 6.957497 7613 +ofacm 1 1 6.957497 6.957497 7614 +holbrook 1 1 6.957497 6.957497 7615 +fordistribut 1 1 6.957497 6.957497 7616 +herzog 1 1 6.957497 6.957497 7617 +estrin 1 1 6.957497 6.957497 7618 +timecommun 1 1 6.957497 6.957497 7619 +servicesj 1 1 6.957497 6.957497 7620 +guyton 1 1 6.957497 6.957497 7621 +schwartz 1 1 6.957497 6.957497 7622 +mogul 1 1 6.957497 6.957497 7623 +forpersist 1 1 6.957497 6.957497 7624 +supportc 1 1 6.957497 6.957497 7625 +warldersburg 1 1 6.957497 6.957497 7582 +lotteri 1 1 6.957497 6.957497 7626 +flexibleproport 1 1 6.957497 6.957497 7627 +mangement 1 1 6.957497 6.957497 7628 +strideschedul 1 1 6.957497 6.957497 7629 +golestani 1 1 6.957497 6.957497 7630 +speedappl 1 1 6.957497 6.957497 7631 +timeproduc 1 1 6.957497 6.957497 7632 +ofeffici 1 1 6.957497 6.957497 7633 +sigapp 1 1 6.957497 6.957497 7634 +intim 1 1 6.957497 6.957497 7635 +databasesw 1 1 6.957497 6.957497 7636 +contentus 1 1 6.957497 6.957497 7637 +cawkel 1 1 6.957497 6.957497 7638 +weymouth 1 1 6.957497 6.957497 7639 +vimsi 1 1 6.957497 6.957497 7640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..2b4ace8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +number 1 130 2.079442 2.079442 97 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +well 1 109 2.197225 2.197225 121 +structur 1 106 2.197225 2.197225 105 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +question 1 91 2.397895 2.397895 141 +exam 1 86 2.484907 2.484907 169 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +refer 1 78 2.564949 2.564949 203 +decemb 1 80 2.564949 2.564949 215 +mondai 1 77 2.564949 2.564949 206 +receiv 1 66 2.708050 2.708050 244 +differ 1 66 2.708050 2.708050 253 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +think 1 57 2.890372 2.890372 314 +instruct 1 53 2.944439 2.944439 332 +case 1 51 2.995732 2.995732 351 +set 1 50 3.044522 3.044522 361 +right 1 48 3.044522 3.044522 363 +answer 1 45 3.135494 3.135494 391 +fridai 1 44 3.135494 3.135494 390 +littl 1 39 3.258097 3.258097 454 +respons 1 37 3.332205 3.332205 476 +word 1 34 3.401197 3.401197 508 +either 1 35 3.401197 3.401197 506 +chapter 1 32 3.465736 3.465736 536 +quot 1 29 3.583519 3.583519 582 +known 1 24 3.761200 3.761200 702 +size 1 23 3.806662 3.806662 713 +inth 1 22 3.850148 3.850148 741 +sent 1 22 3.850148 3.850148 763 +cycl 1 11 4.553877 4.553877 1335 +sentenc 1 10 4.653960 4.653960 1413 +total 1 10 4.653960 4.653960 1398 +label 1 10 4.653960 4.653960 1423 +true 1 10 4.653960 4.653960 1422 +equal 1 10 4.653960 4.653960 1424 +pose 1 9 4.753590 4.753590 1535 +face 1 9 4.753590 4.753590 1501 +entri 1 8 4.875197 4.875197 1678 +largest 1 7 5.010635 5.010635 1858 +meant 1 6 5.164786 5.164786 2055 +worst 1 5 5.347108 5.347108 2287 +vertic 1 5 5.347108 5.347108 2270 +vijaya 1 4 5.568345 5.568345 2677 +disjoint 1 4 5.568345 5.568345 2709 +denot 1 3 5.857933 5.857933 3147 +sigma 1 2 6.263398 6.263398 4369 +amort 1 2 6.263398 6.263398 4370 +omega 1 2 6.263398 6.263398 4368 +ramachandranuniqu 1 1 6.957497 6.957497 7642 +onsigma 1 1 6.957497 6.957497 7643 +oroth 1 1 6.957497 6.957497 7644 +isther 1 1 6.957497 6.957497 7645 +paragraphof 1 1 6.957497 6.957497 7646 +containdistinct 1 1 6.957497 6.957497 7647 +cancontain 1 1 6.957497 6.957497 7648 +unclear 1 1 6.957497 6.957497 7649 +submatrix 1 1 6.957497 6.957497 7641 +somek 1 1 6.957497 6.957497 7650 +bepost 1 1 6.957497 6.957497 7651 +youhav 1 1 6.957497 6.957497 7652 +yourbest 1 1 6.957497 6.957497 7653 +judgment 1 1 6.957497 6.957497 7654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..2e78ea79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,228 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +well 1 109 2.197225 2.197225 121 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +find 1 111 2.197225 2.197225 111 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +pictur 1 89 2.397895 2.397895 160 +thing 1 84 2.484907 2.484907 189 +chang 1 82 2.484907 2.484907 163 +stuff 1 87 2.484907 2.484907 171 +help 1 83 2.484907 2.484907 175 +second 1 81 2.484907 2.484907 166 +solut 1 82 2.484907 2.484907 162 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +homework 1 79 2.564949 2.564949 193 +orient 1 80 2.564949 2.564949 205 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +logic 1 71 2.639057 2.639057 230 +free 1 73 2.639057 2.639057 224 +syllabu 1 67 2.708050 2.708050 247 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +main 1 67 2.708050 2.708050 256 +window 1 68 2.708050 2.708050 242 +interact 1 62 2.772589 2.772589 270 +simpl 1 60 2.833213 2.833213 298 +plai 1 60 2.833213 2.833213 307 +type 1 61 2.833213 2.833213 296 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +reason 1 57 2.890372 2.890372 318 +sever 1 56 2.890372 2.890372 322 +unix 1 58 2.890372 2.890372 308 +variou 1 56 2.890372 2.890372 317 +suggest 1 53 2.944439 2.944439 331 +allow 1 53 2.944439 2.944439 333 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +run 1 51 2.995732 2.995732 347 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +featur 1 46 3.091042 3.091042 386 +get 1 46 3.091042 3.091042 380 +describ 1 45 3.135494 3.135494 400 +answer 1 45 3.135494 3.135494 391 +anoth 1 45 3.135494 3.135494 408 +mark 1 44 3.135494 3.135494 403 +third 1 43 3.178054 3.178054 412 +show 1 43 3.178054 3.178054 417 +past 1 42 3.218876 3.218876 428 +might 1 41 3.218876 3.218876 426 +tutori 1 39 3.258097 3.258097 437 +error 1 40 3.258097 3.258097 449 +author 1 39 3.258097 3.258097 450 +littl 1 39 3.258097 3.258097 454 +correct 1 38 3.295837 3.295837 462 +especi 1 36 3.367296 3.367296 496 +procedur 1 36 3.367296 3.367296 488 +chapter 1 32 3.465736 3.465736 536 +express 1 32 3.465736 3.465736 540 +kind 1 32 3.465736 3.465736 541 +anim 1 31 3.496508 3.496508 557 +someth 1 31 3.496508 3.496508 554 +abl 1 30 3.555348 3.555348 566 +actual 1 28 3.610918 3.610918 604 +linux 1 27 3.637586 3.637586 631 +subject 1 26 3.688879 3.688879 647 +rather 1 26 3.688879 3.688879 642 +comp 1 26 3.688879 3.688879 650 +brows 1 23 3.806662 3.806662 726 +sort 1 22 3.850148 3.850148 738 +self 1 22 3.850148 3.850148 761 +instal 1 22 3.850148 3.850148 754 +recommend 1 22 3.850148 3.850148 737 +theorem 1 21 3.912023 3.912023 786 +newsgroup 1 21 3.912023 3.912023 783 +sure 1 20 3.951244 3.951244 813 +scheme 1 20 3.951244 3.951244 818 +definit 1 19 4.007333 4.007333 864 +along 1 18 4.060443 4.060443 878 +lot 1 18 4.060443 4.060443 889 +regular 1 17 4.110874 4.110874 929 +repositori 1 17 4.110874 4.110874 932 +quiz 1 16 4.174387 4.174387 990 +explan 1 16 4.174387 4.174387 985 +advantag 1 16 4.174387 4.174387 987 +pagec 1 15 4.248495 4.248495 1011 +later 1 15 4.248495 4.248495 1043 +goe 1 15 4.248495 4.248495 1044 +convent 1 14 4.317488 4.317488 1072 +draw 1 14 4.317488 4.317488 1086 +command 1 14 4.317488 4.317488 1083 +essenti 1 13 4.382027 4.382027 1137 +prolog 1 13 4.382027 4.382027 1155 +reader 1 12 4.465908 4.465908 1246 +calculu 1 12 4.465908 4.465908 1203 +solari 1 12 4.465908 4.465908 1238 +see 1 11 4.553877 4.553877 1337 +instanc 1 11 4.553877 4.553877 1322 +rice 1 11 4.553877 4.553877 1336 +subset 1 10 4.653960 4.653960 1425 +wilson 1 9 4.753590 4.753590 1536 +classifi 1 9 4.753590 4.753590 1537 +latter 1 9 4.753590 4.753590 1522 +reload 1 8 4.875197 4.875197 1682 +prover 1 8 4.875197 4.875197 1653 +illustr 1 8 4.875197 4.875197 1679 +marc 1 8 4.875197 4.875197 1680 +besid 1 8 4.875197 4.875197 1681 +exactli 1 7 5.010635 5.010635 1817 +merg 1 7 5.010635 5.010635 1862 +sparc 1 7 5.010635 5.010635 1860 +feelei 1 7 5.010635 5.010635 1859 +bunch 1 7 5.010635 5.010635 1861 +grammar 1 6 5.164786 5.164786 2058 +whichi 1 6 5.164786 5.164786 2056 +indiana 1 6 5.164786 5.164786 2057 +pagethi 1 5 5.347108 5.347108 2336 +button 1 5 5.347108 5.347108 2337 +ahead 1 5 5.347108 5.347108 2338 +hardcopi 1 5 5.347108 5.347108 2246 +proposit 1 5 5.347108 5.347108 2339 +default 1 5 5.347108 5.347108 2335 +lang 1 5 5.347108 5.347108 2294 +backward 1 4 5.568345 5.568345 2638 +chain 1 4 5.568345 5.568345 2712 +patch 1 4 5.568345 5.568345 2710 +devot 1 4 5.568345 5.568345 2711 +outof 1 3 5.857933 5.857933 3296 +ters 1 3 5.857933 5.857933 3297 +theoremprov 1 3 5.857933 5.857933 3298 +inherit 1 3 5.857933 5.857933 3122 +rscheme 1 3 5.857933 5.857933 3250 +qing 1 3 5.857933 5.857933 3295 +gambit 1 3 5.857933 5.857933 3227 +indent 1 2 6.263398 6.263398 4374 +subtyp 1 2 6.263398 6.263398 4375 +donovan 1 2 6.263398 6.263398 4371 +kolbl 1 2 6.263398 6.263398 4372 +youcan 1 2 6.263398 6.263398 4373 +paulwilson 1 1 6.957497 6.957497 7670 +yourbrows 1 1 6.957497 6.957497 7671 +mostrec 1 1 6.957497 6.957497 7672 +ondeclar 1 1 6.957497 6.957497 7673 +arereason 1 1 6.957497 6.957497 7674 +willchang 1 1 6.957497 6.957497 7675 +islik 1 1 6.957497 6.957497 7676 +adventur 1 1 6.957497 6.957497 7677 +usinga 1 1 6.957497 6.957497 7678 +throughchapt 1 1 6.957497 6.957497 7679 +sanoth 1 1 6.957497 6.957497 7680 +thanprint 1 1 6.957497 6.957497 7681 +weget 1 1 6.957497 6.957497 7682 +onlinebrows 1 1 6.957497 6.957497 7683 +coursenot 1 1 6.957497 6.957497 7684 +miscellanousfunct 1 1 6.957497 6.957497 7685 +shouldconsult 1 1 6.957497 6.957497 7686 +itsens 1 1 6.957497 6.957497 7687 +andnot 1 1 6.957497 6.957497 7688 +setofrul 1 1 6.957497 6.957497 7689 +ofanim 1 1 6.957497 6.957497 7690 +simpleobject 1 1 6.957497 6.957497 7691 +metaclass 1 1 6.957497 6.957497 7692 +circular 1 1 6.957497 6.957497 7693 +onclass 1 1 6.957497 6.957497 7694 +runschem 1 1 6.957497 6.957497 7656 +orani 1 1 6.957497 6.957497 7657 +andinstal 1 1 6.957497 6.957497 7658 +itfrom 1 1 6.957497 6.957497 7659 +friendlier 1 1 6.957497 6.957497 7660 +fornewbi 1 1 6.957497 6.957497 7661 +gettinggambit 1 1 6.957497 6.957497 7662 +bestschem 1 1 6.957497 6.957497 7663 +guil 1 1 6.957497 6.957497 7664 +mzscheme 1 1 6.957497 6.957497 7665 +meroon 1 1 6.957497 6.957497 7655 +doingobject 1 1 6.957497 6.957497 7666 +tous 1 1 6.957497 6.957497 7667 +freeimplement 1 1 6.957497 6.957497 7668 +getinterest 1 1 6.957497 6.957497 7669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..0984fa38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +texa 1 160 1.791759 1.791759 64 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +assign 1 135 1.945910 1.945910 66 +welcom 1 122 2.079442 2.079442 99 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +section 1 94 2.397895 2.397895 149 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +sourc 1 77 2.564949 2.564949 201 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +test 1 66 2.708050 2.708050 252 +view 1 70 2.708050 2.708050 254 +thursdai 1 70 2.708050 2.708050 241 +guid 1 63 2.772589 2.772589 267 +new 1 64 2.772589 2.772589 262 +locat 1 59 2.833213 2.833213 303 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +found 1 53 2.944439 2.944439 337 +tabl 1 51 2.995732 2.995732 346 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +possibl 1 47 3.091042 3.091042 378 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +made 1 44 3.135494 3.135494 398 +review 1 42 3.218876 3.218876 425 +announc 1 40 3.258097 3.258097 441 +realli 1 40 3.258097 3.258097 444 +slide 1 38 3.295837 3.295837 467 +sciencesunivers 1 37 3.332205 3.332205 486 +download 1 36 3.367296 3.367296 489 +print 1 34 3.401197 3.401197 503 +taught 1 33 3.433987 3.433987 526 +linux 1 27 3.637586 3.637586 631 +session 1 26 3.688879 3.688879 643 +half 1 21 3.912023 3.912023 776 +wang 1 21 3.912023 3.912023 790 +exercis 1 19 4.007333 4.007333 842 +adam 1 17 4.110874 4.110874 934 +weekli 1 17 4.110874 4.110874 919 +modif 1 17 4.110874 4.110874 913 +georg 1 16 4.174387 4.174387 994 +score 1 15 4.248495 4.248495 1017 +station 1 13 4.382027 4.382027 1157 +kumar 1 9 4.753590 4.753590 1506 +surpris 1 7 5.010635 5.010635 1828 +tip 1 7 5.010635 5.010635 1863 +ajit 1 3 5.857933 5.857933 3299 +feng 1 3 5.857933 5.857933 3300 +warren 1 3 5.857933 5.857933 3301 +edudepart 1 3 5.857933 5.857933 3302 +natarajan 1 2 6.263398 6.263398 4377 +xfeng 1 2 6.263398 6.263398 4376 +xunnow 1 1 6.957497 6.957497 7698 +homeworksreview 1 1 6.957497 6.957497 7699 +slidesth 1 1 6.957497 6.957497 7700 +onlineif 1 1 6.957497 6.957497 7701 +updatedhomework 1 1 6.957497 6.957497 7702 +filemidterm 1 1 6.957497 6.957497 7703 +webta 1 1 6.957497 6.957497 7704 +timetableta 1 1 6.957497 6.957497 7705 +guana 1 1 6.957497 6.957497 7706 +eduxun 1 1 6.957497 6.957497 7707 +wordlist 1 1 6.957497 6.957497 7708 +wwang 1 1 6.957497 6.957497 7709 +afternoon 1 1 6.957497 6.957497 7710 +decimalinteg 1 1 6.957497 6.957497 7695 +hexinteg 1 1 6.957497 6.957497 7696 +octalinteg 1 1 6.957497 6.957497 7697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..a678ca5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +hour 1 165 1.791759 1.791759 46 +contact 1 153 1.791759 1.791759 59 +architectur 1 139 1.945910 1.945910 77 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +welcom 1 122 2.079442 2.079442 99 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +solut 1 82 2.484907 2.484907 162 +resourc 1 81 2.484907 2.484907 172 +syllabu 1 67 2.708050 2.708050 247 +august 1 66 2.708050 2.708050 257 +new 1 64 2.772589 2.772589 262 +creat 1 63 2.772589 2.772589 277 +print 1 34 3.401197 3.401197 503 +statist 1 35 3.401197 3.401197 521 +station 1 13 4.382027 4.382027 1157 +classmat 1 9 4.753590 4.753590 1516 +yang 1 8 4.875197 4.875197 1652 +pagei 1 8 4.875197 4.875197 1683 +herb 1 1 6.957497 6.957497 7714 +schwetman 1 1 6.957497 6.957497 7711 +mesquit 1 1 6.957497 6.957497 7712 +appointmentcontact 1 1 6.957497 6.957497 7715 +yangyang 1 1 6.957497 6.957497 7713 +statisticsassign 1 1 6.957497 6.957497 7716 +asga 1 1 6.957497 6.957497 7717 +statisticsyour 1 1 6.957497 6.957497 7718 +gradesect 1 1 6.957497 6.957497 7719 +microsparc 1 1 6.957497 6.957497 7720 +datasheetonlin 1 1 6.957497 6.957497 7721 +ruiliu 1 1 6.957497 6.957497 7722 +postmessag 1 1 6.957497 6.957497 7723 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..5762eb59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +schedul 1 119 2.079442 2.079442 85 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +grade 1 90 2.397895 2.397895 142 +present 1 91 2.397895 2.397895 145 +internet 1 83 2.484907 2.484907 186 +solut 1 82 2.484907 2.484907 162 +info 1 85 2.484907 2.484907 176 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +sampl 1 53 2.944439 2.944439 339 +protocol 1 45 3.135494 3.135494 407 +http 1 41 3.218876 3.218876 420 +tutori 1 39 3.258097 3.258097 437 +platform 1 29 3.583519 3.583519 591 +turn 1 29 3.583519 3.583519 586 +comp 1 26 3.688879 3.688879 650 +background 1 25 3.737670 3.737670 664 +mobil 1 23 3.806662 3.806662 730 +newsgroup 1 21 3.912023 3.912023 783 +rout 1 21 3.912023 3.912023 793 +alloc 1 20 3.951244 3.951244 821 +prerequisit 1 19 4.007333 4.007333 846 +configur 1 15 4.248495 4.248495 1012 +draft 1 14 4.317488 4.317488 1085 +station 1 13 4.382027 4.382027 1157 +individu 1 13 4.382027 4.382027 1126 +ring 1 8 4.875197 4.875197 1684 +digest 1 7 5.010635 5.010635 1864 +multicast 1 5 5.347108 5.347108 2305 +authent 1 5 5.347108 5.347108 2306 +edufing 1 4 5.568345 5.568345 2713 +csnet 1 1 6.957497 6.957497 7724 +wensdai 1 1 6.957497 6.957497 7725 +netsim 1 1 6.957497 6.957497 7726 +corejava 1 1 6.957497 6.957497 7727 +fengyufeng 1 1 6.957497 6.957497 7728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..6c70ce33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +topic 1 114 2.197225 2.197225 110 +techniqu 1 99 2.302585 2.302585 138 +follow 1 92 2.397895 2.397895 143 +help 1 83 2.484907 2.484907 175 +wide 1 84 2.484907 2.484907 185 +chang 1 82 2.484907 2.484907 163 +resourc 1 81 2.484907 2.484907 172 +homework 1 79 2.564949 2.564949 193 +sourc 1 77 2.564949 2.564949 201 +html 1 75 2.639057 2.639057 235 +syllabu 1 67 2.708050 2.708050 247 +degre 1 69 2.708050 2.708050 259 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +room 1 59 2.833213 2.833213 301 +colleg 1 61 2.833213 2.833213 300 +locat 1 59 2.833213 2.833213 303 +visitor 1 49 3.044522 3.044522 371 +basic 1 50 3.044522 3.044522 360 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +offer 1 43 3.178054 3.178054 414 +announc 1 40 3.258097 3.258097 441 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +art 1 29 3.583519 3.583519 593 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +accur 1 25 3.737670 3.737670 680 +item 1 19 4.007333 4.007333 856 +hypertext 1 19 4.007333 4.007333 865 +offici 1 18 4.060443 4.060443 894 +charact 1 15 4.248495 4.248495 1028 +reprint 1 14 4.317488 4.317488 1097 +hypermedia 1 12 4.465908 4.465908 1247 +duli 1 12 4.465908 4.465908 1248 +regularli 1 11 4.553877 4.553877 1338 +nonprofit 1 11 4.553877 4.553877 1339 +mosaic 1 10 4.653960 4.653960 1426 +engr 1 10 4.653960 4.653960 1427 +weld 1 9 4.753590 4.753590 1538 +departmentof 1 9 4.753590 4.753590 1539 +uniform 1 7 5.010635 5.010635 1845 +markup 1 6 5.164786 5.164786 2059 +highlight 1 5 5.347108 5.347108 2340 +foracadem 1 5 5.347108 5.347108 2341 +whichcontain 1 4 5.568345 5.568345 2714 +bounti 1 4 5.568345 5.568345 2715 +mathematica 1 3 5.857933 5.857933 3303 +quotedand 1 3 5.857933 5.857933 3304 +quarterwelcom 1 2 6.263398 6.263398 4378 +thatthi 1 2 6.263398 6.263398 4379 +addedfrequ 1 2 6.263398 6.263398 4380 +personnel 1 2 6.263398 6.263398 4381 +mvi 1 2 6.263398 6.263398 4382 +usinglynx 1 2 6.263398 6.263398 4383 +pageclick 1 1 6.957497 6.957497 7729 +gradesoth 1 1 6.957497 6.957497 7730 +browserport 1 1 6.957497 6.957497 7731 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..6f4aa91f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +take 1 97 2.302585 2.302585 134 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +activ 1 84 2.484907 2.484907 182 +info 1 85 2.484907 2.484907 176 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +onlin 1 75 2.639057 2.639057 223 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +degre 1 69 2.708050 2.708050 259 +guid 1 63 2.772589 2.772589 267 +previou 1 62 2.772589 2.772589 290 +colleg 1 61 2.833213 2.833213 300 +think 1 57 2.890372 2.890372 314 +special 1 56 2.890372 2.890372 320 +summer 1 56 2.890372 2.890372 311 +major 1 56 2.890372 2.890372 315 +case 1 51 2.995732 2.995732 351 +run 1 51 2.995732 2.995732 347 +particular 1 51 2.995732 2.995732 352 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +midterm 1 45 3.135494 3.135494 392 +netscap 1 44 3.135494 3.135494 395 +textbook 1 44 3.135494 3.135494 397 +offer 1 43 3.178054 3.178054 414 +might 1 41 3.218876 3.218876 426 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +staff 1 36 3.367296 3.367296 490 +winter 1 36 3.367296 3.367296 500 +copyright 1 36 3.367296 3.367296 495 +word 1 34 3.401197 3.401197 508 +board 1 33 3.433987 3.433987 528 +ad 1 32 3.465736 3.465736 544 +autumn 1 31 3.496508 3.496508 558 +art 1 29 3.583519 3.583519 593 +consid 1 29 3.583519 3.583519 590 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +notic 1 25 3.737670 3.737670 675 +accur 1 25 3.737670 3.737670 680 +miscellan 1 23 3.806662 3.806662 731 +martin 1 21 3.912023 3.912023 794 +item 1 19 4.007333 4.007333 856 +demo 1 18 4.060443 4.060443 888 +less 1 18 4.060443 4.060443 892 +previous 1 17 4.110874 4.110874 923 +whole 1 17 4.110874 4.110874 940 +match 1 16 4.174387 4.174387 965 +portion 1 16 4.174387 4.174387 971 +webmast 1 15 4.248495 4.248495 1045 +reprint 1 14 4.317488 4.317488 1097 +earlier 1 13 4.382027 4.382027 1140 +menu 1 13 4.382027 4.382027 1156 +hypermedia 1 12 4.465908 4.465908 1247 +web 1 12 4.465908 4.465908 1249 +duli 1 12 4.465908 4.465908 1248 +regularli 1 11 4.553877 4.553877 1338 +instanc 1 11 4.553877 4.553877 1322 +nonprofit 1 11 4.553877 4.553877 1339 +engr 1 10 4.653960 4.653960 1427 +debugg 1 9 4.753590 4.753590 1493 +departmentof 1 9 4.753590 4.753590 1539 +documentfor 1 7 5.010635 5.010635 1865 +tip 1 7 5.010635 5.010635 1863 +theclass 1 6 5.164786 5.164786 2060 +handbook 1 6 5.164786 5.164786 2061 +newinform 1 5 5.347108 5.347108 2342 +highlight 1 5 5.347108 5.347108 2340 +mac 1 5 5.347108 5.347108 2292 +bulletin 1 5 5.347108 5.347108 2343 +foracadem 1 5 5.347108 5.347108 2341 +bounti 1 4 5.568345 5.568345 2715 +insensit 1 4 5.568345 5.568345 2716 +tompa 1 3 5.857933 5.857933 3305 +preview 1 3 5.857933 5.857933 3306 +quotedand 1 3 5.857933 5.857933 3304 +raini 1 2 6.263398 6.263398 4384 +intact 1 2 6.263398 6.263398 4385 +nonmajor 1 2 6.263398 6.263398 4386 +itemsund 1 2 6.263398 6.263398 4387 +balloon 1 2 6.263398 6.263398 4388 +dugan 1 1 6.957497 6.957497 7732 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..52f5b282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +user 1 104 2.302585 2.302585 137 +comment 1 93 2.397895 2.397895 146 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +degre 1 69 2.708050 2.708050 259 +guid 1 63 2.772589 2.772589 267 +colleg 1 61 2.833213 2.833213 300 +summer 1 56 2.890372 2.890372 311 +special 1 56 2.890372 2.890372 320 +major 1 56 2.890372 2.890372 315 +week 1 52 2.995732 2.995732 343 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +textbook 1 44 3.135494 3.135494 397 +examin 1 42 3.218876 3.218876 424 +might 1 41 3.218876 3.218876 426 +tutori 1 39 3.258097 3.258097 437 +slide 1 38 3.295837 3.295837 467 +short 1 36 3.367296 3.367296 499 +staff 1 36 3.367296 3.367296 490 +winter 1 36 3.367296 3.367296 500 +kind 1 32 3.465736 3.465736 541 +autumn 1 31 3.496508 3.496508 558 +richard 1 31 3.496508 3.496508 559 +art 1 29 3.583519 3.583519 593 +notic 1 25 3.737670 3.737670 675 +lab 1 24 3.761200 3.761200 698 +brows 1 23 3.806662 3.806662 726 +sort 1 22 3.850148 3.850148 738 +martin 1 21 3.912023 3.912023 794 +watch 1 21 3.912023 3.912023 789 +item 1 19 4.007333 4.007333 856 +demo 1 18 4.060443 4.060443 888 +less 1 18 4.060443 4.060443 892 +macintosh 1 17 4.110874 4.110874 920 +previous 1 17 4.110874 4.110874 923 +webmast 1 15 4.248495 4.248495 1045 +earlier 1 13 4.382027 4.382027 1140 +web 1 12 4.465908 4.465908 1249 +engr 1 10 4.653960 4.653960 1427 +invit 1 10 4.653960 4.653960 1428 +informationabout 1 9 4.753590 4.753590 1515 +andth 1 9 4.753590 4.753590 1481 +tip 1 7 5.010635 5.010635 1863 +ladner 1 6 5.164786 5.164786 2062 +highlight 1 5 5.347108 5.347108 2340 +bounti 1 4 5.568345 5.568345 2715 +moreinform 1 3 5.857933 5.857933 3307 +dickei 1 2 6.263398 6.263398 4389 +nonmajor 1 2 6.263398 6.263398 4386 +hypermediadocu 1 1 6.957497 6.957497 7733 +schedulesth 1 1 6.957497 6.957497 7734 +glanceweek 1 1 6.957497 6.957497 7735 +schedulecomput 1 1 6.957497 6.957497 7736 +includinglab 1 1 6.957497 6.957497 7737 +andta 1 1 6.957497 6.957497 7738 +audiofrom 1 1 6.957497 6.957497 7739 +midtermand 1 1 6.957497 6.957497 7740 +originallyschedul 1 1 6.957497 6.957497 7741 +andtim 1 1 6.957497 6.957497 7742 +usingth 1 1 6.957497 6.957497 7743 +intactand 1 1 6.957497 6.957497 7744 +forinst 1 1 6.957497 6.957497 7745 +andrel 1 1 6.957497 6.957497 7746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..8dfced73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +seattl 1 120 2.079442 2.079442 103 +structur 1 106 2.197225 2.197225 105 +teach 1 108 2.197225 2.197225 112 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +novemb 1 81 2.484907 2.484907 179 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +previou 1 62 2.772589 2.772589 290 +browser 1 56 2.890372 2.890372 313 +sampl 1 53 2.944439 2.944439 339 +appoint 1 49 3.044522 3.044522 358 +set 1 50 3.044522 3.044522 361 +midterm 1 45 3.135494 3.135494 392 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +defin 1 22 3.850148 3.850148 746 +edulast 1 17 4.110874 4.110874 927 +discret 1 13 4.382027 4.382027 1165 +johnson 1 13 4.382027 4.382027 1162 +recurs 1 13 4.382027 4.382027 1127 +loew 1 12 4.465908 4.465908 1252 +reader 1 12 4.465908 4.465908 1246 +web 1 12 4.465908 4.465908 1249 +induct 1 11 4.553877 4.553877 1304 +leveson 1 9 4.753590 4.753590 1540 +acrobat 1 6 5.164786 5.164786 2063 +beam 1 5 5.347108 5.347108 2344 +karp 1 5 5.347108 5.347108 2284 +ruzzo 1 5 5.347108 5.347108 2345 +nowitz 1 2 6.263398 6.263398 4390 +ofyour 1 2 6.263398 6.263398 4063 +instructorpaul 1 1 6.957497 6.957497 7747 +edulectur 1 1 6.957497 6.957497 7748 +assistantjonathan 1 1 6.957497 6.957497 7749 +edusect 1 1 6.957497 6.957497 7750 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..0d6369d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +includ 1 208 1.609438 1.609438 42 +read 1 154 1.791759 1.791759 47 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +look 1 107 2.197225 2.197225 115 +text 1 98 2.302585 2.302585 133 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +mani 1 92 2.397895 2.397895 150 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +state 1 76 2.564949 2.564949 207 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +syllabu 1 67 2.708050 2.708050 247 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +simpl 1 60 2.833213 2.833213 298 +browser 1 56 2.890372 2.890372 313 +three 1 54 2.944439 2.944439 330 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +quarter 1 47 3.091042 3.091042 389 +midterm 1 45 3.135494 3.135494 392 +math 1 44 3.135494 3.135494 402 +review 1 42 3.218876 3.218876 425 +origin 1 38 3.295837 3.295837 472 +formal 1 37 3.332205 3.332205 478 +winter 1 36 3.367296 3.367296 500 +express 1 32 3.465736 3.465736 540 +richard 1 31 3.496508 3.496508 559 +autumn 1 31 3.496508 3.496508 558 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +handl 1 24 3.761200 3.761200 685 +proof 1 23 3.806662 3.806662 720 +fact 1 21 3.912023 3.912023 780 +viewer 1 21 3.912023 3.912023 787 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +regular 1 17 4.110874 4.110874 929 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +command 1 14 4.317488 4.317488 1083 +convert 1 13 4.382027 4.382027 1122 +web 1 12 4.465908 4.465908 1249 +extra 1 11 4.553877 4.553877 1312 +regard 1 11 4.553877 4.553877 1309 +notat 1 9 4.753590 4.753590 1489 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +legibl 1 7 5.010635 5.010635 1866 +ghostscript 1 7 5.010635 5.010635 1867 +grammar 1 6 5.164786 5.164786 2058 +markup 1 6 5.164786 5.164786 2059 +strang 1 6 5.164786 5.164786 2064 +ladner 1 6 5.164786 5.164786 2062 +pars 1 5 5.347108 5.347108 2321 +diagram 1 5 5.347108 5.347108 2346 +latexhtml 1 5 5.347108 5.347108 2347 +rambl 1 3 5.857933 5.857933 3308 +ladnerclass 1 1 6.957497 6.957497 7751 +construc 1 1 6.957497 6.957497 7752 +halt 1 1 6.957497 6.957497 7753 +undecidableexam 1 1 6.957497 6.957497 7754 +edufix 1 1 6.957497 6.957497 7755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..f6bc0e8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +model 1 145 1.945910 1.945910 69 +lectur 1 135 1.945910 1.945910 73 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +handout 1 64 2.772589 2.772589 263 +previou 1 62 2.772589 2.772589 290 +septemb 1 65 2.772589 2.772589 274 +content 1 59 2.833213 2.833213 302 +set 1 50 3.044522 3.044522 361 +pointer 1 48 3.044522 3.044522 368 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +formal 1 37 3.332205 3.332205 478 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +sent 1 22 3.850148 3.850148 763 +log 1 19 4.007333 4.007333 857 +intro 1 17 4.110874 4.110874 915 +web 1 12 4.465908 4.465908 1249 +regularli 1 11 4.553877 4.553877 1338 +subscrib 1 9 4.753590 4.753590 1541 +upcom 1 8 4.875197 4.875197 1685 +ann 1 6 5.164786 5.164786 2065 +majordomo 1 6 5.164786 5.164786 2066 +willb 1 5 5.347108 5.347108 2277 +condon 1 3 5.857933 5.857933 3309 +findhomework 1 1 6.957497 6.957497 7756 +userid 1 1 6.957497 6.957497 7757 +edukaye 1 1 6.957497 6.957497 7758 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..7176202f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +spring 1 131 2.079442 2.079442 88 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +messag 1 76 2.564949 2.564949 212 +previou 1 62 2.772589 2.772589 290 +unix 1 58 2.890372 2.890372 308 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +request 1 26 3.688879 3.688879 635 +martin 1 21 3.912023 3.912023 794 +web 1 12 4.465908 4.465908 1249 +tompaclass 1 3 5.857933 5.857933 3310 +informationlab 1 1 6.957497 6.957497 7759 +technot 1 1 6.957497 6.957497 7760 +questionnaireloc 1 1 6.957497 6.957497 7761 +cdeletemin 1 1 6.957497 6.957497 7762 +treeshomework 1 1 6.957497 6.957497 7763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..9b278a74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +seattl 1 120 2.079442 2.079442 103 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +sourc 1 77 2.564949 2.564949 201 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +taught 1 33 3.433987 3.433987 526 +ad 1 32 3.465736 3.465736 544 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +reprint 1 14 4.317488 4.317488 1097 +hypermedia 1 12 4.465908 4.465908 1247 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +departmentof 1 9 4.753590 4.753590 1539 +documentfor 1 7 5.010635 5.010635 1865 +ladner 1 6 5.164786 5.164786 2062 +theclass 1 6 5.164786 5.164786 2060 +newinform 1 5 5.347108 5.347108 2342 +foracadem 1 5 5.347108 5.347108 2341 +quotedand 1 3 5.857933 5.857933 3304 +fasulo 1 2 6.263398 6.263398 4391 +structuresrichard 1 1 6.957497 6.957497 7764 +instructordan 1 1 6.957497 6.957497 7765 +assistantthi 1 1 6.957497 6.957497 7766 +overheadsport 1 1 6.957497 6.957497 7767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..96d4f2e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +sourc 1 77 2.564949 2.564949 201 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +offer 1 43 3.178054 3.178054 414 +purpos 1 37 3.332205 3.332205 481 +winter 1 36 3.367296 3.367296 500 +copyright 1 36 3.367296 3.367296 495 +everi 1 34 3.401197 3.401197 519 +autumn 1 31 3.496508 3.496508 558 +computersci 1 30 3.555348 3.555348 562 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +webmast 1 15 4.248495 4.248495 1045 +reprint 1 14 4.317488 4.317488 1097 +scienceand 1 5 5.347108 5.347108 2348 +languagesfal 1 2 6.263398 6.263398 4161 +informationth 1 2 6.263398 6.263398 4393 +listinfo 1 2 6.263398 6.263398 4394 +pagehom 1 2 6.263398 6.263398 4395 +ofcs 1 2 6.263398 6.263398 4392 +engineeringport 1 2 6.263398 6.263398 4396 +academicnonprofit 1 2 6.263398 6.263398 4397 +dulycredit 1 2 6.263398 6.263398 4398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..d4dd4645 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +updat 1 191 1.609438 1.609438 41 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +postscript 1 131 2.079442 2.079442 90 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +final 1 116 2.197225 2.197225 108 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +solut 1 82 2.484907 2.484907 162 +build 1 85 2.484907 2.484907 184 +mondai 1 77 2.564949 2.564949 206 +june 1 79 2.564949 2.564949 214 +homework 1 79 2.564949 2.564949 193 +april 1 77 2.564949 2.564949 196 +interfac 1 79 2.564949 2.564949 209 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +new 1 64 2.772589 2.772589 262 +march 1 61 2.833213 2.833213 295 +unix 1 58 2.890372 2.890372 308 +reason 1 57 2.890372 2.890372 318 +sampl 1 53 2.944439 2.944439 339 +suggest 1 53 2.944439 2.944439 331 +quarter 1 47 3.091042 3.091042 389 +electron 1 47 3.091042 3.091042 379 +done 1 47 3.091042 3.091042 381 +adapt 1 46 3.091042 3.091042 387 +netscap 1 44 3.135494 3.135494 395 +offer 1 43 3.178054 3.178054 414 +review 1 42 3.218876 3.218876 425 +hand 1 37 3.332205 3.332205 475 +purpos 1 37 3.332205 3.332205 481 +winter 1 36 3.367296 3.367296 500 +copyright 1 36 3.367296 3.367296 495 +everi 1 34 3.401197 3.401197 519 +eduoffic 1 33 3.433987 3.433987 531 +autumn 1 31 3.496508 3.496508 558 +computersci 1 30 3.555348 3.555348 562 +steve 1 29 3.583519 3.583519 594 +quot 1 29 3.583519 3.583519 582 +full 1 28 3.610918 3.610918 615 +administr 1 27 3.637586 3.637586 628 +pagecs 1 26 3.688879 3.688879 658 +session 1 26 3.688879 3.688879 643 +accur 1 25 3.737670 3.737670 680 +daili 1 24 3.761200 3.761200 706 +newsgroup 1 21 3.912023 3.912023 783 +miss 1 19 4.007333 4.007333 866 +lisp 1 18 4.060443 4.060443 897 +figur 1 18 4.060443 4.060443 903 +partial 1 18 4.060443 4.060443 900 +quiz 1 16 4.174387 4.174387 990 +webmast 1 15 4.248495 4.248495 1045 +dave 1 14 4.317488 4.317488 1098 +save 1 14 4.317488 4.317488 1099 +reprint 1 14 4.317488 4.317488 1097 +emac 1 13 4.382027 4.382027 1143 +prolog 1 13 4.382027 4.382027 1155 +hank 1 12 4.465908 4.465908 1253 +submiss 1 11 4.553877 4.553877 1298 +grove 1 8 4.875197 4.875197 1675 +clip 1 7 5.010635 5.010635 1868 +transcript 1 6 5.164786 5.164786 2067 +scienceand 1 5 5.347108 5.347108 2348 +turnin 1 4 5.568345 5.568345 2654 +employe 1 4 5.568345 5.568345 2717 +overviewcours 1 2 6.263398 6.263398 4399 +informationth 1 2 6.263398 6.263398 4393 +listinfo 1 2 6.263398 6.263398 4394 +pagehom 1 2 6.263398 6.263398 4395 +ofcs 1 2 6.263398 6.263398 4392 +engineeringport 1 2 6.263398 6.263398 4396 +academicnonprofit 1 2 6.263398 6.263398 4397 +dulycredit 1 2 6.263398 6.263398 4398 +languagesspr 1 1 6.957497 6.957497 7771 +hanks 1 1 6.957497 6.957497 7772 +documentsgeneralintroduct 1 1 6.957497 6.957497 7773 +relatedrun 1 1 6.957497 6.957497 7774 +smalltalk 1 1 6.957497 6.957497 7768 +htmlpostscript 1 1 6.957497 6.957497 7770 +transcipt 1 1 6.957497 6.957497 7769 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..57f13a9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +academ 1 82 2.484907 2.484907 178 +sourc 1 77 2.564949 2.564949 201 +degre 1 69 2.708050 2.708050 259 +previou 1 62 2.772589 2.772589 290 +colleg 1 61 2.833213 2.833213 300 +index 1 56 2.890372 2.890372 309 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +offer 1 43 3.178054 3.178054 414 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +art 1 29 3.583519 3.583519 593 +quot 1 29 3.583519 3.583519 582 +accur 1 25 3.737670 3.737670 680 +less 1 18 4.060443 4.060443 892 +portion 1 16 4.174387 4.174387 971 +webmast 1 15 4.248495 4.248495 1045 +reprint 1 14 4.317488 4.317488 1097 +earlier 1 13 4.382027 4.382027 1140 +web 1 12 4.465908 4.465908 1249 +duli 1 12 4.465908 4.465908 1248 +instanc 1 11 4.553877 4.553877 1322 +nonprofit 1 11 4.553877 4.553877 1339 +intact 1 2 6.263398 6.263398 4385 +pagecurr 1 1 6.957497 6.957497 7775 +quarterth 1 1 6.957497 6.957497 7776 +quarterscours 1 1 6.957497 6.957497 7777 +younotic 1 1 6.957497 6.957497 7778 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..2d2408ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +implement 1 152 1.791759 1.791759 52 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +schedul 1 119 2.079442 2.079442 85 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +send 1 114 2.197225 2.197225 109 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +version 1 113 2.197225 2.197225 122 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +contain 1 81 2.484907 2.484907 174 +exam 1 86 2.484907 2.484907 169 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +sourc 1 77 2.564949 2.564949 201 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +logic 1 71 2.639057 2.639057 230 +goal 1 66 2.708050 2.708050 250 +syllabu 1 67 2.708050 2.708050 247 +sieg 1 69 2.708050 2.708050 260 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +previou 1 62 2.772589 2.772589 290 +publish 1 57 2.890372 2.890372 326 +think 1 57 2.890372 2.890372 314 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +archiv 1 49 3.044522 3.044522 364 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +even 1 45 3.135494 3.135494 393 +announc 1 40 3.258097 3.258097 441 +author 1 39 3.258097 3.258097 450 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +expect 1 37 3.332205 3.332205 484 +purpos 1 37 3.332205 3.332205 481 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +go 1 33 3.433987 3.433987 529 +ad 1 32 3.465736 3.465736 544 +collabor 1 32 3.465736 3.465736 543 +autumn 1 31 3.496508 3.496508 558 +quot 1 29 3.583519 3.583519 582 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +administr 1 27 3.637586 3.637586 628 +notic 1 25 3.737670 3.737670 675 +accur 1 25 3.737670 3.737670 680 +tell 1 21 3.912023 3.912023 777 +anderson 1 19 4.007333 4.007333 860 +feedback 1 19 4.007333 4.007333 854 +aid 1 18 4.060443 4.060443 904 +whole 1 17 4.110874 4.110874 940 +weekli 1 17 4.110874 4.110874 919 +weslei 1 16 4.174387 4.174387 983 +portion 1 16 4.174387 4.174387 971 +webmast 1 15 4.248495 4.248495 1045 +anonym 1 14 4.317488 4.317488 1100 +reprint 1 14 4.317488 4.317488 1097 +everyon 1 13 4.382027 4.382027 1148 +quizz 1 13 4.382027 4.382027 1151 +workload 1 12 4.465908 4.465908 1210 +overal 1 12 4.465908 4.465908 1254 +addison 1 12 4.465908 4.465908 1230 +duli 1 12 4.465908 4.465908 1248 +benjamin 1 11 4.553877 4.553877 1296 +evolut 1 11 4.553877 4.553877 1314 +nonprofit 1 11 4.553877 4.553877 1339 +cheat 1 10 4.653960 4.653960 1395 +desir 1 9 4.753590 4.753590 1542 +cum 1 8 4.875197 4.875197 1619 +bunch 1 7 5.010635 5.010635 1861 +gaetano 1 6 5.164786 5.164786 2068 +borriello 1 5 5.347108 5.347108 2349 +corei 1 4 5.568345 5.568345 2718 +contemporari 1 4 5.568345 5.568345 2719 +corin 1 3 5.857933 5.857933 3311 +aweekli 1 3 5.857933 5.857933 3312 +katz 1 3 5.857933 5.857933 3276 +andersonwelcom 1 2 6.263398 6.263398 4400 +tocs 1 2 6.263398 6.263398 4401 +messagess 1 2 6.263398 6.263398 4402 +synario 1 2 6.263398 6.263398 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..73ab290d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +grade 1 90 2.397895 2.397895 142 +requir 1 81 2.484907 2.484907 167 +sieg 1 69 2.708050 2.708050 260 +practic 1 70 2.708050 2.708050 246 +polici 1 64 2.772589 2.772589 279 +previou 1 62 2.772589 2.772589 290 +room 1 59 2.833213 2.833213 301 +unix 1 58 2.890372 2.890372 308 +publish 1 57 2.890372 2.890372 326 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +keep 1 44 3.135494 3.135494 409 +offer 1 43 3.178054 3.178054 414 +announc 1 40 3.258097 3.258097 441 +late 1 40 3.258097 3.258097 439 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +option 1 30 3.555348 3.555348 575 +steve 1 29 3.583519 3.583519 594 +pagecs 1 26 3.688879 3.688879 658 +dai 1 22 3.850148 3.850148 753 +tent 1 22 3.850148 3.850148 739 +smith 1 20 3.951244 3.951244 820 +facil 1 20 3.951244 3.951244 814 +account 1 18 4.060443 4.060443 882 +lisp 1 18 4.060443 4.060443 897 +encourag 1 18 4.060443 4.060443 880 +deduct 1 12 4.465908 4.465908 1236 +tanimoto 1 10 4.653960 4.653960 1429 +penalti 1 10 4.653960 4.653960 1405 +prentic 1 7 5.010635 5.010635 1838 +aboutth 1 4 5.568345 5.568345 2720 +punctual 1 3 5.857933 5.857933 3313 +anhai 1 2 6.263398 6.263398 4404 +doan 1 2 6.263398 6.263398 4405 +mscc 1 2 6.263398 6.263398 4406 +breakdown 1 2 6.263398 6.263398 4407 +algorithmsautumn 1 1 6.957497 6.957497 7779 +shaffer 1 1 6.957497 6.957497 7780 +examinform 1 1 6.957497 6.957497 7781 +exambas 1 1 6.957497 6.957497 7782 +compilerassignmentssolut 1 1 6.957497 6.957497 7783 +assignmentsteach 1 1 6.957497 6.957497 7784 +informationscheduleweb 1 1 6.957497 6.957497 7785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..c902bc13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +relat 1 139 1.945910 1.945910 68 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +materi 1 75 2.639057 2.639057 221 +syllabu 1 67 2.708050 2.708050 247 +degre 1 69 2.708050 2.708050 259 +colleg 1 61 2.833213 2.833213 300 +major 1 56 2.890372 2.890372 315 +offer 1 43 3.178054 3.178054 414 +art 1 29 3.583519 3.583519 593 +pagecs 1 26 3.688879 3.688879 658 +demo 1 18 4.060443 4.060443 888 +jonathan 1 13 4.382027 4.382027 1174 +mosaic 1 10 4.653960 4.653960 1426 +alistair 1 3 5.857933 5.857933 3315 +holden 1 3 5.857933 5.857933 3314 +nowitz 1 2 6.263398 6.263398 4390 +raini 1 2 6.263398 6.263398 4384 +algorithmsspr 1 1 6.957497 6.957497 7786 +funnowitz 1 1 6.957497 6.957497 7787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..a917ff93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +homework 1 79 2.564949 2.564949 193 +state 1 76 2.564949 2.564949 207 +come 1 78 2.564949 2.564949 202 +meet 1 72 2.639057 2.639057 229 +line 1 75 2.639057 2.639057 231 +sieg 1 69 2.708050 2.708050 260 +test 1 66 2.708050 2.708050 252 +set 1 50 3.044522 3.044522 361 +answer 1 45 3.135494 3.135494 391 +long 1 43 3.178054 3.178054 413 +tree 1 36 3.367296 3.367296 492 +next 1 34 3.401197 3.401197 517 +eduoffic 1 33 3.433987 3.433987 531 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +valu 1 25 3.737670 3.737670 665 +begin 1 23 3.806662 3.806662 716 +sheet 1 16 4.174387 4.174387 973 +indic 1 15 4.248495 4.248495 1013 +charact 1 15 4.248495 4.248495 1028 +denis 1 12 4.465908 4.465908 1255 +string 1 11 4.553877 4.553877 1340 +linda 1 10 4.653960 4.653960 1394 +length 1 10 4.653960 4.653960 1400 +shapiro 1 8 4.875197 4.875197 1686 +insert 1 8 4.875197 4.875197 1687 +integ 1 8 4.875197 4.875197 1688 +assignmentsprogram 1 6 5.164786 5.164786 2019 +assignmentshomework 1 4 5.568345 5.568345 2721 +algorithmswint 1 1 6.957497 6.957497 7790 +shapirooffic 1 1 6.957497 6.957497 7791 +siegtelephon 1 1 6.957497 6.957497 7789 +pinneloffic 1 1 6.957497 6.957497 7792 +denisep 1 1 6.957497 6.957497 7793 +syllabustransparencieshomework 1 1 6.957497 6.957497 7794 +enclos 1 1 6.957497 6.957497 7788 +inquot 1 1 6.957497 6.957497 7795 +associatedvalu 1 1 6.957497 6.957497 7796 +linebegin 1 1 6.957497 6.957497 7797 +isfollow 1 1 6.957497 6.957497 7798 +graphimag 1 1 6.957497 6.957497 7799 +graphreview 1 1 6.957497 6.957497 7800 +listsfin 1 1 6.957497 6.957497 7801 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..2301b4e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +assign 1 135 1.945910 1.945910 66 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +world 1 115 2.197225 2.197225 126 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +frequent 1 49 3.044522 3.044522 367 +keep 1 44 3.135494 3.135494 409 +announc 1 40 3.258097 3.258097 441 +ad 1 32 3.465736 3.465736 544 +static 1 27 3.637586 3.637586 619 +hypermedia 1 12 4.465908 4.465908 1247 +admin 1 9 4.753590 4.753590 1476 +documentfor 1 7 5.010635 5.010635 1865 +willb 1 5 5.347108 5.347108 2277 +urgent 1 3 5.857933 5.857933 3316 +classhomethi 1 1 6.957497 6.957497 7802 +inmind 1 1 6.957497 6.957497 7803 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..4550f4d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,235 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +document 1 121 2.079442 2.079442 89 +teach 1 108 2.197225 2.197225 112 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +version 1 113 2.197225 2.197225 122 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +make 1 111 2.197225 2.197225 120 +technic 1 100 2.302585 2.302585 140 +part 1 98 2.302585 2.302585 129 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +search 1 95 2.397895 2.397895 155 +present 1 91 2.397895 2.397895 145 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +activ 1 84 2.484907 2.484907 182 +member 1 84 2.484907 2.484907 165 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +method 1 80 2.564949 2.564949 213 +exampl 1 77 2.564949 2.564949 195 +master 1 76 2.564949 2.564949 216 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +addit 1 74 2.639057 2.639057 228 +meet 1 72 2.639057 2.639057 229 +write 1 72 2.639057 2.639057 222 +sieg 1 69 2.708050 2.708050 260 +test 1 66 2.708050 2.708050 252 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +import 1 65 2.772589 2.772589 282 +organ 1 65 2.772589 2.772589 265 +plan 1 65 2.772589 2.772589 272 +evalu 1 64 2.772589 2.772589 266 +creat 1 63 2.772589 2.772589 277 +guid 1 63 2.772589 2.772589 267 +sever 1 56 2.890372 2.890372 322 +reason 1 57 2.890372 2.890372 318 +allow 1 53 2.944439 2.944439 333 +profession 1 51 2.995732 2.995732 345 +hardwar 1 51 2.995732 2.995732 350 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +principl 1 48 3.044522 3.044522 357 +quarter 1 47 3.091042 3.091042 389 +possibl 1 47 3.091042 3.091042 378 +done 1 47 3.091042 3.091042 381 +get 1 46 3.091042 3.091042 380 +understand 1 47 3.091042 3.091042 384 +discuss 1 45 3.135494 3.135494 399 +natur 1 44 3.135494 3.135494 406 +howev 1 41 3.218876 3.218876 422 +review 1 42 3.218876 3.218876 425 +industri 1 38 3.295837 3.295837 464 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +respons 1 37 3.332205 3.332205 476 +short 1 36 3.367296 3.367296 499 +ofth 1 36 3.367296 3.367296 491 +survei 1 35 3.401197 3.401197 513 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +concept 1 32 3.465736 3.465736 537 +human 1 32 3.465736 3.465736 546 +often 1 31 3.496508 3.496508 551 +posit 1 31 3.496508 3.496508 552 +produc 1 30 3.555348 3.555348 572 +hard 1 30 3.555348 3.555348 563 +exist 1 30 3.555348 3.555348 569 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +platform 1 29 3.583519 3.583519 591 +usual 1 28 3.610918 3.610918 608 +held 1 28 3.610918 3.610918 600 +releas 1 28 3.610918 3.610918 616 +team 1 27 3.637586 3.637586 625 +administr 1 27 3.637586 3.637586 628 +determin 1 27 3.637586 3.637586 630 +pagecs 1 26 3.688879 3.688879 658 +experiment 1 26 3.688879 3.688879 645 +session 1 26 3.688879 3.688879 643 +consist 1 26 3.688879 3.688879 651 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +primari 1 25 3.737670 3.737670 669 +reliabl 1 25 3.737670 3.737670 674 +handl 1 24 3.761200 3.761200 685 +head 1 23 3.806662 3.806662 732 +try 1 22 3.850148 3.850148 764 +qualiti 1 20 3.951244 3.951244 832 +verif 1 20 3.951244 3.951244 826 +safeti 1 20 3.951244 3.951244 817 +sure 1 20 3.951244 3.951244 813 +feedback 1 19 4.007333 4.007333 854 +statu 1 18 4.060443 4.060443 885 +regular 1 17 4.110874 4.110874 929 +outlin 1 17 4.110874 4.110874 914 +estim 1 17 4.110874 4.110874 930 +normal 1 16 4.174387 4.174387 995 +enough 1 15 4.248495 4.248495 1040 +configur 1 15 4.248495 4.248495 1012 +track 1 15 4.248495 4.248495 1029 +transit 1 15 4.248495 4.248495 1046 +role 1 14 4.317488 4.317488 1101 +embed 1 14 4.317488 4.317488 1102 +conduct 1 14 4.317488 4.317488 1065 +essenti 1 13 4.382027 4.382027 1137 +sai 1 13 4.382027 4.382027 1175 +everyon 1 13 4.382027 4.382027 1148 +necessari 1 13 4.382027 4.382027 1147 +nanci 1 12 4.465908 4.465908 1256 +skill 1 12 4.465908 4.465908 1205 +overal 1 12 4.465908 4.465908 1254 +characterist 1 12 4.465908 4.465908 1257 +readabl 1 12 4.465908 4.465908 1258 +valid 1 11 4.553877 4.553877 1299 +evolut 1 11 4.553877 4.553877 1314 +princip 1 10 4.653960 4.653960 1397 +leveson 1 9 4.753590 4.753590 1540 +latter 1 9 4.753590 4.753590 1522 +mainten 1 9 4.753590 4.753590 1543 +factor 1 9 4.753590 4.753590 1544 +respect 1 9 4.753590 4.753590 1545 +realist 1 8 4.875197 4.875197 1665 +risk 1 8 4.875197 4.875197 1689 +reus 1 8 4.875197 4.875197 1661 +architect 1 8 4.875197 4.875197 1624 +successfulli 1 7 5.010635 5.010635 1869 +attach 1 7 5.010635 5.010635 1785 +metric 1 7 5.010635 5.010635 1831 +ethic 1 7 5.010635 5.010635 1786 +lack 1 6 5.164786 5.164786 1994 +phase 1 6 5.164786 5.164786 1977 +theproject 1 6 5.164786 5.164786 1981 +creation 1 6 5.164786 5.164786 2069 +ensur 1 6 5.164786 5.164786 2012 +deliv 1 6 5.164786 5.164786 2070 +augment 1 5 5.347108 5.347108 2350 +isthat 1 4 5.568345 5.568345 2723 +assess 1 4 5.568345 5.568345 2724 +employe 1 4 5.568345 5.568345 2717 +assur 1 4 5.568345 5.568345 2722 +boe 1 3 5.857933 5.857933 3318 +oral 1 3 5.857933 5.857933 3189 +leadership 1 3 5.857933 5.857933 3320 +listof 1 3 5.857933 5.857933 3322 +duti 1 3 5.857933 5.857933 3317 +proper 1 3 5.857933 5.857933 3323 +specialist 1 3 5.857933 5.857933 3319 +interview 1 3 5.857933 5.857933 3324 +expertis 1 3 5.857933 5.857933 3321 +educours 1 2 6.263398 6.263398 4409 +terminolog 1 2 6.263398 6.263398 4410 +portfolio 1 2 6.263398 6.263398 4408 +thegroup 1 2 6.263398 6.263398 4054 +beavoid 1 2 6.263398 6.263398 4411 +thenorm 1 2 6.263398 6.263398 4412 +clariti 1 2 6.263398 6.263398 4413 +descriptioninstruct 1 1 6.957497 6.957497 7806 +softwaresystem 1 1 6.957497 6.957497 7807 +tocreat 1 1 6.957497 6.957497 7808 +effectiveor 1 1 6.957497 6.957497 7809 +topicsar 1 1 6.957497 6.957497 7810 +employersand 1 1 6.957497 6.957497 7811 +realbo 1 1 6.957497 6.957497 7812 +largegroup 1 1 6.957497 6.957497 7813 +cannotlearn 1 1 6.957497 6.957497 7814 +devotedto 1 1 6.957497 6.957497 7815 +isto 1 1 6.957497 6.957497 7816 +effectivelytogeth 1 1 6.957497 6.957497 7817 +disast 1 1 6.957497 6.957497 7818 +worktogeth 1 1 6.957497 6.957497 7819 +requirementsanalysi 1 1 6.957497 6.957497 7820 +areal 1 1 6.957497 6.957497 7821 +engineeringinstitut 1 1 6.957497 6.957497 7822 +providedat 1 1 6.957497 6.957497 7823 +playthat 1 1 6.957497 6.957497 7824 +projectso 1 1 6.957497 6.957497 7825 +softwaredevelop 1 1 6.957497 6.957497 7826 +responsiblefor 1 1 6.957497 6.957497 7827 +duri 1 1 6.957497 6.957497 7828 +deliver 1 1 6.957497 6.957497 7804 +mockup 1 1 6.957497 6.957497 7805 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..adad48ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +question 1 91 2.397895 2.397895 141 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +mondai 1 77 2.564949 2.564949 206 +method 1 80 2.564949 2.564949 213 +sieg 1 69 2.708050 2.708050 260 +test 1 66 2.708050 2.708050 252 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 1 64 2.772589 2.772589 261 +creat 1 63 2.772589 2.772589 277 +complex 1 64 2.772589 2.772589 269 +new 1 64 2.772589 2.772589 262 +locat 1 59 2.833213 2.833213 303 +sampl 1 53 2.944439 2.944439 339 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +fridai 1 44 3.135494 3.135494 390 +winter 1 36 3.367296 3.367296 500 +concept 1 32 3.465736 3.465736 537 +produc 1 30 3.555348 3.555348 572 +pagecs 1 26 3.688879 3.688879 658 +comp 1 26 3.688879 3.688879 650 +request 1 26 3.688879 3.688879 635 +fundament 1 25 3.737670 3.737670 661 +newsgroup 1 21 3.912023 3.912023 783 +feedback 1 19 4.007333 4.007333 854 +adam 1 17 4.110874 4.110874 934 +coupl 1 17 4.110874 4.110874 939 +essenti 1 13 4.382027 4.382027 1137 +loew 1 12 4.465908 4.465908 1252 +nanci 1 12 4.465908 4.465908 1256 +leveson 1 9 4.753590 4.753590 1540 +risk 1 8 4.875197 4.875197 1689 +successfulli 1 7 5.010635 5.010635 1869 +prentic 1 7 5.010635 5.010635 1838 +carlson 1 5 5.347108 5.347108 2351 +consum 1 5 5.347108 5.347108 2334 +interview 1 3 5.857933 5.857933 3324 +axiomat 1 3 5.857933 5.857933 3288 +mailinglist 1 3 5.857933 5.857933 3325 +militari 1 3 5.857933 5.857933 3326 +defens 1 3 5.857933 5.857933 3327 +educours 1 2 6.263398 6.263398 4409 +petri 1 2 6.263398 6.263398 4414 +engineeringmeet 1 1 6.957497 6.957497 7829 +eduta 1 1 6.957497 6.957497 7830 +descriptionthi 1 1 6.957497 6.957497 7831 +textbookghezzi 1 1 6.957497 6.957497 7832 +jazayeri 1 1 6.957497 6.957497 7833 +mandrioli 1 1 6.957497 6.957497 7834 +cohes 1 1 6.957497 6.957497 7835 +departmentsuggest 1 1 6.957497 6.957497 7836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..d18e5b42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,347 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +provid 1 121 2.079442 2.079442 94 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +text 1 98 2.302585 2.302585 133 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +select 1 91 2.397895 2.397895 154 +section 1 94 2.397895 2.397895 149 +mani 1 92 2.397895 2.397895 150 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +exam 1 86 2.484907 2.484907 169 +librari 1 87 2.484907 2.484907 181 +build 1 85 2.484907 2.484907 184 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +internet 1 83 2.484907 2.484907 186 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +complet 1 77 2.564949 2.564949 208 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +addit 1 74 2.639057 2.639057 228 +free 1 73 2.639057 2.639057 224 +thursdai 1 70 2.708050 2.708050 241 +sieg 1 69 2.708050 2.708050 260 +order 1 69 2.708050 2.708050 249 +java 1 70 2.708050 2.708050 248 +integr 1 67 2.708050 2.708050 245 +window 1 68 2.708050 2.708050 242 +test 1 66 2.708050 2.708050 252 +receiv 1 66 2.708050 2.708050 244 +wednesdai 1 64 2.772589 2.772589 261 +artifici 1 63 2.772589 2.772589 280 +copi 1 63 2.772589 2.772589 284 +januari 1 62 2.772589 2.772589 264 +guid 1 63 2.772589 2.772589 267 +new 1 64 2.772589 2.772589 262 +import 1 65 2.772589 2.772589 282 +descript 1 64 2.772589 2.772589 271 +room 1 59 2.833213 2.833213 301 +best 1 59 2.833213 2.833213 299 +locat 1 59 2.833213 2.833213 303 +march 1 61 2.833213 2.833213 295 +sever 1 56 2.890372 2.890372 322 +explor 1 58 2.890372 2.890372 324 +unix 1 58 2.890372 2.890372 308 +special 1 56 2.890372 2.890372 320 +point 1 58 2.890372 2.890372 319 +detail 1 57 2.890372 2.890372 321 +cover 1 55 2.944439 2.944439 329 +extens 1 53 2.944439 2.944439 340 +februari 1 54 2.944439 2.944439 328 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +archiv 1 49 3.044522 3.044522 364 +standard 1 48 3.044522 3.044522 365 +visual 1 48 3.044522 3.044522 372 +format 1 48 3.044522 3.044522 356 +understand 1 47 3.091042 3.091042 384 +get 1 46 3.091042 3.091042 380 +quarter 1 47 3.091042 3.091042 389 +move 1 47 3.091042 3.091042 382 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +anoth 1 45 3.135494 3.135494 408 +even 1 45 3.135494 3.135494 393 +fridai 1 44 3.135494 3.135494 390 +mark 1 44 3.135494 3.135494 403 +offer 1 43 3.178054 3.178054 414 +show 1 43 3.178054 3.178054 417 +examin 1 42 3.218876 3.218876 424 +past 1 42 3.218876 3.218876 428 +combin 1 42 3.218876 3.218876 421 +edit 1 42 3.218876 3.218876 418 +compani 1 41 3.218876 3.218876 423 +editor 1 41 3.218876 3.218876 433 +howev 1 41 3.218876 3.218876 422 +might 1 41 3.218876 3.218876 426 +review 1 42 3.218876 3.218876 425 +tutori 1 39 3.258097 3.258097 437 +announc 1 40 3.258097 3.258097 441 +must 1 40 3.258097 3.258097 442 +multipl 1 39 3.258097 3.258097 453 +form 1 39 3.258097 3.258097 443 +credit 1 38 3.295837 3.295837 460 +close 1 38 3.295837 3.295837 465 +cost 1 37 3.332205 3.332205 480 +winter 1 36 3.367296 3.367296 500 +download 1 36 3.367296 3.367296 489 +post 1 35 3.401197 3.401197 505 +either 1 35 3.401197 3.401197 506 +approxim 1 35 3.401197 3.401197 509 +go 1 33 3.433987 3.433987 529 +chapter 1 32 3.465736 3.465736 536 +given 1 32 3.465736 3.465736 538 +common 1 30 3.555348 3.555348 574 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +hard 1 30 3.555348 3.555348 563 +option 1 30 3.555348 3.555348 575 +steve 1 29 3.583519 3.583519 594 +particip 1 29 3.583519 3.583519 589 +limit 1 29 3.583519 3.583519 585 +turn 1 29 3.583519 3.583519 586 +except 1 28 3.610918 3.610918 607 +full 1 28 3.610918 3.610918 615 +packag 1 28 3.610918 3.610918 614 +held 1 28 3.610918 3.610918 600 +symbol 1 27 3.637586 3.637586 620 +pagecs 1 26 3.688879 3.688879 658 +rather 1 26 3.688879 3.688879 642 +relev 1 26 3.688879 3.688879 637 +altern 1 26 3.688879 3.688879 641 +session 1 26 3.688879 3.688879 643 +although 1 25 3.737670 3.737670 667 +todai 1 25 3.737670 3.737670 672 +alwai 1 24 3.761200 3.761200 691 +interpret 1 24 3.761200 3.761200 686 +wish 1 24 3.761200 3.761200 692 +demonstr 1 24 3.761200 3.761200 694 +displai 1 23 3.806662 3.806662 712 +tent 1 22 3.850148 3.850148 739 +try 1 22 3.850148 3.850148 764 +varieti 1 22 3.850148 3.850148 740 +instead 1 22 3.850148 3.850148 756 +sent 1 22 3.850148 3.850148 763 +programminglanguag 1 21 3.912023 3.912023 782 +path 1 21 3.912023 3.912023 778 +viewer 1 21 3.912023 3.912023 787 +expert 1 20 3.951244 3.951244 833 +entir 1 20 3.951244 3.951244 811 +facil 1 20 3.951244 3.951244 814 +particularli 1 19 4.007333 4.007333 867 +lisp 1 18 4.060443 4.060443 897 +element 1 18 4.060443 4.060443 895 +seem 1 18 4.060443 4.060443 899 +whole 1 17 4.110874 4.110874 940 +macintosh 1 17 4.110874 4.110874 920 +stat 1 17 4.110874 4.110874 924 +regular 1 17 4.110874 4.110874 929 +modif 1 17 4.110874 4.110874 913 +transfer 1 16 4.174387 4.174387 967 +normal 1 16 4.174387 4.174387 995 +choic 1 16 4.174387 4.174387 979 +purchas 1 15 4.248495 4.248495 1030 +micro 1 15 4.248495 4.248495 1031 +easili 1 14 4.317488 4.317488 1077 +prolog 1 13 4.382027 4.382027 1155 +difficulti 1 13 4.382027 4.382027 1132 +emac 1 13 4.382027 4.382027 1143 +introduc 1 13 4.382027 4.382027 1139 +convert 1 13 4.382027 4.382027 1122 +transpar 1 11 4.553877 4.553877 1325 +host 1 11 4.553877 4.553877 1306 +abil 1 11 4.553877 4.553877 1341 +string 1 11 4.553877 4.553877 1340 +sens 1 11 4.553877 4.553877 1305 +tanimoto 1 10 4.653960 4.653960 1429 +catalog 1 10 4.653960 4.653960 1431 +mainli 1 10 4.653960 4.653960 1432 +tradit 1 10 4.653960 4.653960 1404 +bring 1 10 4.653960 4.653960 1430 +entitl 1 9 4.753590 4.753590 1490 +deadlin 1 9 4.753590 4.753590 1502 +attent 1 8 4.875197 4.875197 1651 +ruth 1 7 5.010635 5.010635 1870 +throughout 1 7 5.010635 5.010635 1871 +bookstor 1 7 5.010635 5.010635 1837 +fromth 1 7 5.010635 5.010635 1802 +fortun 1 7 5.010635 5.010635 1872 +remind 1 7 5.010635 5.010635 1799 +thompson 1 6 5.164786 5.164786 2049 +grammar 1 6 5.164786 5.164786 2058 +trail 1 6 5.164786 5.164786 2071 +fred 1 6 5.164786 5.164786 2072 +classroom 1 6 5.164786 5.164786 2006 +plu 1 6 5.164786 5.164786 2004 +hardcopi 1 5 5.347108 5.347108 2246 +shell 1 5 5.347108 5.347108 2353 +respond 1 5 5.347108 5.347108 2354 +mac 1 5 5.347108 5.347108 2292 +supplement 1 5 5.347108 5.347108 2355 +allegro 1 5 5.347108 5.347108 2314 +attract 1 5 5.347108 5.347108 2356 +writeup 1 5 5.347108 5.347108 2352 +freeman 1 4 5.568345 5.568345 2725 +basement 1 4 5.568345 5.568345 2663 +websit 1 4 5.568345 5.568345 2726 +andit 1 3 5.857933 5.857933 3328 +contentspag 1 3 5.857933 5.857933 3103 +orpostscript 1 3 5.857933 5.857933 3329 +mathematica 1 3 5.857933 5.857933 3303 +beginn 1 3 5.857933 5.857933 3330 +insieg 1 3 5.857933 5.857933 3331 +ofread 1 2 6.263398 6.263398 4417 +glossari 1 2 6.263398 6.263398 4418 +referenceon 1 2 6.263398 6.263398 4419 +usingcommon 1 2 6.263398 6.263398 4420 +mscc 1 2 6.263398 6.263398 4406 +themathemat 1 2 6.263398 6.263398 4421 +yacc 1 2 6.263398 6.263398 4422 +franz 1 2 6.263398 6.263398 4423 +thelaboratori 1 2 6.263398 6.263398 4424 +token 1 2 6.263398 6.263398 4415 +onthursdai 1 2 6.263398 6.263398 4425 +pencil 1 2 6.263398 6.263398 4426 +mileston 1 2 6.263398 6.263398 4416 +andersonmeet 1 1 6.957497 6.957497 7839 +andpars 1 1 6.957497 6.957497 7840 +incommon 1 1 6.957497 6.957497 7841 +purchasedsepar 1 1 6.957497 6.957497 7842 +fordigitool 1 1 6.957497 6.957497 7843 +thatmaintain 1 1 6.957497 6.957497 7844 +currentinform 1 1 6.957497 6.957497 7845 +introductionto 1 1 6.957497 6.957497 7846 +thatdoesn 1 1 6.957497 6.957497 7847 +promptli 1 1 6.957497 6.957497 7848 +theirimplement 1 1 6.957497 6.957497 7849 +buildingprogram 1 1 6.957497 6.957497 7850 +tointepret 1 1 6.957497 6.957497 7851 +alsolook 1 1 6.957497 6.957497 7852 +programmingfacil 1 1 6.957497 6.957497 7853 +thebas 1 1 6.957497 6.957497 7854 +allegrocommon 1 1 6.957497 6.957497 7855 +powerfulenviron 1 1 6.957497 6.957497 7856 +graphicsand 1 1 6.957497 6.957497 7857 +machinesof 1 1 6.957497 6.957497 7858 +inthompson 1 1 6.957497 6.957497 7837 +theirown 1 1 6.957497 6.957497 7859 +xlisp 1 1 6.957497 6.957497 7860 +theseresourc 1 1 6.957497 6.957497 7861 +thatxlisp 1 1 6.957497 6.957497 7862 +bare 1 1 6.957497 6.957497 7863 +bone 1 1 6.957497 6.957497 7864 +nothav 1 1 6.957497 6.957497 7865 +disadvantag 1 1 6.957497 6.957497 7866 +labunless 1 1 6.957497 6.957497 7867 +fromdigitool 1 1 6.957497 6.957497 7868 +dealallow 1 1 6.957497 6.957497 7869 +lispfor 1 1 6.957497 6.957497 7870 +thistim 1 1 6.957497 6.957497 7871 +regardingread 1 1 6.957497 6.957497 7872 +printout 1 1 6.957497 6.957497 7873 +becov 1 1 6.957497 6.957497 7874 +announcedearli 1 1 6.957497 6.957497 7875 +koch 1 1 6.957497 6.957497 7838 +snowflak 1 1 6.957497 6.957497 7876 +projectgener 1 1 6.957497 6.957497 7877 +aboutdemonstr 1 1 6.957497 6.957497 7878 +onmondai 1 1 6.957497 6.957497 7879 +exercisestokenizerassign 1 1 6.957497 6.957497 7880 +andpart 1 1 6.957497 6.957497 7881 +parsertokenizerpart 1 1 6.957497 6.957497 7882 +snowflakeassign 1 1 6.957497 6.957497 7883 +ondemonstr 1 1 6.957497 6.957497 7884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..d8bd6d10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +hour 1 165 1.791759 1.791759 46 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +final 1 116 2.197225 2.197225 108 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +homework 1 79 2.564949 2.564949 193 +june 1 79 2.564949 2.564949 214 +interfac 1 79 2.564949 2.564949 209 +intellig 1 72 2.639057 2.639057 225 +thursdai 1 70 2.708050 2.708050 241 +artifici 1 63 2.772589 2.772589 280 +type 1 61 2.833213 2.833213 296 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +basic 1 50 3.044522 3.044522 360 +third 1 43 3.178054 3.178054 412 +download 1 36 3.367296 3.367296 489 +common 1 30 3.555348 3.555348 574 +lisp 1 18 4.060443 4.060443 897 +record 1 18 4.060443 4.060443 890 +outlin 1 17 4.110874 4.110874 914 +macintosh 1 17 4.110874 4.110874 920 +emac 1 13 4.382027 4.382027 1143 +rich 1 10 4.653960 4.653960 1396 +noon 1 7 5.010635 5.010635 1804 +thompson 1 6 5.164786 5.164786 2049 +gentl 1 5 5.347108 5.347108 2264 +allegro 1 5 5.347108 5.347108 2314 +csoffic 1 4 5.568345 5.568345 2727 +knight 1 4 5.568345 5.568345 2728 +turnin 1 4 5.568345 5.568345 2654 +alistair 1 3 5.857933 5.857933 3315 +holden 1 3 5.857933 5.857933 3314 +joshua 1 3 5.857933 5.857933 3333 +redston 1 3 5.857933 5.857933 3332 +noonta 1 2 6.263398 6.263398 4427 +secondedit 1 2 6.263398 6.263398 4096 +touretzki 1 2 6.263398 6.263398 4428 +intelligencecs 1 1 6.957497 6.957497 7886 +msoffic 1 1 6.957497 6.957497 7887 +symboliccomput 1 1 6.957497 6.957497 7888 +emacsinterfac 1 1 6.957497 6.957497 7889 +refcard 1 1 6.957497 6.957497 7885 +standalonelisp 1 1 6.957497 6.957497 7890 +gradesredston 1 1 6.957497 6.957497 7891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..c752ee18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +algorithm 1 162 1.791759 1.791759 57 +read 1 154 1.791759 1.791759 47 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +solut 1 82 2.484907 2.484907 162 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +previou 1 62 2.772589 2.772589 290 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +three 1 54 2.944439 2.944439 330 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +quarter 1 47 3.091042 3.091042 389 +math 1 44 3.135494 3.135494 402 +winter 1 36 3.367296 3.367296 500 +print 1 34 3.401197 3.401197 503 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +martin 1 21 3.912023 3.912023 794 +latest 1 21 3.912023 3.912023 785 +viewer 1 21 3.912023 3.912023 787 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +render 1 17 4.110874 4.110874 947 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +command 1 14 4.317488 4.317488 1083 +larri 1 13 4.382027 4.382027 1142 +karlin 1 13 4.382027 4.382027 1176 +web 1 12 4.465908 4.465908 1249 +errata 1 10 4.653960 4.653960 1403 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +legibl 1 7 5.010635 5.010635 1866 +adob 1 7 5.010635 5.010635 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 1 5 5.347108 5.347108 2345 +thecours 1 4 5.568345 5.568345 2685 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +tompaclass 1 3 5.857933 5.857933 3310 +tompa 1 3 5.857933 5.857933 3305 +aberman 1 2 6.263398 6.263398 4429 +midtem 1 1 6.957497 6.957497 7892 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..2143b493 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +solut 1 82 2.484907 2.484907 162 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +sourc 1 77 2.564949 2.564949 201 +exampl 1 77 2.564949 2.564949 195 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +three 1 54 2.944439 2.944439 330 +format 1 48 3.044522 3.044522 356 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +math 1 44 3.135494 3.135494 402 +print 1 34 3.401197 3.401197 503 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +tent 1 22 3.850148 3.850148 739 +sent 1 22 3.850148 3.850148 763 +latest 1 21 3.912023 3.912023 785 +viewer 1 21 3.912023 3.912023 787 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +render 1 17 4.110874 4.110874 947 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +command 1 14 4.317488 4.317488 1083 +larri 1 13 4.382027 4.382027 1142 +everyth 1 13 4.382027 4.382027 1169 +web 1 12 4.465908 4.465908 1249 +errata 1 10 4.653960 4.653960 1403 +admin 1 9 4.753590 4.753590 1476 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +legibl 1 7 5.010635 5.010635 1866 +adob 1 7 5.010635 5.010635 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 1 5 5.347108 5.347108 2345 +thecours 1 4 5.568345 5.568345 2685 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +sendmail 1 3 5.857933 5.857933 3099 +jayram 1 1 6.957497 6.957497 7893 +thathachar 1 1 6.957497 6.957497 7894 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..7b9e8c77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +imag 1 91 2.397895 2.397895 161 +homework 1 79 2.564949 2.564949 193 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +prof 1 64 2.772589 2.772589 273 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +telephon 1 50 3.044522 3.044522 373 +quarter 1 47 3.091042 3.091042 389 +announc 1 40 3.258097 3.258097 441 +probabl 1 40 3.258097 3.258097 455 +word 1 34 3.401197 3.401197 508 +queri 1 33 3.433987 3.433987 524 +request 1 26 3.688879 3.688879 635 +left 1 19 4.007333 4.007333 851 +save 1 14 4.317488 4.317488 1099 +dbm 1 13 4.382027 4.382027 1136 +linda 1 10 4.653960 4.653960 1394 +shapiro 1 8 4.875197 4.875197 1686 +potenti 1 8 4.875197 4.875197 1690 +shift 1 5 5.347108 5.347108 2357 +systemsfal 1 4 5.568345 5.568345 2683 +patrick 1 3 5.857933 5.857933 3334 +qbic 1 3 5.857933 5.857933 3294 +systemscs 1 1 6.957497 6.957497 7895 +crowlei 1 1 6.957497 6.957497 7896 +pcrowlei 1 1 6.957497 6.957497 7897 +unisql 1 1 6.957497 6.957497 7898 +webcs 1 1 6.957497 6.957497 7899 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..d7b001f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +relat 1 139 1.945910 1.945910 68 +click 1 142 1.945910 1.945910 78 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +instructor 1 108 2.197225 2.197225 107 +person 1 111 2.197225 2.197225 117 +send 1 114 2.197225 2.197225 109 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +solut 1 82 2.484907 2.484907 162 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +materi 1 75 2.639057 2.639057 221 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +handout 1 64 2.772589 2.772589 263 +space 1 57 2.890372 2.890372 310 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +archiv 1 49 3.044522 3.044522 364 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +brian 1 38 3.295837 3.295837 466 +slide 1 38 3.295837 3.295837 467 +word 1 34 3.401197 3.401197 508 +autumn 1 31 3.496508 3.496508 558 +scale 1 28 3.610918 3.610918 613 +actual 1 28 3.610918 3.610918 604 +administr 1 27 3.637586 3.637586 628 +wish 1 24 3.761200 3.761200 692 +sent 1 22 3.850148 3.850148 763 +feedback 1 19 4.007333 4.007333 854 +bershad 1 18 4.060443 4.060443 902 +intro 1 17 4.110874 4.110874 915 +outlin 1 17 4.110874 4.110874 914 +reflect 1 15 4.248495 4.248495 1034 +webmast 1 15 4.248495 4.248495 1045 +anonym 1 14 4.317488 4.317488 1100 +regularli 1 11 4.553877 4.553877 1338 +hint 1 10 4.653960 4.653960 1419 +sung 1 6 5.164786 5.164786 2075 +pace 1 6 5.164786 5.164786 2011 +carefulli 1 6 5.164786 5.164786 2045 +lost 1 5 5.347108 5.347108 2358 +choi 1 4 5.568345 5.568345 2732 +vital 1 4 5.568345 5.568345 2733 +surviv 1 4 5.568345 5.568345 2734 +aggress 1 3 5.857933 5.857933 3240 +andwil 1 3 5.857933 5.857933 3335 +wisdom 1 2 6.263398 6.263398 4430 +schedulewhat 1 2 6.263398 6.263398 4139 +adminth 1 1 6.957497 6.957497 7900 +andoth 1 1 6.957497 6.957497 7901 +projectsdescript 1 1 6.957497 6.957497 7902 +solutionsto 1 1 6.957497 6.957497 7903 +notesnot 1 1 6.957497 6.957497 7904 +watchthi 1 1 6.957497 6.957497 7905 +andgrad 1 1 6.957497 6.957497 7906 +onproject 1 1 6.957497 6.957497 7907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..cdfaff7e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +relat 1 139 1.945910 1.945910 68 +document 1 121 2.079442 2.079442 89 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +grade 1 90 2.397895 2.397895 142 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +exampl 1 77 2.564949 2.564949 195 +write 1 72 2.639057 2.639057 222 +degre 1 69 2.708050 2.708050 259 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +instruct 1 53 2.944439 2.944439 332 +frequent 1 49 3.044522 3.044522 367 +visitor 1 49 3.044522 3.044522 371 +quarter 1 47 3.091042 3.091042 389 +netscap 1 44 3.135494 3.135494 395 +mean 1 37 3.332205 3.332205 477 +winter 1 36 3.367296 3.367296 500 +ad 1 32 3.465736 3.465736 544 +autumn 1 31 3.496508 3.496508 558 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +hypermedia 1 12 4.465908 4.465908 1247 +baker 1 7 5.010635 5.010635 1812 +silicon 1 6 5.164786 5.164786 2076 +opengl 1 5 5.347108 5.347108 2299 +bounti 1 4 5.568345 5.568345 2715 +pighin 1 4 5.568345 5.568345 2735 +assignmentshomework 1 4 5.568345 5.568345 2721 +thisdocu 1 3 5.857933 5.857933 3336 +quarterwelcom 1 2 6.263398 6.263398 4378 +indi 1 2 6.263398 6.263398 4431 +keepin 1 1 6.957497 6.957497 7908 +informationwil 1 1 6.957497 6.957497 7909 +classpersonnel 1 1 6.957497 6.957497 7910 +syllabuscours 1 1 6.957497 6.957497 7911 +calendarta 1 1 6.957497 6.957497 7912 +hourshandout 1 1 6.957497 6.957497 7913 +assignmentslectur 1 1 6.957497 6.957497 7914 +notesread 1 1 6.957497 6.957497 7915 +assignmentsprojectsproject 1 1 6.957497 6.957497 7916 +handoutsproject 1 1 6.957497 6.957497 7917 +artifactsproject 1 1 6.957497 6.957497 7918 +sessionsproject 1 1 6.957497 6.957497 7919 +policyproject 1 1 6.957497 6.957497 7920 +upslibui 1 1 6.957497 6.957497 7921 +documentationoth 1 1 6.957497 6.957497 7922 +informationget 1 1 6.957497 6.957497 7923 +classhearn 1 1 6.957497 6.957497 7924 +erratath 1 1 6.957497 6.957497 7925 +labus 1 1 6.957497 6.957497 7926 +pagegraph 1 1 6.957497 6.957497 7927 +linkssgi 1 1 6.957497 6.957497 7928 +surfgrafica 1 1 6.957497 6.957497 7929 +obscurasiggraphgrailgraph 1 1 6.957497 6.957497 7930 +indexoth 1 1 6.957497 6.957497 7931 +linksmvi 1 1 6.957497 6.957497 7932 +departmentth 1 1 6.957497 6.957497 7933 +programth 1 1 6.957497 6.957497 7934 +programweb 1 1 6.957497 6.957497 7935 +helpbas 1 1 6.957497 6.957497 7936 +helpmosa 1 1 6.957497 6.957497 7937 +lynxus 1 1 6.957497 6.957497 7938 +indyspighin 1 1 6.957497 6.957497 7939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..d97b1f22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +servic 1 72 2.639057 2.639057 236 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +sieg 1 69 2.708050 2.708050 260 +would 1 67 2.708050 2.708050 251 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +prof 1 64 2.772589 2.772589 273 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +browser 1 56 2.890372 2.890372 313 +cover 1 55 2.944439 2.944439 329 +three 1 54 2.944439 2.944439 330 +week 1 52 2.995732 2.995732 343 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +discuss 1 45 3.135494 3.135494 399 +math 1 44 3.135494 3.135494 402 +might 1 41 3.218876 3.218876 426 +origin 1 38 3.295837 3.295837 472 +everi 1 34 3.401197 3.401197 519 +autumn 1 31 3.496508 3.496508 558 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +request 1 26 3.688879 3.688879 635 +consult 1 24 3.761200 3.761200 687 +handl 1 24 3.761200 3.761200 685 +thu 1 21 3.912023 3.912023 773 +fact 1 21 3.912023 3.912023 780 +viewer 1 21 3.912023 3.912023 787 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +letter 1 16 4.174387 4.174387 981 +indic 1 15 4.248495 4.248495 1013 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +command 1 14 4.317488 4.317488 1083 +convert 1 13 4.382027 4.382027 1122 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +larger 1 7 5.010635 5.010635 1875 +legibl 1 7 5.010635 5.010635 1866 +ghostscript 1 7 5.010635 5.010635 1867 +markup 1 6 5.164786 5.164786 2059 +strang 1 6 5.164786 5.164786 2064 +latexhtml 1 5 5.347108 5.347108 2347 +arun 1 4 5.568345 5.568345 2736 +csoffic 1 4 5.568345 5.568345 2727 +accommod 1 3 5.857933 5.857933 3337 +somani 1 2 6.263398 6.263398 4432 +cslectur 1 2 6.263398 6.263398 4433 +havea 1 2 6.263398 6.263398 4434 +networksautumn 1 1 6.957497 6.957497 7942 +eebphon 1 1 6.957497 6.957497 7943 +jari 1 1 6.957497 6.957497 7941 +kristensen 1 1 6.957497 6.957497 7944 +tomatch 1 1 6.957497 6.957497 7945 +andprovid 1 1 6.957497 6.957497 7946 +timewindow 1 1 6.957497 6.957497 7947 +overheadshomeworksprojectsinterest 1 1 6.957497 6.957497 7948 +stuffattentionif 1 1 6.957497 6.957497 7949 +disabl 1 1 6.957497 6.957497 7940 +pleasecontact 1 1 6.957497 6.957497 7950 +schmitz 1 1 6.957497 6.957497 7951 +requiresacadem 1 1 6.957497 6.957497 7952 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..960dddcf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +text 1 98 2.302585 2.302585 133 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +sieg 1 69 2.708050 2.708050 260 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +browser 1 56 2.890372 2.890372 313 +three 1 54 2.944439 2.944439 330 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +math 1 44 3.135494 3.135494 402 +origin 1 38 3.295837 3.295837 472 +richard 1 31 3.496508 3.496508 559 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +handl 1 24 3.761200 3.761200 685 +william 1 22 3.850148 3.850148 765 +fact 1 21 3.912023 3.912023 780 +viewer 1 21 3.912023 3.912023 787 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +command 1 14 4.317488 4.317488 1083 +convert 1 13 4.382027 4.382027 1122 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +chan 1 7 5.010635 5.010635 1876 +legibl 1 7 5.010635 5.010635 1866 +ghostscript 1 7 5.010635 5.010635 1867 +ladner 1 6 5.164786 5.164786 2062 +markup 1 6 5.164786 5.164786 2059 +strang 1 6 5.164786 5.164786 2064 +latexhtml 1 5 5.347108 5.347108 2347 +csoffic 1 4 5.568345 5.568345 2727 +wchan 1 3 5.857933 5.857933 3338 +cslectur 1 2 6.263398 6.263398 4433 +noonta 1 2 6.263398 6.263398 4427 +eduwchan 1 2 6.263398 6.263398 4435 +networksspr 1 1 6.957497 6.957497 7953 +overheadshomeworksprojectsabout 1 1 6.957497 6.957497 7954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..e25dc513 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +advanc 1 99 2.302585 2.302585 130 +found 1 53 2.944439 2.944439 337 +digit 1 52 2.995732 2.995732 348 +pagecs 1 26 3.688879 3.688879 658 +designt 1 2 6.263398 6.263398 4436 +kehl 1 2 6.263398 6.263398 4437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..eb2fa9a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +assign 1 135 1.945910 1.945910 66 +welcom 1 122 2.079442 2.079442 99 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +book 1 99 2.302585 2.302585 131 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +resourc 1 81 2.484907 2.484907 172 +academ 1 82 2.484907 2.484907 178 +homework 1 79 2.564949 2.564949 193 +state 1 76 2.564949 2.564949 207 +server 1 76 2.564949 2.564949 204 +sourc 1 77 2.564949 2.564949 201 +summari 1 73 2.639057 2.639057 237 +logic 1 71 2.639057 2.639057 230 +nation 1 74 2.639057 2.639057 240 +test 1 66 2.708050 2.708050 252 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +laboratori 1 63 2.772589 2.772589 292 +written 1 63 2.772589 2.772589 278 +handout 1 64 2.772589 2.772589 263 +collect 1 65 2.772589 2.772589 268 +march 1 61 2.833213 2.833213 295 +cover 1 55 2.944439 2.944439 329 +februari 1 54 2.944439 2.944439 328 +sampl 1 53 2.944439 2.944439 339 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +adapt 1 46 3.091042 3.091042 387 +mark 1 44 3.135494 3.135494 403 +fridai 1 44 3.135494 3.135494 390 +review 1 42 3.218876 3.218876 425 +combin 1 42 3.218876 3.218876 421 +announc 1 40 3.258097 3.258097 441 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +staff 1 36 3.367296 3.367296 490 +copyright 1 36 3.367296 3.367296 495 +richard 1 31 3.496508 3.496508 559 +option 1 30 3.555348 3.555348 575 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +sequenti 1 22 3.850148 3.850148 745 +vlsi 1 21 3.912023 3.912023 795 +thur 1 19 4.007333 4.007333 847 +scott 1 18 4.060443 4.060443 884 +dilbert 1 16 4.174387 4.174387 996 +sheet 1 16 4.174387 4.174387 973 +portion 1 16 4.174387 4.174387 971 +comic 1 14 4.317488 4.317488 1103 +reprint 1 14 4.317488 4.317488 1097 +johnson 1 13 4.382027 4.382027 1162 +care 1 13 4.382027 4.382027 1177 +duli 1 12 4.465908 4.465908 1248 +tue 1 11 4.553877 4.553877 1308 +stephen 1 11 4.553877 4.553877 1342 +nonprofit 1 11 4.553877 4.553877 1339 +fpga 1 10 4.653960 4.653960 1433 +motorola 1 9 4.753590 4.753590 1546 +gaetano 1 6 5.164786 5.164786 2068 +philip 1 6 5.164786 5.164786 2005 +writeup 1 5 5.347108 5.347108 2352 +borriello 1 5 5.347108 5.347108 2349 +midnight 1 4 5.568345 5.568345 2599 +murphi 1 4 5.568345 5.568345 2737 +semiconductor 1 3 5.857933 5.857933 3339 +semiconduct 1 3 5.857933 5.857933 3340 +micron 1 3 5.857933 5.857933 3341 +designt 1 2 6.263398 6.263398 4436 +kehl 1 2 6.263398 6.263398 4437 +aaron 1 2 6.263398 6.263398 4438 +comprehensivelist 1 2 6.263398 6.263398 4439 +icmanufactur 1 2 6.263398 6.263398 4440 +syllabusschedul 1 1 6.957497 6.957497 7956 +savoi 1 1 6.957497 6.957497 7957 +savac 1 1 6.957497 6.957497 7958 +chinn 1 1 6.957497 6.957497 7959 +richin 1 1 6.957497 6.957497 7960 +howard 1 1 6.957497 6.957497 7961 +shchang 1 1 6.957497 6.957497 7962 +csjason 1 1 6.957497 6.957497 7963 +quarterhomework 1 1 6.957497 6.957497 7964 +assignmentsweb 1 1 6.957497 6.957497 7965 +duehomework 1 1 6.957497 6.957497 7966 +abel 1 1 6.957497 6.957497 7967 +fixtur 1 1 6.957497 6.957497 7968 +optionlab 1 1 6.957497 6.957497 7955 +communicationoth 1 1 6.957497 6.957497 7969 +sheetsth 1 1 6.957497 6.957497 7970 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..b726a8c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +model 1 145 1.945910 1.945910 69 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +memori 1 101 2.302585 2.302585 139 +follow 1 92 2.397895 2.397895 143 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +html 1 75 2.639057 2.639057 235 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +free 1 73 2.639057 2.639057 224 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +test 1 66 2.708050 2.708050 252 +organ 1 65 2.772589 2.772589 265 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +wednesdai 1 64 2.772589 2.772589 261 +previou 1 62 2.772589 2.772589 290 +simpl 1 60 2.833213 2.833213 298 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +hardwar 1 51 2.995732 2.995732 350 +appoint 1 49 3.044522 3.044522 358 +quarter 1 47 3.091042 3.091042 389 +fridai 1 44 3.135494 3.135494 390 +answer 1 45 3.135494 3.135494 391 +midterm 1 45 3.135494 3.135494 392 +review 1 42 3.218876 3.218876 425 +fast 1 42 3.218876 3.218876 429 +form 1 39 3.258097 3.258097 443 +robert 1 30 3.555348 3.555348 567 +compon 1 30 3.555348 3.555348 570 +common 1 30 3.555348 3.555348 574 +revis 1 26 3.688879 3.688879 640 +hierarchi 1 22 3.850148 3.850148 744 +color 1 22 3.850148 3.850148 762 +unit 1 21 3.912023 3.912023 779 +binari 1 20 3.951244 3.951244 823 +prerequisit 1 19 4.007333 4.007333 846 +regist 1 17 4.110874 4.110874 938 +interconnect 1 17 4.110874 4.110874 937 +segment 1 17 4.110874 4.110874 931 +transfer 1 16 4.174387 4.174387 967 +sheet 1 16 4.174387 4.174387 973 +larri 1 13 4.382027 4.382027 1142 +loew 1 12 4.465908 4.465908 1252 +holidai 1 12 4.465908 4.465908 1224 +assembl 1 12 4.465908 4.465908 1207 +catalog 1 10 4.653960 4.653960 1431 +arithmet 1 10 4.653960 4.653960 1388 +modul 1 10 4.653960 4.653960 1434 +card 1 10 4.653960 4.653960 1435 +watson 1 8 4.875197 4.875197 1691 +pipelin 1 7 5.010635 5.010635 1830 +snyder 1 5 5.347108 5.347108 2359 +microprogram 1 4 5.568345 5.568345 2604 +appendix 1 4 5.568345 5.568345 2739 +mip 1 4 5.568345 5.568345 2738 +prog 1 4 5.568345 5.568345 2740 +judi 1 2 6.263398 6.263398 4442 +andorgan 1 2 6.263398 6.263398 4443 +verilog 1 2 6.263398 6.263398 4441 +jwatson 1 1 6.957497 6.957497 7972 +chenoffic 1 1 6.957497 6.957497 7973 +thursdays 1 1 6.957497 6.957497 7974 +chensg 1 1 6.957497 6.957497 7975 +laboratoryproject 1 1 6.957497 6.957497 7976 +setprocessor 1 1 6.957497 6.957497 7977 +skim 1 1 6.957497 6.957497 7971 +chap 1 1 6.957497 6.957497 7978 +referencesthi 1 1 6.957497 6.957497 7979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..afa8b6ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +hour 1 165 1.791759 1.791759 46 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +hall 1 146 1.945910 1.945910 65 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +seattl 1 120 2.079442 2.079442 103 +machin 1 129 2.079442 2.079442 95 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +welcom 1 122 2.079442 2.079442 99 +report 1 131 2.079442 2.079442 92 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +code 1 108 2.197225 2.197225 116 +world 1 115 2.197225 2.197225 126 +version 1 113 2.197225 2.197225 122 +access 1 102 2.302585 2.302585 136 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +take 1 97 2.302585 2.302585 134 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +search 1 95 2.397895 2.397895 155 +follow 1 92 2.397895 2.397895 143 +exam 1 86 2.484907 2.484907 169 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +learn 1 86 2.484907 2.484907 170 +solut 1 82 2.484907 2.484907 162 +mondai 1 77 2.564949 2.564949 206 +messag 1 76 2.564949 2.564949 212 +interfac 1 79 2.564949 2.564949 209 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +know 1 80 2.564949 2.564949 198 +april 1 77 2.564949 2.564949 196 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +intellig 1 72 2.639057 2.639057 225 +tuesdai 1 73 2.639057 2.639057 219 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +meet 1 72 2.639057 2.639057 229 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +window 1 68 2.708050 2.708050 242 +test 1 66 2.708050 2.708050 252 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +wednesdai 1 64 2.772589 2.772589 261 +creat 1 63 2.772589 2.772589 277 +new 1 64 2.772589 2.772589 262 +laboratori 1 63 2.772589 2.772589 292 +plan 1 65 2.772589 2.772589 272 +descript 1 64 2.772589 2.772589 271 +evalu 1 64 2.772589 2.772589 266 +room 1 59 2.833213 2.833213 301 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +reason 1 57 2.890372 2.890372 318 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +archiv 1 49 3.044522 3.044522 364 +standard 1 48 3.044522 3.044522 365 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +featur 1 46 3.091042 3.091042 386 +understand 1 47 3.091042 3.091042 384 +move 1 47 3.091042 3.091042 382 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +mark 1 44 3.135494 3.135494 403 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +examin 1 42 3.218876 3.218876 424 +vision 1 41 3.218876 3.218876 430 +review 1 42 3.218876 3.218876 425 +multipl 1 39 3.258097 3.258097 453 +form 1 39 3.258097 3.258097 443 +announc 1 40 3.258097 3.258097 441 +continu 1 39 3.258097 3.258097 448 +credit 1 38 3.295837 3.295837 460 +close 1 38 3.295837 3.295837 465 +download 1 36 3.367296 3.367296 489 +staff 1 36 3.367296 3.367296 490 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +represent 1 35 3.401197 3.401197 512 +either 1 35 3.401197 3.401197 506 +go 1 33 3.433987 3.433987 529 +kind 1 32 3.465736 3.465736 541 +given 1 32 3.465736 3.465736 538 +common 1 30 3.555348 3.555348 574 +hard 1 30 3.555348 3.555348 563 +option 1 30 3.555348 3.555348 575 +neural 1 30 3.555348 3.555348 578 +steve 1 29 3.583519 3.583519 594 +particip 1 29 3.583519 3.583519 589 +turn 1 29 3.583519 3.583519 586 +held 1 28 3.610918 3.610918 600 +propos 1 28 3.610918 3.610918 602 +progress 1 28 3.610918 3.610918 598 +pagecs 1 26 3.688879 3.688879 658 +rather 1 26 3.688879 3.688879 642 +session 1 26 3.688879 3.688879 643 +demonstr 1 24 3.761200 3.761200 694 +tent 1 22 3.850148 3.850148 739 +self 1 22 3.850148 3.850148 761 +try 1 22 3.850148 3.850148 764 +newsgroup 1 21 3.912023 3.912023 783 +entir 1 20 3.951244 3.951244 811 +facil 1 20 3.951244 3.951244 814 +expert 1 20 3.951244 3.951244 833 +separ 1 19 4.007333 4.007333 844 +excel 1 19 4.007333 4.007333 868 +exercis 1 19 4.007333 4.007333 842 +element 1 18 4.060443 4.060443 895 +lisp 1 18 4.060443 4.060443 897 +seem 1 18 4.060443 4.060443 899 +statu 1 18 4.060443 4.060443 885 +demo 1 18 4.060443 4.060443 888 +regular 1 17 4.110874 4.110874 929 +advantag 1 16 4.174387 4.174387 987 +choic 1 16 4.174387 4.174387 979 +explan 1 16 4.174387 4.174387 985 +portion 1 16 4.174387 4.174387 971 +purchas 1 15 4.248495 4.248495 1030 +workload 1 12 4.465908 4.465908 1210 +rest 1 12 4.465908 4.465908 1259 +holidai 1 12 4.465908 4.465908 1224 +sens 1 11 4.553877 4.553877 1305 +probabilist 1 11 4.553877 4.553877 1343 +tanimoto 1 10 4.653960 4.653960 1429 +bring 1 10 4.653960 4.653960 1430 +preliminari 1 9 4.753590 4.753590 1480 +implementationof 1 7 5.010635 5.010635 1813 +reduct 1 7 5.010635 5.010635 1877 +pentium 1 6 5.164786 5.164786 2077 +approv 1 6 5.164786 5.164786 2078 +jeremi 1 5 5.347108 5.347108 2360 +hardcopi 1 5 5.347108 5.347108 2246 +forprogram 1 5 5.347108 5.347108 2361 +allegro 1 5 5.347108 5.347108 2314 +attract 1 5 5.347108 5.347108 2356 +freeman 1 4 5.568345 5.568345 2725 +net 1 4 5.568345 5.568345 2741 +screenshot 1 4 5.568345 5.568345 2743 +peer 1 4 5.568345 5.568345 2742 +andit 1 3 5.857933 5.857933 3328 +contentspag 1 3 5.857933 5.857933 3103 +orpostscript 1 3 5.857933 5.857933 3329 +programmingtechniqu 1 3 5.857933 5.857933 3113 +insieg 1 3 5.857933 5.857933 3331 +evaluationof 1 3 5.857933 5.857933 3192 +assignmentsassign 1 3 5.857933 5.857933 3342 +youdon 1 2 6.263398 6.263398 4444 +referenceon 1 2 6.263398 6.263398 4419 +usingcommon 1 2 6.263398 6.263398 4420 +franz 1 2 6.263398 6.263398 4423 +inour 1 2 6.263398 6.263398 4445 +ofproject 1 2 6.263398 6.263398 4446 +baermeet 1 1 6.957497 6.957497 7982 +csor 1 1 6.957497 6.957497 7980 +pnew 1 1 6.957497 6.957497 7981 +windowsimplement 1 1 6.957497 6.957497 7983 +programdevelop 1 1 6.957497 6.957497 7984 +theintel 1 1 6.957497 6.957497 7985 +isfor 1 1 6.957497 6.957497 7986 +bedownload 1 1 6.957497 6.957497 7987 +givenaccord 1 1 6.957497 6.957497 7988 +alist 1 1 6.957497 6.957497 7989 +coversboth 1 1 6.957497 6.957497 7990 +logicalreason 1 1 6.957497 6.957497 7991 +clo 1 1 6.957497 6.957497 7992 +programmingpart 1 1 6.957497 6.957497 7993 +ofhow 1 1 6.957497 6.957497 7994 +circul 1 1 6.957497 6.957497 7995 +orturn 1 1 6.957497 6.957497 7996 +wrap 1 1 6.957497 6.957497 7997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..cf729426 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +assign 1 135 1.945910 1.945910 66 +area 1 144 1.945910 1.945910 80 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +seattl 1 120 2.079442 2.079442 103 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +book 1 99 2.302585 2.302585 131 +info 1 85 2.484907 2.484907 176 +resourc 1 81 2.484907 2.484907 172 +academ 1 82 2.484907 2.484907 178 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +syllabu 1 67 2.708050 2.708050 247 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +laboratori 1 63 2.772589 2.772589 292 +collect 1 65 2.772589 2.772589 268 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +adapt 1 46 3.091042 3.091042 387 +announc 1 40 3.258097 3.258097 441 +societi 1 40 3.258097 3.258097 456 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +staff 1 36 3.367296 3.367296 490 +robot 1 36 3.367296 3.367296 497 +copyright 1 36 3.367296 3.367296 495 +steve 1 29 3.583519 3.583519 594 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +smith 1 20 3.951244 3.951244 820 +nice 1 20 3.951244 3.951244 809 +anderson 1 19 4.007333 4.007333 860 +dilbert 1 16 4.174387 4.174387 996 +sheet 1 16 4.174387 4.174387 973 +portion 1 16 4.174387 4.174387 971 +comic 1 14 4.317488 4.317488 1103 +reprint 1 14 4.317488 4.317488 1097 +care 1 13 4.382027 4.382027 1177 +loew 1 12 4.465908 4.465908 1252 +duli 1 12 4.465908 4.465908 1248 +stephen 1 11 4.553877 4.553877 1342 +nonprofit 1 11 4.553877 4.553877 1339 +motorola 1 9 4.753590 4.753590 1546 +portland 1 7 5.010635 5.010635 1878 +fred 1 6 5.164786 5.164786 2072 +gaetano 1 6 5.164786 5.164786 2068 +philip 1 6 5.164786 5.164786 2005 +borriello 1 5 5.347108 5.347108 2349 +kent 1 4 5.568345 5.568345 2744 +murphi 1 4 5.568345 5.568345 2737 +comprehens 1 4 5.568345 5.568345 2745 +semiconductor 1 3 5.857933 5.857933 3339 +semiconduct 1 3 5.857933 5.857933 3340 +burn 1 2 6.263398 6.263398 4447 +serverth 1 2 6.263398 6.263398 4448 +designstev 1 1 6.957497 6.957497 7998 +casei 1 1 6.957497 6.957497 7999 +studentslab 1 1 6.957497 6.957497 8000 +mchc 1 1 6.957497 6.957497 8001 +martinrobot 1 1 6.957497 6.957497 8002 +societyoth 1 1 6.957497 6.957497 8003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..fcc8ca71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +send 1 114 2.197225 2.197225 109 +final 1 116 2.197225 2.197225 108 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +handout 1 64 2.772589 2.772589 263 +march 1 61 2.833213 2.833213 295 +usual 1 28 3.610918 3.610918 608 +session 1 26 3.688879 3.688879 643 +pagewelcom 1 11 4.553877 4.553877 1344 +bug 1 7 5.010635 5.010635 1801 +encount 1 3 5.857933 5.857933 3128 +bevi 1 1 6.957497 6.957497 8004 +relatingto 1 1 6.957497 6.957497 8005 +frequentlychang 1 1 6.957497 6.957497 8006 +bswest 1 1 6.957497 6.957497 8007 +csif 1 1 6.957497 6.957497 8008 +classpersonnelsyllabuslectur 1 1 6.957497 6.957497 8009 +scheduleguest 1 1 6.957497 6.957497 8010 +scheduleoffic 1 1 6.957497 6.957497 8011 +hoursproject 1 1 6.957497 6.957497 8012 +projectoth 1 1 6.957497 6.957497 8013 +erratarefer 1 1 6.957497 6.957497 8014 +pagesmidterm 1 1 6.957497 6.957497 8015 +questionnairebswest 1 1 6.957497 6.957497 8016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..cf464381 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +question 1 91 2.397895 2.397895 141 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +homework 1 79 2.564949 2.564949 193 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +refer 1 78 2.564949 2.564949 203 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +test 1 66 2.708050 2.708050 252 +import 1 65 2.772589 2.772589 282 +handout 1 64 2.772589 2.772589 263 +descript 1 64 2.772589 2.772589 271 +previou 1 62 2.772589 2.772589 290 +simpl 1 60 2.833213 2.833213 298 +sampl 1 53 2.944439 2.944439 339 +found 1 53 2.944439 2.944439 337 +week 1 52 2.995732 2.995732 343 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +quarter 1 47 3.091042 3.091042 389 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +slide 1 38 3.295837 3.295837 467 +manual 1 35 3.401197 3.401197 504 +kind 1 32 3.465736 3.465736 541 +full 1 28 3.610918 3.610918 615 +pagecs 1 26 3.688879 3.688879 658 +sent 1 22 3.850148 3.850148 763 +floor 1 14 4.317488 4.317488 1070 +front 1 13 4.382027 4.382027 1154 +cecil 1 9 4.753590 4.753590 1547 +chamber 1 8 4.875197 4.875197 1692 +leon 1 8 4.875197 4.875197 1631 +affect 1 6 5.164786 5.164786 2044 +textual 1 6 5.164786 5.164786 1979 +vortex 1 5 5.347108 5.347108 2362 +travers 1 5 5.347108 5.347108 2363 +litvinov 1 3 5.857933 5.857933 3343 +informationmeet 1 2 6.263398 6.263398 4450 +vass 1 2 6.263398 6.263398 4449 +cubicl 1 2 6.263398 6.263398 4451 +archivesslid 1 2 6.263398 6.263398 4452 +informationhandout 1 2 6.263398 6.263398 4163 +tutorialth 1 2 6.263398 6.263398 4453 +onmark 1 2 6.263398 6.263398 4454 +languageswint 1 1 6.957497 6.957497 8017 +craigchamb 1 1 6.957497 6.957497 8018 +archivedher 1 1 6.957497 6.957497 8019 +closedbook 1 1 6.957497 6.957497 8020 +wereask 1 1 6.957497 6.957497 8021 +tutorialsth 1 1 6.957497 6.957497 8022 +tutorialhow 1 1 6.957497 6.957497 8023 +enda 1 1 6.957497 6.957497 8024 +interestdead 1 1 6.957497 6.957497 8025 +elim 1 1 6.957497 6.957497 8026 +idfacfg 1 1 6.957497 6.957497 8027 +frameworkvortex 1 1 6.957497 6.957497 8028 +grammarcecil 1 1 6.957497 6.957497 8029 +documentationdocument 1 1 6.957497 6.957497 8030 +resourcesth 1 1 6.957497 6.957497 8031 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..678c1bbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +object 1 138 1.945910 1.945910 79 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +instructor 1 108 2.197225 2.197225 107 +find 1 111 2.197225 2.197225 111 +come 1 78 2.564949 2.564949 202 +messag 1 76 2.564949 2.564949 212 +orient 1 80 2.564949 2.564949 205 +sieg 1 69 2.708050 2.708050 260 +handout 1 64 2.772589 2.772589 263 +written 1 63 2.772589 2.772589 278 +room 1 59 2.833213 2.833213 301 +found 1 53 2.944439 2.944439 337 +archiv 1 49 3.044522 3.044522 364 +slide 1 38 3.295837 3.295837 467 +manual 1 35 3.401197 3.401197 504 +full 1 28 3.610918 3.610918 615 +pagecs 1 26 3.688879 3.688879 658 +jeff 1 25 3.737670 3.737670 673 +sent 1 22 3.850148 3.850148 763 +sort 1 22 3.850148 3.850148 738 +chateau 1 16 4.174387 4.174387 997 +dean 1 14 4.317488 4.317488 1104 +dave 1 14 4.317488 4.317488 1098 +cecil 1 9 4.753590 4.753590 1547 +chamber 1 8 4.875197 4.875197 1692 +grove 1 8 4.875197 4.875197 1675 +leon 1 8 4.875197 4.875197 1631 +craig 1 7 5.010635 5.010635 1879 +vortex 1 5 5.347108 5.347108 2362 +projectth 1 3 5.857933 5.857933 3344 +informationmeet 1 2 6.263398 6.263398 4450 +jdean 1 2 6.263398 6.263398 4455 +archivesslid 1 2 6.263398 6.263398 4452 +optimizingcompil 1 2 6.263398 6.263398 4456 +cecilproject 1 2 6.263398 6.263398 4457 +onmark 1 2 6.263398 6.263398 4454 +languagesimport 1 1 6.957497 6.957497 8032 +turori 1 1 6.957497 6.957497 8033 +andtransform 1 1 6.957497 6.957497 8034 +resourcesmor 1 1 6.957497 6.957497 8035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..ebd70dc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +assign 1 135 1.945910 1.945910 66 +spring 1 131 2.079442 2.079442 88 +handout 1 64 2.772589 2.772589 263 +sampl 1 53 2.944439 2.944439 339 +pagecs 1 26 3.688879 3.688879 658 +introductori 1 9 4.753590 4.753590 1479 +notkin 1 3 5.857933 5.857933 3345 +engineeringdavid 1 1 6.957497 6.957497 8036 +kwic 1 1 6.957497 6.957497 8037 +projectsnotkin 1 1 6.957497 6.957497 8038 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..15a975b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +object 1 138 1.945910 1.945910 79 +postscript 1 131 2.079442 2.079442 90 +studi 1 120 2.079442 2.079442 91 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +question 1 91 2.397895 2.397895 141 +resourc 1 81 2.484907 2.484907 172 +info 1 85 2.484907 2.484907 176 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +orient 1 80 2.564949 2.564949 205 +david 1 71 2.639057 2.639057 232 +html 1 75 2.639057 2.639057 235 +line 1 75 2.639057 2.639057 231 +sieg 1 69 2.708050 2.708050 260 +handout 1 64 2.772589 2.772589 263 +archiv 1 49 3.044522 3.044522 364 +standard 1 48 3.044522 3.044522 365 +mark 1 44 3.135494 3.135494 403 +singl 1 34 3.401197 3.401197 510 +concept 1 32 3.465736 3.465736 537 +administr 1 27 3.637586 3.637586 628 +pagecs 1 26 3.688879 3.688879 658 +subject 1 26 3.688879 3.688879 647 +wish 1 24 3.761200 3.761200 692 +yahoo 1 24 3.761200 3.761200 707 +thread 1 23 3.806662 3.806662 722 +sent 1 22 3.850148 3.850148 763 +self 1 22 3.850148 3.850148 761 +scheme 1 20 3.951244 3.951244 818 +excel 1 19 4.007333 4.007333 868 +previous 1 17 4.110874 4.110874 923 +floor 1 14 4.317488 4.317488 1070 +bodi 1 13 4.382027 4.382027 1178 +mellon 1 13 4.382027 4.382027 1179 +loew 1 12 4.465908 4.465908 1252 +calculu 1 12 4.465908 4.465908 1203 +carnegi 1 12 4.465908 4.465908 1260 +appl 1 11 4.553877 4.553877 1303 +kurt 1 9 4.753590 4.753590 1548 +introductori 1 9 4.753590 4.753590 1479 +subscrib 1 9 4.753590 4.753590 1541 +cecil 1 9 4.753590 4.753590 1547 +leon 1 8 4.875197 4.875197 1631 +dylan 1 8 4.875197 4.875197 1625 +majordomo 1 6 5.164786 5.164786 2066 +gentl 1 5 5.347108 5.347108 2264 +notkin 1 3 5.857933 5.857933 3345 +partridg 1 3 5.857933 5.857933 3346 +kepart 1 2 6.263398 6.263398 4459 +lambda 1 2 6.263398 6.263398 4458 +monash 1 2 6.263398 6.263398 4460 +languagesautumn 1 1 6.957497 6.957497 8039 +byappoint 1 1 6.957497 6.957497 8040 +cubiclescours 1 1 6.957497 6.957497 8041 +readingsmail 1 1 6.957497 6.957497 8042 +archivesw 1 1 6.957497 6.957497 8043 +instructionalpurpos 1 1 6.957497 6.957497 8044 +emailto 1 1 6.957497 6.957497 8045 +csegener 1 1 6.957497 6.957497 8046 +pagesprogram 1 1 6.957497 6.957497 8047 +critiquesgari 1 1 6.957497 6.957497 8048 +leaven 1 1 6.957497 6.957497 8049 +pagefunct 1 1 6.957497 6.957497 8050 +resourcesmit 1 1 6.957497 6.957497 8051 +pagecmu 1 1 6.957497 6.957497 8052 +pagea 1 1 6.957497 6.957497 8053 +mlhaskel 1 1 6.957497 6.957497 8054 +universityobject 1 1 6.957497 6.957497 8055 +geneva 1 1 6.957497 6.957497 8056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..882ced67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +theori 1 111 2.197225 2.197225 127 +book 1 99 2.302585 2.302585 131 +question 1 91 2.397895 2.397895 141 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +learn 1 86 2.484907 2.484907 170 +mondai 1 77 2.564949 2.564949 206 +know 1 80 2.564949 2.564949 198 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +materi 1 75 2.639057 2.639057 221 +solv 1 73 2.639057 2.639057 234 +write 1 72 2.639057 2.639057 222 +sieg 1 69 2.708050 2.708050 260 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +written 1 63 2.772589 2.772589 278 +room 1 59 2.833213 2.833213 301 +march 1 61 2.833213 2.833213 295 +undergradu 1 54 2.944439 2.944439 338 +suggest 1 53 2.944439 2.944439 331 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +set 1 50 3.044522 3.044522 361 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +discuss 1 45 3.135494 3.135494 399 +realli 1 40 3.258097 3.258097 444 +probabl 1 40 3.258097 3.258097 455 +must 1 40 3.258097 3.258097 442 +close 1 38 3.295837 3.295837 465 +winter 1 36 3.367296 3.367296 500 +soon 1 36 3.367296 3.367296 494 +short 1 36 3.367296 3.367296 499 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +richard 1 31 3.496508 3.496508 559 +particip 1 29 3.583519 3.583519 589 +rule 1 26 3.688879 3.688879 638 +background 1 25 3.737670 3.737670 664 +togeth 1 23 3.806662 3.806662 714 +william 1 22 3.850148 3.850148 765 +half 1 21 3.912023 3.912023 776 +anderson 1 19 4.007333 4.007333 860 +els 1 19 4.007333 4.007333 843 +prerequisit 1 19 4.007333 4.007333 846 +assum 1 19 4.007333 4.007333 845 +chateau 1 16 4.174387 4.174387 997 +alreadi 1 16 4.174387 4.174387 963 +quiz 1 16 4.174387 4.174387 990 +upon 1 16 4.174387 4.174387 978 +anyth 1 16 4.174387 4.174387 998 +floor 1 14 4.317488 4.317488 1070 +script 1 13 4.382027 4.382027 1171 +verifi 1 12 4.465908 4.465908 1261 +island 1 11 4.553877 4.553877 1345 +errata 1 10 4.653960 4.653960 1403 +classmat 1 9 4.753590 4.753590 1516 +equival 1 9 4.753590 4.753590 1496 +told 1 8 4.875197 4.875197 1658 +chan 1 7 5.010635 5.010635 1876 +wrong 1 6 5.164786 5.164786 2025 +lack 1 6 5.164786 5.164786 1994 +invok 1 6 5.164786 5.164786 2079 +understood 1 5 5.347108 5.347108 2364 +cancel 1 4 5.568345 5.568345 2746 +episod 1 4 5.568345 5.568345 2747 +wchan 1 3 5.857933 5.857933 3338 +preview 1 3 5.857933 5.857933 3306 +algorithmscs 1 2 6.263398 6.263398 4461 +seig 1 2 6.263398 6.263398 4462 +cubicl 1 2 6.263398 6.263398 4451 +somebodi 1 2 6.263398 6.263398 4463 +outer 1 2 6.263398 6.263398 4464 +okai 1 2 6.263398 6.263398 4465 +eduwchan 1 2 6.263398 6.263398 4435 +readingtextbook 1 1 6.957497 6.957497 8058 +sapplet 1 1 6.957497 6.957497 8059 +willconsist 1 1 6.957497 6.957497 8060 +bureaucrat 1 1 6.957497 6.957497 8061 +stuffgrad 1 1 6.957497 6.957497 8062 +homeworkproblem 1 1 6.957497 6.957497 8063 +upindepend 1 1 6.957497 6.957497 8064 +gilligan 1 1 6.957497 6.957497 8057 +betweenani 1 1 6.957497 6.957497 8065 +mustwatch 1 1 6.957497 6.957497 8066 +thatan 1 1 6.957497 6.957497 8067 +reboot 1 1 6.957497 6.957497 8068 +thatsurv 1 1 6.957497 6.957497 8069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..98ee1a2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +teach 1 108 2.197225 2.197225 112 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +select 1 91 2.397895 2.397895 154 +section 1 94 2.397895 2.397895 149 +real 1 93 2.397895 2.397895 144 +present 1 91 2.397895 2.397895 145 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +homework 1 79 2.564949 2.564949 193 +april 1 77 2.564949 2.564949 196 +refer 1 78 2.564949 2.564949 203 +come 1 78 2.564949 2.564949 202 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +effici 1 73 2.639057 2.639057 233 +tuesdai 1 73 2.639057 2.639057 219 +write 1 72 2.639057 2.639057 222 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +would 1 67 2.708050 2.708050 251 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +plan 1 65 2.772589 2.772589 272 +result 1 65 2.772589 2.772589 281 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +major 1 56 2.890372 2.890372 315 +special 1 56 2.890372 2.890372 320 +think 1 57 2.890372 2.890372 314 +cover 1 55 2.944439 2.944439 329 +three 1 54 2.944439 2.944439 330 +particular 1 51 2.995732 2.995732 352 +approach 1 48 3.044522 3.044522 366 +pointer 1 48 3.044522 3.044522 368 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +could 1 46 3.091042 3.091042 383 +term 1 43 3.178054 3.178054 411 +howev 1 41 3.218876 3.218876 422 +fast 1 42 3.218876 3.218876 429 +probabl 1 40 3.258097 3.258097 455 +correct 1 38 3.295837 3.295837 462 +close 1 38 3.295837 3.295837 465 +origin 1 38 3.295837 3.295837 472 +open 1 38 3.295837 3.295837 469 +connect 1 37 3.332205 3.332205 485 +cost 1 37 3.332205 3.332205 480 +expect 1 37 3.332205 3.332205 484 +feel 1 37 3.332205 3.332205 483 +next 1 34 3.401197 3.401197 517 +go 1 33 3.433987 3.433987 529 +taught 1 33 3.433987 3.433987 526 +richard 1 31 3.496508 3.496508 559 +titl 1 31 3.496508 3.496508 556 +graph 1 30 3.555348 3.555348 576 +compon 1 30 3.555348 3.555348 570 +exist 1 30 3.555348 3.555348 569 +limit 1 29 3.583519 3.583519 585 +consid 1 29 3.583519 3.583519 590 +progress 1 28 3.610918 3.610918 598 +quit 1 27 3.637586 3.637586 633 +mind 1 27 3.637586 3.637586 632 +challeng 1 26 3.688879 3.688879 653 +fundament 1 25 3.737670 3.737670 661 +although 1 25 3.737670 3.737670 667 +supercomput 1 25 3.737670 3.737670 681 +other 1 24 3.761200 3.761200 697 +sort 1 22 3.850148 3.850148 738 +emphasi 1 22 3.850148 3.850148 755 +instead 1 22 3.850148 3.850148 756 +theorem 1 21 3.912023 3.912023 786 +flexibl 1 21 3.912023 3.912023 792 +half 1 21 3.912023 3.912023 776 +nice 1 20 3.951244 3.951244 809 +anderson 1 19 4.007333 4.007333 860 +prerequisit 1 19 4.007333 4.007333 846 +spend 1 19 4.007333 4.007333 850 +prove 1 19 4.007333 4.007333 848 +four 1 18 4.060443 4.060443 905 +matrix 1 17 4.110874 4.110874 933 +interconnect 1 17 4.110874 4.110874 937 +upon 1 16 4.174387 4.174387 978 +choic 1 16 4.174387 4.174387 979 +mayb 1 15 4.248495 4.248495 1014 +indic 1 15 4.248495 4.248495 1013 +purchas 1 15 4.248495 4.248495 1030 +rank 1 14 4.317488 4.317488 1063 +latex 1 14 4.317488 4.317488 1064 +topolog 1 14 4.317488 4.317488 1089 +consider 1 14 4.317488 4.317488 1076 +happi 1 14 4.317488 4.317488 1079 +insid 1 12 4.465908 4.465908 1262 +asynchron 1 12 4.465908 4.465908 1229 +transpar 1 11 4.553877 4.553877 1325 +sens 1 11 4.553877 4.553877 1305 +motiv 1 11 4.553877 4.553877 1346 +volum 1 11 4.553877 4.553877 1347 +catalog 1 10 4.653960 4.653960 1431 +arithmet 1 10 4.653960 4.653960 1388 +tradit 1 10 4.653960 4.653960 1404 +equival 1 9 4.753590 4.753590 1496 +routin 1 9 4.753590 4.753590 1549 +foc 1 7 5.010635 5.010635 1880 +uniform 1 7 5.010635 5.010635 1845 +plu 1 6 5.164786 5.164786 2004 +consensu 1 6 5.164786 5.164786 2080 +situat 1 5 5.347108 5.347108 2365 +volunt 1 5 5.347108 5.347108 2307 +algorithmsfor 1 4 5.568345 5.568345 2748 +ullman 1 4 5.568345 5.568345 2749 +union 1 4 5.568345 5.568345 2634 +buss 1 4 5.568345 5.568345 2649 +manuscript 1 4 5.568345 5.568345 2750 +wewil 1 4 5.568345 5.568345 2688 +cheap 1 4 5.568345 5.568345 2751 +chose 1 4 5.568345 5.568345 2629 +rambl 1 3 5.857933 5.857933 3308 +crew 1 3 5.857933 5.857933 3347 +impli 1 3 5.857933 5.857933 3348 +pertain 1 3 5.857933 5.857933 3208 +andyou 1 3 5.857933 5.857933 3256 +parallelalgorithm 1 3 5.857933 5.857933 3249 +influenc 1 3 5.857933 5.857933 3349 +algorithmscs 1 2 6.263398 6.263398 4461 +algorithmi 1 2 6.263398 6.263398 4208 +simpler 1 2 6.263398 6.263398 4210 +swap 1 2 6.263398 6.263398 4466 +exception 1 2 6.263398 6.263398 4467 +bake 1 2 6.263398 6.263398 4468 +ideason 1 2 6.263398 6.263398 4469 +appointment 1 1 6.957497 6.957497 8070 +developingfast 1 1 6.957497 6.957497 8071 +theirefficaci 1 1 6.957497 6.957497 8072 +commentsabout 1 1 6.957497 6.957497 8073 +analysisfor 1 1 6.957497 6.957497 8074 +referencesfor 1 1 6.957497 6.957497 8075 +erew 1 1 6.957497 6.957497 8076 +yannakaki 1 1 6.957497 6.957497 8077 +certifi 1 1 6.957497 6.957497 8078 +likelysometh 1 1 6.957497 6.957497 8079 +martel 1 1 6.957497 6.957497 8080 +whim 1 1 6.957497 6.957497 8081 +smpc 1 1 6.957497 6.957497 8082 +lookingat 1 1 6.957497 6.957497 8083 +isnon 1 1 6.957497 6.957497 8084 +notconsid 1 1 6.957497 6.957497 8085 +indevelop 1 1 6.957497 6.957497 8086 +algorithmswhich 1 1 6.957497 6.957497 8087 +conceiv 1 1 6.957497 6.957497 8088 +goingto 1 1 6.957497 6.957497 8089 +outsidework 1 1 6.957497 6.957497 8090 +befollow 1 1 6.957497 6.957497 8091 +youcould 1 1 6.957497 6.957497 8092 +textwould 1 1 6.957497 6.957497 8093 +artof 1 1 6.957497 6.957497 8094 +mychoic 1 1 6.957497 6.957497 8095 +interestingor 1 1 6.957497 6.957497 8096 +uninterest 1 1 6.957497 6.957497 8097 +aseith 1 1 6.957497 6.957497 8098 +researchcont 1 1 6.957497 6.957497 8099 +turninto 1 1 6.957497 6.957497 8100 +andenergi 1 1 6.957497 6.957497 8101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..2a5b951d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +provid 1 121 2.079442 2.079442 94 +comment 1 93 2.397895 2.397895 146 +sourc 1 77 2.564949 2.564949 201 +complex 1 64 2.772589 2.772589 269 +move 1 47 3.091042 3.091042 382 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +autumn 1 31 3.496508 3.496508 558 +ofwashington 1 22 3.850148 3.850148 766 +portion 1 16 4.174387 4.174387 971 +webmast 1 15 4.248495 4.248495 1045 +reprint 1 14 4.317488 4.317488 1097 +automata 1 13 4.382027 4.382027 1135 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +foracadem 1 5 5.347108 5.347108 2341 +accuratelyquot 1 2 6.263398 6.263398 4470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..ec9360d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +welcom 1 122 2.079442 2.079442 99 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +world 1 115 2.197225 2.197225 126 +final 1 116 2.197225 2.197225 108 +wide 1 84 2.484907 2.484907 185 +exam 1 86 2.484907 2.484907 169 +paul 1 38 3.295837 3.295837 471 +short 1 36 3.367296 3.367296 499 +quiz 1 16 4.174387 4.174387 990 +latex 1 14 4.317488 4.317488 1064 +hypermedia 1 12 4.465908 4.465908 1247 +documentfor 1 7 5.010635 5.010635 1865 +beam 1 5 5.347108 5.347108 2344 +automataautumn 1 1 6.957497 6.957497 8102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..1525a1b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +comment 1 93 2.397895 2.397895 146 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +sourc 1 77 2.564949 2.564949 201 +exampl 1 77 2.564949 2.564949 195 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +complex 1 64 2.772589 2.772589 269 +handout 1 64 2.772589 2.772589 263 +organ 1 65 2.772589 2.772589 265 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +three 1 54 2.944439 2.944439 330 +format 1 48 3.044522 3.044522 356 +adapt 1 46 3.091042 3.091042 387 +textbook 1 44 3.135494 3.135494 397 +midterm 1 45 3.135494 3.135494 392 +math 1 44 3.135494 3.135494 402 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +print 1 34 3.401197 3.401197 503 +collabor 1 32 3.465736 3.465736 543 +autumn 1 31 3.496508 3.496508 558 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +sent 1 22 3.850148 3.850148 763 +ofwashington 1 22 3.850148 3.850148 766 +latest 1 21 3.912023 3.912023 785 +viewer 1 21 3.912023 3.912023 787 +figur 1 18 4.060443 4.060443 903 +render 1 17 4.110874 4.110874 947 +portion 1 16 4.174387 4.174387 971 +ascii 1 15 4.248495 4.248495 1032 +webmast 1 15 4.248495 4.248495 1045 +latex 1 14 4.317488 4.317488 1064 +command 1 14 4.317488 4.317488 1083 +reprint 1 14 4.317488 4.317488 1097 +automata 1 13 4.382027 4.382027 1135 +larri 1 13 4.382027 4.382027 1142 +web 1 12 4.465908 4.465908 1249 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +errata 1 10 4.653960 4.653960 1403 +tuth 1 9 4.753590 4.753590 1519 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +perhap 1 8 4.875197 4.875197 1693 +legibl 1 7 5.010635 5.010635 1866 +adob 1 7 5.010635 5.010635 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 1 5 5.347108 5.347108 2345 +foracadem 1 5 5.347108 5.347108 2341 +sharma 1 4 5.568345 5.568345 2752 +thecours 1 4 5.568345 5.568345 2685 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +administrivia 1 3 5.857933 5.857933 3166 +ghostview 1 3 5.857933 5.857933 3163 +accuratelyquot 1 2 6.263398 6.263398 4470 +staffnameemailphoneoffic 1 1 6.957497 6.957497 8104 +nitin 1 1 6.957497 6.957497 8103 +csmw 1 1 6.957497 6.957497 8105 +acroread 1 1 6.957497 6.957497 8106 +aavail 1 1 6.957497 6.957497 8107 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..cd5c3a8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +well 1 109 2.197225 2.197225 121 +search 1 95 2.397895 2.397895 155 +thing 1 84 2.484907 2.484907 189 +issu 1 78 2.564949 2.564949 211 +good 1 77 2.564949 2.564949 200 +june 1 79 2.564949 2.564949 214 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +test 1 66 2.708050 2.708050 252 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +complex 1 64 2.772589 2.772589 269 +talk 1 53 2.944439 2.944439 336 +give 1 50 3.044522 3.044522 359 +even 1 45 3.135494 3.135494 393 +autom 1 41 3.218876 3.218876 434 +examin 1 42 3.218876 3.218876 424 +theoret 1 39 3.258097 3.258097 446 +paul 1 38 3.295837 3.295837 471 +slide 1 38 3.295837 3.295837 467 +survei 1 35 3.401197 3.401197 513 +within 1 33 3.433987 3.433987 525 +autumn 1 31 3.496508 3.496508 558 +often 1 31 3.496508 3.496508 551 +consid 1 29 3.583519 3.583519 590 +compar 1 26 3.688879 3.688879 648 +strategi 1 25 3.737670 3.737670 682 +higher 1 24 3.761200 3.761200 690 +interpret 1 24 3.761200 3.761200 686 +proof 1 23 3.806662 3.806662 720 +varieti 1 22 3.850148 3.850148 740 +instal 1 22 3.850148 3.850148 754 +theorem 1 21 3.912023 3.912023 786 +vlsi 1 21 3.912023 3.912023 795 +verif 1 20 3.951244 3.951244 826 +prove 1 19 4.007333 4.007333 848 +aid 1 18 4.060443 4.060443 904 +concentr 1 18 4.060443 4.060443 906 +attempt 1 17 4.110874 4.110874 917 +moor 1 17 4.110874 4.110874 936 +choic 1 16 4.174387 4.174387 979 +side 1 15 4.248495 4.248495 1022 +anywai 1 15 4.248495 4.248495 1047 +decid 1 14 4.317488 4.317488 1075 +consider 1 14 4.317488 4.317488 1076 +loew 1 12 4.465908 4.465908 1252 +statement 1 11 4.553877 4.553877 1313 +rel 1 9 4.753590 4.753590 1487 +satisfi 1 8 4.875197 4.875197 1694 +prover 1 8 4.875197 4.875197 1653 +proposit 1 5 5.347108 5.347108 2339 +beam 1 5 5.347108 5.347108 2344 +amus 1 5 5.347108 5.347108 2366 +andsoftwar 1 4 5.568345 5.568345 2753 +tester 1 4 5.568345 5.568345 2754 +theoremprov 1 3 5.857933 5.857933 3298 +theoryand 1 3 5.857933 5.857933 3350 +scatter 1 3 5.857933 5.857933 3351 +truthof 1 1 6.957497 6.957497 8108 +casea 1 1 6.957497 6.957497 8109 +flip 1 1 6.957497 6.957497 8110 +oftheorem 1 1 6.957497 6.957497 8111 +finitedomain 1 1 6.957497 6.957497 8112 +thesequest 1 1 6.957497 6.957497 8113 +complexityand 1 1 6.957497 6.957497 8114 +anumb 1 1 6.957497 6.957497 8115 +urquhart 1 1 6.957497 6.957497 8116 +sato 1 1 6.957497 6.957497 8117 +andboy 1 1 6.957497 6.957497 8118 +gsat 1 1 6.957497 6.957497 8119 +thedirectori 1 1 6.957497 6.957497 8120 +proversther 1 1 6.957497 6.957497 8121 +ofinstal 1 1 6.957497 6.957497 8122 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..a329dac8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +confer 1 126 2.079442 2.079442 100 +topic 1 114 2.197225 2.197225 110 +solut 1 82 2.484907 2.484907 162 +mondai 1 77 2.564949 2.564949 206 +meet 1 72 2.639057 2.639057 229 +wednesdai 1 64 2.772589 2.772589 261 +fridai 1 44 3.135494 3.135494 390 +packag 1 28 3.610918 3.610918 614 +measur 1 28 3.610918 3.610918 609 +pagecs 1 26 3.688879 3.688879 658 +sigmetr 1 13 4.382027 4.382027 1173 +loew 1 12 4.465908 4.465908 1252 +host 1 11 4.553877 4.553877 1306 +queue 1 10 4.653960 4.653960 1386 +systemperform 1 1 6.957497 6.957497 8123 +modelingspr 1 1 6.957497 6.957497 8124 +lazowskaandmaryvernonwelcom 1 1 6.957497 6.957497 8125 +performancemodel 1 1 6.957497 6.957497 8126 +hourstent 1 1 6.957497 6.957497 8127 +schedulecom 1 1 6.957497 6.957497 8128 +goingsassignmentsproject 1 1 6.957497 6.957497 8129 +informationmap 1 1 6.957497 6.957497 8130 +emailoth 1 1 6.957497 6.957497 8131 +computersystemsuw 1 1 6.957497 6.957497 8132 +engineeringlazowska 1 1 6.957497 6.957497 8133 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..e19eb4e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +postscript 1 131 2.079442 2.079442 90 +schedul 1 119 2.079442 2.079442 85 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +code 1 108 2.197225 2.197225 116 +user 1 104 2.302585 2.302585 137 +center 1 88 2.397895 2.397895 158 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +info 1 85 2.484907 2.484907 176 +sieg 1 69 2.708050 2.708050 260 +test 1 66 2.708050 2.708050 252 +simul 1 66 2.708050 2.708050 255 +previou 1 62 2.772589 2.772589 290 +overview 1 56 2.890372 2.890372 323 +instruct 1 53 2.944439 2.944439 332 +local 1 55 2.944439 2.944439 334 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +set 1 50 3.044522 3.044522 361 +execut 1 45 3.135494 3.135494 404 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +continu 1 39 3.258097 3.258097 448 +close 1 38 3.295837 3.295837 465 +manual 1 35 3.401197 3.401197 504 +multiprocessor 1 28 3.610918 3.610918 605 +binari 1 20 3.951244 3.951244 823 +histori 1 19 4.007333 4.007333 853 +benchmark 1 19 4.007333 4.007333 859 +analyz 1 17 4.110874 4.110874 925 +monitor 1 17 4.110874 4.110874 941 +rate 1 15 4.248495 4.248495 1037 +driven 1 15 4.248495 4.248495 1048 +neat 1 12 4.465908 4.465908 1263 +alpha 1 11 4.553877 4.553877 1348 +tuth 1 9 4.753590 4.753590 1519 +egger 1 8 4.875197 4.875197 1695 +uniprocessor 1 8 4.875197 4.875197 1696 +spec 1 8 4.875197 4.875197 1640 +shade 1 7 5.010635 5.010635 1881 +sparc 1 7 5.010635 5.010635 1860 +tullsen 1 6 5.164786 5.164786 2081 +superscalar 1 6 5.164786 5.164786 2082 +pentium 1 6 5.164786 5.164786 2077 +rewrit 1 5 5.347108 5.347108 2367 +etch 1 4 5.568345 5.568345 2755 +redston 1 3 5.857933 5.857933 3332 +specmark 1 2 6.263398 6.263398 4471 +atom 1 2 6.263398 6.263398 4472 +multiflow 1 2 6.263398 6.263398 4473 +powerpc 1 2 6.263398 6.263398 4238 +architecturewint 1 1 6.957497 6.957497 8134 +instructorsusan 1 1 6.957497 6.957497 8135 +tajoshua 1 1 6.957497 6.957497 8136 +instuct 1 1 6.957497 6.957497 8137 +pixi 1 1 6.957497 6.957497 8138 +dinero 1 1 6.957497 6.957497 8139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..fa32a963 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +spring 1 131 2.079442 2.079442 88 +confer 1 126 2.079442 2.079442 100 +number 1 130 2.079442 2.079442 97 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +instructor 1 108 2.197225 2.197225 107 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +april 1 77 2.564949 2.564949 196 +meet 1 72 2.639057 2.639057 229 +room 1 59 2.833213 2.833213 301 +frequent 1 49 3.044522 3.044522 367 +keep 1 44 3.135494 3.135494 409 +announc 1 40 3.258097 3.258097 441 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +ad 1 32 3.465736 3.465736 544 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +pagecs 1 26 3.688879 3.688879 658 +unit 1 21 3.912023 3.912023 779 +chateau 1 16 4.174387 4.174387 997 +levi 1 14 4.317488 4.317488 1093 +hank 1 12 4.465908 4.465908 1253 +hypermedia 1 12 4.465908 4.465908 1247 +readi 1 12 4.465908 4.465908 1242 +pighin 1 4 5.568345 5.568345 2735 +freder 1 3 5.857933 5.857933 3352 +thisdocu 1 3 5.857933 5.857933 3336 +iti 1 2 6.263398 6.263398 4066 +forcs 1 1 6.957497 6.957497 8140 +classmessag 1 1 6.957497 6.957497 8141 +projectlevi 1 1 6.957497 6.957497 8142 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..df0eabde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +grade 1 90 2.397895 2.397895 142 +imag 1 91 2.397895 2.397895 161 +follow 1 92 2.397895 2.397895 143 +wide 1 84 2.484907 2.484907 185 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +resourc 1 81 2.484907 2.484907 172 +homework 1 79 2.564949 2.564949 193 +addit 1 74 2.639057 2.639057 228 +html 1 75 2.639057 2.639057 235 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +degre 1 69 2.708050 2.708050 259 +written 1 63 2.772589 2.772589 278 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +room 1 59 2.833213 2.833213 301 +colleg 1 61 2.833213 2.833213 300 +locat 1 59 2.833213 2.833213 303 +browser 1 56 2.890372 2.890372 313 +instruct 1 53 2.944439 2.944439 332 +cool 1 49 3.044522 3.044522 374 +visitor 1 49 3.044522 3.044522 371 +basic 1 50 3.044522 3.044522 360 +quarter 1 47 3.091042 3.091042 389 +get 1 46 3.091042 3.091042 380 +keep 1 44 3.135494 3.135494 409 +offer 1 43 3.178054 3.178054 414 +art 1 29 3.583519 3.583519 593 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +pagecs 1 26 3.688879 3.688879 658 +hypertext 1 19 4.007333 4.007333 865 +charact 1 15 4.248495 4.248495 1028 +hypermedia 1 12 4.465908 4.465908 1247 +mosaic 1 10 4.653960 4.653960 1426 +uniform 1 7 5.010635 5.010635 1845 +markup 1 6 5.164786 5.164786 2059 +whichcontain 1 4 5.568345 5.568345 2714 +wealth 1 3 5.857933 5.857933 3353 +thatthi 1 2 6.263398 6.263398 4379 +addedfrequ 1 2 6.263398 6.263398 4380 +deros 1 2 6.263398 6.263398 4474 +indi 1 2 6.263398 6.263398 4431 +mvi 1 2 6.263398 6.263398 4382 +usinglynx 1 2 6.263398 6.263398 4383 +graphicsautumn 1 1 6.957497 6.957497 8143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..135b8b0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +site 1 106 2.197225 2.197225 119 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +exam 1 86 2.484907 2.484907 169 +academ 1 82 2.484907 2.484907 178 +mondai 1 77 2.564949 2.564949 206 +complet 1 77 2.564949 2.564949 208 +homework 1 79 2.564949 2.564949 193 +server 1 76 2.564949 2.564949 204 +sourc 1 77 2.564949 2.564949 201 +summari 1 73 2.639057 2.639057 237 +tuesdai 1 73 2.639057 2.639057 219 +logic 1 71 2.639057 2.639057 230 +nation 1 74 2.639057 2.639057 240 +syllabu 1 67 2.708050 2.708050 247 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +import 1 65 2.772589 2.772589 282 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +cover 1 55 2.944439 2.944439 329 +digit 1 52 2.995732 2.995732 348 +principl 1 48 3.044522 3.044522 357 +still 1 50 3.044522 3.044522 362 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +fridai 1 44 3.135494 3.135494 390 +review 1 42 3.218876 3.218876 425 +combin 1 42 3.218876 3.218876 421 +announc 1 40 3.258097 3.258097 441 +paul 1 38 3.295837 3.295837 471 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +purpos 1 37 3.332205 3.332205 481 +staff 1 36 3.367296 3.367296 490 +copyright 1 36 3.367296 3.367296 495 +board 1 33 3.433987 3.433987 528 +quot 1 29 3.583519 3.583519 582 +accur 1 25 3.737670 3.737670 680 +begin 1 23 3.806662 3.806662 716 +sequenti 1 22 3.850148 3.850148 745 +vlsi 1 21 3.912023 3.912023 795 +synthesi 1 20 3.951244 3.951244 834 +mostli 1 19 4.007333 4.007333 869 +dilbert 1 16 4.174387 4.174387 996 +sheet 1 16 4.174387 4.174387 973 +portion 1 16 4.174387 4.174387 971 +carl 1 15 4.248495 4.248495 1024 +comic 1 14 4.317488 4.317488 1103 +reprint 1 14 4.317488 4.317488 1097 +larri 1 13 4.382027 4.382027 1142 +loew 1 12 4.465908 4.465908 1252 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +franklin 1 10 4.653960 4.653960 1436 +fpga 1 10 4.653960 4.653960 1433 +motorola 1 9 4.753590 4.753590 1546 +mother 1 6 5.164786 5.164786 2083 +philip 1 6 5.164786 5.164786 2005 +ebel 1 4 5.568345 5.568345 2756 +mcmurchi 1 4 5.568345 5.568345 2757 +murphi 1 4 5.568345 5.568345 2737 +semiconductor 1 3 5.857933 5.857933 3339 +semiconduct 1 3 5.857933 5.857933 3340 +micron 1 3 5.857933 5.857933 3341 +hine 1 2 6.263398 6.263398 4475 +guru 1 2 6.263398 6.263398 4476 +comprehensivelist 1 2 6.263398 6.263398 4439 +icmanufactur 1 2 6.263398 6.263398 4440 +hineskj 1 1 6.957497 6.957497 8144 +pamett 1 1 6.957497 6.957497 8145 +groupsfin 1 1 6.957497 6.957497 8146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..c6808e1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +topic 1 114 2.197225 2.197225 110 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +build 1 85 2.484907 2.484907 184 +messag 1 76 2.564949 2.564949 212 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +sieg 1 69 2.708050 2.708050 260 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +index 1 56 2.890372 2.890372 309 +reason 1 57 2.890372 2.890372 318 +archiv 1 49 3.044522 3.044522 364 +quarter 1 47 3.091042 3.091042 389 +past 1 42 3.218876 3.218876 428 +staff 1 36 3.367296 3.367296 490 +represent 1 35 3.401197 3.401197 512 +pagecs 1 26 3.688879 3.688879 658 +fundament 1 25 3.737670 3.737670 661 +methodolog 1 23 3.806662 3.806662 733 +outlin 1 17 4.110874 4.110874 914 +nick 1 13 4.382027 4.382027 1180 +pose 1 9 4.753590 4.753590 1535 +depth 1 8 4.875197 4.875197 1636 +marc 1 8 4.875197 4.875197 1680 +uncertainti 1 7 5.010635 5.010635 1882 +machinelearn 1 6 5.164786 5.164786 2084 +anin 1 3 5.857933 5.857933 3354 +assignmentsassign 1 3 5.857933 5.857933 3342 +mailinglist 1 3 5.857933 5.857933 3325 +intelligencefal 1 2 6.263398 6.263398 4477 +andchalleng 1 2 6.263398 6.263398 4478 +intelligentmachin 1 2 6.263398 6.263398 4479 +agentarchitectur 1 2 6.263398 6.263398 4480 +weldweld 1 2 6.263398 6.263398 4481 +friedmanfriedman 1 2 6.263398 6.263398 4482 +kushmericknick 1 2 6.263398 6.263398 4483 +examsgradingresourcesth 1 2 6.263398 6.263398 4484 +topicsread 1 1 6.957497 6.957497 8147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..620beec5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +postscript 1 131 2.079442 2.079442 90 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +graphic 1 90 2.397895 2.397895 147 +member 1 84 2.484907 2.484907 165 +requir 1 81 2.484907 2.484907 167 +librari 1 87 2.484907 2.484907 181 +refer 1 78 2.564949 2.564949 203 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +intellig 1 72 2.639057 2.639057 225 +materi 1 75 2.639057 2.639057 221 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +artifici 1 63 2.772589 2.772589 280 +collect 1 65 2.772589 2.772589 268 +foundat 1 62 2.772589 2.772589 286 +copi 1 63 2.772589 2.772589 284 +written 1 63 2.772589 2.772589 278 +juli 1 60 2.833213 2.833213 305 +reason 1 57 2.890372 2.890372 318 +sever 1 56 2.890372 2.890372 322 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +around 1 43 3.178054 3.178054 415 +edit 1 42 3.218876 3.218876 418 +probabl 1 40 3.258097 3.258097 455 +winter 1 36 3.367296 3.367296 500 +statist 1 35 3.401197 3.401197 521 +go 1 33 3.433987 3.433987 529 +chapter 1 32 3.465736 3.465736 536 +steve 1 29 3.583519 3.583519 594 +though 1 27 3.637586 3.637586 622 +request 1 26 3.688879 3.688879 635 +decis 1 23 3.806662 3.806662 728 +nice 1 20 3.951244 3.951244 809 +expert 1 20 3.951244 3.951244 833 +definit 1 19 4.007333 4.007333 864 +goe 1 15 4.248495 4.248495 1044 +signific 1 13 4.382027 4.382027 1125 +hank 1 12 4.465908 4.465908 1253 +probabilist 1 11 4.553877 4.553877 1343 +extrem 1 11 4.553877 4.553877 1330 +worth 1 11 4.553877 4.553877 1294 +perspect 1 10 4.653960 4.653960 1437 +uncertainti 1 7 5.010635 5.010635 1882 +whenev 1 7 5.010635 5.010635 1883 +heavi 1 7 5.010635 5.010635 1841 +secondari 1 7 5.010635 5.010635 1884 +histor 1 6 5.164786 5.164786 2085 +arrang 1 6 5.164786 5.164786 2023 +overlap 1 5 5.347108 5.347108 2368 +uncertain 1 4 5.568345 5.568345 2758 +cash 1 3 5.857933 5.857933 3355 +grail 1 3 5.857933 5.857933 3356 +alon 1 3 5.857933 5.857933 3139 +pearl 1 2 6.263398 6.263398 4485 +bui 1 2 6.263398 6.263398 4486 +algorithmsa 1 2 6.263398 6.263398 4487 +systemsthi 1 1 6.957497 6.957497 8148 +strappedfor 1 1 6.957497 6.957497 8149 +shafer 1 1 6.957497 6.957497 8150 +reasoningthi 1 1 6.957497 6.957497 8151 +jayn 1 1 6.957497 6.957497 8152 +fragmentari 1 1 6.957497 6.957497 8153 +foundationsof 1 1 6.957497 6.957497 8154 +beautifulli 1 1 6.957497 6.957497 8155 +neapolitan 1 1 6.957497 6.957497 8156 +propagationalgorithm 1 1 6.957497 6.957497 8157 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..6835318b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,272 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +pleas 1 113 2.197225 2.197225 114 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +take 1 97 2.302585 2.302585 134 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +imag 1 91 2.397895 2.397895 161 +follow 1 92 2.397895 2.397895 143 +proceed 1 93 2.397895 2.397895 152 +select 1 91 2.397895 2.397895 154 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +second 1 81 2.484907 2.484907 166 +environ 1 84 2.484907 2.484907 177 +educ 1 86 2.484907 2.484907 191 +level 1 87 2.484907 2.484907 180 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +exam 1 86 2.484907 2.484907 169 +librari 1 87 2.484907 2.484907 181 +start 1 83 2.484907 2.484907 173 +resourc 1 81 2.484907 2.484907 172 +help 1 83 2.484907 2.484907 175 +messag 1 76 2.564949 2.564949 212 +april 1 77 2.564949 2.564949 196 +mondai 1 77 2.564949 2.564949 206 +june 1 79 2.564949 2.564949 214 +know 1 80 2.564949 2.564949 198 +materi 1 75 2.639057 2.639057 221 +meet 1 72 2.639057 2.639057 229 +name 1 72 2.639057 2.639057 220 +onlin 1 75 2.639057 2.639057 223 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +sieg 1 69 2.708050 2.708050 260 +wednesdai 1 64 2.772589 2.772589 261 +experi 1 64 2.772589 2.772589 283 +plan 1 65 2.772589 2.772589 272 +copi 1 63 2.772589 2.772589 284 +import 1 65 2.772589 2.772589 282 +laboratori 1 63 2.772589 2.772589 292 +room 1 59 2.833213 2.833213 301 +march 1 61 2.833213 2.833213 295 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +index 1 56 2.890372 2.890372 309 +local 1 55 2.944439 2.944439 334 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +undergradu 1 54 2.944439 2.944439 338 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +frequent 1 49 3.044522 3.044522 367 +get 1 46 3.091042 3.091042 380 +understand 1 47 3.091042 3.091042 384 +keep 1 44 3.135494 3.135494 409 +anoth 1 45 3.135494 3.135494 408 +midterm 1 45 3.135494 3.135494 392 +fridai 1 44 3.135494 3.135494 390 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +review 1 42 3.218876 3.218876 425 +vision 1 41 3.218876 3.218876 430 +tutori 1 39 3.258097 3.258097 437 +announc 1 40 3.258097 3.258097 441 +form 1 39 3.258097 3.258097 443 +correct 1 38 3.295837 3.295837 462 +slide 1 38 3.295837 3.295837 467 +workstat 1 37 3.332205 3.332205 479 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +ofth 1 36 3.367296 3.367296 491 +next 1 34 3.401197 3.401197 517 +least 1 35 3.401197 3.401197 516 +post 1 35 3.401197 3.401197 505 +either 1 35 3.401197 3.401197 506 +articl 1 33 3.433987 3.433987 530 +ad 1 32 3.465736 3.465736 544 +chapter 1 32 3.465736 3.465736 536 +transform 1 32 3.465736 3.465736 542 +turn 1 29 3.583519 3.583519 586 +univ 1 28 3.610918 3.610918 617 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +determin 1 27 3.637586 3.637586 630 +pagecs 1 26 3.688879 3.688879 658 +effort 1 26 3.688879 3.688879 652 +notic 1 25 3.737670 3.737670 675 +pattern 1 24 3.761200 3.761200 689 +store 1 24 3.761200 3.761200 693 +displai 1 23 3.806662 3.806662 712 +variabl 1 23 3.806662 3.806662 715 +recognit 1 23 3.806662 3.806662 723 +instal 1 22 3.850148 3.850148 754 +period 1 22 3.850148 3.850148 743 +path 1 21 3.912023 3.912023 778 +navig 1 21 3.912023 3.912023 796 +exercis 1 19 4.007333 4.007333 842 +comparison 1 19 4.007333 4.007333 863 +appropri 1 18 4.060443 4.060443 883 +statu 1 18 4.060443 4.060443 885 +account 1 18 4.060443 4.060443 882 +outlin 1 17 4.110874 4.110874 914 +regular 1 17 4.110874 4.110874 929 +intro 1 17 4.110874 4.110874 915 +georg 1 16 4.174387 4.174387 994 +spatial 1 16 4.174387 4.174387 988 +fourth 1 16 4.174387 4.174387 999 +permit 1 16 4.174387 4.174387 962 +sign 1 16 4.174387 4.174387 970 +alreadi 1 16 4.174387 4.174387 963 +earli 1 16 4.174387 4.174387 968 +overhead 1 15 4.248495 4.248495 1035 +floor 1 14 4.317488 4.317488 1070 +resolut 1 13 4.382027 4.382027 1172 +introduc 1 13 4.382027 4.382027 1139 +care 1 13 4.382027 4.382027 1177 +menu 1 13 4.382027 4.382027 1156 +hypermedia 1 12 4.465908 4.465908 1247 +noth 1 11 4.553877 4.553877 1328 +transpar 1 11 4.553877 4.553877 1325 +fill 1 11 4.553877 4.553877 1349 +thedepart 1 11 4.553877 4.553877 1350 +sundai 1 10 4.653960 4.653960 1387 +packet 1 10 4.653960 4.653960 1415 +prior 1 10 4.653960 4.653960 1438 +card 1 10 4.653960 4.653960 1435 +mosaic 1 10 4.653960 4.653960 1426 +login 1 9 4.753590 4.753590 1550 +classifi 1 9 4.753590 4.753590 1537 +pick 1 9 4.753590 4.753590 1498 +lock 1 9 4.753590 4.753590 1551 +ideal 1 8 4.875197 4.875197 1630 +evan 1 8 4.875197 4.875197 1633 +documentfor 1 7 5.010635 5.010635 1865 +remind 1 7 5.010635 5.010635 1799 +theclass 1 6 5.164786 5.164786 2060 +mock 1 6 5.164786 5.164786 2087 +otherthan 1 6 5.164786 5.164786 2009 +conveni 1 6 5.164786 5.164786 2088 +onto 1 6 5.164786 5.164786 2089 +approv 1 6 5.164786 5.164786 2078 +temporari 1 6 5.164786 5.164786 2090 +contract 1 6 5.164786 5.164786 1985 +pentium 1 6 5.164786 5.164786 2077 +reed 1 6 5.164786 5.164786 2086 +arrang 1 6 5.164786 5.164786 2023 +newinform 1 5 5.347108 5.347108 2342 +subjectto 1 5 5.347108 5.347108 2369 +ahead 1 5 5.347108 5.347108 2338 +cshrc 1 4 5.568345 5.568345 2759 +assignmentsand 1 4 5.568345 5.568345 2760 +cvpr 1 4 5.568345 5.568345 2761 +net 1 4 5.568345 5.568345 2741 +accompani 1 4 5.568345 5.568345 2666 +password 1 4 5.568345 5.568345 2594 +kept 1 4 5.568345 5.568345 2762 +insieg 1 3 5.857933 5.857933 3331 +weekend 1 3 5.857933 5.857933 3357 +khoro 1 2 6.263398 6.263398 4488 +cantata 1 2 6.263398 6.263398 4489 +sun 1 2 6.263398 6.263398 4490 +setenv 1 2 6.263398 6.263398 4491 +pmin 1 2 6.263398 6.263398 4492 +combinationof 1 2 6.263398 6.263398 4081 +includingth 1 2 6.263398 6.263398 4493 +onthursdai 1 2 6.263398 6.263398 4425 +itemsund 1 2 6.263398 6.263398 4387 +balloon 1 2 6.263398 6.263398 4388 +understandingwelcom 1 1 6.957497 6.957497 8161 +doexercis 1 1 6.957497 6.957497 8162 +torun 1 1 6.957497 6.957497 8163 +aslillith 1 1 6.957497 6.957497 8164 +containxhost 1 1 6.957497 6.957497 8165 +lilliththen 1 1 6.957497 6.957497 8166 +khoros_hom 1 1 6.957497 6.957497 8158 +manpath 1 1 6.957497 6.957497 8167 +rlogin 1 1 6.957497 6.957497 8168 +lillith 1 1 6.957497 6.957497 8169 +rhost 1 1 6.957497 6.957497 8170 +typecantata 1 1 6.957497 6.957497 8171 +prompt 1 1 6.957497 6.957497 8172 +haskhoro 1 1 6.957497 6.957497 8173 +wwwhttp 1 1 6.957497 6.957497 8174 +htmland 1 1 6.957497 6.957497 8175 +itscours 1 1 6.957497 6.957497 8176 +twotop 1 1 6.957497 6.957497 8177 +pagesand 1 1 6.957497 6.957497 8178 +huerta 1 1 6.957497 6.957497 8179 +andnevatia 1 1 6.957497 6.957497 8180 +tolook 1 1 6.957497 6.957497 8181 +wolff 1 1 6.957497 6.957497 8182 +msvc 1 1 6.957497 6.957497 8159 +onneur 1 1 6.957497 6.957497 8183 +trainabl 1 1 6.957497 6.957497 8184 +ofmatlab 1 1 6.957497 6.957497 8185 +requirethat 1 1 6.957497 6.957497 8186 +mclain 1 1 6.957497 6.957497 8187 +documentexplain 1 1 6.957497 6.957497 8188 +withkhoro 1 1 6.957497 6.957497 8189 +accesskhoro 1 1 6.957497 6.957497 8190 +rene 1 1 6.957497 6.957497 8160 +youraccount 1 1 6.957497 6.957497 8191 +itov 1 1 6.957497 6.957497 8192 +arelimit 1 1 6.957497 6.957497 8193 +andsh 1 1 6.957497 6.957497 8194 +knock 1 1 6.957497 6.957497 8195 +orhav 1 1 6.957497 6.957497 8196 +willhav 1 1 6.957497 6.957497 8197 +delft 1 1 6.957497 6.957497 8198 +brochur 1 1 6.957497 6.957497 8199 +brochuremosa 1 1 6.957497 6.957497 8200 +macmosa 1 1 6.957497 6.957497 8201 +itemund 1 1 6.957497 6.957497 8202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..7cb6aa39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +world 1 115 2.197225 2.197225 126 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +site 1 106 2.197225 2.197225 119 +final 1 116 2.197225 2.197225 108 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +imag 1 91 2.397895 2.397895 161 +octob 1 89 2.397895 2.397895 156 +present 1 91 2.397895 2.397895 145 +center 1 88 2.397895 2.397895 158 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +novemb 1 81 2.484907 2.484907 179 +start 1 83 2.484907 2.484907 173 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +resourc 1 81 2.484907 2.484907 172 +exam 1 86 2.484907 2.484907 169 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +orient 1 80 2.564949 2.564949 205 +decemb 1 80 2.564949 2.564949 215 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +thursdai 1 70 2.708050 2.708050 241 +view 1 70 2.708050 2.708050 254 +sieg 1 69 2.708050 2.708050 260 +prof 1 64 2.772589 2.772589 273 +virtual 1 62 2.772589 2.772589 285 +written 1 63 2.772589 2.772589 278 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +import 1 65 2.772589 2.772589 282 +wednesdai 1 64 2.772589 2.772589 261 +room 1 59 2.833213 2.833213 301 +overview 1 56 2.890372 2.890372 323 +found 1 53 2.944439 2.944439 337 +digit 1 52 2.995732 2.995732 348 +week 1 52 2.995732 2.995732 343 +approach 1 48 3.044522 3.044522 366 +keep 1 44 3.135494 3.135494 409 +discuss 1 45 3.135494 3.135494 399 +made 1 44 3.135494 3.135494 398 +fridai 1 44 3.135494 3.135494 390 +term 1 43 3.178054 3.178054 411 +review 1 42 3.218876 3.218876 425 +error 1 40 3.258097 3.258097 449 +close 1 38 3.295837 3.295837 465 +hand 1 37 3.332205 3.332205 475 +short 1 36 3.367296 3.367296 499 +copyright 1 36 3.367296 3.367296 495 +ofth 1 36 3.367296 3.367296 491 +ad 1 32 3.465736 3.465736 544 +titl 1 31 3.496508 3.496508 556 +neural 1 30 3.555348 3.555348 578 +scale 1 28 3.610918 3.610918 613 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +symbol 1 27 3.637586 3.637586 620 +arrai 1 27 3.637586 3.637586 627 +pagecs 1 26 3.688879 3.688879 658 +session 1 26 3.688879 3.688879 643 +supercomput 1 25 3.737670 3.737670 681 +notic 1 25 3.737670 3.737670 675 +begin 1 23 3.806662 3.806662 716 +recognit 1 23 3.806662 3.806662 723 +dai 1 22 3.850148 3.850148 753 +instal 1 22 3.850148 3.850148 754 +half 1 21 3.912023 3.912023 776 +theunivers 1 21 3.912023 3.912023 797 +demo 1 18 4.060443 4.060443 888 +segment 1 17 4.110874 4.110874 931 +normal 1 16 4.174387 4.174387 995 +brief 1 16 4.174387 4.174387 1001 +permit 1 16 4.174387 4.174387 962 +intel 1 16 4.174387 4.174387 1000 +hong 1 14 4.317488 4.317488 1105 +embed 1 14 4.317488 4.317488 1102 +heterogen 1 14 4.317488 4.317488 1090 +canada 1 13 4.382027 4.382027 1158 +hypermedia 1 12 4.465908 4.465908 1247 +guest 1 12 4.465908 4.465908 1220 +onth 1 12 4.465908 4.465908 1218 +mesh 1 11 4.553877 4.553877 1351 +simon 1 8 4.875197 4.875197 1697 +documentfor 1 7 5.010635 5.010635 1865 +sweden 1 7 5.010635 5.010635 1885 +friedman 1 7 5.010635 5.010635 1886 +theclass 1 6 5.164786 5.164786 2060 +conveni 1 6 5.164786 5.164786 2088 +otherthan 1 6 5.164786 5.164786 2009 +newinform 1 5 5.347108 5.347108 2342 +speaker 1 5 5.347108 5.347108 2370 +templat 1 5 5.347108 5.347108 2311 +subjectto 1 5 5.347108 5.347108 2369 +tennesse 1 4 5.568345 5.568345 2763 +simd 1 3 5.857933 5.857933 3360 +mimd 1 3 5.857933 5.857933 3361 +pyramid 1 3 5.857933 5.857933 3358 +icon 1 3 5.857933 5.857933 3362 +paragon 1 3 5.857933 5.857933 3359 +neal 1 3 5.857933 5.857933 3184 +informationon 1 2 6.263398 6.263398 4232 +burt 1 2 6.263398 6.263398 4494 +rosenfeld 1 2 6.263398 6.263398 4495 +inon 1 2 6.263398 6.263398 4496 +maspar 1 2 6.263398 6.263398 4279 +processingwelcom 1 1 6.957497 6.957497 8203 +hourearli 1 1 6.957497 6.957497 8204 +nian 1 1 6.957497 6.957497 8205 +fraser 1 1 6.957497 6.957497 8206 +burnabi 1 1 6.957497 6.957497 8207 +bharath 1 1 6.957497 6.957497 8208 +modayur 1 1 6.957497 6.957497 8209 +invariantoper 1 1 6.957497 6.957497 8210 +hierarchicalrelax 1 1 6.957497 6.957497 8211 +isodata 1 1 6.957497 6.957497 8212 +treatment 1 1 6.957497 6.957497 8213 +topicsdur 1 1 6.957497 6.957497 8214 +activelyexplor 1 1 6.957497 6.957497 8215 +writeupsi 1 1 6.957497 6.957497 8216 +resourcespvm 1 1 6.957497 6.957497 8217 +virtualmachin 1 1 6.957497 6.957497 8218 +layear 1 1 6.957497 6.957497 8219 +aviru 1 1 6.957497 6.957497 8220 +moreworkst 1 1 6.957497 6.957497 8221 +studydistribut 1 1 6.957497 6.957497 8222 +technicalpubl 1 1 6.957497 6.957497 8223 +paragonparallel 1 1 6.957497 6.957497 8224 +variousvendor 1 1 6.957497 6.957497 8225 +correctionsto 1 1 6.957497 6.957497 8226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..25ae43b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +last 1 314 1.098612 1.098612 14 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +data 1 170 1.791759 1.791759 49 +find 1 111 2.197225 2.197225 111 +graphic 1 90 2.397895 2.397895 147 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +optim 1 79 2.564949 2.564949 197 +exampl 1 77 2.564949 2.564949 195 +solv 1 73 2.639057 2.639057 234 +differ 1 66 2.708050 2.708050 253 +numer 1 49 3.044522 3.044522 369 +linear 1 41 3.218876 3.218876 431 +global 1 34 3.401197 3.401197 520 +valu 1 25 3.737670 3.737670 665 +mike 1 24 3.761200 3.761200 703 +equat 1 23 3.806662 3.806662 724 +properti 1 22 3.850148 3.850148 749 +definit 1 19 4.007333 4.007333 864 +eric 1 19 4.007333 4.007333 870 +element 1 18 4.060443 4.060443 895 +matrix 1 17 4.110874 4.110874 933 +intro 1 17 4.110874 4.110874 915 +differenti 1 17 4.110874 4.110874 921 +adam 1 17 4.110874 4.110874 934 +nonlinear 1 14 4.317488 4.317488 1107 +chuck 1 14 4.317488 4.317488 1108 +finit 1 14 4.317488 4.317488 1106 +discret 1 13 4.382027 4.382027 1165 +jonathan 1 13 4.382027 4.382027 1174 +brad 1 12 4.465908 4.465908 1264 +daniel 1 12 4.465908 4.465908 1233 +decomposit 1 10 4.653960 4.653960 1439 +arithmet 1 10 4.653960 4.653960 1388 +kevin 1 9 4.753590 4.753590 1482 +joel 1 8 4.875197 4.875197 1698 +root 1 8 4.875197 4.875197 1650 +constrain 1 6 5.164786 5.164786 2042 +fred 1 6 5.164786 5.164786 2072 +fit 1 5 5.347108 5.347108 2285 +invers 1 4 5.568345 5.568345 2764 +corei 1 4 5.568345 5.568345 2718 +eigenvalu 1 3 5.857933 5.857933 3364 +eigenvector 1 3 5.857933 5.857933 3365 +singular 1 3 5.857933 5.857933 3366 +conclus 1 3 5.857933 5.857933 3367 +ordinari 1 3 5.857933 5.857933 3233 +interv 1 3 5.857933 5.857933 3253 +shuichi 1 2 6.263398 6.263398 4498 +unconstrain 1 2 6.263398 6.263398 4499 +kari 1 2 6.263398 6.263398 4500 +quadrat 1 2 6.263398 6.263398 4497 +regress 1 2 6.263398 6.263398 4501 +calibr 1 2 6.263398 6.263398 4502 +joanna 1 2 6.263398 6.263398 4503 +radios 1 2 6.263398 6.263398 4504 +pde 1 2 6.263398 6.263398 4505 +seminarc 1 1 6.957497 6.957497 8228 +rspring 1 1 6.957497 6.957497 8229 +ronen 1 1 6.957497 6.957497 8230 +troi 1 1 6.957497 6.957497 8231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..590c5cdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +imag 1 91 2.397895 2.397895 161 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +materi 1 75 2.639057 2.639057 221 +view 1 70 2.708050 2.708050 254 +experi 1 64 2.772589 2.772589 283 +septemb 1 65 2.772589 2.772589 274 +special 1 56 2.890372 2.890372 320 +explor 1 58 2.890372 2.890372 324 +quarter 1 47 3.091042 3.091042 389 +seminar 1 38 3.295837 3.295837 470 +winter 1 36 3.367296 3.367296 500 +copyright 1 36 3.367296 3.367296 495 +collabor 1 32 3.465736 3.465736 543 +autumn 1 31 3.496508 3.496508 558 +notic 1 25 3.737670 3.737670 675 +instal 1 22 3.850148 3.850148 754 +varieti 1 22 3.850148 3.850148 740 +tanimoto 1 10 4.653960 4.653960 1429 +transcript 1 6 5.164786 5.164786 2067 +otherthan 1 6 5.164786 5.164786 2009 +subjectto 1 5 5.347108 5.347108 2369 +useof 1 3 5.857933 5.857933 3368 +quarterscs 1 1 6.957497 6.957497 8232 +topicssteven 1 1 6.957497 6.957497 8233 +instructorcs 1 1 6.957497 6.957497 8234 +varyfrom 1 1 6.957497 6.957497 8235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..b1dd96c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +read 1 154 1.791759 1.791759 47 +distribut 1 162 1.791759 1.791759 51 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +document 1 121 2.079442 2.079442 89 +technolog 1 131 2.079442 2.079442 102 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +technic 1 100 2.302585 2.302585 140 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +octob 1 89 2.397895 2.397895 156 +present 1 91 2.397895 2.397895 145 +proceed 1 93 2.397895 2.397895 152 +comment 1 93 2.397895 2.397895 146 +educ 1 86 2.484907 2.484907 191 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +environ 1 84 2.484907 2.484907 177 +novemb 1 81 2.484907 2.484907 179 +second 1 81 2.484907 2.484907 166 +internet 1 83 2.484907 2.484907 186 +state 1 76 2.564949 2.564949 207 +materi 1 75 2.639057 2.639057 221 +intellig 1 72 2.639057 2.639057 225 +meet 1 72 2.639057 2.639057 229 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +onlin 1 75 2.639057 2.639057 223 +html 1 75 2.639057 2.639057 235 +line 1 75 2.639057 2.639057 231 +view 1 70 2.708050 2.708050 254 +degre 1 69 2.708050 2.708050 259 +descript 1 64 2.772589 2.772589 271 +import 1 65 2.772589 2.772589 282 +share 1 59 2.833213 2.833213 304 +possibl 1 47 3.091042 3.091042 378 +keep 1 44 3.135494 3.135494 409 +discuss 1 45 3.135494 3.135494 399 +describ 1 45 3.135494 3.135494 400 +netscap 1 44 3.135494 3.135494 395 +protocol 1 45 3.135494 3.135494 407 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +combin 1 42 3.218876 3.218876 421 +vision 1 41 3.218876 3.218876 430 +futur 1 41 3.218876 3.218876 427 +map 1 39 3.258097 3.258097 452 +littl 1 39 3.258097 3.258097 454 +tutori 1 39 3.258097 3.258097 437 +paul 1 38 3.295837 3.295837 471 +respons 1 37 3.332205 3.332205 476 +short 1 36 3.367296 3.367296 499 +copyright 1 36 3.367296 3.367296 495 +either 1 35 3.401197 3.401197 506 +michael 1 35 3.401197 3.401197 514 +john 1 33 3.433987 3.433987 532 +ad 1 32 3.465736 3.465736 544 +concept 1 32 3.465736 3.465736 537 +autumn 1 31 3.496508 3.496508 558 +someth 1 31 3.496508 3.496508 554 +option 1 30 3.555348 3.555348 575 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +enhanc 1 26 3.688879 3.688879 644 +notic 1 25 3.737670 3.737670 675 +brows 1 23 3.806662 3.806662 726 +instal 1 22 3.850148 3.850148 754 +geometri 1 22 3.850148 3.850148 752 +annot 1 21 3.912023 3.912023 775 +navig 1 21 3.912023 3.912023 796 +toolkit 1 20 3.951244 3.951244 835 +anderson 1 19 4.007333 4.007333 860 +particularli 1 19 4.007333 4.007333 867 +lower 1 18 4.060443 4.060443 886 +layer 1 17 4.110874 4.110874 926 +adam 1 17 4.110874 4.110874 934 +choic 1 16 4.174387 4.174387 979 +piec 1 15 4.248495 4.248495 1020 +achiev 1 14 4.317488 4.317488 1088 +hong 1 14 4.317488 4.317488 1105 +central 1 13 4.382027 4.382027 1160 +hypermedia 1 12 4.465908 4.465908 1247 +promot 1 12 4.465908 4.465908 1235 +infrastructur 1 12 4.465908 4.465908 1234 +noth 1 11 4.553877 4.553877 1328 +smart 1 11 4.553877 4.553877 1352 +baer 1 11 4.553877 4.553877 1353 +mosaic 1 10 4.653960 4.653960 1426 +tutor 1 9 4.753590 4.753590 1552 +documentfor 1 7 5.010635 5.010635 1865 +davi 1 7 5.010635 5.010635 1888 +beyond 1 7 5.010635 5.010635 1834 +baker 1 7 5.010635 5.010635 1812 +transcript 1 6 5.164786 5.164786 2067 +theclass 1 6 5.164786 5.164786 2060 +otherthan 1 6 5.164786 5.164786 2009 +plu 1 6 5.164786 5.164786 2004 +trail 1 6 5.164786 5.164786 2071 +newinform 1 5 5.347108 5.347108 2342 +subjectto 1 5 5.347108 5.347108 2369 +barton 1 5 5.347108 5.347108 2371 +jeremi 1 5 5.347108 5.347108 2360 +carlson 1 5 5.347108 5.347108 2351 +sandi 1 4 5.568345 5.568345 2765 +increasingli 1 4 5.568345 5.568345 2766 +ncsa 1 4 5.568345 5.568345 2767 +tessa 1 2 6.263398 6.263398 4507 +learner 1 2 6.263398 6.263398 4508 +uiuc 1 2 6.263398 6.263398 4509 +ward 1 2 6.263398 6.263398 4506 +marla 1 2 6.263398 6.263398 4510 +soap 1 2 6.263398 6.263398 4511 +wwwwelcom 1 1 6.957497 6.957497 8238 +mccalla 1 1 6.957497 6.957497 8239 +importanceof 1 1 6.957497 6.957497 8240 +youngquist 1 1 6.957497 6.957497 8241 +aboutinternet 1 1 6.957497 6.957497 8242 +labord 1 1 6.957497 6.957497 8237 +microworld 1 1 6.957497 6.957497 8243 +tointellig 1 1 6.957497 6.957497 8244 +bartel 1 1 6.957497 6.957497 8245 +mathematicsconnect 1 1 6.957497 6.957497 8246 +gari 1 1 6.957497 6.957497 8247 +ambiti 1 1 6.957497 6.957497 8248 +thethem 1 1 6.957497 6.957497 8249 +moresophist 1 1 6.957497 6.957497 8250 +elabor 1 1 6.957497 6.957497 8251 +ofwww 1 1 6.957497 6.957497 8252 +intechn 1 1 6.957497 6.957497 8253 +couldmak 1 1 6.957497 6.957497 8254 +applicationsthat 1 1 6.957497 6.957497 8255 +webhttp 1 1 6.957497 6.957497 8256 +presentor 1 1 6.957497 6.957497 8236 +empow 1 1 6.957497 6.957497 8257 +agehttp 1 1 6.957497 6.957497 8258 +communitieshttp 1 1 6.957497 6.957497 8259 +dietz 1 1 6.957497 6.957497 8260 +serviceshttp 1 1 6.957497 6.957497 8261 +dcewebkit 1 1 6.957497 6.957497 8262 +zhumeet 1 1 6.957497 6.957497 8263 +aboutcurriculum 1 1 6.957497 6.957497 8264 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..1e5e51b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +site 1 106 2.197225 2.197225 119 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +present 1 91 2.397895 2.397895 145 +internet 1 83 2.484907 2.484907 186 +learn 1 86 2.484907 2.484907 170 +school 1 84 2.484907 2.484907 188 +come 1 78 2.564949 2.564949 202 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +view 1 70 2.708050 2.708050 254 +descript 1 64 2.772589 2.772589 271 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +explor 1 58 2.890372 2.890372 324 +cover 1 55 2.944439 2.944439 329 +visual 1 48 3.044522 3.044522 372 +possibl 1 47 3.091042 3.091042 378 +move 1 47 3.091042 3.091042 382 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +seminar 1 38 3.295837 3.295837 470 +respons 1 37 3.332205 3.332205 476 +copyright 1 36 3.367296 3.367296 495 +collabor 1 32 3.465736 3.465736 543 +autumn 1 31 3.496508 3.496508 558 +particip 1 29 3.583519 3.583519 589 +depend 1 29 3.583519 3.583519 583 +notic 1 25 3.737670 3.737670 675 +wai 1 25 3.737670 3.737670 662 +togeth 1 23 3.806662 3.806662 714 +instal 1 22 3.850148 3.850148 754 +decid 1 14 4.317488 4.317488 1075 +context 1 13 4.382027 4.382027 1153 +subset 1 10 4.653960 4.653960 1425 +tanimoto 1 10 4.653960 4.653960 1429 +otherthan 1 6 5.164786 5.164786 2009 +subjectto 1 5 5.347108 5.347108 2369 +middl 1 5 5.347108 5.347108 2372 +evid 1 4 5.568345 5.568345 2768 +innew 1 2 6.263398 6.263398 4512 +learningwelcom 1 1 6.957497 6.957497 8265 +methodologiesfor 1 1 6.957497 6.957497 8266 +forcollabor 1 1 6.957497 6.957497 8267 +willtak 1 1 6.957497 6.957497 8268 +ofthes 1 1 6.957497 6.957497 8269 +ofai 1 1 6.957497 6.957497 8270 +ofstud 1 1 6.957497 6.957497 8271 +intopeopl 1 1 6.957497 6.957497 8272 +meani 1 1 6.957497 6.957497 8273 +schoolmai 1 1 6.957497 6.957497 8274 +participatingstud 1 1 6.957497 6.957497 8275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..83f5f5ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +follow 1 92 2.397895 2.397895 143 +octob 1 89 2.397895 2.397895 156 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +ieee 1 86 2.484907 2.484907 190 +environ 1 84 2.484907 2.484907 177 +requir 1 81 2.484907 2.484907 167 +activ 1 84 2.484907 2.484907 182 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +server 1 76 2.564949 2.564949 204 +june 1 79 2.564949 2.564949 214 +appear 1 78 2.564949 2.564949 210 +workshop 1 71 2.639057 2.639057 239 +interact 1 62 2.772589 2.772589 270 +polici 1 64 2.772589 2.772589 279 +result 1 65 2.772589 2.772589 281 +share 1 59 2.833213 2.833213 304 +processor 1 54 2.944439 2.944439 335 +scientif 1 53 2.944439 2.944439 341 +cover 1 55 2.944439 2.944439 329 +principl 1 48 3.044522 3.044522 357 +set 1 50 3.044522 3.044522 361 +adapt 1 46 3.091042 3.091042 387 +discuss 1 45 3.135494 3.135494 399 +tutori 1 39 3.258097 3.258097 437 +transact 1 39 3.258097 3.258097 438 +theoret 1 39 3.258097 3.258097 446 +open 1 38 3.295837 3.295837 469 +workstat 1 37 3.332205 3.332205 479 +ofth 1 36 3.367296 3.367296 491 +global 1 34 3.401197 3.401197 520 +concurr 1 34 3.401197 3.401197 501 +product 1 33 3.433987 3.433987 527 +richard 1 31 3.496508 3.496508 559 +multiprocessor 1 28 3.610918 3.610918 605 +measur 1 28 3.610918 3.610918 609 +univ 1 28 3.610918 3.610918 617 +proc 1 26 3.688879 3.688879 649 +supercomput 1 25 3.737670 3.737670 681 +strategi 1 25 3.737670 3.737670 682 +ofwashington 1 22 3.850148 3.850148 766 +william 1 22 3.850148 3.850148 765 +programminglanguag 1 21 3.912023 3.912023 782 +alloc 1 20 3.951244 3.951244 821 +smith 1 20 3.951244 3.951244 820 +anderson 1 19 4.007333 4.007333 860 +runtim 1 19 4.007333 4.007333 858 +eric 1 19 4.007333 4.007333 870 +thoma 1 18 4.060443 4.060443 901 +scott 1 18 4.060443 4.060443 884 +expand 1 17 4.110874 4.110874 928 +miller 1 17 4.110874 4.110874 949 +asplo 1 17 4.110874 4.110874 948 +partit 1 16 4.174387 4.174387 984 +jose 1 16 4.174387 4.174387 976 +demand 1 14 4.317488 4.317488 1073 +conf 1 13 4.382027 4.382027 1181 +sigmetr 1 13 4.382027 4.382027 1173 +coordin 1 13 4.382027 4.382027 1182 +karlin 1 13 4.382027 4.382027 1176 +mari 1 12 4.465908 4.465908 1266 +workload 1 12 4.465908 4.465908 1210 +gupta 1 12 4.465908 4.465908 1241 +kenneth 1 12 4.465908 4.465908 1265 +characterist 1 12 4.465908 4.465908 1257 +philadelphia 1 12 4.465908 4.465908 1244 +bill 1 11 4.553877 4.553877 1297 +impact 1 11 4.553877 4.553877 1334 +wood 1 11 4.553877 4.553877 1355 +santa 1 10 4.653960 4.653960 1441 +ofcomput 1 10 4.653960 4.653960 1442 +patterson 1 9 4.753590 4.753590 1554 +vernon 1 9 4.753590 4.753590 1556 +job 1 8 4.875197 4.875197 1702 +migrat 1 7 5.010635 5.010635 1851 +burger 1 7 5.010635 5.010635 1889 +multiprogram 1 6 5.164786 5.164786 2010 +chandra 1 6 5.164786 5.164786 2091 +ousterhout 1 5 5.347108 5.347108 2301 +symp 1 5 5.347108 5.347108 2376 +leblanc 1 5 5.347108 5.347108 2377 +affin 1 5 5.347108 5.347108 2378 +parallelprogram 1 5 5.347108 5.347108 2379 +ofparallel 1 5 5.347108 5.347108 2380 +culler 1 5 5.347108 5.347108 2381 +hyder 1 4 5.568345 5.568345 2772 +anoop 1 4 5.568345 5.568345 2770 +identif 1 4 5.568345 5.568345 2773 +dusseau 1 3 5.857933 5.857933 3382 +nguyen 1 3 5.857933 5.857933 3290 +zahorjan 1 3 5.857933 5.857933 3383 +tran 1 3 5.857933 5.857933 3384 +barbara 1 3 5.857933 5.857933 3380 +saltz 1 3 5.857933 5.857933 3385 +ipp 1 3 5.857933 5.857933 3381 +am 1 3 5.857933 5.857933 3386 +mccann 1 3 5.857933 5.857933 3273 +patrick 1 3 5.857933 5.857933 3334 +weihl 1 3 5.857933 5.857933 3284 +tradeoff 1 3 5.857933 5.857933 3387 +gang 1 2 6.263398 6.263398 4530 +inrd 1 2 6.263398 6.263398 4531 +andsequenti 1 2 6.263398 6.263398 4532 +tucker 1 2 6.263398 6.263398 4307 +shun 1 2 6.263398 6.263398 4533 +leung 1 2 6.263398 6.263398 4534 +han 1 2 6.263398 6.263398 4535 +agraw 1 2 6.263398 6.263398 4536 +derek 1 2 6.263398 6.263398 4537 +bunt 1 2 6.263398 6.263398 4308 +parson 1 2 6.263398 6.263398 4528 +memorymultiprocessor 1 2 6.263398 6.263398 4529 +rosenblum 1 2 6.263398 6.263398 4314 +tera 1 2 6.263398 6.263398 4224 +computersystem 1 2 6.263398 6.263398 4360 +mvmv 1 1 6.957497 6.957497 8350 +systemsprofessor 1 1 6.957497 6.957497 8351 +vernontim 1 1 6.957497 6.957497 8352 +pmlocat 1 1 6.957497 6.957497 8353 +now 1 1 6.957497 6.957497 8354 +arpaci 1 1 6.957497 6.957497 8345 +vahdat 1 1 6.957497 6.957497 8355 +equi 1 1 6.957497 6.957497 8356 +issuesfor 1 1 6.957497 6.957497 8357 +vaswani 1 1 6.957497 6.957497 8346 +workloadcharacterist 1 1 6.957497 6.957497 8358 +evangelo 1 1 6.957497 6.957497 8359 +markato 1 1 6.957497 6.957497 8360 +loopschedul 1 1 6.957497 6.957497 8361 +iniee 1 1 6.957497 6.957497 8362 +zima 1 1 6.957497 6.957497 8363 +chapman 1 1 6.957497 6.957497 8364 +edjlali 1 1 6.957497 6.957497 8365 +sussman 1 1 6.957497 6.957497 8366 +comparisonsshikharesh 1 1 6.957497 6.957497 8367 +majumdar 1 1 6.957497 6.957497 8368 +eager 1 1 6.957497 6.957497 8369 +sevcik 1 1 6.957497 6.957497 8347 +variabilityservic 1 1 6.957497 6.957497 8370 +dror 1 1 6.957497 6.957497 8371 +feitelson 1 1 6.957497 6.957497 8348 +nitzberg 1 1 6.957497 6.957497 8372 +thenasa 1 1 6.957497 6.957497 8373 +ipsc 1 1 6.957497 6.957497 8374 +leutenegg 1 1 6.957497 6.957497 8375 +sobalvarro 1 1 6.957497 6.957497 8376 +coschedul 1 1 6.957497 6.957497 8349 +rohit 1 1 6.957497 6.957497 8377 +devin 1 1 6.957497 6.957497 8378 +verghes 1 1 6.957497 6.957497 8379 +mendel 1 1 6.957497 6.957497 8380 +multiprocessorcomput 1 1 6.957497 6.957497 8381 +alverson 1 1 6.957497 6.957497 8382 +kahan 1 1 6.957497 6.957497 8383 +korri 1 1 6.957497 6.957497 8384 +effectivedistribut 1 1 6.957497 6.957497 8385 +rudolph 1 1 6.957497 6.957497 8386 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..1fb91682 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +algorithm 1 162 1.791759 1.791759 57 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +html 1 75 2.639057 2.639057 235 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +automat 1 61 2.833213 2.833213 306 +sever 1 56 2.890372 2.890372 322 +format 1 48 3.044522 3.044522 356 +fast 1 42 3.218876 3.218876 429 +slide 1 38 3.295837 3.295837 467 +origin 1 38 3.295837 3.295837 472 +winter 1 36 3.367296 3.367296 500 +print 1 34 3.401197 3.401197 503 +richard 1 31 3.496508 3.496508 559 +titl 1 31 3.496508 3.496508 556 +usual 1 28 3.610918 3.610918 608 +load 1 28 3.610918 3.610918 601 +administr 1 27 3.637586 3.637586 628 +linux 1 27 3.637586 3.637586 631 +martin 1 21 3.912023 3.912023 794 +latest 1 21 3.912023 3.912023 785 +viewer 1 21 3.912023 3.912023 787 +render 1 17 4.110874 4.110874 947 +biologi 1 15 4.248495 4.248495 1049 +draft 1 14 4.317488 4.317488 1085 +latex 1 14 4.317488 4.317488 1064 +larri 1 13 4.382027 4.382027 1142 +translat 1 13 4.382027 4.382027 1164 +readabl 1 12 4.465908 4.465908 1258 +molecular 1 7 5.010635 5.010635 1887 +adob 1 7 5.010635 5.010635 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +legibl 1 7 5.010635 5.010635 1866 +acrobat 1 6 5.164786 5.164786 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +karp 1 5 5.347108 5.347108 2284 +ruzzo 1 5 5.347108 5.347108 2345 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +tompaclass 1 3 5.857933 5.857933 3310 +faith 1 3 5.857933 5.857933 3363 +bboard 1 1 6.957497 6.957497 8227 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..3391c641 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +architectur 1 139 1.945910 1.945910 77 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +welcom 1 122 2.079442 2.079442 99 +send 1 114 2.197225 2.197225 109 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +follow 1 92 2.397895 2.397895 143 +member 1 84 2.484907 2.484907 165 +start 1 83 2.484907 2.484907 173 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +tuesdai 1 73 2.639057 2.639057 219 +workshop 1 71 2.639057 2.639057 239 +summari 1 73 2.639057 2.639057 237 +meet 1 72 2.639057 2.639057 229 +line 1 75 2.639057 2.639057 231 +intellig 1 72 2.639057 2.639057 225 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +previou 1 62 2.772589 2.772589 290 +copi 1 63 2.772589 2.772589 284 +prof 1 64 2.772589 2.772589 273 +juli 1 60 2.833213 2.833213 305 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +processor 1 54 2.944439 2.944439 335 +week 1 52 2.995732 2.995732 343 +case 1 51 2.995732 2.995732 351 +format 1 48 3.044522 3.044522 356 +quarter 1 47 3.091042 3.091042 389 +discuss 1 45 3.135494 3.135494 399 +execut 1 45 3.135494 3.135494 404 +might 1 41 3.218876 3.218876 426 +continu 1 39 3.258097 3.258097 448 +author 1 39 3.258097 3.258097 450 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +formal 1 37 3.332205 3.332205 478 +ofth 1 36 3.367296 3.367296 491 +short 1 36 3.367296 3.367296 499 +either 1 35 3.401197 3.401197 506 +bibliographi 1 34 3.401197 3.401197 518 +jame 1 35 3.401197 3.401197 507 +posit 1 31 3.496508 3.496508 552 +progress 1 28 3.610918 3.610918 598 +usual 1 28 3.610918 3.610918 608 +load 1 28 3.610918 3.610918 601 +subject 1 26 3.688879 3.688879 647 +valu 1 25 3.737670 3.737670 665 +begin 1 23 3.806662 3.806662 716 +lead 1 23 3.806662 3.806662 718 +thank 1 23 3.806662 3.806662 721 +variabl 1 23 3.806662 3.806662 715 +chip 1 21 3.912023 3.912023 770 +leav 1 21 3.912023 3.912023 772 +anderson 1 19 4.007333 4.007333 860 +predict 1 19 4.007333 4.007333 855 +miss 1 19 4.007333 4.007333 866 +asplo 1 17 4.110874 4.110874 948 +brown 1 16 4.174387 4.174387 977 +micro 1 15 4.248495 4.248495 1031 +hopefulli 1 14 4.317488 4.317488 1071 +sai 1 13 4.382027 4.382027 1175 +rememb 1 12 4.465908 4.465908 1217 +literatur 1 11 4.553877 4.553877 1300 +isca 1 11 4.553877 4.553877 1354 +baer 1 11 4.553877 4.553877 1353 +jean 1 10 4.653960 4.653960 1440 +herefor 1 9 4.753590 4.753590 1483 +wall 1 9 4.753590 4.753590 1553 +patterson 1 9 4.753590 4.753590 1554 +subscrib 1 9 4.753590 4.753590 1541 +readm 1 8 4.875197 4.875197 1699 +ruth 1 7 5.010635 5.010635 1870 +burger 1 7 5.010635 5.010635 1889 +multicomput 1 7 5.010635 5.010635 1890 +goodman 1 7 5.010635 5.010635 1891 +fortun 1 7 5.010635 5.010635 1872 +tobe 1 6 5.164786 5.164786 1995 +ifyou 1 6 5.164786 5.164786 1992 +majordomo 1 6 5.164786 5.164786 2066 +philipos 1 5 5.347108 5.347108 2373 +appreci 1 5 5.347108 5.347108 2374 +volunt 1 5 5.347108 5.347108 2307 +andrea 1 5 5.347108 5.347108 2375 +lunch 1 3 5.857933 5.857933 3369 +shen 1 3 5.857933 5.857933 3370 +pong 1 3 5.857933 5.857933 3371 +stefano 1 3 5.857933 5.857933 3372 +kaxira 1 3 5.857933 5.857933 3373 +yelick 1 3 5.857933 5.857933 3374 +shortli 1 3 5.857933 5.857933 3375 +heat 1 2 6.263398 6.263398 4113 +gershoni 1 2 6.263398 6.263398 4513 +matthai 1 2 6.263398 6.263398 4514 +tabular 1 2 6.263398 6.263398 4515 +guru 1 2 6.263398 6.263398 4476 +wilkerson 1 2 6.263398 6.263398 4516 +dalli 1 2 6.263398 6.263398 4517 +datascalar 1 2 6.263398 6.263398 4518 +spsd 1 2 6.263398 6.263398 4519 +iram 1 2 6.263398 6.263398 4520 +lunchcs 1 1 6.957497 6.957497 8277 +lunchcours 1 1 6.957497 6.957497 8278 +loupbaermeet 1 1 6.957497 6.957497 8279 +cseg 1 1 6.957497 6.957497 8276 +withalmost 1 1 6.957497 6.957497 8280 +discussedat 1 1 6.957497 6.957497 8281 +byesteem 1 1 6.957497 6.957497 8282 +mostlyw 1 1 6.957497 6.957497 8283 +discussionson 1 1 6.957497 6.957497 8284 +quartersi 1 1 6.957497 6.957497 8285 +fromparticip 1 1 6.957497 6.957497 8286 +oncrit 1 1 6.957497 6.957497 8287 +hereread 1 1 6.957497 6.957497 8288 +morethem 1 1 6.957497 6.957497 8289 +molli 1 1 6.957497 6.957497 8290 +thestud 1 1 6.957497 6.957497 8291 +informallyor 1 1 6.957497 6.957497 8292 +lipasti 1 1 6.957497 6.957497 8293 +advanceprogrami 1 1 6.957497 6.957497 8294 +thesaulsburi 1 1 6.957497 6.957497 8295 +readashlei 1 1 6.957497 6.957497 8296 +saulsburi 1 1 6.957497 6.957497 8297 +fong 1 1 6.957497 6.957497 8298 +nowatzyk 1 1 6.957497 6.957497 8299 +fillo 1 1 6.957497 6.957497 8300 +keckler 1 1 6.957497 6.957497 8301 +machinelink 1 1 6.957497 6.957497 8302 +readdoug 1 1 6.957497 6.957497 8303 +neton 1 1 6.957497 6.957497 8304 +cardwel 1 1 6.957497 6.957497 8305 +fromm 1 1 6.957497 6.957497 8306 +keeton 1 1 6.957497 6.957497 8307 +kozyraki 1 1 6.957497 6.957497 8308 +thomasand 1 1 6.957497 6.957497 8309 +availableher 1 1 6.957497 6.957497 8310 +themajordomo 1 1 6.957497 6.957497 8311 +shouldinclud 1 1 6.957497 6.957497 8312 +lineblank 1 1 6.957497 6.957497 8313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..8008358f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +spring 1 131 2.079442 2.079442 88 +interact 1 62 2.772589 2.772589 270 +offer 1 43 3.178054 3.178054 414 +human 1 32 3.465736 3.465736 546 +pagecs 1 26 3.688879 3.688879 658 +experiment 1 26 3.688879 3.688879 645 +born 1 21 3.912023 3.912023 798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..e47aa1a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +welcom 1 122 2.079442 2.079442 99 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +memori 1 101 2.302585 2.302585 139 +second 1 81 2.484907 2.484907 166 +refer 1 78 2.564949 2.564949 203 +optim 1 79 2.564949 2.564949 197 +messag 1 76 2.564949 2.564949 212 +meet 1 72 2.639057 2.639057 229 +line 1 75 2.639057 2.639057 231 +practic 1 70 2.708050 2.708050 246 +organ 1 65 2.772589 2.772589 265 +wednesdai 1 64 2.772589 2.772589 261 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +special 1 56 2.890372 2.890372 320 +week 1 52 2.995732 2.995732 343 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +cost 1 37 3.332205 3.332205 480 +represent 1 35 3.401197 3.401197 512 +michael 1 35 3.401197 3.401197 514 +graph 1 30 3.555348 3.555348 576 +depend 1 29 3.583519 3.583519 583 +framework 1 28 3.610918 3.610918 606 +arrai 1 27 3.637586 3.637586 627 +subject 1 26 3.688879 3.688879 647 +valu 1 25 3.737670 3.737670 665 +flow 1 24 3.761200 3.761200 700 +leav 1 21 3.912023 3.912023 772 +alloc 1 20 3.951244 3.951244 821 +anderson 1 19 4.007333 4.007333 860 +andrew 1 19 4.007333 4.007333 849 +offici 1 18 4.060443 4.060443 894 +thoma 1 18 4.060443 4.060443 901 +analyz 1 17 4.110874 4.110874 925 +regist 1 17 4.110874 4.110874 938 +georg 1 16 4.174387 4.174387 994 +susan 1 15 4.248495 4.248495 1050 +todd 1 15 4.248495 4.248495 1051 +floor 1 14 4.317488 4.317488 1070 +dean 1 14 4.317488 4.317488 1104 +charl 1 13 4.382027 4.382027 1149 +sai 1 13 4.382027 4.382027 1175 +loew 1 12 4.465908 4.465908 1252 +gupta 1 12 4.465908 4.465908 1241 +iter 1 12 4.465908 4.465908 1206 +kenneth 1 12 4.465908 4.465908 1265 +grant 1 12 4.465908 4.465908 1216 +minimum 1 9 4.753590 4.753590 1555 +strength 1 9 4.753590 4.753590 1494 +subscrib 1 9 4.753590 4.753590 1541 +paradigm 1 8 4.875197 4.875197 1662 +lewi 1 8 4.875197 4.875197 1700 +erik 1 8 4.875197 4.875197 1701 +grove 1 8 4.875197 4.875197 1675 +roger 1 7 5.010635 5.010635 1892 +multicomput 1 7 5.010635 5.010635 1890 +fischer 1 7 5.010635 5.010635 1893 +reduct 1 7 5.010635 5.010635 1877 +ruth 1 7 5.010635 5.010635 1870 +banerje 1 6 5.164786 5.164786 2018 +mock 1 6 5.164786 5.164786 2087 +tullsen 1 6 5.164786 5.164786 2081 +majordomo 1 6 5.164786 5.164786 2066 +bottleneck 1 4 5.568345 5.568345 2769 +anoop 1 4 5.568345 5.568345 2770 +interprocedur 1 4 5.568345 5.568345 2771 +noel 1 3 5.857933 5.857933 3376 +crew 1 3 5.857933 5.857933 3347 +litvinov 1 3 5.857933 5.857933 3343 +garrett 1 3 5.857933 5.857933 3377 +jen 1 3 5.857933 5.857933 3378 +blank 1 3 5.857933 5.857933 3379 +seminarcs 1 2 6.263398 6.263398 4521 +eggersand 1 2 6.263398 6.263398 4522 +francoi 1 2 6.263398 6.263398 4523 +taxat 1 2 6.263398 6.263398 4524 +ernst 1 2 6.263398 6.263398 4525 +secoski 1 2 6.263398 6.263398 4526 +lazi 1 2 6.263398 6.263398 4527 +seminarcours 1 1 6.957497 6.957497 8314 +craigchambersmeet 1 1 6.957497 6.957497 8315 +butreal 1 1 6.957497 6.957497 8316 +atrium 1 1 6.957497 6.957497 8317 +scheduleweek 1 1 6.957497 6.957497 8318 +memspi 1 1 6.957497 6.957497 8319 +margaretmartonosi 1 1 6.957497 6.957497 8320 +consel 1 1 6.957497 6.957497 8321 +itsus 1 1 6.957497 6.957497 8322 +evelyn 1 1 6.957497 6.957497 8323 +duesterwald 1 1 6.957497 6.957497 8324 +rajiv 1 1 6.957497 6.957497 8325 +maryl 1 1 6.957497 6.957497 8326 +soffa 1 1 6.957497 6.957497 8327 +danielweis 1 1 6.957497 6.957497 8328 +bjarn 1 1 6.957497 6.957497 8329 +steensgaard 1 1 6.957497 6.957497 8330 +coalesc 1 1 6.957497 6.957497 8331 +appel 1 1 6.957497 6.957497 8332 +hooverand 1 1 6.957497 6.957497 8333 +zadeck 1 1 6.957497 6.957497 8334 +byprivthviraj 1 1 6.957497 6.957497 8335 +stevenkurland 1 1 6.957497 6.957497 8336 +knoblock 1 1 6.957497 6.957497 8337 +knoop 1 1 6.957497 6.957497 8338 +oliv 1 1 6.957497 6.957497 8339 +andbernhard 1 1 6.957497 6.957497 8340 +steffen 1 1 6.957497 6.957497 8341 +subscribecsek 1 1 6.957497 6.957497 8342 +shortlyrec 1 1 6.957497 6.957497 8343 +melodi 1 1 6.957497 6.957497 8344 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..bea95605 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +mathemat 1 108 2.197225 2.197225 123 +access 1 102 2.302585 2.302585 136 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +larg 1 82 2.484907 2.484907 168 +mondai 1 77 2.564949 2.564949 206 +messag 1 76 2.564949 2.564949 212 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +effici 1 73 2.639057 2.639057 233 +tuesdai 1 73 2.639057 2.639057 219 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +septemb 1 65 2.772589 2.772589 274 +local 1 55 2.944439 2.944439 334 +still 1 50 3.044522 3.044522 362 +numer 1 49 3.044522 3.044522 369 +quarter 1 47 3.091042 3.091042 389 +discuss 1 45 3.135494 3.135494 399 +cach 1 41 3.218876 3.218876 432 +live 1 40 3.258097 3.258097 451 +seminar 1 38 3.295837 3.295837 470 +open 1 38 3.295837 3.295837 469 +expect 1 37 3.332205 3.332205 484 +global 1 34 3.401197 3.401197 520 +supercomput 1 25 3.737670 3.737670 681 +task 1 25 3.737670 3.737670 678 +strategi 1 25 3.737670 3.737670 682 +scalabl 1 24 3.761200 3.761200 705 +input 1 23 3.806662 3.806662 727 +thread 1 23 3.806662 3.806662 722 +cooper 1 22 3.850148 3.850148 757 +output 1 21 3.912023 3.912023 788 +util 1 21 3.912023 3.912023 774 +exploit 1 20 3.951244 3.951244 836 +portabl 1 20 3.951244 3.951244 819 +eric 1 19 4.007333 4.007333 870 +runtim 1 19 4.007333 4.007333 858 +attend 1 18 4.060443 4.060443 893 +debug 1 17 4.110874 4.110874 944 +sign 1 16 4.174387 4.174387 970 +driven 1 15 4.248495 4.248495 1048 +stream 1 15 4.248495 4.248495 1015 +hopefulli 1 14 4.317488 4.317488 1071 +matlab 1 14 4.317488 4.317488 1081 +everyon 1 13 4.382027 4.382027 1148 +bodi 1 13 4.382027 4.382027 1178 +block 1 13 4.382027 4.382027 1183 +loew 1 12 4.465908 4.465908 1252 +gupta 1 12 4.465908 4.465908 1241 +characterist 1 12 4.465908 4.465908 1257 +holidai 1 12 4.465908 4.465908 1224 +brad 1 12 4.465908 4.465908 1264 +iter 1 12 4.465908 4.465908 1206 +loop 1 11 4.553877 4.553877 1310 +multithread 1 11 4.553877 4.553877 1315 +subscrib 1 9 4.753590 4.753590 1541 +rel 1 9 4.753590 4.753590 1487 +cross 1 8 4.875197 4.875197 1703 +reus 1 8 4.875197 4.875197 1661 +pldi 1 8 4.875197 4.875197 1704 +sean 1 8 4.875197 4.875197 1705 +spot 1 7 5.010635 5.010635 1894 +ruth 1 7 5.010635 5.010635 1870 +core 1 7 5.010635 5.010635 1809 +majordomo 1 6 5.164786 5.164786 2066 +banerje 1 6 5.164786 5.164786 2018 +sung 1 6 5.164786 5.164786 2075 +reed 1 6 5.164786 5.164786 2086 +zhou 1 6 5.164786 5.164786 2092 +icpp 1 5 5.347108 5.347108 2382 +cyclic 1 5 5.347108 5.347108 2383 +ppopp 1 4 5.568345 5.568345 2774 +restructur 1 4 5.568345 5.568345 2775 +choi 1 4 5.568345 5.568345 2732 +randal 1 4 5.568345 5.568345 2776 +ipp 1 3 5.857933 5.857933 3381 +atmospher 1 3 5.857933 5.857933 3388 +andwil 1 3 5.857933 5.857933 3335 +thepap 1 3 5.857933 5.857933 3254 +jason 1 3 5.857933 5.857933 3389 +blumof 1 3 5.857933 5.857933 3237 +foster 1 3 5.857933 5.857933 3159 +lcpc 1 2 6.263398 6.263398 4538 +casual 1 2 6.263398 6.263398 4542 +subscribeto 1 2 6.263398 6.263398 4543 +deros 1 2 6.263398 6.263398 4474 +padua 1 2 6.263398 6.263398 4544 +kale 1 2 6.263398 6.263398 4545 +kennedi 1 2 6.263398 6.263398 4539 +adv 1 2 6.263398 6.263398 4540 +chien 1 2 6.263398 6.263398 4541 +cilk 1 2 6.263398 6.263398 4242 +fritzson 1 2 6.263398 6.263398 4546 +potpourri 1 2 6.263398 6.263398 4547 +environmentslarri 1 1 6.957497 6.957497 8403 +snyderautumn 1 1 6.957497 6.957497 8404 +ten 1 1 6.957497 6.957497 8405 +ignit 1 1 6.957497 6.957497 8406 +hurri 1 1 6.957497 6.957497 8407 +cseo 1 1 6.957497 6.957497 8408 +datepaperpresentor 1 1 6.957497 6.957497 8409 +scalapack 1 1 6.957497 6.957497 8387 +ramaswami 1 1 6.957497 6.957497 8388 +hodg 1 1 6.957497 6.957497 8389 +falcon 1 1 6.957497 6.957497 8410 +gallivan 1 1 6.957497 6.957497 8411 +gallopoulo 1 1 6.957497 6.957497 8412 +marsolf 1 1 6.957497 6.957497 8413 +ramkumar 1 1 6.957497 6.957497 8414 +forb 1 1 6.957497 6.957497 8415 +mcintosh 1 1 6.957497 6.957497 8390 +chakarabarti 1 1 6.957497 6.957497 8391 +integer 1 1 6.957497 6.957497 8392 +crandal 1 1 6.957497 6.957497 8393 +aydt 1 1 6.957497 6.957497 8394 +gotwal 1 1 6.957497 6.957497 8416 +sriniva 1 1 6.957497 6.957497 8417 +gannon 1 1 6.957497 6.957497 8418 +bordawekar 1 1 6.957497 6.957497 8395 +choudahari 1 1 6.957497 6.957497 8396 +koelbel 1 1 6.957497 6.957497 8397 +paleczni 1 1 6.957497 6.957497 8398 +midkiff 1 1 6.957497 6.957497 8399 +fahring 1 1 6.957497 6.957497 8400 +hain 1 1 6.957497 6.957497 8401 +mehrotra 1 1 6.957497 6.957497 8402 +joerg 1 1 6.957497 6.957497 8419 +kuszmaul 1 1 6.957497 6.957497 8420 +leiserson 1 1 6.957497 6.957497 8421 +andersson 1 1 6.957497 6.957497 8422 +realign 1 1 6.957497 6.957497 8423 +kamachi 1 1 6.957497 6.957497 8424 +kusano 1 1 6.957497 6.957497 8425 +suehiro 1 1 6.957497 6.957497 8426 +tamura 1 1 6.957497 6.957497 8427 +sakon 1 1 6.957497 6.957497 8428 +rinard 1 1 6.957497 6.957497 8429 +abramson 1 1 6.957497 6.957497 8430 +michalak 1 1 6.957497 6.957497 8431 +sosic 1 1 6.957497 6.957497 8432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..83dabd0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +mail 1 238 1.386294 1.386294 22 +list 1 201 1.609438 1.609438 39 +spring 1 131 2.079442 2.079442 88 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +messag 1 76 2.564949 2.564949 212 +line 1 75 2.639057 2.639057 231 +variou 1 56 2.890372 2.890372 317 +summer 1 56 2.890372 2.890372 311 +week 1 52 2.995732 2.995732 343 +seminar 1 38 3.295837 3.295837 470 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +request 1 26 3.688879 3.688879 635 +alreadi 1 16 4.174387 4.174387 963 +bodi 1 13 4.382027 4.382027 1178 +web 1 12 4.465908 4.465908 1249 +subscrib 1 9 4.753590 4.753590 1541 +bit 1 7 5.010635 5.010635 1833 +crucial 1 5 5.347108 5.347108 2384 +ofinform 1 4 5.568345 5.568345 2707 +cancel 1 4 5.568345 5.568345 2746 +preliminariesif 1 1 6.957497 6.957497 8433 +besent 1 1 6.957497 6.957497 8434 +systemsin 1 1 6.957497 6.957497 8435 +quarterli 1 1 6.957497 6.957497 8436 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..7719554e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +file 1 132 1.945910 1.945910 70 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +appear 1 78 2.564949 2.564949 210 +meet 1 72 2.639057 2.639057 229 +symposium 1 72 2.639057 2.639057 238 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +summer 1 56 2.890372 2.890372 311 +principl 1 48 3.044522 3.044522 357 +quarter 1 47 3.091042 3.091042 389 +fridai 1 44 3.135494 3.135494 390 +discuss 1 45 3.135494 3.135494 399 +cach 1 41 3.218876 3.218876 432 +workstat 1 37 3.332205 3.332205 479 +connect 1 37 3.332205 3.332205 485 +global 1 34 3.401197 3.401197 520 +return 1 34 3.401197 3.401197 502 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +storag 1 31 3.496508 3.496508 553 +cluster 1 28 3.610918 3.610918 612 +mobil 1 23 3.806662 3.806662 730 +exploit 1 20 3.951244 3.951244 836 +log 1 19 4.007333 4.007333 857 +anderson 1 19 4.007333 4.007333 860 +hierarch 1 15 4.248495 4.248495 1018 +coher 1 14 4.317488 4.317488 1109 +weak 1 13 4.382027 4.382027 1159 +loew 1 12 4.465908 4.465908 1252 +impact 1 11 4.553877 4.553877 1334 +sosp 1 10 4.653960 4.653960 1416 +franklin 1 10 4.653960 4.653960 1436 +voelker 1 9 4.753590 4.753590 1557 +romer 1 8 4.875197 4.875197 1706 +feelei 1 7 5.010635 5.010635 1859 +chan 1 7 5.010635 5.010635 1876 +trend 1 7 5.010635 5.010635 1842 +wolman 1 6 5.164786 5.164786 2093 +tiwari 1 5 5.347108 5.347108 2385 +philipos 1 5 5.347108 5.347108 2373 +wewil 1 4 5.568345 5.568345 2688 +savag 1 4 5.568345 5.568345 2777 +serverless 1 3 5.857933 5.857933 3181 +litvinov 1 3 5.857933 5.857933 3343 +fiuczynski 1 3 5.857933 5.857933 3390 +wilk 1 2 6.263398 6.263398 4548 +hypervisor 1 2 6.263398 6.263398 4549 +sriram 1 2 6.263398 6.263398 4550 +quarterw 1 1 6.957497 6.957497 8437 +upcomingacm 1 1 6.957497 6.957497 8438 +havean 1 1 6.957497 6.957497 8439 +scheduleoct 1 1 6.957497 6.957497 8440 +autoraid 1 1 6.957497 6.957497 8441 +montgomeri 1 1 6.957497 6.957497 8442 +stackabl 1 1 6.957497 6.957497 8443 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..0b35dbf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +perform 1 143 1.945910 1.945910 74 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +high 1 130 2.079442 2.079442 101 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +document 1 121 2.079442 2.079442 89 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +commun 1 95 2.397895 2.397895 157 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +learn 1 86 2.484907 2.484907 170 +messag 1 76 2.564949 2.564949 212 +want 1 79 2.564949 2.564949 199 +state 1 76 2.564949 2.564949 207 +write 1 72 2.639057 2.639057 222 +materi 1 75 2.639057 2.639057 221 +appli 1 71 2.639057 2.639057 226 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +guid 1 63 2.772589 2.772589 267 +import 1 65 2.772589 2.772589 282 +index 1 56 2.890372 2.890372 309 +unix 1 58 2.890372 2.890372 308 +faculti 1 56 2.890372 2.890372 325 +scientif 1 53 2.944439 2.944439 341 +allow 1 53 2.944439 2.944439 333 +cover 1 55 2.944439 2.944439 329 +found 1 53 2.944439 2.944439 337 +run 1 51 2.995732 2.995732 347 +quarter 1 47 3.091042 3.091042 389 +effect 1 46 3.091042 3.091042 385 +fast 1 42 3.218876 3.218876 429 +join 1 39 3.258097 3.258097 457 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +close 1 38 3.295837 3.295837 465 +credit 1 38 3.295837 3.295837 460 +workstat 1 37 3.332205 3.332205 479 +staff 1 36 3.367296 3.367296 490 +ad 1 32 3.465736 3.465736 544 +scientist 1 31 3.496508 3.496508 560 +rang 1 30 3.555348 3.555348 565 +semant 1 29 3.583519 3.583519 587 +platform 1 29 3.583519 3.583519 591 +releas 1 28 3.610918 3.610918 616 +arrai 1 27 3.637586 3.637586 627 +supercomput 1 25 3.737670 3.737670 681 +variabl 1 23 3.806662 3.806662 715 +dai 1 22 3.850148 3.850148 753 +exploit 1 20 3.951244 3.951244 836 +kernel 1 20 3.951244 3.951244 825 +assum 1 19 4.007333 4.007333 845 +account 1 18 4.060443 4.060443 882 +previous 1 17 4.110874 4.110874 923 +debug 1 17 4.110874 4.110874 944 +whole 1 17 4.110874 4.110874 940 +modern 1 16 4.174387 4.174387 966 +fortran 1 15 4.248495 4.248495 1027 +easili 1 14 4.317488 4.317488 1077 +matlab 1 14 4.317488 4.317488 1081 +bodi 1 13 4.382027 4.382027 1178 +block 1 13 4.382027 4.382027 1183 +loew 1 12 4.465908 4.465908 1252 +grant 1 12 4.465908 4.465908 1216 +loop 1 11 4.553877 4.553877 1310 +faster 1 11 4.553877 4.553877 1323 +reli 1 10 4.653960 4.653960 1411 +subscrib 1 9 4.753590 4.753590 1541 +informationabout 1 9 4.753590 4.753590 1515 +suitabl 1 9 4.753590 4.753590 1486 +elimin 1 9 4.753590 4.753590 1558 +simpli 1 8 4.875197 4.875197 1626 +migrat 1 7 5.010635 5.010635 1851 +largest 1 7 5.010635 5.010635 1858 +majordomo 1 6 5.164786 5.164786 2066 +syntax 1 6 5.164786 5.164786 2030 +sung 1 6 5.164786 5.164786 2075 +snyder 1 5 5.347108 5.347108 2359 +toth 1 4 5.568345 5.568345 2595 +ncsa 1 4 5.568345 5.568345 2767 +choi 1 4 5.568345 5.568345 2732 +audit 1 3 5.857933 5.857933 3391 +disciplin 1 3 5.857933 5.857933 3392 +informationcours 1 3 5.857933 5.857933 3167 +subscribeto 1 2 6.263398 6.263398 4543 +inner 1 2 6.263398 6.263398 4551 +zphigh 1 1 6.957497 6.957497 8444 +zpllarri 1 1 6.957497 6.957497 8445 +teamautumn 1 1 6.957497 6.957497 8446 +csezpl 1 1 6.957497 6.957497 8447 +usersmail 1 1 6.957497 6.957497 8448 +librarai 1 1 6.957497 6.957497 8449 +relatedinform 1 1 6.957497 6.957497 8450 +descriptionzpl 1 1 6.957497 6.957497 8451 +scientificprogram 1 1 6.957497 6.957497 8452 +infortran 1 1 6.957497 6.957497 8453 +dramaticallysimplifi 1 1 6.957497 6.957497 8454 +nuisanc 1 1 6.957497 6.957497 8455 +andtrivi 1 1 6.957497 6.957497 8456 +byrecompil 1 1 6.957497 6.957497 8457 +wysiwyg 1 1 6.957497 6.957497 8458 +booknon 1 1 6.957497 6.957497 8459 +onin 1 1 6.957497 6.957497 8460 +zplprogram 1 1 6.957497 6.957497 8461 +prerequisitesfamiliar 1 1 6.957497 6.957497 8462 +ormatlab 1 1 6.957497 6.957497 8463 +remotezpl 1 1 6.957497 6.957497 8464 +compileroth 1 1 6.957497 6.957497 8465 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..e15e1f73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +implement 1 152 1.791759 1.791759 52 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +schedul 1 119 2.079442 2.079442 85 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +send 1 114 2.197225 2.197225 109 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +version 1 113 2.197225 2.197225 122 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +contain 1 81 2.484907 2.484907 174 +exam 1 86 2.484907 2.484907 169 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +logic 1 71 2.639057 2.639057 230 +goal 1 66 2.708050 2.708050 250 +syllabu 1 67 2.708050 2.708050 247 +sieg 1 69 2.708050 2.708050 260 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +evalu 1 64 2.772589 2.772589 266 +previou 1 62 2.772589 2.772589 290 +publish 1 57 2.890372 2.890372 326 +think 1 57 2.890372 2.890372 314 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +archiv 1 49 3.044522 3.044522 364 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +even 1 45 3.135494 3.135494 393 +announc 1 40 3.258097 3.258097 441 +author 1 39 3.258097 3.258097 450 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +expect 1 37 3.332205 3.332205 484 +purpos 1 37 3.332205 3.332205 481 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +go 1 33 3.433987 3.433987 529 +ad 1 32 3.465736 3.465736 544 +collabor 1 32 3.465736 3.465736 543 +autumn 1 31 3.496508 3.496508 558 +quot 1 29 3.583519 3.583519 582 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +administr 1 27 3.637586 3.637586 628 +notic 1 25 3.737670 3.737670 675 +accur 1 25 3.737670 3.737670 680 +tell 1 21 3.912023 3.912023 777 +anderson 1 19 4.007333 4.007333 860 +feedback 1 19 4.007333 4.007333 854 +aid 1 18 4.060443 4.060443 904 +whole 1 17 4.110874 4.110874 940 +weekli 1 17 4.110874 4.110874 919 +weslei 1 16 4.174387 4.174387 983 +portion 1 16 4.174387 4.174387 971 +webmast 1 15 4.248495 4.248495 1045 +anonym 1 14 4.317488 4.317488 1100 +reprint 1 14 4.317488 4.317488 1097 +everyon 1 13 4.382027 4.382027 1148 +quizz 1 13 4.382027 4.382027 1151 +workload 1 12 4.465908 4.465908 1210 +overal 1 12 4.465908 4.465908 1254 +addison 1 12 4.465908 4.465908 1230 +duli 1 12 4.465908 4.465908 1248 +benjamin 1 11 4.553877 4.553877 1296 +evolut 1 11 4.553877 4.553877 1314 +nonprofit 1 11 4.553877 4.553877 1339 +cheat 1 10 4.653960 4.653960 1395 +desir 1 9 4.753590 4.753590 1542 +cum 1 8 4.875197 4.875197 1619 +bunch 1 7 5.010635 5.010635 1861 +gaetano 1 6 5.164786 5.164786 2068 +borriello 1 5 5.347108 5.347108 2349 +corei 1 4 5.568345 5.568345 2718 +contemporari 1 4 5.568345 5.568345 2719 +corin 1 3 5.857933 5.857933 3311 +aweekli 1 3 5.857933 5.857933 3312 +katz 1 3 5.857933 5.857933 3276 +andersonwelcom 1 2 6.263398 6.263398 4400 +tocs 1 2 6.263398 6.263398 4401 +messagess 1 2 6.263398 6.263398 4402 +synario 1 2 6.263398 6.263398 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..ee00dbbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +instructor 1 108 2.197225 2.197225 107 +present 1 91 2.397895 2.397895 145 +internet 1 83 2.484907 2.484907 186 +journal 1 83 2.484907 2.484907 183 +nation 1 74 2.639057 2.639057 240 +sieg 1 69 2.708050 2.708050 260 +futur 1 41 3.218876 3.218876 427 +societi 1 40 3.258097 3.258097 456 +focu 1 30 3.555348 3.555348 571 +relev 1 26 3.688879 3.688879 637 +born 1 21 3.912023 3.912023 798 +thur 1 19 4.007333 4.007333 847 +social 1 13 4.382027 4.382027 1123 +econom 1 13 4.382027 4.382027 1184 +alan 1 13 4.382027 4.382027 1146 +tue 1 11 4.553877 4.553877 1308 +ethic 1 7 5.010635 5.010635 1786 +legal 1 6 5.164786 5.164786 2094 +highwai 1 6 5.164786 5.164786 2095 +implic 1 4 5.568345 5.568345 2696 +societycs 1 1 6.957497 6.957497 8466 +societywelcom 1 1 6.957497 6.957497 8467 +wintercs 1 1 6.957497 6.957497 8468 +andglob 1 1 6.957497 6.957497 8469 +syllabusclass 1 1 6.957497 6.957497 8470 +schedulelink 1 1 6.957497 6.957497 8471 +sitesbook 1 1 6.957497 6.957497 8472 +referenceassignmentsassign 1 1 6.957497 6.957497 8473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..7994a799 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +topic 1 114 2.197225 2.197225 110 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +build 1 85 2.484907 2.484907 184 +messag 1 76 2.564949 2.564949 212 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +sieg 1 69 2.708050 2.708050 260 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +index 1 56 2.890372 2.890372 309 +reason 1 57 2.890372 2.890372 318 +archiv 1 49 3.044522 3.044522 364 +quarter 1 47 3.091042 3.091042 389 +past 1 42 3.218876 3.218876 428 +staff 1 36 3.367296 3.367296 490 +represent 1 35 3.401197 3.401197 512 +pagecs 1 26 3.688879 3.688879 658 +fundament 1 25 3.737670 3.737670 661 +methodolog 1 23 3.806662 3.806662 733 +outlin 1 17 4.110874 4.110874 914 +nick 1 13 4.382027 4.382027 1180 +pose 1 9 4.753590 4.753590 1535 +depth 1 8 4.875197 4.875197 1636 +marc 1 8 4.875197 4.875197 1680 +uncertainti 1 7 5.010635 5.010635 1882 +machinelearn 1 6 5.164786 5.164786 2084 +anin 1 3 5.857933 5.857933 3354 +assignmentsassign 1 3 5.857933 5.857933 3342 +mailinglist 1 3 5.857933 5.857933 3325 +intelligencefal 1 2 6.263398 6.263398 4477 +andchalleng 1 2 6.263398 6.263398 4478 +intelligentmachin 1 2 6.263398 6.263398 4479 +agentarchitectur 1 2 6.263398 6.263398 4480 +weldweld 1 2 6.263398 6.263398 4481 +friedmanfriedman 1 2 6.263398 6.263398 4482 +kushmericknick 1 2 6.263398 6.263398 4483 +examsgradingresourcesth 1 2 6.263398 6.263398 4484 +topicsprojectread 1 1 6.957497 6.957497 8474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..d7af19d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +place 1 106 2.197225 2.197225 124 +check 1 115 2.197225 2.197225 118 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +decemb 1 80 2.564949 2.564949 215 +intellig 1 72 2.639057 2.639057 225 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +summari 1 73 2.639057 2.639057 237 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +back 1 60 2.833213 2.833213 297 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +index 1 56 2.890372 2.890372 309 +detail 1 57 2.890372 2.890372 321 +execut 1 45 3.135494 3.135494 404 +anoth 1 45 3.135494 3.135494 408 +discuss 1 45 3.135494 3.135494 399 +show 1 43 3.178054 3.178054 417 +futur 1 41 3.218876 3.218876 427 +review 1 42 3.218876 3.218876 425 +paul 1 38 3.295837 3.295837 471 +winter 1 36 3.367296 3.367296 500 +short 1 36 3.367296 3.367296 499 +manual 1 35 3.401197 3.401197 504 +articl 1 33 3.433987 3.433987 530 +ad 1 32 3.465736 3.465736 544 +idea 1 32 3.465736 3.465736 545 +releas 1 28 3.610918 3.610918 616 +except 1 28 3.610918 3.610918 607 +rather 1 26 3.688879 3.688879 642 +wai 1 25 3.737670 3.737670 662 +mike 1 24 3.761200 3.761200 703 +displai 1 23 3.806662 3.806662 712 +miscellan 1 23 3.806662 3.806662 731 +instal 1 22 3.850148 3.850148 754 +sent 1 22 3.850148 3.850148 763 +offici 1 18 4.060443 4.060443 894 +statu 1 18 4.060443 4.060443 885 +regist 1 17 4.110874 4.110874 938 +anyon 1 17 4.110874 4.110874 916 +side 1 15 4.248495 4.248495 1022 +someon 1 13 4.382027 4.382027 1128 +nick 1 13 4.382027 4.382027 1180 +rememb 1 12 4.465908 4.465908 1217 +usenix 1 12 4.465908 4.465908 1240 +extra 1 11 4.553877 4.553877 1312 +guess 1 10 4.653960 4.653960 1443 +mosaic 1 10 4.653960 4.653960 1426 +bring 1 10 4.653960 4.653960 1430 +perspect 1 10 4.653960 4.653960 1437 +kurt 1 9 4.753590 4.753590 1548 +on 1 8 4.875197 4.875197 1628 +filter 1 8 4.875197 4.875197 1641 +guidelin 1 7 5.010635 5.010635 1832 +impress 1 6 5.164786 5.164786 2096 +begun 1 5 5.347108 5.347108 2386 +older 1 5 5.347108 5.347108 2387 +cacm 1 5 5.347108 5.347108 2388 +ics 1 4 5.568345 5.568345 2779 +glimps 1 4 5.568345 5.568345 2778 +rambl 1 3 5.857933 5.857933 3308 +vagu 1 3 5.857933 5.857933 3393 +towrit 1 2 6.263398 6.263398 4207 +phoenix 1 2 6.263398 6.263398 4552 +belief 1 2 6.263398 6.263398 4553 +siegcreat 1 1 6.957497 6.957497 8476 +scriptspleas 1 1 6.957497 6.957497 8477 +tothem 1 1 6.957497 6.957497 8478 +zwhere 1 1 6.957497 6.957497 8479 +mosiac 1 1 6.957497 6.957497 8480 +zephyr 1 1 6.957497 6.957497 8475 +znol 1 1 6.957497 6.957497 8481 +zwatch 1 1 6.957497 6.957497 8482 +zlocat 1 1 6.957497 6.957497 8483 +releg 1 1 6.957497 6.957497 8484 +grumbl 1 1 6.957497 6.957497 8485 +luddit 1 1 6.957497 6.957497 8486 +itout 1 1 6.957497 6.957497 8487 +withci 1 1 6.957497 6.957497 8488 +theentir 1 1 6.957497 6.957497 8489 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..3e4f1285 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +instructor 1 108 2.197225 2.197225 107 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +control 1 82 2.484907 2.484907 164 +info 1 85 2.484907 2.484907 176 +involv 1 71 2.639057 2.639057 227 +organ 1 65 2.772589 2.772589 265 +detail 1 57 2.890372 2.890372 321 +processor 1 54 2.944439 2.944439 335 +small 1 39 3.258097 3.258097 447 +credit 1 38 3.295837 3.295837 460 +open 1 38 3.295837 3.295837 469 +taken 1 31 3.496508 3.496508 555 +prerequisit 1 19 4.007333 4.007333 846 +devic 1 16 4.174387 4.174387 1002 +consent 1 5 5.347108 5.347108 2389 +semesterli 1 4 5.568345 5.568345 2780 +freshmen 1 2 6.263398 6.263398 4554 +computerhardwar 1 1 6.957497 6.957497 8490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..dc2f992c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +address 1 170 1.791759 1.791759 62 +architectur 1 139 1.945910 1.945910 77 +introduct 1 126 2.079442 2.079442 87 +structur 1 106 2.197225 2.197225 105 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +control 1 82 2.484907 2.484907 164 +info 1 85 2.484907 2.484907 176 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +cach 1 41 3.218876 3.218876 432 +credit 1 38 3.295837 3.295837 460 +compon 1 30 3.555348 3.555348 570 +hierarchi 1 22 3.850148 3.850148 744 +prerequisit 1 19 4.007333 4.007333 846 +interrupt 1 7 5.010635 5.010635 1793 +microprogram 1 4 5.568345 5.568345 2604 +semesterli 1 4 5.568345 5.568345 2780 +andc 1 1 6.957497 6.957497 8491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..ffb93bdf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +architectur 1 139 1.945910 1.945910 77 +process 1 142 1.945910 1.945910 72 +high 1 130 2.079442 2.079442 101 +advanc 1 99 2.302585 2.302585 130 +techniqu 1 99 2.302585 2.302585 138 +special 1 56 2.890372 2.890372 320 +processor 1 54 2.944439 2.944439 335 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +multiprocessor 1 28 3.610918 3.610918 605 +flow 1 24 3.761200 3.761200 700 +prerequisit 1 19 4.007333 4.007333 846 +semesterli 1 4 5.568345 5.568345 2780 +andpipelin 1 1 6.957497 6.957497 8492 +performancemachin 1 1 6.957497 6.957497 8493 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..33ba3e68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +fall 1 181 1.609438 1.609438 40 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +architectur 1 139 1.945910 1.945910 77 +machin 1 129 2.079442 2.079442 95 +instructor 1 108 2.197225 2.197225 107 +advanc 1 99 2.302585 2.302585 130 +info 1 85 2.484907 2.484907 176 +special 1 56 2.890372 2.890372 320 +principl 1 48 3.044522 3.044522 357 +credit 1 38 3.295837 3.295837 460 +multi 1 36 3.367296 3.367296 493 +detect 1 26 3.688879 3.688879 646 +prerequisit 1 19 4.007333 4.007333 846 +interconnect 1 17 4.110874 4.110874 937 +coher 1 14 4.317488 4.317488 1109 +dataflow 1 5 5.347108 5.347108 2390 +consent 1 5 5.347108 5.347108 2389 +semesterli 1 4 5.568345 5.568345 2780 +simd 1 3 5.857933 5.857933 3360 +mimd 1 3 5.857933 5.857933 3361 +vectorizingcompil 1 1 6.957497 6.957497 8494 +processorsynchron 1 1 6.957497 6.957497 8495 +purposeprocessor 1 1 6.957497 6.957497 8496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..93d81f68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,244 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +need 1 98 2.302585 2.302585 135 +book 1 99 2.302585 2.302585 131 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +activ 1 84 2.484907 2.484907 182 +learn 1 86 2.484907 2.484907 170 +solut 1 82 2.484907 2.484907 162 +chang 1 82 2.484907 2.484907 163 +mondai 1 77 2.564949 2.564949 206 +orient 1 80 2.564949 2.564949 205 +come 1 78 2.564949 2.564949 202 +homework 1 79 2.564949 2.564949 193 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +tuesdai 1 73 2.639057 2.639057 219 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +receiv 1 66 2.708050 2.708050 244 +window 1 68 2.708050 2.708050 242 +wednesdai 1 64 2.772589 2.772589 261 +organ 1 65 2.772589 2.772589 265 +copi 1 63 2.772589 2.772589 284 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +handout 1 64 2.772589 2.772589 263 +januari 1 62 2.772589 2.772589 264 +virtual 1 62 2.772589 2.772589 285 +room 1 59 2.833213 2.833213 301 +march 1 61 2.833213 2.833213 295 +detail 1 57 2.890372 2.890372 321 +unix 1 58 2.890372 2.890372 308 +semest 1 58 2.890372 2.890372 312 +variou 1 56 2.890372 2.890372 317 +overview 1 56 2.890372 2.890372 323 +februari 1 54 2.944439 2.944439 328 +three 1 54 2.944439 2.944439 330 +week 1 52 2.995732 2.995732 343 +hardwar 1 51 2.995732 2.995732 350 +date 1 51 2.995732 2.995732 344 +run 1 51 2.995732 2.995732 347 +set 1 50 3.044522 3.044522 361 +appoint 1 49 3.044522 3.044522 358 +done 1 47 3.091042 3.091042 381 +could 1 46 3.091042 3.091042 383 +answer 1 45 3.135494 3.135494 391 +fridai 1 44 3.135494 3.135494 390 +favorit 1 44 3.135494 3.135494 410 +discuss 1 45 3.135494 3.135494 399 +midterm 1 45 3.135494 3.135494 392 +mark 1 44 3.135494 3.135494 403 +around 1 43 3.178054 3.178054 415 +past 1 42 3.218876 3.218876 428 +late 1 40 3.258097 3.258097 439 +probabl 1 40 3.258097 3.258097 455 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +workstat 1 37 3.332205 3.332205 479 +ofth 1 36 3.367296 3.367296 491 +post 1 35 3.401197 3.401197 505 +print 1 34 3.401197 3.401197 503 +concurr 1 34 3.401197 3.401197 501 +eduoffic 1 33 3.433987 3.433987 531 +taught 1 33 3.433987 3.433987 526 +independ 1 32 3.465736 3.465736 548 +option 1 30 3.555348 3.555348 575 +rang 1 30 3.555348 3.555348 565 +secur 1 30 3.555348 3.555348 577 +turn 1 29 3.583519 3.583519 586 +though 1 27 3.637586 3.637586 622 +trace 1 25 3.737670 3.737670 677 +strategi 1 25 3.737670 3.737670 682 +store 1 24 3.761200 3.761200 693 +dai 1 22 3.850148 3.850148 753 +inth 1 22 3.850148 3.850148 741 +tent 1 22 3.850148 3.850148 739 +cooper 1 22 3.850148 3.850148 757 +disk 1 22 3.850148 3.850148 747 +leav 1 21 3.912023 3.912023 772 +sure 1 20 3.951244 3.951244 813 +minut 1 20 3.951244 3.951244 810 +entir 1 20 3.951244 3.951244 811 +break 1 20 3.951244 3.951244 812 +alloc 1 20 3.951244 3.951244 821 +attend 1 18 4.060443 4.060443 893 +accept 1 18 4.060443 4.060443 879 +account 1 18 4.060443 4.060443 882 +weekli 1 17 4.110874 4.110874 919 +monitor 1 17 4.110874 4.110874 941 +analyz 1 17 4.110874 4.110874 925 +regist 1 17 4.110874 4.110874 938 +debug 1 17 4.110874 4.110874 944 +segment 1 17 4.110874 4.110874 931 +modern 1 16 4.174387 4.174387 966 +quiz 1 16 4.174387 4.174387 990 +devic 1 16 4.174387 4.174387 1002 +purchas 1 15 4.248495 4.248495 1030 +drive 1 15 4.248495 4.248495 1052 +driven 1 15 4.248495 4.248495 1048 +happi 1 14 4.317488 4.317488 1079 +quizz 1 13 4.382027 4.382027 1151 +jonathan 1 13 4.382027 4.382027 1174 +cannot 1 13 4.382027 4.382027 1144 +readi 1 12 4.465908 4.465908 1242 +solari 1 12 4.465908 4.465908 1238 +count 1 12 4.465908 4.465908 1239 +extra 1 11 4.553877 4.553877 1312 +mainli 1 10 4.653960 4.653960 1432 +penalti 1 10 4.653960 4.653960 1405 +bart 1 9 4.753590 4.753590 1559 +recit 1 9 4.753590 4.753590 1475 +quantit 1 8 4.875197 4.875197 1654 +absolut 1 8 4.875197 4.875197 1646 +partner 1 8 4.875197 4.875197 1648 +replac 1 8 4.875197 4.875197 1668 +dispatch 1 7 5.010635 5.010635 1791 +whatev 1 6 5.164786 5.164786 2097 +transcript 1 6 5.164786 5.164786 2067 +drop 1 6 5.164786 5.164786 2008 +averag 1 6 5.164786 5.164786 2098 +madisoncomput 1 5 5.347108 5.347108 2391 +poorli 1 4 5.568345 5.568345 2781 +maximum 1 4 5.568345 5.568345 2632 +departmentc 1 3 5.857933 5.857933 3395 +millerc 1 3 5.857933 5.857933 3396 +csphone 1 3 5.857933 5.857933 3394 +tanenbaum 1 3 5.857933 5.857933 3397 +programmingassign 1 3 5.857933 5.857933 3398 +ofobject 1 3 5.857933 5.857933 3399 +thrash 1 3 5.857933 5.857933 3400 +bybart 1 3 5.857933 5.857933 3401 +milleremail 1 2 6.263398 6.263398 4556 +noonor 1 2 6.263398 6.263398 4557 +weyer 1 2 6.263398 6.263398 4558 +notesar 1 2 6.263398 6.263398 4559 +materialcov 1 2 6.263398 6.263398 4140 +semaphor 1 2 6.263398 6.263398 4555 +youdon 1 2 6.263398 6.263398 4444 +problemssolut 1 2 6.263398 6.263398 4241 +theproblem 1 2 6.263398 6.263398 4560 +andlook 1 2 6.263398 6.263398 4561 +havethre 1 2 6.263398 6.263398 4562 +daysof 1 2 6.263398 6.263398 4563 +eachof 1 2 6.263398 6.263398 4564 +lowest 1 2 6.263398 6.263398 4565 +breakweek 1 2 6.263398 6.263398 4566 +systemsnew 1 1 6.957497 6.957497 8497 +stufffin 1 1 6.957497 6.957497 8498 +staffinstructor 1 1 6.957497 6.957497 8499 +karuna 1 1 6.957497 6.957497 8500 +muthiahemail 1 1 6.957497 6.957497 8501 +muthiah 1 1 6.957497 6.957497 8502 +weyersemail 1 1 6.957497 6.957497 8503 +materialsth 1 1 6.957497 6.957497 8504 +thelectur 1 1 6.957497 6.957497 8505 +textbookmodern 1 1 6.957497 6.957497 8506 +pohl 1 1 6.957497 6.957497 8507 +systemsandobject 1 1 6.957497 6.957497 8508 +sectionslectur 1 1 6.957497 6.957497 8509 +sciencesdiscuss 1 1 6.957497 6.957497 8510 +nolandnot 1 1 6.957497 6.957497 8511 +occas 1 1 6.957497 6.957497 8512 +quizzesther 1 1 6.957497 6.957497 8513 +thediscuss 1 1 6.957497 6.957497 8514 +usetrac 1 1 6.957497 6.957497 8515 +setsdur 1 1 6.957497 6.957497 8516 +severalwritten 1 1 6.957497 6.957497 8517 +synchronizationprimit 1 1 6.957497 6.957497 8518 +workassign 1 1 6.957497 6.957497 8519 +assignmentthat 1 1 6.957497 6.957497 8520 +weekof 1 1 6.957497 6.957497 8521 +cheatingprogram 1 1 6.957497 6.957497 8522 +cheater 1 1 6.957497 6.957497 8523 +receivingan 1 1 6.957497 6.957497 8524 +facilitiesw 1 1 6.957497 6.957497 8525 +policyif 1 1 6.957497 6.957497 8526 +beno 1 1 6.957497 6.957497 8527 +scheduleth 1 1 6.957497 6.957497 8528 +processesweek 1 1 6.957497 6.957497 8529 +creationweek 1 1 6.957497 6.957497 8530 +synchronizationweek 1 1 6.957497 6.957497 8531 +semaphoresweek 1 1 6.957497 6.957497 8532 +monitorsweek 1 1 6.957497 6.957497 8533 +deadlocksweek 1 1 6.957497 6.957497 8534 +relocationweek 1 1 6.957497 6.957497 8535 +tlbsweek 1 1 6.957497 6.957497 8536 +filesweek 1 1 6.957497 6.957497 8537 +directoriesweek 1 1 6.957497 6.957497 8538 +protectionweek 1 1 6.957497 6.957497 8539 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..329565a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +follow 1 92 2.397895 2.397895 143 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +requir 1 81 2.484907 2.484907 167 +chang 1 82 2.484907 2.484907 163 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +januari 1 62 2.772589 2.772589 264 +march 1 61 2.833213 2.833213 295 +februari 1 54 2.944439 2.944439 328 +week 1 52 2.995732 2.995732 343 +appoint 1 49 3.044522 3.044522 358 +right 1 48 3.044522 3.044522 363 +could 1 46 3.091042 3.091042 383 +fridai 1 44 3.135494 3.135494 390 +discuss 1 45 3.135494 3.135494 399 +procedur 1 36 3.367296 3.367296 488 +eduoffic 1 33 3.433987 3.433987 531 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +client 1 25 3.737670 3.737670 679 +honor 1 23 3.806662 3.806662 729 +mostli 1 19 4.007333 4.007333 869 +eric 1 19 4.007333 4.007333 870 +miller 1 17 4.110874 4.110874 949 +remot 1 15 4.248495 4.248495 1041 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +larri 1 13 4.382027 4.382027 1142 +wood 1 11 4.553877 4.553877 1355 +bart 1 9 4.753590 4.753590 1559 +laru 1 9 4.753590 4.753590 1560 +madisoncomput 1 5 5.347108 5.347108 2391 +bach 1 4 5.568345 5.568345 2708 +departmentc 1 3 5.857933 5.857933 3395 +millerc 1 3 5.857933 5.857933 3396 +csphone 1 3 5.857933 5.857933 3394 +landweb 1 3 5.857933 5.857933 3402 +bybart 1 3 5.857933 5.857933 3401 +milleremail 1 2 6.263398 6.263398 4556 +noonor 1 2 6.263398 6.263398 4557 +breakweek 1 2 6.263398 6.263398 4566 +seminarunivers 1 1 6.957497 6.957497 8540 +seminarinstructor 1 1 6.957497 6.957497 8541 +lectureslectur 1 1 6.957497 6.957497 8542 +sciencesclass 1 1 6.957497 6.957497 8543 +schedulether 1 1 6.957497 6.957497 8544 +attendal 1 1 6.957497 6.957497 8545 +overviewweek 1 1 6.957497 6.957497 8546 +protocolsweek 1 1 6.957497 6.957497 8547 +callsweek 1 1 6.957497 6.957497 8548 +securityweek 1 1 6.957497 6.957497 8549 +encryptionweek 1 1 6.957497 6.957497 8550 +netweek 1 1 6.957497 6.957497 8551 +systemsweek 1 1 6.957497 6.957497 8552 +supercomputerweek 1 1 6.957497 6.957497 8553 +javaweek 1 1 6.957497 6.957497 8554 +discussionslast 1 1 6.957497 6.957497 8555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..c52e3533 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +file 1 132 1.945910 1.945910 70 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +schedul 1 119 2.079442 2.079442 85 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +text 1 98 2.302585 2.302585 133 +part 1 98 2.302585 2.302585 129 +proceed 1 93 2.397895 2.397895 152 +comment 1 93 2.397895 2.397895 146 +grade 1 90 2.397895 2.397895 142 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +issu 1 78 2.564949 2.564949 211 +good 1 77 2.564949 2.564949 200 +name 1 72 2.639057 2.639057 220 +meet 1 72 2.639057 2.639057 229 +write 1 72 2.639057 2.639057 222 +involv 1 71 2.639057 2.639057 227 +summari 1 73 2.639057 2.639057 237 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +import 1 65 2.772589 2.772589 282 +detail 1 57 2.890372 2.890372 321 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +talk 1 53 2.944439 2.944439 336 +extens 1 53 2.944439 2.944439 340 +week 1 52 2.995732 2.995732 343 +give 1 50 3.044522 3.044522 359 +discuss 1 45 3.135494 3.135494 399 +textbook 1 44 3.135494 3.135494 397 +anoth 1 45 3.135494 3.135494 408 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +review 1 42 3.218876 3.218876 425 +realli 1 40 3.258097 3.258097 444 +form 1 39 3.258097 3.258097 443 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +short 1 36 3.367296 3.367296 499 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +articl 1 33 3.433987 3.433987 530 +idea 1 32 3.465736 3.465736 545 +secur 1 30 3.555348 3.555348 577 +abl 1 30 3.555348 3.555348 566 +synchron 1 29 3.583519 3.583519 588 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +pass 1 28 3.610918 3.610918 611 +relev 1 26 3.688879 3.688879 637 +revis 1 26 3.688879 3.688879 640 +daili 1 24 3.761200 3.761200 706 +try 1 22 3.850148 3.850148 764 +busi 1 21 3.912023 3.912023 784 +kernel 1 20 3.951244 3.951244 825 +longer 1 20 3.951244 3.951244 816 +els 1 19 4.007333 4.007333 843 +listen 1 18 4.060443 4.060443 907 +protect 1 17 4.110874 4.110874 935 +sheet 1 16 4.174387 4.174387 973 +choos 1 16 4.174387 4.174387 964 +critic 1 16 4.174387 4.174387 982 +purchas 1 15 4.248495 4.248495 1030 +score 1 15 4.248495 4.248495 1017 +doit 1 14 4.317488 4.317488 1111 +someon 1 13 4.382027 4.382027 1128 +rest 1 12 4.465908 4.465908 1259 +reader 1 12 4.465908 4.465908 1246 +broad 1 11 4.553877 4.553877 1302 +literatur 1 11 4.553877 4.553877 1300 +success 1 10 4.653960 4.653960 1390 +bart 1 9 4.753590 4.753590 1559 +classmat 1 9 4.753590 4.753590 1516 +theme 1 8 4.875197 4.875197 1707 +opinion 1 8 4.875197 4.875197 1708 +refere 1 7 5.010635 5.010635 1895 +fromth 1 7 5.010635 5.010635 1802 +carefulli 1 6 5.164786 5.164786 2045 +madisoncomput 1 5 5.347108 5.347108 2391 +understood 1 5 5.347108 5.347108 2364 +exposur 1 4 5.568345 5.568345 2598 +twice 1 4 5.568345 5.568345 2614 +will 1 4 5.568345 5.568345 2782 +writer 1 4 5.568345 5.568345 2783 +csoffic 1 4 5.568345 5.568345 2727 +departmentc 1 3 5.857933 5.857933 3395 +millerc 1 3 5.857933 5.857933 3396 +advancedoper 1 3 5.857933 5.857933 3403 +focal 1 3 5.857933 5.857933 3404 +formula 1 3 5.857933 5.857933 3405 +thepap 1 3 5.857933 5.857933 3254 +andon 1 3 5.857933 5.857933 3115 +bybart 1 3 5.857933 5.857933 3401 +satisfactori 1 2 6.263398 6.263398 4567 +andconfer 1 2 6.263398 6.263398 4568 +willinstead 1 2 6.263398 6.263398 4569 +adiscuss 1 2 6.263398 6.263398 4570 +geta 1 2 6.263398 6.263398 4571 +quietli 1 2 6.263398 6.263398 4572 +examsther 1 2 6.263398 6.263398 4149 +assignmenti 1 2 6.263398 6.263398 4573 +systemssummarythi 1 1 6.957497 6.957497 8557 +textther 1 1 6.957497 6.957497 8558 +operatingsystemsclass 1 1 6.957497 6.957497 8559 +meetonc 1 1 6.957497 6.957497 8560 +listaccord 1 1 6.957497 6.957497 8561 +papersindepend 1 1 6.957497 6.957497 8562 +identifyth 1 1 6.957497 6.957497 8563 +discussionsclass 1 1 6.957497 6.957497 8564 +besupport 1 1 6.957497 6.957497 8565 +beveri 1 1 6.957497 6.957497 8566 +unhappi 1 1 6.957497 6.957497 8567 +papersdur 1 1 6.957497 6.957497 8568 +paperwil 1 1 6.957497 6.957497 8569 +facilityand 1 1 6.957497 6.957497 8570 +summaryof 1 1 6.957497 6.957497 8571 +aselect 1 1 6.957497 6.957497 8572 +topicsfrom 1 1 6.957497 6.957497 8573 +fellowstud 1 1 6.957497 6.957497 8574 +giveth 1 1 6.957497 6.957497 8575 +gradesscor 1 1 6.957497 6.957497 8576 +availbl 1 1 6.957497 6.957497 8556 +proposalsi 1 1 6.957497 6.957497 8577 +gradesar 1 1 6.957497 6.957497 8578 +detailstim 1 1 6.957497 6.957497 8579 +noonlast 1 1 6.957497 6.957497 8580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..dd0e853a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +question 1 91 2.397895 2.397895 141 +search 1 95 2.397895 2.397895 155 +requir 1 81 2.484907 2.484907 167 +school 1 84 2.484907 2.484907 188 +solut 1 82 2.484907 2.484907 162 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +want 1 79 2.564949 2.564949 199 +mondai 1 77 2.564949 2.564949 206 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +knowledg 1 67 2.708050 2.708050 243 +receiv 1 66 2.708050 2.708050 244 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +experi 1 64 2.772589 2.772589 283 +descript 1 64 2.772589 2.772589 271 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +simpl 1 60 2.833213 2.833213 298 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +cover 1 55 2.944439 2.944439 329 +instruct 1 53 2.944439 2.944439 332 +particular 1 51 2.995732 2.995732 352 +week 1 52 2.995732 2.995732 343 +run 1 51 2.995732 2.995732 347 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +even 1 45 3.135494 3.135494 393 +textbook 1 44 3.135494 3.135494 397 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +howev 1 41 3.218876 3.218876 422 +littl 1 39 3.258097 3.258097 454 +announc 1 40 3.258097 3.258097 441 +probabl 1 40 3.258097 3.258097 455 +credit 1 38 3.295837 3.295837 460 +microsoft 1 38 3.295837 3.295837 468 +open 1 38 3.295837 3.295837 469 +copyright 1 36 3.367296 3.367296 495 +statist 1 35 3.401197 3.401197 521 +taught 1 33 3.433987 3.433987 526 +depend 1 29 3.583519 3.583519 583 +intend 1 28 3.610918 3.610918 599 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +enabl 1 26 3.688879 3.688879 655 +lab 1 24 3.761200 3.761200 698 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +prepar 1 20 3.951244 3.951244 824 +entir 1 20 3.951244 3.951244 811 +assum 1 19 4.007333 4.007333 845 +exercis 1 19 4.007333 4.007333 842 +lyco 1 19 4.007333 4.007333 871 +across 1 16 4.174387 4.174387 974 +dilbert 1 16 4.174387 4.174387 996 +pagec 1 15 4.248495 4.248495 1011 +fortran 1 15 4.248495 4.248495 1027 +psycholog 1 15 4.248495 4.248495 1054 +purchas 1 15 4.248495 4.248495 1030 +comic 1 14 4.317488 4.317488 1103 +primarili 1 13 4.382027 4.382027 1185 +menu 1 13 4.382027 4.382027 1156 +overal 1 12 4.465908 4.465908 1254 +vectra 1 12 4.465908 4.465908 1267 +holidai 1 12 4.465908 4.465908 1224 +insid 1 12 4.465908 4.465908 1262 +keyword 1 11 4.553877 4.553877 1356 +night 1 11 4.553877 4.553877 1319 +prior 1 10 4.653960 4.653960 1438 +packard 1 10 4.653960 4.653960 1444 +certain 1 10 4.653960 4.653960 1393 +seven 1 9 4.753590 4.753590 1561 +hewlett 1 8 4.875197 4.875197 1709 +printer 1 8 4.875197 4.875197 1621 +elementari 1 7 5.010635 5.010635 1825 +bestor 1 6 5.164786 5.164786 2099 +gareth 1 5 5.347108 5.347108 2392 +relief 1 4 5.568345 5.568345 2784 +punctual 1 3 5.857933 5.857933 3313 +labyou 1 3 5.857933 5.857933 3406 +dorm 1 3 5.857933 5.857933 3407 +lahei 1 3 5.857933 5.857933 3408 +programmingsect 1 2 6.263398 6.263398 4574 +disturb 1 2 6.263398 6.263398 4575 +subroutin 1 2 6.263398 6.263398 4576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..b6a8f59c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +contact 1 153 1.791759 1.791759 59 +algorithm 1 162 1.791759 1.791759 57 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +hall 1 146 1.945910 1.945910 65 +compil 1 122 2.079442 2.079442 96 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +text 1 98 2.302585 2.302585 133 +need 1 98 2.302585 2.302585 135 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +question 1 91 2.397895 2.397895 141 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +academ 1 82 2.484907 2.484907 178 +help 1 83 2.484907 2.484907 175 +requir 1 81 2.484907 2.484907 167 +learn 1 86 2.484907 2.484907 170 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +complet 1 77 2.564949 2.564949 208 +april 1 77 2.564949 2.564949 196 +come 1 78 2.564949 2.564949 202 +exampl 1 77 2.564949 2.564949 195 +good 1 77 2.564949 2.564949 200 +mondai 1 77 2.564949 2.564949 206 +messag 1 76 2.564949 2.564949 212 +want 1 79 2.564949 2.564949 199 +solv 1 73 2.639057 2.639057 234 +line 1 75 2.639057 2.639057 231 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +syllabu 1 67 2.708050 2.708050 247 +receiv 1 66 2.708050 2.708050 244 +would 1 67 2.708050 2.708050 251 +window 1 68 2.708050 2.708050 242 +descript 1 64 2.772589 2.772589 271 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +copi 1 63 2.772589 2.772589 284 +wednesdai 1 64 2.772589 2.772589 261 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +major 1 56 2.890372 2.890372 315 +semest 1 58 2.890372 2.890372 312 +think 1 57 2.890372 2.890372 314 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +februari 1 54 2.944439 2.944439 328 +cover 1 55 2.944439 2.944439 329 +three 1 54 2.944439 2.944439 330 +week 1 52 2.995732 2.995732 343 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +appoint 1 49 3.044522 3.044522 358 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +understand 1 47 3.091042 3.091042 384 +algebra 1 45 3.135494 3.135494 394 +even 1 45 3.135494 3.135494 393 +directori 1 45 3.135494 3.135494 396 +fridai 1 44 3.135494 3.135494 390 +answer 1 45 3.135494 3.135494 391 +netscap 1 44 3.135494 3.135494 395 +textbook 1 44 3.135494 3.135494 397 +term 1 43 3.178054 3.178054 411 +show 1 43 3.178054 3.178054 417 +long 1 43 3.178054 3.178054 413 +edit 1 42 3.218876 3.218876 418 +howev 1 41 3.218876 3.218876 422 +announc 1 40 3.258097 3.258097 441 +must 1 40 3.258097 3.258097 442 +realli 1 40 3.258097 3.258097 444 +error 1 40 3.258097 3.258097 449 +late 1 40 3.258097 3.258097 439 +programm 1 39 3.258097 3.258097 445 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +close 1 38 3.295837 3.295837 465 +microsoft 1 38 3.295837 3.295837 468 +open 1 38 3.295837 3.295837 469 +hand 1 37 3.332205 3.332205 475 +mean 1 37 3.332205 3.332205 477 +respons 1 37 3.332205 3.332205 476 +short 1 36 3.367296 3.367296 499 +copyright 1 36 3.367296 3.367296 495 +approxim 1 35 3.401197 3.401197 509 +everi 1 34 3.401197 3.401197 519 +statist 1 35 3.401197 3.401197 521 +taught 1 33 3.433987 3.433987 526 +board 1 33 3.433987 3.433987 528 +dissert 1 32 3.465736 3.465736 549 +someth 1 31 3.496508 3.496508 554 +rang 1 30 3.555348 3.555348 565 +abl 1 30 3.555348 3.555348 566 +depend 1 29 3.583519 3.583519 583 +intend 1 28 3.610918 3.610918 599 +pass 1 28 3.610918 3.610918 611 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +relev 1 26 3.688879 3.688879 637 +consult 1 24 3.761200 3.761200 687 +lab 1 24 3.761200 3.761200 698 +tent 1 22 3.850148 3.850148 739 +instead 1 22 3.850148 3.850148 756 +try 1 22 3.850148 3.850148 764 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +entir 1 20 3.951244 3.951244 811 +longer 1 20 3.951244 3.951244 816 +exercis 1 19 4.007333 4.007333 842 +thur 1 19 4.007333 4.007333 847 +lyco 1 19 4.007333 4.007333 871 +attempt 1 17 4.110874 4.110874 917 +weekli 1 17 4.110874 4.110874 919 +anyth 1 16 4.174387 4.174387 998 +misconduct 1 16 4.174387 4.174387 1003 +explan 1 16 4.174387 4.174387 985 +normal 1 16 4.174387 4.174387 995 +across 1 16 4.174387 4.174387 974 +dilbert 1 16 4.174387 4.174387 996 +pagec 1 15 4.248495 4.248495 1011 +fortran 1 15 4.248495 4.248495 1027 +psycholog 1 15 4.248495 4.248495 1054 +score 1 15 4.248495 4.248495 1017 +contribut 1 15 4.248495 4.248495 1021 +overhead 1 15 4.248495 4.248495 1035 +doesn 1 15 4.248495 4.248495 1055 +purchas 1 15 4.248495 4.248495 1030 +easili 1 14 4.317488 4.317488 1077 +comic 1 14 4.317488 4.317488 1103 +primarili 1 13 4.382027 4.382027 1185 +menu 1 13 4.382027 4.382027 1156 +wait 1 13 4.382027 4.382027 1168 +necessari 1 13 4.382027 4.382027 1147 +step 1 13 4.382027 4.382027 1138 +stai 1 12 4.465908 4.465908 1215 +calcul 1 12 4.465908 4.465908 1268 +outsid 1 12 4.465908 4.465908 1219 +skill 1 12 4.465908 4.465908 1205 +overal 1 12 4.465908 4.465908 1254 +vectra 1 12 4.465908 4.465908 1267 +holidai 1 12 4.465908 4.465908 1224 +insid 1 12 4.465908 4.465908 1262 +extra 1 11 4.553877 4.553877 1312 +regularli 1 11 4.553877 4.553877 1338 +distinguish 1 11 4.553877 4.553877 1357 +keyword 1 11 4.553877 4.553877 1356 +night 1 11 4.553877 4.553877 1319 +bring 1 10 4.653960 4.653960 1430 +cheat 1 10 4.653960 4.653960 1395 +packard 1 10 4.653960 4.653960 1444 +certain 1 10 4.653960 4.653960 1393 +discov 1 9 4.753590 4.753590 1562 +didn 1 9 4.753590 4.753590 1563 +seven 1 9 4.753590 4.753590 1561 +login 1 9 4.753590 4.753590 1550 +familiar 1 9 4.753590 4.753590 1485 +curv 1 8 4.875197 4.875197 1656 +risk 1 8 4.875197 4.875197 1689 +fail 1 8 4.875197 4.875197 1655 +printer 1 8 4.875197 4.875197 1621 +matter 1 8 4.875197 4.875197 1627 +hewlett 1 8 4.875197 4.875197 1709 +friedman 1 7 5.010635 5.010635 1886 +zero 1 7 5.010635 5.010635 1896 +tag 1 7 5.010635 5.010635 1821 +therefor 1 7 5.010635 5.010635 1822 +bestor 1 6 5.164786 5.164786 2099 +constitut 1 6 5.164786 5.164786 2026 +ensur 1 6 5.164786 5.164786 2012 +syntax 1 6 5.164786 5.164786 2030 +gareth 1 5 5.347108 5.347108 2392 +substitut 1 5 5.347108 5.347108 2247 +handin 1 5 5.347108 5.347108 2393 +identif 1 4 5.568345 5.568345 2773 +wear 1 4 5.568345 5.568345 2785 +trivial 1 4 5.568345 5.568345 2786 +relief 1 4 5.568345 5.568345 2784 +punctual 1 3 5.857933 5.857933 3313 +projector 1 3 5.857933 5.857933 3409 +duti 1 3 5.857933 5.857933 3317 +labyou 1 3 5.857933 5.857933 3406 +dorm 1 3 5.857933 5.857933 3407 +lahei 1 3 5.857933 5.857933 3408 +programmingsect 1 2 6.263398 6.263398 4574 +disturb 1 2 6.263398 6.263398 4575 +lowest 1 2 6.263398 6.263398 4565 +regardless 1 2 6.263398 6.263398 4577 +pencil 1 2 6.263398 6.263398 4426 +subroutin 1 2 6.263398 6.263398 4576 +elig 1 1 6.957497 6.957497 8581 +amclick 1 1 6.957497 6.957497 8582 +unsur 1 1 6.957497 6.957497 8583 +notestext 1 1 6.957497 6.957497 8584 +koffman 1 1 6.957497 6.957497 8585 +assignmentsther 1 1 6.957497 6.957497 8586 +gradesheet 1 1 6.957497 6.957497 8587 +pmhow 1 1 6.957497 6.957497 8588 +modem 1 1 6.957497 6.957497 8589 +exerciseson 1 1 6.957497 6.957497 8590 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..37ddaa17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +note 1 142 1.945910 1.945910 67 +object 1 138 1.945910 1.945910 79 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +name 1 72 2.639057 2.639057 220 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +publish 1 57 2.890372 2.890372 326 +week 1 52 2.995732 2.995732 343 +algebra 1 45 3.135494 3.135494 394 +compani 1 41 3.218876 3.218876 423 +announc 1 40 3.258097 3.258097 441 +error 1 40 3.258097 3.258097 449 +origin 1 38 3.295837 3.295837 472 +correct 1 38 3.295837 3.295837 462 +eduoffic 1 33 3.433987 3.433987 531 +ask 1 28 3.610918 3.610918 597 +valu 1 25 3.737670 3.737670 665 +known 1 24 3.761200 3.761200 702 +greg 1 24 3.761200 3.761200 695 +dai 1 22 3.850148 3.850148 753 +output 1 21 3.912023 3.912023 788 +walter 1 17 4.110874 4.110874 950 +quiz 1 16 4.174387 4.174387 990 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +dave 1 14 4.317488 4.317488 1098 +scan 1 12 4.465908 4.465908 1243 +informationemail 1 9 4.753590 4.753590 1564 +sharp 1 6 5.164786 5.164786 2100 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +prog 1 4 5.568345 5.568345 2740 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +objectivesvectra 1 3 5.857933 5.857933 3410 +homeclass 1 3 5.857933 5.857933 3411 +policyl 1 3 5.857933 5.857933 3412 +policyacadem 1 3 5.857933 5.857933 3413 +burnett 1 2 6.263398 6.263398 4578 +consultantssyllabuswork 1 2 6.263398 6.263398 4579 +archivepolici 1 2 6.263398 6.263398 4580 +eggleston 1 2 6.263398 6.263398 4581 +egglestonemail 1 1 6.957497 6.957497 8591 +hourlywork 1 1 6.957497 6.957497 8592 +classread 1 1 6.957497 6.957497 8593 +gradeshomeworkexam 1 1 6.957497 6.957497 8594 +quizzesmiscellan 1 1 6.957497 6.957497 8595 +policytextproblem 1 1 6.957497 6.957497 8596 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..b171ca98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +hour 1 165 1.791759 1.791759 46 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +schedul 1 119 2.079442 2.079442 85 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +text 1 98 2.302585 2.302585 133 +need 1 98 2.302585 2.302585 135 +peopl 1 96 2.302585 2.302585 132 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +grade 1 90 2.397895 2.397895 142 +present 1 91 2.397895 2.397895 145 +requir 1 81 2.484907 2.484907 167 +solut 1 82 2.484907 2.484907 162 +info 1 85 2.484907 2.484907 176 +activ 1 84 2.484907 2.484907 182 +exam 1 86 2.484907 2.484907 169 +second 1 81 2.484907 2.484907 166 +come 1 78 2.564949 2.564949 202 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +differ 1 66 2.708050 2.708050 253 +window 1 68 2.708050 2.708050 242 +import 1 65 2.772589 2.772589 282 +previou 1 62 2.772589 2.772589 290 +copi 1 63 2.772589 2.772589 284 +detail 1 57 2.890372 2.890372 321 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +semest 1 58 2.890372 2.890372 312 +variou 1 56 2.890372 2.890372 317 +cover 1 55 2.944439 2.944439 329 +suggest 1 53 2.944439 2.944439 331 +allow 1 53 2.944439 2.944439 333 +case 1 51 2.995732 2.995732 351 +give 1 50 3.044522 3.044522 359 +appoint 1 49 3.044522 3.044522 358 +discuss 1 45 3.135494 3.135494 399 +review 1 42 3.218876 3.218876 425 +prototyp 1 38 3.295837 3.295837 463 +slide 1 38 3.295837 3.295837 467 +hand 1 37 3.332205 3.332205 475 +either 1 35 3.401197 3.401197 506 +compon 1 30 3.555348 3.555348 570 +secur 1 30 3.555348 3.555348 577 +synchron 1 29 3.583519 3.583519 588 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +propos 1 28 3.610918 3.610918 602 +measur 1 28 3.610918 3.610918 609 +linux 1 27 3.637586 3.637586 631 +team 1 27 3.637586 3.637586 625 +relev 1 26 3.688879 3.688879 637 +instead 1 22 3.850148 3.850148 756 +tent 1 22 3.850148 3.850148 739 +benchmark 1 19 4.007333 4.007333 859 +encourag 1 18 4.060443 4.060443 880 +protect 1 17 4.110874 4.110874 935 +choos 1 16 4.174387 4.174387 964 +purchas 1 15 4.248495 4.248495 1030 +classic 1 14 4.317488 4.317488 1084 +doit 1 14 4.317488 4.317488 1111 +suit 1 13 4.382027 4.382027 1129 +solari 1 12 4.465908 4.465908 1238 +count 1 12 4.465908 4.465908 1239 +broad 1 11 4.553877 4.553877 1302 +strongli 1 10 4.653960 4.653960 1406 +total 1 10 4.653960 4.653960 1398 +theme 1 8 4.875197 4.875197 1707 +formerli 1 5 5.347108 5.347108 2397 +exposur 1 4 5.568345 5.568345 2598 +suno 1 4 5.568345 5.568345 2790 +advancedoper 1 3 5.857933 5.857933 3403 +focal 1 3 5.857933 5.857933 3404 +macc 1 3 5.857933 5.857933 3414 +ofvari 1 2 6.263398 6.263398 4582 +anexperiment 1 2 6.263398 6.263398 4299 +halloffic 1 2 6.263398 6.263398 4583 +deskfor 1 2 6.263398 6.263398 4584 +performanceof 1 2 6.263398 6.263398 4585 +topicsinclud 1 1 6.957497 6.957497 8597 +tochoos 1 1 6.957497 6.957497 8598 +rathera 1 1 6.957497 6.957497 8599 +manya 1 1 6.957497 6.957497 8600 +assig 1 1 6.957497 6.957497 8601 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..08d34b21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +introduct 1 126 2.079442 2.079442 87 +need 1 98 2.302585 2.302585 135 +section 1 94 2.397895 2.397895 149 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +simpl 1 60 2.833213 2.833213 298 +cover 1 55 2.944439 2.944439 329 +basic 1 50 3.044522 3.044522 360 +credit 1 38 3.295837 3.295837 460 +michael 1 35 3.401197 3.401197 514 +jeff 1 25 3.737670 3.737670 673 +half 1 21 3.912023 3.912023 776 +martin 1 21 3.912023 3.912023 794 +prepar 1 20 3.951244 3.951244 824 +fortran 1 15 4.248495 4.248495 1027 +essenti 1 13 4.382027 4.382027 1137 +russel 1 9 4.753590 4.753590 1507 +suffici 1 7 5.010635 5.010635 1897 +lampert 1 5 5.347108 5.347108 2398 +birk 1 4 5.568345 5.568345 2791 +anthoni 1 4 5.568345 5.568345 2792 +toni 1 3 5.857933 5.857933 3415 +hummert 1 3 5.857933 5.857933 3416 +man 1 3 5.857933 5.857933 3417 +silva 1 2 6.263398 6.263398 4586 +sidnei 1 2 6.263398 6.263398 4587 +programmingstructur 1 1 6.957497 6.957497 8602 +elementaryengin 1 1 6.957497 6.957497 8603 +enableth 1 1 6.957497 6.957497 8604 +inelementari 1 1 6.957497 6.957497 8605 +reameslast 1 1 6.957497 6.957497 8606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..9c21169d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +databas 1 122 2.079442 2.079442 86 +teach 1 108 2.197225 2.197225 112 +site 1 106 2.197225 2.197225 119 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +larg 1 82 2.484907 2.484907 168 +mondai 1 77 2.564949 2.564949 206 +name 1 72 2.639057 2.639057 220 +wednesdai 1 64 2.772589 2.772589 261 +dept 1 64 2.772589 2.772589 291 +virtual 1 62 2.772589 2.772589 285 +creat 1 63 2.772589 2.772589 277 +explor 1 58 2.890372 2.890372 324 +hardwar 1 51 2.995732 2.995732 350 +maintain 1 51 2.995732 2.995732 342 +appoint 1 49 3.044522 3.044522 358 +cool 1 49 3.044522 3.044522 374 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +microsoft 1 38 3.295837 3.295837 468 +origin 1 38 3.295837 3.295837 472 +especi 1 36 3.367296 3.367296 496 +jame 1 35 3.401197 3.401197 507 +yahoo 1 24 3.761200 3.761200 707 +andrew 1 19 4.007333 4.007333 849 +lyco 1 19 4.007333 4.007333 871 +excel 1 19 4.007333 4.007333 868 +intel 1 16 4.174387 4.174387 1000 +novel 1 15 4.248495 4.248495 1039 +borland 1 14 4.317488 4.317488 1067 +whose 1 13 4.382027 4.382027 1166 +gupta 1 12 4.465908 4.465908 1241 +packard 1 10 4.653960 4.653960 1444 +classifi 1 9 4.753590 4.753590 1537 +hewlett 1 8 4.875197 4.875197 1709 +tourist 1 8 4.875197 4.875197 1710 +chiang 1 7 5.010635 5.010635 1853 +sciencesoffic 1 6 5.164786 5.164786 2101 +mother 1 6 5.164786 5.164786 2083 +alphabet 1 6 5.164786 5.164786 1980 +teitelbaum 1 6 5.164786 5.164786 2102 +categori 1 5 5.347108 5.347108 2261 +lookup 1 5 5.347108 5.347108 2399 +kelli 1 4 5.568345 5.568345 2793 +nathan 1 4 5.568345 5.568345 2794 +ratliff 1 3 5.857933 5.857933 3419 +bockrath 1 3 5.857933 5.857933 3420 +fink 1 3 5.857933 5.857933 3425 +ashraf 1 3 5.857933 5.857933 3421 +aboulnaga 1 3 5.857933 5.857933 3426 +geeri 1 3 5.857933 5.857933 3422 +jherro 1 3 5.857933 5.857933 3427 +abhinav 1 3 5.857933 5.857933 3428 +agupta 1 3 5.857933 5.857933 3429 +jyothi 1 3 5.857933 5.857933 3423 +suhui 1 3 5.857933 5.857933 3430 +thano 1 3 5.857933 5.857933 3424 +tsioli 1 3 5.857933 5.857933 3418 +enorm 1 3 5.857933 5.857933 3431 +keyinstructorprofessor 1 2 6.263398 6.263398 4589 +desautelsoffic 1 2 6.263398 6.263398 4590 +assistantsfollow 1 2 6.263398 6.263398 4591 +rehnuma 1 2 6.263398 6.263398 4588 +rahman 1 2 6.263398 6.263398 4592 +jaim 1 2 6.263398 6.263398 4593 +jfink 1 2 6.263398 6.263398 4594 +herro 1 2 6.263398 6.263398 4595 +krothap 1 2 6.263398 6.263398 4596 +gradesexplor 1 2 6.263398 6.263398 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..9c21169d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +databas 1 122 2.079442 2.079442 86 +teach 1 108 2.197225 2.197225 112 +site 1 106 2.197225 2.197225 119 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +larg 1 82 2.484907 2.484907 168 +mondai 1 77 2.564949 2.564949 206 +name 1 72 2.639057 2.639057 220 +wednesdai 1 64 2.772589 2.772589 261 +dept 1 64 2.772589 2.772589 291 +virtual 1 62 2.772589 2.772589 285 +creat 1 63 2.772589 2.772589 277 +explor 1 58 2.890372 2.890372 324 +hardwar 1 51 2.995732 2.995732 350 +maintain 1 51 2.995732 2.995732 342 +appoint 1 49 3.044522 3.044522 358 +cool 1 49 3.044522 3.044522 374 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +microsoft 1 38 3.295837 3.295837 468 +origin 1 38 3.295837 3.295837 472 +especi 1 36 3.367296 3.367296 496 +jame 1 35 3.401197 3.401197 507 +yahoo 1 24 3.761200 3.761200 707 +andrew 1 19 4.007333 4.007333 849 +lyco 1 19 4.007333 4.007333 871 +excel 1 19 4.007333 4.007333 868 +intel 1 16 4.174387 4.174387 1000 +novel 1 15 4.248495 4.248495 1039 +borland 1 14 4.317488 4.317488 1067 +whose 1 13 4.382027 4.382027 1166 +gupta 1 12 4.465908 4.465908 1241 +packard 1 10 4.653960 4.653960 1444 +classifi 1 9 4.753590 4.753590 1537 +hewlett 1 8 4.875197 4.875197 1709 +tourist 1 8 4.875197 4.875197 1710 +chiang 1 7 5.010635 5.010635 1853 +sciencesoffic 1 6 5.164786 5.164786 2101 +mother 1 6 5.164786 5.164786 2083 +alphabet 1 6 5.164786 5.164786 1980 +teitelbaum 1 6 5.164786 5.164786 2102 +categori 1 5 5.347108 5.347108 2261 +lookup 1 5 5.347108 5.347108 2399 +kelli 1 4 5.568345 5.568345 2793 +nathan 1 4 5.568345 5.568345 2794 +ratliff 1 3 5.857933 5.857933 3419 +bockrath 1 3 5.857933 5.857933 3420 +fink 1 3 5.857933 5.857933 3425 +ashraf 1 3 5.857933 5.857933 3421 +aboulnaga 1 3 5.857933 5.857933 3426 +geeri 1 3 5.857933 5.857933 3422 +jherro 1 3 5.857933 5.857933 3427 +abhinav 1 3 5.857933 5.857933 3428 +agupta 1 3 5.857933 5.857933 3429 +jyothi 1 3 5.857933 5.857933 3423 +suhui 1 3 5.857933 5.857933 3430 +thano 1 3 5.857933 5.857933 3424 +tsioli 1 3 5.857933 5.857933 3418 +enorm 1 3 5.857933 5.857933 3431 +keyinstructorprofessor 1 2 6.263398 6.263398 4589 +desautelsoffic 1 2 6.263398 6.263398 4590 +assistantsfollow 1 2 6.263398 6.263398 4591 +rehnuma 1 2 6.263398 6.263398 4588 +rahman 1 2 6.263398 6.263398 4592 +jaim 1 2 6.263398 6.263398 4593 +jfink 1 2 6.263398 6.263398 4594 +herro 1 2 6.263398 6.263398 4595 +krothap 1 2 6.263398 6.263398 4596 +gradesexplor 1 2 6.263398 6.263398 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..5913b238 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +lectur 1 135 1.945910 1.945910 73 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +assign 1 135 1.945910 1.945910 66 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +octob 1 89 2.397895 2.397895 156 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +learn 1 86 2.484907 2.484907 170 +exam 1 86 2.484907 2.484907 169 +contain 1 81 2.484907 2.484907 174 +issu 1 78 2.564949 2.564949 211 +tuesdai 1 73 2.639057 2.639057 219 +intellig 1 72 2.639057 2.639057 225 +addit 1 74 2.639057 2.639057 228 +thursdai 1 70 2.708050 2.708050 241 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +syllabu 1 67 2.708050 2.708050 247 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +point 1 58 2.890372 2.890372 319 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +detail 1 57 2.890372 2.890372 321 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +electron 1 47 3.091042 3.091042 379 +discuss 1 45 3.135494 3.135494 399 +netscap 1 44 3.135494 3.135494 395 +term 1 43 3.178054 3.178054 411 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +word 1 34 3.401197 3.401197 508 +eduoffic 1 33 3.433987 3.433987 531 +taught 1 33 3.433987 3.433987 526 +storag 1 31 3.496508 3.496508 553 +compon 1 30 3.555348 3.555348 570 +held 1 28 3.610918 3.610918 600 +background 1 25 3.737670 3.737670 664 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +newsgroup 1 21 3.912023 3.912023 783 +expert 1 20 3.951244 3.951244 833 +qualiti 1 20 3.951244 3.951244 832 +excel 1 19 4.007333 4.007333 868 +macintosh 1 17 4.110874 4.110874 920 +regular 1 17 4.110874 4.110874 929 +devic 1 16 4.174387 4.174387 1002 +draw 1 14 4.317488 4.317488 1086 +social 1 13 4.382027 4.382027 1123 +necessari 1 13 4.382027 4.382027 1147 +quizz 1 13 4.382027 4.382027 1151 +skill 1 12 4.465908 4.465908 1205 +desktop 1 10 4.653960 4.653960 1445 +rich 1 10 4.653960 4.653960 1396 +telecommun 1 9 4.753590 4.753590 1565 +zero 1 7 5.010635 5.010635 1896 +shot 1 7 5.010635 5.010635 1898 +necessarili 1 7 5.010635 5.010635 1899 +thegoal 1 6 5.164786 5.164786 2033 +lloyd 1 6 5.164786 5.164786 2103 +paint 1 5 5.347108 5.347108 2400 +bodner 1 5 5.347108 5.347108 2401 +chart 1 4 5.568345 5.568345 2653 +assignmentsand 1 4 5.568345 5.568345 2760 +glanc 1 4 5.568345 5.568345 2652 +salli 1 3 5.857933 5.857933 3432 +facstaff 1 3 5.857933 5.857933 3433 +drag 1 3 5.857933 5.857933 3434 +crack 1 3 5.857933 5.857933 3435 +macintoshcomput 1 3 5.857933 5.857933 3228 +iici 1 3 5.857933 5.857933 3436 +scanner 1 3 5.857933 5.857933 3437 +leavi 1 3 5.857933 5.857933 3438 +sharenow 1 3 5.857933 5.857933 3439 +swander 1 3 5.857933 5.857933 3440 +thayer 1 3 5.857933 5.857933 3441 +varghes 1 3 5.857933 5.857933 3442 +weinberg 1 3 5.857933 5.857933 3443 +computersinstructor 1 2 6.263398 6.263398 4600 +petersonoffic 1 2 6.263398 6.263398 4601 +sciencephon 1 2 6.263398 6.263398 4602 +slpeter 1 2 6.263398 6.263398 4603 +appointmentvit 1 2 6.263398 6.263398 4604 +halllectur 1 2 6.263398 6.263398 4605 +laudon 1 2 6.263398 6.263398 4606 +traver 1 2 6.263398 6.263398 4607 +laudonlab 1 2 6.263398 6.263398 4608 +petersoncours 1 2 6.263398 6.263398 4609 +computersto 1 2 6.263398 6.263398 4610 +throughcolleg 1 2 6.263398 6.263398 4611 +arena 1 2 6.263398 6.263398 4612 +csuse 1 2 6.263398 6.263398 4613 +spreadsheet 1 2 6.263398 6.263398 4598 +experienceon 1 2 6.263398 6.263398 4614 +eudora 1 2 6.263398 6.263398 4615 +aldu 1 2 6.263398 6.263398 4599 +superpaint 1 2 6.263398 6.263398 4616 +filemak 1 2 6.263398 6.263398 4617 +hypercard 1 2 6.263398 6.263398 4618 +pagemak 1 2 6.263398 6.263398 4619 +educationalexperi 1 2 6.263398 6.263398 4620 +namesectiontimedai 1 2 6.263398 6.263398 4621 +mwnick 1 2 6.263398 6.263398 4622 +mwtrshannon 1 2 6.263398 6.263398 4623 +trtrjeff 1 2 6.263398 6.263398 4624 +reminga 1 2 6.263398 6.263398 4625 +mwfmwira 1 2 6.263398 6.263398 4626 +trtrbrian 1 2 6.263398 6.263398 4627 +mwfmwfbrad 1 2 6.263398 6.263398 4628 +mwfmwfjoe 1 2 6.263398 6.263398 4629 +trtrgeoff 1 2 6.263398 6.263398 4630 +mwftrmaria 1 2 6.263398 6.263398 4631 +yuin 1 2 6.263398 6.263398 4632 +mwfmwrecommend 1 2 6.263398 6.263398 4633 +nitti 1 2 6.263398 6.263398 4634 +gritti 1 2 6.263398 6.263398 4635 +superpaintassign 1 2 6.263398 6.263398 4636 +excellast 1 2 6.263398 6.263398 4637 +jonbodn 1 2 6.263398 6.263398 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..5913b238 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +lectur 1 135 1.945910 1.945910 73 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +assign 1 135 1.945910 1.945910 66 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +octob 1 89 2.397895 2.397895 156 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +learn 1 86 2.484907 2.484907 170 +exam 1 86 2.484907 2.484907 169 +contain 1 81 2.484907 2.484907 174 +issu 1 78 2.564949 2.564949 211 +tuesdai 1 73 2.639057 2.639057 219 +intellig 1 72 2.639057 2.639057 225 +addit 1 74 2.639057 2.639057 228 +thursdai 1 70 2.708050 2.708050 241 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +syllabu 1 67 2.708050 2.708050 247 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +point 1 58 2.890372 2.890372 319 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +detail 1 57 2.890372 2.890372 321 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +electron 1 47 3.091042 3.091042 379 +discuss 1 45 3.135494 3.135494 399 +netscap 1 44 3.135494 3.135494 395 +term 1 43 3.178054 3.178054 411 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +word 1 34 3.401197 3.401197 508 +eduoffic 1 33 3.433987 3.433987 531 +taught 1 33 3.433987 3.433987 526 +storag 1 31 3.496508 3.496508 553 +compon 1 30 3.555348 3.555348 570 +held 1 28 3.610918 3.610918 600 +background 1 25 3.737670 3.737670 664 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +newsgroup 1 21 3.912023 3.912023 783 +expert 1 20 3.951244 3.951244 833 +qualiti 1 20 3.951244 3.951244 832 +excel 1 19 4.007333 4.007333 868 +macintosh 1 17 4.110874 4.110874 920 +regular 1 17 4.110874 4.110874 929 +devic 1 16 4.174387 4.174387 1002 +draw 1 14 4.317488 4.317488 1086 +social 1 13 4.382027 4.382027 1123 +necessari 1 13 4.382027 4.382027 1147 +quizz 1 13 4.382027 4.382027 1151 +skill 1 12 4.465908 4.465908 1205 +desktop 1 10 4.653960 4.653960 1445 +rich 1 10 4.653960 4.653960 1396 +telecommun 1 9 4.753590 4.753590 1565 +zero 1 7 5.010635 5.010635 1896 +shot 1 7 5.010635 5.010635 1898 +necessarili 1 7 5.010635 5.010635 1899 +thegoal 1 6 5.164786 5.164786 2033 +lloyd 1 6 5.164786 5.164786 2103 +paint 1 5 5.347108 5.347108 2400 +bodner 1 5 5.347108 5.347108 2401 +chart 1 4 5.568345 5.568345 2653 +assignmentsand 1 4 5.568345 5.568345 2760 +glanc 1 4 5.568345 5.568345 2652 +salli 1 3 5.857933 5.857933 3432 +facstaff 1 3 5.857933 5.857933 3433 +drag 1 3 5.857933 5.857933 3434 +crack 1 3 5.857933 5.857933 3435 +macintoshcomput 1 3 5.857933 5.857933 3228 +iici 1 3 5.857933 5.857933 3436 +scanner 1 3 5.857933 5.857933 3437 +leavi 1 3 5.857933 5.857933 3438 +sharenow 1 3 5.857933 5.857933 3439 +swander 1 3 5.857933 5.857933 3440 +thayer 1 3 5.857933 5.857933 3441 +varghes 1 3 5.857933 5.857933 3442 +weinberg 1 3 5.857933 5.857933 3443 +computersinstructor 1 2 6.263398 6.263398 4600 +petersonoffic 1 2 6.263398 6.263398 4601 +sciencephon 1 2 6.263398 6.263398 4602 +slpeter 1 2 6.263398 6.263398 4603 +appointmentvit 1 2 6.263398 6.263398 4604 +halllectur 1 2 6.263398 6.263398 4605 +laudon 1 2 6.263398 6.263398 4606 +traver 1 2 6.263398 6.263398 4607 +laudonlab 1 2 6.263398 6.263398 4608 +petersoncours 1 2 6.263398 6.263398 4609 +computersto 1 2 6.263398 6.263398 4610 +throughcolleg 1 2 6.263398 6.263398 4611 +arena 1 2 6.263398 6.263398 4612 +csuse 1 2 6.263398 6.263398 4613 +spreadsheet 1 2 6.263398 6.263398 4598 +experienceon 1 2 6.263398 6.263398 4614 +eudora 1 2 6.263398 6.263398 4615 +aldu 1 2 6.263398 6.263398 4599 +superpaint 1 2 6.263398 6.263398 4616 +filemak 1 2 6.263398 6.263398 4617 +hypercard 1 2 6.263398 6.263398 4618 +pagemak 1 2 6.263398 6.263398 4619 +educationalexperi 1 2 6.263398 6.263398 4620 +namesectiontimedai 1 2 6.263398 6.263398 4621 +mwnick 1 2 6.263398 6.263398 4622 +mwtrshannon 1 2 6.263398 6.263398 4623 +trtrjeff 1 2 6.263398 6.263398 4624 +reminga 1 2 6.263398 6.263398 4625 +mwfmwira 1 2 6.263398 6.263398 4626 +trtrbrian 1 2 6.263398 6.263398 4627 +mwfmwfbrad 1 2 6.263398 6.263398 4628 +mwfmwfjoe 1 2 6.263398 6.263398 4629 +trtrgeoff 1 2 6.263398 6.263398 4630 +mwftrmaria 1 2 6.263398 6.263398 4631 +yuin 1 2 6.263398 6.263398 4632 +mwfmwrecommend 1 2 6.263398 6.263398 4633 +nitti 1 2 6.263398 6.263398 4634 +gritti 1 2 6.263398 6.263398 4635 +superpaintassign 1 2 6.263398 6.263398 4636 +excellast 1 2 6.263398 6.263398 4637 +jonbodn 1 2 6.263398 6.263398 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..092999d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +compil 1 122 2.079442 2.079442 96 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +academ 1 82 2.484907 2.484907 178 +would 1 67 2.708050 2.708050 251 +window 1 68 2.708050 2.708050 242 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +creat 1 63 2.772589 2.772589 277 +overview 1 56 2.890372 2.890372 323 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +algebra 1 45 3.135494 3.135494 394 +netscap 1 44 3.135494 3.135494 395 +offer 1 43 3.178054 3.178054 414 +microsoft 1 38 3.295837 3.295837 468 +ask 1 28 3.610918 3.610918 597 +jeff 1 25 3.737670 3.737670 673 +consult 1 24 3.761200 3.761200 687 +feedback 1 19 4.007333 4.007333 854 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +coordin 1 13 4.382027 4.382027 1182 +savitch 1 12 4.465908 4.465908 1269 +mainli 1 10 4.653960 4.653960 1432 +hint 1 10 4.653960 4.653960 1419 +tutor 1 9 4.753590 4.753590 1552 +pagecomput 1 7 5.010635 5.010635 1900 +skrentni 1 6 5.164786 5.164786 2104 +lampert 1 5 5.347108 5.347108 2398 +complaint 1 4 5.568345 5.568345 2795 +microcomput 1 3 5.857933 5.857933 3444 +instructorsw 1 2 6.263398 6.263398 4639 +csinform 1 2 6.263398 6.263398 4640 +subdirectoriesc 1 2 6.263398 6.263398 4641 +environmentfortran 1 2 6.263398 6.263398 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..27f7262b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +algorithm 1 162 1.791759 1.791759 57 +construct 1 139 1.945910 1.945910 82 +problem 1 147 1.945910 1.945910 75 +high 1 130 2.079442 2.079442 101 +mathemat 1 108 2.197225 2.197225 123 +instructor 1 108 2.197225 2.197225 107 +advanc 1 99 2.302585 2.302585 130 +techniqu 1 99 2.302585 2.302585 138 +school 1 84 2.484907 2.484907 188 +orient 1 80 2.564949 2.564949 205 +solv 1 73 2.639057 2.639057 234 +logic 1 71 2.639057 2.639057 230 +experi 1 64 2.772589 2.772589 283 +colleg 1 61 2.833213 2.833213 300 +undergradu 1 54 2.944439 2.944439 338 +instruct 1 53 2.944439 2.944439 332 +open 1 38 3.295837 3.295837 469 +procedur 1 36 3.367296 3.367296 488 +least 1 35 3.401197 3.401197 516 +survei 1 35 3.401197 3.401197 513 +statist 1 35 3.401197 3.401197 521 +prepar 1 20 3.951244 3.951244 824 +fortran 1 15 4.248495 4.248495 1027 +pascal 1 12 4.465908 4.465908 1213 +consent 1 5 5.347108 5.347108 2389 +prereq 1 3 5.857933 5.857933 3178 +infocours 1 2 6.263398 6.263398 4212 +guidebook 1 2 6.263398 6.263398 4643 +cscours 1 1 6.957497 6.957497 8607 +descriptionfrom 1 1 6.957497 6.957497 8608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..092999d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +compil 1 122 2.079442 2.079442 96 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +academ 1 82 2.484907 2.484907 178 +would 1 67 2.708050 2.708050 251 +window 1 68 2.708050 2.708050 242 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +creat 1 63 2.772589 2.772589 277 +overview 1 56 2.890372 2.890372 323 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +algebra 1 45 3.135494 3.135494 394 +netscap 1 44 3.135494 3.135494 395 +offer 1 43 3.178054 3.178054 414 +microsoft 1 38 3.295837 3.295837 468 +ask 1 28 3.610918 3.610918 597 +jeff 1 25 3.737670 3.737670 673 +consult 1 24 3.761200 3.761200 687 +feedback 1 19 4.007333 4.007333 854 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +coordin 1 13 4.382027 4.382027 1182 +savitch 1 12 4.465908 4.465908 1269 +mainli 1 10 4.653960 4.653960 1432 +hint 1 10 4.653960 4.653960 1419 +tutor 1 9 4.753590 4.753590 1552 +pagecomput 1 7 5.010635 5.010635 1900 +skrentni 1 6 5.164786 5.164786 2104 +lampert 1 5 5.347108 5.347108 2398 +complaint 1 4 5.568345 5.568345 2795 +microcomput 1 3 5.857933 5.857933 3444 +instructorsw 1 2 6.263398 6.263398 4639 +csinform 1 2 6.263398 6.263398 4640 +subdirectoriesc 1 2 6.263398 6.263398 4641 +environmentfortran 1 2 6.263398 6.263398 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..270e2c41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +check 1 115 2.197225 2.197225 118 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +solv 1 73 2.639057 2.639057 234 +servic 1 72 2.639057 2.639057 236 +syllabu 1 67 2.708050 2.708050 247 +copi 1 63 2.772589 2.772589 284 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +direct 1 57 2.890372 2.890372 316 +suggest 1 53 2.944439 2.944439 331 +local 1 55 2.944439 2.944439 334 +directori 1 45 3.135494 3.135494 396 +examin 1 42 3.218876 3.218876 424 +past 1 42 3.218876 3.218876 428 +obtain 1 33 3.433987 3.433987 534 +lab 1 24 3.761200 3.761200 698 +viewer 1 21 3.912023 3.912023 787 +explan 1 16 4.174387 4.174387 985 +readm 1 8 4.875197 4.875197 1699 +ghost 1 2 6.263398 6.263398 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..270e2c41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +check 1 115 2.197225 2.197225 118 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +solv 1 73 2.639057 2.639057 234 +servic 1 72 2.639057 2.639057 236 +syllabu 1 67 2.708050 2.708050 247 +copi 1 63 2.772589 2.772589 284 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +direct 1 57 2.890372 2.890372 316 +suggest 1 53 2.944439 2.944439 331 +local 1 55 2.944439 2.944439 334 +directori 1 45 3.135494 3.135494 396 +examin 1 42 3.218876 3.218876 424 +past 1 42 3.218876 3.218876 428 +obtain 1 33 3.433987 3.433987 534 +lab 1 24 3.761200 3.761200 698 +viewer 1 21 3.912023 3.912023 787 +explan 1 16 4.174387 4.174387 985 +readm 1 8 4.875197 4.875197 1699 +ghost 1 2 6.263398 6.263398 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..1f0deee3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +book 1 99 2.302585 2.302585 131 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +octob 1 89 2.397895 2.397895 156 +graphic 1 90 2.397895 2.397895 147 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +decemb 1 80 2.564949 2.564949 215 +interfac 1 79 2.564949 2.564949 209 +html 1 75 2.639057 2.639057 235 +simul 1 66 2.708050 2.708050 255 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +septemb 1 65 2.772589 2.772589 274 +wednesdai 1 64 2.772589 2.772589 261 +previou 1 62 2.772589 2.772589 290 +overview 1 56 2.890372 2.890372 323 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +case 1 51 2.995732 2.995732 351 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +featur 1 46 3.091042 3.091042 386 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +programm 1 39 3.258097 3.258097 445 +probabl 1 40 3.258097 3.258097 455 +open 1 38 3.295837 3.295837 469 +procedur 1 36 3.367296 3.367296 488 +manual 1 35 3.401197 3.401197 504 +represent 1 35 3.401197 3.401197 512 +chapter 1 32 3.465736 3.465736 536 +ask 1 28 3.610918 3.610918 597 +except 1 28 3.610918 3.610918 607 +revis 1 26 3.688879 3.688879 640 +miller 1 17 4.110874 4.110874 949 +regist 1 17 4.110874 4.110874 938 +quiz 1 16 4.174387 4.174387 990 +psycholog 1 15 4.248495 4.248495 1054 +quizz 1 13 4.382027 4.382027 1151 +calcul 1 12 4.465908 4.465908 1268 +assembl 1 12 4.465908 4.465908 1207 +arithmet 1 10 4.653960 4.653960 1388 +tuth 1 9 4.753590 4.753590 1519 +float 1 9 4.753590 4.753590 1504 +integ 1 8 4.875197 4.875197 1688 +lookup 1 5 5.347108 5.347108 2399 +cancel 1 4 5.568345 5.568345 2746 +karen 1 4 5.568345 5.568345 2796 +jerri 1 3 5.857933 5.857933 3445 +suen 1 3 5.857933 5.857933 3446 +asgarian 1 3 5.857933 5.857933 3447 +architecur 1 3 5.857933 5.857933 3448 +tusch 1 2 6.263398 6.263398 4645 +tutsch 1 2 6.263398 6.263398 4646 +execpc 1 2 6.263398 6.263398 4647 +nolandsect 1 2 6.263398 6.263398 4648 +smoler 1 2 6.263398 6.263398 4649 +sunlung 1 2 6.263398 6.263398 4650 +ssuen 1 2 6.263398 6.263398 4651 +edusridevi 1 2 6.263398 6.263398 4652 +bhamidipati 1 2 6.263398 6.263398 4653 +bsri 1 2 6.263398 6.263398 4654 +edumohammad 1 2 6.263398 6.263398 4655 +programs 1 2 6.263398 6.263398 4656 +examsal 1 2 6.263398 6.263398 4657 +noteskaren 1 2 6.263398 6.263398 4658 +updatedmondai 1 2 6.263398 6.263398 4659 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..14802141 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +book 1 99 2.302585 2.302585 131 +advanc 1 99 2.302585 2.302585 130 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +octob 1 89 2.397895 2.397895 156 +graphic 1 90 2.397895 2.397895 147 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +decemb 1 80 2.564949 2.564949 215 +interfac 1 79 2.564949 2.564949 209 +html 1 75 2.639057 2.639057 235 +simul 1 66 2.708050 2.708050 255 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +septemb 1 65 2.772589 2.772589 274 +wednesdai 1 64 2.772589 2.772589 261 +previou 1 62 2.772589 2.772589 290 +overview 1 56 2.890372 2.890372 323 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +case 1 51 2.995732 2.995732 351 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +featur 1 46 3.091042 3.091042 386 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +offer 1 43 3.178054 3.178054 414 +programm 1 39 3.258097 3.258097 445 +probabl 1 40 3.258097 3.258097 455 +open 1 38 3.295837 3.295837 469 +procedur 1 36 3.367296 3.367296 488 +manual 1 35 3.401197 3.401197 504 +represent 1 35 3.401197 3.401197 512 +chapter 1 32 3.465736 3.465736 536 +option 1 30 3.555348 3.555348 575 +ask 1 28 3.610918 3.610918 597 +except 1 28 3.610918 3.610918 607 +revis 1 26 3.688879 3.688879 640 +miller 1 17 4.110874 4.110874 949 +regist 1 17 4.110874 4.110874 938 +quiz 1 16 4.174387 4.174387 990 +sign 1 16 4.174387 4.174387 970 +psycholog 1 15 4.248495 4.248495 1054 +quizz 1 13 4.382027 4.382027 1151 +calcul 1 12 4.465908 4.465908 1268 +assembl 1 12 4.465908 4.465908 1207 +arithmet 1 10 4.653960 4.653960 1388 +tuth 1 9 4.753590 4.753590 1519 +float 1 9 4.753590 4.753590 1504 +rais 1 8 4.875197 4.875197 1711 +integ 1 8 4.875197 4.875197 1688 +difficult 1 6 5.164786 5.164786 2035 +lookup 1 5 5.347108 5.347108 2399 +cancel 1 4 5.568345 5.568345 2746 +karen 1 4 5.568345 5.568345 2796 +jerri 1 3 5.857933 5.857933 3445 +suen 1 3 5.857933 5.857933 3446 +asgarian 1 3 5.857933 5.857933 3447 +architecur 1 3 5.857933 5.857933 3448 +tusch 1 2 6.263398 6.263398 4645 +tutsch 1 2 6.263398 6.263398 4646 +execpc 1 2 6.263398 6.263398 4647 +nolandsect 1 2 6.263398 6.263398 4648 +smoler 1 2 6.263398 6.263398 4649 +sunlung 1 2 6.263398 6.263398 4650 +ssuen 1 2 6.263398 6.263398 4651 +edusridevi 1 2 6.263398 6.263398 4652 +bhamidipati 1 2 6.263398 6.263398 4653 +bsri 1 2 6.263398 6.263398 4654 +edumohammad 1 2 6.263398 6.263398 4655 +programs 1 2 6.263398 6.263398 4656 +examsal 1 2 6.263398 6.263398 4657 +noteskaren 1 2 6.263398 6.263398 4658 +updatedmondai 1 2 6.263398 6.263398 4659 +programa 1 1 6.957497 6.957497 8609 +programb 1 1 6.957497 6.957497 8610 +cumul 1 1 6.957497 6.957497 8611 +desperateto 1 1 6.957497 6.957497 8612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..f16988d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +oper 1 180 1.609438 1.609438 34 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +need 1 98 2.302585 2.302585 135 +memori 1 101 2.302585 2.302585 139 +search 1 95 2.397895 2.397895 155 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +dynam 1 76 2.564949 2.564949 194 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +addit 1 74 2.639057 2.639057 228 +line 1 75 2.639057 2.639057 231 +simul 1 66 2.708050 2.708050 255 +copi 1 63 2.772589 2.772589 284 +creat 1 63 2.772589 2.772589 277 +wednesdai 1 64 2.772589 2.772589 261 +locat 1 59 2.833213 2.833213 303 +browser 1 56 2.890372 2.890372 313 +unix 1 58 2.890372 2.890372 308 +sampl 1 53 2.944439 2.944439 339 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +pointer 1 48 3.044522 3.044522 368 +basic 1 50 3.044522 3.044522 360 +get 1 46 3.091042 3.091042 380 +done 1 47 3.091042 3.091042 381 +directori 1 45 3.135494 3.135494 396 +discuss 1 45 3.135494 3.135494 399 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +announc 1 40 3.258097 3.258097 441 +error 1 40 3.258097 3.258097 449 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +tree 1 36 3.367296 3.367296 492 +either 1 35 3.401197 3.401197 506 +jame 1 35 3.401197 3.401197 507 +chapter 1 32 3.465736 3.465736 536 +common 1 30 3.555348 3.555348 574 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +becom 1 28 3.610918 3.610918 603 +lab 1 24 3.761200 3.761200 698 +sort 1 22 3.850148 3.850148 738 +binari 1 20 3.951244 3.951244 823 +reserv 1 20 3.951244 3.951244 808 +alloc 1 20 3.951244 3.951244 821 +thur 1 19 4.007333 4.007333 847 +comparison 1 19 4.007333 4.007333 863 +attend 1 18 4.060443 4.060443 893 +otherwis 1 17 4.110874 4.110874 922 +balanc 1 14 4.317488 4.317488 1112 +recurs 1 13 4.382027 4.382027 1127 +magic 1 11 4.553877 4.553877 1358 +wendt 1 10 4.653960 4.653960 1446 +queue 1 10 4.653960 4.653960 1386 +stack 1 10 4.653960 4.653960 1389 +cheng 1 10 4.653960 4.653960 1381 +kurt 1 9 4.753590 4.753590 1548 +unusu 1 9 4.753590 4.753590 1566 +forget 1 8 4.875197 4.875197 1712 +reload 1 8 4.875197 4.875197 1682 +hash 1 8 4.875197 4.875197 1618 +skrentni 1 6 5.164786 5.164786 2104 +handin 1 5 5.347108 5.347108 2393 +skip 1 5 5.347108 5.347108 2402 +overload 1 5 5.347108 5.347108 2403 +billi 1 5 5.347108 5.347108 2404 +outdat 1 4 5.568345 5.568345 2797 +appendix 1 4 5.568345 5.568345 2739 +makeup 1 3 5.857933 5.857933 3449 +vega 1 3 5.857933 5.857933 3450 +lec 1 2 6.263398 6.263398 4661 +structureslectur 1 2 6.263398 6.263398 4662 +psychologylectur 1 2 6.263398 6.263398 4663 +psychologycours 1 2 6.263398 6.263398 4664 +stale 1 2 6.263398 6.263398 4660 +baicheng 1 2 6.263398 6.263398 4665 +liao 1 2 6.263398 6.263398 4666 +bail 1 2 6.263398 6.263398 4667 +jiacheng 1 2 6.263398 6.263398 4668 +pmcopyright 1 2 6.263398 6.263398 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..c91ecbe2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,549 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +read 1 154 1.791759 1.791759 47 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +document 1 121 2.079442 2.079442 89 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +welcom 1 122 2.079442 2.079442 99 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +peopl 1 96 2.302585 2.302585 132 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +search 1 95 2.397895 2.397895 155 +call 1 91 2.397895 2.397895 153 +follow 1 92 2.397895 2.397895 143 +octob 1 89 2.397895 2.397895 156 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +wide 1 84 2.484907 2.484907 185 +requir 1 81 2.484907 2.484907 167 +build 1 85 2.484907 2.484907 184 +activ 1 84 2.484907 2.484907 182 +contain 1 81 2.484907 2.484907 174 +stuff 1 87 2.484907 2.484907 171 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +thing 1 84 2.484907 2.484907 189 +learn 1 86 2.484907 2.484907 170 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +want 1 79 2.564949 2.564949 199 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +decemb 1 80 2.564949 2.564949 215 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +state 1 76 2.564949 2.564949 207 +appear 1 78 2.564949 2.564949 210 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +materi 1 75 2.639057 2.639057 221 +david 1 71 2.639057 2.639057 232 +addit 1 74 2.639057 2.639057 228 +effici 1 73 2.639057 2.639057 233 +name 1 72 2.639057 2.639057 220 +line 1 75 2.639057 2.639057 231 +appli 1 71 2.639057 2.639057 226 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +goal 1 66 2.708050 2.708050 250 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +main 1 67 2.708050 2.708050 256 +new 1 64 2.772589 2.772589 262 +polici 1 64 2.772589 2.772589 279 +abstract 1 62 2.772589 2.772589 276 +written 1 63 2.772589 2.772589 278 +street 1 63 2.772589 2.772589 293 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +descript 1 64 2.772589 2.772589 271 +previou 1 62 2.772589 2.772589 290 +result 1 65 2.772589 2.772589 281 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +room 1 59 2.833213 2.833213 301 +share 1 59 2.833213 2.833213 304 +best 1 59 2.833213 2.833213 299 +type 1 61 2.833213 2.833213 296 +unix 1 58 2.890372 2.890372 308 +faculti 1 56 2.890372 2.890372 325 +major 1 56 2.890372 2.890372 315 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +sever 1 56 2.890372 2.890372 322 +reason 1 57 2.890372 2.890372 318 +special 1 56 2.890372 2.890372 320 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +sampl 1 53 2.944439 2.944439 339 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +cover 1 55 2.944439 2.944439 329 +suggest 1 53 2.944439 2.944439 331 +local 1 55 2.944439 2.944439 334 +instruct 1 53 2.944439 2.944439 332 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +appoint 1 49 3.044522 3.044522 358 +still 1 50 3.044522 3.044522 362 +basic 1 50 3.044522 3.044522 360 +right 1 48 3.044522 3.044522 363 +numer 1 49 3.044522 3.044522 369 +give 1 50 3.044522 3.044522 359 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +possibl 1 47 3.091042 3.091042 378 +understand 1 47 3.091042 3.091042 384 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +midterm 1 45 3.135494 3.135494 392 +anoth 1 45 3.135494 3.135494 408 +even 1 45 3.135494 3.135494 393 +answer 1 45 3.135494 3.135494 391 +made 1 44 3.135494 3.135494 398 +describ 1 45 3.135494 3.135494 400 +show 1 43 3.178054 3.178054 417 +long 1 43 3.178054 3.178054 413 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +howev 1 41 3.218876 3.218876 422 +late 1 40 3.258097 3.258097 439 +littl 1 39 3.258097 3.258097 454 +must 1 40 3.258097 3.258097 442 +multipl 1 39 3.258097 3.258097 453 +continu 1 39 3.258097 3.258097 448 +tutori 1 39 3.258097 3.258097 437 +error 1 40 3.258097 3.258097 449 +open 1 38 3.295837 3.295837 469 +correct 1 38 3.295837 3.295837 462 +mean 1 37 3.332205 3.332205 477 +field 1 37 3.332205 3.332205 482 +respons 1 37 3.332205 3.332205 476 +expect 1 37 3.332205 3.332205 484 +purpos 1 37 3.332205 3.332205 481 +short 1 36 3.367296 3.367296 499 +download 1 36 3.367296 3.367296 489 +soon 1 36 3.367296 3.367296 494 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +either 1 35 3.401197 3.401197 506 +approxim 1 35 3.401197 3.401197 509 +everi 1 34 3.401197 3.401197 519 +singl 1 34 3.401197 3.401197 510 +next 1 34 3.401197 3.401197 517 +product 1 33 3.433987 3.433987 527 +within 1 33 3.433987 3.433987 525 +go 1 33 3.433987 3.433987 529 +express 1 32 3.465736 3.465736 540 +ad 1 32 3.465736 3.465736 544 +given 1 32 3.465736 3.465736 538 +often 1 31 3.496508 3.496508 551 +posit 1 31 3.496508 3.496508 552 +computersci 1 30 3.555348 3.555348 562 +abl 1 30 3.555348 3.555348 566 +hard 1 30 3.555348 3.555348 563 +turn 1 29 3.583519 3.583519 586 +limit 1 29 3.583519 3.583519 585 +actual 1 28 3.610918 3.610918 604 +load 1 28 3.610918 3.610918 601 +except 1 28 3.610918 3.610918 607 +intend 1 28 3.610918 3.610918 599 +full 1 28 3.610918 3.610918 615 +becom 1 28 3.610918 3.610918 603 +held 1 28 3.610918 3.610918 600 +manipul 1 27 3.637586 3.637586 624 +determin 1 27 3.637586 3.637586 630 +administr 1 27 3.637586 3.637586 628 +quit 1 27 3.637586 3.637586 633 +subject 1 26 3.688879 3.688879 647 +bound 1 26 3.688879 3.688879 659 +consist 1 26 3.688879 3.688879 651 +comp 1 26 3.688879 3.688879 650 +session 1 26 3.688879 3.688879 643 +reliabl 1 25 3.737670 3.737670 674 +never 1 25 3.737670 3.737670 671 +valu 1 25 3.737670 3.737670 665 +aspect 1 25 3.737670 3.737670 663 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +handl 1 24 3.761200 3.761200 685 +sometim 1 24 3.761200 3.761200 696 +wish 1 24 3.761200 3.761200 692 +size 1 23 3.806662 3.806662 713 +input 1 23 3.806662 3.806662 727 +begin 1 23 3.806662 3.806662 716 +variabl 1 23 3.806662 3.806662 715 +period 1 22 3.850148 3.850148 743 +recommend 1 22 3.850148 3.850148 737 +almost 1 22 3.850148 3.850148 742 +sort 1 22 3.850148 3.850148 738 +identifi 1 22 3.850148 3.850148 760 +inth 1 22 3.850148 3.850148 741 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +tell 1 21 3.912023 3.912023 777 +output 1 21 3.912023 3.912023 788 +prepar 1 20 3.951244 3.951244 824 +binari 1 20 3.951244 3.951244 823 +grad 1 20 3.951244 3.951244 837 +sure 1 20 3.951244 3.951244 813 +minut 1 20 3.951244 3.951244 810 +qualiti 1 20 3.951244 3.951244 832 +scheme 1 20 3.951244 3.951244 818 +break 1 20 3.951244 3.951244 812 +item 1 19 4.007333 4.007333 856 +ever 1 19 4.007333 4.007333 872 +separ 1 19 4.007333 4.007333 844 +exercis 1 19 4.007333 4.007333 842 +log 1 19 4.007333 4.007333 857 +five 1 19 4.007333 4.007333 841 +assum 1 19 4.007333 4.007333 845 +particularli 1 19 4.007333 4.007333 867 +encourag 1 18 4.060443 4.060443 880 +account 1 18 4.060443 4.060443 882 +record 1 18 4.060443 4.060443 890 +accept 1 18 4.060443 4.060443 879 +along 1 18 4.060443 4.060443 878 +behavior 1 18 4.060443 4.060443 881 +four 1 18 4.060443 4.060443 905 +element 1 18 4.060443 4.060443 895 +attend 1 18 4.060443 4.060443 893 +debug 1 17 4.110874 4.110874 944 +outlin 1 17 4.110874 4.110874 914 +segment 1 17 4.110874 4.110874 931 +macintosh 1 17 4.110874 4.110874 920 +women 1 16 4.174387 4.174387 1004 +earli 1 16 4.174387 4.174387 968 +normal 1 16 4.174387 4.174387 995 +easi 1 16 4.174387 4.174387 969 +style 1 15 4.248495 4.248495 1036 +purchas 1 15 4.248495 4.248495 1030 +later 1 15 4.248495 4.248495 1043 +capabl 1 15 4.248495 4.248495 1016 +incomput 1 14 4.317488 4.317488 1096 +consider 1 14 4.317488 4.317488 1076 +doit 1 14 4.317488 4.317488 1111 +near 1 14 4.317488 4.317488 1091 +floor 1 14 4.317488 4.317488 1070 +manner 1 14 4.317488 4.317488 1074 +easili 1 14 4.317488 4.317488 1077 +convent 1 14 4.317488 4.317488 1072 +menu 1 13 4.382027 4.382027 1156 +someon 1 13 4.382027 4.382027 1128 +wait 1 13 4.382027 4.382027 1168 +incorpor 1 13 4.382027 4.382027 1163 +necessari 1 13 4.382027 4.382027 1147 +readi 1 12 4.465908 4.465908 1242 +grow 1 12 4.465908 4.465908 1209 +pascal 1 12 4.465908 4.465908 1213 +dewitt 1 12 4.465908 4.465908 1270 +onth 1 12 4.465908 4.465908 1218 +weight 1 12 4.465908 4.465908 1204 +count 1 12 4.465908 4.465908 1239 +robust 1 12 4.465908 4.465908 1271 +reader 1 12 4.465908 4.465908 1246 +cycl 1 11 4.553877 4.553877 1335 +extra 1 11 4.553877 4.553877 1312 +extrem 1 11 4.553877 4.553877 1330 +arbitrari 1 11 4.553877 4.553877 1359 +statement 1 11 4.553877 4.553877 1313 +loop 1 11 4.553877 4.553877 1310 +typic 1 11 4.553877 4.553877 1360 +summar 1 11 4.553877 4.553877 1295 +submiss 1 11 4.553877 4.553877 1298 +cheat 1 10 4.653960 4.653960 1395 +true 1 10 4.653960 4.653960 1422 +equal 1 10 4.653960 4.653960 1424 +hint 1 10 4.653960 4.653960 1419 +awai 1 10 4.653960 4.653960 1447 +certain 1 10 4.653960 4.653960 1393 +thecomput 1 10 4.653960 4.653960 1408 +modular 1 10 4.653960 4.653960 1392 +label 1 10 4.653960 4.653960 1423 +strongli 1 10 4.653960 4.653960 1406 +notat 1 9 4.753590 4.753590 1489 +end 1 9 4.753590 4.753590 1567 +wall 1 9 4.753590 4.753590 1553 +frank 1 9 4.753590 4.753590 1568 +mention 1 9 4.753590 4.753590 1569 +andth 1 9 4.753590 4.753590 1481 +familiar 1 9 4.753590 4.753590 1485 +equival 1 9 4.753590 4.753590 1496 +prefer 1 9 4.753590 4.753590 1491 +criteria 1 9 4.753590 4.753590 1477 +correctli 1 9 4.753590 4.753590 1478 +informationabout 1 9 4.753590 4.753590 1515 +unusu 1 9 4.753590 4.753590 1566 +clear 1 9 4.753590 4.753590 1488 +assumpt 1 9 4.753590 4.753590 1514 +declar 1 9 4.753590 4.753590 1526 +pick 1 9 4.753590 4.753590 1498 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +simpli 1 8 4.875197 4.875197 1626 +isbn 1 7 5.010635 5.010635 1901 +exactli 1 7 5.010635 5.010635 1817 +explain 1 7 5.010635 5.010635 1816 +pursu 1 7 5.010635 5.010635 1902 +paramet 1 7 5.010635 5.010635 1796 +whenev 1 7 5.010635 5.010635 1883 +bug 1 7 5.010635 5.010635 1801 +header 1 7 5.010635 5.010635 1787 +sciencesoffic 1 6 5.164786 5.164786 2101 +extern 1 6 5.164786 5.164786 2105 +mirror 1 6 5.164786 5.164786 2028 +notifi 1 6 5.164786 5.164786 2106 +wrong 1 6 5.164786 5.164786 2025 +approv 1 6 5.164786 5.164786 2078 +troubl 1 6 5.164786 5.164786 2002 +desk 1 5 5.347108 5.347108 2297 +supplement 1 5 5.347108 5.347108 2355 +chemistri 1 5 5.347108 5.347108 2405 +sparcstat 1 5 5.347108 5.347108 2406 +caus 1 5 5.347108 5.347108 2298 +respond 1 5 5.347108 5.347108 2354 +situat 1 5 5.347108 5.347108 2365 +blow 1 5 5.347108 5.347108 2407 +skip 1 5 5.347108 5.347108 2402 +thiscours 1 4 5.568345 5.568345 2601 +expens 1 4 5.568345 5.568345 2678 +repeat 1 4 5.568345 5.568345 2798 +suppli 1 4 5.568345 5.568345 2611 +tire 1 4 5.568345 5.568345 2799 +ghostview 1 3 5.857933 5.857933 3163 +eduand 1 3 5.857933 5.857933 3452 +tremend 1 3 5.857933 5.857933 3453 +narr 1 3 5.857933 5.857933 3454 +gradingther 1 3 5.857933 5.857933 3455 +programmingassign 1 3 5.857933 5.857933 3398 +thesear 1 3 5.857933 5.857933 3456 +thec 1 3 5.857933 5.857933 3132 +duedat 1 3 5.857933 5.857933 3105 +helpif 1 3 5.857933 5.857933 3126 +confus 1 3 5.857933 5.857933 3144 +ineffici 1 3 5.857933 5.857933 3457 +useof 1 3 5.857933 5.857933 3368 +meaning 1 3 5.857933 5.857933 3458 +argument 1 3 5.857933 5.857933 3120 +neg 1 3 5.857933 5.857933 3451 +briefli 1 3 5.857933 5.857933 3459 +urg 1 3 5.857933 5.857933 3212 +comfort 1 3 5.857933 5.857933 3136 +pain 1 3 5.857933 5.857933 3460 +amoffic 1 2 6.263398 6.263398 4671 +femal 1 2 6.263398 6.263398 4672 +wic 1 2 6.263398 6.263398 4673 +oneof 1 2 6.263398 6.263398 4674 +tomak 1 2 6.263398 6.263398 4675 +startup 1 2 6.263398 6.263398 4676 +textth 1 2 6.263398 6.263398 4677 +carrano 1 2 6.263398 6.263398 4678 +lecturenot 1 2 6.263398 6.263398 4679 +notesar 1 2 6.263398 6.263398 4559 +invalu 1 2 6.263398 6.263398 4680 +nonetheless 1 2 6.263398 6.263398 4681 +thatyou 1 2 6.263398 6.263398 4682 +youwork 1 2 6.263398 6.263398 4083 +provis 1 2 6.263398 6.263398 4683 +excus 1 2 6.263398 6.263398 4684 +datastructur 1 2 6.263398 6.263398 4685 +tovisit 1 2 6.263398 6.263398 4686 +behav 1 2 6.263398 6.263398 4670 +clariti 1 2 6.263398 6.263398 4413 +andlog 1 2 6.263398 6.263398 4104 +facet 1 2 6.263398 6.263398 4687 +unnecessarili 1 2 6.263398 6.263398 4688 +liter 1 2 6.263398 6.263398 4689 +indent 1 2 6.263398 6.263398 4374 +convei 1 2 6.263398 6.263398 4690 +beavoid 1 2 6.263398 6.263398 4411 +thefirst 1 2 6.263398 6.263398 4092 +structuresfal 1 1 6.957497 6.957497 8616 +htmlinstructor 1 1 6.957497 6.957497 8617 +newsassign 1 1 6.957497 6.957497 8618 +statisticssom 1 1 6.957497 6.957497 8619 +median 1 1 6.957497 6.957497 8620 +midterma 1 1 6.957497 6.957497 8621 +oldmidterm 1 1 6.957497 6.957497 8622 +ownmidterm 1 1 6.957497 6.957497 8623 +searchth 1 1 6.957497 6.957497 8624 +filemenu 1 1 6.957497 6.957497 8625 +andchoos 1 1 6.957497 6.957497 8626 +sciencesom 1 1 6.957497 6.957497 8627 +haveform 1 1 6.957497 6.957497 8628 +becomecomput 1 1 6.957497 6.957497 8629 +thisclass 1 1 6.957497 6.957497 8630 +withtheir 1 1 6.957497 6.957497 8631 +classwork 1 1 6.957497 6.957497 8632 +suzan 1 1 6.957497 6.957497 8614 +stodder 1 1 6.957497 6.957497 8633 +theodd 1 1 6.957497 6.957497 8634 +statementi 1 1 6.957497 6.957497 8635 +aniniti 1 1 6.957497 6.957497 8636 +exceptionsy 1 1 6.957497 6.957497 8637 +isdata 1 1 6.957497 6.957497 8638 +notnecessari 1 1 6.957497 6.957497 8639 +isveri 1 1 6.957497 6.957497 8640 +whichar 1 1 6.957497 6.957497 8641 +entranceof 1 1 6.957497 6.957497 8642 +needsom 1 1 6.957497 6.957497 8643 +handoutc 1 1 6.957497 6.957497 8644 +althoughi 1 1 6.957497 6.957497 8645 +courseof 1 1 6.957497 6.957497 8646 +apoint 1 1 6.957497 6.957497 8647 +prerequisitecours 1 1 6.957497 6.957497 8648 +certainrestrict 1 1 6.957497 6.957497 8649 +emailand 1 1 6.957497 6.957497 8650 +toyour 1 1 6.957497 6.957497 8651 +runwith 1 1 6.957497 6.957497 8652 +inassign 1 1 6.957497 6.957497 8653 +allelectron 1 1 6.957497 6.957497 8654 +policyno 1 1 6.957497 6.957497 8655 +coincid 1 1 6.957497 6.957497 8656 +oneach 1 1 6.957497 6.957497 8657 +thelast 1 1 6.957497 6.957497 8658 +cheatingth 1 1 6.957497 6.957497 8659 +linest 1 1 6.957497 6.957497 8660 +tocommun 1 1 6.957497 6.957497 8661 +butther 1 1 6.957497 6.957497 8662 +obei 1 1 6.957497 6.957497 8663 +policiesgovern 1 1 6.957497 6.957497 8664 +policiesif 1 1 6.957497 6.957497 8665 +currenthard 1 1 6.957497 6.957497 8666 +conceptsthat 1 1 6.957497 6.957497 8667 +emailsever 1 1 6.957497 6.957497 8668 +gradingprogram 1 1 6.957497 6.957497 8669 +typicalinput 1 1 6.957497 6.957497 8670 +projectspecif 1 1 6.957497 6.957497 8671 +shoulddemonstr 1 1 6.957497 6.957497 8672 +includingunusu 1 1 6.957497 6.957497 8673 +considerationof 1 1 6.957497 6.957497 8674 +orcomplex 1 1 6.957497 6.957497 8675 +definedconst 1 1 6.957497 6.957497 8676 +thosevalu 1 1 6.957497 6.957497 8677 +styleus 1 1 6.957497 6.957497 8678 +variable_nam 1 1 6.957497 6.957497 8679 +function_nam 1 1 6.957497 6.957497 8680 +const 1 1 6.957497 6.957497 8681 +defined_const 1 1 6.957497 6.957497 8682 +enum 1 1 6.957497 6.957497 8683 +enumtyp 1 1 6.957497 6.957497 8684 +classnam 1 1 6.957497 6.957497 8685 +notesfor 1 1 6.957497 6.957497 8686 +meaningfulli 1 1 6.957497 6.957497 8687 +documentationthi 1 1 6.957497 6.957497 8688 +yourprogram 1 1 6.957497 6.957497 8689 +someonewho 1 1 6.957497 6.957497 8690 +superfici 1 1 6.957497 6.957497 8691 +unawar 1 1 6.957497 6.957497 8692 +descriptionne 1 1 6.957497 6.957497 8693 +thensuffici 1 1 6.957497 6.957497 8694 +documentationther 1 1 6.957497 6.957497 8695 +structuresshould 1 1 6.957497 6.957497 8696 +membershould 1 1 6.957497 6.957497 8697 +sname 1 1 6.957497 6.957497 8698 +withoutmak 1 1 6.957497 6.957497 8699 +stackyou 1 1 6.957497 6.957497 8700 +tricki 1 1 6.957497 6.957497 8701 +opaqu 1 1 6.957497 6.957497 8702 +commentcan 1 1 6.957497 6.957497 8703 +clarifi 1 1 6.957497 6.957497 8704 +outlineof 1 1 6.957497 6.957497 8705 +vimani 1 1 6.957497 6.957497 8706 +becomecomfort 1 1 6.957497 6.957497 8707 +youronli 1 1 6.957497 6.957497 8708 +macpasc 1 1 6.957497 6.957497 8709 +withunix 1 1 6.957497 6.957497 8710 +wellspent 1 1 6.957497 6.957497 8711 +thefollow 1 1 6.957497 6.957497 8712 +tbayou 1 1 6.957497 6.957497 8713 +inputfil 1 1 6.957497 6.957497 8615 +outputfil 1 1 6.957497 6.957497 8613 +goto 1 1 6.957497 6.957497 8714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..ffcfbc64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +read 1 154 1.791759 1.791759 47 +assign 1 135 1.945910 1.945910 66 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +advanc 1 99 2.302585 2.302585 130 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +wide 1 84 2.484907 2.484907 185 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +activ 1 84 2.484907 2.484907 182 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +chang 1 82 2.484907 2.484907 163 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +stuff 1 87 2.484907 2.484907 171 +school 1 84 2.484907 2.484907 188 +second 1 81 2.484907 2.484907 166 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +complet 1 77 2.564949 2.564949 208 +want 1 79 2.564949 2.564949 199 +appear 1 78 2.564949 2.564949 210 +know 1 80 2.564949 2.564949 198 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +html 1 75 2.639057 2.639057 235 +tuesdai 1 73 2.639057 2.639057 219 +write 1 72 2.639057 2.639057 222 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +materi 1 75 2.639057 2.639057 221 +david 1 71 2.639057 2.639057 232 +free 1 73 2.639057 2.639057 224 +addit 1 74 2.639057 2.639057 228 +name 1 72 2.639057 2.639057 220 +line 1 75 2.639057 2.639057 231 +thursdai 1 70 2.708050 2.708050 241 +abstract 1 62 2.772589 2.772589 276 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +street 1 63 2.772589 2.772589 293 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +function 1 62 2.772589 2.772589 275 +previou 1 62 2.772589 2.772589 290 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +detail 1 57 2.890372 2.890372 321 +unix 1 58 2.890372 2.890372 308 +semest 1 58 2.890372 2.890372 312 +index 1 56 2.890372 2.890372 309 +summer 1 56 2.890372 2.890372 311 +cover 1 55 2.944439 2.944439 329 +maintain 1 51 2.995732 2.995732 342 +run 1 51 2.995732 2.995732 347 +tabl 1 51 2.995732 2.995732 346 +principl 1 48 3.044522 3.044522 357 +give 1 50 3.044522 3.044522 359 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +electron 1 47 3.091042 3.091042 379 +fridai 1 44 3.135494 3.135494 390 +answer 1 45 3.135494 3.135494 391 +textbook 1 44 3.135494 3.135494 397 +even 1 45 3.135494 3.135494 393 +anoth 1 45 3.135494 3.135494 408 +describ 1 45 3.135494 3.135494 400 +long 1 43 3.178054 3.178054 413 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +littl 1 39 3.258097 3.258097 454 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +close 1 38 3.295837 3.295837 465 +connect 1 37 3.332205 3.332205 485 +feel 1 37 3.332205 3.332205 483 +respons 1 37 3.332205 3.332205 476 +short 1 36 3.367296 3.367296 499 +tree 1 36 3.367296 3.367296 492 +jame 1 35 3.401197 3.401197 507 +approxim 1 35 3.401197 3.401197 509 +taught 1 33 3.433987 3.433987 526 +concept 1 32 3.465736 3.465736 537 +given 1 32 3.465736 3.465736 538 +often 1 31 3.496508 3.496508 551 +photo 1 31 3.496508 3.496508 561 +abl 1 30 3.555348 3.555348 566 +produc 1 30 3.555348 3.555348 572 +turn 1 29 3.583519 3.583519 586 +determin 1 27 3.637586 3.637586 630 +manipul 1 27 3.637586 3.637586 624 +revis 1 26 3.688879 3.688879 640 +bound 1 26 3.688879 3.688879 659 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +background 1 25 3.737670 3.737670 664 +alwai 1 24 3.761200 3.761200 691 +size 1 23 3.806662 3.806662 713 +sequenc 1 23 3.806662 3.806662 734 +identifi 1 22 3.850148 3.850148 760 +emphasi 1 22 3.850148 3.850148 755 +recommend 1 22 3.850148 3.850148 737 +sort 1 22 3.850148 3.850148 738 +color 1 22 3.850148 3.850148 762 +basi 1 20 3.951244 3.951244 828 +binari 1 20 3.951244 3.951244 823 +tenni 1 20 3.951244 3.951244 838 +exercis 1 19 4.007333 4.007333 842 +separ 1 19 4.007333 4.007333 844 +log 1 19 4.007333 4.007333 857 +assum 1 19 4.007333 4.007333 845 +five 1 19 4.007333 4.007333 841 +appropri 1 18 4.060443 4.060443 883 +wind 1 18 4.060443 4.060443 908 +account 1 18 4.060443 4.060443 882 +attend 1 18 4.060443 4.060443 893 +record 1 18 4.060443 4.060443 890 +debug 1 17 4.110874 4.110874 944 +outlin 1 17 4.110874 4.110874 914 +white 1 17 4.110874 4.110874 951 +zhang 1 16 4.174387 4.174387 980 +portion 1 16 4.174387 4.174387 971 +purchas 1 15 4.248495 4.248495 1030 +psycholog 1 15 4.248495 4.248495 1054 +later 1 15 4.248495 4.248495 1043 +photograph 1 15 4.248495 4.248495 1056 +score 1 15 4.248495 4.248495 1017 +happi 1 14 4.317488 4.317488 1079 +doit 1 14 4.317488 4.317488 1111 +trip 1 14 4.317488 4.317488 1113 +block 1 13 4.382027 4.382027 1183 +necessari 1 13 4.382027 4.382027 1147 +introduc 1 13 4.382027 4.382027 1139 +essenti 1 13 4.382027 4.382027 1137 +dewitt 1 12 4.465908 4.465908 1270 +weight 1 12 4.465908 4.465908 1204 +count 1 12 4.465908 4.465908 1239 +regularli 1 11 4.553877 4.553877 1338 +modular 1 10 4.653960 4.653960 1392 +sundai 1 10 4.653960 4.653960 1387 +true 1 10 4.653960 4.653960 1422 +strongli 1 10 4.653960 4.653960 1406 +hint 1 10 4.653960 4.653960 1419 +equal 1 10 4.653960 4.653960 1424 +card 1 10 4.653960 4.653960 1435 +black 1 10 4.653960 4.653960 1418 +laru 1 9 4.753590 4.753590 1560 +clear 1 9 4.753590 4.753590 1488 +wall 1 9 4.753590 4.753590 1553 +frank 1 9 4.753590 4.753590 1568 +mention 1 9 4.753590 4.753590 1569 +debugg 1 9 4.753590 4.753590 1493 +notat 1 9 4.753590 4.753590 1489 +login 1 9 4.753590 4.753590 1550 +absolut 1 8 4.875197 4.875197 1646 +integ 1 8 4.875197 4.875197 1688 +hash 1 8 4.875197 4.875197 1618 +isbn 1 7 5.010635 5.010635 1901 +scout 1 7 5.010635 5.010635 1903 +strong 1 6 5.164786 5.164786 2029 +troubl 1 6 5.164786 5.164786 2002 +sciencesoffic 1 6 5.164786 5.164786 2101 +mirror 1 6 5.164786 5.164786 2028 +notifi 1 6 5.164786 5.164786 2106 +skrentni 1 6 5.164786 5.164786 2104 +difficult 1 6 5.164786 5.164786 2035 +rough 1 6 5.164786 5.164786 2107 +byte 1 6 5.164786 5.164786 2108 +situat 1 5 5.347108 5.347108 2365 +chin 1 5 5.347108 5.347108 2408 +tang 1 5 5.347108 5.347108 2409 +skip 1 5 5.347108 5.347108 2402 +desk 1 5 5.347108 5.347108 2297 +crucial 1 5 5.347108 5.347108 2384 +chemistri 1 5 5.347108 5.347108 2405 +girl 1 5 5.347108 5.347108 2410 +assignmentsand 1 4 5.568345 5.568345 2760 +unless 1 4 5.568345 5.568345 2607 +birthdai 1 4 5.568345 5.568345 2800 +fora 1 4 5.568345 5.568345 2697 +reiter 1 3 5.857933 5.857933 3461 +narr 1 3 5.857933 5.857933 3454 +gradingther 1 3 5.857933 5.857933 3455 +freshman 1 3 5.857933 5.857933 3462 +compuer 1 2 6.263398 6.263398 4692 +weiz 1 2 6.263398 6.263398 4693 +amoffic 1 2 6.263398 6.263398 4671 +cchin 1 2 6.263398 6.263398 4691 +textth 1 2 6.263398 6.263398 4677 +carrano 1 2 6.263398 6.263398 4678 +needless 1 2 6.263398 6.263398 4694 +sophomor 1 2 6.263398 6.263398 4695 +databaseof 1 2 6.263398 6.263398 4696 +larusinstructor 1 1 6.957497 6.957497 8715 +laruslaru 1 1 6.957497 6.957497 8716 +amcontentsteach 1 1 6.957497 6.957497 8717 +assistantstextlectur 1 1 6.957497 6.957497 8718 +informationelectron 1 1 6.957497 6.957497 8719 +mailth 1 1 6.957497 6.957497 8720 +languagegradingexamscours 1 1 6.957497 6.957497 8721 +scheduleassign 1 1 6.957497 6.957497 8722 +assignmentscours 1 1 6.957497 6.957497 8723 +objectivesc 1 1 6.957497 6.957497 8724 +assistantswei 1 1 6.957497 6.957497 8725 +forthi 1 1 6.957497 6.957497 8726 +theassign 1 1 6.957497 6.957497 8727 +zhangoffic 1 1 6.957497 6.957497 8728 +entranc 1 1 6.957497 6.957497 8729 +maili 1 1 6.957497 6.957497 8730 +gdbthere 1 1 6.957497 6.957497 8731 +administrationbas 1 1 6.957497 6.957497 8732 +storagelectur 1 1 6.957497 6.957497 8733 +listslectur 1 1 6.957497 6.957497 8734 +stackslectur 1 1 6.957497 6.957497 8735 +queueslectur 1 1 6.957497 6.957497 8736 +hashinglectur 1 1 6.957497 6.957497 8737 +recursionlectur 1 1 6.957497 6.957497 8738 +treesbinari 1 1 6.957497 6.957497 8739 +searchlectur 1 1 6.957497 6.957497 8740 +treesgraphslectur 1 1 6.957497 6.957497 8741 +sortinglectur 1 1 6.957497 6.957497 8742 +tbaassign 1 1 6.957497 6.957497 8743 +nameyear 1 1 6.957497 6.957497 8744 +coursesprevi 1 1 6.957497 6.957497 8745 +experiencerec 1 1 6.957497 6.957497 8746 +tournament 1 1 6.957497 6.957497 8747 +aconcord 1 1 6.957497 6.957497 8748 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..3c188c4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +oper 1 180 1.609438 1.609438 34 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +need 1 98 2.302585 2.302585 135 +memori 1 101 2.302585 2.302585 139 +search 1 95 2.397895 2.397895 155 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +dynam 1 76 2.564949 2.564949 194 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +addit 1 74 2.639057 2.639057 228 +line 1 75 2.639057 2.639057 231 +simul 1 66 2.708050 2.708050 255 +copi 1 63 2.772589 2.772589 284 +creat 1 63 2.772589 2.772589 277 +wednesdai 1 64 2.772589 2.772589 261 +locat 1 59 2.833213 2.833213 303 +browser 1 56 2.890372 2.890372 313 +unix 1 58 2.890372 2.890372 308 +sampl 1 53 2.944439 2.944439 339 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +pointer 1 48 3.044522 3.044522 368 +basic 1 50 3.044522 3.044522 360 +get 1 46 3.091042 3.091042 380 +done 1 47 3.091042 3.091042 381 +directori 1 45 3.135494 3.135494 396 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +announc 1 40 3.258097 3.258097 441 +error 1 40 3.258097 3.258097 449 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +tree 1 36 3.367296 3.367296 492 +either 1 35 3.401197 3.401197 506 +jame 1 35 3.401197 3.401197 507 +chapter 1 32 3.465736 3.465736 536 +common 1 30 3.555348 3.555348 574 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +becom 1 28 3.610918 3.610918 603 +lab 1 24 3.761200 3.761200 698 +sort 1 22 3.850148 3.850148 738 +reserv 1 20 3.951244 3.951244 808 +alloc 1 20 3.951244 3.951244 821 +thur 1 19 4.007333 4.007333 847 +attend 1 18 4.060443 4.060443 893 +otherwis 1 17 4.110874 4.110874 922 +recurs 1 13 4.382027 4.382027 1127 +magic 1 11 4.553877 4.553877 1358 +wendt 1 10 4.653960 4.653960 1446 +queue 1 10 4.653960 4.653960 1386 +stack 1 10 4.653960 4.653960 1389 +cheng 1 10 4.653960 4.653960 1381 +kurt 1 9 4.753590 4.753590 1548 +unusu 1 9 4.753590 4.753590 1566 +forget 1 8 4.875197 4.875197 1712 +reload 1 8 4.875197 4.875197 1682 +hash 1 8 4.875197 4.875197 1618 +skrentni 1 6 5.164786 5.164786 2104 +handin 1 5 5.347108 5.347108 2393 +skip 1 5 5.347108 5.347108 2402 +overload 1 5 5.347108 5.347108 2403 +billi 1 5 5.347108 5.347108 2404 +outdat 1 4 5.568345 5.568345 2797 +appendix 1 4 5.568345 5.568345 2739 +makeup 1 3 5.857933 5.857933 3449 +vega 1 3 5.857933 5.857933 3450 +lec 1 2 6.263398 6.263398 4661 +structureslectur 1 2 6.263398 6.263398 4662 +psychologylectur 1 2 6.263398 6.263398 4663 +psychologycours 1 2 6.263398 6.263398 4664 +stale 1 2 6.263398 6.263398 4660 +baicheng 1 2 6.263398 6.263398 4665 +liao 1 2 6.263398 6.263398 4666 +bail 1 2 6.263398 6.263398 4667 +jiacheng 1 2 6.263398 6.263398 4668 +pmcopyright 1 2 6.263398 6.263398 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..ab7e6a0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +madison 1 165 1.791759 1.791759 55 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +mondai 1 77 2.564949 2.564949 206 +state 1 76 2.564949 2.564949 207 +david 1 71 2.639057 2.639057 232 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +written 1 63 2.772589 2.772589 278 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +archiv 1 49 3.044522 3.044522 364 +fridai 1 44 3.135494 3.135494 390 +term 1 43 3.178054 3.178054 411 +examin 1 42 3.218876 3.218876 424 +theoret 1 39 3.258097 3.258097 446 +brian 1 38 3.295837 3.295837 466 +john 1 33 3.433987 3.433987 532 +hill 1 25 3.737670 3.737670 670 +tent 1 22 3.850148 3.850148 739 +martin 1 21 3.912023 3.912023 794 +north 1 19 4.007333 4.007333 873 +isbn 1 7 5.010635 5.010635 1901 +mcgraw 1 5 5.347108 5.347108 2262 +clarif 1 5 5.347108 5.347108 2253 +sundaram 1 3 5.857933 5.857933 3463 +cole 1 2 6.263398 6.263398 4697 +stukel 1 2 6.263398 6.263398 4698 +dakota 1 1 6.957497 6.957497 8749 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..9487bc27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +madison 1 165 1.791759 1.791759 55 +lectur 1 135 1.945910 1.945910 73 +schedul 1 119 2.079442 2.079442 85 +databas 1 122 2.079442 2.079442 86 +final 1 116 2.197225 2.197225 108 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +book 1 99 2.302585 2.302585 131 +octob 1 89 2.397895 2.397895 156 +exam 1 86 2.484907 2.484907 169 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +solut 1 82 2.484907 2.484907 162 +decemb 1 80 2.564949 2.564949 215 +homework 1 79 2.564949 2.564949 193 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 1 64 2.772589 2.772589 261 +septemb 1 65 2.772589 2.772589 274 +locat 1 59 2.833213 2.833213 303 +march 1 61 2.833213 2.833213 295 +semest 1 58 2.890372 2.890372 312 +overview 1 56 2.890372 2.890372 323 +variou 1 56 2.890372 2.890372 317 +sampl 1 53 2.944439 2.944439 339 +date 1 51 2.995732 2.995732 344 +telephon 1 50 3.044522 3.044522 373 +math 1 44 3.135494 3.135494 402 +midterm 1 45 3.135494 3.135494 392 +textbook 1 44 3.135494 3.135494 397 +mechan 1 43 3.178054 3.178054 416 +linear 1 41 3.218876 3.218876 431 +open 1 38 3.295837 3.295837 469 +comp 1 26 3.688879 3.688879 650 +relev 1 26 3.688879 3.688879 637 +period 1 22 3.850148 3.850148 743 +reserv 1 20 3.951244 3.951244 808 +item 1 19 4.007333 4.007333 856 +stat 1 17 4.110874 4.110874 924 +matlab 1 14 4.317488 4.317488 1081 +doit 1 14 4.317488 4.317488 1111 +wendt 1 10 4.653960 4.653960 1446 +mangasarian 1 9 4.753590 4.753590 1570 +preliminari 1 9 4.753590 4.753590 1480 +kurt 1 9 4.753590 4.753590 1548 +ferri 1 8 4.875197 4.875197 1715 +olvi 1 6 5.164786 5.164786 2109 +setup 1 2 6.263398 6.263398 4211 +bibliograph 1 2 6.263398 6.263398 4699 +programmingfal 1 1 6.957497 6.957497 8750 +pphone 1 1 6.957497 6.957497 8751 +searchabl 1 1 6.957497 6.957497 8752 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..68417e24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +techniqu 1 99 2.302585 2.302585 138 +octob 1 89 2.397895 2.397895 156 +grade 1 90 2.397895 2.397895 142 +librari 1 87 2.484907 2.484907 181 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +overview 1 56 2.890372 2.890372 323 +date 1 51 2.995732 2.995732 344 +telephon 1 50 3.044522 3.044522 373 +appoint 1 49 3.044522 3.044522 358 +principl 1 48 3.044522 3.044522 357 +get 1 46 3.091042 3.091042 380 +fridai 1 44 3.135494 3.135494 390 +examin 1 42 3.218876 3.218876 424 +late 1 40 3.258097 3.258097 439 +comp 1 26 3.688879 3.688879 650 +reserv 1 20 3.951244 3.951244 808 +stat 1 17 4.110874 4.110874 924 +month 1 15 4.248495 4.248495 1025 +psycholog 1 15 4.248495 4.248495 1054 +susan 1 15 4.248495 4.248495 1050 +stori 1 14 4.317488 4.317488 1087 +regularli 1 11 4.553877 4.553877 1338 +wendt 1 10 4.653960 4.653960 1446 +tuth 1 9 4.753590 4.753590 1519 +recit 1 9 4.753590 4.753590 1475 +fischer 1 7 5.010635 5.010635 1893 +horwitz 1 5 5.347108 5.347108 2411 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +ullman 1 4 5.568345 5.568345 2749 +rahul 1 3 5.857933 5.857933 3464 +compilersspr 1 2 6.263398 6.263398 4700 +kapoor 1 2 6.263398 6.263398 4701 +sethi 1 2 6.263398 6.263398 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..8fc3a97c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,527 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +relat 1 139 1.945910 1.945910 68 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +introduct 1 126 2.079442 2.079442 87 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +specif 1 106 2.197225 2.197225 106 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +structur 1 106 2.197225 2.197225 105 +final 1 116 2.197225 2.197225 108 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +follow 1 92 2.397895 2.397895 143 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +exam 1 86 2.484907 2.484907 169 +larg 1 82 2.484907 2.484907 168 +contain 1 81 2.484907 2.484907 174 +second 1 81 2.484907 2.484907 166 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +member 1 84 2.484907 2.484907 165 +wide 1 84 2.484907 2.484907 185 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +librari 1 87 2.484907 2.484907 181 +learn 1 86 2.484907 2.484907 170 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +sourc 1 77 2.564949 2.564949 201 +mondai 1 77 2.564949 2.564949 206 +orient 1 80 2.564949 2.564949 205 +refer 1 78 2.564949 2.564949 203 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +decemb 1 80 2.564949 2.564949 215 +state 1 76 2.564949 2.564949 207 +come 1 78 2.564949 2.564949 202 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +name 1 72 2.639057 2.639057 220 +tuesdai 1 73 2.639057 2.639057 219 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +thursdai 1 70 2.708050 2.708050 241 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +java 1 70 2.708050 2.708050 248 +receiv 1 66 2.708050 2.708050 244 +simul 1 66 2.708050 2.708050 255 +main 1 67 2.708050 2.708050 256 +knowledg 1 67 2.708050 2.708050 243 +view 1 70 2.708050 2.708050 254 +new 1 64 2.772589 2.772589 262 +import 1 65 2.772589 2.772589 282 +creat 1 63 2.772589 2.772589 277 +copi 1 63 2.772589 2.772589 284 +virtual 1 62 2.772589 2.772589 285 +wednesdai 1 64 2.772589 2.772589 261 +collect 1 65 2.772589 2.772589 268 +room 1 59 2.833213 2.833213 301 +type 1 61 2.833213 2.833213 296 +back 1 60 2.833213 2.833213 297 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +space 1 57 2.890372 2.890372 310 +detail 1 57 2.890372 2.890372 321 +semest 1 58 2.890372 2.890372 312 +direct 1 57 2.890372 2.890372 316 +point 1 58 2.890372 2.890372 319 +think 1 57 2.890372 2.890372 314 +unix 1 58 2.890372 2.890372 308 +sever 1 56 2.890372 2.890372 322 +reason 1 57 2.890372 2.890372 318 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +three 1 54 2.944439 2.944439 330 +local 1 55 2.944439 2.944439 334 +processor 1 54 2.944439 2.944439 335 +date 1 51 2.995732 2.995732 344 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +archiv 1 49 3.044522 3.044522 364 +set 1 50 3.044522 3.044522 361 +pointer 1 48 3.044522 3.044522 368 +standard 1 48 3.044522 3.044522 365 +frequent 1 49 3.044522 3.044522 367 +right 1 48 3.044522 3.044522 363 +move 1 47 3.091042 3.091042 382 +electron 1 47 3.091042 3.091042 379 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +get 1 46 3.091042 3.091042 380 +answer 1 45 3.135494 3.135494 391 +midterm 1 45 3.135494 3.135494 392 +discuss 1 45 3.135494 3.135494 399 +directori 1 45 3.135494 3.135494 396 +describ 1 45 3.135494 3.135494 400 +even 1 45 3.135494 3.135494 393 +long 1 43 3.178054 3.178054 413 +show 1 43 3.178054 3.178054 417 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +fast 1 42 3.218876 3.218876 429 +howev 1 41 3.218876 3.218876 422 +error 1 40 3.258097 3.258097 449 +tutori 1 39 3.258097 3.258097 437 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +littl 1 39 3.258097 3.258097 454 +author 1 39 3.258097 3.258097 450 +programm 1 39 3.258097 3.258097 445 +correct 1 38 3.295837 3.295837 462 +seminar 1 38 3.295837 3.295837 470 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +workstat 1 37 3.332205 3.332205 479 +respons 1 37 3.332205 3.332205 476 +feel 1 37 3.332205 3.332205 483 +procedur 1 36 3.367296 3.367296 488 +short 1 36 3.367296 3.367296 499 +copyright 1 36 3.367296 3.367296 495 +post 1 35 3.401197 3.401197 505 +either 1 35 3.401197 3.401197 506 +least 1 35 3.401197 3.401197 516 +jame 1 35 3.401197 3.401197 507 +random 1 34 3.401197 3.401197 511 +concurr 1 34 3.401197 3.401197 501 +manual 1 35 3.401197 3.401197 504 +statist 1 35 3.401197 3.401197 521 +go 1 33 3.433987 3.433987 529 +ad 1 32 3.465736 3.465736 544 +kind 1 32 3.465736 3.465736 541 +chapter 1 32 3.465736 3.465736 536 +storag 1 31 3.496508 3.496508 553 +specifi 1 30 3.555348 3.555348 568 +graph 1 30 3.555348 3.555348 576 +secur 1 30 3.555348 3.555348 577 +option 1 30 3.555348 3.555348 575 +focu 1 30 3.555348 3.555348 571 +synchron 1 29 3.583519 3.583519 588 +built 1 29 3.583519 3.583519 592 +intend 1 28 3.610918 3.610918 599 +packag 1 28 3.610918 3.610918 614 +becom 1 28 3.610918 3.610918 603 +except 1 28 3.610918 3.610918 607 +quit 1 27 3.637586 3.637586 633 +determin 1 27 3.637586 3.637586 630 +arrai 1 27 3.637586 3.637586 627 +comp 1 26 3.688879 3.688879 650 +request 1 26 3.688879 3.688879 635 +session 1 26 3.688879 3.688879 643 +bound 1 26 3.688879 3.688879 659 +detect 1 26 3.688879 3.688879 646 +primari 1 25 3.737670 3.737670 669 +although 1 25 3.737670 3.737670 667 +other 1 24 3.761200 3.761200 697 +thank 1 23 3.806662 3.806662 721 +initi 1 23 3.806662 3.806662 717 +begin 1 23 3.806662 3.806662 716 +input 1 23 3.806662 3.806662 727 +variabl 1 23 3.806662 3.806662 715 +thread 1 23 3.806662 3.806662 722 +togeth 1 23 3.806662 3.806662 714 +sent 1 22 3.850148 3.850148 763 +finish 1 22 3.850148 3.850148 748 +disk 1 22 3.850148 3.850148 747 +recommend 1 22 3.850148 3.850148 737 +dai 1 22 3.850148 3.850148 753 +varieti 1 22 3.850148 3.850148 740 +tent 1 22 3.850148 3.850148 739 +watch 1 21 3.912023 3.912023 789 +latest 1 21 3.912023 3.912023 785 +avoid 1 21 3.912023 3.912023 799 +theunivers 1 21 3.912023 3.912023 797 +path 1 21 3.912023 3.912023 778 +among 1 21 3.912023 3.912023 781 +output 1 21 3.912023 3.912023 788 +programminglanguag 1 21 3.912023 3.912023 782 +entir 1 20 3.951244 3.951244 811 +alloc 1 20 3.951244 3.951244 821 +reserv 1 20 3.951244 3.951244 808 +definit 1 19 4.007333 4.007333 864 +separ 1 19 4.007333 4.007333 844 +thur 1 19 4.007333 4.007333 847 +andrew 1 19 4.007333 4.007333 849 +five 1 19 4.007333 4.007333 841 +exercis 1 19 4.007333 4.007333 842 +runtim 1 19 4.007333 4.007333 858 +histori 1 19 4.007333 4.007333 853 +excel 1 19 4.007333 4.007333 868 +assum 1 19 4.007333 4.007333 845 +lot 1 18 4.060443 4.060443 889 +behavior 1 18 4.060443 4.060443 881 +encourag 1 18 4.060443 4.060443 880 +four 1 18 4.060443 4.060443 905 +sept 1 17 4.110874 4.110874 952 +regist 1 17 4.110874 4.110874 938 +weekli 1 17 4.110874 4.110874 919 +segment 1 17 4.110874 4.110874 931 +protect 1 17 4.110874 4.110874 935 +anyon 1 17 4.110874 4.110874 916 +monitor 1 17 4.110874 4.110874 941 +outlin 1 17 4.110874 4.110874 914 +zhang 1 16 4.174387 4.174387 980 +devic 1 16 4.174387 4.174387 1002 +modern 1 16 4.174387 4.174387 966 +weslei 1 16 4.174387 4.174387 983 +transfer 1 16 4.174387 4.174387 967 +easi 1 16 4.174387 4.174387 969 +choos 1 16 4.174387 4.174387 964 +condit 1 16 4.174387 4.174387 975 +critic 1 16 4.174387 4.174387 982 +score 1 15 4.248495 4.248495 1017 +later 1 15 4.248495 4.248495 1043 +todd 1 15 4.248495 4.248495 1051 +enough 1 15 4.248495 4.248495 1040 +demand 1 14 4.317488 4.317488 1073 +warn 1 14 4.317488 4.317488 1068 +shown 1 14 4.317488 4.317488 1080 +topolog 1 14 4.317488 4.317488 1089 +central 1 13 4.382027 4.382027 1160 +directli 1 13 4.382027 4.382027 1141 +forth 1 13 4.382027 4.382027 1186 +everyth 1 13 4.382027 4.382027 1169 +minor 1 12 4.465908 4.465908 1237 +rememb 1 12 4.465908 4.465908 1217 +solari 1 12 4.465908 4.465908 1238 +readi 1 12 4.465908 4.465908 1242 +addison 1 12 4.465908 4.465908 1230 +grow 1 12 4.465908 4.465908 1209 +readabl 1 12 4.465908 4.465908 1258 +count 1 12 4.465908 4.465908 1239 +buffer 1 12 4.465908 4.465908 1211 +fix 1 11 4.553877 4.553877 1327 +string 1 11 4.553877 4.553877 1340 +tue 1 11 4.553877 4.553877 1308 +regard 1 11 4.553877 4.553877 1309 +extrem 1 11 4.553877 4.553877 1330 +faster 1 11 4.553877 4.553877 1323 +market 1 11 4.553877 4.553877 1361 +grain 1 10 4.653960 4.653960 1448 +paragraph 1 10 4.653960 4.653960 1449 +placement 1 10 4.653960 4.653960 1420 +hint 1 10 4.653960 4.653960 1419 +strongli 1 10 4.653960 4.653960 1406 +cheat 1 10 4.653960 4.653960 1395 +correctli 1 9 4.753590 4.753590 1478 +mention 1 9 4.753590 4.753590 1569 +recoveri 1 9 4.753590 4.753590 1474 +familiar 1 9 4.753590 4.753590 1485 +pair 1 9 4.753590 4.753590 1503 +said 1 9 4.753590 4.753590 1571 +introductori 1 9 4.753590 4.753590 1479 +solomon 1 8 4.875197 4.875197 1716 +star 1 8 4.875197 4.875197 1717 +replac 1 8 4.875197 4.875197 1668 +simpli 1 8 4.875197 4.875197 1626 +rais 1 8 4.875197 4.875197 1711 +partner 1 8 4.875197 4.875197 1648 +crash 1 8 4.875197 4.875197 1616 +switch 1 8 4.875197 4.875197 1718 +gather 1 8 4.875197 4.875197 1719 +bug 1 7 5.010635 5.010635 1801 +slightli 1 7 5.010635 5.010635 1795 +peterson 1 7 5.010635 5.010635 1850 +philosoph 1 7 5.010635 5.010635 1904 +chan 1 7 5.010635 5.010635 1876 +occasion 1 7 5.010635 5.010635 1905 +awar 1 7 5.010635 5.010635 1800 +bookstor 1 7 5.010635 5.010635 1837 +prevent 1 7 5.010635 5.010635 1827 +prentic 1 7 5.010635 5.010635 1838 +spot 1 7 5.010635 5.010635 1894 +fortun 1 7 5.010635 5.010635 1872 +bottom 1 7 5.010635 5.010635 1906 +compact 1 7 5.010635 5.010635 1907 +sciencesoffic 1 6 5.164786 5.164786 2101 +theproject 1 6 5.164786 5.164786 1981 +notifi 1 6 5.164786 5.164786 2106 +nine 1 6 5.164786 5.164786 2047 +mistak 1 6 5.164786 5.164786 2110 +creation 1 6 5.164786 5.164786 2069 +garbag 1 6 5.164786 5.164786 1986 +handi 1 6 5.164786 5.164786 2111 +neither 1 6 5.164786 5.164786 1990 +salt 1 5 5.347108 5.347108 2413 +forprogram 1 5 5.347108 5.347108 2361 +caus 1 5 5.347108 5.347108 2298 +sparcstat 1 5 5.347108 5.347108 2406 +favor 1 5 5.347108 5.347108 2414 +commod 1 5 5.347108 5.347108 2415 +eas 1 5 5.347108 5.347108 2267 +anda 1 5 5.347108 5.347108 2416 +remain 1 5 5.347108 5.347108 2278 +race 1 5 5.347108 5.347108 2417 +deadlock 1 4 5.568345 5.568345 2641 +popular 1 4 5.568345 5.568345 2802 +theprogram 1 4 5.568345 5.568345 2686 +fork 1 4 5.568345 5.568345 2801 +makefil 1 4 5.568345 5.568345 2662 +multitask 1 4 5.568345 5.568345 2803 +systemsand 1 4 5.568345 5.568345 2804 +cshrc 1 4 5.568345 5.568345 2759 +usedto 1 4 5.568345 5.568345 2643 +subsequ 1 4 5.568345 5.568345 2665 +withth 1 4 5.568345 5.568345 2805 +marvin 1 4 5.568345 5.568345 2806 +omit 1 3 5.857933 5.857933 3466 +argument 1 3 5.857933 5.857933 3120 +offset 1 3 5.857933 5.857933 3467 +urgent 1 3 5.857933 5.857933 3316 +listof 1 3 5.857933 5.857933 3322 +sendmail 1 3 5.857933 5.857933 3099 +tanenbaum 1 3 5.857933 5.857933 3397 +dialect 1 3 5.857933 5.857933 3226 +acquaint 1 3 5.857933 5.857933 3468 +subscript 1 3 5.857933 5.857933 3469 +caught 1 3 5.857933 5.857933 3465 +easier 1 3 5.857933 5.857933 3470 +timet 1 3 5.857933 5.857933 3471 +dine 1 3 5.857933 5.857933 3472 +troffic 1 2 6.263398 6.263398 4706 +mellencamp 1 2 6.263398 6.263398 4707 +mellen 1 2 6.263398 6.263398 4708 +breakdown 1 2 6.263398 6.263398 4407 +avaiabl 1 2 6.263398 6.263398 4703 +typo 1 2 6.263398 6.263398 4180 +thejava 1 2 6.263398 6.263398 4704 +tung 1 2 6.263398 6.263398 4709 +preemptiv 1 2 6.263398 6.263398 4319 +colloquia 1 2 6.263398 6.263398 4710 +swap 1 2 6.263398 6.263398 4466 +sciencesand 1 2 6.263398 6.263398 4711 +arnold 1 2 6.263398 6.263398 4705 +tutorialth 1 2 6.263398 6.263398 4453 +designedto 1 2 6.263398 6.263398 4712 +havethre 1 2 6.263398 6.263398 4562 +daysof 1 2 6.263398 6.263398 4563 +eachof 1 2 6.263398 6.263398 4564 +congeni 1 2 6.263398 6.263398 4713 +null 1 2 6.263398 6.263398 4714 +mysteri 1 2 6.263398 6.263398 4715 +char 1 2 6.263398 6.263398 4716 +trendi 1 2 6.263398 6.263398 4717 +coursewil 1 2 6.263398 6.263398 4718 +primer 1 2 6.263398 6.263398 4719 +manualfor 1 2 6.263398 6.263398 4720 +yourgrad 1 2 6.263398 6.263398 4121 +semaphor 1 2 6.263398 6.263398 4555 +terminolog 1 2 6.263398 6.263398 4410 +eduthu 1 2 6.263398 6.263398 4721 +systemssect 1 1 6.957497 6.957497 8761 +instructormarvin 1 1 6.957497 6.957497 8762 +tarob 1 1 6.957497 6.957497 8763 +mwfoffic 1 1 6.957497 6.957497 8764 +distributioni 1 1 6.957497 6.957497 8765 +forproject 1 1 6.957497 6.957497 8754 +typograph 1 1 6.957497 6.957497 8766 +importantli 1 1 6.957497 6.957497 8767 +arraywa 1 1 6.957497 6.957497 8768 +isavail 1 1 6.957497 6.957497 8769 +courseus 1 1 6.957497 6.957497 8770 +likelyb 1 1 6.957497 6.957497 8771 +presentedin 1 1 6.957497 6.957497 8772 +givefork 1 1 6.957497 6.957497 8773 +specificationshould 1 1 6.957497 6.957497 8774 +garbl 1 1 6.957497 6.957497 8775 +jake 1 1 6.957497 6.957497 8776 +dawlei 1 1 6.957497 6.957497 8777 +carr 1 1 6.957497 6.957497 8778 +detailssect 1 1 6.957497 6.957497 8779 +threadschedul 1 1 6.957497 6.957497 8753 +sched 1 1 6.957497 6.957497 8756 +lipe 1 1 6.957497 6.957497 8780 +srccontain 1 1 6.957497 6.957497 8781 +javacontain 1 1 6.957497 6.957497 8782 +classgraphdescrib 1 1 6.957497 6.957497 8783 +petersoncycl 1 1 6.957497 6.957497 8784 +graphcontain 1 1 6.957497 6.957497 8755 +notacycl 1 1 6.957497 6.957497 8785 +petersonacycl 1 1 6.957497 6.957497 8786 +acycl 1 1 6.957497 6.957497 8787 +sharingfork 1 1 6.957497 6.957497 8788 +jenner 1 1 6.957497 6.957497 8789 +maxthink 1 1 6.957497 6.957497 8790 +maxeat 1 1 6.957497 6.957497 8791 +versionha 1 1 6.957497 6.957497 8792 +substr 1 1 6.957497 6.957497 8757 +argumenti 1 1 6.957497 6.957497 8793 +charactersin 1 1 6.957497 6.957497 8794 +franco 1 1 6.957497 6.957497 8795 +maketo 1 1 6.957497 6.957497 8796 +compilewithout 1 1 6.957497 6.957497 8797 +computershav 1 1 6.957497 6.957497 8798 +tutoriali 1 1 6.957497 6.957497 8799 +onthread 1 1 6.957497 6.957497 8800 +checkth 1 1 6.957497 6.957497 8801 +ajava 1 1 6.957497 6.957497 8802 +afil 1 1 6.957497 6.957497 8803 +onelin 1 1 6.957497 6.957497 8804 +localor 1 1 6.957497 6.957497 8805 +csmon 1 1 6.957497 6.957497 8806 +cslast 1 1 6.957497 6.957497 8807 +beprocess 1 1 6.957497 6.957497 8808 +replacementalgorithm 1 1 6.957497 6.957497 8809 +statisticsdiscuss 1 1 6.957497 6.957497 8810 +psychologyth 1 1 6.957497 6.957497 8811 +thejavaprogram 1 1 6.957497 6.957497 8758 +anyquest 1 1 6.957497 6.957497 8812 +thetext 1 1 6.957497 6.957497 8813 +systemsbi 1 1 6.957497 6.957497 8814 +languagebi 1 1 6.957497 6.957497 8759 +gosl 1 1 6.957497 6.957497 8760 +specificationjava 1 1 6.957497 6.957497 8815 +documentationwatch 1 1 6.957497 6.957497 8816 +unixoper 1 1 6.957497 6.957497 8817 +anycomput 1 1 6.957497 6.957497 8818 +requireddata 1 1 6.957497 6.957497 8819 +involveprocess 1 1 6.957497 6.957497 8820 +butyou 1 1 6.957497 6.957497 8821 +vigor 1 1 6.957497 6.957497 8822 +punish 1 1 6.957497 6.957497 8823 +dateind 1 1 6.957497 6.957497 8824 +uniniti 1 1 6.957497 6.957497 8825 +runtimerath 1 1 6.957497 6.957497 8826 +byproduct 1 1 6.957497 6.957497 8827 +withlanguag 1 1 6.957497 6.957497 8828 +alwaysa 1 1 6.957497 6.957497 8829 +disloc 1 1 6.957497 6.957497 8830 +thetransit 1 1 6.957497 6.957497 8831 +amazingli 1 1 6.957497 6.957497 8832 +youalreadi 1 1 6.957497 6.957497 8833 +arefer 1 1 6.957497 6.957497 8834 +manuali 1 1 6.957497 6.957497 8835 +wayfrom 1 1 6.957497 6.957497 8836 +sophisticatedprogram 1 1 6.957497 6.957497 8837 +ofoth 1 1 6.957497 6.957497 8838 +niceonlin 1 1 6.957497 6.957497 8839 +tutorialabout 1 1 6.957497 6.957497 8840 +javaoct 1 1 6.957497 6.957497 8841 +synchronizationoct 1 1 6.957497 6.957497 8842 +schedulingoct 1 1 6.957497 6.957497 8843 +schedulingdec 1 1 6.957497 6.957497 8844 +systemsdec 1 1 6.957497 6.957497 8845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..dd52bbc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +object 1 138 1.945910 1.945910 79 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +solut 1 82 2.484907 2.484907 162 +tuesdai 1 73 2.639057 2.639057 219 +name 1 72 2.639057 2.639057 220 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +virtual 1 62 2.772589 2.772589 285 +space 1 57 2.890372 2.890372 310 +date 1 51 2.995732 2.995732 344 +discuss 1 45 3.135494 3.135494 399 +fridai 1 44 3.135494 3.135494 390 +textbook 1 44 3.135494 3.135494 397 +cach 1 41 3.218876 3.218876 432 +review 1 42 3.218876 3.218876 425 +procedur 1 36 3.367296 3.367296 488 +concurr 1 34 3.401197 3.401197 501 +survei 1 35 3.401197 3.401197 513 +global 1 34 3.401197 3.401197 520 +chapter 1 32 3.465736 3.465736 536 +secur 1 30 3.555348 3.555348 577 +synchron 1 29 3.583519 3.583519 588 +thread 1 23 3.806662 3.806662 722 +cooper 1 22 3.850148 3.850148 757 +monitor 1 17 4.110874 4.110874 941 +protect 1 17 4.110874 4.110874 935 +quiz 1 16 4.174387 4.174387 990 +pagec 1 15 4.248495 4.248495 1011 +remot 1 15 4.248495 4.248495 1041 +demand 1 14 4.317488 4.317488 1073 +quizz 1 13 4.382027 4.382027 1151 +translat 1 13 4.382027 4.382027 1164 +host 1 11 4.553877 4.553877 1306 +vernon 1 9 4.753590 4.753590 1556 +core 1 7 5.010635 5.010635 1809 +mutual 1 5 5.347108 5.347108 2418 +systemsfal 1 4 5.568345 5.568345 2683 +deadlock 1 4 5.568345 5.568345 2641 +thanksgiv 1 2 6.263398 6.263398 4185 +maryvernon 1 1 6.957497 6.957497 8846 +andkarunamuthiah 1 1 6.957497 6.957497 8847 +beinterchang 1 1 6.957497 6.957497 8848 +archiveapproxim 1 1 6.957497 6.957497 8849 +topicsweek 1 1 6.957497 6.957497 8850 +oftopicsreadingsep 1 1 6.957497 6.957497 8851 +processeschapt 1 1 6.957497 6.957497 8852 +threadschapt 1 1 6.957497 6.957497 8853 +exclusioncont 1 1 6.957497 6.957497 8854 +semaphorescont 1 1 6.957497 6.957497 8855 +summarycont 1 1 6.957497 6.957497 8856 +doct 1 1 6.957497 6.957497 8857 +schedulingchapt 1 1 6.957497 6.957497 8858 +tlbschapter 1 1 6.957497 6.957497 8859 +memorycont 1 1 6.957497 6.957497 8860 +systemschapt 1 1 6.957497 6.957497 8861 +directorieschapt 1 1 6.957497 6.957497 8862 +methodstbanov 1 1 6.957497 6.957497 8863 +reviewchapt 1 1 6.957497 6.957497 8864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..f06dfa01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +spring 1 131 2.079442 2.079442 88 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +decemb 1 80 2.564949 2.564949 215 +david 1 71 2.639057 2.639057 232 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +descript 1 64 2.772589 2.772589 271 +previou 1 62 2.772589 2.772589 290 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +sampl 1 53 2.944439 2.944439 339 +appoint 1 49 3.044522 3.044522 358 +get 1 46 3.091042 3.091042 380 +answer 1 45 3.135494 3.135494 391 +midterm 1 45 3.135494 3.135494 392 +error 1 40 3.258097 3.258097 449 +correct 1 38 3.295837 3.295837 462 +demonstr 1 24 3.761200 3.761200 694 +wood 1 11 4.553877 4.553877 1355 +deadlin 1 9 4.753590 4.753590 1502 +phil 1 5 5.347108 5.347108 2419 +mentor 1 4 5.568345 5.568345 2591 +atkinson 1 2 6.263398 6.263398 4722 +vhdl 1 1 6.957497 6.957497 8865 +mentorassign 1 1 6.957497 6.957497 8866 +projectthi 1 1 6.957497 6.957497 8867 +examsth 1 1 6.957497 6.957497 8868 +endterm 1 1 6.957497 6.957497 8869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..75406cf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +topic 1 114 2.197225 2.197225 110 +check 1 115 2.197225 2.197225 118 +code 1 108 2.197225 2.197225 116 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +chang 1 82 2.484907 2.484907 163 +solut 1 82 2.484907 2.484907 162 +info 1 85 2.484907 2.484907 176 +help 1 83 2.484907 2.484907 175 +resourc 1 81 2.484907 2.484907 172 +issu 1 78 2.564949 2.564949 211 +html 1 75 2.639057 2.639057 235 +handout 1 64 2.772589 2.772589 263 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +experi 1 64 2.772589 2.772589 283 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +date 1 51 2.995732 2.995732 344 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +discuss 1 45 3.135494 3.135494 399 +tutori 1 39 3.258097 3.258097 437 +tree 1 36 3.367296 3.367296 492 +print 1 34 3.401197 3.401197 503 +chapter 1 32 3.465736 3.465736 536 +yahoo 1 24 3.761200 3.761200 707 +instead 1 22 3.850148 3.850148 756 +exercis 1 19 4.007333 4.007333 842 +prerequisit 1 19 4.007333 4.007333 846 +thur 1 19 4.007333 4.007333 847 +sept 1 17 4.110874 4.110874 952 +ramakrishnan 1 16 4.174387 4.174387 972 +convent 1 14 4.317488 4.317488 1072 +raghu 1 12 4.465908 4.465908 1212 +tue 1 11 4.553877 4.553877 1308 +debugg 1 9 4.753590 4.753590 1493 +entri 1 8 4.875197 4.875197 1678 +minibas 1 4 5.568345 5.568345 2608 +dont 1 3 5.857933 5.857933 3473 +sybas 1 2 6.263398 6.263398 4723 +implementationc 1 1 6.957497 6.957497 8871 +implementationcours 1 1 6.957497 6.957497 8872 +assignmentoth 1 1 6.957497 6.957497 8873 +ingraham 1 1 6.957497 6.957497 8874 +xuemei 1 1 6.957497 6.957497 8875 +xbao 1 1 6.957497 6.957497 8870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..689e41e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +lectur 1 135 1.945910 1.945910 73 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +welcom 1 122 2.079442 2.079442 99 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +thing 1 84 2.484907 2.484907 189 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +import 1 65 2.772589 2.772589 282 +wednesdai 1 64 2.772589 2.772589 261 +semest 1 58 2.890372 2.890372 312 +particular 1 51 2.995732 2.995732 352 +still 1 50 3.044522 3.044522 362 +discuss 1 45 3.135494 3.135494 399 +fridai 1 44 3.135494 3.135494 390 +probabl 1 40 3.258097 3.258097 455 +close 1 38 3.295837 3.295837 465 +taught 1 33 3.433987 3.433987 526 +option 1 30 3.555348 3.555348 575 +progress 1 28 3.610918 3.610918 598 +jeff 1 25 3.737670 3.737670 673 +lab 1 24 3.761200 3.761200 698 +cooper 1 22 3.850148 3.850148 757 +fact 1 21 3.912023 3.912023 780 +psycholog 1 15 4.248495 4.248495 1054 +naughton 1 10 4.653960 4.653960 1450 +russel 1 9 4.753590 4.753590 1507 +minibas 1 4 5.568345 5.568345 2608 +obvious 1 3 5.857933 5.857933 3474 +addinginform 1 1 6.957497 6.957497 8876 +meetingroom 1 1 6.957497 6.957497 8877 +labsfor 1 1 6.957497 6.957497 8878 +beenmov 1 1 6.957497 6.957497 8879 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..71400ca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +algorithm 1 162 1.791759 1.791759 57 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +book 1 99 2.302585 2.302585 131 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +homework 1 79 2.564949 2.564949 193 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +descript 1 64 2.772589 2.772589 271 +organ 1 65 2.772589 2.772589 265 +archiv 1 49 3.044522 3.044522 364 +midterm 1 45 3.135494 3.135494 392 +graph 1 30 3.555348 3.555348 576 +reserv 1 20 3.951244 3.951244 808 +eric 1 19 4.007333 4.007333 870 +bill 1 11 4.553877 4.553877 1297 +appt 1 5 5.347108 5.347108 2312 +bach 1 4 5.568345 5.568345 2708 +fractal 1 3 5.857933 5.857933 3475 +behaviour 1 2 6.263398 6.263398 4724 +donaldson 1 1 6.957497 6.957497 8881 +raji 1 1 6.957497 6.957497 8880 +gopalakrishnan 1 1 6.957497 6.957497 8882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..aadae449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +introduct 1 126 2.079442 2.079442 87 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +intern 1 108 2.197225 2.197225 128 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +select 1 91 2.397895 2.397895 154 +pictur 1 89 2.397895 2.397895 160 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +refer 1 78 2.564949 2.564949 203 +mondai 1 77 2.564949 2.564949 206 +complet 1 77 2.564949 2.564949 208 +interfac 1 79 2.564949 2.564949 209 +free 1 73 2.639057 2.639057 224 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +evalu 1 64 2.772589 2.772589 266 +content 1 59 2.833213 2.833213 302 +overview 1 56 2.890372 2.890372 323 +unix 1 58 2.890372 2.890372 308 +tabl 1 51 2.995732 2.995732 346 +archiv 1 49 3.044522 3.044522 364 +appoint 1 49 3.044522 3.044522 358 +adapt 1 46 3.091042 3.091042 387 +midterm 1 45 3.135494 3.135494 392 +offer 1 43 3.178054 3.178054 414 +term 1 43 3.178054 3.178054 411 +review 1 42 3.218876 3.218876 425 +error 1 40 3.258097 3.258097 449 +form 1 39 3.258097 3.258097 443 +slide 1 38 3.295837 3.295837 467 +connect 1 37 3.332205 3.332205 485 +feel 1 37 3.332205 3.332205 483 +eduoffic 1 33 3.433987 3.433987 531 +richard 1 31 3.496508 3.496508 559 +option 1 30 3.555348 3.555348 575 +packag 1 28 3.610918 3.610918 614 +comp 1 26 3.688879 3.688879 650 +reliabl 1 25 3.737670 3.737670 674 +latest 1 21 3.912023 3.912023 785 +annot 1 21 3.912023 3.912023 775 +partial 1 18 4.060443 4.060443 900 +layer 1 17 4.110874 4.110874 926 +steven 1 17 4.110874 4.110874 953 +warn 1 14 4.317488 4.317488 1068 +prior 1 10 4.653960 4.653960 1438 +criteria 1 9 4.753590 4.753590 1477 +lawrenc 1 7 5.010635 5.010635 1908 +prentic 1 7 5.010635 5.010635 1838 +isbn 1 7 5.010635 5.010635 1901 +conveni 1 6 5.164786 5.164786 2088 +moder 1 6 5.164786 5.164786 2112 +landweb 1 3 5.857933 5.857933 3402 +hereto 1 3 5.857933 5.857933 3476 +gradingmidterm 1 3 5.857933 5.857933 3230 +statphon 1 2 6.263398 6.263398 4726 +socket 1 2 6.263398 6.263398 4725 +ipng 1 2 6.263398 6.263398 4727 +networksintroduct 1 1 6.957497 6.957497 8884 +readingsclick 1 1 6.957497 6.957497 8885 +networkingcours 1 1 6.957497 6.957497 8886 +madisoncours 1 1 6.957497 6.957497 8887 +informationlecturetim 1 1 6.957497 6.957497 8888 +mwfplace 1 1 6.957497 6.957497 8889 +statclass 1 1 6.957497 6.957497 8890 +listinstructor 1 1 6.957497 6.957497 8891 +landweberoffic 1 1 6.957497 6.957497 8892 +srinivasa 1 1 6.957497 6.957497 8893 +narayananoffic 1 1 6.957497 6.957497 8894 +teitelbaumoffic 1 1 6.957497 6.957497 8895 +naemail 1 1 6.957497 6.957497 8896 +powerpoint 1 1 6.957497 6.957497 8883 +garbler 1 1 6.957497 6.957497 8897 +bibliographyread 1 1 6.957497 6.957497 8898 +icmp 1 1 6.957497 6.957497 8899 +ospf 1 1 6.957497 6.957497 8900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..386a91b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +part 1 98 2.302585 2.302585 129 +solut 1 82 2.484907 2.484907 162 +homework 1 79 2.564949 2.564949 193 +descript 1 64 2.772589 2.772589 271 +robert 1 30 3.555348 3.555348 567 +option 1 30 3.555348 3.555348 575 +comp 1 26 3.688879 3.688879 650 +meyer 1 2 6.263398 6.263398 4728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..2b93a101 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +grade 1 90 2.397895 2.397895 142 +second 1 81 2.484907 2.484907 166 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +optim 1 79 2.564949 2.564949 197 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +wednesdai 1 64 2.772589 2.772589 261 +function 1 62 2.772589 2.772589 275 +publish 1 57 2.890372 2.890372 326 +overview 1 56 2.890372 2.890372 323 +semest 1 58 2.890372 2.890372 312 +scientif 1 53 2.944439 2.944439 341 +week 1 52 2.995732 2.995732 343 +telephon 1 50 3.044522 3.044522 373 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +midterm 1 45 3.135494 3.135494 392 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +york 1 41 3.218876 3.218876 435 +linear 1 41 3.218876 3.218876 431 +examin 1 42 3.218876 3.218876 424 +michael 1 35 3.401197 3.401197 514 +altern 1 26 3.688879 3.688879 641 +period 1 22 3.850148 3.850148 743 +siam 1 21 3.912023 3.912023 800 +theorem 1 21 3.912023 3.912023 786 +reserv 1 20 3.951244 3.951244 808 +stat 1 17 4.110874 4.110874 924 +differenti 1 17 4.110874 4.110874 921 +condit 1 16 4.174387 4.174387 975 +nonlinear 1 14 4.317488 4.317488 1107 +philadelphia 1 12 4.465908 4.465908 1244 +penalti 1 10 4.653960 4.653960 1405 +wendt 1 10 4.653960 4.653960 1446 +mangasarian 1 9 4.753590 4.753590 1570 +criteria 1 9 4.753590 4.753590 1477 +exact 1 9 4.753590 4.753590 1509 +kurt 1 9 4.753590 4.753590 1548 +ferri 1 8 4.875197 4.875197 1715 +olvi 1 6 5.164786 5.164786 2109 +inequ 1 6 5.164786 5.164786 2113 +augment 1 5 5.347108 5.347108 2350 +wilei 1 4 5.568345 5.568345 2669 +convex 1 4 5.568345 5.568345 2807 +concav 1 4 5.568345 5.568345 2808 +bertseka 1 3 5.857933 5.857933 3477 +lagrangian 1 3 5.857933 5.857933 3478 +gradient 1 3 5.857933 5.857933 3479 +applicationsfal 1 2 6.263398 6.263398 4729 +bazaraa 1 2 6.263398 6.263398 4730 +sherali 1 2 6.263398 6.263398 4731 +shetti 1 2 6.263398 6.263398 4732 +athena 1 2 6.263398 6.263398 4733 +saddlepoint 1 2 6.263398 6.263398 4734 +dualiti 1 2 6.263398 6.263398 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..2b93a101 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +grade 1 90 2.397895 2.397895 142 +second 1 81 2.484907 2.484907 166 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +mondai 1 77 2.564949 2.564949 206 +optim 1 79 2.564949 2.564949 197 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +wednesdai 1 64 2.772589 2.772589 261 +function 1 62 2.772589 2.772589 275 +publish 1 57 2.890372 2.890372 326 +overview 1 56 2.890372 2.890372 323 +semest 1 58 2.890372 2.890372 312 +scientif 1 53 2.944439 2.944439 341 +week 1 52 2.995732 2.995732 343 +telephon 1 50 3.044522 3.044522 373 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +midterm 1 45 3.135494 3.135494 392 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +york 1 41 3.218876 3.218876 435 +linear 1 41 3.218876 3.218876 431 +examin 1 42 3.218876 3.218876 424 +michael 1 35 3.401197 3.401197 514 +altern 1 26 3.688879 3.688879 641 +period 1 22 3.850148 3.850148 743 +siam 1 21 3.912023 3.912023 800 +theorem 1 21 3.912023 3.912023 786 +reserv 1 20 3.951244 3.951244 808 +stat 1 17 4.110874 4.110874 924 +differenti 1 17 4.110874 4.110874 921 +condit 1 16 4.174387 4.174387 975 +nonlinear 1 14 4.317488 4.317488 1107 +philadelphia 1 12 4.465908 4.465908 1244 +penalti 1 10 4.653960 4.653960 1405 +wendt 1 10 4.653960 4.653960 1446 +mangasarian 1 9 4.753590 4.753590 1570 +criteria 1 9 4.753590 4.753590 1477 +exact 1 9 4.753590 4.753590 1509 +kurt 1 9 4.753590 4.753590 1548 +ferri 1 8 4.875197 4.875197 1715 +olvi 1 6 5.164786 5.164786 2109 +inequ 1 6 5.164786 5.164786 2113 +augment 1 5 5.347108 5.347108 2350 +wilei 1 4 5.568345 5.568345 2669 +convex 1 4 5.568345 5.568345 2807 +concav 1 4 5.568345 5.568345 2808 +bertseka 1 3 5.857933 5.857933 3477 +lagrangian 1 3 5.857933 5.857933 3478 +gradient 1 3 5.857933 5.857933 3479 +applicationsfal 1 2 6.263398 6.263398 4729 +bazaraa 1 2 6.263398 6.263398 4730 +sherali 1 2 6.263398 6.263398 4731 +shetti 1 2 6.263398 6.263398 4732 +athena 1 2 6.263398 6.263398 4733 +saddlepoint 1 2 6.263398 6.263398 4734 +dualiti 1 2 6.263398 6.263398 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..ac84b6b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +professor 1 137 1.945910 1.945910 76 +postscript 1 131 2.079442 2.079442 90 +pleas 1 113 2.197225 2.197225 114 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +text 1 98 2.302585 2.302585 133 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +comment 1 93 2.397895 2.397895 146 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +exampl 1 77 2.564949 2.564949 195 +html 1 75 2.639057 2.639057 235 +onlin 1 75 2.639057 2.639057 223 +evalu 1 64 2.772589 2.772589 266 +new 1 64 2.772589 2.772589 262 +instruct 1 53 2.944439 2.944439 332 +suggest 1 53 2.944439 2.944439 331 +tutori 1 39 3.258097 3.258097 437 +manual 1 35 3.401197 3.401197 504 +print 1 34 3.401197 3.401197 503 +least 1 35 3.401197 3.401197 516 +initi 1 23 3.806662 3.806662 717 +half 1 21 3.912023 3.912023 776 +sept 1 17 4.110874 4.110874 952 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +devis 1 10 4.653960 4.653960 1451 +chan 1 7 5.010635 5.010635 1876 +yong 1 4 5.568345 5.568345 2809 +chee 1 3 5.857933 5.857933 3480 +mimic 1 2 6.263398 6.263398 4736 +cychan 1 2 6.263398 6.263398 4737 +qnet 1 1 6.957497 6.957497 8901 +devc 1 1 6.957497 6.957497 8902 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..317cfccc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +relat 1 139 1.945910 1.945910 68 +assign 1 135 1.945910 1.945910 66 +technolog 1 131 2.079442 2.079442 102 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +topic 1 114 2.197225 2.197225 110 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +activ 1 84 2.484907 2.484907 182 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +complet 1 77 2.564949 2.564949 208 +knowledg 1 67 2.708050 2.708050 243 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +room 1 59 2.833213 2.833213 301 +major 1 56 2.890372 2.890372 315 +overview 1 56 2.890372 2.890372 323 +suggest 1 53 2.944439 2.944439 331 +much 1 52 2.995732 2.995732 349 +understand 1 47 3.091042 3.091042 384 +represent 1 35 3.401197 3.401197 512 +taken 1 31 3.496508 3.496508 555 +consid 1 29 3.583519 3.583519 590 +univ 1 28 3.610918 3.610918 617 +retriev 1 27 3.637586 3.637586 621 +tent 1 22 3.850148 3.850148 739 +minut 1 20 3.951244 3.951244 810 +thur 1 19 4.007333 4.007333 847 +seek 1 17 4.110874 4.110874 954 +sept 1 17 4.110874 4.110874 952 +onth 1 12 4.465908 4.465908 1218 +excit 1 11 4.553877 4.553877 1329 +underli 1 10 4.653960 4.653960 1410 +occur 1 9 4.753590 4.753590 1572 +compos 1 9 4.753590 4.753590 1527 +digest 1 7 5.010635 5.010635 1864 +machinelearn 1 6 5.164786 5.164786 2084 +proce 1 6 5.164786 5.164786 2114 +polit 1 6 5.164786 5.164786 2115 +anda 1 5 5.347108 5.347108 2416 +aboutth 1 4 5.568345 5.568345 2720 +thesear 1 3 5.857933 5.857933 3456 +uwisc 1 2 6.263398 6.263398 4738 +belew 1 2 6.263398 6.263398 4739 +knowledgerichard 1 1 6.957497 6.957497 8903 +belewvisit 1 1 6.957497 6.957497 8904 +professorc 1 1 6.957497 6.957497 8905 +departmentfal 1 1 6.957497 6.957497 8906 +acal 1 1 6.957497 6.957497 8907 +engrthi 1 1 6.957497 6.957497 8908 +coures 1 1 6.957497 6.957497 8909 +canse 1 1 6.957497 6.957497 8910 +asyllabu 1 1 6.957497 6.957497 8911 +mapof 1 1 6.957497 6.957497 8912 +semesterwil 1 1 6.957497 6.957497 8913 +infidel 1 1 6.957497 6.957497 8914 +hypermai 1 1 6.957497 6.957497 8915 +classrel 1 1 6.957497 6.957497 8916 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..6f1917bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +relat 1 139 1.945910 1.945910 68 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +look 1 107 2.197225 2.197225 115 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +sinc 1 90 2.397895 2.397895 159 +question 1 91 2.397895 2.397895 141 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +chang 1 82 2.484907 2.484907 163 +contain 1 81 2.484907 2.484907 174 +solut 1 82 2.484907 2.484907 162 +method 1 80 2.564949 2.564949 213 +orient 1 80 2.564949 2.564949 205 +good 1 77 2.564949 2.564949 200 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +name 1 72 2.639057 2.639057 220 +syllabu 1 67 2.708050 2.708050 247 +order 1 69 2.708050 2.708050 249 +complex 1 64 2.772589 2.772589 269 +organ 1 65 2.772589 2.772589 265 +plan 1 65 2.772589 2.772589 272 +handout 1 64 2.772589 2.772589 263 +simpl 1 60 2.833213 2.833213 298 +point 1 58 2.890372 2.890372 319 +unix 1 58 2.890372 2.890372 308 +explor 1 58 2.890372 2.890372 324 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +numer 1 49 3.044522 3.044522 369 +frequent 1 49 3.044522 3.044522 367 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +edit 1 42 3.218876 3.218876 418 +might 1 41 3.218876 3.218876 426 +error 1 40 3.258097 3.258097 449 +tutori 1 39 3.258097 3.258097 437 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +approxim 1 35 3.401197 3.401197 509 +word 1 34 3.401197 3.401197 508 +chapter 1 32 3.465736 3.465736 536 +ask 1 28 3.610918 3.610918 597 +mind 1 27 3.637586 3.637586 632 +though 1 27 3.637586 3.637586 622 +subject 1 26 3.688879 3.688879 647 +session 1 26 3.688879 3.688879 643 +rather 1 26 3.688879 3.688879 642 +concern 1 25 3.737670 3.737670 666 +todai 1 25 3.737670 3.737670 672 +sometim 1 24 3.761200 3.761200 696 +wish 1 24 3.761200 3.761200 692 +begin 1 23 3.806662 3.806662 716 +tent 1 22 3.850148 3.850148 739 +cooper 1 22 3.850148 3.850148 757 +lower 1 18 4.060443 4.060443 886 +four 1 18 4.060443 4.060443 905 +condit 1 16 4.174387 4.174387 975 +score 1 15 4.248495 4.248495 1017 +carl 1 15 4.248495 4.248495 1024 +fortran 1 15 4.248495 4.248495 1027 +squar 1 14 4.317488 4.317488 1082 +matlab 1 14 4.317488 4.317488 1081 +doit 1 14 4.317488 4.317488 1111 +conduct 1 14 4.317488 4.317488 1065 +total 1 10 4.653960 4.653960 1398 +errata 1 10 4.653960 4.653960 1403 +mention 1 9 4.753590 4.753590 1569 +smile 1 7 5.010635 5.010635 1807 +slightli 1 7 5.010635 5.010635 1795 +awar 1 7 5.010635 5.010635 1800 +supplement 1 5 5.347108 5.347108 2355 +rick 1 4 5.568345 5.568345 2646 +areavail 1 4 5.568345 5.568345 2810 +andp 1 4 5.568345 5.568345 2811 +preprint 1 3 5.857933 5.857933 3481 +diari 1 2 6.263398 6.263398 4740 +residu 1 2 6.263398 6.263398 4741 +kermit 1 2 6.263398 6.263398 4742 +primer 1 2 6.263398 6.263398 4719 +overviewcours 1 2 6.263398 6.263398 4399 +linksyou 1 2 6.263398 6.263398 4743 +csdepart 1 2 6.263398 6.263398 4130 +methodsthi 1 1 6.957497 6.957497 8918 +orderli 1 1 6.957497 6.957497 8919 +assignmentson 1 1 6.957497 6.957497 8920 +numericalanalysi 1 1 6.957497 6.957497 8921 +foremostmathematician 1 1 6.957497 6.957497 8922 +trickytop 1 1 6.957497 6.957497 8923 +textmai 1 1 6.957497 6.957497 8924 +byaddit 1 1 6.957497 6.957497 8925 +capitallett 1 1 6.957497 6.957497 8926 +caselett 1 1 6.957497 6.957497 8927 +sigmon 1 1 6.957497 6.957497 8928 +reaction 1 1 6.957497 6.957497 8929 +telnet 1 1 6.957497 6.957497 8917 +winor 1 1 6.957497 6.957497 8930 +referenceviva 1 1 6.957497 6.957497 8931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..30485d4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +analysi 1 124 2.079442 2.079442 98 +version 1 113 2.197225 2.197225 122 +well 1 109 2.197225 2.197225 121 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +chang 1 82 2.484907 2.484907 163 +contain 1 81 2.484907 2.484907 174 +homework 1 79 2.564949 2.564949 193 +line 1 75 2.639057 2.639057 231 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +locat 1 59 2.833213 2.833213 303 +index 1 56 2.890372 2.890372 309 +explor 1 58 2.890372 2.890372 324 +numer 1 49 3.044522 3.044522 369 +math 1 44 3.135494 3.135494 402 +directori 1 45 3.135494 3.135494 396 +might 1 41 3.218876 3.218876 426 +announc 1 40 3.258097 3.258097 441 +post 1 35 3.401197 3.401197 505 +hard 1 30 3.555348 3.555348 563 +concern 1 25 3.737670 3.737670 666 +wish 1 24 3.761200 3.761200 692 +stat 1 17 4.110874 4.110874 924 +carl 1 15 4.248495 4.248495 1024 +doit 1 14 4.317488 4.317488 1111 +none 1 7 5.010635 5.010635 1811 +boor 1 3 5.857933 5.857933 3482 +deboor 1 2 6.263398 6.263398 4744 +linksyou 1 2 6.263398 6.263398 4743 +analysisthi 1 1 6.957497 6.957497 8932 +statlectur 1 1 6.957497 6.957497 8933 +classnot 1 1 6.957497 6.957497 8934 +viii 1 1 6.957497 6.957497 8935 +courseoff 1 1 6.957497 6.957497 8936 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..42dee5d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +note 1 142 1.945910 1.945910 67 +spring 1 131 2.079442 2.079442 88 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +chang 1 82 2.484907 2.484907 163 +contain 1 81 2.484907 2.484907 174 +math 1 44 3.135494 3.135494 402 +approxim 1 35 3.401197 3.401197 509 +theorythi 1 1 6.957497 6.957497 8937 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..ba5a3932 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +academ 1 82 2.484907 2.484907 178 +solut 1 82 2.484907 2.484907 162 +solv 1 73 2.639057 2.639057 234 +tuesdai 1 73 2.639057 2.639057 219 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +semest 1 58 2.890372 2.890372 312 +appoint 1 49 3.044522 3.044522 358 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +statist 1 35 3.401197 3.401197 521 +specifi 1 30 3.555348 3.555348 568 +comp 1 26 3.688879 3.688879 650 +tent 1 22 3.850148 3.850148 739 +walter 1 17 4.110874 4.110874 950 +sept 1 17 4.110874 4.110874 952 +stat 1 17 4.110874 4.110874 924 +misconduct 1 16 4.174387 4.174387 1003 +quiz 1 16 4.174387 4.174387 990 +borland 1 14 4.317488 4.317488 1067 +quizz 1 13 4.382027 4.382027 1151 +savitch 1 12 4.465908 4.465908 1269 +criteria 1 9 4.753590 4.753590 1477 +noland 1 5 5.347108 5.347108 2420 +anthoni 1 4 5.568345 5.568345 2792 +toni 1 3 5.857933 5.857933 3415 +textbookproblem 1 3 5.857933 5.857933 3483 +timet 1 3 5.857933 5.857933 3471 +windowshint 1 3 5.857933 5.857933 3484 +compilersth 1 3 5.857933 5.857933 3485 +systememailmosaicnetscap 1 3 5.857933 5.857933 3486 +languageth 1 3 5.857933 5.857933 3487 +silva 1 2 6.263398 6.263398 4586 +chamberlin 1 2 6.263398 6.263398 4745 +sectioncsm 1 1 6.957497 6.957497 8939 +dsilva 1 1 6.957497 6.957497 8938 +firstdai 1 1 6.957497 6.957497 8940 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..b08c3a89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,433 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +imag 1 91 2.397895 2.397895 161 +grade 1 90 2.397895 2.397895 142 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +center 1 88 2.397895 2.397895 158 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +graphic 1 90 2.397895 2.397895 147 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +level 1 87 2.484907 2.484907 180 +activ 1 84 2.484907 2.484907 182 +control 1 82 2.484907 2.484907 164 +exam 1 86 2.484907 2.484907 169 +novemb 1 81 2.484907 2.484907 179 +requir 1 81 2.484907 2.484907 167 +journal 1 83 2.484907 2.484907 183 +thing 1 84 2.484907 2.484907 189 +learn 1 86 2.484907 2.484907 170 +start 1 83 2.484907 2.484907 173 +chang 1 82 2.484907 2.484907 163 +larg 1 82 2.484907 2.484907 168 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +help 1 83 2.484907 2.484907 175 +environ 1 84 2.484907 2.484907 177 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +mondai 1 77 2.564949 2.564949 206 +method 1 80 2.564949 2.564949 213 +homework 1 79 2.564949 2.564949 193 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +interfac 1 79 2.564949 2.564949 209 +tuesdai 1 73 2.639057 2.639057 219 +onlin 1 75 2.639057 2.639057 223 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +appli 1 71 2.639057 2.639057 226 +addit 1 74 2.639057 2.639057 228 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +test 1 66 2.708050 2.708050 252 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +main 1 67 2.708050 2.708050 256 +wednesdai 1 64 2.772589 2.772589 261 +collect 1 65 2.772589 2.772589 268 +handout 1 64 2.772589 2.772589 263 +copi 1 63 2.772589 2.772589 284 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +evalu 1 64 2.772589 2.772589 266 +experi 1 64 2.772589 2.772589 283 +abstract 1 62 2.772589 2.772589 276 +virtual 1 62 2.772589 2.772589 285 +type 1 61 2.833213 2.833213 296 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +simpl 1 60 2.833213 2.833213 298 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +point 1 58 2.890372 2.890372 319 +space 1 57 2.890372 2.890372 310 +unix 1 58 2.890372 2.890372 308 +three 1 54 2.944439 2.944439 330 +found 1 53 2.944439 2.944439 337 +cover 1 55 2.944439 2.944439 329 +talk 1 53 2.944439 2.944439 336 +date 1 51 2.995732 2.995732 344 +case 1 51 2.995732 2.995732 351 +finger 1 52 2.995732 2.995732 354 +digit 1 52 2.995732 2.995732 348 +format 1 48 3.044522 3.044522 356 +principl 1 48 3.044522 3.044522 357 +right 1 48 3.044522 3.044522 363 +approach 1 48 3.044522 3.044522 366 +basic 1 50 3.044522 3.044522 360 +numer 1 49 3.044522 3.044522 369 +visual 1 48 3.044522 3.044522 372 +get 1 46 3.091042 3.091042 380 +understand 1 47 3.091042 3.091042 384 +done 1 47 3.091042 3.091042 381 +fridai 1 44 3.135494 3.135494 390 +algebra 1 45 3.135494 3.135494 394 +midterm 1 45 3.135494 3.135494 392 +directori 1 45 3.135494 3.135494 396 +execut 1 45 3.135494 3.135494 404 +textbook 1 44 3.135494 3.135494 397 +video 1 44 3.135494 3.135494 405 +around 1 43 3.178054 3.178054 415 +long 1 43 3.178054 3.178054 413 +vision 1 41 3.218876 3.218876 430 +linear 1 41 3.218876 3.218876 431 +york 1 41 3.218876 3.218876 435 +editor 1 41 3.218876 3.218876 433 +might 1 41 3.218876 3.218876 426 +edit 1 42 3.218876 3.218876 418 +fast 1 42 3.218876 3.218876 429 +howev 1 41 3.218876 3.218876 422 +examin 1 42 3.218876 3.218876 424 +probabl 1 40 3.258097 3.258097 455 +small 1 39 3.258097 3.258097 447 +origin 1 38 3.295837 3.295837 472 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +feel 1 37 3.332205 3.332205 483 +hand 1 37 3.332205 3.332205 475 +especi 1 36 3.367296 3.367296 496 +manual 1 35 3.401197 3.401197 504 +least 1 35 3.401197 3.401197 516 +print 1 34 3.401197 3.401197 503 +committe 1 34 3.401197 3.401197 522 +eduoffic 1 33 3.433987 3.433987 531 +board 1 33 3.433987 3.433987 528 +product 1 33 3.433987 3.433987 527 +chapter 1 32 3.465736 3.465736 536 +transform 1 32 3.465736 3.465736 542 +idea 1 32 3.465736 3.465736 545 +photo 1 31 3.496508 3.496508 561 +posit 1 31 3.496508 3.496508 552 +titl 1 31 3.496508 3.496508 556 +focu 1 30 3.555348 3.555348 571 +option 1 30 3.555348 3.555348 575 +produc 1 30 3.555348 3.555348 572 +particip 1 29 3.583519 3.583519 589 +except 1 28 3.610918 3.610918 607 +packag 1 28 3.610918 3.610918 614 +held 1 28 3.610918 3.610918 600 +ask 1 28 3.610918 3.610918 597 +determin 1 27 3.637586 3.637586 630 +detect 1 26 3.688879 3.688879 646 +enhanc 1 26 3.688879 3.688879 644 +altern 1 26 3.688879 3.688879 641 +relev 1 26 3.688879 3.688879 637 +fundament 1 25 3.737670 3.737670 661 +hill 1 25 3.737670 3.737670 670 +although 1 25 3.737670 3.737670 667 +motion 1 24 3.761200 3.761200 699 +wish 1 24 3.761200 3.761200 692 +pattern 1 24 3.761200 3.761200 689 +store 1 24 3.761200 3.761200 693 +other 1 24 3.761200 3.761200 697 +recognit 1 23 3.806662 3.806662 723 +head 1 23 3.806662 3.806662 732 +compress 1 23 3.806662 3.806662 719 +displai 1 23 3.806662 3.806662 712 +proof 1 23 3.806662 3.806662 720 +highli 1 23 3.806662 3.806662 725 +miscellan 1 23 3.806662 3.806662 731 +defin 1 22 3.850148 3.850148 746 +color 1 22 3.850148 3.850148 762 +instead 1 22 3.850148 3.850148 756 +william 1 22 3.850148 3.850148 765 +disk 1 22 3.850148 3.850148 747 +sent 1 22 3.850148 3.850148 763 +varieti 1 22 3.850148 3.850148 740 +recommend 1 22 3.850148 3.850148 737 +tell 1 21 3.912023 3.912023 777 +output 1 21 3.912023 3.912023 788 +fact 1 21 3.912023 3.912023 780 +wang 1 21 3.912023 3.912023 790 +entir 1 20 3.951244 3.951244 811 +sure 1 20 3.951244 3.951244 813 +toolkit 1 20 3.951244 3.951244 835 +prerequisit 1 19 4.007333 4.007333 846 +citi 1 19 4.007333 4.007333 874 +comparison 1 19 4.007333 4.007333 863 +boston 1 19 4.007333 4.007333 862 +dimension 1 18 4.060443 4.060443 909 +four 1 18 4.060443 4.060443 905 +account 1 18 4.060443 4.060443 882 +demo 1 18 4.060443 4.060443 888 +segment 1 17 4.110874 4.110874 931 +modif 1 17 4.110874 4.110874 913 +matrix 1 17 4.110874 4.110874 933 +estim 1 17 4.110874 4.110874 930 +stanford 1 17 4.110874 4.110874 955 +regular 1 17 4.110874 4.110874 929 +condit 1 16 4.174387 4.174387 975 +zhang 1 16 4.174387 4.174387 980 +earli 1 16 4.174387 4.174387 968 +sheet 1 16 4.174387 4.174387 973 +track 1 15 4.248495 4.248495 1029 +score 1 15 4.248495 4.248495 1017 +transit 1 15 4.248495 4.248495 1046 +goe 1 15 4.248495 4.248495 1044 +side 1 15 4.248495 4.248495 1022 +rate 1 15 4.248495 4.248495 1037 +chuck 1 14 4.317488 4.317488 1108 +scene 1 14 4.317488 4.317488 1114 +doit 1 14 4.317488 4.317488 1111 +save 1 14 4.317488 4.317488 1099 +manner 1 14 4.317488 4.317488 1074 +consider 1 14 4.317488 4.317488 1076 +command 1 14 4.317488 4.317488 1083 +matlab 1 14 4.317488 4.317488 1081 +primarili 1 13 4.382027 4.382027 1185 +block 1 13 4.382027 4.382027 1183 +convert 1 13 4.382027 4.382027 1122 +emac 1 13 4.382027 4.382027 1143 +everyon 1 13 4.382027 4.382027 1148 +calculu 1 12 4.465908 4.465908 1203 +overal 1 12 4.465908 4.465908 1254 +shape 1 12 4.465908 4.465908 1245 +count 1 12 4.465908 4.465908 1239 +optic 1 12 4.465908 4.465908 1221 +realiti 1 12 4.465908 4.465908 1272 +qualit 1 11 4.553877 4.553877 1362 +appl 1 11 4.553877 4.553877 1303 +modul 1 10 4.653960 4.653960 1434 +vista 1 10 4.653960 4.653960 1452 +mosaic 1 10 4.653960 4.653960 1426 +hint 1 10 4.653960 4.653960 1419 +queue 1 10 4.653960 4.653960 1386 +rapid 1 10 4.653960 4.653960 1453 +bring 1 10 4.653960 4.653960 1430 +dyer 1 9 4.753590 4.753590 1573 +recoveri 1 9 4.753590 4.753590 1474 +face 1 9 4.753590 4.753590 1501 +distanc 1 9 4.753590 4.753590 1500 +edg 1 8 4.875197 4.875197 1647 +depth 1 8 4.875197 4.875197 1636 +contrast 1 8 4.875197 4.875197 1637 +printer 1 8 4.875197 4.875197 1621 +job 1 8 4.875197 4.875197 1702 +convers 1 8 4.875197 4.875197 1673 +virginia 1 8 4.875197 4.875197 1659 +shade 1 7 5.010635 5.010635 1881 +stereo 1 7 5.010635 5.010635 1818 +prevent 1 7 5.010635 5.010635 1827 +corner 1 7 5.010635 5.010635 1909 +header 1 7 5.010635 5.010635 1787 +compact 1 7 5.010635 5.010635 1907 +signal 1 7 5.010635 5.010635 1910 +sweden 1 7 5.010635 5.010635 1885 +viewpoint 1 6 5.164786 5.164786 2116 +spline 1 6 5.164786 5.164786 2007 +gzip 1 6 5.164786 5.164786 2117 +invok 1 6 5.164786 5.164786 2079 +classroom 1 6 5.164786 5.164786 2006 +televis 1 6 5.164786 5.164786 2118 +spie 1 6 5.164786 5.164786 2119 +bryan 1 5 5.347108 5.347108 2421 +jain 1 5 5.347108 5.347108 2332 +mcgraw 1 5 5.347108 5.347108 2262 +rotat 1 5 5.347108 5.347108 2295 +adjust 1 5 5.347108 5.347108 2422 +button 1 5 5.347108 5.347108 2337 +constant 1 5 5.347108 5.347108 2251 +multiresolut 1 5 5.347108 5.347108 2423 +snake 1 5 5.347108 5.347108 2281 +sparcstat 1 5 5.347108 5.347108 2406 +shortest 1 5 5.347108 5.347108 2424 +grand 1 5 5.347108 5.347108 2425 +contour 1 4 5.568345 5.568345 2812 +sold 1 4 5.568345 5.568345 2813 +assignmentshomework 1 4 5.568345 5.568345 2721 +delet 1 4 5.568345 5.568345 2691 +shah 1 4 5.568345 5.568345 2814 +visionc 1 3 5.857933 5.857933 3489 +histogram 1 3 5.857933 5.857933 3490 +portrait 1 3 5.857933 5.857933 3491 +thin 1 3 5.857933 5.857933 3488 +gradient 1 3 5.857933 5.857933 3479 +surround 1 3 5.857933 5.857933 3492 +suen 1 3 5.857933 5.857933 3446 +pyramid 1 3 5.857933 5.857933 3358 +tran 1 3 5.857933 5.857933 3384 +faq 1 3 5.857933 5.857933 3216 +toolbox 1 3 5.857933 5.857933 3112 +quicktim 1 3 5.857933 5.857933 3493 +qbic 1 3 5.857933 5.857933 3294 +cardiff 1 3 5.857933 5.857933 3154 +visionfal 1 2 6.263398 6.263398 4749 +shoulder 1 2 6.263398 6.263398 4750 +skeleton 1 2 6.263398 6.263398 4225 +disappear 1 2 6.263398 6.263398 4748 +altogeth 1 2 6.263398 6.263398 4751 +comm 1 2 6.263398 6.263398 4746 +burt 1 2 6.263398 6.263398 4494 +supplementari 1 2 6.263398 6.263398 4752 +quota 1 2 6.263398 6.263398 4753 +laser 1 2 6.263398 6.263398 4747 +caution 1 2 6.263398 6.263398 4754 +khoro 1 2 6.263398 6.263398 4488 +cantata 1 2 6.263398 6.263398 4489 +panoram 1 2 6.263398 6.263398 4755 +royal 1 2 6.263398 6.263398 4756 +dyeroffic 1 1 6.957497 6.957497 8947 +csstelephon 1 1 6.957497 6.957497 8942 +appointmentteach 1 1 6.957497 6.957497 8948 +sooffic 1 1 6.957497 6.957497 8949 +appointmentstud 1 1 6.957497 6.957497 8950 +informationfundament 1 1 6.957497 6.957497 8951 +featuredetect 1 1 6.957497 6.957497 8952 +forreconstruct 1 1 6.957497 6.957497 8953 +usingtechniqu 1 1 6.957497 6.957497 8954 +asshap 1 1 6.957497 6.957497 8955 +andocclud 1 1 6.957497 6.957497 8956 +kasturi 1 1 6.957497 6.957497 8957 +schunck 1 1 6.957497 6.957497 8958 +readingsfrom 1 1 6.957497 6.957497 8959 +batchessupplementari 1 1 6.957497 6.957497 8960 +sourcesonlin 1 1 6.957497 6.957497 8961 +informationmost 1 1 6.957497 6.957497 8962 +urlhttp 1 1 6.957497 6.957497 8963 +byfirst 1 1 6.957497 6.957497 8964 +crop 1 1 6.957497 6.957497 8965 +theintens 1 1 6.957497 6.957497 8966 +thewindow 1 1 6.957497 6.957497 8967 +colorif 1 1 6.957497 6.957497 8968 +grayscal 1 1 6.957497 6.957497 8969 +transformationsav 1 1 6.957497 6.957497 8970 +andput 1 1 6.957497 6.957497 8971 +whereth 1 1 6.957497 6.957497 8972 +whatintens 1 1 6.957497 6.957497 8973 +qualityof 1 1 6.957497 6.957497 8974 +ownweb 1 1 6.957497 6.957497 8975 +infin 1 1 6.957497 6.957497 8976 +chessboard 1 1 6.957497 6.957497 8977 +ubyt 1 1 6.957497 6.957497 8943 +vconvert 1 1 6.957497 6.957497 8978 +clean 1 1 6.957497 6.957497 8979 +repn 1 1 6.957497 6.957497 8980 +component_interp 1 1 6.957497 6.957497 8981 +low_threshold 1 1 6.957497 6.957497 8982 +high_threshold 1 1 6.957497 6.957497 8983 +vlink 1 1 6.957497 6.957497 8984 +vsegedg 1 1 6.957497 6.957497 8985 +adelson 1 1 6.957497 6.957497 8941 +laplacian 1 1 6.957497 6.957497 8986 +kass 1 1 6.957497 6.957497 8987 +witkin 1 1 6.957497 6.957497 8988 +terzopoulo 1 1 6.957497 6.957497 8989 +curvatur 1 1 6.957497 6.957497 8990 +laserprint 1 1 6.957497 6.957497 8991 +imgstar 1 1 6.957497 6.957497 8944 +netpbm 1 1 6.957497 6.957497 8992 +pbmplu 1 1 6.957497 6.957497 8993 +wandel 1 1 6.957497 6.957497 8994 +hdtv 1 1 6.957497 6.957497 8945 +allianc 1 1 6.957497 6.957497 8995 +atsc 1 1 6.957497 6.957497 8946 +panoramix 1 1 6.957497 6.957497 8996 +decfac 1 1 6.957497 6.957497 8997 +synthet 1 1 6.957497 6.957497 8998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..9b94012f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +number 1 130 2.079442 2.079442 97 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +section 1 94 2.397895 2.397895 149 +octob 1 89 2.397895 2.397895 156 +novemb 1 81 2.484907 2.484907 179 +meet 1 72 2.639057 2.639057 229 +solv 1 73 2.639057 2.639057 234 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +publish 1 57 2.890372 2.890372 326 +digit 1 52 2.995732 2.995732 348 +algebra 1 45 3.135494 3.135494 394 +fridai 1 44 3.135494 3.135494 390 +compani 1 41 3.218876 3.218876 423 +known 1 24 3.761200 3.761200 702 +greg 1 24 3.761200 3.761200 695 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +style 1 15 4.248495 4.248495 1036 +dave 1 14 4.317488 4.317488 1098 +readi 1 12 4.465908 4.465908 1242 +informationemail 1 9 4.753590 4.753590 1564 +sharp 1 6 5.164786 5.164786 2100 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +objectivesvectra 1 3 5.857933 5.857933 3410 +homeclass 1 3 5.857933 5.857933 3411 +policyl 1 3 5.857933 5.857933 3412 +policyacadem 1 3 5.857933 5.857933 3413 +consultantssyllabuswork 1 2 6.263398 6.263398 4579 +referenc 1 2 6.263398 6.263398 4757 +zimmermannemail 1 1 6.957497 6.957497 9000 +dzimm 1 1 6.957497 6.957497 8999 +educlass 1 1 6.957497 6.957497 9001 +nolandoffic 1 1 6.957497 6.957497 9002 +announcementsprogram 1 1 6.957497 6.957497 9003 +handoutsprogramsexam 1 1 6.957497 6.957497 9004 +quizzeslectur 1 1 6.957497 6.957497 9005 +notesgreg 1 1 6.957497 6.957497 9006 +guidegrad 1 1 6.957497 6.957497 9007 +quizzesprogramsexam 1 1 6.957497 6.957497 9008 +policytext 1 1 6.957497 6.957497 9009 +zimmermann 1 1 6.957497 6.957497 9010 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..b0442e05 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +spring 1 131 2.079442 2.079442 88 +instructor 1 108 2.197225 2.197225 107 +theori 1 111 2.197225 2.197225 127 +larg 1 82 2.484907 2.484907 168 +method 1 80 2.564949 2.564949 213 +januari 1 62 2.772589 2.772589 264 +semest 1 58 2.890372 2.890372 312 +variou 1 56 2.890372 2.890372 317 +offer 1 43 3.178054 3.178054 414 +linear 1 41 3.218876 3.218876 431 +michael 1 35 3.401197 3.401197 514 +flow 1 24 3.761200 3.761200 700 +spars 1 16 4.174387 4.174387 989 +nonlinear 1 14 4.317488 4.317488 1107 +mangasarian 1 9 4.753590 4.753590 1570 +ferri 1 8 4.875197 4.875197 1715 +integ 1 8 4.875197 4.875197 1688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..5a2f6a86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +lectur 1 135 1.945910 1.945910 73 +hall 1 146 1.945910 1.945910 65 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +mondai 1 77 2.564949 2.564949 206 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +homework 1 79 2.564949 2.564949 193 +april 1 77 2.564949 2.564949 196 +orient 1 80 2.564949 2.564949 205 +sourc 1 77 2.564949 2.564949 201 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +knowledg 1 67 2.708050 2.708050 243 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +overview 1 56 2.890372 2.890372 323 +unix 1 58 2.890372 2.890372 308 +februari 1 54 2.944439 2.944439 328 +allow 1 53 2.944439 2.944439 333 +local 1 55 2.944439 2.944439 334 +week 1 52 2.995732 2.995732 343 +telephon 1 50 3.044522 3.044522 373 +set 1 50 3.044522 3.044522 361 +fridai 1 44 3.135494 3.135494 390 +directori 1 45 3.135494 3.135494 396 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +examin 1 42 3.218876 3.218876 424 +close 1 38 3.295837 3.295837 465 +cost 1 37 3.332205 3.332205 480 +workstat 1 37 3.332205 3.332205 479 +tree 1 36 3.367296 3.367296 492 +michael 1 35 3.401197 3.401197 514 +jame 1 35 3.401197 3.401197 507 +except 1 28 3.610918 3.610918 607 +held 1 28 3.610918 3.610918 600 +repres 1 26 3.688879 3.688879 656 +session 1 26 3.688879 3.688879 643 +flow 1 24 3.761200 3.761200 700 +path 1 21 3.912023 3.912023 778 +minut 1 20 3.951244 3.951244 810 +prerequisit 1 19 4.007333 4.007333 846 +thoma 1 18 4.060443 4.060443 901 +appropri 1 18 4.060443 4.060443 883 +previous 1 17 4.110874 4.110874 923 +sheet 1 16 4.174387 4.174387 973 +solari 1 12 4.465908 4.465908 1238 +cycl 1 11 4.553877 4.553877 1335 +login 1 9 4.753590 4.753590 1550 +ferri 1 8 4.875197 4.875197 1715 +prentic 1 7 5.010635 5.010635 1838 +relax 1 6 5.164786 5.164786 2120 +shortest 1 5 5.347108 5.347108 2424 +freeman 1 4 5.568345 5.568345 2725 +convex 1 4 5.568345 5.568345 2807 +novic 1 4 5.568345 5.568345 2815 +cshrc 1 4 5.568345 5.568345 2759 +ahuja 1 3 5.857933 5.857933 3494 +bertseka 1 3 5.857933 5.857933 3477 +lagrangian 1 3 5.857933 5.857933 3478 +leei 1 2 6.263398 6.263398 4759 +equilibria 1 2 6.263398 6.263398 4760 +multicommod 1 2 6.263398 6.263398 4761 +gam 1 2 6.263398 6.263398 4758 +flowsspr 1 1 6.957497 6.957497 9011 +ravindra 1 1 6.957497 6.957497 9012 +magnanti 1 1 6.957497 6.957497 9013 +orlin 1 1 6.957497 6.957497 9014 +chvatal 1 1 6.957497 6.957497 9015 +simplex 1 1 6.957497 6.957497 9016 +alter 1 1 6.957497 6.957497 9017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..1d85d265 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +user 1 104 2.302585 2.302585 137 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +ieee 1 86 2.484907 2.484907 190 +method 1 80 2.564949 2.564949 213 +mondai 1 77 2.564949 2.564949 206 +optim 1 79 2.564949 2.564949 197 +homework 1 79 2.564949 2.564949 193 +april 1 77 2.564949 2.564949 196 +orient 1 80 2.564949 2.564949 205 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +januari 1 62 2.772589 2.772589 264 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +direct 1 57 2.890372 2.890372 316 +space 1 57 2.890372 2.890372 310 +overview 1 56 2.890372 2.890372 323 +unix 1 58 2.890372 2.890372 308 +februari 1 54 2.944439 2.944439 328 +local 1 55 2.944439 2.944439 334 +allow 1 53 2.944439 2.944439 333 +instruct 1 53 2.944439 2.944439 332 +week 1 52 2.995732 2.995732 343 +telephon 1 50 3.044522 3.044522 373 +math 1 44 3.135494 3.135494 402 +fridai 1 44 3.135494 3.135494 390 +textbook 1 44 3.135494 3.135494 397 +press 1 42 3.218876 3.218876 419 +edit 1 42 3.218876 3.218876 418 +linear 1 41 3.218876 3.218876 431 +examin 1 42 3.218876 3.218876 424 +error 1 40 3.258097 3.258097 449 +close 1 38 3.295837 3.295837 465 +workstat 1 37 3.332205 3.332205 479 +michael 1 35 3.401197 3.401197 514 +least 1 35 3.401197 3.401197 516 +john 1 33 3.433987 3.433987 532 +storag 1 31 3.496508 3.496508 553 +except 1 28 3.610918 3.610918 607 +held 1 28 3.610918 3.610918 600 +repres 1 26 3.688879 3.688879 656 +session 1 26 3.688879 3.688879 643 +strategi 1 25 3.737670 3.737670 682 +equat 1 23 3.806662 3.806662 724 +recommend 1 22 3.850148 3.850148 737 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +scheme 1 20 3.951244 3.951244 818 +minut 1 20 3.951244 3.951244 810 +prerequisit 1 19 4.007333 4.007333 846 +dimension 1 18 4.060443 4.060443 909 +matrix 1 17 4.110874 4.110874 933 +modif 1 17 4.110874 4.110874 913 +previous 1 17 4.110874 4.110874 923 +spars 1 16 4.174387 4.174387 989 +vector 1 16 4.174387 4.174387 961 +sheet 1 16 4.174387 4.174387 973 +finit 1 14 4.317488 4.317488 1106 +squar 1 14 4.317488 4.317488 1082 +nonlinear 1 14 4.317488 4.317488 1107 +matlab 1 14 4.317488 4.317488 1081 +iter 1 12 4.465908 4.465908 1206 +matric 1 10 4.653960 4.653960 1399 +arithmet 1 10 4.653960 4.653960 1388 +elimin 1 9 4.753590 4.753590 1558 +ferri 1 8 4.875197 4.875197 1715 +solver 1 7 5.010635 5.010635 1911 +oxford 1 6 5.164786 5.164786 2121 +dens 1 6 5.164786 5.164786 2122 +pivot 1 5 5.347108 5.347108 2426 +consent 1 5 5.347108 5.347108 2389 +novic 1 4 5.568345 5.568345 2815 +golub 1 3 5.857933 5.857933 3265 +eigenvalu 1 3 5.857933 5.857933 3364 +eigenvector 1 3 5.857933 5.857933 3365 +systemsspr 1 2 6.263398 6.263398 4762 +leei 1 2 6.263398 6.263398 4759 +loan 1 2 6.263398 6.263398 4147 +gaussian 1 2 6.263398 6.263398 4763 +hopkinsunivers 1 1 6.957497 6.957497 9018 +duff 1 1 6.957497 6.957497 9019 +erisman 1 1 6.957497 6.957497 9020 +reid 1 1 6.957497 6.957497 9021 +halmo 1 1 6.957497 6.957497 9022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..61a1ae29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +start 1 83 2.484907 2.484907 173 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +overview 1 56 2.890372 2.890372 323 +date 1 51 2.995732 2.995732 344 +telephon 1 50 3.044522 3.044522 373 +appoint 1 49 3.044522 3.044522 358 +get 1 46 3.091042 3.091042 380 +fridai 1 44 3.135494 3.135494 390 +examin 1 42 3.218876 3.218876 424 +richard 1 31 3.496508 3.496508 559 +charl 1 13 4.382027 4.382027 1149 +benjamin 1 11 4.553877 4.553877 1296 +regularli 1 11 4.553877 4.553877 1338 +tuth 1 9 4.753590 4.753590 1519 +cum 1 8 4.875197 4.875197 1619 +fischer 1 7 5.010635 5.010635 1893 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +krishna 1 3 5.857933 5.857933 3495 +compilersfal 1 2 6.263398 6.263398 4223 +csst 1 2 6.263398 6.263398 4764 +krisna 1 2 6.263398 6.263398 4765 +weyer 1 2 6.263398 6.263398 4558 +kunchithapadam 1 1 6.957497 6.957497 9023 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..eeeafe01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +object 1 138 1.945910 1.945910 79 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +section 1 94 2.397895 2.397895 149 +solv 1 73 2.639057 2.639057 234 +html 1 75 2.639057 2.639057 235 +polici 1 64 2.772589 2.772589 279 +publish 1 57 2.890372 2.890372 326 +algebra 1 45 3.135494 3.135494 394 +compani 1 41 3.218876 3.218876 423 +http 1 41 3.218876 3.218876 420 +must 1 40 3.258097 3.258097 442 +eduoffic 1 33 3.433987 3.433987 531 +greg 1 24 3.761200 3.761200 695 +known 1 24 3.761200 3.761200 702 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +informationemail 1 9 4.753590 4.753590 1564 +appt 1 5 5.347108 5.347108 2312 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +grader 1 3 5.857933 5.857933 3165 +krishna 1 3 5.857933 5.857933 3495 +objectivesvectra 1 3 5.857933 5.857933 3410 +policyl 1 3 5.857933 5.857933 3412 +policyacadem 1 3 5.857933 5.857933 3413 +sharpemail 1 2 6.263398 6.263398 4766 +krisna 1 2 6.263398 6.263398 4765 +archivepolici 1 2 6.263398 6.263398 4580 +sharpgreg 1 2 6.263398 6.263398 4767 +kunchithapadamemail 1 1 6.957497 6.957497 9024 +edugener 1 1 6.957497 6.957497 9025 +consultantssyllabuscours 1 1 6.957497 6.957497 9026 +difficultywork 1 1 6.957497 6.957497 9027 +homenewsstartup 1 1 6.957497 6.957497 9028 +informationclass 1 1 6.957497 6.957497 9029 +noteshomeworkexam 1 1 6.957497 6.957497 9030 +quizzesstyl 1 1 6.957497 6.957497 9031 +guideemail 1 1 6.957497 6.957497 9032 +textproblem 1 1 6.957497 6.957497 9033 +porgrammingwalt 1 1 6.957497 6.957497 9034 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..5b26dbd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,160 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +object 1 138 1.945910 1.945910 79 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +compil 1 122 2.079442 2.079442 96 +introduct 1 126 2.079442 2.079442 87 +version 1 113 2.197225 2.197225 122 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +code 1 108 2.197225 2.197225 116 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +chang 1 82 2.484907 2.484907 163 +help 1 83 2.484907 2.484907 175 +start 1 83 2.484907 2.484907 173 +requir 1 81 2.484907 2.484907 167 +stuff 1 87 2.484907 2.484907 171 +academ 1 82 2.484907 2.484907 178 +second 1 81 2.484907 2.484907 166 +want 1 79 2.564949 2.564949 199 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +tuesdai 1 73 2.639057 2.639057 219 +solv 1 73 2.639057 2.639057 234 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +window 1 68 2.708050 2.708050 242 +import 1 65 2.772589 2.772589 282 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +type 1 61 2.833213 2.833213 296 +semest 1 58 2.890372 2.890372 312 +publish 1 57 2.890372 2.890372 326 +suggest 1 53 2.944439 2.944439 331 +sampl 1 53 2.944439 2.944439 339 +basic 1 50 3.044522 3.044522 360 +understand 1 47 3.091042 3.091042 384 +algebra 1 45 3.135494 3.135494 394 +directori 1 45 3.135494 3.135494 396 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +might 1 41 3.218876 3.218876 426 +compani 1 41 3.218876 3.218876 423 +announc 1 40 3.258097 3.258097 441 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +mean 1 37 3.332205 3.332205 477 +either 1 35 3.401197 3.401197 506 +within 1 33 3.433987 3.433987 525 +ad 1 32 3.465736 3.465736 544 +someth 1 31 3.496508 3.496508 554 +hard 1 30 3.555348 3.555348 563 +great 1 27 3.637586 3.637586 626 +administr 1 27 3.637586 3.637586 628 +consist 1 26 3.688879 3.688879 651 +valu 1 25 3.737670 3.737670 665 +consult 1 24 3.761200 3.761200 687 +togeth 1 23 3.806662 3.806662 714 +defin 1 22 3.850148 3.850148 746 +tent 1 22 3.850148 3.850148 739 +sure 1 20 3.951244 3.951244 813 +entir 1 20 3.951244 3.951244 811 +fine 1 20 3.951244 3.951244 822 +prepar 1 20 3.951244 3.951244 824 +definit 1 19 4.007333 4.007333 864 +attend 1 18 4.060443 4.060443 893 +attempt 1 17 4.110874 4.110874 917 +walter 1 17 4.110874 4.110874 950 +earli 1 16 4.174387 4.174387 968 +weslei 1 16 4.174387 4.174387 983 +misconduct 1 16 4.174387 4.174387 1003 +piec 1 15 4.248495 4.248495 1020 +style 1 15 4.248495 4.248495 1036 +shown 1 14 4.317488 4.317488 1080 +borland 1 14 4.317488 4.317488 1067 +bodi 1 13 4.382027 4.382027 1178 +everyth 1 13 4.382027 4.382027 1169 +quizz 1 13 4.382027 4.382027 1151 +outsid 1 12 4.465908 4.465908 1219 +insid 1 12 4.465908 4.465908 1262 +readi 1 12 4.465908 4.465908 1242 +savitch 1 12 4.465908 4.465908 1269 +addison 1 12 4.465908 4.465908 1230 +vectra 1 12 4.465908 4.465908 1267 +tue 1 11 4.553877 4.553877 1308 +chri 1 11 4.553877 4.553877 1311 +noth 1 11 4.553877 4.553877 1328 +extra 1 11 4.553877 4.553877 1312 +correspond 1 10 4.653960 4.653960 1382 +bring 1 10 4.653960 4.653960 1430 +declar 1 9 4.753590 4.753590 1526 +lane 1 8 4.875197 4.875197 1720 +matter 1 8 4.875197 4.875197 1627 +bottom 1 7 5.010635 5.010635 1906 +throughout 1 7 5.010635 5.010635 1871 +sharp 1 6 5.164786 5.164786 2100 +recogn 1 5 5.347108 5.347108 2302 +crucial 1 5 5.347108 5.347108 2384 +prog 1 4 5.568345 5.568345 2740 +shouldn 1 4 5.568345 5.568345 2606 +thumb 1 4 5.568345 5.568345 2816 +enumer 1 3 5.857933 5.857933 3244 +privat 1 3 5.857933 5.857933 3496 +bump 1 3 5.857933 5.857933 3497 +obsolet 1 3 5.857933 5.857933 3196 +freshman 1 3 5.857933 5.857933 3462 +chad 1 2 6.263398 6.263398 4768 +forgot 1 2 6.263398 6.263398 4769 +weaver 1 2 6.263398 6.263398 4770 +freshmen 1 2 6.263398 6.263398 4554 +disregard 1 2 6.263398 6.263398 4189 +tribbl 1 1 6.957497 6.957497 9035 +randomintinrang 1 1 6.957497 6.957497 9036 +uppercas 1 1 6.957497 6.957497 9037 +overwrit 1 1 6.957497 6.957497 9038 +discrep 1 1 6.957497 6.957497 9039 +solutionscours 1 1 6.957497 6.957497 9040 +vleck 1 1 6.957497 6.957497 9041 +guidelast 1 1 6.957497 6.957497 9042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..68417e24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +techniqu 1 99 2.302585 2.302585 138 +octob 1 89 2.397895 2.397895 156 +grade 1 90 2.397895 2.397895 142 +librari 1 87 2.484907 2.484907 181 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +overview 1 56 2.890372 2.890372 323 +date 1 51 2.995732 2.995732 344 +telephon 1 50 3.044522 3.044522 373 +appoint 1 49 3.044522 3.044522 358 +principl 1 48 3.044522 3.044522 357 +get 1 46 3.091042 3.091042 380 +fridai 1 44 3.135494 3.135494 390 +examin 1 42 3.218876 3.218876 424 +late 1 40 3.258097 3.258097 439 +comp 1 26 3.688879 3.688879 650 +reserv 1 20 3.951244 3.951244 808 +stat 1 17 4.110874 4.110874 924 +month 1 15 4.248495 4.248495 1025 +psycholog 1 15 4.248495 4.248495 1054 +susan 1 15 4.248495 4.248495 1050 +stori 1 14 4.317488 4.317488 1087 +regularli 1 11 4.553877 4.553877 1338 +wendt 1 10 4.653960 4.653960 1446 +tuth 1 9 4.753590 4.753590 1519 +recit 1 9 4.753590 4.753590 1475 +fischer 1 7 5.010635 5.010635 1893 +horwitz 1 5 5.347108 5.347108 2411 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +ullman 1 4 5.568345 5.568345 2749 +rahul 1 3 5.857933 5.857933 3464 +compilersspr 1 2 6.263398 6.263398 4700 +kapoor 1 2 6.263398 6.263398 4701 +sethi 1 2 6.263398 6.263398 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..8e59604e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +academ 1 82 2.484907 2.484907 178 +mondai 1 77 2.564949 2.564949 206 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +semest 1 58 2.890372 2.890372 312 +announc 1 40 3.258097 3.258097 441 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +statist 1 35 3.401197 3.401197 521 +tent 1 22 3.850148 3.850148 739 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +savitch 1 12 4.465908 4.465908 1269 +criteria 1 9 4.753590 4.753590 1477 +hummert 1 3 5.857933 5.857933 3416 +textbookproblem 1 3 5.857933 5.857933 3483 +psych 1 3 5.857933 5.857933 3498 +windowshint 1 3 5.857933 5.857933 3484 +compilersth 1 3 5.857933 5.857933 3485 +systememailmosaicnetscap 1 3 5.857933 5.857933 3486 +languageth 1 3 5.857933 5.857933 3487 +sectionsc 1 1 6.957497 6.957497 9043 +viewgraph 1 1 6.957497 6.957497 9044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..01319dda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +madison 1 165 1.791759 1.791759 55 +hall 1 146 1.945910 1.945910 65 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +number 1 130 2.079442 2.079442 97 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +thing 1 84 2.484907 2.484907 189 +mondai 1 77 2.564949 2.564949 206 +name 1 72 2.639057 2.639057 220 +thursdai 1 70 2.708050 2.708050 241 +copi 1 63 2.772589 2.772589 284 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +keep 1 44 3.135494 3.135494 409 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +mind 1 27 3.637586 3.637586 632 +stop 1 17 4.110874 4.110874 942 +choos 1 16 4.174387 4.174387 964 +doit 1 14 4.317488 4.317488 1111 +menu 1 13 4.382027 4.382027 1156 +bodner 1 5 5.347108 5.347108 2401 +eduher 1 3 5.857933 5.857933 3499 +infoc 1 2 6.263398 6.263398 4772 +jonb 1 2 6.263398 6.263398 4771 +mound 1 2 6.263398 6.263398 4773 +bodnersect 1 1 6.957497 6.957497 9045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..a6226b27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +teach 1 108 2.197225 2.197225 112 +book 1 99 2.302585 2.302585 131 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +sinc 1 90 2.397895 2.397895 159 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +exam 1 86 2.484907 2.484907 169 +learn 1 86 2.484907 2.484907 170 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +appear 1 78 2.564949 2.564949 210 +refer 1 78 2.564949 2.564949 203 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +line 1 75 2.639057 2.639057 231 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +previou 1 62 2.772589 2.772589 290 +plai 1 60 2.833213 2.833213 307 +best 1 59 2.833213 2.833213 299 +detail 1 57 2.890372 2.890372 321 +reason 1 57 2.890372 2.890372 318 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +give 1 50 3.044522 3.044522 359 +still 1 50 3.044522 3.044522 362 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +discuss 1 45 3.135494 3.135494 399 +natur 1 44 3.135494 3.135494 406 +anoth 1 45 3.135494 3.135494 408 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +directori 1 45 3.135494 3.135494 396 +edit 1 42 3.218876 3.218876 418 +review 1 42 3.218876 3.218876 425 +late 1 40 3.258097 3.258097 439 +probabl 1 40 3.258097 3.258097 455 +game 1 36 3.367296 3.367296 498 +eduoffic 1 33 3.433987 3.433987 531 +within 1 33 3.433987 3.433987 525 +neural 1 30 3.555348 3.555348 578 +common 1 30 3.555348 3.555348 574 +turn 1 29 3.583519 3.583519 586 +usual 1 28 3.610918 3.610918 608 +session 1 26 3.688879 3.688879 643 +notic 1 25 3.737670 3.737670 675 +begin 1 23 3.806662 3.806662 716 +entir 1 20 3.951244 3.951244 811 +minut 1 20 3.951244 3.951244 810 +four 1 18 4.060443 4.060443 905 +lisp 1 18 4.060443 4.060443 897 +attend 1 18 4.060443 4.060443 893 +intro 1 17 4.110874 4.110874 915 +stat 1 17 4.110874 4.110874 924 +ultim 1 17 4.110874 4.110874 943 +modern 1 16 4.174387 4.174387 966 +later 1 15 4.248495 4.248495 1043 +prolog 1 13 4.382027 4.382027 1155 +count 1 12 4.465908 4.465908 1239 +deduct 1 12 4.465908 4.465908 1236 +alpha 1 11 4.553877 4.553877 1348 +engr 1 10 4.653960 4.653960 1427 +hint 1 10 4.653960 4.653960 1419 +russel 1 9 4.753590 4.753590 1507 +recit 1 9 4.753590 4.753590 1475 +beta 1 6 5.164786 5.164786 1993 +supplement 1 5 5.347108 5.347108 2355 +older 1 5 5.347108 5.347108 2387 +midnight 1 4 5.568345 5.568345 2599 +uncertain 1 4 5.568345 5.568345 2758 +graham 1 4 5.568345 5.568345 2817 +steel 1 4 5.568345 5.568345 2818 +kunen 1 3 5.857933 5.857933 3500 +ansi 1 3 5.857933 5.857933 3198 +psych 1 3 5.857933 5.857933 3498 +coursewil 1 2 6.263398 6.263398 4718 +loos 1 2 6.263398 6.263398 4774 +buti 1 2 6.263398 6.263398 4775 +assignmenti 1 2 6.263398 6.263398 4573 +sun 1 2 6.263398 6.263398 4490 +kunenoffic 1 1 6.957497 6.957497 9046 +buildingtelephon 1 1 6.957497 6.957497 9047 +thirdexam 1 1 6.957497 6.957497 9048 +thedai 1 1 6.957497 6.957497 9049 +usefulto 1 1 6.957497 6.957497 9050 +lecturesand 1 1 6.957497 6.957497 9051 +manypaperback 1 1 6.957497 6.957497 9052 +lispcraft 1 1 6.957497 6.957497 9053 +wilenski 1 1 6.957497 6.957497 9054 +norvig 1 1 6.957497 6.957497 9055 +essentiallli 1 1 6.957497 6.957497 9056 +alpha_beta 1 1 6.957497 6.957497 9057 +astar 1 1 6.957497 6.957497 9058 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..db782dd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +hour 1 165 1.791759 1.791759 46 +parallel 1 169 1.791759 1.791759 60 +wisconsin 1 169 1.791759 1.791759 54 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +introduct 1 126 2.079442 2.079442 87 +report 1 131 2.079442 2.079442 92 +spring 1 131 2.079442 2.079442 88 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +question 1 91 2.397895 2.397895 141 +level 1 87 2.484907 2.484907 180 +solut 1 82 2.484907 2.484907 162 +novemb 1 81 2.484907 2.484907 179 +exam 1 86 2.484907 2.484907 169 +wide 1 84 2.484907 2.484907 185 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +decemb 1 80 2.564949 2.564949 215 +sourc 1 77 2.564949 2.564949 201 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +content 1 59 2.833213 2.833213 302 +instruct 1 53 2.944439 2.944439 332 +talk 1 53 2.944439 2.944439 336 +tabl 1 51 2.995732 2.995732 346 +appoint 1 49 3.044522 3.044522 358 +give 1 50 3.044522 3.044522 359 +set 1 50 3.044522 3.044522 361 +basic 1 50 3.044522 3.044522 360 +mark 1 44 3.135494 3.135494 403 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +offer 1 43 3.178054 3.178054 414 +cach 1 41 3.218876 3.218876 432 +edit 1 42 3.218876 3.218876 418 +seminar 1 38 3.295837 3.295837 470 +cost 1 37 3.332205 3.332205 480 +eduoffic 1 33 3.433987 3.433987 531 +chapter 1 32 3.465736 3.465736 536 +hard 1 30 3.555348 3.555348 563 +full 1 28 3.610918 3.610918 615 +propos 1 28 3.610918 3.610918 602 +comp 1 26 3.688879 3.688879 650 +hill 1 25 3.737670 3.737670 670 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +interconnect 1 17 4.110874 4.110874 937 +doit 1 14 4.317488 4.317488 1111 +reader 1 12 4.465908 4.465908 1246 +patterson 1 9 4.753590 4.753590 1554 +qualifi 1 8 4.875197 4.875197 1721 +pipelin 1 7 5.010635 5.010635 1830 +hennessi 1 5 5.347108 5.347108 2289 +markhil 1 4 5.568345 5.568345 2819 +talluri 1 4 5.568345 5.568345 2820 +ifal 1 2 6.263398 6.263398 4776 +statphon 1 2 6.263398 6.263398 4726 +hilloffic 1 1 6.957497 6.957497 9059 +statemail 1 1 6.957497 6.957497 9060 +shenoffic 1 1 6.957497 6.957497 9061 +mshen 1 1 6.957497 6.957497 9062 +miscellaneawhat 1 1 6.957497 6.957497 9063 +talksread 1 1 6.957497 6.957497 9064 +solutionproject 1 1 6.957497 6.957497 9065 +noonmiscellanea 1 1 6.957497 6.957497 9066 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..60acd8bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +structur 1 106 2.197225 2.197225 105 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +academ 1 82 2.484907 2.484907 178 +exampl 1 77 2.564949 2.564949 195 +dynam 1 76 2.564949 2.564949 194 +solv 1 73 2.639057 2.639057 234 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +polici 1 64 2.772589 2.772589 279 +complex 1 64 2.772589 2.772589 269 +room 1 59 2.833213 2.833213 301 +simpl 1 60 2.833213 2.833213 298 +point 1 58 2.890372 2.890372 319 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +anoth 1 45 3.135494 3.135494 408 +late 1 40 3.258097 3.258097 439 +tutori 1 39 3.258097 3.258097 437 +microsoft 1 38 3.295837 3.295837 468 +procedur 1 36 3.367296 3.367296 488 +michael 1 35 3.401197 3.401197 514 +administr 1 27 3.637586 3.637586 628 +comp 1 26 3.688879 3.688879 650 +repres 1 26 3.688879 3.688879 656 +consult 1 24 3.761200 3.761200 687 +size 1 23 3.806662 3.806662 713 +alloc 1 20 3.951244 3.951244 821 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +psycholog 1 15 4.248495 4.248495 1054 +borland 1 14 4.317488 4.317488 1067 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +string 1 11 4.553877 4.553877 1340 +cheat 1 10 4.653960 4.653960 1395 +float 1 9 4.753590 4.753590 1504 +debugg 1 9 4.753590 4.753590 1493 +handin 1 5 5.347108 5.347108 2393 +ration 1 5 5.347108 5.347108 2427 +overload 1 5 5.347108 5.347108 2403 +birk 1 4 5.568345 5.568345 2791 +struct 1 4 5.568345 5.568345 2821 +mbirk 1 3 5.857933 5.857933 3501 +intstack 1 2 6.263398 6.263398 4777 +unlimit 1 2 6.263398 6.263398 4778 +classinfo 1 2 6.263398 6.263398 4779 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..35d1cdf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +number 1 130 2.079442 2.079442 97 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +octob 1 89 2.397895 2.397895 156 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +second 1 81 2.484907 2.484907 166 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +mondai 1 77 2.564949 2.564949 206 +come 1 78 2.564949 2.564949 202 +exampl 1 77 2.564949 2.564949 195 +dynam 1 76 2.564949 2.564949 194 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +solv 1 73 2.639057 2.639057 234 +tuesdai 1 73 2.639057 2.639057 219 +test 1 66 2.708050 2.708050 252 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +polici 1 64 2.772589 2.772589 279 +complex 1 64 2.772589 2.772589 269 +room 1 59 2.833213 2.833213 301 +simpl 1 60 2.833213 2.833213 298 +point 1 58 2.890372 2.890372 319 +instruct 1 53 2.944439 2.944439 332 +case 1 51 2.995732 2.995732 351 +digit 1 52 2.995732 2.995732 348 +week 1 52 2.995732 2.995732 343 +appoint 1 49 3.044522 3.044522 358 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +discuss 1 45 3.135494 3.135494 399 +anoth 1 45 3.135494 3.135494 408 +past 1 42 3.218876 3.218876 428 +announc 1 40 3.258097 3.258097 441 +late 1 40 3.258097 3.258097 439 +tutori 1 39 3.258097 3.258097 437 +microsoft 1 38 3.295837 3.295837 468 +soon 1 36 3.367296 3.367296 494 +procedur 1 36 3.367296 3.367296 488 +michael 1 35 3.401197 3.401197 514 +print 1 34 3.401197 3.401197 503 +administr 1 27 3.637586 3.637586 628 +comp 1 26 3.688879 3.688879 650 +repres 1 26 3.688879 3.688879 656 +consult 1 24 3.761200 3.761200 687 +size 1 23 3.806662 3.806662 713 +output 1 21 3.912023 3.912023 788 +alloc 1 20 3.951244 3.951244 821 +four 1 18 4.060443 4.060443 905 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +rank 1 14 4.317488 4.317488 1063 +borland 1 14 4.317488 4.317488 1067 +outsid 1 12 4.465908 4.465908 1219 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +eight 1 11 4.553877 4.553877 1331 +string 1 11 4.553877 4.553877 1340 +cheat 1 10 4.653960 4.653960 1395 +float 1 9 4.753590 4.753590 1504 +debugg 1 9 4.753590 4.753590 1493 +handin 1 5 5.347108 5.347108 2393 +ration 1 5 5.347108 5.347108 2427 +overload 1 5 5.347108 5.347108 2403 +birk 1 4 5.568345 5.568345 2791 +struct 1 4 5.568345 5.568345 2821 +mbirk 1 3 5.857933 5.857933 3501 +intstack 1 2 6.263398 6.263398 4777 +unlimit 1 2 6.263398 6.263398 4778 +classinfo 1 2 6.263398 6.263398 4779 +dice 1 1 6.957497 6.957497 9067 +hangman 1 1 6.957497 6.957497 9068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..a959e116 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +code 1 108 2.197225 2.197225 116 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +stuff 1 87 2.484907 2.484907 171 +help 1 83 2.484907 2.484907 175 +info 1 85 2.484907 2.484907 176 +academ 1 82 2.484907 2.484907 178 +solut 1 82 2.484907 2.484907 162 +refer 1 78 2.564949 2.564949 203 +david 1 71 2.639057 2.639057 232 +meet 1 72 2.639057 2.639057 229 +solv 1 73 2.639057 2.639057 234 +materi 1 75 2.639057 2.639057 221 +window 1 68 2.708050 2.708050 242 +syllabu 1 67 2.708050 2.708050 247 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +share 1 59 2.833213 2.833213 304 +frequent 1 49 3.044522 3.044522 367 +archiv 1 49 3.044522 3.044522 364 +get 1 46 3.091042 3.091042 380 +press 1 42 3.218876 3.218876 419 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +statist 1 35 3.401197 3.401197 521 +often 1 31 3.496508 3.496508 551 +rule 1 26 3.688879 3.688879 638 +daili 1 24 3.761200 3.761200 706 +consult 1 24 3.761200 3.761200 687 +walter 1 17 4.110874 4.110874 950 +alreadi 1 16 4.174387 4.174387 963 +misconduct 1 16 4.174387 4.174387 1003 +psycholog 1 15 4.248495 4.248495 1054 +dave 1 14 4.317488 4.317488 1098 +floor 1 14 4.317488 4.317488 1070 +essenti 1 13 4.382027 4.382027 1137 +savitch 1 12 4.465908 4.465908 1269 +addison 1 12 4.465908 4.465908 1230 +andth 1 9 4.753590 4.753590 1481 +tutor 1 9 4.753590 4.753590 1552 +reload 1 8 4.875197 4.875197 1682 +attach 1 7 5.010635 5.010635 1785 +rough 1 6 5.164786 5.164786 2107 +button 1 5 5.347108 5.347108 2337 +noland 1 5 5.347108 5.347108 2420 +thumb 1 4 5.568345 5.568345 2816 +melski 1 2 6.263398 6.263398 4780 +melskithes 1 1 6.957497 6.957497 9069 +uncomfort 1 1 6.957497 6.957497 9070 +wesleypublish 1 1 6.957497 6.957497 9071 +usingborland 1 1 6.957497 6.957497 9072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..e121b077 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +section 1 94 2.397895 2.397895 149 +homepag 1 93 2.397895 2.397895 148 +octob 1 89 2.397895 2.397895 156 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +exam 1 86 2.484907 2.484907 169 +academ 1 82 2.484907 2.484907 178 +requir 1 81 2.484907 2.484907 167 +solut 1 82 2.484907 2.484907 162 +mondai 1 77 2.564949 2.564949 206 +exampl 1 77 2.564949 2.564949 195 +refer 1 78 2.564949 2.564949 203 +good 1 77 2.564949 2.564949 200 +decemb 1 80 2.564949 2.564949 215 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +test 1 66 2.708050 2.708050 252 +syllabu 1 67 2.708050 2.708050 247 +main 1 67 2.708050 2.708050 256 +wednesdai 1 64 2.772589 2.772589 261 +guid 1 63 2.772589 2.772589 267 +creat 1 63 2.772589 2.772589 277 +function 1 62 2.772589 2.772589 275 +septemb 1 65 2.772589 2.772589 274 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +publish 1 57 2.890372 2.890372 326 +case 1 51 2.995732 2.995732 351 +appoint 1 49 3.044522 3.044522 358 +frequent 1 49 3.044522 3.044522 367 +standard 1 48 3.044522 3.044522 365 +algebra 1 45 3.135494 3.135494 394 +fridai 1 44 3.135494 3.135494 390 +textbook 1 44 3.135494 3.135494 397 +compani 1 41 3.218876 3.218876 423 +form 1 39 3.258097 3.258097 443 +respons 1 37 3.332205 3.332205 476 +survei 1 35 3.401197 3.401197 513 +eduoffic 1 33 3.433987 3.433987 531 +given 1 32 3.465736 3.465736 538 +ad 1 32 3.465736 3.465736 544 +often 1 31 3.496508 3.496508 551 +titl 1 31 3.496508 3.496508 556 +specifi 1 30 3.555348 3.555348 568 +turn 1 29 3.583519 3.583519 586 +releas 1 28 3.610918 3.610918 616 +todai 1 25 3.737670 3.737670 672 +valu 1 25 3.737670 3.737670 665 +consult 1 24 3.761200 3.761200 687 +known 1 24 3.761200 3.761200 702 +input 1 23 3.806662 3.806662 727 +martin 1 21 3.912023 3.912023 794 +latest 1 21 3.912023 3.912023 785 +sure 1 20 3.951244 3.951244 813 +coupl 1 17 4.110874 4.110874 939 +walter 1 17 4.110874 4.110874 950 +quiz 1 16 4.174387 4.174387 990 +sheet 1 16 4.174387 4.174387 973 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +style 1 15 4.248495 4.248495 1036 +score 1 15 4.248495 4.248495 1017 +introduc 1 13 4.382027 4.382027 1139 +onth 1 12 4.465908 4.465908 1218 +vectra 1 12 4.465908 4.465908 1267 +errata 1 10 4.653960 4.653960 1403 +minimum 1 9 4.753590 4.753590 1555 +remind 1 7 5.010635 5.010635 1799 +ethic 1 7 5.010635 5.010635 1786 +savitchaddison 1 5 5.347108 5.347108 2396 +struct 1 4 5.568345 5.568345 2821 +maximum 1 4 5.568345 5.568345 2632 +toth 1 4 5.568345 5.568345 2595 +beginn 1 3 5.857933 5.857933 3330 +milo 1 2 6.263398 6.263398 4781 +viru 1 2 6.263398 6.263398 4782 +psychologyinstructor 1 1 6.957497 6.957497 9075 +announcementsthi 1 1 6.957497 6.957497 9076 +scheduledfor 1 1 6.957497 6.957497 9077 +haseveryth 1 1 6.957497 6.957497 9078 +isaccur 1 1 6.957497 6.957497 9079 +withinform 1 1 6.957497 6.957497 9080 +linksar 1 1 6.957497 6.957497 9081 +onfridai 1 1 6.957497 6.957497 9082 +policyclass 1 1 6.957497 6.957497 9083 +bankaccount 1 1 6.957497 6.957497 9073 +minmax 1 1 6.957497 6.957497 9084 +findth 1 1 6.957497 6.957497 9085 +enteredfrom 1 1 6.957497 6.957497 9086 +stdin 1 1 6.957497 6.957497 9087 +formlett 1 1 6.957497 6.957497 9088 +theopen_fil 1 1 6.957497 6.957497 9089 +withprompt 1 1 6.957497 6.957497 9074 +hoax 1 1 6.957497 6.957497 9090 +andprofession 1 1 6.957497 6.957497 9091 +conductassign 1 1 6.957497 6.957497 9092 +questionar 1 1 6.957497 6.957497 9093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..22ffbab6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +code 1 108 2.197225 2.197225 116 +instructor 1 108 2.197225 2.197225 107 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +contain 1 81 2.484907 2.484907 174 +messag 1 76 2.564949 2.564949 212 +april 1 77 2.564949 2.564949 196 +tuesdai 1 73 2.639057 2.639057 219 +solv 1 73 2.639057 2.639057 234 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +syllabu 1 67 2.708050 2.708050 247 +januari 1 62 2.772589 2.772589 264 +polici 1 64 2.772589 2.772589 279 +march 1 61 2.833213 2.833213 295 +share 1 59 2.833213 2.833213 304 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +februari 1 54 2.944439 2.944439 328 +talk 1 53 2.944439 2.944439 336 +run 1 51 2.995732 2.995732 347 +appoint 1 49 3.044522 3.044522 358 +algebra 1 45 3.135494 3.135494 394 +made 1 44 3.135494 3.135494 398 +late 1 40 3.258097 3.258097 439 +statist 1 35 3.401197 3.401197 521 +rule 1 26 3.688879 3.688879 638 +sent 1 22 3.850148 3.850148 763 +tent 1 22 3.850148 3.850148 739 +martin 1 21 3.912023 3.912023 794 +walter 1 17 4.110874 4.110874 950 +former 1 17 4.110874 4.110874 956 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +extra 1 11 4.553877 4.553877 1312 +criteria 1 9 4.753590 4.753590 1477 +absolut 1 8 4.875197 4.875197 1646 +calendar 1 8 4.875197 4.875197 1649 +informationc 1 5 5.347108 5.347108 2394 +noland 1 5 5.347108 5.347108 2420 +thumb 1 4 5.568345 5.568345 2816 +textbookproblem 1 3 5.857933 5.857933 3483 +ream 1 2 6.263398 6.263398 4783 +mream 1 2 6.263398 6.263398 4784 +csst 1 2 6.263398 6.263398 4764 +classc 1 1 6.957497 6.957497 9094 +programmingspr 1 1 6.957497 6.957497 9095 +nothingeverydai 1 1 6.957497 6.957497 9096 +pagescommon 1 1 6.957497 6.957497 9097 +programmingmistakesarch 1 1 6.957497 6.957497 9098 +placeto 1 1 6.957497 6.957497 9099 +announcedcours 1 1 6.957497 6.957497 9100 +andborland 1 1 6.957497 6.957497 9101 +academicmisconduct 1 1 6.957497 6.957497 9102 +anyform 1 1 6.957497 6.957497 9103 +bigtodd 1 1 6.957497 6.957497 9104 +thielwendi 1 1 6.957497 6.957497 9105 +staatsabout 1 1 6.957497 6.957497 9106 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..e3d6a1a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +note 1 142 1.945910 1.945910 67 +object 1 138 1.945910 1.945910 79 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +novemb 1 81 2.484907 2.484907 179 +stuff 1 87 2.484907 2.484907 171 +academ 1 82 2.484907 2.484907 178 +exampl 1 77 2.564949 2.564949 195 +refer 1 78 2.564949 2.564949 203 +tuesdai 1 73 2.639057 2.639057 219 +line 1 75 2.639057 2.639057 231 +solv 1 73 2.639057 2.639057 234 +syllabu 1 67 2.708050 2.708050 247 +import 1 65 2.772589 2.772589 282 +function 1 62 2.772589 2.772589 275 +polici 1 64 2.772589 2.772589 279 +publish 1 57 2.890372 2.890372 326 +sampl 1 53 2.944439 2.944439 339 +week 1 52 2.995732 2.995732 343 +appoint 1 49 3.044522 3.044522 358 +understand 1 47 3.091042 3.091042 384 +algebra 1 45 3.135494 3.135494 394 +even 1 45 3.135494 3.135494 393 +compani 1 41 3.218876 3.218876 423 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +hand 1 37 3.332205 3.332205 475 +eduoffic 1 33 3.433987 3.433987 531 +extend 1 32 3.465736 3.465736 539 +taken 1 31 3.496508 3.496508 555 +comp 1 26 3.688879 3.688879 650 +mike 1 24 3.761200 3.761200 703 +greg 1 24 3.761200 3.761200 695 +known 1 24 3.761200 3.761200 702 +miss 1 19 4.007333 4.007333 866 +stat 1 17 4.110874 4.110874 924 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +near 1 14 4.317488 4.317488 1091 +borland 1 14 4.317488 4.317488 1067 +everyth 1 13 4.382027 4.382027 1169 +rememb 1 12 4.465908 4.465908 1217 +vectra 1 12 4.465908 4.465908 1267 +fill 1 11 4.553877 4.553877 1349 +deadlin 1 9 4.753590 4.753590 1502 +didn 1 9 4.753590 4.753590 1563 +informationemail 1 9 4.753590 4.753590 1564 +bottom 1 7 5.010635 5.010635 1906 +ifyou 1 6 5.164786 5.164786 1992 +sharp 1 6 5.164786 5.164786 2100 +clarif 1 5 5.347108 5.347108 2253 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +steel 1 4 5.568345 5.568345 2818 +programmingassign 1 3 5.857933 5.857933 3398 +homeclass 1 3 5.857933 5.857933 3411 +nolandinstructor 1 2 6.263398 6.263398 4785 +steeleemail 1 1 6.957497 6.957497 9108 +msteel 1 1 6.957497 6.957497 9107 +buildingoffic 1 1 6.957497 6.957497 9109 +soffic 1 1 6.957497 6.957497 9110 +announcementsi 1 1 6.957497 6.957497 9111 +thenew 1 1 6.957497 6.957497 9112 +informationmidterm 1 1 6.957497 6.957497 9113 +pastfew 1 1 6.957497 6.957497 9114 +gloss 1 1 6.957497 6.957497 9115 +makefulli 1 1 6.957497 6.957497 9116 +objectivesabout 1 1 6.957497 6.957497 9117 +consultantscours 1 1 6.957497 6.957497 9118 +assignmentsnot 1 1 6.957497 6.957497 9119 +handoutsprogram 1 1 6.957497 6.957497 9120 +assignmentsexam 1 1 6.957497 6.957497 9121 +quizzessom 1 1 6.957497 6.957497 9122 +examplespolici 1 1 6.957497 6.957497 9123 +policyus 1 1 6.957497 6.957497 9124 +pagesintroduct 1 1 6.957497 6.957497 9125 +windowsintroduct 1 1 6.957497 6.957497 9126 +styleguid 1 1 6.957497 6.957497 9127 +codetextproblem 1 1 6.957497 6.957497 9128 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..121ccc11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +class 1 199 1.609438 1.609438 37 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +welcom 1 122 2.079442 2.079442 99 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +topic 1 114 2.197225 2.197225 110 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +mani 1 92 2.397895 2.397895 150 +exam 1 86 2.484907 2.484907 169 +level 1 87 2.484907 2.484907 180 +requir 1 81 2.484907 2.484907 167 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +second 1 81 2.484907 2.484907 166 +good 1 77 2.564949 2.564949 200 +decemb 1 80 2.564949 2.564949 215 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +onlin 1 75 2.639057 2.639057 223 +tuesdai 1 73 2.639057 2.639057 219 +solv 1 73 2.639057 2.639057 234 +materi 1 75 2.639057 2.639057 221 +test 1 66 2.708050 2.708050 252 +knowledg 1 67 2.708050 2.708050 243 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +copi 1 63 2.772589 2.772589 284 +result 1 65 2.772589 2.772589 281 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +guid 1 63 2.772589 2.772589 267 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +think 1 57 2.890372 2.890372 314 +sampl 1 53 2.944439 2.944439 339 +give 1 50 3.044522 3.044522 359 +archiv 1 49 3.044522 3.044522 364 +done 1 47 3.091042 3.091042 381 +made 1 44 3.135494 3.135494 398 +anoth 1 45 3.135494 3.135494 408 +mark 1 44 3.135494 3.135494 403 +answer 1 45 3.135494 3.135494 391 +error 1 40 3.258097 3.258097 449 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +feel 1 37 3.332205 3.332205 483 +statist 1 35 3.401197 3.401197 521 +eduoffic 1 33 3.433987 3.433987 531 +ad 1 32 3.465736 3.465736 544 +idea 1 32 3.465736 3.465736 545 +rang 1 30 3.555348 3.555348 565 +administr 1 27 3.637586 3.637586 628 +session 1 26 3.688879 3.688879 643 +todai 1 25 3.737670 3.737670 672 +notic 1 25 3.737670 3.737670 675 +consult 1 24 3.761200 3.761200 687 +tent 1 22 3.850148 3.850148 739 +sure 1 20 3.951244 3.951244 813 +andrew 1 19 4.007333 4.007333 849 +five 1 19 4.007333 4.007333 841 +seem 1 18 4.060443 4.060443 899 +modif 1 17 4.110874 4.110874 913 +walter 1 17 4.110874 4.110874 950 +alreadi 1 16 4.174387 4.174387 963 +quiz 1 16 4.174387 4.174387 990 +score 1 15 4.248495 4.248495 1017 +style 1 15 4.248495 4.248495 1036 +borland 1 14 4.317488 4.317488 1067 +everyon 1 13 4.382027 4.382027 1148 +verifi 1 12 4.465908 4.465908 1261 +minor 1 12 4.465908 4.465908 1237 +rememb 1 12 4.465908 4.465908 1217 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +extra 1 11 4.553877 4.553877 1312 +calendar 1 8 4.875197 4.875197 1649 +carefulli 1 6 5.164786 5.164786 2045 +ahead 1 5 5.347108 5.347108 2338 +crazi 1 4 5.568345 5.568345 2822 +webpag 1 4 5.568345 5.568345 2660 +infoc 1 2 6.263398 6.263398 4772 +prock 1 2 6.263398 6.263398 4786 +sessionalgebra 1 1 6.957497 6.957497 9129 +prockemail 1 1 6.957497 6.957497 9130 +thgrader 1 1 6.957497 6.957497 9131 +haihong 1 1 6.957497 6.957497 9132 +wangemail 1 1 6.957497 6.957497 9133 +mtwrannounc 1 1 6.957497 6.957497 9134 +gotton 1 1 6.957497 6.957497 9135 +perus 1 1 6.957497 6.957497 9136 +assing 1 1 6.957497 6.957497 9137 +misconductcours 1 1 6.957497 6.957497 9138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..94259313 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +avail 1 169 1.791759 1.791759 48 +file 1 132 1.945910 1.945910 70 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +search 1 95 2.397895 2.397895 155 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +stuff 1 87 2.484907 2.484907 171 +resourc 1 81 2.484907 2.484907 172 +internet 1 83 2.484907 2.484907 186 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +addit 1 74 2.639057 2.639057 228 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +virtual 1 62 2.772589 2.772589 285 +visit 1 63 2.772589 2.772589 288 +type 1 61 2.833213 2.833213 296 +best 1 59 2.833213 2.833213 299 +semest 1 58 2.890372 2.890372 312 +index 1 56 2.890372 2.890372 309 +space 1 57 2.890372 2.890372 310 +maintain 1 51 2.995732 2.995732 342 +without 1 50 3.044522 3.044522 370 +archiv 1 49 3.044522 3.044522 364 +might 1 41 3.218876 3.218876 426 +everi 1 34 3.401197 3.401197 519 +word 1 34 3.401197 3.401197 508 +post 1 35 3.401197 3.401197 505 +quot 1 29 3.583519 3.583519 582 +usual 1 28 3.610918 3.610918 608 +compress 1 23 3.806662 3.806662 719 +sent 1 22 3.850148 3.850148 763 +disk 1 22 3.850148 3.850148 747 +try 1 22 3.850148 3.850148 764 +command 1 14 4.317488 4.317488 1083 +keyword 1 11 4.553877 4.553877 1356 +enter 1 10 4.653960 4.653960 1454 +paragraph 1 10 4.653960 4.653960 1449 +tourist 1 8 4.875197 4.875197 1710 +usenet 1 7 5.010635 5.010635 1839 +nine 1 6 5.164786 5.164786 2047 +ignor 1 5 5.347108 5.347108 2288 +clickher 1 5 5.347108 5.347108 2428 +kelli 1 4 5.568345 5.568345 2793 +backup 1 4 5.568345 5.568345 2645 +oracl 1 4 5.568345 5.568345 2823 +ratliff 1 3 5.857933 5.857933 3419 +badger 1 3 5.857933 5.857933 3502 +sharewar 1 3 5.857933 5.857933 3503 +freewar 1 3 5.857933 5.857933 3504 +shuttl 1 2 6.263398 6.263398 4787 +clickabl 1 2 6.263398 6.263398 4788 +herald 1 2 6.263398 6.263398 4789 +biggest 1 2 6.263398 6.263398 4790 +desautel 1 2 6.263398 6.263398 4791 +wildcard 1 1 6.957497 6.957497 9140 +filesviru 1 1 6.957497 6.957497 9141 +faqfun 1 1 6.957497 6.957497 9142 +mapth 1 1 6.957497 6.957497 9143 +comicshumor 1 1 6.957497 6.957497 9144 +abort 1 1 6.957497 6.957497 9145 +retri 1 1 6.957497 6.957497 9146 +usersfin 1 1 6.957497 6.957497 9147 +weeklab 1 1 6.957497 6.957497 9148 +jokesget 1 1 6.957497 6.957497 9149 +tryingsom 1 1 6.957497 6.957497 9150 +somecompress 1 1 6.957497 6.957497 9151 +unpack 1 1 6.957497 6.957497 9152 +reviewsom 1 1 6.957497 6.957497 9153 +simtel 1 1 6.957497 6.957497 9139 +minclud 1 1 6.957497 6.957497 9154 +infocompress 1 1 6.957497 6.957497 9155 +infofavorit 1 1 6.957497 6.957497 9156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..d0849252 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +hour 1 165 1.791759 1.791759 46 +like 1 132 1.945910 1.945910 81 +lectur 1 135 1.945910 1.945910 73 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +build 1 85 2.484907 2.484907 184 +novemb 1 81 2.484907 2.484907 179 +academ 1 82 2.484907 2.484907 178 +come 1 78 2.564949 2.564949 202 +mondai 1 77 2.564949 2.564949 206 +solv 1 73 2.639057 2.639057 234 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +room 1 59 2.833213 2.833213 301 +semest 1 58 2.890372 2.890372 312 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +compani 1 41 3.218876 3.218876 423 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +game 1 36 3.367296 3.367296 498 +statist 1 35 3.401197 3.401197 521 +eduoffic 1 33 3.433987 3.433987 531 +except 1 28 3.610918 3.610918 607 +although 1 25 3.737670 3.737670 667 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +quizz 1 13 4.382027 4.382027 1151 +vectra 1 12 4.465908 4.465908 1267 +savitch 1 12 4.465908 4.465908 1269 +sundai 1 10 4.653960 4.653960 1387 +russel 1 9 4.753590 4.753590 1507 +saturdai 1 7 5.010635 5.010635 1794 +footbal 1 7 5.010635 5.010635 1912 +prioriti 1 7 5.010635 5.010635 1792 +none 1 7 5.010635 5.010635 1811 +basement 1 4 5.568345 5.568345 2663 +man 1 3 5.857933 5.857933 3417 +csc 1 3 5.857933 5.857933 3183 +windowshint 1 3 5.857933 5.857933 3484 +compilersth 1 3 5.857933 5.857933 3485 +systememailmosaicnetscap 1 3 5.857933 5.857933 3486 +languageth 1 3 5.857933 5.857933 3487 +russ 1 1 6.957497 6.957497 9157 +manningemail 1 1 6.957497 6.957497 9158 +rman 1 1 6.957497 6.957497 9159 +scienceoffic 1 1 6.957497 6.957497 9160 +rotc 1 1 6.957497 6.957497 9161 +textold 1 1 6.957497 6.957497 9162 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..18eacc2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +techniqu 1 99 2.302585 2.302585 138 +section 1 94 2.397895 2.397895 149 +search 1 95 2.397895 2.397895 155 +academ 1 82 2.484907 2.484907 178 +learn 1 86 2.484907 2.484907 170 +exam 1 86 2.484907 2.484907 169 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +solv 1 73 2.639057 2.639057 234 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +previou 1 62 2.772589 2.772589 290 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +best 1 59 2.833213 2.833213 299 +plai 1 60 2.833213 2.833213 307 +content 1 59 2.833213 2.833213 302 +semest 1 58 2.890372 2.890372 312 +variou 1 56 2.890372 2.890372 317 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +principl 1 48 3.044522 3.044522 357 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +natur 1 44 3.135494 3.135494 406 +offer 1 43 3.178054 3.178054 414 +autom 1 41 3.218876 3.218876 434 +vision 1 41 3.218876 3.218876 430 +seminar 1 38 3.295837 3.295837 470 +game 1 36 3.367296 3.367296 498 +robot 1 36 3.367296 3.367296 497 +represent 1 35 3.401197 3.401197 512 +semant 1 29 3.583519 3.583519 587 +rule 1 26 3.688879 3.688879 638 +frame 1 24 3.761200 3.761200 684 +motion 1 24 3.761200 3.761200 699 +expert 1 20 3.951244 3.951244 833 +mostli 1 19 4.007333 4.007333 869 +lisp 1 18 4.060443 4.060443 897 +biologi 1 15 4.248495 4.248495 1049 +chuck 1 14 4.317488 4.317488 1108 +prolog 1 13 4.382027 4.382027 1155 +deduct 1 12 4.465908 4.465908 1236 +readabl 1 12 4.465908 4.465908 1258 +alpha 1 11 4.553877 4.553877 1348 +wendt 1 10 4.653960 4.653960 1446 +dyer 1 9 4.753590 4.753590 1573 +qualifi 1 8 4.875197 4.875197 1721 +predic 1 7 5.010635 5.010635 1806 +beta 1 6 5.164786 5.164786 1993 +jude 1 6 5.164786 5.164786 2123 +extern 1 6 5.164786 5.164786 2105 +connectionist 1 5 5.347108 5.347108 2430 +shavlik 1 5 5.347108 5.347108 2429 +bryan 1 5 5.347108 5.347108 2421 +sabbat 1 4 5.568345 5.568345 2824 +kunen 1 3 5.857933 5.857933 3500 +thefal 1 1 6.957497 6.957497 9163 +assumedprerequisit 1 1 6.957497 6.957497 9164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..695e8f2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,217 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +code 1 108 2.197225 2.197225 116 +peopl 1 96 2.302585 2.302585 132 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +question 1 91 2.397895 2.397895 141 +learn 1 86 2.484907 2.484907 170 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +start 1 83 2.484907 2.484907 173 +exam 1 86 2.484907 2.484907 169 +librari 1 87 2.484907 2.484907 181 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +ieee 1 86 2.484907 2.484907 190 +institut 1 84 2.484907 2.484907 187 +help 1 83 2.484907 2.484907 175 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +homework 1 79 2.564949 2.564949 193 +server 1 76 2.564949 2.564949 204 +refer 1 78 2.564949 2.564949 203 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +line 1 75 2.639057 2.639057 231 +write 1 72 2.639057 2.639057 222 +syllabu 1 67 2.708050 2.708050 247 +knowledg 1 67 2.708050 2.708050 243 +wednesdai 1 64 2.772589 2.772589 261 +artifici 1 63 2.772589 2.772589 280 +januari 1 62 2.772589 2.772589 264 +polici 1 64 2.772589 2.772589 279 +creat 1 63 2.772589 2.772589 277 +previou 1 62 2.772589 2.772589 290 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +march 1 61 2.833213 2.833213 295 +best 1 59 2.833213 2.833213 299 +content 1 59 2.833213 2.833213 302 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +semest 1 58 2.890372 2.890372 312 +sever 1 56 2.890372 2.890372 322 +suggest 1 53 2.944439 2.944439 331 +februari 1 54 2.944439 2.944439 328 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +week 1 52 2.995732 2.995732 343 +tabl 1 51 2.995732 2.995732 346 +archiv 1 49 3.044522 3.044522 364 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +frequent 1 49 3.044522 3.044522 367 +adapt 1 46 3.091042 3.091042 387 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +math 1 44 3.135494 3.135494 402 +edit 1 42 3.218876 3.218876 418 +author 1 39 3.258097 3.258097 450 +late 1 40 3.258097 3.258097 439 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +connect 1 37 3.332205 3.332205 485 +workstat 1 37 3.332205 3.332205 479 +tree 1 36 3.367296 3.367296 492 +next 1 34 3.401197 3.401197 517 +bibliographi 1 34 3.401197 3.401197 518 +manual 1 35 3.401197 3.401197 504 +print 1 34 3.401197 3.401197 503 +articl 1 33 3.433987 3.433987 530 +chapter 1 32 3.465736 3.465736 536 +concept 1 32 3.465736 3.465736 537 +idea 1 32 3.465736 3.465736 545 +neural 1 30 3.555348 3.555348 578 +common 1 30 3.555348 3.555348 574 +measur 1 28 3.610918 3.610918 609 +progress 1 28 3.610918 3.610918 598 +ask 1 28 3.610918 3.610918 597 +compar 1 26 3.688879 3.688879 648 +experiment 1 26 3.688879 3.688879 645 +comp 1 26 3.688879 3.688879 650 +lab 1 24 3.761200 3.761200 698 +lead 1 23 3.806662 3.806662 718 +methodolog 1 23 3.806662 3.806662 733 +decis 1 23 3.806662 3.806662 728 +instead 1 22 3.850148 3.850148 756 +dai 1 22 3.850148 3.850148 753 +sure 1 20 3.951244 3.951244 813 +department 1 20 3.951244 3.951244 839 +feedback 1 19 4.007333 4.007333 854 +definit 1 19 4.007333 4.007333 864 +five 1 19 4.007333 4.007333 841 +mostli 1 19 4.007333 4.007333 869 +benchmark 1 19 4.007333 4.007333 859 +accept 1 18 4.060443 4.060443 879 +behavior 1 18 4.060443 4.060443 881 +lisp 1 18 4.060443 4.060443 897 +stat 1 17 4.110874 4.110874 924 +analyz 1 17 4.110874 4.110874 925 +previous 1 17 4.110874 4.110874 923 +sheet 1 16 4.174387 4.174387 973 +explan 1 16 4.174387 4.174387 985 +biologi 1 15 4.248495 4.248495 1049 +train 1 14 4.317488 4.317488 1066 +emac 1 13 4.382027 4.382027 1143 +readabl 1 12 4.465908 4.465908 1258 +refin 1 11 4.553877 4.553877 1363 +summar 1 11 4.553877 4.553877 1295 +council 1 11 4.553877 4.553877 1364 +genet 1 10 4.653960 4.653960 1409 +sentenc 1 10 4.653960 4.653960 1413 +penalti 1 10 4.653960 4.653960 1405 +wendt 1 10 4.653960 4.653960 1446 +moonei 1 9 4.753590 4.753590 1520 +debugg 1 9 4.753590 4.753590 1493 +empir 1 8 4.875197 4.875197 1722 +irvin 1 8 4.875197 4.875197 1660 +printer 1 8 4.875197 4.875197 1621 +analyt 1 7 5.010635 5.010635 1913 +noon 1 7 5.010635 5.010635 1804 +migrat 1 7 5.010635 5.010635 1851 +dataset 1 7 5.010635 5.010635 1914 +discoveri 1 7 5.010635 5.010635 1915 +tip 1 7 5.010635 5.010635 1863 +jude 1 6 5.164786 5.164786 2123 +geoff 1 6 5.164786 5.164786 2124 +highwai 1 6 5.164786 5.164786 2095 +heurist 1 6 5.164786 5.164786 2125 +extern 1 6 5.164786 5.164786 2105 +shavlik 1 5 5.347108 5.347108 2429 +basement 1 4 5.568345 5.568345 2663 +reinforc 1 4 5.568345 5.568345 2674 +exhaust 1 4 5.568345 5.568345 2825 +novic 1 4 5.568345 5.568345 2815 +steel 1 4 5.568345 5.568345 2818 +weinberg 1 3 5.857933 5.857933 3443 +geoffrei 1 3 5.857933 5.857933 3505 +soar 1 3 5.857933 5.857933 3506 +backpropag 1 3 5.857933 5.857933 3507 +weekend 1 3 5.857933 5.857933 3357 +canadian 1 3 5.857933 5.857933 3508 +mitchel 1 2 6.263398 6.263398 4792 +quinlan 1 2 6.263398 6.263398 4797 +towel 1 2 6.263398 6.263398 4793 +unsupervis 1 2 6.263398 6.263398 4233 +fisher 1 2 6.263398 6.263398 4794 +cogsci 1 2 6.263398 6.263398 4798 +induc 1 2 6.263398 6.263398 4795 +tractabl 1 2 6.263398 6.263398 4799 +akcl 1 2 6.263398 6.263398 4796 +chunk 1 1 6.957497 6.957497 9169 +laird 1 1 6.957497 6.957497 9170 +rosenbloom 1 1 6.957497 6.957497 9171 +newel 1 1 6.957497 6.957497 9172 +dietterich 1 1 6.957497 6.957497 9173 +rumelhart 1 1 6.957497 6.957497 9165 +zipser 1 1 6.957497 6.957497 9174 +lenat 1 1 6.957497 6.957497 9175 +backprop 1 1 6.957497 6.957497 9166 +kibler 1 1 6.957497 6.957497 9176 +kbann 1 1 6.957497 6.957497 9177 +cobweb 1 1 6.957497 6.957497 9167 +brr 1 1 6.957497 6.957497 9178 +ineedagoodicon 1 1 6.957497 6.957497 9179 +nip 1 1 6.957497 6.957497 9180 +premier 1 1 6.957497 6.957497 9181 +austrian 1 1 6.957497 6.957497 9168 +shavlikshavlik 1 1 6.957497 6.957497 9182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..cf0f6d86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,248 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +schedul 1 119 2.079442 2.079442 85 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +text 1 98 2.302585 2.302585 133 +present 1 91 2.397895 2.397895 145 +octob 1 89 2.397895 2.397895 156 +grade 1 90 2.397895 2.397895 142 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +exam 1 86 2.484907 2.484907 169 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +requir 1 81 2.484907 2.484907 167 +mondai 1 77 2.564949 2.564949 206 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +good 1 77 2.564949 2.564949 200 +complet 1 77 2.564949 2.564949 208 +summari 1 73 2.639057 2.639057 237 +tuesdai 1 73 2.639057 2.639057 219 +involv 1 71 2.639057 2.639057 227 +write 1 72 2.639057 2.639057 222 +meet 1 72 2.639057 2.639057 229 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +new 1 64 2.772589 2.772589 262 +wednesdai 1 64 2.772589 2.772589 261 +previou 1 62 2.772589 2.772589 290 +copi 1 63 2.772589 2.772589 284 +result 1 65 2.772589 2.772589 281 +room 1 59 2.833213 2.833213 301 +content 1 59 2.833213 2.833213 302 +space 1 57 2.890372 2.890372 310 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +sampl 1 53 2.944439 2.944439 339 +suggest 1 53 2.944439 2.944439 331 +cover 1 55 2.944439 2.944439 329 +week 1 52 2.995732 2.995732 343 +much 1 52 2.995732 2.995732 349 +case 1 51 2.995732 2.995732 351 +give 1 50 3.044522 3.044522 359 +standard 1 48 3.044522 3.044522 365 +done 1 47 3.091042 3.091042 381 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +textbook 1 44 3.135494 3.135494 397 +discuss 1 45 3.135494 3.135494 399 +around 1 43 3.178054 3.178054 415 +term 1 43 3.178054 3.178054 411 +past 1 42 3.218876 3.218876 428 +review 1 42 3.218876 3.218876 425 +realli 1 40 3.258097 3.258097 444 +must 1 40 3.258097 3.258097 442 +expect 1 37 3.332205 3.332205 484 +statist 1 35 3.401197 3.401197 521 +survei 1 35 3.401197 3.401197 513 +approxim 1 35 3.401197 3.401197 509 +jame 1 35 3.401197 3.401197 507 +michael 1 35 3.401197 3.401197 514 +articl 1 33 3.433987 3.433987 530 +richard 1 31 3.496508 3.496508 559 +secur 1 30 3.555348 3.555348 577 +compon 1 30 3.555348 3.555348 570 +synchron 1 29 3.583519 3.583519 588 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +measur 1 28 3.610918 3.610918 609 +relev 1 26 3.688879 3.688879 637 +experiment 1 26 3.688879 3.688879 645 +daili 1 24 3.761200 3.761200 706 +tent 1 22 3.850148 3.850148 739 +watch 1 21 3.912023 3.912023 789 +latest 1 21 3.912023 3.912023 785 +similar 1 21 3.912023 3.912023 771 +wang 1 21 3.912023 3.912023 790 +chen 1 21 3.912023 3.912023 791 +basi 1 20 3.951244 3.951244 828 +qualiti 1 20 3.951244 3.951244 832 +assum 1 19 4.007333 4.007333 845 +andrew 1 19 4.007333 4.007333 849 +eric 1 19 4.007333 4.007333 870 +figur 1 18 4.060443 4.060443 903 +encourag 1 18 4.060443 4.060443 880 +listen 1 18 4.060443 4.060443 907 +protect 1 17 4.110874 4.110874 935 +normal 1 16 4.174387 4.174387 995 +zhang 1 16 4.174387 4.174387 980 +purchas 1 15 4.248495 4.248495 1030 +todd 1 15 4.248495 4.248495 1051 +classic 1 14 4.317488 4.317488 1084 +doit 1 14 4.317488 4.317488 1111 +individu 1 13 4.382027 4.382027 1126 +readabl 1 12 4.465908 4.465908 1258 +verifi 1 12 4.465908 4.465908 1261 +broad 1 11 4.553877 4.553877 1302 +worth 1 11 4.553877 4.553877 1294 +literatur 1 11 4.553877 4.553877 1300 +summar 1 11 4.553877 4.553877 1295 +operatingsystem 1 10 4.653960 4.653960 1401 +strongli 1 10 4.653960 4.653960 1406 +total 1 10 4.653960 4.653960 1398 +exact 1 9 4.753590 4.753590 1509 +informationabout 1 9 4.753590 4.753590 1515 +equival 1 9 4.753590 4.753590 1496 +herefor 1 9 4.753590 4.753590 1483 +solomon 1 8 4.875197 4.875197 1716 +theme 1 8 4.875197 4.875197 1707 +noon 1 7 5.010635 5.010635 1804 +suffici 1 7 5.010635 5.010635 1897 +larger 1 7 5.010635 5.010635 1875 +smaller 1 7 5.010635 5.010635 1874 +craig 1 7 5.010635 5.010635 1879 +sciencesoffic 1 6 5.164786 5.164786 2101 +onoper 1 6 5.164786 5.164786 2048 +carefulli 1 6 5.164786 5.164786 2045 +approv 1 6 5.164786 5.164786 2078 +prasad 1 6 5.164786 5.164786 2126 +formerli 1 5 5.347108 5.347108 2397 +deshpand 1 5 5.347108 5.347108 2431 +systemsfal 1 4 5.568345 5.568345 2683 +marvin 1 4 5.568345 5.568345 2806 +exposur 1 4 5.568345 5.568345 2598 +ident 1 4 5.568345 5.568345 2826 +will 1 4 5.568345 5.568345 2782 +raman 1 4 5.568345 5.568345 2827 +advancedoper 1 3 5.857933 5.857933 3403 +macc 1 3 5.857933 5.857933 3414 +focal 1 3 5.857933 5.857933 3404 +gradingther 1 3 5.857933 5.857933 3455 +franci 1 3 5.857933 5.857933 3287 +pang 1 3 5.857933 5.857933 3509 +avinash 1 3 5.857933 5.857933 3510 +rajesh 1 3 5.857933 5.857933 3511 +troffic 1 2 6.263398 6.263398 4706 +pmin 1 2 6.263398 6.263398 4492 +avaiabl 1 2 6.263398 6.263398 4703 +multic 1 2 6.263398 6.263398 4304 +interprocess 1 2 6.263398 6.263398 4174 +satisfactori 1 2 6.263398 6.263398 4567 +usea 1 2 6.263398 6.263398 4800 +andconfer 1 2 6.263398 6.263398 4568 +deskfor 1 2 6.263398 6.263398 4584 +youto 1 2 6.263398 6.263398 4093 +willinstead 1 2 6.263398 6.263398 4569 +adiscuss 1 2 6.263398 6.263398 4570 +geta 1 2 6.263398 6.263398 4571 +quietli 1 2 6.263398 6.263398 4572 +thoroughli 1 2 6.263398 6.263398 4801 +salmon 1 2 6.263398 6.263398 4802 +chien 1 2 6.263398 6.263398 4541 +sodani 1 2 6.263398 6.263398 4803 +basnei 1 2 6.263398 6.263398 4804 +biswadeep 1 2 6.263398 6.263398 4805 +taxiao 1 2 6.263398 6.263398 4806 +sridhar 1 2 6.263398 6.263398 4807 +eduthu 1 2 6.263398 6.263398 4721 +inroom 1 1 6.957497 6.957497 9183 +examtogeth 1 1 6.957497 6.957497 9184 +bedetermin 1 1 6.957497 6.957497 9185 +inconsider 1 1 6.957497 6.957497 9186 +sciencestextther 1 1 6.957497 6.957497 9187 +papersa 1 1 6.957497 6.957497 9188 +thoseof 1 1 6.957497 6.957497 9189 +lessout 1 1 6.957497 6.957497 9190 +projecty 1 1 6.957497 6.957497 9191 +implementationsof 1 1 6.957497 6.957497 9192 +unvalid 1 1 6.957497 6.957497 9193 +ashort 1 1 6.957497 6.957497 9194 +presentationabout 1 1 6.957497 6.957497 9195 +presentationsher 1 1 6.957497 6.957497 9196 +presen 1 1 6.957497 6.957497 9197 +manyan 1 1 6.957497 6.957497 9198 +stubb 1 1 6.957497 6.957497 9199 +bigg 1 1 6.957497 6.957497 9200 +gunawan 1 1 6.957497 6.957497 9201 +agu 1 1 6.957497 6.957497 9202 +qingmin 1 1 6.957497 6.957497 9203 +larsen 1 1 6.957497 6.957497 9204 +conroi 1 1 6.957497 6.957497 9205 +fritz 1 1 6.957497 6.957497 9206 +jordan 1 1 6.957497 6.957497 9207 +yanm 1 1 6.957497 6.957497 9208 +xinyu 1 1 6.957497 6.957497 9209 +munson 1 1 6.957497 6.957497 9210 +wenjun 1 1 6.957497 6.957497 9211 +xinyi 1 1 6.957497 6.957497 9212 +yufei 1 1 6.957497 6.957497 9213 +zeyu 1 1 6.957497 6.957497 9214 +gopal 1 1 6.957497 6.957497 9215 +leesolomon 1 1 6.957497 6.957497 9216 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..a438255e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +time 1 293 1.098612 1.098612 17 +homepag 1 93 2.397895 2.397895 148 +chiang 1 7 5.010635 5.010635 1853 +gradesgo 1 1 6.957497 6.957497 9217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..a0268513 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +solut 1 82 2.484907 2.484907 162 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +want 1 79 2.564949 2.564949 199 +mondai 1 77 2.564949 2.564949 206 +solv 1 73 2.639057 2.639057 234 +write 1 72 2.639057 2.639057 222 +window 1 68 2.708050 2.708050 242 +copi 1 63 2.772589 2.772589 284 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +cover 1 55 2.944439 2.944439 329 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +pointer 1 48 3.044522 3.044522 368 +even 1 45 3.135494 3.135494 393 +textbook 1 44 3.135494 3.135494 397 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +howev 1 41 3.218876 3.218876 422 +must 1 40 3.258097 3.258097 442 +probabl 1 40 3.258097 3.258097 455 +microsoft 1 38 3.295837 3.295837 468 +open 1 38 3.295837 3.295837 469 +hand 1 37 3.332205 3.332205 475 +copyright 1 36 3.367296 3.367296 495 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +either 1 35 3.401197 3.401197 506 +go 1 33 3.433987 3.433987 529 +depend 1 29 3.583519 3.583519 583 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +toward 1 25 3.737670 3.737670 668 +jeff 1 25 3.737670 3.737670 673 +lab 1 24 3.761200 3.761200 698 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +increas 1 20 3.951244 3.951244 829 +exercis 1 19 4.007333 4.007333 842 +left 1 19 4.007333 4.007333 851 +lyco 1 19 4.007333 4.007333 871 +along 1 18 4.060443 4.060443 878 +across 1 16 4.174387 4.174387 974 +dilbert 1 16 4.174387 4.174387 996 +fortran 1 15 4.248495 4.248495 1027 +configur 1 15 4.248495 4.248495 1012 +purchas 1 15 4.248495 4.248495 1030 +comic 1 14 4.317488 4.317488 1103 +primarili 1 13 4.382027 4.382027 1185 +overal 1 12 4.465908 4.465908 1254 +vectra 1 12 4.465908 4.465908 1267 +holidai 1 12 4.465908 4.465908 1224 +insid 1 12 4.465908 4.465908 1262 +keyword 1 11 4.553877 4.553877 1356 +night 1 11 4.553877 4.553877 1319 +packard 1 10 4.653960 4.653960 1444 +certain 1 10 4.653960 4.653960 1393 +seven 1 9 4.753590 4.753590 1561 +prefer 1 9 4.753590 4.753590 1491 +correctli 1 9 4.753590 4.753590 1478 +wall 1 9 4.753590 4.753590 1553 +hewlett 1 8 4.875197 4.875197 1709 +printer 1 8 4.875197 4.875197 1621 +bestor 1 6 5.164786 5.164786 2099 +lampert 1 5 5.347108 5.347108 2398 +gareth 1 5 5.347108 5.347108 2392 +closest 1 4 5.568345 5.568345 2828 +relief 1 4 5.568345 5.568345 2784 +labyou 1 3 5.857933 5.857933 3406 +aren 1 3 5.857933 5.857933 3512 +dorm 1 3 5.857933 5.857933 3407 +lahei 1 3 5.857933 5.857933 3408 +quota 1 2 6.263398 6.263398 4753 +exce 1 1 6.957497 6.957497 9218 +bewar 1 1 6.957497 6.957497 9219 +outsidehallwai 1 1 6.957497 6.957497 9220 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..cef6ced1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +check 1 115 2.197225 2.197225 118 +homepag 1 93 2.397895 2.397895 148 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +chang 1 82 2.484907 2.484907 163 +solv 1 73 2.639057 2.639057 234 +frequent 1 49 3.044522 3.044522 367 +textbook 1 44 3.135494 3.135494 397 +purpos 1 37 3.332205 3.332205 481 +eduoffic 1 33 3.433987 3.433987 531 +often 1 31 3.496508 3.496508 551 +walter 1 17 4.110874 4.110874 950 +todd 1 15 4.248495 4.248495 1051 +assignmentsprogram 1 6 5.164786 5.164786 2019 +assignmentshomework 1 4 5.568345 5.568345 2721 +pertain 1 3 5.857933 5.857933 3208 +homepagewelcom 1 2 6.263398 6.263398 4808 +tmunson 1 2 6.263398 6.263398 4809 +statisticsoffic 1 2 6.263398 6.263398 4810 +homepagec 1 1 6.957497 6.957497 9221 +responsibilityto 1 1 6.957497 6.957497 9222 +informationinstructor 1 1 6.957497 6.957497 9223 +munsonemail 1 1 6.957497 6.957497 9224 +appointmentsect 1 1 6.957497 6.957497 9225 +savitchclass 1 1 6.957497 6.957497 9226 +informationexpectationssyllabusexam 1 1 6.957497 6.957497 9227 +schedule 1 1 6.957497 6.957497 9228 +mailgradingl 1 1 6.957497 6.957497 9229 +assignmentsextra 1 1 6.957497 6.957497 9230 +creditpoliciesconsult 1 1 6.957497 6.957497 9231 +responsibilitiesacadem 1 1 6.957497 6.957497 9232 +misconductoth 1 1 6.957497 6.957497 9233 +informationdaili 1 1 6.957497 6.957497 9234 +classoth 1 1 6.957497 6.957497 9235 +resourcesc 1 1 6.957497 6.957497 9236 +homepagetmunson 1 1 6.957497 6.957497 9237 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..7016ca4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +start 1 83 2.484907 2.484907 173 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +collect 1 65 2.772589 2.772589 268 +date 1 51 2.995732 2.995732 344 +get 1 46 3.091042 3.091042 380 +netscap 1 44 3.135494 3.135494 395 +consult 1 24 3.761200 3.761200 687 +tent 1 22 3.850148 3.850148 739 +facil 1 20 3.951244 3.951244 814 +whole 1 17 4.110874 4.110874 940 +todd 1 15 4.248495 4.248495 1051 +difficulti 1 13 4.382027 4.382027 1132 +tutor 1 9 4.753590 4.753590 1552 +struct 1 4 5.568345 5.568345 2821 +turnidg 1 4 5.568345 5.568345 2829 +nolandinstructor 1 2 6.263398 6.263398 4785 +muchinform 1 2 6.263398 6.263398 4811 +turnidgeoffic 1 1 6.957497 6.957497 9238 +tbalab 1 1 6.957497 6.957497 9239 +tbaannouncementsclass 1 1 6.957497 6.957497 9240 +classa 1 1 6.957497 6.957497 9241 +byother 1 1 6.957497 6.957497 9242 +gregorysharp 1 1 6.957497 6.957497 9243 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..ca335668 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +sourc 1 77 2.564949 2.564949 201 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +locat 1 59 2.833213 2.833213 303 +week 1 52 2.995732 2.995732 343 +appoint 1 49 3.044522 3.044522 358 +still 1 50 3.044522 3.044522 362 +algebra 1 45 3.135494 3.135494 394 +announc 1 40 3.258097 3.258097 441 +late 1 40 3.258097 3.258097 439 +everi 1 34 3.401197 3.401197 519 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +consult 1 24 3.761200 3.761200 687 +dai 1 22 3.850148 3.850148 753 +walter 1 17 4.110874 4.110874 950 +zhang 1 16 4.174387 4.174387 980 +weslei 1 16 4.174387 4.174387 983 +misconduct 1 16 4.174387 4.174387 1003 +quiz 1 16 4.174387 4.174387 990 +style 1 15 4.248495 4.248495 1036 +vectra 1 12 4.465908 4.465908 1267 +savitch 1 12 4.465908 4.465908 1269 +addison 1 12 4.465908 4.465908 1230 +chri 1 11 4.553877 4.553877 1311 +statement 1 11 4.553877 4.553877 1313 +errata 1 10 4.653960 4.653960 1403 +login 1 9 4.753590 4.753590 1550 +seven 1 9 4.753590 4.753590 1561 +reload 1 8 4.875197 4.875197 1682 +isbn 1 7 5.010635 5.010635 1901 +guidelin 1 7 5.010635 5.010635 1832 +rough 1 6 5.164786 5.164786 2107 +noland 1 5 5.347108 5.347108 2420 +psych 1 3 5.857933 5.857933 3498 +grader 1 3 5.857933 5.857933 3165 +weaver 1 2 6.263398 6.263398 4770 +kei 1 2 6.263398 6.263398 4812 +staf 1 1 6.957497 6.957497 9244 +policyassign 1 1 6.957497 6.957497 9245 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..aa71d606 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +architectur 1 139 1.945910 1.945910 77 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +perform 1 143 1.945910 1.945910 74 +assign 1 135 1.945910 1.945910 66 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +homework 1 79 2.564949 2.564949 193 +summari 1 73 2.639057 2.639057 237 +main 1 67 2.708050 2.708050 256 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +content 1 59 2.833213 2.833213 302 +special 1 56 2.890372 2.890372 320 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +instruct 1 53 2.944439 2.944439 332 +tabl 1 51 2.995732 2.995732 346 +set 1 50 3.044522 3.044522 361 +midterm 1 45 3.135494 3.135494 392 +offer 1 43 3.178054 3.178054 414 +cach 1 41 3.218876 3.218876 432 +review 1 42 3.218876 3.218876 425 +cost 1 37 3.332205 3.332205 480 +jame 1 35 3.401197 3.401197 507 +limit 1 29 3.583519 3.583519 585 +consid 1 29 3.583519 3.583519 590 +full 1 28 3.610918 3.610918 615 +multiprocessor 1 28 3.610918 3.610918 605 +arrai 1 27 3.637586 3.637586 627 +trace 1 25 3.737670 3.737670 677 +miscellan 1 23 3.806662 3.806662 731 +disk 1 22 3.850148 3.850148 747 +rout 1 21 3.912023 3.912023 793 +smith 1 20 3.951244 3.951244 820 +reserv 1 20 3.951244 3.951244 808 +thur 1 19 4.007333 4.007333 847 +interconnect 1 17 4.110874 4.110874 937 +vector 1 16 4.174387 4.174387 961 +doit 1 14 4.317488 4.317488 1111 +station 1 13 4.382027 4.382027 1157 +tue 1 11 4.553877 4.553877 1308 +clock 1 11 4.553877 4.553877 1320 +pipelin 1 7 5.010635 5.010635 1830 +biochemistri 1 3 5.857933 5.857933 3513 +vliw 1 3 5.857933 5.857933 3514 +harm 1 3 5.857933 5.857933 3515 +princ 1 2 6.263398 6.263398 4813 +specmark 1 2 6.263398 6.263398 4471 +princeoffic 1 1 6.957497 6.957497 9246 +miscellaneousnew 1 1 6.957497 6.957497 9247 +soln 1 1 6.957497 6.957497 9248 +pmread 1 1 6.957497 6.957497 9249 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..0fad95b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,245 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +welcom 1 122 2.079442 2.079442 99 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +make 1 111 2.197225 2.197225 120 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +part 1 98 2.302585 2.302585 129 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +associ 1 93 2.397895 2.397895 151 +follow 1 92 2.397895 2.397895 143 +imag 1 91 2.397895 2.397895 161 +comment 1 93 2.397895 2.397895 146 +level 1 87 2.484907 2.484907 180 +requir 1 81 2.484907 2.484907 167 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +exam 1 86 2.484907 2.484907 169 +homework 1 79 2.564949 2.564949 193 +dynam 1 76 2.564949 2.564949 194 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +state 1 76 2.564949 2.564949 207 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +tuesdai 1 73 2.639057 2.639057 219 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +main 1 67 2.708050 2.708050 256 +practic 1 70 2.708050 2.708050 246 +view 1 70 2.708050 2.708050 254 +simul 1 66 2.708050 2.708050 255 +test 1 66 2.708050 2.708050 252 +order 1 69 2.708050 2.708050 249 +complex 1 64 2.772589 2.772589 269 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +march 1 61 2.833213 2.833213 295 +colleg 1 61 2.833213 2.833213 300 +space 1 57 2.890372 2.890372 310 +faculti 1 56 2.890372 2.890372 325 +scientif 1 53 2.944439 2.944439 341 +cover 1 55 2.944439 2.944439 329 +visual 1 48 3.044522 3.044522 372 +principl 1 48 3.044522 3.044522 357 +standard 1 48 3.044522 3.044522 365 +california 1 46 3.091042 3.091042 388 +textbook 1 44 3.135494 3.135494 397 +math 1 44 3.135494 3.135494 402 +combin 1 42 3.218876 3.218876 421 +vision 1 41 3.218876 3.218876 430 +examin 1 42 3.218876 3.218876 424 +error 1 40 3.258097 3.258097 449 +form 1 39 3.258097 3.258097 443 +map 1 39 3.258097 3.258097 452 +prototyp 1 38 3.295837 3.295837 463 +connect 1 37 3.332205 3.332205 485 +field 1 37 3.332205 3.332205 482 +mean 1 37 3.332205 3.332205 477 +staff 1 36 3.367296 3.367296 490 +copyright 1 36 3.367296 3.367296 495 +taught 1 33 3.433987 3.433987 526 +product 1 33 3.433987 3.433987 527 +transform 1 32 3.465736 3.465736 542 +human 1 32 3.465736 3.465736 546 +anim 1 31 3.496508 3.496508 557 +focus 1 29 3.583519 3.583519 584 +limit 1 29 3.583519 3.583519 585 +relev 1 26 3.688879 3.688879 637 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +other 1 24 3.761200 3.761200 697 +begin 1 23 3.806662 3.806662 716 +equat 1 23 3.806662 3.806662 724 +deal 1 22 3.850148 3.850148 736 +color 1 22 3.850148 3.850148 762 +properti 1 22 3.850148 3.850148 749 +serv 1 22 3.850148 3.850148 758 +period 1 22 3.850148 3.850148 743 +sent 1 22 3.850148 3.850148 763 +among 1 21 3.912023 3.912023 781 +viewer 1 21 3.912023 3.912023 787 +break 1 20 3.951244 3.951244 812 +prepar 1 20 3.951244 3.951244 824 +geometr 1 19 4.007333 4.007333 852 +miss 1 19 4.007333 4.007333 866 +figur 1 18 4.060443 4.060443 903 +dimension 1 18 4.060443 4.060443 909 +differenti 1 17 4.110874 4.110874 921 +render 1 17 4.110874 4.110874 947 +modif 1 17 4.110874 4.110874 913 +normal 1 16 4.174387 4.174387 995 +devic 1 16 4.174387 4.174387 1002 +vector 1 16 4.174387 4.174387 961 +atth 1 15 4.248495 4.248495 1019 +hierarch 1 15 4.248495 4.248495 1018 +style 1 15 4.248495 4.248495 1036 +hopefulli 1 14 4.317488 4.317488 1071 +scene 1 14 4.317488 4.317488 1114 +camera 1 14 4.317488 4.317488 1115 +alan 1 13 4.382027 4.382027 1146 +composit 1 13 4.382027 4.382027 1150 +coordin 1 13 4.382027 4.382027 1182 +automata 1 13 4.382027 4.382027 1135 +difficulti 1 13 4.382027 4.382027 1132 +opportun 1 13 4.382027 4.382027 1161 +evolv 1 12 4.465908 4.465908 1223 +skill 1 12 4.465908 4.465908 1205 +optic 1 12 4.465908 4.465908 1221 +remov 1 12 4.465908 4.465908 1225 +buffer 1 12 4.465908 4.465908 1211 +scan 1 12 4.465908 4.465908 1243 +prelim 1 12 4.465908 4.465908 1201 +holidai 1 12 4.465908 4.465908 1224 +bruce 1 12 4.465908 4.465908 1226 +land 1 12 4.465908 4.465908 1273 +huang 1 12 4.465908 4.465908 1202 +volum 1 11 4.553877 4.553877 1347 +transpar 1 11 4.553877 4.553877 1325 +statement 1 11 4.553877 4.553877 1313 +forc 1 10 4.653960 4.653960 1384 +perspect 1 10 4.653960 4.653960 1437 +facilit 1 10 4.653960 4.653960 1412 +incomplet 1 9 4.753590 4.753590 1575 +leader 1 9 4.753590 4.753590 1576 +surfac 1 9 4.753590 4.753590 1574 +explicit 1 9 4.753590 4.753590 1525 +screen 1 9 4.753590 4.753590 1577 +light 1 9 4.753590 4.753590 1533 +wall 1 9 4.753590 4.753590 1553 +observ 1 9 4.753590 4.753590 1578 +rhode 1 9 4.753590 4.753590 1579 +polygon 1 8 4.875197 4.875197 1723 +convers 1 8 4.875197 4.875197 1673 +textur 1 8 4.875197 4.875197 1677 +pagecomput 1 7 5.010635 5.010635 1900 +parametr 1 7 5.010635 5.010635 1819 +clip 1 7 5.010635 5.010635 1868 +stereo 1 7 5.010635 5.010635 1818 +shade 1 7 5.010635 5.010635 1881 +channel 1 7 5.010635 5.010635 1836 +justin 1 7 5.010635 5.010635 1789 +davi 1 7 5.010635 5.010635 1888 +artist 1 6 5.164786 5.164786 2127 +onto 1 6 5.164786 5.164786 2089 +hidden 1 6 5.164786 5.164786 1987 +notifi 1 6 5.164786 5.164786 2106 +rotat 1 5 5.347108 5.347108 2295 +rigid 1 5 5.347108 5.347108 2432 +cellular 1 5 5.347108 5.347108 2433 +anti 1 5 5.347108 5.347108 2434 +hyper 1 5 5.347108 5.347108 2435 +particl 1 5 5.347108 5.347108 2436 +oregon 1 5 5.347108 5.347108 2437 +implicit 1 4 5.568345 5.568345 2830 +invers 1 4 5.568345 5.568345 2764 +pixel 1 4 5.568345 5.568345 2831 +contour 1 4 5.568345 5.568345 2812 +computergraph 1 3 5.857933 5.857933 3517 +administrivia 1 3 5.857933 5.857933 3166 +kinemat 1 3 5.857933 5.857933 3516 +wave 1 3 5.857933 5.857933 3518 +shadow 1 3 5.857933 5.857933 3519 +bump 1 3 5.857933 5.857933 3497 +arrow 1 3 5.857933 5.857933 3520 +makeup 1 3 5.857933 5.857933 3449 +jing 1 3 5.857933 5.857933 3521 +mccune 1 3 5.857933 5.857933 3522 +waterloo 1 3 5.857933 5.857933 3523 +cardiff 1 3 5.857933 5.857933 3154 +folei 1 2 6.263398 6.263398 4817 +watt 1 2 6.263398 6.263398 4814 +bruceland 1 2 6.263398 6.263398 4818 +designedto 1 2 6.263398 6.263398 4712 +illumin 1 2 6.263398 6.263398 4819 +blobbi 1 2 6.263398 6.263398 4820 +homogen 1 2 6.263398 6.263398 4821 +mimic 1 2 6.263398 6.263398 4736 +phong 1 2 6.263398 6.263398 4822 +alias 1 2 6.263398 6.263398 4823 +scalar 1 2 6.263398 6.263398 4815 +religi 1 2 6.263398 6.263398 4816 +tomak 1 2 6.263398 6.263398 4675 +belief 1 2 6.263398 6.263398 4553 +inord 1 2 6.263398 6.263398 4824 +absent 1 2 6.263398 6.263398 4825 +deviat 1 2 6.263398 6.263398 4826 +wale 1 2 6.263398 6.263398 4827 +manchest 1 2 6.263398 6.263398 4828 +todoc 1 2 6.263398 6.263398 4829 +quadric 1 1 6.957497 6.957497 9250 +swept 1 1 6.957497 6.957497 9251 +tensor 1 1 6.957497 6.957497 9252 +tessel 1 1 6.957497 6.957497 9253 +gourand 1 1 6.957497 6.957497 9254 +vernier 1 1 6.957497 6.957497 9255 +acuiti 1 1 6.957497 6.957497 9256 +mispercept 1 1 6.957497 6.957497 9257 +advect 1 1 6.957497 6.957497 9258 +multiparamet 1 1 6.957497 6.957497 9259 +educationlaw 1 1 6.957497 6.957497 9260 +mandat 1 1 6.957497 6.957497 9261 +intendingto 1 1 6.957497 6.957497 9262 +requestedto 1 1 6.957497 6.957497 9263 +jmccune 1 1 6.957497 6.957497 9264 +csrelev 1 1 6.957497 6.957497 9265 +universityrel 1 1 6.957497 6.957497 9266 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..cc7f58dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +base 1 165 1.791759 1.791759 50 +first 1 140 1.945910 1.945910 71 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +note 1 142 1.945910 1.945910 67 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +welcom 1 122 2.079442 2.079442 99 +site 1 106 2.197225 2.197225 119 +theori 1 111 2.197225 2.197225 127 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +graphic 1 90 2.397895 2.397895 147 +section 1 94 2.397895 2.397895 149 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +comment 1 93 2.397895 2.397895 146 +contain 1 81 2.484907 2.484907 174 +level 1 87 2.484907 2.484907 180 +start 1 83 2.484907 2.484907 173 +build 1 85 2.484907 2.484907 184 +refer 1 78 2.564949 2.564949 203 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +main 1 67 2.708050 2.708050 256 +practic 1 70 2.708050 2.708050 246 +order 1 69 2.708050 2.708050 249 +laboratori 1 63 2.772589 2.772589 292 +result 1 65 2.772589 2.772589 281 +virtual 1 62 2.772589 2.772589 285 +scientif 1 53 2.944439 2.944439 341 +visual 1 48 3.044522 3.044522 372 +principl 1 48 3.044522 3.044522 357 +get 1 46 3.091042 3.091042 380 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +mark 1 44 3.135494 3.135494 403 +map 1 39 3.258097 3.258097 452 +procedur 1 36 3.367296 3.367296 488 +copyright 1 36 3.367296 3.367296 495 +taught 1 33 3.433987 3.433987 526 +transform 1 32 3.465736 3.465736 542 +anim 1 31 3.496508 3.496508 557 +deal 1 22 3.850148 3.850148 736 +sent 1 22 3.850148 3.850148 763 +facil 1 20 3.951244 3.951244 814 +exercis 1 19 4.007333 4.007333 842 +atth 1 15 4.248495 4.248495 1019 +train 1 14 4.317488 4.317488 1066 +camera 1 14 4.317488 4.317488 1115 +land 1 12 4.465908 4.465908 1273 +statement 1 11 4.553877 4.553877 1313 +perspect 1 10 4.653960 4.653960 1437 +leader 1 9 4.753590 4.753590 1576 +surfac 1 9 4.753590 4.753590 1574 +light 1 9 4.753590 4.753590 1533 +competit 1 8 4.875197 4.875197 1635 +polygon 1 8 4.875197 4.875197 1723 +textur 1 8 4.875197 4.875197 1677 +pagecomput 1 7 5.010635 5.010635 1900 +parametr 1 7 5.010635 5.010635 1819 +chat 1 6 5.164786 5.164786 2128 +restrict 1 6 5.164786 5.164786 2129 +implicit 1 4 5.568345 5.568345 2830 +enrol 1 4 5.568345 5.568345 2613 +computergraph 1 3 5.857933 5.857933 3517 +bump 1 3 5.857933 5.857933 3497 +folei 1 2 6.263398 6.263398 4817 +watt 1 2 6.263398 6.263398 4814 +bruceland 1 2 6.263398 6.263398 4818 +todoc 1 2 6.263398 6.263398 4829 +exercisesthi 1 1 6.957497 6.957497 9267 +universityundergradu 1 1 6.957497 6.957497 9268 +dcomput 1 1 6.957497 6.957497 9269 +sigucc 1 1 6.957497 6.957497 9270 +basededuc 1 1 6.957497 6.957497 9271 +areinclud 1 1 6.957497 6.957497 9272 +aboutc 1 1 6.957497 6.957497 9273 +semesteraccess 1 1 6.957497 6.957497 9274 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..f9a13567 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +document 1 121 2.079442 2.079442 89 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +window 1 68 2.708050 2.708050 242 +content 1 59 2.833213 2.833213 302 +sampl 1 53 2.944439 2.944439 339 +visual 1 48 3.044522 3.044522 372 +video 1 44 3.135494 3.135494 405 +michael 1 35 3.401197 3.401197 514 +human 1 32 3.465736 3.465736 546 +express 1 32 3.465736 3.465736 540 +retriev 1 27 3.637586 3.637586 621 +appropri 1 18 4.060443 4.060443 883 +spatial 1 16 4.174387 4.174387 988 +huang 1 12 4.465908 4.465908 1202 +explicit 1 9 4.753590 4.753590 1525 +sean 1 8 4.875197 4.875197 1705 +justin 1 7 5.010635 5.010635 1789 +deliv 1 6 5.164786 5.164786 2070 +chosen 1 6 5.164786 5.164786 1984 +alex 1 6 5.164786 5.164786 2130 +facial 1 5 5.347108 5.347108 2438 +interior 1 5 5.347108 5.347108 2439 +particl 1 5 5.347108 5.347108 2436 +chose 1 4 5.568345 5.568345 2629 +arun 1 4 5.568345 5.568345 2736 +computergraph 1 3 5.857933 5.857933 3517 +hung 1 3 5.857933 5.857933 3524 +mccune 1 3 5.857933 5.857933 3522 +landscap 1 3 5.857933 5.857933 3525 +landi 1 2 6.263398 6.263398 4830 +tsai 1 2 6.263398 6.263398 4831 +stochast 1 2 6.263398 6.263398 4832 +semestereach 1 1 6.957497 6.957497 9275 +anddocu 1 1 6.957497 6.957497 9276 +metabal 1 1 6.957497 6.957497 9277 +arcuri 1 1 6.957497 6.957497 9278 +benton 1 1 6.957497 6.957497 9279 +interdepend 1 1 6.957497 6.957497 9280 +diffus 1 1 6.957497 6.957497 9281 +pollut 1 1 6.957497 6.957497 9282 +modelsfu 1 1 6.957497 6.957497 9283 +antialias 1 1 6.957497 6.957497 9284 +vermach 1 1 6.957497 6.957497 9285 +hsun 1 1 6.957497 6.957497 9286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..90976d3d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +applic 1 170 1.791759 1.791759 56 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +center 1 88 2.397895 2.397895 158 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +resum 1 79 2.564949 2.564949 217 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +nation 1 74 2.639057 2.639057 240 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +goal 1 66 2.708050 2.708050 250 +laboratori 1 63 2.772589 2.772589 292 +thesi 1 57 2.890372 2.890372 327 +scientif 1 53 2.944439 2.944439 341 +advisor 1 51 2.995732 2.995732 355 +numer 1 49 3.044522 3.044522 369 +committe 1 34 3.401197 3.401197 522 +tech 1 35 3.401197 3.401197 515 +least 1 35 3.401197 3.401197 516 +steve 1 29 3.583519 3.583519 594 +hous 1 21 3.912023 3.912023 801 +siam 1 21 3.912023 3.912023 800 +matrix 1 17 4.110874 4.110874 933 +squar 1 14 4.317488 4.317488 1082 +weight 1 12 4.465908 4.465908 1204 +statement 1 11 4.553877 4.553877 1313 +decomposit 1 10 4.653960 4.653960 1439 +rhode 1 9 4.753590 4.753590 1579 +juan 1 9 4.753590 4.753590 1580 +postdoc 1 8 4.875197 4.875197 1724 +cornellunivers 1 7 5.010635 5.010635 1916 +whichi 1 6 5.164786 5.164786 2056 +stabl 1 5 5.347108 5.347108 2309 +interior 1 5 5.347108 5.347108 2439 +orthogon 1 4 5.568345 5.568345 2832 +hough 1 3 5.857933 5.857933 3527 +vavasi 1 3 5.857933 5.857933 3526 +linearalgebra 1 2 6.263398 6.263398 4833 +anal 1 2 6.263398 6.263398 4834 +pointmethod 1 2 6.263398 6.263398 4835 +patti 1 1 6.957497 6.957497 9289 +houghpatti 1 1 6.957497 6.957497 9290 +sandia 1 1 6.957497 6.957497 9287 +livermor 1 1 6.957497 6.957497 9288 +frankh 1 1 6.957497 6.957497 9291 +nicktrefethen 1 1 6.957497 6.957497 9292 +schatz 1 1 6.957497 6.957497 9293 +optimizationi 1 1 6.957497 6.957497 9294 +meza 1 1 6.957497 6.957497 9295 +nationallaboratori 1 1 6.957497 6.957497 9296 +ofweight 1 1 6.957497 6.957497 9297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..b1096d47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +applic 1 170 1.791759 1.791759 56 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +postscript 1 131 2.079442 2.079442 90 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +center 1 88 2.397895 2.397895 158 +activ 1 84 2.484907 2.484907 182 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +exampl 1 77 2.564949 2.564949 195 +appli 1 71 2.639057 2.639057 226 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +ithaca 1 65 2.772589 2.772589 294 +type 1 61 2.833213 2.833213 296 +thesi 1 57 2.890372 2.890372 327 +summer 1 56 2.890372 2.890372 311 +detail 1 57 2.890372 2.890372 321 +scientif 1 53 2.944439 2.944439 341 +physic 1 47 3.091042 3.091042 377 +mechan 1 43 3.178054 3.178054 416 +linear 1 41 3.218876 3.218876 431 +continu 1 39 3.258097 3.258097 448 +submit 1 39 3.258097 3.258097 440 +vita 1 38 3.295837 3.295837 473 +expect 1 37 3.332205 3.332205 484 +next 1 34 3.401197 3.401197 517 +curriculum 1 33 3.433987 3.433987 535 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +propos 1 28 3.610918 3.610918 602 +jeff 1 25 3.737670 3.737670 673 +background 1 25 3.737670 3.737670 664 +hill 1 25 3.737670 3.737670 670 +equat 1 23 3.806662 3.806662 724 +finish 1 22 3.850148 3.850148 748 +supervis 1 20 3.951244 3.951244 840 +mostli 1 19 4.007333 4.007333 869 +dimension 1 18 4.060443 4.060443 909 +seek 1 17 4.110874 4.110874 954 +outlin 1 17 4.110874 4.110874 914 +coupl 1 17 4.110874 4.110874 939 +normal 1 16 4.174387 4.174387 995 +transit 1 15 4.248495 4.248495 1046 +nick 1 13 4.382027 4.382027 1180 +misc 1 13 4.382027 4.382027 1124 +frank 1 9 4.753590 4.753590 1568 +rhode 1 9 4.753590 4.753590 1579 +unusu 1 9 4.753590 4.753590 1566 +sixth 1 7 5.010635 5.010635 1917 +atcornel 1 6 5.164786 5.164786 2131 +versu 1 6 5.164786 5.164786 2052 +stabil 1 5 5.347108 5.347108 2286 +fluid 1 5 5.347108 5.347108 2440 +satish 1 4 5.568345 5.568345 2833 +trefethen 1 3 5.857933 5.857933 3528 +exponenti 1 3 5.857933 5.857933 3529 +driscol 1 2 6.263398 6.263398 4836 +spectral 1 2 6.263398 6.263398 4837 +baggettjeff 1 1 6.957497 6.957497 9300 +baggett 1 1 6.957497 6.957497 9298 +hydrodynam 1 1 6.957497 6.957497 9301 +blend 1 1 6.957497 6.957497 9302 +iwould 1 1 6.957497 6.957497 9303 +turbul 1 1 6.957497 6.957497 9299 +abscissa 1 1 6.957497 6.957497 9304 +andphillip 1 1 6.957497 6.957497 9305 +subcrit 1 1 6.957497 6.957497 9306 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..b8a2bff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +report 1 131 2.079442 2.079442 92 +document 1 121 2.079442 2.079442 89 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +search 1 95 2.397895 2.397895 155 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +contain 1 81 2.484907 2.484907 174 +want 1 79 2.564949 2.564949 199 +goal 1 66 2.708050 2.708050 250 +laboratori 1 63 2.772589 2.772589 292 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +abstract 1 62 2.772589 2.772589 276 +sever 1 56 2.890372 2.890372 322 +allow 1 53 2.944439 2.944439 333 +made 1 44 3.135494 3.135494 398 +form 1 39 3.258097 3.258097 443 +author 1 39 3.258097 3.258097 450 +join 1 39 3.258097 3.258097 457 +industri 1 38 3.295837 3.295837 464 +field 1 37 3.332205 3.332205 482 +word 1 34 3.401197 3.401197 508 +titl 1 31 3.496508 3.496508 556 +limit 1 29 3.583519 3.583519 585 +particip 1 29 3.583519 3.583519 589 +packag 1 28 3.610918 3.610918 614 +background 1 25 3.737670 3.737670 664 +brows 1 23 3.806662 3.806662 726 +among 1 21 3.912023 3.912023 781 +tell 1 21 3.912023 3.912023 777 +offici 1 18 4.060443 4.060443 894 +commerci 1 16 4.174387 4.174387 1005 +whose 1 13 4.382027 4.382027 1166 +enter 1 10 4.653960 4.653960 1454 +govern 1 9 4.753590 4.753590 1581 +pronounc 1 7 5.010635 5.010635 1918 +ncstrl 1 3 5.857933 5.857933 3530 +interoper 1 2 6.263398 6.263398 4838 +andorgan 1 2 6.263398 6.263398 4443 +bibliograph 1 2 6.263398 6.263398 4699 +libraryncstrl 1 1 6.957497 6.957497 9307 +ancestr 1 1 6.957497 6.957497 9308 +internationalcollect 1 1 6.957497 6.957497 9309 +departmentsand 1 1 6.957497 6.957497 9310 +availablefor 1 1 6.957497 6.957497 9311 +eduat 1 1 6.957497 6.957497 9312 +ncstrlcollect 1 1 6.957497 6.957497 9313 +serversoper 1 1 6.957497 6.957497 9314 +participatinginstitut 1 1 6.957497 6.957497 9315 +ncstrlpress 1 1 6.957497 6.957497 9316 +theparticip 1 1 6.957497 6.957497 9317 +moreread 1 1 6.957497 6.957497 9318 +forinstitut 1 1 6.957497 6.957497 9319 +informationfind 1 1 6.957497 6.957497 9320 +snew 1 1 6.957497 6.957497 9321 +totech 1 1 6.957497 6.957497 9322 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..535fd06c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^dri.cornell.edu @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +institut 1 84 2.484907 2.484907 187 +server 1 76 2.564949 2.564949 204 +integr 1 67 2.708050 2.708050 245 +anoth 1 45 3.135494 3.135494 408 +mike 1 24 3.761200 3.761200 703 +brows 1 23 3.806662 3.806662 726 +enterpris 1 2 6.263398 6.263398 4839 +informationand 1 2 6.263398 6.263398 4840 +instituteabout 1 1 6.957497 6.957497 9323 +researchersat 1 1 6.957497 6.957497 9324 +searchal 1 1 6.957497 6.957497 9325 +reportssearch 1 1 6.957497 6.957497 9326 +ipic 1 1 6.957497 6.957497 9327 +itisingapor 1 1 6.957497 6.957497 9328 +altavistaforum 1 1 6.957497 6.957497 9329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..7aa2c227 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +report 1 131 2.079442 2.079442 92 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +build 1 85 2.484907 2.484907 184 +institut 1 84 2.484907 2.484907 187 +requir 1 81 2.484907 2.484907 167 +learn 1 86 2.484907 2.484907 170 +resourc 1 81 2.484907 2.484907 172 +messag 1 76 2.564949 2.564949 212 +server 1 76 2.564949 2.564949 204 +sourc 1 77 2.564949 2.564949 201 +resum 1 79 2.564949 2.564949 217 +write 1 72 2.639057 2.639057 222 +onlin 1 75 2.639057 2.639057 223 +goal 1 66 2.708050 2.708050 250 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +improv 1 62 2.772589 2.772589 289 +copi 1 63 2.772589 2.772589 284 +collect 1 65 2.772589 2.772589 268 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +explor 1 58 2.890372 2.890372 324 +run 1 51 2.995732 2.995732 347 +investig 1 51 2.995732 2.995732 353 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +understand 1 47 3.091042 3.091042 384 +even 1 45 3.135494 3.135494 393 +made 1 44 3.135494 3.135494 398 +answer 1 45 3.135494 3.135494 391 +natur 1 44 3.135494 3.135494 406 +futur 1 41 3.218876 3.218876 427 +might 1 41 3.218876 3.218876 426 +author 1 39 3.258097 3.258097 450 +mean 1 37 3.332205 3.332205 477 +ofth 1 36 3.367296 3.367296 491 +staff 1 36 3.367296 3.367296 490 +especi 1 36 3.367296 3.367296 496 +product 1 33 3.433987 3.433987 527 +obtain 1 33 3.433987 3.433987 534 +collabor 1 32 3.465736 3.465736 543 +often 1 31 3.496508 3.496508 551 +produc 1 30 3.555348 3.555348 572 +abl 1 30 3.555348 3.555348 566 +ask 1 28 3.610918 3.610918 597 +manipul 1 27 3.637586 3.637586 624 +effort 1 26 3.688879 3.688879 652 +sport 1 25 3.737670 3.737670 683 +decis 1 23 3.806662 3.806662 728 +initi 1 23 3.806662 3.806662 717 +thank 1 23 3.806662 3.806662 721 +reduc 1 22 3.850148 3.850148 759 +among 1 21 3.912023 3.912023 781 +corpor 1 21 3.912023 3.912023 802 +annot 1 21 3.912023 3.912023 775 +increas 1 20 3.951244 3.951244 829 +qualiti 1 20 3.951244 3.951244 832 +fine 1 20 3.951244 3.951244 822 +media 1 19 4.007333 4.007333 861 +feedback 1 19 4.007333 4.007333 854 +agent 1 18 4.060443 4.060443 910 +seem 1 18 4.060443 4.060443 899 +whether 1 17 4.110874 4.110874 918 +remot 1 15 4.248495 4.248495 1041 +believ 1 13 4.382027 4.382027 1187 +captur 1 12 4.465908 4.465908 1232 +safe 1 12 4.465908 4.465908 1274 +market 1 11 4.553877 4.553877 1361 +end 1 9 4.753590 4.753590 1567 +correctli 1 9 4.753590 4.753590 1478 +risk 1 8 4.875197 4.875197 1689 +xerox 1 8 4.875197 4.875197 1725 +davi 1 7 5.010635 5.010635 1888 +intellectu 1 7 5.010635 5.010635 1847 +dead 1 7 5.010635 5.010635 1840 +foreign 1 7 5.010635 5.010635 1919 +edumi 1 6 5.164786 5.164786 2132 +sponsor 1 6 5.164786 5.164786 2133 +whichi 1 6 5.164786 5.164786 2056 +contract 1 6 5.164786 5.164786 1985 +huttenloch 1 6 5.164786 5.164786 1983 +begun 1 5 5.347108 5.347108 2386 +clarif 1 5 5.347108 5.347108 2253 +medium 1 4 5.568345 5.568345 2834 +transmit 1 4 5.568345 5.568345 2835 +lawyer 1 4 5.568345 5.568345 2836 +evid 1 4 5.568345 5.568345 2768 +isthat 1 4 5.568345 5.568345 2723 +owner 1 3 5.857933 5.857933 3531 +narr 1 3 5.857933 5.857933 3454 +worker 1 2 6.263398 6.263398 4841 +institutejim 1 1 6.957497 6.957497 9330 +davisxerox 1 1 6.957497 6.957497 9331 +corporationphd 1 1 6.957497 6.957497 9332 +improvecommun 1 1 6.957497 6.957497 9333 +andcont 1 1 6.957497 6.957497 9334 +reformat 1 1 6.957497 6.957497 9335 +inhypertext 1 1 6.957497 6.957497 9336 +thecstr 1 1 6.957497 6.957497 9337 +anarpa 1 1 6.957497 6.957497 9338 +moreeasili 1 1 6.957497 6.957497 9339 +electronicsystem 1 1 6.957497 6.957497 9340 +ofor 1 1 6.957497 6.957497 9341 +memoryinclud 1 1 6.957497 6.957497 9342 +sscreenplai 1 1 6.957497 6.957497 9343 +producedth 1 1 6.957497 6.957497 9344 +andjustif 1 1 6.957497 6.957497 9345 +developingcorpor 1 1 6.957497 6.957497 9346 +sharedannot 1 1 6.957497 6.957497 9347 +howpeopl 1 1 6.957497 6.957497 9348 +inelectron 1 1 6.957497 6.957497 9349 +prototypeimplement 1 1 6.957497 6.957497 9350 +shareddocu 1 1 6.957497 6.957497 9351 +nnotat 1 1 6.957497 6.957497 9352 +berequest 1 1 6.957497 6.957497 9353 +orcorrect 1 1 6.957497 6.957497 9354 +aus 1 1 6.957497 6.957497 9355 +willfind 1 1 6.957497 6.957497 9356 +whetherstud 1 1 6.957497 6.957497 9357 +usefulmean 1 1 6.957497 6.957497 9358 +designof 1 1 6.957497 6.957497 9359 +proxi 1 1 6.957497 6.957497 9360 +reliablycarri 1 1 6.957497 6.957497 9361 +toeither 1 1 6.957497 6.957497 9362 +alsopap 1 1 6.957497 6.957497 9363 +publicatiion 1 1 6.957497 6.957497 9364 +thedrimi 1 1 6.957497 6.957497 9365 +meprofession 1 1 6.957497 6.957497 9366 +historythi 1 1 6.957497 6.957497 9367 +improvisationi 1 1 6.957497 6.957497 9368 +resumeno 1 1 6.957497 6.957497 9369 +likeit 1 1 6.957497 6.957497 9370 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..a5b27afe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +person 1 111 2.197225 2.197225 117 +search 1 95 2.397895 2.397895 155 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +result 1 65 2.772589 2.772589 281 +right 1 48 3.044522 3.044522 363 +word 1 34 3.401197 3.401197 508 +relev 1 26 3.688879 3.688879 637 +greg 1 24 3.761200 3.761200 695 +reserv 1 20 3.951244 3.951244 808 +configur 1 15 4.248495 4.248495 1012 +metacrawl 1 10 4.653960 4.653960 1455 +erik 1 8 4.875197 4.875197 1701 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +selberg 1 5 5.347108 5.347108 2441 +phrase 1 5 5.347108 5.347108 2242 +ahoi 1 3 5.857933 5.857933 3532 +searchingmetacrawlerbi 1 1 6.957497 6.957497 9371 +lauckhartand 1 1 6.957497 6.957497 9372 +etzioniif 1 1 6.957497 6.957497 9373 +wordssort 1 1 6.957497 6.957497 9374 +locationcontrol 1 1 6.957497 6.957497 9375 +problemswebmast 1 1 6.957497 6.957497 9376 +comcopyright 1 1 6.957497 6.957497 9377 +lauckhart 1 1 6.957497 6.957497 9378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..d3cb4010 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +peopl 1 96 2.302585 2.302585 132 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +center 1 88 2.397895 2.397895 158 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +activ 1 84 2.484907 2.484907 182 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +ieee 1 86 2.484907 2.484907 190 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +dynam 1 76 2.564949 2.564949 194 +server 1 76 2.564949 2.564949 204 +intellig 1 72 2.639057 2.639057 225 +simul 1 66 2.708050 2.708050 255 +abstract 1 62 2.772589 2.772589 276 +plan 1 65 2.772589 2.772589 272 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +special 1 56 2.890372 2.890372 320 +local 1 55 2.944439 2.944439 334 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +effect 1 46 3.091042 3.091042 385 +mechan 1 43 3.178054 3.178054 416 +review 1 42 3.218876 3.218876 425 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +societi 1 40 3.258097 3.258097 456 +seminar 1 38 3.295837 3.295837 470 +robot 1 36 3.367296 3.367296 497 +tech 1 35 3.401197 3.401197 515 +committe 1 34 3.401197 3.401197 522 +human 1 32 3.465736 3.465736 546 +robert 1 30 3.555348 3.555348 567 +ask 1 28 3.610918 3.610918 597 +motion 1 24 3.761200 3.761200 699 +compress 1 23 3.806662 3.806662 719 +director 1 22 3.850148 3.850148 767 +geometri 1 22 3.850148 3.850148 752 +path 1 21 3.912023 3.912023 778 +brief 1 16 4.174387 4.174387 1001 +chuck 1 14 4.317488 4.317488 1108 +nasa 1 13 4.382027 4.382027 1188 +vladimir 1 11 4.553877 4.553877 1324 +sens 1 11 4.553877 4.553877 1305 +errata 1 10 4.653960 4.653960 1403 +dyer 1 9 4.753590 4.753590 1573 +sensit 1 8 4.875197 4.875197 1726 +manufactur 1 8 4.875197 4.875197 1634 +sensor 1 7 5.010635 5.010635 1920 +jude 1 6 5.164786 5.164786 2123 +actuat 1 5 5.347108 5.347108 2442 +shavlik 1 5 5.347108 5.347108 2429 +lumelski 1 4 5.568345 5.568345 2837 +underwat 1 4 5.568345 5.568345 2838 +redund 1 4 5.568345 5.568345 2839 +skin 1 4 5.568345 5.568345 2840 +neil 1 4 5.568345 5.568345 2841 +kinemat 1 3 5.857933 5.857933 3516 +avenuemadison 1 2 6.263398 6.263398 4842 +maze 1 2 6.263398 6.263398 4843 +tether 1 2 6.263398 6.263398 4844 +duffi 1 2 6.263398 6.263398 4845 +lorenz 1 2 6.263398 6.263398 4846 +telerobot 1 2 6.263398 6.263398 4847 +hert 1 2 6.263398 6.263398 4848 +jogger 1 1 6.957497 6.957497 9379 +decentr 1 1 6.957497 6.957497 9380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..0609e9cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +wisconsin 1 169 1.791759 1.791759 54 +mathemat 1 108 2.197225 2.197225 123 +center 1 88 2.397895 2.397895 158 +ieee 1 86 2.484907 2.484907 190 +institut 1 84 2.484907 2.484907 187 +plan 1 65 2.772589 2.772589 272 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +mechan 1 43 3.178054 3.178054 416 +autom 1 41 3.218876 3.218876 434 +societi 1 40 3.258097 3.258097 456 +electr 1 38 3.295837 3.295837 461 +robot 1 36 3.367296 3.367296 497 +global 1 34 3.401197 3.401197 520 +tech 1 35 3.401197 3.401197 515 +committe 1 34 3.401197 3.401197 522 +human 1 32 3.465736 3.465736 546 +motion 1 24 3.761200 3.761200 699 +geometri 1 22 3.850148 3.850148 752 +path 1 21 3.912023 3.912023 778 +grant 1 12 4.465908 4.465908 1216 +vladimir 1 11 4.553877 4.553877 1324 +sensit 1 8 4.875197 4.875197 1726 +lumelski 1 4 5.568345 5.568345 2837 +underwat 1 4 5.568345 5.568345 2838 +redund 1 4 5.568345 5.568345 2839 +skin 1 4 5.568345 5.568345 2840 +kinemat 1 3 5.857933 5.857933 3516 +mace 1 2 6.263398 6.263398 4849 +lumelskyprofessormechan 1 1 6.957497 6.957497 9381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..00ca7565 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +find 1 111 2.197225 2.197225 111 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +structur 1 106 2.197225 2.197225 105 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +commun 1 95 2.397895 2.397895 157 +real 1 93 2.397895 2.397895 144 +proceed 1 93 2.397895 2.397895 152 +requir 1 81 2.484907 2.484907 167 +thing 1 84 2.484907 2.484907 189 +environ 1 84 2.484907 2.484907 177 +level 1 87 2.484907 2.484907 180 +wide 1 84 2.484907 2.484907 185 +ieee 1 86 2.484907 2.484907 190 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +method 1 80 2.564949 2.564949 213 +want 1 79 2.564949 2.564949 199 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +upson 1 71 2.639057 2.639057 218 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +meet 1 72 2.639057 2.639057 229 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +integr 1 67 2.708050 2.708050 245 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +ithaca 1 65 2.772589 2.772589 294 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +function 1 62 2.772589 2.772589 275 +septemb 1 65 2.772589 2.772589 274 +virtual 1 62 2.772589 2.772589 285 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +explor 1 58 2.890372 2.890372 324 +adapt 1 46 3.091042 3.091042 387 +done 1 47 3.091042 3.091042 381 +protocol 1 45 3.135494 3.135494 407 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +video 1 44 3.135494 3.135494 405 +mark 1 44 3.135494 3.135494 403 +mechan 1 43 3.178054 3.178054 416 +third 1 43 3.178054 3.178054 412 +small 1 39 3.258097 3.258097 447 +brian 1 38 3.295837 3.295837 466 +respons 1 37 3.332205 3.332205 476 +focu 1 30 3.555348 3.555348 571 +abl 1 30 3.555348 3.555348 566 +cluster 1 28 3.610918 3.610918 612 +pass 1 28 3.610918 3.610918 611 +packag 1 28 3.610918 3.610918 614 +detect 1 26 3.688879 3.688879 646 +bound 1 26 3.688879 3.688879 659 +reliabl 1 25 3.737670 3.737670 674 +reach 1 24 3.761200 3.761200 688 +pattern 1 24 3.761200 3.761200 689 +highli 1 23 3.806662 3.806662 725 +lead 1 23 3.806662 3.806662 718 +sciencecornel 1 22 3.850148 3.850148 768 +deal 1 22 3.850148 3.850148 736 +cooper 1 22 3.850148 3.850148 757 +tell 1 21 3.912023 3.912023 777 +flexibl 1 21 3.912023 3.912023 792 +exploit 1 20 3.951244 3.951244 836 +smith 1 20 3.951244 3.951244 820 +left 1 19 4.007333 4.007333 851 +predict 1 19 4.007333 4.007333 855 +miss 1 19 4.007333 4.007333 866 +speed 1 18 4.060443 4.060443 911 +failur 1 18 4.060443 4.060443 898 +anyon 1 17 4.110874 4.110874 916 +latenc 1 16 4.174387 4.174387 993 +transfer 1 16 4.174387 4.174387 967 +devic 1 16 4.174387 4.174387 1002 +practicum 1 16 4.174387 4.174387 960 +horu 1 14 4.317488 4.317488 1116 +achiev 1 14 4.317488 4.317488 1088 +demand 1 14 4.317488 4.317488 1073 +eicken 1 13 4.382027 4.382027 1134 +thorsten 1 13 4.382027 4.382027 1133 +kenneth 1 12 4.465908 4.465908 1265 +brad 1 12 4.465908 4.465908 1264 +noth 1 11 4.553877 4.553877 1328 +bandwidth 1 11 4.553877 4.553877 1365 +node 1 11 4.553877 4.553877 1326 +reness 1 11 4.553877 4.553877 1333 +werner 1 10 4.653960 4.653960 1385 +awai 1 10 4.653960 4.653960 1447 +guarante 1 10 4.653960 4.653960 1391 +mountain 1 10 4.653960 4.653960 1456 +desir 1 9 4.753590 4.753590 1542 +deadlin 1 9 4.753590 4.753590 1502 +robbert 1 9 4.753590 4.753590 1529 +birman 1 9 4.753590 4.753590 1531 +vogel 1 8 4.875197 4.875197 1622 +extract 1 8 4.875197 4.875197 1728 +sigop 1 8 4.875197 4.875197 1727 +vineet 1 8 4.875197 4.875197 1639 +perfect 1 7 5.010635 5.010635 1921 +gave 1 7 5.010635 5.010635 1922 +synchroni 1 7 5.010635 5.010635 1923 +implementationof 1 7 5.010635 5.010635 1813 +deliv 1 6 5.164786 5.164786 2070 +subsystem 1 6 5.164786 5.164786 2015 +alex 1 6 5.164786 5.164786 2130 +situat 1 5 5.347108 5.347108 2365 +scope 1 5 5.347108 5.347108 2296 +buch 1 5 5.347108 5.347108 2272 +myresearch 1 4 5.568345 5.568345 2842 +behind 1 4 5.568345 5.568345 2610 +basu 1 4 5.568345 5.568345 2843 +hayden 1 4 5.568345 5.568345 2844 +hickei 1 4 5.568345 5.568345 2845 +vaysburd 1 4 5.568345 5.568345 2846 +concert 1 3 5.857933 5.857933 3533 +interfacefor 1 3 5.857933 5.857933 3534 +anindya 1 3 5.857933 5.857933 3535 +copper 1 3 5.857933 5.857933 3536 +glade 1 3 5.857933 5.857933 3537 +takako 1 3 5.857933 5.857933 3538 +amwork 1 2 6.263398 6.263398 4850 +regardless 1 2 6.263398 6.263398 4577 +katherin 1 2 6.263398 6.263398 4851 +dalia 1 2 6.263398 6.263398 4852 +malki 1 2 6.263398 6.263398 4853 +researchera 1 1 6.957497 6.957497 9384 +halldept 1 1 6.957497 6.957497 9385 +thehorusand 1 1 6.957497 6.957497 9386 +bandwith 1 1 6.957497 6.957497 9387 +horuswith 1 1 6.957497 6.957497 9388 +fallen 1 1 6.957497 6.957497 9389 +latencyfor 1 1 6.957497 6.957497 9390 +protocolsar 1 1 6.957497 6.957497 9391 +structureand 1 1 6.957497 6.957497 9392 +guarant 1 1 6.957497 6.957497 9393 +acur 1 1 6.957497 6.957497 9394 +aglob 1 1 6.957497 6.957497 9395 +supportfailur 1 1 6.957497 6.957497 9396 +suspis 1 1 6.957497 6.957497 9397 +workwith 1 1 6.957497 6.957497 9398 +middlewar 1 1 6.957497 6.957497 9399 +brainchild 1 1 6.957497 6.957497 9400 +andken 1 1 6.957497 6.957497 9401 +withthorsten 1 1 6.957497 6.957497 9402 +horusexperi 1 1 6.957497 6.957497 9403 +lectureson 1 1 6.957497 6.957497 9404 +workshopconnamoran 1 1 6.957497 6.957497 9382 +ierland 1 1 6.957497 6.957497 9383 +virtuallysynchron 1 1 6.957497 6.957497 9405 +princpl 1 1 6.957497 6.957497 9406 +hpc 1 1 6.957497 6.957497 9407 +kati 1 1 6.957497 6.957497 9408 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..d40f3bf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +provid 1 121 2.079442 2.079442 94 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +book 1 99 2.302585 2.302585 131 +commun 1 95 2.397895 2.397895 157 +real 1 93 2.397895 2.397895 144 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +comment 1 93 2.397895 2.397895 146 +requir 1 81 2.484907 2.484907 167 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +practic 1 70 2.708050 2.708050 246 +java 1 70 2.708050 2.708050 248 +multimedia 1 68 2.708050 2.708050 258 +collect 1 65 2.772589 2.772589 268 +virtual 1 62 2.772589 2.772589 285 +written 1 63 2.772589 2.772589 278 +type 1 61 2.833213 2.833213 296 +special 1 56 2.890372 2.890372 320 +overview 1 56 2.890372 2.890372 323 +publish 1 57 2.890372 2.890372 326 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +processor 1 54 2.944439 2.944439 335 +found 1 53 2.944439 2.944439 337 +scientif 1 53 2.944439 2.944439 341 +much 1 52 2.995732 2.995732 349 +life 1 50 3.044522 3.044522 375 +set 1 50 3.044522 3.044522 361 +standard 1 48 3.044522 3.044522 365 +effect 1 46 3.091042 3.091042 385 +describ 1 45 3.135494 3.135494 400 +made 1 44 3.135494 3.135494 398 +better 1 45 3.135494 3.135494 401 +compani 1 41 3.218876 3.218876 423 +cach 1 41 3.218876 3.218876 432 +origin 1 38 3.295837 3.295837 472 +close 1 38 3.295837 3.295837 465 +open 1 38 3.295837 3.295837 469 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +workstat 1 37 3.332205 3.332205 479 +ofth 1 36 3.367296 3.367296 491 +everi 1 34 3.401197 3.401197 519 +word 1 34 3.401197 3.401197 508 +toler 1 33 3.433987 3.433987 533 +within 1 33 3.433987 3.433987 525 +articl 1 33 3.433987 3.433987 530 +fault 1 32 3.465736 3.465736 547 +independ 1 32 3.465736 3.465736 548 +collabor 1 32 3.465736 3.465736 543 +idea 1 32 3.465736 3.465736 545 +secur 1 30 3.555348 3.555348 577 +exist 1 30 3.555348 3.555348 569 +power 1 30 3.555348 3.555348 573 +platform 1 29 3.583519 3.583519 591 +framework 1 28 3.610918 3.610918 606 +cluster 1 28 3.610918 3.610918 612 +american 1 27 3.637586 3.637586 634 +effort 1 26 3.688879 3.688879 652 +toward 1 25 3.737670 3.737670 668 +never 1 25 3.737670 3.737670 671 +reliabl 1 25 3.737670 3.737670 674 +wish 1 24 3.761200 3.761200 692 +seri 1 24 3.761200 3.761200 708 +initi 1 23 3.806662 3.806662 717 +varieti 1 22 3.850148 3.850148 740 +unit 1 21 3.912023 3.912023 779 +thu 1 21 3.912023 3.912023 773 +born 1 21 3.912023 3.912023 798 +runtim 1 19 4.007333 4.007333 858 +minim 1 18 4.060443 4.060443 887 +earli 1 16 4.174387 4.174387 968 +style 1 15 4.248495 4.248495 1036 +contribut 1 15 4.248495 4.248495 1021 +horu 1 14 4.317488 4.317488 1116 +attribut 1 14 4.317488 4.317488 1092 +coher 1 14 4.317488 4.317488 1109 +becam 1 14 4.317488 4.317488 1117 +whose 1 13 4.382027 4.382027 1166 +bodi 1 13 4.382027 4.382027 1178 +brother 1 13 4.382027 4.382027 1189 +robust 1 12 4.465908 4.465908 1271 +replic 1 12 4.465908 4.465908 1231 +evolv 1 12 4.465908 4.465908 1223 +weight 1 12 4.465908 4.465908 1204 +rest 1 12 4.465908 4.465908 1259 +kenneth 1 12 4.465908 4.465908 1265 +faster 1 11 4.553877 4.553877 1323 +reness 1 11 4.553877 4.553877 1333 +volum 1 11 4.553877 4.553877 1347 +modular 1 10 4.653960 4.653960 1392 +death 1 10 4.653960 4.653960 1457 +modul 1 10 4.653960 4.653960 1434 +length 1 10 4.653960 4.653960 1400 +sentenc 1 10 4.653960 4.653960 1413 +werner 1 10 4.653960 4.653960 1385 +light 1 9 4.753590 4.753590 1533 +birman 1 9 4.753590 4.753590 1531 +robbert 1 9 4.753590 4.753590 1529 +heart 1 8 4.875197 4.875197 1729 +gain 1 8 4.875197 4.875197 1730 +vogel 1 8 4.875197 4.875197 1622 +aris 1 7 5.010635 5.010635 1924 +exactli 1 7 5.010635 5.010635 1817 +synchroni 1 7 5.010635 5.010635 1923 +usabl 1 7 5.010635 5.010635 1810 +conferenc 1 7 5.010635 5.010635 1857 +dead 1 7 5.010635 5.010635 1840 +brought 1 7 5.010635 5.010635 1925 +restrict 1 6 5.164786 5.164786 2129 +outstand 1 6 5.164786 5.164786 2136 +mother 1 6 5.164786 5.164786 2083 +greatest 1 6 5.164786 5.164786 2073 +isi 1 5 5.347108 5.347108 2443 +elsewher 1 5 5.347108 5.347108 2444 +circumst 1 5 5.347108 5.347108 2283 +knew 1 5 5.347108 5.347108 2445 +hair 1 5 5.347108 5.347108 2446 +firm 1 4 5.568345 5.568345 2684 +areavail 1 4 5.568345 5.568345 2810 +projectth 1 3 5.857933 5.857933 3344 +woman 1 3 5.857933 5.857933 3539 +redesign 1 3 5.857933 5.857933 3540 +greatli 1 3 5.857933 5.857933 3541 +child 1 3 5.857933 5.857933 3542 +london 1 3 5.857933 5.857933 3282 +egypt 1 2 6.263398 6.263398 4856 +oppos 1 2 6.263398 6.263398 4855 +groupwar 1 2 6.263398 6.263398 4857 +toconstruct 1 2 6.263398 6.263398 4858 +communicationarchitectur 1 2 6.263398 6.263398 4859 +ensembl 1 2 6.263398 6.263398 4854 +ofreleas 1 2 6.263398 6.263398 4860 +transi 1 2 6.263398 6.263398 4861 +froma 1 2 6.263398 6.263398 4862 +mighti 1 2 6.263398 6.263398 4863 +wing 1 2 6.263398 6.263398 4864 +stir 1 2 6.263398 6.263398 4865 +lament 1 2 6.263398 6.263398 4866 +papersand 1 2 6.263398 6.263398 4867 +silvano 1 2 6.263398 6.263398 4868 +mytholog 1 2 6.263398 6.263398 4869 +court 1 2 6.263398 6.263398 4870 +god 1 1 6.957497 6.957497 9411 +rejoic 1 1 6.957497 6.957497 9412 +osiri 1 1 6.957497 6.957497 9409 +triumphant 1 1 6.957497 6.957497 9414 +ofisi 1 1 6.957497 6.957497 9415 +heir 1 1 6.957497 6.957497 9416 +groupcommun 1 1 6.957497 6.957497 9413 +appealedstrongli 1 1 6.957497 6.957497 9417 +egyptian 1 1 6.957497 6.957497 9410 +becausein 1 1 6.957497 6.957497 9418 +possess 1 1 6.957497 6.957497 9419 +renew 1 1 6.957497 6.957497 9420 +movementa 1 1 6.957497 6.957497 9421 +inact 1 1 6.957497 6.957497 9422 +applicationsbas 1 1 6.957497 6.957497 9423 +infault 1 1 6.957497 6.957497 9424 +thatexploit 1 1 6.957497 6.957497 9425 +theoveral 1 1 6.957497 6.957497 9426 +applicationprotocol 1 1 6.957497 6.957497 9427 +applicationrequir 1 1 6.957497 6.957497 9428 +launch 1 1 6.957497 6.957497 9429 +theisi 1 1 6.957497 6.957497 9430 +robustdistribut 1 1 6.957497 6.957497 9431 +unsuit 1 1 6.957497 6.957497 9432 +asappl 1 1 6.957497 6.957497 9433 +besidesth 1 1 6.957497 6.957497 9434 +usedfor 1 1 6.957497 6.957497 9435 +sametim 1 1 6.957497 6.957497 9436 +lighter 1 1 6.957497 6.957497 9437 +beus 1 1 6.957497 6.957497 9438 +commericalright 1 1 6.957497 6.957497 9439 +manyoth 1 1 6.957497 6.957497 9440 +nofe 1 1 6.957497 6.957497 9441 +ensemblewil 1 1 6.957497 6.957497 9442 +groupwareappl 1 1 6.957497 6.957497 9443 +differentclass 1 1 6.957497 6.957497 9444 +onnext 1 1 6.957497 6.957497 9445 +speedcommun 1 1 6.957497 6.957497 9446 +systemsproject 1 1 6.957497 6.957497 9447 +navtech 1 1 6.957497 6.957497 9448 +stormcast 1 1 6.957497 6.957497 9449 +tacomaproject 1 1 6.957497 6.957497 9450 +thesepag 1 1 6.957497 6.957497 9451 +begotten 1 1 6.957497 6.957497 9452 +sorrow 1 1 6.957497 6.957497 9453 +herhusband 1 1 6.957497 6.957497 9454 +goddess 1 1 6.957497 6.957497 9455 +distress 1 1 6.957497 6.957497 9456 +equippedwith 1 1 6.957497 6.957497 9457 +utter 1 1 6.957497 6.957497 9458 +mighthav 1 1 6.957497 6.957497 9459 +secret 1 1 6.957497 6.957497 9460 +suckl 1 1 6.957497 6.957497 9461 +rear 1 1 6.957497 6.957497 9462 +horusvisit 1 1 6.957497 6.957497 9463 +abstractpag 1 1 6.957497 6.957497 9464 +relatedto 1 1 6.957497 6.957497 9465 +maffei 1 1 6.957497 6.957497 9466 +flexiblegroup 1 1 6.957497 6.957497 9467 +hyme 1 1 6.957497 6.957497 9468 +osirisfrom 1 1 6.957497 6.957497 9469 +papyru 1 1 6.957497 6.957497 9470 +walli 1 1 6.957497 6.957497 9471 +budg 1 1 6.957497 6.957497 9472 +studiesin 1 1 6.957497 6.957497 9473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..23d62079 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +read 1 154 1.791759 1.791759 47 +report 1 131 2.079442 2.079442 92 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +site 1 106 2.197225 2.197225 119 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +comment 1 93 2.397895 2.397895 146 +info 1 85 2.484907 2.484907 176 +academ 1 82 2.484907 2.484907 178 +activ 1 84 2.484907 2.484907 182 +know 1 80 2.564949 2.564949 198 +master 1 76 2.564949 2.564949 216 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +free 1 73 2.639057 2.639057 224 +degre 1 69 2.708050 2.708050 259 +locat 1 59 2.833213 2.833213 303 +faculti 1 56 2.890372 2.890372 325 +semest 1 58 2.890372 2.890372 312 +direct 1 57 2.890372 2.890372 316 +undergradu 1 54 2.944439 2.944439 338 +standard 1 48 3.044522 3.044522 365 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +annual 1 40 3.258097 3.258097 458 +feel 1 37 3.332205 3.332205 483 +staff 1 36 3.367296 3.367296 490 +tech 1 35 3.401197 3.401197 515 +go 1 33 3.433987 3.433987 529 +taught 1 33 3.433987 3.433987 526 +collabor 1 32 3.465736 3.465736 543 +team 1 27 3.637586 3.637586 625 +doctor 1 24 3.761200 3.761200 709 +brows 1 23 3.806662 3.806662 726 +size 1 23 3.806662 3.806662 713 +offici 1 18 4.060443 4.060443 894 +anonym 1 14 4.317488 4.317488 1100 +outstand 1 6 5.164786 5.164786 2136 +disclaim 1 4 5.568345 5.568345 2847 +projector 1 3 5.857933 5.857933 3409 +universitydepart 1 2 6.263398 6.263398 4871 +infoget 1 1 6.957497 6.957497 9474 +contactswithin 1 1 6.957497 6.957497 9475 +facultyfind 1 1 6.957497 6.957497 9476 +ortheir 1 1 6.957497 6.957497 9477 +researchcheck 1 1 6.957497 6.957497 9478 +aboutour 1 1 6.957497 6.957497 9479 +publicationsfind 1 1 6.957497 6.957497 9480 +researcherseith 1 1 6.957497 6.957497 9481 +degreeslook 1 1 6.957497 6.957497 9482 +orundergradu 1 1 6.957497 6.957497 9483 +academicsrefer 1 1 6.957497 6.957497 9484 +webfor 1 1 6.957497 6.957497 9485 +generalcoursedescript 1 1 6.957497 6.957497 9486 +peopleget 1 1 6.957497 6.957497 9487 +directorylist 1 1 6.957497 6.957497 9488 +activitiesfind 1 1 6.957497 6.957497 9489 +theassoci 1 1 6.957497 6.957497 9490 +excellenthockei 1 1 6.957497 6.957497 9491 +serverscheck 1 1 6.957497 6.957497 9492 +gopherserv 1 1 6.957497 6.957497 9493 +ftpserver 1 1 6.957497 6.957497 9494 +sitesquest 1 1 6.957497 6.957497 9495 +informationpres 1 1 6.957497 6.957497 9496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..d23386f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +seattl 1 120 2.079442 2.079442 103 +report 1 131 2.079442 2.079442 92 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +world 1 115 2.197225 2.197225 126 +pleas 1 113 2.197225 2.197225 114 +text 1 98 2.302585 2.302585 133 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +search 1 95 2.397895 2.397895 155 +section 1 94 2.397895 2.397895 149 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +larg 1 82 2.484907 2.484907 168 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +novemb 1 81 2.484907 2.484907 179 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +addit 1 74 2.639057 2.639057 228 +servic 1 72 2.639057 2.639057 236 +nation 1 74 2.639057 2.639057 240 +meet 1 72 2.639057 2.639057 229 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +free 1 73 2.639057 2.639057 224 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +would 1 67 2.708050 2.708050 251 +degre 1 69 2.708050 2.708050 259 +main 1 67 2.708050 2.708050 256 +differ 1 66 2.708050 2.708050 253 +test 1 66 2.708050 2.708050 252 +multimedia 1 68 2.708050 2.708050 258 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +copi 1 63 2.772589 2.772589 284 +automat 1 61 2.833213 2.833213 306 +colleg 1 61 2.833213 2.833213 300 +unix 1 58 2.890372 2.890372 308 +publish 1 57 2.890372 2.890372 326 +sever 1 56 2.890372 2.890372 322 +faculti 1 56 2.890372 2.890372 325 +extens 1 53 2.944439 2.944439 340 +cover 1 55 2.944439 2.944439 329 +februari 1 54 2.944439 2.944439 328 +without 1 50 3.044522 3.044522 370 +approach 1 48 3.044522 3.044522 366 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +natur 1 44 3.135494 3.135494 406 +made 1 44 3.135494 3.135494 398 +third 1 43 3.178054 3.178054 412 +around 1 43 3.178054 3.178054 415 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +york 1 41 3.218876 3.218876 435 +continu 1 39 3.258097 3.258097 448 +transact 1 39 3.258097 3.258097 438 +annual 1 40 3.258097 3.258097 458 +microsoft 1 38 3.295837 3.295837 468 +field 1 37 3.332205 3.332205 482 +purpos 1 37 3.332205 3.332205 481 +mean 1 37 3.332205 3.332205 477 +respons 1 37 3.332205 3.332205 476 +word 1 34 3.401197 3.401197 508 +statist 1 35 3.401197 3.401197 521 +represent 1 35 3.401197 3.401197 512 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +queri 1 33 3.433987 3.433987 524 +articl 1 33 3.433987 3.433987 530 +express 1 32 3.465736 3.465736 540 +storag 1 31 3.496508 3.496508 553 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +semant 1 29 3.583519 3.583519 587 +full 1 28 3.610918 3.610918 615 +progress 1 28 3.610918 3.610918 598 +retriev 1 27 3.637586 3.637586 621 +determin 1 27 3.637586 3.637586 630 +subject 1 26 3.688879 3.688879 647 +consist 1 26 3.688879 3.688879 651 +store 1 24 3.761200 3.761200 693 +handl 1 24 3.761200 3.761200 685 +size 1 23 3.806662 3.806662 713 +brows 1 23 3.806662 3.806662 726 +similar 1 21 3.912023 3.912023 771 +flexibl 1 21 3.912023 3.912023 792 +util 1 21 3.912023 3.912023 774 +corpor 1 21 3.912023 3.912023 802 +department 1 20 3.951244 3.951244 839 +media 1 19 4.007333 4.007333 861 +item 1 19 4.007333 4.007333 856 +hypertext 1 19 4.007333 4.007333 865 +appropri 1 18 4.060443 4.060443 883 +expand 1 17 4.110874 4.110874 928 +analyz 1 17 4.110874 4.110874 925 +germani 1 17 4.110874 4.110874 946 +moor 1 17 4.110874 4.110874 936 +vector 1 16 4.174387 4.174387 961 +piec 1 15 4.248495 4.248495 1020 +capabl 1 15 4.248495 4.248495 1016 +demand 1 14 4.317488 4.317488 1073 +conduct 1 14 4.317488 4.317488 1065 +rank 1 14 4.317488 4.317488 1063 +context 1 13 4.382027 4.382027 1153 +station 1 13 4.382027 4.382027 1157 +readabl 1 12 4.465908 4.465908 1258 +optic 1 12 4.465908 4.465908 1221 +hypermedia 1 12 4.465908 4.465908 1247 +realiti 1 12 4.465908 4.465908 1272 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +refin 1 11 4.553877 4.553877 1363 +probabilist 1 11 4.553877 4.553877 1343 +smart 1 11 4.553877 4.553877 1352 +rapid 1 10 4.653960 4.653960 1453 +paragraph 1 10 4.653960 4.653960 1449 +sentenc 1 10 4.653960 4.653960 1413 +equip 1 10 4.653960 4.653960 1459 +criteria 1 9 4.753590 4.753590 1477 +charg 1 9 4.753590 4.753590 1582 +hundr 1 9 4.753590 4.753590 1528 +mass 1 8 4.875197 4.875197 1732 +formul 1 8 4.875197 4.875197 1733 +matter 1 8 4.875197 4.875197 1627 +colloquium 1 8 4.875197 4.875197 1734 +harvard 1 7 5.010635 5.010635 1926 +densiti 1 7 5.010635 5.010635 1927 +vehicl 1 7 5.010635 5.010635 1928 +sparc 1 7 5.010635 5.010635 1860 +furthermor 1 6 5.164786 5.164786 2141 +restrict 1 6 5.164786 5.164786 2129 +maryland 1 6 5.164786 5.164786 2140 +eduph 1 5 5.347108 5.347108 2449 +corpu 1 5 5.347108 5.347108 2282 +accuraci 1 5 5.347108 5.347108 2450 +medicin 1 5 5.347108 5.347108 2448 +feder 1 5 5.347108 5.347108 2266 +ohio 1 5 5.347108 5.347108 2447 +travers 1 5 5.347108 5.347108 2363 +rapidli 1 4 5.568345 5.568345 2850 +termin 1 4 5.568345 5.568345 2852 +ireland 1 4 5.568345 5.568345 2853 +machineri 1 4 5.568345 5.568345 2851 +allan 1 4 5.568345 5.568345 2849 +sophist 1 3 5.857933 5.857933 3545 +trec 1 3 5.857933 5.857933 3547 +gigabyt 1 3 5.857933 5.857933 3548 +activitiesmemb 1 3 5.857933 5.857933 3549 +genom 1 3 5.857933 5.857933 3546 +zurich 1 3 5.857933 5.857933 3550 +switzerland 1 3 5.857933 5.857933 3551 +vega 1 3 5.857933 5.857933 3450 +softwareth 1 3 5.857933 5.857933 3552 +gerard 1 2 6.263398 6.263398 4876 +decreas 1 2 6.263398 6.263398 4877 +absenc 1 2 6.263398 6.263398 4878 +unrestrict 1 2 6.263398 6.263398 4879 +excerpt 1 2 6.263398 6.263398 4880 +activitiesassoci 1 2 6.263398 6.263398 4881 +systemsprogram 1 2 6.263398 6.263398 4882 +sigir 1 2 6.263398 6.263398 4873 +dublin 1 2 6.263398 6.263398 4883 +moscow 1 2 6.263398 6.263398 4884 +bucklei 1 2 6.263398 6.263398 4874 +nevada 1 2 6.263398 6.263398 4875 +saltongerard 1 1 6.957497 6.957497 9508 +saltonprofessorg 1 1 6.957497 6.957497 9509 +cheapli 1 1 6.957497 6.957497 9510 +encyclopedia 1 1 6.957497 6.957497 9505 +funk 1 1 6.957497 6.957497 9511 +wagnal 1 1 6.957497 6.957497 9512 +committeeprofession 1 1 6.957497 6.957497 9513 +seventeenth 1 1 6.957497 6.957497 9514 +darmstadt 1 1 6.957497 6.957497 9515 +bethesda 1 1 6.957497 6.957497 9506 +lecturesautomat 1 1 6.957497 6.957497 9516 +konstanz 1 1 6.957497 6.957497 9517 +asi 1 1 6.957497 6.957497 9518 +columbu 1 1 6.957497 6.957497 9507 +publicationsapproach 1 1 6.957497 6.957497 9519 +passag 1 1 6.957497 6.957497 9520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..b00ba3f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +problem 1 147 1.945910 1.945910 75 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +world 1 115 2.197225 2.197225 126 +theori 1 111 2.197225 2.197225 127 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +proceed 1 93 2.397895 2.397895 152 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +august 1 66 2.708050 2.708050 257 +function 1 62 2.772589 2.772589 275 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +numer 1 49 3.044522 3.044522 369 +approach 1 48 3.044522 3.044522 366 +annual 1 40 3.258097 3.258097 458 +china 1 37 3.332205 3.332205 487 +global 1 34 3.401197 3.401197 520 +return 1 34 3.401197 3.401197 502 +scientist 1 31 3.496508 3.496508 560 +exist 1 30 3.555348 3.555348 569 +consid 1 29 3.583519 3.583519 590 +chines 1 29 3.583519 3.583519 595 +scale 1 28 3.610918 3.610918 613 +enhanc 1 26 3.688879 3.688879 644 +subject 1 26 3.688879 3.688879 647 +bound 1 26 3.688879 3.688879 659 +siam 1 21 3.912023 3.912023 800 +department 1 20 3.951244 3.951244 839 +region 1 19 4.007333 4.007333 875 +beij 1 19 4.007333 4.007333 876 +minim 1 18 4.060443 4.060443 887 +accept 1 18 4.060443 4.060443 879 +moor 1 17 4.110874 4.110874 936 +condit 1 16 4.174387 4.174387 975 +young 1 16 4.174387 4.174387 991 +reflect 1 15 4.248495 4.248495 1034 +nonlinear 1 14 4.317488 4.317488 1107 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +penalti 1 10 4.653960 4.653960 1405 +trust 1 9 4.753590 4.753590 1583 +exact 1 9 4.753590 4.753590 1509 +converg 1 7 5.010635 5.010635 1844 +constrain 1 6 5.164786 5.164786 2042 +eduph 1 5 5.347108 5.347108 2449 +affin 1 5 5.347108 5.347108 2378 +interior 1 5 5.347108 5.347108 2439 +waterloo 1 3 5.857933 5.857933 3523 +unconstrain 1 2 6.263398 6.263398 4499 +publicationsa 1 2 6.263398 6.263398 4885 +yui 1 1 6.957497 6.957497 9522 +liyui 1 1 6.957497 6.957497 9523 +liresearch 1 1 6.957497 6.957497 9524 +associateyui 1 1 6.957497 6.957497 9525 +nonlinearli 1 1 6.957497 6.957497 9521 +lecturesan 1 1 6.957497 6.957497 9526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..3b62e958 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +studi 1 120 2.079442 2.079442 91 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +theori 1 111 2.197225 2.197225 127 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +center 1 88 2.397895 2.397895 158 +proceed 1 93 2.397895 2.397895 152 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +novemb 1 81 2.484907 2.484907 179 +dynam 1 76 2.564949 2.564949 194 +method 1 80 2.564949 2.564949 213 +orient 1 80 2.564949 2.564949 205 +decemb 1 80 2.564949 2.564949 215 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +meet 1 72 2.639057 2.639057 229 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +function 1 62 2.772589 2.772589 275 +septemb 1 65 2.772589 2.772589 274 +januari 1 62 2.772589 2.772589 264 +ithaca 1 65 2.772589 2.772589 294 +type 1 61 2.833213 2.833213 296 +special 1 56 2.890372 2.890372 320 +space 1 57 2.890372 2.890372 310 +reason 1 57 2.890372 2.890372 318 +publish 1 57 2.890372 2.890372 326 +scientif 1 53 2.944439 2.944439 341 +allow 1 53 2.944439 2.944439 333 +profession 1 51 2.995732 2.995732 345 +numer 1 49 3.044522 3.044522 369 +principl 1 48 3.044522 3.044522 357 +done 1 47 3.091042 3.091042 381 +effect 1 46 3.091042 3.091042 385 +algebra 1 45 3.135494 3.135494 394 +mechan 1 43 3.178054 3.178054 416 +autom 1 41 3.218876 3.218876 434 +review 1 42 3.218876 3.218876 425 +york 1 41 3.218876 3.218876 435 +transact 1 39 3.258097 3.258097 438 +error 1 40 3.258097 3.258097 449 +societi 1 40 3.258097 3.258097 456 +annual 1 40 3.258097 3.258097 458 +correct 1 38 3.295837 3.295837 462 +electr 1 38 3.295837 3.295837 461 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +board 1 33 3.433987 3.433987 528 +extend 1 32 3.465736 3.465736 539 +richard 1 31 3.496508 3.496508 559 +scientist 1 31 3.496508 3.496508 560 +specifi 1 30 3.555348 3.555348 568 +compon 1 30 3.555348 3.555348 570 +common 1 30 3.555348 3.555348 574 +focus 1 29 3.583519 3.583519 584 +symbol 1 27 3.637586 3.637586 620 +american 1 27 3.637586 3.637586 634 +challeng 1 26 3.688879 3.688879 653 +constraint 1 26 3.688879 3.688879 636 +equat 1 23 3.806662 3.806662 724 +toolkit 1 20 3.951244 3.951244 835 +department 1 20 3.951244 3.951244 839 +region 1 19 4.007333 4.007333 875 +north 1 19 4.007333 4.007333 873 +boston 1 19 4.007333 4.007333 862 +lisp 1 18 4.060443 4.060443 897 +differenti 1 17 4.110874 4.110874 921 +layer 1 17 4.110874 4.110874 926 +moor 1 17 4.110874 4.110874 936 +modern 1 16 4.174387 4.174387 966 +vector 1 16 4.174387 4.174387 961 +susan 1 15 4.248495 4.248495 1050 +polynomi 1 14 4.317488 4.317488 1069 +dean 1 14 4.317488 4.317488 1104 +massachusett 1 14 4.317488 4.317488 1118 +convert 1 13 4.382027 4.382027 1122 +opportun 1 13 4.382027 4.382027 1161 +joint 1 13 4.382027 4.382027 1130 +calcul 1 12 4.465908 4.465908 1268 +deduct 1 12 4.465908 4.465908 1236 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +israel 1 11 4.553877 4.553877 1366 +matric 1 10 4.653960 4.653960 1399 +decomposit 1 10 4.653960 4.653960 1439 +vista 1 10 4.653960 4.653960 1452 +modular 1 10 4.653960 4.653960 1392 +factor 1 9 4.753590 4.753590 1544 +ring 1 8 4.875197 4.875197 1684 +ideal 1 8 4.875197 4.875197 1630 +aris 1 7 5.010635 5.010635 1924 +boundari 1 7 5.010635 5.010635 1929 +pursu 1 7 5.010635 5.010635 1902 +refere 1 7 5.010635 5.010635 1895 +interpol 1 7 5.010635 5.010635 1823 +carolina 1 6 5.164786 5.164786 2142 +rubinfeld 1 6 5.164786 5.164786 1998 +kluwer 1 6 5.164786 5.164786 2143 +eduph 1 5 5.347108 5.347108 2449 +fluid 1 5 5.347108 5.347108 2440 +colleagu 1 5 5.347108 5.347108 2304 +ration 1 5 5.347108 5.347108 2427 +synthes 1 5 5.347108 5.347108 2451 +suni 1 5 5.347108 5.347108 2452 +ronitt 1 5 5.347108 5.347108 2265 +suppli 1 4 5.568345 5.568345 2611 +substrat 1 4 5.568345 5.568345 2857 +weyl 1 4 5.568345 5.568345 2854 +dexter 1 4 5.568345 5.568345 2855 +kozen 1 4 5.568345 5.568345 2619 +technion 1 4 5.568345 5.568345 2856 +weizmann 1 4 5.568345 5.568345 2858 +aerospac 1 3 5.857933 5.857933 3555 +syracus 1 3 5.857933 5.857933 3553 +durham 1 3 5.857933 5.857933 3279 +haifa 1 3 5.857933 5.857933 3554 +activitieseditori 1 2 6.263398 6.263398 4888 +softwareprogram 1 2 6.263398 6.263398 4889 +irreduc 1 2 6.263398 6.263398 4890 +microstorag 1 2 6.263398 6.263398 4887 +dawson 1 2 6.263398 6.263398 4886 +rehovot 1 2 6.263398 6.263398 4891 +albani 1 2 6.263398 6.263398 4892 +multivari 1 2 6.263398 6.263398 4151 +zippelrichard 1 1 6.957497 6.957497 9528 +zippelsenior 1 1 6.957497 6.957497 9529 +associaterz 1 1 6.957497 6.957497 9530 +computationlecturesalgebra 1 1 6.957497 6.957497 9531 +landau 1 1 6.957497 6.957497 9527 +publicationseffect 1 1 6.957497 6.957497 9532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..57759eb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +pleas 1 113 2.197225 2.197225 114 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +start 1 83 2.484907 2.484907 173 +wide 1 84 2.484907 2.484907 185 +activ 1 84 2.484907 2.484907 182 +member 1 84 2.484907 2.484907 165 +academ 1 82 2.484907 2.484907 178 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +dynam 1 76 2.564949 2.564949 194 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +refer 1 78 2.564949 2.564949 203 +orient 1 80 2.564949 2.564949 205 +master 1 76 2.564949 2.564949 216 +decemb 1 80 2.564949 2.564949 215 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +nation 1 74 2.639057 2.639057 240 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +multimedia 1 68 2.708050 2.708050 258 +integr 1 67 2.708050 2.708050 245 +virtual 1 62 2.772589 2.772589 285 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +polici 1 64 2.772589 2.772589 279 +reason 1 57 2.890372 2.890372 318 +special 1 56 2.890372 2.890372 320 +major 1 56 2.890372 2.890372 315 +faculti 1 56 2.890372 2.890372 325 +extens 1 53 2.944439 2.944439 340 +much 1 52 2.995732 2.995732 349 +profession 1 51 2.995732 2.995732 345 +set 1 50 3.044522 3.044522 361 +basic 1 50 3.044522 3.044522 360 +approach 1 48 3.044522 3.044522 366 +california 1 46 3.091042 3.091042 388 +done 1 47 3.091042 3.091042 381 +featur 1 46 3.091042 3.091042 386 +execut 1 45 3.135494 3.135494 404 +mark 1 44 3.135494 3.135494 403 +protocol 1 45 3.135494 3.135494 407 +combin 1 42 3.218876 3.218876 421 +press 1 42 3.218876 3.218876 419 +theoret 1 39 3.258097 3.258097 446 +transact 1 39 3.258097 3.258097 438 +societi 1 40 3.258097 3.258097 456 +annual 1 40 3.258097 3.258097 458 +origin 1 38 3.295837 3.295837 472 +brian 1 38 3.295837 3.295837 466 +purpos 1 37 3.332205 3.332205 481 +singl 1 34 3.401197 3.401197 510 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +toler 1 33 3.433987 3.433987 533 +within 1 33 3.433987 3.433987 525 +fault 1 32 3.465736 3.465736 547 +idea 1 32 3.465736 3.465736 545 +ad 1 32 3.465736 3.465736 544 +collabor 1 32 3.465736 3.465736 543 +scientist 1 31 3.496508 3.496508 560 +focu 1 30 3.555348 3.555348 571 +secur 1 30 3.555348 3.555348 577 +option 1 30 3.555348 3.555348 575 +specifi 1 30 3.555348 3.555348 568 +synchron 1 29 3.583519 3.583519 588 +limit 1 29 3.583519 3.583519 585 +univ 1 28 3.610918 3.610918 617 +intend 1 28 3.610918 3.610918 599 +actual 1 28 3.610918 3.610918 604 +packag 1 28 3.610918 3.610918 614 +berkelei 1 26 3.688879 3.688879 657 +effort 1 26 3.688879 3.688879 652 +consist 1 26 3.688879 3.688879 651 +concern 1 25 3.737670 3.737670 666 +although 1 25 3.737670 3.737670 667 +supercomput 1 25 3.737670 3.737670 681 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +reliabl 1 25 3.737670 3.737670 674 +mike 1 24 3.761200 3.761200 703 +head 1 23 3.806662 3.806662 732 +cooper 1 22 3.850148 3.850148 757 +properti 1 22 3.850148 3.850148 749 +flexibl 1 21 3.912023 3.912023 792 +toolkit 1 20 3.951244 3.951244 835 +exploit 1 20 3.951244 3.951244 836 +smith 1 20 3.951244 3.951244 820 +department 1 20 3.951244 3.951244 839 +prove 1 19 4.007333 4.007333 848 +runtim 1 19 4.007333 4.007333 858 +failur 1 18 4.060443 4.060443 898 +speed 1 18 4.060443 4.060443 911 +element 1 18 4.060443 4.060443 895 +layer 1 17 4.110874 4.110874 926 +seek 1 17 4.110874 4.110874 954 +former 1 17 4.110874 4.110874 956 +moor 1 17 4.110874 4.110874 936 +upon 1 16 4.174387 4.174387 978 +permit 1 16 4.174387 4.174387 962 +critic 1 16 4.174387 4.174387 982 +action 1 15 4.248495 4.248495 1038 +side 1 15 4.248495 4.248495 1022 +becam 1 14 4.317488 4.317488 1117 +horu 1 14 4.317488 4.317488 1116 +coordin 1 13 4.382027 4.382027 1182 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +kenneth 1 12 4.465908 4.465908 1265 +replic 1 12 4.465908 4.465908 1231 +robust 1 12 4.465908 4.465908 1271 +infrastructur 1 12 4.465908 4.465908 1234 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +broad 1 11 4.553877 4.553877 1302 +probabilist 1 11 4.553877 4.553877 1343 +primit 1 11 4.553877 4.553877 1317 +reness 1 11 4.553877 4.553877 1333 +certain 1 10 4.653960 4.653960 1393 +nuprl 1 10 4.653960 4.653960 1402 +guarante 1 10 4.653960 4.653960 1391 +werner 1 10 4.653960 4.653960 1385 +unusu 1 9 4.753590 4.753590 1566 +latter 1 9 4.753590 4.753590 1522 +correctli 1 9 4.753590 4.753590 1478 +robbert 1 9 4.753590 4.753590 1529 +birman 1 9 4.753590 4.753590 1531 +vogel 1 8 4.875197 4.875197 1622 +synchroni 1 7 5.010635 5.010635 1923 +friedman 1 7 5.010635 5.010635 1886 +chief 1 7 5.010635 5.010635 1829 +emerg 1 6 5.164786 5.164786 2038 +privaci 1 6 5.164786 5.164786 2144 +recruit 1 6 5.164786 5.164786 2145 +isi 1 5 5.347108 5.347108 2443 +notabl 1 5 5.347108 5.347108 2276 +broadcast 1 5 5.347108 5.347108 2453 +activitieseditor 1 5 5.347108 5.347108 2454 +popular 1 4 5.568345 5.568345 2802 +hayden 1 4 5.568345 5.568345 2844 +publicationsth 1 4 5.568345 5.568345 2859 +reconfigur 1 3 5.857933 5.857933 3556 +leverag 1 3 5.857933 5.857933 3153 +embodi 1 3 5.857933 5.857933 3236 +reiter 1 3 5.857933 5.857933 3461 +constabl 1 3 5.857933 5.857933 3186 +act 1 3 5.857933 5.857933 3557 +leadership 1 3 5.857933 5.857933 3320 +alamito 1 3 5.857933 5.857933 3558 +glade 1 3 5.857933 5.857933 3537 +benign 1 2 6.263398 6.263398 4893 +activitieschair 1 2 6.263398 6.263398 4894 +isat 1 2 6.263398 6.263398 4895 +birmankenneth 1 1 6.957497 6.957497 9533 +birmanprofessorphd 1 1 6.957497 6.957497 9534 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..2a4d8149 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +center 1 88 2.397895 2.397895 158 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +activ 1 84 2.484907 2.484907 182 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +environ 1 84 2.484907 2.484907 177 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +method 1 80 2.564949 2.564949 213 +dynam 1 76 2.564949 2.564949 194 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +differ 1 66 2.708050 2.708050 253 +prof 1 64 2.772589 2.772589 273 +descript 1 64 2.772589 2.772589 271 +complex 1 64 2.772589 2.772589 269 +evalu 1 64 2.772589 2.772589 266 +creat 1 63 2.772589 2.772589 277 +laboratori 1 63 2.772589 2.772589 292 +interact 1 62 2.772589 2.772589 270 +improv 1 62 2.772589 2.772589 289 +foundat 1 62 2.772589 2.772589 286 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +found 1 53 2.944439 2.944439 337 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +visual 1 48 3.044522 3.044522 372 +california 1 46 3.091042 3.091042 388 +made 1 44 3.135494 3.135494 398 +past 1 42 3.218876 3.218876 428 +annual 1 40 3.258097 3.258097 458 +multi 1 36 3.367296 3.367296 493 +within 1 33 3.433987 3.433987 525 +board 1 33 3.433987 3.433987 528 +anim 1 31 3.496508 3.496508 557 +rang 1 30 3.555348 3.555348 565 +focu 1 30 3.555348 3.555348 571 +particip 1 29 3.583519 3.583519 589 +progress 1 28 3.610918 3.610918 598 +determin 1 27 3.637586 3.637586 630 +american 1 27 3.637586 3.637586 634 +constraint 1 26 3.688879 3.688879 636 +trace 1 25 3.737670 3.737670 677 +strategi 1 25 3.737670 3.737670 682 +hill 1 25 3.737670 3.737670 670 +motion 1 24 3.761200 3.761200 699 +fellow 1 24 3.761200 3.761200 701 +input 1 23 3.806662 3.806662 727 +displai 1 23 3.806662 3.806662 712 +director 1 22 3.850148 3.850148 767 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +geometri 1 22 3.850148 3.850148 752 +flexibl 1 21 3.912023 3.912023 792 +synthesi 1 20 3.951244 3.951244 834 +facil 1 20 3.951244 3.951244 814 +department 1 20 3.951244 3.951244 839 +geometr 1 19 4.007333 4.007333 852 +media 1 19 4.007333 4.007333 861 +north 1 19 4.007333 4.007333 873 +dimension 1 18 4.060443 4.060443 909 +previous 1 17 4.110874 4.110874 923 +render 1 17 4.110874 4.110874 947 +medic 1 17 4.110874 4.110874 958 +moor 1 17 4.110874 4.110874 936 +spatial 1 16 4.174387 4.174387 988 +brown 1 16 4.174387 4.174387 977 +reflect 1 15 4.248495 4.248495 1034 +micro 1 15 4.248495 4.248495 1031 +conduct 1 14 4.317488 4.317488 1065 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +host 1 11 4.553877 4.553877 1306 +volum 1 11 4.553877 4.553877 1347 +modular 1 10 4.653960 4.653960 1392 +donald 1 9 4.753590 4.753590 1510 +routin 1 9 4.753590 4.753590 1549 +surfac 1 9 4.753590 4.753590 1574 +tempor 1 9 4.753590 4.753590 1584 +light 1 9 4.753590 4.753590 1533 +utah 1 9 4.753590 4.753590 1585 +polygon 1 8 4.875197 4.875197 1723 +textur 1 8 4.875197 4.875197 1677 +realist 1 8 4.875197 4.875197 1665 +academi 1 8 4.875197 4.875197 1735 +clip 1 7 5.010635 5.010635 1868 +parametr 1 7 5.010635 5.010635 1819 +suffici 1 7 5.010635 5.010635 1897 +core 1 7 5.010635 5.010635 1809 +hidden 1 6 5.164786 5.164786 1987 +photographi 1 6 5.164786 5.164786 2146 +carolina 1 6 5.164786 5.164786 2142 +biolog 1 6 5.164786 5.164786 2147 +decad 1 5 5.347108 5.347108 2455 +testb 1 5 5.347108 5.347108 2456 +anti 1 5 5.347108 5.347108 2434 +chapel 1 5 5.347108 5.347108 2457 +jacob 1 4 5.568345 5.568345 2667 +gould 1 3 5.857933 5.857933 3559 +alias 1 2 6.263398 6.263398 4823 +activitieseditori 1 2 6.263398 6.263398 4888 +greenbergdonald 1 1 6.957497 6.957497 9535 +greenberg 1 1 6.957497 6.957497 9536 +schurman 1 1 6.957497 6.957497 9537 +perceptu 1 1 6.957497 6.957497 9538 +activitiesdirector 1 1 6.957497 6.957497 9539 +visualizationprofession 1 1 6.957497 6.957497 9540 +acmreturn 1 1 6.957497 6.957497 9541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..bf0b07ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +intern 1 108 2.197225 2.197225 128 +structur 1 106 2.197225 2.197225 105 +world 1 115 2.197225 2.197225 126 +pleas 1 113 2.197225 2.197225 114 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +institut 1 84 2.484907 2.484907 187 +resourc 1 81 2.484907 2.484907 172 +requir 1 81 2.484907 2.484907 167 +member 1 84 2.484907 2.484907 165 +journal 1 83 2.484907 2.484907 183 +school 1 84 2.484907 2.484907 188 +novemb 1 81 2.484907 2.484907 179 +chang 1 82 2.484907 2.484907 163 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +solv 1 73 2.639057 2.639057 234 +nation 1 74 2.639057 2.639057 240 +logic 1 71 2.639057 2.639057 230 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +degre 1 69 2.708050 2.708050 259 +august 1 66 2.708050 2.708050 257 +complex 1 64 2.772589 2.772589 269 +guid 1 63 2.772589 2.772589 267 +interact 1 62 2.772589 2.772589 270 +foundat 1 62 2.772589 2.772589 286 +visit 1 63 2.772589 2.772589 288 +variou 1 56 2.890372 2.890372 317 +explor 1 58 2.890372 2.890372 324 +faculti 1 56 2.890372 2.890372 325 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +particular 1 51 2.995732 2.995732 352 +profession 1 51 2.995732 2.995732 345 +advisor 1 51 2.995732 2.995732 355 +california 1 46 3.091042 3.091042 388 +understand 1 47 3.091042 3.091042 384 +physic 1 47 3.091042 3.091042 377 +electron 1 47 3.091042 3.091042 379 +natur 1 44 3.135494 3.135494 406 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +futur 1 41 3.218876 3.218876 427 +theoret 1 39 3.258097 3.258097 446 +annual 1 40 3.258097 3.258097 458 +random 1 34 3.401197 3.401197 511 +award 1 34 3.401197 3.401197 523 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +within 1 33 3.433987 3.433987 525 +board 1 33 3.433987 3.433987 528 +given 1 32 3.465736 3.465736 538 +richard 1 31 3.496508 3.496508 559 +hard 1 30 3.555348 3.555348 563 +art 1 29 3.583519 3.583519 593 +american 1 27 3.637586 3.637586 634 +consist 1 26 3.688879 3.688879 651 +bound 1 26 3.688879 3.688879 659 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +seri 1 24 3.761200 3.761200 708 +sequenti 1 22 3.850148 3.850148 745 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +director 1 22 3.850148 3.850148 767 +siam 1 21 3.912023 3.912023 800 +divis 1 21 3.912023 3.912023 803 +exploit 1 20 3.951244 3.951244 836 +department 1 20 3.951244 3.951244 839 +walter 1 17 4.110874 4.110874 950 +germani 1 17 4.110874 4.110874 946 +moor 1 17 4.110874 4.110874 936 +georg 1 16 4.174387 4.174387 994 +brown 1 16 4.174387 4.174387 977 +contribut 1 15 4.248495 4.248495 1021 +topolog 1 14 4.317488 4.317488 1089 +essenti 1 13 4.382027 4.382027 1137 +grow 1 12 4.465908 4.465908 1209 +amount 1 12 4.465908 4.465908 1208 +speech 1 12 4.465908 4.465908 1222 +weight 1 12 4.465908 4.465908 1204 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +council 1 11 4.553877 4.553877 1364 +rice 1 11 4.553877 4.553877 1336 +distinguish 1 11 4.553877 4.553877 1357 +govern 1 9 4.753590 4.753590 1581 +classifi 1 9 4.753590 4.753590 1537 +classif 1 9 4.753590 4.753590 1586 +telecommun 1 9 4.753590 4.753590 1565 +observ 1 9 4.753590 4.753590 1578 +quantit 1 8 4.875197 4.875197 1654 +gain 1 8 4.875197 4.875197 1730 +attent 1 8 4.875197 4.875197 1651 +academi 1 8 4.875197 4.875197 1735 +virginia 1 8 4.875197 4.875197 1659 +trade 1 7 5.010635 5.010635 1815 +foreign 1 7 5.010635 5.010635 1919 +dimens 1 7 5.010635 5.010635 1930 +recruit 1 6 5.164786 5.164786 2145 +ture 1 6 5.164786 5.164786 1997 +advisori 1 6 5.164786 5.164786 2148 +chicago 1 6 5.164786 5.164786 2149 +prize 1 6 5.164786 5.164786 2150 +yield 1 5 5.347108 5.347108 2458 +activitieseditor 1 5 5.347108 5.347108 2454 +ifip 1 5 5.347108 5.347108 2459 +houston 1 5 5.347108 5.347108 2460 +scope 1 5 5.347108 5.347108 2296 +bulletin 1 5 5.347108 5.347108 2343 +comprehens 1 4 5.568345 5.568345 2745 +monograph 1 4 5.568345 5.568345 2860 +peer 1 4 5.568345 5.568345 2742 +tennesse 1 4 5.568345 5.568345 2763 +oracl 1 4 5.568345 5.568345 2823 +hypothesi 1 4 5.568345 5.568345 2650 +fals 1 4 5.568345 5.568345 2861 +hausdorff 1 4 5.568345 5.568345 2633 +explos 1 3 5.857933 5.857933 3138 +deeper 1 3 5.857933 5.857933 3146 +off 1 3 5.857933 5.857933 3170 +nondeterminist 1 3 5.857933 5.857933 3560 +activitiesmemb 1 3 5.857933 5.857933 3549 +law 1 2 6.263398 6.263398 4896 +har 1 2 6.263398 6.263398 4252 +aaa 1 2 6.263398 6.263398 4897 +banquet 1 2 6.263398 6.263398 4898 +publicationson 1 2 6.263398 6.263398 4899 +johan 1 2 6.263398 6.263398 4900 +juri 1 1 6.957497 6.957497 9543 +hartmanisjuri 1 1 6.957497 6.957497 9544 +hartmani 1 1 6.957497 6.957497 9545 +strateg 1 1 6.957497 6.957497 9546 +representativeschair 1 1 6.957497 6.957497 9547 +committeehonorsacm 1 1 6.957497 6.957497 9548 +stearn 1 1 6.957497 6.957497 9549 +latvian 1 1 6.957497 6.957497 9550 +charter 1 1 6.957497 6.957497 9551 +eatc 1 1 6.957497 6.957497 9542 +presseditori 1 1 6.957497 6.957497 9552 +sciencegoedel 1 1 6.957497 6.957497 9553 +awardshonorari 1 1 6.957497 6.957497 9554 +dortmund 1 1 6.957497 6.957497 9555 +lecturessom 1 1 6.957497 6.957497 9556 +benni 1 1 6.957497 6.957497 9557 +chor 1 1 6.957497 6.957497 9558 +od 1 1 6.957497 6.957497 9559 +goldreich 1 1 6.957497 6.957497 9560 +hastad 1 1 6.957497 6.957497 9561 +desh 1 1 6.957497 6.957497 9562 +ranjan 1 1 6.957497 6.957497 9563 +pankaj 1 1 6.957497 6.957497 9564 +rohatgi 1 1 6.957497 6.957497 9565 +kolmogorov 1 1 6.957497 6.957497 9566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..8be7da93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +modifi 1 178 1.609438 1.609438 35 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +well 1 109 2.197225 2.197225 121 +manag 1 114 2.197225 2.197225 125 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +pleas 1 113 2.197225 2.197225 114 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +imag 1 91 2.397895 2.397895 161 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +academ 1 82 2.484907 2.484907 178 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +interfac 1 79 2.564949 2.564949 209 +state 1 76 2.564949 2.564949 207 +involv 1 71 2.639057 2.639057 227 +nation 1 74 2.639057 2.639057 240 +david 1 71 2.639057 2.639057 232 +simul 1 66 2.708050 2.708050 255 +multimedia 1 68 2.708050 2.708050 258 +knowledg 1 67 2.708050 2.708050 243 +januari 1 62 2.772589 2.772589 264 +organ 1 65 2.772589 2.772589 265 +colleg 1 61 2.833213 2.833213 300 +variou 1 56 2.890372 2.890372 317 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +appoint 1 49 3.044522 3.044522 358 +physic 1 47 3.091042 3.091042 377 +electron 1 47 3.091042 3.091042 379 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +continu 1 39 3.258097 3.258097 448 +annual 1 40 3.258097 3.258097 458 +electr 1 38 3.295837 3.295837 461 +represent 1 35 3.401197 3.401197 512 +committe 1 34 3.401197 3.401197 522 +john 1 33 3.433987 3.433987 532 +within 1 33 3.433987 3.433987 525 +board 1 33 3.433987 3.433987 528 +collabor 1 32 3.465736 3.465736 543 +storag 1 31 3.496508 3.496508 553 +art 1 29 3.583519 3.583519 593 +american 1 27 3.637586 3.637586 634 +supercomput 1 25 3.737670 3.737670 681 +fellow 1 24 3.761200 3.761200 701 +seri 1 24 3.761200 3.761200 708 +geometri 1 22 3.850148 3.850148 752 +unit 1 21 3.912023 3.912023 779 +among 1 21 3.912023 3.912023 781 +siam 1 21 3.912023 3.912023 800 +department 1 20 3.951244 3.951244 839 +geometr 1 19 4.007333 4.007333 852 +stanford 1 17 4.110874 4.110874 955 +moor 1 17 4.110874 4.110874 936 +remot 1 15 4.248495 4.248495 1041 +dean 1 14 4.317488 4.317488 1104 +heterogen 1 14 4.317488 4.317488 1090 +discret 1 13 4.382027 4.382027 1165 +robust 1 12 4.465908 4.465908 1271 +captur 1 12 4.465908 4.465908 1232 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +persist 1 11 4.553877 4.553877 1367 +council 1 11 4.553877 4.553877 1364 +facilit 1 10 4.653960 4.653960 1412 +packard 1 10 4.653960 4.653960 1444 +fellowship 1 10 4.653960 4.653960 1460 +academi 1 8 4.875197 4.875197 1735 +advisori 1 6 5.164786 5.164786 2148 +oxford 1 6 5.164786 5.164786 2121 +compris 1 4 5.568345 5.568345 2862 +activitiesmemb 1 3 5.857933 5.857933 3549 +defens 1 3 5.857933 5.857933 3327 +algorithmica 1 3 5.857933 5.857933 3561 +commiss 1 2 6.263398 6.263398 4901 +engineeringfellow 1 2 6.263398 6.263398 4902 +sciencesfellow 1 2 6.263398 6.263398 4903 +aaa 1 2 6.263398 6.263398 4897 +hopcroftjohn 1 1 6.957497 6.957497 9567 +hopcroftjoseph 1 1 6.957497 6.957497 9568 +silbert 1 1 6.957497 6.957497 9569 +engineeringprofessor 1 1 6.957497 6.957497 9570 +sciencephd 1 1 6.957497 6.957497 9571 +overse 1 1 6.957497 6.957497 9572 +applicationsmemb 1 1 6.957497 6.957497 9573 +boardmemb 1 1 6.957497 6.957497 9574 +forcememb 1 1 6.957497 6.957497 9575 +machinerychairman 1 1 6.957497 6.957497 9576 +trusteesmemb 1 1 6.957497 6.957497 9577 +lucil 1 1 6.957497 6.957497 9578 +foundationmemb 1 1 6.957497 6.957497 9579 +sloan 1 1 6.957497 6.957497 9580 +committeeadvisori 1 1 6.957497 6.957497 9581 +analysiseditor 1 1 6.957497 6.957497 9582 +geometryassoci 1 1 6.957497 6.957497 9583 +sciencesreturn 1 1 6.957497 6.957497 9584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..0f228523 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +proceed 1 93 2.397895 2.397895 152 +comment 1 93 2.397895 2.397895 146 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +requir 1 81 2.484907 2.484907 167 +build 1 85 2.484907 2.484907 184 +control 1 82 2.484907 2.484907 164 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +method 1 80 2.564949 2.564949 213 +good 1 77 2.564949 2.564949 200 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +optim 1 79 2.564949 2.564949 197 +exampl 1 77 2.564949 2.564949 195 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +involv 1 71 2.639057 2.639057 227 +solv 1 73 2.639057 2.639057 234 +line 1 75 2.639057 2.639057 231 +symposium 1 72 2.639057 2.639057 238 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +david 1 71 2.639057 2.639057 232 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +test 1 66 2.708050 2.708050 252 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +organ 1 65 2.772589 2.772589 265 +januari 1 62 2.772589 2.772589 264 +laboratori 1 63 2.772589 2.772589 292 +ithaca 1 65 2.772589 2.772589 294 +best 1 59 2.833213 2.833213 299 +automat 1 61 2.833213 2.833213 306 +space 1 57 2.890372 2.890372 310 +direct 1 57 2.890372 2.890372 316 +faculti 1 56 2.890372 2.890372 325 +scientif 1 53 2.944439 2.944439 341 +processor 1 54 2.944439 2.944439 335 +local 1 55 2.944439 2.944439 334 +februari 1 54 2.944439 2.944439 328 +profession 1 51 2.995732 2.995732 345 +investig 1 51 2.995732 2.995732 353 +numer 1 49 3.044522 3.044522 369 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +adapt 1 46 3.091042 3.091042 387 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +made 1 44 3.135494 3.135494 398 +better 1 45 3.135494 3.135494 401 +answer 1 45 3.135494 3.135494 391 +math 1 44 3.135494 3.135494 402 +show 1 43 3.178054 3.178054 417 +linear 1 41 3.218876 3.218876 431 +review 1 42 3.218876 3.218876 425 +must 1 40 3.258097 3.258097 442 +small 1 39 3.258097 3.258097 447 +form 1 39 3.258097 3.258097 443 +transact 1 39 3.258097 3.258097 438 +annual 1 40 3.258097 3.258097 458 +open 1 38 3.295837 3.295837 469 +microsoft 1 38 3.295837 3.295837 468 +paul 1 38 3.295837 3.295837 471 +hand 1 37 3.332205 3.332205 475 +singl 1 34 3.401197 3.401197 510 +award 1 34 3.401197 3.401197 523 +return 1 34 3.401197 3.401197 502 +product 1 33 3.433987 3.433987 527 +board 1 33 3.433987 3.433987 528 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +independ 1 32 3.465736 3.465736 548 +produc 1 30 3.555348 3.555348 572 +exist 1 30 3.555348 3.555348 569 +graph 1 30 3.555348 3.555348 576 +depend 1 29 3.583519 3.583519 583 +multiprocessor 1 28 3.610918 3.610918 605 +framework 1 28 3.610918 3.610918 606 +static 1 27 3.637586 3.637586 619 +enabl 1 26 3.688879 3.688879 655 +supercomput 1 25 3.737670 3.737670 681 +alwai 1 24 3.761200 3.761200 691 +known 1 24 3.761200 3.761200 702 +flow 1 24 3.761200 3.761200 700 +consult 1 24 3.761200 3.761200 687 +lab 1 24 3.761200 3.761200 698 +equat 1 23 3.806662 3.806662 724 +togeth 1 23 3.806662 3.806662 714 +deal 1 22 3.850148 3.850148 736 +almost 1 22 3.850148 3.850148 742 +sequenti 1 22 3.850148 3.850148 745 +corpor 1 21 3.912023 3.912023 802 +increas 1 20 3.951244 3.951244 829 +toolkit 1 20 3.951244 3.951244 835 +department 1 20 3.951244 3.951244 839 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +matrix 1 17 4.110874 4.110874 933 +asplo 1 17 4.110874 4.110874 948 +moor 1 17 4.110874 4.110874 936 +spars 1 16 4.174387 4.174387 989 +match 1 16 4.174387 4.174387 965 +transfer 1 16 4.174387 4.174387 967 +normal 1 16 4.174387 4.174387 995 +permit 1 16 4.174387 4.174387 962 +young 1 16 4.174387 4.174387 991 +intel 1 16 4.174387 4.174387 1000 +novel 1 15 4.248495 4.248495 1039 +fortran 1 15 4.248495 4.248495 1027 +massachusett 1 14 4.317488 4.317488 1118 +unfortun 1 13 4.382027 4.382027 1170 +earlier 1 13 4.382027 4.382027 1140 +block 1 13 4.382027 4.382027 1183 +incorpor 1 13 4.382027 4.382027 1163 +sigplan 1 13 4.382027 4.382027 1190 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +mesh 1 11 4.553877 4.553877 1351 +refin 1 11 4.553877 4.553877 1363 +faster 1 11 4.553877 4.553877 1323 +loop 1 11 4.553877 4.553877 1310 +summar 1 11 4.553877 4.553877 1295 +michigan 1 11 4.553877 4.553877 1368 +vladimir 1 11 4.553877 4.553877 1324 +matric 1 10 4.653960 4.653960 1399 +packard 1 10 4.653960 4.653960 1444 +preliminari 1 9 4.753590 4.753590 1480 +prefer 1 9 4.753590 4.753590 1491 +jersei 1 9 4.753590 4.753590 1587 +poor 1 8 4.875197 4.875197 1736 +competit 1 8 4.875197 4.875197 1635 +potenti 1 8 4.875197 4.875197 1690 +hewlett 1 8 4.875197 4.875197 1709 +uniprocessor 1 8 4.875197 4.875197 1696 +presidenti 1 8 4.875197 4.875197 1737 +wayn 1 8 4.875197 4.875197 1738 +pldi 1 8 4.875197 4.875197 1704 +keshav 1 7 5.010635 5.010635 1852 +solver 1 7 5.010635 5.010635 1911 +uniform 1 7 5.010635 5.010635 1845 +refere 1 7 5.010635 5.010635 1895 +elementari 1 7 5.010635 5.010635 1825 +dens 1 6 5.164786 5.164786 2122 +ensur 1 6 5.164786 5.164786 2012 +nest 1 6 5.164786 5.164786 2151 +prize 1 6 5.164786 5.164786 2150 +argonn 1 5 5.347108 5.347108 2461 +compet 1 5 5.347108 5.347108 2462 +decad 1 5 5.347108 5.347108 2455 +panel 1 5 5.347108 5.347108 2463 +seventh 1 5 5.347108 5.347108 2464 +restructur 1 4 5.568345 5.568345 2775 +align 1 4 5.568345 5.568345 2863 +flavor 1 4 5.568345 5.568345 2625 +indupraka 1 4 5.568345 5.568345 2639 +kodukula 1 4 5.568345 5.568345 2640 +stodghil 1 4 5.568345 5.568345 2864 +armi 1 3 5.857933 5.857933 3562 +topla 1 3 5.857933 5.857933 3563 +chelmsford 1 3 5.857933 5.857933 3564 +detroit 1 3 5.857933 5.857933 3565 +rutger 1 3 5.857933 5.857933 3566 +brunswick 1 3 5.857933 5.857933 3567 +redmond 1 3 5.857933 5.857933 3568 +professorphd 1 2 6.263398 6.263398 4904 +numa 1 2 6.263398 6.263398 4905 +lambda 1 2 6.263398 6.263398 4458 +knit 1 2 6.263398 6.263398 4906 +lcpc 1 2 6.263398 6.263398 4538 +kotlyar 1 2 6.263398 6.263398 4907 +pingalikeshav 1 1 6.957497 6.957497 9585 +pingaliassoci 1 1 6.957497 6.957497 9586 +radic 1 1 6.957497 6.957497 9587 +sparsiti 1 1 6.957497 6.957497 9588 +krylov 1 1 6.957497 6.957497 9589 +petsc 1 1 6.957497 6.957497 9590 +activitiespanel 1 1 6.957497 6.957497 9591 +ballist 1 1 6.957497 6.957497 9592 +odyssei 1 1 6.957497 6.957497 9593 +computereditori 1 1 6.957497 6.957497 9594 +awardsn 1 1 6.957497 6.957497 9595 +lecturesfast 1 1 6.957497 6.957497 9596 +publicationssolv 1 1 6.957497 6.957497 9597 +gianfranco 1 1 6.957497 6.957497 9598 +bilardi 1 1 6.957497 6.957497 9599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..c9de5020 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,367 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +note 1 142 1.945910 1.945910 67 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +mathemat 1 108 2.197225 2.197225 123 +specif 1 106 2.197225 2.197225 106 +teach 1 108 2.197225 2.197225 112 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +pleas 1 113 2.197225 2.197225 114 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +text 1 98 2.302585 2.302585 133 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +associ 1 93 2.397895 2.397895 151 +proceed 1 93 2.397895 2.397895 152 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +build 1 85 2.484907 2.484907 184 +second 1 81 2.484907 2.484907 166 +start 1 83 2.484907 2.484907 173 +ieee 1 86 2.484907 2.484907 190 +member 1 84 2.484907 2.484907 165 +school 1 84 2.484907 2.484907 188 +control 1 82 2.484907 2.484907 164 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +educ 1 86 2.484907 2.484907 191 +academ 1 82 2.484907 2.484907 178 +novemb 1 81 2.484907 2.484907 179 +state 1 76 2.564949 2.564949 207 +complet 1 77 2.564949 2.564949 208 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +messag 1 76 2.564949 2.564949 212 +involv 1 71 2.639057 2.639057 227 +appli 1 71 2.639057 2.639057 226 +david 1 71 2.639057 2.639057 232 +logic 1 71 2.639057 2.639057 230 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +nation 1 74 2.639057 2.639057 240 +meet 1 72 2.639057 2.639057 229 +write 1 72 2.639057 2.639057 222 +materi 1 75 2.639057 2.639057 221 +summari 1 73 2.639057 2.639057 237 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +view 1 70 2.708050 2.708050 254 +import 1 65 2.772589 2.772589 282 +evalu 1 64 2.772589 2.772589 266 +function 1 62 2.772589 2.772589 275 +virtual 1 62 2.772589 2.772589 285 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +previou 1 62 2.772589 2.772589 290 +polici 1 64 2.772589 2.772589 279 +organ 1 65 2.772589 2.772589 265 +septemb 1 65 2.772589 2.772589 274 +foundat 1 62 2.772589 2.772589 286 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +reason 1 57 2.890372 2.890372 318 +summer 1 56 2.890372 2.890372 311 +publish 1 57 2.890372 2.890372 326 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +investig 1 51 2.995732 2.995732 353 +run 1 51 2.995732 2.995732 347 +hardwar 1 51 2.995732 2.995732 350 +particular 1 51 2.995732 2.995732 352 +profession 1 51 2.995732 2.995732 345 +standard 1 48 3.044522 3.044522 365 +approach 1 48 3.044522 3.044522 366 +understand 1 47 3.091042 3.091042 384 +physic 1 47 3.091042 3.091042 377 +move 1 47 3.091042 3.091042 382 +could 1 46 3.091042 3.091042 383 +possibl 1 47 3.091042 3.091042 378 +textbook 1 44 3.135494 3.135494 397 +protocol 1 45 3.135494 3.135494 407 +execut 1 45 3.135494 3.135494 404 +term 1 43 3.178054 3.178054 411 +third 1 43 3.178054 3.178054 412 +past 1 42 3.218876 3.218876 428 +editor 1 41 3.218876 3.218876 433 +york 1 41 3.218876 3.218876 435 +continu 1 39 3.258097 3.258097 448 +programm 1 39 3.258097 3.258097 445 +transact 1 39 3.258097 3.258097 438 +annual 1 40 3.258097 3.258097 458 +prototyp 1 38 3.295837 3.295837 463 +origin 1 38 3.295837 3.295837 472 +formal 1 37 3.332205 3.332205 478 +respons 1 37 3.332205 3.332205 476 +hand 1 37 3.332205 3.332205 475 +concurr 1 34 3.401197 3.401197 501 +least 1 35 3.401197 3.401197 516 +singl 1 34 3.401197 3.401197 510 +everi 1 34 3.401197 3.401197 519 +global 1 34 3.401197 3.401197 520 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +given 1 32 3.465736 3.465736 538 +ad 1 32 3.465736 3.465736 544 +exist 1 30 3.555348 3.555348 569 +focus 1 29 3.583519 3.583519 584 +depend 1 29 3.583519 3.583519 583 +synchron 1 29 3.583519 3.583519 588 +art 1 29 3.583519 3.583519 593 +univ 1 28 3.610918 3.610918 617 +becom 1 28 3.610918 3.610918 603 +pass 1 28 3.610918 3.610918 611 +american 1 27 3.637586 3.637586 634 +subject 1 26 3.688879 3.688879 647 +rule 1 26 3.688879 3.688879 638 +relev 1 26 3.688879 3.688879 637 +detect 1 26 3.688879 3.688879 646 +compar 1 26 3.688879 3.688879 648 +concern 1 25 3.737670 3.737670 666 +task 1 25 3.737670 3.737670 678 +hill 1 25 3.737670 3.737670 670 +handl 1 24 3.761200 3.761200 685 +seri 1 24 3.761200 3.761200 708 +equat 1 23 3.806662 3.806662 724 +sequenc 1 23 3.806662 3.806662 734 +mobil 1 23 3.806662 3.806662 730 +methodolog 1 23 3.806662 3.806662 733 +proof 1 23 3.806662 3.806662 720 +reduc 1 22 3.850148 3.850148 759 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +leav 1 21 3.912023 3.912023 772 +avoid 1 21 3.912023 3.912023 799 +verif 1 20 3.951244 3.951244 826 +exploit 1 20 3.951244 3.951244 836 +safeti 1 20 3.951244 3.951244 817 +department 1 20 3.951244 3.951244 839 +north 1 19 4.007333 4.007333 873 +along 1 18 4.060443 4.060443 878 +partial 1 18 4.060443 4.060443 900 +thoma 1 18 4.060443 4.060443 901 +agent 1 18 4.060443 4.060443 910 +scott 1 18 4.060443 4.060443 884 +analyz 1 17 4.110874 4.110874 925 +otherwis 1 17 4.110874 4.110874 922 +whether 1 17 4.110874 4.110874 918 +outlin 1 17 4.110874 4.110874 914 +germani 1 17 4.110874 4.110874 946 +sept 1 17 4.110874 4.110874 952 +moor 1 17 4.110874 4.110874 936 +critic 1 16 4.174387 4.174387 982 +georg 1 16 4.174387 4.174387 994 +letter 1 16 4.174387 4.174387 981 +hybrid 1 15 4.248495 4.248495 1057 +coordin 1 13 4.382027 4.382027 1182 +carri 1 13 4.382027 4.382027 1152 +nasa 1 13 4.382027 4.382027 1188 +discret 1 13 4.382027 4.382027 1165 +replic 1 12 4.465908 4.465908 1231 +asynchron 1 12 4.465908 4.465908 1229 +speech 1 12 4.465908 4.465908 1222 +verifi 1 12 4.465908 4.465908 1261 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +reness 1 11 4.553877 4.553877 1333 +bandwidth 1 11 4.553877 4.553877 1365 +typic 1 11 4.553877 4.553877 1360 +refin 1 11 4.553877 4.553877 1363 +distinguish 1 11 4.553877 4.553877 1357 +israel 1 11 4.553877 4.553877 1366 +arpa 1 11 4.553877 4.553877 1369 +volum 1 11 4.553877 4.553877 1347 +island 1 11 4.553877 4.553877 1345 +resid 1 10 4.653960 4.653960 1461 +tradit 1 10 4.653960 4.653960 1404 +invit 1 10 4.653960 4.653960 1428 +traffic 1 10 4.653960 4.653960 1421 +forc 1 10 4.653960 4.653960 1384 +robbert 1 9 4.753590 4.753590 1529 +compos 1 9 4.753590 4.753590 1527 +realiz 1 8 4.875197 4.875197 1739 +filter 1 8 4.875197 4.875197 1641 +satisfi 1 8 4.875197 4.875197 1694 +virginia 1 8 4.875197 4.875197 1659 +colloquium 1 8 4.875197 4.875197 1734 +paradigm 1 8 4.875197 4.875197 1662 +predic 1 7 5.010635 5.010635 1806 +chief 1 7 5.010635 5.010635 1829 +fifth 1 7 5.010635 5.010635 1931 +merg 1 7 5.010635 5.010635 1862 +fred 1 6 5.164786 5.164786 2072 +brook 1 6 5.164786 5.164786 2152 +infer 1 6 5.164786 5.164786 2040 +risc 1 6 5.164786 5.164786 2016 +ensur 1 6 5.164786 5.164786 2012 +moder 1 6 5.164786 5.164786 2112 +mistak 1 6 5.164786 5.164786 2110 +carolina 1 6 5.164786 5.164786 2142 +kluwer 1 6 5.164786 5.164786 2143 +causal 1 6 5.164786 5.164786 2024 +mission 1 5 5.347108 5.347108 2465 +attract 1 5 5.347108 5.347108 2356 +activitieseditor 1 5 5.347108 5.347108 2454 +ifip 1 5 5.347108 5.347108 2459 +merit 1 5 5.347108 5.347108 2466 +speaker 1 5 5.347108 5.347108 2370 +chapel 1 5 5.347108 5.347108 2457 +panel 1 5 5.347108 5.347108 2463 +proposit 1 5 5.347108 5.347108 2339 +bulletin 1 5 5.347108 5.347108 2343 +schneider 1 4 5.568345 5.568345 2868 +increasingli 1 4 5.568345 5.568345 2766 +ident 1 4 5.568345 5.568345 2826 +suffic 1 4 5.568345 5.568345 2869 +conserv 1 4 5.568345 5.568345 2870 +stoller 1 4 5.568345 5.568345 2866 +monograph 1 4 5.568345 5.568345 2860 +assur 1 4 5.568345 5.568345 2722 +dagstuhl 1 4 5.568345 5.568345 2871 +sigcs 1 4 5.568345 5.568345 2865 +nashvil 1 4 5.568345 5.568345 2867 +tennesse 1 4 5.568345 5.568345 2763 +technion 1 4 5.568345 5.568345 2856 +aircraft 1 4 5.568345 5.568345 2872 +newslett 1 4 5.568345 5.568345 2873 +stoni 1 3 5.857933 5.857933 3571 +heavili 1 3 5.857933 5.857933 3572 +gri 1 3 5.857933 5.857933 3569 +streamlin 1 3 5.857933 5.857933 3573 +jointli 1 3 5.857933 5.857933 3118 +dimac 1 3 5.857933 5.857933 3574 +defens 1 3 5.857933 5.857933 3327 +reactiv 1 3 5.857933 5.857933 3575 +am 1 3 5.857933 5.857933 3386 +munich 1 3 5.857933 5.857933 3570 +haifa 1 3 5.857933 5.857933 3554 +successor 1 3 5.857933 5.857933 3576 +hoto 1 3 5.857933 5.857933 3577 +orca 1 3 5.857933 5.857933 3578 +professorphd 1 2 6.263398 6.263398 4904 +widespread 1 2 6.263398 6.263398 4911 +hypervisor 1 2 6.263398 6.263398 4549 +replica 1 2 6.263398 6.263398 4206 +norwai 1 2 6.263398 6.263398 4908 +tacoma 1 2 6.263398 6.263398 4909 +annal 1 2 6.263398 6.263398 4912 +systemsprogram 1 2 6.263398 6.263398 4882 +isat 1 2 6.263398 6.263398 4895 +warfar 1 2 6.263398 6.263398 4910 +hebrew 1 2 6.263398 6.263398 4913 +banquet 1 2 6.263398 6.263398 4898 +grante 1 2 6.263398 6.263398 4914 +contractor 1 2 6.263398 6.263398 4915 +mason 1 2 6.263398 6.263398 4916 +airplan 1 2 6.263398 6.263398 4917 +jerusalem 1 2 6.263398 6.263398 4918 +marzullo 1 2 6.263398 6.263398 4919 +schneiderfr 1 1 6.957497 6.957497 9605 +assert 1 1 6.957497 6.957497 9606 +undefin 1 1 6.957497 6.957497 9607 +bressoud 1 1 6.957497 6.957497 9608 +instantli 1 1 6.957497 6.957497 9609 +freed 1 1 6.957497 6.957497 9610 +johansen 1 1 6.957497 6.957497 9603 +trom 1 1 6.957497 6.957497 9600 +roam 1 1 6.957497 6.957497 9611 +activitiessabbat 1 1 6.957497 6.957497 9612 +computingeditor 1 1 6.957497 6.957497 9613 +letterseditor 1 1 6.957497 6.957497 9614 +engineeringeditor 1 1 6.957497 6.957497 9604 +systemseditor 1 1 6.957497 6.957497 9615 +surveysco 1 1 6.957497 6.957497 9616 +verlagprogram 1 1 6.957497 6.957497 9617 +constructionprogram 1 1 6.957497 6.957497 9618 +resili 1 1 6.957497 6.957497 9619 +applicationsprogram 1 1 6.957497 6.957497 9620 +sixteenth 1 1 6.957497 6.957497 9621 +symposiumprogram 1 1 6.957497 6.957497 9622 +systemsst 1 1 6.957497 6.957497 9623 +chissa 1 1 6.957497 6.957497 9624 +technologymemb 1 1 6.957497 6.957497 9625 +agencyreview 1 1 6.957497 6.957497 9626 +leibniz 1 1 6.957497 6.957497 9627 +universitymemb 1 1 6.957497 6.957497 9628 +awardsfellow 1 1 6.957497 6.957497 9629 +sciencefellow 1 1 6.957497 6.957497 9630 +machinerylecturesproof 1 1 6.957497 6.957497 9631 +marktoberdorf 1 1 6.957497 6.957497 9601 +afosr 1 1 6.957497 6.957497 9632 +lubeck 1 1 6.957497 6.957497 9602 +panelist 1 1 6.957497 6.957497 9633 +publicationsreason 1 1 6.957497 6.957497 9634 +icalp 1 1 6.957497 6.957497 9635 +boll 1 1 6.957497 6.957497 9636 +limor 1 1 6.957497 6.957497 9637 +ultradepend 1 1 6.957497 6.957497 9638 +dehn 1 1 6.957497 6.957497 9639 +primu 1 1 6.957497 6.957497 9640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..a12250dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +current 1 284 1.098612 1.098612 21 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +pleas 1 113 2.197225 2.197225 114 +text 1 98 2.302585 2.302585 133 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +method 1 80 2.564949 2.564949 213 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +solv 1 73 2.639057 2.639057 234 +result 1 65 2.772589 2.772589 281 +descript 1 64 2.772589 2.772589 271 +import 1 65 2.772589 2.772589 282 +organ 1 65 2.772589 2.772589 265 +januari 1 62 2.772589 2.772589 264 +plai 1 60 2.833213 2.833213 307 +variou 1 56 2.890372 2.890372 317 +semest 1 58 2.890372 2.890372 312 +undergradu 1 54 2.944439 2.944439 338 +particular 1 51 2.995732 2.995732 352 +profession 1 51 2.995732 2.995732 345 +adapt 1 46 3.091042 3.091042 387 +fast 1 42 3.218876 3.218876 429 +continu 1 39 3.258097 3.258097 448 +multipl 1 39 3.258097 3.258097 453 +societi 1 40 3.258097 3.258097 456 +annual 1 40 3.258097 3.258097 458 +close 1 38 3.295837 3.295837 465 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +product 1 33 3.433987 3.433987 527 +curriculum 1 33 3.433987 3.433987 535 +transform 1 32 3.465736 3.465736 542 +art 1 29 3.583519 3.583519 593 +chair 1 29 3.583519 3.583519 596 +subject 1 26 3.688879 3.688879 647 +constraint 1 26 3.688879 3.688879 636 +repres 1 26 3.688879 3.688879 656 +siam 1 21 3.912023 3.912023 800 +department 1 20 3.951244 3.951244 839 +four 1 18 4.060443 4.060443 905 +matrix 1 17 4.110874 4.110874 933 +moor 1 17 4.110874 4.110874 936 +role 1 14 4.317488 4.317488 1101 +charl 1 13 4.382027 4.382027 1149 +deriv 1 13 4.382027 4.382027 1145 +translat 1 13 4.382027 4.382027 1164 +reader 1 12 4.465908 4.465908 1246 +optic 1 12 4.465908 4.465908 1221 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +michigan 1 11 4.553877 4.553877 1368 +loop 1 11 4.553877 4.553877 1310 +bandwidth 1 11 4.553877 4.553877 1365 +america 1 11 4.553877 4.553877 1370 +matric 1 10 4.653960 4.653960 1399 +factor 1 9 4.753590 4.753590 1544 +signal 1 7 5.010635 5.010635 1910 +sweden 1 7 5.010635 5.010635 1885 +proce 1 6 5.164786 5.164786 2114 +prize 1 6 5.164786 5.164786 2150 +markov 1 5 5.347108 5.347108 2280 +activitieseditor 1 5 5.347108 5.347108 2454 +ohio 1 5 5.347108 5.347108 2447 +wavelet 1 4 5.568345 5.568345 2874 +analog 1 4 5.568345 5.568345 2875 +admiss 1 4 5.568345 5.568345 2704 +wilkinson 1 3 5.857933 5.857933 3579 +pitsiani 1 3 5.857933 5.857933 3175 +nearest 1 2 6.263398 6.263398 4922 +anticip 1 2 6.263398 6.263398 4268 +activitiescomput 1 2 6.263398 6.263398 4923 +household 1 2 6.263398 6.263398 4920 +intuit 1 2 6.263398 6.263398 4921 +loancharl 1 1 6.957497 6.957497 9643 +loanprofessorphd 1 1 6.957497 6.957497 9644 +kroneck 1 1 6.957497 6.957497 9641 +inhomogen 1 1 6.957497 6.957497 9645 +committeedepart 1 1 6.957497 6.957497 9646 +meetingfreshman 1 1 6.957497 6.957497 9647 +analysismemb 1 1 6.957497 6.957497 9648 +diprima 1 1 6.957497 6.957497 9649 +lecturesappl 1 1 6.957497 6.957497 9650 +linkop 1 1 6.957497 6.957497 9651 +umea 1 1 6.957497 6.957497 9642 +publicationsoptim 1 1 6.957497 6.957497 9652 +ellerbroek 1 1 6.957497 6.957497 9653 +plemmon 1 1 6.957497 6.957497 9654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..54df325e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +report 1 131 2.079442 2.079442 92 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +mathemat 1 108 2.197225 2.197225 123 +pleas 1 113 2.197225 2.197225 114 +associ 1 93 2.397895 2.397895 151 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +level 1 87 2.484907 2.484907 180 +institut 1 84 2.484907 2.484907 187 +novemb 1 81 2.484907 2.484907 179 +logic 1 71 2.639057 2.639057 230 +undergradu 1 54 2.944439 2.944439 338 +profession 1 51 2.995732 2.995732 345 +electron 1 47 3.091042 3.091042 379 +annual 1 40 3.258097 3.258097 458 +electr 1 38 3.295837 3.295837 461 +respons 1 37 3.332205 3.332205 476 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +curriculum 1 33 3.433987 3.433987 535 +symbol 1 27 3.637586 3.637586 620 +revis 1 26 3.688879 3.688879 640 +primari 1 25 3.737670 3.737670 669 +other 1 24 3.761200 3.761200 697 +prepar 1 20 3.951244 3.951244 824 +department 1 20 3.951244 3.951244 839 +lower 1 18 4.060443 4.060443 886 +moor 1 17 4.110874 4.110874 936 +women 1 16 4.174387 4.174387 1004 +pageif 1 12 4.465908 4.465908 1275 +denis 1 12 4.465908 4.465908 1255 +introductori 1 9 4.753590 4.753590 1479 +machineri 1 4 5.568345 5.568345 2851 +activitiescomput 1 2 6.263398 6.263398 4923 +activitiesassoci 1 2 6.263398 6.263398 4881 +catherin 1 1 6.957497 6.957497 9655 +wagnercatherin 1 1 6.957497 6.957497 9656 +wagnersenior 1 1 6.957497 6.957497 9657 +lecturerphd 1 1 6.957497 6.957497 9658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..0564316d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,351 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +technolog 1 131 2.079442 2.079442 102 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +novemb 1 81 2.484907 2.484907 179 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +ieee 1 86 2.484907 2.484907 190 +server 1 76 2.564949 2.564949 204 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +june 1 79 2.564949 2.564949 214 +upson 1 71 2.639057 2.639057 218 +servic 1 72 2.639057 2.639057 236 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +line 1 75 2.639057 2.639057 231 +html 1 75 2.639057 2.639057 235 +david 1 71 2.639057 2.639057 232 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +laboratori 1 63 2.772589 2.772589 292 +experi 1 64 2.772589 2.772589 283 +previou 1 62 2.772589 2.772589 290 +copi 1 63 2.772589 2.772589 284 +septemb 1 65 2.772589 2.772589 274 +type 1 61 2.833213 2.833213 296 +plai 1 60 2.833213 2.833213 307 +automat 1 61 2.833213 2.833213 306 +locat 1 59 2.833213 2.833213 303 +best 1 59 2.833213 2.833213 299 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +faculti 1 56 2.890372 2.890372 325 +semest 1 58 2.890372 2.890372 312 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +sever 1 56 2.890372 2.890372 322 +explor 1 58 2.890372 2.890372 324 +local 1 55 2.944439 2.944439 334 +found 1 53 2.944439 2.944439 337 +talk 1 53 2.944439 2.944439 336 +undergradu 1 54 2.944439 2.944439 338 +februari 1 54 2.944439 2.944439 328 +hardwar 1 51 2.995732 2.995732 350 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +digit 1 52 2.995732 2.995732 348 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +format 1 48 3.044522 3.044522 356 +california 1 46 3.091042 3.091042 388 +possibl 1 47 3.091042 3.091042 378 +effect 1 46 3.091042 3.091042 385 +electron 1 47 3.091042 3.091042 379 +video 1 44 3.135494 3.135494 405 +protocol 1 45 3.135494 3.135494 407 +describ 1 45 3.135494 3.135494 400 +anoth 1 45 3.135494 3.135494 408 +made 1 44 3.135494 3.135494 398 +around 1 43 3.178054 3.178054 415 +third 1 43 3.178054 3.178054 412 +review 1 42 3.218876 3.218876 425 +music 1 42 3.218876 3.218876 436 +fast 1 42 3.218876 3.218876 429 +continu 1 39 3.258097 3.258097 448 +must 1 40 3.258097 3.258097 442 +brian 1 38 3.295837 3.295837 466 +slide 1 38 3.295837 3.295837 467 +connect 1 37 3.332205 3.332205 485 +workstat 1 37 3.332205 3.332205 479 +represent 1 35 3.401197 3.401197 512 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +queri 1 33 3.433987 3.433987 524 +ad 1 32 3.465736 3.465736 544 +idea 1 32 3.465736 3.465736 545 +independ 1 32 3.465736 3.465736 548 +storag 1 31 3.496508 3.496508 553 +common 1 30 3.555348 3.555348 574 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +domain 1 30 3.555348 3.555348 564 +built 1 29 3.583519 3.583519 592 +load 1 28 3.610918 3.610918 601 +retriev 1 27 3.637586 3.637586 621 +manipul 1 27 3.637586 3.637586 624 +berkelei 1 26 3.688879 3.688879 657 +compar 1 26 3.688879 3.688879 648 +effort 1 26 3.688879 3.688879 652 +proc 1 26 3.688879 3.688879 649 +toward 1 25 3.737670 3.737670 668 +client 1 25 3.737670 3.737670 679 +store 1 24 3.761200 3.761200 693 +scalabl 1 24 3.761200 3.761200 705 +motion 1 24 3.761200 3.761200 699 +initi 1 23 3.806662 3.806662 717 +lead 1 23 3.806662 3.806662 718 +compress 1 23 3.806662 3.806662 719 +decis 1 23 3.806662 3.806662 728 +famili 1 23 3.806662 3.806662 735 +serv 1 22 3.850148 3.850148 758 +almost 1 22 3.850148 3.850148 742 +sent 1 22 3.850148 3.850148 763 +util 1 21 3.912023 3.912023 774 +programminglanguag 1 21 3.912023 3.912023 782 +thu 1 21 3.912023 3.912023 773 +smith 1 20 3.951244 3.951244 820 +reserv 1 20 3.951244 3.951244 808 +basi 1 20 3.951244 3.951244 828 +qualiti 1 20 3.951244 3.951244 832 +mpeg 1 20 3.951244 3.951244 831 +thur 1 19 4.007333 4.007333 847 +media 1 19 4.007333 4.007333 861 +appropri 1 18 4.060443 4.060443 883 +speed 1 18 4.060443 4.060443 911 +advantag 1 16 4.174387 4.174387 987 +across 1 16 4.174387 4.174387 974 +earli 1 16 4.174387 4.174387 968 +jose 1 16 4.174387 4.174387 976 +diego 1 16 4.174387 4.174387 992 +driven 1 15 4.248495 4.248495 1048 +balanc 1 14 4.317488 4.317488 1112 +audio 1 14 4.317488 4.317488 1094 +heterogen 1 14 4.317488 4.317488 1090 +camera 1 14 4.317488 4.317488 1115 +francisco 1 14 4.317488 4.317488 1095 +central 1 13 4.382027 4.382027 1160 +suit 1 13 4.382027 4.382027 1129 +translat 1 13 4.382027 4.382027 1164 +jonathan 1 13 4.382027 4.382027 1174 +resolut 1 13 4.382027 4.382027 1172 +canada 1 13 4.382027 4.382027 1158 +misc 1 13 4.382027 4.382027 1124 +verifi 1 12 4.465908 4.465908 1261 +infrastructur 1 12 4.465908 4.465908 1234 +promot 1 12 4.465908 4.465908 1235 +remov 1 12 4.465908 4.465908 1225 +amount 1 12 4.465908 4.465908 1208 +readi 1 12 4.465908 4.465908 1242 +tue 1 11 4.553877 4.553877 1308 +peter 1 11 4.553877 4.553877 1316 +stephen 1 11 4.553877 4.553877 1342 +player 1 11 4.553877 4.553877 1371 +interestsmi 1 10 4.653960 4.653960 1462 +operatingsystem 1 10 4.653960 4.653960 1401 +equal 1 10 4.653960 4.653960 1424 +establish 1 9 4.753590 4.753590 1532 +charg 1 9 4.753590 4.753590 1582 +transmiss 1 9 4.753590 4.753590 1588 +xerox 1 8 4.875197 4.875197 1725 +contrast 1 8 4.875197 4.875197 1637 +transport 1 8 4.875197 4.875197 1672 +rivl 1 8 4.875197 4.875197 1632 +theme 1 8 4.875197 4.875197 1707 +colloquium 1 8 4.875197 4.875197 1734 +usabl 1 7 5.010635 5.010635 1810 +suffici 1 7 5.010635 5.010635 1897 +pronounc 1 7 5.010635 5.010635 1918 +lawrenc 1 7 5.010635 5.010635 1908 +prioriti 1 7 5.010635 5.010635 1792 +supportfor 1 7 5.010635 5.010635 1854 +simultan 1 6 5.164786 5.164786 2155 +invest 1 6 5.164786 5.164786 2153 +jpeg 1 6 5.164786 5.164786 2053 +quickli 1 6 5.164786 5.164786 2000 +toronto 1 6 5.164786 5.164786 2156 +patel 1 6 5.164786 5.164786 2154 +spie 1 6 5.164786 5.164786 2119 +internationalconfer 1 6 5.164786 5.164786 2051 +adopt 1 5 5.347108 5.347108 2467 +cyclic 1 5 5.347108 5.347108 2383 +consum 1 5 5.347108 5.347108 2334 +computerarchitectur 1 5 5.347108 5.347108 2290 +row 1 5 5.347108 5.347108 2330 +webster 1 5 5.347108 5.347108 2468 +minnesota 1 5 5.347108 5.347108 2469 +publicationsresearch 1 4 5.568345 5.568345 2876 +isthat 1 4 5.568345 5.568345 2723 +hypothesi 1 4 5.568345 5.568345 2650 +ofworkst 1 4 5.568345 5.568345 2679 +poorli 1 4 5.568345 5.568345 2781 +commonli 1 4 5.568345 5.568345 2877 +fold 1 4 5.568345 5.568345 2615 +swartz 1 4 5.568345 5.568345 2878 +zeno 1 3 5.857933 5.857933 3580 +networkprotocol 1 3 5.857933 5.857933 3285 +thetim 1 3 5.857933 5.857933 3581 +magnitud 1 3 5.857933 5.857933 3582 +rival 1 3 5.857933 5.857933 3583 +quicktim 1 3 5.857933 5.857933 3493 +anaheim 1 3 5.857933 5.857933 3271 +intereststeachingselect 1 2 6.263398 6.263398 4924 +andprocess 1 2 6.263398 6.263398 4925 +withlarg 1 2 6.263398 6.263398 4926 +needto 1 2 6.263398 6.263398 4927 +thecommun 1 2 6.263398 6.263398 4928 +builton 1 2 6.263398 6.263398 4094 +availableonlin 1 2 6.263398 6.263398 4929 +aredevelop 1 2 6.263398 6.263398 4930 +todramat 1 2 6.263398 6.263398 4250 +animplement 1 2 6.263398 6.263398 4931 +insoftwar 1 2 6.263398 6.263398 4932 +asif 1 2 6.263398 6.263398 4933 +ghia 1 2 6.263398 6.263398 4934 +chamberlin 1 2 6.263398 6.263398 4745 +hum 1 2 6.263398 6.263398 4935 +sanfrancisco 1 2 6.263398 6.263398 4339 +decod 1 2 6.263398 6.263398 4936 +bsmith 1 1 6.957497 6.957497 9664 +talksmisc 1 1 6.957497 6.957497 9665 +linksresearch 1 1 6.957497 6.957497 9666 +ourcomput 1 1 6.957497 6.957497 9667 +commercialand 1 1 6.957497 6.957497 9668 +aredesign 1 1 6.957497 6.957497 9669 +premis 1 1 6.957497 6.957497 9670 +infrastructurei 1 1 6.957497 6.957497 9671 +andappl 1 1 6.957497 6.957497 9672 +workingsystem 1 1 6.957497 6.957497 9673 +zenodistribut 1 1 6.957497 6.957497 9674 +anethernet 1 1 6.957497 6.957497 9675 +serverof 1 1 6.957497 6.957497 9676 +videostor 1 1 6.957497 6.957497 9677 +fileserv 1 1 6.957497 6.957497 9659 +severalserv 1 1 6.957497 6.957497 9678 +effortdeliveri 1 1 6.957497 6.957497 9679 +resourcereserv 1 1 6.957497 6.957497 9680 +communicationinfrastructur 1 1 6.957497 6.957497 9681 +forbandwidth 1 1 6.957497 6.957497 9682 +networkenviron 1 1 6.957497 6.957497 9683 +accessiblebi 1 1 6.957497 6.957497 9684 +latterenviron 1 1 6.957497 6.957497 9685 +datagram 1 1 6.957497 6.957497 9686 +audioand 1 1 6.957497 6.957497 9687 +playback 1 1 6.957497 6.957497 9660 +metropolitan 1 1 6.957497 6.957497 9688 +andwid 1 1 6.957497 6.957497 9689 +todeliv 1 1 6.957497 6.957497 9690 +compressedrepresent 1 1 6.957497 6.957497 9691 +decompress 1 1 6.957497 6.957497 9661 +reducesth 1 1 6.957497 6.957497 9692 +indicatesthat 1 1 6.957497 6.957497 9693 +fasterthan 1 1 6.957497 6.957497 9694 +currentlyextend 1 1 6.957497 6.957497 9695 +transcod 1 1 6.957497 6.957497 9662 +onecompress 1 1 6.957497 6.957497 9696 +compresseddomain 1 1 6.957497 6.957497 9697 +simplifyexperiment 1 1 6.957497 6.957497 9698 +calledrivl 1 1 6.957497 6.957497 9699 +allowsvideo 1 1 6.957497 6.957497 9700 +resolutionand 1 1 6.957497 6.957497 9701 +whatpostscript 1 1 6.957497 6.957497 9702 +resolutionindepend 1 1 6.957497 6.957497 9703 +sameprogram 1 1 6.957497 6.957497 9704 +whileedit 1 1 6.957497 6.957497 9705 +qualityfinish 1 1 6.957497 6.957497 9706 +bepreview 1 1 6.957497 6.957497 9707 +dpiprint 1 1 6.957497 6.957497 9708 +onvideo 1 1 6.957497 6.957497 9709 +videous 1 1 6.957497 6.957497 9710 +teachingat 1 1 6.957497 6.957497 9711 +logan 1 1 6.957497 6.957497 9712 +ontario 1 1 6.957497 6.957497 9713 +ketan 1 1 6.957497 6.957497 9663 +annett 1 1 6.957497 6.957497 9714 +hanna 1 1 6.957497 6.957497 9715 +mmcn 1 1 6.957497 6.957497 9716 +documentationth 1 1 6.957497 6.957497 9717 +priceweb 1 1 6.957497 6.957497 9718 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..b5c63d52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,280 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +studi 1 120 2.079442 2.079442 91 +provid 1 121 2.079442 2.079442 94 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +assist 1 112 2.197225 2.197225 113 +mathemat 1 108 2.197225 2.197225 123 +make 1 111 2.197225 2.197225 120 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +structur 1 106 2.197225 2.197225 105 +final 1 116 2.197225 2.197225 108 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +associ 1 93 2.397895 2.397895 151 +wide 1 84 2.484907 2.484907 185 +build 1 85 2.484907 2.484907 184 +second 1 81 2.484907 2.484907 166 +journal 1 83 2.484907 2.484907 183 +academ 1 82 2.484907 2.484907 178 +control 1 82 2.484907 2.484907 164 +method 1 80 2.564949 2.564949 213 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +decemb 1 80 2.564949 2.564949 215 +state 1 76 2.564949 2.564949 207 +solv 1 73 2.639057 2.639057 234 +involv 1 71 2.639057 2.639057 227 +line 1 75 2.639057 2.639057 231 +logic 1 71 2.639057 2.639057 230 +david 1 71 2.639057 2.639057 232 +write 1 72 2.639057 2.639057 222 +symposium 1 72 2.639057 2.639057 238 +meet 1 72 2.639057 2.639057 229 +appli 1 71 2.639057 2.639057 226 +differ 1 66 2.708050 2.708050 253 +improv 1 62 2.772589 2.772589 289 +descript 1 64 2.772589 2.772589 271 +experi 1 64 2.772589 2.772589 283 +januari 1 62 2.772589 2.772589 264 +type 1 61 2.833213 2.833213 296 +content 1 59 2.833213 2.833213 302 +march 1 61 2.833213 2.833213 295 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +hardwar 1 51 2.995732 2.995732 350 +profession 1 51 2.995732 2.995732 345 +principl 1 48 3.044522 3.044522 357 +effect 1 46 3.091042 3.091042 385 +possibl 1 47 3.091042 3.091042 378 +natur 1 44 3.135494 3.135494 406 +mark 1 44 3.135494 3.135494 403 +algebra 1 45 3.135494 3.135494 394 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +mechan 1 43 3.178054 3.178054 416 +past 1 42 3.218876 3.218876 428 +futur 1 41 3.218876 3.218876 427 +review 1 42 3.218876 3.218876 425 +york 1 41 3.218876 3.218876 435 +continu 1 39 3.258097 3.258097 448 +theoret 1 39 3.258097 3.258097 446 +annual 1 40 3.258097 3.258097 458 +paul 1 38 3.295837 3.295837 471 +electr 1 38 3.295837 3.295837 461 +formal 1 37 3.332205 3.332205 478 +hand 1 37 3.332205 3.332205 475 +expect 1 37 3.332205 3.332205 484 +connect 1 37 3.332205 3.332205 485 +especi 1 36 3.367296 3.367296 496 +soon 1 36 3.367296 3.367296 494 +committe 1 34 3.401197 3.401197 522 +transform 1 32 3.465736 3.465736 542 +collabor 1 32 3.465736 3.465736 543 +richard 1 31 3.496508 3.496508 559 +robert 1 30 3.555348 3.555348 567 +power 1 30 3.555348 3.555348 573 +compon 1 30 3.555348 3.555348 570 +domain 1 30 3.555348 3.555348 564 +chair 1 29 3.583519 3.583519 596 +built 1 29 3.583519 3.583519 592 +packag 1 28 3.610918 3.610918 614 +hope 1 28 3.610918 3.610918 610 +great 1 27 3.637586 3.637586 626 +symbol 1 27 3.637586 3.637586 620 +american 1 27 3.637586 3.637586 634 +experiment 1 26 3.688879 3.688879 645 +enabl 1 26 3.688879 3.688879 655 +effort 1 26 3.688879 3.688879 652 +wai 1 25 3.737670 3.737670 662 +aspect 1 25 3.737670 3.737670 663 +fundament 1 25 3.737670 3.737670 661 +seri 1 24 3.761200 3.761200 708 +proof 1 23 3.806662 3.806662 720 +togeth 1 23 3.806662 3.806662 714 +varieti 1 22 3.850148 3.850148 740 +william 1 22 3.850148 3.850148 765 +deal 1 22 3.850148 3.850148 736 +inth 1 22 3.850148 3.850148 741 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +theorem 1 21 3.912023 3.912023 786 +divis 1 21 3.912023 3.912023 803 +entir 1 20 3.951244 3.951244 811 +synthesi 1 20 3.951244 3.951244 834 +verif 1 20 3.951244 3.951244 826 +expert 1 20 3.951244 3.951244 833 +facil 1 20 3.951244 3.951244 814 +prove 1 19 4.007333 4.007333 848 +definit 1 19 4.007333 4.007333 864 +north 1 19 4.007333 4.007333 873 +minim 1 18 4.060443 4.060443 887 +explan 1 16 4.174387 4.174387 985 +stream 1 15 4.248495 4.248495 1015 +contribut 1 15 4.248495 4.248495 1021 +style 1 15 4.248495 4.248495 1036 +consider 1 14 4.317488 4.317488 1076 +incomput 1 14 4.317488 4.317488 1096 +near 1 14 4.317488 4.317488 1091 +joint 1 13 4.382027 4.382027 1130 +circuit 1 13 4.382027 4.382027 1131 +weak 1 13 4.382027 4.382027 1159 +canada 1 13 4.382027 4.382027 1158 +captur 1 12 4.465908 4.465908 1232 +franc 1 12 4.465908 4.465908 1276 +philadelphia 1 12 4.465908 4.465908 1244 +excit 1 11 4.553877 4.553877 1329 +refin 1 11 4.553877 4.553877 1363 +israel 1 11 4.553877 4.553877 1366 +nuprl 1 10 4.653960 4.653960 1402 +modular 1 10 4.653960 4.653960 1392 +devis 1 10 4.653960 4.653960 1451 +suitabl 1 9 4.753590 4.753590 1486 +colloquium 1 8 4.875197 4.875197 1734 +beyond 1 7 5.010635 5.010635 1834 +pennsylvania 1 7 5.010635 5.010635 1932 +feasibl 1 6 5.164786 5.164786 2157 +handbook 1 6 5.164786 5.164786 2061 +oxford 1 6 5.164786 5.164786 2121 +recruit 1 6 5.164786 5.164786 2145 +pari 1 6 5.164786 5.164786 2158 +indiana 1 6 5.164786 5.164786 2057 +eduph 1 5 5.347108 5.347108 2449 +allen 1 5 5.347108 5.347108 2470 +dougla 1 5 5.347108 5.347108 2471 +begun 1 5 5.347108 5.347108 2386 +activitieseditor 1 5 5.347108 5.347108 2454 +weyl 1 4 5.568345 5.568345 2854 +zippel 1 4 5.568345 5.568345 2879 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +theoremprov 1 3 5.857933 5.857933 3298 +moreov 1 3 5.857933 5.857933 3200 +predecessor 1 3 5.857933 5.857933 3585 +stuart 1 3 5.857933 5.857933 3584 +jackson 1 3 5.857933 5.857933 3586 +how 1 3 5.857933 5.857933 3289 +boolean 1 3 5.857933 5.857933 3202 +gri 1 3 5.857933 5.857933 3569 +nato 1 3 5.857933 5.857933 3587 +engag 1 2 6.263398 6.263398 4937 +programmingand 1 2 6.263398 6.263398 4940 +theform 1 2 6.263398 6.263398 4245 +aitken 1 2 6.263398 6.263398 4941 +possibleto 1 2 6.263398 6.263398 4942 +ventur 1 2 6.263398 6.263398 4938 +polya 1 2 6.263398 6.263398 4939 +aprogram 1 2 6.263398 6.263398 4943 +thiswil 1 2 6.263398 6.263398 4944 +activitieschair 1 2 6.263398 6.263398 4894 +anniversari 1 2 6.263398 6.263398 4945 +celebr 1 2 6.263398 6.263398 4946 +buffalo 1 2 6.263398 6.263398 4947 +bensoussan 1 2 6.263398 6.263398 4303 +andmathemat 1 2 6.263398 6.263398 4948 +manfr 1 2 6.263398 6.263398 4949 +constablerobert 1 1 6.957497 6.957497 9723 +constabledepart 1 1 6.957497 6.957497 9724 +professorrc 1 1 6.957497 6.957497 9725 +researchw 1 1 6.957497 6.957497 9726 +providemechan 1 1 6.957497 6.957497 9727 +implementedthre 1 1 6.957497 6.957497 9728 +lispprogram 1 1 6.957497 6.957497 9729 +sucha 1 1 6.957497 6.957497 9730 +canexpress 1 1 6.957497 6.957497 9731 +asmetalevel 1 1 6.957497 6.957497 9732 +canevalu 1 1 6.957497 6.957497 9733 +nuprli 1 1 6.957497 6.957497 9734 +fomal 1 1 6.957497 6.957497 9735 +iscal 1 1 6.957497 6.957497 9736 +termeditor 1 1 6.957497 6.957497 9737 +eaton 1 1 6.957497 6.957497 9720 +itsintern 1 1 6.957497 6.957497 9738 +hedefinit 1 1 6.957497 6.957497 9739 +inconstruct 1 1 6.957497 6.957497 9740 +mechanismha 1 1 6.957497 6.957497 9741 +rebuilt 1 1 6.957497 6.957497 9742 +thework 1 1 6.957497 6.957497 9743 +isan 1 1 6.957497 6.957497 9744 +builtprincip 1 1 6.957497 6.957497 9745 +internaldescript 1 1 6.957497 6.957497 9746 +withmiriam 1 1 6.957497 6.957497 9747 +leeser 1 1 6.957497 6.957497 9719 +davidgri 1 1 6.957497 6.957497 9748 +richardzippel 1 1 6.957497 6.957497 9749 +withless 1 1 6.957497 6.957497 9750 +aagard 1 1 6.957497 6.957497 9751 +thecorrect 1 1 6.957497 6.957497 9752 +bedrocsystem 1 1 6.957497 6.957497 9753 +widelyus 1 1 6.957497 6.957497 9754 +efforttaught 1 1 6.957497 6.957497 9755 +themann 1 1 6.957497 6.957497 9756 +programmingprocess 1 1 6.957497 6.957497 9757 +givn 1 1 6.957497 6.957497 9758 +ofpolya 1 1 6.957497 6.957497 9759 +tryingto 1 1 6.957497 6.957497 9760 +thepolya 1 1 6.957497 6.957497 9761 +conal 1 1 6.957497 6.957497 9762 +mannion 1 1 6.957497 6.957497 9763 +ofus 1 1 6.957497 6.957497 9764 +discussingproblem 1 1 6.957497 6.957497 9765 +ssymbol 1 1 6.957497 6.957497 9766 +computingsoftwar 1 1 6.957497 6.957497 9767 +isbuild 1 1 6.957497 6.957497 9768 +computationeditor 1 1 6.957497 6.957497 9721 +presseditor 1 1 6.957497 6.957497 9769 +pressgener 1 1 6.957497 6.957497 9770 +licsprogram 1 1 6.957497 6.957497 9771 +jumelageprogram 1 1 6.957497 6.957497 9772 +softwarerefere 1 1 6.957497 6.957497 9773 +nserc 1 1 6.957497 6.957497 9774 +scienceunivers 1 1 6.957497 6.957497 9775 +committeecomput 1 1 6.957497 6.957497 9776 +committeeprovost 1 1 6.957497 6.957497 9777 +mathematicslecturesform 1 1 6.957497 6.957497 9778 +betweencomput 1 1 6.957497 6.957497 9722 +inria 1 1 6.957497 6.957497 9779 +bengurion 1 1 6.957497 6.957497 9780 +sheva 1 1 6.957497 6.957497 9781 +aviv 1 1 6.957497 6.957497 9782 +metaprogram 1 1 6.957497 6.957497 9783 +engineeringworkshop 1 1 6.957497 6.957497 9784 +publicationsform 1 1 6.957497 6.957497 9785 +tendenc 1 1 6.957497 6.957497 9786 +verju 1 1 6.957497 6.957497 9787 +metalevel 1 1 6.957497 6.957497 9788 +broi 1 1 6.957497 6.957497 9789 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..b1e63876 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +analysi 1 124 2.079442 2.079442 98 +find 1 111 2.197225 2.197225 111 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +upson 1 71 2.639057 2.639057 218 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +numer 1 49 3.044522 3.044522 369 +correct 1 38 3.295837 3.295837 462 +adam 1 17 4.110874 4.110874 934 +sept 1 17 4.110874 4.110874 952 +incomput 1 14 4.317488 4.317488 1096 +athlet 1 7 5.010635 5.010635 1933 +atcornel 1 6 5.164786 5.164786 2131 +florenc 1 2 6.263398 6.263398 4950 +aflorenc 1 1 6.957497 6.957497 9790 +professionalinterest 1 1 6.957497 6.957497 9791 +academicsresearchworkinterest 1 1 6.957497 6.957497 9792 +mewith 1 1 6.957497 6.957497 9793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..a187ec90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +check 1 115 2.197225 2.197225 118 +second 1 81 2.484907 2.484907 166 +solv 1 73 2.639057 2.639057 234 +ithaca 1 65 2.772589 2.772589 294 +suggest 1 53 2.944439 2.944439 331 +approach 1 48 3.044522 3.044522 366 +detect 1 26 3.688879 3.688879 646 +failur 1 18 4.060443 4.060443 898 +hybrid 1 15 4.248495 4.248495 1057 +warn 1 14 4.317488 4.317488 1068 +tour 1 11 4.553877 4.553877 1307 +perman 1 11 4.553877 4.553877 1372 +marco 1 4 5.568345 5.568345 2589 +aguilera 1 2 6.263398 6.263398 4052 +kawazo 1 1 6.957497 6.957497 9794 +algorithmsrandom 1 1 6.957497 6.957497 9795 +consensusgo 1 1 6.957497 6.957497 9796 +brazil 1 1 6.957497 6.957497 9797 +constructionmarco 1 1 6.957497 6.957497 9798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..a9bd8742 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +requir 1 81 2.484907 2.484907 167 +stuff 1 87 2.484907 2.484907 171 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +get 1 46 3.091042 3.091042 380 +chines 1 29 3.583519 3.583519 595 +annot 1 21 3.912023 3.912023 775 +taiwan 1 16 4.174387 4.174387 1006 +hong 1 14 4.317488 4.317488 1105 +essenti 1 13 4.382027 4.382027 1137 +usaoffic 1 6 5.164786 5.164786 2159 +corba 1 5 5.347108 5.347108 2320 +alfr 1 4 5.568345 5.568345 2882 +sinanet 1 4 5.568345 5.568345 2883 +worthwhil 1 2 6.263398 6.263398 4951 +dizzi 1 1 6.957497 6.957497 9799 +nandonet 1 1 6.957497 6.957497 9800 +sunworld 1 1 6.957497 6.957497 9801 +javaworldcours 1 1 6.957497 6.957497 9802 +bibliographyc 1 1 6.957497 6.957497 9803 +reportalfr 1 1 6.957497 6.957497 9804 +ahong 1 1 6.957497 6.957497 9805 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..a1c2349e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +academ 1 82 2.484907 2.484907 178 +orient 1 80 2.564949 2.564949 205 +resum 1 79 2.564949 2.564949 217 +html 1 75 2.639057 2.639057 235 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +automata 1 13 4.382027 4.382027 1135 +vineet 1 8 4.875197 4.875197 1639 +capac 1 8 4.875197 4.875197 1740 +engg 1 4 5.568345 5.568345 2884 +ahuja 1 3 5.857933 5.857933 3494 +coursework 1 3 5.857933 5.857933 3588 +hasbrouck 1 2 6.263398 6.263398 4952 +pagevineet 1 1 6.957497 6.957497 9806 +ahujam 1 1 6.957497 6.957497 9807 +apt 1 1 6.957497 6.957497 9808 +reportfal 1 1 6.957497 6.957497 9809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..7d3653c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +address 1 170 1.791759 1.791759 62 +construct 1 139 1.945910 1.945910 82 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +telephon 1 50 3.044522 3.044522 373 +still 1 50 3.044522 3.044522 362 +pair 1 9 4.753590 4.753590 1503 +ching 1 1 6.957497 6.957497 9810 +chinglan 1 1 6.957497 6.957497 9811 +edumast 1 1 6.957497 6.957497 9812 +beau 1 1 6.957497 6.957497 9813 +seneca 1 1 6.957497 6.957497 9814 +examplegraph 1 1 6.957497 6.957497 9815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..43efbb29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +tool 1 117 2.079442 2.079442 93 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +technic 1 100 2.302585 2.302585 140 +commun 1 95 2.397895 2.397895 157 +search 1 95 2.397895 2.397895 155 +larg 1 82 2.484907 2.484907 168 +wide 1 84 2.484907 2.484907 185 +orient 1 80 2.564949 2.564949 205 +state 1 76 2.564949 2.564949 207 +new 1 64 2.772589 2.772589 262 +ithaca 1 65 2.772589 2.772589 294 +direct 1 57 2.890372 2.890372 316 +directori 1 45 3.135494 3.135494 396 +music 1 42 3.218876 3.218876 436 +staff 1 36 3.367296 3.367296 490 +within 1 33 3.433987 3.433987 525 +art 1 29 3.583519 3.583519 593 +scale 1 28 3.610918 3.610918 613 +weather 1 28 3.610918 3.610918 618 +seri 1 24 3.761200 3.761200 708 +lead 1 23 3.806662 3.806662 718 +lyco 1 19 4.007333 4.007333 871 +event 1 18 4.060443 4.060443 896 +condit 1 16 4.174387 4.174387 975 +brief 1 16 4.174387 4.174387 1001 +horu 1 14 4.317488 4.317488 1116 +replic 1 12 4.465908 4.465908 1231 +reness 1 11 4.553877 4.553877 1333 +birman 1 9 4.753590 4.753590 1531 +friedman 1 7 5.010635 5.010635 1886 +gopher 1 6 5.164786 5.164786 1982 +broadcast 1 5 5.347108 5.347108 2453 +vaysburd 1 4 5.568345 5.568345 2846 +cuinfo 1 4 5.568345 5.568345 2626 +androbbert 1 2 6.263398 6.263398 4953 +partition 1 2 6.263398 6.263398 4954 +lausann 1 2 6.263398 6.263398 4955 +hebrew 1 2 6.263398 6.263398 4913 +transi 1 2 6.263398 6.263398 4861 +alexei 1 1 6.957497 6.957497 9816 +pagealexei 1 1 6.957497 6.957497 9817 +vaysburdalexei 1 1 6.957497 6.957497 9818 +andobject 1 1 6.957497 6.957497 9819 +ecol 1 1 6.957497 6.957497 9820 +polytechniqu 1 1 6.957497 6.957497 9821 +federal 1 1 6.957497 6.957497 9822 +cornellcornel 1 1 6.957497 6.957497 9823 +directorycornel 1 1 6.957497 6.957497 9824 +directorycours 1 1 6.957497 6.957497 9825 +examscornel 1 1 6.957497 6.957497 9826 +calendarcornel 1 1 6.957497 6.957497 9827 +musicbailei 1 1 6.957497 6.957497 9828 +concertscornel 1 1 6.957497 6.957497 9829 +ithacaworld 1 1 6.957497 6.957497 9830 +odessa 1 1 6.957497 6.957497 9831 +odessaweb 1 1 6.957497 6.957497 9832 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..c6dfa441 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +perform 1 143 1.945910 1.945910 74 +high 1 130 2.079442 2.079442 101 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +look 1 107 2.197225 2.197225 115 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +present 1 91 2.397895 2.397895 145 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +second 1 81 2.484907 2.484907 166 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +involv 1 71 2.639057 2.639057 227 +view 1 70 2.708050 2.708050 254 +multimedia 1 68 2.708050 2.708050 258 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +descript 1 64 2.772589 2.772589 271 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +written 1 63 2.772589 2.772589 278 +content 1 59 2.833213 2.833213 302 +processor 1 54 2.944439 2.944439 335 +sampl 1 53 2.944439 2.944439 339 +give 1 50 3.044522 3.044522 359 +california 1 46 3.091042 3.091042 388 +done 1 47 3.091042 3.091042 381 +better 1 45 3.135494 3.135494 401 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +might 1 41 3.218876 3.218876 426 +past 1 42 3.218876 3.218876 428 +futur 1 41 3.218876 3.218876 427 +ofth 1 36 3.367296 3.367296 491 +game 1 36 3.367296 3.367296 498 +kind 1 32 3.465736 3.465736 541 +propos 1 28 3.610918 3.610918 602 +hope 1 28 3.610918 3.610918 610 +though 1 27 3.637586 3.637586 622 +bookmark 1 26 3.688879 3.688879 639 +trace 1 25 3.737670 3.737670 677 +store 1 24 3.761200 3.761200 693 +pattern 1 24 3.761200 3.761200 689 +compress 1 23 3.806662 3.806662 719 +watch 1 21 3.912023 3.912023 789 +chip 1 21 3.912023 3.912023 770 +vlsi 1 21 3.912023 3.912023 795 +mpeg 1 20 3.951244 3.951244 831 +speed 1 18 4.060443 4.060443 911 +render 1 17 4.110874 4.110874 947 +track 1 15 4.248495 4.248495 1029 +stream 1 15 4.248495 4.248495 1015 +rate 1 15 4.248495 4.248495 1037 +transit 1 15 4.248495 4.248495 1046 +train 1 14 4.317488 4.317488 1066 +station 1 13 4.382027 4.382027 1157 +suit 1 13 4.382027 4.382027 1129 +avenu 1 12 4.465908 4.465908 1277 +realiti 1 12 4.465908 4.465908 1272 +death 1 10 4.653960 4.653960 1457 +earth 1 10 4.653960 4.653960 1463 +mountain 1 10 4.653960 4.653960 1456 +santa 1 10 4.653960 4.653960 1441 +wall 1 9 4.753590 4.753590 1553 +occur 1 9 4.753590 4.753590 1572 +routin 1 9 4.753590 4.753590 1549 +screen 1 9 4.753590 4.753590 1577 +ride 1 8 4.875197 4.875197 1741 +switch 1 8 4.875197 4.875197 1718 +accord 1 7 5.010635 5.010635 1826 +monei 1 7 5.010635 5.010635 1934 +stereo 1 7 5.010635 5.010635 1818 +microsystem 1 6 5.164786 5.164786 2160 +railroad 1 6 5.164786 5.164786 2161 +silicon 1 6 5.164786 5.164786 2076 +extern 1 6 5.164786 5.164786 2105 +byte 1 6 5.164786 5.164786 2108 +snapshot 1 5 5.347108 5.347108 2303 +hell 1 4 5.568345 5.568345 2885 +mess 1 4 5.568345 5.568345 2886 +engg 1 4 5.568345 5.568345 2884 +hindu 1 3 5.857933 5.857933 3590 +heaven 1 3 5.857933 5.857933 3589 +landscap 1 3 5.857933 5.857933 3525 +xlib 1 3 5.857933 5.857933 3204 +fernandez 1 3 5.857933 5.857933 3591 +mytholog 1 2 6.263398 6.263398 4869 +cupertino 1 2 6.263398 6.263398 4956 +javasoft 1 2 6.263398 6.263398 4220 +clara 1 2 6.263398 6.263398 4958 +amith 1 2 6.263398 6.263398 4053 +yamasani 1 2 6.263398 6.263398 4062 +cscomput 1 2 6.263398 6.263398 4195 +cave 1 2 6.263398 6.263398 4959 +softwarei 1 2 6.263398 6.263398 4960 +nano 1 2 6.263398 6.263398 4961 +snap 1 2 6.263398 6.263398 4962 +pyramania 1 2 6.263398 6.263398 4957 +thed 1 2 6.263398 6.263398 4963 +pal 1 2 6.263398 6.263398 4964 +joselui 1 2 6.263398 6.263398 4965 +yama 1 1 6.957497 6.957497 9833 +downto 1 1 6.957497 6.957497 9835 +imparti 1 1 6.957497 6.957497 9836 +amithyamasanim 1 1 6.957497 6.957497 9837 +yorki 1 1 6.957497 6.957497 9838 +garcia 1 1 6.957497 6.957497 9839 +mailstop 1 1 6.957497 6.957497 9840 +ucup 1 1 6.957497 6.957497 9841 +warburton 1 1 6.957497 6.957497 9842 +comi 1 1 6.957497 6.957497 9843 +currentlyemploi 1 1 6.957497 6.957497 9844 +workingin 1 1 6.957497 6.957497 9845 +javamedia 1 1 6.957497 6.957497 9846 +groupeducationfal 1 1 6.957497 6.957497 9847 +coursesvlsi 1 1 6.957497 6.957497 9834 +cssoftwar 1 1 6.957497 6.957497 9848 +csspring 1 1 6.957497 6.957497 9849 +csproject 1 1 6.957497 6.957497 9850 +railroadsystem 1 1 6.957497 6.957497 9851 +onyx 1 1 6.957497 6.957497 9852 +openinventord 1 1 6.957497 6.957497 9853 +through 1 1 6.957497 6.957497 9854 +documentimag 1 1 6.957497 6.957497 9855 +chipdevelop 1 1 6.957497 6.957497 9856 +basicallycompress 1 1 6.957497 6.957497 9857 +thisalgorithm 1 1 6.957497 6.957497 9858 +cachecam 1 1 6.957497 6.957497 9859 +inputstream 1 1 6.957497 6.957497 9860 +capableof 1 1 6.957497 6.957497 9861 +nowai 1 1 6.957497 6.957497 9862 +rivlproposalpresentationand 1 1 6.957497 6.957497 9863 +dthi 1 1 6.957497 6.957497 9864 +itswritten 1 1 6.957497 6.957497 9865 +parallelomania 1 1 6.957497 6.957497 9866 +resumehtmlpostscript 1 1 6.957497 6.957497 9867 +satyaprasad 1 1 6.957497 6.957497 9868 +avinashgupta 1 1 6.957497 6.957497 9869 +kartikh 1 1 6.957497 6.957497 9870 +kapadia 1 1 6.957497 6.957497 9871 +hrishikeshdixit 1 1 6.957497 6.957497 9872 +vineetahuja 1 1 6.957497 6.957497 9873 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..857f0ff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +read 1 154 1.791759 1.791759 47 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +person 1 111 2.197225 2.197225 117 +assist 1 112 2.197225 2.197225 113 +homepag 1 93 2.397895 2.397895 148 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +real 1 93 2.397895 2.397895 144 +school 1 84 2.484907 2.484907 188 +wide 1 84 2.484907 2.484907 185 +state 1 76 2.564949 2.564949 207 +david 1 71 2.639057 2.639057 232 +multimedia 1 68 2.708050 2.708050 258 +order 1 69 2.708050 2.708050 249 +street 1 63 2.772589 2.772589 293 +ithaca 1 65 2.772589 2.772589 294 +prof 1 64 2.772589 2.772589 273 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +summer 1 56 2.890372 2.890372 311 +understand 1 47 3.091042 3.091042 384 +video 1 44 3.135494 3.135494 405 +vita 1 38 3.295837 3.295837 473 +brian 1 38 3.295837 3.295837 466 +industri 1 38 3.295837 3.295837 464 +bibliographi 1 34 3.401197 3.401197 518 +curriculum 1 33 3.433987 3.433987 535 +independ 1 32 3.465736 3.465736 548 +photo 1 31 3.496508 3.496508 561 +common 1 30 3.555348 3.555348 574 +platform 1 29 3.583519 3.583519 591 +limit 1 29 3.583519 3.583519 585 +request 1 26 3.688879 3.688879 635 +annot 1 21 3.912023 3.912023 775 +smith 1 20 3.951244 3.951244 820 +toolkit 1 20 3.951244 3.951244 835 +kernel 1 20 3.951244 3.951244 825 +mellon 1 13 4.382027 4.382027 1179 +realiti 1 12 4.465908 4.465908 1272 +carnegi 1 12 4.465908 4.465908 1260 +total 1 10 4.653960 4.653960 1398 +patel 1 6 5.164786 5.164786 2154 +causal 1 6 5.164786 5.164786 2024 +east 1 5 5.347108 5.347108 2472 +corba 1 5 5.347108 5.347108 2320 +dale 1 4 5.568345 5.568345 2687 +zeno 1 3 5.857933 5.857933 3580 +cheriton 1 3 5.857933 5.857933 3259 +ankit 1 2 6.263398 6.263398 4966 +endpoint 1 2 6.263398 6.263398 4967 +broker 1 2 6.263398 6.263398 4968 +critiqu 1 2 6.263398 6.263398 4328 +apatel 1 1 6.957497 6.957497 9874 +galleria 1 1 6.957497 6.957497 9875 +chronologia 1 1 6.957497 6.957497 9876 +universityresumedepart 1 1 6.957497 6.957497 9877 +enrolledgradu 1 1 6.957497 6.957497 9878 +canvasd 1 1 6.957497 6.957497 9879 +conferencingmultimedia 1 1 6.957497 6.957497 9880 +assignmentsproject 1 1 6.957497 6.957497 9881 +netan 1 1 6.957497 6.957497 9882 +skeen 1 1 6.957497 6.957497 9883 +scienceworld 1 1 6.957497 6.957497 9884 +multimediamaharaja 1 1 6.957497 6.957497 9885 +sayajirao 1 1 6.957497 6.957497 9886 +academicsfriend 1 1 6.957497 6.957497 9887 +techoreli 1 1 6.957497 6.957497 9888 +limitedjob 1 1 6.957497 6.957497 9889 +profilelif 1 1 6.957497 6.957497 9890 +relianc 1 1 6.957497 6.957497 9891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..8b387100 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +languag 1 227 1.386294 1.386294 26 +databas 1 122 2.079442 2.079442 86 +softwareengin 1 6 5.164786 5.164786 2162 +ashish 1 5 5.347108 5.347108 2473 +sciencemast 1 2 6.263398 6.263398 4969 +jhaveriashish 1 1 6.957497 6.957497 9892 +jhaveridepart 1 1 6.957497 6.957497 9893 +engineeringresumehtmlpost 1 1 6.957497 6.957497 9894 +scriptcourseworkadvanc 1 1 6.957497 6.957497 9895 +systemscsmultimedia 1 1 6.957497 6.957497 9896 +systemscsengin 1 1 6.957497 6.957497 9897 +networkscsprogram 1 1 6.957497 6.957497 9898 +csashish 1 1 6.957497 6.957497 9899 +jhaveri 1 1 6.957497 6.957497 9900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..0b9b01cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +person 1 111 2.197225 2.197225 117 +pictur 1 89 2.397895 2.397895 160 +server 1 76 2.564949 2.564949 204 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +guid 1 63 2.772589 2.772589 267 +life 1 50 3.044522 3.044522 375 +eduoffic 1 33 3.433987 3.433987 531 +transform 1 32 3.465736 3.465736 542 +famili 1 23 3.806662 3.806662 735 +record 1 18 4.060443 4.060443 890 +drive 1 15 4.248495 4.248495 1052 +club 1 15 4.248495 4.248495 1058 +danc 1 12 4.465908 4.465908 1278 +jean 1 10 4.653960 4.653960 1440 +atcornel 1 6 5.164786 5.164786 2131 +album 1 4 5.568345 5.568345 2888 +swing 1 4 5.568345 5.568345 2887 +dutch 1 3 5.857933 5.857933 3592 +berg 1 2 6.263398 6.263398 4970 +aswin 1 1 6.957497 6.957497 9901 +skyacr 1 1 6.957497 6.957497 9902 +systemmi 1 1 6.957497 6.957497 9903 +annek 1 1 6.957497 6.957497 9904 +deejay 1 1 6.957497 6.957497 9905 +isdn 1 1 6.957497 6.957497 9906 +hop 1 1 6.957497 6.957497 9907 +nederlands 1 1 6.957497 6.957497 9908 +clubi 1 1 6.957497 6.957497 9909 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..095a8ddf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,123 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +object 1 138 1.945910 1.945910 79 +first 1 140 1.945910 1.945910 71 +construct 1 139 1.945910 1.945910 82 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +make 1 111 2.197225 2.197225 120 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +graphic 1 90 2.397895 2.397895 147 +sinc 1 90 2.397895 2.397895 159 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +interfac 1 79 2.564949 2.564949 209 +appli 1 71 2.639057 2.639057 226 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +window 1 68 2.708050 2.708050 242 +virtual 1 62 2.772589 2.772589 285 +guid 1 63 2.772589 2.772589 267 +point 1 58 2.890372 2.890372 319 +space 1 57 2.890372 2.890372 310 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +still 1 50 3.044522 3.044522 362 +friend 1 48 3.044522 3.044522 376 +featur 1 46 3.091042 3.091042 386 +effect 1 46 3.091042 3.091042 385 +even 1 45 3.135494 3.135494 393 +protocol 1 45 3.135494 3.135494 407 +multipl 1 39 3.258097 3.258097 453 +mean 1 37 3.332205 3.332205 477 +game 1 36 3.367296 3.367296 498 +either 1 35 3.401197 3.401197 506 +next 1 34 3.401197 3.401197 517 +everi 1 34 3.401197 3.401197 519 +board 1 33 3.433987 3.433987 528 +abl 1 30 3.555348 3.555348 566 +full 1 28 3.610918 3.610918 615 +subject 1 26 3.688879 3.688879 647 +universityithaca 1 24 3.761200 3.761200 710 +magazin 1 24 3.761200 3.761200 704 +reach 1 24 3.761200 3.761200 688 +brows 1 23 3.806662 3.806662 726 +sciencecornel 1 22 3.850148 3.850148 768 +almost 1 22 3.850148 3.850148 742 +chip 1 21 3.912023 3.912023 770 +five 1 19 4.007333 4.007333 841 +sign 1 16 4.174387 4.174387 970 +transit 1 15 4.248495 4.248495 1046 +piec 1 15 4.248495 4.248495 1020 +stream 1 15 4.248495 4.248495 1015 +hierarch 1 15 4.248495 4.248495 1018 +scene 1 14 4.317488 4.317488 1114 +skill 1 12 4.465908 4.465908 1205 +player 1 11 4.553877 4.553877 1371 +earth 1 10 4.653960 4.653960 1463 +pair 1 9 4.753590 4.753590 1503 +rivl 1 8 4.875197 4.875197 1632 +pronounc 1 7 5.010635 5.010635 1918 +earn 1 7 5.010635 5.010635 1788 +hoca 1 5 5.347108 5.347108 2241 +hypothet 1 5 5.347108 5.347108 2474 +vertic 1 5 5.347108 5.347108 2270 +guestbook 1 5 5.347108 5.347108 2475 +engg 1 4 5.568345 5.568345 2884 +multitask 1 4 5.568345 5.568345 2803 +screenshot 1 4 5.568345 5.568345 2743 +avinash 1 3 5.857933 5.857933 3510 +win 1 3 5.857933 5.857933 3593 +resumemi 1 2 6.263398 6.263398 4971 +hodja 1 2 6.263398 6.263398 4972 +fledg 1 2 6.263398 6.263398 4973 +horizont 1 2 6.263398 6.263398 4117 +diagon 1 2 6.263398 6.263398 4974 +ipng 1 2 6.263398 6.263398 4727 +caveat 1 2 6.263398 6.263398 4975 +welcomeavinash 1 1 6.957497 6.957497 9911 +guptam 1 1 6.957497 6.957497 9912 +streetcambridg 1 1 6.957497 6.957497 9913 +thecia 1 1 6.957497 6.957497 9914 +presentationpent 1 1 6.957497 6.957497 9915 +skillpent 1 1 6.957497 6.957497 9916 +trap 1 1 6.957497 6.957497 9910 +oppon 1 1 6.957497 6.957497 9917 +gamedownload 1 1 6.957497 6.957497 9918 +ipvimpl 1 1 6.957497 6.957497 9919 +proposalprogress 1 1 6.957497 6.957497 9920 +reportsam 1 1 6.957497 6.957497 9921 +pageon 1 1 6.957497 6.957497 9922 +internethytelnetth 1 1 6.957497 6.957497 9923 +catalogeinet 1 1 6.957497 6.957497 9924 +galaxyplanet 1 1 6.957497 6.957497 9925 +pagejoel 1 1 6.957497 6.957497 9926 +indexyahoo 1 1 6.957497 6.957497 9927 +wwwwebcrawlerlycosmi 1 1 6.957497 6.957497 9928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..60e84613 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +peopl 1 96 2.302585 2.302585 132 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +graphic 1 90 2.397895 2.397895 147 +chang 1 82 2.484907 2.484907 163 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +upson 1 71 2.639057 2.639057 218 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +would 1 67 2.708050 2.708050 251 +window 1 68 2.708050 2.708050 242 +prof 1 64 2.772589 2.772589 273 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +previou 1 62 2.772589 2.772589 290 +plai 1 60 2.833213 2.833213 307 +share 1 59 2.833213 2.833213 304 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +summer 1 56 2.890372 2.890372 311 +semest 1 58 2.890372 2.890372 312 +found 1 53 2.944439 2.944439 337 +week 1 52 2.995732 2.995732 343 +digit 1 52 2.995732 2.995732 348 +cool 1 49 3.044522 3.044522 374 +friend 1 48 3.044522 3.044522 376 +still 1 50 3.044522 3.044522 362 +electron 1 47 3.091042 3.091042 379 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +algebra 1 45 3.135494 3.135494 394 +favorit 1 44 3.135494 3.135494 410 +keep 1 44 3.135494 3.135494 409 +futur 1 41 3.218876 3.218876 427 +past 1 42 3.218876 3.218876 428 +linear 1 41 3.218876 3.218876 431 +live 1 40 3.258097 3.258097 451 +continu 1 39 3.258097 3.258097 448 +form 1 39 3.258097 3.258097 443 +realli 1 40 3.258097 3.258097 444 +hand 1 37 3.332205 3.332205 475 +feel 1 37 3.332205 3.332205 483 +soon 1 36 3.367296 3.367296 494 +least 1 35 3.401197 3.401197 516 +taught 1 33 3.433987 3.433987 526 +product 1 33 3.433987 3.433987 527 +kind 1 32 3.465736 3.465736 541 +concept 1 32 3.465736 3.465736 537 +independ 1 32 3.465736 3.465736 548 +taken 1 31 3.496508 3.496508 555 +computersci 1 30 3.555348 3.555348 562 +hard 1 30 3.555348 3.555348 563 +depend 1 29 3.583519 3.583519 583 +consid 1 29 3.583519 3.583519 590 +usual 1 28 3.610918 3.610918 608 +framework 1 28 3.610918 3.610918 606 +actual 1 28 3.610918 3.610918 604 +campu 1 27 3.637586 3.637586 623 +session 1 26 3.688879 3.688879 643 +spent 1 25 3.737670 3.737670 676 +fundament 1 25 3.737670 3.737670 661 +greg 1 24 3.761200 3.761200 695 +frame 1 24 3.761200 3.761200 684 +brows 1 23 3.806662 3.806662 726 +finish 1 22 3.850148 3.850148 748 +dai 1 22 3.850148 3.850148 753 +half 1 21 3.912023 3.912023 776 +hous 1 21 3.912023 3.912023 801 +mpeg 1 20 3.951244 3.951244 831 +nice 1 20 3.951244 3.951244 809 +media 1 19 4.007333 4.007333 861 +offici 1 18 4.060443 4.060443 894 +coupl 1 17 4.110874 4.110874 939 +thought 1 17 4.110874 4.110874 945 +otherwis 1 17 4.110874 4.110874 922 +intel 1 16 4.174387 4.174387 1000 +georg 1 16 4.174387 4.174387 994 +goe 1 15 4.248495 4.248495 1044 +fortran 1 15 4.248495 4.248495 1027 +stream 1 15 4.248495 4.248495 1015 +trip 1 14 4.317488 4.317488 1113 +hopefulli 1 14 4.317488 4.317488 1071 +camera 1 14 4.317488 4.317488 1115 +decid 1 14 4.317488 4.317488 1075 +dave 1 14 4.317488 4.317488 1098 +jonathan 1 13 4.382027 4.382027 1174 +believ 1 13 4.382027 4.382027 1187 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +resolut 1 13 4.382027 4.382027 1172 +unfortun 1 13 4.382027 4.382027 1170 +pretti 1 13 4.382027 4.382027 1191 +went 1 12 4.465908 4.465908 1279 +pageif 1 12 4.465908 4.465908 1275 +lake 1 11 4.553877 4.553877 1373 +bandwidth 1 11 4.553877 4.553877 1365 +smart 1 11 4.553877 4.553877 1352 +perman 1 11 4.553877 4.553877 1372 +road 1 11 4.553877 4.553877 1374 +sentenc 1 10 4.653960 4.653960 1413 +town 1 10 4.653960 4.653960 1458 +packet 1 10 4.653960 4.653960 1415 +bring 1 10 4.653960 4.653960 1430 +forc 1 10 4.653960 4.653960 1384 +undergrad 1 9 4.753590 4.753590 1589 +jersei 1 9 4.753590 4.753590 1587 +introductori 1 9 4.753590 4.753590 1479 +trust 1 9 4.753590 4.753590 1583 +grew 1 8 4.875197 4.875197 1742 +mile 1 8 4.875197 4.875197 1743 +filter 1 8 4.875197 4.875197 1641 +rivl 1 8 4.875197 4.875197 1632 +cornellunivers 1 7 5.010635 5.010635 1916 +encrypt 1 7 5.010635 5.010635 1835 +portland 1 7 5.010635 5.010635 1878 +conveni 1 6 5.164786 5.164786 2088 +corp 1 6 5.164786 5.164786 2139 +lucki 1 6 5.164786 5.164786 2163 +oregon 1 5 5.347108 5.347108 2437 +ahead 1 5 5.347108 5.347108 2338 +clarif 1 5 5.347108 5.347108 2253 +fork 1 4 5.568345 5.568345 2801 +skin 1 4 5.568345 5.568345 2840 +cheap 1 4 5.568345 5.568345 2751 +ultra 1 4 5.568345 5.568345 2889 +height 1 4 5.568345 5.568345 2890 +gear 1 4 5.568345 5.568345 2891 +birth 1 3 5.857933 5.857933 3594 +greek 1 3 5.857933 5.857933 3595 +labor 1 3 5.857933 5.857933 3195 +weber 1 3 5.857933 5.857933 3156 +bright 1 3 5.857933 5.857933 3596 +pack 1 3 5.857933 5.857933 3597 +urg 1 3 5.857933 5.857933 3212 +dude 1 2 6.263398 6.263398 4977 +felt 1 2 6.263398 6.263398 4978 +fratern 1 2 6.263398 6.263398 4979 +border 1 2 6.263398 6.263398 4980 +sugata 1 2 6.263398 6.263398 4976 +mukhopadhyai 1 2 6.263398 6.263398 4981 +surfer 1 2 6.263398 6.263398 4982 +captain 1 2 6.263398 6.263398 4983 +barber 1 1 6.957497 6.957497 9929 +fleshpooooooooooooooch 1 1 6.957497 6.957497 9934 +inclin 1 1 6.957497 6.957497 9935 +callm 1 1 6.957497 6.957497 9936 +ponch 1 1 6.957497 6.957497 9931 +orpooch 1 1 6.957497 6.957497 9937 +guppi 1 1 6.957497 6.957497 9938 +mama 1 1 6.957497 6.957497 9939 +phin 1 1 6.957497 6.957497 9940 +attendedmontgomeri 1 1 6.957497 6.957497 9941 +collegetown 1 1 6.957497 6.957497 9942 +adjac 1 1 6.957497 6.957497 9943 +sublet 1 1 6.957497 6.957497 9944 +oncolleg 1 1 6.957497 6.957497 9945 +radiu 1 1 6.957497 6.957497 9946 +epsilon 1 1 6.957497 6.957497 9947 +cayuga 1 1 6.957497 6.957497 9948 +thefilt 1 1 6.957497 6.957497 9949 +ofc 1 1 6.957497 6.957497 9950 +intereststhi 1 1 6.957497 6.957497 9951 +rivlan 1 1 6.957497 6.957497 9952 +tracker 1 1 6.957497 6.957497 9953 +rivli 1 1 6.957497 6.957497 9954 +smpd 1 1 6.957497 6.957497 9955 +generatorfor 1 1 6.957497 6.957497 9956 +webar 1 1 6.957497 6.957497 9957 +buddi 1 1 6.957497 6.957497 9958 +inde 1 1 6.957497 6.957497 9932 +resourceful 1 1 6.957497 6.957497 9959 +pipe 1 1 6.957497 6.957497 9960 +meanth 1 1 6.957497 6.957497 9961 +comrad 1 1 6.957497 6.957497 9962 +bush 1 1 6.957497 6.957497 9933 +ofhi 1 1 6.957497 6.957497 9963 +swirl 1 1 6.957497 6.957497 9964 +nefari 1 1 6.957497 6.957497 9965 +toilet 1 1 6.957497 6.957497 9966 +mukhopadyai 1 1 6.957497 6.957497 9967 +bonei 1 1 6.957497 6.957497 9968 +magoo 1 1 6.957497 6.957497 9969 +fletop 1 1 6.957497 6.957497 9970 +bigro 1 1 6.957497 6.957497 9971 +koster 1 1 6.957497 6.957497 9972 +bot 1 1 6.957497 6.957497 9973 +tffl 1 1 6.957497 6.957497 9974 +bulli 1 1 6.957497 6.957497 9930 +pageuuencod 1 1 6.957497 6.957497 9975 +pagetar 1 1 6.957497 6.957497 9976 +zip 1 1 6.957497 6.957497 9977 +downloadsgraphicsbarb 1 1 6.957497 6.957497 9978 +gifponch 1 1 6.957497 6.957497 9979 +htmlres_htmlres_curemmittemmitt 1 1 6.957497 6.957497 9980 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..805f3e70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +recent 1 167 1.791759 1.791759 58 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +look 1 107 2.197225 2.197225 115 +final 1 116 2.197225 2.197225 108 +user 1 104 2.302585 2.302585 137 +homepag 1 93 2.397895 2.397895 148 +commun 1 95 2.397895 2.397895 157 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +level 1 87 2.484907 2.484907 180 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +state 1 76 2.564949 2.564949 207 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +interfac 1 79 2.564949 2.564949 209 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +addit 1 74 2.639057 2.639057 228 +david 1 71 2.639057 2.639057 232 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +ithaca 1 65 2.772589 2.772589 294 +locat 1 59 2.833213 2.833213 303 +back 1 60 2.833213 2.833213 297 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +februari 1 54 2.944439 2.944439 328 +advisor 1 51 2.995732 2.995732 355 +run 1 51 2.995732 2.995732 347 +cool 1 49 3.044522 3.044522 374 +archiv 1 49 3.044522 3.044522 364 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +review 1 42 3.218876 3.218876 425 +fast 1 42 3.218876 3.218876 429 +realli 1 40 3.258097 3.258097 444 +live 1 40 3.258097 3.258097 451 +especi 1 36 3.367296 3.367296 496 +singl 1 34 3.401197 3.401197 510 +collabor 1 32 3.465736 3.465736 543 +someth 1 31 3.496508 3.496508 554 +turn 1 29 3.583519 3.583519 586 +hope 1 28 3.610918 3.610918 610 +cluster 1 28 3.610918 3.610918 612 +team 1 27 3.637586 3.637586 625 +compar 1 26 3.688879 3.688879 648 +berkelei 1 26 3.688879 3.688879 657 +enabl 1 26 3.688879 3.688879 655 +known 1 24 3.761200 3.761200 702 +size 1 23 3.806662 3.806662 713 +indian 1 22 3.850148 3.850148 769 +love 1 21 3.912023 3.912023 804 +listen 1 18 4.060443 4.060443 907 +layer 1 17 4.110874 4.110874 926 +segment 1 17 4.110874 4.110874 931 +interconnect 1 17 4.110874 4.110874 937 +latenc 1 16 4.174387 4.174387 993 +photograph 1 15 4.248495 4.248495 1056 +micro 1 15 4.248495 4.248495 1031 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +avenu 1 12 4.465908 4.465908 1277 +went 1 12 4.465908 4.465908 1279 +philadelphia 1 12 4.465908 4.465908 1244 +scienceat 1 11 4.553877 4.553877 1375 +see 1 11 4.553877 4.553877 1337 +motiv 1 11 4.553877 4.553877 1346 +cook 1 10 4.653960 4.653960 1464 +werner 1 10 4.653960 4.653960 1385 +sosp 1 10 4.653960 4.653960 1416 +calvin 1 9 4.753590 4.753590 1518 +trust 1 9 4.753590 4.753590 1583 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +kanpur 1 8 4.875197 4.875197 1744 +realiz 1 8 4.875197 4.875197 1739 +gold 1 8 4.875197 4.875197 1745 +coast 1 8 4.875197 4.875197 1746 +vineet 1 8 4.875197 4.875197 1639 +vogel 1 8 4.875197 4.875197 1622 +centuri 1 7 5.010635 5.010635 1935 +happen 1 7 5.010635 5.010635 1790 +rock 1 6 5.164786 5.164786 2164 +dream 1 6 5.164786 5.164786 2165 +tri 1 6 5.164786 5.164786 2166 +south 1 6 5.164786 5.164786 2167 +goldstein 1 6 5.164786 5.164786 2168 +truli 1 5 5.347108 5.347108 2476 +aim 1 5 5.347108 5.347108 2477 +culler 1 5 5.347108 5.347108 2381 +symp 1 5 5.347108 5.347108 2376 +australia 1 5 5.347108 5.347108 2478 +buch 1 5 5.347108 5.347108 2272 +basu 1 4 5.568345 5.568345 2843 +thecornel 1 4 5.568345 5.568345 2892 +hobb 1 4 5.568345 5.568345 2893 +writer 1 4 5.568345 5.568345 2783 +cuinfo 1 4 5.568345 5.568345 2626 +ofworkst 1 4 5.568345 5.568345 2679 +withth 1 4 5.568345 5.568345 2805 +anindya 1 3 5.857933 5.857933 3535 +experienc 1 3 5.857933 5.857933 3203 +asian 1 3 5.857933 5.857933 3598 +mpp 1 3 5.857933 5.857933 3194 +schauser 1 3 5.857933 5.857933 3599 +avula 1 3 5.857933 5.857933 3600 +mugshot 1 2 6.263398 6.263398 4984 +goof 1 2 6.263398 6.263398 4985 +projectwith 1 2 6.263398 6.263398 4986 +thegreat 1 2 6.263398 6.263398 4987 +pelham 1 2 6.263398 6.263398 4988 +grenvil 1 2 6.263398 6.263398 4989 +wodehous 1 2 6.263398 6.263398 4990 +metallica 1 2 6.263398 6.263398 4991 +fanci 1 2 6.263398 6.263398 4992 +monti 1 2 6.263398 6.263398 4993 +python 1 2 6.263398 6.263398 4994 +beavi 1 2 6.263398 6.263398 4995 +meiko 1 2 6.263398 6.263398 4996 +untrust 1 2 6.263398 6.263398 4997 +seth 1 2 6.263398 6.263398 4998 +klau 1 2 6.263398 6.263398 4999 +veena 1 2 6.263398 6.263398 5000 +homepagelast 1 2 6.263398 6.263398 5001 +delawar 1 1 6.957497 6.957497 9981 +eduwhat 1 1 6.957497 6.957497 9982 +musicor 1 1 6.957497 6.957497 9983 +coollik 1 1 6.957497 6.957497 9984 +indiawho 1 1 6.957497 6.957497 9985 +hardpink 1 1 6.957497 6.957497 9986 +floydfanat 1 1 6.957497 6.957497 9987 +childhood 1 1 6.957497 6.957497 9988 +livelast 1 1 6.957497 6.957497 9989 +plum 1 1 6.957497 6.957497 9990 +unwash 1 1 6.957497 6.957497 9991 +attendedwoodstock 1 1 6.957497 6.957497 9992 +onlinewoodstock 1 1 6.957497 6.957497 9993 +woodstock 1 1 6.957497 6.957497 9994 +thesocc 1 1 6.957497 6.957497 9995 +worldcup 1 1 6.957497 6.957497 9996 +butunfortun 1 1 6.957497 6.957497 9997 +putsomerecip 1 1 6.957497 6.957497 9998 +connoisseurof 1 1 6.957497 6.957497 9999 +whiski 1 1 6.957497 6.957497 10000 +malt 1 1 6.957497 6.957497 10001 +cheer 1 1 6.957497 6.957497 10002 +buttheadoth 1 1 6.957497 6.957497 10003 +gopherand 1 1 6.957497 6.957497 10004 +projectwhich 1 1 6.957497 6.957497 10005 +acheiv 1 1 6.957497 6.957497 10006 +passinglay 1 1 6.957497 6.957497 10007 +thatshow 1 1 6.957497 6.957497 10008 +saturateth 1 1 6.957497 6.957497 10009 +fibr 1 1 6.957497 6.957497 10010 +specificationfor 1 1 6.957497 6.957497 10011 +processesboth 1 1 6.957497 6.957497 10012 +abridgedvers 1 1 6.957497 6.957497 10013 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..4e6b1af0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +studi 1 120 2.079442 2.079442 91 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +intern 1 108 2.197225 2.197225 128 +librari 1 87 2.484907 2.484907 181 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +info 1 85 2.484907 2.484907 176 +upson 1 71 2.639057 2.639057 218 +line 1 75 2.639057 2.639057 231 +view 1 70 2.708050 2.708050 254 +ithaca 1 65 2.772589 2.772589 294 +dept 1 64 2.772589 2.772589 291 +interact 1 62 2.772589 2.772589 270 +simpl 1 60 2.833213 2.833213 298 +overview 1 56 2.890372 2.890372 323 +processor 1 54 2.944439 2.944439 335 +local 1 55 2.944439 2.944439 334 +scientif 1 53 2.944439 2.944439 341 +advisor 1 51 2.995732 2.995732 355 +life 1 50 3.044522 3.044522 375 +music 1 42 3.218876 3.218876 436 +movi 1 40 3.258097 3.258097 459 +map 1 39 3.258097 3.258097 452 +tutori 1 39 3.258097 3.258097 437 +seminar 1 38 3.295837 3.295837 470 +global 1 34 3.401197 3.401197 520 +richard 1 31 3.496508 3.496508 559 +weather 1 28 3.610918 3.610918 618 +magazin 1 24 3.761200 3.761200 704 +famili 1 23 3.806662 3.806662 735 +geometri 1 22 3.850148 3.850148 752 +navig 1 21 3.912023 3.912023 796 +synthesi 1 20 3.951244 3.951244 834 +georg 1 16 4.174387 4.174387 994 +stock 1 16 4.174387 4.174387 1007 +massiv 1 15 4.248495 4.248495 1026 +incomput 1 14 4.317488 4.317488 1096 +francisco 1 14 4.317488 4.317488 1095 +levi 1 14 4.317488 4.317488 1093 +forth 1 13 4.382027 4.382027 1186 +galleri 1 13 4.382027 4.382027 1192 +insid 1 12 4.465908 4.465908 1262 +newspap 1 12 4.465908 4.465908 1280 +neat 1 12 4.465908 4.465908 1263 +congress 1 9 4.753590 4.753590 1592 +forget 1 8 4.875197 4.875197 1712 +wire 1 8 4.875197 4.875197 1747 +microsystem 1 6 5.164786 5.164786 2160 +peek 1 6 5.164786 5.164786 2169 +frog 1 5 5.347108 5.347108 2479 +hallithaca 1 4 5.568345 5.568345 2894 +zippel 1 4 5.568345 5.568345 2879 +heard 1 4 5.568345 5.568345 2895 +aboutth 1 4 5.568345 5.568345 2720 +wander 1 4 5.568345 5.568345 2896 +educornel 1 3 5.857933 5.857933 3601 +universitydept 1 3 5.857933 5.857933 3602 +galaxi 1 3 5.857933 5.857933 3603 +underground 1 3 5.857933 5.857933 3604 +spider 1 3 5.857933 5.857933 3605 +intertext 1 2 6.263398 6.263398 5002 +solar 1 2 6.263398 6.263398 5003 +martial 1 2 6.263398 6.263398 5004 +homepageben 1 1 6.957497 6.957497 10015 +haogradu 1 1 6.957497 6.957497 10016 +studentbhao 1 1 6.957497 6.957497 10017 +flea 1 1 6.957497 6.957497 10018 +taylorwhen 1 1 6.957497 6.957497 10019 +itsgorg 1 1 6.957497 6.957497 10020 +cornellwhat 1 1 6.957497 6.957497 10021 +dissectionmagazin 1 1 6.957497 6.957497 10022 +magazinea 1 1 6.957497 6.957497 10023 +shoemak 1 1 6.957497 6.957497 10024 +weblouvr 1 1 6.957497 6.957497 10025 +xmorphia 1 1 6.957497 6.957497 10026 +kaleidospac 1 1 6.957497 6.957497 10027 +jpop 1 1 6.957497 6.957497 10014 +bonsai 1 1 6.957497 6.957497 10028 +seiyuu 1 1 6.957497 6.957497 10029 +archivenetwork 1 1 6.957497 6.957497 10030 +edgelibrari 1 1 6.957497 6.957497 10031 +infonih 1 1 6.957497 6.957497 10032 +courseth 1 1 6.957497 6.957497 10033 +guidecern 1 1 6.957497 6.957497 10034 +bhao 1 1 6.957497 6.957497 10035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..a918f0e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +network 1 168 1.791759 1.791759 61 +ithaca 1 65 2.772589 2.772589 294 +york 1 41 3.218876 3.218876 435 +apart 1 7 5.010635 5.010635 1936 +aastha 1 2 6.263398 6.263398 5005 +sciencemast 1 2 6.263398 6.263398 4969 +hasbrouck 1 2 6.263398 6.263398 4952 +pageaastha 1 1 6.957497 6.957497 10036 +bhardwajdepart 1 1 6.957497 6.957497 10037 +ofengineeeringresumehtmlpost 1 1 6.957497 6.957497 10038 +scriptcourseworkadvanceddatabas 1 1 6.957497 6.957497 10039 +csmultimediasystem 1 1 6.957497 6.957497 10040 +csengineeringcomput 1 1 6.957497 6.957497 10041 +cssoftwareengin 1 1 6.957497 6.957497 10042 +cscontact 1 1 6.957497 6.957497 10043 +bhardwaj 1 1 6.957497 6.957497 10044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..d5f2bd21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,284 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +click 1 142 1.945910 1.945910 78 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +tool 1 117 2.079442 2.079442 93 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +search 1 95 2.397895 2.397895 155 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +stuff 1 87 2.484907 2.484907 171 +level 1 87 2.484907 2.484907 180 +orient 1 80 2.564949 2.564949 205 +april 1 77 2.564949 2.564949 196 +optim 1 79 2.564949 2.564949 197 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +intellig 1 72 2.639057 2.639057 225 +write 1 72 2.639057 2.639057 222 +symposium 1 72 2.639057 2.639057 238 +onlin 1 75 2.639057 2.639057 223 +view 1 70 2.708050 2.708050 254 +laboratori 1 63 2.772589 2.772589 292 +plan 1 65 2.772589 2.772589 272 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +ithaca 1 65 2.772589 2.772589 294 +automat 1 61 2.833213 2.833213 306 +plai 1 60 2.833213 2.833213 307 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +found 1 53 2.944439 2.944439 337 +februari 1 54 2.944439 2.944439 328 +without 1 50 3.044522 3.044522 370 +visual 1 48 3.044522 3.044522 372 +could 1 46 3.091042 3.091042 383 +california 1 46 3.091042 3.091042 388 +move 1 47 3.091042 3.091042 382 +video 1 44 3.135494 3.135494 405 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +around 1 43 3.178054 3.178054 415 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +press 1 42 3.218876 3.218876 419 +small 1 39 3.258097 3.258097 447 +programm 1 39 3.258097 3.258097 445 +societi 1 40 3.258097 3.258097 456 +open 1 38 3.295837 3.295837 469 +field 1 37 3.332205 3.332205 482 +robot 1 36 3.367296 3.367296 497 +singl 1 34 3.401197 3.401197 510 +approxim 1 35 3.401197 3.401197 509 +post 1 35 3.401197 3.401197 505 +tech 1 35 3.401197 3.401197 515 +return 1 34 3.401197 3.401197 502 +board 1 33 3.433987 3.433987 528 +built 1 29 3.583519 3.583519 592 +weather 1 28 3.610918 3.610918 618 +scale 1 28 3.610918 3.610918 613 +arrai 1 27 3.637586 3.637586 627 +team 1 27 3.637586 3.637586 625 +manipul 1 27 3.637586 3.637586 624 +detect 1 26 3.688879 3.688879 646 +bound 1 26 3.688879 3.688879 659 +experiment 1 26 3.688879 3.688879 645 +revis 1 26 3.688879 3.688879 640 +proc 1 26 3.688879 3.688879 649 +task 1 25 3.737670 3.737670 678 +motion 1 24 3.761200 3.761200 699 +sometim 1 24 3.761200 3.761200 696 +mobil 1 23 3.806662 3.806662 730 +famili 1 23 3.806662 3.806662 735 +lead 1 23 3.806662 3.806662 718 +cooper 1 22 3.850148 3.850148 757 +chip 1 21 3.912023 3.912023 770 +vlsi 1 21 3.912023 3.912023 795 +department 1 20 3.951244 3.951244 839 +mpeg 1 20 3.951244 3.951244 831 +scheme 1 20 3.951244 3.951244 818 +boston 1 19 4.007333 4.007333 862 +offici 1 18 4.060443 4.060443 894 +demo 1 18 4.060443 4.060443 888 +lower 1 18 4.060443 4.060443 886 +minim 1 18 4.060443 4.060443 887 +agent 1 18 4.060443 4.060443 910 +stanford 1 17 4.110874 4.110874 955 +vector 1 16 4.174387 4.174387 961 +diego 1 16 4.174387 4.174387 992 +explan 1 16 4.174387 4.174387 985 +massiv 1 15 4.248495 4.248495 1026 +micro 1 15 4.248495 4.248495 1031 +track 1 15 4.248495 4.248495 1029 +configur 1 15 4.248495 4.248495 1012 +researchmi 1 14 4.317488 4.317488 1119 +draft 1 14 4.317488 4.317488 1085 +train 1 14 4.317488 4.317488 1066 +anonym 1 14 4.317488 4.317488 1100 +cannot 1 13 4.382027 4.382027 1144 +jonathan 1 13 4.382027 4.382027 1174 +bruce 1 12 4.465908 4.465908 1226 +walk 1 12 4.465908 4.465908 1281 +target 1 12 4.465908 4.465908 1282 +franc 1 12 4.465908 4.465908 1276 +peter 1 11 4.553877 4.553877 1316 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +face 1 9 4.753590 4.753590 1501 +donald 1 9 4.753590 4.753590 1510 +classif 1 9 4.753590 4.753590 1586 +entitl 1 9 4.753590 4.753590 1490 +wilson 1 9 4.753590 4.753590 1536 +herefor 1 9 4.753590 4.753590 1483 +invari 1 8 4.875197 4.875197 1748 +autonom 1 8 4.875197 4.875197 1749 +sensor 1 7 5.010635 5.010635 1920 +morph 1 7 5.010635 5.010635 1937 +pittsburgh 1 7 5.010635 5.010635 1938 +beyond 1 7 5.010635 5.010635 1834 +huttenloch 1 6 5.164786 5.164786 1983 +silicon 1 6 5.164786 5.164786 2076 +eduph 1 5 5.347108 5.347108 2449 +actuat 1 5 5.347108 5.347108 2442 +lili 1 5 5.347108 5.347108 2240 +upper 1 5 5.347108 5.347108 2481 +minneapoli 1 5 5.347108 5.347108 2480 +minnesota 1 5 5.347108 5.347108 2469 +these 1 5 5.347108 5.347108 2482 +rotat 1 5 5.347108 5.347108 2295 +poem 1 5 5.347108 5.347108 2483 +clickher 1 5 5.347108 5.347108 2428 +randal 1 4 5.568345 5.568345 2776 +thecornel 1 4 5.568345 5.568345 2892 +chase 1 4 5.568345 5.568345 2897 +decoupl 1 4 5.568345 5.568345 2898 +chain 1 4 5.568345 5.568345 2712 +push 1 4 5.568345 5.568345 2635 +alfr 1 4 5.568345 5.568345 2882 +noel 1 3 5.857933 5.857933 3376 +scream 1 3 5.857933 5.857933 3609 +microfabr 1 3 5.857933 5.857933 3610 +fabric 1 3 5.857933 5.857933 3607 +bhringer 1 3 5.857933 5.857933 3606 +algorithmica 1 3 5.857933 5.857933 3561 +jen 1 3 5.857933 5.857933 3378 +artificialintellig 1 3 5.857933 5.857933 3608 +daniela 1 3 5.857933 5.857933 3611 +portrait 1 3 5.857933 5.857933 3491 +hereto 1 3 5.857933 5.857933 3476 +harm 1 3 5.857933 5.857933 3515 +macdonald 1 2 6.263398 6.263398 5006 +nanofabr 1 2 6.263398 6.263398 5010 +toconstruct 1 2 6.263398 6.263398 4858 +ofmobil 1 2 6.263398 6.263398 5011 +brigg 1 2 6.263398 6.263398 5008 +internationalworkshop 1 2 6.263398 6.263398 5012 +crystal 1 2 6.263398 6.263398 5013 +electro 1 2 6.263398 6.263398 5014 +mem 1 2 6.263398 6.263398 5007 +reif 1 2 6.263398 6.263398 5015 +furnitur 1 2 6.263398 6.263398 5016 +actuatorarrai 1 2 6.263398 6.263398 5017 +mihailovich 1 2 6.263398 6.263398 5018 +ree 1 2 6.263398 6.263398 5009 +automationnic 1 2 6.263398 6.263398 5019 +andj 1 2 6.263398 6.263398 5020 +latomb 1 2 6.263398 6.263398 5021 +doc 1 2 6.263398 6.263398 5022 +catalogc 1 2 6.263398 6.263398 5023 +apictur 1 2 6.263398 6.263398 5024 +drawn 1 2 6.263398 6.263398 4215 +swallow 1 2 6.263398 6.263398 5025 +donaldbruc 1 1 6.957497 6.957497 10055 +donaldassoci 1 1 6.957497 6.957497 10056 +professorbrd 1 1 6.957497 6.957497 10057 +laboratorydan 1 1 6.957497 6.957497 10058 +microactu 1 1 6.957497 6.957497 10059 +arrayi 1 1 6.957497 6.957497 10060 +squarecentemet 1 1 6.957497 6.957497 10061 +sensoryfeedback 1 1 6.957497 6.957497 10062 +feeder 1 1 6.957497 6.957497 10046 +buildself 1 1 6.957497 6.957497 10063 +propel 1 1 6.957497 6.957497 10064 +amybrigg 1 1 6.957497 6.957497 10065 +surveil 1 1 6.957497 6.957497 10066 +andintercept 1 1 6.957497 6.957497 10067 +tommi 1 1 6.957497 6.957497 10045 +developedbi 1 1 6.957497 6.957497 10068 +ourlab 1 1 6.957497 6.957497 10050 +informationalon 1 1 6.957497 6.957497 10069 +andlow 1 1 6.957497 6.957497 10070 +memsand 1 1 6.957497 6.957497 10071 +vibratori 1 1 6.957497 6.957497 10047 +thealgorithm 1 1 6.957497 6.957497 10072 +toulous 1 1 6.957497 6.957497 10051 +robustgeometr 1 1 6.957497 6.957497 10073 +icra 1 1 6.957497 6.957497 10052 +andimprov 1 1 6.957497 6.957497 10074 +partsfeed 1 1 6.957497 6.957497 10075 +partii 1 1 6.957497 6.957497 10076 +provablygood 1 1 6.957497 6.957497 10053 +kinodynam 1 1 6.957497 6.957497 10048 +robotswith 1 1 6.957497 6.957497 10077 +xavier 1 1 6.957497 6.957497 10049 +forcartesian 1 1 6.957497 6.957497 10078 +canni 1 1 6.957497 6.957497 10079 +inpress 1 1 6.957497 6.957497 10080 +supermodular 1 1 6.957497 6.957497 10081 +andtheoret 1 1 6.957497 6.957497 10082 +jetai 1 1 6.957497 6.957497 10083 +firstquart 1 1 6.957497 6.957497 10084 +inminim 1 1 6.957497 6.957497 10085 +iser 1 1 6.957497 6.957497 10086 +automon 1 1 6.957497 6.957497 10087 +ofjapan 1 1 6.957497 6.957497 10088 +iro 1 1 6.957497 6.957497 10089 +sensorlessmanipul 1 1 6.957497 6.957497 10090 +andautom 1 1 6.957497 6.957497 10091 +ofrobot 1 1 6.957497 6.957497 10092 +otherpubl 1 1 6.957497 6.957497 10093 +dinesh 1 1 6.957497 6.957497 10094 +aval 1 1 6.957497 6.957497 10095 +indexobtain 1 1 6.957497 6.957497 10096 +paperscopi 1 1 6.957497 6.957497 10097 +teamof 1 1 6.957497 6.957497 10098 +movefurnitur 1 1 6.957497 6.957497 10099 +mobot 1 1 6.957497 6.957497 10100 +couch 1 1 6.957497 6.957497 10054 +loretta 1 1 6.957497 6.957497 10101 +pompilio 1 1 6.957497 6.957497 10102 +discoverychannel 1 1 6.957497 6.957497 10103 +funa 1 1 6.957497 6.957497 10104 +moreoth 1 1 6.957497 6.957497 10105 +tallest 1 1 6.957497 6.957497 10106 +darkest 1 1 6.957497 6.957497 10107 +hollywood 1 1 6.957497 6.957497 10108 +merian 1 1 6.957497 6.957497 10109 +wrai 1 1 6.957497 6.957497 10110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..d99f3626 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +analysi 1 124 2.079442 2.079442 98 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +code 1 108 2.197225 2.197225 116 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +text 1 98 2.302585 2.302585 133 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +associ 1 93 2.397895 2.397895 151 +learn 1 86 2.484907 2.484907 170 +build 1 85 2.484907 2.484907 184 +activ 1 84 2.484907 2.484907 182 +contain 1 81 2.484907 2.484907 174 +resourc 1 81 2.484907 2.484907 172 +method 1 80 2.564949 2.564949 213 +upson 1 71 2.639057 2.639057 218 +intellig 1 72 2.639057 2.639057 225 +effici 1 73 2.639057 2.639057 233 +addit 1 74 2.639057 2.639057 228 +workshop 1 71 2.639057 2.639057 239 +nation 1 74 2.639057 2.639057 240 +knowledg 1 67 2.708050 2.708050 243 +goal 1 66 2.708050 2.708050 250 +artifici 1 63 2.772589 2.772589 280 +guid 1 63 2.772589 2.772589 267 +import 1 65 2.772589 2.772589 282 +evalu 1 64 2.772589 2.772589 266 +foundat 1 62 2.772589 2.772589 286 +improv 1 62 2.772589 2.772589 289 +content 1 59 2.833213 2.833213 302 +reason 1 57 2.890372 2.890372 318 +thesi 1 57 2.890372 2.890372 327 +variou 1 56 2.890372 2.890372 317 +finger 1 52 2.995732 2.995732 354 +case 1 51 2.995732 2.995732 351 +investig 1 51 2.995732 2.995732 353 +approach 1 48 3.044522 3.044522 366 +archiv 1 49 3.044522 3.044522 364 +pointer 1 48 3.044522 3.044522 368 +understand 1 47 3.091042 3.091042 384 +featur 1 46 3.091042 3.091042 386 +effect 1 46 3.091042 3.091042 385 +natur 1 44 3.135494 3.135494 406 +mechan 1 43 3.178054 3.178054 416 +offer 1 43 3.178054 3.178054 414 +autom 1 41 3.218876 3.218876 434 +combin 1 42 3.218876 3.218876 421 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +societi 1 40 3.258097 3.258097 456 +seminar 1 38 3.295837 3.295837 470 +origin 1 38 3.295837 3.295837 472 +expect 1 37 3.332205 3.332205 484 +cost 1 37 3.332205 3.332205 480 +tree 1 36 3.367296 3.367296 492 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +within 1 33 3.433987 3.433987 525 +extend 1 32 3.465736 3.465736 539 +chapter 1 32 3.465736 3.465736 536 +focu 1 30 3.555348 3.555348 571 +domain 1 30 3.555348 3.555348 564 +compon 1 30 3.555348 3.555348 570 +focus 1 29 3.583519 3.583519 584 +semant 1 29 3.583519 3.583519 587 +framework 1 28 3.610918 3.610918 606 +symbol 1 27 3.637586 3.637586 620 +determin 1 27 3.637586 3.637586 630 +task 1 25 3.737670 3.737670 678 +handl 1 24 3.761200 3.761200 685 +brows 1 23 3.806662 3.806662 726 +decis 1 23 3.806662 3.806662 728 +springer 1 22 3.850148 3.850148 750 +stat 1 17 4.110874 4.110874 924 +analyz 1 17 4.110874 4.110874 925 +repositori 1 17 4.110874 4.110874 932 +condit 1 16 4.174387 4.174387 975 +practicum 1 16 4.174387 4.174387 960 +jose 1 16 4.174387 4.174387 976 +cognit 1 16 4.174387 4.174387 986 +cambridg 1 16 4.174387 4.174387 1008 +train 1 14 4.317488 4.317488 1066 +embed 1 14 4.317488 4.317488 1102 +massachusett 1 14 4.317488 4.317488 1118 +primarili 1 13 4.382027 4.382027 1185 +context 1 13 4.382027 4.382027 1153 +robust 1 12 4.465908 4.465908 1271 +speech 1 12 4.465908 4.465908 1222 +lake 1 11 4.553877 4.553877 1373 +tour 1 11 4.553877 4.553877 1307 +underli 1 10 4.653960 4.653960 1410 +acquisit 1 10 4.653960 4.653960 1465 +sentenc 1 10 4.653960 4.653960 1413 +prior 1 10 4.653960 4.653960 1438 +softbal 1 9 4.753590 4.753590 1594 +linguist 1 9 4.753590 4.753590 1593 +rel 1 9 4.753590 4.753590 1487 +conferenceon 1 9 4.753590 4.753590 1595 +introductori 1 9 4.753590 4.753590 1479 +morgan 1 9 4.753590 4.753590 1484 +entri 1 8 4.875197 4.875197 1678 +span 1 8 4.875197 4.875197 1751 +extract 1 8 4.875197 4.875197 1728 +empir 1 8 4.875197 4.875197 1722 +aaai 1 8 4.875197 4.875197 1750 +tag 1 7 5.010635 5.010635 1821 +larger 1 7 5.010635 5.010635 1875 +pennsylvania 1 7 5.010635 5.010635 1932 +lawrenc 1 7 5.010635 5.010635 1908 +machinelearn 1 6 5.164786 5.164786 2084 +heurist 1 6 5.164786 5.164786 2125 +syntax 1 6 5.164786 5.164786 2030 +connectionist 1 5 5.347108 5.347108 2430 +amherst 1 5 5.347108 5.347108 2484 +kaufmann 1 5 5.347108 5.347108 2254 +corpu 1 5 5.347108 5.347108 2282 +clair 1 4 5.568345 5.568345 2605 +hallphon 1 4 5.568345 5.568345 2900 +compris 1 4 5.568345 5.568345 2862 +ijcai 1 4 5.568345 5.568345 2901 +disambigu 1 4 5.568345 5.568345 2899 +complic 1 4 5.568345 5.568345 2902 +educlick 1 3 5.857933 5.857933 3612 +tosupport 1 3 5.857933 5.857933 3613 +teachingc 1 3 5.857933 5.857933 3614 +agener 1 3 5.857933 5.857933 3213 +conceptu 1 3 5.857933 5.857933 3214 +fourteenth 1 3 5.857933 5.857933 3615 +ninth 1 3 5.857933 5.857933 3616 +anaheim 1 3 5.857933 5.857933 3271 +citat 1 3 5.857933 5.857933 3617 +penn 1 3 5.857933 5.857933 3094 +interestscours 1 2 6.263398 6.263398 5026 +tandem 1 2 6.263398 6.263398 5027 +learningtechniqu 1 2 6.263398 6.263398 5028 +corpora 1 2 6.263398 6.263398 4269 +gabriel 1 2 6.263398 6.263398 5029 +jointconfer 1 2 6.263398 6.263398 5030 +eleventh 1 2 6.263398 6.263398 5031 +newark 1 2 6.263398 6.263398 5032 +bias 1 2 6.263398 6.263398 5033 +bloomington 1 2 6.263398 6.263398 5034 +twelfth 1 2 6.263398 6.263398 5035 +treebank 1 2 6.263398 6.263398 4138 +cardieclair 1 1 6.957497 6.957497 10120 +cardi 1 1 6.957497 6.957497 10111 +teachselect 1 1 6.957497 6.957497 10121 +publicationsnlp 1 1 6.957497 6.957497 10122 +amalgam 1 1 6.957497 6.957497 10123 +westi 1 1 6.957497 6.957497 10124 +interestsalthough 1 1 6.957497 6.957497 10125 +subfield 1 1 6.957497 6.957497 10126 +cognitivemodel 1 1 6.957497 6.957497 10127 +naturallanguag 1 1 6.957497 6.957497 10114 +forexplor 1 1 6.957497 6.957497 10128 +tworel 1 1 6.957497 6.957497 10129 +reliablyextract 1 1 6.957497 6.957497 10130 +cstr 1 1 6.957497 6.957497 10131 +kenmor 1 1 6.957497 6.957497 10112 +knowledgeacquisit 1 1 6.957497 6.957497 10115 +kenmoreacquir 1 1 6.957497 6.957497 10132 +tworeal 1 1 6.957497 6.957497 10133 +andconcept 1 1 6.957497 6.957497 10134 +anteced 1 1 6.957497 6.957497 10135 +pronoun 1 1 6.957497 6.957497 10113 +disambiguationtask 1 1 6.957497 6.957497 10136 +learningcompon 1 1 6.957497 6.957497 10137 +isembed 1 1 6.957497 6.957497 10138 +inartifici 1 1 6.957497 6.957497 10139 +understandingselect 1 1 6.957497 6.957497 10140 +publicationsautom 1 1 6.957497 6.957497 10141 +wermter 1 1 6.957497 6.957497 10142 +riloff 1 1 6.957497 6.957497 10116 +scheler 1 1 6.957497 6.957497 10143 +andsymbol 1 1 6.957497 6.957497 10144 +tolearn 1 1 6.957497 6.957497 10145 +conceptualsent 1 1 6.957497 6.957497 10146 +cmpsci 1 1 6.957497 6.957497 10147 +tenth 1 1 6.957497 6.957497 10117 +erlbaumassoci 1 1 6.957497 6.957497 10118 +onconstrain 1 1 6.957497 6.957497 10148 +plausibl 1 1 6.957497 6.957497 10149 +lehnert 1 1 6.957497 6.957497 10119 +linkscomput 1 1 6.957497 6.957497 10150 +linguistics 1 1 6.957497 6.957497 10151 +aclspeci 1 1 6.957497 6.957497 10152 +learningmachin 1 1 6.957497 6.957497 10153 +digestmachinelearn 1 1 6.957497 6.957497 10154 +researchersmachin 1 1 6.957497 6.957497 10155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..640201a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +mathemat 1 108 2.197225 2.197225 123 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +java 1 70 2.708050 2.708050 248 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +creat 1 63 2.772589 2.772589 277 +plan 1 65 2.772589 2.772589 272 +ithaca 1 65 2.772589 2.772589 294 +automat 1 61 2.833213 2.833213 306 +browser 1 56 2.890372 2.890372 313 +point 1 58 2.890372 2.890372 319 +scientif 1 53 2.944439 2.944439 341 +natur 1 44 3.135494 3.135494 406 +vision 1 41 3.218876 3.218876 430 +paul 1 38 3.295837 3.295837 471 +tech 1 35 3.401197 3.401197 515 +concept 1 32 3.465736 3.465736 537 +transform 1 32 3.465736 3.465736 542 +express 1 32 3.465736 3.465736 540 +specifi 1 30 3.555348 3.555348 568 +symbol 1 27 3.637586 3.637586 620 +primari 1 25 3.737670 3.737670 669 +motion 1 24 3.761200 3.761200 699 +equat 1 23 3.806662 3.806662 724 +emphasi 1 22 3.850148 3.850148 755 +thu 1 21 3.912023 3.912023 773 +applet 1 20 3.951244 3.951244 827 +geometr 1 19 4.007333 4.007333 852 +comparison 1 19 4.007333 4.007333 863 +senior 1 14 4.317488 4.317488 1120 +shape 1 12 4.465908 4.465908 1245 +sens 1 11 4.553877 4.553877 1305 +mesh 1 11 4.553877 4.553877 1351 +purdu 1 10 4.653960 4.653960 1466 +rhode 1 9 4.753590 4.753590 1579 +rais 1 8 4.875197 4.875197 1711 +canb 1 7 5.010635 5.010635 1846 +beta 1 6 5.164786 5.164786 1993 +compat 1 5 5.347108 5.347108 2485 +diagram 1 5 5.347108 5.347108 2346 +triangul 1 4 5.568345 5.568345 2903 +chew 1 3 5.857933 5.857933 3618 +delaunai 1 3 5.857933 5.857933 3619 +implicitli 1 3 5.857933 5.857933 3620 +voronoi 1 2 6.263398 6.263398 5036 +agenda 1 2 6.263398 6.263398 5037 +scientificsoftwar 1 2 6.263398 6.263398 5038 +acollect 1 2 6.263398 6.263398 5039 +associatephd 1 1 6.957497 6.957497 10156 +eduappletsy 1 1 6.957497 6.957497 10157 +asnetscap 1 1 6.957497 6.957497 10158 +avoronoi 1 1 6.957497 6.957497 10159 +onpract 1 1 6.957497 6.957497 10160 +includedplac 1 1 6.957497 6.957497 10161 +thataris 1 1 6.957497 6.957497 10162 +isspecifi 1 1 6.957497 6.957497 10163 +ofphys 1 1 6.957497 6.957497 10164 +techniquesar 1 1 6.957497 6.957497 10165 +effectiveprogram 1 1 6.957497 6.957497 10166 +myonlin 1 1 6.957497 6.957497 10167 +reportscornel 1 1 6.957497 6.957497 10168 +computerscienceth 1 1 6.957497 6.957497 10169 +simlabprojectaddress 1 1 6.957497 6.957497 10170 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..c922571a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +network 1 168 1.791759 1.791759 61 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +perform 1 143 1.945910 1.945910 74 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +world 1 115 2.197225 2.197225 126 +commun 1 95 2.397895 2.397895 157 +chang 1 82 2.484907 2.484907 163 +activ 1 84 2.484907 2.484907 182 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +server 1 76 2.564949 2.564949 204 +effici 1 73 2.639057 2.639057 233 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +guid 1 63 2.772589 2.772589 267 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +processor 1 54 2.944439 2.944439 335 +advisor 1 51 2.995732 2.995732 355 +cool 1 49 3.044522 3.044522 374 +multipl 1 39 3.258097 3.258097 453 +microsoft 1 38 3.295837 3.295837 468 +concurr 1 34 3.401197 3.401197 501 +toward 1 25 3.737670 3.737670 668 +sport 1 25 3.737670 3.737670 683 +latest 1 21 3.912023 3.912023 785 +runtim 1 19 4.007333 4.007333 858 +anyon 1 17 4.110874 4.110874 916 +latenc 1 16 4.174387 4.174387 993 +matlab 1 14 4.317488 4.317488 1081 +edui 1 13 4.382027 4.382027 1193 +eicken 1 13 4.382027 4.382027 1134 +composit 1 13 4.382027 4.382027 1150 +thedepart 1 11 4.553877 4.553877 1350 +scienceat 1 11 4.553877 4.553877 1375 +chao 1 8 4.875197 4.875197 1753 +soccer 1 8 4.875197 4.875197 1752 +risc 1 6 5.164786 5.164786 2016 +chess 1 5 5.347108 5.347108 2486 +andoper 1 3 5.857933 5.857933 3621 +messageslow 1 2 6.263398 6.263398 5040 +homepagelast 1 2 6.263398 6.263398 5001 +chichao 1 1 6.957497 6.957497 10171 +isthorsten 1 1 6.957497 6.957497 10172 +addressesand 1 1 6.957497 6.957497 10173 +overheterogen 1 1 6.957497 6.957497 10174 +tham 1 1 6.957497 6.957497 10175 +multimatlab 1 1 6.957497 6.957497 10176 +newsbraziliansocc 1 1 6.957497 6.957497 10177 +portugues 1 1 6.957497 6.957497 10178 +andhomepagesoliv 1 1 6.957497 6.957497 10179 +lubrasa 1 1 6.957497 6.957497 10180 +luso 1 1 6.957497 6.957497 10181 +brazilian 1 1 6.957497 6.957497 10182 +associationu 1 1 6.957497 6.957497 10183 +centerjorn 1 1 6.957497 6.957497 10184 +brasilmi 1 1 6.957497 6.957497 10185 +carstockmasterjayhawk 1 1 6.957497 6.957497 10186 +basketballwww 1 1 6.957497 6.957497 10187 +tennisserverback 1 1 6.957497 6.957497 10188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..b6c5df59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +avail 1 169 1.791759 1.791759 48 +construct 1 139 1.945910 1.945910 82 +databas 1 122 2.079442 2.079442 86 +technolog 1 131 2.079442 2.079442 102 +find 1 111 2.197225 2.197225 111 +advanc 1 99 2.302585 2.302585 130 +call 1 91 2.397895 2.397895 153 +homepag 1 93 2.397895 2.397895 148 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +master 1 76 2.564949 2.564949 216 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +improv 1 62 2.772589 2.772589 289 +ithaca 1 65 2.772589 2.772589 294 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +programm 1 39 3.258097 3.258097 445 +small 1 39 3.258097 3.258097 447 +soon 1 36 3.367296 3.367296 494 +edulast 1 17 4.110874 4.110874 927 +countri 1 15 4.248495 4.248495 1059 +hopefulli 1 14 4.317488 4.317488 1071 +forth 1 13 4.382027 4.382027 1186 +avenu 1 12 4.465908 4.465908 1277 +mapl 1 11 4.553877 4.553877 1376 +apolog 1 6 5.164786 5.164786 2046 +singapor 1 5 5.347108 5.347108 2487 +intelligencec 1 4 5.568345 5.568345 2673 +engineeringc 1 4 5.568345 5.568345 2904 +chee 1 3 5.857933 5.857933 3480 +tokyo 1 3 5.857933 5.857933 3622 +keong 1 1 6.957497 6.957497 10189 +liau 1 1 6.957497 6.957497 10190 +liauwelcom 1 1 6.957497 6.957497 10191 +networksc 1 1 6.957497 6.957497 10192 +systemsbaccalaur 1 1 6.957497 6.957497 10193 +japanhomei 1 1 6.957497 6.957497 10194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..799c314f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +tool 1 117 2.079442 2.079442 93 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +center 1 88 2.397895 2.397895 158 +imag 1 91 2.397895 2.397895 161 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +larg 1 82 2.484907 2.484907 168 +activ 1 84 2.484907 2.484907 182 +optim 1 79 2.564949 2.564949 197 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +practic 1 70 2.708050 2.708050 246 +ithaca 1 65 2.772589 2.772589 294 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +numer 1 49 3.044522 3.044522 369 +understand 1 47 3.091042 3.091042 384 +http 1 41 3.218876 3.218876 420 +linear 1 41 3.218876 3.218876 431 +york 1 41 3.218876 3.218876 435 +continu 1 39 3.258097 3.258097 448 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +scale 1 28 3.610918 3.610918 613 +constraint 1 26 3.688879 3.688879 636 +concern 1 25 3.737670 3.737670 666 +primari 1 25 3.737670 3.737670 669 +director 1 22 3.850148 3.850148 767 +thoma 1 18 4.060443 4.060443 901 +minim 1 18 4.060443 4.060443 887 +differenti 1 17 4.110874 4.110874 921 +former 1 17 4.110874 4.110874 956 +match 1 16 4.174387 4.174387 965 +nonlinear 1 14 4.317488 4.317488 1107 +affili 1 13 4.382027 4.382027 1194 +discret 1 13 4.382027 4.382027 1165 +equal 1 10 4.653960 4.653960 1424 +rhode 1 9 4.753590 4.753590 1579 +postdoc 1 8 4.875197 4.875197 1724 +strong 1 6 5.164786 5.164786 2029 +reconstruct 1 6 5.164786 5.164786 2170 +inequ 1 6 5.164786 5.164786 2113 +biomed 1 4 5.568345 5.568345 2905 +coleman 1 2 6.263398 6.263398 5041 +colemanthoma 1 1 6.957497 6.957497 10195 +colemancornel 1 1 6.957497 6.957497 10196 +universityi 1 1 6.957497 6.957497 10197 +professcp 1 1 6.957497 6.957497 10198 +ccop 1 1 6.957497 6.957497 10199 +broadfield 1 1 6.957497 6.957497 10200 +programmi 1 1 6.957497 6.957497 10201 +computationalmethod 1 1 6.957497 6.957497 10202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..093aae65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +resourc 1 81 2.484907 2.484907 172 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +name 1 72 2.639057 2.639057 220 +logic 1 71 2.639057 2.639057 230 +onlin 1 75 2.639057 2.639057 223 +practic 1 70 2.708050 2.708050 246 +view 1 70 2.708050 2.708050 254 +function 1 62 2.772589 2.772589 275 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +collect 1 65 2.772589 2.772589 268 +guid 1 63 2.772589 2.772589 267 +import 1 65 2.772589 2.772589 282 +type 1 61 2.833213 2.833213 296 +maintain 1 51 2.995732 2.995732 342 +set 1 50 3.044522 3.044522 361 +standard 1 48 3.044522 3.044522 365 +life 1 50 3.044522 3.044522 375 +featur 1 46 3.091042 3.091042 386 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +answer 1 45 3.135494 3.135494 391 +mark 1 44 3.135494 3.135494 403 +combin 1 42 3.218876 3.218876 421 +theoret 1 39 3.258097 3.258097 446 +map 1 39 3.258097 3.258097 452 +form 1 39 3.258097 3.258097 443 +origin 1 38 3.295837 3.295837 472 +close 1 38 3.295837 3.295837 465 +formal 1 37 3.332205 3.332205 478 +approxim 1 35 3.401197 3.401197 509 +committe 1 34 3.401197 3.401197 522 +eduoffic 1 33 3.433987 3.433987 531 +often 1 31 3.496508 3.496508 551 +semant 1 29 3.583519 3.583519 587 +mind 1 27 3.637586 3.637586 632 +consist 1 26 3.688879 3.688879 651 +primari 1 25 3.737670 3.737670 669 +strategi 1 25 3.737670 3.737670 682 +seri 1 24 3.761200 3.761200 708 +interpret 1 24 3.761200 3.761200 686 +greg 1 24 3.761200 3.761200 695 +emphasi 1 22 3.850148 3.850148 755 +programminglanguag 1 21 3.912023 3.912023 782 +martin 1 21 3.912023 3.912023 794 +love 1 21 3.912023 3.912023 804 +safeti 1 20 3.951244 3.951244 817 +grad 1 20 3.951244 3.951244 837 +particularli 1 19 4.007333 4.007333 867 +lower 1 18 4.060443 4.060443 886 +whole 1 17 4.110874 4.110874 940 +embed 1 14 4.317488 4.317488 1102 +command 1 14 4.317488 4.317488 1083 +translat 1 13 4.382027 4.382027 1164 +care 1 13 4.382027 4.382027 1177 +speak 1 12 4.465908 4.465908 1283 +calculu 1 12 4.465908 4.465908 1203 +israel 1 11 4.553877 4.553877 1366 +modular 1 10 4.653960 4.653960 1392 +relationship 1 10 4.653960 4.653960 1383 +correspond 1 10 4.653960 4.653960 1382 +guarante 1 10 4.653960 4.653960 1391 +nuprl 1 10 4.653960 4.653960 1402 +intermedi 1 9 4.753590 4.753590 1497 +andth 1 9 4.753590 4.753590 1481 +strength 1 9 4.753590 4.753590 1494 +formul 1 8 4.875197 4.875197 1733 +convers 1 8 4.875197 4.875197 1673 +paradigm 1 8 4.875197 4.875197 1662 +invari 1 8 4.875197 4.875197 1748 +leon 1 8 4.875197 4.875197 1631 +babylon 1 8 4.875197 4.875197 1731 +heart 1 8 4.875197 4.875197 1729 +pageth 1 7 5.010635 5.010635 1939 +hear 1 7 5.010635 5.010635 1940 +understood 1 5 5.347108 5.347108 2364 +stage 1 5 5.347108 5.347108 2488 +morrisett 1 5 5.347108 5.347108 2263 +dexter 1 4 5.568345 5.568345 2855 +kozen 1 4 5.568345 5.568345 2619 +ofprogram 1 4 5.568345 5.568345 2624 +lord 1 4 5.568345 5.568345 2906 +soul 1 4 5.568345 5.568345 2907 +karl 1 3 5.857933 5.857933 3623 +constabl 1 3 5.857933 5.857933 3186 +jesu 1 3 5.857933 5.857933 3624 +pagekarl 1 2 6.263398 6.263398 5043 +halloffic 1 2 6.263398 6.263398 4583 +subtyp 1 2 6.263398 6.263398 4375 +atyp 1 2 6.263398 6.263398 5042 +tractabl 1 2 6.263398 6.263398 4799 +intract 1 2 6.263398 6.263398 5044 +anapproxim 1 2 6.263398 6.263398 5045 +unavail 1 2 6.263398 6.263398 5046 +thenuprl 1 2 6.263398 6.263398 5047 +hereat 1 2 6.263398 6.263398 5048 +papersoth 1 2 6.263398 6.263398 5049 +lurker 1 2 6.263398 6.263398 5050 +andwith 1 2 6.263398 6.263398 5051 +thesecond 1 2 6.263398 6.263398 4128 +pagedepart 1 2 6.263398 6.263398 5052 +crari 1 1 6.957497 6.957497 10204 +crarycrari 1 1 6.957497 6.957497 10205 +researchbroadli 1 1 6.957497 6.957497 10206 +implementationand 1 1 6.957497 6.957497 10207 +kmlwhich 1 1 6.957497 6.957497 10208 +richworld 1 1 6.957497 6.957497 10209 +newprogram 1 1 6.957497 6.957497 10210 +aminterest 1 1 6.957497 6.957497 10211 +deepen 1 1 6.957497 6.957497 10212 +mitig 1 1 6.957497 6.957497 10213 +calculi 1 1 6.957497 6.957497 10203 +modelallow 1 1 6.957497 6.957497 10214 +allowsth 1 1 6.957497 6.957497 10215 +andcorrect 1 1 6.957497 6.957497 10216 +additionaloptim 1 1 6.957497 6.957497 10217 +automatedreason 1 1 6.957497 6.957497 10218 +ofrobert 1 1 6.957497 6.957497 10219 +jasonhickei 1 1 6.957497 6.957497 10220 +linksmark 1 1 6.957497 6.957497 10221 +cansearch 1 1 6.957497 6.957497 10222 +biblestudi 1 1 6.957497 6.957497 10223 +thelord 1 1 6.957497 6.957497 10224 +neighbor 1 1 6.957497 6.957497 10225 +commandmentgreat 1 1 6.957497 6.957497 10226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..d561ae29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +phone 1 175 1.791759 1.791759 45 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +welcom 1 122 2.079442 2.079442 99 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +octob 1 89 2.397895 2.397895 156 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +institut 1 84 2.484907 2.484907 187 +solut 1 82 2.484907 2.484907 162 +contain 1 81 2.484907 2.484907 174 +second 1 81 2.484907 2.484907 166 +journal 1 83 2.484907 2.484907 183 +larg 1 82 2.484907 2.484907 168 +state 1 76 2.564949 2.564949 207 +optim 1 79 2.564949 2.564949 197 +decemb 1 80 2.564949 2.564949 215 +septemb 1 65 2.772589 2.772589 274 +ithaca 1 65 2.772589 2.772589 294 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +numer 1 49 3.044522 3.044522 369 +algebra 1 45 3.135494 3.135494 394 +linear 1 41 3.218876 3.218876 431 +map 1 39 3.258097 3.258097 452 +close 1 38 3.295837 3.295837 465 +tree 1 36 3.367296 3.367296 492 +least 1 35 3.401197 3.401197 516 +posit 1 31 3.496508 3.496508 552 +packag 1 28 3.610918 3.610918 614 +multiprocessor 1 28 3.610918 3.610918 605 +scale 1 28 3.610918 3.610918 613 +bound 1 26 3.688879 3.688879 659 +deal 1 22 3.850148 3.850148 736 +siam 1 21 3.912023 3.912023 800 +definit 1 19 4.007333 4.007333 864 +matrix 1 17 4.110874 4.110874 933 +spars 1 16 4.174387 4.174387 989 +rank 1 14 4.317488 4.317488 1063 +squar 1 14 4.317488 4.317488 1082 +francisco 1 14 4.317488 4.317488 1095 +affili 1 13 4.382027 4.382027 1194 +philadelphia 1 12 4.465908 4.465908 1244 +matric 1 10 4.653960 4.653960 1399 +factor 1 9 4.753590 4.753590 1544 +conferenceon 1 9 4.753590 4.753590 1595 +simon 1 8 4.875197 4.875197 1697 +watson 1 8 4.875197 4.875197 1691 +univeristi 1 8 4.875197 4.875197 1754 +pennsylvania 1 7 5.010635 5.010635 1932 +cornellunivers 1 7 5.010635 5.010635 1916 +sixth 1 7 5.010635 5.010635 1917 +fifth 1 7 5.010635 5.010635 1931 +compact 1 7 5.010635 5.010635 1907 +dens 1 6 5.164786 5.164786 2122 +reed 1 6 5.164786 5.164786 2086 +row 1 5 5.347108 5.347108 2330 +seventh 1 5 5.347108 5.347108 2464 +thecornel 1 4 5.568345 5.568345 2892 +symmetr 1 4 5.568345 5.568345 2908 +orthogon 1 4 5.568345 5.568345 2832 +parallelprocess 1 3 5.857933 5.857933 3626 +ctctr 1 3 5.857933 5.857933 3625 +professorthoma 1 2 6.263398 6.263398 5053 +coleman 1 2 6.263398 6.263398 5041 +defici 1 2 6.263398 6.263398 5054 +idaho 1 2 6.263398 6.263398 5055 +ondistribut 1 2 6.263398 6.263398 4320 +solutionof 1 2 6.263398 6.263398 5056 +key 1 2 6.263398 6.263398 5057 +dongarra 1 2 6.263398 6.263398 5058 +kennedi 1 2 6.263398 6.263398 4539 +chunguang 1 1 6.957497 6.957497 10229 +sunchunguang 1 1 6.957497 6.957497 10232 +sunphd 1 1 6.957497 6.957497 10233 +ppcx 1 1 6.957497 6.957497 10234 +pssl 1 1 6.957497 6.957497 10235 +psspd 1 1 6.957497 6.957497 10236 +systemsrec 1 1 6.957497 6.957497 10237 +lecturesparallel 1 1 6.957497 6.957497 10238 +coeur 1 1 6.957497 6.957497 10239 +alen 1 1 6.957497 6.957497 10240 +multifront 1 1 6.957497 6.957497 10227 +bailei 1 1 6.957497 6.957497 10241 +bjorstad 1 1 6.957497 6.957497 10242 +gilbert 1 1 6.957497 6.957497 10243 +mascagni 1 1 6.957497 6.957497 10244 +schreiber 1 1 6.957497 6.957497 10245 +torczon 1 1 6.957497 6.957497 10246 +choleskyfactor 1 1 6.957497 6.957497 10247 +pothen 1 1 6.957497 6.957497 10228 +matriceson 1 1 6.957497 6.957497 10248 +processingfor 1 1 6.957497 6.957497 10230 +sinovec 1 1 6.957497 6.957497 10249 +leuz 1 1 6.957497 6.957497 10250 +petzold 1 1 6.957497 6.957497 10251 +cliqu 1 1 6.957497 6.957497 10231 +messina 1 1 6.957497 6.957497 10252 +sorensen 1 1 6.957497 6.957497 10253 +voigt 1 1 6.957497 6.957497 10254 +structuresin 1 1 6.957497 6.957497 10255 +csun 1 1 6.957497 6.957497 10256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..6b9358a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +phone 1 175 1.791759 1.791759 45 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +hall 1 146 1.945910 1.945910 65 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +user 1 104 2.302585 2.302585 137 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +commun 1 95 2.397895 2.397895 157 +sinc 1 90 2.397895 2.397895 159 +proceed 1 93 2.397895 2.397895 152 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +ieee 1 86 2.484907 2.484907 190 +exampl 1 77 2.564949 2.564949 195 +server 1 76 2.564949 2.564949 204 +messag 1 76 2.564949 2.564949 212 +david 1 71 2.639057 2.639057 232 +upson 1 71 2.639057 2.639057 218 +involv 1 71 2.639057 2.639057 227 +servic 1 72 2.639057 2.639057 236 +addit 1 74 2.639057 2.639057 228 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +would 1 67 2.708050 2.708050 251 +receiv 1 66 2.708050 2.708050 244 +august 1 66 2.708050 2.708050 257 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +type 1 61 2.833213 2.833213 296 +locat 1 59 2.833213 2.833213 303 +thesi 1 57 2.890372 2.890372 327 +allow 1 53 2.944439 2.944439 333 +februari 1 54 2.944439 2.944439 328 +maintain 1 51 2.995732 2.995732 342 +set 1 50 3.044522 3.044522 361 +basic 1 50 3.044522 3.044522 360 +keep 1 44 3.135494 3.135494 409 +made 1 44 3.135494 3.135494 398 +protocol 1 45 3.135494 3.135494 407 +might 1 41 3.218876 3.218876 426 +howev 1 41 3.218876 3.218876 422 +join 1 39 3.258097 3.258097 457 +origin 1 38 3.295837 3.295837 472 +respons 1 37 3.332205 3.332205 476 +toler 1 33 3.433987 3.433987 533 +within 1 33 3.433987 3.433987 525 +fault 1 32 3.465736 3.465736 547 +dissert 1 32 3.465736 3.465736 549 +secur 1 30 3.555348 3.555348 577 +semant 1 29 3.583519 3.583519 587 +limit 1 29 3.583519 3.583519 585 +propos 1 28 3.610918 3.610918 602 +static 1 27 3.637586 3.637586 619 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +client 1 25 3.737670 3.737670 679 +mike 1 24 3.761200 3.761200 703 +higher 1 24 3.761200 3.761200 690 +wish 1 24 3.761200 3.761200 692 +mobil 1 23 3.806662 3.806662 730 +cooper 1 22 3.850148 3.850148 757 +scheme 1 20 3.951244 3.951244 818 +accept 1 18 4.060443 4.060443 879 +along 1 18 4.060443 4.060443 878 +layer 1 17 4.110874 4.110874 926 +permit 1 16 4.174387 4.174387 962 +devic 1 16 4.174387 4.174387 1002 +overhead 1 15 4.248495 4.248495 1035 +horu 1 14 4.317488 4.317488 1116 +command 1 14 4.317488 4.317488 1083 +necessari 1 13 4.382027 4.382027 1147 +whose 1 13 4.382027 4.382027 1166 +carri 1 13 4.382027 4.382027 1152 +kenneth 1 12 4.465908 4.465908 1265 +arbitrari 1 11 4.553877 4.553877 1359 +ofcomput 1 10 4.653960 4.653960 1442 +assumpt 1 9 4.753590 4.753590 1514 +trust 1 9 4.753590 4.753590 1583 +desir 1 9 4.753590 4.753590 1542 +screen 1 9 4.753590 4.753590 1577 +informationabout 1 9 4.753590 4.753590 1515 +birman 1 9 4.753590 4.753590 1531 +crash 1 8 4.875197 4.875197 1616 +accomplish 1 8 4.875197 4.875197 1755 +synchroni 1 7 5.010635 5.010635 1923 +fromth 1 7 5.010635 5.010635 1802 +slightli 1 7 5.010635 5.010635 1795 +encrypt 1 7 5.010635 5.010635 1835 +prevent 1 7 5.010635 5.010635 1827 +privaci 1 6 5.164786 5.164786 2144 +extern 1 6 5.164786 5.164786 2105 +authent 1 5 5.347108 5.347108 2306 +trivial 1 4 5.568345 5.568345 2786 +witha 1 4 5.568345 5.568345 2617 +complic 1 4 5.568345 5.568345 2902 +ofinform 1 4 5.568345 5.568345 2707 +reveal 1 4 5.568345 5.568345 2647 +wireless 1 4 5.568345 5.568345 2693 +anthoni 1 4 5.568345 5.568345 2792 +reiter 1 3 5.857933 5.857933 3461 +privat 1 3 5.857933 5.857933 3496 +proper 1 3 5.857933 5.857933 3323 +recipi 1 3 5.857933 5.857933 3627 +owner 1 3 5.857933 5.857933 3531 +attack 1 3 5.857933 5.857933 3168 +preserv 1 3 5.857933 5.857933 3628 +mobilecomput 1 3 5.857933 5.857933 3629 +postdoctor 1 2 6.263398 6.263398 5059 +honest 1 2 6.263398 6.263398 5060 +ofvirtu 1 2 6.263398 6.263398 5061 +untrust 1 2 6.263398 6.263398 4997 +communicatewith 1 2 6.263398 6.263398 5062 +unlik 1 2 6.263398 6.263398 5063 +sender 1 2 6.263398 6.263398 5064 +adversari 1 2 6.263398 6.263398 5065 +ofmobil 1 2 6.263398 6.263398 5011 +securityand 1 2 6.263398 6.263398 5066 +dcooper 1 1 6.957497 6.957497 10258 +securityarchitectur 1 1 6.957497 6.957497 10259 +horuswhich 1 1 6.957497 6.957497 10260 +kerberosnetwork 1 1 6.957497 6.957497 10261 +cryptograph 1 1 6.957497 6.957497 10262 +toprovid 1 1 6.957497 6.957497 10263 +originalimplement 1 1 6.957497 6.957497 10264 +failuremodel 1 1 6.957497 6.957497 10265 +anyprocess 1 1 6.957497 6.957497 10266 +isposs 1 1 6.957497 6.957497 10267 +weaker 1 1 6.957497 6.957497 10268 +untrustedprocess 1 1 6.957497 6.957497 10269 +clientsto 1 1 6.957497 6.957497 10270 +horussecur 1 1 6.957497 6.957497 10271 +relationshipsamong 1 1 6.957497 6.957497 10257 +keymanag 1 1 6.957497 6.957497 10272 +impersonateanoth 1 1 6.957497 6.957497 10273 +achieveth 1 1 6.957497 6.957497 10274 +asclient 1 1 6.957497 6.957497 10275 +inherentin 1 1 6.957497 6.957497 10276 +contentsof 1 1 6.957497 6.957497 10277 +hiddenwith 1 1 6.957497 6.957497 10278 +outsidersfrom 1 1 6.957497 6.957497 10279 +maintainingth 1 1 6.957497 6.957497 10280 +unlink 1 1 6.957497 6.957497 10281 +chaum 1 1 6.957497 6.957497 10282 +severaloth 1 1 6.957497 6.957497 10283 +staticnetwork 1 1 6.957497 6.957497 10284 +mobilecommun 1 1 6.957497 6.957497 10285 +themessag 1 1 6.957497 6.957497 10286 +advisorken 1 1 6.957497 6.957497 10287 +internaland 1 1 6.957497 6.957497 10288 +apriv 1 1 6.957497 6.957497 10289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..c1e36682 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +project 1 340 1.098612 1.098612 18 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +make 1 111 2.197225 2.197225 120 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +question 1 91 2.397895 2.397895 141 +mani 1 92 2.397895 2.397895 150 +resum 1 79 2.564949 2.564949 217 +would 1 67 2.708050 2.708050 251 +type 1 61 2.833213 2.833213 296 +space 1 57 2.890372 2.890372 310 +direct 1 57 2.890372 2.890372 316 +digit 1 52 2.995732 2.995732 348 +hand 1 37 3.332205 3.332205 475 +field 1 37 3.332205 3.332205 482 +game 1 36 3.367296 3.367296 498 +word 1 34 3.401197 3.401197 508 +express 1 32 3.465736 3.465736 540 +fault 1 32 3.465736 3.465736 547 +human 1 32 3.465736 3.465736 546 +scientist 1 31 3.496508 3.496508 560 +travel 1 30 3.555348 3.555348 579 +releas 1 28 3.610918 3.610918 616 +hope 1 28 3.610918 3.610918 610 +mine 1 26 3.688879 3.688879 654 +never 1 25 3.737670 3.737670 671 +reach 1 24 3.761200 3.761200 688 +instead 1 22 3.850148 3.850148 756 +born 1 21 3.912023 3.912023 798 +love 1 21 3.912023 3.912023 804 +ever 1 19 4.007333 4.007333 872 +brief 1 16 4.174387 4.174387 1001 +role 1 14 4.317488 4.317488 1101 +sai 1 13 4.382027 4.382027 1175 +pretti 1 13 4.382027 4.382027 1191 +cannot 1 13 4.382027 4.382027 1144 +rest 1 12 4.465908 4.465908 1259 +surf 1 11 4.553877 4.553877 1301 +shore 1 11 4.553877 4.553877 1377 +night 1 11 4.553877 4.553877 1319 +road 1 11 4.553877 4.553877 1374 +true 1 10 4.653960 4.653960 1422 +poetri 1 9 4.753590 4.753590 1596 +told 1 8 4.875197 4.875197 1658 +heart 1 8 4.875197 4.875197 1729 +hold 1 8 4.875197 4.875197 1645 +prize 1 6 5.164786 5.164786 2150 +gentl 1 5 5.347108 5.347108 2264 +facial 1 5 5.347108 5.347108 2438 +unknown 1 5 5.347108 5.347108 2318 +favor 1 5 5.347108 5.347108 2414 +suffer 1 5 5.347108 5.347108 2268 +cyber 1 4 5.568345 5.568345 2909 +lawyer 1 4 5.568345 5.568345 2836 +uncertain 1 4 5.568345 5.568345 2758 +fals 1 4 5.568345 5.568345 2861 +dark 1 4 5.568345 5.568345 2910 +soul 1 4 5.568345 5.568345 2907 +fear 1 4 5.568345 5.568345 2911 +faith 1 3 5.857933 5.857933 3363 +wise 1 3 5.857933 5.857933 3631 +dread 1 3 5.857933 5.857933 3630 +romanc 1 3 5.857933 5.857933 3632 +passion 1 3 5.857933 5.857933 3633 +tortur 1 3 5.857933 5.857933 3634 +diseas 1 3 5.857933 5.857933 3635 +pain 1 3 5.857933 5.857933 3460 +blame 1 3 5.857933 5.857933 3636 +cold 1 3 5.857933 5.857933 3637 +blink 1 2 6.263398 6.263398 5067 +ey 1 2 6.263398 6.263398 5068 +kei 1 2 6.263398 6.263398 4812 +mice 1 2 6.263398 6.263398 5069 +autobiographi 1 2 6.263398 6.263398 5070 +concret 1 2 6.263398 6.263398 4276 +ear 1 2 6.263398 6.263398 5071 +burn 1 2 6.263398 6.263398 4447 +soft 1 2 6.263398 6.263398 5072 +belov 1 2 6.263398 6.263398 5073 +broken 1 2 6.263398 6.263398 5074 +horror 1 2 6.263398 6.263398 5075 +tear 1 2 6.263398 6.263398 5076 +deed 1 2 6.263398 6.263398 5077 +frozen 1 2 6.263398 6.263398 5078 +abodedan 1 1 6.957497 6.957497 10292 +abodegreet 1 1 6.957497 6.957497 10293 +humbl 1 1 6.957497 6.957497 10294 +prithe 1 1 6.957497 6.957497 10295 +teari 1 1 6.957497 6.957497 10296 +weari 1 1 6.957497 6.957497 10297 +thyselv 1 1 6.957497 6.957497 10298 +abod 1 1 6.957497 6.957497 10299 +deidr 1 1 6.957497 6.957497 10290 +emot 1 1 6.957497 6.957497 10300 +simnet 1 1 6.957497 6.957497 10301 +builder 1 1 6.957497 6.957497 10302 +faiththei 1 1 6.957497 6.957497 10303 +hardli 1 1 6.957497 6.957497 10304 +ferro 1 1 6.957497 6.957497 10305 +scorn 1 1 6.957497 6.957497 10306 +bend 1 1 6.957497 6.957497 10307 +tone 1 1 6.957497 6.957497 10308 +unseen 1 1 6.957497 6.957497 10309 +unheard 1 1 6.957497 6.957497 10310 +untouch 1 1 6.957497 6.957497 10311 +silenc 1 1 6.957497 6.957497 10312 +yearn 1 1 6.957497 6.957497 10313 +lordlovewarm 1 1 6.957497 6.957497 10314 +friendship 1 1 6.957497 6.957497 10315 +mindless 1 1 6.957497 6.957497 10316 +infatu 1 1 6.957497 6.957497 10317 +sensual 1 1 6.957497 6.957497 10318 +sigh 1 1 6.957497 6.957497 10319 +hopemyth 1 1 6.957497 6.957497 10320 +beauteou 1 1 6.957497 6.957497 10321 +pandora 1 1 6.957497 6.957497 10291 +demon 1 1 6.957497 6.957497 10322 +astrai 1 1 6.957497 6.957497 10323 +glimmer 1 1 6.957497 6.957497 10324 +tread 1 1 6.957497 6.957497 10325 +amidst 1 1 6.957497 6.957497 10326 +thorn 1 1 6.957497 6.957497 10327 +filthi 1 1 6.957497 6.957497 10328 +miseri 1 1 6.957497 6.957497 10329 +etern 1 1 6.957497 6.957497 10330 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..5dd13200 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +select 1 91 2.397895 2.397895 154 +imag 1 91 2.397895 2.397895 161 +search 1 95 2.397895 2.397895 155 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +server 1 76 2.564949 2.564949 204 +refer 1 78 2.564949 2.564949 203 +collect 1 65 2.772589 2.772589 268 +ithaca 1 65 2.772589 2.772589 294 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +digit 1 52 2.995732 2.995732 348 +understand 1 47 3.091042 3.091042 384 +directori 1 45 3.135494 3.135494 396 +music 1 42 3.218876 3.218876 436 +global 1 34 3.401197 3.401197 520 +secur 1 30 3.555348 3.555348 577 +weather 1 28 3.610918 3.610918 618 +berkelei 1 26 3.688879 3.688879 657 +recognit 1 23 3.806662 3.806662 723 +togeth 1 23 3.806662 3.806662 714 +head 1 23 3.806662 3.806662 732 +siam 1 21 3.912023 3.912023 800 +navig 1 21 3.912023 3.912023 796 +lyco 1 19 4.007333 4.007333 871 +excel 1 19 4.007333 4.007333 868 +stanford 1 17 4.110874 4.110874 955 +whole 1 17 4.110874 4.110874 940 +charact 1 15 4.248495 4.248495 1028 +dean 1 14 4.317488 4.317488 1104 +anonym 1 14 4.317488 4.317488 1100 +audio 1 14 4.317488 4.317488 1094 +captur 1 12 4.465908 4.465908 1232 +michigan 1 11 4.553877 4.553877 1368 +earth 1 10 4.653960 4.653960 1463 +catalog 1 10 4.653960 4.653960 1431 +folk 1 9 4.753590 4.753590 1597 +illinoi 1 7 5.010635 5.010635 1941 +gatewai 1 7 5.010635 5.010635 1942 +scout 1 7 5.010635 5.010635 1903 +legal 1 6 5.164786 5.164786 2094 +gopher 1 6 5.164786 5.164786 1982 +forecast 1 6 5.164786 5.164786 2171 +elsewher 1 5 5.347108 5.347108 2444 +cuinfo 1 4 5.568345 5.568345 2626 +planet 1 4 5.568345 5.568345 2912 +gear 1 4 5.568345 5.568345 2891 +krafft 1 3 5.857933 5.857933 3638 +archi 1 3 5.857933 5.857933 3639 +cern 1 2 6.263398 6.263398 5079 +urlsdean 1 1 6.957497 6.957497 10331 +interestcornel 1 1 6.957497 6.957497 10332 +dimund 1 1 6.957497 6.957497 10333 +librarysearch 1 1 6.957497 6.957497 10334 +veronica 1 1 6.957497 6.957497 10335 +faqsvari 1 1 6.957497 6.957497 10336 +folkbook 1 1 6.957497 6.957497 10337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..fd420c12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +select 1 91 2.397895 2.397895 154 +build 1 85 2.484907 2.484907 184 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +issu 1 78 2.564949 2.564949 211 +upson 1 71 2.639057 2.639057 218 +servic 1 72 2.639057 2.639057 236 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +materi 1 75 2.639057 2.639057 221 +investig 1 51 2.995732 2.995732 353 +protocol 1 45 3.135494 3.135494 407 +secur 1 30 3.555348 3.555348 577 +intend 1 28 3.610918 3.610918 599 +administr 1 27 3.637586 3.637586 628 +universityithaca 1 24 3.761200 3.761200 710 +initi 1 23 3.806662 3.806662 717 +director 1 22 3.850148 3.850148 767 +sciencecornel 1 22 3.850148 3.850148 768 +serv 1 22 3.850148 3.850148 758 +inth 1 22 3.850148 3.850148 741 +fund 1 21 3.912023 3.912023 805 +similar 1 21 3.912023 3.912023 771 +facil 1 20 3.951244 3.951244 814 +break 1 20 3.951244 3.951244 812 +five 1 19 4.007333 4.007333 841 +side 1 15 4.248495 4.248495 1022 +carl 1 15 4.248495 4.248495 1024 +dean 1 14 4.317488 4.317488 1104 +emploi 1 12 4.465908 4.465908 1284 +arpa 1 11 4.553877 4.553877 1369 +eight 1 11 4.553877 4.553877 1331 +princip 1 10 4.653960 4.653960 1397 +consortium 1 10 4.653960 4.653960 1467 +rapid 1 10 4.653960 4.653960 1453 +researchi 1 8 4.875197 4.875197 1756 +xerox 1 8 4.875197 4.875197 1725 +davi 1 7 5.010635 5.010635 1888 +sciencedepart 1 6 5.164786 5.164786 2172 +interestedin 1 5 5.347108 5.347108 2260 +employe 1 4 5.568345 5.568345 2717 +krafft 1 3 5.857933 5.857933 3638 +halldepart 1 3 5.857933 5.857933 3641 +dienst 1 3 5.857933 5.857933 3640 +dissemin 1 2 6.263398 6.263398 5080 +thedesign 1 2 6.263398 6.263398 4251 +lagoz 1 2 6.263398 6.263398 5081 +facilitiesaddress 1 1 6.957497 6.957497 10338 +guis 1 1 6.957497 6.957497 10339 +anadministr 1 1 6.957497 6.957497 10340 +andworri 1 1 6.957497 6.957497 10341 +spart 1 1 6.957497 6.957497 10342 +thecorpor 1 1 6.957497 6.957497 10343 +cnri 1 1 6.957497 6.957497 10344 +technicalresearch 1 1 6.957497 6.957497 10345 +theexist 1 1 6.957497 6.957497 10346 +disseminationov 1 1 6.957497 6.957497 10347 +atechn 1 1 6.957497 6.957497 10348 +ondienst 1 1 6.957497 6.957497 10349 +togethera 1 1 6.957497 6.957497 10350 +url 1 1 6.957497 6.957497 10351 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..7bb26f02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +address 1 170 1.791759 1.791759 62 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +analysi 1 124 2.079442 2.079442 98 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +good 1 77 2.564949 2.564949 200 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +numer 1 49 3.044522 3.044522 369 +advis 1 6 5.164786 5.164786 2173 +divakar 1 1 6.957497 6.957497 10352 +pagedivakar 1 1 6.957497 6.957497 10353 +viswanathdivakar 1 1 6.957497 6.957497 10354 +isnumer 1 1 6.957497 6.957497 10355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..9838d1fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +advanc 1 99 2.302585 2.302585 130 +institut 1 84 2.484907 2.484907 187 +know 1 80 2.564949 2.564949 198 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +appli 1 71 2.639057 2.639057 226 +html 1 75 2.639057 2.639057 235 +practic 1 70 2.708050 2.708050 246 +receiv 1 66 2.708050 2.708050 244 +virtual 1 62 2.772589 2.772589 285 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +locat 1 59 2.833213 2.833213 303 +friend 1 48 3.044522 3.044522 376 +physic 1 47 3.091042 3.091042 377 +york 1 41 3.218876 3.218876 435 +littl 1 39 3.258097 3.258097 454 +live 1 40 3.258097 3.258097 451 +china 1 37 3.332205 3.332205 487 +winter 1 36 3.367296 3.367296 500 +except 1 28 3.610918 3.610918 607 +campu 1 27 3.637586 3.637586 623 +beij 1 19 4.007333 4.007333 876 +miss 1 19 4.007333 4.007333 866 +beauti 1 18 4.060443 4.060443 912 +miller 1 17 4.110874 4.110874 949 +practicum 1 16 4.174387 4.174387 960 +mayb 1 15 4.248495 4.248495 1014 +anywai 1 15 4.248495 4.248495 1047 +translat 1 13 4.382027 4.382027 1164 +central 1 13 4.382027 4.382027 1160 +tsinghua 1 13 4.382027 4.382027 1195 +realiti 1 12 4.465908 4.465908 1272 +jersei 1 9 4.753590 4.753590 1587 +brought 1 7 5.010635 5.010635 1925 +railroad 1 6 5.164786 5.164786 2161 +coursesc 1 4 5.568345 5.568345 2692 +engineeringc 1 4 5.568345 5.568345 2904 +doubt 1 3 5.857933 5.857933 3119 +gorgeou 1 2 6.263398 6.263398 5082 +newark 1 2 6.263398 6.263398 5032 +diyu 1 1 6.957497 6.957497 10356 +pagediyu 1 1 6.957497 6.957497 10357 +daisi 1 1 6.957497 6.957497 10358 +translatorsfal 1 1 6.957497 6.957497 10359 +systemc 1 1 6.957497 6.957497 10360 +systemsel 1 1 6.957497 6.957497 10361 +telecommunicationsm 1 1 6.957497 6.957497 10362 +projectorigin 1 1 6.957497 6.957497 10363 +projectsinc 1 1 6.957497 6.957497 10364 +unviers 1 1 6.957497 6.957497 10365 +linksjava 1 1 6.957497 6.957497 10366 +tkfavorit 1 1 6.957497 6.957497 10367 +sitestimecnnlondon 1 1 6.957497 6.957497 10368 +timeswashington 1 1 6.957497 6.957497 10369 +postchines 1 1 6.957497 6.957497 10370 +digestchina 1 1 6.957497 6.957497 10371 +digestfeng 1 1 6.957497 6.957497 10372 +yuanxin 1 1 6.957497 6.957497 10373 +siart 1 1 6.957497 6.957497 10374 +chinaloc 1 1 6.957497 6.957497 10375 +connectionsctc 1 1 6.957497 6.957497 10376 +sunlabweathermovi 1 1 6.957497 6.957497 10377 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..90957caf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +document 1 121 2.079442 2.079442 89 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +structur 1 106 2.197225 2.197225 105 +teach 1 108 2.197225 2.197225 112 +techniqu 1 99 2.302585 2.302585 138 +associ 1 93 2.397895 2.397895 151 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +wide 1 84 2.484907 2.484907 185 +educ 1 86 2.484907 2.484907 191 +activ 1 84 2.484907 2.484907 182 +start 1 83 2.484907 2.484907 173 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +main 1 67 2.708050 2.708050 256 +view 1 70 2.708050 2.708050 254 +evalu 1 64 2.772589 2.772589 266 +type 1 61 2.833213 2.833213 296 +share 1 59 2.833213 2.833213 304 +index 1 56 2.890372 2.890372 309 +profession 1 51 2.995732 2.995732 345 +investig 1 51 2.995732 2.995732 353 +visual 1 48 3.044522 3.044522 372 +format 1 48 3.044522 3.044522 356 +without 1 50 3.044522 3.044522 370 +cool 1 49 3.044522 3.044522 374 +electron 1 47 3.091042 3.091042 379 +video 1 44 3.135494 3.135494 405 +favorit 1 44 3.135494 3.135494 410 +offer 1 43 3.178054 3.178054 414 +fast 1 42 3.218876 3.218876 429 +vision 1 41 3.218876 3.218876 430 +theoret 1 39 3.258097 3.258097 446 +author 1 39 3.258097 3.258097 450 +small 1 39 3.258097 3.258097 447 +brian 1 38 3.295837 3.295837 466 +approxim 1 35 3.401197 3.401197 509 +collabor 1 32 3.465736 3.465736 543 +rang 1 30 3.555348 3.555348 565 +chair 1 29 3.583519 3.583519 596 +held 1 28 3.610918 3.610918 600 +compar 1 26 3.688879 3.688879 648 +sport 1 25 3.737670 3.737670 683 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +highli 1 23 3.806662 3.806662 725 +geometri 1 22 3.850148 3.850148 752 +smith 1 20 3.951244 3.951244 820 +geometr 1 19 4.007333 4.007333 852 +monitor 1 17 4.110874 4.110874 941 +match 1 16 4.174387 4.174387 965 +remot 1 15 4.248495 4.248495 1041 +track 1 15 4.248495 4.248495 1029 +matlab 1 14 4.317488 4.317488 1081 +daniel 1 12 4.465908 4.465908 1233 +target 1 12 4.465908 4.465908 1282 +extrem 1 11 4.553877 4.553877 1330 +mountain 1 10 4.653960 4.653960 1456 +bike 1 10 4.653960 4.653960 1468 +juan 1 9 4.753590 4.753590 1580 +xerox 1 8 4.875197 4.875197 1725 +compact 1 7 5.010635 5.010635 1907 +huttenloch 1 6 5.164786 5.164786 1983 +fraction 1 5 5.347108 5.347108 2259 +conot 1 5 5.347108 5.347108 2245 +stupid 1 5 5.347108 5.347108 2489 +hausdorff 1 4 5.568345 5.568345 2633 +identif 1 4 5.568345 5.568345 2773 +cvpr 1 4 5.568345 5.568345 2761 +geek 1 2 6.263398 6.263398 5083 +snowboard 1 2 6.263398 6.263398 5084 +professordph 1 1 6.957497 6.957497 10378 +eigenspac 1 1 6.957497 6.957497 10379 +digipap 1 1 6.957497 6.957497 10380 +viewabl 1 1 6.957497 6.957497 10381 +parc 1 1 6.957497 6.957497 10382 +attitud 1 1 6.957497 6.957497 10383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..ecd08ee8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +technolog 1 131 2.079442 2.079442 102 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +mani 1 92 2.397895 2.397895 150 +present 1 91 2.397895 2.397895 145 +wide 1 84 2.484907 2.484907 185 +method 1 80 2.564949 2.564949 213 +exampl 1 77 2.564949 2.564949 195 +june 1 79 2.564949 2.564949 214 +addit 1 74 2.639057 2.639057 228 +order 1 69 2.708050 2.708050 249 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +simpl 1 60 2.833213 2.833213 298 +thesi 1 57 2.890372 2.890372 327 +investig 1 51 2.995732 2.995732 353 +basic 1 50 3.044522 3.044522 360 +understand 1 47 3.091042 3.091042 384 +algebra 1 45 3.135494 3.135494 394 +better 1 45 3.135494 3.135494 401 +form 1 39 3.258097 3.258097 443 +theoret 1 39 3.258097 3.258097 446 +prototyp 1 38 3.295837 3.295837 463 +concurr 1 34 3.401197 3.401197 501 +express 1 32 3.465736 3.465736 540 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +computersci 1 30 3.555348 3.555348 562 +semant 1 29 3.583519 3.583519 587 +becom 1 28 3.610918 3.610918 603 +full 1 28 3.610918 3.610918 615 +effort 1 26 3.688879 3.688879 652 +compar 1 26 3.688879 3.688879 648 +input 1 23 3.806662 3.806662 727 +verif 1 20 3.951244 3.951244 826 +aid 1 18 4.060443 4.060443 904 +former 1 17 4.110874 4.110874 956 +edui 1 13 4.382027 4.382027 1193 +calculu 1 12 4.465908 4.465908 1203 +verifi 1 12 4.465908 4.465908 1261 +custom 1 10 4.653960 4.653960 1414 +latter 1 9 4.753590 4.753590 1522 +bloom 1 4 5.568345 5.568345 2913 +commonli 1 4 5.568345 5.568345 2877 +metatheori 1 3 5.857933 5.857933 3642 +allevi 1 3 5.857933 5.857933 3643 +checker 1 3 5.857933 5.857933 3644 +theproblem 1 2 6.263398 6.263398 4560 +inher 1 2 6.263398 6.263398 5086 +lnc 1 2 6.263398 6.263398 5085 +ashvin 1 1 6.957497 6.957497 10385 +dsouza 1 1 6.957497 6.957497 10384 +bard 1 1 6.957497 6.957497 10386 +oftool 1 1 6.957497 6.957497 10387 +andverif 1 1 6.957497 6.957497 10388 +withrespect 1 1 6.957497 6.957497 10389 +immediatelyavail 1 1 6.957497 6.957497 10390 +duplic 1 1 6.957497 6.957497 10391 +gso 1 1 6.957497 6.957497 10392 +loto 1 1 6.957497 6.957497 10393 +exploringappl 1 1 6.957497 6.957497 10394 +bdd 1 1 6.957497 6.957497 10395 +algebraterm 1 1 6.957497 6.957497 10396 +postscipt 1 1 6.957497 6.957497 10397 +lite 1 1 6.957497 6.957497 10398 +presentedth 1 1 6.957497 6.957497 10399 +compass 1 1 6.957497 6.957497 10400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..1447ae4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +sinc 1 90 2.397895 2.397895 159 +stuff 1 87 2.484907 2.484907 171 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +improv 1 62 2.772589 2.772589 289 +dept 1 64 2.772589 2.772589 291 +prof 1 64 2.772589 2.772589 273 +written 1 63 2.772589 2.772589 278 +visit 1 63 2.772589 2.772589 288 +might 1 41 3.218876 3.218876 426 +origin 1 38 3.295837 3.295837 472 +art 1 29 3.583519 3.583519 593 +alwai 1 24 3.761200 3.761200 691 +fine 1 20 3.951244 3.951244 822 +ever 1 19 4.007333 4.007333 872 +warn 1 14 4.317488 4.317488 1068 +minor 1 12 4.465908 4.465908 1237 +undergrad 1 9 4.753590 4.753590 1589 +risk 1 8 4.875197 4.875197 1689 +heavi 1 7 5.010635 5.010635 1841 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +paint 1 5 5.347108 5.347108 2400 +turkei 1 4 5.568345 5.568345 2914 +funda 1 3 5.857933 5.857933 3645 +ergun 1 2 6.263398 6.263398 5087 +angri 1 2 6.263398 6.263398 5088 +dog 1 2 6.263398 6.263398 5089 +pagefunda 1 1 6.957497 6.957497 10401 +ergn 1 1 6.957497 6.957497 10402 +eduhi 1 1 6.957497 6.957497 10403 +studentin 1 1 6.957497 6.957497 10404 +programcheck 1 1 6.957497 6.957497 10405 +researchpag 1 1 6.957497 6.957497 10406 +izmir 1 1 6.957497 6.957497 10407 +bilkentunivers 1 1 6.957497 6.957497 10408 +ankara 1 1 6.957497 6.957497 10409 +encounterpag 1 1 6.957497 6.957497 10410 +turkish 1 1 6.957497 6.957497 10411 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..5270f356 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +phone 1 175 1.791759 1.791759 45 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +professor 1 137 1.945910 1.945910 76 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +high 1 130 2.079442 2.079442 101 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +intern 1 108 2.197225 2.197225 128 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +activ 1 84 2.484907 2.484907 182 +school 1 84 2.484907 2.484907 188 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +upson 1 71 2.639057 2.639057 218 +symposium 1 72 2.639057 2.639057 238 +practic 1 70 2.708050 2.708050 246 +complex 1 64 2.772589 2.772589 269 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +foundat 1 62 2.772589 2.772589 286 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +summer 1 56 2.890372 2.890372 311 +cover 1 55 2.944439 2.944439 329 +particular 1 51 2.995732 2.995732 352 +linear 1 41 3.218876 3.218876 431 +fast 1 42 3.218876 3.218876 429 +annual 1 40 3.258097 3.258097 458 +industri 1 38 3.295837 3.295837 464 +approxim 1 35 3.401197 3.401197 509 +concurr 1 34 3.401197 3.401197 501 +survei 1 35 3.401197 3.401197 513 +bibliographi 1 34 3.401197 3.401197 518 +graph 1 30 3.555348 3.555348 576 +computersci 1 30 3.555348 3.555348 562 +bound 1 26 3.688879 3.688879 659 +proc 1 26 3.688879 3.688879 649 +aspect 1 25 3.737670 3.737670 663 +universityithaca 1 24 3.761200 3.761200 710 +flow 1 24 3.761200 3.761200 700 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +siam 1 21 3.912023 3.912023 800 +unit 1 21 3.912023 3.912023 779 +rout 1 21 3.912023 3.912023 793 +path 1 21 3.912023 3.912023 778 +vlsi 1 21 3.912023 3.912023 795 +theorem 1 21 3.912023 3.912023 786 +annot 1 21 3.912023 3.912023 775 +separ 1 19 4.007333 4.007333 844 +north 1 19 4.007333 4.007333 873 +sept 1 17 4.110874 4.110874 952 +spars 1 16 4.174387 4.174387 989 +polynomi 1 14 4.317488 4.317488 1069 +embed 1 14 4.317488 4.317488 1102 +discret 1 13 4.382027 4.382027 1165 +speak 1 12 4.465908 4.465908 1283 +arbitrari 1 11 4.553877 4.553877 1359 +itali 1 11 4.553877 4.553877 1378 +packet 1 10 4.653960 4.653960 1415 +strongli 1 10 4.653960 4.653960 1406 +preliminari 1 9 4.753590 4.753590 1480 +congress 1 9 4.753590 4.753590 1592 +hallcornel 1 8 4.875197 4.875197 1757 +combinatori 1 8 4.875197 4.875197 1629 +integ 1 8 4.875197 4.875197 1688 +capac 1 8 4.875197 4.875197 1740 +switch 1 8 4.875197 4.875197 1718 +daughter 1 7 5.010635 5.010635 1943 +maxim 1 7 5.010635 5.010635 1944 +rebecca 1 6 5.164786 5.164786 2174 +dens 1 6 5.164786 5.164786 2122 +inequ 1 6 5.164786 5.164786 2113 +handbook 1 6 5.164786 5.164786 2061 +oncomput 1 5 5.347108 5.347108 2326 +stoc 1 5 5.347108 5.347108 2491 +fraction 1 5 5.347108 5.347108 2259 +proceedingsof 1 5 5.347108 5.347108 2331 +holland 1 5 5.347108 5.347108 2490 +cut 1 4 5.568345 5.568345 2620 +disjoint 1 4 5.568345 5.568345 2709 +combinator 1 4 5.568345 5.568345 2915 +graham 1 4 5.568345 5.568345 2817 +thegener 1 3 5.857933 5.857933 3648 +combinatorica 1 3 5.857933 5.857933 3649 +stein 1 3 5.857933 5.857933 3646 +pack 1 3 5.857933 5.857933 3597 +planar 1 3 5.857933 5.857933 3647 +violat 1 3 5.857933 5.857933 3211 +tokyo 1 3 5.857933 5.857933 3622 +netherland 1 3 5.857933 5.857933 3650 +tardo 1 2 6.263398 6.263398 5090 +julia 1 2 6.263398 6.263398 5094 +broadli 1 2 6.263398 6.263398 5095 +programmingproblem 1 2 6.263398 6.263398 4082 +multicommod 1 2 6.263398 6.263398 4761 +appearedin 1 2 6.263398 6.263398 5096 +leighton 1 2 6.263398 6.263398 5097 +inmathemat 1 2 6.263398 6.263398 5098 +hasappear 1 2 6.263398 6.263398 5099 +goeman 1 2 6.263398 6.263398 5100 +goldberg 1 2 6.263398 6.263398 4313 +williamson 1 2 6.263398 6.263398 5101 +hopp 1 2 6.263398 6.263398 5092 +kleinberg 1 2 6.263398 6.263398 5093 +diamet 1 2 6.263398 6.263398 5102 +tarjan 1 2 6.263398 6.263398 4278 +lovasz 1 2 6.263398 6.263398 5091 +tardosassoci 1 1 6.957497 6.957497 10418 +engineeringphon 1 1 6.957497 6.957497 10419 +ori 1 1 6.957497 6.957497 10412 +shmoi 1 1 6.957497 6.957497 10413 +researchrec 1 1 6.957497 6.957497 10420 +mostlywork 1 1 6.957497 6.957497 10421 +networkproblem 1 1 6.957497 6.957497 10422 +paperssurvei 1 1 6.957497 6.957497 10423 +plotkin 1 1 6.957497 6.957497 10414 +cutratio 1 1 6.957497 6.957497 10424 +klein 1 1 6.957497 6.957497 10416 +fasterapproxim 1 1 6.957497 6.957497 10425 +problemwith 1 1 6.957497 6.957497 10426 +makedon 1 1 6.957497 6.957497 10427 +tragouda 1 1 6.957497 6.957497 10428 +flowproblem 1 1 6.957497 6.957497 10429 +annualacm 1 1 6.957497 6.957497 10430 +approximationalgorithm 1 1 6.957497 6.957497 10415 +thefound 1 1 6.957497 6.957497 10431 +designproblem 1 1 6.957497 6.957497 10432 +discretealgorithm 1 1 6.957497 6.957497 10433 +someevacu 1 1 6.957497 6.957497 10434 +ondiscret 1 1 6.957497 6.957497 10435 +quickest 1 1 6.957497 6.957497 10436 +transship 1 1 6.957497 6.957497 10437 +theproceed 1 1 6.957497 6.957497 10438 +steiner 1 1 6.957497 6.957497 10439 +multicut 1 1 6.957497 6.957497 10440 +pathsproblem 1 1 6.957497 6.957497 10441 +annualiee 1 1 6.957497 6.957497 10442 +rabani 1 1 6.957497 6.957497 10443 +fleischer 1 1 6.957497 6.957497 10444 +comb 1 1 6.957497 6.957497 10445 +ipco 1 1 6.957497 6.957497 10446 +kort 1 1 6.957497 6.957497 10447 +lovaszand 1 1 6.957497 6.957497 10448 +schrijver 1 1 6.957497 6.957497 10449 +inoptim 1 1 6.957497 6.957497 10450 +ofmathematician 1 1 6.957497 6.957497 10451 +kyoto 1 1 6.957497 6.957497 10452 +grotschel 1 1 6.957497 6.957497 10417 +inproc 1 1 6.957497 6.957497 10453 +maastricht 1 1 6.957497 6.957497 10454 +networkoptim 1 1 6.957497 6.957497 10455 +netflow 1 1 6.957497 6.957497 10456 +miniato 1 1 6.957497 6.957497 10457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..6f5faf80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +california 1 46 3.091042 3.091042 388 +departmentcornel 1 5 5.347108 5.347108 2275 +franci 1 3 5.857933 5.857933 3287 +universitycomput 1 3 5.857933 5.857933 3651 +berkeleymathemat 1 1 6.957497 6.957497 10458 +departmentcomput 1 1 6.957497 6.957497 10459 +departmenthumorfcc 1 1 6.957497 6.957497 10460 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..592c7022 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +world 1 115 2.197225 2.197225 126 +felix 1 2 6.263398 6.263398 5103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..7c779fbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +click 1 142 1.945910 1.945910 78 +report 1 131 2.079442 2.079442 92 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +person 1 111 2.197225 2.197225 117 +technic 1 100 2.302585 2.302585 140 +take 1 97 2.302585 2.302585 134 +homepag 1 93 2.397895 2.397895 148 +complet 1 77 2.564949 2.564949 208 +april 1 77 2.564949 2.564949 196 +upson 1 71 2.639057 2.639057 218 +test 1 66 2.708050 2.708050 252 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +complex 1 64 2.772589 2.772589 269 +juli 1 60 2.833213 2.833213 305 +algebra 1 45 3.135494 3.135494 394 +math 1 44 3.135494 3.135494 402 +semant 1 29 3.583519 3.583519 587 +universityithaca 1 24 3.761200 3.761200 710 +greg 1 24 3.761200 3.761200 695 +sciencecornel 1 22 3.850148 3.850148 768 +smith 1 20 3.951244 3.951244 820 +grad 1 20 3.951244 3.951244 837 +decid 1 14 4.317488 4.317488 1075 +food 1 12 4.465908 4.465908 1285 +sundai 1 10 4.653960 4.653960 1387 +morrisett 1 5 5.347108 5.347108 2263 +dexter 1 4 5.568345 5.568345 2855 +kozen 1 4 5.568345 5.568345 2619 +catch 1 4 5.568345 5.568345 2602 +halldepart 1 3 5.857933 5.857933 3641 +cohen 1 3 5.857933 5.857933 3652 +erni 1 2 6.263398 6.263398 5104 +epicuri 1 2 6.263398 6.263398 5105 +frederick 1 1 6.957497 6.957497 10461 +homepagefrederick 1 1 6.957497 6.957497 10463 +kleen 1 1 6.957497 6.957497 10462 +zine 1 1 6.957497 6.957497 10464 +cartalk 1 1 6.957497 6.957497 10465 +clack 1 1 6.957497 6.957497 10466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..0212f40a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +updat 1 191 1.609438 1.609438 41 +phone 1 175 1.791759 1.791759 45 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +improv 1 62 2.772589 2.772589 289 +electron 1 47 3.091042 3.091042 379 +either 1 35 3.401197 3.401197 506 +post 1 35 3.401197 3.401197 505 +actual 1 28 3.610918 3.610918 604 +doctor 1 24 3.761200 3.761200 709 +happi 1 14 4.317488 4.317488 1079 +frank 1 9 4.753590 4.753590 1568 +matter 1 8 4.875197 4.875197 1627 +xerox 1 8 4.875197 4.875197 1725 +planet 1 4 5.568345 5.568345 2912 +adelstein 1 1 6.957497 6.957497 10467 +checkout 1 1 6.957497 6.957497 10468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..c9068418 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +model 1 145 1.945910 1.945910 69 +year 1 148 1.945910 1.945910 84 +postscript 1 131 2.079442 2.079442 90 +version 1 113 2.197225 2.197225 122 +theori 1 111 2.197225 2.197225 127 +peopl 1 96 2.302585 2.302585 132 +center 1 88 2.397895 2.397895 158 +section 1 94 2.397895 2.397895 149 +resum 1 79 2.564949 2.564949 217 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +previou 1 62 2.772589 2.772589 290 +visual 1 48 3.044522 3.044522 372 +http 1 41 3.218876 3.218876 420 +soon 1 36 3.367296 3.367296 494 +next 1 34 3.401197 3.401197 517 +anim 1 31 3.496508 3.496508 557 +berkelei 1 26 3.688879 3.688879 657 +other 1 24 3.761200 3.761200 697 +sciencecornel 1 22 3.850148 3.850148 768 +hobbi 1 16 4.174387 4.174387 1009 +mayb 1 15 4.248495 4.248495 1014 +drive 1 15 4.248495 4.248495 1052 +shown 1 14 4.317488 4.317488 1080 +massachusett 1 14 4.317488 4.317488 1118 +affili 1 13 4.382027 4.382027 1194 +hewlett 1 8 4.875197 4.875197 1709 +guitar 1 8 4.875197 4.875197 1758 +lawrenc 1 7 5.010635 5.010635 1908 +fred 1 6 5.164786 5.164786 2072 +photographi 1 6 5.164786 5.164786 2146 +feet 1 5 5.347108 5.347108 2492 +snail 1 4 5.568345 5.568345 2916 +yuan 1 3 5.857933 5.857933 3653 +binghamton 1 3 5.857933 5.857933 3544 +chelmsford 1 3 5.857933 5.857933 3564 +scramo 1 1 6.957497 6.957497 10470 +midi 1 1 6.957497 6.957497 10471 +choreograph 1 1 6.957497 6.957497 10472 +vpla 1 1 6.957497 6.957497 10473 +animationlink 1 1 6.957497 6.957497 10474 +packardlink 1 1 6.957497 6.957497 10475 +laboratoryinterest 1 1 6.957497 6.957497 10476 +cello 1 1 6.957497 6.957497 10477 +aquarium 1 1 6.957497 6.957497 10478 +burl 1 1 6.957497 6.957497 10479 +fredhsu 1 1 6.957497 6.957497 10480 +apollo 1 1 6.957497 6.957497 10469 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..e9d3b98f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +commun 1 95 2.397895 2.397895 157 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +call 1 91 2.397895 2.397895 153 +educ 1 86 2.484907 2.484907 191 +start 1 83 2.484907 2.484907 173 +novemb 1 81 2.484907 2.484907 179 +chang 1 82 2.484907 2.484907 163 +school 1 84 2.484907 2.484907 188 +resum 1 79 2.564949 2.564949 217 +complet 1 77 2.564949 2.564949 208 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +intellig 1 72 2.639057 2.639057 225 +involv 1 71 2.639057 2.639057 227 +name 1 72 2.639057 2.639057 220 +nation 1 74 2.639057 2.639057 240 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +degre 1 69 2.708050 2.708050 259 +artifici 1 63 2.772589 2.772589 280 +prof 1 64 2.772589 2.772589 273 +back 1 60 2.833213 2.833213 297 +colleg 1 61 2.833213 2.833213 300 +major 1 56 2.890372 2.890372 315 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +undergradu 1 54 2.944439 2.944439 338 +still 1 50 3.044522 3.044522 362 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +get 1 46 3.091042 3.091042 380 +better 1 45 3.135494 3.135494 401 +anoth 1 45 3.135494 3.135494 408 +long 1 43 3.178054 3.178054 413 +littl 1 39 3.258097 3.258097 454 +probabl 1 40 3.258097 3.258097 455 +brian 1 38 3.295837 3.295837 466 +mean 1 37 3.332205 3.332205 477 +michael 1 35 3.401197 3.401197 514 +approxim 1 35 3.401197 3.401197 509 +india 1 32 3.465736 3.465736 550 +taken 1 31 3.496508 3.496508 555 +actual 1 28 3.610918 3.610918 604 +hope 1 28 3.610918 3.610918 610 +never 1 25 3.737670 3.737670 671 +sciencecornel 1 22 3.850148 3.850148 768 +deal 1 22 3.850148 3.850148 736 +born 1 21 3.912023 3.912023 798 +leav 1 21 3.912023 3.912023 772 +smith 1 20 3.951244 3.951244 820 +wonder 1 20 3.951244 3.951244 815 +region 1 19 4.007333 4.007333 875 +miss 1 19 4.007333 4.007333 866 +four 1 18 4.060443 4.060443 905 +upon 1 16 4.174387 4.174387 978 +took 1 16 4.174387 4.174387 1010 +match 1 16 4.174387 4.174387 965 +goe 1 15 4.248495 4.248495 1044 +precis 1 15 4.248495 4.248495 1023 +earlier 1 13 4.382027 4.382027 1140 +someon 1 13 4.382027 4.382027 1128 +land 1 12 4.465908 4.465908 1273 +outsid 1 12 4.465908 4.465908 1219 +went 1 12 4.465908 4.465908 1279 +holidai 1 12 4.465908 4.465908 1224 +noth 1 11 4.553877 4.553877 1328 +light 1 9 4.753590 4.753590 1533 +vineet 1 8 4.875197 4.875197 1639 +pursu 1 7 5.010635 5.010635 1902 +seshadri 1 7 5.010635 5.010635 1803 +keshav 1 7 5.010635 5.010635 1852 +praveen 1 6 5.164786 5.164786 1996 +srinivasan 1 6 5.164786 5.164786 2175 +lucki 1 6 5.164786 5.164786 2163 +somewher 1 6 5.164786 5.164786 2176 +babi 1 5 5.347108 5.347108 2493 +interfer 1 5 5.347108 5.347108 2494 +greater 1 5 5.347108 5.347108 2258 +ashish 1 5 5.347108 5.347108 2473 +engineeringdepart 1 4 5.568345 5.568345 2917 +dive 1 3 5.857933 5.857933 3654 +straight 1 3 5.857933 5.857933 3655 +indira 1 3 5.857933 5.857933 3656 +karnataka 1 2 6.263398 6.263398 5106 +bharat 1 2 6.263398 6.263398 5107 +cute 1 2 6.263398 6.263398 5108 +incident 1 2 6.263398 6.263398 5109 +bangalor 1 2 6.263398 6.263398 5110 +that 1 2 6.263398 6.263398 5111 +conquer 1 2 6.263398 6.263398 5112 +aastha 1 2 6.263398 6.263398 5005 +ankit 1 2 6.263398 6.263398 4966 +deepak 1 1 6.957497 6.957497 10481 +balakrishna 1 1 6.957497 6.957497 10482 +balakrishnamast 1 1 6.957497 6.957497 10483 +resumeeducationcoursesperson 1 1 6.957497 6.957497 10484 +surathk 1 1 6.957497 6.957497 10485 +specialis 1 1 6.957497 6.957497 10486 +godfrei 1 1 6.957497 6.957497 10487 +chubbi 1 1 6.957497 6.957497 10488 +weigh 1 1 6.957497 6.957497 10489 +pound 1 1 6.957497 6.957497 10490 +divin 1 1 6.957497 6.957497 10491 +aishwarya 1 1 6.957497 6.957497 10492 +miniscul 1 1 6.957497 6.957497 10493 +krec 1 1 6.957497 6.957497 10494 +here 1 1 6.957497 6.957497 10495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..d1c3fa44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +includ 1 208 1.609438 1.609438 42 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +manag 1 114 2.197225 2.197225 125 +sinc 1 90 2.397895 2.397895 159 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +solut 1 82 2.484907 2.484907 162 +educ 1 86 2.484907 2.484907 191 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +onlin 1 75 2.639057 2.639057 223 +multimedia 1 68 2.708050 2.708050 258 +degre 1 69 2.708050 2.708050 259 +plan 1 65 2.772589 2.772589 272 +unix 1 58 2.890372 2.890372 308 +music 1 42 3.218876 3.218876 436 +respons 1 37 3.332205 3.332205 476 +global 1 34 3.401197 3.401197 520 +administr 1 27 3.637586 3.637586 628 +leav 1 21 3.912023 3.912023 772 +bachelor 1 17 4.110874 4.110874 957 +warn 1 14 4.317488 4.317488 1068 +hopefulli 1 14 4.317488 4.317488 1071 +cricket 1 7 5.010635 5.010635 1945 +publicationsth 1 4 5.568345 5.568345 2859 +asif 1 2 6.263398 6.263398 4933 +ghia 1 2 6.263398 6.263398 4934 +mywww 1 2 6.263398 6.263398 5113 +uddin 1 1 6.957497 6.957497 10496 +ghiasasif 1 1 6.957497 6.957497 10497 +constructioni 1 1 6.957497 6.957497 10498 +karachi 1 1 6.957497 6.957497 10499 +pakistan 1 1 6.957497 6.957497 10500 +installationso 1 1 6.957497 6.957497 10501 +astronomyasif 1 1 6.957497 6.957497 10502 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..54d61f79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +visit 1 63 2.772589 2.772589 288 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +berkelei 1 26 3.688879 3.688879 657 +million 1 5 5.347108 5.347108 2495 +dglaser 1 1 6.957497 6.957497 10503 +htmlpleas 1 1 6.957497 6.957497 10504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..ffeda7dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,334 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +cornel 1 215 1.386294 1.386294 23 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +hour 1 165 1.791759 1.791759 46 +professor 1 137 1.945910 1.945910 76 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +document 1 121 2.079442 2.079442 89 +teach 1 108 2.197225 2.197225 112 +topic 1 114 2.197225 2.197225 110 +assist 1 112 2.197225 2.197225 113 +make 1 111 2.197225 2.197225 120 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +follow 1 92 2.397895 2.397895 143 +mani 1 92 2.397895 2.397895 150 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +call 1 91 2.397895 2.397895 153 +institut 1 84 2.484907 2.484907 187 +level 1 87 2.484907 2.484907 180 +novemb 1 81 2.484907 2.484907 179 +educ 1 86 2.484907 2.484907 191 +help 1 83 2.484907 2.484907 175 +ieee 1 86 2.484907 2.484907 190 +master 1 76 2.564949 2.564949 216 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +good 1 77 2.564949 2.564949 200 +complet 1 77 2.564949 2.564949 208 +know 1 80 2.564949 2.564949 198 +david 1 71 2.639057 2.639057 232 +logic 1 71 2.639057 2.639057 230 +symposium 1 72 2.639057 2.639057 238 +upson 1 71 2.639057 2.639057 218 +write 1 72 2.639057 2.639057 222 +effici 1 73 2.639057 2.639057 233 +servic 1 72 2.639057 2.639057 236 +receiv 1 66 2.708050 2.708050 244 +goal 1 66 2.708050 2.708050 250 +written 1 63 2.772589 2.772589 278 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +foundat 1 62 2.772589 2.772589 286 +import 1 65 2.772589 2.772589 282 +colleg 1 61 2.833213 2.833213 300 +best 1 59 2.833213 2.833213 299 +plai 1 60 2.833213 2.833213 307 +content 1 59 2.833213 2.833213 302 +thesi 1 57 2.890372 2.890372 327 +three 1 54 2.944439 2.944439 330 +particular 1 51 2.995732 2.995732 352 +tabl 1 51 2.995732 2.995732 346 +numer 1 49 3.044522 3.044522 369 +telephon 1 50 3.044522 3.044522 373 +give 1 50 3.044522 3.044522 359 +understand 1 47 3.091042 3.091042 384 +move 1 47 3.091042 3.091042 382 +effect 1 46 3.091042 3.091042 385 +math 1 44 3.135494 3.135494 402 +made 1 44 3.135494 3.135494 398 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +york 1 41 3.218876 3.218876 435 +editor 1 41 3.218876 3.218876 433 +edit 1 42 3.218876 3.218876 418 +howev 1 41 3.218876 3.218876 422 +announc 1 40 3.258097 3.258097 441 +programm 1 39 3.258097 3.258097 445 +societi 1 40 3.258097 3.258097 456 +author 1 39 3.258097 3.258097 450 +late 1 40 3.258097 3.258097 439 +vita 1 38 3.295837 3.295837 473 +paul 1 38 3.295837 3.295837 471 +correct 1 38 3.295837 3.295837 462 +open 1 38 3.295837 3.295837 469 +formal 1 37 3.332205 3.332205 478 +respons 1 37 3.332205 3.332205 476 +china 1 37 3.332205 3.332205 487 +short 1 36 3.367296 3.367296 499 +return 1 34 3.401197 3.401197 502 +award 1 34 3.401197 3.401197 523 +survei 1 35 3.401197 3.401197 513 +curriculum 1 33 3.433987 3.433987 535 +go 1 33 3.433987 3.433987 529 +articl 1 33 3.433987 3.433987 530 +board 1 33 3.433987 3.433987 528 +obtain 1 33 3.433987 3.433987 534 +dissert 1 32 3.465736 3.465736 549 +concept 1 32 3.465736 3.465736 537 +taken 1 31 3.496508 3.496508 555 +abl 1 30 3.555348 3.555348 566 +produc 1 30 3.555348 3.555348 572 +semant 1 29 3.583519 3.583519 587 +chair 1 29 3.583519 3.583519 596 +art 1 29 3.583519 3.583519 593 +full 1 28 3.610918 3.610918 615 +usual 1 28 3.610918 3.610918 608 +weather 1 28 3.610918 3.610918 618 +mine 1 26 3.688879 3.688879 654 +repres 1 26 3.688879 3.688879 656 +compar 1 26 3.688879 3.688879 648 +enjoi 1 26 3.688879 3.688879 660 +concern 1 25 3.737670 3.737670 666 +spent 1 25 3.737670 3.737670 676 +toward 1 25 3.737670 3.737670 668 +aspect 1 25 3.737670 3.737670 663 +sport 1 25 3.737670 3.737670 683 +universityithaca 1 24 3.761200 3.761200 710 +doctor 1 24 3.761200 3.761200 709 +known 1 24 3.761200 3.761200 702 +interpret 1 24 3.761200 3.761200 686 +methodolog 1 23 3.806662 3.806662 733 +proof 1 23 3.806662 3.806662 720 +almost 1 22 3.850148 3.850148 742 +william 1 22 3.850148 3.850148 765 +serv 1 22 3.850148 3.850148 758 +period 1 22 3.850148 3.850148 743 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +programminglanguag 1 21 3.912023 3.912023 782 +fact 1 21 3.912023 3.912023 780 +born 1 21 3.912023 3.912023 798 +busi 1 21 3.912023 3.912023 784 +hous 1 21 3.912023 3.912023 801 +tenni 1 20 3.951244 3.951244 838 +item 1 19 4.007333 4.007333 856 +left 1 19 4.007333 4.007333 851 +ever 1 19 4.007333 4.007333 872 +figur 1 18 4.060443 4.060443 903 +four 1 18 4.060443 4.060443 905 +stand 1 18 4.060443 4.060443 891 +germani 1 17 4.110874 4.110874 946 +stanford 1 17 4.110874 4.110874 955 +whether 1 17 4.110874 4.110874 918 +alreadi 1 16 4.174387 4.174387 963 +later 1 15 4.248495 4.248495 1043 +susan 1 15 4.248495 4.248495 1050 +contribut 1 15 4.248495 4.248495 1021 +rate 1 15 4.248495 4.248495 1037 +becam 1 14 4.317488 4.317488 1117 +latex 1 14 4.317488 4.317488 1064 +manner 1 14 4.317488 4.317488 1074 +conduct 1 14 4.317488 4.317488 1065 +wife 1 13 4.382027 4.382027 1196 +essenti 1 13 4.382027 4.382027 1137 +individu 1 13 4.382027 4.382027 1126 +believ 1 13 4.382027 4.382027 1187 +went 1 12 4.465908 4.465908 1279 +speak 1 12 4.465908 4.465908 1283 +grant 1 12 4.465908 4.465908 1216 +excit 1 11 4.553877 4.553877 1329 +fellowship 1 10 4.653960 4.653960 1460 +thecomput 1 10 4.653960 4.653960 1408 +end 1 9 4.753590 4.753590 1567 +respect 1 9 4.753590 4.753590 1545 +softbal 1 9 4.753590 4.753590 1594 +volleybal 1 9 4.753590 4.753590 1598 +swim 1 9 4.753590 4.753590 1599 +mention 1 9 4.753590 4.753590 1569 +lewi 1 8 4.875197 4.875197 1700 +hallcornel 1 8 4.875197 4.875197 1757 +guggenheim 1 8 4.875197 4.875197 1759 +told 1 8 4.875197 4.875197 1658 +joke 1 8 4.875197 4.875197 1620 +wire 1 8 4.875197 4.875197 1747 +pagecomput 1 7 5.010635 5.010635 1900 +marri 1 7 5.010635 5.010635 1946 +illinoi 1 7 5.010635 5.010635 1941 +notion 1 7 5.010635 5.010635 1947 +explain 1 7 5.010635 5.010635 1816 +snow 1 6 5.164786 5.164786 2031 +outstand 1 6 5.164786 5.164786 2136 +spare 1 6 5.164786 5.164786 2177 +golf 1 6 5.164786 5.164786 2178 +truth 1 6 5.164786 5.164786 2179 +elain 1 5 5.347108 5.347108 2496 +joseph 1 5 5.347108 5.347108 2327 +these 1 5 5.347108 5.347108 2482 +plant 1 5 5.347108 5.347108 2497 +feder 1 5 5.347108 5.347108 2266 +ofparallel 1 5 5.347108 5.347108 2380 +interfer 1 5 5.347108 5.347108 2494 +began 1 5 5.347108 5.347108 2498 +rewrit 1 5 5.347108 5.347108 2367 +sing 1 5 5.347108 5.347108 2499 +yield 1 5 5.347108 5.347108 2458 +queen 1 4 5.568345 5.568345 2919 +naval 1 4 5.568345 5.568345 2920 +birthdai 1 4 5.568345 5.568345 2800 +bloom 1 4 5.568345 5.568345 2913 +proud 1 4 5.568345 5.568345 2918 +raman 1 4 5.568345 5.568345 2827 +substanti 1 4 5.568345 5.568345 2921 +schneider 1 4 5.568345 5.568345 2868 +ping 1 4 5.568345 5.568345 2922 +gri 1 3 5.857933 5.857933 3569 +munich 1 3 5.857933 5.857933 3570 +freshman 1 3 5.857933 5.857933 3462 +biographi 1 3 5.857933 5.857933 3658 +dimac 1 3 5.857933 5.857933 3574 +assistantship 1 3 5.857933 5.857933 3660 +twin 1 3 5.857933 5.857933 3657 +langaug 1 3 5.857933 5.857933 3661 +blind 1 3 5.857933 5.857933 3662 +serious 1 3 5.857933 5.857933 3663 +researchassoci 1 3 5.857933 5.857933 3664 +pong 1 3 5.857933 5.857933 3371 +audienc 1 3 5.857933 5.857933 3180 +laugh 1 3 5.857933 5.857933 3659 +pagedavid 1 2 6.263398 6.263398 5114 +sophomor 1 2 6.263398 6.263398 4695 +polya 1 2 6.263398 6.263398 4939 +weapon 1 2 6.263398 6.263398 5115 +degreein 1 2 6.263398 6.263398 5116 +manfr 1 2 6.263398 6.263398 4949 +bauer 1 2 6.263398 6.263398 5117 +cake 1 2 6.263398 6.263398 5118 +booth 1 2 6.263398 6.263398 5119 +theamerican 1 2 6.263398 6.263398 5120 +afip 1 2 6.263398 6.263398 4300 +andt 1 2 6.263398 6.263398 5121 +spoken 1 2 6.263398 6.263398 5122 +researchinterest 1 2 6.263398 6.263398 5123 +acta 1 2 6.263398 6.263398 5124 +informatica 1 2 6.263398 6.263398 5125 +andtool 1 2 6.263398 6.263398 5126 +grieswilliam 1 1 6.957497 6.957497 10505 +engineeringdr 1 1 6.957497 6.957497 10506 +formaldevelop 1 1 6.957497 6.957497 10507 +asinterest 1 1 6.957497 6.957497 10508 +researchin 1 1 6.957497 6.957497 10509 +taughta 1 1 6.957497 6.957497 10510 +anoverrid 1 1 6.957497 6.957497 10511 +edushort 1 1 6.957497 6.957497 10512 +griesi 1 1 6.957497 6.957497 10513 +flush 1 1 6.957497 6.957497 10514 +iescap 1 1 6.957497 6.957497 10515 +workfor 1 1 6.957497 6.957497 10516 +civilian 1 1 6.957497 6.957497 10517 +amathematician 1 1 6.957497 6.957497 10518 +fewmonth 1 1 6.957497 6.957497 10519 +twogerman 1 1 6.957497 6.957497 10520 +ruedig 1 1 6.957497 6.957497 10521 +wiehl 1 1 6.957497 6.957497 10522 +algol 1 1 6.957497 6.957497 10523 +compilerfor 1 1 6.957497 6.957497 10524 +implementrecurs 1 1 6.957497 6.957497 10525 +stoer 1 1 6.957497 6.957497 10526 +wasin 1 1 6.957497 6.957497 10527 +notyet 1 1 6.957497 6.957497 10528 +kosher 1 1 6.957497 6.957497 10529 +thebirthdai 1 1 6.957497 6.957497 10530 +intown 1 1 6.957497 6.957497 10531 +whichha 1 1 6.957497 6.957497 10532 +wasdepart 1 1 6.957497 6.957497 10533 +lewisprofessor 1 1 6.957497 6.957497 10534 +contentsi 1 1 6.957497 6.957497 10535 +mytext 1 1 6.957497 6.957497 10536 +writingand 1 1 6.957497 6.957497 10537 +thewond 1 1 6.957497 6.957497 10538 +wherey 1 1 6.957497 6.957497 10539 +contributionsto 1 1 6.957497 6.957497 10540 +sigcseaward 1 1 6.957497 6.957497 10541 +clarkaward 1 1 6.957497 6.957497 10542 +advise 1 1 6.957497 6.957497 10543 +susanowicki 1 1 6.957497 6.957497 10544 +laid 1 1 6.957497 6.957497 10545 +freeness 1 1 6.957497 6.957497 10546 +bestpap 1 1 6.957497 6.957497 10547 +sthesi 1 1 6.957497 6.957497 10548 +designedand 1 1 6.957497 6.957497 10549 +printedor 1 1 6.957497 6.957497 10550 +speakmathemat 1 1 6.957497 6.957497 10551 +audiocassett 1 1 6.957497 6.957497 10552 +officein 1 1 6.957497 6.957497 10553 +taulbe 1 1 6.957497 6.957497 10554 +responsesfrom 1 1 6.957497 6.957497 10555 +noother 1 1 6.957497 6.957497 10556 +itrequir 1 1 6.957497 6.957497 10557 +sendin 1 1 6.957497 6.957497 10558 +questionnair 1 1 6.957497 6.957497 10559 +forchair 1 1 6.957497 6.957497 10560 +andrespons 1 1 6.957497 6.957497 10561 +takean 1 1 6.957497 6.957497 10562 +willsuggest 1 1 6.957497 6.957497 10563 +servewher 1 1 6.957497 6.957497 10564 +fredb 1 1 6.957497 6.957497 10565 +andmonograph 1 1 6.957497 6.957497 10566 +isplit 1 1 6.957497 6.957497 10567 +pant 1 1 6.957497 6.957497 10568 +alectur 1 1 6.957497 6.957497 10569 +turnedaround 1 1 6.957497 6.957497 10570 +spoke 1 1 6.957497 6.957497 10571 +everyonelaugh 1 1 6.957497 6.957497 10572 +justsaid 1 1 6.957497 6.957497 10573 +barbershop 1 1 6.957497 6.957497 10574 +andgilbert 1 1 6.957497 6.957497 10575 +sullivan 1 1 6.957497 6.957497 10576 +carpentri 1 1 6.957497 6.957497 10577 +remodel 1 1 6.957497 6.957497 10578 +considerablesatisfact 1 1 6.957497 6.957497 10579 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..d5d0ffce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +first 1 140 1.945910 1.945910 71 +learn 1 86 2.484907 2.484907 170 +html 1 75 2.639057 2.639057 235 +week 1 52 2.995732 2.995732 343 +express 1 32 3.465736 3.465736 540 +sciencecornel 1 22 3.850148 3.850148 768 +shop 1 10 4.653960 4.653960 1469 +bore 1 7 5.010635 5.010635 1948 +alex 1 6 5.164786 5.164786 2130 +grinzayd 1 1 6.957497 6.957497 10580 +homepagealex 1 1 6.957497 6.957497 10581 +grinzaydm 1 1 6.957497 6.957497 10582 +universitytel 1 1 6.957497 6.957497 10583 +necx 1 1 6.957497 6.957497 10584 +directinternet 1 1 6.957497 6.957497 10585 +networkcomput 1 1 6.957497 6.957497 10586 +damarkwarn 1 1 6.957497 6.957497 10587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..449115d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +homepag 1 93 2.397895 2.397895 148 +second 1 81 2.484907 2.484907 166 +activ 1 84 2.484907 2.484907 182 +novemb 1 81 2.484907 2.484907 179 +complet 1 77 2.564949 2.564949 208 +master 1 76 2.564949 2.564949 216 +messag 1 76 2.564949 2.564949 212 +involv 1 71 2.639057 2.639057 227 +degre 1 69 2.708050 2.708050 259 +ithaca 1 65 2.772589 2.772589 294 +sever 1 56 2.890372 2.890372 322 +advisor 1 51 2.995732 2.995732 355 +york 1 41 3.218876 3.218876 435 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +scienceat 1 11 4.553877 4.553877 1375 +charg 1 9 4.753590 4.753590 1582 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +cuc 1 4 5.568345 5.568345 2630 +poland 1 3 5.857933 5.857933 3665 +czajkowskidepart 1 1 6.957497 6.957497 10589 +grze 1 1 6.957497 6.957497 10588 +krakow 1 1 6.957497 6.957497 10590 +administ 1 1 6.957497 6.957497 10591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..ffadac4e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +like 1 132 1.945910 1.945910 81 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +check 1 115 2.197225 2.197225 118 +teach 1 108 2.197225 2.197225 112 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +activ 1 84 2.484907 2.484907 182 +resum 1 79 2.564949 2.564949 217 +upson 1 71 2.639057 2.639057 218 +knowledg 1 67 2.708050 2.708050 243 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +septemb 1 65 2.772589 2.772589 274 +best 1 59 2.833213 2.833213 299 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +talk 1 53 2.944439 2.944439 336 +case 1 51 2.995732 2.995732 351 +give 1 50 3.044522 3.044522 359 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +describ 1 45 3.135494 3.135494 400 +continu 1 39 3.258097 3.258097 448 +probabl 1 40 3.258097 3.258097 455 +field 1 37 3.332205 3.332205 482 +game 1 36 3.367296 3.367296 498 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +scientist 1 31 3.496508 3.496508 560 +focus 1 29 3.583519 3.583519 584 +semant 1 29 3.583519 3.583519 587 +subject 1 26 3.688879 3.688879 647 +although 1 25 3.737670 3.737670 667 +princeton 1 15 4.248495 4.248495 1042 +econom 1 13 4.382027 4.382027 1184 +someon 1 13 4.382027 4.382027 1128 +mainli 1 10 4.653960 4.653960 1432 +sentenc 1 10 4.653960 4.653960 1413 +uncertainti 1 7 5.010635 5.010635 1882 +boundari 1 7 5.010635 5.010635 1929 +gave 1 7 5.010635 5.010635 1922 +philosoph 1 7 5.010635 5.010635 1904 +li 1 5 5.347108 5.347108 2500 +hallithaca 1 4 5.568345 5.568345 2894 +universitycomput 1 3 5.857933 5.857933 3651 +halpern 1 1 6.957497 6.957497 10592 +pagejoseph 1 1 6.957497 6.957497 10593 +professorcornel 1 1 6.957497 6.957497 10594 +economist 1 1 6.957497 6.957497 10595 +abouta 1 1 6.957497 6.957497 10596 +sequel 1 1 6.957497 6.957497 10597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..eb1ab623 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +cornel 1 215 1.386294 1.386294 23 +construct 1 139 1.945910 1.945910 82 +tsuneshi 1 1 6.957497 6.957497 10598 +hashimototsuneshi 1 1 6.957497 6.957497 10600 +hashimotothi 1 1 6.957497 6.957497 10601 +hashimoto 1 1 6.957497 6.957497 10599 +cstsuneshi 1 1 6.957497 6.957497 10602 +hashi 1 1 6.957497 6.957497 10603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..be3304d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +teach 1 108 2.197225 2.197225 112 +commun 1 95 2.397895 2.397895 157 +novemb 1 81 2.484907 2.484907 179 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +unix 1 58 2.890372 2.890372 308 +mark 1 44 3.135494 3.135494 403 +proof 1 23 3.806662 3.806662 720 +horu 1 14 4.317488 4.317488 1116 +nuprl 1 10 4.653960 4.653960 1402 +hockei 1 8 4.875197 4.875197 1760 +hayden 1 4 5.568345 5.568345 2844 +tast 1 3 5.857933 5.857933 3666 +ensembl 1 2 6.263398 6.263398 4854 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..9c08aa24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +improv 1 62 2.772589 2.772589 289 +cyber 1 4 5.568345 5.568345 2909 +pond 1 2 6.263398 6.263398 5127 +heji 1 1 6.957497 6.957497 10604 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..43727a32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +modifi 1 178 1.609438 1.609438 35 +year 1 148 1.945910 1.945910 84 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +faculti 1 56 2.890372 2.890372 325 +undergradu 1 54 2.944439 2.944439 338 +advisor 1 51 2.995732 2.995732 355 +third 1 43 3.178054 3.178054 412 +china 1 37 3.332205 3.332205 487 +berkelei 1 26 3.688879 3.688879 657 +born 1 21 3.912023 3.912023 798 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +hallithaca 1 4 5.568345 5.568345 2894 +shanghai 1 4 5.568345 5.568345 2925 +universitydept 1 3 5.857933 5.857933 3602 +deyu 1 1 6.957497 6.957497 10606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..cbf2550b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +machin 1 129 2.079442 2.079442 95 +mathemat 1 108 2.197225 2.197225 123 +check 1 115 2.197225 2.197225 118 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +homepag 1 93 2.397895 2.397895 148 +academ 1 82 2.484907 2.484907 178 +learn 1 86 2.484907 2.484907 170 +resourc 1 81 2.484907 2.484907 172 +master 1 76 2.564949 2.564949 216 +optim 1 79 2.564949 2.564949 197 +upson 1 71 2.639057 2.639057 218 +appli 1 71 2.639057 2.639057 226 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +back 1 60 2.833213 2.833213 297 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +bibliographi 1 34 3.401197 3.401197 518 +chines 1 29 3.583519 3.583519 595 +retriev 1 27 3.637586 3.637586 621 +motion 1 24 3.761200 3.761200 699 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +annot 1 21 3.912023 3.912023 775 +beij 1 19 4.007333 4.007333 876 +track 1 15 4.248495 4.248495 1029 +tsinghua 1 13 4.382027 4.382027 1195 +huang 1 12 4.465908 4.465908 1202 +thedepart 1 11 4.553877 4.553877 1350 +scienceat 1 11 4.553877 4.553877 1375 +fellowship 1 10 4.653960 4.653960 1460 +ramin 1 7 5.010635 5.010635 1820 +christian 1 7 5.010635 5.010635 1949 +zabih 1 6 5.164786 5.164786 2138 +mission 1 5 5.347108 5.347108 2465 +jing 1 3 5.857933 5.857933 3521 +bachelorand 1 2 6.263398 6.263398 5128 +chinami 1 2 6.263398 6.263398 5129 +evangel 1 1 6.957497 6.957497 10605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..b3426085 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +graphic 1 90 2.397895 2.397895 147 +resum 1 79 2.564949 2.564949 217 +practic 1 70 2.708050 2.708050 246 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +china 1 37 3.332205 3.332205 487 +chen 1 21 3.912023 3.912023 791 +taiwan 1 16 4.174387 4.174387 1006 +practicum 1 16 4.174387 4.174387 960 +mapl 1 11 4.553877 4.553877 1376 +perman 1 11 4.553877 4.553877 1372 +sung 1 6 5.164786 5.164786 2075 +chin 1 5 5.347108 5.347108 2408 +taipei 1 4 5.568345 5.568345 2926 +album 1 4 5.568345 5.568345 2888 +icchen 1 1 6.957497 6.957497 10607 +nctu 1 1 6.957497 6.957497 10608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..55a19008 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +network 1 168 1.791759 1.791759 61 +construct 1 139 1.945910 1.945910 82 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +advanc 1 99 2.302585 2.302585 130 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +multimedia 1 68 2.708050 2.708050 258 +visit 1 63 2.772589 2.772589 288 +post 1 35 3.401197 3.401197 505 +script 1 13 4.382027 4.382027 1171 +indira 1 3 5.857933 5.857933 3656 +malik 1 1 6.957497 6.957497 10609 +imalik 1 1 6.957497 6.957497 10610 +tap 1 1 6.957497 6.957497 10611 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..0966a2bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +homepag 1 93 2.397895 2.397895 148 +graphic 1 90 2.397895 2.397895 147 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +august 1 66 2.708050 2.708050 257 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +polici 1 64 2.772589 2.772589 279 +colleg 1 61 2.833213 2.833213 300 +cool 1 49 3.044522 3.044522 374 +directori 1 45 3.135494 3.135494 396 +india 1 32 3.465736 3.465736 550 +independ 1 32 3.465736 3.465736 548 +taken 1 31 3.496508 3.496508 555 +anim 1 31 3.496508 3.496508 557 +sciencecornel 1 22 3.850148 3.850148 768 +indian 1 22 3.850148 3.850148 769 +toolkit 1 20 3.951244 3.951244 835 +applet 1 20 3.951244 3.951244 827 +practicum 1 16 4.174387 4.174387 960 +drive 1 15 4.248495 4.248495 1052 +audio 1 14 4.317488 4.317488 1094 +camera 1 14 4.317488 4.317488 1115 +galleri 1 13 4.382027 4.382027 1192 +magic 1 11 4.553877 4.553877 1358 +wood 1 11 4.553877 4.553877 1355 +perspect 1 10 4.653960 4.653960 1437 +hoca 1 5 5.347108 5.347108 2241 +hobb 1 4 5.568345 5.568345 2893 +nashvil 1 4 5.568345 5.568345 2867 +tennesse 1 4 5.568345 5.568345 2763 +indira 1 3 5.857933 5.857933 3656 +engineeringclass 1 3 5.857933 5.857933 3667 +recip 1 3 5.857933 5.857933 3668 +coimbator 1 2 6.263398 6.263398 5130 +cornelluniversityfal 1 2 6.263398 6.263398 5131 +cspracticum 1 2 6.263398 6.263398 5132 +carpet 1 2 6.263398 6.263398 5133 +colloqium 1 2 6.263398 6.263398 5134 +manageri 1 2 6.263398 6.263398 5135 +vidyaprakash 1 1 6.957497 6.957497 10612 +vidyaprakashmast 1 1 6.957497 6.957497 10613 +universitywelcom 1 1 6.957497 6.957497 10614 +financesumm 1 1 6.957497 6.957497 10615 +tracingin 1 1 6.957497 6.957497 10616 +perspectivetransform 1 1 6.957497 6.957497 10617 +myresumeclick 1 1 6.957497 6.957497 10618 +transformssom 1 1 6.957497 6.957497 10619 +sgamelan 1 1 6.957497 6.957497 10620 +calvinand 1 1 6.957497 6.957497 10621 +gif 1 1 6.957497 6.957497 10622 +chicker 1 1 6.957497 6.957497 10623 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..9d193b02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +file 1 132 1.945910 1.945910 70 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +user 1 104 2.302585 2.302585 137 +section 1 94 2.397895 2.397895 149 +environ 1 84 2.484907 2.484907 177 +start 1 83 2.484907 2.484907 173 +solut 1 82 2.484907 2.484907 162 +good 1 77 2.564949 2.564949 200 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +come 1 78 2.564949 2.564949 202 +server 1 76 2.564949 2.564949 204 +homework 1 79 2.564949 2.564949 193 +write 1 72 2.639057 2.639057 222 +knowledg 1 67 2.708050 2.708050 243 +test 1 66 2.708050 2.708050 252 +prof 1 64 2.772589 2.772589 273 +descript 1 64 2.772589 2.772589 271 +virtual 1 62 2.772589 2.772589 285 +index 1 56 2.890372 2.890372 309 +detail 1 57 2.890372 2.890372 321 +instruct 1 53 2.944439 2.944439 332 +extens 1 53 2.944439 2.944439 340 +cover 1 55 2.944439 2.944439 329 +much 1 52 2.995732 2.995732 349 +week 1 52 2.995732 2.995732 343 +set 1 50 3.044522 3.044522 361 +right 1 48 3.044522 3.044522 363 +get 1 46 3.091042 3.091042 380 +directori 1 45 3.135494 3.135494 396 +video 1 44 3.135494 3.135494 405 +protocol 1 45 3.135494 3.135494 407 +http 1 41 3.218876 3.218876 420 +programm 1 39 3.258097 3.258097 445 +tutori 1 39 3.258097 3.258097 437 +brian 1 38 3.295837 3.295837 466 +prototyp 1 38 3.295837 3.295837 463 +multi 1 36 3.367296 3.367296 493 +short 1 36 3.367296 3.367296 499 +manual 1 35 3.401197 3.401197 504 +packag 1 28 3.610918 3.610918 614 +togeth 1 23 3.806662 3.806662 714 +try 1 22 3.850148 3.850148 764 +smith 1 20 3.951244 3.951244 820 +mpeg 1 20 3.951244 3.951244 831 +media 1 19 4.007333 4.007333 861 +repositori 1 17 4.110874 4.110874 932 +doesn 1 15 4.248495 4.248495 1055 +remot 1 15 4.248495 4.248495 1041 +script 1 13 4.382027 4.382027 1171 +suit 1 13 4.382027 4.382027 1129 +realiti 1 12 4.465908 4.465908 1272 +guidelin 1 7 5.010635 5.010635 1832 +conferenc 1 7 5.010635 5.010635 1857 +put 1 6 5.164786 5.164786 2017 +valuabl 1 5 5.347108 5.347108 2256 +templat 1 5 5.347108 5.347108 2311 +spam 1 4 5.568345 5.568345 2927 +knowledgebas 1 2 6.263398 6.263398 5136 +homeless 1 1 6.957497 6.957497 10625 +pageioi 1 1 6.957497 6.957497 10624 +lamioi 1 1 6.957497 6.957497 10626 +multim 1 1 6.957497 6.957497 10627 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..8b8b3ec9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +relat 1 139 1.945910 1.945910 68 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +world 1 115 2.197225 2.197225 126 +make 1 111 2.197225 2.197225 120 +peopl 1 96 2.302585 2.302585 132 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +proceed 1 93 2.397895 2.397895 152 +commun 1 95 2.397895 2.397895 157 +info 1 85 2.484907 2.484907 176 +environ 1 84 2.484907 2.484907 177 +wide 1 84 2.484907 2.484907 185 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +upson 1 71 2.639057 2.639057 218 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +workshop 1 71 2.639057 2.639057 239 +summari 1 73 2.639057 2.639057 237 +ithaca 1 65 2.772589 2.772589 294 +abstract 1 62 2.772589 2.772589 276 +collect 1 65 2.772589 2.772589 268 +type 1 61 2.833213 2.833213 296 +thesi 1 57 2.890372 2.890372 327 +explor 1 58 2.890372 2.890372 324 +point 1 58 2.890372 2.890372 319 +scientif 1 53 2.944439 2.944439 341 +hardwar 1 51 2.995732 2.995732 350 +format 1 48 3.044522 3.044522 356 +basic 1 50 3.044522 3.044522 360 +still 1 50 3.044522 3.044522 362 +algebra 1 45 3.135494 3.135494 394 +http 1 41 3.218876 3.218876 420 +editor 1 41 3.218876 3.218876 433 +live 1 40 3.258097 3.258097 451 +paul 1 38 3.295837 3.295837 471 +correct 1 38 3.295837 3.295837 462 +formal 1 37 3.332205 3.332205 478 +post 1 35 3.401197 3.401197 505 +next 1 34 3.401197 3.401197 517 +full 1 28 3.610918 3.610918 615 +load 1 28 3.610918 3.610918 601 +enhanc 1 26 3.688879 3.688879 644 +session 1 26 3.688879 3.688879 643 +doctor 1 24 3.761200 3.761200 709 +sometim 1 24 3.761200 3.761200 696 +proof 1 23 3.806662 3.806662 720 +methodolog 1 23 3.806662 3.806662 733 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +dai 1 22 3.850148 3.850148 753 +theorem 1 21 3.912023 3.912023 786 +synthesi 1 20 3.951244 3.951244 834 +toolkit 1 20 3.951244 3.951244 835 +prove 1 19 4.007333 4.007333 848 +north 1 19 4.007333 4.007333 873 +els 1 19 4.007333 4.007333 843 +hypertext 1 19 4.007333 4.007333 865 +definit 1 19 4.007333 4.007333 864 +coupl 1 17 4.110874 4.110874 939 +month 1 15 4.248495 4.248495 1025 +circuit 1 13 4.382027 4.382027 1131 +someon 1 13 4.382027 4.382027 1128 +moment 1 11 4.553877 4.553877 1379 +nuprl 1 10 4.653960 4.653960 1402 +usaphon 1 9 4.753590 4.753590 1600 +entitl 1 9 4.753590 4.753590 1490 +inter 1 9 4.753590 4.753590 1530 +float 1 9 4.753590 4.753590 1504 +prover 1 8 4.875197 4.875197 1653 +attent 1 8 4.875197 4.875197 1651 +ifip 1 5 5.347108 5.347108 2459 +holland 1 5 5.347108 5.347108 2490 +jackson 1 3 5.857933 5.857933 3586 +pagepaul 1 3 5.857933 5.857933 3669 +bout 1 3 5.857933 5.857933 3670 +elsevi 1 3 5.857933 5.857933 3671 +pai 1 3 5.857933 5.857933 3672 +shouldb 1 3 5.857933 5.857933 3673 +associatecornel 1 2 6.263398 6.263398 5137 +eduwww 1 2 6.263398 6.263398 5138 +linkag 1 2 6.263398 6.263398 5139 +thenuprl 1 2 6.263398 6.263398 5047 +workon 1 2 6.263398 6.263398 4280 +htmladdress 1 1 6.957497 6.957497 10628 +intereststheorem 1 1 6.957497 6.957497 10629 +andhardwar 1 1 6.957497 6.957497 10630 +informationmi 1 1 6.957497 6.957497 10631 +developmentsystem 1 1 6.957497 6.957497 10632 +bundi 1 1 6.957497 6.957497 10633 +automateddeduct 1 1 6.957497 6.957497 10634 +artif 1 1 6.957497 6.957497 10635 +stavrid 1 1 6.957497 6.957497 10636 +melham 1 1 6.957497 6.957497 10637 +transactionsa 1 1 6.957497 6.957497 10638 +theadvanc 1 1 6.957497 6.957497 10639 +nuprlth 1 1 6.957497 6.957497 10640 +getround 1 1 6.957497 6.957497 10641 +thetheori 1 1 6.957497 6.957497 10642 +foreach 1 1 6.957497 6.957497 10643 +andtheorem 1 1 6.957497 6.957497 10644 +thepolynomi 1 1 6.957497 6.957497 10645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..25bda0fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +hani 1 2 6.263398 6.263398 5140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..ee8f563f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +site 1 106 2.197225 2.197225 119 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +perman 1 11 4.553877 4.553877 1372 +usaoffic 1 6 5.164786 5.164786 2159 +janosi 1 3 5.857933 5.857933 3149 +mywww 1 2 6.263398 6.263398 5113 +tibor 1 1 6.957497 6.957497 10646 +jnositibor 1 1 6.957497 6.957497 10647 +jnosiwelcom 1 1 6.957497 6.957497 10648 +constructionoffic 1 1 6.957497 6.957497 10649 +zenotibor 1 1 6.957497 6.957497 10650 +jnosi 1 1 6.957497 6.957497 10651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..436b4675 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +address 1 170 1.791759 1.791759 62 +master 1 76 2.564949 2.564949 216 +ithaca 1 65 2.772589 2.772589 294 +telephon 1 50 3.044522 3.044522 373 +avenu 1 12 4.465908 4.465908 1277 +mapl 1 11 4.553877 4.553877 1376 +janwun 1 1 6.957497 6.957497 10652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..fc0188d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +assist 1 112 2.197225 2.197225 113 +code 1 108 2.197225 2.197225 116 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +member 1 84 2.484907 2.484907 165 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +optim 1 79 2.564949 2.564949 197 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +upson 1 71 2.639057 2.639057 218 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +practic 1 70 2.708050 2.708050 246 +ithaca 1 65 2.772589 2.772589 294 +evalu 1 64 2.772589 2.772589 266 +abstract 1 62 2.772589 2.772589 276 +septemb 1 65 2.772589 2.772589 274 +function 1 62 2.772589 2.772589 275 +januari 1 62 2.772589 2.772589 264 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +faculti 1 56 2.890372 2.890372 325 +direct 1 57 2.890372 2.890372 316 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +tabl 1 51 2.995732 2.995732 346 +standard 1 48 3.044522 3.044522 365 +principl 1 48 3.044522 3.044522 357 +mark 1 44 3.135494 3.135494 403 +late 1 40 3.258097 3.258097 439 +annual 1 40 3.258097 3.258097 458 +bibliographi 1 34 3.401197 3.401197 518 +extend 1 32 3.465736 3.465736 539 +ad 1 32 3.465736 3.465736 544 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +robert 1 30 3.555348 3.555348 567 +focus 1 29 3.583519 3.583519 584 +semant 1 29 3.583519 3.583519 587 +platform 1 29 3.583519 3.583519 591 +proc 1 26 3.688879 3.688879 649 +primari 1 25 3.737670 3.737670 669 +greg 1 24 3.761200 3.761200 695 +interpret 1 24 3.761200 3.761200 686 +store 1 24 3.761200 3.761200 693 +thread 1 23 3.806662 3.806662 722 +cooper 1 22 3.850148 3.850148 757 +portabl 1 20 3.951244 3.951244 819 +particularli 1 19 4.007333 4.007333 867 +andrew 1 19 4.007333 4.007333 849 +eric 1 19 4.007333 4.007333 870 +concentr 1 18 4.060443 4.060443 906 +less 1 18 4.060443 4.060443 892 +partial 1 18 4.060443 4.060443 900 +fourth 1 16 4.174387 4.174387 999 +diego 1 16 4.174387 4.174387 992 +princeton 1 15 4.248495 4.248495 1042 +francisco 1 14 4.317488 4.317488 1095 +sigplan 1 13 4.382027 4.382027 1190 +conf 1 13 4.382027 4.382027 1181 +mellon 1 13 4.382027 4.382027 1179 +onth 1 12 4.465908 4.465908 1218 +carnegi 1 12 4.465908 4.465908 1260 +faster 1 11 4.553877 4.553877 1323 +refin 1 11 4.553877 4.553877 1363 +road 1 11 4.553877 4.553877 1374 +interestsmi 1 10 4.653960 4.653960 1462 +operatingsystem 1 10 4.653960 4.653960 1401 +bring 1 10 4.653960 4.653960 1430 +cheng 1 10 4.653960 4.653960 1381 +lock 1 9 4.753590 4.753590 1551 +jersei 1 9 4.753590 4.753590 1587 +closur 1 8 4.875197 4.875197 1643 +convers 1 8 4.875197 4.875197 1673 +andcomput 1 8 4.875197 4.875197 1623 +leon 1 8 4.875197 4.875197 1631 +hack 1 7 5.010635 5.010635 1950 +bit 1 7 5.010635 5.010635 1833 +gzip 1 6 5.164786 5.164786 2117 +morrisett 1 5 5.347108 5.347108 2263 +interestedin 1 5 5.347108 5.347108 2260 +consum 1 5 5.347108 5.347108 2334 +optimist 1 5 5.347108 5.347108 2501 +kept 1 4 5.568345 5.568345 2762 +polymorph 1 4 5.568345 5.568345 2627 +gregori 1 4 5.568345 5.568345 2928 +informationresearch 1 3 5.857933 5.857933 3675 +teachingc 1 3 5.857933 5.857933 3614 +stone 1 3 5.857933 5.857933 3674 +denmark 1 3 5.857933 5.857933 3676 +warren 1 3 5.857933 5.857933 3301 +intereststeachingselect 1 2 6.263398 6.263398 4924 +linksperson 1 2 6.263398 6.263398 5143 +harper 1 2 6.263398 6.263398 5141 +herlihi 1 2 6.263398 6.263398 5144 +copenhagen 1 2 6.263398 6.263398 5145 +multiprocess 1 2 6.263398 6.263398 5142 +papersrel 1 1 6.957497 6.957497 10655 +ofadvanc 1 1 6.957497 6.957497 10656 +forbuild 1 1 6.957497 6.957497 10657 +safelanguag 1 1 6.957497 6.957497 10658 +toolsfrom 1 1 6.957497 6.957497 10659 +systemssoftwar 1 1 6.957497 6.957497 10660 +paperssemant 1 1 6.957497 6.957497 10661 +tarditi 1 1 6.957497 6.957497 10653 +safetythrough 1 1 6.957497 6.957497 10662 +yasuhiko 1 1 6.957497 6.957497 10663 +minamid 1 1 6.957497 6.957497 10664 +matthia 1 1 6.957497 6.957497 10665 +felleisen 1 1 6.957497 6.957497 10666 +reportcmu 1 1 6.957497 6.957497 10667 +notecmu 1 1 6.957497 6.957497 10668 +intensionaltyp 1 1 6.957497 6.957497 10669 +parallelizationgreg 1 1 6.957497 6.957497 10670 +mauric 1 1 6.957497 6.957497 10671 +tolmach 1 1 6.957497 6.957497 10654 +scienceperson 1 1 6.957497 6.957497 10672 +informationhom 1 1 6.957497 6.957497 10673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..a427e950 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +address 1 170 1.791759 1.791759 62 +resum 1 79 2.564949 2.564949 217 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +taiwan 1 16 4.174387 4.174387 1006 +avenu 1 12 4.465908 4.465908 1277 +mapl 1 11 4.553877 4.553877 1376 +perman 1 11 4.553877 4.553877 1372 +taipei 1 4 5.568345 5.568345 2926 +shing 1 2 6.263398 6.263398 5146 +jiun 1 1 6.957497 6.957497 10674 +jhlin 1 1 6.957497 6.957497 10675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..70b1d4a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +homepag 1 93 2.397895 2.397895 148 +jerri 1 3 5.857933 5.857933 3445 +edujerri 1 1 6.957497 6.957497 10676 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..979125b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +phone 1 175 1.791759 1.791759 45 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +educ 1 86 2.484907 2.484907 191 +school 1 84 2.484907 2.484907 188 +ieee 1 86 2.484907 2.484907 190 +state 1 76 2.564949 2.564949 207 +upson 1 71 2.639057 2.639057 218 +nation 1 74 2.639057 2.639057 240 +servic 1 72 2.639057 2.639057 236 +degre 1 69 2.708050 2.708050 259 +window 1 68 2.708050 2.708050 242 +new 1 64 2.772589 2.772589 262 +foundat 1 62 2.772589 2.772589 286 +guid 1 63 2.772589 2.772589 267 +virtual 1 62 2.772589 2.772589 285 +best 1 59 2.833213 2.833213 299 +cool 1 49 3.044522 3.044522 374 +directori 1 45 3.135494 3.135494 396 +york 1 41 3.218876 3.218876 435 +review 1 42 3.218876 3.218876 425 +music 1 42 3.218876 3.218876 436 +futur 1 41 3.218876 3.218876 427 +societi 1 40 3.258097 3.258097 456 +china 1 37 3.332205 3.332205 487 +award 1 34 3.401197 3.401197 523 +chines 1 29 3.583519 3.583519 595 +art 1 29 3.583519 3.583519 593 +weather 1 28 3.610918 3.610918 618 +daili 1 24 3.761200 3.761200 706 +magazin 1 24 3.761200 3.761200 704 +yahoo 1 24 3.761200 3.761200 707 +honor 1 23 3.806662 3.806662 729 +famili 1 23 3.806662 3.806662 735 +wang 1 21 3.912023 3.912023 790 +beij 1 19 4.007333 4.007333 876 +histori 1 19 4.007333 4.007333 853 +transfer 1 16 4.174387 4.174387 967 +taiwan 1 16 4.174387 4.174387 1006 +rank 1 14 4.317488 4.317488 1063 +incomput 1 14 4.317488 4.317488 1096 +entertain 1 12 4.465908 4.465908 1286 +tour 1 11 4.553877 4.553877 1307 +america 1 11 4.553877 4.553877 1370 +fellowship 1 10 4.653960 4.653960 1460 +sister 1 9 4.753590 4.753590 1524 +film 1 8 4.875197 4.875197 1761 +tourist 1 8 4.875197 4.875197 1710 +digest 1 7 5.010635 5.010635 1864 +cultur 1 7 5.010635 5.010635 1951 +monei 1 7 5.010635 5.010635 1934 +peterson 1 7 5.010635 5.010635 1850 +christian 1 7 5.010635 5.010635 1949 +scholar 1 6 5.164786 5.164786 2180 +forum 1 6 5.164786 5.164786 2027 +postcard 1 6 5.164786 5.164786 2181 +brook 1 6 5.164786 5.164786 2152 +suni 1 5 5.347108 5.347108 2452 +ucla 1 5 5.347108 5.347108 2502 +hallithaca 1 4 5.568345 5.568345 2894 +twin 1 3 5.857933 5.857933 3657 +binghamton 1 3 5.857933 5.857933 3544 +hongkong 1 3 5.857933 5.857933 3677 +stamp 1 3 5.857933 5.857933 3678 +ryan 1 3 5.857933 5.857933 3679 +tian 1 3 5.857933 5.857933 3680 +stoni 1 3 5.857933 5.857933 3571 +nankai 1 2 6.263398 6.263398 5147 +tianjin 1 2 6.263398 6.263398 5148 +barri 1 2 6.263398 6.263398 5149 +sciencefound 1 2 6.263398 6.263398 5150 +chinaand 1 2 6.263398 6.263398 5151 +sceneri 1 2 6.263398 6.263398 5152 +sheng 1 2 6.263398 6.263398 5153 +liber 1 2 6.263398 6.263398 5154 +wangphd 1 1 6.957497 6.957497 10677 +jiawang 1 1 6.957497 6.957497 10678 +goldwat 1 1 6.957497 6.957497 10679 +cbnet 1 1 6.957497 6.957497 10680 +chinanet 1 1 6.957497 6.957497 10681 +chinesecalendar 1 1 6.957497 6.957497 10682 +mediainform 1 1 6.957497 6.957497 10683 +hongkonglaserdisccent 1 1 6.957497 6.957497 10684 +internetdistribut 1 1 6.957497 6.957497 10685 +multilingu 1 1 6.957497 6.957497 10686 +smovieplex 1 1 6.957497 6.957497 10687 +diwww 1 1 6.957497 6.957497 10688 +thesenior 1 1 6.957497 6.957497 10689 +worldmap 1 1 6.957497 6.957497 10690 +mandarin 1 1 6.957497 6.957497 10691 +cssa 1 1 6.957497 6.957497 10692 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..985c47af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +process 1 142 1.945910 1.945910 72 +machin 1 129 2.079442 2.079442 95 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +level 1 87 2.484907 2.484907 180 +info 1 85 2.484907 2.484907 176 +master 1 76 2.564949 2.564949 216 +prof 1 64 2.772589 2.772589 273 +colleg 1 61 2.833213 2.833213 300 +semest 1 58 2.890372 2.890372 312 +found 1 53 2.944439 2.944439 337 +long 1 43 3.178054 3.178054 413 +vision 1 41 3.218876 3.218876 430 +robot 1 36 3.367296 3.367296 497 +primari 1 25 3.737670 3.737670 669 +particularli 1 19 4.007333 4.007333 867 +miller 1 17 4.110874 4.110874 949 +night 1 11 4.553877 4.553877 1319 +justin 1 7 5.010635 5.010635 1789 +uniform 1 7 5.010635 5.010635 1845 +ramin 1 7 5.010635 5.010635 1820 +zabih 1 6 5.164786 5.164786 2138 +csrvl 1 3 5.857933 5.857933 3543 +navi 1 2 6.263398 6.263398 5155 +com 1 2 6.263398 6.263398 5156 +ofengin 1 1 6.957497 6.957497 10693 +assistantwork 1 1 6.957497 6.957497 10694 +ismachin 1 1 6.957497 6.957497 10695 +informationsom 1 1 6.957497 6.957497 10696 +rant 1 1 6.957497 6.957497 10697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..59e028f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +high 1 130 2.079442 2.079442 101 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +seattl 1 120 2.079442 2.079442 103 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +west 1 83 2.484907 2.484907 192 +school 1 84 2.484907 2.484907 188 +internet 1 83 2.484907 2.484907 186 +resum 1 79 2.564949 2.564949 217 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +free 1 73 2.639057 2.639057 224 +receiv 1 66 2.708050 2.708050 244 +practic 1 70 2.708050 2.708050 246 +multimedia 1 68 2.708050 2.708050 258 +main 1 67 2.708050 2.708050 256 +creat 1 63 2.772589 2.772589 277 +street 1 63 2.772589 2.772589 293 +new 1 64 2.772589 2.772589 262 +januari 1 62 2.772589 2.772589 264 +cool 1 49 3.044522 3.044522 374 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +seminar 1 38 3.295837 3.295837 470 +formal 1 37 3.332205 3.332205 478 +game 1 36 3.367296 3.367296 498 +product 1 33 3.433987 3.433987 527 +campu 1 27 3.637586 3.637586 623 +jeff 1 25 3.737670 3.737670 673 +daili 1 24 3.761200 3.761200 706 +divis 1 21 3.912023 3.912023 803 +mpeg 1 20 3.951244 3.951244 831 +anyon 1 17 4.110874 4.110874 916 +intel 1 16 4.174387 4.174387 1000 +practicum 1 16 4.174387 4.174387 960 +jose 1 16 4.174387 4.174387 976 +francisco 1 14 4.317488 4.317488 1095 +went 1 12 4.465908 4.465908 1279 +entertain 1 12 4.465908 4.465908 1286 +newspap 1 12 4.465908 4.465908 1280 +systemsc 1 11 4.553877 4.553877 1293 +mapl 1 11 4.553877 4.553877 1376 +purdu 1 10 4.653960 4.653960 1466 +sundai 1 10 4.653960 4.653960 1387 +leader 1 9 4.753590 4.753590 1576 +sister 1 9 4.753590 4.753590 1524 +portland 1 7 5.010635 5.010635 1878 +chronicl 1 7 5.010635 5.010635 1952 +indiana 1 6 5.164786 5.164786 2057 +oregon 1 5 5.347108 5.347108 2437 +thrive 1 5 5.347108 5.347108 2257 +revolut 1 5 5.347108 5.347108 2315 +encod 1 4 5.568345 5.568345 2929 +dalla 1 4 5.568345 5.568345 2930 +classesc 1 3 5.857933 5.857933 3681 +detroit 1 3 5.857933 5.857933 3565 +counti 1 3 5.857933 5.857933 3682 +cornellopoli 1 2 6.263398 6.263398 5157 +techniquec 1 2 6.263398 6.263398 5158 +methodsc 1 2 6.263398 6.263398 5159 +colloquiumc 1 2 6.263398 6.263398 5160 +magazinepc 1 2 6.263398 6.263398 5161 +morn 1 2 6.263398 6.263398 5162 +orang 1 2 6.263398 6.263398 5163 +herald 1 2 6.263398 6.263398 4789 +moorejeff 1 1 6.957497 6.957497 10699 +moorewel 1 1 6.957497 6.957497 10700 +mastersof 1 1 6.957497 6.957497 10701 +lafayett 1 1 6.957497 6.957497 10702 +hillsboro 1 1 6.957497 6.957497 10698 +suburb 1 1 6.957497 6.957497 10703 +employmentmi 1 1 6.957497 6.957497 10704 +classesnba 1 1 6.957497 6.957497 10705 +sectorc 1 1 6.957497 6.957497 10706 +researchfal 1 1 6.957497 6.957497 10707 +paperc 1 1 6.957497 6.957497 10708 +opendoc 1 1 6.957497 6.957497 10709 +mfcoptim 1 1 6.957497 6.957497 10710 +researchsoftwar 1 1 6.957497 6.957497 10711 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 1 1 6.957497 6.957497 10712 +companiesintelsilicon 1 1 6.957497 6.957497 10713 +graphicsibmsunapplemagazinespc 1 1 6.957497 6.957497 10714 +weekpc 1 1 6.957497 6.957497 10715 +computingcomput 1 1 6.957497 6.957497 10716 +shopperwindow 1 1 6.957497 6.957497 10717 +sourcescomput 1 1 6.957497 6.957497 10718 +lifemacusermacweekinteract 1 1 6.957497 6.957497 10719 +weekfamili 1 1 6.957497 6.957497 10720 +pccomput 1 1 6.957497 6.957497 10721 +worldelectron 1 1 6.957497 6.957497 10722 +newspapersusa 1 1 6.957497 6.957497 10723 +todaywal 1 1 6.957497 6.957497 10724 +journalnew 1 1 6.957497 6.957497 10725 +timesphiladelphia 1 1 6.957497 6.957497 10726 +onlineth 1 1 6.957497 6.957497 10727 +worldwideth 1 1 6.957497 6.957497 10728 +opinionsth 1 1 6.957497 6.957497 10729 +gopherth 1 1 6.957497 6.957497 10730 +knoxvil 1 1 6.957497 6.957497 10731 +sentinelth 1 1 6.957497 6.957497 10732 +onlinelat 1 1 6.957497 6.957497 10733 +serviceth 1 1 6.957497 6.957497 10734 +nugget 1 1 6.957497 6.957497 10735 +oregonrworld 1 1 6.957497 6.957497 10736 +registerth 1 1 6.957497 6.957497 10737 +examinersan 1 1 6.957497 6.957497 10738 +mercuryth 1 1 6.957497 6.957497 10739 +timesnando 1 1 6.957497 6.957497 10740 +netusa 1 1 6.957497 6.957497 10741 +todayboston 1 1 6.957497 6.957497 10742 +globeportland 1 1 6.957497 6.957497 10743 +telegramvisitor 1 1 6.957497 6.957497 10744 +fdithaca 1 1 6.957497 6.957497 10745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..cd389107 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +click 1 142 1.945910 1.945910 78 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +code 1 108 2.197225 2.197225 116 +text 1 98 2.302585 2.302585 133 +memori 1 101 2.302585 2.302585 139 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +resum 1 79 2.564949 2.564949 217 +exampl 1 77 2.564949 2.564949 195 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +multimedia 1 68 2.708050 2.708050 258 +simul 1 66 2.708050 2.708050 255 +view 1 70 2.708050 2.708050 254 +virtual 1 62 2.772589 2.772589 285 +browser 1 56 2.890372 2.890372 313 +direct 1 57 2.890372 2.890372 316 +friend 1 48 3.044522 3.044522 376 +visitor 1 49 3.044522 3.044522 371 +video 1 44 3.135494 3.135494 405 +better 1 45 3.135494 3.135494 401 +movi 1 40 3.258097 3.258097 459 +vita 1 38 3.295837 3.295837 473 +game 1 36 3.367296 3.367296 498 +curriculum 1 33 3.433987 3.433987 535 +anim 1 31 3.496508 3.496508 557 +enabl 1 26 3.688879 3.688879 655 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +jose 1 16 4.174387 4.174387 976 +sign 1 16 4.174387 4.174387 970 +hobbi 1 16 4.174387 4.174387 1009 +transit 1 15 4.248495 4.248495 1046 +avenu 1 12 4.465908 4.465908 1277 +meng 1 12 4.465908 4.465908 1214 +mapl 1 11 4.553877 4.553877 1376 +clock 1 11 4.553877 4.553877 1320 +bill 1 11 4.553877 4.553877 1297 +rivl 1 8 4.875197 4.875197 1632 +autonom 1 8 4.875197 4.875197 1749 +vehicl 1 7 5.010635 5.010635 1928 +courtesi 1 7 5.010635 5.010635 1953 +photographi 1 6 5.164786 5.164786 2146 +recruit 1 6 5.164786 5.164786 2145 +hoca 1 5 5.347108 5.347108 2241 +multitask 1 4 5.568345 5.568345 2803 +crazi 1 4 5.568345 5.568345 2822 +fernandez 1 3 5.857933 5.857933 3591 +lui 1 2 6.263398 6.263398 5164 +joselui 1 2 6.263398 6.263398 4965 +pyramania 1 2 6.263398 6.263398 4957 +actor 1 2 6.263398 6.263398 4240 +pagejos 1 1 6.957497 6.957497 10746 +fernandezjos 1 1 6.957497 6.957497 10747 +fernandezmast 1 1 6.957497 6.957497 10748 +ebithaca 1 1 6.957497 6.957497 10749 +scroll 1 1 6.957497 6.957497 10750 +presentationc 1 1 6.957497 6.957497 10751 +spaceship 1 1 6.957497 6.957497 10752 +battl 1 1 6.957497 6.957497 10753 +picturesmusiccomputerswrit 1 1 6.957497 6.957497 10754 +giel 1 1 6.957497 6.957497 10755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..d95250c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +distribut 1 162 1.791759 1.791759 51 +click 1 142 1.945910 1.945910 78 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +simul 1 66 2.708050 2.708050 255 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +administr 1 27 3.637586 3.637586 628 +busi 1 21 3.912023 3.912023 784 +johnson 1 13 4.382027 4.382027 1162 +linda 1 10 4.653960 4.653960 1394 +autonom 1 8 4.875197 4.875197 1749 +vehicl 1 7 5.010635 5.010635 1928 +julin 1 1 6.957497 6.957497 10757 +hurtado 1 1 6.957497 6.957497 10756 +pagejulin 1 1 6.957497 6.957497 10758 +universitymast 1 1 6.957497 6.957497 10759 +managementmast 1 1 6.957497 6.957497 10760 +science 1 1 6.957497 6.957497 10761 +colombia 1 1 6.957497 6.957497 10762 +er 1 1 6.957497 6.957497 10763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..2401f94e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +texa 1 160 1.791759 1.791759 64 +welcom 1 122 2.079442 2.079442 99 +send 1 114 2.197225 2.197225 109 +thing 1 84 2.484907 2.484907 189 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +august 1 66 2.708050 2.708050 257 +septemb 1 65 2.772589 2.772589 274 +favorit 1 44 3.135494 3.135494 410 +join 1 39 3.258097 3.258097 457 +ad 1 32 3.465736 3.465736 544 +instrument 1 7 5.010635 5.010635 1954 +edumi 1 6 5.164786 5.164786 2132 +dalla 1 4 5.568345 5.568345 2930 +janeen 1 1 6.957497 6.957497 10764 +homepagejaneen 1 1 6.957497 6.957497 10765 +reich 1 1 6.957497 6.957497 10766 +jreich 1 1 6.957497 6.957497 10767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..33bcde66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +resum 1 79 2.564949 2.564949 217 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +come 1 78 2.564949 2.564949 202 +receiv 1 66 2.708050 2.708050 244 +multimedia 1 68 2.708050 2.708050 258 +window 1 68 2.708050 2.708050 242 +main 1 67 2.708050 2.708050 256 +septemb 1 65 2.772589 2.772589 274 +januari 1 62 2.772589 2.772589 264 +visit 1 63 2.772589 2.772589 288 +still 1 50 3.044522 3.044522 362 +get 1 46 3.091042 3.091042 380 +cost 1 37 3.332205 3.332205 480 +synchron 1 29 3.583519 3.583519 588 +particip 1 29 3.583519 3.583519 589 +although 1 25 3.737670 3.737670 667 +recognit 1 23 3.806662 3.806662 723 +chip 1 21 3.912023 3.912023 770 +portabl 1 20 3.951244 3.951244 819 +speed 1 18 4.060443 4.060443 911 +stock 1 16 4.174387 4.174387 1007 +massachusett 1 14 4.317488 4.317488 1118 +bodi 1 13 4.382027 4.382027 1178 +meng 1 12 4.465908 4.465908 1214 +speech 1 12 4.465908 4.465908 1222 +grant 1 12 4.465908 4.465908 1216 +systemsc 1 11 4.553877 4.553877 1293 +desktop 1 10 4.653960 4.653960 1445 +capac 1 8 4.875197 4.875197 1740 +filter 1 8 4.875197 4.875197 1641 +mile 1 8 4.875197 4.875197 1743 +ground 1 7 5.010635 5.010635 1955 +amherst 1 5 5.347108 5.347108 2484 +thrive 1 5 5.347108 5.347108 2257 +facial 1 5 5.347108 5.347108 2438 +stage 1 5 5.347108 5.347108 2488 +steer 1 5 5.347108 5.347108 2328 +car 1 4 5.568345 5.568345 2931 +sold 1 4 5.568345 5.568345 2813 +exhaust 1 4 5.568345 5.568345 2825 +ford 1 4 5.568345 5.568345 2636 +gear 1 4 5.568345 5.568345 2891 +visionc 1 3 5.857933 5.857933 3489 +obvious 1 3 5.857933 5.857933 3474 +memberof 1 3 5.857933 5.857933 3169 +bought 1 2 6.263398 6.263398 5165 +accel 1 2 6.263398 6.263398 5166 +plug 1 2 6.263398 6.263398 5167 +jodi 1 1 6.957497 6.957497 10770 +shapirojodi 1 1 6.957497 6.957497 10771 +shapiroeduc 1 1 6.957497 6.957497 10772 +engineeringe 1 1 6.957497 6.957497 10773 +telecommunicationc 1 1 6.957497 6.957497 10774 +researchspr 1 1 6.957497 6.957497 10775 +systemse 1 1 6.957497 6.957497 10776 +networksnba 1 1 6.957497 6.957497 10777 +revolutionc 1 1 6.957497 6.957497 10778 +researchma 1 1 6.957497 6.957497 10779 +automot 1 1 6.957497 6.957497 10780 +engineeringinterest 1 1 6.957497 6.957497 10781 +animationlow 1 1 6.957497 6.957497 10782 +videoconferenc 1 1 6.957497 6.957497 10783 +recognitioninterest 1 1 6.957497 6.957497 10784 +camaro 1 1 6.957497 6.957497 10768 +firebird 1 1 6.957497 6.957497 10785 +yourselfelectron 1 1 6.957497 6.957497 10786 +fuel 1 1 6.957497 6.957497 10787 +inject 1 1 6.957497 6.957497 10788 +alwayshav 1 1 6.957497 6.957497 10789 +designingan 1 1 6.957497 6.957497 10790 +pageefi 1 1 6.957497 6.957497 10791 +pagethes 1 1 6.957497 6.957497 10792 +chevi 1 1 6.957497 6.957497 10769 +gearsmodif 1 1 6.957497 6.957497 10793 +hypertech 1 1 6.957497 6.957497 10794 +flowmast 1 1 6.957497 6.957497 10795 +hurst 1 1 6.957497 6.957497 10796 +shifter 1 1 6.957497 6.957497 10797 +wheel 1 1 6.957497 6.957497 10798 +mustang 1 1 6.957497 6.957497 10799 +speedmodif 1 1 6.957497 6.957497 10800 +motorsport 1 1 6.957497 6.957497 10801 +wiresbest 1 1 6.957497 6.957497 10802 +mphbest 1 1 6.957497 6.957497 10803 +pagenumb 1 1 6.957497 6.957497 10804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..85cab4e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +cornel 1 215 1.386294 1.386294 23 +continu 1 39 3.258097 3.258097 448 +eduto 1 7 5.010635 5.010635 1956 +julia 1 2 6.263398 6.263398 5094 +pagejulia 1 1 6.957497 6.957497 10805 +komissarchik 1 1 6.957497 6.957497 10806 +juliak 1 1 6.957497 6.957497 10807 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..62919d0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +high 1 130 2.079442 2.079442 101 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +pictur 1 89 2.397895 2.397895 160 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +wide 1 84 2.484907 2.484907 185 +master 1 76 2.564949 2.564949 216 +complet 1 77 2.564949 2.564949 208 +server 1 76 2.564949 2.564949 204 +upson 1 71 2.639057 2.639057 218 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +addit 1 74 2.639057 2.639057 228 +ithaca 1 65 2.772589 2.772589 294 +hardwar 1 51 2.995732 2.995732 350 +pointer 1 48 3.044522 3.044522 368 +better 1 45 3.135494 3.135494 401 +describ 1 45 3.135494 3.135494 400 +video 1 44 3.135494 3.135494 405 +http 1 41 3.218876 3.218876 420 +fast 1 42 3.218876 3.218876 429 +realli 1 40 3.258097 3.258097 444 +workstat 1 37 3.332205 3.332205 479 +global 1 34 3.401197 3.401197 520 +toler 1 33 3.433987 3.433987 533 +independ 1 32 3.465736 3.465736 548 +fault 1 32 3.465736 3.465736 547 +produc 1 30 3.555348 3.555348 572 +platform 1 29 3.583519 3.583519 591 +becom 1 28 3.610918 3.610918 603 +full 1 28 3.610918 3.610918 615 +cluster 1 28 3.610918 3.610918 612 +campu 1 27 3.637586 3.637586 623 +administr 1 27 3.637586 3.637586 628 +compress 1 23 3.806662 3.806662 719 +color 1 22 3.850148 3.850148 762 +toolkit 1 20 3.951244 3.951244 835 +increas 1 20 3.951244 3.951244 829 +commerci 1 16 4.174387 4.174387 1005 +critic 1 16 4.174387 4.174387 982 +topolog 1 14 4.317488 4.317488 1089 +demand 1 14 4.317488 4.317488 1073 +horu 1 14 4.317488 4.317488 1116 +achiev 1 14 4.317488 4.317488 1088 +grow 1 12 4.465908 4.465908 1209 +faster 1 11 4.553877 4.553877 1323 +screen 1 9 4.753590 4.753590 1577 +hallcornel 1 8 4.875197 4.875197 1757 +capit 1 7 5.010635 5.010635 1957 +thegoal 1 6 5.164786 5.164786 2033 +sparcstat 1 5 5.347108 5.347108 2406 +fulfil 1 4 5.568345 5.568345 2932 +innov 1 4 5.568345 5.568345 2933 +emilio 1 3 5.857933 5.857933 3683 +summit 1 3 5.857933 5.857933 3684 +adress 1 2 6.263398 6.263398 5168 +occup 1 2 6.263398 6.263398 5169 +fulltim 1 2 6.263398 6.263398 5170 +ethernet 1 2 6.263398 6.263398 5171 +blast 1 2 6.263398 6.263398 5172 +julian 1 1 6.957497 6.957497 10808 +pelenur 1 1 6.957497 6.957497 10809 +centerithaca 1 1 6.957497 6.957497 10810 +wfinger 1 1 6.957497 6.957497 10811 +cyberserv 1 1 6.957497 6.957497 10812 +httpserver 1 1 6.957497 6.957497 10813 +prvf 1 1 6.957497 6.957497 10814 +poss 1 1 6.957497 6.957497 10815 +screenmot 1 1 6.957497 6.957497 10816 +showthat 1 1 6.957497 6.957497 10817 +snarf 1 1 6.957497 6.957497 10818 +transferwith 1 1 6.957497 6.957497 10819 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..dc33387b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +relat 1 139 1.945910 1.945910 68 +perform 1 143 1.945910 1.945910 74 +tool 1 117 2.079442 2.079442 93 +schedul 1 119 2.079442 2.079442 85 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +technic 1 100 2.302585 2.302585 140 +take 1 97 2.302585 2.302585 134 +center 1 88 2.397895 2.397895 158 +resourc 1 81 2.484907 2.484907 172 +info 1 85 2.484907 2.484907 176 +want 1 79 2.564949 2.564949 199 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +practic 1 70 2.708050 2.708050 246 +abstract 1 62 2.772589 2.772589 276 +type 1 61 2.833213 2.833213 296 +back 1 60 2.833213 2.833213 297 +publish 1 57 2.890372 2.890372 326 +overview 1 56 2.890372 2.890372 323 +talk 1 53 2.944439 2.944439 336 +done 1 47 3.091042 3.091042 381 +slide 1 38 3.295837 3.295837 467 +seminar 1 38 3.295837 3.295837 470 +especi 1 36 3.367296 3.367296 496 +bibliographi 1 34 3.401197 3.401197 518 +art 1 29 3.583519 3.583519 593 +great 1 27 3.637586 3.637586 626 +mine 1 26 3.688879 3.688879 654 +higher 1 24 3.761200 3.761200 690 +sequenc 1 23 3.806662 3.806662 734 +supervis 1 20 3.951244 3.951244 840 +verif 1 20 3.951244 3.951244 826 +fine 1 20 3.951244 3.951244 822 +statu 1 18 4.060443 4.060443 885 +pretti 1 13 4.382027 4.382027 1191 +forth 1 13 4.382027 4.382027 1186 +mainli 1 10 4.653960 4.653960 1432 +nuprl 1 10 4.653960 4.653960 1402 +equip 1 10 4.653960 4.653960 1459 +hockei 1 8 4.875197 4.875197 1760 +forum 1 6 5.164786 5.164786 2027 +czar 1 5 5.347108 5.347108 2503 +hickei 1 4 5.568345 5.568345 2845 +identif 1 4 5.568345 5.568345 2773 +jason 1 3 5.857933 5.857933 3389 +orth 1 3 5.857933 5.857933 3685 +backcountri 1 3 5.857933 5.857933 3686 +publicli 1 3 5.857933 5.857933 3687 +universitydepart 1 2 6.263398 6.263398 4871 +bellcor 1 2 6.263398 6.263398 5174 +theatr 1 2 6.263398 6.263398 5173 +robertconst 1 1 6.957497 6.957497 10820 +thefox 1 1 6.957497 6.957497 10821 +markleon 1 1 6.957497 6.957497 10822 +formalsystem 1 1 6.957497 6.957497 10823 +levelmodul 1 1 6.957497 6.957497 10824 +cornella 1 1 6.957497 6.957497 10825 +publishedat 1 1 6.957497 6.957497 10826 +havegiven 1 1 6.957497 6.957497 10827 +theygiv 1 1 6.957497 6.957497 10828 +galleryof 1 1 6.957497 6.957497 10829 +tryth 1 1 6.957497 6.957497 10830 +fineart 1 1 6.957497 6.957497 10831 +cucshockei 1 1 6.957497 6.957497 10832 +thebackcountri 1 1 6.957497 6.957497 10833 +maintainedsoftwar 1 1 6.957497 6.957497 10834 +hockeyfor 1 1 6.957497 6.957497 10835 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..dd6e67da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +thing 1 84 2.484907 2.484907 189 +ithaca 1 65 2.772589 2.772589 294 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +go 1 33 3.433987 3.433987 529 +reach 1 24 3.761200 3.761200 688 +accept 1 18 4.060443 4.060443 879 +english 1 15 4.248495 4.248495 1033 +japan 1 8 4.875197 4.875197 1762 +japanes 1 4 5.568345 5.568345 2934 +sell 1 4 5.568345 5.568345 2935 +sold 1 4 5.568345 5.568345 2813 +sale 1 3 5.857933 5.857933 3688 +koichi 1 1 6.957497 6.957497 10837 +kamijokoichi 1 1 6.957497 6.957497 10838 +kamijo 1 1 6.957497 6.957497 10836 +papershometownseduc 1 1 6.957497 6.957497 10839 +experienceskoichi 1 1 6.957497 6.957497 10840 +muriel 1 1 6.957497 6.957497 10841 +kkamijoh 1 1 6.957497 6.957497 10842 +vnet 1 1 6.957497 6.957497 10843 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..6fc26c35 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +document 1 121 2.079442 2.079442 89 +confer 1 126 2.079442 2.079442 100 +structur 1 106 2.197225 2.197225 105 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +upson 1 71 2.639057 2.639057 218 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +laboratori 1 63 2.772589 2.772589 292 +previou 1 62 2.772589 2.772589 290 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +investig 1 51 2.995732 2.995732 353 +advisor 1 51 2.995732 2.995732 355 +better 1 45 3.135494 3.135494 401 +video 1 44 3.135494 3.135494 405 +autom 1 41 3.218876 3.218876 434 +vision 1 41 3.218876 3.218876 430 +york 1 41 3.218876 3.218876 435 +announc 1 40 3.258097 3.258097 441 +close 1 38 3.295837 3.295837 465 +field 1 37 3.332205 3.332205 482 +robot 1 36 3.367296 3.367296 497 +next 1 34 3.401197 3.401197 517 +articl 1 33 3.433987 3.433987 530 +collabor 1 32 3.465736 3.465736 543 +anim 1 31 3.496508 3.496508 557 +graph 1 30 3.555348 3.555348 576 +manipul 1 27 3.637586 3.637586 624 +arrai 1 27 3.637586 3.637586 627 +strategi 1 25 3.737670 3.737670 682 +handl 1 24 3.761200 3.761200 685 +magazin 1 24 3.761200 3.761200 704 +higher 1 24 3.761200 3.761200 690 +director 1 22 3.850148 3.850148 767 +navig 1 21 3.912023 3.912023 796 +facil 1 20 3.951244 3.951244 814 +stanford 1 17 4.110874 4.110874 955 +germani 1 17 4.110874 4.110874 946 +devic 1 16 4.174387 4.174387 1002 +vector 1 16 4.174387 4.174387 961 +micro 1 15 4.248495 4.248495 1031 +club 1 15 4.248495 4.248495 1058 +earlier 1 13 4.382027 4.382027 1140 +forc 1 10 4.653960 4.653960 1384 +donald 1 9 4.753590 4.753590 1510 +frank 1 9 4.753590 4.753590 1568 +wall 1 9 4.753590 4.753590 1553 +wire 1 8 4.875197 4.875197 1747 +gate 1 6 5.164786 5.164786 2182 +layout 1 6 5.164786 5.164786 2183 +lloyd 1 6 5.164786 5.164786 2103 +educurr 1 5 5.347108 5.347108 2504 +actuat 1 5 5.347108 5.347108 2442 +climb 1 4 5.568345 5.568345 2936 +karl 1 3 5.857933 5.857933 3623 +bhringer 1 3 5.857933 5.857933 3606 +karlsruh 1 3 5.857933 5.857933 3689 +microfabr 1 3 5.857933 5.857933 3610 +noel 1 3 5.857933 5.857933 3376 +kwon 1 3 5.857933 5.857933 3690 +deeper 1 3 5.857933 5.857933 3146 +friedrich 1 2 6.263398 6.263398 5175 +nanofabr 1 2 6.263398 6.263398 5010 +innew 1 2 6.263398 6.263398 4512 +sculptur 1 2 6.263398 6.263398 5176 +wright 1 2 6.263398 6.263398 5177 +nano 1 2 6.263398 6.263398 4961 +pagekarl 1 2 6.263398 6.263398 5043 +dipl 1 1 6.957497 6.957497 10845 +andassembl 1 1 6.957497 6.957497 10844 +implementmicro 1 1 6.957497 6.957497 10846 +withprogramm 1 1 6.957497 6.957497 10847 +professorbruc 1 1 6.957497 6.957497 10848 +founder 1 1 6.957497 6.957497 10849 +macdonaldand 1 1 6.957497 6.957497 10850 +hisresearch 1 1 6.957497 6.957497 10851 +invis 1 1 6.957497 6.957497 10852 +cantilev 1 1 6.957497 6.957497 10853 +fallingwat 1 1 6.957497 6.957497 10854 +outin 1 1 6.957497 6.957497 10855 +lindseth 1 1 6.957497 6.957497 10856 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..35db64a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +mathemat 1 108 2.197225 2.197225 123 +look 1 107 2.197225 2.197225 115 +site 1 106 2.197225 2.197225 119 +make 1 111 2.197225 2.197225 120 +world 1 115 2.197225 2.197225 126 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +commun 1 95 2.397895 2.397895 157 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +help 1 83 2.484907 2.484907 175 +wide 1 84 2.484907 2.484907 185 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +larg 1 82 2.484907 2.484907 168 +activ 1 84 2.484907 2.484907 182 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +method 1 80 2.564949 2.564949 213 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +exampl 1 77 2.564949 2.564949 195 +june 1 79 2.564949 2.564949 214 +david 1 71 2.639057 2.639057 232 +upson 1 71 2.639057 2.639057 218 +html 1 75 2.639057 2.639057 235 +logic 1 71 2.639057 2.639057 230 +write 1 72 2.639057 2.639057 222 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +ithaca 1 65 2.772589 2.772589 294 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +browser 1 56 2.890372 2.890372 313 +profession 1 51 2.995732 2.995732 345 +give 1 50 3.044522 3.044522 359 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +protocol 1 45 3.135494 3.135494 407 +even 1 45 3.135494 3.135494 393 +execut 1 45 3.135494 3.135494 404 +offer 1 43 3.178054 3.178054 414 +might 1 41 3.218876 3.218876 426 +programm 1 39 3.258097 3.258097 445 +multipl 1 39 3.258097 3.258097 453 +correct 1 38 3.295837 3.295837 462 +field 1 37 3.332205 3.332205 482 +cost 1 37 3.332205 3.332205 480 +feel 1 37 3.332205 3.332205 483 +respons 1 37 3.332205 3.332205 476 +download 1 36 3.367296 3.367296 489 +statist 1 35 3.401197 3.401197 521 +concurr 1 34 3.401197 3.401197 501 +random 1 34 3.401197 3.401197 511 +dissert 1 32 3.465736 3.465736 549 +kind 1 32 3.465736 3.465736 541 +specifi 1 30 3.555348 3.555348 568 +abl 1 30 3.555348 3.555348 566 +depend 1 29 3.583519 3.583519 583 +intend 1 28 3.610918 3.610918 599 +propos 1 28 3.610918 3.610918 602 +becom 1 28 3.610918 3.610918 603 +progress 1 28 3.610918 3.610918 598 +measur 1 28 3.610918 3.610918 609 +load 1 28 3.610918 3.610918 601 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +consist 1 26 3.688879 3.688879 651 +revis 1 26 3.688879 3.688879 640 +reliabl 1 25 3.737670 3.737670 674 +fundament 1 25 3.737670 3.737670 661 +wai 1 25 3.737670 3.737670 662 +demonstr 1 24 3.761200 3.761200 694 +initi 1 23 3.806662 3.806662 717 +sciencecornel 1 22 3.850148 3.850148 768 +properti 1 22 3.850148 3.850148 749 +avoid 1 21 3.912023 3.912023 799 +verif 1 20 3.951244 3.951244 826 +basi 1 20 3.951244 3.951244 828 +applet 1 20 3.951244 3.951244 827 +assum 1 19 4.007333 4.007333 845 +hypertext 1 19 4.007333 4.007333 865 +concentr 1 18 4.060443 4.060443 906 +failur 1 18 4.060443 4.060443 898 +behavior 1 18 4.060443 4.060443 881 +encourag 1 18 4.060443 4.060443 880 +layer 1 17 4.110874 4.110874 926 +ultim 1 17 4.110874 4.110874 943 +condit 1 16 4.174387 4.174387 975 +portion 1 16 4.174387 4.174387 971 +action 1 15 4.248495 4.248495 1038 +horu 1 14 4.317488 4.317488 1116 +edui 1 13 4.382027 4.382027 1193 +weak 1 13 4.382027 4.382027 1159 +suit 1 13 4.382027 4.382027 1129 +whose 1 13 4.382027 4.382027 1166 +kenneth 1 12 4.465908 4.465908 1265 +minor 1 12 4.465908 4.465908 1237 +verifi 1 12 4.465908 4.465908 1261 +emploi 1 12 4.465908 4.465908 1284 +calcul 1 12 4.465908 4.465908 1268 +scienceat 1 11 4.553877 4.553877 1375 +reness 1 11 4.553877 4.553877 1333 +host 1 11 4.553877 4.553877 1306 +stack 1 10 4.653960 4.653960 1389 +guarante 1 10 4.653960 4.653960 1391 +certain 1 10 4.653960 4.653960 1393 +equal 1 10 4.653960 4.653960 1424 +robbert 1 9 4.753590 4.753590 1529 +tempor 1 9 4.753590 4.753590 1584 +desir 1 9 4.753590 4.753590 1542 +crash 1 8 4.875197 4.875197 1616 +notion 1 7 5.010635 5.010635 1947 +appar 1 7 5.010635 5.010635 1958 +dedic 1 7 5.010635 5.010635 1843 +hack 1 7 5.010635 5.010635 1950 +furthermor 1 6 5.164786 5.164786 2141 +rough 1 6 5.164786 5.164786 2107 +studentdepart 1 5 5.347108 5.347108 2505 +unnecessari 1 5 5.347108 5.347108 2506 +lost 1 5 5.347108 5.347108 2358 +ofdistribut 1 5 5.347108 5.347108 2316 +notabl 1 5 5.347108 5.347108 2276 +puzzl 1 5 5.347108 5.347108 2507 +disconnect 1 4 5.568345 5.568345 2664 +clearli 1 4 5.568345 5.568345 2590 +formula 1 3 5.857933 5.857933 3405 +omit 1 3 5.857933 5.857933 3466 +confid 1 3 5.857933 5.857933 3691 +temporarili 1 3 5.857933 5.857933 3692 +parallelmachin 1 3 5.857933 5.857933 3693 +lego 1 3 5.857933 5.857933 3188 +thehoru 1 2 6.263398 6.263398 5179 +withprofessor 1 2 6.263398 6.263398 5180 +ofhoru 1 2 6.263398 6.263398 5181 +straightforward 1 2 6.263398 6.263398 4272 +thetop 1 2 6.263398 6.263398 4327 +atyp 1 2 6.263398 6.263398 5042 +sufficientto 1 2 6.263398 6.263398 4261 +haswork 1 2 6.263398 6.263398 5182 +prone 1 2 6.263398 6.263398 5178 +distributedenviron 1 2 6.263398 6.263398 5183 +toi 1 2 6.263398 6.263398 5184 +linksfor 1 2 6.263398 6.263398 5185 +karrdavid 1 1 6.957497 6.957497 10858 +karrphd 1 1 6.957497 6.957497 10859 +karr 1 1 6.957497 6.957497 10857 +birmananddr 1 1 6.957497 6.957497 10860 +protocolsmi 1 1 6.957497 6.957497 10861 +formalspecif 1 1 6.957497 6.957497 10862 +variousinterest 1 1 6.957497 6.957497 10863 +usedin 1 1 6.957497 6.957497 10864 +stylefor 1 1 6.957497 6.957497 10865 +itsinterfac 1 1 6.957497 6.957497 10866 +andbelow 1 1 6.957497 6.957497 10867 +agiven 1 1 6.957497 6.957497 10868 +unusualcombin 1 1 6.957497 6.957497 10869 +systemsshould 1 1 6.957497 6.957497 10870 +constructcustom 1 1 6.957497 6.957497 10871 +theirassoci 1 1 6.957497 6.957497 10872 +thesecur 1 1 6.957497 6.957497 10873 +harden 1 1 6.957497 6.957497 10874 +ofverifi 1 1 6.957497 6.957497 10875 +stem 1 1 6.957497 6.957497 10876 +thepromis 1 1 6.957497 6.957497 10877 +variousguarante 1 1 6.957497 6.957497 10878 +passingenviron 1 1 6.957497 6.957497 10879 +delayedor 1 1 6.957497 6.957497 10880 +componentswer 1 1 6.957497 6.957497 10881 +considerablepromis 1 1 6.957497 6.957497 10882 +consistencywhil 1 1 6.957497 6.957497 10883 +filesin 1 1 6.957497 6.957497 10884 +partitionedinto 1 1 6.957497 6.957497 10885 +wouldallow 1 1 6.957497 6.957497 10886 +performancemi 1 1 6.957497 6.957497 10887 +andeffici 1 1 6.957497 6.957497 10888 +ofsystem 1 1 6.957497 6.957497 10889 +suscept 1 1 6.957497 6.957497 10890 +javath 1 1 6.957497 6.957497 10891 +applicationwith 1 1 6.957497 6.957497 10892 +tonavig 1 1 6.957497 6.957497 10893 +myweb 1 1 6.957497 6.957497 10894 +anetscap 1 1 6.957497 6.957497 10895 +abirthdai 1 1 6.957497 6.957497 10896 +forverifi 1 1 6.957497 6.957497 10897 +affiliationsi 1 1 6.957497 6.957497 10898 +andmaa 1 1 6.957497 6.957497 10899 +informationseemi 1 1 6.957497 6.957497 10900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..7ddec3b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +help 1 83 2.484907 2.484907 175 +interfac 1 79 2.564949 2.564949 209 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +window 1 68 2.708050 2.708050 242 +view 1 70 2.708050 2.708050 254 +simul 1 66 2.708050 2.708050 255 +virtual 1 62 2.772589 2.772589 285 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +scientif 1 53 2.944439 2.944439 341 +visual 1 48 3.044522 3.044522 372 +cool 1 49 3.044522 3.044522 374 +california 1 46 3.091042 3.091042 388 +featur 1 46 3.091042 3.091042 386 +effect 1 46 3.091042 3.091042 385 +video 1 44 3.135494 3.135494 405 +favorit 1 44 3.135494 3.135494 410 +combin 1 42 3.218876 3.218876 421 +music 1 42 3.218876 3.218876 436 +brian 1 38 3.295837 3.295837 466 +open 1 38 3.295837 3.295837 469 +game 1 36 3.367296 3.367296 498 +singl 1 34 3.401197 3.401197 510 +independ 1 32 3.465736 3.465736 548 +platform 1 29 3.583519 3.583519 591 +full 1 28 3.610918 3.610918 615 +enhanc 1 26 3.688879 3.688879 644 +aspect 1 25 3.737670 3.737670 663 +universityithaca 1 24 3.761200 3.761200 710 +motion 1 24 3.761200 3.761200 699 +sciencecornel 1 22 3.850148 3.850148 768 +chip 1 21 3.912023 3.912023 770 +break 1 20 3.951244 3.951244 812 +smith 1 20 3.951244 3.951244 820 +facil 1 20 3.951244 3.951244 814 +mpeg 1 20 3.951244 3.951244 831 +excel 1 19 4.007333 4.007333 868 +stand 1 18 4.060443 4.060443 891 +lot 1 18 4.060443 4.060443 889 +diego 1 16 4.174387 4.174387 992 +took 1 16 4.174387 4.174387 1010 +drive 1 15 4.248495 4.248495 1052 +transit 1 15 4.248495 4.248495 1046 +track 1 15 4.248495 4.248495 1029 +scene 1 14 4.317488 4.317488 1114 +resolut 1 13 4.382027 4.382027 1172 +jonathan 1 13 4.382027 4.382027 1174 +incorpor 1 13 4.382027 4.382027 1163 +entertain 1 12 4.465908 4.465908 1286 +captur 1 12 4.465908 4.465908 1232 +realiti 1 12 4.465908 4.465908 1272 +player 1 11 4.553877 4.553877 1371 +primit 1 11 4.553877 4.553877 1317 +screen 1 9 4.753590 4.753590 1577 +capac 1 8 4.875197 4.875197 1740 +rivl 1 8 4.875197 4.875197 1632 +star 1 8 4.875197 4.875197 1717 +pronounc 1 7 5.010635 5.010635 1918 +vehicl 1 7 5.010635 5.010635 1928 +clip 1 7 5.010635 5.010635 1868 +quick 1 6 5.164786 5.164786 2184 +railroad 1 6 5.164786 5.164786 2161 +hypothet 1 5 5.347108 5.347108 2474 +opengl 1 5 5.347108 5.347108 2299 +multitask 1 4 5.568345 5.568345 2803 +enjoy 1 4 5.568345 5.568345 2937 +screenshot 1 4 5.568345 5.568345 2743 +swartz 1 4 5.568345 5.568345 2878 +engineeringclass 1 3 5.857933 5.857933 3667 +lai 1 3 5.857933 5.857933 3694 +inventor 1 3 5.857933 5.857933 3695 +hodja 1 2 6.263398 6.263398 4972 +fledg 1 2 6.263398 6.263398 4973 +resumesom 1 2 6.263398 6.263398 5186 +pagekartik 1 1 6.957497 6.957497 10903 +kapadiamast 1 1 6.957497 6.957497 10904 +dabnei 1 1 6.957497 6.957497 10905 +kkapadia 1 1 6.957497 6.957497 10906 +qualcomm 1 1 6.957497 6.957497 10902 +comcurr 1 1 6.957497 6.957497 10907 +incorporatedmi 1 1 6.957497 6.957497 10908 +projectshoca 1 1 6.957497 6.957497 10909 +chiphoca 1 1 6.957497 6.957497 10910 +hogman 1 1 6.957497 6.957497 10901 +gameboard 1 1 6.957497 6.957497 10911 +rivlrivl 1 1 6.957497 6.957497 10912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..e15011c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +sinc 1 90 2.397895 2.397895 159 +center 1 88 2.397895 2.397895 158 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +wide 1 84 2.484907 2.484907 185 +onlin 1 75 2.639057 2.639057 223 +write 1 72 2.639057 2.639057 222 +written 1 63 2.772589 2.772589 278 +back 1 60 2.833213 2.833213 297 +publish 1 57 2.890372 2.890372 326 +think 1 57 2.890372 2.890372 314 +much 1 52 2.995732 2.995732 349 +keep 1 44 3.135494 3.135494 409 +press 1 42 3.218876 3.218876 419 +howev 1 41 3.218876 3.218876 422 +editor 1 41 3.218876 3.218876 433 +go 1 33 3.433987 3.433987 529 +mine 1 26 3.688879 3.688879 654 +rather 1 26 3.688879 3.688879 642 +other 1 24 3.761200 3.761200 697 +wonder 1 20 3.951244 3.951244 815 +wrote 1 20 3.951244 3.951244 830 +letter 1 16 4.174387 4.174387 981 +went 1 12 4.465908 4.465908 1279 +newspap 1 12 4.465908 4.465908 1280 +true 1 10 4.653960 4.653960 1422 +rapid 1 10 4.653960 4.653960 1453 +poetri 1 9 4.753590 4.753590 1596 +mile 1 8 4.875197 4.875197 1743 +conflict 1 6 5.164786 5.164786 2041 +famou 1 6 5.164786 5.164786 2185 +grand 1 5 5.347108 5.347108 2425 +mess 1 4 5.568345 5.568345 2886 +flame 1 3 5.857933 5.857933 3696 +arm 1 3 5.857933 5.857933 3697 +argu 1 3 5.857933 5.857933 3698 +dread 1 3 5.857933 5.857933 3630 +anyhow 1 2 6.263398 6.263398 5188 +suspect 1 2 6.263398 6.263398 5187 +mathematician 1 2 6.263398 6.263398 5189 +terrorist 1 2 6.263398 6.263398 5190 +writingsi 1 1 6.957497 6.957497 10913 +proudof 1 1 6.957497 6.957497 10914 +morethought 1 1 6.957497 6.957497 10915 +conscienti 1 1 6.957497 6.957497 10916 +objector 1 1 6.957497 6.957497 10917 +myfirst 1 1 6.957497 6.957497 10918 +fewyear 1 1 6.957497 6.957497 10919 +gulf 1 1 6.957497 6.957497 10920 +vestart 1 1 6.957497 6.957497 10921 +lest 1 1 6.957497 6.957497 10922 +dprobabl 1 1 6.957497 6.957497 10923 +essayist 1 1 6.957497 6.957497 10924 +byron 1 1 6.957497 6.957497 10925 +asuburb 1 1 6.957497 6.957497 10926 +unabomb 1 1 6.957497 6.957497 10927 +mathematiciansar 1 1 6.957497 6.957497 10928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..70a738db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,269 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +hall 1 146 1.945910 1.945910 65 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +report 1 131 2.079442 2.079442 92 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +search 1 95 2.397895 2.397895 155 +ieee 1 86 2.484907 2.484907 190 +info 1 85 2.484907 2.484907 176 +journal 1 83 2.484907 2.484907 183 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +appear 1 78 2.564949 2.564949 210 +interfac 1 79 2.564949 2.564949 209 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +upson 1 71 2.639057 2.639057 218 +workshop 1 71 2.639057 2.639057 239 +servic 1 72 2.639057 2.639057 236 +html 1 75 2.639057 2.639057 235 +david 1 71 2.639057 2.639057 232 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +ithaca 1 65 2.772589 2.772589 294 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +dept 1 64 2.772589 2.772589 291 +plan 1 65 2.772589 2.772589 272 +new 1 64 2.772589 2.772589 262 +street 1 63 2.772589 2.772589 293 +march 1 61 2.833213 2.833213 295 +simpl 1 60 2.833213 2.833213 298 +content 1 59 2.833213 2.833213 302 +direct 1 57 2.890372 2.890372 316 +explor 1 58 2.890372 2.890372 324 +overview 1 56 2.890372 2.890372 323 +index 1 56 2.890372 2.890372 309 +tabl 1 51 2.995732 2.995732 346 +cool 1 49 3.044522 3.044522 374 +pointer 1 48 3.044522 3.044522 368 +life 1 50 3.044522 3.044522 375 +electron 1 47 3.091042 3.091042 379 +mark 1 44 3.135494 3.135494 403 +netscap 1 44 3.135494 3.135494 395 +music 1 42 3.218876 3.218876 436 +map 1 39 3.258097 3.258097 452 +industri 1 38 3.295837 3.295837 464 +open 1 38 3.295837 3.295837 469 +china 1 37 3.332205 3.332205 487 +global 1 34 3.401197 3.401197 520 +common 1 30 3.555348 3.555348 574 +synchron 1 29 3.583519 3.583519 588 +weather 1 28 3.610918 3.610918 618 +bound 1 26 3.688879 3.688879 659 +reliabl 1 25 3.737670 3.737670 674 +sport 1 25 3.737670 3.737670 683 +scalabl 1 24 3.761200 3.761200 705 +other 1 24 3.761200 3.761200 697 +yahoo 1 24 3.761200 3.761200 707 +flexibl 1 21 3.912023 3.912023 792 +navig 1 21 3.912023 3.912023 796 +kernel 1 20 3.951244 3.951244 825 +binari 1 20 3.951244 3.951244 823 +lyco 1 19 4.007333 4.007333 871 +commerci 1 16 4.174387 4.174387 1005 +women 1 16 4.174387 4.174387 1004 +brief 1 16 4.174387 4.174387 1001 +stock 1 16 4.174387 4.174387 1007 +horu 1 14 4.317488 4.317488 1116 +canada 1 13 4.382027 4.382027 1158 +alan 1 13 4.382027 4.382027 1146 +brad 1 12 4.465908 4.465908 1264 +weight 1 12 4.465908 4.465908 1204 +kenneth 1 12 4.465908 4.465908 1265 +reness 1 11 4.553877 4.553877 1333 +transpar 1 11 4.553877 4.553877 1325 +lake 1 11 4.553877 4.553877 1373 +itali 1 11 4.553877 4.553877 1378 +werner 1 10 4.653960 4.653960 1385 +consortium 1 10 4.653960 4.653960 1467 +mosaic 1 10 4.653960 4.653960 1426 +sosp 1 10 4.653960 4.653960 1416 +mountain 1 10 4.653960 4.653960 1456 +cook 1 10 4.653960 4.653960 1464 +jean 1 10 4.653960 4.653960 1440 +cheng 1 10 4.653960 4.653960 1381 +birman 1 9 4.753590 4.753590 1531 +robbert 1 9 4.753590 4.753590 1529 +light 1 9 4.753590 4.753590 1533 +motorola 1 9 4.753590 4.753590 1546 +wall 1 9 4.753590 4.753590 1553 +vogel 1 8 4.875197 4.875197 1622 +sigop 1 8 4.875197 4.875197 1727 +european 1 8 4.875197 4.875197 1763 +heart 1 8 4.875197 4.875197 1729 +grove 1 8 4.875197 4.875197 1675 +wire 1 8 4.875197 4.875197 1747 +edg 1 8 4.875197 4.875197 1647 +synchroni 1 7 5.010635 5.010635 1923 +gatewai 1 7 5.010635 5.010635 1942 +discoveri 1 7 5.010635 5.010635 1915 +hunt 1 7 5.010635 5.010635 1798 +scout 1 7 5.010635 5.010635 1903 +antonio 1 6 5.164786 5.164786 2186 +symposiumon 1 6 5.164786 5.164786 2054 +alex 1 6 5.164786 5.164786 2130 +theproject 1 6 5.164786 5.164786 1981 +postcard 1 6 5.164786 5.164786 2181 +atlant 1 5 5.347108 5.347108 2508 +advic 1 5 5.347108 5.347108 2509 +ireland 1 4 5.568345 5.568345 2853 +hayden 1 4 5.568345 5.568345 2844 +hickei 1 4 5.568345 5.568345 2845 +vaysburd 1 4 5.568345 5.568345 2846 +colorado 1 4 5.568345 5.568345 2938 +ncsa 1 4 5.568345 5.568345 2767 +insur 1 4 5.568345 5.568345 2939 +educornel 1 3 5.857933 5.857933 3601 +universitydept 1 3 5.857933 5.857933 3602 +glade 1 3 5.857933 5.857933 3537 +takako 1 3 5.857933 5.857933 3538 +systemscomput 1 3 5.857933 5.857933 3148 +arizona 1 3 5.857933 5.857933 3700 +beginn 1 3 5.857933 5.857933 3330 +hotjava 1 3 5.857933 5.857933 3220 +ftc 1 3 5.857933 5.857933 3275 +elsevi 1 3 5.857933 5.857933 3671 +copper 1 3 5.857933 5.857933 3536 +summit 1 3 5.857933 5.857933 3684 +counti 1 3 5.857933 5.857933 3682 +fashion 1 3 5.857933 5.857933 3699 +crew 1 3 5.857933 5.857933 3347 +underground 1 3 5.857933 5.857933 3604 +spider 1 3 5.857933 5.857933 3605 +katherin 1 2 6.263398 6.263398 4851 +ofvirtu 1 2 6.263398 6.263398 5061 +lui 1 2 6.263398 6.263398 5164 +dalia 1 2 6.263398 6.263398 4852 +malki 1 2 6.263398 6.263398 4853 +uiuc 1 2 6.263398 6.263398 4509 +cern 1 2 6.263398 6.263398 5079 +icdc 1 2 6.263398 6.263398 5191 +ucsd 1 2 6.263398 6.263398 5192 +amazon 1 2 6.263398 6.263398 5193 +dessert 1 2 6.263398 6.263398 5194 +intertext 1 2 6.263398 6.263398 5002 +infoth 1 2 6.263398 6.263398 5195 +guokguo 1 1 6.957497 6.957497 10934 +multicastprotocol 1 1 6.957497 6.957497 10935 +publicationskatherin 1 1 6.957497 6.957497 10936 +connemara 1 1 6.957497 6.957497 10937 +rodrigu 1 1 6.957497 6.957497 10938 +sargento 1 1 6.957497 6.957497 10939 +paulo 1 1 6.957497 6.957497 10940 +verisimo 1 1 6.957497 6.957497 10941 +niagara 1 1 6.957497 6.957497 10942 +infodistribut 1 1 6.957497 6.957497 10929 +networkscool 1 1 6.957497 6.957497 10943 +toolsbibliographyconferencesjournalsacademia 1 1 6.957497 6.957497 10944 +infocompani 1 1 6.957497 6.957497 10930 +infoschool 1 1 6.957497 6.957497 10945 +infojob 1 1 6.957497 6.957497 10946 +searchinterest 1 1 6.957497 6.957497 10947 +lisboa 1 1 6.957497 6.957497 10931 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 1 1 6.957497 6.957497 10948 +inforesearch 1 1 6.957497 6.957497 10949 +systempointershoru 1 1 6.957497 6.957497 10950 +productspringtotemtransisx 1 1 6.957497 6.957497 10951 +microsystemslab 1 1 6.957497 6.957497 10952 +networksmulticast 1 1 6.957497 6.957497 10953 +protocolsn 1 1 6.957497 6.957497 10954 +fromlblgun 1 1 6.957497 6.957497 10955 +sguid 1 1 6.957497 6.957497 10956 +quickrefer 1 1 6.957497 6.957497 10957 +htmldocument 1 1 6.957497 6.957497 10958 +httpd 1 1 6.957497 6.957497 10932 +finder 1 1 6.957497 6.957497 10959 +xmosaic 1 1 6.957497 6.957497 10933 +bibliographybibliographi 1 1 6.957497 6.957497 10960 +oldindex 1 1 6.957497 6.957497 10961 +hpdc 1 1 6.957497 6.957497 10962 +srd 1 1 6.957497 6.957497 10963 +jsac 1 1 6.957497 6.957497 10964 +scienceacademia 1 1 6.957497 6.957497 10965 +openingsibmdelltandemtiapplebel 1 1 6.957497 6.957497 10966 +gradschool 1 1 6.957497 6.957497 10967 +gradjob 1 1 6.957497 6.957497 10968 +ukinterest 1 1 6.957497 6.957497 10969 +moviesbailei 1 1 6.957497 6.957497 10970 +concertslibrari 1 1 6.957497 6.957497 10971 +hightechin 1 1 6.957497 6.957497 10972 +inesc 1 1 6.957497 6.957497 10973 +resort 1 1 6.957497 6.957497 10974 +coloradooth 1 1 6.957497 6.957497 10975 +infoart 1 1 6.957497 6.957497 10976 +weblouvreth 1 1 6.957497 6.957497 10977 +linebook 1 1 6.957497 6.957497 10978 +calvinhobb 1 1 6.957497 6.957497 10979 +archivecardsmagicchinaart 1 1 6.957497 6.957497 10980 +gourmetl 1 1 6.957497 6.957497 10981 +cordonbleu 1 1 6.957497 6.957497 10982 +fashional 1 1 6.957497 6.957497 10983 +linksa 1 1 6.957497 6.957497 10984 +cjlutz 1 1 6.957497 6.957497 10985 +wwweb 1 1 6.957497 6.957497 10986 +pagewith 1 1 6.957497 6.957497 10987 +tmexpressfirst 1 1 6.957497 6.957497 10988 +wireirc 1 1 6.957497 6.957497 10989 +faqfashion 1 1 6.957497 6.957497 10990 +nethair 1 1 6.957497 6.957497 10991 +diesel 1 1 6.957497 6.957497 10992 +guessfriend 1 1 6.957497 6.957497 10993 +deng 1 1 6.957497 6.957497 10994 +shiji 1 1 6.957497 6.957497 10995 +edulibrari 1 1 6.957497 6.957497 10996 +congressmagazin 1 1 6.957497 6.957497 10997 +timegeorg 1 1 6.957497 6.957497 10998 +gilder 1 1 6.957497 6.957497 10999 +archivesinanet 1 1 6.957497 6.957497 11000 +newsworld 1 1 6.957497 6.957497 11001 +olymp 1 1 6.957497 6.957497 11002 +streetheadlin 1 1 6.957497 6.957497 11003 +weatherhunt 1 1 6.957497 6.957497 11004 +informationglob 1 1 6.957497 6.957497 11005 +navigatorhom 1 1 6.957497 6.957497 11006 +wanderersand 1 1 6.957497 6.957497 11007 +kguo 1 1 6.957497 6.957497 11008 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..96d3c92a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +object 1 138 1.945910 1.945910 79 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +user 1 104 2.302585 2.302585 137 +graphic 1 90 2.397895 2.397895 147 +learn 1 86 2.484907 2.484907 170 +environ 1 84 2.484907 2.484907 177 +chang 1 82 2.484907 2.484907 163 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +java 1 70 2.708050 2.708050 248 +written 1 63 2.772589 2.772589 278 +creat 1 63 2.772589 2.772589 277 +interact 1 62 2.772589 2.772589 270 +simpl 1 60 2.833213 2.833213 298 +understand 1 47 3.091042 3.091042 384 +form 1 39 3.258097 3.258097 443 +multi 1 36 3.367296 3.367296 493 +power 1 30 3.555348 3.555348 573 +platform 1 29 3.583519 3.583519 591 +input 1 23 3.806662 3.806662 727 +portabl 1 20 3.951244 3.951244 819 +applet 1 20 3.951244 3.951244 827 +safe 1 12 4.465908 4.465908 1274 +polygon 1 8 4.875197 4.875197 1723 +rotat 1 5 5.347108 5.347108 2295 +vertic 1 5 5.347108 5.347108 2270 +cube 1 4 5.568345 5.568345 2940 +introductionthi 1 2 6.263398 6.263398 4056 +tetra 1 2 6.263398 6.263398 5196 +wirefram 1 1 6.957497 6.957497 11009 +desgin 1 1 6.957497 6.957497 11010 +threader 1 1 6.957497 6.957497 11011 +speific 1 1 6.957497 6.957497 11012 +react 1 1 6.957497 6.957497 11013 +cone 1 1 6.957497 6.957497 11014 +cylind 1 1 6.957497 6.957497 11015 +toru 1 1 6.957497 6.957497 11016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..f1fe464d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +associ 1 93 2.397895 2.397895 151 +academ 1 82 2.484907 2.484907 178 +ieee 1 86 2.484907 2.484907 190 +resourc 1 81 2.484907 2.484907 172 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +master 1 76 2.564949 2.564949 216 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +david 1 71 2.639057 2.639057 232 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +ithaca 1 65 2.772589 2.772589 294 +visit 1 63 2.772589 2.772589 288 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +content 1 59 2.833213 2.833213 302 +juli 1 60 2.833213 2.833213 305 +thesi 1 57 2.890372 2.890372 327 +point 1 58 2.890372 2.890372 319 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +talk 1 53 2.944439 2.944439 336 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +set 1 50 3.044522 3.044522 361 +protocol 1 45 3.135494 3.135494 407 +math 1 44 3.135494 3.135494 402 +combin 1 42 3.218876 3.218876 421 +error 1 40 3.258097 3.258097 449 +connect 1 37 3.332205 3.332205 485 +short 1 36 3.367296 3.367296 499 +robot 1 36 3.367296 3.367296 497 +approxim 1 35 3.401197 3.401197 509 +singl 1 34 3.401197 3.401197 510 +bibliographi 1 34 3.401197 3.401197 518 +posit 1 31 3.496508 3.496508 552 +graph 1 30 3.555348 3.555348 576 +computersci 1 30 3.555348 3.555348 562 +secur 1 30 3.555348 3.555348 577 +proc 1 26 3.688879 3.688879 649 +bound 1 26 3.688879 3.688879 659 +compar 1 26 3.688879 3.688879 648 +berkelei 1 26 3.688879 3.688879 657 +jeff 1 25 3.737670 3.737670 673 +flow 1 24 3.761200 3.761200 700 +motion 1 24 3.761200 3.761200 699 +yahoo 1 24 3.761200 3.761200 707 +universityithaca 1 24 3.761200 3.761200 710 +emphasi 1 22 3.850148 3.850148 755 +geometri 1 22 3.850148 3.850148 752 +rout 1 21 3.912023 3.912023 793 +path 1 21 3.912023 3.912023 778 +vlsi 1 21 3.912023 3.912023 795 +siam 1 21 3.912023 3.912023 800 +tenni 1 20 3.951244 3.951244 838 +geometr 1 19 4.007333 4.007333 852 +definit 1 19 4.007333 4.007333 864 +spend 1 19 4.007333 4.007333 850 +andrew 1 19 4.007333 4.007333 849 +hypertext 1 19 4.007333 4.007333 865 +lower 1 18 4.060443 4.060443 886 +dimension 1 18 4.060443 4.060443 909 +analyz 1 17 4.110874 4.110874 925 +expand 1 17 4.110874 4.110874 928 +segment 1 17 4.110874 4.110874 931 +stanford 1 17 4.110874 4.110874 955 +latenc 1 16 4.174387 4.174387 993 +letter 1 16 4.174387 4.174387 981 +biologi 1 15 4.248495 4.248495 1049 +princeton 1 15 4.248495 4.248495 1042 +embed 1 14 4.317488 4.317488 1102 +discret 1 13 4.382027 4.382027 1165 +resolut 1 13 4.382027 4.382027 1172 +safe 1 12 4.465908 4.465908 1274 +probabilist 1 11 4.553877 4.553877 1343 +node 1 11 4.553877 4.553877 1326 +mesh 1 11 4.553877 4.553877 1351 +israel 1 11 4.553877 4.553877 1366 +arbitrari 1 11 4.553877 4.553877 1359 +excit 1 11 4.553877 4.553877 1329 +queue 1 10 4.653960 4.653960 1386 +assumpt 1 9 4.753590 4.753590 1514 +minimum 1 9 4.753590 4.753590 1555 +distanc 1 9 4.753590 4.753590 1500 +yellow 1 9 4.753590 4.753590 1601 +cryptographi 1 9 4.753590 4.753590 1512 +combinatori 1 8 4.875197 4.875197 1629 +hallcornel 1 8 4.875197 4.875197 1757 +molecular 1 7 5.010635 5.010635 1887 +trade 1 7 5.010635 5.010635 1815 +foc 1 7 5.010635 5.010635 1880 +rubinfeld 1 6 5.164786 5.164786 1998 +dens 1 6 5.164786 5.164786 2122 +layout 1 6 5.164786 5.164786 2183 +relax 1 6 5.164786 5.164786 2120 +consensu 1 6 5.164786 5.164786 2080 +reconstruct 1 6 5.164786 5.164786 2170 +huttenloch 1 6 5.164786 5.164786 1983 +plane 1 6 5.164786 5.164786 2187 +symposiumon 1 6 5.164786 5.164786 2054 +infoseek 1 6 5.164786 5.164786 2188 +soda 1 6 5.164786 5.164786 2189 +corp 1 6 5.164786 5.164786 2139 +semi 1 5 5.347108 5.347108 2510 +almaden 1 5 5.347108 5.347108 2511 +stabil 1 5 5.347108 5.347108 2286 +diagram 1 5 5.347108 5.347108 2346 +stoc 1 5 5.347108 5.347108 2491 +chess 1 5 5.347108 5.347108 2486 +conform 1 4 5.568345 5.568345 2941 +disjoint 1 4 5.568345 5.568345 2709 +ratio 1 4 5.568345 5.568345 2942 +hausdorff 1 4 5.568345 5.568345 2633 +glimps 1 4 5.568345 5.568345 2778 +planar 1 3 5.857933 5.857933 3647 +formobil 1 3 5.857933 5.857933 3261 +fernandez 1 3 5.857933 5.857933 3591 +deliveri 1 3 5.857933 5.857933 3278 +onprincipl 1 3 5.857933 5.857933 3701 +berger 1 3 5.857933 5.857933 3702 +universitycomput 1 3 5.857933 5.857933 3651 +ncstrl 1 3 5.857933 5.857933 3530 +jone 1 3 5.857933 5.857933 3703 +rivest 1 3 5.857933 5.857933 3248 +kleinberg 1 2 6.263398 6.263398 5093 +adversari 1 2 6.263398 6.263398 5065 +tardo 1 2 6.263398 6.263398 5090 +diamet 1 2 6.263398 6.263398 5102 +williamson 1 2 6.263398 6.263398 5101 +goeman 1 2 6.263398 6.263398 5100 +lovasz 1 2 6.263398 6.263398 5091 +leighton 1 2 6.263398 6.263398 5097 +greedi 1 2 6.263398 6.263398 4143 +attiya 1 2 6.263398 6.263398 5197 +voronoi 1 2 6.263398 6.263398 5036 +euclidean 1 2 6.263398 6.263398 5198 +sdsc 1 2 6.263398 6.263398 5199 +kleinber 1 1 6.957497 6.957497 11017 +anddisjoint 1 1 6.957497 6.957497 11019 +stabilityof 1 1 6.957497 6.957497 11020 +particularlyth 1 1 6.957497 6.957497 11021 +seeselect 1 1 6.957497 6.957497 11022 +publicationsmiscellan 1 1 6.957497 6.957497 11023 +linkspapersapproxim 1 1 6.957497 6.957497 11024 +unsplitt 1 1 6.957497 6.957497 11025 +disjointpath 1 1 6.957497 6.957497 11018 +aggarw 1 1 6.957497 6.957497 11026 +improvedapproxim 1 1 6.957497 6.957497 11027 +thetafunct 1 1 6.957497 6.957497 11028 +vertex 1 1 6.957497 6.957497 11029 +simplepolygon 1 1 6.957497 6.957497 11030 +serverbalanc 1 1 6.957497 6.957497 11031 +yaniv 1 1 6.957497 6.957497 11032 +serveralgorithm 1 1 6.957497 6.957497 11033 +robotnavig 1 1 6.957497 6.957497 11034 +awerbuch 1 1 6.957497 6.957497 11035 +borodin 1 1 6.957497 6.957497 11036 +raghavan 1 1 6.957497 6.957497 11037 +sudan 1 1 6.957497 6.957497 11038 +lynch 1 1 6.957497 6.957497 11039 +offsbetween 1 1 6.957497 6.957497 11040 +quiesc 1 1 6.957497 6.957497 11041 +managementprotocol 1 1 6.957497 6.957497 11042 +mullainathan 1 1 6.957497 6.957497 11043 +boundsand 1 1 6.957497 6.957497 11044 +athre 1 1 6.957497 6.957497 11045 +kedem 1 1 6.957497 6.957497 11046 +pointset 1 1 6.957497 6.957497 11047 +invariantsof 1 1 6.957497 6.957497 11048 +linkssearch 1 1 6.957497 6.957497 11049 +bibliographiesaltavista 1 1 6.957497 6.957497 11050 +nynex 1 1 6.957497 6.957497 11051 +sitescornel 1 1 6.957497 6.957497 11052 +computingtc 1 1 6.957497 6.957497 11053 +crescenzi 1 1 6.957497 6.957497 11054 +kann 1 1 6.957497 6.957497 11055 +compendium 1 1 6.957497 6.957497 11056 +biologycomput 1 1 6.957497 6.957497 11057 +carb 1 1 6.957497 6.957497 11058 +biocomput 1 1 6.957497 6.957497 11059 +geometrydavid 1 1 6.957497 6.957497 11060 +eppstein 1 1 6.957497 6.957497 11061 +junkyard 1 1 6.957497 6.957497 11062 +erickson 1 1 6.957497 6.957497 11063 +securitymitr 1 1 6.957497 6.957497 11064 +miscellaneousnetscap 1 1 6.957497 6.957497 11065 +intellicast 1 1 6.957497 6.957497 11066 +kleinbergdepart 1 1 6.957497 6.957497 11067 +scienceupson 1 1 6.957497 6.957497 11068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..c59eed8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +start 1 83 2.484907 2.484907 173 +thing 1 84 2.484907 2.484907 189 +master 1 76 2.564949 2.564949 216 +come 1 78 2.564949 2.564949 202 +want 1 79 2.564949 2.564949 199 +html 1 75 2.639057 2.639057 235 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +back 1 60 2.833213 2.833213 297 +march 1 61 2.833213 2.833213 295 +think 1 57 2.890372 2.890372 314 +februari 1 54 2.944439 2.944439 328 +move 1 47 3.091042 3.091042 382 +music 1 42 3.218876 3.218876 436 +electr 1 38 3.295837 3.295837 461 +return 1 34 3.401197 3.401197 502 +corpor 1 21 3.912023 3.912023 802 +worth 1 11 4.553877 4.553877 1294 +forc 1 10 4.653960 4.653960 1384 +japan 1 8 4.875197 4.875197 1762 +cornellunivers 1 7 5.010635 5.010635 1916 +superhighwai 1 4 5.568345 5.568345 2943 +sell 1 4 5.568345 5.568345 2935 +tokyo 1 3 5.857933 5.857933 3622 +acquaint 1 3 5.857933 5.857933 3468 +sale 1 3 5.857933 5.857933 3688 +melco 1 2 6.263398 6.263398 5200 +advert 1 2 6.263398 6.263398 5201 +kazushi 1 1 6.957497 6.957497 11069 +otakota 1 1 6.957497 6.957497 11070 +edukazushi 1 1 6.957497 6.957497 11071 +mitusbishi 1 1 6.957497 6.957497 11072 +isund 1 1 6.957497 6.957497 11073 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..0d2b55c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +technic 1 100 2.302585 2.302585 140 +pictur 1 89 2.397895 2.397895 160 +logic 1 71 2.639057 2.639057 230 +complex 1 64 2.772589 2.772589 269 +type 1 61 2.833213 2.833213 296 +faculti 1 56 2.890372 2.890372 325 +algebra 1 45 3.135494 3.135494 394 +york 1 41 3.218876 3.218876 435 +constraint 1 26 3.688879 3.688879 636 +interpret 1 24 3.761200 3.761200 686 +universityithaca 1 24 3.761200 3.761200 710 +decis 1 23 3.806662 3.806662 728 +automata 1 13 4.382027 4.382027 1135 +interestsmi 1 10 4.653960 4.653960 1462 +hallcornel 1 8 4.875197 4.875197 1757 +newton 1 7 5.010635 5.010635 1824 +infer 1 6 5.164786 5.164786 2040 +dexter 1 4 5.568345 5.568345 2855 +andsemant 1 3 5.857933 5.857933 3246 +kozendext 1 1 6.957497 6.957497 11074 +kozenjoseph 1 1 6.957497 6.957497 11075 +engineeringphd 1 1 6.957497 6.957497 11076 +especiallycomplex 1 1 6.957497 6.957497 11077 +onlinekleen 1 1 6.957497 6.957497 11078 +algebraautomata 1 1 6.957497 6.957497 11079 +logicbibliographylist 1 1 6.957497 6.957497 11080 +reportscours 1 1 6.957497 6.957497 11081 +notesc 1 1 6.957497 6.957497 11082 +programsc 1 1 6.957497 6.957497 11083 +theoryfun 1 1 6.957497 6.957497 11084 +stufffamili 1 1 6.957497 6.957497 11085 +rugbi 1 1 6.957497 6.957497 11086 +effectcomput 1 1 6.957497 6.957497 11087 +departmentupson 1 1 6.957497 6.957497 11088 +usakozen 1 1 6.957497 6.957497 11089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..1de710db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +person 1 111 2.197225 2.197225 117 +pictur 1 89 2.397895 2.397895 160 +associ 1 93 2.397895 2.397895 151 +learn 1 86 2.484907 2.484907 170 +novemb 1 81 2.484907 2.484907 179 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +type 1 61 2.833213 2.833213 296 +autom 1 41 3.218876 3.218876 434 +soon 1 36 3.367296 3.367296 494 +synthesi 1 20 3.951244 3.951244 834 +deduct 1 12 4.465908 4.465908 1236 +german 1 6 5.164786 5.164786 2190 +christoph 1 5 5.347108 5.347108 2512 +kreitz 1 1 6.957497 6.957497 11090 +lehr 1 1 6.957497 6.957497 11091 +lernen 1 1 6.957497 6.957497 11092 +vorlesungsskript 1 1 6.957497 6.957497 11093 +medienunterst 1 1 6.957497 6.957497 11094 +uumltzt 1 1 6.957497 6.957497 11095 +lehren 1 1 6.957497 6.957497 11096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..fc257a1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +place 1 106 2.197225 2.197225 124 +master 1 76 2.564949 2.564949 216 +multimedia 1 68 2.708050 2.708050 258 +would 1 67 2.708050 2.708050 251 +street 1 63 2.772589 2.772589 293 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +telephon 1 50 3.044522 3.044522 373 +still 1 50 3.044522 3.044522 362 +enjoi 1 26 3.688879 3.688879 660 +daili 1 24 3.761200 3.761200 706 +taiwan 1 16 4.174387 4.174387 1006 +countri 1 15 4.248495 4.248495 1059 +grove 1 8 4.875197 4.875197 1675 +newton 1 7 5.010635 5.010635 1824 +isi 1 5 5.347108 5.347108 2443 +heng 1 2 6.263398 6.263398 5202 +kuen 1 1 6.957497 6.957497 11097 +myproject 1 1 6.957497 6.957497 11098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..8f43ca00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +hall 1 146 1.945910 1.945910 65 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +person 1 111 2.197225 2.197225 117 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +world 1 115 2.197225 2.197225 126 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +site 1 106 2.197225 2.197225 119 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +commun 1 95 2.397895 2.397895 157 +pictur 1 89 2.397895 2.397895 160 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +build 1 85 2.484907 2.484907 184 +member 1 84 2.484907 2.484907 165 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +server 1 76 2.564949 2.564949 204 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +know 1 80 2.564949 2.564949 198 +upson 1 71 2.639057 2.639057 218 +involv 1 71 2.639057 2.639057 227 +servic 1 72 2.639057 2.639057 236 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +meet 1 72 2.639057 2.639057 229 +publish 1 57 2.890372 2.890372 326 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +life 1 50 3.044522 3.044522 375 +give 1 50 3.044522 3.044522 359 +protocol 1 45 3.135494 3.135494 407 +fast 1 42 3.218876 3.218876 429 +author 1 39 3.258097 3.258097 450 +littl 1 39 3.258097 3.258097 454 +open 1 38 3.295837 3.295837 469 +mean 1 37 3.332205 3.332205 477 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +chapter 1 32 3.465736 3.465736 536 +collabor 1 32 3.465736 3.465736 543 +extend 1 32 3.465736 3.465736 539 +posit 1 31 3.496508 3.496508 552 +secur 1 30 3.555348 3.555348 577 +framework 1 28 3.610918 3.610918 606 +releas 1 28 3.610918 3.610918 616 +hope 1 28 3.610918 3.610918 610 +enabl 1 26 3.688879 3.688879 655 +rule 1 26 3.688879 3.688879 638 +challeng 1 26 3.688879 3.688879 653 +primari 1 25 3.737670 3.737670 669 +never 1 25 3.737670 3.737670 671 +universityithaca 1 24 3.761200 3.761200 710 +store 1 24 3.761200 3.761200 693 +magazin 1 24 3.761200 3.761200 704 +sometim 1 24 3.761200 3.761200 696 +lead 1 23 3.806662 3.806662 718 +mobil 1 23 3.806662 3.806662 730 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +defin 1 22 3.850148 3.850148 746 +corpor 1 21 3.912023 3.912023 802 +fund 1 21 3.912023 3.912023 805 +tell 1 21 3.912023 3.912023 777 +spend 1 19 4.007333 4.007333 850 +ever 1 19 4.007333 4.007333 872 +beauti 1 18 4.060443 4.060443 912 +repositori 1 17 4.110874 4.110874 932 +carl 1 15 4.248495 4.248495 1024 +charact 1 15 4.248495 4.248495 1028 +edui 1 13 4.382027 4.382027 1193 +joint 1 13 4.382027 4.382027 1130 +infrastructur 1 12 4.465908 4.465908 1234 +road 1 11 4.553877 4.553877 1374 +consortium 1 10 4.653960 4.653960 1467 +bike 1 10 4.653960 4.653960 1468 +leader 1 9 4.753590 4.753590 1576 +desir 1 9 4.753590 4.753590 1542 +poor 1 8 4.875197 4.875197 1736 +pagei 1 8 4.875197 4.875197 1683 +davi 1 7 5.010635 5.010635 1888 +daughter 1 7 5.010635 5.010635 1943 +drop 1 6 5.164786 5.164786 2008 +trail 1 6 5.164786 5.164786 2071 +departmentat 1 5 5.347108 5.347108 2513 +substitut 1 5 5.347108 5.347108 2247 +constant 1 5 5.347108 5.347108 2251 +outdoor 1 5 5.347108 5.347108 2514 +interfer 1 5 5.347108 5.347108 2494 +darpa 1 4 5.568345 5.568345 2944 +metadata 1 4 5.568345 5.568345 2945 +breath 1 4 5.568345 5.568345 2946 +ncstrl 1 3 5.857933 5.857933 3530 +worldwid 1 3 5.857933 5.857933 3704 +dienst 1 3 5.857933 5.857933 3640 +luci 1 3 5.857933 5.857933 3705 +fresh 1 3 5.857933 5.857933 3706 +lagoz 1 2 6.263398 6.263398 5081 +protocolsfor 1 2 6.263398 6.263398 5204 +interoper 1 2 6.263398 6.263398 4838 +developeda 1 2 6.263398 6.263398 5205 +interfacesand 1 2 6.263398 6.263398 5206 +quiet 1 2 6.263398 6.263398 5203 +cano 1 2 6.263398 6.263398 5207 +joi 1 2 6.263398 6.263398 5208 +fight 1 2 6.263398 6.263398 5209 +groupin 1 1 6.957497 6.957497 11099 +ourgroup 1 1 6.957497 6.957497 11100 +adistribut 1 1 6.957497 6.957497 11101 +collaborateson 1 1 6.957497 6.957497 11102 +thedienstsoftwar 1 1 6.957497 6.957497 11103 +providesdistribut 1 1 6.957497 6.957497 11104 +initiativesto 1 1 6.957497 6.957497 11105 +iso 1 1 6.957497 6.957497 11106 +dlib 1 1 6.957497 6.957497 11107 +dlibwork 1 1 6.957497 6.957497 11108 +iiin 1 1 6.957497 6.957497 11109 +warwick 1 1 6.957497 6.957497 11110 +amveri 1 1 6.957497 6.957497 11111 +distributedobject 1 1 6.957497 6.957497 11112 +paperfor 1 1 6.957497 6.957497 11113 +codeworkshop 1 1 6.957497 6.957497 11114 +meetm 1 1 6.957497 6.957497 11115 +moreabout 1 1 6.957497 6.957497 11116 +outsideof 1 1 6.957497 6.957497 11117 +toddler 1 1 6.957497 6.957497 11118 +lucyg 1 1 6.957497 6.957497 11119 +avid 1 1 6.957497 6.957497 11120 +movingwat 1 1 6.957497 6.957497 11121 +lakeand 1 1 6.957497 6.957497 11122 +itch 1 1 6.957497 6.957497 11123 +ridingalong 1 1 6.957497 6.957497 11124 +backwood 1 1 6.957497 6.957497 11125 +sparehour 1 1 6.957497 6.957497 11126 +shoe 1 1 6.957497 6.957497 11127 +deeplyth 1 1 6.957497 6.957497 11128 +physicalnor 1 1 6.957497 6.957497 11129 +itspreserv 1 1 6.957497 6.957497 11130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..6fb1a685 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +report 1 131 2.079442 2.079442 92 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +center 1 88 2.397895 2.397895 158 +level 1 87 2.484907 2.484907 180 +internet 1 83 2.484907 2.484907 186 +school 1 84 2.484907 2.484907 188 +materi 1 75 2.639057 2.639057 221 +servic 1 72 2.639057 2.639057 236 +onlin 1 75 2.639057 2.639057 223 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +guid 1 63 2.772589 2.772589 267 +colleg 1 61 2.833213 2.833213 300 +back 1 60 2.833213 2.833213 297 +local 1 55 2.944439 2.944439 334 +standard 1 48 3.044522 3.044522 365 +friend 1 48 3.044522 3.044522 376 +basic 1 50 3.044522 3.044522 360 +author 1 39 3.258097 3.258097 450 +tutori 1 39 3.258097 3.258097 437 +open 1 38 3.295837 3.295837 469 +robot 1 36 3.367296 3.367296 497 +concept 1 32 3.465736 3.465736 537 +secur 1 30 3.555348 3.555348 577 +travel 1 30 3.555348 3.555348 579 +chines 1 29 3.583519 3.583519 595 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +grad 1 20 3.951244 3.951244 837 +ultim 1 17 4.110874 4.110874 943 +cambridg 1 16 4.174387 4.174387 1008 +massiv 1 15 4.248495 4.248495 1026 +rank 1 14 4.317488 4.317488 1063 +opportun 1 13 4.382027 4.382027 1161 +safe 1 12 4.465908 4.465908 1274 +career 1 12 4.465908 4.465908 1287 +classmat 1 9 4.753590 4.753590 1516 +yellow 1 9 4.753590 4.753590 1601 +sigop 1 8 4.875197 4.875197 1727 +soccer 1 8 4.875197 4.875197 1752 +zhou 1 6 5.164786 5.164786 2092 +legal 1 6 5.164786 5.164786 2094 +authent 1 5 5.347108 5.347108 2306 +exclus 1 4 5.568345 5.568345 2947 +insur 1 4 5.568345 5.568345 2939 +surviv 1 4 5.568345 5.568345 2734 +legion 1 3 5.857933 5.857933 3708 +fudan 1 3 5.857933 5.857933 3707 +automobil 1 3 5.857933 5.857933 3709 +resours 1 2 6.263398 6.263398 5211 +sunris 1 2 6.263398 6.263398 5212 +edmund 1 2 6.263398 6.263398 5213 +buyer 1 2 6.263398 6.263398 5210 +succe 1 2 6.263398 6.263398 5214 +lidong 1 1 6.957497 6.957497 11131 +oasi 1 1 6.957497 6.957497 11133 +adag 1 1 6.957497 6.957497 11134 +sirac 1 1 6.957497 6.957497 11135 +kerbero 1 1 6.957497 6.957497 11136 +ocaml 1 1 6.957497 6.957497 11137 +jobtrak 1 1 6.957497 6.957497 11138 +hunter 1 1 6.957497 6.957497 11139 +careermosa 1 1 6.957497 6.957497 11140 +jobweb 1 1 6.957497 6.957497 11141 +xjob 1 1 6.957497 6.957497 11142 +yingjun 1 1 6.957497 6.957497 11143 +isso 1 1 6.957497 6.957497 11144 +autosit 1 1 6.957497 6.957497 11145 +auto 1 1 6.957497 6.957497 11132 +agenc 1 1 6.957497 6.957497 11146 +indexlast 1 1 6.957497 6.957497 11147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..f3cbcfd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +construct 1 139 1.945910 1.945910 82 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +final 1 116 2.197225 2.197225 108 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +look 1 107 2.197225 2.197225 115 +book 1 99 2.302585 2.302585 131 +take 1 97 2.302585 2.302585 134 +octob 1 89 2.397895 2.397895 156 +info 1 85 2.484907 2.484907 176 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +line 1 75 2.639057 2.639057 231 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +collect 1 65 2.772589 2.772589 268 +best 1 59 2.833213 2.833213 299 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +publish 1 57 2.890372 2.890372 326 +digit 1 52 2.995732 2.995732 348 +advisor 1 51 2.995732 2.995732 355 +still 1 50 3.044522 3.044522 362 +visual 1 48 3.044522 3.044522 372 +cool 1 49 3.044522 3.044522 374 +move 1 47 3.091042 3.091042 382 +video 1 44 3.135494 3.135494 405 +natur 1 44 3.135494 3.135494 406 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +york 1 41 3.218876 3.218876 435 +small 1 39 3.258097 3.258097 447 +annual 1 40 3.258097 3.258097 458 +movi 1 40 3.258097 3.258097 459 +connect 1 37 3.332205 3.332205 485 +chapter 1 32 3.465736 3.465736 536 +anim 1 31 3.496508 3.496508 557 +quot 1 29 3.583519 3.583519 582 +chines 1 29 3.583519 3.583519 595 +releas 1 28 3.610918 3.610918 616 +linux 1 27 3.637586 3.637586 631 +berkelei 1 26 3.688879 3.688879 657 +wang 1 21 3.912023 3.912023 790 +born 1 21 3.912023 3.912023 798 +annot 1 21 3.912023 3.912023 775 +wind 1 18 4.060443 4.060443 908 +edulast 1 17 4.110874 4.110874 927 +taiwan 1 16 4.174387 4.174387 1006 +practicum 1 16 4.174387 4.174387 960 +track 1 15 4.248495 4.248495 1029 +scene 1 14 4.317488 4.317488 1114 +classic 1 14 4.317488 4.317488 1084 +hong 1 14 4.317488 4.317488 1105 +galleri 1 13 4.382027 4.382027 1192 +remov 1 12 4.465908 4.465908 1225 +scienceat 1 11 4.553877 4.553877 1375 +moment 1 11 4.553877 4.553877 1379 +alpha 1 11 4.553877 4.553877 1348 +earth 1 10 4.653960 4.653960 1463 +kong 1 9 4.753590 4.753590 1602 +jump 1 9 4.753590 4.753590 1603 +coast 1 8 4.875197 4.875197 1746 +edg 1 8 4.875197 4.875197 1647 +bridg 1 8 4.875197 4.875197 1764 +ramin 1 7 5.010635 5.010635 1820 +bookstor 1 7 5.010635 5.010635 1837 +vallei 1 7 5.010635 5.010635 1959 +southern 1 6 5.164786 5.164786 2191 +ohio 1 5 5.347108 5.347108 2447 +carlo 1 5 5.347108 5.347108 2515 +sinanet 1 4 5.568345 5.568345 2883 +swartz 1 4 5.568345 5.568345 2878 +sell 1 4 5.568345 5.568345 2935 +japanes 1 4 5.568345 5.568345 2934 +classesc 1 3 5.857933 5.857933 3681 +visionc 1 3 5.857933 5.857933 3489 +audit 1 3 5.857933 5.857933 3391 +headlin 1 3 5.857933 5.857933 3710 +hongkong 1 3 5.857933 5.857933 3677 +hero 1 3 5.857933 5.857933 3711 +ming 1 3 5.857933 5.857933 3712 +villag 1 2 6.263398 6.263398 5215 +computingc 1 2 6.263398 6.263398 5216 +linksfor 1 2 6.263398 6.263398 5185 +castl 1 2 6.263398 6.263398 5217 +nausicaa 1 2 6.263398 6.263398 5218 +galact 1 2 6.263398 6.263398 5219 +hsian 1 1 6.957497 6.957497 11148 +wangthi 1 1 6.957497 6.957497 11151 +constructionlin 1 1 6.957497 6.957497 11152 +fangliao 1 1 6.957497 6.957497 11153 +orwel 1 1 6.957497 6.957497 11149 +videoe 1 1 6.957497 6.957497 11154 +networkse 1 1 6.957497 6.957497 11155 +amidonc 1 1 6.957497 6.957497 11156 +transcrib 1 1 6.957497 6.957497 11157 +zabihspr 1 1 6.957497 6.957497 11158 +processingc 1 1 6.957497 6.957497 11159 +managementc 1 1 6.957497 6.957497 11160 +colloquimc 1 1 6.957497 6.957497 11161 +webspac 1 1 6.957497 6.957497 11162 +stuffscornel 1 1 6.957497 6.957497 11163 +reportiee 1 1 6.957497 6.957497 11164 +societytaiwan 1 1 6.957497 6.957497 11165 +comth 1 1 6.957497 6.957497 11166 +musicmovi 1 1 6.957497 6.957497 11167 +movieweb 1 1 6.957497 6.957497 11168 +moviemania 1 1 6.957497 6.957497 11169 +picturesth 1 1 6.957497 6.957497 11170 +linkstcl 1 1 6.957497 6.957497 11171 +hacksth 1 1 6.957497 6.957497 11172 +pagemiscellan 1 1 6.957497 6.957497 11173 +cja 1 1 6.957497 6.957497 11174 +calanimag 1 1 6.957497 6.957497 11175 +totoro 1 1 6.957497 6.957497 11150 +pagelaputa 1 1 6.957497 6.957497 11176 +conan 1 1 6.957497 6.957497 11177 +slump 1 1 6.957497 6.957497 11178 +kiki 1 1 6.957497 6.957497 11179 +legend 1 1 6.957497 6.957497 11180 +pagecampu 1 1 6.957497 6.957497 11181 +uptown 1 1 6.957497 6.957497 11182 +eithaca 1 1 6.957497 6.957497 11183 +linhsian 1 1 6.957497 6.957497 11184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..2236524f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +take 1 97 2.302585 2.302585 134 +thing 1 84 2.484907 2.484907 189 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +collect 1 65 2.772589 2.772589 268 +septemb 1 65 2.772589 2.772589 274 +digit 1 52 2.995732 2.995732 348 +video 1 44 3.135494 3.135494 405 +show 1 43 3.178054 3.178054 417 +http 1 41 3.218876 3.218876 420 +littl 1 39 3.258097 3.258097 454 +download 1 36 3.367296 3.367296 489 +thought 1 17 4.110874 4.110874 945 +sept 1 17 4.110874 4.110874 952 +pagewelcom 1 11 4.553877 4.553877 1344 +song 1 11 4.553877 4.553877 1380 +theme 1 8 4.875197 4.875197 1707 +counter 1 8 4.875197 4.875197 1765 +clip 1 7 5.010635 5.010635 1868 +courtesi 1 7 5.010635 5.010635 1953 +essai 1 4 5.568345 5.568345 2948 +libbi 1 1 6.957497 6.957497 11185 +lista 1 1 6.957497 6.957497 11186 +projectemail 1 1 6.957497 6.957497 11187 +mehit 1 1 6.957497 6.957497 11188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..c63433bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +take 1 97 2.302585 2.302585 134 +name 1 72 2.639057 2.639057 220 +upson 1 71 2.639057 2.639057 218 +lili 1 5 5.347108 5.347108 2240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..ac67db23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +analysi 1 124 2.079442 2.079442 98 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +center 1 88 2.397895 2.397895 158 +method 1 80 2.564949 2.564949 213 +appli 1 71 2.639057 2.639057 226 +view 1 70 2.708050 2.708050 254 +differ 1 66 2.708050 2.708050 253 +scientif 1 53 2.944439 2.944439 341 +much 1 52 2.995732 2.995732 349 +appoint 1 49 3.044522 3.044522 358 +numer 1 49 3.044522 3.044522 369 +algebra 1 45 3.135494 3.135494 394 +textbook 1 44 3.135494 3.135494 397 +mechan 1 43 3.178054 3.178054 416 +linear 1 41 3.218876 3.218876 431 +map 1 39 3.258097 3.258097 452 +multipl 1 39 3.258097 3.258097 453 +field 1 37 3.332205 3.332205 482 +mean 1 37 3.332205 3.332205 477 +approxim 1 35 3.401197 3.401197 509 +bibliographi 1 34 3.401197 3.401197 518 +jeff 1 25 3.737670 3.737670 673 +siam 1 21 3.912023 3.912023 800 +walter 1 17 4.110874 4.110874 950 +normal 1 16 4.174387 4.174387 995 +matlab 1 14 4.317488 4.317488 1081 +affili 1 13 4.382027 4.382027 1194 +whose 1 13 4.382027 4.382027 1166 +nick 1 13 4.382027 4.382027 1180 +iter 1 12 4.465908 4.465908 1206 +peter 1 11 4.553877 4.553877 1316 +thecomput 1 10 4.653960 4.653960 1408 +matric 1 10 4.653960 4.653960 1399 +lloyd 1 6 5.164786 5.164786 2103 +edumi 1 6 5.164786 5.164786 2132 +fluid 1 5 5.347108 5.347108 2440 +thecornel 1 4 5.568345 5.568345 2892 +conform 1 4 5.568345 5.568345 2941 +hasbeen 1 4 5.568345 5.568345 2661 +trefethen 1 3 5.857933 5.857933 3528 +eigenvector 1 3 5.857933 5.857933 3365 +vicki 1 3 5.857933 5.857933 3187 +reddi 1 3 5.857933 5.857933 3277 +havea 1 2 6.263398 6.263398 4434 +spectral 1 2 6.263398 6.263398 4837 +papersoth 1 2 6.263398 6.263398 5049 +loui 1 2 6.263398 6.263398 5220 +trefethenprofessorlnt 1 1 6.957497 6.957497 11189 +thecent 1 1 6.957497 6.957497 11190 +numericalsolut 1 1 6.957497 6.957497 11191 +notorthogon 1 1 6.957497 6.957497 11192 +textbooksfinit 1 1 6.957497 6.957497 11193 +papersmultimatlab 1 1 6.957497 6.957497 11194 +processorsmatrix 1 1 6.957497 6.957497 11195 +gap 1 1 6.957497 6.957497 11196 +betweenpotenti 1 1 6.957497 6.957497 11197 +convergencepseudospectra 1 1 6.957497 6.957497 11198 +operatorssom 1 1 6.957497 6.957497 11199 +itemsclass 1 1 6.957497 6.957497 11200 +analysiscurriculum 1 1 6.957497 6.957497 11201 +vitaepseudospectra 1 1 6.957497 6.957497 11202 +alfeldcurr 1 1 6.957497 6.957497 11203 +howlegubjrn 1 1 6.957497 6.957497 11204 +jnsson 1 1 6.957497 6.957497 11205 +yohan 1 1 6.957497 6.957497 11206 +kimdivakar 1 1 6.957497 6.957497 11207 +viswanathprevi 1 1 6.957497 6.957497 11208 +baggetttobi 1 1 6.957497 6.957497 11209 +driscollalan 1 1 6.957497 6.957497 11210 +edelman 1 1 6.957497 6.957497 11211 +howel 1 1 6.957497 6.957497 11212 +mascarenhasnoel 1 1 6.957497 6.957497 11213 +nachtigalsatish 1 1 6.957497 6.957497 11214 +chuan 1 1 6.957497 6.957497 11215 +tohsom 1 1 6.957497 6.957497 11216 +colleaguesjim 1 1 6.957497 6.957497 11217 +demmelann 1 1 6.957497 6.957497 11218 +greenbaummartin 1 1 6.957497 6.957497 11219 +gutknechtd 1 1 6.957497 6.957497 11220 +highamann 1 1 6.957497 6.957497 11221 +trefethenandr 1 1 6.957497 6.957497 11222 +weideman 1 1 6.957497 6.957497 11223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..354c2af7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +read 1 154 1.791759 1.791759 47 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +welcom 1 122 2.079442 2.079442 99 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +site 1 106 2.197225 2.197225 119 +person 1 111 2.197225 2.197225 117 +search 1 95 2.397895 2.397895 155 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +librari 1 87 2.484907 2.484907 181 +resum 1 79 2.564949 2.564949 217 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +archiv 1 49 3.044522 3.044522 364 +favorit 1 44 3.135494 3.135494 410 +netscap 1 44 3.135494 3.135494 395 +music 1 42 3.218876 3.218876 436 +compani 1 41 3.218876 3.218876 423 +microsoft 1 38 3.295837 3.295837 468 +china 1 37 3.332205 3.332205 487 +connect 1 37 3.332205 3.332205 485 +product 1 33 3.433987 3.433987 527 +travel 1 30 3.555348 3.555348 579 +chines 1 29 3.583519 3.583519 595 +programminglanguag 1 21 3.912023 3.912023 782 +busi 1 21 3.912023 3.912023 784 +hobbi 1 16 4.174387 4.174387 1009 +stock 1 16 4.174387 4.174387 1007 +practicum 1 16 4.174387 4.174387 960 +photograph 1 15 4.248495 4.248495 1056 +novel 1 15 4.248495 4.248495 1039 +misc 1 13 4.382027 4.382027 1124 +galleri 1 13 4.382027 4.382027 1192 +tune 1 12 4.465908 4.465908 1227 +catalog 1 10 4.653960 4.653960 1431 +swim 1 9 4.753590 4.753590 1599 +corba 1 5 5.347108 5.347108 2320 +ping 1 4 5.568345 5.568345 2922 +vrml 1 4 5.568345 5.568345 2949 +cube 1 4 5.568345 5.568345 2940 +luci 1 3 5.857933 5.857933 3705 +pong 1 3 5.857933 5.857933 3371 +underground 1 3 5.857933 5.857933 3604 +badminton 1 2 6.263398 6.263398 5221 +silvano 1 2 6.263398 6.263398 4868 +sunlab 1 2 6.263398 6.263398 5222 +caltech 1 2 6.263398 6.263398 5223 +whiz 1 1 6.957497 6.957497 11224 +systemscontact 1 1 6.957497 6.957497 11225 +yuwu 1 1 6.957497 6.957497 11226 +tkcgi 1 1 6.957497 6.957497 11227 +securitypc 1 1 6.957497 6.957497 11228 +lube 1 1 6.957497 6.957497 11229 +ipngip_atmcomput 1 1 6.957497 6.957497 11230 +sapient 1 1 6.957497 6.957497 11231 +jobtrack 1 1 6.957497 6.957497 11232 +artvark 1 1 6.957497 6.957497 11233 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..f63fb2ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +network 1 168 1.791759 1.791759 61 +click 1 142 1.945910 1.945910 78 +sinc 1 90 2.397895 2.397895 159 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +receiv 1 66 2.708050 2.708050 244 +main 1 67 2.708050 2.708050 256 +window 1 68 2.708050 2.708050 242 +multimedia 1 68 2.708050 2.708050 258 +digit 1 52 2.995732 2.995732 348 +electron 1 47 3.091042 3.091042 379 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +layer 1 17 4.110874 4.110874 926 +massachusett 1 14 4.317488 4.317488 1118 +linda 1 10 4.653960 4.653960 1394 +equip 1 10 4.653960 4.653960 1459 +stack 1 10 4.653960 4.653960 1389 +corp 1 6 5.164786 5.164786 2139 +nativ 1 6 5.164786 5.164786 2192 +multicast 1 5 5.347108 5.347108 2305 +commerc 1 3 5.857933 5.857933 3209 +lowel 1 2 6.263398 6.263398 5224 +coursesfal 1 2 6.263398 6.263398 5225 +universitylinda 1 1 6.957497 6.957497 11234 +lxwu 1 1 6.957497 6.957497 11235 +univsers 1 1 6.957497 6.957497 11236 +banyan 1 1 6.957497 6.957497 11237 +mulitimedia 1 1 6.957497 6.957497 11238 +kramer 1 1 6.957497 6.957497 11239 +mart 1 1 6.957497 6.957497 11240 +photoesus 1 1 6.957497 6.957497 11241 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..f4b962ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +upson 1 71 2.639057 2.639057 218 +juli 1 60 2.833213 2.833213 305 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +halldepart 1 3 5.857933 5.857933 3641 +nikolai 1 2 6.263398 6.263398 4087 +mateevnikolai 1 1 6.957497 6.957497 11242 +mateevgradu 1 1 6.957497 6.957497 11243 +studentmateev 1 1 6.957497 6.957497 11244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..ebea6690 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +year 1 148 1.945910 1.945910 84 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +select 1 91 2.397895 2.397895 154 +academ 1 82 2.484907 2.484907 178 +come 1 78 2.564949 2.564949 202 +multimedia 1 68 2.708050 2.708050 258 +visit 1 63 2.772589 2.772589 288 +laboratori 1 63 2.772589 2.772589 292 +summer 1 56 2.890372 2.890372 311 +soon 1 36 3.367296 3.367296 494 +scientist 1 31 3.496508 3.496508 560 +universityithaca 1 24 3.761200 3.761200 710 +fellow 1 24 3.761200 3.761200 701 +fund 1 21 3.912023 3.912023 805 +supervis 1 20 3.951244 3.951244 840 +stop 1 17 4.110874 4.110874 942 +heterogen 1 14 4.317488 4.317488 1090 +arpa 1 11 4.553877 4.553877 1369 +princip 1 10 4.653960 4.653960 1397 +xerox 1 8 4.875197 4.875197 1725 +matthew 1 6 5.164786 5.164786 2193 +chat 1 6 5.164786 5.164786 2128 +metadata 1 4 5.568345 5.568345 2945 +morgenstern 1 1 6.957497 6.957497 11245 +pagematthew 1 1 6.957497 6.957497 11246 +morgensternresearch 1 1 6.957497 6.957497 11247 +leaderaddress 1 1 6.957497 6.957497 11248 +centerxerox 1 1 6.957497 6.957497 11249 +institutecornel 1 1 6.957497 6.957497 11250 +edustatu 1 1 6.957497 6.957497 11251 +scienceproject 1 1 6.957497 6.957497 11252 +fundedresearch 1 1 6.957497 6.957497 11253 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..ff68bd41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +homepag 1 93 2.397895 2.397895 148 +dynam 1 76 2.564949 2.564949 194 +digit 1 52 2.995732 2.995732 348 +especi 1 36 3.367296 3.367296 496 +random 1 34 3.401197 3.401197 511 +committe 1 34 3.401197 3.401197 522 +graph 1 30 3.555348 3.555348 576 +bound 1 26 3.688879 3.688879 659 +universityithaca 1 24 3.761200 3.761200 710 +leav 1 21 3.912023 3.912023 772 +corpor 1 21 3.912023 3.912023 802 +lower 1 18 4.060443 4.060443 886 +eduphon 1 15 4.248495 4.248495 1060 +equip 1 10 4.653960 4.653960 1459 +soda 1 6 5.164786 5.164786 2189 +departmentcornel 1 5 5.347108 5.347108 2275 +stoc 1 5 5.347108 5.347108 2491 +henzing 1 3 5.857933 5.857933 3713 +professorcomput 1 3 5.857933 5.857933 3714 +monika 1 2 6.263398 6.263398 4141 +rauch 1 2 6.263398 6.263398 4142 +homepagemonika 1 1 6.957497 6.957497 11254 +henzingerassist 1 1 6.957497 6.957497 11255 +centerhomepageresearch 1 1 6.957497 6.957497 11256 +interestscombinatori 1 1 6.957497 6.957497 11257 +pageprogram 1 1 6.957497 6.957497 11258 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..111b8f1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +final 1 116 2.197225 2.197225 108 +user 1 104 2.302585 2.302585 137 +technic 1 100 2.302585 2.302585 140 +exam 1 86 2.484907 2.484907 169 +level 1 87 2.484907 2.484907 180 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +integr 1 67 2.708050 2.708050 245 +ithaca 1 65 2.772589 2.772589 294 +plan 1 65 2.772589 2.772589 272 +collect 1 65 2.772589 2.772589 268 +share 1 59 2.833213 2.833213 304 +movi 1 40 3.258097 3.258097 459 +field 1 37 3.332205 3.332205 482 +travel 1 30 3.555348 3.555348 579 +tenni 1 20 3.951244 3.951244 838 +bachelor 1 17 4.110874 4.110874 957 +horu 1 14 4.317488 4.317488 1116 +huang 1 12 4.465908 4.465908 1202 +reness 1 11 4.553877 4.553877 1333 +werner 1 10 4.653960 4.653960 1385 +ride 1 8 4.875197 4.875197 1741 +vogel 1 8 4.875197 4.875197 1622 +sheldon 1 2 6.263398 6.263398 5226 +stanlei 1 1 6.957497 6.957497 11259 +huangmast 1 1 6.957497 6.957497 11261 +studentmhuang 1 1 6.957497 6.957497 11262 +courtcornel 1 1 6.957497 6.957497 11263 +kentucki 1 1 6.957497 6.957497 11260 +systemsdistribut 1 1 6.957497 6.957497 11264 +systemsdatabas 1 1 6.957497 6.957497 11265 +retrievalgraph 1 1 6.957497 6.957497 11266 +interfacesoth 1 1 6.957497 6.957497 11267 +horse_back 1 1 6.957497 6.957497 11268 +myadvisor 1 1 6.957497 6.957497 11269 +robbertvan 1 1 6.957497 6.957497 11270 +planplan 1 1 6.957497 6.957497 11271 +distributionplan 1 1 6.957497 6.957497 11272 +updateplan 1 1 6.957497 6.957497 11273 +faqhorusc 1 1 6.957497 6.957497 11274 +memorydistribut 1 1 6.957497 6.957497 11275 +memorysom 1 1 6.957497 6.957497 11276 +communicationsnapshotu 1 1 6.957497 6.957497 11277 +architecturejobscar 1 1 6.957497 6.957497 11278 +pathbai 1 1 6.957497 6.957497 11279 +jobscyberezumescar 1 1 6.957497 6.957497 11280 +opportunitiesus 1 1 6.957497 6.957497 11281 +stufftechn 1 1 6.957497 6.957497 11282 +searchbel 1 1 6.957497 6.957497 11283 +labsspbsd 1 1 6.957497 6.957497 11284 +sourcesjavarfclast 1 1 6.957497 6.957497 11285 +mhuang 1 1 6.957497 6.957497 11286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..eb037dbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +public 1 202 1.609438 1.609438 43 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +comment 1 93 2.397895 2.397895 146 +second 1 81 2.484907 2.484907 166 +west 1 83 2.484907 2.484907 192 +know 1 80 2.564949 2.564949 198 +copi 1 63 2.772589 2.772589 284 +copyright 1 36 3.367296 3.367296 495 +express 1 32 3.465736 3.465736 540 +abl 1 30 3.555348 3.555348 566 +challeng 1 26 3.688879 3.688879 653 +never 1 25 3.737670 3.737670 671 +universityithaca 1 24 3.761200 3.761200 710 +decis 1 23 3.806662 3.806662 728 +sciencecornel 1 22 3.850148 3.850148 768 +protect 1 17 4.110874 4.110874 935 +differenti 1 17 4.110874 4.110874 921 +precis 1 15 4.248495 4.248495 1023 +script 1 13 4.382027 4.382027 1171 +speech 1 12 4.465908 4.465908 1222 +holidai 1 12 4.465908 4.465908 1224 +mass 1 8 4.875197 4.875197 1732 +parti 1 8 4.875197 4.875197 1676 +cat 1 6 5.164786 5.164786 2194 +rebecca 1 6 5.164786 5.164786 2174 +highest 1 4 5.568345 5.568345 2950 +cuc 1 4 5.568345 5.568345 2630 +government 1 2 6.263398 6.263398 4248 +aclu 1 2 6.263398 6.263398 5227 +reno 1 2 6.263398 6.263398 5228 +lynett 1 1 6.957497 6.957497 11288 +millett 1 1 6.957497 6.957497 11287 +millettdepart 1 1 6.957497 6.957497 11289 +participatoryform 1 1 6.957497 6.957497 11290 +internetdeserv 1 1 6.957497 6.957497 11291 +intrus 1 1 6.957497 6.957497 11292 +skit 1 1 6.957497 6.957497 11293 +femin 1 1 6.957497 6.957497 11294 +feminist 1 1 6.957497 6.957497 11295 +whenver 1 1 6.957497 6.957497 11296 +sentiment 1 1 6.957497 6.957497 11297 +doormat 1 1 6.957497 6.957497 11298 +prostitut 1 1 6.957497 6.957497 11299 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..deacbabd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +perform 1 143 1.945910 1.945910 74 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +databas 1 122 2.079442 2.079442 86 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +involv 1 71 2.639057 2.639057 227 +servic 1 72 2.639057 2.639057 236 +practic 1 70 2.708050 2.708050 246 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +major 1 56 2.890372 2.890372 315 +reason 1 57 2.890372 2.890372 318 +extens 1 53 2.944439 2.944439 340 +cool 1 49 3.044522 3.044522 374 +video 1 44 3.135494 3.135494 405 +offer 1 43 3.178054 3.178054 414 +vision 1 41 3.218876 3.218876 430 +electr 1 38 3.295837 3.295837 461 +soon 1 36 3.367296 3.367296 494 +computersci 1 30 3.555348 3.555348 562 +hope 1 28 3.610918 3.610918 610 +weather 1 28 3.610918 3.610918 618 +latest 1 21 3.912023 3.912023 785 +sure 1 20 3.951244 3.951244 813 +ever 1 19 4.007333 4.007333 872 +practicum 1 16 4.174387 4.174387 960 +stock 1 16 4.174387 4.174387 1007 +menu 1 13 4.382027 4.382027 1156 +everyth 1 13 4.382027 4.382027 1169 +meng 1 12 4.465908 4.465908 1214 +earth 1 10 4.653960 4.653960 1463 +transmiss 1 9 4.753590 4.753590 1588 +andcomput 1 8 4.875197 4.875197 1623 +capac 1 8 4.875197 4.875197 1740 +film 1 8 4.875197 4.875197 1761 +temporari 1 6 5.164786 5.164786 2090 +wrong 1 6 5.164786 5.164786 2025 +conot 1 5 5.347108 5.347108 2245 +doubl 1 4 5.568345 5.568345 2951 +festiv 1 4 5.568345 5.568345 2952 +polytechn 1 3 5.857933 5.857933 3222 +educornel 1 3 5.857933 5.857933 3601 +coolest 1 2 6.263398 6.263398 5229 +newgroup 1 2 6.263398 6.263398 4191 +pagemi 1 2 6.263398 6.263398 5230 +nerd 1 2 6.263398 6.263398 5231 +mishaal 1 1 6.957497 6.957497 11301 +pagemisha 1 1 6.957497 6.957497 11302 +kuwaiti 1 1 6.957497 6.957497 11303 +mengc 1 1 6.957497 6.957497 11304 +worcest 1 1 6.957497 6.957497 11305 +inworcest 1 1 6.957497 6.957497 11306 +bearaccess 1 1 6.957497 6.957497 11307 +newgroupc 1 1 6.957497 6.957497 11300 +newgroupnba 1 1 6.957497 6.957497 11308 +newgroupoptim 1 1 6.957497 6.957497 11309 +kuwait 1 1 6.957497 6.957497 11310 +quotescool 1 1 6.957497 6.957497 11311 +cann 1 1 6.957497 6.957497 11312 +accus 1 1 6.957497 6.957497 11313 +almashanmisha 1 1 6.957497 6.957497 11314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..f60de7e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +number 1 130 2.079442 2.079442 97 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +graphic 1 90 2.397895 2.397895 147 +homepag 1 93 2.397895 2.397895 148 +imag 1 91 2.397895 2.397895 161 +stuff 1 87 2.484907 2.484907 171 +member 1 84 2.484907 2.484907 165 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +know 1 80 2.564949 2.564949 198 +complet 1 77 2.564949 2.564949 208 +want 1 79 2.564949 2.564949 199 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +onlin 1 75 2.639057 2.639057 223 +line 1 75 2.639057 2.639057 231 +would 1 67 2.708050 2.708050 251 +result 1 65 2.772589 2.772589 281 +plai 1 60 2.833213 2.833213 307 +browser 1 56 2.890372 2.890372 313 +date 1 51 2.995732 2.995732 344 +much 1 52 2.995732 2.995732 349 +cool 1 49 3.044522 3.044522 374 +right 1 48 3.044522 3.044522 363 +visitor 1 49 3.044522 3.044522 371 +better 1 45 3.135494 3.135494 401 +mark 1 44 3.135494 3.135494 403 +made 1 44 3.135494 3.135494 398 +keep 1 44 3.135494 3.135494 409 +realli 1 40 3.258097 3.258097 444 +movi 1 40 3.258097 3.258097 459 +join 1 39 3.258097 3.258097 457 +connect 1 37 3.332205 3.332205 485 +hand 1 37 3.332205 3.332205 475 +statist 1 35 3.401197 3.401197 521 +random 1 34 3.401197 3.401197 511 +idea 1 32 3.465736 3.465736 545 +anim 1 31 3.496508 3.496508 557 +someth 1 31 3.496508 3.496508 554 +quit 1 27 3.637586 3.637586 633 +mike 1 24 3.761200 3.761200 703 +other 1 24 3.761200 3.761200 697 +togeth 1 23 3.806662 3.806662 714 +thank 1 23 3.806662 3.806662 721 +love 1 21 3.912023 3.912023 804 +mpeg 1 20 3.951244 3.951244 831 +andrew 1 19 4.007333 4.007333 849 +coupl 1 17 4.110874 4.110874 939 +stop 1 17 4.110874 4.110874 942 +expand 1 17 4.110874 4.110874 928 +whole 1 17 4.110874 4.110874 940 +sign 1 16 4.174387 4.174387 970 +anyth 1 16 4.174387 4.174387 998 +pretti 1 13 4.382027 4.382027 1191 +count 1 12 4.465908 4.465908 1239 +danc 1 12 4.465908 4.465908 1278 +guess 1 10 4.653960 4.653960 1443 +card 1 10 4.653960 4.653960 1435 +kevin 1 9 4.753590 4.753590 1482 +pick 1 9 4.753590 4.753590 1498 +opinion 1 8 4.875197 4.875197 1708 +attent 1 8 4.875197 4.875197 1651 +chanc 1 7 5.010635 5.010635 1960 +brought 1 7 5.010635 5.010635 1925 +bunch 1 7 5.010635 5.010635 1861 +yeah 1 6 5.164786 5.164786 2195 +golf 1 6 5.164786 5.164786 2178 +pagethi 1 5 5.347108 5.347108 2336 +frog 1 5 5.347108 5.347108 2479 +kid 1 5 5.347108 5.347108 2516 +exchang 1 5 5.347108 5.347108 2310 +sing 1 5 5.347108 5.347108 2499 +everybodi 1 5 5.347108 5.347108 2517 +dark 1 4 5.568345 5.568345 2910 +vote 1 4 5.568345 5.568345 2953 +maria 1 4 5.568345 5.568345 2954 +amaz 1 4 5.568345 5.568345 2600 +stockholm 1 3 5.857933 5.857933 3715 +ryan 1 3 5.857933 5.857933 3679 +nicknam 1 3 5.857933 5.857933 3716 +lame 1 3 5.857933 5.857933 3717 +beavi 1 2 6.263398 6.263398 4995 +suck 1 2 6.263398 6.263398 5232 +donnel 1 2 6.263398 6.263398 5233 +spirit 1 2 6.263398 6.263398 5234 +harmoni 1 2 6.263398 6.263398 5235 +reset 1 2 6.263398 6.263398 5236 +myguestbook 1 1 6.957497 6.957497 11316 +poll 1 1 6.957497 6.957497 11317 +vitya 1 1 6.957497 6.957497 11318 +korbi 1 1 6.957497 6.957497 11315 +corbett 1 1 6.957497 6.957497 11319 +eryn 1 1 6.957497 6.957497 11320 +crave 1 1 6.957497 6.957497 11321 +guttermouth 1 1 6.957497 6.957497 11322 +byjust 1 1 6.957497 6.957497 11323 +peic 1 1 6.957497 6.957497 11324 +accuar 1 1 6.957497 6.957497 11325 +atmak 1 1 6.957497 6.957497 11326 +edubas 1 1 6.957497 6.957497 11327 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..58b39aa4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +fall 1 181 1.609438 1.609438 40 +construct 1 139 1.945910 1.945910 82 +spring 1 131 2.079442 2.079442 88 +graphic 1 90 2.397895 2.397895 147 +multimedia 1 68 2.708050 2.708050 258 +semest 1 58 2.890372 2.890372 312 +effect 1 46 3.091042 3.091042 385 +made 1 44 3.135494 3.135494 398 +anim 1 31 3.496508 3.496508 557 +titl 1 31 3.496508 3.496508 556 +jpeg 1 6 5.164786 5.164786 2053 +nobuhiko 1 1 6.957497 6.957497 11328 +mukainobuhiko 1 1 6.957497 6.957497 11330 +mukai 1 1 6.957497 6.957497 11329 +compressionon 1 1 6.957497 6.957497 11331 +magicon 1 1 6.957497 6.957497 11332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..0e3e293d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +person 1 111 2.197225 2.197225 117 +homepag 1 93 2.397895 2.397895 148 +imag 1 91 2.397895 2.397895 161 +info 1 85 2.484907 2.484907 176 +resum 1 79 2.564949 2.564949 217 +refer 1 78 2.564949 2.564949 203 +upson 1 71 2.639057 2.639057 218 +eduoffic 1 33 3.433987 3.433987 531 +photo 1 31 3.496508 3.496508 561 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +studentdepart 1 5 5.347108 5.347108 2505 +nichola 1 3 5.857933 5.857933 3252 +how 1 3 5.857933 5.857933 3289 +nihow 1 1 6.957497 6.957497 11333 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..9be0c0d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +sinc 1 90 2.397895 2.397895 159 +upson 1 71 2.639057 2.639057 218 +java 1 70 2.708050 2.708050 248 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +pointer 1 48 3.044522 3.044522 368 +visitor 1 49 3.044522 3.044522 371 +vita 1 38 3.295837 3.295837 473 +niko 1 4 5.568345 5.568345 2637 +pitsiani 1 3 5.857933 5.857933 3175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..979034e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +phone 1 175 1.791759 1.791759 45 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +hall 1 146 1.945910 1.945910 65 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +spring 1 131 2.079442 2.079442 88 +machin 1 129 2.079442 2.079442 95 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +mathemat 1 108 2.197225 2.197225 123 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +mani 1 92 2.397895 2.397895 150 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +center 1 88 2.397895 2.397895 158 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +master 1 76 2.564949 2.564949 216 +appear 1 78 2.564949 2.564949 210 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +state 1 76 2.564949 2.564949 207 +method 1 80 2.564949 2.564949 213 +solv 1 73 2.639057 2.639057 234 +workshop 1 71 2.639057 2.639057 239 +appli 1 71 2.639057 2.639057 226 +nation 1 74 2.639057 2.639057 240 +symposium 1 72 2.639057 2.639057 238 +simul 1 66 2.708050 2.708050 255 +foundat 1 62 2.772589 2.772589 286 +ithaca 1 65 2.772589 2.772589 294 +back 1 60 2.833213 2.833213 297 +room 1 59 2.833213 2.833213 301 +special 1 56 2.890372 2.890372 320 +undergradu 1 54 2.944439 2.944439 338 +scientif 1 53 2.944439 2.944439 341 +numer 1 49 3.044522 3.044522 369 +adapt 1 46 3.091042 3.091042 387 +map 1 39 3.258097 3.258097 452 +field 1 37 3.332205 3.332205 482 +compon 1 30 3.555348 3.555348 570 +domain 1 30 3.555348 3.555348 564 +load 1 28 3.610918 3.610918 601 +challeng 1 26 3.688879 3.688879 653 +altern 1 26 3.688879 3.688879 641 +task 1 25 3.737670 3.737670 678 +known 1 24 3.761200 3.761200 702 +scalabl 1 24 3.761200 3.761200 705 +equat 1 23 3.806662 3.806662 724 +geometri 1 22 3.850148 3.850148 752 +siam 1 21 3.912023 3.912023 800 +wang 1 21 3.912023 3.912023 790 +portabl 1 20 3.951244 3.951244 819 +binari 1 20 3.951244 3.951244 823 +toolkit 1 20 3.951244 3.951244 835 +runtim 1 19 4.007333 4.007333 858 +partial 1 18 4.060443 4.060443 900 +former 1 17 4.110874 4.110874 956 +differenti 1 17 4.110874 4.110874 921 +partit 1 16 4.174387 4.174387 984 +balanc 1 14 4.317488 4.317488 1112 +incomput 1 14 4.317488 4.317488 1096 +menu 1 13 4.382027 4.382027 1156 +touch 1 12 4.465908 4.465908 1288 +iter 1 12 4.465908 4.465908 1206 +characterist 1 12 4.465908 4.465908 1257 +multithread 1 11 4.553877 4.553877 1315 +rice 1 11 4.553877 4.553877 1336 +purdu 1 10 4.653960 4.653960 1466 +black 1 10 4.653960 4.653960 1418 +decomposit 1 10 4.653960 4.653960 1439 +rhode 1 9 4.753590 4.753590 1579 +sensit 1 8 4.875197 4.875197 1726 +vineet 1 8 4.875197 4.875197 1639 +watson 1 8 4.875197 4.875197 1691 +yang 1 8 4.875197 4.875197 1652 +univeristi 1 8 4.875197 4.875197 1754 +multicomput 1 7 5.010635 5.010635 1890 +solver 1 7 5.010635 5.010635 1911 +thompson 1 6 5.164786 5.164786 2049 +heurist 1 6 5.164786 5.164786 2125 +hole 1 5 5.347108 5.347108 2518 +grand 1 5 5.347108 5.347108 2425 +fluid 1 5 5.347108 5.347108 2440 +niko 1 4 5.568345 5.568345 2637 +bernoulli 1 4 5.568345 5.568345 2955 +kodukula 1 4 5.568345 5.568345 2640 +indupraka 1 4 5.568345 5.568345 2639 +pingali 1 4 5.568345 5.568345 2956 +contemporari 1 4 5.568345 5.568345 2719 +colorado 1 4 5.568345 5.568345 2938 +knight 1 4 5.568345 5.568345 2728 +richter 1 4 5.568345 5.568345 2957 +architecur 1 3 5.857933 5.857933 3448 +ahuja 1 3 5.857933 5.857933 3494 +ctctr 1 3 5.857933 5.857933 3625 +imac 1 3 5.857933 5.857933 3718 +brunswick 1 3 5.857933 5.857933 3567 +mimd 1 3 5.857933 5.857933 3361 +chrisochoid 1 2 6.263398 6.263398 5237 +facet 1 2 6.263398 6.263398 4687 +prema 1 2 6.263398 6.263398 5238 +grid 1 2 6.263398 6.263398 4228 +kale 1 2 6.263398 6.263398 4545 +key 1 2 6.263398 6.263398 5057 +aiaa 1 2 6.263398 6.263398 5239 +moscow 1 2 6.263398 6.263398 4884 +programmingenviron 1 2 6.263398 6.263398 5240 +and 1 2 6.263398 6.263398 5241 +nikosc 1 2 6.263398 6.263398 5242 +ellpack 1 1 6.957497 6.957497 11335 +florian 1 1 6.957497 6.957497 11339 +sukup 1 1 6.957497 6.957497 11336 +reza 1 1 6.957497 6.957497 11340 +behforooz 1 1 6.957497 6.957497 11341 +animesh 1 1 6.957497 6.957497 11342 +chatterje 1 1 6.957497 6.957497 11343 +rajani 1 1 6.957497 6.957497 11344 +vaidyanathan 1 1 6.957497 6.957497 11345 +bowyer 1 1 6.957497 6.957497 11346 +offifth 1 1 6.957497 6.957497 11347 +kohl 1 1 6.957497 6.957497 11348 +yellick 1 1 6.957497 6.957497 11349 +unstructur 1 1 6.957497 6.957497 11350 +housti 1 1 6.957497 6.957497 11334 +collid 1 1 6.957497 6.957497 11351 +haupt 1 1 6.957497 6.957497 11352 +scalableparallel 1 1 6.957497 6.957497 11353 +engineeringresearch 1 1 6.957497 6.957497 11354 +mississippi 1 1 6.957497 6.957497 11337 +parallelhardwar 1 1 6.957497 6.957497 11355 +differentialequ 1 1 6.957497 6.957497 11356 +vichnevetski 1 1 6.957497 6.957497 11357 +decompos 1 1 6.957497 6.957497 11358 +papachi 1 1 6.957497 6.957497 11338 +kortesi 1 1 6.957497 6.957497 11359 +domaindecomposit 1 1 6.957497 6.957497 11360 +ussr 1 1 6.957497 6.957497 11361 +glowinski 1 1 6.957497 6.957497 11362 +karathanas 1 1 6.957497 6.957497 11363 +samartzi 1 1 6.957497 6.957497 11364 +vavali 1 1 6.957497 6.957497 11365 +weerawarana 1 1 6.957497 6.957497 11366 +onsupercomput 1 1 6.957497 6.957497 11367 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..7196e0be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +center 1 88 2.397895 2.397895 158 +environ 1 84 2.484907 2.484907 177 +institut 1 84 2.484907 2.484907 187 +build 1 85 2.484907 2.484907 184 +journal 1 83 2.484907 2.484907 183 +dynam 1 76 2.564949 2.564949 194 +solv 1 73 2.639057 2.639057 234 +appli 1 71 2.639057 2.639057 226 +copi 1 63 2.772589 2.772589 284 +automat 1 61 2.833213 2.833213 306 +share 1 59 2.833213 2.833213 304 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +scientif 1 53 2.944439 2.944439 341 +numer 1 49 3.044522 3.044522 369 +adapt 1 46 3.091042 3.091042 387 +execut 1 45 3.135494 3.135494 404 +mechan 1 43 3.178054 3.178054 416 +multi 1 36 3.367296 3.367296 493 +copyright 1 36 3.367296 3.367296 495 +global 1 34 3.401197 3.401197 520 +load 1 28 3.610918 3.610918 601 +task 1 25 3.737670 3.737670 678 +thread 1 23 3.806662 3.806662 722 +varieti 1 22 3.850148 3.850148 740 +portabl 1 20 3.951244 3.951244 819 +runtim 1 19 4.007333 4.007333 858 +style 1 15 4.248495 4.248495 1036 +balanc 1 14 4.317488 4.317488 1112 +target 1 12 4.465908 4.465908 1282 +multithread 1 11 4.553877 4.553877 1315 +consortium 1 10 4.653960 4.653960 1467 +port 1 8 4.875197 4.875197 1766 +multicomput 1 7 5.010635 5.010635 1890 +niko 1 4 5.568345 5.568345 2637 +ctctr 1 3 5.857933 5.857933 3625 +prema 1 2 6.263398 6.263398 5238 +suppot 1 2 6.263398 6.263398 5243 +chrisochoid 1 2 6.263398 6.263398 5237 +nikosc 1 2 6.263398 6.263398 5242 +andproblem 1 1 6.957497 6.957497 11368 +computingappl 1 1 6.957497 6.957497 11369 +pdecomput 1 1 6.957497 6.957497 11370 +pcrc 1 1 6.957497 6.957497 11371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..b5c76121 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +welcom 1 122 2.079442 2.079442 99 +world 1 115 2.197225 2.197225 126 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +real 1 93 2.397895 2.397895 144 +learn 1 86 2.484907 2.484907 170 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +written 1 63 2.772589 2.772589 278 +locat 1 59 2.833213 2.833213 303 +game 1 36 3.367296 3.367296 498 +photo 1 31 3.496508 3.496508 561 +galleri 1 13 4.382027 4.382027 1192 +rest 1 12 4.465908 4.465908 1259 +invit 1 10 4.653960 4.653960 1428 +nuprl 1 10 4.653960 4.653960 1402 +sundai 1 10 4.653960 4.653960 1387 +tire 1 4 5.568345 5.568345 2799 +cyberspac 1 3 5.857933 5.857933 3719 +pavel 1 2 6.263398 6.263398 4164 +cinema 1 2 6.263398 6.263398 5244 +naumov 1 1 6.957497 6.957497 11372 +orplai 1 1 6.957497 6.957497 11373 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..ad6875c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,195 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +area 1 144 1.945910 1.945910 80 +studi 1 120 2.079442 2.079442 91 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +schedul 1 119 2.079442 2.079442 85 +place 1 106 2.197225 2.197225 124 +intern 1 108 2.197225 2.197225 128 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +need 1 98 2.302585 2.302585 135 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +select 1 91 2.397895 2.397895 154 +requir 1 81 2.484907 2.484907 167 +wide 1 84 2.484907 2.484907 185 +thing 1 84 2.484907 2.484907 189 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +david 1 71 2.639057 2.639057 232 +materi 1 75 2.639057 2.639057 221 +effici 1 73 2.639057 2.639057 233 +workshop 1 71 2.639057 2.639057 239 +upson 1 71 2.639057 2.639057 218 +goal 1 66 2.708050 2.708050 250 +practic 1 70 2.708050 2.708050 246 +simul 1 66 2.708050 2.708050 255 +guid 1 63 2.772589 2.772589 267 +foundat 1 62 2.772589 2.772589 286 +simpl 1 60 2.833213 2.833213 298 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +detail 1 57 2.890372 2.890372 321 +publish 1 57 2.890372 2.890372 326 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +set 1 50 3.044522 3.044522 361 +physic 1 47 3.091042 3.091042 377 +could 1 46 3.091042 3.091042 383 +video 1 44 3.135494 3.135494 405 +long 1 43 3.178054 3.178054 413 +offer 1 43 3.178054 3.178054 414 +vision 1 41 3.218876 3.218876 430 +futur 1 41 3.218876 3.218876 427 +fast 1 42 3.218876 3.218876 429 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +york 1 41 3.218876 3.218876 435 +must 1 40 3.258097 3.258097 442 +theoret 1 39 3.258097 3.258097 446 +realli 1 40 3.258097 3.258097 444 +societi 1 40 3.258097 3.258097 456 +transact 1 39 3.258097 3.258097 438 +connect 1 37 3.332205 3.332205 485 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +game 1 36 3.367296 3.367296 498 +tree 1 36 3.367296 3.367296 492 +singl 1 34 3.401197 3.401197 510 +tech 1 35 3.401197 3.401197 515 +global 1 34 3.401197 3.401197 520 +taken 1 31 3.496508 3.496508 555 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +becom 1 28 3.610918 3.610918 603 +multiprocessor 1 28 3.610918 3.610918 605 +campu 1 27 3.637586 3.637586 623 +challeng 1 26 3.688879 3.688879 653 +notic 1 25 3.737670 3.737670 675 +scalabl 1 24 3.761200 3.761200 705 +universityithaca 1 24 3.761200 3.761200 710 +highli 1 23 3.806662 3.806662 725 +sequenti 1 22 3.850148 3.850148 745 +chip 1 21 3.912023 3.912023 770 +vlsi 1 21 3.912023 3.912023 795 +prepar 1 20 3.951244 3.951244 824 +exploit 1 20 3.951244 3.951244 836 +alloc 1 20 3.951244 3.951244 821 +region 1 19 4.007333 4.007333 875 +dimension 1 18 4.060443 4.060443 909 +speed 1 18 4.060443 4.060443 911 +element 1 18 4.060443 4.060443 895 +estim 1 17 4.110874 4.110874 930 +regist 1 17 4.110874 4.110874 938 +sept 1 17 4.110874 4.110874 952 +spatial 1 16 4.174387 4.174387 988 +reflect 1 15 4.248495 4.248495 1034 +near 1 14 4.317488 4.317488 1091 +polynomi 1 14 4.317488 4.317488 1069 +believ 1 13 4.382027 4.382027 1187 +johnson 1 13 4.382027 4.382027 1162 +sigplan 1 13 4.382027 4.382027 1190 +mesh 1 11 4.553877 4.553877 1351 +desktop 1 10 4.653960 4.653960 1445 +placement 1 10 4.653960 4.653960 1420 +cryptographi 1 9 4.753590 4.753590 1512 +perhap 1 8 4.875197 4.875197 1693 +realiz 1 8 4.875197 4.875197 1739 +attent 1 8 4.875197 4.875197 1651 +character 1 8 4.875197 4.875197 1767 +entri 1 8 4.875197 4.875197 1678 +pldi 1 8 4.875197 4.875197 1704 +irregular 1 8 4.875197 4.875197 1768 +hallcornel 1 8 4.875197 4.875197 1757 +henc 1 7 5.010635 5.010635 1805 +pursu 1 7 5.010635 5.010635 1902 +sensor 1 7 5.010635 5.010635 1920 +maxim 1 7 5.010635 5.010635 1944 +hidden 1 6 5.164786 5.164786 1987 +feasibl 1 6 5.164786 5.164786 2157 +cellular 1 5 5.347108 5.347108 2433 +grand 1 5 5.347108 5.347108 2425 +sold 1 4 5.568345 5.568345 2813 +compcon 1 4 5.568345 5.568345 2958 +pingali 1 4 5.568345 5.568345 2956 +zippel 1 4 5.568345 5.568345 2879 +lattic 1 3 5.857933 5.857933 3721 +neumann 1 3 5.857933 5.857933 3720 +parallelmachin 1 3 5.857933 5.857933 3693 +arm 1 3 5.857933 5.857933 3697 +exit 1 3 5.857933 5.857933 3124 +simd 1 3 5.857933 5.857933 3360 +usaemail 1 3 5.857933 5.857933 3722 +pearson 1 2 6.263398 6.263398 5245 +crystal 1 2 6.263398 6.263398 5013 +molecul 1 2 6.263398 6.263398 5246 +succe 1 2 6.263398 6.263398 5214 +consistingof 1 1 6.957497 6.957497 11376 +theubiquit 1 1 6.957497 6.957497 11377 +heed 1 1 6.957497 6.957497 11378 +lawsof 1 1 6.957497 6.957497 11379 +layoutand 1 1 6.957497 6.957497 11380 +accomplishedbi 1 1 6.957497 6.957497 11381 +ihav 1 1 6.957497 6.957497 11382 +couldb 1 1 6.957497 6.957497 11383 +thisarchitectur 1 1 6.957497 6.957497 11384 +designfor 1 1 6.957497 6.957497 11385 +proteinstructur 1 1 6.957497 6.957497 11386 +parallelcomput 1 1 6.957497 6.957497 11387 +commodityand 1 1 6.957497 6.957497 11388 +architectureand 1 1 6.957497 6.957497 11389 +hideth 1 1 6.957497 6.957497 11390 +underlyingvon 1 1 6.957497 6.957497 11391 +architectureha 1 1 6.957497 6.957497 11392 +easyto 1 1 6.957497 6.957497 11393 +dunten 1 1 6.957497 6.957497 11394 +kiewit 1 1 6.957497 6.957497 11395 +pillai 1 1 6.957497 6.957497 11396 +vazirani 1 1 6.957497 6.957497 11374 +bipartit 1 1 6.957497 6.957497 11375 +irregularli 1 1 6.957497 6.957497 11397 +allerton 1 1 6.957497 6.957497 11398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..6a96413f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +contact 1 153 1.791759 1.791759 59 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +site 1 106 2.197225 2.197225 119 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +intern 1 108 2.197225 2.197225 128 +final 1 116 2.197225 2.197225 108 +part 1 98 2.302585 2.302585 129 +pictur 1 89 2.397895 2.397895 160 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +mani 1 92 2.397895 2.397895 150 +school 1 84 2.484907 2.484907 188 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +resum 1 79 2.564949 2.564949 217 +master 1 76 2.564949 2.564949 216 +complet 1 77 2.564949 2.564949 208 +server 1 76 2.564949 2.564949 204 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +degre 1 69 2.708050 2.708050 259 +receiv 1 66 2.708050 2.708050 244 +would 1 67 2.708050 2.708050 251 +knowledg 1 67 2.708050 2.708050 243 +practic 1 70 2.708050 2.708050 246 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +content 1 59 2.833213 2.833213 302 +colleg 1 61 2.833213 2.833213 300 +best 1 59 2.833213 2.833213 299 +semest 1 58 2.890372 2.890372 312 +found 1 53 2.944439 2.944439 337 +februari 1 54 2.944439 2.944439 328 +run 1 51 2.995732 2.995732 347 +case 1 51 2.995732 2.995732 351 +visitor 1 49 3.044522 3.044522 371 +still 1 50 3.044522 3.044522 362 +possibl 1 47 3.091042 3.091042 378 +favorit 1 44 3.135494 3.135494 410 +offer 1 43 3.178054 3.178054 414 +compani 1 41 3.218876 3.218876 423 +soon 1 36 3.367296 3.367296 494 +chapter 1 32 3.465736 3.465736 536 +taken 1 31 3.496508 3.496508 555 +abl 1 30 3.555348 3.555348 566 +quot 1 29 3.583519 3.583519 582 +becom 1 28 3.610918 3.610918 603 +hope 1 28 3.610918 3.610918 610 +administr 1 27 3.637586 3.637586 628 +request 1 26 3.688879 3.688879 635 +enabl 1 26 3.688879 3.688879 655 +enhanc 1 26 3.688879 3.688879 644 +client 1 25 3.737670 3.737670 679 +reach 1 24 3.761200 3.761200 688 +alwai 1 24 3.761200 3.761200 691 +cooper 1 22 3.850148 3.850148 757 +busi 1 21 3.912023 3.912023 784 +mpeg 1 20 3.951244 3.951244 831 +applet 1 20 3.951244 3.951244 827 +wonder 1 20 3.951244 3.951244 815 +log 1 19 4.007333 4.007333 857 +stock 1 16 4.174387 4.174387 1007 +todd 1 15 4.248495 4.248495 1051 +joint 1 13 4.382027 4.382027 1130 +johnson 1 13 4.382027 4.382027 1162 +brother 1 13 4.382027 4.382027 1189 +meng 1 12 4.465908 4.465908 1214 +round 1 8 4.875197 4.875197 1769 +presid 1 6 5.164786 5.164786 2196 +quickli 1 6 5.164786 5.164786 2000 +classroom 1 6 5.164786 5.164786 2006 +microsystem 1 6 5.164786 5.164786 2160 +junior 1 5 5.347108 5.347108 2519 +supplement 1 5 5.347108 5.347108 2355 +suppli 1 4 5.568345 5.568345 2611 +tape 1 4 5.568345 5.568345 2959 +permiss 1 4 5.568345 5.568345 2642 +jointli 1 3 5.857933 5.857933 3118 +eduand 1 3 5.857933 5.857933 3452 +roll 1 3 5.857933 5.857933 3723 +espn 1 3 5.857933 5.857933 3724 +borrow 1 3 5.857933 5.857933 3725 +fratern 1 2 6.263398 6.263398 4979 +throughth 1 2 6.263398 6.263398 4065 +ticker 1 2 6.263398 6.263398 5247 +peskin 1 1 6.957497 6.957497 11399 +acacia 1 1 6.957497 6.957497 11400 +andyour 1 1 6.957497 6.957497 11401 +workeda 1 1 6.957497 6.957497 11402 +cornellundergradu 1 1 6.957497 6.957497 11403 +theirfield 1 1 6.957497 6.957497 11404 +isrun 1 1 6.957497 6.957497 11405 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..c3b8af86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +cornel 1 215 1.386294 1.386294 23 +list 1 201 1.609438 1.609438 39 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +mani 1 92 2.397895 2.397895 150 +stuff 1 87 2.484907 2.484907 171 +second 1 81 2.484907 2.484907 166 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +david 1 71 2.639057 2.639057 232 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +without 1 50 3.044522 3.044522 370 +favorit 1 44 3.135494 3.135494 410 +howev 1 41 3.218876 3.218876 422 +approxim 1 35 3.401197 3.401197 509 +go 1 33 3.433987 3.433987 529 +quot 1 29 3.583519 3.583519 582 +great 1 27 3.637586 3.637586 626 +although 1 25 3.737670 3.737670 667 +citi 1 19 4.007333 4.007333 874 +otherwis 1 17 4.110874 4.110874 922 +alreadi 1 16 4.174387 4.174387 963 +month 1 15 4.248495 4.248495 1025 +dave 1 14 4.317488 4.317488 1098 +philadelphia 1 12 4.465908 4.465908 1244 +resid 1 10 4.653960 4.653960 1461 +shop 1 10 4.653960 4.653960 1469 +imposs 1 9 4.753590 4.753590 1513 +pittsburgh 1 7 5.010635 5.010635 1938 +pennsylvania 1 7 5.010635 5.010635 1932 +famou 1 6 5.164786 5.164786 2185 +pierc 1 4 5.568345 5.568345 2623 +outlet 1 2 6.263398 6.263398 5248 +valentin 1 1 6.957497 6.957497 11406 +familycurr 1 1 6.957497 6.957497 11407 +halfwai 1 1 6.957497 6.957497 11408 +andharrisburg 1 1 6.957497 6.957497 11409 +younev 1 1 6.957497 6.957497 11410 +sinceit 1 1 6.957497 6.957497 11411 +throughpittsburgh 1 1 6.957497 6.957497 11412 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..1862d14a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +applic 1 170 1.791759 1.791759 56 +architectur 1 139 1.945910 1.945910 77 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +construct 1 139 1.945910 1.945910 82 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +code 1 108 2.197225 2.197225 116 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +center 1 88 2.397895 2.397895 158 +imag 1 91 2.397895 2.397895 161 +present 1 91 2.397895 2.397895 145 +octob 1 89 2.397895 2.397895 156 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +level 1 87 2.484907 2.484907 180 +info 1 85 2.484907 2.484907 176 +april 1 77 2.564949 2.564949 196 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +prof 1 64 2.772589 2.772589 273 +summer 1 56 2.890372 2.890372 311 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +extens 1 53 2.944439 2.944439 340 +maintain 1 51 2.995732 2.995732 342 +paul 1 38 3.295837 3.295837 471 +seminar 1 38 3.295837 3.295837 470 +multi 1 36 3.367296 3.367296 493 +random 1 34 3.401197 3.401197 511 +taught 1 33 3.433987 3.433987 526 +given 1 32 3.465736 3.465736 538 +transform 1 32 3.465736 3.465736 542 +abl 1 30 3.555348 3.555348 566 +framework 1 28 3.610918 3.610918 606 +packag 1 28 3.610918 3.610918 614 +seri 1 24 3.761200 3.761200 708 +handl 1 24 3.761200 3.761200 685 +lab 1 24 3.761200 3.761200 698 +deal 1 22 3.850148 3.850148 736 +instal 1 22 3.850148 3.850148 754 +runtim 1 19 4.007333 4.007333 858 +affili 1 13 4.382027 4.382027 1194 +deriv 1 13 4.382027 4.382027 1145 +block 1 13 4.382027 4.382027 1183 +vladimir 1 11 4.553877 4.553877 1324 +loop 1 11 4.553877 4.553877 1310 +regard 1 11 4.553877 4.553877 1309 +prior 1 10 4.653960 4.653960 1438 +tradit 1 10 4.653960 4.653960 1404 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +madra 1 8 4.875197 4.875197 1770 +watson 1 8 4.875197 4.875197 1691 +keshav 1 7 5.010635 5.010635 1852 +tip 1 7 5.010635 5.010635 1863 +dens 1 6 5.164786 5.164786 2122 +nest 1 6 5.164786 5.164786 2151 +handi 1 6 5.164786 5.164786 2111 +czar 1 5 5.347108 5.347108 2503 +licens 1 5 5.347108 5.347108 2520 +indupraka 1 4 5.568345 5.568345 2639 +kodukula 1 4 5.568345 5.568345 2640 +bernoulli 1 4 5.568345 5.568345 2955 +pingali 1 4 5.568345 5.568345 2956 +vijai 1 4 5.568345 5.568345 2960 +stodghil 1 4 5.568345 5.568345 2864 +trivial 1 4 5.568345 5.568345 2786 +dagstuhl 1 4 5.568345 5.568345 2871 +interplai 1 3 5.857933 5.857933 3726 +vliw 1 3 5.857933 5.857933 3514 +chelmsford 1 3 5.857933 5.857933 3564 +schloss 1 3 5.857933 5.857933 3727 +useof 1 3 5.857933 5.857933 3368 +andoper 1 3 5.857933 5.857933 3621 +praka 1 2 6.263398 6.263398 4155 +nawaaz 1 2 6.263398 6.263398 4153 +ahm 1 2 6.263398 6.263398 4154 +kotlyar 1 2 6.263398 6.263398 4907 +menon 1 2 6.263398 6.263398 5249 +tothat 1 1 6.957497 6.957497 11414 +andmultiprocessor 1 1 6.957497 6.957497 11415 +fromscientif 1 1 6.957497 6.957497 11416 +withibm 1 1 6.957497 6.957497 11417 +hasinterest 1 1 6.957497 6.957497 11418 +athp 1 1 6.957497 6.957497 11419 +wasabout 1 1 6.957497 6.957497 11420 +necess 1 1 6.957497 6.957497 11421 +imperfectli 1 1 6.957497 6.957497 11413 +looptransform 1 1 6.957497 6.957497 11422 +loopparallel 1 1 6.957497 6.957497 11423 +regardingdata 1 1 6.957497 6.957497 11424 +centric 1 1 6.957497 6.957497 11425 +availableund 1 1 6.957497 6.957497 11426 +departmentmachin 1 1 6.957497 6.957497 11427 +andfind 1 1 6.957497 6.957497 11428 +alsofind 1 1 6.957497 6.957497 11429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..94ce3b98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +data 1 170 1.791759 1.791759 49 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +know 1 80 2.564949 2.564949 198 +upson 1 71 2.639057 2.639057 218 +order 1 69 2.708050 2.708050 249 +ithaca 1 65 2.772589 2.772589 294 +abstract 1 62 2.772589 2.772589 276 +type 1 61 2.833213 2.833213 296 +thesi 1 57 2.890372 2.890372 327 +case 1 51 2.995732 2.995732 351 +profession 1 51 2.995732 2.995732 345 +format 1 48 3.044522 3.044522 356 +tree 1 36 3.367296 3.367296 492 +enhanc 1 26 3.688879 3.688879 644 +sequenc 1 23 3.806662 3.806662 734 +sigmod 1 19 4.007333 4.007333 877 +save 1 14 4.317488 4.317488 1099 +dbm 1 13 4.382027 4.382027 1136 +submiss 1 11 4.553877 4.553877 1298 +road 1 11 4.553877 4.553877 1374 +seshadri 1 7 5.010635 5.010635 1803 +praveen 1 6 5.164786 5.164786 1996 +green 1 4 5.568345 5.568345 2848 +predat 1 3 5.857933 5.857933 3135 +warren 1 3 5.857933 5.857933 3301 +packer 1 3 5.857933 5.857933 3728 +adt 1 1 6.957497 6.957497 11430 +ranjani 1 1 6.957497 6.957497 11431 +ramamurthi 1 1 6.957497 6.957497 11432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..e1aaf237 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +confer 1 126 2.079442 2.079442 100 +dayton 1 119 2.079442 2.079442 104 +manag 1 114 2.197225 2.197225 125 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +real 1 93 2.397895 2.397895 144 +proceed 1 93 2.397895 2.397895 152 +graphic 1 90 2.397895 2.397895 147 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +optim 1 79 2.564949 2.564949 197 +issu 1 78 2.564949 2.564949 211 +exampl 1 77 2.564949 2.564949 195 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +decemb 1 80 2.564949 2.564949 215 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +order 1 69 2.708050 2.708050 249 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +knowledg 1 67 2.708050 2.708050 243 +view 1 70 2.708050 2.708050 254 +practic 1 70 2.708050 2.708050 246 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +evalu 1 64 2.772589 2.772589 266 +abstract 1 62 2.772589 2.772589 276 +previou 1 62 2.772589 2.772589 290 +complex 1 64 2.772589 2.772589 269 +plan 1 65 2.772589 2.772589 272 +result 1 65 2.772589 2.772589 281 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +street 1 63 2.772589 2.772589 293 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +march 1 61 2.833213 2.833213 295 +variou 1 56 2.890372 2.890372 317 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +case 1 51 2.995732 2.995732 351 +set 1 50 3.044522 3.044522 361 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +natur 1 44 3.135494 3.135494 406 +algebra 1 45 3.135494 3.135494 394 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +even 1 45 3.135494 3.135494 393 +answer 1 45 3.135494 3.135494 391 +combin 1 42 3.218876 3.218876 421 +howev 1 41 3.218876 3.218876 422 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +theoret 1 39 3.258097 3.258097 446 +form 1 39 3.258097 3.258097 443 +join 1 39 3.258097 3.258097 457 +littl 1 39 3.258097 3.258097 454 +map 1 39 3.258097 3.258097 452 +probabl 1 40 3.258097 3.258097 455 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +mean 1 37 3.332205 3.332205 477 +cost 1 37 3.332205 3.332205 480 +multi 1 36 3.367296 3.367296 493 +next 1 34 3.401197 3.401197 517 +singl 1 34 3.401197 3.401197 510 +either 1 35 3.401197 3.401197 506 +michael 1 35 3.401197 3.401197 514 +queri 1 33 3.433987 3.433987 524 +express 1 32 3.465736 3.465736 540 +kind 1 32 3.465736 3.465736 541 +idea 1 32 3.465736 3.465736 545 +given 1 32 3.465736 3.465736 538 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +storag 1 31 3.496508 3.496508 553 +scientist 1 31 3.496508 3.496508 560 +posit 1 31 3.496508 3.496508 552 +domain 1 30 3.555348 3.555348 564 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +consid 1 29 3.583519 3.583519 590 +built 1 29 3.583519 3.583519 592 +propos 1 28 3.610918 3.610918 602 +weather 1 28 3.610918 3.610918 618 +ask 1 28 3.610918 3.610918 597 +scale 1 28 3.610918 3.610918 613 +except 1 28 3.610918 3.610918 607 +framework 1 28 3.610918 3.610918 606 +valu 1 25 3.737670 3.737670 665 +wai 1 25 3.737670 3.737670 662 +strategi 1 25 3.737670 3.737670 682 +client 1 25 3.737670 3.737670 679 +demonstr 1 24 3.761200 3.761200 694 +store 1 24 3.761200 3.761200 693 +daili 1 24 3.761200 3.761200 706 +sequenc 1 23 3.806662 3.806662 734 +input 1 23 3.806662 3.806662 727 +thread 1 23 3.806662 3.806662 722 +serv 1 22 3.850148 3.850148 758 +defin 1 22 3.850148 3.850148 746 +identifi 1 22 3.850148 3.850148 760 +disk 1 22 3.850148 3.850148 747 +sequenti 1 22 3.850148 3.850148 745 +sort 1 22 3.850148 3.850148 738 +deal 1 22 3.850148 3.850148 736 +instead 1 22 3.850148 3.850148 756 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +sigmod 1 19 4.007333 4.007333 877 +statu 1 18 4.060443 4.060443 885 +record 1 18 4.060443 4.060443 890 +event 1 18 4.060443 4.060443 896 +account 1 18 4.060443 4.060443 882 +medic 1 17 4.110874 4.110874 958 +monitor 1 17 4.110874 4.110874 941 +expand 1 17 4.110874 4.110874 928 +weekli 1 17 4.110874 4.110874 919 +estim 1 17 4.110874 4.110874 930 +advantag 1 16 4.174387 4.174387 987 +easi 1 16 4.174387 4.174387 969 +ramakrishnan 1 16 4.174387 4.174387 972 +indic 1 15 4.248495 4.248495 1013 +stream 1 15 4.248495 4.248495 1015 +livni 1 15 4.248495 4.248495 1053 +manner 1 14 4.317488 4.317488 1074 +embed 1 14 4.317488 4.317488 1102 +convent 1 14 4.317488 4.317488 1072 +miron 1 14 4.317488 4.317488 1110 +econom 1 13 4.382027 4.382027 1184 +social 1 13 4.382027 4.382027 1123 +opportun 1 13 4.382027 4.382027 1161 +composit 1 13 4.382027 4.382027 1150 +step 1 13 4.382027 4.382027 1138 +front 1 13 4.382027 4.382027 1154 +amount 1 12 4.465908 4.465908 1208 +uniqu 1 12 4.465908 4.465908 1228 +scan 1 12 4.465908 4.465908 1243 +buffer 1 12 4.465908 4.465908 1211 +insid 1 12 4.465908 4.465908 1262 +raghu 1 12 4.465908 4.465908 1212 +regard 1 11 4.553877 4.553877 1309 +shore 1 11 4.553877 4.553877 1377 +motiv 1 11 4.553877 4.553877 1346 +instanc 1 11 4.553877 4.553877 1322 +relationship 1 10 4.653960 4.653960 1383 +devis 1 10 4.653960 4.653960 1451 +reli 1 10 4.653960 4.653960 1411 +subset 1 10 4.653960 4.653960 1425 +vldb 1 10 4.653960 4.653960 1470 +cheng 1 10 4.653960 4.653960 1381 +declar 1 9 4.753590 4.753590 1526 +tempor 1 9 4.753590 4.753590 1584 +compos 1 9 4.753590 4.753590 1527 +vice 1 9 4.753590 4.753590 1604 +strength 1 9 4.753590 4.753590 1494 +lock 1 9 4.753590 4.753590 1551 +respect 1 9 4.753590 4.753590 1545 +meta 1 9 4.753590 4.753590 1505 +intermedi 1 9 4.753590 4.753590 1497 +observ 1 9 4.753590 4.753590 1578 +mode 1 9 4.753590 4.753590 1492 +pose 1 9 4.753590 4.753590 1535 +pageth 1 7 5.010635 5.010635 1939 +notion 1 7 5.010635 5.010635 1947 +merg 1 7 5.010635 5.010635 1862 +whenev 1 7 5.010635 5.010635 1883 +therefor 1 7 5.010635 5.010635 1822 +supportfor 1 7 5.010635 5.010635 1854 +seshadri 1 7 5.010635 5.010635 1803 +financi 1 6 5.164786 5.164786 2197 +histor 1 6 5.164786 5.164786 2085 +consequ 1 6 5.164786 5.164786 1989 +feasibl 1 6 5.164786 5.164786 2157 +nest 1 6 5.164786 5.164786 2151 +temporari 1 6 5.164786 5.164786 2090 +praveen 1 6 5.164786 5.164786 1996 +treat 1 5 5.347108 5.347108 2521 +greater 1 5 5.347108 5.347108 2258 +correl 1 5 5.347108 5.347108 2279 +dual 1 5 5.347108 5.347108 2522 +distinct 1 5 5.347108 5.347108 2319 +overlap 1 5 5.347108 5.347108 2368 +complementari 1 5 5.347108 5.347108 2523 +educomput 1 5 5.347108 5.347108 2524 +phenomena 1 4 5.568345 5.568345 2962 +richter 1 4 5.568345 5.568345 2957 +flavor 1 4 5.568345 5.568345 2625 +zoom 1 4 5.568345 5.568345 2961 +inadequ 1 3 5.857933 5.857933 3730 +tediou 1 3 5.857933 5.857933 3731 +ineffici 1 3 5.857933 5.857933 3457 +megabyt 1 3 5.857933 5.857933 3732 +claus 1 3 5.857933 5.857933 3733 +offset 1 3 5.857933 5.857933 3467 +collaps 1 3 5.857933 5.857933 3729 +hourli 1 3 5.857933 5.857933 3734 +thathav 1 3 5.857933 5.857933 3735 +serverarchitectur 1 3 5.857933 5.857933 3736 +comad 1 3 5.857933 5.857933 3737 +informationfor 1 3 5.857933 5.857933 3738 +objectivescurr 1 2 6.263398 6.263398 5255 +statusmotiv 1 2 6.263398 6.263398 5256 +exampleseq 1 2 6.263398 6.263398 5257 +sequin 1 2 6.263398 6.263398 5250 +languageoptim 1 2 6.263398 6.263398 5258 +techniquesseq 1 2 6.263398 6.263398 5259 +developmentpublicationsrel 1 2 6.263398 6.263398 5260 +workcontact 1 2 6.263398 6.263398 5261 +informationproject 1 2 6.263398 6.263398 5262 +processingof 1 2 6.263398 6.263398 5263 +theseappl 1 2 6.263398 6.263398 5264 +metereolog 1 2 6.263398 6.263398 5265 +andbiolog 1 2 6.263398 6.263398 5266 +semanticstak 1 2 6.263398 6.263398 5267 +evaluationintegr 1 2 6.263398 6.263398 5268 +canstor 1 2 6.263398 6.263398 5269 +sequencesthes 1 2 6.263398 6.263398 5270 +themost 1 2 6.263398 6.263398 5271 +statusth 1 2 6.263398 6.263398 5272 +algebraicqueri 1 2 6.263398 6.263398 5273 +analogousto 1 2 6.263398 6.263398 5274 +candeclar 1 2 6.263398 6.263398 5275 +likesql 1 2 6.263398 6.263398 5276 +versa 1 2 6.263398 6.263398 5277 +querya 1 2 6.263398 6.263398 5278 +meteorolog 1 2 6.263398 6.263398 5253 +occurr 1 2 6.263398 6.263398 5279 +volcano 1 2 6.263398 6.263398 5252 +erupt 1 2 6.263398 6.263398 5280 +didth 1 2 6.263398 6.263398 5281 +earthquak 1 2 6.263398 6.263398 5251 +groupbi 1 2 6.263398 6.263398 5282 +subqueri 1 2 6.263398 6.263398 5283 +aggregatefunct 1 2 6.263398 6.263398 5284 +sequencesord 1 2 6.263398 6.263398 5285 +modelth 1 2 6.263398 6.263398 5286 +aredescrib 1 2 6.263398 6.263398 5254 +gist 1 2 6.263398 6.263398 5287 +ordereddomain 1 2 6.263398 6.263398 5288 +andposit 1 2 6.263398 6.263398 5289 +recordsmap 1 2 6.263398 6.263398 5290 +rise 1 2 6.263398 6.263398 5291 +relationaloper 1 2 6.263398 6.263398 5292 +andaggreg 1 2 6.263398 6.263398 5293 +researchersin 1 2 6.263398 6.263398 5294 +movingaggreg 1 2 6.263398 6.263398 5295 +worldsitu 1 2 6.263398 6.263398 5296 +extensionof 1 2 6.263398 6.263398 5297 +ofseq 1 2 6.263398 6.263398 5298 +languagew 1 2 6.263398 6.263398 5299 +usingwhich 1 2 6.263398 6.263398 5300 +languagei 1 2 6.263398 6.263398 5301 +queriesa 1 2 6.263398 6.263398 5302 +techniquesw 1 2 6.263398 6.263398 5303 +developmentth 1 2 6.263398 6.263398 5304 +viaa 1 2 6.263398 6.263398 5305 +ontop 1 2 6.263398 6.263398 5306 +languageswhich 1 2 6.263398 6.263398 5307 +arbitrarylevel 1 2 6.263398 6.263398 5308 +viceversa 1 2 6.263398 6.263398 5309 +detailson 1 2 6.263398 6.263398 5310 +publicationssequ 1 2 6.263398 6.263398 5311 +datapraveen 1 2 6.263398 6.263398 5312 +systempraveen 1 2 6.263398 6.263398 5313 +queriesraghu 1 2 6.263398 6.263398 5314 +workthedevis 1 2 6.263398 6.263398 5315 +visualizationenviron 1 2 6.263398 6.263398 5316 +servercontact 1 2 6.263398 6.263398 5317 +eduraghu 1 2 6.263398 6.263398 5318 +edumiron 1 2 6.263398 6.263398 5319 +seshadripraveen 1 2 6.263398 6.263398 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..78225962 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +analysi 1 124 2.079442 2.079442 98 +advanc 1 99 2.302585 2.302585 130 +member 1 84 2.484907 2.484907 165 +upson 1 71 2.639057 2.639057 218 +august 1 66 2.708050 2.708050 257 +knowledg 1 67 2.708050 2.708050 243 +reason 1 57 2.890372 2.890372 318 +taken 1 31 3.496508 3.496508 555 +fellow 1 24 3.761200 3.761200 701 +germani 1 17 4.110874 4.110874 946 +stori 1 14 4.317488 4.317488 1087 +german 1 6 5.164786 5.164786 2190 +exchang 1 5 5.347108 5.347108 2310 +fulbright 1 4 5.568345 5.568345 2963 +karlsruh 1 3 5.857933 5.857933 3689 +ralph 1 1 6.957497 6.957497 11433 +benzingerralph 1 1 6.957497 6.957497 11434 +benzingerw 1 1 6.957497 6.957497 11435 +sich 1 1 6.957497 6.957497 11436 +seinen 1 1 6.957497 6.957497 11437 +lorbeeren 1 1 6.957497 6.957497 11438 +ausruht 1 1 6.957497 6.957497 11439 +trgt 1 1 6.957497 6.957497 11440 +derfalschen 1 1 6.957497 6.957497 11441 +stell 1 1 6.957497 6.957497 11442 +studienstiftung 1 1 6.957497 6.957497 11443 +deutschen 1 1 6.957497 6.957497 11444 +volk 1 1 6.957497 6.957497 11445 +siemen 1 1 6.957497 6.957497 11446 +international 1 1 6.957497 6.957497 11447 +studentenkrei 1 1 6.957497 6.957497 11448 +alumnusat 1 1 6.957497 6.957497 11449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..b6846862 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,292 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +imag 1 91 2.397895 2.397895 161 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +school 1 84 2.484907 2.484907 188 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +exampl 1 77 2.564949 2.564949 195 +want 1 79 2.564949 2.564949 199 +state 1 76 2.564949 2.564949 207 +complet 1 77 2.564949 2.564949 208 +refer 1 78 2.564949 2.564949 203 +good 1 77 2.564949 2.564949 200 +interfac 1 79 2.564949 2.564949 209 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +guid 1 63 2.772589 2.772589 267 +abstract 1 62 2.772589 2.772589 276 +written 1 63 2.772589 2.772589 278 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +thesi 1 57 2.890372 2.890372 327 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +talk 1 53 2.944439 2.944439 336 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +allow 1 53 2.944439 2.944439 333 +digit 1 52 2.995732 2.995732 348 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +format 1 48 3.044522 3.044522 356 +visual 1 48 3.044522 3.044522 372 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +still 1 50 3.044522 3.044522 362 +numer 1 49 3.044522 3.044522 369 +effect 1 46 3.091042 3.091042 385 +move 1 47 3.091042 3.091042 382 +featur 1 46 3.091042 3.091042 386 +made 1 44 3.135494 3.135494 398 +algebra 1 45 3.135494 3.135494 394 +even 1 45 3.135494 3.135494 393 +show 1 43 3.178054 3.178054 417 +long 1 43 3.178054 3.178054 413 +continu 1 39 3.258097 3.258097 448 +probabl 1 40 3.258097 3.258097 455 +error 1 40 3.258097 3.258097 449 +origin 1 38 3.295837 3.295837 472 +correct 1 38 3.295837 3.295837 462 +short 1 36 3.367296 3.367296 499 +tree 1 36 3.367296 3.367296 492 +soon 1 36 3.367296 3.367296 494 +approxim 1 35 3.401197 3.401197 509 +print 1 34 3.401197 3.401197 503 +singl 1 34 3.401197 3.401197 510 +express 1 32 3.465736 3.465736 540 +independ 1 32 3.465736 3.465736 548 +concept 1 32 3.465736 3.465736 537 +human 1 32 3.465736 3.465736 546 +titl 1 31 3.496508 3.496508 556 +taken 1 31 3.496508 3.496508 555 +produc 1 30 3.555348 3.555348 572 +compon 1 30 3.555348 3.555348 570 +power 1 30 3.555348 3.555348 573 +hard 1 30 3.555348 3.555348 563 +limit 1 29 3.583519 3.583519 585 +progress 1 28 3.610918 3.610918 598 +load 1 28 3.610918 3.610918 601 +full 1 28 3.610918 3.610918 615 +symbol 1 27 3.637586 3.637586 620 +determin 1 27 3.637586 3.637586 630 +enhanc 1 26 3.688879 3.688879 644 +rule 1 26 3.688879 3.688879 638 +enabl 1 26 3.688879 3.688879 655 +challeng 1 26 3.688879 3.688879 653 +request 1 26 3.688879 3.688879 635 +constraint 1 26 3.688879 3.688879 636 +notic 1 25 3.737670 3.737670 675 +demonstr 1 24 3.761200 3.761200 694 +seri 1 24 3.761200 3.761200 708 +interpret 1 24 3.761200 3.761200 686 +input 1 23 3.806662 3.806662 727 +initi 1 23 3.806662 3.806662 717 +variabl 1 23 3.806662 3.806662 715 +brows 1 23 3.806662 3.806662 726 +equat 1 23 3.806662 3.806662 724 +size 1 23 3.806662 3.806662 713 +head 1 23 3.806662 3.806662 732 +begin 1 23 3.806662 3.806662 716 +sequenti 1 22 3.850148 3.850148 745 +defin 1 22 3.850148 3.850148 746 +reduc 1 22 3.850148 3.850148 759 +identifi 1 22 3.850148 3.850148 760 +output 1 21 3.912023 3.912023 788 +voic 1 21 3.912023 3.912023 806 +util 1 21 3.912023 3.912023 774 +hypertext 1 19 4.007333 4.007333 865 +left 1 19 4.007333 4.007333 851 +separ 1 19 4.007333 4.007333 844 +record 1 18 4.060443 4.060443 890 +demo 1 18 4.060443 4.060443 888 +along 1 18 4.060443 4.060443 878 +listen 1 18 4.060443 4.060443 907 +dimension 1 18 4.060443 4.060443 909 +element 1 18 4.060443 4.060443 895 +lower 1 18 4.060443 4.060443 886 +render 1 17 4.110874 4.110874 947 +matrix 1 17 4.110874 4.110874 933 +choic 1 16 4.174387 4.174387 979 +cognit 1 16 4.174387 4.174387 986 +took 1 16 4.174387 4.174387 1010 +upon 1 16 4.174387 4.174387 978 +piec 1 15 4.248495 4.248495 1020 +later 1 15 4.248495 4.248495 1043 +audio 1 14 4.317488 4.317488 1094 +latex 1 14 4.317488 4.317488 1064 +attribut 1 14 4.317488 4.317488 1092 +squar 1 14 4.317488 4.317488 1082 +shown 1 14 4.317488 4.317488 1080 +context 1 13 4.382027 4.382027 1153 +emac 1 13 4.382027 4.382027 1143 +directli 1 13 4.382027 4.382027 1141 +speech 1 12 4.465908 4.465908 1222 +speak 1 12 4.465908 4.465908 1283 +reader 1 12 4.465908 4.465908 1246 +calcul 1 12 4.465908 4.465908 1268 +shape 1 12 4.465908 4.465908 1245 +typic 1 11 4.553877 4.553877 1360 +ofcomput 1 10 4.653960 4.653960 1442 +donald 1 9 4.753590 4.753590 1510 +notat 1 9 4.753590 4.753590 1489 +imposs 1 9 4.753590 4.753590 1513 +distanc 1 9 4.753590 4.753590 1500 +equival 1 9 4.753590 4.753590 1496 +root 1 8 4.875197 4.875197 1650 +wire 1 8 4.875197 4.875197 1747 +cross 1 8 4.875197 4.875197 1703 +illustr 1 8 4.875197 4.875197 1679 +forget 1 8 4.875197 4.875197 1712 +replac 1 8 4.875197 4.875197 1668 +dedic 1 7 5.010635 5.010635 1843 +stereo 1 7 5.010635 5.010635 1818 +dimens 1 7 5.010635 5.010635 1930 +notion 1 7 5.010635 5.010635 1947 +hear 1 7 5.010635 5.010635 1940 +difficult 1 6 5.164786 5.164786 2035 +quick 1 6 5.164786 5.164786 2184 +vari 1 6 5.164786 5.164786 2001 +nest 1 6 5.164786 5.164786 2151 +heurist 1 6 5.164786 5.164786 2125 +chosen 1 6 5.164786 5.164786 1984 +meant 1 6 5.164786 5.164786 2055 +fraction 1 5 5.347108 5.347108 2259 +mutual 1 5 5.347108 5.347108 2418 +recogn 1 5 5.347108 5.347108 2302 +quantifi 1 5 5.347108 5.347108 2525 +substitut 1 5 5.347108 5.347108 2247 +inlin 1 4 5.568345 5.568345 2964 +encod 1 4 5.568345 5.568345 2929 +paus 1 4 5.568345 5.568345 2965 +orthogon 1 4 5.568345 5.568345 2832 +compris 1 4 5.568345 5.568345 2862 +vital 1 4 5.568345 5.568345 2733 +ident 1 4 5.568345 5.568345 2826 +customiz 1 4 5.568345 5.568345 2966 +trick 1 4 5.568345 5.568345 2967 +thati 1 4 5.568345 5.568345 2616 +heard 1 4 5.568345 5.568345 2895 +raman 1 4 5.568345 5.568345 2827 +blind 1 3 5.857933 5.857933 3662 +formula 1 3 5.857933 5.857933 3405 +subscript 1 3 5.857933 5.857933 3469 +percept 1 3 5.857933 5.857933 3739 +forthes 1 3 5.857933 5.857933 3199 +tripl 1 3 5.857933 5.857933 3160 +experienc 1 3 5.857933 5.857933 3203 +meaning 1 3 5.857933 5.857933 3458 +exponenti 1 3 5.857933 5.857933 3529 +thetim 1 3 5.857933 5.857933 3581 +proper 1 3 5.857933 5.857933 3323 +orpostscript 1 3 5.857933 5.857933 3329 +spoken 1 2 6.263398 6.263398 5122 +convei 1 2 6.263398 6.263398 4690 +succinctli 1 2 6.263398 6.263398 4275 +expon 1 2 6.263398 6.263398 5323 +monoton 1 2 6.263398 6.263398 5321 +absenc 1 2 6.263398 6.263398 4878 +logarithm 1 2 6.263398 6.263398 5322 +oppos 1 2 6.263398 6.263398 4855 +ofintegr 1 2 6.263398 6.263398 5324 +summat 1 2 6.263398 6.263398 5325 +referenc 1 2 6.263398 6.263398 4757 +justa 1 2 6.263398 6.263398 5326 +glori 1 2 6.263398 6.263398 5327 +aster 1 1 6.957497 6.957497 11450 +demonstrationi 1 1 6.957497 6.957497 11457 +forrend 1 1 6.957497 6.957497 11458 +myphd 1 1 6.957497 6.957497 11459 +dectalk 1 1 6.957497 6.957497 11460 +mulaw 1 1 6.957497 6.957497 11461 +mono 1 1 6.957497 6.957497 11462 +dvip 1 1 6.957497 6.957497 11463 +andround 1 1 6.957497 6.957497 11464 +faad 1 1 6.957497 6.957497 11465 +bruno 1 1 6.957497 6.957497 11451 +casey 1 1 6.957497 6.957497 11466 +examplessinc 1 1 6.957497 6.957497 11467 +inflect 1 1 6.957497 6.957497 11468 +toconvei 1 1 6.957497 6.957497 11469 +renderingsub 1 1 6.957497 6.957497 11470 +superscript 1 1 6.957497 6.957497 11452 +audiost 1 1 6.957497 6.957497 11471 +dimensionus 1 1 6.957497 6.957497 11472 +knuth 1 1 6.957497 6.957497 11453 +verbatim 1 1 6.957497 6.957497 11473 +layoutoper 1 1 6.957497 6.957497 11474 +verydiffer 1 1 6.957497 6.957497 11475 +unambigu 1 1 6.957497 6.957497 11454 +monotonicchang 1 1 6.957497 6.957497 11476 +trigonometr 1 1 6.957497 6.957497 11477 +ambigu 1 1 6.957497 6.957497 11478 +parenthesi 1 1 6.957497 6.957497 11479 +asexpon 1 1 6.957497 6.957497 11480 +isfulli 1 1 6.957497 6.957497 11481 +innocu 1 1 6.957497 6.957497 11482 +mostdifficult 1 1 6.957497 6.957497 11483 +theintegr 1 1 6.957497 6.957497 11484 +ofhuman 1 1 6.957497 6.957497 11485 +ofcross 1 1 6.957497 6.957497 11486 +referenceableobject 1 1 6.957497 6.957497 11487 +latercross 1 1 6.957497 6.957497 11488 +inton 1 1 6.957497 6.957497 11455 +intermix 1 1 6.957497 6.957497 11456 +followingdeepli 1 1 6.957497 6.957497 11489 +fledgedsymbol 1 1 6.957497 6.957497 11490 +thematrix 1 1 6.957497 6.957497 11491 +commenc 1 1 6.957497 6.957497 11492 +aseach 1 1 6.957497 6.957497 11493 +secondsto 1 1 6.957497 6.957497 11494 +spacenot 1 1 6.957497 6.957497 11495 +changeth 1 1 6.957497 6.957497 11496 +techniquefor 1 1 6.957497 6.957497 11497 +renderingsconvei 1 1 6.957497 6.957497 11498 +thesub 1 1 6.957497 6.957497 11499 +denomin 1 1 6.957497 6.957497 11500 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..9de61f5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +parallel 1 169 1.791759 1.791759 60 +process 1 142 1.945910 1.945910 72 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +check 1 115 2.197225 2.197225 118 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +octob 1 89 2.397895 2.397895 156 +ieee 1 86 2.484907 2.484907 190 +learn 1 86 2.484907 2.484907 170 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +effici 1 73 2.639057 2.639057 233 +test 1 66 2.708050 2.708050 252 +august 1 66 2.708050 2.708050 257 +ithaca 1 65 2.772589 2.772589 294 +function 1 62 2.772589 2.772589 275 +foundat 1 62 2.772589 2.772589 286 +juli 1 60 2.833213 2.833213 305 +extens 1 53 2.944439 2.944439 340 +without 1 50 3.044522 3.044522 370 +linear 1 41 3.218876 3.218876 431 +theoret 1 39 3.258097 3.258097 446 +correct 1 38 3.295837 3.295837 462 +approxim 1 35 3.401197 3.401197 509 +bound 1 26 3.688879 3.688879 659 +scalabl 1 24 3.761200 3.761200 705 +equat 1 23 3.806662 3.806662 724 +self 1 22 3.850148 3.850148 761 +polynomi 1 14 4.317488 4.317488 1069 +squar 1 14 4.317488 4.317488 1082 +branch 1 11 4.553877 4.553877 1318 +kumar 1 9 4.753590 4.753590 1506 +russel 1 9 4.753590 4.753590 1507 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +bottleneck 1 4 5.568345 5.568345 2769 +combinator 1 4 5.568345 5.568345 2915 +shah 1 4 5.568345 5.568345 2814 +ravi 1 3 5.857933 5.857933 3185 +funda 1 3 5.857933 5.857933 3645 +recurr 1 3 5.857933 5.857933 3740 +latin 1 3 5.857933 5.857933 3741 +sundaram 1 3 5.857933 5.857933 3463 +ramachandran 1 3 5.857933 5.857933 3742 +lnc 1 2 6.263398 6.263398 5085 +width 1 2 6.263398 6.263398 5328 +alexand 1 2 6.263398 6.263398 5329 +uumln 1 1 6.957497 6.957497 11501 +sivakumar 1 1 6.957497 6.957497 11502 +jeyakumar 1 1 6.957497 6.957497 11503 +muthukumarasami 1 1 6.957497 6.957497 11504 +umakishor 1 1 6.957497 6.957497 11505 +gautam 1 1 6.957497 6.957497 11506 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..459c6399 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +teach 1 108 2.197225 2.197225 112 +access 1 102 2.302585 2.302585 136 +search 1 95 2.397895 2.397895 155 +imag 1 91 2.397895 2.397895 161 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +librari 1 87 2.484907 2.484907 181 +method 1 80 2.564949 2.564949 213 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +free 1 73 2.639057 2.639057 224 +workshop 1 71 2.639057 2.639057 239 +multimedia 1 68 2.708050 2.708050 258 +differ 1 66 2.708050 2.708050 253 +organ 1 65 2.772589 2.772589 265 +march 1 61 2.833213 2.833213 295 +content 1 59 2.833213 2.833213 302 +think 1 57 2.890372 2.890372 314 +variou 1 56 2.890372 2.890372 317 +undergradu 1 54 2.944439 2.944439 338 +local 1 55 2.944439 2.944439 334 +profession 1 51 2.995732 2.995732 345 +format 1 48 3.044522 3.044522 356 +visual 1 48 3.044522 3.044522 372 +electron 1 47 3.091042 3.091042 379 +featur 1 46 3.091042 3.091042 386 +california 1 46 3.091042 3.091042 388 +video 1 44 3.135494 3.135494 405 +third 1 43 3.178054 3.178054 412 +vision 1 41 3.218876 3.218876 430 +committe 1 34 3.401197 3.401197 522 +john 1 33 3.433987 3.433987 532 +taught 1 33 3.433987 3.433987 526 +transform 1 32 3.465736 3.465736 542 +pass 1 28 3.610918 3.610918 611 +held 1 28 3.610918 3.610918 600 +retriev 1 27 3.637586 3.637586 621 +subject 1 26 3.688879 3.688879 647 +compar 1 26 3.688879 3.688879 648 +detect 1 26 3.688879 3.688879 646 +greg 1 24 3.761200 3.761200 695 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +break 1 20 3.951244 3.951244 812 +spend 1 19 4.007333 4.007333 850 +boston 1 19 4.007333 4.007333 862 +scott 1 18 4.060443 4.060443 884 +miller 1 17 4.110874 4.110874 949 +vector 1 16 4.174387 4.174387 961 +fourth 1 16 4.174387 4.174387 999 +researchmi 1 14 4.317488 4.317488 1119 +coher 1 14 4.317488 4.317488 1109 +massachusett 1 14 4.317488 4.317488 1118 +scene 1 14 4.317488 4.317488 1114 +francisco 1 14 4.317488 4.317488 1095 +econom 1 13 4.382027 4.382027 1184 +huang 1 12 4.465908 4.465908 1202 +amount 1 12 4.465908 4.465908 1208 +reader 1 12 4.465908 4.465908 1246 +impact 1 11 4.553877 4.553877 1334 +refin 1 11 4.553877 4.553877 1363 +princip 1 10 4.653960 4.653960 1397 +correspond 1 10 4.653960 4.653960 1382 +observ 1 9 4.753590 4.753590 1578 +classifi 1 9 4.753590 4.753590 1537 +kevin 1 9 4.753590 4.753590 1482 +juan 1 9 4.753590 4.753590 1580 +european 1 8 4.875197 4.875197 1763 +ramin 1 7 5.010635 5.010635 1820 +justin 1 7 5.010635 5.010635 1789 +adob 1 7 5.010635 5.010635 1873 +parametr 1 7 5.010635 5.010635 1819 +sweden 1 7 5.010635 5.010635 1885 +courtesi 1 7 5.010635 5.010635 1953 +zabih 1 6 5.164786 5.164786 2138 +freeli 1 6 5.164786 5.164786 2014 +price 1 6 5.164786 5.164786 1999 +acrobat 1 6 5.164786 5.164786 2063 +phil 1 5 5.347108 5.347108 2419 +fair 1 5 5.347108 5.347108 2333 +florida 1 5 5.347108 5.347108 2526 +essai 1 4 5.568345 5.568345 2948 +newslett 1 4 5.568345 5.568345 2873 +scribe 1 4 5.568345 5.568345 2631 +cvpr 1 4 5.568345 5.568345 2761 +jing 1 3 5.857933 5.857933 3521 +voskuhl 1 3 5.857933 5.857933 3109 +szewczyk 1 3 5.857933 5.857933 3108 +histogram 1 3 5.857933 5.857933 3490 +stockholm 1 3 5.857933 5.857933 3715 +conjunct 1 3 5.857933 5.857933 3743 +cytacki 1 2 6.263398 6.263398 5330 +pageramin 1 1 6.957497 6.957497 11507 +zabihassist 1 1 6.957497 6.957497 11508 +professorrdz 1 1 6.957497 6.957497 11509 +agr 1 1 6.957497 6.957497 11510 +studentsi 1 1 6.957497 6.957497 11511 +vera 1 1 6.957497 6.957497 11512 +kettnak 1 1 6.957497 6.957497 11513 +olga 1 1 6.957497 6.957497 11514 +veksler 1 1 6.957497 6.957497 11515 +publicationsmost 1 1 6.957497 6.957497 11516 +sarasota 1 1 6.957497 6.957497 11517 +woodfil 1 1 6.957497 6.957497 11518 +teachingi 1 1 6.957497 6.957497 11519 +activitiesi 1 1 6.957497 6.957497 11520 +comitte 1 1 6.957497 6.957497 11521 +acknowledgementsthi 1 1 6.957497 6.957497 11522 +huttenlocherlast 1 1 6.957497 6.957497 11523 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..a6e08e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +roderick 1 1 6.957497 6.957497 11524 +moten 1 1 6.957497 6.957497 11525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..51be7955 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +homepag 1 93 2.397895 2.397895 148 +upson 1 71 2.639057 2.639057 218 +result 1 65 2.772589 2.772589 281 +talk 1 53 2.944439 2.944439 336 +telephon 1 50 3.044522 3.044522 373 +describ 1 45 3.135494 3.135494 400 +york 1 41 3.218876 3.218876 435 +random 1 34 3.401197 3.401197 511 +universityithaca 1 24 3.761200 3.761200 710 +kumar 1 9 4.753590 4.753590 1506 +hallcornel 1 8 4.875197 4.875197 1757 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +fair 1 5 5.347108 5.347108 2333 +funda 1 3 5.857933 5.857933 3645 +ravi 1 3 5.857933 5.857933 3185 +ergun 1 2 6.263398 6.263398 5087 +wasserman 1 2 6.263398 6.263398 5331 +nephew 1 2 6.263398 6.263398 5332 +homepageronitt 1 1 6.957497 6.957497 11526 +rubinfeldi 1 1 6.957497 6.957497 11527 +rubinfeldcomput 1 1 6.957497 6.957497 11528 +edupictur 1 1 6.957497 6.957497 11529 +eitan 1 1 6.957497 6.957497 11530 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..45de3285 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +institut 1 84 2.484907 2.484907 187 +appear 1 78 2.564949 2.564949 210 +state 1 76 2.564949 2.564949 207 +involv 1 71 2.639057 2.639057 227 +receiv 1 66 2.708050 2.708050 244 +share 1 59 2.833213 2.833213 304 +thesi 1 57 2.890372 2.890372 327 +advisor 1 51 2.995732 2.995732 355 +post 1 35 3.401197 3.401197 505 +titl 1 31 3.496508 3.496508 556 +full 1 28 3.610918 3.610918 615 +consist 1 26 3.688879 3.688879 651 +reliabl 1 25 3.737670 3.737670 674 +doctor 1 24 3.761200 3.761200 709 +condit 1 16 4.174387 4.174387 975 +edui 1 13 4.382027 4.382027 1193 +replic 1 12 4.465908 4.465908 1231 +thedepart 1 11 4.553877 4.553877 1350 +israel 1 11 4.553877 4.553877 1366 +mainli 1 10 4.653960 4.653960 1432 +birman 1 9 4.753590 4.753590 1531 +friedman 1 7 5.010635 5.010635 1886 +trade 1 7 5.010635 5.010635 1815 +cornellunivers 1 7 5.010635 5.010635 1916 +clickher 1 5 5.347108 5.347108 2428 +vaysburd 1 4 5.568345 5.568345 2846 +tina 1 3 5.857933 5.857933 3744 +scienceatcornel 1 2 6.263398 6.263398 5333 +withken 1 2 6.263398 6.263398 5334 +androbbert 1 2 6.263398 6.263398 4953 +thehoru 1 2 6.263398 6.263398 5179 +attiya 1 2 6.263398 6.263398 5197 +partition 1 2 6.263398 6.263398 4954 +friedmanroi 1 1 6.957497 6.957497 11532 +friedmanpost 1 1 6.957497 6.957497 11533 +universityroi 1 1 6.957497 6.957497 11534 +rennessein 1 1 6.957497 6.957497 11535 +thetechnion 1 1 6.957497 6.957497 11531 +washagit 1 1 6.957497 6.957497 11536 +wasconsist 1 1 6.957497 6.957497 11537 +themilliped 1 1 6.957497 6.957497 11538 +withassaf 1 1 6.957497 6.957497 11539 +schuster 1 1 6.957497 6.957497 11540 +papersr 1 1 6.957497 6.957497 11541 +scalabledistribut 1 1 6.957497 6.957497 11542 +coprocessor 1 1 6.957497 6.957497 11543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..03cde2f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +cornel 1 215 1.386294 1.386294 23 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +report 1 131 2.079442 2.079442 92 +version 1 113 2.197225 2.197225 122 +associ 1 93 2.397895 2.397895 151 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +dept 1 64 2.772589 2.772589 291 +tech 1 35 3.401197 3.401197 515 +photograph 1 15 4.248495 4.248495 1056 +daniela 1 3 5.857933 5.857933 3611 +catalogc 1 2 6.263398 6.263398 5023 +infodesign 1 1 6.957497 6.957497 11544 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..425709a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +place 1 106 2.197225 2.197225 124 +version 1 113 2.197225 2.197225 122 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +commun 1 95 2.397895 2.397895 157 +contain 1 81 2.484907 2.484907 174 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +html 1 75 2.639057 2.639057 235 +ithaca 1 65 2.772589 2.772589 294 +virtual 1 62 2.772589 2.772589 285 +complex 1 64 2.772589 2.772589 269 +scientif 1 53 2.944439 2.944439 341 +advisor 1 51 2.995732 2.995732 355 +protocol 1 45 3.135494 3.135494 407 +multi 1 36 3.367296 3.367296 493 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +secur 1 30 3.555348 3.555348 577 +framework 1 28 3.610918 3.610918 606 +american 1 27 3.637586 3.637586 634 +reliabl 1 25 3.737670 3.737670 674 +flow 1 24 3.761200 3.761200 700 +mobil 1 23 3.806662 3.806662 730 +hous 1 21 3.912023 3.912023 801 +flexibl 1 21 3.912023 3.912023 792 +applet 1 20 3.951244 3.951244 827 +media 1 19 4.007333 4.007333 861 +agent 1 18 4.060443 4.060443 910 +club 1 15 4.248495 4.248495 1058 +senior 1 14 4.317488 4.317488 1120 +horu 1 14 4.317488 4.317488 1116 +edui 1 13 4.382027 4.382027 1193 +composit 1 13 4.382027 4.382027 1150 +incorpor 1 13 4.382027 4.382027 1163 +weak 1 13 4.382027 4.382027 1159 +danc 1 12 4.465908 4.465908 1278 +market 1 11 4.553877 4.553877 1361 +interestsmi 1 10 4.653960 4.653960 1462 +robbert 1 9 4.753590 4.753590 1529 +guitar 1 8 4.875197 4.875197 1758 +synchroni 1 7 5.010635 5.010635 1923 +band 1 6 5.164786 5.164786 2198 +strong 1 6 5.164786 5.164786 2029 +babi 1 5 5.347108 5.347108 2493 +girl 1 5 5.347108 5.347108 2410 +jazz 1 5 5.347108 5.347108 2527 +swing 1 4 5.568345 5.568345 2887 +tanenbaum 1 3 5.857933 5.857933 3397 +sharewar 1 3 5.857933 5.857933 3503 +dutch 1 3 5.857933 5.857933 3592 +netherland 1 3 5.857933 5.857933 3650 +lightweight 1 3 5.857933 5.857933 3234 +associatecornel 1 2 6.263398 6.263398 5137 +scienceatcornel 1 2 6.263398 6.263398 5333 +withken 1 2 6.263398 6.263398 5334 +tacoma 1 2 6.263398 6.263398 4909 +renesserobbert 1 1 6.957497 6.957497 11546 +renessesenior 1 1 6.957497 6.957497 11547 +universityrvr 1 1 6.957497 6.957497 11548 +universityinithaca 1 1 6.957497 6.957497 11549 +birmanin 1 1 6.957497 6.957497 11550 +wasandi 1 1 6.957497 6.957497 11551 +brand 1 1 6.957497 6.957497 11545 +caml 1 1 6.957497 6.957497 11552 +nynetth 1 1 6.957497 6.957497 11553 +ageless 1 1 6.957497 6.957497 11554 +accordion 1 1 6.957497 6.957497 11555 +stuffcornel 1 1 6.957497 6.957497 11556 +ithacaithacanet 1 1 6.957497 6.957497 11557 +spinner 1 1 6.957497 6.957497 11558 +paperssoftwar 1 1 6.957497 6.957497 11559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..959e12cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +technic 1 100 2.302585 2.302585 140 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +free 1 73 2.639057 2.639057 224 +august 1 66 2.708050 2.708050 257 +copi 1 63 2.772589 2.772589 284 +experi 1 64 2.772589 2.772589 283 +guid 1 63 2.772589 2.772589 267 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +februari 1 54 2.944439 2.944439 328 +sampl 1 53 2.944439 2.944439 339 +profession 1 51 2.995732 2.995732 345 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +electron 1 47 3.091042 3.091042 379 +answer 1 45 3.135494 3.135494 391 +submit 1 39 3.258097 3.258097 440 +annual 1 40 3.258097 3.258097 458 +especi 1 36 3.367296 3.367296 496 +approxim 1 35 3.401197 3.401197 509 +award 1 34 3.401197 3.401197 523 +survei 1 35 3.401197 3.401197 513 +detect 1 26 3.688879 3.688879 646 +revis 1 26 3.688879 3.688879 640 +consist 1 26 3.688879 3.688879 651 +reliabl 1 25 3.737670 3.737670 674 +doctor 1 24 3.761200 3.761200 709 +seri 1 24 3.761200 3.761200 708 +finish 1 22 3.850148 3.850148 748 +properti 1 22 3.850148 3.850148 749 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +failur 1 18 4.060443 4.060443 898 +expand 1 17 4.110874 4.110874 928 +diego 1 16 4.174387 4.174387 992 +letter 1 16 4.174387 4.174387 981 +asynchron 1 12 4.465908 4.465908 1229 +elect 1 8 4.875197 4.875197 1771 +perfect 1 7 5.010635 5.010635 1921 +chandra 1 6 5.164786 5.164786 2091 +consensu 1 6 5.164786 5.164786 2080 +prize 1 6 5.164786 5.164786 2150 +keith 1 5 5.347108 5.347108 2528 +stabl 1 5 5.347108 5.347108 2309 +blow 1 5 5.347108 5.347108 2407 +spam 1 4 5.568345 5.568345 2927 +bean 1 4 5.568345 5.568345 2968 +detector 1 3 5.857933 5.857933 3745 +horizon 1 3 5.857933 5.857933 3746 +zone 1 3 5.857933 5.857933 3747 +cash 1 3 5.857933 5.857933 3355 +marzullo 1 2 6.263398 6.263398 4919 +formor 1 2 6.263398 6.263398 5335 +distributedcomput 1 2 6.263398 6.263398 5336 +lecturenot 1 2 6.263398 6.263398 4679 +laura 1 1 6.957497 6.957497 11561 +sabel 1 1 6.957497 6.957497 11560 +professorkeith 1 1 6.957497 6.957497 11565 +tushar 1 1 6.957497 6.957497 11566 +sfailur 1 1 6.957497 6.957497 11567 +asynchronousdistribut 1 1 6.957497 6.957497 11562 +subcut 1 1 6.957497 6.957497 11568 +wdag 1 1 6.957497 6.957497 11569 +cow 1 1 6.957497 6.957497 11570 +strawberri 1 1 6.957497 6.957497 11571 +tart 1 1 6.957497 6.957497 11572 +torch 1 1 6.957497 6.957497 11573 +alpacanet 1 1 6.957497 6.957497 11574 +gourmet 1 1 6.957497 6.957497 11575 +thebobbi 1 1 6.957497 6.957497 11576 +jelli 1 1 6.957497 6.957497 11563 +belli 1 1 6.957497 6.957497 11577 +bingo 1 1 6.957497 6.957497 11564 +canplai 1 1 6.957497 6.957497 11578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..78f81808 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +assign 1 135 1.945910 1.945910 66 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +databas 1 122 2.079442 2.079442 86 +well 1 109 2.197225 2.197225 121 +memori 1 101 2.302585 2.302585 139 +need 1 98 2.302585 2.302585 135 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +messag 1 76 2.564949 2.564949 212 +optim 1 79 2.564949 2.564949 197 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +solv 1 73 2.639057 2.639057 234 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +august 1 66 2.708050 2.708050 257 +simul 1 66 2.708050 2.708050 255 +knowledg 1 67 2.708050 2.708050 243 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +share 1 59 2.833213 2.833213 304 +type 1 61 2.833213 2.833213 296 +automat 1 61 2.833213 2.833213 306 +faculti 1 56 2.890372 2.890372 325 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +maintain 1 51 2.995732 2.995732 342 +principl 1 48 3.044522 3.044522 357 +even 1 45 3.135494 3.135494 393 +protocol 1 45 3.135494 3.135494 407 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +show 1 43 3.178054 3.178054 417 +theoret 1 39 3.258097 3.258097 446 +continu 1 39 3.258097 3.258097 448 +transact 1 39 3.258097 3.258097 438 +correct 1 38 3.295837 3.295837 462 +respons 1 37 3.332205 3.332205 476 +least 1 35 3.401197 3.401197 516 +concurr 1 34 3.401197 3.401197 501 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +collabor 1 32 3.465736 3.465736 543 +abl 1 30 3.555348 3.555348 566 +exist 1 30 3.555348 3.555348 569 +common 1 30 3.555348 3.555348 574 +synchron 1 29 3.583519 3.583519 588 +pass 1 28 3.610918 3.610918 611 +determin 1 27 3.637586 3.637586 630 +consist 1 26 3.688879 3.688879 651 +fundament 1 25 3.737670 3.737670 661 +known 1 24 3.761200 3.761200 702 +methodolog 1 23 3.806662 3.806662 733 +hierarchi 1 22 3.850148 3.850148 744 +thu 1 21 3.912023 3.912023 773 +increas 1 20 3.951244 3.951244 829 +prove 1 19 4.007333 4.007333 848 +failur 1 18 4.060443 4.060443 898 +whether 1 17 4.110874 4.110874 918 +partit 1 16 4.174387 4.174387 984 +princeton 1 15 4.248495 4.248495 1042 +shown 1 14 4.317488 4.317488 1080 +wait 1 13 4.382027 4.382027 1168 +cannot 1 13 4.382027 4.382027 1144 +difficulti 1 13 4.382027 4.382027 1132 +necessari 1 13 4.382027 4.382027 1147 +canada 1 13 4.382027 4.382027 1158 +asynchron 1 12 4.465908 4.465908 1229 +robust 1 12 4.465908 4.465908 1271 +replic 1 12 4.465908 4.465908 1231 +abil 1 11 4.553877 4.553877 1341 +clock 1 11 4.553877 4.553877 1320 +interestsmi 1 10 4.653960 4.653960 1462 +reli 1 10 4.653960 4.653960 1411 +guarante 1 10 4.653960 4.653960 1391 +informationabout 1 9 4.753590 4.753590 1515 +paradigm 1 8 4.875197 4.875197 1662 +bridg 1 8 4.875197 4.875197 1764 +crash 1 8 4.875197 4.875197 1616 +exactli 1 7 5.010635 5.010635 1817 +suffici 1 7 5.010635 5.010635 1897 +montreal 1 7 5.010635 5.010635 1961 +pittsburgh 1 7 5.010635 5.010635 1938 +pennsylvania 1 7 5.010635 5.010635 1932 +prasad 1 6 5.164786 5.164786 2126 +consensu 1 6 5.164786 5.164786 2080 +mistak 1 6 5.164786 5.164786 2110 +chandra 1 6 5.164786 5.164786 2091 +broadcast 1 5 5.347108 5.347108 2453 +infinit 1 4 5.568345 5.568345 2596 +forfault 1 3 5.857933 5.857933 3748 +detector 1 3 5.857933 5.857933 3745 +ofobject 1 3 5.857933 5.857933 3399 +toueg 1 2 6.263398 6.263398 5339 +inher 1 2 6.263398 6.263398 5086 +mere 1 2 6.263398 6.263398 5340 +slow 1 2 6.263398 6.263398 5341 +inour 1 2 6.263398 6.263398 4445 +systemswith 1 2 6.263398 6.263398 5342 +muchinform 1 2 6.263398 6.263398 4811 +amajor 1 2 6.263398 6.263398 5343 +herlihi 1 2 6.263398 6.263398 5144 +anobject 1 2 6.263398 6.263398 4267 +hadzilaco 1 2 6.263398 6.263398 5338 +professorph 1 1 6.957497 6.957497 11588 +toleranceand 1 1 6.957497 6.957497 11589 +andshar 1 1 6.957497 6.957497 11590 +gapbetween 1 1 6.957497 6.957497 11591 +practicalsolut 1 1 6.957497 6.957497 11592 +withtushar 1 1 6.957497 6.957497 11593 +chandraand 1 1 6.957497 6.957497 11594 +jayanti 1 1 6.957497 6.957497 11583 +onunreli 1 1 6.957497 6.957497 11595 +failuredetector 1 1 6.957497 6.957497 11584 +computingst 1 1 6.957497 6.957497 11596 +adeterminist 1 1 6.957497 6.957497 11597 +impossibilityresult 1 1 6.957497 6.957497 11598 +aprocess 1 1 6.957497 6.957497 11599 +wefirst 1 1 6.957497 6.957497 11600 +unreli 1 1 6.957497 6.957497 11585 +canmak 1 1 6.957497 6.957497 11601 +solveconsensu 1 1 6.957497 6.957497 11602 +weakest 1 1 6.957497 6.957497 11586 +practicalityof 1 1 6.957497 6.957497 11603 +theircorrect 1 1 6.957497 6.957497 11604 +sharedobject 1 1 6.957497 6.957497 11605 +accessesthi 1 1 6.957497 6.957497 11606 +otherprocess 1 1 6.957497 6.957497 11607 +thatcorrespond 1 1 6.957497 6.957497 11608 +atani 1 1 6.957497 6.957497 11609 +whetherrobust 1 1 6.957497 6.957497 11610 +bracha 1 1 6.957497 6.957497 11611 +srikanth 1 1 6.957497 6.957497 11612 +abbadi 1 1 6.957497 6.957497 11613 +neiger 1 1 6.957497 6.957497 11587 +detectorfor 1 1 6.957497 6.957497 11614 +vancouv 1 1 6.957497 6.957497 11615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..94ae6a5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +techniqu 1 99 2.302585 2.302585 138 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +proceed 1 93 2.397895 2.397895 152 +journal 1 83 2.484907 2.484907 183 +master 1 76 2.564949 2.564949 216 +messag 1 76 2.564949 2.564949 212 +upson 1 71 2.639057 2.639057 218 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +foundat 1 62 2.772589 2.772589 286 +complex 1 64 2.772589 2.772589 269 +septemb 1 65 2.772589 2.772589 274 +thesi 1 57 2.890372 2.890372 327 +algebra 1 45 3.135494 3.135494 394 +editor 1 41 3.218876 3.218876 433 +submit 1 39 3.258097 3.258097 440 +small 1 39 3.258097 3.258097 447 +formal 1 37 3.332205 3.332205 478 +concurr 1 34 3.401197 3.401197 501 +semant 1 29 3.583519 3.583519 587 +bound 1 26 3.688879 3.688879 659 +director 1 22 3.850148 3.850148 767 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +vlsi 1 21 3.912023 3.912023 795 +verif 1 20 3.951244 3.951244 826 +scheme 1 20 3.951244 3.951244 818 +binari 1 20 3.951244 3.951244 823 +exercis 1 19 4.007333 4.007333 842 +failur 1 18 4.060443 4.060443 898 +brown 1 16 4.174387 4.174387 977 +circuit 1 13 4.382027 4.382027 1131 +sigplan 1 13 4.382027 4.382027 1190 +calculu 1 12 4.465908 4.465908 1203 +verifi 1 12 4.465908 4.465908 1261 +meta 1 9 4.753590 4.753590 1505 +crash 1 8 4.875197 4.875197 1616 +delai 1 7 5.010635 5.010635 1848 +cornellunivers 1 7 5.010635 5.010635 1916 +seshadri 1 7 5.010635 5.010635 1803 +silicon 1 6 5.164786 5.164786 2076 +toronto 1 6 5.164786 5.164786 2156 +educurr 1 5 5.347108 5.347108 2504 +hallphon 1 4 5.568345 5.568345 2900 +bloom 1 4 5.568345 5.568345 2913 +insensit 1 4 5.568345 5.568345 2716 +knight 1 4 5.568345 5.568345 2728 +savag 1 4 5.568345 5.568345 2777 +samuel 1 3 5.857933 5.857933 3155 +weber 1 3 5.857933 5.857933 3156 +act 1 3 5.857933 5.857933 3557 +metatheori 1 3 5.857933 5.857933 3642 +agreement 1 3 5.857933 5.857933 3207 +bakker 1 2 6.263398 6.263398 5337 +hadzilaco 1 2 6.263398 6.263398 5338 +byzantin 1 2 6.263398 6.263398 4203 +roever 1 1 6.957497 6.957497 11579 +rozenberg 1 1 6.957497 6.957497 11580 +amdur 1 1 6.957497 6.957497 11581 +wortman 1 1 6.957497 6.957497 11582 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..27ea8874 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +modifi 1 178 1.609438 1.609438 35 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +object 1 138 1.945910 1.945910 79 +databas 1 122 2.079442 2.079442 86 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +technolog 1 131 2.079442 2.079442 102 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +topic 1 114 2.197225 2.197225 110 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +educ 1 86 2.484907 2.484907 191 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +orient 1 80 2.564949 2.564949 205 +window 1 68 2.708050 2.708050 242 +content 1 59 2.833213 2.833213 302 +plai 1 60 2.833213 2.833213 307 +sampl 1 53 2.944439 2.944439 339 +favorit 1 44 3.135494 3.135494 410 +past 1 42 3.218876 3.218876 428 +combin 1 42 3.218876 3.218876 421 +product 1 33 3.433987 3.433987 527 +releas 1 28 3.610918 3.610918 616 +retriev 1 27 3.637586 3.637586 621 +team 1 27 3.637586 3.637586 625 +request 1 26 3.688879 3.688879 635 +reach 1 24 3.761200 3.761200 688 +lead 1 23 3.806662 3.806662 718 +color 1 22 3.850148 3.850148 762 +divis 1 21 3.912023 3.912023 803 +render 1 17 4.110874 4.110874 947 +analyz 1 17 4.110874 4.110874 925 +sheet 1 16 4.174387 4.174387 973 +draw 1 14 4.317488 4.317488 1086 +systemsc 1 11 4.553877 4.553877 1293 +ski 1 10 4.653960 4.653960 1471 +card 1 10 4.653960 4.653960 1435 +softbal 1 9 4.753590 4.753590 1594 +sean 1 8 4.875197 4.875197 1705 +golf 1 6 5.164786 5.164786 2178 +interior 1 5 5.347108 5.347108 2439 +isi 1 5 5.347108 5.347108 2443 +basebal 1 4 5.568345 5.568345 2969 +percept 1 3 5.857933 5.857933 3739 +compliant 1 3 5.857933 5.857933 3245 +landi 1 2 6.263398 6.263398 4830 +clickherefor 1 2 6.263398 6.263398 5344 +stratu 1 2 6.263398 6.263398 5345 +broker 1 2 6.263398 6.263398 4968 +landissean 1 1 6.957497 6.957497 11617 +sciencewelcom 1 1 6.957497 6.957497 11618 +weanalyz 1 1 6.957497 6.957497 11619 +patternsprofession 1 1 6.957497 6.957497 11620 +orbix 1 1 6.957497 6.957497 11616 +acorba 1 1 6.957497 6.957497 11621 +iona 1 1 6.957497 6.957497 11622 +alpin 1 1 6.957497 6.957497 11623 +collectingi 1 1 6.957497 6.957497 11624 +comeduc 1 1 6.957497 6.957497 11625 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..aa0a604b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +model 1 145 1.945910 1.945910 69 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +graphic 1 90 2.397895 2.397895 147 +june 1 79 2.564949 2.564949 214 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +colleg 1 61 2.833213 2.833213 300 +summer 1 56 2.890372 2.890372 311 +york 1 41 3.218876 3.218876 435 +tech 1 35 3.401197 3.401197 515 +post 1 35 3.401197 3.401197 505 +india 1 32 3.465736 3.465736 550 +taken 1 31 3.496508 3.496508 555 +anim 1 31 3.496508 3.496508 557 +equat 1 23 3.806662 3.806662 724 +sciencecornel 1 22 3.850148 3.850148 768 +viewer 1 21 3.912023 3.912023 787 +practicum 1 16 4.174387 4.174387 960 +magic 1 11 4.553877 4.553877 1358 +mapl 1 11 4.553877 4.553877 1376 +reduct 1 7 5.010635 5.010635 1877 +parametr 1 7 5.010635 5.010635 1819 +jpeg 1 6 5.164786 5.164786 2053 +myresum 1 6 5.164786 5.164786 2199 +hoca 1 5 5.347108 5.347108 2241 +engineeringclass 1 3 5.857933 5.857933 3667 +kerala 1 3 5.857933 5.857933 3749 +cornelluniversityfal 1 2 6.263398 6.263398 5131 +artifact 1 2 6.263398 6.263398 5346 +cspracticum 1 2 6.263398 6.263398 5132 +carpet 1 2 6.263398 6.263398 5133 +colloqium 1 2 6.263398 6.263398 5134 +seena 1 1 6.957497 6.957497 11626 +cherangara 1 1 6.957497 6.957497 11627 +cherangaramast 1 1 6.957497 6.957497 11628 +homepagecurr 1 1 6.957497 6.957497 11629 +trivandrum 1 1 6.957497 6.957497 11630 +processingalgorithm 1 1 6.957497 6.957497 11631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..9b48c394 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,239 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +site 1 106 2.197225 2.197225 119 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +text 1 98 2.302585 2.302585 133 +peopl 1 96 2.302585 2.302585 132 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +internet 1 83 2.484907 2.484907 186 +start 1 83 2.484907 2.484907 173 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +thing 1 84 2.484907 2.484907 189 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +upson 1 71 2.639057 2.639057 218 +effici 1 73 2.639057 2.639057 233 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +multimedia 1 68 2.708050 2.708050 258 +test 1 66 2.708050 2.708050 252 +window 1 68 2.708050 2.708050 242 +virtual 1 62 2.772589 2.772589 285 +written 1 63 2.772589 2.772589 278 +dept 1 64 2.772589 2.772589 291 +creat 1 63 2.772589 2.772589 277 +type 1 61 2.833213 2.833213 296 +sever 1 56 2.890372 2.890372 322 +thesi 1 57 2.890372 2.890372 327 +instruct 1 53 2.944439 2.944439 332 +allow 1 53 2.944439 2.944439 333 +format 1 48 3.044522 3.044522 356 +cool 1 49 3.044522 3.044522 374 +give 1 50 3.044522 3.044522 359 +friend 1 48 3.044522 3.044522 376 +quarter 1 47 3.091042 3.091042 389 +video 1 44 3.135494 3.135494 405 +protocol 1 45 3.135494 3.135494 407 +directori 1 45 3.135494 3.135494 396 +term 1 43 3.178054 3.178054 411 +compani 1 41 3.218876 3.218876 423 +fast 1 42 3.218876 3.218876 429 +live 1 40 3.258097 3.258097 451 +small 1 39 3.258097 3.258097 447 +prototyp 1 38 3.295837 3.295837 463 +slide 1 38 3.295837 3.295837 467 +microsoft 1 38 3.295837 3.295837 468 +mean 1 37 3.332205 3.332205 477 +statist 1 35 3.401197 3.401197 521 +transform 1 32 3.465736 3.465736 542 +given 1 32 3.465736 3.465736 538 +someth 1 31 3.496508 3.496508 554 +often 1 31 3.496508 3.496508 551 +steve 1 29 3.583519 3.583519 594 +limit 1 29 3.583519 3.583519 585 +consid 1 29 3.583519 3.583519 590 +semant 1 29 3.583519 3.583519 587 +chines 1 29 3.583519 3.583519 595 +releas 1 28 3.610918 3.610918 616 +ask 1 28 3.610918 3.610918 597 +manipul 1 27 3.637586 3.637586 624 +altern 1 26 3.688879 3.688879 641 +enhanc 1 26 3.688879 3.688879 644 +spent 1 25 3.737670 3.737670 676 +client 1 25 3.737670 3.737670 679 +motion 1 24 3.761200 3.761200 699 +interpret 1 24 3.761200 3.761200 686 +initi 1 23 3.806662 3.806662 717 +recognit 1 23 3.806662 3.806662 723 +variabl 1 23 3.806662 3.806662 715 +sequenc 1 23 3.806662 3.806662 734 +displai 1 23 3.806662 3.806662 712 +indian 1 22 3.850148 3.850148 769 +fact 1 21 3.912023 3.912023 780 +similar 1 21 3.912023 3.912023 771 +viewer 1 21 3.912023 3.912023 787 +wonder 1 20 3.951244 3.951244 815 +media 1 19 4.007333 4.007333 861 +ever 1 19 4.007333 4.007333 872 +event 1 18 4.060443 4.060443 896 +record 1 18 4.060443 4.060443 890 +lot 1 18 4.060443 4.060443 889 +stanford 1 17 4.110874 4.110874 955 +coupl 1 17 4.110874 4.110874 939 +layer 1 17 4.110874 4.110874 926 +segment 1 17 4.110874 4.110874 931 +bachelor 1 17 4.110874 4.110874 957 +upon 1 16 4.174387 4.174387 978 +portion 1 16 4.174387 4.174387 971 +stream 1 15 4.248495 4.248495 1015 +remot 1 15 4.248495 4.248495 1041 +charact 1 15 4.248495 4.248495 1028 +rate 1 15 4.248495 4.248495 1037 +audio 1 14 4.317488 4.317488 1094 +came 1 13 4.382027 4.382027 1197 +script 1 13 4.382027 4.382027 1171 +asynchron 1 12 4.465908 4.465908 1229 +gupta 1 12 4.465908 4.465908 1241 +optic 1 12 4.465908 4.465908 1221 +bill 1 11 4.553877 4.553877 1297 +player 1 11 4.553877 4.553877 1371 +fix 1 11 4.553877 4.553877 1327 +smart 1 11 4.553877 4.553877 1352 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +mode 1 9 4.753590 4.753590 1492 +leader 1 9 4.753590 4.753590 1576 +character 1 8 4.875197 4.875197 1767 +claim 1 8 4.875197 4.875197 1664 +vallei 1 7 5.010635 5.010635 1959 +signal 1 7 5.010635 5.010635 1910 +keshav 1 7 5.010635 5.010635 1852 +conferenc 1 7 5.010635 5.010635 1857 +classroom 1 6 5.164786 5.164786 2006 +televis 1 6 5.164786 5.164786 2118 +silicon 1 6 5.164786 5.164786 2076 +nativ 1 6 5.164786 5.164786 2192 +mix 1 6 5.164786 5.164786 2200 +multicast 1 5 5.347108 5.347108 2305 +sigcomm 1 5 5.347108 5.347108 2329 +affin 1 5 5.347108 5.347108 2378 +hate 1 5 5.347108 5.347108 2529 +shell 1 5 5.347108 5.347108 2353 +hole 1 5 5.347108 5.347108 2518 +stupid 1 5 5.347108 5.347108 2489 +sharma 1 4 5.568345 5.568345 2752 +anoop 1 4 5.568345 5.568345 2770 +height 1 4 5.568345 5.568345 2890 +deploi 1 3 5.857933 5.857933 3750 +greatli 1 3 5.857933 5.857933 3541 +deliveri 1 3 5.857933 5.857933 3278 +membership 1 3 5.857933 5.857933 3751 +predecessor 1 3 5.857933 5.857933 3585 +motif 1 3 5.857933 5.857933 3752 +fractal 1 3 5.857933 5.857933 3475 +hindi 1 3 5.857933 5.857933 3753 +nicknam 1 3 5.857933 5.857933 3716 +stumbl 1 2 6.263398 6.263398 5349 +leadto 1 2 6.263398 6.263398 5350 +navin 1 2 6.263398 6.263398 5351 +agarw 1 2 6.263398 6.263398 5352 +deer 1 2 6.263398 6.263398 4356 +width 1 2 6.263398 6.263398 5328 +yacc 1 2 6.263398 6.263398 4422 +widget 1 2 6.263398 6.263398 5347 +coolest 1 2 6.263398 6.263398 5229 +fool 1 2 6.263398 6.263398 5353 +hors 1 2 6.263398 6.263398 5348 +frozen 1 2 6.263398 6.263398 5078 +rosen 1 1 6.957497 6.957497 11632 +modifiedigmp 1 1 6.957497 6.957497 11636 +unicast 1 1 6.957497 6.957497 11637 +sitn 1 1 6.957497 6.957497 11638 +microwav 1 1 6.957497 6.957497 11639 +vxtreme 1 1 6.957497 6.957497 11634 +chaddha 1 1 6.957497 6.957497 11640 +avneesh 1 1 6.957497 6.957497 11641 +asilomar 1 1 6.957497 6.957497 11642 +igmp 1 1 6.957497 6.957497 11643 +internetdraft 1 1 6.957497 6.957497 11644 +fenner 1 1 6.957497 6.957497 11645 +niten 1 1 6.957497 6.957497 11646 +malhan 1 1 6.957497 6.957497 11647 +delhiunpublish 1 1 6.957497 6.957497 11648 +preform 1 1 6.957497 6.957497 11649 +blur 1 1 6.957497 6.957497 11650 +speckl 1 1 6.957497 6.957497 11651 +subband 1 1 6.957497 6.957497 11652 +estmat 1 1 6.957497 6.957497 11653 +writen 1 1 6.957497 6.957497 11654 +flavour 1 1 6.957497 6.957497 11655 +imagefram 1 1 6.957497 6.957497 11635 +ifram 1 1 6.957497 6.957497 11656 +nodisplai 1 1 6.957497 6.957497 11657 +filenam 1 1 6.957497 6.957497 11658 +putimageincanva 1 1 6.957497 6.957497 11659 +dummi 1 1 6.957497 6.957497 11660 +snooper 1 1 6.957497 6.957497 11661 +doesnt 1 1 6.957497 6.957497 11662 +replai 1 1 6.957497 6.957497 11663 +kludg 1 1 6.957497 6.957497 11664 +dissalow 1 1 6.957497 6.957497 11665 +gaveth 1 1 6.957497 6.957497 11666 +tongu 1 1 6.957497 6.957497 11667 +sharm 1 1 6.957497 6.957497 11668 +shyness 1 1 6.957497 6.957497 11669 +sharmila 1 1 6.957497 6.957497 11633 +actress 1 1 6.957497 6.957497 11670 +tagor 1 1 6.957497 6.957497 11671 +ealri 1 1 6.957497 6.957497 11672 +jewish 1 1 6.957497 6.957497 11673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..f8b73771 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +version 1 113 2.197225 2.197225 122 +final 1 116 2.197225 2.197225 108 +check 1 115 2.197225 2.197225 118 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +graphic 1 90 2.397895 2.397895 147 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +homepag 1 93 2.397895 2.397895 148 +school 1 84 2.484907 2.484907 188 +know 1 80 2.564949 2.564949 198 +resum 1 79 2.564949 2.564949 217 +view 1 70 2.708050 2.708050 254 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +abstract 1 62 2.772589 2.772589 276 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +friend 1 48 3.044522 3.044522 376 +still 1 50 3.044522 3.044522 362 +california 1 46 3.091042 3.091042 388 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +futur 1 41 3.218876 3.218876 427 +movi 1 40 3.258097 3.258097 459 +soon 1 36 3.367296 3.367296 494 +john 1 33 3.433987 3.433987 532 +transform 1 32 3.465736 3.465736 542 +sciencecornel 1 22 3.850148 3.850148 768 +love 1 21 3.912023 3.912023 804 +watch 1 21 3.912023 3.912023 789 +eric 1 19 4.007333 4.007333 870 +citi 1 19 4.007333 4.007333 874 +miss 1 19 4.007333 4.007333 866 +listen 1 18 4.060443 4.060443 907 +young 1 16 4.174387 4.174387 991 +camera 1 14 4.317488 4.317488 1115 +hong 1 14 4.317488 4.317488 1105 +near 1 14 4.317488 4.317488 1091 +meng 1 12 4.465908 4.465908 1214 +went 1 12 4.465908 4.465908 1279 +pagewelcom 1 11 4.553877 4.553877 1344 +french 1 9 4.753590 4.753590 1511 +guitar 1 8 4.875197 4.875197 1758 +mile 1 8 4.875197 4.875197 1743 +instrument 1 7 5.010635 5.010635 1954 +davi 1 7 5.010635 5.010635 1888 +piano 1 6 5.164786 5.164786 2201 +antonio 1 6 5.164786 5.164786 2186 +carlo 1 5 5.347108 5.347108 2515 +jazz 1 5 5.347108 5.347108 2527 +middl 1 5 5.347108 5.347108 2372 +keyboard 1 4 5.568345 5.568345 2970 +korea 1 4 5.568345 5.568345 2971 +cyberspac 1 3 5.857933 5.857933 3719 +sang 1 2 6.263398 6.263398 5356 +korean 1 2 6.263398 6.263398 5354 +onthi 1 2 6.263398 6.263398 5357 +acoust 1 2 6.263398 6.263398 5355 +chopin 1 2 6.263398 6.263398 5358 +cinema 1 2 6.263398 6.263398 5244 +miser 1 2 6.263398 6.263398 5359 +melco 1 2 6.263398 6.263398 5200 +kwan 1 2 6.263398 6.263398 4126 +kang 1 2 6.263398 6.263398 5360 +shim 1 1 6.957497 6.957497 11674 +shimmast 1 1 6.957497 6.957497 11675 +dryden 1 1 6.957497 6.957497 11676 +irvinestudi 1 1 6.957497 6.957497 11677 +classi 1 1 6.957497 6.957497 11678 +stan 1 1 6.957497 6.957497 11679 +getz 1 1 6.957497 6.957497 11680 +jobim 1 1 6.957497 6.957497 11681 +coltran 1 1 6.957497 6.957497 11682 +earl 1 1 6.957497 6.957497 11683 +klugh 1 1 6.957497 6.957497 11684 +metheni 1 1 6.957497 6.957497 11685 +archemi 1 1 6.957497 6.957497 11686 +paradiso 1 1 6.957497 6.957497 11687 +kiss 1 1 6.957497 6.957497 11688 +saigon 1 1 6.957497 6.957497 11689 +newswant 1 1 6.957497 6.957497 11690 +anybodi 1 1 6.957497 6.957497 11691 +hana 1 1 6.957497 6.957497 11692 +jung 1 1 6.957497 6.957497 11693 +hwan 1 1 6.957497 6.957497 11694 +victor 1 1 6.957497 6.957497 11695 +jiyang 1 1 6.957497 6.957497 11696 +timessinc 1 1 6.957497 6.957497 11697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..bc295e86 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +analysi 1 124 2.079442 2.079442 98 +structur 1 106 2.197225 2.197225 105 +text 1 98 2.302585 2.302585 133 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +associ 1 93 2.397895 2.397895 151 +sinc 1 90 2.397895 2.397895 159 +help 1 83 2.484907 2.484907 175 +resum 1 79 2.564949 2.564949 217 +come 1 78 2.564949 2.564949 202 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +involv 1 71 2.639057 2.639057 227 +david 1 71 2.639057 2.639057 232 +prof 1 64 2.772589 2.772589 273 +copi 1 63 2.772589 2.772589 284 +improv 1 62 2.772589 2.772589 289 +evalu 1 64 2.772589 2.772589 266 +collect 1 65 2.772589 2.772589 268 +visit 1 63 2.772589 2.772589 288 +best 1 59 2.833213 2.833213 299 +automat 1 61 2.833213 2.833213 306 +thesi 1 57 2.890372 2.890372 327 +variou 1 56 2.890372 2.890372 317 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +advisor 1 51 2.995732 2.995732 355 +approach 1 48 3.044522 3.044522 366 +visitor 1 49 3.044522 3.044522 371 +effect 1 46 3.091042 3.091042 385 +term 1 43 3.178054 3.178054 411 +show 1 43 3.178054 3.178054 417 +third 1 43 3.178054 3.178054 412 +late 1 40 3.258097 3.258097 439 +field 1 37 3.332205 3.332205 482 +soon 1 36 3.367296 3.367296 494 +ofth 1 36 3.367296 3.367296 491 +jame 1 35 3.401197 3.401197 507 +queri 1 33 3.433987 3.433987 524 +independ 1 32 3.465736 3.465736 548 +exist 1 30 3.555348 3.555348 569 +propos 1 28 3.610918 3.610918 602 +usual 1 28 3.610918 3.610918 608 +retriev 1 27 3.637586 3.637586 621 +relev 1 26 3.688879 3.688879 637 +other 1 24 3.761200 3.761200 697 +size 1 23 3.806662 3.806662 713 +brows 1 23 3.806662 3.806662 726 +thank 1 23 3.806662 3.806662 721 +hypertext 1 19 4.007333 4.007333 865 +segment 1 17 4.110874 4.110874 931 +normal 1 16 4.174387 4.174387 995 +eduphon 1 15 4.248495 4.248495 1060 +senior 1 14 4.317488 4.317488 1120 +weight 1 12 4.465908 4.465908 1204 +reader 1 12 4.465908 4.465908 1246 +chri 1 11 4.553877 4.553877 1311 +smart 1 11 4.553877 4.553877 1352 +arpa 1 11 4.553877 4.553877 1369 +summar 1 11 4.553877 4.553877 1295 +length 1 10 4.653960 4.653960 1400 +decomposit 1 10 4.653960 4.653960 1439 +theme 1 8 4.875197 4.875197 1707 +counter 1 8 4.875197 4.875197 1765 +vari 1 6 5.164786 5.164786 2001 +fairli 1 5 5.347108 5.347108 2322 +pivot 1 5 5.347108 5.347108 2426 +yield 1 5 5.347108 5.347108 2458 +testb 1 5 5.347108 5.347108 2456 +circumst 1 5 5.347108 5.347108 2283 +proceedingsof 1 5 5.347108 5.347108 2331 +dual 1 5 5.347108 5.347108 2522 +travers 1 5 5.347108 5.347108 2363 +amit 1 4 5.568345 5.568345 2972 +commonli 1 4 5.568345 5.568345 2877 +substanti 1 4 5.568345 5.568345 2921 +nist 1 4 5.568345 5.568345 2973 +allan 1 4 5.568345 5.568345 2849 +singhal 1 3 5.857933 5.857933 3098 +supervisor 1 3 5.857933 5.857933 3754 +likelihood 1 3 5.857933 5.857933 3172 +trec 1 3 5.857933 5.857933 3547 +expans 1 3 5.857933 5.857933 3755 +bucklei 1 2 6.263398 6.263398 4874 +foremost 1 2 6.263398 6.263398 5361 +excerpt 1 2 6.263398 6.263398 4880 +gerard 1 2 6.263398 6.263398 4876 +salton 1 2 6.263398 6.263398 4060 +degrad 1 2 6.263398 6.263398 5362 +amitsingh 1 2 6.263398 6.263398 4061 +slowli 1 2 6.263398 6.263398 5363 +pageamit 1 1 6.957497 6.957497 11703 +singhaldepart 1 1 6.957497 6.957497 11704 +universitysingh 1 1 6.957497 6.957497 11705 +andtext 1 1 6.957497 6.957497 11706 +gerardsalton 1 1 6.957497 6.957497 11699 +clairecardieher 1 1 6.957497 6.957497 11707 +beenon 1 1 6.957497 6.957497 11708 +informationretriev 1 1 6.957497 6.957497 11709 +thirti 1 1 6.957497 6.957497 11710 +lengthnorm 1 1 6.957497 6.957497 11700 +thateffect 1 1 6.957497 6.957497 11711 +chancessimilar 1 1 6.957497 6.957497 11712 +normalizationfunct 1 1 6.957497 6.957497 11713 +retrievaleffect 1 1 6.957497 6.957497 11714 +normalizationtechniqu 1 1 6.957497 6.957497 11715 +trecparticipationtext 1 1 6.957497 6.957497 11716 +sponsoredeffort 1 1 6.957497 6.957497 11717 +retrievaltechniqu 1 1 6.957497 6.957497 11718 +hasconsist 1 1 6.957497 6.957497 11719 +somepap 1 1 6.957497 6.957497 11720 +summarizationnon 1 1 6.957497 6.957497 11721 +expositori 1 1 6.957497 6.957497 11722 +tocov 1 1 6.957497 6.957497 11723 +selectiveaccess 1 1 6.957497 6.957497 11724 +toanalyz 1 1 6.957497 6.957497 11725 +texttravers 1 1 6.957497 6.957497 11726 +papersnorm 1 1 6.957497 6.957497 11727 +mandarmitra 1 1 6.957497 6.957497 11701 +documentlength 1 1 6.957497 6.957497 11728 +mandar 1 1 6.957497 6.957497 11698 +mitraand 1 1 6.957497 6.957497 11729 +usingsmart 1 1 6.957497 6.957497 11730 +textthem 1 1 6.957497 6.957497 11731 +mitra 1 1 6.957497 6.957497 11702 +andmanag 1 1 6.957497 6.957497 11732 +vectorspac 1 1 6.957497 6.957497 11733 +machineread 1 1 6.957497 6.957497 11734 +groupmemb 1 1 6.957497 6.957497 11735 +fluctuat 1 1 6.957497 6.957497 11736 +iinstal 1 1 6.957497 6.957497 11737 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..abad07f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,154 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +postscript 1 131 2.079442 2.079442 90 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +version 1 113 2.197225 2.197225 122 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +associ 1 93 2.397895 2.397895 151 +sinc 1 90 2.397895 2.397895 159 +real 1 93 2.397895 2.397895 144 +mani 1 92 2.397895 2.397895 150 +build 1 85 2.484907 2.484907 184 +wide 1 84 2.484907 2.484907 185 +internet 1 83 2.484907 2.484907 186 +level 1 87 2.484907 2.484907 180 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +upson 1 71 2.639057 2.639057 218 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +virtual 1 62 2.772589 2.772589 285 +reason 1 57 2.890372 2.890372 318 +talk 1 53 2.944439 2.944439 336 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +still 1 50 3.044522 3.044522 362 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +littl 1 39 3.258097 3.258097 454 +everi 1 34 3.401197 3.401197 519 +least 1 35 3.401197 3.401197 516 +word 1 34 3.401197 3.401197 508 +independ 1 32 3.465736 3.465736 548 +idea 1 32 3.465736 3.465736 545 +india 1 32 3.465736 3.465736 550 +anim 1 31 3.496508 3.496508 557 +domain 1 30 3.555348 3.555348 564 +built 1 29 3.583519 3.583519 592 +quot 1 29 3.583519 3.583519 582 +actual 1 28 3.610918 3.610918 604 +releas 1 28 3.610918 3.610918 616 +sometim 1 24 3.761200 3.761200 696 +head 1 23 3.806662 3.806662 732 +instal 1 22 3.850148 3.850148 754 +latest 1 21 3.912023 3.912023 785 +thu 1 21 3.912023 3.912023 773 +driven 1 15 4.248495 4.248495 1048 +remot 1 15 4.248495 4.248495 1041 +goe 1 15 4.248495 4.248495 1044 +squar 1 14 4.317488 4.317488 1082 +edui 1 13 4.382027 4.382027 1193 +directli 1 13 4.382027 4.382027 1141 +circuit 1 13 4.382027 4.382027 1131 +unfortun 1 13 4.382027 4.382027 1170 +speak 1 12 4.465908 4.465908 1283 +song 1 11 4.553877 4.553877 1380 +equip 1 10 4.653960 4.653960 1459 +stack 1 10 4.653960 4.653960 1389 +packet 1 10 4.653960 4.653960 1415 +custom 1 10 4.653960 4.653960 1414 +mode 1 9 4.753590 4.753590 1492 +lane 1 8 4.875197 4.875197 1720 +router 1 8 4.875197 4.875197 1772 +switch 1 8 4.875197 4.875197 1718 +span 1 8 4.875197 4.875197 1751 +round 1 8 4.875197 4.875197 1769 +keshav 1 7 5.010635 5.010635 1852 +signal 1 7 5.010635 5.010635 1910 +canb 1 7 5.010635 5.010635 1846 +hear 1 7 5.010635 5.010635 1940 +nativ 1 6 5.164786 5.164786 2192 +south 1 6 5.164786 5.164786 2167 +srinivasan 1 6 5.164786 5.164786 2175 +christoph 1 5 5.347108 5.347108 2512 +delhi 1 5 5.347108 5.347108 2530 +facial 1 5 5.347108 5.347108 2438 +hole 1 5 5.347108 5.347108 2518 +poem 1 5 5.347108 5.347108 2483 +systemsand 1 4 5.568345 5.568345 2804 +scratch 1 3 5.857933 5.857933 3140 +compliant 1 3 5.857933 5.857933 3245 +district 1 3 5.857933 5.857933 3756 +father 1 3 5.857933 5.857933 3757 +johann 1 3 5.857933 5.857933 3758 +goof 1 2 6.263398 6.263398 4985 +snoop 1 2 6.263398 6.263398 5364 +mbone 1 2 6.263398 6.263398 4361 +pagemi 1 2 6.263398 6.263398 5230 +villag 1 2 6.263398 6.263398 5215 +ought 1 2 6.263398 6.263398 5365 +goeth 1 2 6.263398 6.263398 5366 +keshavemail 1 1 6.957497 6.957497 11740 +skeshav 1 1 6.957497 6.957497 11738 +spentfiv 1 1 6.957497 6.957497 11741 +xunet 1 1 6.957497 6.957497 11742 +idlinet 1 1 6.957497 6.957497 11739 +incollabor 1 1 6.957497 6.957497 11743 +fore 1 1 6.957497 6.957497 11744 +zeitnet 1 1 6.957497 6.957497 11745 +idlinetsourc 1 1 6.957497 6.957497 11746 +applicationget 1 1 6.957497 6.957497 11747 +linkspapersher 1 1 6.957497 6.957497 11748 +linkto 1 1 6.957497 6.957497 11749 +reali 1 1 6.957497 6.957497 11750 +beout 1 1 6.957497 6.957497 11751 +native_mod 1 1 6.957497 6.957497 11752 +namein 1 1 6.957497 6.957497 11753 +thanjavur 1 1 6.957497 6.957497 11754 +beprecis 1 1 6.957497 6.957497 11755 +prefix 1 1 6.957497 6.957497 11756 +sonli 1 1 6.957497 6.957497 11757 +surnam 1 1 6.957497 6.957497 11758 +myfath 1 1 6.957497 6.957497 11759 +intoth 1 1 6.957497 6.957497 11760 +beconfus 1 1 6.957497 6.957497 11761 +quotabl 1 1 6.957497 6.957497 11762 +wolfgang 1 1 6.957497 6.957497 11763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..c8ac6b2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +construct 1 139 1.945910 1.945910 82 +place 1 106 2.197225 2.197225 124 +sinc 1 90 2.397895 2.397895 159 +prof 1 64 2.772589 2.772589 273 +electr 1 38 3.295837 3.295837 461 +alwai 1 24 3.761200 3.761200 691 +came 1 13 4.382027 4.382027 1197 +kenneth 1 12 4.465908 4.465908 1265 +meng 1 12 4.465908 4.465908 1214 +road 1 11 4.553877 4.553877 1374 +success 1 10 4.653960 4.653960 1390 +zabih 1 6 5.164786 5.164786 2138 +sunlab 1 2 6.263398 6.263398 5222 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..c99c4fc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +high 1 130 2.079442 2.079442 101 +place 1 106 2.197225 2.197225 124 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +teach 1 108 2.197225 2.197225 112 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +mani 1 92 2.397895 2.397895 150 +commun 1 95 2.397895 2.397895 157 +pictur 1 89 2.397895 2.397895 160 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +school 1 84 2.484907 2.484907 188 +thing 1 84 2.484907 2.484907 189 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +novemb 1 81 2.484907 2.484907 179 +come 1 78 2.564949 2.564949 202 +want 1 79 2.564949 2.564949 199 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +june 1 79 2.564949 2.564949 214 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +plan 1 65 2.772589 2.772589 272 +written 1 63 2.772589 2.772589 278 +visit 1 63 2.772589 2.772589 288 +dept 1 64 2.772589 2.772589 291 +best 1 59 2.833213 2.833213 299 +direct 1 57 2.890372 2.890372 316 +point 1 58 2.890372 2.890372 319 +summer 1 56 2.890372 2.890372 311 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +talk 1 53 2.944439 2.944439 336 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +week 1 52 2.995732 2.995732 343 +right 1 48 3.044522 3.044522 363 +friend 1 48 3.044522 3.044522 376 +still 1 50 3.044522 3.044522 362 +could 1 46 3.091042 3.091042 383 +featur 1 46 3.091042 3.091042 386 +better 1 45 3.135494 3.135494 401 +even 1 45 3.135494 3.135494 393 +around 1 43 3.178054 3.178054 415 +music 1 42 3.218876 3.218876 436 +must 1 40 3.258097 3.258097 442 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +late 1 40 3.258097 3.258097 439 +theoret 1 39 3.258097 3.258097 446 +live 1 40 3.258097 3.258097 451 +brian 1 38 3.295837 3.295837 466 +connect 1 37 3.332205 3.332205 485 +tree 1 36 3.367296 3.367296 492 +especi 1 36 3.367296 3.367296 496 +winter 1 36 3.367296 3.367296 500 +random 1 34 3.401197 3.401197 511 +next 1 34 3.401197 3.401197 517 +either 1 35 3.401197 3.401197 506 +go 1 33 3.433987 3.433987 529 +kind 1 32 3.465736 3.465736 541 +power 1 30 3.555348 3.555348 573 +actual 1 28 3.610918 3.610918 604 +quit 1 27 3.637586 3.637586 633 +great 1 27 3.637586 3.637586 626 +rather 1 26 3.688879 3.688879 642 +never 1 25 3.737670 3.737670 671 +spent 1 25 3.737670 3.737670 676 +magazin 1 24 3.761200 3.761200 704 +alwai 1 24 3.761200 3.761200 691 +pattern 1 24 3.761200 3.761200 689 +sometim 1 24 3.761200 3.761200 696 +togeth 1 23 3.806662 3.806662 714 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +inth 1 22 3.850148 3.850148 741 +instead 1 22 3.850148 3.850148 756 +leav 1 21 3.912023 3.912023 772 +path 1 21 3.912023 3.912023 778 +hous 1 21 3.912023 3.912023 801 +tell 1 21 3.912023 3.912023 777 +theorem 1 21 3.912023 3.912023 786 +half 1 21 3.912023 3.912023 776 +busi 1 21 3.912023 3.912023 784 +nice 1 20 3.951244 3.951244 809 +mostli 1 19 4.007333 4.007333 869 +left 1 19 4.007333 4.007333 851 +seem 1 18 4.060443 4.060443 899 +figur 1 18 4.060443 4.060443 903 +listen 1 18 4.060443 4.060443 907 +coupl 1 17 4.110874 4.110874 939 +stop 1 17 4.110874 4.110874 942 +sign 1 16 4.174387 4.174387 970 +modern 1 16 4.174387 4.174387 966 +brown 1 16 4.174387 4.174387 977 +side 1 15 4.248495 4.248495 1022 +score 1 15 4.248495 4.248495 1017 +english 1 15 4.248495 4.248495 1033 +indic 1 15 4.248495 4.248495 1013 +near 1 14 4.317488 4.317488 1091 +warn 1 14 4.317488 4.317488 1068 +stori 1 14 4.317488 4.317488 1087 +step 1 13 4.382027 4.382027 1138 +sai 1 13 4.382027 4.382027 1175 +front 1 13 4.382027 4.382027 1154 +everyth 1 13 4.382027 4.382027 1169 +someon 1 13 4.382027 4.382027 1128 +nick 1 13 4.382027 4.382027 1180 +walk 1 12 4.465908 4.465908 1281 +insid 1 12 4.465908 4.465908 1262 +tune 1 12 4.465908 4.465908 1227 +went 1 12 4.465908 4.465908 1279 +daniel 1 12 4.465908 4.465908 1233 +neat 1 12 4.465908 4.465908 1263 +america 1 11 4.553877 4.553877 1370 +mountain 1 10 4.653960 4.653960 1456 +label 1 10 4.653960 4.653960 1423 +traffic 1 10 4.653960 4.653960 1421 +hello 1 10 4.653960 4.653960 1407 +paragraph 1 10 4.653960 4.653960 1449 +awai 1 10 4.653960 4.653960 1447 +clear 1 9 4.753590 4.753590 1488 +distanc 1 9 4.753590 4.753590 1500 +sound 1 9 4.753590 4.753590 1605 +realiz 1 8 4.875197 4.875197 1739 +perhap 1 8 4.875197 4.875197 1693 +contrast 1 8 4.875197 4.875197 1637 +gold 1 8 4.875197 4.875197 1745 +ring 1 8 4.875197 4.875197 1684 +switch 1 8 4.875197 4.875197 1718 +manufactur 1 8 4.875197 4.875197 1634 +japan 1 8 4.875197 4.875197 1762 +ideal 1 8 4.875197 4.875197 1630 +hear 1 7 5.010635 5.010635 1940 +throughout 1 7 5.010635 5.010635 1871 +corner 1 7 5.010635 5.010635 1909 +smile 1 7 5.010635 5.010635 1807 +golden 1 7 5.010635 5.010635 1962 +centuri 1 7 5.010635 5.010635 1935 +intellectu 1 7 5.010635 5.010635 1847 +occasion 1 7 5.010635 5.010635 1905 +pronounc 1 7 5.010635 5.010635 1918 +largest 1 7 5.010635 5.010635 1858 +happen 1 7 5.010635 5.010635 1790 +rain 1 6 5.164786 5.164786 2137 +snow 1 6 5.164786 5.164786 2031 +strang 1 6 5.164786 5.164786 2064 +mother 1 6 5.164786 5.164786 2083 +seen 1 6 5.164786 5.164786 2202 +ifyou 1 6 5.164786 5.164786 1992 +polit 1 6 5.164786 5.164786 2115 +fairli 1 5 5.347108 5.347108 2322 +respond 1 5 5.347108 5.347108 2354 +door 1 5 5.347108 5.347108 2291 +hair 1 5 5.347108 5.347108 2446 +own 1 5 5.347108 5.347108 2531 +isth 1 5 5.347108 5.347108 2532 +sing 1 5 5.347108 5.347108 2499 +somehow 1 4 5.568345 5.568345 2974 +hell 1 4 5.568345 5.568345 2885 +glanc 1 4 5.568345 5.568345 2652 +tick 1 4 5.568345 5.568345 2975 +somewhat 1 4 5.568345 5.568345 2659 +heard 1 4 5.568345 5.568345 2895 +paus 1 4 5.568345 5.568345 2965 +wear 1 4 5.568345 5.568345 2785 +dark 1 4 5.568345 5.568345 2910 +usedto 1 4 5.568345 5.568345 2643 +gotten 1 4 5.568345 5.568345 2628 +hire 1 4 5.568345 5.568345 2976 +couldn 1 4 5.568345 5.568345 2977 +surround 1 3 5.857933 5.857933 3492 +glass 1 3 5.857933 5.857933 3759 +stone 1 3 5.857933 5.857933 3674 +scatter 1 3 5.857933 5.857933 3351 +vagu 1 3 5.857933 5.857933 3393 +worri 1 3 5.857933 5.857933 3130 +thin 1 3 5.857933 5.857933 3488 +bright 1 3 5.857933 5.857933 3596 +twentieth 1 3 5.857933 5.857933 3760 +wave 1 3 5.857933 5.857933 3518 +europ 1 3 5.857933 5.857933 3761 +child 1 3 5.857933 5.857933 3542 +blame 1 3 5.857933 5.857933 3636 +dread 1 3 5.857933 5.857933 3630 +wise 1 3 5.857933 5.857933 3631 +pyramid 1 3 5.857933 5.857933 3358 +maker 1 3 5.857933 5.857933 3164 +forest 1 2 6.263398 6.263398 5368 +shack 1 2 6.263398 6.263398 5369 +withno 1 2 6.263398 6.263398 5370 +pile 1 2 6.263398 6.263398 5371 +obviou 1 2 6.263398 6.263398 5367 +hum 1 2 6.263398 6.263398 4935 +purpl 1 2 6.263398 6.263398 5372 +silk 1 2 6.263398 6.263398 5373 +silver 1 2 6.263398 6.263398 5374 +ocean 1 2 6.263398 6.263398 5375 +andlook 1 2 6.263398 6.263398 4561 +altogeth 1 2 6.263398 6.263398 4751 +nowadai 1 2 6.263398 6.263398 5376 +pointcast 1 2 6.263398 6.263398 5377 +inner 1 2 6.263398 6.263398 4551 +furnitur 1 2 6.263398 6.263398 5016 +haveth 1 2 6.263398 6.263398 5378 +hani 1 2 6.263398 6.263398 5140 +neededto 1 2 6.263398 6.263398 5379 +agood 1 2 6.263398 6.263398 5380 +clearinglook 1 1 6.957497 6.957497 11768 +thanyou 1 1 6.957497 6.957497 11769 +onal 1 1 6.957497 6.957497 11770 +theweath 1 1 6.957497 6.957497 11771 +overcast 1 1 6.957497 6.957497 11772 +ifit 1 1 6.957497 6.957497 11773 +quiteclear 1 1 6.957497 6.957497 11774 +bird 1 1 6.957497 6.957497 11775 +chirp 1 1 6.957497 6.957497 11776 +theymai 1 1 6.957497 6.957497 11777 +louder 1 1 6.957497 6.957497 11778 +nearbywaterfal 1 1 6.957497 6.957497 11779 +gotta 1 1 6.957497 6.957497 11780 +apath 1 1 6.957497 6.957497 11781 +asign 1 1 6.957497 6.957497 11782 +hillschool 1 1 6.957497 6.957497 11783 +wormhol 1 1 6.957497 6.957497 11784 +nearbyhous 1 1 6.957497 6.957497 11785 +presum 1 1 6.957497 6.957497 11764 +clearinginsid 1 1 6.957497 6.957497 11786 +offand 1 1 6.957497 6.957497 11787 +theclear 1 1 6.957497 6.957497 11788 +rhyme 1 1 6.957497 6.957497 11789 +reasonto 1 1 6.957497 6.957497 11790 +sortsof 1 1 6.957497 6.957497 11791 +betteridea 1 1 6.957497 6.957497 11792 +itseem 1 1 6.957497 6.957497 11793 +importantth 1 1 6.957497 6.957497 11794 +clearingh 1 1 6.957497 6.957497 11795 +oftendescrib 1 1 6.957497 6.957497 11796 +mostdistinct 1 1 6.957497 6.957497 11797 +quitelong 1 1 6.957497 6.957497 11798 +elfin 1 1 6.957497 6.957497 11799 +peoplebefor 1 1 6.957497 6.957497 11800 +theresoon 1 1 6.957497 6.957497 11801 +startstel 1 1 6.957497 6.957497 11802 +whynichola 1 1 6.957497 6.957497 11803 +negropont 1 1 6.957497 6.957497 11804 +moron 1 1 6.957497 6.957497 11805 +thenh 1 1 6.957497 6.957497 11806 +obscur 1 1 6.957497 6.957497 11807 +hetend 1 1 6.957497 6.957497 11808 +appearanceinstead 1 1 6.957497 6.957497 11809 +turquois 1 1 6.957497 6.957497 11810 +linen 1 1 6.957497 6.957497 11811 +heha 1 1 6.957497 6.957497 11765 +imageof 1 1 6.957497 6.957497 11812 +beaver 1 1 6.957497 6.957497 11766 +fromhim 1 1 6.957497 6.957497 11813 +pewter 1 1 6.957497 6.957497 11814 +pentacl 1 1 6.957497 6.957497 11815 +neck 1 1 6.957497 6.957497 11816 +hippi 1 1 6.957497 6.957497 11817 +asclass 1 1 6.957497 6.957497 11818 +clearingdan 1 1 6.957497 6.957497 11819 +briani 1 1 6.957497 6.957497 11820 +anundergrad 1 1 6.957497 6.957497 11821 +newsprovid 1 1 6.957497 6.957497 11822 +ancamosoiu 1 1 6.957497 6.957497 11823 +schwa 1 1 6.957497 6.957497 11824 +backwhen 1 1 6.957497 6.957497 11825 +twoand 1 1 6.957497 6.957497 11826 +wegradu 1 1 6.957497 6.957497 11827 +shejust 1 1 6.957497 6.957497 11828 +onewav 1 1 6.957497 6.957497 11829 +issomeon 1 1 6.957497 6.957497 11830 +severalmonth 1 1 6.957497 6.957497 11831 +becamemuch 1 1 6.957497 6.957497 11832 +eedepart 1 1 6.957497 6.957497 11833 +multimediastud 1 1 6.957497 6.957497 11834 +friendof 1 1 6.957497 6.957497 11835 +perri 1 1 6.957497 6.957497 11767 +finlei 1 1 6.957497 6.957497 11836 +notanymor 1 1 6.957497 6.957497 11837 +steelcas 1 1 6.957497 6.957497 11838 +dserver 1 1 6.957497 6.957497 11839 +kinda 1 1 6.957497 6.957497 11840 +cheesi 1 1 6.957497 6.957497 11841 +thebuild 1 1 6.957497 6.957497 11842 +ius 1 1 6.957497 6.957497 11843 +programcal 1 1 6.957497 6.957497 11844 +graduatedfrom 1 1 6.957497 6.957497 11845 +cuter 1 1 6.957497 6.957497 11846 +thanth 1 1 6.957497 6.957497 11847 +blurri 1 1 6.957497 6.957497 11848 +dreamer 1 1 6.957497 6.957497 11849 +ofdream 1 1 6.957497 6.957497 11850 +aphex 1 1 6.957497 6.957497 11851 +twindan 1 1 6.957497 6.957497 11852 +snowman 1 1 6.957497 6.957497 11853 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..c4fcc761 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +cornel 1 215 1.386294 1.386294 23 +take 1 97 2.302585 2.302585 134 +cool 1 49 3.044522 3.044522 374 +keep 1 44 3.135494 3.135494 409 +long 1 43 3.178054 3.178054 413 +soon 1 36 3.367296 3.367296 494 +lot 1 18 4.060443 4.060443 889 +rate 1 15 4.248495 4.248495 1037 +ashish 1 5 5.347108 5.347108 2473 +ultra 1 4 5.568345 5.568345 2889 +autobiographi 1 2 6.263398 6.263398 5070 +upkeep 1 1 6.957497 6.957497 11855 +lookin 1 1 6.957497 6.957497 11854 +setuup 1 1 6.957497 6.957497 11856 +doingajaymanishanujmom 1 1 6.957497 6.957497 11857 +daddepart 1 1 6.957497 6.957497 11858 +sciencesearch 1 1 6.957497 6.957497 11859 +netentertain 1 1 6.957497 6.957497 11860 +weeklycricket 1 1 6.957497 6.957497 11861 +soni 1 1 6.957497 6.957497 11862 +sonia 1 1 6.957497 6.957497 11863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..ea8c79c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +would 1 67 2.708050 2.708050 251 +browser 1 56 2.890372 2.890372 313 +better 1 45 3.135494 3.135494 401 +form 1 39 3.258097 3.258097 443 +scott 1 18 4.060443 4.060443 884 +capabl 1 15 4.248495 4.248495 1016 +dawson 1 2 6.263398 6.263398 4886 +padif 1 1 6.957497 6.957497 11864 +scottdawson 1 1 6.957497 6.957497 11865 +shomebas 1 1 6.957497 6.957497 11866 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..ac2f1467 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +cornel 1 215 1.386294 1.386294 23 +hall 1 146 1.945910 1.945910 65 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +center 1 88 2.397895 2.397895 158 +institut 1 84 2.484907 2.484907 187 +paul 1 38 3.295837 3.295837 471 +scheme 1 20 3.951244 3.951244 818 +ultim 1 17 4.110874 4.110874 943 +affili 1 13 4.382027 4.382027 1194 +rhode 1 9 4.753590 4.753590 1579 +hockei 1 8 4.875197 4.875197 1760 +atcornel 1 6 5.164786 5.164786 2131 +stodghil 1 4 5.568345 5.568345 2864 +bernoulli 1 4 5.568345 5.568345 2955 +pagepaul 1 3 5.857933 5.857933 3669 +stodghillstodghil 1 1 6.957497 6.957497 11867 +acri 1 1 6.957497 6.957497 11868 +projectinterest 1 1 6.957497 6.957497 11869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..642536df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +updat 1 191 1.609438 1.609438 41 +august 1 66 2.708050 2.708050 257 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +scott 1 18 4.060443 4.060443 884 +former 1 17 4.110874 4.110874 956 +indiana 1 6 5.164786 5.164786 2057 +stoller 1 4 5.568345 5.568345 2866 +pagescott 1 4 5.568345 5.568345 2978 +hyplan 1 1 6.957497 6.957497 11870 +htmllast 1 1 6.957497 6.957497 11871 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..9963a838 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +person 1 111 2.197225 2.197225 117 +theori 1 111 2.197225 2.197225 127 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +multimedia 1 68 2.708050 2.708050 258 +prof 1 64 2.772589 2.772589 273 +previou 1 62 2.772589 2.772589 290 +semest 1 58 2.890372 2.890372 312 +brian 1 38 3.295837 3.295837 466 +seminar 1 38 3.295837 3.295837 470 +option 1 30 3.555348 3.555348 575 +progress 1 28 3.610918 3.610918 598 +smith 1 20 3.951244 3.951244 820 +wonder 1 20 3.951244 3.951244 815 +ofcomput 1 10 4.653960 4.653960 1442 +earth 1 10 4.653960 4.653960 1463 +marri 1 7 5.010635 5.010635 1946 +price 1 6 5.164786 5.164786 1999 +czar 1 5 5.347108 5.347108 2503 +mehom 1 4 5.568345 5.568345 2979 +eduaddress 1 3 5.857933 5.857933 3762 +sugata 1 2 6.263398 6.263398 4976 +mukhopadhyai 1 2 6.263398 6.263398 4981 +ritu 1 1 6.957497 6.957497 11872 +mailsugata 1 1 6.957497 6.957497 11873 +hichori 1 1 6.957497 6.957497 11874 +estat 1 1 6.957497 6.957497 11875 +owego 1 1 6.957497 6.957497 11876 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..2a1260bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +need 1 98 2.302585 2.302585 135 +present 1 91 2.397895 2.397895 145 +resum 1 79 2.564949 2.564949 217 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +ithaca 1 65 2.772589 2.772589 294 +browser 1 56 2.890372 2.890372 313 +life 1 50 3.044522 3.044522 375 +physic 1 47 3.091042 3.091042 377 +paul 1 38 3.295837 3.295837 471 +anim 1 31 3.496508 3.496508 557 +background 1 25 3.737670 3.737670 664 +rout 1 21 3.912023 3.912023 793 +capabl 1 15 4.248495 4.248495 1016 +philosophi 1 13 4.382027 4.382027 1167 +clock 1 11 4.553877 4.553877 1320 +tick 1 4 5.568345 5.568345 2975 +carleton 1 2 6.263398 6.263398 5381 +sukhpal 1 1 6.957497 6.957497 11877 +sanghera 1 1 6.957497 6.957497 11878 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..42d10ea4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +read 1 154 1.791759 1.791759 47 +first 1 140 1.945910 1.945910 71 +high 1 130 2.079442 2.079442 101 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +place 1 106 2.197225 2.197225 124 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +want 1 79 2.564949 2.564949 199 +orient 1 80 2.564949 2.564949 205 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +summer 1 56 2.890372 2.890372 311 +space 1 57 2.890372 2.890372 310 +found 1 53 2.944439 2.944439 337 +week 1 52 2.995732 2.995732 343 +run 1 51 2.995732 2.995732 347 +understand 1 47 3.091042 3.091042 384 +video 1 44 3.135494 3.135494 405 +show 1 43 3.178054 3.178054 417 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +realli 1 40 3.258097 3.258097 444 +microsoft 1 38 3.295837 3.295837 468 +industri 1 38 3.295837 3.295837 464 +feel 1 37 3.332205 3.332205 483 +hand 1 37 3.332205 3.332205 475 +post 1 35 3.401197 3.401197 505 +print 1 34 3.401197 3.401197 503 +everi 1 34 3.401197 3.401197 519 +within 1 33 3.433987 3.433987 525 +kind 1 32 3.465736 3.465736 541 +progress 1 28 3.610918 3.610918 598 +enjoi 1 26 3.688879 3.688879 660 +session 1 26 3.688879 3.688879 643 +notic 1 25 3.737670 3.737670 675 +never 1 25 3.737670 3.737670 671 +toward 1 25 3.737670 3.737670 668 +seri 1 24 3.761200 3.761200 708 +recommend 1 22 3.850148 3.850148 737 +corpor 1 21 3.912023 3.912023 802 +stand 1 18 4.060443 4.060443 891 +weekli 1 17 4.110874 4.110874 919 +whether 1 17 4.110874 4.110874 918 +attempt 1 17 4.110874 4.110874 917 +sign 1 16 4.174387 4.174387 970 +anywai 1 15 4.248495 4.248495 1047 +comic 1 14 4.317488 4.317488 1103 +social 1 13 4.382027 4.382027 1123 +everyon 1 13 4.382027 4.382027 1148 +regularli 1 11 4.553877 4.553877 1338 +fill 1 11 4.553877 4.553877 1349 +strongli 1 10 4.653960 4.653960 1406 +didn 1 9 4.753590 4.753590 1563 +empir 1 8 4.875197 4.875197 1722 +illustr 1 8 4.875197 4.875197 1679 +told 1 8 4.875197 4.875197 1658 +parti 1 8 4.875197 4.875197 1676 +strip 1 6 5.164786 5.164786 2203 +tri 1 6 5.164786 5.164786 2166 +viewpoint 1 6 5.164786 5.164786 2116 +gate 1 6 5.164786 5.164786 2182 +begun 1 5 5.347108 5.347108 2386 +suffer 1 5 5.347108 5.347108 2268 +fit 1 5 5.347108 5.347108 2285 +drew 1 4 5.568345 5.568345 2980 +theintern 1 4 5.568345 5.568345 2981 +employe 1 4 5.568345 5.568345 2717 +lord 1 4 5.568345 5.568345 2906 +episod 1 4 5.568345 5.568345 2747 +newslett 1 4 5.568345 5.568345 2873 +sumedh 1 3 5.857933 5.857933 3101 +thesumm 1 3 5.857933 5.857933 3763 +redmond 1 3 5.857933 5.857933 3568 +internship 1 3 5.857933 5.857933 3764 +flame 1 3 5.857933 5.857933 3696 +galact 1 2 6.263398 6.263398 5219 +eduthi 1 2 6.263398 6.263398 5382 +alia 1 2 6.263398 6.263398 5383 +arriv 1 2 6.263398 6.263398 4132 +persuad 1 2 6.263398 6.263398 5384 +declin 1 2 6.263398 6.263398 5385 +portrai 1 2 6.263398 6.263398 5386 +bitter 1 2 6.263398 6.263398 5387 +rebel 1 2 6.263398 6.263398 5388 +imperi 1 2 6.263398 6.263398 5389 +bilth 1 1 6.957497 6.957497 11880 +gater 1 1 6.957497 6.957497 11879 +empirewritten 1 1 6.957497 6.957497 11881 +kanetkaremail 1 1 6.957497 6.957497 11882 +kanetkar 1 1 6.957497 6.957497 11883 +atmicrosoft 1 1 6.957497 6.957497 11884 +artwork 1 1 6.957497 6.957497 11885 +leak 1 1 6.957497 6.957497 11886 +theful 1 1 6.957497 6.957497 11887 +stripi 1 1 6.957497 6.957497 11888 +themicrosoft 1 1 6.957497 6.957497 11889 +perceiv 1 1 6.957497 6.957497 11890 +problemand 1 1 6.957497 6.957497 11891 +evilempir 1 1 6.957497 6.957497 11892 +comicstrip 1 1 6.957497 6.957497 11893 +theoutsid 1 1 6.957497 6.957497 11894 +eitherbil 1 1 6.957497 6.957497 11895 +heck 1 1 6.957497 6.957497 11896 +summersof 1 1 6.957497 6.957497 11897 +anyoneinterest 1 1 6.957497 6.957497 11898 +thateveri 1 1 6.957497 6.957497 11899 +theyshow 1 1 6.957497 6.957497 11900 +trivia 1 1 6.957497 6.957497 11901 +makey 1 1 6.957497 6.957497 11902 +disclosur 1 1 6.957497 6.957497 11903 +agreeement 1 1 6.957497 6.957497 11904 +theymad 1 1 6.957497 6.957497 11905 +theirheart 1 1 6.957497 6.957497 11906 +pledg 1 1 6.957497 6.957497 11907 +alleig 1 1 6.957497 6.957497 11908 +everydesk 1 1 6.957497 6.957497 11909 +roosterepisod 1 1 6.957497 6.957497 11910 +threatepisod 1 1 6.957497 6.957497 11911 +insigniaepisod 1 1 6.957497 6.957497 11912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..67f71512 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +document 1 121 2.079442 2.079442 89 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +proceed 1 93 2.397895 2.397895 152 +novemb 1 81 2.484907 2.484907 179 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +upson 1 71 2.639057 2.639057 218 +logic 1 71 2.639057 2.639057 230 +effici 1 73 2.639057 2.639057 233 +workshop 1 71 2.639057 2.639057 239 +goal 1 66 2.708050 2.708050 250 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +august 1 66 2.708050 2.708050 257 +evalu 1 64 2.772589 2.772589 266 +previou 1 62 2.772589 2.772589 290 +descript 1 64 2.772589 2.772589 271 +experi 1 64 2.772589 2.772589 283 +type 1 61 2.833213 2.833213 296 +best 1 59 2.833213 2.833213 299 +summer 1 56 2.890372 2.890372 311 +index 1 56 2.890372 2.890372 309 +space 1 57 2.890372 2.890372 310 +publish 1 57 2.890372 2.890372 326 +digit 1 52 2.995732 2.995732 348 +principl 1 48 3.044522 3.044522 357 +electron 1 47 3.091042 3.091042 379 +term 1 43 3.178054 3.178054 411 +editor 1 41 3.218876 3.218876 433 +autom 1 41 3.218876 3.218876 434 +theoret 1 39 3.258097 3.258097 446 +author 1 39 3.258097 3.258097 450 +close 1 38 3.295837 3.295837 465 +correct 1 38 3.295837 3.295837 462 +return 1 34 3.401197 3.401197 502 +given 1 32 3.465736 3.465736 538 +chapter 1 32 3.465736 3.465736 536 +abl 1 30 3.555348 3.555348 566 +compon 1 30 3.555348 3.555348 570 +limit 1 29 3.583519 3.583519 585 +retriev 1 27 3.637586 3.637586 621 +relev 1 26 3.688879 3.688879 637 +enabl 1 26 3.688879 3.688879 655 +primari 1 25 3.737670 3.737670 669 +task 1 25 3.737670 3.737670 678 +toward 1 25 3.737670 3.737670 668 +magazin 1 24 3.761200 3.761200 704 +handl 1 24 3.761200 3.761200 685 +seri 1 24 3.761200 3.761200 708 +brows 1 23 3.806662 3.806662 726 +input 1 23 3.806662 3.806662 727 +head 1 23 3.806662 3.806662 732 +recognit 1 23 3.806662 3.806662 723 +identifi 1 22 3.850148 3.850148 760 +hierarchi 1 22 3.850148 3.850148 744 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +output 1 21 3.912023 3.912023 788 +busi 1 21 3.912023 3.912023 784 +flexibl 1 21 3.912023 3.912023 792 +geometr 1 19 4.007333 4.007333 852 +boston 1 19 4.007333 4.007333 862 +segment 1 17 4.110874 4.110874 931 +stop 1 17 4.110874 4.110874 942 +adam 1 17 4.110874 4.110874 934 +white 1 17 4.110874 4.110874 951 +letter 1 16 4.174387 4.174387 981 +piec 1 15 4.248495 4.248495 1020 +hierarch 1 15 4.248495 4.248495 1018 +near 1 14 4.317488 4.317488 1091 +bodi 1 13 4.382027 4.382027 1178 +johnson 1 13 4.382027 4.382027 1162 +captur 1 12 4.465908 4.465908 1232 +scan 1 12 4.465908 4.465908 1243 +bruce 1 12 4.465908 4.465908 1226 +arbitrari 1 11 4.553877 4.553877 1359 +paragraph 1 10 4.653960 4.653960 1449 +discov 1 9 4.753590 4.753590 1562 +donald 1 9 4.753590 4.753590 1510 +classif 1 9 4.753590 4.753590 1586 +rais 1 8 4.875197 4.875197 1711 +tobe 1 6 5.164786 5.164786 1995 +textual 1 6 5.164786 5.164786 1979 +superhighwai 1 4 5.568345 5.568345 2943 +similarli 1 3 5.857933 5.857933 3241 +categor 1 3 5.857933 5.857933 3765 +daniela 1 3 5.857933 5.857933 3611 +recipi 1 3 5.857933 5.857933 3627 +andclassif 1 2 6.263398 6.263398 5390 +cue 1 2 6.263398 6.263398 5391 +bharat 1 2 6.263398 6.263398 5107 +forthcom 1 2 6.263398 6.263398 5392 +dartmouth 1 2 6.263398 6.263398 5393 +montral 1 2 6.263398 6.263398 5394 +kristen 1 1 6.957497 6.957497 11913 +accessresearch 1 1 6.957497 6.957497 11915 +mylong 1 1 6.957497 6.957497 11916 +forsophist 1 1 6.957497 6.957497 11917 +manipulationtool 1 1 6.957497 6.957497 11918 +logicalstructur 1 1 6.957497 6.957497 11919 +documentrepresent 1 1 6.957497 6.957497 11920 +hierarchyof 1 1 6.957497 6.957497 11921 +postscriptvers 1 1 6.957497 6.957497 11922 +divid 1 1 6.957497 6.957497 11914 +croft 1 1 6.957497 6.957497 11923 +determininglog 1 1 6.957497 6.957497 11924 +soin 1 1 6.957497 6.957497 11925 +ofmultipl 1 1 6.957497 6.957497 11926 +browsingco 1 1 6.957497 6.957497 11927 +nabil 1 1 6.957497 6.957497 11928 +bhargava 1 1 6.957497 6.957497 11929 +yelena 1 1 6.957497 6.957497 11930 +yesha 1 1 6.957497 6.957497 11931 +seeheim 1 1 6.957497 6.957497 11932 +podp 1 1 6.957497 6.957497 11933 +taxonomi 1 1 6.957497 6.957497 11934 +structureselectron 1 1 6.957497 6.957497 11935 +dag 1 1 6.957497 6.957497 11936 +scholaraward 1 1 6.957497 6.957497 11937 +wordless 1 1 6.957497 6.957497 11938 +analysisand 1 1 6.957497 6.957497 11939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..6621b54a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +model 1 145 1.945910 1.945910 69 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +databas 1 122 2.079442 2.079442 86 +structur 1 106 2.197225 2.197225 105 +site 1 106 2.197225 2.197225 119 +manag 1 114 2.197225 2.197225 125 +techniqu 1 99 2.302585 2.302585 138 +graphic 1 90 2.397895 2.397895 147 +optim 1 79 2.564949 2.564949 197 +resum 1 79 2.564949 2.564949 217 +would 1 67 2.708050 2.708050 251 +java 1 70 2.708050 2.708050 248 +simul 1 66 2.708050 2.708050 255 +multimedia 1 68 2.708050 2.708050 258 +polici 1 64 2.772589 2.772589 279 +browser 1 56 2.890372 2.890372 313 +summer 1 56 2.890372 2.890372 311 +case 1 51 2.995732 2.995732 351 +probabl 1 40 3.258097 3.258097 455 +prototyp 1 38 3.295837 3.295837 463 +statist 1 35 3.401197 3.401197 521 +independ 1 32 3.465736 3.465736 548 +displai 1 23 3.806662 3.806662 712 +applet 1 20 3.951244 3.951244 827 +telecommun 1 9 4.753590 4.753590 1565 +polygon 1 8 4.875197 4.875197 1723 +thrive 1 5 5.347108 5.347108 2257 +revolut 1 5 5.347108 5.347108 2315 +sector 1 3 5.857933 5.857933 3766 +suppot 1 2 6.263398 6.263398 5243 +stochast 1 2 6.263398 6.263398 4832 +masafumi 1 1 6.957497 6.957497 11940 +suzukither 1 1 6.957497 6.957497 11941 +suzukisuzuki 1 1 6.957497 6.957497 11942 +educlassesfal 1 1 6.957497 6.957497 11943 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..02f4685e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +process 1 142 1.945910 1.945910 72 +number 1 130 2.079442 2.079442 97 +multimedia 1 68 2.708050 2.708050 258 +littl 1 39 3.258097 3.258097 454 +movi 1 40 3.258097 3.258097 459 +spend 1 19 4.007333 4.007333 850 +jonathan 1 13 4.382027 4.382027 1174 +edui 1 13 4.382027 4.382027 1193 +departmentof 1 9 4.753590 4.753590 1539 +humor 1 5 5.347108 5.347108 2533 +swartz 1 4 5.568345 5.568345 2878 +heredevelopingrivl 1 1 6.957497 6.957497 11944 +myaddress 1 1 6.957497 6.957497 11945 +brighten 1 1 6.957497 6.957497 11946 +dayjon 1 1 6.957497 6.957497 11947 +connectioncool 1 1 6.957497 6.957497 11948 +siteslast 1 1 6.957497 6.957497 11949 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..641d69af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +person 1 111 2.197225 2.197225 117 +send 1 114 2.197225 2.197225 109 +question 1 91 2.397895 2.397895 141 +academ 1 82 2.484907 2.484907 178 +pagewelcom 1 11 4.553877 4.553877 1344 +studentcomput 1 7 5.010635 5.010635 1963 +departmentcornel 1 5 5.347108 5.347108 2275 +srivastava 1 2 6.263398 6.263398 5395 +sunil 1 1 6.957497 6.957497 11950 +srivastavamast 1 1 6.957497 6.957497 11951 +linkscom 1 1 6.957497 6.957497 11952 +sxsriva 1 1 6.957497 6.957497 11953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..09e33b2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +area 1 144 1.945910 1.945910 80 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +find 1 111 2.197225 2.197225 111 +version 1 113 2.197225 2.197225 122 +check 1 115 2.197225 2.197225 118 +part 1 98 2.302585 2.302585 129 +pictur 1 89 2.397895 2.397895 160 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +graphic 1 90 2.397895 2.397895 147 +octob 1 89 2.397895 2.397895 156 +second 1 81 2.484907 2.484907 166 +internet 1 83 2.484907 2.484907 186 +build 1 85 2.484907 2.484907 184 +thing 1 84 2.484907 2.484907 189 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +orient 1 80 2.564949 2.564949 205 +master 1 76 2.564949 2.564949 216 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +involv 1 71 2.639057 2.639057 227 +practic 1 70 2.708050 2.708050 246 +import 1 65 2.772589 2.772589 282 +septemb 1 65 2.772589 2.772589 274 +street 1 63 2.772589 2.772589 293 +colleg 1 61 2.833213 2.833213 300 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +unix 1 58 2.890372 2.890372 308 +found 1 53 2.944439 2.944439 337 +three 1 54 2.944439 2.944439 330 +run 1 51 2.995732 2.995732 347 +life 1 50 3.044522 3.044522 375 +effect 1 46 3.091042 3.091042 385 +made 1 44 3.135494 3.135494 398 +natur 1 44 3.135494 3.135494 406 +anoth 1 45 3.135494 3.135494 408 +around 1 43 3.178054 3.178054 415 +long 1 43 3.178054 3.178054 413 +music 1 42 3.218876 3.218876 436 +editor 1 41 3.218876 3.218876 433 +live 1 40 3.258097 3.258097 451 +mean 1 37 3.332205 3.332205 477 +field 1 37 3.332205 3.332205 482 +survei 1 35 3.401197 3.401197 513 +human 1 32 3.465736 3.465736 546 +independ 1 32 3.465736 3.465736 548 +becom 1 28 3.610918 3.610918 603 +quit 1 27 3.637586 3.637586 633 +though 1 27 3.637586 3.637586 622 +linux 1 27 3.637586 3.637586 631 +rather 1 26 3.688879 3.688879 642 +higher 1 24 3.761200 3.761200 690 +other 1 24 3.761200 3.761200 697 +wish 1 24 3.761200 3.761200 692 +almost 1 22 3.850148 3.850148 742 +self 1 22 3.850148 3.850148 761 +thu 1 21 3.912023 3.912023 773 +love 1 21 3.912023 3.912023 804 +watch 1 21 3.912023 3.912023 789 +nice 1 20 3.951244 3.951244 809 +supervis 1 20 3.951244 3.951244 840 +ever 1 19 4.007333 4.007333 872 +four 1 18 4.060443 4.060443 905 +listen 1 18 4.060443 4.060443 907 +concentr 1 18 4.060443 4.060443 906 +steven 1 17 4.110874 4.110874 953 +taiwan 1 16 4.174387 4.174387 1006 +hobbi 1 16 4.174387 4.174387 1009 +anyth 1 16 4.174387 4.174387 998 +intel 1 16 4.174387 4.174387 1000 +later 1 15 4.248495 4.248495 1043 +enough 1 15 4.248495 4.248495 1040 +becam 1 14 4.317488 4.317488 1117 +stori 1 14 4.317488 4.317488 1087 +draw 1 14 4.317488 4.317488 1086 +happi 1 14 4.317488 4.317488 1079 +audio 1 14 4.317488 4.317488 1094 +everyth 1 13 4.382027 4.382027 1169 +resolut 1 13 4.382027 4.382027 1172 +huang 1 12 4.465908 4.465908 1202 +career 1 12 4.465908 4.465908 1287 +promot 1 12 4.465908 4.465908 1235 +bruce 1 12 4.465908 4.465908 1226 +land 1 12 4.465908 4.465908 1273 +island 1 11 4.553877 4.553877 1345 +night 1 11 4.553877 4.553877 1319 +host 1 11 4.553877 4.553877 1306 +literatur 1 11 4.553877 4.553877 1300 +broad 1 11 4.553877 4.553877 1302 +ofcomput 1 10 4.653960 4.653960 1442 +rich 1 10 4.653960 4.653960 1396 +seven 1 9 4.753590 4.753590 1561 +calvin 1 9 4.753590 4.753590 1518 +face 1 9 4.753590 4.753590 1501 +ideal 1 8 4.875197 4.875197 1630 +film 1 8 4.875197 4.875197 1761 +exactli 1 7 5.010635 5.010635 1817 +hunt 1 7 5.010635 5.010635 1798 +brought 1 7 5.010635 5.010635 1925 +migrat 1 7 5.010635 5.010635 1851 +discoveri 1 7 5.010635 5.010635 1915 +channel 1 7 5.010635 5.010635 1836 +truth 1 6 5.164786 5.164786 2179 +parent 1 6 5.164786 5.164786 2204 +south 1 6 5.164786 5.164786 2167 +lucki 1 6 5.164786 5.164786 2163 +chat 1 6 5.164786 5.164786 2128 +railroad 1 6 5.164786 5.164786 2161 +myresum 1 6 5.164786 5.164786 2199 +freeli 1 6 5.164786 5.164786 2014 +ship 1 5 5.347108 5.347108 2534 +tang 1 5 5.347108 5.347108 2409 +knew 1 5 5.347108 5.347108 2445 +ofinterest 1 5 5.347108 5.347108 2323 +champion 1 4 5.568345 5.568345 2982 +proud 1 4 5.568345 5.568345 2918 +assur 1 4 5.568345 5.568345 2722 +somewhat 1 4 5.568345 5.568345 2659 +fulfil 1 4 5.568345 5.568345 2932 +hobb 1 4 5.568345 5.568345 2893 +children 1 3 5.857933 5.857933 3767 +talent 1 3 5.857933 5.857933 3768 +peac 1 3 5.857933 5.857933 3769 +pai 1 3 5.857933 5.857933 3672 +asid 1 3 5.857933 5.857933 3770 +lego 1 3 5.857933 5.857933 3188 +dick 1 2 6.263398 6.263398 5396 +tender 1 2 6.263398 6.263398 5397 +tropic 1 2 6.263398 6.263398 5398 +fifteen 1 2 6.263398 6.263398 5399 +marvel 1 2 6.263398 6.263398 5400 +defeat 1 2 6.263398 6.263398 5401 +reward 1 2 6.263398 6.263398 5402 +andwork 1 2 6.263398 6.263398 5403 +relai 1 2 6.263398 6.263398 5404 +weapon 1 2 6.263398 6.263398 5115 +spectrum 1 2 6.263398 6.263398 5405 +blobbi 1 2 6.263398 6.263398 4820 +huangszu 1 1 6.957497 6.957497 11954 +defend 1 1 6.957497 6.957497 11955 +justic 1 1 6.957497 6.957497 11956 +nevermind 1 1 6.957497 6.957497 11957 +iarriv 1 1 6.957497 6.957497 11958 +soundslik 1 1 6.957497 6.957497 11959 +mobi 1 1 6.957497 6.957497 11960 +nointent 1 1 6.957497 6.957497 11961 +whale 1 1 6.957497 6.957497 11962 +digress 1 1 6.957497 6.957497 11963 +wholefamili 1 1 6.957497 6.957497 11964 +philippin 1 1 6.957497 6.957497 11965 +aroundsix 1 1 6.957497 6.957497 11966 +fluentli 1 1 6.957497 6.957497 11967 +bilingu 1 1 6.957497 6.957497 11968 +thepoetri 1 1 6.957497 6.957497 11969 +dynasti 1 1 6.957497 6.957497 11970 +arabian 1 1 6.957497 6.957497 11971 +doveright 1 1 6.957497 6.957497 11972 +ienter 1 1 6.957497 6.957497 11973 +philippineswith 1 1 6.957497 6.957497 11974 +unabashedli 1 1 6.957497 6.957497 11975 +alsoin 1 1 6.957497 6.957497 11976 +whirlwind 1 1 6.957497 6.957497 11977 +awoman 1 1 6.957497 6.957497 11978 +effortlessli 1 1 6.957497 6.957497 11979 +eek 1 1 6.957497 6.957497 11980 +blunt 1 1 6.957497 6.957497 11981 +ienrol 1 1 6.957497 6.957497 11982 +segreg 1 1 6.957497 6.957497 11983 +everydaygeek 1 1 6.957497 6.957497 11984 +fromactu 1 1 6.957497 6.957497 11985 +happili 1 1 6.957497 6.957497 11986 +myspar 1 1 6.957497 6.957497 11987 +linuxnet 1 1 6.957497 6.957497 11988 +plastic 1 1 6.957497 6.957497 11989 +suspens 1 1 6.957497 6.957497 11990 +thriller 1 1 6.957497 6.957497 11991 +sting 1 1 6.957497 6.957497 11992 +sesam 1 1 6.957497 6.957497 11993 +offend 1 1 6.957497 6.957497 11994 +bysom 1 1 6.957497 6.957497 11995 +blatant 1 1 6.957497 6.957497 11996 +highlyinterest 1 1 6.957497 6.957497 11997 +compatiblecomput 1 1 6.957497 6.957497 11998 +metaballsund 1 1 6.957497 6.957497 11999 +techniquesin 1 1 6.957497 6.957497 12000 +andport 1 1 6.957497 6.957497 12001 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..2d5b37e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +technolog 1 131 2.079442 2.079442 102 +move 1 47 3.091042 3.091042 382 +hybrid 1 15 4.248495 4.248495 1057 +henzing 1 3 5.857933 5.857933 3713 +hytechhytech 1 1 6.957497 6.957497 12002 +toolw 1 1 6.957497 6.957497 12003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..0f11fa70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +analysi 1 124 2.079442 2.079442 98 +advanc 1 99 2.302585 2.302585 130 +real 1 93 2.397895 2.397895 144 +control 1 82 2.484907 2.484907 164 +logic 1 71 2.639057 2.639057 230 +septemb 1 65 2.772589 2.772589 274 +linear 1 41 3.218876 3.218876 431 +formal 1 37 3.332205 3.332205 478 +concurr 1 34 3.401197 3.401197 501 +symbol 1 27 3.637586 3.637586 620 +universityithaca 1 24 3.761200 3.761200 710 +methodolog 1 23 3.806662 3.806662 733 +verif 1 20 3.951244 3.951244 826 +aid 1 18 4.060443 4.060443 904 +eduphon 1 15 4.248495 4.248495 1060 +transit 1 15 4.248495 4.248495 1046 +hybrid 1 15 4.248495 4.248495 1057 +embed 1 14 4.317488 4.317488 1102 +automata 1 13 4.382027 4.382027 1135 +modul 1 10 4.653960 4.653960 1434 +departmentcornel 1 5 5.347108 5.347108 2275 +henzing 1 3 5.857933 5.857933 3713 +professorcomput 1 3 5.857933 5.857933 3714 +worldwid 1 3 5.857933 5.857933 3704 +checker 1 3 5.857933 5.857933 3644 +systemsr 1 2 6.263398 6.263398 4312 +bibtex 1 2 6.263398 6.263398 5406 +henzingerthoma 1 1 6.957497 6.957497 12004 +movedassist 1 1 6.957497 6.957497 12005 +researchform 1 1 6.957497 6.957497 12006 +researchat 1 1 6.957497 6.957497 12007 +cornelland 1 1 6.957497 6.957497 12008 +resumepublicationsreact 1 1 6.957497 6.957497 12009 +systemsclock 1 1 6.957497 6.957497 12010 +systemshybrid 1 1 6.957497 6.957497 12011 +systemsbibliographi 1 1 6.957497 6.957497 12012 +publicationstoolshytech 1 1 6.957497 6.957497 12013 +systemscoursesc 1 1 6.957497 6.957497 12014 +languagesconferenceshybrid 1 1 6.957497 6.957497 12015 +systemscav 1 1 6.957497 6.957497 12016 +verificationlast 1 1 6.957497 6.957497 12017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..ac2c222f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +manag 1 114 2.197225 2.197225 125 +homepag 1 93 2.397895 2.397895 148 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +upson 1 71 2.639057 2.639057 218 +previou 1 62 2.772589 2.772589 290 +life 1 50 3.044522 3.044522 375 +eduoffic 1 33 3.433987 3.433987 531 +quot 1 29 3.583519 3.583519 582 +psycholog 1 15 4.248495 4.248495 1054 +horu 1 14 4.317488 4.317488 1116 +social 1 13 4.382027 4.382027 1123 +reness 1 11 4.553877 4.553877 1333 +hockei 1 8 4.875197 4.875197 1760 +atcornel 1 6 5.164786 5.164786 2131 +advis 1 6 5.164786 5.164786 2173 +hickei 1 4 5.568345 5.568345 2845 +hallphon 1 4 5.568345 5.568345 2900 +schneider 1 4 5.568345 5.568345 2868 +takako 1 3 5.857933 5.857933 3538 +backcountri 1 3 5.857933 5.857933 3686 +byrobbert 1 1 6.957497 6.957497 12018 +andfr 1 1 6.957497 6.957497 12019 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..90e91237 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +compil 1 122 2.079442 2.079442 96 +environ 1 84 2.484907 2.484907 177 +editor 1 41 3.218876 3.218876 433 +transform 1 32 3.465736 3.465736 542 +attribut 1 14 4.317488 4.317488 1092 +eduresearch 1 6 5.164786 5.164786 2205 +increment 1 6 5.164786 5.164786 2206 +grammar 1 6 5.164786 5.164786 2058 +tim_teitelbaum 1 1 6.957497 6.957497 12020 +teitelbaumassoci 1 1 6.957497 6.957497 12021 +adavita 1 1 6.957497 6.957497 12022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..65672d74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +support 1 132 1.945910 1.945910 83 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +commun 1 95 2.397895 2.397895 157 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +proceed 1 93 2.397895 2.397895 152 +level 1 87 2.484907 2.484907 180 +activ 1 84 2.484907 2.484907 182 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +control 1 82 2.484907 2.484907 164 +messag 1 76 2.564949 2.564949 212 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +upson 1 71 2.639057 2.639057 218 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +abstract 1 62 2.772589 2.772589 276 +evalu 1 64 2.772589 2.772589 266 +function 1 62 2.772589 2.772589 275 +simpl 1 60 2.833213 2.833213 298 +share 1 59 2.833213 2.833213 304 +automat 1 61 2.833213 2.833213 306 +juli 1 60 2.833213 2.833213 305 +sever 1 56 2.890372 2.890372 322 +think 1 57 2.890372 2.890372 314 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +talk 1 53 2.944439 2.944439 336 +run 1 51 2.995732 2.995732 347 +digit 1 52 2.995732 2.995732 348 +without 1 50 3.044522 3.044522 370 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +offer 1 43 3.178054 3.178054 414 +mechan 1 43 3.178054 3.178054 416 +annual 1 40 3.258097 3.258097 458 +slide 1 38 3.295837 3.295837 467 +workstat 1 37 3.332205 3.332205 479 +extend 1 32 3.465736 3.465736 539 +platform 1 29 3.583519 3.583519 591 +limit 1 29 3.583519 3.583519 585 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +berkelei 1 26 3.688879 3.688879 657 +proc 1 26 3.688879 3.688879 649 +supercomput 1 25 3.737670 3.737670 681 +toward 1 25 3.737670 3.737670 668 +fundament 1 25 3.737670 3.737670 661 +magazin 1 24 3.761200 3.761200 704 +lead 1 23 3.806662 3.806662 718 +thread 1 23 3.806662 3.806662 722 +department 1 20 3.951244 3.951244 839 +fine 1 20 3.951244 3.951244 822 +speed 1 18 4.060443 4.060443 911 +interconnect 1 17 4.110874 4.110874 937 +latenc 1 16 4.174387 4.174387 993 +diego 1 16 4.174387 4.174387 992 +fourth 1 16 4.174387 4.174387 999 +cambridg 1 16 4.174387 4.174387 1008 +month 1 15 4.248495 4.248495 1025 +micro 1 15 4.248495 4.248495 1031 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +sigplan 1 13 4.382027 4.382027 1190 +conf 1 13 4.382027 4.382027 1181 +guest 1 12 4.465908 4.465908 1220 +bandwidth 1 11 4.553877 4.553877 1365 +multithread 1 11 4.553877 4.553877 1315 +werner 1 10 4.653960 4.653960 1385 +grain 1 10 4.653960 4.653960 1448 +santa 1 10 4.653960 4.653960 1441 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +patterson 1 9 4.753590 4.753590 1554 +port 1 8 4.875197 4.875197 1766 +entri 1 8 4.875197 4.875197 1678 +vineet 1 8 4.875197 4.875197 1639 +vogel 1 8 4.875197 4.875197 1622 +andcomput 1 8 4.875197 4.875197 1623 +gold 1 8 4.875197 4.875197 1745 +coast 1 8 4.875197 4.875197 1746 +realist 1 8 4.875197 4.875197 1665 +forum 1 6 5.164786 5.164786 2027 +fish 1 6 5.164786 5.164786 2207 +goldstein 1 6 5.164786 5.164786 2168 +greec 1 6 5.164786 5.164786 2208 +water 1 5 5.347108 5.347108 2535 +plant 1 5 5.347108 5.347108 2497 +buch 1 5 5.347108 5.347108 2272 +culler 1 5 5.347108 5.347108 2381 +symp 1 5 5.347108 5.347108 2376 +australia 1 5 5.347108 5.347108 2478 +dataflow 1 5 5.347108 5.347108 2390 +karp 1 5 5.347108 5.347108 2284 +ifip 1 5 5.347108 5.347108 2459 +hallphon 1 4 5.568345 5.568345 2900 +coursesc 1 4 5.568345 5.568345 2692 +tire 1 4 5.568345 5.568345 2799 +password 1 4 5.568345 5.568345 2594 +basu 1 4 5.568345 5.568345 2843 +medium 1 4 5.568345 5.568345 2834 +interfacefor 1 3 5.857933 5.857933 3534 +frontier 1 3 5.857933 5.857933 3771 +anindya 1 3 5.857933 5.857933 3535 +avula 1 3 5.857933 5.857933 3600 +abridg 1 3 5.857933 5.857933 3772 +schauser 1 3 5.857933 5.857933 3599 +dusseau 1 3 5.857933 5.857933 3382 +yelick 1 3 5.857933 5.857933 3374 +crete 1 3 5.857933 5.857933 3773 +lan 1 2 6.263398 6.263398 4359 +includingth 1 2 6.263398 6.263398 4493 +pond 1 2 6.263398 6.263398 5127 +firewal 1 2 6.263398 6.263398 5407 +distributedcomput 1 2 6.263398 6.263398 5336 +communicationarchitectur 1 2 6.263398 6.263398 4859 +krishnamurthi 1 2 6.263398 6.263398 5408 +lumetta 1 2 6.263398 6.263398 5409 +dalli 1 2 6.263398 6.263398 4517 +logp 1 2 6.263398 6.263398 4227 +orlando 1 2 6.263398 6.263398 5410 +clara 1 2 6.263398 6.263398 4958 +barrera 1 2 6.263398 6.263398 4309 +eickenassist 1 1 6.957497 6.957497 12024 +eduprojectsth 1 1 6.957497 6.957497 12025 +architectureprovid 1 1 6.957497 6.957497 12026 +latencyand 1 1 6.957497 6.957497 12027 +currentimplement 1 1 6.957497 6.957497 12028 +tonon 1 1 6.957497 6.957497 12029 +spmd 1 1 6.957497 6.957497 12030 +extensionto 1 1 6.957497 6.957497 12031 +newplatform 1 1 6.957497 6.957497 12032 +multprocessor 1 1 6.957497 6.957497 12033 +computerorgan 1 1 6.957497 6.957497 12034 +maynd 1 1 6.957497 6.957497 12035 +departement 1 1 6.957497 6.957497 12023 +pagestv 1 1 6.957497 6.957497 12036 +macpppwhich 1 1 6.957497 6.957497 12037 +everhav 1 1 6.957497 6.957497 12038 +passwordssuddenli 1 1 6.957497 6.957497 12039 +installationinstruct 1 1 6.957497 6.957497 12040 +publicationsu 1 1 6.957497 6.957497 12041 +atmnetwork 1 1 6.957497 6.957497 12042 +controlledthread 1 1 6.957497 6.957497 12043 +spertu 1 1 6.957497 6.957497 12044 +modelof 1 1 6.957497 6.957497 12045 +sahai 1 1 6.957497 6.957497 12046 +santo 1 1 6.957497 6.957497 12047 +subramonian 1 1 6.957497 6.957497 12048 +dataflowmultiprocess 1 1 6.957497 6.957497 12049 +forintegr 1 1 6.957497 6.957497 12050 +forleni 1 1 6.957497 6.957497 12051 +minimalhardwar 1 1 6.957497 6.957497 12052 +wawrzynek 1 1 6.957497 6.957497 12053 +architecturesfor 1 1 6.957497 6.957497 12054 +saavedra 1 1 6.957497 6.957497 12055 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..1abd3c1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +contact 1 153 1.791759 1.791759 59 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +pleas 1 113 2.197225 2.197225 114 +real 1 93 2.397895 2.397895 144 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +appli 1 71 2.639057 2.639057 226 +date 1 51 2.995732 2.995732 344 +done 1 47 3.091042 3.091042 381 +often 1 31 3.496508 3.496508 551 +actual 1 28 3.610918 3.610918 604 +enjoi 1 26 3.688879 3.688879 660 +assum 1 19 4.007333 4.007333 845 +incomput 1 14 4.317488 4.317488 1096 +touch 1 12 4.465908 4.465908 1288 +moment 1 11 4.553877 4.553877 1379 +apart 1 7 5.010635 5.010635 1936 +somewhat 1 4 5.568345 5.568345 2659 +disclaim 1 4 5.568345 5.568345 2847 +lfar 1 2 6.263398 6.263398 4106 +erlingsson 1 2 6.263398 6.263398 4107 +pagelfar 1 1 6.957497 6.957497 12056 +specificationi 1 1 6.957497 6.957497 12057 +incongruousiceland 1 1 6.957497 6.957497 12058 +implementationbackgroundwher 1 1 6.957497 6.957497 12059 +activitieswhat 1 1 6.957497 6.957497 12060 +schedulewher 1 1 6.957497 6.957497 12061 +researchwhat 1 1 6.957497 6.957497 12062 +interestswhat 1 1 6.957497 6.957497 12063 +acquaintancesthos 1 1 6.957497 6.957497 12064 +infohow 1 1 6.957497 6.957497 12065 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..e966529a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +center 1 88 2.397895 2.397895 158 +novemb 1 81 2.484907 2.484907 179 +resourc 1 81 2.484907 2.484907 172 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +ithaca 1 65 2.772589 2.772589 294 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +run 1 51 2.995732 2.995732 347 +robert 1 30 3.555348 3.555348 567 +packag 1 28 3.610918 3.610918 614 +releas 1 28 3.610918 3.610918 616 +geometri 1 22 3.850148 3.850148 752 +geometr 1 19 4.007333 4.007333 852 +element 1 18 4.060443 4.060443 895 +finit 1 14 4.317488 4.317488 1106 +jonathan 1 13 4.382027 4.382027 1174 +mesh 1 11 4.553877 4.553877 1351 +stephen 1 11 4.553877 4.553877 1342 +solver 1 7 5.010635 5.010635 1911 +minnesota 1 5 5.347108 5.347108 2469 +websit 1 4 5.568345 5.568345 2726 +schneider 1 4 5.568345 5.568345 2868 +vavasi 1 3 5.857933 5.857933 3526 +threedimens 1 1 6.957497 6.957497 12066 +themesh 1 1 6.957497 6.957497 12067 +softwaredownload 1 1 6.957497 6.957497 12068 +andqmg 1 1 6.957497 6.957497 12069 +mcphedran 1 1 6.957497 6.957497 12070 +offinit 1 1 6.957497 6.957497 12071 +ofsoftwar 1 1 6.957497 6.957497 12072 +computationalgeometri 1 1 6.957497 6.957497 12073 +shewchuk 1 1 6.957497 6.957497 12074 +triangl 1 1 6.957497 6.957497 12075 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..7b5e380a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +object 1 138 1.945910 1.945910 79 +analysi 1 124 2.079442 2.079442 98 +document 1 121 2.079442 2.079442 89 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +specif 1 106 2.197225 2.197225 106 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +associ 1 93 2.397895 2.397895 151 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +chang 1 82 2.484907 2.484907 163 +level 1 87 2.484907 2.484907 180 +novemb 1 81 2.484907 2.484907 179 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +solv 1 73 2.639057 2.639057 234 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +ithaca 1 65 2.772589 2.772589 294 +laboratori 1 63 2.772589 2.772589 292 +complex 1 64 2.772589 2.772589 269 +creat 1 63 2.772589 2.772589 277 +unix 1 58 2.890372 2.890372 308 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +numer 1 49 3.044522 3.044522 369 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +map 1 39 3.258097 3.258097 452 +annual 1 40 3.258097 3.258097 458 +microsoft 1 38 3.295837 3.295837 468 +least 1 35 3.401197 3.401197 516 +domain 1 30 3.555348 3.555348 564 +depend 1 29 3.583519 3.583519 583 +packag 1 28 3.610918 3.610918 614 +releas 1 28 3.610918 3.610918 616 +bound 1 26 3.688879 3.688879 659 +valu 1 25 3.737670 3.737670 665 +aspect 1 25 3.737670 3.737670 663 +period 1 22 3.850148 3.850148 743 +divis 1 21 3.912023 3.912023 803 +grad 1 20 3.951244 3.951244 837 +geometr 1 19 4.007333 4.007333 852 +element 1 18 4.060443 4.060443 895 +scott 1 18 4.060443 4.060443 884 +matrix 1 17 4.110874 4.110874 933 +squar 1 14 4.317488 4.317488 1082 +finit 1 14 4.317488 4.317488 1106 +anonym 1 14 4.317488 4.317488 1100 +topolog 1 14 4.317488 4.317488 1089 +matlab 1 14 4.317488 4.317488 1081 +whose 1 13 4.382027 4.382027 1166 +forth 1 13 4.382027 4.382027 1186 +weight 1 12 4.465908 4.465908 1204 +stephen 1 11 4.553877 4.553877 1342 +mesh 1 11 4.553877 4.553877 1351 +faster 1 11 4.553877 4.553877 1323 +rhode 1 9 4.753590 4.753590 1579 +cross 1 8 4.875197 4.875197 1703 +boundari 1 7 5.010635 5.010635 1929 +aris 1 7 5.010635 5.010635 1924 +dimens 1 7 5.010635 5.010635 1930 +argonn 1 5 5.347108 5.347108 2461 +colleagu 1 5 5.347108 5.347108 2304 +dual 1 5 5.347108 5.347108 2522 +hole 1 5 5.347108 5.347108 2518 +compat 1 5 5.347108 5.347108 2485 +bldg 1 4 5.568345 5.568345 2983 +manuscript 1 4 5.568345 5.568345 2750 +orthogon 1 4 5.568345 5.568345 2832 +ratio 1 4 5.568345 5.568345 2942 +triangul 1 4 5.568345 5.568345 2903 +conform 1 4 5.568345 5.568345 2941 +vrml 1 4 5.568345 5.568345 2949 +vavasi 1 3 5.857933 5.857933 3526 +aren 1 3 5.857933 5.857933 3512 +trefethen 1 3 5.857933 5.857933 3528 +hough 1 3 5.857933 5.857933 3527 +delaunai 1 3 5.857933 5.857933 3619 +ellipt 1 3 5.857933 5.857933 3774 +cleaner 1 3 5.857933 5.857933 3775 +acceler 1 2 6.263398 6.263398 5411 +mitchel 1 2 6.263398 6.263398 4792 +driscol 1 2 6.263398 6.263398 4836 +polyhedr 1 2 6.263398 6.263398 5412 +andautomat 1 2 6.263398 6.263398 5413 +onsabbat 1 1 6.957497 6.957497 12076 +cass 1 1 6.957497 6.957497 12077 +tsure 1 1 6.957497 6.957497 12078 +essaybi 1 1 6.957497 6.957497 12079 +issuesnumer 1 1 6.957497 6.957497 12080 +problemsgeometr 1 1 6.957497 6.957497 12081 +computingspars 1 1 6.957497 6.957497 12082 +computationsi 1 1 6.957497 6.957497 12083 +primal 1 1 6.957497 6.957497 12084 +interiorpoint 1 1 6.957497 6.957497 12085 +decompositionfor 1 1 6.957497 6.957497 12086 +gridcut 1 1 6.957497 6.957497 12087 +hyperplan 1 1 6.957497 6.957497 12088 +packagei 1 1 6.957497 6.957497 12089 +verycompl 1 1 6.957497 6.957497 12090 +unstructuredtetrahedr 1 1 6.957497 6.957497 12091 +boundaryvalu 1 1 6.957497 6.957497 12092 +iswritten 1 1 6.957497 6.957497 12093 +distributedfor 1 1 6.957497 6.957497 12094 +distributionbegan 1 1 6.957497 6.957497 12095 +manyimprov 1 1 6.957497 6.957497 12096 +compatibilitywith 1 1 6.957497 6.957497 12097 +pleasese 1 1 6.957497 6.957497 12098 +reportback 1 1 6.957497 6.957497 12099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..26ed3678 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +support 1 132 1.945910 1.945910 83 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +browser 1 56 2.890372 2.890372 313 +netscap 1 44 3.135494 3.135494 395 +frame 1 24 3.761200 3.761200 684 +higher 1 24 3.761200 3.761200 690 +arun 1 4 5.568345 5.568345 2736 +verma 1 2 6.263398 6.263398 4341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..08df42ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +read 1 154 1.791759 1.791759 47 +browser 1 56 2.890372 2.890372 313 +date 1 51 2.995732 2.995732 344 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +thank 1 23 3.806662 3.806662 721 +vinc 1 2 6.263398 6.263398 5414 +suck 1 2 6.263398 6.263398 5232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..df8cc61d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +databas 1 122 2.079442 2.079442 86 +advanc 1 99 2.302585 2.302585 130 +start 1 83 2.484907 2.484907 173 +thing 1 84 2.484907 2.484907 189 +internet 1 83 2.484907 2.484907 186 +multimedia 1 68 2.708050 2.708050 258 +give 1 50 3.044522 3.044522 359 +break 1 20 3.951244 3.951244 812 +pagec 1 15 4.248495 4.248495 1011 +pageer 1 3 5.857933 5.857933 3776 +vitrano 1 1 6.957497 6.957497 12100 +pagehei 1 1 6.957497 6.957497 12101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..9cfa01ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +info 1 85 2.484907 2.484907 176 +larg 1 82 2.484907 2.484907 168 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +function 1 62 2.772589 2.772589 275 +collect 1 65 2.772589 2.772589 268 +visit 1 63 2.772589 2.772589 288 +back 1 60 2.833213 2.833213 297 +think 1 57 2.890372 2.890372 314 +browser 1 56 2.890372 2.890372 313 +maintain 1 51 2.995732 2.995732 342 +right 1 48 3.044522 3.044522 363 +visitor 1 49 3.044522 3.044522 371 +life 1 50 3.044522 3.044522 375 +effect 1 46 3.091042 3.091042 385 +favorit 1 44 3.135494 3.135494 410 +made 1 44 3.135494 3.135494 398 +around 1 43 3.178054 3.178054 415 +might 1 41 3.218876 3.218876 426 +small 1 39 3.258097 3.258097 447 +respons 1 37 3.332205 3.332205 476 +expect 1 37 3.332205 3.332205 484 +copyright 1 36 3.367296 3.367296 495 +word 1 34 3.401197 3.401197 508 +given 1 32 3.465736 3.465736 538 +someth 1 31 3.496508 3.496508 554 +anim 1 31 3.496508 3.496508 557 +held 1 28 3.610918 3.610918 600 +relev 1 26 3.688879 3.688879 637 +reach 1 24 3.761200 3.761200 688 +yahoo 1 24 3.761200 3.761200 707 +reserv 1 20 3.951244 3.951244 808 +applet 1 20 3.951244 3.951244 827 +wonder 1 20 3.951244 3.951244 815 +qualiti 1 20 3.951244 3.951244 832 +els 1 19 4.007333 4.007333 843 +capabl 1 15 4.248495 4.248495 1016 +happi 1 14 4.317488 4.317488 1079 +deriv 1 13 4.382027 4.382027 1145 +clock 1 11 4.553877 4.553877 1320 +regard 1 11 4.553877 4.553877 1309 +vista 1 10 4.653960 4.653960 1452 +ground 1 7 5.010635 5.010635 1955 +usabl 1 7 5.010635 5.010635 1810 +heavi 1 7 5.010635 5.010635 1841 +usag 1 6 5.164786 5.164786 2209 +vivek 1 6 5.164786 5.164786 2210 +promis 1 6 5.164786 5.164786 2037 +million 1 5 5.347108 5.347108 2495 +settimeout 1 5 5.347108 5.347108 2536 +wast 1 5 5.347108 5.347108 2537 +seed 1 4 5.568345 5.568345 2984 +timertwo 1 4 5.568345 5.568345 2985 +transmit 1 4 5.568345 5.568345 2835 +fulli 1 4 5.568345 5.568345 2986 +impli 1 3 5.857933 5.857933 3348 +dont 1 3 5.857933 5.857933 3473 +kolla 1 1 6.957497 6.957497 12102 +unwant 1 1 6.957497 6.957497 12104 +warrante 1 1 6.957497 6.957497 12105 +scrollit 1 1 6.957497 6.957497 12103 +zillion 1 1 6.957497 6.957497 12106 +thoughtsfriend 1 1 6.957497 6.957497 12107 +foeslinksa 1 1 6.957497 6.957497 12108 +tryalta 1 1 6.957497 6.957497 12109 +theinktomiresumein 1 1 6.957497 6.957497 12110 +htmlin 1 1 6.957497 6.957497 12111 +postscriptin 1 1 6.957497 6.957497 12112 +perfectin 1 1 6.957497 6.957497 12113 +asciith 1 1 6.957497 6.957497 12114 +wanna 1 1 6.957497 6.957497 12115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..a793647c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +comment 1 93 2.397895 2.397895 146 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +david 1 71 2.639057 2.639057 232 +differ 1 66 2.708050 2.708050 253 +prof 1 64 2.772589 2.772589 273 +function 1 62 2.772589 2.772589 275 +simpl 1 60 2.833213 2.833213 298 +particular 1 51 2.995732 2.995732 352 +friend 1 48 3.044522 3.044522 376 +might 1 41 3.218876 3.218876 426 +paul 1 38 3.295837 3.295837 471 +everi 1 34 3.401197 3.401197 519 +either 1 35 3.401197 3.401197 506 +seem 1 18 4.060443 4.060443 899 +matrix 1 17 4.110874 4.110874 933 +spars 1 16 4.174387 4.174387 989 +difficulti 1 13 4.382027 4.382027 1132 +guess 1 10 4.653960 4.653960 1443 +henri 1 10 4.653960 4.653960 1417 +said 1 9 4.753590 4.753590 1571 +govern 1 9 4.753590 4.753590 1581 +judg 1 8 4.875197 4.875197 1644 +cornellunivers 1 7 5.010635 5.010635 1916 +keshav 1 7 5.010635 5.010635 1852 +remind 1 7 5.010635 5.010635 1799 +encrypt 1 7 5.010635 5.010635 1835 +legal 1 6 5.164786 5.164786 2094 +privaci 1 6 5.164786 5.164786 2144 +strong 1 6 5.164786 5.164786 2029 +pingali 1 4 5.568345 5.568345 2956 +bernoulli 1 4 5.568345 5.568345 2955 +stodghil 1 4 5.568345 5.568345 2864 +lawyer 1 4 5.568345 5.568345 2836 +lord 1 4 5.568345 5.568345 2906 +wherea 1 4 5.568345 5.568345 2597 +functionof 1 2 6.263398 6.263398 5415 +todayth 1 2 6.263398 6.263398 5416 +vlad 1 1 6.957497 6.957497 12116 +pagevladimir 1 1 6.957497 6.957497 12117 +kotlyarvladimir 1 1 6.957497 6.957497 12118 +wereteach 1 1 6.957497 6.957497 12119 +andindu 1 1 6.957497 6.957497 12120 +kodukulapubl 1 1 6.957497 6.957497 12121 +kissing 1 1 6.957497 6.957497 12122 +profess 1 1 6.957497 6.957497 12123 +abritish 1 1 6.957497 6.957497 12124 +sveri 1 1 6.957497 6.957497 12125 +den 1 1 6.957497 6.957497 12126 +asolut 1 1 6.957497 6.957497 12127 +outpac 1 1 6.957497 6.957497 12128 +ofsolut 1 1 6.957497 6.957497 12129 +hardenough 1 1 6.957497 6.957497 12130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..2f0ea5eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +hall 1 146 1.945910 1.945910 65 +august 1 66 2.708050 2.708050 257 +ithaca 1 65 2.772589 2.772589 294 +mapl 1 11 4.553877 4.553877 1376 +rhode 1 9 4.753590 4.753590 1579 +vijai 1 4 5.568345 5.568345 2960 +menon 1 2 6.263398 6.263398 5249 +menonvijai 1 1 6.957497 6.957497 12131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..d81321f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +academ 1 82 2.484907 2.484907 178 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +upson 1 71 2.639057 2.639057 218 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +august 1 66 2.708050 2.708050 257 +third 1 43 3.178054 3.178054 412 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +computersci 1 30 3.555348 3.555348 562 +detect 1 26 3.688879 3.688879 646 +bookmark 1 26 3.688879 3.688879 639 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +chen 1 21 3.912023 3.912023 791 +beij 1 19 4.007333 4.007333 876 +failur 1 18 4.060443 4.060443 898 +tsinghua 1 13 4.382027 4.382027 1195 +soccer 1 8 4.875197 4.875197 1752 +distributedsystem 1 6 5.164786 5.164786 2022 +spare 1 6 5.164786 5.164786 2177 +membership 1 3 5.857933 5.857933 3751 +bachelorand 1 2 6.263398 6.263398 5128 +chinami 1 2 6.263398 6.263398 5129 +toueg 1 2 6.263398 6.263398 5339 +pagewei 1 1 6.957497 6.957497 12132 +weichen 1 1 6.957497 6.957497 12133 +inpartition 1 1 6.957497 6.957497 12134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..88b223a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +site 1 106 2.197225 2.197225 119 +homepag 1 93 2.397895 2.397895 148 +window 1 68 2.708050 2.708050 242 +movi 1 40 3.258097 3.258097 459 +newspap 1 12 4.465908 4.465908 1280 +weitsang 1 2 6.263398 6.263398 4088 +lwhere 1 1 6.957497 6.957497 12135 +fromwhat 1 1 6.957497 6.957497 12136 +watchwhat 1 1 6.957497 6.957497 12137 +likec 1 1 6.957497 6.957497 12138 +wrotepictur 1 1 6.957497 6.957497 12139 +drawa 1 1 6.957497 6.957497 12140 +motifcomput 1 1 6.957497 6.957497 12141 +theoryhom 1 1 6.957497 6.957497 12142 +vimi 1 1 6.957497 6.957497 12143 +tsearch 1 1 6.957497 6.957497 12144 +webcoolest 1 1 6.957497 6.957497 12145 +sitessharewar 1 1 6.957497 6.957497 12146 +archivem 1 1 6.957497 6.957497 12147 +onlineunivers 1 1 6.957497 6.957497 12148 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..0d5a6c8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +final 1 116 2.197225 2.197225 108 +technic 1 100 2.302585 2.302585 140 +graphic 1 90 2.397895 2.397895 147 +academ 1 82 2.484907 2.484907 178 +resum 1 79 2.564949 2.564949 217 +nation 1 74 2.639057 2.639057 240 +multimedia 1 68 2.708050 2.708050 258 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +plan 1 65 2.772589 2.772589 272 +result 1 65 2.772589 2.772589 281 +extens 1 53 2.944439 2.944439 340 +tabl 1 51 2.995732 2.995732 346 +telephon 1 50 3.044522 3.044522 373 +still 1 50 3.044522 3.044522 362 +understand 1 47 3.091042 3.091042 384 +favorit 1 44 3.135494 3.135494 410 +vision 1 41 3.218876 3.218876 430 +field 1 37 3.332205 3.332205 482 +photo 1 31 3.496508 3.496508 561 +except 1 28 3.610918 3.610918 607 +team 1 27 3.637586 3.637586 625 +background 1 25 3.737670 3.737670 664 +sport 1 25 3.737670 3.737670 683 +other 1 24 3.761200 3.761200 697 +tenni 1 20 3.951244 3.951244 838 +partial 1 18 4.060443 4.060443 900 +taiwan 1 16 4.174387 4.174387 1006 +drive 1 15 4.248495 4.248495 1052 +avenu 1 12 4.465908 4.465908 1277 +basketbal 1 12 4.465908 4.465908 1289 +danc 1 12 4.465908 4.465908 1278 +skill 1 12 4.465908 4.465908 1205 +calcul 1 12 4.465908 4.465908 1268 +meng 1 12 4.465908 4.465908 1214 +mapl 1 11 4.553877 4.553877 1376 +magic 1 11 4.553877 4.553877 1358 +player 1 11 4.553877 4.553877 1371 +market 1 11 4.553877 4.553877 1361 +swim 1 9 4.753590 4.753590 1599 +volleybal 1 9 4.753590 4.753590 1598 +rivl 1 8 4.875197 4.875197 1632 +job 1 8 4.875197 4.875197 1702 +morph 1 7 5.010635 5.010635 1937 +financi 1 6 5.164786 5.164786 2197 +sing 1 5 5.347108 5.347108 2499 +hung 1 3 5.857933 5.857933 3524 +habit 1 3 5.857933 5.857933 3777 +atlanta 1 3 5.857933 5.857933 3778 +bowl 1 2 6.263398 6.263398 5417 +orlando 1 2 6.263398 6.263398 5410 +glavin 1 1 6.957497 6.957497 12149 +billiard 1 1 6.957497 6.957497 12150 +brave 1 1 6.957497 6.957497 12151 +anferne 1 1 6.957497 6.957497 12152 +hardawai 1 1 6.957497 6.957497 12153 +warp 1 1 6.957497 6.957497 12154 +webpaint 1 1 6.957497 6.957497 12155 +whkao 1 1 6.957497 6.957497 12156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..2b562e4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +present 1 91 2.397895 2.397895 145 +search 1 95 2.397895 2.397895 155 +school 1 84 2.484907 2.484907 188 +west 1 83 2.484907 2.484907 192 +master 1 76 2.564949 2.564949 216 +optim 1 79 2.564949 2.564949 197 +write 1 72 2.639057 2.639057 222 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +plai 1 60 2.833213 2.833213 307 +type 1 61 2.833213 2.833213 296 +reason 1 57 2.890372 2.890372 318 +week 1 52 2.995732 2.995732 343 +visitor 1 49 3.044522 3.044522 371 +cool 1 49 3.044522 3.044522 374 +could 1 46 3.091042 3.091042 383 +get 1 46 3.091042 3.091042 380 +even 1 45 3.135494 3.135494 393 +york 1 41 3.218876 3.218876 435 +live 1 40 3.258097 3.258097 451 +probabl 1 40 3.258097 3.258097 455 +seminar 1 38 3.295837 3.295837 470 +formal 1 37 3.332205 3.332205 478 +robot 1 36 3.367296 3.367296 497 +campu 1 27 3.637586 3.637586 623 +enjoi 1 26 3.688879 3.688879 660 +spent 1 25 3.737670 3.737670 676 +never 1 25 3.737670 3.737670 671 +william 1 22 3.850148 3.850148 765 +minut 1 20 3.951244 3.951244 810 +mpeg 1 20 3.951244 3.951244 831 +tenni 1 20 3.951244 3.951244 838 +wrote 1 20 3.951244 3.951244 830 +beauti 1 18 4.060443 4.060443 912 +weekli 1 17 4.110874 4.110874 919 +match 1 16 4.174387 4.174387 965 +practicum 1 16 4.174387 4.174387 960 +princeton 1 15 4.248495 4.248495 1042 +mellon 1 13 4.382027 4.382027 1179 +scienceat 1 11 4.553877 4.553877 1375 +systemsc 1 11 4.553877 4.553877 1293 +mapl 1 11 4.553877 4.553877 1376 +awai 1 10 4.653960 4.653960 1447 +guess 1 10 4.653960 4.653960 1443 +jersei 1 9 4.753590 4.753590 1587 +motorola 1 9 4.753590 4.753590 1546 +besid 1 8 4.875197 4.875197 1681 +partner 1 8 4.875197 4.875197 1648 +parti 1 8 4.875197 4.875197 1676 +on 1 8 4.875197 4.875197 1628 +sleep 1 6 5.164786 5.164786 2211 +south 1 6 5.164786 5.164786 2167 +piano 1 6 5.164786 5.164786 2201 +florida 1 5 5.347108 5.347108 2526 +compet 1 5 5.347108 5.347108 2462 +coral 1 5 5.347108 5.347108 2538 +quantifi 1 5 5.347108 5.347108 2525 +thrive 1 5 5.347108 5.347108 2257 +revolut 1 5 5.347108 5.347108 2315 +encod 1 4 5.568345 5.568345 2929 +somehow 1 4 5.568345 5.568345 2974 +essai 1 4 5.568345 5.568345 2948 +wart 1 4 5.568345 5.568345 2987 +exit 1 3 5.857933 5.857933 3124 +engineeringand 1 3 5.857933 5.857933 3779 +hpux 1 3 5.857933 5.857933 3780 +sector 1 3 5.857933 5.857933 3766 +classesc 1 3 5.857933 5.857933 3681 +cornellopoli 1 2 6.263398 6.263398 5157 +chopin 1 2 6.263398 6.263398 5358 +reject 1 2 6.263398 6.263398 5418 +techniquec 1 2 6.263398 6.263398 5158 +methodsc 1 2 6.263398 6.263398 5159 +colloquiumc 1 2 6.263398 6.263398 5160 +computingc 1 2 6.263398 6.263398 5216 +sciencefrom 1 1 6.957497 6.957497 12158 +carneig 1 1 6.957497 6.957497 12159 +didresearch 1 1 6.957497 6.957497 12160 +institu 1 1 6.957497 6.957497 12161 +xsro 1 1 6.957497 6.957497 12162 +atft 1 1 6.957497 6.957497 12163 +lauderdal 1 1 6.957497 6.957497 12164 +usta 1 1 6.957497 6.957497 12165 +tournment 1 1 6.957497 6.957497 12166 +faviorit 1 1 6.957497 6.957497 12167 +boca 1 1 6.957497 6.957497 12168 +ratonkei 1 1 6.957497 6.957497 12169 +concerto 1 1 6.957497 6.957497 12157 +beethoven 1 1 6.957497 6.957497 12170 +gershwin 1 1 6.957497 6.957497 12171 +liszt 1 1 6.957497 6.957497 12172 +mendelssohn 1 1 6.957497 6.957497 12173 +mozart 1 1 6.957497 6.957497 12174 +rachmaninoff 1 1 6.957497 6.957497 12175 +ravel 1 1 6.957497 6.957497 12176 +tchaikovski 1 1 6.957497 6.957497 12177 +violinconcerto 1 1 6.957497 6.957497 12178 +purifi 1 1 6.957497 6.957497 12179 +computerc 1 1 6.957497 6.957497 12180 +sectorcool 1 1 6.957497 6.957497 12181 +links_leap 1 1 6.957497 6.957497 12182 +frogski 1 1 6.957497 6.957497 12183 +serverident 1 1 6.957497 6.957497 12184 +crisi 1 1 6.957497 6.957497 12185 +testweath 1 1 6.957497 6.957497 12186 +undergroundinktomi 1 1 6.957497 6.957497 12187 +enginequest 1 1 6.957497 6.957497 12188 +archiveslast 1 1 6.957497 6.957497 12189 +ecithaca 1 1 6.957497 6.957497 12190 +wwlee 1 1 6.957497 6.957497 12191 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..ea6a8a2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +network 1 168 1.791759 1.791759 61 +hall 1 146 1.945910 1.945910 65 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +databas 1 122 2.079442 2.079442 86 +site 1 106 2.197225 2.197225 119 +manag 1 114 2.197225 2.197225 125 +graphic 1 90 2.397895 2.397895 147 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +upson 1 71 2.639057 2.639057 218 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +china 1 37 3.332205 3.332205 487 +taken 1 31 3.496508 3.496508 555 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +alumni 1 21 3.912023 3.912023 807 +bachelor 1 17 4.110874 4.110874 957 +edui 1 13 4.382027 4.382027 1193 +systemsc 1 11 4.553877 4.553877 1293 +capac 1 8 4.875197 4.875197 1740 +shade 1 7 5.010635 5.010635 1881 +atcornel 1 6 5.164786 5.164786 2131 +engineeringc 1 4 5.568345 5.568345 2904 +phong 1 2 6.263398 6.263398 4822 +xichun 1 1 6.957497 6.957497 12192 +jennif 1 1 6.957497 6.957497 12194 +zhejiang 1 1 6.957497 6.957497 12193 +hangzhou 1 1 6.957497 6.957497 12195 +javaworldsunhigh 1 1 6.957497 6.957497 12196 +alumnimeng 1 1 6.957497 6.957497 12197 +gouraud 1 1 6.957497 6.957497 12198 +systeme 1 1 6.957497 6.957497 12199 +communicationby 1 1 6.957497 6.957497 12200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..22cb19b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +topic 1 114 2.197225 2.197225 110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..c66d27db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +relat 1 139 1.945910 1.945910 68 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +techniqu 1 99 2.302585 2.302585 138 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +june 1 79 2.564949 2.564949 214 +optim 1 79 2.564949 2.564949 197 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +knowledg 1 67 2.708050 2.708050 243 +improv 1 62 2.772589 2.772589 289 +interact 1 62 2.772589 2.772589 270 +written 1 63 2.772589 2.772589 278 +function 1 62 2.772589 2.772589 275 +result 1 65 2.772589 2.772589 281 +evalu 1 64 2.772589 2.772589 266 +januari 1 62 2.772589 2.772589 264 +juli 1 60 2.833213 2.833213 305 +februari 1 54 2.944439 2.944439 328 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +cach 1 41 3.218876 3.218876 432 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +societi 1 40 3.258097 3.258097 456 +transform 1 32 3.465736 3.465736 542 +semant 1 29 3.583519 3.583519 587 +manipul 1 27 3.637586 3.637586 624 +boston 1 19 4.007333 4.007333 862 +partial 1 18 4.060443 4.060443 900 +attribut 1 14 4.317488 4.317488 1092 +massachusett 1 14 4.317488 4.317488 1118 +deriv 1 13 4.382027 4.382027 1145 +sigplan 1 13 4.382027 4.382027 1190 +intermedi 1 9 4.753590 4.753590 1497 +discov 1 9 4.753590 4.753590 1562 +strength 1 9 4.753590 4.753590 1494 +reduct 1 7 5.010635 5.010635 1877 +increment 1 6 5.164786 5.164786 2206 +teitelbaum 1 6 5.164786 5.164786 2102 +sigact 1 6 5.164786 5.164786 2212 +florida 1 5 5.347108 5.347108 2526 +jolla 1 4 5.568345 5.568345 2988 +stoller 1 4 5.568345 5.568345 2866 +petersburg 1 4 5.568345 5.568345 2989 +systemat 1 3 5.857933 5.857933 3781 +beach 1 3 5.857933 5.857933 3782 +cachet 1 2 6.263398 6.263398 5419 +auxiliari 1 2 6.263398 6.263398 5421 +anni 1 2 6.263398 6.263398 5420 +yanhong 1 2 6.263398 6.263398 5422 +computationderiv 1 1 6.957497 6.957497 12201 +programsa 1 1 6.957497 6.957497 12202 +themeprogram 1 1 6.957497 6.957497 12203 +usessystemat 1 1 6.957497 6.957497 12204 +deriveincrement 1 1 6.957497 6.957497 12205 +peoplei 1 1 6.957497 6.957497 12206 +liutim 1 1 6.957497 6.957497 12207 +teitelbaumkeyword 1 1 6.957497 6.957497 12208 +cacheti 1 1 6.957497 6.957497 12209 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..268286c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +intern 1 108 2.197225 2.197225 128 +world 1 115 2.197225 2.197225 126 +techniqu 1 99 2.302585 2.302585 138 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +center 1 88 2.397895 2.397895 158 +present 1 91 2.397895 2.397895 145 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +institut 1 84 2.484907 2.484907 187 +build 1 85 2.484907 2.484907 184 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +dynam 1 76 2.564949 2.564949 194 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +upson 1 71 2.639057 2.639057 218 +html 1 75 2.639057 2.639057 235 +knowledg 1 67 2.708050 2.708050 243 +august 1 66 2.708050 2.708050 257 +test 1 66 2.708050 2.708050 252 +improv 1 62 2.772589 2.772589 289 +ithaca 1 65 2.772589 2.772589 294 +januari 1 62 2.772589 2.772589 264 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +evalu 1 64 2.772589 2.772589 266 +descript 1 64 2.772589 2.772589 271 +septemb 1 65 2.772589 2.772589 274 +laboratori 1 63 2.772589 2.772589 292 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +automat 1 61 2.833213 2.833213 306 +reason 1 57 2.890372 2.890372 318 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +februari 1 54 2.944439 2.944439 328 +talk 1 53 2.944439 2.944439 336 +profession 1 51 2.995732 2.995732 345 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +move 1 47 3.091042 3.091042 382 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +cach 1 41 3.218876 3.218876 432 +combin 1 42 3.218876 3.218876 421 +annual 1 40 3.258097 3.258097 458 +societi 1 40 3.258097 3.258097 456 +map 1 39 3.258097 3.258097 452 +author 1 39 3.258097 3.258097 450 +seminar 1 38 3.295837 3.295837 470 +china 1 37 3.332205 3.332205 487 +formal 1 37 3.332205 3.332205 478 +multi 1 36 3.367296 3.367296 493 +post 1 35 3.401197 3.401197 505 +concurr 1 34 3.401197 3.401197 501 +survei 1 35 3.401197 3.401197 513 +manual 1 35 3.401197 3.401197 504 +obtain 1 33 3.433987 3.433987 534 +transform 1 32 3.465736 3.465736 542 +dissert 1 32 3.465736 3.465736 549 +scientist 1 31 3.496508 3.496508 560 +compon 1 30 3.555348 3.555348 570 +semant 1 29 3.583519 3.583519 587 +manipul 1 27 3.637586 3.637586 624 +revis 1 26 3.688879 3.688879 640 +doctor 1 24 3.761200 3.761200 709 +universityithaca 1 24 3.761200 3.761200 710 +wang 1 21 3.912023 3.912023 790 +hous 1 21 3.912023 3.912023 801 +break 1 20 3.951244 3.951244 812 +expert 1 20 3.951244 3.951244 833 +department 1 20 3.951244 3.951244 839 +boston 1 19 4.007333 4.007333 862 +beij 1 19 4.007333 4.007333 876 +partial 1 18 4.060443 4.060443 900 +germani 1 17 4.110874 4.110874 946 +young 1 16 4.174387 4.174387 991 +zhang 1 16 4.174387 4.174387 980 +partit 1 16 4.174387 4.174387 984 +attribut 1 14 4.317488 4.317488 1092 +massachusett 1 14 4.317488 4.317488 1118 +deriv 1 13 4.382027 4.382027 1145 +sigplan 1 13 4.382027 4.382027 1190 +tsinghua 1 13 4.382027 4.382027 1195 +huang 1 12 4.465908 4.465908 1202 +qualit 1 11 4.553877 4.553877 1362 +song 1 11 4.553877 4.553877 1380 +ofcomput 1 10 4.653960 4.653960 1442 +mainten 1 9 4.753590 4.753590 1543 +discov 1 9 4.753590 4.753590 1562 +intermedi 1 9 4.753590 4.753590 1497 +factor 1 9 4.753590 4.753590 1544 +congress 1 9 4.753590 4.753590 1592 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +compos 1 9 4.753590 4.753590 1527 +quantit 1 8 4.875197 4.875197 1654 +xerox 1 8 4.875197 4.875197 1725 +hallcornel 1 8 4.875197 4.875197 1757 +refere 1 7 5.010635 5.010635 1895 +uncertainti 1 7 5.010635 5.010635 1882 +teitelbaum 1 6 5.164786 5.164786 2102 +increment 1 6 5.164786 5.164786 2206 +sigact 1 6 5.164786 5.164786 2212 +usag 1 6 5.164786 5.164786 2209 +indiana 1 6 5.164786 5.164786 2057 +florida 1 5 5.347108 5.347108 2526 +peke 1 5 5.347108 5.347108 2539 +webster 1 5 5.347108 5.347108 2468 +stoller 1 4 5.568345 5.568345 2866 +petersburg 1 4 5.568345 5.568345 2989 +jolla 1 4 5.568345 5.568345 2988 +kestrel 1 4 5.568345 5.568345 2990 +dagstuhl 1 4 5.568345 5.568345 2871 +systemat 1 3 5.857933 5.857933 3781 +beach 1 3 5.857933 5.857933 3782 +schloss 1 3 5.857933 5.857933 3727 +tocomput 1 3 5.857933 5.857933 3162 +yanhong 1 2 6.263398 6.263398 5422 +anni 1 2 6.263398 6.263398 5420 +auxiliari 1 2 6.263398 6.263398 5421 +cachet 1 2 6.263398 6.263398 5419 +fuzzi 1 2 6.263398 6.263398 5423 +eduhttp 1 2 6.263398 6.263398 5424 +pageyanhong 1 1 6.957497 6.957497 12210 +forincrement 1 1 6.957497 6.957497 12211 +interactivesystem 1 1 6.957497 6.957497 12212 +systemorgan 1 1 6.957497 6.957497 12213 +talksph 1 1 6.957497 6.957497 12214 +basedsystemat 1 1 6.957497 6.957497 12215 +abstractjourn 1 1 6.957497 6.957497 12216 +inexact 1 1 6.957497 6.957497 12217 +wakayama 1 1 6.957497 6.957497 12218 +oggeb 1 1 6.957497 6.957497 12219 +basin 1 1 6.957497 6.957497 12220 +ri 1 1 6.957497 6.957497 12221 +tshinghua 1 1 6.957497 6.957497 12222 +lindlei 1 1 6.957497 6.957497 12223 +hallindiana 1 1 6.957497 6.957497 12224 +universitybloomington 1 1 6.957497 6.957497 12225 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..e2313c56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +hall 1 146 1.945910 1.945910 65 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +ithaca 1 65 2.772589 2.772589 294 +favorit 1 44 3.135494 3.135494 410 +edui 1 13 4.382027 4.382027 1193 +huang 1 12 4.465908 4.465908 1202 +cheng 1 10 4.653960 4.653960 1381 +ychuang 1 3 5.857933 5.857933 3093 +huangyi 1 1 6.957497 6.957497 12226 +documentscoursesprojectaccess 1 1 6.957497 6.957497 12227 +byvisitorslast 1 1 6.957497 6.957497 12228 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..abcaeb26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +modifi 1 178 1.609438 1.609438 35 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +world 1 115 2.197225 2.197225 126 +version 1 113 2.197225 2.197225 122 +person 1 111 2.197225 2.197225 117 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +search 1 95 2.397895 2.397895 155 +imag 1 91 2.397895 2.397895 161 +school 1 84 2.484907 2.484907 188 +thing 1 84 2.484907 2.484907 189 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +novemb 1 81 2.484907 2.484907 179 +april 1 77 2.564949 2.564949 196 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +resum 1 79 2.564949 2.564949 217 +name 1 72 2.639057 2.639057 220 +free 1 73 2.639057 2.639057 224 +onlin 1 75 2.639057 2.639057 223 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +java 1 70 2.708050 2.708050 248 +plai 1 60 2.833213 2.833213 307 +automat 1 61 2.833213 2.833213 306 +browser 1 56 2.890372 2.890372 313 +life 1 50 3.044522 3.044522 375 +still 1 50 3.044522 3.044522 362 +better 1 45 3.135494 3.135494 401 +music 1 42 3.218876 3.218876 436 +programm 1 39 3.258097 3.258097 445 +origin 1 38 3.295837 3.295837 472 +word 1 34 3.401197 3.401197 508 +kind 1 32 3.465736 3.465736 541 +titl 1 31 3.496508 3.496508 556 +actual 1 28 3.610918 3.610918 604 +brows 1 23 3.806662 3.806662 726 +sciencecornel 1 22 3.850148 3.850148 768 +love 1 21 3.912023 3.912023 804 +wonder 1 20 3.951244 3.951244 815 +beauti 1 18 4.060443 4.060443 912 +listen 1 18 4.060443 4.060443 907 +women 1 16 4.174387 4.174387 1004 +georg 1 16 4.174387 4.174387 994 +classic 1 14 4.317488 4.317488 1084 +came 1 13 4.382027 4.382027 1197 +forth 1 13 4.382027 4.382027 1186 +went 1 12 4.465908 4.465908 1279 +meng 1 12 4.465908 4.465908 1214 +pagewelcom 1 11 4.553877 4.553877 1344 +america 1 11 4.553877 4.553877 1370 +moment 1 11 4.553877 4.553877 1379 +virginia 1 8 4.875197 4.875197 1659 +chung 1 7 5.010635 5.010635 1964 +elementari 1 7 5.010635 5.010635 1825 +marri 1 7 5.010635 5.010635 1946 +perfect 1 7 5.010635 5.010635 1921 +sung 1 6 5.164786 5.164786 2075 +piano 1 6 5.164786 5.164786 2201 +emerg 1 6 5.164786 5.164786 2038 +everybodi 1 5 5.347108 5.347108 2517 +junior 1 5 5.347108 5.347108 2519 +korea 1 4 5.568345 5.568345 2971 +keyboard 1 4 5.568345 5.568345 2970 +moon 1 4 5.568345 5.568345 2991 +hire 1 4 5.568345 5.568345 2976 +seoul 1 3 5.857933 5.857933 3783 +forward 1 3 5.857933 5.857933 3784 +sang 1 2 6.263398 6.263398 5356 +kang 1 2 6.263398 6.263398 5360 +mason 1 2 6.263398 6.263398 4916 +korean 1 2 6.263398 6.263398 5354 +infom 1 2 6.263398 6.263398 5425 +chungyou 1 1 6.957497 6.957497 12232 +thvisitor 1 1 6.957497 6.957497 12233 +universitywher 1 1 6.957497 6.957497 12234 +kindergarten 1 1 6.957497 6.957497 12235 +myoung 1 1 6.957497 6.957497 12230 +universityin 1 1 6.957497 6.957497 12236 +happiest 1 1 6.957497 6.957497 12237 +husband 1 1 6.957497 6.957497 12231 +forsaic 1 1 6.957497 6.957497 12238 +shin 1 1 6.957497 6.957497 12239 +seung 1 1 6.957497 6.957497 12240 +hoon 1 1 6.957497 6.957497 12241 +newpap 1 1 6.957497 6.957497 12242 +hangook 1 1 6.957497 6.957497 12243 +ilbo 1 1 6.957497 6.957497 12229 +chosun 1 1 6.957497 6.957497 12244 +joongang 1 1 6.957497 6.957497 12245 +appletyoosun 1 1 6.957497 6.957497 12246 +triphamm 1 1 6.957497 6.957497 12247 +sbithaca 1 1 6.957497 6.957497 12248 +ychung 1 1 6.957497 6.957497 12249 +yooschung 1 1 6.957497 6.957497 12250 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..1b18d982 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,222 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +real 1 93 2.397895 2.397895 144 +center 1 88 2.397895 2.397895 158 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +school 1 84 2.484907 2.484907 188 +west 1 83 2.484907 2.484907 192 +thing 1 84 2.484907 2.484907 189 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +good 1 77 2.564949 2.564949 200 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +free 1 73 2.639057 2.639057 224 +order 1 69 2.708050 2.708050 249 +ithaca 1 65 2.772589 2.772589 294 +virtual 1 62 2.772589 2.772589 285 +plai 1 60 2.833213 2.833213 307 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +direct 1 57 2.890372 2.890372 316 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +particular 1 51 2.995732 2.995732 352 +date 1 51 2.995732 2.995732 344 +much 1 52 2.995732 2.995732 349 +run 1 51 2.995732 2.995732 347 +maintain 1 51 2.995732 2.995732 342 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +effect 1 46 3.091042 3.091042 385 +better 1 45 3.135494 3.135494 401 +execut 1 45 3.135494 3.135494 404 +howev 1 41 3.218876 3.218876 422 +york 1 41 3.218876 3.218876 435 +compani 1 41 3.218876 3.218876 423 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +microsoft 1 38 3.295837 3.295837 468 +expect 1 37 3.332205 3.332205 484 +game 1 36 3.367296 3.367296 498 +download 1 36 3.367296 3.367296 489 +everi 1 34 3.401197 3.401197 519 +michael 1 35 3.401197 3.401197 514 +toler 1 33 3.433987 3.433987 533 +go 1 33 3.433987 3.433987 529 +within 1 33 3.433987 3.433987 525 +often 1 31 3.496508 3.496508 551 +someth 1 31 3.496508 3.496508 554 +computersci 1 30 3.555348 3.555348 562 +focus 1 29 3.583519 3.583519 584 +hope 1 28 3.610918 3.610918 610 +great 1 27 3.637586 3.637586 626 +though 1 27 3.637586 3.637586 622 +linux 1 27 3.637586 3.637586 631 +rule 1 26 3.688879 3.688879 638 +compar 1 26 3.688879 3.688879 648 +notic 1 25 3.737670 3.737670 675 +strategi 1 25 3.737670 3.737670 682 +other 1 24 3.761200 3.761200 697 +yahoo 1 24 3.761200 3.761200 707 +magazin 1 24 3.761200 3.761200 704 +highli 1 23 3.806662 3.806662 725 +begin 1 23 3.806662 3.806662 716 +recommend 1 22 3.850148 3.850148 737 +almost 1 22 3.850148 3.850148 742 +instal 1 22 3.850148 3.850148 754 +love 1 21 3.912023 3.912023 804 +longer 1 20 3.951244 3.951244 816 +agent 1 18 4.060443 4.060443 910 +attempt 1 17 4.110874 4.110874 917 +intro 1 17 4.110874 4.110874 915 +medic 1 17 4.110874 4.110874 958 +thought 1 17 4.110874 4.110874 945 +easi 1 16 4.174387 4.174387 969 +critic 1 16 4.174387 4.174387 982 +rate 1 15 4.248495 4.248495 1037 +save 1 14 4.317488 4.317488 1099 +front 1 13 4.382027 4.382027 1154 +wife 1 13 4.382027 4.382027 1196 +brother 1 13 4.382027 4.382027 1189 +block 1 13 4.382027 4.382027 1183 +emac 1 13 4.382027 4.382027 1143 +uniqu 1 12 4.465908 4.465908 1228 +town 1 10 4.653960 4.653960 1458 +yellow 1 9 4.753590 4.753590 1601 +seven 1 9 4.753590 4.753590 1561 +crash 1 8 4.875197 4.875197 1616 +satisfi 1 8 4.875197 4.875197 1694 +qualifi 1 8 4.875197 4.875197 1721 +contrast 1 8 4.875197 4.875197 1637 +fail 1 8 4.875197 4.875197 1655 +slightli 1 7 5.010635 5.010635 1795 +marri 1 7 5.010635 5.010635 1946 +perfect 1 7 5.010635 5.010635 1921 +bookstor 1 7 5.010635 5.010635 1837 +seen 1 6 5.164786 5.164786 2202 +impress 1 6 5.164786 5.164786 2096 +tri 1 6 5.164786 5.164786 2166 +plu 1 6 5.164786 5.164786 2004 +slate 1 6 5.164786 5.164786 2021 +suni 1 5 5.347108 5.347108 2452 +twenti 1 5 5.347108 5.347108 2540 +fairli 1 5 5.347108 5.347108 2322 +hate 1 5 5.347108 5.347108 2529 +solid 1 5 5.347108 5.347108 2255 +webpag 1 4 5.568345 5.568345 2660 +complic 1 4 5.568345 5.568345 2902 +trivial 1 4 5.568345 5.568345 2786 +closest 1 4 5.568345 5.568345 2828 +edudepart 1 3 5.857933 5.857933 3302 +syracus 1 3 5.857933 5.857933 3553 +forfault 1 3 5.857933 5.857933 3748 +outof 1 3 5.857933 5.857933 3296 +nota 1 3 5.857933 5.857933 3785 +newli 1 3 5.857933 5.857933 3786 +health 1 3 5.857933 5.857933 3787 +advertis 1 3 5.857933 5.857933 3788 +yaron 1 2 6.263398 6.263398 4122 +minski 1 2 6.263398 6.263398 4123 +veggi 1 2 6.263398 6.263398 5426 +coop 1 2 6.263398 6.263398 4213 +nowadai 1 2 6.263398 6.263398 5376 +lisa 1 2 6.263398 6.263398 5427 +theidea 1 2 6.263398 6.263398 5428 +resumesom 1 2 6.263398 6.263398 5186 +miser 1 2 6.263398 6.263398 5359 +admit 1 2 6.263398 6.263398 5429 +amazon 1 2 6.263398 6.263398 5193 +yminski 1 1 6.957497 6.957497 12252 +comstock 1 1 6.957497 6.957497 12253 +onfault 1 1 6.957497 6.957497 12254 +thetacoma 1 1 6.957497 6.957497 12255 +flapdragon 1 1 6.957497 6.957497 12251 +livether 1 1 6.957497 6.957497 12256 +anopen 1 1 6.957497 6.957497 12257 +recommendit 1 1 6.957497 6.957497 12258 +ancientchines 1 1 6.957497 6.957497 12259 +extremlysimpl 1 1 6.957497 6.957497 12260 +thannoth 1 1 6.957497 6.957497 12261 +cgoban 1 1 6.957497 6.957497 12262 +nicest 1 1 6.957497 6.957497 12263 +goboard 1 1 6.957497 6.957497 12264 +thenet 1 1 6.957497 6.957497 12265 +minutesof 1 1 6.957497 6.957497 12266 +favoritepoem 1 1 6.957497 6.957497 12267 +lafiglia 1 1 6.957497 6.957497 12268 +piang 1 1 6.957497 6.957497 12269 +advicefor 1 1 6.957497 6.957497 12270 +interestinglink 1 1 6.957497 6.957497 12271 +alarmingli 1 1 6.957497 6.957497 12272 +firefli 1 1 6.957497 6.957497 12273 +bakeri 1 1 6.957497 6.957497 12274 +bigbook 1 1 6.957497 6.957497 12275 +bigyellow 1 1 6.957497 6.957497 12276 +kinslei 1 1 6.957497 6.957497 12277 +discount 1 1 6.957497 6.957497 12278 +booksel 1 1 6.957497 6.957497 12279 +mailcrypt 1 1 6.957497 6.957497 12280 +interfacemqbtazgjohoaaaedalfhlgjmdg 1 1 6.957497 6.957497 12281 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 1 1 6.957497 6.957497 12282 +rbylf 1 1 6.957497 6.957497 12283 +zwqujcioczoecv 1 1 6.957497 6.957497 12284 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 1 1 6.957497 6.957497 12285 +gkgarsokrinnoazihja 1 1 6.957497 6.957497 12286 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 1 1 6.957497 6.957497 12287 +wumjgzsnvispwkrvzgdrojswmc 1 1 6.957497 6.957497 12288 +eigsqsb 1 1 6.957497 6.957497 12289 +bsbpw 1 1 6.957497 6.957497 12290 +jcwz 1 1 6.957497 6.957497 12291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..6fc0f3a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +introduct 1 126 2.079442 2.079442 87 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +welcom 1 122 2.079442 2.079442 99 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +info 1 85 2.484907 2.484907 176 +novemb 1 81 2.484907 2.484907 179 +state 1 76 2.564949 2.564949 207 +resum 1 79 2.564949 2.564949 217 +html 1 75 2.639057 2.639057 235 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +semest 1 58 2.890372 2.890372 312 +format 1 48 3.044522 3.044522 356 +http 1 41 3.218876 3.218876 420 +unit 1 21 3.912023 3.912023 779 +modif 1 17 4.110874 4.110874 913 +practicum 1 16 4.174387 4.174387 960 +systemsc 1 11 4.553877 4.553877 1293 +pagecours 1 5 5.347108 5.347108 2395 +intelligencec 1 4 5.568345 5.568345 2673 +visionfal 1 2 6.263398 6.263398 4749 +eduhttp 1 2 6.263398 6.263398 5424 +yuichi 1 1 6.957497 6.957497 12292 +tsuchimoto 1 1 6.957497 6.957497 12293 +pageyuichi 1 1 6.957497 6.957497 12295 +workfal 1 1 6.957497 6.957497 12296 +engineeringspr 1 1 6.957497 6.957497 12297 +translatorsc 1 1 6.957497 6.957497 12294 +computingi 1 1 6.957497 6.957497 12298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..e9fe738a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +object 1 138 1.945910 1.945910 79 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +document 1 121 2.079442 2.079442 89 +high 1 130 2.079442 2.079442 101 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +proceed 1 93 2.397895 2.397895 152 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +build 1 85 2.484907 2.484907 184 +ieee 1 86 2.484907 2.484907 190 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +messag 1 76 2.564949 2.564949 212 +sourc 1 77 2.564949 2.564949 201 +know 1 80 2.564949 2.564949 198 +appear 1 78 2.564949 2.564949 210 +good 1 77 2.564949 2.564949 200 +interfac 1 79 2.564949 2.564949 209 +optim 1 79 2.564949 2.564949 197 +state 1 76 2.564949 2.564949 207 +dynam 1 76 2.564949 2.564949 194 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +abstract 1 62 2.772589 2.772589 276 +evalu 1 64 2.772589 2.772589 266 +share 1 59 2.833213 2.833213 304 +major 1 56 2.890372 2.890372 315 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +think 1 57 2.890372 2.890372 314 +thesi 1 57 2.890372 2.890372 327 +processor 1 54 2.944439 2.944439 335 +februari 1 54 2.944439 2.944439 328 +talk 1 53 2.944439 2.944439 336 +allow 1 53 2.944439 2.944439 333 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +without 1 50 3.044522 3.044522 370 +adapt 1 46 3.091042 3.091042 387 +effect 1 46 3.091042 3.091042 385 +california 1 46 3.091042 3.091042 388 +describ 1 45 3.135494 3.135494 400 +made 1 44 3.135494 3.135494 398 +even 1 45 3.135494 3.135494 393 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +offer 1 43 3.178054 3.178054 414 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +slide 1 38 3.295837 3.295837 467 +prototyp 1 38 3.295837 3.295837 463 +workstat 1 37 3.332205 3.332205 479 +connect 1 37 3.332205 3.332205 485 +cost 1 37 3.332205 3.332205 480 +word 1 34 3.401197 3.401197 508 +toler 1 33 3.433987 3.433987 533 +concept 1 32 3.465736 3.465736 537 +someth 1 31 3.496508 3.496508 554 +power 1 30 3.555348 3.555348 573 +compon 1 30 3.555348 3.555348 570 +exist 1 30 3.555348 3.555348 569 +rang 1 30 3.555348 3.555348 565 +releas 1 28 3.610918 3.610918 616 +packag 1 28 3.610918 3.610918 614 +intend 1 28 3.610918 3.610918 599 +pass 1 28 3.610918 3.610918 611 +cluster 1 28 3.610918 3.610918 612 +progress 1 28 3.610918 3.610918 598 +propos 1 28 3.610918 3.610918 602 +multiprocessor 1 28 3.610918 3.610918 605 +measur 1 28 3.610918 3.610918 609 +scale 1 28 3.610918 3.610918 613 +becom 1 28 3.610918 3.610918 603 +though 1 27 3.637586 3.637586 622 +altern 1 26 3.688879 3.688879 641 +compar 1 26 3.688879 3.688879 648 +challeng 1 26 3.688879 3.688879 653 +berkelei 1 26 3.688879 3.688879 657 +reliabl 1 25 3.737670 3.737670 674 +concern 1 25 3.737670 3.737670 666 +higher 1 24 3.761200 3.761200 690 +demonstr 1 24 3.761200 3.761200 694 +magazin 1 24 3.761200 3.761200 704 +flow 1 24 3.761200 3.761200 700 +instal 1 22 3.850148 3.850148 754 +reduc 1 22 3.850148 3.850148 759 +defin 1 22 3.850148 3.850148 746 +varieti 1 22 3.850148 3.850148 740 +flexibl 1 21 3.912023 3.912023 792 +portabl 1 20 3.951244 3.951244 819 +benchmark 1 19 4.007333 4.007333 859 +comparison 1 19 4.007333 4.007333 863 +lower 1 18 4.060443 4.060443 886 +layer 1 17 4.110874 4.110874 926 +interconnect 1 17 4.110874 4.110874 937 +whether 1 17 4.110874 4.110874 918 +protect 1 17 4.110874 4.110874 935 +outlin 1 17 4.110874 4.110874 914 +latenc 1 16 4.174387 4.174387 993 +commerci 1 16 4.174387 4.174387 1005 +across 1 16 4.174387 4.174387 974 +overhead 1 15 4.248495 4.248495 1035 +micro 1 15 4.248495 4.248495 1031 +driven 1 15 4.248495 4.248495 1048 +trip 1 14 4.317488 4.317488 1113 +split 1 14 4.317488 4.317488 1078 +achiev 1 14 4.317488 4.317488 1088 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +block 1 13 4.382027 4.382027 1183 +directli 1 13 4.382027 4.382027 1141 +signific 1 13 4.382027 4.382027 1125 +earlier 1 13 4.382027 4.382027 1140 +carri 1 13 4.382027 4.382027 1152 +coordin 1 13 4.382027 4.382027 1182 +introduc 1 13 4.382027 4.382027 1139 +onth 1 12 4.465908 4.465908 1218 +characterist 1 12 4.465908 4.465908 1257 +buffer 1 12 4.465908 4.465908 1211 +chri 1 11 4.553877 4.553877 1311 +bandwidth 1 11 4.553877 4.553877 1365 +primit 1 11 4.553877 4.553877 1317 +underli 1 10 4.653960 4.653960 1410 +equip 1 10 4.653960 4.653960 1459 +equival 1 9 4.753590 4.753590 1496 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +transmiss 1 9 4.753590 4.753590 1588 +significantli 1 9 4.753590 4.753590 1508 +desir 1 9 4.753590 4.753590 1542 +spec 1 8 4.875197 4.875197 1640 +readm 1 8 4.875197 4.875197 1699 +chao 1 8 4.875197 4.875197 1753 +round 1 8 4.875197 4.875197 1769 +ring 1 8 4.875197 4.875197 1684 +vineet 1 8 4.875197 4.875197 1639 +andcomput 1 8 4.875197 4.875197 1623 +gold 1 8 4.875197 4.875197 1745 +coast 1 8 4.875197 4.875197 1746 +poor 1 8 4.875197 4.875197 1736 +pittsburgh 1 7 5.010635 5.010635 1938 +larger 1 7 5.010635 5.010635 1875 +suffici 1 7 5.010635 5.010635 1897 +freeli 1 6 5.164786 5.164786 2014 +lack 1 6 5.164786 5.164786 1994 +affect 1 6 5.164786 5.164786 2044 +goldstein 1 6 5.164786 5.164786 2168 +phase 1 6 5.164786 5.164786 1977 +older 1 5 5.347108 5.347108 2387 +buch 1 5 5.347108 5.347108 2272 +culler 1 5 5.347108 5.347108 2381 +symp 1 5 5.347108 5.347108 2376 +australia 1 5 5.347108 5.347108 2478 +conform 1 4 5.568345 5.568345 2941 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +theus 1 4 5.568345 5.568345 2992 +throughput 1 4 5.568345 5.568345 2993 +andevalu 1 4 5.568345 5.568345 2706 +asymptot 1 4 5.568345 5.568345 2676 +basu 1 4 5.568345 5.568345 2843 +forparallel 1 4 5.568345 5.568345 2703 +neta 1 3 5.857933 5.857933 3789 +thegener 1 3 5.857933 5.857933 3648 +moreinform 1 3 5.857933 5.857933 3307 +let 1 3 5.857933 5.857933 3790 +mpp 1 3 5.857933 5.857933 3194 +avula 1 3 5.857933 5.857933 3600 +abridg 1 3 5.857933 5.857933 3772 +magnitud 1 3 5.857933 5.857933 3582 +roughli 1 3 5.857933 5.857933 3097 +schauser 1 3 5.857933 5.857933 3599 +tremend 1 3 5.857933 5.857933 3453 +thegam 1 2 6.263398 6.263398 5430 +differencebetween 1 2 6.263398 6.263398 5431 +pleaseclick 1 2 6.263398 6.263398 5432 +messageslow 1 2 6.263398 6.263398 5040 +meiko 1 2 6.263398 6.263398 4996 +focuseson 1 2 6.263398 6.263398 5433 +veena 1 2 6.263398 6.263398 5000 +thecommun 1 2 6.263398 6.263398 4928 +thesetechniqu 1 2 6.263398 6.263398 4263 +thenetwork 1 2 6.263398 6.263398 5434 +incommun 1 2 6.263398 6.263398 4349 +microsecond 1 2 6.263398 6.263398 5435 +tominim 1 2 6.263398 6.263398 5436 +unnecessarili 1 2 6.263398 6.263398 4688 +mainstream 1 2 6.263398 6.263398 5437 +contactthorsten 1 2 6.263398 6.263398 5438 +messagescornel 1 1 6.957497 6.957497 12301 +implementationsact 1 1 6.957497 6.957497 12302 +codereleas 1 1 6.957497 6.957497 12303 +instructionson 1 1 6.957497 6.957497 12304 +releasenot 1 1 6.957497 6.957497 12305 +fileto 1 1 6.957497 6.957497 12306 +currentvers 1 1 6.957497 6.957497 12307 +libmpci 1 1 6.957497 6.957497 12308 +thedistribut 1 1 6.957497 6.957497 12309 +fordetail 1 1 6.957497 6.957497 12310 +briefnot 1 1 6.957497 6.957497 12311 +ibmrisc 1 1 6.957497 6.957497 12312 +hawblitzel 1 1 6.957497 6.957497 12313 +ieeesupercomput 1 1 6.957497 6.957497 12314 +spiteof 1 1 6.957497 6.957497 12315 +scommun 1 1 6.957497 6.957497 12316 +inferior 1 1 6.957497 6.957497 12317 +tmccm 1 1 6.957497 6.957497 12318 +standardmessag 1 1 6.957497 6.957497 12319 +tooffer 1 1 6.957497 6.957497 12320 +activemessag 1 1 6.957497 6.957497 12299 +networkadapt 1 1 6.957497 6.957497 12321 +yieldsa 1 1 6.957497 6.957497 12322 +secondpart 1 1 6.957497 6.957497 12300 +communicationsubstr 1 1 6.957497 6.957497 12323 +cbenchmark 1 1 6.957497 6.957497 12324 +lowmessag 1 1 6.957497 6.957497 12325 +compens 1 1 6.957497 6.957497 12326 +networklat 1 1 6.957497 6.957497 12327 +availablempich 1 1 6.957497 6.957497 12328 +implementationbenchmark 1 1 6.957497 6.957497 12329 +firmwar 1 1 6.957497 6.957497 12330 +butdo 1 1 6.957497 6.957497 12331 +assumefamiliar 1 1 6.957497 6.957497 12332 +mainperform 1 1 6.957497 6.957497 12333 +timeof 1 1 6.957497 6.957497 12334 +smessag 1 1 6.957497 6.957497 12335 +theu 1 1 6.957497 6.957497 12336 +themeiko 1 1 6.957497 6.957497 12337 +thehpam 1 1 6.957497 6.957497 12338 +fddi 1 1 6.957497 6.957497 12339 +theparagon 1 1 6.957497 6.957497 12340 +thesp 1 1 6.957497 6.957497 12341 +networksus 1 1 6.957497 6.957497 12342 +anyndia 1 1 6.957497 6.957497 12343 +ascompar 1 1 6.957497 6.957497 12344 +anatm 1 1 6.957497 6.957497 12345 +systemsoftwar 1 1 6.957497 6.957497 12346 +streamcommun 1 1 6.957497 6.957497 12347 +flowcontrol 1 1 6.957497 6.957497 12348 +builtfrom 1 1 6.957497 6.957497 12349 +artmultiprocessor 1 1 6.957497 6.957497 12350 +systemcoordin 1 1 6.957497 6.957497 12351 +andrequir 1 1 6.957497 6.957497 12352 +clusterinterconnect 1 1 6.957497 6.957497 12353 +showappl 1 1 6.957497 6.957497 12354 +smallmessag 1 1 6.957497 6.957497 12355 +messagesimplement 1 1 6.957497 6.957497 12356 +abstractth 1 1 6.957497 6.957497 12357 +overlapcomput 1 1 6.957497 6.957497 12358 +sacrificingprocessor 1 1 6.957497 6.957497 12359 +passingmultiprocessor 1 1 6.957497 6.957497 12360 +researchprototyp 1 1 6.957497 6.957497 12361 +communicationoverhead 1 1 6.957497 6.957497 12362 +simplecommun 1 1 6.957497 6.957497 12363 +isintrins 1 1 6.957497 6.957497 12364 +thehardwar 1 1 6.957497 6.957497 12365 +ncube 1 1 6.957497 6.957497 12366 +memoryextens 1 1 6.957497 6.957497 12367 +messagesar 1 1 6.957497 6.957497 12368 +forwhich 1 1 6.957497 6.957497 12369 +hardwaresupport 1 1 6.957497 6.957497 12370 +ofenhanc 1 1 6.957497 6.957497 12371 +efficientcommun 1 1 6.957497 6.957497 12372 +sitesact 1 1 6.957497 6.957497 12373 +messagesin 1 1 6.957497 6.957497 12374 +projectfor 1 1 6.957497 6.957497 12375 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..981b2f7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +commun 1 95 2.397895 2.397895 157 +environ 1 84 2.484907 2.484907 177 +wide 1 84 2.484907 2.484907 185 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +resourc 1 81 2.484907 2.484907 172 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +creat 1 63 2.772589 2.772589 277 +plan 1 65 2.772589 2.772589 272 +dept 1 64 2.772589 2.772589 291 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +explor 1 58 2.890372 2.890372 324 +direct 1 57 2.890372 2.890372 316 +detail 1 57 2.890372 2.890372 321 +variou 1 56 2.890372 2.890372 317 +approach 1 48 3.044522 3.044522 366 +featur 1 46 3.091042 3.091042 386 +electron 1 47 3.091042 3.091042 379 +possibl 1 47 3.091042 3.091042 378 +offer 1 43 3.178054 3.178054 414 +combin 1 42 3.218876 3.218876 421 +might 1 41 3.218876 3.218876 426 +autom 1 41 3.218876 3.218876 434 +futur 1 41 3.218876 3.218876 427 +howev 1 41 3.218876 3.218876 422 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +expect 1 37 3.332205 3.332205 484 +multi 1 36 3.367296 3.367296 493 +manual 1 35 3.401197 3.401197 504 +next 1 34 3.401197 3.401197 517 +toler 1 33 3.433987 3.433987 533 +within 1 33 3.433987 3.433987 525 +fault 1 32 3.465736 3.465736 547 +extend 1 32 3.465736 3.465736 539 +exist 1 30 3.555348 3.555348 569 +option 1 30 3.555348 3.555348 575 +rang 1 30 3.555348 3.555348 565 +compon 1 30 3.555348 3.555348 570 +secur 1 30 3.555348 3.555348 577 +becom 1 28 3.610918 3.610918 603 +hope 1 28 3.610918 3.610918 610 +effort 1 26 3.688879 3.688879 652 +enhanc 1 26 3.688879 3.688879 644 +reliabl 1 25 3.737670 3.737670 674 +higher 1 24 3.761200 3.761200 690 +demonstr 1 24 3.761200 3.761200 694 +initi 1 23 3.806662 3.806662 717 +varieti 1 22 3.850148 3.850148 740 +similar 1 21 3.912023 3.912023 771 +flexibl 1 21 3.912023 3.912023 792 +among 1 21 3.912023 3.912023 781 +thu 1 21 3.912023 3.912023 773 +toolkit 1 20 3.951244 3.951244 835 +media 1 19 4.007333 4.007333 861 +element 1 18 4.060443 4.060443 895 +speed 1 18 4.060443 4.060443 911 +failur 1 18 4.060443 4.060443 898 +commerci 1 16 4.174387 4.174387 1005 +latenc 1 16 4.174387 4.174387 993 +stock 1 16 4.174387 4.174387 1007 +permit 1 16 4.174387 4.174387 962 +novel 1 15 4.248495 4.248495 1039 +capabl 1 15 4.248495 4.248495 1016 +remot 1 15 4.248495 4.248495 1041 +transit 1 15 4.248495 4.248495 1046 +action 1 15 4.248495 4.248495 1038 +horu 1 14 4.317488 4.317488 1116 +demand 1 14 4.317488 4.317488 1073 +floor 1 14 4.317488 4.317488 1070 +signific 1 13 4.382027 4.382027 1125 +coordin 1 13 4.382027 4.382027 1182 +kenneth 1 12 4.465908 4.465908 1265 +reness 1 11 4.553877 4.553877 1333 +extrem 1 11 4.553877 4.553877 1330 +market 1 11 4.553877 4.553877 1361 +branch 1 11 4.553877 4.553877 1318 +impact 1 11 4.553877 4.553877 1334 +prior 1 10 4.653960 4.653960 1438 +success 1 10 4.653960 4.653960 1390 +traffic 1 10 4.653960 4.653960 1421 +birman 1 9 4.753590 4.753590 1531 +robbert 1 9 4.753590 4.753590 1529 +telecommun 1 9 4.753590 4.753590 1565 +govern 1 9 4.753590 4.753590 1581 +occur 1 9 4.753590 4.753590 1572 +manufactur 1 8 4.875197 4.875197 1634 +illustr 1 8 4.875197 4.875197 1679 +synchroni 1 7 5.010635 5.010635 1923 +ground 1 7 5.010635 5.010635 1955 +privaci 1 6 5.164786 5.164786 2144 +benefit 1 6 5.164786 5.164786 2213 +isi 1 5 5.347108 5.347108 2443 +matur 1 5 5.347108 5.347108 2269 +isth 1 5 5.347108 5.347108 2532 +licens 1 5 5.347108 5.347108 2520 +mission 1 5 5.347108 5.347108 2465 +respond 1 5 5.347108 5.347108 2354 +substanti 1 4 5.568345 5.568345 2921 +visibl 1 4 5.568345 5.568345 2994 +naval 1 4 5.568345 5.568345 2920 +rapidli 1 4 5.568345 5.568345 2850 +militari 1 3 5.857933 5.857933 3326 +reconfigur 1 3 5.857933 5.857933 3556 +ofhoru 1 2 6.263398 6.263398 5181 +offersa 1 2 6.263398 6.263398 4071 +securityand 1 2 6.263398 6.263398 5066 +retain 1 2 6.263398 6.263398 5443 +basedcommun 1 2 6.263398 6.263398 4348 +stratu 1 2 6.263398 6.263398 5345 +isdescrib 1 2 6.263398 6.263398 5444 +groupwar 1 2 6.263398 6.263398 4857 +theatr 1 2 6.263398 6.263398 5173 +environmenthoru 1 1 6.957497 6.957497 12408 +shoru 1 1 6.957497 6.957497 12409 +reliabledistribut 1 1 6.957497 6.957497 12410 +demonstrategroupwar 1 1 6.957497 6.957497 12411 +foundto 1 1 6.957497 6.957497 12412 +synchronousprocess 1 1 6.957497 6.957497 12413 +importantresearch 1 1 6.957497 6.957497 12414 +performancer 1 1 6.957497 6.957497 12415 +calledact 1 1 6.957497 6.957497 12416 +messageswith 1 1 6.957497 6.957497 12417 +playbacksystem 1 1 6.957497 6.957497 12418 +calledcontinu 1 1 6.957497 6.957497 12419 +multimediaserv 1 1 6.957497 6.957497 12420 +telemedicin 1 1 6.957497 6.957497 12421 +videoon 1 1 6.957497 6.957497 12422 +andsecur 1 1 6.957497 6.957497 12423 +expectrapid 1 1 6.957497 6.957497 12424 +uptak 1 1 6.957497 6.957497 12425 +spana 1 1 6.957497 6.957497 12426 +financialtrad 1 1 6.957497 6.957497 12427 +factori 1 1 6.957497 6.957497 12428 +fordiscret 1 1 6.957497 6.957497 12429 +beingexplor 1 1 6.957497 6.957497 12430 +othernon 1 1 6.957497 6.957497 12431 +hiper 1 1 6.957497 6.957497 12432 +systemthat 1 1 6.957497 6.957497 12433 +aegi 1 1 6.957497 6.957497 12434 +battleradar 1 1 6.957497 6.957497 12435 +benefitfrom 1 1 6.957497 6.957497 12436 +migrateisi 1 1 6.957497 6.957497 12437 +communityin 1 1 6.957497 6.957497 12438 +agreementswith 1 1 6.957497 6.957497 12439 +subsidiari 1 1 6.957497 6.957497 12440 +mixtur 1 1 6.957497 6.957497 12441 +technologieswil 1 1 6.957497 6.957497 12442 +beseen 1 1 6.957497 6.957497 12443 +belowshow 1 1 6.957497 6.957497 12444 +andus 1 1 6.957497 6.957497 12445 +asset 1 1 6.957497 6.957497 12446 +thissort 1 1 6.957497 6.957497 12447 +utmost 1 1 6.957497 6.957497 12448 +whilealso 1 1 6.957497 6.957497 12449 +civilianand 1 1 6.957497 6.957497 12450 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..22eb2573 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +group 1 183 1.609438 1.609438 36 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +commun 1 95 2.397895 2.397895 157 +level 1 87 2.484907 2.484907 180 +build 1 85 2.484907 2.484907 184 +multimedia 1 68 2.708050 2.708050 258 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +improv 1 62 2.772589 2.772589 289 +approach 1 48 3.044522 3.044522 366 +adapt 1 46 3.091042 3.091042 387 +video 1 44 3.135494 3.135494 405 +combin 1 42 3.218876 3.218876 421 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +brian 1 38 3.295837 3.295837 466 +workstat 1 37 3.332205 3.332205 479 +secur 1 30 3.555348 3.555348 577 +platform 1 29 3.583519 3.583519 591 +cluster 1 28 3.610918 3.610918 612 +reliabl 1 25 3.737670 3.737670 674 +store 1 24 3.761200 3.761200 693 +flexibl 1 21 3.912023 3.912023 792 +fund 1 21 3.912023 3.912023 805 +toolkit 1 20 3.951244 3.951244 835 +portabl 1 20 3.951244 3.951244 819 +smith 1 20 3.951244 3.951244 820 +media 1 19 4.007333 4.007333 861 +critic 1 16 4.174387 4.174387 982 +commerci 1 16 4.174387 4.174387 1005 +horu 1 14 4.317488 4.317488 1116 +audio 1 14 4.317488 4.317488 1094 +eicken 1 13 4.382027 4.382027 1134 +primit 1 11 4.553877 4.553877 1317 +facilit 1 10 4.653960 4.653960 1412 +rapid 1 10 4.653960 4.653960 1453 +strength 1 9 4.753590 4.753590 1494 +transport 1 8 4.875197 4.875197 1672 +fromth 1 7 5.010635 5.010635 1802 +contract 1 6 5.164786 5.164786 1985 +testb 1 5 5.347108 5.347108 2456 +darpa 1 4 5.568345 5.568345 2944 +dramat 1 3 5.857933 5.857933 3239 +magnitud 1 3 5.857933 5.857933 3582 +militari 1 3 5.857933 5.857933 3326 +multimediaappl 1 3 5.857933 5.857933 3274 +todevelop 1 2 6.263398 6.263398 5448 +communicationprimit 1 2 6.263398 6.263398 5449 +thorstenvon 1 2 6.263398 6.263398 5450 +medianet 1 1 6.957497 6.957497 12468 +projectmedianet 1 1 6.957497 6.957497 12469 +protocolsth 1 1 6.957497 6.957497 12470 +communicationmak 1 1 6.957497 6.957497 12471 +foradvanc 1 1 6.957497 6.957497 12472 +includeaudio 1 1 6.957497 6.957497 12473 +technologyofficefor 1 1 6.957497 6.957497 12474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..d8af7844 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +theori 1 111 2.197225 2.197225 127 +user 1 104 2.302585 2.302585 137 +mani 1 92 2.397895 2.397895 150 +help 1 83 2.484907 2.484907 175 +main 1 67 2.708050 2.708050 256 +written 1 63 2.772589 2.772589 278 +reason 1 57 2.890372 2.890372 318 +browser 1 56 2.890372 2.890372 313 +index 1 56 2.890372 2.890372 309 +suggest 1 53 2.944439 2.944439 331 +autom 1 41 3.218876 3.218876 434 +announc 1 40 3.258097 3.258097 441 +articl 1 33 3.433987 3.433987 530 +linux 1 27 3.637586 3.637586 631 +theorem 1 21 3.912023 3.912023 786 +feedback 1 19 4.007333 4.007333 854 +nuprl 1 10 4.653960 4.653960 1402 +curiou 1 5 5.347108 5.347108 2541 +vaughn 1 1 6.957497 6.957497 12475 +askaltavista 1 1 6.957497 6.957497 12476 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..c1321f7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +call 1 91 2.397895 2.397895 153 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +follow 1 92 2.397895 2.397895 143 +section 1 94 2.397895 2.397895 149 +mani 1 92 2.397895 2.397895 150 +contain 1 81 2.484907 2.484907 174 +info 1 85 2.484907 2.484907 176 +activ 1 84 2.484907 2.484907 182 +librari 1 87 2.484907 2.484907 181 +exampl 1 77 2.564949 2.564949 195 +sourc 1 77 2.564949 2.564949 201 +messag 1 76 2.564949 2.564949 212 +want 1 79 2.564949 2.564949 199 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +addit 1 74 2.639057 2.639057 228 +main 1 67 2.708050 2.708050 256 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +previou 1 62 2.772589 2.772589 290 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +locat 1 59 2.833213 2.833213 303 +space 1 57 2.890372 2.890372 310 +variou 1 56 2.890372 2.890372 317 +local 1 55 2.944439 2.944439 334 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +found 1 53 2.944439 2.944439 337 +sampl 1 53 2.944439 2.944439 339 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +run 1 51 2.995732 2.995732 347 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +execut 1 45 3.135494 3.135494 404 +directori 1 45 3.135494 3.135494 396 +describ 1 45 3.135494 3.135494 400 +offer 1 43 3.178054 3.178054 414 +programm 1 39 3.258097 3.258097 445 +must 1 40 3.258097 3.258097 442 +continu 1 39 3.258097 3.258097 448 +open 1 38 3.295837 3.295837 469 +global 1 34 3.401197 3.401197 520 +return 1 34 3.401197 3.401197 502 +word 1 34 3.401197 3.401197 508 +built 1 29 3.583519 3.583519 592 +releas 1 28 3.610918 3.610918 616 +pass 1 28 3.610918 3.610918 611 +though 1 27 3.637586 3.637586 622 +proc 1 26 3.688879 3.688879 649 +frame 1 24 3.761200 3.761200 684 +displai 1 23 3.806662 3.806662 712 +instal 1 22 3.850148 3.850148 754 +instead 1 22 3.850148 3.850148 756 +path 1 21 3.912023 3.912023 778 +portabl 1 20 3.951244 3.951244 819 +sure 1 20 3.951244 3.951244 813 +regular 1 17 4.110874 4.110874 929 +debug 1 17 4.110874 4.110874 944 +stop 1 17 4.110874 4.110874 942 +layer 1 17 4.110874 4.110874 926 +configur 1 15 4.248495 4.248495 1012 +remot 1 15 4.248495 4.248495 1041 +overhead 1 15 4.248495 4.248495 1035 +fortran 1 15 4.248495 4.248495 1027 +command 1 14 4.317488 4.317488 1083 +split 1 14 4.317488 4.317488 1078 +matlab 1 14 4.317488 4.317488 1081 +step 1 13 4.382027 4.382027 1138 +script 1 13 4.382027 4.382027 1171 +emac 1 13 4.382027 4.382027 1143 +difficulti 1 13 4.382027 4.382027 1132 +remov 1 12 4.465908 4.465908 1225 +insid 1 12 4.465908 4.465908 1262 +characterist 1 12 4.465908 4.465908 1257 +replic 1 12 4.465908 4.465908 1231 +eight 1 11 4.553877 4.553877 1331 +node 1 11 4.553877 4.553877 1326 +statement 1 11 4.553877 4.553877 1313 +bandwidth 1 11 4.553877 4.553877 1365 +enter 1 10 4.653960 4.653960 1454 +stack 1 10 4.653960 4.653960 1389 +login 1 9 4.753590 4.753590 1550 +informationabout 1 9 4.753590 4.753590 1515 +readm 1 8 4.875197 4.875197 1699 +job 1 8 4.875197 4.875197 1702 +insert 1 8 4.875197 4.875197 1687 +round 1 8 4.875197 4.875197 1769 +hit 1 7 5.010635 5.010635 1965 +attach 1 7 5.010635 5.010635 1785 +header 1 7 5.010635 5.010635 1787 +exactli 1 7 5.010635 5.010635 1817 +usag 1 6 5.164786 5.164786 2209 +neither 1 6 5.164786 5.164786 1990 +phase 1 6 5.164786 5.164786 1977 +onto 1 6 5.164786 5.164786 2089 +proce 1 6 5.164786 5.164786 2114 +nativ 1 6 5.164786 5.164786 2192 +whichi 1 6 5.164786 5.164786 2056 +shell 1 5 5.347108 5.347108 2353 +overlap 1 5 5.347108 5.347108 2368 +theth 1 5 5.347108 5.347108 2325 +czar 1 5 5.347108 5.347108 2503 +cuc 1 4 5.568345 5.568345 2630 +arch 1 4 5.568345 5.568345 2995 +forparallel 1 4 5.568345 5.568345 2703 +hide 1 4 5.568345 5.568345 2996 +makefil 1 4 5.568345 5.568345 2662 +commonli 1 4 5.568345 5.568345 2877 +spam 1 4 5.568345 5.568345 2927 +asymptot 1 4 5.568345 5.568345 2676 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +andyou 1 3 5.857933 5.857933 3256 +haveth 1 2 6.263398 6.263398 5378 +properli 1 2 6.263398 6.263398 5454 +setenv 1 2 6.263398 6.263398 4491 +animplement 1 2 6.263398 6.263398 4931 +granita 1 1 6.957497 6.957497 12493 +granitathrough 1 1 6.957497 6.957497 12500 +asinteract 1 1 6.957497 6.957497 12501 +tcsh 1 1 6.957497 6.957497 12496 +bash 1 1 6.957497 6.957497 12497 +problemsdur 1 1 6.957497 6.957497 12502 +stufffrom 1 1 6.957497 6.957497 12503 +unam 1 1 6.957497 6.957497 12504 +manyou 1 1 6.957497 6.957497 12505 +infoexplor 1 1 6.957497 6.957497 12506 +commandsand 1 1 6.957497 6.957497 12507 +activemassag 1 1 6.957497 6.957497 12508 +peor 1 1 6.957497 6.957497 12509 +messagesor 1 1 6.957497 6.957497 12510 +homegrown 1 1 6.957497 6.957497 12511 +softwarein 1 1 6.957497 6.957497 12512 +besur 1 1 6.957497 6.957497 12513 +csplit 1 1 6.957497 6.957497 12514 +globalpoint 1 1 6.957497 6.957497 12515 +dereferenc 1 1 6.957497 6.957497 12516 +latencyof 1 1 6.957497 6.957497 12517 +bench 1 1 6.957497 6.957497 12494 +shellsshould 1 1 6.957497 6.957497 12518 +gmake 1 1 6.957497 6.957497 12498 +asact 1 1 6.957497 6.957497 12519 +scriptsloc 1 1 6.957497 6.957497 12520 +programfoo 1 1 6.957497 6.957497 12521 +foodebug 1 1 6.957497 6.957497 12522 +splitc_debug 1 1 6.957497 6.957497 12523 +aftersplitc_main 1 1 6.957497 6.957497 12524 +ongranita 1 1 6.957497 6.957497 12525 +youwant 1 1 6.957497 6.957497 12526 +am_run 1 1 6.957497 6.957497 12495 +thenattach 1 1 6.957497 6.957497 12527 +breakpoint 1 1 6.957497 6.957497 12528 +messagesact 1 1 6.957497 6.957497 12529 +layerthat 1 1 6.957497 6.957497 12530 +triplat 1 1 6.957497 6.957497 12531 +libspgam 1 1 6.957497 6.957497 12532 +aand 1 1 6.957497 6.957497 12533 +beforerun 1 1 6.957497 6.957497 12534 +runningprgm 1 1 6.957497 6.957497 12535 +mpimpi 1 1 6.957497 6.957497 12536 +popularmessag 1 1 6.957497 6.957497 12537 +mpich 1 1 6.957497 6.957497 12538 +overact 1 1 6.957497 6.957497 12539 +easiest 1 1 6.957497 6.957497 12540 +ampicc 1 1 6.957497 6.957497 12499 +fooyou 1 1 6.957497 6.957497 12541 +lookat 1 1 6.957497 6.957497 12542 +examplesin 1 1 6.957497 6.957497 12543 +ampi 1 1 6.957497 6.957497 12544 +likeordinari 1 1 6.957497 6.957497 12545 +softwaresoftwar 1 1 6.957497 6.957497 12546 +xpdbx 1 1 6.957497 6.957497 12547 +bison 1 1 6.957497 6.957497 12548 +problemsif 1 1 6.957497 6.957497 12549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..7bed21cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +technolog 1 131 2.079442 2.079442 102 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +code 1 108 2.197225 2.197225 116 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +advanc 1 99 2.302585 2.302585 130 +peopl 1 96 2.302585 2.302585 132 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +method 1 80 2.564949 2.564949 213 +simul 1 66 2.708050 2.708050 255 +creat 1 63 2.772589 2.772589 277 +overview 1 56 2.890372 2.890372 323 +direct 1 57 2.890372 2.890372 316 +scientif 1 53 2.944439 2.944439 341 +hardwar 1 51 2.995732 2.995732 350 +numer 1 49 3.044522 3.044522 369 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +discuss 1 45 3.135494 3.135494 399 +paul 1 38 3.295837 3.295837 471 +collabor 1 32 3.465736 3.465736 543 +transform 1 32 3.465736 3.465736 542 +richard 1 31 3.496508 3.496508 559 +compon 1 30 3.555348 3.555348 570 +semant 1 29 3.583519 3.583519 587 +propos 1 28 3.610918 3.610918 602 +effort 1 26 3.688879 3.688879 652 +defin 1 22 3.850148 3.850148 746 +geometri 1 22 3.850148 3.850148 752 +reduc 1 22 3.850148 3.850148 759 +qualiti 1 20 3.951244 3.951244 832 +longer 1 20 3.951244 3.951244 816 +particularli 1 19 4.007333 4.007333 867 +geometr 1 19 4.007333 4.007333 852 +exercis 1 19 4.007333 4.007333 842 +brief 1 16 4.174387 4.174387 1001 +topolog 1 14 4.317488 4.317488 1089 +mesh 1 11 4.553877 4.553877 1351 +guarante 1 10 4.653960 4.653960 1391 +rais 1 8 4.875197 4.875197 1711 +manufactur 1 8 4.875197 4.875197 1634 +thegoal 1 6 5.164786 5.164786 2033 +proce 1 6 5.164786 5.164786 2114 +synthes 1 5 5.347108 5.347108 2451 +weyl 1 4 5.568345 5.568345 2854 +substrat 1 4 5.568345 5.568345 2857 +nist 1 4 5.568345 5.568345 2973 +zippel 1 4 5.568345 5.568345 2879 +theus 1 4 5.568345 5.568345 2992 +rick 1 4 5.568345 5.568345 2646 +enorm 1 3 5.857933 5.857933 3431 +chew 1 3 5.857933 5.857933 3618 +expend 1 2 6.263398 6.263398 5451 +scientificsoftwar 1 2 6.263398 6.263398 5038 +andform 1 2 6.263398 6.263398 4274 +levelprogram 1 2 6.263398 6.263398 5452 +insystem 1 2 6.263398 6.263398 4172 +ideason 1 2 6.263398 6.263398 4469 +microstorag 1 2 6.263398 6.263398 4887 +palmer 1 2 6.263398 6.263398 5453 +oncomplex 1 1 6.957497 6.957497 12478 +simlab 1 1 6.957497 6.957497 12477 +bringingtogeth 1 1 6.957497 6.957497 12479 +symbolicmathemat 1 1 6.957497 6.957497 12480 +levelat 1 1 6.957497 6.957497 12481 +softwarepackag 1 1 6.957497 6.957497 12482 +microstoragearchitectur 1 1 6.957497 6.957497 12483 +computeralgebra 1 1 6.957497 6.957497 12484 +thechain 1 1 6.957497 6.957497 12485 +thearpa 1 1 6.957497 6.957497 12486 +madefast 1 1 6.957497 6.957497 12487 +ofnon 1 1 6.957497 6.957497 12488 +contemporan 1 1 6.957497 6.957497 12489 +chainsprogram 1 1 6.957497 6.957497 12490 +complextopolog 1 1 6.957497 6.957497 12491 +numericalalgorithm 1 1 6.957497 6.957497 12492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..bf01aee9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +contact 1 153 1.791759 1.791759 59 +code 1 108 2.197225 2.197225 116 +memori 1 101 2.302585 2.302585 139 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +sourc 1 77 2.564949 2.564949 201 +share 1 59 2.833213 2.833213 304 +releas 1 28 3.610918 3.610918 616 +multiprocessor 1 28 3.610918 3.610918 605 +berkelei 1 26 3.688879 3.688879 657 +supercomput 1 25 3.737670 3.737670 681 +prepar 1 20 3.951244 3.951244 824 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +chao 1 8 4.875197 4.875197 1753 +goldstein 1 6 5.164786 5.164786 2168 +culler 1 5 5.347108 5.347108 2381 +spam 1 4 5.568345 5.568345 2927 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +neta 1 3 5.857933 5.857933 3789 +dusseau 1 3 5.857933 5.857933 3382 +yelick 1 3 5.857933 5.857933 3374 +thorstenvon 1 2 6.263398 6.263398 5450 +multiprocessorsa 1 2 6.263398 6.263398 5455 +krishnamurthi 1 2 6.263398 6.263398 5408 +lumetta 1 2 6.263398 6.263398 5409 +contactthorsten 1 2 6.263398 6.263398 5438 +ccornel 1 1 6.957497 6.957497 12550 +implementationssplit 1 1 6.957497 6.957497 12551 +isimpl 1 1 6.957497 6.957497 12552 +messagesfor 1 1 6.957497 6.957497 12553 +ofsplit 1 1 6.957497 6.957497 12554 +distr 1 1 6.957497 6.957497 12555 +implementedon 1 1 6.957497 6.957497 12556 +contactchi 1 1 6.957497 6.957497 12557 +runningsolari 1 1 6.957497 6.957497 12558 +mattwelsh 1 1 6.957497 6.957497 12559 +cparallel 1 1 6.957497 6.957497 12560 +abstractproject 1 1 6.957497 6.957497 12561 +sitessplit 1 1 6.957497 6.957497 12562 +chome 1 1 6.957497 6.957497 12563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..b61312f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +cornel 1 215 1.386294 1.386294 23 +second 1 81 2.484907 2.484907 166 +html 1 75 2.639057 2.639057 235 +browser 1 56 2.890372 2.890372 313 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +default 1 5 5.347108 5.347108 2335 +redirect 1 1 6.957497 6.957497 12564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..97652a5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,221 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +professor 1 137 1.945910 1.945910 76 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +pleas 1 113 2.197225 2.197225 114 +intern 1 108 2.197225 2.197225 128 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +assist 1 112 2.197225 2.197225 113 +technic 1 100 2.302585 2.302585 140 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +follow 1 92 2.397895 2.397895 143 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +select 1 91 2.397895 2.397895 154 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +search 1 95 2.397895 2.397895 155 +associ 1 93 2.397895 2.397895 151 +activ 1 84 2.484907 2.484907 182 +ieee 1 86 2.484907 2.484907 190 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +sourc 1 77 2.564949 2.564949 201 +master 1 76 2.564949 2.564949 216 +server 1 76 2.564949 2.564949 204 +dynam 1 76 2.564949 2.564949 194 +involv 1 71 2.639057 2.639057 227 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +intellig 1 72 2.639057 2.639057 225 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +laboratori 1 63 2.772589 2.772589 292 +ithaca 1 65 2.772589 2.772589 294 +prof 1 64 2.772589 2.772589 273 +complex 1 64 2.772589 2.772589 269 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +abstract 1 62 2.772589 2.772589 276 +locat 1 59 2.833213 2.833213 303 +automat 1 61 2.833213 2.833213 306 +type 1 61 2.833213 2.833213 296 +direct 1 57 2.890372 2.890372 316 +browser 1 56 2.890372 2.890372 313 +unix 1 58 2.890372 2.890372 308 +index 1 56 2.890372 2.890372 309 +three 1 54 2.944439 2.944439 330 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +approach 1 48 3.044522 3.044522 366 +done 1 47 3.091042 3.091042 381 +move 1 47 3.091042 3.091042 382 +video 1 44 3.135494 3.135494 405 +discuss 1 45 3.135494 3.135494 399 +mechan 1 43 3.178054 3.178054 416 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +submit 1 39 3.258097 3.258097 440 +author 1 39 3.258097 3.258097 450 +microsoft 1 38 3.295837 3.295837 468 +robot 1 36 3.367296 3.367296 497 +global 1 34 3.401197 3.401197 520 +tech 1 35 3.401197 3.401197 515 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +titl 1 31 3.496508 3.496508 556 +hard 1 30 3.555348 3.555348 563 +full 1 28 3.610918 3.610918 615 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +hope 1 28 3.610918 3.610918 610 +pass 1 28 3.610918 3.610918 611 +manipul 1 27 3.637586 3.637586 624 +arrai 1 27 3.637586 3.637586 627 +detect 1 26 3.688879 3.688879 646 +proc 1 26 3.688879 3.688879 649 +revis 1 26 3.688879 3.688879 640 +task 1 25 3.737670 3.737670 678 +motion 1 24 3.761200 3.761200 699 +frame 1 24 3.761200 3.761200 684 +greg 1 24 3.761200 3.761200 695 +thank 1 23 3.806662 3.806662 721 +mobil 1 23 3.806662 3.806662 730 +supervis 1 20 3.951244 3.951244 840 +break 1 20 3.951244 3.951244 812 +mpeg 1 20 3.951244 3.951244 831 +scheme 1 20 3.951244 3.951244 818 +boston 1 19 4.007333 4.007333 862 +scott 1 18 4.060443 4.060443 884 +miller 1 17 4.110874 4.110874 949 +match 1 16 4.174387 4.174387 965 +diego 1 16 4.174387 4.174387 992 +micro 1 15 4.248495 4.248495 1031 +configur 1 15 4.248495 4.248495 1012 +massiv 1 15 4.248495 4.248495 1026 +scene 1 14 4.317488 4.317488 1114 +split 1 14 4.317488 4.317488 1078 +anonym 1 14 4.317488 4.317488 1100 +canada 1 13 4.382027 4.382027 1158 +daniel 1 12 4.465908 4.465908 1233 +bruce 1 12 4.465908 4.465908 1226 +franc 1 12 4.465908 4.465908 1276 +tour 1 11 4.553877 4.553877 1307 +peter 1 11 4.553877 4.553877 1316 +keyword 1 11 4.553877 4.553877 1356 +donald 1 9 4.753590 4.753590 1510 +transmiss 1 9 4.753590 4.753590 1588 +juan 1 9 4.753590 4.753590 1580 +wilson 1 9 4.753590 4.753590 1536 +potenti 1 8 4.875197 4.875197 1690 +invari 1 8 4.875197 4.875197 1748 +japan 1 8 4.875197 4.875197 1762 +siggraph 1 8 4.875197 4.875197 1773 +ramin 1 7 5.010635 5.010635 1820 +justin 1 7 5.010635 5.010635 1789 +sensor 1 7 5.010635 5.010635 1920 +huttenloch 1 6 5.164786 5.164786 1983 +zabih 1 6 5.164786 5.164786 2138 +actuat 1 5 5.347108 5.347108 2442 +solid 1 5 5.347108 5.347108 2255 +symmetr 1 4 5.568345 5.568345 2908 +publicationsth 1 4 5.568345 5.568345 2859 +triangul 1 4 5.568345 5.568345 2903 +csrvl 1 3 5.857933 5.857933 3543 +jen 1 3 5.857933 5.857933 3378 +microfabr 1 3 5.857933 5.857933 3610 +bhringer 1 3 5.857933 5.857933 3606 +ryan 1 3 5.857933 5.857933 3679 +michel 1 3 5.857933 5.857933 3791 +szewczyk 1 3 5.857933 5.857933 3108 +voskuhl 1 3 5.857933 5.857933 3109 +matt 1 3 5.857933 5.857933 3792 +laboratorywelcom 1 2 6.263398 6.263398 5439 +electro 1 2 6.263398 6.263398 5014 +mem 1 2 6.263398 6.263398 5007 +andclassif 1 2 6.263398 6.263398 5390 +windowsnt 1 2 6.263398 6.263398 5440 +ree 1 2 6.263398 6.263398 5009 +automationnic 1 2 6.263398 6.263398 5019 +homolog 1 2 6.263398 6.263398 5441 +andj 1 2 6.263398 6.263398 5020 +latomb 1 2 6.263398 6.263398 5021 +brigg 1 2 6.263398 6.263398 5008 +actuatorarrai 1 2 6.263398 6.263398 5017 +mihailovich 1 2 6.263398 6.263398 5018 +macdonald 1 2 6.263398 6.263398 5006 +hing 1 2 6.263398 6.263398 5442 +montral 1 2 6.263398 6.263398 5394 +cytacki 1 2 6.263398 6.263398 5330 +aaron 1 2 6.263398 6.263398 4438 +csrvlcornel 1 1 6.957497 6.957497 12376 +nich 1 1 6.957497 6.957497 12377 +rrentli 1 1 6.957497 6.957497 12378 +ofresearch 1 1 6.957497 6.957497 12379 +pictor 1 1 6.957497 6.957497 12380 +projectsth 1 1 6.957497 6.957497 12381 +byramin 1 1 6.957497 6.957497 12382 +allowingscen 1 1 6.957497 6.957497 12383 +onplatform 1 1 6.957497 6.957497 12384 +nynet 1 1 6.957497 6.957497 12385 +foru 1 1 6.957497 6.957497 12386 +sproject 1 1 6.957497 6.957497 12387 +currentlyconsid 1 1 6.957497 6.957497 12388 +theissuesher 1 1 6.957497 6.957497 12389 +thecsrvl 1 1 6.957497 6.957497 12390 +serverar 1 1 6.957497 6.957497 12391 +sensorless 1 1 6.957497 6.957497 12392 +oiso 1 1 6.957497 6.957497 12393 +micromechan 1 1 6.957497 6.957497 12394 +quebc 1 1 6.957497 6.957497 12395 +authorthes 1 1 6.957497 6.957497 12396 +pedro 1 1 6.957497 6.957497 12397 +felzenszwalb 1 1 6.957497 6.957497 12398 +lilien 1 1 6.957497 6.957497 12399 +maharbiz 1 1 6.957497 6.957497 12400 +scharstein 1 1 6.957497 6.957497 12401 +stump 1 1 6.957497 6.957497 12402 +fernando 1 1 6.957497 6.957497 12403 +viton 1 1 6.957497 6.957497 12404 +wayt 1 1 6.957497 6.957497 12405 +welsh 1 1 6.957497 6.957497 12406 +whelan 1 1 6.957497 6.957497 12407 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..c345f33b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +fall 1 181 1.609438 1.609438 40 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +manag 1 114 2.197225 2.197225 125 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +activ 1 84 2.484907 2.484907 182 +build 1 85 2.484907 2.484907 184 +server 1 76 2.564949 2.564949 204 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +knowledg 1 67 2.708050 2.708050 243 +collect 1 65 2.772589 2.772589 268 +locat 1 59 2.833213 2.833213 303 +overview 1 56 2.890372 2.890372 323 +found 1 53 2.944439 2.944439 337 +undergradu 1 54 2.944439 2.944439 338 +digit 1 52 2.995732 2.995732 348 +form 1 39 3.258097 3.258097 443 +john 1 33 3.433987 3.433987 532 +computersci 1 30 3.555348 3.555348 562 +determin 1 27 3.637586 3.637586 630 +consist 1 26 3.688879 3.688879 651 +util 1 21 3.912023 3.912023 774 +increas 1 20 3.951244 3.951244 829 +longer 1 20 3.951244 3.951244 816 +ever 1 19 4.007333 4.007333 872 +dean 1 14 4.317488 4.317488 1104 +captur 1 12 4.465908 4.465908 1232 +volum 1 11 4.553877 4.553877 1347 +explicit 1 9 4.753590 4.753590 1525 +extract 1 8 4.875197 4.875197 1728 +begun 1 5 5.347108 5.347108 2386 +allan 1 4 5.568345 5.568345 2849 +thisdocu 1 3 5.857933 5.857933 3336 +krafft 1 3 5.857933 5.857933 3638 +waysthat 1 2 6.263398 6.263398 5445 +tabular 1 2 6.263398 6.263398 4515 +informationag 1 2 6.263398 6.263398 5446 +hyperlink 1 2 6.263398 6.263398 5447 +oninform 1 2 6.263398 6.263398 4316 +projectinform 1 1 6.957497 6.957497 12451 +accessth 1 1 6.957497 6.957497 12452 +ofonlin 1 1 6.957497 6.957497 12453 +forhuman 1 1 6.957497 6.957497 12454 +hopcroft 1 1 6.957497 6.957497 12455 +davisin 1 1 6.957497 6.957497 12456 +researchextract 1 1 6.957497 6.957497 12457 +thestructur 1 1 6.957497 6.957497 12458 +extractinginform 1 1 6.957497 6.957497 12459 +collectionsof 1 1 6.957497 6.957497 12460 +nationwid 1 1 6.957497 6.957497 12461 +sciencetechn 1 1 6.957497 6.957497 12462 +moreaccess 1 1 6.957497 6.957497 12463 +toit 1 1 6.957497 6.957497 12464 +visitingscientist 1 1 6.957497 6.957497 12465 +jimdavi 1 1 6.957497 6.957497 12466 +jrdpublicationsjam 1 1 6.957497 6.957497 12467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..655c5baa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +develop 1 174 1.791759 1.791759 53 +peopl 1 96 2.302585 2.302585 132 +multimedia 1 68 2.708050 2.708050 258 +direct 1 57 2.890372 2.890372 316 +mission 1 5 5.347108 5.347108 2465 +zeno 1 3 5.857933 5.857933 3580 +potpourri 1 2 6.263398 6.263398 4547 +groupzeno 1 1 6.957497 6.957497 12565 +curricula 1 1 6.957497 6.957497 12566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..9d3eeb8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +topic 1 114 2.197225 2.197225 110 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +control 1 82 2.484907 2.484907 164 +stuff 1 87 2.484907 2.484907 171 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +ithaca 1 65 2.772589 2.772589 294 +plan 1 65 2.772589 2.772589 272 +prof 1 64 2.772589 2.772589 273 +januari 1 62 2.772589 2.772589 264 +back 1 60 2.833213 2.833213 297 +date 1 51 2.995732 2.995732 344 +still 1 50 3.044522 3.044522 362 +california 1 46 3.091042 3.091042 388 +video 1 44 3.135494 3.135494 405 +long 1 43 3.178054 3.178054 413 +york 1 41 3.218876 3.218876 435 +vision 1 41 3.218876 3.218876 430 +compani 1 41 3.218876 3.218876 423 +robot 1 36 3.367296 3.367296 497 +game 1 36 3.367296 3.367296 498 +actual 1 28 3.610918 3.610918 604 +quit 1 27 3.637586 3.637586 633 +challeng 1 26 3.688879 3.688879 653 +hill 1 25 3.737670 3.737670 670 +frame 1 24 3.761200 3.761200 684 +motion 1 24 3.761200 3.761200 699 +viewer 1 21 3.912023 3.912023 787 +leav 1 21 3.912023 3.912023 772 +martin 1 21 3.912023 3.912023 794 +left 1 19 4.007333 4.007333 851 +miss 1 19 4.007333 4.007333 866 +wind 1 18 4.060443 4.060443 908 +bachelor 1 17 4.110874 4.110874 957 +segment 1 17 4.110874 4.110874 931 +jose 1 16 4.174387 4.174387 976 +month 1 15 4.248495 4.248495 1025 +mayb 1 15 4.248495 4.248495 1014 +enough 1 15 4.248495 4.248495 1040 +anywai 1 15 4.248495 4.248495 1047 +decid 1 14 4.317488 4.317488 1075 +land 1 12 4.465908 4.465908 1273 +meng 1 12 4.465908 4.465908 1214 +pagewelcom 1 11 4.553877 4.553877 1344 +see 1 11 4.553877 4.553877 1337 +island 1 11 4.553877 4.553877 1345 +town 1 10 4.653960 4.653960 1458 +santa 1 10 4.653960 4.653960 1441 +babylon 1 8 4.875197 4.875197 1731 +ramin 1 7 5.010635 5.010635 1820 +rain 1 6 5.164786 5.164786 2137 +snow 1 6 5.164786 5.164786 2031 +zabih 1 6 5.164786 5.164786 2138 +corp 1 6 5.164786 5.164786 2139 +green 1 4 5.568345 5.568345 2848 +barbara 1 3 5.857933 5.857933 3380 +csrvl 1 3 5.857933 5.857933 3543 +binghamton 1 3 5.857933 5.857933 3544 +season 1 2 6.263398 6.263398 4872 +syosset 1 1 6.957497 6.957497 9497 +californialockhe 1 1 6.957497 6.957497 9498 +yorkaltera 1 1 6.957497 6.957497 9499 +californiafun 1 1 6.957497 6.957497 9500 +domainvth 1 1 6.957497 6.957497 9501 +siteoth 1 1 6.957497 6.957497 9502 +worldcareermosaictop 1 1 6.957497 6.957497 9503 +kmai 1 1 6.957497 6.957497 9504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..906c4a4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +relat 1 139 1.945910 1.945910 68 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +peopl 1 96 2.302585 2.302585 132 +academ 1 82 2.484907 2.484907 178 +requir 1 81 2.484907 2.484907 167 +descript 1 64 2.772589 2.772589 271 +organ 1 65 2.772589 2.772589 265 +faculti 1 56 2.890372 2.890372 325 +overview 1 56 2.890372 2.890372 323 +talk 1 53 2.944439 2.944439 336 +visitor 1 49 3.044522 3.044522 371 +seminar 1 38 3.295837 3.295837 470 +staff 1 36 3.367296 3.367296 490 +utc 1 27 3.637586 3.637586 629 +alumni 1 21 3.912023 3.912023 807 +facil 1 20 3.951244 3.951244 814 +event 1 18 4.060443 4.060443 896 +catalog 1 10 4.653960 4.653960 1431 +upcom 1 8 4.875197 4.875197 1685 +calendar 1 8 4.875197 4.875197 1649 +recruit 1 6 5.164786 5.164786 2145 +admiss 1 4 5.568345 5.568345 2704 +pagegener 1 1 6.957497 6.957497 12567 +schedulespag 1 1 6.957497 6.957497 12568 +directoryth 1 1 6.957497 6.957497 12569 +universitywww 1 1 6.957497 6.957497 12570 +informationgrip 1 1 6.957497 6.957497 12571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..78fb9473 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +professor 1 137 1.945910 1.945910 76 +confer 1 126 2.079442 2.079442 100 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +associ 1 93 2.397895 2.397895 151 +journal 1 83 2.484907 2.484907 183 +learn 1 86 2.484907 2.484907 170 +exampl 1 77 2.564949 2.564949 195 +servic 1 72 2.639057 2.639057 236 +intellig 1 72 2.639057 2.639057 225 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +artifici 1 63 2.772589 2.772589 280 +previou 1 62 2.772589 2.772589 290 +automat 1 61 2.833213 2.833213 306 +index 1 56 2.890372 2.890372 309 +profession 1 51 2.995732 2.995732 345 +california 1 46 3.091042 3.091042 388 +third 1 43 3.178054 3.178054 412 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +societi 1 40 3.258097 3.258097 456 +award 1 34 3.401197 3.401197 523 +next 1 34 3.401197 3.401197 517 +board 1 33 3.433987 3.433987 528 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +focus 1 29 3.583519 3.583519 584 +american 1 27 3.637586 3.637586 634 +berkelei 1 26 3.688879 3.688879 657 +higher 1 24 3.761200 3.761200 690 +honor 1 23 3.806662 3.806662 729 +theorem 1 21 3.912023 3.912023 786 +citi 1 19 4.007333 4.007333 874 +prove 1 19 4.007333 4.007333 848 +researchmi 1 14 4.317488 4.317488 1119 +joint 1 13 4.382027 4.382027 1130 +lake 1 11 4.553877 4.553877 1373 +distinguish 1 11 4.553877 4.553877 1357 +utah 1 9 4.753590 4.753590 1585 +centenni 1 7 5.010635 5.010635 1967 +presid 1 6 5.164786 5.164786 2196 +heurist 1 6 5.164786 5.164786 2125 +emeritu 1 5 5.347108 5.347108 2544 +salt 1 5 5.347108 5.347108 2413 +bledso 1 4 5.568345 5.568345 2999 +presentarea 1 4 5.568345 5.568345 3026 +analog 1 4 5.568345 5.568345 2875 +truste 1 3 5.857933 5.857933 3900 +artificialintellig 1 3 5.857933 5.857933 3608 +alsointerest 1 3 5.857933 5.857933 3813 +donnel 1 2 6.263398 6.263398 5233 +mileston 1 2 6.263398 6.263398 4416 +jointconfer 1 2 6.263398 6.263398 5030 +woodrow 1 1 6.957497 6.957497 14287 +bledsoepet 1 1 6.957497 6.957497 14288 +americanmathemat 1 1 6.957497 6.957497 14289 +onartifici 1 1 6.957497 6.957497 14290 +interestautomat 1 1 6.957497 6.957497 14291 +theoremproof 1 1 6.957497 6.957497 14292 +levelplan 1 1 6.957497 6.957497 14293 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..5469a814 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +compil 1 122 2.079442 2.079442 96 +confer 1 126 2.079442 2.079442 100 +schedul 1 119 2.079442 2.079442 85 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +code 1 108 2.197225 2.197225 116 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +environ 1 84 2.484907 2.484907 177 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +august 1 66 2.708050 2.708050 257 +practic 1 70 2.708050 2.708050 246 +interact 1 62 2.772589 2.772589 270 +previou 1 62 2.772589 2.772589 290 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +local 1 55 2.944439 2.944439 334 +profession 1 51 2.995732 2.995732 345 +set 1 50 3.044522 3.044522 361 +york 1 41 3.218876 3.218876 435 +societi 1 40 3.258097 3.258097 456 +multipl 1 39 3.258097 3.258097 453 +formal 1 37 3.332205 3.332205 478 +committe 1 34 3.401197 3.401197 522 +concurr 1 34 3.401197 3.401197 501 +next 1 34 3.401197 3.401197 517 +john 1 33 3.433987 3.433987 532 +board 1 33 3.433987 3.433987 528 +scientist 1 31 3.496508 3.496508 560 +computersci 1 30 3.555348 3.555348 562 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +debug 1 17 4.110874 4.110874 944 +brown 1 16 4.174387 4.174387 977 +researchmi 1 14 4.317488 4.317488 1119 +incomput 1 14 4.317488 4.317488 1096 +vice 1 9 4.753590 4.753590 1604 +unifi 1 8 4.875197 4.875197 1774 +newton 1 7 5.010635 5.010635 1824 +softwareengin 1 6 5.164786 5.164786 2162 +parallelprogram 1 5 5.347108 5.347108 2379 +jain 1 5 5.347108 5.347108 2332 +werth 1 4 5.568345 5.568345 3004 +andimplement 1 4 5.568345 5.568345 3029 +hyder 1 4 5.568345 5.568345 2772 +interestparallel 1 3 5.857933 5.857933 3806 +publicationss 1 2 6.263398 6.263398 5732 +thirteenth 1 2 6.263398 6.263398 5733 +werthsenior 1 1 6.957497 6.957497 14294 +emori 1 1 6.957497 6.957497 14295 +accredit 1 1 6.957497 6.957497 14296 +sobek 1 1 6.957497 6.957497 14297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..cd735cce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +databas 1 122 2.079442 2.079442 86 +person 1 111 2.197225 2.197225 117 +manag 1 114 2.197225 2.197225 125 +member 1 84 2.484907 2.484907 165 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +colleg 1 61 2.833213 2.833213 300 +back 1 60 2.833213 2.833213 297 +index 1 56 2.890372 2.890372 309 +faculti 1 56 2.890372 2.890372 325 +algebra 1 45 3.135494 3.135494 394 +multi 1 36 3.367296 3.367296 493 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +strategi 1 25 3.737670 3.737670 682 +researchmi 1 14 4.317488 4.317488 1119 +fromindividu 1 12 4.465908 4.465908 1290 +oxford 1 6 5.164786 5.164786 2121 +regent 1 5 5.347108 5.347108 2551 +emeritu 1 5 5.347108 5.347108 2544 +england 1 5 5.347108 5.347108 2557 +alfr 1 4 5.568345 5.568345 2882 +dale 1 4 5.568345 5.568345 2687 +crow 1 3 5.857933 5.857933 3845 +trammel 1 2 6.263398 6.263398 5562 +andmap 1 2 6.263398 6.263398 4258 +daleno 1 1 6.957497 6.957497 14298 +exet 1 1 6.957497 6.957497 14299 +interestdatabas 1 1 6.957497 6.957497 14300 +stagei 1 1 6.957497 6.957497 14301 +studiedinclud 1 1 6.957497 6.957497 14302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..c2ba9b03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +mathemat 1 108 2.197225 2.197225 123 +teach 1 108 2.197225 2.197225 112 +intern 1 108 2.197225 2.197225 128 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +solut 1 82 2.484907 2.484907 162 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +member 1 84 2.484907 2.484907 165 +orient 1 80 2.564949 2.564949 205 +decemb 1 80 2.564949 2.564949 215 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +faculti 1 56 2.890372 2.890372 325 +visual 1 48 3.044522 3.044522 372 +math 1 44 3.135494 3.135494 402 +natur 1 44 3.135494 3.135494 406 +respons 1 37 3.332205 3.332205 476 +award 1 34 3.401197 3.401197 523 +concurr 1 34 3.401197 3.401197 501 +obtain 1 33 3.433987 3.433987 534 +exist 1 30 3.555348 3.555348 569 +profil 1 30 3.555348 3.555348 581 +propos 1 28 3.610918 3.610918 602 +repres 1 26 3.688879 3.688879 656 +honor 1 23 3.806662 3.806662 729 +excel 1 19 4.007333 4.007333 868 +miller 1 17 4.110874 4.110874 949 +role 1 14 4.317488 4.317488 1101 +fromindividu 1 12 4.465908 4.465908 1290 +purdu 1 10 4.653960 4.653960 1466 +queue 1 10 4.653960 4.653960 1386 +length 1 10 4.653960 4.653960 1400 +jeffrei 1 9 4.753590 4.753590 1612 +modula 1 9 4.753590 4.753590 1613 +researchi 1 8 4.875197 4.875197 1756 +inproceed 1 8 4.875197 4.875197 1670 +distributedsystem 1 6 5.164786 5.164786 2022 +ofparallel 1 5 5.347108 5.347108 2380 +bulletin 1 5 5.347108 5.347108 2343 +throughput 1 4 5.568345 5.568345 2993 +richter 1 4 5.568345 5.568345 2957 +chou 1 4 5.568345 5.568345 3033 +sigcs 1 4 5.568345 5.568345 2865 +georgia 1 3 5.857933 5.857933 3834 +publicationsj 1 3 5.857933 5.857933 3808 +shen 1 3 5.857933 5.857933 3370 +performanceof 1 2 6.263398 6.263398 4585 +modelingof 1 2 6.263398 6.263398 5734 +loui 1 2 6.263398 6.263398 5220 +brumfield 1 1 6.957497 6.957497 14303 +brumfieldsenior 1 1 6.957497 6.957497 14304 +interestperform 1 1 6.957497 6.957497 14305 +designersof 1 1 6.957497 6.957497 14306 +eachresourc 1 1 6.957497 6.957497 14307 +tasksawait 1 1 6.957497 6.957497 14308 +computationof 1 1 6.957497 6.957497 14309 +graf 1 1 6.957497 6.957497 14310 +verdi 1 1 6.957497 6.957497 14311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..4ae00fd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +softwar 1 220 1.386294 1.386294 30 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +journal 1 83 2.484907 2.484907 183 +solut 1 82 2.484907 2.484907 162 +member 1 84 2.484907 2.484907 165 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +appli 1 71 2.639057 2.639057 226 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +back 1 60 2.833213 2.833213 297 +special 1 56 2.890372 2.890372 320 +explor 1 58 2.890372 2.890372 324 +major 1 56 2.890372 2.890372 315 +faculti 1 56 2.890372 2.890372 325 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +profession 1 51 2.995732 2.995732 345 +particular 1 51 2.995732 2.995732 352 +numer 1 49 3.044522 3.044522 369 +editor 1 41 3.218876 3.218876 433 +transact 1 39 3.258097 3.258097 438 +statist 1 35 3.401197 3.401197 521 +board 1 33 3.433987 3.433987 528 +obtain 1 33 3.433987 3.433987 534 +transform 1 32 3.465736 3.465736 542 +profil 1 30 3.555348 3.555348 581 +packag 1 28 3.610918 3.610918 614 +symbol 1 27 3.637586 3.637586 620 +methodolog 1 23 3.806662 3.806662 733 +director 1 22 3.850148 3.850148 767 +siam 1 21 3.912023 3.912023 800 +rout 1 21 3.912023 3.912023 793 +expert 1 20 3.951244 3.951244 833 +region 1 19 4.007333 4.007333 875 +north 1 19 4.007333 4.007333 873 +dimension 1 18 4.060443 4.060443 909 +estim 1 17 4.110874 4.110874 930 +matrix 1 17 4.110874 4.110874 933 +condit 1 16 4.174387 4.174387 975 +alan 1 13 4.382027 4.382027 1146 +emploi 1 12 4.465908 4.465908 1284 +fromindividu 1 12 4.465908 4.465908 1290 +michigan 1 11 4.553877 4.553877 1368 +node 1 11 4.553877 4.553877 1326 +editori 1 9 4.753590 4.753590 1611 +hundr 1 9 4.753590 4.753590 1528 +researchi 1 8 4.875197 4.875197 1756 +curv 1 8 4.875197 4.875197 1656 +coast 1 8 4.875197 4.875197 1746 +presenc 1 8 4.875197 4.875197 1671 +counter 1 8 4.875197 4.875197 1765 +centenni 1 7 5.010635 5.010635 1967 +southern 1 6 5.164786 5.164786 2191 +spline 1 6 5.164786 5.164786 2007 +constrain 1 6 5.164786 5.164786 2042 +fit 1 5 5.347108 5.347108 2285 +holland 1 5 5.347108 5.347108 2490 +triangul 1 4 5.568345 5.568345 2903 +closest 1 4 5.568345 5.568345 2828 +cline 1 3 5.857933 5.857933 3218 +interestmathemat 1 3 5.857933 5.857933 3860 +scatter 1 3 5.857933 5.857933 3351 +delaunai 1 3 5.857933 5.857933 3619 +imac 1 3 5.857933 5.857933 3718 +wilkinson 1 3 5.857933 5.857933 3579 +subprogram 1 2 6.263398 6.263398 5618 +andsurfac 1 2 6.263398 6.263398 5735 +publicationsr 1 2 6.263398 6.263398 5736 +king 1 2 6.263398 6.263398 5737 +meyer 1 2 6.263398 6.263398 4728 +guard 1 2 6.263398 6.263398 5738 +tender 1 2 6.263398 6.263398 5397 +stewart 1 2 6.263398 6.263398 5739 +clinedavid 1 1 6.957497 6.957497 14313 +bruton 1 1 6.957497 6.957497 14314 +statisticalcomput 1 1 6.957497 6.957497 14315 +socialrespons 1 1 6.957497 6.957497 14316 +whichcan 1 1 6.957497 6.957497 14317 +constructionof 1 1 6.957497 6.957497 14318 +formathemat 1 1 6.957497 6.957497 14319 +developmentha 1 1 6.957497 6.957497 14320 +tension 1 1 6.957497 6.957497 14321 +renka 1 1 6.957497 6.957497 14312 +buoi 1 1 6.957497 6.957497 14322 +barrier 1 1 6.957497 6.957497 14323 +moler 1 1 6.957497 6.957497 14324 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..a235e013 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +area 1 144 1.945910 1.945910 80 +mathemat 1 108 2.197225 2.197225 123 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +member 1 84 2.484907 2.484907 165 +good 1 77 2.564949 2.564949 200 +addit 1 74 2.639057 2.639057 228 +back 1 60 2.833213 2.833213 297 +reason 1 57 2.890372 2.890372 318 +faculti 1 56 2.890372 2.890372 325 +particular 1 51 2.995732 2.995732 352 +physic 1 47 3.091042 3.091042 377 +theoret 1 39 3.258097 3.258097 446 +correct 1 38 3.295837 3.295837 462 +award 1 34 3.401197 3.401197 523 +obtain 1 33 3.433987 3.433987 534 +power 1 30 3.555348 3.555348 573 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +art 1 29 3.583519 3.583519 593 +focus 1 29 3.583519 3.583519 584 +american 1 27 3.637586 3.637586 634 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +methodolog 1 23 3.806662 3.806662 733 +increas 1 20 3.951244 3.951244 829 +fromindividu 1 12 4.465908 4.465908 1290 +academi 1 8 4.875197 4.875197 1735 +centenni 1 7 5.010635 5.010635 1967 +foreign 1 7 5.010635 5.010635 1919 +ture 1 6 5.164786 5.164786 1997 +british 1 5 5.347108 5.347108 2546 +harri 1 4 5.568345 5.568345 3034 +queen 1 4 5.568345 5.568345 2919 +dijkstra 1 3 5.857933 5.857933 3173 +netherland 1 3 5.857933 5.857933 3650 +streamlin 1 3 5.857933 5.857933 3573 +edsger 1 2 6.263398 6.263398 5740 +honorari 1 2 6.263398 6.263398 5741 +sciencesmemb 1 2 6.263398 6.263398 5742 +royal 1 2 6.263398 6.263398 4756 +wybe 1 1 6.957497 6.957497 14325 +dijkstraschlumberg 1 1 6.957497 6.957497 14326 +sciencesprofessor 1 1 6.957497 6.957497 14327 +mathematicskandidaatsexamen 1 1 6.957497 6.957497 14328 +doctora 1 1 6.957497 6.957497 14329 +examen 1 1 6.957497 6.957497 14330 +leydenph 1 1 6.957497 6.957497 14331 +amsterdamhonor 1 1 6.957497 6.957497 14332 +awardsacm 1 1 6.957497 6.957497 14333 +sciencesdistinguish 1 1 6.957497 6.957497 14334 +societyafip 1 1 6.957497 6.957497 14335 +honori 1 1 6.957497 6.957497 14336 +causa 1 1 6.957497 6.957497 14337 +belfastarea 1 1 6.957497 6.957497 14338 +systemssummari 1 1 6.957497 6.957497 14339 +argumentso 1 1 6.957497 6.957497 14340 +ofform 1 1 6.957497 6.957497 14341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..e348986b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +mathemat 1 108 2.197225 2.197225 123 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +educ 1 86 2.484907 2.484907 191 +larg 1 82 2.484907 2.484907 168 +internet 1 83 2.484907 2.484907 186 +member 1 84 2.484907 2.484907 165 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +back 1 60 2.833213 2.833213 297 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +protocol 1 45 3.135494 3.135494 407 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +mobil 1 23 3.806662 3.806662 730 +speed 1 18 4.060443 4.060443 911 +researchmi 1 14 4.317488 4.317488 1119 +fromindividu 1 12 4.465908 4.465908 1290 +chri 1 11 4.553877 4.553877 1311 +secretari 1 8 4.875197 4.875197 1775 +inproceed 1 8 4.875197 4.875197 1670 +sigcomm 1 5 5.347108 5.347108 2329 +gouda 1 4 5.568345 5.568345 3021 +treasur 1 3 5.857933 5.857933 3229 +andm 1 3 5.857933 5.857933 3901 +edmondson 1 2 6.263398 6.263398 4182 +yurkanan 1 2 6.263398 6.263398 4175 +interestcomput 1 2 6.263398 6.263398 5743 +yurkananlectur 1 1 6.957497 6.957497 14342 +internetwork 1 1 6.957497 6.957497 14343 +cobb 1 1 6.957497 6.957497 14344 +informaticsconfer 1 1 6.957497 6.957497 14345 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..82b460f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +list 1 201 1.609438 1.609438 39 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +confer 1 126 2.079442 2.079442 100 +techniqu 1 99 2.302585 2.302585 138 +academ 1 82 2.484907 2.484907 178 +educ 1 86 2.484907 2.484907 191 +librari 1 87 2.484907 2.484907 181 +school 1 84 2.484907 2.484907 188 +member 1 84 2.484907 2.484907 165 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +improv 1 62 2.772589 2.772589 289 +back 1 60 2.833213 2.833213 297 +faculti 1 56 2.890372 2.890372 325 +local 1 55 2.944439 2.944439 334 +profession 1 51 2.995732 2.995732 345 +committe 1 34 3.401197 3.401197 522 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +retriev 1 27 3.637586 3.637586 621 +women 1 16 4.174387 4.174387 1004 +researchmi 1 14 4.317488 4.317488 1119 +coordin 1 13 4.382027 4.382027 1182 +minor 1 12 4.465908 4.465908 1237 +fromindividu 1 12 4.465908 4.465908 1290 +secondari 1 7 5.010635 5.010635 1884 +recruit 1 6 5.164786 5.164786 2145 +sigcs 1 4 5.568345 5.568345 2865 +louisiana 1 3 5.857933 5.857933 3902 +suzi 1 2 6.263398 6.263398 4288 +gallagh 1 2 6.263398 6.263398 4293 +southwestern 1 2 6.263398 6.263398 5744 +interestcomput 1 2 6.263398 6.263398 5743 +gallagherlectur 1 1 6.957497 6.957497 14346 +loyola 1 1 6.957497 6.957497 14347 +necc 1 1 6.957497 6.957497 14348 +andretent 1 1 6.957497 6.957497 14349 +scienceeduc 1 1 6.957497 6.957497 14350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..1e49d5a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +techniqu 1 99 2.302585 2.302585 138 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +member 1 84 2.484907 2.484907 165 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +decemb 1 80 2.564949 2.564949 215 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +integr 1 67 2.708050 2.708050 245 +guid 1 63 2.772589 2.772589 267 +januari 1 62 2.772589 2.772589 264 +back 1 60 2.833213 2.833213 297 +special 1 56 2.890372 2.890372 320 +faculti 1 56 2.890372 2.890372 325 +processor 1 54 2.944439 2.944439 335 +press 1 42 3.218876 3.218876 419 +transact 1 39 3.258097 3.258097 438 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +toler 1 33 3.433987 3.433987 533 +obtain 1 33 3.433987 3.433987 534 +fault 1 32 3.465736 3.465736 547 +profil 1 30 3.555348 3.555348 581 +focus 1 29 3.583519 3.583519 584 +scale 1 28 3.610918 3.610918 613 +measur 1 28 3.610918 3.610918 609 +multiprocessor 1 28 3.610918 3.610918 605 +repres 1 26 3.688879 3.688879 656 +strategi 1 25 3.737670 3.737670 682 +reliabl 1 25 3.737670 3.737670 674 +lead 1 23 3.806662 3.806662 718 +methodolog 1 23 3.806662 3.806662 733 +properti 1 22 3.850148 3.850148 749 +scheme 1 20 3.951244 3.951244 818 +qualiti 1 20 3.951244 3.951244 832 +failur 1 18 4.060443 4.060443 898 +appropri 1 18 4.060443 4.060443 883 +interconnect 1 17 4.110874 4.110874 937 +across 1 16 4.174387 4.174387 974 +precis 1 15 4.248495 4.248495 1023 +senior 1 14 4.317488 4.317488 1120 +researchmi 1 14 4.317488 4.317488 1119 +johnson 1 13 4.382027 4.382027 1162 +optic 1 12 4.465908 4.465908 1221 +fromindividu 1 12 4.465908 4.465908 1290 +abil 1 11 4.553877 4.553877 1341 +impact 1 11 4.553877 4.553877 1334 +success 1 10 4.653960 4.653960 1390 +devis 1 10 4.653960 4.653960 1451 +true 1 10 4.653960 4.653960 1422 +contrast 1 8 4.875197 4.875197 1637 +multicomput 1 7 5.010635 5.010635 1890 +predic 1 7 5.010635 5.010635 1806 +nest 1 6 5.164786 5.164786 2151 +chemistri 1 5 5.347108 5.347108 2405 +orlean 1 5 5.347108 5.347108 2550 +buss 1 4 5.568345 5.568345 2649 +louisiana 1 3 5.857933 5.857933 3902 +thedevelop 1 3 5.857933 5.857933 3903 +wave 1 3 5.857933 5.857933 3518 +campbel 1 3 5.857933 5.857933 3272 +laser 1 2 6.263398 6.263398 4747 +beinginvestig 1 2 6.263398 6.263398 5745 +parallelsystem 1 2 6.263398 6.263398 5746 +publicationsr 1 2 6.263398 6.263398 5736 +jenevein 1 1 6.957497 6.957497 14351 +interestinterconnect 1 1 6.957497 6.957497 14355 +interconnectionnetwork 1 1 6.957497 6.957497 14356 +restsin 1 1 6.957497 6.957497 14357 +interconnectionstructur 1 1 6.957497 6.957497 14358 +wafer 1 1 6.957497 6.957497 14352 +kindof 1 1 6.957497 6.957497 14359 +beingappli 1 1 6.957497 6.957497 14360 +communicationswitch 1 1 6.957497 6.957497 14361 +iscontinu 1 1 6.957497 6.957497 14362 +performanceport 1 1 6.957497 6.957497 14363 +tobenchmark 1 1 6.957497 6.957497 14364 +memorysystem 1 1 6.957497 6.957497 14365 +menez 1 1 6.957497 6.957497 14353 +kyklo 1 1 6.957497 6.957497 14366 +laranjeira 1 1 6.957497 6.957497 14367 +malek 1 1 6.957497 6.957497 14354 +ullah 1 1 6.957497 6.957497 14368 +metrix 1 1 6.957497 6.957497 14369 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..9faeb4a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +applic 1 170 1.791759 1.791759 56 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +center 1 88 2.397895 2.397895 158 +activ 1 84 2.484907 2.484907 182 +member 1 84 2.484907 2.484907 165 +complet 1 77 2.564949 2.564949 208 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +abstract 1 62 2.772589 2.772589 276 +function 1 62 2.772589 2.772589 275 +back 1 60 2.833213 2.833213 297 +space 1 57 2.890372 2.890372 310 +faculti 1 56 2.890372 2.890372 325 +california 1 46 3.091042 3.091042 388 +especi 1 36 3.367296 3.367296 496 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +valu 1 25 3.737670 3.737670 665 +martin 1 21 3.912023 3.912023 794 +concentr 1 18 4.060443 4.060443 906 +track 1 15 4.248495 4.248495 1029 +researchmi 1 14 4.317488 4.317488 1119 +classic 1 14 4.317488 4.317488 1084 +philosophi 1 13 4.382027 4.382027 1167 +deduct 1 12 4.465908 4.465908 1236 +scan 1 12 4.465908 4.465908 1243 +fromindividu 1 12 4.465908 4.465908 1290 +angel 1 8 4.875197 4.875197 1779 +closur 1 8 4.875197 4.875197 1643 +notion 1 7 5.010635 5.010635 1947 +vehicl 1 7 5.010635 5.010635 1928 +delai 1 7 5.010635 5.010635 1848 +chicago 1 6 5.164786 5.164786 2149 +emeritu 1 5 5.347108 5.347108 2544 +interestmathemat 1 3 5.857933 5.857933 3860 +andon 1 3 5.857933 5.857933 3115 +metatheori 1 3 5.857933 5.857933 3642 +norman 1 1 6.957497 6.957497 14370 +martinprofessor 1 1 6.957497 6.957497 14371 +ofphilosophi 1 1 6.957497 6.957497 14372 +asinterpret 1 1 6.957497 6.957497 14373 +whichexploit 1 1 6.957497 6.957497 14374 +intension 1 1 6.957497 6.957497 14375 +significantearli 1 1 6.957497 6.957497 14376 +missil 1 1 6.957497 6.957497 14377 +trackingalgorithm 1 1 6.957497 6.957497 14378 +radar 1 1 6.957497 6.957497 14379 +inmani 1 1 6.957497 6.957497 14380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..588f57e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +technic 1 100 2.302585 2.302585 140 +techniqu 1 99 2.302585 2.302585 138 +associ 1 93 2.397895 2.397895 151 +real 1 93 2.397895 2.397895 144 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +academ 1 82 2.484907 2.484907 178 +member 1 84 2.484907 2.484907 165 +method 1 80 2.564949 2.564949 213 +decemb 1 80 2.564949 2.564949 215 +servic 1 72 2.639057 2.639057 236 +symposium 1 72 2.639057 2.639057 238 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +addit 1 74 2.639057 2.639057 228 +goal 1 66 2.708050 2.708050 250 +foundat 1 62 2.772589 2.772589 286 +automat 1 61 2.833213 2.833213 306 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +publish 1 57 2.890372 2.890372 326 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +understand 1 47 3.091042 3.091042 384 +adapt 1 46 3.091042 3.091042 387 +mechan 1 43 3.178054 3.178054 416 +editor 1 41 3.218876 3.218876 433 +autom 1 41 3.218876 3.218876 434 +electr 1 38 3.295837 3.295837 461 +formal 1 37 3.332205 3.332205 478 +respons 1 37 3.332205 3.332205 476 +robot 1 36 3.367296 3.367296 497 +committe 1 34 3.401197 3.401197 522 +board 1 33 3.433987 3.433987 528 +toler 1 33 3.433987 3.433987 533 +obtain 1 33 3.433987 3.433987 534 +hard 1 30 3.555348 3.555348 563 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +framework 1 28 3.610918 3.610918 606 +load 1 28 3.610918 3.610918 601 +constraint 1 26 3.688879 3.688879 636 +bound 1 26 3.688879 3.688879 659 +fundament 1 25 3.737670 3.737670 661 +primari 1 25 3.737670 3.737670 669 +concern 1 25 3.737670 3.737670 666 +toward 1 25 3.737670 3.737670 668 +fellow 1 24 3.761200 3.761200 701 +highli 1 23 3.806662 3.806662 725 +fund 1 21 3.912023 3.912023 805 +wang 1 21 3.912023 3.912023 790 +synthesi 1 20 3.951244 3.951244 834 +expert 1 20 3.951244 3.951244 833 +aid 1 18 4.060443 4.060443 904 +critic 1 16 4.174387 4.174387 982 +taiwan 1 16 4.174387 4.174387 1006 +brown 1 16 4.174387 4.174387 977 +massachusett 1 14 4.317488 4.317488 1118 +conduct 1 14 4.317488 4.317488 1065 +nasa 1 13 4.382027 4.382027 1188 +robust 1 12 4.465908 4.465908 1271 +asynchron 1 12 4.465908 4.465908 1229 +fromindividu 1 12 4.465908 4.465908 1290 +editori 1 9 4.753590 4.753590 1611 +vice 1 9 4.753590 4.753590 1604 +researchi 1 8 4.875197 4.875197 1756 +fifth 1 7 5.010635 5.010635 1931 +montreal 1 7 5.010635 5.010635 1961 +kluwer 1 6 5.164786 5.164786 2143 +antonio 1 6 5.164786 5.164786 2186 +ofdistribut 1 5 5.347108 5.347108 2316 +emerson 1 5 5.347108 5.347108 2547 +adjust 1 5 5.347108 5.347108 2422 +orlean 1 5 5.347108 5.347108 2550 +presentarea 1 4 5.568345 5.568345 3026 +avion 1 4 5.568345 5.568345 3018 +melbourn 1 4 5.568345 5.568345 3035 +sigsoft 1 4 5.568345 5.568345 3036 +aloysiu 1 3 5.857933 5.857933 3829 +systemdesign 1 2 6.263398 6.263398 4297 +stringent 1 2 6.263398 6.263398 5523 +areasinclud 1 2 6.263398 6.263398 5747 +publicationsa 1 2 6.263398 6.263398 4885 +clement 1 2 6.263398 6.263398 5526 +tsou 1 2 6.263398 6.263398 5525 +mokassoci 1 1 6.957497 6.957497 14381 +professorfaculti 1 1 6.957497 6.957497 14382 +federationof 1 1 6.957497 6.957497 14383 +interestfault 1 1 6.957497 6.957497 14384 +includespecif 1 1 6.957497 6.957497 14385 +forguarante 1 1 6.957497 6.957497 14386 +thetrad 1 1 6.957497 6.957497 14387 +criticalsystem 1 1 6.957497 6.957497 14388 +theanalysi 1 1 6.957497 6.957497 14389 +industrialprocess 1 1 6.957497 6.957497 14390 +ofnav 1 1 6.957497 6.957497 14391 +forreal 1 1 6.957497 6.957497 14392 +tilborg 1 1 6.957497 6.957497 14393 +heitmey 1 1 6.957497 6.957497 14394 +labaw 1 1 6.957497 6.957497 14395 +aptl 1 1 6.957497 6.957497 14396 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..b6759792 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +teach 1 108 2.197225 2.197225 112 +book 1 99 2.302585 2.302585 131 +real 1 93 2.397895 2.397895 144 +member 1 84 2.484907 2.484907 165 +state 1 76 2.564949 2.564949 207 +orient 1 80 2.564949 2.564949 205 +appli 1 71 2.639057 2.639057 226 +addit 1 74 2.639057 2.639057 228 +function 1 62 2.772589 2.772589 275 +back 1 60 2.833213 2.833213 297 +reason 1 57 2.890372 2.890372 318 +faculti 1 56 2.890372 2.890372 325 +undergradu 1 54 2.944439 2.944439 338 +maintain 1 51 2.995732 2.995732 342 +physic 1 47 3.091042 3.091042 377 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +editor 1 41 3.218876 3.218876 433 +formal 1 37 3.332205 3.332205 478 +concurr 1 34 3.401197 3.401197 501 +obtain 1 33 3.433987 3.433987 534 +richard 1 31 3.496508 3.496508 559 +profil 1 30 3.555348 3.555348 581 +seri 1 24 3.761200 3.761200 708 +longer 1 20 3.951244 3.951244 816 +stand 1 18 4.060443 4.060443 891 +stanford 1 17 4.110874 4.110874 955 +weslei 1 16 4.174387 4.174387 983 +permit 1 16 4.174387 4.174387 962 +senior 1 14 4.317488 4.317488 1120 +addison 1 12 4.465908 4.465908 1230 +fromindividu 1 12 4.465908 4.465908 1290 +suitabl 1 9 4.753590 4.753590 1486 +harvard 1 7 5.010635 5.010635 1926 +iowa 1 7 5.010635 5.010635 1971 +implementationof 1 7 5.010635 5.010635 1813 +microcomput 1 3 5.857933 5.857933 3444 +hamilton 1 2 6.263398 6.263398 5719 +collegem 1 2 6.263398 6.263398 5563 +astronaut 1 2 6.263398 6.263398 5748 +universityph 1 2 6.263398 6.263398 5604 +lecturerb 1 1 6.957497 6.957497 14397 +aero 1 1 6.957497 6.957497 14398 +universityprofession 1 1 6.957497 6.957497 14399 +servicecoordin 1 1 6.957497 6.957497 14400 +vol 1 1 6.957497 6.957497 14401 +educationsummari 1 1 6.957497 6.957497 14402 +potentialfor 1 1 6.957497 6.957497 14403 +infal 1 1 6.957497 6.957497 14404 +sectionof 1 1 6.957497 6.957497 14405 +onfunct 1 1 6.957497 6.957497 14406 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..f1fca91b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +gener 1 220 1.386294 1.386294 27 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +larg 1 82 2.484907 2.484907 168 +member 1 84 2.484907 2.484907 165 +server 1 76 2.564949 2.564949 204 +june 1 79 2.564949 2.564949 214 +nation 1 74 2.639057 2.639057 240 +intellig 1 72 2.639057 2.639057 225 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +knowledg 1 67 2.708050 2.708050 243 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +foundat 1 62 2.772589 2.772589 286 +organ 1 65 2.772589 2.772589 265 +septemb 1 65 2.772589 2.772589 274 +back 1 60 2.833213 2.833213 297 +special 1 56 2.890372 2.890372 320 +faculti 1 56 2.890372 2.890372 325 +processor 1 54 2.944439 2.944439 335 +profession 1 51 2.995732 2.995732 345 +principl 1 48 3.044522 3.044522 357 +societi 1 40 3.258097 3.258097 456 +continu 1 39 3.258097 3.258097 448 +transact 1 39 3.258097 3.258097 438 +map 1 39 3.258097 3.258097 452 +movi 1 40 3.258097 3.258097 459 +cost 1 37 3.332205 3.332205 480 +robot 1 36 3.367296 3.367296 497 +multi 1 36 3.367296 3.367296 493 +award 1 34 3.401197 3.401197 523 +committe 1 34 3.401197 3.401197 522 +concurr 1 34 3.401197 3.401197 501 +obtain 1 33 3.433987 3.433987 534 +storag 1 31 3.496508 3.496508 553 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +framework 1 28 3.610918 3.610918 606 +retriev 1 27 3.637586 3.637586 621 +divis 1 21 3.912023 3.912023 803 +sigmod 1 19 4.007333 4.007333 877 +media 1 19 4.007333 4.007333 861 +concentr 1 18 4.060443 4.060443 906 +demand 1 14 4.317488 4.317488 1073 +resolut 1 13 4.382027 4.382027 1172 +fromindividu 1 12 4.465908 4.465908 1290 +invit 1 10 4.653960 4.653960 1428 +conferenceon 1 9 4.753590 4.753590 1595 +databasesystem 1 8 4.875197 4.875197 1617 +silberschatz 1 6 5.164786 5.164786 1978 +outstand 1 6 5.164786 5.164786 2136 +advisori 1 6 5.164786 5.164786 2148 +sigact 1 6 5.164786 5.164786 2212 +symposiumon 1 6 5.164786 5.164786 2054 +distributedsystem 1 6 5.164786 5.164786 2022 +internationalconfer 1 6 5.164786 5.164786 2051 +seventh 1 5 5.347108 5.347108 2464 +multiresolut 1 5 5.347108 5.347108 2423 +fussel 1 5 5.347108 5.347108 2300 +abraham 1 4 5.568345 5.568345 2644 +ullman 1 4 5.568345 5.568345 2749 +stoni 1 3 5.857933 5.857933 3571 +sudarshan 1 3 5.857933 5.857933 3885 +eighth 1 2 6.263398 6.263398 5750 +publicationss 1 2 6.263398 6.263398 5732 +knowledgeand 1 2 6.263398 6.263398 4366 +ozden 1 2 6.263398 6.263398 5749 +onveri 1 2 6.263398 6.263398 4367 +silberschatzprofessorship 1 1 6.957497 6.957497 14409 +sciencesm 1 1 6.957497 6.957497 14410 +brookhonor 1 1 6.957497 6.957497 14411 +serviceiee 1 1 6.957497 6.957497 14412 +pod 1 1 6.957497 6.957497 14408 +futureof 1 1 6.957497 6.957497 14413 +basedsystemssummari 1 1 6.957497 6.957497 14414 +recentresearch 1 1 6.957497 6.957497 14415 +multidatabas 1 1 6.957497 6.957497 14416 +transactionmanag 1 1 6.957497 6.957497 14417 +ganguli 1 1 6.957497 6.957497 14418 +tsur 1 1 6.957497 6.957497 14419 +datalog 1 1 6.957497 6.957497 14420 +programexecut 1 1 6.957497 6.957497 14421 +jagadish 1 1 6.957497 6.957497 14422 +lieuwen 1 1 6.957497 6.957497 14423 +rastogi 1 1 6.957497 6.957497 14407 +dali 1 1 6.957497 6.957497 14424 +biliri 1 1 6.957497 6.957497 14425 +storageserv 1 1 6.957497 6.957497 14426 +storageand 1 1 6.957497 6.957497 14427 +relationaldata 1 1 6.957497 6.957497 14428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..bde9f239 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +list 1 201 1.609438 1.609438 39 +professor 1 137 1.945910 1.945910 76 +novemb 1 81 2.484907 2.484907 179 +back 1 60 2.833213 2.833213 297 +faculti 1 56 2.890372 2.890372 325 +robert 1 30 3.555348 3.555348 567 +profil 1 30 3.555348 3.555348 581 +rememb 1 12 4.465908 4.465908 1217 +centenni 1 7 5.010635 5.010635 1967 +emeritu 1 5 5.347108 5.347108 2544 +bledso 1 4 5.568345 5.568345 2999 +simmon 1 2 6.263398 6.263398 5460 +simmonsquinci 1 1 6.957497 6.957497 14429 +professoremeritu 1 1 6.957497 6.957497 14430 +psychologymai 1 1 6.957497 6.957497 14431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..c43af4d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +austin 1 168 1.791759 1.791759 63 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +check 1 115 2.197225 2.197225 118 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +new 1 64 2.772589 2.772589 262 +type 1 61 2.833213 2.833213 296 +thesi 1 57 2.890372 2.890372 327 +think 1 57 2.890372 2.890372 314 +undergradu 1 54 2.944439 2.944439 338 +could 1 46 3.091042 3.091042 383 +semant 1 29 3.583519 3.583519 587 +progress 1 28 3.610918 3.610918 598 +rule 1 26 3.688879 3.688879 638 +altern 1 26 3.688879 3.688879 641 +yahoo 1 24 3.761200 3.761200 707 +log 1 19 4.007333 4.007333 857 +adam 1 17 4.110874 4.110874 934 +happi 1 14 4.317488 4.317488 1079 +touch 1 12 4.465908 4.465908 1288 +claim 1 8 4.875197 4.875197 1664 +core 1 7 5.010635 5.010635 1809 +gzip 1 6 5.164786 5.164786 2117 +gradual 1 4 5.568345 5.568345 2997 +vrml 1 4 5.568345 5.568345 2949 +aweekli 1 3 5.857933 5.857933 3312 +fame 1 3 5.857933 5.857933 3793 +knowwhat 1 2 6.263398 6.263398 5456 +junki 1 2 6.263398 6.263398 5457 +nando 1 2 6.263398 6.263398 5458 +seligman 1 1 6.957497 6.957497 12572 +pageadam 1 1 6.957497 6.957497 12573 +specifiedth 1 1 6.957497 6.957497 12574 +fileor 1 1 6.957497 6.957497 12575 +pagemart 1 1 6.957497 6.957497 12576 +fromreut 1 1 6.957497 6.957497 12577 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..3ce10f87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +machin 1 129 2.079442 2.079442 95 +specif 1 106 2.197225 2.197225 106 +learn 1 86 2.484907 2.484907 170 +colleg 1 61 2.833213 2.833213 300 +california 1 46 3.091042 3.091042 388 +eduphon 1 15 4.248495 4.248495 1060 +hopefulli 1 14 4.317488 4.317488 1071 +station 1 13 4.382027 4.382027 1157 +acquisit 1 10 4.653960 4.653960 1465 +santa 1 10 4.653960 4.653960 1441 +commonsens 1 4 5.568345 5.568345 2998 +barbara 1 3 5.857933 5.857933 3380 +chill 1 2 6.263398 6.263398 4244 +agapito 1 1 6.957497 6.957497 12578 +sustaita 1 1 6.957497 6.957497 12579 +austincognit 1 1 6.957497 6.957497 12580 +connection 1 1 6.957497 6.957497 12581 +reasoningschoolingph 1 1 6.957497 6.957497 12582 +miscellaneouspost 1 1 6.957497 6.957497 12583 +addressth 1 1 6.957497 6.957497 12584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..c1e555d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +intellig 1 72 2.639057 2.639057 225 +nation 1 74 2.639057 2.639057 240 +logic 1 71 2.639057 2.639057 230 +solv 1 73 2.639057 2.639057 234 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +new 1 64 2.772589 2.772589 262 +foundat 1 62 2.772589 2.772589 286 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +faculti 1 56 2.890372 2.890372 325 +reason 1 57 2.890372 2.890372 318 +physic 1 47 3.091042 3.091042 377 +directori 1 45 3.135494 3.135494 396 +autom 1 41 3.218876 3.218876 434 +close 1 38 3.295837 3.295837 465 +multi 1 36 3.367296 3.367296 493 +robert 1 30 3.555348 3.555348 567 +neural 1 30 3.555348 3.555348 578 +rule 1 26 3.688879 3.688879 638 +lab 1 24 3.761200 3.761200 698 +theorem 1 21 3.912023 3.912023 786 +fund 1 21 3.912023 3.912023 805 +histori 1 19 4.007333 4.007333 853 +excel 1 19 4.007333 4.007333 868 +prove 1 19 4.007333 4.007333 848 +atth 1 15 4.248495 4.248495 1019 +action 1 15 4.248495 4.248495 1038 +rank 1 14 4.317488 4.317488 1063 +bruce 1 12 4.465908 4.465908 1226 +distinguish 1 11 4.553877 4.553877 1357 +benjamin 1 11 4.553877 4.553877 1296 +qualit 1 11 4.553877 4.553877 1362 +vladimir 1 11 4.553877 4.553877 1324 +peter 1 11 4.553877 4.553877 1316 +risto 1 9 4.753590 4.753590 1523 +moonei 1 9 4.753590 4.753590 1520 +novak 1 9 4.753590 4.753590 1521 +miikkulainen 1 8 4.875197 4.875197 1667 +aaai 1 8 4.875197 4.875197 1750 +presidenti 1 8 4.875197 4.875197 1737 +postdoc 1 8 4.875197 4.875197 1724 +philosoph 1 7 5.010635 5.010635 1904 +boyer 1 6 5.164786 5.164786 2013 +gordon 1 6 5.164786 5.164786 2032 +dream 1 6 5.164786 5.164786 2165 +lifschitz 1 5 5.347108 5.347108 2542 +mirank 1 5 5.347108 5.347108 2543 +porter 1 5 5.347108 5.347108 2293 +emeritu 1 5 5.347108 5.347108 2544 +bledso 1 4 5.568345 5.568345 2999 +clark 1 4 5.568345 5.568345 2705 +kuiper 1 3 5.857933 5.857933 3794 +souther 1 3 5.857933 5.857933 3795 +woodi 1 2 6.263398 6.263398 5459 +simmon 1 2 6.263398 6.263398 5460 +laboratoryut 1 1 6.957497 6.957497 12585 +laboratoryth 1 1 6.957497 6.957497 12586 +austinha 1 1 6.957497 6.957497 12587 +andgradu 1 1 6.957497 6.957497 12588 +causei 1 1 6.957497 6.957497 12589 +deceas 1 1 6.957497 6.957497 12590 +memoriam 1 1 6.957497 6.957497 12591 +porterpoint 1 1 6.957497 6.957497 12592 +agenciescontact 1 1 6.957497 6.957497 12593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..61be9ef8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +compil 1 122 2.079442 2.079442 96 +code 1 108 2.197225 2.197225 116 +want 1 79 2.564949 2.564949 199 +main 1 67 2.708050 2.708050 256 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +advisor 1 51 2.995732 2.995732 355 +john 1 33 3.433987 3.433987 532 +postal 1 30 3.555348 3.555348 580 +constraint 1 26 3.688879 3.688879 636 +translat 1 13 4.382027 4.382027 1164 +usavoic 1 13 4.382027 4.382027 1198 +hello 1 10 4.653960 4.653960 1407 +candid 1 9 4.753590 4.753590 1606 +routin 1 9 4.753590 4.753590 1549 +ajita 1 2 6.263398 6.263398 5461 +papersmi 1 2 6.263398 6.263398 5462 +johnajita 1 1 6.957497 6.957497 12594 +programmingframework 1 1 6.957497 6.957497 12595 +parallelprocedur 1 1 6.957497 6.957497 12596 +brownemi 1 1 6.957497 6.957497 12597 +ajohn 1 1 6.957497 6.957497 12598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..f384965a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +pictur 1 89 2.397895 2.397895 160 +associ 1 93 2.397895 2.397895 151 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +institut 1 84 2.484907 2.484907 187 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +logic 1 71 2.639057 2.639057 230 +addit 1 74 2.639057 2.639057 228 +integr 1 67 2.708050 2.708050 245 +main 1 67 2.708050 2.708050 256 +evalu 1 64 2.772589 2.772589 266 +organ 1 65 2.772589 2.772589 265 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +maintain 1 51 2.995732 2.995732 342 +profession 1 51 2.995732 2.995732 345 +understand 1 47 3.091042 3.091042 384 +electron 1 47 3.091042 3.091042 379 +electr 1 38 3.295837 3.295837 461 +formal 1 37 3.332205 3.332205 478 +field 1 37 3.332205 3.332205 482 +connect 1 37 3.332205 3.332205 485 +scientist 1 31 3.496508 3.496508 560 +computersci 1 30 3.555348 3.555348 562 +travel 1 30 3.555348 3.555348 579 +limit 1 29 3.583519 3.583519 585 +utc 1 27 3.637586 3.637586 629 +other 1 24 3.761200 3.761200 697 +alwai 1 24 3.761200 3.761200 691 +leav 1 21 3.912023 3.912023 772 +particularli 1 19 4.007333 4.007333 867 +excel 1 19 4.007333 4.007333 868 +encourag 1 18 4.060443 4.060443 880 +social 1 13 4.382027 4.382027 1123 +jump 1 9 4.753590 4.753590 1603 +sweden 1 7 5.010635 5.010635 1885 +interestedin 1 5 5.347108 5.347108 2260 +craft 1 5 5.347108 5.347108 2412 +camp 1 5 5.347108 5.347108 2545 +suffer 1 5 5.347108 5.347108 2268 +novic 1 4 5.568345 5.568345 2815 +mentor 1 4 5.568345 5.568345 2591 +sigcs 1 4 5.568345 5.568345 2865 +vicki 1 3 5.857933 5.857933 3187 +mathematicallog 1 3 5.857933 5.857933 3796 +belong 1 3 5.857933 5.857933 3797 +almstrum 1 2 6.263398 6.263398 4165 +woodwork 1 2 6.263398 6.263398 5463 +spurt 1 2 6.263398 6.263398 5464 +plenti 1 2 6.263398 6.263398 5465 +almstrumabout 1 1 6.957497 6.957497 12600 +doctoralresearch 1 1 6.957497 6.957497 12601 +ispent 1 1 6.957497 6.957497 12602 +uppsala 1 1 6.957497 6.957497 12599 +pagether 1 1 6.957497 6.957497 12603 +garden 1 1 6.957497 6.957497 12604 +sew 1 1 6.957497 6.957497 12605 +hubbi 1 1 6.957497 6.957497 12606 +torgni 1 1 6.957497 6.957497 12607 +stadler 1 1 6.957497 6.957497 12608 +itics 1 1 6.957497 6.957497 12609 +educationjun 1 1 6.957497 6.957497 12610 +swedenoth 1 1 6.957497 6.957497 12611 +frenzi 1 1 6.957497 6.957497 12612 +educationsigsoft 1 1 6.957497 6.957497 12613 +engineeringacm 1 1 6.957497 6.957497 12614 +machineryieeeth 1 1 6.957497 6.957497 12615 +engineerscpsrcomput 1 1 6.957497 6.957497 12616 +responsibilityconnect 1 1 6.957497 6.957497 12617 +elsewhereto 1 1 6.957497 6.957497 12618 +seldom 1 1 6.957497 6.957497 12619 +forewarn 1 1 6.957497 6.957497 12620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..eba166dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +hour 1 165 1.791759 1.791759 46 +compil 1 122 2.079442 2.079442 96 +mondai 1 77 2.564949 2.564949 206 +wednesdai 1 64 2.772589 2.772589 261 +anthoni 1 4 5.568345 5.568345 2792 +hung 1 3 5.857933 5.857933 3524 +pang 1 3 5.857933 5.857933 3509 +hing 1 2 6.263398 6.263398 5442 +pagehung 1 1 6.957497 6.957497 12621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..e236ef30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +studi 1 120 2.079442 2.079442 91 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +come 1 78 2.564949 2.564949 202 +taylor 1 63 2.772589 2.772589 287 +experi 1 64 2.772589 2.772589 283 +colleg 1 61 2.833213 2.833213 300 +sciencesunivers 1 37 3.332205 3.332205 486 +bachelor 1 17 4.110874 4.110874 957 +aruna 1 1 6.957497 6.957497 12622 +addalacurr 1 1 6.957497 6.957497 12623 +studentth 1 1 6.957497 6.957497 12624 +sciencess 1 1 6.957497 6.957497 12625 +engineeringmysorework 1 1 6.957497 6.957497 12626 +mysoreindiai 1 1 6.957497 6.957497 12627 +mysor 1 1 6.957497 6.957497 12628 +cityindiato 1 1 6.957497 6.957497 12629 +eduvoic 1 1 6.957497 6.957497 12630 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..b63bc5b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +pleas 1 113 2.197225 2.197225 114 +main 1 67 2.708050 2.708050 256 +get 1 46 3.091042 3.091042 380 +exist 1 30 3.555348 3.555348 569 +known 1 24 3.761200 3.761200 702 +usavoic 1 13 4.382027 4.382027 1198 +mepost 1 10 4.653960 4.653960 1472 +round 1 8 4.875197 4.875197 1769 +let 1 3 5.857933 5.857933 3790 +patienc 1 2 6.263398 6.263398 5466 +ashi 1 1 6.957497 6.957497 12631 +tarafdarashi 1 1 6.957497 6.957497 12632 +tarafdarabout 1 1 6.957497 6.957497 12633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..c1a91792 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +activ 1 84 2.484907 2.484907 182 +complet 1 77 2.564949 2.564949 208 +line 1 75 2.639057 2.639057 231 +solv 1 73 2.639057 2.639057 234 +prof 1 64 2.772589 2.772589 273 +dept 1 64 2.772589 2.772589 291 +taylor 1 63 2.772589 2.772589 287 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +date 1 51 2.995732 2.995732 344 +advisor 1 51 2.995732 2.995732 355 +electr 1 38 3.295837 3.295837 461 +expect 1 37 3.332205 3.332205 484 +within 1 33 3.433987 3.433987 525 +queri 1 33 3.433987 3.433987 524 +hard 1 30 3.555348 3.555348 563 +campu 1 27 3.637586 3.637586 623 +mine 1 26 3.688879 3.688879 654 +constraint 1 26 3.688879 3.688879 636 +expert 1 20 3.951244 3.951244 833 +toolkit 1 20 3.951244 3.951244 835 +histori 1 19 4.007333 4.007333 853 +along 1 18 4.060443 4.060443 878 +coordin 1 13 4.382027 4.382027 1182 +daniel 1 12 4.465908 4.465908 1233 +instanc 1 11 4.553877 4.553877 1322 +candid 1 9 4.753590 4.753590 1606 +mirank 1 5 5.347108 5.347108 2543 +roberto 1 2 6.263398 6.263398 5468 +bayardo 1 2 6.263398 6.263398 5467 +exception 1 2 6.263398 6.263398 4467 +pageroberto 1 1 6.957497 6.957497 12634 +infosleuth 1 1 6.957497 6.957497 12635 +satisfactionmi 1 1 6.957497 6.957497 12636 +generatingand 1 1 6.957497 6.957497 12637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..a0feb839 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +abstract 1 62 2.772589 2.772589 276 +taylor 1 63 2.772589 2.772589 287 +reason 1 57 2.890372 2.890372 318 +physic 1 47 3.091042 3.091042 377 +past 1 42 3.218876 3.218876 428 +ofth 1 36 3.367296 3.367296 491 +dissert 1 32 3.465736 3.465736 549 +retriev 1 27 3.637586 3.637586 621 +behavior 1 18 4.060443 4.060443 881 +month 1 15 4.248495 4.248495 1025 +refin 1 11 4.553877 4.553877 1363 +entitl 1 9 4.753590 4.753590 1490 +drink 1 9 4.753590 4.753590 1607 +informationemail 1 9 4.753590 4.753590 1564 +overviewof 1 2 6.263398 6.263398 5469 +bert 1 1 6.957497 6.957497 12638 +kayresearch 1 1 6.957497 6.957497 12640 +imprecis 1 1 6.957497 6.957497 12639 +vitami 1 1 6.957497 6.957497 12641 +stuffsonia 1 1 6.957497 6.957497 12642 +andnina 1 1 6.957497 6.957497 12643 +springbank 1 1 6.957497 6.957497 12644 +scotchdrinksof 1 1 6.957497 6.957497 12645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..df7e1d26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +mathemat 1 108 2.197225 2.197225 123 +manag 1 114 2.197225 2.197225 125 +homepag 1 93 2.397895 2.397895 148 +school 1 84 2.484907 2.484907 188 +logic 1 71 2.639057 2.639057 230 +multimedia 1 68 2.708050 2.708050 258 +visit 1 63 2.772589 2.772589 288 +semest 1 58 2.890372 2.890372 312 +visitor 1 49 3.044522 3.044522 371 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +vladimir 1 11 4.553877 4.553877 1324 +harrick 1 7 5.010635 5.010635 1849 +lifschitz 1 5 5.347108 5.347108 2542 +coursesc 1 4 5.568345 5.568345 2692 +vinc 1 2 6.263398 6.263398 5414 +bhanu 1 1 6.957497 6.957497 12646 +homepagethi 1 1 6.957497 6.957497 12647 +akhil 1 1 6.957497 6.957497 12648 +reddythank 1 1 6.957497 6.957497 12649 +austinm 1 1 6.957497 6.957497 12650 +datacommun 1 1 6.957497 6.957497 12651 +anitish 1 1 6.957497 6.957497 12652 +barua 1 1 6.957497 6.957497 12653 +schwetmani 1 1 6.957497 6.957497 12654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..3b7f2085 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,229 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +read 1 154 1.791759 1.791759 47 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +construct 1 139 1.945910 1.945910 82 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +final 1 116 2.197225 2.197225 108 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +peopl 1 96 2.302585 2.302585 132 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +pictur 1 89 2.397895 2.397895 160 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +real 1 93 2.397895 2.397895 144 +level 1 87 2.484907 2.484907 180 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +complet 1 77 2.564949 2.564949 208 +involv 1 71 2.639057 2.639057 227 +free 1 73 2.639057 2.639057 224 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +written 1 63 2.772589 2.772589 278 +colleg 1 61 2.833213 2.833213 300 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +telephon 1 50 3.044522 3.044522 373 +physic 1 47 3.091042 3.091042 377 +quarter 1 47 3.091042 3.091042 389 +made 1 44 3.135494 3.135494 398 +answer 1 45 3.135494 3.135494 391 +anoth 1 45 3.135494 3.135494 408 +even 1 45 3.135494 3.135494 393 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +probabl 1 40 3.258097 3.258097 455 +error 1 40 3.258097 3.258097 449 +correct 1 38 3.295837 3.295837 462 +feel 1 37 3.332205 3.332205 483 +expect 1 37 3.332205 3.332205 484 +connect 1 37 3.332205 3.332205 485 +soon 1 36 3.367296 3.367296 494 +michael 1 35 3.401197 3.401197 514 +return 1 34 3.401197 3.401197 502 +either 1 35 3.401197 3.401197 506 +articl 1 33 3.433987 3.433987 530 +human 1 32 3.465736 3.465736 546 +taken 1 31 3.496508 3.496508 555 +hard 1 30 3.555348 3.555348 563 +postal 1 30 3.555348 3.555348 580 +depend 1 29 3.583519 3.583519 583 +built 1 29 3.583519 3.583519 592 +quot 1 29 3.583519 3.583519 582 +intend 1 28 3.610918 3.610918 599 +ask 1 28 3.610918 3.610918 597 +symbol 1 27 3.637586 3.637586 620 +altern 1 26 3.688879 3.688879 641 +although 1 25 3.737670 3.737670 667 +accur 1 25 3.737670 3.737670 680 +sometim 1 24 3.761200 3.761200 696 +frame 1 24 3.761200 3.761200 684 +interpret 1 24 3.761200 3.761200 686 +decis 1 23 3.806662 3.806662 728 +lead 1 23 3.806662 3.806662 718 +almost 1 22 3.850148 3.850148 742 +util 1 21 3.912023 3.912023 774 +theorem 1 21 3.912023 3.912023 786 +prepar 1 20 3.951244 3.951244 824 +assum 1 19 4.007333 4.007333 845 +accept 1 18 4.060443 4.060443 879 +account 1 18 4.060443 4.060443 882 +estim 1 17 4.110874 4.110874 930 +differenti 1 17 4.110874 4.110874 921 +choic 1 16 4.174387 4.174387 979 +transfer 1 16 4.174387 4.174387 967 +cognit 1 16 4.174387 4.174387 986 +psycholog 1 15 4.248495 4.248495 1054 +mayb 1 15 4.248495 4.248495 1014 +manner 1 14 4.317488 4.317488 1074 +save 1 14 4.317488 4.317488 1099 +balanc 1 14 4.317488 4.317488 1112 +stori 1 14 4.317488 4.317488 1087 +econom 1 13 4.382027 4.382027 1184 +rememb 1 12 4.465908 4.465908 1217 +sound 1 9 4.753590 4.753590 1605 +unusu 1 9 4.753590 4.753590 1566 +exact 1 9 4.753590 4.753590 1509 +charg 1 9 4.753590 4.753590 1582 +risk 1 8 4.875197 4.875197 1689 +formul 1 8 4.875197 4.875197 1733 +brain 1 8 4.875197 4.875197 1638 +wire 1 8 4.875197 4.875197 1747 +simpli 1 8 4.875197 4.875197 1626 +prover 1 8 4.875197 4.875197 1653 +insert 1 8 4.875197 4.875197 1687 +filter 1 8 4.875197 4.875197 1641 +maxim 1 7 5.010635 5.010635 1944 +explain 1 7 5.010635 5.010635 1816 +prevent 1 7 5.010635 5.010635 1827 +gave 1 7 5.010635 5.010635 1922 +remind 1 7 5.010635 5.010635 1799 +consequ 1 6 5.164786 5.164786 1989 +biolog 1 6 5.164786 5.164786 2147 +wrong 1 6 5.164786 5.164786 2025 +mistak 1 6 5.164786 5.164786 2110 +postcard 1 6 5.164786 5.164786 2181 +promis 1 6 5.164786 5.164786 2037 +hidden 1 6 5.164786 5.164786 1987 +quantum 1 6 5.164786 5.164786 2214 +amherst 1 5 5.347108 5.347108 2484 +fair 1 5 5.347108 5.347108 2333 +respond 1 5 5.347108 5.347108 2354 +adopt 1 5 5.347108 5.347108 2467 +favor 1 5 5.347108 5.347108 2414 +ahead 1 5 5.347108 5.347108 2338 +puzzl 1 5 5.347108 5.347108 2507 +stupid 1 5 5.347108 5.347108 2489 +kill 1 4 5.568345 5.568345 3000 +analog 1 4 5.568345 5.568345 2875 +fire 1 4 5.568345 5.568345 3001 +hypothesi 1 4 5.568345 5.568345 2650 +suppos 1 4 5.568345 5.568345 3002 +coin 1 3 5.857933 5.857933 3799 +diseas 1 3 5.857933 5.857933 3635 +neuron 1 3 5.857933 5.857933 3798 +scream 1 3 5.857933 5.857933 3609 +wasn 1 3 5.857933 5.857933 3800 +incorrect 1 3 5.857933 5.857933 3134 +cogsci 1 2 6.263398 6.263398 4798 +advert 1 2 6.263398 6.263398 5201 +toss 1 2 6.263398 6.263398 5470 +reject 1 2 6.263398 6.263398 5418 +belov 1 2 6.263398 6.263398 5073 +imagin 1 2 6.263398 6.263398 5472 +combat 1 2 6.263398 6.263398 5473 +nobodi 1 2 6.263398 6.263398 5474 +voltag 1 2 6.263398 6.263398 5475 +invalid 1 2 6.263398 6.263398 5476 +append 1 2 6.263398 6.263398 4295 +informationthi 1 2 6.263398 6.263398 5477 +empti 1 2 6.263398 6.263398 5478 +bogu 1 2 6.263398 6.263398 5471 +bogomolnymichael 1 1 6.957497 6.957497 12659 +bogomolni 1 1 6.957497 6.957497 12660 +interestsnot 1 1 6.957497 6.957497 12661 +jenef 1 1 6.957497 6.957497 12662 +husman 1 1 6.957497 6.957497 12663 +avers 1 1 6.957497 6.957497 12656 +outcom 1 1 6.957497 6.957497 12657 +bet 1 1 6.957497 6.957497 12664 +diminish 1 1 6.957497 6.957497 12665 +tverski 1 1 6.957497 6.957497 12666 +kahneman 1 1 6.957497 6.957497 12667 +verbatimfrom 1 1 6.957497 6.957497 12668 +outbreak 1 1 6.957497 6.957497 12669 +beenpropos 1 1 6.957497 6.957497 12670 +programsar 1 1 6.957497 6.957497 12671 +besav 1 1 6.957497 6.957497 12672 +digitalif 1 1 6.957497 6.957497 12673 +electrochem 1 1 6.957497 6.957497 12674 +axon 1 1 6.957497 6.957497 12675 +shaki 1 1 6.957497 6.957497 12676 +inaccur 1 1 6.957497 6.957497 12677 +subtract 1 1 6.957497 6.957497 12678 +checkbook 1 1 6.957497 6.957497 12679 +nevertheless 1 1 6.957497 6.957497 12680 +misfir 1 1 6.957497 6.957497 12681 +italic 1 1 6.957497 6.957497 12682 +researchcognit 1 1 6.957497 6.957497 12683 +sciencearitifici 1 1 6.957497 6.957497 12684 +intelligencemathemat 1 1 6.957497 6.957497 12685 +logictopolog 1 1 6.957497 6.957497 12686 +ghrist 1 1 6.957497 6.957497 12687 +bogo 1 1 6.957497 6.957497 12658 +wilshir 1 1 6.957497 6.957497 12688 +parkwai 1 1 6.957497 6.957497 12689 +talentsdefinit 1 1 6.957497 6.957497 12690 +bogodynamicsdefinit 1 1 6.957497 6.957497 12691 +sortwhil 1 1 6.957497 6.957497 12692 +bogos 1 1 6.957497 6.957497 12693 +bogomet 1 1 6.957497 6.957497 12694 +bogon 1 1 6.957497 6.957497 12655 +flux 1 1 6.957497 6.957497 12695 +bogotifi 1 1 6.957497 6.957497 12696 +autobogotiphobia 1 1 6.957497 6.957497 12697 +blinkenlight 1 1 6.957497 6.957497 12698 +lasher 1 1 6.957497 6.957497 12699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..435c5720 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +mathemat 1 108 2.197225 2.197225 123 +teach 1 108 2.197225 2.197225 112 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +educ 1 86 2.484907 2.484907 191 +build 1 85 2.484907 2.484907 184 +state 1 76 2.564949 2.564949 207 +method 1 80 2.564949 2.564949 213 +decemb 1 80 2.564949 2.564949 215 +logic 1 71 2.639057 2.639057 230 +view 1 70 2.708050 2.708050 254 +dept 1 64 2.772589 2.772589 291 +result 1 65 2.772589 2.772589 281 +polici 1 64 2.772589 2.772589 279 +share 1 59 2.833213 2.833213 304 +detail 1 57 2.890372 2.890372 321 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +undergradu 1 54 2.944439 2.944439 338 +found 1 53 2.944439 2.944439 337 +much 1 52 2.995732 2.995732 349 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +natur 1 44 3.135494 3.135494 406 +press 1 42 3.218876 3.218876 419 +close 1 38 3.295837 3.295837 465 +formal 1 37 3.332205 3.332205 478 +mean 1 37 3.332205 3.332205 477 +short 1 36 3.367296 3.367296 499 +john 1 33 3.433987 3.433987 532 +articl 1 33 3.433987 3.433987 530 +photo 1 31 3.496508 3.496508 561 +taken 1 31 3.496508 3.496508 555 +posit 1 31 3.496508 3.496508 552 +richard 1 31 3.496508 3.496508 559 +scientist 1 31 3.496508 3.496508 560 +robert 1 30 3.555348 3.555348 567 +univ 1 28 3.610918 3.610918 617 +pass 1 28 3.610918 3.610918 611 +symbol 1 27 3.637586 3.637586 620 +repres 1 26 3.688879 3.688879 656 +bound 1 26 3.688879 3.688879 659 +fundament 1 25 3.737670 3.737670 661 +reach 1 24 3.761200 3.761200 688 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +recommend 1 22 3.850148 3.850148 737 +properti 1 22 3.850148 3.850148 749 +divis 1 21 3.912023 3.912023 803 +theorem 1 21 3.912023 3.912023 786 +fund 1 21 3.912023 3.912023 805 +verif 1 20 3.951244 3.951244 826 +wonder 1 20 3.951244 3.951244 815 +wrote 1 20 3.951244 3.951244 830 +offici 1 18 4.060443 4.060443 894 +moor 1 17 4.110874 4.110874 936 +everyth 1 13 4.382027 4.382027 1169 +introduc 1 13 4.382027 4.382027 1139 +shape 1 12 4.465908 4.465908 1245 +stephen 1 11 4.553877 4.553877 1342 +smart 1 11 4.553877 4.553877 1352 +noth 1 11 4.553877 4.553877 1328 +peter 1 11 4.553877 4.553877 1316 +instanc 1 11 4.553877 4.553877 1322 +rice 1 11 4.553877 4.553877 1336 +death 1 10 4.653960 4.653960 1457 +float 1 9 4.753590 4.753590 1504 +end 1 9 4.753590 4.753590 1567 +govern 1 9 4.753590 4.753590 1581 +said 1 9 4.753590 4.753590 1571 +ball 1 9 4.753590 4.753590 1608 +prover 1 8 4.875197 4.875197 1653 +joke 1 8 4.875197 4.875197 1620 +pageth 1 7 5.010635 5.010635 1939 +microprocessor 1 7 5.010635 5.010635 1808 +zero 1 7 5.010635 5.010635 1896 +intellectu 1 7 5.010635 5.010635 1847 +discoveri 1 7 5.010635 5.010635 1915 +boyer 1 6 5.164786 5.164786 2013 +furthermor 1 6 5.164786 5.164786 2141 +licens 1 5 5.347108 5.347108 2520 +own 1 5 5.347108 5.347108 2531 +despit 1 5 5.347108 5.347108 2317 +vote 1 4 5.568345 5.568345 2953 +disclaim 1 4 5.568345 5.568345 2847 +oftexa 1 4 5.568345 5.568345 3003 +fire 1 4 5.568345 5.568345 3001 +subsequ 1 4 5.568345 5.568345 2665 +usaemail 1 3 5.857933 5.857933 3722 +mccune 1 3 5.857933 5.857933 3522 +enumer 1 3 5.857933 5.857933 3244 +tenur 1 3 5.857933 5.857933 3801 +shouldb 1 3 5.857933 5.857933 3673 +deutsch 1 3 5.857933 5.857933 3802 +harold 1 3 5.857933 5.857933 3803 +carbon 1 3 5.857933 5.857933 3804 +loss 1 3 5.857933 5.857933 3805 +edufax 1 2 6.263398 6.263398 5479 +knowna 1 2 6.263398 6.263398 5480 +webth 1 2 6.263398 6.263398 5481 +projectmi 1 2 6.263398 6.263398 5482 +andsom 1 2 6.263398 6.263398 5483 +thegreat 1 2 6.263398 6.263398 4987 +thereof 1 2 6.263398 6.263398 5484 +steal 1 2 6.263398 6.263398 5485 +riski 1 2 6.263398 6.263398 4291 +peano 1 2 6.263398 6.263398 4234 +rebel 1 2 6.263398 6.263398 5388 +amor 1 2 6.263398 6.263398 5486 +congeni 1 2 6.263398 6.263398 4713 +rudi 1 2 6.263398 6.263398 5487 +verg 1 2 6.263398 6.263398 5488 +atom 1 2 6.263398 6.263398 4472 +lament 1 2 6.263398 6.263398 4866 +texan 1 2 6.263398 6.263398 5489 +boyerhom 1 1 6.957497 6.957497 12700 +philosophydepart 1 1 6.957497 6.957497 12701 +austinhow 1 1 6.957497 6.957497 12702 +mepap 1 1 6.957497 6.957497 12703 +locationsclassescurriculum 1 1 6.957497 6.957497 12704 +vitaeperson 1 1 6.957497 6.957497 12705 +dataeducationpublicationshonorsjobsgradu 1 1 6.957497 6.957497 12706 +studentsth 1 1 6.957497 6.957497 12707 +nqthm 1 1 6.957497 6.957497 12708 +mccarthi 1 1 6.957497 6.957497 12709 +moffett 1 1 6.957497 6.957497 12710 +controversyni 1 1 6.957497 6.957497 12711 +robbin 1 1 6.957497 6.957497 12712 +permitsth 1 1 6.957497 6.957497 12713 +administrativeoverhead 1 1 6.957497 6.957497 12714 +howthi 1 1 6.957497 6.957497 12715 +confess 1 1 6.957497 6.957497 12716 +acanon 1 1 6.957497 6.957497 12717 +thumper 1 1 6.957497 6.957497 12718 +universitiesstandard 1 1 6.957497 6.957497 12719 +aweb 1 1 6.957497 6.957497 12720 +anind 1 1 6.957497 6.957497 12721 +endors 1 1 6.957497 6.957497 12722 +habitu 1 1 6.957497 6.957497 12723 +hislectur 1 1 6.957497 6.957497 12724 +militaryacademi 1 1 6.957497 6.957497 12725 +incens 1 1 6.957497 6.957497 12726 +hisformalist 1 1 6.957497 6.957497 12727 +hispromis 1 1 6.957497 6.957497 12728 +turin 1 1 6.957497 6.957497 12729 +sincomplet 1 1 6.957497 6.957497 12730 +rucker 1 1 6.957497 6.957497 12731 +extinct 1 1 6.957497 6.957497 12732 +kroto 1 1 6.957497 6.957497 12733 +britain 1 1 6.957497 6.957497 12734 +sussex 1 1 6.957497 6.957497 12735 +chemistrypr 1 1 6.957497 6.957497 12736 +curl 1 1 6.957497 6.957497 12737 +smallei 1 1 6.957497 6.957497 12738 +inhouston 1 1 6.957497 6.957497 12739 +asocc 1 1 6.957497 6.957497 12740 +upup 1 1 6.957497 6.957497 12741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..16c8c3ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,157 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +specif 1 106 2.197225 2.197225 106 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +technic 1 100 2.302585 2.302585 140 +graphic 1 90 2.397895 2.397895 147 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +control 1 82 2.484907 2.484907 164 +ieee 1 86 2.484907 2.484907 190 +chang 1 82 2.484907 2.484907 163 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +refer 1 78 2.564949 2.564949 203 +april 1 77 2.564949 2.564949 196 +good 1 77 2.564949 2.564949 200 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +integr 1 67 2.708050 2.708050 245 +august 1 66 2.708050 2.708050 257 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +interact 1 62 2.772589 2.772589 270 +juli 1 60 2.833213 2.833213 305 +major 1 56 2.890372 2.890372 315 +three 1 54 2.944439 2.944439 330 +visual 1 48 3.044522 3.044522 372 +physic 1 47 3.091042 3.091042 377 +california 1 46 3.091042 3.091042 388 +describ 1 45 3.135494 3.135494 400 +societi 1 40 3.258097 3.258097 456 +electr 1 38 3.295837 3.295837 461 +prototyp 1 38 3.295837 3.295837 463 +formal 1 37 3.332205 3.332205 478 +ofth 1 36 3.367296 3.367296 491 +jame 1 35 3.401197 3.401197 507 +award 1 34 3.401197 3.401197 523 +concurr 1 34 3.401197 3.401197 501 +extend 1 32 3.465736 3.465736 539 +idea 1 32 3.465736 3.465736 545 +focu 1 30 3.555348 3.555348 571 +specifi 1 30 3.555348 3.555348 568 +graph 1 30 3.555348 3.555348 576 +chair 1 29 3.583519 3.583519 596 +univ 1 28 3.610918 3.610918 617 +american 1 27 3.637586 3.637586 634 +compar 1 26 3.688879 3.688879 648 +experiment 1 26 3.688879 3.688879 645 +rule 1 26 3.688879 3.688879 638 +proc 1 26 3.688879 3.688879 649 +supercomput 1 25 3.737670 3.737670 681 +fellow 1 24 3.761200 3.761200 701 +flow 1 24 3.761200 3.761200 700 +displai 1 23 3.806662 3.806662 712 +siam 1 21 3.912023 3.912023 800 +longer 1 20 3.951244 3.951244 816 +debug 1 17 4.110874 4.110874 944 +moor 1 17 4.110874 4.110874 936 +brown 1 16 4.174387 4.174387 977 +partit 1 16 4.174387 4.174387 984 +brief 1 16 4.174387 4.174387 1001 +conf 1 13 4.382027 4.382027 1181 +evolv 1 12 4.465908 4.465908 1223 +robust 1 12 4.465908 4.465908 1271 +volum 1 11 4.553877 4.553877 1347 +broad 1 11 4.553877 4.553877 1302 +declar 1 9 4.753590 4.753590 1526 +notat 1 9 4.753590 4.753590 1489 +researchi 1 8 4.875197 4.875197 1756 +unifi 1 8 4.875197 4.875197 1774 +newton 1 7 5.010635 5.010635 1824 +henc 1 7 5.010635 5.010635 1805 +ongo 1 6 5.164786 5.164786 2215 +british 1 5 5.347108 5.347108 2546 +jain 1 5 5.347108 5.347108 2332 +mirank 1 5 5.347108 5.347108 2543 +remain 1 5 5.347108 5.347108 2278 +hyder 1 4 5.568345 5.568345 2772 +werth 1 4 5.568345 5.568345 3004 +interestparallel 1 3 5.857933 5.857933 3806 +narrow 1 3 5.857933 5.857933 3807 +publicationsj 1 3 5.857933 5.857933 3808 +baltimor 1 3 5.857933 5.857933 3809 +hendrix 1 2 6.263398 6.263398 5490 +anabstract 1 2 6.263398 6.263398 5491 +dongarra 1 2 6.263398 6.263398 5058 +brownereg 1 1 6.957497 6.957497 12742 +collegeph 1 1 6.957497 6.957497 12743 +austinhonor 1 1 6.957497 6.957497 12744 +societyarea 1 1 6.957497 6.957497 12745 +sciencewith 1 1 6.957497 6.957497 12746 +tenyear 1 1 6.957497 6.957497 12747 +computation 1 1 6.957497 6.957497 12748 +includesmethod 1 1 6.957497 6.957497 12749 +highlevel 1 1 6.957497 6.957497 12750 +throughdata 1 1 6.957497 6.957497 12751 +compositionalapproach 1 1 6.957497 6.957497 12752 +intelligenceprocess 1 1 6.957497 6.957497 12753 +fluiddynam 1 1 6.957497 6.957497 12754 +domaincompil 1 1 6.957497 6.957497 12755 +basedlanguag 1 1 6.957497 6.957497 12756 +timedecis 1 1 6.957497 6.957497 12757 +andpract 1 1 6.957497 6.957497 12758 +fourthworkshop 1 1 6.957497 6.957497 12759 +santacruz 1 1 6.957497 6.957497 12760 +theeffect 1 1 6.957497 6.957497 12761 +parallelizingcompil 1 1 6.957497 6.957497 12762 +kleyn 1 1 6.957497 6.957497 12763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..c737454a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +recent 1 167 1.791759 1.791759 58 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +high 1 130 2.079442 2.079442 101 +peopl 1 96 2.302585 2.302585 132 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +member 1 84 2.484907 2.484907 165 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +ieee 1 86 2.484907 2.484907 190 +institut 1 84 2.484907 2.484907 187 +issu 1 78 2.564949 2.564949 211 +logic 1 71 2.639057 2.639057 230 +prof 1 64 2.772589 2.772589 273 +abstract 1 62 2.772589 2.772589 276 +special 1 56 2.890372 2.890372 320 +found 1 53 2.944439 2.944439 337 +electron 1 47 3.091042 3.091042 379 +autom 1 41 3.218876 3.218876 434 +electr 1 38 3.295837 3.295837 461 +rang 1 30 3.555348 3.555348 565 +utc 1 27 3.637586 3.637586 629 +trace 1 25 3.737670 3.737670 677 +vlsi 1 21 3.912023 3.912023 795 +martin 1 21 3.912023 3.912023 794 +chen 1 21 3.912023 3.912023 791 +rout 1 21 3.912023 3.912023 793 +supervis 1 20 3.951244 3.951244 840 +synthesi 1 20 3.951244 3.951244 834 +partit 1 16 4.174387 4.174387 984 +fpga 1 10 4.653960 4.653960 1433 +placement 1 10 4.653960 4.653960 1420 +wong 1 9 4.753590 4.753590 1609 +classifi 1 9 4.753590 4.753590 1537 +chung 1 7 5.010635 5.010635 1964 +zhou 1 6 5.164786 5.164786 2092 +ping 1 4 5.568345 5.568345 2922 +ming 1 3 5.857933 5.857933 3712 +researchth 1 2 6.263398 6.263398 5492 +broadli 1 2 6.263398 6.263398 5095 +sigda 1 2 6.263398 6.263398 5493 +addressdepart 1 1 6.957497 6.957497 12765 +chenyao 1 1 6.957497 6.957497 12766 +yung 1 1 6.957497 6.957497 12767 +fang 1 1 6.957497 6.957497 12768 +shashidhar 1 1 6.957497 6.957497 12769 +thakur 1 1 6.957497 6.957497 12764 +groupcan 1 1 6.957497 6.957497 12770 +austinclick 1 1 6.957497 6.957497 12771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..5b9521cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +mani 1 92 2.397895 2.397895 150 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +april 1 77 2.564949 2.564949 196 +prof 1 64 2.772589 2.772589 273 +variou 1 56 2.890372 2.890372 317 +talk 1 53 2.944439 2.944439 336 +hardwar 1 51 2.995732 2.995732 350 +give 1 50 3.044522 3.044522 359 +cool 1 49 3.044522 3.044522 374 +done 1 47 3.091042 3.091042 381 +slide 1 38 3.295837 3.295837 467 +photo 1 31 3.496508 3.496508 561 +travel 1 30 3.555348 3.555348 579 +built 1 29 3.583519 3.583519 592 +effort 1 26 3.688879 3.688879 652 +highli 1 23 3.806662 3.806662 725 +thank 1 23 3.806662 3.806662 721 +divis 1 21 3.912023 3.912023 803 +verif 1 20 3.951244 3.951244 826 +women 1 16 4.174387 4.174387 1004 +todd 1 15 4.248495 4.248495 1051 +wife 1 13 4.382027 4.382027 1196 +land 1 12 4.465908 4.465908 1273 +speak 1 12 4.465908 4.465908 1283 +bill 1 11 4.553877 4.553877 1297 +peter 1 11 4.553877 4.553877 1316 +label 1 10 4.653960 4.653960 1423 +poetri 1 9 4.753590 4.753590 1596 +andth 1 9 4.753590 4.753590 1481 +mach 1 8 4.875197 4.875197 1669 +daughter 1 7 5.010635 5.010635 1943 +ruth 1 7 5.010635 5.010635 1870 +prioriti 1 7 5.010635 5.010635 1792 +foreign 1 7 5.010635 5.010635 1919 +beer 1 6 5.164786 5.164786 2216 +somewher 1 6 5.164786 5.164786 2176 +approv 1 6 5.164786 5.164786 2078 +allen 1 5 5.347108 5.347108 2470 +emerson 1 5 5.347108 5.347108 2547 +li 1 5 5.347108 5.347108 2500 +humor 1 5 5.347108 5.347108 2533 +substitut 1 5 5.347108 5.347108 2247 +ti 1 4 5.568345 5.568345 3005 +clair 1 4 5.568345 5.568345 2605 +enjoy 1 4 5.568345 5.568345 2937 +guangtian 1 3 5.857933 5.857933 3810 +haiku 1 3 5.857933 5.857933 3811 +cristian 1 2 6.263398 6.263398 4311 +sourcesth 1 2 6.263398 6.263398 4219 +disinform 1 2 6.263398 6.263398 5494 +dole 1 2 6.263398 6.263398 4067 +canfieldhom 1 1 6.957497 6.957497 12772 +businessmi 1 1 6.957497 6.957497 12773 +flaviu 1 1 6.957497 6.957497 12774 +ther 1 1 6.957497 6.957497 12775 +pleasuredomest 1 1 6.957497 6.957497 12776 +bliss 1 1 6.957497 6.957497 12777 +carla 1 1 6.957497 6.957497 12778 +newborn 1 1 6.957497 6.957497 12779 +parenthood 1 1 6.957497 6.957497 12780 +struck 1 1 6.957497 6.957497 12781 +peel 1 1 6.957497 6.957497 12782 +bottl 1 1 6.957497 6.957497 12783 +sofaspher 1 1 6.957497 6.957497 12784 +olestra 1 1 6.957497 6.957497 12785 +canfield 1 1 6.957497 6.957497 12786 +peterst 1 1 6.957497 6.957497 12787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..71b5ec71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +introduct 1 126 2.079442 2.079442 87 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +question 1 91 2.397895 2.397895 141 +real 1 93 2.397895 2.397895 144 +member 1 84 2.484907 2.484907 165 +wide 1 84 2.484907 2.484907 185 +order 1 69 2.708050 2.708050 249 +function 1 62 2.772589 2.772589 275 +taylor 1 63 2.772589 2.772589 287 +suggest 1 53 2.944439 2.944439 331 +autom 1 41 3.218876 3.218876 434 +extend 1 32 3.465736 3.465736 539 +express 1 32 3.465736 3.465736 540 +computersci 1 30 3.555348 3.555348 562 +usual 1 28 3.610918 3.610918 608 +progress 1 28 3.610918 3.610918 598 +theorem 1 21 3.912023 3.912023 786 +supervis 1 20 3.951244 3.951244 840 +safeti 1 20 3.951244 3.951244 817 +prove 1 19 4.007333 4.007333 848 +partial 1 18 4.060443 4.060443 900 +finit 1 14 4.317488 4.317488 1106 +candid 1 9 4.753590 4.753590 1606 +misra 1 7 5.010635 5.010635 1856 +jayadev 1 4 5.568345 5.568345 3006 +uniti 1 3 5.857933 5.857933 3812 +alsointerest 1 3 5.857933 5.857933 3813 +carruth 1 2 6.263398 6.263398 5495 +mydissert 1 2 6.263398 6.263398 5496 +carruthpleas 1 1 6.957497 6.957497 12788 +boundson 1 1 6.957497 6.957497 12789 +ordersemant 1 1 6.957497 6.957497 12790 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..62d6ee9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +schedul 1 119 2.079442 2.079442 85 +intern 1 108 2.197225 2.197225 128 +send 1 114 2.197225 2.197225 109 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +homework 1 79 2.564949 2.564949 193 +syllabu 1 67 2.708050 2.708050 247 +polici 1 64 2.772589 2.772589 279 +new 1 64 2.772589 2.772589 262 +improv 1 62 2.772589 2.772589 289 +locat 1 59 2.833213 2.833213 303 +summer 1 56 2.890372 2.890372 311 +suggest 1 53 2.944439 2.944439 331 +idea 1 32 3.465736 3.465736 545 +chen 1 21 3.912023 3.912023 791 +exercis 1 19 4.007333 4.007333 842 +intel 1 16 4.174387 4.174387 1000 +meng 1 12 4.465908 4.465908 1214 +chung 1 7 5.010635 5.010635 1964 +ping 1 4 5.568345 5.568345 2922 +fiance 1 2 6.263398 6.263398 5497 +tsai 1 2 6.263398 6.263398 4831 +bufferinsert 1 1 6.957497 6.957497 12791 +syllabustopicschung 1 1 6.957497 6.957497 12792 +clen 1 1 6.957497 6.957497 12793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..6151ce3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +first 1 140 1.945910 1.945910 71 +well 1 109 2.197225 2.197225 121 +homepag 1 93 2.397895 2.397895 148 +main 1 67 2.708050 2.708050 256 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +chen 1 21 3.912023 3.912023 791 +break 1 20 3.951244 3.951244 812 +item 1 19 4.007333 4.007333 856 +easi 1 16 4.174387 4.174387 969 +lake 1 11 4.553877 4.553877 1373 +paragraph 1 10 4.653960 4.653960 1449 +mepost 1 10 4.653960 4.653960 1472 +usaphon 1 9 4.753590 4.753590 1600 +forget 1 8 4.875197 4.875197 1712 +shanghai 1 4 5.568345 5.568345 2925 +blvd 1 4 5.568345 5.568345 3007 +deji 1 2 6.263398 6.263398 5498 +chenabout 1 2 6.263398 6.263398 5499 +bullet 1 2 6.263398 6.263398 5500 +mehello 1 1 6.957497 6.957497 12794 +tongji 1 1 6.957497 6.957497 12795 +chinaa 1 1 6.957497 6.957497 12796 +usahom 1 1 6.957497 6.957497 12797 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..ca09ac03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +softwar 1 220 1.386294 1.386294 30 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +peopl 1 96 2.302585 2.302585 132 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +educ 1 86 2.484907 2.484907 191 +environ 1 84 2.484907 2.484907 177 +school 1 84 2.484907 2.484907 188 +write 1 72 2.639057 2.639057 222 +simul 1 66 2.708050 2.708050 255 +artifici 1 63 2.772589 2.772589 280 +new 1 64 2.772589 2.772589 262 +plai 1 60 2.833213 2.833213 307 +visual 1 48 3.044522 3.044522 372 +life 1 50 3.044522 3.044522 375 +archiv 1 49 3.044522 3.044522 364 +programm 1 39 3.258097 3.258097 445 +movi 1 40 3.258097 3.258097 459 +microsoft 1 38 3.295837 3.295837 468 +mean 1 37 3.332205 3.332205 477 +robot 1 36 3.367296 3.367296 497 +represent 1 35 3.401197 3.401197 512 +common 1 30 3.555348 3.555348 574 +symbol 1 27 3.637586 3.637586 620 +spent 1 25 3.737670 3.737670 676 +client 1 25 3.737670 3.737670 679 +corpor 1 21 3.912023 3.912023 802 +martin 1 21 3.912023 3.912023 794 +watch 1 21 3.912023 3.912023 789 +voic 1 21 3.912023 3.912023 806 +portabl 1 20 3.951244 3.951244 819 +scheme 1 20 3.951244 3.951244 818 +left 1 19 4.007333 4.007333 851 +lisp 1 18 4.060443 4.060443 897 +listen 1 18 4.060443 4.060443 907 +macintosh 1 17 4.110874 4.110874 920 +anywai 1 15 4.248495 4.248495 1047 +emploi 1 12 4.465908 4.465908 1284 +tour 1 11 4.553877 4.553877 1307 +wood 1 11 4.553877 4.553877 1355 +road 1 11 4.553877 4.553877 1374 +bike 1 10 4.653960 4.653960 1468 +hang 1 9 4.753590 4.753590 1499 +brain 1 8 4.875197 4.875197 1638 +ride 1 8 4.875197 4.875197 1741 +sleep 1 6 5.164786 5.164786 2211 +dream 1 6 5.164786 5.164786 2165 +fiction 1 6 5.164786 5.164786 2217 +emerg 1 6 5.164786 5.164786 2038 +hair 1 5 5.347108 5.347108 2446 +worst 1 5 5.347108 5.347108 2287 +webster 1 5 5.347108 5.347108 2468 +phrase 1 5 5.347108 5.347108 2242 +austindepart 1 4 5.568345 5.568345 3008 +catch 1 4 5.568345 5.568345 2602 +sciencestaylor 1 3 5.857933 5.857933 3814 +republican 1 3 5.857933 5.857933 3815 +softwareth 1 3 5.857933 5.857933 3552 +stone 1 3 5.857933 5.857933 3674 +cliff 1 2 6.263398 6.263398 4285 +northwestern 1 2 6.263398 6.263398 5502 +mstk 1 2 6.263398 6.263398 5501 +captain 1 2 6.263398 6.263398 4983 +webth 1 2 6.263398 6.263398 5481 +weird 1 2 6.263398 6.263398 5503 +chaputcliff 1 1 6.957497 6.957497 12798 +chaputth 1 1 6.957497 6.957497 12799 +robotlab 1 1 6.957497 6.957497 12800 +dullchaput 1 1 6.957497 6.957497 12801 +gothimself 1 1 6.957497 6.957497 12802 +anemail 1 1 6.957497 6.957497 12803 +odesta 1 1 6.957497 6.957497 12804 +thelearn 1 1 6.957497 6.957497 12805 +hewrot 1 1 6.957497 6.957497 12806 +trane 1 1 6.957497 6.957497 12807 +thenimpl 1 1 6.957497 6.957497 12808 +studentscal 1 1 6.957497 6.957497 12809 +gamesproject 1 1 6.957497 6.957497 12810 +labannoi 1 1 6.957497 6.957497 12811 +farka 1 1 6.957497 6.957497 12812 +medeski 1 1 6.957497 6.957497 12813 +rerun 1 1 6.957497 6.957497 12814 +korg 1 1 6.957497 6.957497 12815 +turnon 1 1 6.957497 6.957497 12816 +breakfast 1 1 6.957497 6.957497 12817 +raspi 1 1 6.957497 6.957497 12818 +starfleet 1 1 6.957497 6.957497 12819 +turnoff 1 1 6.957497 6.957497 12820 +hangov 1 1 6.957497 6.957497 12821 +fave 1 1 6.957497 6.957497 12822 +eventsdaili 1 1 6.957497 6.957497 12823 +reutersintellicast 1 1 6.957497 6.957497 12824 +weatheraustin 1 1 6.957497 6.957497 12825 +txchicago 1 1 6.957497 6.957497 12826 +ilperiodicalssucksalonmirski 1 1 6.957497 6.957497 12827 +onionmacweekmacuserreferencehypertext 1 1 6.957497 6.957497 12828 +interfaceyahooalta 1 1 6.957497 6.957497 12829 +vistacardiff 1 1 6.957497 6.957497 12830 +databaselyco 1 1 6.957497 6.957497 12831 +mapalt 1 1 6.957497 6.957497 12832 +culturemacintosh 1 1 6.957497 6.957497 12833 +dataappl 1 1 6.957497 6.957497 12834 +computercyberdogquicktimequickdraw 1 1 6.957497 6.957497 12835 +dappl 1 1 6.957497 6.957497 12836 +supportmacintouchmacintosh 1 1 6.957497 6.957497 12837 +resourcecyberdog 1 1 6.957497 6.957497 12838 +poundinfo 1 1 6.957497 6.957497 12839 +rootcool 1 1 6.957497 6.957497 12840 +stufffringewareth 1 1 6.957497 6.957497 12841 +actlabpbsnprnow 1 1 6.957497 6.957497 12842 +catalogpap 1 1 6.957497 6.957497 12843 +rsumsymbol 1 1 6.957497 6.957497 12844 +groundingrobotmap 1 1 6.957497 6.957497 12845 +peopledav 1 1 6.957497 6.957497 12846 +falooncharl 1 1 6.957497 6.957497 12847 +lewisjeff 1 1 6.957497 6.957497 12848 +lindjeff 1 1 6.957497 6.957497 12849 +sherwoodbrian 1 1 6.957497 6.957497 12850 +slatorsandi 1 1 6.957497 6.957497 12851 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..282c8a72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +place 1 106 2.197225 2.197225 124 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +chang 1 82 2.484907 2.484907 163 +thing 1 84 2.484907 2.484907 189 +internet 1 83 2.484907 2.484907 186 +journal 1 83 2.484907 2.484907 183 +come 1 78 2.564949 2.564949 202 +orient 1 80 2.564949 2.564949 205 +know 1 80 2.564949 2.564949 198 +complet 1 77 2.564949 2.564949 208 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +detail 1 57 2.890372 2.890372 321 +unix 1 58 2.890372 2.890372 308 +without 1 50 3.044522 3.044522 370 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +origin 1 38 3.295837 3.295837 472 +china 1 37 3.332205 3.332205 487 +word 1 34 3.401197 3.401197 508 +return 1 34 3.401197 3.401197 502 +hard 1 30 3.555348 3.555348 563 +univ 1 28 3.610918 3.610918 617 +mind 1 27 3.637586 3.637586 632 +enjoi 1 26 3.688879 3.688879 660 +magazin 1 24 3.761200 3.761200 704 +wang 1 21 3.912023 3.912023 790 +among 1 21 3.912023 3.912023 781 +break 1 20 3.951244 3.951244 812 +fine 1 20 3.951244 3.951244 822 +thought 1 17 4.110874 4.110874 945 +countri 1 15 4.248495 4.248495 1059 +decid 1 14 4.317488 4.317488 1075 +came 1 13 4.382027 4.382027 1197 +tsinghua 1 13 4.382027 4.382027 1195 +captur 1 12 4.465908 4.465908 1232 +moment 1 11 4.553877 4.553877 1379 +surf 1 11 4.553877 4.553877 1301 +earth 1 10 4.653960 4.653960 1463 +end 1 9 4.753590 4.753590 1567 +jump 1 9 4.753590 4.753590 1603 +unusu 1 9 4.753590 4.753590 1566 +opinion 1 8 4.875197 4.875197 1708 +philosoph 1 7 5.010635 5.010635 1904 +televis 1 6 5.164786 5.164786 2118 +rock 1 6 5.164786 5.164786 2164 +million 1 5 5.347108 5.347108 2495 +provinc 1 4 5.568345 5.568345 3009 +gloriou 1 3 5.857933 5.857933 3816 +hometown 1 3 5.857933 5.857933 3817 +tower 1 3 5.857933 5.857933 3818 +fresh 1 3 5.857933 5.857933 3706 +nifti 1 2 6.263398 6.263398 5504 +numb 1 2 6.263398 6.263398 5505 +pope 1 2 6.263398 6.263398 5506 +chuanjun 1 1 6.957497 6.957497 12852 +stun 1 1 6.957497 6.957497 12854 +hubei 1 1 6.957497 6.957497 12855 +beautifulunivers 1 1 6.957497 6.957497 12856 +faceless 1 1 6.957497 6.957497 12857 +brilliant 1 1 6.957497 6.957497 12858 +miner 1 1 6.957497 6.957497 12859 +diamond 1 1 6.957497 6.957497 12853 +unemploi 1 1 6.957497 6.957497 12860 +dobb 1 1 6.957497 6.957497 12861 +prose 1 1 6.957497 6.957497 12862 +porsch 1 1 6.957497 6.957497 12863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..cbf247d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +assist 1 112 2.197225 2.197225 113 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +degre 1 69 2.708050 2.708050 259 +knowledg 1 67 2.708050 2.708050 243 +dept 1 64 2.772589 2.772589 291 +improv 1 62 2.772589 2.772589 289 +result 1 65 2.772589 2.772589 281 +automat 1 61 2.833213 2.833213 306 +major 1 56 2.890372 2.890372 315 +allow 1 53 2.944439 2.944439 333 +tabl 1 51 2.995732 2.995732 346 +visitor 1 49 3.044522 3.044522 371 +natur 1 44 3.135494 3.135494 406 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +york 1 41 3.218876 3.218876 435 +continu 1 39 3.258097 3.258097 448 +word 1 34 3.401197 3.401197 508 +human 1 32 3.465736 3.465736 546 +chines 1 29 3.583519 3.583519 595 +reach 1 24 3.761200 3.761200 688 +proof 1 23 3.806662 3.806662 720 +tenni 1 20 3.951244 3.951244 838 +ever 1 19 4.007333 4.007333 872 +segment 1 17 4.110874 4.110874 931 +taiwan 1 16 4.174387 4.174387 1006 +capabl 1 15 4.248495 4.248495 1016 +train 1 14 4.317488 4.317488 1066 +huang 1 12 4.465908 4.465908 1202 +basketbal 1 12 4.465908 4.465908 1289 +literatur 1 11 4.553877 4.553877 1300 +tag 1 7 5.010635 5.010635 1821 +academia 1 6 5.164786 5.164786 2036 +chin 1 5 5.347108 5.347108 2408 +categori 1 5 5.347108 5.347108 2261 +markov 1 5 5.347108 5.347108 2280 +accuraci 1 5 5.347108 5.347108 2450 +atlant 1 5 5.347108 5.347108 2508 +taipei 1 4 5.568345 5.568345 2926 +worki 1 4 5.568345 5.568345 3010 +basebal 1 4 5.568345 5.568345 2969 +edufing 1 4 5.568345 5.568345 2713 +sinica 1 3 5.857933 5.857933 3819 +expans 1 3 5.857933 5.857933 3755 +instituteof 1 2 6.263398 6.263398 5507 +pinbal 1 2 6.263398 6.263398 5508 +todayth 1 2 6.263398 6.263398 5416 +tser 1 1 6.957497 6.957497 12864 +systemsexperiencei 1 1 6.957497 6.957497 12865 +usinghidden 1 1 6.957497 6.957497 12866 +friendli 1 1 6.957497 6.957497 12867 +toexecut 1 1 6.957497 6.957497 12868 +automatictag 1 1 6.957497 6.957497 12869 +improvedbecaus 1 1 6.957497 6.957497 12870 +interestsmovi 1 1 6.957497 6.957497 12871 +semiolog 1 1 6.957497 6.957497 12872 +siteschina 1 1 6.957497 6.957497 12873 +timesminsheng 1 1 6.957497 6.957497 12874 +dailyth 1 1 6.957497 6.957497 12875 +timesusa 1 1 6.957497 6.957497 12876 +economistth 1 1 6.957497 6.957497 12877 +monthlymak 1 1 6.957497 6.957497 12878 +chuang 1 1 6.957497 6.957497 12879 +meyou 1 1 6.957497 6.957497 12880 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..8c2de9db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +august 1 66 2.708050 2.708050 257 +robert 1 30 3.555348 3.555348 567 +runtim 1 19 4.007333 4.007333 858 +multithread 1 11 4.553877 4.553877 1315 +pronounc 1 7 5.010635 5.010635 1918 +blumoferdb 1 5 5.347108 5.347108 2324 +silk 1 2 6.263398 6.263398 5373 +inthi 1 2 6.263398 6.263398 5509 +cilkcilkcilk 1 1 6.957497 6.957497 12881 +languageand 1 1 6.957497 6.957497 12882 +thecilk 1 1 6.957497 6.957497 12883 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..7c780ebd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +high 1 130 2.079442 2.079442 101 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +school 1 84 2.484907 2.484907 188 +plan 1 65 2.772589 2.772589 272 +complex 1 64 2.772589 2.772589 269 +thesi 1 57 2.890372 2.890372 327 +theoret 1 39 3.258097 3.258097 446 +sciencesunivers 1 37 3.332205 3.332205 486 +connect 1 37 3.332205 3.332205 485 +hong 1 14 4.317488 4.317488 1105 +kong 1 9 4.753590 4.753590 1602 +chung 1 7 5.010635 5.010635 1964 +austinaustin 1 7 5.010635 5.010635 1966 +edumi 1 6 5.164786 5.164786 2132 +fish 1 6 5.164786 5.164786 2207 +poon 1 3 5.857933 5.857933 3820 +ckpoon 1 2 6.263398 6.263398 5510 +hungri 1 2 6.263398 6.263398 5511 +keung 1 1 6.957497 6.957497 12884 +poondepart 1 1 6.957497 6.957497 12885 +askvinc 1 1 6.957497 6.957497 12886 +gogan 1 1 6.957497 6.957497 12887 +problemsom 1 1 6.957497 6.957497 12888 +harmonica 1 1 6.957497 6.957497 12889 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..096c68f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +relat 1 139 1.945910 1.945910 68 +send 1 114 2.197225 2.197225 109 +access 1 102 2.302585 2.302585 136 +associ 1 93 2.397895 2.397895 151 +member 1 84 2.484907 2.484907 165 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +dept 1 64 2.772589 2.772589 291 +secur 1 30 3.555348 3.555348 577 +chines 1 29 3.583519 3.583519 595 +linux 1 27 3.637586 3.637586 631 +head 1 23 3.806662 3.806662 732 +divis 1 21 3.912023 3.912023 803 +role 1 14 4.317488 4.317488 1101 +hong 1 14 4.317488 4.317488 1105 +usavoic 1 13 4.382027 4.382027 1198 +thedepart 1 11 4.553877 4.553877 1350 +cryptographi 1 9 4.753590 4.753590 1512 +kong 1 9 4.753590 4.753590 1602 +simon 1 8 4.875197 4.875197 1697 +chung 1 7 5.010635 5.010635 1964 +park 1 6 5.164786 5.164786 2218 +mission 1 5 5.347108 5.347108 2465 +nist 1 4 5.568345 5.568345 2973 +church 1 4 5.568345 5.568345 3011 +rivest 1 3 5.857933 5.857933 3248 +meemail 1 3 5.857933 5.857933 3821 +edupost 1 3 5.857933 5.857933 3822 +thenetwork 1 2 6.263398 6.263398 5434 +byprof 1 2 6.263398 6.263398 5512 +wongchung 1 1 6.957497 6.957497 12890 +wonglast 1 1 6.957497 6.957497 12891 +labwhich 1 1 6.957497 6.957497 12892 +clearinghous 1 1 6.957497 6.957497 12893 +rbac 1 1 6.957497 6.957497 12894 +ckwong 1 1 6.957497 6.957497 12895 +hyde 1 1 6.957497 6.957497 12896 +baptist 1 1 6.957497 6.957497 12897 +netbsd 1 1 6.957497 6.957497 12898 +freebsd 1 1 6.957497 6.957497 12899 +openbsd 1 1 6.957497 6.957497 12900 +tockwong 1 1 6.957497 6.957497 12901 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..6f38eb1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +techniqu 1 99 2.302585 2.302585 138 +real 1 93 2.397895 2.397895 144 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +build 1 85 2.484907 2.484907 184 +dynam 1 76 2.564949 2.564949 194 +issu 1 78 2.564949 2.564949 211 +appli 1 71 2.639057 2.639057 226 +knowledg 1 67 2.708050 2.708050 243 +simul 1 66 2.708050 2.708050 255 +integr 1 67 2.708050 2.708050 245 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +particular 1 51 2.995732 2.995732 352 +finger 1 52 2.995732 2.995732 354 +possibl 1 47 3.091042 3.091042 378 +netscap 1 44 3.135494 3.135494 395 +vita 1 38 3.295837 3.295837 473 +focu 1 30 3.555348 3.555348 571 +scale 1 28 3.610918 3.610918 613 +retriev 1 27 3.637586 3.637586 621 +constraint 1 26 3.688879 3.688879 636 +variabl 1 23 3.806662 3.806662 715 +behavior 1 18 4.060443 4.060443 881 +hotlist 1 13 4.382027 4.382027 1199 +qualit 1 11 4.553877 4.553877 1362 +facilit 1 10 4.653960 4.653960 1412 +incomplet 1 9 4.753590 4.753590 1575 +elimin 1 9 4.753590 4.753590 1558 +informationemail 1 9 4.753590 4.753590 1564 +aggreg 1 6 5.164786 5.164786 2219 +irrelev 1 3 5.857933 5.857933 3823 +descriptionof 1 2 6.263398 6.263398 5513 +intract 1 2 6.263398 6.263398 5044 +thiswil 1 2 6.263398 6.263398 4944 +withlarg 1 2 6.263398 6.263398 4926 +followingtechniqu 1 2 6.263398 6.263398 5514 +clanci 1 1 6.957497 6.957497 12902 +clancyresearch 1 1 6.957497 6.957497 12903 +containinga 1 1 6.957497 6.957497 12904 +frequentlyi 1 1 6.957497 6.957497 12905 +incomprehens 1 1 6.957497 6.957497 12906 +simulationto 1 1 6.957497 6.957497 12907 +distinctionsof 1 1 6.957497 6.957497 12908 +whichaddress 1 1 6.957497 6.957497 12909 +abstractiontechniqu 1 1 6.957497 6.957497 12910 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..9453d798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +three 1 54 2.944439 2.944439 330 +five 1 19 4.007333 4.007333 841 +four 1 18 4.060443 4.060443 905 +eight 1 11 4.553877 4.553877 1331 +seven 1 9 4.753590 4.753590 1561 +nine 1 6 5.164786 5.164786 2047 +eleven 1 3 5.857933 5.857933 3824 +jimbo 1 1 6.957497 6.957497 12911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..0e9a82cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +call 1 91 2.397895 2.397895 153 +taylor 1 63 2.772589 2.772589 287 +januari 1 62 2.772589 2.772589 264 +author 1 39 3.258097 3.258097 450 +china 1 37 3.332205 3.332205 487 +photo 1 31 3.496508 3.496508 561 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +edulast 1 17 4.110874 4.110874 927 +hong 1 14 4.317488 4.317488 1105 +chri 1 11 4.553877 4.553877 1311 +fellowship 1 10 4.653960 4.653960 1460 +kong 1 9 4.753590 4.753590 1602 +christian 1 7 5.010635 5.010635 1949 +church 1 4 5.568345 5.568345 3011 +chuwelcom 1 1 6.957497 6.957497 12912 +myselfmi 1 1 6.957497 6.957497 12913 +chuemail 1 1 6.957497 6.957497 12914 +cnchu 1 1 6.957497 6.957497 12915 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..abe6ef47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,180 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +click 1 142 1.945910 1.945910 78 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +code 1 108 2.197225 2.197225 116 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +manag 1 114 2.197225 2.197225 125 +user 1 104 2.302585 2.302585 137 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +member 1 84 2.484907 2.484907 165 +interfac 1 79 2.564949 2.564949 209 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +dynam 1 76 2.564949 2.564949 194 +free 1 73 2.639057 2.639057 224 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +window 1 68 2.708050 2.708050 242 +written 1 63 2.772589 2.772589 278 +improv 1 62 2.772589 2.772589 289 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +automat 1 61 2.833213 2.833213 306 +direct 1 57 2.890372 2.890372 316 +major 1 56 2.890372 2.890372 315 +browser 1 56 2.890372 2.890372 313 +publish 1 57 2.890372 2.890372 326 +faculti 1 56 2.890372 2.890372 325 +overview 1 56 2.890372 2.890372 323 +allow 1 53 2.944439 2.944439 333 +visual 1 48 3.044522 3.044522 372 +still 1 50 3.044522 3.044522 362 +featur 1 46 3.091042 3.091042 386 +directori 1 45 3.135494 3.135494 396 +made 1 44 3.135494 3.135494 398 +edit 1 42 3.218876 3.218876 418 +tutori 1 39 3.258097 3.258097 437 +announc 1 40 3.258097 3.258097 441 +multipl 1 39 3.258097 3.258097 453 +join 1 39 3.258097 3.258097 457 +form 1 39 3.258097 3.258097 443 +connect 1 37 3.332205 3.332205 485 +download 1 36 3.367296 3.367296 489 +singl 1 34 3.401197 3.401197 510 +manual 1 35 3.401197 3.401197 504 +jame 1 35 3.401197 3.401197 507 +articl 1 33 3.433987 3.433987 530 +john 1 33 3.433987 3.433987 532 +independ 1 32 3.465736 3.465736 548 +kind 1 32 3.465736 3.465736 541 +ad 1 32 3.465736 3.465736 544 +graph 1 30 3.555348 3.555348 576 +produc 1 30 3.555348 3.555348 572 +compon 1 30 3.555348 3.555348 570 +releas 1 28 3.610918 3.610918 616 +quit 1 27 3.637586 3.637586 633 +repres 1 26 3.688879 3.688879 656 +revis 1 26 3.688879 3.688879 640 +constraint 1 26 3.688879 3.688879 636 +flow 1 24 3.761200 3.761200 700 +compress 1 23 3.806662 3.806662 719 +sequenti 1 22 3.850148 3.850148 745 +varieti 1 22 3.850148 3.850148 740 +hierarchi 1 22 3.850148 3.850148 744 +alumni 1 21 3.912023 3.912023 807 +prepar 1 20 3.951244 3.951244 824 +brown 1 16 4.174387 4.174387 977 +partit 1 16 4.174387 4.174387 984 +enough 1 15 4.248495 4.248495 1040 +affili 1 13 4.382027 4.382027 1194 +incorpor 1 13 4.382027 4.382027 1163 +instanc 1 11 4.553877 4.553877 1322 +node 1 11 4.553877 4.553877 1326 +fill 1 11 4.553877 4.553877 1349 +regard 1 11 4.553877 4.553877 1309 +compos 1 9 4.753590 4.753590 1527 +screen 1 9 4.753590 4.753590 1577 +entitl 1 9 4.753590 4.753590 1490 +shot 1 7 5.010635 5.010635 1898 +notifi 1 6 5.164786 5.164786 2106 +banerje 1 6 5.164786 5.164786 2018 +parallelprogram 1 5 5.347108 5.347108 2379 +stage 1 5 5.347108 5.347108 2488 +despit 1 5 5.347108 5.347108 2317 +alfr 1 4 5.568345 5.568345 2882 +lord 1 4 5.568345 5.568345 2906 +crai 1 4 5.568345 5.568345 3012 +prospect 1 4 5.568345 5.568345 3013 +snail 1 4 5.568345 5.568345 2916 +werth 1 4 5.568345 5.568345 3004 +preced 1 3 5.857933 5.857933 3107 +sophist 1 3 5.857933 5.857933 3545 +easier 1 3 5.857933 5.857933 3470 +pleasant 1 3 5.857933 5.857933 3825 +informationfor 1 3 5.857933 5.857933 3738 +berger 1 3 5.857933 5.857933 3702 +dwip 1 3 5.857933 5.857933 3197 +wilder 1 2 6.263398 6.263398 5516 +symmetri 1 2 6.263398 6.263398 5517 +newest 1 2 6.263398 6.263398 5518 +reproduc 1 2 6.263398 6.263398 5519 +publicationscod 1 2 6.263398 6.263398 5520 +emeri 1 2 6.263398 6.263398 5515 +ajita 1 2 6.263398 6.263398 5461 +systemmast 1 1 6.957497 6.957497 12916 +lawless 1 1 6.957497 6.957497 12917 +codeless 1 1 6.957497 6.957497 12918 +myriad 1 1 6.957497 6.957497 12919 +tennysoncod 1 1 6.957497 6.957497 12920 +sequentialprogram 1 1 6.957497 6.957497 12921 +wheredata 1 1 6.957497 6.957497 12922 +arc 1 1 6.957497 6.957497 12923 +thesequenti 1 1 6.957497 6.957497 12924 +sequent 1 1 6.957497 6.957497 12925 +smp 1 1 6.957497 6.957497 12926 +macdraw 1 1 6.957497 6.957497 12927 +subgraph 1 1 6.957497 6.957497 12928 +hpcwire 1 1 6.957497 6.957497 12929 +backend 1 1 6.957497 6.957497 12930 +xcodelib 1 1 6.957497 6.957497 12931 +lieu 1 1 6.957497 6.957497 12932 +groupgroup 1 1 6.957497 6.957497 12933 +leaderprofessor 1 1 6.957497 6.957497 12934 +bergerstud 1 1 6.957497 6.957497 12935 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..6b504286 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +site 1 106 2.197225 2.197225 119 +search 1 95 2.397895 2.397895 155 +addit 1 74 2.639057 2.639057 228 +knowledg 1 67 2.708050 2.708050 243 +taylor 1 63 2.772589 2.772589 287 +tech 1 35 3.401197 3.401197 515 +steve 1 29 3.583519 3.583519 594 +hotlist 1 13 4.382027 4.382027 1199 +correl 1 5 5.347108 5.347108 2279 +multifunct 1 3 5.857933 5.857933 3826 +correlstev 1 1 6.957497 6.957497 12936 +correlresearchph 1 1 6.957497 6.957497 12937 +reportcontact 1 1 6.957497 6.957497 12938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..81ba4475 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +specif 1 106 2.197225 2.197225 106 +check 1 115 2.197225 2.197225 118 +part 1 98 2.302585 2.302585 129 +real 1 93 2.397895 2.397895 144 +control 1 82 2.484907 2.484907 164 +method 1 80 2.564949 2.564949 213 +written 1 63 2.772589 2.772589 278 +taylor 1 63 2.772589 2.772589 287 +origin 1 38 3.295837 3.295837 472 +respons 1 37 3.332205 3.332205 476 +formal 1 37 3.332205 3.332205 478 +robot 1 36 3.367296 3.367296 497 +synchron 1 29 3.583519 3.583519 588 +packag 1 28 3.610918 3.610918 614 +utc 1 27 3.637586 3.637586 629 +linux 1 27 3.637586 3.637586 631 +properti 1 22 3.850148 3.850148 749 +inth 1 22 3.850148 3.850148 741 +half 1 21 3.912023 3.912023 776 +latest 1 21 3.912023 3.912023 785 +wrote 1 20 3.951244 3.951244 830 +lot 1 18 4.060443 4.060443 889 +devic 1 16 4.174387 4.174387 1002 +driver 1 8 4.875197 4.875197 1657 +carlo 1 5 5.347108 5.347108 2515 +tempest 1 5 5.347108 5.347108 2548 +theth 1 5 5.347108 5.347108 2325 +toolset 1 4 5.568345 5.568345 3014 +austindepart 1 4 5.568345 5.568345 3008 +provinc 1 4 5.568345 5.568345 3009 +reactiv 1 3 5.857933 5.857933 3575 +publicationsi 1 3 5.857933 5.857933 3827 +softwareth 1 3 5.857933 5.857933 3552 +sciencesaustin 1 3 5.857933 5.857933 3828 +grabber 1 2 6.263398 6.263398 5521 +spain 1 2 6.263398 6.263398 5522 +pucholcarlo 1 1 6.957497 6.957497 12940 +pucholresearch 1 1 6.957497 6.957497 12941 +esterel 1 1 6.957497 6.957497 12939 +mawl 1 1 6.957497 6.957497 12942 +forbrows 1 1 6.957497 6.957497 12943 +verifyingsafeti 1 1 6.957497 6.957497 12944 +thequantavisionfram 1 1 6.957497 6.957497 12945 +thejoystickdevic 1 1 6.957497 6.957497 12946 +informationoffic 1 1 6.957497 6.957497 12947 +dreal 1 1 6.957497 6.957497 12948 +phun 1 1 6.957497 6.957497 12949 +interestsmemb 1 1 6.957497 6.957497 12950 +interesti 1 1 6.957497 6.957497 12951 +fromgandia 1 1 6.957497 6.957497 12952 +valencia 1 1 6.957497 6.957497 12953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..bfdf6e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +base 1 165 1.791759 1.791759 50 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +real 1 93 2.397895 2.397895 144 +follow 1 92 2.397895 2.397895 143 +build 1 85 2.484907 2.484907 184 +member 1 84 2.484907 2.484907 165 +logic 1 71 2.639057 2.639057 230 +simul 1 66 2.708050 2.708050 255 +foundat 1 62 2.772589 2.772589 286 +reason 1 57 2.890372 2.890372 318 +three 1 54 2.944439 2.944439 330 +past 1 42 3.218876 3.218876 428 +editor 1 41 3.218876 3.218876 433 +theoret 1 39 3.258097 3.258097 446 +paul 1 38 3.295837 3.295837 471 +ofth 1 36 3.367296 3.367296 491 +utc 1 27 3.637586 3.637586 629 +constraint 1 26 3.688879 3.688879 636 +toward 1 25 3.737670 3.737670 668 +head 1 23 3.806662 3.806662 732 +properti 1 22 3.850148 3.850148 749 +chen 1 21 3.912023 3.912023 791 +wang 1 21 3.912023 3.912023 790 +verif 1 20 3.951244 3.951244 826 +synthesi 1 20 3.951244 3.951244 834 +precis 1 15 4.248495 4.248495 1023 +verifi 1 12 4.465908 4.465908 1261 +establish 1 9 4.753590 4.753590 1532 +doug 1 9 4.753590 4.753590 1517 +formul 1 8 4.875197 4.875197 1733 +canb 1 7 5.010635 5.010635 1846 +chung 1 7 5.010635 5.010635 1964 +groupth 1 5 5.347108 5.347108 2549 +carlo 1 5 5.347108 5.347108 2515 +firm 1 4 5.568345 5.568345 2684 +systemsand 1 4 5.568345 5.568345 2804 +toolset 1 4 5.568345 5.568345 3014 +aloysiu 1 3 5.857933 5.857933 3829 +lai 1 3 5.857933 5.857933 3694 +categor 1 3 5.857933 5.857933 3765 +stuart 1 3 5.857933 5.857933 3584 +guangtian 1 3 5.857933 5.857933 3810 +byprof 1 2 6.263398 6.263398 5512 +stringent 1 2 6.263398 6.263398 5523 +scenario 1 2 6.263398 6.263398 5524 +availableonlin 1 2 6.263398 6.263398 4929 +deji 1 2 6.263398 6.263398 5498 +tsou 1 2 6.263398 6.263398 5525 +clement 1 2 6.263398 6.263398 5526 +groundworkfor 1 1 6.957497 6.957497 12955 +enforc 1 1 6.957497 6.957497 12956 +modechart 1 1 6.957497 6.957497 12954 +timetool 1 1 6.957497 6.957497 12957 +languagepublicationsabstract 1 1 6.957497 6.957497 12958 +puchol 1 1 6.957497 6.957497 12959 +yangalumni 1 1 6.957497 6.957497 12960 +chih 1 1 6.957497 6.957497 12961 +farn 1 1 6.957497 6.957497 12962 +supoj 1 1 6.957497 6.957497 12963 +suthandavibul 1 1 6.957497 6.957497 12964 +farnam 1 1 6.957497 6.957497 12965 +jahanian 1 1 6.957497 6.957497 12966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..ba4daaf1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +base 1 165 1.791759 1.791759 50 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +specif 1 106 2.197225 2.197225 106 +mani 1 92 2.397895 2.397895 150 +pictur 1 89 2.397895 2.397895 160 +associ 1 93 2.397895 2.397895 151 +learn 1 86 2.484907 2.484907 170 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +educ 1 86 2.484907 2.484907 191 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +journal 1 83 2.484907 2.484907 183 +stuff 1 87 2.484907 2.484907 171 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +intellig 1 72 2.639057 2.639057 225 +would 1 67 2.708050 2.708050 251 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +taylor 1 63 2.772589 2.772589 287 +collect 1 65 2.772589 2.772589 268 +laboratori 1 63 2.772589 2.772589 292 +evalu 1 64 2.772589 2.772589 266 +thesi 1 57 2.890372 2.890372 327 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +finger 1 52 2.995732 2.995732 354 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +robot 1 36 3.367296 3.367296 497 +bibliographi 1 34 3.401197 3.401197 518 +tech 1 35 3.401197 3.401197 515 +produc 1 30 3.555348 3.555348 572 +postal 1 30 3.555348 3.555348 580 +particip 1 29 3.583519 3.583519 589 +propos 1 28 3.610918 3.610918 602 +rule 1 26 3.688879 3.688879 638 +task 1 25 3.737670 3.737670 678 +input 1 23 3.806662 3.806662 727 +mobil 1 23 3.806662 3.806662 730 +miscellan 1 23 3.806662 3.806662 731 +increas 1 20 3.951244 3.951244 829 +wrote 1 20 3.951244 3.951244 830 +expert 1 20 3.951244 3.951244 833 +north 1 19 4.007333 4.007333 873 +agent 1 18 4.060443 4.060443 910 +repositori 1 17 4.110874 4.110874 932 +expand 1 17 4.110874 4.110874 928 +across 1 16 4.174387 4.174387 974 +women 1 16 4.174387 4.174387 1004 +cognit 1 16 4.174387 4.174387 986 +researchmi 1 14 4.317488 4.317488 1119 +hotlist 1 13 4.382027 4.382027 1199 +misc 1 13 4.382027 4.382027 1124 +acquisit 1 10 4.653960 4.653960 1465 +sentenc 1 10 4.653960 4.653960 1413 +consortium 1 10 4.653960 4.653960 1467 +suitabl 1 9 4.753590 4.753590 1486 +linguist 1 9 4.753590 4.753590 1593 +folk 1 9 4.753590 4.753590 1597 +awar 1 7 5.010635 5.010635 1800 +carolina 1 6 5.164786 5.164786 2142 +truth 1 6 5.164786 5.164786 2179 +interestedin 1 5 5.347108 5.347108 2260 +corpu 1 5 5.347108 5.347108 2282 +cindi 1 3 5.857933 5.857933 3830 +groupunivers 1 3 5.857933 5.857933 3831 +primarilyin 1 3 5.857933 5.857933 3832 +diagnost 1 3 5.857933 5.857933 3833 +georgia 1 3 5.857933 5.857933 3834 +horizon 1 3 5.857933 5.857933 3746 +austini 1 2 6.263398 6.263398 5527 +deep 1 2 6.263398 6.263398 5528 +exhibit 1 2 6.263398 6.263398 5529 +cthomp 1 2 6.263398 6.263398 5530 +thompsoncindi 1 1 6.957497 6.957497 12968 +thompsonmachin 1 1 6.957497 6.957497 12969 +candlelight 1 1 6.957497 6.957497 12970 +vigil 1 1 6.957497 6.957497 12971 +internetto 1 1 6.957497 6.957497 12972 +violenc 1 1 6.957497 6.957497 12973 +semanticrepresent 1 1 6.957497 6.957497 12974 +lexic 1 1 6.957497 6.957497 12967 +atrobofest 1 1 6.957497 6.957497 12975 +wolv 1 1 6.957497 6.957497 12976 +counsel 1 1 6.957497 6.957497 12977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..a54001a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +utexa 1 189 1.609438 1.609438 44 +support 1 132 1.945910 1.945910 83 +welcom 1 122 2.079442 2.079442 99 +school 1 84 2.484907 2.484907 188 +want 1 79 2.564949 2.564949 199 +browser 1 56 2.890372 2.890372 313 +friend 1 48 3.044522 3.044522 376 +netscap 1 44 3.135494 3.135494 395 +frame 1 24 3.761200 3.761200 684 +famili 1 23 3.806662 3.806662 735 +latest 1 21 3.912023 3.912023 785 +seem 1 18 4.060443 4.060443 899 +doesn 1 15 4.248495 4.248495 1055 +xingshan 1 1 6.957497 6.957497 12978 +downloadth 1 1 6.957497 6.957497 12979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..a7292c96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +professor 1 137 1.945910 1.945910 76 +file 1 132 1.945910 1.945910 70 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +memori 1 101 2.302585 2.302585 139 +internet 1 83 2.484907 2.484907 186 +want 1 79 2.564949 2.564949 199 +taylor 1 63 2.772589 2.772589 287 +probabl 1 40 3.258097 3.258097 455 +electr 1 38 3.295837 3.295837 461 +postal 1 30 3.555348 3.555348 580 +berkelei 1 26 3.688879 3.688879 657 +experiment 1 26 3.688879 3.688879 645 +mike 1 24 3.761200 3.761200 703 +disk 1 22 3.850148 3.850148 747 +less 1 18 4.060443 4.060443 892 +seem 1 18 4.060443 4.060443 899 +classic 1 14 4.317488 4.317488 1084 +rice 1 11 4.553877 4.553877 1336 +operatingsystem 1 10 4.653960 4.653960 1401 +architect 1 8 4.875197 4.875197 1624 +gather 1 8 4.875197 4.875197 1719 +capac 1 8 4.875197 4.875197 1740 +root 1 8 4.875197 4.875197 1650 +trend 1 7 5.010635 5.010635 1842 +bore 1 7 5.010635 5.010635 1948 +austinaustin 1 7 5.010635 5.010635 1966 +price 1 6 5.164786 5.164786 1999 +pagethi 1 5 5.347108 5.347108 2336 +serverless 1 3 5.857933 5.857933 3181 +systemsth 1 3 5.857933 5.857933 3835 +informationtechnolog 1 3 5.857933 5.857933 3836 +informationassist 1 2 6.263398 6.263398 5531 +teachingfal 1 2 6.263398 6.263398 5532 +systemsspr 1 2 6.263398 6.263398 4762 +dahlin 1 1 6.957497 6.957497 12980 +dahlingener 1 1 6.957497 6.957497 12981 +architectureeveryon 1 1 6.957497 6.957497 12982 +researchxf 1 1 6.957497 6.957497 12983 +systemweb 1 1 6.957497 6.957497 12984 +pagesummar 1 1 6.957497 6.957497 12985 +compter 1 1 6.957497 6.957497 12986 +includinghistor 1 1 6.957497 6.957497 12987 +informationif 1 1 6.957497 6.957497 12988 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..4b604863 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +read 1 154 1.791759 1.791759 47 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +activ 1 84 2.484907 2.484907 182 +servic 1 72 2.639057 2.639057 236 +laboratori 1 63 2.772589 2.772589 292 +dept 1 64 2.772589 2.772589 291 +probabl 1 40 3.258097 3.258097 455 +feel 1 37 3.332205 3.332205 483 +honor 1 23 3.806662 3.806662 729 +busi 1 21 3.912023 3.912023 784 +anyth 1 16 4.174387 4.174387 998 +regularli 1 11 4.553877 4.553877 1338 +tradit 1 10 4.653960 4.653960 1404 +meant 1 6 5.164786 5.164786 2055 +phrase 1 5 5.347108 5.347108 2242 +suffic 1 4 5.568345 5.568345 2869 +crazi 1 4 5.568345 5.568345 2822 +sytem 1 4 5.568345 5.568345 3015 +vijai 1 4 5.568345 5.568345 2960 +mehom 1 4 5.568345 5.568345 2979 +lazi 1 2 6.263398 6.263398 4527 +appeal 1 2 6.263398 6.263398 4186 +garg 1 2 6.263398 6.263398 5533 +damani 1 1 6.957497 6.957497 12989 +howdi 1 1 6.957497 6.957497 12990 +pagal 1 1 6.957497 6.957497 12991 +dekho 1 1 6.957497 6.957497 12992 +updateth 1 1 6.957497 6.957497 12993 +oblig 1 1 6.957497 6.957497 12994 +providesometh 1 1 6.957497 6.957497 12995 +guadulp 1 1 6.957497 6.957497 12996 +austinphon 1 1 6.957497 6.957497 12997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..ec6a38e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +model 1 145 1.945910 1.945910 69 +area 1 144 1.945910 1.945910 80 +machin 1 129 2.079442 2.079442 95 +make 1 111 2.197225 2.197225 120 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +school 1 84 2.484907 2.484907 188 +appli 1 71 2.639057 2.639057 226 +view 1 70 2.708050 2.708050 254 +main 1 67 2.708050 2.708050 256 +complex 1 64 2.772589 2.772589 269 +laboratori 1 63 2.772589 2.772589 292 +autom 1 41 3.218876 3.218876 434 +tree 1 36 3.367296 3.367296 492 +global 1 34 3.401197 3.401197 520 +equat 1 23 3.806662 3.806662 724 +nice 1 20 3.951244 3.951244 809 +render 1 17 4.110874 4.110874 947 +happi 1 14 4.317488 4.317488 1079 +station 1 13 4.382027 4.382027 1157 +mainten 1 9 4.753590 4.753590 1543 +plane 1 6 5.164786 5.164786 2187 +multiresolut 1 5 5.347108 5.347108 2423 +escap 1 4 5.568345 5.568345 3016 +unrel 1 3 5.857933 5.857933 3837 +dane 1 2 6.263398 6.263398 5534 +marshal 1 2 6.263398 6.263398 4118 +illumin 1 2 6.263398 6.263398 4819 +pinbal 1 2 6.263398 6.263398 5508 +probe 1 2 6.263398 6.263398 5535 +marshalldan 1 1 6.957497 6.957497 12998 +electromechan 1 1 6.957497 6.957497 12999 +thelogist 1 1 6.957497 6.957497 13000 +attractor 1 1 6.957497 6.957497 13001 +burnet 1 1 6.957497 6.957497 13002 +pastur 1 1 6.957497 6.957497 13003 +jupit 1 1 6.957497 6.957497 13004 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..2f568027 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +databas 1 122 2.079442 2.079442 86 +place 1 106 2.197225 2.197225 124 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +access 1 102 2.302585 2.302585 136 +know 1 80 2.564949 2.564949 198 +good 1 77 2.564949 2.564949 200 +share 1 59 2.833213 2.833213 304 +simpl 1 60 2.833213 2.833213 298 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +archiv 1 49 3.044522 3.044522 364 +keep 1 44 3.135494 3.135494 409 +video 1 44 3.135494 3.135494 405 +go 1 33 3.433987 3.433987 529 +idea 1 32 3.465736 3.465736 545 +someth 1 31 3.496508 3.496508 554 +sort 1 22 3.850148 3.850148 738 +sure 1 20 3.951244 3.951244 813 +easi 1 16 4.174387 4.174387 969 +save 1 14 4.317488 4.317488 1099 +manner 1 14 4.317488 4.317488 1074 +pagewelcom 1 11 4.553877 4.553877 1344 +guess 1 10 4.653960 4.653960 1443 +doug 1 9 4.753590 4.753590 1517 +perhap 1 8 4.875197 4.875197 1693 +fiction 1 6 5.164786 5.164786 2217 +latexhtml 1 5 5.347108 5.347108 2347 +orlean 1 5 5.347108 5.347108 2550 +bear 1 4 5.568345 5.568345 2651 +stuart 1 3 5.857933 5.857933 3584 +justa 1 2 6.263398 6.263398 5326 +pagedoug 1 1 6.957497 6.957497 13005 +oflinksto 1 1 6.957497 6.957497 13006 +aboutsport 1 1 6.957497 6.957497 13007 +booksin 1 1 6.957497 6.957497 13008 +fewjok 1 1 6.957497 6.957497 13009 +testof 1 1 6.957497 6.957497 13010 +aweath 1 1 6.957497 6.957497 13011 +mapandcondit 1 1 6.957497 6.957497 13012 +austinandnew 1 1 6.957497 6.957497 13013 +amgraci 1 1 6.957497 6.957497 13014 +puttingit 1 1 6.957497 6.957497 13015 +webbrows 1 1 6.957497 6.957497 13016 +thisi 1 1 6.957497 6.957497 13017 +stuffmom 1 1 6.957497 6.957497 13018 +calendarlink 1 1 6.957497 6.957497 13019 +fictionbooksjokessportsfoodvideout 1 1 6.957497 6.957497 13020 +libraryresumelast 1 1 6.957497 6.957497 13021 +dasdastuart 1 1 6.957497 6.957497 13022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..0c23c3a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +doug 1 9 4.753590 4.753590 1517 +swhich 1 1 6.957497 6.957497 13023 +annoi 1 1 6.957497 6.957497 13024 +thisorthi 1 1 6.957497 6.957497 13025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..46cd2beb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +hall 1 146 1.945910 1.945910 65 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +taylor 1 63 2.772589 2.772589 287 +local 1 55 2.944439 2.944439 334 +archiv 1 49 3.044522 3.044522 364 +robot 1 36 3.367296 3.367296 497 +neural 1 30 3.555348 3.555348 578 +postal 1 30 3.555348 3.555348 580 +art 1 29 3.583519 3.583519 593 +symbol 1 27 3.637586 3.637586 620 +utc 1 27 3.637586 3.637586 629 +fine 1 20 3.951244 3.951244 822 +agent 1 18 4.060443 4.060443 910 +universityof 1 15 4.248495 4.248495 1061 +evolv 1 12 4.465908 4.465908 1223 +literatur 1 11 4.553877 4.553877 1300 +michigan 1 11 4.553877 4.553877 1368 +genet 1 10 4.653960 4.653960 1409 +santa 1 10 4.653960 4.653960 1441 +ground 1 7 5.010635 5.010635 1955 +digest 1 7 5.010635 5.010635 1864 +theus 1 4 5.568345 5.568345 2992 +spanish 1 4 5.568345 5.568345 3017 +intereststh 1 3 5.857933 5.857933 3838 +dian 1 2 6.263398 6.263398 5536 +lawdian 1 1 6.957497 6.957497 13026 +problemnavig 1 1 6.957497 6.957497 13027 +washingtonst 1 1 6.957497 6.957497 13028 +stateunivers 1 1 6.957497 6.957497 13029 +dianelaw 1 1 6.957497 6.957497 13030 +gann 1 1 6.957497 6.957497 13031 +illig 1 1 6.957497 6.957497 13032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..614746da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +resourc 1 81 2.484907 2.484907 172 +homework 1 79 2.564949 2.564949 193 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +undergradu 1 54 2.944439 2.944439 338 +mine 1 26 3.688879 3.688879 654 +alwai 1 24 3.761200 3.761200 691 +unit 1 21 3.912023 3.912023 779 +monitor 1 17 4.110874 4.110874 941 +athlet 1 7 5.010635 5.010635 1933 +greec 1 6 5.164786 5.164786 2208 +informat 1 3 5.857933 5.857933 3839 +patra 1 2 6.263398 6.263398 5537 +reasearch 1 2 6.263398 6.263398 5538 +dionisi 1 1 6.957497 6.957497 13033 +papadopoulosdionisi 1 1 6.957497 6.957497 13034 +papadopoulosabout 1 1 6.957497 6.957497 13035 +medionisi 1 1 6.957497 6.957497 13036 +papadopoulo 1 1 6.957497 6.957497 13037 +panhellen 1 1 6.957497 6.957497 13038 +associationpanathinaiko 1 1 6.957497 6.957497 13039 +clubgreek 1 1 6.957497 6.957497 13040 +newshellen 1 1 6.957497 6.957497 13041 +networkeveryth 1 1 6.957497 6.957497 13042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..55300c6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +problem 1 147 1.945910 1.945910 75 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +technic 1 100 2.302585 2.302585 140 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +sourc 1 77 2.564949 2.564949 201 +complet 1 77 2.564949 2.564949 208 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +simul 1 66 2.708050 2.708050 255 +taylor 1 63 2.772589 2.772589 287 +complex 1 64 2.772589 2.772589 269 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +visit 1 63 2.772589 2.772589 288 +space 1 57 2.890372 2.890372 310 +sampl 1 53 2.944439 2.944439 339 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +finger 1 52 2.995732 2.995732 354 +linear 1 41 3.218876 3.218876 431 +annual 1 40 3.258097 3.258097 458 +multipl 1 39 3.258097 3.258097 453 +small 1 39 3.258097 3.258097 447 +correct 1 38 3.295837 3.295837 462 +random 1 34 3.401197 3.401197 511 +product 1 33 3.433987 3.433987 527 +postal 1 30 3.555348 3.555348 580 +graph 1 30 3.555348 3.555348 576 +load 1 28 3.610918 3.610918 601 +utc 1 27 3.637586 3.637586 629 +revis 1 26 3.688879 3.688879 640 +bound 1 26 3.688879 3.688879 659 +lower 1 18 4.060443 4.060443 886 +expand 1 17 4.110874 4.110874 928 +role 1 14 4.317488 4.317488 1101 +balanc 1 14 4.317488 4.317488 1112 +weak 1 13 4.382027 4.382027 1159 +walk 1 12 4.465908 4.465908 1281 +paragraph 1 10 4.653960 4.653960 1449 +cryptographi 1 9 4.753590 4.753590 1512 +leader 1 9 4.753590 4.753590 1576 +preliminari 1 9 4.753590 4.753590 1480 +explicit 1 9 4.753590 4.753590 1525 +insert 1 8 4.875197 4.875197 1687 +elect 1 8 4.875197 4.875197 1771 +analys 1 8 4.875197 4.875197 1666 +combinatori 1 8 4.875197 4.875197 1629 +foc 1 7 5.010635 5.010635 1880 +hit 1 7 5.010635 5.010635 1965 +dimens 1 7 5.010635 5.010635 1930 +soda 1 6 5.164786 5.164786 2189 +determinist 1 6 5.164786 5.164786 2034 +stoc 1 5 5.347108 5.347108 2491 +mutual 1 5 5.347108 5.347108 2418 +asymptot 1 4 5.568345 5.568345 2676 +delet 1 4 5.568345 5.568345 2691 +exclus 1 4 5.568345 5.568345 2947 +intereststh 1 3 5.857933 5.857933 3838 +algorithmica 1 3 5.857933 5.857933 3561 +beat 1 3 5.857933 5.857933 3840 +eigenvalu 1 3 5.857933 5.857933 3364 +combinatorica 1 3 5.857933 5.857933 3649 +zuckermandavid 1 1 6.957497 6.957497 13044 +zuckermanassist 1 1 6.957497 6.957497 13045 +cryptographyresearch 1 1 6.957497 6.957497 13046 +myprofil 1 1 6.957497 6.957497 13047 +transposit 1 1 6.957497 6.957497 13048 +extractor 1 1 6.957497 6.957497 13049 +jcss 1 1 6.957497 6.957497 13050 +logspac 1 1 6.957497 6.957497 13051 +tight 1 1 6.957497 6.957497 13052 +derandom 1 1 6.957497 6.957497 13053 +constructionand 1 1 6.957497 6.957497 13054 +setfor 1 1 6.957497 6.957497 13055 +rectangl 1 1 6.957497 6.957497 13056 +sicomp 1 1 6.957497 6.957497 13043 +unapproxim 1 1 6.957497 6.957497 13057 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..471516df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +distribut 1 162 1.791759 1.791759 51 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +report 1 131 2.079442 2.079442 92 +send 1 114 2.197225 2.197225 109 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +call 1 91 2.397895 2.397895 153 +wide 1 84 2.484907 2.484907 185 +member 1 84 2.484907 2.484907 165 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +nation 1 74 2.639057 2.639057 240 +multimedia 1 68 2.708050 2.708050 258 +main 1 67 2.708050 2.708050 256 +would 1 67 2.708050 2.708050 251 +laboratori 1 63 2.772589 2.772589 292 +foundat 1 62 2.772589 2.772589 286 +content 1 59 2.833213 2.833213 302 +variou 1 56 2.890372 2.890372 317 +suggest 1 53 2.944439 2.944439 331 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +tabl 1 51 2.995732 2.995732 346 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +industri 1 38 3.295837 3.295837 464 +microsoft 1 38 3.295837 3.295837 468 +electr 1 38 3.295837 3.295837 461 +storag 1 31 3.496508 3.496508 553 +focus 1 29 3.583519 3.583519 584 +relev 1 26 3.688879 3.688879 637 +intel 1 16 4.174387 4.174387 1000 +audio 1 14 4.317488 4.317488 1094 +carri 1 13 4.382027 4.382027 1152 +nasa 1 13 4.382027 4.382027 1188 +departmentof 1 9 4.753590 4.753590 1539 +transport 1 8 4.875197 4.875197 1672 +hear 1 7 5.010635 5.010635 1940 +sponsor 1 6 5.164786 5.164786 2133 +multimediacomput 1 3 5.857933 5.857933 3841 +mitsubishi 1 3 5.857933 5.857933 3842 +merl 1 3 5.857933 5.857933 3843 +rangeof 1 2 6.263398 6.263398 4076 +federalinstitut 1 2 6.263398 6.263398 5539 +agenda 1 2 6.263398 6.263398 5037 +currentresearch 1 1 6.957497 6.957497 13058 +andmultiresolut 1 1 6.957497 6.957497 13059 +dmcl 1 1 6.957497 6.957497 13060 +microsystemsinc 1 1 6.957497 6.957497 13061 +yourcom 1 1 6.957497 6.957497 13062 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..ae482e77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +austin 1 168 1.791759 1.791759 63 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +orient 1 80 2.564949 2.564949 205 +appli 1 71 2.639057 2.639057 226 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +improv 1 62 2.772589 2.772589 289 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +programm 1 39 3.258097 3.258097 445 +microsoft 1 38 3.295837 3.295837 468 +cost 1 37 3.332205 3.332205 480 +product 1 33 3.433987 3.433987 527 +domain 1 30 3.555348 3.555348 564 +compon 1 30 3.555348 3.555348 570 +postal 1 30 3.555348 3.555348 580 +scale 1 28 3.610918 3.610918 613 +framework 1 28 3.610918 3.610918 606 +enhanc 1 26 3.688879 3.688879 644 +subject 1 26 3.688879 3.688879 647 +wai 1 25 3.737670 3.737670 662 +pattern 1 24 3.761200 3.761200 689 +methodolog 1 23 3.806662 3.806662 733 +reduc 1 22 3.850148 3.850148 759 +fund 1 21 3.912023 3.912023 805 +synthesi 1 20 3.951244 3.951244 834 +mainten 1 9 4.753590 4.753590 1543 +realiz 1 8 4.875197 4.875197 1739 +span 1 8 4.875197 4.875197 1751 +avion 1 4 5.568345 5.568345 3018 +darpa 1 4 5.568345 5.568345 2944 +batori 1 4 5.568345 5.568345 2690 +preprocessor 1 3 5.857933 5.857933 3844 +parameter 1 2 6.263398 6.263398 5540 +encapsul 1 2 6.263398 6.263398 5541 +batorydon 1 1 6.957497 6.957497 13064 +batorysoftwar 1 1 6.957497 6.957497 13065 +jakarta 1 1 6.957497 6.957497 13063 +pluggabl 1 1 6.957497 6.957497 13066 +schlumberg 1 1 6.957497 6.957497 13067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..16ca8231 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +process 1 142 1.945910 1.945910 72 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +present 1 91 2.397895 2.397895 145 +info 1 85 2.484907 2.484907 176 +know 1 80 2.564949 2.564949 198 +symposium 1 72 2.639057 2.639057 238 +main 1 67 2.708050 2.708050 256 +favorit 1 44 3.135494 3.135494 410 +methodolog 1 23 3.806662 3.806662 733 +partit 1 16 4.174387 4.174387 984 +photograph 1 15 4.248495 4.248495 1056 +usavoic 1 13 4.382027 4.382027 1198 +road 1 11 4.553877 4.553877 1374 +insert 1 8 4.875197 4.875197 1687 +banerje 1 6 5.164786 5.164786 2018 +theintern 1 4 5.568345 5.568345 2981 +dwip 1 3 5.857933 5.857933 3197 +banerjeeabout 1 1 6.957497 6.957497 13068 +methi 1 1 6.957497 6.957497 13069 +programminggroup 1 1 6.957497 6.957497 13070 +graphicalparallel 1 1 6.957497 6.957497 13071 +departmentpost 1 1 6.957497 6.957497 13072 +homepost 1 1 6.957497 6.957497 13073 +enfield 1 1 6.957497 6.957497 13074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..9c59b1c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +first 1 140 1.945910 1.945910 71 +make 1 111 2.197225 2.197225 120 +thing 1 84 2.484907 2.484907 189 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +main 1 67 2.708050 2.708050 256 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +taylor 1 63 2.772589 2.772589 287 +summer 1 56 2.890372 2.890372 311 +think 1 57 2.890372 2.890372 314 +set 1 50 3.044522 3.044522 361 +right 1 48 3.044522 3.044522 363 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +multipl 1 39 3.258097 3.258097 453 +finish 1 22 3.850148 3.850148 748 +break 1 20 3.951244 3.951244 812 +els 1 19 4.007333 4.007333 843 +left 1 19 4.007333 4.007333 851 +item 1 19 4.007333 4.007333 856 +usavoic 1 13 4.382027 4.382027 1198 +paragraph 1 10 4.653960 4.653960 1449 +mepost 1 10 4.653960 4.653960 1472 +deadlin 1 9 4.753590 4.753590 1502 +siggraph 1 8 4.875197 4.875197 1773 +forget 1 8 4.875197 4.875197 1712 +promis 1 6 5.164786 5.164786 2037 +complaint 1 4 5.568345 5.568345 2795 +emilio 1 3 5.857933 5.857933 3683 +bout 1 3 5.857933 5.857933 3670 +credibl 1 3 5.857933 5.857933 3210 +decent 1 2 6.263398 6.263398 5542 +excus 1 2 6.263398 6.263398 4684 +camahort 1 1 6.957497 6.957497 13075 +gurrea 1 1 6.957497 6.957497 13076 +mmmmm 1 1 6.957497 6.957497 13077 +lose 1 1 6.957497 6.957497 13078 +ecamahor 1 1 6.957497 6.957497 13079 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..f07f7e59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +support 1 132 1.945910 1.945910 83 +summari 1 73 2.639057 2.639057 237 +multimedia 1 68 2.708050 2.708050 258 +view 1 70 2.708050 2.708050 254 +laboratori 1 63 2.772589 2.772589 292 +greg 1 24 3.761200 3.761200 695 +head 1 23 3.806662 3.806662 732 +supervis 1 20 3.951244 3.951244 840 +eduphon 1 15 4.248495 4.248495 1060 +consortium 1 10 4.653960 4.653960 1467 +harrick 1 7 5.010635 5.010635 1849 +multimediacomput 1 3 5.857933 5.857933 3841 +lavend 1 3 5.857933 5.857933 3217 +posnak 1 1 6.957497 6.957497 13080 +isod 1 1 6.957497 6.957497 13081 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..c20aa436 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +theori 1 111 2.197225 2.197225 127 +real 1 93 2.397895 2.397895 144 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +method 1 80 2.564949 2.564949 213 +logic 1 71 2.639057 2.639057 230 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +practic 1 70 2.708050 2.708050 246 +taylor 1 63 2.772589 2.772589 287 +foundat 1 62 2.772589 2.772589 286 +direct 1 57 2.890372 2.890372 316 +reason 1 57 2.890372 2.890372 318 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +theoret 1 39 3.258097 3.258097 446 +formal 1 37 3.332205 3.332205 478 +tree 1 36 3.367296 3.367296 492 +concurr 1 34 3.401197 3.401197 501 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +verif 1 20 3.951244 3.951244 826 +aid 1 18 4.060443 4.060443 904 +cambridg 1 16 4.174387 4.174387 1008 +automata 1 13 4.382027 4.382027 1135 +calculu 1 12 4.465908 4.465908 1203 +tempor 1 9 4.753590 4.753590 1584 +juan 1 9 4.753590 4.753590 1580 +secretari 1 8 4.875197 4.875197 1775 +quantit 1 8 4.875197 4.875197 1654 +satisfi 1 8 4.875197 4.875197 1694 +mass 1 8 4.875197 4.875197 1732 +centenni 1 7 5.010635 5.010635 1967 +foc 1 7 5.010635 5.010635 1880 +srinivasan 1 6 5.164786 5.164786 2175 +handbook 1 6 5.164786 5.164786 2061 +allen 1 5 5.347108 5.347108 2470 +emerson 1 5 5.347108 5.347108 2547 +infinit 1 4 5.568345 5.568345 2596 +elsevi 1 3 5.857933 5.857933 3671 +systemsselect 1 2 6.263398 6.263398 4049 +bakker 1 2 6.263398 6.263398 5337 +leeuwen 1 2 6.263398 6.263398 5543 +emersonbruton 1 1 6.957497 6.957497 13082 +publications 1 1 6.957497 6.957497 13083 +sistla 1 1 6.957497 6.957497 13084 +sadler 1 1 6.957497 6.957497 13085 +jutla 1 1 6.957497 6.957497 13086 +determinaci 1 1 6.957497 6.957497 13087 +modal 1 1 6.957497 6.957497 13088 +amsterdam 1 1 6.957497 6.957497 13089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..e4ac74be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +hall 1 146 1.945910 1.945910 65 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +tool 1 117 2.079442 2.079442 93 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +person 1 111 2.197225 2.197225 117 +code 1 108 2.197225 2.197225 116 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +user 1 104 2.302585 2.302585 137 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +graphic 1 90 2.397895 2.397895 147 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +info 1 85 2.484907 2.484907 176 +academ 1 82 2.484907 2.484907 178 +orient 1 80 2.564949 2.564949 205 +materi 1 75 2.639057 2.639057 221 +name 1 72 2.639057 2.639057 220 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +dept 1 64 2.772589 2.772589 291 +taylor 1 63 2.772589 2.772589 287 +function 1 62 2.772589 2.772589 275 +abstract 1 62 2.772589 2.772589 276 +type 1 61 2.833213 2.833213 296 +unix 1 58 2.890372 2.890372 308 +extens 1 53 2.944439 2.944439 340 +visual 1 48 3.044522 3.044522 372 +describ 1 45 3.135494 3.135494 400 +textbook 1 44 3.135494 3.135494 397 +http 1 41 3.218876 3.218876 420 +join 1 39 3.258097 3.258097 457 +concept 1 32 3.465736 3.465736 537 +framework 1 28 3.610918 3.610918 606 +repres 1 26 3.688879 3.688879 656 +known 1 24 3.761200 3.761200 702 +compress 1 23 3.806662 3.806662 719 +lyco 1 19 4.007333 4.007333 871 +analyz 1 17 4.110874 4.110874 925 +macintosh 1 17 4.110874 4.110874 920 +near 1 14 4.317488 4.317488 1091 +composit 1 13 4.382027 4.382027 1150 +affili 1 13 4.382027 4.382027 1194 +brother 1 13 4.382027 4.382027 1189 +believ 1 13 4.382027 4.382027 1187 +doug 1 9 4.753590 4.753590 1517 +pure 1 8 4.875197 4.875197 1776 +accomplish 1 8 4.875197 4.875197 1755 +mirror 1 6 5.164786 5.164786 2028 +artist 1 6 5.164786 5.164786 2127 +haskel 1 4 5.568345 5.568345 2618 +aspir 1 4 5.568345 5.568345 3019 +berger 1 3 5.857933 5.857933 3702 +add 1 3 5.857933 5.857933 3131 +emeri 1 2 6.263398 6.263398 5515 +groupi 1 2 6.263398 6.263398 5544 +linksth 1 2 6.263398 6.263398 5545 +pageemeri 1 1 6.957497 6.957497 13093 +analyst 1 1 6.957497 6.957497 13090 +ticam 1 1 6.957497 6.957497 13091 +randomli 1 1 6.957497 6.957497 13094 +uttr 1 1 6.957497 6.957497 13095 +othermi 1 1 6.957497 6.957497 13096 +youngest 1 1 6.957497 6.957497 13097 +handiwork 1 1 6.957497 6.957497 13098 +systemtexbook 1 1 6.957497 6.957497 13099 +exchangegrac 1 1 6.957497 6.957497 13100 +evangelist 1 1 6.957497 6.957497 13092 +macaddict 1 1 6.957497 6.957497 13101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..3589dc94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +find 1 111 2.197225 2.197225 111 +intern 1 108 2.197225 2.197225 128 +part 1 98 2.302585 2.302585 129 +solut 1 82 2.484907 2.484907 162 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +librari 1 87 2.484907 2.484907 181 +come 1 78 2.564949 2.564949 202 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +august 1 66 2.708050 2.708050 257 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +visit 1 63 2.772589 2.772589 288 +januari 1 62 2.772589 2.772589 264 +semest 1 58 2.890372 2.890372 312 +date 1 51 2.995732 2.995732 344 +compani 1 41 3.218876 3.218876 423 +small 1 39 3.258097 3.258097 447 +programm 1 39 3.258097 3.258097 445 +tutori 1 39 3.258097 3.258097 437 +industri 1 38 3.295837 3.295837 464 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +repres 1 26 3.688879 3.688879 656 +try 1 22 3.850148 3.850148 764 +alumni 1 21 3.912023 3.912023 807 +love 1 21 3.912023 3.912023 804 +nice 1 20 3.951244 3.951244 809 +media 1 19 4.007333 4.007333 861 +attend 1 18 4.060443 4.060443 893 +lot 1 18 4.060443 4.060443 889 +jose 1 16 4.174387 4.174387 976 +club 1 15 4.248495 4.248495 1058 +fortran 1 15 4.248495 4.248495 1027 +eduphon 1 15 4.248495 4.248495 1060 +becam 1 14 4.317488 4.317488 1117 +francisco 1 14 4.317488 4.317488 1095 +opportun 1 13 4.382027 4.382027 1161 +market 1 11 4.553877 4.553877 1361 +branch 1 11 4.553877 4.553877 1318 +enter 1 10 4.653960 4.653960 1454 +telecommun 1 9 4.753590 4.753590 1565 +surpris 1 7 5.010635 5.010635 1828 +capit 1 7 5.010635 5.010635 1957 +vallei 1 7 5.010635 5.010635 1959 +south 1 6 5.164786 5.164786 2167 +deliv 1 6 5.164786 5.164786 2070 +silicon 1 6 5.164786 5.164786 2076 +girl 1 5 5.347108 5.347108 2410 +orlean 1 5 5.347108 5.347108 2550 +houston 1 5 5.347108 5.347108 2460 +planet 1 4 5.568345 5.568345 2912 +immedi 1 3 5.857933 5.857933 3117 +peac 1 3 5.857933 5.857933 3769 +emma 1 2 6.263398 6.263398 5546 +zhongshan 1 2 6.263398 6.263398 5547 +mini 1 2 6.263398 6.263398 5548 +magazinepc 1 2 6.263398 6.263398 5161 +wuabout 1 1 6.957497 6.957497 13102 +myselfhi 1 1 6.957497 6.957497 13103 +inibm 1 1 6.957497 6.957497 13104 +costom 1 1 6.957497 6.957497 13105 +manyalumni 1 1 6.957497 6.957497 13106 +instrumentsinc 1 1 6.957497 6.957497 13107 +computingmanag 1 1 6.957497 6.957497 13108 +informationautomat 1 1 6.957497 6.957497 13109 +baseyahoogalaxi 1 1 6.957497 6.957497 13110 +universityyellow 1 1 6.957497 6.957497 13111 +infoleisur 1 1 6.957497 6.957497 13112 +timenewspagepeopl 1 1 6.957497 6.957497 13113 +dailyartstim 1 1 6.957497 6.957497 13114 +magazinechines 1 1 6.957497 6.957497 13115 +magazinec 1 1 6.957497 6.957497 13116 +antoniosan 1 1 6.957497 6.957497 13117 +franciscomarina 1 1 6.957497 6.957497 13118 +citysan 1 1 6.957497 6.957497 13119 +pointemail 1 1 6.957497 6.957497 13120 +emmawu 1 1 6.957497 6.957497 13121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..62d9f665 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +first 1 140 1.945910 1.945910 71 +well 1 109 2.197225 2.197225 121 +main 1 67 2.708050 2.708050 256 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +break 1 20 3.951244 3.951244 812 +item 1 19 4.007333 4.007333 856 +easi 1 16 4.174387 4.174387 969 +usavoic 1 13 4.382027 4.382027 1198 +paragraph 1 10 4.653960 4.653960 1449 +mepost 1 10 4.653960 4.653960 1472 +forget 1 8 4.875197 4.875197 1712 +emilio 1 3 5.857933 5.857933 3683 +mehi 1 2 6.263398 6.263398 5549 +bullet 1 2 6.263398 6.263398 5500 +remolinaemilio 1 1 6.957497 6.957497 13122 +remolinaabout 1 1 6.957497 6.957497 13123 +eremolin 1 1 6.957497 6.957497 13124 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..9df0d045 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +locat 1 59 2.833213 2.833213 303 +http 1 41 3.218876 3.218876 420 +reach 1 24 3.761200 3.761200 688 +thank 1 23 3.806662 3.806662 721 +former 1 17 4.110874 4.110874 956 +universityof 1 15 4.248495 4.248495 1061 +sciencesat 1 7 5.010635 5.010635 1968 +levent 1 1 6.957497 6.957497 13125 +sayfasi 1 1 6.957497 6.957497 13126 +erkok 1 1 6.957497 6.957497 13127 +inturkei 1 1 6.957497 6.957497 13128 +ceng 1 1 6.957497 6.957497 13129 +metu 1 1 6.957497 6.957497 13130 +erkokto 1 1 6.957497 6.957497 13131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..8bd55e99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +area 1 144 1.945910 1.945910 80 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +logic 1 71 2.639057 2.639057 230 +reason 1 57 2.890372 2.890372 318 +postal 1 30 3.555348 3.555348 580 +mind 1 27 3.637586 3.637586 632 +voic 1 21 3.912023 3.912023 806 +cognit 1 16 4.174387 4.174387 986 +thedepart 1 11 4.553877 4.553877 1350 +turkei 1 4 5.568345 5.568345 2914 +commonsens 1 4 5.568345 5.568345 2998 +children 1 3 5.857933 5.857933 3767 +andinform 1 2 6.263398 6.263398 5550 +monoton 1 2 6.263398 6.263398 5321 +esra 1 1 6.957497 6.957497 13132 +erdem 1 1 6.957497 6.957497 13133 +bilkent 1 1 6.957497 6.957497 13134 +learninginduct 1 1 6.957497 6.957497 13135 +sciencelearningreason 1 1 6.957497 6.957497 13136 +reasoningknowledg 1 1 6.957497 6.957497 13137 +representationemotionsphilosophi 1 1 6.957497 6.957497 13138 +mindcontact 1 1 6.957497 6.957497 13139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..476bce34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +perform 1 143 1.945910 1.945910 74 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +thing 1 84 2.484907 2.484907 189 +wide 1 84 2.484907 2.484907 185 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +degre 1 69 2.708050 2.708050 259 +main 1 67 2.708050 2.708050 256 +collect 1 65 2.772589 2.772589 268 +interact 1 62 2.772589 2.772589 270 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +extens 1 53 2.944439 2.944439 340 +life 1 50 3.044522 3.044522 375 +describ 1 45 3.135494 3.135494 400 +random 1 34 3.401197 3.401197 511 +art 1 29 3.583519 3.583519 593 +toward 1 25 3.737670 3.737670 668 +sport 1 25 3.737670 3.737670 683 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +scheme 1 20 3.951244 3.951244 818 +item 1 19 4.007333 4.007333 856 +carl 1 15 4.248495 4.248495 1024 +believ 1 13 4.382027 4.382027 1187 +usavoic 1 13 4.382027 4.382027 1198 +worth 1 11 4.553877 4.553877 1294 +rice 1 11 4.553877 4.553877 1336 +chronicl 1 7 5.010635 5.010635 1952 +athlet 1 7 5.010635 5.010635 1933 +myresum 1 6 5.164786 5.164786 2199 +snow 1 6 5.164786 5.164786 2031 +houston 1 5 5.347108 5.347108 2460 +wasn 1 3 5.857933 5.857933 3800 +dust 1 2 6.263398 6.263398 5551 +syntact 1 2 6.263398 6.263398 5552 +linksth 1 2 6.263398 6.263398 5545 +household 1 2 6.263398 6.263398 4920 +peak 1 2 6.263398 6.263398 5553 +pagestephen 1 1 6.957497 6.957497 13140 +carlpardon 1 1 6.957497 6.957497 13141 +planmi 1 1 6.957497 6.957497 13142 +interestsa 1 1 6.957497 6.957497 13143 +psuedo 1 1 6.957497 6.957497 13144 +dose 1 1 6.957497 6.957497 13145 +pike 1 1 6.957497 6.957497 13146 +bandget 1 1 6.957497 6.957497 13147 +touchpost 1 1 6.957497 6.957497 13148 +esteban 1 1 6.957497 6.957497 13149 +edureturn 1 1 6.957497 6.957497 13150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..e688264b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +check 1 115 2.197225 2.197225 118 +techniqu 1 99 2.302585 2.302585 138 +learn 1 86 2.484907 2.484907 170 +solut 1 82 2.484907 2.484907 162 +control 1 82 2.484907 2.484907 164 +educ 1 86 2.484907 2.484907 191 +method 1 80 2.564949 2.564949 213 +effici 1 73 2.639057 2.639057 233 +knowledg 1 67 2.708050 2.708050 243 +improv 1 62 2.772589 2.772589 289 +plan 1 65 2.772589 2.772589 272 +descript 1 64 2.772589 2.772589 271 +taylor 1 63 2.772589 2.772589 287 +detail 1 57 2.890372 2.890372 321 +combin 1 42 3.218876 3.218876 421 +vita 1 38 3.295837 3.295837 473 +postal 1 30 3.555348 3.555348 580 +accur 1 25 3.737670 3.737670 680 +induct 1 11 4.553877 4.553877 1304 +solver 1 7 5.010635 5.010635 1911 +analyt 1 7 5.010635 5.010635 1913 +machinelearn 1 6 5.164786 5.164786 2084 +groupth 1 5 5.347108 5.347108 2549 +myresearch 1 4 5.568345 5.568345 2842 +theperform 1 3 5.857933 5.857933 3262 +tara 1 2 6.263398 6.263398 5555 +researchinvolv 1 2 6.263398 6.263398 5556 +acquir 1 2 6.263398 6.263398 5557 +amparticularli 1 2 6.263398 6.263398 5558 +tulan 1 2 6.263398 6.263398 5559 +estlin 1 2 6.263398 6.263398 5554 +estlintara 1 1 6.957497 6.957497 13151 +estlinmachin 1 1 6.957497 6.957497 13152 +austinresearchcontrol 1 1 6.957497 6.957497 13153 +byguid 1 1 6.957497 6.957497 13154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..f9c149e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +find 1 111 2.197225 2.197225 111 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +theori 1 111 2.197225 2.197225 127 +user 1 104 2.302585 2.302585 137 +real 1 93 2.397895 2.397895 144 +issu 1 78 2.564949 2.564949 211 +would 1 67 2.708050 2.708050 251 +street 1 63 2.772589 2.772589 293 +locat 1 59 2.833213 2.833213 303 +plai 1 60 2.833213 2.833213 307 +suggest 1 53 2.944439 2.944439 331 +directori 1 45 3.135494 3.135494 396 +join 1 39 3.258097 3.258097 457 +chines 1 29 3.583519 3.583519 595 +utc 1 27 3.637586 3.637586 629 +todai 1 25 3.737670 3.737670 672 +watch 1 21 3.912023 3.912023 789 +voic 1 21 3.912023 3.912023 806 +assum 1 19 4.007333 4.007333 845 +stop 1 17 4.110874 4.110874 942 +women 1 16 4.174387 4.174387 1004 +action 1 15 4.248495 4.248495 1038 +trip 1 14 4.317488 4.317488 1113 +central 1 13 4.382027 4.382027 1160 +suit 1 13 4.382027 4.382027 1129 +food 1 12 4.465908 4.465908 1285 +basketbal 1 12 4.465908 4.465908 1289 +market 1 11 4.553877 4.553877 1361 +noth 1 11 4.553877 4.553877 1328 +black 1 10 4.653960 4.653960 1418 +shop 1 10 4.653960 4.653960 1469 +mepost 1 10 4.653960 4.653960 1472 +swim 1 9 4.753590 4.753590 1599 +japan 1 8 4.875197 4.875197 1762 +foreign 1 7 5.010635 5.010635 1919 +troubl 1 6 5.164786 5.164786 2002 +knew 1 5 5.347108 5.347108 2445 +pack 1 3 5.857933 5.857933 3597 +legion 1 3 5.857933 5.857933 3708 +guadalup 1 3 5.857933 5.857933 3255 +francoi 1 2 6.263398 6.263398 4523 +forest 1 2 6.263398 6.263398 5368 +cake 1 2 6.263398 6.263398 5118 +forthcom 1 2 6.263398 6.263398 5392 +crawl 1 2 6.263398 6.263398 5561 +wisdom 1 2 6.263398 6.263398 4430 +polic 1 2 6.263398 6.263398 5560 +barbanson 1 1 6.957497 6.957497 13155 +versionhom 1 1 6.957497 6.957497 13157 +versionthi 1 1 6.957497 6.957497 13158 +spool 1 1 6.957497 6.957497 13159 +francoisabout 1 1 6.957497 6.957497 13160 +mecurr 1 1 6.957497 6.957497 13161 +genuin 1 1 6.957497 6.957497 13162 +pastri 1 1 6.957497 6.957497 13163 +fruit 1 1 6.957497 6.957497 13164 +mouss 1 1 6.957497 6.957497 13165 +groceri 1 1 6.957497 6.957497 13166 +shed 1 1 6.957497 6.957497 13167 +lighton 1 1 6.957497 6.957497 13168 +hyogo 1 1 6.957497 6.957497 13169 +tank 1 1 6.957497 6.957497 13156 +atdominion 1 1 6.957497 6.957497 13170 +hqcheck 1 1 6.957497 6.957497 13171 +dilberti 1 1 6.957497 6.957497 13172 +mentionthat 1 1 6.957497 6.957497 13173 +edufrancoi 1 1 6.957497 6.957497 13174 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..8917faec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +center 1 88 2.397895 2.397895 158 +graphic 1 90 2.397895 2.397895 147 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +appli 1 71 2.639057 2.639057 226 +laboratori 1 63 2.772589 2.772589 292 +http 1 41 3.218876 3.218876 420 +autom 1 41 3.218876 3.218876 434 +electr 1 38 3.295837 3.295837 461 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +progress 1 28 3.610918 3.610918 598 +director 1 22 3.850148 3.850148 767 +divis 1 21 3.912023 3.912023 803 +former 1 17 4.110874 4.110874 956 +social 1 13 4.382027 4.382027 1123 +donald 1 9 4.753590 4.753590 1510 +fussel 1 5 5.347108 5.347108 2300 +regent 1 5 5.347108 5.347108 2551 +dalla 1 4 5.568345 5.568345 2930 +crow 1 3 5.857933 5.857933 3845 +trammel 1 2 6.263398 6.263398 5562 +dartmouth 1 2 6.263398 6.263398 5393 +collegem 1 2 6.263398 6.263398 5563 +eduinform 1 1 6.957497 6.957497 13175 +fussellb 1 1 6.957497 6.957497 13176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..4e2c6358 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +recent 1 167 1.791759 1.791759 58 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +document 1 121 2.079442 2.079442 89 +find 1 111 2.197225 2.197225 111 +start 1 83 2.484907 2.484907 173 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +go 1 33 3.433987 3.433987 529 +someth 1 31 3.496508 3.496508 554 +anyth 1 16 4.174387 4.174387 998 +georg 1 16 4.174387 4.174387 994 +lane 1 8 4.875197 4.875197 1720 +pagethi 1 5 5.347108 5.347108 2336 +ajit 1 3 5.857933 5.857933 3299 +eduher 1 3 5.857933 5.857933 3499 +useless 1 2 6.263398 6.263398 5564 +odd 1 2 6.263398 6.263398 5565 +georgemi 1 1 6.957497 6.957497 13177 +youand 1 1 6.957497 6.957497 13178 +goodthat 1 1 6.957497 6.957497 13179 +wickersham 1 1 6.957497 6.957497 13180 +gajit 1 1 6.957497 6.957497 13181 +foundus 1 1 6.957497 6.957497 13182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..46290ac6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +technolog 1 131 2.079442 2.079442 102 +institut 1 84 2.484907 2.484907 187 +still 1 50 3.044522 3.044522 362 +india 1 32 3.465736 3.465736 550 +art 1 29 3.583519 3.583519 593 +try 1 22 3.850148 3.850148 764 +indian 1 22 3.850148 3.850148 769 +medic 1 17 4.110874 4.110874 958 +undergrad 1 9 4.753590 4.753590 1589 +kanpur 1 8 4.875197 4.875197 1744 +river 1 6 5.164786 5.164786 2220 +arora 1 4 5.568345 5.568345 2658 +mehom 1 4 5.568345 5.568345 2979 +oak 1 2 6.263398 6.263398 5566 +geeta 1 1 6.957497 6.957497 13183 +tofigur 1 1 6.957497 6.957497 13184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..a162b6f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +last 1 314 1.098612 1.098612 14 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +click 1 142 1.945910 1.945910 78 +number 1 130 2.079442 2.079442 97 +final 1 116 2.197225 2.197225 108 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +help 1 83 2.484907 2.484907 175 +novemb 1 81 2.484907 2.484907 179 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +receiv 1 66 2.708050 2.708050 244 +suggest 1 53 2.944439 2.944439 331 +visitor 1 49 3.044522 3.044522 371 +could 1 46 3.091042 3.091042 383 +kind 1 32 3.465736 3.465736 541 +art 1 29 3.583519 3.583519 593 +actual 1 28 3.610918 3.610918 604 +quit 1 27 3.637586 3.637586 633 +wish 1 24 3.761200 3.761200 692 +head 1 23 3.806662 3.806662 732 +medic 1 17 4.110874 4.110874 958 +critic 1 16 4.174387 4.174387 982 +easi 1 16 4.174387 4.174387 969 +mayb 1 15 4.248495 4.248495 1014 +decid 1 14 4.317488 4.317488 1075 +earlier 1 13 4.382027 4.382027 1140 +minimum 1 9 4.753590 4.753590 1555 +mass 1 8 4.875197 4.875197 1732 +risk 1 8 4.875197 4.875197 1689 +sleep 1 6 5.164786 5.164786 2211 +merit 1 5 5.347108 5.347108 2466 +gokul 1 4 5.568345 5.568345 2668 +countless 1 4 5.568345 5.568345 3020 +flame 1 3 5.857933 5.857933 3696 +gripe 1 3 5.857933 5.857933 3257 +democrat 1 2 6.263398 6.263398 5567 +plakal 1 2 6.263398 6.263398 5568 +perfectli 1 2 6.263398 6.263398 5569 +theexcess 1 1 6.957497 6.957497 13185 +verbos 1 1 6.957497 6.957497 13186 +thecollect 1 1 6.957497 6.957497 13187 +putonli 1 1 6.957497 6.957497 13188 +barest 1 1 6.957497 6.957497 13189 +adieu 1 1 6.957497 6.957497 13190 +outpour 1 1 6.957497 6.957497 13191 +hag 1 1 6.957497 6.957497 13192 +hopey 1 1 6.957497 6.957497 13193 +untroubl 1 1 6.957497 6.957497 13194 +conscienc 1 1 6.957497 6.957497 13195 +untim 1 1 6.957497 6.957497 13196 +demis 1 1 6.957497 6.957497 13197 +vitriol 1 1 6.957497 6.957497 13198 +reinstat 1 1 6.957497 6.957497 13199 +signin 1 1 6.957497 6.957497 13200 +lesscrit 1 1 6.957497 6.957497 13201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..601f80ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +number 1 130 2.079442 2.079442 97 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +text 1 98 2.302585 2.302585 133 +present 1 91 2.397895 2.397895 145 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +start 1 83 2.484907 2.484907 173 +school 1 84 2.484907 2.484907 188 +thing 1 84 2.484907 2.484907 189 +control 1 82 2.484907 2.484907 164 +know 1 80 2.564949 2.564949 198 +state 1 76 2.564949 2.564949 207 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +order 1 69 2.708050 2.708050 249 +would 1 67 2.708050 2.708050 251 +colleg 1 61 2.833213 2.833213 300 +best 1 59 2.833213 2.833213 299 +plai 1 60 2.833213 2.833213 307 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +friend 1 48 3.044522 3.044522 376 +life 1 50 3.044522 3.044522 375 +cool 1 49 3.044522 3.044522 374 +without 1 50 3.044522 3.044522 370 +made 1 44 3.135494 3.135494 398 +keep 1 44 3.135494 3.135494 409 +anoth 1 45 3.135494 3.135494 408 +around 1 43 3.178054 3.178054 415 +past 1 42 3.218876 3.218876 428 +futur 1 41 3.218876 3.218876 427 +continu 1 39 3.258097 3.258097 448 +join 1 39 3.258097 3.258097 457 +winter 1 36 3.367296 3.367296 500 +especi 1 36 3.367296 3.367296 496 +game 1 36 3.367296 3.367296 498 +everi 1 34 3.401197 3.401197 519 +board 1 33 3.433987 3.433987 528 +india 1 32 3.465736 3.465736 550 +travel 1 30 3.555348 3.555348 579 +option 1 30 3.555348 3.555348 575 +power 1 30 3.555348 3.555348 573 +art 1 29 3.583519 3.583519 593 +except 1 28 3.610918 3.610918 607 +weather 1 28 3.610918 3.610918 618 +usual 1 28 3.610918 3.610918 608 +propos 1 28 3.610918 3.610918 602 +great 1 27 3.637586 3.637586 626 +american 1 27 3.637586 3.637586 634 +background 1 25 3.737670 3.737670 664 +todai 1 25 3.737670 3.737670 672 +alwai 1 24 3.761200 3.761200 691 +famili 1 23 3.806662 3.806662 735 +born 1 21 3.912023 3.912023 798 +love 1 21 3.912023 3.912023 804 +watch 1 21 3.912023 3.912023 789 +tenni 1 20 3.951244 3.951244 838 +els 1 19 4.007333 4.007333 843 +ever 1 19 4.007333 4.007333 872 +spend 1 19 4.007333 4.007333 850 +listen 1 18 4.060443 4.060443 907 +intro 1 17 4.110874 4.110874 915 +sept 1 17 4.110874 4.110874 952 +bachelor 1 17 4.110874 4.110874 957 +thought 1 17 4.110874 4.110874 945 +medic 1 17 4.110874 4.110874 958 +transfer 1 16 4.174387 4.174387 967 +sign 1 16 4.174387 4.174387 970 +goe 1 15 4.248495 4.248495 1044 +hopefulli 1 14 4.317488 4.317488 1071 +becam 1 14 4.317488 4.317488 1117 +attribut 1 14 4.317488 4.317488 1092 +came 1 13 4.382027 4.382027 1197 +care 1 13 4.382027 4.382027 1177 +sai 1 13 4.382027 4.382027 1175 +guest 1 12 4.465908 4.465908 1220 +song 1 11 4.553877 4.553877 1380 +fill 1 11 4.553877 4.553877 1349 +success 1 10 4.653960 4.653960 1390 +hang 1 9 4.753590 4.753590 1499 +kumar 1 9 4.753590 4.753590 1506 +ball 1 9 4.753590 4.753590 1608 +gold 1 8 4.875197 4.875197 1745 +soccer 1 8 4.875197 4.875197 1752 +footbal 1 7 5.010635 5.010635 1912 +cricket 1 7 5.010635 5.010635 1945 +bore 1 7 5.010635 5.010635 1948 +golden 1 7 5.010635 5.010635 1962 +happen 1 7 5.010635 5.010635 1790 +southern 1 6 5.164786 5.164786 2191 +vari 1 6 5.164786 5.164786 2001 +alphabet 1 6 5.164786 5.164786 1980 +chat 1 6 5.164786 5.164786 2128 +curiou 1 5 5.347108 5.347108 2541 +chemic 1 5 5.347108 5.347108 2552 +valuabl 1 5 5.347108 5.347108 2256 +proud 1 4 5.568345 5.568345 2918 +batch 1 4 5.568345 5.568345 2700 +enrol 1 4 5.568345 5.568345 2613 +abraham 1 4 5.568345 5.568345 2644 +gokul 1 4 5.568345 5.568345 2668 +hide 1 4 5.568345 5.568345 2996 +bold 1 3 5.857933 5.857933 3846 +cold 1 3 5.857933 5.857933 3637 +acad 1 3 5.857933 5.857933 3847 +hindi 1 3 5.857933 5.857933 3753 +narrow 1 3 5.857933 5.857933 3807 +gooti 1 2 6.263398 6.263398 4281 +subramanyam 1 2 6.263398 6.263398 4282 +somebodi 1 2 6.263398 6.263398 4463 +hyderabad 1 2 6.263398 6.263398 5570 +andhra 1 2 6.263398 6.263398 5571 +pradesh 1 2 6.263398 6.263398 5572 +osmania 1 2 6.263398 6.263398 5573 +nebraska 1 2 6.263398 6.263398 5574 +lincoln 1 2 6.263398 6.263398 5575 +addict 1 2 6.263398 6.263398 5576 +neeraj 1 2 6.263398 6.263398 5577 +shailesh 1 2 6.263398 6.263398 5578 +vipin 1 2 6.263398 6.263398 5579 +hideout 1 1 6.957497 6.957497 13202 +wont 1 1 6.957497 6.957497 13203 +disappoint 1 1 6.957497 6.957497 13204 +geographi 1 1 6.957497 6.957497 13205 +gala 1 1 6.957497 6.957497 13206 +wasjust 1 1 6.957497 6.957497 13207 +mehul 1 1 6.957497 6.957497 13208 +shantanu 1 1 6.957497 6.957497 13209 +likechess 1 1 6.957497 6.957497 13210 +carrom 1 1 6.957497 6.957497 13211 +racquet 1 1 6.957497 6.957497 13212 +definetli 1 1 6.957497 6.957497 13213 +horoscop 1 1 6.957497 6.957497 13214 +compatabil 1 1 6.957497 6.957497 13215 +destini 1 1 6.957497 6.957497 13216 +hardwork 1 1 6.957497 6.957497 13217 +dispos 1 1 6.957497 6.957497 13218 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..8ece2481 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +first 1 140 1.945910 1.945910 71 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +copi 1 63 2.772589 2.772589 284 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +date 1 51 2.995732 2.995732 344 +without 1 50 3.044522 3.044522 370 +protocol 1 45 3.135494 3.135494 407 +made 1 44 3.135494 3.135494 398 +term 1 43 3.178054 3.178054 411 +http 1 41 3.218876 3.218876 420 +must 1 40 3.258097 3.258097 442 +credit 1 38 3.295837 3.295837 460 +formal 1 37 3.332205 3.332205 478 +copyright 1 36 3.367296 3.367296 495 +survei 1 35 3.401197 3.401197 513 +word 1 34 3.401197 3.401197 508 +post 1 35 3.401197 3.401197 505 +posit 1 31 3.496508 3.496508 552 +profil 1 30 3.555348 3.555348 581 +hard 1 30 3.555348 3.555348 563 +full 1 28 3.610918 3.610918 615 +utc 1 27 3.637586 3.637586 629 +revis 1 26 3.688879 3.688879 640 +request 1 26 3.688879 3.688879 635 +notic 1 25 3.737670 3.737670 675 +other 1 24 3.761200 3.761200 697 +honor 1 23 3.806662 3.806662 729 +accept 1 18 4.060443 4.060443 879 +otherwis 1 17 4.110874 4.110874 922 +commerci 1 16 4.174387 4.174387 1005 +advantag 1 16 4.174387 4.174387 987 +permit 1 16 4.174387 4.174387 962 +evolv 1 12 4.465908 4.465908 1223 +grant 1 12 4.465908 4.465908 1216 +statement 1 11 4.553877 4.553877 1313 +submiss 1 11 4.553877 4.553877 1298 +exact 1 9 4.753590 4.753590 1509 +phrase 1 5 5.347108 5.347108 2242 +own 1 5 5.347108 5.347108 2531 +machineri 1 4 5.568345 5.568345 2851 +permiss 1 4 5.568345 5.568345 2642 +gouda 1 4 5.568345 5.568345 3021 +citat 1 3 5.857933 5.857933 3617 +sciencesaustin 1 3 5.857933 5.857933 3828 +argu 1 3 5.857933 5.857933 3698 +networkprotocol 1 3 5.857933 5.857933 3285 +moham 1 3 5.857933 5.857933 3848 +fornetwork 1 2 6.263398 6.263398 5580 +ordistribut 1 2 6.263398 6.263398 5581 +redistribut 1 2 6.263398 6.263398 5582 +goudaacm 1 1 6.957497 6.957497 13221 +surveysa 1 1 6.957497 6.957497 13222 +goudanetwork 1 1 6.957497 6.957497 13220 +specificationsand 1 1 6.957497 6.957497 13223 +pragmat 1 1 6.957497 6.957497 13219 +implementationsmoham 1 1 6.957497 6.957497 13224 +goudath 1 1 6.957497 6.957497 13225 +usagouda 1 1 6.957497 6.957497 13226 +htmlabstract 1 1 6.957497 6.957497 13227 +bridgeth 1 1 6.957497 6.957497 13228 +implementationsaddit 1 1 6.957497 6.957497 13229 +methodologypubl 1 1 6.957497 6.957497 13230 +digitalor 1 1 6.957497 6.957497 13231 +classroomus 1 1 6.957497 6.957497 13232 +profit 1 1 6.957497 6.957497 13233 +bearthi 1 1 6.957497 6.957497 13234 +forcompon 1 1 6.957497 6.957497 13235 +torepublish 1 1 6.957497 6.957497 13236 +requiresprior 1 1 6.957497 6.957497 13237 +frompubl 1 1 6.957497 6.957497 13238 +orpermiss 1 1 6.957497 6.957497 13239 +goudagouda 1 1 6.957497 6.957497 13240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..11371262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +report 1 131 2.079442 2.079442 92 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +check 1 115 2.197225 2.197225 118 +take 1 97 2.302585 2.302585 134 +imag 1 91 2.397895 2.397895 161 +meet 1 72 2.639057 2.639057 229 +would 1 67 2.708050 2.708050 251 +test 1 66 2.708050 2.708050 252 +collect 1 65 2.772589 2.772589 268 +plan 1 65 2.772589 2.772589 272 +best 1 59 2.833213 2.833213 299 +talk 1 53 2.944439 2.944439 336 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +friend 1 48 3.044522 3.044522 376 +visitor 1 49 3.044522 3.044522 371 +long 1 43 3.178054 3.178054 413 +author 1 39 3.258097 3.258097 450 +connect 1 37 3.332205 3.332205 485 +john 1 33 3.433987 3.433987 532 +product 1 33 3.433987 3.433987 527 +except 1 28 3.610918 3.610918 607 +minut 1 20 3.951244 3.951244 810 +log 1 19 4.007333 4.007333 857 +less 1 18 4.060443 4.060443 892 +matrix 1 17 4.110874 4.110874 933 +spars 1 16 4.174387 4.174387 989 +doesn 1 15 4.248495 4.248495 1055 +central 1 13 4.382027 4.382027 1160 +land 1 12 4.465908 4.465908 1273 +rememb 1 12 4.465908 4.465908 1217 +guess 1 10 4.653960 4.653960 1443 +latter 1 9 4.753590 4.753590 1522 +bore 1 7 5.010635 5.010635 1948 +oregon 1 5 5.347108 5.347108 2437 +glimps 1 4 5.568345 5.568345 2778 +plapack 1 3 5.857933 5.857933 3849 +redmond 1 3 5.857933 5.857933 3568 +mysteri 1 2 6.263398 6.263398 4715 +hail 1 2 6.263398 6.263398 5583 +gunnel 1 1 6.957497 6.957497 13241 +transpos 1 1 6.957497 6.957497 13242 +drank 1 1 6.957497 6.957497 13243 +depict 1 1 6.957497 6.957497 13244 +computationsif 1 1 6.957497 6.957497 13245 +pageam 1 1 6.957497 6.957497 13246 +towni 1 1 6.957497 6.957497 13247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..6592104a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +year 1 148 1.945910 1.945910 84 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +undergradu 1 54 2.944439 2.944439 338 +curriculum 1 33 3.433987 3.433987 535 +mellon 1 13 4.382027 4.382027 1179 +frank 1 9 4.753590 4.753590 1568 +junior 1 5 5.347108 5.347108 2519 +tropschuhfrank 1 1 6.957497 6.957497 13248 +tropschuh 1 1 6.957497 6.957497 13249 +gunther 1 1 6.957497 6.957497 13250 +schweiz 1 1 6.957497 6.957497 13251 +clayton 1 1 6.957497 6.957497 13252 +waldhofstrass 1 1 6.957497 6.957497 13253 +rheinfelden 1 1 6.957497 6.957497 13254 +vitaeenglishdeutschlinkscarnegi 1 1 6.957497 6.957497 13255 +universitterlangen 1 1 6.957497 6.957497 13256 +nrnberg 1 1 6.957497 6.957497 13257 +abroad 1 1 6.957497 6.957497 13258 +mathematisch 1 1 6.957497 6.957497 13259 +maschinen 1 1 6.957497 6.957497 13260 +datenverarbeitung 1 1 6.957497 6.957497 13261 +tropschuhgunth 1 1 6.957497 6.957497 13262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..0e71dc65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +place 1 106 2.197225 2.197225 124 +master 1 76 2.564949 2.564949 216 +name 1 72 2.639057 2.639057 220 +servic 1 72 2.639057 2.639057 236 +degre 1 69 2.708050 2.708050 259 +juli 1 60 2.833213 2.833213 305 +tabl 1 51 2.995732 2.995732 346 +directori 1 45 3.135494 3.135494 396 +china 1 37 3.332205 3.332205 487 +mine 1 26 3.688879 3.688879 654 +background 1 25 3.737670 3.737670 664 +hobbi 1 16 4.174387 4.174387 1009 +grant 1 12 4.465908 4.465908 1216 +birthdai 1 4 5.568345 5.568345 2800 +birth 1 3 5.857933 5.857933 3594 +addresspictur 1 2 6.263398 6.263398 5584 +ceremoni 1 2 6.263398 6.263398 5585 +jiangsu 1 2 6.263398 6.263398 5586 +yongxiang 1 1 6.957497 6.957497 13263 +pagemerri 1 1 6.957497 6.957497 13264 +christmashappi 1 1 6.957497 6.957497 13265 +homepagegao 1 1 6.957497 6.957497 13266 +yongxiangsever 1 1 6.957497 6.957497 13267 +pointsto 1 1 6.957497 6.957497 13268 +chinadepart 1 1 6.957497 6.957497 13269 +male 1 1 6.957497 6.957497 13270 +huanan 1 1 6.957497 6.957497 13271 +tenniseduc 1 1 6.957497 6.957497 13272 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..82964392 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +second 1 81 2.484907 2.484907 166 +term 1 43 3.178054 3.178054 411 +queri 1 33 3.433987 3.433987 524 +stop 1 17 4.110874 4.110874 942 +zhang 1 16 4.174387 4.174387 980 +gzhang 1 2 6.263398 6.263398 4183 +schoolth 1 1 6.957497 6.957497 13273 +semestercoursesc 1 1 6.957497 6.957497 13274 +linc 1 1 6.957497 6.957497 13275 +alvis 1 1 6.957497 6.957497 13276 +mirankerfil 1 1 6.957497 6.957497 13277 +databs 1 1 6.957497 6.957497 13278 +formthank 1 1 6.957497 6.957497 13279 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..7b921270 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +find 1 111 2.197225 2.197225 111 +mathemat 1 108 2.197225 2.197225 123 +techniqu 1 99 2.302585 2.302585 138 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +institut 1 84 2.484907 2.484907 187 +ieee 1 86 2.484907 2.484907 190 +academ 1 82 2.484907 2.484907 178 +internet 1 83 2.484907 2.484907 186 +optim 1 79 2.564949 2.564949 197 +refer 1 78 2.564949 2.564949 203 +new 1 64 2.772589 2.772589 262 +complex 1 64 2.772589 2.772589 269 +prof 1 64 2.772589 2.772589 273 +visit 1 63 2.772589 2.772589 288 +room 1 59 2.833213 2.833213 301 +undergradu 1 54 2.944439 2.944439 338 +even 1 45 3.135494 3.135494 393 +directori 1 45 3.135494 3.135494 396 +theoret 1 39 3.258097 3.258097 446 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +industri 1 38 3.295837 3.295837 464 +china 1 37 3.332205 3.332205 487 +sciencesunivers 1 37 3.332205 3.332205 486 +staff 1 36 3.367296 3.367296 490 +award 1 34 3.401197 3.401197 523 +given 1 32 3.465736 3.465736 538 +focus 1 29 3.583519 3.583519 584 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +constraint 1 26 3.688879 3.688879 636 +head 1 23 3.806662 3.806662 732 +size 1 23 3.806662 3.806662 713 +period 1 22 3.850148 3.850148 743 +vlsi 1 21 3.912023 3.912023 795 +martin 1 21 3.912023 3.912023 794 +rout 1 21 3.912023 3.912023 793 +chen 1 21 3.912023 3.912023 791 +voic 1 21 3.912023 3.912023 806 +aid 1 18 4.060443 4.060443 904 +edulast 1 17 4.110874 4.110874 927 +jose 1 16 4.174387 4.174387 976 +tsinghua 1 13 4.382027 4.382027 1195 +israel 1 11 4.553877 4.553877 1366 +classmat 1 9 4.753590 4.753590 1516 +wong 1 9 4.753590 4.753590 1609 +combinatori 1 8 4.875197 4.875197 1629 +wire 1 8 4.875197 4.875197 1747 +uniform 1 7 5.010635 5.010635 1845 +delai 1 7 5.010635 5.010635 1848 +zhou 1 6 5.164786 5.164786 2092 +ture 1 6 5.164786 5.164786 1997 +alex 1 6 5.164786 5.164786 2130 +internationalconfer 1 6 5.164786 5.164786 2051 +bulletin 1 5 5.347108 5.347108 2343 +weizmann 1 4 5.568345 5.568345 2858 +kept 1 4 5.568345 5.568345 2762 +zhao 1 4 5.568345 5.568345 2699 +headlin 1 3 5.857933 5.857933 3710 +amir 1 3 5.857933 5.857933 3850 +mathematicallog 1 3 5.857933 5.857933 3796 +pagealan 1 2 6.263398 6.263398 5587 +compuer 1 2 6.263398 6.263398 4692 +researchgroup 1 2 6.263398 6.263398 5588 +pnueli 1 1 6.957497 6.957497 13280 +aprofessor 1 1 6.957497 6.957497 13281 +prestig 1 1 6.957497 6.957497 13282 +incompletelist 1 1 6.957497 6.957497 13283 +publicationshai 1 1 6.957497 6.957497 13284 +forriv 1 1 6.957497 6.957497 13285 +crosstalk 1 1 6.957497 6.957497 13286 +optimalnon 1 1 6.957497 6.957497 13287 +elmor 1 1 6.957497 6.957497 13288 +acmintern 1 1 6.957497 6.957497 13289 +austintaylor 1 1 6.957497 6.957497 13290 +staustin 1 1 6.957497 6.957497 13291 +haizhou 1 1 6.957497 6.957497 13292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..d4fc3c00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +educ 1 86 2.484907 2.484907 191 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +nation 1 74 2.639057 2.639057 240 +dept 1 64 2.772589 2.772589 291 +visitor 1 49 3.044522 3.044522 371 +china 1 37 3.332205 3.332205 487 +univ 1 28 3.610918 3.610918 617 +alumni 1 21 3.912023 3.912023 807 +sept 1 17 4.110874 4.110874 952 +universityof 1 15 4.248495 4.248495 1061 +wait 1 13 4.382027 4.382027 1168 +departmentof 1 9 4.753590 4.753590 1539 +hear 1 7 5.010635 5.010635 1940 +wuhan 1 2 6.263398 6.263398 5589 +pal 1 2 6.263398 6.263398 4964 +myselfnow 1 1 6.957497 6.957497 13293 +pre 1 1 6.957497 6.957497 13294 +alumnihom 1 1 6.957497 6.957497 13295 +pagecontact 1 1 6.957497 6.957497 13296 +haosun 1 1 6.957497 6.957497 13297 +edunow 1 1 6.957497 6.957497 13298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..54ba4fb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +hall 1 146 1.945910 1.945910 65 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +intern 1 108 2.197225 2.197225 128 +call 1 91 2.397895 2.397895 153 +educ 1 86 2.484907 2.484907 191 +want 1 79 2.564949 2.564949 199 +nation 1 74 2.639057 2.639057 240 +view 1 70 2.708050 2.708050 254 +visit 1 63 2.772589 2.772589 288 +taylor 1 63 2.772589 2.772589 287 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +might 1 41 3.218876 3.218876 426 +author 1 39 3.258097 3.258097 450 +electr 1 38 3.295837 3.295837 461 +open 1 38 3.295837 3.295837 469 +utc 1 27 3.637586 3.637586 629 +honor 1 23 3.806662 3.806662 729 +finish 1 22 3.850148 3.850148 748 +divis 1 21 3.912023 3.912023 803 +grad 1 20 3.951244 3.951244 837 +minut 1 20 3.951244 3.951244 810 +thoma 1 18 4.060443 4.060443 901 +edulast 1 17 4.110874 4.110874 927 +fourth 1 16 4.174387 4.174387 999 +universityof 1 15 4.248495 4.248495 1061 +club 1 15 4.248495 4.248495 1058 +central 1 13 4.382027 4.382027 1160 +departmentof 1 9 4.753590 4.753590 1539 +volleybal 1 9 4.753590 4.753590 1598 +motorola 1 9 4.753590 4.753590 1546 +competit 1 8 4.875197 4.875197 1635 +contest 1 5 5.347108 5.347108 2273 +ioanni 1 5 5.347108 5.347108 2553 +champion 1 4 5.568345 5.568345 2982 +lanc 1 4 5.568345 5.568345 3022 +educlick 1 3 5.857933 5.857933 3612 +sawada 1 3 5.857933 5.857933 3190 +smaragdaki 1 3 5.857933 5.857933 3851 +tower 1 3 5.857933 5.857933 3818 +myfavorit 1 3 5.857933 5.857933 3852 +kansa 1 2 6.263398 6.263398 5591 +bowl 1 2 6.263398 6.263398 5417 +intramur 1 2 6.263398 6.263398 5590 +marathon 1 2 6.263398 6.263398 5592 +micheal 1 1 6.957497 6.957497 13299 +hewett 1 1 6.957497 6.957497 13300 +hewetthewett 1 1 6.957497 6.957497 13301 +fingerm 1 1 6.957497 6.957497 13302 +stanfordunivers 1 1 6.957497 6.957497 13303 +washburnunivers 1 1 6.957497 6.957497 13304 +collegiateprogram 1 1 6.957497 6.957497 13305 +wahlutc 1 1 6.957497 6.957497 13306 +hanoi 1 1 6.957497 6.957497 13307 +tokudaut 1 1 6.957497 6.957497 13308 +locatem 1 1 6.957497 6.957497 13309 +learnabout 1 1 6.957497 6.957497 13310 +downloadmi 1 1 6.957497 6.957497 13311 +learnmor 1 1 6.957497 6.957497 13312 +timefax 1 1 6.957497 6.957497 13313 +hewettemail 1 1 6.957497 6.957497 13314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..f32c44c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +search 1 95 2.397895 2.397895 155 +activ 1 84 2.484907 2.484907 182 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +solut 1 82 2.484907 2.484907 162 +educ 1 86 2.484907 2.484907 191 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +larg 1 82 2.484907 2.484907 168 +novemb 1 81 2.484907 2.484907 179 +state 1 76 2.564949 2.564949 207 +resum 1 79 2.564949 2.564949 217 +orient 1 80 2.564949 2.564949 205 +interfac 1 79 2.564949 2.564949 209 +onlin 1 75 2.639057 2.639057 223 +write 1 72 2.639057 2.639057 222 +receiv 1 66 2.708050 2.708050 244 +simul 1 66 2.708050 2.708050 255 +java 1 70 2.708050 2.708050 248 +window 1 68 2.708050 2.708050 242 +main 1 67 2.708050 2.708050 256 +function 1 62 2.772589 2.772589 275 +written 1 63 2.772589 2.772589 278 +virtual 1 62 2.772589 2.772589 285 +best 1 59 2.833213 2.833213 299 +detail 1 57 2.890372 2.890372 321 +explor 1 58 2.890372 2.890372 324 +allow 1 53 2.944439 2.944439 333 +processor 1 54 2.944439 2.944439 335 +life 1 50 3.044522 3.044522 375 +video 1 44 3.135494 3.135494 405 +natur 1 44 3.135494 3.135494 406 +http 1 41 3.218876 3.218876 420 +might 1 41 3.218876 3.218876 426 +fast 1 42 3.218876 3.218876 429 +live 1 40 3.258097 3.258097 451 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +game 1 36 3.367296 3.367296 498 +robot 1 36 3.367296 3.367296 497 +especi 1 36 3.367296 3.367296 496 +product 1 33 3.433987 3.433987 527 +board 1 33 3.433987 3.433987 528 +john 1 33 3.433987 3.433987 532 +anim 1 31 3.496508 3.496508 557 +rang 1 30 3.555348 3.555348 565 +common 1 30 3.555348 3.555348 574 +platform 1 29 3.583519 3.583519 591 +packag 1 28 3.610918 3.610918 614 +american 1 27 3.637586 3.637586 634 +client 1 25 3.737670 3.737670 679 +flow 1 24 3.761200 3.761200 700 +methodolog 1 23 3.806662 3.806662 733 +born 1 21 3.912023 3.912023 798 +unit 1 21 3.912023 3.912023 779 +busi 1 21 3.912023 3.912023 784 +theunivers 1 21 3.912023 3.912023 797 +five 1 19 4.007333 4.007333 841 +lisp 1 18 4.060443 4.060443 897 +seek 1 17 4.110874 4.110874 954 +edulast 1 17 4.110874 4.110874 927 +easi 1 16 4.174387 4.174387 969 +reflect 1 15 4.248495 4.248495 1034 +came 1 13 4.382027 4.382027 1197 +usavoic 1 13 4.382027 4.382027 1198 +assembl 1 12 4.465908 4.465908 1207 +realiti 1 12 4.465908 4.465908 1272 +resid 1 10 4.653960 4.653960 1461 +placement 1 10 4.653960 4.653960 1420 +mepost 1 10 4.653960 4.653960 1472 +novak 1 9 4.753590 4.753590 1521 +poetri 1 9 4.753590 4.753590 1596 +cross 1 8 4.875197 4.875197 1703 +realist 1 8 4.875197 4.875197 1665 +port 1 8 4.875197 4.875197 1766 +textur 1 8 4.875197 4.875197 1677 +spot 1 7 5.010635 5.010635 1894 +contract 1 6 5.164786 5.164786 1985 +gordon 1 6 5.164786 5.164786 2032 +ousterhout 1 5 5.347108 5.347108 2301 +outdoor 1 5 5.347108 5.347108 2514 +havedevelop 1 4 5.568345 5.568345 2681 +vrml 1 4 5.568345 5.568345 2949 +legion 1 3 5.857933 5.857933 3708 +flat 1 3 5.857933 5.857933 3853 +moredetail 1 3 5.857933 5.857933 3854 +expertis 1 3 5.857933 5.857933 3321 +leverag 1 3 5.857933 5.857933 3153 +vietnames 1 2 6.263398 6.263398 5593 +occup 1 2 6.263398 6.263398 5169 +akcl 1 2 6.263398 6.263398 4796 +standalon 1 2 6.263398 6.263398 4077 +researchwith 1 2 6.263398 6.263398 5594 +sdsc 1 2 6.263398 6.263398 5199 +hiep 1 1 6.957497 6.957497 13315 +nguyenhiep 1 1 6.957497 6.957497 13320 +nguyenabout 1 1 6.957497 6.957497 13321 +meabout 1 1 6.957497 6.957497 13322 +texasfor 1 1 6.957497 6.957497 13323 +providinghigh 1 1 6.957497 6.957497 13324 +hypertextresum 1 1 6.957497 6.957497 13325 +con 1 1 6.957497 6.957497 13326 +nsplace 1 1 6.957497 6.957497 13327 +rexi 1 1 6.957497 6.957497 13328 +emptiv 1 1 6.957497 6.957497 13329 +gdraw 1 1 6.957497 6.957497 13330 +xwindow 1 1 6.957497 6.957497 13316 +specular 1 1 6.957497 6.957497 13331 +sonar 1 1 6.957497 6.957497 13332 +xgcl 1 1 6.957497 6.957497 13333 +xakcl 1 1 6.957497 6.957497 13334 +gunu 1 1 6.957497 6.957497 13317 +anonlin 1 1 6.957497 6.957497 13335 +andmaintain 1 1 6.957497 6.957497 13336 +netrek 1 1 6.957497 6.957497 13318 +currentlyact 1 1 6.957497 6.957497 13337 +internetsoftwar 1 1 6.957497 6.957497 13338 +mappingroutin 1 1 6.957497 6.957497 13339 +potteri 1 1 6.957497 6.957497 13340 +factoryx 1 1 6.957497 6.957497 13319 +vrmlto 1 1 6.957497 6.957497 13341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..82a5120a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +tool 1 117 2.079442 2.079442 93 +welcom 1 122 2.079442 2.079442 99 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +book 1 99 2.302585 2.302585 131 +search 1 95 2.397895 2.397895 155 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +member 1 84 2.484907 2.484907 165 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +ieee 1 86 2.484907 2.484907 190 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +java 1 70 2.708050 2.708050 248 +guid 1 63 2.772589 2.772589 267 +new 1 64 2.772589 2.772589 262 +virtual 1 62 2.772589 2.772589 285 +collect 1 65 2.772589 2.772589 268 +taylor 1 63 2.772589 2.772589 287 +unix 1 58 2.890372 2.890372 308 +directori 1 45 3.135494 3.135494 396 +societi 1 40 3.258097 3.258097 456 +expect 1 37 3.332205 3.332205 484 +manual 1 35 3.401197 3.401197 504 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +bookmark 1 26 3.688879 3.688879 639 +yahoo 1 24 3.761200 3.761200 707 +vlsi 1 21 3.912023 3.912023 795 +martin 1 21 3.912023 3.912023 794 +nice 1 20 3.951244 3.951244 809 +citi 1 19 4.007333 4.007333 874 +career 1 12 4.465908 4.465908 1287 +entertain 1 12 4.465908 4.465908 1286 +tour 1 11 4.553877 4.553877 1307 +perl 1 11 4.553877 4.553877 1332 +mosaic 1 10 4.653960 4.653960 1426 +wong 1 9 4.753590 4.753590 1609 +infoseek 1 6 5.164786 5.164786 2188 +giant 1 3 5.857933 5.857933 3137 +huiqun 1 2 6.263398 6.263398 4200 +sunris 1 2 6.263398 6.263398 5212 +rosett 1 2 6.263398 6.263398 5595 +hqliu 1 2 6.263398 6.263398 4199 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..8d97bcee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +postscript 1 131 2.079442 2.079442 90 +librari 1 87 2.484907 2.484907 181 +onlin 1 75 2.639057 2.639057 223 +knowledg 1 67 2.708050 2.708050 243 +main 1 67 2.708050 2.708050 256 +colleg 1 61 2.833213 2.833213 300 +thesi 1 57 2.890372 2.890372 327 +reason 1 57 2.890372 2.890372 318 +advisor 1 51 2.995732 2.995732 355 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +expect 1 37 3.332205 3.332205 484 +represent 1 35 3.401197 3.401197 512 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +rule 1 26 3.688879 3.688879 638 +action 1 15 4.248495 4.248495 1038 +english 1 15 4.248495 4.248495 1033 +draft 1 14 4.317488 4.317488 1085 +philosophi 1 13 4.382027 4.382027 1167 +usavoic 1 13 4.382027 4.382027 1198 +vladimir 1 11 4.553877 4.553877 1324 +mepost 1 10 4.653960 4.653960 1472 +tempor 1 9 4.753590 4.753590 1584 +colloquium 1 8 4.875197 4.875197 1734 +sciencesat 1 7 5.010635 5.010635 1968 +infer 1 6 5.164786 5.164786 2040 +causal 1 6 5.164786 5.164786 2024 +lifschitz 1 5 5.347108 5.347108 2542 +nonmonoton 1 4 5.568345 5.568345 3023 +interestscommonsens 1 2 6.263398 6.263398 5596 +actionlog 1 2 6.263398 6.263398 5597 +reasoningmi 1 2 6.263398 6.263398 5598 +hudson 1 1 6.957497 6.957497 13342 +pagehudson 1 1 6.957497 6.957497 13343 +turnerphd 1 1 6.957497 6.957497 13344 +ofcommonsens 1 1 6.957497 6.957497 13345 +msc 1 1 6.957497 6.957497 13346 +mli 1 1 6.957497 6.957497 13347 +linkseuropean 1 1 6.957497 6.957497 13348 +spatialand 1 1 6.957497 6.957497 13349 +reasoningto 1 1 6.957497 6.957497 13350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..b178e5f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +part 1 98 2.302585 2.302585 129 +center 1 88 2.397895 2.397895 158 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +comment 1 93 2.397895 2.397895 146 +ieee 1 86 2.484907 2.484907 190 +help 1 83 2.484907 2.484907 175 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +resum 1 79 2.564949 2.564949 217 +onlin 1 75 2.639057 2.639057 223 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +format 1 48 3.044522 3.044522 356 +compani 1 41 3.218876 3.218876 423 +littl 1 39 3.258097 3.258097 454 +travel 1 30 3.555348 3.555348 579 +full 1 28 3.610918 3.610918 615 +campu 1 27 3.637586 3.637586 623 +left 1 19 4.007333 4.007333 851 +beauti 1 18 4.060443 4.060443 912 +seek 1 17 4.110874 4.110874 954 +zhang 1 16 4.174387 4.174387 980 +career 1 12 4.465908 4.465908 1287 +lake 1 11 4.553877 4.553877 1373 +allen 1 5 5.347108 5.347108 2470 +blvd 1 4 5.568345 5.568345 3007 +yanbin 1 2 6.263398 6.263398 5599 +cutti 1 1 6.957497 6.957497 13352 +webmuseum 1 1 6.957497 6.957497 13353 +homeland 1 1 6.957497 6.957497 13354 +hyanbin 1 1 6.957497 6.957497 13351 +tarlor 1 1 6.957497 6.957497 13355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..271c4e08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +construct 1 139 1.945910 1.945910 82 +tool 1 117 2.079442 2.079442 93 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +graphic 1 90 2.397895 2.397895 147 +master 1 76 2.564949 2.564949 216 +decemb 1 80 2.564949 2.564949 215 +degre 1 69 2.708050 2.708050 259 +creat 1 63 2.772589 2.772589 277 +summer 1 56 2.890372 2.890372 311 +undergradu 1 54 2.944439 2.944439 338 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +http 1 41 3.218876 3.218876 420 +compani 1 41 3.218876 3.218876 423 +small 1 39 3.258097 3.258097 447 +framework 1 28 3.610918 3.610918 606 +trace 1 25 3.737670 3.737670 677 +geometri 1 22 3.850148 3.850148 752 +concentr 1 18 4.060443 4.060443 906 +modular 1 10 4.653960 4.653960 1392 +univeristi 1 8 4.875197 4.875197 1754 +mass 1 8 4.875197 4.875197 1732 +solid 1 5 5.347108 5.347108 2255 +babi 1 5 5.347108 5.347108 2493 +isaac 1 3 5.857933 5.857933 3855 +coursework 1 3 5.857933 5.857933 3588 +sheldon 1 2 6.263398 6.263398 5226 +reciev 1 2 6.263398 6.263398 5600 +lowel 1 2 6.263398 6.263398 5224 +isheldon 1 1 6.957497 6.957497 13356 +reciv 1 1 6.957497 6.957497 13357 +unives 1 1 6.957497 6.957497 13358 +schlaeor 1 1 6.957497 6.957497 13359 +mellor 1 1 6.957497 6.957497 13360 +bsptree 1 1 6.957497 6.957497 13361 +butt 1 1 6.957497 6.957497 13362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..bd4fc43f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +internet 1 83 2.484907 2.484907 186 +taylor 1 63 2.772589 2.772589 287 +back 1 60 2.833213 2.833213 297 +friend 1 48 3.044522 3.044522 376 +get 1 46 3.091042 3.091042 380 +live 1 40 3.258097 3.258097 451 +jame 1 35 3.401197 3.401197 507 +john 1 33 3.433987 3.433987 532 +steve 1 29 3.583519 3.583519 594 +consult 1 24 3.761200 3.761200 687 +doctor 1 24 3.761200 3.761200 709 +alumni 1 21 3.912023 3.912023 807 +white 1 17 4.110874 4.110874 951 +carl 1 15 4.248495 4.248495 1024 +wife 1 13 4.382027 4.382027 1196 +rice 1 11 4.553877 4.553877 1336 +evan 1 8 4.875197 4.875197 1633 +matthew 1 6 5.164786 5.164786 2193 +fish 1 6 5.164786 5.164786 2207 +holli 1 2 6.263398 6.263398 5601 +dejanew 1 2 6.263398 6.263398 5602 +adair 1 1 6.957497 6.957497 13363 +crinkum 1 1 6.957497 6.957497 13364 +crankum 1 1 6.957497 6.957497 13365 +compound 1 1 6.957497 6.957497 13366 +eileen 1 1 6.957497 6.957497 13367 +mengerink 1 1 6.957497 6.957497 13368 +fanat 1 1 6.957497 6.957497 13369 +traylen 1 1 6.957497 6.957497 13370 +jadair 1 1 6.957497 6.957497 13371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..814a0e99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +physic 1 47 3.091042 3.091042 377 +vita 1 38 3.295837 3.295837 473 +john 1 33 3.433987 3.433987 532 +chamber 1 8 4.875197 4.875197 1692 +yale 1 6 5.164786 5.164786 2003 +pagejohn 1 2 6.263398 6.263398 5603 +universityph 1 2 6.263398 6.263398 5604 +chamberssenior 1 1 6.957497 6.957497 13372 +specialistb 1 1 6.957497 6.957497 13373 +paso 1 1 6.957497 6.957497 13374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..0d43efa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,165 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +machin 1 129 2.079442 2.079442 95 +make 1 111 2.197225 2.197225 120 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +advanc 1 99 2.302585 2.302585 130 +call 1 91 2.397895 2.397895 153 +level 1 87 2.484907 2.484907 180 +decemb 1 80 2.564949 2.564949 215 +master 1 76 2.564949 2.564949 216 +complet 1 77 2.564949 2.564949 208 +orient 1 80 2.564949 2.564949 205 +resum 1 79 2.564949 2.564949 217 +line 1 75 2.639057 2.639057 231 +appli 1 71 2.639057 2.639057 226 +simul 1 66 2.708050 2.708050 255 +goal 1 66 2.708050 2.708050 250 +test 1 66 2.708050 2.708050 252 +receiv 1 66 2.708050 2.708050 244 +differ 1 66 2.708050 2.708050 253 +dept 1 64 2.772589 2.772589 291 +artifici 1 63 2.772589 2.772589 280 +function 1 62 2.772589 2.772589 275 +organ 1 65 2.772589 2.772589 265 +abstract 1 62 2.772589 2.772589 276 +result 1 65 2.772589 2.772589 281 +interact 1 62 2.772589 2.772589 270 +juli 1 60 2.833213 2.833213 305 +thesi 1 57 2.890372 2.890372 327 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +extens 1 53 2.944439 2.944439 340 +particular 1 51 2.995732 2.995732 352 +case 1 51 2.995732 2.995732 351 +finger 1 52 2.995732 2.995732 354 +visual 1 48 3.044522 3.044522 372 +principl 1 48 3.044522 3.044522 357 +format 1 48 3.044522 3.044522 356 +understand 1 47 3.091042 3.091042 384 +effect 1 46 3.091042 3.091042 385 +long 1 43 3.178054 3.178054 413 +offer 1 43 3.178054 3.178054 414 +past 1 42 3.218876 3.218876 428 +vision 1 41 3.218876 3.218876 430 +might 1 41 3.218876 3.218876 426 +examin 1 42 3.218876 3.218876 424 +probabl 1 40 3.258097 3.258097 455 +electr 1 38 3.295837 3.295837 461 +soon 1 36 3.367296 3.367296 494 +ofth 1 36 3.367296 3.367296 491 +human 1 32 3.465736 3.465736 546 +neural 1 30 3.555348 3.555348 578 +power 1 30 3.555348 3.555348 573 +domain 1 30 3.555348 3.555348 564 +propos 1 28 3.610918 3.610918 602 +measur 1 28 3.610918 3.610918 609 +mind 1 27 3.637586 3.637586 632 +enabl 1 26 3.688879 3.688879 655 +rather 1 26 3.688879 3.688879 642 +primari 1 25 3.737670 3.737670 669 +demonstr 1 24 3.761200 3.761200 694 +doctor 1 24 3.761200 3.761200 709 +input 1 23 3.806662 3.806662 727 +begin 1 23 3.806662 3.806662 716 +self 1 22 3.850148 3.850148 761 +serv 1 22 3.850148 3.850148 758 +thu 1 21 3.912023 3.912023 773 +similar 1 21 3.912023 3.912023 771 +basi 1 20 3.951244 3.951244 828 +predict 1 19 4.007333 4.007333 855 +log 1 19 4.007333 4.007333 857 +concentr 1 18 4.060443 4.060443 906 +failur 1 18 4.060443 4.060443 898 +figur 1 18 4.060443 4.060443 903 +seek 1 17 4.110874 4.110874 954 +thought 1 17 4.110874 4.110874 945 +explan 1 16 4.174387 4.174387 985 +spatial 1 16 4.174387 4.174387 988 +later 1 15 4.248495 4.248495 1043 +drive 1 15 4.248495 4.248495 1052 +ascii 1 15 4.248495 4.248495 1032 +command 1 14 4.317488 4.317488 1083 +philosophi 1 13 4.382027 4.382027 1167 +necessari 1 13 4.382027 4.382027 1147 +carri 1 13 4.382027 4.382027 1152 +incorpor 1 13 4.382027 4.382027 1163 +overal 1 12 4.465908 4.465908 1254 +qualit 1 11 4.553877 4.553877 1362 +equal 1 10 4.653960 4.653960 1424 +candid 1 9 4.753590 4.753590 1606 +preliminari 1 9 4.753590 4.753590 1480 +realist 1 8 4.875197 4.875197 1665 +brain 1 8 4.875197 4.875197 1638 +empir 1 8 4.875197 4.875197 1722 +miikkulainen 1 8 4.875197 4.875197 1667 +quantit 1 8 4.875197 4.875197 1654 +centuri 1 7 5.010635 5.010635 1935 +appar 1 7 5.010635 5.010635 1958 +aris 1 7 5.010635 5.010635 1924 +biolog 1 6 5.164786 5.164786 2147 +illus 1 4 5.568345 5.568345 2603 +insight 1 4 5.568345 5.568345 3024 +outdat 1 4 5.568345 5.568345 2797 +useth 1 3 5.857933 5.857933 3110 +dramat 1 3 5.857933 5.857933 3239 +cortic 1 3 5.857933 5.857933 3857 +cortex 1 3 5.857933 5.857933 3856 +neuron 1 3 5.857933 5.857933 3798 +frequenc 1 3 5.857933 5.857933 3206 +bednar 1 2 6.263398 6.263398 4283 +testabl 1 2 6.263398 6.263398 5606 +hypothes 1 2 6.263398 6.263398 5607 +nearli 1 2 6.263398 6.263398 5608 +lissom 1 2 6.263398 6.263398 5605 +sirosh 1 2 6.263398 6.263398 5609 +jbednar 1 2 6.263398 6.263398 4284 +bednarjim 1 1 6.957497 6.957497 13377 +ofcognit 1 1 6.957497 6.957497 13378 +fewdecad 1 1 6.957497 6.957497 13379 +thehuman 1 1 6.957497 6.957497 13380 +beavail 1 1 6.957497 6.957497 13381 +becomingpract 1 1 6.957497 6.957497 13382 +refut 1 1 6.957497 6.957497 13383 +makecognit 1 1 6.957497 6.957497 13384 +purelyphilosoph 1 1 6.957497 6.957497 13385 +tilt 1 1 6.957497 6.957497 13376 +aftereffect 1 1 6.957497 6.957497 13375 +psychologist 1 1 6.957497 6.957497 13386 +inhibit 1 1 6.957497 6.957497 13387 +indirect 1 1 6.957497 6.957497 13388 +visualbehavior 1 1 6.957497 6.957497 13389 +departmentmi 1 1 6.957497 6.957497 13390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..4d9a135e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +document 1 121 2.079442 2.079442 89 +number 1 130 2.079442 2.079442 97 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +take 1 97 2.302585 2.302585 134 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +librari 1 87 2.484907 2.484907 181 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +help 1 83 2.484907 2.484907 175 +stuff 1 87 2.484907 2.484907 171 +learn 1 86 2.484907 2.484907 170 +start 1 83 2.484907 2.484907 173 +resum 1 79 2.564949 2.564949 217 +sourc 1 77 2.564949 2.564949 201 +state 1 76 2.564949 2.564949 207 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +name 1 72 2.639057 2.639057 220 +free 1 73 2.639057 2.639057 224 +html 1 75 2.639057 2.639057 235 +test 1 66 2.708050 2.708050 252 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +guid 1 63 2.772589 2.772589 267 +special 1 56 2.890372 2.890372 320 +unix 1 58 2.890372 2.890372 308 +summer 1 56 2.890372 2.890372 311 +format 1 48 3.044522 3.044522 356 +visitor 1 49 3.044522 3.044522 371 +tutori 1 39 3.258097 3.258097 437 +domain 1 30 3.555348 3.555348 564 +packag 1 28 3.610918 3.610918 614 +linux 1 27 3.637586 3.637586 631 +utc 1 27 3.637586 3.637586 629 +administr 1 27 3.637586 3.637586 628 +jeff 1 25 3.737670 3.737670 673 +frame 1 24 3.761200 3.761200 684 +applet 1 20 3.951244 3.951244 827 +north 1 19 4.007333 4.007333 873 +excel 1 19 4.007333 4.007333 868 +debug 1 17 4.110874 4.110874 944 +transfer 1 16 4.174387 4.174387 967 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +pretti 1 13 4.382027 4.382027 1191 +america 1 11 4.553877 4.553877 1370 +catalog 1 10 4.653960 4.653960 1431 +novak 1 9 4.753590 4.753590 1521 +largest 1 7 5.010635 5.010635 1858 +privaci 1 6 5.164786 5.164786 2144 +ross 1 5 5.347108 5.347108 2243 +florida 1 5 5.347108 5.347108 2526 +automobil 1 3 5.857933 5.857933 3709 +ethernet 1 2 6.263398 6.263398 5171 +edmund 1 2 6.263398 6.263398 5213 +buyer 1 2 6.263398 6.263398 5210 +sceneri 1 2 6.263398 6.263398 5152 +ofjunfanghi 1 1 6.957497 6.957497 13391 +sysadm 1 1 6.957497 6.957497 13392 +unixish 1 1 6.957497 6.957497 13393 +kristina 1 1 6.957497 6.957497 13394 +jfang 1 1 6.957497 6.957497 13395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..f09f64e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +start 1 83 2.484907 2.484907 173 +resum 1 79 2.564949 2.564949 217 +good 1 77 2.564949 2.564949 200 +knowledg 1 67 2.708050 2.708050 243 +long 1 43 3.178054 3.178054 413 +probabl 1 40 3.258097 3.258097 455 +john 1 33 3.433987 3.433987 532 +someth 1 31 3.496508 3.496508 554 +chip 1 21 3.912023 3.912023 770 +enough 1 15 4.248495 4.248495 1040 +beer 1 6 5.164786 5.164786 2216 +sleep 1 6 5.164786 5.164786 2211 +chew 1 3 5.857933 5.857933 3618 +dog 1 2 6.263398 6.263398 5089 +swallow 1 2 6.263398 6.263398 5025 +priorjohn 1 1 6.957497 6.957497 13397 +priormi 1 1 6.957497 6.957497 13398 +accumul 1 1 6.957497 6.957497 13399 +hurt 1 1 6.957497 6.957497 13400 +nacho 1 1 6.957497 6.957497 13401 +jprior 1 1 6.957497 6.957497 13396 +swisher 1 1 6.957497 6.957497 13402 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..0efa81bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +technolog 1 131 2.079442 2.079442 102 +site 1 106 2.197225 2.197225 119 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +internet 1 83 2.484907 2.484907 186 +laboratori 1 63 2.772589 2.772589 292 +jeff 1 25 3.737670 3.737670 673 +thoma 1 18 4.060443 4.060443 901 +ultim 1 17 4.110874 4.110874 943 +keyword 1 11 4.553877 4.553877 1356 +trade 1 7 5.010635 5.010635 1815 +homepagejeff 1 1 6.957497 6.957497 13403 +homepagecontact 1 1 6.957497 6.957497 13404 +informationpublicationssoftwar 1 1 6.957497 6.957497 13405 +groupphoto 1 1 6.957497 6.957497 13406 +albumfavorit 1 1 6.957497 6.957497 13407 +sitesuniversityof 1 1 6.957497 6.957497 13408 +departmentappliedresearch 1 1 6.957497 6.957497 13409 +electricaland 1 1 6.957497 6.957497 13410 +departmentedsfinanci 1 1 6.957497 6.957497 13411 +fttc 1 1 6.957497 6.957497 13412 +utacademiccalendarsut 1 1 6.957497 6.957497 13413 +sportshook 1 1 6.957497 6.957497 13414 +longhorn 1 1 6.957497 6.957497 13415 +utfootbal 1 1 6.957497 6.957497 13416 +scheduleaustintexa 1 1 6.957497 6.957497 13417 +jthoma 1 1 6.957497 6.957497 13418 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..92e1b9d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +octob 1 89 2.397895 2.397895 156 +stuff 1 87 2.484907 2.484907 171 +journal 1 83 2.484907 2.484907 183 +collect 1 65 2.772589 2.772589 268 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +china 1 37 3.332205 3.332205 487 +weather 1 28 3.610918 3.610918 618 +todai 1 25 3.737670 3.737670 672 +highli 1 23 3.806662 3.806662 725 +alumni 1 21 3.912023 3.912023 807 +beij 1 19 4.007333 4.007333 876 +lot 1 18 4.060443 4.060443 889 +excit 1 11 4.553877 4.553877 1329 +perl 1 11 4.553877 4.553877 1332 +ataustin 1 9 4.753590 4.753590 1610 +pagecomput 1 7 5.010635 5.010635 1900 +peke 1 5 5.347108 5.347108 2539 +appreci 1 5 5.347108 5.347108 2374 +meyour 1 3 5.857933 5.857933 3858 +homepagewelcom 1 2 6.263398 6.263398 4808 +novelschines 1 2 6.263398 6.263398 5610 +registrar 1 2 6.263398 6.263398 5611 +gradaut 1 2 6.263398 6.263398 5612 +studiesut 1 2 6.263398 6.263398 5613 +magzin 1 2 6.263398 6.263398 5614 +technicalreport 1 2 6.263398 6.263398 5615 +visitorsinc 1 2 6.263398 6.263398 5616 +jiani 1 1 6.957497 6.957497 13419 +indepart 1 1 6.957497 6.957497 13420 +ofpek 1 1 6.957497 6.957497 13421 +chinesechines 1 1 6.957497 6.957497 13422 +scenerychines 1 1 6.957497 6.957497 13423 +classicschines 1 1 6.957497 6.957497 13424 +magazineschines 1 1 6.957497 6.957497 13425 +newspapersus 1 1 6.957497 6.957497 13426 +libraryut 1 1 6.957497 6.957497 13427 +campusutaccessabout 1 1 6.957497 6.957497 13428 +citylimit 1 1 6.957497 6.957497 13429 +miscellaneousyahoojava 1 1 6.957497 6.957497 13430 +sunjavascript 1 1 6.957497 6.957497 13431 +netscapeth 1 1 6.957497 6.957497 13432 +associationcomput 1 1 6.957497 6.957497 13433 +webnetwork 1 1 6.957497 6.957497 13434 +libraryth 1 1 6.957497 6.957497 13435 +bibliographiesintern 1 1 6.957497 6.957497 13436 +jyluo 1 1 6.957497 6.957497 13437 +suggestionswould 1 1 6.957497 6.957497 13438 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..aaabd1d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +know 1 80 2.564949 2.564949 198 +logic 1 71 2.639057 2.639057 230 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +street 1 63 2.772589 2.772589 293 +reason 1 57 2.890372 2.890372 318 +advisor 1 51 2.995732 2.995732 355 +concurr 1 34 3.401197 3.401197 501 +semant 1 29 3.583519 3.583519 587 +todai 1 25 3.737670 3.737670 672 +doctor 1 24 3.761200 3.761200 709 +thank 1 23 3.806662 3.806662 721 +indian 1 22 3.850148 3.850148 769 +wonder 1 20 3.951244 3.951244 815 +lot 1 18 4.060443 4.060443 889 +bachelor 1 17 4.110874 4.110874 957 +came 1 13 4.382027 4.382027 1197 +tempor 1 9 4.753590 4.753590 1584 +madra 1 8 4.875197 4.875197 1770 +allen 1 5 5.347108 5.347108 2470 +emerson 1 5 5.347108 5.347108 2547 +mehi 1 2 6.263398 6.263398 5549 +kedar 1 1 6.957497 6.957497 13439 +namjoshiabout 1 1 6.957497 6.957497 13440 +distributedalgorithm 1 1 6.957497 6.957497 13441 +automatatheori 1 1 6.957497 6.957497 13442 +amul 1 1 6.957497 6.957497 13443 +adkedar 1 1 6.957497 6.957497 13444 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..54ee267c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +provid 1 121 2.079442 2.079442 94 +world 1 115 2.197225 2.197225 126 +academ 1 82 2.484907 2.484907 178 +stuff 1 87 2.484907 2.484907 171 +wide 1 84 2.484907 2.484907 185 +resum 1 79 2.564949 2.564949 217 +servic 1 72 2.639057 2.639057 236 +view 1 70 2.708050 2.708050 254 +taylor 1 63 2.772589 2.772589 287 +faculti 1 56 2.890372 2.890372 325 +author 1 39 3.258097 3.258097 450 +respons 1 37 3.332205 3.332205 476 +staff 1 36 3.367296 3.367296 490 +board 1 33 3.433987 3.433987 528 +express 1 32 3.465736 3.465736 540 +linux 1 27 3.637586 3.637586 631 +facil 1 20 3.951244 3.951244 814 +reflect 1 15 4.248495 4.248495 1034 +kenneth 1 12 4.465908 4.465908 1265 +guest 1 12 4.465908 4.465908 1220 +babylon 1 8 4.875197 4.875197 1731 +opinion 1 8 4.875197 4.875197 1708 +necessarili 1 7 5.010635 5.010635 1899 +polit 1 6 5.164786 5.164786 2115 +regent 1 5 5.347108 5.347108 2551 +radio 1 4 5.568345 5.568345 3025 +sole 1 4 5.568345 5.568345 2592 +cyberspac 1 3 5.857933 5.857933 3719 +harker 1 1 6.957497 6.957497 13445 +kharker 1 1 6.957497 6.957497 13446 +amateur 1 1 6.957497 6.957497 13447 +rocketri 1 1 6.957497 6.957497 13448 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..2612a400 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +softwar 1 220 1.386294 1.386294 30 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +analysi 1 124 2.079442 2.079442 98 +high 1 130 2.079442 2.079442 101 +world 1 115 2.197225 2.197225 126 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +method 1 80 2.564949 2.564949 213 +david 1 71 2.639057 2.639057 232 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +degre 1 69 2.708050 2.708050 259 +organ 1 65 2.772589 2.772589 265 +sever 1 56 2.890372 2.890372 322 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +numer 1 49 3.044522 3.044522 369 +basic 1 50 3.044522 3.044522 360 +algebra 1 45 3.135494 3.135494 394 +anoth 1 45 3.135494 3.135494 408 +linear 1 41 3.218876 3.218876 431 +award 1 34 3.401197 3.401197 523 +committe 1 34 3.401197 3.401197 522 +focus 1 29 3.583519 3.583519 584 +session 1 26 3.688879 3.688879 643 +honor 1 23 3.806662 3.806662 729 +recognit 1 23 3.806662 3.806662 723 +equat 1 23 3.806662 3.806662 724 +variabl 1 23 3.806662 3.806662 715 +director 1 22 3.850148 3.850148 767 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +spars 1 16 4.174387 4.174387 989 +young 1 16 4.174387 4.174387 991 +senior 1 14 4.317488 4.317488 1120 +researchmi 1 14 4.317488 4.317488 1119 +polynomi 1 14 4.317488 4.317488 1069 +nasa 1 13 4.382027 4.382027 1188 +iter 1 12 4.465908 4.465908 1206 +matric 1 10 4.653960 4.653960 1399 +congress 1 9 4.753590 4.753590 1592 +jersei 1 9 4.753590 4.753590 1587 +creativ 1 8 4.875197 4.875197 1777 +pacif 1 8 4.875197 4.875197 1674 +grove 1 8 4.875197 4.875197 1675 +edg 1 8 4.875197 4.875197 1647 +aris 1 7 5.010635 5.010635 1924 +brook 1 6 5.164786 5.164786 2152 +river 1 6 5.164786 5.164786 2220 +certif 1 3 5.857933 5.857933 3859 +imac 1 3 5.857933 5.857933 3718 +interestmathemat 1 3 5.857933 5.857933 3860 +ellipt 1 3 5.857933 5.857933 3774 +atlanta 1 3 5.857933 5.857933 3778 +stationari 1 3 5.857933 5.857933 3861 +kincaid 1 2 6.263398 6.263398 5617 +subprogram 1 2 6.263398 6.263398 5618 +cole 1 2 6.263398 6.263398 4697 +itpack 1 2 6.263398 6.263398 5619 +rassia 1 2 6.263398 6.263398 5620 +lecturerassoci 1 1 6.957497 6.957497 13449 +lamar 1 1 6.957497 6.957497 13450 +technicalinnov 1 1 6.957497 6.957497 13451 +andappli 1 1 6.957497 6.957497 13452 +coeffici 1 1 6.957497 6.957497 13453 +publicationsw 1 1 6.957497 6.957497 13454 +chenei 1 1 6.957497 6.957497 13455 +hay 1 1 6.957497 6.957497 13456 +coput 1 1 6.957497 6.957497 13457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..17ee21ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +academ 1 82 2.484907 2.484907 178 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +collect 1 65 2.772589 2.772589 268 +prof 1 64 2.772589 2.772589 273 +juli 1 60 2.833213 2.833213 305 +numer 1 49 3.044522 3.044522 369 +visitor 1 49 3.044522 3.044522 371 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +linear 1 41 3.218876 3.218876 431 +random 1 34 3.401197 3.401197 511 +administr 1 27 3.637586 3.637586 628 +mike 1 24 3.761200 3.761200 703 +busi 1 21 3.912023 3.912023 784 +particularli 1 19 4.007333 4.007333 867 +commerci 1 16 4.174387 4.174387 1005 +alan 1 13 4.382027 4.382027 1146 +emploi 1 12 4.465908 4.465908 1284 +walk 1 12 4.465908 4.465908 1281 +thedepart 1 11 4.553877 4.553877 1350 +ataustin 1 9 4.753590 4.753590 1610 +interestsi 1 7 5.010635 5.010635 1969 +misra 1 7 5.010635 5.010635 1856 +jayadev 1 4 5.568345 5.568345 3006 +kistler 1 3 5.857933 5.857933 3267 +syracus 1 3 5.857933 5.857933 3553 +cline 1 3 5.857933 5.857933 3218 +coursesfal 1 2 6.263398 6.263398 5225 +theperson 1 1 6.957497 6.957497 13458 +productsdivis 1 1 6.957497 6.957497 13459 +backgroundba 1 1 6.957497 6.957497 13460 +susquehanna 1 1 6.957497 6.957497 13461 +selinsgrov 1 1 6.957497 6.957497 13462 +stern 1 1 6.957497 6.957497 13463 +businessnew 1 1 6.957497 6.957497 13464 +iwith 1 1 6.957497 6.957497 13465 +algebrawith 1 1 6.957497 6.957497 13466 +pflugervil 1 1 6.957497 6.957497 13467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..8c119867 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +time 1 293 1.098612 1.098612 17 +welcom 1 122 2.079442 2.079442 99 +sinc 1 90 2.397895 2.397895 159 +march 1 61 2.833213 2.833213 295 +jacob 1 4 5.568345 5.568345 2667 +kornerup 1 3 5.857933 5.857933 3215 +kornerupjacob 1 1 6.957497 6.957497 13468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..9915a348 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +spring 1 131 2.079442 2.079442 88 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +place 1 106 2.197225 2.197225 124 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +build 1 85 2.484907 2.484907 184 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +simul 1 66 2.708050 2.708050 255 +plan 1 65 2.772589 2.772589 272 +prof 1 64 2.772589 2.772589 273 +colleg 1 61 2.833213 2.833213 300 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +explor 1 58 2.890372 2.890372 324 +finger 1 52 2.995732 2.995732 354 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +describ 1 45 3.135494 3.135494 400 +press 1 42 3.218876 3.218876 419 +map 1 39 3.258097 3.258097 452 +robot 1 36 3.367296 3.367296 497 +represent 1 35 3.401197 3.401197 512 +limit 1 29 3.583519 3.583519 585 +strategi 1 25 3.737670 3.737670 682 +recognit 1 23 3.806662 3.806662 723 +emphasi 1 22 3.850148 3.850148 755 +expert 1 20 3.951244 3.951244 833 +agent 1 18 4.060443 4.060443 910 +spatial 1 16 4.174387 4.174387 988 +cognit 1 16 4.174387 4.174387 986 +cambridg 1 16 4.174387 4.174387 1008 +consider 1 14 4.317488 4.317488 1076 +benjamin 1 11 4.553877 4.553877 1296 +tour 1 11 4.553877 4.553877 1307 +qualit 1 11 4.553877 4.553877 1362 +incomplet 1 9 4.753590 4.753590 1575 +accomplish 1 8 4.875197 4.875197 1755 +centenni 1 7 5.010635 5.010635 1967 +distinct 1 5 5.347108 5.347108 2319 +commonsens 1 4 5.568345 5.568345 2998 +qsim 1 3 5.857933 5.857933 3862 +kuiper 1 3 5.857933 5.857933 3794 +swarthmor 1 2 6.263398 6.263398 5621 +thequalit 1 2 6.263398 6.263398 5622 +kuipersbenjamin 1 1 6.957497 6.957497 13469 +kuipersbruton 1 1 6.957497 6.957497 13470 +withparticular 1 1 6.957497 6.957497 13471 +grouphom 1 1 6.957497 6.957497 13472 +andavail 1 1 6.957497 6.957497 13473 +qualitativereason 1 1 6.957497 6.957497 13474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..b93d0225 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +assist 1 112 2.197225 2.197225 113 +ieee 1 86 2.484907 2.484907 190 +internet 1 83 2.484907 2.484907 186 +taylor 1 63 2.772589 2.772589 287 +laboratori 1 63 2.772589 2.772589 292 +februari 1 54 2.944439 2.944439 328 +electron 1 47 3.091042 3.091042 379 +transact 1 39 3.258097 3.258097 438 +sciencesunivers 1 37 3.332205 3.332205 486 +photo 1 31 3.496508 3.496508 561 +profil 1 30 3.555348 3.555348 581 +turn 1 29 3.583519 3.583519 586 +campu 1 27 3.637586 3.637586 623 +administr 1 27 3.637586 3.637586 628 +american 1 27 3.637586 3.637586 634 +compress 1 23 3.806662 3.806662 719 +eduphon 1 15 4.248495 4.248495 1060 +front 1 13 4.382027 4.382027 1154 +tune 1 12 4.465908 4.465908 1227 +editori 1 9 4.753590 4.753590 1611 +simon 1 8 4.875197 4.875197 1697 +clip 1 7 5.010635 5.010635 1868 +sciencesdepart 1 6 5.164786 5.164786 2020 +carbon 1 3 5.857933 5.857933 3804 +cont 1 3 5.857933 5.857933 3171 +toss 1 2 6.263398 6.263398 5470 +kata 1 1 6.957497 6.957497 13475 +submissionnew 1 1 6.957497 6.957497 13476 +empt 1 1 6.957497 6.957497 13477 +statesman 1 1 6.957497 6.957497 13478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..b514c389 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +commun 1 95 2.397895 2.397895 157 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +test 1 66 2.708050 2.708050 252 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +foundat 1 62 2.772589 2.772589 286 +protocol 1 45 3.135494 3.135494 407 +secur 1 30 3.555348 3.555348 577 +fund 1 21 3.912023 3.912023 805 +entir 1 20 3.951244 3.951244 811 +verif 1 20 3.951244 3.951244 826 +supervis 1 20 3.951244 3.951244 840 +tune 1 12 4.465908 4.465908 1227 +cycl 1 11 4.553877 4.553877 1335 +underli 1 10 4.653960 4.653960 1410 +span 1 8 4.875197 4.875197 1751 +simon 1 8 4.875197 4.875197 1697 +lockhe 1 3 5.857933 5.857933 3863 +currentinterest 1 1 6.957497 6.957497 13479 +nsaunivers 1 1 6.957497 6.957497 13480 +videoservic 1 1 6.957497 6.957497 13481 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..483ff1c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +octob 1 89 2.397895 2.397895 156 +ofth 1 36 3.367296 3.367296 491 +robert 1 30 3.555348 3.555348 567 +edulast 1 17 4.110874 4.110874 927 +stori 1 14 4.317488 4.317488 1087 +convent 1 14 4.317488 4.317488 1072 +council 1 11 4.553877 4.553877 1364 +mountain 1 10 4.653960 4.653960 1456 +awai 1 10 4.653960 4.653960 1447 +christian 1 7 5.010635 5.010635 1949 +pageth 1 7 5.010635 5.010635 1939 +gordon 1 6 5.164786 5.164786 2032 +graham 1 4 5.568345 5.568345 2817 +republican 1 3 5.857933 5.857933 3815 +backbon 1 2 6.263398 6.263398 5623 +landrum 1 1 6.957497 6.957497 13482 +viruspictur 1 1 6.957497 6.957497 13483 +empirepch 1 1 6.957497 6.957497 13484 +retreattexa 1 1 6.957497 6.957497 13485 +rockrsumfamilyinterest 1 1 6.957497 6.957497 13486 +councillandrum 1 1 6.957497 6.957497 13487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..37960f1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +welcom 1 122 2.079442 2.079442 99 +assist 1 112 2.197225 2.197225 113 +activ 1 84 2.484907 2.484907 182 +improv 1 62 2.772589 2.772589 289 +electr 1 38 3.295837 3.295837 461 +greg 1 24 3.761200 3.761200 695 +recommend 1 22 3.850148 3.850148 737 +lavend 1 3 5.857933 5.857933 3217 +professordepart 1 2 6.263398 6.263398 5624 +biograph 1 2 6.263398 6.263398 5625 +austinr 1 1 6.957497 6.957497 13488 +lavenderadjunct 1 1 6.957497 6.957497 13489 +anddepart 1 1 6.957497 6.957497 13490 +engineeringth 1 1 6.957497 6.957497 13491 +informationsuggest 1 1 6.957497 6.957497 13492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..b66cddcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +welcom 1 122 2.079442 2.079442 99 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +internet 1 83 2.484907 2.484907 186 +activ 1 84 2.484907 2.484907 182 +master 1 76 2.564949 2.564949 216 +know 1 80 2.564949 2.564949 198 +come 1 78 2.564949 2.564949 202 +nation 1 74 2.639057 2.639057 240 +degre 1 69 2.708050 2.708050 259 +taylor 1 63 2.772589 2.772589 287 +locat 1 59 2.833213 2.833213 303 +much 1 52 2.995732 2.995732 349 +small 1 39 3.258097 3.258097 447 +jame 1 35 3.401197 3.401197 507 +particip 1 29 3.583519 3.583519 589 +campu 1 27 3.637586 3.637586 623 +equat 1 23 3.806662 3.806662 724 +vlsi 1 21 3.912023 3.912023 795 +north 1 19 4.007333 4.007333 873 +bachelor 1 17 4.110874 4.110874 957 +normal 1 16 4.174387 4.174387 995 +atth 1 15 4.248495 4.248495 1019 +month 1 15 4.248495 4.248495 1025 +hong 1 14 4.317488 4.317488 1105 +wife 1 13 4.382027 4.382027 1196 +island 1 11 4.553877 4.553877 1345 +kong 1 9 4.753590 4.753590 1602 +sciencesat 1 7 5.010635 5.010635 1968 +smile 1 7 5.010635 5.010635 1807 +singapor 1 5 5.347108 5.347108 2487 +aliv 1 3 5.857933 5.857933 3864 +disc 1 2 6.263398 6.263398 5626 +tropic 1 2 6.263398 6.263398 5398 +aboutthi 1 2 6.263398 6.263398 5627 +addr 1 2 6.263398 6.263398 5628 +pageyeap 1 1 6.957497 6.957497 13493 +designalgorithm 1 1 6.957497 6.957497 13494 +communityi 1 1 6.957497 6.957497 13495 +lovesto 1 1 6.957497 6.957497 13496 +leekk 1 1 6.957497 6.957497 13497 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..99bbbafb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +softwar 1 220 1.386294 1.386294 30 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +high 1 130 2.079442 2.079442 101 +decemb 1 80 2.564949 2.564949 215 +main 1 67 2.708050 2.708050 256 +laboratori 1 63 2.772589 2.772589 292 +investig 1 51 2.995732 2.995732 353 +seminar 1 38 3.295837 3.295837 470 +computersci 1 30 3.555348 3.555348 562 +robert 1 30 3.555348 3.555348 567 +experiment 1 26 3.688879 3.688879 645 +wai 1 25 3.737670 3.737670 662 +less 1 18 4.060443 4.060443 892 +apart 1 7 5.010635 5.010635 1936 +distributedsystem 1 6 5.164786 5.164786 2022 +blumoferdb 1 5 5.347108 5.347108 2324 +oftexa 1 4 5.568345 5.568345 3003 +buildreli 1 1 6.957497 6.957497 13498 +projectsmemb 1 1 6.957497 6.957497 13499 +lablessss 1 1 6.957497 6.957497 13500 +seriessponsorslast 1 1 6.957497 6.957497 13501 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..d77ebf3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +memori 1 101 2.302585 2.302585 139 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +thing 1 84 2.484907 2.484907 189 +journal 1 83 2.484907 2.484907 183 +april 1 77 2.564949 2.564949 196 +symposium 1 72 2.639057 2.639057 238 +import 1 65 2.772589 2.772589 282 +taylor 1 63 2.772589 2.772589 287 +plai 1 60 2.833213 2.833213 307 +simpl 1 60 2.833213 2.833213 298 +share 1 59 2.833213 2.833213 304 +scientif 1 53 2.944439 2.944439 341 +postal 1 30 3.555348 3.555348 580 +multiprocessor 1 28 3.610918 3.610918 605 +arrai 1 27 3.637586 3.637586 627 +supercomput 1 25 3.737670 3.737670 681 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +portabl 1 20 3.951244 3.951244 819 +comparison 1 19 4.007333 4.007333 863 +novel 1 15 4.248495 4.248495 1039 +conf 1 13 4.382027 4.382027 1181 +decomposit 1 10 4.653960 4.653960 1439 +calvin 1 9 4.753590 4.753590 1518 +austinaustin 1 7 5.010635 5.010635 1966 +banerje 1 6 5.164786 5.164786 2018 +explicitli 1 5 5.347108 5.347108 2308 +parallelprogram 1 5 5.347108 5.347108 2379 +snyder 1 5 5.347108 5.347108 2359 +publicationsth 1 4 5.568345 5.568345 2859 +polymorph 1 4 5.568345 5.568345 2627 +pete 1 3 5.857933 5.857933 3865 +accommod 1 3 5.857933 5.857933 3337 +parallelprocess 1 3 5.857933 5.857933 3626 +performanceanalysi 1 2 6.263398 6.263398 5629 +padua 1 2 6.263398 6.263398 4544 +lincalvin 1 1 6.957497 6.957497 13503 +linassist 1 1 6.957497 6.957497 13504 +sciencesth 1 1 6.957497 6.957497 13502 +iswhat 1 1 6.957497 6.957497 13505 +_study_ 1 1 6.957497 6.957497 13506 +_play_ 1 1 6.957497 6.957497 13507 +carrilresearch 1 1 6.957497 6.957497 13508 +interestscompil 1 1 6.957497 6.957497 13509 +biologyalgorithm 1 1 6.957497 6.957497 13510 +dikaiako 1 1 6.957497 6.957497 13511 +manoussaki 1 1 6.957497 6.957497 13512 +woodward 1 1 6.957497 6.957497 13513 +internationalparallel 1 1 6.957497 6.957497 13514 +sublanguag 1 1 6.957497 6.957497 13515 +compilersfor 1 1 6.957497 6.957497 13516 +gelernt 1 1 6.957497 6.957497 13517 +nicolau 1 1 6.957497 6.957497 13518 +withl 1 1 6.957497 6.957497 13519 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..23cd9d1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +construct 1 139 1.945910 1.945910 82 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +welcom 1 122 2.079442 2.079442 99 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +real 1 93 2.397895 2.397895 144 +comment 1 93 2.397895 2.397895 146 +member 1 84 2.484907 2.484907 165 +good 1 77 2.564949 2.564949 200 +knowledg 1 67 2.708050 2.708050 243 +view 1 70 2.708050 2.708050 254 +result 1 65 2.772589 2.772589 281 +content 1 59 2.833213 2.833213 302 +summer 1 56 2.890372 2.890372 311 +mine 1 26 3.688879 3.688879 654 +theunivers 1 21 3.912023 3.912023 797 +permit 1 16 4.174387 4.174387 962 +replic 1 12 4.465908 4.465908 1231 +incomplet 1 9 4.753590 4.753590 1575 +researchi 1 8 4.875197 4.875197 1756 +apolog 1 6 5.164786 5.164786 2046 +guangtian 1 3 5.857933 5.857933 3810 +inconveni 1 3 5.857933 5.857933 3866 +internship 1 3 5.857933 5.857933 3764 +homepagehi 1 1 6.957497 6.957497 13521 +timeschedul 1 1 6.957497 6.957497 13522 +liugt 1 1 6.957497 6.957497 13520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..93c49e87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +topic 1 114 2.197225 2.197225 110 +taylor 1 63 2.772589 2.772589 287 +locat 1 59 2.833213 2.833213 303 +special 1 56 2.890372 2.890372 320 +physic 1 47 3.091042 3.091042 377 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +photo 1 31 3.496508 3.496508 561 +emphasi 1 22 3.850148 3.850148 755 +itali 1 11 4.553877 4.553877 1378 +interestsi 1 7 5.010635 5.010635 1969 +lorenzo 1 4 5.568345 5.568345 2588 +sytem 1 4 5.568345 5.568345 3015 +maria 1 4 5.568345 5.568345 2954 +alvisi 1 3 5.857933 5.857933 3095 +universit 1 2 6.263398 6.263398 5630 +bologna 1 2 6.263398 6.263398 5631 +laurea 1 1 6.957497 6.957497 13523 +agrav 1 1 6.957497 6.957497 13524 +taylorhal 1 1 6.957497 6.957497 13525 +campusshow 1 1 6.957497 6.957497 13526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..ecd073c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +site 1 106 2.197225 2.197225 119 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +internet 1 83 2.484907 2.484907 186 +ieee 1 86 2.484907 2.484907 190 +come 1 78 2.564949 2.564949 202 +onlin 1 75 2.639057 2.639057 223 +dept 1 64 2.772589 2.772589 291 +undergradu 1 54 2.944439 2.944439 338 +cool 1 49 3.044522 3.044522 374 +compani 1 41 3.218876 3.218876 423 +music 1 42 3.218876 3.218876 436 +live 1 40 3.258097 3.258097 451 +microsoft 1 38 3.295837 3.295837 468 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +weather 1 28 3.610918 3.610918 618 +univ 1 28 3.610918 3.610918 617 +campu 1 27 3.637586 3.637586 623 +thank 1 23 3.806662 3.806662 721 +corpor 1 21 3.912023 3.912023 802 +sigmod 1 19 4.007333 4.007333 877 +tsinghua 1 13 4.382027 4.382027 1195 +shop 1 10 4.653960 4.653960 1469 +siggraph 1 8 4.875197 4.875197 1773 +dictionari 1 8 4.875197 4.875197 1642 +hunt 1 7 5.010635 5.010635 1798 +sigcomm 1 5 5.347108 5.347108 2329 +sigir 1 2 6.263398 6.263398 4873 +addr 1 2 6.263398 6.263398 5628 +networksoth 1 1 6.957497 6.957497 13528 +studyut 1 1 6.957497 6.957497 13529 +universityaustin 1 1 6.957497 6.957497 13530 +siglink 1 1 6.957497 6.957497 13531 +sigmm 1 1 6.957497 6.957497 13532 +newsjob 1 1 6.957497 6.957497 13533 +forcast 1 1 6.957497 6.957497 13534 +luxu 1 1 6.957497 6.957497 13527 +xuelu 1 1 6.957497 6.957497 13535 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..b5b550d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +texa 1 160 1.791759 1.791759 64 +area 1 144 1.945910 1.945910 80 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +august 1 66 2.708050 2.708050 257 +taylor 1 63 2.772589 2.772589 287 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +semest 1 58 2.890372 2.890372 312 +direct 1 57 2.890372 2.890372 316 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +transact 1 39 3.258097 3.258097 438 +industri 1 38 3.295837 3.295837 464 +committe 1 34 3.401197 3.401197 522 +john 1 33 3.433987 3.433987 532 +human 1 32 3.465736 3.465736 546 +chair 1 29 3.583519 3.583519 596 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +qualiti 1 20 3.951244 3.951244 832 +macintosh 1 17 4.110874 4.110874 920 +cognit 1 16 4.174387 4.174387 986 +researchmi 1 14 4.317488 4.317488 1119 +strength 1 9 4.753590 4.753590 1494 +metric 1 7 5.010635 5.010635 1831 +educurr 1 5 5.347108 5.347108 2504 +engineeringc 1 4 5.568345 5.568345 2904 +contemporari 1 4 5.568345 5.568345 2719 +presentarea 1 4 5.568345 5.568345 3026 +andsoftwar 1 4 5.568345 5.568345 2753 +werth 1 4 5.568345 5.568345 3004 +assur 1 4 5.568345 5.568345 2722 +ics 1 4 5.568345 5.568345 2779 +lauri 1 3 5.857933 5.857933 3867 +honour 1 2 6.263398 6.263398 5632 +werthlauri 1 1 6.957497 6.957497 13536 +werthlectur 1 1 6.957497 6.957497 13537 +lwerth 1 1 6.957497 6.957497 13538 +scienceprofession 1 1 6.957497 6.957497 13539 +servicevic 1 1 6.957497 6.957497 13540 +presentco 1 1 6.957497 6.957497 13541 +interestsoftwar 1 1 6.957497 6.957497 13542 +andenviron 1 1 6.957497 6.957497 13543 +publicationsl 1 1 6.957497 6.957497 13544 +tomayko 1 1 6.957497 6.957497 13545 +pagefaculti 1 1 6.957497 6.957497 13546 +profilesc 1 1 6.957497 6.957497 13547 +classeslast 1 1 6.957497 6.957497 13548 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..5d4e293c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +offic 1 299 1.098612 1.098612 13 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +hall 1 146 1.945910 1.945910 65 +theori 1 111 2.197225 2.197225 127 +site 1 106 2.197225 2.197225 119 +info 1 85 2.484907 2.484907 176 +taylor 1 63 2.772589 2.772589 287 +dept 1 64 2.772589 2.772589 291 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +author 1 39 3.258097 3.258097 450 +india 1 32 3.465736 3.465736 550 +univ 1 28 3.610918 3.610918 617 +comp 1 26 3.688879 3.688879 650 +reach 1 24 3.761200 3.761200 688 +offici 1 18 4.060443 4.060443 894 +avenu 1 12 4.465908 4.465908 1277 +madra 1 8 4.875197 4.875197 1770 +colloquium 1 8 4.875197 4.875197 1734 +cricket 1 7 5.010635 5.010635 1945 +oncomput 1 5 5.347108 5.347108 2326 +reddi 1 3 5.857933 5.857933 3277 +worldwid 1 3 5.857933 5.857933 3704 +madhukar 1 2 6.263398 6.263398 5633 +espnet 1 2 6.263398 6.263398 5634 +korupoluwelcom 1 1 6.957497 6.957497 13549 +ahom 1 1 6.957497 6.957497 13550 +madrashomepag 1 1 6.957497 6.957497 13551 +ganga 1 1 6.957497 6.957497 13552 +alumniclass 1 1 6.957497 6.957497 13553 +utalgorithm 1 1 6.957497 6.957497 13554 +sportszon 1 1 6.957497 6.957497 13555 +batchu 1 1 6.957497 6.957497 13556 +korupoluemail 1 1 6.957497 6.957497 13557 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..2cf07e76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +taylor 1 63 2.772589 2.772589 287 +simpl 1 60 2.833213 2.833213 298 +natur 1 44 3.135494 3.135494 406 +richard 1 31 3.496508 3.496508 559 +produc 1 30 3.555348 3.555348 572 +explan 1 16 4.174387 4.174387 985 +mallori 1 2 6.263398 6.263398 5635 +malloryrichard 1 1 6.957497 6.957497 13558 +malloryresearchthesi 1 1 6.957497 6.957497 13559 +quasi 1 1 6.957497 6.957497 13560 +qsimsimul 1 1 6.957497 6.957497 13561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..2e7787dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +academ 1 82 2.484907 2.484907 178 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +state 1 76 2.564949 2.564949 207 +appear 1 78 2.564949 2.564949 210 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +taylor 1 63 2.772589 2.772589 287 +march 1 61 2.833213 2.833213 295 +publish 1 57 2.890372 2.890372 326 +talk 1 53 2.944439 2.944439 336 +particular 1 51 2.995732 2.995732 352 +principl 1 48 3.044522 3.044522 357 +protocol 1 45 3.135494 3.135494 407 +execut 1 45 3.135494 3.135494 404 +third 1 43 3.178054 3.178054 412 +annual 1 40 3.258097 3.258097 458 +submit 1 39 3.258097 3.258097 440 +author 1 39 3.258097 3.258097 450 +correct 1 38 3.295837 3.295837 462 +vita 1 38 3.295837 3.295837 473 +respons 1 37 3.332205 3.332205 476 +tree 1 36 3.367296 3.367296 492 +survei 1 35 3.401197 3.401197 513 +toler 1 33 3.433987 3.433987 533 +dissert 1 32 3.465736 3.465736 549 +fault 1 32 3.465736 3.465736 547 +titl 1 31 3.496508 3.496508 556 +postal 1 30 3.555348 3.555348 580 +consid 1 29 3.583519 3.583519 590 +intend 1 28 3.610918 3.610918 599 +toward 1 25 3.737670 3.737670 668 +flow 1 24 3.761200 3.761200 700 +initi 1 23 3.806662 3.806662 717 +decis 1 23 3.806662 3.806662 728 +self 1 22 3.850148 3.850148 761 +identifi 1 22 3.850148 3.850148 760 +rout 1 21 3.912023 3.912023 793 +prepar 1 20 3.951244 3.951244 824 +finit 1 14 4.317488 4.317488 1106 +step 1 13 4.382027 4.382027 1138 +earlier 1 13 4.382027 4.382027 1140 +joint 1 13 4.382027 4.382027 1130 +stai 1 12 4.465908 4.465908 1215 +label 1 10 4.653960 4.653960 1423 +guarante 1 10 4.653960 4.653960 1391 +invit 1 10 4.653960 4.653960 1428 +candid 1 9 4.753590 4.753590 1606 +occur 1 9 4.753590 4.753590 1572 +said 1 9 4.753590 4.753590 1571 +minimum 1 9 4.753590 4.753590 1555 +depth 1 8 4.875197 4.875197 1636 +span 1 8 4.875197 4.875197 1751 +converg 1 7 5.010635 5.010635 1844 +kluwer 1 6 5.164786 5.164786 2143 +stabil 1 5 5.347108 5.347108 2286 +marco 1 4 5.568345 5.568345 2589 +implicit 1 4 5.568345 5.568345 2830 +maximum 1 4 5.568345 5.568345 2632 +gouda 1 4 5.568345 5.568345 3021 +arora 1 4 5.568345 5.568345 2658 +moham 1 3 5.857933 5.857933 3848 +fifteenth 1 3 5.857933 5.857933 3868 +forev 1 2 6.263398 6.263398 5636 +schneidermarco 1 1 6.957497 6.957497 13564 +schneiderph 1 1 6.957497 6.957497 13565 +austinresearchth 1 1 6.957497 6.957497 13566 +itsstat 1 1 6.957497 6.957497 13567 +legitim 1 1 6.957497 6.957497 13562 +illegitim 1 1 6.957497 6.957497 13563 +whenregardless 1 1 6.957497 6.957497 13568 +systemwhich 1 1 6.957497 6.957497 13569 +tolerantr 1 1 6.957497 6.957497 13570 +anish 1 1 6.957497 6.957497 13571 +silent 1 1 6.957497 6.957497 13572 +shlomi 1 1 6.957497 6.957497 13573 +dolev 1 1 6.957497 6.957497 13574 +ctaylor 1 1 6.957497 6.957497 13575 +usamarco 1 1 6.957497 6.957497 13576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..b67d85f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +code 1 108 2.197225 2.197225 116 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +librari 1 87 2.484907 2.484907 181 +stuff 1 87 2.484907 2.484907 171 +orient 1 80 2.564949 2.564949 205 +addit 1 74 2.639057 2.639057 228 +taylor 1 63 2.772589 2.772589 287 +copi 1 63 2.772589 2.772589 284 +descript 1 64 2.772589 2.772589 271 +best 1 59 2.833213 2.833213 299 +semest 1 58 2.890372 2.890372 312 +allow 1 53 2.944439 2.944439 333 +finger 1 52 2.995732 2.995732 354 +run 1 51 2.995732 2.995732 347 +mark 1 44 3.135494 3.135494 403 +compani 1 41 3.218876 3.218876 423 +taught 1 33 3.433987 3.433987 526 +postal 1 30 3.555348 3.555348 580 +usual 1 28 3.610918 3.610918 608 +full 1 28 3.610918 3.610918 615 +linux 1 27 3.637586 3.637586 631 +reach 1 24 3.761200 3.761200 688 +alloc 1 20 3.951244 3.951244 821 +along 1 18 4.060443 4.060443 878 +brief 1 16 4.174387 4.174387 1001 +intel 1 16 4.174387 4.174387 1000 +precis 1 15 4.248495 4.248495 1023 +appl 1 11 4.553877 4.553877 1303 +motorola 1 9 4.753590 4.753590 1546 +routin 1 9 4.753590 4.753590 1549 +oop 1 8 4.875197 4.875197 1778 +pentium 1 6 5.164786 5.164786 2077 +glenn 1 3 5.857933 5.857933 3869 +down 1 3 5.857933 5.857933 3870 +informationi 1 3 5.857933 5.857933 3871 +listof 1 3 5.857933 5.857933 3322 +publicli 1 3 5.857933 5.857933 3687 +isvia 1 2 6.263398 6.263398 5637 +johnston 1 2 6.263398 6.263398 5638 +somerset 1 2 6.263398 6.263398 5639 +johnstonemark 1 1 6.957497 6.957497 13577 +johnstonecontact 1 1 6.957497 6.957497 13578 +markj 1 1 6.957497 6.957497 13579 +byrichard 1 1 6.957497 6.957497 13580 +brice 1 1 6.957497 6.957497 13581 +analysisclass 1 1 6.957497 6.957497 13582 +somersetdesign 1 1 6.957497 6.957497 13583 +centerresearch 1 1 6.957497 6.957497 13584 +garbagecollector 1 1 6.957497 6.957497 13585 +ofstudi 1 1 6.957497 6.957497 13586 +dissertationpropos 1 1 6.957497 6.957497 13587 +timingof 1 1 6.957497 6.957497 13588 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..0e366112 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +click 1 142 1.945910 1.945910 78 +visit 1 63 2.772589 2.772589 288 +point 1 58 2.890372 2.890372 319 +friend 1 48 3.044522 3.044522 376 +mark 1 44 3.135494 3.135494 403 +markng 1 1 6.957497 6.957497 13589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..e7e59090 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +version 1 113 2.197225 2.197225 122 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +member 1 84 2.484907 2.484907 165 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +state 1 76 2.564949 2.564949 207 +logic 1 71 2.639057 2.639057 230 +result 1 65 2.772589 2.772589 281 +prof 1 64 2.772589 2.772589 273 +taylor 1 63 2.772589 2.772589 287 +reason 1 57 2.890372 2.890372 318 +thesi 1 57 2.890372 2.890372 327 +archiv 1 49 3.044522 3.044522 364 +postal 1 30 3.555348 3.555348 580 +macintosh 1 17 4.110874 4.110874 920 +permit 1 16 4.174387 4.174387 962 +finit 1 14 4.317488 4.317488 1106 +verifi 1 12 4.465908 4.465908 1261 +worth 1 11 4.553877 4.553877 1294 +appl 1 11 4.553877 4.553877 1303 +incomplet 1 9 4.753590 4.753590 1575 +entri 1 8 4.875197 4.875197 1678 +researchi 1 8 4.875197 4.875197 1756 +misra 1 7 5.010635 5.010635 1856 +emerson 1 5 5.347108 5.347108 2547 +proposit 1 5 5.347108 5.347108 2339 +comprehens 1 4 5.568345 5.568345 2745 +marku 1 3 5.857933 5.857933 3872 +andwil 1 3 5.857933 5.857933 3335 +inconveni 1 3 5.857933 5.857933 3866 +groupand 1 3 5.857933 5.857933 3873 +uniti 1 3 5.857933 5.857933 3812 +isalso 1 2 6.263398 6.263398 5640 +kaltenbachmarku 1 1 6.957497 6.957497 13590 +kaltenbachintroductionwelcom 1 1 6.957497 6.957497 13591 +iapolog 1 1 6.957497 6.957497 13592 +spsp 1 1 6.957497 6.957497 13593 +stempor 1 1 6.957497 6.957497 13594 +checkerfor 1 1 6.957497 6.957497 13595 +avisit 1 1 6.957497 6.957497 13596 +theut 1 1 6.957497 6.957497 13597 +departmenthom 1 1 6.957497 6.957497 13598 +archivefor 1 1 6.957497 6.957497 13599 +sworld 1 1 6.957497 6.957497 13600 +supporthom 1 1 6.957497 6.957497 13601 +actansit 1 1 6.957497 6.957497 13602 +theatt 1 1 6.957497 6.957497 13603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..451f43fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +click 1 142 1.945910 1.945910 78 +homepag 1 93 2.397895 2.397895 148 +stuff 1 87 2.484907 2.484907 171 +educ 1 86 2.484907 2.484907 191 +resum 1 79 2.564949 2.564949 217 +taylor 1 63 2.772589 2.772589 287 +guid 1 63 2.772589 2.772589 267 +virtual 1 62 2.772589 2.772589 285 +local 1 55 2.944439 2.944439 334 +math 1 44 3.135494 3.135494 402 +kind 1 32 3.465736 3.465736 541 +postal 1 30 3.555348 3.555348 580 +neural 1 30 3.555348 3.555348 578 +utc 1 27 3.637586 3.637586 629 +demonstr 1 24 3.761200 3.761200 694 +daili 1 24 3.761200 3.761200 706 +displai 1 23 3.806662 3.806662 712 +applet 1 20 3.951244 3.951244 827 +hotlist 1 13 4.382027 4.382027 1199 +paus 1 4 5.568345 5.568345 2965 +mayberri 1 2 6.263398 6.263398 5641 +downtown 1 2 6.263398 6.263398 5642 +texan 1 2 6.263398 6.263398 5489 +memarti 1 1 6.957497 6.957497 13604 +researchal 1 1 6.957497 6.957497 13605 +martym 1 1 6.957497 6.957497 13606 +anywher 1 1 6.957497 6.957497 13607 +virtualc 1 1 6.957497 6.957497 13608 +internetrestaur 1 1 6.957497 6.957497 13609 +tnstechnolog 1 1 6.957497 6.957497 13610 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..f23a03ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +postscript 1 131 2.079442 2.079442 90 +onlin 1 75 2.639057 2.639057 223 +main 1 67 2.708050 2.708050 256 +colleg 1 61 2.833213 2.833213 300 +thesi 1 57 2.890372 2.890372 327 +reason 1 57 2.890372 2.890372 318 +advisor 1 51 2.995732 2.995732 355 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +expect 1 37 3.332205 3.332205 484 +titl 1 31 3.496508 3.496508 556 +action 1 15 4.248495 4.248495 1038 +philosophi 1 13 4.382027 4.382027 1167 +usavoic 1 13 4.382027 4.382027 1198 +vladimir 1 11 4.553877 4.553877 1324 +mepost 1 10 4.653960 4.653960 1472 +sciencesat 1 7 5.010635 5.010635 1968 +baker 1 7 5.010635 5.010635 1812 +causal 1 6 5.164786 5.164786 2024 +lifschitz 1 5 5.347108 5.347108 2542 +commonsens 1 4 5.568345 5.568345 2998 +nonmonoton 1 4 5.568345 5.568345 3023 +norm 1 2 6.263398 6.263398 5643 +kansa 1 2 6.263398 6.263398 5591 +interestscommonsens 1 2 6.263398 6.263398 5596 +actionlog 1 2 6.263398 6.263398 5597 +reasoningmi 1 2 6.263398 6.263398 5598 +mccain 1 1 6.957497 6.957497 13611 +mccainabout 1 1 6.957497 6.957497 13612 +mephd 1 1 6.957497 6.957497 13613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..5d6e2f2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +machin 1 129 2.079442 2.079442 95 +check 1 115 2.197225 2.197225 118 +learn 1 86 2.484907 2.484907 170 +info 1 85 2.484907 2.484907 176 +educ 1 86 2.484907 2.484907 191 +logic 1 71 2.639057 2.639057 230 +taylor 1 63 2.772589 2.772589 287 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +postal 1 30 3.555348 3.555348 580 +english 1 15 4.248495 4.248495 1033 +mari 1 12 4.465908 4.465908 1266 +acquisit 1 10 4.653960 4.653960 1465 +elain 1 5 5.347108 5.347108 2496 +groupunivers 1 3 5.857933 5.857933 3831 +austinresearchmi 1 2 6.263398 6.263398 5644 +formor 1 2 6.263398 6.263398 5335 +mecaliff 1 2 6.263398 6.263398 5645 +califfmari 1 1 6.957497 6.957497 13615 +califfmachin 1 1 6.957497 6.957497 13616 +especiallyinduct 1 1 6.957497 6.957497 13617 +baylor 1 1 6.957497 6.957497 13614 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..347549b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +base 1 165 1.791759 1.791759 50 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +phone 1 175 1.791759 1.791759 45 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +technolog 1 131 2.079442 2.079442 102 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +question 1 91 2.397895 2.397895 141 +sinc 1 90 2.397895 2.397895 159 +select 1 91 2.397895 2.397895 154 +larg 1 82 2.484907 2.484907 168 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +help 1 83 2.484907 2.484907 175 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +addit 1 74 2.639057 2.639057 228 +knowledg 1 67 2.708050 2.708050 243 +goal 1 66 2.708050 2.708050 250 +would 1 67 2.708050 2.708050 251 +test 1 66 2.708050 2.708050 252 +differ 1 66 2.708050 2.708050 253 +simul 1 66 2.708050 2.708050 255 +improv 1 62 2.772589 2.772589 289 +result 1 65 2.772589 2.772589 281 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +plan 1 65 2.772589 2.772589 272 +collect 1 65 2.772589 2.772589 268 +automat 1 61 2.833213 2.833213 306 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +found 1 53 2.944439 2.944439 337 +extens 1 53 2.944439 2.944439 340 +numer 1 49 3.044522 3.044522 369 +pointer 1 48 3.044522 3.044522 368 +answer 1 45 3.135494 3.135494 391 +anoth 1 45 3.135494 3.135494 408 +natur 1 44 3.135494 3.135494 406 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +past 1 42 3.218876 3.218876 428 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +multipl 1 39 3.258097 3.258097 453 +littl 1 39 3.258097 3.258097 454 +ofth 1 36 3.367296 3.367296 491 +especi 1 36 3.367296 3.367296 496 +jame 1 35 3.401197 3.401197 507 +represent 1 35 3.401197 3.401197 512 +concept 1 32 3.465736 3.465736 537 +kind 1 32 3.465736 3.465736 541 +express 1 32 3.465736 3.465736 540 +extend 1 32 3.465736 3.465736 539 +domain 1 30 3.555348 3.555348 564 +steve 1 29 3.583519 3.583519 594 +built 1 29 3.583519 3.583519 592 +retriev 1 27 3.637586 3.637586 621 +task 1 25 3.737670 3.737670 678 +concern 1 25 3.737670 3.737670 666 +jeff 1 25 3.737670 3.737670 673 +begin 1 23 3.806662 3.806662 716 +varieti 1 22 3.850148 3.850148 740 +fact 1 21 3.912023 3.912023 780 +similar 1 21 3.912023 3.912023 771 +alumni 1 21 3.912023 3.912023 807 +expert 1 20 3.951244 3.951244 833 +predict 1 19 4.007333 4.007333 855 +encourag 1 18 4.060443 4.060443 880 +event 1 18 4.060443 4.060443 896 +appropri 1 18 4.060443 4.060443 883 +aid 1 18 4.060443 4.060443 904 +expand 1 17 4.110874 4.110874 928 +otherwis 1 17 4.110874 4.110874 922 +explan 1 16 4.174387 4.174387 985 +normal 1 16 4.174387 4.174387 995 +carl 1 15 4.248495 4.248495 1024 +biologi 1 15 4.248495 4.248495 1049 +english 1 15 4.248495 4.248495 1033 +shown 1 14 4.317488 4.317488 1080 +charl 1 13 4.382027 4.382027 1149 +composit 1 13 4.382027 4.382027 1150 +bruce 1 12 4.465908 4.465908 1226 +brad 1 12 4.465908 4.465908 1264 +peter 1 11 4.553877 4.553877 1316 +eight 1 11 4.553877 4.553877 1331 +qualit 1 11 4.553877 4.553877 1362 +rich 1 10 4.653960 4.653960 1396 +custom 1 10 4.653960 4.653960 1414 +significantli 1 9 4.753590 4.753590 1508 +tutor 1 9 4.753590 4.753590 1552 +mainten 1 9 4.753590 4.753590 1543 +herefor 1 9 4.753590 4.753590 1483 +erik 1 8 4.875197 4.875197 1701 +largest 1 7 5.010635 5.010635 1858 +fred 1 6 5.164786 5.164786 2072 +viewpoint 1 6 5.164786 5.164786 2116 +biolog 1 6 5.164786 5.164786 2147 +ongo 1 6 5.164786 5.164786 2215 +porter 1 5 5.347108 5.347108 2293 +correl 1 5 5.347108 5.347108 2279 +oncomput 1 5 5.347108 5.347108 2326 +notabl 1 5 5.347108 5.347108 2276 +colleagu 1 5 5.347108 5.347108 2304 +focuss 1 5 5.347108 5.347108 2271 +desk 1 5 5.347108 5.347108 2297 +clark 1 4 5.568345 5.568345 2705 +knight 1 4 5.568345 5.568345 2728 +souther 1 3 5.857933 5.857933 3795 +multifunct 1 3 5.857933 5.857933 3826 +implicitli 1 3 5.857933 5.857933 3620 +qsim 1 3 5.857933 5.857933 3862 +proport 1 3 5.857933 5.857933 3293 +karl 1 3 5.857933 5.857933 3623 +boe 1 3 5.857933 5.857933 3318 +mallori 1 2 6.263398 6.263398 5635 +forconstruct 1 2 6.263398 6.263398 5649 +inon 1 2 6.263398 6.263398 4496 +arealso 1 2 6.263398 6.263398 5650 +knowledgebas 1 2 6.263398 6.263398 5136 +adequ 1 2 6.263398 6.263398 4116 +lexicon 1 2 6.263398 6.263398 5651 +bareiss 1 2 6.263398 6.263398 5646 +brant 1 2 6.263398 6.263398 5652 +murrai 1 2 6.263398 6.263398 5647 +rickel 1 2 6.263398 6.263398 5648 +aroundth 1 2 6.263398 6.263398 5653 +groupknowledg 1 1 6.957497 6.957497 13624 +prado 1 1 6.957497 6.957497 13618 +callawai 1 1 6.957497 6.957497 13620 +andersen 1 1 6.957497 6.957497 13621 +overviewour 1 1 6.957497 6.957497 13625 +atuniv 1 1 6.957497 6.957497 13626 +currentexpert 1 1 6.957497 6.957497 13627 +broadknowledg 1 1 6.957497 6.957497 13628 +toexplain 1 1 6.957497 6.957497 13629 +answeringa 1 1 6.957497 6.957497 13630 +formallyrepres 1 1 6.957497 6.957497 13631 +lester 1 1 6.957497 6.957497 13619 +thebiolog 1 1 6.957497 6.957497 13632 +andthos 1 1 6.957497 6.957497 13633 +beanswer 1 1 6.957497 6.957497 13634 +jeffrickel 1 1 6.957497 6.957497 13635 +taskof 1 1 6.957497 6.957497 13636 +thesimplest 1 1 6.957497 6.957497 13637 +dauntingrequir 1 1 6.957497 6.957497 13638 +manymodel 1 1 6.957497 6.957497 13639 +compilerand 1 1 6.957497 6.957497 13640 +bybuild 1 1 6.957497 6.957497 13641 +computingenviron 1 1 6.957497 6.957497 13642 +deskassist 1 1 6.957497 6.957497 13643 +squestion 1 1 6.957497 6.957497 13644 +projectsour 1 1 6.957497 6.957497 13645 +kned 1 1 6.957497 6.957497 13646 +kastl 1 1 6.957497 6.957497 13647 +fare 1 1 6.957497 6.957497 13648 +lex 1 1 6.957497 6.957497 13649 +tripel 1 1 6.957497 6.957497 13650 +theorist 1 1 6.957497 6.957497 13651 +searcher 1 1 6.957497 6.957497 13652 +alumna 1 1 6.957497 6.957497 13653 +lian 1 1 6.957497 6.957497 13654 +acker 1 1 6.957497 6.957497 13622 +eilert 1 1 6.957497 6.957497 13623 +blumenth 1 1 6.957497 6.957497 13655 +eolu 1 1 6.957497 6.957497 13656 +uwyo 1 1 6.957497 6.957497 13657 +clarkp 1 1 6.957497 6.957497 13658 +redwood 1 1 6.957497 6.957497 13659 +ncsu 1 1 6.957497 6.957497 13660 +publicationsclick 1 1 6.957497 6.957497 13661 +projectsclick 1 1 6.957497 6.957497 13662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..6896cba2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +algorithm 1 162 1.791759 1.791759 57 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +text 1 98 2.302585 2.302585 133 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +david 1 71 2.639057 2.639057 232 +goal 1 66 2.708050 2.708050 250 +knowledg 1 67 2.708050 2.708050 243 +execut 1 45 3.135494 3.135494 404 +past 1 42 3.218876 3.218876 428 +soon 1 36 3.367296 3.367296 494 +either 1 35 3.401197 3.401197 506 +bibliographi 1 34 3.401197 3.401197 518 +queri 1 33 3.433987 3.433987 524 +someth 1 31 3.496508 3.496508 554 +robert 1 30 3.555348 3.555348 567 +rule 1 26 3.688879 3.688879 638 +constraint 1 26 3.688879 3.688879 636 +sometim 1 24 3.761200 3.761200 696 +finish 1 22 3.850148 3.850148 748 +basi 1 20 3.951244 3.951244 828 +render 1 17 4.110874 4.110874 947 +match 1 16 4.174387 4.174387 965 +warn 1 14 4.317488 4.317488 1068 +daniel 1 12 4.465908 4.465908 1233 +evolv 1 12 4.465908 4.465908 1223 +candid 1 9 4.753590 4.753590 1606 +presenc 1 8 4.875197 4.875197 1671 +hold 1 8 4.875197 4.875197 1645 +lane 1 8 4.875197 4.875197 1720 +yang 1 8 4.875197 4.875197 1652 +wouldn 1 7 5.010635 5.010635 1970 +srinivasan 1 6 5.164786 5.164786 2175 +mirank 1 5 5.347108 5.347108 2543 +treat 1 5 5.347108 5.347108 2521 +breath 1 4 5.568345 5.568345 2946 +lanc 1 4 5.568345 5.568345 3022 +obsolet 1 3 5.857933 5.857933 3196 +byth 1 3 5.857933 5.857933 3874 +archi 1 3 5.857933 5.857933 3639 +ming 1 3 5.857933 5.857933 3712 +leap 1 2 6.263398 6.263398 5654 +venu 1 2 6.263398 6.263398 5655 +usea 1 2 6.263398 6.263398 4800 +satisfact 1 2 6.263398 6.263398 5656 +bibtex 1 2 6.263398 6.263398 5406 +roberto 1 2 6.263398 6.263398 5468 +bayardo 1 2 6.263398 6.263398 5467 +obermey 1 2 6.263398 6.263398 5657 +vaidyaraman 1 2 6.263398 6.263398 5658 +warshaw 1 2 6.263398 6.263398 5659 +belat 1 1 6.957497 6.957497 13664 +fashionwai 1 1 6.957497 6.957497 13665 +itscomparison 1 1 6.957497 6.957497 13666 +rete 1 1 6.957497 6.957497 13663 +encompass 1 1 6.957497 6.957497 13667 +fundamentalcomput 1 1 6.957497 6.957497 13668 +corollari 1 1 6.957497 6.957497 13669 +thatgoal 1 1 6.957497 6.957497 13670 +gadboi 1 1 6.957497 6.957497 13671 +vasili 1 1 6.957497 6.957497 13672 +samoladi 1 1 6.957497 6.957497 13673 +schrag 1 1 6.957497 6.957497 13674 +andrewsdavid 1 1 6.957497 6.957497 13675 +brantchin 1 1 6.957497 6.957497 13676 +kuoshiow 1 1 6.957497 6.957497 13677 +salvator 1 1 6.957497 6.957497 13678 +stolfo 1 1 6.957497 6.957497 13679 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..e1871823 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +hall 1 146 1.945910 1.945910 65 +process 1 142 1.945910 1.945910 72 +technolog 1 131 2.079442 2.079442 102 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +access 1 102 2.302585 2.302585 136 +select 1 91 2.397895 2.397895 154 +homepag 1 93 2.397895 2.397895 148 +institut 1 84 2.484907 2.484907 187 +ieee 1 86 2.484907 2.484907 190 +method 1 80 2.564949 2.564949 213 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +appli 1 71 2.639057 2.639057 226 +practic 1 70 2.708050 2.708050 246 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +reason 1 57 2.890372 2.890372 318 +profession 1 51 2.995732 2.995732 345 +electron 1 47 3.091042 3.091042 379 +futur 1 41 3.218876 3.218876 427 +formal 1 37 3.332205 3.332205 478 +tech 1 35 3.401197 3.401197 515 +award 1 34 3.401197 3.401197 523 +john 1 33 3.433987 3.433987 532 +chair 1 29 3.583519 3.583519 596 +synchron 1 29 3.583519 3.583519 588 +mind 1 27 3.637586 3.637586 632 +aspect 1 25 3.737670 3.737670 663 +fellow 1 24 3.761200 3.761200 701 +honor 1 23 3.806662 3.806662 729 +equat 1 23 3.806662 3.806662 724 +indian 1 22 3.850148 3.850148 769 +inth 1 22 3.850148 3.850148 741 +particularli 1 19 4.007333 4.007333 867 +north 1 19 4.007333 4.007333 873 +coupl 1 17 4.110874 4.110874 939 +letter 1 16 4.174387 4.174387 981 +weslei 1 16 4.174387 4.174387 983 +researchmi 1 14 4.317488 4.317488 1119 +classic 1 14 4.317488 4.317488 1084 +recurs 1 13 4.382027 4.382027 1127 +addison 1 12 4.465908 4.465908 1230 +kanpur 1 8 4.875197 4.875197 1744 +simon 1 8 4.875197 4.875197 1697 +guggenheim 1 8 4.875197 4.875197 1759 +misra 1 7 5.010635 5.010635 1856 +prentic 1 7 5.010635 5.010635 1838 +phase 1 6 5.164786 5.164786 1977 +holland 1 5 5.347108 5.347108 2490 +jayadev 1 4 5.568345 5.568345 3006 +essai 1 4 5.568345 5.568345 2948 +interestparallel 1 3 5.857933 5.857933 3806 +publicationsj 1 3 5.857933 5.857933 3808 +hoar 1 3 5.857933 5.857933 3875 +nondeterminist 1 3 5.857933 5.857933 3560 +powerlist 1 2 6.263398 6.263398 5660 +loos 1 2 6.263398 6.263398 4774 +chandi 1 2 6.263398 6.263398 5661 +seuss 1 2 6.263398 6.263398 5662 +misrareg 1 1 6.957497 6.957497 13680 +hopkin 1 1 6.957497 6.957497 13681 +fellowarea 1 1 6.957497 6.957497 13682 +asynchronoussystem 1 1 6.957497 6.957497 13683 +otherpap 1 1 6.957497 6.957497 13684 +anoverview 1 1 6.957497 6.957497 13685 +apostscript 1 1 6.957497 6.957497 13686 +versionaccess 1 1 6.957497 6.957497 13687 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..93a0d602 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +texa 1 160 1.791759 1.791759 64 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +click 1 142 1.945910 1.945910 78 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +pictur 1 89 2.397895 2.397895 160 +search 1 95 2.397895 2.397895 155 +associ 1 93 2.397895 2.397895 151 +learn 1 86 2.484907 2.484907 170 +control 1 82 2.484907 2.484907 164 +journal 1 83 2.484907 2.484907 183 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +nation 1 74 2.639057 2.639057 240 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +plan 1 65 2.772589 2.772589 272 +artifici 1 63 2.772589 2.772589 280 +reason 1 57 2.890372 2.890372 318 +special 1 56 2.890372 2.890372 320 +index 1 56 2.890372 2.890372 309 +scientif 1 53 2.944439 2.944439 341 +standard 1 48 3.044522 3.044522 365 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +combin 1 42 3.218876 3.218876 421 +autom 1 41 3.218876 3.218876 434 +form 1 39 3.258097 3.258097 443 +paul 1 38 3.295837 3.295837 471 +john 1 33 3.433987 3.433987 532 +queri 1 33 3.433987 3.433987 524 +ad 1 32 3.465736 3.465736 544 +richard 1 31 3.496508 3.496508 559 +utc 1 27 3.637586 3.637586 629 +american 1 27 3.637586 3.637586 634 +revis 1 26 3.688879 3.688879 640 +rule 1 26 3.688879 3.688879 638 +experiment 1 26 3.688879 3.688879 645 +subject 1 26 3.688879 3.688879 647 +jeff 1 25 3.737670 3.737670 673 +decis 1 23 3.806662 3.806662 728 +alumni 1 21 3.912023 3.912023 807 +supervis 1 20 3.951244 3.951244 840 +comparison 1 19 4.007333 4.007333 863 +partial 1 18 4.060443 4.060443 900 +repositori 1 17 4.110874 4.110874 932 +fourth 1 16 4.174387 4.174387 999 +explan 1 16 4.174387 4.174387 985 +atth 1 15 4.248495 4.248495 1019 +prolog 1 13 4.382027 4.382027 1155 +joint 1 13 4.382027 4.382027 1130 +mellon 1 13 4.382027 4.382027 1179 +mari 1 12 4.465908 4.465908 1266 +carnegi 1 12 4.465908 4.465908 1260 +induct 1 11 4.553877 4.553877 1304 +qualit 1 11 4.553877 4.553877 1362 +refin 1 11 4.553877 4.553877 1363 +acquisit 1 10 4.653960 4.653960 1465 +moonei 1 9 4.753590 4.753590 1520 +ataustin 1 9 4.753590 4.753590 1610 +tutor 1 9 4.753590 4.753590 1552 +classif 1 9 4.753590 4.753590 1586 +linguist 1 9 4.753590 4.753590 1593 +empir 1 8 4.875197 4.875197 1722 +aaai 1 8 4.875197 4.875197 1750 +european 1 8 4.875197 4.875197 1763 +irvin 1 8 4.875197 4.875197 1660 +predic 1 7 5.010635 5.010635 1806 +planner 1 7 5.010635 5.010635 1797 +illinoi 1 7 5.010635 5.010635 1941 +thompson 1 6 5.164786 5.164786 2049 +neither 1 6 5.164786 5.164786 1990 +machinelearn 1 6 5.164786 5.164786 2084 +oxford 1 6 5.164786 5.164786 2121 +elain 1 5 5.347108 5.347108 2496 +bradlei 1 5 5.347108 5.347108 2554 +proposit 1 5 5.347108 5.347108 2339 +sowmya 1 4 5.568345 5.568345 2670 +diagnosi 1 4 5.568345 5.568345 3027 +uncertain 1 4 5.568345 5.568345 2758 +invent 1 4 5.568345 5.568345 3028 +ijcai 1 4 5.568345 5.568345 2901 +hermjakob 1 3 5.857933 5.857933 3876 +ramachandran 1 3 5.857933 5.857933 3742 +cindi 1 3 5.857933 5.857933 3830 +acad 1 3 5.857933 5.857933 3847 +signll 1 3 5.857933 5.857933 3877 +ucpop 1 3 5.857933 5.857933 3878 +focuseson 1 2 6.263398 6.263398 5433 +califf 1 2 6.263398 6.263398 5664 +mecaliff 1 2 6.263398 6.263398 5645 +tara 1 2 6.263398 6.263398 5555 +estlin 1 2 6.263398 6.263398 5554 +cthomp 1 2 6.263398 6.263398 5530 +dirk 1 2 6.263398 6.263398 5665 +subramanian 1 2 6.263398 6.263398 5666 +georgetown 1 2 6.263398 6.263398 5667 +drake 1 2 6.263398 6.263398 5668 +abduct 1 2 6.263398 6.263398 5663 +accel 1 2 6.263398 6.263398 5166 +foidl 1 2 6.263398 6.263398 4270 +icml 1 2 6.263398 6.263398 5669 +quinlan 1 2 6.263398 6.263398 4797 +learner 1 2 6.263398 6.263398 4508 +prodigi 1 2 6.263398 6.263398 5670 +knowledgerefin 1 1 6.957497 6.957497 13691 +baff 1 1 6.957497 6.957497 13688 +scicomp 1 1 6.957497 6.957497 13692 +mahonei 1 1 6.957497 6.957497 13689 +firstadvisor 1 1 6.957497 6.957497 13693 +hwee 1 1 6.957497 6.957497 13694 +nhweetou 1 1 6.957497 6.957497 13695 +trantor 1 1 6.957497 6.957497 13696 +ourston 1 1 6.957497 6.957497 13697 +dirk_ourston 1 1 6.957497 6.957497 13698 +cpqm 1 1 6.957497 6.957497 13699 +saic 1 1 6.957497 6.957497 13700 +furtwangen 1 1 6.957497 6.957497 13701 +siddarth 1 1 6.957497 6.957497 13702 +zell 1 1 6.957497 6.957497 13703 +speedup 1 1 6.957497 6.957497 13690 +reasoningher 1 1 6.957497 6.957497 13704 +fort 1 1 6.957497 6.957497 13705 +chillin 1 1 6.957497 6.957497 13706 +dolphin 1 1 6.957497 6.957497 13707 +ilpnet 1 1 6.957497 6.957497 13708 +sigart 1 1 6.957497 6.957497 13709 +aritfici 1 1 6.957497 6.957497 13710 +biblio 1 1 6.957497 6.957497 13711 +jair 1 1 6.957497 6.957497 13712 +foil 1 1 6.957497 6.957497 13713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..2d2f0b59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +machin 1 129 2.079442 2.079442 95 +theori 1 111 2.197225 2.197225 127 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +complet 1 77 2.564949 2.564949 208 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +effici 1 73 2.639057 2.639057 233 +knowledg 1 67 2.708050 2.708050 243 +degre 1 69 2.708050 2.708050 259 +artifici 1 63 2.772589 2.772589 280 +improv 1 62 2.772589 2.772589 289 +plan 1 65 2.772589 2.772589 272 +prof 1 64 2.772589 2.772589 273 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +finger 1 52 2.995732 2.995732 354 +natur 1 44 3.135494 3.135494 406 +combin 1 42 3.218876 3.218876 421 +small 1 39 3.258097 3.258097 447 +vita 1 38 3.295837 3.295837 473 +word 1 34 3.401197 3.401197 508 +obtain 1 33 3.433987 3.433987 534 +posit 1 31 3.496508 3.496508 552 +computersci 1 30 3.555348 3.555348 562 +neural 1 30 3.555348 3.555348 578 +postal 1 30 3.555348 3.555348 580 +symbol 1 27 3.637586 3.637586 620 +rule 1 26 3.688879 3.688879 638 +compar 1 26 3.688879 3.688879 648 +qualiti 1 20 3.951244 3.951244 832 +lisp 1 18 4.060443 4.060443 897 +attend 1 18 4.060443 4.060443 893 +explan 1 16 4.174387 4.174387 985 +prolog 1 13 4.382027 4.382027 1155 +went 1 12 4.465908 4.465908 1279 +sens 1 11 4.553877 4.553877 1305 +induct 1 11 4.553877 4.553877 1304 +refin 1 11 4.553877 4.553877 1363 +interestsmi 1 10 4.653960 4.653960 1462 +acquisit 1 10 4.653960 4.653960 1465 +town 1 10 4.653960 4.653960 1458 +moonei 1 9 4.753590 4.753590 1520 +extract 1 8 4.875197 4.875197 1728 +empir 1 8 4.875197 4.875197 1722 +grew 1 8 4.875197 4.875197 1742 +illinoi 1 7 5.010635 5.010635 1941 +raymond 1 5 5.347108 5.347108 2313 +began 1 5 5.347108 5.347108 2498 +disambigu 1 4 5.568345 5.568345 2899 +bayesian 1 4 5.568345 5.568345 2671 +urbana 1 3 5.857933 5.857933 3879 +primarilyin 1 3 5.857933 5.857933 3832 +parser 1 3 5.857933 5.857933 3141 +myph 1 3 5.857933 5.857933 3880 +champaign 1 2 6.263398 6.263398 5671 +lexicon 1 2 6.263398 6.263398 5651 +highschool 1 2 6.263398 6.263398 5672 +homepageraymond 1 1 6.957497 6.957497 13714 +mooneyassoci 1 1 6.957497 6.957497 13715 +informationfal 1 1 6.957497 6.957497 13716 +learningspr 1 1 6.957497 6.957497 13717 +iiperson 1 1 6.957497 6.957497 13718 +historyi 1 1 6.957497 6.957497 13719 +fallon 1 1 6.957497 6.957497 13720 +wherestart 1 1 6.957497 6.957497 13721 +fallontownship 1 1 6.957497 6.957497 13722 +urbanato 1 1 6.957497 6.957497 13723 +learninggroup 1 1 6.957497 6.957497 13724 +gerald 1 1 6.957497 6.957497 13725 +dejong 1 1 6.957497 6.957497 13726 +meadowfir 1 1 6.957497 6.957497 13727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..f5a4d7b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +world 1 115 2.197225 2.197225 126 +specif 1 106 2.197225 2.197225 106 +mani 1 92 2.397895 2.397895 150 +real 1 93 2.397895 2.397895 144 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +homepag 1 93 2.397895 2.397895 148 +control 1 82 2.484907 2.484907 164 +resourc 1 81 2.484907 2.484907 172 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +state 1 76 2.564949 2.564949 207 +intellig 1 72 2.639057 2.639057 225 +practic 1 70 2.708050 2.708050 246 +knowledg 1 67 2.708050 2.708050 243 +taylor 1 63 2.772589 2.772589 287 +plai 1 60 2.833213 2.833213 307 +local 1 55 2.944439 2.944439 334 +visitor 1 49 3.044522 3.044522 371 +must 1 40 3.258097 3.258097 442 +game 1 36 3.367296 3.367296 498 +return 1 34 3.401197 3.401197 502 +obtain 1 33 3.433987 3.433987 534 +neural 1 30 3.555348 3.555348 578 +domain 1 30 3.555348 3.555348 564 +postal 1 30 3.555348 3.555348 580 +utc 1 27 3.637586 3.637586 629 +task 1 25 3.737670 3.737670 678 +sport 1 25 3.737670 3.737670 683 +decis 1 23 3.806662 3.806662 728 +sequenc 1 23 3.806662 3.806662 734 +sequenti 1 22 3.850148 3.850148 745 +rout 1 21 3.912023 3.912023 793 +alloc 1 20 3.951244 3.951244 821 +agent 1 18 4.060443 4.060443 910 +upon 1 16 4.174387 4.174387 978 +action 1 15 4.248495 4.248495 1038 +universityof 1 15 4.248495 4.248495 1061 +dave 1 14 4.317488 4.317488 1098 +finit 1 14 4.317488 4.317488 1106 +misc 1 13 4.382027 4.382027 1124 +evolv 1 12 4.465908 4.465908 1223 +enter 1 10 4.653960 4.653960 1454 +total 1 10 4.653960 4.653960 1398 +observ 1 9 4.753590 4.753590 1578 +character 1 8 4.875197 4.875197 1767 +canb 1 7 5.010635 5.010635 1846 +highest 1 4 5.568345 5.568345 2950 +thesystem 1 3 5.857933 5.857933 3881 +scenario 1 2 6.263398 6.263398 5524 +geneticalgorithm 1 2 6.263398 6.263398 5673 +amparticularli 1 2 6.263398 6.263398 5558 +unavail 1 2 6.263398 6.263398 5046 +tulan 1 2 6.263398 6.263398 5559 +moriartydav 1 1 6.957497 6.957497 13729 +moriarti 1 1 6.957497 6.957497 13728 +researchsequenti 1 1 6.957497 6.957497 13730 +problemsinclud 1 1 6.957497 6.957497 13731 +stateof 1 1 6.957497 6.957497 13732 +selectanoth 1 1 6.957497 6.957497 13733 +payoff 1 1 6.957497 6.957497 13734 +madeor 1 1 6.957497 6.957497 13735 +thesequ 1 1 6.957497 6.957497 13736 +cumulativepayoff 1 1 6.957497 6.957497 13737 +iscurr 1 1 6.957497 6.957497 13738 +costli 1 1 6.957497 6.957497 13739 +havestudi 1 1 6.957497 6.957497 13740 +constraintsatisfact 1 1 6.957497 6.957497 13741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..d4f8b061 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +educ 1 86 2.484907 2.484907 191 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +map 1 39 3.258097 3.258097 452 +campu 1 27 3.637586 3.637586 623 +mine 1 26 3.688879 3.688879 654 +reach 1 24 3.761200 3.761200 688 +edulast 1 17 4.110874 4.110874 927 +utah 1 9 4.753590 4.753590 1585 +eduresearch 1 6 5.164786 5.164786 2205 +trail 1 6 5.164786 5.164786 2071 +mehom 1 4 5.568345 5.568345 2979 +wade 1 1 6.957497 6.957497 13742 +barnesm 1 1 6.957497 6.957497 13744 +barnesmwbarn 1 1 6.957497 6.957497 13745 +workhelp 1 1 6.957497 6.957497 13746 +pagestyp 1 1 6.957497 6.957497 13747 +literatureliteratur 1 1 6.957497 6.957497 13748 +notesclassesbackground 1 1 6.957497 6.957497 13749 +informationph 1 1 6.957497 6.957497 13750 +tanglebriar 1 1 6.957497 6.957497 13751 +yete 1 1 6.957497 6.957497 13752 +mwbarn 1 1 6.957497 6.957497 13743 +eduauthor 1 1 6.957497 6.957497 13753 +barnesemail 1 1 6.957497 6.957497 13754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..0a3eeab4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +teach 1 108 2.197225 2.197225 112 +pleas 1 113 2.197225 2.197225 114 +person 1 111 2.197225 2.197225 117 +assist 1 112 2.197225 2.197225 113 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +resum 1 79 2.564949 2.564949 217 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +receiv 1 66 2.708050 2.708050 244 +abstract 1 62 2.772589 2.772589 276 +creat 1 63 2.772589 2.772589 277 +room 1 59 2.833213 2.833213 301 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +direct 1 57 2.890372 2.890372 316 +profession 1 51 2.995732 2.995732 345 +right 1 48 3.044522 3.044522 363 +author 1 39 3.258097 3.258097 450 +vita 1 38 3.295837 3.295837 473 +feel 1 37 3.332205 3.332205 483 +bibliographi 1 34 3.401197 3.401197 518 +curriculum 1 33 3.433987 3.433987 535 +travel 1 30 3.555348 3.555348 579 +chair 1 29 3.583519 3.583519 596 +full 1 28 3.610918 3.610918 615 +load 1 28 3.610918 3.610918 601 +reach 1 24 3.761200 3.761200 688 +brows 1 23 3.806662 3.806662 726 +reserv 1 20 3.951244 3.951244 808 +els 1 19 4.007333 4.007333 843 +spend 1 19 4.007333 4.007333 850 +account 1 18 4.060443 4.060443 882 +senior 1 14 4.317488 4.317488 1120 +settimeout 1 5 5.347108 5.347108 2536 +dale 1 4 5.568345 5.568345 2687 +websit 1 4 5.568345 5.568345 2726 +seed 1 4 5.568345 5.568345 2984 +timertwo 1 4 5.568345 5.568345 2985 +oftexa 1 4 5.568345 5.568345 3003 +whichcontain 1 4 5.568345 5.568345 2714 +scrollit_rl 1 3 5.857933 5.857933 3882 +retir 1 2 6.263398 6.263398 5674 +nell 1 1 6.957497 6.957497 13755 +pagesunivers 1 1 6.957497 6.957497 13756 +departmentwelcom 1 1 6.957497 6.957497 13757 +utaustin 1 1 6.957497 6.957497 13758 +fromful 1 1 6.957497 6.957497 13759 +falland 1 1 6.957497 6.957497 13760 +ofdissert 1 1 6.957497 6.957497 13761 +memento 1 1 6.957497 6.957497 13762 +nontechn 1 1 6.957497 6.957497 13763 +anycorrespond 1 1 6.957497 6.957497 13764 +ndale 1 1 6.957497 6.957497 13765 +profilepublicationsresearch 1 1 6.957497 6.957497 13766 +interestsperson 1 1 6.957497 6.957497 13767 +interestsnel 1 1 6.957497 6.957497 13768 +westlak 1 1 6.957497 6.957497 13769 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..087a446d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +design 1 213 1.386294 1.386294 25 +support 1 132 1.945910 1.945910 83 +note 1 142 1.945910 1.945910 67 +pleas 1 113 2.197225 2.197225 114 +view 1 70 2.708050 2.708050 254 +browser 1 56 2.890372 2.890372 313 +without 1 50 3.044522 3.044522 370 +netscap 1 44 3.135494 3.135494 395 +keep 1 44 3.135494 3.135494 409 +download 1 36 3.367296 3.367296 489 +either 1 35 3.401197 3.401197 506 +mind 1 27 3.637586 3.637586 632 +background 1 25 3.737670 3.737670 664 +frame 1 24 3.761200 3.761200 684 +color 1 22 3.850148 3.850148 762 +navig 1 21 3.912023 3.912023 796 +choos 1 16 4.174387 4.174387 964 +pretti 1 13 4.382027 4.382027 1191 +latter 1 9 4.753590 4.753590 1522 +chosen 1 6 5.164786 5.164786 1984 +blame 1 3 5.857933 5.857933 3636 +neeraj 1 2 6.263398 6.263398 5577 +obnoxi 1 1 6.957497 6.957497 13770 +chartreus 1 1 6.957497 6.957497 13771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..7fa1ae8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +sciencesunivers 1 37 3.332205 3.332205 486 +log 1 19 4.007333 4.007333 857 +kumar 1 9 4.753590 4.753590 1506 +sciencedepart 1 6 5.164786 5.164786 2172 +natarajan 1 2 6.263398 6.263398 4377 +austini 1 2 6.263398 6.263398 5527 +gnan 1 1 6.957497 6.957497 13772 +pagegnana 1 1 6.957497 6.957497 13773 +edufind 1 1 6.957497 6.957497 13774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..1cd21cc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +number 1 130 2.079442 2.079442 97 +look 1 107 2.197225 2.197225 115 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +visitor 1 49 3.044522 3.044522 371 +term 1 43 3.178054 3.178054 411 +queri 1 33 3.433987 3.433987 524 +art 1 29 3.583519 3.583519 593 +quit 1 27 3.637586 3.637586 633 +altern 1 26 3.688879 3.688879 641 +output 1 21 3.912023 3.912023 788 +medic 1 17 4.110874 4.110874 958 +doesn 1 15 4.248495 4.248495 1055 +score 1 15 4.248495 4.248495 1017 +typic 1 11 4.553877 4.553877 1360 +hit 1 7 5.010635 5.010635 1965 +arora 1 4 5.568345 5.568345 2658 +ters 1 3 5.857933 5.857933 3297 +nimar 1 2 6.263398 6.263398 4188 +singh 1 2 6.263398 6.263398 5675 +knowwhat 1 2 6.263398 6.263398 5456 +clearer 1 2 6.263398 6.263398 5676 +bookmarksto 1 1 6.957497 6.957497 13775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..6f95e7cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +machin 1 129 2.079442 2.079442 95 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +associ 1 93 2.397895 2.397895 151 +learn 1 86 2.484907 2.484907 170 +second 1 81 2.484907 2.484907 166 +novemb 1 81 2.484907 2.484907 179 +resum 1 79 2.564949 2.564949 217 +meet 1 72 2.639057 2.639057 229 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +prof 1 64 2.772589 2.772589 273 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +week 1 52 2.995732 2.995732 343 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +discuss 1 45 3.135494 3.135494 399 +third 1 43 3.178054 3.178054 412 +probabl 1 40 3.258097 3.258097 455 +close 1 38 3.295837 3.295837 465 +ofth 1 36 3.367296 3.367296 491 +everi 1 34 3.401197 3.401197 519 +print 1 34 3.401197 3.401197 503 +neural 1 30 3.555348 3.555348 578 +particip 1 29 3.583519 3.583519 589 +usual 1 28 3.610918 3.610918 608 +propos 1 28 3.610918 3.610918 602 +utc 1 27 3.637586 3.637586 629 +previous 1 17 4.110874 4.110874 923 +coordin 1 13 4.382027 4.382027 1182 +mari 1 12 4.465908 4.465908 1266 +acquisit 1 10 4.653960 4.653960 1465 +moonei 1 9 4.753590 4.753590 1520 +risto 1 9 4.753590 4.753590 1523 +linguist 1 9 4.753590 4.753590 1593 +miikkulainen 1 8 4.875197 4.875197 1667 +thompson 1 6 5.164786 5.164786 2049 +elain 1 5 5.347108 5.347108 2496 +tang 1 5 5.347108 5.347108 2409 +hermjakob 1 3 5.857933 5.857933 3876 +poon 1 3 5.857933 5.857933 3820 +cindi 1 3 5.857933 5.857933 3830 +signll 1 3 5.857933 5.857933 3877 +groupat 1 2 6.263398 6.263398 5677 +bobbi 1 2 6.263398 6.263398 5678 +califf 1 2 6.263398 6.263398 5664 +marti 1 2 6.263398 6.263398 5679 +mayberri 1 2 6.263398 6.263398 5641 +rupert 1 2 6.263398 6.263398 5680 +acquist 1 1 6.957497 6.957497 13776 +groupnatur 1 1 6.957497 6.957497 13777 +austinw 1 1 6.957497 6.957497 13778 +acquisitionand 1 1 6.957497 6.957497 13779 +havedrawn 1 1 6.957497 6.957497 13780 +bryant 1 1 6.957497 6.957497 13781 +ataustinlast 1 1 6.957497 6.957497 13782 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..b053dde9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +structur 1 106 2.197225 2.197225 105 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +book 1 99 2.302585 2.302585 131 +sourc 1 77 2.564949 2.564949 201 +intellig 1 72 2.639057 2.639057 225 +prof 1 64 2.772589 2.772589 273 +artifici 1 63 2.772589 2.772589 280 +organ 1 65 2.772589 2.772589 265 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +detail 1 57 2.890372 2.890372 321 +visitor 1 49 3.044522 3.044522 371 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +neural 1 30 3.555348 3.555348 578 +utc 1 27 3.637586 3.637586 629 +decis 1 23 3.806662 3.806662 728 +self 1 22 3.850148 3.850148 761 +alumni 1 21 3.912023 3.912023 807 +newsgroup 1 21 3.912023 3.912023 783 +supervis 1 20 3.951244 3.951244 840 +hypertext 1 19 4.007333 4.007333 865 +concentr 1 18 4.060443 4.060443 906 +demo 1 18 4.060443 4.060443 888 +later 1 15 4.248495 4.248495 1043 +evolv 1 12 4.465908 4.465908 1223 +genet 1 10 4.653960 4.653960 1409 +ataustin 1 9 4.753590 4.753590 1610 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +poster 1 7 5.010635 5.010635 1814 +schema 1 6 5.164786 5.164786 1988 +groupth 1 5 5.347108 5.347108 2549 +net 1 4 5.568345 5.568345 2741 +episod 1 4 5.568345 5.568345 2747 +cortic 1 3 5.857933 5.857933 3857 +privat 1 3 5.857933 5.857933 3496 +andcognit 1 2 6.263398 6.263398 5681 +ristomiikkulainen 1 1 6.957497 6.957497 13783 +basedvis 1 1 6.957497 6.957497 13784 +mapbelow 1 1 6.957497 6.957497 13785 +thecortex 1 1 6.957497 6.957497 13786 +linkswusagemartym 1 1 6.957497 6.957497 13787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..1b47ab70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +associ 1 93 2.397895 2.397895 151 +graphic 1 90 2.397895 2.397895 147 +internet 1 83 2.484907 2.484907 186 +activ 1 84 2.484907 2.484907 182 +interfac 1 79 2.564949 2.564949 209 +server 1 76 2.564949 2.564949 204 +state 1 76 2.564949 2.564949 207 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +artifici 1 63 2.772589 2.772589 280 +creat 1 63 2.772589 2.772589 277 +interact 1 62 2.772589 2.772589 270 +automat 1 61 2.833213 2.833213 306 +physic 1 47 3.091042 3.091042 377 +vita 1 38 3.295837 3.295837 473 +connect 1 37 3.332205 3.332205 485 +common 1 30 3.555348 3.555348 574 +measur 1 28 3.610918 3.610918 609 +univ 1 28 3.610918 3.610918 617 +honor 1 23 3.806662 3.806662 729 +director 1 22 3.850148 3.850148 767 +unit 1 21 3.912023 3.912023 779 +expert 1 20 3.951244 3.951244 833 +demo 1 18 4.060443 4.060443 888 +lisp 1 18 4.060443 4.060443 897 +atth 1 15 4.248495 4.248495 1019 +english 1 15 4.248495 4.248495 1033 +draw 1 14 4.317488 4.317488 1086 +convert 1 13 4.382027 4.382027 1122 +speech 1 12 4.465908 4.465908 1222 +ofcomput 1 10 4.653960 4.653960 1442 +novak 1 9 4.753590 4.753590 1521 +reus 1 8 4.875197 4.875197 1661 +convers 1 8 4.875197 4.875197 1673 +gordon 1 6 5.164786 5.164786 2032 +shell 1 5 5.347108 5.347108 2353 +diagram 1 5 5.347108 5.347108 2346 +highest 1 4 5.568345 5.568345 2950 +intelligencec 1 4 5.568345 5.568345 2673 +isaac 1 3 5.857933 5.857933 3855 +compilersc 1 2 6.263398 6.263398 4237 +intelligencelaboratori 1 1 6.957497 6.957497 13788 +genericalgorithmssolv 1 1 6.957497 6.957497 13789 +specifiedinformallyartifici 1 1 6.957497 6.957497 13790 +intelligencecurriculum 1 1 6.957497 6.957497 13791 +publicationsemploymentgrantsprofession 1 1 6.957497 6.957497 13792 +honorscurriculum 1 1 6.957497 6.957497 13793 +vitaefre 1 1 6.957497 6.957497 13794 +tmycin 1 1 6.957497 6.957497 13795 +emycin 1 1 6.957497 6.957497 13796 +lispconvers 1 1 6.957497 6.957497 13797 +measurementsoftwar 1 1 6.957497 6.957497 13798 +schemec 1 1 6.957497 6.957497 13799 +programmingweb 1 1 6.957497 6.957497 13800 +linksweatheraddress 1 1 6.957497 6.957497 13801 +ctai 1 1 6.957497 6.957497 13802 +austinaustintexa 1 1 6.957497 6.957497 13803 +faxnovak 1 1 6.957497 6.957497 13804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..78ccbf6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +welcom 1 122 2.079442 2.079442 99 +pleas 1 113 2.197225 2.197225 114 +mani 1 92 2.397895 2.397895 150 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +come 1 78 2.564949 2.564949 202 +want 1 79 2.564949 2.564949 199 +resum 1 79 2.564949 2.564949 217 +free 1 73 2.639057 2.639057 224 +august 1 66 2.708050 2.708050 257 +date 1 51 2.995732 2.995732 344 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +get 1 46 3.091042 3.091042 380 +paul 1 38 3.295837 3.295837 471 +feel 1 37 3.332205 3.332205 483 +download 1 36 3.367296 3.367296 489 +hope 1 28 3.610918 3.610918 610 +except 1 28 3.610918 3.610918 607 +thank 1 23 3.806662 3.806662 721 +size 1 23 3.806662 3.806662 713 +sent 1 22 3.850148 3.850148 763 +beauti 1 18 4.060443 4.060443 912 +anyon 1 17 4.110874 4.110874 916 +stock 1 16 4.174387 4.174387 1007 +wait 1 13 4.382027 4.382027 1168 +remov 1 12 4.465908 4.465908 1225 +enter 1 10 4.653960 4.653960 1454 +chanc 1 7 5.010635 5.010635 1960 +poster 1 7 5.010635 5.010635 1814 +hear 1 7 5.010635 5.010635 1940 +marri 1 7 5.010635 5.010635 1946 +feet 1 5 5.347108 5.347108 2492 +blow 1 5 5.347108 5.347108 2407 +complaint 1 4 5.568345 5.568345 2795 +queen 1 4 5.568345 5.568345 2919 +laugh 1 3 5.857933 5.857933 3659 +panic 1 2 6.263398 6.263398 5682 +gorgeou 1 2 6.263398 6.263398 5082 +meghan 1 1 6.957497 6.957497 13805 +brienhi 1 1 6.957497 6.957497 13807 +wipe 1 1 6.957497 6.957497 13808 +crappi 1 1 6.957497 6.957497 13809 +insult 1 1 6.957497 6.957497 13806 +obrien 1 1 6.957497 6.957497 13810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..4cfb8c16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +databas 1 122 2.079442 2.079442 86 +theunivers 1 21 3.912023 3.912023 797 +systemsth 1 3 5.857933 5.857933 3835 +oguer 1 1 6.957497 6.957497 13811 +gutierrezogu 1 1 6.957497 6.957497 13812 +gutierrezth 1 1 6.957497 6.957497 13813 +austinprojectsomioswwhlinksconfer 1 1 6.957497 6.957497 13814 +worldemail 1 1 6.957497 6.957497 13815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..7167ea9c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +introduct 1 126 2.079442 2.079442 87 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +make 1 111 2.197225 2.197225 120 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +memori 1 101 2.302585 2.302585 139 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +homepag 1 93 2.397895 2.397895 148 +larg 1 82 2.484907 2.484907 168 +info 1 85 2.484907 2.484907 176 +contain 1 81 2.484907 2.484907 174 +thing 1 84 2.484907 2.484907 189 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +good 1 77 2.564949 2.564949 200 +effici 1 73 2.639057 2.639057 233 +write 1 72 2.639057 2.639057 222 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +workshop 1 71 2.639057 2.639057 239 +free 1 73 2.639057 2.639057 224 +integr 1 67 2.708050 2.708050 245 +prof 1 64 2.772589 2.772589 273 +virtual 1 62 2.772589 2.772589 285 +collect 1 65 2.772589 2.772589 268 +written 1 63 2.772589 2.772589 278 +descript 1 64 2.772589 2.772589 271 +improv 1 62 2.772589 2.772589 289 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +space 1 57 2.890372 2.890372 310 +thesi 1 57 2.890372 2.890372 327 +sever 1 56 2.890372 2.890372 322 +three 1 54 2.944439 2.944439 330 +extens 1 53 2.944439 2.944439 340 +local 1 55 2.944439 2.944439 334 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +pointer 1 48 3.044522 3.044522 368 +standard 1 48 3.044522 3.044522 365 +basic 1 50 3.044522 3.044522 360 +done 1 47 3.091042 3.091042 381 +adapt 1 46 3.091042 3.091042 387 +mark 1 44 3.135494 3.135494 403 +keep 1 44 3.135494 3.135494 409 +anoth 1 45 3.135494 3.135494 408 +cach 1 41 3.218876 3.218876 432 +small 1 39 3.258097 3.258097 447 +form 1 39 3.258097 3.258097 443 +paul 1 38 3.295837 3.295837 471 +open 1 38 3.295837 3.295837 469 +especi 1 36 3.367296 3.367296 496 +michael 1 35 3.401197 3.401197 514 +survei 1 35 3.401197 3.401197 513 +bibliographi 1 34 3.401197 3.401197 518 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +extend 1 32 3.465736 3.465736 539 +storag 1 31 3.496508 3.496508 553 +hard 1 30 3.555348 3.555348 563 +travel 1 30 3.555348 3.555348 579 +releas 1 28 3.610918 3.610918 616 +progress 1 28 3.610918 3.610918 598 +great 1 27 3.637586 3.637586 626 +although 1 25 3.737670 3.737670 667 +store 1 24 3.761200 3.761200 693 +mike 1 24 3.761200 3.761200 703 +interpret 1 24 3.761200 3.761200 686 +highli 1 23 3.806662 3.806662 725 +thread 1 23 3.806662 3.806662 722 +brows 1 23 3.806662 3.806662 726 +hierarchi 1 22 3.850148 3.850148 744 +try 1 22 3.850148 3.850148 764 +supervis 1 20 3.951244 3.951244 840 +alloc 1 20 3.951244 3.951244 821 +scheme 1 20 3.951244 3.951244 818 +portabl 1 20 3.951244 3.951244 819 +mostli 1 19 4.007333 4.007333 869 +scott 1 18 4.060443 4.060443 884 +behavior 1 18 4.060443 4.060443 881 +debug 1 17 4.110874 4.110874 944 +coupl 1 17 4.110874 4.110874 939 +intro 1 17 4.110874 4.110874 915 +georg 1 16 4.174387 4.174387 994 +stock 1 16 4.174387 4.174387 1007 +brief 1 16 4.174387 4.174387 1001 +carl 1 15 4.248495 4.248495 1024 +anywai 1 15 4.248495 4.248495 1047 +ascii 1 15 4.248495 4.248495 1032 +draft 1 14 4.317488 4.317488 1085 +anonym 1 14 4.317488 4.317488 1100 +stephen 1 11 4.553877 4.553877 1342 +persist 1 11 4.553877 4.553877 1367 +smart 1 11 4.553877 4.553877 1352 +alpha 1 11 4.553877 4.553877 1348 +henri 1 10 4.653960 4.653960 1417 +wilson 1 9 4.753590 4.753590 1536 +oop 1 8 4.875197 4.875197 1778 +besid 1 8 4.875197 4.875197 1681 +readm 1 8 4.875197 4.875197 1699 +baker 1 7 5.010635 5.010635 1812 +garbag 1 6 5.164786 5.164786 1986 +oopsla 1 6 5.164786 5.164786 2221 +dougla 1 5 5.347108 5.347108 2471 +decad 1 5 5.347108 5.347108 2455 +overload 1 5 5.347108 5.347108 2403 +whichcontain 1 4 5.568345 5.568345 2714 +ajit 1 3 5.857933 5.857933 3299 +qing 1 3 5.857933 5.857933 3295 +swizzl 1 3 5.857933 5.857933 3883 +andoper 1 3 5.857933 5.857933 3621 +forfault 1 3 5.857933 5.857933 3748 +rscheme 1 3 5.857933 5.857933 3250 +tosupport 1 3 5.857933 5.857933 3613 +providesa 1 3 5.857933 5.857933 3884 +heap 1 3 5.857933 5.857933 3123 +johnston 1 2 6.263398 6.263398 5638 +sheetal 1 2 6.263398 6.263398 5684 +kakkad 1 2 6.263398 6.263398 5685 +donovan 1 2 6.263398 6.263398 4371 +kolbl 1 2 6.263398 6.263398 4372 +repair 1 2 6.263398 6.263398 4198 +damag 1 2 6.263398 6.263398 5687 +collector 1 2 6.263398 6.263398 5683 +checkpoint 1 2 6.263398 6.263398 4205 +programmingsystem 1 2 6.263398 6.263398 5688 +socket 1 2 6.263398 6.263398 4725 +macro 1 2 6.263398 6.263398 5686 +materiali 1 2 6.263398 6.263398 4214 +subdirectori 1 2 6.263398 6.263398 4133 +han 1 2 6.263398 6.263398 4535 +groupoop 1 1 6.957497 6.957497 13817 +groupthi 1 1 6.957497 6.957497 13818 +studentsin 1 1 6.957497 6.957497 13819 +kaplan 1 1 6.957497 6.957497 13820 +neeli 1 1 6.957497 6.957497 13816 +wieren 1 1 6.957497 6.957497 13821 +toimplement 1 1 6.957497 6.957497 13822 +whichattempt 1 1 6.957497 6.957497 13823 +unsoundstudi 1 1 6.957497 6.957497 13824 +generationaland 1 1 6.957497 6.957497 13825 +ongarbag 1 1 6.957497 6.957497 13826 +managementfor 1 1 6.957497 6.957497 13827 +andcompress 1 1 6.957497 6.957497 13828 +noteson 1 1 6.957497 6.957497 13829 +rawascii 1 1 6.957497 6.957497 13830 +andrschemear 1 1 6.957497 6.957497 13831 +thesiscontain 1 1 6.957497 6.957497 13832 +whicharen 1 1 6.957497 6.957497 13833 +sometimesoon 1 1 6.957497 6.957497 13834 +htmlformat 1 1 6.957497 6.957497 13835 +materialfrom 1 1 6.957497 6.957497 13836 +expandedpresent 1 1 6.957497 6.957497 13837 +texinfo 1 1 6.957497 6.957497 13838 +metaobject 1 1 6.957497 6.957497 13839 +backgroundread 1 1 6.957497 6.957497 13840 +fortexa 1 1 6.957497 6.957497 13841 +sftp 1 1 6.957497 6.957497 13842 +notb 1 1 6.957497 6.957497 13843 +boehm 1 1 6.957497 6.957497 13844 +severalgarbag 1 1 6.957497 6.957497 13845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..deb4e556 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +robert 1 30 3.555348 3.555348 567 +otuomagieaddress 1 1 6.957497 6.957497 13846 +emailotu 1 1 6.957497 6.957497 13847 +eduuniververs 1 1 6.957497 6.957497 13848 +infouniversityth 1 1 6.957497 6.957497 13849 +txa 1 1 6.957497 6.957497 13850 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..0d708841 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +professor 1 137 1.945910 1.945910 76 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +call 1 91 2.397895 2.397895 153 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +virtual 1 62 2.772589 2.772589 285 +creat 1 63 2.772589 2.772589 277 +variou 1 56 2.890372 2.890372 317 +investig 1 51 2.995732 2.995732 353 +featur 1 46 3.091042 3.091042 386 +physic 1 47 3.091042 3.091042 377 +prototyp 1 38 3.295837 3.295837 463 +microsoft 1 38 3.295837 3.295837 468 +manual 1 35 3.401197 3.401197 504 +dissert 1 32 3.465736 3.465736 549 +domain 1 30 3.555348 3.555348 564 +focu 1 30 3.555348 3.555348 571 +specifi 1 30 3.555348 3.555348 568 +compon 1 30 3.555348 3.555348 570 +effort 1 26 3.688879 3.688879 652 +reduc 1 22 3.850148 3.850148 759 +thu 1 21 3.912023 3.912023 773 +brown 1 16 4.174387 4.174387 977 +devic 1 16 4.174387 4.174387 1002 +draft 1 14 4.317488 4.317488 1085 +transpar 1 11 4.553877 4.553877 1325 +devis 1 10 4.653960 4.653960 1451 +researchi 1 8 4.875197 4.875197 1756 +driver 1 8 4.875197 4.875197 1657 +counter 1 8 4.875197 4.875197 1765 +creation 1 6 5.164786 5.164786 2069 +andimplement 1 4 5.568345 5.568345 3029 +multifunct 1 3 5.857933 5.857933 3826 +meemail 1 3 5.857933 5.857933 3821 +edupost 1 3 5.857933 5.857933 3822 +manualfor 1 2 6.263398 6.263398 4720 +export 1 2 6.263398 6.263398 5689 +padgettdon 1 1 6.957497 6.957497 13852 +padgett 1 1 6.957497 6.957497 13851 +softar 1 1 6.957497 6.957497 13853 +powerpointvers 1 1 6.957497 6.957497 13854 +usafax 1 1 6.957497 6.957497 13855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..17a9d683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +avail 1 169 1.791759 1.791759 48 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +file 1 132 1.945910 1.945910 70 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +person 1 111 2.197225 2.197225 117 +access 1 102 2.302585 2.302585 136 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +school 1 84 2.484907 2.484907 188 +librari 1 87 2.484907 2.484907 181 +want 1 79 2.564949 2.564949 199 +server 1 76 2.564949 2.564949 204 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +window 1 68 2.708050 2.708050 242 +main 1 67 2.708050 2.708050 256 +plan 1 65 2.772589 2.772589 272 +automat 1 61 2.833213 2.833213 306 +space 1 57 2.890372 2.890372 310 +standard 1 48 3.044522 3.044522 365 +cool 1 49 3.044522 3.044522 374 +get 1 46 3.091042 3.091042 380 +execut 1 45 3.135494 3.135494 404 +realli 1 40 3.258097 3.258097 444 +game 1 36 3.367296 3.367296 498 +tree 1 36 3.367296 3.367296 492 +go 1 33 3.433987 3.433987 529 +handl 1 24 3.761200 3.761200 685 +inth 1 22 3.850148 3.850148 741 +binari 1 20 3.951244 3.951244 823 +wrote 1 20 3.951244 3.951244 830 +geometr 1 19 4.007333 4.007333 852 +partit 1 16 4.174387 4.174387 984 +unfortun 1 13 4.382027 4.382027 1170 +solari 1 12 4.465908 4.465908 1238 +guess 1 10 4.653960 4.653960 1443 +elimin 1 9 4.753590 4.753590 1558 +fail 1 8 4.875197 4.875197 1655 +port 1 8 4.875197 4.875197 1766 +reus 1 8 4.875197 4.875197 1661 +bug 1 7 5.010635 5.010635 1801 +philip 1 6 5.164786 5.164786 2005 +templat 1 5 5.347108 5.347108 2311 +anda 1 5 5.347108 5.347108 2416 +suno 1 4 5.568345 5.568345 2790 +screenshot 1 4 5.568345 5.568345 2743 +campbel 1 3 5.857933 5.857933 3272 +meemail 1 3 5.857933 5.857933 3821 +edupost 1 3 5.857933 5.857933 3822 +everywher 1 2 6.263398 6.263398 5690 +curli 1 2 6.263398 6.263398 5691 +hardinphilip 1 1 6.957497 6.957497 13857 +hardinabout 1 1 6.957497 6.957497 13858 +fallback 1 1 6.957497 6.957497 13859 +battlebal 1 1 6.957497 6.957497 13856 +multiplay 1 1 6.957497 6.957497 13860 +runsund 1 1 6.957497 6.957497 13861 +graphicssoftwar 1 1 6.957497 6.957497 13862 +programmingto 1 1 6.957497 6.957497 13863 +pahardin 1 1 6.957497 6.957497 13864 +usanetrek 1 1 6.957497 6.957497 13865 +pita 1 1 6.957497 6.957497 13866 +digitaldisast 1 1 6.957497 6.957497 13867 +plaster 1 1 6.957497 6.957497 13868 +congradul 1 1 6.957497 6.957497 13869 +smartest 1 1 6.957497 6.957497 13870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..636884bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +check 1 115 2.197225 2.197225 118 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +learn 1 86 2.484907 2.484907 170 +librari 1 87 2.484907 2.484907 181 +exampl 1 77 2.564949 2.564949 195 +addit 1 74 2.639057 2.639057 228 +intellig 1 72 2.639057 2.639057 225 +onlin 1 75 2.639057 2.639057 223 +knowledg 1 67 2.708050 2.708050 243 +interact 1 62 2.772589 2.772589 270 +taylor 1 63 2.772589 2.772589 287 +artifici 1 63 2.772589 2.772589 280 +think 1 57 2.890372 2.890372 314 +pointer 1 48 3.044522 3.044522 368 +natur 1 44 3.135494 3.135494 406 +mechan 1 43 3.178054 3.178054 416 +might 1 41 3.218876 3.218876 426 +paul 1 38 3.295837 3.295837 471 +postal 1 30 3.555348 3.555348 580 +neural 1 30 3.555348 3.555348 578 +usual 1 28 3.610918 3.610918 608 +head 1 23 3.806662 3.806662 732 +reflect 1 15 4.248495 4.248495 1034 +dave 1 14 4.317488 4.317488 1098 +hotlist 1 13 4.382027 4.382027 1199 +pascal 1 12 4.465908 4.465908 1213 +evolut 1 11 4.553877 4.553877 1314 +surf 1 11 4.553877 4.553877 1301 +death 1 10 4.653960 4.653960 1457 +handi 1 6 5.164786 5.164786 2111 +mix 1 6 5.164786 5.164786 2200 +studentdepart 1 5 5.347108 5.347108 2505 +explicitli 1 5 5.347108 5.347108 2308 +seriou 1 5 5.347108 5.347108 2252 +wast 1 5 5.347108 5.347108 2537 +austindepart 1 4 5.568345 5.568345 3008 +websit 1 4 5.568345 5.568345 2726 +neuro 1 2 6.263398 6.263398 4265 +mcquestenpaul 1 1 6.957497 6.957497 13871 +mcquestenphd 1 1 6.957497 6.957497 13872 +bepract 1 1 6.957497 6.957497 13873 +paulmcq 1 1 6.957497 6.957497 13874 +forcsp 1 1 6.957497 6.957497 13875 +programmingmor 1 1 6.957497 6.957497 13876 +inmoriarti 1 1 6.957497 6.957497 13877 +atcnr 1 1 6.957497 6.957497 13878 +rome 1 1 6.957497 6.957497 13879 +tout 1 1 6.957497 6.957497 13880 +winer 1 1 6.957497 6.957497 13881 +cynb 1 1 6.957497 6.957497 13882 +humong 1 1 6.957497 6.957497 13883 +knick 1 1 6.957497 6.957497 13884 +knack 1 1 6.957497 6.957497 13885 +nut 1 1 6.957497 6.957497 13886 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..cb0b9991 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +network 1 168 1.791759 1.791759 61 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +file 1 132 1.945910 1.945910 70 +check 1 115 2.197225 2.197225 118 +comment 1 93 2.397895 2.397895 146 +summari 1 73 2.639057 2.639057 237 +multimedia 1 68 2.708050 2.708050 258 +finger 1 52 2.995732 2.995732 354 +get 1 46 3.091042 3.091042 380 +protocol 1 45 3.135494 3.135494 407 +log 1 19 4.007333 4.007333 857 +affili 1 13 4.382027 4.382027 1194 +touch 1 12 4.465908 4.465908 1288 +goyal 1 3 5.857933 5.857933 3268 +pawan 1 1 6.957497 6.957497 13888 +pawang 1 1 6.957497 6.957497 13887 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..98349a8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +theori 1 111 2.197225 2.197225 127 +center 1 88 2.397895 2.397895 158 +journal 1 83 2.484907 2.484907 183 +complet 1 77 2.564949 2.564949 208 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +main 1 67 2.708050 2.708050 256 +previou 1 62 2.772589 2.772589 290 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +advisor 1 51 2.995732 2.995732 355 +numer 1 49 3.044522 3.044522 369 +visitor 1 49 3.044522 3.044522 371 +physic 1 47 3.091042 3.091042 377 +algebra 1 45 3.135494 3.135494 394 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +societi 1 40 3.258097 3.258097 456 +open 1 38 3.295837 3.295837 469 +vita 1 38 3.295837 3.295837 473 +field 1 37 3.332205 3.332205 482 +represent 1 35 3.401197 3.401197 512 +print 1 34 3.401197 3.401197 503 +obtain 1 33 3.433987 3.433987 534 +curriculum 1 33 3.433987 3.433987 535 +dissert 1 32 3.465736 3.465736 549 +posit 1 31 3.496508 3.496508 552 +spent 1 25 3.737670 3.737670 676 +finish 1 22 3.850148 3.850148 748 +sequenti 1 22 3.850148 3.850148 745 +half 1 21 3.912023 3.912023 776 +previous 1 17 4.110874 4.110874 923 +germani 1 17 4.110874 4.110874 946 +jose 1 16 4.174387 4.174387 976 +georg 1 16 4.174387 4.174387 994 +joint 1 13 4.382027 4.382027 1130 +econom 1 13 4.382027 4.382027 1184 +mellon 1 13 4.382027 4.382027 1179 +usavoic 1 13 4.382027 4.382027 1198 +calcul 1 12 4.465908 4.465908 1268 +carnegi 1 12 4.465908 4.465908 1260 +fill 1 11 4.553877 4.553877 1349 +cryptographi 1 9 4.753590 4.753590 1512 +rel 1 9 4.753590 4.753590 1487 +invari 1 8 4.875197 4.875197 1748 +pittsburgh 1 7 5.010635 5.010635 1938 +quantum 1 6 5.164786 5.164786 2214 +particl 1 5 5.347108 5.347108 2436 +invers 1 4 5.568345 5.568345 2764 +sudarshan 1 3 5.857933 5.857933 3885 +supervisor 1 3 5.857933 5.857933 3754 +quantiz 1 2 6.263398 6.263398 5692 +irreduc 1 2 6.263398 6.263398 4890 +lemk 1 2 6.263398 6.263398 5693 +thephys 1 2 6.263398 6.263398 5694 +symmetri 1 2 6.263398 6.263398 5517 +pecina 1 1 6.957497 6.957497 13889 +pecinaabout 1 1 6.957497 6.957497 13891 +innuclear 1 1 6.957497 6.957497 13892 +workedinvestig 1 1 6.957497 6.957497 13893 +gaug 1 1 6.957497 6.957497 13894 +graviti 1 1 6.957497 6.957497 13895 +gravit 1 1 6.957497 6.957497 13896 +unitari 1 1 6.957497 6.957497 13897 +yuval 1 1 6.957497 6.957497 13898 +eman 1 1 6.957497 6.957497 13899 +jurgen 1 1 6.957497 6.957497 13900 +fromcologn 1 1 6.957497 6.957497 13901 +bureau 1 1 6.957497 6.957497 13902 +geologi 1 1 6.957497 6.957497 13903 +seismic 1 1 6.957497 6.957497 13904 +tomographi 1 1 6.957497 6.957497 13905 +hardag 1 1 6.957497 6.957497 13906 +geophys 1 1 6.957497 6.957497 13907 +geophysicist 1 1 6.957497 6.957497 13908 +comerci 1 1 6.957497 6.957497 13909 +solutionsin 1 1 6.957497 6.957497 13910 +chromodynamicsmi 1 1 6.957497 6.957497 13911 +orpecina 1 1 6.957497 6.957497 13890 +defo 1 1 6.957497 6.957497 13912 +phy 1 1 6.957497 6.957497 13913 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..e3d8eb42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +file 1 132 1.945910 1.945910 70 +welcom 1 122 2.079442 2.079442 99 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +stuff 1 87 2.484907 2.484907 171 +chang 1 82 2.484907 2.484907 163 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +window 1 68 2.708050 2.708050 242 +unix 1 58 2.890372 2.890372 308 +think 1 57 2.890372 2.890372 314 +cool 1 49 3.044522 3.044522 374 +could 1 46 3.091042 3.091042 383 +realli 1 40 3.258097 3.258097 444 +mean 1 37 3.332205 3.332205 477 +short 1 36 3.367296 3.367296 499 +staff 1 36 3.367296 3.367296 490 +experiment 1 26 3.688879 3.688879 645 +instal 1 22 3.850148 3.850148 754 +increas 1 20 3.951244 3.951244 829 +anyon 1 17 4.110874 4.110874 916 +protect 1 17 4.110874 4.110874 935 +drive 1 15 4.248495 4.248495 1052 +floor 1 14 4.317488 4.317488 1070 +stori 1 14 4.317488 4.317488 1087 +comic 1 14 4.317488 4.317488 1103 +neat 1 12 4.465908 4.465908 1263 +true 1 10 4.653960 4.653960 1422 +vista 1 10 4.653960 4.653960 1452 +chanc 1 7 5.010635 5.010635 1960 +escap 1 4 5.568345 5.568345 3016 +meyour 1 3 5.857933 5.857933 3858 +blah 1 2 6.263398 6.263398 5695 +drastic 1 2 6.263398 6.263398 4201 +nettl 1 1 6.957497 6.957497 13915 +cornerinfolik 1 1 6.957497 6.957497 13916 +blahblah 1 1 6.957497 6.957497 13917 +woof 1 1 6.957497 6.957497 13914 +eeek 1 1 6.957497 6.957497 13918 +ibm 1 1 6.957497 6.957497 13919 +afteri 1 1 6.957497 6.957497 13920 +theinnoc 1 1 6.957497 6.957497 13921 +buena 1 1 6.957497 6.957497 13922 +movieplex 1 1 6.957497 6.957497 13923 +employan 1 1 6.957497 6.957497 13924 +improb 1 1 6.957497 6.957497 13925 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..23af4447 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +hall 1 146 1.945910 1.945910 65 +report 1 131 2.079442 2.079442 92 +decemb 1 80 2.564949 2.564949 215 +taylor 1 63 2.772589 2.772589 287 +annual 1 40 3.258097 3.258097 458 +postal 1 30 3.555348 3.555348 580 +greg 1 24 3.761200 3.761200 695 +eduphon 1 15 4.248495 4.248495 1060 +informationemail 1 9 4.753590 4.753590 1564 +austinaustin 1 7 5.010635 5.010635 1966 +plaxton 1 3 5.857933 5.857933 3886 +plaxtongreg 1 1 6.957497 6.957497 13926 +plaxtoncontact 1 1 6.957497 6.957497 13927 +sciencetaylor 1 1 6.957497 6.957497 13928 +profilepubl 1 1 6.957497 6.957497 13929 +plaxtonplaxton 1 1 6.957497 6.957497 13930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..6e3c4504 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +theori 1 111 2.197225 2.197225 127 +site 1 106 2.197225 2.197225 119 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +build 1 85 2.484907 2.484907 184 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +method 1 80 2.564949 2.564949 213 +servic 1 72 2.639057 2.639057 236 +intellig 1 72 2.639057 2.639057 225 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +knowledg 1 67 2.708050 2.708050 243 +abstract 1 62 2.772589 2.772589 276 +artifici 1 63 2.772589 2.772589 280 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +investig 1 51 2.995732 2.995732 353 +case 1 51 2.995732 2.995732 351 +california 1 46 3.091042 3.091042 388 +answer 1 45 3.135494 3.135494 391 +editor 1 41 3.218876 3.218876 433 +autom 1 41 3.218876 3.218876 434 +press 1 42 3.218876 3.218876 419 +award 1 34 3.401197 3.401197 523 +tech 1 35 3.401197 3.401197 515 +concept 1 32 3.465736 3.465736 537 +domain 1 30 3.555348 3.555348 564 +scale 1 28 3.610918 3.610918 613 +rule 1 26 3.688879 3.688879 638 +fellow 1 24 3.761200 3.761200 701 +honor 1 23 3.806662 3.806662 729 +predict 1 19 4.007333 4.007333 855 +young 1 16 4.174387 4.174387 991 +cambridg 1 16 4.174387 4.174387 1008 +weak 1 13 4.382027 4.382027 1159 +hotlist 1 13 4.382027 4.382027 1199 +bruce 1 12 4.465908 4.465908 1226 +classif 1 9 4.753590 4.753590 1586 +irvin 1 8 4.875197 4.875197 1660 +presidenti 1 8 4.875197 4.875197 1737 +aaai 1 8 4.875197 4.875197 1750 +boundari 1 7 5.010635 5.010635 1929 +heurist 1 6 5.164786 5.164786 2125 +porter 1 5 5.347108 5.347108 2293 +complementari 1 5 5.347108 5.347108 2523 +presentarea 1 4 5.568345 5.568345 3026 +thetim 1 3 5.857933 5.857933 3581 +preced 1 3 5.857933 5.857933 3107 +researchinterest 1 2 6.263398 6.263398 5123 +rickel 1 2 6.263398 6.263398 5648 +andpostscript 1 2 6.263398 6.263398 5696 +brant 1 2 6.263398 6.263398 5652 +warrant 1 2 6.263398 6.263398 5697 +bareiss 1 2 6.263398 6.263398 5646 +porterassoci 1 1 6.957497 6.957497 13931 +interestartifici 1 1 6.957497 6.957497 13932 +researchhead 1 1 6.957497 6.957497 13933 +basesand 1 1 6.957497 6.957497 13934 +aait 1 1 6.957497 6.957497 13935 +holt 1 1 6.957497 6.957497 13936 +abstractand 1 1 6.957497 6.957497 13937 +reportport 1 1 6.957497 6.957497 13938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..d6cc6cfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +texa 1 160 1.791759 1.791759 64 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +introduct 1 126 2.079442 2.079442 87 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +book 1 99 2.302585 2.302585 131 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +mani 1 92 2.397895 2.397895 150 +commun 1 95 2.397895 2.397895 157 +member 1 84 2.484907 2.484907 165 +refer 1 78 2.564949 2.564949 203 +state 1 76 2.564949 2.564949 207 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +logic 1 71 2.639057 2.639057 230 +write 1 72 2.639057 2.639057 222 +effici 1 73 2.639057 2.639057 233 +differ 1 66 2.708050 2.708050 253 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +written 1 63 2.772589 2.772589 278 +simpl 1 60 2.833213 2.833213 298 +reason 1 57 2.890372 2.890372 318 +variou 1 56 2.890372 2.890372 317 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +overview 1 56 2.890372 2.890372 323 +thesi 1 57 2.890372 2.890372 327 +found 1 53 2.944439 2.944439 337 +give 1 50 3.044522 3.044522 359 +basic 1 50 3.044522 3.044522 360 +possibl 1 47 3.091042 3.091042 378 +electron 1 47 3.091042 3.091042 379 +understand 1 47 3.091042 3.091042 384 +mark 1 44 3.135494 3.135494 403 +made 1 44 3.135494 3.135494 398 +describ 1 45 3.135494 3.135494 400 +fast 1 42 3.218876 3.218876 429 +map 1 39 3.258097 3.258097 452 +correct 1 38 3.295837 3.295837 462 +concurr 1 34 3.401197 3.401197 501 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +chapter 1 32 3.465736 3.465736 536 +specifi 1 30 3.555348 3.555348 568 +power 1 30 3.555348 3.555348 573 +compon 1 30 3.555348 3.555348 570 +synchron 1 29 3.583519 3.583519 588 +aspect 1 25 3.737670 3.737670 663 +seri 1 24 3.761200 3.761200 708 +proof 1 23 3.806662 3.806662 720 +emphasi 1 22 3.850148 3.850148 755 +inth 1 22 3.850148 3.850148 741 +sort 1 22 3.850148 3.850148 738 +divis 1 21 3.912023 3.912023 803 +safeti 1 20 3.951244 3.951244 817 +assum 1 19 4.007333 4.007333 845 +prove 1 19 4.007333 4.007333 848 +stand 1 18 4.060443 4.060443 891 +former 1 17 4.110874 4.110874 956 +adam 1 17 4.110874 4.110874 934 +weslei 1 16 4.174387 4.174387 983 +reflect 1 15 4.248495 4.248495 1034 +hybrid 1 15 4.248495 4.248495 1057 +manner 1 14 4.317488 4.317488 1074 +balanc 1 14 4.317488 4.317488 1112 +circuit 1 13 4.382027 4.382027 1131 +recurs 1 13 4.382027 4.382027 1127 +composit 1 13 4.382027 4.382027 1150 +addison 1 12 4.465908 4.465908 1230 +verifi 1 12 4.465908 4.465908 1261 +summar 1 11 4.553877 4.553877 1295 +length 1 10 4.653960 4.653960 1400 +equal 1 10 4.653960 4.653960 1424 +arithmet 1 10 4.653960 4.653960 1388 +ataustin 1 9 4.753590 4.753590 1610 +notat 1 9 4.753590 4.753590 1489 +tempor 1 9 4.753590 4.753590 1584 +misra 1 7 5.010635 5.010635 1856 +restrict 1 6 5.164786 5.164786 2129 +multiprogram 1 6 5.164786 5.164786 2010 +rigor 1 4 5.568345 5.568345 3030 +jayadev 1 4 5.568345 5.568345 3006 +jacob 1 4 5.568345 5.568345 2667 +anddistribut 1 4 5.568345 5.568345 3031 +ofprogram 1 4 5.568345 5.568345 2624 +monograph 1 4 5.568345 5.568345 2860 +uniti 1 3 5.857933 5.857933 3812 +cohen 1 3 5.857933 5.857933 3652 +rajeev 1 3 5.857933 5.857933 3152 +marku 1 3 5.857933 5.857933 3872 +kornerup 1 3 5.857933 5.857933 3215 +checker 1 3 5.857933 5.857933 3644 +parallelalgorithm 1 3 5.857933 5.857933 3249 +alsoavail 1 3 5.857933 5.857933 3887 +powerlist 1 2 6.263398 6.263398 5660 +seuss 1 2 6.263398 6.263398 5662 +carruth 1 2 6.263398 6.263398 5495 +erni 1 2 6.263398 6.263398 5104 +joshi 1 2 6.263398 6.263398 4202 +chandi 1 2 6.263398 6.263398 5661 +ofpap 1 2 6.263398 6.263398 4329 +fourier 1 2 6.263398 6.263398 5698 +offspr 1 2 6.263398 6.263398 5699 +froma 1 2 6.263398 6.263398 4862 +basedprogram 1 2 6.263398 6.263398 5700 +austinpsp 1 1 6.957497 6.957497 13941 +austinthi 1 1 6.957497 6.957497 13942 +deriveparallel 1 1 6.957497 6.957497 13943 +issupervis 1 1 6.957497 6.957497 13944 +groupinclud 1 1 6.957497 6.957497 13945 +kaltenbach 1 1 6.957497 6.957497 13939 +edgar 1 1 6.957497 6.957497 13946 +knapp 1 1 6.957497 6.957497 13947 +ingolf 1 1 6.957497 6.957497 13948 +krger 1 1 6.957497 6.957497 13949 +josyula 1 1 6.957497 6.957497 13950 +staskauska 1 1 6.957497 6.957497 13951 +publicationsbelow 1 1 6.957497 6.957497 13952 +wherev 1 1 6.957497 6.957497 13953 +topap 1 1 6.957497 6.957497 13954 +thenot 1 1 6.957497 6.957497 13955 +inchandi 1 1 6.957497 6.957497 13956 +amanuscript 1 1 6.957497 6.957497 13957 +newun 1 1 6.957497 6.957497 13958 +operatorco 1 1 6.957497 6.957497 13959 +forrefer 1 1 6.957497 6.957497 13960 +asymbol 1 1 6.957497 6.957497 13961 +forfinit 1 1 6.957497 6.957497 13962 +unityverifi 1 1 6.957497 6.957497 13963 +toinclud 1 1 6.957497 6.957497 13964 +twodiffer 1 1 6.957497 6.957497 13965 +succinct 1 1 6.957497 6.957497 13966 +thepowerlist 1 1 6.957497 6.957497 13940 +givesnumer 1 1 6.957497 6.957497 13967 +batcher 1 1 6.957497 6.957497 13968 +asadd 1 1 6.957497 6.957497 13969 +multipli 1 1 6.957497 6.957497 13970 +addercircuit 1 1 6.957497 6.957497 13971 +programscan 1 1 6.957497 6.957497 13972 +speciallyhypercub 1 1 6.957497 6.957497 13973 +caninterfer 1 1 6.957497 6.957497 13974 +adisciplin 1 1 6.957497 6.957497 13975 +genrat 1 1 6.957497 6.957497 13976 +callsfor 1 1 6.957497 6.957497 13977 +anexperi 1 1 6.957497 6.957497 13978 +ingolfkrg 1 1 6.957497 6.957497 13979 +thepsp 1 1 6.957497 6.957497 13980 +sitejacob 1 1 6.957497 6.957497 13981 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..ec09b8b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +construct 1 139 1.945910 1.945910 82 +spring 1 131 2.079442 2.079442 88 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +server 1 76 2.564949 2.564949 204 +appli 1 71 2.639057 2.639057 226 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +visit 1 63 2.772589 2.772589 288 +room 1 59 2.833213 2.833213 301 +march 1 61 2.833213 2.833213 295 +electr 1 38 3.295837 3.295837 461 +sciencesunivers 1 37 3.332205 3.332205 486 +connect 1 37 3.332205 3.332205 485 +client 1 25 3.737670 3.737670 679 +sign 1 16 4.174387 4.174387 970 +huang 1 12 4.465908 4.465908 1202 +guest 1 12 4.465908 4.465908 1220 +card 1 10 4.653960 4.653960 1435 +placement 1 10 4.653960 4.653960 1420 +telecommun 1 9 4.753590 4.753590 1565 +job 1 8 4.875197 4.875197 1702 +postcard 1 6 5.164786 5.164786 2181 +attract 1 5 5.347108 5.347108 2356 +hawaii 1 3 5.857933 5.857933 3888 +csc 1 3 5.857933 5.857933 3183 +nankai 1 2 6.263398 6.263398 5147 +tianjin 1 2 6.263398 6.263398 5148 +junk 1 2 6.263398 6.263398 5701 +qime 1 1 6.957497 6.957497 13982 +edureceiv 1 1 6.957497 6.957497 13983 +univeris 1 1 6.957497 6.957497 13984 +manoa 1 1 6.957497 6.957497 13985 +hawaiiwork 1 1 6.957497 6.957497 13986 +austincours 1 1 6.957497 6.957497 13987 +teamweb 1 1 6.957497 6.957497 13988 +utcssadaili 1 1 6.957497 6.957497 13989 +texanstock 1 1 6.957497 6.957497 13990 +picturesimageschines 1 1 6.957497 6.957497 13991 +popsend 1 1 6.957497 6.957497 13992 +jobtrakut 1 1 6.957497 6.957497 13993 +gopherftp 1 1 6.957497 6.957497 13994 +newstelnet 1 1 6.957497 6.957497 13995 +cschen 1 1 6.957497 6.957497 13996 +staffyour 1 1 6.957497 6.957497 13997 +commentsguest 1 1 6.957497 6.957497 13998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..955729f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +applic 1 170 1.791759 1.791759 56 +texa 1 160 1.791759 1.791759 64 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +professor 1 137 1.945910 1.945910 76 +document 1 121 2.079442 2.079442 89 +world 1 115 2.197225 2.197225 126 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +abstract 1 62 2.772589 2.772589 276 +visit 1 63 2.772589 2.772589 288 +reason 1 57 2.890372 2.890372 318 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +pointer 1 48 3.044522 3.044522 368 +visitor 1 49 3.044522 3.044522 371 +directori 1 45 3.135494 3.135494 396 +robot 1 36 3.367296 3.367296 497 +bibliographi 1 34 3.401197 3.401197 518 +represent 1 35 3.401197 3.401197 512 +dissert 1 32 3.465736 3.465736 549 +limit 1 29 3.583519 3.583519 585 +alumni 1 21 3.912023 3.912023 807 +supervis 1 20 3.951244 3.951244 840 +spatial 1 16 4.174387 4.174387 988 +atth 1 15 4.248495 4.248495 1019 +easili 1 14 4.317488 4.317488 1077 +qualit 1 11 4.553877 4.553877 1362 +tour 1 11 4.553877 4.553877 1307 +ataustin 1 9 4.753590 4.753590 1610 +yellow 1 9 4.753590 4.753590 1601 +qsim 1 3 5.857933 5.857933 3862 +kuiper 1 3 5.857933 5.857933 3794 +thephys 1 2 6.263398 6.263398 5694 +utexasqualit 1 1 6.957497 6.957497 14000 +utexasth 1 1 6.957497 6.957497 14001 +ourresearch 1 1 6.957497 6.957497 13999 +intelligentrobot 1 1 6.957497 6.957497 14002 +knowledgerepresent 1 1 6.957497 6.957497 14003 +algernon 1 1 6.957497 6.957497 14004 +benjaminkuip 1 1 6.957497 6.957497 14005 +areadescript 1 1 6.957497 6.957497 14006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..22f86814 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +introduct 1 126 2.079442 2.079442 87 +pleas 1 113 2.197225 2.197225 114 +technic 1 100 2.302585 2.302585 140 +comment 1 93 2.397895 2.397895 146 +sinc 1 90 2.397895 2.397895 159 +graphic 1 90 2.397895 2.397895 147 +institut 1 84 2.484907 2.484907 187 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +suggest 1 53 2.944439 2.944439 331 +still 1 50 3.044522 3.044522 362 +around 1 43 3.178054 3.178054 415 +feel 1 37 3.332205 3.332205 483 +game 1 36 3.367296 3.367296 498 +copyright 1 36 3.367296 3.367296 495 +john 1 33 3.433987 3.433987 532 +linux 1 27 3.637586 3.637586 631 +sport 1 25 3.737670 3.737670 683 +brows 1 23 3.806662 3.806662 726 +thank 1 23 3.806662 3.806662 721 +self 1 22 3.850148 3.850148 761 +leav 1 21 3.912023 3.912023 772 +els 1 19 4.007333 4.007333 843 +modif 1 17 4.110874 4.110874 913 +joke 1 8 4.875197 4.875197 1620 +settimeout 1 5 5.347108 5.347108 2536 +guestbook 1 5 5.347108 5.347108 2475 +seed 1 4 5.568345 5.568345 2984 +timertwo 1 4 5.568345 5.568345 2985 +scrollit_rl 1 3 5.857933 5.857933 3882 +underconstruct 1 3 5.857933 5.857933 3889 +com 1 2 6.263398 6.263398 5156 +qiang 1 1 6.957497 6.957497 14007 +seriousjunk 1 1 6.957497 6.957497 14008 +realjunk 1 1 6.957497 6.957497 14009 +struggleforliv 1 1 6.957497 6.957497 14010 +qzuo 1 1 6.957497 6.957497 14011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..2a3d27e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +spring 1 131 2.079442 2.079442 88 +teach 1 108 2.197225 2.197225 112 +institut 1 84 2.484907 2.484907 187 +decemb 1 80 2.564949 2.564949 215 +name 1 72 2.639057 2.639057 220 +laboratori 1 63 2.772589 2.772589 292 +abstract 1 62 2.772589 2.772589 276 +taylor 1 63 2.772589 2.772589 287 +type 1 61 2.833213 2.833213 296 +semest 1 58 2.890372 2.890372 312 +directori 1 45 3.135494 3.135494 396 +robert 1 30 3.555348 3.555348 567 +experiment 1 26 3.688879 3.688879 645 +runtim 1 19 4.007333 4.007333 858 +less 1 18 4.060443 4.060443 892 +brown 1 16 4.174387 4.174387 977 +eduphon 1 15 4.248495 4.248495 1060 +massachusett 1 14 4.317488 4.317488 1118 +ofcomput 1 10 4.653960 4.653960 1442 +informationemail 1 9 4.753590 4.753590 1564 +pronounc 1 7 5.010635 5.010635 1918 +austinaustin 1 7 5.010635 5.010635 1966 +blumoferdb 1 5 5.347108 5.347108 2324 +bloom 1 4 5.568345 5.568345 2913 +sciencestaylor 1 3 5.857933 5.857933 3814 +bobbi 1 2 6.263398 6.263398 5678 +informationassist 1 2 6.263398 6.263398 5531 +blumoferobert 1 1 6.957497 6.957497 14012 +blumofei 1 1 6.957497 6.957497 14013 +cilkmultithread 1 1 6.957497 6.957497 14014 +hallpost 1 1 6.957497 6.957497 14015 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..538fd105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +construct 1 139 1.945910 1.945910 82 +analysi 1 124 2.079442 2.079442 98 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +homepag 1 93 2.397895 2.397895 148 +real 1 93 2.397895 2.397895 144 +comment 1 93 2.397895 2.397895 146 +intellig 1 72 2.639057 2.639057 225 +artifici 1 63 2.772589 2.772589 280 +content 1 59 2.833213 2.833213 302 +rule 1 26 3.688879 3.688879 638 +brows 1 23 3.806662 3.806662 726 +wang 1 21 3.912023 3.912023 790 +theunivers 1 21 3.912023 3.912023 797 +permit 1 16 4.174387 4.174387 962 +candid 1 9 4.753590 4.753590 1606 +publicationsi 1 3 5.857933 5.857933 3827 +wangwelcom 1 1 6.957497 6.957497 14016 +rhwang 1 1 6.957497 6.957497 14017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..1013b9c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +mathemat 1 108 2.197225 2.197225 123 +memori 1 101 2.302585 2.302585 139 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +intellig 1 72 2.639057 2.639057 225 +goal 1 66 2.708050 2.708050 250 +artifici 1 63 2.772589 2.772589 280 +organ 1 65 2.772589 2.772589 265 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +detail 1 57 2.890372 2.890372 321 +undergradu 1 54 2.944439 2.944439 338 +finger 1 52 2.995732 2.995732 354 +visual 1 48 3.044522 3.044522 372 +vision 1 41 3.218876 3.218876 430 +seminar 1 38 3.295837 3.295837 470 +robot 1 36 3.367296 3.367296 497 +computersci 1 30 3.555348 3.555348 562 +neural 1 30 3.555348 3.555348 578 +postal 1 30 3.555348 3.555348 580 +utc 1 27 3.637586 3.637586 629 +strategi 1 25 3.737670 3.737670 682 +decis 1 23 3.806662 3.806662 728 +self 1 22 3.850148 3.850148 761 +concentr 1 18 4.060443 4.060443 906 +cognit 1 16 4.174387 4.174387 986 +evolv 1 12 4.465908 4.465908 1223 +genet 1 10 4.653960 4.653960 1409 +risto 1 9 4.753590 4.753590 1523 +schema 1 6 5.164786 5.164786 1988 +ucla 1 5 5.347108 5.347108 2502 +oftexa 1 4 5.568345 5.568345 3003 +episod 1 4 5.568345 5.568345 2747 +intereststh 1 3 5.857933 5.857933 3838 +cortex 1 3 5.857933 5.857933 3856 +helsinki 1 2 6.263398 6.263398 5702 +miikkulainenristo 1 1 6.957497 6.957497 14018 +miikkulainenassoci 1 1 6.957497 6.957497 14019 +processeswith 1 1 6.957497 6.957497 14020 +languageacquisit 1 1 6.957497 6.957497 14021 +networkswith 1 1 6.957497 6.957497 14022 +discoversequenti 1 1 6.957497 6.957497 14023 +classessumm 1 1 6.957497 6.957497 14024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..873848b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +rong 1 2 6.263398 6.263398 5703 +bigfoot 1 1 6.957497 6.957497 14025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..54a195c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +algorithm 1 162 1.791759 1.791759 57 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +theori 1 111 2.197225 2.197225 127 +member 1 84 2.484907 2.484907 165 +complet 1 77 2.564949 2.564949 208 +onlin 1 75 2.639057 2.639057 223 +plan 1 65 2.772589 2.772589 272 +vita 1 38 3.295837 3.295837 473 +random 1 34 3.401197 3.401197 511 +curriculum 1 33 3.433987 3.433987 535 +postal 1 30 3.555348 3.555348 580 +particularli 1 19 4.007333 4.007333 867 +atth 1 15 4.248495 4.248495 1019 +ataustin 1 9 4.753590 4.753590 1610 +andcomput 1 8 4.875197 4.875197 1623 +supervisor 1 3 5.857933 5.857933 3754 +rajmohan 1 2 6.263398 6.263398 5706 +rajaraman 1 2 6.263398 6.263398 5704 +mydissert 1 2 6.263398 6.263398 5496 +mypubl 1 2 6.263398 6.263398 5707 +linkscontact 1 2 6.263398 6.263398 5708 +rraj 1 2 6.263398 6.263398 5705 +pagerajmohan 1 1 6.957497 6.957497 14026 +gregplaxton 1 1 6.957497 6.957497 14027 +incombinator 1 1 6.957497 6.957497 14028 +sciencemiscellan 1 1 6.957497 6.957497 14029 +ephon 1 1 6.957497 6.957497 14030 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..085a796b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +librari 1 87 2.484907 2.484907 181 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +colleg 1 61 2.833213 2.833213 300 +telephon 1 50 3.044522 3.044522 373 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +ofth 1 36 3.367296 3.367296 491 +computersci 1 30 3.555348 3.555348 562 +weather 1 28 3.610918 3.610918 618 +spent 1 25 3.737670 3.737670 676 +yahoo 1 24 3.761200 3.761200 707 +beij 1 19 4.007333 4.007333 876 +five 1 19 4.007333 4.007333 841 +citi 1 19 4.007333 4.007333 874 +tsinghua 1 13 4.382027 4.382027 1195 +replic 1 12 4.465908 4.465908 1231 +entertain 1 12 4.465908 4.465908 1286 +catalog 1 10 4.653960 4.653960 1431 +login 1 9 4.753590 4.753590 1550 +film 1 8 4.875197 4.875197 1761 +capit 1 7 5.010635 5.010635 1957 +apart 1 7 5.010635 5.010635 1936 +chronicl 1 7 5.010635 5.010635 1952 +river 1 6 5.164786 5.164786 2220 +provinc 1 4 5.568345 5.568345 3009 +republ 1 4 5.568345 5.568345 3032 +sinanet 1 4 5.568345 5.568345 2883 +rong 1 2 6.263398 6.263398 5703 +zhai 1 2 6.263398 6.263398 5709 +homepagea 1 1 6.957497 6.957497 14032 +chinachina 1 1 6.957497 6.957497 14033 +jinan 1 1 6.957497 6.957497 14034 +myhometown 1 1 6.957497 6.957497 14035 +shandong 1 1 6.957497 6.957497 14036 +gotmi 1 1 6.957497 6.957497 14037 +ofchina 1 1 6.957497 6.957497 14038 +texasaustin 1 1 6.957497 6.957497 14039 +rent 1 1 6.957497 6.957497 14040 +utcsth 1 1 6.957497 6.957497 14041 +utnetcat 1 1 6.957497 6.957497 14042 +browsabl 1 1 6.957497 6.957497 14043 +utaccess 1 1 6.957497 6.957497 14031 +austininform 1 1 6.957497 6.957497 14044 +utcat 1 1 6.957497 6.957497 14045 +systemsdynam 1 1 6.957497 6.957497 14046 +graphicsc 1 1 6.957497 6.957497 14047 +logicc 1 1 6.957497 6.957497 14048 +moviesaustin 1 1 6.957497 6.957497 14049 +filmsmicrosoft 1 1 6.957497 6.957497 14050 +cinemania 1 1 6.957497 6.957497 14051 +onlineal 1 1 6.957497 6.957497 14052 +guidehollywood 1 1 6.957497 6.957497 14053 +onlineinternet 1 1 6.957497 6.957497 14054 +databaserog 1 1 6.957497 6.957497 14055 +ebert 1 1 6.957497 6.957497 14056 +moviesvisit 1 1 6.957497 6.957497 14057 +contactmail 1 1 6.957497 6.957497 14058 +aaustin 1 1 6.957497 6.957497 14059 +emailrtan 1 1 6.957497 6.957497 14060 +fingerclick 1 1 6.957497 6.957497 14061 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..b449cafa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +machin 1 129 2.079442 2.079442 95 +teach 1 108 2.197225 2.197225 112 +need 1 98 2.302585 2.302585 135 +academ 1 82 2.484907 2.484907 178 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +servic 1 72 2.639057 2.639057 236 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +dept 1 64 2.772589 2.772589 291 +think 1 57 2.890372 2.890372 314 +much 1 52 2.995732 2.995732 349 +life 1 50 3.044522 3.044522 375 +cool 1 49 3.044522 3.044522 374 +littl 1 39 3.258097 3.258097 454 +realli 1 40 3.258097 3.258097 444 +actual 1 28 3.610918 3.610918 604 +altern 1 26 3.688879 3.688879 641 +alwai 1 24 3.761200 3.761200 691 +wish 1 24 3.761200 3.761200 692 +almost 1 22 3.850148 3.850148 742 +entir 1 20 3.951244 3.951244 811 +nice 1 20 3.951244 3.951244 809 +modern 1 16 4.174387 4.174387 966 +anyth 1 16 4.174387 4.174387 998 +choos 1 16 4.174387 4.174387 964 +opportun 1 13 4.382027 4.382027 1161 +besid 1 8 4.875197 4.875197 1681 +tang 1 5 5.347108 5.347108 2409 +plant 1 5 5.347108 5.347108 2497 +aspir 1 4 5.568345 5.568345 3019 +fear 1 4 5.568345 5.568345 2911 +freedom 1 3 5.857933 5.857933 3890 +rupert 1 2 6.263398 6.263398 5680 +miracl 1 2 6.263398 6.263398 5710 +holi 1 2 6.263398 6.263398 5711 +stimul 1 2 6.263398 6.263398 5712 +empti 1 2 6.263398 6.263398 5478 +truck 1 2 6.263398 6.263398 5713 +wash 1 2 6.263398 6.263398 5714 +strangl 1 1 6.957497 6.957497 14062 +curious 1 1 6.957497 6.957497 14063 +inquiri 1 1 6.957497 6.957497 14064 +delic 1 1 6.957497 6.957497 14065 +depriv 1 1 6.957497 6.957497 14066 +distast 1 1 6.957497 6.957497 14067 +deni 1 1 6.957497 6.957497 14068 +duress 1 1 6.957497 6.957497 14069 +fate 1 1 6.957497 6.957497 14070 +messi 1 1 6.957497 6.957497 14071 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..fd0eedbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +homepag 1 93 2.397895 2.397895 148 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +frame 1 24 3.761200 3.761200 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 +ruwei 1 1 6.957497 6.957497 14072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..6675910e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +analysi 1 124 2.079442 2.079442 98 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +schedul 1 119 2.079442 2.079442 85 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +appli 1 71 2.639057 2.639057 226 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +meet 1 72 2.639057 2.639057 229 +evalu 1 64 2.772589 2.772589 266 +colleg 1 61 2.833213 2.833213 300 +variou 1 56 2.890372 2.890372 317 +major 1 56 2.890372 2.890372 315 +scientif 1 53 2.944439 2.944439 341 +case 1 51 2.995732 2.995732 351 +numer 1 49 3.044522 3.044522 369 +better 1 45 3.135494 3.135494 401 +http 1 41 3.218876 3.218876 420 +continu 1 39 3.258097 3.258097 448 +tutori 1 39 3.258097 3.258097 437 +robert 1 30 3.555348 3.555348 567 +effort 1 26 3.688879 3.688879 652 +supercomput 1 25 3.737670 3.737670 681 +famili 1 23 3.806662 3.806662 735 +prove 1 19 4.007333 4.007333 848 +former 1 17 4.110874 4.110874 956 +easili 1 14 4.317488 4.317488 1077 +infrastructur 1 12 4.465908 4.465908 1234 +forc 1 10 4.653960 4.653960 1384 +maryland 1 6 5.164786 5.164786 2140 +park 1 6 5.164786 5.164786 2218 +plapack 1 3 5.857933 5.857933 3849 +geijn 1 2 6.263398 6.263398 5715 +appliedmathemat 1 2 6.263398 6.263398 5716 +interestnumer 1 2 6.263398 6.263398 5717 +researchth 1 2 6.263398 6.263398 5492 +geijnassoci 1 1 6.957497 6.957497 14074 +rvdg 1 1 6.957497 6.957497 14073 +oftradit 1 1 6.957497 6.957497 14075 +sequentialmachin 1 1 6.957497 6.957497 14076 +inoth 1 1 6.957497 6.957497 14077 +researchconcentr 1 1 6.957497 6.957497 14078 +forimpl 1 1 6.957497 6.957497 14079 +allowssuch 1 1 6.957497 6.957497 14080 +parallelprocessor 1 1 6.957497 6.957497 14081 +intercom 1 1 6.957497 6.957497 14082 +sl_librari 1 1 6.957497 6.957497 14083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..0e80d0fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +know 1 80 2.564949 2.564949 198 +window 1 68 2.708050 2.708050 242 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +undergradu 1 54 2.944439 2.944439 338 +visitor 1 49 3.044522 3.044522 371 +india 1 32 3.465736 3.465736 550 +computersci 1 30 3.555348 3.555348 562 +els 1 19 4.007333 4.007333 843 +region 1 19 4.007333 4.007333 875 +universityof 1 15 4.248495 4.248495 1061 +countri 1 15 4.248495 4.248495 1059 +settimeout 1 5 5.347108 5.347108 2536 +abraham 1 4 5.568345 5.568345 2644 +seed 1 4 5.568345 5.568345 2984 +timertwo 1 4 5.568345 5.568345 2985 +engg 1 4 5.568345 5.568345 2884 +scrollit_rl 1 3 5.857933 5.857933 3882 +kerala 1 3 5.857933 5.857933 3749 +sciencesand 1 2 6.263398 6.263398 4711 +hail 1 2 6.263398 6.263398 5583 +sundeep 1 1 6.957497 6.957497 14084 +sundeepabraham 1 1 6.957497 6.957497 14085 +calicut 1 1 6.957497 6.957497 14086 +tinkerwith 1 1 6.957497 6.957497 14087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..689ba035 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +address 1 170 1.791759 1.791759 62 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +make 1 111 2.197225 2.197225 120 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +code 1 108 2.197225 2.197225 116 +peopl 1 96 2.302585 2.302585 132 +advanc 1 99 2.302585 2.302585 130 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +activ 1 84 2.484907 2.484907 182 +info 1 85 2.484907 2.484907 176 +refer 1 78 2.564949 2.564949 203 +state 1 76 2.564949 2.564949 207 +server 1 76 2.564949 2.564949 204 +free 1 73 2.639057 2.639057 224 +intellig 1 72 2.639057 2.639057 225 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +prof 1 64 2.772589 2.772589 273 +artifici 1 63 2.772589 2.772589 280 +experi 1 64 2.772589 2.772589 283 +virtual 1 62 2.772589 2.772589 285 +guid 1 63 2.772589 2.772589 267 +taylor 1 63 2.772589 2.772589 287 +right 1 48 3.044522 3.044522 363 +friend 1 48 3.044522 3.044522 376 +algebra 1 45 3.135494 3.135494 394 +press 1 42 3.218876 3.218876 419 +linear 1 41 3.218876 3.218876 431 +map 1 39 3.258097 3.258097 452 +express 1 32 3.465736 3.465736 540 +robert 1 30 3.555348 3.555348 567 +weather 1 28 3.610918 3.610918 618 +packag 1 28 3.610918 3.610918 614 +great 1 27 3.637586 3.637586 626 +mine 1 26 3.688879 3.688879 654 +repres 1 26 3.688879 3.688879 656 +jeff 1 25 3.737670 3.737670 673 +hill 1 25 3.737670 3.737670 670 +yahoo 1 24 3.761200 3.761200 707 +famili 1 23 3.806662 3.806662 735 +miscellan 1 23 3.806662 3.806662 731 +unit 1 21 3.912023 3.912023 779 +lyco 1 19 4.007333 4.007333 871 +exercis 1 19 4.007333 4.007333 842 +hypertext 1 19 4.007333 4.007333 865 +boston 1 19 4.007333 4.007333 862 +white 1 17 4.110874 4.110874 951 +sign 1 16 4.174387 4.174387 970 +hotlist 1 13 4.382027 4.382027 1199 +entertain 1 12 4.465908 4.465908 1286 +america 1 11 4.553877 4.553877 1370 +consortium 1 10 4.653960 4.653960 1467 +yellow 1 9 4.753590 4.753590 1601 +congress 1 9 4.753590 4.753590 1592 +respect 1 9 4.753590 4.753590 1545 +establish 1 9 4.753590 4.753590 1532 +govern 1 9 4.753590 4.753590 1581 +calvin 1 9 4.753590 4.753590 1518 +swim 1 9 4.753590 4.753590 1599 +yanni 1 8 4.875197 4.875197 1713 +hockei 1 8 4.875197 4.875197 1760 +opinion 1 8 4.875197 4.875197 1708 +chronicl 1 7 5.010635 5.010635 1952 +necessarili 1 7 5.010635 5.010635 1899 +altavista 1 6 5.164786 5.164786 2222 +constitut 1 6 5.164786 5.164786 2026 +quick 1 6 5.164786 5.164786 2184 +andrea 1 5 5.347108 5.347108 2375 +shall 1 3 5.857933 5.857933 3891 +abridg 1 3 5.857933 5.857933 3772 +freedom 1 3 5.857933 5.857933 3890 +plapack 1 3 5.857933 5.857933 3849 +health 1 3 5.857933 5.857933 3787 +concert 1 3 5.857933 5.857933 3533 +thereof 1 2 6.263398 6.263398 5484 +geijn 1 2 6.263398 6.263398 5715 +musician 1 2 6.263398 6.263398 5718 +hamilton 1 2 6.263398 6.263398 5719 +guyer 1 2 6.263398 6.263398 4171 +northwestern 1 2 6.263398 6.263398 5502 +nate 1 2 6.263398 6.263398 5720 +dell 1 2 6.263398 6.263398 4193 +fring 1 2 6.263398 6.263398 5721 +sammi 1 1 6.957497 6.957497 14088 +startingpoint 1 1 6.957497 6.957497 14089 +religion 1 1 6.957497 6.957497 14090 +orprohibit 1 1 6.957497 6.957497 14091 +ofspeech 1 1 6.957497 6.957497 14092 +peaceabl 1 1 6.957497 6.957497 14093 +toassembl 1 1 6.957497 6.957497 14094 +petit 1 1 6.957497 6.957497 14095 +redress 1 1 6.957497 6.957497 14096 +grievanc 1 1 6.957497 6.957497 14097 +herbarium 1 1 6.957497 6.957497 14098 +anagram 1 1 6.957497 6.957497 14099 +nil 1 1 6.957497 6.957497 14100 +reker 1 1 6.957497 6.957497 14101 +pop 1 1 6.957497 6.957497 14102 +anthropolog 1 1 6.957497 6.957497 14103 +kate 1 1 6.957497 6.957497 14104 +showbiz 1 1 6.957497 6.957497 14105 +pollstar 1 1 6.957497 6.957497 14106 +ryder 1 1 6.957497 6.957497 14107 +laptop 1 1 6.957497 6.957497 14108 +traveloc 1 1 6.957497 6.957497 14109 +eduth 1 1 6.957497 6.957497 14110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..bd24b56e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +file 1 132 1.945910 1.945910 70 +report 1 131 2.079442 2.079442 92 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +method 1 80 2.564949 2.564949 213 +logic 1 71 2.639057 2.639057 230 +main 1 67 2.708050 2.708050 256 +taylor 1 63 2.772589 2.772589 287 +abstract 1 62 2.772589 2.772589 276 +frequent 1 49 3.044522 3.044522 367 +around 1 43 3.178054 3.178054 415 +edit 1 42 3.218876 3.218876 418 +formal 1 37 3.332205 3.332205 478 +dissert 1 32 3.465736 3.465736 549 +common 1 30 3.555348 3.555348 574 +univ 1 28 3.610918 3.610918 617 +propos 1 28 3.610918 3.610918 602 +fellow 1 24 3.761200 3.761200 701 +lisp 1 18 4.060443 4.060443 897 +boyer 1 6 5.164786 5.164786 2013 +sawada 1 3 5.857933 5.857933 3190 +oral 1 3 5.857933 5.857933 3189 +teacher 1 3 5.857933 5.857933 3892 +supplementari 1 2 6.263398 6.263398 4752 +bowen 1 2 6.263398 6.263398 4170 +sawadajun 1 1 6.957497 6.957497 14111 +sawadacontact 1 1 6.957497 6.957497 14112 +wooten 1 1 6.957497 6.957497 14113 +kbresourc 1 1 6.957497 6.957497 14114 +pvsother 1 1 6.957497 6.957497 14115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..54e8ecea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +avail 1 169 1.791759 1.791759 48 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +like 1 132 1.945910 1.945910 81 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +tool 1 117 2.079442 2.079442 93 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +specif 1 106 2.197225 2.197225 106 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +take 1 97 2.302585 2.302585 134 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +decemb 1 80 2.564949 2.564949 215 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +simul 1 66 2.708050 2.708050 255 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +visit 1 63 2.772589 2.772589 288 +creat 1 63 2.772589 2.772589 277 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +share 1 59 2.833213 2.833213 304 +simpl 1 60 2.833213 2.833213 298 +index 1 56 2.890372 2.890372 309 +overview 1 56 2.890372 2.890372 323 +basic 1 50 3.044522 3.044522 360 +approach 1 48 3.044522 3.044522 366 +get 1 46 3.091042 3.091042 380 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +term 1 43 3.178054 3.178054 411 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +feel 1 37 3.332205 3.332205 483 +product 1 33 3.433987 3.433987 527 +independ 1 32 3.465736 3.465736 548 +transform 1 32 3.465736 3.465736 542 +titl 1 31 3.496508 3.496508 556 +compon 1 30 3.555348 3.555348 570 +domain 1 30 3.555348 3.555348 564 +scale 1 28 3.610918 3.610918 613 +releas 1 28 3.610918 3.610918 616 +utc 1 27 3.637586 3.637586 629 +manipul 1 27 3.637586 3.637586 624 +subject 1 26 3.688879 3.688879 647 +demonstr 1 24 3.761200 3.761200 694 +scalabl 1 24 3.761200 3.761200 705 +pattern 1 24 3.761200 3.761200 689 +equat 1 23 3.806662 3.806662 724 +compress 1 23 3.806662 3.806662 719 +defin 1 22 3.850148 3.850148 746 +recommend 1 22 3.850148 3.850148 737 +period 1 22 3.850148 3.850148 743 +thu 1 21 3.912023 3.912023 773 +reflect 1 15 4.248495 4.248495 1034 +goe 1 15 4.248495 4.248495 1044 +composit 1 13 4.382027 4.382027 1150 +dbm 1 13 4.382027 4.382027 1136 +assembl 1 12 4.465908 4.465908 1207 +pagewelcom 1 11 4.553877 4.553877 1344 +typic 1 11 4.553877 4.553877 1360 +refin 1 11 4.553877 4.553877 1363 +valid 1 11 4.553877 4.553877 1299 +evolut 1 11 4.553877 4.553877 1314 +modul 1 10 4.653960 4.653960 1434 +relationship 1 10 4.653960 4.653960 1383 +bart 1 9 4.753590 4.753590 1559 +reus 1 8 4.875197 4.875197 1661 +successfulli 1 7 5.010635 5.010635 1869 +beyond 1 7 5.010635 5.010635 1834 +deliv 1 6 5.164786 5.164786 2070 +avion 1 4 5.568345 5.568345 3018 +substanti 1 4 5.568345 5.568345 2921 +batori 1 4 5.568345 5.568345 2690 +metadata 1 4 5.568345 5.568345 2945 +breadth 1 4 5.568345 5.568345 2695 +interchang 1 3 5.857933 5.857933 3893 +tokuda 1 3 5.857933 5.857933 3266 +smaragdaki 1 3 5.857933 5.857933 3851 +lightweight 1 3 5.857933 5.857933 3234 +reusabl 1 2 6.263398 6.263398 4218 +marti 1 2 6.263398 6.263398 5679 +encapsul 1 2 6.263398 6.263398 5541 +ssgrg 1 1 6.957497 6.957497 14117 +genvoca 1 1 6.957497 6.957497 14116 +professorangela 1 1 6.957497 6.957497 14118 +dappert 1 1 6.957497 6.957497 14119 +studentguillermo 1 1 6.957497 6.957497 14120 +jimenez 1 1 6.957497 6.957497 14121 +perezph 1 1 6.957497 6.957497 14122 +studentjeff 1 1 6.957497 6.957497 14123 +thomasph 1 1 6.957497 6.957497 14124 +studentl 1 1 6.957497 6.957497 14125 +studentyanni 1 1 6.957497 6.957497 14126 +studentk 1 1 6.957497 6.957497 14127 +shepherdresearch 1 1 6.957497 6.957497 14128 +associateform 1 1 6.957497 6.957497 14129 +datesdinesh 1 1 6.957497 6.957497 14130 +dasph 1 1 6.957497 6.957497 14131 +milli 1 1 6.957497 6.957497 14132 +villarrealph 1 1 6.957497 6.957497 14133 +geracipostdoc 1 1 6.957497 6.957497 14134 +sirkinph 1 1 6.957497 6.957497 14135 +sankar 1 1 6.957497 6.957497 14136 +dasarim 1 1 6.957497 6.957497 14137 +starter 1 1 6.957497 6.957497 14138 +reengin 1 1 6.957497 6.957497 14139 +generatorsautom 1 1 6.957497 6.957497 14140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..9e830ff8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +machin 1 129 2.079442 2.079442 95 +look 1 107 2.197225 2.197225 115 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +send 1 114 2.197225 2.197225 109 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +stuff 1 87 2.484907 2.484907 171 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +orient 1 80 2.564949 2.564949 205 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +name 1 72 2.639057 2.639057 220 +free 1 73 2.639057 2.639057 224 +differ 1 66 2.708050 2.708050 253 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +guid 1 63 2.772589 2.772589 267 +import 1 65 2.772589 2.772589 282 +colleg 1 61 2.833213 2.833213 300 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +publish 1 57 2.890372 2.890372 326 +undergradu 1 54 2.944439 2.944439 338 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +maintain 1 51 2.995732 2.995732 342 +basic 1 50 3.044522 3.044522 360 +cool 1 49 3.044522 3.044522 374 +without 1 50 3.044522 3.044522 370 +still 1 50 3.044522 3.044522 362 +right 1 48 3.044522 3.044522 363 +could 1 46 3.091042 3.091042 383 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +futur 1 41 3.218876 3.218876 427 +might 1 41 3.218876 3.218876 426 +realli 1 40 3.258097 3.258097 444 +littl 1 39 3.258097 3.258097 454 +probabl 1 40 3.258097 3.258097 455 +mean 1 37 3.332205 3.332205 477 +expect 1 37 3.332205 3.332205 484 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +within 1 33 3.433987 3.433987 525 +kind 1 32 3.465736 3.465736 541 +actual 1 28 3.610918 3.610918 604 +load 1 28 3.610918 3.610918 601 +utc 1 27 3.637586 3.637586 629 +great 1 27 3.637586 3.637586 626 +linux 1 27 3.637586 3.637586 631 +spent 1 25 3.737670 3.737670 676 +miscellan 1 23 3.806662 3.806662 731 +finish 1 22 3.850148 3.850148 748 +try 1 22 3.850148 3.850148 764 +dai 1 22 3.850148 3.850148 753 +longer 1 20 3.951244 3.951244 816 +sure 1 20 3.951244 3.951244 813 +minut 1 20 3.951244 3.951244 810 +spend 1 19 4.007333 4.007333 850 +item 1 19 4.007333 4.007333 856 +five 1 19 4.007333 4.007333 841 +scott 1 18 4.060443 4.060443 884 +hobbi 1 16 4.174387 4.174387 1009 +doesn 1 15 4.248495 4.248495 1055 +wait 1 13 4.382027 4.382027 1168 +neat 1 12 4.465908 4.465908 1263 +appl 1 11 4.553877 4.553877 1303 +noth 1 11 4.553877 4.553877 1328 +santa 1 10 4.653960 4.653960 1441 +didn 1 9 4.753590 4.753590 1563 +oop 1 8 4.875197 4.875197 1778 +forget 1 8 4.875197 4.875197 1712 +perfect 1 7 5.010635 5.010635 1921 +pageth 1 7 5.010635 5.010635 1939 +encrypt 1 7 5.010635 5.010635 1835 +squash 1 6 5.164786 5.164786 2223 +beer 1 6 5.164786 5.164786 2216 +sharp 1 6 5.164786 5.164786 2100 +amherst 1 5 5.347108 5.347108 2484 +humor 1 5 5.347108 5.347108 2533 +pagescott 1 4 5.568345 5.568345 2978 +amaz 1 4 5.568345 5.568345 2600 +slight 1 3 5.857933 5.857933 3894 +glenn 1 3 5.857933 5.857933 3869 +down 1 3 5.857933 5.857933 3870 +wine 1 3 5.857933 5.857933 3895 +maker 1 3 5.857933 5.857933 3164 +dine 1 3 5.857933 5.857933 3472 +citizen 1 3 5.857933 5.857933 3238 +iici 1 3 5.857933 5.857933 3436 +bright 1 3 5.857933 5.857933 3596 +fanci 1 2 6.263398 6.263398 4992 +unpredict 1 2 6.263398 6.263398 5722 +stuffit 1 2 6.263398 6.263398 4127 +invalu 1 2 6.263398 6.263398 4680 +forev 1 2 6.263398 6.263398 5636 +grab 1 2 6.263398 6.263398 5723 +pageokai 1 1 6.957497 6.957497 14141 +overdu 1 1 6.957497 6.957497 14142 +mead 1 1 6.957497 6.957497 14143 +psion 1 1 6.957497 6.957497 14144 +palmtop 1 1 6.957497 6.957497 14145 +anastasi 1 1 6.957497 6.957497 14146 +poke 1 1 6.957497 6.957497 14147 +ala 1 1 6.957497 6.957497 14148 +bebox 1 1 6.957497 6.957497 14149 +sfkaplan 1 1 6.957497 6.957497 14150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..b40ad454 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +comment 1 93 2.397895 2.397895 146 +build 1 85 2.484907 2.484907 184 +onlin 1 75 2.639057 2.639057 223 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +finger 1 52 2.995732 2.995732 354 +get 1 46 3.091042 3.091042 380 +log 1 19 4.007333 4.007333 857 +floor 1 14 4.317488 4.317488 1070 +touch 1 12 4.465908 4.465908 1288 +river 1 6 5.164786 5.164786 2220 +shenoi 1 3 5.857933 5.857933 3269 +tower 1 3 5.857933 5.857933 3818 +prashant 1 2 6.263398 6.263398 4331 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..3b4b06cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +tabl 1 51 2.995732 2.995732 346 +finger 1 52 2.995732 2.995732 354 +campu 1 27 3.637586 3.637586 623 +vlsi 1 21 3.912023 3.912023 795 +citi 1 19 4.007333 4.007333 874 +vallei 1 7 5.010635 5.010635 1959 +ongo 1 6 5.164786 5.164786 2215 +coffe 1 5 5.347108 5.347108 2556 +pleasant 1 3 5.857933 5.857933 3825 +bookshelf 1 2 6.263398 6.263398 5724 +shaob 1 1 6.957497 6.957497 14151 +cyberhom 1 1 6.957497 6.957497 14152 +hardvar 1 1 6.957497 6.957497 14153 +verifc 1 1 6.957497 6.957497 14154 +shma 1 1 6.957497 6.957497 14155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..17f5fb14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +mathemat 1 108 2.197225 2.197225 123 +proceed 1 93 2.397895 2.397895 152 +internet 1 83 2.484907 2.484907 186 +learn 1 86 2.484907 2.484907 170 +dynam 1 76 2.564949 2.564949 194 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +line 1 75 2.639057 2.639057 231 +appli 1 71 2.639057 2.639057 226 +symposium 1 72 2.639057 2.639057 238 +artifici 1 63 2.772589 2.772589 280 +processor 1 54 2.944439 2.944439 335 +life 1 50 3.044522 3.044522 375 +adapt 1 46 3.091042 3.091042 387 +music 1 42 3.218876 3.218876 436 +annual 1 40 3.258097 3.258097 458 +india 1 32 3.465736 3.465736 550 +neural 1 30 3.555348 3.555348 578 +qualiti 1 20 3.951244 3.951244 832 +massiv 1 15 4.248495 4.248495 1026 +nonlinear 1 14 4.317488 4.317488 1107 +affili 1 13 4.382027 4.382027 1194 +automata 1 13 4.382027 4.382027 1135 +avenu 1 12 4.465908 4.465908 1277 +itali 1 11 4.553877 4.553877 1378 +evolut 1 11 4.553877 4.553877 1314 +genet 1 10 4.653960 4.653960 1409 +kumar 1 9 4.753590 4.753590 1506 +risto 1 9 4.753590 4.753590 1523 +chao 1 8 4.875197 4.875197 1753 +miikkulainen 1 8 4.875197 4.875197 1667 +signal 1 7 5.010635 5.010635 1910 +edumi 1 6 5.164786 5.164786 2132 +cellular 1 5 5.347108 5.347108 2433 +dual 1 5 5.347108 5.347108 2522 +austindepart 1 4 5.568345 5.568345 3008 +reinforc 1 4 5.568345 5.568345 2674 +snail 1 4 5.568345 5.568345 2916 +sciencestaylor 1 3 5.857933 5.857933 3814 +patrick 1 3 5.857933 5.857933 3334 +shailesh 1 2 6.263398 6.263398 5578 +fuzzi 1 2 6.263398 6.263398 5423 +publicationson 1 2 6.263398 6.263398 4899 +singh 1 2 6.263398 6.263398 5675 +kumarshailesh 1 1 6.957497 6.957497 14156 +kumarth 1 1 6.957497 6.957497 14157 +skumar 1 1 6.957497 6.957497 14158 +resumeresearch 1 1 6.957497 6.957497 14159 +publicationscontact 1 1 6.957497 6.957497 14160 +mesrcm 1 1 6.957497 6.957497 14161 +spiritu 1 1 6.957497 6.957497 14162 +offersom 1 1 6.957497 6.957497 14163 +linkscognit 1 1 6.957497 6.957497 14164 +scienceutc 1 1 6.957497 6.957497 14165 +researchutc 1 1 6.957497 6.957497 14166 +groupresearch 1 1 6.957497 6.957497 14167 +neuroevolut 1 1 6.957497 6.957497 14168 +predistort 1 1 6.957497 6.957497 14169 +goetz 1 1 6.957497 6.957497 14170 +bari 1 1 6.957497 6.957497 14171 +bord 1 1 6.957497 6.957497 14172 +aprl 1 1 6.957497 6.957497 14173 +whiti 1 1 6.957497 6.957497 14174 +offernet 1 1 6.957497 6.957497 14175 +assistancesearch 1 1 6.957497 6.957497 14176 +institutewww 1 1 6.957497 6.957497 14177 +infoindia 1 1 6.957497 6.957497 14178 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..1bab60fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +utexa 1 189 1.609438 1.609438 44 +applic 1 170 1.791759 1.791759 56 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +area 1 144 1.945910 1.945910 80 +make 1 111 2.197225 2.197225 120 +world 1 115 2.197225 2.197225 126 +main 1 67 2.708050 2.708050 256 +plan 1 65 2.772589 2.772589 272 +favorit 1 44 3.135494 3.135494 410 +posit 1 31 3.496508 3.496508 552 +someth 1 31 3.496508 3.496508 554 +photo 1 31 3.496508 3.496508 561 +turn 1 29 3.583519 3.583519 586 +utc 1 27 3.637586 3.637586 629 +lead 1 23 3.806662 3.806662 718 +scheme 1 20 3.951244 3.951244 818 +particularli 1 19 4.007333 4.007333 867 +success 1 10 4.653960 4.653960 1390 +meta 1 9 4.753590 4.753590 1505 +yanni 1 8 4.875197 4.875197 1713 +gold 1 8 4.875197 4.875197 1745 +dictionari 1 8 4.875197 4.875197 1642 +moder 1 6 5.164786 5.164786 2112 +arrang 1 6 5.164786 5.164786 2023 +webster 1 5 5.347108 5.347108 2468 +album 1 4 5.568345 5.568345 2888 +smaragdaki 1 3 5.857933 5.857933 3851 +serious 1 3 5.857933 5.857933 3663 +alchemi 1 1 6.957497 6.957497 14179 +sitessmaragd 1 1 6.957497 6.957497 14180 +eduyanni 1 1 6.957497 6.957497 14181 +smaragdakisunivers 1 1 6.957497 6.957497 14182 +departmenttai 1 1 6.957497 6.957497 14183 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..84533fa1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +note 1 142 1.945910 1.945910 67 +site 1 106 2.197225 2.197225 119 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +center 1 88 2.397895 2.397895 158 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +addit 1 74 2.639057 2.639057 228 +experi 1 64 2.772589 2.772589 283 +previou 1 62 2.772589 2.772589 290 +life 1 50 3.044522 3.044522 375 +visitor 1 49 3.044522 3.044522 371 +mean 1 37 3.332205 3.332205 477 +field 1 37 3.332205 3.332205 482 +effort 1 26 3.688879 3.688879 652 +tell 1 21 3.912023 3.912023 777 +basi 1 20 3.951244 3.951244 828 +ever 1 19 4.007333 4.007333 872 +wind 1 18 4.060443 4.060443 908 +stream 1 15 4.248495 4.248495 1015 +trip 1 14 4.317488 4.317488 1113 +bodi 1 13 4.382027 4.382027 1178 +danc 1 12 4.465908 4.465908 1278 +duli 1 12 4.465908 4.465908 1248 +absolut 1 8 4.875197 4.875197 1646 +wouldn 1 7 5.010635 5.010635 1970 +edward 1 6 5.164786 5.164786 2050 +greatest 1 6 5.164786 5.164786 2073 +ignor 1 5 5.347108 5.347108 2288 +shadow 1 3 5.857933 5.857933 3519 +haiku 1 3 5.857933 5.857933 3811 +eddi 1 3 5.857933 5.857933 3896 +danger 1 2 6.263398 6.263398 5725 +strictli 1 2 6.263398 6.263398 5726 +stimul 1 2 6.263398 6.263398 5712 +asphalt 1 1 6.957497 6.957497 14185 +moonlight 1 1 6.957497 6.957497 14186 +nerv 1 1 6.957497 6.957497 14187 +dy 1 1 6.957497 6.957497 14188 +minion 1 1 6.957497 6.957497 14184 +pania 1 1 6.957497 6.957497 14189 +leaf 1 1 6.957497 6.957497 14190 +afloat 1 1 6.957497 6.957497 14191 +waterfal 1 1 6.957497 6.957497 14192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..11254931 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +site 1 106 2.197225 2.197225 119 +search 1 95 2.397895 2.397895 155 +member 1 84 2.484907 2.484907 165 +knowledg 1 67 2.708050 2.708050 243 +tech 1 35 3.401197 3.401197 515 +hotlist 1 13 4.382027 4.382027 1199 +souther 1 3 5.857933 5.857933 3795 +southerart 1 1 6.957497 6.957497 14193 +southerresearchbuild 1 1 6.957497 6.957497 14194 +reportsouth 1 1 6.957497 6.957497 14195 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..107200f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +austin 1 168 1.791759 1.791759 63 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +theori 1 111 2.197225 2.197225 127 +techniqu 1 99 2.302585 2.302585 138 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +exampl 1 77 2.564949 2.564949 195 +resum 1 79 2.564949 2.564949 217 +intellig 1 72 2.639057 2.639057 225 +appli 1 71 2.639057 2.639057 226 +creat 1 63 2.772589 2.772589 277 +taylor 1 63 2.772589 2.772589 287 +approach 1 48 3.044522 3.044522 366 +field 1 37 3.332205 3.332205 482 +tech 1 35 3.401197 3.401197 515 +india 1 32 3.465736 3.465736 550 +postal 1 30 3.555348 3.555348 580 +symbol 1 27 3.637586 3.637586 620 +challeng 1 26 3.688879 3.688879 653 +revis 1 26 3.688879 3.688879 640 +variabl 1 23 3.806662 3.806662 715 +indian 1 22 3.850148 3.850148 769 +madra 1 8 4.875197 4.875197 1770 +connectionist 1 5 5.347108 5.347108 2430 +sowmya 1 4 5.568345 5.568345 2670 +bayesian 1 4 5.568345 5.568345 2671 +groupunivers 1 3 5.857933 5.857933 3831 +multimediaappl 1 3 5.857933 5.857933 3274 +rutger 1 3 5.857933 5.857933 3566 +austinresearchmi 1 2 6.263398 6.263398 5644 +ramachandransowmya 1 1 6.957497 6.957497 14196 +ramachandranmachin 1 1 6.957497 6.957497 14197 +ofartif 1 1 6.957497 6.957497 14198 +learningbayesian 1 1 6.957497 6.957497 14199 +withhidden 1 1 6.957497 6.957497 14200 +thisproblem 1 1 6.957497 6.957497 14201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..5a8c8829 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +implement 1 152 1.791759 1.791759 52 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +involv 1 71 2.639057 2.639057 227 +free 1 73 2.639057 2.639057 224 +multimedia 1 68 2.708050 2.708050 258 +prof 1 64 2.772589 2.772589 273 +locat 1 59 2.833213 2.833213 303 +advisor 1 51 2.995732 2.995732 355 +hill 1 25 3.737670 3.737670 670 +countri 1 15 4.248495 4.248495 1059 +central 1 13 4.382027 4.382027 1160 +herefor 1 9 4.753590 4.753590 1483 +informationabout 1 9 4.753590 4.753590 1515 +austinaustin 1 7 5.010635 5.010635 1966 +capit 1 7 5.010635 5.010635 1957 +sciencesdepart 1 6 5.164786 5.164786 2020 +isth 1 5 5.347108 5.347108 2532 +edudepart 1 3 5.857933 5.857933 3302 +sriram 1 2 6.263398 6.263398 4550 +multimediai 1 2 6.263398 6.263398 4337 +raocurr 1 1 6.957497 6.957497 14202 +systemoper 1 1 6.957497 6.957497 14203 +multimediagroup 1 1 6.957497 6.957497 14204 +harrickvinpublicationsminegroupcontact 1 1 6.957497 6.957497 14205 +informationofficetai 1 1 6.957497 6.957497 14206 +miscellaneousotherinterest 1 1 6.957497 6.957497 14207 +pagespicturesof 1 1 6.957497 6.957497 14208 +toweraustin 1 1 6.957497 6.957497 14209 +kannada 1 1 6.957497 6.957497 14210 +koota 1 1 6.957497 6.957497 14211 +tamil 1 1 6.957497 6.957497 14212 +sangam 1 1 6.957497 6.957497 14213 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..52eaafc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +challeng 1 26 3.688879 3.688879 653 +frame 1 24 3.761200 3.761200 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 +tiger 1 3 5.857933 5.857933 3897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..22d7dee7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +machin 1 129 2.079442 2.079442 95 +assist 1 112 2.197225 2.197225 113 +present 1 91 2.397895 2.397895 145 +real 1 93 2.397895 2.397895 144 +educ 1 86 2.484907 2.484907 191 +master 1 76 2.564949 2.564949 216 +nation 1 74 2.639057 2.639057 240 +august 1 66 2.708050 2.708050 257 +experi 1 64 2.772589 2.772589 283 +prof 1 64 2.772589 2.772589 273 +septemb 1 65 2.772589 2.772589 274 +finger 1 52 2.995732 2.995732 354 +author 1 39 3.258097 3.258097 450 +administr 1 27 3.637586 3.637586 628 +utc 1 27 3.637586 3.637586 629 +log 1 19 4.007333 4.007333 857 +edulast 1 17 4.110874 4.110874 927 +chemic 1 5 5.347108 5.347108 2552 +korea 1 4 5.568345 5.568345 2971 +seoul 1 3 5.857933 5.857933 3783 +aloysiu 1 3 5.857933 5.857933 3829 +choiwelcom 1 2 6.263398 6.263398 5727 +sunghe 1 1 6.957497 6.957497 14214 +choisunghe 1 1 6.957497 6.957497 14215 +nuec 1 1 6.957497 6.957497 14216 +choiemail 1 1 6.957497 6.957497 14217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..81714531 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +provid 1 121 2.079442 2.079442 94 +postscript 1 131 2.079442 2.079442 90 +pleas 1 113 2.197225 2.197225 114 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +call 1 91 2.397895 2.397895 153 +center 1 88 2.397895 2.397895 158 +member 1 84 2.484907 2.484907 165 +larg 1 82 2.484907 2.484907 168 +effici 1 73 2.639057 2.639057 233 +taylor 1 63 2.772589 2.772589 287 +descript 1 64 2.772589 2.772589 271 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +best 1 59 2.833213 2.833213 299 +finger 1 52 2.995732 2.995732 354 +hardwar 1 51 2.995732 2.995732 350 +pointer 1 48 3.044522 3.044522 368 +standard 1 48 3.044522 3.044522 365 +postal 1 30 3.555348 3.555348 580 +usual 1 28 3.610918 3.610918 608 +full 1 28 3.610918 3.610918 615 +reach 1 24 3.761200 3.761200 688 +inth 1 22 3.850148 3.850148 741 +along 1 18 4.060443 4.060443 878 +easi 1 16 4.174387 4.174387 969 +brief 1 16 4.174387 4.174387 1001 +novel 1 15 4.248495 4.248495 1039 +persist 1 11 4.553877 4.553877 1367 +motorola 1 9 4.753590 4.753590 1546 +oop 1 8 4.875197 4.875197 1778 +myresum 1 6 5.164786 5.164786 2199 +informationi 1 3 5.857933 5.857933 3871 +swizzl 1 3 5.857933 5.857933 3883 +sheetal 1 2 6.263398 6.263398 5684 +isvia 1 2 6.263398 6.263398 5637 +mypubl 1 2 6.263398 6.263398 5707 +somerset 1 2 6.263398 6.263398 5639 +kakkad 1 2 6.263398 6.263398 5685 +kakkadsheet 1 1 6.957497 6.957497 14218 +kakkadcontact 1 1 6.957497 6.957497 14219 +storagesystem 1 1 6.957497 6.957497 14220 +faulttim 1 1 6.957497 6.957497 14221 +whilefinish 1 1 6.957497 6.957497 14222 +svkakkad 1 1 6.957497 6.957497 14223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..48a1b2bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +check 1 115 2.197225 2.197225 118 +comment 1 93 2.397895 2.397895 146 +octob 1 89 2.397895 2.397895 156 +school 1 84 2.484907 2.484907 188 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +suggest 1 53 2.944439 2.944439 331 +finger 1 52 2.995732 2.995732 354 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +art 1 29 3.583519 3.583519 593 +weather 1 28 3.610918 3.610918 618 +campu 1 27 3.637586 3.637586 623 +todai 1 25 3.737670 3.737670 672 +highli 1 23 3.806662 3.806662 725 +voic 1 21 3.912023 3.912023 806 +item 1 19 4.007333 4.007333 856 +medic 1 17 4.110874 4.110874 958 +academi 1 8 4.875197 4.875197 1735 +scholar 1 6 5.164786 5.164786 2180 +appreci 1 5 5.347108 5.347108 2374 +sale 1 3 5.857933 5.857933 3688 +meyour 1 3 5.857933 5.857933 3858 +registrar 1 2 6.263398 6.263398 5611 +gradaut 1 2 6.263398 6.263398 5612 +studiesut 1 2 6.263398 6.263398 5613 +novelschines 1 2 6.263398 6.263398 5610 +visitorsinc 1 2 6.263398 6.263398 5616 +shengm 1 1 6.957497 6.957497 14224 +homepageabout 1 1 6.957497 6.957497 14225 +classmatesclass 1 1 6.957497 6.957497 14226 +ustc 1 1 6.957497 6.957497 14227 +sciencesus 1 1 6.957497 6.957497 14228 +linksut 1 1 6.957497 6.957497 14229 +libraryutaccesschines 1 1 6.957497 6.957497 14230 +associationchina 1 1 6.957497 6.957497 14231 +chinesechinainternet 1 1 6.957497 6.957497 14232 +magazinestsinghua 1 1 6.957497 6.957497 14233 +bbsncic 1 1 6.957497 6.957497 14234 +bbschines 1 1 6.957497 6.957497 14235 +classicsabout 1 1 6.957497 6.957497 14236 +austinwhat 1 1 6.957497 6.957497 14237 +citylimitsclassifi 1 1 6.957497 6.957497 14238 +austinto 1 1 6.957497 6.957497 14239 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..b77cc812 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +frame 1 24 3.761200 3.761200 684 +wang 1 21 3.912023 3.912023 790 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..8e98b6e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..25357101 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +name 1 72 2.639057 2.639057 220 +multimedia 1 68 2.708050 2.708050 258 +dept 1 64 2.772589 2.772589 291 +plai 1 60 2.833213 2.833213 307 +done 1 47 3.091042 3.091042 381 +option 1 30 3.555348 3.555348 575 +watson 1 8 4.875197 4.875197 1691 +bore 1 7 5.010635 5.010635 1948 +internship 1 3 5.857933 5.857933 3764 +renu 1 1 6.957497 6.957497 14240 +tewarirenu 1 1 6.957497 6.957497 14241 +tewariwhat 1 1 6.957497 6.957497 14242 +addresshom 1 1 6.957497 6.957497 14243 +tewari 1 1 6.957497 6.957497 14244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..828c1b55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +assist 1 112 2.197225 2.197225 113 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +stuff 1 87 2.484907 2.484907 171 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +come 1 78 2.564949 2.564949 202 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +messag 1 76 2.564949 2.564949 212 +state 1 76 2.564949 2.564949 207 +resum 1 79 2.564949 2.564949 217 +logic 1 71 2.639057 2.639057 230 +addit 1 74 2.639057 2.639057 228 +appli 1 71 2.639057 2.639057 226 +html 1 75 2.639057 2.639057 235 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +investig 1 51 2.995732 2.995732 353 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +formal 1 37 3.332205 3.332205 478 +mean 1 37 3.332205 3.332205 477 +soon 1 36 3.367296 3.367296 494 +random 1 34 3.401197 3.401197 511 +photo 1 31 3.496508 3.496508 561 +secur 1 30 3.555348 3.555348 577 +postal 1 30 3.555348 3.555348 580 +client 1 25 3.737670 3.737670 679 +synthesi 1 20 3.951244 3.951244 834 +verif 1 20 3.951244 3.951244 826 +analyz 1 17 4.110874 4.110874 925 +draft 1 14 4.317488 4.317488 1085 +finit 1 14 4.317488 4.317488 1106 +stai 1 12 4.465908 4.465908 1215 +tune 1 12 4.465908 4.465908 1227 +genet 1 10 4.653960 4.653960 1409 +metric 1 7 5.010635 5.010635 1831 +photographi 1 6 5.164786 5.164786 2146 +pierc 1 4 5.568345 5.568345 2623 +queu 1 4 5.568345 5.568345 2648 +blvd 1 4 5.568345 5.568345 3007 +evolutionari 1 3 5.857933 5.857933 3898 +amwork 1 2 6.263398 6.263398 4850 +communicatewith 1 2 6.263398 6.263398 5062 +tumlin 1 1 6.957497 6.957497 14245 +brenda 1 1 6.957497 6.957497 14246 +ladd 1 1 6.957497 6.957497 14247 +authenticationprotocol 1 1 6.957497 6.957497 14248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..e1d1bcdc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +click 1 142 1.945910 1.945910 78 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +postscript 1 131 2.079442 2.079442 90 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +introduct 1 126 2.079442 2.079442 87 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +commun 1 95 2.397895 2.397895 157 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +real 1 93 2.397895 2.397895 144 +associ 1 93 2.397895 2.397895 151 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +school 1 84 2.484907 2.484907 188 +learn 1 86 2.484907 2.484907 170 +info 1 85 2.484907 2.484907 176 +resum 1 79 2.564949 2.564949 217 +april 1 77 2.564949 2.564949 196 +know 1 80 2.564949 2.564949 198 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +java 1 70 2.708050 2.708050 248 +multimedia 1 68 2.708050 2.708050 258 +differ 1 66 2.708050 2.708050 253 +visit 1 63 2.772589 2.772589 288 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +plan 1 65 2.772589 2.772589 272 +laboratori 1 63 2.772589 2.772589 292 +prof 1 64 2.772589 2.772589 273 +creat 1 63 2.772589 2.772589 277 +plai 1 60 2.833213 2.833213 307 +unix 1 58 2.890372 2.890372 308 +semest 1 58 2.890372 2.890372 312 +sampl 1 53 2.944439 2.944439 339 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +life 1 50 3.044522 3.044522 375 +standard 1 48 3.044522 3.044522 365 +friend 1 48 3.044522 3.044522 376 +still 1 50 3.044522 3.044522 362 +visitor 1 49 3.044522 3.044522 371 +netscap 1 44 3.135494 3.135494 395 +anoth 1 45 3.135494 3.135494 408 +protocol 1 45 3.135494 3.135494 407 +made 1 44 3.135494 3.135494 398 +favorit 1 44 3.135494 3.135494 410 +compani 1 41 3.218876 3.218876 423 +music 1 42 3.218876 3.218876 436 +form 1 39 3.258097 3.258097 443 +movi 1 40 3.258097 3.258097 459 +tutori 1 39 3.258097 3.258097 437 +china 1 37 3.332205 3.332205 487 +robot 1 36 3.367296 3.367296 497 +copyright 1 36 3.367296 3.367296 495 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +kind 1 32 3.465736 3.465736 541 +anim 1 31 3.496508 3.496508 557 +compon 1 30 3.555348 3.555348 570 +chines 1 29 3.583519 3.583519 595 +synchron 1 29 3.583519 3.583519 588 +full 1 28 3.610918 3.610918 615 +quit 1 27 3.637586 3.637586 633 +never 1 25 3.737670 3.737670 671 +background 1 25 3.737670 3.737670 664 +reach 1 24 3.761200 3.761200 688 +mobil 1 23 3.806662 3.806662 730 +thank 1 23 3.806662 3.806662 721 +recommend 1 22 3.850148 3.850148 737 +wang 1 21 3.912023 3.912023 790 +viewer 1 21 3.912023 3.912023 787 +chen 1 21 3.912023 3.912023 791 +leav 1 21 3.912023 3.912023 772 +applet 1 20 3.951244 3.951244 827 +mpeg 1 20 3.951244 3.951244 831 +beij 1 19 4.007333 4.007333 876 +listen 1 18 4.060443 4.060443 907 +demo 1 18 4.060443 4.060443 888 +seek 1 17 4.110874 4.110874 954 +normal 1 16 4.174387 4.174387 995 +tsinghua 1 13 4.382027 4.382027 1195 +misc 1 13 4.382027 4.382027 1124 +clock 1 11 4.553877 4.553877 1320 +host 1 11 4.553877 4.553877 1306 +player 1 11 4.553877 4.553877 1371 +perl 1 11 4.553877 4.553877 1332 +lake 1 11 4.553877 4.553877 1373 +song 1 11 4.553877 4.553877 1380 +jersei 1 9 4.753590 4.753590 1587 +sound 1 9 4.753590 4.753590 1605 +trust 1 9 4.753590 4.753590 1583 +pure 1 8 4.875197 4.875197 1776 +univeristi 1 8 4.875197 4.875197 1754 +counter 1 8 4.875197 4.875197 1765 +misra 1 7 5.010635 5.010635 1856 +clip 1 7 5.010635 5.010635 1868 +attach 1 7 5.010635 5.010635 1785 +accord 1 7 5.010635 5.010635 1826 +bell 1 6 5.164786 5.164786 2224 +troubl 1 6 5.164786 5.164786 2002 +fussel 1 5 5.347108 5.347108 2300 +opengl 1 5 5.347108 5.347108 2299 +lang 1 5 5.347108 5.347108 2294 +republ 1 4 5.568345 5.568345 3032 +shanghai 1 4 5.568345 5.568345 2925 +restructur 1 4 5.568345 5.568345 2775 +gouda 1 4 5.568345 5.568345 3021 +batori 1 4 5.568345 5.568345 2690 +blvd 1 4 5.568345 5.568345 3007 +tong 1 3 5.857933 5.857933 3258 +zuckerman 1 3 5.857933 5.857933 3205 +underconstruct 1 3 5.857933 5.857933 3889 +nanj 1 2 6.263398 6.263398 5728 +mini 1 2 6.263398 6.263398 5548 +decod 1 2 6.263398 6.263398 4936 +zodiac 1 2 6.263398 6.263398 5729 +twang 1 2 6.263398 6.263398 5730 +eagl 1 2 6.263398 6.263398 5731 +nank 1 1 6.957497 6.957497 14251 +jiao 1 1 6.957497 6.957497 14249 +summerluc 1 1 6.957497 6.957497 14252 +thissumm 1 1 6.957497 6.957497 14253 +lucent 1 1 6.957497 6.957497 14250 +plexton 1 1 6.957497 6.957497 14254 +libari 1 1 6.957497 6.957497 14255 +glut 1 1 6.957497 6.957497 14256 +mariah 1 1 6.957497 6.957497 14257 +boyz 1 1 6.957497 6.957497 14258 +babyfac 1 1 6.957497 6.957497 14259 +haiq 1 1 6.957497 6.957497 14260 +shenfeng 1 1 6.957497 6.957497 14261 +deskmat 1 1 6.957497 6.957497 14262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..bef2a647 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +base 1 165 1.791759 1.791759 50 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +welcom 1 122 2.079442 2.079442 99 +machin 1 129 2.079442 2.079442 95 +make 1 111 2.197225 2.197225 120 +place 1 106 2.197225 2.197225 124 +well 1 109 2.197225 2.197225 121 +associ 1 93 2.397895 2.397895 151 +search 1 95 2.397895 2.397895 155 +activ 1 84 2.484907 2.484907 182 +learn 1 86 2.484907 2.484907 170 +thing 1 84 2.484907 2.484907 189 +resourc 1 81 2.484907 2.484907 172 +orient 1 80 2.564949 2.564949 205 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +januari 1 62 2.772589 2.772589 264 +special 1 56 2.890372 2.890372 320 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +york 1 41 3.218876 3.218876 435 +print 1 34 3.401197 3.401197 503 +dissert 1 32 3.465736 3.465736 549 +focu 1 30 3.555348 3.555348 571 +consid 1 29 3.583519 3.583519 590 +weather 1 28 3.610918 3.610918 618 +utc 1 27 3.637586 3.637586 629 +yahoo 1 24 3.761200 3.761200 707 +decis 1 23 3.806662 3.806662 728 +voic 1 21 3.912023 3.912023 806 +supervis 1 20 3.951244 3.951244 840 +lyco 1 19 4.007333 4.007333 871 +context 1 13 4.382027 4.382027 1153 +perman 1 11 4.553877 4.553877 1372 +acquisit 1 10 4.653960 4.653960 1465 +moonei 1 9 4.753590 4.753590 1520 +linguist 1 9 4.753590 4.753590 1593 +european 1 8 4.875197 4.875197 1763 +altavista 1 6 5.164786 5.164786 2222 +infoseek 1 6 5.164786 5.164786 2188 +pars 1 5 5.347108 5.347108 2321 +raymond 1 5 5.347108 5.347108 2313 +hermjakob 1 3 5.857933 5.857933 3876 +groupand 1 3 5.857933 5.857933 3873 +signll 1 3 5.857933 5.857933 3877 +galaxi 1 3 5.857933 5.857933 3603 +deutsch 1 3 5.857933 5.857933 3802 +pageulf 1 1 6.957497 6.957497 14263 +hermjakobhello 1 1 6.957497 6.957497 14264 +thedept 1 1 6.957497 6.957497 14265 +austinand 1 1 6.957497 6.957497 14266 +aboutexampl 1 1 6.957497 6.957497 14267 +translationund 1 1 6.957497 6.957497 14268 +einet 1 1 6.957497 6.957497 14269 +dernir 1 1 6.957497 6.957497 14270 +nouvel 1 1 6.957497 6.957497 14271 +alsac 1 1 6.957497 6.957497 14272 +spiegel 1 1 6.957497 6.957497 14273 +svenska 1 1 6.957497 6.957497 14274 +dagbladet 1 1 6.957497 6.957497 14275 +tagesspiegel 1 1 6.957497 6.957497 14276 +vanguardia 1 1 6.957497 6.957497 14277 +welt 1 1 6.957497 6.957497 14278 +zeitplusacm 1 1 6.957497 6.957497 14279 +moltkestr 1 1 6.957497 6.957497 14280 +bnde 1 1 6.957497 6.957497 14281 +germanyphon 1 1 6.957497 6.957497 14282 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..a198fdc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +schedul 1 119 2.079442 2.079442 85 +peopl 1 96 2.302585 2.302585 132 +refer 1 78 2.564949 2.564949 203 +new 1 64 2.772589 2.772589 262 +organ 1 65 2.772589 2.772589 265 +taylor 1 63 2.772589 2.772589 287 +street 1 63 2.772589 2.772589 293 +directori 1 45 3.135494 3.135494 396 +sport 1 25 3.737670 3.737670 683 +entertain 1 12 4.465908 4.465908 1286 +magic 1 11 4.553877 4.553877 1358 +perman 1 11 4.553877 4.553877 1372 +gather 1 8 4.875197 4.875197 1719 +lanc 1 4 5.568345 5.568345 3022 +champion 1 4 5.568345 5.568345 2982 +tokuda 1 3 5.857933 5.857933 3266 +twelv 1 3 5.857933 5.857933 3899 +hawaii 1 3 5.857933 5.857933 3888 +intramur 1 2 6.263398 6.263398 5590 +unicron 1 1 6.957497 6.957497 14283 +financ 1 1 6.957497 6.957497 14284 +heeia 1 1 6.957497 6.957497 14285 +kaneoh 1 1 6.957497 6.957497 14286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..3032f90d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +click 1 142 1.945910 1.945910 78 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +taylor 1 63 2.772589 2.772589 287 +finger 1 52 2.995732 2.995732 354 +telephon 1 50 3.044522 3.044522 373 +eduoffic 1 33 3.433987 3.433987 531 +postal 1 30 3.555348 3.555348 580 +log 1 19 4.007333 4.007333 857 +account 1 18 4.060443 4.060443 882 +whether 1 17 4.110874 4.110874 918 +informationemail 1 9 4.753590 4.753590 1564 +painter 1 2 6.263398 6.263398 4187 +balayoghanv 1 1 6.957497 6.957497 14432 +balayoghancontact 1 1 6.957497 6.957497 14433 +ineosdi 1 1 6.957497 6.957497 14434 +bookmarksvbb 1 1 6.957497 6.957497 14435 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..859cf361 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +databas 1 122 2.079442 2.079442 86 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +advanc 1 99 2.302585 2.302585 130 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +commun 1 95 2.397895 2.397895 157 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +ieee 1 86 2.484907 2.484907 190 +member 1 84 2.484907 2.484907 165 +novemb 1 81 2.484907 2.484907 179 +second 1 81 2.484907 2.484907 166 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +state 1 76 2.564949 2.564949 207 +decemb 1 80 2.564949 2.564949 215 +server 1 76 2.564949 2.564949 204 +june 1 79 2.564949 2.564949 214 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +servic 1 72 2.639057 2.639057 236 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +summari 1 73 2.639057 2.639057 237 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +multimedia 1 68 2.708050 2.708050 258 +main 1 67 2.708050 2.708050 256 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +march 1 61 2.833213 2.833213 295 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +profession 1 51 2.995732 2.995732 345 +digit 1 52 2.995732 2.995732 348 +california 1 46 3.091042 3.091042 388 +video 1 44 3.135494 3.135494 405 +third 1 43 3.178054 3.178054 412 +tutori 1 39 3.258097 3.258097 437 +industri 1 38 3.295837 3.295837 464 +china 1 37 3.332205 3.332205 487 +tech 1 35 3.401197 3.401197 515 +award 1 34 3.401197 3.401197 523 +committe 1 34 3.401197 3.401197 522 +board 1 33 3.433987 3.433987 528 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +storag 1 31 3.496508 3.496508 553 +rang 1 30 3.555348 3.555348 565 +chair 1 29 3.583519 3.583519 596 +scale 1 28 3.610918 3.610918 613 +arrai 1 27 3.637586 3.637586 627 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +supercomput 1 25 3.737670 3.737670 681 +honor 1 23 3.806662 3.806662 729 +initi 1 23 3.806662 3.806662 717 +director 1 22 3.850148 3.850148 767 +indian 1 22 3.850148 3.850148 769 +disk 1 22 3.850148 3.850148 747 +beij 1 19 4.007333 4.007333 876 +speed 1 18 4.060443 4.060443 911 +failur 1 18 4.060443 4.060443 898 +germani 1 17 4.110874 4.110874 946 +diego 1 16 4.174387 4.174387 992 +taiwan 1 16 4.174387 4.174387 1006 +intel 1 16 4.174387 4.174387 1000 +atth 1 15 4.248495 4.248495 1019 +audio 1 14 4.317488 4.317488 1094 +heterogen 1 14 4.317488 4.317488 1090 +nasa 1 13 4.382027 4.382027 1188 +career 1 12 4.465908 4.465908 1287 +placement 1 10 4.653960 4.653960 1420 +ataustin 1 9 4.753590 4.753590 1610 +editori 1 9 4.753590 4.753590 1611 +vice 1 9 4.753590 4.753590 1604 +transmiss 1 9 4.753590 4.753590 1588 +recoveri 1 9 4.753590 4.753590 1474 +creativ 1 8 4.875197 4.875197 1777 +harrick 1 7 5.010635 5.010635 1849 +bombai 1 7 5.010635 5.010635 1972 +internationalconfer 1 6 5.164786 5.164786 2051 +sponsor 1 6 5.164786 5.164786 2133 +microsystem 1 6 5.164786 5.164786 2160 +ofdistribut 1 5 5.347108 5.347108 2316 +row 1 5 5.347108 5.347108 2330 +colorado 1 4 5.568345 5.568345 2938 +innov 1 4 5.568345 5.568345 2933 +multimediasystem 1 4 5.568345 5.568345 2701 +venkat 1 4 5.568345 5.568345 2702 +multimediacomput 1 3 5.857933 5.857933 3841 +mobilecomput 1 3 5.857933 5.857933 3629 +shenoi 1 3 5.857933 5.857933 3269 +ftc 1 3 5.857933 5.857933 3275 +rangan 1 3 5.857933 5.857933 3270 +goyal 1 3 5.857933 5.857933 3268 +durham 1 3 5.857933 5.857933 3279 +hampshir 1 3 5.857933 5.857933 3280 +mitsubishi 1 3 5.857933 5.857933 3842 +merl 1 3 5.857933 5.857933 3843 +icdc 1 2 6.263398 6.263398 5191 +andnetwork 1 2 6.263398 6.263398 5751 +protocolsfor 1 2 6.263398 6.263398 5204 +inmulti 1 2 6.263398 6.263398 4334 +annualintern 1 2 6.263398 6.263398 4335 +pasadena 1 2 6.263398 6.263398 4336 +gemmel 1 2 6.263398 6.263398 4332 +kandlur 1 2 6.263398 6.263398 4321 +ofmultimedia 1 2 6.263398 6.263398 4322 +ieeeintern 1 2 6.263398 6.263398 4333 +icmc 1 2 6.263398 6.263398 4323 +delaybound 1 2 6.263398 6.263398 4342 +fordigit 1 2 6.263398 6.263398 5752 +nossdav 1 2 6.263398 6.263398 4344 +federalinstitut 1 2 6.263398 6.263398 5539 +vinharrick 1 1 6.957497 6.957497 14437 +electronicimag 1 1 6.957497 6.957497 14438 +kaohsiung 1 1 6.957497 6.957497 14439 +eurograph 1 1 6.957497 6.957497 14436 +rostock 1 1 6.957497 6.957497 14440 +interestmultimedia 1 1 6.957497 6.957497 14441 +anend 1 1 6.957497 6.957497 14442 +thintern 1 1 6.957497 6.957497 14443 +designingmultimedia 1 1 6.957497 6.957497 14444 +foundationresearch 1 1 6.957497 6.957497 14445 +electricresearch 1 1 6.957497 6.957497 14446 +electrospacesystem 1 1 6.957497 6.957497 14447 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..4f52525a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +thing 1 84 2.484907 2.484907 189 +activ 1 84 2.484907 2.484907 182 +academ 1 82 2.484907 2.484907 178 +know 1 80 2.564949 2.564949 198 +resum 1 79 2.564949 2.564949 217 +street 1 63 2.772589 2.772589 293 +semest 1 58 2.890372 2.890372 312 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +give 1 50 3.044522 3.044522 359 +visitor 1 49 3.044522 3.044522 371 +soon 1 36 3.367296 3.367296 494 +go 1 33 3.433987 3.433987 529 +india 1 32 3.465736 3.465736 550 +art 1 29 3.583519 3.583519 593 +pass 1 28 3.610918 3.610918 611 +though 1 27 3.637586 3.637586 622 +never 1 25 3.737670 3.737670 671 +color 1 22 3.850148 3.850148 762 +increas 1 20 3.951244 3.951244 829 +sure 1 20 3.951244 3.951244 813 +log 1 19 4.007333 4.007333 857 +less 1 18 4.060443 4.060443 892 +medic 1 17 4.110874 4.110874 958 +match 1 16 4.174387 4.174387 965 +sign 1 16 4.174387 4.174387 970 +guest 1 12 4.465908 4.465908 1220 +incomplet 1 9 4.753590 4.753590 1575 +risk 1 8 4.875197 4.875197 1689 +yeah 1 6 5.164786 5.164786 2195 +put 1 6 5.164786 5.164786 2017 +guestbook 1 5 5.347108 5.347108 2475 +delhi 1 5 5.347108 5.347108 2530 +haven 1 4 5.568345 5.568345 3037 +shall 1 3 5.857933 5.857933 3891 +vipin 1 2 6.263398 6.263398 5579 +interestscours 1 2 6.263398 6.263398 5026 +reset 1 2 6.263398 6.263398 5236 +decreas 1 2 6.263398 6.263398 4877 +undergraduatefrom 1 1 6.957497 6.957497 14448 +interestsreportsy 1 1 6.957497 6.957497 14449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..021a1430 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,156 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +object 1 138 1.945910 1.945910 79 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +intern 1 108 2.197225 2.197225 128 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +school 1 84 2.484907 2.484907 188 +state 1 76 2.564949 2.564949 207 +line 1 75 2.639057 2.639057 231 +new 1 64 2.772589 2.772589 262 +import 1 65 2.772589 2.772589 282 +taylor 1 63 2.772589 2.772589 287 +reason 1 57 2.890372 2.890372 318 +three 1 54 2.944439 2.944439 330 +profession 1 51 2.995732 2.995732 345 +right 1 48 3.044522 3.044522 363 +california 1 46 3.091042 3.091042 388 +better 1 45 3.135494 3.135494 401 +favorit 1 44 3.135494 3.135494 410 +live 1 40 3.258097 3.258097 451 +paul 1 38 3.295837 3.295837 471 +close 1 38 3.295837 3.295837 465 +feel 1 37 3.332205 3.332205 483 +sciencesunivers 1 37 3.332205 3.332205 486 +survei 1 35 3.401197 3.401197 513 +human 1 32 3.465736 3.465736 546 +dissert 1 32 3.465736 3.465736 549 +taken 1 31 3.496508 3.496508 555 +scientist 1 31 3.496508 3.496508 560 +postal 1 30 3.555348 3.555348 580 +turn 1 29 3.583519 3.583519 586 +quot 1 29 3.583519 3.583519 582 +mind 1 27 3.637586 3.637586 632 +fellow 1 24 3.761200 3.761200 701 +famili 1 23 3.806662 3.806662 735 +initi 1 23 3.806662 3.806662 717 +finish 1 22 3.850148 3.850148 748 +wang 1 21 3.912023 3.912023 790 +appropri 1 18 4.060443 4.060443 883 +germani 1 17 4.110874 4.110874 946 +white 1 17 4.110874 4.110874 951 +precis 1 15 4.248495 4.248495 1023 +countri 1 15 4.248495 4.248495 1059 +incomput 1 14 4.317488 4.317488 1096 +becam 1 14 4.317488 4.317488 1117 +stori 1 14 4.317488 4.317488 1087 +daniel 1 12 4.465908 4.465908 1233 +vladimir 1 11 4.553877 4.553877 1324 +america 1 11 4.553877 4.553877 1370 +sentenc 1 10 4.653960 4.653960 1413 +black 1 10 4.653960 4.653960 1418 +gain 1 8 4.875197 4.875197 1730 +secretari 1 8 4.875197 4.875197 1775 +elect 1 8 4.875197 4.875197 1771 +joke 1 8 4.875197 4.875197 1620 +centenni 1 7 5.010635 5.010635 1967 +sciencesat 1 7 5.010635 5.010635 1968 +austinaustin 1 7 5.010635 5.010635 1966 +lifschitz 1 5 5.347108 5.347108 2542 +ortega 1 5 5.347108 5.347108 2559 +lost 1 5 5.347108 5.347108 2358 +race 1 5 5.347108 5.347108 2417 +insight 1 4 5.568345 5.568345 3024 +petersburg 1 4 5.568345 5.568345 2989 +nonmonoton 1 4 5.568345 5.568345 3023 +evid 1 4 5.568345 5.568345 2768 +ratio 1 4 5.568345 5.568345 2942 +silli 1 4 5.568345 5.568345 3038 +dijkstra 1 3 5.857933 5.857933 3173 +armi 1 3 5.857933 5.857933 3562 +prison 1 3 5.857933 5.857933 3907 +tortur 1 3 5.857933 5.857933 3634 +district 1 3 5.857933 5.857933 3756 +civil 1 3 5.857933 5.857933 3908 +spirit 1 2 6.263398 6.263398 5234 +theamerican 1 2 6.263398 6.263398 5120 +russia 1 2 6.263398 6.263398 5756 +programmingand 1 2 6.263398 6.263398 4940 +edsger 1 2 6.263398 6.263398 5740 +convoc 1 2 6.263398 6.263398 5757 +nomin 1 2 6.263398 6.263398 5758 +helm 1 2 6.263398 6.263398 4217 +burton 1 2 6.263398 6.263398 5759 +polic 1 2 6.263398 6.263398 5560 +democrat 1 2 6.263398 6.263398 5567 +admit 1 2 6.263398 6.263398 5429 +neutral 1 2 6.263398 6.263398 5760 +lifschitzwhen 1 1 6.957497 6.957497 14488 +burden 1 1 6.957497 6.957497 14489 +downcast 1 1 6.957497 6.957497 14490 +gladli 1 1 6.957497 6.957497 14491 +therealm 1 1 6.957497 6.957497 14492 +lucid 1 1 6.957497 6.957497 14493 +grasp 1 1 6.957497 6.957497 14494 +isobtain 1 1 6.957497 6.957497 14495 +pleasantli 1 1 6.957497 6.957497 14496 +conceptform 1 1 6.957497 6.957497 14497 +bernai 1 1 6.957497 6.957497 14498 +lifschitzgottesman 1 1 6.957497 6.957497 14499 +texasat 1 1 6.957497 6.957497 14500 +forartifici 1 1 6.957497 6.957497 14501 +intelligenceb 1 1 6.957497 6.957497 14502 +branchof 1 1 6.957497 6.957497 14503 +steklov 1 1 6.957497 6.957497 14504 +interesttempor 1 1 6.957497 6.957497 14505 +reasoningand 1 1 6.957497 6.957497 14506 +aboutactionslog 1 1 6.957497 6.957497 14507 +reasoningteachingoth 1 1 6.957497 6.957497 14508 +activitiespap 1 1 6.957497 6.957497 14509 +bylifschitz 1 1 6.957497 6.957497 14510 +studentsrecommend 1 1 6.957497 6.957497 14511 +speechgood 1 1 6.957497 6.957497 14512 +madelein 1 1 6.957497 6.957497 14513 +albright 1 1 6.957497 6.957497 14514 +regain 1 1 6.957497 6.957497 14515 +soviet 1 1 6.957497 6.957497 14516 +recycl 1 1 6.957497 6.957497 14517 +actbad 1 1 6.957497 6.957497 14518 +sequest 1 1 6.957497 6.957497 14519 +archeolog 1 1 6.957497 6.957497 14520 +societynot 1 1 6.957497 6.957497 14521 +redrawn 1 1 6.957497 6.957497 14522 +basisoth 1 1 6.957497 6.957497 14523 +amnesti 1 1 6.957497 6.957497 14524 +monthcontact 1 1 6.957497 6.957497 14525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..e664842b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +theori 1 111 2.197225 2.197225 127 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +complet 1 77 2.564949 2.564949 208 +april 1 77 2.564949 2.564949 196 +effici 1 73 2.639057 2.639057 233 +evalu 1 64 2.772589 2.772589 266 +copi 1 63 2.772589 2.772589 284 +taylor 1 63 2.772589 2.772589 287 +visit 1 63 2.772589 2.772589 288 +faculti 1 56 2.890372 2.890372 325 +vita 1 38 3.295837 3.295837 473 +profil 1 30 3.555348 3.555348 581 +postal 1 30 3.555348 3.555348 580 +experiment 1 26 3.688879 3.688879 645 +mine 1 26 3.688879 3.688879 654 +sequenti 1 22 3.850148 3.850148 745 +offici 1 18 4.060443 4.060443 894 +princeton 1 15 4.248495 4.248495 1042 +interestsmi 1 10 4.653960 4.653960 1462 +regent 1 5 5.347108 5.347108 2551 +vijaya 1 4 5.568345 5.568345 2677 +primarilyin 1 3 5.857933 5.857933 3832 +ramachandranvijaya 1 1 6.957497 6.957497 14450 +ramachandranblakemor 1 1 6.957497 6.957497 14451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..5a368e15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +texa 1 160 1.791759 1.791759 64 +recent 1 167 1.791759 1.791759 58 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +mani 1 92 2.397895 2.397895 150 +resourc 1 81 2.484907 2.484907 172 +activ 1 84 2.484907 2.484907 182 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +member 1 84 2.484907 2.484907 165 +method 1 80 2.564949 2.564949 213 +messag 1 76 2.564949 2.564949 212 +state 1 76 2.564949 2.564949 207 +effici 1 73 2.639057 2.639057 233 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +meet 1 72 2.639057 2.639057 229 +symposium 1 72 2.639057 2.639057 238 +differ 1 66 2.708050 2.708050 253 +foundat 1 62 2.772589 2.772589 286 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +organ 1 65 2.772589 2.772589 265 +import 1 65 2.772589 2.772589 282 +virtual 1 62 2.772589 2.772589 285 +dept 1 64 2.772589 2.772589 291 +locat 1 59 2.833213 2.833213 303 +faculti 1 56 2.890372 2.890372 325 +major 1 56 2.890372 2.890372 315 +space 1 57 2.890372 2.890372 310 +semest 1 58 2.890372 2.890372 312 +sever 1 56 2.890372 2.890372 322 +special 1 56 2.890372 2.890372 320 +processor 1 54 2.944439 2.944439 335 +talk 1 53 2.944439 2.944439 336 +pointer 1 48 3.044522 3.044522 368 +electron 1 47 3.091042 3.091042 379 +execut 1 45 3.135494 3.135494 404 +term 1 43 3.178054 3.178054 411 +theoret 1 39 3.258097 3.258097 446 +announc 1 40 3.258097 3.258097 441 +seminar 1 38 3.295837 3.295837 470 +random 1 34 3.401197 3.401197 511 +post 1 35 3.401197 3.401197 505 +next 1 34 3.401197 3.401197 517 +committe 1 34 3.401197 3.401197 522 +bibliographi 1 34 3.401197 3.401197 518 +express 1 32 3.465736 3.465736 540 +ad 1 32 3.465736 3.465736 544 +often 1 31 3.496508 3.496508 551 +scientist 1 31 3.496508 3.496508 560 +graph 1 30 3.555348 3.555348 576 +focu 1 30 3.555348 3.555348 571 +focus 1 29 3.583519 3.583519 584 +measur 1 28 3.610918 3.610918 609 +held 1 28 3.610918 3.610918 600 +becom 1 28 3.610918 3.610918 603 +bound 1 26 3.688879 3.688879 659 +berkelei 1 26 3.688879 3.688879 657 +request 1 26 3.688879 3.688879 635 +consist 1 26 3.688879 3.688879 651 +fundament 1 25 3.737670 3.737670 661 +greg 1 24 3.761200 3.761200 695 +sent 1 22 3.850148 3.850148 763 +serv 1 22 3.850148 3.850148 758 +alumni 1 21 3.912023 3.912023 807 +siam 1 21 3.912023 3.912023 800 +region 1 19 4.007333 4.007333 875 +north 1 19 4.007333 4.007333 873 +hypertext 1 19 4.007333 4.007333 865 +lower 1 18 4.060443 4.060443 886 +stand 1 18 4.060443 4.060443 891 +attend 1 18 4.060443 4.060443 893 +affili 1 13 4.382027 4.382027 1194 +discret 1 13 4.382027 4.382027 1165 +walk 1 12 4.465908 4.465908 1281 +remov 1 12 4.465908 4.465908 1225 +outsid 1 12 4.465908 4.465908 1219 +probabilist 1 11 4.553877 4.553877 1343 +typic 1 11 4.553877 4.553877 1360 +distinguish 1 11 4.553877 4.553877 1357 +regard 1 11 4.553877 4.553877 1309 +cryptographi 1 9 4.753590 4.753590 1512 +folk 1 9 4.753590 4.753590 1597 +postdoc 1 8 4.875197 4.875197 1724 +elect 1 8 4.875197 4.875197 1771 +calendar 1 8 4.875197 4.875197 1649 +colloquium 1 8 4.875197 4.875197 1734 +bit 1 7 5.010635 5.010635 1833 +foc 1 7 5.010635 5.010635 1880 +zhou 1 6 5.164786 5.164786 2092 +forum 1 6 5.164786 5.164786 2027 +southern 1 6 5.164786 5.164786 2191 +pool 1 6 5.164786 5.164786 2225 +arrang 1 6 5.164786 5.164786 2023 +sigact 1 6 5.164786 5.164786 2212 +sponsor 1 6 5.164786 5.164786 2133 +soda 1 6 5.164786 5.164786 2189 +groupth 1 5 5.347108 5.347108 2549 +provabl 1 5 5.347108 5.347108 2558 +phil 1 5 5.347108 5.347108 2419 +speaker 1 5 5.347108 5.347108 2370 +stoc 1 5 5.347108 5.347108 2491 +combinator 1 4 5.568345 5.568345 2915 +vijaya 1 4 5.568345 5.568345 2677 +twice 1 4 5.568345 5.568345 2614 +dalla 1 4 5.568345 5.568345 2930 +algorithmsand 1 4 5.568345 5.568345 2680 +plaxton 1 3 5.857933 5.857933 3886 +ramachandran 1 3 5.857933 5.857933 3742 +zuckerman 1 3 5.857933 5.857933 3205 +sinica 1 3 5.857933 5.857933 3819 +poon 1 3 5.857933 5.857933 3820 +dozen 1 3 5.857933 5.857933 3905 +gripe 1 3 5.857933 5.857933 3257 +warm 1 3 5.857933 5.857933 3904 +surround 1 3 5.857933 5.857933 3492 +louisiana 1 3 5.857933 5.857933 3902 +spaa 1 3 5.857933 5.857933 3906 +baruah 1 2 6.263398 6.263398 5753 +sheng 1 2 6.263398 6.263398 5153 +madhukar 1 2 6.263398 6.263398 5633 +ckpoon 1 2 6.263398 6.263398 5510 +rajmohan 1 2 6.263398 6.263398 5706 +rajaraman 1 2 6.263398 6.263398 5704 +rraj 1 2 6.263398 6.263398 5705 +sinha 1 2 6.263398 6.263398 5754 +southwestern 1 2 6.263398 6.263398 5744 +andarchitectur 1 2 6.263398 6.263398 5755 +grouput 1 1 6.957497 6.957497 14457 +sanjoi 1 1 6.957497 6.957497 14452 +emba 1 1 6.957497 6.957497 14458 +tsan 1 1 6.957497 6.957497 14459 +tshsu 1 1 6.957497 6.957497 14460 +pierr 1 1 6.957497 6.957497 14461 +kelsen 1 1 6.957497 6.957497 14453 +korupolu 1 1 6.957497 6.957497 14462 +mackenzi 1 1 6.957497 6.957497 14463 +philmac 1 1 6.957497 6.957497 14464 +idbsu 1 1 6.957497 6.957497 14465 +ramgop 1 1 6.957497 6.957497 14454 +mettu 1 1 6.957497 6.957497 14466 +santanu 1 1 6.957497 6.957497 14467 +ssinha 1 1 6.957497 6.957497 14468 +torsten 1 1 6.957497 6.957497 14469 +suel 1 1 6.957497 6.957497 14455 +yuke 1 1 6.957497 6.957497 14456 +lowvolum 1 1 6.957497 6.957497 14470 +themidsouth 1 1 6.957497 6.957497 14471 +midsouthwest 1 1 6.957497 6.957497 14472 +keynot 1 1 6.957497 6.957497 14473 +atut 1 1 6.957497 6.957497 14474 +organizedanoth 1 1 6.957497 6.957497 14475 +methodist 1 1 6.957497 6.957497 14476 +oklahoma 1 1 6.957497 6.957497 14477 +beheld 1 1 6.957497 6.957497 14478 +algorithmsmail 1 1 6.957497 6.957497 14479 +usuallytri 1 1 6.957497 6.957497 14480 +ofaustin 1 1 6.957497 6.957497 14481 +thatinclud 1 1 6.957497 6.957497 14482 +sponsorsth 1 1 6.957497 6.957497 14483 +interestar 1 1 6.957497 6.957497 14484 +thesigact 1 1 6.957497 6.957497 14485 +eccc 1 1 6.957497 6.957497 14486 +rolodex 1 1 6.957497 6.957497 14487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..6777725d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,8 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +phone 1 175 1.791759 1.791759 45 +srinivasan 1 6 5.164786 5.164786 2175 +vaidyaraman 1 2 6.263398 6.263398 5658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..9500bc24 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +memori 1 101 2.302585 2.302585 139 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +intellig 1 72 2.639057 2.639057 225 +solv 1 73 2.639057 2.639057 234 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +visual 1 48 3.044522 3.044522 372 +approach 1 48 3.044522 3.044522 366 +understand 1 47 3.091042 3.091042 384 +term 1 43 3.178054 3.178054 411 +represent 1 35 3.401197 3.401197 512 +neural 1 30 3.555348 3.555348 578 +retriev 1 27 3.637586 3.637586 621 +background 1 25 3.737670 3.737670 664 +cognit 1 16 4.174387 4.174387 986 +usavoic 1 13 4.382027 4.382027 1198 +mepost 1 10 4.653960 4.653960 1472 +attent 1 8 4.875197 4.875197 1651 +interestsi 1 7 5.010635 5.010635 1969 +connectionist 1 5 5.347108 5.347108 2430 +howto 1 2 6.263398 6.263398 5761 +sengul 1 1 6.957497 6.957497 14527 +vurgun 1 1 6.957497 6.957497 14526 +sengulvurgun 1 1 6.957497 6.957497 14528 +ammainli 1 1 6.957497 6.957497 14529 +evolutionaryalgorithm 1 1 6.957497 6.957497 14530 +ofprefer 1 1 6.957497 6.957497 14531 +skillacquisit 1 1 6.957497 6.957497 14532 +mindto 1 1 6.957497 6.957497 14533 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..6d158708 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +person 1 111 2.197225 2.197225 117 +server 1 76 2.564949 2.564949 204 +locat 1 59 2.833213 2.833213 303 +chuck 1 14 4.317488 4.317488 1108 +enterpris 1 2 6.263398 6.263398 4839 +walbourn 1 1 6.957497 6.957497 14534 +walbournmi 1 1 6.957497 6.957497 14535 +charybdi 1 1 6.957497 6.957497 14536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..895ad4af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +fall 1 181 1.609438 1.609438 40 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +lectur 1 135 1.945910 1.945910 73 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +mathemat 1 108 2.197225 2.197225 123 +follow 1 92 2.397895 2.397895 143 +member 1 84 2.484907 2.484907 165 +activ 1 84 2.484907 2.484907 182 +academ 1 82 2.484907 2.484907 178 +complet 1 77 2.564949 2.564949 208 +visit 1 63 2.772589 2.772589 288 +creat 1 63 2.772589 2.772589 277 +septemb 1 65 2.772589 2.772589 274 +colleg 1 61 2.833213 2.833213 300 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +summer 1 56 2.890372 2.890372 311 +profession 1 51 2.995732 2.995732 345 +appoint 1 49 3.044522 3.044522 358 +math 1 44 3.135494 3.135494 402 +http 1 41 3.218876 3.218876 420 +formal 1 37 3.332205 3.332205 478 +revis 1 26 3.688879 3.688879 640 +period 1 22 3.850148 3.850148 743 +regular 1 17 4.110874 4.110874 929 +photograph 1 15 4.248495 4.248495 1056 +senior 1 14 4.317488 4.317488 1120 +henri 1 10 4.653960 4.653960 1417 +jack 1 8 4.875197 4.875197 1780 +walker 1 3 5.857933 5.857933 3161 +tenur 1 3 5.857933 5.857933 3801 +mackai 1 2 6.263398 6.263398 5762 +grinnel 1 2 6.263398 6.263398 5763 +edua 1 2 6.263398 6.263398 5764 +grin 1 1 6.957497 6.957497 14537 +professorwalk 1 1 6.957497 6.957497 14538 +teachand 1 1 6.957497 6.957497 14539 +atgrinnel 1 1 6.957497 6.957497 14540 +robertson 1 1 6.957497 6.957497 14541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..48be051c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +recent 1 167 1.791759 1.791759 58 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +first 1 140 1.945910 1.945910 71 +databas 1 122 2.079442 2.079442 86 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +manag 1 114 2.197225 2.197225 125 +follow 1 92 2.397895 2.397895 143 +school 1 84 2.484907 2.484907 188 +activ 1 84 2.484907 2.484907 182 +appli 1 71 2.639057 2.639057 226 +knowledg 1 67 2.708050 2.708050 243 +laboratori 1 63 2.772589 2.772589 292 +maintain 1 51 2.995732 2.995732 342 +case 1 51 2.995732 2.995732 351 +cool 1 49 3.044522 3.044522 374 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +posit 1 31 3.496508 3.496508 552 +rule 1 26 3.688879 3.688879 638 +mike 1 24 3.761200 3.761200 703 +basi 1 20 3.951244 3.951244 828 +item 1 19 4.007333 4.007333 856 +accept 1 18 4.060443 4.060443 879 +senior 1 14 4.317488 4.317488 1120 +usavoic 1 13 4.382027 4.382027 1198 +modul 1 10 4.653960 4.653960 1434 +mepost 1 10 4.653960 4.653960 1472 +declar 1 9 4.753590 4.753590 1526 +lane 1 8 4.875197 4.875197 1720 +unpublish 1 6 5.164786 5.164786 2226 +mirank 1 5 5.347108 5.347108 2543 +lanc 1 4 5.568345 5.568345 3022 +warshaw 1 2 6.263398 6.263398 5659 +venu 1 2 6.263398 6.263398 5655 +developedat 1 2 6.263398 6.263398 4078 +obermey 1 2 6.263398 6.263398 5657 +warshawlan 1 1 6.957497 6.957497 14542 +laboratoryinvolv 1 1 6.957497 6.957497 14543 +andat 1 1 6.957497 6.957497 14544 +arlut 1 1 6.957497 6.957497 14545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..e9d9fa9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +analysi 1 124 2.079442 2.079442 98 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +book 1 99 2.302585 2.302585 131 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +master 1 76 2.564949 2.564949 216 +decemb 1 80 2.564949 2.564949 215 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +august 1 66 2.708050 2.708050 257 +java 1 70 2.708050 2.708050 248 +dept 1 64 2.772589 2.772589 291 +juli 1 60 2.833213 2.833213 305 +unix 1 58 2.890372 2.890372 308 +sampl 1 53 2.944439 2.944439 339 +numer 1 49 3.044522 3.044522 369 +math 1 44 3.135494 3.135494 402 +china 1 37 3.332205 3.332205 487 +expect 1 37 3.332205 3.332205 484 +manual 1 35 3.401197 3.401197 504 +common 1 30 3.555348 3.555348 574 +load 1 28 3.610918 3.610918 601 +chen 1 21 3.912023 3.912023 791 +demo 1 18 4.060443 4.060443 888 +perl 1 11 4.553877 4.553877 1332 +gatewai 1 7 5.010635 5.010635 1942 +fudan 1 3 5.857933 5.857933 3707 +rosett 1 2 6.263398 6.263398 5595 +wchen 1 1 6.957497 6.957497 14546 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..b59d79f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +fall 1 181 1.609438 1.609438 40 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +implement 1 152 1.791759 1.791759 52 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +memori 1 101 2.302585 2.302585 139 +section 1 94 2.397895 2.397895 149 +orient 1 80 2.564949 2.564949 205 +taylor 1 63 2.772589 2.772589 287 +best 1 59 2.833213 2.833213 299 +paul 1 38 3.295837 3.295837 471 +postal 1 30 3.555348 3.555348 580 +usual 1 28 3.610918 3.610918 608 +reach 1 24 3.761200 3.761200 688 +lead 1 23 3.806662 3.806662 718 +thought 1 17 4.110874 4.110874 945 +wilson 1 9 4.753590 4.753590 1536 +cross 1 8 4.875197 4.875197 1703 +oop 1 8 4.875197 4.875197 1778 +informationi 1 3 5.857933 5.857933 3871 +novelti 1 2 6.263398 6.263398 5765 +ltwilson 1 1 6.957497 6.957497 14547 +headshot 1 1 6.957497 6.957497 14548 +workson 1 1 6.957497 6.957497 14549 +teachingin 1 1 6.957497 6.957497 14550 +sciencesnot 1 1 6.957497 6.957497 14551 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..2c8110e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +relat 1 139 1.945910 1.945910 68 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +school 1 84 2.484907 2.484907 188 +thing 1 84 2.484907 2.484907 189 +start 1 83 2.484907 2.484907 173 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +differ 1 66 2.708050 2.708050 253 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +without 1 50 3.044522 3.044522 370 +give 1 50 3.044522 3.044522 359 +made 1 44 3.135494 3.135494 398 +past 1 42 3.218876 3.218876 428 +howev 1 41 3.218876 3.218876 422 +live 1 40 3.258097 3.258097 451 +realli 1 40 3.258097 3.258097 444 +mean 1 37 3.332205 3.332205 477 +ofth 1 36 3.367296 3.367296 491 +john 1 33 3.433987 3.433987 532 +abl 1 30 3.555348 3.555348 566 +becom 1 28 3.610918 3.610918 603 +though 1 27 3.637586 3.637586 622 +quit 1 27 3.637586 3.637586 633 +decis 1 23 3.806662 3.806662 728 +lead 1 23 3.806662 3.806662 718 +dai 1 22 3.850148 3.850148 753 +born 1 21 3.912023 3.912023 798 +attend 1 18 4.060443 4.060443 893 +record 1 18 4.060443 4.060443 890 +listen 1 18 4.060443 4.060443 907 +accept 1 18 4.060443 4.060443 879 +thought 1 17 4.110874 4.110874 945 +whether 1 17 4.110874 4.110874 918 +whole 1 17 4.110874 4.110874 940 +seek 1 17 4.110874 4.110874 954 +earli 1 16 4.174387 4.174387 968 +month 1 15 4.248495 4.248495 1025 +enough 1 15 4.248495 4.248495 1040 +becam 1 14 4.317488 4.317488 1117 +hong 1 14 4.317488 4.317488 1105 +decid 1 14 4.317488 4.317488 1075 +believ 1 13 4.382027 4.382027 1187 +came 1 13 4.382027 4.382027 1197 +weak 1 13 4.382027 4.382027 1159 +opportun 1 13 4.382027 4.382027 1161 +count 1 12 4.465908 4.465908 1239 +true 1 10 4.653960 4.653960 1422 +reli 1 10 4.653960 4.653960 1411 +kong 1 9 4.753590 4.753590 1602 +clear 1 9 4.753590 4.753590 1488 +trust 1 9 4.753590 4.753590 1583 +strength 1 9 4.753590 4.753590 1494 +said 1 9 4.753590 4.753590 1571 +matter 1 8 4.875197 4.875197 1627 +realiz 1 8 4.875197 4.875197 1739 +christian 1 7 5.010635 5.010635 1949 +therefor 1 7 5.010635 5.010635 1822 +wrong 1 6 5.164786 5.164786 2025 +matthew 1 6 5.164786 5.164786 2193 +church 1 4 5.568345 5.568345 3011 +jesu 1 3 5.857933 5.857933 3624 +bibl 1 3 5.857933 5.857933 3143 +credibl 1 3 5.857933 5.857933 3210 +shouldb 1 3 5.857933 5.857933 3673 +theywil 1 3 5.857933 5.857933 3102 +faith 1 3 5.857933 5.857933 3363 +doubt 1 3 5.857933 5.857933 3119 +nota 1 3 5.857933 5.857933 3785 +arthur 1 2 6.263398 6.263398 5767 +christ 1 2 6.263398 6.263398 5766 +religi 1 2 6.263398 6.263398 4816 +intent 1 2 6.263398 6.263398 5768 +holi 1 2 6.263398 6.263398 5711 +stumbl 1 2 6.263398 6.263398 5349 +hei 1 2 6.263398 6.263398 5769 +forgiv 1 2 6.263398 6.263398 5770 +andto 1 2 6.263398 6.263398 5771 +differencebetween 1 2 6.263398 6.263398 5431 +deed 1 2 6.263398 6.263398 5077 +wedo 1 2 6.263398 6.263398 5772 +sick 1 2 6.263398 6.263398 5773 +ought 1 2 6.263398 6.263398 5365 +hesit 1 2 6.263398 6.263398 5774 +cent 1 1 6.957497 6.957497 14553 +christiani 1 1 6.957497 6.957497 14554 +alittl 1 1 6.957497 6.957497 14555 +totallyunexpect 1 1 6.957497 6.957497 14556 +compulsori 1 1 6.957497 6.957497 14557 +thechristian 1 1 6.957497 6.957497 14558 +tobecom 1 1 6.957497 6.957497 14559 +slife 1 1 6.957497 6.957497 14560 +deepli 1 1 6.957497 6.957497 14561 +mylif 1 1 6.957497 6.957497 14562 +misconcept 1 1 6.957497 6.957497 14563 +christianwa 1 1 6.957497 6.957497 14564 +christianand 1 1 6.957497 6.957497 14565 +lovedeveri 1 1 6.957497 6.957497 14566 +achristian 1 1 6.957497 6.957497 14567 +virtuou 1 1 6.957497 6.957497 14568 +thefellowship 1 1 6.957497 6.957497 14569 +sin 1 1 6.957497 6.957497 14552 +flesh 1 1 6.957497 6.957497 14570 +sinless 1 1 6.957497 6.957497 14571 +sympath 1 1 6.957497 6.957497 14572 +weconfess 1 1 6.957497 6.957497 14573 +cleans 1 1 6.957497 6.957497 14574 +unright 1 1 6.957497 6.957497 14575 +astheir 1 1 6.957497 6.957497 14576 +saviour 1 1 6.957497 6.957497 14577 +gratefulli 1 1 6.957497 6.957497 14578 +redempt 1 1 6.957497 6.957497 14579 +fortheir 1 1 6.957497 6.957497 14580 +justifi 1 1 6.957497 6.957497 14581 +roman 1 1 6.957497 6.957497 14582 +thecontrari 1 1 6.957497 6.957497 14583 +givesu 1 1 6.957497 6.957497 14584 +physician 1 1 6.957497 6.957497 14585 +onour 1 1 6.957497 6.957497 14586 +thetruth 1 1 6.957497 6.957497 14587 +thankgod 1 1 6.957497 6.957497 14588 +wkmak 1 1 6.957497 6.957497 14589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..a01144d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +list 1 201 1.609438 1.609438 39 +contact 1 153 1.791759 1.791759 59 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +topic 1 114 2.197225 2.197225 110 +person 1 111 2.197225 2.197225 117 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +user 1 104 2.302585 2.302585 137 +pictur 1 89 2.397895 2.397895 160 +comment 1 93 2.397895 2.397895 146 +graphic 1 90 2.397895 2.397895 147 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +homepag 1 93 2.397895 2.397895 148 +associ 1 93 2.397895 2.397895 151 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +member 1 84 2.484907 2.484907 165 +interfac 1 79 2.564949 2.564949 209 +state 1 76 2.564949 2.564949 207 +intellig 1 72 2.639057 2.639057 225 +onlin 1 75 2.639057 2.639057 223 +multimedia 1 68 2.708050 2.708050 258 +artifici 1 63 2.772589 2.772589 280 +organ 1 65 2.772589 2.772589 265 +taylor 1 63 2.772589 2.772589 287 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +explor 1 58 2.890372 2.890372 324 +reason 1 57 2.890372 2.890372 318 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +approach 1 48 3.044522 3.044522 366 +frequent 1 49 3.044522 3.044522 367 +physic 1 47 3.091042 3.091042 377 +vision 1 41 3.218876 3.218876 430 +fast 1 42 3.218876 3.218876 429 +movi 1 40 3.258097 3.258097 459 +robot 1 36 3.367296 3.367296 497 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +rang 1 30 3.555348 3.555348 565 +neural 1 30 3.555348 3.555348 578 +common 1 30 3.555348 3.555348 574 +semant 1 29 3.583519 3.583519 587 +built 1 29 3.583519 3.583519 592 +art 1 29 3.583519 3.583519 593 +ask 1 28 3.610918 3.610918 597 +manipul 1 27 3.637586 3.637586 624 +doctor 1 24 3.761200 3.761200 709 +yahoo 1 24 3.761200 3.761200 707 +miscellan 1 23 3.806662 3.806662 731 +mobil 1 23 3.806662 3.806662 730 +hierarchi 1 22 3.850148 3.850148 744 +navig 1 21 3.912023 3.912023 796 +unit 1 21 3.912023 3.912023 779 +love 1 21 3.912023 3.912023 804 +offici 1 18 4.060443 4.060443 894 +spatial 1 16 4.174387 4.174387 988 +commerci 1 16 4.174387 4.174387 1005 +remot 1 15 4.248495 4.248495 1041 +embed 1 14 4.317488 4.317488 1102 +hotlist 1 13 4.382027 4.382027 1199 +primarili 1 13 4.382027 4.382027 1185 +qualit 1 11 4.553877 4.553877 1362 +player 1 11 4.553877 4.553877 1371 +hello 1 10 4.653960 4.653960 1407 +catalog 1 10 4.653960 4.653960 1431 +meta 1 9 4.753590 4.753590 1505 +ring 1 8 4.875197 4.875197 1684 +autonom 1 8 4.875197 4.875197 1749 +guitar 1 8 4.875197 4.875197 1758 +spot 1 7 5.010635 5.010635 1894 +sensor 1 7 5.010635 5.010635 1920 +usenet 1 7 5.010635 5.010635 1839 +race 1 5 5.347108 5.347108 2417 +worki 1 4 5.568345 5.568345 3010 +car 1 4 5.568345 5.568345 2931 +fora 1 4 5.568345 5.568345 2697 +ncsa 1 4 5.568345 5.568345 2767 +motor 1 3 5.857933 5.857933 3909 +worm 1 2 6.263398 6.263398 5775 +badminton 1 2 6.263398 6.263398 5221 +eduperson 1 2 6.263398 6.263398 5776 +martial 1 2 6.263398 6.263398 5004 +mobilerobot 1 1 6.957497 6.957497 14595 +ultrason 1 1 6.957497 6.957497 14590 +rover 1 1 6.957497 6.957497 14596 +tall 1 1 6.957497 6.957497 14597 +rhino 1 1 6.957497 6.957497 14591 +robocac 1 1 6.957497 6.957497 14598 +robofest 1 1 6.957497 6.957497 14599 +robokreta 1 1 6.957497 6.957497 14592 +besar 1 1 6.957497 6.957497 14600 +kicik 1 1 6.957497 6.957497 14601 +chassi 1 1 6.957497 6.957497 14602 +andqualit 1 1 6.957497 6.957497 14603 +malaysia 1 1 6.957497 6.957497 14604 +wyle 1 1 6.957497 6.957497 14593 +interestsavid 1 1 6.957497 6.957497 14605 +usba 1 1 6.957497 6.957497 14606 +clarinet 1 1 6.957497 6.957497 14594 +miscellaneousinterest 1 1 6.957497 6.957497 14607 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..77565e1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +austin 1 168 1.791759 1.791759 63 +west 1 83 2.484907 2.484907 192 +xfeng 1 2 6.263398 6.263398 4376 +qaustin 1 1 6.957497 6.957497 14608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..67602840 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +present 1 91 2.397895 2.397895 145 +imag 1 91 2.397895 2.397895 161 +search 1 95 2.397895 2.397895 155 +stuff 1 87 2.484907 2.484907 171 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +multimedia 1 68 2.708050 2.708050 258 +creat 1 63 2.772589 2.772589 277 +major 1 56 2.890372 2.890372 315 +visual 1 48 3.044522 3.044522 372 +right 1 48 3.044522 3.044522 363 +frequent 1 49 3.044522 3.044522 367 +pointer 1 48 3.044522 3.044522 368 +around 1 43 3.178054 3.178054 415 +music 1 42 3.218876 3.218876 436 +york 1 41 3.218876 3.218876 435 +realli 1 40 3.258097 3.258097 444 +feel 1 37 3.332205 3.332205 483 +china 1 37 3.332205 3.332205 487 +soon 1 36 3.367296 3.367296 494 +express 1 32 3.465736 3.465736 540 +travel 1 30 3.555348 3.555348 579 +hard 1 30 3.555348 3.555348 563 +american 1 27 3.637586 3.637586 634 +sport 1 25 3.737670 3.737670 683 +head 1 23 3.806662 3.806662 732 +watch 1 21 3.912023 3.912023 789 +unit 1 21 3.912023 3.912023 779 +goe 1 15 4.248495 4.248495 1044 +club 1 15 4.248495 4.248495 1058 +classic 1 14 4.317488 4.317488 1084 +audio 1 14 4.317488 4.317488 1094 +rank 1 14 4.317488 4.317488 1063 +hopefulli 1 14 4.317488 4.317488 1071 +pretti 1 13 4.382027 4.382027 1191 +walk 1 12 4.465908 4.465908 1281 +newspap 1 12 4.465908 4.465908 1280 +string 1 11 4.553877 4.553877 1340 +keyword 1 11 4.553877 4.553877 1356 +vista 1 10 4.653960 4.653960 1452 +card 1 10 4.653960 4.653960 1435 +hang 1 9 4.753590 4.753590 1499 +soccer 1 8 4.875197 4.875197 1752 +harrick 1 7 5.010635 5.010635 1849 +surpris 1 7 5.010635 5.010635 1828 +photographi 1 6 5.164786 5.164786 2146 +infoseek 1 6 5.164786 5.164786 2188 +financi 1 6 5.164786 5.164786 2197 +atlant 1 5 5.347108 5.347108 2508 +alta 1 4 5.568345 5.568345 3039 +leagu 1 4 5.568345 5.568345 3040 +aswel 1 3 5.857933 5.857933 3286 +serious 1 3 5.857933 5.857933 3663 +monthli 1 3 5.857933 5.857933 3910 +imagin 1 2 6.263398 6.263398 5472 +clearer 1 2 6.263398 6.263398 5676 +marvel 1 2 6.263398 6.263398 5400 +morn 1 2 6.263398 6.263398 5162 +xingang 1 1 6.957497 6.957497 14609 +delight 1 1 6.957497 6.957497 14610 +temporaryresort 1 1 6.957497 6.957497 14611 +llgradual 1 1 6.957497 6.957497 14612 +havesometh 1 1 6.957497 6.957497 14613 +foliag 1 1 6.957497 6.957497 14614 +miata 1 1 6.957497 6.957497 14615 +xguo 1 1 6.957497 6.957497 14616 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..dbad7cfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +address 1 170 1.791759 1.791759 62 +geoffrei 1 3 5.857933 5.857933 3505 +pagemov 1 1 6.957497 6.957497 14617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..e65b7689 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +challeng 1 26 3.688879 3.688879 653 +frame 1 24 3.761200 3.761200 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +yang 1 8 4.875197 4.875197 1652 +alert 1 5 5.347108 5.347108 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..82e6b26e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +first 1 140 1.945910 1.945910 71 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +make 1 111 2.197225 2.197225 120 +sinc 1 90 2.397895 2.397895 159 +homepag 1 93 2.397895 2.397895 148 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +david 1 71 2.639057 2.639057 232 +nation 1 74 2.639057 2.639057 240 +street 1 63 2.772589 2.772589 293 +still 1 50 3.044522 3.044522 362 +life 1 50 3.044522 3.044522 375 +get 1 46 3.091042 3.091042 380 +favorit 1 44 3.135494 3.135494 410 +must 1 40 3.258097 3.258097 442 +word 1 34 3.401197 3.401197 508 +quot 1 29 3.583519 3.583519 582 +art 1 29 3.583519 3.583519 593 +hope 1 28 3.610918 3.610918 610 +utc 1 27 3.637586 3.637586 629 +team 1 27 3.637586 3.637586 625 +daili 1 24 3.761200 3.761200 706 +busi 1 21 3.912023 3.912023 784 +tell 1 21 3.912023 3.912023 777 +beauti 1 18 4.060443 4.060443 912 +sept 1 17 4.110874 4.110874 952 +took 1 16 4.174387 4.174387 1010 +classic 1 14 4.317488 4.317488 1084 +hong 1 14 4.317488 4.317488 1105 +stori 1 14 4.317488 4.317488 1087 +introduc 1 13 4.382027 4.382027 1139 +outsid 1 12 4.465908 4.465908 1219 +franc 1 12 4.465908 4.465908 1276 +hello 1 10 4.653960 4.653960 1407 +rich 1 10 4.653960 4.653960 1396 +label 1 10 4.653960 4.653960 1423 +establish 1 9 4.753590 4.753590 1532 +kong 1 9 4.753590 4.753590 1602 +french 1 9 4.753590 4.753590 1511 +guitar 1 8 4.875197 4.875197 1758 +grew 1 8 4.875197 4.875197 1742 +corner 1 7 5.010635 5.010635 1909 +footbal 1 7 5.010635 5.010635 1912 +whatev 1 6 5.164786 5.164786 2097 +artist 1 6 5.164786 5.164786 2127 +seriou 1 5 5.347108 5.347108 2252 +christoph 1 5 5.347108 5.347108 2512 +festiv 1 4 5.568345 5.568345 2952 +tire 1 4 5.568345 5.568345 2799 +align 1 4 5.568345 5.568345 2863 +concert 1 3 5.857933 5.857933 3533 +byth 1 3 5.857933 5.857933 3874 +jesu 1 3 5.857933 5.857933 3624 +passion 1 3 5.857933 5.857933 3633 +michel 1 3 5.857933 5.857933 3791 +medit 1 2 6.263398 6.263398 5777 +retir 1 2 6.263398 6.263398 5674 +christ 1 2 6.263398 6.263398 5766 +wwwdavid 1 1 6.957497 6.957497 14618 +assad 1 1 6.957497 6.957497 14619 +brothersin 1 1 6.957497 6.957497 14620 +parkeningi 1 1 6.957497 6.957497 14621 +guitarist 1 1 6.957497 6.957497 14622 +ofconcert 1 1 6.957497 6.957497 14623 +reconcili 1 1 6.957497 6.957497 14624 +rekindl 1 1 6.957497 6.957497 14625 +theamsterdam 1 1 6.957497 6.957497 14626 +trio 1 1 6.957497 6.957497 14627 +flair 1 1 6.957497 6.957497 14628 +platini 1 1 6.957497 6.957497 14629 +magazinepublish 1 1 6.957497 6.957497 14630 +minist 1 1 6.957497 6.957497 14631 +absmiddl 1 1 6.957497 6.957497 14632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..b412ac73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +challeng 1 26 3.688879 3.688879 653 +frame 1 24 3.761200 3.761200 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 +yanbin 1 2 6.263398 6.263398 5599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..a2f0f3bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +pleas 1 113 2.197225 2.197225 114 +browser 1 56 2.890372 2.890372 313 +continu 1 39 3.258097 3.258097 448 +oop 1 8 4.875197 4.875197 1778 +yuan 1 3 5.857933 5.857933 3653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..b6210944 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +sinc 1 90 2.397895 2.397895 159 +state 1 76 2.564949 2.564949 207 +servic 1 72 2.639057 2.639057 236 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +china 1 37 3.332205 3.332205 487 +travel 1 30 3.555348 3.555348 579 +postal 1 30 3.555348 3.555348 580 +unit 1 21 3.912023 3.912023 779 +beij 1 19 4.007333 4.007333 876 +beauti 1 18 4.060443 4.060443 912 +wife 1 13 4.382027 4.382027 1196 +tsinghua 1 13 4.382027 4.382027 1195 +stai 1 12 4.465908 4.465908 1215 +jersei 1 9 4.753590 4.753590 1587 +heavi 1 7 5.010635 5.010635 1841 +river 1 6 5.164786 5.164786 2220 +yong 1 4 5.568345 5.568345 2809 +rutger 1 3 5.857933 5.857933 3566 +brunswick 1 3 5.857933 5.857933 3567 +settl 1 2 6.263398 6.263398 5778 +homepageto 1 1 6.957497 6.957497 14633 +homepagey 1 1 6.957497 6.957497 14634 +milanitalian 1 1 6.957497 6.957497 14635 +soccerk 1 1 6.957497 6.957497 14636 +soccernba 1 1 6.957497 6.957497 14637 +sitefox 1 1 6.957497 6.957497 14638 +sportschicago 1 1 6.957497 6.957497 14639 +bullsmichael 1 1 6.957497 6.957497 14640 +jordannflnhlc 1 1 6.957497 6.957497 14641 +rankingmarri 1 1 6.957497 6.957497 14642 +childrenseinfeldcomput 1 1 6.957497 6.957497 14643 +sciencesutilitieshtml 1 1 6.957497 6.957497 14644 +convertersimag 1 1 6.957497 6.957497 14645 +collectionssystemshtmllatexcgitcl 1 1 6.957497 6.957497 14646 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 1 1 6.957497 6.957497 14647 +tmiscinternet 1 1 6.957497 6.957497 14648 +parcel 1 1 6.957497 6.957497 14649 +usp 1 1 6.957497 6.957497 14650 +fedexus 1 1 6.957497 6.957497 14651 +guidefun 1 1 6.957497 6.957497 14652 +todayu 1 1 6.957497 6.957497 14653 +newsstarwavesupermodel 1 1 6.957497 6.957497 14654 +yonglu 1 1 6.957497 6.957497 14655 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..3677b169 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +analysi 1 124 2.079442 2.079442 98 +high 1 130 2.079442 2.079442 101 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +center 1 88 2.397895 2.397895 158 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +solut 1 82 2.484907 2.484907 162 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +academ 1 82 2.484907 2.484907 178 +level 1 87 2.484907 2.484907 180 +method 1 80 2.564949 2.564949 213 +david 1 71 2.639057 2.639057 232 +servic 1 72 2.639057 2.639057 236 +appli 1 71 2.639057 2.639057 226 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +involv 1 71 2.639057 2.639057 227 +differ 1 66 2.708050 2.708050 253 +degre 1 69 2.708050 2.708050 259 +previou 1 62 2.772589 2.772589 290 +share 1 59 2.833213 2.833213 304 +special 1 56 2.890372 2.890372 320 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +numer 1 49 3.044522 3.044522 369 +algebra 1 45 3.135494 3.135494 394 +linear 1 41 3.218876 3.218876 431 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +press 1 42 3.218876 3.218876 419 +procedur 1 36 3.367296 3.367296 488 +award 1 34 3.401197 3.401197 523 +committe 1 34 3.401197 3.401197 522 +next 1 34 3.401197 3.401197 517 +board 1 33 3.433987 3.433987 528 +john 1 33 3.433987 3.433987 532 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +focus 1 29 3.583519 3.583519 584 +packag 1 28 3.610918 3.610918 614 +american 1 27 3.637586 3.637586 634 +supercomput 1 25 3.737670 3.737670 681 +fellow 1 24 3.761200 3.761200 701 +honor 1 23 3.806662 3.806662 729 +equat 1 23 3.806662 3.806662 724 +variabl 1 23 3.806662 3.806662 715 +director 1 22 3.850148 3.850148 767 +siam 1 21 3.912023 3.912023 800 +smith 1 20 3.951244 3.951244 820 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +young 1 16 4.174387 4.174387 991 +spars 1 16 4.174387 4.174387 989 +vector 1 16 4.174387 4.174387 961 +contribut 1 15 4.248495 4.248495 1021 +researchmi 1 14 4.317488 4.317488 1119 +finit 1 14 4.317488 4.317488 1106 +polynomi 1 14 4.317488 4.317488 1069 +iter 1 12 4.465908 4.465908 1206 +matric 1 10 4.653960 4.653960 1399 +suitabl 1 9 4.753590 4.753590 1486 +carei 1 8 4.875197 4.875197 1781 +harvard 1 7 5.010635 5.010635 1926 +converg 1 7 5.010635 5.010635 1844 +solver 1 7 5.010635 5.010635 1911 +outstand 1 6 5.164786 5.164786 2136 +argonn 1 5 5.347108 5.347108 2461 +singapor 1 5 5.347108 5.347108 2487 +ration 1 5 5.347108 5.347108 2427 +minneapoli 1 5 5.347108 5.347108 2480 +naval 1 4 5.568345 5.568345 2920 +rapidli 1 4 5.568345 5.568345 2850 +crai 1 4 5.568345 5.568345 3012 +graham 1 4 5.568345 5.568345 2817 +wilei 1 4 5.568345 5.568345 2669 +truste 1 3 5.857933 5.857933 3900 +stationari 1 3 5.857933 5.857933 3861 +ofmathemat 1 2 6.263398 6.263398 4167 +interestnumer 1 2 6.263398 6.263398 5717 +methodsand 1 2 6.263398 6.263398 5779 +itpack 1 2 6.263398 6.263398 5619 +kincaid 1 2 6.263398 6.263398 5617 +rassia 1 2 6.263398 6.263398 5620 +omega 1 2 6.263398 6.263398 4368 +pde 1 2 6.263398 6.263398 4505 +youngashbel 1 1 6.957497 6.957497 14656 +webb 1 1 6.957497 6.957497 14657 +issueded 1 1 6.957497 6.957497 14658 +mathematicalsocieti 1 1 6.957497 6.957497 14659 +matrixappl 1 1 6.957497 6.957497 14660 +numericallinear 1 1 6.957497 6.957497 14661 +partialdifferenti 1 1 6.957497 6.957497 14662 +oflinear 1 1 6.957497 6.957497 14663 +andspars 1 1 6.957497 6.957497 14664 +basedon 1 1 6.957497 6.957497 14665 +beingextend 1 1 6.957497 6.957497 14666 +distributedmemori 1 1 6.957497 6.957497 14667 +methodsbas 1 1 6.957497 6.957497 14668 +multilevel 1 1 6.957497 6.957497 14669 +beingdevelop 1 1 6.957497 6.957497 14670 +publicationsd 1 1 6.957497 6.957497 14671 +srivasiava 1 1 6.957497 6.957497 14672 +yanushauska 1 1 6.957497 6.957497 14673 +publ 1 1 6.957497 6.957497 14674 +vona 1 1 6.957497 6.957497 14675 +sepehrnoori 1 1 6.957497 6.957497 14676 +son 1 1 6.957497 6.957497 14677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..cb09ada1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +report 1 131 2.079442 2.079442 92 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +comment 1 93 2.397895 2.397895 146 +decemb 1 80 2.564949 2.564949 215 +appear 1 78 2.564949 2.564949 210 +master 1 76 2.564949 2.564949 216 +html 1 75 2.639057 2.639057 235 +summari 1 73 2.639057 2.639057 237 +august 1 66 2.708050 2.708050 257 +differ 1 66 2.708050 2.708050 253 +dept 1 64 2.772589 2.772589 291 +written 1 63 2.772589 2.772589 278 +organ 1 65 2.772589 2.772589 265 +prof 1 64 2.772589 2.772589 273 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +septemb 1 65 2.772589 2.772589 274 +thesi 1 57 2.890372 2.890372 327 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +visitor 1 49 3.044522 3.044522 371 +featur 1 46 3.091042 3.091042 386 +electron 1 47 3.091042 3.091042 379 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +press 1 42 3.218876 3.218876 419 +multipl 1 39 3.258097 3.258097 453 +map 1 39 3.258097 3.258097 452 +hand 1 37 3.332205 3.332205 475 +connect 1 37 3.332205 3.332205 485 +within 1 33 3.433987 3.433987 525 +ad 1 32 3.465736 3.465736 544 +extend 1 32 3.465736 3.465736 539 +photo 1 31 3.496508 3.496508 561 +neural 1 30 3.555348 3.555348 578 +synchron 1 29 3.583519 3.583519 588 +utc 1 27 3.637586 3.637586 629 +repres 1 26 3.688879 3.688879 656 +recognit 1 23 3.806662 3.806662 723 +self 1 22 3.850148 3.850148 761 +newsgroup 1 21 3.912023 3.912023 783 +event 1 18 4.060443 4.060443 896 +interconnect 1 17 4.110874 4.110874 937 +segment 1 17 4.110874 4.110874 931 +outlin 1 17 4.110874 4.110874 914 +cambridg 1 16 4.174387 4.174387 1008 +later 1 15 4.248495 4.248495 1043 +total 1 10 4.653960 4.653960 1398 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +isbn 1 7 5.010635 5.010635 1901 +bunch 1 7 5.010635 5.010635 1861 +joseph 1 5 5.347108 5.347108 2327 +korea 1 4 5.568345 5.568345 2971 +seoul 1 3 5.857933 5.857933 3783 +cortic 1 3 5.857933 5.857933 3857 +cortex 1 3 5.857933 5.857933 3856 +neuron 1 3 5.857933 5.857933 3798 +yoonsuck 1 2 6.263398 6.263398 4177 +choe 1 2 6.263398 6.263398 4178 +lissom 1 2 6.263398 6.263398 5605 +sirosh 1 2 6.263398 6.263398 5609 +touretzki 1 2 6.263398 6.263398 4428 +yschoe 1 2 6.263398 6.263398 4179 +yonsei 1 1 6.957497 6.957497 14679 +systembas 1 1 6.957497 6.957497 14680 +laterali 1 1 6.957497 6.957497 14681 +synerget 1 1 6.957497 6.957497 14682 +actualspik 1 1 6.957497 6.957497 14683 +spike 1 1 6.957497 6.957497 14678 +slissom 1 1 6.957497 6.957497 14684 +beself 1 1 6.957497 6.957497 14685 +retinabi 1 1 6.957497 6.957497 14686 +desynchron 1 1 6.957497 6.957497 14687 +mozer 1 1 6.957497 6.957497 14688 +hasselmo 1 1 6.957497 6.957497 14689 +handwritten 1 1 6.957497 6.957497 14690 +techic 1 1 6.957497 6.957497 14691 +unord 1 1 6.957497 6.957497 14692 +interestingcontact 1 1 6.957497 6.957497 14693 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..e6698cbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +develop 1 174 1.791759 1.791759 53 +object 1 138 1.945910 1.945910 79 +spring 1 131 2.079442 2.079442 88 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +mathemat 1 108 2.197225 2.197225 123 +select 1 91 2.397895 2.397895 154 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +interfac 1 79 2.564949 2.564949 209 +logic 1 71 2.639057 2.639057 230 +java 1 70 2.708050 2.708050 248 +previou 1 62 2.772589 2.772589 290 +prof 1 64 2.772589 2.772589 273 +semest 1 58 2.890372 2.890372 312 +publish 1 57 2.890372 2.890372 326 +tabl 1 51 2.995732 2.995732 346 +china 1 37 3.332205 3.332205 487 +common 1 30 3.555348 3.555348 574 +chines 1 29 3.583519 3.583519 595 +client 1 25 3.737670 3.737670 679 +yahoo 1 24 3.761200 3.761200 707 +famili 1 23 3.806662 3.806662 735 +alumni 1 21 3.912023 3.912023 807 +wang 1 21 3.912023 3.912023 790 +chen 1 21 3.912023 3.912023 791 +break 1 20 3.951244 3.951244 812 +synthesi 1 20 3.951244 3.951244 834 +beij 1 19 4.007333 4.007333 876 +brown 1 16 4.174387 4.174387 977 +universityof 1 15 4.248495 4.248495 1061 +hong 1 14 4.317488 4.317488 1105 +nick 1 13 4.382027 4.382027 1180 +perl 1 11 4.553877 4.553877 1332 +engr 1 10 4.653960 4.653960 1427 +academi 1 8 4.875197 4.875197 1735 +yang 1 8 4.875197 4.875197 1652 +gatewai 1 7 5.010635 5.010635 1942 +munich 1 3 5.857933 5.857933 3570 +yuan 1 3 5.857933 5.857933 3653 +addresspictur 1 2 6.263398 6.263398 5584 +schedulec 1 2 6.263398 6.263398 4190 +gang 1 2 6.263398 6.263398 4530 +manchest 1 2 6.263398 6.263398 4828 +yuanj 1 1 6.957497 6.957497 14694 +xuwint 1 1 6.957497 6.957497 14695 +aziz 1 1 6.957497 6.957497 14696 +pagechines 1 1 6.957497 6.957497 14697 +hefei 1 1 6.957497 6.957497 14698 +chinaunivers 1 1 6.957497 6.957497 14699 +atmunich 1 1 6.957497 6.957497 14700 +germanyunivers 1 1 6.957497 6.957497 14701 +higham 1 1 6.957497 6.957497 14702 +lifan 1 1 6.957497 6.957497 14703 +guizhongustc 1 1 6.957497 6.957497 14704 +hailiang 1 1 6.957497 6.957497 14705 +yuhongfriend 1 1 6.957497 6.957497 14706 +linsoftwar 1 1 6.957497 6.957497 14707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..d3f0372c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +last 1 314 1.098612 1.098612 14 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +number 1 130 2.079442 2.079442 97 +sinc 1 90 2.397895 2.397895 159 +visitor 1 49 3.044522 3.044522 371 +ring 1 8 4.875197 4.875197 1684 +edufing 1 4 5.568345 5.568345 2713 +feng 1 3 5.857933 5.857933 3300 +yufeng 1 1 6.957497 6.957497 14708 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..aa4e99dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +click 1 142 1.945910 1.945910 78 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +spring 1 131 2.079442 2.079442 88 +pleas 1 113 2.197225 2.197225 114 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +peopl 1 96 2.302585 2.302585 132 +associ 1 93 2.397895 2.397895 151 +sinc 1 90 2.397895 2.397895 159 +wide 1 84 2.484907 2.484907 185 +info 1 85 2.484907 2.484907 176 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +view 1 70 2.708050 2.708050 254 +differ 1 66 2.708050 2.708050 253 +creat 1 63 2.772589 2.772589 277 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +still 1 50 3.044522 3.044522 362 +visitor 1 49 3.044522 3.044522 371 +china 1 37 3.332205 3.332205 487 +copyright 1 36 3.367296 3.367296 495 +john 1 33 3.433987 3.433987 532 +kind 1 32 3.465736 3.465736 541 +anim 1 31 3.496508 3.496508 557 +chines 1 29 3.583519 3.583519 595 +full 1 28 3.610918 3.610918 615 +thank 1 23 3.806662 3.806662 721 +chen 1 21 3.912023 3.912023 791 +els 1 19 4.007333 4.007333 843 +seek 1 17 4.110874 4.110874 954 +misc 1 13 4.382027 4.382027 1124 +calculu 1 12 4.465908 4.465908 1203 +counter 1 8 4.875197 4.875197 1765 +accord 1 7 5.010635 5.010635 1826 +republ 1 4 5.568345 5.568345 3032 +chenabout 1 2 6.263398 6.263398 5499 +postcript 1 2 6.263398 6.263398 4050 +zhongshan 1 2 6.263398 6.263398 5547 +zodiac 1 2 6.263398 6.263398 5729 +burton 1 2 6.263398 6.263398 5759 +zhii 1 1 6.957497 6.957497 14709 +pagezhii 1 1 6.957497 6.957497 14711 +mefrom 1 1 6.957497 6.957497 14712 +guangzhou 1 1 6.957497 6.957497 14710 +canton 1 1 6.957497 6.957497 14713 +dong 1 1 6.957497 6.957497 14714 +zchen 1 1 6.957497 6.957497 14715 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..0e4a5c02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +process 1 142 1.945910 1.945910 72 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +build 1 85 2.484907 2.484907 184 +onlin 1 75 2.639057 2.639057 223 +html 1 75 2.639057 2.639057 235 +multimedia 1 68 2.708050 2.708050 258 +main 1 67 2.708050 2.708050 256 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +guid 1 63 2.772589 2.772589 267 +taylor 1 63 2.772589 2.772589 287 +room 1 59 2.833213 2.833213 301 +life 1 50 3.044522 3.044522 375 +directori 1 45 3.135494 3.135494 396 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +http 1 41 3.218876 3.218876 420 +movi 1 40 3.258097 3.258097 459 +societi 1 40 3.258097 3.258097 456 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +daili 1 24 3.761200 3.761200 706 +magazin 1 24 3.761200 3.761200 704 +yahoo 1 24 3.761200 3.761200 707 +beij 1 19 4.007333 4.007333 876 +lyco 1 19 4.007333 4.007333 871 +beauti 1 18 4.060443 4.060443 912 +sept 1 17 4.110874 4.110874 952 +script 1 13 4.382027 4.382027 1171 +land 1 12 4.465908 4.465908 1273 +entertain 1 12 4.465908 4.465908 1286 +career 1 12 4.465908 4.465908 1287 +sciencesat 1 7 5.010635 5.010635 1968 +digest 1 7 5.010635 5.010635 1864 +zhou 1 6 5.164786 5.164786 2092 +assistantship 1 3 5.857933 5.857933 3660 +stamp 1 3 5.857933 5.857933 3678 +giant 1 3 5.857933 5.857933 3137 +galaxi 1 3 5.857933 5.857933 3603 +kaleidoscop 1 2 6.263398 6.263398 5780 +zhai 1 2 6.263398 6.263398 5709 +maggi 1 1 6.957497 6.957497 14717 +xiao 1 1 6.957497 6.957497 14718 +buaa 1 1 6.957497 6.957497 14719 +newspag 1 1 6.957497 6.957497 14720 +zhouxiao 1 1 6.957497 6.957497 14716 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..b3ea4425 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +number 1 130 2.079442 2.079442 97 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +know 1 80 2.564949 2.564949 198 +dynam 1 76 2.564949 2.564949 194 +meet 1 72 2.639057 2.639057 229 +html 1 75 2.639057 2.639057 235 +degre 1 69 2.708050 2.708050 259 +best 1 59 2.833213 2.833213 299 +maintain 1 51 2.995732 2.995732 342 +friend 1 48 3.044522 3.044522 376 +visitor 1 49 3.044522 3.044522 371 +netscap 1 44 3.135494 3.135494 395 +anoth 1 45 3.135494 3.135494 408 +live 1 40 3.258097 3.258097 451 +china 1 37 3.332205 3.332205 487 +titl 1 31 3.496508 3.496508 556 +travel 1 30 3.555348 3.555348 579 +enjoi 1 26 3.688879 3.688879 660 +inth 1 22 3.850148 3.850148 741 +born 1 21 3.912023 3.912023 798 +alumni 1 21 3.912023 3.912023 807 +beij 1 19 4.007333 4.007333 876 +citi 1 19 4.007333 4.007333 874 +lot 1 18 4.060443 4.060443 889 +bachelor 1 17 4.110874 4.110874 957 +former 1 17 4.110874 4.110874 956 +danc 1 12 4.465908 4.465908 1278 +classmat 1 9 4.753590 4.753590 1516 +capit 1 7 5.010635 5.010635 1957 +peek 1 6 5.164786 5.164786 2169 +peke 1 5 5.347108 5.347108 2539 +valuabl 1 5 5.347108 5.347108 2256 +qing 1 3 5.857933 5.857933 3295 +ofmi 1 3 5.857933 5.857933 3911 +sciencesaustin 1 3 5.857933 5.857933 3828 +oversea 1 2 6.263398 6.263398 5781 +informationand 1 2 6.263398 6.263398 4840 +qinghi 1 1 6.957497 6.957497 14722 +scinc 1 1 6.957497 6.957497 14723 +findmor 1 1 6.957497 6.957497 14724 +pekingunivers 1 1 6.957497 6.957497 14725 +professionalinternetpc 1 1 6.957497 6.957497 14726 +relatedmac 1 1 6.957497 6.957497 14727 +relatedaft 1 1 6.957497 6.957497 14728 +worknetscap 1 1 6.957497 6.957497 14729 +testtwin 1 1 6.957497 6.957497 14730 +eldertwin 1 1 6.957497 6.957497 14731 +youngernetscap 1 1 6.957497 6.957497 14732 +testanim 1 1 6.957497 6.957497 14733 +titledanc 1 1 6.957497 6.957497 14734 +testanoth 1 1 6.957497 6.957497 14735 +testyet 1 1 6.957497 6.957497 14736 +testfriendsthi 1 1 6.957497 6.957497 14737 +xiaohai 1 1 6.957497 6.957497 14738 +shan 1 1 6.957497 6.957497 14739 +shinan 1 1 6.957497 6.957497 14740 +clike 1 1 6.957497 6.957497 14721 +qingunivers 1 1 6.957497 6.957497 14741 +zhuqe 1 1 6.957497 6.957497 14742 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..d561b696 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +click 1 142 1.945910 1.945910 78 +technolog 1 131 2.079442 2.079442 102 +seattl 1 120 2.079442 2.079442 103 +intern 1 108 2.197225 2.197225 128 +peopl 1 96 2.302585 2.302585 132 +comment 1 93 2.397895 2.397895 146 +educ 1 86 2.484907 2.484907 191 +refer 1 78 2.564949 2.564949 203 +nation 1 74 2.639057 2.639057 240 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +laboratori 1 63 2.772589 2.772589 292 +faculti 1 56 2.890372 2.890372 325 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +visitor 1 49 3.044522 3.044522 371 +live 1 40 3.258097 3.258097 451 +staff 1 36 3.367296 3.367296 490 +autumn 1 31 3.496508 3.496508 558 +univ 1 28 3.610918 3.610918 617 +progress 1 28 3.610918 3.610918 598 +handl 1 24 3.761200 3.761200 685 +half 1 21 3.912023 3.912023 776 +voic 1 21 3.912023 3.912023 806 +region 1 19 4.007333 4.007333 875 +intel 1 16 4.174387 4.174387 1000 +webmast 1 15 4.248495 4.248495 1045 +desktop 1 10 4.653960 4.653960 1445 +perspect 1 10 4.653960 4.653960 1437 +deadlin 1 9 4.753590 4.753590 1502 +pacif 1 8 4.875197 4.875197 1674 +centuri 1 7 5.010635 5.010635 1935 +elsewher 1 5 5.347108 5.347108 2444 +highlight 1 5 5.347108 5.347108 2340 +medal 1 3 5.857933 5.857933 3912 +theimpact 1 3 5.857933 5.857933 3179 +exponenti 1 3 5.857933 5.857933 3529 +organizationsinclud 1 1 6.957497 6.957497 14743 +ouraffili 1 1 6.957497 6.957497 14744 +regioninclud 1 1 6.957497 6.957497 14745 +spotlightuwwin 1 1 6.957497 6.957497 14746 +programmingcontesttwovideo 1 1 6.957497 6.957497 14747 +initiativesourcolloquia 1 1 6.957497 6.957497 14748 +mbonemajordon 1 1 6.957497 6.957497 14749 +corporationdickkarp 1 1 6.957497 6.957497 14750 +scienceprofessionalmast 1 1 6.957497 6.957497 14751 +departmentoverview 1 1 6.957497 6.957497 14752 +staffposit 1 1 6.957497 6.957497 14753 +newscan 1 1 6.957497 6.957497 14754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..d0a7886a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +proceed 1 93 2.397895 2.397895 152 +imag 1 91 2.397895 2.397895 161 +appear 1 78 2.564949 2.564949 210 +effici 1 73 2.639057 2.639057 233 +unix 1 58 2.890372 2.890372 308 +special 1 56 2.890372 2.890372 320 +format 1 48 3.044522 3.044522 356 +fast 1 42 3.218876 3.218876 429 +multipl 1 39 3.258097 3.258097 453 +winter 1 36 3.367296 3.367296 500 +approxim 1 35 3.401197 3.401197 509 +measur 1 28 3.610918 3.610918 609 +retriev 1 27 3.637586 3.637586 621 +bookmark 1 26 3.688879 3.688879 639 +miscellan 1 23 3.806662 3.806662 731 +andrew 1 19 4.007333 4.007333 849 +beauti 1 18 4.060443 4.060443 912 +protect 1 17 4.110874 4.110874 935 +match 1 16 4.174387 4.174387 965 +wife 1 13 4.382027 4.382027 1196 +usenix 1 12 4.465908 4.465908 1240 +linda 1 10 4.653960 4.653960 1394 +distanc 1 9 4.753590 4.753590 1500 +erik 1 8 4.875197 4.875197 1701 +shapiro 1 8 4.875197 4.875197 1686 +daughter 1 7 5.010635 5.010635 1943 +spie 1 6 5.164786 5.164786 2119 +educomput 1 5 5.347108 5.347108 2524 +selberg 1 5 5.347108 5.347108 2441 +stupid 1 5 5.347108 5.347108 2489 +aberman 1 2 6.263398 6.263398 4429 +bourassa 1 2 6.263398 6.263398 5782 +virgil 1 2 6.263398 6.263398 5783 +melani 1 2 6.263398 6.263398 5784 +berman 1 1 6.957497 6.957497 14755 +pageandrew 1 1 6.957497 6.957497 14757 +tron 1 1 6.957497 6.957497 14758 +bothpostscript 1 1 6.957497 6.957497 14759 +andhtml 1 1 6.957497 6.957497 14760 +debbi 1 1 6.957497 6.957497 14756 +poison 1 1 6.957497 6.957497 14761 +donut 1 1 6.957497 6.957497 14762 +stupidmi 1 1 6.957497 6.957497 14763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..6e5a672c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +start 1 83 2.484907 2.484907 173 +good 1 77 2.564949 2.564949 200 +view 1 70 2.708050 2.708050 254 +plan 1 65 2.772589 2.772589 272 +visit 1 63 2.772589 2.772589 288 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +undergradu 1 54 2.944439 2.944439 338 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +cool 1 49 3.044522 3.044522 374 +quarter 1 47 3.091042 3.091042 389 +physic 1 47 3.091042 3.091042 377 +video 1 44 3.135494 3.135494 405 +made 1 44 3.135494 3.135494 398 +live 1 40 3.258097 3.258097 451 +join 1 39 3.258097 3.258097 457 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +someth 1 31 3.496508 3.496508 554 +photo 1 31 3.496508 3.496508 561 +taken 1 31 3.496508 3.496508 555 +pass 1 28 3.610918 3.610918 611 +team 1 27 3.637586 3.637586 625 +great 1 27 3.637586 3.637586 626 +doctor 1 24 3.761200 3.761200 709 +finish 1 22 3.850148 3.850148 748 +hous 1 21 3.912023 3.912023 801 +wrote 1 20 3.951244 3.951244 830 +excel 1 19 4.007333 4.007333 868 +adam 1 17 4.110874 4.110874 934 +ultim 1 17 4.110874 4.110874 943 +earli 1 16 4.174387 4.174387 968 +stock 1 16 4.174387 4.174387 1007 +across 1 16 4.174387 4.174387 974 +bodi 1 13 4.382027 4.382027 1178 +menu 1 13 4.382027 4.382027 1156 +night 1 11 4.553877 4.553877 1319 +thecomput 1 10 4.653960 4.653960 1408 +drink 1 9 4.753590 4.753590 1607 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +trade 1 7 5.010635 5.010635 1815 +occasion 1 7 5.010635 5.010635 1905 +coffe 1 5 5.347108 5.347108 2556 +hair 1 5 5.347108 5.347108 2446 +formerli 1 5 5.347108 5.347108 2397 +multiresolut 1 5 5.347108 5.347108 2423 +frisbe 1 5 5.347108 5.347108 2560 +glass 1 3 5.857933 5.857933 3759 +swarthmor 1 2 6.263398 6.263398 5621 +comet 1 2 6.263398 6.263398 5785 +sculptur 1 2 6.263398 6.263398 5176 +mona 1 2 6.263398 6.263398 5786 +gothic 1 2 6.263398 6.263398 5787 +finkelstein 1 1 6.957497 6.957497 14764 +cup 1 1 6.957497 6.957497 14766 +limp 1 1 6.957497 6.957497 14767 +andlack 1 1 6.957497 6.957497 14768 +atprinceton 1 1 6.957497 6.957497 14769 +tibco 1 1 6.957497 6.957497 14770 +teknekron 1 1 6.957497 6.957497 14771 +alarg 1 1 6.957497 6.957497 14772 +calledumatata 1 1 6.957497 6.957497 14773 +photocopi 1 1 6.957497 6.957497 14765 +thehilari 1 1 6.957497 6.957497 14774 +caff 1 1 6.957497 6.957497 14775 +lardo 1 1 6.957497 6.957497 14776 +chilli 1 1 6.957497 6.957497 14777 +snoqualmi 1 1 6.957497 6.957497 14778 +hyakutak 1 1 6.957497 6.957497 14779 +marcu 1 1 6.957497 6.957497 14780 +dither 1 1 6.957497 6.957497 14781 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..33165009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +seattl 1 120 2.079442 2.079442 103 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +need 1 98 2.302585 2.302585 135 +present 1 91 2.397895 2.397895 145 +proceed 1 93 2.397895 2.397895 152 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +environ 1 84 2.484907 2.484907 177 +second 1 81 2.484907 2.484907 166 +novemb 1 81 2.484907 2.484907 179 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +interfac 1 79 2.564949 2.564949 209 +method 1 80 2.564949 2.564949 213 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +involv 1 71 2.639057 2.639057 227 +august 1 66 2.708050 2.708050 257 +laboratori 1 63 2.772589 2.772589 292 +improv 1 62 2.772589 2.772589 289 +experi 1 64 2.772589 2.772589 283 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +share 1 59 2.833213 2.833213 304 +simpl 1 60 2.833213 2.833213 298 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +found 1 53 2.944439 2.944439 337 +visual 1 48 3.044522 3.044522 372 +set 1 50 3.044522 3.044522 361 +effect 1 46 3.091042 3.091042 385 +possibl 1 47 3.091042 3.091042 378 +describ 1 45 3.135494 3.135494 400 +show 1 43 3.178054 3.178054 417 +vision 1 41 3.218876 3.218876 430 +fast 1 42 3.218876 3.218876 429 +cost 1 37 3.332205 3.332205 480 +jame 1 35 3.401197 3.401197 507 +post 1 35 3.401197 3.401197 505 +queri 1 33 3.433987 3.433987 524 +scientist 1 31 3.496508 3.496508 560 +load 1 28 3.610918 3.610918 601 +intend 1 28 3.610918 3.610918 599 +experiment 1 26 3.688879 3.688879 645 +enabl 1 26 3.688879 3.688879 655 +supercomput 1 25 3.737670 3.737670 681 +handl 1 24 3.761200 3.761200 685 +highli 1 23 3.806662 3.806662 725 +displai 1 23 3.806662 3.806662 712 +ofwashington 1 22 3.850148 3.850148 766 +avoid 1 21 3.912023 3.912023 799 +facil 1 20 3.951244 3.951244 814 +toolkit 1 20 3.951244 3.951244 835 +longer 1 20 3.951244 3.951244 816 +aid 1 18 4.060443 4.060443 904 +steven 1 17 4.110874 4.110874 953 +render 1 17 4.110874 4.110874 947 +massiv 1 15 4.248495 4.248495 1026 +balanc 1 14 4.317488 4.317488 1112 +save 1 14 4.317488 4.317488 1099 +charl 1 13 4.382027 4.382027 1149 +directli 1 13 4.382027 4.382027 1141 +promot 1 12 4.465908 4.465908 1235 +target 1 12 4.465908 4.465908 1282 +amount 1 12 4.465908 4.465908 1208 +typic 1 11 4.553877 4.553877 1360 +motiv 1 11 4.553877 4.553877 1346 +extrem 1 11 4.553877 4.553877 1330 +mesh 1 11 4.553877 4.553877 1351 +linda 1 10 4.653960 4.653960 1394 +tanimoto 1 10 4.653960 4.653960 1429 +occur 1 9 4.753590 4.753590 1572 +factor 1 9 4.753590 4.753590 1544 +frank 1 9 4.753590 4.753590 1568 +transmiss 1 9 4.753590 4.753590 1588 +shapiro 1 8 4.875197 4.875197 1686 +lewi 1 8 4.875197 4.875197 1700 +unifi 1 8 4.875197 4.875197 1774 +polygon 1 8 4.875197 4.875197 1723 +dataset 1 7 5.010635 5.010635 1914 +shade 1 7 5.010635 5.010635 1881 +huge 1 6 5.164786 5.164786 1991 +unnecessari 1 5 5.347108 5.347108 2506 +ortega 1 5 5.347108 5.347108 2559 +icpp 1 5 5.347108 5.347108 2382 +loss 1 3 5.857933 5.857933 3805 +jakobovit 1 3 5.857933 5.857933 3913 +lara 1 3 5.857933 5.857933 3914 +ahren 1 2 6.263398 6.263398 5788 +alamo 1 2 6.263398 6.263398 4243 +hansen 1 2 6.263398 6.263398 4301 +degrad 1 2 6.263398 6.263398 5362 +redistribut 1 2 6.263398 6.263398 5582 +brinklei 1 2 6.263398 6.263398 5789 +notebook 1 2 6.263398 6.263398 5790 +arbitrarili 1 2 6.263398 6.263398 5791 +onunbalanc 1 1 6.957497 6.957497 14782 +whichperform 1 1 6.957497 6.957497 14783 +outweighth 1 1 6.957497 6.957497 14784 +polygonrender 1 1 6.957497 6.957497 14785 +percent 1 1 6.957497 6.957497 14786 +onbalanc 1 1 6.957497 6.957497 14787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..2154801c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +environ 1 84 2.484907 2.484907 177 +contain 1 81 2.484907 2.484907 174 +level 1 87 2.484907 2.484907 180 +interfac 1 79 2.564949 2.564949 209 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +differ 1 66 2.708050 2.708050 253 +interact 1 62 2.772589 2.772589 270 +laboratori 1 63 2.772589 2.772589 292 +experi 1 64 2.772589 2.772589 283 +organ 1 65 2.772589 2.772589 265 +share 1 59 2.833213 2.833213 304 +overview 1 56 2.890372 2.890372 323 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +investig 1 51 2.995732 2.995732 353 +visual 1 48 3.044522 3.044522 372 +electron 1 47 3.091042 3.091042 379 +describ 1 45 3.135494 3.135494 400 +vision 1 41 3.218876 3.218876 430 +multi 1 36 3.367296 3.367296 493 +jame 1 35 3.401197 3.401197 507 +queri 1 33 3.433987 3.433987 524 +experiment 1 26 3.688879 3.688879 645 +store 1 24 3.761200 3.761200 693 +highli 1 23 3.806662 3.806662 725 +among 1 21 3.912023 3.912023 781 +definit 1 19 4.007333 4.007333 864 +aid 1 18 4.060443 4.060443 904 +steven 1 17 4.110874 4.110874 953 +hierarch 1 15 4.248495 4.248495 1018 +attribut 1 14 4.317488 4.317488 1092 +motiv 1 11 4.553877 4.553877 1346 +princip 1 10 4.653960 4.653960 1397 +linda 1 10 4.653960 4.653960 1394 +tanimoto 1 10 4.653960 4.653960 1429 +shapiro 1 8 4.875197 4.875197 1686 +lewi 1 8 4.875197 4.875197 1700 +databasesystem 1 8 4.875197 4.875197 1617 +schema 1 6 5.164786 5.164786 1988 +spie 1 6 5.164786 5.164786 2119 +scienceand 1 5 5.347108 5.347108 2348 +entiti 1 3 5.857933 5.857933 3096 +jakobovit 1 3 5.857933 5.857933 3913 +lara 1 3 5.857933 5.857933 3914 +notebook 1 2 6.263398 6.263398 5790 +databaseenviron 1 2 6.263398 6.263398 5792 +datastructur 1 2 6.263398 6.263398 4685 +brinklei 1 2 6.263398 6.263398 5789 +ahren 1 2 6.263398 6.263398 5788 +wasdesign 1 1 6.957497 6.957497 14789 +andintend 1 1 6.957497 6.957497 14790 +unifieddata 1 1 6.957497 6.957497 14791 +queryfacil 1 1 6.957497 6.957497 14792 +andpromot 1 1 6.957497 6.957497 14793 +devr 1 1 6.957497 6.957497 14788 +ofproperti 1 1 6.957497 6.957497 14794 +thepart 1 1 6.957497 6.957497 14795 +buildinst 1 1 6.957497 6.957497 14796 +inmodel 1 1 6.957497 6.957497 14797 +secondcad 1 1 6.957497 6.957497 14798 +flexibledata 1 1 6.957497 6.957497 14799 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..97fdac3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +start 1 83 2.484907 2.484907 173 +school 1 84 2.484907 2.484907 188 +environ 1 84 2.484907 2.484907 177 +educ 1 86 2.484907 2.484907 191 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +nation 1 74 2.639057 2.639057 240 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +allow 1 53 2.944439 2.944439 333 +undergradu 1 54 2.944439 2.944439 338 +still 1 50 3.044522 3.044522 362 +principl 1 48 3.044522 3.044522 357 +cool 1 49 3.044522 3.044522 374 +quarter 1 47 3.091042 3.091042 389 +math 1 44 3.135494 3.135494 402 +past 1 42 3.218876 3.218876 428 +seminar 1 38 3.295837 3.295837 470 +autumn 1 31 3.496508 3.496508 558 +great 1 27 3.637586 3.637586 626 +william 1 22 3.850148 3.850148 765 +programminglanguag 1 21 3.912023 3.912023 782 +spend 1 19 4.007333 4.007333 850 +scott 1 18 4.060443 4.060443 884 +ultim 1 17 4.110874 4.110874 943 +women 1 16 4.174387 4.174387 1004 +front 1 13 4.382027 4.382027 1154 +automata 1 13 4.382027 4.382027 1135 +opportun 1 13 4.382027 4.382027 1161 +undergrad 1 9 4.753590 4.753590 1589 +christian 1 7 5.010635 5.010635 1949 +peterson 1 7 5.010635 5.010635 1850 +park 1 6 5.164786 5.164786 2218 +frisbe 1 5 5.347108 5.347108 2560 +outdoor 1 5 5.347108 5.347108 2514 +mentor 1 4 5.568345 5.568345 2591 +pile 1 2 6.263398 6.263398 5371 +blast 1 2 6.263398 6.263398 5172 +femal 1 2 6.263398 6.263398 4672 +pagelast 1 2 6.263398 6.263398 5793 +bernheim 1 1 6.957497 6.957497 14801 +ofdigit 1 1 6.957497 6.957497 14802 +gorp 1 1 6.957497 6.957497 14803 +guideto 1 1 6.957497 6.957497 14804 +recreationfun 1 1 6.957497 6.957497 14805 +abig 1 1 6.957497 6.957497 14806 +scoobi 1 1 6.957497 6.957497 14807 +dooeduc 1 1 6.957497 6.957497 14808 +mentorship 1 1 6.957497 6.957497 14800 +summerwork 1 1 6.957497 6.957497 14809 +highlyrecommend 1 1 6.957497 6.957497 14810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..801961c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +postscript 1 131 2.079442 2.079442 90 +teach 1 108 2.197225 2.197225 112 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +part 1 98 2.302585 2.302585 129 +graphic 1 90 2.397895 2.397895 147 +mani 1 92 2.397895 2.397895 150 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +master 1 76 2.564949 2.564949 216 +optim 1 79 2.564949 2.564949 197 +write 1 72 2.639057 2.639057 222 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +simpl 1 60 2.833213 2.833213 298 +thesi 1 57 2.890372 2.890372 327 +explor 1 58 2.890372 2.890372 324 +publish 1 57 2.890372 2.890372 326 +undergradu 1 54 2.944439 2.944439 338 +found 1 53 2.944439 2.944439 337 +particular 1 51 2.995732 2.995732 352 +better 1 45 3.135494 3.135494 401 +might 1 41 3.218876 3.218876 426 +error 1 40 3.258097 3.258097 449 +tree 1 36 3.367296 3.367296 492 +obtain 1 33 3.433987 3.433987 534 +anim 1 31 3.496508 3.496508 557 +scientist 1 31 3.496508 3.496508 560 +often 1 31 3.496508 3.496508 551 +built 1 29 3.583519 3.583519 592 +consid 1 29 3.583519 3.583519 590 +wai 1 25 3.737670 3.737670 662 +compress 1 23 3.806662 3.806662 719 +proof 1 23 3.806662 3.806662 720 +theorem 1 21 3.912023 3.912023 786 +avoid 1 21 3.912023 3.912023 799 +binari 1 20 3.951244 3.951244 823 +wrote 1 20 3.951244 3.951244 830 +seem 1 18 4.060443 4.060443 899 +whether 1 17 4.110874 4.110874 918 +universityof 1 15 4.248495 4.248495 1061 +hierarch 1 15 4.248495 4.248495 1018 +style 1 15 4.248495 4.248495 1036 +balanc 1 14 4.317488 4.317488 1112 +believ 1 13 4.382027 4.382027 1187 +unfortun 1 13 4.382027 4.382027 1170 +care 1 13 4.382027 4.382027 1177 +scienceat 1 11 4.553877 4.553877 1375 +motiv 1 11 4.553877 4.553877 1346 +ring 1 8 4.875197 4.875197 1684 +told 1 8 4.875197 4.875197 1658 +refere 1 7 5.010635 5.010635 1895 +toronto 1 6 5.164786 5.164786 2156 +meant 1 6 5.164786 5.164786 2055 +fewer 1 6 5.164786 5.164786 2074 +mistak 1 6 5.164786 5.164786 2110 +broadcast 1 5 5.347108 5.347108 2453 +shift 1 5 5.347108 5.347108 2357 +tend 1 4 5.568345 5.568345 3041 +conserv 1 4 5.568345 5.568345 2870 +will 1 4 5.568345 5.568345 2782 +glad 1 4 5.568345 5.568345 2657 +fear 1 4 5.568345 5.568345 2911 +reveal 1 4 5.568345 5.568345 2647 +complic 1 4 5.568345 5.568345 2902 +amir 1 3 5.857933 5.857933 3850 +hereto 1 3 5.857933 5.857933 3476 +quotat 1 3 5.857933 5.857933 3121 +theywil 1 3 5.857933 5.857933 3102 +incorrect 1 3 5.857933 5.857933 3134 +caught 1 3 5.857933 5.857933 3465 +obvious 1 3 5.857933 5.857933 3474 +hoar 1 3 5.857933 5.857933 3875 +mathematician 1 2 6.263398 6.263398 5189 +persuad 1 2 6.263398 6.263398 5384 +defici 1 2 6.263398 6.263398 5054 +obviou 1 2 6.263398 6.263398 5367 +michail 1 1 6.957497 6.957497 14811 +michailgradu 1 1 6.957497 6.957497 14812 +studenti 1 1 6.957497 6.957497 14813 +followingarea 1 1 6.957497 6.957497 14814 +summationfor 1 1 6.957497 6.957497 14815 +opsi 1 1 6.957497 6.957497 14816 +appletdesign 1 1 6.957497 6.957497 14817 +combinesprogram 1 1 6.957497 6.957497 14818 +lunar 1 1 6.957497 6.957497 14819 +lander 1 1 6.957497 6.957497 14820 +gamethat 1 1 6.957497 6.957497 14821 +unwillingto 1 1 6.957497 6.957497 14822 +embarrass 1 1 6.957497 6.957497 14823 +publishedincorrect 1 1 6.957497 6.957497 14824 +unconvent 1 1 6.957497 6.957497 14825 +proofstyl 1 1 6.957497 6.957497 14826 +theyhav 1 1 6.957497 6.957497 14827 +wasnot 1 1 6.957497 6.957497 14828 +stylethat 1 1 6.957497 6.957497 14829 +lesli 1 1 6.957497 6.957497 14830 +lamport 1 1 6.957497 6.957497 14831 +wayi 1 1 6.957497 6.957497 14832 +theother 1 1 6.957497 6.957497 14833 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..af018a9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +associ 1 93 2.397895 2.397895 151 +pictur 1 89 2.397895 2.397895 160 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +resum 1 79 2.564949 2.564949 217 +receiv 1 66 2.708050 2.708050 244 +main 1 67 2.708050 2.708050 256 +visit 1 63 2.772589 2.772589 288 +evalu 1 64 2.772589 2.772589 266 +colleg 1 61 2.833213 2.833213 300 +scientif 1 53 2.944439 2.944439 341 +join 1 39 3.258097 3.258097 457 +award 1 34 3.401197 3.401197 523 +india 1 32 3.465736 3.465736 550 +richard 1 31 3.496508 3.496508 559 +travel 1 30 3.555348 3.555348 579 +progress 1 28 3.610918 3.610918 598 +spent 1 25 3.737670 3.737670 676 +indian 1 22 3.850148 3.850148 769 +geometri 1 22 3.850148 3.850148 752 +anderson 1 19 4.007333 4.007333 860 +postdoc 1 8 4.875197 4.875197 1724 +presidenti 1 8 4.875197 4.875197 1737 +qualifi 1 8 4.875197 4.875197 1721 +tourist 1 8 4.875197 4.875197 1710 +implementationof 1 7 5.010635 5.010635 1813 +reed 1 6 5.164786 5.164786 2086 +inmathemat 1 2 6.263398 6.263398 5098 +younginvestig 1 2 6.263398 6.263398 5794 +bangalor 1 2 6.263398 6.263398 5110 +theindian 1 2 6.263398 6.263398 5795 +stanfordin 1 1 6.957497 6.957497 14834 +aon 1 1 6.957497 6.957497 14835 +inberkelei 1 1 6.957497 6.957497 14836 +yeara 1 1 6.957497 6.957497 14837 +talksanderson 1 1 6.957497 6.957497 14838 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..dd9afa0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +academ 1 82 2.484907 2.484907 178 +thing 1 84 2.484907 2.484907 189 +librari 1 87 2.484907 2.484907 181 +educ 1 86 2.484907 2.484907 191 +intellig 1 72 2.639057 2.639057 225 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +receiv 1 66 2.708050 2.708050 244 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +content 1 59 2.833213 2.833213 302 +locat 1 59 2.833213 2.833213 303 +investig 1 51 2.995732 2.995732 353 +life 1 50 3.044522 3.044522 375 +made 1 44 3.135494 3.135494 398 +show 1 43 3.178054 3.178054 417 +music 1 42 3.218876 3.218876 436 +combin 1 42 3.218876 3.218876 421 +theoret 1 39 3.258097 3.258097 446 +vita 1 38 3.295837 3.295837 473 +mean 1 37 3.332205 3.332205 477 +purpos 1 37 3.332205 3.332205 481 +soon 1 36 3.367296 3.367296 494 +represent 1 35 3.401197 3.401197 512 +award 1 34 3.401197 3.401197 523 +curriculum 1 33 3.433987 3.433987 535 +travel 1 30 3.555348 3.555348 579 +art 1 29 3.583519 3.583519 593 +decis 1 23 3.806662 3.806662 728 +honor 1 23 3.806662 3.806662 729 +finish 1 22 3.850148 3.850148 748 +ofwashington 1 22 3.850148 3.850148 766 +born 1 21 3.912023 3.912023 798 +mostli 1 19 4.007333 4.007333 869 +histori 1 19 4.007333 4.007333 853 +listen 1 18 4.060443 4.060443 907 +thought 1 17 4.110874 4.110874 945 +brother 1 13 4.382027 4.382027 1189 +employ 1 12 4.465908 4.465908 1291 +qualit 1 11 4.553877 4.553877 1362 +probabilist 1 11 4.553877 4.553877 1343 +literatur 1 11 4.553877 4.553877 1300 +creativ 1 8 4.875197 4.875197 1777 +switch 1 8 4.875197 4.875197 1718 +brought 1 7 5.010635 5.010635 1925 +gave 1 7 5.010635 5.010635 1922 +foreign 1 7 5.010635 5.010635 1919 +reconstruct 1 6 5.164786 5.164786 2170 +blue 1 6 5.164786 5.164786 2227 +mother 1 6 5.164786 5.164786 2083 +markov 1 5 5.347108 5.347108 2280 +jazz 1 5 5.347108 5.347108 2527 +paint 1 5 5.347108 5.347108 2400 +andengin 1 4 5.568345 5.568345 3042 +revisit 1 3 5.857933 5.857933 3915 +ofmi 1 3 5.857933 5.857933 3911 +father 1 3 5.857933 5.857933 3757 +birth 1 3 5.857933 5.857933 3594 +affair 1 3 5.857933 5.857933 3916 +anhai 1 2 6.263398 6.263398 4404 +doan 1 2 6.263398 6.263398 4405 +andscienc 1 2 6.263398 6.263398 5796 +milwauke 1 2 6.263398 6.263398 5797 +amcurr 1 2 6.263398 6.263398 5798 +vietnames 1 2 6.263398 6.263398 5593 +homepageanhai 1 1 6.957497 6.957497 14842 +vietnam 1 1 6.957497 6.957497 14843 +iwent 1 1 6.957497 6.957497 14844 +hungari 1 1 6.957497 6.957497 14840 +kossuth 1 1 6.957497 6.957497 14845 +lajo 1 1 6.957497 6.957497 14846 +debrecen 1 1 6.957497 6.957497 14847 +underuncertainti 1 1 6.957497 6.957497 14848 +calm 1 1 6.957497 6.957497 14849 +invietnames 1 1 6.957497 6.957497 14850 +syllabl 1 1 6.957497 6.957497 14839 +birthplac 1 1 6.957497 6.957497 14841 +nghean 1 1 6.957497 6.957497 14851 +haiphong 1 1 6.957497 6.957497 14852 +folkswer 1 1 6.957497 6.957497 14853 +younger 1 1 6.957497 6.957497 14854 +theysimpli 1 1 6.957497 6.957497 14855 +namehaian 1 1 6.957497 6.957497 14856 +comtemporari 1 1 6.957497 6.957497 14857 +snapshotsanhai 1 1 6.957497 6.957497 14858 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..ac1f4ede --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +spring 1 131 2.079442 2.079442 88 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +place 1 106 2.197225 2.197225 124 +pictur 1 89 2.397895 2.397895 160 +follow 1 92 2.397895 2.397895 143 +graphic 1 90 2.397895 2.397895 147 +control 1 82 2.484907 2.484907 164 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +dynam 1 76 2.564949 2.564949 194 +optim 1 79 2.564949 2.564949 197 +workshop 1 71 2.639057 2.639057 239 +sieg 1 69 2.708050 2.708050 260 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +automat 1 61 2.833213 2.833213 306 +februari 1 54 2.944439 2.944439 328 +without 1 50 3.044522 3.044522 370 +quarter 1 47 3.091042 3.091042 389 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +mark 1 44 3.135494 3.135494 403 +better 1 45 3.135494 3.135494 401 +show 1 43 3.178054 3.178054 417 +fast 1 42 3.218876 3.218876 429 +small 1 39 3.258097 3.258097 447 +origin 1 38 3.295837 3.295837 472 +slide 1 38 3.295837 3.295837 467 +seminar 1 38 3.295837 3.295837 470 +short 1 36 3.367296 3.367296 499 +anim 1 31 3.496508 3.496508 557 +autumn 1 31 3.496508 3.496508 558 +hope 1 28 3.610918 3.610918 610 +univ 1 28 3.610918 3.610918 617 +valu 1 25 3.737670 3.737670 665 +motion 1 24 3.761200 3.761200 699 +magazin 1 24 3.761200 3.761200 704 +compress 1 23 3.806662 3.806662 719 +sequenc 1 23 3.806662 3.806662 734 +togeth 1 23 3.806662 3.806662 714 +try 1 22 3.850148 3.850148 764 +mpeg 1 20 3.951244 3.951244 831 +synthesi 1 20 3.951244 3.951244 834 +bershad 1 18 4.060443 4.060443 902 +event 1 18 4.060443 4.060443 896 +figur 1 18 4.060443 4.060443 903 +speed 1 18 4.060443 4.060443 911 +brown 1 16 4.174387 4.174387 977 +took 1 16 4.174387 4.174387 1010 +qual 1 15 4.248495 4.248495 1062 +goe 1 15 4.248495 4.248495 1044 +piec 1 15 4.248495 4.248495 1020 +consider 1 14 4.317488 4.317488 1076 +benjamin 1 11 4.553877 4.553877 1296 +decomposit 1 10 4.653960 4.653960 1439 +jump 1 9 4.753590 4.753590 1603 +joel 1 8 4.875197 4.875197 1698 +chamber 1 8 4.875197 4.875197 1692 +egger 1 8 4.875197 4.875197 1695 +pldi 1 8 4.875197 4.875197 1704 +switch 1 8 4.875197 4.875197 1718 +dispatch 1 7 5.010635 5.010635 1791 +gave 1 7 5.010635 5.010635 1922 +mock 1 6 5.164786 5.164786 2087 +engineeringat 1 5 5.347108 5.347108 2561 +philipos 1 5 5.347108 5.347108 2373 +andp 1 4 5.568345 5.568345 2811 +pardyak 1 4 5.568345 5.568345 3043 +doubl 1 4 5.568345 5.568345 2951 +ausland 1 3 5.857933 5.857933 3917 +super 1 3 5.857933 5.857933 3918 +singular 1 3 5.857933 5.857933 3366 +multiflow 1 2 6.263398 6.263398 4473 +articul 1 2 6.263398 6.263398 5799 +acmtransact 1 2 6.263398 6.263398 4310 +wilkerson 1 2 6.263398 6.263398 4516 +mywork 1 2 6.263398 6.263398 5800 +orang 1 2 6.263398 6.263398 5163 +onit 1 1 6.957497 6.957497 14859 +andb 1 1 6.957497 6.957497 14860 +inextens 1 1 6.957497 6.957497 14861 +compilersupport 1 1 6.957497 6.957497 14862 +synthesisfor 1 1 6.957497 6.957497 14863 +fukunaga 1 1 6.957497 6.957497 14864 +partovi 1 1 6.957497 6.957497 14865 +christensen 1 1 6.957497 6.957497 14866 +reiss 1 1 6.957497 6.957497 14867 +shuman 1 1 6.957497 6.957497 14868 +leapfrog 1 1 6.957497 6.957497 14869 +lossili 1 1 6.957497 6.957497 14870 +animationthat 1 1 6.957497 6.957497 14871 +cartwheel 1 1 6.957497 6.957497 14872 +andshuffl 1 1 6.957497 6.957497 14873 +andcollaps 1 1 6.957497 6.957497 14874 +isjust 1 1 6.957497 6.957497 14875 +tosmooth 1 1 6.957497 6.957497 14876 +thetalk 1 1 6.957497 6.957497 14877 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..7e46da4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +number 1 130 2.079442 2.079442 97 +site 1 106 2.197225 2.197225 119 +look 1 107 2.197225 2.197225 115 +find 1 111 2.197225 2.197225 111 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +search 1 95 2.397895 2.397895 155 +second 1 81 2.484907 2.484907 166 +institut 1 84 2.484907 2.484907 187 +info 1 85 2.484907 2.484907 176 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +refer 1 78 2.564949 2.564949 203 +complet 1 77 2.564949 2.564949 208 +name 1 72 2.639057 2.639057 220 +onlin 1 75 2.639057 2.639057 223 +sieg 1 69 2.708050 2.708050 260 +view 1 70 2.708050 2.708050 254 +test 1 66 2.708050 2.708050 252 +visit 1 63 2.772589 2.772589 288 +dept 1 64 2.772589 2.772589 291 +locat 1 59 2.833213 2.833213 303 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +friend 1 48 3.044522 3.044522 376 +get 1 46 3.091042 3.091042 380 +favorit 1 44 3.135494 3.135494 410 +edit 1 42 3.218876 3.218876 418 +past 1 42 3.218876 3.218876 428 +brian 1 38 3.295837 3.295837 466 +origin 1 38 3.295837 3.295837 472 +random 1 34 3.401197 3.401197 511 +idea 1 32 3.465736 3.465736 545 +quot 1 29 3.583519 3.583519 582 +actual 1 28 3.610918 3.610918 604 +stop 1 17 4.110874 4.110874 942 +alreadi 1 16 4.174387 4.174387 963 +doesn 1 15 4.248495 4.248495 1055 +hotlist 1 13 4.382027 4.382027 1199 +touch 1 12 4.465908 4.465908 1288 +brad 1 12 4.465908 4.465908 1264 +artist 1 6 5.164786 5.164786 2127 +band 1 6 5.164786 5.164786 2198 +girl 1 5 5.347108 5.347108 2410 +worst 1 5 5.347108 5.347108 2287 +poem 1 5 5.347108 5.347108 2483 +guestbook 1 5 5.347108 5.347108 2475 +snapshot 1 5 5.347108 5.347108 2303 +washingtonseattl 1 4 5.568345 5.568345 3044 +ling 1 4 5.568345 5.568345 3045 +confus 1 3 5.857933 5.857933 3144 +thrash 1 3 5.857933 5.857933 3400 +mossi 1 2 6.263398 6.263398 5801 +liber 1 2 6.263398 6.263398 5154 +glorifi 1 2 6.263398 6.263398 4114 +mental 1 2 6.263398 6.263398 5802 +stolen 1 2 6.263398 6.263398 5803 +boinge 1 1 6.957497 6.957497 14878 +michalowskidepart 1 1 6.957497 6.957497 14879 +engineeringmail 1 1 6.957497 6.957497 14880 +bitsthank 1 1 6.957497 6.957497 14881 +headscapewhenev 1 1 6.957497 6.957497 14882 +gradstud 1 1 6.957497 6.957497 14883 +inlinguist 1 1 6.957497 6.957497 14884 +ultrahotlist 1 1 6.957497 6.957497 14885 +ofal 1 1 6.957497 6.957497 14886 +forsometh 1 1 6.957497 6.957497 14887 +thave 1 1 6.957497 6.957497 14888 +urouletteto 1 1 6.957497 6.957497 14889 +ofwhich 1 1 6.957497 6.957497 14890 +songsand 1 1 6.957497 6.957497 14891 +fictiti 1 1 6.957497 6.957497 14892 +puriti 1 1 6.957497 6.957497 14893 +tokeep 1 1 6.957497 6.957497 14894 +pagesfrom 1 1 6.957497 6.957497 14895 +aslfingerspel 1 1 6.957497 6.957497 14896 +blatantli 1 1 6.957497 6.957497 14897 +chamberlain 1 1 6.957497 6.957497 14898 +michalowski 1 1 6.957497 6.957497 14899 +sanityerad 1 1 6.957497 6.957497 14900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..1d6893c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +associ 1 93 2.397895 2.397895 151 +receiv 1 66 2.708050 2.708050 244 +complex 1 64 2.772589 2.772589 269 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +join 1 39 3.258097 3.258097 457 +theoret 1 39 3.258097 3.258097 446 +paul 1 38 3.295837 3.295837 471 +connect 1 37 3.332205 3.332205 485 +post 1 35 3.401197 3.401197 505 +award 1 34 3.401197 3.401197 523 +autumn 1 31 3.496508 3.496508 558 +enjoi 1 26 3.688879 3.688879 660 +concern 1 25 3.737670 3.737670 666 +aspect 1 25 3.737670 3.737670 663 +sport 1 25 3.737670 3.737670 683 +doctor 1 24 3.761200 3.761200 709 +proof 1 23 3.806662 3.806662 720 +theunivers 1 21 3.912023 3.912023 797 +concentr 1 18 4.060443 4.060443 906 +qual 1 15 4.248495 4.248495 1062 +primarili 1 13 4.382027 4.382027 1185 +thedepart 1 11 4.553877 4.553877 1350 +softbal 1 9 4.753590 4.753590 1594 +toronto 1 6 5.164786 5.164786 2156 +squash 1 6 5.164786 5.164786 2223 +lack 1 6 5.164786 5.164786 1994 +beam 1 5 5.347108 5.347108 2344 +engineeringat 1 5 5.347108 5.347108 2561 +talent 1 3 5.857933 5.857933 3768 +sciencein 1 2 6.263398 6.263398 5804 +paralleland 1 2 6.263398 6.263398 5805 +beamepaul 1 1 6.957497 6.957497 14901 +academicyear 1 1 6.957497 6.957497 14903 +presidentialyoung 1 1 6.957497 6.957497 14904 +computationalcomplex 1 1 6.957497 6.957497 14902 +inproposit 1 1 6.957497 6.957497 14905 +enthusiasm 1 1 6.957497 6.957497 14906 +cancompens 1 1 6.957497 6.957497 14907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..5f99882d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +develop 1 174 1.791759 1.791759 53 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +real 1 93 2.397895 2.397895 144 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +level 1 87 2.484907 2.484907 180 +build 1 85 2.484907 2.484907 184 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +optim 1 79 2.564949 2.564949 197 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +free 1 73 2.639057 2.639057 224 +function 1 62 2.772589 2.772589 275 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +much 1 52 2.995732 2.995732 349 +done 1 47 3.091042 3.091042 381 +mark 1 44 3.135494 3.135494 403 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +respons 1 37 3.332205 3.332205 476 +platform 1 29 3.583519 3.583519 591 +measur 1 28 3.610918 3.610918 609 +team 1 27 3.637586 3.637586 625 +sport 1 25 3.737670 3.737670 683 +grad 1 20 3.951244 3.951244 837 +particularli 1 19 4.007333 4.007333 867 +ultim 1 17 4.110874 4.110874 943 +devic 1 16 4.174387 4.174387 1002 +goe 1 15 4.248495 4.248495 1044 +spin 1 14 4.317488 4.317488 1121 +econom 1 13 4.382027 4.382027 1184 +jump 1 9 4.753590 4.753590 1603 +volleybal 1 9 4.753590 4.753590 1598 +trust 1 9 4.753590 4.753590 1583 +spot 1 7 5.010635 5.010635 1894 +bunch 1 7 5.010635 5.010635 1861 +athlet 1 7 5.010635 5.010635 1933 +tri 1 6 5.164786 5.164786 2166 +railroad 1 6 5.164786 5.164786 2161 +frisbe 1 5 5.347108 5.347108 2560 +minnesota 1 5 5.347108 5.347108 2469 +anti 1 5 5.347108 5.347108 2434 +champion 1 4 5.568345 5.568345 2982 +skate 1 4 5.568345 5.568345 3046 +borrow 1 3 5.857933 5.857933 3725 +somedai 1 3 5.857933 5.857933 3919 +tripl 1 3 5.857933 5.857933 3160 +bank 1 3 5.857933 5.857933 3920 +getto 1 2 6.263398 6.263398 5806 +locomot 1 2 6.263398 6.263398 5807 +beckerdavid 1 1 6.957497 6.957497 14908 +beckercontact 1 1 6.957497 6.957497 14909 +makingspina 1 1 6.957497 6.957497 14910 +drvier 1 1 6.957497 6.957497 14911 +bethel 1 1 6.957497 6.957497 14912 +men 1 1 6.957497 6.957497 14913 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 1 1 6.957497 6.957497 14914 +handbal 1 1 6.957497 6.957497 14915 +playracquetballgolftenni 1 1 6.957497 6.957497 14916 +bridgecampingcanoeingdisc 1 1 6.957497 6.957497 14917 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 1 1 6.957497 6.957497 14918 +skiingweightliftingwhitewat 1 1 6.957497 6.957497 14919 +raftinghorseback 1 1 6.957497 6.957497 14920 +ridingmountain 1 1 6.957497 6.957497 14921 +bikingin 1 1 6.957497 6.957497 14922 +currenc 1 1 6.957497 6.957497 14923 +ssto 1 1 6.957497 6.957497 14924 +rlv 1 1 6.957497 6.957497 14925 +theologi 1 1 6.957497 6.957497 14926 +centurai 1 1 6.957497 6.957497 14927 +boot 1 1 6.957497 6.957497 14928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..127ddf26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,281 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +memori 1 101 2.302585 2.302585 139 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +commun 1 95 2.397895 2.397895 157 +larg 1 82 2.484907 2.484907 168 +ieee 1 86 2.484907 2.484907 190 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +symposium 1 72 2.639057 2.639057 238 +write 1 72 2.639057 2.639057 222 +workshop 1 71 2.639057 2.639057 239 +david 1 71 2.639057 2.639057 232 +servic 1 72 2.639057 2.639057 236 +effici 1 73 2.639057 2.639057 233 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +street 1 63 2.772589 2.772589 293 +experi 1 64 2.772589 2.772589 283 +polici 1 64 2.772589 2.772589 279 +virtual 1 62 2.772589 2.772589 285 +evalu 1 64 2.772589 2.772589 266 +plai 1 60 2.833213 2.833213 307 +automat 1 61 2.833213 2.833213 306 +share 1 59 2.833213 2.833213 304 +publish 1 57 2.890372 2.890372 326 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +extens 1 53 2.944439 2.944439 340 +februari 1 54 2.944439 2.944439 328 +local 1 55 2.944439 2.944439 334 +run 1 51 2.995732 2.995732 347 +hardwar 1 51 2.995732 2.995732 350 +life 1 50 3.044522 3.044522 375 +standard 1 48 3.044522 3.044522 365 +effect 1 46 3.091042 3.091042 385 +protocol 1 45 3.135494 3.135494 407 +fast 1 42 3.218876 3.218876 429 +cach 1 41 3.218876 3.218876 432 +map 1 39 3.258097 3.258097 452 +multipl 1 39 3.258097 3.258097 453 +continu 1 39 3.258097 3.258097 448 +brian 1 38 3.295837 3.295837 466 +industri 1 38 3.295837 3.295837 464 +winter 1 36 3.367296 3.367296 500 +post 1 35 3.401197 3.401197 505 +return 1 34 3.401197 3.401197 502 +concurr 1 34 3.401197 3.401197 501 +detect 1 26 3.688879 3.688879 646 +consist 1 26 3.688879 3.688879 651 +although 1 25 3.737670 3.737670 667 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +interpret 1 24 3.761200 3.761200 686 +greg 1 24 3.761200 3.761200 695 +mobil 1 23 3.806662 3.806662 730 +thread 1 23 3.806662 3.806662 722 +ofwashington 1 22 3.850148 3.850148 766 +reduc 1 22 3.850148 3.850148 759 +inth 1 22 3.850148 3.850148 741 +chen 1 21 3.912023 3.912023 791 +avoid 1 21 3.912023 3.912023 799 +hous 1 21 3.912023 3.912023 801 +binari 1 20 3.951244 3.951244 823 +safeti 1 20 3.951244 3.951244 817 +kernel 1 20 3.951244 3.951244 825 +increas 1 20 3.951244 3.951244 829 +comparison 1 19 4.007333 4.007333 863 +andrew 1 19 4.007333 4.007333 849 +miss 1 19 4.007333 4.007333 866 +citi 1 19 4.007333 4.007333 874 +bershad 1 18 4.060443 4.060443 902 +seem 1 18 4.060443 4.060443 899 +event 1 18 4.060443 4.060443 896 +asplo 1 17 4.110874 4.110874 948 +took 1 16 4.174387 4.174387 1010 +brief 1 16 4.174387 4.174387 1001 +stock 1 16 4.174387 4.174387 1007 +qual 1 15 4.248495 4.248495 1062 +driven 1 15 4.248495 4.248495 1048 +overhead 1 15 4.248495 4.248495 1035 +micro 1 15 4.248495 4.248495 1031 +save 1 14 4.317488 4.317488 1099 +spin 1 14 4.317488 4.317488 1121 +levi 1 14 4.317488 4.317488 1093 +consider 1 14 4.317488 4.317488 1076 +dean 1 14 4.317488 4.317488 1104 +mellon 1 13 4.382027 4.382027 1179 +karlin 1 13 4.382027 4.382027 1176 +charl 1 13 4.382027 4.382027 1149 +block 1 13 4.382027 4.382027 1183 +carnegi 1 12 4.465908 4.465908 1260 +anna 1 12 4.465908 4.465908 1292 +usenix 1 12 4.465908 4.465908 1240 +mari 1 12 4.465908 4.465908 1266 +safe 1 12 4.465908 4.465908 1274 +promot 1 12 4.465908 4.465908 1235 +isca 1 11 4.553877 4.553877 1354 +denni 1 11 4.553877 4.553877 1321 +baer 1 11 4.553877 4.553877 1353 +systemsc 1 11 4.553877 4.553877 1293 +impact 1 11 4.553877 4.553877 1334 +primit 1 11 4.553877 4.553877 1317 +arpa 1 11 4.553877 4.553877 1369 +sosp 1 10 4.653960 4.653960 1416 +jean 1 10 4.653960 4.653960 1440 +henri 1 10 4.653960 4.653960 1417 +packet 1 10 4.653960 4.653960 1415 +decomposit 1 10 4.653960 4.653960 1439 +hang 1 9 4.753590 4.753590 1499 +patterson 1 9 4.753590 4.753590 1554 +osdi 1 9 4.753590 4.753590 1534 +voelker 1 9 4.753590 4.753590 1557 +wong 1 9 4.753590 4.753590 1609 +modula 1 9 4.753590 4.753590 1613 +wilson 1 9 4.753590 4.753590 1536 +rel 1 9 4.753590 4.753590 1487 +besid 1 8 4.875197 4.875197 1681 +sigop 1 8 4.875197 4.875197 1727 +romer 1 8 4.875197 4.875197 1706 +wayn 1 8 4.875197 4.875197 1738 +chamber 1 8 4.875197 4.875197 1692 +egger 1 8 4.875197 4.875197 1695 +marc 1 8 4.875197 4.875197 1680 +watson 1 8 4.875197 4.875197 1691 +uniprocessor 1 8 4.875197 4.875197 1696 +mach 1 8 4.875197 4.875197 1669 +hash 1 8 4.875197 4.875197 1618 +cultur 1 7 5.010635 5.010635 1951 +northwest 1 7 5.010635 5.010635 1973 +instrument 1 7 5.010635 5.010635 1954 +dispatch 1 7 5.010635 5.010635 1791 +interrupt 1 7 5.010635 5.010635 1793 +prioriti 1 7 5.010635 5.010635 1792 +squash 1 6 5.164786 5.164786 2223 +prefetch 1 6 5.164786 5.164786 2039 +edward 1 6 5.164786 5.164786 2050 +onoper 1 6 5.164786 5.164786 2048 +wolman 1 6 5.164786 5.164786 2093 +loup 1 6 5.164786 5.164786 2228 +mock 1 6 5.164786 5.164786 2087 +conflict 1 6 5.164786 5.164786 2041 +rain 1 6 5.164786 5.164786 2137 +coffe 1 5 5.347108 5.347108 2556 +toc 1 5 5.347108 5.347108 2562 +bind 1 5 5.347108 5.347108 2250 +alec 1 5 5.347108 5.347108 2563 +philipos 1 5 5.347108 5.347108 2373 +ohlrich 1 5 5.347108 5.347108 2564 +mutual 1 5 5.347108 5.347108 2418 +rocki 1 4 5.568345 5.568345 3048 +etch 1 4 5.568345 5.568345 2755 +pardyak 1 4 5.568345 5.568345 3043 +savag 1 4 5.568345 5.568345 2777 +microkernel 1 4 5.568345 5.568345 3047 +compcon 1 4 5.568345 5.568345 2958 +exclus 1 4 5.568345 5.568345 2947 +northeast 1 3 5.857933 5.857933 3922 +cachingtraci 1 3 5.857933 5.857933 3923 +kimbrel 1 3 5.857933 5.857933 3924 +felten 1 3 5.857933 5.857933 3925 +geoffrei 1 3 5.857933 5.857933 3505 +dynamiccompil 1 3 5.857933 5.857933 3926 +ausland 1 3 5.857933 5.857933 3917 +fiuczynski 1 3 5.857933 5.857933 3390 +stefan 1 3 5.857933 5.857933 3921 +garrett 1 3 5.857933 5.857933 3377 +mobisa 1 3 5.857933 5.857933 3927 +mappedcach 1 3 5.857933 5.857933 3928 +forappl 1 3 5.857933 5.857933 3929 +irrelev 1 3 5.857933 5.857933 3823 +golub 1 3 5.857933 5.857933 3265 +alien 1 3 5.857933 5.857933 3930 +eduwork 1 2 6.263398 6.263398 5813 +hasappear 1 2 6.263398 6.263398 5099 +tomkin 1 2 6.263398 6.263398 5814 +hugo 1 2 6.263398 6.263398 5815 +garth 1 2 6.263398 6.263398 5816 +gibson 1 2 6.263398 6.263398 5817 +implemen 1 2 6.263398 6.263398 5809 +przemyslaw 1 2 6.263398 6.263398 5808 +emin 1 2 6.263398 6.263398 5810 +sirer 1 2 6.263398 6.263398 5811 +hsieh 1 2 6.263398 6.263398 5818 +onlinesuperpag 1 2 6.263398 6.263398 5819 +appearedin 1 2 6.263398 6.263398 5096 +endpoint 1 2 6.263398 6.263398 4967 +moss 1 2 6.263398 6.263398 5820 +redel 1 2 6.263398 6.263398 4358 +elli 1 2 6.263398 6.263398 4216 +baron 1 2 6.263398 6.263398 4317 +microbenchmark 1 2 6.263398 6.263398 5821 +wwo 1 2 6.263398 6.263398 5812 +rashid 1 2 6.263398 6.263398 4318 +abduct 1 2 6.263398 6.263398 5663 +respit 1 1 6.957497 6.957497 14936 +asigmetr 1 1 6.957497 6.957497 14937 +thestairmast 1 1 6.957497 6.957497 14938 +extensibleoper 1 1 6.957497 6.957497 14939 +midwai 1 1 6.957497 6.957497 14930 +parallelnetwork 1 1 6.957497 6.957497 14940 +thesequel 1 1 6.957497 6.957497 14941 +optimizationcours 1 1 6.957497 6.957497 14942 +youmight 1 1 6.957497 6.957497 14943 +extensiblesystem 1 1 6.957497 6.957497 14944 +theodor 1 1 6.957497 6.957497 14945 +implementationj 1 1 6.957497 6.957497 14946 +defouw 1 1 6.957497 6.957497 14947 +alapat 1 1 6.957497 6.957497 14948 +becker 1 1 6.957497 6.957497 14949 +sharedmemori 1 1 6.957497 6.957497 14950 +zekauska 1 1 6.957497 6.957497 14931 +sawdon 1 1 6.957497 6.957497 14932 +conflictresolut 1 1 6.957497 6.957497 14951 +uwtechn 1 1 6.957497 6.957497 14952 +demultiplex 1 1 6.957497 6.957497 14953 +yuhara 1 1 6.957497 6.957497 14954 +maeda 1 1 6.957497 6.957497 14929 +andmostli 1 1 6.957497 6.957497 14955 +moblic 1 1 6.957497 6.957497 14956 +wheeler 1 1 6.957497 6.957497 14957 +ginsburg 1 1 6.957497 6.957497 14958 +machnix 1 1 6.957497 6.957497 14933 +drave 1 1 6.957497 6.957497 14934 +forin 1 1 6.957497 6.957497 14935 +inoper 1 1 6.957497 6.957497 14959 +harrier 1 1 6.957497 6.957497 14960 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..b2129638 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,153 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +hall 1 146 1.945910 1.945910 65 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +text 1 98 2.302585 2.302585 133 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +good 1 77 2.564949 2.564949 200 +david 1 71 2.639057 2.639057 232 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +sieg 1 69 2.708050 2.708050 260 +view 1 70 2.708050 2.708050 254 +new 1 64 2.772589 2.772589 262 +previou 1 62 2.772589 2.772589 290 +collect 1 65 2.772589 2.772589 268 +guid 1 63 2.772589 2.772589 267 +thesi 1 57 2.890372 2.890372 327 +index 1 56 2.890372 2.890372 309 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +cool 1 49 3.044522 3.044522 374 +pointer 1 48 3.044522 3.044522 368 +telephon 1 50 3.044522 3.044522 373 +life 1 50 3.044522 3.044522 375 +directori 1 45 3.135494 3.135494 396 +past 1 42 3.218876 3.218876 428 +open 1 38 3.295837 3.295837 469 +hand 1 37 3.332205 3.332205 475 +post 1 35 3.401197 3.401197 505 +scientist 1 31 3.496508 3.496508 560 +steve 1 29 3.583519 3.583519 594 +american 1 27 3.637586 3.637586 634 +todai 1 25 3.737670 3.737670 672 +sport 1 25 3.737670 3.737670 683 +daili 1 24 3.761200 3.761200 706 +magazin 1 24 3.761200 3.761200 704 +yahoo 1 24 3.761200 3.761200 707 +miscellan 1 23 3.806662 3.806662 731 +voic 1 21 3.912023 3.912023 806 +hous 1 21 3.912023 3.912023 801 +lyco 1 19 4.007333 4.007333 871 +boston 1 19 4.007333 4.007333 862 +agent 1 18 4.060443 4.060443 910 +engineeringunivers 1 17 4.110874 4.110874 959 +repositori 1 17 4.110874 4.110874 932 +white 1 17 4.110874 4.110874 951 +dilbert 1 16 4.174387 4.174387 996 +hierarch 1 15 4.248495 4.248495 1018 +balanc 1 14 4.317488 4.317488 1112 +washingtonbox 1 13 4.382027 4.382027 1200 +suit 1 13 4.382027 4.382027 1129 +canada 1 13 4.382027 4.382027 1158 +social 1 13 4.382027 4.382027 1123 +hank 1 12 4.465908 4.465908 1253 +excit 1 11 4.553877 4.553877 1329 +arpa 1 11 4.553877 4.553877 1369 +clock 1 11 4.553877 4.553877 1320 +shop 1 10 4.653960 4.653960 1469 +metacrawl 1 10 4.653960 4.653960 1455 +vista 1 10 4.653960 4.653960 1452 +weld 1 9 4.753590 4.753590 1538 +meta 1 9 4.753590 4.753590 1505 +yellow 1 9 4.753590 4.753590 1601 +govern 1 9 4.753590 4.753590 1581 +congress 1 9 4.753590 4.753590 1592 +entitl 1 9 4.753590 4.753590 1490 +postdoc 1 8 4.875197 4.875197 1724 +span 1 8 4.875197 4.875197 1751 +upcom 1 8 4.875197 4.875197 1685 +softbot 1 7 5.010635 5.010635 1974 +pittsburgh 1 7 5.010635 5.010635 1938 +strip 1 6 5.164786 5.164786 2203 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +gopher 1 6 5.164786 5.164786 1982 +infoseek 1 6 5.164786 5.164786 2188 +slate 1 6 5.164786 5.164786 2021 +atlant 1 5 5.347108 5.347108 2508 +feder 1 5 5.347108 5.347108 2266 +union 1 4 5.568345 5.568345 2634 +alta 1 4 5.568345 5.568345 3039 +birthdai 1 4 5.568345 5.568345 2800 +letterman 1 3 5.857933 5.857933 3931 +shortcut 1 3 5.857933 5.857933 3932 +soar 1 3 5.857933 5.857933 3506 +headlin 1 3 5.857933 5.857933 3710 +monthli 1 3 5.857933 5.857933 3910 +espn 1 3 5.857933 5.857933 3724 +zone 1 3 5.857933 5.857933 3747 +museum 1 3 5.857933 5.857933 3933 +comedi 1 2 6.263398 6.263398 5822 +geeki 1 2 6.263398 6.263398 5823 +shopbot 1 2 6.263398 6.263398 5824 +sigma 1 2 6.263398 6.263398 4369 +magellan 1 2 6.263398 6.263398 5825 +reuter 1 2 6.263398 6.263398 4099 +cafe 1 2 6.263398 6.263398 5826 +salon 1 2 6.263398 6.263398 5827 +harper 1 2 6.263398 6.263398 5141 +commiss 1 2 6.263398 6.263398 4901 +reform 1 2 6.263398 6.263398 5828 +doorenbo 1 1 6.957497 6.957497 14962 +pagebob 1 1 6.957497 6.957497 14963 +bobd 1 1 6.957497 6.957497 14964 +netbot 1 1 6.957497 6.957497 14965 +boffo 1 1 6.957497 6.957497 14966 +zdnet 1 1 6.957497 6.957497 14967 +anchordesk 1 1 6.957497 6.957497 14968 +savvysearch 1 1 6.957497 6.957497 14969 +inktomi 1 1 6.957497 6.957497 14970 +crawler 1 1 6.957497 6.957497 14971 +hotbot 1 1 6.957497 6.957497 14972 +pointcom 1 1 6.957497 6.957497 14973 +switchboard 1 1 6.957497 6.957497 14974 +cnnfn 1 1 6.957497 6.957497 14975 +newshour 1 1 6.957497 6.957497 14976 +globe 1 1 6.957497 6.957497 14977 +feed 1 1 6.957497 6.957497 14978 +fedworld 1 1 6.957497 6.957497 14979 +budget 1 1 6.957497 6.957497 14961 +deficit 1 1 6.957497 6.957497 14980 +debt 1 1 6.957497 6.957497 14981 +concord 1 1 6.957497 6.957497 14982 +coalit 1 1 6.957497 6.957497 14983 +bipartisan 1 1 6.957497 6.957497 14984 +andfun 1 1 6.957497 6.957497 14985 +pagebobd 1 1 6.957497 6.957497 14986 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..e5545c5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +seattl 1 120 2.079442 2.079442 103 +code 1 108 2.197225 2.197225 116 +mathemat 1 108 2.197225 2.197225 123 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +orient 1 80 2.564949 2.564949 205 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +solv 1 73 2.639057 2.639057 234 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +interact 1 62 2.772589 2.772589 270 +artifici 1 63 2.772589 2.772589 280 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +done 1 47 3.091042 3.091042 381 +mechan 1 43 3.178054 3.178054 416 +http 1 41 3.218876 3.218876 420 +societi 1 40 3.258097 3.258097 456 +join 1 39 3.258097 3.258097 457 +winter 1 36 3.367296 3.367296 500 +post 1 35 3.401197 3.401197 505 +taught 1 33 3.433987 3.433987 526 +human 1 32 3.465736 3.465736 546 +idea 1 32 3.465736 3.465736 545 +concept 1 32 3.465736 3.465736 537 +dissert 1 32 3.465736 3.465736 549 +autumn 1 31 3.496508 3.496508 558 +constraint 1 26 3.688879 3.688879 636 +concern 1 25 3.737670 3.737670 666 +spent 1 25 3.737670 3.737670 676 +born 1 21 3.912023 3.912023 798 +media 1 19 4.007333 4.007333 861 +cambridg 1 16 4.174387 4.174387 1008 +alan 1 13 4.382027 4.382027 1146 +pagei 1 8 4.875197 4.875197 1683 +grew 1 8 4.875197 4.875197 1742 +xerox 1 8 4.875197 4.875197 1725 +reed 1 6 5.164786 5.164786 2086 +england 1 5 5.347108 5.347108 2557 +scotland 1 4 5.568345 5.568345 3049 +sabbat 1 4 5.568345 5.568345 2824 +groupuw 1 3 5.857933 5.857933 3934 +atstanford 1 3 5.857933 5.857933 3935 +pagealan 1 2 6.263398 6.263398 5587 +computerinteract 1 2 6.263398 6.263398 5829 +satisfact 1 2 6.263398 6.263398 5656 +idaho 1 2 6.263398 6.263398 5055 +havebeen 1 2 6.263398 6.263398 5830 +eduwww 1 2 6.263398 6.263398 5138 +principalresearch 1 1 6.957497 6.957497 14987 +activitiesuwconstraint 1 1 6.957497 6.957497 14988 +domainsourc 1 1 6.957497 6.957497 14989 +democraci 1 1 6.957497 6.957497 14990 +qualsproject 1 1 6.957497 6.957497 14991 +teachingher 1 1 6.957497 6.957497 14992 +informationhistori 1 1 6.957497 6.957497 14993 +paloalto 1 1 6.957497 6.957497 14994 +simulationlaboratori 1 1 6.957497 6.957497 14995 +doctoralfellow 1 1 6.957497 6.957497 14996 +ofedinburgh 1 1 6.957497 6.957497 14997 +symbolicalgebra 1 1 6.957497 6.957497 14998 +andexcept 1 1 6.957497 6.957497 14999 +europarc 1 1 6.957497 6.957497 15000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..ebfc673c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +address 1 170 1.791759 1.791759 62 +like 1 132 1.945910 1.945910 81 +thing 1 84 2.484907 2.484907 189 +probabl 1 40 3.258097 3.258097 455 +credit 1 38 3.295837 3.295837 460 +ofth 1 36 3.367296 3.367296 491 +ad 1 32 3.465736 3.465736 544 +mike 1 24 3.761200 3.761200 703 +less 1 18 4.060443 4.060443 892 +care 1 13 4.382027 4.382027 1177 +brad 1 12 4.465908 4.465908 1264 +subset 1 10 4.653960 4.653960 1425 +couldn 1 4 5.568345 5.568345 2977 +pagebrad 1 1 6.957497 6.957497 15001 +chamberlainphoto 1 1 6.957497 6.957497 15002 +perkowitzth 1 1 6.957497 6.957497 15003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..4722263b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +first 1 140 1.945910 1.945910 71 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +look 1 107 2.197225 2.197225 115 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +academ 1 82 2.484907 2.484907 178 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +appli 1 71 2.639057 2.639057 226 +onlin 1 75 2.639057 2.639057 223 +html 1 75 2.639057 2.639057 235 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +written 1 63 2.772589 2.772589 278 +locat 1 59 2.833213 2.833213 303 +plai 1 60 2.833213 2.833213 307 +reason 1 57 2.890372 2.890372 318 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +math 1 44 3.135494 3.135494 402 +around 1 43 3.178054 3.178054 415 +theoret 1 39 3.258097 3.258097 446 +map 1 39 3.258097 3.258097 452 +error 1 40 3.258097 3.258097 449 +expect 1 37 3.332205 3.332205 484 +tree 1 36 3.367296 3.367296 492 +survei 1 35 3.401197 3.401197 513 +curriculum 1 33 3.433987 3.433987 535 +scientist 1 31 3.496508 3.496508 560 +photo 1 31 3.496508 3.496508 561 +power 1 30 3.555348 3.555348 573 +produc 1 30 3.555348 3.555348 572 +cluster 1 28 3.610918 3.610918 612 +hope 1 28 3.610918 3.610918 610 +challeng 1 26 3.688879 3.688879 653 +bound 1 26 3.688879 3.688879 659 +bookmark 1 26 3.688879 3.688879 639 +aspect 1 25 3.737670 3.737670 663 +sometim 1 24 3.761200 3.761200 696 +theunivers 1 21 3.912023 3.912023 797 +siam 1 21 3.912023 3.912023 800 +binari 1 20 3.951244 3.951244 823 +lower 1 18 4.060443 4.060443 886 +biologi 1 15 4.248495 4.248495 1049 +incomput 1 14 4.317488 4.317488 1096 +galleri 1 13 4.382027 4.382027 1192 +speak 1 12 4.465908 4.465908 1283 +readabl 1 12 4.465908 4.465908 1258 +scienceat 1 11 4.553877 4.553877 1375 +moment 1 11 4.553877 4.553877 1379 +cycl 1 11 4.553877 4.553877 1335 +enter 1 10 4.653960 4.653960 1454 +ski 1 10 4.653960 4.653960 1471 +drink 1 9 4.753590 4.753590 1607 +bridg 1 8 4.875197 4.875197 1764 +interestsi 1 7 5.010635 5.010635 1969 +poster 1 7 5.010635 5.010635 1814 +rough 1 6 5.164786 5.164786 2107 +quickli 1 6 5.164786 5.164786 2000 +alphabet 1 6 5.164786 5.164786 1980 +soda 1 6 5.164786 5.164786 2189 +overlap 1 5 5.347108 5.347108 2368 +upper 1 5 5.347108 5.347108 2481 +latexhtml 1 5 5.347108 5.347108 2347 +older 1 5 5.347108 5.347108 2387 +fulfil 1 4 5.568345 5.568345 2932 +climb 1 4 5.568345 5.568345 2936 +genom 1 3 5.857933 5.857933 3546 +astrophys 1 3 5.857933 5.857933 3936 +dimac 1 3 5.857933 5.857933 3574 +edufor 1 2 6.263398 6.263398 5831 +hpcc 1 2 6.263398 6.263398 5832 +clone 1 2 6.263398 6.263398 5833 +ismb 1 2 6.263398 6.263398 5834 +probe 1 2 6.263398 6.263398 5535 +brendan 1 1 6.957497 6.957497 15005 +pagebrendan 1 1 6.957497 6.957497 15006 +mumeyi 1 1 6.957497 6.957497 15007 +information 1 1 6.957497 6.957497 15008 +vitaein 1 1 6.957497 6.957497 15009 +htmlorpostscriptformat 1 1 6.957497 6.957497 15010 +landmark 1 1 6.957497 6.957497 15011 +tosolv 1 1 6.957497 6.957497 15012 +groupher 1 1 6.957497 6.957497 15013 +papersb 1 1 6.957497 6.957497 15014 +mumei 1 1 6.957497 6.957497 15004 +candidaci 1 1 6.957497 6.957497 15015 +klaw 1 1 6.957497 6.957497 15016 +ofdiscret 1 1 6.957497 6.957497 15017 +containsom 1 1 6.957497 6.957497 15018 +recreationhik 1 1 6.957497 6.957497 15019 +coffeeto 1 1 6.957497 6.957497 15020 +sailingand 1 1 6.957497 6.957497 15021 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..fb99dcc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +spring 1 131 2.079442 2.079442 88 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +info 1 85 2.484907 2.484907 176 +school 1 84 2.484907 2.484907 188 +learn 1 86 2.484907 2.484907 170 +method 1 80 2.564949 2.564949 213 +interfac 1 79 2.564949 2.564949 209 +mondai 1 77 2.564949 2.564949 206 +june 1 79 2.564949 2.564949 214 +goal 1 66 2.708050 2.708050 250 +interact 1 62 2.772589 2.772589 270 +extens 1 53 2.944439 2.944439 340 +particular 1 51 2.995732 2.995732 352 +quarter 1 47 3.091042 3.091042 389 +even 1 45 3.135494 3.135494 393 +ask 1 28 3.610918 3.610918 597 +rather 1 26 3.688879 3.688879 642 +enjoi 1 26 3.688879 3.688879 660 +although 1 25 3.737670 3.737670 667 +self 1 22 3.850148 3.850148 761 +busi 1 21 3.912023 3.912023 784 +hobbi 1 16 4.174387 4.174387 1009 +excit 1 11 4.553877 4.553877 1329 +junior 1 5 5.347108 5.347108 2519 +kid 1 5 5.347108 5.347108 2516 +fairli 1 5 5.347108 5.347108 2322 +writeup 1 5 5.347108 5.347108 2352 +bricker 1 4 5.568345 5.568345 3050 +asystem 1 4 5.568345 5.568345 2612 +lauren 1 3 5.857933 5.857933 3251 +metip 1 3 5.857933 5.857933 3937 +workin 1 3 5.857933 5.857933 3938 +groupi 1 2 6.263398 6.263398 5544 +stevetanimoto 1 2 6.263398 6.263398 5835 +ofthi 1 2 6.263398 6.263398 5836 +cscl 1 2 6.263398 6.263398 5837 +inthi 1 2 6.263398 6.263398 5509 +studio 1 2 6.263398 6.263398 5838 +brickerlauren 1 1 6.957497 6.957497 15022 +clue 1 1 6.957497 6.957497 15023 +primarli 1 1 6.957497 6.957497 15024 +userinterfac 1 1 6.957497 6.957497 15025 +proclaim 1 1 6.957497 6.957497 15026 +mathematicsexperi 1 1 6.957497 6.957497 15027 +usingexploratori 1 1 6.957497 6.957497 15028 +rote 1 1 6.957497 6.957497 15029 +minterest 1 1 6.957497 6.957497 15030 +supportedcollabor 1 1 6.957497 6.957497 15031 +lawk 1 1 6.957497 6.957497 15032 +dawg 1 1 6.957497 6.957497 15033 +interfacea 1 1 6.957497 6.957497 15034 +resumeschool 1 1 6.957497 6.957497 15035 +dazethi 1 1 6.957497 6.957497 15036 +quarterdoth 1 1 6.957497 6.957497 15037 +quartershuman 1 1 6.957497 6.957497 15038 +writeupwhat 1 1 6.957497 6.957497 15039 +insocieti 1 1 6.957497 6.957497 15040 +lifesportscookingpotteri 1 1 6.957497 6.957497 15041 +garag 1 1 6.957497 6.957497 15042 +stuffbecaus 1 1 6.957497 6.957497 15043 +itaddress 1 1 6.957497 6.957497 15044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..4bce5d75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +washington 1 236 1.386294 1.386294 32 +adam 1 17 4.110874 4.110874 934 +carlson 1 5 5.347108 5.347108 2351 +carlsonadam 1 1 6.957497 6.957497 15045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..3f861d43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +seattl 1 120 2.079442 2.079442 103 +look 1 107 2.197225 2.197225 115 +follow 1 92 2.397895 2.397895 143 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +direct 1 57 2.890372 2.890372 316 +browser 1 56 2.890372 2.890372 313 +give 1 50 3.044522 3.044522 359 +netscap 1 44 3.135494 3.135494 395 +workstat 1 37 3.332205 3.332205 479 +download 1 36 3.367296 3.367296 489 +viewer 1 21 3.912023 3.912023 787 +similar 1 21 3.912023 3.912023 771 +andrew 1 19 4.007333 4.007333 849 +fix 1 11 4.553877 4.553877 1327 +certain 1 10 4.653960 4.653960 1393 +werner 1 10 4.653960 4.653960 1385 +salesin 1 4 5.568345 5.568345 3051 +tonyderos 1 2 6.263398 6.263398 5839 +stuetzl 1 2 6.263398 6.263398 5840 +duchamp 1 2 6.263398 6.263398 5841 +jovan 1 2 6.263398 6.263398 5842 +theview 1 1 6.957497 6.957497 15046 +popov 1 1 6.957497 6.957497 15047 +scanningproject 1 1 6.957497 6.957497 15048 +sgigraph 1 1 6.957497 6.957497 15049 +shouldalso 1 1 6.957497 6.957497 15050 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..cf9583dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +school 1 84 2.484907 2.484907 188 +info 1 85 2.484907 2.484907 176 +requir 1 81 2.484907 2.484907 167 +workshop 1 71 2.639057 2.639057 239 +line 1 75 2.639057 2.639057 231 +java 1 70 2.708050 2.708050 248 +best 1 59 2.833213 2.833213 299 +think 1 57 2.890372 2.890372 314 +browser 1 56 2.890372 2.890372 313 +right 1 48 3.044522 3.044522 363 +cool 1 49 3.044522 3.044522 374 +quarter 1 47 3.091042 3.091042 389 +favorit 1 44 3.135494 3.135494 410 +netscap 1 44 3.135494 3.135494 395 +open 1 38 3.295837 3.295837 469 +computersci 1 30 3.555348 3.555348 562 +chines 1 29 3.583519 3.583519 595 +though 1 27 3.637586 3.637586 622 +enjoi 1 26 3.688879 3.688879 660 +experiment 1 26 3.688879 3.688879 645 +enabl 1 26 3.688879 3.688879 655 +instal 1 22 3.850148 3.850148 754 +grad 1 20 3.951244 3.951244 837 +applet 1 20 3.951244 3.951244 827 +taiwan 1 16 4.174387 4.174387 1006 +charact 1 15 4.248495 4.248495 1028 +promot 1 12 4.465908 4.465908 1235 +rice 1 11 4.553877 4.553877 1336 +cook 1 10 4.653960 4.653960 1464 +yellow 1 9 4.753590 4.753590 1601 +ball 1 9 4.753590 4.753590 1608 +absolut 1 8 4.875197 4.875197 1646 +chinook 1 6 5.164786 5.164786 2229 +greec 1 6 5.164786 5.164786 2208 +restaur 1 6 5.164786 5.164786 2230 +ross 1 5 5.347108 5.347108 2243 +chou 1 4 5.568345 5.568345 3033 +recip 1 3 5.857933 5.857933 3668 +infoth 1 2 6.263398 6.263398 5195 +projectmi 1 2 6.263398 6.263398 5482 +schedulemi 1 2 6.263398 6.263398 5843 +publicationscod 1 2 6.263398 6.263398 5520 +stir 1 2 6.263398 6.263398 4865 +fri 1 2 6.263398 6.263398 5844 +geek 1 2 6.263398 6.263398 5083 +toi 1 2 6.263398 6.263398 5184 +font 1 2 6.263398 6.263398 5845 +purpl 1 2 6.263398 6.263398 5372 +scari 1 1 6.957497 6.957497 15051 +codesignpersonalperson 1 1 6.957497 6.957497 15052 +resumefoodi 1 1 6.957497 6.957497 15053 +ofpeopl 1 1 6.957497 6.957497 15054 +dish 1 1 6.957497 6.957497 15055 +noodl 1 1 6.957497 6.957497 15056 +beefskew 1 1 6.957497 6.957497 15057 +toysb 1 1 6.957497 6.957497 15058 +computersand 1 1 6.957497 6.957497 15059 +taiwanesei 1 1 6.957497 6.957497 15060 +taiwaneselanguag 1 1 6.957497 6.957497 15061 +taiwanes 1 1 6.957497 6.957497 15062 +sureto 1 1 6.957497 6.957497 15063 +taiwanesedictionari 1 1 6.957497 6.957497 15064 +viewedif 1 1 6.957497 6.957497 15065 +beabl 1 1 6.957497 6.957497 15066 +bouncingov 1 1 6.957497 6.957497 15067 +barnei 1 1 6.957497 6.957497 15068 +dynosaur 1 1 6.957497 6.957497 15069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..dd2b4fb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +read 1 154 1.791759 1.791759 47 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +intern 1 108 2.197225 2.197225 128 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +search 1 95 2.397895 2.397895 155 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +info 1 85 2.484907 2.484907 176 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +help 1 83 2.484907 2.484907 175 +refer 1 78 2.564949 2.564949 203 +april 1 77 2.564949 2.564949 196 +good 1 77 2.564949 2.564949 200 +mondai 1 77 2.564949 2.564949 206 +html 1 75 2.639057 2.639057 235 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +august 1 66 2.708050 2.708050 257 +thursdai 1 70 2.708050 2.708050 241 +result 1 65 2.772589 2.772589 281 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +guid 1 63 2.772589 2.772589 267 +wednesdai 1 64 2.772589 2.772589 261 +automat 1 61 2.833213 2.833213 306 +plai 1 60 2.833213 2.833213 307 +colleg 1 61 2.833213 2.833213 300 +found 1 53 2.944439 2.944439 337 +set 1 50 3.044522 3.044522 361 +archiv 1 49 3.044522 3.044522 364 +adapt 1 46 3.091042 3.091042 387 +done 1 47 3.091042 3.091042 381 +featur 1 46 3.091042 3.091042 386 +netscap 1 44 3.135494 3.135494 395 +fridai 1 44 3.135494 3.135494 390 +math 1 44 3.135494 3.135494 402 +term 1 43 3.178054 3.178054 411 +review 1 42 3.218876 3.218876 425 +late 1 40 3.258097 3.258097 439 +programm 1 39 3.258097 3.258097 445 +correct 1 38 3.295837 3.295837 462 +open 1 38 3.295837 3.295837 469 +statist 1 35 3.401197 3.401197 521 +tech 1 35 3.401197 3.401197 515 +chapter 1 32 3.465736 3.465736 536 +autumn 1 31 3.496508 3.496508 558 +linux 1 27 3.637586 3.637586 631 +lab 1 24 3.761200 3.761200 698 +color 1 22 3.850148 3.850148 762 +self 1 22 3.850148 3.850148 761 +hous 1 21 3.912023 3.912023 801 +region 1 19 4.007333 4.007333 875 +macintosh 1 17 4.110874 4.110874 920 +dilbert 1 16 4.174387 4.174387 996 +month 1 15 4.248495 4.248495 1025 +contribut 1 15 4.248495 4.248495 1021 +brother 1 13 4.382027 4.382027 1189 +tune 1 12 4.465908 4.465908 1227 +insid 1 12 4.465908 4.465908 1262 +fpga 1 10 4.653960 4.653960 1433 +pacif 1 8 4.875197 4.875197 1674 +babylon 1 8 4.875197 4.875197 1731 +duke 1 6 5.164786 5.164786 2231 +usag 1 6 5.164786 5.164786 2209 +peek 1 6 5.164786 5.164786 2169 +compet 1 5 5.347108 5.347108 2462 +contest 1 5 5.347108 5.347108 2273 +corei 1 4 5.568345 5.568345 2718 +wavelet 1 4 5.568345 5.568345 2874 +percept 1 3 5.857933 5.857933 3739 +rsum 1 3 5.857933 5.857933 3939 +zone 1 3 5.857933 5.857933 3747 +corin 1 3 5.857933 5.857933 3311 +induc 1 2 6.263398 6.263398 4795 +lurker 1 2 6.263398 6.263398 5050 +andersoncorei 1 1 6.957497 6.957497 15070 +andersonth 1 1 6.957497 6.957497 15071 +localtalk 1 1 6.957497 6.957497 15072 +collegi 1 1 6.957497 6.957497 15073 +univser 1 1 6.957497 6.957497 15074 +highlin 1 1 6.957497 6.957497 15075 +polli 1 1 6.957497 6.957497 15076 +treasuri 1 1 6.957497 6.957497 15077 +providercool 1 1 6.957497 6.957497 15078 +sunsit 1 1 6.957497 6.957497 15079 +pageus 1 1 6.957497 6.957497 15080 +washinton 1 1 6.957497 6.957497 15081 +uwtv 1 1 6.957497 6.957497 15082 +notesmi 1 1 6.957497 6.957497 15083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..b55281b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +well 1 109 2.197225 2.197225 121 +text 1 98 2.302585 2.302585 133 +homepag 1 93 2.397895 2.397895 148 +second 1 81 2.484907 2.484907 166 +start 1 83 2.484907 2.484907 173 +know 1 80 2.564949 2.564949 198 +experi 1 64 2.772589 2.772589 283 +locat 1 59 2.833213 2.833213 303 +found 1 53 2.944439 2.944439 337 +visitor 1 49 3.044522 3.044522 371 +express 1 32 3.465736 3.465736 540 +photo 1 31 3.496508 3.496508 561 +repres 1 26 3.688879 3.688879 656 +never 1 25 3.737670 3.737670 671 +grad 1 20 3.951244 3.951244 837 +wonder 1 20 3.951244 3.951244 815 +minut 1 20 3.951244 3.951244 810 +appropri 1 18 4.060443 4.060443 883 +anyon 1 17 4.110874 4.110874 916 +choos 1 16 4.174387 4.174387 964 +universityof 1 15 4.248495 4.248495 1061 +near 1 14 4.317488 4.317488 1091 +cannot 1 13 4.382027 4.382027 1144 +sai 1 13 4.382027 4.382027 1175 +ball 1 9 4.753590 4.753590 1608 +occur 1 9 4.753590 4.753590 1572 +didn 1 9 4.753590 4.753590 1563 +craig 1 7 5.010635 5.010635 1879 +fromth 1 7 5.010635 5.010635 1802 +saturdai 1 7 5.010635 5.010635 1794 +parent 1 6 5.164786 5.164786 2204 +situat 1 5 5.347108 5.347108 2365 +curiou 1 5 5.347108 5.347108 2541 +enjoy 1 4 5.568345 5.568345 2937 +waterloo 1 3 5.857933 5.857933 3523 +tomi 1 2 6.263398 6.263398 5846 +convoc 1 2 6.263398 6.263398 5757 +honour 1 2 6.263398 6.263398 5632 +felt 1 2 6.263398 6.263398 4978 +incident 1 2 6.263398 6.263398 5109 +ceremoni 1 2 6.263398 6.263398 5585 +forgiv 1 2 6.263398 6.263398 5770 +experiencecraig 1 1 6.957497 6.957497 15085 +kaplancurr 1 1 6.957497 6.957497 15086 +copyof 1 1 6.957497 6.957497 15087 +undergraduatewa 1 1 6.957497 6.957497 15088 +valedictorian 1 1 6.957497 6.957497 15084 +cskaplan 1 1 6.957497 6.957497 15089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..a97574dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +welcom 1 122 2.079442 2.079442 99 +peopl 1 96 2.302585 2.302585 132 +take 1 97 2.302585 2.302585 134 +sinc 1 90 2.397895 2.397895 159 +thing 1 84 2.484907 2.484907 189 +java 1 70 2.708050 2.708050 248 +plan 1 65 2.772589 2.772589 272 +think 1 57 2.890372 2.890372 314 +visitor 1 49 3.044522 3.044522 371 +numer 1 49 3.044522 3.044522 369 +futur 1 41 3.218876 3.218876 427 +vision 1 41 3.218876 3.218876 430 +mean 1 37 3.332205 3.332205 477 +anim 1 31 3.496508 3.496508 557 +load 1 28 3.610918 3.610918 601 +applet 1 20 3.951244 3.951244 827 +agent 1 18 4.060443 4.060443 910 +wind 1 18 4.060443 4.060443 908 +figur 1 18 4.060443 4.060443 903 +thought 1 17 4.110874 4.110874 945 +edui 1 13 4.382027 4.382027 1193 +weld 1 9 4.753590 4.753590 1538 +chung 1 7 5.010635 5.010635 1964 +softbot 1 7 5.010635 5.010635 1974 +vallei 1 7 5.010635 5.010635 1959 +etzioni 1 6 5.164786 5.164786 2135 +andsoftwar 1 4 5.568345 5.568345 2753 +arch 1 4 5.568345 5.568345 2995 +codi 1 3 5.857933 5.857933 3940 +kwok 1 3 5.857933 5.857933 3941 +aliv 1 3 5.857933 5.857933 3864 +ingram 1 2 6.263398 6.263398 5847 +nausicaa 1 2 6.263398 6.263398 5218 +castl 1 2 6.263398 6.263398 5217 +doom 1 2 6.263398 6.263398 5848 +asami 1 1 6.957497 6.957497 15091 +chiaki 1 1 6.957497 6.957497 15092 +ctkwok 1 1 6.957497 6.957497 15093 +andoren 1 1 6.957497 6.957497 15094 +sanctuari 1 1 6.957497 6.957497 15090 +aiuw 1 1 6.957497 6.957497 15095 +informationleisur 1 1 6.957497 6.957497 15096 +windlaputa 1 1 6.957497 6.957497 15097 +skyhyp 1 1 6.957497 6.957497 15098 +gunnm 1 1 6.957497 6.957497 15099 +vile 1 1 6.957497 6.957497 15100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..72e1369c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +postscript 1 131 2.079442 2.079442 90 +homepag 1 93 2.397895 2.397895 148 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +html 1 75 2.639057 2.639057 235 +plan 1 65 2.772589 2.772589 272 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +employ 1 12 4.465908 4.465908 1291 +rest 1 12 4.465908 4.465908 1259 +darren 1 5 5.347108 5.347108 2565 +cronquist 1 3 5.857933 5.857933 3942 +myph 1 3 5.857933 5.857933 3880 +underconstruct 1 3 5.857933 5.857933 3889 +darrenc 1 1 6.957497 6.957497 15101 +vitaperson 1 1 6.957497 6.957497 15102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..11644fe3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +well 1 109 2.197225 2.197225 121 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +user 1 104 2.302585 2.302585 137 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +second 1 81 2.484907 2.484907 166 +activ 1 84 2.484907 2.484907 182 +build 1 85 2.484907 2.484907 184 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +librari 1 87 2.484907 2.484907 181 +help 1 83 2.484907 2.484907 175 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +david 1 71 2.639057 2.639057 232 +intellig 1 72 2.639057 2.639057 225 +line 1 75 2.639057 2.639057 231 +symposium 1 72 2.639057 2.639057 238 +nation 1 74 2.639057 2.639057 240 +knowledg 1 67 2.708050 2.708050 243 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +plan 1 65 2.772589 2.772589 272 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +import 1 65 2.772589 2.772589 282 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +variou 1 56 2.890372 2.890372 317 +undergradu 1 54 2.944439 2.944439 338 +local 1 55 2.944439 2.944439 334 +week 1 52 2.995732 2.995732 343 +even 1 45 3.135494 3.135494 393 +favorit 1 44 3.135494 3.135494 410 +third 1 43 3.178054 3.178054 412 +fast 1 42 3.218876 3.218876 429 +programm 1 39 3.258097 3.258097 445 +live 1 40 3.258097 3.258097 451 +map 1 39 3.258097 3.258097 452 +realli 1 40 3.258097 3.258097 444 +prototyp 1 38 3.295837 3.295837 463 +feel 1 37 3.332205 3.332205 483 +respons 1 37 3.332205 3.332205 476 +game 1 36 3.367296 3.367296 498 +represent 1 35 3.401197 3.401197 512 +michael 1 35 3.401197 3.401197 514 +everi 1 34 3.401197 3.401197 519 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +human 1 32 3.465736 3.465736 546 +collabor 1 32 3.465736 3.465736 543 +domain 1 30 3.555348 3.555348 564 +built 1 29 3.583519 3.583519 592 +rather 1 26 3.688879 3.688879 642 +client 1 25 3.737670 3.737670 679 +spent 1 25 3.737670 3.737670 676 +store 1 24 3.761200 3.761200 693 +demonstr 1 24 3.761200 3.761200 694 +initi 1 23 3.806662 3.806662 717 +recognit 1 23 3.806662 3.806662 723 +famili 1 23 3.806662 3.806662 735 +william 1 22 3.850148 3.850148 765 +theunivers 1 21 3.912023 3.912023 797 +navig 1 21 3.912023 3.912023 796 +watch 1 21 3.912023 3.912023 789 +anderson 1 19 4.007333 4.007333 860 +boston 1 19 4.007333 4.007333 862 +agent 1 18 4.060443 4.060443 910 +adam 1 17 4.110874 4.110874 934 +match 1 16 4.174387 4.174387 965 +atth 1 15 4.248495 4.248495 1019 +mayb 1 15 4.248495 4.248495 1014 +camera 1 14 4.317488 4.317488 1115 +dave 1 14 4.317488 4.317488 1098 +senior 1 14 4.317488 4.317488 1120 +context 1 13 4.382027 4.382027 1153 +whose 1 13 4.382027 4.382027 1166 +sai 1 13 4.382027 4.382027 1175 +touch 1 12 4.465908 4.465908 1288 +grow 1 12 4.465908 4.465908 1209 +career 1 12 4.465908 4.465908 1287 +appl 1 11 4.553877 4.553877 1303 +michigan 1 11 4.553877 4.553877 1368 +surf 1 11 4.553877 4.553877 1301 +shop 1 10 4.653960 4.653960 1469 +death 1 10 4.653960 4.653960 1457 +weld 1 9 4.753590 4.753590 1538 +russel 1 9 4.753590 4.753590 1507 +debugg 1 9 4.753590 4.753590 1493 +declar 1 9 4.753590 4.753590 1526 +leader 1 9 4.753590 4.753590 1576 +juan 1 9 4.753590 4.753590 1580 +sean 1 8 4.875197 4.875197 1705 +aaai 1 8 4.875197 4.875197 1750 +edg 1 8 4.875197 4.875197 1647 +planner 1 7 5.010635 5.010635 1797 +sensor 1 7 5.010635 5.010635 1920 +fortun 1 7 5.010635 5.010635 1872 +spare 1 6 5.164786 5.164786 2177 +mix 1 6 5.164786 5.164786 2200 +chicago 1 6 5.164786 5.164786 2149 +babi 1 5 5.347108 5.347108 2493 +pars 1 5 5.347108 5.347108 2321 +darren 1 5 5.347108 5.347108 2565 +compet 1 5 5.347108 5.347108 2462 +midnight 1 4 5.568345 5.568345 2599 +gotten 1 4 5.568345 5.568345 2628 +salesin 1 4 5.568345 5.568345 3051 +cut 1 4 5.568345 5.568345 2620 +tick 1 4 5.568345 5.568345 2975 +workin 1 3 5.857933 5.857933 3938 +cohen 1 3 5.857933 5.857933 3652 +harold 1 3 5.857933 5.857933 3803 +gloriou 1 3 5.857933 5.857933 3816 +hero 1 3 5.857933 5.857933 3711 +fame 1 3 5.857933 5.857933 3793 +straight 1 3 5.857933 5.857933 3655 +evil 1 3 5.857933 5.857933 3943 +christianson 1 2 6.263398 6.263398 5849 +till 1 2 6.263398 6.263398 5850 +nowher 1 2 6.263398 6.263398 4292 +shopbot 1 2 6.263398 6.263398 5824 +chicken 1 2 6.263398 6.263398 5851 +theanim 1 2 6.263398 6.263398 5852 +thechateau 1 2 6.263398 6.263398 5853 +cynic 1 2 6.263398 6.263398 5854 +duel 1 2 6.263398 6.263398 5855 +christiansondbc 1 1 6.957497 6.957497 15103 +inaiand 1 1 6.957497 6.957497 15104 +graphicsa 1 1 6.957497 6.957497 15105 +directedbehavior 1 1 6.957497 6.957497 15106 +buzzwordacquisit 1 1 6.957497 6.957497 15107 +bobdoorenbo 1 1 6.957497 6.957497 15108 +somehowintegr 1 1 6.957497 6.957497 15109 +applicationthat 1 1 6.957497 6.957497 15110 +basket 1 1 6.957497 6.957497 15111 +determinewhat 1 1 6.957497 6.957497 15112 +moviethat 1 1 6.957497 6.957497 15113 +technologyinto 1 1 6.957497 6.957497 15114 +perpetr 1 1 6.957497 6.957497 15115 +theucpop 1 1 6.957497 6.957497 15116 +isher 1 1 6.957497 6.957497 15117 +carboload 1 1 6.957497 6.957497 15118 +publicationschristianson 1 1 6.957497 6.957497 15119 +cinematographi 1 1 6.957497 6.957497 15120 +firbi 1 1 6.957497 6.957497 15121 +mcdougal 1 1 6.957497 6.957497 15122 +fusion 1 1 6.957497 6.957497 15123 +withfreder 1 1 6.957497 6.957497 15124 +judo 1 1 6.957497 6.957497 15125 +sibl 1 1 6.957497 6.957497 15126 +sisterjust 1 1 6.957497 6.957497 15127 +supercollid 1 1 6.957497 6.957497 15128 +slack 1 1 6.957497 6.957497 15129 +mirski 1 1 6.957497 6.957497 15130 +youth 1 1 6.957497 6.957497 15131 +wwwf 1 1 6.957497 6.957497 15132 +grudg 1 1 6.957497 6.957497 15133 +doomgat 1 1 6.957497 6.957497 15134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..eebc05bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +seattl 1 120 2.079442 2.079442 103 +assist 1 112 2.197225 2.197225 113 +look 1 107 2.197225 2.197225 115 +take 1 97 2.302585 2.302585 134 +activ 1 84 2.484907 2.484907 182 +mondai 1 77 2.564949 2.564949 206 +david 1 71 2.639057 2.639057 232 +creat 1 63 2.772589 2.772589 277 +give 1 50 3.044522 3.044522 359 +tutori 1 39 3.258097 3.258097 437 +form 1 39 3.258097 3.258097 443 +togeth 1 23 3.806662 3.806662 714 +navig 1 21 3.912023 3.912023 796 +hypertext 1 19 4.007333 4.007333 865 +quiz 1 16 4.174387 4.174387 990 +dave 1 14 4.317488 4.317488 1098 +johnson 1 13 4.382027 4.382027 1162 +script 1 13 4.382027 4.382027 1171 +basketbal 1 12 4.465908 4.465908 1289 +softbal 1 9 4.753590 4.753590 1594 +golf 1 6 5.164786 5.164786 2178 +fit 1 5 5.347108 5.347108 2285 +racquetbal 1 4 5.568345 5.568345 3052 +assess 1 4 5.568345 5.568345 2724 +readersproject 1 1 6.957497 6.957497 15135 +theracquetbal 1 1 6.957497 6.957497 15136 +thecreat 1 1 6.957497 6.957497 15137 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..7584366e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +manag 1 114 2.197225 2.197225 125 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +call 1 91 2.397895 2.397895 153 +commun 1 95 2.397895 2.397895 157 +homepag 1 93 2.397895 2.397895 148 +section 1 94 2.397895 2.397895 149 +real 1 93 2.397895 2.397895 144 +academ 1 82 2.484907 2.484907 178 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +help 1 83 2.484907 2.484907 175 +stuff 1 87 2.484907 2.484907 171 +institut 1 84 2.484907 2.484907 187 +learn 1 86 2.484907 2.484907 170 +server 1 76 2.564949 2.564949 204 +dynam 1 76 2.564949 2.564949 194 +know 1 80 2.564949 2.564949 198 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +solv 1 73 2.639057 2.639057 234 +line 1 75 2.639057 2.639057 231 +view 1 70 2.708050 2.708050 254 +main 1 67 2.708050 2.708050 256 +written 1 63 2.772589 2.772589 278 +previou 1 62 2.772589 2.772589 290 +visit 1 63 2.772589 2.772589 288 +guid 1 63 2.772589 2.772589 267 +unix 1 58 2.890372 2.890372 308 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +major 1 56 2.890372 2.890372 315 +extens 1 53 2.944439 2.944439 340 +undergradu 1 54 2.944439 2.944439 338 +maintain 1 51 2.995732 2.995732 342 +run 1 51 2.995732 2.995732 347 +date 1 51 2.995732 2.995732 344 +right 1 48 3.044522 3.044522 363 +cool 1 49 3.044522 3.044522 374 +life 1 50 3.044522 3.044522 375 +featur 1 46 3.091042 3.091042 386 +netscap 1 44 3.135494 3.135494 395 +around 1 43 3.178054 3.178054 415 +past 1 42 3.218876 3.218876 428 +littl 1 39 3.258097 3.258097 454 +movi 1 40 3.258097 3.258097 459 +brian 1 38 3.295837 3.295837 466 +respons 1 37 3.332205 3.332205 476 +soon 1 36 3.367296 3.367296 494 +manual 1 35 3.401197 3.401197 504 +extend 1 32 3.465736 3.465736 539 +weather 1 28 3.610918 3.610918 618 +campu 1 27 3.637586 3.637586 623 +team 1 27 3.637586 3.637586 625 +primari 1 25 3.737670 3.737670 669 +sport 1 25 3.737670 3.737670 683 +todai 1 25 3.737670 3.737670 672 +other 1 24 3.761200 3.761200 697 +dai 1 22 3.850148 3.850148 753 +recommend 1 22 3.850148 3.850148 737 +kernel 1 20 3.951244 3.951244 825 +safeti 1 20 3.951244 3.951244 817 +fine 1 20 3.951244 3.951244 822 +region 1 19 4.007333 4.007333 875 +excel 1 19 4.007333 4.007333 868 +bershad 1 18 4.060443 4.060443 902 +statu 1 18 4.060443 4.060443 885 +protect 1 17 4.110874 4.110874 935 +debug 1 17 4.110874 4.110874 944 +anyth 1 16 4.174387 4.174387 998 +condit 1 16 4.174387 4.174387 975 +dilbert 1 16 4.174387 4.174387 996 +mayb 1 15 4.248495 4.248495 1014 +spin 1 14 4.317488 4.317488 1121 +achiev 1 14 4.317488 4.317488 1088 +stai 1 12 4.465908 4.465908 1215 +touch 1 12 4.465908 4.465908 1288 +rest 1 12 4.465908 4.465908 1259 +surf 1 11 4.553877 4.553877 1301 +traffic 1 10 4.653960 4.653960 1421 +shop 1 10 4.653960 4.653960 1469 +modula 1 9 4.753590 4.753590 1613 +mach 1 8 4.875197 4.875197 1669 +claim 1 8 4.875197 4.875197 1664 +hold 1 8 4.875197 4.875197 1645 +transport 1 8 4.875197 4.875197 1672 +wouldn 1 7 5.010635 5.010635 1970 +occasion 1 7 5.010635 5.010635 1905 +athlet 1 7 5.010635 5.010635 1933 +yeah 1 6 5.164786 5.164786 2195 +impress 1 6 5.164786 5.164786 2096 +variant 1 6 5.164786 5.164786 2043 +subsystem 1 6 5.164786 5.164786 2015 +band 1 6 5.164786 5.164786 2198 +restaur 1 6 5.164786 5.164786 2230 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +afraid 1 4 5.568345 5.568345 3053 +breath 1 4 5.568345 5.568345 2946 +countless 1 4 5.568345 5.568345 3020 +racquetbal 1 4 5.568345 5.568345 3052 +leagu 1 4 5.568345 5.568345 3040 +basebal 1 4 5.568345 5.568345 2969 +bean 1 4 5.568345 5.568345 2968 +slight 1 3 5.857933 5.857933 3894 +emul 1 3 5.857933 5.857933 3944 +meanwhil 1 3 5.857933 5.857933 3129 +distract 1 3 5.857933 5.857933 3945 +trumpet 1 3 5.857933 5.857933 3946 +espn 1 3 5.857933 5.857933 3724 +marin 1 3 5.857933 5.857933 3947 +dion 1 2 6.263398 6.263398 5856 +okai 1 2 6.263398 6.263398 4465 +occupi 1 2 6.263398 6.263398 5857 +ladder 1 2 6.263398 6.263398 5858 +outlet 1 2 6.263398 6.263398 5248 +infam 1 2 6.263398 6.263398 5859 +ddion 1 1 6.957497 6.957497 15138 +thespinoper 1 1 6.957497 6.957497 15139 +intercept 1 1 6.957497 6.957497 15140 +havework 1 1 6.957497 6.957497 15141 +ipromis 1 1 6.957497 6.957497 15142 +enhancedthi 1 1 6.957497 6.957497 15143 +vast 1 1 6.957497 6.957497 15144 +sportzon 1 1 6.957497 6.957497 15145 +eateri 1 1 6.957497 6.957497 15146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..691641e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +address 1 170 1.791759 1.791759 62 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +mani 1 92 2.397895 2.397895 150 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +environ 1 84 2.484907 2.484907 177 +start 1 83 2.484907 2.484907 173 +server 1 76 2.564949 2.564949 204 +issu 1 78 2.564949 2.564949 211 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +experi 1 64 2.772589 2.772589 283 +collect 1 65 2.772589 2.772589 268 +interact 1 62 2.772589 2.772589 270 +share 1 59 2.833213 2.833213 304 +colleg 1 61 2.833213 2.833213 300 +index 1 56 2.890372 2.890372 309 +summer 1 56 2.890372 2.890372 311 +extens 1 53 2.944439 2.944439 340 +undergradu 1 54 2.944439 2.944439 338 +run 1 51 2.995732 2.995732 347 +protocol 1 45 3.135494 3.135494 407 +offer 1 43 3.178054 3.178054 414 +around 1 43 3.178054 3.178054 415 +howev 1 41 3.218876 3.218876 422 +field 1 37 3.332205 3.332205 482 +award 1 34 3.401197 3.401197 523 +curriculum 1 33 3.433987 3.433987 535 +independ 1 32 3.465736 3.465736 548 +dissert 1 32 3.465736 3.465736 549 +often 1 31 3.496508 3.496508 551 +campu 1 27 3.637586 3.637586 623 +arrai 1 27 3.637586 3.637586 627 +administr 1 27 3.637586 3.637586 628 +challeng 1 26 3.688879 3.688879 653 +task 1 25 3.737670 3.737670 678 +portabl 1 20 3.951244 3.951244 819 +beauti 1 18 4.060443 4.060443 912 +event 1 18 4.060443 4.060443 896 +sheet 1 16 4.174387 4.174387 973 +women 1 16 4.174387 4.174387 1004 +career 1 12 4.465908 4.465908 1287 +skill 1 12 4.465908 4.465908 1205 +asynchron 1 12 4.465908 4.465908 1229 +host 1 11 4.553877 4.553877 1306 +typic 1 11 4.553877 4.553877 1360 +extra 1 11 4.553877 4.553877 1312 +interestsmi 1 10 4.653960 4.653960 1462 +tutor 1 9 4.753590 4.753590 1552 +heart 1 8 4.875197 4.875197 1729 +ring 1 8 4.875197 4.875197 1684 +on 1 8 4.875197 4.875197 1628 +pursu 1 7 5.010635 5.010635 1902 +divers 1 6 5.164786 5.164786 2232 +averag 1 6 5.164786 5.164786 2098 +conveni 1 6 5.164786 5.164786 2088 +li 1 5 5.347108 5.347108 2500 +suffer 1 5 5.347108 5.347108 2268 +spaa 1 3 5.857933 5.857933 3906 +certif 1 3 5.857933 5.857933 3859 +token 1 2 6.263398 6.263398 4415 +foremost 1 2 6.263398 6.263398 5361 +ordistribut 1 2 6.263398 6.263398 5581 +distributedenviron 1 2 6.263398 6.263398 5183 +comm 1 2 6.263398 6.263398 4746 +newslet 1 2 6.263398 6.263398 5860 +derrick 1 1 6.957497 6.957497 15147 +weathersbi 1 1 6.957497 6.957497 15148 +bullssupersonicsi 1 1 6.957497 6.957497 15149 +phdin 1 1 6.957497 6.957497 15150 +ofseattl 1 1 6.957497 6.957497 15151 +prei 1 1 6.957497 6.957497 15152 +therebyextend 1 1 6.957497 6.957497 15153 +interestssignific 1 1 6.957497 6.957497 15154 +securityresearch 1 1 6.957497 6.957497 15155 +challengespres 1 1 6.957497 6.957497 15156 +theseenviron 1 1 6.957497 6.957497 15157 +daunt 1 1 6.957497 6.957497 15158 +projectacadem 1 1 6.957497 6.957497 15159 +achievementsinstructor 1 1 6.957497 6.957497 15160 +collegeinstructor 1 1 6.957497 6.957497 15161 +minoritystud 1 1 6.957497 6.957497 15162 +engineeringoutstand 1 1 6.957497 6.957497 15163 +cnnfinanciala 1 1 6.957497 6.957497 15164 +javaw 1 1 6.957497 6.957497 15165 +weathersbyderrick 1 1 6.957497 6.957497 15166 +edutu 1 1 6.957497 6.957497 15167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..365ee8eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +relat 1 139 1.945910 1.945910 68 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +book 1 99 2.302585 2.302585 131 +imag 1 91 2.397895 2.397895 161 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +know 1 80 2.564949 2.564949 198 +june 1 79 2.564949 2.564949 214 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +html 1 75 2.639057 2.639057 235 +tuesdai 1 73 2.639057 2.639057 219 +goal 1 66 2.708050 2.708050 250 +plai 1 60 2.833213 2.833213 307 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +finger 1 52 2.995732 2.995732 354 +much 1 52 2.995732 2.995732 349 +format 1 48 3.044522 3.044522 356 +frequent 1 49 3.044522 3.044522 367 +possibl 1 47 3.091042 3.091042 378 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +york 1 41 3.218876 3.218876 435 +late 1 40 3.258097 3.258097 439 +brian 1 38 3.295837 3.295837 466 +feel 1 37 3.332205 3.332205 483 +china 1 37 3.332205 3.332205 487 +game 1 36 3.367296 3.367296 498 +soon 1 36 3.367296 3.367296 494 +ofth 1 36 3.367296 3.367296 491 +return 1 34 3.401197 3.401197 502 +bibliographi 1 34 3.401197 3.401197 518 +statist 1 35 3.401197 3.401197 521 +anim 1 31 3.496508 3.496508 557 +progress 1 28 3.610918 3.610918 598 +color 1 22 3.850148 3.850148 762 +period 1 22 3.850148 3.850148 743 +feedback 1 19 4.007333 4.007333 854 +histori 1 19 4.007333 4.007333 853 +encourag 1 18 4.060443 4.060443 880 +edulast 1 17 4.110874 4.110874 927 +letter 1 16 4.174387 4.174387 981 +alreadi 1 16 4.174387 4.174387 963 +trip 1 14 4.317488 4.317488 1113 +hopefulli 1 14 4.317488 4.317488 1071 +draft 1 14 4.317488 4.317488 1085 +near 1 14 4.317488 4.317488 1091 +carri 1 13 4.382027 4.382027 1152 +readabl 1 12 4.465908 4.465908 1258 +bike 1 10 4.653960 4.653960 1468 +death 1 10 4.653960 4.653960 1457 +drink 1 9 4.753590 4.753590 1607 +ride 1 8 4.875197 4.875197 1741 +blue 1 6 5.164786 5.164786 2227 +seen 1 6 5.164786 5.164786 2202 +scienceand 1 5 5.347108 5.347108 2348 +atlant 1 5 5.347108 5.347108 2508 +semi 1 5 5.347108 5.347108 2510 +chaotic 1 5 5.347108 5.347108 2566 +pagebrian 1 4 5.568345 5.568345 3054 +ireland 1 4 5.568345 5.568345 2853 +myfavorit 1 3 5.857933 5.857933 3852 +interview 1 3 5.857933 5.857933 3324 +fascin 1 3 5.857933 5.857933 3948 +ocean 1 2 6.263398 6.263398 5375 +northern 1 2 6.263398 6.263398 5861 +shoulder 1 2 6.263398 6.263398 4750 +thecurr 1 2 6.263398 6.263398 5862 +terrorist 1 2 6.263398 6.263398 5190 +addict 1 2 6.263398 6.263398 5576 +thorough 1 2 6.263398 6.263398 4134 +dewei 1 1 6.957497 6.957497 15168 +deweyabout 1 1 6.957497 6.957497 15169 +doyou 1 1 6.957497 6.957497 15170 +ilov 1 1 6.957497 6.957497 15171 +oldroomm 1 1 6.957497 6.957497 15172 +irelandi 1 1 6.957497 6.957497 15173 +belfast 1 1 6.957497 6.957497 15174 +sixti 1 1 6.957497 6.957497 15175 +pagesof 1 1 6.957497 6.957497 15176 +luggag 1 1 6.957497 6.957497 15177 +getthos 1 1 6.957497 6.957497 15178 +enlighteningformat 1 1 6.957497 6.957497 15179 +thisproject 1 1 6.957497 6.957497 15180 +sinn 1 1 6.957497 6.957497 15181 +fein 1 1 6.957497 6.957497 15182 +injuri 1 1 6.957497 6.957497 15183 +recuri 1 1 6.957497 6.957497 15184 +ancient 1 1 6.957497 6.957497 15185 +imageek 1 1 6.957497 6.957497 15186 +cuni 1 1 6.957497 6.957497 15187 +jansteen 1 1 6.957497 6.957497 15188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..6e629725 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +mathemat 1 108 2.197225 2.197225 123 +find 1 111 2.197225 2.197225 111 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +theori 1 111 2.197225 2.197225 127 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +graphic 1 90 2.397895 2.397895 147 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +homepag 1 93 2.397895 2.397895 148 +contain 1 81 2.484907 2.484907 174 +academ 1 82 2.484907 2.484907 178 +thing 1 84 2.484907 2.484907 189 +appli 1 71 2.639057 2.639057 226 +write 1 72 2.639057 2.639057 222 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +goal 1 66 2.708050 2.708050 250 +written 1 63 2.772589 2.772589 278 +abstract 1 62 2.772589 2.772589 276 +plai 1 60 2.833213 2.833213 307 +publish 1 57 2.890372 2.890372 326 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +friend 1 48 3.044522 3.044522 376 +favorit 1 44 3.135494 3.135494 410 +better 1 45 3.135494 3.135494 401 +third 1 43 3.178054 3.178054 412 +press 1 42 3.218876 3.218876 419 +futur 1 41 3.218876 3.218876 427 +probabl 1 40 3.258097 3.258097 455 +random 1 34 3.401197 3.401197 511 +given 1 32 3.465736 3.465736 538 +taken 1 31 3.496508 3.496508 555 +computersci 1 30 3.555348 3.555348 562 +depend 1 29 3.583519 3.583519 583 +ask 1 28 3.610918 3.610918 597 +mine 1 26 3.688879 3.688879 654 +seri 1 24 3.761200 3.761200 708 +fellow 1 24 3.761200 3.761200 701 +ofwashington 1 22 3.850148 3.850148 766 +identifi 1 22 3.850148 3.850148 760 +recommend 1 22 3.850148 3.850148 737 +william 1 22 3.850148 3.850148 765 +fact 1 21 3.912023 3.912023 780 +tenni 1 20 3.951244 3.951244 838 +grad 1 20 3.951244 3.951244 837 +ever 1 19 4.007333 4.007333 872 +otherwis 1 17 4.110874 4.110874 922 +anyon 1 17 4.110874 4.110874 916 +former 1 17 4.110874 4.110874 956 +whether 1 17 4.110874 4.110874 918 +chateau 1 16 4.174387 4.174387 997 +explan 1 16 4.174387 4.174387 985 +biologi 1 15 4.248495 4.248495 1049 +charact 1 15 4.248495 4.248495 1028 +role 1 14 4.317488 4.317488 1101 +pagewelcom 1 11 4.553877 4.553877 1344 +poetri 1 9 4.753590 4.753590 1596 +distanc 1 9 4.753590 4.753590 1500 +illustr 1 8 4.875197 4.875197 1679 +babylon 1 8 4.875197 4.875197 1731 +creativ 1 8 4.875197 4.875197 1777 +absolut 1 8 4.875197 4.875197 1646 +sean 1 8 4.875197 4.875197 1705 +roger 1 7 5.010635 5.010635 1892 +chronicl 1 7 5.010635 5.010635 1952 +athlet 1 7 5.010635 5.010635 1933 +fiction 1 6 5.164786 5.164786 2217 +cat 1 6 5.164786 5.164786 2194 +fantasi 1 4 5.568345 5.568345 3055 +sandi 1 4 5.568345 5.568345 2765 +portrait 1 3 5.857933 5.857933 3491 +slight 1 3 5.857933 5.857933 3894 +kwon 1 3 5.857933 5.857933 3690 +woman 1 3 5.857933 5.857933 3539 +fasulo 1 2 6.263398 6.263398 4391 +honest 1 2 6.263398 6.263398 5060 +alumnu 1 2 6.263398 6.263398 5863 +wendi 1 2 6.263398 6.263398 5864 +belluomini 1 2 6.263398 6.263398 5865 +worthwhil 1 2 6.263398 6.263398 4951 +dfasulo 1 1 6.957497 6.957497 15189 +williamscolleg 1 1 6.957497 6.957497 15191 +inaccuraci 1 1 6.957497 6.957497 15192 +eastlak 1 1 6.957497 6.957497 15193 +merlin 1 1 6.957497 6.957497 15194 +corwin 1 1 6.957497 6.957497 15195 +zelazni 1 1 6.957497 6.957497 15196 +amber 1 1 6.957497 6.957497 15190 +drpg 1 1 6.957497 6.957497 15197 +phage 1 1 6.957497 6.957497 15198 +dress 1 1 6.957497 6.957497 15199 +dogbert 1 1 6.957497 6.957497 15200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..e895c388 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +washington 1 236 1.386294 1.386294 32 +seattl 1 120 2.079442 2.079442 103 +site 1 106 2.197225 2.197225 119 +internet 1 83 2.484907 2.484907 186 +resum 1 79 2.564949 2.564949 217 +tuesdai 1 73 2.639057 2.639057 219 +java 1 70 2.708050 2.708050 248 +septemb 1 65 2.772589 2.772589 274 +plai 1 60 2.833213 2.833213 307 +favorit 1 44 3.135494 3.135494 410 +autumn 1 31 3.496508 3.496508 558 +martin 1 21 3.912023 3.912023 794 +engineeringunivers 1 17 4.110874 4.110874 959 +weekli 1 17 4.110874 4.110874 919 +script 1 13 4.382027 4.382027 1171 +engr 1 10 4.653960 4.653960 1427 +sister 1 9 4.753590 4.753590 1524 +coffe 1 5 5.347108 5.347108 2556 +eduupd 1 4 5.568345 5.568345 3056 +dickei 1 2 6.263398 6.263398 4389 +garg 1 2 6.263398 6.263398 5533 +dickeycomput 1 1 6.957497 6.957497 15201 +washingtonwelcom 1 1 6.957497 6.957497 15202 +schedulenarr 1 1 6.957497 6.957497 15203 +blurbcs 1 1 6.957497 6.957497 15204 +housesfavorit 1 1 6.957497 6.957497 15205 +bookspirograph 1 1 6.957497 6.957497 15206 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..675ad173 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,188 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +seattl 1 120 2.079442 2.079442 103 +provid 1 121 2.079442 2.079442 94 +manag 1 114 2.197225 2.197225 125 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +assist 1 112 2.197225 2.197225 113 +peopl 1 96 2.302585 2.302585 132 +search 1 95 2.397895 2.397895 155 +present 1 91 2.397895 2.397895 145 +larg 1 82 2.484907 2.484907 168 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +help 1 83 2.484907 2.484907 175 +issu 1 78 2.564949 2.564949 211 +dynam 1 76 2.564949 2.564949 194 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +differ 1 66 2.708050 2.708050 253 +sieg 1 69 2.708050 2.708050 260 +would 1 67 2.708050 2.708050 251 +polici 1 64 2.772589 2.772589 279 +guid 1 63 2.772589 2.772589 267 +plan 1 65 2.772589 2.772589 272 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +pointer 1 48 3.044522 3.044522 368 +appoint 1 49 3.044522 3.044522 358 +still 1 50 3.044522 3.044522 362 +without 1 50 3.044522 3.044522 370 +effect 1 46 3.091042 3.091042 385 +move 1 47 3.091042 3.091042 382 +could 1 46 3.091042 3.091042 383 +execut 1 45 3.135494 3.135494 404 +around 1 43 3.178054 3.178054 415 +show 1 43 3.178054 3.178054 417 +cach 1 41 3.218876 3.218876 432 +map 1 39 3.258097 3.258097 452 +realli 1 40 3.258097 3.258097 444 +live 1 40 3.258097 3.258097 451 +brian 1 38 3.295837 3.295837 466 +next 1 34 3.401197 3.401197 517 +go 1 33 3.433987 3.433987 529 +taught 1 33 3.433987 3.433987 526 +idea 1 32 3.465736 3.465736 545 +consid 1 29 3.583519 3.583519 590 +mind 1 27 3.637586 3.637586 632 +rather 1 26 3.688879 3.688879 642 +enjoi 1 26 3.688879 3.688879 660 +primari 1 25 3.737670 3.737670 669 +concern 1 25 3.737670 3.737670 666 +never 1 25 3.737670 3.737670 671 +yahoo 1 24 3.761200 3.761200 707 +alwai 1 24 3.761200 3.761200 691 +begin 1 23 3.806662 3.806662 716 +thread 1 23 3.806662 3.806662 722 +initi 1 23 3.806662 3.806662 717 +decis 1 23 3.806662 3.806662 728 +almost 1 22 3.850148 3.850148 742 +sort 1 22 3.850148 3.850148 738 +chen 1 21 3.912023 3.912023 791 +avoid 1 21 3.912023 3.912023 799 +among 1 21 3.912023 3.912023 781 +miss 1 19 4.007333 4.007333 866 +lyco 1 19 4.007333 4.007333 871 +definit 1 19 4.007333 4.007333 864 +bershad 1 18 4.060443 4.060443 902 +four 1 18 4.060443 4.060443 905 +element 1 18 4.060443 4.060443 895 +event 1 18 4.060443 4.060443 896 +asplo 1 17 4.110874 4.110874 948 +otherwis 1 17 4.110874 4.110874 922 +whole 1 17 4.110874 4.110874 940 +choic 1 16 4.174387 4.174387 979 +stream 1 15 4.248495 4.248495 1015 +draw 1 14 4.317488 4.317488 1086 +manner 1 14 4.317488 4.317488 1074 +alan 1 13 4.382027 4.382027 1146 +hotlist 1 13 4.382027 4.382027 1199 +brad 1 12 4.465908 4.465908 1264 +denni 1 11 4.553877 4.553877 1321 +smart 1 11 4.553877 4.553877 1352 +baer 1 11 4.553877 4.553877 1353 +isca 1 11 4.553877 4.553877 1354 +moment 1 11 4.553877 4.553877 1379 +magic 1 11 4.553877 4.553877 1358 +osdi 1 9 4.753590 4.753590 1534 +yellow 1 9 4.753590 4.753590 1601 +russel 1 9 4.753590 4.753590 1507 +occur 1 9 4.753590 4.753590 1572 +romer 1 8 4.875197 4.875197 1706 +qualifi 1 8 4.875197 4.875197 1721 +entri 1 8 4.875197 4.875197 1678 +gold 1 8 4.875197 4.875197 1745 +rais 1 8 4.875197 4.875197 1711 +baker 1 7 5.010635 5.010635 1812 +chanc 1 7 5.010635 5.010635 1960 +loup 1 6 5.164786 5.164786 2228 +conflict 1 6 5.164786 5.164786 2041 +presid 1 6 5.164786 5.164786 2196 +commit 1 6 5.164786 5.164786 2233 +truth 1 6 5.164786 5.164786 2179 +dream 1 6 5.164786 5.164786 2165 +whatev 1 6 5.164786 5.164786 2097 +fetch 1 5 5.347108 5.347108 2567 +million 1 5 5.347108 5.347108 2495 +lesson 1 5 5.347108 5.347108 2568 +ignor 1 5 5.347108 5.347108 2288 +favor 1 5 5.347108 5.347108 2414 +vote 1 4 5.568345 5.568345 2953 +ford 1 4 5.568345 5.568345 2636 +kill 1 4 5.568345 5.568345 3000 +countless 1 4 5.568345 5.568345 3020 +energi 1 3 5.857933 5.857933 3950 +specul 1 3 5.857933 5.857933 3951 +mappedcach 1 3 5.857933 5.857933 3928 +reorder 1 3 5.857933 5.857933 3952 +dlee 1 3 5.857933 5.857933 3949 +evil 1 3 5.857933 5.857933 3943 +act 1 3 5.857933 5.857933 3557 +researchwith 1 2 6.263398 6.263398 5594 +eustac 1 2 6.263398 6.263398 5866 +dirk 1 2 6.263398 6.263398 5665 +andt 1 2 6.263398 6.263398 5121 +resolutionon 1 2 6.263398 6.263398 5867 +nixon 1 2 6.263398 6.263398 5868 +court 1 2 6.263398 6.263398 4870 +silver 1 2 6.263398 6.263398 5374 +theblack 1 2 6.263398 6.263398 5869 +hesit 1 2 6.263398 6.263398 5774 +incid 1 2 6.263398 6.263398 5870 +goeth 1 2 6.263398 6.263398 5366 +likejean 1 1 6.957497 6.957497 15210 +calder 1 1 6.957497 6.957497 15207 +grunwald 1 1 6.957497 6.957497 15208 +enginefor 1 1 6.957497 6.957497 15211 +sensibl 1 1 6.957497 6.957497 15212 +conced 1 1 6.957497 6.957497 15213 +thatpolit 1 1 6.957497 6.957497 15214 +lesser 1 1 6.957497 6.957497 15215 +tweedledumand 1 1 6.957497 6.957497 15216 +tweedlede 1 1 6.957497 6.957497 15217 +abstain 1 1 6.957497 6.957497 15218 +theyar 1 1 6.957497 6.957497 15219 +torummag 1 1 6.957497 6.957497 15220 +allth 1 1 6.957497 6.957497 15221 +stew 1 1 6.957497 6.957497 15222 +huberthumphrei 1 1 6.957497 6.957497 15209 +humphrei 1 1 6.957497 6.957497 15223 +suprem 1 1 6.957497 6.957497 15224 +whentricia 1 1 6.957497 6.957497 15225 +flummeri 1 1 6.957497 6.957497 15226 +ineffect 1 1 6.957497 6.957497 15227 +splendid 1 1 6.957497 6.957497 15228 +unforeseen 1 1 6.957497 6.957497 15229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..27ae9a92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +databas 1 122 2.079442 2.079442 86 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +homepag 1 93 2.397895 2.397895 148 +member 1 84 2.484907 2.484907 165 +novemb 1 81 2.484907 2.484907 179 +state 1 76 2.564949 2.564949 207 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +test 1 66 2.708050 2.708050 252 +thursdai 1 70 2.708050 2.708050 241 +much 1 52 2.995732 2.995732 349 +date 1 51 2.995732 2.995732 344 +anoth 1 45 3.135494 3.135494 408 +math 1 44 3.135494 3.135494 402 +execut 1 45 3.135494 3.135494 404 +edit 1 42 3.218876 3.218876 418 +realli 1 40 3.258097 3.258097 444 +feel 1 37 3.332205 3.332205 483 +usual 1 28 3.610918 3.610918 608 +intend 1 28 3.610918 3.610918 599 +actual 1 28 3.610918 3.610918 604 +todai 1 25 3.737670 3.737670 672 +background 1 25 3.737670 3.737670 664 +brows 1 23 3.806662 3.806662 726 +ofwashington 1 22 3.850148 3.850148 766 +sure 1 20 3.951244 3.951244 813 +grad 1 20 3.951244 3.951244 837 +anywai 1 15 4.248495 4.248495 1047 +club 1 15 4.248495 4.248495 1058 +player 1 11 4.553877 4.553877 1371 +michigan 1 11 4.553877 4.553877 1368 +strongli 1 10 4.653960 4.653960 1406 +death 1 10 4.653960 4.653960 1457 +mention 1 9 4.753590 4.753590 1569 +doug 1 9 4.753590 4.753590 1517 +unusu 1 9 4.753590 4.753590 1566 +vice 1 9 4.753590 4.753590 1604 +charg 1 9 4.753590 4.753590 1582 +screen 1 9 4.753590 4.753590 1577 +sister 1 9 4.753590 4.753590 1524 +ground 1 7 5.010635 5.010635 1955 +hit 1 7 5.010635 5.010635 1965 +lucki 1 6 5.164786 5.164786 2163 +presid 1 6 5.164786 5.164786 2196 +emerg 1 6 5.164786 5.164786 2038 +highwai 1 6 5.164786 5.164786 2095 +unnecessari 1 5 5.347108 5.347108 2506 +wast 1 5 5.347108 5.347108 2537 +keeper 1 5 5.347108 5.347108 2569 +bryan 1 5 5.347108 5.347108 2421 +worst 1 5 5.347108 5.347108 2287 +exhaust 1 4 5.568345 5.568345 2825 +engineeringdepart 1 4 5.568345 5.568345 2917 +backup 1 4 5.568345 5.568345 2645 +gear 1 4 5.568345 5.568345 2891 +bold 1 3 5.857933 5.857933 3846 +treasur 1 3 5.857933 5.857933 3229 +sit 1 3 5.857933 5.857933 3953 +tast 1 3 5.857933 5.857933 3666 +labor 1 3 5.857933 5.857933 3195 +casual 1 2 6.263398 6.263398 4542 +buti 1 2 6.263398 6.263398 4775 +meth 1 2 6.263398 6.263398 5872 +useless 1 2 6.263398 6.263398 5564 +caveat 1 2 6.263398 6.263398 4975 +sarcasm 1 2 6.263398 6.263398 5871 +apprentic 1 2 6.263398 6.263398 5873 +cart 1 2 6.263398 6.263398 5874 +stolen 1 2 6.263398 6.263398 5803 +pagececi 1 1 6.957497 6.957497 15231 +noless 1 1 6.957497 6.957497 15232 +zongker 1 1 6.957497 6.957497 15230 +classeshow 1 1 6.957497 6.957497 15233 +toxic 1 1 6.957497 6.957497 15234 +custard 1 1 6.957497 6.957497 15235 +filesth 1 1 6.957497 6.957497 15236 +mento 1 1 6.957497 6.957497 15237 +galleryvisit 1 1 6.957497 6.957497 15238 +supercolliderth 1 1 6.957497 6.957497 15239 +cron 1 1 6.957497 6.957497 15240 +avirtu 1 1 6.957497 6.957497 15241 +trove 1 1 6.957497 6.957497 15242 +whichmai 1 1 6.957497 6.957497 15243 +imajor 1 1 6.957497 6.957497 15244 +andminor 1 1 6.957497 6.957497 15245 +dubiou 1 1 6.957497 6.957497 15246 +honorsjunior 1 1 6.957497 6.957497 15247 +brotherhood 1 1 6.957497 6.957497 15248 +crouton 1 1 6.957497 6.957497 15249 +pizzicato 1 1 6.957497 6.957497 15250 +clicker 1 1 6.957497 6.957497 15251 +cruis 1 1 6.957497 6.957497 15252 +inhigh 1 1 6.957497 6.957497 15253 +buttstar 1 1 6.957497 6.957497 15254 +wheremi 1 1 6.957497 6.957497 15255 +dougz 1 1 6.957497 6.957497 15256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..158ea899 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +databas 1 122 2.079442 2.079442 86 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +help 1 83 2.484907 2.484907 175 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +come 1 78 2.564949 2.564949 202 +addit 1 74 2.639057 2.639057 228 +differ 1 66 2.708050 2.708050 253 +java 1 70 2.708050 2.708050 248 +interact 1 62 2.772589 2.772589 270 +polici 1 64 2.772589 2.772589 279 +experi 1 64 2.772589 2.772589 283 +virtual 1 62 2.772589 2.772589 285 +improv 1 62 2.772589 2.772589 289 +thesi 1 57 2.890372 2.890372 327 +allow 1 53 2.944439 2.944439 333 +investig 1 51 2.995732 2.995732 353 +done 1 47 3.091042 3.091042 381 +quarter 1 47 3.091042 3.091042 389 +mechan 1 43 3.178054 3.178054 416 +seminar 1 38 3.295837 3.295837 470 +slide 1 38 3.295837 3.295837 467 +winter 1 36 3.367296 3.367296 500 +jame 1 35 3.401197 3.401197 507 +idea 1 32 3.465736 3.465736 545 +built 1 29 3.583519 3.583519 592 +demonstr 1 24 3.761200 3.761200 694 +thread 1 23 3.806662 3.806662 722 +lead 1 23 3.806662 3.806662 718 +kernel 1 20 3.951244 3.951244 825 +concentr 1 18 4.060443 4.060443 906 +spin 1 14 4.317488 4.317488 1121 +carri 1 13 4.382027 4.382027 1152 +signific 1 13 4.382027 4.382027 1125 +folk 1 9 4.753590 4.753590 1597 +voelker 1 9 4.753590 4.753590 1557 +dylan 1 8 4.875197 4.875197 1625 +slightli 1 7 5.010635 5.010635 1795 +dedic 1 7 5.010635 5.010635 1843 +gave 1 7 5.010635 5.010635 1922 +geoff 1 6 5.164786 5.164786 2124 +caus 1 5 5.347108 5.347108 2298 +commod 1 5 5.347108 5.347108 2415 +poorli 1 4 5.568345 5.568345 2781 +opal 1 4 5.568345 5.568345 3057 +oodb 1 3 5.857933 5.857933 3954 +mcname 1 2 6.263398 6.263398 5875 +properli 1 2 6.263398 6.263398 5454 +architecturethat 1 2 6.263398 6.263398 5876 +applicationsand 1 1 6.957497 6.957497 15257 +mechanismthat 1 1 6.957497 6.957497 15258 +replacementpolici 1 1 6.957497 6.957497 15259 +machoper 1 1 6.957497 6.957497 15260 +thathelp 1 1 6.957497 6.957497 15261 +kernelthread 1 1 6.957497 6.957497 15262 +tailor 1 1 6.957497 6.957497 15263 +betweenobject 1 1 6.957497 6.957497 15264 +achiv 1 1 6.957497 6.957497 15265 +improvementscan 1 1 6.957497 6.957497 15266 +betterserv 1 1 6.957497 6.957497 15267 +paperscv 1 1 6.957497 6.957497 15268 +lectureintroduc 1 1 6.957497 6.957497 15269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..6302b297 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +seattl 1 120 2.079442 2.079442 103 +make 1 111 2.197225 2.197225 120 +mondai 1 77 2.564949 2.564949 206 +sieg 1 69 2.708050 2.708050 260 +dept 1 64 2.772589 2.772589 291 +wednesdai 1 64 2.772589 2.772589 261 +septemb 1 65 2.772589 2.772589 274 +could 1 46 3.091042 3.091042 383 +tent 1 22 3.850148 3.850148 739 +thur 1 19 4.007333 4.007333 847 +hello 1 10 4.653960 4.653960 1407 +lewi 1 8 4.875197 4.875197 1700 +christoph 1 5 5.347108 5.347108 2512 +glad 1 4 5.568345 5.568345 2657 +echri 1 1 6.957497 6.957497 15270 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..13e02af1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +blank 1 3 5.857933 5.857933 3379 +ecrock 1 1 6.957497 6.957497 15271 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..eab45018 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +site 1 106 2.197225 2.197225 119 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +find 1 111 2.197225 2.197225 111 +peopl 1 96 2.302585 2.302585 132 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +book 1 99 2.302585 2.302585 131 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +help 1 83 2.484907 2.484907 175 +know 1 80 2.564949 2.564949 198 +resum 1 79 2.564949 2.564949 217 +dynam 1 76 2.564949 2.564949 194 +want 1 79 2.564949 2.564949 199 +come 1 78 2.564949 2.564949 202 +line 1 75 2.639057 2.639057 231 +free 1 73 2.639057 2.639057 224 +name 1 72 2.639057 2.639057 220 +august 1 66 2.708050 2.708050 257 +plan 1 65 2.772589 2.772589 272 +creat 1 63 2.772589 2.772589 277 +visit 1 63 2.772589 2.772589 288 +import 1 65 2.772589 2.772589 282 +type 1 61 2.833213 2.833213 296 +room 1 59 2.833213 2.833213 301 +variou 1 56 2.890372 2.890372 317 +think 1 57 2.890372 2.890372 314 +found 1 53 2.944439 2.944439 337 +give 1 50 3.044522 3.044522 359 +friend 1 48 3.044522 3.044522 376 +anoth 1 45 3.135494 3.135494 408 +keep 1 44 3.135494 3.135494 409 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +littl 1 39 3.258097 3.258097 454 +form 1 39 3.258097 3.258097 443 +theoret 1 39 3.258097 3.258097 446 +industri 1 38 3.295837 3.295837 464 +hand 1 37 3.332205 3.332205 475 +power 1 30 3.555348 3.555348 573 +domain 1 30 3.555348 3.555348 564 +sometim 1 24 3.761200 3.761200 696 +alwai 1 24 3.761200 3.761200 691 +busi 1 21 3.912023 3.912023 784 +fact 1 21 3.912023 3.912023 780 +stand 1 18 4.060443 4.060443 891 +macintosh 1 17 4.110874 4.110874 920 +qual 1 15 4.248495 4.248495 1062 +countri 1 15 4.248495 4.248495 1059 +hong 1 14 4.317488 4.317488 1105 +karlin 1 13 4.382027 4.382027 1176 +anna 1 12 4.465908 4.465908 1292 +appl 1 11 4.553877 4.553877 1303 +guess 1 10 4.653960 4.653960 1443 +traffic 1 10 4.653960 4.653960 1421 +plain 1 9 4.753590 4.753590 1495 +sister 1 9 4.753590 4.753590 1524 +chamber 1 8 4.875197 4.875197 1692 +craig 1 7 5.010635 5.010635 1879 +happen 1 7 5.010635 5.010635 1790 +histor 1 6 5.164786 5.164786 2085 +mac 1 5 5.347108 5.347108 2292 +advic 1 5 5.347108 5.347108 2509 +insight 1 4 5.568345 5.568345 3024 +eddi 1 3 5.857933 5.857933 3896 +studentat 1 2 6.263398 6.263398 5877 +seig 1 2 6.263398 6.263398 4462 +commentari 1 2 6.263398 6.263398 4287 +bias 1 2 6.263398 6.263398 5033 +tosai 1 1 6.957497 6.957497 15272 +hadto 1 1 6.957497 6.957497 15273 +postcriptand 1 1 6.957497 6.957497 15274 +workingon 1 1 6.957497 6.957497 15275 +fordynam 1 1 6.957497 6.957497 15276 +activit 1 1 6.957497 6.957497 15277 +vine 1 1 6.957497 6.957497 15278 +branchesmi 1 1 6.957497 6.957497 15279 +knowof 1 1 6.957497 6.957497 15280 +daveneti 1 1 6.957497 6.957497 15281 +towardslik 1 1 6.957497 6.957497 15282 +eveneasi 1 1 6.957497 6.957497 15283 +freewai 1 1 6.957497 6.957497 15284 +worldher 1 1 6.957497 6.957497 15285 +edhong 1 1 6.957497 6.957497 15286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..80463b04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +memori 1 101 2.302585 2.302585 139 +call 1 91 2.397895 2.397895 153 +homepag 1 93 2.397895 2.397895 148 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +dynam 1 76 2.564949 2.564949 194 +sieg 1 69 2.708050 2.708050 260 +degre 1 69 2.708050 2.708050 259 +new 1 64 2.772589 2.772589 262 +back 1 60 2.833213 2.833213 297 +share 1 59 2.833213 2.833213 304 +thesi 1 57 2.890372 2.890372 327 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +http 1 41 3.218876 3.218876 420 +cach 1 41 3.218876 3.218876 432 +might 1 41 3.218876 3.218876 426 +committe 1 34 3.401197 3.401197 522 +computersci 1 30 3.555348 3.555348 562 +miscellan 1 23 3.806662 3.806662 731 +begin 1 23 3.806662 3.806662 716 +emphasi 1 22 3.850148 3.850148 755 +reduc 1 22 3.850148 3.850148 759 +voic 1 21 3.912023 3.912023 806 +asplo 1 17 4.110874 4.110874 948 +susan 1 15 4.248495 4.248495 1050 +qual 1 15 4.248495 4.248495 1062 +coher 1 14 4.317488 4.317488 1109 +workload 1 12 4.465908 4.465908 1210 +multithread 1 11 4.553877 4.553877 1315 +egger 1 8 4.875197 4.875197 1695 +prefetch 1 6 5.164786 5.164786 2039 +fals 1 4 5.568345 5.568345 2861 +amast 1 3 5.857933 5.857933 3955 +onexperiment 1 1 6.957497 6.957497 15287 +incompil 1 1 6.957497 6.957497 15288 +optimizationsand 1 1 6.957497 6.957497 15289 +multithreadedarchitectur 1 1 6.957497 6.957497 15290 +spinprevi 1 1 6.957497 6.957497 15291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..cc4098c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +first 1 140 1.945910 1.945910 71 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +activ 1 84 2.484907 2.484907 182 +educ 1 86 2.484907 2.484907 191 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +want 1 79 2.564949 2.564949 199 +server 1 76 2.564949 2.564949 204 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +workshop 1 71 2.639057 2.639057 239 +receiv 1 66 2.708050 2.708050 244 +goal 1 66 2.708050 2.708050 250 +main 1 67 2.708050 2.708050 256 +simul 1 66 2.708050 2.708050 255 +prof 1 64 2.772589 2.772589 273 +experi 1 64 2.772589 2.772589 283 +share 1 59 2.833213 2.833213 304 +march 1 61 2.833213 2.833213 295 +summer 1 56 2.890372 2.890372 311 +extens 1 53 2.944439 2.944439 340 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +adapt 1 46 3.091042 3.091042 387 +possibl 1 47 3.091042 3.091042 378 +describ 1 45 3.135494 3.135494 400 +third 1 43 3.178054 3.178054 412 +mechan 1 43 3.178054 3.178054 416 +http 1 41 3.218876 3.218876 420 +review 1 42 3.218876 3.218876 425 +brian 1 38 3.295837 3.295837 466 +prototyp 1 38 3.295837 3.295837 463 +slide 1 38 3.295837 3.295837 467 +mean 1 37 3.332205 3.332205 477 +fault 1 32 3.465736 3.465736 547 +independ 1 32 3.465736 3.465736 548 +posit 1 31 3.496508 3.496508 552 +domain 1 30 3.555348 3.555348 564 +synchron 1 29 3.583519 3.583519 588 +limit 1 29 3.583519 3.583519 585 +load 1 28 3.610918 3.610918 601 +measur 1 28 3.610918 3.610918 609 +symbol 1 27 3.637586 3.637586 620 +compar 1 26 3.688879 3.688879 648 +toward 1 25 3.737670 3.737670 668 +spent 1 25 3.737670 3.737670 676 +aspect 1 25 3.737670 3.737670 663 +displai 1 23 3.806662 3.806662 712 +thread 1 23 3.806662 3.806662 722 +miscellan 1 23 3.806662 3.806662 731 +reduc 1 22 3.850148 3.850148 759 +path 1 21 3.912023 3.912023 778 +wrote 1 20 3.951244 3.951244 830 +kernel 1 20 3.951244 3.951244 825 +fine 1 20 3.951244 3.951244 822 +safeti 1 20 3.951244 3.951244 817 +benchmark 1 19 4.007333 4.007333 859 +bershad 1 18 4.060443 4.060443 902 +protect 1 17 4.110874 4.110874 935 +coupl 1 17 4.110874 4.110874 939 +latenc 1 16 4.174387 4.174387 993 +princeton 1 15 4.248495 4.248495 1042 +novel 1 15 4.248495 4.248495 1039 +overhead 1 15 4.248495 4.248495 1035 +enough 1 15 4.248495 4.248495 1040 +spin 1 14 4.317488 4.317488 1121 +achiev 1 14 4.317488 4.317488 1088 +senior 1 14 4.317488 4.317488 1120 +opportun 1 13 4.382027 4.382027 1161 +safe 1 12 4.465908 4.465908 1274 +robust 1 12 4.465908 4.465908 1271 +arbitrari 1 11 4.553877 4.553877 1359 +grain 1 10 4.653960 4.653960 1448 +sosp 1 10 4.653960 4.653960 1416 +ski 1 10 4.653960 4.653960 1471 +jersei 1 9 4.753590 4.753590 1587 +modula 1 9 4.753590 4.753590 1613 +grew 1 8 4.875197 4.875197 1742 +mach 1 8 4.875197 4.875197 1669 +isol 1 8 4.875197 4.875197 1663 +spec 1 8 4.875197 4.875197 1640 +sigop 1 8 4.875197 4.875197 1727 +european 1 8 4.875197 4.875197 1763 +dylan 1 8 4.875197 4.875197 1625 +interrupt 1 7 5.010635 5.010635 1793 +bell 1 6 5.164786 5.164786 2224 +subsystem 1 6 5.164786 5.164786 2015 +simultan 1 6 5.164786 5.164786 2155 +spinproject 1 5 5.347108 5.347108 2570 +compat 1 5 5.347108 5.347108 2485 +sail 1 5 5.347108 5.347108 2571 +outdoor 1 5 5.347108 5.347108 2514 +turkei 1 4 5.568345 5.568345 2914 +hide 1 4 5.568345 5.568345 2996 +mip 1 4 5.568345 5.568345 2738 +microkernel 1 4 5.568345 5.568345 3047 +thesumm 1 3 5.857933 5.857933 3763 +wcsss 1 3 5.857933 5.857933 3956 +namespac 1 3 5.857933 5.857933 3957 +hoto 1 3 5.857933 5.857933 3577 +arizona 1 3 5.857933 5.857933 3700 +dive 1 3 5.857933 5.857933 3654 +emin 1 2 6.263398 6.263398 5810 +sirer 1 2 6.263398 6.263398 5811 +backgroundi 1 2 6.263398 6.263398 5878 +andsom 1 2 6.263398 6.263398 5483 +schedulingpolici 1 2 6.263398 6.263398 5879 +strand 1 2 6.263398 6.263398 5880 +ofnew 1 2 6.263398 6.263398 5881 +mipsi 1 2 6.263398 6.263398 5882 +tucson 1 2 6.263398 6.263398 5883 +cloth 1 2 6.263398 6.263398 5884 +sirereg 1 1 6.957497 6.957497 15292 +istanbul 1 1 6.957497 6.957497 15293 +labswork 1 1 6.957497 6.957497 15294 +theplan 1 1 6.957497 6.957497 15295 +thevesta 1 1 6.957497 6.957497 15296 +projectsmi 1 1 6.957497 6.957497 15297 +andprotect 1 1 6.957497 6.957497 15298 +specificaspect 1 1 6.957497 6.957497 15299 +alarm 1 1 6.957497 6.957497 15300 +ofextend 1 1 6.957497 6.957497 15301 +allowsu 1 1 6.957497 6.957497 15302 +isdesign 1 1 6.957497 6.957497 15303 +allowsisol 1 1 6.957497 6.957497 15304 +withconflict 1 1 6.957497 6.957497 15305 +beassur 1 1 6.957497 6.957497 15306 +clincher 1 1 6.957497 6.957497 15307 +extensionsthat 1 1 6.957497 6.957497 15308 +protectionenforc 1 1 6.957497 6.957497 15309 +performanceweb 1 1 6.957497 6.957497 15310 +networkingstack 1 1 6.957497 6.957497 15311 +andminim 1 1 6.957497 6.957497 15312 +calledmipsi 1 1 6.957497 6.957497 15313 +researchplatform 1 1 6.957497 6.957497 15314 +featuresand 1 1 6.957497 6.957497 15315 +talkslanguag 1 1 6.957497 6.957497 15316 +interestswhenev 1 1 6.957497 6.957497 15317 +windsurf 1 1 6.957497 6.957497 15318 +bikingmak 1 1 6.957497 6.957497 15319 +andhik 1 1 6.957497 6.957497 15320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..eb527530 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +like 1 132 1.945910 1.945910 81 +first 1 140 1.945910 1.945910 71 +seattl 1 120 2.079442 2.079442 103 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +pictur 1 89 2.397895 2.397895 160 +graphic 1 90 2.397895 2.397895 147 +section 1 94 2.397895 2.397895 149 +solut 1 82 2.484907 2.484907 162 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +method 1 80 2.564949 2.564949 213 +write 1 72 2.639057 2.639057 222 +involv 1 71 2.639057 2.639057 227 +solv 1 73 2.639057 2.639057 234 +materi 1 75 2.639057 2.639057 221 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +street 1 63 2.772589 2.772589 293 +prof 1 64 2.772589 2.772589 273 +automat 1 61 2.833213 2.833213 306 +thesi 1 57 2.890372 2.890372 327 +particular 1 51 2.995732 2.995732 352 +still 1 50 3.044522 3.044522 362 +numer 1 49 3.044522 3.044522 369 +could 1 46 3.091042 3.091042 383 +third 1 43 3.178054 3.178054 412 +music 1 42 3.218876 3.218876 436 +probabl 1 40 3.258097 3.258097 455 +realli 1 40 3.258097 3.258097 444 +small 1 39 3.258097 3.258097 447 +mean 1 37 3.332205 3.332205 477 +idea 1 32 3.465736 3.465736 545 +richard 1 31 3.496508 3.496508 559 +power 1 30 3.555348 3.555348 573 +weather 1 28 3.610918 3.610918 618 +mind 1 27 3.637586 3.637586 632 +honor 1 23 3.806662 3.806662 729 +famili 1 23 3.806662 3.806662 735 +try 1 22 3.850148 3.850148 764 +finish 1 22 3.850148 3.850148 748 +fact 1 21 3.912023 3.912023 780 +longer 1 20 3.951244 3.951244 816 +applet 1 20 3.951244 3.951244 827 +eric 1 19 4.007333 4.007333 870 +anderson 1 19 4.007333 4.007333 860 +els 1 19 4.007333 4.007333 843 +mostli 1 19 4.007333 4.007333 869 +prove 1 19 4.007333 4.007333 848 +matrix 1 17 4.110874 4.110874 933 +sept 1 17 4.110874 4.110874 952 +fourth 1 16 4.174387 4.174387 999 +spatial 1 16 4.174387 4.174387 988 +spars 1 16 4.174387 4.174387 989 +qual 1 15 4.248495 4.248495 1062 +nonlinear 1 14 4.317488 4.317488 1107 +step 1 13 4.382027 4.382027 1138 +care 1 13 4.382027 4.382027 1177 +resolut 1 13 4.382027 4.382027 1172 +necessari 1 13 4.382027 4.382027 1147 +bodi 1 13 4.382027 4.382027 1178 +karlin 1 13 4.382027 4.382027 1176 +brother 1 13 4.382027 4.382027 1189 +speech 1 12 4.465908 4.465908 1222 +anna 1 12 4.465908 4.465908 1292 +black 1 10 4.653960 4.653960 1418 +henri 1 10 4.653960 4.653960 1417 +explicit 1 9 4.753590 4.753590 1525 +newton 1 7 5.010635 5.010635 1824 +signal 1 7 5.010635 5.010635 1910 +commit 1 6 5.164786 5.164786 2233 +duke 1 6 5.164786 5.164786 2231 +restrict 1 6 5.164786 5.164786 2129 +transcript 1 6 5.164786 5.164786 2067 +stabil 1 5 5.347108 5.347108 2286 +aim 1 5 5.347108 5.347108 2477 +isth 1 5 5.347108 5.347108 2532 +everybodi 1 5 5.347108 5.347108 2517 +snapshot 1 5 5.347108 5.347108 2303 +implicit 1 4 5.568345 5.568345 2830 +backward 1 4 5.568345 5.568345 2638 +freedom 1 3 5.857933 5.857933 3890 +euler 1 3 5.857933 5.857933 3174 +interplai 1 3 5.857933 5.857933 3726 +astrophys 1 3 5.857933 5.857933 3936 +aclu 1 2 6.263398 6.263398 5227 +reno 1 2 6.263398 6.263398 5228 +mere 1 2 6.263398 6.263398 5340 +panic 1 2 6.263398 6.263398 5682 +criterion 1 2 6.263398 6.263398 5885 +acoust 1 2 6.263398 6.263398 5355 +musician 1 2 6.263398 6.263398 5718 +preparedfor 1 2 6.263398 6.263398 5886 +meander 1 2 6.263398 6.263398 5887 +andersonwher 1 1 6.957497 6.957497 15321 +decisionin 1 1 6.957497 6.957497 15322 +thedecis 1 1 6.957497 6.957497 15323 +interim 1 1 6.957497 6.957497 15324 +feloni 1 1 6.957497 6.957497 15325 +themarketplac 1 1 6.957497 6.957497 15326 +imostli 1 1 6.957497 6.957497 15327 +greensideof 1 1 6.957497 6.957497 15328 +onsteadi 1 1 6.957497 6.957497 15329 +biharmon 1 1 6.957497 6.957497 15330 +timesteppingmethod 1 1 6.957497 6.957497 15331 +analysisissu 1 1 6.957497 6.957497 15332 +nonlinearequ 1 1 6.957497 6.957497 15333 +newtonstep 1 1 6.957497 6.957497 15334 +spiffi 1 1 6.957497 6.957497 15335 +structuresbi 1 1 6.957497 6.957497 15336 +andersoni 1 1 6.957497 6.957497 15337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..d94e5fcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +machin 1 129 2.079442 2.079442 95 +databas 1 122 2.079442 2.079442 86 +world 1 115 2.197225 2.197225 126 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +internet 1 83 2.484907 2.484907 186 +learn 1 86 2.484907 2.484907 170 +second 1 81 2.484907 2.484907 166 +wide 1 84 2.484907 2.484907 185 +ieee 1 86 2.484907 2.484907 190 +resourc 1 81 2.484907 2.484907 172 +contain 1 81 2.484907 2.484907 174 +state 1 76 2.564949 2.564949 207 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +master 1 76 2.564949 2.564949 216 +servic 1 72 2.639057 2.639057 236 +effici 1 73 2.639057 2.639057 233 +html 1 75 2.639057 2.639057 235 +intellig 1 72 2.639057 2.639057 225 +addit 1 74 2.639057 2.639057 228 +sieg 1 69 2.708050 2.708050 260 +august 1 66 2.708050 2.708050 257 +knowledg 1 67 2.708050 2.708050 243 +plan 1 65 2.772589 2.772589 272 +room 1 59 2.833213 2.833213 301 +juli 1 60 2.833213 2.833213 305 +reason 1 57 2.890372 2.890372 318 +thesi 1 57 2.890372 2.890372 327 +unix 1 58 2.890372 2.890372 308 +undergradu 1 54 2.944439 2.944439 338 +without 1 50 3.044522 3.044522 370 +move 1 47 3.091042 3.091042 382 +understand 1 47 3.091042 3.091042 384 +error 1 40 3.258097 3.258097 449 +close 1 38 3.295837 3.295837 465 +field 1 37 3.332205 3.332205 482 +multi 1 36 3.367296 3.367296 493 +robot 1 36 3.367296 3.367296 497 +statist 1 35 3.401197 3.401197 521 +human 1 32 3.465736 3.465736 546 +photo 1 31 3.496508 3.496508 561 +robert 1 30 3.555348 3.555348 567 +neural 1 30 3.555348 3.555348 578 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +request 1 26 3.688879 3.688879 635 +relev 1 26 3.688879 3.688879 637 +magazin 1 24 3.761200 3.761200 704 +greg 1 24 3.761200 3.761200 695 +ofwashington 1 22 3.850148 3.850148 766 +william 1 22 3.850148 3.850148 765 +voic 1 21 3.912023 3.912023 806 +fact 1 21 3.912023 3.912023 780 +expert 1 20 3.951244 3.951244 833 +comparison 1 19 4.007333 4.007333 863 +agent 1 18 4.060443 4.060443 910 +white 1 17 4.110874 4.110874 951 +analyz 1 17 4.110874 4.110874 925 +repositori 1 17 4.110874 4.110874 932 +brief 1 16 4.174387 4.174387 1001 +choic 1 16 4.174387 4.174387 979 +indic 1 15 4.248495 4.248495 1013 +jonathan 1 13 4.382027 4.382027 1174 +food 1 12 4.465908 4.465908 1285 +bruce 1 12 4.465908 4.465908 1226 +count 1 12 4.465908 4.465908 1239 +stephen 1 11 4.553877 4.553877 1342 +induct 1 11 4.553877 4.553877 1304 +metacrawl 1 10 4.653960 4.653960 1455 +shop 1 10 4.653960 4.653960 1469 +packard 1 10 4.653960 4.653960 1444 +sound 1 9 4.753590 4.753590 1605 +incomplet 1 9 4.753590 4.753590 1575 +accomplish 1 8 4.875197 4.875197 1755 +satisfi 1 8 4.875197 4.875197 1694 +gather 1 8 4.875197 4.875197 1719 +aaai 1 8 4.875197 4.875197 1750 +autonom 1 8 4.875197 4.875197 1749 +erik 1 8 4.875197 4.875197 1701 +hewlett 1 8 4.875197 4.875197 1709 +irvin 1 8 4.875197 4.875197 1660 +softbot 1 7 5.010635 5.010635 1974 +sparc 1 7 5.010635 5.010635 1860 +foc 1 7 5.010635 5.010635 1880 +planner 1 7 5.010635 5.010635 1797 +golden 1 7 5.010635 5.010635 1962 +hunt 1 7 5.010635 5.010635 1798 +discoveri 1 7 5.010635 5.010635 1915 +illinoi 1 7 5.010635 5.010635 1941 +usenet 1 7 5.010635 5.010635 1839 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +fiction 1 6 5.164786 5.164786 2217 +forecast 1 6 5.164786 5.164786 2171 +brook 1 6 5.164786 5.164786 2152 +advis 1 6 5.164786 5.164786 2173 +german 1 6 5.164786 5.164786 2190 +accuraci 1 5 5.347108 5.347108 2450 +cacm 1 5 5.347108 5.347108 2388 +keith 1 5 5.347108 5.347108 2528 +selberg 1 5 5.347108 5.347108 2441 +amherst 1 5 5.347108 5.347108 2484 +disambigu 1 4 5.568345 5.568345 2899 +innov 1 4 5.568345 5.568345 2933 +chain 1 4 5.568345 5.568345 2712 +repli 1 4 5.568345 5.568345 2689 +ijcai 1 4 5.568345 5.568345 2901 +sophist 1 3 5.857933 5.857933 3545 +ahoi 1 3 5.857933 5.857933 3532 +deploi 1 3 5.857933 5.857933 3750 +neal 1 3 5.857933 5.857933 3184 +lockhe 1 3 5.857933 5.857933 3863 +faq 1 3 5.857933 5.857933 3216 +pageoren 1 2 6.263398 6.263398 5888 +pagedepart 1 2 6.263398 6.263398 5052 +anddynam 1 2 6.263398 6.263398 5889 +finalist 1 2 6.263398 6.263398 5890 +discoveraward 1 2 6.263398 6.263398 5891 +brute 1 2 6.263398 6.263398 5892 +hypothes 1 2 6.263398 6.263398 5607 +ascal 1 2 6.263398 6.263398 5893 +toappear 1 2 6.263398 6.263398 4343 +bernard 1 2 6.263398 6.263398 5894 +lesh 1 2 6.263398 6.263398 5895 +goan 1 2 6.263398 6.263398 5896 +zamir 1 2 6.263398 6.263398 5897 +shake 1 2 6.263398 6.263398 5898 +umass 1 2 6.263398 6.263398 5899 +bioand 1 1 6.957497 6.957497 15338 +heor 1 1 6.957497 6.957497 15339 +searchmultipl 1 1 6.957497 6.957497 15340 +pruningopt 1 1 6.957497 6.957497 15341 +netrecommend 1 1 6.957497 6.957497 15342 +locatesindividu 1 1 6.957497 6.957497 15343 +bruteforc 1 1 6.957497 6.957497 15344 +whenrun 1 1 6.957497 6.957497 15345 +theweb 1 1 6.957497 6.957497 15346 +richardseg 1 1 6.957497 6.957497 15347 +fileretriev 1 1 6.957497 6.957497 15348 +universalquantif 1 1 6.957497 6.957497 15349 +terranc 1 1 6.957497 6.957497 15350 +mikeperkowitz 1 1 6.957497 6.957497 15351 +soderland 1 1 6.957497 6.957497 15352 +roomi 1 1 6.957497 6.957497 15353 +lesourd 1 1 6.957497 6.957497 15354 +spiger 1 1 6.957497 6.957497 15355 +alford 1 1 6.957497 6.957497 15356 +fitchenholtz 1 1 6.957497 6.957497 15357 +guido 1 1 6.957497 6.957497 15358 +dymitr 1 1 6.957497 6.957497 15359 +mozdyniewicz 1 1 6.957497 6.957497 15360 +quark 1 1 6.957497 6.957497 15361 +minecontain 1 1 6.957497 6.957497 15362 +neuroprosearch 1 1 6.957497 6.957497 15363 +statlib 1 1 6.957497 6.957497 15364 +learningtoolbox 1 1 6.957497 6.957497 15365 +bonn 1 1 6.957497 6.957497 15366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..d3cc52e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +juli 1 60 2.833213 2.833213 305 +thesi 1 57 2.890372 2.890372 327 +faculti 1 56 2.890372 2.890372 325 +done 1 47 3.091042 3.091042 381 +join 1 39 3.258097 3.258097 457 +soon 1 36 3.367296 3.367296 494 +global 1 34 3.401197 3.401197 520 +concern 1 25 3.737670 3.737670 666 +mike 1 24 3.761200 3.761200 703 +finish 1 22 3.850148 3.850148 748 +feelei 1 7 5.010635 5.010635 1859 +british 1 5 5.347108 5.347108 2546 +columbia 1 2 6.263398 6.263398 5900 +papersmi 1 2 6.263398 6.263398 5462 +pagemik 1 1 6.957497 6.957497 15367 +workstationclust 1 1 6.957497 6.957497 15368 +opalproject 1 1 6.957497 6.957497 15369 +injanuari 1 1 6.957497 6.957497 15370 +summarycvsoutheast 1 1 6.957497 6.957497 15371 +idaholast 1 1 6.957497 6.957497 15372 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..12793c5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +seattl 1 120 2.079442 2.079442 103 +pictur 1 89 2.397895 2.397895 160 +sieg 1 69 2.708050 2.708050 260 +room 1 59 2.833213 2.833213 301 +return 1 34 3.401197 3.401197 502 +voic 1 21 3.912023 3.912023 806 +chri 1 11 4.553877 4.553877 1311 +fisher 1 2 6.263398 6.263398 4794 +fisherdepart 1 1 6.957497 6.957497 15373 +engineeringbox 1 1 6.957497 6.957497 15374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..737e3b31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +exam 1 86 2.484907 2.484907 169 +librari 1 87 2.484907 2.484907 181 +thing 1 84 2.484907 2.484907 189 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +main 1 67 2.708050 2.708050 256 +found 1 53 2.944439 2.944439 337 +quarter 1 47 3.091042 3.091042 389 +around 1 43 3.178054 3.178054 415 +music 1 42 3.218876 3.218876 436 +might 1 41 3.218876 3.218876 426 +paul 1 38 3.295837 3.295837 471 +autumn 1 31 3.496508 3.496508 558 +mind 1 27 3.637586 3.637586 632 +rather 1 26 3.688879 3.688879 642 +geometri 1 22 3.850148 3.850148 752 +fund 1 21 3.912023 3.912023 805 +nice 1 20 3.951244 3.951244 809 +listen 1 18 4.060443 4.060443 907 +engineeringunivers 1 17 4.110874 4.110874 959 +otherwis 1 17 4.110874 4.110874 922 +chateau 1 16 4.174387 4.174387 997 +drive 1 15 4.248495 4.248495 1052 +galleri 1 13 4.382027 4.382027 1192 +captur 1 12 4.465908 4.465908 1232 +denni 1 11 4.553877 4.553877 1321 +moment 1 11 4.553877 4.553877 1379 +vista 1 10 4.653960 4.653960 1452 +seven 1 9 4.753590 4.753590 1561 +somewher 1 6 5.164786 5.164786 2176 +wolman 1 6 5.164786 5.164786 2093 +alec 1 5 5.347108 5.347108 2563 +lost 1 5 5.347108 5.347108 2358 +ta 1 4 5.568345 5.568345 3058 +soul 1 4 5.568345 5.568345 2907 +luci 1 3 5.857933 5.857933 3705 +schedulethi 1 2 6.263398 6.263398 4068 +meander 1 2 6.263398 6.263398 5887 +booth 1 2 6.263398 6.263398 5119 +scheduleto 1 1 6.957497 6.957497 15375 +probablyb 1 1 6.957497 6.957497 15376 +activitiesmi 1 1 6.957497 6.957497 15377 +areasof 1 1 6.957497 6.957497 15378 +outta 1 1 6.957497 6.957497 15379 +pea 1 1 6.957497 6.957497 15380 +mofo 1 1 6.957497 6.957497 15381 +peach 1 1 6.957497 6.957497 15382 +ruel 1 1 6.957497 6.957497 15383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..1d03ecef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +public 1 202 1.609438 1.609438 43 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +welcom 1 122 2.079442 2.079442 99 +machin 1 129 2.079442 2.079442 95 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +user 1 104 2.302585 2.302585 137 +real 1 93 2.397895 2.397895 144 +activ 1 84 2.484907 2.484907 182 +educ 1 86 2.484907 2.484907 191 +interfac 1 79 2.564949 2.564949 209 +onlin 1 75 2.639057 2.639057 223 +summari 1 73 2.639057 2.639057 237 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +sieg 1 69 2.708050 2.708050 260 +main 1 67 2.708050 2.708050 256 +august 1 66 2.708050 2.708050 257 +interact 1 62 2.772589 2.772589 270 +polici 1 64 2.772589 2.772589 279 +previou 1 62 2.772589 2.772589 290 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +profession 1 51 2.995732 2.995732 345 +basic 1 50 3.044522 3.044522 360 +formal 1 37 3.332205 3.332205 478 +concurr 1 34 3.401197 3.401197 501 +human 1 32 3.465736 3.465736 546 +collabor 1 32 3.465736 3.465736 543 +chair 1 29 3.583519 3.583519 596 +demonstr 1 24 3.761200 3.761200 694 +verif 1 20 3.951244 3.951244 826 +safeti 1 20 3.951244 3.951244 817 +engineeringunivers 1 17 4.110874 4.110874 959 +anyth 1 16 4.174387 4.174387 998 +washingtonbox 1 13 4.382027 4.382027 1200 +opportun 1 13 4.382027 4.382027 1161 +mellon 1 13 4.382027 4.382027 1179 +carnegi 1 12 4.465908 4.465908 1260 +ski 1 10 4.653960 4.653960 1471 +cook 1 10 4.653960 4.653960 1464 +cultur 1 7 5.010635 5.010635 1951 +spanish 1 4 5.568345 5.568345 3017 +ofmi 1 3 5.857933 5.857933 3911 +uist 1 2 6.263398 6.263398 5901 +vegetarian 1 2 6.263398 6.263398 5902 +greet 1 2 6.263398 6.263398 5903 +francesmari 1 1 6.957497 6.957497 15385 +modugno 1 1 6.957497 6.957497 15384 +pagefrancesmari 1 1 6.957497 6.957497 15386 +algorthim 1 1 6.957497 6.957497 15387 +includecycl 1 1 6.957497 6.957497 15388 +previouslyitalian 1 1 6.957497 6.957497 15389 +elleri 1 1 6.957497 6.957497 15390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..974ca1e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +compil 1 122 2.079442 2.079442 96 +pictur 1 89 2.397895 2.397895 160 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +optim 1 79 2.564949 2.564949 197 +free 1 73 2.639057 2.639057 224 +receiv 1 66 2.708050 2.708050 244 +written 1 63 2.772589 2.772589 278 +dept 1 64 2.772589 2.772589 291 +finger 1 52 2.995732 2.995732 354 +netscap 1 44 3.135494 3.135494 395 +live 1 40 3.258097 3.258097 451 +word 1 34 3.401197 3.401197 508 +someth 1 31 3.496508 3.496508 554 +anim 1 31 3.496508 3.496508 557 +weather 1 28 3.610918 3.610918 618 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +pattern 1 24 3.761200 3.761200 689 +mobil 1 23 3.806662 3.806662 730 +georg 1 16 4.174387 4.174387 994 +match 1 16 4.174387 4.174387 965 +script 1 13 4.382027 4.382027 1171 +song 1 11 4.553877 4.553877 1380 +debugg 1 9 4.753590 4.753590 1493 +pagei 1 8 4.875197 4.875197 1683 +handi 1 6 5.164786 5.164786 2111 +puzzl 1 5 5.347108 5.347108 2507 +water 1 5 5.347108 5.347108 2535 +forman 1 2 6.263398 6.263398 5904 +hyperlink 1 2 6.263398 6.263398 5447 +pagegeorg 1 1 6.957497 6.957497 15391 +ariadn 1 1 6.957497 6.957497 15392 +gforman 1 1 6.957497 6.957497 15393 +comhom 1 1 6.957497 6.957497 15394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..0a782365 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +file 1 132 1.945910 1.945910 70 +tool 1 117 2.079442 2.079442 93 +sinc 1 90 2.397895 2.397895 159 +chang 1 82 2.484907 2.484907 163 +refer 1 78 2.564949 2.564949 203 +intellig 1 72 2.639057 2.639057 225 +artifici 1 63 2.772589 2.772589 280 +life 1 50 3.044522 3.044522 375 +visitor 1 49 3.044522 3.044522 371 +favorit 1 44 3.135494 3.135494 410 +netscap 1 44 3.135494 3.135494 395 +everi 1 34 3.401197 3.401197 519 +collabor 1 32 3.465736 3.465736 543 +quot 1 29 3.583519 3.583519 582 +bookmark 1 26 3.688879 3.688879 639 +applet 1 20 3.951244 3.951244 827 +agent 1 18 4.060443 4.060443 910 +english 1 15 4.248495 4.248495 1033 +trip 1 14 4.317488 4.317488 1113 +bike 1 10 4.653960 4.653960 1468 +poetri 1 9 4.753590 4.753590 1596 +weld 1 9 4.753590 4.753590 1538 +marc 1 8 4.875197 4.875197 1680 +dictionari 1 8 4.875197 4.875197 1642 +gather 1 8 4.875197 4.875197 1719 +friedman 1 7 5.010635 5.010635 1886 +golden 1 7 5.010635 5.010635 1962 +planner 1 7 5.010635 5.010635 1797 +camp 1 5 5.347108 5.347108 2545 +elsewher 1 5 5.347108 5.347108 2444 +keith 1 5 5.347108 5.347108 2528 +spanish 1 4 5.568345 5.568345 3017 +codi 1 3 5.857933 5.857933 3940 +kwok 1 3 5.857933 5.857933 3941 +ucpop 1 3 5.857933 5.857933 3878 +watercolor 1 1 6.957497 6.957497 15395 +checklist 1 1 6.957497 6.957497 15396 +occam 1 1 6.957497 6.957497 15397 +wordbot 1 1 6.957497 6.957497 15398 +nietzschein 1 1 6.957497 6.957497 15399 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..cf4007f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +file 1 132 1.945910 1.945910 70 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +graphic 1 90 2.397895 2.397895 147 +member 1 84 2.484907 2.484907 165 +line 1 75 2.639057 2.639057 231 +plai 1 60 2.833213 2.833213 307 +game 1 36 3.367296 3.367296 498 +neural 1 30 3.555348 3.555348 578 +ofwashington 1 22 3.850148 3.850148 766 +audio 1 14 4.317488 4.317488 1094 +genet 1 10 4.653960 4.653960 1409 +cecil 1 9 4.753590 4.753590 1547 +garrett 1 3 5.857933 5.857933 3377 +charli 1 2 6.263398 6.263398 5905 +bookshelf 1 2 6.263398 6.263398 5724 +algorithmspap 1 1 6.957497 6.957497 15400 +algorithmsformerli 1 1 6.957497 6.957497 15401 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..4db093cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +seattl 1 120 2.079442 2.079442 103 +techniqu 1 99 2.302585 2.302585 138 +graphic 1 90 2.397895 2.397895 147 +follow 1 92 2.397895 2.397895 143 +imag 1 91 2.397895 2.397895 161 +david 1 71 2.639057 2.639057 232 +appli 1 71 2.639057 2.639057 226 +multimedia 1 68 2.708050 2.708050 258 +laboratori 1 63 2.772589 2.772589 292 +thesi 1 57 2.890372 2.890372 327 +three 1 54 2.944439 2.944439 330 +found 1 53 2.944439 2.944439 337 +prototyp 1 38 3.295837 3.295837 463 +deal 1 22 3.850148 3.850148 736 +supervis 1 20 3.951244 3.951244 840 +dimension 1 18 4.060443 4.060443 909 +engineeringunivers 1 17 4.110874 4.110874 959 +render 1 17 4.110874 4.110874 947 +georg 1 16 4.174387 4.174387 994 +eduphon 1 15 4.248495 4.248495 1060 +washingtonbox 1 13 4.382027 4.382027 1200 +galleri 1 13 4.382027 4.382027 1192 +wife 1 13 4.382027 4.382027 1196 +tradit 1 10 4.653960 4.653960 1404 +illustr 1 8 4.875197 4.875197 1679 +salesin 1 4 5.568345 5.568345 3051 +grail 1 3 5.857933 5.857933 3356 +winkenbach 1 1 6.957497 6.957497 15402 +winkenbachdepart 1 1 6.957497 6.957497 15403 +georgew 1 1 6.957497 6.957497 15404 +doneund 1 1 6.957497 6.957497 15405 +theautomat 1 1 6.957497 6.957497 15406 +imagescr 1 1 6.957497 6.957497 15407 +taweewan 1 1 6.957497 6.957497 15408 +siwadun 1 1 6.957497 6.957497 15409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..61694ea2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,212 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +updat 1 191 1.609438 1.609438 41 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +welcom 1 122 2.079442 2.079442 99 +seattl 1 120 2.079442 2.079442 103 +spring 1 131 2.079442 2.079442 88 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +user 1 104 2.302585 2.302585 137 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +commun 1 95 2.397895 2.397895 157 +imag 1 91 2.397895 2.397895 161 +stuff 1 87 2.484907 2.484907 171 +contain 1 81 2.484907 2.484907 174 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +complet 1 77 2.564949 2.564949 208 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +html 1 75 2.639057 2.639057 235 +materi 1 75 2.639057 2.639057 221 +nation 1 74 2.639057 2.639057 240 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +view 1 70 2.708050 2.708050 254 +creat 1 63 2.772589 2.772589 277 +virtual 1 62 2.772589 2.772589 285 +foundat 1 62 2.772589 2.772589 286 +back 1 60 2.833213 2.833213 297 +locat 1 59 2.833213 2.833213 303 +best 1 59 2.833213 2.833213 299 +simpl 1 60 2.833213 2.833213 298 +plai 1 60 2.833213 2.833213 307 +major 1 56 2.890372 2.890372 315 +variou 1 56 2.890372 2.890372 317 +unix 1 58 2.890372 2.890372 308 +index 1 56 2.890372 2.890372 309 +sampl 1 53 2.944439 2.944439 339 +date 1 51 2.995732 2.995732 344 +numer 1 49 3.044522 3.044522 369 +archiv 1 49 3.044522 3.044522 364 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +keep 1 44 3.135494 3.135494 409 +show 1 43 3.178054 3.178054 417 +compani 1 41 3.218876 3.218876 423 +music 1 42 3.218876 3.218876 436 +small 1 39 3.258097 3.258097 447 +author 1 39 3.258097 3.258097 450 +origin 1 38 3.295837 3.295837 472 +seminar 1 38 3.295837 3.295837 470 +microsoft 1 38 3.295837 3.295837 468 +feel 1 37 3.332205 3.332205 483 +connect 1 37 3.332205 3.332205 485 +winter 1 36 3.367296 3.367296 500 +especi 1 36 3.367296 3.367296 496 +game 1 36 3.367296 3.367296 498 +procedur 1 36 3.367296 3.367296 488 +random 1 34 3.401197 3.401197 511 +global 1 34 3.401197 3.401197 520 +articl 1 33 3.433987 3.433987 530 +express 1 32 3.465736 3.465736 540 +scientist 1 31 3.496508 3.496508 560 +computersci 1 30 3.555348 3.555348 562 +full 1 28 3.610918 3.610918 615 +progress 1 28 3.610918 3.610918 598 +univ 1 28 3.610918 3.610918 617 +linux 1 27 3.637586 3.637586 631 +pagecs 1 26 3.688879 3.688879 658 +greg 1 24 3.761200 3.761200 695 +alwai 1 24 3.761200 3.761200 691 +magazin 1 24 3.761200 3.761200 704 +yahoo 1 24 3.761200 3.761200 707 +daili 1 24 3.761200 3.761200 706 +ofwashington 1 22 3.850148 3.850148 766 +recommend 1 22 3.850148 3.850148 737 +busi 1 21 3.912023 3.912023 784 +corpor 1 21 3.912023 3.912023 802 +navig 1 21 3.912023 3.912023 796 +tenni 1 20 3.951244 3.951244 838 +feedback 1 19 4.007333 4.007333 854 +lyco 1 19 4.007333 4.007333 871 +hobbi 1 16 4.174387 4.174387 1009 +devic 1 16 4.174387 4.174387 1002 +upon 1 16 4.174387 4.174387 978 +configur 1 15 4.248495 4.248495 1012 +reflect 1 15 4.248495 4.248495 1034 +incomput 1 14 4.317488 4.317488 1096 +senior 1 14 4.317488 4.317488 1120 +philosophi 1 13 4.382027 4.382027 1167 +misc 1 13 4.382027 4.382027 1124 +emac 1 13 4.382027 4.382027 1143 +emploi 1 12 4.465908 4.465908 1284 +newspap 1 12 4.465908 4.465908 1280 +basketbal 1 12 4.465908 4.465908 1289 +magic 1 11 4.553877 4.553877 1358 +perl 1 11 4.553877 4.553877 1332 +hello 1 10 4.653960 4.653960 1407 +desktop 1 10 4.653960 4.653960 1445 +ski 1 10 4.653960 4.653960 1471 +fellowship 1 10 4.653960 4.653960 1460 +volleybal 1 9 4.753590 4.753590 1598 +inter 1 9 4.753590 4.753590 1530 +competit 1 8 4.875197 4.875197 1635 +entri 1 8 4.875197 4.875197 1678 +readm 1 8 4.875197 4.875197 1699 +joel 1 8 4.875197 4.875197 1698 +extract 1 8 4.875197 4.875197 1728 +opinion 1 8 4.875197 4.875197 1708 +chronicl 1 7 5.010635 5.010635 1952 +gatewai 1 7 5.010635 5.010635 1942 +necessarili 1 7 5.010635 5.010635 1899 +duke 1 6 5.164786 5.164786 2231 +piano 1 6 5.164786 5.164786 2201 +histor 1 6 5.164786 5.164786 2085 +vertic 1 5 5.347108 5.347108 2270 +hole 1 5 5.347108 5.347108 2518 +billi 1 5 5.347108 5.347108 2404 +doubl 1 4 5.568345 5.568345 2951 +patch 1 4 5.568345 5.568345 2710 +drew 1 4 5.568345 5.568345 2980 +cube 1 4 5.568345 5.568345 2940 +eduaddress 1 3 5.857933 5.857933 3762 +rsum 1 3 5.857933 5.857933 3939 +jackson 1 3 5.857933 5.857933 3586 +freewar 1 3 5.857933 5.857933 3504 +hotjava 1 3 5.857933 5.857933 3220 +seinfeld 1 3 5.857933 5.857933 3958 +conclus 1 3 5.857933 5.857933 3367 +pagegreg 1 2 6.263398 6.263398 5906 +amcurr 1 2 6.263398 6.263398 5798 +bermuda 1 2 6.263398 6.263398 5907 +seminarcs 1 2 6.263398 6.263398 4521 +geneticalgorithm 1 2 6.263398 6.263398 5673 +ncaa 1 2 6.263398 6.263398 5908 +unoffici 1 2 6.263398 6.263398 5909 +unif 1 2 6.263398 6.263398 5910 +badro 1 1 6.957497 6.957497 15410 +nesbit 1 1 6.957497 6.957497 15412 +isuppos 1 1 6.957497 6.957497 15413 +excitingfeatur 1 1 6.957497 6.957497 15414 +dukeunivers 1 1 6.957497 6.957497 15415 +fortransworld 1 1 6.957497 6.957497 15416 +indurham 1 1 6.957497 6.957497 15417 +headquart 1 1 6.957497 6.957497 15418 +myapart 1 1 6.957497 6.957497 15419 +newer 1 1 6.957497 6.957497 15420 +fvwm 1 1 6.957497 6.957497 15421 +redhat 1 1 6.957497 6.957497 15422 +zshell 1 1 6.957497 6.957497 15411 +transworldnumer 1 1 6.957497 6.957497 15423 +ieeenat 1 1 6.957497 6.957497 15424 +victori 1 1 6.957497 6.957497 15425 +bycomput 1 1 6.957497 6.957497 15426 +canterburi 1 1 6.957497 6.957497 15427 +definitelynot 1 1 6.957497 6.957497 15428 +juggl 1 1 6.957497 6.957497 15429 +rubik 1 1 6.957497 6.957497 15430 +sarahmclachlan 1 1 6.957497 6.957497 15431 +parliamentari 1 1 6.957497 6.957497 15432 +sgml 1 1 6.957497 6.957497 15433 +sitcom 1 1 6.957497 6.957497 15434 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..ec02da7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,295 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +file 1 132 1.945910 1.945910 70 +tool 1 117 2.079442 2.079442 93 +seattl 1 120 2.079442 2.079442 103 +look 1 107 2.197225 2.197225 115 +check 1 115 2.197225 2.197225 118 +assist 1 112 2.197225 2.197225 113 +code 1 108 2.197225 2.197225 116 +world 1 115 2.197225 2.197225 126 +intern 1 108 2.197225 2.197225 128 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +search 1 95 2.397895 2.397895 155 +real 1 93 2.397895 2.397895 144 +associ 1 93 2.397895 2.397895 151 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +thing 1 84 2.484907 2.484907 189 +second 1 81 2.484907 2.484907 166 +stuff 1 87 2.484907 2.484907 171 +info 1 85 2.484907 2.484907 176 +complet 1 77 2.564949 2.564949 208 +decemb 1 80 2.564949 2.564949 215 +resum 1 79 2.564949 2.564949 217 +sourc 1 77 2.564949 2.564949 201 +addit 1 74 2.639057 2.639057 228 +servic 1 72 2.639057 2.639057 236 +appli 1 71 2.639057 2.639057 226 +goal 1 66 2.708050 2.708050 250 +java 1 70 2.708050 2.708050 248 +function 1 62 2.772589 2.772589 275 +foundat 1 62 2.772589 2.772589 286 +evalu 1 64 2.772589 2.772589 266 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +artifici 1 63 2.772589 2.772589 280 +written 1 63 2.772589 2.772589 278 +best 1 59 2.833213 2.833213 299 +major 1 56 2.890372 2.890372 315 +publish 1 57 2.890372 2.890372 326 +browser 1 56 2.890372 2.890372 313 +summer 1 56 2.890372 2.890372 311 +thesi 1 57 2.890372 2.890372 327 +reason 1 57 2.890372 2.890372 318 +allow 1 53 2.944439 2.944439 333 +found 1 53 2.944439 2.944439 337 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +cool 1 49 3.044522 3.044522 374 +give 1 50 3.044522 3.044522 359 +archiv 1 49 3.044522 3.044522 364 +life 1 50 3.044522 3.044522 375 +california 1 46 3.091042 3.091042 388 +could 1 46 3.091042 3.091042 383 +even 1 45 3.135494 3.135494 393 +made 1 44 3.135494 3.135494 398 +third 1 43 3.178054 3.178054 412 +show 1 43 3.178054 3.178054 417 +autom 1 41 3.218876 3.218876 434 +linear 1 41 3.218876 3.218876 431 +might 1 41 3.218876 3.218876 426 +submit 1 39 3.258097 3.258097 440 +multipl 1 39 3.258097 3.258097 453 +movi 1 40 3.258097 3.258097 459 +prototyp 1 38 3.295837 3.295837 463 +origin 1 38 3.295837 3.295837 472 +expect 1 37 3.332205 3.332205 484 +staff 1 36 3.367296 3.367296 490 +tree 1 36 3.367296 3.367296 492 +ofth 1 36 3.367296 3.367296 491 +return 1 34 3.401197 3.401197 502 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +concept 1 32 3.465736 3.465736 537 +posit 1 31 3.496508 3.496508 552 +scientist 1 31 3.496508 3.496508 560 +anim 1 31 3.496508 3.496508 557 +graph 1 30 3.555348 3.555348 576 +travel 1 30 3.555348 3.555348 579 +neural 1 30 3.555348 3.555348 578 +quit 1 27 3.637586 3.637586 633 +though 1 27 3.637586 3.637586 622 +manipul 1 27 3.637586 3.637586 624 +constraint 1 26 3.688879 3.688879 636 +toward 1 25 3.737670 3.737670 668 +client 1 25 3.737670 3.737670 679 +trace 1 25 3.737670 3.737670 677 +greg 1 24 3.761200 3.761200 695 +demonstr 1 24 3.761200 3.761200 694 +pattern 1 24 3.761200 3.761200 689 +famili 1 23 3.806662 3.806662 735 +displai 1 23 3.806662 3.806662 712 +brows 1 23 3.806662 3.806662 726 +highli 1 23 3.806662 3.806662 725 +thread 1 23 3.806662 3.806662 722 +honor 1 23 3.806662 3.806662 729 +compress 1 23 3.806662 3.806662 719 +ofwashington 1 22 3.850148 3.850148 766 +instead 1 22 3.850148 3.850148 756 +love 1 21 3.912023 3.912023 804 +leav 1 21 3.912023 3.912023 772 +avoid 1 21 3.912023 3.912023 799 +applet 1 20 3.951244 3.951244 827 +wrote 1 20 3.951244 3.951244 830 +qualiti 1 20 3.951244 3.951244 832 +repositori 1 17 4.110874 4.110874 932 +thought 1 17 4.110874 4.110874 945 +adam 1 17 4.110874 4.110874 934 +diego 1 16 4.174387 4.174387 992 +earli 1 16 4.174387 4.174387 968 +spatial 1 16 4.174387 4.174387 988 +anyth 1 16 4.174387 4.174387 998 +dilbert 1 16 4.174387 4.174387 996 +cognit 1 16 4.174387 4.174387 986 +chateau 1 16 4.174387 4.174387 997 +rate 1 15 4.248495 4.248495 1037 +capabl 1 15 4.248495 4.248495 1016 +enough 1 15 4.248495 4.248495 1040 +reflect 1 15 4.248495 4.248495 1034 +doesn 1 15 4.248495 4.248495 1055 +draw 1 14 4.317488 4.317488 1086 +econom 1 13 4.382027 4.382027 1184 +buffer 1 12 4.465908 4.465908 1211 +entertain 1 12 4.465908 4.465908 1286 +hank 1 12 4.465908 4.465908 1253 +iter 1 12 4.465908 4.465908 1206 +assembl 1 12 4.465908 4.465908 1207 +keyword 1 11 4.553877 4.553877 1356 +transpar 1 11 4.553877 4.553877 1325 +thecomput 1 10 4.653960 4.653960 1408 +awai 1 10 4.653960 4.653960 1447 +enter 1 10 4.653960 4.653960 1454 +metacrawl 1 10 4.653960 4.653960 1455 +prefer 1 9 4.753590 4.753590 1491 +pick 1 9 4.753590 4.753590 1498 +congress 1 9 4.753590 4.753590 1592 +oop 1 8 4.875197 4.875197 1778 +realist 1 8 4.875197 4.875197 1665 +judg 1 8 4.875197 4.875197 1644 +guggenheim 1 8 4.875197 4.875197 1759 +hit 1 7 5.010635 5.010635 1965 +occasion 1 7 5.010635 5.010635 1905 +polit 1 6 5.164786 5.164786 2115 +impress 1 6 5.164786 5.164786 2096 +unpublish 1 6 5.164786 5.164786 2226 +stage 1 5 5.347108 5.347108 2488 +contest 1 5 5.347108 5.347108 2273 +particl 1 5 5.347108 5.347108 2436 +carlson 1 5 5.347108 5.347108 2351 +chess 1 5 5.347108 5.347108 2486 +annex 1 5 5.347108 5.347108 2572 +doubl 1 4 5.568345 5.568345 2951 +gradual 1 4 5.568345 5.568345 2997 +sorri 1 4 5.568345 5.568345 3059 +mess 1 4 5.568345 5.568345 2886 +slave 1 3 5.857933 5.857933 3959 +emul 1 3 5.857933 5.857933 3944 +jar 1 3 5.857933 5.857933 3223 +cleaner 1 3 5.857933 5.857933 3775 +civil 1 3 5.857933 5.857933 3908 +evolutionari 1 3 5.857933 5.857933 3898 +prison 1 3 5.857933 5.857933 3907 +boolean 1 3 5.857933 5.857933 3202 +recurr 1 3 5.857933 5.857933 3740 +sujai 1 3 5.857933 5.857933 3960 +parekh 1 3 5.857933 5.857933 3961 +shadow 1 3 5.857933 5.857933 3519 +inventor 1 3 5.857933 5.857933 3695 +quicktim 1 3 5.857933 5.857933 3493 +scienceher 1 2 6.263398 6.263398 5912 +elicit 1 2 6.263398 6.263398 4294 +flight 1 2 6.263398 6.263398 5911 +gamelan 1 2 6.263398 6.263398 4221 +thejava 1 2 6.263398 6.263398 4704 +certainli 1 2 6.263398 6.263398 4090 +ucsd 1 2 6.263398 6.263398 5192 +belew 1 2 6.263398 6.263398 4739 +lesh 1 2 6.263398 6.263398 5895 +tracer 1 2 6.263398 6.263398 5913 +inc 1 2 6.263398 6.263398 5914 +duel 1 2 6.263398 6.263398 5855 +rai 1 2 6.263398 6.263398 5915 +theanim 1 2 6.263398 6.263398 5852 +thed 1 2 6.263398 6.263398 4963 +mbquicktim 1 2 6.263398 6.263398 5916 +linden 1 1 6.957497 6.957497 15435 +lindenmi 1 1 6.957497 6.957497 15442 +wifecorina 1 1 6.957497 6.957497 15443 +lofti 1 1 6.957497 6.957497 15444 +undergraduatedegre 1 1 6.957497 6.957497 15445 +anodd 1 1 6.957497 6.957497 15446 +mactiv 1 1 6.957497 6.957497 15447 +webview 1 1 6.957497 6.957497 15436 +orset 1 1 6.957497 6.957497 15448 +altavistawebviewand 1 1 6.957497 6.957497 15449 +metawebview 1 1 6.957497 6.957497 15450 +foraltavista 1 1 6.957497 6.957497 15451 +searchservic 1 1 6.957497 6.957497 15452 +dialog 1 1 6.957497 6.957497 15453 +travelag 1 1 6.957497 6.957497 15454 +whileallow 1 1 6.957497 6.957497 15455 +andjar 1 1 6.957497 6.957497 15456 +wasrat 1 1 6.957497 6.957497 15437 +andwa 1 1 6.957497 6.957497 15438 +altavistawebview 1 1 6.957497 6.957497 15457 +winner 1 1 6.957497 6.957497 15458 +walsh 1 1 6.957497 6.957497 15459 +meilleur 1 1 6.957497 6.957497 15460 +ballet 1 1 6.957497 6.957497 15461 +flicker 1 1 6.957497 6.957497 15462 +standardsto 1 1 6.957497 6.957497 15463 +mylgramm 1 1 6.957497 6.957497 15464 +lgrammer 1 1 6.957497 6.957497 15465 +theparticletre 1 1 6.957497 6.957497 15466 +thejar 1 1 6.957497 6.957497 15467 +dawn 1 1 6.957497 6.957497 15468 +ademonstr 1 1 6.957497 6.957497 15469 +cansuccessfulli 1 1 6.957497 6.957497 15470 +myriadsoftwar 1 1 6.957497 6.957497 15471 +filippo 1 1 6.957497 6.957497 15472 +menzer 1 1 6.957497 6.957497 15473 +latentenergi 1 1 6.957497 6.957497 15474 +developingartifici 1 1 6.957497 6.957497 15475 +neuralnetwork 1 1 6.957497 6.957497 15439 +enviro 1 1 6.957497 6.957497 15476 +theautom 1 1 6.957497 6.957497 15477 +assit 1 1 6.957497 6.957497 15478 +majeski 1 1 6.957497 6.957497 15479 +spitzer 1 1 6.957497 6.957497 15480 +localizedinteract 1 1 6.957497 6.957497 15481 +dilemma 1 1 6.957497 6.957497 15482 +krishnamoorthi 1 1 6.957497 6.957497 15483 +paturi 1 1 6.957497 6.957497 15484 +blume 1 1 6.957497 6.957497 15485 +liden 1 1 6.957497 6.957497 15486 +esen 1 1 6.957497 6.957497 15487 +hardwaretradeoff 1 1 6.957497 6.957497 15488 +sdilemma 1 1 6.957497 6.957497 15489 +funrai 1 1 6.957497 6.957497 15490 +headless 1 1 6.957497 6.957497 15440 +horseman 1 1 6.957497 6.957497 15441 +closeup 1 1 6.957497 6.957497 15491 +sphere 1 1 6.957497 6.957497 15492 +withreflect 1 1 6.957497 6.957497 15493 +adaptivesampl 1 1 6.957497 6.957497 15494 +thespher 1 1 6.957497 6.957497 15495 +causingth 1 1 6.957497 6.957497 15496 +refract 1 1 6.957497 6.957497 15497 +surfaceand 1 1 6.957497 6.957497 15498 +strike 1 1 6.957497 6.957497 15499 +alow 1 1 6.957497 6.957497 15500 +anyfurth 1 1 6.957497 6.957497 15501 +resembl 1 1 6.957497 6.957497 15502 +glinden 1 1 6.957497 6.957497 15503 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..edd5435b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +relat 1 139 1.945910 1.945910 68 +compil 1 122 2.079442 2.079442 96 +octob 1 89 2.397895 2.397895 156 +homework 1 79 2.564949 2.564949 193 +brian 1 38 3.295837 3.295837 466 +computersci 1 30 3.555348 3.555348 562 +trip 1 14 4.317488 4.317488 1113 +grant 1 12 4.465908 4.465908 1216 +awai 1 10 4.653960 4.653960 1447 +daughter 1 7 5.010635 5.010635 1943 +pagebrian 1 4 5.568345 5.568345 3054 +groupuw 1 3 5.857933 5.857933 3934 +kri 1 1 6.957497 6.957497 15504 +infowork 1 1 6.957497 6.957497 15505 +backgrounduwdynam 1 1 6.957497 6.957497 15506 +engineeringperson 1 1 6.957497 6.957497 15507 +stuffperson 1 1 6.957497 6.957497 15508 +backgroundmi 1 1 6.957497 6.957497 15509 +isismi 1 1 6.957497 6.957497 15510 +singaporemi 1 1 6.957497 6.957497 15511 +bookmarksmi 1 1 6.957497 6.957497 15512 +keylast 1 1 6.957497 6.957497 15513 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..ad6971db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +paper 1 205 1.609438 1.609438 38 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +number 1 130 2.079442 2.079442 97 +manag 1 114 2.197225 2.197225 125 +pictur 1 89 2.397895 2.397895 160 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +complet 1 77 2.564949 2.564949 208 +sieg 1 69 2.708050 2.708050 260 +integr 1 67 2.708050 2.708050 245 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +summer 1 56 2.890372 2.890372 311 +much 1 52 2.995732 2.995732 349 +frequent 1 49 3.044522 3.044522 367 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +cool 1 49 3.044522 3.044522 374 +anoth 1 45 3.135494 3.135494 408 +author 1 39 3.258097 3.258097 450 +littl 1 39 3.258097 3.258097 454 +close 1 38 3.295837 3.295837 465 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +taken 1 31 3.496508 3.496508 555 +someth 1 31 3.496508 3.496508 554 +actual 1 28 3.610918 3.610918 604 +team 1 27 3.637586 3.637586 625 +consult 1 24 3.761200 3.761200 687 +sometim 1 24 3.761200 3.761200 696 +spend 1 19 4.007333 4.007333 850 +less 1 18 4.060443 4.060443 892 +along 1 18 4.060443 4.060443 878 +attempt 1 17 4.110874 4.110874 917 +white 1 17 4.110874 4.110874 951 +chateau 1 16 4.174387 4.174387 997 +took 1 16 4.174387 4.174387 1010 +month 1 15 4.248495 4.248495 1025 +dave 1 14 4.317488 4.317488 1098 +trip 1 14 4.317488 4.317488 1113 +council 1 11 4.553877 4.553877 1364 +cecil 1 9 4.753590 4.753590 1547 +hang 1 9 4.753590 4.753590 1499 +grove 1 8 4.875197 4.875197 1675 +pure 1 8 4.875197 4.875197 1776 +vehicl 1 7 5.010635 5.010635 1928 +wouldn 1 7 5.010635 5.010635 1970 +footbal 1 7 5.010635 5.010635 1912 +strip 1 6 5.164786 5.164786 2203 +toronto 1 6 5.164786 5.164786 2156 +spinproject 1 5 5.347108 5.347108 2570 +gui 1 5 5.347108 5.347108 2573 +water 1 5 5.347108 5.347108 2535 +worki 1 4 5.568345 5.568345 3010 +fantasi 1 4 5.568345 5.568345 3055 +silli 1 4 5.568345 5.568345 3038 +raft 1 4 5.568345 5.568345 3060 +langaug 1 3 5.857933 5.857933 3661 +hampshir 1 3 5.857933 5.857933 3280 +kick 1 3 5.857933 5.857933 3962 +hord 1 2 6.263398 6.263398 5917 +aroundth 1 2 6.263398 6.263398 5653 +fring 1 2 6.263398 6.263398 5721 +boi 1 2 6.263398 6.263398 5918 +toseattl 1 2 6.263398 6.263398 5919 +soonish 1 1 6.957497 6.957497 15514 +dilbertfix 1 1 6.957497 6.957497 15515 +thathit 1 1 6.957497 6.957497 15516 +underacheiv 1 1 6.957497 6.957497 15517 +scoutreserv 1 1 6.957497 6.957497 15518 +greaterlowel 1 1 6.957497 6.957497 15519 +casunset 1 1 6.957497 6.957497 15520 +cabin 1 1 6.957497 6.957497 15521 +drove 1 1 6.957497 6.957497 15522 +detour 1 1 6.957497 6.957497 15523 +somehihglight 1 1 6.957497 6.957497 15524 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..2515c8b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +level 1 87 2.484907 2.484907 180 +educ 1 86 2.484907 2.484907 191 +june 1 79 2.564949 2.564949 214 +simul 1 66 2.708050 2.708050 255 +experi 1 64 2.772589 2.772589 283 +improv 1 62 2.772589 2.772589 289 +hardwar 1 51 2.995732 2.995732 350 +prototyp 1 38 3.295837 3.295837 463 +multi 1 36 3.367296 3.367296 493 +survei 1 35 3.401197 3.401197 513 +board 1 33 3.433987 3.433987 528 +curriculum 1 33 3.433987 3.433987 535 +methodolog 1 23 3.806662 3.806662 733 +rout 1 21 3.912023 3.912023 793 +synthesi 1 20 3.951244 3.951244 834 +scott 1 18 4.060443 4.060443 884 +commerci 1 16 4.174387 4.174387 1005 +partit 1 16 4.174387 4.174387 984 +topolog 1 14 4.317488 4.317488 1089 +embed 1 14 4.317488 4.317488 1102 +circuit 1 13 4.382027 4.382027 1131 +asynchron 1 12 4.465908 4.465908 1229 +fpga 1 10 4.653960 4.653960 1433 +rapid 1 10 4.653960 4.653960 1453 +densiti 1 7 5.010635 5.010635 1927 +chinook 1 6 5.164786 5.164786 2229 +triptych 1 4 5.568345 5.568345 3061 +biographi 1 3 5.857933 5.857933 3658 +hauck 1 2 6.263398 6.263398 5920 +montag 1 2 6.263398 6.263398 5921 +springbok 1 2 6.263398 6.263398 5922 +thoughi 1 1 6.957497 6.957497 15525 +vitaeresearch 1 1 6.957497 6.957497 15526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..a76f3330 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +kevin 1 9 4.753590 4.753590 1482 +hinshaw 1 1 6.957497 6.957497 15527 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..47f99aae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +place 1 106 2.197225 2.197225 124 +part 1 98 2.302585 2.302585 129 +imag 1 91 2.397895 2.397895 161 +chang 1 82 2.484907 2.484907 163 +stuff 1 87 2.484907 2.484907 171 +decemb 1 80 2.564949 2.564949 215 +master 1 76 2.564949 2.564949 216 +name 1 72 2.639057 2.639057 220 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +dept 1 64 2.772589 2.772589 291 +back 1 60 2.833213 2.833213 297 +thesi 1 57 2.890372 2.890372 327 +week 1 52 2.995732 2.995732 343 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +pointer 1 48 3.044522 3.044522 368 +possibl 1 47 3.091042 3.091042 378 +around 1 43 3.178054 3.178054 415 +profil 1 30 3.555348 3.555348 581 +pass 1 28 3.610918 3.610918 611 +univ 1 28 3.610918 3.610918 617 +subject 1 26 3.688879 3.688879 647 +notic 1 25 3.737670 3.737670 675 +head 1 23 3.806662 3.806662 732 +brows 1 23 3.806662 3.806662 726 +half 1 21 3.912023 3.912023 776 +busi 1 21 3.912023 3.912023 784 +unfortun 1 13 4.382027 4.382027 1170 +rememb 1 12 4.465908 4.465908 1217 +neat 1 12 4.465908 4.465908 1263 +daughter 1 7 5.010635 5.010635 1943 +chinook 1 6 5.164786 5.164786 2229 +upper 1 5 5.347108 5.347108 2481 +wast 1 5 5.347108 5.347108 2537 +silli 1 4 5.568345 5.568345 3038 +washingtonseattl 1 4 5.568345 5.568345 3044 +macduff 1 2 6.263398 6.263398 5923 +emma 1 2 6.263398 6.263398 5546 +obsess 1 2 6.263398 6.263398 5924 +ultrasound 1 1 6.957497 6.957497 15528 +elspeth 1 1 6.957497 6.957497 15529 +unborn 1 1 6.957497 6.957497 15530 +fromconcept 1 1 6.957497 6.957497 15531 +ripe 1 1 6.957497 6.957497 15532 +inmid 1 1 6.957497 6.957497 15533 +ly 1 1 6.957497 6.957497 15534 +lookingup 1 1 6.957497 6.957497 15535 +torso 1 1 6.957497 6.957497 15536 +theleft 1 1 6.957497 6.957497 15537 +impend 1 1 6.957497 6.957497 15538 +fatherhood 1 1 6.957497 6.957497 15539 +myspam 1 1 6.957497 6.957497 15540 +usingwebcrawl 1 1 6.957497 6.957497 15541 +frogstv 1 1 6.957497 6.957497 15542 +nationpenn 1 1 6.957497 6.957497 15543 +tellermus 1 1 6.957497 6.957497 15544 +lyricsian 1 1 6.957497 6.957497 15545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..736cf709 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,146 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +file 1 132 1.945910 1.945910 70 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +memori 1 101 2.302585 2.302585 139 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +decemb 1 80 2.564949 2.564949 215 +name 1 72 2.639057 2.639057 220 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +sieg 1 69 2.708050 2.708050 260 +main 1 67 2.708050 2.708050 256 +order 1 69 2.708050 2.708050 249 +septemb 1 65 2.772589 2.772589 274 +virtual 1 62 2.772589 2.772589 285 +visit 1 63 2.772589 2.772589 288 +back 1 60 2.833213 2.833213 297 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +local 1 55 2.944439 2.944439 334 +processor 1 54 2.944439 2.944439 335 +cach 1 41 3.218876 3.218876 432 +small 1 39 3.258097 3.258097 447 +map 1 39 3.258097 3.258097 452 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +mean 1 37 3.332205 3.332205 477 +global 1 34 3.401197 3.401197 520 +cluster 1 28 3.610918 3.610918 612 +load 1 28 3.610918 3.610918 601 +primari 1 25 3.737670 3.737670 669 +fundament 1 25 3.737670 3.737670 661 +mike 1 24 3.761200 3.761200 703 +store 1 24 3.761200 3.761200 693 +size 1 23 3.806662 3.806662 713 +reduc 1 22 3.850148 3.850148 759 +disk 1 22 3.850148 3.850148 747 +hierarchi 1 22 3.850148 3.850148 744 +unit 1 21 3.912023 3.912023 779 +thu 1 21 3.912023 3.912023 773 +increas 1 20 3.951244 3.951244 829 +speed 1 18 4.060443 4.060443 911 +encourag 1 18 4.060443 4.060443 880 +engineeringunivers 1 17 4.110874 4.110874 959 +debug 1 17 4.110874 4.110874 944 +transfer 1 16 4.174387 4.174387 967 +modern 1 16 4.174387 4.174387 966 +latenc 1 16 4.174387 4.174387 993 +remot 1 15 4.248495 4.248495 1041 +levi 1 14 4.317488 4.317488 1093 +washingtonbox 1 13 4.382027 4.382027 1200 +karlin 1 13 4.382027 4.382027 1176 +introduc 1 13 4.382027 4.382027 1139 +unfortun 1 13 4.382027 4.382027 1170 +galleri 1 13 4.382027 4.382027 1192 +hank 1 12 4.465908 4.465908 1253 +mari 1 12 4.465908 4.465908 1266 +anna 1 12 4.465908 4.465908 1292 +franc 1 12 4.465908 4.465908 1276 +node 1 11 4.553877 4.553877 1326 +extrem 1 11 4.553877 4.553877 1330 +vernon 1 9 4.753590 4.753590 1556 +voelker 1 9 4.753590 4.753590 1557 +factor 1 9 4.753590 4.753590 1544 +postdoc 1 8 4.875197 4.875197 1724 +evan 1 8 4.875197 4.875197 1633 +inproceed 1 8 4.875197 4.875197 1670 +feelei 1 7 5.010635 5.010635 1859 +trend 1 7 5.010635 5.010635 1842 +geoff 1 6 5.164786 5.164786 2124 +temporari 1 6 5.164786 5.164786 2090 +li 1 5 5.347108 5.347108 2500 +seventh 1 5 5.347108 5.347108 2464 +joseph 1 5 5.347108 5.347108 2327 +coverag 1 4 5.568345 5.568345 2656 +greatli 1 3 5.857933 5.857933 3541 +europ 1 3 5.857933 5.857933 3761 +jamrozik 1 2 6.263398 6.263398 5925 +amort 1 2 6.263398 6.263398 4370 +odd 1 2 6.263398 6.263398 5565 +subpag 1 2 6.263398 6.263398 5926 +theuniversit 1 2 6.263398 6.263398 5927 +fourier 1 2 6.263398 6.263398 5698 +grenobl 1 2 6.263398 6.263398 5928 +laboratoir 1 2 6.263398 6.263398 5929 +herv 1 1 6.957497 6.957497 15546 +jamrozikherv 1 1 6.957497 6.957497 15547 +memoi 1 1 6.957497 6.957497 15548 +therebi 1 1 6.957497 6.957497 15549 +intens 1 1 6.957497 6.957497 15550 +lightli 1 1 6.957497 6.957497 15551 +guideproject 1 1 6.957497 6.957497 15552 +bull 1 1 6.957497 6.957497 15553 +imaginstitut 1 1 6.957497 6.957497 15554 +snot 1 1 6.957497 6.957497 15555 +louvr 1 1 6.957497 6.957497 15556 +somefamili 1 1 6.957497 6.957497 15557 +somefriend 1 1 6.957497 6.957497 15558 +eduv 1 1 6.957497 6.957497 15559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..0bbb95ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +hall 1 146 1.945910 1.945910 65 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +frequent 1 49 3.044522 3.044522 367 +weather 1 28 3.610918 3.610918 618 +channel 1 7 5.010635 5.010635 1836 +forecast 1 6 5.164786 5.164786 2171 +jason 1 3 5.857933 5.857933 3389 +eduaddress 1 3 5.857933 5.857933 3762 +secoski 1 2 6.263398 6.263398 4526 +pagejason 1 1 6.957497 6.957497 15560 +cunivers 1 1 6.957497 6.957497 15561 +boxseattl 1 1 6.957497 6.957497 15562 +projectseattl 1 1 6.957497 6.957497 15563 +secoskylast 1 1 6.957497 6.957497 15564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..d631e2fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +question 1 91 2.397895 2.397895 141 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +stuff 1 87 2.484907 2.484907 171 +intellig 1 72 2.639057 2.639057 225 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +artifici 1 63 2.772589 2.772589 280 +virtual 1 62 2.772589 2.772589 285 +copi 1 63 2.772589 2.772589 284 +cool 1 49 3.044522 3.044522 374 +effect 1 46 3.091042 3.091042 385 +mark 1 44 3.135494 3.135494 403 +made 1 44 3.135494 3.135494 398 +music 1 42 3.218876 3.218876 436 +realli 1 40 3.258097 3.258097 444 +littl 1 39 3.258097 3.258097 454 +industri 1 38 3.295837 3.295837 464 +feel 1 37 3.332205 3.332205 483 +download 1 36 3.367296 3.367296 489 +human 1 32 3.465736 3.465736 546 +travel 1 30 3.555348 3.555348 579 +progress 1 28 3.610918 3.610918 598 +static 1 27 3.637586 3.637586 619 +mine 1 26 3.688879 3.688879 654 +experiment 1 26 3.688879 3.688879 645 +never 1 25 3.737670 3.737670 671 +william 1 22 3.850148 3.850148 765 +applet 1 20 3.951244 3.951244 827 +wrote 1 20 3.951244 3.951244 830 +spend 1 19 4.007333 4.007333 850 +demo 1 18 4.060443 4.060443 888 +layer 1 17 4.110874 4.110874 926 +macintosh 1 17 4.110874 4.110874 920 +signific 1 13 4.382027 4.382027 1125 +baer 1 11 4.553877 4.553877 1353 +scienceat 1 11 4.553877 4.553877 1375 +eight 1 11 4.553877 4.553877 1331 +metacrawl 1 10 4.653960 4.653960 1455 +creativ 1 8 4.875197 4.875197 1777 +dream 1 6 5.164786 5.164786 2165 +jeremi 1 5 5.347108 5.347108 2360 +interfer 1 5 5.347108 5.347108 2494 +puzzl 1 5 5.347108 5.347108 2507 +silli 1 4 5.568345 5.568345 3038 +thati 1 4 5.568345 5.568345 2616 +museum 1 3 5.857933 5.857933 3933 +computerinteract 1 2 6.263398 6.263398 5829 +stress 1 2 6.263398 6.263398 4146 +baerjeremi 1 1 6.957497 6.957497 15565 +twain 1 1 6.957497 6.957497 15566 +shakespearei 1 1 6.957497 6.957497 15567 +engineeringtool 1 1 6.957497 6.957497 15568 +pierian 1 1 6.957497 6.957497 15569 +softwareoregon 1 1 6.957497 6.957497 15570 +omsi 1 1 6.957497 6.957497 15571 +pomona 1 1 6.957497 6.957497 15572 +collegeher 1 1 6.957497 6.957497 15573 +searchcopyright 1 1 6.957497 6.957497 15574 +jbaer 1 1 6.957497 6.957497 15575 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..01c77951 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +postscript 1 131 2.079442 2.079442 90 +control 1 82 2.484907 2.484907 164 +institut 1 84 2.484907 2.484907 187 +activ 1 84 2.484907 2.484907 182 +come 1 78 2.564949 2.564949 202 +sieg 1 69 2.708050 2.708050 260 +import 1 65 2.772589 2.772589 282 +foundat 1 62 2.772589 2.772589 286 +browser 1 56 2.890372 2.890372 313 +undergradu 1 54 2.944439 2.944439 338 +finger 1 52 2.995732 2.995732 354 +electron 1 47 3.091042 3.091042 379 +tree 1 36 3.367296 3.367296 492 +soon 1 36 3.367296 3.367296 494 +return 1 34 3.401197 3.401197 502 +statu 1 18 4.060443 4.060443 885 +attempt 1 17 4.110874 4.110874 917 +latex 1 14 4.317488 4.317488 1064 +rice 1 11 4.553877 4.553877 1336 +transmiss 1 9 4.753590 4.753590 1588 +jeremi 1 5 5.347108 5.347108 2360 +adjust 1 5 5.347108 5.347108 2422 +frontier 1 3 5.857933 5.857933 3771 +alma 1 3 5.857933 5.857933 3963 +schedulemi 1 2 6.263398 6.263398 5843 +mater 1 2 6.263398 6.263398 5930 +buhler 1 1 6.957497 6.957497 15576 +pagejeremi 1 1 6.957497 6.957497 15578 +pagedo 1 1 6.957497 6.957497 15579 +jbuhler 1 1 6.957497 6.957497 15577 +tako 1 1 6.957497 6.957497 15580 +stufflectur 1 1 6.957497 6.957497 15581 +suffix 1 1 6.957497 6.957497 15582 +keycyb 1 1 6.957497 6.957497 15583 +grinsrecommend 1 1 6.957497 6.957497 15584 +readingmi 1 1 6.957497 6.957497 15585 +universityquot 1 1 6.957497 6.957497 15586 +quotesmi 1 1 6.957497 6.957497 15587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..3deee50c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,271 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +seattl 1 120 2.079442 2.079442 103 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +real 1 93 2.397895 2.397895 144 +build 1 85 2.484907 2.484907 184 +environ 1 84 2.484907 2.484907 177 +larg 1 82 2.484907 2.484907 168 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +level 1 87 2.484907 2.484907 180 +contain 1 81 2.484907 2.484907 174 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +dynam 1 76 2.564949 2.564949 194 +exampl 1 77 2.564949 2.564949 195 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +involv 1 71 2.639057 2.639057 227 +meet 1 72 2.639057 2.639057 229 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +sieg 1 69 2.708050 2.708050 260 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +plan 1 65 2.772589 2.772589 272 +laboratori 1 63 2.772589 2.772589 292 +experi 1 64 2.772589 2.772589 283 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +interact 1 62 2.772589 2.772589 270 +guid 1 63 2.772589 2.772589 267 +result 1 65 2.772589 2.772589 281 +back 1 60 2.833213 2.833213 297 +summer 1 56 2.890372 2.890372 311 +direct 1 57 2.890372 2.890372 316 +explor 1 58 2.890372 2.890372 324 +extens 1 53 2.944439 2.944439 340 +three 1 54 2.944439 2.944439 330 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +adapt 1 46 3.091042 3.091042 387 +quarter 1 47 3.091042 3.091042 389 +featur 1 46 3.091042 3.091042 386 +keep 1 44 3.135494 3.135494 409 +made 1 44 3.135494 3.135494 398 +better 1 45 3.135494 3.135494 401 +even 1 45 3.135494 3.135494 393 +around 1 43 3.178054 3.178054 415 +futur 1 41 3.218876 3.218876 427 +combin 1 42 3.218876 3.218876 421 +join 1 39 3.258097 3.258097 457 +littl 1 39 3.258097 3.258097 454 +author 1 39 3.258097 3.258097 450 +probabl 1 40 3.258097 3.258097 455 +realli 1 40 3.258097 3.258097 444 +vita 1 38 3.295837 3.295837 473 +seminar 1 38 3.295837 3.295837 470 +feel 1 37 3.332205 3.332205 483 +singl 1 34 3.401197 3.401197 510 +approxim 1 35 3.401197 3.401197 509 +word 1 34 3.401197 3.401197 508 +curriculum 1 33 3.433987 3.433987 535 +obtain 1 33 3.433987 3.433987 534 +kind 1 32 3.465736 3.465736 541 +independ 1 32 3.465736 3.465736 548 +someth 1 31 3.496508 3.496508 554 +exist 1 30 3.555348 3.555348 569 +profil 1 30 3.555348 3.555348 581 +specifi 1 30 3.555348 3.555348 568 +travel 1 30 3.555348 3.555348 579 +scale 1 28 3.610918 3.610918 613 +becom 1 28 3.610918 3.610918 603 +pass 1 28 3.610918 3.610918 611 +framework 1 28 3.610918 3.610918 606 +effort 1 26 3.688879 3.688879 652 +consist 1 26 3.688879 3.688879 651 +enjoi 1 26 3.688879 3.688879 660 +rather 1 26 3.688879 3.688879 642 +jeff 1 25 3.737670 3.737670 673 +concern 1 25 3.737670 3.737670 666 +toward 1 25 3.737670 3.737670 668 +wai 1 25 3.737670 3.737670 662 +client 1 25 3.737670 3.737670 679 +spent 1 25 3.737670 3.737670 676 +never 1 25 3.737670 3.737670 671 +highli 1 23 3.806662 3.806662 725 +defin 1 22 3.850148 3.850148 746 +recommend 1 22 3.850148 3.850148 737 +hous 1 21 3.912023 3.912023 801 +programminglanguag 1 21 3.912023 3.912023 782 +flexibl 1 21 3.912023 3.912023 792 +love 1 21 3.912023 3.912023 804 +entir 1 20 3.951244 3.951244 811 +nice 1 20 3.951244 3.951244 809 +minut 1 20 3.951244 3.951244 810 +predict 1 19 4.007333 4.007333 855 +four 1 18 4.060443 4.060443 905 +whole 1 17 4.110874 4.110874 940 +chateau 1 16 4.174387 4.174387 997 +permit 1 16 4.174387 4.174387 962 +letter 1 16 4.174387 4.174387 981 +anyth 1 16 4.174387 4.174387 998 +took 1 16 4.174387 4.174387 1010 +track 1 15 4.248495 4.248495 1029 +enough 1 15 4.248495 4.248495 1040 +doesn 1 15 4.248495 4.248495 1055 +dean 1 14 4.317488 4.317488 1104 +spin 1 14 4.317488 4.317488 1121 +split 1 14 4.317488 4.317488 1078 +primarili 1 13 4.382027 4.382027 1185 +composit 1 13 4.382027 4.382027 1150 +wife 1 13 4.382027 4.382027 1196 +hotlist 1 13 4.382027 4.382027 1199 +uniqu 1 12 4.465908 4.465908 1228 +iter 1 12 4.465908 4.465908 1206 +food 1 12 4.465908 4.465908 1285 +walk 1 12 4.465908 4.465908 1281 +valid 1 11 4.553877 4.553877 1299 +moment 1 11 4.553877 4.553877 1379 +true 1 10 4.653960 4.653960 1422 +guess 1 10 4.653960 4.653960 1443 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +cecil 1 9 4.753590 4.753590 1547 +hang 1 9 4.753590 4.753590 1499 +inter 1 9 4.753590 4.753590 1530 +compos 1 9 4.753590 4.753590 1527 +sound 1 9 4.753590 4.753590 1605 +pure 1 8 4.875197 4.875197 1776 +isol 1 8 4.875197 4.875197 1663 +closur 1 8 4.875197 4.875197 1643 +ride 1 8 4.875197 4.875197 1741 +bug 1 7 5.010635 5.010635 1801 +dead 1 7 5.010635 5.010635 1840 +daughter 1 7 5.010635 5.010635 1943 +park 1 6 5.164786 5.164786 2218 +affect 1 6 5.164786 5.164786 2044 +increment 1 6 5.164786 5.164786 2206 +creation 1 6 5.164786 5.164786 2069 +spinproject 1 5 5.347108 5.347108 2570 +unnecessari 1 5 5.347108 5.347108 2506 +lesson 1 5 5.347108 5.347108 2568 +vortex 1 5 5.347108 5.347108 2362 +western 1 4 5.568345 5.568345 3062 +usedto 1 4 5.568345 5.568345 2643 +inlin 1 4 5.568345 5.568345 2964 +enjoy 1 4 5.568345 5.568345 2937 +insur 1 4 5.568345 5.568345 2939 +coverag 1 4 5.568345 5.568345 2656 +nearbi 1 3 5.857933 5.857933 3291 +langaug 1 3 5.857933 5.857933 3661 +stillmaintain 1 3 5.857933 5.857933 3964 +ofobject 1 3 5.857933 5.857933 3399 +forobject 1 3 5.857933 5.857933 3965 +kick 1 3 5.857933 5.857933 3962 +habit 1 3 5.857933 5.857933 3777 +somedai 1 3 5.857933 5.857933 3919 +fantast 1 3 5.857933 5.857933 3966 +bought 1 2 6.263398 6.263398 5165 +projectsi 1 2 6.263398 6.263398 5931 +andto 1 2 6.263398 6.263398 5771 +vortexcompil 1 2 6.263398 6.263398 5932 +interfacesand 1 2 6.263398 6.263398 5206 +andhow 1 2 6.263398 6.263398 5933 +intraprocedur 1 2 6.263398 6.263398 5934 +coke 1 2 6.263398 6.263398 5935 +caffein 1 2 6.263398 6.263398 5936 +galvin 1 2 6.263398 6.263398 4160 +fly 1 2 6.263398 6.263398 5937 +anymor 1 2 6.263398 6.263398 5938 +flight 1 2 6.263398 6.263398 5911 +downtown 1 2 6.263398 6.263398 5642 +wing 1 2 6.263398 6.263398 4864 +lengthi 1 2 6.263398 6.263398 4273 +jdean 1 2 6.263398 6.263398 4455 +dang 1 1 6.957497 6.957497 15589 +weren 1 1 6.957497 6.957497 15590 +plansi 1 1 6.957497 6.957497 15591 +sunni 1 1 6.957497 6.957497 15592 +menlo 1 1 6.957497 6.957497 15593 +avehicl 1 1 6.957497 6.957497 15594 +weintend 1 1 6.957497 6.957497 15595 +codein 1 1 6.957497 6.957497 15596 +systemmicrokernel 1 1 6.957497 6.957497 15597 +especiallyprofil 1 1 6.957497 6.957497 15598 +howwhol 1 1 6.957497 6.957497 15599 +assumedthat 1 1 6.957497 6.957497 15600 +manycompromis 1 1 6.957497 6.957497 15601 +wholeprogram 1 1 6.957497 6.957497 15602 +underlyingimplement 1 1 6.957497 6.957497 15603 +principaldesign 1 1 6.957497 6.957497 15604 +independentintermedi 1 1 6.957497 6.957497 15605 +ishigh 1 1 6.957497 6.957497 15606 +messagesend 1 1 6.957497 6.957497 15607 +wayof 1 1 6.957497 6.957497 15608 +repeatedli 1 1 6.957497 6.957497 15609 +passessepar 1 1 6.957497 6.957497 15610 +classanalysi 1 1 6.957497 6.957497 15611 +aliasanalysi 1 1 6.957497 6.957497 15612 +structuringoptim 1 1 6.957497 6.957497 15613 +stillallow 1 1 6.957497 6.957497 15614 +eachoth 1 1 6.957497 6.957497 15615 +flowanalys 1 1 6.957497 6.957497 15616 +withrel 1 1 6.957497 6.957497 15617 +assignmentelimin 1 1 6.957497 6.957497 15618 +publicationssom 1 1 6.957497 6.957497 15619 +personali 1 1 6.957497 6.957497 15620 +spici 1 1 6.957497 6.957497 15621 +mild 1 1 6.957497 6.957497 15622 +heidi 1 1 6.957497 6.957497 15623 +victoria 1 1 6.957497 6.957497 15624 +honeymoon 1 1 6.957497 6.957497 15625 +kauai 1 1 6.957497 6.957497 15626 +hurrican 1 1 6.957497 6.957497 15627 +iniki 1 1 6.957497 6.957497 15628 +biplan 1 1 6.957497 6.957497 15588 +puget 1 1 6.957497 6.957497 15629 +dare 1 1 6.957497 6.957497 15630 +sadli 1 1 6.957497 6.957497 15631 +passeng 1 1 6.957497 6.957497 15632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..ba25d3b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +machin 1 129 2.079442 2.079442 95 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +high 1 130 2.079442 2.079442 101 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +techniqu 1 99 2.302585 2.302585 138 +commun 1 95 2.397895 2.397895 157 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +control 1 82 2.484907 2.484907 164 +requir 1 81 2.484907 2.484907 167 +academ 1 82 2.484907 2.484907 178 +school 1 84 2.484907 2.484907 188 +ieee 1 86 2.484907 2.484907 190 +come 1 78 2.564949 2.564949 202 +exampl 1 77 2.564949 2.564949 195 +state 1 76 2.564949 2.564949 207 +dynam 1 76 2.564949 2.564949 194 +name 1 72 2.639057 2.639057 220 +html 1 75 2.639057 2.639057 235 +write 1 72 2.639057 2.639057 222 +workshop 1 71 2.639057 2.639057 239 +integr 1 67 2.708050 2.708050 245 +complex 1 64 2.772589 2.772589 269 +improv 1 62 2.772589 2.772589 289 +septemb 1 65 2.772589 2.772589 274 +januari 1 62 2.772589 2.772589 264 +experi 1 64 2.772589 2.772589 283 +best 1 59 2.833213 2.833213 299 +reason 1 57 2.890372 2.890372 318 +thesi 1 57 2.890372 2.890372 327 +three 1 54 2.944439 2.944439 330 +februari 1 54 2.944439 2.944439 328 +hardwar 1 51 2.995732 2.995732 350 +possibl 1 47 3.091042 3.091042 378 +california 1 46 3.091042 3.091042 388 +discuss 1 45 3.135494 3.135494 399 +transact 1 39 3.258097 3.258097 438 +industri 1 38 3.295837 3.295837 464 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +procedur 1 36 3.367296 3.367296 488 +articl 1 33 3.433987 3.433987 530 +concept 1 32 3.465736 3.465736 537 +dissert 1 32 3.465736 3.465736 549 +rang 1 30 3.555348 3.555348 565 +specifi 1 30 3.555348 3.555348 568 +semant 1 29 3.583519 3.583519 587 +becom 1 28 3.610918 3.610918 603 +great 1 27 3.637586 3.637586 626 +doctor 1 24 3.761200 3.761200 709 +interpret 1 24 3.761200 3.761200 686 +displai 1 23 3.806662 3.806662 712 +flexibl 1 21 3.912023 3.912023 792 +safeti 1 20 3.951244 3.951244 817 +histori 1 19 4.007333 4.007333 853 +less 1 18 4.060443 4.060443 892 +behavior 1 18 4.060443 4.060443 881 +concentr 1 18 4.060443 4.060443 906 +engineeringunivers 1 17 4.110874 4.110874 959 +steven 1 17 4.110874 4.110874 953 +critic 1 16 4.174387 4.174387 982 +chateau 1 16 4.174387 4.174387 997 +devic 1 16 4.174387 4.174387 1002 +advantag 1 16 4.174387 4.174387 987 +convent 1 14 4.317488 4.317488 1072 +draft 1 14 4.317488 4.317488 1085 +washingtonbox 1 13 4.382027 4.382027 1200 +difficulti 1 13 4.382027 4.382027 1132 +signific 1 13 4.382027 4.382027 1125 +nanci 1 12 4.465908 4.465908 1256 +island 1 11 4.553877 4.553877 1345 +valid 1 11 4.553877 4.553877 1299 +summar 1 11 4.553877 4.553877 1295 +alpha 1 11 4.553877 4.553877 1348 +rice 1 11 4.553877 4.553877 1336 +itali 1 11 4.553877 4.553877 1378 +success 1 10 4.653960 4.653960 1390 +respect 1 9 4.753590 4.753590 1545 +kurt 1 9 4.753590 4.753590 1548 +linguist 1 9 4.753590 4.753590 1593 +leveson 1 9 4.753590 4.753590 1540 +guggenheim 1 8 4.875197 4.875197 1759 +fail 1 8 4.875197 4.875197 1655 +perhap 1 8 4.875197 4.875197 1693 +mile 1 8 4.875197 4.875197 1743 +sean 1 8 4.875197 4.875197 1705 +irvin 1 8 4.875197 4.875197 1660 +curv 1 8 4.875197 4.875197 1656 +awar 1 7 5.010635 5.010635 1800 +henc 1 7 5.010635 5.010635 1805 +sixth 1 7 5.010635 5.010635 1917 +price 1 6 5.164786 5.164786 1999 +emerg 1 6 5.164786 5.164786 2038 +transcript 1 6 5.164786 5.164786 2067 +variant 1 6 5.164786 5.164786 2043 +annex 1 5 5.347108 5.347108 2572 +caus 1 5 5.347108 5.347108 2298 +stage 1 5 5.347108 5.347108 2488 +colleagu 1 5 5.347108 5.347108 2304 +ortega 1 5 5.347108 5.347108 2559 +expens 1 4 5.568345 5.568345 2678 +avion 1 4 5.568345 5.568345 3018 +invent 1 4 5.568345 5.568345 3028 +sandi 1 4 5.568345 5.568345 2765 +diagnos 1 3 5.857933 5.857933 3968 +rsml 1 3 5.857933 5.857933 3967 +hazard 1 3 5.857933 5.857933 3191 +borrow 1 3 5.857933 5.857933 3725 +partridg 1 3 5.857933 5.857933 3346 +publicli 1 3 5.857933 5.857933 3687 +diagnost 1 3 5.857933 5.857933 3833 +rees 1 2 6.263398 6.263398 5939 +unpredict 1 2 6.263398 6.263398 5722 +incid 1 2 6.263398 6.263398 5870 +tca 1 2 6.263398 6.263398 5941 +deviat 1 2 6.263398 6.263398 4826 +mat 1 2 6.263398 6.263398 5942 +heimdahl 1 2 6.263398 6.263398 5940 +holli 1 2 6.263398 6.263398 5601 +damon 1 1 6.957497 6.957497 15633 +pagejon 1 1 6.957497 6.957497 15638 +reesepost 1 1 6.957497 6.957497 15639 +groupdepart 1 1 6.957497 6.957497 15640 +jdrees 1 1 6.957497 6.957497 15634 +catastroph 1 1 6.957497 6.957497 15641 +wider 1 1 6.957497 6.957497 15642 +hazop 1 1 6.957497 6.957497 15635 +siang 1 1 6.957497 6.957497 15643 +waxahachi 1 1 6.957497 6.957497 15636 +hildreth 1 1 6.957497 6.957497 15637 +dolin 1 1 6.957497 6.957497 15644 +statechart 1 1 6.957497 6.957497 15645 +como 1 1 6.957497 6.957497 15646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..c89cceed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +seattl 1 120 2.079442 2.079442 103 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +level 1 87 2.484907 2.484907 180 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +june 1 79 2.564949 2.564949 214 +dynam 1 76 2.564949 2.564949 194 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +abstract 1 62 2.772589 2.772589 276 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +written 1 63 2.772589 2.772589 278 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +juli 1 60 2.833213 2.833213 305 +room 1 59 2.833213 2.833213 301 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +california 1 46 3.091042 3.091042 388 +examin 1 42 3.218876 3.218876 424 +submit 1 39 3.258097 3.258097 440 +annual 1 40 3.258097 3.258097 458 +static 1 27 3.637586 3.637586 619 +compar 1 26 3.688879 3.688879 648 +thread 1 23 3.806662 3.806662 722 +exploit 1 20 3.951244 3.951244 836 +increas 1 20 3.951244 3.951244 829 +stanford 1 17 4.110874 4.110874 955 +coupl 1 17 4.110874 4.110874 939 +choic 1 16 4.174387 4.174387 979 +susan 1 15 4.248495 4.248495 1050 +levi 1 14 4.317488 4.317488 1093 +dean 1 14 4.317488 4.317488 1104 +balanc 1 14 4.317488 4.317488 1112 +convert 1 13 4.382027 4.382027 1122 +sigplan 1 13 4.382027 4.382027 1190 +philadelphia 1 12 4.465908 4.465908 1244 +multithread 1 11 4.553877 4.553877 1315 +henri 1 10 4.653960 4.653960 1417 +franklin 1 10 4.653960 4.653960 1436 +jack 1 8 4.875197 4.875197 1780 +egger 1 8 4.875197 4.875197 1695 +joel 1 8 4.875197 4.875197 1698 +qualifi 1 8 4.875197 4.875197 1721 +simultan 1 6 5.164786 5.164786 2155 +rebecca 1 6 5.164786 5.164786 2174 +tullsen 1 6 5.164786 5.164786 2081 +superscalar 1 6 5.164786 5.164786 2082 +fetch 1 5 5.347108 5.347108 2567 +jolla 1 4 5.568345 5.568345 2988 +emer 1 3 5.857933 5.857933 3969 +stamm 1 3 5.857933 5.857933 3970 +vliw 1 3 5.857933 5.857933 3514 +lojlo 1 2 6.263398 6.263398 5943 +suif 1 2 6.263398 6.263398 5944 +lojack 1 1 6.957497 6.957497 15648 +anddean 1 1 6.957497 6.957497 15647 +loph 1 1 6.957497 6.957497 15649 +eseattl 1 1 6.957497 6.957497 15650 +orsieg 1 1 6.957497 6.957497 15651 +paintbal 1 1 6.957497 6.957497 15652 +yahoojlo 1 1 6.957497 6.957497 15653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..3ab8b5e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +topic 1 114 2.197225 2.197225 110 +send 1 114 2.197225 2.197225 109 +user 1 104 2.302585 2.302585 137 +activ 1 84 2.484907 2.484907 182 +interfac 1 79 2.564949 2.564949 209 +want 1 79 2.564949 2.564949 199 +automat 1 61 2.833213 2.833213 306 +browser 1 56 2.890372 2.890372 313 +local 1 55 2.944439 2.944439 334 +suggest 1 53 2.944439 2.944439 331 +visual 1 48 3.044522 3.044522 372 +directori 1 45 3.135494 3.135494 396 +might 1 41 3.218876 3.218876 426 +survei 1 35 3.401197 3.401197 513 +navig 1 21 3.912023 3.912023 796 +engineeringunivers 1 17 4.110874 4.110874 959 +washingtonbox 1 13 4.382027 4.382027 1200 +impress 1 6 5.164786 5.164786 2096 +sherman 1 1 6.957497 6.957497 15654 +shermanjoebob 1 1 6.957497 6.957497 15655 +usami 1 1 6.957497 6.957497 15656 +designinform 1 1 6.957497 6.957497 15657 +useclass 1 1 6.957497 6.957497 15658 +hcreat 1 1 6.957497 6.957497 15659 +pagequ 1 1 6.957497 6.957497 15660 +sarahsoftballstuff 1 1 6.957497 6.957497 15661 +pagesif 1 1 6.957497 6.957497 15662 +tojoebob 1 1 6.957497 6.957497 15663 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..780f6d68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +year 1 148 1.945910 1.945910 84 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +present 1 91 2.397895 2.397895 145 +learn 1 86 2.484907 2.484907 170 +start 1 83 2.484907 2.484907 173 +test 1 66 2.708050 2.708050 252 +abstract 1 62 2.772589 2.772589 276 +organ 1 65 2.772589 2.772589 265 +colleg 1 61 2.833213 2.833213 300 +sever 1 56 2.890372 2.890372 322 +discuss 1 45 3.135494 3.135494 399 +futur 1 41 3.218876 3.218876 427 +expect 1 37 3.332205 3.332205 484 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +neural 1 30 3.555348 3.555348 578 +travel 1 30 3.555348 3.555348 579 +task 1 25 3.737670 3.737670 678 +demonstr 1 24 3.761200 3.761200 694 +theunivers 1 21 3.912023 3.912023 797 +thought 1 17 4.110874 4.110874 945 +cognit 1 16 4.174387 4.174387 986 +achiev 1 14 4.317488 4.317488 1088 +context 1 13 4.382027 4.382027 1153 +accomplish 1 8 4.875197 4.875197 1755 +potenti 1 8 4.875197 4.875197 1690 +creativ 1 8 4.875197 4.875197 1777 +successfulli 1 7 5.010635 5.010635 1869 +earn 1 7 5.010635 5.010635 1788 +biolog 1 6 5.164786 5.164786 2147 +slate 1 6 5.164786 5.164786 2021 +addition 1 4 5.568345 5.568345 2593 +joshua 1 3 5.857933 5.857933 3333 +blank 1 3 5.857933 5.857933 3379 +emul 1 3 5.857933 5.857933 3944 +josh 1 2 6.263398 6.263398 5945 +overviewof 1 2 6.263398 6.263398 5469 +seim 1 1 6.957497 6.957497 15664 +begunin 1 1 6.957497 6.957497 15665 +lockean 1 1 6.957497 6.957497 15666 +observedbehavior 1 1 6.957497 6.957497 15667 +graduatingfrom 1 1 6.957497 6.957497 15668 +volit 1 1 6.957497 6.957497 15669 +taskw 1 1 6.957497 6.957497 15670 +ambulatori 1 1 6.957497 6.957497 15671 +academichierarchi 1 1 6.957497 6.957497 15672 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..2aba061d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +move 1 47 3.091042 3.091042 382 +jovan 1 2 6.263398 6.263398 5842 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..8693bb97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +recent 1 167 1.791759 1.791759 58 +site 1 106 2.197225 2.197225 119 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +proceed 1 93 2.397895 2.397895 152 +real 1 93 2.397895 2.397895 144 +academ 1 82 2.484907 2.484907 178 +school 1 84 2.484907 2.484907 188 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +david 1 71 2.639057 2.639057 232 +main 1 67 2.708050 2.708050 256 +major 1 56 2.890372 2.890372 315 +york 1 41 3.218876 3.218876 435 +especi 1 36 3.367296 3.367296 496 +power 1 30 3.555348 3.555348 573 +color 1 22 3.850148 3.850148 762 +grad 1 20 3.951244 3.951244 837 +eric 1 19 4.007333 4.007333 870 +women 1 16 4.174387 4.174387 1004 +biologi 1 15 4.248495 4.248495 1049 +comic 1 14 4.317488 4.317488 1103 +jonathan 1 13 4.382027 4.382027 1174 +brad 1 12 4.465908 4.465908 1264 +interestsmi 1 10 4.653960 4.653960 1462 +genet 1 10 4.653960 4.653960 1409 +gain 1 8 4.875197 4.875197 1730 +siggraph 1 8 4.875197 4.875197 1773 +sean 1 8 4.875197 4.875197 1705 +molecular 1 7 5.010635 5.010635 1887 +cat 1 6 5.164786 5.164786 2194 +salesin 1 4 5.568345 5.568345 3051 +alma 1 3 5.857933 5.857933 3963 +joanna 1 2 6.263398 6.263398 4503 +reproduc 1 2 6.263398 6.263398 5519 +powerjoanna 1 1 6.957497 6.957497 15673 +pagehi 1 1 6.957497 6.957497 15674 +uwneat 1 1 6.957497 6.957497 15675 +matercool 1 1 6.957497 6.957497 15676 +shadegraph 1 1 6.957497 6.957497 15677 +uwduoton 1 1 6.957497 6.957497 15678 +reproductionmi 1 1 6.957497 6.957497 15679 +matermost 1 1 6.957497 6.957497 15680 +employmentpubl 1 1 6.957497 6.957497 15681 +stollnitz 1 1 6.957497 6.957497 15682 +duoton 1 1 6.957497 6.957497 15683 +lifepast 1 1 6.957497 6.957497 15684 +homesdiversionsgend 1 1 6.957497 6.957497 15685 +issuesstatu 1 1 6.957497 6.957497 15686 +sciencenow 1 1 6.957497 6.957497 15687 +pagefeminist 1 1 6.957497 6.957497 15688 +onlineultim 1 1 6.957497 6.957497 15689 +frisbeefun 1 1 6.957497 6.957497 15690 +stufffroggi 1 1 6.957497 6.957497 15691 +quotesbrad 1 1 6.957497 6.957497 15692 +musicevan 1 1 6.957497 6.957497 15693 +jokes 1 1 6.957497 6.957497 15694 +pagesmi 1 1 6.957497 6.957497 15695 +herojpow 1 1 6.957497 6.957497 15696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..bbc09ea8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +hall 1 146 1.945910 1.945910 65 +seattl 1 120 2.079442 2.079442 103 +homepag 1 93 2.397895 2.397895 148 +sieg 1 69 2.708050 2.708050 260 +august 1 66 2.708050 2.708050 257 +jonathan 1 13 4.382027 4.382027 1174 +ahoi 1 3 5.857933 5.857933 3532 +shake 1 2 6.263398 6.263398 5898 +finderresumlinkslast 1 1 6.957497 6.957497 15697 +jshake 1 1 6.957497 6.957497 15698 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..66e8c83d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +juan 1 9 4.753590 4.753590 1580 +alemanyjuan 1 1 6.957497 6.957497 15699 +alemani 1 1 6.957497 6.957497 15700 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..b4ed70a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +report 1 131 2.079442 2.079442 92 +seattl 1 120 2.079442 2.079442 103 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +technic 1 100 2.302585 2.302585 140 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +present 1 91 2.397895 2.397895 145 +sieg 1 69 2.708050 2.708050 260 +function 1 62 2.772589 2.772589 275 +understand 1 47 3.091042 3.091042 384 +third 1 43 3.178054 3.178054 412 +vision 1 41 3.218876 3.218876 430 +combin 1 42 3.218876 3.218876 421 +examin 1 42 3.218876 3.218876 424 +multipl 1 39 3.258097 3.258097 453 +microsoft 1 38 3.295837 3.295837 468 +slide 1 38 3.295837 3.295837 467 +statist 1 35 3.401197 3.401197 521 +obtain 1 33 3.433987 3.433987 534 +taught 1 33 3.433987 3.433987 526 +rang 1 30 3.555348 3.555348 565 +actual 1 28 3.610918 3.610918 604 +pass 1 28 3.610918 3.610918 611 +aspect 1 25 3.737670 3.737670 663 +ofwashington 1 22 3.850148 3.850148 766 +try 1 22 3.850148 3.850148 764 +geometri 1 22 3.850148 3.850148 752 +left 1 19 4.007333 4.007333 851 +steven 1 17 4.110874 4.110874 953 +qual 1 15 4.248495 4.248495 1062 +universityof 1 15 4.248495 4.248495 1061 +reflect 1 15 4.248495 4.248495 1034 +remov 1 12 4.465908 4.465908 1225 +werner 1 10 4.653960 4.653960 1385 +linda 1 10 4.653960 4.653960 1394 +tanimoto 1 10 4.653960 4.653960 1429 +surfac 1 9 4.753590 4.753590 1574 +folk 1 9 4.753590 4.753590 1597 +siggraph 1 8 4.875197 4.875197 1773 +theclass 1 6 5.164786 5.164786 2060 +speaker 1 5 5.347108 5.347108 2370 +engineeringdepart 1 4 5.568345 5.568345 2917 +closest 1 4 5.568345 5.568345 2828 +addition 1 4 5.568345 5.568345 2593 +rick 1 4 5.568345 5.568345 2646 +wavelet 1 4 5.568345 5.568345 2874 +union 1 4 5.568345 5.568345 2634 +kari 1 2 6.263398 6.263398 4500 +andmathemat 1 2 6.263398 6.263398 4948 +tonyderos 1 2 6.263398 6.263398 5839 +stuetzl 1 2 6.263398 6.263398 5840 +duchamp 1 2 6.263398 6.263398 5841 +hopp 1 2 6.263398 6.263398 5092 +sketch 1 2 6.263398 6.263398 5946 +getto 1 2 6.263398 6.263398 5806 +herear 1 2 6.263398 6.263398 5947 +pulli 1 1 6.957497 6.957497 15701 +pagekari 1 1 6.957497 6.957497 15704 +antero 1 1 6.957497 6.957497 15702 +pullii 1 1 6.957497 6.957497 15705 +thesedisciplin 1 1 6.957497 6.957497 15706 +uwfor 1 1 6.957497 6.957497 15707 +pixar 1 1 6.957497 6.957497 15708 +lindashapiro 1 1 6.957497 6.957497 15709 +andjohn 1 1 6.957497 6.957497 15710 +mcdonald 1 1 6.957497 6.957497 15711 +andhugu 1 1 6.957497 6.957497 15712 +szeliski 1 1 6.957497 6.957497 15713 +tribor 1 1 6.957497 6.957497 15714 +triplet 1 1 6.957497 6.957497 15715 +recognitionsystem 1 1 6.957497 6.957497 15716 +surfacereconstruct 1 1 6.957497 6.957497 15717 +baselin 1 1 6.957497 6.957497 15718 +camerasystem 1 1 6.957497 6.957497 15719 +subdivis 1 1 6.957497 6.957497 15703 +waveletanalysi 1 1 6.957497 6.957497 15720 +rigidregistr 1 1 6.957497 6.957497 15721 +architecturesystem 1 1 6.957497 6.957497 15722 +susanegg 1 1 6.957497 6.957497 15723 +brianbershad 1 1 6.957497 6.957497 15724 +eacutesum 1 1 6.957497 6.957497 15725 +eacut 1 1 6.957497 6.957497 15726 +kapu 1 1 6.957497 6.957497 15727 +takavainionti 1 1 6.957497 6.957497 15728 +oulu 1 1 6.957497 6.957497 15729 +finland 1 1 6.957497 6.957497 15730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..06d8e905 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +professor 1 137 1.945910 1.945910 76 +seattl 1 120 2.079442 2.079442 103 +anna 1 12 4.465908 4.465908 1292 +karlinanna 1 1 6.957497 6.957497 15731 +rochel 1 1 6.957497 6.957497 15732 +karlinassoci 1 1 6.957497 6.957497 15733 +sincejuli 1 1 6.957497 6.957497 15734 +paperskarlin 1 1 6.957497 6.957497 15735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..5edd272b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +email 1 220 1.386294 1.386294 29 +back 1 60 2.833213 2.833213 297 +yeunghom 1 1 6.957497 6.957497 15736 +yeungperson 1 1 6.957497 6.957497 15737 +infomi 1 1 6.957497 6.957497 15738 +picturemi 1 1 6.957497 6.957497 15739 +researchtelnet 1 1 6.957497 6.957497 15740 +machinessend 1 1 6.957497 6.957497 15741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..6cbc9c64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +area 1 144 1.945910 1.945910 80 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +seattl 1 120 2.079442 2.079442 103 +specif 1 106 2.197225 2.197225 106 +user 1 104 2.302585 2.302585 137 +academ 1 82 2.484907 2.484907 178 +environ 1 84 2.484907 2.484907 177 +start 1 83 2.484907 2.484907 173 +school 1 84 2.484907 2.484907 188 +method 1 80 2.564949 2.564949 213 +interfac 1 79 2.564949 2.564949 209 +complet 1 77 2.564949 2.564949 208 +html 1 75 2.639057 2.639057 235 +name 1 72 2.639057 2.639057 220 +java 1 70 2.708050 2.708050 248 +interact 1 62 2.772589 2.772589 270 +visual 1 48 3.044522 3.044522 372 +life 1 50 3.044522 3.044522 375 +right 1 48 3.044522 3.044522 363 +describ 1 45 3.135494 3.135494 400 +live 1 40 3.258097 3.258097 451 +formal 1 37 3.332205 3.332205 478 +human 1 32 3.465736 3.465736 546 +manipul 1 27 3.637586 3.637586 624 +berkelei 1 26 3.688879 3.688879 657 +background 1 25 3.737670 3.737670 664 +other 1 24 3.761200 3.761200 697 +love 1 21 3.912023 3.912023 804 +voic 1 21 3.912023 3.912023 806 +safeti 1 20 3.951244 3.951244 817 +binari 1 20 3.951244 3.951244 823 +qualiti 1 20 3.951244 3.951244 832 +critic 1 16 4.174387 4.174387 982 +nasa 1 13 4.382027 4.382027 1188 +readabl 1 12 4.465908 4.465908 1258 +nanci 1 12 4.465908 4.465908 1256 +kurt 1 9 4.753590 4.753590 1548 +leveson 1 9 4.753590 4.753590 1540 +sister 1 9 4.753590 4.753590 1524 +wayn 1 8 4.875197 4.875197 1738 +usabl 1 7 5.010635 5.010635 1810 +poster 1 7 5.010635 5.010635 1814 +corner 1 7 5.010635 5.010635 1909 +vivek 1 6 5.164786 5.164786 2210 +parent 1 6 5.164786 5.164786 2204 +ohlrich 1 5 5.347108 5.347108 2564 +humor 1 5 5.347108 5.347108 2533 +partridg 1 3 5.857933 5.857933 3346 +dabbl 1 3 5.857933 5.857933 3971 +preview 1 3 5.857933 5.857933 3306 +bauer 1 2 6.263398 6.263398 5117 +mat 1 2 6.263398 6.263398 5942 +heimdahl 1 2 6.263398 6.263398 5940 +ratan 1 2 6.263398 6.263398 5948 +rees 1 2 6.263398 6.263398 5939 +thousand 1 2 6.263398 6.263398 5949 +oak 1 2 6.263398 6.263398 5566 +kepart 1 2 6.263398 6.263398 4459 +bddtcl 1 1 6.957497 6.957497 15742 +decisiondiagram 1 1 6.957497 6.957497 15743 +suburban 1 1 6.957497 6.957497 15744 +oti 1 1 6.957497 6.957497 15745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..06cf1e18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +stuff 1 87 2.484907 2.484907 171 +complet 1 77 2.564949 2.564949 208 +advisor 1 51 2.995732 2.995732 355 +friend 1 48 3.044522 3.044522 376 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +collabor 1 32 3.465736 3.465736 543 +suit 1 13 4.382027 4.382027 1129 +tour 1 11 4.553877 4.553877 1307 +ofcomput 1 10 4.653960 4.653960 1442 +weld 1 9 4.753590 4.753590 1538 +dictionari 1 8 4.875197 4.875197 1642 +golden 1 7 5.010635 5.010635 1962 +photographi 1 6 5.164786 5.164786 2146 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +keith 1 5 5.347108 5.347108 2528 +paint 1 5 5.347108 5.347108 2400 +coffe 1 5 5.347108 5.347108 2556 +lawyer 1 4 5.568345 5.568345 2836 +car 1 4 5.568345 5.568345 2931 +bicycl 1 2 6.263398 6.263398 5950 +questa 1 1 6.957497 6.957497 15746 +pagina 1 1 6.957497 6.957497 15747 +anch 1 1 6.957497 6.957497 15748 +italiano 1 1 6.957497 6.957497 15749 +researchsoftbotsplanningkrselect 1 1 6.957497 6.957497 15750 +publicationscurriculum 1 1 6.957497 6.957497 15751 +inpostscriptrandom 1 1 6.957497 6.957497 15752 +hackingwordbot 1 1 6.957497 6.957497 15753 +godless 1 1 6.957497 6.957497 15754 +pinko 1 1 6.957497 6.957497 15755 +dislik 1 1 6.957497 6.957497 15756 +ellenmarcruben 1 1 6.957497 6.957497 15757 +laurennickrich 1 1 6.957497 6.957497 15758 +joannavivek 1 1 6.957497 6.957497 15759 +keithgolden 1 1 6.957497 6.957497 15760 +kgolden 1 1 6.957497 6.957497 15761 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..3683e09f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,144 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +graduat 1 215 1.386294 1.386294 31 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +technolog 1 131 2.079442 2.079442 102 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +proceed 1 93 2.397895 2.397895 152 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +school 1 84 2.484907 2.484907 188 +resum 1 79 2.564949 2.564949 217 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +workshop 1 71 2.639057 2.639057 239 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +polici 1 64 2.772589 2.772589 279 +new 1 64 2.772589 2.772589 262 +visit 1 63 2.772589 2.772589 288 +content 1 59 2.833213 2.833213 302 +automat 1 61 2.833213 2.833213 306 +march 1 61 2.833213 2.833213 295 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +advisor 1 51 2.995732 2.995732 355 +date 1 51 2.995732 2.995732 344 +format 1 48 3.044522 3.044522 356 +editor 1 41 3.218876 3.218876 433 +movi 1 40 3.258097 3.258097 459 +respons 1 37 3.332205 3.332205 476 +china 1 37 3.332205 3.332205 487 +manual 1 35 3.401197 3.401197 504 +transform 1 32 3.465736 3.465736 542 +dissert 1 32 3.465736 3.465736 549 +specifi 1 30 3.555348 3.555348 568 +quot 1 29 3.583519 3.583519 582 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +revis 1 26 3.688879 3.688879 640 +experiment 1 26 3.688879 3.688879 645 +alwai 1 24 3.761200 3.761200 691 +william 1 22 3.850148 3.850148 765 +wang 1 21 3.912023 3.912023 790 +watch 1 21 3.912023 3.912023 789 +fund 1 21 3.912023 3.912023 805 +qualiti 1 20 3.951244 3.951244 832 +minut 1 20 3.951244 3.951244 810 +citi 1 19 4.007333 4.007333 874 +thoma 1 18 4.060443 4.060443 901 +stock 1 16 4.174387 4.174387 1007 +driven 1 15 4.248495 4.248495 1048 +style 1 15 4.248495 4.248495 1036 +hong 1 14 4.317488 4.317488 1105 +asynchron 1 12 4.465908 4.465908 1229 +readi 1 12 4.465908 4.465908 1242 +evolut 1 11 4.553877 4.553877 1314 +market 1 11 4.553877 4.553877 1361 +kong 1 9 4.753590 4.753590 1602 +mainten 1 9 4.753590 4.753590 1543 +upcom 1 8 4.875197 4.875197 1685 +bridg 1 8 4.875197 4.875197 1764 +pacif 1 8 4.875197 4.875197 1674 +delai 1 7 5.010635 5.010635 1848 +highwai 1 6 5.164786 5.164786 2095 +invest 1 6 5.164786 5.164786 2153 +educomput 1 5 5.347108 5.347108 2524 +semi 1 5 5.347108 5.347108 2510 +these 1 5 5.347108 5.347108 2482 +mutual 1 5 5.347108 5.347108 2418 +commod 1 5 5.347108 5.347108 2415 +ics 1 4 5.568345 5.568345 2779 +chart 1 4 5.568345 5.568345 2653 +chow 1 3 5.857933 5.857933 3281 +notkin 1 3 5.857933 5.857933 3345 +polytechn 1 3 5.857933 5.857933 3222 +usathi 1 2 6.263398 6.263398 5951 +glossari 1 2 6.263398 6.263398 4418 +asia 1 2 6.263398 6.263398 5952 +alumnu 1 2 6.263398 6.263398 5863 +kingsum 1 1 6.957497 6.957497 15762 +feedbackresearchmi 1 1 6.957497 6.957497 15764 +toolspap 1 1 6.957497 6.957497 15765 +icsm 1 1 6.957497 6.957497 15766 +griswold 1 1 6.957497 6.957497 15767 +pcct 1 1 6.957497 6.957497 15763 +sorcererpcct 1 1 6.957497 6.957497 15768 +terrenc 1 1 6.957497 6.957497 15769 +parr 1 1 6.957497 6.957497 15770 +newbiesresumepleasedrop 1 1 6.957497 6.957497 15771 +mailto 1 1 6.957497 6.957497 15772 +kongchines 1 1 6.957497 6.957497 15773 +kongsingapor 1 1 6.957497 6.957497 15774 +sitessingapor 1 1 6.957497 6.957497 15775 +websom 1 1 6.957497 6.957497 15776 +friendstom 1 1 6.957497 6.957497 15777 +liew 1 1 6.957497 6.957497 15778 +fook 1 1 6.957497 6.957497 15779 +jiang 1 1 6.957497 6.957497 15780 +weidongu 1 1 6.957497 6.957497 15781 +relatedunivers 1 1 6.957497 6.957497 15782 +webserv 1 1 6.957497 6.957497 15783 +storeinvestmentsfre 1 1 6.957497 6.957497 15784 +analysismisc 1 1 6.957497 6.957497 15785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..5c220430 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +like 1 132 1.945910 1.945910 81 +professor 1 137 1.945910 1.945910 76 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +assist 1 112 2.197225 2.197225 113 +teach 1 108 2.197225 2.197225 112 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +search 1 95 2.397895 2.397895 155 +real 1 93 2.397895 2.397895 144 +build 1 85 2.484907 2.484907 184 +want 1 79 2.564949 2.564949 199 +previou 1 62 2.772589 2.772589 290 +written 1 63 2.772589 2.772589 278 +visit 1 63 2.772589 2.772589 288 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +life 1 50 3.044522 3.044522 375 +archiv 1 49 3.044522 3.044522 364 +get 1 46 3.091042 3.091042 380 +adapt 1 46 3.091042 3.091042 387 +anoth 1 45 3.135494 3.135494 408 +form 1 39 3.258097 3.258097 443 +electr 1 38 3.295837 3.295837 461 +ofth 1 36 3.367296 3.367296 491 +photo 1 31 3.496508 3.496508 561 +rather 1 26 3.688879 3.688879 642 +task 1 25 3.737670 3.737670 678 +ofwashington 1 22 3.850148 3.850148 766 +rout 1 21 3.912023 3.912023 793 +spend 1 19 4.007333 4.007333 850 +speed 1 18 4.060443 4.060443 911 +minim 1 18 4.060443 4.060443 887 +took 1 16 4.174387 4.174387 1010 +doesn 1 15 4.248495 4.248495 1055 +signific 1 13 4.382027 4.382027 1125 +kevin 1 9 4.753590 4.753590 1482 +suitabl 1 9 4.753590 4.753590 1486 +root 1 8 4.875197 4.875197 1650 +brain 1 8 4.875197 4.875197 1638 +router 1 8 4.875197 4.875197 1772 +pacif 1 8 4.875197 4.875197 1674 +spot 1 7 5.010635 5.010635 1894 +explain 1 7 5.010635 5.010635 1816 +multicomput 1 7 5.010635 5.010635 1890 +rock 1 6 5.164786 5.164786 2164 +coral 1 5 5.347108 5.347108 2538 +chaotic 1 5 5.347108 5.347108 2566 +engineeringat 1 5 5.347108 5.347108 2561 +wander 1 4 5.568345 5.568345 2896 +chaoticrout 1 4 5.568345 5.568345 3063 +bold 1 3 5.857933 5.857933 3846 +tenur 1 3 5.857933 5.857933 3801 +researchassoci 1 3 5.857933 5.857933 3664 +nervou 1 2 6.263398 6.263398 5953 +conscious 1 2 6.263398 6.263398 5954 +boldingkwb 1 1 6.957497 6.957497 15786 +juvenil 1 1 6.957497 6.957497 15787 +squirt 1 1 6.957497 6.957497 15788 +hunk 1 1 6.957497 6.957497 15789 +cling 1 1 6.957497 6.957497 15790 +rudimentari 1 1 6.957497 6.957497 15791 +eat 1 1 6.957497 6.957497 15792 +dennett 1 1 6.957497 6.957497 15793 +latencylan 1 1 6.957497 6.957497 15794 +researchha 1 1 6.957497 6.957497 15795 +formass 1 1 6.957497 6.957497 15796 +comethyakutak 1 1 6.957497 6.957497 15797 +moustach 1 1 6.957497 6.957497 15798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..4d54a5bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +seattl 1 120 2.079442 2.079442 103 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +person 1 111 2.197225 2.197225 117 +structur 1 106 2.197225 2.197225 105 +commun 1 95 2.397895 2.397895 157 +sieg 1 69 2.708050 2.708050 260 +room 1 59 2.833213 2.833213 301 +quarter 1 47 3.091042 3.091042 389 +formal 1 37 3.332205 3.332205 478 +short 1 36 3.367296 3.367296 499 +winter 1 36 3.367296 3.367296 500 +richard 1 31 3.496508 3.496508 559 +ladner 1 6 5.164786 5.164786 2062 +ladnerrichard 1 1 6.957497 6.957497 15799 +ladnerprofessor 1 1 6.957497 6.957497 15800 +biographyresearch 1 1 6.957497 6.957497 15801 +studentsteachingcomput 1 1 6.957497 6.957497 15802 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..8e01f4dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +technic 1 100 2.302585 2.302585 140 +sinc 1 90 2.397895 2.397895 159 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +sieg 1 69 2.708050 2.708050 260 +integr 1 67 2.708050 2.708050 245 +test 1 66 2.708050 2.708050 252 +laboratori 1 63 2.772589 2.772589 292 +evalu 1 64 2.772589 2.772589 266 +function 1 62 2.772589 2.772589 275 +creat 1 63 2.772589 2.772589 277 +room 1 59 2.833213 2.833213 301 +allow 1 53 2.944439 2.944439 333 +small 1 39 3.258097 3.258097 447 +join 1 39 3.258097 3.258097 457 +purpos 1 37 3.332205 3.332205 481 +cost 1 37 3.332205 3.332205 480 +staff 1 36 3.367296 3.367296 490 +ofth 1 36 3.367296 3.367296 491 +return 1 34 3.401197 3.401197 502 +board 1 33 3.433987 3.433987 528 +articl 1 33 3.433987 3.433987 530 +focu 1 30 3.555348 3.555348 571 +packag 1 28 3.610918 3.610918 614 +symbol 1 27 3.637586 3.637586 620 +primari 1 25 3.737670 3.737670 669 +ofwashington 1 22 3.850148 3.850148 766 +director 1 22 3.850148 3.850148 767 +voic 1 21 3.912023 3.912023 806 +chip 1 21 3.912023 3.912023 770 +vlsi 1 21 3.912023 3.912023 795 +supervis 1 20 3.951244 3.951244 840 +spars 1 16 4.174387 4.174387 989 +later 1 15 4.248495 4.248495 1043 +driven 1 15 4.248495 4.248495 1048 +larri 1 13 4.382027 4.382027 1142 +calcul 1 12 4.465908 4.465908 1268 +captur 1 12 4.465908 4.465908 1232 +matric 1 10 4.653960 4.653960 1399 +fpga 1 10 4.653960 4.653960 1433 +router 1 8 4.875197 4.875197 1772 +upcom 1 8 4.875197 4.875197 1685 +northwest 1 7 5.010635 5.010635 1973 +densiti 1 7 5.010635 5.010635 1927 +quantum 1 6 5.164786 5.164786 2214 +chemistri 1 5 5.347108 5.347108 2405 +mcmurchi 1 4 5.568345 5.568345 2757 +western 1 4 5.568345 5.568345 3062 +comprehens 1 4 5.568345 5.568345 2745 +andengin 1 4 5.568345 5.568345 3042 +coauthor 1 4 5.568345 5.568345 3064 +tester 1 4 5.568345 5.568345 2754 +triptych 1 4 5.568345 5.568345 3061 +mactest 1 3 5.857933 5.857933 3972 +ofintegr 1 2 6.263398 6.263398 5324 +gaussian 1 2 6.263398 6.263398 4763 +molecul 1 2 6.263398 6.263398 5246 +representationof 1 2 6.263398 6.263398 4119 +andha 1 2 6.263398 6.263398 5955 +mcmurchiedepart 1 1 6.957497 6.957497 15803 +integratedsystem 1 1 6.957497 6.957497 15804 +hework 1 1 6.957497 6.957497 15805 +theconstruct 1 1 6.957497 6.957497 15806 +hamiltonian 1 1 6.957497 6.957497 15807 +coauthorof 1 1 6.957497 6.957497 15808 +meld 1 1 6.957497 6.957497 15809 +abinitio 1 1 6.957497 6.957497 15810 +wirec 1 1 6.957497 6.957497 15811 +aschemat 1 1 6.957497 6.957497 15812 +withschemat 1 1 6.957497 6.957497 15813 +concis 1 1 6.957497 6.957497 15814 +parameteriz 1 1 6.957497 6.957497 15815 +andcommerci 1 1 6.957497 6.957497 15816 +hardwareenviron 1 1 6.957497 6.957497 15817 +andsubsystem 1 1 6.957497 6.957497 15818 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..fae83668 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +seattl 1 120 2.079442 2.079442 103 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +search 1 95 2.397895 2.397895 155 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +associ 1 93 2.397895 2.397895 151 +follow 1 92 2.397895 2.397895 143 +call 1 91 2.397895 2.397895 153 +start 1 83 2.484907 2.484907 173 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +learn 1 86 2.484907 2.484907 170 +ieee 1 86 2.484907 2.484907 190 +member 1 84 2.484907 2.484907 165 +control 1 82 2.484907 2.484907 164 +come 1 78 2.564949 2.564949 202 +state 1 76 2.564949 2.564949 207 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +nation 1 74 2.639057 2.639057 240 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +goal 1 66 2.708050 2.708050 250 +practic 1 70 2.708050 2.708050 246 +result 1 65 2.772589 2.772589 281 +polici 1 64 2.772589 2.772589 279 +evalu 1 64 2.772589 2.772589 266 +copi 1 63 2.772589 2.772589 284 +interact 1 62 2.772589 2.772589 270 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +space 1 57 2.890372 2.890372 310 +publish 1 57 2.890372 2.890372 326 +finger 1 52 2.995732 2.995732 354 +life 1 50 3.044522 3.044522 375 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +favorit 1 44 3.135494 3.135494 410 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +join 1 39 3.258097 3.258097 457 +form 1 39 3.258097 3.258097 443 +late 1 40 3.258097 3.258097 439 +transact 1 39 3.258097 3.258097 438 +field 1 37 3.332205 3.332205 482 +tree 1 36 3.367296 3.367296 492 +committe 1 34 3.401197 3.401197 522 +award 1 34 3.401197 3.401197 523 +toler 1 33 3.433987 3.433987 533 +board 1 33 3.433987 3.433987 528 +express 1 32 3.465736 3.465736 540 +fault 1 32 3.465736 3.465736 547 +human 1 32 3.465736 3.465736 546 +titl 1 31 3.496508 3.496508 556 +produc 1 30 3.555348 3.555348 572 +specifi 1 30 3.555348 3.555348 568 +chair 1 29 3.583519 3.583519 596 +except 1 28 3.610918 3.610918 607 +actual 1 28 3.610918 3.610918 604 +though 1 27 3.637586 3.637586 622 +determin 1 27 3.637586 3.637586 630 +spent 1 25 3.737670 3.737670 676 +concern 1 25 3.737670 3.737670 666 +never 1 25 3.737670 3.737670 671 +wai 1 25 3.737670 3.737670 662 +fellow 1 24 3.761200 3.761200 701 +properti 1 22 3.850148 3.850148 749 +director 1 22 3.850148 3.850148 767 +avoid 1 21 3.912023 3.912023 799 +fact 1 21 3.912023 3.912023 780 +safeti 1 20 3.951244 3.951244 817 +verif 1 20 3.951244 3.951244 826 +citi 1 19 4.007333 4.007333 874 +failur 1 18 4.060443 4.060443 898 +seem 1 18 4.060443 4.060443 899 +behavior 1 18 4.060443 4.060443 881 +engineeringunivers 1 17 4.110874 4.110874 959 +analyz 1 17 4.110874 4.110874 925 +advantag 1 16 4.174387 4.174387 987 +commerci 1 16 4.174387 4.174387 1005 +anyth 1 16 4.174387 4.174387 998 +weslei 1 16 4.174387 4.174387 983 +anywai 1 15 4.248495 4.248495 1047 +contribut 1 15 4.248495 4.248495 1021 +qual 1 15 4.248495 4.248495 1062 +style 1 15 4.248495 4.248495 1036 +train 1 14 4.317488 4.317488 1066 +washingtonbox 1 13 4.382027 4.382027 1200 +conf 1 13 4.382027 4.382027 1181 +deriv 1 13 4.382027 4.382027 1145 +nanci 1 12 4.465908 4.465908 1256 +safe 1 12 4.465908 4.465908 1274 +addison 1 12 4.465908 4.465908 1230 +valid 1 11 4.553877 4.553877 1299 +council 1 11 4.553877 4.553877 1364 +leveson 1 9 4.753590 4.753590 1540 +mode 1 9 4.753590 4.753590 1492 +irvin 1 8 4.875197 4.875197 1660 +matter 1 8 4.875197 4.875197 1627 +claim 1 8 4.875197 4.875197 1664 +elect 1 8 4.875197 4.875197 1771 +analys 1 8 4.875197 4.875197 1666 +perhap 1 8 4.875197 4.875197 1693 +chief 1 7 5.010635 5.010635 1829 +awar 1 7 5.010635 5.010635 1800 +rain 1 6 5.164786 5.164786 2137 +highwai 1 6 5.164786 5.164786 2095 +softwareengin 1 6 5.164786 5.164786 2162 +ucla 1 5 5.347108 5.347108 2502 +lesson 1 5 5.347108 5.347108 2568 +adopt 1 5 5.347108 5.347108 2467 +aircraft 1 4 5.568345 5.568345 2872 +melbourn 1 4 5.568345 5.568345 3035 +loss 1 3 5.857933 5.857933 3805 +automobil 1 3 5.857933 5.857933 3709 +aerospac 1 3 5.857933 5.857933 3555 +rsml 1 3 5.857933 5.857933 3967 +hazard 1 3 5.857933 5.857933 3191 +nobodi 1 2 6.263398 6.263398 5474 +tca 1 2 6.263398 6.263398 5941 +collis 1 2 6.263398 6.263398 5956 +thatyou 1 2 6.263398 6.263398 4682 +computingresearch 1 2 6.263398 6.263398 5957 +shuttl 1 2 6.263398 6.263398 4787 +aiaa 1 2 6.263398 6.263398 5239 +aeronaut 1 2 6.263398 6.263398 5958 +andscienc 1 2 6.263398 6.263398 5796 +safewar 1 2 6.263398 6.263398 5959 +isalso 1 2 6.263398 6.263398 5640 +pressur 1 2 6.263398 6.263398 5960 +accid 1 2 6.263398 6.263398 5961 +airport 1 2 6.263398 6.263398 5962 +levesondepart 1 1 6.957497 6.957497 15819 +mathand 1 1 6.957497 6.957497 15820 +misanthrop 1 1 6.957497 6.957497 15821 +aform 1 1 6.957497 6.957497 15822 +airspac 1 1 6.957497 6.957497 15823 +theiroffici 1 1 6.957497 6.957497 15824 +safetyresearch 1 1 6.957497 6.957497 15825 +subtop 1 1 6.957497 6.957497 15826 +commissionon 1 1 6.957497 6.957497 15827 +levesoni 1 1 6.957497 6.957497 15828 +systemsaward 1 1 6.957497 6.957497 15829 +promotingrespons 1 1 6.957497 6.957497 15830 +propertyar 1 1 6.957497 6.957497 15831 +stake 1 1 6.957497 6.957497 15832 +keynoteaddress 1 1 6.957497 6.957497 15833 +steam 1 1 6.957497 6.957497 15834 +hazardanalysi 1 1 6.957497 6.957497 15835 +writtenin 1 1 6.957497 6.957497 15836 +newrequir 1 1 6.957497 6.957497 15837 +cockpit 1 1 6.957497 6.957497 15838 +problemsand 1 1 6.957497 6.957497 15839 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..8ebe8dda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +tool 1 117 2.079442 2.079442 93 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +book 1 99 2.302585 2.302585 131 +memori 1 101 2.302585 2.302585 139 +select 1 91 2.397895 2.397895 154 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +environ 1 84 2.484907 2.484907 177 +academ 1 82 2.484907 2.484907 178 +help 1 83 2.484907 2.484907 175 +novemb 1 81 2.484907 2.484907 179 +control 1 82 2.484907 2.484907 164 +optim 1 79 2.564949 2.564949 197 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +integr 1 67 2.708050 2.708050 245 +plai 1 60 2.833213 2.833213 307 +share 1 59 2.833213 2.833213 304 +faculti 1 56 2.890372 2.890372 325 +space 1 57 2.890372 2.890372 310 +special 1 56 2.890372 2.890372 320 +major 1 56 2.890372 2.890372 315 +sampl 1 53 2.944439 2.944439 339 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +numer 1 49 3.044522 3.044522 369 +principl 1 48 3.044522 3.044522 357 +execut 1 45 3.135494 3.135494 404 +join 1 39 3.258097 3.258097 457 +author 1 39 3.258097 3.258097 450 +annual 1 40 3.258097 3.258097 458 +transact 1 39 3.258097 3.258097 438 +workstat 1 37 3.332205 3.332205 479 +singl 1 34 3.401197 3.401197 510 +award 1 34 3.401197 3.401197 523 +global 1 34 3.401197 3.401197 520 +michael 1 35 3.401197 3.401197 514 +posit 1 31 3.496508 3.496508 552 +focu 1 30 3.555348 3.555348 571 +produc 1 30 3.555348 3.555348 572 +rang 1 30 3.555348 3.555348 565 +chair 1 29 3.583519 3.583519 596 +held 1 28 3.610918 3.610918 600 +usual 1 28 3.610918 3.610918 608 +cluster 1 28 3.610918 3.610918 612 +except 1 28 3.610918 3.610918 607 +team 1 27 3.637586 3.637586 625 +proc 1 26 3.688879 3.688879 649 +consult 1 24 3.761200 3.761200 687 +fellow 1 24 3.761200 3.761200 701 +lab 1 24 3.761200 3.761200 698 +handl 1 24 3.761200 3.761200 685 +lead 1 23 3.806662 3.806662 718 +deal 1 22 3.850148 3.850148 736 +william 1 22 3.850148 3.850148 765 +corpor 1 21 3.912023 3.912023 802 +chip 1 21 3.912023 3.912023 770 +binari 1 20 3.951244 3.951244 823 +supervis 1 20 3.951244 3.951244 840 +tenni 1 20 3.951244 3.951244 838 +exploit 1 20 3.951244 3.951244 836 +particularli 1 19 4.007333 4.007333 867 +separ 1 19 4.007333 4.007333 844 +four 1 18 4.060443 4.060443 905 +former 1 17 4.110874 4.110874 956 +protect 1 17 4.110874 4.110874 935 +asplo 1 17 4.110874 4.110874 948 +latenc 1 16 4.174387 4.174387 993 +choic 1 16 4.174387 4.174387 979 +transfer 1 16 4.174387 4.174387 967 +susan 1 15 4.248495 4.248495 1050 +levi 1 14 4.317488 4.317488 1093 +dean 1 14 4.317488 4.317488 1104 +coher 1 14 4.317488 4.317488 1109 +mellon 1 13 4.382027 4.382027 1179 +karlin 1 13 4.382027 4.382027 1176 +conf 1 13 4.382027 4.382027 1181 +hank 1 12 4.465908 4.465908 1253 +carnegi 1 12 4.465908 4.465908 1260 +anna 1 12 4.465908 4.465908 1292 +multithread 1 11 4.553877 4.553877 1315 +thedepart 1 11 4.553877 4.553877 1350 +henri 1 10 4.653960 4.653960 1417 +equip 1 10 4.653960 4.653960 1459 +ski 1 10 4.653960 4.653960 1471 +bike 1 10 4.653960 4.653960 1468 +death 1 10 4.653960 4.653960 1457 +softbal 1 9 4.753590 4.753590 1594 +voelker 1 9 4.753590 4.753590 1557 +vernon 1 9 4.753590 4.753590 1556 +morgan 1 9 4.753590 4.753590 1484 +jeffrei 1 9 4.753590 4.753590 1612 +sigop 1 8 4.875197 4.875197 1727 +hold 1 8 4.875197 4.875197 1645 +span 1 8 4.875197 4.875197 1751 +evan 1 8 4.875197 4.875197 1633 +inproceed 1 8 4.875197 4.875197 1670 +egger 1 8 4.875197 4.875197 1695 +jack 1 8 4.875197 4.875197 1780 +instrument 1 7 5.010635 5.010635 1954 +smile 1 7 5.010635 5.010635 1807 +feelei 1 7 5.010635 5.010635 1859 +maxim 1 7 5.010635 5.010635 1944 +simultan 1 6 5.164786 5.164786 2155 +outstand 1 6 5.164786 5.164786 2136 +onoper 1 6 5.164786 5.164786 2048 +tobe 1 6 5.164786 5.164786 1995 +scholar 1 6 5.164786 5.164786 2180 +nine 1 6 5.164786 5.164786 2047 +tullsen 1 6 5.164786 5.164786 2081 +rebecca 1 6 5.164786 5.164786 2174 +vivek 1 6 5.164786 5.164786 2210 +theth 1 5 5.347108 5.347108 2325 +seventh 1 5 5.347108 5.347108 2464 +fetch 1 5 5.347108 5.347108 2567 +opal 1 4 5.568345 5.568345 3057 +fulbright 1 4 5.568345 5.568345 2963 +escap 1 4 5.568345 5.568345 3016 +pighin 1 4 5.568345 5.568345 2735 +chase 1 4 5.568345 5.568345 2897 +lazowska 1 4 5.568345 5.568345 2694 +narasayya 1 4 5.568345 5.568345 3065 +arch 1 4 5.568345 5.568345 2995 +prog 1 4 5.568345 5.568345 2740 +recipi 1 3 5.857933 5.857933 3627 +eleven 1 3 5.857933 5.857933 3824 +freder 1 3 5.857933 5.857933 3352 +thekkath 1 3 5.857933 5.857933 3973 +emer 1 3 5.857933 5.857933 3969 +stamm 1 3 5.857933 5.857933 3970 +projecti 1 2 6.263398 6.263398 5963 +befound 1 2 6.263398 6.263398 5964 +infam 1 2 6.263398 6.263398 5859 +dessert 1 2 6.263398 6.263398 5194 +subpag 1 2 6.263398 6.263398 5926 +jamrozik 1 2 6.263398 6.263398 5925 +chandramohan 1 2 6.263398 6.263398 5965 +projectcal 1 1 6.957497 6.957497 15840 +theetch 1 1 6.957497 6.957497 15841 +consecutiveacm 1 1 6.957497 6.957497 15842 +symposia 1 1 6.957497 6.957497 15843 +universityand 1 1 6.957497 6.957497 15844 +machineryand 1 1 6.957497 6.957497 15845 +survivedlevi 1 1 6.957497 6.957497 15846 +haveal 1 1 6.957497 6.957497 15847 +glu 1 1 6.957497 6.957497 15848 +potato 1 1 6.957497 6.957497 15849 +parlor 1 1 6.957497 6.957497 15850 +publicationsreduc 1 1 6.957497 6.957497 15851 +implementablesimultan 1 1 6.957497 6.957497 15852 +joen 1 1 6.957497 6.957497 15853 +edwardd 1 1 6.957497 6.957497 15854 +recover 1 1 6.957497 6.957497 15855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..2886a1d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +provid 1 121 2.079442 2.079442 94 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +larg 1 82 2.484907 2.484907 168 +chang 1 82 2.484907 2.484907 163 +member 1 84 2.484907 2.484907 165 +dynam 1 76 2.564949 2.564949 194 +sourc 1 77 2.564949 2.564949 201 +differ 1 66 2.708050 2.708050 253 +complex 1 64 2.772589 2.772589 269 +virtual 1 62 2.772589 2.772589 285 +result 1 65 2.772589 2.772589 281 +organ 1 65 2.772589 2.772589 265 +share 1 59 2.833213 2.833213 304 +explor 1 58 2.890372 2.890372 324 +space 1 57 2.890372 2.890372 310 +faculti 1 56 2.890372 2.890372 325 +much 1 52 2.995732 2.995732 349 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +archiv 1 49 3.044522 3.044522 364 +execut 1 45 3.135494 3.135494 404 +anoth 1 45 3.135494 3.135494 408 +prototyp 1 38 3.295837 3.295837 463 +ofth 1 36 3.367296 3.367296 491 +singl 1 34 3.401197 3.401197 510 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +storag 1 31 3.496508 3.496508 553 +exist 1 30 3.555348 3.555348 569 +option 1 30 3.555348 3.555348 575 +depend 1 29 3.583519 3.583519 583 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +manipul 1 27 3.637586 3.637586 624 +enhanc 1 26 3.688879 3.688879 644 +jeff 1 25 3.737670 3.737670 673 +interpret 1 24 3.761200 3.761200 686 +mike 1 24 3.761200 3.761200 703 +thread 1 23 3.806662 3.806662 722 +cooper 1 22 3.850148 3.850148 757 +defin 1 22 3.850148 3.850148 746 +thu 1 21 3.912023 3.912023 773 +flexibl 1 21 3.912023 3.912023 792 +protect 1 17 4.110874 4.110874 935 +permit 1 16 4.174387 4.174387 962 +easili 1 14 4.317488 4.317488 1077 +levi 1 14 4.317488 4.317488 1093 +directli 1 13 4.382027 4.382027 1141 +translat 1 13 4.382027 4.382027 1164 +believ 1 13 4.382027 4.382027 1187 +uniqu 1 12 4.465908 4.465908 1228 +hank 1 12 4.465908 4.465908 1253 +persist 1 11 4.553877 4.553877 1367 +alpha 1 11 4.553877 4.553877 1348 +trust 1 9 4.753590 4.753590 1583 +parti 1 8 4.875197 4.875197 1676 +mach 1 8 4.875197 4.875197 1669 +dylan 1 8 4.875197 4.875197 1625 +secondari 1 7 5.010635 5.010635 1884 +feelei 1 7 5.010635 5.010635 1859 +huge 1 6 5.164786 5.164786 1991 +bestor 1 6 5.164786 5.164786 2099 +risc 1 6 5.164786 5.164786 2016 +duke 1 6 5.164786 5.164786 2231 +vivek 1 6 5.164786 5.164786 2210 +tiwari 1 5 5.347108 5.347108 2385 +opal 1 4 5.568345 5.568345 3057 +simplifi 1 4 5.568345 5.568345 3066 +mip 1 4 5.568345 5.568345 2738 +transmit 1 4 5.568345 5.568345 2835 +lazowska 1 4 5.568345 5.568345 2694 +chase 1 4 5.568345 5.568345 2897 +narasayya 1 4 5.568345 5.568345 3065 +databaseof 1 2 6.263398 6.263398 4696 +ashutosh 1 2 6.263398 6.263398 5966 +mcname 1 2 6.263398 6.263398 5875 +projectop 1 1 6.957497 6.957497 15856 +tunedto 1 1 6.957497 6.957497 15857 +numberof 1 1 6.957497 6.957497 15858 +andcooper 1 1 6.957497 6.957497 15859 +directlycommun 1 1 6.957497 6.957497 15860 +addressspac 1 1 6.957497 6.957497 15861 +domainthat 1 1 6.957497 6.957497 15862 +oneprocess 1 1 6.957497 6.957497 15863 +protectionstructur 1 1 6.957497 6.957497 15864 +relationshipbetween 1 1 6.957497 6.957497 15865 +canimprov 1 1 6.957497 6.957497 15866 +cooperatingappl 1 1 6.957497 6.957497 15867 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..f495fa12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +well 1 109 2.197225 2.197225 121 +topic 1 114 2.197225 2.197225 110 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +octob 1 89 2.397895 2.397895 156 +school 1 84 2.484907 2.484907 188 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +meet 1 72 2.639057 2.639057 229 +sieg 1 69 2.708050 2.708050 260 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +virtual 1 62 2.772589 2.772589 285 +previou 1 62 2.772589 2.772589 290 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +direct 1 57 2.890372 2.890372 316 +cover 1 55 2.944439 2.944439 329 +archiv 1 49 3.044522 3.044522 364 +california 1 46 3.091042 3.091042 388 +electron 1 47 3.091042 3.091042 379 +editor 1 41 3.218876 3.218876 433 +past 1 42 3.218876 3.218876 428 +tutori 1 39 3.258097 3.258097 437 +submit 1 39 3.258097 3.258097 440 +vita 1 38 3.295837 3.295837 473 +brian 1 38 3.295837 3.295837 466 +industri 1 38 3.295837 3.295837 464 +respons 1 37 3.332205 3.332205 476 +curriculum 1 33 3.433987 3.433987 535 +chair 1 29 3.583519 3.583519 596 +consid 1 29 3.583519 3.583519 590 +propos 1 28 3.610918 3.610918 602 +constraint 1 26 3.688879 3.688879 636 +request 1 26 3.688879 3.688879 635 +aspect 1 25 3.737670 3.737670 663 +seri 1 24 3.761200 3.761200 708 +known 1 24 3.761200 3.761200 702 +lead 1 23 3.806662 3.806662 718 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +born 1 21 3.912023 3.912023 798 +qualiti 1 20 3.951244 3.951244 832 +encourag 1 18 4.060443 4.060443 880 +accept 1 18 4.060443 4.060443 879 +anyon 1 17 4.110874 4.110874 916 +jose 1 16 4.174387 4.174387 976 +alan 1 13 4.382027 4.382027 1146 +speak 1 12 4.465908 4.465908 1283 +readi 1 12 4.465908 4.465908 1242 +itali 1 11 4.553877 4.553877 1378 +submiss 1 11 4.553877 4.553877 1298 +inproceed 1 8 4.875197 4.875197 1670 +european 1 8 4.875197 4.875197 1763 +upcom 1 8 4.875197 4.875197 1685 +depth 1 8 4.875197 4.875197 1636 +edg 1 8 4.875197 4.875197 1647 +star 1 8 4.875197 4.875197 1717 +portland 1 7 5.010635 5.010635 1878 +oopsla 1 6 5.164786 5.164786 2221 +oregon 1 5 5.347108 5.347108 2437 +imper 1 4 5.568345 5.568345 3067 +freeman 1 4 5.568345 5.568345 2725 +ident 1 4 5.568345 5.568345 2826 +andsoftwar 1 4 5.568345 5.568345 2753 +breadth 1 4 5.568345 5.568345 2695 +green 1 4 5.568345 5.568345 2848 +nato 1 3 5.857933 5.857933 3587 +kaleidoscop 1 2 6.263398 6.263398 5780 +bologna 1 2 6.263398 6.263398 5631 +programmingsystem 1 2 6.263398 6.263398 5688 +hotlin 1 2 6.263398 6.263398 5967 +hendrix 1 2 6.263398 6.263398 5490 +grave 1 2 6.263398 6.263398 5968 +war 1 2 6.263398 6.263398 5969 +collector 1 2 6.263398 6.263398 5683 +lopezgu 1 1 6.957497 6.957497 15871 +lopezlopez 1 1 6.957497 6.957497 15872 +dissertationresearch 1 1 6.957497 6.957497 15873 +publicationsgu 1 1 6.957497 6.957497 15874 +lopez 1 1 6.957497 6.957497 15868 +bjorn 1 1 6.957497 6.957497 15869 +benson 1 1 6.957497 6.957497 15870 +mayoh 1 1 6.957497 6.957497 15875 +tougu 1 1 6.957497 6.957497 15876 +jann 1 1 6.957497 6.957497 15877 +penjam 1 1 6.957497 6.957497 15878 +constraintprogram 1 1 6.957497 6.957497 15879 +instituteseri 1 1 6.957497 6.957497 15880 +publisheda 1 1 6.957497 6.957497 15881 +tutorialsi 1 1 6.957497 6.957497 15882 +conferencein 1 1 6.957497 6.957497 15883 +itsextens 1 1 6.957497 6.957497 15884 +tutorialshav 1 1 6.957497 6.957497 15885 +introductorysurvei 1 1 6.957497 6.957497 15886 +academicresearch 1 1 6.957497 6.957497 15887 +attende 1 1 6.957497 6.957497 15888 +weespeci 1 1 6.957497 6.957497 15889 +requestguidelin 1 1 6.957497 6.957497 15890 +theoopsla 1 1 6.957497 6.957497 15891 +enthusiast 1 1 6.957497 6.957497 15892 +proposalswithout 1 1 6.957497 6.957497 15893 +notif 1 1 6.957497 6.957497 15894 +withcamera 1 1 6.957497 6.957497 15895 +jimi 1 1 6.957497 6.957497 15896 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..9f480a0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +look 1 107 2.197225 2.197225 115 +graphic 1 90 2.397895 2.397895 147 +academ 1 82 2.484907 2.484907 178 +want 1 79 2.564949 2.564949 199 +best 1 59 2.833213 2.833213 299 +browser 1 56 2.890372 2.890372 313 +life 1 50 3.044522 3.044522 375 +keep 1 44 3.135494 3.135494 409 +citi 1 19 4.007333 4.007333 874 +chateau 1 16 4.174387 4.174387 997 +fourth 1 16 4.174387 4.174387 999 +countri 1 15 4.248495 4.248495 1059 +suit 1 13 4.382027 4.382027 1129 +touch 1 12 4.465908 4.465908 1288 +curiou 1 5 5.347108 5.347108 2541 +areasinclud 1 2 6.263398 6.263398 5747 +omid 1 1 6.957497 6.957497 15897 +madani 1 1 6.957497 6.957497 15898 +bhello 1 1 6.957497 6.957497 15899 +enjoytheori 1 1 6.957497 6.957497 15900 +islamicarchitectur 1 1 6.957497 6.957497 15901 +isfahan 1 1 6.957497 6.957497 15902 +nomine 1 1 6.957497 6.957497 15903 +iran 1 1 6.957497 6.957497 15904 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..4e90f8cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +washington 1 236 1.386294 1.386294 32 +area 1 144 1.945910 1.945910 80 +written 1 63 2.772589 2.772589 278 +music 1 42 3.218876 3.218876 436 +mike 1 24 3.761200 3.761200 703 +goe 1 15 4.248495 4.248495 1044 +creativ 1 8 4.875197 4.875197 1777 +academia 1 6 5.164786 5.164786 2036 +perkowitz 1 2 6.263398 6.263398 5970 +perkowitznewsflash 1 1 6.957497 6.957497 15905 +blond 1 1 6.957497 6.957497 15906 +randomfavorit 1 1 6.957497 6.957497 15907 +sheba 1 1 6.957497 6.957497 15908 +voyeur 1 1 6.957497 6.957497 15909 +grooveneedl 1 1 6.957497 6.957497 15910 +espressoresumemik 1 1 6.957497 6.957497 15911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..3a0e9861 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +access 1 102 2.302585 2.302585 136 +follow 1 92 2.397895 2.397895 143 +resum 1 79 2.564949 2.564949 217 +sieg 1 69 2.708050 2.708050 260 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +back 1 60 2.833213 2.833213 297 +browser 1 56 2.890372 2.890372 313 +tabl 1 51 2.995732 2.995732 346 +http 1 41 3.218876 3.218876 420 +short 1 36 3.367296 3.367296 499 +spent 1 25 3.737670 3.737670 676 +finish 1 22 3.850148 3.850148 748 +theunivers 1 21 3.912023 3.912023 797 +germani 1 17 4.110874 4.110874 946 +marc 1 8 4.875197 4.875197 1680 +german 1 6 5.164786 5.164786 2190 +langheinrich 1 1 6.957497 6.957497 15912 +homepagemarc 1 1 6.957497 6.957497 15915 +langheinrichuniversitt 1 1 6.957497 6.957497 15916 +bielefeld 1 1 6.957497 6.957497 15913 +washingtontechnisch 1 1 6.957497 6.957497 15917 +fakultt 1 1 6.957497 6.957497 15918 +scienceemail 1 1 6.957497 6.957497 15919 +imlangh 1 1 6.957497 6.957497 15920 +techfak 1 1 6.957497 6.957497 15921 +marclang 1 1 6.957497 6.957497 15914 +eduabout 1 1 6.957497 6.957497 15922 +myselfi 1 1 6.957497 6.957497 15923 +thefulbright 1 1 6.957497 6.957497 15924 +depthinform 1 1 6.957497 6.957497 15925 +biopost 1 1 6.957497 6.957497 15926 +addressa 1 1 6.957497 6.957497 15927 +mastersat 1 1 6.957497 6.957497 15928 +homeschoolgermanyringstra 1 1 6.957497 6.957497 15929 +maintalphon 1 1 6.957497 6.957497 15930 +paulusplatz 1 1 6.957497 6.957497 15931 +bielefeldphon 1 1 6.957497 6.957497 15932 +woodlawn 1 1 6.957497 6.957497 15933 +formatmarc 1 1 6.957497 6.957497 15934 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..885eace6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +applic 1 170 1.791759 1.791759 56 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +check 1 115 2.197225 2.197225 118 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +graphic 1 90 2.397895 2.397895 147 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +academ 1 82 2.484907 2.484907 178 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +larg 1 82 2.484907 2.484907 168 +environ 1 84 2.484907 2.484907 177 +interfac 1 79 2.564949 2.564949 209 +resum 1 79 2.564949 2.564949 217 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +order 1 69 2.708050 2.708050 249 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +sieg 1 69 2.708050 2.708050 260 +interact 1 62 2.772589 2.772589 270 +septemb 1 65 2.772589 2.772589 274 +share 1 59 2.833213 2.833213 304 +march 1 61 2.833213 2.833213 295 +explor 1 58 2.890372 2.890372 324 +space 1 57 2.890372 2.890372 310 +special 1 56 2.890372 2.890372 320 +investig 1 51 2.995732 2.995732 353 +visual 1 48 3.044522 3.044522 372 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +tutori 1 39 3.258097 3.258097 437 +human 1 32 3.465736 3.465736 546 +collabor 1 32 3.465736 3.465736 543 +given 1 32 3.465736 3.465736 538 +transform 1 32 3.465736 3.465736 542 +scale 1 28 3.610918 3.610918 613 +manipul 1 27 3.637586 3.637586 624 +wai 1 25 3.737670 3.737670 662 +displai 1 23 3.806662 3.806662 712 +cooper 1 22 3.850148 3.850148 757 +geometr 1 19 4.007333 4.007333 852 +hierarch 1 15 4.248495 4.248495 1018 +stephen 1 11 4.553877 4.553877 1342 +fill 1 11 4.553877 4.553877 1349 +itali 1 11 4.553877 4.553877 1378 +packard 1 10 4.653960 4.653960 1444 +classif 1 9 4.753590 4.753590 1586 +partner 1 8 4.875197 4.875197 1648 +yang 1 8 4.875197 4.875197 1652 +baker 1 7 5.010635 5.010635 1812 +chief 1 7 5.010635 5.010635 1829 +bell 1 6 5.164786 5.164786 2224 +carlson 1 5 5.347108 5.347108 2351 +patent 1 5 5.347108 5.347108 2574 +bricker 1 4 5.568345 5.568345 3050 +assess 1 4 5.568345 5.568345 2724 +lauren 1 3 5.857933 5.857933 3251 +metip 1 3 5.857933 5.857933 3937 +marla 1 2 6.263398 6.263398 4510 +crime 1 2 6.263398 6.263398 5972 +cscl 1 2 6.263398 6.263398 5837 +stevetanimoto 1 2 6.263398 6.263398 5835 +eick 1 2 6.263398 6.263398 5971 +burnett 1 2 6.263398 6.263398 4578 +bentlei 1 1 6.957497 6.957497 15935 +interestsgraph 1 1 6.957497 6.957497 15936 +coimag 1 1 6.957497 6.957497 15937 +devleop 1 1 6.957497 6.957497 15938 +contol 1 1 6.957497 6.957497 15939 +cansimultan 1 1 6.957497 6.957497 15940 +publicationsbak 1 1 6.957497 6.957497 15941 +bohu 1 1 6.957497 6.957497 15942 +margaret 1 1 6.957497 6.957497 15943 +sorento 1 1 6.957497 6.957497 15944 +apparatu 1 1 6.957497 6.957497 15945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..57e0231c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +applic 1 170 1.791759 1.791759 56 +phone 1 175 1.791759 1.791759 45 +implement 1 152 1.791759 1.791759 52 +architectur 1 139 1.945910 1.945910 77 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +stuff 1 87 2.484907 2.484907 171 +dynam 1 76 2.564949 2.564949 194 +optim 1 79 2.564949 2.564949 197 +good 1 77 2.564949 2.564949 200 +workshop 1 71 2.639057 2.639057 239 +java 1 70 2.708050 2.708050 248 +plai 1 60 2.833213 2.833213 307 +think 1 57 2.890372 2.890372 314 +browser 1 56 2.890372 2.890372 313 +processor 1 54 2.944439 2.944439 335 +extens 1 53 2.944439 2.944439 340 +februari 1 54 2.944439 2.944439 328 +local 1 55 2.944439 2.944439 334 +effect 1 46 3.091042 3.091042 385 +fast 1 42 3.218876 3.218876 429 +past 1 42 3.218876 3.218876 428 +produc 1 30 3.555348 3.555348 572 +constraint 1 26 3.688879 3.688879 636 +bookmark 1 26 3.688879 3.688879 639 +interpret 1 24 3.761200 3.761200 686 +runtim 1 19 4.007333 4.007333 858 +figur 1 18 4.060443 4.060443 903 +bershad 1 18 4.060443 4.060443 902 +event 1 18 4.060443 4.060443 896 +modern 1 16 4.174387 4.174387 966 +side 1 15 4.248495 4.248495 1022 +goe 1 15 4.248495 4.248495 1044 +susan 1 15 4.248495 4.248495 1050 +black 1 10 4.653960 4.653960 1418 +wire 1 8 4.875197 4.875197 1747 +chamber 1 8 4.875197 4.875197 1692 +egger 1 8 4.875197 4.875197 1695 +craig 1 7 5.010635 5.010635 1879 +dispatch 1 7 5.010635 5.010635 1791 +mock 1 6 5.164786 5.164786 2087 +blue 1 6 5.164786 5.164786 2227 +philipos 1 5 5.347108 5.347108 2373 +asystem 1 4 5.568345 5.568345 2612 +andp 1 4 5.568345 5.568345 2811 +pardyak 1 4 5.568345 5.568345 3043 +ausland 1 3 5.857933 5.857933 3917 +matthai 1 2 6.263398 6.263398 4514 +withprofessor 1 2 6.263398 6.263398 5180 +eggersand 1 2 6.263398 6.263398 4522 +ribbon 1 2 6.263398 6.263398 5973 +compileri 1 1 6.957497 6.957497 15946 +beast 1 1 6.957497 6.957497 15947 +shortterm 1 1 6.957497 6.957497 15948 +basedsystem 1 1 6.957497 6.957497 15949 +canbenefit 1 1 6.957497 6.957497 15950 +onprogram 1 1 6.957497 6.957497 15951 +automaticdynam 1 1 6.957497 6.957497 15952 +frequentlymiscellan 1 1 6.957497 6.957497 15953 +importancefrom 1 1 6.957497 6.957497 15954 +abuwhi 1 1 6.957497 6.957497 15955 +campaign 1 1 6.957497 6.957497 15956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..61e1142b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +person 1 111 2.197225 2.197225 117 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +octob 1 89 2.397895 2.397895 156 +contain 1 81 2.484907 2.484907 174 +messag 1 76 2.564949 2.564949 212 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +involv 1 71 2.639057 2.639057 227 +free 1 73 2.639057 2.639057 224 +workshop 1 71 2.639057 2.639057 239 +laboratori 1 63 2.772589 2.772589 292 +guid 1 63 2.772589 2.772589 267 +march 1 61 2.833213 2.833213 295 +juli 1 60 2.833213 2.833213 305 +index 1 56 2.890372 2.890372 309 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +advisor 1 51 2.995732 2.995732 355 +maintain 1 51 2.995732 2.995732 342 +adapt 1 46 3.091042 3.091042 387 +answer 1 45 3.135494 3.135494 391 +futur 1 41 3.218876 3.218876 427 +past 1 42 3.218876 3.218876 428 +live 1 40 3.258097 3.258097 451 +electr 1 38 3.295837 3.295837 461 +industri 1 38 3.295837 3.295837 464 +game 1 36 3.367296 3.367296 498 +dissert 1 32 3.465736 3.465736 549 +graph 1 30 3.555348 3.555348 576 +produc 1 30 3.555348 3.555348 572 +pass 1 28 3.610918 3.610918 611 +bookmark 1 26 3.688879 3.688879 639 +head 1 23 3.806662 3.806662 732 +rout 1 21 3.912023 3.912023 793 +chip 1 21 3.912023 3.912023 770 +hous 1 21 3.912023 3.912023 801 +fine 1 20 3.951244 3.951244 822 +render 1 17 4.110874 4.110874 947 +medic 1 17 4.110874 4.110874 958 +cambridg 1 16 4.174387 4.174387 1008 +carl 1 15 4.248495 4.248495 1024 +countri 1 15 4.248495 4.248495 1059 +princeton 1 15 4.248495 4.248495 1042 +floor 1 14 4.317488 4.317488 1070 +massachusett 1 14 4.317488 4.317488 1118 +menu 1 13 4.382027 4.382027 1156 +larri 1 13 4.382027 4.382027 1142 +canada 1 13 4.382027 4.382027 1158 +speak 1 12 4.465908 4.465908 1283 +volum 1 11 4.553877 4.553877 1347 +mesh 1 11 4.553877 4.553877 1351 +packet 1 10 4.653960 4.653960 1415 +label 1 10 4.653960 4.653960 1423 +purdu 1 10 4.653960 4.653960 1466 +coast 1 8 4.875197 4.875197 1746 +mile 1 8 4.875197 4.875197 1743 +angel 1 8 4.875197 4.875197 1779 +creativ 1 8 4.875197 4.875197 1777 +virginia 1 8 4.875197 4.875197 1659 +marri 1 7 5.010635 5.010635 1946 +adob 1 7 5.010635 5.010635 1873 +shot 1 7 5.010635 5.010635 1898 +layout 1 6 5.164786 5.164786 2183 +east 1 5 5.347108 5.347108 2472 +snyder 1 5 5.347108 5.347108 2359 +chaotic 1 5 5.347108 5.347108 2566 +remain 1 5 5.347108 5.347108 2278 +amus 1 5 5.347108 5.347108 2366 +neil 1 4 5.568345 5.568345 2841 +ebel 1 4 5.568345 5.568345 2756 +tester 1 4 5.568345 5.568345 2754 +mcmurchi 1 4 5.568345 5.568345 2757 +gregori 1 4 5.568345 5.568345 2928 +dine 1 3 5.857933 5.857933 3472 +mitsubishi 1 3 5.857933 5.857933 3842 +merl 1 3 5.857933 5.857933 3843 +mactest 1 3 5.857933 5.857933 3972 +fashion 1 3 5.857933 5.857933 3699 +neighborhood 1 3 5.857933 5.857933 3242 +jar 1 3 5.857933 5.857933 3223 +mckenzi 1 2 6.263398 6.263398 5974 +andwork 1 2 6.263398 6.263398 5403 +projectsi 1 2 6.263398 6.263398 5931 +isomorph 1 2 6.263398 6.263398 5976 +knowna 1 2 6.263398 6.263398 5480 +gemini 1 2 6.263398 6.263398 5975 +shirt 1 2 6.263398 6.263398 5977 +farm 1 2 6.263398 6.263398 4115 +broadwai 1 1 6.957497 6.957497 15957 +projectsgonna 1 1 6.957497 6.957497 15958 +teenag 1 1 6.957497 6.957497 15959 +lobotomi 1 1 6.957497 6.957497 15960 +ramonesi 1 1 6.957497 6.957497 15961 +projectconcern 1 1 6.957497 6.957497 15962 +copiou 1 1 6.957497 6.957497 15963 +expatri 1 1 6.957497 6.957497 15964 +onchaot 1 1 6.957497 6.957497 15965 +routingwith 1 1 6.957497 6.957497 15966 +torusnetwork 1 1 6.957497 6.957497 15967 +thecranium 1 1 6.957497 6.957497 15968 +compatiblewith 1 1 6.957497 6.957497 15969 +netlist 1 1 6.957497 6.957497 15970 +calledgemini 1 1 6.957497 6.957497 15971 +schemat 1 1 6.957497 6.957497 15972 +cranium 1 1 6.957497 6.957497 15973 +packetrout 1 1 6.957497 6.957497 15974 +andcommun 1 1 6.957497 6.957497 15975 +tomactest 1 1 6.957497 6.957497 15976 +arlington 1 1 6.957497 6.957497 15977 +livein 1 1 6.957497 6.957497 15978 +ofballard 1 1 6.957497 6.957497 15979 +artworkcr 1 1 6.957497 6.957497 15980 +photoshop 1 1 6.957497 6.957497 15981 +ownedthi 1 1 6.957497 6.957497 15982 +onlyth 1 1 6.957497 6.957497 15983 +correctlyguess 1 1 6.957497 6.957497 15984 +toriddl 1 1 6.957497 6.957497 15985 +jour 1 1 6.957497 6.957497 15986 +honei 1 1 6.957497 6.957497 15987 +myuncl 1 1 6.957497 6.957497 15988 +edmonton 1 1 6.957497 6.957497 15989 +alberta 1 1 6.957497 6.957497 15990 +linkschairman 1 1 6.957497 6.957497 15991 +linksnorm 1 1 6.957497 6.957497 15992 +halcyon 1 1 6.957497 6.957497 15993 +eugen 1 1 6.957497 6.957497 15994 +spafford 1 1 6.957497 6.957497 15995 +randi 1 1 6.957497 6.957497 15996 +pausch 1 1 6.957497 6.957497 15997 +wallach 1 1 6.957497 6.957497 15998 +scool 1 1 6.957497 6.957497 15999 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..aeb75110 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +report 1 131 2.079442 2.079442 92 +analysi 1 124 2.079442 2.079442 98 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +proceed 1 93 2.397895 2.397895 152 +ieee 1 86 2.484907 2.484907 190 +decemb 1 80 2.564949 2.564949 215 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +servic 1 72 2.639057 2.639057 236 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +receiv 1 66 2.708050 2.708050 244 +creat 1 63 2.772589 2.772589 277 +improv 1 62 2.772589 2.772589 289 +sever 1 56 2.890372 2.890372 322 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +processor 1 54 2.944439 2.944439 335 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +run 1 51 2.995732 2.995732 347 +hardwar 1 51 2.995732 2.995732 350 +telephon 1 50 3.044522 3.044522 373 +principl 1 48 3.044522 3.044522 357 +adapt 1 46 3.091042 3.091042 387 +protocol 1 45 3.135494 3.135494 407 +describ 1 45 3.135494 3.135494 400 +mechan 1 43 3.178054 3.178054 416 +http 1 41 3.218876 3.218876 420 +transact 1 39 3.258097 3.258097 438 +winter 1 36 3.367296 3.367296 500 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +posit 1 31 3.496508 3.496508 552 +rang 1 30 3.555348 3.555348 565 +graph 1 30 3.555348 3.555348 576 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +load 1 28 3.610918 3.610918 601 +compar 1 26 3.688879 3.688879 648 +request 1 26 3.688879 3.688879 635 +enabl 1 26 3.688879 3.688879 655 +spent 1 25 3.737670 3.737670 676 +primari 1 25 3.737670 3.737670 669 +lab 1 24 3.761200 3.761200 698 +demonstr 1 24 3.761200 3.761200 694 +flow 1 24 3.761200 3.761200 700 +deal 1 22 3.850148 3.850148 736 +corpor 1 21 3.912023 3.912023 802 +similar 1 21 3.912023 3.912023 771 +kernel 1 20 3.951244 3.951244 825 +safeti 1 20 3.951244 3.951244 817 +spend 1 19 4.007333 4.007333 850 +region 1 19 4.007333 4.007333 875 +germani 1 17 4.110874 4.110874 946 +anyon 1 17 4.110874 4.110874 916 +protect 1 17 4.110874 4.110874 935 +commerci 1 16 4.174387 4.174387 1005 +princeton 1 15 4.248495 4.248495 1042 +contribut 1 15 4.248495 4.248495 1021 +near 1 14 4.317488 4.317488 1091 +spin 1 14 4.317488 4.317488 1121 +achiev 1 14 4.317488 4.317488 1088 +happi 1 14 4.317488 4.317488 1079 +pretti 1 13 4.382027 4.382027 1191 +safe 1 12 4.465908 4.465908 1274 +usenix 1 12 4.465908 4.465908 1240 +abil 1 11 4.553877 4.553877 1341 +custom 1 10 4.653960 4.653960 1414 +elimin 1 9 4.753590 4.753590 1558 +marc 1 8 4.875197 4.875197 1680 +hack 1 7 5.010635 5.010635 1950 +fifth 1 7 5.010635 5.010635 1931 +bell 1 6 5.164786 5.164786 2224 +isth 1 5 5.347108 5.347108 2532 +sole 1 4 5.568345 5.568345 2592 +sell 1 4 5.568345 5.568345 2935 +fiuczynski 1 3 5.857933 5.857933 3390 +scratch 1 3 5.857933 5.857933 3140 +forappl 1 3 5.857933 5.857933 3929 +anin 1 3 5.857933 5.857933 3354 +fifteenth 1 3 5.857933 5.857933 3868 +linker 1 3 5.857933 5.857933 3157 +namespac 1 3 5.857933 5.857933 3957 +backgroundi 1 2 6.263398 6.263398 5878 +highschool 1 2 6.263398 6.263398 5672 +ofproject 1 2 6.263398 6.263398 4446 +shortcom 1 2 6.263398 6.263398 5978 +inord 1 2 6.263398 6.263398 4824 +linkabl 1 2 6.263398 6.263398 5979 +andcollect 1 2 6.263398 6.263398 4249 +grewup 1 1 6.957497 6.957497 16001 +sseldorf 1 1 6.957497 6.957497 16002 +fromrutg 1 1 6.957497 6.957497 16003 +mitr 1 1 6.957497 6.957497 16004 +proprietor 1 1 6.957497 6.957497 16005 +companythat 1 1 6.957497 6.957497 16006 +setof 1 1 6.957497 6.957497 16007 +chasi 1 1 6.957497 6.957497 16008 +univoic 1 1 6.957497 6.957497 16009 +cardsand 1 1 6.957497 6.957497 16010 +vxwork 1 1 6.957497 6.957497 16011 +compellingperform 1 1 6.957497 6.957497 16012 +tosimilar 1 1 6.957497 6.957497 16013 +contacthttp 1 1 6.957497 6.957497 16000 +anextens 1 1 6.957497 6.957497 16014 +betterperform 1 1 6.957497 6.957497 16015 +conventionaloper 1 1 6.957497 6.957497 16016 +technicalconfer 1 1 6.957497 6.957497 16017 +describeshow 1 1 6.957497 6.957497 16018 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..e2c4c4d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +machin 1 129 2.079442 2.079442 95 +real 1 93 2.397895 2.397895 144 +help 1 83 2.484907 2.484907 175 +requir 1 81 2.484907 2.484907 167 +method 1 80 2.564949 2.564949 213 +practic 1 70 2.708050 2.708050 246 +compar 1 26 3.688879 3.688879 648 +bound 1 26 3.688879 3.688879 659 +sort 1 22 3.850148 3.850148 738 +rout 1 21 3.912023 3.912023 793 +predict 1 19 4.007333 4.007333 855 +minim 1 18 4.060443 4.060443 887 +lower 1 18 4.060443 4.060443 886 +topolog 1 14 4.317488 4.317488 1089 +mesh 1 11 4.553877 4.553877 1351 +router 1 8 4.875197 4.875197 1772 +versu 1 6 5.164786 5.164786 2052 +upper 1 5 5.347108 5.347108 2481 +melani 1 2 6.263398 6.263398 5784 +fulgham 1 1 6.957497 6.957497 16020 +deflect 1 1 6.957497 6.957497 16019 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..7cd0b040 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +take 1 97 2.302585 2.302585 134 +homepag 1 93 2.397895 2.397895 148 +imag 1 91 2.397895 2.397895 161 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +stuff 1 87 2.484907 2.484907 171 +exampl 1 77 2.564949 2.564949 195 +nation 1 74 2.639057 2.639057 240 +main 1 67 2.708050 2.708050 256 +guid 1 63 2.772589 2.772589 267 +plan 1 65 2.772589 2.772589 272 +visit 1 63 2.772589 2.772589 288 +done 1 47 3.091042 3.091042 381 +kind 1 32 3.465736 3.465736 541 +transform 1 32 3.465736 3.465736 542 +retriev 1 27 3.637586 3.637586 621 +try 1 22 3.850148 3.850148 764 +similar 1 21 3.912023 3.912023 771 +fact 1 21 3.912023 3.912023 780 +engineeringunivers 1 17 4.110874 4.110874 959 +commerci 1 16 4.174387 4.174387 1005 +english 1 15 4.248495 4.248495 1033 +trip 1 14 4.317488 4.317488 1113 +meng 1 12 4.465908 4.465908 1214 +newspap 1 12 4.465908 4.465908 1280 +scienceat 1 11 4.553877 4.553877 1375 +island 1 11 4.553877 4.553877 1345 +undergrad 1 9 4.753590 4.753590 1589 +charg 1 9 4.753590 4.753590 1582 +pennsylvania 1 7 5.010635 5.010635 1932 +interestsi 1 7 5.010635 5.010635 1969 +huge 1 6 5.164786 5.164786 1991 +snapshot 1 5 5.347108 5.347108 2303 +singapor 1 5 5.347108 5.347108 2487 +washingtonseattl 1 4 5.568345 5.568345 3044 +heng 1 2 6.263398 6.263398 5202 +strait 1 2 6.263398 6.263398 5980 +homepagemenghe 1 1 6.957497 6.957497 16021 +edubox 1 1 6.957497 6.957497 16022 +findimag 1 1 6.957497 6.957497 16023 +virag 1 1 6.957497 6.957497 16024 +andqbicar 1 1 6.957497 6.957497 16025 +singaporesingapor 1 1 6.957497 6.957497 16026 +infomap 1 1 6.957497 6.957497 16027 +andstatist 1 1 6.957497 6.957497 16028 +singaporeonlin 1 1 6.957497 6.957497 16029 +boardi 1 1 6.957497 6.957497 16030 +anintellig 1 1 6.957497 6.957497 16031 +menghe 1 1 6.957497 6.957497 16032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..94e62987 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +technic 1 100 2.302585 2.302585 140 +real 1 93 2.397895 2.397895 144 +optim 1 79 2.564949 2.564949 197 +workshop 1 71 2.639057 2.639057 239 +laboratori 1 63 2.772589 2.772589 292 +organ 1 65 2.772589 2.772589 265 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +life 1 50 3.044522 3.044522 375 +microsoft 1 38 3.295837 3.295837 468 +game 1 36 3.367296 3.367296 498 +michael 1 35 3.401197 3.401197 514 +represent 1 35 3.401197 3.401197 512 +chair 1 29 3.583519 3.583519 596 +static 1 27 3.637586 3.637586 619 +properti 1 22 3.850148 3.850148 749 +particularli 1 19 4.007333 4.007333 867 +previous 1 17 4.110874 4.110874 923 +debug 1 17 4.110874 4.110874 944 +philosophi 1 13 4.382027 4.382027 1167 +carri 1 13 4.382027 4.382027 1152 +awai 1 10 4.653960 4.653960 1447 +intermedi 1 9 4.753590 4.753590 1497 +cryptographi 1 9 4.753590 4.753590 1512 +serial 1 7 5.010635 5.010635 1975 +intellectu 1 7 5.010635 5.010635 1847 +occasion 1 7 5.010635 5.010635 1905 +sciencedepart 1 6 5.164786 5.164786 2172 +slice 1 4 5.568345 5.568345 2622 +popl 1 4 5.568345 5.568345 3068 +denot 1 3 5.857933 5.857933 3147 +ernst 1 2 6.263398 6.263398 4525 +eec 1 2 6.263398 6.263398 5981 +pagemichael 1 1 6.957497 6.957497 16033 +ernsti 1 1 6.957497 6.957497 16034 +riceunivers 1 1 6.957497 6.957497 16035 +programanalysi 1 1 6.957497 6.957497 16036 +coloc 1 1 6.957497 6.957497 16037 +semanticsi 1 1 6.957497 6.957497 16038 +resourcesfor 1 1 6.957497 6.957497 16039 +slip 1 1 6.957497 6.957497 16040 +possibleinterest 1 1 6.957497 6.957497 16041 +mernst 1 1 6.957497 6.957497 16042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..ccecdc0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +search 1 95 2.397895 2.397895 155 +sinc 1 90 2.397895 2.397895 159 +stuff 1 87 2.484907 2.484907 171 +journal 1 83 2.484907 2.484907 183 +state 1 76 2.564949 2.564949 207 +come 1 78 2.564949 2.564949 202 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +main 1 67 2.708050 2.708050 256 +view 1 70 2.708050 2.708050 254 +still 1 50 3.044522 3.044522 362 +cool 1 49 3.044522 3.044522 374 +even 1 45 3.135494 3.135494 393 +movi 1 40 3.258097 3.258097 459 +live 1 40 3.258097 3.258097 451 +electr 1 38 3.295837 3.295837 461 +expect 1 37 3.332205 3.332205 484 +field 1 37 3.332205 3.332205 482 +obtain 1 33 3.433987 3.433987 534 +travel 1 30 3.555348 3.555348 579 +quot 1 29 3.583519 3.583519 582 +consid 1 29 3.583519 3.583519 590 +american 1 27 3.637586 3.637586 634 +arrai 1 27 3.637586 3.637586 627 +spent 1 25 3.737670 3.737670 676 +grad 1 20 3.951244 3.951244 837 +left 1 19 4.007333 4.007333 851 +els 1 19 4.007333 4.007333 843 +event 1 18 4.060443 4.060443 896 +squar 1 14 4.317488 4.317488 1082 +spin 1 14 4.317488 4.317488 1121 +danc 1 12 4.465908 4.465908 1278 +grow 1 12 4.465908 4.465908 1209 +metacrawl 1 10 4.653960 4.653960 1455 +grew 1 8 4.875197 4.875197 1742 +solomon 1 8 4.875197 4.875197 1716 +cultur 1 7 5.010635 5.010635 1951 +bit 1 7 5.010635 5.010635 1833 +wouldn 1 7 5.010635 5.010635 1970 +mock 1 6 5.164786 5.164786 2087 +whichi 1 6 5.164786 5.164786 2056 +oopsla 1 6 5.164786 5.164786 2221 +altavista 1 6 5.164786 5.164786 2222 +neither 1 6 5.164786 5.164786 1990 +matthew 1 6 5.164786 5.164786 2193 +upper 1 5 5.347108 5.347108 2481 +chess 1 5 5.347108 5.347108 2486 +volunt 1 5 5.347108 5.347108 2307 +lili 1 5 5.347108 5.347108 2240 +fulbright 1 4 5.568345 5.568345 2963 +spanish 1 4 5.568345 5.568345 3017 +marku 1 3 5.857933 5.857933 3872 +district 1 3 5.857933 5.857933 3756 +karlsruh 1 3 5.857933 5.857933 3689 +latin 1 3 5.857933 5.857933 3741 +deutsch 1 3 5.857933 5.857933 3802 +diplom 1 2 6.263398 6.263398 5982 +umass 1 2 6.263398 6.263398 5899 +grante 1 2 6.263398 6.263398 4914 +labyrinth 1 2 6.263398 6.263398 5983 +mainstream 1 2 6.263398 6.263398 5437 +salsa 1 2 6.263398 6.263398 5984 +colloquia 1 2 6.263398 6.263398 4710 +mossi 1 2 6.263398 6.263398 5801 +rttemberg 1 1 6.957497 6.957497 16043 +anotherpart 1 1 6.957497 6.957497 16044 +biberach 1 1 6.957497 6.957497 16045 +swabia 1 1 6.957497 6.957497 16046 +oberschwaben 1 1 6.957497 6.957497 16047 +solitud 1 1 6.957497 6.957497 16048 +dieangst 1 1 6.957497 6.957497 16049 +torwart 1 1 6.957497 6.957497 16050 +beim 1 1 6.957497 6.957497 16051 +elfmet 1 1 6.957497 6.957497 16052 +handk 1 1 6.957497 6.957497 16053 +merengu 1 1 6.957497 6.957497 16054 +publicationssepar 1 1 6.957497 6.957497 16055 +olympiad 1 1 6.957497 6.957497 16056 +yerewan 1 1 6.957497 6.957497 16057 +csek 1 1 6.957497 6.957497 16058 +csebi 1 1 6.957497 6.957497 16059 +cse 1 1 6.957497 6.957497 16060 +studentsimag 1 1 6.957497 6.957497 16061 +engineeringy 1 1 6.957497 6.957497 16062 +realaudio 1 1 6.957497 6.957497 16063 +linksand 1 1 6.957497 6.957497 16064 +toil 1 1 6.957497 6.957497 16065 +unto 1 1 6.957497 6.957497 16066 +glorywa 1 1 6.957497 6.957497 16067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..02af6e6d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +vivek 1 6 5.164786 5.164786 2210 +narasayya 1 4 5.568345 5.568345 3065 +informationresearch 1 3 5.857933 5.857933 3675 +nara 1 1 6.957497 6.957497 16068 +interestspap 1 1 6.957497 6.957497 16069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..707ce7be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +seattl 1 120 2.079442 2.079442 103 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +search 1 95 2.397895 2.397895 155 +internet 1 83 2.484907 2.484907 186 +intellig 1 72 2.639057 2.639057 225 +name 1 72 2.639057 2.639057 220 +sieg 1 69 2.708050 2.708050 260 +order 1 69 2.708050 2.708050 249 +artifici 1 63 2.772589 2.772589 280 +collect 1 65 2.772589 2.772589 268 +digit 1 52 2.995732 2.995732 348 +finger 1 52 2.995732 2.995732 354 +much 1 52 2.995732 2.995732 349 +principl 1 48 3.044522 3.044522 357 +friend 1 48 3.044522 3.044522 376 +quarter 1 47 3.091042 3.091042 389 +favorit 1 44 3.135494 3.135494 410 +movi 1 40 3.258097 3.258097 459 +tech 1 35 3.401197 3.401197 515 +india 1 32 3.465736 3.465736 550 +autumn 1 31 3.496508 3.496508 558 +travel 1 30 3.555348 3.555348 579 +finish 1 22 3.850148 3.850148 748 +stop 1 17 4.110874 4.110874 942 +adam 1 17 4.110874 4.110874 934 +cook 1 10 4.653960 4.653960 1464 +sound 1 9 4.753590 4.753590 1605 +heavi 1 7 5.010635 5.010635 1841 +alphabet 1 6 5.164786 5.164786 1980 +dougla 1 5 5.347108 5.347108 2471 +delhi 1 5 5.347108 5.347108 2530 +radio 1 4 5.568345 5.568345 3025 +skate 1 4 5.568345 5.568345 3046 +terri 1 3 5.857933 5.857933 3264 +impli 1 3 5.857933 5.857933 3348 +astronomi 1 3 5.857933 5.857933 3974 +coin 1 3 5.857933 5.857933 3799 +pelham 1 2 6.263398 6.263398 4988 +grenvil 1 2 6.263398 6.263398 4989 +himanshu 1 1 6.957497 6.957497 16070 +nautiy 1 1 6.957497 6.957497 16071 +pagehimanshu 1 1 6.957497 6.957497 16072 +nautiyalthi 1 1 6.957497 6.957497 16073 +nautiyaldept 1 1 6.957497 6.957497 16074 +edugod 1 1 6.957497 6.957497 16075 +gift 1 1 6.957497 6.957497 16076 +personkind 1 1 6.957497 6.957497 16077 +pratchett 1 1 6.957497 6.957497 16078 +wodehouseth 1 1 6.957497 6.957497 16079 +aviat 1 1 6.957497 6.957497 16080 +numismat 1 1 6.957497 6.957497 16081 +profound 1 1 6.957497 6.957497 16082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..534ce3c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +paper 1 205 1.609438 1.609438 38 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +seattl 1 120 2.079442 2.079442 103 +version 1 113 2.197225 2.197225 122 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +comment 1 93 2.397895 2.397895 146 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +know 1 80 2.564949 2.564949 198 +intellig 1 72 2.639057 2.639057 225 +servic 1 72 2.639057 2.639057 236 +line 1 75 2.639057 2.639057 231 +involv 1 71 2.639057 2.639057 227 +artifici 1 63 2.772589 2.772589 280 +automat 1 61 2.833213 2.833213 306 +sever 1 56 2.890372 2.890372 322 +browser 1 56 2.890372 2.890372 313 +date 1 51 2.995732 2.995732 344 +week 1 52 2.995732 2.995732 343 +favorit 1 44 3.135494 3.135494 410 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +random 1 34 3.401197 3.401197 511 +return 1 34 3.401197 3.401197 502 +great 1 27 3.637586 3.637586 626 +enabl 1 26 3.688879 3.688879 655 +bookmark 1 26 3.688879 3.688879 639 +alwai 1 24 3.761200 3.761200 691 +color 1 22 3.850148 3.850148 762 +tell 1 21 3.912023 3.912023 777 +anonym 1 14 4.317488 4.317488 1100 +easili 1 14 4.317488 4.317488 1077 +preliminari 1 9 4.753590 4.753590 1480 +wilson 1 9 4.753590 4.753590 1536 +awar 1 7 5.010635 5.010635 1800 +guidelin 1 7 5.010635 5.010635 1832 +divers 1 6 5.164786 5.164786 2232 +handi 1 6 5.164786 5.164786 2111 +lost 1 5 5.347108 5.347108 2358 +alsointerest 1 3 5.857933 5.857933 3813 +shortli 1 3 5.857933 5.857933 3375 +surgeri 1 3 5.857933 5.857933 3975 +javascript 1 3 5.857933 5.857933 3221 +republican 1 3 5.857933 5.857933 3815 +miscellani 1 3 5.857933 5.857933 3976 +nichola 1 3 5.857933 5.857933 3252 +uwcs 1 3 5.857933 5.857933 3977 +labyrinth 1 2 6.263398 6.263398 5983 +cynic 1 2 6.263398 6.263398 5854 +andcognit 1 2 6.263398 6.263398 5681 +temperatur 1 2 6.263398 6.263398 5985 +bitter 1 2 6.263398 6.263398 5387 +ironi 1 2 6.263398 6.263398 5986 +nonetheless 1 2 6.263398 6.263398 4681 +madeavail 1 2 6.263398 6.263398 4326 +mediocr 1 1 6.957497 6.957497 16083 +bemoan 1 1 6.957497 6.957497 16084 +hype 1 1 6.957497 6.957497 16085 +skeptic 1 1 6.957497 6.957497 16086 +automaticconstruct 1 1 6.957497 6.957497 16087 +wrapper 1 1 6.957497 6.957497 16088 +beeninvolv 1 1 6.957497 6.957497 16089 +glbal 1 1 6.957497 6.957497 16090 +infrmatin 1 1 6.957497 6.957497 16091 +sperhighwai 1 1 6.957497 6.957497 16092 +meter 1 1 6.957497 6.957497 16093 +ronald 1 1 6.957497 6.957497 16094 +reagan 1 1 6.957497 6.957497 16095 +wendel 1 1 6.957497 6.957497 16096 +berri 1 1 6.957497 6.957497 16097 +constitutesgood 1 1 6.957497 6.957497 16098 +kushmerick 1 1 6.957497 6.957497 16099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..bb8068fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +analysi 1 124 2.079442 2.079442 98 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +memori 1 101 2.302585 2.302585 139 +octob 1 89 2.397895 2.397895 156 +contain 1 81 2.484907 2.484907 174 +onlin 1 75 2.639057 2.639057 223 +test 1 66 2.708050 2.708050 252 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +march 1 61 2.833213 2.833213 295 +sever 1 56 2.890372 2.890372 322 +summer 1 56 2.890372 2.890372 311 +local 1 55 2.944439 2.944439 334 +found 1 53 2.944439 2.944439 337 +brian 1 38 3.295837 3.295837 466 +game 1 36 3.367296 3.367296 498 +known 1 24 3.761200 3.761200 702 +reduc 1 22 3.850148 3.850148 759 +sort 1 22 3.850148 3.850148 738 +safeti 1 20 3.951244 3.951244 817 +wonder 1 20 3.951244 3.951244 815 +bershad 1 18 4.060443 4.060443 902 +chateau 1 16 4.174387 4.174387 997 +overhead 1 15 4.248495 4.248495 1035 +karlin 1 13 4.382027 4.382027 1176 +nanci 1 12 4.465908 4.465908 1256 +anna 1 12 4.465908 4.465908 1292 +promot 1 12 4.465908 4.465908 1235 +isca 1 11 4.553877 4.553877 1354 +itali 1 11 4.553877 4.553877 1378 +leveson 1 9 4.753590 4.753590 1540 +wayn 1 8 4.875197 4.875197 1738 +guggenheim 1 8 4.875197 4.875197 1759 +romer 1 8 4.875197 4.875197 1706 +spare 1 6 5.164786 5.164786 2177 +invest 1 6 5.164786 5.164786 2153 +ohlrich 1 5 5.347108 5.347108 2564 +annex 1 5 5.347108 5.347108 2572 +sytem 1 4 5.568345 5.568345 3015 +superpag 1 3 5.857933 5.857933 3978 +damag 1 2 6.263398 6.263398 5687 +debut 1 1 6.957497 6.957497 16100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..022fde9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +mathemat 1 108 2.197225 2.197225 123 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +resum 1 79 2.564949 2.564949 217 +summari 1 73 2.639057 2.639057 237 +sieg 1 69 2.708050 2.708050 260 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +wednesdai 1 64 2.772589 2.772589 261 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +plai 1 60 2.833213 2.833213 307 +summer 1 56 2.890372 2.890372 311 +three 1 54 2.944439 2.944439 330 +cool 1 49 3.044522 3.044522 374 +move 1 47 3.091042 3.091042 382 +california 1 46 3.091042 3.091042 388 +quarter 1 47 3.091042 3.091042 389 +made 1 44 3.135494 3.135494 398 +york 1 41 3.218876 3.218876 435 +live 1 40 3.258097 3.258097 451 +origin 1 38 3.295837 3.295837 472 +seminar 1 38 3.295837 3.295837 470 +option 1 30 3.555348 3.555348 575 +usual 1 28 3.610918 3.610918 608 +berkelei 1 26 3.688879 3.688879 657 +departmentunivers 1 24 3.761200 3.761200 711 +daili 1 24 3.761200 3.761200 706 +ofwashington 1 22 3.850148 3.850148 766 +whole 1 17 4.110874 4.110874 940 +took 1 16 4.174387 4.174387 1010 +basketbal 1 12 4.465908 4.465908 1289 +employ 1 12 4.465908 4.465908 1291 +realiti 1 12 4.465908 4.465908 1272 +israel 1 11 4.553877 4.553877 1366 +seven 1 9 4.753590 4.753590 1561 +angel 1 8 4.875197 4.875197 1779 +potenti 1 8 4.875197 4.875197 1690 +studentcomput 1 7 5.010635 5.010635 1963 +bunch 1 7 5.010635 5.010635 1861 +hike 1 6 5.164786 5.164786 2234 +northeast 1 3 5.857933 5.857933 3922 +haifa 1 3 5.857933 5.857933 3554 +kwon 1 3 5.857933 5.857933 3690 +gershoni 1 2 6.263398 6.263398 4513 +shirt 1 2 6.263398 6.263398 5977 +washingtonoffic 1 1 6.957497 6.957497 16101 +fouryear 1 1 6.957497 6.957497 16102 +lancast 1 1 6.957497 6.957497 16103 +holon 1 1 6.957497 6.957497 16104 +amta 1 1 6.957497 6.957497 16105 +aremondai 1 1 6.957497 6.957497 16106 +tose 1 1 6.957497 6.957497 16107 +graphicsprogram 1 1 6.957497 6.957497 16108 +riderlink 1 1 6.957497 6.957497 16109 +seattletransport 1 1 6.957497 6.957497 16110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..1df1320a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +section 1 94 2.397895 2.397895 149 +pictur 1 89 2.397895 2.397895 160 +school 1 84 2.484907 2.484907 188 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +would 1 67 2.708050 2.708050 251 +sieg 1 69 2.708050 2.708050 260 +experi 1 64 2.772589 2.772589 283 +advisor 1 51 2.995732 2.995732 355 +profession 1 51 2.995732 2.995732 345 +realli 1 40 3.258097 3.258097 444 +go 1 33 3.433987 3.433987 529 +sometim 1 24 3.761200 3.761200 696 +head 1 23 3.806662 3.806662 732 +try 1 22 3.850148 3.850148 764 +color 1 22 3.850148 3.850148 762 +leav 1 21 3.912023 3.912023 772 +boston 1 19 4.007333 4.007333 862 +offici 1 18 4.060443 4.060443 894 +decid 1 14 4.317488 4.317488 1075 +embed 1 14 4.317488 4.317488 1102 +came 1 13 4.382027 4.382027 1197 +jean 1 10 4.653960 4.653960 1440 +northwest 1 7 5.010635 5.010635 1973 +accord 1 7 5.010635 5.010635 1826 +hack 1 7 5.010635 5.010635 1950 +gaetano 1 6 5.164786 5.164786 2068 +chinook 1 6 5.164786 5.164786 2229 +beer 1 6 5.164786 5.164786 2216 +german 1 6 5.164786 5.164786 2190 +myresum 1 6 5.164786 5.164786 2199 +ross 1 5 5.347108 5.347108 2243 +ortega 1 5 5.347108 5.347108 2559 +knew 1 5 5.347108 5.347108 2445 +borriello 1 5 5.347108 5.347108 2349 +wear 1 4 5.568345 5.568345 2785 +albert 1 2 6.263398 6.263398 5987 +unoffici 1 2 6.263398 6.263398 5909 +brew 1 2 6.263398 6.263398 5988 +shepherd 1 2 6.263398 6.263398 4347 +behav 1 2 6.263398 6.263398 4670 +einstein 1 1 6.957497 6.957497 16112 +pageaft 1 1 6.957497 6.957497 16113 +tequila 1 1 6.957497 6.957497 16111 +puppi 1 1 6.957497 6.957497 16114 +updatedthu 1 1 6.957497 6.957497 16115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..1084cb32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +relat 1 139 1.945910 1.945910 68 +document 1 121 2.079442 2.079442 89 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +graphic 1 90 2.397895 2.397895 147 +search 1 95 2.397895 2.397895 155 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +free 1 73 2.639057 2.639057 224 +nation 1 74 2.639057 2.639057 240 +simul 1 66 2.708050 2.708050 255 +share 1 59 2.833213 2.833213 304 +thesi 1 57 2.890372 2.890372 327 +think 1 57 2.890372 2.890372 314 +instruct 1 53 2.944439 2.944439 332 +week 1 52 2.995732 2.995732 343 +particular 1 51 2.995732 2.995732 352 +featur 1 46 3.091042 3.091042 386 +show 1 43 3.178054 3.178054 417 +around 1 43 3.178054 3.178054 415 +http 1 41 3.218876 3.218876 420 +movi 1 40 3.258097 3.258097 459 +everi 1 34 3.401197 3.401197 519 +consid 1 29 3.583519 3.583519 590 +weather 1 28 3.610918 3.610918 618 +though 1 27 3.637586 3.637586 622 +background 1 25 3.737670 3.737670 664 +trace 1 25 3.737670 3.737670 677 +sometim 1 24 3.761200 3.761200 696 +disk 1 22 3.850148 3.850148 747 +particularli 1 19 4.007333 4.007333 867 +runtim 1 19 4.007333 4.007333 858 +item 1 19 4.007333 4.007333 856 +log 1 19 4.007333 4.007333 857 +engineeringunivers 1 17 4.110874 4.110874 959 +white 1 17 4.110874 4.110874 951 +regular 1 17 4.110874 4.110874 929 +letter 1 16 4.174387 4.174387 981 +month 1 15 4.248495 4.248495 1025 +doesn 1 15 4.248495 4.248495 1055 +stori 1 14 4.317488 4.317488 1087 +washingtonbox 1 13 4.382027 4.382027 1200 +weak 1 13 4.382027 4.382027 1159 +minor 1 12 4.465908 4.465908 1237 +newspap 1 12 4.465908 4.465908 1280 +bill 1 11 4.553877 4.553877 1297 +black 1 10 4.653960 4.653960 1418 +telecommun 1 9 4.753590 4.753590 1565 +transport 1 8 4.875197 4.875197 1672 +film 1 8 4.875197 4.875197 1761 +ethic 1 7 5.010635 5.010635 1786 +courtesi 1 7 5.010635 5.010635 1953 +blue 1 6 5.164786 5.164786 2227 +quick 1 6 5.164786 5.164786 2184 +famou 1 6 5.164786 5.164786 2185 +legal 1 6 5.164786 5.164786 2094 +privaci 1 6 5.164786 5.164786 2144 +everybodi 1 5 5.347108 5.347108 2517 +oncomput 1 5 5.347108 5.347108 2326 +truli 1 5 5.347108 5.347108 2476 +festiv 1 4 5.568345 5.568345 2952 +flat 1 3 5.857933 5.857933 3853 +icon 1 3 5.857933 5.857933 3362 +unrel 1 3 5.857933 5.857933 3837 +ribbon 1 2 6.263398 6.263398 5973 +quiet 1 2 6.263398 6.263398 5203 +likewis 1 2 6.263398 6.263398 4100 +blink 1 2 6.263398 6.263398 5067 +anymor 1 2 6.263398 6.263398 5938 +gross 1 2 6.263398 6.263398 5989 +untitl 1 1 6.957497 6.957497 16119 +morri 1 1 6.957497 6.957497 16120 +pardodepart 1 1 6.957497 6.957497 16121 +washingtonusapardo 1 1 6.957497 6.957497 16122 +edunot 1 1 6.957497 6.957497 16123 +asimgsrc 1 1 6.957497 6.957497 16124 +blueribbon 1 1 6.957497 6.957497 16125 +rib_trn_plain_sm 1 1 6.957497 6.957497 16126 +opposit 1 1 6.957497 6.957497 16127 +speechprohibit 1 1 6.957497 6.957497 16128 +beth 1 1 6.957497 6.957497 16116 +pardo 1 1 6.957497 6.957497 16117 +academicsom 1 1 6.957497 6.957497 16129 +papersi 1 1 6.957497 6.957497 16130 +rtcg 1 1 6.957497 6.957497 16131 +architectureandcompil 1 1 6.957497 6.957497 16132 +otherpeopl 1 1 6.957497 6.957497 16133 +stylenon 1 1 6.957497 6.957497 16134 +academicfeatur 1 1 6.957497 6.957497 16135 +itemsbicyclesbusinessescomputersfoodhumori 1 1 6.957497 6.957497 16136 +weirdnesslinux 1 1 6.957497 6.957497 16137 +journalmusicgoofi 1 1 6.957497 6.957497 16138 +politicssci 1 1 6.957497 6.957497 16139 +dant 1 1 6.957497 6.957497 16140 +trepan 1 1 6.957497 6.957497 16141 +wors 1 1 6.957497 6.957497 16142 +courtesei 1 1 6.957497 6.957497 16118 +newhous 1 1 6.957497 6.957497 16143 +yesterdai 1 1 6.957497 6.957497 16144 +stuffpardo 1 1 6.957497 6.957497 16145 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..7a26fd80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +phone 1 175 1.791759 1.791759 45 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +find 1 111 2.197225 2.197225 111 +book 1 99 2.302585 2.302585 131 +commun 1 95 2.397895 2.397895 157 +school 1 84 2.484907 2.484907 188 +activ 1 84 2.484907 2.484907 182 +resourc 1 81 2.484907 2.484907 172 +resum 1 79 2.564949 2.564949 217 +descript 1 64 2.772589 2.772589 271 +variou 1 56 2.890372 2.890372 317 +extens 1 53 2.944439 2.944439 340 +week 1 52 2.995732 2.995732 343 +life 1 50 3.044522 3.044522 375 +third 1 43 3.178054 3.178054 412 +mechan 1 43 3.178054 3.178054 416 +music 1 42 3.218876 3.218876 436 +brian 1 38 3.295837 3.295837 466 +connect 1 37 3.332205 3.332205 485 +short 1 36 3.367296 3.367296 499 +built 1 29 3.583519 3.583519 592 +enjoi 1 26 3.688879 3.688879 660 +mine 1 26 3.688879 3.688879 654 +miscellan 1 23 3.806662 3.806662 731 +theunivers 1 21 3.912023 3.912023 797 +busi 1 21 3.912023 3.912023 784 +grad 1 20 3.951244 3.951244 837 +bershad 1 18 4.060443 4.060443 902 +coupl 1 17 4.110874 4.110874 939 +later 1 15 4.248495 4.248495 1043 +fill 1 11 4.553877 4.553877 1349 +besid 1 8 4.875197 4.875197 1681 +hike 1 6 5.164786 5.164786 2234 +outdoor 1 5 5.347108 5.347108 2514 +pardyak 1 4 5.568345 5.568345 3043 +withth 1 4 5.568345 5.568345 2805 +outdat 1 4 5.568345 5.568345 2797 +somehow 1 4 5.568345 5.568345 2974 +ofoper 1 3 5.857933 5.857933 3292 +gloriou 1 3 5.857933 5.857933 3816 +leadership 1 3 5.857933 5.857933 3320 +emerald 1 3 5.857933 5.857933 3979 +poland 1 3 5.857933 5.857933 3665 +unrel 1 3 5.857933 5.857933 3837 +przemyslaw 1 2 6.263398 6.263398 5808 +basedprogram 1 2 6.263398 6.263398 5700 +przemek 1 1 6.957497 6.957497 16148 +pardi 1 1 6.957497 6.957497 16146 +interast 1 1 6.957497 6.957497 16149 +drizzl 1 1 6.957497 6.957497 16147 +notbusi 1 1 6.957497 6.957497 16150 +happenswhen 1 1 6.957497 6.957497 16151 +projectsspinan 1 1 6.957497 6.957497 16152 +systemsgroup 1 1 6.957497 6.957497 16153 +polish 1 1 6.957497 6.957497 16154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..6d85e12c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +updat 1 191 1.609438 1.609438 41 +public 1 202 1.609438 1.609438 43 +contact 1 153 1.791759 1.791759 59 +recent 1 167 1.791759 1.791759 58 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +school 1 84 2.484907 2.484907 188 +stuff 1 87 2.484907 2.484907 171 +come 1 78 2.564949 2.564949 202 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +know 1 80 2.564949 2.564949 198 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +collect 1 65 2.772589 2.772589 268 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +variou 1 56 2.890372 2.890372 317 +undergradu 1 54 2.944439 2.944439 338 +friend 1 48 3.044522 3.044522 376 +move 1 47 3.091042 3.091042 382 +done 1 47 3.091042 3.091042 381 +anoth 1 45 3.135494 3.135494 408 +better 1 45 3.135494 3.135494 401 +made 1 44 3.135494 3.135494 398 +around 1 43 3.178054 3.178054 415 +might 1 41 3.218876 3.218876 426 +live 1 40 3.258097 3.258097 451 +annual 1 40 3.258097 3.258097 458 +paul 1 38 3.295837 3.295837 471 +electr 1 38 3.295837 3.295837 461 +jame 1 35 3.401197 3.401197 507 +return 1 34 3.401197 3.401197 502 +everi 1 34 3.401197 3.401197 519 +taught 1 33 3.433987 3.433987 526 +express 1 32 3.465736 3.465736 540 +photo 1 31 3.496508 3.496508 561 +someth 1 31 3.496508 3.496508 554 +travel 1 30 3.555348 3.555348 579 +univ 1 28 3.610918 3.610918 617 +usual 1 28 3.610918 3.610918 608 +hope 1 28 3.610918 3.610918 610 +concern 1 25 3.737670 3.737670 666 +hill 1 25 3.737670 3.737670 670 +never 1 25 3.737670 3.737670 671 +togeth 1 23 3.806662 3.806662 714 +inth 1 22 3.850148 3.850148 741 +dai 1 22 3.850148 3.850148 753 +love 1 21 3.912023 3.912023 804 +tell 1 21 3.912023 3.912023 777 +rout 1 21 3.912023 3.912023 793 +longer 1 20 3.951244 3.951244 816 +north 1 19 4.007333 4.007333 873 +particularli 1 19 4.007333 4.007333 867 +offici 1 18 4.060443 4.060443 894 +lot 1 18 4.060443 4.060443 889 +took 1 16 4.174387 4.174387 1010 +easi 1 16 4.174387 4.174387 969 +someon 1 13 4.382027 4.382027 1128 +everyon 1 13 4.382027 4.382027 1148 +prolog 1 13 4.382027 4.382027 1155 +brother 1 13 4.382027 4.382027 1189 +scan 1 12 4.465908 4.465908 1243 +rememb 1 12 4.465908 4.465908 1217 +rest 1 12 4.465908 4.465908 1259 +danc 1 12 4.465908 4.465908 1278 +chri 1 11 4.553877 4.553877 1311 +franklin 1 10 4.653960 4.653960 1436 +rapid 1 10 4.653960 4.653960 1453 +bike 1 10 4.653960 4.653960 1468 +town 1 10 4.653960 4.653960 1458 +folk 1 9 4.753590 4.753590 1597 +pagei 1 8 4.875197 4.875197 1683 +character 1 8 4.875197 4.875197 1767 +on 1 8 4.875197 4.875197 1628 +andcomput 1 8 4.875197 4.875197 1623 +partner 1 8 4.875197 4.875197 1648 +ride 1 8 4.875197 4.875197 1741 +fortun 1 7 5.010635 5.010635 1872 +necessarili 1 7 5.010635 5.010635 1899 +davi 1 7 5.010635 5.010635 1888 +throughout 1 7 5.010635 5.010635 1871 +portland 1 7 5.010635 5.010635 1878 +header 1 7 5.010635 5.010635 1787 +somewher 1 6 5.164786 5.164786 2176 +restaur 1 6 5.164786 5.164786 2230 +drop 1 6 5.164786 5.164786 2008 +poem 1 5 5.347108 5.347108 2483 +exchang 1 5 5.347108 5.347108 2310 +hire 1 4 5.568345 5.568345 2976 +glad 1 4 5.568345 5.568345 2657 +tend 1 4 5.568345 5.568345 3041 +kept 1 4 5.568345 5.568345 2762 +pagepaul 1 3 5.857933 5.857933 3669 +thesumm 1 3 5.857933 5.857933 3763 +matt 1 3 5.857933 5.857933 3792 +flat 1 3 5.857933 5.857933 3853 +aboutthi 1 2 6.263398 6.263398 5627 +oneof 1 2 6.263398 6.263398 4674 +diploma 1 2 6.263398 6.263398 5990 +bergen 1 2 6.263398 6.263398 5991 +itin 1 2 6.263398 6.263398 5992 +toseattl 1 2 6.263398 6.263398 5919 +myoffic 1 1 6.957497 6.957497 16155 +iliv 1 1 6.957497 6.957497 16156 +norwegian 1 1 6.957497 6.957497 16157 +likea 1 1 6.957497 6.957497 16158 +localchines 1 1 6.957497 6.957497 16159 +mundan 1 1 6.957497 6.957497 16160 +stuffi 1 1 6.957497 6.957497 16161 +hotlink 1 1 6.957497 6.957497 16162 +pagesstuff 1 1 6.957497 6.957497 16163 +maintainmi 1 1 6.957497 6.957497 16164 +mewher 1 1 6.957497 6.957497 16165 +inmorgan 1 1 6.957497 6.957497 16166 +fromuc 1 1 6.957497 6.957497 16167 +andy 1 1 6.957497 6.957497 16168 +ididn 1 1 6.957497 6.957497 16169 +ialso 1 1 6.957497 6.957497 16170 +stuffwhil 1 1 6.957497 6.957497 16171 +relatedact 1 1 6.957497 6.957497 16172 +evengot 1 1 6.957497 6.957497 16173 +marriag 1 1 6.957497 6.957497 16174 +joann 1 1 6.957497 6.957497 16175 +anexcus 1 1 6.957497 6.957497 16176 +ofbergen 1 1 6.957497 6.957497 16177 +hillier 1 1 6.957497 6.957497 16178 +returnedto 1 1 6.957497 6.957497 16179 +rollerblad 1 1 6.957497 6.957497 16180 +wasnow 1 1 6.957497 6.957497 16181 +hewlettpackard 1 1 6.957497 6.957497 16182 +vengeanc 1 1 6.957497 6.957497 16183 +intwo 1 1 6.957497 6.957497 16184 +inseason 1 1 6.957497 6.957497 16185 +justcommut 1 1 6.957497 6.957497 16186 +lindyhop 1 1 6.957497 6.957497 16187 +linethat 1 1 6.957497 6.957497 16188 +doctyp 1 1 6.957497 6.957497 16189 +ietf 1 1 6.957497 6.957497 16190 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..afe2d09b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +implement 1 152 1.791759 1.791759 52 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +graphic 1 90 2.397895 2.397895 147 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +decemb 1 80 2.564949 2.564949 215 +name 1 72 2.639057 2.639057 220 +symposium 1 72 2.639057 2.639057 238 +collect 1 65 2.772589 2.772589 268 +found 1 53 2.944439 2.944439 337 +much 1 52 2.995732 2.995732 349 +principl 1 48 3.044522 3.044522 357 +quarter 1 47 3.091042 3.091042 389 +even 1 45 3.135494 3.135494 393 +might 1 41 3.218876 3.218876 426 +movi 1 40 3.258097 3.258097 459 +live 1 40 3.258097 3.258097 451 +workstat 1 37 3.332205 3.332205 479 +global 1 34 3.401197 3.401197 520 +michael 1 35 3.401197 3.401197 514 +everi 1 34 3.401197 3.401197 519 +taught 1 33 3.433987 3.433987 526 +often 1 31 3.496508 3.496508 551 +cluster 1 28 3.610918 3.610918 612 +although 1 25 3.737670 3.737670 667 +william 1 22 3.850148 3.850148 765 +wonder 1 20 3.951244 3.951244 815 +mpeg 1 20 3.951244 3.951244 831 +supervis 1 20 3.951244 3.951244 840 +minut 1 20 3.951244 3.951244 810 +citi 1 19 4.007333 4.007333 874 +five 1 19 4.007333 4.007333 841 +otherwis 1 17 4.110874 4.110874 922 +qual 1 15 4.248495 4.248495 1062 +action 1 15 4.248495 4.248495 1038 +comic 1 14 4.317488 4.317488 1103 +levi 1 14 4.317488 4.317488 1093 +squar 1 14 4.317488 4.317488 1082 +karlin 1 13 4.382027 4.382027 1176 +nick 1 13 4.382027 4.382027 1180 +anna 1 12 4.465908 4.465908 1292 +rest 1 12 4.465908 4.465908 1259 +song 1 11 4.553877 4.553877 1380 +henri 1 10 4.653960 4.653960 1417 +swim 1 9 4.753590 4.753590 1599 +french 1 9 4.753590 4.753590 1511 +morgan 1 9 4.753590 4.753590 1484 +juan 1 9 4.753590 4.753590 1580 +tourist 1 8 4.875197 4.875197 1710 +surpris 1 7 5.010635 5.010635 1828 +feelei 1 7 5.010635 5.010635 1859 +cat 1 6 5.164786 5.164786 2194 +pari 1 6 5.164786 5.164786 2158 +rain 1 6 5.164786 5.164786 2137 +lucki 1 6 5.164786 5.164786 2163 +gui 1 5 5.347108 5.347108 2573 +british 1 5 5.347108 5.347108 2546 +formerli 1 5 5.347108 5.347108 2397 +door 1 5 5.347108 5.347108 2291 +darren 1 5 5.347108 5.347108 2565 +pighin 1 4 5.568345 5.568345 2735 +ta 1 4 5.568345 5.568345 3058 +breath 1 4 5.568345 5.568345 2946 +dark 1 4 5.568345 5.568345 2910 +berlin 1 3 5.857933 5.857933 3263 +marin 1 3 5.857933 5.857933 3947 +freder 1 3 5.857933 5.857933 3352 +thekkath 1 3 5.857933 5.857933 3973 +monti 1 2 6.263398 6.263398 4993 +python 1 2 6.263398 6.263398 4994 +cave 1 2 6.263398 6.263398 4959 +italian 1 2 6.263398 6.263398 5993 +simpson 1 2 6.263398 6.263398 5994 +chandramohan 1 2 6.263398 6.263398 5965 +frdric 1 1 6.957497 6.957497 16192 +lcommun 1 1 6.957497 6.957497 16193 +dani 1 1 6.957497 6.957497 16194 +corto 1 1 6.957497 6.957497 16195 +maltes 1 1 6.957497 6.957497 16196 +venis 1 1 6.957497 6.957497 16197 +traditionn 1 1 6.957497 6.957497 16198 +systemher 1 1 6.957497 6.957497 16199 +refresh 1 1 6.957497 6.957497 16191 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..18e3b6d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +seattl 1 120 2.079442 2.079442 103 +map 1 39 3.258097 3.258097 452 +anderson 1 19 4.007333 4.007333 860 +brother 1 13 4.382027 4.382027 1189 +ruth 1 7 5.010635 5.010635 1870 +wxyc 1 1 6.957497 6.957497 16200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..2e9feae6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +seattl 1 120 2.079442 2.079442 103 +redston 1 3 5.857933 5.857933 3332 +joshua 1 3 5.857933 5.857933 3333 +josh 1 2 6.263398 6.263398 5945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..338cc263 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,163 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +construct 1 139 1.945910 1.945910 82 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +click 1 142 1.945910 1.945910 78 +seattl 1 120 2.079442 2.079442 103 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +site 1 106 2.197225 2.197225 119 +make 1 111 2.197225 2.197225 120 +part 1 98 2.302585 2.302585 129 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +build 1 85 2.484907 2.484907 184 +librari 1 87 2.484907 2.484907 181 +good 1 77 2.564949 2.564949 200 +line 1 75 2.639057 2.639057 231 +write 1 72 2.639057 2.639057 222 +onlin 1 75 2.639057 2.639057 223 +knowledg 1 67 2.708050 2.708050 243 +would 1 67 2.708050 2.708050 251 +java 1 70 2.708050 2.708050 248 +interact 1 62 2.772589 2.772589 270 +visit 1 63 2.772589 2.772589 288 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +local 1 55 2.944439 2.944439 334 +extens 1 53 2.944439 2.944439 340 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +run 1 51 2.995732 2.995732 347 +profession 1 51 2.995732 2.995732 345 +get 1 46 3.091042 3.091042 380 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +live 1 40 3.258097 3.258097 451 +probabl 1 40 3.258097 3.258097 455 +map 1 39 3.258097 3.258097 452 +movi 1 40 3.258097 3.258097 459 +multi 1 36 3.367296 3.367296 493 +everi 1 34 3.401197 3.401197 519 +jame 1 35 3.401197 3.401197 507 +word 1 34 3.401197 3.401197 508 +taught 1 33 3.433987 3.433987 526 +human 1 32 3.465736 3.465736 546 +framework 1 28 3.610918 3.610918 606 +team 1 27 3.637586 3.637586 625 +great 1 27 3.637586 3.637586 626 +todai 1 25 3.737670 3.737670 672 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +famili 1 23 3.806662 3.806662 735 +ofwashington 1 22 3.850148 3.850148 766 +color 1 22 3.850148 3.850148 762 +wonder 1 20 3.951244 3.951244 815 +nice 1 20 3.951244 3.951244 809 +toolkit 1 20 3.951244 3.951244 835 +expert 1 20 3.951244 3.951244 833 +wrote 1 20 3.951244 3.951244 830 +citi 1 19 4.007333 4.007333 874 +media 1 19 4.007333 4.007333 861 +els 1 19 4.007333 4.007333 843 +beauti 1 18 4.060443 4.060443 912 +stand 1 18 4.060443 4.060443 891 +repositori 1 17 4.110874 4.110874 932 +stat 1 17 4.110874 4.110874 924 +stock 1 16 4.174387 4.174387 1007 +psycholog 1 15 4.248495 4.248495 1054 +camera 1 14 4.317488 4.317488 1115 +happi 1 14 4.317488 4.317488 1079 +bodi 1 13 4.382027 4.382027 1178 +script 1 13 4.382027 4.382027 1171 +step 1 13 4.382027 4.382027 1138 +outsid 1 12 4.465908 4.465908 1219 +perl 1 11 4.553877 4.553877 1332 +magic 1 11 4.553877 4.553877 1358 +market 1 11 4.553877 4.553877 1361 +town 1 10 4.653960 4.653960 1458 +traffic 1 10 4.653960 4.653960 1421 +departmentof 1 9 4.753590 4.753590 1539 +clear 1 9 4.753590 4.753590 1488 +brain 1 8 4.875197 4.875197 1638 +rais 1 8 4.875197 4.875197 1711 +leon 1 8 4.875197 4.875197 1631 +chanc 1 7 5.010635 5.010635 1960 +vallei 1 7 5.010635 5.010635 1959 +footbal 1 7 5.010635 5.010635 1912 +rain 1 6 5.164786 5.164786 2137 +peek 1 6 5.164786 5.164786 2169 +camp 1 5 5.347108 5.347108 2545 +poem 1 5 5.347108 5.347108 2483 +fountain 1 4 5.568345 5.568345 3069 +catch 1 4 5.568345 5.568345 2602 +glimps 1 4 5.568345 5.568345 2778 +proud 1 4 5.568345 5.568345 2918 +leagu 1 4 5.568345 5.568345 3040 +fantasi 1 4 5.568345 5.568345 3055 +jakobovit 1 3 5.857933 5.857933 3913 +hawaii 1 3 5.857933 5.857933 3888 +foster 1 3 5.857933 5.857933 3159 +eddi 1 3 5.857933 5.857933 3896 +drag 1 3 5.857933 5.857933 3434 +mount 1 2 6.263398 6.263398 5995 +youcan 1 2 6.263398 6.263398 4373 +glori 1 2 6.263398 6.263398 5327 +consol 1 2 6.263398 6.263398 4048 +atla 1 2 6.263398 6.263398 5996 +databaseenviron 1 2 6.263398 6.263398 5792 +persistentprogram 1 2 6.263398 6.263398 5997 +creator 1 2 6.263398 6.263398 5998 +ladder 1 2 6.263398 6.263398 5858 +newslet 1 2 6.263398 6.263398 5860 +dian 1 2 6.263398 6.263398 5536 +judi 1 2 6.263398 6.263398 4442 +usai 1 1 6.957497 6.957497 16201 +thisup 1 1 6.957497 6.957497 16202 +weatherreport 1 1 6.957497 6.957497 16203 +sneak 1 1 6.957497 6.957497 16204 +drumhel 1 1 6.957497 6.957497 16205 +rainier 1 1 6.957497 6.957497 16206 +cleardai 1 1 6.957497 6.957497 16207 +astructur 1 1 6.957497 6.957497 16208 +anatomist 1 1 6.957497 6.957497 16209 +internetracquetbal 1 1 6.957497 6.957497 16210 +rotisseriebasebal 1 1 6.957497 6.957497 16211 +fromusa 1 1 6.957497 6.957497 16212 +africancichlid 1 1 6.957497 6.957497 16213 +honolulu 1 1 6.957497 6.957497 16214 +kalalau 1 1 6.957497 6.957497 16215 +gambl 1 1 6.957497 6.957497 16216 +darn 1 1 6.957497 6.957497 16217 +javafamili 1 1 6.957497 6.957497 16218 +mydad 1 1 6.957497 6.957497 16219 +whoi 1 1 6.957497 6.957497 16220 +polem 1 1 6.957497 6.957497 16221 +emanuel 1 1 6.957497 6.957497 16222 +swedenborg 1 1 6.957497 6.957497 16223 +nahl 1 1 6.957497 6.957497 16224 +whoprovid 1 1 6.957497 6.957497 16225 +realtor 1 1 6.957497 6.957497 16226 +uncl 1 1 6.957497 6.957497 16227 +bioscienc 1 1 6.957497 6.957497 16228 +bookmarksif 1 1 6.957497 6.957497 16229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..7e2587cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +welcom 1 122 2.079442 2.079442 99 +right 1 48 3.044522 3.044522 363 +robert 1 30 3.555348 3.555348 567 +head 1 23 3.806662 3.806662 732 +reserv 1 20 3.951244 3.951244 808 +five 1 19 4.007333 4.007333 841 +photograph 1 15 4.248495 4.248495 1056 +galleri 1 13 4.382027 4.382027 1192 +twenti 1 5 5.347108 5.347108 2540 +twilight 1 1 6.957497 6.957497 16230 +grimm 1 1 6.957497 6.957497 16231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..5b6e19c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +memori 1 101 2.302585 2.302585 139 +peopl 1 96 2.302585 2.302585 132 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +journal 1 83 2.484907 2.484907 183 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +addit 1 74 2.639057 2.639057 228 +sieg 1 69 2.708050 2.708050 260 +would 1 67 2.708050 2.708050 251 +polici 1 64 2.772589 2.772589 279 +abstract 1 62 2.772589 2.772589 276 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +direct 1 57 2.890372 2.890372 316 +three 1 54 2.944439 2.944439 330 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +friend 1 48 3.044522 3.044522 376 +could 1 46 3.091042 3.091042 383 +physic 1 47 3.091042 3.091042 377 +mark 1 44 3.135494 3.135494 403 +cach 1 41 3.218876 3.218876 432 +edit 1 42 3.218876 3.218876 418 +realli 1 40 3.258097 3.258097 444 +map 1 39 3.258097 3.258097 452 +origin 1 38 3.295837 3.295837 472 +bibliographi 1 34 3.401197 3.401197 518 +eduoffic 1 33 3.433987 3.433987 531 +scientist 1 31 3.496508 3.496508 560 +travel 1 30 3.555348 3.555348 579 +american 1 27 3.637586 3.637586 634 +symbol 1 27 3.637586 3.637586 620 +subject 1 26 3.688879 3.688879 647 +hill 1 25 3.737670 3.737670 670 +interpret 1 24 3.761200 3.761200 686 +togeth 1 23 3.806662 3.806662 714 +reduc 1 22 3.850148 3.850148 759 +chen 1 21 3.912023 3.912023 791 +avoid 1 21 3.912023 3.912023 799 +hous 1 21 3.912023 3.912023 801 +wrote 1 20 3.951244 3.951244 830 +miss 1 19 4.007333 4.007333 866 +els 1 19 4.007333 4.007333 843 +bershad 1 18 4.060443 4.060443 902 +attend 1 18 4.060443 4.060443 893 +asplo 1 17 4.110874 4.110874 948 +thought 1 17 4.110874 4.110874 945 +chateau 1 16 4.174387 4.174387 997 +took 1 16 4.174387 4.174387 1010 +overhead 1 15 4.248495 4.248495 1035 +countri 1 15 4.248495 4.248495 1059 +levi 1 14 4.317488 4.317488 1093 +conduct 1 14 4.317488 4.317488 1065 +alan 1 13 4.382027 4.382027 1146 +karlin 1 13 4.382027 4.382027 1176 +brad 1 12 4.465908 4.465908 1264 +anna 1 12 4.465908 4.465908 1292 +promot 1 12 4.465908 4.465908 1235 +food 1 12 4.465908 4.465908 1285 +speak 1 12 4.465908 4.465908 1283 +smart 1 11 4.553877 4.553877 1352 +denni 1 11 4.553877 4.553877 1321 +isca 1 11 4.553877 4.553877 1354 +baer 1 11 4.553877 4.553877 1353 +wong 1 9 4.753590 4.753590 1609 +osdi 1 9 4.753590 4.753590 1534 +voelker 1 9 4.753590 4.753590 1557 +said 1 9 4.753590 4.753590 1571 +didn 1 9 4.753590 4.753590 1563 +romer 1 8 4.875197 4.875197 1706 +wayn 1 8 4.875197 4.875197 1738 +judg 1 8 4.875197 4.875197 1644 +interestsi 1 7 5.010635 5.010635 1969 +supportfor 1 7 5.010635 5.010635 1854 +tip 1 7 5.010635 5.010635 1863 +conflict 1 6 5.164786 5.164786 2041 +theproject 1 6 5.164786 5.164786 1981 +wolman 1 6 5.164786 5.164786 2093 +edward 1 6 5.164786 5.164786 2050 +ohlrich 1 5 5.347108 5.347108 2564 +hair 1 5 5.347108 5.347108 2446 +ration 1 5 5.347108 5.347108 2427 +unknown 1 5 5.347108 5.347108 2318 +tuft 1 5 5.347108 5.347108 2575 +couldn 1 4 5.568345 5.568345 2977 +accompani 1 4 5.568345 5.568345 2666 +mappedcach 1 3 5.857933 5.857933 3928 +knee 1 3 5.857933 5.857933 3980 +surgeri 1 3 5.857933 5.857933 3975 +europ 1 3 5.857933 5.857933 3761 +lunch 1 3 5.857933 5.857933 3369 +father 1 3 5.857933 5.857933 3757 +systemswith 1 2 6.263398 6.263398 5342 +eustac 1 2 6.263398 6.263398 5866 +onlinesuperpag 1 2 6.263398 6.263398 5819 +resolutionon 1 2 6.263398 6.263398 5867 +stuffa 1 2 6.263398 6.263398 5999 +rai 1 2 6.263398 6.263398 5915 +ticker 1 2 6.263398 6.263398 5247 +likebrian 1 1 6.957497 6.957497 16232 +andwayn 1 1 6.957497 6.957497 16233 +ofinterpret 1 1 6.957497 6.957497 16234 +rockyhom 1 1 6.957497 6.957497 16235 +lobo 1 1 6.957497 6.957497 16236 +listrandom 1 1 6.957497 6.957497 16237 +limb 1 1 6.957497 6.957497 16238 +arthroscop 1 1 6.957497 6.957497 16239 +wrist 1 1 6.957497 6.957497 16240 +dylansaid 1 1 6.957497 6.957497 16241 +flowbe 1 1 6.957497 6.957497 16242 +beingexperiment 1 1 6.957497 6.957497 16243 +somepictur 1 1 6.957497 6.957497 16244 +eatsomeon 1 1 6.957497 6.957497 16245 +sincer 1 1 6.957497 6.957497 16246 +forexampl 1 1 6.957497 6.957497 16247 +leftth 1 1 6.957497 6.957497 16248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..db9530d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,158 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +report 1 131 2.079442 2.079442 92 +intern 1 108 2.197225 2.197225 128 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +follow 1 92 2.397895 2.397895 143 +larg 1 82 2.484907 2.484907 168 +dynam 1 76 2.564949 2.564949 194 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +appear 1 78 2.564949 2.564949 210 +good 1 77 2.564949 2.564949 200 +master 1 76 2.564949 2.564949 216 +onlin 1 75 2.639057 2.639057 223 +order 1 69 2.708050 2.708050 249 +simul 1 66 2.708050 2.708050 255 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +descript 1 64 2.772589 2.772589 271 +improv 1 62 2.772589 2.772589 289 +collect 1 65 2.772589 2.772589 268 +polici 1 64 2.772589 2.772589 279 +result 1 65 2.772589 2.772589 281 +copi 1 63 2.772589 2.772589 284 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +sever 1 56 2.890372 2.890372 322 +space 1 57 2.890372 2.890372 310 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +cover 1 55 2.944439 2.944439 329 +instruct 1 53 2.944439 2.944439 332 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +without 1 50 3.044522 3.044522 370 +standard 1 48 3.044522 3.044522 365 +featur 1 46 3.091042 3.091042 386 +adapt 1 46 3.091042 3.091042 387 +effect 1 46 3.091042 3.091042 385 +mechan 1 43 3.178054 3.178054 416 +combin 1 42 3.218876 3.218876 421 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +small 1 39 3.258097 3.258097 447 +map 1 39 3.258097 3.258097 452 +brian 1 38 3.295837 3.295837 466 +cost 1 37 3.332205 3.332205 480 +bibliographi 1 34 3.401197 3.401197 518 +compon 1 30 3.555348 3.555348 570 +pattern 1 24 3.761200 3.761200 689 +size 1 23 3.806662 3.806662 713 +identifi 1 22 3.850148 3.850148 760 +reduc 1 22 3.850148 3.850148 759 +chen 1 21 3.912023 3.912023 791 +avoid 1 21 3.912023 3.912023 799 +runtim 1 19 4.007333 4.007333 858 +miss 1 19 4.007333 4.007333 866 +region 1 19 4.007333 4.007333 875 +comparison 1 19 4.007333 4.007333 863 +behavior 1 18 4.060443 4.060443 881 +bershad 1 18 4.060443 4.060443 902 +modif 1 17 4.110874 4.110874 913 +monitor 1 17 4.110874 4.110874 941 +asplo 1 17 4.110874 4.110874 948 +overhead 1 15 4.248495 4.248495 1035 +qual 1 15 4.248495 4.248495 1062 +whose 1 13 4.382027 4.382027 1166 +someon 1 13 4.382027 4.382027 1128 +karlin 1 13 4.382027 4.382027 1176 +resolut 1 13 4.382027 4.382027 1172 +overal 1 12 4.465908 4.465908 1254 +promot 1 12 4.465908 4.465908 1235 +anna 1 12 4.465908 4.465908 1292 +isca 1 11 4.553877 4.553877 1354 +denni 1 11 4.553877 4.553877 1321 +alpha 1 11 4.553877 4.553877 1348 +operatingsystem 1 10 4.653960 4.653960 1401 +reli 1 10 4.653960 4.653960 1411 +significantli 1 9 4.753590 4.753590 1508 +wong 1 9 4.753590 4.753590 1609 +osdi 1 9 4.753590 4.753590 1534 +poor 1 8 4.875197 4.875197 1736 +potenti 1 8 4.875197 4.875197 1690 +wayn 1 8 4.875197 4.875197 1738 +romer 1 8 4.875197 4.875197 1706 +delai 1 7 5.010635 5.010635 1848 +larger 1 7 5.010635 5.010635 1875 +reduct 1 7 5.010635 5.010635 1877 +benefit 1 6 5.164786 5.164786 2213 +conflict 1 6 5.164786 5.164786 2041 +ohlrich 1 5 5.347108 5.347108 2564 +resolv 1 4 5.568345 5.568345 2675 +bottleneck 1 4 5.568345 5.568345 2769 +mip 1 4 5.568345 5.568345 2738 +superpag 1 3 5.857933 5.857933 3978 +peoplefaculti 1 3 5.857933 5.857933 3981 +dlee 1 3 5.857933 5.857933 3949 +waynew 1 3 5.857933 5.857933 3982 +reorder 1 3 5.857933 5.857933 3952 +fragment 1 2 6.263398 6.263398 6000 +contigu 1 2 6.263398 6.263398 6001 +warrant 1 2 6.263398 6.263398 5697 +washingtonmemori 1 1 6.957497 6.957497 16249 +researchdepart 1 1 6.957497 6.957497 16250 +sharesth 1 1 6.957497 6.957497 16251 +incur 1 1 6.957497 6.957497 16252 +monitorappl 1 1 6.957497 6.957497 16253 +resolvetlb 1 1 6.957497 6.957497 16254 +tlbi 1 1 6.957497 6.957497 16255 +severalmodern 1 1 6.957497 6.957497 16256 +amultipl 1 1 6.957497 6.957497 16257 +tlbperform 1 1 6.957497 6.957497 16258 +ofwast 1 1 6.957497 6.957497 16259 +todiffer 1 1 6.957497 6.957497 16260 +constructingsuperpag 1 1 6.957497 6.957497 16261 +ofmemori 1 1 6.957497 6.957497 16262 +balancesth 1 1 6.957497 6.957497 16263 +tlbmiss 1 1 6.957497 6.957497 16264 +memorycopi 1 1 6.957497 6.957497 16265 +misspattern 1 1 6.957497 6.957497 16266 +attain 1 1 6.957497 6.957497 16267 +largepag 1 1 6.957497 6.957497 16268 +makea 1 1 6.957497 6.957497 16269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..addf4acb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +appear 1 78 2.564949 2.564949 210 +sourc 1 77 2.564949 2.564949 201 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +evalu 1 64 2.772589 2.772589 266 +juli 1 60 2.833213 2.833213 305 +sever 1 56 2.890372 2.890372 322 +variou 1 56 2.890372 2.890372 317 +processor 1 54 2.944439 2.944439 335 +execut 1 45 3.135494 3.135494 404 +examin 1 42 3.218876 3.218876 424 +brian 1 38 3.295837 3.295837 466 +becom 1 28 3.610918 3.610918 603 +measur 1 28 3.610918 3.610918 609 +strategi 1 25 3.737670 3.737670 682 +trace 1 25 3.737670 3.737670 677 +interpret 1 24 3.761200 3.761200 686 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +portabl 1 20 3.951244 3.951244 819 +safeti 1 20 3.951244 3.951244 817 +basi 1 20 3.951244 3.951244 828 +binari 1 20 3.951244 3.951244 823 +benchmark 1 19 4.007333 4.007333 859 +bershad 1 18 4.060443 4.060443 902 +asplo 1 17 4.110874 4.110874 948 +demand 1 14 4.317488 4.317488 1073 +levi 1 14 4.317488 4.317488 1093 +characterist 1 12 4.465908 4.465908 1257 +perl 1 11 4.553877 4.553877 1332 +baer 1 11 4.553877 4.553877 1353 +denni 1 11 4.553877 4.553877 1321 +perspect 1 10 4.653960 4.653960 1437 +jean 1 10 4.653960 4.653960 1440 +henri 1 10 4.653960 4.653960 1417 +voelker 1 9 4.753590 4.753590 1557 +wong 1 9 4.753590 4.753590 1609 +gain 1 8 4.875197 4.875197 1730 +romer 1 8 4.875197 4.875197 1706 +wayn 1 8 4.875197 4.875197 1738 +instrument 1 7 5.010635 5.010635 1954 +loup 1 6 5.164786 5.164786 2228 +geoff 1 6 5.164786 5.164786 2124 +wolman 1 6 5.164786 5.164786 2093 +eas 1 5 5.347108 5.347108 2267 +alec 1 5 5.347108 5.347108 2563 +rewrit 1 5 5.347108 5.347108 2367 +rocki 1 4 5.568345 5.568345 3048 +increasingli 1 4 5.568345 5.568345 2766 +popular 1 4 5.568345 5.568345 2802 +insight 1 4 5.568345 5.568345 3024 +etch 1 4 5.568345 5.568345 2755 +peoplefaculti 1 3 5.857933 5.857933 3981 +dlee 1 3 5.857933 5.857933 3949 +waynew 1 3 5.857933 5.857933 3982 +microbenchmark 1 2 6.263398 6.263398 5821 +mipsi 1 2 6.263398 6.263398 5882 +papersrom 1 1 6.957497 6.957497 16270 +abstractpostscriptjava 1 1 6.957497 6.957497 16271 +xjava 1 1 6.957497 6.957497 16272 +benchmarkstoolsto 1 1 6.957497 6.957497 16273 +vebeen 1 1 6.957497 6.957497 16274 +yetpublicli 1 1 6.957497 6.957497 16275 +etchhom 1 1 6.957497 6.957497 16276 +documentationproject 1 1 6.957497 6.957497 16277 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..22b8c93a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +grade 1 90 2.397895 2.397895 142 +librari 1 87 2.484907 2.484907 181 +help 1 83 2.484907 2.484907 175 +educ 1 86 2.484907 2.484907 191 +intellig 1 72 2.639057 2.639057 225 +nation 1 74 2.639057 2.639057 240 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +order 1 69 2.708050 2.708050 249 +foundat 1 62 2.772589 2.772589 286 +best 1 59 2.833213 2.833213 299 +summer 1 56 2.890372 2.890372 311 +februari 1 54 2.944439 2.944439 328 +basic 1 50 3.044522 3.044522 360 +offer 1 43 3.178054 3.178054 414 +long 1 43 3.178054 3.178054 413 +obtain 1 33 3.433987 3.433987 534 +richard 1 31 3.496508 3.496508 559 +produc 1 30 3.555348 3.555348 572 +arrai 1 27 3.637586 3.637586 627 +departmentunivers 1 24 3.761200 3.761200 711 +recognit 1 23 3.806662 3.806662 723 +director 1 22 3.850148 3.850148 767 +fund 1 21 3.912023 3.912023 805 +facil 1 20 3.951244 3.951244 814 +increas 1 20 3.951244 3.951244 829 +beauti 1 18 4.060443 4.060443 912 +chateau 1 16 4.174387 4.174387 997 +massiv 1 15 4.248495 4.248495 1026 +charact 1 15 4.248495 4.248495 1028 +optic 1 12 4.465908 4.465908 1221 +minor 1 12 4.465908 4.465908 1237 +island 1 11 4.553877 4.553877 1345 +length 1 10 4.653960 4.653960 1400 +juan 1 9 4.753590 4.753590 1580 +extract 1 8 4.875197 4.875197 1728 +roger 1 7 5.010635 5.010635 1892 +northwest 1 7 5.010635 5.010635 1973 +usaoffic 1 6 5.164786 5.164786 2159 +layout 1 6 5.164786 5.164786 2183 +educomput 1 5 5.347108 5.347108 2524 +cellular 1 5 5.347108 5.347108 2433 +camp 1 5 5.347108 5.347108 2545 +girl 1 5 5.347108 5.347108 2410 +snake 1 5 5.347108 5.347108 2281 +radio 1 4 5.568345 5.568345 3025 +bake 1 2 6.263398 6.263398 4468 +rogersrrog 1 1 6.957497 6.957497 16280 +laboratri 1 1 6.957497 6.957497 16281 +systol 1 1 6.957497 6.957497 16282 +scam 1 1 6.957497 6.957497 16278 +morpholog 1 1 6.957497 6.957497 16283 +groundtruth 1 1 6.957497 6.957497 16284 +environment 1 1 6.957497 6.957497 16285 +ncee 1 1 6.957497 6.957497 16286 +ag 1 1 6.957497 6.957497 16287 +splash 1 1 6.957497 6.957497 16279 +corn 1 1 6.957497 6.957497 16288 +jessica 1 1 6.957497 6.957497 16289 +squishi 1 1 6.957497 6.957497 16290 +kuow 1 1 6.957497 6.957497 16291 +stationi 1 1 6.957497 6.957497 16292 +pecan 1 1 6.957497 6.957497 16293 +seattlelast 1 1 6.957497 6.957497 16294 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..bd2cc883 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +seattl 1 120 2.079442 2.079442 103 +school 1 84 2.484907 2.484907 188 +stuff 1 87 2.484907 2.484907 171 +sieg 1 69 2.708050 2.708050 260 +friend 1 48 3.044522 3.044522 376 +mike 1 24 3.761200 3.761200 703 +chateau 1 16 4.174387 4.174387 997 +usaoffic 1 6 5.164786 5.164786 2159 +salisburysalisbur 1 1 6.957497 6.957497 16295 +lifehistori 1 1 6.957497 6.957497 16296 +vitacool 1 1 6.957497 6.957497 16297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..6d09d6df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +construct 1 139 1.945910 1.945910 82 +seattl 1 120 2.079442 2.079442 103 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +version 1 113 2.197225 2.197225 122 +specif 1 106 2.197225 2.197225 106 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +need 1 98 2.302585 2.302585 135 +technic 1 100 2.302585 2.302585 140 +user 1 104 2.302585 2.302585 137 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +control 1 82 2.484907 2.484907 164 +thing 1 84 2.484907 2.484907 189 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +integr 1 67 2.708050 2.708050 245 +multimedia 1 68 2.708050 2.708050 258 +abstract 1 62 2.772589 2.772589 276 +januari 1 62 2.772589 2.772589 264 +march 1 61 2.833213 2.833213 295 +best 1 59 2.833213 2.833213 299 +direct 1 57 2.890372 2.890372 316 +sampl 1 53 2.944439 2.944439 339 +talk 1 53 2.944439 2.944439 336 +extens 1 53 2.944439 2.944439 340 +processor 1 54 2.944439 2.944439 335 +right 1 48 3.044522 3.044522 363 +principl 1 48 3.044522 3.044522 357 +frequent 1 49 3.044522 3.044522 367 +adapt 1 46 3.091042 3.091042 387 +third 1 43 3.178054 3.178054 412 +review 1 42 3.218876 3.218876 425 +music 1 42 3.218876 3.218876 436 +industri 1 38 3.295837 3.295837 464 +brian 1 38 3.295837 3.295837 466 +slide 1 38 3.295837 3.295837 467 +workstat 1 37 3.332205 3.332205 479 +winter 1 36 3.367296 3.367296 500 +post 1 35 3.401197 3.401197 505 +tech 1 35 3.401197 3.401197 515 +independ 1 32 3.465736 3.465736 548 +platform 1 29 3.583519 3.583519 591 +american 1 27 3.637586 3.637586 634 +quit 1 27 3.637586 3.637586 633 +arrai 1 27 3.637586 3.637586 627 +disk 1 22 3.850148 3.850148 747 +similar 1 21 3.912023 3.912023 771 +safeti 1 20 3.951244 3.951244 817 +longer 1 20 3.951244 3.951244 816 +reserv 1 20 3.951244 3.951244 808 +histori 1 19 4.007333 4.007333 853 +boston 1 19 4.007333 4.007333 862 +bershad 1 18 4.060443 4.060443 902 +protect 1 17 4.110874 4.110874 935 +modern 1 16 4.174387 4.174387 966 +match 1 16 4.174387 4.174387 965 +diego 1 16 4.174387 4.174387 992 +fourth 1 16 4.174387 4.174387 999 +rank 1 14 4.317488 4.317488 1063 +spin 1 14 4.317488 4.317488 1121 +mellon 1 13 4.382027 4.382027 1179 +rest 1 12 4.465908 4.465908 1259 +usenix 1 12 4.465908 4.465908 1240 +carnegi 1 12 4.465908 4.465908 1260 +statement 1 11 4.553877 4.553877 1313 +island 1 11 4.553877 4.553877 1345 +rich 1 10 4.653960 4.653960 1396 +sosp 1 10 4.653960 4.653960 1416 +mountain 1 10 4.653960 4.653960 1456 +santa 1 10 4.653960 4.653960 1441 +inter 1 9 4.753590 4.753590 1530 +modula 1 9 4.753590 4.753590 1613 +osdi 1 9 4.753590 4.753590 1534 +ride 1 8 4.875197 4.875197 1741 +isol 1 8 4.875197 4.875197 1663 +sigop 1 8 4.875197 4.875197 1727 +european 1 8 4.875197 4.875197 1763 +capac 1 8 4.875197 4.875197 1740 +mach 1 8 4.875197 4.875197 1669 +cultur 1 7 5.010635 5.010635 1951 +migrat 1 7 5.010635 5.010635 1851 +centuri 1 7 5.010635 5.010635 1935 +microprocessor 1 7 5.010635 5.010635 1808 +trend 1 7 5.010635 5.010635 1842 +fifth 1 7 5.010635 5.010635 1931 +sixth 1 7 5.010635 5.010635 1917 +band 1 6 5.164786 5.164786 2198 +unpublish 1 6 5.164786 5.164786 2226 +usag 1 6 5.164786 5.164786 2209 +favor 1 5 5.347108 5.347108 2414 +panel 1 5 5.347108 5.347108 2463 +savag 1 4 5.568345 5.568345 2777 +gradual 1 4 5.568345 5.568345 2997 +witha 1 4 5.568345 5.568345 2617 +firm 1 4 5.568345 5.568345 2684 +peer 1 4 5.568345 5.568345 2742 +andimplement 1 4 5.568345 5.568345 3029 +microkernel 1 4 5.568345 5.568345 3047 +afraid 1 4 5.568345 5.568345 3053 +redund 1 4 5.568345 5.568345 2839 +stefan 1 3 5.857933 5.857933 3921 +caught 1 3 5.857933 5.857933 3465 +irrelev 1 3 5.857933 5.857933 3823 +inconveni 1 3 5.857933 5.857933 3866 +distract 1 3 5.857933 5.857933 3945 +stillmaintain 1 3 5.857933 5.857933 3964 +copper 1 3 5.857933 5.857933 3536 +wcsss 1 3 5.857933 5.857933 3956 +hoto 1 3 5.857933 5.857933 3577 +orca 1 3 5.857933 5.857933 3578 +fool 1 2 6.263398 6.263398 5353 +ofappl 1 2 6.263398 6.263398 6002 +whichsupport 1 2 6.263398 6.263398 6003 +tucson 1 2 6.263398 6.263398 5883 +monterei 1 2 6.263398 6.263398 4362 +wwo 1 2 6.263398 6.263398 5812 +export 1 2 6.263398 6.263398 5689 +pittsburghfor 1 1 6.957497 6.957497 16298 +mnow 1 1 6.957497 6.957497 16299 +strongbackground 1 1 6.957497 6.957497 16300 +trash 1 1 6.957497 6.957497 16301 +tocqeuvil 1 1 6.957497 6.957497 16302 +tiresom 1 1 6.957497 6.957497 16303 +exercisepolit 1 1 6.957497 6.957497 16304 +tocurr 1 1 6.957497 6.957497 16305 +merri 1 1 6.957497 6.957497 16306 +onan 1 1 6.957497 6.957497 16307 +projectsspinspin 1 1 6.957497 6.957497 16308 +omnifemtokernel 1 1 6.957497 6.957497 16309 +writingspin 1 1 6.957497 6.957497 16310 +napa 1 1 6.957497 6.957497 16311 +timer 1 1 6.957497 6.957497 16312 +hikingthi 1 1 6.957497 6.957497 16313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..4471adfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +david 1 71 2.639057 2.639057 232 +juli 1 60 2.833213 2.833213 305 +revis 1 26 3.688879 3.688879 640 +sean 1 8 4.875197 4.875197 1705 +sandi 1 4 5.568345 5.568345 2765 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..713fc967 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +seattl 1 120 2.079442 2.079442 103 +postscript 1 131 2.079442 2.079442 90 +person 1 111 2.197225 2.197225 117 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +overview 1 56 2.890372 2.890372 323 +better 1 45 3.135494 3.135494 401 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +richard 1 31 3.496508 3.496508 559 +famili 1 23 3.806662 3.806662 735 +half 1 21 3.912023 3.912023 776 +washingtonbox 1 13 4.382027 4.382027 1200 +ski 1 10 4.653960 4.653960 1471 +softbal 1 9 4.753590 4.753590 1594 +softbot 1 7 5.010635 5.010635 1974 +amus 1 5 5.347108 5.347108 2366 +racquetbal 1 4 5.568345 5.568345 3052 +biographi 1 3 5.857933 5.857933 3658 +brute 1 2 6.263398 6.263398 5892 +bicycl 1 2 6.263398 6.263398 5950 +segal 1 1 6.957497 6.957497 16314 +segaldepart 1 1 6.957497 6.957497 16315 +archeri 1 1 6.957497 6.957497 16316 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..f66c8769 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +code 1 108 2.197225 2.197225 116 +peopl 1 96 2.302585 2.302585 132 +pictur 1 89 2.397895 2.397895 160 +activ 1 84 2.484907 2.484907 182 +school 1 84 2.484907 2.484907 188 +solut 1 82 2.484907 2.484907 162 +contain 1 81 2.484907 2.484907 174 +complet 1 77 2.564949 2.564949 208 +come 1 78 2.564949 2.564949 202 +sourc 1 77 2.564949 2.564949 201 +resum 1 79 2.564949 2.564949 217 +line 1 75 2.639057 2.639057 231 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +main 1 67 2.708050 2.708050 256 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +finger 1 52 2.995732 2.995732 354 +date 1 51 2.995732 2.995732 344 +hardwar 1 51 2.995732 2.995732 350 +without 1 50 3.044522 3.044522 370 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +even 1 45 3.135494 3.135494 393 +around 1 43 3.178054 3.178054 415 +past 1 42 3.218876 3.218876 428 +linear 1 41 3.218876 3.218876 431 +field 1 37 3.332205 3.332205 482 +expect 1 37 3.332205 3.332205 484 +print 1 34 3.401197 3.401197 503 +return 1 34 3.401197 3.401197 502 +weather 1 28 3.610918 3.610918 618 +bookmark 1 26 3.688879 3.688879 639 +toward 1 25 3.737670 3.737670 668 +sometim 1 24 3.761200 3.761200 696 +honor 1 23 3.806662 3.806662 729 +sort 1 22 3.850148 3.850148 738 +born 1 21 3.912023 3.912023 798 +unit 1 21 3.912023 3.912023 779 +north 1 19 4.007333 4.007333 873 +comparison 1 19 4.007333 4.007333 863 +els 1 19 4.007333 4.007333 843 +germani 1 17 4.110874 4.110874 946 +bachelor 1 17 4.110874 4.110874 957 +qual 1 15 4.248495 4.248495 1062 +squar 1 14 4.317488 4.317488 1082 +warn 1 14 4.317488 4.317488 1068 +excit 1 11 4.553877 4.553877 1329 +moment 1 11 4.553877 4.553877 1379 +didn 1 9 4.753590 4.753590 1563 +exact 1 9 4.753590 4.753590 1509 +reduct 1 7 5.010635 5.010635 1877 +indiana 1 6 5.164786 5.164786 2057 +trail 1 6 5.164786 5.164786 2071 +byte 1 6 5.164786 5.164786 2108 +fewer 1 6 5.164786 5.164786 2074 +fals 1 4 5.568345 5.568345 2861 +raft 1 4 5.568345 5.568345 3060 +crazi 1 4 5.568345 5.568345 2822 +shouldn 1 4 5.568345 5.568345 2606 +stefan 1 3 5.857933 5.857933 3921 +berg 1 2 6.263398 6.263398 4970 +reif 1 2 6.263398 6.263398 5015 +diploma 1 2 6.263398 6.263398 5990 +bloomington 1 2 6.263398 6.263398 5034 +itin 1 2 6.263398 6.263398 5992 +char 1 2 6.263398 6.263398 4716 +bergstefan 1 1 6.957497 6.957497 16320 +sgberg 1 1 6.957497 6.957497 16321 +cologn 1 1 6.957497 6.957497 16318 +mittler 1 1 6.957497 6.957497 16322 +thgrade 1 1 6.957497 6.957497 16323 +schillergymnasium 1 1 6.957497 6.957497 16324 +statesto 1 1 6.957497 6.957497 16325 +distinctionin 1 1 6.957497 6.957497 16326 +fromindiana 1 1 6.957497 6.957497 16327 +momenth 1 1 6.957497 6.957497 16328 +thiscenturi 1 1 6.957497 6.957497 16329 +yourselfsometh 1 1 6.957497 6.957497 16330 +particularsolut 1 1 6.957497 6.957497 16331 +sall 1 1 6.957497 6.957497 16332 +carriag 1 1 6.957497 6.957497 16333 +putchar 1 1 6.957497 6.957497 16319 +printf 1 1 6.957497 6.957497 16317 +inpostscript 1 1 6.957497 6.957497 16334 +andtex 1 1 6.957497 6.957497 16335 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..b10a77ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +schedul 1 119 2.079442 2.079442 85 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +follow 1 92 2.397895 2.397895 143 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +thing 1 84 2.484907 2.484907 189 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +differ 1 66 2.708050 2.708050 253 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +laboratori 1 63 2.772589 2.772589 292 +plan 1 65 2.772589 2.772589 272 +browser 1 56 2.890372 2.890372 313 +get 1 46 3.091042 3.091042 380 +done 1 47 3.091042 3.091042 381 +third 1 43 3.178054 3.178054 412 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +anim 1 31 3.496508 3.496508 557 +travel 1 30 3.555348 3.555348 579 +daili 1 24 3.761200 3.761200 706 +grad 1 20 3.951244 3.951244 837 +left 1 19 4.007333 4.007333 851 +lower 1 18 4.060443 4.060443 886 +lot 1 18 4.060443 4.060443 889 +scene 1 14 4.317488 4.317488 1114 +island 1 11 4.553877 4.553877 1345 +siggraph 1 8 4.875197 4.875197 1773 +corner 1 7 5.010635 5.010635 1909 +pixel 1 4 5.568345 5.568345 2831 +shortcut 1 3 5.857933 5.857933 3932 +ward 1 2 6.263398 6.263398 4506 +hereat 1 2 6.263398 6.263398 5048 +shadegreet 1 1 6.957497 6.957497 16336 +salut 1 1 6.957497 6.957497 16337 +dubcs 1 1 6.957497 6.957497 16338 +renderingof 1 1 6.957497 6.957497 16339 +walkthruproject 1 1 6.957497 6.957497 16340 +amonglot 1 1 6.957497 6.957497 16341 +paperdescrib 1 1 6.957497 6.957497 16342 +thepictur 1 1 6.957497 6.957497 16343 +aspectsof 1 1 6.957497 6.957497 16344 +thegraph 1 1 6.957497 6.957497 16345 +scrunch 1 1 6.957497 6.957497 16346 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..a2529b79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +seattl 1 120 2.079442 2.079442 103 +summari 1 73 2.639057 2.639057 237 +prof 1 64 2.772589 2.772589 273 +januari 1 62 2.772589 2.772589 264 +pointer 1 48 3.044522 3.044522 368 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +request 1 26 3.688879 3.688879 635 +ofwashington 1 22 3.850148 3.850148 766 +upon 1 16 4.174387 4.174387 978 +andengin 1 4 5.568345 5.568345 3042 +shun 1 2 6.263398 6.263398 4533 +leung 1 2 6.263398 6.263398 4534 +johnzahorjan 1 2 6.263398 6.263398 6004 +leungshun 1 1 6.957497 6.957497 16347 +shuntak 1 1 6.957497 6.957497 16348 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..ea6b8bf5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +relat 1 139 1.945910 1.945910 68 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +mathemat 1 108 2.197225 2.197225 123 +look 1 107 2.197225 2.197225 115 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +start 1 83 2.484907 2.484907 173 +info 1 85 2.484907 2.484907 176 +interfac 1 79 2.564949 2.564949 209 +name 1 72 2.639057 2.639057 220 +involv 1 71 2.639057 2.639057 227 +degre 1 69 2.708050 2.708050 259 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +origin 1 38 3.295837 3.295837 472 +mean 1 37 3.332205 3.332205 477 +dissert 1 32 3.465736 3.465736 549 +quit 1 27 3.637586 3.637586 633 +notic 1 25 3.737670 3.737670 675 +departmentunivers 1 24 3.761200 3.761200 711 +ofwashington 1 22 3.850148 3.850148 766 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +less 1 18 4.060443 4.060443 892 +universityof 1 15 4.248495 4.248495 1061 +anywai 1 15 4.248495 4.248495 1047 +neat 1 12 4.465908 4.465908 1263 +virginia 1 8 4.875197 4.875197 1659 +studentcomput 1 7 5.010635 5.010635 1963 +foreign 1 7 5.010635 5.010635 1919 +smaller 1 7 5.010635 5.010635 1874 +hunt 1 7 5.010635 5.010635 1798 +bug 1 7 5.010635 5.010635 1801 +slate 1 6 5.164786 5.164786 2021 +sciencedepart 1 6 5.164786 5.164786 2172 +haven 1 4 5.568345 5.568345 3037 +asian 1 3 5.857933 5.857933 3598 +heavili 1 3 5.857933 5.857933 3572 +groupand 1 3 5.857933 5.857933 3873 +shuichi 1 2 6.263398 6.263398 4498 +myqual 1 2 6.263398 6.263398 6005 +degreein 1 2 6.263398 6.263398 5116 +koga 1 1 6.957497 6.957497 16349 +skoga 1 1 6.957497 6.957497 16350 +bynow 1 1 6.957497 6.957497 16351 +alsoheavili 1 1 6.957497 6.957497 16352 +andgovern 1 1 6.957497 6.957497 16353 +alic 1 1 6.957497 6.957497 16354 +destroi 1 1 6.957497 6.957497 16355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..8640c668 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +schedul 1 119 2.079442 2.079442 85 +seattl 1 120 2.079442 2.079442 103 +site 1 106 2.197225 2.197225 119 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +dept 1 64 2.772589 2.772589 291 +previou 1 62 2.772589 2.772589 290 +experi 1 64 2.772589 2.772589 283 +profession 1 51 2.995732 2.995732 345 +littl 1 39 3.258097 3.258097 454 +photo 1 31 3.496508 3.496508 561 +busi 1 21 3.912023 3.912023 784 +vlsi 1 21 3.912023 3.912023 795 +weekli 1 17 4.110874 4.110874 919 +circuit 1 13 4.382027 4.382027 1131 +galleri 1 13 4.382027 4.382027 1192 +chao 1 8 4.875197 4.875197 1753 +patent 1 5 5.347108 5.347108 2574 +soha 1 2 6.263398 6.263398 6006 +hassoun 1 2 6.263398 6.263398 6007 +retim 1 2 6.263398 6.263398 6008 +hassounit 1 1 6.957497 6.957497 16356 +whoturn 1 1 6.957497 6.957497 16357 +onarchitectur 1 1 6.957497 6.957497 16358 +carlebel 1 1 6.957497 6.957497 16359 +deede 1 1 6.957497 6.957497 16360 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..cdc847df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +contact 1 153 1.791759 1.791759 59 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +hall 1 146 1.945910 1.945910 65 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +report 1 131 2.079442 2.079442 92 +pleas 1 113 2.197225 2.197225 114 +person 1 111 2.197225 2.197225 117 +search 1 95 2.397895 2.397895 155 +associ 1 93 2.397895 2.397895 151 +control 1 82 2.484907 2.484907 164 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +west 1 83 2.484907 2.484907 192 +issu 1 78 2.564949 2.564949 211 +resum 1 79 2.564949 2.564949 217 +sieg 1 69 2.708050 2.708050 260 +evalu 1 64 2.772589 2.772589 266 +particular 1 51 2.995732 2.995732 352 +right 1 48 3.044522 3.044522 363 +quarter 1 47 3.091042 3.091042 389 +done 1 47 3.091042 3.091042 381 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +long 1 43 3.178054 3.178054 413 +music 1 42 3.218876 3.218876 436 +seminar 1 38 3.295837 3.295837 470 +procedur 1 36 3.367296 3.367296 488 +random 1 34 3.401197 3.401197 511 +bookmark 1 26 3.688879 3.688879 639 +sport 1 25 3.737670 3.737670 683 +sort 1 22 3.850148 3.850148 738 +fund 1 21 3.912023 3.912023 805 +corpor 1 21 3.912023 3.912023 802 +tenni 1 20 3.951244 3.951244 838 +chateau 1 16 4.174387 4.174387 997 +cognit 1 16 4.174387 4.174387 986 +action 1 15 4.248495 4.248495 1038 +track 1 15 4.248495 4.248495 1029 +psycholog 1 15 4.248495 4.248495 1054 +convent 1 14 4.317488 4.317488 1072 +philosophi 1 13 4.382027 4.382027 1167 +emploi 1 12 4.465908 4.465908 1284 +remov 1 12 4.465908 4.465908 1225 +danc 1 12 4.465908 4.465908 1278 +food 1 12 4.465908 4.465908 1285 +island 1 11 4.553877 4.553877 1345 +peter 1 11 4.553877 4.553877 1316 +french 1 9 4.753590 4.753590 1511 +volleybal 1 9 4.753590 4.753590 1598 +guggenheim 1 8 4.875197 4.875197 1759 +simon 1 8 4.875197 4.875197 1697 +soccer 1 8 4.875197 4.875197 1752 +coast 1 8 4.875197 4.875197 1746 +softbot 1 7 5.010635 5.010635 1974 +squash 1 6 5.164786 5.164786 2223 +rock 1 6 5.164786 5.164786 2164 +annex 1 5 5.347108 5.347108 2572 +sail 1 5 5.347108 5.347108 2571 +east 1 5 5.347108 5.347108 2472 +phil 1 5 5.347108 5.347108 2419 +oracl 1 4 5.568345 5.568345 2823 +swing 1 4 5.568345 5.568345 2887 +floyd 1 4 5.568345 5.568345 2682 +sujai 1 3 5.857933 5.857933 3960 +parekh 1 3 5.857933 5.857933 3961 +ballroom 1 3 5.857933 5.857933 3983 +spud 1 2 6.263398 6.263398 6009 +chicken 1 2 6.263398 6.263398 5851 +tango 1 2 6.263398 6.263398 6010 +salsa 1 2 6.263398 6.263398 5984 +strait 1 2 6.263398 6.263398 5980 +genesi 1 2 6.263398 6.263398 6011 +gabriel 1 2 6.263398 6.263398 5029 +simultaneousmultithread 1 1 6.957497 6.957497 16361 +tomultithread 1 1 6.957497 6.957497 16362 +controlsystem 1 1 6.957497 6.957497 16363 +patio 1 1 6.957497 6.957497 16364 +workspac 1 1 6.957497 6.957497 16365 +stottler 1 1 6.957497 6.957497 16366 +henk 1 1 6.957497 6.957497 16367 +oondhiu 1 1 6.957497 6.957497 16368 +mango 1 1 6.957497 6.957497 16369 +phad 1 1 6.957497 6.957497 16370 +thai 1 1 6.957497 6.957497 16371 +kung 1 1 6.957497 6.957497 16372 +beverag 1 1 6.957497 6.957497 16373 +screwdriv 1 1 6.957497 6.957497 16374 +scotch 1 1 6.957497 6.957497 16375 +ic 1 1 6.957497 6.957497 16376 +dire 1 1 6.957497 6.957497 16377 +pink 1 1 6.957497 6.957497 16378 +collin 1 1 6.957497 6.957497 16379 +petti 1 1 6.957497 6.957497 16380 +sparekh 1 1 6.957497 6.957497 16381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..e317f023 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +address 1 170 1.791759 1.791759 62 +machin 1 129 2.079442 2.079442 95 +peopl 1 96 2.302585 2.302585 132 +academ 1 82 2.484907 2.484907 178 +name 1 72 2.639057 2.639057 220 +improv 1 62 2.772589 2.772589 289 +colleg 1 61 2.833213 2.833213 300 +friend 1 48 3.044522 3.044522 376 +compani 1 41 3.218876 3.218876 423 +live 1 40 3.258097 3.258097 451 +power 1 30 3.555348 3.555348 573 +sport 1 25 3.737670 3.737670 683 +famili 1 23 3.806662 3.806662 735 +almost 1 22 3.850148 3.850148 742 +hous 1 21 3.912023 3.912023 801 +speed 1 18 4.060443 4.060443 911 +ultim 1 17 4.110874 4.110874 943 +drive 1 15 4.248495 4.248495 1052 +comic 1 14 4.317488 4.317488 1103 +food 1 12 4.465908 4.465908 1285 +mari 1 12 4.465908 4.465908 1266 +lake 1 11 4.553877 4.553877 1373 +mountain 1 10 4.653960 4.653960 1456 +bike 1 10 4.653960 4.653960 1468 +ski 1 10 4.653960 4.653960 1471 +drink 1 9 4.753590 4.753590 1607 +softbal 1 9 4.753590 4.753590 1594 +utah 1 9 4.753590 4.753590 1585 +erik 1 8 4.875197 4.875197 1701 +lewi 1 8 4.875197 4.875197 1700 +star 1 8 4.875197 4.875197 1717 +brain 1 8 4.875197 4.875197 1638 +babylon 1 8 4.875197 4.875197 1731 +cultur 1 7 5.010635 5.010635 1951 +fish 1 6 5.164786 5.164786 2207 +selberg 1 5 5.347108 5.347108 2441 +salt 1 5 5.347108 5.347108 2413 +bean 1 4 5.568345 5.568345 2968 +lara 1 3 5.857933 5.857933 3914 +disc 1 2 6.263398 6.263398 5626 +spud 1 2 6.263398 6.263398 6009 +raquetbal 1 2 6.263398 6.263398 6012 +pepper 1 2 6.263398 6.263398 6013 +war 1 2 6.263398 6.263398 5969 +toon 1 2 6.263398 6.263398 4120 +fishcam 1 1 6.957497 6.957497 16382 +memorialhappi 1 1 6.957497 6.957497 16383 +kay 1 1 6.957497 6.957497 16384 +pasti 1 1 6.957497 6.957497 16385 +ur 1 1 6.957497 6.957497 16386 +pro 1 1 6.957497 6.957497 16387 +wedgwood 1 1 6.957497 6.957497 16388 +diet 1 1 6.957497 6.957497 16389 +roast 1 1 6.957497 6.957497 16390 +bagel 1 1 6.957497 6.957497 16391 +racer 1 1 6.957497 6.957497 16392 +tini 1 1 6.957497 6.957497 16393 +pinki 1 1 6.957497 6.957497 16394 +phantom 1 1 6.957497 6.957497 16395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..39ab489b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +seattl 1 120 2.079442 2.079442 103 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +optim 1 79 2.564949 2.564949 197 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +simul 1 66 2.708050 2.708050 255 +goal 1 66 2.708050 2.708050 250 +would 1 67 2.708050 2.708050 251 +experi 1 64 2.772589 2.772589 283 +improv 1 62 2.772589 2.772589 289 +result 1 65 2.772589 2.772589 281 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +think 1 57 2.890372 2.890372 314 +processor 1 54 2.944439 2.944439 335 +visual 1 48 3.044522 3.044522 372 +quarter 1 47 3.091042 3.091042 389 +anoth 1 45 3.135494 3.135494 408 +late 1 40 3.258097 3.258097 439 +littl 1 39 3.258097 3.258097 454 +movi 1 40 3.258097 3.258097 459 +must 1 40 3.258097 3.258097 442 +game 1 36 3.367296 3.367296 498 +independ 1 32 3.465736 3.465736 548 +quit 1 27 3.637586 3.637586 633 +enjoi 1 26 3.688879 3.688879 660 +primari 1 25 3.737670 3.737670 669 +cooper 1 22 3.850148 3.850148 757 +instead 1 22 3.850148 3.850148 756 +try 1 22 3.850148 3.850148 764 +watch 1 21 3.912023 3.912023 789 +divis 1 21 3.912023 3.912023 803 +mostli 1 19 4.007333 4.007333 869 +exercis 1 19 4.007333 4.007333 842 +left 1 19 4.007333 4.007333 851 +listen 1 18 4.060443 4.060443 907 +front 1 13 4.382027 4.382027 1154 +came 1 13 4.382027 4.382027 1197 +unfortun 1 13 4.382027 4.382027 1170 +weight 1 12 4.465908 4.465908 1204 +hang 1 9 4.753590 4.753590 1499 +drink 1 9 4.753590 4.753590 1607 +swim 1 9 4.753590 4.753590 1599 +router 1 8 4.875197 4.875197 1772 +soccer 1 8 4.875197 4.875197 1752 +sung 1 6 5.164786 5.164786 2075 +superscalar 1 6 5.164786 5.164786 2082 +seen 1 6 5.164786 5.164786 2202 +yeah 1 6 5.164786 5.164786 2195 +ta 1 4 5.568345 5.568345 3058 +choi 1 4 5.568345 5.568345 2732 +astronomi 1 3 5.857933 5.857933 3974 +comfort 1 3 5.857933 5.857933 3136 +wine 1 3 5.857933 5.857933 3895 +knee 1 3 5.857933 5.857933 3980 +choiwelcom 1 2 6.263398 6.263398 5727 +ironman 1 2 6.263398 6.263398 4226 +vegetarian 1 2 6.263398 6.263398 5902 +season 1 2 6.263398 6.263398 4872 +thehomepag 1 1 6.957497 6.957497 16397 +ofsung 1 1 6.957497 6.957497 16398 +eunchoi 1 1 6.957497 6.957497 16399 +myschoollifemi 1 1 6.957497 6.957497 16400 +zplcompil 1 1 6.957497 6.957497 16401 +beenspend 1 1 6.957497 6.957497 16402 +communicationgener 1 1 6.957497 6.957497 16403 +architechtur 1 1 6.957497 6.957497 16404 +communicationlibrari 1 1 6.957497 6.957497 16405 +programson 1 1 6.957497 6.957497 16406 +nodeperform 1 1 6.957497 6.957497 16407 +alsobeen 1 1 6.957497 6.957497 16408 +chaosrout 1 1 6.957497 6.957497 16409 +thatexperi 1 1 6.957497 6.957497 16410 +inzpl 1 1 6.957497 6.957497 16411 +myjunior 1 1 6.957497 6.957497 16412 +dinner 1 1 6.957497 6.957497 16413 +samewithout 1 1 6.957497 6.957497 16414 +twosoccerteam 1 1 6.957497 6.957497 16415 +cousin 1 1 6.957497 6.957497 16416 +scrub 1 1 6.957497 6.957497 16396 +recdivis 1 1 6.957497 6.957497 16417 +andcoop 1 1 6.957497 6.957497 16418 +sacrifiedmi 1 1 6.957497 6.957497 16419 +usualstep 1 1 6.957497 6.957497 16420 +aerobicsclass 1 1 6.957497 6.957497 16421 +trainingclass 1 1 6.957497 6.957497 16422 +abit 1 1 6.957497 6.957497 16423 +shakespear 1 1 6.957497 6.957497 16424 +publictelevis 1 1 6.957497 6.957497 16425 +classicalmus 1 1 6.957497 6.957497 16426 +myotherlif 1 1 6.957497 6.957497 16427 +sungeun 1 1 6.957497 6.957497 16428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..8dcec79b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +world 1 115 2.197225 2.197225 126 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +real 1 93 2.397895 2.397895 144 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +complet 1 77 2.564949 2.564949 208 +advisor 1 51 2.995732 2.995732 355 +run 1 51 2.995732 2.995732 347 +visual 1 48 3.044522 3.044522 372 +execut 1 45 3.135494 3.135494 404 +workstat 1 37 3.332205 3.332205 479 +secur 1 30 3.555348 3.555348 577 +measur 1 28 3.610918 3.610918 609 +decis 1 23 3.806662 3.806662 728 +runtim 1 19 4.007333 4.007333 858 +partial 1 18 4.060443 4.060443 900 +engineeringunivers 1 17 4.110874 4.110874 959 +washingtonbox 1 13 4.382027 4.382027 1200 +characterist 1 12 4.465908 4.465908 1257 +multiprogram 1 6 5.164786 5.164786 2010 +nguyen 1 3 5.857933 5.857933 3290 +andparallel 1 2 6.263398 6.263398 6014 +johnzahorjan 1 2 6.263398 6.263398 6004 +soft 1 2 6.263398 6.263398 5072 +idl 1 2 6.263398 6.263398 4256 +ofappl 1 2 6.263398 6.263398 6002 +tominim 1 2 6.263398 6.263398 5436 +frommi 1 1 6.957497 6.957497 16430 +timeappl 1 1 6.957497 6.957497 16431 +innow 1 1 6.957497 6.957497 16432 +uniprogram 1 1 6.957497 6.957497 16433 +multiprocessorsenviron 1 1 6.957497 6.957497 16429 +goodglob 1 1 6.957497 6.957497 16434 +cvpublic 1 1 6.957497 6.957497 16435 +worldvietnameseresourc 1 1 6.957497 6.957497 16436 +netcyclingplayground 1 1 6.957497 6.957497 16437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..8a44ea65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +click 1 142 1.945910 1.945910 78 +part 1 98 2.302585 2.302585 129 +homepag 1 93 2.397895 2.397895 148 +html 1 75 2.639057 2.639057 235 +browser 1 56 2.890372 2.890372 313 +standard 1 48 3.044522 3.044522 365 +even 1 45 3.135494 3.135494 393 +frame 1 24 3.761200 3.761200 684 +yellow 1 9 4.753590 4.753590 1601 +turkei 1 4 5.568345 5.568345 2914 +tian 1 3 5.857933 5.857933 3680 +homepageyour 1 1 6.957497 6.957497 16438 +rusti 1 1 6.957497 6.957497 16439 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..d8ced596 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +databas 1 122 2.079442 2.079442 86 +technolog 1 131 2.079442 2.079442 102 +machin 1 129 2.079442 2.079442 95 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +follow 1 92 2.397895 2.397895 143 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +build 1 85 2.484907 2.484907 184 +larg 1 82 2.484907 2.484907 168 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +servic 1 72 2.639057 2.639057 236 +workshop 1 71 2.639057 2.639057 239 +involv 1 71 2.639057 2.639057 227 +organ 1 65 2.772589 2.772589 265 +creat 1 63 2.772589 2.772589 277 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +evalu 1 64 2.772589 2.772589 266 +juli 1 60 2.833213 2.833213 305 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +advisor 1 51 2.995732 2.995732 355 +profession 1 51 2.995732 2.995732 345 +past 1 42 3.218876 3.218876 428 +multipl 1 39 3.258097 3.258097 453 +close 1 38 3.295837 3.295837 465 +singl 1 34 3.401197 3.401197 510 +global 1 34 3.401197 3.401197 520 +full 1 28 3.610918 3.610918 615 +measur 1 28 3.610918 3.610918 609 +except 1 28 3.610918 3.610918 607 +proc 1 26 3.688879 3.688879 649 +jeff 1 25 3.737670 3.737670 673 +primari 1 25 3.737670 3.737670 669 +handl 1 24 3.761200 3.761200 685 +basi 1 20 3.951244 3.951244 828 +mostli 1 19 4.007333 4.007333 869 +benchmark 1 19 4.007333 4.007333 859 +behavior 1 18 4.060443 4.060443 881 +fourth 1 16 4.174387 4.174387 999 +across 1 16 4.174387 4.174387 974 +levi 1 14 4.317488 4.317488 1093 +opportun 1 13 4.382027 4.382027 1161 +workload 1 12 4.465908 4.465908 1210 +infrastructur 1 12 4.465908 4.465908 1234 +hank 1 12 4.465908 4.465908 1253 +career 1 12 4.465908 4.465908 1287 +persist 1 11 4.553877 4.553877 1367 +architect 1 8 4.875197 4.875197 1624 +character 1 8 4.875197 4.875197 1767 +oop 1 8 4.875197 4.875197 1778 +spare 1 6 5.164786 5.164786 2177 +oopsla 1 6 5.164786 5.164786 2221 +tiwari 1 5 5.347108 5.347108 2385 +opal 1 4 5.568345 5.568345 3057 +chase 1 4 5.568345 5.568345 2897 +narasayya 1 4 5.568345 5.568345 3065 +boe 1 3 5.857933 5.857933 3318 +addendum 1 3 5.857933 5.857933 3150 +ashutosh 1 2 6.263398 6.263398 5966 +projectsopali 1 1 6.957497 6.957497 16440 +thisexperi 1 1 6.957497 6.957497 16441 +distrbut 1 1 6.957497 6.957497 16442 +ecoop 1 1 6.957497 6.957497 16443 +bosch 1 1 6.957497 6.957497 16444 +messeng 1 1 6.957497 6.957497 16445 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..50584bf2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +onlin 1 75 2.639057 2.639057 223 +java 1 70 2.708050 2.708050 248 +share 1 59 2.833213 2.833213 304 +plai 1 60 2.833213 2.833213 307 +simpl 1 60 2.833213 2.833213 298 +found 1 53 2.944439 2.944439 337 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +still 1 50 3.044522 3.044522 362 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +anoth 1 45 3.135494 3.135494 408 +game 1 36 3.367296 3.367296 498 +copyright 1 36 3.367296 3.367296 495 +platform 1 29 3.583519 3.583519 591 +great 1 27 3.637586 3.637586 626 +linux 1 27 3.637586 3.637586 631 +known 1 24 3.761200 3.761200 702 +honor 1 23 3.806662 3.806662 729 +sort 1 22 3.850148 3.850148 738 +applet 1 20 3.951244 3.951244 827 +histori 1 19 4.007333 4.007333 853 +seek 1 17 4.110874 4.110874 954 +attempt 1 17 4.110874 4.110874 917 +qual 1 15 4.248495 4.248495 1062 +pretti 1 13 4.382027 4.382027 1191 +employ 1 12 4.465908 4.465908 1291 +gain 1 8 4.875197 4.875197 1730 +dead 1 7 5.010635 5.010635 1840 +apart 1 7 5.010635 5.010635 1936 +appar 1 7 5.010635 5.010635 1958 +bookstor 1 7 5.010635 5.010635 1837 +myresum 1 6 5.164786 5.164786 2199 +cat 1 6 5.164786 5.164786 2194 +commit 1 6 5.164786 5.164786 2233 +curiou 1 5 5.347108 5.347108 2541 +superhighwai 1 4 5.568345 5.568345 2943 +scotland 1 4 5.568345 5.568345 3049 +fulfil 1 4 5.568345 5.568345 2932 +breadth 1 4 5.568345 5.568345 2695 +gambit 1 3 5.857933 5.857933 3227 +tessa 1 2 6.263398 6.263398 4507 +yeargradu 1 2 6.263398 6.263398 6015 +maze 1 2 6.263398 6.263398 4843 +knit 1 2 6.263398 6.263398 4906 +relatedgoodi 1 1 6.957497 6.957497 16446 +clio 1 1 6.957497 6.957497 16447 +andbrows 1 1 6.957497 6.957497 16448 +kittyi 1 1 6.957497 6.957497 16449 +siames 1 1 6.957497 6.957497 16450 +memor 1 1 6.957497 6.957497 16451 +therear 1 1 6.957497 6.957497 16452 +tofind 1 1 6.957497 6.957497 16453 +alsor 1 1 6.957497 6.957497 16454 +classesi 1 1 6.957497 6.957497 16455 +ofeight 1 1 6.957497 6.957497 16456 +seminarlinux 1 1 6.957497 6.957497 16457 +gameseverybodi 1 1 6.957497 6.957497 16458 +gametom 1 1 6.957497 6.957497 16459 +coolgam 1 1 6.957497 6.957497 16460 +sleepingi 1 1 6.957497 6.957497 16461 +crochet 1 1 6.957497 6.957497 16462 +tlau 1 1 6.957497 6.957497 16463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..87751e79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +provid 1 121 2.079442 2.079442 94 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +build 1 85 2.484907 2.484907 184 +educ 1 86 2.484907 2.484907 191 +resourc 1 81 2.484907 2.484907 172 +finger 1 52 2.995732 2.995732 354 +electron 1 47 3.091042 3.091042 379 +articl 1 33 3.433987 3.433987 530 +collabor 1 32 3.465736 3.465736 543 +photo 1 31 3.496508 3.496508 561 +martin 1 21 3.912023 3.912023 794 +among 1 21 3.912023 3.912023 781 +across 1 16 4.174387 4.174387 974 +photograph 1 15 4.248495 4.248495 1056 +holidai 1 12 4.465908 4.465908 1224 +lane 1 8 4.875197 4.875197 1720 +courtesi 1 7 5.010635 5.010635 1953 +moon 1 4 5.568345 5.568345 2991 +pierc 1 4 5.568345 5.568345 2623 +tompa 1 3 5.857933 5.857933 3305 +health 1 3 5.857933 5.857933 3787 +trajectori 1 2 6.263398 6.263398 4260 +pearl 1 2 6.263398 6.263398 4485 +wash 1 2 6.263398 6.263398 5714 +receptionist 1 1 6.957497 6.957497 16464 +thelma 1 1 6.957497 6.957497 16465 +louis 1 1 6.957497 6.957497 16466 +oyster 1 1 6.957497 6.957497 16467 +surrealist 1 1 6.957497 6.957497 16468 +propheci 1 1 6.957497 6.957497 16469 +carol 1 1 6.957497 6.957497 16470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..f96229ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +version 1 113 2.197225 2.197225 122 +associ 1 93 2.397895 2.397895 151 +proceed 1 93 2.397895 2.397895 152 +institut 1 84 2.484907 2.484907 187 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +appear 1 78 2.564949 2.564949 210 +optim 1 79 2.564949 2.564949 197 +free 1 73 2.639057 2.639057 224 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +foundat 1 62 2.772589 2.772589 286 +abstract 1 62 2.772589 2.772589 276 +detail 1 57 2.890372 2.890372 321 +without 1 50 3.044522 3.044522 370 +move 1 47 3.091042 3.091042 382 +done 1 47 3.091042 3.091042 381 +cach 1 41 3.218876 3.218876 432 +vita 1 38 3.295837 3.295837 473 +brian 1 38 3.295837 3.295837 466 +return 1 34 3.401197 3.401197 502 +random 1 34 3.401197 3.401197 511 +curriculum 1 33 3.433987 3.433987 535 +product 1 33 3.433987 3.433987 527 +extend 1 32 3.465736 3.465736 539 +held 1 28 3.610918 3.610918 600 +trace 1 25 3.737670 3.737670 677 +longer 1 20 3.951244 3.951244 816 +histori 1 19 4.007333 4.007333 853 +comparison 1 19 4.007333 4.007333 863 +andrew 1 19 4.007333 4.007333 849 +bershad 1 18 4.060443 4.060443 902 +matrix 1 17 4.110874 4.110874 933 +letter 1 16 4.174387 4.174387 981 +driven 1 15 4.248495 4.248495 1048 +near 1 14 4.317488 4.317488 1091 +squar 1 14 4.317488 4.317488 1082 +karlin 1 13 4.382027 4.382027 1176 +sigmetr 1 13 4.382027 4.382027 1173 +anna 1 12 4.465908 4.465908 1292 +usenix 1 12 4.465908 4.465908 1240 +verifi 1 12 4.465908 4.465908 1261 +statement 1 11 4.553877 4.553877 1313 +probabilist 1 11 4.553877 4.553877 1343 +forc 1 10 4.653960 4.653960 1384 +charg 1 9 4.753590 4.753590 1582 +patterson 1 9 4.753590 4.753590 1554 +kumar 1 9 4.753590 4.753590 1506 +sigop 1 8 4.875197 4.875197 1727 +bit 1 7 5.010635 5.010635 1833 +promis 1 6 5.164786 5.164786 2037 +prefetch 1 6 5.164786 5.164786 2039 +edward 1 6 5.164786 5.164786 2050 +onoper 1 6 5.164786 5.164786 2048 +escap 1 4 5.568345 5.568345 3016 +traci 1 3 5.857933 5.857933 3984 +kimbrel 1 3 5.857933 5.857933 3924 +prison 1 3 5.857933 5.857933 3907 +eduher 1 3 5.857933 5.857933 3499 +cachingtraci 1 3 5.857933 5.857933 3923 +felten 1 3 5.857933 5.857933 3925 +tomanufactur 1 2 6.263398 6.263398 6016 +airplan 1 2 6.263398 6.263398 4917 +tomkin 1 2 6.263398 6.263398 5814 +hugo 1 2 6.263398 6.263398 5815 +garth 1 2 6.263398 6.263398 5816 +gibson 1 2 6.263398 6.263398 5817 +implemen 1 2 6.263398 6.263398 5809 +rakesh 1 2 6.263398 6.263398 6017 +sinha 1 2 6.263398 6.263398 5754 +washingtonsinc 1 1 6.957497 6.957497 16473 +trial 1 1 6.957497 6.957497 16474 +toanoth 1 1 6.957497 6.957497 16475 +inmat 1 1 6.957497 6.957497 16476 +wasrecaptur 1 1 6.957497 6.957497 16477 +hisplight 1 1 6.957497 6.957497 16478 +rescu 1 1 6.957497 6.957497 16479 +imprison 1 1 6.957497 6.957497 16471 +ofwhat 1 1 6.957497 6.957497 16480 +captor 1 1 6.957497 6.957497 16472 +tracyk 1 1 6.957497 6.957497 16481 +ieeesymposium 1 1 6.957497 6.957497 16482 +measurementand 1 1 6.957497 6.957497 16483 +usingo 1 1 6.957497 6.957497 16484 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..def0d283 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +seattl 1 120 2.079442 2.079442 103 +take 1 97 2.302585 2.302585 134 +real 1 93 2.397895 2.397895 144 +help 1 83 2.484907 2.484907 175 +run 1 51 2.995732 2.995732 347 +quarter 1 47 3.091042 3.091042 389 +understand 1 47 3.091042 3.091042 384 +keep 1 44 3.135494 3.135494 409 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +press 1 42 3.218876 3.218876 419 +dissert 1 32 3.465736 3.465736 549 +half 1 21 3.912023 3.912023 776 +corpor 1 21 3.912023 3.912023 802 +latest 1 21 3.912023 3.912023 785 +predict 1 19 4.007333 4.007333 855 +statu 1 18 4.060443 4.060443 885 +side 1 15 4.248495 4.248495 1022 +spin 1 14 4.317488 4.317488 1121 +lock 1 9 4.753590 4.753590 1551 +craig 1 7 5.010635 5.010635 1879 +pool 1 6 5.164786 5.164786 2225 +consum 1 5 5.347108 5.347108 2334 +queu 1 4 5.568345 5.568345 2648 +travi 1 3 5.857933 5.857933 3985 +motor 1 3 5.857933 5.857933 3909 +submarin 1 2 6.263398 6.263398 6018 +restor 1 1 6.957497 6.957497 16485 +arctic 1 1 6.957497 6.957497 16486 +esca 1 1 6.957497 6.957497 16487 +volvo 1 1 6.957497 6.957497 16488 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..45c81e90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +download 1 36 3.367296 3.367296 489 +bibliographi 1 34 3.401197 3.401197 518 +hobbi 1 16 4.174387 4.174387 1009 +dean 1 14 4.317488 4.317488 1104 +tullsen 1 6 5.164786 5.164786 2081 +biograph 1 2 6.263398 6.263398 5625 +resumemi 1 2 6.263398 6.263398 4971 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..91f9656f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,236 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +advanc 1 99 2.302585 2.302585 130 +call 1 91 2.397895 2.397895 153 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +associ 1 93 2.397895 2.397895 151 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +librari 1 87 2.484907 2.484907 181 +thing 1 84 2.484907 2.484907 189 +activ 1 84 2.484907 2.484907 182 +larg 1 82 2.484907 2.484907 168 +novemb 1 81 2.484907 2.484907 179 +messag 1 76 2.564949 2.564949 212 +come 1 78 2.564949 2.564949 202 +orient 1 80 2.564949 2.564949 205 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +solv 1 73 2.639057 2.639057 234 +free 1 73 2.639057 2.639057 224 +would 1 67 2.708050 2.708050 251 +degre 1 69 2.708050 2.708050 259 +differ 1 66 2.708050 2.708050 253 +guid 1 63 2.772589 2.772589 267 +foundat 1 62 2.772589 2.772589 286 +improv 1 62 2.772589 2.772589 289 +visit 1 63 2.772589 2.772589 288 +locat 1 59 2.833213 2.833213 303 +explor 1 58 2.890372 2.890372 324 +summer 1 56 2.890372 2.890372 311 +special 1 56 2.890372 2.890372 320 +extens 1 53 2.944439 2.944439 340 +talk 1 53 2.944439 2.944439 336 +hardwar 1 51 2.995732 2.995732 350 +life 1 50 3.044522 3.044522 375 +still 1 50 3.044522 3.044522 362 +quarter 1 47 3.091042 3.091042 389 +could 1 46 3.091042 3.091042 383 +get 1 46 3.091042 3.091042 380 +made 1 44 3.135494 3.135494 398 +anoth 1 45 3.135494 3.135494 408 +fridai 1 44 3.135494 3.135494 390 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +littl 1 39 3.258097 3.258097 454 +programm 1 39 3.258097 3.258097 445 +realli 1 40 3.258097 3.258097 444 +continu 1 39 3.258097 3.258097 448 +small 1 39 3.258097 3.258097 447 +winter 1 36 3.367296 3.367296 500 +staff 1 36 3.367296 3.367296 490 +soon 1 36 3.367296 3.367296 494 +michael 1 35 3.401197 3.401197 514 +post 1 35 3.401197 3.401197 505 +within 1 33 3.433987 3.433987 525 +taught 1 33 3.433987 3.433987 526 +product 1 33 3.433987 3.433987 527 +kind 1 32 3.465736 3.465736 541 +chapter 1 32 3.465736 3.465736 536 +taken 1 31 3.496508 3.496508 555 +photo 1 31 3.496508 3.496508 561 +particip 1 29 3.583519 3.583519 589 +steve 1 29 3.583519 3.583519 594 +subject 1 26 3.688879 3.688879 647 +enjoi 1 26 3.688879 3.688879 660 +valu 1 25 3.737670 3.737670 665 +mike 1 24 3.761200 3.761200 703 +other 1 24 3.761200 3.761200 697 +doctor 1 24 3.761200 3.761200 709 +thank 1 23 3.806662 3.806662 721 +begin 1 23 3.806662 3.806662 716 +finish 1 22 3.850148 3.850148 748 +director 1 22 3.850148 3.850148 767 +born 1 21 3.912023 3.912023 798 +wrote 1 20 3.951244 3.951244 830 +wonder 1 20 3.951244 3.951244 815 +eric 1 19 4.007333 4.007333 870 +citi 1 19 4.007333 4.007333 874 +lot 1 18 4.060443 4.060443 889 +demo 1 18 4.060443 4.060443 888 +debug 1 17 4.110874 4.110874 944 +took 1 16 4.174387 4.174387 1010 +contribut 1 15 4.248495 4.248495 1021 +countri 1 15 4.248495 4.248495 1059 +hopefulli 1 14 4.317488 4.317488 1071 +wife 1 13 4.382027 4.382027 1196 +front 1 13 4.382027 4.382027 1154 +earlier 1 13 4.382027 4.382027 1140 +forth 1 13 4.382027 4.382027 1186 +stai 1 12 4.465908 4.465908 1215 +franc 1 12 4.465908 4.465908 1276 +skill 1 12 4.465908 4.465908 1205 +fix 1 11 4.553877 4.553877 1327 +bill 1 11 4.553877 4.553877 1297 +america 1 11 4.553877 4.553877 1370 +motiv 1 11 4.553877 4.553877 1346 +chri 1 11 4.553877 4.553877 1311 +lake 1 11 4.553877 4.553877 1373 +prior 1 10 4.653960 4.653960 1438 +acquisit 1 10 4.653960 4.653960 1465 +ski 1 10 4.653960 4.653960 1471 +correctli 1 9 4.753590 4.753590 1478 +doug 1 9 4.753590 4.753590 1517 +mention 1 9 4.753590 4.753590 1569 +french 1 9 4.753590 4.753590 1511 +folk 1 9 4.753590 4.753590 1597 +screen 1 9 4.753590 4.753590 1577 +swim 1 9 4.753590 4.753590 1599 +cross 1 8 4.875197 4.875197 1703 +harvard 1 7 5.010635 5.010635 1926 +brought 1 7 5.010635 5.010635 1925 +poster 1 7 5.010635 5.010635 1814 +earn 1 7 5.010635 5.010635 1788 +iowa 1 7 5.010635 5.010635 1971 +pari 1 6 5.164786 5.164786 2158 +nativ 1 6 5.164786 5.164786 2192 +south 1 6 5.164786 5.164786 2167 +oopsla 1 6 5.164786 5.164786 2221 +hike 1 6 5.164786 5.164786 2234 +truli 1 5 5.347108 5.347108 2476 +sail 1 5 5.347108 5.347108 2571 +observatori 1 4 5.568345 5.568345 3070 +countless 1 4 5.568345 5.568345 3020 +theintern 1 4 5.568345 5.568345 2981 +theacm 1 4 5.568345 5.568345 2698 +sigsoft 1 4 5.568345 5.568345 3036 +ti 1 4 5.568345 5.568345 3005 +marco 1 4 5.568345 5.568345 2589 +luck 1 3 5.857933 5.857933 3201 +immedi 1 3 5.857933 5.857933 3117 +motif 1 3 5.857933 5.857933 3752 +astrophys 1 3 5.857933 5.857933 3936 +schwarz 1 3 5.857933 5.857933 3986 +talent 1 3 5.857933 5.857933 3768 +traci 1 3 5.857933 5.857933 3984 +harold 1 3 5.857933 5.857933 3803 +scanner 1 3 5.857933 5.857933 3437 +eduperson 1 2 6.263398 6.263398 5776 +contractor 1 2 6.263398 6.263398 4915 +widget 1 2 6.263398 6.263398 5347 +convinc 1 2 6.263398 6.263398 6019 +calibr 1 2 6.263398 6.263398 4502 +francais 1 2 6.263398 6.263398 6020 +uist 1 2 6.263398 6.263398 5901 +grinnel 1 2 6.263398 6.263398 5763 +alexand 1 2 6.263398 6.263398 5329 +vanhilstmichael 1 1 6.957497 6.957497 16493 +vanhilstvanhilst 1 1 6.957497 6.957497 16494 +edumvh 1 1 6.957497 6.957497 16495 +usaclick 1 1 6.957497 6.957497 16496 +vanhilst 1 1 6.957497 6.957497 16491 +personalmik 1 1 6.957497 6.957497 16497 +theend 1 1 6.957497 6.957497 16498 +udub 1 1 6.957497 6.957497 16499 +atibm 1 1 6.957497 6.957497 16500 +unterfac 1 1 6.957497 6.957497 16501 +sdata 1 1 6.957497 6.957497 16502 +maintainingcomput 1 1 6.957497 6.957497 16503 +smithsonian 1 1 6.957497 6.957497 16489 +saoimagewhich 1 1 6.957497 6.957497 16504 +astronom 1 1 6.957497 6.957497 16505 +saoimag 1 1 6.957497 6.957497 16506 +gnudistribut 1 1 6.957497 6.957497 16507 +wyatt 1 1 6.957497 6.957497 16508 +mandel 1 1 6.957497 6.957497 16509 +minkfor 1 1 6.957497 6.957497 16510 +seismologistsin 1 1 6.957497 6.957497 16511 +angela 1 1 6.957497 6.957497 16492 +theallianc 1 1 6.957497 6.957497 16512 +colombiain 1 1 6.957497 6.957497 16513 +studentsbrows 1 1 6.957497 6.957497 16514 +uwin 1 1 6.957497 6.957497 16490 +pine 1 1 6.957497 6.957497 16515 +shirei 1 1 6.957497 6.957497 16516 +stenvik 1 1 6.957497 6.957497 16517 +frommicrosoft 1 1 6.957497 6.957497 16518 +sacrif 1 1 6.957497 6.957497 16519 +isota 1 1 6.957497 6.957497 16520 +inarchitectur 1 1 6.957497 6.957497 16521 +wooden 1 1 6.957497 6.957497 16522 +planningfrom 1 1 6.957497 6.957497 16523 +mitand 1 1 6.957497 6.957497 16524 +visualdesign 1 1 6.957497 6.957497 16525 +andkayak 1 1 6.957497 6.957497 16526 +bronson 1 1 6.957497 6.957497 16527 +sebastien 1 1 6.957497 6.957497 16528 +hilst 1 1 6.957497 6.957497 16529 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..2dff48ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +databas 1 122 2.079442 2.079442 86 +person 1 111 2.197225 2.197225 117 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +access 1 102 2.302585 2.302585 136 +peopl 1 96 2.302585 2.302585 132 +sinc 1 90 2.397895 2.397895 159 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +come 1 78 2.564949 2.564949 202 +orient 1 80 2.564949 2.564949 205 +appear 1 78 2.564949 2.564949 210 +write 1 72 2.639057 2.639057 222 +html 1 75 2.639057 2.639057 235 +collect 1 65 2.772589 2.772589 268 +written 1 63 2.772589 2.772589 278 +best 1 59 2.833213 2.833213 299 +room 1 59 2.833213 2.833213 301 +overview 1 56 2.890372 2.890372 323 +cool 1 49 3.044522 3.044522 374 +visitor 1 49 3.044522 3.044522 371 +even 1 45 3.135494 3.135494 393 +http 1 41 3.218876 3.218876 420 +live 1 40 3.258097 3.258097 451 +realli 1 40 3.258097 3.258097 444 +join 1 39 3.258097 3.258097 457 +slide 1 38 3.295837 3.295837 467 +download 1 36 3.367296 3.367296 489 +staff 1 36 3.367296 3.367296 490 +short 1 36 3.367296 3.367296 499 +graph 1 30 3.555348 3.555348 576 +quit 1 27 3.637586 3.637586 633 +arrai 1 27 3.637586 3.637586 627 +repres 1 26 3.688879 3.688879 656 +handl 1 24 3.761200 3.761200 685 +thu 1 21 3.912023 3.912023 773 +expand 1 17 4.110874 4.110874 928 +young 1 16 4.174387 4.174387 991 +mayb 1 15 4.248495 4.248495 1014 +qual 1 15 4.248495 4.248495 1062 +infrastructur 1 12 4.465908 4.465908 1234 +hello 1 10 4.653960 4.653960 1407 +mosaic 1 10 4.653960 4.653960 1426 +cecil 1 9 4.753590 4.753590 1547 +pure 1 8 4.875197 4.875197 1776 +irregular 1 8 4.875197 4.875197 1768 +mirror 1 6 5.164786 5.164786 2028 +shortest 1 5 5.347108 5.347108 2424 +writeup 1 5 5.347108 5.347108 2352 +vass 1 2 6.263398 6.263398 4449 +pageuw 1 2 6.263398 6.263398 6021 +pagerec 1 2 6.263398 6.263398 6022 +cecilproject 1 2 6.263398 6.263398 4457 +cooler 1 2 6.263398 6.263398 6023 +anddynam 1 2 6.263398 6.263398 5889 +myqual 1 2 6.263398 6.263398 6005 +closer 1 2 6.263398 6.263398 6024 +vassilylong 1 1 6.957497 6.957497 16530 +linki 1 1 6.957497 6.957497 16531 +fewfil 1 1 6.957497 6.957497 16532 +thisstuff 1 1 6.957497 6.957497 16533 +quotesrussian 1 1 6.957497 6.957497 16534 +pagesvari 1 1 6.957497 6.957497 16535 +linksguid 1 1 6.957497 6.957497 16536 +formsoth 1 1 6.957497 6.957497 16537 +pagencsa 1 1 6.957497 6.957497 16538 +andvortex 1 1 6.957497 6.957497 16539 +befast 1 1 6.957497 6.957497 16540 +themvi 1 1 6.957497 6.957497 16541 +ourdepartment 1 1 6.957497 6.957497 16542 +beenupgrad 1 1 6.957497 6.957497 16543 +thezpl 1 1 6.957497 6.957497 16544 +languageto 1 1 6.957497 6.957497 16545 +repartit 1 1 6.957497 6.957497 16546 +theslidesfrom 1 1 6.957497 6.957497 16547 +toresourc 1 1 6.957497 6.957497 16548 +eduobject 1 1 6.957497 6.957497 16549 +pastor 1 1 6.957497 6.957497 16550 +vybrasyvalsya 1 1 6.957497 6.957497 16551 +okna 1 1 6.957497 6.957497 16552 +pyatyi 1 1 6.957497 6.957497 16553 +deystvov 1 1 6.957497 6.957497 16554 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..27b4beef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +present 1 91 2.397895 2.397895 145 +educ 1 86 2.484907 2.484907 191 +resum 1 79 2.564949 2.564949 217 +servic 1 72 2.639057 2.639057 236 +receiv 1 66 2.708050 2.708050 244 +profession 1 51 2.995732 2.995732 345 +join 1 39 3.258097 3.258097 457 +electr 1 38 3.295837 3.295837 461 +scientist 1 31 3.496508 3.496508 560 +recommend 1 22 3.850148 3.850148 737 +theunivers 1 21 3.912023 3.912023 797 +divis 1 21 3.912023 3.912023 803 +histori 1 19 4.007333 4.007333 853 +letter 1 16 4.174387 4.174387 981 +achiev 1 14 4.317488 4.317488 1088 +evan 1 8 4.875197 4.875197 1633 +patent 1 5 5.347108 5.347108 2574 +invent 1 4 5.568345 5.568345 3028 +arizona 1 3 5.857933 5.857933 3700 +electricalengin 1 3 5.857933 5.857933 3987 +expertis 1 3 5.857933 5.857933 3321 +virgil 1 2 6.263398 6.263398 5783 +bourassa 1 2 6.263398 6.263398 5782 +uwvirgil 1 1 6.957497 6.957497 16555 +bourassavirgil 1 1 6.957497 6.957497 16556 +interestsinclud 1 1 6.957497 6.957497 16557 +boeingin 1 1 6.957497 6.957497 16558 +scienceorgan 1 1 6.957497 6.957497 16559 +bellevu 1 1 6.957497 6.957497 16560 +arizonast 1 1 6.957497 6.957497 16561 +temp 1 1 6.957497 6.957497 16562 +accesswhat 1 1 6.957497 6.957497 16563 +statusoccasion 1 1 6.957497 6.957497 16564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..7ac6dc2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +high 1 130 2.079442 2.079442 101 +provid 1 121 2.079442 2.079442 94 +technolog 1 131 2.079442 2.079442 102 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +look 1 107 2.197225 2.197225 115 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +place 1 106 2.197225 2.197225 124 +intern 1 108 2.197225 2.197225 128 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +follow 1 92 2.397895 2.397895 143 +center 1 88 2.397895 2.397895 158 +academ 1 82 2.484907 2.484907 178 +activ 1 84 2.484907 2.484907 182 +learn 1 86 2.484907 2.484907 170 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +state 1 76 2.564949 2.564949 207 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +nation 1 74 2.639057 2.639057 240 +simul 1 66 2.708050 2.708050 255 +receiv 1 66 2.708050 2.708050 244 +written 1 63 2.772589 2.772589 278 +organ 1 65 2.772589 2.772589 265 +collect 1 65 2.772589 2.772589 268 +visit 1 63 2.772589 2.772589 288 +automat 1 61 2.833213 2.833213 306 +plai 1 60 2.833213 2.833213 307 +best 1 59 2.833213 2.833213 299 +detail 1 57 2.890372 2.890372 321 +found 1 53 2.944439 2.944439 337 +undergradu 1 54 2.944439 2.944439 338 +particular 1 51 2.995732 2.995732 352 +much 1 52 2.995732 2.995732 349 +standard 1 48 3.044522 3.044522 365 +right 1 48 3.044522 3.044522 363 +physic 1 47 3.091042 3.091042 377 +directori 1 45 3.135494 3.135494 396 +math 1 44 3.135494 3.135494 402 +mechan 1 43 3.178054 3.178054 416 +continu 1 39 3.258097 3.258097 448 +tree 1 36 3.367296 3.367296 492 +game 1 36 3.367296 3.367296 498 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +india 1 32 3.465736 3.465736 550 +chapter 1 32 3.465736 3.465736 536 +taken 1 31 3.496508 3.496508 555 +often 1 31 3.496508 3.496508 551 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +held 1 28 3.610918 3.610918 600 +effort 1 26 3.688879 3.688879 652 +enhanc 1 26 3.688879 3.688879 644 +session 1 26 3.688879 3.688879 643 +highli 1 23 3.806662 3.806662 725 +methodolog 1 23 3.806662 3.806662 733 +head 1 23 3.806662 3.806662 732 +period 1 22 3.850148 3.850148 743 +leav 1 21 3.912023 3.912023 772 +born 1 21 3.912023 3.912023 798 +half 1 21 3.912023 3.912023 776 +unit 1 21 3.912023 3.912023 779 +scheme 1 20 3.951244 3.951244 818 +safeti 1 20 3.951244 3.951244 817 +tenni 1 20 3.951244 3.951244 838 +exploit 1 20 3.951244 3.951244 836 +failur 1 18 4.060443 4.060443 898 +attend 1 18 4.060443 4.060443 893 +english 1 15 4.248495 4.248495 1033 +came 1 13 4.382027 4.382027 1197 +replic 1 12 4.465908 4.465908 1231 +nanci 1 12 4.465908 4.465908 1256 +danc 1 12 4.465908 4.465908 1278 +eight 1 11 4.553877 4.553877 1331 +council 1 11 4.553877 4.553877 1364 +literatur 1 11 4.553877 4.553877 1300 +leveson 1 9 4.753590 4.753590 1540 +poetri 1 9 4.753590 4.753590 1596 +simpli 1 8 4.875197 4.875197 1626 +presenc 1 8 4.875197 4.875197 1671 +coast 1 8 4.875197 4.875197 1746 +brought 1 7 5.010635 5.010635 1925 +whenev 1 7 5.010635 5.010635 1883 +cricket 1 7 5.010635 5.010635 1945 +occasion 1 7 5.010635 5.010635 1905 +saturdai 1 7 5.010635 5.010635 1794 +throughout 1 7 5.010635 5.010635 1871 +vivek 1 6 5.164786 5.164786 2210 +squash 1 6 5.164786 5.164786 2223 +band 1 6 5.164786 5.164786 2198 +corba 1 5 5.347108 5.347108 2320 +focuss 1 5 5.347108 5.347108 2271 +cell 1 5 5.347108 5.347108 2274 +nuclear 1 5 5.347108 5.347108 2576 +toolset 1 4 5.568345 5.568345 3014 +murphi 1 4 5.568345 5.568345 2737 +racquetbal 1 4 5.568345 5.568345 3052 +suffic 1 4 5.568345 5.568345 2869 +ultra 1 4 5.568345 5.568345 2889 +swing 1 4 5.568345 5.568345 2887 +restructur 1 4 5.568345 5.568345 2775 +tend 1 4 5.568345 5.568345 3041 +rsml 1 3 5.857933 5.857933 3967 +wesleyan 1 3 5.857933 5.857933 3988 +marin 1 3 5.857933 5.857933 3947 +ballroom 1 3 5.857933 5.857933 3983 +dabbl 1 3 5.857933 5.857933 3971 +bank 1 3 5.857933 5.857933 3920 +mirza 1 3 5.857933 5.857933 3989 +ratan 1 2 6.263398 6.263398 5948 +bellcor 1 2 6.263398 6.263398 5174 +adher 1 2 6.263398 6.263398 6025 +tango 1 2 6.263398 6.263398 6010 +reform 1 2 6.263398 6.263398 5828 +growth 1 2 6.263398 6.263398 4084 +scientistat 1 1 6.957497 6.957497 16566 +morristown 1 1 6.957497 6.957497 16567 +researchwork 1 1 6.957497 6.957497 16568 +distributedsoftwar 1 1 6.957497 6.957497 16569 +anatida 1 1 6.957497 6.957497 16570 +indc 1 1 6.957497 6.957497 16571 +foundher 1 1 6.957497 6.957497 16572 +integrationof 1 1 6.957497 6.957497 16573 +bydr 1 1 6.957497 6.957497 16574 +fromrequir 1 1 6.957497 6.957497 16575 +angelo 1 1 6.957497 6.957497 16565 +middletown 1 1 6.957497 6.957497 16576 +purus 1 1 6.957497 6.957497 16577 +lesserext 1 1 6.957497 6.957497 16578 +ardent 1 1 6.957497 6.957497 16579 +folow 1 1 6.957497 6.957497 16580 +superson 1 1 6.957497 6.957497 16581 +cowboi 1 1 6.957497 6.957497 16582 +keen 1 1 6.957497 6.957497 16583 +waltz 1 1 6.957497 6.957497 16584 +foxtrot 1 1 6.957497 6.957497 16585 +chacha 1 1 6.957497 6.957497 16586 +rhumba 1 1 6.957497 6.957497 16587 +mambo 1 1 6.957497 6.957497 16588 +ecosoc 1 1 6.957497 6.957497 16589 +rapidpopul 1 1 6.957497 6.957497 16590 +prolifer 1 1 6.957497 6.957497 16591 +ghalib 1 1 6.957497 6.957497 16592 +centuryindian 1 1 6.957497 6.957497 16593 +poet 1 1 6.957497 6.957497 16594 +romant 1 1 6.957497 6.957497 16595 +victorian 1 1 6.957497 6.957497 16596 +obligatori 1 1 6.957497 6.957497 16597 +sitesthat 1 1 6.957497 6.957497 16598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..dfc57b55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +call 1 91 2.397895 2.397895 153 +master 1 76 2.564949 2.564949 216 +window 1 68 2.708050 2.708050 242 +thesi 1 57 2.890372 2.890372 327 +mobil 1 23 3.806662 3.806662 730 +avoid 1 21 3.912023 3.912023 799 +emac 1 13 4.382027 4.382027 1143 +voelker 1 9 4.753590 4.753590 1557 +guggenheim 1 8 4.875197 4.875197 1759 +geoff 1 6 5.164786 5.164786 2124 +annex 1 5 5.347108 5.347108 2572 +wireless 1 4 5.568345 5.568345 2693 +washingtonseattl 1 4 5.568345 5.568345 3044 +mobisa 1 3 5.857933 5.857933 3927 +inseattl 1 2 6.263398 6.263398 6026 +whati 1 2 6.263398 6.263398 6027 +andbuild 1 2 6.263398 6.263398 6028 +settl 1 2 6.263398 6.263398 5778 +skywhoi 1 1 6.957497 6.957497 16599 +wherechateau 1 1 6.957497 6.957497 16600 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..4f570990 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +look 1 107 2.197225 2.197225 115 +version 1 113 2.197225 2.197225 122 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +memori 1 101 2.302585 2.302585 139 +peopl 1 96 2.302585 2.302585 132 +stuff 1 87 2.484907 2.484907 171 +thing 1 84 2.484907 2.484907 189 +test 1 66 2.708050 2.708050 252 +organ 1 65 2.772589 2.772589 265 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +cool 1 49 3.044522 3.044522 374 +keep 1 44 3.135494 3.135494 409 +howev 1 41 3.218876 3.218876 422 +littl 1 39 3.258097 3.258097 454 +actual 1 28 3.610918 3.610918 604 +interpret 1 24 3.761200 3.761200 686 +other 1 24 3.761200 3.761200 697 +earli 1 16 4.174387 4.174387 968 +baer 1 11 4.553877 4.553877 1353 +denni 1 11 4.553877 4.553877 1321 +alpha 1 11 4.553877 4.553877 1348 +jean 1 10 4.653960 4.653960 1440 +jump 1 9 4.753590 4.753590 1603 +wong 1 9 4.753590 4.753590 1609 +wayn 1 8 4.875197 4.875197 1738 +loup 1 6 5.164786 5.164786 2228 +geoff 1 6 5.164786 5.164786 2124 +fish 1 6 5.164786 5.164786 2207 +alec 1 5 5.347108 5.347108 2563 +rocki 1 4 5.568345 5.568345 3048 +waynew 1 3 5.857933 5.857933 3982 +differentmemori 1 1 6.957497 6.957497 16601 +beingdon 1 1 6.957497 6.957497 16602 +rightnow 1 1 6.957497 6.957497 16603 +peoplewho 1 1 6.957497 6.957497 16604 +testwayn 1 1 6.957497 6.957497 16605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..9bf03ae9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +william 1 22 3.850148 3.850148 765 +spend 1 19 4.007333 4.007333 850 +hang 1 9 4.753590 4.753590 1499 +pagei 1 8 4.875197 4.875197 1683 +chan 1 7 5.010635 5.010635 1876 +spare 1 6 5.164786 5.164786 2177 +hell 1 4 5.568345 5.568345 2885 +heaven 1 3 5.857933 5.857933 3589 +wchan 1 3 5.857933 5.857933 3338 +pagewilliam 1 1 6.957497 6.957497 16606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..ef471d2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +data 1 170 1.791759 1.791759 49 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +report 1 131 2.079442 2.079442 92 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +world 1 115 2.197225 2.197225 126 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +select 1 91 2.397895 2.397895 154 +journal 1 83 2.484907 2.484907 183 +control 1 82 2.484907 2.484907 164 +wide 1 84 2.484907 2.484907 185 +internet 1 83 2.484907 2.484907 186 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +intellig 1 72 2.639057 2.639057 225 +nation 1 74 2.639057 2.639057 240 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +sieg 1 69 2.708050 2.708050 260 +august 1 66 2.708050 2.708050 257 +artifici 1 63 2.772589 2.772589 280 +dept 1 64 2.772589 2.772589 291 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +visit 1 63 2.772589 2.772589 288 +juli 1 60 2.833213 2.833213 305 +plai 1 60 2.833213 2.833213 307 +publish 1 57 2.890372 2.890372 326 +found 1 53 2.944439 2.944439 337 +investig 1 51 2.995732 2.995732 353 +electron 1 47 3.091042 3.091042 379 +favorit 1 44 3.135494 3.135494 410 +editor 1 41 3.218876 3.218876 433 +edit 1 42 3.218876 3.218876 418 +past 1 42 3.218876 3.218876 428 +winter 1 36 3.367296 3.367296 500 +award 1 34 3.401197 3.401197 523 +least 1 35 3.401197 3.401197 516 +board 1 33 3.433987 3.433987 528 +travel 1 30 3.555348 3.555348 579 +chair 1 29 3.583519 3.583519 596 +repres 1 26 3.688879 3.688879 656 +enjoi 1 26 3.688879 3.688879 660 +reach 1 24 3.761200 3.761200 688 +magazin 1 24 3.761200 3.761200 704 +ofwashington 1 22 3.850148 3.850148 766 +almost 1 22 3.850148 3.850148 742 +comparison 1 19 4.007333 4.007333 863 +agent 1 18 4.060443 4.060443 910 +bachelor 1 17 4.110874 4.110874 957 +adam 1 17 4.110874 4.110874 934 +young 1 16 4.174387 4.174387 991 +action 1 15 4.248495 4.248495 1038 +photograph 1 15 4.248495 4.248495 1056 +role 1 14 4.317488 4.317488 1101 +stori 1 14 4.317488 4.317488 1087 +galleri 1 13 4.382027 4.382027 1192 +daniel 1 12 4.465908 4.465908 1233 +land 1 12 4.465908 4.465908 1273 +guest 1 12 4.465908 4.465908 1220 +infrastructur 1 12 4.465908 4.465908 1234 +sens 1 11 4.553877 4.553877 1305 +shop 1 10 4.653960 4.653960 1469 +invit 1 10 4.653960 4.653960 1428 +weld 1 9 4.753590 4.753590 1538 +hundr 1 9 4.753590 4.753590 1528 +presidenti 1 8 4.875197 4.875197 1737 +aaai 1 8 4.875197 4.875197 1750 +gather 1 8 4.875197 4.875197 1719 +pacif 1 8 4.875197 4.875197 1674 +illustr 1 8 4.875197 4.875197 1679 +planner 1 7 5.010635 5.010635 1797 +ground 1 7 5.010635 5.010635 1955 +softbot 1 7 5.010635 5.010635 1974 +northwest 1 7 5.010635 5.010635 1973 +yale 1 6 5.164786 5.164786 2003 +commit 1 6 5.164786 5.164786 2233 +engineeringat 1 5 5.347108 5.347108 2561 +middl 1 5 5.347108 5.347108 2372 +cacm 1 5 5.347108 5.347108 2388 +allegro 1 5 5.347108 5.347108 2314 +naval 1 4 5.568345 5.568345 2920 +climb 1 4 5.568345 5.568345 2936 +biochemistri 1 3 5.857933 5.857933 3513 +ucpop 1 3 5.857933 5.857933 3878 +revisit 1 3 5.857933 5.857933 3915 +recreat 1 3 5.857933 5.857933 3990 +theworld 1 3 5.857933 5.857933 3158 +twin 1 3 5.857933 5.857933 3657 +younginvestig 1 2 6.263398 6.263398 5794 +ascal 1 2 6.263398 6.263398 5893 +anintroduct 1 2 6.263398 6.263398 4156 +absent 1 2 6.263398 6.263398 4825 +cafe 1 2 6.263398 6.263398 5826 +boi 1 2 6.263398 6.263398 5918 +wilder 1 2 6.263398 6.263398 5516 +theadvisori 1 1 6.957497 6.957497 16607 +airesearch 1 1 6.957497 6.957497 16608 +ofintellig 1 1 6.957497 6.957497 16609 +isco 1 1 6.957497 6.957497 16610 +scad 1 1 6.957497 6.957497 16611 +seattlewa 1 1 6.957497 6.957497 16612 +sitesworldwid 1 1 6.957497 6.957497 16613 +arehi 1 1 6.957497 6.957497 16614 +aip 1 1 6.957497 6.957497 16615 +exhaustivelist 1 1 6.957497 6.957497 16616 +stormymountain 1 1 6.957497 6.957497 16617 +galen 1 1 6.957497 6.957497 16618 +desert 1 1 6.957497 6.957497 16619 +morocco 1 1 6.957497 6.957497 16620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..1c148923 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +master 1 76 2.564949 2.564949 216 +univ 1 28 3.610918 3.610918 617 +utah 1 9 4.753590 4.753590 1585 +wendi 1 2 6.263398 6.263398 5864 +belluomini 1 2 6.263398 6.263398 5865 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..cde24ad5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +architectur 1 139 1.945910 1.945910 77 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +select 1 91 2.397895 2.397895 154 +member 1 84 2.484907 2.484907 165 +chang 1 82 2.484907 2.484907 163 +receiv 1 66 2.708050 2.708050 244 +sieg 1 69 2.708050 2.708050 260 +interact 1 62 2.772589 2.772589 270 +advisor 1 51 2.995732 2.995732 355 +move 1 47 3.091042 3.091042 382 +electr 1 38 3.295837 3.295837 461 +theunivers 1 21 3.912023 3.912023 797 +among 1 21 3.912023 3.912023 781 +programminglanguag 1 21 3.912023 3.912023 782 +voic 1 21 3.912023 3.912023 806 +runtim 1 19 4.007333 4.007333 858 +thedepart 1 11 4.553877 4.553877 1350 +wilson 1 9 4.753590 4.753590 1536 +postdoc 1 8 4.875197 4.875197 1724 +myresearch 1 4 5.568345 5.568345 2842 +weihl 1 3 5.857933 5.857933 3284 +inseattl 1 2 6.263398 6.263398 6026 +thespin 1 2 6.263398 6.263398 6029 +sciencein 1 2 6.263398 6.263398 5804 +thelaboratori 1 2 6.263398 6.263398 4424 +linksperson 1 2 6.263398 6.263398 5143 +hsieh 1 2 6.263398 6.263398 5818 +hsiehwilson 1 1 6.957497 6.957497 16621 +hsiehi 1 1 6.957497 6.957497 16622 +theschool 1 1 6.957497 6.957497 16623 +engineeringatmit 1 1 6.957497 6.957497 16624 +werefran 1 1 6.957497 6.957497 16625 +kaashoekandbil 1 1 6.957497 6.957497 16626 +publicationsselect 1 1 6.957497 6.957497 16627 +interestswilson 1 1 6.957497 6.957497 16628 +numberha 1 1 6.957497 6.957497 16629 +whsieh 1 1 6.957497 6.957497 16630 +keyoctob 1 1 6.957497 6.957497 16631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..13f020d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +analysi 1 124 2.079442 2.079442 98 +structur 1 106 2.197225 2.197225 105 +school 1 84 2.484907 2.484907 188 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +line 1 75 2.639057 2.639057 231 +plai 1 60 2.833213 2.833213 307 +summer 1 56 2.890372 2.890372 311 +realli 1 40 3.258097 3.258097 444 +winter 1 36 3.367296 3.367296 500 +idea 1 32 3.465736 3.465736 545 +departmentunivers 1 24 3.761200 3.761200 711 +scalabl 1 24 3.761200 3.761200 705 +interpret 1 24 3.761200 3.761200 686 +binari 1 20 3.951244 3.951244 823 +bershad 1 18 4.060443 4.060443 902 +asplo 1 17 4.110874 4.110874 948 +cambridg 1 16 4.174387 4.174387 1008 +latenc 1 16 4.174387 4.174387 993 +levi 1 14 4.317488 4.317488 1093 +washingtonbox 1 13 4.382027 4.382027 1200 +usenix 1 12 4.465908 4.465908 1240 +baer 1 11 4.553877 4.553877 1353 +thecomput 1 10 4.653960 4.653960 1408 +equip 1 10 4.653960 4.653960 1459 +voelker 1 9 4.753590 4.753590 1557 +wong 1 9 4.753590 4.753590 1609 +romer 1 8 4.875197 4.875197 1706 +guitar 1 8 4.875197 4.875197 1758 +instrument 1 7 5.010635 5.010635 1954 +wolman 1 6 5.164786 5.164786 2093 +corp 1 6 5.164786 5.164786 2139 +strang 1 6 5.164786 5.164786 2064 +alec 1 5 5.347108 5.347108 2563 +departmentat 1 5 5.347108 5.347108 2513 +treat 1 5 5.347108 5.347108 2521 +gradual 1 4 5.568345 5.568345 2997 +etch 1 4 5.568345 5.568345 2755 +thekkath 1 3 5.857933 5.857933 3973 +habit 1 3 5.857933 5.857933 3777 +thechateau 1 2 6.263398 6.263398 5853 +fordigit 1 2 6.263398 6.263398 5752 +firewal 1 2 6.263398 6.263398 5407 +relai 1 2 6.263398 6.263398 5404 +hungri 1 2 6.263398 6.263398 5511 +otter 1 2 6.263398 6.263398 4166 +nervou 1 2 6.263398 6.263398 5953 +pressur 1 2 6.263398 6.263398 5960 +wolmanwolman 1 1 6.957497 6.957497 16632 +eduworkcomput 1 1 6.957497 6.957497 16633 +isroom 1 1 6.957497 6.957497 16634 +executablesrocki 1 1 6.957497 6.957497 16635 +performanceon 1 1 6.957497 6.957497 16636 +trees 1 1 6.957497 6.957497 16637 +fixha 1 1 6.957497 6.957497 16638 +hallwolman 1 1 6.957497 6.957497 16639 +diseasewolman 1 1 6.957497 6.957497 16640 +lumber 1 1 6.957497 6.957497 16641 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..9e094bee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +intern 1 108 2.197225 2.197225 128 +proceed 1 93 2.397895 2.397895 152 +school 1 84 2.484907 2.484907 188 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +stuff 1 87 2.484907 2.484907 171 +method 1 80 2.564949 2.564949 213 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +goal 1 66 2.708050 2.708050 250 +evalu 1 64 2.772589 2.772589 266 +dept 1 64 2.772589 2.772589 291 +visual 1 48 3.044522 3.044522 372 +possibl 1 47 3.091042 3.091042 378 +term 1 43 3.178054 3.178054 411 +cach 1 41 3.218876 3.218876 432 +submit 1 39 3.258097 3.258097 440 +short 1 36 3.367296 3.367296 499 +soon 1 36 3.367296 3.367296 494 +award 1 34 3.401197 3.401197 523 +tech 1 35 3.401197 3.401197 515 +photo 1 31 3.496508 3.496508 561 +graph 1 30 3.555348 3.555348 576 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +univ 1 28 3.610918 3.610918 617 +compar 1 26 3.688879 3.688879 648 +trace 1 25 3.737670 3.737670 677 +predict 1 19 4.007333 4.007333 855 +monitor 1 17 4.110874 4.110874 941 +zhang 1 16 4.174387 4.174387 980 +driven 1 15 4.248495 4.248495 1048 +coher 1 14 4.317488 4.317488 1109 +baer 1 11 4.553877 4.553877 1353 +jean 1 10 4.653960 4.653960 1440 +explicit 1 9 4.753590 4.753590 1525 +loup 1 6 5.164786 5.164786 2228 +optimist 1 5 5.347108 5.347108 2501 +conserv 1 4 5.568345 5.568345 2870 +tran 1 3 5.857933 5.857933 3384 +communicationprimit 1 2 6.263398 6.263398 5449 +hpca 1 2 6.263398 6.263398 6030 +toolfor 1 2 6.263398 6.263398 6031 +numa 1 2 6.263398 6.263398 4905 +xiaohan 1 1 6.957497 6.957497 16642 +xqin 1 1 6.957497 6.957497 16643 +basedmultiprocessor 1 1 6.957497 6.957497 16644 +nalluri 1 1 6.957497 6.957497 16645 +processingon 1 1 6.957497 6.957497 16646 +chinaread 1 1 6.957497 6.957497 16647 +chinesesearch 1 1 6.957497 6.957497 16648 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..61661749 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +intern 1 108 2.197225 2.197225 128 +text 1 98 2.302585 2.302585 133 +second 1 81 2.484907 2.484907 166 +info 1 85 2.484907 2.484907 176 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +servic 1 72 2.639057 2.639057 236 +type 1 61 2.833213 2.833213 296 +index 1 56 2.890372 2.890372 309 +talk 1 53 2.944439 2.944439 336 +finger 1 52 2.995732 2.995732 354 +transact 1 39 3.258097 3.258097 438 +brian 1 38 3.295837 3.295837 466 +connect 1 37 3.332205 3.332205 485 +tech 1 35 3.401197 3.401197 515 +survei 1 35 3.401197 3.401197 513 +random 1 34 3.401197 3.401197 511 +linux 1 27 3.637586 3.637586 631 +yahoo 1 24 3.761200 3.761200 707 +lyco 1 19 4.007333 4.007333 871 +bershad 1 18 4.060443 4.060443 902 +qual 1 15 4.248495 4.248495 1062 +spin 1 14 4.317488 4.317488 1121 +touch 1 12 4.465908 4.465908 1288 +perl 1 11 4.553877 4.553877 1332 +metacrawl 1 10 4.653960 4.653960 1455 +desktop 1 10 4.653960 4.653960 1445 +vista 1 10 4.653960 4.653960 1452 +meta 1 9 4.753590 4.753590 1505 +modula 1 9 4.753590 4.753590 1613 +japan 1 8 4.875197 4.875197 1762 +gatewai 1 7 5.010635 5.010635 1942 +lesson 1 5 5.347108 5.347108 2568 +alta 1 4 5.568345 5.568345 3039 +japanes 1 4 5.568345 5.568345 2934 +patch 1 4 5.568345 5.568345 2710 +archi 1 3 5.857933 5.857933 3639 +javascript 1 3 5.857933 5.857933 3221 +thespin 1 2 6.263398 6.263398 6029 +apprentic 1 2 6.263398 6.263398 5873 +yasushi 1 1 6.957497 6.957497 16649 +saitoyasushi 1 1 6.957497 6.957497 16650 +saito 1 1 6.957497 6.957497 16651 +atdepart 1 1 6.957497 6.957497 16652 +workingwith 1 1 6.957497 6.957497 16653 +andperson 1 1 6.957497 6.957497 16654 +sightse 1 1 6.957497 6.957497 16655 +trainer 1 1 6.957497 6.957497 16656 +dvorak 1 1 6.957497 6.957497 16657 +trycanva 1 1 6.957497 6.957497 16658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..b702849b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +mathemat 1 108 2.197225 2.197225 123 +part 1 98 2.302585 2.302585 129 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +resum 1 79 2.564949 2.564949 217 +intellig 1 72 2.639057 2.639057 225 +line 1 75 2.639057 2.639057 231 +servic 1 72 2.639057 2.639057 236 +degre 1 69 2.708050 2.708050 259 +artifici 1 63 2.772589 2.772589 280 +result 1 65 2.772589 2.772589 281 +basic 1 50 3.044522 3.044522 360 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +field 1 37 3.332205 3.332205 482 +idea 1 32 3.465736 3.465736 545 +computersci 1 30 3.555348 3.555348 562 +cluster 1 28 3.610918 3.610918 612 +retriev 1 27 3.637586 3.637586 621 +along 1 18 4.060443 4.060443 878 +engineeringunivers 1 17 4.110874 4.110874 959 +chateau 1 16 4.174387 4.174387 997 +trip 1 14 4.317488 4.317488 1113 +washingtonbox 1 13 4.382027 4.382027 1200 +edui 1 13 4.382027 4.382027 1193 +israel 1 11 4.553877 4.553877 1366 +metacrawl 1 10 4.653960 4.653960 1455 +ski 1 10 4.653960 4.653960 1471 +hundr 1 9 4.753590 4.753590 1528 +erik 1 8 4.875197 4.875197 1701 +oren 1 6 5.164786 5.164786 2134 +softwareengin 1 6 5.164786 5.164786 2162 +selberg 1 5 5.347108 5.347108 2441 +algorithmsfor 1 4 5.568345 5.568345 2748 +worki 1 4 5.568345 5.568345 3010 +raft 1 4 5.568345 5.568345 3060 +dive 1 3 5.857933 5.857933 3654 +zamir 1 2 6.263398 6.263398 5897 +pageoren 1 2 6.263398 6.263398 5888 +jerusalem 1 2 6.263398 6.263398 4918 +isra 1 1 6.957497 6.957497 16659 +myundergradu 1 1 6.957497 6.957497 16660 +hebrewunivers 1 1 6.957497 6.957497 16661 +userwith 1 1 6.957497 6.957497 16662 +orenetzioni 1 1 6.957497 6.957497 16663 +sinai 1 1 6.957497 6.957497 16664 +jeeptour 1 1 6.957497 6.957497 16665 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..eeabd7ba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +look 1 107 2.197225 2.197225 115 +present 1 91 2.397895 2.397895 145 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +nation 1 74 2.639057 2.639057 240 +involv 1 71 2.639057 2.639057 227 +receiv 1 66 2.708050 2.708050 244 +simul 1 66 2.708050 2.708050 255 +improv 1 62 2.772589 2.772589 289 +direct 1 57 2.890372 2.890372 316 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +approach 1 48 3.044522 3.044522 366 +textbook 1 44 3.135494 3.135494 397 +protocol 1 45 3.135494 3.135494 407 +execut 1 45 3.135494 3.135494 404 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +cach 1 41 3.218876 3.218876 432 +join 1 39 3.258097 3.258097 457 +author 1 39 3.258097 3.258097 450 +electr 1 38 3.295837 3.295837 461 +industri 1 38 3.295837 3.295837 464 +ofth 1 36 3.367296 3.367296 491 +singl 1 34 3.401197 3.401197 510 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +chair 1 29 3.583519 3.583519 596 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +although 1 25 3.737670 3.737670 667 +trace 1 25 3.737670 3.737670 677 +fellow 1 24 3.761200 3.761200 701 +serv 1 22 3.850148 3.850148 758 +comparison 1 19 4.007333 4.007333 863 +asplo 1 17 4.110874 4.110874 948 +driven 1 15 4.248495 4.248495 1048 +coher 1 14 4.317488 4.317488 1109 +difficulti 1 13 4.382027 4.382027 1132 +block 1 13 4.382027 4.382027 1183 +franc 1 12 4.465908 4.465908 1276 +baer 1 11 4.553877 4.553877 1353 +cycl 1 11 4.553877 4.553877 1335 +primit 1 11 4.553877 4.553877 1317 +isca 1 11 4.553877 4.553877 1354 +impact 1 11 4.553877 4.553877 1334 +denni 1 11 4.553877 4.553877 1321 +jean 1 10 4.653960 4.653960 1440 +prior 1 10 4.653960 4.653960 1438 +french 1 9 4.753590 4.753590 1511 +guggenheim 1 8 4.875197 4.875197 1759 +uniprocessor 1 8 4.875197 4.875197 1696 +loup 1 6 5.164786 5.164786 2228 +prefetch 1 6 5.164786 5.164786 2039 +ucla 1 5 5.347108 5.347108 2502 +icpp 1 5 5.347108 5.347108 2382 +anddistribut 1 4 5.568345 5.568345 3031 +coauthor 1 4 5.568345 5.568345 3064 +conserv 1 4 5.568345 5.568345 2870 +electricalengin 1 3 5.857933 5.857933 3987 +chairman 1 3 5.857933 5.857933 3991 +parallelprocess 1 3 5.857933 5.857933 3626 +twelv 1 3 5.857933 5.857933 3899 +specul 1 3 5.857933 5.857933 3951 +adjunct 1 2 6.263398 6.263398 6033 +diplom 1 2 6.263398 6.263398 5982 +theuniversit 1 2 6.263398 6.263398 5927 +grenobl 1 2 6.263398 6.263398 5928 +laboratoir 1 2 6.263398 6.263398 5929 +universit 1 2 6.263398 6.263398 5630 +internationalsymposium 1 2 6.263398 6.263398 6032 +retain 1 2 6.263398 6.263398 5443 +hpca 1 2 6.263398 6.263398 6030 +professorand 1 1 6.957497 6.957497 16666 +ingnieur 1 1 6.957497 6.957497 16667 +doctorat 1 1 6.957497 6.957497 16668 +decalcul 1 1 6.957497 6.957497 16669 +technologygroup 1 1 6.957497 6.957497 16670 +thesearea 1 1 6.957497 6.957497 16671 +distinguishedvisitor 1 1 6.957497 6.957497 16672 +asprogram 1 1 6.957497 6.957497 16673 +sigarch 1 1 6.957497 6.957497 16674 +eighteen 1 1 6.957497 6.957497 16675 +professorba 1 1 6.957497 6.957497 16676 +laboratoriesand 1 1 6.957497 6.957497 16677 +inacademia 1 1 6.957497 6.957497 16678 +hashad 1 1 6.957497 6.957497 16679 +accent 1 1 6.957497 6.957497 16680 +comparisonwith 1 1 6.957497 6.957497 16681 +andisca 1 1 6.957497 6.957497 16682 +optimisticapproach 1 1 6.957497 6.957497 16683 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..e37eab92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +seattl 1 120 2.079442 2.079442 103 +assist 1 112 2.197225 2.197225 113 +advanc 1 99 2.302585 2.302585 130 +member 1 84 2.484907 2.484907 165 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +requir 1 81 2.484907 2.484907 167 +orient 1 80 2.564949 2.564949 205 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +april 1 77 2.564949 2.564949 196 +effici 1 73 2.639057 2.639057 233 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +sieg 1 69 2.708050 2.708050 260 +guid 1 63 2.772589 2.772589 267 +street 1 63 2.772589 2.772589 293 +type 1 61 2.833213 2.833213 296 +room 1 59 2.833213 2.833213 301 +faculti 1 56 2.890372 2.890372 325 +direct 1 57 2.890372 2.890372 316 +extens 1 53 2.944439 2.944439 340 +undergradu 1 54 2.944439 2.944439 338 +investig 1 51 2.995732 2.995732 353 +maintain 1 51 2.995732 2.995732 342 +still 1 50 3.044522 3.044522 362 +pointer 1 48 3.044522 3.044522 368 +featur 1 46 3.091042 3.091042 386 +adapt 1 46 3.091042 3.091042 387 +join 1 39 3.258097 3.258097 457 +multi 1 36 3.367296 3.367296 493 +express 1 32 3.465736 3.465736 540 +profil 1 30 3.555348 3.555348 581 +static 1 27 3.637586 3.637586 619 +lead 1 23 3.806662 3.806662 718 +self 1 22 3.850148 3.850148 761 +programminglanguag 1 21 3.912023 3.912023 782 +util 1 21 3.912023 3.912023 774 +kernel 1 20 3.951244 3.951244 825 +fine 1 20 3.951244 3.951244 822 +stanford 1 17 4.110874 4.110874 955 +previous 1 17 4.110874 4.110874 923 +engineeringunivers 1 17 4.110874 4.110874 959 +spin 1 14 4.317488 4.317488 1121 +achiev 1 14 4.317488 4.317488 1088 +incorpor 1 13 4.382027 4.382027 1163 +washingtonbox 1 13 4.382027 4.382027 1200 +safe 1 12 4.465908 4.465908 1274 +modul 1 10 4.653960 4.653960 1434 +reli 1 10 4.653960 4.653960 1411 +cecil 1 9 4.753590 4.753590 1547 +end 1 9 4.753590 4.753590 1567 +modula 1 9 4.753590 4.753590 1613 +herefor 1 9 4.753590 4.753590 1483 +chamber 1 8 4.875197 4.875197 1692 +pure 1 8 4.875197 4.875197 1776 +analys 1 8 4.875197 4.875197 1666 +isol 1 8 4.875197 4.875197 1663 +craig 1 7 5.010635 5.010635 1879 +implementationof 1 7 5.010635 5.010635 1813 +vehicl 1 7 5.010635 5.010635 1928 +vortex 1 5 5.347108 5.347108 2362 +spinproject 1 5 5.347108 5.347108 2570 +despit 1 5 5.347108 5.347108 2317 +languagesand 1 4 5.568345 5.568345 3071 +microkernel 1 4 5.568345 5.568345 3047 +intra 1 3 5.857933 5.857933 3243 +dialect 1 3 5.857933 5.857933 3226 +dynamiccompil 1 3 5.857933 5.857933 3926 +programmingenviron 1 2 6.263398 6.263398 5240 +whichsupport 1 2 6.263398 6.263398 6003 +ceciland 1 1 6.957497 6.957497 16684 +languageserv 1 1 6.957497 6.957497 16685 +compilersystem 1 1 6.957497 6.957497 16686 +andinterprocedur 1 1 6.957497 6.957497 16687 +withfront 1 1 6.957497 6.957497 16688 +chamberswa 1 1 6.957497 6.957497 16689 +implementationsund 1 1 6.957497 6.957497 16690 +systemintegr 1 1 6.957497 6.957497 16691 +themodula 1 1 6.957497 6.957497 16692 +spinalso 1 1 6.957497 6.957497 16693 +grainedextens 1 1 6.957497 6.957497 16694 +researchproject 1 1 6.957497 6.957497 16695 +informationprof 1 1 6.957497 6.957497 16696 +chambersdepart 1 1 6.957497 6.957497 16697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..66654e18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +advanc 1 99 2.302585 2.302585 130 +associ 1 93 2.397895 2.397895 151 +graphic 1 90 2.397895 2.397895 147 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +involv 1 71 2.639057 2.639057 227 +logic 1 71 2.639057 2.639057 230 +workshop 1 71 2.639057 2.639057 239 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +room 1 59 2.833213 2.833213 301 +digit 1 52 2.995732 2.995732 348 +physic 1 47 3.091042 3.091042 377 +join 1 39 3.258097 3.258097 457 +programm 1 39 3.258097 3.258097 445 +paul 1 38 3.295837 3.295837 471 +field 1 37 3.332205 3.332205 482 +articl 1 33 3.433987 3.433987 530 +focu 1 30 3.555348 3.555348 571 +travel 1 30 3.555348 3.555348 579 +arrai 1 27 3.637586 3.637586 627 +ofwashington 1 22 3.850148 3.850148 766 +voic 1 21 3.912023 3.912023 806 +vlsi 1 21 3.912023 3.912023 795 +chip 1 21 3.912023 3.912023 770 +rout 1 21 3.912023 3.912023 793 +particularli 1 19 4.007333 4.007333 867 +aid 1 18 4.060443 4.060443 904 +carl 1 15 4.248495 4.248495 1024 +draw 1 14 4.317488 4.317488 1086 +circuit 1 13 4.382027 4.382027 1131 +carnegi 1 12 4.465908 4.465908 1260 +fpga 1 10 4.653960 4.653960 1433 +franklin 1 10 4.653960 4.653960 1436 +curv 1 8 4.875197 4.875197 1656 +chao 1 8 4.875197 4.875197 1753 +sensit 1 8 4.875197 4.875197 1726 +router 1 8 4.875197 4.875197 1772 +multicomput 1 7 5.010635 5.010635 1890 +northwest 1 7 5.010635 5.010635 1973 +densiti 1 7 5.010635 5.010635 1927 +southern 1 6 5.164786 5.164786 2191 +spline 1 6 5.164786 5.164786 2007 +gate 1 6 5.164786 5.164786 2182 +categori 1 5 5.347108 5.347108 2261 +darren 1 5 5.347108 5.347108 2565 +ebel 1 4 5.568345 5.568345 2756 +triptych 1 4 5.568345 5.568345 3061 +neil 1 4 5.568345 5.568345 2841 +theperform 1 3 5.857933 5.857933 3262 +cronquist 1 3 5.857933 5.857933 3942 +haswork 1 2 6.263398 6.263398 5182 +andsurfac 1 2 6.263398 6.263398 5735 +hei 1 2 6.263398 6.263398 5769 +latch 1 2 6.263398 6.263398 6034 +soha 1 2 6.263398 6.263398 6006 +hassoun 1 2 6.263398 6.263398 6007 +mckenzi 1 2 6.263398 6.263398 5974 +ebelingdepart 1 1 6.957497 6.957497 16698 +wheatoncolleg 1 1 6.957497 6.957497 16699 +illinoisunivers 1 1 6.957497 6.957497 16700 +mellonunivers 1 1 6.957497 6.957497 16701 +vlsiarchitectur 1 1 6.957497 6.957497 16702 +hitech 1 1 6.957497 6.957497 16703 +chessmachin 1 1 6.957497 6.957497 16704 +apex 1 1 6.957497 6.957497 16705 +routingnetwork 1 1 6.957497 6.957497 16706 +placementand 1 1 6.957497 6.957497 16707 +teachingspr 1 1 6.957497 6.957497 16708 +designoffic 1 1 6.957497 6.957497 16709 +fccm 1 1 6.957497 6.957497 16710 +napamai 1 1 6.957497 6.957497 16711 +burlington 1 1 6.957497 6.957497 16712 +chicagojun 1 1 6.957497 6.957497 16713 +vegasresearch 1 1 6.957497 6.957497 16714 +amara 1 1 6.957497 6.957497 16715 +galleryelan 1 1 6.957497 6.957497 16716 +galleryebel 1 1 6.957497 6.957497 16717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..ab1a6bc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +spring 1 131 2.079442 2.079442 88 +confer 1 126 2.079442 2.079442 100 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +server 1 76 2.564949 2.564949 204 +new 1 64 2.772589 2.772589 262 +photo 1 31 3.496508 3.496508 561 +steve 1 29 3.583519 3.583519 594 +magazin 1 24 3.761200 3.761200 704 +tenni 1 20 3.951244 3.951244 838 +agent 1 18 4.060443 4.060443 910 +hank 1 12 4.465908 4.465908 1253 +uncertainti 1 7 5.010635 5.010635 1882 +restaur 1 6 5.164786 5.164786 2230 +seriou 1 5 5.347108 5.347108 2252 +carlo 1 5 5.347108 5.347108 2515 +maria 1 4 5.568345 5.568345 2954 +wine 1 3 5.857933 5.857933 3895 +hanksunivers 1 1 6.957497 6.957497 16718 +washingtondepart 1 1 6.957497 6.957497 16719 +architecturesai 1 1 6.957497 6.957497 16720 +symphoni 1 1 6.957497 6.957497 16721 +opera 1 1 6.957497 6.957497 16722 +edita 1 1 6.957497 6.957497 16723 +gruberova 1 1 6.957497 6.957497 16724 +giulini 1 1 6.957497 6.957497 16725 +discographi 1 1 6.957497 6.957497 16726 +sumac 1 1 6.957497 6.957497 16727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..5fdee874 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +take 1 97 2.302585 2.302585 134 +school 1 84 2.484907 2.484907 188 +intellig 1 72 2.639057 2.639057 225 +degre 1 69 2.708050 2.708050 259 +knowledg 1 67 2.708050 2.708050 243 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +colleg 1 61 2.833213 2.833213 300 +faculti 1 56 2.890372 2.890372 325 +understand 1 47 3.091042 3.091042 384 +math 1 44 3.135494 3.135494 402 +electr 1 38 3.295837 3.295837 461 +origin 1 38 3.295837 3.295837 472 +within 1 33 3.433987 3.433987 525 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +neural 1 30 3.555348 3.555348 578 +symbol 1 27 3.637586 3.637586 620 +spent 1 25 3.737670 3.737670 676 +initi 1 23 3.806662 3.806662 717 +corpor 1 21 3.912023 3.912023 802 +divis 1 21 3.912023 3.912023 803 +verif 1 20 3.951244 3.951244 826 +expert 1 20 3.951244 3.951244 833 +mostli 1 19 4.007333 4.007333 869 +aid 1 18 4.060443 4.060443 904 +speech 1 12 4.465908 4.465908 1222 +fellowship 1 10 4.653960 4.653960 1460 +yale 1 6 5.164786 5.164786 2003 +british 1 5 5.347108 5.347108 2546 +broadcast 1 5 5.347108 5.347108 2453 +began 1 5 5.347108 5.347108 2498 +scotland 1 4 5.568345 5.568345 3049 +withth 1 4 5.568345 5.568345 2805 +alistair 1 3 5.857933 5.857933 3315 +holden 1 3 5.857933 5.857933 3314 +london 1 3 5.857933 5.857933 3282 +imperi 1 2 6.263398 6.263398 5389 +highland 1 1 6.957497 6.957497 16728 +receivedhi 1 1 6.957497 6.957497 16729 +glasgow 1 1 6.957497 6.957497 16730 +graduateapprentic 1 1 6.957497 6.957497 16731 +edison 1 1 6.957497 6.957497 16732 +phddegre 1 1 6.957497 6.957497 16733 +learningin 1 1 6.957497 6.957497 16734 +coursefrom 1 1 6.957497 6.957497 16735 +colin 1 1 6.957497 6.957497 16736 +cherri 1 1 6.957497 6.957497 16737 +thebbc 1 1 6.957497 6.957497 16738 +theuw 1 1 6.957497 6.957497 16739 +departmentsform 1 1 6.957497 6.957497 16740 +netmethodolog 1 1 6.957497 6.957497 16741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..821a041e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +construct 1 139 1.945910 1.945910 82 +problem 1 147 1.945910 1.945910 75 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +teach 1 108 2.197225 2.197225 112 +manag 1 114 2.197225 2.197225 125 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +member 1 84 2.484907 2.484907 165 +academ 1 82 2.484907 2.484907 178 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +nation 1 74 2.639057 2.639057 240 +effici 1 73 2.639057 2.639057 233 +complex 1 64 2.772589 2.772589 269 +improv 1 62 2.772589 2.772589 289 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +press 1 42 3.218876 3.218876 419 +societi 1 40 3.258097 3.258097 456 +theoret 1 39 3.258097 3.258097 446 +industri 1 38 3.295837 3.295837 464 +respons 1 37 3.332205 3.332205 476 +award 1 34 3.401197 3.401197 523 +random 1 34 3.401197 3.401197 511 +john 1 33 3.433987 3.433987 532 +board 1 33 3.433987 3.433987 528 +travel 1 30 3.555348 3.555348 579 +art 1 29 3.583519 3.583519 593 +chair 1 29 3.583519 3.583519 596 +held 1 28 3.610918 3.610918 600 +american 1 27 3.637586 3.637586 634 +berkelei 1 26 3.688879 3.688879 657 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +flow 1 24 3.761200 3.761200 700 +ofwashington 1 22 3.850148 3.850148 766 +reduc 1 22 3.850148 3.850148 759 +among 1 21 3.912023 3.912023 781 +miller 1 17 4.110874 4.110874 949 +match 1 16 4.174387 4.174387 965 +partit 1 16 4.174387 4.174387 984 +massachusett 1 14 4.317488 4.317488 1118 +america 1 11 4.553877 4.553877 1370 +probabilist 1 11 4.553877 4.553877 1343 +minimum 1 9 4.753590 4.753590 1555 +academi 1 8 4.875197 4.875197 1735 +combinatori 1 8 4.875197 4.875197 1629 +pennsylvania 1 7 5.010635 5.010635 1932 +perfect 1 7 5.010635 5.010635 1921 +prize 1 6 5.164786 5.164786 2150 +ture 1 6 5.164786 5.164786 1997 +advisori 1 6 5.164786 5.164786 2148 +plane 1 6 5.164786 5.164786 2187 +karp 1 5 5.347108 5.347108 2284 +weyl 1 4 5.568345 5.568345 2854 +technion 1 4 5.568345 5.568345 2856 +weizmann 1 4 5.568345 5.568345 2858 +combinator 1 4 5.568345 5.568345 2915 +theacm 1 4 5.568345 5.568345 2698 +medal 1 3 5.857933 5.857933 3912 +neumann 1 3 5.857933 5.857933 3720 +truste 1 3 5.857933 5.857933 3900 +combinatorica 1 3 5.857933 5.857933 3649 +ofoper 1 3 5.857933 5.857933 3292 +dick 1 2 6.263398 6.263398 5396 +sciencesmemb 1 2 6.263398 6.263398 5742 +engineeringfellow 1 2 6.263398 6.263398 4902 +sciencesfellow 1 2 6.263398 6.263398 4903 +appliedmathemat 1 2 6.263398 6.263398 5716 +honorari 1 2 6.263398 6.263398 5741 +georgetown 1 2 6.263398 6.263398 5667 +wigderson 1 2 6.263398 6.263398 6035 +fornetwork 1 2 6.263398 6.263398 5580 +edmond 1 2 6.263398 6.263398 4144 +plenum 1 2 6.263398 6.263398 6036 +karprichard 1 1 6.957497 6.957497 16744 +karpprofessor 1 1 6.957497 6.957497 16745 +ofcomputersci 1 1 6.957497 6.957497 16746 +andadjunct 1 1 6.957497 6.957497 16747 +ofmolecularbiotechnologyunivers 1 1 6.957497 6.957497 16748 +eduaward 1 1 6.957497 6.957497 16749 +membershipsn 1 1 6.957497 6.957497 16750 +babbag 1 1 6.957497 6.957497 16751 +sciencedistinguish 1 1 6.957497 6.957497 16752 +senat 1 1 6.957497 6.957497 16753 +berkeleylanchest 1 1 6.957497 6.957497 16754 +fulkerson 1 1 6.957497 6.957497 16755 +hermann 1 1 6.957497 6.957497 16756 +forsoci 1 1 6.957497 6.957497 16757 +presentmemb 1 1 6.957497 6.957497 16742 +governor 1 1 6.957497 6.957497 16758 +scienceinstitut 1 1 6.957497 6.957497 16759 +presentselect 1 1 6.957497 6.957497 16760 +turingaward 1 1 6.957497 6.957497 16761 +upfal 1 1 6.957497 6.957497 16762 +salesman 1 1 6.957497 6.957497 16743 +spanningtre 1 1 6.957497 6.957497 16763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..dd01b7be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,251 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +person 1 111 2.197225 2.197225 117 +version 1 113 2.197225 2.197225 122 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +grade 1 90 2.397895 2.397895 142 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +complet 1 77 2.564949 2.564949 208 +master 1 76 2.564949 2.564949 216 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +nation 1 74 2.639057 2.639057 240 +servic 1 72 2.639057 2.639057 236 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +test 1 66 2.708050 2.708050 252 +degre 1 69 2.708050 2.708050 259 +polici 1 64 2.772589 2.772589 279 +foundat 1 62 2.772589 2.772589 286 +colleg 1 61 2.833213 2.833213 300 +faculti 1 56 2.890372 2.890372 325 +reason 1 57 2.890372 2.890372 318 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +talk 1 53 2.944439 2.944439 336 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +profession 1 51 2.995732 2.995732 345 +frequent 1 49 3.044522 3.044522 367 +visitor 1 49 3.044522 3.044522 371 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +execut 1 45 3.135494 3.135494 404 +review 1 42 3.218876 3.218876 425 +examin 1 42 3.218876 3.218876 424 +http 1 41 3.218876 3.218876 420 +transact 1 39 3.258097 3.258097 438 +annual 1 40 3.258097 3.258097 458 +industri 1 38 3.295837 3.295837 464 +electr 1 38 3.295837 3.295837 461 +field 1 37 3.332205 3.332205 482 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +committe 1 34 3.401197 3.401197 522 +award 1 34 3.401197 3.401197 523 +board 1 33 3.433987 3.433987 528 +product 1 33 3.433987 3.433987 527 +chines 1 29 3.583519 3.583519 595 +chair 1 29 3.583519 3.583519 596 +intend 1 28 3.610918 3.610918 599 +berkelei 1 26 3.688879 3.688879 657 +concern 1 25 3.737670 3.737670 666 +doctor 1 24 3.761200 3.761200 709 +sometim 1 24 3.761200 3.761200 696 +miscellan 1 23 3.806662 3.806662 731 +famili 1 23 3.806662 3.806662 735 +director 1 22 3.850148 3.850148 767 +serv 1 22 3.850148 3.850148 758 +corpor 1 21 3.912023 3.912023 802 +fund 1 21 3.912023 3.912023 805 +theunivers 1 21 3.912023 3.912023 797 +hous 1 21 3.912023 3.912023 801 +region 1 19 4.007333 4.007333 875 +appropri 1 18 4.060443 4.060443 883 +seem 1 18 4.060443 4.060443 899 +lot 1 18 4.060443 4.060443 889 +stand 1 18 4.060443 4.060443 891 +record 1 18 4.060443 4.060443 890 +demo 1 18 4.060443 4.060443 888 +universityof 1 15 4.248495 4.248495 1061 +drive 1 15 4.248495 4.248495 1052 +club 1 15 4.248495 4.248495 1058 +trip 1 14 4.317488 4.317488 1113 +essenti 1 13 4.382027 4.382027 1137 +sigmetr 1 13 4.382027 4.382027 1173 +forth 1 13 4.382027 4.382027 1186 +speech 1 12 4.465908 4.465908 1222 +thedepart 1 11 4.553877 4.553877 1350 +host 1 11 4.553877 4.553877 1306 +council 1 11 4.553877 4.553877 1364 +player 1 11 4.553877 4.553877 1371 +cook 1 10 4.653960 4.653960 1464 +perspect 1 10 4.653960 4.653960 1437 +congress 1 9 4.753590 4.753590 1592 +pick 1 9 4.753590 4.753590 1498 +govern 1 9 4.753590 4.753590 1581 +telecommun 1 9 4.753590 4.753590 1565 +andth 1 9 4.753590 4.753590 1481 +vice 1 9 4.753590 4.753590 1604 +lane 1 8 4.875197 4.875197 1720 +mile 1 8 4.875197 4.875197 1743 +virginia 1 8 4.875197 4.875197 1659 +driver 1 8 4.875197 4.875197 1657 +centuri 1 7 5.010635 5.010635 1935 +surpris 1 7 5.010635 5.010635 1828 +molecular 1 7 5.010635 5.010635 1887 +advisori 1 6 5.164786 5.164786 2148 +ture 1 6 5.164786 5.164786 1997 +brook 1 6 5.164786 5.164786 2152 +deliv 1 6 5.164786 5.164786 2070 +highwai 1 6 5.164786 5.164786 2095 +presid 1 6 5.164786 5.164786 2196 +duke 1 6 5.164786 5.164786 2231 +lazowska 1 4 5.568345 5.568345 2694 +invent 1 4 5.568345 5.568345 3028 +push 1 4 5.568345 5.568345 2635 +andengin 1 4 5.568345 5.568345 3042 +machineri 1 4 5.568345 5.568345 2851 +rack 1 3 5.857933 5.857933 3176 +researchassoci 1 3 5.857933 5.857933 3664 +affair 1 3 5.857933 5.857933 3916 +belong 1 3 5.857933 5.857933 3797 +atstanford 1 3 5.857933 5.857933 3935 +hongkong 1 3 5.857933 5.857933 3677 +theimpact 1 3 5.857933 5.857933 3179 +uwcs 1 3 5.857933 5.857933 3977 +informationtechnolog 1 3 5.857933 5.857933 3836 +down 1 3 5.857933 5.857933 3870 +celebr 1 2 6.263398 6.263398 4946 +onthi 1 2 6.263398 6.263398 5357 +mbquicktim 1 2 6.263398 6.263398 5916 +advisorycommitte 1 2 6.263398 6.263398 6037 +ofdata 1 2 6.263398 6.263398 6038 +ventur 1 2 6.263398 6.263398 4938 +andha 1 2 6.263398 6.263398 5955 +theinstitut 1 2 6.263398 6.263398 6039 +hpcc 1 2 6.263398 6.263398 5832 +pagerec 1 2 6.263398 6.263398 6022 +ahalf 1 1 6.957497 6.957497 16764 +exponentialprogress 1 1 6.957497 6.957497 16765 +annualfaculti 1 1 6.957497 6.957497 16766 +vicepresid 1 1 6.957497 6.957497 16767 +gore 1 1 6.957497 6.957497 16768 +eniac 1 1 6.957497 6.957497 16769 +thanniversari 1 1 6.957497 6.957497 16770 +georgejetson 1 1 6.957497 6.957497 16775 +forfr 1 1 6.957497 6.957497 16776 +flintston 1 1 6.957497 6.957497 16777 +nathanmyhrvold 1 1 6.957497 6.957497 16771 +joinsedlazowska 1 1 6.957497 6.957497 16772 +theuwcs 1 1 6.957497 6.957497 16773 +mostlywearsti 1 1 6.957497 6.957497 16778 +flier 1 1 6.957497 6.957497 16779 +healso 1 1 6.957497 6.957497 16780 +havefunnynos 1 1 6.957497 6.957497 16781 +allgradu 1 1 6.957497 6.957497 16782 +laboratoriesin 1 1 6.957497 6.957497 16783 +ofcra 1 1 6.957497 6.957497 16784 +scomputersci 1 1 6.957497 6.957497 16785 +formicrosoft 1 1 6.957497 6.957497 16786 +personnationalsemiconductor 1 1 6.957497 6.957497 16787 +academicadvisori 1 1 6.957497 6.957497 16788 +forcabl 1 1 6.957497 6.957497 16789 +hows 1 1 6.957497 6.957497 16790 +cascadia 1 1 6.957497 6.957497 16791 +committeesfor 1 1 6.957497 6.957497 16792 +eecsat 1 1 6.957497 6.957497 16793 +councilpanel 1 1 6.957497 6.957497 16794 +agencyhigh 1 1 6.957497 6.957497 16795 +computingand 1 1 6.957497 6.957497 16796 +sutherland 1 1 6.957497 6.957497 16797 +examinersfor 1 1 6.957497 6.957497 16798 +sspecial 1 1 6.957497 6.957497 16799 +chairof 1 1 6.957497 6.957497 16800 +andeditor 1 1 6.957497 6.957497 16801 +servinga 1 1 6.957497 6.957497 16802 +onacadem 1 1 6.957497 6.957497 16803 +thecommitte 1 1 6.957497 6.957497 16804 +deanship 1 1 6.957497 6.957497 16805 +artsand 1 1 6.957497 6.957497 16806 +biotechnolog 1 1 6.957497 6.957497 16807 +amemb 1 1 6.957497 6.957497 16808 +deanof 1 1 6.957497 6.957497 16809 +fellowof 1 1 6.957497 6.957497 16810 +associationfor 1 1 6.957497 6.957497 16811 +andelectron 1 1 6.957497 6.957497 16812 +seventeenph 1 1 6.957497 6.957497 16813 +studentshav 1 1 6.957497 6.957497 16814 +integratedoverview 1 1 6.957497 6.957497 16815 +apersuas 1 1 6.957497 6.957497 16816 +forloc 1 1 6.957497 6.957497 16817 +consumpt 1 1 6.957497 6.957497 16818 +persuas 1 1 6.957497 6.957497 16819 +playertopten 1 1 6.957497 6.957497 16820 +csebuild 1 1 6.957497 6.957497 16821 +abbrevi 1 1 6.957497 6.957497 16822 +cvcomputingresearch 1 1 6.957497 6.957497 16823 +forwardmassi 1 1 6.957497 6.957497 16824 +goldmanreport 1 1 6.957497 6.957497 16825 +alleg 1 1 6.957497 6.957497 16826 +cseph 1 1 6.957497 6.957497 16827 +flaw 1 1 6.957497 6.957497 16828 +medianyear 1 1 6.957497 6.957497 16829 +boardstudi 1 1 6.957497 6.957497 16830 +saturdayseminar 1 1 6.957497 6.957497 16831 +testimonyto 1 1 6.957497 6.957497 16774 +houseappropri 1 1 6.957497 6.957497 16832 +interestinghom 1 1 6.957497 6.957497 16833 +odeto 1 1 6.957497 6.957497 16834 +tallman 1 1 6.957497 6.957497 16835 +trask 1 1 6.957497 6.957497 16836 +departsfor 1 1 6.957497 6.957497 16837 +lanelazowska 1 1 6.957497 6.957497 16838 +pagedirect 1 1 6.957497 6.957497 16839 +houseshilshol 1 1 6.957497 6.957497 16840 +aquat 1 1 6.957497 6.957497 16841 +discoveredreview 1 1 6.957497 6.957497 16842 +poetryfing 1 1 6.957497 6.957497 16843 +scheduleinform 1 1 6.957497 6.957497 16844 +reflector 1 1 6.957497 6.957497 16845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..5730afe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +professor 1 137 1.945910 1.945910 76 +document 1 121 2.079442 2.079442 89 +mathemat 1 108 2.197225 2.197225 123 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +book 1 99 2.302585 2.302585 131 +text 1 98 2.302585 2.302585 133 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +real 1 93 2.397895 2.397895 144 +mani 1 92 2.397895 2.397895 150 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +method 1 80 2.564949 2.564949 213 +good 1 77 2.564949 2.564949 200 +addit 1 74 2.639057 2.639057 228 +visit 1 63 2.772589 2.772589 288 +physic 1 47 3.091042 3.091042 377 +textbook 1 44 3.135494 3.135494 397 +linear 1 41 3.218876 3.218876 431 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +transact 1 39 3.258097 3.258097 438 +live 1 40 3.258097 3.258097 451 +committe 1 34 3.401197 3.401197 522 +award 1 34 3.401197 3.401197 523 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +serv 1 22 3.850148 3.850148 758 +corpor 1 21 3.912023 3.912023 802 +among 1 21 3.912023 3.912023 781 +half 1 21 3.912023 3.912023 776 +prepar 1 20 3.951244 3.951244 824 +supervis 1 20 3.951244 3.951244 840 +tenni 1 20 3.951244 3.951244 838 +bachelor 1 17 4.110874 4.110874 957 +stanford 1 17 4.110874 4.110874 955 +former 1 17 4.110874 4.110874 956 +hobbi 1 16 4.174387 4.174387 1009 +atth 1 15 4.248495 4.248495 1019 +incomput 1 14 4.317488 4.317488 1096 +alan 1 13 4.382027 4.382027 1146 +guest 1 12 4.465908 4.465908 1220 +food 1 12 4.465908 4.465908 1285 +distinguish 1 11 4.553877 4.553877 1357 +bike 1 10 4.653960 4.653960 1468 +introductori 1 9 4.753590 4.753590 1479 +editori 1 9 4.753590 4.753590 1611 +toronto 1 6 5.164786 5.164786 2156 +scholar 1 6 5.164786 5.164786 2180 +pari 1 6 5.164786 5.164786 2158 +softwareengin 1 6 5.164786 5.164786 2162 +hike 1 6 5.164786 5.164786 2234 +these 1 5 5.347108 5.347108 2482 +hasbeen 1 4 5.568345 5.568345 2661 +fulbright 1 4 5.568345 5.568345 2963 +amast 1 3 5.857933 5.857933 3955 +informat 1 3 5.857933 5.857933 3839 +zurich 1 3 5.857933 5.857933 3550 +memberof 1 3 5.857933 5.857933 3169 +trumpet 1 3 5.857933 5.857933 3946 +sdegre 1 2 6.263398 6.263398 6040 +acceler 1 2 6.263398 6.263398 5411 +fifteen 1 2 6.263398 6.263398 5399 +shaw 1 1 6.957497 6.957497 16846 +facultyappoint 1 1 6.957497 6.957497 16847 +theibm 1 1 6.957497 6.957497 16848 +publicationsinclud 1 1 6.957497 6.957497 16849 +andan 1 1 6.957497 6.957497 16850 +sciencescreen 1 1 6.957497 6.957497 16851 +associateeditor 1 1 6.957497 6.957497 16852 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..7000e67d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +parallel 1 169 1.791759 1.791759 60 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +first 1 140 1.945910 1.945910 71 +mathemat 1 108 2.197225 2.197225 123 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +complet 1 77 2.564949 2.564949 208 +master 1 76 2.564949 2.564949 216 +nation 1 74 2.639057 2.639057 240 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +receiv 1 66 2.708050 2.708050 244 +visit 1 63 2.772589 2.772589 288 +polici 1 64 2.772589 2.772589 279 +guid 1 63 2.772589 2.772589 267 +faculti 1 56 2.890372 2.890372 325 +direct 1 57 2.890372 2.890372 316 +investig 1 51 2.995732 2.995732 353 +numer 1 49 3.044522 3.044522 369 +quarter 1 47 3.091042 3.091042 389 +editor 1 41 3.218876 3.218876 433 +futur 1 41 3.218876 3.218876 427 +join 1 39 3.258097 3.258097 457 +transact 1 39 3.258097 3.258097 438 +ofth 1 36 3.367296 3.367296 491 +singl 1 34 3.401197 3.401197 510 +committe 1 34 3.401197 3.401197 522 +award 1 34 3.401197 3.401197 523 +titl 1 31 3.496508 3.496508 556 +computersci 1 30 3.555348 3.555348 562 +rang 1 30 3.555348 3.555348 565 +particip 1 29 3.583519 3.583519 589 +chair 1 29 3.583519 3.583519 596 +doctor 1 24 3.761200 3.761200 709 +proof 1 23 3.806662 3.806662 720 +highli 1 23 3.806662 3.806662 725 +serv 1 22 3.850148 3.850148 758 +properti 1 22 3.850148 3.850148 749 +chip 1 21 3.912023 3.912023 770 +divis 1 21 3.912023 3.912023 803 +bachelor 1 17 4.110874 4.110874 957 +configur 1 15 4.248495 4.248495 1012 +econom 1 13 4.382027 4.382027 1184 +mellon 1 13 4.382027 4.382027 1179 +carnegi 1 12 4.465908 4.465908 1260 +onth 1 12 4.465908 4.465908 1218 +perman 1 11 4.553877 4.553877 1372 +distinguish 1 11 4.553877 4.553877 1357 +purdu 1 10 4.653960 4.653960 1466 +andcomput 1 8 4.875197 4.875197 1623 +lawrenc 1 7 5.010635 5.010635 1908 +iowa 1 7 5.010635 5.010635 1971 +harvard 1 7 5.010635 5.010635 1926 +microprocessor 1 7 5.010635 5.010635 1808 +scholar 1 6 5.164786 5.164786 2180 +yale 1 6 5.164786 5.164786 2003 +blue 1 6 5.164786 5.164786 2227 +snyder 1 5 5.347108 5.347108 2359 +chaoticrout 1 4 5.568345 5.568345 3063 +anddistribut 1 4 5.568345 5.568345 3031 +algorithmsand 1 4 5.568345 5.568345 2680 +cmo 1 3 5.857933 5.857933 3992 +inventor 1 3 5.857933 5.857933 3695 +orca 1 3 5.857933 5.857933 3578 +dozen 1 3 5.857933 5.857933 3905 +sdegre 1 2 6.263398 6.263398 6040 +developmentof 1 2 6.263398 6.263398 6041 +hors 1 2 6.263398 6.263398 5348 +advisorycommitte 1 2 6.263398 6.263398 6037 +andin 1 1 6.957497 6.957497 16853 +scholarat 1 1 6.957497 6.957497 16854 +theundecid 1 1 6.957497 6.957497 16855 +hecreat 1 1 6.957497 6.957497 16856 +thepok 1 1 6.957497 6.957497 16857 +nowprincip 1 1 6.957497 6.957497 16858 +nwli 1 1 6.957497 6.957497 16859 +computerand 1 1 6.957497 6.957497 16860 +foundationadvisori 1 1 6.957497 6.957497 16861 +doctoraldissert 1 1 6.957497 6.957497 16862 +degreesund 1 1 6.957497 6.957497 16863 +seniorproject 1 1 6.957497 6.957497 16864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..5eb061e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +design 1 213 1.386294 1.386294 25 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +memori 1 101 2.302585 2.302585 139 +control 1 82 2.484907 2.484907 164 +issu 1 78 2.564949 2.564949 211 +involv 1 71 2.639057 2.639057 227 +degre 1 69 2.708050 2.708050 259 +integr 1 67 2.708050 2.708050 245 +evalu 1 64 2.772589 2.772589 266 +plai 1 60 2.833213 2.833213 307 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +tabl 1 51 2.995732 2.995732 346 +autom 1 41 3.218876 3.218876 434 +electr 1 38 3.295837 3.295837 461 +toler 1 33 3.433987 3.433987 533 +india 1 32 3.465736 3.465736 550 +fault 1 32 3.465736 3.465736 547 +multiprocessor 1 28 3.610918 3.610918 605 +period 1 22 3.850148 3.850148 743 +indian 1 22 3.850148 3.850148 769 +tenni 1 20 3.951244 3.951244 838 +interconnect 1 17 4.110874 4.110874 937 +canada 1 13 4.382027 4.382027 1158 +food 1 12 4.465908 4.465908 1285 +prior 1 10 4.653960 4.653960 1438 +grain 1 10 4.653960 4.653960 1448 +cook 1 10 4.653960 4.653960 1464 +respect 1 9 4.753590 4.753590 1545 +classif 1 9 4.753590 4.753590 1586 +bridg 1 8 4.875197 4.875197 1764 +earn 1 7 5.010635 5.010635 1788 +montreal 1 7 5.010635 5.010635 1961 +hike 1 6 5.164786 5.164786 2234 +delhi 1 5 5.347108 5.347108 2530 +anti 1 5 5.347108 5.347108 2434 +arun 1 4 5.568345 5.568345 2736 +redund 1 4 5.568345 5.568345 2839 +congest 1 3 5.857933 5.857933 3993 +reconfigur 1 3 5.857933 5.857933 3556 +somani 1 2 6.263398 6.263398 4432 +submarin 1 2 6.263398 6.263398 6018 +warfar 1 2 6.263398 6.263398 4910 +navi 1 2 6.263398 6.263398 5155 +msee 1 1 6.957497 6.957497 16866 +mcgill 1 1 6.957497 6.957497 16867 +govt 1 1 6.957497 6.957497 16868 +offault 1 1 6.957497 6.957497 16869 +tocach 1 1 6.957497 6.957497 16870 +broadband 1 1 6.957497 6.957497 16871 +proteu 1 1 6.957497 6.957497 16865 +generalizedenhanc 1 1 6.957497 6.957497 16872 +hypercub 1 1 6.957497 6.957497 16873 +coars 1 1 6.957497 6.957497 16874 +dpcnl 1 1 6.957497 6.957497 16875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..8fc87145 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +take 1 97 2.302585 2.302585 134 +book 1 99 2.302585 2.302585 131 +sinc 1 90 2.397895 2.397895 159 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +grade 1 90 2.397895 2.397895 142 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +june 1 79 2.564949 2.564949 214 +addit 1 74 2.639057 2.639057 228 +intellig 1 72 2.639057 2.639057 225 +meet 1 72 2.639057 2.639057 229 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +visit 1 63 2.772589 2.772589 288 +artifici 1 63 2.772589 2.772589 280 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +organ 1 65 2.772589 2.772589 265 +polici 1 64 2.772589 2.772589 279 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +faculti 1 56 2.890372 2.890372 325 +think 1 57 2.890372 2.890372 314 +variou 1 56 2.890372 2.890372 317 +publish 1 57 2.890372 2.890372 326 +processor 1 54 2.944439 2.944439 335 +visual 1 48 3.044522 3.044522 372 +numer 1 49 3.044522 3.044522 369 +understand 1 47 3.091042 3.091042 384 +textbook 1 44 3.135494 3.135494 397 +edit 1 42 3.218876 3.218876 418 +vision 1 41 3.218876 3.218876 430 +editor 1 41 3.218876 3.218876 433 +music 1 42 3.218876 3.218876 436 +join 1 39 3.258097 3.258097 457 +programm 1 39 3.258097 3.258097 445 +author 1 39 3.258097 3.258097 450 +societi 1 40 3.258097 3.258097 456 +committe 1 34 3.401197 3.401197 522 +scientist 1 31 3.496508 3.496508 560 +common 1 30 3.555348 3.555348 574 +chair 1 29 3.583519 3.583519 596 +steve 1 29 3.583519 3.583519 594 +held 1 28 3.610918 3.610918 600 +subject 1 26 3.688879 3.688879 647 +enjoi 1 26 3.688879 3.688879 660 +pattern 1 24 3.761200 3.761200 689 +fellow 1 24 3.761200 3.761200 701 +recognit 1 23 3.806662 3.806662 723 +serv 1 22 3.850148 3.850148 758 +theunivers 1 21 3.912023 3.912023 797 +corpor 1 21 3.912023 3.912023 802 +particularli 1 19 4.007333 4.007333 867 +element 1 18 4.060443 4.060443 895 +lisp 1 18 4.060443 4.060443 897 +steven 1 17 4.110874 4.110874 953 +cambridg 1 16 4.174387 4.174387 1008 +princeton 1 15 4.248495 4.248495 1042 +atth 1 15 4.248495 4.248495 1019 +massachusett 1 14 4.317488 4.317488 1118 +whose 1 13 4.382027 4.382027 1166 +franc 1 12 4.465908 4.465908 1276 +outsid 1 12 4.465908 4.465908 1219 +motiv 1 11 4.553877 4.553877 1346 +council 1 11 4.553877 4.553877 1364 +tanimoto 1 10 4.653960 4.653960 1429 +entitl 1 9 4.753590 4.753590 1490 +conferenceon 1 9 4.753590 4.753590 1595 +vice 1 9 4.753590 4.753590 1604 +japan 1 8 4.875197 4.875197 1762 +elect 1 8 4.875197 4.875197 1771 +sweden 1 7 5.010635 5.010635 1885 +chief 1 7 5.010635 5.010635 1829 +pari 1 6 5.164786 5.164786 2158 +scholar 1 6 5.164786 5.164786 2180 +sponsor 1 6 5.164786 5.164786 2133 +piano 1 6 5.164786 5.164786 2201 +anda 1 5 5.347108 5.347108 2416 +ofparallel 1 5 5.347108 5.347108 2380 +steer 1 5 5.347108 5.347108 2328 +jazz 1 5 5.347108 5.347108 2527 +devot 1 4 5.568345 5.568345 2711 +coauthor 1 4 5.568345 5.568345 3064 +electricalengin 1 3 5.857933 5.857933 3987 +chairman 1 3 5.857933 5.857933 3991 +adjunct 1 2 6.263398 6.263398 6033 +theinstitut 1 2 6.263398 6.263398 6039 +internationalworkshop 1 2 6.263398 6.263398 5012 +bergen 1 2 6.263398 6.263398 5991 +norwai 1 2 6.263398 6.263398 4908 +programcommitte 1 2 6.263398 6.263398 6042 +theieee 1 2 6.263398 6.263398 6043 +ieeetransact 1 2 6.263398 6.263398 4315 +andclass 1 2 6.263398 6.263398 4330 +fromharvard 1 1 6.957497 6.957497 16877 +connecticut 1 1 6.957497 6.957497 16878 +professorat 1 1 6.957497 6.957497 16879 +linkp 1 1 6.957497 6.957497 16876 +hasalso 1 1 6.957497 6.957497 16880 +atkob 1 1 6.957497 6.957497 16881 +enseign 1 1 6.957497 6.957497 16882 +superieur 1 1 6.957497 6.957497 16883 +techniquesd 1 1 6.957497 6.957497 16884 +electroniqu 1 1 6.957497 6.957497 16885 +irest 1 1 6.957497 6.957497 16886 +nant 1 1 6.957497 6.957497 16887 +hasrec 1 1 6.957497 6.957497 16888 +forimag 1 1 6.957497 6.957497 16889 +processingand 1 1 6.957497 6.957497 16890 +bordeaux 1 1 6.957497 6.957497 16891 +ofimag 1 1 6.957497 6.957497 16892 +currentlydirect 1 1 6.957497 6.957497 16893 +throughimag 1 1 6.957497 6.957497 16894 +softwarethat 1 1 6.957497 6.957497 16895 +thebook 1 1 6.957497 6.957497 16896 +introductionus 1 1 6.957497 6.957497 16897 +accompanyingsoftwar 1 1 6.957497 6.957497 16898 +serveda 1 1 6.957497 6.957497 16899 +subconfer 1 1 6.957497 6.957497 16900 +patternrecognit 1 1 6.957497 6.957497 16901 +societyworkshop 1 1 6.957497 6.957497 16902 +machineintellig 1 1 6.957497 6.957497 16903 +symposiaon 1 1 6.957497 6.957497 16904 +editorialboard 1 1 6.957497 6.957497 16905 +cvgip 1 1 6.957497 6.957497 16906 +engineeringeduc 1 1 6.957497 6.957497 16907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..35f681d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +read 1 154 1.791759 1.791759 47 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +structur 1 106 2.197225 2.197225 105 +associ 1 93 2.397895 2.397895 151 +question 1 91 2.397895 2.397895 141 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +journal 1 83 2.484907 2.484907 183 +state 1 76 2.564949 2.564949 207 +issu 1 78 2.564949 2.564949 211 +complet 1 77 2.564949 2.564949 208 +nation 1 74 2.639057 2.639057 240 +receiv 1 66 2.708050 2.708050 244 +foundat 1 62 2.772589 2.772589 286 +visit 1 63 2.772589 2.772589 288 +complex 1 64 2.772589 2.772589 269 +colleg 1 61 2.833213 2.833213 300 +faculti 1 56 2.890372 2.890372 325 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +sever 1 56 2.890372 2.890372 322 +three 1 54 2.944439 2.944439 330 +california 1 46 3.091042 3.091042 388 +textbook 1 44 3.135494 3.135494 397 +theoret 1 39 3.258097 3.258097 446 +author 1 39 3.258097 3.258097 450 +societi 1 40 3.258097 3.258097 456 +paul 1 38 3.295837 3.295837 471 +industri 1 38 3.295837 3.295837 464 +connect 1 37 3.332205 3.332205 485 +ofth 1 36 3.367296 3.367296 491 +committe 1 34 3.401197 3.401197 522 +taught 1 33 3.433987 3.433987 526 +board 1 33 3.433987 3.433987 528 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +chair 1 29 3.583519 3.583519 596 +berkelei 1 26 3.688879 3.688879 657 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +serv 1 22 3.850148 3.850148 758 +emphasi 1 22 3.850148 3.850148 755 +unit 1 21 3.912023 3.912023 779 +theunivers 1 21 3.912023 3.912023 797 +divis 1 21 3.912023 3.912023 803 +facil 1 20 3.951244 3.951244 814 +histori 1 19 4.007333 4.007333 853 +young 1 16 4.174387 4.174387 991 +earli 1 16 4.174387 4.174387 968 +becam 1 14 4.317488 4.317488 1117 +dean 1 14 4.317488 4.317488 1104 +employ 1 12 4.465908 4.465908 1291 +eight 1 11 4.553877 4.553877 1331 +ofcomput 1 10 4.653960 4.653960 1442 +vice 1 9 4.753590 4.753590 1604 +editori 1 9 4.753590 4.753590 1611 +hold 1 8 4.875197 4.875197 1645 +foc 1 7 5.010635 5.010635 1880 +reed 1 6 5.164786 5.164786 2086 +symposiumon 1 6 5.164786 5.164786 2054 +sigact 1 6 5.164786 5.164786 2212 +chosen 1 6 5.164786 5.164786 1984 +twice 1 4 5.568345 5.568345 2614 +coauthor 1 4 5.568345 5.568345 3064 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +gone 1 4 5.568345 5.568345 3072 +atstanford 1 3 5.857933 5.857933 3935 +briefli 1 3 5.857933 5.857933 3459 +thegener 1 3 5.857933 5.857933 3648 +mathematicallog 1 3 5.857933 5.857933 3796 +chairman 1 3 5.857933 5.857933 3991 +eleven 1 3 5.857933 5.857933 3824 +postdoctor 1 2 6.263398 6.263398 5059 +mexico 1 2 6.263398 6.263398 6044 +nomin 1 2 6.263398 6.263398 5758 +programcommitte 1 2 6.263398 6.263398 6042 +annal 1 2 6.263398 6.263398 4912 +underprofessor 1 2 6.263398 6.263398 6045 +ratherthan 1 2 6.263398 6.263398 6046 +graduateof 1 1 6.957497 6.957497 16908 +antioch 1 1 6.957497 6.957497 16909 +hejoin 1 1 6.957497 6.957497 16910 +seventeen 1 1 6.957497 6.957497 16911 +atpurdu 1 1 6.957497 6.957497 16912 +inperhap 1 1 6.957497 6.957497 16913 +aschairman 1 1 6.957497 6.957497 16914 +professorin 1 1 6.957497 6.957497 16915 +iscoauthor 1 1 6.957497 6.957497 16916 +executivecommitte 1 1 6.957497 6.957497 16917 +interestgroup 1 1 6.957497 6.957497 16918 +chairmanof 1 1 6.957497 6.957497 16919 +annualsymposium 1 1 6.957497 6.957497 16920 +hasserv 1 1 6.957497 6.957497 16921 +stechnic 1 1 6.957497 6.957497 16922 +advisorysubcommitte 1 1 6.957497 6.957497 16923 +thiscommitte 1 1 6.957497 6.957497 16924 +formallog 1 1 6.957497 6.957497 16925 +dopostdoctor 1 1 6.957497 6.957497 16926 +ofcalifornia 1 1 6.957497 6.957497 16927 +avarieti 1 1 6.957497 6.957497 16928 +leather 1 1 6.957497 6.957497 16929 +motorcycl 1 1 6.957497 6.957497 16930 +jacket 1 1 6.957497 6.957497 16931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..86c09ebc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +construct 1 139 1.945910 1.945910 82 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +techniqu 1 99 2.302585 2.302585 138 +real 1 93 2.397895 2.397895 144 +chang 1 82 2.484907 2.484907 163 +activ 1 84 2.484907 2.484907 182 +ieee 1 86 2.484907 2.484907 190 +interfac 1 79 2.564949 2.564949 209 +involv 1 71 2.639057 2.639057 227 +receiv 1 66 2.708050 2.708050 244 +polici 1 64 2.772589 2.772589 279 +written 1 63 2.772589 2.772589 278 +allow 1 53 2.944439 2.944439 333 +video 1 44 3.135494 3.135494 405 +continu 1 39 3.258097 3.258097 448 +survei 1 35 3.401197 3.401197 513 +john 1 33 3.433987 3.433987 532 +board 1 33 3.433987 3.433987 528 +titl 1 31 3.496508 3.496508 556 +focu 1 30 3.555348 3.555348 571 +platform 1 29 3.583519 3.583519 591 +load 1 28 3.610918 3.610918 601 +intend 1 28 3.610918 3.610918 599 +primari 1 25 3.737670 3.737670 669 +mobil 1 23 3.806662 3.806662 730 +sequenti 1 22 3.850148 3.850148 745 +runtim 1 19 4.007333 4.007333 858 +young 1 16 4.174387 4.174387 991 +audio 1 14 4.317488 4.317488 1094 +easili 1 14 4.317488 4.317488 1077 +editori 1 9 4.753590 4.753590 1611 +presidenti 1 8 4.875197 4.875197 1737 +supportfor 1 7 5.010635 5.010635 1854 +thegoal 1 6 5.164786 5.164786 2033 +zahorjan 1 3 5.857933 5.857933 3383 +formobil 1 3 5.857933 5.857933 3261 +parallelsystem 1 2 6.263398 6.263398 5746 +exhibit 1 2 6.263398 6.263398 5529 +frombrown 1 1 6.957497 6.957497 16932 +oftoronto 1 1 6.957497 6.957497 16933 +investigatoraward 1 1 6.957497 6.957497 16934 +mediaappl 1 1 6.957497 6.957497 16935 +torespond 1 1 6.957497 6.957497 16936 +parallelizationof 1 1 6.957497 6.957497 16937 +bothcontrol 1 1 6.957497 6.957497 16938 +transactionson 1 1 6.957497 6.957497 16939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..54bfbcd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +version 1 113 2.197225 2.197225 122 +teach 1 108 2.197225 2.197225 112 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +technic 1 100 2.302585 2.302585 140 +imag 1 91 2.397895 2.397895 161 +learn 1 86 2.484907 2.484907 170 +help 1 83 2.484907 2.484907 175 +activ 1 84 2.484907 2.484907 182 +educ 1 86 2.484907 2.484907 191 +environ 1 84 2.484907 2.484907 177 +thing 1 84 2.484907 2.484907 189 +exampl 1 77 2.564949 2.564949 195 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +involv 1 71 2.639057 2.639057 227 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +tuesdai 1 73 2.639057 2.639057 219 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +window 1 68 2.708050 2.708050 242 +test 1 66 2.708050 2.708050 252 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +collect 1 65 2.772589 2.772589 268 +creat 1 63 2.772589 2.772589 277 +foundat 1 62 2.772589 2.772589 286 +plai 1 60 2.833213 2.833213 307 +variou 1 56 2.890372 2.890372 317 +explor 1 58 2.890372 2.890372 324 +direct 1 57 2.890372 2.890372 316 +allow 1 53 2.944439 2.944439 333 +februari 1 54 2.944439 2.944439 328 +digit 1 52 2.995732 2.995732 348 +particular 1 51 2.995732 2.995732 352 +approach 1 48 3.044522 3.044522 366 +visual 1 48 3.044522 3.044522 372 +done 1 47 3.091042 3.091042 381 +effect 1 46 3.091042 3.091042 385 +discuss 1 45 3.135494 3.135494 399 +keep 1 44 3.135494 3.135494 409 +describ 1 45 3.135494 3.135494 400 +offer 1 43 3.178054 3.178054 414 +programm 1 39 3.258097 3.258097 445 +littl 1 39 3.258097 3.258097 454 +open 1 38 3.295837 3.295837 469 +microsoft 1 38 3.295837 3.295837 468 +close 1 38 3.295837 3.295837 465 +seminar 1 38 3.295837 3.295837 470 +winter 1 36 3.367296 3.367296 500 +curriculum 1 33 3.433987 3.433987 535 +transform 1 32 3.465736 3.465736 542 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +hard 1 30 3.555348 3.555348 563 +common 1 30 3.555348 3.555348 574 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +framework 1 28 3.610918 3.610918 606 +manipul 1 27 3.637586 3.637586 624 +rather 1 26 3.688879 3.688879 642 +experiment 1 26 3.688879 3.688879 645 +todai 1 25 3.737670 3.737670 672 +seri 1 24 3.761200 3.761200 708 +demonstr 1 24 3.761200 3.761200 694 +togeth 1 23 3.806662 3.806662 714 +director 1 22 3.850148 3.850148 767 +disk 1 22 3.850148 3.850148 747 +instal 1 22 3.850148 3.850148 754 +encourag 1 18 4.060443 4.060443 880 +record 1 18 4.060443 4.060443 890 +lisp 1 18 4.060443 4.060443 897 +steven 1 17 4.110874 4.110874 953 +macintosh 1 17 4.110874 4.110874 920 +choic 1 16 4.174387 4.174387 979 +took 1 16 4.174387 4.174387 1010 +role 1 14 4.317488 4.317488 1101 +primarili 1 13 4.382027 4.382027 1185 +forth 1 13 4.382027 4.382027 1186 +essenti 1 13 4.382027 4.382027 1137 +calcul 1 12 4.465908 4.465908 1268 +neat 1 12 4.465908 4.465908 1263 +appl 1 11 4.553877 4.553877 1303 +tanimoto 1 10 4.653960 4.653960 1429 +subset 1 10 4.653960 4.653960 1425 +thecomput 1 10 4.653960 4.653960 1408 +end 1 9 4.753590 4.753590 1567 +successfulli 1 7 5.010635 5.010635 1869 +classroom 1 6 5.164786 5.164786 2006 +pentium 1 6 5.164786 5.164786 2077 +put 1 6 5.164786 5.164786 2017 +volunt 1 5 5.347108 5.347108 2307 +own 1 5 5.347108 5.347108 2531 +emphas 1 4 5.568345 5.568345 2672 +exploratori 1 4 5.568345 5.568345 3073 +pixel 1 4 5.568345 5.568345 2831 +prospect 1 4 5.568345 5.568345 3013 +witha 1 4 5.568345 5.568345 2617 +bricker 1 4 5.568345 5.568345 3050 +metip 1 3 5.857933 5.857933 3937 +teacher 1 3 5.857933 5.857933 3892 +alsoavail 1 3 5.857933 5.857933 3887 +newapproach 1 2 6.263398 6.263398 6047 +pursuit 1 2 6.263398 6.263398 6048 +portrai 1 2 6.263398 6.263398 5386 +theseobject 1 1 6.957497 6.957497 16941 +applicationsdesign 1 1 6.957497 6.957497 16942 +enrich 1 1 6.957497 6.957497 16943 +astandard 1 1 6.957497 6.957497 16944 +withthes 1 1 6.957497 6.957497 16945 +catalyz 1 1 6.957497 6.957497 16946 +bylead 1 1 6.957497 6.957497 16947 +theconcept 1 1 6.957497 6.957497 16948 +toexplor 1 1 6.957497 6.957497 16949 +warper 1 1 6.957497 6.957497 16950 +srun 1 1 6.957497 6.957497 16951 +mathematicsteach 1 1 6.957497 6.957497 16952 +transcriptproject 1 1 6.957497 6.957497 16953 +willfacilit 1 1 6.957497 6.957497 16954 +academicinform 1 1 6.957497 6.957497 16955 +floppi 1 1 6.957497 6.957497 16956 +themetip 1 1 6.957497 6.957497 16957 +ofmultiplay 1 1 6.957497 6.957497 16958 +tointegr 1 1 6.957497 6.957497 16959 +itsxform 1 1 6.957497 6.957497 16960 +somethingfun 1 1 6.957497 6.957497 16961 +xform 1 1 6.957497 6.957497 16940 +beenput 1 1 6.957497 6.957497 16962 +fundamentalattract 1 1 6.957497 6.957497 16963 +digitalimag 1 1 6.957497 6.957497 16964 +discussteach 1 1 6.957497 6.957497 16965 +undergr 1 1 6.957497 6.957497 16966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..1844541c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +applic 1 170 1.791759 1.791759 56 +manag 1 114 2.197225 2.197225 125 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +prof 1 64 2.772589 2.772589 273 +overview 1 56 2.890372 2.890372 323 +describ 1 45 3.135494 3.135494 400 +transact 1 39 3.258097 3.258097 438 +brian 1 38 3.295837 3.295837 466 +field 1 37 3.332205 3.332205 482 +survei 1 35 3.401197 3.401197 513 +john 1 33 3.433987 3.433987 532 +graph 1 30 3.555348 3.555348 576 +challeng 1 26 3.688879 3.688879 653 +task 1 25 3.737670 3.737670 678 +fundament 1 25 3.737670 3.737670 661 +mobil 1 23 3.806662 3.806662 730 +variabl 1 23 3.806662 3.806662 715 +methodolog 1 23 3.806662 3.806662 733 +infrastructur 1 12 4.465908 4.465908 1234 +hank 1 12 4.465908 4.465908 1253 +gaetano 1 6 5.164786 5.164786 2068 +wireless 1 4 5.568345 5.568345 2693 +disconnect 1 4 5.568345 5.568345 2664 +mobisa 1 3 5.857933 5.857933 3927 +mobilecomput 1 3 5.857933 5.857933 3629 +zahorjan 1 3 5.857933 5.857933 3383 +ubiquit 1 2 6.263398 6.263398 6049 +computingresearch 1 2 6.263398 6.263398 5957 +cope 1 2 6.263398 6.263398 6050 +forman 1 2 6.263398 6.263398 5904 +washingtonher 1 1 6.957497 6.957497 16967 +handheld 1 1 6.957497 6.957497 16968 +operationdistribut 1 1 6.957497 6.957497 16969 +systemcontact 1 1 6.957497 6.957497 16970 +bershadprof 1 1 6.957497 6.957497 16971 +borriellomarc 1 1 6.957497 6.957497 16972 +fiuczynskigeorg 1 1 6.957497 6.957497 16973 +formanprof 1 1 6.957497 6.957497 16974 +levygeoff 1 1 6.957497 6.957497 16975 +voelkerterri 1 1 6.957497 6.957497 16976 +watsonprof 1 1 6.957497 6.957497 16977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..f00db6d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +level 1 87 2.484907 2.484907 180 +member 1 84 2.484907 2.484907 165 +issu 1 78 2.564949 2.564949 211 +orient 1 80 2.564949 2.564949 205 +method 1 80 2.564949 2.564949 213 +dynam 1 76 2.564949 2.564949 194 +optim 1 79 2.564949 2.564949 197 +messag 1 76 2.564949 2.564949 212 +sourc 1 77 2.564949 2.564949 201 +java 1 70 2.708050 2.708050 248 +receiv 1 66 2.708050 2.708050 244 +goal 1 66 2.708050 2.708050 250 +august 1 66 2.708050 2.708050 257 +guid 1 63 2.772589 2.772589 267 +written 1 63 2.772589 2.772589 278 +simpl 1 60 2.833213 2.833213 298 +type 1 61 2.833213 2.833213 296 +automat 1 61 2.833213 2.833213 306 +special 1 56 2.890372 2.890372 320 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +extens 1 53 2.944439 2.944439 340 +sampl 1 53 2.944439 2.944439 339 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +standard 1 48 3.044522 3.044522 365 +pointer 1 48 3.044522 3.044522 368 +describ 1 45 3.135494 3.135494 400 +mechan 1 43 3.178054 3.178054 416 +past 1 42 3.218876 3.218876 428 +form 1 39 3.258097 3.258097 443 +prototyp 1 38 3.295837 3.295837 463 +procedur 1 36 3.367296 3.367296 488 +either 1 35 3.401197 3.401197 506 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +intend 1 28 3.610918 3.610918 599 +releas 1 28 3.610918 3.610918 616 +static 1 27 3.637586 3.637586 619 +request 1 26 3.688879 3.688879 635 +initi 1 23 3.806662 3.806662 717 +emphasi 1 22 3.850148 3.850148 755 +finish 1 22 3.850148 3.850148 748 +flexibl 1 21 3.912023 3.912023 792 +qualiti 1 20 3.951244 3.951244 832 +entir 1 20 3.951244 3.951244 811 +predict 1 19 4.007333 4.007333 855 +hybrid 1 15 4.248495 4.248495 1057 +conduct 1 14 4.317488 4.317488 1065 +split 1 14 4.317488 4.317488 1078 +bodi 1 13 4.382027 4.382027 1178 +infrastructur 1 12 4.465908 4.465908 1234 +target 1 12 4.465908 4.465908 1282 +solari 1 12 4.465908 4.465908 1238 +modul 1 10 4.653960 4.653960 1434 +cecil 1 9 4.753590 4.753590 1547 +modula 1 9 4.753590 4.753590 1613 +elimin 1 9 4.753590 4.753590 1558 +subscrib 1 9 4.753590 4.753590 1541 +pure 1 8 4.875197 4.875197 1776 +closur 1 8 4.875197 4.875197 1643 +analys 1 8 4.875197 4.875197 1666 +parti 1 8 4.875197 4.875197 1676 +dead 1 7 5.010635 5.010635 1840 +sparc 1 7 5.010635 5.010635 1860 +freeli 1 6 5.164786 5.164786 2014 +beta 1 6 5.164786 5.164786 1993 +vortex 1 5 5.347108 5.347108 2362 +ofinterest 1 5 5.347108 5.347108 2323 +inlin 1 4 5.568345 5.568345 2964 +suno 1 4 5.568345 5.568345 2790 +tosupport 1 3 5.857933 5.857933 3613 +inherit 1 3 5.857933 5.857933 3122 +forobject 1 3 5.857933 5.857933 3965 +uwcs 1 3 5.857933 5.857933 3977 +orientedlanguag 1 2 6.263398 6.263398 4079 +intraprocedur 1 2 6.263398 6.263398 5934 +acollect 1 2 6.263398 6.263398 5039 +vortexcompil 1 2 6.263398 6.263398 5932 +projectuw 1 1 6.957497 6.957497 16978 +projectwelcom 1 1 6.957497 6.957497 16979 +rapidconstruct 1 1 6.957497 6.957497 16980 +incorporatesmulti 1 1 6.957497 6.957497 16981 +basedencapsul 1 1 6.957497 6.957497 16982 +allowsstat 1 1 6.957497 6.957497 16983 +pureobject 1 1 6.957497 6.957497 16984 +incorporateshigh 1 1 6.957497 6.957497 16985 +hierachyanalysi 1 1 6.957497 6.957497 16986 +guidedselect 1 1 6.957497 6.957497 16987 +commonsubexpress 1 1 6.957497 6.957497 16988 +currentlyavail 1 1 6.957497 6.957497 16989 +thebeta 1 1 6.957497 6.957497 16990 +projectslast 1 1 6.957497 6.957497 16991 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..8cf74cc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +data 1 170 1.791759 1.791759 49 +seattl 1 120 2.079442 2.079442 103 +peopl 1 96 2.302585 2.302585 132 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +comment 1 93 2.397895 2.397895 146 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +juli 1 60 2.833213 2.833213 305 +local 1 55 2.944439 2.944439 334 +cool 1 49 3.044522 3.044522 374 +disk 1 22 3.850148 3.850148 747 +theunivers 1 21 3.912023 3.912023 797 +usag 1 6 5.164786 5.164786 2209 +these 1 5 5.347108 5.347108 2482 +grail 1 3 5.857933 5.857933 3356 +neighborhood 1 3 5.857933 5.857933 3242 +laboratorywelcom 1 2 6.263398 6.263398 5439 +mtwong 1 1 6.957497 6.957497 16992 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..d3ac33dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +report 1 131 2.079442 2.079442 92 +peopl 1 96 2.302585 2.302585 132 +technic 1 100 2.302585 2.302585 140 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +build 1 85 2.484907 2.484907 184 +come 1 78 2.564949 2.564949 202 +workshop 1 71 2.639057 2.639057 239 +test 1 66 2.708050 2.708050 252 +simul 1 66 2.708050 2.708050 255 +result 1 65 2.772589 2.772589 281 +hardwar 1 51 2.995732 2.995732 350 +friend 1 48 3.044522 3.044522 376 +standard 1 48 3.044522 3.044522 365 +better 1 45 3.135494 3.135494 401 +discuss 1 45 3.135494 3.135494 399 +describ 1 45 3.135494 3.135494 400 +power 1 30 3.555348 3.555348 573 +abl 1 30 3.555348 3.555348 566 +built 1 29 3.583519 3.583519 592 +held 1 28 3.610918 3.610918 600 +mine 1 26 3.688879 3.688879 654 +sort 1 22 3.850148 3.850148 738 +rout 1 21 3.912023 3.912023 793 +chip 1 21 3.912023 3.912023 770 +nice 1 20 3.951244 3.951244 809 +repositori 1 17 4.110874 4.110874 932 +interconnect 1 17 4.110874 4.110874 937 +web 1 12 4.465908 4.465908 1249 +chao 1 8 4.875197 4.875197 1753 +dylan 1 8 4.875197 4.875197 1625 +router 1 8 4.875197 4.875197 1772 +univeristi 1 8 4.875197 4.875197 1754 +guidelin 1 7 5.010635 5.010635 1832 +chaotic 1 5 5.347108 5.347108 2566 +chaoticrout 1 4 5.568345 5.568345 3063 +micron 1 3 5.857933 5.857933 3341 +cmo 1 3 5.857933 5.857933 3992 +redesign 1 3 5.857933 5.857933 3540 +thathav 1 3 5.857933 5.857933 3735 +papersand 1 2 6.263398 6.263398 4867 +peopleal 1 1 6.957497 6.957497 16994 +allsort 1 1 6.957497 6.957497 16995 +graphicalfront 1 1 6.957497 6.957497 16996 +pcrcw 1 1 6.957497 6.957497 16993 +presentationof 1 1 6.957497 6.957497 16997 +upwith 1 1 6.957497 6.957497 16998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..6a828587 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +schedul 1 119 2.079442 2.079442 85 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +follow 1 92 2.397895 2.397895 143 +larg 1 82 2.484907 2.484907 168 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +activ 1 84 2.484907 2.484907 182 +school 1 84 2.484907 2.484907 188 +complet 1 77 2.564949 2.564949 208 +issu 1 78 2.564949 2.564949 211 +want 1 79 2.564949 2.564949 199 +june 1 79 2.564949 2.564949 214 +interfac 1 79 2.564949 2.564949 209 +name 1 72 2.639057 2.639057 220 +effici 1 73 2.639057 2.639057 233 +meet 1 72 2.639057 2.639057 229 +nation 1 74 2.639057 2.639057 240 +simul 1 66 2.708050 2.708050 255 +main 1 67 2.708050 2.708050 256 +evalu 1 64 2.772589 2.772589 266 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +improv 1 62 2.772589 2.772589 289 +foundat 1 62 2.772589 2.772589 286 +automat 1 61 2.833213 2.833213 306 +detail 1 57 2.890372 2.890372 321 +summer 1 56 2.890372 2.890372 311 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +maintain 1 51 2.995732 2.995732 342 +even 1 45 3.135494 3.135494 393 +autom 1 41 3.218876 3.218876 434 +map 1 39 3.258097 3.258097 452 +connect 1 37 3.332205 3.332205 485 +singl 1 34 3.401197 3.401197 510 +concurr 1 34 3.401197 3.401197 501 +compon 1 30 3.555348 3.555348 570 +robert 1 30 3.555348 3.555348 567 +becom 1 28 3.610918 3.610918 603 +american 1 27 3.637586 3.637586 634 +constraint 1 26 3.688879 3.688879 636 +enabl 1 26 3.688879 3.688879 655 +rather 1 26 3.688879 3.688879 642 +accur 1 25 3.737670 3.737670 680 +sometim 1 24 3.761200 3.761200 696 +demonstr 1 24 3.761200 3.761200 694 +input 1 23 3.806662 3.806662 727 +togeth 1 23 3.806662 3.806662 714 +instead 1 22 3.850148 3.850148 756 +try 1 22 3.850148 3.850148 764 +sequenti 1 22 3.850148 3.850148 745 +util 1 21 3.912023 3.912023 774 +output 1 21 3.912023 3.912023 788 +synthesi 1 20 3.951244 3.951244 834 +kernel 1 20 3.951244 3.951244 825 +wind 1 18 4.060443 4.060443 908 +behavior 1 18 4.060443 4.060443 881 +earli 1 16 4.174387 4.174387 968 +advantag 1 16 4.174387 4.174387 987 +partit 1 16 4.174387 4.174387 984 +diego 1 16 4.174387 4.174387 992 +devic 1 16 4.174387 4.174387 1002 +universityof 1 15 4.248495 4.248495 1061 +embed 1 14 4.317488 4.317488 1102 +believ 1 13 4.382027 4.382027 1187 +incorpor 1 13 4.382027 4.382027 1163 +target 1 12 4.465908 4.465908 1282 +grant 1 12 4.465908 4.465908 1216 +fill 1 11 4.553877 4.553877 1349 +cycl 1 11 4.553877 4.553877 1335 +fix 1 11 4.553877 4.553877 1327 +itali 1 11 4.553877 4.553877 1378 +mountain 1 10 4.653960 4.653960 1456 +forc 1 10 4.653960 4.653960 1384 +reli 1 10 4.653960 4.653960 1411 +fellowship 1 10 4.653960 4.653960 1460 +pacif 1 8 4.875197 4.875197 1674 +character 1 8 4.875197 4.875197 1767 +driver 1 8 4.875197 4.875197 1657 +maxim 1 7 5.010635 5.010635 1944 +chinook 1 6 5.164786 5.164786 2229 +averag 1 6 5.164786 5.164786 2098 +contract 1 6 5.164786 5.164786 1985 +blow 1 5 5.347108 5.347108 2407 +east 1 5 5.347108 5.347108 2472 +synthes 1 5 5.347108 5.347108 2451 +ross 1 5 5.347108 5.347108 2243 +ortega 1 5 5.347108 5.347108 2559 +rocki 1 4 5.568345 5.568345 3048 +ti 1 4 5.568345 5.568345 3005 +shelf 1 4 5.568345 5.568345 2621 +chou 1 4 5.568345 5.568345 3033 +harri 1 4 5.568345 5.568345 3034 +warm 1 3 5.857933 5.857933 3904 +domin 1 3 5.857933 5.857933 3995 +retarget 1 3 5.857933 5.857933 3994 +moredetail 1 3 5.857933 5.857933 3854 +shortli 1 3 5.857933 5.857933 3375 +nato 1 3 5.857933 5.857933 3587 +salmon 1 2 6.263398 6.263398 4802 +rare 1 2 6.263398 6.263398 4184 +toolfor 1 2 6.263398 6.263398 6031 +neededto 1 2 6.263398 6.263398 5379 +ratherthan 1 2 6.263398 6.263398 6046 +differentarchitectur 1 2 6.263398 6.263398 6051 +verilog 1 2 6.263398 6.263398 4441 +softwareprogram 1 2 6.263398 6.263398 4889 +moreeffici 1 2 6.263398 6.263398 4209 +macduff 1 2 6.263398 6.263398 5923 +hauck 1 2 6.263398 6.263398 5920 +shinook 1 1 6.957497 6.957497 16999 +oncorhynchu 1 1 6.957497 6.957497 17000 +tshawytscha 1 1 6.957497 6.957497 17001 +amer 1 1 6.957497 6.957497 17002 +tribe 1 1 6.957497 6.957497 17003 +southerli 1 1 6.957497 6.957497 17004 +sled 1 1 6.957497 6.957497 17005 +doga 1 1 6.957497 6.957497 17006 +cadtool 1 1 6.957497 6.957497 17007 +reactivesystem 1 1 6.957497 6.957497 17008 +descriptionto 1 1 6.957497 6.957497 17009 +designdecis 1 1 6.957497 6.957497 17010 +reiterateaft 1 1 6.957497 6.957497 17011 +willnot 1 1 6.957497 6.957497 17012 +designerto 1 1 6.957497 6.957497 17013 +legacycod 1 1 6.957497 6.957497 17014 +currentlyw 1 1 6.957497 6.957497 17015 +interprocessorcommun 1 1 6.957497 6.957497 17016 +assumesmanu 1 1 6.957497 6.957497 17017 +intricateand 1 1 6.957497 6.957497 17018 +asicarchitectur 1 1 6.957497 6.957497 17019 +onoff 1 1 6.957497 6.957497 17020 +discourag 1 1 6.957497 6.957497 17021 +innovemb 1 1 6.957497 6.957497 17022 +shownat 1 1 6.957497 6.957497 17023 +mainfeatur 1 1 6.957497 6.957497 17024 +peripheraldevic 1 1 6.957497 6.957497 17025 +andsynthes 1 1 6.957497 6.957497 17026 +hardwarenetlist 1 1 6.957497 6.957497 17027 +interfacingproblem 1 1 6.957497 6.957497 17028 +timingconstraint 1 1 6.957497 6.957497 17029 +swcodedesign 1 1 6.957497 6.957497 17030 +tremezzo 1 1 6.957497 6.957497 17031 +severalmor 1 1 6.957497 6.957497 17032 +chinookersfacultygaetano 1 1 6.957497 6.957497 17033 +borriellogradu 1 1 6.957497 6.957497 17034 +ortegaken 1 1 6.957497 6.957497 17035 +hinesian 1 1 6.957497 6.957497 17036 +selizabeth 1 1 6.957497 6.957497 17037 +walkupscott 1 1 6.957497 6.957497 17038 +henrik 1 1 6.957497 6.957497 17039 +hulgaardstafflarri 1 1 6.957497 6.957497 17040 +mcmurchielist 1 1 6.957497 6.957497 17041 +paperschinook 1 1 6.957497 6.957497 17042 +sponsorsarpa 1 1 6.957497 6.957497 17043 +walkup 1 1 6.957497 6.957497 17044 +patricia 1 1 6.957497 6.957497 17045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..6344c58b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +develop 1 174 1.791759 1.791759 53 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +person 1 111 2.197225 2.197225 117 +intern 1 108 2.197225 2.197225 128 +need 1 98 2.302585 2.302585 135 +technic 1 100 2.302585 2.302585 140 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +environ 1 84 2.484907 2.484907 177 +contain 1 81 2.484907 2.484907 174 +appear 1 78 2.564949 2.564949 210 +logic 1 71 2.639057 2.639057 230 +symposium 1 72 2.639057 2.639057 238 +would 1 67 2.708050 2.708050 251 +evalu 1 64 2.772589 2.772589 266 +copi 1 63 2.772589 2.772589 284 +allow 1 53 2.944439 2.944439 333 +februari 1 54 2.944439 2.944439 328 +maintain 1 51 2.995732 2.995732 342 +right 1 48 3.044522 3.044522 363 +without 1 50 3.044522 3.044522 370 +featur 1 46 3.091042 3.091042 386 +offer 1 43 3.178054 3.178054 414 +term 1 43 3.178054 3.178054 411 +third 1 43 3.178054 3.178054 412 +fast 1 42 3.218876 3.218876 429 +map 1 39 3.258097 3.258097 452 +author 1 39 3.258097 3.258097 450 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +mean 1 37 3.332205 3.332205 477 +field 1 37 3.332205 3.332205 482 +copyright 1 36 3.367296 3.367296 495 +global 1 34 3.401197 3.401197 520 +given 1 32 3.465736 3.465736 538 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +specifi 1 30 3.555348 3.555348 568 +propos 1 28 3.610918 3.610918 602 +arrai 1 27 3.637586 3.637586 627 +constraint 1 26 3.688879 3.688879 636 +reliabl 1 25 3.737670 3.737670 674 +accur 1 25 3.737670 3.737670 680 +frame 1 24 3.761200 3.761200 684 +rout 1 21 3.912023 3.912023 793 +basi 1 20 3.951244 3.951244 828 +definit 1 19 4.007333 4.007333 864 +partit 1 16 4.174387 4.174387 984 +commerci 1 16 4.174387 4.174387 1005 +fourth 1 16 4.174387 4.174387 999 +driven 1 15 4.248495 4.248495 1048 +contribut 1 15 4.248495 4.248495 1021 +carl 1 15 4.248495 4.248495 1024 +unfortun 1 13 4.382027 4.382027 1170 +block 1 13 4.382027 4.382027 1183 +larri 1 13 4.382027 4.382027 1142 +fpga 1 10 4.653960 4.653960 1433 +placement 1 10 4.653960 4.653960 1420 +face 1 9 4.753590 4.753590 1501 +satisfi 1 8 4.875197 4.875197 1694 +heart 1 8 4.875197 4.875197 1729 +router 1 8 4.875197 4.875197 1772 +metric 1 7 5.010635 5.010635 1831 +phase 1 6 5.164786 5.164786 1977 +quickli 1 6 5.164786 5.164786 2000 +ensur 1 6 5.164786 5.164786 2012 +invok 1 6 5.164786 5.164786 2079 +gate 1 6 5.164786 5.164786 2182 +variat 1 5 5.347108 5.347108 2248 +understood 1 5 5.347108 5.347108 2364 +darren 1 5 5.347108 5.347108 2565 +permiss 1 4 5.568345 5.568345 2642 +mcmurchi 1 4 5.568345 5.568345 2757 +ebel 1 4 5.568345 5.568345 2756 +emerald 1 3 5.857933 5.857933 3979 +moreov 1 3 5.857933 5.857933 3200 +byth 1 3 5.857933 5.857933 3874 +cronquist 1 3 5.857933 5.857933 3942 +performanceevalu 1 2 6.263398 6.263398 6052 +thoroughli 1 2 6.263398 6.263398 4801 +parameter 1 2 6.263398 6.263398 5540 +dissemin 1 2 6.263398 6.263398 5080 +adher 1 2 6.263398 6.263398 6025 +sigda 1 2 6.263398 6.263398 5493 +pathfind 1 2 6.263398 6.263398 6053 +negoti 1 2 6.263398 6.263398 6054 +basedperform 1 2 6.263398 6.263398 6055 +projectid 1 1 6.957497 6.957497 17046 +makeus 1 1 6.957497 6.957497 17047 +quickproduct 1 1 6.957497 6.957497 17048 +isoften 1 1 6.957497 6.957497 17049 +postpon 1 1 6.957497 6.957497 17050 +beenfrozen 1 1 6.957497 6.957497 17051 +havedesign 1 1 6.957497 6.957497 17052 +quickdevelop 1 1 6.957497 6.957497 17053 +basicfeatur 1 1 6.957497 6.957497 17054 +synthesisand 1 1 6.957497 6.957497 17055 +anddetail 1 1 6.957497 6.957497 17056 +aneffici 1 1 6.957497 6.957497 17057 +blockarchitectur 1 1 6.957497 6.957497 17058 +tailorplac 1 1 6.957497 6.957497 17059 +schematicspecif 1 1 6.957497 6.957497 17060 +capturedand 1 1 6.957497 6.957497 17061 +ofscholarli 1 1 6.957497 6.957497 17062 +andal 1 1 6.957497 6.957497 17063 +therein 1 1 6.957497 6.957497 17064 +copyrighthold 1 1 6.957497 6.957497 17065 +notwithstand 1 1 6.957497 6.957497 17066 +hereelectron 1 1 6.957497 6.957497 17067 +thisinform 1 1 6.957497 6.957497 17068 +eachauthor 1 1 6.957497 6.957497 17069 +repost 1 1 6.957497 6.957497 17070 +theexplicit 1 1 6.957497 6.957497 17071 +holder 1 1 6.957497 6.957497 17072 +emeraldlarri 1 1 6.957497 6.957497 17073 +arraysaid 1 1 6.957497 6.957497 17074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..6de2d64c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +sinc 1 90 2.397895 2.397895 159 +larg 1 82 2.484907 2.484907 168 +educ 1 86 2.484907 2.484907 191 +wide 1 84 2.484907 2.484907 185 +level 1 87 2.484907 2.484907 180 +complet 1 77 2.564949 2.564949 208 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +effici 1 73 2.639057 2.639057 233 +integr 1 67 2.708050 2.708050 245 +simul 1 66 2.708050 2.708050 255 +laboratori 1 63 2.772589 2.772589 292 +improv 1 62 2.772589 2.772589 289 +descript 1 64 2.772589 2.772589 271 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +variou 1 56 2.890372 2.890372 317 +direct 1 57 2.890372 2.890372 316 +overview 1 56 2.890372 2.890372 323 +hardwar 1 51 2.995732 2.995732 350 +digit 1 52 2.995732 2.995732 348 +adapt 1 46 3.091042 3.091042 387 +late 1 40 3.258097 3.258097 439 +map 1 39 3.258097 3.258097 452 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +multi 1 36 3.367296 3.367296 493 +survei 1 35 3.401197 3.401197 513 +board 1 33 3.433987 3.433987 528 +toler 1 33 3.433987 3.433987 533 +focu 1 30 3.555348 3.555348 571 +synchron 1 29 3.583519 3.583519 588 +scale 1 28 3.610918 3.610918 613 +compar 1 26 3.688879 3.688879 648 +todai 1 25 3.737670 3.737670 672 +methodolog 1 23 3.806662 3.806662 733 +varieti 1 22 3.850148 3.850148 740 +reduc 1 22 3.850148 3.850148 759 +self 1 22 3.850148 3.850148 761 +vlsi 1 21 3.912023 3.912023 795 +rout 1 21 3.912023 3.912023 793 +chip 1 21 3.912023 3.912023 770 +synthesi 1 20 3.951244 3.951244 834 +verif 1 20 3.951244 3.951244 826 +separ 1 19 4.007333 4.007333 844 +feedback 1 19 4.007333 4.007333 854 +aid 1 18 4.060443 4.060443 904 +event 1 18 4.060443 4.060443 896 +commerci 1 16 4.174387 4.174387 1005 +partit 1 16 4.174387 4.174387 984 +latenc 1 16 4.174387 4.174387 993 +embed 1 14 4.317488 4.317488 1102 +topolog 1 14 4.317488 4.317488 1089 +circuit 1 13 4.382027 4.382027 1131 +asynchron 1 12 4.465908 4.465908 1229 +tune 1 12 4.465908 4.465908 1227 +clock 1 11 4.553877 4.553877 1320 +valid 1 11 4.553877 4.553877 1299 +arpa 1 11 4.553877 4.553877 1369 +fpga 1 10 4.653960 4.653960 1433 +rapid 1 10 4.653960 4.653960 1453 +placement 1 10 4.653960 4.653960 1420 +paragraph 1 10 4.653960 4.653960 1449 +sensit 1 8 4.875197 4.875197 1726 +accomplish 1 8 4.875197 4.875197 1755 +northwest 1 7 5.010635 5.010635 1973 +densiti 1 7 5.010635 5.010635 1927 +metric 1 7 5.010635 5.010635 1831 +chinook 1 6 5.164786 5.164786 2229 +layout 1 6 5.164786 5.164786 2183 +sytem 1 4 5.568345 5.568345 3015 +triptych 1 4 5.568345 5.568345 3061 +toolset 1 4 5.568345 5.568345 3014 +chaoticrout 1 4 5.568345 5.568345 3063 +tester 1 4 5.568345 5.568345 2754 +emerald 1 3 5.857933 5.857933 3979 +systemsth 1 3 5.857933 5.857933 3835 +mactest 1 3 5.857933 5.857933 3972 +cmo 1 3 5.857933 5.857933 3992 +usath 1 2 6.263398 6.263398 6056 +engag 1 2 6.263398 6.263398 4937 +montag 1 2 6.263398 6.263398 5921 +springbok 1 2 6.263398 6.263398 5922 +retim 1 2 6.263398 6.263398 6008 +latch 1 2 6.263398 6.263398 6034 +skew 1 2 6.263398 6.263398 6057 +gemini 1 2 6.263398 6.263398 5975 +voltag 1 2 6.263398 6.263398 5475 +verificationtim 1 1 6.957497 6.957497 17075 +prototypingtriptych 1 1 6.957497 6.957497 17076 +toolscan 1 1 6.957497 6.957497 17077 +fpgaarchitectur 1 1 6.957497 6.957497 17078 +incorporatedinto 1 1 6.957497 6.957497 17079 +circuitsretim 1 1 6.957497 6.957497 17080 +uselevel 1 1 6.957497 6.957497 17081 +andincreas 1 1 6.957497 6.957497 17082 +synchronouscircuit 1 1 6.957497 6.957497 17083 +contraint 1 1 6.957497 6.957497 17084 +routerth 1 1 6.957497 6.957497 17085 +systemsself 1 1 6.957497 6.957497 17086 +kehlprevi 1 1 6.957497 6.957497 17087 +reportsarpa 1 1 6.957497 6.957497 17088 +bluebook 1 1 6.957497 6.957497 17089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..470984f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +develop 1 174 1.791759 1.791759 53 +support 1 132 1.945910 1.945910 83 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +specif 1 106 2.197225 2.197225 106 +techniqu 1 99 2.302585 2.302585 138 +contain 1 81 2.484907 2.484907 174 +build 1 85 2.484907 2.484907 184 +issu 1 78 2.564949 2.564949 211 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +test 1 66 2.708050 2.708050 252 +goal 1 66 2.708050 2.708050 250 +import 1 65 2.772589 2.772589 282 +foundat 1 62 2.772589 2.772589 286 +plai 1 60 2.833213 2.833213 307 +space 1 57 2.890372 2.890372 310 +digit 1 52 2.995732 2.995732 348 +review 1 42 3.218876 3.218876 425 +must 1 40 3.258097 3.258097 442 +theoret 1 39 3.258097 3.258097 446 +prototyp 1 38 3.295837 3.295837 463 +exist 1 30 3.555348 3.555348 569 +built 1 29 3.583519 3.583519 592 +pass 1 28 3.610918 3.610918 611 +although 1 25 3.737670 3.737670 667 +methodolog 1 23 3.806662 3.806662 733 +safeti 1 20 3.951244 3.951244 817 +medic 1 17 4.110874 4.110874 958 +critic 1 16 4.174387 4.174387 982 +upon 1 16 4.174387 4.174387 978 +role 1 14 4.317488 4.317488 1101 +nanci 1 12 4.465908 4.465908 1256 +summar 1 11 4.553877 4.553877 1295 +valid 1 11 4.553877 4.553877 1299 +equip 1 10 4.653960 4.653960 1459 +leveson 1 9 4.753590 4.753590 1540 +consequ 1 6 5.164786 5.164786 1989 +nuclear 1 5 5.347108 5.347108 2576 +chemic 1 5 5.347108 5.347108 2552 +plant 1 5 5.347108 5.347108 2497 +decad 1 5 5.347108 5.347108 2455 +increasingli 1 4 5.568345 5.568345 2766 +aircraft 1 4 5.568345 5.568345 2872 +rigor 1 4 5.568345 5.568345 3030 +lai 1 3 5.857933 5.857933 3694 +safewar 1 2 6.263398 6.263398 5959 +reactor 1 1 6.957497 6.957497 17090 +defenc 1 1 6.957497 6.957497 17091 +malfunct 1 1 6.957497 6.957497 17092 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..6d8c3907 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +world 1 115 2.197225 2.197225 126 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +search 1 95 2.397895 2.397895 155 +internet 1 83 2.484907 2.484907 186 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +learn 1 86 2.484907 2.484907 170 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +info 1 85 2.484907 2.484907 176 +state 1 76 2.564949 2.564949 207 +want 1 79 2.564949 2.564949 199 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +intellig 1 72 2.639057 2.639057 225 +servic 1 72 2.639057 2.639057 236 +goal 1 66 2.708050 2.708050 250 +interact 1 62 2.772589 2.772589 270 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +space 1 57 2.890372 2.890372 310 +browser 1 56 2.890372 2.890372 313 +found 1 53 2.944439 2.944439 337 +allow 1 53 2.944439 2.944439 333 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +maintain 1 51 2.995732 2.995732 342 +without 1 50 3.044522 3.044522 370 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +multipl 1 39 3.258097 3.258097 453 +field 1 37 3.332205 3.332205 482 +robot 1 36 3.367296 3.367296 497 +procedur 1 36 3.367296 3.367296 488 +multi 1 36 3.367296 3.367296 493 +articl 1 33 3.433987 3.433987 530 +human 1 32 3.465736 3.465736 546 +extend 1 32 3.465736 3.465736 539 +collabor 1 32 3.465736 3.465736 543 +taken 1 31 3.496508 3.496508 555 +rang 1 30 3.555348 3.555348 565 +option 1 30 3.555348 3.555348 575 +specifi 1 30 3.555348 3.555348 568 +domain 1 30 3.555348 3.555348 564 +determin 1 27 3.637586 3.637586 630 +challeng 1 26 3.688879 3.688879 653 +enabl 1 26 3.688879 3.688879 655 +compar 1 26 3.688879 3.688879 648 +rule 1 26 3.688879 3.688879 638 +magazin 1 24 3.761200 3.761200 704 +mike 1 24 3.761200 3.761200 703 +methodolog 1 23 3.806662 3.806662 733 +util 1 21 3.912023 3.912023 774 +alumni 1 21 3.912023 3.912023 807 +agent 1 18 4.060443 4.060443 910 +accept 1 18 4.060443 4.060443 879 +debug 1 17 4.110874 4.110874 944 +indic 1 15 4.248495 4.248495 1013 +achiev 1 14 4.317488 4.317488 1088 +easili 1 14 4.317488 4.317488 1077 +dave 1 14 4.317488 4.317488 1098 +daniel 1 12 4.465908 4.465908 1233 +tour 1 11 4.553877 4.553877 1307 +motiv 1 11 4.553877 4.553877 1346 +princip 1 10 4.653960 4.653960 1397 +metacrawl 1 10 4.653960 4.653960 1455 +weld 1 9 4.753590 4.753590 1538 +autonom 1 8 4.875197 4.875197 1749 +claim 1 8 4.875197 4.875197 1664 +accomplish 1 8 4.875197 4.875197 1755 +gather 1 8 4.875197 4.875197 1719 +softbot 1 7 5.010635 5.010635 1974 +intellectu 1 7 5.010635 5.010635 1847 +golden 1 7 5.010635 5.010635 1962 +planner 1 7 5.010635 5.010635 1797 +moder 1 6 5.164786 5.164786 2112 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +brook 1 6 5.164786 5.164786 2152 +versu 1 6 5.164786 5.164786 2052 +shell 1 5 5.347108 5.347108 2353 +cacm 1 5 5.347108 5.347108 2388 +keith 1 5 5.347108 5.347108 2528 +innov 1 4 5.568345 5.568345 2933 +substrat 1 4 5.568345 5.568345 2857 +disambigu 1 4 5.568345 5.568345 2899 +repli 1 4 5.568345 5.568345 2689 +toth 1 4 5.568345 5.568345 2595 +sujai 1 3 5.857933 5.857933 3960 +parekh 1 3 5.857933 5.857933 3961 +reactiv 1 3 5.857933 5.857933 3575 +kwok 1 3 5.857933 5.857933 3941 +hacker 1 3 5.857933 5.857933 3996 +finalist 1 2 6.263398 6.263398 5890 +discoveraward 1 2 6.263398 6.263398 5891 +learningtechniqu 1 2 6.263398 6.263398 5028 +christianson 1 2 6.263398 6.263398 5849 +negoti 1 2 6.263398 6.263398 6054 +goan 1 2 6.263398 6.263398 5896 +ingram 1 2 6.263398 6.263398 5847 +perkowitz 1 2 6.263398 6.263398 5970 +softbotinternet 1 1 6.957497 6.957497 17093 +softbotth 1 1 6.957497 6.957497 17094 +softwareenviron 1 1 6.957497 6.957497 17095 +pragmaticallyconveni 1 1 6.957497 6.957497 17096 +acustomiz 1 1 6.957497 6.957497 17097 +internetaccess 1 1 6.957497 6.957497 17098 +generatesand 1 1 6.957497 6.957497 17099 +itsexperi 1 1 6.957497 6.957497 17100 +requestand 1 1 6.957497 6.957497 17101 +satisfyit 1 1 6.957497 6.957497 17102 +interactwith 1 1 6.957497 6.957497 17103 +sgraphic 1 1 6.957497 6.957497 17104 +tosearch 1 1 6.957497 6.957497 17105 +sophisticatedprun 1 1 6.957497 6.957497 17106 +cartoonrepresent 1 1 6.957497 6.957497 17107 +blanchard 1 1 6.957497 6.957497 17108 +ofcolumn 1 1 6.957497 6.957497 17109 +xiiplann 1 1 6.957497 6.957497 17110 +ilalearn 1 1 6.957497 6.957497 17111 +ying 1 1 6.957497 6.957497 17112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..f3f55fd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,214 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +provid 1 121 2.079442 2.079442 94 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +assist 1 112 2.197225 2.197225 113 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +control 1 82 2.484907 2.484907 164 +resourc 1 81 2.484907 2.484907 172 +build 1 85 2.484907 2.484907 184 +member 1 84 2.484907 2.484907 165 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +server 1 76 2.564949 2.564949 204 +exampl 1 77 2.564949 2.564949 195 +appear 1 78 2.564949 2.564949 210 +master 1 76 2.564949 2.564949 216 +servic 1 72 2.639057 2.639057 236 +effici 1 73 2.639057 2.639057 233 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +integr 1 67 2.708050 2.708050 245 +order 1 69 2.708050 2.708050 249 +degre 1 69 2.708050 2.708050 259 +copi 1 63 2.772589 2.772589 284 +collect 1 65 2.772589 2.772589 268 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +creat 1 63 2.772589 2.772589 277 +function 1 62 2.772589 2.772589 275 +experi 1 64 2.772589 2.772589 283 +improv 1 62 2.772589 2.772589 289 +direct 1 57 2.890372 2.890372 316 +space 1 57 2.890372 2.890372 310 +special 1 56 2.890372 2.890372 320 +unix 1 58 2.890372 2.890372 308 +point 1 58 2.890372 2.890372 319 +overview 1 56 2.890372 2.890372 323 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +undergradu 1 54 2.944439 2.944439 338 +run 1 51 2.995732 2.995732 347 +maintain 1 51 2.995732 2.995732 342 +basic 1 50 3.044522 3.044522 360 +friend 1 48 3.044522 3.044522 376 +pointer 1 48 3.044522 3.044522 368 +adapt 1 46 3.091042 3.091042 387 +could 1 46 3.091042 3.091042 383 +video 1 44 3.135494 3.135494 405 +anoth 1 45 3.135494 3.135494 408 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +describ 1 45 3.135494 3.135494 400 +mechan 1 43 3.178054 3.178054 416 +show 1 43 3.178054 3.178054 417 +join 1 39 3.258097 3.258097 457 +realli 1 40 3.258097 3.258097 444 +industri 1 38 3.295837 3.295837 464 +credit 1 38 3.295837 3.295837 460 +brian 1 38 3.295837 3.295837 466 +workstat 1 37 3.332205 3.332205 479 +purpos 1 37 3.332205 3.332205 481 +connect 1 37 3.332205 3.332205 485 +procedur 1 36 3.367296 3.367296 488 +winter 1 36 3.367296 3.367296 500 +extend 1 32 3.465736 3.465736 539 +fault 1 32 3.465736 3.465736 547 +posit 1 31 3.496508 3.496508 552 +synchron 1 29 3.583519 3.583519 588 +load 1 28 3.610918 3.610918 601 +pass 1 28 3.610918 3.610918 611 +manipul 1 27 3.637586 3.637586 624 +rather 1 26 3.688879 3.688879 642 +effort 1 26 3.688879 3.688879 652 +handl 1 24 3.761200 3.761200 685 +thread 1 23 3.806662 3.806662 722 +almost 1 22 3.850148 3.850148 742 +properti 1 22 3.850148 3.850148 749 +deal 1 22 3.850148 3.850148 736 +flexibl 1 21 3.912023 3.912023 792 +latest 1 21 3.912023 3.912023 785 +fund 1 21 3.912023 3.912023 805 +kernel 1 20 3.951244 3.951244 825 +longer 1 20 3.951244 3.951244 816 +safeti 1 20 3.951244 3.951244 817 +facil 1 20 3.951244 3.951244 814 +benchmark 1 19 4.007333 4.007333 859 +runtim 1 19 4.007333 4.007333 858 +less 1 18 4.060443 4.060443 892 +statu 1 18 4.060443 4.060443 885 +encourag 1 18 4.060443 4.060443 880 +bershad 1 18 4.060443 4.060443 902 +regular 1 17 4.110874 4.110874 929 +critic 1 16 4.174387 4.174387 982 +capabl 1 15 4.248495 4.248495 1016 +piec 1 15 4.248495 4.248495 1020 +overhead 1 15 4.248495 4.248495 1035 +spin 1 14 4.317488 4.317488 1121 +happi 1 14 4.317488 4.317488 1079 +decid 1 14 4.317488 4.317488 1075 +pretti 1 13 4.382027 4.382027 1191 +wait 1 13 4.382027 4.382027 1168 +sai 1 13 4.382027 4.382027 1175 +usenix 1 12 4.465908 4.465908 1240 +safe 1 12 4.465908 4.465908 1274 +arbitrari 1 11 4.553877 4.553877 1359 +alpha 1 11 4.553877 4.553877 1348 +abil 1 11 4.553877 4.553877 1341 +arpa 1 11 4.553877 4.553877 1369 +sosp 1 10 4.653960 4.653960 1416 +inter 1 9 4.753590 4.753590 1530 +modula 1 9 4.753590 4.753590 1613 +osdi 1 9 4.753590 4.753590 1534 +clear 1 9 4.753590 4.753590 1488 +isol 1 8 4.875197 4.875197 1663 +crash 1 8 4.875197 4.875197 1616 +cross 1 8 4.875197 4.875197 1703 +mach 1 8 4.875197 4.875197 1669 +core 1 7 5.010635 5.010635 1809 +prevent 1 7 5.010635 5.010635 1827 +bottom 1 7 5.010635 5.010635 1906 +quick 1 6 5.164786 5.164786 2184 +recov 1 6 5.164786 5.164786 2235 +trail 1 6 5.164786 5.164786 2071 +academia 1 6 5.164786 5.164786 2036 +bind 1 5 5.347108 5.347108 2250 +distinct 1 5 5.347108 5.347108 2319 +adopt 1 5 5.347108 5.347108 2467 +andimplement 1 4 5.568345 5.568345 3029 +termin 1 4 5.568345 5.568345 2852 +fork 1 4 5.568345 5.568345 2801 +gotten 1 4 5.568345 5.568345 2628 +stillmaintain 1 3 5.857933 5.857933 3964 +providesa 1 3 5.857933 5.857933 3884 +thesear 1 3 5.857933 5.857933 3456 +forappl 1 3 5.857933 5.857933 3929 +linker 1 3 5.857933 5.857933 3157 +namespac 1 3 5.857933 5.857933 3957 +arrow 1 3 5.857933 5.857933 3520 +wella 1 2 6.263398 6.263398 4289 +microsecond 1 2 6.263398 6.263398 5435 +shortcom 1 2 6.263398 6.263398 5978 +linkabl 1 2 6.263398 6.263398 5979 +barb 1 2 6.263398 6.263398 6058 +qualif 1 2 6.263398 6.263398 6059 +mascot 1 2 6.263398 6.263398 6060 +systemspin 1 1 6.957497 6.957497 17113 +thatsupport 1 1 6.957497 6.957497 17114 +atruntim 1 1 6.957497 6.957497 17115 +accesshardwar 1 1 6.957497 6.957497 17116 +nooverhead 1 1 6.957497 6.957497 17117 +byrefer 1 1 6.957497 6.957497 17118 +systemservic 1 1 6.957497 6.957497 17119 +allextens 1 1 6.957497 6.957497 17120 +typesaf 1 1 6.957497 6.957497 17121 +oftypesafeti 1 1 6.957497 6.957497 17122 +attemptingto 1 1 6.957497 6.957497 17123 +writeboth 1 1 6.957497 6.957497 17124 +machinerun 1 1 6.957497 6.957497 17125 +withlow 1 1 6.957497 6.957497 17126 +executeit 1 1 6.957497 6.957497 17127 +protectedprocedur 1 1 6.957497 6.957497 17128 +overethernet 1 1 6.957497 6.957497 17129 +oldadapt 1 1 6.957497 6.957497 17130 +operationsund 1 1 6.957497 6.957497 17131 +samehardwar 1 1 6.957497 6.957497 17132 +saveyourself 1 1 6.957497 6.957497 17133 +invoc 1 1 6.957497 6.957497 17134 +andsimpl 1 1 6.957497 6.957497 17135 +interposit 1 1 6.957497 6.957497 17136 +raship 1 1 6.957497 6.957497 17137 +ourmascot 1 1 6.957497 6.957497 17138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..40485804 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +relat 1 139 1.945910 1.945910 68 +perform 1 143 1.945910 1.945910 74 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +machin 1 129 2.079442 2.079442 95 +code 1 108 2.197225 2.197225 116 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +member 1 84 2.484907 2.484907 165 +activ 1 84 2.484907 2.484907 182 +start 1 83 2.484907 2.484907 173 +build 1 85 2.484907 2.484907 184 +second 1 81 2.484907 2.484907 166 +dynam 1 76 2.564949 2.564949 194 +optim 1 79 2.564949 2.564949 197 +exampl 1 77 2.564949 2.564949 195 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +august 1 66 2.708050 2.708050 257 +copi 1 63 2.772589 2.772589 284 +automat 1 61 2.833213 2.833213 306 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +detail 1 57 2.890372 2.890372 321 +approach 1 48 3.044522 3.044522 366 +execut 1 45 3.135494 3.135494 404 +describ 1 45 3.135494 3.135494 400 +howev 1 41 3.218876 3.218876 422 +fast 1 42 3.218876 3.218876 429 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +soon 1 36 3.367296 3.367296 494 +produc 1 30 3.555348 3.555348 572 +rang 1 30 3.555348 3.555348 565 +releas 1 28 3.610918 3.610918 616 +static 1 27 3.637586 3.637586 619 +enabl 1 26 3.688879 3.688879 655 +bound 1 26 3.688879 3.688879 659 +valu 1 25 3.737670 3.737670 665 +interpret 1 24 3.761200 3.761200 686 +variabl 1 23 3.806662 3.806662 715 +initi 1 23 3.806662 3.806662 717 +identifi 1 22 3.850148 3.850148 760 +annot 1 21 3.912023 3.912023 775 +kernel 1 20 3.951244 3.951244 825 +region 1 19 4.007333 4.007333 875 +spin 1 14 4.317488 4.317488 1121 +remov 1 12 4.465908 4.465908 1225 +target 1 12 4.465908 4.465908 1282 +grant 1 12 4.465908 4.465908 1216 +branch 1 11 4.553877 4.553877 1318 +loop 1 11 4.553877 4.553877 1310 +elimin 1 9 4.753590 4.753590 1558 +pair 1 9 4.753590 4.753590 1503 +analys 1 8 4.875197 4.875197 1666 +pldi 1 8 4.875197 4.875197 1704 +dispatch 1 7 5.010635 5.010635 1791 +constant 1 5 5.347108 5.347108 2251 +templat 1 5 5.347108 5.347108 2311 +dataflow 1 5 5.347108 5.347108 2390 +willb 1 5 5.347108 5.347108 2277 +spinproject 1 5 5.347108 5.347108 2570 +fold 1 4 5.568345 5.568345 2615 +fulli 1 4 5.568345 5.568345 2986 +theprogram 1 4 5.568345 5.568345 2686 +patch 1 4 5.568345 5.568345 2710 +imper 1 4 5.568345 5.568345 3067 +eventu 1 4 5.568345 5.568345 3074 +wewil 1 4 5.568345 5.568345 2688 +projectth 1 3 5.857933 5.857933 3344 +propag 1 3 5.857933 5.857933 3997 +dynamiccompil 1 3 5.857933 5.857933 3926 +optimizingcompil 1 2 6.263398 6.263398 4456 +projectmor 1 1 6.957497 6.957497 17139 +projectsuw 1 1 6.957497 6.957497 17140 +webdynam 1 1 6.957497 6.957497 17141 +ofinvari 1 1 6.957497 6.957497 17142 +theserun 1 1 6.957497 6.957497 17143 +memoryload 1 1 6.957497 6.957497 17144 +theydetermin 1 1 6.957497 6.957497 17145 +unrol 1 1 6.957497 6.957497 17146 +performancebenefit 1 1 6.957497 6.957497 17147 +offsetbi 1 1 6.957497 6.957497 17148 +strive 1 1 6.957497 6.957497 17149 +qualitydynam 1 1 6.957497 6.957497 17150 +thetempl 1 1 6.957497 6.957497 17151 +initialexperi 1 1 6.957497 6.957497 17152 +producedspeedup 1 1 6.957497 6.957497 17153 +dynamicallycompil 1 1 6.957497 6.957497 17154 +spinev 1 1 6.957497 6.957497 17155 +otherposs 1 1 6.957497 6.957497 17156 +invirtu 1 1 6.957497 6.957497 17157 +systemi 1 1 6.957497 6.957497 17158 +arenow 1 1 6.957497 6.957497 17159 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..b02e0310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..a02d9946 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +info 1 85 2.484907 2.484907 176 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +member 1 84 2.484907 2.484907 165 +help 1 83 2.484907 2.484907 175 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +would 1 67 2.708050 2.708050 251 +written 1 63 2.772589 2.772589 278 +descript 1 64 2.772589 2.772589 271 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +overview 1 56 2.890372 2.890372 323 +browser 1 56 2.890372 2.890372 313 +detail 1 57 2.890372 2.890372 321 +scientif 1 53 2.944439 2.944439 341 +sampl 1 53 2.944439 2.944439 339 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +fast 1 42 3.218876 3.218876 429 +futur 1 41 3.218876 3.218876 427 +error 1 40 3.258097 3.258097 449 +small 1 39 3.258097 3.258097 447 +programm 1 39 3.258097 3.258097 445 +manual 1 35 3.401197 3.401197 504 +independ 1 32 3.465736 3.465736 548 +concept 1 32 3.465736 3.465736 537 +scientist 1 31 3.496508 3.496508 560 +autumn 1 31 3.496508 3.496508 558 +arrai 1 27 3.637586 3.637586 627 +higher 1 24 3.761200 3.761200 690 +flow 1 24 3.761200 3.761200 700 +sequenti 1 22 3.850148 3.850148 745 +minut 1 20 3.951244 3.951244 810 +region 1 19 4.007333 4.007333 875 +previous 1 17 4.110874 4.110874 923 +modif 1 17 4.110874 4.110874 913 +fortran 1 15 4.248495 4.248495 1027 +easili 1 14 4.317488 4.317488 1077 +necessari 1 13 4.382027 4.382027 1147 +walk 1 12 4.465908 4.465908 1281 +loop 1 11 4.553877 4.553877 1310 +typic 1 11 4.553877 4.553877 1360 +suitabl 1 9 4.753590 4.753590 1486 +elimin 1 9 4.753590 4.753590 1558 +ideal 1 8 4.875197 4.875197 1630 +understood 1 5 5.347108 5.347108 2364 +enrol 1 4 5.568345 5.568345 2613 +tediou 1 3 5.857933 5.857933 3731 +shorter 1 3 5.857933 5.857933 3998 +conclus 1 3 5.857933 5.857933 3367 +horizon 1 3 5.857933 5.857933 3746 +border 1 2 6.263398 6.263398 4980 +prone 1 2 6.263398 6.263398 5178 +shouldconsid 1 2 6.263398 6.263398 6061 +acknowledg 1 2 6.263398 6.263398 6062 +eduzpl 1 1 6.957497 6.957497 17160 +recompil 1 1 6.957497 6.957497 17161 +shatter 1 1 6.957497 6.957497 17162 +yourmachin 1 1 6.957497 6.957497 17163 +zpthi 1 1 6.957497 6.957497 17164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..8dfbc867 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +intern 1 108 2.197225 2.197225 128 +check 1 115 2.197225 2.197225 118 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +proceed 1 93 2.397895 2.397895 152 +wide 1 84 2.484907 2.484907 185 +activ 1 84 2.484907 2.484907 182 +resourc 1 81 2.484907 2.484907 172 +level 1 87 2.484907 2.484907 180 +issu 1 78 2.564949 2.564949 211 +dynam 1 76 2.564949 2.564949 194 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +differ 1 66 2.708050 2.708050 253 +function 1 62 2.772589 2.772589 275 +abstract 1 62 2.772589 2.772589 276 +januari 1 62 2.772589 2.772589 264 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +extens 1 53 2.944439 2.944439 340 +maintain 1 51 2.995732 2.995732 342 +hardwar 1 51 2.995732 2.995732 350 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +still 1 50 3.044522 3.044522 362 +effect 1 46 3.091042 3.091042 385 +execut 1 45 3.135494 3.135494 404 +long 1 43 3.178054 3.178054 413 +combin 1 42 3.218876 3.218876 421 +futur 1 41 3.218876 3.218876 427 +fast 1 42 3.218876 3.218876 429 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +annual 1 40 3.258097 3.258097 458 +singl 1 34 3.401197 3.401197 510 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +collabor 1 32 3.465736 3.465736 543 +limit 1 29 3.583519 3.583519 585 +though 1 27 3.637586 3.637586 622 +enabl 1 26 3.688879 3.688879 655 +todai 1 25 3.737670 3.737670 672 +thread 1 23 3.806662 3.806662 722 +util 1 21 3.912023 3.912023 774 +unit 1 21 3.912023 3.912023 779 +corpor 1 21 3.912023 3.912023 802 +chip 1 21 3.912023 3.912023 770 +increas 1 20 3.951244 3.951244 829 +exploit 1 20 3.951244 3.951244 836 +speed 1 18 4.060443 4.060443 911 +minim 1 18 4.060443 4.060443 887 +ultim 1 17 4.110874 4.110874 943 +stanford 1 17 4.110874 4.110874 955 +latenc 1 16 4.174387 4.174387 993 +modern 1 16 4.174387 4.174387 966 +permit 1 16 4.174387 4.174387 962 +choic 1 16 4.174387 4.174387 979 +susan 1 15 4.248495 4.248495 1050 +shown 1 14 4.317488 4.317488 1080 +conduct 1 14 4.317488 4.317488 1065 +dean 1 14 4.317488 4.317488 1104 +levi 1 14 4.317488 4.317488 1093 +convert 1 13 4.382027 4.382027 1122 +amount 1 12 4.465908 4.465908 1208 +hank 1 12 4.465908 4.465908 1253 +philadelphia 1 12 4.465908 4.465908 1244 +multithread 1 11 4.553877 4.553877 1315 +cycl 1 11 4.553877 4.553877 1335 +itali 1 11 4.553877 4.553877 1378 +equip 1 10 4.653960 4.653960 1459 +santa 1 10 4.653960 4.653960 1441 +face 1 9 4.753590 4.753590 1501 +significantli 1 9 4.753590 4.753590 1508 +gain 1 8 4.875197 4.875197 1730 +egger 1 8 4.875197 4.875197 1695 +jack 1 8 4.875197 4.875197 1780 +joel 1 8 4.875197 4.875197 1698 +microprocessor 1 7 5.010635 5.010635 1808 +maxim 1 7 5.010635 5.010635 1944 +simultan 1 6 5.164786 5.164786 2155 +superscalar 1 6 5.164786 5.164786 2082 +multiprogram 1 6 5.164786 5.164786 2010 +rebecca 1 6 5.164786 5.164786 2174 +tullsen 1 6 5.164786 5.164786 2081 +crucial 1 5 5.347108 5.347108 2384 +compet 1 5 5.347108 5.347108 2462 +fetch 1 5 5.347108 5.347108 2567 +hide 1 4 5.568345 5.568345 2996 +throughput 1 4 5.568345 5.568345 2993 +allevi 1 3 5.857933 5.857933 3643 +interchang 1 3 5.857933 5.857933 3893 +peoplefaculti 1 3 5.857933 5.857933 3981 +emer 1 3 5.857933 5.857933 3969 +stamm 1 3 5.857933 5.857933 3970 +affair 1 3 5.857933 5.857933 3916 +andd 1 2 6.263398 6.263398 4346 +suif 1 2 6.263398 6.263398 5944 +lojlo 1 2 6.263398 6.263398 5943 +pagesimultan 1 1 6.957497 6.957497 17165 +projectoverviewpeoplepubl 1 1 6.957497 6.957497 17166 +overviewth 1 1 6.957497 6.957497 17167 +interleav 1 1 6.957497 6.957497 17168 +differentthread 1 1 6.957497 6.957497 17169 +issuefeatur 1 1 6.957497 6.957497 17170 +abilityof 1 1 6.957497 6.957497 17171 +contextsar 1 1 6.957497 6.957497 17172 +exploitthread 1 1 6.957497 6.957497 17173 +formsof 1 1 6.957497 6.957497 17174 +havedemonstr 1 1 6.957497 6.957497 17175 +improvesprocessor 1 1 6.957497 6.957497 17176 +parallelworkload 1 1 6.957497 6.957497 17177 +achievedin 1 1 6.957497 6.957497 17178 +ordersuperscalar 1 1 6.957497 6.957497 17179 +synchronizationtechniqu 1 1 6.957497 6.957497 17180 +otherarchitectur 1 1 6.957497 6.957497 17181 +levygradu 1 1 6.957497 6.957497 17182 +tullsenindustri 1 1 6.957497 6.957497 17183 +andh 1 1 6.957497 6.957497 17184 +margherita 1 1 6.957497 6.957497 17185 +ligur 1 1 6.957497 6.957497 17186 +doon 1 1 6.957497 6.957497 17187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..4ee293c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +develop 1 174 1.791759 1.791759 53 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +report 1 131 2.079442 2.079442 92 +dayton 1 119 2.079442 2.079442 104 +peopl 1 96 2.302585 2.302585 132 +technic 1 100 2.302585 2.302585 140 +question 1 91 2.397895 2.397895 141 +member 1 84 2.484907 2.484907 165 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +faculti 1 56 2.890372 2.890372 325 +three 1 54 2.944439 2.944439 330 +undergradu 1 54 2.944439 2.944439 338 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +frequent 1 49 3.044522 3.044522 367 +answer 1 45 3.135494 3.135494 391 +offer 1 43 3.178054 3.178054 414 +futur 1 41 3.218876 3.218876 427 +form 1 39 3.258097 3.258097 443 +annual 1 40 3.258097 3.258097 458 +streetmadison 1 38 3.295837 3.295837 474 +award 1 34 3.401197 3.401197 523 +statist 1 35 3.401197 3.401197 521 +dissert 1 32 3.465736 3.465736 549 +scientist 1 31 3.496508 3.496508 560 +ask 1 28 3.610918 3.610918 597 +consist 1 26 3.688879 3.688879 651 +doctor 1 24 3.761200 3.761200 709 +departmentunivers 1 24 3.761200 3.761200 711 +alumni 1 21 3.912023 3.912023 807 +util 1 21 3.912023 3.912023 774 +voic 1 21 3.912023 3.912023 806 +excel 1 19 4.007333 4.007333 868 +young 1 16 4.174387 4.174387 991 +women 1 16 4.174387 4.174387 1004 +countri 1 15 4.248495 4.248495 1059 +rank 1 14 4.317488 4.317488 1063 +packard 1 10 4.653960 4.653960 1444 +fellowship 1 10 4.653960 4.653960 1460 +presidenti 1 8 4.875197 4.875197 1737 +pagecomput 1 7 5.010635 5.010635 1900 +timet 1 3 5.857933 5.857933 3471 +guidebook 1 2 6.263398 6.263398 4643 +departmentabout 1 1 6.957497 6.957497 17188 +departmentour 1 1 6.957497 6.957497 17189 +fourteen 1 1 6.957497 6.957497 17190 +incent 1 1 6.957497 6.957497 17191 +colophon 1 1 6.957497 6.957497 17192 +infocomput 1 1 6.957497 6.957497 17193 +madisona 1 1 6.957497 6.957497 17194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..f1519edc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +provid 1 121 2.079442 2.079442 94 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +imag 1 91 2.397895 2.397895 161 +select 1 91 2.397895 2.397895 154 +control 1 82 2.484907 2.484907 164 +activ 1 84 2.484907 2.484907 182 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +order 1 69 2.708050 2.708050 249 +view 1 70 2.708050 2.708050 254 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +point 1 58 2.890372 2.890372 319 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +local 1 55 2.944439 2.944439 334 +suggest 1 53 2.944439 2.944439 331 +maintain 1 51 2.995732 2.995732 342 +approach 1 48 3.044522 3.044522 366 +show 1 43 3.178054 3.178054 417 +combin 1 42 3.218876 3.218876 421 +correct 1 38 3.295837 3.295837 462 +slide 1 38 3.295837 3.295837 467 +purpos 1 37 3.332205 3.332205 481 +connect 1 37 3.332205 3.332205 485 +either 1 35 3.401197 3.401197 506 +global 1 34 3.401197 3.401197 520 +posit 1 31 3.496508 3.496508 552 +exist 1 30 3.555348 3.555348 569 +focus 1 29 3.583519 3.583519 584 +consid 1 29 3.583519 3.583519 590 +task 1 25 3.737670 3.737670 678 +strategi 1 25 3.737670 3.737670 682 +motion 1 24 3.761200 3.761200 699 +other 1 24 3.761200 3.761200 697 +reach 1 24 3.761200 3.761200 688 +frame 1 24 3.761200 3.761200 684 +decis 1 23 3.806662 3.806662 728 +mobil 1 23 3.806662 3.806662 730 +lead 1 23 3.806662 3.806662 718 +defin 1 22 3.850148 3.850148 746 +navig 1 21 3.912023 3.912023 796 +avoid 1 21 3.912023 3.912023 799 +region 1 19 4.007333 4.007333 875 +geometr 1 19 4.007333 4.007333 852 +behavior 1 18 4.060443 4.060443 881 +minim 1 18 4.060443 4.060443 887 +attempt 1 17 4.110874 4.110874 917 +chuck 1 14 4.317488 4.317488 1108 +consider 1 14 4.317488 4.317488 1076 +scene 1 14 4.317488 4.317488 1114 +achiev 1 14 4.317488 4.317488 1088 +deriv 1 13 4.382027 4.382027 1145 +emploi 1 12 4.465908 4.465908 1284 +shape 1 12 4.465908 4.465908 1245 +abil 1 11 4.553877 4.553877 1341 +arbitrari 1 11 4.553877 4.553877 1359 +princip 1 10 4.653960 4.653960 1397 +relationship 1 10 4.653960 4.653960 1383 +dyer 1 9 4.753590 4.753590 1573 +recoveri 1 9 4.753590 4.753590 1474 +observ 1 9 4.753590 4.753590 1578 +surfac 1 9 4.753590 4.753590 1574 +formul 1 8 4.875197 4.875197 1733 +maxim 1 7 5.010635 5.010635 1944 +smooth 1 7 5.010635 5.010635 1855 +viewpoint 1 6 5.164786 5.164786 2116 +recov 1 6 5.164786 5.164786 2235 +reconstruct 1 6 5.164786 5.164786 2170 +provabl 1 5 5.347108 5.347108 2558 +align 1 4 5.568345 5.568345 2863 +visibl 1 4 5.568345 5.568345 2994 +simplifi 1 4 5.568345 5.568345 3066 +kyro 1 2 6.263398 6.263398 6063 +kutulako 1 2 6.263398 6.263398 6064 +descriptionof 1 2 6.263398 6.263398 5513 +thequalit 1 2 6.263398 6.263398 5622 +simpleobserv 1 1 6.957497 6.957497 17196 +propertieseasi 1 1 6.957497 6.957497 17197 +fixat 1 1 6.957497 6.957497 17198 +toperform 1 1 6.957497 6.957497 17199 +obstacl 1 1 6.957497 6.957497 17200 +ourwork 1 1 6.957497 6.957497 17201 +pointof 1 1 6.957497 6.957497 17202 +makesimpl 1 1 6.957497 6.957497 17203 +geometryof 1 1 6.957497 6.957497 17204 +thesurfac 1 1 6.957497 6.957497 17205 +generalobserv 1 1 6.957497 6.957497 17206 +objectthan 1 1 6.957497 6.957497 17207 +beexploit 1 1 6.957497 6.957497 17208 +anddeterminist 1 1 6.957497 6.957497 17209 +localshap 1 1 6.957497 6.957497 17210 +qualitativestrategi 1 1 6.957497 6.957497 17211 +smoothli 1 1 6.957497 6.957497 17195 +viewingdirect 1 1 6.957497 6.957497 17212 +selectedpoint 1 1 6.957497 6.957497 17213 +observationso 1 1 6.957497 6.957497 17214 +observationand 1 1 6.957497 6.957497 17215 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..d2d9e3c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +studi 1 120 2.079442 2.079442 91 +final 1 116 2.197225 2.197225 108 +need 1 98 2.302585 2.302585 135 +imag 1 91 2.397895 2.397895 161 +contain 1 81 2.484907 2.484907 174 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +optim 1 79 2.564949 2.564949 197 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +special 1 56 2.890372 2.890372 320 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +case 1 51 2.995732 2.995732 351 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +visual 1 48 3.044522 3.044522 372 +combin 1 42 3.218876 3.218876 421 +small 1 39 3.258097 3.258097 447 +field 1 37 3.332205 3.332205 482 +random 1 34 3.401197 3.401197 511 +global 1 34 3.401197 3.401197 520 +transform 1 32 3.465736 3.465736 542 +consid 1 29 3.583519 3.583519 590 +turn 1 29 3.583519 3.583519 586 +framework 1 28 3.610918 3.610918 606 +determin 1 27 3.637586 3.637586 630 +detect 1 26 3.688879 3.688879 646 +experiment 1 26 3.688879 3.688879 645 +valu 1 25 3.737670 3.737670 665 +task 1 25 3.737670 3.737670 678 +initi 1 23 3.806662 3.806662 717 +recognit 1 23 3.806662 3.806662 723 +region 1 19 4.007333 4.007333 875 +along 1 18 4.060443 4.060443 878 +minim 1 18 4.060443 4.060443 887 +lower 1 18 4.060443 4.060443 886 +regular 1 17 4.110874 4.110874 929 +estim 1 17 4.110874 4.110874 930 +conduct 1 14 4.317488 4.317488 1065 +directli 1 13 4.382027 4.382027 1141 +deriv 1 13 4.382027 4.382027 1145 +arbitrari 1 11 4.553877 4.553877 1359 +valid 1 11 4.553877 4.553877 1299 +classif 1 9 4.753590 4.753590 1586 +classifi 1 9 4.753590 4.753590 1537 +equival 1 9 4.753590 4.753590 1496 +extract 1 8 4.875197 4.875197 1728 +formul 1 8 4.875197 4.875197 1733 +invari 1 8 4.875197 4.875197 1748 +furthermor 1 6 5.164786 5.164786 2141 +chin 1 5 5.347108 5.347108 2408 +snake 1 5 5.347108 5.347108 2281 +yield 1 5 5.347108 5.347108 2458 +stabl 1 5 5.347108 5.347108 2309 +markov 1 5 5.347108 5.347108 2280 +contour 1 4 5.568345 5.568345 2812 +subsequ 1 4 5.568345 5.568345 2665 +bayesian 1 4 5.568345 5.568345 2671 +rigor 1 4 5.568345 5.568345 3030 +implicitli 1 3 5.857933 5.857933 3620 +energi 1 3 5.857933 5.857933 3950 +hough 1 3 5.857933 5.857933 3527 +influenc 1 3 5.857933 5.857933 3349 +deform 1 2 6.263398 6.263398 6065 +criterion 1 2 6.263398 6.263398 5885 +pearson 1 2 6.263398 6.263398 5245 +summat 1 2 6.263398 6.263398 5325 +peak 1 2 6.263398 6.263398 5553 +confirm 1 2 6.263398 6.263398 4101 +fung 1 1 6.957497 6.957497 17217 +roland 1 1 6.957497 6.957497 17218 +noisi 1 1 6.957497 6.957497 17216 +ofact 1 1 6.957497 6.957497 17219 +minimax 1 1 6.957497 6.957497 17220 +wherebi 1 1 6.957497 6.957497 17221 +anduniqu 1 1 6.957497 6.957497 17222 +priordistribut 1 1 6.957497 6.957497 17223 +exert 1 1 6.957497 6.957497 17224 +posterior 1 1 6.957497 6.957497 17225 +withpattern 1 1 6.957497 6.957497 17226 +nearman 1 1 6.957497 6.957497 17227 +lemma 1 1 6.957497 6.957497 17228 +classificationtest 1 1 6.957497 6.957497 17229 +margin 1 1 6.957497 6.957497 17230 +gsnake 1 1 6.957497 6.957497 17231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..68533927 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +specif 1 106 2.197225 2.197225 106 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +call 1 91 2.397895 2.397895 153 +follow 1 92 2.397895 2.397895 143 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +graphic 1 90 2.397895 2.397895 147 +contain 1 81 2.484907 2.484907 174 +thing 1 84 2.484907 2.484907 189 +control 1 82 2.484907 2.484907 164 +help 1 83 2.484907 2.484907 175 +complet 1 77 2.564949 2.564949 208 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +order 1 69 2.708050 2.708050 249 +foundat 1 62 2.772589 2.772589 286 +function 1 62 2.772589 2.772589 275 +guid 1 63 2.772589 2.772589 267 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +abstract 1 62 2.772589 2.772589 276 +complex 1 64 2.772589 2.772589 269 +type 1 61 2.833213 2.833213 296 +special 1 56 2.890372 2.890372 320 +scientif 1 53 2.944439 2.944439 341 +allow 1 53 2.944439 2.944439 333 +particular 1 51 2.995732 2.995732 352 +visual 1 48 3.044522 3.044522 372 +principl 1 48 3.044522 3.044522 357 +possibl 1 47 3.091042 3.091042 378 +natur 1 44 3.135494 3.135494 406 +anoth 1 45 3.135494 3.135494 408 +show 1 43 3.178054 3.178054 417 +howev 1 41 3.218876 3.218876 422 +map 1 39 3.258097 3.258097 452 +brian 1 38 3.295837 3.295837 466 +paul 1 38 3.295837 3.295837 471 +prototyp 1 38 3.295837 3.295837 463 +close 1 38 3.295837 3.295837 465 +purpos 1 37 3.332205 3.332205 481 +tree 1 36 3.367296 3.367296 492 +approxim 1 35 3.401197 3.401197 509 +idea 1 32 3.465736 3.465736 545 +given 1 32 3.465736 3.465736 538 +express 1 32 3.465736 3.465736 540 +scientist 1 31 3.496508 3.496508 560 +anim 1 31 3.496508 3.496508 557 +domain 1 30 3.555348 3.555348 564 +specifi 1 30 3.555348 3.555348 568 +graph 1 30 3.555348 3.555348 576 +built 1 29 3.583519 3.583519 592 +arrai 1 27 3.637586 3.637586 627 +quit 1 27 3.637586 3.637586 633 +repres 1 26 3.688879 3.688879 656 +altern 1 26 3.688879 3.688879 641 +fundament 1 25 3.737670 3.737670 661 +frame 1 24 3.761200 3.761200 684 +interpret 1 24 3.761200 3.761200 686 +seri 1 24 3.761200 3.761200 708 +flow 1 24 3.761200 3.761200 700 +displai 1 23 3.806662 3.806662 712 +sequenc 1 23 3.806662 3.806662 734 +size 1 23 3.806662 3.806662 713 +variabl 1 23 3.806662 3.806662 715 +defin 1 22 3.850148 3.850148 746 +color 1 22 3.850148 3.850148 762 +thu 1 21 3.912023 3.912023 773 +fact 1 21 3.912023 3.912023 780 +assum 1 19 4.007333 4.007333 845 +appropri 1 18 4.060443 4.060443 883 +along 1 18 4.060443 4.060443 878 +render 1 17 4.110874 4.110874 947 +upon 1 16 4.174387 4.174387 978 +condit 1 16 4.174387 4.174387 975 +alreadi 1 16 4.174387 4.174387 963 +precis 1 15 4.248495 4.248495 1023 +chuck 1 14 4.317488 4.317488 1108 +finit 1 14 4.317488 4.317488 1106 +context 1 13 4.382027 4.382027 1153 +recurs 1 13 4.382027 4.382027 1127 +amount 1 12 4.465908 4.465908 1208 +bill 1 11 4.553877 4.553877 1297 +sens 1 11 4.553877 4.553877 1305 +volum 1 11 4.553877 4.553877 1347 +primit 1 11 4.553877 4.553877 1317 +relationship 1 10 4.653960 4.653960 1383 +dyer 1 9 4.753590 4.753590 1573 +assumpt 1 9 4.753590 4.753590 1514 +ideal 1 8 4.875197 4.875197 1630 +satisfi 1 8 4.875197 4.875197 1694 +therefor 1 7 5.010635 5.010635 1822 +fromth 1 7 5.010635 5.010635 1802 +pipelin 1 7 5.010635 5.010635 1830 +analyt 1 7 5.010635 5.010635 1913 +consequ 1 6 5.164786 5.164786 1989 +tupl 1 5 5.347108 5.347108 2244 +steer 1 5 5.347108 5.347108 2328 +infinit 1 4 5.568345 5.568345 2596 +wherea 1 4 5.568345 5.568345 2597 +pixel 1 4 5.568345 5.568345 2831 +encod 1 4 5.568345 5.568345 2929 +rigor 1 4 5.568345 5.568345 3030 +fora 1 4 5.568345 5.568345 2697 +lattic 1 3 5.857933 5.857933 3721 +interfacefor 1 3 5.857933 5.857933 3534 +hibbard 1 2 6.263398 6.263398 6066 +theidea 1 2 6.263398 6.263398 5428 +ofdata 1 2 6.263398 6.263398 6038 +themathemat 1 2 6.263398 6.263398 4421 +isomorph 1 2 6.263398 6.263398 5976 +scientificdata 1 2 6.263398 6.263398 6067 +scalar 1 2 6.263398 6.263398 4815 +radianc 1 2 6.263398 6.263398 6068 +temperatur 1 2 6.263398 6.263398 5985 +ofcours 1 2 6.263398 6.263398 4064 +axi 1 2 6.263398 6.263398 6069 +remark 1 2 6.263398 6.263398 4124 +wedo 1 2 6.263398 6.263398 5772 +datatyp 1 2 6.263398 6.263398 4129 +calleda 1 1 6.957497 6.957497 17234 +adha 1 1 6.957497 6.957497 17235 +objectsrepres 1 1 6.957497 6.957497 17236 +objectsfrequ 1 1 6.957497 6.957497 17237 +functionswith 1 1 6.957497 6.957497 17238 +containfinit 1 1 6.957497 6.957497 17239 +chosenfrom 1 1 6.957497 6.957497 17240 +palett 1 1 6.957497 6.957497 17241 +numbersof 1 1 6.957497 6.957497 17242 +computationalmodel 1 1 6.957497 6.957497 17243 +informationcont 1 1 6.957497 6.957497 17244 +expressivenesscondit 1 1 6.957497 6.957497 17232 +thatdisplai 1 1 6.957497 6.957497 17245 +onlythos 1 1 6.957497 6.957497 17246 +itimpl 1 1 6.957497 6.957497 17247 +satisfyingth 1 1 6.957497 6.957497 17248 +expressivenss 1 1 6.957497 6.957497 17249 +onhow 1 1 6.957497 6.957497 17250 +wecan 1 1 6.957497 6.957497 17251 +howprecis 1 1 6.957497 6.957497 17252 +voxelresolut 1 1 6.957497 6.957497 17253 +visualizationprocess 1 1 6.957497 6.957497 17254 +objectsto 1 1 6.957497 6.957497 17255 +theexpress 1 1 6.957497 6.957497 17256 +primitivevari 1 1 6.957497 6.957497 17257 +latitud 1 1 6.957497 6.957497 17258 +constructor 1 1 6.957497 6.957497 17259 +appropriatefor 1 1 6.957497 6.957497 17260 +containsth 1 1 6.957497 6.957497 17261 +canalso 1 1 6.957497 6.957497 17262 +displayi 1 1 6.957497 6.957497 17263 +voxel 1 1 6.957497 6.957497 17233 +graphicsprimit 1 1 6.957497 6.957497 17264 +locationand 1 1 6.957497 6.957497 17265 +animationsequ 1 1 6.957497 6.957497 17266 +thedisplai 1 1 6.957497 6.957497 17267 +isnatur 1 1 6.957497 6.957497 17268 +andtemperatur 1 1 6.957497 6.957497 17269 +calledvi 1 1 6.957497 6.957497 17270 +adthat 1 1 6.957497 6.957497 17271 +theircomput 1 1 6.957497 6.957497 17272 +theirprogram 1 1 6.957497 6.957497 17273 +thevi 1 1 6.957497 6.957497 17274 +vvof 1 1 6.957497 6.957497 17275 +thatsatisfi 1 1 6.957497 6.957497 17276 +implementationi 1 1 6.957497 6.957497 17277 +auser 1 1 6.957497 6.957497 17278 +abstractionof 1 1 6.957497 6.957497 17279 +ofmap 1 1 6.957497 6.957497 17280 +defineddata 1 1 6.957497 6.957497 17281 +ingener 1 1 6.957497 6.957497 17282 +usualapproach 1 1 6.957497 6.957497 17283 +bywrit 1 1 6.957497 6.957497 17284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..794eacb9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +world 1 115 2.197225 2.197225 126 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +larg 1 82 2.484907 2.484907 168 +resourc 1 81 2.484907 2.484907 172 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +goal 1 66 2.708050 2.708050 250 +polici 1 64 2.772589 2.772589 279 +collect 1 65 2.772589 2.772589 268 +guid 1 63 2.772589 2.772589 267 +scientist 1 31 3.496508 3.496508 560 +team 1 27 3.637586 3.637586 625 +challeng 1 26 3.688879 3.688879 653 +enabl 1 26 3.688879 3.688879 655 +increas 1 20 3.951244 3.951244 829 +edulast 1 17 4.110874 4.110874 927 +admin 1 9 4.753590 4.753590 1476 +pool 1 6 5.164786 5.164786 2225 +condor 1 5 5.347108 5.347108 2577 +own 1 5 5.347108 5.347108 2531 +throughput 1 4 5.568345 5.568345 2993 +deploi 1 3 5.857933 5.857933 3750 +evaluatemechan 1 1 6.957497 6.957497 17285 +technologicaland 1 1 6.957497 6.957497 17286 +sociolog 1 1 6.957497 6.957497 17287 +suggestionscondor 1 1 6.957497 6.957497 17288 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..18006516 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +septemb 1 65 2.772589 2.772589 274 +next 1 34 3.401197 3.401197 517 +miron 1 14 4.317488 4.317488 1110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..f33b1c60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +structur 1 106 2.197225 2.197225 105 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +sinc 1 90 2.397895 2.397895 159 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +member 1 84 2.484907 2.484907 165 +contain 1 81 2.484907 2.484907 174 +wide 1 84 2.484907 2.484907 185 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +interfac 1 79 2.564949 2.564949 209 +optim 1 79 2.564949 2.564949 197 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +sourc 1 77 2.564949 2.564949 201 +effici 1 73 2.639057 2.639057 233 +addit 1 74 2.639057 2.639057 228 +degre 1 69 2.708050 2.708050 259 +main 1 67 2.708050 2.708050 256 +complex 1 64 2.772589 2.772589 269 +organ 1 65 2.772589 2.772589 265 +collect 1 65 2.772589 2.772589 268 +interact 1 62 2.772589 2.772589 270 +evalu 1 64 2.772589 2.772589 266 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +variou 1 56 2.890372 2.890372 317 +sever 1 56 2.890372 2.890372 322 +overview 1 56 2.890372 2.890372 323 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +instruct 1 53 2.944439 2.944439 332 +investig 1 51 2.995732 2.995732 353 +made 1 44 3.135494 3.135494 398 +term 1 43 3.178054 3.178054 411 +combin 1 42 3.218876 3.218876 421 +programm 1 39 3.258097 3.258097 445 +transact 1 39 3.258097 3.258097 438 +announc 1 40 3.258097 3.258097 441 +submit 1 39 3.258097 3.258097 440 +manual 1 35 3.401197 3.401197 504 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +domain 1 30 3.555348 3.555348 564 +rang 1 30 3.555348 3.555348 565 +releas 1 28 3.610918 3.610918 616 +linux 1 27 3.637586 3.637586 631 +rule 1 26 3.688879 3.688879 638 +enhanc 1 26 3.688879 3.688879 644 +relev 1 26 3.688879 3.688879 637 +comp 1 26 3.688879 3.688879 650 +strategi 1 25 3.737670 3.737670 682 +seri 1 24 3.761200 3.761200 708 +variabl 1 23 3.806662 3.806662 715 +disk 1 22 3.850148 3.850148 747 +instal 1 22 3.850148 3.850148 754 +among 1 21 3.912023 3.912023 781 +newsgroup 1 21 3.912023 3.912023 783 +binari 1 20 3.951244 3.951244 823 +edulast 1 17 4.110874 4.110874 927 +choos 1 16 4.174387 4.174387 964 +permit 1 16 4.174387 4.174387 962 +choic 1 16 4.174387 4.174387 979 +atth 1 15 4.248495 4.248495 1019 +indic 1 15 4.248495 4.248495 1013 +forth 1 13 4.382027 4.382027 1186 +misc 1 13 4.382027 4.382027 1124 +robust 1 12 4.465908 4.465908 1271 +deduct 1 12 4.465908 4.465908 1236 +solari 1 12 4.465908 4.465908 1238 +stai 1 12 4.465908 4.465908 1215 +primit 1 11 4.553877 4.553877 1317 +underli 1 10 4.653960 4.653960 1410 +rich 1 10 4.653960 4.653960 1396 +modul 1 10 4.653960 4.653960 1434 +resid 1 10 4.653960 4.653960 1461 +declar 1 9 4.753590 4.753590 1526 +desir 1 9 4.753590 4.753590 1542 +readm 1 8 4.875197 4.875197 1699 +canb 1 7 5.010635 5.010635 1846 +aggreg 1 6 5.164786 5.164786 2219 +coral 1 5 5.347108 5.347108 2538 +augment 1 5 5.347108 5.347108 2350 +tupl 1 5 5.347108 5.347108 2244 +quantifi 1 5 5.347108 5.347108 2525 +lang 1 5 5.347108 5.347108 2294 +imper 1 4 5.568345 5.568345 3067 +delet 1 4 5.568345 5.568345 2691 +suno 1 4 5.568345 5.568345 2790 +claus 1 3 5.857933 5.857933 3733 +hpux 1 3 5.857933 5.857933 3780 +objectiveoverviewreleas 1 2 6.263398 6.263398 6070 +informationse 1 2 6.263398 6.263398 6071 +horn 1 2 6.263398 6.263398 6072 +negat 1 2 6.263398 6.263398 6073 +andautomat 1 2 6.263398 6.263398 5413 +grab 1 2 6.263398 6.263398 5723 +reciev 1 2 6.263398 6.263398 5600 +projectcor 1 1 6.957497 6.957497 17290 +projectdocu 1 1 6.957497 6.957497 17291 +coralpeopl 1 1 6.957497 6.957497 17292 +coraloth 1 1 6.957497 6.957497 17293 +madisonobject 1 1 6.957497 6.957497 17294 +efficientdeduct 1 1 6.957497 6.957497 17295 +coralsystem 1 1 6.957497 6.957497 17296 +durationof 1 1 6.957497 6.957497 17297 +declaritiveand 1 1 6.957497 6.957497 17298 +supportsgener 1 1 6.957497 6.957497 17299 +coralimplement 1 1 6.957497 6.957497 17300 +modulein 1 1 6.957497 6.957497 17301 +insertand 1 1 6.957497 6.957497 17302 +canprogram 1 1 6.957497 6.957497 17303 +withcor 1 1 6.957497 6.957497 17304 +allowingc 1 1 6.957497 6.957497 17305 +coralimplemen 1 1 6.957497 6.957497 17306 +theexodusstorag 1 1 6.957497 6.957497 17307 +manang 1 1 6.957497 6.957497 17308 +aclient 1 1 6.957497 6.957497 17309 +nobin 1 1 6.957497 6.957497 17289 +requiringy 1 1 6.957497 6.957497 17310 +announcemnt 1 1 6.957497 6.957497 17311 +listwhich 1 1 6.957497 6.957497 17312 +shawn 1 1 6.957497 6.957497 17313 +flisakowski 1 1 6.957497 6.957497 17314 +flisakow 1 1 6.957497 6.957497 17315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..daacf4d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +well 1 109 2.197225 2.197225 121 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +search 1 95 2.397895 2.397895 155 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +librari 1 87 2.484907 2.484907 181 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +requir 1 81 2.484907 2.484907 167 +optim 1 79 2.564949 2.564949 197 +sourc 1 77 2.564949 2.564949 201 +interfac 1 79 2.564949 2.564949 209 +method 1 80 2.564949 2.564949 213 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +nation 1 74 2.639057 2.639057 240 +result 1 65 2.772589 2.772589 281 +collect 1 65 2.772589 2.772589 268 +evalu 1 64 2.772589 2.772589 266 +function 1 62 2.772589 2.772589 275 +laboratori 1 63 2.772589 2.772589 292 +content 1 59 2.833213 2.833213 302 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +point 1 58 2.890372 2.890372 319 +direct 1 57 2.890372 2.890372 316 +major 1 56 2.890372 2.890372 315 +overview 1 56 2.890372 2.890372 323 +three 1 54 2.944439 2.944439 330 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +run 1 51 2.995732 2.995732 347 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +give 1 50 3.044522 3.044522 359 +archiv 1 49 3.044522 3.044522 364 +possibl 1 47 3.091042 3.091042 378 +directori 1 45 3.135494 3.135494 396 +describ 1 45 3.135494 3.135494 400 +keep 1 44 3.135494 3.135494 409 +linear 1 41 3.218876 3.218876 431 +small 1 39 3.258097 3.258097 447 +form 1 39 3.258097 3.258097 443 +origin 1 38 3.295837 3.295837 472 +download 1 36 3.367296 3.367296 489 +survei 1 35 3.401197 3.401197 513 +everi 1 34 3.401197 3.401197 519 +approxim 1 35 3.401197 3.401197 509 +michael 1 35 3.401197 3.401197 514 +within 1 33 3.433987 3.433987 525 +given 1 32 3.465736 3.465736 538 +taken 1 31 3.496508 3.496508 555 +exist 1 30 3.555348 3.555348 569 +option 1 30 3.555348 3.555348 575 +steve 1 29 3.583519 3.583519 594 +becom 1 28 3.610918 3.610918 603 +measur 1 28 3.610918 3.610918 609 +determin 1 27 3.637586 3.637586 630 +subject 1 26 3.688879 3.688879 647 +relev 1 26 3.688879 3.688879 637 +consist 1 26 3.688879 3.688879 651 +compar 1 26 3.688879 3.688879 648 +strategi 1 25 3.737670 3.737670 682 +known 1 24 3.761200 3.761200 702 +sequenc 1 23 3.806662 3.806662 734 +equat 1 23 3.806662 3.806662 724 +serv 1 22 3.850148 3.850148 758 +almost 1 22 3.850148 3.850148 742 +path 1 21 3.912023 3.912023 778 +similar 1 21 3.912023 3.912023 771 +avoid 1 21 3.912023 3.912023 799 +entir 1 20 3.951244 3.951244 811 +along 1 18 4.060443 4.060443 878 +spars 1 16 4.174387 4.174387 989 +role 1 14 4.317488 4.317488 1101 +nonlinear 1 14 4.317488 4.317488 1107 +matlab 1 14 4.317488 4.317488 1081 +easili 1 14 4.317488 4.317488 1077 +econom 1 13 4.382027 4.382027 1184 +cannot 1 13 4.382027 4.382027 1144 +directli 1 13 4.382027 4.382027 1141 +step 1 13 4.382027 4.382027 1138 +forth 1 13 4.382027 4.382027 1186 +deriv 1 13 4.382027 4.382027 1145 +evolv 1 12 4.465908 4.465908 1223 +emploi 1 12 4.465908 4.465908 1284 +iter 1 12 4.465908 4.465908 1206 +regard 1 11 4.553877 4.553877 1309 +underli 1 10 4.653960 4.653960 1410 +establish 1 9 4.753590 4.753590 1532 +routin 1 9 4.753590 4.753590 1549 +ferri 1 8 4.875197 4.875197 1715 +mile 1 8 4.875197 4.875197 1743 +formul 1 8 4.875197 4.875197 1733 +solver 1 7 5.010635 5.010635 1911 +newton 1 7 5.010635 5.010635 1824 +secondari 1 7 5.010635 5.010635 1884 +smooth 1 7 5.010635 5.010635 1855 +converg 1 7 5.010635 5.010635 1844 +zero 1 7 5.010635 5.010635 1896 +divers 1 6 5.164786 5.164786 2232 +mix 1 6 5.164786 5.164786 2200 +freeli 1 6 5.164786 5.164786 2014 +subsystem 1 6 5.164786 5.164786 2015 +decad 1 5 5.347108 5.347108 2455 +complementari 1 5 5.347108 5.347108 2523 +pivot 1 5 5.347108 5.347108 2426 +merit 1 5 5.347108 5.347108 2466 +interior 1 5 5.347108 5.347108 2439 +argonn 1 5 5.347108 5.347108 2461 +monograph 1 4 5.568345 5.568345 2860 +areavail 1 4 5.568345 5.568345 2810 +colorado 1 4 5.568345 5.568345 2938 +algorithmsand 1 4 5.568345 5.568345 2680 +trick 1 4 5.568345 5.568345 2967 +complementar 1 3 5.857933 5.857933 3999 +neta 1 3 5.857933 5.857933 3789 +forthes 1 3 5.857933 5.857933 3199 +toolbox 1 3 5.857933 5.857933 3112 +andm 1 3 5.857933 5.857933 3901 +violat 1 3 5.857933 5.857933 3211 +engineeringand 1 3 5.857933 5.857933 3779 +preprocessor 1 3 5.857933 5.857933 3844 +energi 1 3 5.857933 5.857933 3950 +gam 1 2 6.263398 6.263398 4758 +edufor 1 2 6.263398 6.263398 5831 +lemk 1 2 6.263398 6.263398 5693 +similarto 1 2 6.263398 6.263398 6074 +anapproxim 1 2 6.263398 6.263398 5045 +norm 1 2 6.263398 6.263398 5643 +perturb 1 2 6.263398 6.263398 6075 +leadto 1 2 6.263398 6.263398 5350 +residu 1 2 6.263398 6.263398 4741 +thecurr 1 2 6.263398 6.263398 5862 +equilibrium 1 2 6.263398 6.263398 4259 +thegam 1 2 6.263398 6.263398 5430 +pointmethod 1 2 6.263398 6.263398 4835 +fruitfuldisciplin 1 1 6.957497 6.957497 17321 +incomplementar 1 1 6.957497 6.957497 17322 +meetingsof 1 1 6.957497 6.957497 17323 +forcomplementar 1 1 6.957497 6.957497 17324 +researcherssoftwar 1 1 6.957497 6.957497 17325 +mcplib 1 1 6.957497 6.957497 17317 +problemdescript 1 1 6.957497 6.957497 17326 +frommatlab 1 1 6.957497 6.957497 17327 +functionevalu 1 1 6.957497 6.957497 17318 +jacobian 1 1 6.957497 6.957497 17328 +specificvers 1 1 6.957497 6.957497 17329 +hook 1 1 6.957497 6.957497 17330 +rutherford 1 1 6.957497 6.957497 17331 +classicaljosephi 1 1 6.957497 6.957497 17332 +linearizedsubproblem 1 1 6.957497 6.957497 17333 +defineth 1 1 6.957497 6.957497 17334 +dampedlinesearch 1 1 6.957497 6.957497 17335 +infeas 1 1 6.957497 6.957497 17336 +restartprocedur 1 1 6.957497 6.957497 17337 +totermin 1 1 6.957497 6.957497 17338 +subproblem 1 1 6.957497 6.957497 17319 +rescal 1 1 6.957497 6.957497 17339 +equilibr 1 1 6.957497 6.957497 17340 +elementsappear 1 1 6.957497 6.957497 17341 +mcpor 1 1 6.957497 6.957497 17342 +anonsmooth 1 1 6.957497 6.957497 17343 +reformul 1 1 6.957497 6.957497 17344 +algorithmconsist 1 1 6.957497 6.957497 17345 +pathto 1 1 6.957497 6.957497 17346 +aposs 1 1 6.957497 6.957497 17347 +thepath 1 1 6.957497 6.957497 17348 +partiallycomput 1 1 6.957497 6.957497 17349 +relinear 1 1 6.957497 6.957497 17350 +anonmonoton 1 1 6.957497 6.957497 17351 +watchdog 1 1 6.957497 6.957497 17352 +minima 1 1 6.957497 6.957497 17353 +nonsmooth 1 1 6.957497 6.957497 17316 +uponreformul 1 1 6.957497 6.957497 17320 +robustnessimprov 1 1 6.957497 6.957497 17354 +proxim 1 1 6.957497 6.957497 17355 +qpcomp 1 1 6.957497 6.957497 17356 +ishandl 1 1 6.957497 6.957497 17357 +thenapproxim 1 1 6.957497 6.957497 17358 +theaccuraci 1 1 6.957497 6.957497 17359 +mpsge 1 1 6.957497 6.957497 17360 +thatallow 1 1 6.957497 6.957497 17361 +nemsth 1 1 6.957497 6.957497 17362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..59528c99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +manag 1 114 2.197225 2.197225 125 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +requir 1 81 2.484907 2.484907 167 +orient 1 80 2.564949 2.564949 205 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +still 1 50 3.044522 3.044522 362 +michael 1 35 3.401197 3.401197 514 +storag 1 31 3.496508 3.496508 553 +mike 1 24 3.761200 3.761200 703 +prepar 1 20 3.951244 3.951244 824 +benchmark 1 19 4.007333 4.007333 859 +minim 1 18 4.060443 4.060443 887 +carei 1 8 4.875197 4.875197 1781 +licens 1 5 5.347108 5.347108 2520 +exodu 1 4 5.568345 5.568345 3075 +zwill 1 4 5.568345 5.568345 3076 +successor 1 3 5.857933 5.857933 3576 +theexodu 1 2 6.263398 6.263398 6076 +persistentprogram 1 2 6.263398 6.263398 5997 +pageexodu 1 1 6.957497 6.957497 17363 +toolkitnot 1 1 6.957497 6.957497 17364 +succed 1 1 6.957497 6.957497 17365 +theshor 1 1 6.957497 6.957497 17366 +eduprincip 1 1 6.957497 6.957497 17367 +dewittse 1 1 6.957497 6.957497 17368 +exodusshor 1 1 6.957497 6.957497 17369 +exoduslatest 1 1 6.957497 6.957497 17370 +compilercontribut 1 1 6.957497 6.957497 17371 +managera 1 1 6.957497 6.957497 17372 +exodus_al 1 1 6.957497 6.957497 17373 +oodbsdat 1 1 6.957497 6.957497 17374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..905241fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +wisconsin 1 169 1.791759 1.791759 54 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +dayton 1 119 2.079442 2.079442 104 +manag 1 114 2.197225 2.197225 125 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +peopl 1 96 2.302585 2.302585 132 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +graphic 1 90 2.397895 2.397895 147 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +help 1 83 2.484907 2.484907 175 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +addit 1 74 2.639057 2.639057 228 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +david 1 71 2.639057 2.639057 232 +order 1 69 2.708050 2.708050 249 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +creat 1 63 2.772589 2.772589 277 +prof 1 64 2.772589 2.772589 273 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +back 1 60 2.833213 2.833213 297 +point 1 58 2.890372 2.890372 319 +sever 1 56 2.890372 2.890372 322 +sampl 1 53 2.944439 2.944439 339 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +tabl 1 51 2.995732 2.995732 346 +set 1 50 3.044522 3.044522 361 +could 1 46 3.091042 3.091042 383 +video 1 44 3.135494 3.135494 405 +execut 1 45 3.135494 3.135494 404 +examin 1 42 3.218876 3.218876 424 +multipl 1 39 3.258097 3.258097 453 +streetmadison 1 38 3.295837 3.295837 474 +connect 1 37 3.332205 3.332205 485 +especi 1 36 3.367296 3.367296 496 +either 1 35 3.401197 3.401197 506 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +ad 1 32 3.465736 3.465736 544 +built 1 29 3.583519 3.583519 592 +hope 1 28 3.610918 3.610918 610 +manipul 1 27 3.637586 3.637586 624 +client 1 25 3.737670 3.737670 679 +scalabl 1 24 3.761200 3.761200 705 +store 1 24 3.761200 3.761200 693 +handl 1 24 3.761200 3.761200 685 +size 1 23 3.806662 3.806662 713 +brows 1 23 3.806662 3.806662 726 +displai 1 23 3.806662 3.806662 712 +thread 1 23 3.806662 3.806662 722 +defin 1 22 3.850148 3.850148 746 +mpeg 1 20 3.951244 3.951244 831 +benchmark 1 19 4.007333 4.007333 859 +layer 1 17 4.110874 4.110874 926 +spatial 1 16 4.174387 4.174387 988 +massiv 1 15 4.248495 4.248495 1026 +indic 1 15 4.248495 4.248495 1013 +attribut 1 14 4.317488 4.317488 1092 +script 1 13 4.382027 4.382027 1171 +front 1 13 4.382027 4.382027 1154 +menu 1 13 4.382027 4.382027 1156 +composit 1 13 4.382027 4.382027 1150 +context 1 13 4.382027 4.382027 1153 +calcul 1 12 4.465908 4.465908 1268 +emploi 1 12 4.465908 4.465908 1284 +shore 1 11 4.553877 4.553877 1377 +string 1 11 4.553877 4.553877 1340 +persist 1 11 4.553877 4.553877 1367 +abil 1 11 4.553877 4.553877 1341 +vldb 1 10 4.653960 4.653960 1470 +subset 1 10 4.653960 4.653960 1425 +underli 1 10 4.653960 4.653960 1410 +correspond 1 10 4.653960 4.653960 1382 +custom 1 10 4.653960 4.653960 1414 +label 1 10 4.653960 4.653960 1423 +compos 1 9 4.753590 4.753590 1527 +paradis 1 8 4.875197 4.875197 1782 +databasesystem 1 8 4.875197 4.875197 1617 +polygon 1 8 4.875197 4.875197 1723 +sensit 1 8 4.875197 4.875197 1726 +insert 1 8 4.875197 4.875197 1687 +successfulli 1 7 5.010635 5.010635 1869 +geograph 1 6 5.164786 5.164786 2236 +band 1 6 5.164786 5.164786 2198 +invok 1 6 5.164786 5.164786 2079 +drop 1 6 5.164786 5.164786 2008 +syntax 1 6 5.164786 5.164786 2030 +aim 1 5 5.347108 5.347108 2477 +tupl 1 5 5.347108 5.347108 2244 +ship 1 5 5.347108 5.347108 2534 +madisoncomput 1 5 5.347108 5.347108 2391 +andevalu 1 4 5.568345 5.568345 2706 +zoom 1 4 5.568345 5.568345 2961 +insur 1 4 5.568345 5.568345 2939 +providesa 1 3 5.857933 5.857933 3884 +informationse 1 2 6.263398 6.263398 6071 +serverobject 1 2 6.263398 6.263398 6077 +raster 1 2 6.263398 6.263398 6078 +polylin 1 2 6.263398 6.263398 6079 +sketch 1 2 6.263398 6.263398 5946 +extent 1 2 6.263398 6.263398 6080 +paid 1 2 6.263398 6.263398 6081 +biswadeep 1 2 6.263398 6.263398 4805 +projectparadis 1 1 6.957497 6.957497 17375 +frontend 1 1 6.957497 6.957497 17376 +sequoia 1 1 6.957497 6.957497 17377 +iscap 1 1 6.957497 6.957497 17378 +applyingobject 1 1 6.957497 6.957497 17379 +ofstor 1 1 6.957497 6.957497 17380 +tosignificantli 1 1 6.957497 6.957497 17381 +thatcan 1 1 6.957497 6.957497 17382 +andsupport 1 1 6.957497 6.957497 17383 +paradiseprovid 1 1 6.957497 6.957497 17384 +gisappl 1 1 6.957497 6.957497 17385 +asinteg 1 1 6.957497 6.957497 17386 +circl 1 1 6.957497 6.957497 17387 +spatialattribut 1 1 6.957497 6.957497 17388 +foroverlap 1 1 6.957497 6.957497 17389 +selectingcolor 1 1 6.957497 6.957497 17390 +withad 1 1 6.957497 6.957497 17391 +issueimplicit 1 1 6.957497 6.957497 17392 +arubb 1 1 6.957497 6.957497 17393 +querycompos 1 1 6.957497 6.957497 17394 +databaseschema 1 1 6.957497 6.957497 17395 +beview 1 1 6.957497 6.957497 17396 +bedisplai 1 1 6.957497 6.957497 17397 +sqlwe 1 1 6.957497 6.957497 17398 +extendedset 1 1 6.957497 6.957497 17399 +byus 1 1 6.957497 6.957497 17400 +standarddatabas 1 1 6.957497 6.957497 17401 +anddrop 1 1 6.957497 6.957497 17402 +paradiseserv 1 1 6.957497 6.957497 17403 +theresult 1 1 6.957497 6.957497 17404 +ismulti 1 1 6.957497 6.957497 17405 +sameserv 1 1 6.957497 6.957497 17406 +carefulattent 1 1 6.957497 6.957497 17407 +processqueri 1 1 6.957497 6.957497 17408 +largevolum 1 1 6.957497 6.957497 17409 +frontendeurop 1 1 6.957497 6.957497 17410 +pressher 1 1 6.957497 6.957497 17411 +projectattn 1 1 6.957497 6.957497 17412 +dewittunivers 1 1 6.957497 6.957497 17413 +edumor 1 1 6.957497 6.957497 17414 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..3aae7f45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +updat 1 191 1.609438 1.609438 41 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +network 1 168 1.791759 1.791759 61 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +support 1 132 1.945910 1.945910 83 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +welcom 1 122 2.079442 2.079442 99 +provid 1 121 2.079442 2.079442 94 +intern 1 108 2.197225 2.197225 128 +look 1 107 2.197225 2.197225 115 +site 1 106 2.197225 2.197225 119 +text 1 98 2.302585 2.302585 133 +commun 1 95 2.397895 2.397895 157 +comment 1 93 2.397895 2.397895 146 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +educ 1 86 2.484907 2.484907 191 +know 1 80 2.564949 2.564949 198 +servic 1 72 2.639057 2.639057 236 +onlin 1 75 2.639057 2.639057 223 +goal 1 66 2.708050 2.708050 250 +organ 1 65 2.772589 2.772589 265 +best 1 59 2.833213 2.833213 299 +locat 1 59 2.833213 2.833213 303 +sever 1 56 2.890372 2.890372 322 +suggest 1 53 2.944439 2.944439 331 +three 1 54 2.944439 2.944439 330 +week 1 52 2.995732 2.995732 343 +format 1 48 3.044522 3.044522 356 +effect 1 46 3.091042 3.091042 385 +show 1 43 3.178054 3.178054 417 +offer 1 43 3.178054 3.178054 414 +howev 1 41 3.218876 3.218876 422 +announc 1 40 3.258097 3.258097 441 +primari 1 25 3.737670 3.737670 669 +daili 1 24 3.761200 3.761200 706 +annot 1 21 3.912023 3.912023 775 +theunivers 1 21 3.912023 3.912023 797 +longer 1 20 3.951244 3.951244 816 +entir 1 20 3.951244 3.951244 811 +toolkit 1 20 3.951244 3.951244 835 +universityof 1 15 4.248495 4.248495 1061 +everyon 1 13 4.382027 4.382027 1148 +summar 1 11 4.553877 4.553877 1295 +discov 1 9 4.753590 4.753590 1562 +hundr 1 9 4.753590 4.753590 1528 +filter 1 8 4.875197 4.875197 1641 +scout 1 7 5.010635 5.010635 1903 +happen 1 7 5.010635 5.010635 1790 +valuabl 1 5 5.347108 5.347108 2256 +newli 1 3 5.857933 5.857933 3786 +useth 1 3 5.857933 5.857933 3110 +thescout 1 2 6.263398 6.263398 6082 +homepagego 1 1 6.957497 6.957497 17415 +versionnewslett 1 1 6.957497 6.957497 17416 +newand 1 1 6.957497 6.957497 17417 +toolsinternet 1 1 6.957497 6.957497 17418 +effectiveinternet 1 1 6.957497 6.957497 17419 +availablea 1 1 6.957497 6.957497 17420 +studentssurf 1 1 6.957497 6.957497 17421 +smarter 1 1 6.957497 6.957497 17422 +canchoos 1 1 6.957497 6.957497 17423 +annoucementseach 1 1 6.957497 6.957497 17424 +networktool 1 1 6.957497 6.957497 17425 +vefound 1 1 6.957497 6.957497 17426 +byeduc 1 1 6.957497 6.957497 17427 +encouragefeedback 1 1 6.957497 6.957497 17428 +ournewest 1 1 6.957497 6.957497 17429 +feedbackscout 1 1 6.957497 6.957497 17430 +servicesfor 1 1 6.957497 6.957497 17431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..a6e3d42a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,482 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +wisconsin 1 169 1.791759 1.791759 54 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +world 1 115 2.197225 2.197225 126 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +section 1 94 2.397895 2.397895 149 +commun 1 95 2.397895 2.397895 157 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +question 1 91 2.397895 2.397895 141 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +wide 1 84 2.484907 2.484907 185 +environ 1 84 2.484907 2.484907 177 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +build 1 85 2.484907 2.484907 184 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +chang 1 82 2.484907 2.484907 163 +help 1 83 2.484907 2.484907 175 +start 1 83 2.484907 2.484907 173 +sourc 1 77 2.564949 2.564949 201 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +exampl 1 77 2.564949 2.564949 195 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +want 1 79 2.564949 2.564949 199 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +name 1 72 2.639057 2.639057 220 +servic 1 72 2.639057 2.639057 236 +write 1 72 2.639057 2.639057 222 +goal 1 66 2.708050 2.708050 250 +degre 1 69 2.708050 2.708050 259 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +august 1 66 2.708050 2.708050 257 +receiv 1 66 2.708050 2.708050 244 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +improv 1 62 2.772589 2.772589 289 +copi 1 63 2.772589 2.772589 284 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +plai 1 60 2.833213 2.833213 307 +share 1 59 2.833213 2.833213 304 +unix 1 58 2.890372 2.890372 308 +space 1 57 2.890372 2.890372 310 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +major 1 56 2.890372 2.890372 315 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +point 1 58 2.890372 2.890372 319 +three 1 54 2.944439 2.944439 330 +processor 1 54 2.944439 2.944439 335 +extens 1 53 2.944439 2.944439 340 +found 1 53 2.944439 2.944439 337 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +run 1 51 2.995732 2.995732 347 +digit 1 52 2.995732 2.995732 348 +tabl 1 51 2.995732 2.995732 346 +date 1 51 2.995732 2.995732 344 +basic 1 50 3.044522 3.044522 360 +set 1 50 3.044522 3.044522 361 +standard 1 48 3.044522 3.044522 365 +give 1 50 3.044522 3.044522 359 +without 1 50 3.044522 3.044522 370 +archiv 1 49 3.044522 3.044522 364 +featur 1 46 3.091042 3.091042 386 +could 1 46 3.091042 3.091042 383 +possibl 1 47 3.091042 3.091042 378 +get 1 46 3.091042 3.091042 380 +natur 1 44 3.135494 3.135494 406 +describ 1 45 3.135494 3.135494 400 +video 1 44 3.135494 3.135494 405 +anoth 1 45 3.135494 3.135494 408 +mechan 1 43 3.178054 3.178054 416 +term 1 43 3.178054 3.178054 411 +futur 1 41 3.218876 3.218876 427 +howev 1 41 3.218876 3.218876 422 +editor 1 41 3.218876 3.218876 433 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +close 1 38 3.295837 3.295837 465 +open 1 38 3.295837 3.295837 469 +field 1 37 3.332205 3.332205 482 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +feel 1 37 3.332205 3.332205 483 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +tree 1 36 3.367296 3.367296 492 +either 1 35 3.401197 3.401197 506 +singl 1 34 3.401197 3.401197 510 +everi 1 34 3.401197 3.401197 519 +approxim 1 35 3.401197 3.401197 509 +post 1 35 3.401197 3.401197 505 +return 1 34 3.401197 3.401197 502 +concurr 1 34 3.401197 3.401197 501 +go 1 33 3.433987 3.433987 529 +product 1 33 3.433987 3.433987 527 +queri 1 33 3.433987 3.433987 524 +obtain 1 33 3.433987 3.433987 534 +ad 1 32 3.465736 3.465736 544 +kind 1 32 3.465736 3.465736 541 +photo 1 31 3.496508 3.496508 561 +someth 1 31 3.496508 3.496508 554 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +option 1 30 3.555348 3.555348 575 +built 1 29 3.583519 3.583519 592 +turn 1 29 3.583519 3.583519 586 +depend 1 29 3.583519 3.583519 583 +particip 1 29 3.583519 3.583519 589 +releas 1 28 3.610918 3.610918 616 +intend 1 28 3.610918 3.610918 599 +becom 1 28 3.610918 3.610918 603 +propos 1 28 3.610918 3.610918 602 +hope 1 28 3.610918 3.610918 610 +framework 1 28 3.610918 3.610918 606 +mind 1 27 3.637586 3.637586 632 +retriev 1 27 3.637586 3.637586 621 +manipul 1 27 3.637586 3.637586 624 +quit 1 27 3.637586 3.637586 633 +symbol 1 27 3.637586 3.637586 620 +linux 1 27 3.637586 3.637586 631 +team 1 27 3.637586 3.637586 625 +effort 1 26 3.688879 3.688879 652 +enhanc 1 26 3.688879 3.688879 644 +enabl 1 26 3.688879 3.688879 655 +subject 1 26 3.688879 3.688879 647 +request 1 26 3.688879 3.688879 635 +rather 1 26 3.688879 3.688879 642 +wai 1 25 3.737670 3.737670 662 +client 1 25 3.737670 3.737670 679 +valu 1 25 3.737670 3.737670 665 +task 1 25 3.737670 3.737670 678 +scalabl 1 24 3.761200 3.761200 705 +store 1 24 3.761200 3.761200 693 +reach 1 24 3.761200 3.761200 688 +serv 1 22 3.850148 3.850148 758 +varieti 1 22 3.850148 3.850148 740 +almost 1 22 3.850148 3.850148 742 +deal 1 22 3.850148 3.850148 736 +defin 1 22 3.850148 3.850148 746 +emphasi 1 22 3.850148 3.850148 755 +sent 1 22 3.850148 3.850148 763 +fund 1 21 3.912023 3.912023 805 +programminglanguag 1 21 3.912023 3.912023 782 +flexibl 1 21 3.912023 3.912023 792 +latest 1 21 3.912023 3.912023 785 +thu 1 21 3.912023 3.912023 773 +similar 1 21 3.912023 3.912023 771 +binari 1 20 3.951244 3.951244 823 +fine 1 20 3.951244 3.951244 822 +entir 1 20 3.951244 3.951244 811 +benchmark 1 19 4.007333 4.007333 859 +media 1 19 4.007333 4.007333 861 +definit 1 19 4.007333 4.007333 864 +separ 1 19 4.007333 4.007333 844 +concentr 1 18 4.060443 4.060443 906 +along 1 18 4.060443 4.060443 878 +repositori 1 17 4.110874 4.110874 932 +expand 1 17 4.110874 4.110874 928 +whether 1 17 4.110874 4.110874 918 +stop 1 17 4.110874 4.110874 942 +ultim 1 17 4.110874 4.110874 943 +regist 1 17 4.110874 4.110874 938 +attempt 1 17 4.110874 4.110874 917 +sept 1 17 4.110874 4.110874 952 +weekli 1 17 4.110874 4.110874 919 +anyon 1 17 4.110874 4.110874 916 +commerci 1 16 4.174387 4.174387 1005 +intel 1 16 4.174387 4.174387 1000 +alreadi 1 16 4.174387 4.174387 963 +portion 1 16 4.174387 4.174387 971 +sign 1 16 4.174387 4.174387 970 +capabl 1 15 4.248495 4.248495 1016 +hierarch 1 15 4.248495 4.248495 1018 +transit 1 15 4.248495 4.248495 1046 +hybrid 1 15 4.248495 4.248495 1057 +piec 1 15 4.248495 4.248495 1020 +stream 1 15 4.248495 4.248495 1015 +charact 1 15 4.248495 4.248495 1028 +heterogen 1 14 4.317488 4.317488 1090 +role 1 14 4.317488 4.317488 1101 +anonym 1 14 4.317488 4.317488 1100 +attribut 1 14 4.317488 4.317488 1092 +shown 1 14 4.317488 4.317488 1080 +decid 1 14 4.317488 4.317488 1075 +nasa 1 13 4.382027 4.382027 1188 +earlier 1 13 4.382027 4.382027 1140 +directli 1 13 4.382027 4.382027 1141 +cannot 1 13 4.382027 4.382027 1144 +individu 1 13 4.382027 4.382027 1126 +convert 1 13 4.382027 4.382027 1122 +target 1 12 4.465908 4.465908 1282 +uniqu 1 12 4.465908 4.465908 1228 +safe 1 12 4.465908 4.465908 1274 +solari 1 12 4.465908 4.465908 1238 +nanci 1 12 4.465908 4.465908 1256 +shore 1 11 4.553877 4.553877 1377 +persist 1 11 4.553877 4.553877 1367 +arpa 1 11 4.553877 4.553877 1369 +distinguish 1 11 4.553877 4.553877 1357 +string 1 11 4.553877 4.553877 1340 +fix 1 11 4.553877 4.553877 1327 +facilit 1 10 4.653960 4.653960 1412 +equal 1 10 4.653960 4.653960 1424 +consortium 1 10 4.653960 4.653960 1467 +length 1 10 4.653960 4.653960 1400 +rel 1 9 4.753590 4.753590 1487 +mention 1 9 4.753590 4.753590 1569 +inter 1 9 4.753590 4.753590 1530 +familiar 1 9 4.753590 4.753590 1485 +subscrib 1 9 4.753590 4.753590 1541 +contrast 1 8 4.875197 4.875197 1637 +root 1 8 4.875197 4.875197 1650 +realiz 1 8 4.875197 4.875197 1739 +cross 1 8 4.875197 4.875197 1703 +port 1 8 4.875197 4.875197 1766 +parti 1 8 4.875197 4.875197 1676 +supportfor 1 7 5.010635 5.010635 1854 +sparc 1 7 5.010635 5.010635 1860 +digest 1 7 5.010635 5.010635 1864 +henc 1 7 5.010635 5.010635 1805 +beta 1 6 5.164786 5.164786 1993 +geograph 1 6 5.164786 5.164786 2236 +furthermor 1 6 5.164786 5.164786 2141 +byte 1 6 5.164786 5.164786 2108 +conveni 1 6 5.164786 5.164786 2088 +pool 1 6 5.164786 5.164786 2225 +feasibl 1 6 5.164786 5.164786 2157 +gzip 1 6 5.164786 5.164786 2117 +moder 1 6 5.164786 5.164786 2112 +notifi 1 6 5.164786 5.164786 2106 +anda 1 5 5.347108 5.347108 2416 +compat 1 5 5.347108 5.347108 2485 +eas 1 5 5.347108 5.347108 2267 +greater 1 5 5.347108 5.347108 2258 +began 1 5 5.347108 5.347108 2498 +remain 1 5 5.347108 5.347108 2278 +default 1 5 5.347108 5.347108 2335 +andevalu 1 4 5.568345 5.568345 2706 +satellit 1 4 5.568345 5.568345 3077 +eventu 1 4 5.568345 5.568345 3074 +symmetr 1 4 5.568345 5.568345 2908 +peer 1 4 5.568345 5.568345 2742 +exodu 1 4 5.568345 5.568345 3075 +customiz 1 4 5.568345 5.568345 2966 +simplifi 1 4 5.568345 5.568345 3066 +repli 1 4 5.568345 5.568345 2689 +predecessor 1 3 5.857933 5.857933 3585 +briefli 1 3 5.857933 5.857933 3459 +sector 1 3 5.857933 5.857933 3766 +paragon 1 3 5.857933 5.857933 3359 +serverarchitectur 1 3 5.857933 5.857933 3736 +gigabyt 1 3 5.857933 5.857933 3548 +embodi 1 3 5.857933 5.857933 3236 +bulk 1 3 5.857933 5.857933 4000 +oodb 1 3 5.857933 5.857933 3954 +intra 1 3 5.857933 5.857933 3243 +reachabl 1 3 5.857933 5.857933 4001 +eduand 1 3 5.857933 5.857933 3452 +membership 1 3 5.857933 5.857933 3751 +subscript 1 3 5.857933 5.857933 3469 +objectiveoverviewreleas 1 2 6.263398 6.263398 6070 +serverobject 1 2 6.263398 6.263398 6077 +oodbm 1 2 6.263398 6.263398 6083 +provis 1 2 6.263398 6.263398 4683 +neutral 1 2 6.263398 6.263398 5760 +mount 1 2 6.263398 6.263398 5995 +eduthi 1 2 6.263398 6.263398 5382 +junk 1 2 6.263398 6.263398 5701 +mailbox 1 2 6.263398 6.263398 6084 +sender 1 2 6.263398 6.263398 5064 +pageshor 1 1 6.957497 6.957497 17442 +repositorydocu 1 1 6.957497 6.957497 17443 +informationmail 1 1 6.957497 6.957497 17444 +listsse 1 1 6.957497 6.957497 17445 +shorepeopl 1 1 6.957497 6.957497 17446 +shorelatest 1 1 6.957497 6.957497 17447 +arpaparadis 1 1 6.957497 6.957497 17448 +shoreexodu 1 1 6.957497 6.957497 17449 +shoreoo 1 1 6.957497 6.957497 17450 +oodbsshor 1 1 6.957497 6.957497 17451 +albumuw 1 1 6.957497 6.957497 17452 +widevarieti 1 1 6.957497 6.957497 17453 +cadsystem 1 1 6.957497 6.957497 17454 +informationsystem 1 1 6.957497 6.957497 17436 +usedexodusstorag 1 1 6.957497 6.957497 17455 +ofwai 1 1 6.957497 6.957497 17456 +thisinterfac 1 1 6.957497 6.957497 17457 +theunix 1 1 6.957497 6.957497 17458 +viand 1 1 6.957497 6.957497 17459 +withoutmodif 1 1 6.957497 6.957497 17460 +shoreobject 1 1 6.957497 6.957497 17461 +inheritingcharacterist 1 1 6.957497 6.957497 17462 +fromfil 1 1 6.957497 6.957497 17463 +ofshor 1 1 6.957497 6.957497 17464 +scalabilitysupport 1 1 6.957497 6.957497 17465 +heterogeneitysupport 1 1 6.957497 6.957497 17466 +applicationswhen 1 1 6.957497 6.957497 17467 +uniqueamong 1 1 6.957497 6.957497 17468 +odmg 1 1 6.957497 6.957497 17433 +languageheterogen 1 1 6.957497 6.957497 17469 +persistentstorag 1 1 6.957497 6.957497 17470 +basicallycompat 1 1 6.957497 6.957497 17471 +betransf 1 1 6.957497 6.957497 17472 +architectureshor 1 1 6.957497 6.957497 17473 +distributedarchitectur 1 1 6.957497 6.957497 17474 +ashor 1 1 6.957497 6.957497 17437 +disksattach 1 1 6.957497 6.957497 17475 +architectureus 1 1 6.957497 6.957497 17476 +vendor 1 1 6.957497 6.957497 17438 +typicallyus 1 1 6.957497 6.957497 17477 +notionof 1 1 6.957497 6.957497 17478 +runsin 1 1 6.957497 6.957497 17479 +forus 1 1 6.957497 6.957497 17480 +theparadis 1 1 6.957497 6.957497 17481 +seosdi 1 1 6.957497 6.957497 17482 +aimport 1 1 6.957497 6.957497 17483 +endeavor 1 1 6.957497 6.957497 17484 +certainlydepend 1 1 6.957497 6.957497 17485 +transmitobject 1 1 6.957497 6.957497 17486 +whilecurr 1 1 6.957497 6.957497 17487 +orientedtoward 1 1 6.957497 6.957497 17488 +terabyt 1 1 6.957497 6.957497 17489 +libraryar 1 1 6.957497 6.957497 17490 +heterogeneityobject 1 1 6.957497 6.957497 17491 +neutraltyp 1 1 6.957497 6.957497 17492 +databasefeatur 1 1 6.957497 6.957497 17493 +ofsupport 1 1 6.957497 6.957497 17494 +feasibleto 1 1 6.957497 6.957497 17495 +wasrec 1 1 6.957497 6.957497 17496 +onprovid 1 1 6.957497 6.957497 17497 +withina 1 1 6.957497 6.957497 17498 +applicationsa 1 1 6.957497 6.957497 17499 +currentlyus 1 1 6.957497 6.957497 17500 +untyp 1 1 6.957497 6.957497 17501 +flatten 1 1 6.957497 6.957497 17439 +structuredobject 1 1 6.957497 6.957497 17502 +displac 1 1 6.957497 6.957497 17503 +orientedfil 1 1 6.957497 6.957497 17504 +standpoint 1 1 6.957497 6.957497 17505 +manypersist 1 1 6.957497 6.957497 17506 +indirectli 1 1 6.957497 6.957497 17507 +usersa 1 1 6.957497 6.957497 17508 +individualpersist 1 1 6.957497 6.957497 17509 +oflarg 1 1 6.957497 6.957497 17510 +unnam 1 1 6.957497 6.957497 17511 +involvessever 1 1 6.957497 6.957497 17512 +includingdirectori 1 1 6.957497 6.957497 17513 +legaci 1 1 6.957497 6.957497 17440 +unixappl 1 1 6.957497 6.957497 17514 +fromtradit 1 1 6.957497 6.957497 17515 +standardunix 1 1 6.957497 6.957497 17516 +mkdir 1 1 6.957497 6.957497 17517 +chdir 1 1 6.957497 6.957497 17518 +callsposs 1 1 6.957497 6.957497 17519 +onevari 1 1 6.957497 6.957497 17520 +asb 1 1 6.957497 6.957497 17521 +objectthrough 1 1 6.957497 6.957497 17522 +counterpart 1 1 6.957497 6.957497 17523 +callswil 1 1 6.957497 6.957497 17524 +thatwish 1 1 6.957497 6.957497 17525 +datacontain 1 1 6.957497 6.957497 17526 +bothnew 1 1 6.957497 6.957497 17527 +componentof 1 1 6.957497 6.957497 17528 +morestructur 1 1 6.957497 6.957497 17529 +shore_support 1 1 6.957497 6.957497 17435 +rleas 1 1 6.957497 6.957497 17530 +completeimplement 1 1 6.957497 6.957497 17531 +tosolari 1 1 6.957497 6.957497 17532 +andpentium 1 1 6.957497 6.957497 17533 +atftp 1 1 6.957497 6.957497 17534 +liststher 1 1 6.957497 6.957497 17535 +shore_al 1 1 6.957497 6.957497 17432 +usebi 1 1 6.957497 6.957497 17536 +listproc 1 1 6.957497 6.957497 17434 +madisonc 1 1 6.957497 6.957497 17537 +unmoder 1 1 6.957497 6.957497 17538 +unlikelyev 1 1 6.957497 6.957497 17539 +clutter 1 1 6.957497 6.957497 17441 +isalreadi 1 1 6.957497 6.957497 17540 +belowfor 1 1 6.957497 6.957497 17541 +sentwhen 1 1 6.957497 6.957497 17542 +beingpost 1 1 6.957497 6.957497 17543 +yourrepli 1 1 6.957497 6.957497 17544 +maysubscrib 1 1 6.957497 6.957497 17545 +existenceof 1 1 6.957497 6.957497 17546 +whenit 1 1 6.957497 6.957497 17547 +yoursubscript 1 1 6.957497 6.957497 17548 +conceal 1 1 6.957497 6.957497 17549 +subscriberscannot 1 1 6.957497 6.957497 17550 +specialmessag 1 1 6.957497 6.957497 17551 +sendthi 1 1 6.957497 6.957497 17552 +unsubscrib 1 1 6.957497 6.957497 17553 +messageshould 1 1 6.957497 6.957497 17554 +helplast 1 1 6.957497 6.957497 17555 +nhall 1 1 6.957497 6.957497 17556 +footnot 1 1 6.957497 6.957497 17557 +odlshor 1 1 6.957497 6.957497 17558 +modelidl 1 1 6.957497 6.957497 17559 +odlar 1 1 6.957497 6.957497 17560 +stabilizesw 1 1 6.957497 6.957497 17561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..4e2c3811 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +find 1 111 2.197225 2.197225 111 +stuff 1 87 2.484907 2.484907 171 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +sport 1 25 3.737670 3.737670 683 +indian 1 22 3.850148 3.850148 769 +gupta 1 12 4.465908 4.465908 1241 +avenu 1 12 4.465908 4.465908 1277 +newspap 1 12 4.465908 4.465908 1280 +whereabout 1 4 5.568345 5.568345 3078 +abhinav 1 3 5.857933 5.857933 3428 +agupta 1 3 5.857933 5.857933 3429 +kendal 1 2 6.263398 6.263398 6085 +residenceoffic 1 1 6.957497 6.957497 17562 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..cb633000 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +click 1 142 1.945910 1.945910 78 +pictur 1 89 2.397895 2.397895 160 +live 1 40 3.258097 3.258097 451 +ever 1 19 4.007333 4.007333 872 +larger 1 7 5.010635 5.010635 1875 +largest 1 7 5.010635 5.010635 1858 +alain 1 2 6.263398 6.263398 6086 +pagealain 1 1 6.957497 6.957497 17563 +carnivor 1 1 6.957497 6.957497 17564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..ea852c3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +machin 1 129 2.079442 2.079442 95 +west 1 83 2.484907 2.484907 192 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +state 1 76 2.564949 2.564949 207 +intellig 1 72 2.639057 2.639057 225 +artifici 1 63 2.772589 2.772589 280 +advisor 1 51 2.995732 2.995732 355 +streetmadison 1 38 3.295837 3.295837 474 +neural 1 30 3.555348 3.555348 578 +departmentunivers 1 24 3.761200 3.761200 711 +sequenc 1 23 3.806662 3.806662 734 +biologi 1 15 4.248495 4.248495 1049 +train 1 14 4.317488 4.317488 1066 +edutelephon 1 10 4.653960 4.653960 1473 +purdu 1 10 4.653960 4.653960 1466 +molecular 1 7 5.010635 5.010635 1887 +jude 1 6 5.164786 5.164786 2123 +fold 1 4 5.568345 5.568345 2615 +carolyn 1 2 6.263398 6.263398 6088 +allex 1 2 6.263398 6.263398 6087 +ismb 1 2 6.263398 6.263398 5834 +studentbiotechnolog 1 1 6.957497 6.957497 17565 +traineecomput 1 1 6.957497 6.957497 17566 +shavlikinterest 1 1 6.957497 6.957497 17567 +protein 1 1 6.957497 6.957497 17568 +networkseduc 1 1 6.957497 6.957497 17569 +madisonb 1 1 6.957497 6.957497 17570 +universityb 1 1 6.957497 6.957497 17571 +mankato 1 1 6.957497 6.957497 17572 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..5c433a34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +read 1 154 1.791759 1.791759 47 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +find 1 111 2.197225 2.197225 111 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +peopl 1 96 2.302585 2.302585 132 +associ 1 93 2.397895 2.397895 151 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +novemb 1 81 2.484907 2.484907 179 +thing 1 84 2.484907 2.484907 189 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +resum 1 79 2.564949 2.564949 217 +method 1 80 2.564949 2.564949 213 +master 1 76 2.564949 2.564949 216 +optim 1 79 2.564949 2.564949 197 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +want 1 79 2.564949 2.564949 199 +solv 1 73 2.639057 2.639057 234 +write 1 72 2.639057 2.639057 222 +david 1 71 2.639057 2.639057 232 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +copi 1 63 2.772589 2.772589 284 +polici 1 64 2.772589 2.772589 279 +evalu 1 64 2.772589 2.772589 266 +new 1 64 2.772589 2.772589 262 +think 1 57 2.890372 2.890372 314 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +three 1 54 2.944439 2.944439 330 +talk 1 53 2.944439 2.944439 336 +week 1 52 2.995732 2.995732 343 +advisor 1 51 2.995732 2.995732 355 +much 1 52 2.995732 2.995732 349 +set 1 50 3.044522 3.044522 361 +friend 1 48 3.044522 3.044522 376 +physic 1 47 3.091042 3.091042 377 +get 1 46 3.091042 3.091042 380 +featur 1 46 3.091042 3.091042 386 +better 1 45 3.135494 3.135494 401 +favorit 1 44 3.135494 3.135494 410 +show 1 43 3.178054 3.178054 417 +linear 1 41 3.218876 3.218876 431 +live 1 40 3.258097 3.258097 451 +author 1 39 3.258097 3.258097 450 +seminar 1 38 3.295837 3.295837 470 +return 1 34 3.401197 3.401197 502 +go 1 33 3.433987 3.433987 529 +articl 1 33 3.433987 3.433987 530 +depend 1 29 3.583519 3.583519 583 +enhanc 1 26 3.688879 3.688879 644 +never 1 25 3.737670 3.737670 671 +magazin 1 24 3.761200 3.761200 704 +watch 1 21 3.912023 3.912023 789 +love 1 21 3.912023 3.912023 804 +leav 1 21 3.912023 3.912023 772 +minut 1 20 3.951244 3.951244 810 +five 1 19 4.007333 4.007333 841 +beauti 1 18 4.060443 4.060443 912 +regist 1 17 4.110874 4.110874 938 +side 1 15 4.248495 4.248495 1022 +anywai 1 15 4.248495 4.248495 1047 +score 1 15 4.248495 4.248495 1017 +went 1 12 4.465908 4.465908 1279 +partner 1 8 4.875197 4.875197 1648 +multiscalar 1 8 4.875197 4.875197 1783 +parti 1 8 4.875197 4.875197 1676 +vallei 1 7 5.010635 5.010635 1959 +shot 1 7 5.010635 5.010635 1898 +truth 1 6 5.164786 5.164786 2179 +sohi 1 6 5.164786 5.164786 2237 +yale 1 6 5.164786 5.164786 2003 +cat 1 6 5.164786 5.164786 2194 +presid 1 6 5.164786 5.164786 2196 +promis 1 6 5.164786 5.164786 2037 +guri 1 5 5.347108 5.347108 2578 +girlfriend 1 5 5.347108 5.347108 2579 +everybodi 1 5 5.347108 5.347108 2517 +gui 1 5 5.347108 5.347108 2573 +kid 1 5 5.347108 5.347108 2516 +arch 1 4 5.568345 5.568345 2995 +metal 1 4 5.568345 5.568345 3079 +soul 1 4 5.568345 5.568345 2907 +drew 1 4 5.568345 5.568345 2980 +amir 1 3 5.857933 5.857933 3850 +preprocessor 1 3 5.857933 5.857933 3844 +detector 1 3 5.857933 5.857933 3745 +allevi 1 3 5.857933 5.857933 3643 +recip 1 3 5.857933 5.857933 3668 +terri 1 3 5.857933 5.857933 3264 +carbon 1 3 5.857933 5.857933 3804 +vagu 1 3 5.857933 5.857933 3393 +super 1 3 5.857933 5.857933 3918 +roth 1 2 6.263398 6.263398 6089 +delphi 1 2 6.263398 6.263398 4192 +airport 1 2 6.263398 6.263398 5962 +out 1 2 6.263398 6.263398 6090 +curli 1 2 6.263398 6.263398 5691 +fri 1 2 6.263398 6.263398 5844 +charli 1 2 6.263398 6.263398 5905 +regress 1 2 6.263398 6.263398 4501 +weird 1 2 6.263398 6.263398 5503 +subba 1 2 6.263398 6.263398 6091 +officem 1 2 6.263398 6.263398 6092 +barb 1 2 6.263398 6.263398 6058 +wierd 1 2 6.263398 6.263398 6093 +maven 1 1 6.957497 6.957497 17574 +erin 1 1 6.957497 6.957497 17575 +occasionali 1 1 6.957497 6.957497 17576 +cvte 1 1 6.957497 6.957497 17577 +deleg 1 1 6.957497 6.957497 17578 +existencei 1 1 6.957497 6.957497 17579 +nail 1 1 6.957497 6.957497 17580 +marci 1 1 6.957497 6.957497 17573 +lafollett 1 1 6.957497 6.957497 17581 +meantim 1 1 6.957497 6.957497 17582 +wacki 1 1 6.957497 6.957497 17583 +eggplant 1 1 6.957497 6.957497 17584 +daddi 1 1 6.957497 6.957497 17585 +titanium 1 1 6.957497 6.957497 17586 +screw 1 1 6.957497 6.957497 17587 +desi 1 1 6.957497 6.957497 17588 +relaford 1 1 6.957497 6.957497 17589 +mulholland 1 1 6.957497 6.957497 17590 +oxygen 1 1 6.957497 6.957497 17591 +dioxid 1 1 6.957497 6.957497 17592 +whack 1 1 6.957497 6.957497 17593 +scaryarea 1 1 6.957497 6.957497 17594 +rabid 1 1 6.957497 6.957497 17595 +interestth 1 1 6.957497 6.957497 17596 +hmmm 1 1 6.957497 6.957497 17597 +handyinformatik 1 1 6.957497 6.957497 17598 +madcat 1 1 6.957497 6.957497 17599 +sportslin 1 1 6.957497 6.957497 17600 +philli 1 1 6.957497 6.957497 17601 +ickyth 1 1 6.957497 6.957497 17602 +kemin 1 1 6.957497 6.957497 17603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..5024bb9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..6ecc01aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +data 1 170 1.791759 1.791759 49 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +area 1 144 1.945910 1.945910 80 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +search 1 95 2.397895 2.397895 155 +homepag 1 93 2.397895 2.397895 148 +associ 1 93 2.397895 2.397895 151 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +comment 1 93 2.397895 2.397895 146 +west 1 83 2.484907 2.484907 192 +activ 1 84 2.484907 2.484907 182 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +order 1 69 2.708050 2.708050 249 +view 1 70 2.708050 2.708050 254 +main 1 67 2.708050 2.708050 256 +goal 1 66 2.708050 2.708050 250 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +handout 1 64 2.772589 2.772589 263 +abstract 1 62 2.772589 2.772589 276 +space 1 57 2.890372 2.890372 310 +unix 1 58 2.890372 2.890372 308 +variou 1 56 2.890372 2.890372 317 +found 1 53 2.944439 2.944439 337 +tabl 1 51 2.995732 2.995732 346 +maintain 1 51 2.995732 2.995732 342 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +netscap 1 44 3.135494 3.135494 395 +futur 1 41 3.218876 3.218876 427 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +sciencesunivers 1 37 3.332205 3.332205 486 +download 1 36 3.367296 3.367296 489 +short 1 36 3.367296 3.367296 499 +word 1 34 3.401197 3.401197 508 +approxim 1 35 3.401197 3.401197 509 +articl 1 33 3.433987 3.433987 530 +enhanc 1 26 3.688879 3.688879 644 +mine 1 26 3.688879 3.688879 654 +wish 1 24 3.761200 3.761200 692 +compress 1 23 3.806662 3.806662 719 +togeth 1 23 3.806662 3.806662 714 +miscellan 1 23 3.806662 3.806662 731 +recommend 1 22 3.850148 3.850148 737 +basi 1 20 3.951244 3.951244 828 +item 1 19 4.007333 4.007333 856 +offici 1 18 4.060443 4.060443 894 +otherwis 1 17 4.110874 4.110874 922 +choos 1 16 4.174387 4.174387 964 +carl 1 15 4.248495 4.248495 1024 +anonym 1 14 4.317488 4.317488 1100 +invari 1 8 4.875197 4.875197 1748 +univeristi 1 8 4.875197 4.875197 1754 +none 1 7 5.010635 5.010635 1811 +spline 1 6 5.164786 5.164786 2007 +shift 1 5 5.347108 5.347108 2357 +clickher 1 5 5.347108 5.347108 2428 +wavelet 1 4 5.568345 5.568345 2874 +usa 1 4 5.568345 5.568345 3080 +thin 1 3 5.857933 5.857933 3488 +shen 1 3 5.857933 5.857933 3370 +uncompress 1 3 5.857933 5.857933 3177 +boor 1 3 5.857933 5.857933 3482 +ofwisconsin 1 3 5.857933 5.857933 4002 +amo 1 2 6.263398 6.263398 6094 +professordepart 1 2 6.263398 6.263398 5624 +deposit 1 2 6.263398 6.263398 6095 +mailbox 1 2 6.263398 6.263398 6084 +boxsplin 1 1 6.957497 6.957497 17604 +radial 1 1 6.957497 6.957497 17605 +toscatt 1 1 6.957497 6.957497 17606 +multiquadr 1 1 6.957497 6.957497 17607 +plate 1 1 6.957497 6.957497 17608 +splinesthi 1 1 6.957497 6.957497 17609 +linksat 1 1 6.957497 6.957497 17610 +paperaffin 1 1 6.957497 6.957497 17611 +operatorof 1 1 6.957497 6.957497 17612 +zuowei 1 1 6.957497 6.957497 17613 +fromher 1 1 6.957497 6.957497 17614 +directlyfrom 1 1 6.957497 6.957497 17615 +accounther 1 1 6.957497 6.957497 17616 +articlesof 1 1 6.957497 6.957497 17617 +containspostscript 1 1 6.957497 6.957497 17618 +theapproxim 1 1 6.957497 6.957497 17619 +filesconcern 1 1 6.957497 6.957497 17620 +andpubl 1 1 6.957497 6.957497 17621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..eb09bad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +andi 1 4 5.568345 5.568345 3081 +pageandi 1 2 6.263398 6.263398 6096 +therber 1 1 6.957497 6.957497 17622 +therberoffic 1 1 6.957497 6.957497 17623 +sphone 1 1 6.957497 6.957497 17624 +andyt 1 1 6.957497 6.957497 17625 +eduzooresumebookmarksapplet 1 1 6.957497 6.957497 17626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..84d9b323 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +world 1 115 2.197225 2.197225 126 +present 1 91 2.397895 2.397895 145 +collect 1 65 2.772589 2.772589 268 +finger 1 52 2.995732 2.995732 354 +log 1 19 4.007333 4.007333 857 +classic 1 14 4.317488 4.317488 1084 +fascin 1 3 5.857933 5.857933 3948 +ranga 1 1 6.957497 6.957497 17630 +arvind 1 1 6.957497 6.957497 17627 +ranganathan 1 1 6.957497 6.957497 17628 +erstwhil 1 1 6.957497 6.957497 17631 +workplac 1 1 6.957497 6.957497 17629 +indiaworld 1 1 6.957497 6.957497 17632 +escher 1 1 6.957497 6.957497 17633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..cbc673f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +educ 1 86 2.484907 2.484907 191 +come 1 78 2.564949 2.564949 202 +visit 1 63 2.772589 2.772589 288 +undergradu 1 54 2.944439 2.944439 338 +india 1 32 3.465736 3.465736 550 +altern 1 26 3.688879 3.688879 641 +worth 1 11 4.553877 4.553877 1294 +ashish 1 5 5.347108 5.347108 2473 +delhi 1 5 5.347108 5.347108 2530 +whereabout 1 4 5.568345 5.568345 3078 +indianinstitut 1 3 5.857933 5.857933 4003 +fantast 1 3 5.857933 5.857933 3966 +hadmi 1 2 6.263398 6.263398 6097 +canfing 1 2 6.263398 6.263398 6098 +thusoo 1 1 6.957497 6.957497 17634 +iitd 1 1 6.957497 6.957497 17635 +ashisht 1 1 6.957497 6.957497 17636 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..481cdb3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +dayton 1 119 2.079442 2.079442 104 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +west 1 83 2.484907 2.484907 192 +info 1 85 2.484907 2.484907 176 +june 1 79 2.564949 2.564949 214 +view 1 70 2.708050 2.708050 254 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +juli 1 60 2.833213 2.833213 305 +finger 1 52 2.995732 2.995732 354 +usaphon 1 9 4.753590 4.753590 1600 +ashraf 1 3 5.857933 5.857933 3421 +aboulnaga 1 3 5.857933 5.857933 3426 +edueduc 1 3 5.857933 5.857933 4004 +egypt 1 2 6.263398 6.263398 4856 +desautel 1 2 6.263398 6.263398 4791 +pageashraf 1 1 6.957497 6.957497 17638 +aboulnagacomput 1 1 6.957497 6.957497 17639 +alexandria 1 1 6.957497 6.957497 17637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..51212b66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +seattl 1 120 2.079442 2.079442 103 +back 1 60 2.833213 2.833213 297 +appoint 1 49 3.044522 3.044522 358 +basketbal 1 12 4.465908 4.465908 1289 +tuth 1 9 4.753590 4.753590 1519 +phil 1 5 5.347108 5.347108 2419 +educurr 1 5 5.347108 5.347108 2504 +win 1 3 5.857933 5.857933 3593 +atkinson 1 2 6.263398 6.263398 4722 +ncaa 1 2 6.263398 6.263398 5908 +pageucla 1 1 6.957497 6.957497 17641 +bannon 1 1 6.957497 6.957497 17642 +championship 1 1 6.957497 6.957497 17643 +infooffic 1 1 6.957497 6.957497 17640 +researchsailinghors 1 1 6.957497 6.957497 17644 +ridingscuba 1 1 6.957497 6.957497 17645 +divingc 1 1 6.957497 6.957497 17646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..c3fd004a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +question 1 91 2.397895 2.397895 141 +proceed 1 93 2.397895 2.397895 152 +larg 1 82 2.484907 2.484907 168 +info 1 85 2.484907 2.484907 176 +exampl 1 77 2.564949 2.564949 195 +complet 1 77 2.564949 2.564949 208 +summari 1 73 2.639057 2.639057 237 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +test 1 66 2.708050 2.708050 252 +practic 1 70 2.708050 2.708050 246 +complex 1 64 2.772589 2.772589 269 +improv 1 62 2.772589 2.772589 289 +creat 1 63 2.772589 2.772589 277 +simpl 1 60 2.833213 2.833213 298 +juli 1 60 2.833213 2.833213 305 +direct 1 57 2.890372 2.890372 316 +digit 1 52 2.995732 2.995732 348 +telephon 1 50 3.044522 3.044522 373 +without 1 50 3.044522 3.044522 370 +california 1 46 3.091042 3.091042 388 +algebra 1 45 3.135494 3.135494 394 +answer 1 45 3.135494 3.135494 391 +math 1 44 3.135494 3.135494 402 +examin 1 42 3.218876 3.218876 424 +press 1 42 3.218876 3.218876 419 +theoret 1 39 3.258097 3.258097 446 +probabl 1 40 3.258097 3.258097 455 +small 1 39 3.258097 3.258097 447 +annual 1 40 3.258097 3.258097 458 +error 1 40 3.258097 3.258097 449 +vita 1 38 3.295837 3.295837 473 +random 1 34 3.401197 3.401197 511 +least 1 35 3.401197 3.401197 516 +approxim 1 35 3.401197 3.401197 509 +product 1 33 3.433987 3.433987 527 +curriculum 1 33 3.433987 3.433987 535 +given 1 32 3.465736 3.465736 538 +secur 1 30 3.555348 3.555348 577 +usual 1 28 3.610918 3.610918 608 +berkelei 1 26 3.688879 3.688879 657 +proc 1 26 3.688879 3.688879 649 +lead 1 23 3.806662 3.806662 718 +among 1 21 3.912023 3.912023 781 +similar 1 21 3.912023 3.912023 771 +eric 1 19 4.007333 4.007333 870 +prove 1 19 4.007333 4.007333 848 +automata 1 13 4.382027 4.382027 1135 +conf 1 13 4.382027 4.382027 1181 +string 1 11 4.553877 4.553877 1340 +probabilist 1 11 4.553877 4.553877 1343 +volum 1 11 4.553877 4.553877 1347 +cryptographi 1 9 4.753590 4.753590 1512 +transmiss 1 9 4.753590 4.753590 1588 +assumpt 1 9 4.753590 4.753590 1514 +eduto 1 7 5.010635 5.010635 1956 +bach 1 4 5.568345 5.568345 2708 +wit 1 3 5.857933 5.857933 4005 +euler 1 3 5.857933 5.857933 3174 +canadian 1 3 5.857933 5.857933 3508 +condon 1 3 5.857933 5.857933 3309 +prime 1 2 6.263398 6.263398 6099 +designand 1 2 6.263398 6.263398 6100 +functionof 1 2 6.263398 6.263398 5415 +algebraicalgorithm 1 1 6.957497 6.957497 17647 +solvealgebra 1 1 6.957497 6.957497 17648 +onetel 1 1 6.957497 6.957497 17649 +possiblefactor 1 1 6.957497 6.957497 17650 +intrins 1 1 6.957497 6.957497 17651 +forreli 1 1 6.957497 6.957497 17652 +iscomposit 1 1 6.957497 6.957497 17653 +auxiliarynumb 1 1 6.957497 6.957497 17654 +witnessbi 1 1 6.957497 6.957497 17655 +followingnatur 1 1 6.957497 6.957497 17656 +accurateheurist 1 1 6.957497 6.957497 17657 +allowsthi 1 1 6.957497 6.957497 17658 +cnta 1 1 6.957497 6.957497 17659 +glaser 1 1 6.957497 6.957497 17660 +tanguai 1 1 6.957497 6.957497 17661 +shallit 1 1 6.957497 6.957497 17662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..5e4bbc04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +perform 1 143 1.945910 1.945910 74 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +follow 1 92 2.397895 2.397895 143 +center 1 88 2.397895 2.397895 158 +west 1 83 2.484907 2.484907 192 +thing 1 84 2.484907 2.484907 189 +internet 1 83 2.484907 2.484907 186 +symposium 1 72 2.639057 2.639057 238 +undergradu 1 54 2.944439 2.944439 338 +streetmadison 1 38 3.295837 3.295837 474 +seminar 1 38 3.295837 3.295837 470 +random 1 34 3.401197 3.401197 511 +departmentunivers 1 24 3.761200 3.761200 711 +honor 1 23 3.806662 3.806662 729 +famili 1 23 3.806662 3.806662 735 +director 1 22 3.850148 3.850148 767 +offici 1 18 4.060443 4.060443 894 +miller 1 17 4.110874 4.110874 949 +convent 1 14 4.317488 4.317488 1072 +bart 1 9 4.753590 4.753590 1559 +paradyn 1 9 4.753590 4.753590 1614 +frank 1 9 4.753590 4.753590 1568 +lloyd 1 6 5.164786 5.164786 2103 +advisori 1 6 5.164786 5.164786 2148 +barton 1 5 5.347108 5.347108 2371 +professorcomput 1 3 5.857933 5.857933 3714 +usath 1 2 6.263398 6.263398 6056 +wright 1 2 6.263398 6.263398 5177 +fuzz 1 1 6.957497 6.957497 17663 +testingteach 1 1 6.957497 6.957497 17664 +graduatesprofession 1 1 6.957497 6.957497 17665 +monona 1 1 6.957497 6.957497 17666 +terrac 1 1 6.957497 6.957497 17667 +groupperson 1 1 6.957497 6.957497 17668 +photosbart 1 1 6.957497 6.957497 17669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..f87f43cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +look 1 107 2.197225 2.197225 115 +west 1 83 2.484907 2.484907 192 +internet 1 83 2.484907 2.484907 186 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +street 1 63 2.772589 2.772589 293 +wednesdai 1 64 2.772589 2.772589 261 +browser 1 56 2.890372 2.890372 313 +tabl 1 51 2.995732 2.995732 346 +fridai 1 44 3.135494 3.135494 390 +word 1 34 3.401197 3.401197 508 +someth 1 31 3.496508 3.496508 554 +ultim 1 17 4.110874 4.110874 943 +miron 1 14 4.317488 4.317488 1110 +readabl 1 12 4.465908 4.465908 1258 +benjamin 1 11 4.553877 4.553877 1296 +teitelbaum 1 6 5.164786 5.164786 2102 +garbag 1 6 5.164786 5.164786 1986 +hyper 1 5 5.347108 5.347108 2435 +usaben 1 1 6.957497 6.957497 17670 +edursumquinc 1 1 6.957497 6.957497 17671 +gamezillion 1 1 6.957497 6.957497 17672 +bookmarksspr 1 1 6.957497 6.957497 17673 +dbseminar 1 1 6.957497 6.957497 17674 +osseminar 1 1 6.957497 6.957497 17675 +condormeet 1 1 6.957497 6.957497 17676 +plseminar 1 1 6.957497 6.957497 17677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..91262f50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +mathemat 1 108 2.197225 2.197225 123 +instructor 1 108 2.197225 2.197225 107 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +advanc 1 99 2.302585 2.302585 130 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +section 1 94 2.397895 2.397895 149 +search 1 95 2.397895 2.397895 155 +west 1 83 2.484907 2.484907 192 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +learn 1 86 2.484907 2.484907 170 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +resum 1 79 2.564949 2.564949 217 +orient 1 80 2.564949 2.564949 205 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +addit 1 74 2.639057 2.639057 228 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +logic 1 71 2.639057 2.639057 230 +practic 1 70 2.708050 2.708050 246 +knowledg 1 67 2.708050 2.708050 243 +receiv 1 66 2.708050 2.708050 244 +street 1 63 2.772589 2.772589 293 +abstract 1 62 2.772589 2.772589 276 +import 1 65 2.772589 2.772589 282 +result 1 65 2.772589 2.772589 281 +prof 1 64 2.772589 2.772589 273 +virtual 1 62 2.772589 2.772589 285 +artifici 1 63 2.772589 2.772589 280 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +januari 1 62 2.772589 2.772589 264 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +colleg 1 61 2.833213 2.833213 300 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +major 1 56 2.890372 2.890372 315 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +cover 1 55 2.944439 2.944439 329 +instruct 1 53 2.944439 2.944439 332 +finger 1 52 2.995732 2.995732 354 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +algebra 1 45 3.135494 3.135494 394 +http 1 41 3.218876 3.218876 420 +vision 1 41 3.218876 3.218876 430 +examin 1 42 3.218876 3.218876 424 +howev 1 41 3.218876 3.218876 422 +multipl 1 39 3.258097 3.258097 453 +error 1 40 3.258097 3.258097 449 +littl 1 39 3.258097 3.258097 454 +vita 1 38 3.295837 3.295837 473 +credit 1 38 3.295837 3.295837 460 +open 1 38 3.295837 3.295837 469 +robot 1 36 3.367296 3.367296 497 +procedur 1 36 3.367296 3.367296 488 +copyright 1 36 3.367296 3.367296 495 +concurr 1 34 3.401197 3.401197 501 +approxim 1 35 3.401197 3.401197 509 +least 1 35 3.401197 3.401197 516 +survei 1 35 3.401197 3.401197 513 +statist 1 35 3.401197 3.401197 521 +next 1 34 3.401197 3.401197 517 +curriculum 1 33 3.433987 3.433987 535 +within 1 33 3.433987 3.433987 525 +taught 1 33 3.433987 3.433987 526 +dissert 1 32 3.465736 3.465736 549 +transform 1 32 3.465736 3.465736 542 +given 1 32 3.465736 3.465736 538 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +intend 1 28 3.610918 3.610918 599 +administr 1 27 3.637586 3.637586 628 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +accur 1 25 3.737670 3.737670 680 +motion 1 24 3.761200 3.761200 699 +honor 1 23 3.806662 3.806662 729 +instead 1 22 3.850148 3.850148 756 +defin 1 22 3.850148 3.850148 746 +identifi 1 22 3.850148 3.850148 760 +navig 1 21 3.912023 3.912023 796 +prepar 1 20 3.951244 3.951244 824 +entir 1 20 3.951244 3.951244 811 +geometr 1 19 4.007333 4.007333 852 +assum 1 19 4.007333 4.007333 845 +lyco 1 19 4.007333 4.007333 871 +minim 1 18 4.060443 4.060443 887 +drive 1 15 4.248495 4.248495 1052 +fortran 1 15 4.248495 4.248495 1027 +club 1 15 4.248495 4.248495 1058 +scene 1 14 4.317488 4.317488 1114 +camera 1 14 4.317488 4.317488 1115 +essenti 1 13 4.382027 4.382027 1137 +charl 1 13 4.382027 4.382027 1149 +primarili 1 13 4.382027 4.382027 1185 +optic 1 12 4.465908 4.465908 1221 +realiti 1 12 4.465908 4.465908 1272 +pascal 1 12 4.465908 4.465908 1213 +pagewelcom 1 11 4.553877 4.553877 1344 +keyword 1 11 4.553877 4.553877 1356 +perspect 1 10 4.653960 4.653960 1437 +prior 1 10 4.653960 4.653960 1438 +observ 1 9 4.753590 4.753590 1578 +assumpt 1 9 4.753590 4.753590 1514 +minimum 1 9 4.753590 4.753590 1555 +occur 1 9 4.753590 4.753590 1572 +dyer 1 9 4.753590 4.753590 1573 +sensit 1 8 4.875197 4.875197 1726 +dimens 1 7 5.010635 5.010635 1930 +elementari 1 7 5.010635 5.010635 1825 +bestor 1 6 5.164786 5.164786 2099 +recov 1 6 5.164786 5.164786 2235 +restrict 1 6 5.164786 5.164786 2129 +constrain 1 6 5.164786 5.164786 2042 +gareth 1 5 5.347108 5.347108 2392 +rigid 1 5 5.347108 5.347108 2432 +unknown 1 5 5.347108 5.347108 2318 +consent 1 5 5.347108 5.347108 2389 +observatori 1 4 5.568345 5.568345 3070 +invers 1 4 5.568345 5.568345 2764 +edueduc 1 3 5.857933 5.857933 4004 +coursework 1 3 5.857933 5.857933 3588 +projector 1 3 5.857933 5.857933 3409 +duti 1 3 5.857933 5.857933 3317 +prereq 1 3 5.857933 5.857933 3178 +wiscinfo 1 3 5.857933 5.857933 3106 +hoofer 1 2 6.263398 6.263398 6101 +out 1 2 6.263398 6.263398 6090 +nextstep 1 2 6.263398 6.263398 6102 +pagegareth 1 1 6.957497 6.957497 17680 +dpl 1 1 6.957497 6.957497 17681 +dacc 1 1 6.957497 6.957497 17682 +massei 1 1 6.957497 6.957497 17679 +zealand 1 1 6.957497 6.957497 17678 +nois 1 1 6.957497 6.957497 17683 +tradition 1 1 6.957497 6.957497 17684 +intersect 1 1 6.957497 6.957497 17685 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..84e0814f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +area 1 144 1.945910 1.945910 80 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +west 1 83 2.484907 2.484907 192 +local 1 55 2.944439 2.944439 334 +undergradu 1 54 2.944439 2.944439 338 +advisor 1 51 2.995732 2.995732 355 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +ramakrishnan 1 16 4.174387 4.174387 972 +raghu 1 12 4.465908 4.465908 1212 +kevin 1 9 4.753590 4.753590 1482 +assistantdepart 1 8 4.875197 4.875197 1784 +coral 1 5 5.347108 5.347108 2538 +beyer 1 2 6.263398 6.263398 6103 +caution 1 2 6.263398 6.263398 4754 +pagekevin 1 1 6.957497 6.957497 17686 +beyerbey 1 1 6.957497 6.957497 17687 +researchresearch 1 1 6.957497 6.957497 17688 +coursesinstruct 1 1 6.957497 6.957497 17689 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..50525886 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +dayton 1 119 2.079442 2.079442 104 +advanc 1 99 2.302585 2.302585 130 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +associ 1 93 2.397895 2.397895 151 +west 1 83 2.484907 2.484907 192 +html 1 75 2.639057 2.639057 235 +window 1 68 2.708050 2.708050 242 +street 1 63 2.772589 2.772589 293 +back 1 60 2.833213 2.833213 297 +point 1 58 2.890372 2.890372 319 +past 1 42 3.218876 3.218876 428 +press 1 42 3.218876 3.218876 419 +cach 1 41 3.218876 3.218876 432 +microsoft 1 38 3.295837 3.295837 468 +express 1 32 3.465736 3.465736 540 +actual 1 28 3.610918 3.610918 604 +packag 1 28 3.610918 3.610918 614 +consist 1 26 3.688879 3.688879 651 +size 1 23 3.806662 3.806662 713 +miller 1 17 4.110874 4.110874 949 +todd 1 15 4.248495 4.248495 1051 +introduc 1 13 4.382027 4.382027 1139 +faster 1 11 4.553877 4.553877 1323 +bart 1 9 4.753590 4.753590 1559 +microprocessor 1 7 5.010635 5.010635 1808 +yeah 1 6 5.164786 5.164786 2195 +locomot 1 2 6.263398 6.263398 5807 +skew 1 2 6.263398 6.263398 6057 +bezenek 1 1 6.957497 6.957497 17690 +toddm 1 1 6.957497 6.957497 17692 +cpu 1 1 6.957497 6.957497 17693 +_great 1 1 6.957497 6.957497 17694 +present_ 1 1 6.957497 6.957497 17695 +uregina 1 1 6.957497 6.957497 17696 +bayko 1 1 6.957497 6.957497 17697 +squeez 1 1 6.957497 6.957497 17698 +skateboard 1 1 6.957497 6.957497 17699 +helen 1 1 6.957497 6.957497 17700 +custer 1 1 6.957497 6.957497 17701 +_insid 1 1 6.957497 6.957497 17702 +pithi 1 1 6.957497 6.957497 17703 +pith 1 1 6.957497 6.957497 17691 +abound 1 1 6.957497 6.957497 17704 +edubezenek 1 1 6.957497 6.957497 17705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..b9a53609 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +hour 1 165 1.791759 1.791759 46 +distribut 1 162 1.791759 1.791759 51 +click 1 142 1.945910 1.945910 78 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +schedul 1 119 2.079442 2.079442 85 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +make 1 111 2.197225 2.197225 120 +site 1 106 2.197225 2.197225 119 +pictur 1 89 2.397895 2.397895 160 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +mondai 1 77 2.564949 2.564949 206 +free 1 73 2.639057 2.639057 224 +onlin 1 75 2.639057 2.639057 223 +simul 1 66 2.708050 2.708050 255 +wednesdai 1 64 2.772589 2.772589 261 +back 1 60 2.833213 2.833213 297 +anoth 1 45 3.135494 3.135494 408 +review 1 42 3.218876 3.218876 425 +word 1 34 3.401197 3.401197 508 +held 1 28 3.610918 3.610918 600 +dai 1 22 3.850148 3.850148 753 +quiz 1 16 4.174387 4.174387 990 +anywai 1 15 4.248495 4.248495 1047 +speech 1 12 4.465908 4.465908 1222 +neat 1 12 4.465908 4.465908 1263 +averag 1 6 5.164786 5.164786 2098 +jpeg 1 6 5.164786 5.164786 2053 +condor 1 5 5.347108 5.347108 2577 +nathan 1 4 5.568345 5.568345 2794 +bockrath 1 3 5.857933 5.857933 3420 +nate 1 2 6.263398 6.263398 5720 +viru 1 2 6.263398 6.263398 4782 +macro 1 2 6.263398 6.263398 5686 +pageoth 1 2 6.263398 6.263398 6104 +pageback 1 1 6.957497 6.957497 17706 +oraclesend 1 1 6.957497 6.957497 17707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..1e2a9751 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,289 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +take 1 97 2.302585 2.302585 134 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +follow 1 92 2.397895 2.397895 143 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +thing 1 84 2.484907 2.484907 189 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +internet 1 83 2.484907 2.484907 186 +member 1 84 2.484907 2.484907 165 +school 1 84 2.484907 2.484907 188 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +orient 1 80 2.564949 2.564949 205 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +appli 1 71 2.639057 2.639057 226 +david 1 71 2.639057 2.639057 232 +addit 1 74 2.639057 2.639057 228 +free 1 73 2.639057 2.639057 224 +line 1 75 2.639057 2.639057 231 +meet 1 72 2.639057 2.639057 229 +degre 1 69 2.708050 2.708050 259 +thursdai 1 70 2.708050 2.708050 241 +creat 1 63 2.772589 2.772589 277 +new 1 64 2.772589 2.772589 262 +organ 1 65 2.772589 2.772589 265 +visit 1 63 2.772589 2.772589 288 +import 1 65 2.772589 2.772589 282 +type 1 61 2.833213 2.833213 296 +plai 1 60 2.833213 2.833213 307 +locat 1 59 2.833213 2.833213 303 +unix 1 58 2.890372 2.890372 308 +local 1 55 2.944439 2.944439 334 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +run 1 51 2.995732 2.995732 347 +right 1 48 3.044522 3.044522 363 +friend 1 48 3.044522 3.044522 376 +done 1 47 3.091042 3.091042 381 +howev 1 41 3.218876 3.218876 422 +realli 1 40 3.258097 3.258097 444 +societi 1 40 3.258097 3.258097 456 +prototyp 1 38 3.295837 3.295837 463 +hand 1 37 3.332205 3.332205 475 +game 1 36 3.367296 3.367296 498 +short 1 36 3.367296 3.367296 499 +word 1 34 3.401197 3.401197 508 +either 1 35 3.401197 3.401197 506 +everi 1 34 3.401197 3.401197 519 +print 1 34 3.401197 3.401197 503 +queri 1 33 3.433987 3.433987 524 +kind 1 32 3.465736 3.465736 541 +scientist 1 31 3.496508 3.496508 560 +often 1 31 3.496508 3.496508 551 +taken 1 31 3.496508 3.496508 555 +storag 1 31 3.496508 3.496508 553 +someth 1 31 3.496508 3.496508 554 +except 1 28 3.610918 3.610918 607 +quit 1 27 3.637586 3.637586 633 +administr 1 27 3.637586 3.637586 628 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +enjoi 1 26 3.688879 3.688879 660 +rather 1 26 3.688879 3.688879 642 +experiment 1 26 3.688879 3.688879 645 +although 1 25 3.737670 3.737670 667 +wai 1 25 3.737670 3.737670 662 +notic 1 25 3.737670 3.737670 675 +task 1 25 3.737670 3.737670 678 +interpret 1 24 3.761200 3.761200 686 +store 1 24 3.761200 3.761200 693 +consult 1 24 3.761200 3.761200 687 +alwai 1 24 3.761200 3.761200 691 +methodolog 1 23 3.806662 3.806662 733 +defin 1 22 3.850148 3.850148 746 +william 1 22 3.850148 3.850148 765 +try 1 22 3.850148 3.850148 764 +util 1 21 3.912023 3.912023 774 +voic 1 21 3.912023 3.912023 806 +tell 1 21 3.912023 3.912023 777 +sure 1 20 3.951244 3.951244 813 +kernel 1 20 3.951244 3.951244 825 +wonder 1 20 3.951244 3.951244 815 +longer 1 20 3.951244 3.951244 816 +entir 1 20 3.951244 3.951244 811 +els 1 19 4.007333 4.007333 843 +five 1 19 4.007333 4.007333 841 +histori 1 19 4.007333 4.007333 853 +thoma 1 18 4.060443 4.060443 901 +seem 1 18 4.060443 4.060443 899 +along 1 18 4.060443 4.060443 878 +whole 1 17 4.110874 4.110874 940 +anyth 1 16 4.174387 4.174387 998 +across 1 16 4.174387 4.174387 974 +enough 1 15 4.248495 4.248495 1040 +purchas 1 15 4.248495 4.248495 1030 +drive 1 15 4.248495 4.248495 1052 +goe 1 15 4.248495 4.248495 1044 +draw 1 14 4.317488 4.317488 1086 +comic 1 14 4.317488 4.317488 1103 +role 1 14 4.317488 4.317488 1101 +becam 1 14 4.317488 4.317488 1117 +everyon 1 13 4.382027 4.382027 1148 +forth 1 13 4.382027 4.382027 1186 +everyth 1 13 4.382027 4.382027 1169 +care 1 13 4.382027 4.382027 1177 +dewitt 1 12 4.465908 4.465908 1270 +reader 1 12 4.465908 4.465908 1246 +usenix 1 12 4.465908 4.465908 1240 +road 1 11 4.553877 4.553877 1374 +shore 1 11 4.553877 4.553877 1377 +lake 1 11 4.553877 4.553877 1373 +night 1 11 4.553877 4.553877 1319 +thecomput 1 10 4.653960 4.653960 1408 +occur 1 9 4.753590 4.753590 1572 +departmentof 1 9 4.753590 4.753590 1539 +drink 1 9 4.753590 4.753590 1607 +architect 1 8 4.875197 4.875197 1624 +job 1 8 4.875197 4.875197 1702 +paradis 1 8 4.875197 4.875197 1782 +port 1 8 4.875197 4.875197 1766 +burger 1 7 5.010635 5.010635 1889 +throughout 1 7 5.010635 5.010635 1871 +bore 1 7 5.010635 5.010635 1948 +usenet 1 7 5.010635 5.010635 1839 +parent 1 6 5.164786 5.164786 2204 +beer 1 6 5.164786 5.164786 2216 +relax 1 6 5.164786 5.164786 2120 +sleep 1 6 5.164786 5.164786 2211 +fiction 1 6 5.164786 5.164786 2217 +railroad 1 6 5.164786 5.164786 2161 +put 1 6 5.164786 5.164786 2017 +gate 1 6 5.164786 5.164786 2182 +famou 1 6 5.164786 5.164786 2185 +geograph 1 6 5.164786 5.164786 2236 +whatev 1 6 5.164786 5.164786 2097 +benefit 1 6 5.164786 5.164786 2213 +divers 1 6 5.164786 5.164786 2232 +semi 1 5 5.347108 5.347108 2510 +east 1 5 5.347108 5.347108 2472 +matur 1 5 5.347108 5.347108 2269 +advic 1 5 5.347108 5.347108 2509 +moon 1 4 5.568345 5.568345 2991 +haven 1 4 5.568345 5.568345 3037 +aircraft 1 4 5.568345 5.568345 2872 +roll 1 3 5.857933 5.857933 3723 +hacker 1 3 5.857933 5.857933 3996 +pai 1 3 5.857933 5.857933 3672 +tiger 1 3 5.857933 5.857933 3897 +tremend 1 3 5.857933 5.857933 3453 +insan 1 3 5.857933 5.857933 4006 +beat 1 3 5.857933 5.857933 3840 +gamma 1 3 5.857933 5.857933 3219 +workin 1 3 5.857933 5.857933 3938 +agre 1 3 5.857933 5.857933 4007 +owner 1 3 5.857933 5.857933 3531 +pilot 1 3 5.857933 5.857933 4008 +acquaint 1 3 5.857933 5.857933 3468 +weekend 1 3 5.857933 5.857933 3357 +timeoper 1 2 6.263398 6.263398 4363 +woodwork 1 2 6.263398 6.263398 5463 +brew 1 2 6.263398 6.263398 5988 +disagre 1 2 6.263398 6.263398 6105 +pursuit 1 2 6.263398 6.263398 6048 +fly 1 2 6.263398 6.263398 5937 +stripe 1 2 6.263398 6.263398 6106 +creatur 1 2 6.263398 6.263398 6107 +leap 1 2 6.263398 6.263398 5654 +tovisit 1 2 6.263398 6.263398 4686 +that 1 2 6.263398 6.263398 5111 +differentarchitectur 1 2 6.263398 6.263398 6051 +hord 1 2 6.263398 6.263398 5917 +slowli 1 2 6.263398 6.263398 5363 +pagelast 1 2 6.263398 6.263398 5793 +bolobologreet 1 1 6.957497 6.957497 17714 +bolo 1 1 6.957497 6.957497 17708 +christen 1 1 6.957497 6.957497 17715 +josef 1 1 6.957497 6.957497 17710 +mebolo 1 1 6.957497 6.957497 17716 +bestexplan 1 1 6.957497 6.957497 17717 +bywhat 1 1 6.957497 6.957497 17718 +acomput 1 1 6.957497 6.957497 17719 +shudder 1 1 6.957497 6.957497 17720 +newoper 1 1 6.957497 6.957497 17721 +sameto 1 1 6.957497 6.957497 17722 +myroomm 1 1 6.957497 6.957497 17723 +sublim 1 1 6.957497 6.957497 17724 +thetig 1 1 6.957497 6.957497 17725 +blake 1 1 6.957497 6.957497 17726 +poemtyg 1 1 6.957497 6.957497 17727 +tyger 1 1 6.957497 6.957497 17728 +againin 1 1 6.957497 6.957497 17729 +ahous 1 1 6.957497 6.957497 17730 +isjosef 1 1 6.957497 6.957497 17731 +roadmonona 1 1 6.957497 6.957497 17732 +workwork 1 1 6.957497 6.957497 17733 +banana 1 1 6.957497 6.957497 17734 +grung 1 1 6.957497 6.957497 17735 +perhapssom 1 1 6.957497 6.957497 17736 +othermonth 1 1 6.957497 6.957497 17737 +intosubmiss 1 1 6.957497 6.957497 17738 +andstar 1 1 6.957497 6.957497 17739 +fordav 1 1 6.957497 6.957497 17740 +wiss 1 1 6.957497 6.957497 17741 +themadison 1 1 6.957497 6.957497 17742 +campusof 1 1 6.957497 6.957497 17743 +peninsula 1 1 6.957497 6.957497 17744 +technicalexpertis 1 1 6.957497 6.957497 17745 +newsystem 1 1 6.957497 6.957497 17746 +reviv 1 1 6.957497 6.957497 17747 +oddbal 1 1 6.957497 6.957497 17748 +tasksar 1 1 6.957497 6.957497 17749 +serverbut 1 1 6.957497 6.957497 17750 +mostlyempti 1 1 6.957497 6.957497 17751 +activitiesuwvaxi 1 1 6.957497 6.957497 17752 +uwvax 1 1 6.957497 6.957497 17709 +uucp 1 1 6.957497 6.957497 17711 +svolunt 1 1 6.957497 6.957497 17753 +organizationsi 1 1 6.957497 6.957497 17754 +oftenhav 1 1 6.957497 6.957497 17755 +usersof 1 1 6.957497 6.957497 17756 +aopa 1 1 6.957497 6.957497 17757 +blitz 1 1 6.957497 6.957497 17758 +drinkingwhen 1 1 6.957497 6.957497 17759 +friendsand 1 1 6.957497 6.957497 17760 +essen 1 1 6.957497 6.957497 17712 +hau 1 1 6.957497 6.957497 17713 +loftili 1 1 6.957497 6.957497 17761 +labelledblitz 1 1 6.957497 6.957497 17762 +ofoctoberfest 1 1 6.957497 6.957497 17763 +chud 1 1 6.957497 6.957497 17764 +accumulateda 1 1 6.957497 6.957497 17765 +whatnotof 1 1 6.957497 6.957497 17766 +charad 1 1 6.957497 6.957497 17767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..aa6eb640 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +oper 1 180 1.609438 1.609438 34 +network 1 168 1.791759 1.791759 61 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +homepag 1 93 2.397895 2.397895 148 +search 1 95 2.397895 2.397895 155 +would 1 67 2.708050 2.708050 251 +much 1 52 2.995732 2.995732 349 +possibl 1 47 3.091042 3.091042 378 +probabl 1 40 3.258097 3.258097 455 +intro 1 17 4.110874 4.110874 915 +brad 1 12 4.465908 4.465908 1264 +bore 1 7 5.010635 5.010635 1948 +athlet 1 7 5.010635 5.010635 1933 +altavista 1 6 5.164786 5.164786 2222 +aim 1 5 5.347108 5.347108 2477 +jazz 1 5 5.347108 5.347108 2527 +thayer 1 3 5.857933 5.857933 3441 +thec 1 3 5.857933 5.857933 3132 +badger 1 3 5.857933 5.857933 3502 +packer 1 3 5.857933 5.857933 3728 +foolish 1 2 6.263398 6.263398 6108 +pepper 1 2 6.263398 6.263398 6013 +pagesom 1 2 6.263398 6.263398 6109 +adress 1 2 6.263398 6.263398 5168 +pageuw 1 2 6.263398 6.263398 6021 +neglect 1 1 6.957497 6.957497 17768 +seminaranywai 1 1 6.957497 6.957497 17769 +beaucoup 1 1 6.957497 6.957497 17770 +boir 1 1 6.957497 6.957497 17771 +enginefind 1 1 6.957497 6.957497 17772 +wideth 1 1 6.957497 6.957497 17773 +duan 1 1 6.957497 6.957497 17774 +mclaughlin 1 1 6.957497 6.957497 17775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..d3286a49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +austin 1 168 1.791759 1.791759 63 +implement 1 152 1.791759 1.791759 52 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +dayton 1 119 2.079442 2.079442 104 +intern 1 108 2.197225 2.197225 128 +access 1 102 2.302585 2.302585 136 +west 1 83 2.484907 2.484907 192 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +septemb 1 65 2.772589 2.772589 274 +pointer 1 48 3.044522 3.044522 368 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +arrai 1 27 3.637586 3.637586 627 +detect 1 26 3.688879 3.688879 646 +scott 1 18 4.060443 4.060443 884 +regist 1 17 4.110874 4.110874 938 +mellon 1 13 4.382027 4.382027 1179 +carnegi 1 12 4.465908 4.465908 1260 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 1 6 5.164786 5.164786 2237 +microarchitectur 1 6 5.164786 5.164786 2238 +pagescott 1 4 5.568345 5.568345 2978 +breach 1 3 5.857933 5.857933 4009 +recreat 1 3 5.857933 5.857933 3990 +anatomi 1 3 5.857933 5.857933 4010 +vijaykumar 1 3 5.857933 5.857933 4011 +usatel 1 2 6.263398 6.263398 6111 +educationph 1 2 6.263398 6.263398 6112 +interestscomput 1 2 6.263398 6.263398 6113 +gurindar 1 2 6.263398 6.263398 6110 +addresseseducationresearch 1 1 6.957497 6.957497 17776 +associatesaddressesscott 1 1 6.957497 6.957497 17777 +breachdepart 1 1 6.957497 6.957497 17778 +advisorguri 1 1 6.957497 6.957497 17779 +sohiresearch 1 1 6.957497 6.957497 17780 +architecturemultiscalarpublicationsmultiscalar 1 1 6.957497 6.957497 17781 +processorsgurindar 1 1 6.957497 6.957497 17782 +vijaykumarnd 1 1 6.957497 6.957497 17783 +processorscott 1 1 6.957497 6.957497 17784 +sohith 1 1 6.957497 6.957497 17785 +errorstodd 1 1 6.957497 6.957497 17786 +sohiconfer 1 1 6.957497 6.957497 17787 +recreationwingsbeersquidtvassociatestodd 1 1 6.957497 6.957497 17788 +austindoug 1 1 6.957497 6.957497 17789 +burgerbabak 1 1 6.957497 6.957497 17790 +falsafialain 1 1 6.957497 6.957497 17791 +kagit 1 1 6.957497 6.957497 17792 +vijaykumarlast 1 1 6.957497 6.957497 17793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..d3c828b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +fall 1 181 1.609438 1.609438 40 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +make 1 111 2.197225 2.197225 120 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +name 1 72 2.639057 2.639057 220 +differ 1 66 2.708050 2.708050 253 +get 1 46 3.091042 3.091042 380 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +bookmark 1 26 3.688879 3.688879 639 +notic 1 25 3.737670 3.737670 675 +brief 1 16 4.174387 4.174387 1001 +hobbi 1 16 4.174387 4.174387 1009 +zhang 1 16 4.174387 4.174387 980 +becam 1 14 4.317488 4.317488 1117 +unfortun 1 13 4.382027 4.382027 1170 +stai 1 12 4.465908 4.465908 1215 +said 1 9 4.753590 4.753590 1571 +poor 1 8 4.875197 4.875197 1736 +perhap 1 8 4.875197 4.875197 1693 +smooth 1 7 5.010635 5.010635 1855 +whenev 1 7 5.010635 5.010635 1883 +wouldn 1 7 5.010635 5.010635 1970 +smile 1 7 5.010635 5.010635 1807 +pool 1 6 5.164786 5.164786 2225 +suni 1 5 5.347108 5.347108 2452 +skin 1 4 5.568345 5.568345 2840 +temporarili 1 3 5.857933 5.857933 3692 +crack 1 3 5.857933 5.857933 3435 +roll 1 3 5.857933 5.857933 3723 +isaac 1 3 5.857933 5.857933 3855 +albani 1 2 6.263398 6.263398 4892 +tragic 1 2 6.263398 6.263398 6114 +theblack 1 2 6.263398 6.263398 5869 +eggleston 1 2 6.263398 6.263398 4581 +bleed 1 1 6.957497 6.957497 17794 +nontrivi 1 1 6.957497 6.957497 17795 +waysher 1 1 6.957497 6.957497 17796 +underst 1 1 6.957497 6.957497 17797 +unadorn 1 1 6.957497 6.957497 17798 +pizza 1 1 6.957497 6.957497 17799 +stinkin 1 1 6.957497 6.957497 17800 +myclass 1 1 6.957497 6.957497 17801 +hypersensit 1 1 6.957497 6.957497 17802 +rockjock 1 1 6.957497 6.957497 17803 +cretin 1 1 6.957497 6.957497 17804 +brood 1 1 6.957497 6.957497 17805 +glare 1 1 6.957497 6.957497 17806 +clenchesfist 1 1 6.957497 6.957497 17807 +knuckl 1 1 6.957497 6.957497 17808 +flightyfemm 1 1 6.957497 6.957497 17809 +razz 1 1 6.957497 6.957497 17810 +asskick 1 1 6.957497 6.957497 17811 +thirdgrad 1 1 6.957497 6.957497 17812 +hardbodi 1 1 6.957497 6.957497 17813 +leatherboi 1 1 6.957497 6.957497 17814 +leer 1 1 6.957497 6.957497 17815 +atm 1 1 6.957497 6.957497 17816 +todayi 1 1 6.957497 6.957497 17817 +giggl 1 1 6.957497 6.957497 17818 +aprostitut 1 1 6.957497 6.957497 17819 +bigotri 1 1 6.957497 6.957497 17820 +pedagodi 1 1 6.957497 6.957497 17821 +goat 1 1 6.957497 6.957497 17822 +refus 1 1 6.957497 6.957497 17823 +claw 1 1 6.957497 6.957497 17824 +sssuuuhhh 1 1 6.957497 6.957497 17825 +mmuuuhhhh 1 1 6.957497 6.957497 17826 +dddduuuuuhhhhh 1 1 6.957497 6.957497 17827 +mmmmuuuhhhh 1 1 6.957497 6.957497 17828 +maaaahhhjaaaaaahhhhh 1 1 6.957497 6.957497 17829 +fffuuuhhhhh 1 1 6.957497 6.957497 17830 +yyyyyyyuuuuuhhhhh 1 1 6.957497 6.957497 17831 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 1 1 6.957497 6.957497 17832 +uuuhhh 1 1 6.957497 6.957497 17833 +uuummmm 1 1 6.957497 6.957497 17834 +uuuhhhh 1 1 6.957497 6.957497 17835 +wwwwwhhhhuuuuuhhhhh 1 1 6.957497 6.957497 17836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..c0056159 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,155 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +architectur 1 139 1.945910 1.945910 77 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +schedul 1 119 2.079442 2.079442 85 +assist 1 112 2.197225 2.197225 113 +topic 1 114 2.197225 2.197225 110 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +techniqu 1 99 2.302585 2.302585 138 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +good 1 77 2.564949 2.564949 200 +summari 1 73 2.639057 2.639057 237 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +simul 1 66 2.708050 2.708050 255 +integr 1 67 2.708050 2.708050 245 +collect 1 65 2.772589 2.772589 268 +polici 1 64 2.772589 2.772589 279 +improv 1 62 2.772589 2.772589 289 +thesi 1 57 2.890372 2.890372 327 +summer 1 56 2.890372 2.890372 311 +talk 1 53 2.944439 2.944439 336 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +physic 1 47 3.091042 3.091042 377 +cach 1 41 3.218876 3.218876 432 +streetmadison 1 38 3.295837 3.295837 474 +slide 1 38 3.295837 3.295837 467 +prototyp 1 38 3.295837 3.295837 463 +sciencesunivers 1 37 3.332205 3.332205 486 +china 1 37 3.332205 3.332205 487 +respons 1 37 3.332205 3.332205 476 +tech 1 35 3.401197 3.401197 515 +global 1 34 3.401197 3.401197 520 +john 1 33 3.433987 3.433987 532 +extend 1 32 3.465736 3.465736 539 +storag 1 31 3.496508 3.496508 553 +focus 1 29 3.583519 3.583519 584 +trace 1 25 3.737670 3.737670 677 +strategi 1 25 3.737670 3.737670 682 +disk 1 22 3.850148 3.850148 747 +department 1 20 3.951244 3.951244 839 +alloc 1 20 3.951244 3.951244 821 +kernel 1 20 3.951244 3.951244 825 +beij 1 19 4.007333 4.007333 876 +eduphon 1 15 4.248495 4.248495 1060 +princeton 1 15 4.248495 4.248495 1042 +decid 1 14 4.317488 4.317488 1075 +tsinghua 1 13 4.382027 4.382027 1195 +karlin 1 13 4.382027 4.382027 1176 +sigmetr 1 13 4.382027 4.382027 1173 +anna 1 12 4.465908 4.465908 1292 +usenix 1 12 4.465908 4.465908 1240 +isca 1 11 4.553877 4.553877 1354 +osdi 1 9 4.753590 4.753590 1534 +significantli 1 9 4.753590 4.753590 1508 +uniprocessor 1 8 4.875197 4.875197 1696 +replac 1 8 4.875197 4.875197 1668 +sciencedepart 1 6 5.164786 5.164786 2172 +prefetch 1 6 5.164786 5.164786 2039 +edward 1 6 5.164786 5.164786 2050 +carefulli 1 6 5.164786 5.164786 2045 +chosen 1 6 5.164786 5.164786 1984 +toc 1 5 5.347108 5.347108 2562 +summarymi 1 5 5.347108 5.347108 2580 +havedevelop 1 4 5.568345 5.568345 2681 +cachingtraci 1 3 5.857933 5.857933 3923 +kimbrel 1 3 5.857933 5.857933 3924 +felten 1 3 5.857933 5.857933 3925 +shorter 1 3 5.857933 5.857933 3998 +raid 1 3 5.857933 5.857933 4012 +aggress 1 3 5.857933 5.857933 3240 +wilk 1 2 6.263398 6.263398 4548 +andpostscript 1 2 6.263398 6.263398 5696 +andparallel 1 2 6.263398 6.263398 6014 +usacao 1 1 6.957497 6.957497 17838 +cachingacf 1 1 6.957497 6.957497 17839 +tracesrec 1 1 6.957497 6.957497 17840 +papersintegr 1 1 6.957497 6.957497 17841 +schedulingpei 1 1 6.957497 6.957497 17842 +princetonunivers 1 1 6.957497 6.957497 17837 +strategiespei 1 1 6.957497 6.957497 17843 +peform 1 1 6.957497 6.957497 17844 +tickertaip 1 1 6.957497 6.957497 17845 +swee 1 1 6.957497 6.957497 17846 +boon 1 1 6.957497 6.957497 17847 +shivakumar 1 1 6.957497 6.957497 17848 +venkataraman 1 1 6.957497 6.957497 17849 +talksslid 1 1 6.957497 6.957497 17850 +improvefil 1 1 6.957497 6.957497 17851 +filecach 1 1 6.957497 6.957497 17852 +individualappl 1 1 6.957497 6.957497 17853 +useit 1 1 6.957497 6.957497 17854 +fairglob 1 1 6.957497 6.957497 17855 +cachereplac 1 1 6.957497 6.957497 17856 +implementationon 1 1 6.957497 6.957497 17857 +demonstratedthat 1 1 6.957497 6.957497 17858 +informationcan 1 1 6.957497 6.957497 17859 +amdevelop 1 1 6.957497 6.957497 17860 +diskarrai 1 1 6.957497 6.957497 17861 +managementproblem 1 1 6.957497 6.957497 17862 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..1a1c7b13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +perform 1 143 1.945910 1.945910 74 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +technolog 1 131 2.079442 2.079442 102 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +memori 1 101 2.302585 2.302585 139 +center 1 88 2.397895 2.397895 158 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +west 1 83 2.484907 2.484907 192 +member 1 84 2.484907 2.484907 165 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +academ 1 82 2.484907 2.484907 178 +ieee 1 86 2.484907 2.484907 190 +larg 1 82 2.484907 2.484907 168 +orient 1 80 2.564949 2.564949 205 +come 1 78 2.564949 2.564949 202 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +meet 1 72 2.639057 2.639057 229 +workshop 1 71 2.639057 2.639057 239 +main 1 67 2.708050 2.708050 256 +goal 1 66 2.708050 2.708050 250 +multimedia 1 68 2.708050 2.708050 258 +street 1 63 2.772589 2.772589 293 +evalu 1 64 2.772589 2.772589 266 +complex 1 64 2.772589 2.772589 269 +experi 1 64 2.772589 2.772589 283 +septemb 1 65 2.772589 2.772589 274 +share 1 59 2.833213 2.833213 304 +best 1 59 2.833213 2.833213 299 +locat 1 59 2.833213 2.833213 303 +content 1 59 2.833213 2.833213 302 +march 1 61 2.833213 2.833213 295 +unix 1 58 2.890372 2.890372 308 +browser 1 56 2.890372 2.890372 313 +index 1 56 2.890372 2.890372 309 +publish 1 57 2.890372 2.890372 326 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +case 1 51 2.995732 2.995732 351 +move 1 47 3.091042 3.091042 382 +past 1 42 3.218876 3.218876 428 +autom 1 41 3.218876 3.218876 434 +cach 1 41 3.218876 3.218876 432 +transact 1 39 3.258097 3.258097 438 +live 1 40 3.258097 3.258097 451 +multipl 1 39 3.258097 3.258097 453 +join 1 39 3.258097 3.258097 457 +industri 1 38 3.295837 3.295837 464 +staff 1 36 3.367296 3.367296 490 +multi 1 36 3.367296 3.367296 493 +next 1 34 3.401197 3.401197 517 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +storag 1 31 3.496508 3.496508 553 +specifi 1 30 3.555348 3.555348 568 +focus 1 29 3.583519 3.583519 584 +becom 1 28 3.610918 3.610918 603 +measur 1 28 3.610918 3.610918 609 +manipul 1 27 3.637586 3.637586 624 +though 1 27 3.637586 3.637586 622 +altern 1 26 3.688879 3.688879 641 +effort 1 26 3.688879 3.688879 652 +challeng 1 26 3.688879 3.688879 653 +proc 1 26 3.688879 3.688879 649 +primari 1 25 3.737670 3.737670 669 +spent 1 25 3.737670 3.737670 676 +toward 1 25 3.737670 3.737670 668 +accur 1 25 3.737670 3.737670 680 +client 1 25 3.737670 3.737670 679 +mike 1 24 3.761200 3.761200 703 +scalabl 1 24 3.761200 3.761200 705 +known 1 24 3.761200 3.761200 702 +initi 1 23 3.806662 3.806662 717 +varieti 1 22 3.850148 3.850148 740 +william 1 22 3.850148 3.850148 765 +leav 1 21 3.912023 3.912023 772 +fine 1 20 3.951244 3.951244 822 +benchmark 1 19 4.007333 4.007333 859 +sigmod 1 19 4.007333 4.007333 877 +thoma 1 18 4.060443 4.060443 901 +statu 1 18 4.060443 4.060443 885 +repositori 1 17 4.110874 4.110874 932 +white 1 17 4.110874 4.110874 951 +jose 1 16 4.174387 4.174387 976 +upon 1 16 4.174387 4.174387 978 +taiwan 1 16 4.174387 4.174387 1006 +brown 1 16 4.174387 4.174387 977 +cambridg 1 16 4.174387 4.174387 1008 +livni 1 15 4.248495 4.248495 1053 +hybrid 1 15 4.248495 4.248495 1057 +heterogen 1 14 4.317488 4.317488 1090 +dbm 1 13 4.382027 4.382027 1136 +forth 1 13 4.382027 4.382027 1186 +signific 1 13 4.382027 4.382027 1125 +front 1 13 4.382027 4.382027 1154 +conf 1 13 4.382027 4.382027 1181 +sigmetr 1 13 4.382027 4.382027 1173 +workload 1 12 4.465908 4.465908 1210 +dewitt 1 12 4.465908 4.465908 1270 +tune 1 12 4.465908 4.465908 1227 +franc 1 12 4.465908 4.465908 1276 +road 1 11 4.553877 4.553877 1374 +shore 1 11 4.553877 4.553877 1377 +persist 1 11 4.553877 4.553877 1367 +resid 1 10 4.653960 4.653960 1461 +naughton 1 10 4.653960 4.653960 1450 +franklin 1 10 4.653960 4.653960 1436 +grain 1 10 4.653960 4.653960 1448 +rel 1 9 4.753590 4.753590 1487 +vernon 1 9 4.753590 4.753590 1556 +lock 1 9 4.753590 4.753590 1551 +morgan 1 9 4.753590 4.753590 1484 +carei 1 8 4.875197 4.875197 1781 +replac 1 8 4.875197 4.875197 1668 +ride 1 8 4.875197 4.875197 1741 +solomon 1 8 4.875197 4.875197 1716 +hash 1 8 4.875197 4.875197 1618 +portland 1 7 5.010635 5.010635 1878 +eduresearch 1 6 5.164786 5.164786 2205 +whichi 1 6 5.164786 5.164786 2056 +academia 1 6 5.164786 5.164786 2036 +tobe 1 6 5.164786 5.164786 1995 +oopsla 1 6 5.164786 5.164786 2221 +srinivasan 1 6 5.164786 5.164786 2175 +patel 1 6 5.164786 5.164786 2154 +almaden 1 5 5.347108 5.347108 2511 +fraction 1 5 5.347108 5.347108 2259 +ifip 1 5 5.347108 5.347108 2459 +tsatalo 1 5 5.347108 5.347108 2581 +minneapoli 1 5 5.347108 5.347108 2480 +england 1 5 5.347108 5.347108 2557 +kaufmann 1 5 5.347108 5.347108 2254 +harri 1 4 5.568345 5.568345 3034 +exodu 1 4 5.568345 5.568345 3075 +sabbat 1 4 5.568345 5.568345 2824 +taipei 1 4 5.568345 5.568345 2926 +chile 1 4 5.568345 5.568345 3082 +mcauliff 1 4 5.568345 5.568345 3083 +zwill 1 4 5.568345 5.568345 3076 +nashvil 1 4 5.568345 5.568345 2867 +andp 1 4 5.568345 5.568345 2811 +tradeoff 1 3 5.857933 5.857933 3387 +forobject 1 3 5.857933 5.857933 3965 +twelv 1 3 5.857933 5.857933 3899 +oodb 1 3 5.857933 5.857933 3954 +codi 1 3 5.857933 5.857933 3940 +schwarz 1 3 5.857933 5.857933 3986 +switzerland 1 3 5.857933 5.857933 3551 +santiago 1 3 5.857933 5.857933 4013 +schuh 1 3 5.857933 5.857933 4014 +andm 1 3 5.857933 5.857933 3901 +pang 1 3 5.857933 5.857933 3509 +revisit 1 3 5.857933 5.857933 3915 +interestsdatabas 1 2 6.263398 6.263398 6116 +theexodu 1 2 6.263398 6.263398 6076 +aimedat 1 2 6.263398 6.263398 6117 +researchgroup 1 2 6.263398 6.263398 5588 +homogen 1 2 6.263398 6.263398 4821 +haa 1 2 6.263398 6.263398 6115 +niblack 1 2 6.263398 6.263398 4364 +roth 1 2 6.263398 6.263398 6089 +lausann 1 2 6.263398 6.263398 4955 +and 1 2 6.263398 6.263398 5241 +dataengin 1 2 6.263398 6.263398 6118 +oodbm 1 2 6.263398 6.263398 6083 +zaharioudaki 1 2 6.263398 6.263398 6119 +modelingof 1 2 6.263398 6.263398 5734 +indistribut 1 2 6.263398 6.263398 4257 +careymichael 1 1 6.957497 6.957497 17869 +careyprofessor 1 1 6.957497 6.957497 17870 +performanceand 1 1 6.957497 6.957497 17871 +topicsof 1 1 6.957497 6.957497 17872 +algorithmsrel 1 1 6.957497 6.957497 17873 +userdatabas 1 1 6.957497 6.957497 17874 +persistentobject 1 1 6.957497 6.957497 17875 +objectmanag 1 1 6.957497 6.957497 17876 +applicationssuch 1 1 6.957497 6.957497 17877 +greatyear 1 1 6.957497 6.957497 17878 +tackl 1 1 6.957497 6.957497 17879 +anddiffer 1 1 6.957497 6.957497 17880 +thesourc 1 1 6.957497 6.957497 17881 +projectther 1 1 6.957497 6.957497 17882 +garlic 1 1 6.957497 6.957497 17863 +multimediainform 1 1 6.957497 6.957497 17883 +objectdatabas 1 1 6.957497 6.957497 17884 +continuedto 1 1 6.957497 6.957497 17885 +aqueri 1 1 6.957497 6.957497 17886 +pesto 1 1 6.957497 6.957497 17887 +thegarl 1 1 6.957497 6.957497 17888 +kiernan 1 1 6.957497 6.957497 17889 +orientedprogram 1 1 6.957497 6.957497 17890 +arya 1 1 6.957497 6.957497 17864 +fagin 1 1 6.957497 6.957497 17865 +flickner 1 1 6.957497 6.957497 17866 +petkov 1 1 6.957497 6.957497 17867 +tork 1 1 6.957497 6.957497 17891 +wimmer 1 1 6.957497 6.957497 17868 +visualdatabas 1 1 6.957497 6.957497 17892 +garlicapproach 1 1 6.957497 6.957497 17893 +luniewski 1 1 6.957497 6.957497 17894 +withd 1 1 6.957497 6.957497 17895 +kant 1 1 6.957497 6.957497 17896 +onobject 1 1 6.957497 6.957497 17897 +mehta 1 1 6.957497 6.957497 17898 +thint 1 1 6.957497 6.957497 17899 +smrc 1 1 6.957497 6.957497 17900 +withb 1 1 6.957497 6.957497 17901 +reinwald 1 1 6.957497 6.957497 17902 +desslock 1 1 6.957497 6.957497 17903 +lehman 1 1 6.957497 6.957497 17904 +pirahesh 1 1 6.957497 6.957497 17905 +tarascon 1 1 6.957497 6.957497 17906 +provenc 1 1 6.957497 6.957497 17907 +sigmodint 1 1 6.957497 6.957497 17908 +managementof 1 1 6.957497 6.957497 17909 +multivers 1 1 6.957497 6.957497 17910 +bober 1 1 6.957497 6.957497 17911 +oszu 1 1 6.957497 6.957497 17912 +dayal 1 1 6.957497 6.957497 17913 +valduriez 1 1 6.957497 6.957497 17914 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..e18d1f7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +madison 1 165 1.791759 1.791759 55 +assign 1 135 1.945910 1.945910 66 +dayton 1 119 2.079442 2.079442 104 +introduct 1 126 2.079442 2.079442 87 +structur 1 106 2.197225 2.197225 105 +west 1 83 2.484907 2.484907 192 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +fridai 1 44 3.135494 3.135494 390 +streetmadison 1 38 3.295837 3.295837 474 +edutelephon 1 10 4.653960 4.653960 1473 +chin 1 5 5.347108 5.347108 2408 +tang 1 5 5.347108 5.347108 2409 +bldg 1 4 5.568345 5.568345 2983 +biochemistri 1 3 5.857933 5.857933 3513 +cchin 1 2 6.263398 6.263398 4691 +pagechin 1 1 6.957497 6.957497 17915 +tanggradu 1 1 6.957497 6.957497 17916 +ameduc 1 1 6.957497 6.957497 17917 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..743fe67c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +book 1 99 2.302585 2.302585 131 +real 1 93 2.397895 2.397895 144 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +come 1 78 2.564949 2.564949 202 +summari 1 73 2.639057 2.639057 237 +york 1 41 3.218876 3.218876 435 +movi 1 40 3.258097 3.258097 459 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +soon 1 36 3.367296 3.367296 494 +miscellan 1 23 3.806662 3.806662 731 +log 1 19 4.007333 4.007333 857 +usaphon 1 9 4.753590 4.753590 1600 +assistantdepart 1 8 4.875197 4.875197 1784 +cultur 1 7 5.010635 5.010635 1951 +chandra 1 6 5.164786 5.164786 2091 +altavista 1 6 5.164786 5.164786 2222 +satish 1 4 5.568345 5.568345 2833 +wodehous 1 2 6.263398 6.263398 4990 +italian 1 2 6.263398 6.263398 5993 +nostalgia 1 1 6.957497 6.957497 17918 +linksclick 1 1 6.957497 6.957497 17919 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..93a8ee40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +click 1 142 1.945910 1.945910 78 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +mathemat 1 108 2.197225 2.197225 123 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +state 1 76 2.564949 2.564949 207 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +integr 1 67 2.708050 2.708050 245 +august 1 66 2.708050 2.708050 257 +share 1 59 2.833213 2.833213 304 +automat 1 61 2.833213 2.833213 306 +suggest 1 53 2.944439 2.944439 331 +visual 1 48 3.044522 3.044522 372 +physic 1 47 3.091042 3.091042 377 +electron 1 47 3.091042 3.091042 379 +examin 1 42 3.218876 3.218876 424 +movi 1 40 3.258097 3.258097 459 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +sciencesunivers 1 37 3.332205 3.332205 486 +tech 1 35 3.401197 3.401197 515 +jame 1 35 3.401197 3.401197 507 +award 1 34 3.401197 3.401197 523 +curriculum 1 33 3.433987 3.433987 535 +pass 1 28 3.610918 3.610918 611 +enhanc 1 26 3.688879 3.688879 644 +supercomput 1 25 3.737670 3.737670 681 +honor 1 23 3.806662 3.806662 729 +miscellan 1 23 3.806662 3.806662 731 +indian 1 22 3.850148 3.850148 769 +annot 1 21 3.912023 3.912023 775 +wind 1 18 4.060443 4.060443 908 +thoma 1 18 4.060443 4.060443 901 +stephen 1 11 4.553877 4.553877 1342 +laru 1 9 4.753590 4.753590 1560 +ball 1 9 4.753590 4.753590 1608 +assistantdepart 1 8 4.875197 4.875197 1784 +insert 1 8 4.875197 4.875197 1687 +gold 1 8 4.875197 4.875197 1745 +bombai 1 7 5.010635 5.010635 1972 +dream 1 6 5.164786 5.164786 2165 +icpp 1 5 5.347108 5.347108 2382 +merit 1 5 5.347108 5.347108 2466 +trishul 1 3 5.857933 5.857933 4016 +chilimbi 1 3 5.857933 5.857933 4015 +usaadvisor 1 3 5.857933 5.857933 4017 +certif 1 3 5.857933 5.857933 3859 +medal 1 3 5.857933 5.857933 3912 +cico 1 2 6.263398 6.263398 6120 +eick 1 2 6.263398 6.263398 5971 +megradu 1 1 6.957497 6.957497 17920 +designresearch 1 1 6.957497 6.957497 17921 +tunneleduc 1 1 6.957497 6.957497 17922 +publicationscachi 1 1 6.957497 6.957497 17923 +stormwatch 1 1 6.957497 6.957497 17924 +protocolstrishul 1 1 6.957497 6.957497 17925 +olympiadpresid 1 1 6.957497 6.957497 17926 +examinationcertif 1 1 6.957497 6.957497 17927 +chemistrycertif 1 1 6.957497 6.957497 17928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..dfaa321a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +june 1 79 2.564949 2.564949 214 +septemb 1 65 2.772589 2.772589 274 +mark 1 44 3.135494 3.135494 403 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +hill 1 25 3.737670 3.737670 670 +indian 1 22 3.850148 3.850148 769 +madra 1 8 4.875197 4.875197 1770 +univeristi 1 8 4.875197 4.875197 1754 +studentdepart 1 5 5.347108 5.347108 2505 +usaadvisor 1 3 5.857933 5.857933 4017 +chandrasekaran 1 2 6.263398 6.263398 6121 +sashikanth 1 2 6.263398 6.263398 6122 +btech 1 2 6.263398 6.263398 6123 +csashi 1 1 6.957497 6.957497 17929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..11d6d8d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +wisc 1 242 1.386294 1.386294 33 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +world 1 115 2.197225 2.197225 126 +text 1 98 2.302585 2.302585 133 +search 1 95 2.397895 2.397895 155 +center 1 88 2.397895 2.397895 158 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +free 1 73 2.639057 2.639057 224 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +java 1 70 2.708050 2.708050 248 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +locat 1 59 2.833213 2.833213 303 +index 1 56 2.890372 2.890372 309 +life 1 50 3.044522 3.044522 375 +approach 1 48 3.044522 3.044522 366 +standard 1 48 3.044522 3.044522 365 +transact 1 39 3.258097 3.258097 438 +map 1 39 3.258097 3.258097 452 +prototyp 1 38 3.295837 3.295837 463 +microsoft 1 38 3.295837 3.295837 468 +open 1 38 3.295837 3.295837 469 +sciencesunivers 1 37 3.332205 3.332205 486 +global 1 34 3.401197 3.401197 520 +survei 1 35 3.401197 3.401197 513 +posit 1 31 3.496508 3.496508 552 +focu 1 30 3.555348 3.555348 571 +weather 1 28 3.610918 3.610918 618 +item 1 19 4.007333 4.007333 856 +white 1 17 4.110874 4.110874 951 +dilbert 1 16 4.174387 4.174387 996 +stock 1 16 4.174387 4.174387 1007 +track 1 15 4.248495 4.248495 1029 +doit 1 14 4.317488 4.317488 1111 +draft 1 14 4.317488 4.317488 1085 +land 1 12 4.465908 4.465908 1273 +shore 1 11 4.553877 4.553877 1377 +council 1 11 4.553877 4.553877 1364 +appl 1 11 4.553877 4.553877 1303 +market 1 11 4.553877 4.553877 1361 +consortium 1 10 4.653960 4.653960 1467 +govern 1 9 4.753590 4.753590 1581 +paradis 1 8 4.875197 4.875197 1782 +gopher 1 6 5.164786 5.164786 1982 +feder 1 5 5.347108 5.347108 2266 +wiscinfo 1 3 5.857933 5.857933 3106 +commerc 1 3 5.857933 5.857933 3209 +dienst 1 3 5.857933 5.857933 3640 +eosdi 1 2 6.263398 6.263398 6124 +nebraska 1 2 6.263398 6.263398 5574 +lincoln 1 2 6.263398 6.263398 5575 +metrowerk 1 2 6.263398 6.263398 4131 +worm 1 2 6.263398 6.263398 5775 +projectdepart 1 2 6.263398 6.263398 6125 +curt 1 1 6.957497 6.957497 17930 +ellmann 1 1 6.957497 6.957497 17931 +webgnat 1 1 6.957497 6.957497 17932 +defect 1 1 6.957497 6.957497 17933 +opengi 1 1 6.957497 6.957497 17934 +calmit 1 1 6.957497 6.957497 17935 +illustra 1 1 6.957497 6.957497 17936 +papersmiscellan 1 1 6.957497 6.957497 17937 +sitescampu 1 1 6.957497 6.957497 17938 +wyrm 1 1 6.957497 6.957497 17939 +hoard 1 1 6.957497 6.957497 17940 +wiscnet 1 1 6.957497 6.957497 17941 +netcorpor 1 1 6.957497 6.957497 17942 +paww 1 1 6.957497 6.957497 17943 +taligentsearch 1 1 6.957497 6.957497 17944 +savvi 1 1 6.957497 6.957497 17945 +webcrawl 1 1 6.957497 6.957497 17946 +winsock 1 1 6.957497 6.957497 17947 +geolog 1 1 6.957497 6.957497 17948 +gil 1 1 6.957497 6.957497 17949 +oakridg 1 1 6.957497 6.957497 17950 +datacurt 1 1 6.957497 6.957497 17951 +ellmanncurt 1 1 6.957497 6.957497 17952 +eduparadis 1 1 6.957497 6.957497 17953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..3f9a873f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +yong 1 4 5.568345 5.568345 2809 +chee 1 3 5.857933 5.857933 3480 +cychan 1 2 6.263398 6.263398 4737 +pagechan 1 1 6.957497 6.957497 17954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..67392392 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +address 1 170 1.791759 1.791759 62 +schedul 1 119 2.079442 2.079442 85 +dayton 1 119 2.079442 2.079442 104 +mathemat 1 108 2.197225 2.197225 123 +educ 1 86 2.484907 2.484907 191 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +colleg 1 61 2.833213 2.833213 300 +friend 1 48 3.044522 3.044522 376 +physic 1 47 3.091042 3.091042 377 +math 1 44 3.135494 3.135494 402 +mechan 1 43 3.178054 3.178054 416 +statist 1 35 3.401197 3.401197 521 +lewi 1 8 4.875197 4.875197 1700 +nuclear 1 5 5.347108 5.347108 2576 +clark 1 4 5.568345 5.568345 2705 +mace 1 2 6.263398 6.263398 4849 +astronaut 1 2 6.263398 6.263398 5748 +sara 1 1 6.957497 6.957497 17955 +bauman 1 1 6.957497 6.957497 17956 +dailei 1 1 6.957497 6.957497 17957 +baumandailei 1 1 6.957497 6.957497 17958 +edugradu 1 1 6.957497 6.957497 17959 +pagessend 1 1 6.957497 6.957497 17960 +daileytu 1 1 6.957497 6.957497 17961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..864d12ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,314 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +area 1 144 1.945910 1.945910 80 +first 1 140 1.945910 1.945910 71 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +teach 1 108 2.197225 2.197225 112 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +follow 1 92 2.397895 2.397895 143 +call 1 91 2.397895 2.397895 153 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +chang 1 82 2.484907 2.484907 163 +librari 1 87 2.484907 2.484907 181 +interfac 1 79 2.564949 2.564949 209 +dynam 1 76 2.564949 2.564949 194 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +messag 1 76 2.564949 2.564949 212 +method 1 80 2.564949 2.564949 213 +refer 1 78 2.564949 2.564949 203 +state 1 76 2.564949 2.564949 207 +david 1 71 2.639057 2.639057 232 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +effici 1 73 2.639057 2.639057 233 +write 1 72 2.639057 2.639057 222 +simul 1 66 2.708050 2.708050 255 +august 1 66 2.708050 2.708050 257 +main 1 67 2.708050 2.708050 256 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +evalu 1 64 2.772589 2.772589 266 +organ 1 65 2.772589 2.772589 265 +abstract 1 62 2.772589 2.772589 276 +new 1 64 2.772589 2.772589 262 +virtual 1 62 2.772589 2.772589 285 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +juli 1 60 2.833213 2.833213 305 +share 1 59 2.833213 2.833213 304 +type 1 61 2.833213 2.833213 296 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +three 1 54 2.944439 2.944439 330 +allow 1 53 2.944439 2.944439 333 +processor 1 54 2.944439 2.944439 335 +found 1 53 2.944439 2.944439 337 +hardwar 1 51 2.995732 2.995732 350 +case 1 51 2.995732 2.995732 351 +run 1 51 2.995732 2.995732 347 +tabl 1 51 2.995732 2.995732 346 +investig 1 51 2.995732 2.995732 353 +frequent 1 49 3.044522 3.044522 367 +california 1 46 3.091042 3.091042 388 +effect 1 46 3.091042 3.091042 385 +understand 1 47 3.091042 3.091042 384 +mark 1 44 3.135494 3.135494 403 +protocol 1 45 3.135494 3.135494 407 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +combin 1 42 3.218876 3.218876 421 +fast 1 42 3.218876 3.218876 429 +transact 1 39 3.258097 3.258097 438 +programm 1 39 3.258097 3.258097 445 +electr 1 38 3.295837 3.295837 461 +streetmadison 1 38 3.295837 3.295837 474 +brian 1 38 3.295837 3.295837 466 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +origin 1 38 3.295837 3.295837 472 +sciencesunivers 1 37 3.332205 3.332205 486 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +especi 1 36 3.367296 3.367296 496 +multi 1 36 3.367296 3.367296 493 +procedur 1 36 3.367296 3.367296 488 +jame 1 35 3.401197 3.401197 507 +bibliographi 1 34 3.401197 3.401197 518 +fault 1 32 3.465736 3.465736 547 +power 1 30 3.555348 3.555348 573 +robert 1 30 3.555348 3.555348 567 +profil 1 30 3.555348 3.555348 581 +exist 1 30 3.555348 3.555348 569 +common 1 30 3.555348 3.555348 574 +specifi 1 30 3.555348 3.555348 568 +steve 1 29 3.583519 3.583519 594 +synchron 1 29 3.583519 3.583519 588 +depend 1 29 3.583519 3.583519 583 +platform 1 29 3.583519 3.583519 591 +multiprocessor 1 28 3.610918 3.610918 605 +pass 1 28 3.610918 3.610918 611 +propos 1 28 3.610918 3.610918 602 +manipul 1 27 3.637586 3.637586 624 +berkelei 1 26 3.688879 3.688879 657 +revis 1 26 3.688879 3.688879 640 +altern 1 26 3.688879 3.688879 641 +detect 1 26 3.688879 3.688879 646 +hill 1 25 3.737670 3.737670 670 +supercomput 1 25 3.737670 3.737670 681 +wai 1 25 3.737670 3.737670 662 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +self 1 22 3.850148 3.850148 761 +reduc 1 22 3.850148 3.850148 759 +cooper 1 22 3.850148 3.850148 757 +vlsi 1 21 3.912023 3.912023 795 +annot 1 21 3.912023 3.912023 775 +department 1 20 3.951244 3.951244 839 +portabl 1 20 3.951244 3.951244 819 +fine 1 20 3.951244 3.951244 822 +exploit 1 20 3.951244 3.951244 836 +binari 1 20 3.951244 3.951244 823 +benchmark 1 19 4.007333 4.007333 859 +five 1 19 4.007333 4.007333 841 +wind 1 18 4.060443 4.060443 908 +interconnect 1 17 4.110874 4.110874 937 +steven 1 17 4.110874 4.110874 953 +asplo 1 17 4.110874 4.110874 948 +intel 1 16 4.174387 4.174387 1000 +advantag 1 16 4.174387 4.174387 987 +transfer 1 16 4.174387 4.174387 967 +upon 1 16 4.174387 4.174387 978 +eduphon 1 15 4.248495 4.248495 1060 +overhead 1 15 4.248495 4.248495 1035 +hybrid 1 15 4.248495 4.248495 1057 +remot 1 15 4.248495 4.248495 1041 +action 1 15 4.248495 4.248495 1038 +driven 1 15 4.248495 4.248495 1048 +coher 1 14 4.317488 4.317488 1109 +convent 1 14 4.317488 4.317488 1072 +block 1 13 4.382027 4.382027 1183 +tune 1 12 4.465908 4.465908 1227 +wood 1 11 4.553877 4.553877 1355 +isca 1 11 4.553877 4.553877 1354 +transpar 1 11 4.553877 4.553877 1325 +faster 1 11 4.553877 4.553877 1323 +grain 1 10 4.653960 4.653960 1448 +facilit 1 10 4.653960 4.653960 1412 +tunnel 1 9 4.753590 4.753590 1615 +laru 1 9 4.753590 4.753590 1560 +significantli 1 9 4.753590 4.753590 1508 +secretari 1 8 4.875197 4.875197 1775 +uniprocessor 1 8 4.875197 4.875197 1696 +spec 1 8 4.875197 4.875197 1640 +paradigm 1 8 4.875197 4.875197 1662 +burger 1 7 5.010635 5.010635 1889 +roger 1 7 5.010635 5.010635 1892 +hit 1 7 5.010635 5.010635 1965 +tag 1 7 5.010635 5.010635 1821 +instrument 1 7 5.010635 5.010635 1954 +duke 1 6 5.164786 5.164786 2231 +microsystem 1 6 5.164786 5.164786 2160 +ann 1 6 5.164786 5.164786 2065 +feasibl 1 6 5.164786 5.164786 2157 +byte 1 6 5.164786 5.164786 2108 +invok 1 6 5.164786 5.164786 2079 +scienceand 1 5 5.347108 5.347108 2348 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +reinhardt 1 5 5.347108 5.347108 2583 +lebeck 1 5 5.347108 5.347108 2582 +mukherje 1 5 5.347108 5.347108 2586 +accuraci 1 5 5.347108 5.347108 2450 +dougla 1 5 5.347108 5.347108 2471 +ioanni 1 5 5.347108 5.347108 2553 +tempest 1 5 5.347108 5.347108 2548 +toc 1 5 5.347108 5.347108 2562 +computerarchitectur 1 5 5.347108 5.347108 2290 +focuss 1 5 5.347108 5.347108 2271 +hypothet 1 5 5.347108 5.347108 2474 +rewrit 1 5 5.347108 5.347108 2367 +lookup 1 5 5.347108 5.347108 2399 +sparcstat 1 5 5.347108 5.347108 2406 +engineeringdepart 1 4 5.568345 5.568345 2917 +wart 1 4 5.568345 5.568345 2987 +hyder 1 4 5.568345 5.568345 2772 +decoupl 1 4 5.568345 5.568345 2898 +alvin 1 4 5.568345 5.568345 3084 +schoina 1 4 5.568345 5.568345 3085 +talluri 1 4 5.568345 5.568345 2820 +rapidli 1 4 5.568345 5.568345 2850 +myresearch 1 4 5.568345 5.568345 2842 +fulli 1 4 5.568345 5.568345 2986 +fingerson 1 3 5.857933 5.857933 4018 +thea 1 3 5.857933 5.857933 4019 +sklenar 1 3 5.857933 5.857933 4020 +pfile 1 3 5.857933 5.857933 3100 +programmingc 1 3 5.857933 5.857933 3232 +madhusudhan 1 3 5.857933 5.857933 4021 +bulk 1 3 5.857933 5.857933 4000 +callaghan 1 2 6.263398 6.263398 6128 +architecturec 1 2 6.263398 6.263398 6127 +anddavid 1 2 6.263398 6.263398 6126 +invalid 1 2 6.263398 6.263398 5476 +virtualmemori 1 2 6.263398 6.263398 4305 +anal 1 2 6.263398 6.263398 4834 +null 1 2 6.263398 6.263398 4714 +usadavid 1 1 6.957497 6.957497 17963 +toonenrec 1 1 6.957497 6.957497 17964 +rahmat 1 1 6.957497 6.957497 17965 +alvi 1 1 6.957497 6.957497 17966 +informix 1 1 6.957497 6.957497 17967 +memorysteven 1 1 6.957497 6.957497 17968 +communicationshubhendu 1 1 6.957497 6.957497 17969 +costrahmat 1 1 6.957497 6.957497 17970 +multiprocessorsalvin 1 1 6.957497 6.957497 17971 +simulationalvin 1 1 6.957497 6.957497 17972 +sigmetricsmai 1 1 6.957497 6.957497 17973 +typhoon 1 1 6.957497 6.957497 17962 +thrust 1 1 6.957497 6.957497 17974 +hybridprogram 1 1 6.957497 6.957497 17975 +similaritesof 1 1 6.957497 6.957497 17976 +calledtempest 1 1 6.957497 6.957497 17977 +handler 1 1 6.957497 6.957497 17978 +suppliedmechan 1 1 6.957497 6.957497 17979 +tempestmechan 1 1 6.957497 6.957497 17980 +novelmechan 1 1 6.957497 6.957497 17981 +tagblock 1 1 6.957497 6.957497 17982 +theloc 1 1 6.957497 6.957497 17983 +hardwareplatform 1 1 6.957497 6.957497 17984 +revers 1 1 6.957497 6.957497 17985 +translationt 1 1 6.957497 6.957497 17986 +rtlb 1 1 6.957497 6.957497 17987 +grainaccess 1 1 6.957497 6.957497 17988 +thata 1 1 6.957497 6.957497 17989 +performscompar 1 1 6.957497 6.957497 17990 +memoryprogram 1 1 6.957497 6.957497 17991 +thatoptim 1 1 6.957497 6.957497 17992 +reducingsimul 1 1 6.957497 6.957497 17993 +tightli 1 1 6.957497 6.957497 17994 +byprovid 1 1 6.957497 6.957497 17995 +referenceinvok 1 1 6.957497 6.957497 17996 +andmemori 1 1 6.957497 6.957497 17997 +processedbi 1 1 6.957497 6.957497 17998 +functionfor 1 1 6.957497 6.957497 17999 +usingbinari 1 1 6.957497 6.957497 18000 +memoryrefer 1 1 6.957497 6.957497 18001 +tothre 1 1 6.957497 6.957497 18002 +thatcal 1 1 6.957497 6.957497 18003 +onlythre 1 1 6.957497 6.957497 18004 +slower 1 1 6.957497 6.957497 18005 +techniquesto 1 1 6.957497 6.957497 18006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..ffa515db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +tool 1 117 2.079442 2.079442 93 +stuff 1 87 2.484907 2.484907 171 +school 1 84 2.484907 2.484907 188 +maintain 1 51 2.995732 2.995732 342 +grad 1 20 3.951244 3.951244 837 +wind 1 18 4.060443 4.060443 908 +doug 1 9 4.753590 4.753590 1517 +burger 1 7 5.010635 5.010635 1889 +galileo 1 4 5.568345 5.568345 3086 +damn 1 2 6.263398 6.263398 6129 +pageprofession 1 1 6.957497 6.957497 18007 +summaryresum 1 1 6.957497 6.957497 18008 +cvtranscriptcours 1 1 6.957497 6.957497 18009 +projectsadvisoraffili 1 1 6.957497 6.957497 18010 +sciwisconsin 1 1 6.957497 6.957497 18011 +tunnelpag 1 1 6.957497 6.957497 18012 +architectureuw 1 1 6.957497 6.957497 18013 +architecturesimplescalar 1 1 6.957497 6.957497 18014 +setgenericasacmperson 1 1 6.957497 6.957497 18015 +meus 1 1 6.957497 6.957497 18016 +linksphoto 1 1 6.957497 6.957497 18017 +galleryrid 1 1 6.957497 6.957497 18018 +demonhunt 1 1 6.957497 6.957497 18019 +catsbewar 1 1 6.957497 6.957497 18020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..b8db0cd7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +teach 1 108 2.197225 2.197225 112 +look 1 107 2.197225 2.197225 115 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +place 1 106 2.197225 2.197225 124 +mathemat 1 108 2.197225 2.197225 123 +access 1 102 2.302585 2.302585 136 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +homepag 1 93 2.397895 2.397895 148 +center 1 88 2.397895 2.397895 158 +pictur 1 89 2.397895 2.397895 160 +chang 1 82 2.484907 2.484907 163 +west 1 83 2.484907 2.484907 192 +journal 1 83 2.484907 2.484907 183 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +contain 1 81 2.484907 2.484907 174 +html 1 75 2.639057 2.639057 235 +david 1 71 2.639057 2.639057 232 +practic 1 70 2.708050 2.708050 246 +written 1 63 2.772589 2.772589 278 +guid 1 63 2.772589 2.772589 267 +organ 1 65 2.772589 2.772589 265 +content 1 59 2.833213 2.833213 302 +variou 1 56 2.890372 2.890372 317 +publish 1 57 2.890372 2.890372 326 +much 1 52 2.995732 2.995732 349 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +approach 1 48 3.044522 3.044522 366 +even 1 45 3.135494 3.135494 393 +better 1 45 3.135494 3.135494 401 +math 1 44 3.135494 3.135494 402 +third 1 43 3.178054 3.178054 412 +edit 1 42 3.218876 3.218876 418 +press 1 42 3.218876 3.218876 419 +combin 1 42 3.218876 3.218876 421 +small 1 39 3.258097 3.258097 447 +author 1 39 3.258097 3.258097 450 +tutori 1 39 3.258097 3.258097 437 +streetmadison 1 38 3.295837 3.295837 474 +paul 1 38 3.295837 3.295837 471 +sciencesunivers 1 37 3.332205 3.332205 486 +approxim 1 35 3.401197 3.401197 509 +print 1 34 3.401197 3.401197 503 +bibliographi 1 34 3.401197 3.401197 518 +next 1 34 3.401197 3.401197 517 +articl 1 33 3.433987 3.433987 530 +postal 1 30 3.555348 3.555348 580 +great 1 27 3.637586 3.637586 626 +thank 1 23 3.806662 3.806662 721 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +latest 1 21 3.912023 3.912023 785 +hous 1 21 3.912023 3.912023 801 +accept 1 18 4.060443 4.060443 879 +stand 1 18 4.060443 4.060443 891 +former 1 17 4.110874 4.110874 956 +seek 1 17 4.110874 4.110874 954 +carl 1 15 4.248495 4.248495 1024 +anonym 1 14 4.317488 4.317488 1100 +individu 1 13 4.382027 4.382027 1126 +town 1 10 4.653960 4.653960 1458 +death 1 10 4.653960 4.653960 1457 +subset 1 10 4.653960 4.653960 1425 +errata 1 10 4.653960 4.653960 1403 +latter 1 9 4.753590 4.753590 1522 +screen 1 9 4.753590 4.753590 1577 +unusu 1 9 4.753590 4.753590 1566 +end 1 9 4.753590 4.753590 1567 +driver 1 8 4.875197 4.875197 1657 +elementari 1 7 5.010635 5.010635 1825 +usaoffic 1 6 5.164786 5.164786 2159 +spline 1 6 5.164786 5.164786 2007 +button 1 5 5.347108 5.347108 2337 +door 1 5 5.347108 5.347108 2291 +areavail 1 4 5.568345 5.568345 2810 +allan 1 4 5.568345 5.568345 2849 +technion 1 4 5.568345 5.568345 2856 +boor 1 3 5.857933 5.857933 3482 +cont 1 3 5.857933 5.857933 3171 +shall 1 3 5.857933 5.857933 3891 +deboor 1 2 6.263398 6.263398 4744 +clickabl 1 2 6.263398 6.263398 4788 +thevari 1 2 6.263398 6.263398 6130 +forconstruct 1 2 6.263398 6.263398 5649 +amo 1 2 6.263398 6.263398 6094 +joi 1 2 6.263398 6.263398 5208 +hermit 1 2 6.263398 6.263398 4150 +soup 1 2 6.263398 6.263398 6131 +kitchen 1 2 6.263398 6.263398 6132 +occupi 1 2 6.263398 6.263398 5857 +mathematicsdepart 1 1 6.957497 6.957497 18024 +schoenberg 1 1 6.957497 6.957497 18025 +approx 1 1 6.957497 6.957497 18026 +theclick 1 1 6.957497 6.957497 18027 +ofapproxim 1 1 6.957497 6.957497 18028 +publishedpap 1 1 6.957497 6.957497 18029 +andmuch 1 1 6.957497 6.957497 18030 +ditto 1 1 6.957497 6.957497 18021 +foreast 1 1 6.957497 6.957497 18031 +theirtabl 1 1 6.957497 6.957497 18032 +singli 1 1 6.957497 6.957497 18033 +nevai 1 1 6.957497 6.957497 18022 +thishandi 1 1 6.957497 6.957497 18034 +alsoapproxim 1 1 6.957497 6.957497 18035 +slist 1 1 6.957497 6.957497 18036 +ila 1 1 6.957497 6.957497 18037 +seeviva_vi 1 1 6.957497 6.957497 18038 +alsoon 1 1 6.957497 6.957497 18039 +thehtml 1 1 6.957497 6.957497 18040 +primermight 1 1 6.957497 6.957497 18041 +ever_chang 1 1 6.957497 6.957497 18042 +griffeath 1 1 6.957497 6.957497 18043 +sprimordi 1 1 6.957497 6.957497 18044 +seeodd 1 1 6.957497 6.957497 18045 +pinku 1 1 6.957497 6.957497 18023 +techunix 1 1 6.957497 6.957497 18046 +nevaiif 1 1 6.957497 6.957497 18047 +makehi 1 1 6.957497 6.957497 18048 +outputavail 1 1 6.957497 6.957497 18049 +taki 1 1 6.957497 6.957497 18050 +souganid 1 1 6.957497 6.957497 18051 +andthaleia 1 1 6.957497 6.957497 18052 +zariphopoul 1 1 6.957497 6.957497 18053 +szego 1 1 6.957497 6.957497 18054 +bust 1 1 6.957497 6.957497 18055 +inscript 1 1 6.957497 6.957497 18056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..76181309 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +memori 1 101 2.302585 2.302585 139 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +graphic 1 90 2.397895 2.397895 147 +follow 1 92 2.397895 2.397895 143 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +environ 1 84 2.484907 2.484907 177 +level 1 87 2.484907 2.484907 180 +help 1 83 2.484907 2.484907 175 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +librari 1 87 2.484907 2.484907 181 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +window 1 68 2.708050 2.708050 242 +differ 1 66 2.708050 2.708050 253 +descript 1 64 2.772589 2.772589 271 +januari 1 62 2.772589 2.772589 264 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +explor 1 58 2.890372 2.890372 324 +direct 1 57 2.890372 2.890372 316 +detail 1 57 2.890372 2.890372 321 +februari 1 54 2.944439 2.944439 328 +date 1 51 2.995732 2.995732 344 +visual 1 48 3.044522 3.044522 372 +cool 1 49 3.044522 3.044522 374 +featur 1 46 3.091042 3.091042 386 +execut 1 45 3.135494 3.135494 404 +mechan 1 43 3.178054 3.178054 416 +map 1 39 3.258097 3.258097 452 +tree 1 36 3.367296 3.367296 492 +download 1 36 3.367296 3.367296 489 +michael 1 35 3.401197 3.401197 514 +next 1 34 3.401197 3.401197 517 +queri 1 33 3.433987 3.433987 524 +within 1 33 3.433987 3.433987 525 +platform 1 29 3.583519 3.583519 591 +releas 1 28 3.610918 3.610918 616 +repres 1 26 3.688879 3.688879 656 +compar 1 26 3.688879 3.688879 648 +handl 1 24 3.761200 3.761200 685 +input 1 23 3.806662 3.806662 727 +togeth 1 23 3.806662 3.806662 714 +famili 1 23 3.806662 3.806662 735 +sequenc 1 23 3.806662 3.806662 734 +variabl 1 23 3.806662 3.806662 715 +color 1 22 3.850148 3.850148 762 +flexibl 1 21 3.912023 3.912023 792 +viewer 1 21 3.912023 3.912023 787 +output 1 21 3.912023 3.912023 788 +chen 1 21 3.912023 3.912023 791 +comparison 1 19 4.007333 4.007333 863 +record 1 18 4.060443 4.060443 890 +appropri 1 18 4.060443 4.060443 883 +ramakrishnan 1 16 4.174387 4.174387 972 +ascii 1 15 4.248495 4.248495 1032 +biologi 1 15 4.248495 4.248495 1049 +livni 1 15 4.248495 4.248495 1053 +stream 1 15 4.248495 4.248495 1015 +save 1 14 4.317488 4.317488 1099 +miron 1 14 4.317488 4.317488 1110 +individu 1 13 4.382027 4.382027 1126 +shape 1 12 4.465908 4.465908 1245 +raghu 1 12 4.465908 4.465908 1212 +solari 1 12 4.465908 4.465908 1238 +distinguish 1 11 4.553877 4.553877 1357 +abil 1 11 4.553877 4.553877 1341 +string 1 11 4.553877 4.553877 1340 +devis 1 10 4.653960 4.653960 1451 +relationship 1 10 4.653960 4.653960 1383 +cheng 1 10 4.653960 4.653960 1381 +float 1 9 4.753590 4.753590 1504 +integ 1 8 4.875197 4.875197 1688 +inproceed 1 8 4.875197 4.875197 1670 +larger 1 7 5.010635 5.010635 1875 +dataset 1 7 5.010635 5.010635 1914 +layout 1 6 5.164786 5.164786 2183 +quick 1 6 5.164786 5.164786 2184 +spie 1 6 5.164786 5.164786 2119 +cell 1 5 5.347108 5.347108 2274 +medicin 1 5 5.347108 5.347108 2448 +complementari 1 5 5.347108 5.347108 2523 +kent 1 4 5.568345 5.568345 2744 +asid 1 3 5.857933 5.857933 3770 +myllymaki 1 3 5.857933 5.857933 4022 +comad 1 3 5.857933 5.857933 3737 +wenger 1 3 5.857933 5.857933 4023 +oneset 1 2 6.263398 6.263398 6134 +viewsof 1 2 6.263398 6.263398 6135 +birch 1 2 6.263398 6.263398 6136 +jussi 1 2 6.263398 6.263398 6133 +andanalysi 1 2 6.263398 6.263398 4271 +workth 1 2 6.263398 6.263398 6137 +guangshun 1 2 6.263398 6.263398 6138 +hotlin 1 2 6.263398 6.263398 5967 +pagedevis 1 1 6.957497 6.957497 18057 +visualizationt 1 1 6.957497 6.957497 18058 +featuresexamplesin 1 1 6.957497 6.957497 18059 +depthpublicationsrel 1 1 6.957497 6.957497 18060 +workreleasecontactsfeaturesthes 1 1 6.957497 6.957497 18061 +cancontrol 1 1 6.957497 6.957497 18062 +ax 1 1 6.957497 6.957497 18063 +cursor 1 1 6.957497 6.957497 18064 +examplescheck 1 1 6.957497 6.957497 18065 +validationmolecular 1 1 6.957497 6.957497 18066 +soil 1 1 6.957497 6.957497 18067 +clusteringfinanci 1 1 6.957497 6.957497 18068 +explorationfamili 1 1 6.957497 6.957497 18069 +climatedata 1 1 6.957497 6.957497 18070 +centergeograph 1 1 6.957497 6.957497 18071 +systemsoil 1 1 6.957497 6.957497 18072 +sciencefil 1 1 6.957497 6.957497 18073 +serverprogram 1 1 6.957497 6.957497 18074 +tracesclin 1 1 6.957497 6.957497 18075 +moreexampl 1 1 6.957497 6.957497 18076 +depthfor 1 1 6.957497 6.957497 18077 +visualizationvisu 1 1 6.957497 6.957497 18078 +interfaceperform 1 1 6.957497 6.957497 18079 +issuespublicationsmiron 1 1 6.957497 6.957497 18080 +dataexplor 1 1 6.957497 6.957497 18081 +praveenseshadri 1 1 6.957497 6.957497 18082 +sequencequeri 1 1 6.957497 6.957497 18083 +themanag 1 1 6.957497 6.957497 18084 +seqproject 1 1 6.957497 6.957497 18085 +queryrecord 1 1 6.957497 6.957497 18086 +bevisu 1 1 6.957497 6.957497 18087 +informationw 1 1 6.957497 6.957497 18088 +executablesfor 1 1 6.957497 6.957497 18089 +ld_library_path 1 1 6.957497 6.957497 18090 +rundevis 1 1 6.957497 6.957497 18091 +arestat 1 1 6.957497 6.957497 18092 +shareabl 1 1 6.957497 6.957497 18093 +contactsfor 1 1 6.957497 6.957497 18094 +contactmiron 1 1 6.957497 6.957497 18095 +usersupport 1 1 6.957497 6.957497 18096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..56ee6864 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +wide 1 84 2.484907 2.484907 185 +environ 1 84 2.484907 2.484907 177 +larg 1 82 2.484907 2.484907 168 +orient 1 80 2.564949 2.564949 205 +interfac 1 79 2.564949 2.564949 209 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +summari 1 73 2.639057 2.639057 237 +name 1 72 2.639057 2.639057 220 +appli 1 71 2.639057 2.639057 226 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +evalu 1 64 2.772589 2.772589 266 +complex 1 64 2.772589 2.772589 269 +prof 1 64 2.772589 2.772589 273 +creat 1 63 2.772589 2.772589 277 +januari 1 62 2.772589 2.772589 264 +type 1 61 2.833213 2.833213 296 +automat 1 61 2.833213 2.833213 306 +unix 1 58 2.890372 2.890372 308 +space 1 57 2.890372 2.890372 310 +sampl 1 53 2.944439 2.944439 339 +talk 1 53 2.944439 2.944439 336 +hardwar 1 51 2.995732 2.995732 350 +telephon 1 50 3.044522 3.044522 373 +basic 1 50 3.044522 3.044522 360 +without 1 50 3.044522 3.044522 370 +set 1 50 3.044522 3.044522 361 +anoth 1 45 3.135494 3.135494 408 +execut 1 45 3.135494 3.135494 404 +multipl 1 39 3.258097 3.258097 453 +must 1 40 3.258097 3.258097 442 +error 1 40 3.258097 3.258097 449 +field 1 37 3.332205 3.332205 482 +workstat 1 37 3.332205 3.332205 479 +connect 1 37 3.332205 3.332205 485 +either 1 35 3.401197 3.401197 506 +singl 1 34 3.401197 3.401197 510 +storag 1 31 3.496508 3.496508 553 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +rang 1 30 3.555348 3.555348 565 +platform 1 29 3.583519 3.583519 591 +intend 1 28 3.610918 3.610918 599 +becom 1 28 3.610918 3.610918 603 +scale 1 28 3.610918 3.610918 613 +multiprocessor 1 28 3.610918 3.610918 605 +cluster 1 28 3.610918 3.610918 612 +manipul 1 27 3.637586 3.637586 624 +wai 1 25 3.737670 3.737670 662 +client 1 25 3.737670 3.737670 679 +fellow 1 24 3.761200 3.761200 701 +store 1 24 3.761200 3.761200 693 +serv 1 22 3.850148 3.850148 758 +varieti 1 22 3.850148 3.850148 740 +instead 1 22 3.850148 3.850148 756 +programminglanguag 1 21 3.912023 3.912023 782 +fund 1 21 3.912023 3.912023 805 +busi 1 21 3.912023 3.912023 784 +excel 1 19 4.007333 4.007333 868 +benchmark 1 19 4.007333 4.007333 859 +sigmod 1 19 4.007333 4.007333 877 +repositori 1 17 4.110874 4.110874 932 +expand 1 17 4.110874 4.110874 928 +modif 1 17 4.110874 4.110874 913 +attempt 1 17 4.110874 4.110874 917 +white 1 17 4.110874 4.110874 951 +intel 1 16 4.174387 4.174387 1000 +match 1 16 4.174387 4.174387 965 +transit 1 15 4.248495 4.248495 1046 +capabl 1 15 4.248495 4.248495 1016 +heterogen 1 14 4.317488 4.317488 1090 +signific 1 13 4.382027 4.382027 1125 +dewitt 1 12 4.465908 4.465908 1270 +target 1 12 4.465908 4.465908 1282 +fromindividu 1 12 4.465908 4.465908 1290 +emploi 1 12 4.465908 4.465908 1284 +michigan 1 11 4.553877 4.553877 1368 +shore 1 11 4.553877 4.553877 1377 +persist 1 11 4.553877 4.553877 1367 +arpa 1 11 4.553877 4.553877 1369 +naughton 1 10 4.653960 4.653960 1450 +facilit 1 10 4.653960 4.653960 1412 +franklin 1 10 4.653960 4.653960 1436 +vldb 1 10 4.653960 4.653960 1470 +invit 1 10 4.653960 4.653960 1428 +conferenceon 1 9 4.753590 4.753590 1595 +paradis 1 8 4.875197 4.875197 1782 +carei 1 8 4.875197 4.875197 1781 +solomon 1 8 4.875197 4.875197 1716 +databasesystem 1 8 4.875197 4.875197 1617 +hold 1 8 4.875197 4.875197 1645 +poor 1 8 4.875197 4.875197 1736 +polygon 1 8 4.875197 4.875197 1723 +sparc 1 7 5.010635 5.010635 1860 +geograph 1 6 5.164786 5.164786 2236 +patel 1 6 5.164786 5.164786 2154 +pub 1 6 5.164786 5.164786 2239 +compat 1 5 5.347108 5.347108 2485 +tsatalo 1 5 5.347108 5.347108 2581 +minneapoli 1 5 5.347108 5.347108 2480 +proceedingsof 1 5 5.347108 5.347108 2331 +satellit 1 4 5.568345 5.568345 3077 +exodu 1 4 5.568345 5.568345 3075 +mcauliff 1 4 5.568345 5.568345 3083 +zwill 1 4 5.568345 5.568345 3076 +chile 1 4 5.568345 5.568345 3082 +orth 1 3 5.857933 5.857933 3685 +paragon 1 3 5.857933 5.857933 3359 +gamma 1 3 5.857933 5.857933 3219 +summit 1 3 5.857933 5.857933 3684 +developeda 1 2 6.263398 6.263398 5205 +polylin 1 2 6.263398 6.263398 6079 +projecti 1 2 6.263398 6.263398 5963 +kabra 1 2 6.263398 6.263398 6139 +romn 1 1 6.957497 6.957497 18097 +databasebenchmark 1 1 6.957497 6.957497 18098 +objectiveof 1 1 6.957497 6.957497 18099 +objectsystem 1 1 6.957497 6.957497 18100 +applicationsinclud 1 1 6.957497 6.957497 18101 +capabilitiesof 1 1 6.957497 6.957497 18102 +typedobject 1 1 6.957497 6.957497 18103 +hierarchicalnam 1 1 6.957497 6.957497 18104 +interfaceto 1 1 6.957497 6.957497 18105 +toeas 1 1 6.957497 6.957497 18106 +systemenviron 1 1 6.957497 6.957497 18107 +ccwill 1 1 6.957497 6.957497 18108 +networksto 1 1 6.957497 6.957497 18109 +ajoint 1 1 6.957497 6.957497 18110 +relationaldatabas 1 1 6.957497 6.957497 18111 +thetask 1 1 6.957497 6.957497 18112 +formanag 1 1 6.957497 6.957497 18113 +modelingne 1 1 6.957497 6.957497 18114 +manipulatingmuch 1 1 6.957497 6.957497 18115 +muchbett 1 1 6.957497 6.957497 18116 +differencefrom 1 1 6.957497 6.957497 18117 +parallelismto 1 1 6.957497 6.957497 18118 +assatellit 1 1 6.957497 6.957497 18119 +withm 1 1 6.957497 6.957497 18120 +persistentappl 1 1 6.957497 6.957497 18121 +chuh 1 1 6.957497 6.957497 18122 +santiego 1 1 6.957497 6.957497 18123 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..07a7fffc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +machin 1 129 2.079442 2.079442 95 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +west 1 83 2.484907 2.484907 192 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +build 1 85 2.484907 2.484907 184 +chang 1 82 2.484907 2.484907 163 +ieee 1 86 2.484907 2.484907 190 +appear 1 78 2.564949 2.564949 210 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +refer 1 78 2.564949 2.564949 203 +complet 1 77 2.564949 2.564949 208 +intellig 1 72 2.639057 2.639057 225 +workshop 1 71 2.639057 2.639057 239 +view 1 70 2.708050 2.708050 254 +goal 1 66 2.708050 2.708050 250 +main 1 67 2.708050 2.708050 256 +order 1 69 2.708050 2.708050 249 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +artifici 1 63 2.772589 2.772589 280 +guid 1 63 2.772589 2.772589 267 +abstract 1 62 2.772589 2.772589 276 +descript 1 64 2.772589 2.772589 271 +simpl 1 60 2.833213 2.833213 298 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +explor 1 58 2.890372 2.890372 324 +point 1 58 2.890372 2.890372 319 +space 1 57 2.890372 2.890372 310 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +finger 1 52 2.995732 2.995732 354 +investig 1 51 2.995732 2.995732 353 +basic 1 50 3.044522 3.044522 360 +visual 1 48 3.044522 3.044522 372 +without 1 50 3.044522 3.044522 370 +approach 1 48 3.044522 3.044522 366 +move 1 47 3.091042 3.091042 382 +could 1 46 3.091042 3.091042 383 +adapt 1 46 3.091042 3.091042 387 +possibl 1 47 3.091042 3.091042 378 +understand 1 47 3.091042 3.091042 384 +physic 1 47 3.091042 3.091042 377 +mark 1 44 3.135494 3.135494 403 +around 1 43 3.178054 3.178054 415 +vision 1 41 3.218876 3.218876 430 +combin 1 42 3.218876 3.218876 421 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +autom 1 41 3.218876 3.218876 434 +continu 1 39 3.258097 3.258097 448 +map 1 39 3.258097 3.258097 452 +societi 1 40 3.258097 3.258097 456 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +close 1 38 3.295837 3.295837 465 +correct 1 38 3.295837 3.295837 462 +paul 1 38 3.295837 3.295837 471 +brian 1 38 3.295837 3.295837 466 +sciencesunivers 1 37 3.332205 3.332205 486 +connect 1 37 3.332205 3.332205 485 +purpos 1 37 3.332205 3.332205 481 +procedur 1 36 3.367296 3.367296 488 +especi 1 36 3.367296 3.367296 496 +robot 1 36 3.367296 3.367296 497 +represent 1 35 3.401197 3.401197 512 +either 1 35 3.401197 3.401197 506 +global 1 34 3.401197 3.401197 520 +curriculum 1 33 3.433987 3.433987 535 +taught 1 33 3.433987 3.433987 526 +rang 1 30 3.555348 3.555348 565 +power 1 30 3.555348 3.555348 573 +chair 1 29 3.583519 3.583519 596 +steve 1 29 3.583519 3.583519 594 +progress 1 28 3.610918 3.610918 598 +measur 1 28 3.610918 3.610918 609 +proc 1 26 3.688879 3.688879 649 +detect 1 26 3.688879 3.688879 646 +bookmark 1 26 3.688879 3.688879 639 +task 1 25 3.737670 3.737670 678 +accur 1 25 3.737670 3.737670 680 +trace 1 25 3.737670 3.737670 677 +toward 1 25 3.737670 3.737670 668 +strategi 1 25 3.737670 3.737670 682 +motion 1 24 3.761200 3.761200 699 +pattern 1 24 3.761200 3.761200 689 +flow 1 24 3.761200 3.761200 700 +input 1 23 3.806662 3.806662 727 +mobil 1 23 3.806662 3.806662 730 +displai 1 23 3.806662 3.806662 712 +recognit 1 23 3.806662 3.806662 723 +sequenc 1 23 3.806662 3.806662 734 +defin 1 22 3.850148 3.850148 746 +geometri 1 22 3.850148 3.850148 752 +period 1 22 3.850148 3.850148 743 +output 1 21 3.912023 3.912023 788 +navig 1 21 3.912023 3.912023 796 +path 1 21 3.912023 3.912023 778 +viewer 1 21 3.912023 3.912023 787 +synthesi 1 20 3.951244 3.951244 834 +basi 1 20 3.951244 3.951244 828 +boston 1 19 4.007333 4.007333 862 +separ 1 19 4.007333 4.007333 844 +dimension 1 18 4.060443 4.060443 909 +behavior 1 18 4.060443 4.060443 881 +four 1 18 4.060443 4.060443 905 +differenti 1 17 4.110874 4.110874 921 +coupl 1 17 4.110874 4.110874 939 +precis 1 15 4.248495 4.248495 1023 +capabl 1 15 4.248495 4.248495 1016 +track 1 15 4.248495 4.248495 1029 +camera 1 14 4.317488 4.317488 1115 +scene 1 14 4.317488 4.317488 1114 +charl 1 13 4.382027 4.382027 1149 +step 1 13 4.382027 4.382027 1138 +coordin 1 13 4.382027 4.382027 1182 +believ 1 13 4.382027 4.382027 1187 +conf 1 13 4.382027 4.382027 1181 +shape 1 12 4.465908 4.465908 1245 +fix 1 11 4.553877 4.553877 1327 +valid 1 11 4.553877 4.553877 1299 +bill 1 11 4.553877 4.553877 1297 +edutelephon 1 10 4.653960 4.653960 1473 +custom 1 10 4.653960 4.653960 1414 +correspond 1 10 4.653960 4.653960 1382 +acquisit 1 10 4.653960 4.653960 1465 +earth 1 10 4.653960 4.653960 1463 +dyer 1 9 4.753590 4.753590 1573 +leader 1 9 4.753590 4.753590 1576 +intermedi 1 9 4.753590 4.753590 1497 +surfac 1 9 4.753590 4.753590 1574 +observ 1 9 4.753590 4.753590 1578 +recoveri 1 9 4.753590 4.753590 1474 +morgan 1 9 4.753590 4.753590 1484 +autonom 1 8 4.875197 4.875197 1749 +invari 1 8 4.875197 4.875197 1748 +siggraph 1 8 4.875197 4.875197 1773 +irregular 1 8 4.875197 4.875197 1768 +curv 1 8 4.875197 4.875197 1656 +edg 1 8 4.875197 4.875197 1647 +textur 1 8 4.875197 4.875197 1677 +morph 1 7 5.010635 5.010635 1937 +interpol 1 7 5.010635 5.010635 1823 +paramet 1 7 5.010635 5.010635 1796 +smooth 1 7 5.010635 5.010635 1855 +seitz 1 7 5.010635 5.010635 1976 +dimens 1 7 5.010635 5.010635 1930 +stereo 1 7 5.010635 5.010635 1818 +maryland 1 6 5.164786 5.164786 2140 +viewpoint 1 6 5.164786 5.164786 2116 +recov 1 6 5.164786 5.164786 2235 +reconstruct 1 6 5.164786 5.164786 2170 +kluwer 1 6 5.164786 5.164786 2143 +bestor 1 6 5.164786 5.164786 2099 +unknown 1 5 5.347108 5.347108 2318 +recogn 1 5 5.347108 5.347108 2302 +provabl 1 5 5.347108 5.347108 2558 +revolut 1 5 5.347108 5.347108 2315 +cyclic 1 5 5.347108 5.347108 2383 +jain 1 5 5.347108 5.347108 2332 +adjust 1 5 5.347108 5.347108 2422 +affin 1 5 5.347108 5.347108 2378 +rigid 1 5 5.347108 5.347108 2432 +gareth 1 5 5.347108 5.347108 2392 +connectionist 1 5 5.347108 5.347108 2430 +bradlei 1 5 5.347108 5.347108 2554 +usa 1 4 5.568345 5.568345 3080 +cvpr 1 4 5.568345 5.568345 2761 +theus 1 4 5.568345 5.568345 2992 +simplifi 1 4 5.568345 5.568345 3066 +satellit 1 4 5.568345 5.568345 3077 +shah 1 4 5.568345 5.568345 2814 +contour 1 4 5.568345 5.568345 2812 +lumelski 1 4 5.568345 5.568345 2837 +harri 1 4 5.568345 5.568345 3034 +asid 1 3 5.857933 5.857933 3770 +stationari 1 3 5.857933 5.857933 3861 +alamito 1 3 5.857933 5.857933 3558 +lattic 1 3 5.857933 5.857933 3721 +groupcours 1 3 5.857933 5.857933 3092 +macc 1 3 5.857933 5.857933 3414 +acquir 1 2 6.263398 6.263398 5557 +panoram 1 2 6.263398 6.263398 4755 +howto 1 2 6.263398 6.263398 5761 +arbitrarili 1 2 6.263398 6.263398 5791 +discrimin 1 2 6.263398 6.263398 6140 +festschrift 1 2 6.263398 6.263398 6141 +rosenfeld 1 2 6.263398 6.263398 4495 +kutulako 1 2 6.263398 6.263398 6064 +articul 1 2 6.263398 6.263398 5799 +hibbard 1 2 6.263398 6.263398 6066 +kyro 1 2 6.263398 6.263398 6063 +rochest 1 2 6.263398 6.263398 6142 +stewart 1 2 6.263398 6.263398 5739 +pagecharl 1 1 6.957497 6.957497 18127 +dyerprofessordepart 1 1 6.957497 6.957497 18128 +infoph 1 1 6.957497 6.957497 18129 +visualizationgroup 1 1 6.957497 6.957497 18130 +groupprogram 1 1 6.957497 6.957497 18131 +synthesisth 1 1 6.957497 6.957497 18132 +controllingin 1 1 6.957497 6.957497 18133 +cameraof 1 1 6.957497 6.957497 18134 +videostream 1 1 6.957497 6.957497 18135 +whicha 1 1 6.957497 6.957497 18136 +througha 1 1 6.957497 6.957497 18137 +thesit 1 1 6.957497 6.957497 18138 +predetermin 1 1 6.957497 6.957497 18139 +researchquest 1 1 6.957497 6.957497 18140 +synthesizenew 1 1 6.957497 6.957497 18141 +reconstructiona 1 1 6.957497 6.957497 18142 +innovativetechniqu 1 1 6.957497 6.957497 18143 +callview 1 1 6.957497 6.957497 18144 +basisimag 1 1 6.957497 6.957497 18145 +explorationcomput 1 1 6.957497 6.957497 18146 +controllingcamera 1 1 6.957497 6.957497 18147 +purposefulli 1 1 6.957497 6.957497 18148 +theposit 1 1 6.957497 6.957497 18149 +adjustviewpoint 1 1 6.957497 6.957497 18150 +forsolv 1 1 6.957497 6.957497 18151 +findspecif 1 1 6.957497 6.957497 18152 +unknownshap 1 1 6.957497 6.957497 18153 +appearanceof 1 1 6.957497 6.957497 18154 +computationsrequir 1 1 6.957497 6.957497 18155 +andelimin 1 1 6.957497 6.957497 18156 +thecamera 1 1 6.957497 6.957497 18157 +towardsviewpoint 1 1 6.957497 6.957497 18158 +viewedobject 1 1 6.957497 6.957497 18159 +thisapproach 1 1 6.957497 6.957497 18160 +visualizationin 1 1 6.957497 6.957497 18161 +techniquescap 1 1 6.957497 6.957497 18162 +specificgraph 1 1 6.957497 6.957497 18163 +displayingarbitrari 1 1 6.957497 6.957497 18164 +commonfram 1 1 6.957497 6.957497 18165 +algorithmexecut 1 1 6.957497 6.957497 18166 +dataanalysi 1 1 6.957497 6.957497 18167 +forexperi 1 1 6.957497 6.957497 18168 +visualizingintermedi 1 1 6.957497 6.957497 18169 +forproblem 1 1 6.957497 6.957497 18170 +cloud 1 1 6.957497 6.957497 18171 +azriel 1 1 6.957497 6.957497 18172 +seal 1 1 6.957497 6.957497 18124 +occlud 1 1 6.957497 6.957497 18173 +battaiola 1 1 6.957497 6.957497 18174 +santek 1 1 6.957497 6.957497 18175 +voidrot 1 1 6.957497 6.957497 18176 +martinez 1 1 6.957497 6.957497 18177 +liangyin 1 1 6.957497 6.957497 18178 +yuph 1 1 6.957497 6.957497 18179 +whibbard 1 1 6.957497 6.957497 18180 +onlattic 1 1 6.957497 6.957497 18181 +kiriako 1 1 6.957497 6.957497 18182 +ofobserv 1 1 6.957497 6.957497 18183 +allmen 1 1 6.957497 6.957497 18125 +iutech 1 1 6.957497 6.957497 18184 +spatiotempor 1 1 6.957497 6.957497 18185 +brent 1 1 6.957497 6.957497 18186 +dimensionalshap 1 1 6.957497 6.957497 18187 +plantinga 1 1 6.957497 6.957497 18188 +wheaton 1 1 6.957497 6.957497 18189 +representationfor 1 1 6.957497 6.957497 18190 +kjell 1 1 6.957497 6.957497 18126 +ccsua 1 1 6.957497 6.957497 18191 +ctstateu 1 1 6.957497 6.957497 18192 +measureslink 1 1 6.957497 6.957497 18193 +interestmi 1 1 6.957497 6.957497 18194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..f6a258da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +class 1 199 1.609438 1.609438 37 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +teach 1 108 2.197225 2.197225 112 +section 1 94 2.397895 2.397895 149 +educ 1 86 2.484907 2.484907 191 +friend 1 48 3.044522 3.044522 376 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +curriculum 1 33 3.433987 3.433987 535 +task 1 25 3.737670 3.737670 678 +bring 1 10 4.653960 4.653960 1430 +appreci 1 5 5.347108 5.347108 2374 +patienc 1 2 6.263398 6.263398 5466 +machinew 1 1 6.957497 6.957497 18195 +arduou 1 1 6.957497 6.957497 18196 +vitaecheck 1 1 6.957497 6.957497 18197 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..ee7c4d13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,166 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +problem 1 147 1.945910 1.945910 75 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +dayton 1 119 2.079442 2.079442 104 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +intern 1 108 2.197225 2.197225 128 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +assist 1 112 2.197225 2.197225 113 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +real 1 93 2.397895 2.397895 144 +sinc 1 90 2.397895 2.397895 159 +homepag 1 93 2.397895 2.397895 148 +west 1 83 2.484907 2.484907 192 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +requir 1 81 2.484907 2.484907 167 +want 1 79 2.564949 2.564949 199 +come 1 78 2.564949 2.564949 202 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +servic 1 72 2.639057 2.639057 236 +name 1 72 2.639057 2.639057 220 +nation 1 74 2.639057 2.639057 240 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +laboratori 1 63 2.772589 2.772589 292 +creat 1 63 2.772589 2.772589 277 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +unix 1 58 2.890372 2.890372 308 +instruct 1 53 2.944439 2.944439 332 +talk 1 53 2.944439 2.944439 336 +digit 1 52 2.995732 2.995732 348 +date 1 51 2.995732 2.995732 344 +cool 1 49 3.044522 3.044522 374 +standard 1 48 3.044522 3.044522 365 +archiv 1 49 3.044522 3.044522 364 +could 1 46 3.091042 3.091042 383 +even 1 45 3.135494 3.135494 393 +video 1 44 3.135494 3.135494 405 +answer 1 45 3.135494 3.135494 391 +discuss 1 45 3.135494 3.135494 399 +made 1 44 3.135494 3.135494 398 +around 1 43 3.178054 3.178054 415 +societi 1 40 3.258097 3.258097 456 +purpos 1 37 3.332205 3.332205 481 +curriculum 1 33 3.433987 3.433987 535 +anim 1 31 3.496508 3.496508 557 +domain 1 30 3.555348 3.555348 564 +secur 1 30 3.555348 3.555348 577 +full 1 28 3.610918 3.610918 615 +never 1 25 3.737670 3.737670 671 +spent 1 25 3.737670 3.737670 676 +magazin 1 24 3.761200 3.761200 704 +serv 1 22 3.850148 3.850148 758 +half 1 21 3.912023 3.912023 776 +among 1 21 3.912023 3.912023 781 +wonder 1 20 3.951244 3.951244 815 +eric 1 19 4.007333 4.007333 870 +anderson 1 19 4.007333 4.007333 860 +offici 1 18 4.060443 4.060443 894 +edulast 1 17 4.110874 4.110874 927 +explan 1 16 4.174387 4.174387 985 +susan 1 15 4.248495 4.248495 1050 +biologi 1 15 4.248495 4.248495 1049 +shown 1 14 4.317488 4.317488 1080 +wait 1 13 4.382027 4.382027 1168 +philosophi 1 13 4.382027 4.382027 1167 +neat 1 12 4.465908 4.465908 1263 +entertain 1 12 4.465908 4.465908 1286 +holidai 1 12 4.465908 4.465908 1224 +see 1 11 4.553877 4.553877 1337 +fix 1 11 4.553877 4.553877 1327 +leader 1 9 4.753590 4.753590 1576 +mainten 1 9 4.753590 4.753590 1543 +told 1 8 4.875197 4.875197 1658 +scout 1 7 5.010635 5.010635 1903 +monei 1 7 5.010635 5.010635 1934 +explain 1 7 5.010635 5.010635 1816 +molecular 1 7 5.010635 5.010635 1887 +philosoph 1 7 5.010635 5.010635 1904 +meant 1 6 5.164786 5.164786 2055 +lucki 1 6 5.164786 5.164786 2163 +registr 1 5 5.347108 5.347108 2249 +mac 1 5 5.347108 5.347108 2292 +commod 1 5 5.347108 5.347108 2415 +girlfriend 1 5 5.347108 5.347108 2579 +billi 1 5 5.347108 5.347108 2404 +couldn 1 4 5.568345 5.568345 2977 +green 1 4 5.568345 5.568345 2848 +pageer 1 3 5.857933 5.857933 3776 +ofwisconsin 1 3 5.857933 5.857933 4002 +pete 1 3 5.857933 5.857933 3865 +specialist 1 3 5.857933 5.857933 3319 +lauri 1 3 5.857933 5.857933 3867 +wit 1 3 5.857933 5.857933 4005 +popul 1 3 5.857933 5.857933 3235 +facstaff 1 3 5.857933 5.857933 3433 +hazen 1 2 6.263398 6.263398 6143 +calcari 1 2 6.263398 6.263398 6144 +devri 1 2 6.263398 6.263398 6145 +broken 1 2 6.263398 6.263398 5074 +mice 1 2 6.263398 6.263398 5069 +shameless 1 2 6.263398 6.263398 6146 +salon 1 2 6.263398 6.263398 5827 +nixon 1 2 6.263398 6.263398 5868 +hazennon 1 1 6.957497 6.957497 18198 +professorroom 1 1 6.957497 6.957497 18199 +fornet 1 1 6.957497 6.957497 18200 +elegantli 1 1 6.957497 6.957497 18201 +fearless 1 1 6.957497 6.957497 18202 +withtech 1 1 6.957497 6.957497 18203 +capitalist 1 1 6.957497 6.957497 18204 +pragmatist 1 1 6.957497 6.957497 18205 +metaphys 1 1 6.957497 6.957497 18206 +makethi 1 1 6.957497 6.957497 18207 +drosophila 1 1 6.957497 6.957497 18208 +geneticist 1 1 6.957497 6.957497 18209 +ezin 1 1 6.957497 6.957497 18210 +shockwav 1 1 6.957497 6.957497 18211 +kudon 1 1 6.957497 6.957497 18212 +quicktimevr 1 1 6.957497 6.957497 18213 +documentari 1 1 6.957497 6.957497 18214 +plight 1 1 6.957497 6.957497 18215 +bosnia 1 1 6.957497 6.957497 18216 +uproot 1 1 6.957497 6.957497 18217 +preslei 1 1 6.957497 6.957497 18218 +meetingsejhazen 1 1 6.957497 6.957497 18219 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..fafc921f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +illinoi 1 7 5.010635 5.010635 1941 +tina 1 3 5.857933 5.857933 3744 +urbana 1 3 5.857933 5.857933 3879 +eliassi 1 2 6.263398 6.263398 6147 +champaign 1 2 6.263398 6.263398 5671 +pagetina 1 1 6.957497 6.957497 18220 +bldgphone 1 1 6.957497 6.957497 18221 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..0348a464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html @@ -0,0 +1 @@ +term, tf, in documents count, idf, tfidf, wordid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..4bd7426e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +address 1 170 1.791759 1.791759 62 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +topic 1 114 2.197225 2.197225 110 +intern 1 108 2.197225 2.197225 128 +technic 1 100 2.302585 2.302585 140 +proceed 1 93 2.397895 2.397895 152 +west 1 83 2.484907 2.484907 192 +level 1 87 2.484907 2.484907 180 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +april 1 77 2.564949 2.564949 196 +resum 1 79 2.564949 2.564949 217 +symposium 1 72 2.639057 2.639057 238 +street 1 63 2.772589 2.772589 293 +instruct 1 53 2.944439 2.944439 332 +advisor 1 51 2.995732 2.995732 355 +approach 1 48 3.044522 3.044522 366 +anoth 1 45 3.135494 3.135494 408 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +annual 1 40 3.258097 3.258097 458 +electr 1 38 3.295837 3.295837 461 +jame 1 35 3.401197 3.401197 507 +steve 1 29 3.583519 3.583519 594 +trace 1 25 3.737670 3.737670 677 +smith 1 20 3.951244 3.951244 820 +eric 1 19 4.007333 4.007333 870 +predict 1 19 4.007333 4.007333 855 +latenc 1 16 4.174387 4.174387 993 +condit 1 16 4.174387 4.174387 975 +drive 1 15 4.248495 4.248495 1052 +johnson 1 13 4.382027 4.382027 1162 +bandwidth 1 11 4.553877 4.553877 1365 +branch 1 11 4.553877 4.553877 1318 +multiscalar 1 8 4.875197 4.875197 1783 +erik 1 8 4.875197 4.875197 1701 +microarchitectur 1 6 5.164786 5.164786 2238 +fetch 1 5 5.347108 5.347108 2567 +kestrel 1 4 5.568345 5.568345 2990 +cold 1 3 5.857933 5.857933 3637 +confid 1 3 5.857933 5.857933 3691 +bennett 1 3 5.857933 5.857933 4024 +passsth 1 1 6.957497 6.957497 18223 +budweisth 1 1 6.957497 6.957497 18224 +ericro 1 1 6.957497 6.957497 18225 +smithresearch 1 1 6.957497 6.957497 18226 +mispredict 1 1 6.957497 6.957497 18227 +tolerancepubl 1 1 6.957497 6.957497 18228 +rotenberg 1 1 6.957497 6.957497 18222 +jacobsen 1 1 6.957497 6.957497 18229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..b2c739b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +phone 1 175 1.791759 1.791759 45 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +west 1 83 2.484907 2.484907 192 +school 1 84 2.484907 2.484907 188 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +would 1 67 2.708050 2.708050 251 +evalu 1 64 2.772589 2.772589 266 +streetmadison 1 38 3.295837 3.295837 474 +electr 1 38 3.295837 3.295837 461 +sciencesunivers 1 37 3.332205 3.332205 486 +next 1 34 3.401197 3.401197 517 +idea 1 32 3.465736 3.465736 545 +measur 1 28 3.610918 3.610918 609 +american 1 27 3.637586 3.637586 634 +rather 1 26 3.688879 3.688879 642 +miscellan 1 23 3.806662 3.806662 731 +listen 1 18 4.060443 4.060443 907 +drink 1 9 4.753590 4.753590 1607 +french 1 9 4.753590 4.753590 1511 +assistantdepart 1 8 4.875197 4.875197 1784 +fail 1 8 4.875197 4.875197 1655 +convers 1 8 4.875197 4.875197 1673 +architect 1 8 4.875197 4.875197 1624 +partner 1 8 4.875197 4.875197 1648 +hack 1 7 5.010635 5.010635 1950 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +suni 1 5 5.347108 5.347108 2452 +queen 1 4 5.568345 5.568345 2919 +usatel 1 2 6.263398 6.263398 6111 +buffalo 1 2 6.263398 6.263398 4947 +shubu 1 2 6.263398 6.263398 6148 +crime 1 2 6.263398 6.263398 5972 +mentorcultresearch 1 1 6.957497 6.957497 18230 +modelseduc 1 1 6.957497 6.957497 18231 +morf 1 1 6.957497 6.957497 18232 +dionosi 1 1 6.957497 6.957497 18233 +hillari 1 1 6.957497 6.957497 18234 +profan 1 1 6.957497 6.957497 18235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..e5daeaa9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +machin 1 129 2.079442 2.079442 95 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +techniqu 1 99 2.302585 2.302585 138 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +member 1 84 2.484907 2.484907 165 +west 1 83 2.484907 2.484907 192 +larg 1 82 2.484907 2.484907 168 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +complet 1 77 2.564949 2.564949 208 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +street 1 63 2.772589 2.772589 293 +januari 1 62 2.772589 2.772589 264 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +effect 1 46 3.091042 3.091042 385 +electron 1 47 3.091042 3.091042 379 +linear 1 41 3.218876 3.218876 431 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +purpos 1 37 3.332205 3.332205 481 +michael 1 35 3.401197 3.401197 514 +within 1 33 3.433987 3.433987 525 +graph 1 30 3.555348 3.555348 576 +consid 1 29 3.583519 3.583519 590 +scale 1 28 3.610918 3.610918 613 +framework 1 28 3.610918 3.610918 606 +determin 1 27 3.637586 3.637586 630 +relev 1 26 3.688879 3.688879 637 +emphasi 1 22 3.850148 3.850148 755 +properti 1 22 3.850148 3.850148 749 +identifi 1 22 3.850148 3.850148 760 +path 1 21 3.912023 3.912023 778 +exploit 1 20 3.951244 3.951244 836 +mostli 1 19 4.007333 4.007333 869 +cambridg 1 16 4.174387 4.174387 1008 +pagec 1 15 4.248495 4.248495 1011 +nonlinear 1 14 4.317488 4.317488 1107 +consider 1 14 4.317488 4.317488 1076 +econom 1 13 4.382027 4.382027 1184 +directli 1 13 4.382027 4.382027 1141 +robust 1 12 4.465908 4.465908 1271 +success 1 10 4.653960 4.653960 1390 +traffic 1 10 4.653960 4.653960 1421 +underli 1 10 4.653960 4.653960 1410 +ferri 1 8 4.875197 4.875197 1715 +pivot 1 5 5.347108 5.347108 2426 +chemic 1 5 5.347108 5.347108 2552 +condor 1 5 5.347108 5.347108 2577 +engineeringand 1 3 5.857933 5.857933 3779 +congest 1 3 5.857933 5.857933 3993 +complementar 1 3 5.857933 5.857933 3999 +followingtechniqu 1 2 6.263398 6.263398 5514 +equilibria 1 2 6.263398 6.263398 4760 +taxat 1 2 6.263398 6.263398 4524 +toll 1 2 6.263398 6.263398 6149 +arealso 1 2 6.263398 6.263398 5650 +beinginvestig 1 2 6.263398 6.263398 5745 +variationalinequ 1 1 6.957497 6.957497 18236 +toproblem 1 1 6.957497 6.957497 18237 +andinterfac 1 1 6.957497 6.957497 18238 +beingconsid 1 1 6.957497 6.957497 18239 +oncarbon 1 1 6.957497 6.957497 18240 +emiss 1 1 6.957497 6.957497 18241 +solvingproblem 1 1 6.957497 6.957497 18242 +partitioningtechniqu 1 1 6.957497 6.957497 18243 +forexploit 1 1 6.957497 6.957497 18244 +underlyingmodel 1 1 6.957497 6.957497 18245 +cpnet 1 1 6.957497 6.957497 18246 +prgram 1 1 6.957497 6.957497 18247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..4a6fecc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,255 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +dayton 1 119 2.079442 2.079442 104 +welcom 1 122 2.079442 2.079442 99 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +send 1 114 2.197225 2.197225 109 +make 1 111 2.197225 2.197225 120 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +comment 1 93 2.397895 2.397895 146 +present 1 91 2.397895 2.397895 145 +pictur 1 89 2.397895 2.397895 160 +homepag 1 93 2.397895 2.397895 148 +octob 1 89 2.397895 2.397895 156 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +librari 1 87 2.484907 2.484907 181 +learn 1 86 2.484907 2.484907 170 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +activ 1 84 2.484907 2.484907 182 +wide 1 84 2.484907 2.484907 185 +state 1 76 2.564949 2.564949 207 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +david 1 71 2.639057 2.639057 232 +free 1 73 2.639057 2.639057 224 +intellig 1 72 2.639057 2.639057 225 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +degre 1 69 2.708050 2.708050 259 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +receiv 1 66 2.708050 2.708050 244 +knowledg 1 67 2.708050 2.708050 243 +street 1 63 2.772589 2.772589 293 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +function 1 62 2.772589 2.772589 275 +visit 1 63 2.772589 2.772589 288 +virtual 1 62 2.772589 2.772589 285 +plai 1 60 2.833213 2.833213 307 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +explor 1 58 2.890372 2.890372 324 +point 1 58 2.890372 2.890372 319 +browser 1 56 2.890372 2.890372 313 +think 1 57 2.890372 2.890372 314 +allow 1 53 2.944439 2.944439 333 +advisor 1 51 2.995732 2.995732 355 +finger 1 52 2.995732 2.995732 354 +case 1 51 2.995732 2.995732 351 +investig 1 51 2.995732 2.995732 353 +date 1 51 2.995732 2.995732 344 +understand 1 47 3.091042 3.091042 384 +adapt 1 46 3.091042 3.091042 387 +featur 1 46 3.091042 3.091042 386 +math 1 44 3.135494 3.135494 402 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +netscap 1 44 3.135494 3.135494 395 +favorit 1 44 3.135494 3.135494 410 +show 1 43 3.178054 3.178054 417 +term 1 43 3.178054 3.178054 411 +music 1 42 3.218876 3.218876 436 +combin 1 42 3.218876 3.218876 421 +late 1 40 3.258097 3.258097 439 +littl 1 39 3.258097 3.258097 454 +form 1 39 3.258097 3.258097 443 +realli 1 40 3.258097 3.258097 444 +feel 1 37 3.332205 3.332205 483 +represent 1 35 3.401197 3.401197 512 +next 1 34 3.401197 3.401197 517 +word 1 34 3.401197 3.401197 508 +dissert 1 32 3.465736 3.465736 549 +kind 1 32 3.465736 3.465736 541 +express 1 32 3.465736 3.465736 540 +independ 1 32 3.465736 3.465736 548 +posit 1 31 3.496508 3.496508 552 +specifi 1 30 3.555348 3.555348 568 +platform 1 29 3.583519 3.583519 591 +measur 1 28 3.610918 3.610918 609 +hope 1 28 3.610918 3.610918 610 +actual 1 28 3.610918 3.610918 604 +releas 1 28 3.610918 3.610918 616 +relev 1 26 3.688879 3.688879 637 +bookmark 1 26 3.688879 3.688879 639 +wai 1 25 3.737670 3.737670 662 +task 1 25 3.737670 3.737670 678 +reach 1 24 3.761200 3.761200 688 +daili 1 24 3.761200 3.761200 706 +input 1 23 3.806662 3.806662 727 +head 1 23 3.806662 3.806662 732 +instead 1 22 3.850148 3.850148 756 +output 1 21 3.912023 3.912023 788 +tell 1 21 3.912023 3.912023 777 +grad 1 20 3.951244 3.951244 837 +exploit 1 20 3.951244 3.951244 836 +basi 1 20 3.951244 3.951244 828 +left 1 19 4.007333 4.007333 851 +feedback 1 19 4.007333 4.007333 854 +citi 1 19 4.007333 4.007333 874 +listen 1 18 4.060443 4.060443 907 +whether 1 17 4.110874 4.110874 918 +dilbert 1 16 4.174387 4.174387 996 +contribut 1 15 4.248495 4.248495 1021 +action 1 15 4.248495 4.248495 1038 +balanc 1 14 4.317488 4.317488 1112 +weak 1 13 4.382027 4.382027 1159 +hotlist 1 13 4.382027 4.382027 1199 +nasa 1 13 4.382027 4.382027 1188 +employ 1 12 4.465908 4.465908 1291 +minor 1 12 4.465908 4.465908 1237 +michigan 1 11 4.553877 4.553877 1368 +smart 1 11 4.553877 4.553877 1352 +abil 1 11 4.553877 4.553877 1341 +sens 1 11 4.553877 4.553877 1305 +bill 1 11 4.553877 4.553877 1297 +rapid 1 10 4.653960 4.653960 1453 +traffic 1 10 4.653960 4.653960 1421 +fellowship 1 10 4.653960 4.653960 1460 +true 1 10 4.653960 4.653960 1422 +volleybal 1 9 4.753590 4.753590 1598 +pair 1 9 4.753590 4.753590 1503 +editori 1 9 4.753590 4.753590 1611 +grew 1 8 4.875197 4.875197 1742 +gain 1 8 4.875197 4.875197 1730 +irregular 1 8 4.875197 4.875197 1768 +on 1 8 4.875197 4.875197 1628 +extract 1 8 4.875197 4.875197 1728 +opinion 1 8 4.875197 4.875197 1708 +star 1 8 4.875197 4.875197 1717 +tourist 1 8 4.875197 4.875197 1710 +earn 1 7 5.010635 5.010635 1788 +notion 1 7 5.010635 5.010635 1947 +piano 1 6 5.164786 5.164786 2201 +benefit 1 6 5.164786 5.164786 2213 +variant 1 6 5.164786 5.164786 2043 +gate 1 6 5.164786 5.164786 2182 +sponsor 1 6 5.164786 5.164786 2133 +grand 1 5 5.347108 5.347108 2425 +race 1 5 5.347108 5.347108 2417 +treat 1 5 5.347108 5.347108 2521 +focuss 1 5 5.347108 5.347108 2271 +blow 1 5 5.347108 5.347108 2407 +reinforc 1 4 5.568345 5.568345 2674 +thumb 1 4 5.568345 5.568345 2816 +sorri 1 4 5.568345 5.568345 3059 +trumpet 1 3 5.857933 5.857933 3946 +arm 1 3 5.857933 5.857933 3697 +neg 1 3 5.857933 5.857933 3451 +teacher 1 3 5.857933 5.857933 3892 +thesystem 1 3 5.857933 5.857933 3881 +interv 1 3 5.857933 5.857933 3253 +wit 1 3 5.857933 5.857933 4005 +trek 1 3 5.857933 5.857933 4025 +bibl 1 3 5.857933 5.857933 3143 +glenn 1 3 5.857933 5.857933 3869 +gould 1 3 5.857933 5.857933 3559 +scienceher 1 2 6.263398 6.263398 5912 +essenc 1 2 6.263398 6.263398 6150 +agood 1 2 6.263398 6.263398 5380 +pagefor 1 2 6.263398 6.263398 6151 +nextstep 1 2 6.263398 6.263398 6102 +foral 1 2 6.263398 6.263398 4290 +isthmu 1 2 6.263398 6.263398 6152 +pagesom 1 2 6.263398 6.263398 6109 +finton 1 1 6.957497 6.957497 18248 +nerdin 1 1 6.957497 6.957497 18250 +intelligenceher 1 1 6.957497 6.957497 18251 +softwarefor 1 1 6.957497 6.957497 18252 +trusti 1 1 6.957497 6.957497 18253 +nextstationor 1 1 6.957497 6.957497 18254 +enjoyplai 1 1 6.957497 6.957497 18255 +longhair 1 1 6.957497 6.957497 18256 +intervarsityfolk 1 1 6.957497 6.957497 18257 +supersoak 1 1 6.957497 6.957497 18258 +accountto 1 1 6.957497 6.957497 18259 +intelligenti 1 1 6.957497 6.957497 18260 +intelligencei 1 1 6.957497 6.957497 18261 +actappropri 1 1 6.957497 6.957497 18262 +todistinguish 1 1 6.957497 6.957497 18263 +orimport 1 1 6.957497 6.957497 18264 +basedfeatur 1 1 6.957497 6.957497 18265 +learningprocess 1 1 6.957497 6.957497 18266 +intelligentadapt 1 1 6.957497 6.957497 18267 +whichwil 1 1 6.957497 6.957497 18268 +hotlistthi 1 1 6.957497 6.957497 18269 +omniweb 1 1 6.957497 6.957497 18270 +eleg 1 1 6.957497 6.957497 18271 +omniwebi 1 1 6.957497 6.957497 18272 +openstep 1 1 6.957497 6.957497 18249 +responseto 1 1 6.957497 6.957497 18273 +jehovah 1 1 6.957497 6.957497 18274 +deiti 1 1 6.957497 6.957497 18275 +christwisconsin 1 1 6.957497 6.957497 18276 +intervars 1 1 6.957497 6.957497 18277 +weatherin 1 1 6.957497 6.957497 18278 +nebula 1 1 6.957497 6.957497 18279 +crosssearch 1 1 6.957497 6.957497 18280 +farsid 1 1 6.957497 6.957497 18281 +voyagerent 1 1 6.957497 6.957497 18282 +zoneroam 1 1 6.957497 6.957497 18283 +stereogram 1 1 6.957497 6.957497 18284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..facc34ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +teach 1 108 2.197225 2.197225 112 +code 1 108 2.197225 2.197225 116 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +specif 1 106 2.197225 2.197225 106 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +commun 1 95 2.397895 2.397895 157 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +environ 1 84 2.484907 2.484907 177 +novemb 1 81 2.484907 2.484907 179 +messag 1 76 2.564949 2.564949 212 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +know 1 80 2.564949 2.564949 198 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +complet 1 77 2.564949 2.564949 208 +decemb 1 80 2.564949 2.564949 215 +involv 1 71 2.639057 2.639057 227 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +logic 1 71 2.639057 2.639057 230 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +integr 1 67 2.708050 2.708050 245 +import 1 65 2.772589 2.772589 282 +januari 1 62 2.772589 2.772589 264 +experi 1 64 2.772589 2.772589 283 +evalu 1 64 2.772589 2.772589 266 +septemb 1 65 2.772589 2.772589 274 +best 1 59 2.833213 2.833213 299 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +automat 1 61 2.833213 2.833213 306 +semest 1 58 2.890372 2.890372 312 +publish 1 57 2.890372 2.890372 326 +undergradu 1 54 2.944439 2.944439 338 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +telephon 1 50 3.044522 3.044522 373 +approach 1 48 3.044522 3.044522 366 +pointer 1 48 3.044522 3.044522 368 +principl 1 48 3.044522 3.044522 357 +right 1 48 3.044522 3.044522 363 +effect 1 46 3.091042 3.091042 385 +possibl 1 47 3.091042 3.091042 378 +anoth 1 45 3.135494 3.135494 408 +execut 1 45 3.135494 3.135494 404 +better 1 45 3.135494 3.135494 401 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +must 1 40 3.258097 3.258097 442 +error 1 40 3.258097 3.258097 449 +littl 1 39 3.258097 3.258097 454 +transact 1 39 3.258097 3.258097 438 +author 1 39 3.258097 3.258097 450 +correct 1 38 3.295837 3.295837 462 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +procedur 1 36 3.367296 3.367296 488 +especi 1 36 3.367296 3.367296 496 +soon 1 36 3.367296 3.367296 494 +short 1 36 3.367296 3.367296 499 +global 1 34 3.401197 3.401197 520 +concurr 1 34 3.401197 3.401197 501 +least 1 35 3.401197 3.401197 516 +john 1 33 3.433987 3.433987 532 +express 1 32 3.465736 3.465736 540 +extend 1 32 3.465736 3.465736 539 +richard 1 31 3.496508 3.496508 559 +focu 1 30 3.555348 3.555348 571 +graph 1 30 3.555348 3.555348 576 +common 1 30 3.555348 3.555348 574 +rang 1 30 3.555348 3.555348 565 +semant 1 29 3.583519 3.583519 587 +steve 1 29 3.583519 3.583519 594 +multiprocessor 1 28 3.610918 3.610918 605 +load 1 28 3.610918 3.610918 601 +framework 1 28 3.610918 3.610918 606 +arrai 1 27 3.637586 3.637586 627 +revis 1 26 3.688879 3.688879 640 +wai 1 25 3.737670 3.737670 662 +store 1 24 3.761200 3.761200 693 +flow 1 24 3.761200 3.761200 700 +color 1 22 3.850148 3.850148 762 +almost 1 22 3.850148 3.850148 742 +william 1 22 3.850148 3.850148 765 +avoid 1 21 3.912023 3.912023 799 +among 1 21 3.912023 3.912023 781 +programminglanguag 1 21 3.912023 3.912023 782 +exploit 1 20 3.951244 3.951244 836 +alloc 1 20 3.951244 3.951244 821 +supervis 1 20 3.951244 3.951244 840 +facil 1 20 3.951244 3.951244 814 +definit 1 19 4.007333 4.007333 864 +seem 1 18 4.060443 4.060443 899 +regist 1 17 4.110874 4.110874 938 +ultim 1 17 4.110874 4.110874 943 +analyz 1 17 4.110874 4.110874 925 +anyon 1 17 4.110874 4.110874 916 +monitor 1 17 4.110874 4.110874 941 +steven 1 17 4.110874 4.110874 953 +modern 1 16 4.174387 4.174387 966 +easi 1 16 4.174387 4.174387 969 +mayb 1 15 4.248495 4.248495 1014 +indic 1 15 4.248495 4.248495 1013 +driven 1 15 4.248495 4.248495 1048 +todd 1 15 4.248495 4.248495 1051 +polynomi 1 14 4.317488 4.317488 1069 +demand 1 14 4.317488 4.317488 1073 +split 1 14 4.317488 4.317488 1078 +attribut 1 14 4.317488 4.317488 1092 +charl 1 13 4.382027 4.382027 1149 +care 1 13 4.382027 4.382027 1177 +sigplan 1 13 4.382027 4.382027 1190 +context 1 13 4.382027 4.382027 1153 +johnson 1 13 4.382027 4.382027 1162 +bruce 1 12 4.465908 4.465908 1226 +benjamin 1 11 4.553877 4.553877 1296 +stephen 1 11 4.553877 4.553877 1342 +transpar 1 11 4.553877 4.553877 1325 +arithmet 1 10 4.653960 4.653960 1388 +routin 1 9 4.753590 4.753590 1549 +minimum 1 9 4.753590 4.753590 1555 +cum 1 8 4.875197 4.875197 1619 +sensit 1 8 4.875197 4.875197 1726 +fischer 1 7 5.010635 5.010635 1893 +pipelin 1 7 5.010635 5.010635 1830 +delai 1 7 5.010635 5.010635 1848 +appar 1 7 5.010635 5.010635 1958 +zero 1 7 5.010635 5.010635 1896 +bookstor 1 7 5.010635 5.010635 1837 +benefit 1 6 5.164786 5.164786 2213 +mistak 1 6 5.164786 5.164786 2110 +sigact 1 6 5.164786 5.164786 2212 +grammar 1 6 5.164786 5.164786 2058 +unnecessari 1 5 5.347108 5.347108 2506 +explicitli 1 5 5.347108 5.347108 2308 +quantifi 1 5 5.347108 5.347108 2525 +attract 1 5 5.347108 5.347108 2356 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +pars 1 5 5.347108 5.347108 2321 +vital 1 4 5.568345 5.568345 2733 +interprocedur 1 4 5.568345 5.568345 2771 +popl 1 4 5.568345 5.568345 3068 +gregori 1 4 5.568345 5.568345 2928 +teachingc 1 3 5.857933 5.857933 3614 +domin 1 3 5.857933 5.857933 3995 +likelihood 1 3 5.857933 5.857933 3172 +topla 1 3 5.857933 5.857933 3563 +retarget 1 3 5.857933 5.857933 3994 +insoftwar 1 2 6.263398 6.263398 4932 +everywher 1 2 6.263398 6.263398 5690 +educationph 1 2 6.263398 6.263398 6112 +milton 1 2 6.263398 6.263398 6153 +syntact 1 2 6.263398 6.263398 5552 +bernard 1 2 6.263398 6.263398 5894 +dion 1 2 6.263398 6.263398 5856 +venkatesh 1 2 6.263398 6.263398 6154 +nbsp 1 1 6.957497 6.957497 18285 +nbspcharl 1 1 6.957497 6.957497 18290 +nbspprofessor 1 1 6.957497 6.957497 18291 +nbspunivers 1 1 6.957497 6.957497 18292 +enormouscap 1 1 6.957497 6.957497 18293 +haveinvestig 1 1 6.957497 6.957497 18294 +registerresid 1 1 6.957497 6.957497 18295 +loadsand 1 1 6.957497 6.957497 18296 +theprocedur 1 1 6.957497 6.957497 18297 +studiedinterprocedur 1 1 6.957497 6.957497 18298 +modelsthat 1 1 6.957497 6.957497 18299 +optimallyalloc 1 1 6.957497 6.957497 18300 +toautomat 1 1 6.957497 6.957497 18301 +orno 1 1 6.957497 6.957497 18302 +slowdown 1 1 6.957497 6.957497 18303 +kurland 1 1 6.957497 6.957497 18286 +harish 1 1 6.957497 6.957497 18288 +patil 1 1 6.957497 6.957497 18289 +proebst 1 1 6.957497 6.957497 18287 +inacm 1 1 6.957497 6.957497 18304 +activitiesa 1 1 6.957497 6.957497 18305 +cytronand 1 1 6.957497 6.957497 18306 +studentsdonn 1 1 6.957497 6.957497 18307 +rowland 1 1 6.957497 6.957497 18308 +skedzielewski 1 1 6.957497 6.957497 18309 +reevalu 1 1 6.957497 6.957497 18310 +corrector 1 1 6.957497 6.957497 18311 +sensitivepars 1 1 6.957497 6.957497 18312 +mahadevan 1 1 6.957497 6.957497 18313 +ganapathi 1 1 6.957497 6.957497 18314 +vimal 1 1 6.957497 6.957497 18315 +begwami 1 1 6.957497 6.957497 18316 +maunei 1 1 6.957497 6.957497 18317 +anil 1 1 6.957497 6.957497 18318 +winsborough 1 1 6.957497 6.957497 18319 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..a52b50b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,260 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +architectur 1 139 1.945910 1.945910 77 +process 1 142 1.945910 1.945910 72 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +specif 1 106 2.197225 2.197225 106 +place 1 106 2.197225 2.197225 124 +intern 1 108 2.197225 2.197225 128 +topic 1 114 2.197225 2.197225 110 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +follow 1 92 2.397895 2.397895 143 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +larg 1 82 2.484907 2.484907 168 +ieee 1 86 2.484907 2.484907 190 +second 1 81 2.484907 2.484907 166 +novemb 1 81 2.484907 2.484907 179 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +optim 1 79 2.564949 2.564949 197 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +symposium 1 72 2.639057 2.639057 238 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +workshop 1 71 2.639057 2.639057 239 +main 1 67 2.708050 2.708050 256 +integr 1 67 2.708050 2.708050 245 +differ 1 66 2.708050 2.708050 253 +simul 1 66 2.708050 2.708050 255 +januari 1 62 2.772589 2.772589 264 +juli 1 60 2.833213 2.833213 305 +share 1 59 2.833213 2.833213 304 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +variou 1 56 2.890372 2.890372 317 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +faculti 1 56 2.890372 2.890372 325 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +extens 1 53 2.944439 2.944439 340 +februari 1 54 2.944439 2.944439 328 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +possibl 1 47 3.091042 3.091042 378 +effect 1 46 3.091042 3.091042 385 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +mechan 1 43 3.178054 3.178054 416 +third 1 43 3.178054 3.178054 412 +examin 1 42 3.218876 3.218876 424 +futur 1 41 3.218876 3.218876 427 +cach 1 41 3.218876 3.218876 432 +multipl 1 39 3.258097 3.258097 453 +close 1 38 3.295837 3.295837 465 +purpos 1 37 3.332205 3.332205 481 +cost 1 37 3.332205 3.332205 480 +least 1 35 3.401197 3.401197 516 +jame 1 35 3.401197 3.401197 507 +idea 1 32 3.465736 3.465736 545 +extend 1 32 3.465736 3.465736 539 +storag 1 31 3.496508 3.496508 553 +specifi 1 30 3.555348 3.555348 568 +focus 1 29 3.583519 3.583519 584 +limit 1 29 3.583519 3.583519 585 +platform 1 29 3.583519 3.583519 591 +synchron 1 29 3.583519 3.583519 588 +particip 1 29 3.583519 3.583519 589 +multiprocessor 1 28 3.610918 3.610918 605 +cluster 1 28 3.610918 3.610918 612 +scale 1 28 3.610918 3.610918 613 +repres 1 26 3.688879 3.688879 656 +consist 1 26 3.688879 3.688879 651 +bound 1 26 3.688879 3.688879 659 +todai 1 25 3.737670 3.737670 672 +wai 1 25 3.737670 3.737670 662 +supercomput 1 25 3.737670 3.737670 681 +scalabl 1 24 3.761200 3.761200 705 +pattern 1 24 3.761200 3.761200 689 +hierarchi 1 22 3.850148 3.850148 744 +chip 1 21 3.912023 3.912023 770 +alumni 1 21 3.912023 3.912023 807 +increas 1 20 3.951244 3.951244 829 +exploit 1 20 3.951244 3.951244 836 +separ 1 19 4.007333 4.007333 844 +along 1 18 4.060443 4.060443 878 +element 1 18 4.060443 4.060443 895 +minim 1 18 4.060443 4.060443 887 +wind 1 18 4.060443 4.060443 908 +scott 1 18 4.060443 4.060443 884 +lower 1 18 4.060443 4.060443 886 +interconnect 1 17 4.110874 4.110874 937 +layer 1 17 4.110874 4.110874 926 +latenc 1 16 4.174387 4.174387 993 +capabl 1 15 4.248495 4.248495 1016 +massiv 1 15 4.248495 4.248495 1026 +overhead 1 15 4.248495 4.248495 1035 +hierarch 1 15 4.248495 4.248495 1018 +conduct 1 14 4.317488 4.317488 1065 +coher 1 14 4.317488 4.317488 1109 +topolog 1 14 4.317488 4.317488 1089 +central 1 13 4.382027 4.382027 1160 +individu 1 13 4.382027 4.382027 1126 +johnson 1 13 4.382027 4.382027 1162 +optic 1 12 4.465908 4.465908 1221 +grow 1 12 4.465908 4.465908 1209 +mari 1 12 4.465908 4.465908 1266 +evolut 1 11 4.553877 4.553877 1314 +bandwidth 1 11 4.553877 4.553877 1365 +impact 1 11 4.553877 4.553877 1334 +primit 1 11 4.553877 4.553877 1317 +extrem 1 11 4.553877 4.553877 1330 +penalti 1 10 4.653960 4.653960 1405 +modul 1 10 4.653960 4.653960 1434 +resid 1 10 4.653960 4.653960 1461 +label 1 10 4.653960 4.653960 1423 +queue 1 10 4.653960 4.653960 1386 +elimin 1 9 4.753590 4.753590 1558 +lock 1 9 4.753590 4.753590 1551 +vernon 1 9 4.753590 4.753590 1556 +doug 1 9 4.753590 4.753590 1517 +univeristi 1 8 4.875197 4.875197 1754 +transport 1 8 4.875197 4.875197 1672 +evan 1 8 4.875197 4.875197 1633 +merg 1 7 5.010635 5.010635 1862 +migrat 1 7 5.010635 5.010635 1851 +microprocessor 1 7 5.010635 5.010635 1808 +serial 1 7 5.010635 5.010635 1975 +burger 1 7 5.010635 5.010635 1889 +goodman 1 7 5.010635 5.010635 1891 +onto 1 6 5.164786 5.164786 2089 +philip 1 6 5.164786 5.164786 2005 +diagram 1 5 5.347108 5.347108 2346 +quantifi 1 5 5.347108 5.347108 2525 +galileo 1 4 5.568345 5.568345 3086 +medium 1 4 5.568345 5.568345 2834 +eventu 1 4 5.568345 5.568345 3074 +arrow 1 3 5.857933 5.857933 3520 +bank 1 3 5.857933 5.857933 3920 +stefano 1 3 5.857933 5.857933 3372 +kaxira 1 3 5.857933 5.857933 3373 +aswel 1 3 5.857933 5.857933 3286 +fresh 1 3 5.857933 5.857933 3706 +stein 1 3 5.857933 5.857933 3646 +wisconsint 1 2 6.263398 6.263398 6155 +groupat 1 2 6.263398 6.263398 5677 +emphasison 1 2 6.263398 6.263398 4157 +extent 1 2 6.263398 6.263398 6080 +dram 1 2 6.263398 6.263398 4173 +iram 1 2 6.263398 6.263398 4520 +datascalar 1 2 6.263398 6.263398 4518 +spsd 1 2 6.263398 6.263398 4519 +alain 1 2 6.263398 6.263398 6086 +declin 1 2 6.263398 6.263398 5385 +logarithm 1 2 6.263398 6.263398 5322 +multiprocessorsa 1 2 6.263398 6.263398 5455 +gjess 1 2 6.263398 6.263398 6156 +contentsgalileoproject 1 1 6.957497 6.957497 18322 +descriptionpublicationsrel 1 1 6.957497 6.957497 18323 +projectssci 1 1 6.957497 6.957497 18324 +wisconsinproject 1 1 6.957497 6.957497 18325 +descriptionpublicationsproject 1 1 6.957497 6.957497 18326 +membersgalileo 1 1 6.957497 6.957497 18327 +wisconsingalileo 1 1 6.957497 6.957497 18328 +therelationship 1 1 6.957497 6.957497 18329 +futuresystem 1 1 6.957497 6.957497 18330 +issuabl 1 1 6.957497 6.957497 18331 +orlimit 1 1 6.957497 6.957497 18332 +capacityon 1 1 6.957497 6.957497 18333 +sizabl 1 1 6.957497 6.957497 18334 +fractionof 1 1 6.957497 6.957497 18335 +mopin 1 1 6.957497 6.957497 18336 +ofprocessor 1 1 6.957497 6.957497 18337 +eventuallyobvi 1 1 6.957497 6.957497 18338 +andlimit 1 1 6.957497 6.957497 18339 +systemsperform 1 1 6.957497 6.957497 18340 +theprocessor 1 1 6.957497 6.957497 18341 +spectrumcach 1 1 6.957497 6.957497 18342 +systemsdesign 1 1 6.957497 6.957497 18343 +systemprogram 1 1 6.957497 6.957497 18344 +bottlenecksdoug 1 1 6.957497 6.957497 18345 +modeldoug 1 1 6.957497 6.957497 18346 +microprocessorsdoug 1 1 6.957497 6.957497 18347 +microprocessorsdougla 1 1 6.957497 6.957497 18348 +berkeleyppram 1 1 6.957497 6.957497 18349 +kyushu 1 1 6.957497 6.957497 18350 +japansci 1 1 6.957497 6.957497 18351 +wisconsinour 1 1 6.957497 6.957497 18352 +coherentshar 1 1 6.957497 6.957497 18353 +coherentinterfac 1 1 6.957497 6.957497 18354 +qolb 1 1 6.957497 6.957497 18355 +pairwis 1 1 6.957497 6.957497 18356 +definitionfor 1 1 6.957497 6.957497 18357 +betweenprocess 1 1 6.957497 6.957497 18358 +structureseffici 1 1 6.957497 6.957497 18359 +extensionsaggress 1 1 6.957497 6.957497 18360 +multiprocessorswisconsin 1 1 6.957497 6.957497 18361 +tunneldougla 1 1 6.957497 6.957497 18362 +scijam 1 1 6.957497 6.957497 18363 +memoryross 1 1 6.957497 6.957497 18364 +nagi 1 1 6.957497 6.957497 18321 +aboulenein 1 1 6.957497 6.957497 18365 +woest 1 1 6.957497 6.957497 18320 +ringsross 1 1 6.957497 6.957497 18366 +ringsteven 1 1 6.957497 6.957497 18367 +coherenceross 1 1 6.957497 6.957497 18368 +multiprocessorsphilip 1 1 6.957497 6.957497 18369 +multiprocessorjam 1 1 6.957497 6.957497 18370 +abouleneinross 1 1 6.957497 6.957497 18371 +johnsonstev 1 1 6.957497 6.957497 18372 +scottlast 1 1 6.957497 6.957497 18373 +dburger 1 1 6.957497 6.957497 18374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..bf9b9ca2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +wisc 1 242 1.386294 1.386294 33 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +structur 1 106 2.197225 2.197225 105 +peopl 1 96 2.302585 2.302585 132 +grade 1 90 2.397895 2.397895 142 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +paul 1 38 3.295837 3.295837 471 +post 1 35 3.401197 3.401197 505 +martin 1 21 3.912023 3.912023 794 +andrew 1 19 4.007333 4.007333 849 +jean 1 10 4.653960 4.653960 1440 +regent 1 5 5.347108 5.347108 2551 +geeri 1 3 5.857933 5.857933 3422 +albert 1 2 6.263398 6.263398 5987 +friedrich 1 2 6.263398 6.263398 5175 +madisonin 1 1 6.957497 6.957497 18375 +compsci 1 1 6.957497 6.957497 18376 +pontif 1 1 6.957497 6.957497 18377 +jacqu 1 1 6.957497 6.957497 18378 +derrida 1 1 6.957497 6.957497 18379 +heidegg 1 1 6.957497 6.957497 18380 +camu 1 1 6.957497 6.957497 18381 +sartr 1 1 6.957497 6.957497 18382 +nietzsch 1 1 6.957497 6.957497 18383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..3640219d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +find 1 111 2.197225 2.197225 111 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +well 1 109 2.197225 2.197225 121 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +follow 1 92 2.397895 2.397895 143 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +orient 1 80 2.564949 2.564949 205 +main 1 67 2.708050 2.708050 256 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +share 1 59 2.833213 2.833213 304 +back 1 60 2.833213 2.833213 297 +index 1 56 2.890372 2.890372 309 +sever 1 56 2.890372 2.890372 322 +think 1 57 2.890372 2.890372 314 +unix 1 58 2.890372 2.890372 308 +sampl 1 53 2.944439 2.944439 339 +case 1 51 2.995732 2.995732 351 +right 1 48 3.044522 3.044522 363 +move 1 47 3.091042 3.091042 382 +directori 1 45 3.135494 3.135494 396 +netscap 1 44 3.135494 3.135494 395 +might 1 41 3.218876 3.218876 426 +fast 1 42 3.218876 3.218876 429 +continu 1 39 3.258097 3.258097 448 +movi 1 40 3.258097 3.258097 459 +small 1 39 3.258097 3.258097 447 +cost 1 37 3.332205 3.332205 480 +either 1 35 3.401197 3.401197 506 +given 1 32 3.465736 3.465736 538 +someth 1 31 3.496508 3.496508 554 +consid 1 29 3.583519 3.583519 590 +usual 1 28 3.610918 3.610918 608 +load 1 28 3.610918 3.610918 601 +actual 1 28 3.610918 3.610918 604 +great 1 27 3.637586 3.637586 626 +bookmark 1 26 3.688879 3.688879 639 +sometim 1 24 3.761200 3.761200 696 +higher 1 24 3.761200 3.761200 690 +thank 1 23 3.806662 3.806662 721 +sent 1 22 3.850148 3.850148 763 +grad 1 20 3.951244 3.951244 837 +wrote 1 20 3.951244 3.951244 830 +mostli 1 19 4.007333 4.007333 869 +exercis 1 19 4.007333 4.007333 842 +stop 1 17 4.110874 4.110874 942 +purchas 1 15 4.248495 4.248495 1030 +mayb 1 15 4.248495 4.248495 1014 +floor 1 14 4.317488 4.317488 1070 +wait 1 13 4.382027 4.382027 1168 +walk 1 12 4.465908 4.465908 1281 +outsid 1 12 4.465908 4.465908 1219 +grow 1 12 4.465908 4.465908 1209 +noth 1 11 4.553877 4.553877 1328 +denni 1 11 4.553877 4.553877 1321 +calvin 1 9 4.753590 4.753590 1518 +claim 1 8 4.875197 4.875197 1664 +unifi 1 8 4.875197 4.875197 1774 +reload 1 8 4.875197 4.875197 1682 +told 1 8 4.875197 4.875197 1658 +accord 1 7 5.010635 5.010635 1826 +none 1 7 5.010635 5.010635 1811 +monei 1 7 5.010635 5.010635 1934 +christian 1 7 5.010635 5.010635 1949 +huge 1 6 5.164786 5.164786 1991 +handbook 1 6 5.164786 5.164786 2061 +gui 1 5 5.347108 5.347108 2573 +feet 1 5 5.347108 5.347108 2492 +anti 1 5 5.347108 5.347108 2434 +cheap 1 4 5.568345 5.568345 2751 +prospect 1 4 5.568345 5.568345 3013 +shelf 1 4 5.568345 5.568345 2621 +fork 1 4 5.568345 5.568345 2801 +kill 1 4 5.568345 5.568345 3000 +suppos 1 4 5.568345 5.568345 3002 +suffic 1 4 5.568345 5.568345 2869 +glass 1 3 5.857933 5.857933 3759 +dutch 1 3 5.857933 5.857933 3592 +influenc 1 3 5.857933 5.857933 3349 +cash 1 3 5.857933 5.857933 3355 +dabbl 1 3 5.857933 5.857933 3971 +forward 1 3 5.857933 5.857933 3784 +deposit 1 2 6.263398 6.263398 6095 +cooler 1 2 6.263398 6.263398 6023 +suspect 1 2 6.263398 6.263398 5187 +nearest 1 2 6.263398 6.263398 4922 +roommat 1 2 6.263398 6.263398 6157 +withno 1 2 6.263398 6.263398 5370 +eventhough 1 2 6.263398 6.263398 6158 +anyhow 1 2 6.263398 6.263398 5188 +killer 1 2 6.263398 6.263398 6159 +programmingin 1 2 6.263398 6.263398 4135 +ritchi 1 2 6.263398 6.263398 4306 +creator 1 2 6.263398 6.263398 5998 +gideon 1 1 6.957497 6.957497 18384 +tweak 1 1 6.957497 6.957497 18385 +toonion 1 1 6.957497 6.957497 18386 +seethi 1 1 6.957497 6.957497 18387 +blockbust 1 1 6.957497 6.957497 18388 +predica 1 1 6.957497 6.957497 18389 +dismal 1 1 6.957497 6.957497 18390 +donationto 1 1 6.957497 6.957497 18391 +defrai 1 1 6.957497 6.957497 18392 +orderscan 1 1 6.957497 6.957497 18393 +monro 1 1 6.957497 6.957497 18394 +usathank 1 1 6.957497 6.957497 18395 +unread 1 1 6.957497 6.957497 18396 +achil 1 1 6.957497 6.957497 18397 +cstechreport 1 1 6.957497 6.957497 18398 +otherstuff 1 1 6.957497 6.957497 18399 +averagewil 1 1 6.957497 6.957497 18400 +doofu 1 1 6.957497 6.957497 18401 +zippi 1 1 6.957497 6.957497 18402 +pinheadha 1 1 6.957497 6.957497 18403 +justtri 1 1 6.957497 6.957497 18404 +mozilla 1 1 6.957497 6.957497 18405 +buttonher 1 1 6.957497 6.957497 18406 +somethingin 1 1 6.957497 6.957497 18407 +hater 1 1 6.957497 6.957497 18408 +mailand 1 1 6.957497 6.957497 18409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..b86c7484 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,407 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +algorithm 1 162 1.791759 1.791759 57 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +architectur 1 139 1.945910 1.945910 77 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +access 1 102 2.302585 2.302585 136 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +imag 1 91 2.397895 2.397895 161 +sinc 1 90 2.397895 2.397895 159 +real 1 93 2.397895 2.397895 144 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +thing 1 84 2.484907 2.484907 189 +level 1 87 2.484907 2.484907 180 +educ 1 86 2.484907 2.484907 191 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +build 1 85 2.484907 2.484907 184 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +requir 1 81 2.484907 2.484907 167 +dynam 1 76 2.564949 2.564949 194 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +meet 1 72 2.639057 2.639057 229 +html 1 75 2.639057 2.639057 235 +involv 1 71 2.639057 2.639057 227 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +workshop 1 71 2.639057 2.639057 239 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +integr 1 67 2.708050 2.708050 245 +view 1 70 2.708050 2.708050 254 +test 1 66 2.708050 2.708050 252 +organ 1 65 2.772589 2.772589 265 +previou 1 62 2.772589 2.772589 290 +creat 1 63 2.772589 2.772589 277 +plan 1 65 2.772589 2.772589 272 +best 1 59 2.833213 2.833213 299 +summer 1 56 2.890372 2.890372 311 +unix 1 58 2.890372 2.890372 308 +think 1 57 2.890372 2.890372 314 +publish 1 57 2.890372 2.890372 326 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +variou 1 56 2.890372 2.890372 317 +major 1 56 2.890372 2.890372 315 +suggest 1 53 2.944439 2.944439 331 +instruct 1 53 2.944439 2.944439 332 +allow 1 53 2.944439 2.944439 333 +case 1 51 2.995732 2.995732 351 +week 1 52 2.995732 2.995732 343 +approach 1 48 3.044522 3.044522 366 +frequent 1 49 3.044522 3.044522 367 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +adapt 1 46 3.091042 3.091042 387 +effect 1 46 3.091042 3.091042 385 +could 1 46 3.091042 3.091042 383 +get 1 46 3.091042 3.091042 380 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +favorit 1 44 3.135494 3.135494 410 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +describ 1 45 3.135494 3.135494 400 +even 1 45 3.135494 3.135494 393 +mark 1 44 3.135494 3.135494 403 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +futur 1 41 3.218876 3.218876 427 +compani 1 41 3.218876 3.218876 423 +linear 1 41 3.218876 3.218876 431 +past 1 42 3.218876 3.218876 428 +form 1 39 3.258097 3.258097 443 +author 1 39 3.258097 3.258097 450 +multipl 1 39 3.258097 3.258097 453 +programm 1 39 3.258097 3.258097 445 +must 1 40 3.258097 3.258097 442 +open 1 38 3.295837 3.295837 469 +brian 1 38 3.295837 3.295837 466 +credit 1 38 3.295837 3.295837 460 +microsoft 1 38 3.295837 3.295837 468 +connect 1 37 3.332205 3.332205 485 +workstat 1 37 3.332205 3.332205 479 +mean 1 37 3.332205 3.332205 477 +tree 1 36 3.367296 3.367296 492 +download 1 36 3.367296 3.367296 489 +print 1 34 3.401197 3.401197 503 +concurr 1 34 3.401197 3.401197 501 +singl 1 34 3.401197 3.401197 510 +least 1 35 3.401197 3.401197 516 +manual 1 35 3.401197 3.401197 504 +someth 1 31 3.496508 3.496508 554 +often 1 31 3.496508 3.496508 551 +domain 1 30 3.555348 3.555348 564 +quot 1 29 3.583519 3.583519 582 +synchron 1 29 3.583519 3.583519 588 +actual 1 28 3.610918 3.610918 604 +usual 1 28 3.610918 3.610918 608 +static 1 27 3.637586 3.637586 619 +quit 1 27 3.637586 3.637586 633 +team 1 27 3.637586 3.637586 625 +rather 1 26 3.688879 3.688879 642 +enhanc 1 26 3.688879 3.688879 644 +wai 1 25 3.737670 3.737670 662 +although 1 25 3.737670 3.737670 667 +never 1 25 3.737670 3.737670 671 +higher 1 24 3.761200 3.761200 690 +known 1 24 3.761200 3.761200 702 +alwai 1 24 3.761200 3.761200 691 +mike 1 24 3.761200 3.761200 703 +miscellan 1 23 3.806662 3.806662 731 +self 1 22 3.850148 3.850148 761 +william 1 22 3.850148 3.850148 765 +sort 1 22 3.850148 3.850148 738 +identifi 1 22 3.850148 3.850148 760 +sent 1 22 3.850148 3.850148 763 +busi 1 21 3.912023 3.912023 784 +chip 1 21 3.912023 3.912023 770 +love 1 21 3.912023 3.912023 804 +divis 1 21 3.912023 3.912023 803 +similar 1 21 3.912023 3.912023 771 +voic 1 21 3.912023 3.912023 806 +watch 1 21 3.912023 3.912023 789 +tell 1 21 3.912023 3.912023 777 +avoid 1 21 3.912023 3.912023 799 +minut 1 20 3.951244 3.951244 810 +kernel 1 20 3.951244 3.951244 825 +portabl 1 20 3.951244 3.951244 819 +ever 1 19 4.007333 4.007333 872 +miss 1 19 4.007333 4.007333 866 +less 1 18 4.060443 4.060443 892 +element 1 18 4.060443 4.060443 895 +seem 1 18 4.060443 4.060443 899 +record 1 18 4.060443 4.060443 890 +aid 1 18 4.060443 4.060443 904 +anyon 1 17 4.110874 4.110874 916 +weekli 1 17 4.110874 4.110874 919 +thought 1 17 4.110874 4.110874 945 +intel 1 16 4.174387 4.174387 1000 +alreadi 1 16 4.174387 4.174387 963 +critic 1 16 4.174387 4.174387 982 +advantag 1 16 4.174387 4.174387 987 +configur 1 15 4.248495 4.248495 1012 +enough 1 15 4.248495 4.248495 1040 +piec 1 15 4.248495 4.248495 1020 +save 1 14 4.317488 4.317488 1099 +manner 1 14 4.317488 4.317488 1074 +becam 1 14 4.317488 4.317488 1117 +sai 1 13 4.382027 4.382027 1175 +care 1 13 4.382027 4.382027 1177 +cannot 1 13 4.382027 4.382027 1144 +central 1 13 4.382027 4.382027 1160 +everyth 1 13 4.382027 4.382027 1169 +weak 1 13 4.382027 4.382027 1159 +touch 1 12 4.465908 4.465908 1288 +assembl 1 12 4.465908 4.465908 1207 +usenix 1 12 4.465908 4.465908 1240 +went 1 12 4.465908 4.465908 1279 +overal 1 12 4.465908 4.465908 1254 +see 1 11 4.553877 4.553877 1337 +mapl 1 11 4.553877 4.553877 1376 +wood 1 11 4.553877 4.553877 1355 +fix 1 11 4.553877 4.553877 1327 +card 1 10 4.653960 4.653960 1435 +relationship 1 10 4.653960 4.653960 1383 +mainli 1 10 4.653960 4.653960 1432 +invit 1 10 4.653960 4.653960 1428 +bring 1 10 4.653960 4.653960 1430 +trust 1 9 4.753590 4.753590 1583 +motorola 1 9 4.753590 4.753590 1546 +entitl 1 9 4.753590 4.753590 1490 +patterson 1 9 4.753590 4.753590 1554 +charg 1 9 4.753590 4.753590 1582 +lock 1 9 4.753590 4.753590 1551 +login 1 9 4.753590 4.753590 1550 +prefer 1 9 4.753590 4.753590 1491 +admin 1 9 4.753590 4.753590 1476 +claim 1 8 4.875197 4.875197 1664 +poor 1 8 4.875197 4.875197 1736 +architect 1 8 4.875197 4.875197 1624 +perhap 1 8 4.875197 4.875197 1693 +driver 1 8 4.875197 4.875197 1657 +accomplish 1 8 4.875197 4.875197 1755 +calendar 1 8 4.875197 4.875197 1649 +mass 1 8 4.875197 4.875197 1732 +isol 1 8 4.875197 4.875197 1663 +realiz 1 8 4.875197 4.875197 1739 +montreal 1 7 5.010635 5.010635 1961 +serial 1 7 5.010635 5.010635 1975 +suffici 1 7 5.010635 5.010635 1897 +therefor 1 7 5.010635 5.010635 1822 +header 1 7 5.010635 5.010635 1787 +arrang 1 6 5.164786 5.164786 2023 +sohi 1 6 5.164786 5.164786 2237 +versu 1 6 5.164786 5.164786 2052 +snow 1 6 5.164786 5.164786 2031 +sleep 1 6 5.164786 5.164786 2211 +microarchitectur 1 6 5.164786 5.164786 2238 +pentium 1 6 5.164786 5.164786 2077 +quickli 1 6 5.164786 5.164786 2000 +proce 1 6 5.164786 5.164786 2114 +relax 1 6 5.164786 5.164786 2120 +strip 1 6 5.164786 5.164786 2203 +apolog 1 6 5.164786 5.164786 2046 +commit 1 6 5.164786 5.164786 2233 +pagethi 1 5 5.347108 5.347108 2336 +ship 1 5 5.347108 5.347108 2534 +default 1 5 5.347108 5.347108 2335 +patent 1 5 5.347108 5.347108 2574 +chapel 1 5 5.347108 5.347108 2457 +keeper 1 5 5.347108 5.347108 2569 +knew 1 5 5.347108 5.347108 2445 +exchang 1 5 5.347108 5.347108 2310 +adopt 1 5 5.347108 5.347108 2467 +hennessi 1 5 5.347108 5.347108 2289 +appreci 1 5 5.347108 5.347108 2374 +hate 1 5 5.347108 5.347108 2529 +optimist 1 5 5.347108 5.347108 2501 +recogn 1 5 5.347108 5.347108 2302 +truli 1 5 5.347108 5.347108 2476 +notabl 1 5 5.347108 5.347108 2276 +camp 1 5 5.347108 5.347108 2545 +andi 1 4 5.568345 5.568345 3081 +filesystem 1 4 5.568345 5.568345 2587 +unless 1 4 5.568345 5.568345 2607 +scribe 1 4 5.568345 5.568345 2631 +church 1 4 5.568345 5.568345 3011 +aspir 1 4 5.568345 5.568345 3019 +wear 1 4 5.568345 5.568345 2785 +trick 1 4 5.568345 5.568345 2967 +wander 1 4 5.568345 5.568345 2896 +wherea 1 4 5.568345 5.568345 2597 +cshrc 1 4 5.568345 5.568345 2759 +disconnect 1 4 5.568345 5.568345 2664 +fame 1 3 5.857933 5.857933 3793 +coin 1 3 5.857933 5.857933 3799 +harm 1 3 5.857933 5.857933 3515 +warm 1 3 5.857933 5.857933 3904 +advertis 1 3 5.857933 5.857933 3788 +redesign 1 3 5.857933 5.857933 3540 +hacker 1 3 5.857933 5.857933 3996 +gould 1 3 5.857933 5.857933 3559 +wilkinson 1 3 5.857933 5.857933 3579 +berlin 1 3 5.857933 5.857933 3263 +pilot 1 3 5.857933 5.857933 4008 +urgent 1 3 5.857933 5.857933 3316 +fascin 1 3 5.857933 5.857933 3948 +glew 1 2 6.263398 6.263398 4162 +pageandi 1 2 6.263398 6.263398 6096 +pope 1 2 6.263398 6.263398 5506 +parson 1 2 6.263398 6.263398 4528 +king 1 2 6.263398 6.263398 5737 +strand 1 2 6.263398 6.263398 5880 +chop 1 2 6.263398 6.263398 6160 +beard 1 2 6.263398 6.263398 6161 +constantli 1 2 6.263398 6.263398 4181 +verg 1 2 6.263398 6.263398 5488 +disagre 1 2 6.263398 6.263398 6105 +defunct 1 2 6.263398 6.263398 6162 +startup 1 2 6.263398 6.263398 4676 +clone 1 2 6.263398 6.263398 5833 +aitken 1 2 6.263398 6.263398 4941 +ubiquit 1 2 6.263398 6.263398 6049 +intervent 1 2 6.263398 6.263398 6163 +bother 1 2 6.263398 6.263398 6164 +krazi 1 1 6.957497 6.957497 18412 +largelyform 1 1 6.957497 6.957497 18416 +snippet 1 1 6.957497 6.957497 18417 +stylishor 1 1 6.957497 6.957497 18418 +summarycontact 1 1 6.957497 6.957497 18419 +addressescalendar 1 1 6.957497 6.957497 18420 +taker 1 1 6.957497 6.957497 18421 +wannab 1 1 6.957497 6.957497 18413 +beef 1 1 6.957497 6.957497 18411 +priest 1 1 6.957497 6.957497 18422 +boyn 1 1 6.957497 6.957497 18423 +frost 1 1 6.957497 6.957497 18424 +almighti 1 1 6.957497 6.957497 18425 +dollar 1 1 6.957497 6.957497 18426 +bellow 1 1 6.957497 6.957497 18427 +ranter 1 1 6.957497 6.957497 18428 +preacher 1 1 6.957497 6.957497 18429 +beecher 1 1 6.957497 6.957497 18430 +harbour 1 1 6.957497 6.957497 18431 +deplor 1 1 6.957497 6.957497 18432 +churchmen 1 1 6.957497 6.957497 18433 +notori 1 1 6.957497 6.957497 18434 +atheist 1 1 6.957497 6.957497 18435 +chariti 1 1 6.957497 6.957497 18436 +sailor 1 1 6.957497 6.957497 18437 +chord 1 1 6.957497 6.957497 18438 +firewood 1 1 6.957497 6.957497 18439 +meal 1 1 6.957497 6.957497 18440 +manifesto 1 1 6.957497 6.957497 18441 +handbil 1 1 6.957497 6.957497 18442 +hackeralthough 1 1 6.957497 6.957497 18443 +formerlyhad 1 1 6.957497 6.957497 18444 +fake 1 1 6.957497 6.957497 18445 +andstil 1 1 6.957497 6.957497 18446 +wistfulli 1 1 6.957497 6.957497 18447 +suspend 1 1 6.957497 6.957497 18448 +bald 1 1 6.957497 6.957497 18449 +architectureonc 1 1 6.957497 6.957497 18450 +architecturei 1 1 6.957497 6.957497 18451 +grabbag 1 1 6.957497 6.957497 18452 +antidot 1 1 6.957497 6.957497 18453 +afford 1 1 6.957497 6.957497 18454 +diskspac 1 1 6.957497 6.957497 18455 +provideror 1 1 6.957497 6.957497 18456 +architectureon 1 1 6.957497 6.957497 18457 +datasheet 1 1 6.957497 6.957497 18458 +netscapebookmarksstockscod 1 1 6.957497 6.957497 18459 +standardsroi 1 1 6.957497 6.957497 18460 +standardsi 1 1 6.957497 6.957497 18461 +enfopris 1 1 6.957497 6.957497 18462 +writingto 1 1 6.957497 6.957497 18463 +longstand 1 1 6.957497 6.957497 18464 +configurationmanag 1 1 6.957497 6.957497 18465 +scc 1 1 6.957497 6.957497 18466 +box 1 1 6.957497 6.957497 18467 +hardlink 1 1 6.957497 6.957497 18468 +deprec 1 1 6.957497 6.957497 18469 +advoc 1 1 6.957497 6.957497 18410 +livelock 1 1 6.957497 6.957497 18470 +insist 1 1 6.957497 6.957497 18471 +checkinsso 1 1 6.957497 6.957497 18472 +approachin 1 1 6.957497 6.957497 18473 +fetterman 1 1 6.957497 6.957497 18474 +deserv 1 1 6.957497 6.957497 18475 +wisconsinhow 1 1 6.957497 6.957497 18476 +programat 1 1 6.957497 6.957497 18477 +cmtool 1 1 6.957497 6.957497 18478 +ical 1 1 6.957497 6.957497 18479 +anyof 1 1 6.957497 6.957497 18480 +manuallyadd 1 1 6.957497 6.957497 18481 +softwareto 1 1 6.957497 6.957497 18414 +intelat 1 1 6.957497 6.957497 18482 +devout 1 1 6.957497 6.957497 18483 +ontim 1 1 6.957497 6.957497 18484 +meetingswith 1 1 6.957497 6.957497 18485 +reserveth 1 1 6.957497 6.957497 18486 +blindli 1 1 6.957497 6.957497 18487 +teresa 1 1 6.957497 6.957497 18415 +proposeif 1 1 6.957497 6.957497 18488 +overallschedul 1 1 6.957497 6.957497 18489 +secretariesand 1 1 6.957497 6.957497 18490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..a3ff0d76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +good 1 77 2.564949 2.564949 200 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 1 35 3.401197 3.401197 507 +lot 1 18 4.060443 4.060443 889 +goodman 1 7 5.010635 5.010635 1891 +sciencesdepart 1 6 5.164786 5.164786 2020 +galileo 1 4 5.568345 5.568345 3086 +usaresearch 1 1 6.957497 6.957497 18491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..876e4b11 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,339 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +architectur 1 139 1.945910 1.945910 77 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +find 1 111 2.197225 2.197225 111 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +imag 1 91 2.397895 2.397895 161 +select 1 91 2.397895 2.397895 154 +pictur 1 89 2.397895 2.397895 160 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +academ 1 82 2.484907 2.484907 178 +april 1 77 2.564949 2.564949 196 +refer 1 78 2.564949 2.564949 203 +homework 1 79 2.564949 2.564949 193 +state 1 76 2.564949 2.564949 207 +html 1 75 2.639057 2.639057 235 +line 1 75 2.639057 2.639057 231 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +view 1 70 2.708050 2.708050 254 +simul 1 66 2.708050 2.708050 255 +window 1 68 2.708050 2.708050 242 +multimedia 1 68 2.708050 2.708050 258 +dept 1 64 2.772589 2.772589 291 +guid 1 63 2.772589 2.772589 267 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +new 1 64 2.772589 2.772589 262 +back 1 60 2.833213 2.833213 297 +automat 1 61 2.833213 2.833213 306 +unix 1 58 2.890372 2.890372 308 +browser 1 56 2.890372 2.890372 313 +index 1 56 2.890372 2.890372 309 +hardwar 1 51 2.995732 2.995732 350 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +numer 1 49 3.044522 3.044522 369 +give 1 50 3.044522 3.044522 359 +cool 1 49 3.044522 3.044522 374 +archiv 1 49 3.044522 3.044522 364 +fridai 1 44 3.135494 3.135494 390 +algebra 1 45 3.135494 3.135494 394 +directori 1 45 3.135494 3.135494 396 +video 1 44 3.135494 3.135494 405 +math 1 44 3.135494 3.135494 402 +natur 1 44 3.135494 3.135494 406 +mark 1 44 3.135494 3.135494 403 +linear 1 41 3.218876 3.218876 431 +vision 1 41 3.218876 3.218876 430 +might 1 41 3.218876 3.218876 426 +compani 1 41 3.218876 3.218876 423 +http 1 41 3.218876 3.218876 420 +theoret 1 39 3.258097 3.258097 446 +electr 1 38 3.295837 3.295837 461 +robot 1 36 3.367296 3.367296 497 +everi 1 34 3.401197 3.401197 519 +tech 1 35 3.401197 3.401197 515 +survei 1 35 3.401197 3.401197 513 +eduoffic 1 33 3.433987 3.433987 531 +board 1 33 3.433987 3.433987 528 +independ 1 32 3.465736 3.465736 548 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +rang 1 30 3.555348 3.555348 565 +specifi 1 30 3.555348 3.555348 568 +packag 1 28 3.610918 3.610918 614 +univ 1 28 3.610918 3.610918 617 +retriev 1 27 3.637586 3.637586 621 +altern 1 26 3.688879 3.688879 641 +challeng 1 26 3.688879 3.688879 653 +todai 1 25 3.737670 3.737670 672 +client 1 25 3.737670 3.737670 679 +trace 1 25 3.737670 3.737670 677 +greg 1 24 3.761200 3.761200 695 +yahoo 1 24 3.761200 3.761200 707 +thank 1 23 3.806662 3.806662 721 +recognit 1 23 3.806662 3.806662 723 +geometri 1 22 3.850148 3.850148 752 +util 1 21 3.912023 3.912023 774 +fund 1 21 3.912023 3.912023 805 +hous 1 21 3.912023 3.912023 801 +portabl 1 20 3.951244 3.951244 819 +nice 1 20 3.951244 3.951244 809 +toolkit 1 20 3.951244 3.951244 835 +lyco 1 19 4.007333 4.007333 871 +histori 1 19 4.007333 4.007333 853 +feedback 1 19 4.007333 4.007333 854 +lot 1 18 4.060443 4.060443 889 +thoma 1 18 4.060443 4.060443 901 +stanford 1 17 4.110874 4.110874 955 +medic 1 17 4.110874 4.110874 958 +germani 1 17 4.110874 4.110874 946 +repositori 1 17 4.110874 4.110874 932 +white 1 17 4.110874 4.110874 951 +overhead 1 15 4.248495 4.248495 1035 +todd 1 15 4.248495 4.248495 1051 +charact 1 15 4.248495 4.248495 1028 +massiv 1 15 4.248495 4.248495 1026 +english 1 15 4.248495 4.248495 1033 +draft 1 14 4.317488 4.317488 1085 +suit 1 13 4.382027 4.382027 1129 +pretti 1 13 4.382027 4.382027 1191 +resolut 1 13 4.382027 4.382027 1172 +misc 1 13 4.382027 4.382027 1124 +charl 1 13 4.382027 4.382027 1149 +shape 1 12 4.465908 4.465908 1245 +optic 1 12 4.465908 4.465908 1221 +excit 1 11 4.553877 4.553877 1329 +string 1 11 4.553877 4.553877 1340 +night 1 11 4.553877 4.553877 1319 +worth 1 11 4.553877 4.553877 1294 +perl 1 11 4.553877 4.553877 1332 +literatur 1 11 4.553877 4.553877 1300 +metacrawl 1 10 4.653960 4.653960 1455 +card 1 10 4.653960 4.653960 1435 +enter 1 10 4.653960 4.653960 1454 +custom 1 10 4.653960 4.653960 1414 +utah 1 9 4.753590 4.753590 1585 +patterson 1 9 4.753590 4.753590 1554 +linguist 1 9 4.753590 4.753590 1593 +editori 1 9 4.753590 4.753590 1611 +japan 1 8 4.875197 4.875197 1762 +textur 1 8 4.875197 4.875197 1677 +dictionari 1 8 4.875197 4.875197 1642 +jack 1 8 4.875197 4.875197 1780 +usenet 1 7 5.010635 5.010635 1839 +attach 1 7 5.010635 5.010635 1785 +shade 1 7 5.010635 5.010635 1881 +shot 1 7 5.010635 5.010635 1898 +sharp 1 6 5.164786 5.164786 2100 +altavista 1 6 5.164786 5.164786 2222 +infoseek 1 6 5.164786 5.164786 2188 +mirror 1 6 5.164786 5.164786 2028 +postcard 1 6 5.164786 5.164786 2181 +textual 1 6 5.164786 5.164786 1979 +apolog 1 6 5.164786 5.164786 2046 +garbag 1 6 5.164786 5.164786 1986 +invest 1 6 5.164786 5.164786 2153 +price 1 6 5.164786 5.164786 1999 +appt 1 5 5.347108 5.347108 2312 +hyper 1 5 5.347108 5.347108 2435 +multiresolut 1 5 5.347108 5.347108 2423 +medicin 1 5 5.347108 5.347108 2448 +hennessi 1 5 5.347108 5.347108 2289 +travers 1 5 5.347108 5.347108 2363 +row 1 5 5.347108 5.347108 2330 +zoom 1 4 5.568345 5.568345 2961 +moon 1 4 5.568345 5.568345 2991 +planet 1 4 5.568345 5.568345 2912 +diagnosi 1 4 5.568345 5.568345 3027 +japanes 1 4 5.568345 5.568345 2934 +nist 1 4 5.568345 5.568345 2973 +wander 1 4 5.568345 5.568345 2896 +motif 1 3 5.857933 5.857933 3752 +freewar 1 3 5.857933 5.857933 3504 +sharewar 1 3 5.857933 5.857933 3503 +atmospher 1 3 5.857933 5.857933 3388 +rack 1 3 5.857933 5.857933 3176 +lockhe 1 3 5.857933 5.857933 3863 +georgia 1 3 5.857933 5.857933 3834 +trec 1 3 5.857933 5.857933 3547 +gigabyt 1 3 5.857933 5.857933 3548 +spider 1 3 5.857933 5.857933 3605 +latin 1 3 5.857933 5.857933 3741 +citizen 1 3 5.857933 5.857933 3238 +belong 1 3 5.857933 5.857933 3797 +pagegreg 1 2 6.263398 6.263398 5906 +pagenam 1 2 6.263398 6.263398 6165 +sharpemail 1 2 6.263398 6.263398 4766 +dejanew 1 2 6.263398 6.263398 5602 +harmoni 1 2 6.263398 6.263398 5235 +solar 1 2 6.263398 6.263398 5003 +comet 1 2 6.263398 6.263398 5785 +catalogu 1 2 6.263398 6.263398 6166 +gothic 1 2 6.263398 6.263398 5787 +soup 1 2 6.263398 6.263398 6131 +kitchen 1 2 6.263398 6.263398 6132 +awesom 1 2 6.263398 6.263398 6167 +diagon 1 2 6.263398 6.263398 4974 +atla 1 2 6.263398 6.263398 5996 +raster 1 2 6.263398 6.263398 6078 +radianc 1 2 6.263398 6.263398 6068 +tracer 1 2 6.263398 6.263398 5913 +radios 1 2 6.263398 6.263398 4504 +mexico 1 2 6.263398 6.263398 6044 +monash 1 2 6.263398 6.263398 4460 +strictli 1 2 6.263398 6.263398 5726 +pointcast 1 2 6.263398 6.263398 5377 +portfolio 1 2 6.263398 6.263398 4408 +offens 1 2 6.263398 6.263398 6168 +brill 1 2 6.263398 6.263398 4137 +sharpgreg 1 2 6.263398 6.263398 4767 +tgif 1 1 6.957497 6.957497 18497 +notesclass 1 1 6.957497 6.957497 18498 +aboutsearch 1 1 6.957497 6.957497 18499 +ohioc 1 1 6.957497 6.957497 18500 +cygnu 1 1 6.957497 6.957497 18501 +mumit 1 1 6.957497 6.957497 18502 +newbi 1 1 6.957497 6.957497 18503 +guideplatform 1 1 6.957497 6.957497 18504 +kit 1 1 6.957497 6.957497 18505 +amulet 1 1 6.957497 6.957497 18506 +dclap 1 1 6.957497 6.957497 18507 +wxwindow 1 1 6.957497 6.957497 18508 +yacl 1 1 6.957497 6.957497 18509 +projectclass 1 1 6.957497 6.957497 18510 +projectmisc 1 1 6.957497 6.957497 18511 +cygwin 1 1 6.957497 6.957497 18512 +gimp 1 1 6.957497 6.957497 18513 +harmonai 1 1 6.957497 6.957497 18514 +vasc 1 1 6.957497 6.957497 18515 +pic 1 1 6.957497 6.957497 18492 +jaida 1 1 6.957497 6.957497 18516 +seamless 1 1 6.957497 6.957497 18517 +meteor 1 1 6.957497 6.957497 18518 +wyom 1 1 6.957497 6.957497 18493 +antarctica 1 1 6.957497 6.957497 18519 +satelit 1 1 6.957497 6.957497 18494 +niae 1 1 6.957497 6.957497 18520 +vistex 1 1 6.957497 6.957497 18521 +databaseartifici 1 1 6.957497 6.957497 18522 +primoridi 1 1 6.957497 6.957497 18523 +dermatolog 1 1 6.957497 6.957497 18524 +erlang 1 1 6.957497 6.957497 18525 +orthopaed 1 1 6.957497 6.957497 18526 +ecvnet 1 1 6.957497 6.957497 18527 +handwrit 1 1 6.957497 6.957497 18495 +nici 1 1 6.957497 6.957497 18528 +groupimag 1 1 6.957497 6.957497 18529 +raytrac 1 1 6.957497 6.957497 18530 +rayshad 1 1 6.957497 6.957497 18531 +avalon 1 1 6.957497 6.957497 18532 +grimstead 1 1 6.957497 6.957497 18533 +dsite 1 1 6.957497 6.957497 18534 +intergraph 1 1 6.957497 6.957497 18535 +glint 1 1 6.957497 6.957497 18536 +chipset 1 1 6.957497 6.957497 18537 +nvidia 1 1 6.957497 6.957497 18538 +chipsetcomput 1 1 6.957497 6.957497 18539 +geometeri 1 1 6.957497 6.957497 18540 +geometrylispuseless 1 1 6.957497 6.957497 18541 +pagescomput 1 1 6.957497 6.957497 18542 +superdlx 1 1 6.957497 6.957497 18543 +parl 1 1 6.957497 6.957497 18544 +washingt 1 1 6.957497 6.957497 18545 +groupjapanes 1 1 6.957497 6.957497 18546 +unvers 1 1 6.957497 6.957497 18547 +infowav 1 1 6.957497 6.957497 18548 +edict 1 1 6.957497 6.957497 18549 +shodouka 1 1 6.957497 6.957497 18550 +asiasoftinform 1 1 6.957497 6.957497 18551 +retrev 1 1 6.957497 6.957497 18552 +peregrin 1 1 6.957497 6.957497 18553 +infomin 1 1 6.957497 6.957497 18554 +other_sw 1 1 6.957497 6.957497 18555 +info_retriev 1 1 6.957497 6.957497 18556 +jedi 1 1 6.957497 6.957497 18557 +hartlib 1 1 6.957497 6.957497 18558 +stemmer 1 1 6.957497 6.957497 18559 +twainhumor 1 1 6.957497 6.957497 18560 +threw 1 1 6.957497 6.957497 18561 +investorweb 1 1 6.957497 6.957497 18562 +networth 1 1 6.957497 6.957497 18563 +fundscap 1 1 6.957497 6.957497 18564 +stockmastermutu 1 1 6.957497 6.957497 18565 +brokerag 1 1 6.957497 6.957497 18566 +fidel 1 1 6.957497 6.957497 18567 +vanguard 1 1 6.957497 6.957497 18568 +schwab 1 1 6.957497 6.957497 18496 +gabelli 1 1 6.957497 6.957497 18569 +mutualsmisc 1 1 6.957497 6.957497 18570 +psnuplast 1 1 6.957497 6.957497 18571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..d7e668de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +sridhar 1 2 6.263398 6.263398 4807 +gopalsridhar 1 1 6.957497 6.957497 18572 +gopalgsri 1 1 6.957497 6.957497 18573 +edubon 1 1 6.957497 6.957497 18574 +marrow 1 1 6.957497 6.957497 18575 +pageresumest 1 1 6.957497 6.957497 18576 +pagecalvin 1 1 6.957497 6.957497 18577 +hobbesbookmark 1 1 6.957497 6.957497 18578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..f49278f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +relat 1 139 1.945910 1.945910 68 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +manag 1 114 2.197225 2.197225 125 +send 1 114 2.197225 2.197225 109 +grade 1 90 2.397895 2.397895 142 +sinc 1 90 2.397895 2.397895 159 +educ 1 86 2.484907 2.484907 191 +environ 1 84 2.484907 2.484907 177 +stuff 1 87 2.484907 2.484907 171 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +plan 1 65 2.772589 2.772589 272 +explor 1 58 2.890372 2.890372 324 +visual 1 48 3.044522 3.044522 372 +visitor 1 49 3.044522 3.044522 371 +california 1 46 3.091042 3.091042 388 +physic 1 47 3.091042 3.091042 377 +around 1 43 3.178054 3.178054 415 +chines 1 29 3.583519 3.583519 595 +weather 1 28 3.610918 3.610918 618 +famili 1 23 3.806662 3.806662 735 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +raghu 1 12 4.465908 4.465908 1212 +career 1 12 4.465908 4.465908 1287 +devis 1 10 4.653960 4.653960 1451 +angel 1 8 4.875197 4.875197 1779 +advis 1 6 5.164786 5.164786 2173 +forecast 1 6 5.164786 5.164786 2171 +peke 1 5 5.347108 5.347108 2539 +medicin 1 5 5.347108 5.347108 2448 +miscellani 1 3 5.857933 5.857933 3976 +guangshun 1 2 6.263398 6.263398 6138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..072f6edb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +code 1 108 2.197225 2.197225 116 +west 1 83 2.484907 2.484907 192 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +summari 1 73 2.639057 2.639057 237 +thesi 1 57 2.890372 2.890372 327 +detail 1 57 2.890372 2.890372 321 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +execut 1 45 3.135494 3.135494 404 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +hand 1 37 3.332205 3.332205 475 +focus 1 29 3.583519 3.583519 584 +target 1 12 4.465908 4.465908 1282 +laru 1 9 4.753590 4.753590 1560 +studentdepart 1 5 5.347108 5.347108 2505 +parallelprogram 1 5 5.347108 5.347108 2379 +usaadvisor 1 3 5.857933 5.857933 4017 +parallellanguag 1 3 5.857933 5.857933 4026 +guhan 1 2 6.263398 6.263398 6169 +viswanathan 1 2 6.263398 6.263398 6170 +amor 1 2 6.263398 6.263398 5486 +gviswana 1 1 6.957497 6.957497 18579 +parallelappl 1 1 6.957497 6.957497 18580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..99d0724e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +address 1 170 1.791759 1.791759 62 +like 1 132 1.945910 1.945910 81 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +manag 1 114 2.197225 2.197225 125 +world 1 115 2.197225 2.197225 126 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +meet 1 72 2.639057 2.639057 229 +line 1 75 2.639057 2.639057 231 +would 1 67 2.708050 2.708050 251 +prof 1 64 2.772589 2.772589 273 +colleg 1 61 2.833213 2.833213 300 +undergradu 1 54 2.944439 2.944439 338 +electron 1 47 3.091042 3.091042 379 +mark 1 44 3.135494 3.135494 403 +jame 1 35 3.401197 3.401197 507 +india 1 32 3.465736 3.465736 550 +photo 1 31 3.496508 3.496508 561 +hill 1 25 3.737670 3.737670 670 +sport 1 25 3.737670 3.737670 683 +indian 1 22 3.850148 3.850148 769 +sept 1 17 4.110874 4.110874 952 +ramakrishnan 1 16 4.174387 4.174387 972 +photograph 1 15 4.248495 4.248495 1056 +draw 1 14 4.317488 4.317488 1086 +warn 1 14 4.317488 4.317488 1068 +raghu 1 12 4.465908 4.465908 1212 +newspap 1 12 4.465908 4.465908 1280 +classmat 1 9 4.753590 4.753590 1516 +folk 1 9 4.753590 4.753590 1597 +counter 1 8 4.875197 4.875197 1765 +goodman 1 7 5.010635 5.010635 1891 +courtesi 1 7 5.010635 5.010635 1953 +famou 1 6 5.164786 5.164786 2185 +mirza 1 3 5.857933 5.857933 3989 +osmania 1 2 6.263398 6.263398 5573 +hyderabad 1 2 6.263398 6.263398 5570 +sastri 1 2 6.263398 6.263398 6171 +roommat 1 2 6.263398 6.263398 6157 +saeed 1 2 6.263398 6.263398 6172 +dust 1 2 6.263398 6.263398 5551 +harit 1 1 6.957497 6.957497 18581 +mvsr 1 1 6.957497 6.957497 18582 +murthi 1 1 6.957497 6.957497 18583 +zubber 1 1 6.957497 6.957497 18584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..cbb65458 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +first 1 140 1.945910 1.945910 71 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +mathemat 1 108 2.197225 2.197225 123 +java 1 70 2.708050 2.708050 248 +dept 1 64 2.772589 2.772589 291 +septemb 1 65 2.772589 2.772589 274 +colleg 1 61 2.833213 2.833213 300 +telephon 1 50 3.044522 3.044522 373 +seminar 1 38 3.295837 3.295837 470 +departmentunivers 1 24 3.761200 3.761200 711 +applet 1 20 3.951244 3.951244 827 +basketbal 1 12 4.465908 4.465908 1289 +edutelephon 1 10 4.653960 4.653960 1473 +engr 1 10 4.653960 4.653960 1427 +volleybal 1 9 4.753590 4.753590 1598 +softbal 1 9 4.753590 4.753590 1594 +rebecca 1 6 5.164786 5.164786 2174 +noland 1 5 5.347108 5.347108 2420 +assistantcomput 1 3 5.857933 5.857933 4027 +hasti 1 2 6.263398 6.263398 6173 +carleton 1 2 6.263398 6.263398 5381 +linkag 1 2 6.263398 6.263398 5139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..cacdeefd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +person 1 111 2.197225 2.197225 117 +final 1 116 2.197225 2.197225 108 +manag 1 114 2.197225 2.197225 125 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +section 1 94 2.397895 2.397895 149 +imag 1 91 2.397895 2.397895 161 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +internet 1 83 2.484907 2.484907 186 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +state 1 76 2.564949 2.564949 207 +differ 1 66 2.708050 2.708050 253 +dept 1 64 2.772589 2.772589 291 +new 1 64 2.772589 2.772589 262 +talk 1 53 2.944439 2.944439 336 +life 1 50 3.044522 3.044522 375 +right 1 48 3.044522 3.044522 363 +get 1 46 3.091042 3.091042 380 +expect 1 37 3.332205 3.332205 484 +download 1 36 3.367296 3.367296 489 +semant 1 29 3.583519 3.583519 587 +hope 1 28 3.610918 3.610918 610 +retriev 1 27 3.637586 3.637586 621 +enjoi 1 26 3.688879 3.688879 660 +tell 1 21 3.912023 3.912023 777 +stand 1 18 4.060443 4.060443 891 +stop 1 17 4.110874 4.110874 942 +seek 1 17 4.110874 4.110874 954 +thought 1 17 4.110874 4.110874 945 +alan 1 13 4.382027 4.382027 1146 +philosophi 1 13 4.382027 4.382027 1167 +brother 1 13 4.382027 4.382027 1189 +neat 1 12 4.465908 4.465908 1263 +count 1 12 4.465908 4.465908 1239 +minor 1 12 4.465908 4.465908 1237 +linguist 1 9 4.753590 4.753590 1593 +bart 1 9 4.753590 4.753590 1559 +lane 1 8 4.875197 4.875197 1720 +rais 1 8 4.875197 4.875197 1711 +jack 1 8 4.875197 4.875197 1780 +reload 1 8 4.875197 4.875197 1682 +on 1 8 4.875197 4.875197 1628 +marri 1 7 5.010635 5.010635 1946 +accord 1 7 5.010635 5.010635 1826 +creation 1 6 5.164786 5.164786 2069 +handi 1 6 5.164786 5.164786 2111 +advic 1 5 5.347108 5.347108 2509 +formerli 1 5 5.347108 5.347108 2397 +truli 1 5 5.347108 5.347108 2476 +plant 1 5 5.347108 5.347108 2497 +ling 1 4 5.568345 5.568345 3045 +cyber 1 4 5.568345 5.568345 2909 +luck 1 3 5.857933 5.857933 3201 +audit 1 3 5.857933 5.857933 3391 +northeast 1 3 5.857933 5.857933 3922 +chad 1 2 6.263398 6.263398 4768 +biggest 1 2 6.263398 6.263398 4790 +arthur 1 2 6.263398 6.263398 5767 +deep 1 2 6.263398 6.263398 5528 +welcomethank 1 1 6.957497 6.957497 18586 +bestbet 1 1 6.957497 6.957497 18587 +onmai 1 1 6.957497 6.957497 18588 +nichol 1 1 6.957497 6.957497 18589 +discours 1 1 6.957497 6.957497 18590 +barwis 1 1 6.957497 6.957497 18591 +epigram 1 1 6.957497 6.957497 18592 +perli 1 1 6.957497 6.957497 18593 +laud 1 1 6.957497 6.957497 18594 +truman 1 1 6.957497 6.957497 18595 +missouri 1 1 6.957497 6.957497 18596 +poop 1 1 6.957497 6.957497 18597 +unabash 1 1 6.957497 6.957497 18598 +claud 1 1 6.957497 6.957497 18585 +psychot 1 1 6.957497 6.957497 18599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..e7489d1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +librari 1 87 2.484907 2.484907 181 +educ 1 86 2.484907 2.484907 191 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +line 1 75 2.639057 2.639057 231 +august 1 66 2.708050 2.708050 257 +simul 1 66 2.708050 2.708050 255 +plan 1 65 2.772589 2.772589 272 +abstract 1 62 2.772589 2.772589 276 +laboratori 1 63 2.772589 2.772589 292 +juli 1 60 2.833213 2.833213 305 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +move 1 47 3.091042 3.091042 382 +autom 1 41 3.218876 3.218876 434 +multipl 1 39 3.258097 3.258097 453 +vita 1 38 3.295837 3.295837 473 +robot 1 36 3.367296 3.367296 497 +curriculum 1 33 3.433987 3.433987 535 +extend 1 32 3.465736 3.465736 539 +anim 1 31 3.496508 3.496508 557 +common 1 30 3.555348 3.555348 574 +travel 1 30 3.555348 3.555348 579 +experiment 1 26 3.688879 3.688879 645 +proc 1 26 3.688879 3.688879 649 +motion 1 24 3.761200 3.761200 699 +geometri 1 22 3.850148 3.850148 752 +rout 1 21 3.912023 3.912023 793 +unit 1 21 3.912023 3.912023 779 +basi 1 20 3.951244 3.951244 828 +geometr 1 19 4.007333 4.007333 852 +susan 1 15 4.248495 4.248495 1050 +configur 1 15 4.248495 4.248495 1012 +vladimir 1 11 4.553877 4.553877 1324 +arbitrari 1 11 4.553877 4.553877 1359 +cook 1 10 4.653960 4.653960 1464 +congress 1 9 4.753590 4.753590 1592 +curv 1 8 4.875197 4.875197 1656 +autonom 1 8 4.875197 4.875197 1749 +plane 1 6 5.164786 5.164786 2187 +bind 1 5 5.347108 5.347108 2250 +tiwari 1 5 5.347108 5.347108 2385 +lumelski 1 4 5.568345 5.568345 2837 +ti 1 4 5.568345 5.568345 3005 +underwat 1 4 5.568345 5.568345 2838 +shelf 1 4 5.568345 5.568345 2621 +canadian 1 3 5.857933 5.857933 3508 +planar 1 3 5.857933 5.857933 3647 +hert 1 2 6.263398 6.263398 4848 +deform 1 2 6.263398 6.263398 6065 +tether 1 2 6.263398 6.263398 4844 +terrain 1 2 6.263398 6.263398 6174 +epicuri 1 2 6.263398 6.263398 5105 +veggi 1 2 6.263398 6.263398 5426 +alogirthm 1 1 6.957497 6.957497 18600 +sanjai 1 1 6.957497 6.957497 18601 +reznik 1 1 6.957497 6.957497 18602 +samantha 1 1 6.957497 6.957497 18603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..0c5f16ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +dayton 1 119 2.079442 2.079442 104 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +visit 1 63 2.772589 2.772589 288 +experi 1 64 2.772589 2.772589 283 +finger 1 52 2.995732 2.995732 354 +maintain 1 51 2.995732 2.995732 342 +appoint 1 49 3.044522 3.044522 358 +departmentunivers 1 24 3.761200 3.761200 711 +serv 1 22 3.850148 3.850148 758 +countri 1 15 4.248495 4.248495 1059 +wife 1 13 4.382027 4.382027 1196 +sai 1 13 4.382027 4.382027 1175 +tue 1 11 4.553877 4.553877 1308 +edutelephon 1 10 4.653960 4.653960 1473 +counter 1 8 4.875197 4.875197 1765 +studentcomput 1 7 5.010635 5.010635 1963 +none 1 7 5.010635 5.010635 1811 +corp 1 6 5.164786 5.164786 2139 +peac 1 3 5.857933 5.857933 3769 +kirk 1 2 6.263398 6.263398 6175 +hogenson 1 1 6.957497 6.957497 18604 +myschedul 1 1 6.957497 6.957497 18605 +workout 1 1 6.957497 6.957497 18606 +tryto 1 1 6.957497 6.957497 18607 +ghana 1 1 6.957497 6.957497 18608 +usernam 1 1 6.957497 6.957497 18609 +pnhp 1 1 6.957497 6.957497 18610 +eilun 1 1 6.957497 6.957497 18611 +accessedtim 1 1 6.957497 6.957497 18612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..067eb489 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +linear 1 41 3.218876 3.218876 431 +road 1 11 4.553877 4.553877 1374 +jeffrei 1 9 4.753590 4.753590 1612 +wise 1 3 5.857933 5.857933 3631 +horn 1 2 6.263398 6.263398 6072 +swanton 1 1 6.957497 6.957497 18613 +familyemploymenteducationresearchgenealog 1 1 6.957497 6.957497 18614 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..9a084610 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +specif 1 106 2.197225 2.197225 106 +intern 1 108 2.197225 2.197225 128 +teach 1 108 2.197225 2.197225 112 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +west 1 83 2.484907 2.484907 192 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +symposium 1 72 2.639057 2.639057 238 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +point 1 58 2.890372 2.890372 319 +telephon 1 50 3.044522 3.044522 373 +understand 1 47 3.091042 3.091042 384 +combin 1 42 3.218876 3.218876 421 +fast 1 42 3.218876 3.218876 429 +programm 1 39 3.258097 3.258097 445 +streetmadison 1 38 3.295837 3.295837 474 +represent 1 35 3.401197 3.401197 512 +exist 1 30 3.555348 3.555348 569 +produc 1 30 3.555348 3.555348 572 +graph 1 30 3.555348 3.555348 576 +semant 1 29 3.583519 3.583519 587 +depend 1 29 3.583519 3.583519 583 +propos 1 28 3.610918 3.610918 602 +accur 1 25 3.737670 3.737670 680 +departmentunivers 1 24 3.761200 3.761200 711 +flow 1 24 3.761200 3.761200 700 +identifi 1 22 3.850148 3.850148 760 +thoma 1 18 4.060443 4.060443 901 +record 1 18 4.060443 4.060443 890 +modif 1 17 4.110874 4.110874 913 +white 1 17 4.110874 4.110874 951 +fourth 1 16 4.174387 4.174387 999 +susan 1 15 4.248495 4.248495 1050 +piec 1 15 4.248495 4.248495 1020 +precis 1 15 4.248495 4.248495 1023 +demand 1 14 4.317488 4.317488 1073 +francisco 1 14 4.317488 4.317488 1095 +individu 1 13 4.382027 4.382027 1126 +joint 1 13 4.382027 4.382027 1130 +sigplan 1 13 4.382027 4.382027 1190 +franc 1 12 4.465908 4.465908 1276 +mainli 1 10 4.653960 4.653960 1432 +guarante 1 10 4.653960 4.653960 1391 +conferenceon 1 9 4.753590 4.753590 1595 +plain 1 9 4.753590 4.753590 1495 +secretari 1 8 4.875197 4.875197 1775 +shapiro 1 8 4.875197 4.875197 1686 +implementationof 1 7 5.010635 5.010635 1813 +necessarili 1 7 5.010635 5.010635 1899 +sixth 1 7 5.010635 5.010635 1917 +textual 1 6 5.164786 5.164786 1979 +pari 1 6 5.164786 5.164786 2158 +softwareengin 1 6 5.164786 5.164786 2162 +increment 1 6 5.164786 5.164786 2206 +horwitz 1 5 5.347108 5.347108 2411 +dataflow 1 5 5.347108 5.347108 2390 +summarymi 1 5 5.347108 5.347108 2580 +twenti 1 5 5.347108 5.347108 2540 +australia 1 5 5.347108 5.347108 2478 +usa 1 4 5.568345 5.568345 3080 +slice 1 4 5.568345 5.568345 2622 +theprogram 1 4 5.568345 5.568345 2686 +interprocedur 1 4 5.568345 5.568345 2771 +rep 1 4 5.568345 5.568345 3087 +insensit 1 4 5.568345 5.568345 2716 +sigsoft 1 4 5.568345 5.568345 3036 +melbourn 1 4 5.568345 5.568345 3035 +onprincipl 1 3 5.857933 5.857933 3701 +theoryand 1 3 5.857933 5.857933 3350 +denmark 1 3 5.857933 5.857933 3676 +reachabl 1 3 5.857933 5.857933 4001 +principlesof 1 3 5.857933 5.857933 3145 +twentieth 1 3 5.857933 5.857933 3760 +fourteenth 1 3 5.857933 5.857933 3615 +differenc 1 2 6.263398 6.263398 6177 +interproceduraldataflow 1 2 6.263398 6.263398 6178 +mooli 1 2 6.263398 6.263398 6179 +sagiv 1 2 6.263398 6.263398 6176 +aarhu 1 2 6.263398 6.263398 6180 +charleston 1 2 6.263398 6.263398 6181 +aprogram 1 2 6.263398 6.263398 4943 +languagedesign 1 2 6.263398 6.263398 6182 +horwitzsusan 1 1 6.957497 6.957497 18615 +horwitzprofessorcomput 1 1 6.957497 6.957497 18616 +environmentsprogram 1 1 6.957497 6.957497 18617 +mergingstat 1 1 6.957497 6.957497 18618 +programsinterprocedur 1 1 6.957497 6.957497 18619 +analysisresearch 1 1 6.957497 6.957497 18620 +affectedbi 1 1 6.957497 6.957497 18621 +betweentwo 1 1 6.957497 6.957497 18622 +retest 1 1 6.957497 6.957497 18623 +certainsemant 1 1 6.957497 6.957497 18624 +concentratedeith 1 1 6.957497 6.957497 18625 +newalgorithm 1 1 6.957497 6.957497 18626 +publicationsm 1 1 6.957497 6.957497 18627 +constantpropag 1 1 6.957497 6.957497 18628 +bate 1 1 6.957497 6.957497 18629 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..e7ec5273 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +postscript 1 131 2.079442 2.079442 90 +version 1 113 2.197225 2.197225 122 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +hummert 1 3 5.857933 5.857933 3416 +pagenam 1 2 6.263398 6.263398 6165 +sidnei 1 2 6.263398 6.263398 4587 +edua 1 2 6.263398 6.263398 5764 +pagesid 1 1 6.957497 6.957497 18630 +hummertoffic 1 1 6.957497 6.957497 18631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..d358b350 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +dayton 1 119 2.079442 2.079442 104 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +particular 1 51 2.995732 2.995732 352 +math 1 44 3.135494 3.135494 402 +vision 1 41 3.218876 3.218876 430 +streetmadison 1 38 3.295837 3.295837 474 +feel 1 37 3.332205 3.332205 483 +robot 1 36 3.367296 3.367296 497 +actual 1 28 3.610918 3.610918 604 +departmentunivers 1 24 3.761200 3.761200 711 +ever 1 19 4.007333 4.007333 872 +alreadi 1 16 4.174387 4.174387 963 +speak 1 12 4.465908 4.465908 1283 +undergrad 1 9 4.753590 4.753590 1589 +disclaim 1 4 5.568345 5.568345 2847 +alien 1 3 5.857933 5.857933 3930 +slave 1 3 5.857933 5.857933 3959 +igor 1 2 6.263398 6.263398 6183 +ivanisev 1 2 6.263398 6.263398 6184 +newest 1 2 6.263398 6.263398 5518 +needless 1 2 6.263398 6.263398 4694 +drake 1 2 6.263398 6.263398 5668 +pageigorivanisev 1 1 6.957497 6.957497 18632 +generalgradu 1 1 6.957497 6.957497 18633 +departmentwa 1 1 6.957497 6.957497 18634 +departmentaddress 1 1 6.957497 6.957497 18635 +iigor 1 1 6.957497 6.957497 18636 +eduiigor 1 1 6.957497 6.957497 18637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..072bcbd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 1 242 1.386294 1.386294 33 +pageireland 1 1 6.957497 6.957497 18638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..d86c175d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +thursdai 1 70 2.708050 2.708050 241 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +profil 1 30 3.555348 3.555348 581 +peterson 1 7 5.010635 5.010635 1850 +sharenow 1 3 5.857933 5.857933 3439 +jone 1 3 5.857933 5.857933 3703 +recreat 1 3 5.857933 5.857933 3990 +madisonmadison 1 1 6.957497 6.957497 18639 +sciencestelephon 1 1 6.957497 6.957497 18640 +pmsection 1 1 6.957497 6.957497 18641 +pmboth 1 1 6.957497 6.957497 18642 +sciencesc 1 1 6.957497 6.957497 18643 +announcementshandoutsmoth 1 1 6.957497 6.957497 18644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..05adcd13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +area 1 144 1.945910 1.945910 80 +dayton 1 119 2.079442 2.079442 104 +resum 1 79 2.564949 2.564949 217 +onlin 1 75 2.639057 2.639057 223 +receiv 1 66 2.708050 2.708050 244 +prof 1 64 2.772589 2.772589 273 +previou 1 62 2.772589 2.772589 290 +colleg 1 61 2.833213 2.833213 300 +eduoffic 1 33 3.433987 3.433987 531 +departmentunivers 1 24 3.761200 3.761200 711 +livni 1 15 4.248495 4.248495 1053 +english 1 15 4.248495 4.248495 1033 +miron 1 14 4.317488 4.317488 1110 +condor 1 5 5.347108 5.347108 2577 +webpag 1 4 5.568345 5.568345 2660 +assistantcomput 1 3 5.857933 5.857933 4027 +statisticsoffic 1 2 6.263398 6.263398 4810 +andnetwork 1 2 6.263398 6.263398 5751 +basnei 1 2 6.263398 6.263398 4804 +basneyjim 1 1 6.957497 6.957497 18645 +basneygradu 1 1 6.957497 6.957497 18646 +jbasnei 1 1 6.957497 6.957497 18647 +directionof 1 1 6.957497 6.957497 18648 +fromoberlin 1 1 6.957497 6.957497 18649 +oberlin 1 1 6.957497 6.957497 18650 +codefrom 1 1 6.957497 6.957497 18651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..f91d183b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +first 1 140 1.945910 1.945910 71 +like 1 132 1.945910 1.945910 81 +databas 1 122 2.079442 2.079442 86 +assist 1 112 2.197225 2.197225 113 +stuff 1 87 2.484907 2.484907 171 +know 1 80 2.564949 2.564949 198 +plai 1 60 2.833213 2.833213 307 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +favorit 1 44 3.135494 3.135494 410 +show 1 43 3.178054 3.178054 417 +around 1 43 3.178054 3.178054 415 +error 1 40 3.258097 3.258097 449 +electr 1 38 3.295837 3.295837 461 +origin 1 38 3.295837 3.295837 472 +respons 1 37 3.332205 3.332205 476 +soon 1 36 3.367296 3.367296 494 +abl 1 30 3.555348 3.555348 566 +wai 1 25 3.737670 3.737670 662 +instal 1 22 3.850148 3.850148 754 +watch 1 21 3.912023 3.912023 789 +mostli 1 19 4.007333 4.007333 869 +record 1 18 4.060443 4.060443 890 +stand 1 18 4.060443 4.060443 891 +train 1 14 4.317488 4.317488 1066 +classic 1 14 4.317488 4.317488 1084 +believ 1 13 4.382027 4.382027 1187 +mainli 1 10 4.653960 4.653960 1432 +didn 1 9 4.753590 4.753590 1563 +guitar 1 8 4.875197 4.875197 1758 +hit 1 7 5.010635 5.010635 1965 +seen 1 6 5.164786 5.164786 2202 +golf 1 6 5.164786 5.164786 2178 +pool 1 6 5.164786 5.164786 2225 +yeah 1 6 5.164786 5.164786 2195 +backup 1 4 5.568345 5.568345 2645 +metal 1 4 5.568345 5.568345 3079 +hourli 1 3 5.857933 5.857933 3734 +thrash 1 3 5.857933 5.857933 3400 +evil 1 3 5.857933 5.857933 3943 +mackai 1 2 6.263398 6.263398 5762 +fulltim 1 2 6.263398 6.263398 5170 +sybas 1 2 6.263398 6.263398 4723 +ingr 1 2 6.263398 6.263398 4097 +ey 1 2 6.263398 6.263398 5068 +metallica 1 2 6.263398 6.263398 4991 +raquetbal 1 2 6.263398 6.263398 6012 +towel 1 2 6.263398 6.263398 4793 +jerel 1 1 6.957497 6.957497 18652 +pagejerel 1 1 6.957497 6.957497 18653 +specialti 1 1 6.957497 6.957497 18654 +violin 1 1 6.957497 6.957497 18655 +baroqu 1 1 6.957497 6.957497 18656 +shock 1 1 6.957497 6.957497 18657 +funni 1 1 6.957497 6.957497 18658 +abba 1 1 6.957497 6.957497 18659 +shoot 1 1 6.957497 6.957497 18660 +jerellast 1 1 6.957497 6.957497 18661 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..6ef1426e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +homepag 1 93 2.397895 2.397895 148 +johan 1 2 6.263398 6.263398 4900 +larson 1 1 6.957497 6.957497 18662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..0c223e91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +class 1 199 1.609438 1.609438 37 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +number 1 130 2.079442 2.079442 97 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +take 1 97 2.302585 2.302585 134 +memori 1 101 2.302585 2.302585 139 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +thing 1 84 2.484907 2.484907 189 +start 1 83 2.484907 2.484907 173 +stuff 1 87 2.484907 2.484907 171 +contain 1 81 2.484907 2.484907 174 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +date 1 51 2.995732 2.995732 344 +cool 1 49 3.044522 3.044522 374 +format 1 48 3.044522 3.044522 356 +friend 1 48 3.044522 3.044522 376 +archiv 1 49 3.044522 3.044522 364 +made 1 44 3.135494 3.135494 398 +directori 1 45 3.135494 3.135494 396 +realli 1 40 3.258097 3.258097 444 +societi 1 40 3.258097 3.258097 456 +join 1 39 3.258097 3.258097 457 +movi 1 40 3.258097 3.258097 459 +go 1 33 3.433987 3.433987 529 +anim 1 31 3.496508 3.496508 557 +someth 1 31 3.496508 3.496508 554 +exist 1 30 3.555348 3.555348 569 +though 1 27 3.637586 3.637586 622 +great 1 27 3.637586 3.637586 626 +mine 1 26 3.688879 3.688879 654 +enjoi 1 26 3.688879 3.688879 660 +rule 1 26 3.688879 3.688879 638 +todai 1 25 3.737670 3.737670 672 +frame 1 24 3.761200 3.761200 684 +seri 1 24 3.761200 3.761200 708 +head 1 23 3.806662 3.806662 732 +sequenc 1 23 3.806662 3.806662 734 +almost 1 22 3.850148 3.850148 742 +hierarchi 1 22 3.850148 3.850148 744 +half 1 21 3.912023 3.912023 776 +grad 1 20 3.951244 3.951244 837 +mpeg 1 20 3.951244 3.951244 831 +els 1 19 4.007333 4.007333 843 +club 1 15 4.248495 4.248495 1058 +happi 1 14 4.317488 4.317488 1079 +becam 1 14 4.317488 4.317488 1117 +trip 1 14 4.317488 4.317488 1113 +pretti 1 13 4.382027 4.382027 1191 +neat 1 12 4.465908 4.465908 1263 +realiti 1 12 4.465908 4.465908 1272 +death 1 10 4.653960 4.653960 1457 +float 1 9 4.753590 4.753590 1504 +explicit 1 9 4.753590 4.753590 1525 +said 1 9 4.753590 4.753590 1571 +sound 1 9 4.753590 4.753590 1605 +jack 1 8 4.875197 4.875197 1780 +relax 1 6 5.164786 5.164786 2120 +jpeg 1 6 5.164786 5.164786 2053 +apolog 1 6 5.164786 5.164786 2046 +benefit 1 6 5.164786 5.164786 2213 +girlfriend 1 5 5.347108 5.347108 2579 +default 1 5 5.347108 5.347108 2335 +frog 1 5 5.347108 5.347108 2479 +chaotic 1 5 5.347108 5.347108 2566 +semi 1 5 5.347108 5.347108 2510 +relief 1 4 5.568345 5.568345 2784 +afraid 1 4 5.568345 5.568345 3053 +paus 1 4 5.568345 5.568345 2965 +repeat 1 4 5.568345 5.568345 2798 +vital 1 4 5.568345 5.568345 2733 +bear 1 4 5.568345 5.568345 2651 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +exploratori 1 4 5.568345 5.568345 3073 +exclus 1 4 5.568345 5.568345 2947 +jherro 1 3 5.857933 5.857933 3427 +let 1 3 5.857933 5.857933 3790 +tortur 1 3 5.857933 5.857933 3634 +nicknam 1 3 5.857933 5.857933 3716 +lame 1 3 5.857933 5.857933 3717 +haiku 1 3 5.857933 5.857933 3811 +bout 1 3 5.857933 5.857933 3670 +apictur 1 2 6.263398 6.263398 5024 +kermit 1 2 6.263398 6.263398 4742 +intervent 1 2 6.263398 6.263398 6163 +shack 1 2 6.263398 6.263398 5369 +cano 1 2 6.263398 6.263398 5207 +censorship 1 1 6.957497 6.957497 18664 +disembody 1 1 6.957497 6.957497 18665 +millisecond 1 1 6.957497 6.957497 18666 +overriden 1 1 6.957497 6.957497 18667 +aquir 1 1 6.957497 6.957497 18668 +skellington 1 1 6.957497 6.957497 18669 +thath 1 1 6.957497 6.957497 18670 +roomat 1 1 6.957497 6.957497 18663 +forgotten 1 1 6.957497 6.957497 18671 +cult 1 1 6.957497 6.957497 18672 +hippothi 1 1 6.957497 6.957497 18673 +matriarch 1 1 6.957497 6.957497 18674 +yahooooooooooooo 1 1 6.957497 6.957497 18675 +bazillion 1 1 6.957497 6.957497 18676 +muppet 1 1 6.957497 6.957497 18677 +rachel 1 1 6.957497 6.957497 18678 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..d8095ab8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +hall 1 146 1.945910 1.945910 65 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +educ 1 86 2.484907 2.484907 191 +west 1 83 2.484907 2.484907 192 +larg 1 82 2.484907 2.484907 168 +ieee 1 86 2.484907 2.484907 190 +orient 1 80 2.564949 2.564949 205 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +workshop 1 71 2.639057 2.639057 239 +prof 1 64 2.772589 2.772589 273 +evalu 1 64 2.772589 2.772589 266 +septemb 1 65 2.772589 2.772589 274 +juli 1 60 2.833213 2.833213 305 +index 1 56 2.890372 2.890372 309 +februari 1 54 2.944439 2.944439 328 +advisor 1 51 2.995732 2.995732 355 +pointer 1 48 3.044522 3.044522 368 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +execut 1 45 3.135494 3.135494 404 +submit 1 39 3.258097 3.258097 440 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +tree 1 36 3.367296 3.367296 492 +queri 1 33 3.433987 3.433987 524 +storag 1 31 3.496508 3.496508 553 +full 1 28 3.610918 3.610918 615 +constraint 1 26 3.688879 3.688879 636 +client 1 25 3.737670 3.737670 679 +store 1 24 3.761200 3.761200 693 +size 1 23 3.806662 3.806662 713 +tenni 1 20 3.951244 3.951244 838 +sigmod 1 19 4.007333 4.007333 877 +white 1 17 4.110874 4.110874 951 +hobbi 1 16 4.174387 4.174387 1009 +spatial 1 16 4.174387 4.174387 988 +ramakrishnan 1 16 4.174387 4.174387 972 +dbm 1 13 4.382027 4.382027 1136 +nasa 1 13 4.382027 4.382027 1188 +dewitt 1 12 4.465908 4.465908 1270 +impact 1 11 4.553877 4.553877 1334 +naughton 1 10 4.653960 4.653960 1450 +resid 1 10 4.653960 4.653960 1461 +conferenceon 1 9 4.753590 4.753590 1595 +volleybal 1 9 4.753590 4.753590 1598 +assistantdepart 1 8 4.875197 4.875197 1784 +paradis 1 8 4.875197 4.875197 1782 +mass 1 8 4.875197 4.875197 1732 +burger 1 7 5.010635 5.010635 1889 +geograph 1 6 5.164786 5.164786 2236 +patel 1 6 5.164786 5.164786 2154 +goldstein 1 6 5.164786 5.164786 2168 +tuft 1 5 5.347108 5.347108 2575 +houston 1 5 5.347108 5.347108 2460 +water 1 5 5.347108 5.347108 2535 +ramasami 1 4 5.568345 5.568345 3088 +batch 1 4 5.568345 5.568345 2700 +tape 1 4 5.568345 5.568345 2959 +satellit 1 4 5.568345 5.568345 3077 +chile 1 4 5.568345 5.568345 3082 +yong 1 4 5.568345 5.568345 2809 +raft 1 4 5.568345 5.568345 3060 +informationresearch 1 3 5.857933 5.857933 3675 +edueduc 1 3 5.857933 5.857933 4004 +tertiari 1 3 5.857933 5.857933 3193 +shorter 1 3 5.857933 5.857933 3998 +santiago 1 3 5.857933 5.857933 4013 +dewittresearch 1 2 6.263398 6.263398 6185 +kabra 1 2 6.263398 6.263398 6139 +shaft 1 2 6.263398 6.263398 6186 +eosdi 1 2 6.263398 6.263398 6124 +bing 1 1 6.957497 6.957497 18679 +jieb 1 1 6.957497 6.957497 18680 +systemsresearch 1 1 6.957497 6.957497 18681 +shorepublicationsbuild 1 1 6.957497 6.957497 18682 +scaleabl 1 1 6.957497 6.957497 18683 +implment 1 1 6.957497 6.957497 18684 +lueder 1 1 6.957497 6.957497 18685 +ellman 1 1 6.957497 6.957497 18686 +kupsch 1 1 6.957497 6.957497 18687 +prong 1 1 6.957497 6.957497 18688 +tile 1 1 6.957497 6.957497 18689 +goddard 1 1 6.957497 6.957497 18690 +reclam 1 1 6.957497 6.957497 18691 +reorgan 1 1 6.957497 6.957497 18692 +serverpersist 1 1 6.957497 6.957497 18693 +grouphobbi 1 1 6.957497 6.957497 18694 +volleyballweb 1 1 6.957497 6.957497 18695 +whitewat 1 1 6.957497 6.957497 18696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..ffb43bc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +assist 1 112 2.197225 2.197225 113 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +street 1 63 2.772589 2.772589 293 +virtual 1 62 2.772589 2.772589 285 +publish 1 57 2.890372 2.890372 326 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +join 1 39 3.258097 3.258097 457 +bookmark 1 26 3.688879 3.688879 639 +client 1 25 3.737670 3.737670 679 +accur 1 25 3.737670 3.737670 680 +miscellan 1 23 3.806662 3.806662 731 +sigmod 1 19 4.007333 4.007333 877 +partit 1 16 4.174387 4.174387 984 +spatial 1 16 4.174387 4.174387 988 +hybrid 1 15 4.248495 4.248495 1057 +sigmetr 1 13 4.382027 4.382027 1173 +dewitt 1 12 4.465908 4.465908 1270 +vldb 1 10 4.653960 4.653960 1470 +paradis 1 8 4.875197 4.875197 1782 +hash 1 8 4.875197 4.875197 1618 +tourist 1 8 4.875197 4.875197 1710 +merg 1 7 5.010635 5.010635 1862 +patel 1 6 5.164786 5.164786 2154 +inlin 1 4 5.568345 5.568345 2964 +skate 1 4 5.568345 5.568345 3046 +jignesh 1 1 6.957497 6.957497 18697 +madhuri 1 1 6.957497 6.957497 18698 +kashmir 1 1 6.957497 6.957497 18699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..b37bb094 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +click 1 142 1.945910 1.945910 78 +peopl 1 96 2.302585 2.302585 132 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +georg 1 16 4.174387 4.174387 994 +warn 1 14 4.317488 4.317488 1068 +pretti 1 13 4.382027 4.382027 1191 +varghes 1 3 5.857933 5.857933 3442 +lame 1 3 5.857933 5.857933 3717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..600bb2ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +real 1 93 2.397895 2.397895 144 +proceed 1 93 2.397895 2.397895 152 +west 1 83 2.484907 2.484907 192 +resourc 1 81 2.484907 2.484907 172 +ieee 1 86 2.484907 2.484907 190 +decemb 1 80 2.564949 2.564949 215 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +symposium 1 72 2.639057 2.639057 238 +street 1 63 2.772589 2.772589 293 +room 1 59 2.833213 2.833213 301 +share 1 59 2.833213 2.833213 304 +februari 1 54 2.944439 2.944439 328 +multipl 1 39 3.258097 3.258097 453 +mine 1 26 3.688879 3.688879 654 +task 1 25 3.737670 3.737670 678 +period 1 22 3.850148 3.850148 743 +alloc 1 20 3.951244 3.951244 821 +expand 1 17 4.110874 4.110874 928 +universityof 1 15 4.248495 4.248495 1061 +inproceed 1 8 4.875197 4.875197 1670 +sciencesdepart 1 6 5.164786 5.164786 2020 +height 1 4 5.568345 5.568345 2890 +johann 1 3 5.857933 5.857933 3758 +ofwisconsin 1 3 5.857933 5.857933 4002 +plaxton 1 3 5.857933 5.857933 3886 +proport 1 3 5.857933 5.857933 3293 +studentat 1 2 6.263398 6.263398 5877 +databasemanag 1 2 6.263398 6.263398 4089 +underprofessor 1 2 6.263398 6.263398 6045 +linkscontact 1 2 6.263398 6.263398 5708 +eagl 1 2 6.263398 6.263398 5731 +jeffai 1 2 6.263398 6.263398 4357 +baruah 1 2 6.263398 6.263398 5753 +technicalreport 1 2 6.263398 6.263398 5615 +gehrk 1 1 6.957497 6.957497 18700 +homepagejohann 1 1 6.957497 6.957497 18701 +gehrkewelcom 1 1 6.957497 6.957497 18702 +raghuramakrishnan 1 1 6.957497 6.957497 18703 +stoica 1 1 6.957497 6.957497 18704 +abdel 1 1 6.957497 6.957497 18705 +wahab 1 1 6.957497 6.957497 18706 +algorithmfor 1 1 6.957497 6.957497 18707 +anexpand 1 1 6.957497 6.957497 18708 +fastschedul 1 1 6.957497 6.957497 18709 +processingsymposium 1 1 6.957497 6.957497 18710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..fdd7ae19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +look 1 107 2.197225 2.197225 115 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +manag 1 114 2.197225 2.197225 125 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +thing 1 84 2.484907 2.484907 189 +good 1 77 2.564949 2.564949 200 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +keep 1 44 3.135494 3.135494 409 +industri 1 38 3.295837 3.295837 464 +often 1 31 3.496508 3.496508 551 +ask 1 28 3.610918 3.610918 597 +releas 1 28 3.610918 3.610918 616 +great 1 27 3.637586 3.637586 626 +mind 1 27 3.637586 3.637586 632 +latest 1 21 3.912023 3.912023 785 +dilbert 1 16 4.174387 4.174387 996 +pagewelcom 1 11 4.553877 4.553877 1344 +appl 1 11 4.553877 4.553877 1303 +spot 1 7 5.010635 5.010635 1894 +bodner 1 5 5.347108 5.347108 2401 +alma 1 3 5.857933 5.857933 3963 +herei 1 2 6.263398 6.263398 6187 +mater 1 2 6.263398 6.263398 5930 +nando 1 2 6.263398 6.263398 5458 +numb 1 2 6.263398 6.263398 5505 +soap 1 2 6.263398 6.263398 4511 +jonb 1 2 6.263398 6.263398 4771 +mound 1 2 6.263398 6.263398 4773 +powerbook 1 1 6.957497 6.957497 18711 +amass 1 1 6.957497 6.957497 18712 +catagori 1 1 6.957497 6.957497 18713 +needsth 1 1 6.957497 6.957497 18714 +coverageth 1 1 6.957497 6.957497 18715 +operaish 1 1 6.957497 6.957497 18716 +drivelziffnet 1 1 6.957497 6.957497 18717 +newsc 1 1 6.957497 6.957497 18718 +classworktodai 1 1 6.957497 6.957497 18719 +chucklejon 1 1 6.957497 6.957497 18720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..36efb0d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +sinc 1 90 2.397895 2.397895 159 +institut 1 84 2.484907 2.484907 187 +activ 1 84 2.484907 2.484907 182 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +refer 1 78 2.564949 2.564949 203 +degre 1 69 2.708050 2.708050 259 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +could 1 46 3.091042 3.091042 383 +autom 1 41 3.218876 3.218876 434 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +beij 1 19 4.007333 4.007333 876 +stanford 1 17 4.110874 4.110874 955 +medic 1 17 4.110874 4.110874 958 +came 1 13 4.382027 4.382027 1197 +academi 1 8 4.875197 4.875197 1735 +signal 1 7 5.010635 5.010635 1910 +provinc 1 4 5.568345 5.568345 3009 +biomed 1 4 5.568345 5.568345 2905 +hometown 1 3 5.857933 5.857933 3817 +nanj 1 2 6.263398 6.263398 5728 +jiangsu 1 2 6.263398 6.263398 5586 +southeast 1 2 6.263398 6.263398 6188 +frombeij 1 1 6.957497 6.957497 18721 +capitol 1 1 6.957497 6.957497 18722 +specil 1 1 6.957497 6.957497 18723 +chinacurr 1 1 6.957497 6.957497 18724 +tele 1 1 6.957497 6.957497 18725 +stuffjava 1 1 6.957497 6.957497 18726 +placeshor 1 1 6.957497 6.957497 18727 +tutorialchina 1 1 6.957497 6.957497 18728 +affairchina 1 1 6.957497 6.957497 18729 +democracybeij 1 1 6.957497 6.957497 18730 +groupstanford 1 1 6.957497 6.957497 18731 +informaticsmit 1 1 6.957497 6.957497 18732 +processingjob 1 1 6.957497 6.957497 18733 +newsyou 1 1 6.957497 6.957497 18734 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..7eebe3ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +architectur 1 139 1.945910 1.945910 77 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +second 1 81 2.484907 2.484907 166 +academ 1 82 2.484907 2.484907 178 +school 1 84 2.484907 2.484907 188 +info 1 85 2.484907 2.484907 176 +state 1 76 2.564949 2.564949 207 +view 1 70 2.708050 2.708050 254 +receiv 1 66 2.708050 2.708050 244 +window 1 68 2.708050 2.708050 242 +plan 1 65 2.772589 2.772589 272 +new 1 64 2.772589 2.772589 262 +appoint 1 49 3.044522 3.044522 358 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +join 1 39 3.258097 3.258097 457 +electr 1 38 3.295837 3.295837 461 +print 1 34 3.401197 3.401197 503 +team 1 27 3.637586 3.637586 625 +jeff 1 25 3.737670 3.737670 673 +background 1 25 3.737670 3.737670 664 +sport 1 25 3.737670 3.737670 683 +miscellan 1 23 3.806662 3.806662 731 +emphasi 1 22 3.850148 3.850148 755 +deal 1 22 3.850148 3.850148 736 +hous 1 21 3.912023 3.912023 801 +thur 1 19 4.007333 4.007333 847 +statu 1 18 4.060443 4.060443 885 +diego 1 16 4.174387 4.174387 992 +jose 1 16 4.174387 4.174387 976 +club 1 15 4.248495 4.248495 1058 +francisco 1 14 4.317488 4.317488 1095 +basketbal 1 12 4.465908 4.465908 1289 +song 1 11 4.553877 4.553877 1380 +tue 1 11 4.553877 4.553877 1308 +town 1 10 4.653960 4.653960 1458 +vista 1 10 4.653960 4.653960 1452 +theme 1 8 4.875197 4.875197 1707 +hockei 1 8 4.875197 4.875197 1760 +golden 1 7 5.010635 5.010635 1962 +footbal 1 7 5.010635 5.010635 1912 +tip 1 7 5.010635 5.010635 1863 +cupertino 1 2 6.263398 6.263398 4956 +columbia 1 2 6.263398 6.263398 5900 +shabel 1 1 6.957497 6.957497 18737 +pagech 1 1 6.957497 6.957497 18738 +wisconsinch 1 1 6.957497 6.957497 18739 +informationmajor 1 1 6.957497 6.957497 18740 +monta 1 1 6.957497 6.957497 18741 +warrior 1 1 6.957497 6.957497 18742 +mercuri 1 1 6.957497 6.957497 18735 +andnando 1 1 6.957497 6.957497 18736 +shark 1 1 6.957497 6.957497 18743 +oakland 1 1 6.957497 6.957497 18744 +newsmus 1 1 6.957497 6.957497 18745 +jshabel 1 1 6.957497 6.957497 18746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..50dc1936 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +need 1 98 2.302585 2.302585 135 +associ 1 93 2.397895 2.397895 151 +proceed 1 93 2.397895 2.397895 152 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +search 1 95 2.397895 2.397895 155 +west 1 83 2.484907 2.484907 192 +larg 1 82 2.484907 2.484907 168 +environ 1 84 2.484907 2.484907 177 +master 1 76 2.564949 2.564949 216 +server 1 76 2.564949 2.564949 204 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +involv 1 71 2.639057 2.639057 227 +workshop 1 71 2.639057 2.639057 239 +integr 1 67 2.708050 2.708050 245 +august 1 66 2.708050 2.708050 257 +street 1 63 2.772589 2.772589 293 +prof 1 64 2.772589 2.772589 273 +improv 1 62 2.772589 2.772589 289 +organ 1 65 2.772589 2.772589 265 +complex 1 64 2.772589 2.772589 269 +evalu 1 64 2.772589 2.772589 266 +januari 1 62 2.772589 2.772589 264 +experi 1 64 2.772589 2.772589 283 +explor 1 58 2.890372 2.890372 324 +thesi 1 57 2.890372 2.890372 327 +overview 1 56 2.890372 2.890372 323 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +found 1 53 2.944439 2.944439 337 +advisor 1 51 2.995732 2.995732 355 +digit 1 52 2.995732 2.995732 348 +telephon 1 50 3.044522 3.044522 373 +set 1 50 3.044522 3.044522 361 +visual 1 48 3.044522 3.044522 372 +frequent 1 49 3.044522 3.044522 367 +adapt 1 46 3.091042 3.091042 387 +discuss 1 45 3.135494 3.135494 399 +third 1 43 3.178054 3.178054 412 +linear 1 41 3.218876 3.218876 431 +societi 1 40 3.258097 3.258097 456 +join 1 39 3.258097 3.258097 457 +submit 1 39 3.258097 3.258097 440 +industri 1 38 3.295837 3.295837 464 +concurr 1 34 3.401197 3.401197 501 +queri 1 33 3.433987 3.433987 524 +storag 1 31 3.496508 3.496508 553 +limit 1 29 3.583519 3.583519 585 +synchron 1 29 3.583519 3.583519 588 +measur 1 28 3.610918 3.610918 609 +arrai 1 27 3.637586 3.637586 627 +client 1 25 3.737670 3.737670 679 +jeff 1 25 3.737670 3.737670 673 +disk 1 22 3.850148 3.850148 747 +chen 1 21 3.912023 3.912023 791 +media 1 19 4.007333 4.007333 861 +sigmod 1 19 4.007333 4.007333 877 +miller 1 17 4.110874 4.110874 949 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +dbm 1 13 4.382027 4.382027 1136 +buffer 1 12 4.465908 4.465908 1211 +characterist 1 12 4.465908 4.465908 1257 +daniel 1 12 4.465908 4.465908 1233 +raghu 1 12 4.465908 4.465908 1212 +optic 1 12 4.465908 4.465908 1221 +volum 1 11 4.553877 4.553877 1347 +alpha 1 11 4.553877 4.553877 1348 +resid 1 10 4.653960 4.653960 1461 +unifi 1 8 4.875197 4.875197 1774 +dataset 1 7 5.010635 5.010635 1914 +refere 1 7 5.010635 5.010635 1895 +eduresearch 1 6 5.164786 5.164786 2205 +divers 1 6 5.164786 5.164786 2232 +spie 1 6 5.164786 5.164786 2119 +quantum 1 6 5.164786 5.164786 2214 +almaden 1 5 5.347108 5.347108 2511 +tape 1 4 5.568345 5.568345 2959 +metadata 1 4 5.568345 5.568345 2945 +karen 1 4 5.568345 5.568345 2796 +ford 1 4 5.568345 5.568345 2636 +theintern 1 4 5.568345 5.568345 2981 +filesystem 1 4 5.568345 5.568345 2587 +myllymaki 1 3 5.857933 5.857933 4022 +tertiari 1 3 5.857933 5.857933 3193 +alsoavail 1 3 5.857933 5.857933 3887 +wenger 1 3 5.857933 5.857933 4023 +schwarz 1 3 5.857933 5.857933 3986 +trishul 1 3 5.857933 5.857933 4016 +chilimbi 1 3 5.857933 5.857933 4015 +raid 1 3 5.857933 5.857933 4012 +jussi 1 2 6.263398 6.263398 6133 +andvisu 1 2 6.263398 6.263398 6189 +karavan 1 2 6.263398 6.263398 6190 +andtool 1 2 6.263398 6.263398 5126 +beyer 1 2 6.263398 6.263398 6103 +lawand 1 2 6.263398 6.263398 6191 +dataengin 1 2 6.263398 6.263398 6118 +helsinki 1 2 6.263398 6.263398 5702 +myllymakijussi 1 1 6.957497 6.957497 18752 +summaryi 1 1 6.957497 6.957497 18753 +onadvanc 1 1 6.957497 6.957497 18754 +mcurrent 1 1 6.957497 6.957497 18755 +storageto 1 1 6.957497 6.957497 18747 +deviseproject 1 1 6.957497 6.957497 18756 +mironlivni 1 1 6.957497 6.957497 18757 +joinsof 1 1 6.957497 6.957497 18758 +listbelow 1 1 6.957497 6.957497 18759 +andfunct 1 1 6.957497 6.957497 18760 +datavisu 1 1 6.957497 6.957497 18761 +managementissu 1 1 6.957497 6.957497 18762 +publicationseffici 1 1 6.957497 6.957497 18763 +andtap 1 1 6.957497 6.957497 18748 +programperform 1 1 6.957497 6.957497 18764 +bartonp 1 1 6.957497 6.957497 18765 +tertiarystorag 1 1 6.957497 6.957497 18766 +withmiron 1 1 6.957497 6.957497 18767 +acmsigmetr 1 1 6.957497 6.957497 18768 +publicationdevis 1 1 6.957497 6.957497 18769 +donjerkov 1 1 6.957497 6.957497 18770 +andmiron 1 1 6.957497 6.957497 18771 +publicationsdisk 1 1 6.957497 6.957497 18772 +tapeaccess 1 1 6.957497 6.957497 18773 +degreeproject 1 1 6.957497 6.957497 18774 +networkarchitectur 1 1 6.957497 6.957497 18775 +finnish 1 1 6.957497 6.957497 18776 +documentsimplement 1 1 6.957497 6.957497 18777 +treealgorithm 1 1 6.957497 6.957497 18778 +yoav 1 1 6.957497 6.957497 18749 +weiss 1 1 6.957497 6.957497 18750 +productsoverview 1 1 6.957497 6.957497 18779 +supplier 1 1 6.957497 6.957497 18780 +productssom 1 1 6.957497 6.957497 18781 +adaptec 1 1 6.957497 6.957497 18782 +scsi 1 1 6.957497 6.957497 18751 +workstationsandpcsandtechn 1 1 6.957497 6.957497 18783 +journaland 1 1 6.957497 6.957497 18784 +whitepap 1 1 6.957497 6.957497 18785 +researchandcyberjourn 1 1 6.957497 6.957497 18786 +tapeanddlt 1 1 6.957497 6.957497 18787 +faqandwhitepap 1 1 6.957497 6.957497 18788 +solarisandsparcstationsandtechn 1 1 6.957497 6.957497 18789 +faqandstorag 1 1 6.957497 6.957497 18790 +faqand 1 1 6.957497 6.957497 18791 +otherusenet 1 1 6.957497 6.957497 18792 +faqsmani 1 1 6.957497 6.957497 18793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..1d5696e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +construct 1 139 1.945910 1.945910 82 +homepag 1 93 2.397895 2.397895 148 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +other 1 24 3.761200 3.761200 697 +sorri 1 4 5.568345 5.568345 3059 +jyothi 1 3 5.857933 5.857933 3423 +jyothithi 1 1 6.957497 6.957497 18794 +dissappoint 1 1 6.957497 6.957497 18795 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..7ba3cd14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +parallel 1 169 1.791759 1.791759 60 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +tool 1 117 2.079442 2.079442 93 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +learn 1 86 2.484907 2.484907 170 +school 1 84 2.484907 2.484907 188 +west 1 83 2.484907 2.484907 192 +environ 1 84 2.484907 2.484907 177 +thing 1 84 2.484907 2.484907 189 +know 1 80 2.564949 2.564949 198 +servic 1 72 2.639057 2.639057 236 +free 1 73 2.639057 2.639057 224 +street 1 63 2.772589 2.772589 293 +undergradu 1 54 2.944439 2.944439 338 +cool 1 49 3.044522 3.044522 374 +life 1 50 3.044522 3.044522 375 +could 1 46 3.091042 3.091042 383 +autom 1 41 3.218876 3.218876 434 +alumni 1 21 3.912023 3.912023 807 +miss 1 19 4.007333 4.007333 866 +thoma 1 18 4.060443 4.060443 901 +women 1 16 4.174387 4.174387 1004 +anyth 1 16 4.174387 4.174387 998 +save 1 14 4.317488 4.317488 1099 +tune 1 12 4.465908 4.465908 1227 +safe 1 12 4.465908 4.465908 1274 +paradyn 1 9 4.753590 4.753590 1614 +tutor 1 9 4.753590 4.753590 1552 +port 1 8 4.875197 4.875197 1766 +pursu 1 7 5.010635 5.010635 1902 +constitut 1 6 5.164786 5.164786 2026 +salt 1 5 5.347108 5.347108 2413 +water 1 5 5.347108 5.347108 2535 +ship 1 5 5.347108 5.347108 2534 +sail 1 5 5.347108 5.347108 2571 +karen 1 4 5.568345 5.568345 2796 +frontier 1 3 5.857933 5.857933 3771 +counti 1 3 5.857933 5.857933 3682 +karavan 1 2 6.263398 6.263398 6190 +wic 1 2 6.263398 6.263398 4673 +dane 1 2 6.263398 6.263398 5534 +pagefor 1 2 6.263398 6.263398 6151 +lover 1 2 6.263398 6.263398 6192 +tear 1 2 6.263398 6.263398 5076 +karavaniceveryth 1 1 6.957497 6.957497 18796 +karavanicresearch 1 1 6.957497 6.957497 18797 +databasesask 1 1 6.957497 6.957497 18798 +studentstrio 1 1 6.957497 6.957497 18799 +safer 1 1 6.957497 6.957497 18800 +chocol 1 1 6.957497 6.957497 18801 +onlystuyves 1 1 6.957497 6.957497 18802 +associationstuyves 1 1 6.957497 6.957497 18803 +legisl 1 1 6.957497 6.957497 18804 +internetth 1 1 6.957497 6.957497 18805 +cure 1 1 6.957497 6.957497 18806 +sweat 1 1 6.957497 6.957497 18807 +isak 1 1 6.957497 6.957497 18808 +dinesen 1 1 6.957497 6.957497 18809 +admir 1 1 6.957497 6.957497 18810 +grace 1 1 6.957497 6.957497 18811 +hopper 1 1 6.957497 6.957497 18812 +pioneer 1 1 6.957497 6.957497 18813 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..144f1b19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +us 1 329 1.098612 1.098612 16 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +architectur 1 139 1.945910 1.945910 77 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +proceed 1 93 2.397895 2.397895 152 +ieee 1 86 2.484907 2.484907 190 +wide 1 84 2.484907 2.484907 185 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +summari 1 73 2.639057 2.639057 237 +david 1 71 2.639057 2.639057 232 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +august 1 66 2.708050 2.708050 257 +simul 1 66 2.708050 2.708050 255 +dept 1 64 2.772589 2.772589 291 +share 1 59 2.833213 2.833213 304 +march 1 61 2.833213 2.833213 295 +juli 1 60 2.833213 2.833213 305 +extens 1 53 2.944439 2.944439 340 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +protocol 1 45 3.135494 3.135494 407 +editor 1 41 3.218876 3.218876 433 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +autom 1 41 3.218876 3.218876 434 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +jame 1 35 3.401197 3.401197 507 +option 1 30 3.555348 3.555348 575 +multiprocessor 1 28 3.610918 3.610918 605 +aspect 1 25 3.737670 3.737670 663 +supercomput 1 25 3.737670 3.737670 681 +scalabl 1 24 3.761200 3.761200 705 +methodolog 1 23 3.806662 3.806662 733 +synthesi 1 20 3.951244 3.951244 834 +north 1 19 4.007333 4.007333 873 +wind 1 18 4.060443 4.060443 908 +monitor 1 17 4.110874 4.110874 941 +hierarch 1 15 4.248495 4.248495 1018 +coher 1 14 4.317488 4.317488 1109 +introduc 1 13 4.382027 4.382027 1139 +prolog 1 13 4.382027 4.382027 1155 +tunnel 1 9 4.753590 4.753590 1615 +depth 1 8 4.875197 4.875197 1636 +upcom 1 8 4.875197 4.875197 1685 +goodman 1 7 5.010635 5.010635 1891 +dedic 1 7 5.010635 5.010635 1843 +greec 1 6 5.164786 5.164786 2208 +holland 1 5 5.347108 5.347108 2490 +publicationsresearch 1 4 5.568345 5.568345 2876 +galileo 1 4 5.568345 5.568345 3086 +microprogram 1 4 5.568345 5.568345 2604 +stefano 1 3 5.857933 5.857933 3372 +kaxira 1 3 5.857933 5.857933 3373 +stein 1 3 5.857933 5.857933 3646 +multiprocess 1 2 6.263398 6.263398 5142 +gjess 1 2 6.263398 6.263398 6156 +kiloprocessor 1 1 6.957497 6.957497 18814 +sciresearch 1 1 6.957497 6.957497 18818 +glow 1 1 6.957497 6.957497 18815 +collaborationwith 1 1 6.957497 6.957497 18819 +incolabor 1 1 6.957497 6.957497 18820 +goodmanto 1 1 6.957497 6.957497 18821 +kaxirasto 1 1 6.957497 6.957497 18822 +goodmannd 1 1 6.957497 6.957497 18823 +goodmanst 1 1 6.957497 6.957497 18824 +kaxirasunivers 1 1 6.957497 6.957497 18825 +stafylopati 1 1 6.957497 6.957497 18826 +papakonstantin 1 1 6.957497 6.957497 18816 +kaxirasinform 1 1 6.957497 6.957497 18827 +tsanaka 1 1 6.957497 6.957497 18817 +pekmestzi 1 1 6.957497 6.957497 18828 +kaxirasp 1 1 6.957497 6.957497 18829 +kaxirasmicroprocess 1 1 6.957497 6.957497 18830 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..355bb6a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,252 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +world 1 115 2.197225 2.197225 126 +make 1 111 2.197225 2.197225 120 +structur 1 106 2.197225 2.197225 105 +look 1 107 2.197225 2.197225 115 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +person 1 111 2.197225 2.197225 117 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +peopl 1 96 2.302585 2.302585 132 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +level 1 87 2.484907 2.484907 180 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +know 1 80 2.564949 2.564949 198 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +html 1 75 2.639057 2.639057 235 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +java 1 70 2.708050 2.708050 248 +result 1 65 2.772589 2.772589 281 +locat 1 59 2.833213 2.833213 303 +publish 1 57 2.890372 2.890372 326 +variou 1 56 2.890372 2.890372 317 +index 1 56 2.890372 2.890372 309 +think 1 57 2.890372 2.890372 314 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +week 1 52 2.995732 2.995732 343 +date 1 51 2.995732 2.995732 344 +much 1 52 2.995732 2.995732 349 +standard 1 48 3.044522 3.044522 365 +visual 1 48 3.044522 3.044522 372 +give 1 50 3.044522 3.044522 359 +archiv 1 49 3.044522 3.044522 364 +pointer 1 48 3.044522 3.044522 368 +possibl 1 47 3.091042 3.091042 378 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +made 1 44 3.135494 3.135494 398 +directori 1 45 3.135494 3.135494 396 +better 1 45 3.135494 3.135494 401 +third 1 43 3.178054 3.178054 412 +long 1 43 3.178054 3.178054 413 +might 1 41 3.218876 3.218876 426 +littl 1 39 3.258097 3.258097 454 +realli 1 40 3.258097 3.258097 444 +probabl 1 40 3.258097 3.258097 455 +programm 1 39 3.258097 3.258097 445 +form 1 39 3.258097 3.258097 443 +small 1 39 3.258097 3.258097 447 +slide 1 38 3.295837 3.295837 467 +especi 1 36 3.367296 3.367296 496 +short 1 36 3.367296 3.367296 499 +soon 1 36 3.367296 3.367296 494 +print 1 34 3.401197 3.401197 503 +taught 1 33 3.433987 3.433987 526 +idea 1 32 3.465736 3.465736 545 +often 1 31 3.496508 3.496508 551 +someth 1 31 3.496508 3.496508 554 +quot 1 29 3.583519 3.583519 582 +pass 1 28 3.610918 3.610918 611 +becom 1 28 3.610918 3.610918 603 +rather 1 26 3.688879 3.688879 642 +relev 1 26 3.688879 3.688879 637 +never 1 25 3.737670 3.737670 671 +hill 1 25 3.737670 3.737670 670 +reliabl 1 25 3.737670 3.737670 674 +notic 1 25 3.737670 3.737670 675 +wish 1 24 3.761200 3.761200 692 +consult 1 24 3.761200 3.761200 687 +displai 1 23 3.806662 3.806662 712 +proof 1 23 3.806662 3.806662 720 +varieti 1 22 3.850148 3.850148 740 +hierarchi 1 22 3.850148 3.850148 744 +properti 1 22 3.850148 3.850148 749 +leav 1 21 3.912023 3.912023 772 +love 1 21 3.912023 3.912023 804 +break 1 20 3.951244 3.951244 812 +anderson 1 19 4.007333 4.007333 860 +ever 1 19 4.007333 4.007333 872 +steven 1 17 4.110874 4.110874 953 +thought 1 17 4.110874 4.110874 945 +anyon 1 17 4.110874 4.110874 916 +anyth 1 16 4.174387 4.174387 998 +doesn 1 15 4.248495 4.248495 1055 +indic 1 15 4.248495 4.248495 1013 +piec 1 15 4.248495 4.248495 1020 +side 1 15 4.248495 4.248495 1022 +anywai 1 15 4.248495 4.248495 1047 +rate 1 15 4.248495 4.248495 1037 +atth 1 15 4.248495 4.248495 1019 +psycholog 1 15 4.248495 4.248495 1054 +convent 1 14 4.317488 4.317488 1072 +attribut 1 14 4.317488 4.317488 1092 +everyth 1 13 4.382027 4.382027 1169 +care 1 13 4.382027 4.382027 1177 +front 1 13 4.382027 4.382027 1154 +unfortun 1 13 4.382027 4.382027 1170 +translat 1 13 4.382027 4.382027 1164 +prelim 1 12 4.465908 4.465908 1201 +skill 1 12 4.465908 4.465908 1205 +insid 1 12 4.465908 4.465908 1262 +uniqu 1 12 4.465908 4.465908 1228 +primit 1 11 4.553877 4.553877 1317 +perman 1 11 4.553877 4.553877 1372 +success 1 10 4.653960 4.653960 1390 +enter 1 10 4.653960 4.653960 1454 +subset 1 10 4.653960 4.653960 1425 +light 1 9 4.753590 4.753590 1533 +end 1 9 4.753590 4.753590 1567 +discov 1 9 4.753590 4.753590 1562 +clear 1 9 4.753590 4.753590 1488 +angel 1 8 4.875197 4.875197 1779 +unifi 1 8 4.875197 4.875197 1774 +perfect 1 7 5.010635 5.010635 1921 +tag 1 7 5.010635 5.010635 1821 +surpris 1 7 5.010635 5.010635 1828 +gave 1 7 5.010635 5.010635 1922 +intellectu 1 7 5.010635 5.010635 1847 +serial 1 7 5.010635 5.010635 1975 +adob 1 7 5.010635 5.010635 1873 +chanc 1 7 5.010635 5.010635 1960 +shot 1 7 5.010635 5.010635 1898 +strang 1 6 5.164786 5.164786 2064 +markup 1 6 5.164786 5.164786 2059 +meant 1 6 5.164786 5.164786 2055 +vari 1 6 5.164786 5.164786 2001 +impress 1 6 5.164786 5.164786 2096 +ifyou 1 6 5.164786 5.164786 1992 +creation 1 6 5.164786 5.164786 2069 +somewher 1 6 5.164786 5.164786 2176 +keeper 1 5 5.347108 5.347108 2569 +stupid 1 5 5.347108 5.347108 2489 +junior 1 5 5.347108 5.347108 2519 +explicitli 1 5 5.347108 5.347108 2308 +hate 1 5 5.347108 5.347108 2529 +bean 1 4 5.568345 5.568345 2968 +hell 1 4 5.568345 5.568345 2885 +suppos 1 4 5.568345 5.568345 3002 +cheap 1 4 5.568345 5.568345 2751 +heaven 1 3 5.857933 5.857933 3589 +lauri 1 3 5.857933 5.857933 3867 +wasn 1 3 5.857933 5.857933 3800 +aliv 1 3 5.857933 5.857933 3864 +argu 1 3 5.857933 5.857933 3698 +rsum 1 3 5.857933 5.857933 3939 +outof 1 3 5.857933 5.857933 3296 +aren 1 3 5.857933 5.857933 3512 +easier 1 3 5.857933 5.857933 3470 +ironi 1 2 6.263398 6.263398 5986 +sarcasm 1 2 6.263398 6.263398 5871 +offens 1 2 6.263398 6.263398 6168 +miracl 1 2 6.263398 6.263398 5710 +convinc 1 2 6.263398 6.263398 6019 +defunct 1 2 6.263398 6.263398 6162 +caltech 1 2 6.263398 6.263398 5223 +personnel 1 2 6.263398 6.263398 4381 +danger 1 2 6.263398 6.263398 5725 +informationag 1 2 6.263398 6.263398 5446 +bui 1 2 6.263398 6.263398 4486 +ofread 1 2 6.263398 6.263398 4417 +possibleto 1 2 6.263398 6.263398 4942 +foughtthei 1 1 6.957497 6.957497 18833 +bitmap 1 1 6.957497 6.957497 18834 +theblind 1 1 6.957497 6.957497 18835 +whateverbrows 1 1 6.957497 6.957497 18836 +literari 1 1 6.957497 6.957497 18837 +satir 1 1 6.957497 6.957497 18838 +butnoth 1 1 6.957497 6.957497 18839 +herein 1 1 6.957497 6.957497 18840 +areoffend 1 1 6.957497 6.957497 18841 +firsttwo 1 1 6.957497 6.957497 18842 +addup 1 1 6.957497 6.957497 18843 +fizzl 1 1 6.957497 6.957497 18844 +areobtain 1 1 6.957497 6.957497 18845 +creatingkiosk 1 1 6.957497 6.957497 18846 +thosewho 1 1 6.957497 6.957497 18847 +mybe 1 1 6.957497 6.957497 18848 +thoughtson 1 1 6.957497 6.957497 18849 +hedgehog 1 1 6.957497 6.957497 18831 +pager 1 1 6.957497 6.957497 18832 +wantto 1 1 6.957497 6.957497 18850 +todo 1 1 6.957497 6.957497 18851 +sporad 1 1 6.957497 6.957497 18852 +danenet 1 1 6.957497 6.957497 18853 +dilhr 1 1 6.957497 6.957497 18854 +jobnet 1 1 6.957497 6.957497 18855 +photonet 1 1 6.957497 6.957497 18856 +databaseus 1 1 6.957497 6.957497 18857 +freez 1 1 6.957497 6.957497 18858 +fought 1 1 6.957497 6.957497 18859 +sfuai 1 1 6.957497 6.957497 18860 +assigna 1 1 6.957497 6.957497 18861 +contextu 1 1 6.957497 6.957497 18862 +distil 1 1 6.957497 6.957497 18863 +rsuminto 1 1 6.957497 6.957497 18864 +pinch 1 1 6.957497 6.957497 18865 +certaintruth 1 1 6.957497 6.957497 18866 +eventuallypick 1 1 6.957497 6.957497 18867 +mull 1 1 6.957497 6.957497 18868 +accessibleto 1 1 6.957497 6.957497 18869 +tough 1 1 6.957497 6.957497 18870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..a1b7626a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +perform 1 143 1.945910 1.945910 74 +welcom 1 122 2.079442 2.079442 99 +postscript 1 131 2.079442 2.079442 90 +search 1 95 2.397895 2.397895 155 +music 1 42 3.218876 3.218876 436 +indian 1 22 3.850148 3.850148 769 +classic 1 14 4.317488 4.317488 1084 +gzip 1 6 5.164786 5.164786 2117 +steer 1 5 5.347108 5.347108 2328 +krishna 1 3 5.857933 5.857933 3495 +kunchithapadamkrishna 1 1 6.957497 6.957497 18871 +kunchithapadamgreet 1 1 6.957497 6.957497 18872 +miscellaneouspubl 1 1 6.957497 6.957497 18873 +toolsresum 1 1 6.957497 6.957497 18874 +bykk 1 1 6.957497 6.957497 18875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..35cc7f94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +street 1 63 2.772589 2.772589 293 +sigmod 1 19 4.007333 4.007333 877 +dbm 1 13 4.382027 4.382027 1136 +dewitt 1 12 4.465908 4.465908 1270 +tuft 1 5 5.347108 5.347108 2575 +kristin 1 4 5.568345 5.568345 3089 +miscellani 1 3 5.857933 5.857933 3976 +pagekristin 1 1 6.957497 6.957497 18876 +eduadvisor 1 1 6.957497 6.957497 18877 +serveruw 1 1 6.957497 6.957497 18878 +groupacm 1 1 6.957497 6.957497 18879 +pageeo 1 1 6.957497 6.957497 18880 +officelast 1 1 6.957497 6.957497 18881 +tuftekristin 1 1 6.957497 6.957497 18882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..279d273a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +novemb 1 81 2.484907 2.484907 179 +librari 1 87 2.484907 2.484907 181 +good 1 77 2.564949 2.564949 200 +import 1 65 2.772589 2.772589 282 +life 1 50 3.044522 3.044522 375 +electron 1 47 3.091042 3.091042 379 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +compani 1 41 3.218876 3.218876 423 +short 1 36 3.367296 3.367296 499 +whole 1 17 4.110874 4.110874 940 +hobbi 1 16 4.174387 4.174387 1009 +uniqu 1 12 4.465908 4.465908 1228 +opinion 1 8 4.875197 4.875197 1708 +pursu 1 7 5.010635 5.010635 1902 +entiti 1 3 5.857933 5.857933 3096 +krung 1 1 6.957497 6.957497 18883 +homepageupd 1 1 6.957497 6.957497 18884 +underconstructioni 1 1 6.957497 6.957497 18885 +serf 1 1 6.957497 6.957497 18886 +cometh 1 1 6.957497 6.957497 18887 +linkedth 1 1 6.957497 6.957497 18888 +sinapiromsaran 1 1 6.957497 6.957497 18889 +emailkrung 1 1 6.957497 6.957497 18890 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..a8b09bc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +report 1 131 2.079442 2.079442 92 +spring 1 131 2.079442 2.079442 88 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +structur 1 106 2.197225 2.197225 105 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +chang 1 82 2.484907 2.484907 163 +appear 1 78 2.564949 2.564949 210 +complet 1 77 2.564949 2.564949 208 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +intellig 1 72 2.639057 2.639057 225 +abstract 1 62 2.772589 2.772589 276 +function 1 62 2.772589 2.772589 275 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +reason 1 57 2.890372 2.890372 318 +space 1 57 2.890372 2.890372 310 +local 1 55 2.944439 2.944439 334 +extens 1 53 2.944439 2.944439 340 +right 1 48 3.044522 3.044522 363 +basic 1 50 3.044522 3.044522 360 +answer 1 45 3.135494 3.135494 391 +algebra 1 45 3.135494 3.135494 394 +math 1 44 3.135494 3.135494 402 +autom 1 41 3.218876 3.218876 434 +press 1 42 3.218876 3.218876 419 +review 1 42 3.218876 3.218876 425 +theoret 1 39 3.258097 3.258097 446 +close 1 38 3.295837 3.295837 465 +singl 1 34 3.401197 3.401197 510 +taught 1 33 3.433987 3.433987 526 +independ 1 32 3.465736 3.465736 548 +semant 1 29 3.583519 3.583519 587 +consid 1 29 3.583519 3.583519 590 +turn 1 29 3.583519 3.583519 586 +measur 1 28 3.610918 3.610918 609 +american 1 27 3.637586 3.637586 634 +comp 1 26 3.688879 3.688879 650 +theorem 1 21 3.912023 3.912023 786 +prove 1 19 4.007333 4.007333 848 +geometr 1 19 4.007333 4.007333 852 +failur 1 18 4.060443 4.060443 898 +stanford 1 17 4.110874 4.110874 955 +moor 1 17 4.110874 4.110874 936 +style 1 15 4.248495 4.248495 1036 +topolog 1 14 4.317488 4.317488 1089 +draft 1 14 4.317488 4.317488 1085 +prolog 1 13 4.382027 4.382027 1155 +resolut 1 13 4.382027 4.382027 1172 +weak 1 13 4.382027 4.382027 1159 +kenneth 1 12 4.465908 4.465908 1265 +deduct 1 12 4.465908 4.465908 1236 +typic 1 11 4.553877 4.553877 1360 +fix 1 11 4.553877 4.553877 1327 +loop 1 11 4.553877 4.553877 1310 +edutelephon 1 10 4.653960 4.653960 1473 +besid 1 8 4.875197 4.875197 1681 +compact 1 7 5.010635 5.010635 1907 +boyer 1 6 5.164786 5.164786 2013 +rough 1 6 5.164786 5.164786 2107 +infer 1 6 5.164786 5.164786 2040 +shortest 1 5 5.347108 5.347108 2424 +constant 1 5 5.347108 5.347108 2251 +kunen 1 3 5.857933 5.857933 3500 +axiomat 1 3 5.857933 5.857933 3288 +mathematica 1 3 5.857933 5.857933 3303 +preprint 1 3 5.857933 5.857933 3481 +monthli 1 3 5.857933 5.857933 3910 +negat 1 2 6.263398 6.263398 6073 +expon 1 2 6.263398 6.263398 5323 +mill 1 2 6.263398 6.263398 6193 +liter 1 2 6.263398 6.263398 4689 +law 1 2 6.263398 6.263398 4896 +professormath 1 1 6.957497 6.957497 18895 +resolutionto 1 1 6.957497 6.957497 18896 +likeprolog 1 1 6.957497 6.957497 18897 +prologus 1 1 6.957497 6.957497 18898 +incompat 1 1 6.957497 6.957497 18899 +betweenleast 1 1 6.957497 6.957497 18900 +backtrack 1 1 6.957497 6.957497 18901 +thissubject 1 1 6.957497 6.957497 18902 +usualaxiom 1 1 6.957497 6.957497 18903 +axiom 1 1 6.957497 6.957497 18892 +hart 1 1 6.957497 6.957497 18891 +ramsei 1 1 6.957497 6.957497 18904 +corson 1 1 6.957497 6.957497 18905 +fundamenta 1 1 6.957497 6.957497 18893 +moufang 1 1 6.957497 6.957497 18906 +quasigroup 1 1 6.957497 6.957497 18894 +conjugaci 1 1 6.957497 6.957497 18907 +moschovaki 1 1 6.957497 6.957497 18908 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..bc1d0960 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,246 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +perform 1 143 1.945910 1.945910 74 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +memori 1 101 2.302585 2.302585 139 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +larg 1 82 2.484907 2.484907 168 +librari 1 87 2.484907 2.484907 181 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +help 1 83 2.484907 2.484907 175 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +summari 1 73 2.639057 2.639057 237 +write 1 72 2.639057 2.639057 222 +workshop 1 71 2.639057 2.639057 239 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +david 1 71 2.639057 2.639057 232 +java 1 70 2.708050 2.708050 248 +august 1 66 2.708050 2.708050 257 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +evalu 1 64 2.772589 2.772589 266 +improv 1 62 2.772589 2.772589 289 +juli 1 60 2.833213 2.833213 305 +colleg 1 61 2.833213 2.833213 300 +share 1 59 2.833213 2.833213 304 +march 1 61 2.833213 2.833213 295 +automat 1 61 2.833213 2.833213 306 +instruct 1 53 2.944439 2.944439 332 +februari 1 54 2.944439 2.944439 328 +particular 1 51 2.995732 2.995732 352 +hardwar 1 51 2.995732 2.995732 350 +principl 1 48 3.044522 3.044522 357 +frequent 1 49 3.044522 3.044522 367 +california 1 46 3.091042 3.091042 388 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +mark 1 44 3.135494 3.135494 403 +better 1 45 3.135494 3.135494 401 +edit 1 42 3.218876 3.218876 418 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +transact 1 39 3.258097 3.258097 438 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 1 35 3.401197 3.401197 507 +bibliographi 1 34 3.401197 3.401197 518 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +richard 1 31 3.496508 3.496508 559 +profil 1 30 3.555348 3.555348 581 +power 1 30 3.555348 3.555348 573 +depend 1 29 3.583519 3.583519 583 +focus 1 29 3.583519 3.583519 584 +pass 1 28 3.610918 3.610918 611 +multiprocessor 1 28 3.610918 3.610918 605 +symbol 1 27 3.637586 3.637586 620 +static 1 27 3.637586 3.637586 619 +berkelei 1 26 3.688879 3.688879 657 +revis 1 26 3.688879 3.688879 640 +trace 1 25 3.737670 3.737670 677 +hill 1 25 3.737670 3.737670 670 +supercomput 1 25 3.737670 3.737670 681 +spent 1 25 3.737670 3.737670 676 +scalabl 1 24 3.761200 3.761200 705 +lab 1 24 3.761200 3.761200 698 +flow 1 24 3.761200 3.761200 700 +demonstr 1 24 3.761200 3.761200 694 +cooper 1 22 3.850148 3.850148 757 +path 1 21 3.912023 3.912023 778 +programminglanguag 1 21 3.912023 3.912023 782 +annot 1 21 3.912023 3.912023 775 +department 1 20 3.951244 3.951244 839 +portabl 1 20 3.951244 3.951244 819 +fine 1 20 3.951244 3.951244 822 +exploit 1 20 3.951244 3.951244 836 +eric 1 19 4.007333 4.007333 870 +wind 1 18 4.060443 4.060443 908 +thoma 1 18 4.060443 4.060443 901 +steven 1 17 4.110874 4.110874 953 +asplo 1 17 4.110874 4.110874 948 +eduphon 1 15 4.248495 4.248495 1060 +micro 1 15 4.248495 4.248495 1031 +hybrid 1 15 4.248495 4.248495 1057 +coher 1 14 4.317488 4.317488 1109 +sigplan 1 13 4.382027 4.382027 1190 +brad 1 12 4.465908 4.465908 1264 +employ 1 12 4.465908 4.465908 1291 +wood 1 11 4.553877 4.553877 1355 +branch 1 11 4.553877 4.553877 1318 +grain 1 10 4.653960 4.653960 1448 +facilit 1 10 4.653960 4.653960 1412 +laru 1 9 4.753590 4.753590 1560 +tunnel 1 9 4.753590 4.753590 1615 +ball 1 9 4.753590 4.753590 1608 +wilson 1 9 4.753590 4.753590 1536 +routin 1 9 4.753590 4.753590 1549 +secretari 1 8 4.875197 4.875197 1775 +upcom 1 8 4.875197 4.875197 1685 +pldi 1 8 4.875197 4.875197 1704 +irregular 1 8 4.875197 4.875197 1768 +joel 1 8 4.875197 4.875197 1698 +insert 1 8 4.875197 4.875197 1687 +harvard 1 7 5.010635 5.010635 1926 +fifth 1 7 5.010635 5.010635 1931 +roger 1 7 5.010635 5.010635 1892 +sixth 1 7 5.010635 5.010635 1917 +sciencedepart 1 6 5.164786 5.164786 2172 +chandra 1 6 5.164786 5.164786 2091 +microarchitectur 1 6 5.164786 5.164786 2238 +ann 1 6 5.164786 5.164786 2065 +unpublish 1 6 5.164786 5.164786 2226 +bell 1 6 5.164786 5.164786 2224 +mukherje 1 5 5.347108 5.347108 2586 +tempest 1 5 5.347108 5.347108 2548 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +lebeck 1 5 5.347108 5.347108 2582 +reinhardt 1 5 5.347108 5.347108 2583 +ioanni 1 5 5.347108 5.347108 2553 +forprogram 1 5 5.347108 5.347108 2361 +icpp 1 5 5.347108 5.347108 2382 +toc 1 5 5.347108 5.347108 2562 +summarymi 1 5 5.347108 5.347108 2580 +computerarchitectur 1 5 5.347108 5.347108 2290 +languagesand 1 4 5.568345 5.568345 3071 +gregori 1 4 5.568345 5.568345 2928 +satish 1 4 5.568345 5.568345 2833 +sharma 1 4 5.568345 5.568345 2752 +ppopp 1 4 5.568345 5.568345 2774 +substrat 1 4 5.568345 5.568345 2857 +compcon 1 4 5.568345 5.568345 2958 +alvin 1 4 5.568345 5.568345 3084 +schoina 1 4 5.568345 5.568345 3085 +markhil 1 4 5.568345 5.568345 2819 +manuscript 1 4 5.568345 5.568345 2750 +wart 1 4 5.568345 5.568345 2987 +talluri 1 4 5.568345 5.568345 2820 +oracl 1 4 5.568345 5.568345 2823 +andi 1 4 5.568345 5.568345 3081 +fingerson 1 3 5.857933 5.857933 4018 +thea 1 3 5.857933 5.857933 4019 +sklenar 1 3 5.857933 5.857933 4020 +wcsss 1 3 5.857933 5.857933 3956 +shubhendu 1 3 5.857933 5.857933 4028 +saltz 1 3 5.857933 5.857933 3385 +frequenc 1 3 5.857933 5.857933 3206 +trishul 1 3 5.857933 5.857933 4016 +chilimbi 1 3 5.857933 5.857933 4015 +madhusudhan 1 3 5.857933 5.857933 4021 +parallelmachin 1 3 5.857933 5.857933 3693 +moredetail 1 3 5.857933 5.857933 3854 +guhan 1 2 6.263398 6.263398 6169 +viswanathan 1 2 6.263398 6.263398 6170 +schnarr 1 2 6.263398 6.263398 6194 +shamik 1 2 6.263398 6.263398 6195 +cico 1 2 6.263398 6.263398 6120 +lorenz 1 2 6.263398 6.263398 4846 +thewisconsin 1 2 6.263398 6.263398 6196 +usalaru 1 1 6.957497 6.957497 18909 +structuresc 1 1 6.957497 6.957497 18910 +spim 1 1 6.957497 6.957497 18911 +wartsrec 1 1 6.957497 6.957497 18912 +paperseffici 1 1 6.957497 6.957497 18913 +teapot 1 1 6.957497 6.957497 18914 +andjam 1 1 6.957497 6.957497 18915 +annerog 1 1 6.957497 6.957497 18916 +practiceof 1 1 6.957497 6.957497 18917 +languagesdesign 1 1 6.957497 6.957497 18918 +youfeng 1 1 6.957497 6.957497 18919 +jameslaru 1 1 6.957497 6.957497 18920 +cachier 1 1 6.957497 6.957497 18921 +graduatesbrad 1 1 6.957497 6.957497 18922 +vassar 1 1 6.957497 6.957497 18923 +languagesfirst 1 1 6.957497 6.957497 18924 +huelsbergen 1 1 6.957497 6.957497 18925 +tball 1 1 6.957497 6.957497 18926 +havehelp 1 1 6.957497 6.957497 18927 +coherencepolici 1 1 6.957497 6.957497 18928 +programmersunderstand 1 1 6.957497 6.957497 18929 +hasidentifi 1 1 6.957497 6.957497 18930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..e0f9165c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +tuesdai 1 73 2.639057 2.639057 219 +eduoffic 1 33 3.433987 3.433987 531 +nick 1 13 4.382027 4.382027 1180 +leavi 1 3 5.857933 5.857933 3438 +pagenick 1 1 6.957497 6.957497 18931 +pageoffic 1 1 6.957497 6.957497 18932 +wednessdai 1 1 6.957497 6.957497 18933 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..d9736869 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +dayton 1 119 2.079442 2.079442 104 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +book 1 99 2.302585 2.302585 131 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +complet 1 77 2.564949 2.564949 208 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +messag 1 76 2.564949 2.564949 212 +html 1 75 2.639057 2.639057 235 +order 1 69 2.708050 2.708050 249 +would 1 67 2.708050 2.708050 251 +dept 1 64 2.772589 2.772589 291 +sever 1 56 2.890372 2.890372 322 +publish 1 57 2.890372 2.890372 326 +finger 1 52 2.995732 2.995732 354 +algebra 1 45 3.135494 3.135494 394 +keep 1 44 3.135494 3.135494 409 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +http 1 41 3.218876 3.218876 420 +origin 1 38 3.295837 3.295837 472 +committe 1 34 3.401197 3.401197 522 +statist 1 35 3.401197 3.401197 521 +univ 1 28 3.610918 3.610918 617 +intend 1 28 3.610918 3.610918 599 +mind 1 27 3.637586 3.637586 632 +other 1 24 3.761200 3.761200 697 +compress 1 23 3.806662 3.806662 719 +wind 1 18 4.060443 4.060443 908 +steven 1 17 4.110874 4.110874 953 +draft 1 14 4.317488 4.317488 1085 +individu 1 13 4.382027 4.382027 1126 +tunnel 1 9 4.753590 4.753590 1615 +isbn 1 7 5.010635 5.010635 1901 +forum 1 6 5.164786 5.164786 2027 +ongo 1 6 5.164786 5.164786 2215 +heavili 1 3 5.857933 5.857933 3572 +andit 1 3 5.857933 5.857933 3328 +thewisconsin 1 2 6.263398 6.263398 6196 +huss 1 1 6.957497 6.957497 18935 +lederman 1 1 6.957497 6.957497 18934 +iscov 1 1 6.957497 6.957497 18937 +prismproject 1 1 6.957497 6.957497 18938 +invol 1 1 6.957497 6.957497 18939 +mpistandard 1 1 6.957497 6.957497 18936 +ongoingwork 1 1 6.957497 6.957497 18940 +compressedtar 1 1 6.957497 6.957497 18941 +desper 1 1 6.957497 6.957497 18942 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..3fe6919e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +proceed 1 93 2.397895 2.397895 152 +second 1 81 2.484907 2.484907 166 +control 1 82 2.484907 2.484907 164 +ieee 1 86 2.484907 2.484907 190 +april 1 77 2.564949 2.564949 196 +dynam 1 76 2.564949 2.564949 194 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +window 1 68 2.708050 2.708050 242 +august 1 66 2.708050 2.708050 257 +virtual 1 62 2.772589 2.772589 285 +sampl 1 53 2.944439 2.944439 339 +tabl 1 51 2.995732 2.995732 346 +telephon 1 50 3.044522 3.044522 373 +protocol 1 45 3.135494 3.135494 407 +fast 1 42 3.218876 3.218876 429 +combin 1 42 3.218876 3.218876 421 +close 1 38 3.295837 3.295837 465 +open 1 38 3.295837 3.295837 469 +focus 1 29 3.583519 3.583519 584 +feedback 1 19 4.007333 4.007333 854 +speed 1 18 4.060443 4.060443 911 +rate 1 15 4.248495 4.248495 1037 +francisco 1 14 4.317488 4.317488 1095 +circuit 1 13 4.382027 4.382027 1131 +clock 1 11 4.553877 4.553877 1320 +loop 1 11 4.553877 4.553877 1310 +purdu 1 10 4.653960 4.653960 1466 +packet 1 10 4.653960 4.653960 1415 +lawrenc 1 7 5.010635 5.010635 1908 +conferenc 1 7 5.010635 5.010635 1857 +testb 1 5 5.347108 5.347108 2456 +mukherje 1 5 5.347108 5.347108 2586 +darpa 1 4 5.568345 5.568345 2944 +admiss 1 4 5.568345 5.568345 2704 +phenomena 1 4 5.568345 5.568345 2962 +landweb 1 3 5.857933 5.857933 3402 +congest 1 3 5.857933 5.857933 3993 +atmospher 1 3 5.857933 5.857933 3388 +infocom 1 3 5.857933 5.857933 3283 +baltimor 1 3 5.857933 5.857933 3809 +theieee 1 2 6.263398 6.263398 6043 +florenc 1 2 6.263398 6.263398 4950 +electronicmail 1 1 6.957497 6.957497 18944 +participatingin 1 1 6.957497 6.957497 18945 +gigabit 1 1 6.957497 6.957497 18946 +involvesth 1 1 6.957497 6.957497 18947 +atgigabit 1 1 6.957497 6.957497 18948 +onissu 1 1 6.957497 6.957497 18949 +visualizationof 1 1 6.957497 6.957497 18950 +establishmentmethod 1 1 6.957497 6.957497 18951 +olsen 1 1 6.957497 6.957497 18952 +witht 1 1 6.957497 6.957497 18953 +faber 1 1 6.957497 6.957497 18943 +sigcommconfer 1 1 6.957497 6.957497 18954 +coursesconnect 1 1 6.957497 6.957497 18955 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..c8c1eeab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +fall 1 181 1.609438 1.609438 40 +address 1 170 1.791759 1.791759 62 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +person 1 111 2.197225 2.197225 117 +section 1 94 2.397895 2.397895 149 +west 1 83 2.484907 2.484907 192 +servic 1 72 2.639057 2.639057 236 +intellig 1 72 2.639057 2.639057 225 +thursdai 1 70 2.708050 2.708050 241 +street 1 63 2.772589 2.772589 293 +wednesdai 1 64 2.772589 2.772589 261 +artifici 1 63 2.772589 2.772589 280 +variou 1 56 2.890372 2.890372 317 +appoint 1 49 3.044522 3.044522 358 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +respons 1 37 3.332205 3.332205 476 +comp 1 26 3.688879 3.688879 650 +women 1 16 4.174387 4.174387 1004 +cognit 1 16 4.174387 4.174387 986 +career 1 12 4.465908 4.465908 1287 +linguist 1 9 4.753590 4.753590 1593 +utah 1 9 4.753590 4.753590 1585 +lloyd 1 6 5.164786 5.164786 2103 +chemistri 1 5 5.347108 5.347108 2405 +shannon 1 1 6.957497 6.957497 18956 +xsoft 1 1 6.957497 6.957497 18957 +lexdemo 1 1 6.957497 6.957497 18958 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..f5759b1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +address 1 170 1.791759 1.791759 62 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +real 1 93 2.397895 2.397895 144 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +thing 1 84 2.484907 2.484907 189 +know 1 80 2.564949 2.564949 198 +good 1 77 2.564949 2.564949 200 +free 1 73 2.639057 2.639057 224 +meet 1 72 2.639057 2.639057 229 +name 1 72 2.639057 2.639057 220 +appli 1 71 2.639057 2.639057 226 +html 1 75 2.639057 2.639057 235 +java 1 70 2.708050 2.708050 248 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +organ 1 65 2.772589 2.772589 265 +automat 1 61 2.833213 2.833213 306 +semest 1 58 2.890372 2.890372 312 +space 1 57 2.890372 2.890372 310 +advisor 1 51 2.995732 2.995732 355 +maintain 1 51 2.995732 2.995732 342 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +friend 1 48 3.044522 3.044522 376 +cool 1 49 3.044522 3.044522 374 +electron 1 47 3.091042 3.091042 379 +favorit 1 44 3.135494 3.135494 410 +realli 1 40 3.258097 3.258097 444 +submit 1 39 3.258097 3.258097 440 +seminar 1 38 3.295837 3.295837 470 +feel 1 37 3.332205 3.332205 483 +go 1 33 3.433987 3.433987 529 +taught 1 33 3.433987 3.433987 526 +someth 1 31 3.496508 3.496508 554 +option 1 30 3.555348 3.555348 575 +becom 1 28 3.610918 3.610918 603 +concern 1 25 3.737670 3.737670 666 +higher 1 24 3.761200 3.761200 690 +longer 1 20 3.951244 3.951244 816 +figur 1 18 4.060443 4.060443 903 +stock 1 16 4.174387 4.174387 1007 +women 1 16 4.174387 4.174387 1004 +todd 1 15 4.248495 4.248495 1051 +anywai 1 15 4.248495 4.248495 1047 +mayb 1 15 4.248495 4.248495 1014 +save 1 14 4.317488 4.317488 1099 +edui 1 13 4.382027 4.382027 1193 +coordin 1 13 4.382027 4.382027 1182 +wait 1 13 4.382027 4.382027 1168 +wife 1 13 4.382027 4.382027 1196 +entertain 1 12 4.465908 4.465908 1286 +basketbal 1 12 4.465908 4.465908 1289 +readi 1 12 4.465908 4.465908 1242 +food 1 12 4.465908 4.465908 1285 +market 1 11 4.553877 4.553877 1361 +fill 1 11 4.553877 4.553877 1349 +keyword 1 11 4.553877 4.553877 1356 +candid 1 9 4.753590 4.753590 1606 +jump 1 9 4.753590 4.753590 1603 +simpli 1 8 4.875197 4.875197 1626 +reload 1 8 4.875197 4.875197 1682 +appar 1 7 5.010635 5.010635 1958 +iowa 1 7 5.010635 5.010635 1971 +polit 1 6 5.164786 5.164786 2115 +legal 1 6 5.164786 5.164786 2094 +troubl 1 6 5.164786 5.164786 2002 +christoph 1 5 5.347108 5.347108 2512 +czar 1 5 5.347108 5.347108 2503 +tuft 1 5 5.347108 5.347108 2575 +amus 1 5 5.347108 5.347108 2366 +sing 1 5 5.347108 5.347108 2499 +gui 1 5 5.347108 5.347108 2573 +girlfriend 1 5 5.347108 5.347108 2579 +festiv 1 4 5.568345 5.568345 2952 +turnidg 1 4 5.568345 5.568345 2829 +superhighwai 1 4 5.568345 5.568345 2943 +chees 1 4 5.568345 5.568345 3090 +rival 1 3 5.857933 5.857933 3583 +tiger 1 3 5.857933 5.857933 3897 +wealth 1 3 5.857933 5.857933 3353 +traci 1 3 5.857933 5.857933 3984 +child 1 3 5.857933 5.857933 3542 +tast 1 3 5.857933 5.857933 3666 +kick 1 3 5.857933 5.857933 3962 +alien 1 3 5.857933 5.857933 3930 +laugh 1 3 5.857933 5.857933 3659 +defeat 1 2 6.263398 6.263398 5401 +kirk 1 2 6.263398 6.263398 6175 +killer 1 2 6.263398 6.263398 6159 +tragic 1 2 6.263398 6.263398 6114 +junki 1 2 6.263398 6.263398 5457 +outer 1 2 6.263398 6.263398 4464 +gross 1 2 6.263398 6.263398 5989 +nake 1 2 6.263398 6.263398 6197 +offspr 1 2 6.263398 6.263398 5699 +bogu 1 2 6.263398 6.263398 5471 +wierd 1 2 6.263398 6.263398 6093 +luka 1 1 6.957497 6.957497 18959 +pagechristoph 1 1 6.957497 6.957497 18962 +lukasrelev 1 1 6.957497 6.957497 18963 +mspl 1 1 6.957497 6.957497 18964 +workshipi 1 1 6.957497 6.957497 18965 +quest 1 1 6.957497 6.957497 18966 +sunivers 1 1 6.957497 6.957497 18967 +prisonerthi 1 1 6.957497 6.957497 18968 +quoteserv 1 1 6.957497 6.957497 18969 +fabul 1 1 6.957497 6.957497 18970 +pagebet 1 1 6.957497 6.957497 18971 +identitycaptain 1 1 6.957497 6.957497 18972 +lone 1 1 6.957497 6.957497 18960 +throughamaz 1 1 6.957497 6.957497 18973 +withtri 1 1 6.957497 6.957497 18974 +teri 1 1 6.957497 6.957497 18975 +incred 1 1 6.957497 6.957497 18976 +catthi 1 1 6.957497 6.957497 18977 +buttmunchextrem 1 1 6.957497 6.957497 18978 +dudemichael 1 1 6.957497 6.957497 18979 +nesmith 1 1 6.957497 6.957497 18980 +fanfoolmyth 1 1 6.957497 6.957497 18981 +figurewick 1 1 6.957497 6.957497 18982 +playervalu 1 1 6.957497 6.957497 18983 +studentment 1 1 6.957497 6.957497 18984 +defectivea 1 1 6.957497 6.957497 18985 +wkrp 1 1 6.957497 6.957497 18986 +cincinatti 1 1 6.957497 6.957497 18987 +figuregeek 1 1 6.957497 6.957497 18988 +tradesgonzo 1 1 6.957497 6.957497 18989 +admirernetscap 1 1 6.957497 6.957497 18990 +pornpersonifi 1 1 6.957497 6.957497 18991 +condom 1 1 6.957497 6.957497 18992 +stretch 1 1 6.957497 6.957497 18993 +blowflam 1 1 6.957497 6.957497 18994 +testicl 1 1 6.957497 6.957497 18995 +goodpoetri 1 1 6.957497 6.957497 18996 +guruhogwildthi 1 1 6.957497 6.957497 18997 +assman 1 1 6.957497 6.957497 18998 +manbig 1 1 6.957497 6.957497 18999 +dudeuh 1 1 6.957497 6.957497 19000 +ohprofession 1 1 6.957497 6.957497 19001 +muff 1 1 6.957497 6.957497 19002 +diverregress 1 1 6.957497 6.957497 19003 +lifeformherald 1 1 6.957497 6.957497 19004 +invas 1 1 6.957497 6.957497 19005 +forcechri 1 1 6.957497 6.957497 19006 +formsalienherpetophiletodd 1 1 6.957497 6.957497 19007 +hatth 1 1 6.957497 6.957497 19008 +mancreepi 1 1 6.957497 6.957497 19009 +headsmal 1 1 6.957497 6.957497 19010 +planetdr 1 1 6.957497 6.957497 19011 +companioneast 1 1 6.957497 6.957497 19012 +bunnycyberweenietcl 1 1 6.957497 6.957497 19013 +hellbeast 1 1 6.957497 6.957497 19014 +checkbox 1 1 6.957497 6.957497 18961 +drug 1 1 6.957497 6.957497 19015 +cosmo 1 1 6.957497 6.957497 19016 +irrit 1 1 6.957497 6.957497 19017 +scatolog 1 1 6.957497 6.957497 19018 +pervert 1 1 6.957497 6.957497 19019 +etymolog 1 1 6.957497 6.957497 19020 +phat 1 1 6.957497 6.957497 19021 +gnarli 1 1 6.957497 6.957497 19022 +cybermuffin 1 1 6.957497 6.957497 19023 +erotica 1 1 6.957497 6.957497 19024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..637af5c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +version 1 113 2.197225 2.197225 122 +assist 1 112 2.197225 2.197225 113 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +pictur 1 89 2.397895 2.397895 160 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +educ 1 86 2.484907 2.484907 191 +school 1 84 2.484907 2.484907 188 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +dept 1 64 2.772589 2.772589 291 +januari 1 62 2.772589 2.772589 264 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +suggest 1 53 2.944439 2.944439 331 +visitor 1 49 3.044522 3.044522 371 +could 1 46 3.091042 3.091042 383 +compani 1 41 3.218876 3.218876 423 +china 1 37 3.332205 3.332205 487 +thank 1 23 3.806662 3.806662 721 +self 1 22 3.850148 3.850148 761 +alumni 1 21 3.912023 3.912023 807 +miller 1 17 4.110874 4.110874 949 +side 1 15 4.248495 4.248495 1022 +promot 1 12 4.465908 4.465908 1235 +surf 1 11 4.553877 4.553877 1301 +america 1 11 4.553877 4.553877 1370 +paradyn 1 9 4.753590 4.753590 1614 +charg 1 9 4.753590 4.753590 1582 +port 1 8 4.875197 4.875197 1766 +hack 1 7 5.010635 5.010635 1950 +iowa 1 7 5.010635 5.010635 1971 +onto 1 6 5.164786 5.164786 2089 +barton 1 5 5.347108 5.347108 2371 +girlfriend 1 5 5.347108 5.347108 2579 +ignor 1 5 5.347108 5.347108 2288 +ling 1 4 5.568345 5.568345 3045 +hpux 1 3 5.857933 5.857933 3780 +temporarili 1 3 5.857933 5.857933 3692 +sheboygan 1 2 6.263398 6.263398 6198 +shameless 1 2 6.263398 6.263398 6146 +wuhan 1 2 6.263398 6.263398 5589 +chinaand 1 2 6.263398 6.263398 5151 +officem 1 2 6.263398 6.263398 6092 +marcelo 1 2 6.263398 6.263398 6199 +infom 1 2 6.263398 6.263398 5425 +bother 1 2 6.263398 6.263398 6164 +zheng 1 1 6.957497 6.957497 19025 +lzheng 1 1 6.957497 6.957497 19026 +boss 1 1 6.957497 6.957497 19027 +prese 1 1 6.957497 6.957497 19028 +winsconsin 1 1 6.957497 6.957497 19029 +goncalv 1 1 6.957497 6.957497 19030 +hereif 1 1 6.957497 6.957497 19031 +sthe 1 1 6.957497 6.957497 19032 +schoolssend 1 1 6.957497 6.957497 19033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..47312cb8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +wisc 1 242 1.386294 1.386294 33 +like 1 132 1.945910 1.945910 81 +look 1 107 2.197225 2.197225 115 +final 1 116 2.197225 2.197225 108 +need 1 98 2.302585 2.302585 135 +start 1 83 2.484907 2.484907 173 +know 1 80 2.564949 2.564949 198 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +plai 1 60 2.833213 2.833213 307 +advisor 1 51 2.995732 2.995732 355 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +photo 1 31 3.496508 3.496508 561 +turn 1 29 3.583519 3.583519 586 +team 1 27 3.637586 3.637586 625 +dai 1 22 3.850148 3.850148 753 +later 1 15 4.248495 4.248495 1043 +america 1 11 4.553877 4.553877 1370 +hello 1 10 4.653960 4.653960 1407 +sundai 1 10 4.653960 4.653960 1387 +said 1 9 4.753590 4.753590 1571 +round 1 8 4.875197 4.875197 1769 +monei 1 7 5.010635 5.010635 1934 +golf 1 6 5.164786 5.164786 2178 +leagu 1 4 5.568345 5.568345 3040 +passion 1 3 5.857933 5.857933 3633 +manuvir 1 1 6.957497 6.957497 19034 +pagemanuvir 1 1 6.957497 6.957497 19035 +dasnow 1 1 6.957497 6.957497 19036 +andwhat 1 1 6.957497 6.957497 19037 +feelfre 1 1 6.957497 6.957497 19038 +somethingsend 1 1 6.957497 6.957497 19039 +anact 1 1 6.957497 6.957497 19040 +manuvirwhat 1 1 6.957497 6.957497 19041 +thisto 1 1 6.957497 6.957497 19042 +theorigin 1 1 6.957497 6.957497 19043 +consin 1 1 6.957497 6.957497 19044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..1e1747cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,395 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +first 1 140 1.945910 1.945910 71 +click 1 142 1.945910 1.945910 78 +tool 1 117 2.079442 2.079442 93 +dayton 1 119 2.079442 2.079442 104 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +topic 1 114 2.197225 2.197225 110 +intern 1 108 2.197225 2.197225 128 +final 1 116 2.197225 2.197225 108 +specif 1 106 2.197225 2.197225 106 +assist 1 112 2.197225 2.197225 113 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +search 1 95 2.397895 2.397895 155 +educ 1 86 2.484907 2.484907 191 +wide 1 84 2.484907 2.484907 185 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +level 1 87 2.484907 2.484907 180 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +help 1 83 2.484907 2.484907 175 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +interfac 1 79 2.564949 2.564949 209 +messag 1 76 2.564949 2.564949 212 +complet 1 77 2.564949 2.564949 208 +june 1 79 2.564949 2.564949 214 +decemb 1 80 2.564949 2.564949 215 +david 1 71 2.639057 2.639057 232 +onlin 1 75 2.639057 2.639057 223 +name 1 72 2.639057 2.639057 220 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +effici 1 73 2.639057 2.639057 233 +solv 1 73 2.639057 2.639057 234 +java 1 70 2.708050 2.708050 248 +simul 1 66 2.708050 2.708050 255 +august 1 66 2.708050 2.708050 257 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +wednesdai 1 64 2.772589 2.772589 261 +organ 1 65 2.772589 2.772589 265 +import 1 65 2.772589 2.772589 282 +prof 1 64 2.772589 2.772589 273 +improv 1 62 2.772589 2.772589 289 +evalu 1 64 2.772589 2.772589 266 +new 1 64 2.772589 2.772589 262 +virtual 1 62 2.772589 2.772589 285 +abstract 1 62 2.772589 2.772589 276 +content 1 59 2.833213 2.833213 302 +juli 1 60 2.833213 2.833213 305 +share 1 59 2.833213 2.833213 304 +march 1 61 2.833213 2.833213 295 +space 1 57 2.890372 2.890372 310 +think 1 57 2.890372 2.890372 314 +index 1 56 2.890372 2.890372 309 +talk 1 53 2.944439 2.944439 336 +februari 1 54 2.944439 2.944439 328 +sampl 1 53 2.944439 2.944439 339 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +tabl 1 51 2.995732 2.995732 346 +investig 1 51 2.995732 2.995732 353 +give 1 50 3.044522 3.044522 359 +appoint 1 49 3.044522 3.044522 358 +frequent 1 49 3.044522 3.044522 367 +california 1 46 3.091042 3.091042 388 +effect 1 46 3.091042 3.091042 385 +mark 1 44 3.135494 3.135494 403 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +directori 1 45 3.135494 3.135494 396 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +futur 1 41 3.218876 3.218876 427 +combin 1 42 3.218876 3.218876 421 +cach 1 41 3.218876 3.218876 432 +programm 1 39 3.258097 3.258097 445 +transact 1 39 3.258097 3.258097 438 +streetmadison 1 38 3.295837 3.295837 474 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +sciencesunivers 1 37 3.332205 3.332205 486 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +formal 1 37 3.332205 3.332205 478 +multi 1 36 3.367296 3.367296 493 +bibliographi 1 34 3.401197 3.401197 518 +jame 1 35 3.401197 3.401197 507 +award 1 34 3.401197 3.401197 523 +toler 1 33 3.433987 3.433987 533 +john 1 33 3.433987 3.433987 532 +fault 1 32 3.465736 3.465736 547 +extend 1 32 3.465736 3.465736 539 +often 1 31 3.496508 3.496508 551 +richard 1 31 3.496508 3.496508 559 +option 1 30 3.555348 3.555348 575 +robert 1 30 3.555348 3.555348 567 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +pass 1 28 3.610918 3.610918 611 +cluster 1 28 3.610918 3.610918 612 +propos 1 28 3.610918 3.610918 602 +scale 1 28 3.610918 3.610918 613 +multiprocessor 1 28 3.610918 3.610918 605 +determin 1 27 3.637586 3.637586 630 +berkelei 1 26 3.688879 3.688879 657 +enabl 1 26 3.688879 3.688879 655 +revis 1 26 3.688879 3.688879 640 +experiment 1 26 3.688879 3.688879 645 +consist 1 26 3.688879 3.688879 651 +detect 1 26 3.688879 3.688879 646 +hill 1 25 3.737670 3.737670 670 +supercomput 1 25 3.737670 3.737670 681 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +higher 1 24 3.761200 3.761200 690 +proof 1 23 3.806662 3.806662 720 +size 1 23 3.806662 3.806662 713 +highli 1 23 3.806662 3.806662 725 +cooper 1 22 3.850148 3.850148 757 +sequenti 1 22 3.850148 3.850148 745 +hierarchi 1 22 3.850148 3.850148 744 +annot 1 21 3.912023 3.912023 775 +department 1 20 3.951244 3.951244 839 +fine 1 20 3.951244 3.951244 822 +smith 1 20 3.951244 3.951244 820 +scheme 1 20 3.951244 3.951244 818 +benchmark 1 19 4.007333 4.007333 859 +comparison 1 19 4.007333 4.007333 863 +miss 1 19 4.007333 4.007333 866 +definit 1 19 4.007333 4.007333 864 +wind 1 18 4.060443 4.060443 908 +partial 1 18 4.060443 4.060443 900 +less 1 18 4.060443 4.060443 892 +four 1 18 4.060443 4.060443 905 +seek 1 17 4.110874 4.110874 954 +steven 1 17 4.110874 4.110874 953 +asplo 1 17 4.110874 4.110874 948 +miller 1 17 4.110874 4.110874 949 +estim 1 17 4.110874 4.110874 930 +transfer 1 16 4.174387 4.174387 967 +young 1 16 4.174387 4.174387 991 +eduphon 1 15 4.248495 4.248495 1060 +massiv 1 15 4.248495 4.248495 1026 +hybrid 1 15 4.248495 4.248495 1057 +micro 1 15 4.248495 4.248495 1031 +manner 1 14 4.317488 4.317488 1074 +convent 1 14 4.317488 4.317488 1072 +coher 1 14 4.317488 4.317488 1109 +rank 1 14 4.317488 4.317488 1063 +suit 1 13 4.382027 4.382027 1129 +translat 1 13 4.382027 4.382027 1164 +alan 1 13 4.382027 4.382027 1146 +sigmetr 1 13 4.382027 4.382027 1173 +weak 1 13 4.382027 4.382027 1159 +target 1 12 4.465908 4.465908 1282 +buffer 1 12 4.465908 4.465908 1211 +gupta 1 12 4.465908 4.465908 1241 +mari 1 12 4.465908 4.465908 1266 +employ 1 12 4.465908 4.465908 1291 +michigan 1 11 4.553877 4.553877 1368 +wood 1 11 4.553877 4.553877 1355 +node 1 11 4.553877 4.553877 1326 +transpar 1 11 4.553877 4.553877 1325 +isca 1 11 4.553877 4.553877 1354 +rice 1 11 4.553877 4.553877 1336 +keyword 1 11 4.553877 4.553877 1356 +catalog 1 10 4.653960 4.653960 1431 +grain 1 10 4.653960 4.653960 1448 +princip 1 10 4.653960 4.653960 1397 +sosp 1 10 4.653960 4.653960 1416 +placement 1 10 4.653960 4.653960 1420 +stack 1 10 4.653960 4.653960 1389 +tunnel 1 9 4.753590 4.753590 1615 +patterson 1 9 4.753590 4.753590 1554 +sound 1 9 4.753590 4.753590 1605 +laru 1 9 4.753590 4.753590 1560 +frank 1 9 4.753590 4.753590 1568 +jeffrei 1 9 4.753590 4.753590 1612 +kong 1 9 4.753590 4.753590 1602 +vernon 1 9 4.753590 4.753590 1556 +spec 1 8 4.875197 4.875197 1640 +secretari 1 8 4.875197 4.875197 1775 +uniprocessor 1 8 4.875197 4.875197 1696 +quantit 1 8 4.875197 4.875197 1654 +presidenti 1 8 4.875197 4.875197 1737 +irregular 1 8 4.875197 4.875197 1768 +joel 1 8 4.875197 4.875197 1698 +unifi 1 8 4.875197 4.875197 1774 +lewi 1 8 4.875197 4.875197 1700 +roger 1 7 5.010635 5.010635 1892 +secondari 1 7 5.010635 5.010635 1884 +consensu 1 6 5.164786 5.164786 2080 +unpublish 1 6 5.164786 5.164786 2226 +ann 1 6 5.164786 5.164786 2065 +chandra 1 6 5.164786 5.164786 2091 +microsystem 1 6 5.164786 5.164786 2160 +engineeringat 1 5 5.347108 5.347108 2561 +advic 1 5 5.347108 5.347108 2509 +educurr 1 5 5.347108 5.347108 2504 +summarymi 1 5 5.347108 5.347108 2580 +middl 1 5 5.347108 5.347108 2372 +tempest 1 5 5.347108 5.347108 2548 +hypothet 1 5 5.347108 5.347108 2474 +mukherje 1 5 5.347108 5.347108 2586 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +optimist 1 5 5.347108 5.347108 2501 +chemic 1 5 5.347108 5.347108 2552 +lebeck 1 5 5.347108 5.347108 2582 +reinhardt 1 5 5.347108 5.347108 2583 +ioanni 1 5 5.347108 5.347108 2553 +forprogram 1 5 5.347108 5.347108 2361 +toc 1 5 5.347108 5.347108 2562 +andrea 1 5 5.347108 5.347108 2375 +hennessi 1 5 5.347108 5.347108 2289 +race 1 5 5.347108 5.347108 2417 +barton 1 5 5.347108 5.347108 2371 +markhil 1 4 5.568345 5.568345 2819 +wart 1 4 5.568345 5.568345 2987 +emphas 1 4 5.568345 5.568345 2672 +languagesand 1 4 5.568345 5.568345 3071 +align 1 4 5.568345 5.568345 2863 +manuscript 1 4 5.568345 5.568345 2750 +sabbat 1 4 5.568345 5.568345 2824 +talluri 1 4 5.568345 5.568345 2820 +sharma 1 4 5.568345 5.568345 2752 +ppopp 1 4 5.568345 5.568345 2774 +alvin 1 4 5.568345 5.568345 3084 +schoina 1 4 5.568345 5.568345 3085 +medium 1 4 5.568345 5.568345 2834 +implic 1 4 5.568345 5.568345 2696 +satish 1 4 5.568345 5.568345 2833 +anoop 1 4 5.568345 5.568345 2770 +ratio 1 4 5.568345 5.568345 2942 +crai 1 4 5.568345 5.568345 3012 +fingerson 1 3 5.857933 5.857933 4018 +thea 1 3 5.857933 5.857933 4019 +sklenar 1 3 5.857933 5.857933 4020 +programmingc 1 3 5.857933 5.857933 3232 +shubhendu 1 3 5.857933 5.857933 4028 +madhusudhan 1 3 5.857933 5.857933 4021 +saltz 1 3 5.857933 5.857933 3385 +surpass 1 3 5.857933 5.857933 3247 +superpag 1 3 5.857933 5.857933 3978 +megabyt 1 3 5.857933 5.857933 3732 +tradeoff 1 3 5.857933 5.857933 3387 +andelectr 1 2 6.263398 6.263398 6200 +wisconsint 1 2 6.263398 6.263398 6155 +teachingfal 1 2 6.263398 6.263398 5532 +ifal 1 2 6.263398 6.263398 4776 +architecturec 1 2 6.263398 6.263398 6127 +sustain 1 2 6.263398 6.263398 6201 +mywork 1 2 6.263398 6.263398 5800 +projectwith 1 2 6.263398 6.263398 4986 +uniformli 1 2 6.263398 6.263398 6202 +todevelop 1 2 6.263398 6.263398 5448 +aredevelop 1 2 6.263398 6.263398 4930 +similarto 1 2 6.263398 6.263398 6074 +aeronaut 1 2 6.263398 6.263398 5958 +anddavid 1 2 6.263398 6.263398 6126 +sashikanth 1 2 6.263398 6.263398 6122 +chandrasekaran 1 2 6.263398 6.263398 6121 +shamik 1 2 6.263398 6.263398 6195 +memorymultiprocessor 1 2 6.263398 6.263398 4529 +dionisio 1 2 6.263398 6.263398 6203 +pnevmatikato 1 2 6.263398 6.263398 6204 +adv 1 2 6.263398 6.263398 4540 +subbarao 1 2 6.263398 6.263398 6205 +shing 1 2 6.263398 6.263398 5146 +pagemark 1 1 6.957497 6.957497 19051 +andsummari 1 1 6.957497 6.957497 19052 +sampler 1 1 6.957497 6.957497 19048 +graduateslink 1 1 6.957497 6.957497 19053 +oralpresent 1 1 6.957497 6.957497 19054 +forcach 1 1 6.957497 6.957497 19055 +usamarkhil 1 1 6.957497 6.957497 19056 +icatalog 1 1 6.957497 6.957497 19057 +teachc 1 1 6.957497 6.957497 19058 +iieduc 1 1 6.957497 6.957497 19059 +evaluationresearch 1 1 6.957497 6.957497 19060 +multiprocessorsand 1 1 6.957497 6.957497 19061 +evaluationtechniqu 1 1 6.957497 6.957497 19062 +windtunnel 1 1 6.957497 6.957497 19063 +manystud 1 1 6.957497 6.957497 19064 +computerswil 1 1 6.957497 6.957497 19065 +levelparallel 1 1 6.957497 6.957497 19066 +inwhich 1 1 6.957497 6.957497 19067 +recentlypropos 1 1 6.957497 6.957497 19068 +aclust 1 1 6.957497 6.957497 19069 +toolsto 1 1 6.957497 6.957497 19070 +cull 1 1 6.957497 6.957497 19071 +designairplan 1 1 6.957497 6.957497 19072 +madhu 1 1 6.957497 6.957497 19049 +talluritarget 1 1 6.957497 6.957497 19073 +lookasid 1 1 6.957497 6.957497 19074 +subblock 1 1 6.957497 6.957497 19047 +tlb 1 1 6.957497 6.957497 19050 +superpagesand 1 1 6.957497 6.957497 19075 +asplosandsosppap 1 1 6.957497 6.957497 19076 +papersth 1 1 6.957497 6.957497 19077 +bidirect 1 1 6.957497 6.957497 19078 +pad 1 1 6.957497 6.957497 19079 +yousef 1 1 6.957497 6.957497 19080 +khalidi 1 1 6.957497 6.957497 19081 +microstructur 1 1 6.957497 6.957497 19082 +electrostat 1 1 6.957497 6.957497 19083 +traenkl 1 1 6.957497 6.957497 19084 +sangta 1 1 6.957497 6.957497 19085 +kessler 1 1 6.957497 6.957497 19046 +sarita 1 1 6.957497 6.957497 19045 +tpd 1 1 6.957497 6.957497 19086 +farid 1 1 6.957497 6.957497 19087 +pour 1 1 6.957497 6.957497 19088 +palacharla 1 1 6.957497 6.957497 19089 +kourosh 1 1 6.957497 6.957497 19090 +gharachorloo 1 1 6.957497 6.957497 19091 +netzer 1 1 6.957497 6.957497 19092 +vikram 1 1 6.957497 6.957497 19093 +kessleracm 1 1 6.957497 6.957497 19094 +graduatesmadhusudhan 1 1 6.957497 6.957497 19095 +updatedw 1 1 6.957497 6.957497 19096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..a1b0396a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +usaphon 1 9 4.753590 4.753590 1600 +assistantdepart 1 8 4.875197 4.875197 1784 +zaharioudaki 1 2 6.263398 6.263398 6119 +marko 1 1 6.957497 6.957497 19097 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..8b253cc8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +section 1 94 2.397895 2.397895 149 +michael 1 35 3.401197 3.401197 514 +birk 1 4 5.568345 5.568345 2791 +mbirk 1 3 5.857933 5.857933 3501 +alltraxx 1 1 6.957497 6.957497 19098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..51706755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +object 1 138 1.945910 1.945910 79 +hall 1 146 1.945910 1.945910 65 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +manag 1 114 2.197225 2.197225 125 +techniqu 1 99 2.302585 2.302585 138 +proceed 1 93 2.397895 2.397895 152 +ieee 1 86 2.484907 2.484907 190 +orient 1 80 2.564949 2.564949 205 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +simul 1 66 2.708050 2.708050 255 +march 1 61 2.833213 2.833213 295 +space 1 57 2.890372 2.890372 310 +pointer 1 48 3.044522 3.044522 368 +mark 1 44 3.135494 3.135494 403 +michael 1 35 3.401197 3.401197 514 +proc 1 26 3.688879 3.688879 649 +sigmod 1 19 4.007333 4.007333 877 +white 1 17 4.110874 4.110874 951 +dewitt 1 12 4.465908 4.465908 1270 +persist 1 11 4.553877 4.553877 1367 +franklin 1 10 4.653960 4.653960 1436 +naughton 1 10 4.653960 4.653960 1450 +carei 1 8 4.875197 4.875197 1781 +solomon 1 8 4.875197 4.875197 1716 +tsatalo 1 5 5.347108 5.347108 2581 +mcauliff 1 4 5.568345 5.568345 3083 +zwill 1 4 5.568345 5.568345 3076 +marvin 1 4 5.568345 5.568345 2806 +schuh 1 3 5.857933 5.857933 4014 +swizzl 1 3 5.857933 5.857933 3883 +shoringup 1 1 6.957497 6.957497 19099 +atrac 1 1 6.957497 6.957497 19100 +towardseffect 1 1 6.957497 6.957497 19101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..12dd0dbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +peopl 1 96 2.302585 2.302585 132 +school 1 84 2.484907 2.484907 188 +level 1 87 2.484907 2.484907 180 +appear 1 78 2.564949 2.564949 210 +know 1 80 2.564949 2.564949 198 +html 1 75 2.639057 2.639057 235 +symposium 1 72 2.639057 2.639057 238 +meet 1 72 2.639057 2.639057 229 +dept 1 64 2.772589 2.772589 291 +automat 1 61 2.833213 2.833213 306 +think 1 57 2.890372 2.890372 314 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +talk 1 53 2.944439 2.944439 336 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +pointer 1 48 3.044522 3.044522 368 +cool 1 49 3.044522 3.044522 374 +principl 1 48 3.044522 3.044522 357 +term 1 43 3.178054 3.178054 411 +fast 1 42 3.218876 3.218876 429 +movi 1 40 3.258097 3.258097 459 +littl 1 39 3.258097 3.258097 454 +submit 1 39 3.258097 3.258097 440 +realli 1 40 3.258097 3.258097 444 +paul 1 38 3.295837 3.295837 471 +abl 1 30 3.555348 3.555348 566 +hope 1 28 3.610918 3.610918 610 +never 1 25 3.737670 3.737670 671 +accur 1 25 3.737670 3.737670 680 +flow 1 24 3.761200 3.761200 700 +togeth 1 23 3.806662 3.806662 714 +try 1 22 3.850148 3.850148 764 +watch 1 21 3.912023 3.912023 789 +hous 1 21 3.912023 3.912023 801 +wrote 1 20 3.951244 3.951244 830 +mostli 1 19 4.007333 4.007333 869 +miss 1 19 4.007333 4.007333 866 +lot 1 18 4.060443 4.060443 889 +previous 1 17 4.110874 4.110874 923 +brown 1 16 4.174387 4.174387 977 +todd 1 15 4.248495 4.248495 1051 +doesn 1 15 4.248495 4.248495 1055 +susan 1 15 4.248495 4.248495 1050 +believ 1 13 4.382027 4.382027 1187 +recurs 1 13 4.382027 4.382027 1127 +step 1 13 4.382027 4.382027 1138 +emac 1 13 4.382027 4.382027 1143 +menu 1 13 4.382027 4.382027 1156 +jonathan 1 13 4.382027 4.382027 1174 +submiss 1 11 4.553877 4.553877 1298 +mode 1 9 4.753590 4.753590 1492 +marc 1 8 4.875197 4.875197 1680 +shapiro 1 8 4.875197 4.875197 1686 +analys 1 8 4.875197 4.875197 1666 +pldi 1 8 4.875197 4.875197 1704 +chan 1 7 5.010635 5.010635 1876 +elementari 1 7 5.010635 5.010635 1825 +interrupt 1 7 5.010635 5.010635 1793 +tag 1 7 5.010635 5.010635 1821 +lawrenc 1 7 5.010635 5.010635 1908 +recov 1 6 5.164786 5.164786 2235 +goldstein 1 6 5.164786 5.164786 2168 +elain 1 5 5.347108 5.347108 2496 +hyper 1 5 5.347108 5.347108 2435 +horwitz 1 5 5.347108 5.347108 2411 +fear 1 4 5.568345 5.568345 2911 +backward 1 4 5.568345 5.568345 2638 +popl 1 4 5.568345 5.568345 3068 +insensit 1 4 5.568345 5.568345 2716 +hoar 1 3 5.857933 5.857933 3875 +obsess 1 2 6.263398 6.263398 5924 +disappear 1 2 6.263398 6.263398 4748 +accid 1 2 6.263398 6.263398 5961 +softwarei 1 2 6.263398 6.263398 4960 +tautolog 1 1 6.957497 6.957497 19102 +fond 1 1 6.957497 6.957497 19103 +repuls 1 1 6.957497 6.957497 19104 +ponder 1 1 6.957497 6.957497 19105 +jacki 1 1 6.957497 6.957497 19106 +dimasi 1 1 6.957497 6.957497 19107 +twisti 1 1 6.957497 6.957497 19108 +amanda 1 1 6.957497 6.957497 19109 +peet 1 1 6.957497 6.957497 19110 +retreather 1 1 6.957497 6.957497 19111 +thepul 1 1 6.957497 6.957497 19112 +cobbl 1 1 6.957497 6.957497 19113 +nowinclud 1 1 6.957497 6.957497 19114 +shapiroand 1 1 6.957497 6.957497 19115 +marion 1 1 6.957497 6.957497 19116 +ferguson 1 1 6.957497 6.957497 19117 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..6f23a607 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +oper 1 180 1.609438 1.609438 34 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +introduct 1 126 2.079442 2.079442 87 +build 1 85 2.484907 2.484907 184 +august 1 66 2.708050 2.708050 257 +appoint 1 49 3.044522 3.044522 358 +mellencamp 1 2 6.263398 6.263398 4707 +mellen 1 2 6.263398 6.263398 4708 +pagerob 1 1 6.957497 6.957497 19118 +minimalist 1 1 6.957497 6.957497 19119 +taship 1 1 6.957497 6.957497 19120 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..e0903338 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +person 1 111 2.197225 2.197225 117 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +need 1 98 2.302585 2.302585 135 +section 1 94 2.397895 2.397895 149 +info 1 85 2.484907 2.484907 176 +west 1 83 2.484907 2.484907 192 +second 1 81 2.484907 2.484907 166 +want 1 79 2.564949 2.564949 199 +david 1 71 2.639057 2.639057 232 +plan 1 65 2.772589 2.772589 272 +back 1 60 2.833213 2.833213 297 +semest 1 58 2.890372 2.890372 312 +major 1 56 2.890372 2.890372 315 +direct 1 57 2.890372 2.890372 316 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +even 1 45 3.135494 3.135494 393 +map 1 39 3.258097 3.258097 452 +often 1 31 3.496508 3.496508 551 +great 1 27 3.637586 3.637586 626 +determin 1 27 3.637586 3.637586 630 +spent 1 25 3.737670 3.737670 676 +eric 1 19 4.007333 4.007333 870 +miss 1 19 4.007333 4.007333 866 +coupl 1 17 4.110874 4.110874 939 +brother 1 13 4.382027 4.382027 1189 +sister 1 9 4.753590 4.753590 1524 +undergrad 1 9 4.753590 4.753590 1589 +soccer 1 8 4.875197 4.875197 1752 +chanc 1 7 5.010635 5.010635 1960 +chess 1 5 5.347108 5.347108 2486 +rewrit 1 5 5.347108 5.347108 2367 +rep 1 4 5.568345 5.568345 3087 +michel 1 3 5.857933 5.857933 3791 +somedai 1 3 5.857933 5.857933 3919 +distract 1 3 5.857933 5.857933 3945 +melski 1 2 6.263398 6.263398 4780 +pagedavid 1 2 6.263398 6.263398 5114 +mill 1 2 6.263398 6.263398 6193 +awesom 1 2 6.263398 6.263398 6167 +russia 1 2 6.263398 6.263398 5756 +hasti 1 2 6.263398 6.263398 6173 +steal 1 2 6.263398 6.263398 5485 +melskicurr 1 1 6.957497 6.957497 19122 +statisticsmadison 1 1 6.957497 6.957497 19123 +permen 1 1 6.957497 6.957497 19124 +ivesmarshfield 1 1 6.957497 6.957497 19125 +kasei 1 1 6.957497 6.957497 19126 +myexact 1 1 6.957497 6.957497 19127 +russian 1 1 6.957497 6.957497 19121 +studiesher 1 1 6.957497 6.957497 19128 +semesterof 1 1 6.957497 6.957497 19129 +beenbik 1 1 6.957497 6.957497 19130 +numerousbook 1 1 6.957497 6.957497 19131 +tomapquest 1 1 6.957497 6.957497 19132 +alot 1 1 6.957497 6.957497 19133 +marshfield 1 1 6.957497 6.957497 19134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..b0fca461 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +techniqu 1 99 2.302585 2.302585 138 +section 1 94 2.397895 2.397895 149 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +west 1 83 2.484907 2.484907 192 +thing 1 84 2.484907 2.484907 189 +ieee 1 86 2.484907 2.484907 190 +school 1 84 2.484907 2.484907 188 +resourc 1 81 2.484907 2.484907 172 +educ 1 86 2.484907 2.484907 191 +build 1 85 2.484907 2.484907 184 +optim 1 79 2.564949 2.564949 197 +know 1 80 2.564949 2.564949 198 +dynam 1 76 2.564949 2.564949 194 +tuesdai 1 73 2.639057 2.639057 219 +addit 1 74 2.639057 2.639057 228 +nation 1 74 2.639057 2.639057 240 +symposium 1 72 2.639057 2.639057 238 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +receiv 1 66 2.708050 2.708050 244 +laboratori 1 63 2.772589 2.772589 292 +evalu 1 64 2.772589 2.772589 266 +organ 1 65 2.772589 2.772589 265 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +best 1 59 2.833213 2.833213 299 +simpl 1 60 2.833213 2.833213 298 +summer 1 56 2.890372 2.890372 311 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +space 1 57 2.890372 2.890372 310 +explor 1 58 2.890372 2.890372 324 +found 1 53 2.944439 2.944439 337 +scientif 1 53 2.944439 2.944439 341 +hardwar 1 51 2.995732 2.995732 350 +profession 1 51 2.995732 2.995732 345 +standard 1 48 3.044522 3.044522 365 +right 1 48 3.044522 3.044522 363 +mark 1 44 3.135494 3.135494 403 +algebra 1 45 3.135494 3.135494 394 +even 1 45 3.135494 3.135494 393 +show 1 43 3.178054 3.178054 417 +fast 1 42 3.218876 3.218876 429 +compani 1 41 3.218876 3.218876 423 +combin 1 42 3.218876 3.218876 421 +live 1 40 3.258097 3.258097 451 +streetmadison 1 38 3.295837 3.295837 474 +open 1 38 3.295837 3.295837 469 +multi 1 36 3.367296 3.367296 493 +game 1 36 3.367296 3.367296 498 +jame 1 35 3.401197 3.401197 507 +next 1 34 3.401197 3.401197 517 +eduoffic 1 33 3.433987 3.433987 531 +quot 1 29 3.583519 3.583519 582 +team 1 27 3.637586 3.637586 625 +challeng 1 26 3.688879 3.688879 653 +detect 1 26 3.688879 3.688879 646 +rule 1 26 3.688879 3.688879 638 +hill 1 25 3.737670 3.737670 670 +departmentunivers 1 24 3.761200 3.761200 711 +mobil 1 23 3.806662 3.806662 730 +honor 1 23 3.806662 3.806662 729 +head 1 23 3.806662 3.806662 732 +serv 1 22 3.850148 3.850148 758 +martin 1 21 3.912023 3.912023 794 +programminglanguag 1 21 3.912023 3.912023 782 +divis 1 21 3.912023 3.912023 803 +wrote 1 20 3.951244 3.951244 830 +wonder 1 20 3.951244 3.951244 815 +element 1 18 4.060443 4.060443 895 +medic 1 17 4.110874 4.110874 958 +ultim 1 17 4.110874 4.110874 943 +senior 1 14 4.317488 4.317488 1120 +train 1 14 4.317488 4.317488 1066 +charl 1 13 4.382027 4.382027 1149 +everyon 1 13 4.382027 4.382027 1148 +promot 1 12 4.465908 4.465908 1235 +land 1 12 4.465908 4.465908 1273 +basketbal 1 12 4.465908 4.465908 1289 +player 1 11 4.553877 4.553877 1371 +transmiss 1 9 4.753590 4.753590 1588 +discov 1 9 4.753590 4.753590 1562 +babylon 1 8 4.875197 4.875197 1731 +fischer 1 7 5.010635 5.010635 1893 +interestsi 1 7 5.010635 5.010635 1969 +paramet 1 7 5.010635 5.010635 1796 +dedic 1 7 5.010635 5.010635 1843 +ethic 1 7 5.010635 5.010635 1786 +footbal 1 7 5.010635 5.010635 1912 +trade 1 7 5.010635 5.010635 1815 +advis 1 6 5.164786 5.164786 2173 +reconstruct 1 6 5.164786 5.164786 2170 +determinist 1 6 5.164786 5.164786 2034 +pace 1 6 5.164786 5.164786 2011 +argonn 1 5 5.347108 5.347108 2461 +nuclear 1 5 5.347108 5.347108 2576 +minnesota 1 5 5.347108 5.347108 2469 +frisbe 1 5 5.347108 5.347108 2560 +publicationsresearch 1 4 5.568345 5.568345 2876 +chees 1 4 5.568345 5.568345 3090 +assistantcomput 1 3 5.857933 5.857933 4027 +usaemail 1 3 5.857933 5.857933 3722 +sit 1 3 5.857933 5.857933 3953 +fink 1 3 5.857933 5.857933 3425 +informationtechnolog 1 3 5.857933 5.857933 3836 +interchang 1 3 5.857933 5.857933 3893 +myfavorit 1 3 5.857933 5.857933 3852 +armi 1 3 5.857933 5.857933 3562 +milo 1 2 6.263398 6.263398 4781 +spectroscopi 1 2 6.263398 6.263398 6206 +meth 1 2 6.263398 6.263398 5872 +eventhough 1 2 6.263398 6.263398 6158 +conquer 1 2 6.263398 6.263398 5112 +combat 1 2 6.263398 6.263398 5473 +fight 1 2 6.263398 6.263398 5209 +monster 1 2 6.263398 6.263398 6207 +pagemilo 1 1 6.957497 6.957497 19143 +byappointmentba 1 1 6.957497 6.957497 19144 +gustavu 1 1 6.957497 6.957497 19135 +adolphu 1 1 6.957497 6.957497 19136 +larusteach 1 1 6.957497 6.957497 19145 +beinfluenc 1 1 6.957497 6.957497 19146 +humm 1 1 6.957497 6.957497 19138 +micklich 1 1 6.957497 6.957497 19139 +illicitsubst 1 1 6.957497 6.957497 19140 +neutron 1 1 6.957497 6.957497 19141 +yule 1 1 6.957497 6.957497 19147 +sagalovski 1 1 6.957497 6.957497 19148 +nucl 1 1 6.957497 6.957497 19149 +inst 1 1 6.957497 6.957497 19150 +hailperin 1 1 6.957497 6.957497 19142 +languageflex 1 1 6.957497 6.957497 19151 +anintern 1 1 6.957497 6.957497 19152 +toadvanc 1 1 6.957497 6.957497 19153 +fosteringth 1 1 6.957497 6.957497 19154 +highestprofession 1 1 6.957497 6.957497 19155 +bignfl 1 1 6.957497 6.957497 19156 +vike 1 1 6.957497 6.957497 19157 +colon 1 1 6.957497 6.957497 19158 +imho 1 1 6.957497 6.957497 19159 +atlanti 1 1 6.957497 6.957497 19137 +mythic 1 1 6.957497 6.957497 19160 +engaug 1 1 6.957497 6.957497 19161 +wizard 1 1 6.957497 6.957497 19162 +underworld 1 1 6.957497 6.957497 19163 +ofsocc 1 1 6.957497 6.957497 19164 +afrisbe 1 1 6.957497 6.957497 19165 +quarterback 1 1 6.957497 6.957497 19166 +ultimatein 1 1 6.957497 6.957497 19167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..b3ade814 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +report 1 131 2.079442 2.079442 92 +technolog 1 131 2.079442 2.079442 102 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +technic 1 100 2.302585 2.302585 140 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +center 1 88 2.397895 2.397895 158 +west 1 83 2.484907 2.484907 192 +resourc 1 81 2.484907 2.484907 172 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +optim 1 79 2.564949 2.564949 197 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +resum 1 79 2.564949 2.564949 217 +server 1 76 2.564949 2.564949 204 +free 1 73 2.639057 2.639057 224 +logic 1 71 2.639057 2.639057 230 +multimedia 1 68 2.708050 2.708050 258 +view 1 70 2.708050 2.708050 254 +complex 1 64 2.772589 2.772589 269 +dept 1 64 2.772589 2.772589 291 +abstract 1 62 2.772589 2.772589 276 +juli 1 60 2.833213 2.833213 305 +februari 1 54 2.944439 2.944439 328 +advisor 1 51 2.995732 2.995732 355 +pointer 1 48 3.044522 3.044522 368 +effect 1 46 3.091042 3.091042 385 +continu 1 39 3.258097 3.258097 448 +submit 1 39 3.258097 3.258097 440 +societi 1 40 3.258097 3.258097 456 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +feel 1 37 3.332205 3.332205 483 +multi 1 36 3.367296 3.367296 493 +survei 1 35 3.401197 3.401197 513 +michael 1 35 3.401197 3.401197 514 +queri 1 33 3.433987 3.433987 524 +enhanc 1 26 3.688879 3.688879 644 +sequenti 1 22 3.850148 3.850148 745 +sigmod 1 19 4.007333 4.007333 877 +media 1 19 4.007333 4.007333 861 +dimension 1 18 4.060443 4.060443 909 +stat 1 17 4.110874 4.110874 924 +canada 1 13 4.382027 4.382027 1158 +dbm 1 13 4.382027 4.382027 1136 +probabilist 1 11 4.553877 4.553877 1343 +vldb 1 10 4.653960 4.653960 1470 +candid 1 9 4.753590 4.753590 1606 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +watson 1 8 4.875197 4.875197 1691 +refere 1 7 5.010635 5.010635 1895 +montreal 1 7 5.010635 5.010635 1961 +usaoffic 1 6 5.164786 5.164786 2159 +silberschatz 1 6 5.164786 5.164786 1978 +peek 1 6 5.164786 5.164786 2169 +almaden 1 5 5.347108 5.347108 2511 +informat 1 3 5.857933 5.857933 3839 +mino 1 2 6.263398 6.263398 6208 +patra 1 2 6.263398 6.263398 5537 +garofalaki 1 2 6.263398 6.263398 6209 +ozden 1 2 6.263398 6.263398 5749 +reasearch 1 2 6.263398 6.263398 5538 +hellen 1 2 6.263398 6.263398 6210 +garofalakismino 1 1 6.957497 6.957497 19168 +eduphd 1 1 6.957497 6.957497 19169 +workresearch 1 1 6.957497 6.957497 19170 +theoryeduc 1 1 6.957497 6.957497 19171 +banu 1 1 6.957497 6.957497 19172 +ioannidismor 1 1 6.957497 6.957497 19173 +centerdr 1 1 6.957497 6.957497 19174 +bibliograpi 1 1 6.957497 6.957497 19175 +perpetu 1 1 6.957497 6.957497 19176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..448005a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +associ 1 93 2.397895 2.397895 151 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +sciencesunivers 1 37 3.332205 3.332205 486 +paradyn 1 9 4.753590 4.753590 1614 +marcelo 1 2 6.263398 6.263398 6199 +sheboygan 1 2 6.263398 6.263398 6198 +gonalv 1 1 6.957497 6.957497 19177 +mjrg 1 1 6.957497 6.957497 19178 +addresswork 1 1 6.957497 6.957497 19179 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..c892288e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +imag 1 91 2.397895 2.397895 161 +west 1 83 2.484907 2.484907 192 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +brian 1 38 3.295837 3.295837 466 +streetmadison 1 38 3.295837 3.295837 474 +compress 1 23 3.806662 3.806662 719 +chuck 1 14 4.317488 4.317488 1108 +bandwidth 1 11 4.553877 4.553877 1365 +morgan 1 9 4.753590 4.753590 1484 +studentcomput 1 7 5.010635 5.010635 1963 +conferenc 1 7 5.010635 5.010635 1857 +morgangradu 1 1 6.957497 6.957497 19180 +dyerresearch 1 1 6.957497 6.957497 19181 +interestsvirtu 1 1 6.957497 6.957497 19182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..7234050d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +mani 1 92 2.397895 2.397895 150 +level 1 87 2.484907 2.484907 180 +chang 1 82 2.484907 2.484907 163 +want 1 79 2.564949 2.564949 199 +write 1 72 2.639057 2.639057 222 +meet 1 72 2.639057 2.639057 229 +nation 1 74 2.639057 2.639057 240 +free 1 73 2.639057 2.639057 224 +degre 1 69 2.708050 2.708050 259 +descript 1 64 2.772589 2.772589 271 +visit 1 63 2.772589 2.772589 288 +copi 1 63 2.772589 2.772589 284 +sampl 1 53 2.944439 2.944439 339 +processor 1 54 2.944439 2.944439 335 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +advisor 1 51 2.995732 2.995732 355 +numer 1 49 3.044522 3.044522 369 +futur 1 41 3.218876 3.218876 427 +york 1 41 3.218876 3.218876 435 +howev 1 41 3.218876 3.218876 422 +editor 1 41 3.218876 3.218876 433 +edit 1 42 3.218876 3.218876 418 +slide 1 38 3.295837 3.295837 467 +sciencesunivers 1 37 3.332205 3.332205 486 +download 1 36 3.367296 3.367296 489 +short 1 36 3.367296 3.367296 499 +obtain 1 33 3.433987 3.433987 534 +depend 1 29 3.583519 3.583519 583 +load 1 28 3.610918 3.610918 601 +bookmark 1 26 3.688879 3.688879 639 +compress 1 23 3.806662 3.806662 719 +instal 1 22 3.850148 3.850148 754 +leav 1 21 3.912023 3.912023 772 +vlsi 1 21 3.912023 3.912023 795 +sure 1 20 3.951244 3.951244 813 +excel 1 19 4.007333 4.007333 868 +transfer 1 16 4.174387 4.174387 967 +balanc 1 14 4.317488 4.317488 1112 +brother 1 13 4.382027 4.382027 1189 +wife 1 13 4.382027 4.382027 1196 +resid 1 10 4.653960 4.653960 1461 +poetri 1 9 4.753590 4.753590 1596 +herefor 1 9 4.753590 4.753590 1483 +multiscalar 1 8 4.875197 4.875197 1783 +dictionari 1 8 4.875197 4.875197 1642 +earn 1 7 5.010635 5.010635 1788 +pipelin 1 7 5.010635 5.010635 1830 +peek 1 6 5.164786 5.164786 2169 +greec 1 6 5.164786 5.164786 2208 +andrea 1 5 5.347108 5.347108 2375 +guri 1 5 5.347108 5.347108 2578 +hyper 1 5 5.347108 5.347108 2435 +kestrel 1 4 5.568345 5.568345 2990 +decoupl 1 4 5.568345 5.568345 2898 +mess 1 4 5.568345 5.568345 2886 +specul 1 3 5.857933 5.857933 3951 +uncompress 1 3 5.857933 5.857933 3177 +crete 1 3 5.857933 5.857933 3773 +greek 1 3 5.857933 5.857933 3595 +moshovo 1 2 6.263398 6.263398 6211 +madisonadvisor 1 2 6.263398 6.263398 6212 +instituteof 1 2 6.263398 6.263398 5507 +architecturethat 1 2 6.263398 6.263398 5876 +hellen 1 2 6.263398 6.263398 6210 +font 1 2 6.263398 6.263398 5845 +moshovosresearch 1 1 6.957497 6.957497 19183 +sohigroup 1 1 6.957497 6.957497 19184 +notese 1 1 6.957497 6.957497 19185 +aroundw 1 1 6.957497 6.957497 19186 +clickheremi 1 1 6.957497 6.957497 19187 +explot 1 1 6.957497 6.957497 19188 +thecour 1 1 6.957497 6.957497 19189 +theopportun 1 1 6.957497 6.957497 19190 +kateveni 1 1 6.957497 6.957497 19191 +viha 1 1 6.957497 6.957497 19192 +resouc 1 1 6.957497 6.957497 19193 +atwww 1 1 6.957497 6.957497 19194 +devil 1 1 6.957497 6.957497 19195 +fraud 1 1 6.957497 6.957497 19196 +centerusenet 1 1 6.957497 6.957497 19197 +afax 1 1 6.957497 6.957497 19198 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..a0000dbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +octob 1 89 2.397895 2.397895 156 +chang 1 82 2.484907 2.484907 163 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +main 1 67 2.708050 2.708050 256 +back 1 60 2.833213 2.833213 297 +sever 1 56 2.890372 2.890372 322 +variou 1 56 2.890372 2.890372 317 +index 1 56 2.890372 2.890372 309 +friend 1 48 3.044522 3.044522 376 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +background 1 25 3.737670 3.737670 664 +navig 1 21 3.912023 3.912023 796 +brief 1 16 4.174387 4.174387 1001 +minor 1 12 4.465908 4.465908 1237 +black 1 10 4.653960 4.653960 1418 +prefer 1 9 4.753590 4.753590 1491 +contrast 1 8 4.875197 4.875197 1637 +older 1 5 5.347108 5.347108 2387 +toni 1 3 5.857933 5.857933 3415 +herear 1 2 6.263398 6.263398 5947 +pagewhat 1 1 6.957497 6.957497 19199 +newoctob 1 1 6.957497 6.957497 19200 +inmadison 1 1 6.957497 6.957497 19201 +informationlast 1 1 6.957497 6.957497 19202 +educopyright 1 1 6.957497 6.957497 19203 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..2ec202fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +model 1 145 1.945910 1.945910 69 +year 1 148 1.945910 1.945910 84 +machin 1 129 2.079442 2.079442 95 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +studi 1 120 2.079442 2.079442 91 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +educ 1 86 2.484907 2.484907 191 +thing 1 84 2.484907 2.484907 189 +learn 1 86 2.484907 2.484907 170 +stuff 1 87 2.484907 2.484907 171 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +logic 1 71 2.639057 2.639057 230 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +line 1 75 2.639057 2.639057 231 +dept 1 64 2.772589 2.772589 291 +previou 1 62 2.772589 2.772589 290 +plai 1 60 2.833213 2.833213 307 +reason 1 57 2.890372 2.890372 318 +summer 1 56 2.890372 2.890372 311 +talk 1 53 2.944439 2.944439 336 +februari 1 54 2.944439 2.944439 328 +finger 1 52 2.995732 2.995732 354 +particular 1 51 2.995732 2.995732 352 +digit 1 52 2.995732 2.995732 348 +telephon 1 50 3.044522 3.044522 373 +life 1 50 3.044522 3.044522 375 +better 1 45 3.135494 3.135494 401 +even 1 45 3.135494 3.135494 393 +third 1 43 3.178054 3.178054 412 +around 1 43 3.178054 3.178054 415 +might 1 41 3.218876 3.218876 426 +examin 1 42 3.218876 3.218876 424 +realli 1 40 3.258097 3.258097 444 +probabl 1 40 3.258097 3.258097 455 +feel 1 37 3.332205 3.332205 483 +product 1 33 3.433987 3.433987 527 +often 1 31 3.496508 3.496508 551 +usual 1 28 3.610918 3.610918 608 +weather 1 28 3.610918 3.610918 618 +campu 1 27 3.637586 3.637586 623 +enjoi 1 26 3.688879 3.688879 660 +notic 1 25 3.737670 3.737670 675 +departmentunivers 1 24 3.761200 3.761200 711 +sometim 1 24 3.761200 3.761200 696 +alwai 1 24 3.761200 3.761200 691 +wish 1 24 3.761200 3.761200 692 +head 1 23 3.806662 3.806662 732 +hierarchi 1 22 3.850148 3.850148 744 +martin 1 21 3.912023 3.912023 794 +exploit 1 20 3.951244 3.951244 836 +exercis 1 19 4.007333 4.007333 842 +concentr 1 18 4.060443 4.060443 906 +ultim 1 17 4.110874 4.110874 943 +doesn 1 15 4.248495 4.248495 1055 +qual 1 15 4.248495 4.248495 1062 +senior 1 14 4.317488 4.317488 1120 +role 1 14 4.317488 4.317488 1101 +affili 1 13 4.382027 4.382027 1194 +basketbal 1 12 4.465908 4.465908 1289 +mountain 1 10 4.653960 4.653960 1456 +bike 1 10 4.653960 4.653960 1468 +mention 1 9 4.753590 4.753590 1569 +qualifi 1 8 4.875197 4.875197 1721 +heart 1 8 4.875197 4.875197 1729 +besid 1 8 4.875197 4.875197 1681 +round 1 8 4.875197 4.875197 1769 +relax 1 6 5.164786 5.164786 2120 +squash 1 6 5.164786 5.164786 2223 +adjust 1 5 5.347108 5.347108 2422 +crucial 1 5 5.347108 5.347108 2384 +frisbe 1 5 5.347108 5.347108 2560 +afraid 1 4 5.568345 5.568345 3053 +gone 1 4 5.568345 5.568345 3072 +poorli 1 4 5.568345 5.568345 2781 +wesleyan 1 3 5.857933 5.857933 3988 +ream 1 2 6.263398 6.263398 4783 +coke 1 2 6.263398 6.263398 5935 +mream 1 2 6.263398 6.263398 4784 +terrain 1 2 6.263398 6.263398 6174 +logicprogram 1 2 6.263398 6.263398 4262 +interestsin 1 2 6.263398 6.263398 6213 +unif 1 2 6.263398 6.263398 5910 +mighti 1 2 6.263398 6.263398 4863 +tomi 1 2 6.263398 6.263398 5846 +mental 1 2 6.263398 6.263398 5802 +yeargradu 1 2 6.263398 6.263398 6015 +poobah 1 1 6.957497 6.957497 19204 +edufal 1 1 6.957497 6.957497 19205 +scheduleresearch 1 1 6.957497 6.957497 19206 +tin 1 1 6.957497 6.957497 19207 +orientedenviron 1 1 6.957497 6.957497 19208 +postscriptand 1 1 6.957497 6.957497 19209 +faint 1 1 6.957497 6.957497 19210 +alink 1 1 6.957497 6.957497 19211 +honorsthesi 1 1 6.957497 6.957497 19212 +poobahlook 1 1 6.957497 6.957497 19213 +dear 1 1 6.957497 6.957497 19214 +tosomeon 1 1 6.957497 6.957497 19215 +youshould 1 1 6.957497 6.957497 19216 +elton 1 1 6.957497 6.957497 19217 +imaginethat 1 1 6.957497 6.957497 19218 +aforement 1 1 6.957497 6.957497 19219 +poobahship 1 1 6.957497 6.957497 19220 +ill 1 1 6.957497 6.957497 19221 +afew 1 1 6.957497 6.957497 19222 +indatabas 1 1 6.957497 6.957497 19223 +inearli 1 1 6.957497 6.957497 19224 +andinfrequ 1 1 6.957497 6.957497 19225 +rapidlyrid 1 1 6.957497 6.957497 19226 +chilliest 1 1 6.957497 6.957497 19227 +helmet 1 1 6.957497 6.957497 19228 +mynot 1 1 6.957497 6.957497 19229 +ilik 1 1 6.957497 6.957497 19230 +librarylast 1 1 6.957497 6.957497 19231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..5b69882c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +avail 1 169 1.791759 1.791759 48 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +wide 1 84 2.484907 2.484907 185 +sourc 1 77 2.564949 2.564949 201 +talk 1 53 2.944439 2.944439 336 +local 1 55 2.944439 2.944439 334 +februari 1 54 2.944439 2.944439 328 +given 1 32 3.465736 3.465736 538 +fund 1 21 3.912023 3.912023 805 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 1 6 5.164786 5.164786 2237 +departmentat 1 5 5.347108 5.347108 2513 +guri 1 5 5.347108 5.347108 2578 +contributor 1 2 6.263398 6.263398 6214 +pagewisconsin 1 1 6.957497 6.957497 19232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..1322d895 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +pictur 1 89 2.397895 2.397895 160 +build 1 85 2.484907 2.484907 184 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +wide 1 84 2.484907 2.484907 185 +april 1 77 2.564949 2.564949 196 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +intellig 1 72 2.639057 2.639057 225 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +test 1 66 2.708050 2.708050 252 +artifici 1 63 2.772589 2.772589 280 +back 1 60 2.833213 2.833213 297 +semest 1 58 2.890372 2.890372 312 +undergradu 1 54 2.944439 2.944439 338 +right 1 48 3.044522 3.044522 363 +friend 1 48 3.044522 3.044522 376 +frequent 1 49 3.044522 3.044522 367 +still 1 50 3.044522 3.044522 362 +algebra 1 45 3.135494 3.135494 394 +favorit 1 44 3.135494 3.135494 410 +around 1 43 3.178054 3.178054 415 +vision 1 41 3.218876 3.218876 430 +hand 1 37 3.332205 3.332205 475 +michael 1 35 3.401197 3.401197 514 +eduoffic 1 33 3.433987 3.433987 531 +photo 1 31 3.496508 3.496508 561 +univ 1 28 3.610918 3.610918 617 +hope 1 28 3.610918 3.610918 610 +ask 1 28 3.610918 3.610918 597 +administr 1 27 3.637586 3.637586 628 +comp 1 26 3.688879 3.688879 650 +sport 1 25 3.737670 3.737670 683 +mike 1 24 3.761200 3.761200 703 +sometim 1 24 3.761200 3.761200 696 +theunivers 1 21 3.912023 3.912023 797 +lower 1 18 4.060443 4.060443 886 +stat 1 17 4.110874 4.110874 924 +bachelor 1 17 4.110874 4.110874 957 +chuck 1 14 4.317488 4.317488 1108 +dave 1 14 4.317488 4.317488 1098 +sai 1 13 4.382027 4.382027 1175 +suit 1 13 4.382027 4.382027 1129 +land 1 12 4.465908 4.465908 1273 +mari 1 12 4.465908 4.465908 1266 +touch 1 12 4.465908 4.465908 1288 +host 1 11 4.553877 4.553877 1306 +folk 1 9 4.753590 4.753590 1597 +joel 1 8 4.875197 4.875197 1698 +corner 1 7 5.010635 5.010635 1909 +usenet 1 7 5.010635 5.010635 1839 +maryland 1 6 5.164786 5.164786 2140 +gzip 1 6 5.164786 5.164786 2117 +billi 1 5 5.347108 5.347108 2404 +madisoncomput 1 5 5.347108 5.347108 2391 +steel 1 4 5.568345 5.568345 2818 +chees 1 4 5.568345 5.568345 3090 +kill 1 4 5.568345 5.568345 3000 +sit 1 3 5.857933 5.857933 3953 +stamp 1 3 5.857933 5.857933 3678 +artificialintellig 1 3 5.857933 5.857933 3608 +narrow 1 3 5.857933 5.857933 3807 +predat 1 3 5.857933 5.857933 3135 +forgot 1 2 6.263398 6.263398 4769 +linksmi 1 2 6.263398 6.263398 6215 +barri 1 2 6.263398 6.263398 5149 +eduunivers 1 2 6.263398 6.263398 6216 +homepagemik 1 1 6.957497 6.957497 19233 +homepagemsteel 1 1 6.957497 6.957497 19234 +struggl 1 1 6.957497 6.957497 19235 +sometimearound 1 1 6.957497 6.957497 19236 +motto 1 1 6.957497 6.957497 19237 +freezein 1 1 6.957497 6.957497 19238 +graduateinstructor 1 1 6.957497 6.957497 19239 +scomput 1 1 6.957497 6.957497 19240 +publicationsgrindston 1 1 6.957497 6.957497 19241 +jefferyk 1 1 6.957497 6.957497 19242 +hollingsworth 1 1 6.957497 6.957497 19243 +reportc 1 1 6.957497 6.957497 19244 +postscriptfil 1 1 6.957497 6.957497 19245 +semesterc 1 1 6.957497 6.957497 19246 +vernonc 1 1 6.957497 6.957497 19247 +dyermi 1 1 6.957497 6.957497 19248 +pagesinform 1 1 6.957497 6.957497 19249 +gettingin 1 1 6.957497 6.957497 19250 +marylandwhom 1 1 6.957497 6.957497 19251 +teamssom 1 1 6.957497 6.957497 19252 +listth 1 1 6.957497 6.957497 19253 +listi 1 1 6.957497 6.957497 19254 +thefruit 1 1 6.957497 6.957497 19255 +ofmaryland 1 1 6.957497 6.957497 19256 +insidejok 1 1 6.957497 6.957497 19257 +andnow 1 1 6.957497 6.957497 19258 +someinfrar 1 1 6.957497 6.957497 19259 +looklik 1 1 6.957497 6.957497 19260 +infrar 1 1 6.957497 6.957497 19261 +memik 1 1 6.957497 6.957497 19262 +steelemsteel 1 1 6.957497 6.957497 19263 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..383db682 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +section 1 94 2.397895 2.397895 149 +want 1 79 2.564949 2.564949 199 +visit 1 63 2.772589 2.772589 288 +colleg 1 61 2.833213 2.833213 300 +might 1 41 3.218876 3.218876 426 +mayb 1 15 4.248495 4.248495 1014 +maryland 1 6 5.164786 5.164786 2140 +park 1 6 5.164786 5.164786 2218 +maria 1 4 5.568345 5.568345 2954 +pagemaria 1 1 6.957497 6.957497 19264 +pagehow 1 1 6.957497 6.957497 19265 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..4c21db21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +check 1 115 2.197225 2.197225 118 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +pictur 1 89 2.397895 2.397895 160 +member 1 84 2.484907 2.484907 165 +good 1 77 2.564949 2.564949 200 +main 1 67 2.708050 2.708050 256 +locat 1 59 2.833213 2.833213 303 +plai 1 60 2.833213 2.833213 307 +finger 1 52 2.995732 2.995732 354 +visual 1 48 3.044522 3.044522 372 +around 1 43 3.178054 3.178054 415 +music 1 42 3.218876 3.218876 436 +howev 1 41 3.218876 3.218876 422 +staff 1 36 3.367296 3.367296 490 +known 1 24 3.761200 3.761200 702 +finish 1 22 3.850148 3.850148 748 +born 1 21 3.912023 3.912023 798 +fact 1 21 3.912023 3.912023 780 +watch 1 21 3.912023 3.912023 789 +citi 1 19 4.007333 4.007333 874 +spend 1 19 4.007333 4.007333 850 +beauti 1 18 4.060443 4.060443 912 +stori 1 14 4.317488 4.317488 1087 +central 1 13 4.382027 4.382027 1160 +cook 1 10 4.653960 4.653960 1464 +paradyn 1 9 4.753590 4.753590 1614 +undergrad 1 9 4.753590 4.753590 1589 +simon 1 8 4.875197 4.875197 1697 +guitar 1 8 4.875197 4.875197 1758 +capit 1 7 5.010635 5.010635 1957 +apart 1 7 5.010635 5.010635 1936 +antonio 1 6 5.164786 5.164786 2186 +england 1 5 5.347108 5.347108 2557 +million 1 5 5.347108 5.347108 2495 +western 1 4 5.568345 5.568345 3062 +basebal 1 4 5.568345 5.568345 2969 +myph 1 3 5.857933 5.857933 3880 +popul 1 3 5.857933 5.857933 3235 +pleaseclick 1 2 6.263398 6.263398 5432 +oscar 1 1 6.957497 6.957497 19269 +naim 1 1 6.957497 6.957497 19268 +bienvenido 1 1 6.957497 6.957497 19270 +southampton 1 1 6.957497 6.957497 19271 +universidad 1 1 6.957497 6.957497 19272 +bolivar 1 1 6.957497 6.957497 19273 +caraca 1 1 6.957497 6.957497 19274 +venezuela 1 1 6.957497 6.957497 19266 +barquisimeto 1 1 6.957497 6.957497 19267 +barquisimetoi 1 1 6.957497 6.957497 19275 +ofabout 1 1 6.957497 6.957497 19276 +playclass 1 1 6.957497 6.957497 19277 +excellentmaestro 1 1 6.957497 6.957497 19278 +rodrigo 1 1 6.957497 6.957497 19279 +riera 1 1 6.957497 6.957497 19280 +lauro 1 1 6.957497 6.957497 19281 +sherlock 1 1 6.957497 6.957497 19282 +holm 1 1 6.957497 6.957497 19283 +beati 1 1 6.957497 6.957497 19284 +mundo 1 1 6.957497 6.957497 19285 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..771e0177 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +find 1 111 2.197225 2.197225 111 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +pictur 1 89 2.397895 2.397895 160 +want 1 79 2.564949 2.564949 199 +street 1 63 2.772589 2.772589 293 +import 1 65 2.772589 2.772589 282 +guid 1 63 2.772589 2.772589 267 +visit 1 63 2.772589 2.772589 288 +realli 1 40 3.258097 3.258097 444 +notic 1 25 3.737670 3.737670 675 +nice 1 20 3.951244 3.951244 809 +georg 1 16 4.174387 4.174387 994 +worth 1 11 4.553877 4.553877 1294 +alex 1 6 5.164786 5.164786 2130 +greek 1 3 5.857933 5.857933 3595 +decent 1 2 6.263398 6.263398 5542 +rochest 1 2 6.263398 6.263398 6142 +anastassia 1 1 6.957497 6.957497 19286 +ailamaki 1 1 6.957497 6.957497 19287 +islandsar 1 1 6.957497 6.957497 19288 +natassa 1 1 6.957497 6.957497 19289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..6c622f54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +analysi 1 124 2.079442 2.079442 98 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +techniqu 1 99 2.302585 2.302585 138 +proceed 1 93 2.397895 2.397895 152 +larg 1 82 2.484907 2.484907 168 +david 1 71 2.639057 2.639057 232 +goal 1 66 2.708050 2.708050 250 +main 1 67 2.708050 2.708050 256 +improv 1 62 2.772589 2.772589 289 +three 1 54 2.944439 2.944439 330 +submit 1 39 3.258097 3.258097 440 +multi 1 36 3.367296 3.367296 493 +michael 1 35 3.401197 3.401197 514 +storag 1 31 3.496508 3.496508 553 +arrai 1 27 3.637586 3.637586 627 +toward 1 25 3.737670 3.737670 668 +hierarchi 1 22 3.850148 3.850148 744 +prepar 1 20 3.951244 3.951244 824 +benchmark 1 19 4.007333 4.007333 859 +dimension 1 18 4.060443 4.060443 909 +estim 1 17 4.110874 4.110874 930 +spatial 1 16 4.174387 4.174387 988 +ramakrishnan 1 16 4.174387 4.174387 972 +indic 1 15 4.248495 4.248495 1013 +dbm 1 13 4.382027 4.382027 1136 +overal 1 12 4.465908 4.465908 1254 +workload 1 12 4.465908 4.465908 1210 +gupta 1 12 4.465908 4.465908 1241 +raghu 1 12 4.465908 4.465908 1212 +dewitt 1 12 4.465908 4.465908 1270 +jeffrei 1 9 4.753590 4.753590 1612 +presenc 1 8 4.875197 4.875197 1671 +carei 1 8 4.875197 4.875197 1781 +bombai 1 7 5.010635 5.010635 1972 +eduresearch 1 6 5.164786 5.164786 2205 +aggreg 1 6 5.164786 5.164786 2219 +prasad 1 6 5.164786 5.164786 2126 +eas 1 5 5.347108 5.347108 2267 +ofinterest 1 5 5.347108 5.347108 2323 +deshpand 1 5 5.347108 5.347108 2431 +ashish 1 5 5.347108 5.347108 2473 +tuft 1 5 5.347108 5.347108 2575 +cube 1 4 5.568345 5.568345 2940 +multidimension 1 4 5.568345 5.568345 3091 +amit 1 4 5.568345 5.568345 2972 +ramasami 1 4 5.568345 5.568345 3088 +shah 1 4 5.568345 5.568345 2814 +zhao 1 4 5.568345 5.568345 2699 +kristin 1 4 5.568345 5.568345 3089 +surpass 1 3 5.857933 5.857933 3247 +mumbai 1 3 5.857933 5.857933 4029 +shukla 1 3 5.857933 5.857933 4030 +karthikeyan 1 3 5.857933 5.857933 4031 +johann 1 3 5.857933 5.857933 3758 +moham 1 3 5.857933 5.857933 3848 +asgarian 1 3 5.857933 5.857933 3447 +andprocess 1 2 6.263398 6.263398 4925 +agarw 1 2 6.263398 6.263398 5352 +rakesh 1 2 6.263398 6.263398 6017 +agraw 1 2 6.263398 6.263398 4536 +molap 1 2 6.263398 6.263398 6217 +naughtonjeffrei 1 1 6.957497 6.957497 19290 +naughtonnaughton 1 1 6.957497 6.957497 19291 +interestsolap 1 1 6.957497 6.957497 19292 +relationaldbm 1 1 6.957497 6.957497 19293 +ofdatabas 1 1 6.957497 6.957497 19294 +inperform 1 1 6.957497 6.957497 19295 +ofmulti 1 1 6.957497 6.957497 19296 +computingth 1 1 6.957497 6.957497 19297 +valuedattribut 1 1 6.957497 6.957497 19298 +withsameet 1 1 6.957497 6.957497 19299 +sunita 1 1 6.957497 6.957497 19300 +sarawagi 1 1 6.957497 6.957497 19301 +thend 1 1 6.957497 6.957497 19302 +aggregatesin 1 1 6.957497 6.957497 19303 +bucki 1 1 6.957497 6.957497 19304 +gerhk 1 1 6.957497 6.957497 19305 +dhaval 1 1 6.957497 6.957497 19306 +withyihong 1 1 6.957497 6.957497 19307 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..51424008 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +databas 1 122 2.079442 2.079442 86 +look 1 107 2.197225 2.197225 115 +stuff 1 87 2.484907 2.484907 171 +thing 1 84 2.484907 2.484907 189 +optim 1 79 2.564949 2.564949 197 +david 1 71 2.639057 2.639057 232 +plan 1 65 2.772589 2.772589 272 +explor 1 58 2.890372 2.890372 324 +archiv 1 49 3.044522 3.044522 364 +could 1 46 3.091042 3.091042 383 +better 1 45 3.135494 3.135494 401 +queri 1 33 3.433987 3.433987 524 +bookmark 1 26 3.688879 3.688879 639 +indian 1 22 3.850148 3.850148 769 +among 1 21 3.912023 3.912023 781 +noth 1 11 4.553877 4.553877 1328 +song 1 11 4.553877 4.553877 1380 +paradis 1 8 4.875197 4.875197 1782 +customiz 1 4 5.568345 5.568345 2966 +hindi 1 3 5.857933 5.857933 3753 +navin 1 2 6.263398 6.263398 5351 +madisonadvisor 1 2 6.263398 6.263398 6212 +dewittresearch 1 2 6.263398 6.263398 6185 +kabranavin 1 1 6.957497 6.957497 19308 +kabragradu 1 1 6.957497 6.957497 19309 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..e46d0893 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +perform 1 143 1.945910 1.945910 74 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +pictur 1 89 2.397895 2.397895 160 +chang 1 82 2.484907 2.484907 163 +java 1 70 2.708050 2.708050 248 +august 1 66 2.708050 2.708050 257 +telephon 1 50 3.044522 3.044522 373 +scalabl 1 24 3.761200 3.761200 705 +predict 1 19 4.007333 4.007333 855 +bart 1 9 4.753590 4.753590 1559 +newhalltia 1 1 6.957497 6.957497 19311 +newhal 1 1 6.957497 6.957497 19310 +paradynadvisor 1 1 6.957497 6.957497 19312 +millermummi 1 1 6.957497 6.957497 19313 +guanajuato 1 1 6.957497 6.957497 19314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..a6da90c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +object 1 138 1.945910 1.945910 79 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +scalabl 1 24 3.761200 3.761200 705 +repositori 1 17 4.110874 4.110874 932 +heterogen 1 14 4.317488 4.317488 1090 +nanci 1 12 4.465908 4.465908 1256 +shore 1 11 4.553877 4.553877 1377 +hallcomput 1 1 6.957497 6.957497 19315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..6a55a763 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +techniqu 1 99 2.302585 2.302585 138 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +real 1 93 2.397895 2.397895 144 +octob 1 89 2.397895 2.397895 156 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +member 1 84 2.484907 2.484907 165 +learn 1 86 2.484907 2.484907 170 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +decemb 1 80 2.564949 2.564949 215 +april 1 77 2.564949 2.564949 196 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +august 1 66 2.708050 2.708050 257 +view 1 70 2.708050 2.708050 254 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +street 1 63 2.772589 2.772589 293 +collect 1 65 2.772589 2.772589 268 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +juli 1 60 2.833213 2.833213 305 +publish 1 57 2.890372 2.890372 326 +februari 1 54 2.944439 2.944439 328 +telephon 1 50 3.044522 3.044522 373 +life 1 50 3.044522 3.044522 375 +featur 1 46 3.091042 3.091042 386 +california 1 46 3.091042 3.091042 388 +linear 1 41 3.218876 3.218876 431 +editor 1 41 3.218876 3.218876 433 +press 1 42 3.218876 3.218876 419 +error 1 40 3.258097 3.258097 449 +submit 1 39 3.258097 3.258097 440 +paul 1 38 3.295837 3.295837 471 +download 1 36 3.367296 3.367296 489 +global 1 34 3.401197 3.401197 520 +bibliographi 1 34 3.401197 3.401197 518 +john 1 33 3.433987 3.433987 532 +toler 1 33 3.433987 3.433987 533 +rang 1 30 3.555348 3.555348 565 +neural 1 30 3.555348 3.555348 578 +cluster 1 28 3.610918 3.610918 612 +bound 1 26 3.688879 3.688879 659 +constraint 1 26 3.688879 3.688879 636 +revis 1 26 3.688879 3.688879 640 +aspect 1 25 3.737670 3.737670 663 +accur 1 25 3.737670 3.737670 680 +proof 1 23 3.806662 3.806662 720 +variabl 1 23 3.806662 3.806662 715 +equat 1 23 3.806662 3.806662 724 +verlag 1 22 3.850148 3.850148 751 +period 1 22 3.850148 3.850148 743 +siam 1 21 3.912023 3.912023 800 +chen 1 21 3.912023 3.912023 791 +predict 1 19 4.007333 4.007333 855 +eric 1 19 4.007333 4.007333 870 +separ 1 19 4.007333 4.007333 844 +minim 1 18 4.060443 4.060443 887 +differenti 1 17 4.110874 4.110874 921 +germani 1 17 4.110874 4.110874 946 +hybrid 1 15 4.248495 4.248495 1057 +nonlinear 1 14 4.317488 4.317488 1107 +train 1 14 4.317488 4.317488 1066 +francisco 1 14 4.317488 4.317488 1095 +context 1 13 4.382027 4.382027 1153 +individu 1 13 4.382027 4.382027 1126 +nick 1 13 4.382027 4.382027 1180 +broad 1 11 4.553877 4.553877 1302 +rich 1 10 4.653960 4.653960 1396 +strongli 1 10 4.653960 4.653960 1406 +penalti 1 10 4.653960 4.653960 1405 +mangasarian 1 9 4.753590 4.753590 1570 +pose 1 9 4.753590 4.753590 1535 +morgan 1 9 4.753590 4.753590 1484 +harvard 1 7 5.010635 5.010635 1926 +converg 1 7 5.010635 5.010635 1844 +smooth 1 7 5.010635 5.010635 1855 +fischer 1 7 5.010635 5.010635 1893 +serial 1 7 5.010635 5.010635 1975 +olvi 1 6 5.164786 5.164786 2109 +inequ 1 6 5.164786 5.164786 2113 +constrain 1 6 5.164786 5.164786 2042 +strong 1 6 5.164786 5.164786 2029 +mix 1 6 5.164786 5.164786 2200 +determinist 1 6 5.164786 5.164786 2034 +variat 1 5 5.347108 5.347108 2248 +bradlei 1 5 5.347108 5.347108 2554 +kaufmann 1 5 5.347108 5.347108 2254 +convex 1 4 5.568345 5.568345 2807 +concav 1 4 5.568345 5.568345 2808 +diagnosi 1 4 5.568345 5.568345 3027 +nonmonoton 1 4 5.568345 5.568345 3023 +net 1 4 5.568345 5.568345 2741 +neumann 1 3 5.857933 5.857933 3720 +programsand 1 3 5.857933 5.857933 3111 +programmingtechniqu 1 3 5.857933 5.857933 3113 +diagnos 1 3 5.857933 5.857933 3968 +breast 1 3 5.857933 5.857933 4033 +cancer 1 3 5.857933 5.857933 4032 +diagnost 1 3 5.857933 5.857933 3833 +complementar 1 3 5.857933 5.857933 3999 +baltimor 1 3 5.857933 5.857933 3809 +backpropag 1 3 5.857933 5.857933 3507 +chronolog 1 3 5.857933 5.857933 4034 +interestsin 1 2 6.263398 6.263398 6213 +spectrum 1 2 6.263398 6.263398 5405 +applicationsto 1 2 6.263398 6.263398 4254 +computer 1 2 6.263398 6.263398 6219 +linearli 1 2 6.263398 6.263398 6220 +qualif 1 2 6.263398 6.263398 6059 +wolberg 1 2 6.263398 6.263398 6218 +prognost 1 2 6.263398 6.263398 6221 +polyhedr 1 2 6.263398 6.263398 5412 +festschrift 1 2 6.263398 6.263398 6141 +klau 1 2 6.263398 6.263398 4999 +internationalsymposium 1 2 6.263398 6.263398 6032 +plenum 1 2 6.263398 6.263398 6036 +prognosi 1 2 6.263398 6.263398 6222 +perturb 1 2 6.263398 6.263398 6075 +effectivecomputation 1 1 6.957497 6.957497 19319 +encompassestheoret 1 1 6.957497 6.957497 19320 +parallelgradi 1 1 6.957497 6.957497 19321 +problemsa 1 1 6.957497 6.957497 19322 +animport 1 1 6.957497 6.957497 19323 +ahighli 1 1 6.957497 6.957497 19324 +useat 1 1 6.957497 6.957497 19325 +hospit 1 1 6.957497 6.957497 19326 +solodova 1 1 6.957497 6.957497 19327 +descent 1 1 6.957497 6.957497 19328 +monotonecomplementar 1 1 6.957497 6.957497 19329 +jong 1 1 6.957497 6.957497 19330 +pangexact 1 1 6.957497 6.957497 19331 +programswith 1 1 6.957497 6.957497 19332 +mangasarianmathemat 1 1 6.957497 6.957497 19333 +miningmathemat 1 1 6.957497 6.957497 19334 +mangasarianerror 1 1 6.957497 6.957497 19335 +nondifferenti 1 1 6.957497 6.957497 19336 +slater 1 1 6.957497 6.957497 19337 +ritter 1 1 6.957497 6.957497 19338 +riedmuel 1 1 6.957497 6.957497 19339 +schaeffler 1 1 6.957497 6.957497 19340 +physica 1 1 6.957497 6.957497 19341 +chunhui 1 1 6.957497 6.957497 19316 +misclassif 1 1 6.957497 6.957497 19317 +siag 1 1 6.957497 6.957497 19342 +bilinear 1 1 6.957497 6.957497 19343 +solodov 1 1 6.957497 6.957497 19318 +cowan 1 1 6.957497 6.957497 19344 +tesauro 1 1 6.957497 6.957497 19345 +alspector 1 1 6.957497 6.957497 19346 +inequalitiesand 1 1 6.957497 6.957497 19347 +vianonmonoton 1 1 6.957497 6.957497 19348 +minimn 1 1 6.957497 6.957497 19349 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..d927a82c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,422 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +base 1 165 1.791759 1.791759 50 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +pleas 1 113 2.197225 2.197225 114 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +imag 1 91 2.397895 2.397895 161 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +proceed 1 93 2.397895 2.397895 152 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +center 1 88 2.397895 2.397895 158 +learn 1 86 2.484907 2.484907 170 +second 1 81 2.484907 2.484907 166 +solut 1 82 2.484907 2.484907 162 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +good 1 77 2.564949 2.564949 200 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +exampl 1 77 2.564949 2.564949 195 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +appli 1 71 2.639057 2.639057 226 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +workshop 1 71 2.639057 2.639057 239 +nation 1 74 2.639057 2.639057 240 +goal 1 66 2.708050 2.708050 250 +august 1 66 2.708050 2.708050 257 +result 1 65 2.772589 2.772589 281 +copi 1 63 2.772589 2.772589 284 +prof 1 64 2.772589 2.772589 273 +interact 1 62 2.772589 2.772589 270 +previou 1 62 2.772589 2.772589 290 +street 1 63 2.772589 2.772589 293 +function 1 62 2.772589 2.772589 275 +abstract 1 62 2.772589 2.772589 276 +new 1 64 2.772589 2.772589 262 +collect 1 65 2.772589 2.772589 268 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +septemb 1 65 2.772589 2.772589 274 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +variou 1 56 2.890372 2.890372 317 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +point 1 58 2.890372 2.890372 319 +detail 1 57 2.890372 2.890372 321 +local 1 55 2.944439 2.944439 334 +sampl 1 53 2.944439 2.944439 339 +allow 1 53 2.944439 2.944439 333 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +digit 1 52 2.995732 2.995732 348 +date 1 51 2.995732 2.995732 344 +particular 1 51 2.995732 2.995732 352 +visual 1 48 3.044522 3.044522 372 +pointer 1 48 3.044522 3.044522 368 +approach 1 48 3.044522 3.044522 366 +right 1 48 3.044522 3.044522 363 +format 1 48 3.044522 3.044522 356 +numer 1 49 3.044522 3.044522 369 +without 1 50 3.044522 3.044522 370 +friend 1 48 3.044522 3.044522 376 +featur 1 46 3.091042 3.091042 386 +could 1 46 3.091042 3.091042 383 +describ 1 45 3.135494 3.135494 400 +better 1 45 3.135494 3.135494 401 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +vision 1 41 3.218876 3.218876 430 +examin 1 42 3.218876 3.218876 424 +societi 1 40 3.258097 3.258097 456 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +author 1 39 3.258097 3.258097 450 +submit 1 39 3.258097 3.258097 440 +seminar 1 38 3.295837 3.295837 470 +slide 1 38 3.295837 3.295837 467 +mean 1 37 3.332205 3.332205 477 +ofth 1 36 3.367296 3.367296 491 +procedur 1 36 3.367296 3.367296 488 +download 1 36 3.367296 3.367296 489 +bibliographi 1 34 3.401197 3.401197 518 +approxim 1 35 3.401197 3.401197 509 +singl 1 34 3.401197 3.401197 510 +return 1 34 3.401197 3.401197 502 +print 1 34 3.401197 3.401197 503 +tech 1 35 3.401197 3.401197 515 +board 1 33 3.433987 3.433987 528 +obtain 1 33 3.433987 3.433987 534 +collabor 1 32 3.465736 3.465736 543 +human 1 32 3.465736 3.465736 546 +taken 1 31 3.496508 3.496508 555 +often 1 31 3.496508 3.496508 551 +power 1 30 3.555348 3.555348 573 +consid 1 29 3.583519 3.583519 590 +releas 1 28 3.610918 3.610918 616 +measur 1 28 3.610918 3.610918 609 +actual 1 28 3.610918 3.610918 604 +scale 1 28 3.610918 3.610918 613 +progress 1 28 3.610918 3.610918 598 +american 1 27 3.637586 3.637586 634 +team 1 27 3.637586 3.637586 625 +consist 1 26 3.688879 3.688879 651 +repres 1 26 3.688879 3.688879 656 +compar 1 26 3.688879 3.688879 648 +detect 1 26 3.688879 3.688879 646 +accur 1 25 3.737670 3.737670 680 +valu 1 25 3.737670 3.737670 665 +todai 1 25 3.737670 3.737670 672 +pattern 1 24 3.761200 3.761200 689 +known 1 24 3.761200 3.761200 702 +interpret 1 24 3.761200 3.761200 686 +size 1 23 3.806662 3.806662 713 +recognit 1 23 3.806662 3.806662 723 +togeth 1 23 3.806662 3.806662 714 +william 1 22 3.850148 3.850148 765 +identifi 1 22 3.850148 3.850148 760 +theunivers 1 21 3.912023 3.912023 797 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +viewer 1 21 3.912023 3.912023 787 +siam 1 21 3.912023 3.912023 800 +fine 1 20 3.951244 3.951244 822 +minut 1 20 3.951244 3.951244 810 +separ 1 19 4.007333 4.007333 844 +five 1 19 4.007333 4.007333 841 +comparison 1 19 4.007333 4.007333 863 +predict 1 19 4.007333 4.007333 855 +behavior 1 18 4.060443 4.060443 881 +statu 1 18 4.060443 4.060443 885 +aid 1 18 4.060443 4.060443 904 +differenti 1 17 4.110874 4.110874 921 +segment 1 17 4.110874 4.110874 931 +medic 1 17 4.110874 4.110874 958 +analyz 1 17 4.110874 4.110874 925 +seek 1 17 4.110874 4.110874 954 +portion 1 16 4.174387 4.174387 971 +capabl 1 15 4.248495 4.248495 1016 +indic 1 15 4.248495 4.248495 1013 +precis 1 15 4.248495 4.248495 1023 +ascii 1 15 4.248495 4.248495 1032 +camera 1 14 4.317488 4.317488 1115 +draw 1 14 4.317488 4.317488 1086 +train 1 14 4.317488 4.317488 1066 +shown 1 14 4.317488 4.317488 1080 +nick 1 13 4.382027 4.382027 1180 +individu 1 13 4.382027 4.382027 1126 +incorpor 1 13 4.382027 4.382027 1163 +deriv 1 13 4.382027 4.382027 1145 +characterist 1 12 4.465908 4.465908 1257 +scan 1 12 4.465908 4.465908 1243 +shape 1 12 4.465908 4.465908 1245 +remov 1 12 4.465908 4.465908 1225 +philadelphia 1 12 4.465908 4.465908 1244 +extrem 1 11 4.553877 4.553877 1330 +eight 1 11 4.553877 4.553877 1331 +node 1 11 4.553877 4.553877 1326 +distinguish 1 11 4.553877 4.553877 1357 +induct 1 11 4.553877 4.553877 1304 +total 1 10 4.653960 4.653960 1398 +subset 1 10 4.653960 4.653960 1425 +black 1 10 4.653960 4.653960 1418 +equal 1 10 4.653960 4.653960 1424 +tradit 1 10 4.653960 4.653960 1404 +perspect 1 10 4.653960 4.653960 1437 +mangasarian 1 9 4.753590 4.753590 1570 +desir 1 9 4.753590 4.753590 1542 +exact 1 9 4.753590 4.753590 1509 +classifi 1 9 4.753590 4.753590 1537 +hundr 1 9 4.753590 4.753590 1528 +correctli 1 9 4.753590 4.753590 1478 +surfac 1 9 4.753590 4.753590 1574 +factor 1 9 4.753590 4.753590 1544 +russel 1 9 4.753590 4.753590 1507 +morgan 1 9 4.753590 4.753590 1484 +grew 1 8 4.875197 4.875197 1742 +mass 1 8 4.875197 4.875197 1732 +isol 1 8 4.875197 4.875197 1663 +textur 1 8 4.875197 4.875197 1677 +judg 1 8 4.875197 4.875197 1644 +curv 1 8 4.875197 4.875197 1656 +quantit 1 8 4.875197 4.875197 1654 +aaai 1 8 4.875197 4.875197 1750 +replac 1 8 4.875197 4.875197 1668 +angel 1 8 4.875197 4.875197 1779 +boundari 1 7 5.010635 5.010635 1929 +converg 1 7 5.010635 5.010635 1844 +smooth 1 7 5.010635 5.010635 1855 +densiti 1 7 5.010635 5.010635 1927 +analyt 1 7 5.010635 5.010635 1913 +hunt 1 7 5.010635 5.010635 1798 +ruth 1 7 5.010635 5.010635 1870 +chronicl 1 7 5.010635 5.010635 1952 +capit 1 7 5.010635 5.010635 1957 +olvi 1 6 5.164786 5.164786 2109 +nine 1 6 5.164786 5.164786 2047 +plane 1 6 5.164786 5.164786 2187 +onto 1 6 5.164786 5.164786 2089 +versu 1 6 5.164786 5.164786 2052 +averag 1 6 5.164786 5.164786 2098 +began 1 5 5.347108 5.347108 2498 +highlight 1 5 5.347108 5.347108 2340 +cell 1 5 5.347108 5.347108 2274 +snake 1 5 5.347108 5.347108 2281 +nuclear 1 5 5.347108 5.347108 2576 +accuraci 1 5 5.347108 5.347108 2450 +shift 1 5 5.347108 5.347108 2357 +kaufmann 1 5 5.347108 5.347108 2254 +medicin 1 5 5.347108 5.347108 2448 +houston 1 5 5.347108 5.347108 2460 +diagnosi 1 4 5.568345 5.568345 3027 +writer 1 4 5.568345 5.568345 2783 +popular 1 4 5.568345 5.568345 2802 +sole 1 4 5.568345 5.568345 2592 +aspir 1 4 5.568345 5.568345 3019 +assess 1 4 5.568345 5.568345 2724 +kristin 1 4 5.568345 5.568345 3089 +surviv 1 4 5.568345 5.568345 2734 +prospect 1 4 5.568345 5.568345 3013 +biomed 1 4 5.568345 5.568345 2905 +cancer 1 3 5.857933 5.857933 4032 +surgeri 1 3 5.857933 5.857933 3975 +citat 1 3 5.857933 5.857933 3617 +bennett 1 3 5.857933 5.857933 4024 +diagnos 1 3 5.857933 5.857933 3968 +breast 1 3 5.857933 5.857933 4033 +microscop 1 3 5.857933 5.857933 4035 +confid 1 3 5.857933 5.857933 3691 +diseas 1 3 5.857933 5.857933 3635 +recurr 1 3 5.857933 5.857933 3740 +pain 1 3 5.857933 5.857933 3460 +chronolog 1 3 5.857933 5.857933 4034 +man 1 3 5.857933 5.857933 3417 +detroit 1 3 5.857933 5.857933 3565 +paulb 1 3 5.857933 5.857933 4036 +prognosi 1 2 6.263398 6.263398 6222 +wolberg 1 2 6.263398 6.263398 6218 +rudi 1 2 6.263398 6.263398 5487 +multisurfac 1 2 6.263398 6.263398 6224 +ofthi 1 2 6.263398 6.263398 5836 +grabber 1 2 6.263398 6.263398 5521 +nucleu 1 2 6.263398 6.263398 4302 +benign 1 2 6.263398 6.263398 4893 +thenorm 1 2 6.263398 6.263398 4412 +patient 1 2 6.263398 6.263398 6223 +plot 1 2 6.263398 6.263398 4236 +ofvari 1 2 6.263398 6.263398 4582 +prognost 1 2 6.263398 6.263398 6221 +twelfth 1 2 6.263398 6.263398 5035 +icml 1 2 6.263398 6.263398 5669 +prime 1 2 6.263398 6.263398 6099 +milwauke 1 2 6.263398 6.263398 5797 +computer 1 2 6.263398 6.263398 6219 +cope 1 2 6.263398 6.263398 6050 +nevada 1 2 6.263398 6.263398 4875 +prognosismachin 1 1 6.957497 6.957497 19363 +prognosisthi 1 1 6.957497 6.957497 19364 +learningapproach 1 1 6.957497 6.957497 19365 +ofbreast 1 1 6.957497 6.957497 19366 +betweenprof 1 1 6.957497 6.957497 19367 +anddr 1 1 6.957497 6.957497 19368 +wolbergof 1 1 6.957497 6.957497 19369 +oncolog 1 1 6.957497 6.957497 19354 +thepress 1 1 6.957497 6.957497 19370 +inmarch 1 1 6.957497 6.957497 19371 +linksdiagnosisthi 1 1 6.957497 6.957497 19372 +diagnosebreast 1 1 6.957497 6.957497 19373 +needl 1 1 6.957497 6.957497 19355 +heidentifi 1 1 6.957497 6.957497 19374 +consideredrelev 1 1 6.957497 6.957497 19375 +andtwo 1 1 6.957497 6.957497 19376 +setiono 1 1 6.957497 6.957497 19357 +aclassifi 1 1 6.957497 6.957497 19377 +thatsuccessfulli 1 1 6.957497 6.957497 19378 +iswel 1 1 6.957497 6.957497 19379 +streetto 1 1 6.957497 6.957497 19380 +adigit 1 1 6.957497 6.957497 19381 +consolid 1 1 6.957497 6.957497 19382 +xcyt 1 1 6.957497 6.957497 19356 +clinicalpractic 1 1 6.957497 6.957497 19383 +thenmount 1 1 6.957497 6.957497 19384 +stain 1 1 6.957497 6.957497 19385 +cellularnuclei 1 1 6.957497 6.957497 19386 +arewel 1 1 6.957497 6.957497 19387 +afram 1 1 6.957497 6.957497 19388 +nuclei 1 1 6.957497 6.957497 19351 +mous 1 1 6.957497 6.957497 19389 +ofeach 1 1 6.957497 6.957497 19358 +showingxcyt 1 1 6.957497 6.957497 19390 +thisfas 1 1 6.957497 6.957497 19391 +standarderror 1 1 6.957497 6.957497 19392 +wasconstruct 1 1 6.957497 6.957497 19393 +malign 1 1 6.957497 6.957497 19350 +thisclassifi 1 1 6.957497 6.957497 19394 +threeof 1 1 6.957497 6.957497 19395 +bayesiancomput 1 1 6.957497 6.957497 19396 +thesedens 1 1 6.957497 6.957497 19397 +consecut 1 1 6.957497 6.957497 19398 +newpati 1 1 6.957497 6.957497 19399 +didxcyt 1 1 6.957497 6.957497 19400 +suspici 1 1 6.957497 6.957497 19401 +estimatedprob 1 1 6.957497 6.957497 19402 +goodtest 1 1 6.957497 6.957497 19403 +petsegment 1 1 6.957497 6.957497 19404 +inthes 1 1 6.957497 6.957497 19405 +prognosisth 1 1 6.957497 6.957497 19406 +haveapproach 1 1 6.957497 6.957497 19407 +inputfeatur 1 1 6.957497 6.957497 19408 +atim 1 1 6.957497 6.957497 19409 +censor 1 1 6.957497 6.957497 19410 +linearprogram 1 1 6.957497 6.957497 19411 +fornew 1 1 6.957497 6.957497 19412 +caseswith 1 1 6.957497 6.957497 19413 +ofdiseas 1 1 6.957497 6.957497 19359 +anindividu 1 1 6.957497 6.957497 19414 +intoxcyt 1 1 6.957497 6.957497 19415 +ourorigin 1 1 6.957497 6.957497 19416 +thereforeha 1 1 6.957497 6.957497 19417 +freeafter 1 1 6.957497 6.957497 19418 +cytolog 1 1 6.957497 6.957497 19352 +xcytgiv 1 1 6.957497 6.957497 19419 +tumors 1 1 6.957497 6.957497 19420 +lymph 1 1 6.957497 6.957497 19360 +corrobor 1 1 6.957497 6.957497 19421 +axillari 1 1 6.957497 6.957497 19422 +bibliographylink 1 1 6.957497 6.957497 19423 +notlink 1 1 6.957497 6.957497 19424 +histolog 1 1 6.957497 6.957497 19361 +heisei 1 1 6.957497 6.957497 19362 +patholog 1 1 6.957497 6.957497 19425 +priediti 1 1 6.957497 6.957497 19426 +teagu 1 1 6.957497 6.957497 19427 +indetermin 1 1 6.957497 6.957497 19428 +biopsi 1 1 6.957497 6.957497 19353 +imit 1 1 6.957497 6.957497 19429 +sentinel 1 1 6.957497 6.957497 19430 +marilynn 1 1 6.957497 6.957497 19431 +marchion 1 1 6.957497 6.957497 19432 +sorel 1 1 6.957497 6.957497 19433 +surgic 1 1 6.957497 6.957497 19434 +column 1 1 6.957497 6.957497 19435 +schooloth 1 1 6.957497 6.957497 19436 +oncolink 1 1 6.957497 6.957497 19437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..17c780dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +problem 1 147 1.945910 1.945910 75 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +confer 1 126 2.079442 2.079442 100 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +proceed 1 93 2.397895 2.397895 152 +contain 1 81 2.484907 2.484907 174 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +solv 1 73 2.639057 2.639057 234 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +goal 1 66 2.708050 2.708050 250 +view 1 70 2.708050 2.708050 254 +artifici 1 63 2.772589 2.772589 280 +street 1 63 2.772589 2.772589 293 +descript 1 64 2.772589 2.772589 271 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +space 1 57 2.890372 2.890372 310 +found 1 53 2.944439 2.944439 337 +particular 1 51 2.995732 2.995732 352 +much 1 52 2.995732 2.995732 349 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +numer 1 49 3.044522 3.044522 369 +could 1 46 3.091042 3.091042 383 +describ 1 45 3.135494 3.135494 400 +anoth 1 45 3.135494 3.135494 408 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +linear 1 41 3.218876 3.218876 431 +howev 1 41 3.218876 3.218876 422 +error 1 40 3.258097 3.258097 449 +transact 1 39 3.258097 3.258097 438 +societi 1 40 3.258097 3.258097 456 +close 1 38 3.295837 3.295837 465 +paul 1 38 3.295837 3.295837 471 +tree 1 36 3.367296 3.367296 492 +procedur 1 36 3.367296 3.367296 488 +bibliographi 1 34 3.401197 3.401197 518 +concept 1 32 3.465736 3.465736 537 +neural 1 30 3.555348 3.555348 578 +packag 1 28 3.610918 3.610918 614 +determin 1 27 3.637586 3.637586 630 +pattern 1 24 3.761200 3.761200 689 +reach 1 24 3.761200 3.761200 688 +togeth 1 23 3.806662 3.806662 714 +sequenc 1 23 3.806662 3.806662 734 +decis 1 23 3.806662 3.806662 728 +avoid 1 21 3.912023 3.912023 799 +separ 1 19 4.007333 4.007333 844 +histori 1 19 4.007333 4.007333 853 +region 1 19 4.007333 4.007333 875 +mostli 1 19 4.007333 4.007333 869 +dimension 1 18 4.060443 4.060443 909 +minim 1 18 4.060443 4.060443 887 +stop 1 17 4.110874 4.110874 942 +otherwis 1 17 4.110874 4.110874 922 +layer 1 17 4.110874 4.110874 926 +brief 1 16 4.174387 4.174387 1001 +choos 1 16 4.174387 4.174387 964 +advantag 1 16 4.174387 4.174387 987 +cognit 1 16 4.174387 4.174387 986 +side 1 15 4.248495 4.248495 1022 +nonlinear 1 14 4.317488 4.317488 1107 +polynomi 1 14 4.317488 4.317488 1069 +finit 1 14 4.317488 4.317488 1106 +split 1 14 4.317488 4.317488 1078 +shown 1 14 4.317488 4.317488 1080 +train 1 14 4.317488 4.317488 1066 +matlab 1 14 4.317488 4.317488 1081 +difficulti 1 13 4.382027 4.382027 1132 +nick 1 13 4.382027 4.382027 1180 +robust 1 12 4.465908 4.465908 1271 +node 1 11 4.553877 4.553877 1326 +branch 1 11 4.553877 4.553877 1318 +faster 1 11 4.553877 4.553877 1323 +surfac 1 9 4.753590 4.753590 1574 +distanc 1 9 4.753590 4.753590 1500 +mangasarian 1 9 4.753590 4.753590 1570 +formul 1 8 4.875197 4.875197 1733 +whenev 1 7 5.010635 5.010635 1883 +paramet 1 7 5.010635 5.010635 1796 +plane 1 6 5.164786 5.164786 2187 +variant 1 6 5.164786 5.164786 2043 +averag 1 6 5.164786 5.164786 2098 +hidden 1 6 5.164786 5.164786 1987 +proce 1 6 5.164786 5.164786 2114 +li 1 5 5.347108 5.347108 2500 +bradlei 1 5 5.347108 5.347108 2554 +disjoint 1 4 5.568345 5.568345 2709 +repeat 1 4 5.568345 5.568345 2798 +kristin 1 4 5.568345 5.568345 3089 +todetermin 1 3 5.857933 5.857933 3182 +similarli 1 3 5.857933 5.857933 3241 +backpropag 1 3 5.857933 5.857933 3507 +bennett 1 3 5.857933 5.857933 4024 +chronolog 1 3 5.857933 5.857933 4034 +paulb 1 3 5.857933 5.857933 4036 +linearli 1 2 6.263398 6.263398 6220 +quadrat 1 2 6.263398 6.263398 4497 +multisurfac 1 2 6.263398 6.263398 6224 +euclidean 1 2 6.263398 6.263398 5198 +oneset 1 2 6.263398 6.263398 6134 +cart 1 2 6.263398 6.263398 5874 +mino 1 2 6.263398 6.263398 6208 +midwest 1 2 6.263398 6.263398 6225 +discrimin 1 2 6.263398 6.263398 6140 +programmingpattern 1 1 6.957497 6.957497 19440 +programmingthi 1 1 6.957497 6.957497 19441 +outlinemathemat 1 1 6.957497 6.957497 19442 +failon 1 1 6.957497 6.957497 19443 +discard 1 1 6.957497 6.957497 19444 +euclideanspac 1 1 6.957497 6.957497 19439 +misclassifi 1 1 6.957497 6.957497 19438 +eachnod 1 1 6.957497 6.957497 19445 +thesam 1 1 6.957497 6.957497 19446 +astrain 1 1 6.957497 6.957497 19447 +traditionallearn 1 1 6.957497 6.957497 19448 +inthat 1 1 6.957497 6.957497 19449 +insepar 1 1 6.957497 6.957497 19450 +orsa 1 1 6.957497 6.957497 19451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..d87fe041 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +dayton 1 119 2.079442 2.079442 104 +version 1 113 2.197225 2.197225 122 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +contain 1 81 2.484907 2.484907 174 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +west 1 83 2.484907 2.484907 192 +meet 1 72 2.639057 2.639057 229 +symposium 1 72 2.639057 2.639057 238 +copi 1 63 2.772589 2.772589 284 +explor 1 58 2.890372 2.890372 324 +tabl 1 51 2.995732 2.995732 346 +describ 1 45 3.135494 3.135494 400 +made 1 44 3.135494 3.135494 398 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +staff 1 36 3.367296 3.367296 490 +common 1 30 3.555348 3.555348 574 +releas 1 28 3.610918 3.610918 616 +symbol 1 27 3.637586 3.637586 620 +effort 1 26 3.688879 3.688879 652 +scalabl 1 24 3.761200 3.761200 705 +hypertext 1 19 4.007333 4.007333 865 +statu 1 18 4.060443 4.060443 885 +sigmetr 1 13 4.382027 4.382027 1173 +arpa 1 11 4.553877 4.553877 1369 +paradyn 1 9 4.753590 4.753590 1614 +routin 1 9 4.753590 4.753590 1549 +bart 1 9 4.753590 4.753590 1559 +poster 1 7 5.010635 5.010635 1814 +antonio 1 6 5.164786 5.164786 2186 +restaur 1 6 5.164786 5.164786 2230 +temporari 1 6 5.164786 5.164786 2090 +panel 1 5 5.347108 5.347108 2463 +elsewher 1 5 5.347108 5.347108 2444 +insan 1 3 5.857933 5.857933 4006 +super 1 3 5.857933 5.857933 3918 +parallellanguag 1 3 5.857933 5.857933 4026 +informationthi 1 2 6.263398 6.263398 5477 +ofreleas 1 2 6.263398 6.263398 4860 +newapproach 1 2 6.263398 6.263398 6047 +blizzard 1 2 6.263398 6.263398 6226 +projectdepart 1 2 6.263398 6.263398 6125 +edufax 1 2 6.263398 6.263398 5479 +goalsth 1 1 6.957497 6.957497 19454 +manualsstatu 1 1 6.957497 6.957497 19455 +reporta 1 1 6.957497 6.957497 19456 +presentationthi 1 1 6.957497 6.957497 19453 +csto 1 1 6.957497 6.957497 19452 +inflorida 1 1 6.957497 6.957497 19457 +tocompil 1 1 6.957497 6.957497 19458 +postera 1 1 6.957497 6.957497 19459 +spdt 1 1 6.957497 6.957497 19460 +toolsyou 1 1 6.957497 6.957497 19461 +placehold 1 1 6.957497 6.957497 19462 +informationparadyn 1 1 6.957497 6.957497 19463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..c36866b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +send 1 114 2.197225 2.197225 109 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +numer 1 49 3.044522 3.044522 369 +math 1 44 3.135494 3.135494 402 +steven 1 17 4.110874 4.110874 953 +employ 1 12 4.465908 4.465908 1291 +depth 1 8 4.875197 4.875197 1636 +parker 1 1 6.957497 6.957497 19464 +prism 1 1 6.957497 6.957497 19465 +projectfal 1 1 6.957497 6.957497 19466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..a6a674c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +area 1 144 1.945910 1.945910 80 +professor 1 137 1.945910 1.945910 76 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +postscript 1 131 2.079442 2.079442 90 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +techniqu 1 99 2.302585 2.302585 138 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +learn 1 86 2.484907 2.484907 170 +journal 1 83 2.484907 2.484907 183 +decemb 1 80 2.564949 2.564949 215 +summari 1 73 2.639057 2.639057 237 +guid 1 63 2.772589 2.772589 267 +street 1 63 2.772589 2.772589 293 +abstract 1 62 2.772589 2.772589 276 +march 1 61 2.833213 2.833213 295 +advisor 1 51 2.995732 2.995732 355 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +featur 1 46 3.091042 3.091042 386 +netscap 1 44 3.135494 3.135494 395 +linear 1 41 3.218876 3.218876 431 +submit 1 39 3.258097 3.258097 440 +paul 1 38 3.295837 3.295837 471 +download 1 36 3.367296 3.367296 489 +print 1 34 3.401197 3.401197 503 +eduoffic 1 33 3.433987 3.433987 531 +neural 1 30 3.555348 3.555348 578 +cluster 1 28 3.610918 3.610918 612 +revis 1 26 3.688879 3.688879 640 +store 1 24 3.761200 3.761200 693 +viewer 1 21 3.912023 3.912023 787 +minim 1 18 4.060443 4.060443 887 +accept 1 18 4.060443 4.060443 879 +ascii 1 15 4.248495 4.248495 1032 +nonlinear 1 14 4.317488 4.317488 1107 +nick 1 13 4.382027 4.382027 1180 +nasa 1 13 4.382027 4.382027 1188 +induct 1 11 4.553877 4.553877 1304 +mangasarian 1 9 4.753590 4.753590 1570 +dead 1 7 5.010635 5.010635 1840 +fish 1 6 5.164786 5.164786 2207 +shift 1 5 5.347108 5.347108 2357 +bradlei 1 5 5.347108 5.347108 2554 +frog 1 5 5.347108 5.347108 2479 +concav 1 4 5.568345 5.568345 2808 +paulb 1 3 5.857933 5.857933 4036 +csphone 1 3 5.857933 5.857933 3394 +backcountri 1 3 5.857933 5.857933 3686 +espnet 1 2 6.263398 6.263398 5634 +bradleygradu 1 1 6.957497 6.957497 19467 +mangasarianinterestsmathemat 1 1 6.957497 6.957497 19468 +programmingmachin 1 1 6.957497 6.957497 19469 +learningfli 1 1 6.957497 6.957497 19470 +currentlyb 1 1 6.957497 6.957497 19471 +madisonmathemat 1 1 6.957497 6.957497 19472 +thiswork 1 1 6.957497 6.957497 19473 +olvimangasarian 1 1 6.957497 6.957497 19474 +publicationsal 1 1 6.957497 6.957497 19475 +picksthes 1 1 6.957497 6.957497 19476 +grate 1 1 6.957497 6.957497 19477 +timesfax 1 1 6.957497 6.957497 19478 +uroullett 1 1 6.957497 6.957497 19479 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..e80ec2ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +intern 1 108 2.197225 2.197225 128 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +site 1 106 2.197225 2.197225 119 +technic 1 100 2.302585 2.302585 140 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +imag 1 91 2.397895 2.397895 161 +present 1 91 2.397895 2.397895 145 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +help 1 83 2.484907 2.484907 175 +resourc 1 81 2.484907 2.484907 172 +journal 1 83 2.484907 2.484907 183 +info 1 85 2.484907 2.484907 176 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +resum 1 79 2.564949 2.564949 217 +decemb 1 80 2.564949 2.564949 215 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +tuesdai 1 73 2.639057 2.639057 219 +integr 1 67 2.708050 2.708050 245 +receiv 1 66 2.708050 2.708050 244 +laboratori 1 63 2.772589 2.772589 292 +prof 1 64 2.772589 2.772589 273 +creat 1 63 2.772589 2.772589 277 +guid 1 63 2.772589 2.772589 267 +experi 1 64 2.772589 2.772589 283 +room 1 59 2.833213 2.833213 301 +overview 1 56 2.890372 2.890372 323 +think 1 57 2.890372 2.890372 314 +major 1 56 2.890372 2.890372 315 +cover 1 55 2.944439 2.944439 329 +basic 1 50 3.044522 3.044522 360 +cool 1 49 3.044522 3.044522 374 +standard 1 48 3.044522 3.044522 365 +join 1 39 3.258097 3.258097 457 +multipl 1 39 3.258097 3.258097 453 +mean 1 37 3.332205 3.332205 477 +articl 1 33 3.433987 3.433987 530 +photo 1 31 3.496508 3.496508 561 +great 1 27 3.637586 3.637586 626 +team 1 27 3.637586 3.637586 625 +rather 1 26 3.688879 3.688879 642 +although 1 25 3.737670 3.737670 667 +lab 1 24 3.761200 3.761200 698 +inth 1 22 3.850148 3.850148 741 +tell 1 21 3.912023 3.912023 777 +toolkit 1 20 3.951244 3.951244 835 +eric 1 19 4.007333 4.007333 870 +excel 1 19 4.007333 4.007333 868 +dimension 1 18 4.060443 4.060443 909 +anywai 1 15 4.248495 4.248495 1047 +webmast 1 15 4.248495 4.248495 1045 +biologi 1 15 4.248495 4.248495 1049 +everyth 1 13 4.382027 4.382027 1169 +rest 1 12 4.465908 4.465908 1259 +guest 1 12 4.465908 4.465908 1220 +peter 1 11 4.553877 4.553877 1316 +sens 1 11 4.553877 4.553877 1305 +eight 1 11 4.553877 4.553877 1331 +label 1 10 4.653960 4.653960 1423 +star 1 8 4.875197 4.875197 1717 +scout 1 7 5.010635 5.010635 1903 +fortun 1 7 5.010635 5.010635 1872 +molecular 1 7 5.010635 5.010635 1887 +isthat 1 4 5.568345 5.568345 2723 +biomed 1 4 5.568345 5.568345 2905 +pete 1 3 5.857933 5.857933 3865 +specialist 1 3 5.857933 5.857933 3319 +microscop 1 3 5.857933 5.857933 4035 +devri 1 2 6.263398 6.263398 6145 +foolish 1 2 6.263398 6.263398 6108 +hazen 1 2 6.263398 6.263398 6143 +nearli 1 2 6.263398 6.263398 5608 +magellan 1 2 6.263398 6.263398 5825 +isdescrib 1 2 6.263398 6.263398 5444 +westdayton 1 1 6.957497 6.957497 19482 +pdevri 1 1 6.957497 6.957497 19483 +andthen 1 1 6.957497 6.957497 19484 +iread 1 1 6.957497 6.957497 19485 +topai 1 1 6.957497 6.957497 19486 +alsoprovid 1 1 6.957497 6.957497 19487 +folksat 1 1 6.957497 6.957497 19488 +molecularbiolog 1 1 6.957497 6.957497 19480 +microscopi 1 1 6.957497 6.957497 19489 +seancarrol 1 1 6.957497 6.957497 19490 +confoc 1 1 6.957497 6.957497 19491 +embryo 1 1 6.957497 6.957497 19481 +lotof 1 1 6.957497 6.957497 19492 +johnwhit 1 1 6.957497 6.957497 19493 +imrstaff 1 1 6.957497 6.957497 19494 +augustnd 1 1 6.957497 6.957497 19495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..a7c54d0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +school 1 84 2.484907 2.484907 188 +stuff 1 87 2.484907 2.484907 171 +state 1 76 2.564949 2.564949 207 +dept 1 64 2.772589 2.772589 291 +visit 1 63 2.772589 2.772589 288 +street 1 63 2.772589 2.772589 293 +major 1 56 2.890372 2.890372 315 +variou 1 56 2.890372 2.890372 317 +tabl 1 51 2.995732 2.995732 346 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +featur 1 46 3.091042 3.091042 386 +music 1 42 3.218876 3.218876 436 +movi 1 40 3.258097 3.258097 459 +origin 1 38 3.295837 3.295837 472 +industri 1 38 3.295837 3.295837 464 +either 1 35 3.401197 3.401197 506 +everi 1 34 3.401197 3.401197 519 +india 1 32 3.465736 3.465736 550 +though 1 27 3.637586 3.637586 622 +altern 1 26 3.688879 3.688879 641 +bookmark 1 26 3.688879 3.688879 639 +log 1 19 4.007333 4.007333 857 +north 1 19 4.007333 4.007333 873 +countri 1 15 4.248495 4.248495 1059 +galleri 1 13 4.382027 4.382027 1192 +stai 1 12 4.465908 4.465908 1215 +avenu 1 12 4.465908 4.465908 1277 +undergrad 1 9 4.753590 4.753590 1589 +kanpur 1 8 4.875197 4.875197 1744 +hack 1 7 5.010635 5.010635 1950 +gatewai 1 7 5.010635 5.010635 1942 +corner 1 7 5.010635 5.010635 1909 +rock 1 6 5.164786 5.164786 2164 +blue 1 6 5.164786 5.164786 2227 +chat 1 6 5.164786 5.164786 2128 +metal 1 4 5.568345 5.568345 3079 +randal 1 4 5.568345 5.568345 2776 +venkat 1 4 5.568345 5.568345 2702 +slave 1 3 5.857933 5.857933 3959 +kerala 1 3 5.857933 5.857933 3749 +assistantship 1 3 5.857933 5.857933 3660 +acad 1 3 5.857933 5.857933 3847 +icon 1 3 5.857933 5.857933 3362 +plakal 1 2 6.263398 6.263398 5568 +blah 1 2 6.263398 6.263398 5695 +geeki 1 2 6.263398 6.263398 5823 +iitk 1 2 6.263398 6.263398 6227 +snap 1 2 6.263398 6.263398 4962 +nerd 1 2 6.263398 6.263398 5231 +acknowledg 1 2 6.263398 6.263398 6062 +nifti 1 2 6.263398 6.263398 5504 +igor 1 2 6.263398 6.263398 6183 +ivanisev 1 2 6.263398 6.263398 6184 +yumpe 1 1 6.957497 6.957497 19498 +manoj 1 1 6.957497 6.957497 19499 +universityofwisconsin 1 1 6.957497 6.957497 19500 +calcutta 1 1 6.957497 6.957497 19496 +bosco 1 1 6.957497 6.957497 19497 +salesian 1 1 6.957497 6.957497 19501 +stare 1 1 6.957497 6.957497 19502 +barrel 1 1 6.957497 6.957497 19503 +nerdi 1 1 6.957497 6.957497 19504 +seealso 1 1 6.957497 6.957497 19505 +pinup 1 1 6.957497 6.957497 19506 +suresh 1 1 6.957497 6.957497 19507 +wisecrack 1 1 6.957497 6.957497 19508 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..7f50b9fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +theori 1 111 2.197225 2.197225 127 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +sinc 1 90 2.397895 2.397895 159 +build 1 85 2.484907 2.484907 184 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +want 1 79 2.564949 2.564949 199 +meet 1 72 2.639057 2.639057 229 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +creat 1 63 2.772589 2.772589 277 +explor 1 58 2.890372 2.890372 324 +finger 1 52 2.995732 2.995732 354 +music 1 42 3.218876 3.218876 436 +download 1 36 3.367296 3.367296 489 +random 1 34 3.401197 3.401197 511 +india 1 32 3.465736 3.465736 550 +storag 1 31 3.496508 3.496508 553 +packag 1 28 3.610918 3.610918 614 +bookmark 1 26 3.688879 3.688879 639 +jeff 1 25 3.737670 3.737670 673 +hierarchi 1 22 3.850148 3.850148 744 +spend 1 19 4.007333 4.007333 850 +estim 1 17 4.110874 4.110874 930 +dilbert 1 16 4.174387 4.174387 996 +princeton 1 15 4.248495 4.248495 1042 +econom 1 13 4.382027 4.382027 1184 +naughton 1 10 4.653960 4.653960 1450 +vldb 1 10 4.653960 4.653960 1470 +presenc 1 8 4.875197 4.875197 1671 +prasad 1 6 5.164786 5.164786 2126 +invest 1 6 5.164786 5.164786 2153 +aggreg 1 6 5.164786 5.164786 2219 +deshpand 1 5 5.347108 5.347108 2431 +multidimension 1 4 5.568345 5.568345 3091 +meanwhil 1 3 5.857933 5.857933 3129 +manageri 1 2 6.263398 6.263398 5135 +constuct 1 1 6.957497 6.957497 19509 +depar 1 1 6.957497 6.957497 19510 +multidimensionalaggreg 1 1 6.957497 6.957497 19511 +timex 1 1 6.957497 6.957497 19512 +comix 1 1 6.957497 6.957497 19513 +hakuna 1 1 6.957497 6.957497 19514 +matata 1 1 6.957497 6.957497 19515 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..acc2aa7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +databas 1 122 2.079442 2.079442 86 +assist 1 112 2.197225 2.197225 113 +west 1 83 2.484907 2.484907 192 +help 1 83 2.484907 2.484907 175 +educ 1 86 2.484907 2.484907 191 +resum 1 79 2.564949 2.564949 217 +summari 1 73 2.639057 2.639057 237 +html 1 75 2.639057 2.639057 235 +street 1 63 2.772589 2.772589 293 +prof 1 64 2.772589 2.772589 273 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +advisor 1 51 2.995732 2.995732 355 +basic 1 50 3.044522 3.044522 360 +india 1 32 3.465736 3.465736 550 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +asha 1 3 5.857933 5.857933 4037 +poosala 1 2 6.263398 6.263398 6228 +vishi 1 1 6.957497 6.957497 19516 +viswanath 1 1 6.957497 6.957497 19517 +reseach 1 1 6.957497 6.957497 19518 +voluntari 1 1 6.957497 6.957497 19519 +interestsuw 1 1 6.957497 6.957497 19520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..e1aaf237 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +confer 1 126 2.079442 2.079442 100 +dayton 1 119 2.079442 2.079442 104 +manag 1 114 2.197225 2.197225 125 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +real 1 93 2.397895 2.397895 144 +proceed 1 93 2.397895 2.397895 152 +graphic 1 90 2.397895 2.397895 147 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +optim 1 79 2.564949 2.564949 197 +issu 1 78 2.564949 2.564949 211 +exampl 1 77 2.564949 2.564949 195 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +decemb 1 80 2.564949 2.564949 215 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +order 1 69 2.708050 2.708050 249 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +knowledg 1 67 2.708050 2.708050 243 +view 1 70 2.708050 2.708050 254 +practic 1 70 2.708050 2.708050 246 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +evalu 1 64 2.772589 2.772589 266 +abstract 1 62 2.772589 2.772589 276 +previou 1 62 2.772589 2.772589 290 +complex 1 64 2.772589 2.772589 269 +plan 1 65 2.772589 2.772589 272 +result 1 65 2.772589 2.772589 281 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +street 1 63 2.772589 2.772589 293 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +march 1 61 2.833213 2.833213 295 +variou 1 56 2.890372 2.890372 317 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +case 1 51 2.995732 2.995732 351 +set 1 50 3.044522 3.044522 361 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +natur 1 44 3.135494 3.135494 406 +algebra 1 45 3.135494 3.135494 394 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +even 1 45 3.135494 3.135494 393 +answer 1 45 3.135494 3.135494 391 +combin 1 42 3.218876 3.218876 421 +howev 1 41 3.218876 3.218876 422 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +theoret 1 39 3.258097 3.258097 446 +form 1 39 3.258097 3.258097 443 +join 1 39 3.258097 3.258097 457 +littl 1 39 3.258097 3.258097 454 +map 1 39 3.258097 3.258097 452 +probabl 1 40 3.258097 3.258097 455 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +mean 1 37 3.332205 3.332205 477 +cost 1 37 3.332205 3.332205 480 +multi 1 36 3.367296 3.367296 493 +next 1 34 3.401197 3.401197 517 +singl 1 34 3.401197 3.401197 510 +either 1 35 3.401197 3.401197 506 +michael 1 35 3.401197 3.401197 514 +queri 1 33 3.433987 3.433987 524 +express 1 32 3.465736 3.465736 540 +kind 1 32 3.465736 3.465736 541 +idea 1 32 3.465736 3.465736 545 +given 1 32 3.465736 3.465736 538 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +storag 1 31 3.496508 3.496508 553 +scientist 1 31 3.496508 3.496508 560 +posit 1 31 3.496508 3.496508 552 +domain 1 30 3.555348 3.555348 564 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +consid 1 29 3.583519 3.583519 590 +built 1 29 3.583519 3.583519 592 +propos 1 28 3.610918 3.610918 602 +weather 1 28 3.610918 3.610918 618 +ask 1 28 3.610918 3.610918 597 +scale 1 28 3.610918 3.610918 613 +except 1 28 3.610918 3.610918 607 +framework 1 28 3.610918 3.610918 606 +valu 1 25 3.737670 3.737670 665 +wai 1 25 3.737670 3.737670 662 +strategi 1 25 3.737670 3.737670 682 +client 1 25 3.737670 3.737670 679 +demonstr 1 24 3.761200 3.761200 694 +store 1 24 3.761200 3.761200 693 +daili 1 24 3.761200 3.761200 706 +sequenc 1 23 3.806662 3.806662 734 +input 1 23 3.806662 3.806662 727 +thread 1 23 3.806662 3.806662 722 +serv 1 22 3.850148 3.850148 758 +defin 1 22 3.850148 3.850148 746 +identifi 1 22 3.850148 3.850148 760 +disk 1 22 3.850148 3.850148 747 +sequenti 1 22 3.850148 3.850148 745 +sort 1 22 3.850148 3.850148 738 +deal 1 22 3.850148 3.850148 736 +instead 1 22 3.850148 3.850148 756 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +sigmod 1 19 4.007333 4.007333 877 +statu 1 18 4.060443 4.060443 885 +record 1 18 4.060443 4.060443 890 +event 1 18 4.060443 4.060443 896 +account 1 18 4.060443 4.060443 882 +medic 1 17 4.110874 4.110874 958 +monitor 1 17 4.110874 4.110874 941 +expand 1 17 4.110874 4.110874 928 +weekli 1 17 4.110874 4.110874 919 +estim 1 17 4.110874 4.110874 930 +advantag 1 16 4.174387 4.174387 987 +easi 1 16 4.174387 4.174387 969 +ramakrishnan 1 16 4.174387 4.174387 972 +indic 1 15 4.248495 4.248495 1013 +stream 1 15 4.248495 4.248495 1015 +livni 1 15 4.248495 4.248495 1053 +manner 1 14 4.317488 4.317488 1074 +embed 1 14 4.317488 4.317488 1102 +convent 1 14 4.317488 4.317488 1072 +miron 1 14 4.317488 4.317488 1110 +econom 1 13 4.382027 4.382027 1184 +social 1 13 4.382027 4.382027 1123 +opportun 1 13 4.382027 4.382027 1161 +composit 1 13 4.382027 4.382027 1150 +step 1 13 4.382027 4.382027 1138 +front 1 13 4.382027 4.382027 1154 +amount 1 12 4.465908 4.465908 1208 +uniqu 1 12 4.465908 4.465908 1228 +scan 1 12 4.465908 4.465908 1243 +buffer 1 12 4.465908 4.465908 1211 +insid 1 12 4.465908 4.465908 1262 +raghu 1 12 4.465908 4.465908 1212 +regard 1 11 4.553877 4.553877 1309 +shore 1 11 4.553877 4.553877 1377 +motiv 1 11 4.553877 4.553877 1346 +instanc 1 11 4.553877 4.553877 1322 +relationship 1 10 4.653960 4.653960 1383 +devis 1 10 4.653960 4.653960 1451 +reli 1 10 4.653960 4.653960 1411 +subset 1 10 4.653960 4.653960 1425 +vldb 1 10 4.653960 4.653960 1470 +cheng 1 10 4.653960 4.653960 1381 +declar 1 9 4.753590 4.753590 1526 +tempor 1 9 4.753590 4.753590 1584 +compos 1 9 4.753590 4.753590 1527 +vice 1 9 4.753590 4.753590 1604 +strength 1 9 4.753590 4.753590 1494 +lock 1 9 4.753590 4.753590 1551 +respect 1 9 4.753590 4.753590 1545 +meta 1 9 4.753590 4.753590 1505 +intermedi 1 9 4.753590 4.753590 1497 +observ 1 9 4.753590 4.753590 1578 +mode 1 9 4.753590 4.753590 1492 +pose 1 9 4.753590 4.753590 1535 +pageth 1 7 5.010635 5.010635 1939 +notion 1 7 5.010635 5.010635 1947 +merg 1 7 5.010635 5.010635 1862 +whenev 1 7 5.010635 5.010635 1883 +therefor 1 7 5.010635 5.010635 1822 +supportfor 1 7 5.010635 5.010635 1854 +seshadri 1 7 5.010635 5.010635 1803 +financi 1 6 5.164786 5.164786 2197 +histor 1 6 5.164786 5.164786 2085 +consequ 1 6 5.164786 5.164786 1989 +feasibl 1 6 5.164786 5.164786 2157 +nest 1 6 5.164786 5.164786 2151 +temporari 1 6 5.164786 5.164786 2090 +praveen 1 6 5.164786 5.164786 1996 +treat 1 5 5.347108 5.347108 2521 +greater 1 5 5.347108 5.347108 2258 +correl 1 5 5.347108 5.347108 2279 +dual 1 5 5.347108 5.347108 2522 +distinct 1 5 5.347108 5.347108 2319 +overlap 1 5 5.347108 5.347108 2368 +complementari 1 5 5.347108 5.347108 2523 +educomput 1 5 5.347108 5.347108 2524 +phenomena 1 4 5.568345 5.568345 2962 +richter 1 4 5.568345 5.568345 2957 +flavor 1 4 5.568345 5.568345 2625 +zoom 1 4 5.568345 5.568345 2961 +inadequ 1 3 5.857933 5.857933 3730 +tediou 1 3 5.857933 5.857933 3731 +ineffici 1 3 5.857933 5.857933 3457 +megabyt 1 3 5.857933 5.857933 3732 +claus 1 3 5.857933 5.857933 3733 +offset 1 3 5.857933 5.857933 3467 +collaps 1 3 5.857933 5.857933 3729 +hourli 1 3 5.857933 5.857933 3734 +thathav 1 3 5.857933 5.857933 3735 +serverarchitectur 1 3 5.857933 5.857933 3736 +comad 1 3 5.857933 5.857933 3737 +informationfor 1 3 5.857933 5.857933 3738 +objectivescurr 1 2 6.263398 6.263398 5255 +statusmotiv 1 2 6.263398 6.263398 5256 +exampleseq 1 2 6.263398 6.263398 5257 +sequin 1 2 6.263398 6.263398 5250 +languageoptim 1 2 6.263398 6.263398 5258 +techniquesseq 1 2 6.263398 6.263398 5259 +developmentpublicationsrel 1 2 6.263398 6.263398 5260 +workcontact 1 2 6.263398 6.263398 5261 +informationproject 1 2 6.263398 6.263398 5262 +processingof 1 2 6.263398 6.263398 5263 +theseappl 1 2 6.263398 6.263398 5264 +metereolog 1 2 6.263398 6.263398 5265 +andbiolog 1 2 6.263398 6.263398 5266 +semanticstak 1 2 6.263398 6.263398 5267 +evaluationintegr 1 2 6.263398 6.263398 5268 +canstor 1 2 6.263398 6.263398 5269 +sequencesthes 1 2 6.263398 6.263398 5270 +themost 1 2 6.263398 6.263398 5271 +statusth 1 2 6.263398 6.263398 5272 +algebraicqueri 1 2 6.263398 6.263398 5273 +analogousto 1 2 6.263398 6.263398 5274 +candeclar 1 2 6.263398 6.263398 5275 +likesql 1 2 6.263398 6.263398 5276 +versa 1 2 6.263398 6.263398 5277 +querya 1 2 6.263398 6.263398 5278 +meteorolog 1 2 6.263398 6.263398 5253 +occurr 1 2 6.263398 6.263398 5279 +volcano 1 2 6.263398 6.263398 5252 +erupt 1 2 6.263398 6.263398 5280 +didth 1 2 6.263398 6.263398 5281 +earthquak 1 2 6.263398 6.263398 5251 +groupbi 1 2 6.263398 6.263398 5282 +subqueri 1 2 6.263398 6.263398 5283 +aggregatefunct 1 2 6.263398 6.263398 5284 +sequencesord 1 2 6.263398 6.263398 5285 +modelth 1 2 6.263398 6.263398 5286 +aredescrib 1 2 6.263398 6.263398 5254 +gist 1 2 6.263398 6.263398 5287 +ordereddomain 1 2 6.263398 6.263398 5288 +andposit 1 2 6.263398 6.263398 5289 +recordsmap 1 2 6.263398 6.263398 5290 +rise 1 2 6.263398 6.263398 5291 +relationaloper 1 2 6.263398 6.263398 5292 +andaggreg 1 2 6.263398 6.263398 5293 +researchersin 1 2 6.263398 6.263398 5294 +movingaggreg 1 2 6.263398 6.263398 5295 +worldsitu 1 2 6.263398 6.263398 5296 +extensionof 1 2 6.263398 6.263398 5297 +ofseq 1 2 6.263398 6.263398 5298 +languagew 1 2 6.263398 6.263398 5299 +usingwhich 1 2 6.263398 6.263398 5300 +languagei 1 2 6.263398 6.263398 5301 +queriesa 1 2 6.263398 6.263398 5302 +techniquesw 1 2 6.263398 6.263398 5303 +developmentth 1 2 6.263398 6.263398 5304 +viaa 1 2 6.263398 6.263398 5305 +ontop 1 2 6.263398 6.263398 5306 +languageswhich 1 2 6.263398 6.263398 5307 +arbitrarylevel 1 2 6.263398 6.263398 5308 +viceversa 1 2 6.263398 6.263398 5309 +detailson 1 2 6.263398 6.263398 5310 +publicationssequ 1 2 6.263398 6.263398 5311 +datapraveen 1 2 6.263398 6.263398 5312 +systempraveen 1 2 6.263398 6.263398 5313 +queriesraghu 1 2 6.263398 6.263398 5314 +workthedevis 1 2 6.263398 6.263398 5315 +visualizationenviron 1 2 6.263398 6.263398 5316 +servercontact 1 2 6.263398 6.263398 5317 +eduraghu 1 2 6.263398 6.263398 5318 +edumiron 1 2 6.263398 6.263398 5319 +seshadripraveen 1 2 6.263398 6.263398 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..a6de6987 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +hour 1 165 1.791759 1.791759 46 +madison 1 165 1.791759 1.791759 55 +person 1 111 2.197225 2.197225 117 +school 1 84 2.484907 2.484907 188 +resum 1 79 2.564949 2.564949 217 +bookmark 1 26 3.688879 3.688879 639 +andrew 1 19 4.007333 4.007333 849 +histori 1 19 4.007333 4.007333 853 +vista 1 10 4.653960 4.653960 1452 +alta 1 4 5.568345 5.568345 3039 +prock 1 2 6.263398 6.263398 4786 +clemen 1 1 6.957497 6.957497 19521 +hockert 1 1 6.957497 6.957497 19522 +prockoffic 1 1 6.957497 6.957497 19523 +doonesburi 1 1 6.957497 6.957497 19524 +trot 1 1 6.957497 6.957497 19525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..6a2cee34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +solut 1 82 2.484907 2.484907 162 +journal 1 83 2.484907 2.484907 183 +exampl 1 77 2.564949 2.564949 195 +complet 1 77 2.564949 2.564949 208 +state 1 76 2.564949 2.564949 207 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +practic 1 70 2.708050 2.708050 246 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +sever 1 56 2.890372 2.890372 322 +sampl 1 53 2.944439 2.944439 339 +februari 1 54 2.944439 2.944439 328 +much 1 52 2.995732 2.995732 349 +telephon 1 50 3.044522 3.044522 373 +understand 1 47 3.091042 3.091042 384 +long 1 43 3.178054 3.178054 413 +combin 1 42 3.218876 3.218876 421 +annual 1 40 3.258097 3.258097 458 +error 1 40 3.258097 3.258097 449 +game 1 36 3.367296 3.367296 498 +tree 1 36 3.367296 3.367296 492 +random 1 34 3.401197 3.401197 511 +approxim 1 35 3.401197 3.401197 509 +posit 1 31 3.496508 3.496508 552 +hard 1 30 3.555348 3.555348 563 +graph 1 30 3.555348 3.555348 576 +turn 1 29 3.583519 3.583519 586 +synchron 1 29 3.583519 3.583519 588 +progress 1 28 3.610918 3.610918 598 +bound 1 26 3.688879 3.688879 659 +although 1 25 3.737670 3.737670 667 +strategi 1 25 3.737670 3.737670 682 +proof 1 23 3.806662 3.806662 720 +identifi 1 22 3.850148 3.850148 760 +prove 1 19 4.007333 4.007333 848 +easi 1 16 4.174387 4.174387 969 +novel 1 15 4.248495 4.248495 1039 +polynomi 1 14 4.317488 4.317488 1069 +finit 1 14 4.317488 4.317488 1106 +automata 1 13 4.382027 4.382027 1135 +minimum 1 9 4.753590 4.753590 1555 +span 1 8 4.875197 4.875197 1751 +prover 1 8 4.875197 4.875197 1653 +round 1 8 4.875197 4.875197 1769 +aris 1 7 5.010635 5.010635 1924 +eduto 1 7 5.010635 5.010635 1956 +ann 1 6 5.164786 5.164786 2065 +ladner 1 6 5.164786 5.164786 2062 +pub 1 6 5.164786 5.164786 2239 +proceedingsof 1 5 5.347108 5.347108 2331 +provabl 1 5 5.347108 5.347108 2558 +surprisingli 1 4 5.568345 5.568345 2609 +expens 1 4 5.568345 5.568345 2678 +condon 1 3 5.857933 5.857933 3309 +neg 1 3 5.857933 5.857933 3451 +theoryand 1 3 5.857933 5.857933 3350 +nondeterminist 1 3 5.857933 5.857933 3560 +wigderson 1 2 6.263398 6.263398 6035 +complexityclass 1 1 6.957497 6.957497 19526 +interactiveproof 1 1 6.957497 6.957497 19527 +nondetermin 1 1 6.957497 6.957497 19528 +suchmodel 1 1 6.957497 6.957497 19529 +proven 1 1 6.957497 6.957497 19530 +classicproblem 1 1 6.957497 6.957497 19531 +theoryof 1 1 6.957497 6.957497 19532 +computationalproblem 1 1 6.957497 6.957497 19533 +whichhard 1 1 6.957497 6.957497 19534 +recentresult 1 1 6.957497 6.957497 19535 +modelsof 1 1 6.957497 6.957497 19536 +approximabilityresult 1 1 6.957497 6.957497 19537 +developingboth 1 1 6.957497 6.957497 19538 +hardcombinatori 1 1 6.957497 6.957497 19539 +forsort 1 1 6.957497 6.957497 19540 +costscan 1 1 6.957497 6.957497 19541 +probabilisticst 1 1 6.957497 6.957497 19542 +hellerstein 1 1 6.957497 6.957497 19543 +pottl 1 1 6.957497 6.957497 19544 +pspace 1 1 6.957497 6.957497 19545 +caiand 1 1 6.957497 6.957497 19546 +lipton 1 1 6.957497 6.957497 19547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..fba94d5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +dayton 1 119 2.079442 2.079442 104 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +techniqu 1 99 2.302585 2.302585 138 +associ 1 93 2.397895 2.397895 151 +question 1 91 2.397895 2.397895 141 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +larg 1 82 2.484907 2.484907 168 +method 1 80 2.564949 2.564949 213 +dynam 1 76 2.564949 2.564949 194 +appli 1 71 2.639057 2.639057 226 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +degre 1 69 2.708050 2.708050 259 +complex 1 64 2.772589 2.772589 269 +creat 1 63 2.772589 2.772589 277 +type 1 61 2.833213 2.833213 296 +automat 1 61 2.833213 2.833213 306 +sampl 1 53 2.944439 2.944439 339 +local 1 55 2.944439 2.944439 334 +telephon 1 50 3.044522 3.044522 373 +set 1 50 3.044522 3.044522 361 +theoret 1 39 3.258097 3.258097 446 +littl 1 39 3.258097 3.258097 454 +error 1 40 3.258097 3.258097 449 +formal 1 37 3.332205 3.332205 478 +global 1 34 3.401197 3.401197 520 +obtain 1 33 3.433987 3.433987 534 +graph 1 30 3.555348 3.555348 576 +scale 1 28 3.610918 3.610918 613 +great 1 27 3.637586 3.637586 626 +effort 1 26 3.688879 3.688879 652 +detect 1 26 3.688879 3.688879 646 +concern 1 25 3.737670 3.737670 666 +known 1 24 3.761200 3.761200 702 +proof 1 23 3.806662 3.806662 720 +sequenc 1 23 3.806662 3.806662 734 +geometri 1 22 3.850148 3.850148 752 +properti 1 22 3.850148 3.850148 749 +deal 1 22 3.850148 3.850148 736 +inth 1 22 3.850148 3.850148 741 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +fact 1 21 3.912023 3.912023 780 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +whether 1 17 4.110874 4.110874 918 +young 1 16 4.174387 4.174387 991 +spars 1 16 4.174387 4.174387 989 +fourth 1 16 4.174387 4.174387 999 +biologi 1 15 4.248495 4.248495 1049 +decid 1 14 4.317488 4.317488 1075 +polynomi 1 14 4.317488 4.317488 1069 +anonym 1 14 4.317488 4.317488 1100 +incomput 1 14 4.317488 4.317488 1096 +necessari 1 13 4.382027 4.382027 1147 +primarili 1 13 4.382027 4.382027 1185 +discret 1 13 4.382027 4.382027 1165 +assembl 1 12 4.465908 4.465908 1207 +weight 1 12 4.465908 4.465908 1204 +purdu 1 10 4.653960 4.653960 1466 +rapid 1 10 4.653960 4.653960 1453 +eduto 1 7 5.010635 5.010635 1956 +biolog 1 6 5.164786 5.164786 2147 +determinist 1 6 5.164786 5.164786 2034 +pub 1 6 5.164786 5.164786 2239 +joseph 1 5 5.347108 5.347108 2327 +twenti 1 5 5.347108 5.347108 2540 +despit 1 5 5.347108 5.347108 2317 +tiwari 1 5 5.347108 5.347108 2385 +gone 1 4 5.568345 5.568345 3072 +resolv 1 4 5.568345 5.568345 2675 +algorithmsfor 1 4 5.568345 5.568345 2748 +genom 1 3 5.857933 5.857933 3546 +collaps 1 3 5.857933 5.857933 3729 +ninth 1 3 5.857933 5.857933 3616 +soar 1 3 5.857933 5.857933 3506 +adequaci 1 2 6.263398 6.263398 6229 +fragment 1 2 6.263398 6.263398 6000 +homolog 1 2 6.263398 6.263398 5441 +analysisof 1 2 6.263398 6.263398 4277 +deborah 1 1 6.957497 6.957497 19548 +studyingth 1 1 6.957497 6.957497 19549 +andnondeterminist 1 1 6.957497 6.957497 19550 +stillknow 1 1 6.957497 6.957497 19551 +computerscientist 1 1 6.957497 6.957497 19552 +techniquesfor 1 1 6.957497 6.957497 19553 +investigatesth 1 1 6.957497 6.957497 19554 +exploresin 1 1 6.957497 6.957497 19555 +resolveproblem 1 1 6.957497 6.957497 19556 +theseinclud 1 1 6.957497 6.957497 19557 +handlingrepetit 1 1 6.957497 6.957497 19558 +graphtheoret 1 1 6.957497 6.957497 19559 +subexponenti 1 1 6.957497 6.957497 19560 +pruim 1 1 6.957497 6.957497 19561 +theoryconfer 1 1 6.957497 6.957497 19562 +spanner 1 1 6.957497 6.957497 19563 +althof 1 1 6.957497 6.957497 19564 +dobkin 1 1 6.957497 6.957497 19565 +meidanisand 1 1 6.957497 6.957497 19566 +scandinavianworkshop 1 1 6.957497 6.957497 19567 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..29c03ce5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +graphic 1 90 2.397895 2.397895 147 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +institut 1 84 2.484907 2.484907 187 +resourc 1 81 2.484907 2.484907 172 +journal 1 83 2.484907 2.484907 183 +interfac 1 79 2.564949 2.564949 209 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +intellig 1 72 2.639057 2.639057 225 +simul 1 66 2.708050 2.708050 255 +differ 1 66 2.708050 2.708050 253 +polici 1 64 2.772589 2.772589 279 +laboratori 1 63 2.772589 2.772589 292 +foundat 1 62 2.772589 2.772589 286 +creat 1 63 2.772589 2.772589 277 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +automat 1 61 2.833213 2.833213 306 +major 1 56 2.890372 2.890372 315 +sampl 1 53 2.944439 2.944439 339 +telephon 1 50 3.044522 3.044522 373 +visual 1 48 3.044522 3.044522 372 +join 1 39 3.258097 3.258097 457 +error 1 40 3.258097 3.258097 449 +purpos 1 37 3.332205 3.332205 481 +queri 1 33 3.433987 3.433987 524 +consid 1 29 3.583519 3.583519 590 +synchron 1 29 3.583519 3.583519 588 +sequenc 1 23 3.806662 3.806662 734 +displai 1 23 3.806662 3.806662 712 +emphasi 1 22 3.850148 3.850148 755 +properti 1 22 3.850148 3.850148 749 +disk 1 22 3.850148 3.850148 747 +util 1 21 3.912023 3.912023 774 +sigmod 1 19 4.007333 4.007333 877 +event 1 18 4.060443 4.060443 896 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 1 15 4.248495 4.248495 1053 +novel 1 15 4.248495 4.248495 1039 +miron 1 14 4.317488 4.317488 1110 +discret 1 13 4.382027 4.382027 1165 +sigmetr 1 13 4.382027 4.382027 1173 +emploi 1 12 4.465908 4.465908 1284 +israel 1 11 4.553877 4.553877 1366 +ioannidi 1 8 4.875197 4.875197 1714 +eduto 1 7 5.010635 5.010635 1956 +schema 1 6 5.164786 5.164786 1988 +pub 1 6 5.164786 5.164786 2239 +weizmann 1 4 5.568345 5.568345 2858 +tape 1 4 5.568345 5.568345 2959 +evaluationof 1 3 5.857933 5.857933 3192 +interplai 1 3 5.857933 5.857933 3726 +myllymaki 1 3 5.857933 5.857933 4022 +metaphor 1 3 5.857933 5.857933 4038 +rehovot 1 2 6.263398 6.263398 4891 +developmentof 1 2 6.263398 6.263398 6041 +managementsystem 1 2 6.263398 6.263398 4365 +researchinvolv 1 2 6.263398 6.263398 5556 +asreal 1 1 6.957497 6.957497 19568 +specialemphasi 1 1 6.957497 6.957497 19569 +systemand 1 1 6.957497 6.957497 19570 +performancestudi 1 1 6.957497 6.957497 19571 +modelingand 1 1 6.957497 6.957497 19572 +implementinga 1 1 6.957497 6.957497 19573 +visualizationtool 1 1 6.957497 6.957497 19574 +sashadri 1 1 6.957497 6.957497 19575 +haberand 1 1 6.957497 6.957497 19576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..86a8b2de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +dayton 1 119 2.079442 2.079442 104 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +part 1 98 2.302585 2.302585 129 +real 1 93 2.397895 2.397895 144 +octob 1 89 2.397895 2.397895 156 +journal 1 83 2.484907 2.484907 183 +second 1 81 2.484907 2.484907 166 +method 1 80 2.564949 2.564949 213 +appear 1 78 2.564949 2.564949 210 +summari 1 73 2.639057 2.639057 237 +order 1 69 2.708050 2.708050 249 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +major 1 56 2.890372 2.890372 315 +sever 1 56 2.890372 2.890372 322 +sampl 1 53 2.944439 2.944439 339 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +without 1 50 3.044522 3.044522 370 +effect 1 46 3.091042 3.091042 385 +york 1 41 3.218876 3.218876 435 +small 1 39 3.258097 3.258097 447 +error 1 40 3.258097 3.258097 449 +hand 1 37 3.332205 3.332205 475 +procedur 1 36 3.367296 3.367296 488 +concept 1 32 3.465736 3.465736 537 +posit 1 31 3.496508 3.496508 552 +valu 1 25 3.737670 3.737670 665 +equat 1 23 3.806662 3.806662 724 +emphasi 1 22 3.850148 3.850148 755 +thu 1 21 3.912023 3.912023 773 +siam 1 21 3.912023 3.912023 800 +definit 1 19 4.007333 4.007333 864 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +attempt 1 17 4.110874 4.110874 917 +estim 1 17 4.110874 4.110874 930 +spars 1 16 4.174387 4.174387 989 +condit 1 16 4.174387 4.174387 975 +discret 1 13 4.382027 4.382027 1165 +boundari 1 7 5.010635 5.010635 1929 +eduto 1 7 5.010635 5.010635 1956 +mix 1 6 5.164786 5.164786 2200 +pub 1 6 5.164786 5.164786 2239 +pivot 1 5 5.347108 5.347108 2426 +symmetr 1 4 5.568345 5.568345 2908 +ellipt 1 3 5.857933 5.857933 3774 +preserv 1 3 5.857933 5.857933 3628 +thesystem 1 3 5.857933 5.857933 3881 +encount 1 3 5.857933 5.857933 3128 +attack 1 3 5.857933 5.857933 3168 +singular 1 3 5.857933 5.857933 3366 +parter 1 2 6.263398 6.263398 4075 +solutionof 1 2 6.263398 6.263398 5056 +linearalgebra 1 2 6.263398 6.263398 4833 +scientificcomput 1 2 6.263398 6.263398 4145 +seymour 1 1 6.957497 6.957497 19578 +indefinit 1 1 6.957497 6.957497 19579 +classicalit 1 1 6.957497 6.957497 19580 +multigrid 1 1 6.957497 6.957497 19581 +effectivelywhen 1 1 6.957497 6.957497 19582 +bemad 1 1 6.957497 6.957497 19583 +operatori 1 1 6.957497 6.957497 19584 +casedirect 1 1 6.957497 6.957497 19585 +challengingproblem 1 1 6.957497 6.957497 19586 +nowinvolv 1 1 6.957497 6.957497 19587 +precondit 1 1 6.957497 6.957497 19577 +specialmultigrid 1 1 6.957497 6.957497 19588 +chebyshev 1 1 6.957497 6.957497 19589 +collact 1 1 6.957497 6.957497 19590 +ellipticparti 1 1 6.957497 6.957497 19591 +journalon 1 1 6.957497 6.957497 19592 +numbersand 1 1 6.957497 6.957497 19593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..f35e19b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +high 1 130 2.079442 2.079442 101 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +techniqu 1 99 2.302585 2.302585 138 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +ieee 1 86 2.484907 2.484907 190 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +june 1 79 2.564949 2.564949 214 +complet 1 77 2.564949 2.564949 208 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +effici 1 73 2.639057 2.639057 233 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +polici 1 64 2.772589 2.772589 279 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +processor 1 54 2.944439 2.944439 335 +sampl 1 53 2.944439 2.944439 339 +telephon 1 50 3.044522 3.044522 373 +california 1 46 3.091042 3.091042 388 +featur 1 46 3.091042 3.091042 386 +join 1 39 3.258097 3.258097 457 +error 1 40 3.258097 3.258097 449 +industri 1 38 3.295837 3.295837 464 +workstat 1 37 3.332205 3.332205 479 +approxim 1 35 3.401197 3.401197 509 +synchron 1 29 3.583519 3.583519 588 +limit 1 29 3.583519 3.583519 585 +repres 1 26 3.688879 3.688879 656 +valu 1 25 3.737670 3.737670 665 +accur 1 25 3.737670 3.737670 680 +togeth 1 23 3.806662 3.806662 714 +emphasi 1 22 3.850148 3.850148 755 +alloc 1 20 3.951244 3.951244 821 +precis 1 15 4.248495 4.248495 1023 +hybrid 1 15 4.248495 4.248495 1057 +sigmetr 1 13 4.382027 4.382027 1173 +mari 1 12 4.465908 4.465908 1266 +workload 1 12 4.465908 4.465908 1210 +characterist 1 12 4.465908 4.465908 1257 +operatingsystem 1 10 4.653960 4.653960 1401 +custom 1 10 4.653960 4.653960 1414 +vernon 1 9 4.753590 4.753590 1556 +angel 1 8 4.875197 4.875197 1779 +character 1 8 4.875197 4.875197 1767 +reus 1 8 4.875197 4.875197 1661 +hash 1 8 4.875197 4.875197 1618 +carei 1 8 4.875197 4.875197 1781 +analyt 1 7 5.010635 5.010635 1913 +prioriti 1 7 5.010635 5.010635 1792 +interpol 1 7 5.010635 5.010635 1823 +chiang 1 7 5.010635 5.010635 1853 +eduto 1 7 5.010635 5.010635 1956 +pub 1 6 5.164786 5.164786 2239 +yield 1 5 5.347108 5.347108 2458 +fair 1 5 5.347108 5.347108 2333 +infocom 1 3 5.857933 5.857933 3283 +paralleland 1 2 6.263398 6.263398 5805 +petri 1 2 6.263398 6.263398 4414 +intuit 1 2 6.263398 6.263398 4921 +performanceanalysi 1 2 6.263398 6.263398 5629 +schedulingpolici 1 2 6.263398 6.263398 5879 +memorymanag 1 2 6.263398 6.263398 4158 +preemption 1 2 6.263398 6.263398 6230 +mansharamani 1 2 6.263398 6.263398 6231 +applicationto 1 1 6.957497 6.957497 19594 +techniquesi 1 1 6.957497 6.957497 19595 +colleaguesinclud 1 1 6.957497 6.957497 19596 +customizedmean 1 1 6.957497 6.957497 19597 +gtpn 1 1 6.957497 6.957497 19598 +systemfeatur 1 1 6.957497 6.957497 19599 +equationsthat 1 1 6.957497 6.957497 19600 +butcan 1 1 6.957497 6.957497 19601 +proposedth 1 1 6.957497 6.957497 19602 +approximationsfor 1 1 6.957497 6.957497 19603 +techniquemai 1 1 6.957497 6.957497 19604 +broader 1 1 6.957497 6.957497 19605 +performanceparallel 1 1 6.957497 6.957497 19606 +dqdb 1 1 6.957497 6.957497 19607 +slot 1 1 6.957497 6.957497 19608 +brewster 1 1 6.957497 6.957497 19609 +pateland 1 1 6.957497 6.957497 19610 +forrun 1 1 6.957497 6.957497 19611 +with 1 1 6.957497 6.957497 19612 +sigmetricsconfer 1 1 6.957497 6.957497 19613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..db7b4e69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +welcom 1 122 2.079442 2.079442 99 +wang 1 21 3.912023 3.912023 790 +edulast 1 17 4.110874 4.110874 927 +qinqin 1 1 6.957497 6.957497 19614 +pageqw 1 1 6.957497 6.957497 19615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..975f85ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +teach 1 108 2.197225 2.197225 112 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +take 1 97 2.302585 2.302585 134 +associ 1 93 2.397895 2.397895 151 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +activ 1 84 2.484907 2.484907 182 +institut 1 84 2.484907 2.484907 187 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +school 1 84 2.484907 2.484907 188 +larg 1 82 2.484907 2.484907 168 +sourc 1 77 2.564949 2.564949 201 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +logic 1 71 2.639057 2.639057 230 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +august 1 66 2.708050 2.708050 257 +street 1 63 2.772589 2.772589 293 +prof 1 64 2.772589 2.772589 273 +result 1 65 2.772589 2.772589 281 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +evalu 1 64 2.772589 2.772589 266 +content 1 59 2.833213 2.833213 302 +publish 1 57 2.890372 2.890372 326 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +explor 1 58 2.890372 2.890372 324 +cover 1 55 2.944439 2.944439 329 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +set 1 50 3.044522 3.044522 361 +visual 1 48 3.044522 3.044522 372 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +multipl 1 39 3.258097 3.258097 453 +continu 1 39 3.258097 3.258097 448 +sciencesunivers 1 37 3.332205 3.332205 486 +formal 1 37 3.332205 3.332205 478 +tech 1 35 3.401197 3.401197 515 +next 1 34 3.401197 3.401197 517 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +independ 1 32 3.465736 3.465736 548 +express 1 32 3.465736 3.465736 540 +given 1 32 3.465736 3.465736 538 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +specifi 1 30 3.555348 3.555348 568 +rang 1 30 3.555348 3.555348 565 +focus 1 29 3.583519 3.583519 584 +cluster 1 28 3.610918 3.610918 612 +retriev 1 27 3.637586 3.637586 621 +mine 1 26 3.688879 3.688879 654 +constraint 1 26 3.688879 3.688879 636 +hill 1 25 3.737670 3.737670 670 +aspect 1 25 3.737670 3.737670 663 +pattern 1 24 3.761200 3.761200 689 +lab 1 24 3.761200 3.761200 698 +sequenc 1 23 3.806662 3.806662 734 +indian 1 22 3.850148 3.850148 769 +deal 1 22 3.850148 3.850148 736 +identifi 1 22 3.850148 3.850148 760 +toolkit 1 20 3.951244 3.951244 835 +definit 1 19 4.007333 4.007333 864 +stand 1 18 4.060443 4.060443 891 +ramakrishnan 1 16 4.174387 4.174387 972 +advantag 1 16 4.174387 4.174387 987 +upon 1 16 4.174387 4.174387 978 +livni 1 15 4.248495 4.248495 1053 +transit 1 15 4.248495 4.248495 1046 +heterogen 1 14 4.317488 4.317488 1090 +easili 1 14 4.317488 4.317488 1077 +dbm 1 13 4.382027 4.382027 1136 +joint 1 13 4.382027 4.382027 1130 +recurs 1 13 4.382027 4.382027 1127 +raghu 1 12 4.465908 4.465908 1212 +grow 1 12 4.465908 4.465908 1209 +employ 1 12 4.465908 4.465908 1291 +deduct 1 12 4.465908 4.465908 1236 +broad 1 11 4.553877 4.553877 1302 +usaphon 1 9 4.753590 4.753590 1600 +madra 1 8 4.875197 4.875197 1770 +ioannidi 1 8 4.875197 4.875197 1714 +closur 1 8 4.875197 4.875197 1643 +dataset 1 7 5.010635 5.010635 1914 +bottom 1 7 5.010635 5.010635 1906 +seshadri 1 7 5.010635 5.010635 1803 +ongo 1 6 5.164786 5.164786 2215 +bell 1 6 5.164786 5.164786 2224 +praveen 1 6 5.164786 5.164786 1996 +mcgraw 1 5 5.347108 5.347108 2262 +coral 1 5 5.347108 5.347108 2538 +minibas 1 4 5.568345 5.568345 2608 +exploratori 1 4 5.568345 5.568345 3073 +ofinform 1 4 5.568345 5.568345 2707 +successor 1 3 5.857933 5.857933 3576 +sudarshan 1 3 5.857933 5.857933 3885 +aimedat 1 2 6.263398 6.263398 6117 +murrai 1 2 6.263398 6.263398 5647 +srivastava 1 2 6.263398 6.263398 5395 +minibaseand 1 1 6.957497 6.957497 19616 +coralth 1 1 6.957497 6.957497 19617 +undergraduateand 1 1 6.957497 6.957497 19618 +inconjunct 1 1 6.957497 6.957497 19619 +coursesthat 1 1 6.957497 6.957497 19620 +deductiona 1 1 6.957497 6.957497 19621 +diversifi 1 1 6.957497 6.957497 19622 +increasinglyimport 1 1 6.957497 6.957497 19623 +dispers 1 1 6.957497 6.957497 19624 +rodin 1 1 6.957497 6.957497 19625 +severalissu 1 1 6.957497 6.957497 19626 +forsemant 1 1 6.957497 6.957497 19627 +serviceand 1 1 6.957497 6.957497 19628 +networkedclust 1 1 6.957497 6.957497 19629 +explorationfrom 1 1 6.957497 6.957497 19630 +assequ 1 1 6.957497 6.957497 19631 +seqsystem 1 1 6.957497 6.957497 19632 +optimizationissu 1 1 6.957497 6.957497 19633 +identifyingtrend 1 1 6.957497 6.957497 19634 +fromlarg 1 1 6.957497 6.957497 19635 +implementingan 1 1 6.957497 6.957497 19636 +customizea 1 1 6.957497 6.957497 19637 +specializedinform 1 1 6.957497 6.957497 19638 +indexedand 1 1 6.957497 6.957497 19639 +andmin 1 1 6.957497 6.957497 19640 +birchfor 1 1 6.957497 6.957497 19641 +devisea 1 1 6.957497 6.957497 19642 +databasequeri 1 1 6.957497 6.957497 19643 +featuressuch 1 1 6.957497 6.957497 19644 +ofarithmet 1 1 6.957497 6.957497 19645 +morecompactli 1 1 6.957497 6.957497 19646 +coraldeduct 1 1 6.957497 6.957497 19647 +fixpointevalu 1 1 6.957497 6.957497 19648 +efficientacross 1 1 6.957497 6.957497 19649 +sudarsha 1 1 6.957497 6.957497 19650 +divesh 1 1 6.957497 6.957497 19651 +managementfirst 1 1 6.957497 6.957497 19652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..a7663539 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +take 1 97 2.302585 2.302585 134 +call 1 91 2.397895 2.397895 153 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +learn 1 86 2.484907 2.484907 170 +know 1 80 2.564949 2.564949 198 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +state 1 76 2.564949 2.564949 207 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +degre 1 69 2.708050 2.708050 259 +collect 1 65 2.772589 2.772589 268 +plai 1 60 2.833213 2.833213 307 +semest 1 58 2.890372 2.890372 312 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +much 1 52 2.995732 2.995732 349 +get 1 46 3.091042 3.091042 380 +california 1 46 3.091042 3.091042 388 +could 1 46 3.091042 3.091042 383 +natur 1 44 3.135494 3.135494 406 +long 1 43 3.178054 3.178054 413 +third 1 43 3.178054 3.178054 412 +show 1 43 3.178054 3.178054 417 +music 1 42 3.218876 3.218876 436 +littl 1 39 3.258097 3.258097 454 +small 1 39 3.258097 3.258097 447 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +form 1 39 3.258097 3.258097 443 +winter 1 36 3.367296 3.367296 500 +go 1 33 3.433987 3.433987 529 +india 1 32 3.465736 3.465736 550 +kind 1 32 3.465736 3.465736 541 +travel 1 30 3.555348 3.555348 579 +hope 1 28 3.610918 3.610918 610 +great 1 27 3.637586 3.637586 626 +though 1 27 3.637586 3.637586 622 +enjoi 1 26 3.688879 3.688879 660 +sport 1 25 3.737670 3.737670 683 +concern 1 25 3.737670 3.737670 666 +magazin 1 24 3.761200 3.761200 704 +wish 1 24 3.761200 3.761200 692 +famili 1 23 3.806662 3.806662 735 +almost 1 22 3.850148 3.850148 742 +try 1 22 3.850148 3.850148 764 +born 1 21 3.912023 3.912023 798 +love 1 21 3.912023 3.912023 804 +watch 1 21 3.912023 3.912023 789 +nice 1 20 3.951244 3.951244 809 +tenni 1 20 3.951244 3.951244 838 +citi 1 19 4.007333 4.007333 874 +offici 1 18 4.060443 4.060443 894 +figur 1 18 4.060443 4.060443 903 +bachelor 1 17 4.110874 4.110874 957 +normal 1 16 4.174387 4.174387 995 +jose 1 16 4.174387 4.174387 976 +enough 1 15 4.248495 4.248495 1040 +rate 1 15 4.248495 4.248495 1037 +anywai 1 15 4.248495 4.248495 1047 +novel 1 15 4.248495 4.248495 1039 +came 1 13 4.382027 4.382027 1197 +cannot 1 13 4.382027 4.382027 1144 +sai 1 13 4.382027 4.382027 1175 +philosophi 1 13 4.382027 4.382027 1167 +stai 1 12 4.465908 4.465908 1215 +employ 1 12 4.465908 4.465908 1291 +walk 1 12 4.465908 4.465908 1281 +rest 1 12 4.465908 4.465908 1259 +surf 1 11 4.553877 4.553877 1301 +town 1 10 4.653960 4.653960 1458 +guess 1 10 4.653960 4.653960 1443 +sister 1 9 4.753590 4.753590 1524 +prefer 1 9 4.753590 4.753590 1491 +swim 1 9 4.753590 4.753590 1599 +kanpur 1 8 4.875197 4.875197 1744 +star 1 8 4.875197 4.875197 1717 +bridg 1 8 4.875197 4.875197 1764 +job 1 8 4.875197 4.875197 1702 +fortun 1 7 5.010635 5.010635 1872 +monei 1 7 5.010635 5.010635 1934 +cricket 1 7 5.010635 5.010635 1945 +slightli 1 7 5.010635 5.010635 1795 +parent 1 6 5.164786 5.164786 2204 +whatev 1 6 5.164786 5.164786 2097 +hike 1 6 5.164786 5.164786 2234 +televis 1 6 5.164786 5.164786 2118 +almaden 1 5 5.347108 5.347108 2511 +cyber 1 4 5.568345 5.568345 2909 +gone 1 4 5.568345 5.568345 3072 +compris 1 4 5.568345 5.568345 2862 +shouldn 1 4 5.568345 5.568345 2606 +suppos 1 4 5.568345 5.568345 3002 +skate 1 4 5.568345 5.568345 3046 +rahul 1 3 5.857933 5.857933 3464 +indianinstitut 1 3 5.857933 5.857933 4003 +romanc 1 3 5.857933 5.857933 3632 +trek 1 3 5.857933 5.857933 4025 +win 1 3 5.857933 5.857933 3593 +surfer 1 2 6.263398 6.263398 4982 +centr 1 2 6.263398 6.263398 4222 +northern 1 2 6.263398 6.263398 5861 +comedi 1 2 6.263398 6.263398 5822 +lover 1 2 6.263398 6.263398 6192 +paid 1 2 6.263398 6.263398 6081 +kapoorhello 1 1 6.957497 6.957497 19654 +schedulemydepartmentmyuniversityiitkanpuriitkclass 1 1 6.957497 6.957497 19655 +relatedlink 1 1 6.957497 6.957497 19656 +menow 1 1 6.957497 6.957497 19657 +andrais 1 1 6.957497 6.957497 19658 +elder 1 1 6.957497 6.957497 19659 +moneymagazin 1 1 6.957497 6.957497 19660 +livabl 1 1 6.957497 6.957497 19653 +editormust 1 1 6.957497 6.957497 19661 +greenland 1 1 6.957497 6.957497 19662 +complain 1 1 6.957497 6.957497 19663 +isawesom 1 1 6.957497 6.957497 19664 +regret 1 1 6.957497 6.957497 19665 +genr 1 1 6.957497 6.957497 19666 +gymnast 1 1 6.957497 6.957497 19667 +cloudi 1 1 6.957497 6.957497 19668 +breezi 1 1 6.957497 6.957497 19669 +youget 1 1 6.957497 6.957497 19670 +musicstuffmovi 1 1 6.957497 6.957497 19671 +televisioninternettravelotherbookmark 1 1 6.957497 6.957497 19672 +meget 1 1 6.957497 6.957497 19673 +guestbookrahul 1 1 6.957497 6.957497 19674 +eduh 1 1 6.957497 6.957497 19675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..d2f585bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +madison 1 165 1.791759 1.791759 55 +distribut 1 162 1.791759 1.791759 51 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +part 1 98 2.302585 2.302585 129 +west 1 83 2.484907 2.484907 192 +member 1 84 2.484907 2.484907 165 +thing 1 84 2.484907 2.484907 189 +integr 1 67 2.708050 2.708050 245 +street 1 63 2.772589 2.772589 293 +evalu 1 64 2.772589 2.772589 266 +major 1 56 2.890372 2.890372 315 +telephon 1 50 3.044522 3.044522 373 +music 1 42 3.218876 3.218876 436 +error 1 40 3.258097 3.258097 449 +vita 1 38 3.295837 3.295837 473 +committe 1 34 3.401197 3.401197 522 +curriculum 1 33 3.433987 3.433987 535 +team 1 27 3.637586 3.637586 625 +bookmark 1 26 3.688879 3.688879 639 +subject 1 26 3.688879 3.688879 647 +livni 1 15 4.248495 4.248495 1053 +minor 1 12 4.465908 4.465908 1237 +pascal 1 12 4.465908 4.465908 1213 +earth 1 10 4.653960 4.653960 1463 +chao 1 8 4.875197 4.875197 1753 +judg 1 8 4.875197 4.875197 1644 +uncertainti 1 7 5.010635 5.010635 1882 +truth 1 6 5.164786 5.164786 2179 +ohio 1 5 5.347108 5.347108 2447 +condor 1 5 5.347108 5.347108 2577 +raman 1 4 5.568345 5.568345 2827 +rajesh 1 3 5.857933 5.857933 3511 +off 1 3 5.857933 5.857933 3170 +wesleyan 1 3 5.857933 5.857933 3988 +saluja 1 3 5.857933 5.857933 3104 +novelti 1 2 6.263398 6.263398 5765 +monster 1 2 6.263398 6.263398 6207 +prodigi 1 2 6.263398 6.263398 5670 +old 1 1 6.957497 6.957497 19676 +homm 1 1 6.957497 6.957497 19677 +winsonsin 1 1 6.957497 6.957497 19678 +chimera 1 1 6.957497 6.957497 19679 +contradict 1 1 6.957497 6.957497 19680 +feebleworm 1 1 6.957497 6.957497 19681 +depositari 1 1 6.957497 6.957497 19682 +cloaca 1 1 6.957497 6.957497 19683 +theglori 1 1 6.957497 6.957497 19684 +shame 1 1 6.957497 6.957497 19685 +blais 1 1 6.957497 6.957497 19686 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..3531b57d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +comment 1 93 2.397895 2.397895 146 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +david 1 71 2.639057 2.639057 232 +street 1 63 2.772589 2.772589 293 +improv 1 62 2.772589 2.772589 289 +think 1 57 2.890372 2.890372 314 +electron 1 47 3.091042 3.091042 379 +might 1 41 3.218876 3.218876 426 +eduoffic 1 33 3.433987 3.433987 531 +altern 1 26 3.688879 3.688879 641 +scalabl 1 24 3.761200 3.761200 705 +hierarchi 1 22 3.850148 3.850148 744 +tell 1 21 3.912023 3.912023 777 +mostli 1 19 4.007333 4.007333 869 +estim 1 17 4.110874 4.110874 930 +dewitt 1 12 4.465908 4.465908 1270 +naughton 1 10 4.653960 4.653960 1450 +jeffrei 1 9 4.753590 4.753590 1612 +wall 1 9 4.753590 4.753590 1553 +paradis 1 8 4.875197 4.875197 1782 +presenc 1 8 4.875197 4.875197 1671 +bombai 1 7 5.010635 5.010635 1972 +aggreg 1 6 5.164786 5.164786 2219 +prasad 1 6 5.164786 5.164786 2126 +deshpand 1 5 5.347108 5.347108 2431 +multidimension 1 4 5.568345 5.568345 3091 +amit 1 4 5.568345 5.568345 2972 +ramasami 1 4 5.568345 5.568345 3088 +shukla 1 3 5.857933 5.857933 4030 +karthikeyan 1 3 5.857933 5.857933 4031 +mumbai 1 3 5.857933 5.857933 4029 +karthik 1 1 6.957497 6.957497 19687 +pagekarthikeyan 1 1 6.957497 6.957497 19688 +ramasamyabouti 1 1 6.957497 6.957497 19689 +projectshack 1 1 6.957497 6.957497 19690 +connectivityparadis 1 1 6.957497 6.957497 19691 +pthread 1 1 6.957497 6.957497 19692 +wrapperspublicationsstorag 1 1 6.957497 6.957497 19693 +presentationsweb 1 1 6.957497 6.957497 19694 +picturearchitectur 1 1 6.957497 6.957497 19695 +serversphoto 1 1 6.957497 6.957497 19696 +albumencount 1 1 6.957497 6.957497 19697 +leafperson 1 1 6.957497 6.957497 19698 +inforesum 1 1 6.957497 6.957497 19699 +financemonei 1 1 6.957497 6.957497 19700 +interestshack 1 1 6.957497 6.957497 19701 +photographycontact 1 1 6.957497 6.957497 19702 +informationstreet 1 1 6.957497 6.957497 19703 +addresskarthik 1 1 6.957497 6.957497 19704 +suggestionspleas 1 1 6.957497 6.957497 19705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..8ff53a39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +construct 1 139 1.945910 1.945910 82 +place 1 106 2.197225 2.197225 124 +visit 1 63 2.772589 2.772589 288 +space 1 57 2.890372 2.890372 310 +edulast 1 17 4.110874 4.110874 927 +stai 1 12 4.465908 4.465908 1215 +tune 1 12 4.465908 4.465908 1227 +login 1 9 4.753590 4.753590 1550 +kelli 1 4 5.568345 5.568345 2793 +ratliffoffic 1 1 6.957497 6.957497 19706 +genealog 1 1 6.957497 6.957497 19707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..02c648f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +imag 1 91 2.397895 2.397895 161 +control 1 82 2.484907 2.484907 164 +thing 1 84 2.484907 2.484907 189 +west 1 83 2.484907 2.484907 192 +main 1 67 2.708050 2.708050 256 +street 1 63 2.772589 2.772589 293 +digit 1 52 2.995732 2.995732 348 +advisor 1 51 2.995732 2.995732 355 +video 1 44 3.135494 3.135494 405 +product 1 33 3.433987 3.433987 527 +compress 1 23 3.806662 3.806662 719 +qualiti 1 20 3.951244 3.951244 832 +vector 1 16 4.174387 4.174387 961 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +mode 1 9 4.753590 4.753590 1492 +invok 1 6 5.164786 5.164786 2079 +reveal 1 4 5.568345 5.568345 2647 +fractal 1 3 5.857933 5.857933 3475 +quantiz 1 2 6.263398 6.263398 5692 +viresh 1 1 6.957497 6.957497 19709 +ratnakar 1 1 6.957497 6.957497 19708 +lossi 1 1 6.957497 6.957497 19710 +qclicauthor 1 1 6.957497 6.957497 19711 +qclic 1 1 6.957497 6.957497 19712 +qclicbrows 1 1 6.957497 6.957497 19713 +rever 1 1 6.957497 6.957497 19714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..102f0770 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +distribut 1 162 1.791759 1.791759 51 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +person 1 111 2.197225 2.197225 117 +check 1 115 2.197225 2.197225 118 +advanc 1 99 2.302585 2.302585 130 +need 1 98 2.302585 2.302585 135 +mani 1 92 2.397895 2.397895 150 +graphic 1 90 2.397895 2.397895 147 +west 1 83 2.484907 2.484907 192 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +street 1 63 2.772589 2.772589 293 +wednesdai 1 64 2.772589 2.772589 261 +best 1 59 2.833213 2.833213 299 +publish 1 57 2.890372 2.890372 326 +profession 1 51 2.995732 2.995732 345 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +fridai 1 44 3.135494 3.135494 390 +richard 1 31 3.496508 3.496508 559 +load 1 28 3.610918 3.610918 601 +fellow 1 24 3.761200 3.761200 701 +emphasi 1 22 3.850148 3.850148 755 +thur 1 19 4.007333 4.007333 847 +ever 1 19 4.007333 4.007333 872 +whole 1 17 4.110874 4.110874 940 +brother 1 13 4.382027 4.382027 1189 +sundai 1 10 4.653960 4.653960 1387 +desktop 1 10 4.653960 4.653960 1445 +prevent 1 7 5.010635 5.010635 1827 +saturdai 1 7 5.010635 5.010635 1794 +shade 1 7 5.010635 5.010635 1881 +polit 1 6 5.164786 5.164786 2115 +artist 1 6 5.164786 5.164786 2127 +seriou 1 5 5.347108 5.347108 2252 +haven 1 4 5.568345 5.568345 3037 +underwat 1 4 5.568345 5.568345 2838 +fire 1 4 5.568345 5.568345 3001 +beard 1 2 6.263398 6.263398 6161 +grave 1 2 6.263398 6.263398 5968 +doom 1 2 6.263398 6.263398 5848 +goofi 1 2 6.263398 6.263398 4074 +monasteriu 1 1 6.957497 6.957497 19716 +omin 1 1 6.957497 6.957497 19715 +doominu 1 1 6.957497 6.957497 19717 +rcarl 1 1 6.957497 6.957497 19718 +subsurfac 1 1 6.957497 6.957497 19719 +depositori 1 1 6.957497 6.957497 19720 +dig 1 1 6.957497 6.957497 19721 +solitari 1 1 6.957497 6.957497 19722 +innebri 1 1 6.957497 6.957497 19723 +vampir 1 1 6.957497 6.957497 19724 +nostalg 1 1 6.957497 6.957497 19725 +funki 1 1 6.957497 6.957497 19726 +monk 1 1 6.957497 6.957497 19727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..9abbe0df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,559 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +book 1 99 2.302585 2.302585 131 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +associ 1 93 2.397895 2.397895 151 +call 1 91 2.397895 2.397895 153 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +center 1 88 2.397895 2.397895 158 +present 1 91 2.397895 2.397895 145 +west 1 83 2.484907 2.484907 192 +environ 1 84 2.484907 2.484907 177 +member 1 84 2.484907 2.484907 165 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +second 1 81 2.484907 2.484907 166 +academ 1 82 2.484907 2.484907 178 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +institut 1 84 2.484907 2.484907 187 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +refer 1 78 2.564949 2.564949 203 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +decemb 1 80 2.564949 2.564949 215 +optim 1 79 2.564949 2.564949 197 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +logic 1 71 2.639057 2.639057 230 +line 1 75 2.639057 2.639057 231 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +nation 1 74 2.639057 2.639057 240 +test 1 66 2.708050 2.708050 252 +integr 1 67 2.708050 2.708050 245 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +differ 1 66 2.708050 2.708050 253 +creat 1 63 2.772589 2.772589 277 +complex 1 64 2.772589 2.772589 269 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +evalu 1 64 2.772589 2.772589 266 +septemb 1 65 2.772589 2.772589 274 +interact 1 62 2.772589 2.772589 270 +foundat 1 62 2.772589 2.772589 286 +abstract 1 62 2.772589 2.772589 276 +content 1 59 2.833213 2.833213 302 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +index 1 56 2.890372 2.890372 309 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +overview 1 56 2.890372 2.890372 323 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +space 1 57 2.890372 2.890372 310 +direct 1 57 2.890372 2.890372 316 +allow 1 53 2.944439 2.944439 333 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +maintain 1 51 2.995732 2.995732 342 +telephon 1 50 3.044522 3.044522 373 +pointer 1 48 3.044522 3.044522 368 +visitor 1 49 3.044522 3.044522 371 +set 1 50 3.044522 3.044522 361 +principl 1 48 3.044522 3.044522 357 +understand 1 47 3.091042 3.091042 384 +california 1 46 3.091042 3.091042 388 +algebra 1 45 3.135494 3.135494 394 +describ 1 45 3.135494 3.135494 400 +natur 1 44 3.135494 3.135494 406 +show 1 43 3.178054 3.178054 417 +third 1 43 3.178054 3.178054 412 +editor 1 41 3.218876 3.218876 433 +york 1 41 3.218876 3.218876 435 +edit 1 42 3.218876 3.218876 418 +press 1 42 3.218876 3.218876 419 +combin 1 42 3.218876 3.218876 421 +compani 1 41 3.218876 3.218876 423 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +theoret 1 39 3.258097 3.258097 446 +transact 1 39 3.258097 3.258097 438 +societi 1 40 3.258097 3.258097 456 +submit 1 39 3.258097 3.258097 440 +tutori 1 39 3.258097 3.258097 437 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +origin 1 38 3.295837 3.295837 472 +seminar 1 38 3.295837 3.295837 470 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +connect 1 37 3.332205 3.332205 485 +mean 1 37 3.332205 3.332205 477 +china 1 37 3.332205 3.332205 487 +procedur 1 36 3.367296 3.367296 488 +tree 1 36 3.367296 3.367296 492 +multi 1 36 3.367296 3.367296 493 +post 1 35 3.401197 3.401197 505 +either 1 35 3.401197 3.401197 506 +manual 1 35 3.401197 3.401197 504 +bibliographi 1 34 3.401197 3.401197 518 +represent 1 35 3.401197 3.401197 512 +tech 1 35 3.401197 3.401197 515 +curriculum 1 33 3.433987 3.433987 535 +obtain 1 33 3.433987 3.433987 534 +kind 1 32 3.465736 3.465736 541 +transform 1 32 3.465736 3.465736 542 +chapter 1 32 3.465736 3.465736 536 +dissert 1 32 3.465736 3.465736 549 +power 1 30 3.555348 3.555348 573 +graph 1 30 3.555348 3.555348 576 +robert 1 30 3.555348 3.555348 567 +semant 1 29 3.583519 3.583519 587 +chines 1 29 3.583519 3.583519 595 +depend 1 29 3.583519 3.583519 583 +releas 1 28 3.610918 3.610918 616 +univ 1 28 3.610918 3.610918 617 +manipul 1 27 3.637586 3.637586 624 +static 1 27 3.637586 3.637586 619 +consist 1 26 3.688879 3.688879 651 +subject 1 26 3.688879 3.688879 647 +repres 1 26 3.688879 3.688879 656 +bound 1 26 3.688879 3.688879 659 +valu 1 25 3.737670 3.737670 665 +fundament 1 25 3.737670 3.737670 661 +hill 1 25 3.737670 3.737670 670 +notic 1 25 3.737670 3.737670 675 +departmentunivers 1 24 3.761200 3.761200 711 +doctor 1 24 3.761200 3.761200 709 +handl 1 24 3.761200 3.761200 685 +demonstr 1 24 3.761200 3.761200 694 +miscellan 1 23 3.806662 3.806662 731 +methodolog 1 23 3.806662 3.806662 733 +variabl 1 23 3.806662 3.806662 715 +proof 1 23 3.806662 3.806662 720 +serv 1 22 3.850148 3.850148 758 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +sequenti 1 22 3.850148 3.850148 745 +properti 1 22 3.850148 3.850148 749 +identifi 1 22 3.850148 3.850148 760 +corpor 1 21 3.912023 3.912023 802 +path 1 21 3.912023 3.912023 778 +programminglanguag 1 21 3.912023 3.912023 782 +latest 1 21 3.912023 3.912023 785 +theorem 1 21 3.912023 3.912023 786 +basi 1 20 3.951244 3.951244 828 +beij 1 19 4.007333 4.007333 876 +comparison 1 19 4.007333 4.007333 863 +boston 1 19 4.007333 4.007333 862 +citi 1 19 4.007333 4.007333 874 +north 1 19 4.007333 4.007333 873 +thoma 1 18 4.060443 4.060443 901 +element 1 18 4.060443 4.060443 895 +partial 1 18 4.060443 4.060443 900 +record 1 18 4.060443 4.060443 890 +speed 1 18 4.060443 4.060443 911 +debug 1 17 4.110874 4.110874 944 +modif 1 17 4.110874 4.110874 913 +germani 1 17 4.110874 4.110874 946 +cambridg 1 16 4.174387 4.174387 1008 +letter 1 16 4.174387 4.174387 981 +ramakrishnan 1 16 4.174387 4.174387 972 +fourth 1 16 4.174387 4.174387 999 +diego 1 16 4.174387 4.174387 992 +taiwan 1 16 4.174387 4.174387 1006 +transit 1 15 4.248495 4.248495 1046 +precis 1 15 4.248495 4.248495 1023 +princeton 1 15 4.248495 4.248495 1042 +configur 1 15 4.248495 4.248495 1012 +remot 1 15 4.248495 4.248495 1041 +attribut 1 14 4.317488 4.317488 1092 +polynomi 1 14 4.317488 4.317488 1069 +demand 1 14 4.317488 4.317488 1073 +reprint 1 14 4.317488 4.317488 1097 +francisco 1 14 4.317488 4.317488 1095 +directli 1 13 4.382027 4.382027 1141 +carri 1 13 4.382027 4.382027 1152 +sigplan 1 13 4.382027 4.382027 1190 +context 1 13 4.382027 4.382027 1153 +shape 1 12 4.465908 4.465908 1245 +pascal 1 12 4.465908 4.465908 1213 +nanci 1 12 4.465908 4.465908 1256 +franc 1 12 4.465908 4.465908 1276 +onth 1 12 4.465908 4.465908 1218 +scan 1 12 4.465908 4.465908 1243 +instanc 1 11 4.553877 4.553877 1322 +magic 1 11 4.553877 4.553877 1358 +impact 1 11 4.553877 4.553877 1334 +lake 1 11 4.553877 4.553877 1373 +israel 1 11 4.553877 4.553877 1366 +decomposit 1 10 4.653960 4.653960 1439 +underli 1 10 4.653960 4.653960 1410 +invit 1 10 4.653960 4.653960 1428 +respect 1 9 4.753590 4.753590 1545 +mainten 1 9 4.753590 4.753590 1543 +establish 1 9 4.753590 4.753590 1532 +conferenceon 1 9 4.753590 4.753590 1595 +utah 1 9 4.753590 4.753590 1585 +herefor 1 9 4.753590 4.753590 1483 +ball 1 9 4.753590 4.753590 1608 +equival 1 9 4.753590 4.753590 1496 +secretari 1 8 4.875197 4.875197 1775 +reus 1 8 4.875197 4.875197 1661 +pldi 1 8 4.875197 4.875197 1704 +yang 1 8 4.875197 4.875197 1652 +competit 1 8 4.875197 4.875197 1635 +european 1 8 4.875197 4.875197 1763 +colloquium 1 8 4.875197 4.875197 1734 +irvin 1 8 4.875197 4.875197 1660 +illustr 1 8 4.875197 4.875197 1679 +analys 1 8 4.875197 4.875197 1666 +merg 1 7 5.010635 5.010635 1862 +bottom 1 7 5.010635 5.010635 1906 +prioriti 1 7 5.010635 5.010635 1792 +portland 1 7 5.010635 5.010635 1878 +fifth 1 7 5.010635 5.010635 1931 +iowa 1 7 5.010635 5.010635 1971 +pittsburgh 1 7 5.010635 5.010635 1938 +digest 1 7 5.010635 5.010635 1864 +increment 1 6 5.164786 5.164786 2206 +grammar 1 6 5.164786 5.164786 2058 +affect 1 6 5.164786 5.164786 2044 +textual 1 6 5.164786 5.164786 1979 +teitelbaum 1 6 5.164786 5.164786 2102 +syntax 1 6 5.164786 5.164786 2030 +kluwer 1 6 5.164786 5.164786 2143 +symposiumon 1 6 5.164786 5.164786 2054 +variant 1 6 5.164786 5.164786 2043 +german 1 6 5.164786 5.164786 2190 +unpublish 1 6 5.164786 5.164786 2226 +carolina 1 6 5.164786 5.164786 2142 +dataflow 1 5 5.347108 5.347108 2390 +summarymi 1 5 5.347108 5.347108 2580 +aim 1 5 5.347108 5.347108 2477 +licens 1 5 5.347108 5.347108 2520 +synthes 1 5 5.347108 5.347108 2451 +cacm 1 5 5.347108 5.347108 2388 +horwitz 1 5 5.347108 5.347108 2411 +shortest 1 5 5.347108 5.347108 2424 +australia 1 5 5.347108 5.347108 2478 +singapor 1 5 5.347108 5.347108 2487 +mcgraw 1 5 5.347108 5.347108 2262 +twenti 1 5 5.347108 5.347108 2540 +bind 1 5 5.347108 5.347108 2250 +orlean 1 5 5.347108 5.347108 2550 +interfer 1 5 5.347108 5.347108 2494 +forprogram 1 5 5.347108 5.347108 2361 +salt 1 5 5.347108 5.347108 2413 +patent 1 5 5.347108 5.347108 2574 +chapel 1 5 5.347108 5.347108 2457 +rep 1 4 5.568345 5.568345 3087 +usa 1 4 5.568345 5.568345 3080 +slice 1 4 5.568345 5.568345 2622 +interprocedur 1 4 5.568345 5.568345 2771 +ics 1 4 5.568345 5.568345 2779 +dagstuhl 1 4 5.568345 5.568345 2871 +sigsoft 1 4 5.568345 5.568345 3036 +exhaust 1 4 5.568345 5.568345 2825 +popl 1 4 5.568345 5.568345 3068 +compcon 1 4 5.568345 5.568345 2958 +melbourn 1 4 5.568345 5.568345 3035 +turnidg 1 4 5.568345 5.568345 2829 +petersburg 1 4 5.568345 5.568345 2989 +imper 1 4 5.568345 5.568345 3067 +jolla 1 4 5.568345 5.568345 2988 +scotland 1 4 5.568345 5.568345 3049 +bricker 1 4 5.568345 5.568345 3050 +ofprogram 1 4 5.568345 5.568345 2624 +categor 1 3 5.857933 5.857933 3765 +thedevelop 1 3 5.857933 5.857933 3903 +meaning 1 3 5.857933 5.857933 3458 +reachabl 1 3 5.857933 5.857933 4001 +nearbi 1 3 5.857933 5.857933 3291 +topla 1 3 5.857933 5.857933 3563 +amast 1 3 5.857933 5.857933 3955 +retarget 1 3 5.857933 5.857933 3994 +spaa 1 3 5.857933 5.857933 3906 +propag 1 3 5.857933 5.857933 3997 +accommod 1 3 5.857933 5.857933 3337 +fourteenth 1 3 5.857933 5.857933 3615 +alamito 1 3 5.857933 5.857933 3558 +schloss 1 3 5.857933 5.857933 3727 +denmark 1 3 5.857933 5.857933 3676 +domin 1 3 5.857933 5.857933 3995 +preserv 1 3 5.857933 5.857933 3628 +jone 1 3 5.857933 5.857933 3703 +atlanta 1 3 5.857933 5.857933 3778 +fifteenth 1 3 5.857933 5.857933 3868 +onprincipl 1 3 5.857933 5.857933 3701 +ninth 1 3 5.857933 5.857933 3616 +principlesof 1 3 5.857933 5.857933 3145 +twentieth 1 3 5.857933 5.857933 3760 +differenc 1 2 6.263398 6.263398 6177 +alia 1 2 6.263398 6.263398 5383 +thevari 1 2 6.263398 6.263398 6130 +contigu 1 2 6.263398 6.263398 6001 +worker 1 2 6.263398 6.263398 4841 +andbuild 1 2 6.263398 6.263398 6028 +clickherefor 1 2 6.263398 6.263398 5344 +interproceduraldataflow 1 2 6.263398 6.263398 6178 +acta 1 2 6.263398 6.263398 5124 +chop 1 2 6.263398 6.263398 6160 +informatica 1 2 6.263398 6.263398 5125 +unrestrict 1 2 6.263398 6.263398 4879 +arnold 1 2 6.263398 6.263398 4705 +wasserman 1 2 6.263398 6.263398 5331 +sagiv 1 2 6.263398 6.263398 6176 +destruct 1 2 6.263398 6.263398 6232 +aarhu 1 2 6.263398 6.263398 6180 +moss 1 2 6.263398 6.263398 5820 +fritzson 1 2 6.263398 6.263398 4546 +andarchitectur 1 2 6.263398 6.263398 5755 +copenhagen 1 2 6.263398 6.263398 5145 +languagedesign 1 2 6.263398 6.263398 6182 +spain 1 2 6.263398 6.263398 5522 +adequaci 1 2 6.263398 6.263398 6229 +thirteenth 1 2 6.263398 6.263398 5733 +eleventh 1 2 6.263398 6.263398 5031 +eighth 1 2 6.263398 6.263398 5750 +leeuwen 1 2 6.263398 6.263398 5543 +charleston 1 2 6.263398 6.263398 6181 +doc 1 2 6.263398 6.263398 5022 +mooli 1 2 6.263398 6.263398 6179 +tung 1 2 6.263398 6.263398 4709 +repsprofessorcomput 1 1 6.957497 6.957497 19763 +thehom 1 1 6.957497 6.957497 19764 +createtool 1 1 6.957497 6.957497 19765 +manipulationoper 1 1 6.957497 6.957497 19766 +slicingcan 1 1 6.957497 6.957497 19767 +elementss 1 1 6.957497 6.957497 19768 +thatmight 1 1 6.957497 6.957497 19769 +findsemant 1 1 6.957497 6.957497 19770 +thedecomposit 1 1 6.957497 6.957497 19771 +solvingmani 1 1 6.957497 6.957497 19772 +applicationsin 1 1 6.957497 6.957497 19773 +atimprov 1 1 6.957497 6.957497 19774 +relatedoper 1 1 6.957497 6.957497 19775 +slicer 1 1 6.957497 6.957497 19776 +unexpect 1 1 6.957497 6.957497 19777 +betweeninterprocedur 1 1 6.957497 6.957497 19778 +oninterprocedur 1 1 6.957497 6.957497 19779 +transformingthem 1 1 6.957497 6.957497 19780 +timebi 1 1 6.957497 6.957497 19781 +probleminst 1 1 6.957497 6.957497 19782 +publicationsprogram 1 1 6.957497 6.957497 19783 +slicing_pat 1 1 6.957497 6.957497 19784 +binklei 1 1 6.957497 6.957497 19728 +fseb 1 1 6.957497 6.957497 19742 +thesismerg 1 1 6.957497 6.957497 19785 +tosem 1 1 6.957497 6.957497 19734 +thesiswuu 1 1 6.957497 6.957497 19743 +iwscm 1 1 6.957497 6.957497 19786 +popla 1 1 6.957497 6.957497 19787 +esop 1 1 6.957497 6.957497 19744 +iwsvcc 1 1 6.957497 6.957497 19788 +ccpsd 1 1 6.957497 6.957497 19789 +npfo_submiss 1 1 6.957497 6.957497 19790 +ccipl 1 1 6.957497 6.957497 19791 +poplb 1 1 6.957497 6.957497 19745 +pepma 1 1 6.957497 6.957497 19746 +prog_integration_system 1 1 6.957497 6.957497 19792 +prog_integration_manu 1 1 6.957497 6.957497 19793 +subsetof 1 1 6.957497 6.957497 19794 +clickingher 1 1 6.957497 6.957497 19795 +andexpect 1 1 6.957497 6.957497 19796 +anddifferenc 1 1 6.957497 6.957497 19797 +thesesdavid 1 1 6.957497 6.957497 19798 +thesisphil 1 1 6.957497 6.957497 19799 +pfeiffer 1 1 6.957497 6.957497 19735 +thesisinterprocedur 1 1 6.957497 6.957497 19800 +analysisdemand 1 1 6.957497 6.957497 19801 +idfa 1 1 6.957497 6.957497 19731 +fsea 1 1 6.957497 6.957497 19747 +diku 1 1 6.957497 6.957497 19748 +tcs_ide_pap 1 1 6.957497 6.957497 19802 +fase 1 1 6.957497 6.957497 19749 +ptime 1 1 6.957497 6.957497 19803 +acta_pap 1 1 6.957497 6.957497 19804 +pepmb 1 1 6.957497 6.957497 19750 +pfeiffer_thesi 1 1 6.957497 6.957497 19805 +lape 1 1 6.957497 6.957497 19751 +psde 1 1 6.957497 6.957497 19752 +ramalingam 1 1 6.957497 6.957497 19729 +jalg_pap 1 1 6.957497 6.957497 19806 +popl_not 1 1 6.957497 6.957497 19807 +publicationsbooksrep 1 1 6.957497 6.957497 19808 +constructinglanguag 1 1 6.957497 6.957497 19809 +publicationssagiv 1 1 6.957497 6.957497 19810 +toconst 1 1 6.957497 6.957497 19753 +j_alg 1 1 6.957497 6.957497 19811 +preservingtransform 1 1 6.957497 6.957497 19812 +prin 1 1 6.957497 6.957497 19730 +interf 1 1 6.957497 6.957497 19732 +grammarswith 1 1 6.957497 6.957497 19813 +movement 1 1 6.957497 6.957497 19814 +demer 1 1 6.957497 6.957497 19736 +sublinear 1 1 6.957497 6.957497 19815 +papershorwitz 1 1 6.957497 6.957497 19816 +ganzing 1 1 6.957497 6.957497 19817 +chaptersrep 1 1 6.957497 6.957497 19818 +bohner 1 1 6.957497 6.957497 19819 +fromacm 1 1 6.957497 6.957497 19737 +berzin 1 1 6.957497 6.957497 19738 +fromproceed 1 1 6.957497 6.957497 19820 +paradigmsfor 1 1 6.957497 6.957497 19754 +brighton 1 1 6.957497 6.957497 19755 +abramski 1 1 6.957497 6.957497 19756 +maibaum 1 1 6.957497 6.957497 19757 +ichikawa 1 1 6.957497 6.957497 19821 +tsubotani 1 1 6.957497 6.957497 19822 +barstow 1 1 6.957497 6.957497 19823 +sandewal 1 1 6.957497 6.957497 19824 +shrobe 1 1 6.957497 6.957497 19825 +wherefor 1 1 6.957497 6.957497 19758 +sigoa 1 1 6.957497 6.957497 19759 +publicationssiff 1 1 6.957497 6.957497 19826 +sigsoftsymposium 1 1 6.957497 6.957497 19739 +wadern 1 1 6.957497 6.957497 19740 +danvi 1 1 6.957497 6.957497 19827 +glueck 1 1 6.957497 6.957497 19828 +thiemann 1 1 6.957497 6.957497 19829 +wilhelm 1 1 6.957497 6.957497 19733 +rosai 1 1 6.957497 6.957497 19741 +hentenryck 1 1 6.957497 6.957497 19830 +pepm 1 1 6.957497 6.957497 19760 +onparti 1 1 6.957497 6.957497 19761 +formalapproach 1 1 6.957497 6.957497 19831 +nielsen 1 1 6.957497 6.957497 19832 +schwartzbach 1 1 6.957497 6.957497 19833 +tapsoft 1 1 6.957497 6.957497 19834 +compilerconstruct 1 1 6.957497 6.957497 19835 +edinburgh 1 1 6.957497 6.957497 19836 +reducibleflowgraph 1 1 6.957497 6.957497 19837 +velen 1 1 6.957497 6.957497 19838 +onalgebra 1 1 6.957497 6.957497 19839 +softwareconfigur 1 1 6.957497 6.957497 19840 +issuesin 1 1 6.957497 6.957497 19841 +barcelona 1 1 6.957497 6.957497 19842 +diaz 1 1 6.957497 6.957497 19843 +oreja 1 1 6.957497 6.957497 19844 +versionand 1 1 6.957497 6.957497 19845 +grassau 1 1 6.957497 6.957497 19846 +bericht 1 1 6.957497 6.957497 19847 +winkler 1 1 6.957497 6.957497 19848 +teubner 1 1 6.957497 6.957497 19849 +stuttgart 1 1 6.957497 6.957497 19850 +marceau 1 1 6.957497 6.957497 19851 +engineeringsymposium 1 1 6.957497 6.957497 19852 +alpern 1 1 6.957497 6.957497 19853 +albuquerqu 1 1 6.957497 6.957497 19854 +tosyntax 1 1 6.957497 6.957497 19855 +williamsburg 1 1 6.957497 6.957497 19856 +softwarerep 1 1 6.957497 6.957497 19857 +patentsrep 1 1 6.957497 6.957497 19858 +pend 1 1 6.957497 6.957497 19859 +submissionsrep 1 1 6.957497 6.957497 19860 +reportsrep 1 1 6.957497 6.957497 19861 +mehlhorn 1 1 6.957497 6.957497 19862 +ibfi 1 1 6.957497 6.957497 19762 +datalogisk 1 1 6.957497 6.957497 19863 +psramalingam 1 1 6.957497 6.957497 19864 +klint 1 1 6.957497 6.957497 19865 +snelt 1 1 6.957497 6.957497 19866 +extendedabstract 1 1 6.957497 6.957497 19867 +reconstitut 1 1 6.957497 6.957497 19868 +studentsvisitor 1 1 6.957497 6.957497 19869 +jiazhen 1 1 6.957497 6.957497 19870 +paig 1 1 6.957497 6.957497 19871 +chiao 1 1 6.957497 6.957497 19872 +studentsramalingam 1 1 6.957497 6.957497 19873 +programintegr 1 1 6.957497 6.957497 19874 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..68a1666a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +dayton 1 119 2.079442 2.079442 104 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +institut 1 84 2.484907 2.484907 187 +window 1 68 2.708050 2.708050 242 +function 1 62 2.772589 2.772589 275 +street 1 63 2.772589 2.772589 293 +copi 1 63 2.772589 2.772589 284 +best 1 59 2.833213 2.833213 299 +suggest 1 53 2.944439 2.944439 331 +date 1 51 2.995732 2.995732 344 +friend 1 48 3.044522 3.044522 376 +right 1 48 3.044522 3.044522 363 +tech 1 35 3.401197 3.401197 515 +india 1 32 3.465736 3.465736 550 +univ 1 28 3.610918 3.610918 617 +comp 1 26 3.688879 3.688879 650 +indian 1 22 3.850148 3.850148 769 +love 1 21 3.912023 3.912023 804 +reserv 1 20 3.951244 3.951244 808 +spend 1 19 4.007333 4.007333 850 +statu 1 18 4.060443 4.060443 885 +speed 1 18 4.060443 4.060443 911 +beauti 1 18 4.060443 4.060443 912 +seem 1 18 4.060443 4.060443 899 +comic 1 14 4.317488 4.317488 1103 +song 1 11 4.553877 4.553877 1380 +calvin 1 9 4.753590 4.753590 1518 +kanpur 1 8 4.875197 4.875197 1744 +film 1 8 4.875197 4.875197 1761 +apart 1 7 5.010635 5.010635 1936 +settimeout 1 5 5.347108 5.347108 2536 +guestbook 1 5 5.347108 5.347108 2475 +randal 1 4 5.568345 5.568345 2776 +mirza 1 3 5.857933 5.857933 3989 +hero 1 3 5.857933 5.857933 3711 +saeed 1 2 6.263398 6.263398 6172 +pagespe 1 1 6.957497 6.957497 19876 +statusclock 1 1 6.957497 6.957497 19875 +clearid 1 1 6.957497 6.957497 19877 +cleartimeout 1 1 6.957497 6.957497 19878 +lucknow 1 1 6.957497 6.957497 19879 +listn 1 1 6.957497 6.957497 19880 +netsurf 1 1 6.957497 6.957497 19881 +wismad 1 1 6.957497 6.957497 19882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..e721672f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +lectur 1 135 1.945910 1.945910 73 +dayton 1 119 2.079442 2.079442 104 +real 1 93 2.397895 2.397895 144 +chang 1 82 2.484907 2.484907 163 +septemb 1 65 2.772589 2.772589 274 +taught 1 33 3.433987 3.433987 526 +comp 1 26 3.688879 3.688879 650 +edutelephon 1 10 4.653960 4.653960 1473 +desktop 1 10 4.653960 4.653960 1445 +peterson 1 7 5.010635 5.010635 1850 +salli 1 3 5.857933 5.857933 3432 +goodwin 1 1 6.957497 6.957497 19883 +lecturercomput 1 1 6.957497 6.957497 19884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..81fb0c66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,130 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +seattl 1 120 2.079442 2.079442 103 +introduct 1 126 2.079442 2.079442 87 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +person 1 111 2.197225 2.197225 117 +present 1 91 2.397895 2.397895 145 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +larg 1 82 2.484907 2.484907 168 +activ 1 84 2.484907 2.484907 182 +help 1 83 2.484907 2.484907 175 +master 1 76 2.564949 2.564949 216 +server 1 76 2.564949 2.564949 204 +sourc 1 77 2.564949 2.564949 201 +onlin 1 75 2.639057 2.639057 223 +logic 1 71 2.639057 2.639057 230 +name 1 72 2.639057 2.639057 220 +nation 1 74 2.639057 2.639057 240 +multimedia 1 68 2.708050 2.708050 258 +dept 1 64 2.772589 2.772589 291 +evalu 1 64 2.772589 2.772589 266 +run 1 51 2.995732 2.995732 347 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +pointer 1 48 3.044522 3.044522 368 +archiv 1 49 3.044522 3.044522 364 +protocol 1 45 3.135494 3.135494 407 +york 1 41 3.218876 3.218876 435 +transact 1 39 3.258097 3.258097 438 +slide 1 38 3.295837 3.295837 467 +industri 1 38 3.295837 3.295837 464 +bibliographi 1 34 3.401197 3.401197 518 +queri 1 33 3.433987 3.433987 524 +articl 1 33 3.433987 3.433987 530 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +profil 1 30 3.555348 3.555348 581 +toward 1 25 3.737670 3.737670 668 +jeff 1 25 3.737670 3.737670 673 +reach 1 24 3.761200 3.761200 688 +initi 1 23 3.806662 3.806662 717 +indian 1 22 3.850148 3.850148 769 +hierarchi 1 22 3.850148 3.850148 744 +sort 1 22 3.850148 3.850148 738 +cooper 1 22 3.850148 3.850148 757 +boston 1 19 4.007333 4.007333 862 +sigmod 1 19 4.007333 4.007333 877 +bachelor 1 17 4.110874 4.110874 957 +estim 1 17 4.110874 4.110874 930 +georg 1 16 4.174387 4.174387 994 +princeton 1 15 4.248495 4.248495 1042 +massiv 1 15 4.248495 4.248495 1026 +warn 1 14 4.317488 4.317488 1068 +infrastructur 1 12 4.465908 4.465908 1234 +council 1 11 4.553877 4.553877 1364 +naughton 1 10 4.653960 4.653960 1450 +vldb 1 10 4.653960 4.653960 1470 +consortium 1 10 4.653960 4.653960 1467 +jeffrei 1 9 4.753590 4.753590 1612 +utah 1 9 4.753590 4.753590 1585 +madra 1 8 4.875197 4.875197 1770 +presenc 1 8 4.875197 4.875197 1671 +competit 1 8 4.875197 4.875197 1635 +spec 1 8 4.875197 4.875197 1640 +analyt 1 7 5.010635 5.010635 1913 +bombai 1 7 5.010635 5.010635 1972 +aggreg 1 6 5.164786 5.164786 2219 +prasad 1 6 5.164786 5.164786 2126 +chicago 1 6 5.164786 5.164786 2149 +deshpand 1 5 5.347108 5.347108 2431 +amit 1 4 5.568345 5.568345 2972 +snail 1 4 5.568345 5.568345 2916 +multidimension 1 4 5.568345 5.568345 3091 +ramasami 1 4 5.568345 5.568345 3088 +shukla 1 3 5.857933 5.857933 4030 +karthikeyan 1 3 5.857933 5.857933 4031 +mumbai 1 3 5.857933 5.857933 4029 +pilot 1 3 5.857933 5.857933 4008 +children 1 3 5.857933 5.857933 3767 +asha 1 3 5.857933 5.857933 4037 +marathon 1 2 6.263398 6.263398 5592 +endow 1 2 6.263398 6.263398 6234 +olap 1 2 6.263398 6.263398 6233 +guidanc 1 1 6.957497 6.957497 19885 +trier 1 1 6.957497 6.957497 19886 +mdd 1 1 6.957497 6.957497 19887 +niiip 1 1 6.957497 6.957497 19888 +transcoop 1 1 6.957497 6.957497 19889 +needi 1 1 6.957497 6.957497 19890 +pageand 1 1 6.957497 6.957497 19891 +bookmarksar 1 1 6.957497 6.957497 19892 +garfield 1 1 6.957497 6.957497 19893 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..da946892 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +educ 1 86 2.484907 2.484907 191 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +visit 1 63 2.772589 2.772589 288 +undergradu 1 54 2.944439 2.944439 338 +india 1 32 3.465736 3.465736 550 +altern 1 26 3.688879 3.688879 641 +bookmark 1 26 3.688879 3.688879 639 +worth 1 11 4.553877 4.553877 1294 +bombai 1 7 5.010635 5.010635 1972 +whereabout 1 4 5.568345 5.568345 3078 +indianinstitut 1 3 5.857933 5.857933 4003 +fantast 1 3 5.857933 5.857933 3966 +hadmi 1 2 6.263398 6.263398 6097 +canfing 1 2 6.263398 6.263398 6098 +ashwin 1 1 6.957497 6.957497 19894 +iitb 1 1 6.957497 6.957497 19895 +meto 1 1 6.957497 6.957497 19896 +sashwin 1 1 6.957497 6.957497 19897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..dea61fa8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +compil 1 122 2.079442 2.079442 96 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +mani 1 92 2.397895 2.397895 150 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +contain 1 81 2.484907 2.484907 174 +academ 1 82 2.484907 2.484907 178 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +solv 1 73 2.639057 2.639057 234 +would 1 67 2.708050 2.708050 251 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +suggest 1 53 2.944439 2.944439 331 +maintain 1 51 2.995732 2.995732 342 +much 1 52 2.995732 2.995732 349 +date 1 51 2.995732 2.995732 344 +friend 1 48 3.044522 3.044522 376 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +music 1 42 3.218876 3.218876 436 +past 1 42 3.218876 3.218876 428 +must 1 40 3.258097 3.258097 442 +realli 1 40 3.258097 3.258097 444 +author 1 39 3.258097 3.258097 450 +field 1 37 3.332205 3.332205 482 +mean 1 37 3.332205 3.332205 477 +india 1 32 3.465736 3.465736 550 +kind 1 32 3.465736 3.465736 541 +photo 1 31 3.496508 3.496508 561 +someth 1 31 3.496508 3.496508 554 +hard 1 30 3.555348 3.555348 563 +built 1 29 3.583519 3.583519 592 +consid 1 29 3.583519 3.583519 590 +hope 1 28 3.610918 3.610918 610 +mine 1 26 3.688879 3.688879 654 +bookmark 1 26 3.688879 3.688879 639 +enjoi 1 26 3.688879 3.688879 660 +rather 1 26 3.688879 3.688879 642 +sport 1 25 3.737670 3.737670 683 +indian 1 22 3.850148 3.850148 769 +inth 1 22 3.850148 3.850148 741 +watch 1 21 3.912023 3.912023 789 +wonder 1 20 3.951244 3.951244 815 +tenni 1 20 3.951244 3.951244 838 +beauti 1 18 4.060443 4.060443 912 +listen 1 18 4.060443 4.060443 907 +across 1 16 4.174387 4.174387 974 +anyth 1 16 4.174387 4.174387 998 +hobbi 1 16 4.174387 4.174387 1009 +photograph 1 15 4.248495 4.248495 1056 +goe 1 15 4.248495 4.248495 1044 +near 1 14 4.317488 4.317488 1091 +unfortun 1 13 4.382027 4.382027 1170 +scan 1 12 4.465908 4.465908 1243 +reader 1 12 4.465908 4.465908 1246 +town 1 10 4.653960 4.653960 1458 +awai 1 10 4.653960 4.653960 1447 +interestsmi 1 10 4.653960 4.653960 1462 +earth 1 10 4.653960 4.653960 1463 +hint 1 10 4.653960 4.653960 1419 +ball 1 9 4.753590 4.753590 1608 +jeffrei 1 9 4.753590 4.753590 1612 +pick 1 9 4.753590 4.753590 1498 +pagei 1 8 4.875197 4.875197 1683 +empir 1 8 4.875197 4.875197 1722 +kanpur 1 8 4.875197 4.875197 1744 +bridg 1 8 4.875197 4.875197 1764 +cricket 1 7 5.010635 5.010635 1945 +river 1 6 5.164786 5.164786 2220 +rock 1 6 5.164786 5.164786 2164 +whatev 1 6 5.164786 5.164786 2097 +neither 1 6 5.164786 5.164786 1990 +fiction 1 6 5.164786 5.164786 2217 +tri 1 6 5.164786 5.164786 2166 +album 1 4 5.568345 5.568345 2888 +gokul 1 4 5.568345 5.568345 2668 +thati 1 4 5.568345 5.568345 2616 +metal 1 4 5.568345 5.568345 3079 +fantasi 1 4 5.568345 5.568345 3055 +devot 1 4 5.568345 5.568345 2711 +dont 1 3 5.857933 5.857933 3473 +pleasant 1 3 5.857933 5.857933 3825 +seinfeld 1 3 5.857933 5.857933 3958 +romanc 1 3 5.857933 5.857933 3632 +sastri 1 2 6.263398 6.263398 6171 +mugshot 1 2 6.263398 6.263398 4984 +karnataka 1 2 6.263398 6.263398 5106 +iitk 1 2 6.263398 6.263398 6227 +whati 1 2 6.263398 6.263398 6027 +horror 1 2 6.263398 6.263398 5075 +eduunivers 1 2 6.263398 6.263398 6216 +subramanya 1 1 6.957497 6.957497 19898 +hospet 1 1 6.957497 6.957497 19899 +hampi 1 1 6.957497 6.957497 19902 +ruin 1 1 6.957497 6.957497 19903 +vijayanagara 1 1 6.957497 6.957497 19904 +tungabhadra 1 1 6.957497 6.957497 19900 +fewphotograph 1 1 6.957497 6.957497 19905 +classmatesat 1 1 6.957497 6.957497 19906 +presentcurr 1 1 6.957497 6.957497 19907 +registeredfor 1 1 6.957497 6.957497 19908 +favourit 1 1 6.957497 6.957497 19901 +playphatta 1 1 6.957497 6.957497 19909 +champ 1 1 6.957497 6.957497 19910 +entertainmentin 1 1 6.957497 6.957497 19911 +donot 1 1 6.957497 6.957497 19912 +sshow 1 1 6.957497 6.957497 19913 +voraci 1 1 6.957497 6.957497 19914 +unsuccesfulli 1 1 6.957497 6.957497 19915 +grip 1 1 6.957497 6.957497 19916 +ifposs 1 1 6.957497 6.957497 19917 +archer 1 1 6.957497 6.957497 19918 +jane 1 1 6.957497 6.957497 19919 +austen 1 1 6.957497 6.957497 19920 +pride 1 1 6.957497 6.957497 19921 +prejudic 1 1 6.957497 6.957497 19922 +ramesh 1 1 6.957497 6.957497 19923 +mahadeven 1 1 6.957497 6.957497 19924 +sarticl 1 1 6.957497 6.957497 19925 +wonderfulgam 1 1 6.957497 6.957497 19926 +itagain 1 1 6.957497 6.957497 19927 +crossword 1 1 6.957497 6.957497 19928 +cryptic 1 1 6.957497 6.957497 19929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..fa9eb4c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,220 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +site 1 106 2.197225 2.197225 119 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +user 1 104 2.302585 2.302585 137 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +sinc 1 90 2.397895 2.397895 159 +internet 1 83 2.484907 2.484907 186 +educ 1 86 2.484907 2.484907 191 +help 1 83 2.484907 2.484907 175 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +level 1 87 2.484907 2.484907 180 +come 1 78 2.564949 2.564949 202 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +orient 1 80 2.564949 2.564949 205 +servic 1 72 2.639057 2.639057 236 +nation 1 74 2.639057 2.639057 240 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +appli 1 71 2.639057 2.639057 226 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +import 1 65 2.772589 2.772589 282 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +plan 1 65 2.772589 2.772589 272 +visit 1 63 2.772589 2.772589 288 +descript 1 64 2.772589 2.772589 271 +street 1 63 2.772589 2.772589 293 +best 1 59 2.833213 2.833213 299 +special 1 56 2.890372 2.890372 320 +three 1 54 2.944439 2.944439 330 +undergradu 1 54 2.944439 2.944439 338 +sampl 1 53 2.944439 2.944439 339 +week 1 52 2.995732 2.995732 343 +profession 1 51 2.995732 2.995732 345 +format 1 48 3.044522 3.044522 356 +telephon 1 50 3.044522 3.044522 373 +effect 1 46 3.091042 3.091042 385 +natur 1 44 3.135494 3.135494 406 +third 1 43 3.178054 3.178054 412 +edit 1 42 3.218876 3.218876 418 +futur 1 41 3.218876 3.218876 427 +multipl 1 39 3.258097 3.258097 453 +join 1 39 3.258097 3.258097 457 +continu 1 39 3.258097 3.258097 448 +seminar 1 38 3.295837 3.295837 470 +open 1 38 3.295837 3.295837 469 +feel 1 37 3.332205 3.332205 483 +staff 1 36 3.367296 3.367296 490 +everi 1 34 3.401197 3.401197 519 +award 1 34 3.401197 3.401197 523 +kind 1 32 3.465736 3.465736 541 +collabor 1 32 3.465736 3.465736 543 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +produc 1 30 3.555348 3.555348 572 +focus 1 29 3.583519 3.583519 584 +depend 1 29 3.583519 3.583519 583 +propos 1 28 3.610918 3.610918 602 +retriev 1 27 3.637586 3.637586 621 +background 1 25 3.737670 3.737670 664 +spent 1 25 3.737670 3.737670 676 +departmentunivers 1 24 3.761200 3.761200 711 +higher 1 24 3.761200 3.761200 690 +seri 1 24 3.761200 3.761200 708 +director 1 22 3.850148 3.850148 767 +cooper 1 22 3.850148 3.850148 757 +newsgroup 1 21 3.912023 3.912023 783 +divis 1 21 3.912023 3.912023 803 +toolkit 1 20 3.951244 3.951244 835 +wrote 1 20 3.951244 3.951244 830 +item 1 19 4.007333 4.007333 856 +expand 1 17 4.110874 4.110874 928 +diego 1 16 4.174387 4.174387 992 +susan 1 15 4.248495 4.248495 1050 +later 1 15 4.248495 4.248495 1043 +becam 1 14 4.317488 4.317488 1117 +speak 1 12 4.465908 4.465908 1283 +onth 1 12 4.465908 4.465908 1218 +branch 1 11 4.553877 4.553877 1318 +thecomput 1 10 4.653960 4.653960 1408 +hundr 1 9 4.753590 4.753590 1528 +discov 1 9 4.753590 4.753590 1562 +respect 1 9 4.753590 4.753590 1545 +filter 1 8 4.875197 4.875197 1641 +elect 1 8 4.875197 4.875197 1771 +jack 1 8 4.875197 4.875197 1780 +potenti 1 8 4.875197 4.875197 1690 +gather 1 8 4.875197 4.875197 1719 +scout 1 7 5.010635 5.010635 1903 +usabl 1 7 5.010635 5.010635 1810 +happen 1 7 5.010635 5.010635 1790 +discoveri 1 7 5.010635 5.010635 1915 +edumi 1 6 5.164786 5.164786 2132 +approv 1 6 5.164786 5.164786 2078 +matthew 1 6 5.164786 5.164786 2193 +ifyou 1 6 5.164786 5.164786 1992 +kid 1 5 5.347108 5.347108 2516 +merit 1 5 5.347108 5.347108 2466 +newslett 1 4 5.568345 5.568345 2873 +devot 1 4 5.568345 5.568345 2711 +termin 1 4 5.568345 5.568345 2852 +chose 1 4 5.568345 5.568345 2629 +hire 1 4 5.568345 5.568345 2976 +newli 1 3 5.857933 5.857933 3786 +orth 1 3 5.857933 5.857933 3685 +moreinform 1 3 5.857933 5.857933 3307 +audienc 1 3 5.857933 5.857933 3180 +agreement 1 3 5.857933 5.857933 3207 +aproject 1 3 5.857933 5.857933 3142 +expans 1 3 5.857933 5.857933 3755 +disciplin 1 3 5.857933 5.857933 3392 +calcari 1 2 6.263398 6.263398 6144 +sciencefound 1 2 6.263398 6.263398 5150 +thehigh 1 2 6.263398 6.263398 4095 +thousand 1 2 6.263398 6.263398 5949 +arbor 1 2 6.263398 6.263398 6235 +backbon 1 2 6.263398 6.263398 5623 +thescout 1 2 6.263398 6.263398 6082 +calcarimanag 1 1 6.957497 6.957497 19934 +servicescomput 1 1 6.957497 6.957497 19935 +madisonsc 1 1 6.957497 6.957497 19936 +scoutservic 1 1 6.957497 6.957497 19937 +internicand 1 1 6.957497 6.957497 19938 +bestresourc 1 1 6.957497 6.957497 19939 +andeduc 1 1 6.957497 6.957497 19930 +soonth 1 1 6.957497 6.957497 19940 +sprout 1 1 6.957497 6.957497 19941 +andthousand 1 1 6.957497 6.957497 19942 +annotatedlist 1 1 6.957497 6.957497 19943 +itemsinclud 1 1 6.957497 6.957497 19944 +happeningspost 1 1 6.957497 6.957497 19945 +weekdai 1 1 6.957497 6.957497 19946 +wheni 1 1 6.957497 6.957497 19947 +thensfnet 1 1 6.957497 6.957497 19948 +informationservic 1 1 6.957497 6.957497 19949 +tonat 1 1 6.957497 6.957497 19950 +internetand 1 1 6.957497 6.957497 19951 +seminarseri 1 1 6.957497 6.957497 19952 +internetend 1 1 6.957497 6.957497 19953 +forcerfnet 1 1 6.957497 6.957497 19954 +internicproject 1 1 6.957497 6.957497 19955 +theport 1 1 6.957497 6.957497 19956 +workof 1 1 6.957497 6.957497 19957 +reloc 1 1 6.957497 6.957497 19931 +andrequest 1 1 6.957497 6.957497 19958 +heartilyagre 1 1 6.957497 6.957497 19959 +servicesat 1 1 6.957497 6.957497 19960 +solock 1 1 6.957497 6.957497 19961 +speciallibrarian 1 1 6.957497 6.957497 19932 +theaddit 1 1 6.957497 6.957497 19962 +systemadministr 1 1 6.957497 6.957497 19933 +livesei 1 1 6.957497 6.957497 19963 +asscout 1 1 6.957497 6.957497 19964 +researcharea 1 1 6.957497 6.957497 19965 +campus 1 1 6.957497 6.957497 19966 +includenetwork 1 1 6.957497 6.957497 19967 +nidr 1 1 6.957497 6.957497 19968 +anddisciplin 1 1 6.957497 6.957497 19969 +willincludecomput 1 1 6.957497 6.957497 19970 +ofour 1 1 6.957497 6.957497 19971 +theonlin 1 1 6.957497 6.957497 19972 +librarian 1 1 6.957497 6.957497 19973 +aresum 1 1 6.957497 6.957497 19974 +contactm 1 1 6.957497 6.957497 19975 +calcariinternet 1 1 6.957497 6.957497 19976 +scal 1 1 6.957497 6.957497 19977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..e3a3a502 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +welcom 1 122 2.079442 2.079442 99 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +dayton 1 119 2.079442 2.079442 104 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +june 1 79 2.564949 2.564949 214 +state 1 76 2.564949 2.564949 207 +dept 1 64 2.772589 2.772589 291 +major 1 56 2.890372 2.890372 315 +undergradu 1 54 2.944439 2.944439 338 +finger 1 52 2.995732 2.995732 354 +past 1 42 3.218876 3.218876 428 +higher 1 24 3.761200 3.761200 690 +born 1 21 3.912023 3.912023 798 +happi 1 14 4.317488 4.317488 1079 +avenu 1 12 4.465908 4.465908 1277 +resid 1 10 4.653960 4.653960 1461 +secondari 1 7 5.010635 5.010635 1884 +southern 1 6 5.164786 5.164786 2191 +whereabout 1 4 5.568345 5.568345 3078 +worri 1 3 5.857933 5.857933 3130 +coimbator 1 2 6.263398 6.263398 5130 +theindian 1 2 6.263398 6.263398 5795 +kharagpur 1 2 6.263398 6.263398 6236 +kendal 1 2 6.263398 6.263398 6085 +chandrasekar 1 1 6.957497 6.957497 19978 +tamilnadu 1 1 6.957497 6.957497 19979 +inindia 1 1 6.957497 6.957497 19980 +officedept 1 1 6.957497 6.957497 19981 +sivasankaran 1 1 6.957497 6.957497 19982 +schandra 1 1 6.957497 6.957497 19983 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..0f88c1ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +architectur 1 139 1.945910 1.945910 77 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +descript 1 64 2.772589 2.772589 271 +advisor 1 51 2.995732 2.995732 355 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +eric 1 19 4.007333 4.007333 870 +wind 1 18 4.060443 4.060443 908 +club 1 15 4.248495 4.248495 1058 +usaphon 1 9 4.753590 4.753590 1600 +tunnel 1 9 4.753590 4.753590 1615 +assistantdepart 1 8 4.875197 4.875197 1784 +hockei 1 8 4.875197 4.875197 1760 +byte 1 6 5.164786 5.164786 2108 +pageer 1 3 5.857933 5.857933 3776 +schnarr 1 2 6.263398 6.263398 6194 +dragon 1 2 6.263398 6.263398 4176 +larusresearch 1 1 6.957497 6.957497 19984 +languagesfunct 1 1 6.957497 6.957497 19985 +designinterest 1 1 6.957497 6.957497 19986 +sacm 1 1 6.957497 6.957497 19987 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..80756271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +dayton 1 119 2.079442 2.079442 104 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +access 1 102 2.302585 2.302585 136 +memori 1 101 2.302585 2.302585 139 +user 1 104 2.302585 2.302585 137 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +educ 1 86 2.484907 2.484907 191 +david 1 71 2.639057 2.639057 232 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +advisor 1 51 2.995732 2.995732 355 +mark 1 44 3.135494 3.135494 403 +protocol 1 45 3.135494 3.135494 407 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 1 35 3.401197 3.401197 507 +hill 1 25 3.737670 3.737670 670 +supercomput 1 25 3.737670 3.737670 681 +programminglanguag 1 21 3.912023 3.912023 782 +fine 1 20 3.951244 3.951244 822 +steven 1 17 4.110874 4.110874 953 +asplo 1 17 4.110874 4.110874 948 +wood 1 11 4.553877 4.553877 1355 +grain 1 10 4.653960 4.653960 1448 +cook 1 10 4.653960 4.653960 1464 +laru 1 9 4.753590 4.753590 1560 +yanni 1 8 4.875197 4.875197 1713 +assistantdepart 1 8 4.875197 4.875197 1784 +sixth 1 7 5.010635 5.010635 1917 +roger 1 7 5.010635 5.010635 1892 +ann 1 6 5.164786 5.164786 2065 +ioanni 1 5 5.347108 5.347108 2553 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +lebeck 1 5 5.347108 5.347108 2582 +reinhardt 1 5 5.347108 5.347108 2583 +schoina 1 4 5.568345 5.568345 3085 +alvin 1 4 5.568345 5.568345 3084 +crete 1 3 5.857933 5.857933 3773 +systemspubl 1 1 6.957497 6.957497 19989 +iraklio 1 1 6.957497 6.957497 19988 +cretan 1 1 6.957497 6.957497 19990 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..4c9c9ac5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +includ 1 208 1.609438 1.609438 42 +version 1 113 2.197225 2.197225 122 +degre 1 69 2.708050 2.708050 259 +differ 1 66 2.708050 2.708050 253 +special 1 56 2.890372 2.890372 320 +finger 1 52 2.995732 2.995732 354 +give 1 50 3.044522 3.044522 359 +could 1 46 3.091042 3.091042 383 +india 1 32 3.465736 3.465736 550 +mine 1 26 3.688879 3.688879 654 +wish 1 24 3.761200 3.761200 692 +instead 1 22 3.850148 3.850148 756 +grad 1 20 3.951244 3.951244 837 +account 1 18 4.060443 4.060443 882 +regist 1 17 4.110874 4.110874 938 +biologi 1 15 4.248495 4.248495 1049 +classic 1 14 4.317488 4.317488 1084 +danc 1 12 4.465908 4.465908 1278 +switch 1 8 4.875197 4.875197 1718 +keeper 1 5 5.347108 5.347108 2569 +keyboard 1 4 5.568345 5.568345 2970 +asian 1 3 5.857933 5.857933 3598 +southeast 1 2 6.263398 6.263398 6188 +asia 1 2 6.263398 6.263398 5952 +hairbal 1 2 6.263398 6.263398 6237 +beverli 1 1 6.957497 6.957497 19991 +seavei 1 1 6.957497 6.957497 19992 +ramayana 1 1 6.957497 6.957497 19993 +drama 1 1 6.957497 6.957497 19994 +ramakien 1 1 6.957497 6.957497 19995 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..781e91d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +book 1 99 2.302585 2.302585 131 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +want 1 79 2.564949 2.564949 199 +addit 1 74 2.639057 2.639057 228 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +back 1 60 2.833213 2.833213 297 +game 1 36 3.367296 3.367296 498 +eduoffic 1 33 3.433987 3.433987 531 +enjoi 1 26 3.688879 3.688879 660 +fact 1 21 3.912023 3.912023 780 +scott 1 18 4.060443 4.060443 884 +rate 1 15 4.248495 4.248495 1037 +english 1 15 4.248495 4.248495 1033 +hopefulli 1 14 4.317488 4.317488 1071 +franc 1 12 4.465908 4.465908 1276 +ball 1 9 4.753590 4.753590 1608 +drink 1 9 4.753590 4.753590 1607 +lock 1 9 4.753590 4.753590 1551 +poetri 1 9 4.753590 4.753590 1596 +absolut 1 8 4.875197 4.875197 1646 +dictionari 1 8 4.875197 4.875197 1642 +largest 1 7 5.010635 5.010635 1858 +seen 1 6 5.164786 5.164786 2202 +beer 1 6 5.164786 5.164786 2216 +soda 1 6 5.164786 5.164786 2189 +constitut 1 6 5.164786 5.164786 2026 +pagescott 1 4 5.568345 5.568345 2978 +chees 1 4 5.568345 5.568345 3090 +add 1 3 5.857933 5.857933 3131 +uwisc 1 2 6.263398 6.263398 4738 +caffein 1 2 6.263398 6.263398 5936 +thesauru 1 2 6.263398 6.263398 6238 +colvil 1 1 6.957497 6.957497 19996 +pagein 1 1 6.957497 6.957497 19997 +pickingand 1 1 6.957497 6.957497 19998 +artsi 1 1 6.957497 6.957497 19999 +roget 1 1 6.957497 6.957497 20000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..66f6ce07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +includ 1 208 1.609438 1.609438 42 +applic 1 170 1.791759 1.791759 56 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +analysi 1 124 2.079442 2.079442 98 +high 1 130 2.079442 2.079442 101 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +level 1 87 2.484907 2.484907 180 +knowledg 1 67 2.708050 2.708050 243 +goal 1 66 2.708050 2.708050 250 +virtual 1 62 2.772589 2.772589 285 +robot 1 36 3.367296 3.367296 497 +anim 1 31 3.496508 3.496508 557 +steve 1 29 3.583519 3.583519 594 +task 1 25 3.737670 3.737670 678 +motion 1 24 3.761200 3.761200 699 +store 1 24 3.761200 3.761200 693 +input 1 23 3.806662 3.806662 727 +sequenc 1 23 3.806662 3.806662 734 +period 1 22 3.850148 3.850148 743 +modern 1 16 4.174387 4.174387 966 +devic 1 16 4.174387 4.174387 1002 +charact 1 15 4.248495 4.248495 1028 +track 1 15 4.248495 4.248495 1029 +chuck 1 14 4.317488 4.317488 1108 +directli 1 13 4.382027 4.382027 1141 +realiti 1 12 4.465908 4.465908 1272 +walk 1 12 4.465908 4.465908 1281 +motiv 1 11 4.553877 4.553877 1346 +realist 1 8 4.875197 4.875197 1665 +root 1 8 4.875197 4.875197 1650 +seitz 1 7 5.010635 5.010635 1976 +smile 1 7 5.010635 5.010635 1807 +infer 1 6 5.164786 5.164786 2040 +writeup 1 5 5.347108 5.347108 2352 +rigid 1 5 5.347108 5.347108 2432 +tocomput 1 3 5.857933 5.857933 3162 +endow 1 2 6.263398 6.263398 6234 +cue 1 2 6.263398 6.263398 5391 +anabstract 1 2 6.263398 6.263398 5491 +dyerour 1 1 6.957497 6.957497 20001 +teachinga 1 1 6.957497 6.957497 20002 +hasit 1 1 6.957497 6.957497 20003 +cartoon 1 1 6.957497 6.957497 20004 +teleconferenc 1 1 6.957497 6.957497 20005 +performa 1 1 6.957497 6.957497 20006 +repertoir 1 1 6.957497 6.957497 20007 +beinvok 1 1 6.957497 6.957497 20008 +cu 1 1 6.957497 6.957497 20009 +levelev 1 1 6.957497 6.957497 20010 +nonrigid 1 1 6.957497 6.957497 20011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..a8948ad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +recent 1 167 1.791759 1.791759 58 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +provid 1 121 2.079442 2.079442 94 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +chang 1 82 2.484907 2.484907 163 +june 1 79 2.564949 2.564949 214 +workshop 1 71 2.639057 2.639057 239 +view 1 70 2.708050 2.708050 254 +differ 1 66 2.708050 2.708050 253 +creat 1 63 2.772589 2.772589 277 +interact 1 62 2.772589 2.772589 270 +guid 1 63 2.772589 2.772589 267 +three 1 54 2.944439 2.944439 330 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +right 1 48 3.044522 3.044522 363 +visual 1 48 3.044522 3.044522 372 +physic 1 47 3.091042 3.091042 377 +answer 1 45 3.135494 3.135494 391 +describ 1 45 3.135494 3.135494 400 +show 1 43 3.178054 3.178054 417 +autom 1 41 3.218876 3.218876 434 +theoret 1 39 3.258097 3.258097 446 +movi 1 40 3.258097 3.258097 459 +correct 1 38 3.295837 3.295837 462 +origin 1 38 3.295837 3.295837 472 +procedur 1 36 3.367296 3.367296 488 +represent 1 35 3.401197 3.401197 512 +produc 1 30 3.555348 3.555348 572 +steve 1 29 3.583519 3.583519 594 +consid 1 29 3.583519 3.583519 590 +enjoi 1 26 3.688879 3.688879 660 +proc 1 26 3.688879 3.688879 649 +although 1 25 3.737670 3.737670 667 +known 1 24 3.761200 3.761200 702 +sequenc 1 23 3.806662 3.806662 734 +synthesi 1 20 3.951244 3.951244 834 +basi 1 20 3.951244 3.951244 828 +mpeg 1 20 3.951244 3.951244 831 +geometr 1 19 4.007333 4.007333 852 +left 1 19 4.007333 4.007333 851 +chuck 1 14 4.317488 4.317488 1108 +scene 1 14 4.317488 4.317488 1114 +shown 1 14 4.317488 4.317488 1080 +valid 1 11 4.553877 4.553877 1299 +devis 1 10 4.653960 4.653960 1451 +reli 1 10 4.653960 4.653960 1411 +certain 1 10 4.653960 4.653960 1393 +correspond 1 10 4.653960 4.653960 1382 +intermedi 1 9 4.753590 4.753590 1497 +establish 1 9 4.753590 4.753590 1532 +assumpt 1 9 4.753590 4.753590 1514 +pair 1 9 4.753590 4.753590 1503 +dyer 1 9 4.753590 4.753590 1573 +satisfi 1 8 4.875197 4.875197 1694 +seitz 1 7 5.010635 5.010635 1976 +interpol 1 7 5.010635 5.010635 1823 +morph 1 7 5.010635 5.010635 1937 +stereo 1 7 5.010635 5.010635 1818 +theproject 1 6 5.164786 5.164786 1981 +provabl 1 5 5.347108 5.347108 2558 +surprisingli 1 4 5.568345 5.568345 2609 +visibl 1 4 5.568345 5.568345 2994 +todetermin 1 3 5.857933 5.857933 3182 +widespread 1 2 6.263398 6.263398 4911 +viewsof 1 2 6.263398 6.263398 6135 +undergo 1 2 6.263398 6.263398 4253 +dyerw 1 1 6.957497 6.957497 20012 +graphicscommun 1 1 6.957497 6.957497 20013 +techniquescurr 1 1 6.957497 6.957497 20014 +validityha 1 1 6.957497 6.957497 20015 +ofthat 1 1 6.957497 6.957497 20016 +simplerectif 1 1 6.957497 6.957497 20017 +therectifi 1 1 6.957497 6.957497 20018 +theinterpol 1 1 6.957497 6.957497 20019 +computedinterpol 1 1 6.957497 6.957497 20020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..ea0779e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +base 1 165 1.791759 1.791759 50 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +high 1 130 2.079442 2.079442 101 +well 1 109 2.197225 2.197225 121 +techniqu 1 99 2.302585 2.302585 138 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +requir 1 81 2.484907 2.484907 167 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +appear 1 78 2.564949 2.564949 210 +view 1 70 2.708050 2.708050 254 +differ 1 66 2.708050 2.708050 253 +knowledg 1 67 2.708050 2.708050 243 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +simpl 1 60 2.833213 2.833213 298 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +basic 1 50 3.044522 3.044522 360 +principl 1 48 3.044522 3.044522 357 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +move 1 47 3.091042 3.091042 382 +howev 1 41 3.218876 3.218876 422 +movi 1 40 3.258097 3.258097 459 +correct 1 38 3.295837 3.295837 462 +represent 1 35 3.401197 3.401197 512 +manual 1 35 3.401197 3.401197 504 +transform 1 32 3.465736 3.465736 542 +often 1 31 3.496508 3.496508 551 +taken 1 31 3.496508 3.496508 555 +produc 1 30 3.555348 3.555348 572 +steve 1 29 3.583519 3.583519 594 +toward 1 25 3.737670 3.737670 668 +handl 1 24 3.761200 3.761200 685 +frame 1 24 3.761200 3.761200 684 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +mpeg 1 20 3.951244 3.951244 831 +render 1 17 4.110874 4.110874 947 +transit 1 15 4.248495 4.248495 1046 +photograph 1 15 4.248495 4.248495 1056 +reflect 1 15 4.248495 4.248495 1034 +chuck 1 14 4.317488 4.317488 1108 +scene 1 14 4.317488 4.317488 1114 +camera 1 14 4.317488 4.317488 1115 +draw 1 14 4.317488 4.317488 1086 +introduc 1 13 4.382027 4.382027 1139 +resolut 1 13 4.382027 4.382027 1172 +shape 1 12 4.465908 4.465908 1245 +abil 1 11 4.553877 4.553877 1341 +dyer 1 9 4.753590 4.753590 1573 +pose 1 9 4.753590 4.753590 1535 +correctli 1 9 4.753590 4.753590 1478 +face 1 9 4.753590 4.753590 1501 +siggraph 1 8 4.875197 4.875197 1773 +morph 1 7 5.010635 5.010635 1937 +seitz 1 7 5.010635 5.010635 1976 +interpol 1 7 5.010635 5.010635 1823 +viewpoint 1 6 5.164786 5.164786 2116 +difficult 1 6 5.164786 5.164786 2035 +simultan 1 6 5.164786 5.164786 2155 +jude 1 6 5.164786 5.164786 2123 +synthes 1 5 5.347108 5.347108 2451 +facial 1 5 5.347108 5.347108 2438 +shavlik 1 5 5.347108 5.347108 2429 +illus 1 4 5.568345 5.568345 2603 +mona 1 2 6.263398 6.263398 5786 +lisa 1 2 6.263398 6.263398 5427 +icpr 1 1 6.957497 6.957497 20021 +compel 1 1 6.957497 6.957497 20022 +betweenimag 1 1 6.957497 6.957497 20023 +causeunnatur 1 1 6.957497 6.957497 20024 +distort 1 1 6.957497 6.957497 20025 +projectivegeometri 1 1 6.957497 6.957497 20026 +morphingthat 1 1 6.957497 6.957497 20027 +prewarp 1 1 6.957497 6.957497 20028 +imagesprior 1 1 6.957497 6.957497 20029 +postwarp 1 1 6.957497 6.957497 20030 +appliedto 1 1 6.957497 6.957497 20031 +structureafford 1 1 6.957497 6.957497 20032 +imagetransform 1 1 6.957497 6.957497 20033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..c6283ea1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +theori 1 111 2.197225 2.197225 127 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +refer 1 78 2.564949 2.564949 203 +line 1 75 2.639057 2.639057 231 +addit 1 74 2.639057 2.639057 228 +view 1 70 2.708050 2.708050 254 +previou 1 62 2.772589 2.772589 290 +import 1 65 2.772589 2.772589 282 +locat 1 59 2.833213 2.833213 303 +sever 1 56 2.890372 2.890372 322 +allow 1 53 2.944439 2.944439 333 +run 1 51 2.995732 2.995732 347 +life 1 50 3.044522 3.044522 375 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +move 1 47 3.091042 3.091042 382 +could 1 46 3.091042 3.091042 383 +physic 1 47 3.091042 3.091042 377 +featur 1 46 3.091042 3.091042 386 +even 1 45 3.135494 3.135494 393 +describ 1 45 3.135494 3.135494 400 +natur 1 44 3.135494 3.135494 406 +show 1 43 3.178054 3.178054 417 +error 1 40 3.258097 3.258097 449 +mean 1 37 3.332205 3.332205 477 +next 1 34 3.401197 3.401197 517 +represent 1 35 3.401197 3.401197 512 +singl 1 34 3.401197 3.401197 510 +human 1 32 3.465736 3.465736 546 +steve 1 29 3.583519 3.583519 594 +determin 1 27 3.637586 3.637586 630 +enhanc 1 26 3.688879 3.688879 644 +trace 1 25 3.737670 3.737670 677 +reliabl 1 25 3.737670 3.737670 674 +motion 1 24 3.761200 3.761200 699 +frame 1 24 3.761200 3.761200 684 +sequenc 1 23 3.806662 3.806662 734 +period 1 22 3.850148 3.850148 743 +defin 1 22 3.850148 3.850148 746 +identifi 1 22 3.850148 3.850148 760 +attempt 1 17 4.110874 4.110874 917 +medic 1 17 4.110874 4.110874 958 +spatial 1 16 4.174387 4.174387 988 +chuck 1 14 4.317488 4.317488 1108 +camera 1 14 4.317488 4.317488 1115 +scene 1 14 4.317488 4.317488 1114 +whose 1 13 4.382027 4.382027 1166 +deriv 1 13 4.382027 4.382027 1145 +composit 1 13 4.382027 4.382027 1150 +walk 1 12 4.465908 4.465908 1281 +instanc 1 11 4.553877 4.553877 1322 +moment 1 11 4.553877 4.553877 1379 +cycl 1 11 4.553877 4.553877 1335 +correspond 1 10 4.653960 4.653960 1382 +surfac 1 9 4.753590 4.753590 1574 +explicit 1 9 4.753590 4.753590 1525 +tempor 1 9 4.753590 4.753590 1584 +film 1 8 4.875197 4.875197 1761 +pure 1 8 4.875197 4.875197 1776 +irregular 1 8 4.875197 4.875197 1768 +invari 1 8 4.875197 4.875197 1748 +heart 1 8 4.875197 4.875197 1729 +seitz 1 7 5.010635 5.010635 1976 +compact 1 7 5.010635 5.010635 1907 +canb 1 7 5.010635 5.010635 1846 +bottom 1 7 5.010635 5.010635 1906 +appar 1 7 5.010635 5.010635 1958 +recov 1 6 5.164786 5.164786 2235 +furthermor 1 6 5.164786 5.164786 2141 +cyclic 1 5 5.347108 5.347108 2383 +skip 1 5 5.347108 5.347108 2402 +variat 1 5 5.347108 5.347108 2248 +affin 1 5 5.347108 5.347108 2378 +clickher 1 5 5.347108 5.347108 2428 +havedevelop 1 4 5.568345 5.568345 2681 +repeat 1 4 5.568345 5.568345 2798 +tend 1 4 5.568345 5.568345 3041 +visibl 1 4 5.568345 5.568345 2994 +fashion 1 3 5.857933 5.857933 3699 +unlik 1 2 6.263398 6.263398 5063 +slow 1 2 6.263398 6.263398 5341 +perfectli 1 2 6.263398 6.263398 5569 +dyermani 1 1 6.957497 6.957497 20036 +locomotori 1 1 6.957497 6.957497 20037 +shuffl 1 1 6.957497 6.957497 20038 +areperiod 1 1 6.957497 6.957497 20039 +beenproduc 1 1 6.957497 6.957497 20040 +ourapproach 1 1 6.957497 6.957497 20041 +poscript 1 1 6.957497 6.957497 20034 +tracethi 1 1 6.957497 6.957497 20042 +imagesequ 1 1 6.957497 6.957497 20043 +phonograph 1 1 6.957497 6.957497 20044 +turntabl 1 1 6.957497 6.957497 20035 +ramp 1 1 6.957497 6.957497 20045 +timewher 1 1 6.957497 6.957497 20046 +momentarili 1 1 6.957497 6.957497 20047 +shownsuperimpos 1 1 6.957497 6.957497 20048 +variesslightli 1 1 6.957497 6.957497 20049 +changesin 1 1 6.957497 6.957497 20050 +motionsthat 1 1 6.957497 6.957497 20051 +evolutionof 1 1 6.957497 6.957497 20052 +quantiti 1 1 6.957497 6.957497 20053 +asposit 1 1 6.957497 6.957497 20054 +veloc 1 1 6.957497 6.957497 20055 +delimit 1 1 6.957497 6.957497 20056 +correspondencesacross 1 1 6.957497 6.957497 20057 +parsinga 1 1 6.957497 6.957497 20058 +tracecan 1 1 6.957497 6.957497 20059 +fromdiffer 1 1 6.957497 6.957497 20060 +recoveredfrom 1 1 6.957497 6.957497 20061 +angiograph 1 1 6.957497 6.957497 20062 +additionalstructur 1 1 6.957497 6.957497 20063 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..801b0c08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +look 1 107 2.197225 2.197225 115 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +stuff 1 87 2.484907 2.484907 171 +chang 1 82 2.484907 2.484907 163 +exampl 1 77 2.564949 2.564949 195 +view 1 70 2.708050 2.708050 254 +juli 1 60 2.833213 2.833213 305 +frequent 1 49 3.044522 3.044522 367 +math 1 44 3.135494 3.135494 402 +show 1 43 3.178054 3.178054 417 +vision 1 41 3.218876 3.218876 430 +cach 1 41 3.218876 3.218876 432 +movi 1 40 3.258097 3.258097 459 +steve 1 29 3.583519 3.583519 594 +berkelei 1 26 3.688879 3.688879 657 +motion 1 24 3.761200 3.761200 699 +synthesi 1 20 3.951244 3.951244 834 +mpeg 1 20 3.951244 3.951244 831 +left 1 19 4.007333 4.007333 851 +render 1 17 4.110874 4.110874 947 +seitz 1 7 5.010635 5.010635 1976 +morph 1 7 5.010635 5.010635 1937 +interpol 1 7 5.010635 5.010635 1823 +cyclic 1 5 5.347108 5.347108 2383 +closer 1 2 6.263398 6.263398 6024 +surreal 1 1 6.957497 6.957497 20064 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..61183c5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +file 1 132 1.945910 1.945910 70 +machin 1 129 2.079442 2.079442 95 +schedul 1 119 2.079442 2.079442 85 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +access 1 102 2.302585 2.302585 136 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +learn 1 86 2.484907 2.484907 170 +contain 1 81 2.484907 2.484907 174 +member 1 84 2.484907 2.484907 165 +school 1 84 2.484907 2.484907 188 +librari 1 87 2.484907 2.484907 181 +journal 1 83 2.484907 2.484907 183 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +workshop 1 71 2.639057 2.639057 239 +august 1 66 2.708050 2.708050 257 +prof 1 64 2.772589 2.772589 273 +dept 1 64 2.772589 2.772589 291 +abstract 1 62 2.772589 2.772589 276 +content 1 59 2.833213 2.833213 302 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +archiv 1 49 3.044522 3.044522 364 +mark 1 44 3.135494 3.135494 403 +describ 1 45 3.135494 3.135494 400 +directori 1 45 3.135494 3.135494 396 +math 1 44 3.135494 3.135494 402 +vision 1 41 3.218876 3.218876 430 +seminar 1 38 3.295837 3.295837 470 +robot 1 36 3.367296 3.367296 497 +richard 1 31 3.496508 3.496508 559 +domain 1 30 3.555348 3.555348 564 +held 1 28 3.610918 3.610918 600 +relev 1 26 3.688879 3.688879 637 +proc 1 26 3.688879 3.688879 649 +mostli 1 19 4.007333 4.007333 869 +agent 1 18 4.060443 4.060443 910 +ascii 1 15 4.248495 4.248495 1032 +biologi 1 15 4.248495 4.248495 1049 +doit 1 14 4.317488 4.317488 1111 +readabl 1 12 4.465908 4.465908 1258 +wendt 1 10 4.653960 4.653960 1446 +kevin 1 9 4.753590 4.753590 1482 +mangasarian 1 9 4.753590 4.753590 1570 +dataset 1 7 5.010635 5.010635 1914 +extern 1 6 5.164786 5.164786 2105 +jude 1 6 5.164786 5.164786 2123 +olvi 1 6 5.164786 5.164786 2109 +gopher 1 6 5.164786 5.164786 1982 +bodner 1 5 5.347108 5.347108 2401 +shavlik 1 5 5.347108 5.347108 2429 +testb 1 5 5.347108 5.347108 2456 +tina 1 3 5.857933 5.857933 3744 +breast 1 3 5.857933 5.857933 4033 +cancer 1 3 5.857933 5.857933 4032 +carolyn 1 2 6.263398 6.263398 6088 +allex 1 2 6.263398 6.263398 6087 +eliassi 1 2 6.263398 6.263398 6147 +thememb 1 1 6.957497 6.957497 20066 +mlrg 1 1 6.957497 6.957497 20065 +jonathon 1 1 6.957497 6.957497 20067 +cherkauer 1 1 6.957497 6.957497 20068 +craven 1 1 6.957497 6.957497 20069 +maclin 1 1 6.957497 6.957497 20070 +opitz 1 1 6.957497 6.957497 20071 +papersvisit 1 1 6.957497 6.957497 20072 +recentabstractsi 1 1 6.957497 6.957497 20073 +theoriesy 1 1 6.957497 6.957497 20074 +severalml 1 1 6.957497 6.957497 20075 +sgroup 1 1 6.957497 6.957497 20076 +neurosci 1 1 6.957497 6.957497 20077 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..b450ffc9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +memori 1 101 2.302585 2.302585 139 +west 1 83 2.484907 2.484907 192 +wide 1 84 2.484907 2.484907 185 +institut 1 84 2.484907 2.484907 187 +interfac 1 79 2.564949 2.564949 209 +summari 1 73 2.639057 2.639057 237 +simul 1 66 2.708050 2.708050 255 +street 1 63 2.772589 2.772589 293 +copi 1 63 2.772589 2.772589 284 +share 1 59 2.833213 2.833213 304 +space 1 57 2.890372 2.890372 310 +advisor 1 51 2.995732 2.995732 355 +right 1 48 3.044522 3.044522 363 +mark 1 44 3.135494 3.135494 403 +protocol 1 45 3.135494 3.135494 407 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +submit 1 39 3.258097 3.258097 440 +tutori 1 39 3.258097 3.258097 437 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +copyright 1 36 3.367296 3.367296 495 +random 1 34 3.401197 3.401197 511 +tech 1 35 3.401197 3.401197 515 +articl 1 33 3.433987 3.433987 530 +india 1 32 3.465736 3.465736 550 +dissert 1 32 3.465736 3.465736 549 +steve 1 29 3.583519 3.583519 594 +progress 1 28 3.610918 3.610918 598 +team 1 27 3.637586 3.637586 625 +hill 1 25 3.737670 3.737670 670 +departmentunivers 1 24 3.761200 3.761200 711 +indian 1 22 3.850148 3.850148 769 +cooper 1 22 3.850148 3.850148 757 +reserv 1 20 3.951244 3.951244 808 +wind 1 18 4.060443 4.060443 908 +hobbi 1 16 4.174387 4.174387 1009 +month 1 15 4.248495 4.248495 1025 +coher 1 14 4.317488 4.317488 1109 +danc 1 12 4.465908 4.465908 1278 +isca 1 11 4.553877 4.553877 1354 +correspond 1 10 4.653960 4.653960 1382 +queue 1 10 4.653960 4.653960 1386 +custom 1 10 4.653960 4.653960 1414 +usaphon 1 9 4.753590 4.753590 1600 +jump 1 9 4.753590 4.753590 1603 +tunnel 1 9 4.753590 4.753590 1615 +architect 1 8 4.875197 4.875197 1624 +kanpur 1 8 4.875197 4.875197 1744 +irregular 1 8 4.875197 4.875197 1768 +morph 1 7 5.010635 5.010635 1937 +courtesi 1 7 5.010635 5.010635 1953 +seitz 1 7 5.010635 5.010635 1976 +mukherje 1 5 5.347108 5.347108 2586 +button 1 5 5.347108 5.347108 2337 +commod 1 5 5.347108 5.347108 2415 +ppopp 1 4 5.568345 5.568345 2774 +shubhendu 1 3 5.857933 5.857933 4028 +badger 1 3 5.857933 5.857933 3502 +ballroom 1 3 5.857933 5.857933 3983 +shubu 1 2 6.263398 6.263398 6148 +fiance 1 2 6.263398 6.263398 5497 +nephew 1 2 6.263398 6.263398 5332 +dionisio 1 2 6.263398 6.263398 6203 +grai 1 2 6.263398 6.263398 4098 +mimi 1 1 6.957497 6.957497 20078 +avirup 1 1 6.957497 6.957497 20079 +linkseducationph 1 1 6.957497 6.957497 20080 +cachabl 1 1 6.957497 6.957497 20081 +dirsw 1 1 6.957497 6.957497 20082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..a4ef7a0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +fall 1 181 1.609438 1.609438 40 +book 1 99 2.302585 2.302585 131 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +new 1 64 2.772589 2.772589 262 +run 1 51 2.995732 2.995732 347 +movi 1 40 3.258097 3.258097 459 +seminar 1 38 3.295837 3.295837 470 +michael 1 35 3.401197 3.401197 514 +wai 1 25 3.737670 3.737670 662 +sport 1 25 3.737670 3.737670 683 +wonder 1 20 3.951244 3.951244 815 +club 1 15 4.248495 4.248495 1058 +philosophi 1 13 4.382027 4.382027 1167 +televis 1 6 5.164786 5.164786 2118 +wast 1 5 5.347108 5.347108 2537 +humor 1 5 5.347108 5.347108 2533 +midwest 1 2 6.263398 6.263398 6225 +siff 1 1 6.957497 6.957497 20083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..faade86d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +februari 1 54 2.944439 2.944439 328 +departmentunivers 1 24 3.761200 3.761200 711 +edutelephon 1 10 4.653960 4.653960 1473 +studentcomput 1 7 5.010635 5.010635 1963 +skrentni 1 6 5.164786 5.164786 2104 +lecturerc 1 1 6.957497 6.957497 20084 +coordinatorgradu 1 1 6.957497 6.957497 20085 +sciencesemail 1 1 6.957497 6.957497 20086 +groupskrentni 1 1 6.957497 6.957497 20087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..a0278fae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +relat 1 139 1.945910 1.945910 68 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +help 1 83 2.484907 2.484907 175 +activ 1 84 2.484907 2.484907 182 +april 1 77 2.564949 2.564949 196 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +interact 1 62 2.772589 2.772589 270 +unix 1 58 2.890372 2.890372 308 +advisor 1 51 2.995732 2.995732 355 +represent 1 35 3.401197 3.401197 512 +human 1 32 3.465736 3.465736 546 +toward 1 25 3.737670 3.737670 668 +reliabl 1 25 3.737670 3.737670 674 +departmentunivers 1 24 3.761200 3.761200 711 +util 1 21 3.912023 3.912023 774 +miller 1 17 4.110874 4.110874 949 +step 1 13 4.382027 4.382027 1138 +larri 1 13 4.382027 4.382027 1142 +edutelephon 1 10 4.653960 4.653960 1473 +purdu 1 10 4.653960 4.653960 1466 +empir 1 8 4.875197 4.875197 1722 +studentcomput 1 7 5.010635 5.010635 1963 +bryan 1 5 5.347108 5.347108 2421 +travi 1 3 5.857933 5.857933 3985 +fredriksen 1 1 6.957497 6.957497 20139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..bd15deb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +databas 1 122 2.079442 2.079442 86 +introduct 1 126 2.079442 2.079442 87 +technolog 1 131 2.079442 2.079442 102 +analysi 1 124 2.079442 2.079442 98 +manag 1 114 2.197225 2.197225 125 +center 1 88 2.397895 2.397895 158 +associ 1 93 2.397895 2.397895 151 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +activ 1 84 2.484907 2.484907 182 +journal 1 83 2.484907 2.484907 183 +west 1 83 2.484907 2.484907 192 +chang 1 82 2.484907 2.484907 163 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +intellig 1 72 2.639057 2.639057 225 +summari 1 73 2.639057 2.639057 237 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +knowledg 1 67 2.708050 2.708050 243 +test 1 66 2.708050 2.708050 252 +integr 1 67 2.708050 2.708050 245 +complex 1 64 2.772589 2.772589 269 +foundat 1 62 2.772589 2.772589 286 +abstract 1 62 2.772589 2.772589 276 +artifici 1 63 2.772589 2.772589 280 +dept 1 64 2.772589 2.772589 291 +automat 1 61 2.833213 2.833213 306 +special 1 56 2.890372 2.890372 320 +sever 1 56 2.890372 2.890372 322 +visual 1 48 3.044522 3.044522 372 +approach 1 48 3.044522 3.044522 366 +understand 1 47 3.091042 3.091042 384 +could 1 46 3.091042 3.091042 383 +around 1 43 3.178054 3.178054 415 +examin 1 42 3.218876 3.218876 424 +form 1 39 3.258097 3.258097 443 +map 1 39 3.258097 3.258097 452 +societi 1 40 3.258097 3.258097 456 +formal 1 37 3.332205 3.332205 478 +procedur 1 36 3.367296 3.367296 488 +represent 1 35 3.401197 3.401197 512 +singl 1 34 3.401197 3.401197 510 +focus 1 29 3.583519 3.583519 584 +enhanc 1 26 3.688879 3.688879 644 +altern 1 26 3.688879 3.688879 641 +departmentunivers 1 24 3.761200 3.761200 711 +pattern 1 24 3.761200 3.761200 689 +magazin 1 24 3.761200 3.761200 704 +displai 1 23 3.806662 3.806662 712 +expert 1 20 3.951244 3.951244 833 +basi 1 20 3.951244 3.951244 828 +aid 1 18 4.060443 4.060443 904 +scott 1 18 4.060443 4.060443 884 +failur 1 18 4.060443 4.060443 898 +heterogen 1 14 4.317488 4.317488 1090 +chuck 1 14 4.317488 4.317488 1108 +larri 1 13 4.382027 4.382027 1142 +social 1 13 4.382027 4.382027 1123 +incorpor 1 13 4.382027 4.382027 1163 +deduct 1 12 4.465908 4.465908 1236 +edutelephon 1 10 4.653960 4.653960 1473 +angel 1 8 4.875197 4.875197 1779 +databasesystem 1 8 4.875197 4.875197 1617 +attent 1 8 4.875197 4.875197 1651 +philosoph 1 7 5.010635 5.010635 1904 +geograph 1 6 5.164786 5.164786 2236 +augment 1 5 5.347108 5.347108 2350 +bryan 1 5 5.347108 5.347108 2421 +implic 1 4 5.568345 5.568345 2696 +andi 1 4 5.568345 5.568345 3081 +travi 1 3 5.857933 5.857933 3985 +metaphor 1 3 5.857933 5.857933 4038 +landscap 1 3 5.857933 5.857933 3525 +waysthat 1 2 6.263398 6.263398 5445 +andwith 1 2 6.263398 6.263398 5051 +derek 1 2 6.263398 6.263398 4537 +travisprofessorcomput 1 1 6.957497 6.957497 20140 +californa 1 1 6.957497 6.957497 20141 +ofartifici 1 1 6.957497 6.957497 20142 +automaticdeduct 1 1 6.957497 6.957497 20143 +contruct 1 1 6.957497 6.957497 20144 +informationcontain 1 1 6.957497 6.957497 20145 +beingdevot 1 1 6.957497 6.957497 20146 +visualiz 1 1 6.957497 6.957497 20147 +organiz 1 1 6.957497 6.957497 20148 +suppositionsunderli 1 1 6.957497 6.957497 20149 +ohar 1 1 6.957497 6.957497 20150 +swanson 1 1 6.957497 6.957497 20151 +whitsitt 1 1 6.957497 6.957497 20152 +zahn 1 1 6.957497 6.957497 20153 +oravec 1 1 6.957497 6.957497 20154 +reflex 1 1 6.957497 6.957497 20155 +falsework 1 1 6.957497 6.957497 20156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..14571495 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +center 1 88 2.397895 2.397895 158 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +free 1 73 2.639057 2.639057 224 +java 1 70 2.708050 2.708050 248 +street 1 63 2.772589 2.772589 293 +new 1 64 2.772589 2.772589 262 +juli 1 60 2.833213 2.833213 305 +directori 1 45 3.135494 3.135494 396 +edit 1 42 3.218876 3.218876 418 +download 1 36 3.367296 3.367296 489 +tech 1 35 3.401197 3.401197 515 +random 1 34 3.401197 3.401197 511 +india 1 32 3.465736 3.465736 550 +packag 1 28 3.610918 3.610918 614 +indian 1 22 3.850148 3.850148 769 +rank 1 14 4.317488 4.317488 1063 +multiscalar 1 8 4.875197 4.875197 1783 +cricket 1 7 5.010635 5.010635 1945 +kestrel 1 4 5.568345 5.568345 2990 +batch 1 4 5.568345 5.568345 2700 +will 1 4 5.568345 5.568345 2782 +avinash 1 3 5.857933 5.857933 3510 +mate 1 3 5.857933 5.857933 3127 +hindu 1 3 5.857933 5.857933 3590 +sodani 1 2 6.263398 6.263398 4803 +kharagpur 1 2 6.263398 6.263398 6236 +toll 1 2 6.263398 6.263398 6149 +hon 1 1 6.957497 6.957497 20088 +kgpite 1 1 6.957497 6.957497 20089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..79d8d041 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,253 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +place 1 106 2.197225 2.197225 124 +intern 1 108 2.197225 2.197225 128 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +proceed 1 93 2.397895 2.397895 152 +educ 1 86 2.484907 2.484907 191 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +summari 1 73 2.639057 2.639057 237 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +simul 1 66 2.708050 2.708050 255 +window 1 68 2.708050 2.708050 242 +import 1 65 2.772589 2.772589 282 +evalu 1 64 2.772589 2.772589 266 +januari 1 62 2.772589 2.772589 264 +septemb 1 65 2.772589 2.772589 274 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +numer 1 49 3.044522 3.044522 369 +set 1 50 3.044522 3.044522 361 +pointer 1 48 3.044522 3.044522 368 +electron 1 47 3.091042 3.091042 379 +possibl 1 47 3.091042 3.091042 378 +understand 1 47 3.091042 3.091042 384 +could 1 46 3.091042 3.091042 383 +execut 1 45 3.135494 3.135494 404 +mechan 1 43 3.178054 3.178054 416 +futur 1 41 3.218876 3.218876 427 +cach 1 41 3.218876 3.218876 432 +fast 1 42 3.218876 3.218876 429 +combin 1 42 3.218876 3.218876 421 +press 1 42 3.218876 3.218876 419 +continu 1 39 3.258097 3.258097 448 +multipl 1 39 3.258097 3.258097 453 +annual 1 40 3.258097 3.258097 458 +transact 1 39 3.258097 3.258097 438 +error 1 40 3.258097 3.258097 449 +streetmadison 1 38 3.295837 3.295837 474 +electr 1 38 3.295837 3.295837 461 +sciencesunivers 1 37 3.332205 3.332205 486 +india 1 32 3.465736 3.465736 550 +concept 1 32 3.465736 3.465736 537 +given 1 32 3.465736 3.465736 538 +exist 1 30 3.555348 3.555348 569 +focus 1 29 3.583519 3.583519 584 +depend 1 29 3.583519 3.583519 583 +multiprocessor 1 28 3.610918 3.610918 605 +held 1 28 3.610918 3.610918 600 +load 1 28 3.610918 3.610918 601 +arrai 1 27 3.637586 3.637586 627 +challeng 1 26 3.688879 3.688879 653 +effort 1 26 3.688879 3.688879 652 +request 1 26 3.688879 3.688879 635 +detect 1 26 3.688879 3.688879 646 +flow 1 24 3.761200 3.761200 700 +compress 1 23 3.806662 3.806662 719 +sequenti 1 22 3.850148 3.850148 745 +reduc 1 22 3.850148 3.850148 759 +hierarchi 1 22 3.850148 3.850148 744 +chip 1 21 3.912023 3.912023 770 +department 1 20 3.951244 3.951244 839 +exploit 1 20 3.951244 3.951244 836 +smith 1 20 3.951244 3.951244 820 +fine 1 20 3.951244 3.951244 822 +predict 1 19 4.007333 4.007333 855 +scott 1 18 4.060443 4.060443 884 +regist 1 17 4.110874 4.110874 938 +interconnect 1 17 4.110874 4.110874 937 +expand 1 17 4.110874 4.110874 928 +eduphon 1 15 4.248495 4.248495 1060 +novel 1 15 4.248495 4.248495 1039 +todd 1 15 4.248495 4.248495 1051 +micro 1 15 4.248495 4.248495 1031 +achiev 1 14 4.317488 4.317488 1088 +split 1 14 4.317488 4.317488 1078 +translat 1 13 4.382027 4.382027 1164 +sigplan 1 13 4.382027 4.382027 1190 +incorpor 1 13 4.382027 4.382027 1163 +prolog 1 13 4.382027 4.382027 1155 +target 1 12 4.465908 4.465908 1282 +calcul 1 12 4.465908 4.465908 1268 +cycl 1 11 4.553877 4.553877 1335 +bandwidth 1 11 4.553877 4.553877 1365 +arbitrari 1 11 4.553877 4.553877 1359 +branch 1 11 4.553877 4.553877 1318 +franklin 1 10 4.653960 4.653960 1436 +traffic 1 10 4.653960 4.653960 1421 +grain 1 10 4.653960 4.653960 1448 +inter 1 9 4.753590 4.753590 1530 +elect 1 8 4.875197 4.875197 1771 +uniprocessor 1 8 4.875197 4.875197 1696 +character 1 8 4.875197 4.875197 1767 +multiscalar 1 8 4.875197 4.875197 1783 +paradigm 1 8 4.875197 4.875197 1662 +potenti 1 8 4.875197 4.875197 1690 +watson 1 8 4.875197 4.875197 1691 +illinoi 1 7 5.010635 5.010635 1941 +zero 1 7 5.010635 5.010635 1896 +goodman 1 7 5.010635 5.010635 1891 +serial 1 7 5.010635 5.010635 1975 +friedman 1 7 5.010635 5.010635 1886 +chiang 1 7 5.010635 5.010635 1853 +sohi 1 6 5.164786 5.164786 2237 +risc 1 6 5.164786 5.164786 2016 +microarchitectur 1 6 5.164786 5.164786 2238 +superscalar 1 6 5.164786 5.164786 2082 +handbook 1 6 5.164786 5.164786 2061 +guri 1 5 5.347108 5.347108 2578 +andrea 1 5 5.347108 5.347108 2375 +lebeck 1 5 5.347108 5.347108 2582 +highest 1 4 5.568345 5.568345 2950 +resolv 1 4 5.568345 5.568345 2675 +height 1 4 5.568345 5.568345 2890 +appendix 1 4 5.568345 5.568345 2739 +crai 1 4 5.568345 5.568345 3012 +urbana 1 3 5.857933 5.857933 3879 +ordinari 1 3 5.857933 5.857933 3233 +bulk 1 3 5.857933 5.857933 4000 +thedevelop 1 3 5.857933 5.857933 3903 +breach 1 3 5.857933 5.857933 4009 +reorder 1 3 5.857933 5.857933 3952 +vijaykumar 1 3 5.857933 5.857933 4011 +streamlin 1 3 5.857933 5.857933 3573 +anatomi 1 3 5.857933 5.857933 4010 +chow 1 3 5.857933 5.857933 3281 +gurindar 1 2 6.263398 6.263398 6110 +andelectr 1 2 6.263398 6.263398 6200 +birla 1 2 6.263398 6.263398 6239 +pilani 1 2 6.263398 6.263398 6240 +plenti 1 2 6.263398 6.263398 5465 +sustain 1 2 6.263398 6.263398 6201 +needto 1 2 6.263398 6.263398 4927 +andhow 1 2 6.263398 6.263398 5933 +expend 1 2 6.263398 6.263398 5451 +moshovo 1 2 6.263398 6.263398 6211 +inrd 1 2 6.263398 6.263398 4531 +pnevmatikato 1 2 6.263398 6.263398 6204 +guard 1 2 6.263398 6.263398 5738 +tetra 1 2 6.263398 6.263398 5196 +graduatesaddress 1 1 6.957497 6.957497 20091 +usasohi 1 1 6.957497 6.957497 20092 +thehighest 1 1 6.957497 6.957497 20093 +circa 1 1 6.957497 6.957497 20094 +transistor 1 1 6.957497 6.957497 20095 +availableon 1 1 6.957497 6.957497 20096 +getth 1 1 6.957497 6.957497 20097 +ofov 1 1 6.957497 6.957497 20098 +thenatur 1 1 6.957497 6.957497 20099 +numericappl 1 1 6.957497 6.957497 20100 +andcarri 1 1 6.957497 6.957497 20101 +assessth 1 1 6.957497 6.957497 20102 +vijaykumarrec 1 1 6.957497 6.957497 20103 +talkswil 1 1 6.957497 6.957497 20104 +researchcent 1 1 6.957497 6.957497 20105 +yorktown 1 1 6.957497 6.957497 20106 +framemak 1 1 6.957497 6.957497 20090 +publicationshigh 1 1 6.957497 6.957497 20107 +ofdetail 1 1 6.957497 6.957497 20108 +resultsi 1 1 6.957497 6.957497 20109 +latencyt 1 1 6.957497 6.957497 20110 +processorsj 1 1 6.957497 6.957497 20111 +referencesm 1 1 6.957497 6.957497 20112 +communicationin 1 1 6.957497 6.957497 20113 +errorst 1 1 6.957497 6.957497 20114 +processorsd 1 1 6.957497 6.957497 20115 +knapsack 1 1 6.957497 6.957497 20116 +componentt 1 1 6.957497 6.957497 20117 +processorst 1 1 6.957497 6.957497 20118 +gradstodd 1 1 6.957497 6.957497 20119 +latencydionisio 1 1 6.957497 6.957497 20120 +setsmanoj 1 1 6.957497 6.957497 20121 +architecturemark 1 1 6.957497 6.957497 20122 +executionsriram 1 1 6.957497 6.957497 20123 +vajapeyam 1 1 6.957497 6.957497 20124 +processormen 1 1 6.957497 6.957497 20125 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..28046ee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +spring 1 131 2.079442 2.079442 88 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +graphic 1 90 2.397895 2.397895 147 +west 1 83 2.484907 2.484907 192 +environ 1 84 2.484907 2.484907 177 +larg 1 82 2.484907 2.484907 168 +orient 1 80 2.564949 2.564949 205 +june 1 79 2.564949 2.564949 214 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +interfac 1 79 2.564949 2.564949 209 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +david 1 71 2.639057 2.639057 232 +workshop 1 71 2.639057 2.639057 239 +logic 1 71 2.639057 2.639057 230 +java 1 70 2.708050 2.708050 248 +street 1 63 2.772589 2.772589 293 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +space 1 57 2.890372 2.890372 310 +overview 1 56 2.890372 2.890372 323 +point 1 58 2.890372 2.890372 319 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +mark 1 44 3.135494 3.135494 403 +paul 1 38 3.295837 3.295837 471 +michael 1 35 3.401197 3.401197 514 +independ 1 32 3.465736 3.465736 548 +chair 1 29 3.583519 3.583519 596 +built 1 29 3.583519 3.583519 592 +univ 1 28 3.610918 3.610918 617 +proc 1 26 3.688879 3.688879 649 +todai 1 25 3.737670 3.737670 672 +inth 1 22 3.850148 3.850148 741 +sigmod 1 19 4.007333 4.007333 877 +former 1 17 4.110874 4.110874 956 +white 1 17 4.110874 4.110874 951 +adam 1 17 4.110874 4.110874 934 +fourth 1 16 4.174387 4.174387 999 +dilbert 1 16 4.174387 4.174387 996 +configur 1 15 4.248495 4.248495 1012 +conf 1 13 4.382027 4.382027 1181 +dewitt 1 12 4.465908 4.465908 1270 +nanci 1 12 4.465908 4.465908 1256 +daniel 1 12 4.465908 4.465908 1233 +persist 1 11 4.553877 4.553877 1367 +shore 1 11 4.553877 4.553877 1377 +vldb 1 10 4.653960 4.653960 1470 +franklin 1 10 4.653960 4.653960 1436 +naughton 1 10 4.653960 4.653960 1450 +jeffrei 1 9 4.753590 4.753590 1612 +solomon 1 8 4.875197 4.875197 1716 +carei 1 8 4.875197 4.875197 1781 +ioannidi 1 8 4.875197 4.875197 1714 +goodman 1 7 5.010635 5.010635 1891 +tsatalo 1 5 5.347108 5.347108 2581 +marvin 1 4 5.568345 5.568345 2806 +mcauliff 1 4 5.568345 5.568345 3083 +schuh 1 3 5.857933 5.857933 4014 +gmap 1 2 6.263398 6.263398 6241 +versatil 1 2 6.263398 6.263398 6242 +seth 1 2 6.263398 6.263398 4998 +publicationstoward 1 1 6.957497 6.957497 20129 +andmarvin 1 1 6.957497 6.957497 20126 +abstractpostscriptth 1 1 6.957497 6.957497 20130 +odyssea 1 1 6.957497 6.957497 20128 +andyanni 1 1 6.957497 6.957497 20131 +abstractpostscriptexpand 1 1 6.957497 6.957497 20132 +journalv 1 1 6.957497 6.957497 20133 +abstractpostscriptshor 1 1 6.957497 6.957497 20134 +andmichael 1 1 6.957497 6.957497 20135 +zwillingavail 1 1 6.957497 6.957497 20136 +astech 1 1 6.957497 6.957497 20127 +capitl 1 1 6.957497 6.957497 20137 +photoalbum 1 1 6.957497 6.957497 20138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..440bfe60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +wisc 1 242 1.386294 1.386294 33 +welcom 1 122 2.079442 2.079442 99 +sowmya 1 4 5.568345 5.568345 2670 +subramanian 1 2 6.263398 6.263398 5666 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..21257f50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +relat 1 139 1.945910 1.945910 68 +first 1 140 1.945910 1.945910 71 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +send 1 114 2.197225 2.197225 109 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +comment 1 93 2.397895 2.397895 146 +info 1 85 2.484907 2.484907 176 +second 1 81 2.484907 2.484907 166 +stuff 1 87 2.484907 2.484907 171 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +june 1 79 2.564949 2.564949 214 +html 1 75 2.639057 2.639057 235 +meet 1 72 2.639057 2.639057 229 +java 1 70 2.708050 2.708050 248 +juli 1 60 2.833213 2.833213 305 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +finger 1 52 2.995732 2.995732 354 +cool 1 49 3.044522 3.044522 374 +music 1 42 3.218876 3.218876 436 +movi 1 40 3.258097 3.258097 459 +decis 1 23 3.806662 3.806662 728 +love 1 21 3.912023 3.912023 804 +women 1 16 4.174387 4.174387 1004 +ascii 1 15 4.248495 4.248495 1032 +philadelphia 1 12 4.465908 4.465908 1244 +guest 1 12 4.465908 4.465908 1220 +pagei 1 8 4.875197 4.875197 1683 +judg 1 8 4.875197 4.875197 1644 +constitut 1 6 5.164786 5.164786 2026 +sail 1 5 5.347108 5.347108 2571 +panel 1 5 5.347108 5.347108 2463 +vote 1 4 5.568345 5.568345 2953 +lawand 1 2 6.263398 6.263398 6191 +stuffa 1 2 6.263398 6.263398 5999 +resours 1 2 6.263398 6.263398 5211 +serverth 1 2 6.263398 6.263398 4448 +hoofer 1 2 6.263398 6.263398 6101 +shilpa 1 1 6.957497 6.957497 20157 +pastfor 1 1 6.957497 6.957497 20158 +schoolher 1 1 6.957497 6.957497 20159 +syster 1 1 6.957497 6.957497 20160 +madisonsurf 1 1 6.957497 6.957497 20161 +madisonst 1 1 6.957497 6.957497 20162 +clubowl 1 1 6.957497 6.957497 20163 +signatur 1 1 6.957497 6.957497 20164 +lovesnowi 1 1 6.957497 6.957497 20165 +linksher 1 1 6.957497 6.957497 20166 +iswher 1 1 6.957497 6.957497 20167 +tossl 1 1 6.957497 6.957497 20168 +shilpal 1 1 6.957497 6.957497 20169 +thru 1 1 6.957497 6.957497 20170 +formlast 1 1 6.957497 6.957497 20171 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..6ab2e553 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +teach 1 108 2.197225 2.197225 112 +take 1 97 2.302585 2.302585 134 +section 1 94 2.397895 2.397895 149 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +sport 1 25 3.737670 3.737670 683 +pageth 1 7 5.010635 5.010635 1939 +jeremi 1 5 5.347108 5.347108 2360 +simpson 1 2 6.263398 6.263398 5994 +stenglein 1 1 6.957497 6.957497 20172 +stenglei 1 1 6.957497 6.957497 20173 +pageespn 1 1 6.957497 6.957497 20174 +hotwir 1 1 6.957497 6.957497 20175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..f28dadf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +west 1 83 2.484907 2.484907 192 +june 1 79 2.564949 2.564949 214 +david 1 71 2.639057 2.639057 232 +free 1 73 2.639057 2.639057 224 +summari 1 73 2.639057 2.639057 237 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +faculti 1 56 2.890372 2.890372 325 +advisor 1 51 2.995732 2.995732 355 +finger 1 52 2.995732 2.995732 354 +mark 1 44 3.135494 3.135494 403 +join 1 39 3.258097 3.258097 457 +streetmadison 1 38 3.295837 3.295837 474 +feel 1 37 3.332205 3.332205 483 +sciencesunivers 1 37 3.332205 3.332205 486 +ofth 1 36 3.367296 3.367296 491 +often 1 31 3.496508 3.496508 551 +steve 1 29 3.583519 3.583519 594 +although 1 25 3.737670 3.737670 667 +hill 1 25 3.737670 3.737670 670 +finish 1 22 3.850148 3.850148 748 +tell 1 21 3.912023 3.912023 777 +wind 1 18 4.060443 4.060443 908 +steven 1 17 4.110874 4.110874 953 +wood 1 11 4.553877 4.553877 1355 +michigan 1 11 4.553877 4.553877 1368 +laru 1 9 4.753590 4.753590 1560 +reinhardt 1 5 5.347108 5.347108 2583 +computerarchitectur 1 5 5.347108 5.347108 2290 +publicationsresearch 1 4 5.568345 5.568345 2876 +eec 1 2 6.263398 6.263398 5981 +tunnelgroup 1 1 6.957497 6.957497 20176 +andjim 1 1 6.957497 6.957497 20177 +mewhat 1 1 6.957497 6.957497 20178 +stever 1 1 6.957497 6.957497 20179 +usalast 1 1 6.957497 6.957497 20180 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..20db9171 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +teach 1 108 2.197225 2.197225 112 +search 1 95 2.397895 2.397895 155 +west 1 83 2.484907 2.484907 192 +exam 1 86 2.484907 2.484907 169 +stuff 1 87 2.484907 2.484907 171 +solv 1 73 2.639057 2.639057 234 +nation 1 74 2.639057 2.639057 240 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +talk 1 53 2.944439 2.944439 336 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +show 1 43 3.178054 3.178054 417 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +field 1 37 3.332205 3.332205 482 +john 1 33 3.433987 3.433987 532 +begin 1 23 3.806662 3.806662 716 +rate 1 15 4.248495 4.248495 1037 +qualifi 1 8 4.875197 4.875197 1721 +chicago 1 6 5.164786 5.164786 2149 +fluid 1 5 5.347108 5.347108 2440 +kid 1 5 5.347108 5.347108 2516 +radio 1 4 5.568345 5.568345 3025 +car 1 4 5.568345 5.568345 2931 +nathan 1 4 5.568345 5.568345 2794 +drew 1 4 5.568345 5.568345 2980 +museum 1 3 5.857933 5.857933 3933 +pageoth 1 2 6.263398 6.263398 6104 +strikwerda 1 1 6.957497 6.957497 20181 +strikwerdadepart 1 1 6.957497 6.957497 20182 +strik 1 1 6.957497 6.957497 20183 +dynamicsmyoffici 1 1 6.957497 6.957497 20184 +tribun 1 1 6.957497 6.957497 20185 +footballmi 1 1 6.957497 6.957497 20186 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..58d610b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +thing 1 84 2.484907 2.484907 189 +david 1 71 2.639057 2.639057 232 +evalu 1 64 2.772589 2.772589 266 +processor 1 54 2.944439 2.944439 335 +execut 1 45 3.135494 3.135494 404 +show 1 43 3.178054 3.178054 417 +cach 1 41 3.218876 3.218876 432 +late 1 40 3.258097 3.258097 439 +enjoi 1 26 3.688879 3.688879 660 +cambridg 1 16 4.174387 4.174387 1008 +stream 1 15 4.248495 4.248495 1015 +buffer 1 12 4.465908 4.465908 1211 +calvin 1 9 4.753590 4.753590 1518 +replac 1 8 4.875197 4.875197 1668 +integ 1 8 4.875197 4.875197 1688 +secondari 1 7 5.010635 5.010635 1884 +histor 1 6 5.164786 5.164786 2085 +superscalar 1 6 5.164786 5.164786 2082 +hobb 1 4 5.568345 5.568345 2893 +decoupl 1 4 5.568345 5.568345 2898 +letterman 1 3 5.857933 5.857933 3931 +seinfeld 1 3 5.857933 5.857933 3958 +subba 1 2 6.263398 6.263398 6091 +subbarao 1 2 6.263398 6.263398 6205 +prooocessor 1 1 6.957497 6.957497 20187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..6addcd63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +schedul 1 119 2.079442 2.079442 85 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +search 1 95 2.397895 2.397895 155 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +complet 1 77 2.564949 2.564949 208 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +sourc 1 77 2.564949 2.564949 201 +workshop 1 71 2.639057 2.639057 239 +servic 1 72 2.639057 2.639057 236 +polici 1 64 2.772589 2.772589 279 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +guid 1 63 2.772589 2.772589 267 +processor 1 54 2.944439 2.944439 335 +still 1 50 3.044522 3.044522 362 +bibliographi 1 34 3.401197 3.401197 518 +limit 1 29 3.583519 3.583519 585 +measur 1 28 3.610918 3.610918 609 +static 1 27 3.637586 3.637586 619 +proc 1 26 3.688879 3.688879 649 +subject 1 26 3.688879 3.688879 647 +strategi 1 25 3.737670 3.737670 682 +yahoo 1 24 3.761200 3.761200 707 +alloc 1 20 3.951244 3.951244 821 +thur 1 19 4.007333 4.007333 847 +taiwan 1 16 4.174387 4.174387 1006 +sigmetr 1 13 4.382027 4.382027 1173 +conf 1 13 4.382027 4.382027 1181 +characterist 1 12 4.465908 4.465908 1257 +mari 1 12 4.465908 4.465908 1266 +shop 1 10 4.653960 4.653960 1469 +vernon 1 9 4.753590 4.753590 1556 +job 1 8 4.875197 4.875197 1702 +calendar 1 8 4.875197 4.875197 1649 +chiang 1 7 5.010635 5.010635 1853 +quantum 1 6 5.164786 5.164786 2214 +academia 1 6 5.164786 5.164786 2036 +ta 1 4 5.568345 5.568345 3058 +nashvil 1 4 5.568345 5.568345 2867 +sinanet 1 4 5.568345 5.568345 2883 +suhui 1 3 5.857933 5.857933 3430 +educlick 1 3 5.857933 5.857933 3612 +rajesh 1 3 5.857933 5.857933 3511 +conjunct 1 3 5.857933 5.857933 3743 +ipp 1 3 5.857933 5.857933 3381 +sinica 1 3 5.857933 5.857933 3819 +preemption 1 2 6.263398 6.263398 6230 +mansharamani 1 2 6.263398 6.263398 6231 +catalogu 1 2 6.263398 6.263398 6166 +magzin 1 2 6.263398 6.263398 5614 +madisonoffic 1 1 6.957497 6.957497 20188 +stelephon 1 1 6.957497 6.957497 20189 +emailoffic 1 1 6.957497 6.957497 20190 +seednet 1 1 6.957497 6.957497 20191 +vistor 1 1 6.957497 6.957497 20192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..946dd8a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,193 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +recent 1 167 1.791759 1.791759 58 +madison 1 165 1.791759 1.791759 55 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +provid 1 121 2.079442 2.079442 94 +manag 1 114 2.197225 2.197225 125 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +homepag 1 93 2.397895 2.397895 148 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +select 1 91 2.397895 2.397895 154 +follow 1 92 2.397895 2.397895 143 +institut 1 84 2.484907 2.484907 187 +info 1 85 2.484907 2.484907 176 +contain 1 81 2.484907 2.484907 174 +sourc 1 77 2.564949 2.564949 201 +david 1 71 2.639057 2.639057 232 +onlin 1 75 2.639057 2.639057 223 +addit 1 74 2.639057 2.639057 228 +nation 1 74 2.639057 2.639057 240 +servic 1 72 2.639057 2.639057 236 +view 1 70 2.708050 2.708050 254 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +organ 1 65 2.772589 2.772589 265 +foundat 1 62 2.772589 2.772589 286 +new 1 64 2.772589 2.772589 262 +written 1 63 2.772589 2.772589 278 +back 1 60 2.833213 2.833213 297 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +variou 1 56 2.890372 2.890372 317 +publish 1 57 2.890372 2.890372 326 +scientif 1 53 2.944439 2.944439 341 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +numer 1 49 3.044522 3.044522 369 +physic 1 47 3.091042 3.091042 377 +math 1 44 3.135494 3.135494 402 +natur 1 44 3.135494 3.135494 406 +show 1 43 3.178054 3.178054 417 +edit 1 42 3.218876 3.218876 418 +review 1 42 3.218876 3.218876 425 +realli 1 40 3.258097 3.258097 444 +late 1 40 3.258097 3.258097 439 +movi 1 40 3.258097 3.258097 459 +hand 1 37 3.332205 3.332205 475 +print 1 34 3.401197 3.401197 503 +articl 1 33 3.433987 3.433987 530 +obtain 1 33 3.433987 3.433987 534 +within 1 33 3.433987 3.433987 525 +india 1 32 3.465736 3.465736 550 +scientist 1 31 3.496508 3.496508 560 +steve 1 29 3.583519 3.583519 594 +weather 1 28 3.610918 3.610918 618 +american 1 27 3.637586 3.637586 634 +great 1 27 3.637586 3.637586 626 +william 1 22 3.850148 3.850148 765 +tell 1 21 3.912023 3.912023 777 +ever 1 19 4.007333 4.007333 872 +hypertext 1 19 4.007333 4.007333 865 +histori 1 19 4.007333 4.007333 853 +thoma 1 18 4.060443 4.060443 901 +regist 1 17 4.110874 4.110874 938 +weekli 1 17 4.110874 4.110874 919 +upon 1 16 4.174387 4.174387 978 +choos 1 16 4.174387 4.174387 964 +brief 1 16 4.174387 4.174387 1001 +dilbert 1 16 4.174387 4.174387 996 +advantag 1 16 4.174387 4.174387 987 +piec 1 15 4.248495 4.248495 1020 +floor 1 14 4.317488 4.317488 1070 +camera 1 14 4.317488 4.317488 1115 +care 1 13 4.382027 4.382027 1177 +wait 1 13 4.382027 4.382027 1168 +reader 1 12 4.465908 4.465908 1246 +outsid 1 12 4.465908 4.465908 1219 +newspap 1 12 4.465908 4.465908 1280 +smart 1 11 4.553877 4.553877 1352 +see 1 11 4.553877 4.553877 1337 +fill 1 11 4.553877 4.553877 1349 +santa 1 10 4.653960 4.653960 1441 +jump 1 9 4.753590 4.753590 1603 +joke 1 8 4.875197 4.875197 1620 +dedic 1 7 5.010635 5.010635 1843 +philosoph 1 7 5.010635 5.010635 1904 +centuri 1 7 5.010635 5.010635 1935 +smaller 1 7 5.010635 5.010635 1874 +channel 1 7 5.010635 5.010635 1836 +tri 1 6 5.164786 5.164786 2166 +strip 1 6 5.164786 5.164786 2203 +somewher 1 6 5.164786 5.164786 2176 +artist 1 6 5.164786 5.164786 2127 +feet 1 5 5.347108 5.347108 2492 +optimist 1 5 5.347108 5.347108 2501 +push 1 4 5.568345 5.568345 2635 +climb 1 4 5.568345 5.568345 2936 +surviv 1 4 5.568345 5.568345 2734 +writer 1 4 5.568345 5.568345 2783 +observatori 1 4 5.568345 5.568345 3070 +ultra 1 4 5.568345 5.568345 2889 +sundaram 1 3 5.857933 5.857933 3463 +blind 1 3 5.857933 5.857933 3662 +scream 1 3 5.857933 5.857933 3609 +knee 1 3 5.857933 5.857933 3980 +woman 1 3 5.857933 5.857933 3539 +lunch 1 3 5.857933 5.857933 3369 +beat 1 3 5.857933 5.857933 3840 +packer 1 3 5.857933 5.857933 3728 +dozen 1 3 5.857933 5.857933 3905 +hindu 1 3 5.857933 5.857933 3590 +astronomi 1 3 5.857933 5.857933 3974 +stukel 1 2 6.263398 6.263398 4698 +patient 1 2 6.263398 6.263398 6223 +spurt 1 2 6.263398 6.263398 5464 +crawl 1 2 6.263398 6.263398 5561 +damn 1 2 6.263398 6.263398 6129 +nake 1 2 6.263398 6.263398 6197 +cloth 1 2 6.263398 6.263398 5884 +conscious 1 2 6.263398 6.263398 5954 +destruct 1 2 6.263398 6.263398 6232 +phoenix 1 2 6.263398 6.263398 4552 +reward 1 2 6.263398 6.263398 5402 +disinform 1 2 6.263398 6.263398 5494 +trendi 1 2 6.263398 6.263398 4717 +effronteri 1 1 6.957497 6.957497 20194 +femor 1 1 6.957497 6.957497 20195 +arteri 1 1 6.957497 6.957497 20196 +blood 1 1 6.957497 6.957497 20197 +anesthetist 1 1 6.957497 6.957497 20198 +browbeck 1 1 6.957497 6.957497 20193 +groin 1 1 6.957497 6.957497 20199 +hamstr 1 1 6.957497 6.957497 20200 +scalpel 1 1 6.957497 6.957497 20201 +stab 1 1 6.957497 6.957497 20202 +leg 1 1 6.957497 6.957497 20203 +voilet 1 1 6.957497 6.957497 20204 +baboon 1 1 6.957497 6.957497 20205 +wig 1 1 6.957497 6.957497 20206 +pois 1 1 6.957497 6.957497 20207 +stomp 1 1 6.957497 6.957497 20208 +cop 1 1 6.957497 6.957497 20209 +rush 1 1 6.957497 6.957497 20210 +burrough 1 1 6.957497 6.957497 20211 +catapult 1 1 6.957497 6.957497 20212 +mann 1 1 6.957497 6.957497 20213 +wearabl 1 1 6.957497 6.957497 20214 +tierra 1 1 6.957497 6.957497 20215 +arcosanti 1 1 6.957497 6.957497 20216 +arcolog 1 1 6.957497 6.957497 20217 +krishnamurti 1 1 6.957497 6.957497 20218 +harass 1 1 6.957497 6.957497 20219 +factoid 1 1 6.957497 6.957497 20220 +astound 1 1 6.957497 6.957497 20221 +onion 1 1 6.957497 6.957497 20222 +washburn 1 1 6.957497 6.957497 20223 +len 1 1 6.957497 6.957497 20224 +insignific 1 1 6.957497 6.957497 20225 +webweath 1 1 6.957497 6.957497 20226 +timothi 1 1 6.957497 6.957497 20227 +leari 1 1 6.957497 6.957497 20228 +noam 1 1 6.957497 6.957497 20229 +chomski 1 1 6.957497 6.957497 20230 +conspiraci 1 1 6.957497 6.957497 20231 +buri 1 1 6.957497 6.957497 20232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..565396ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +hour 1 165 1.791759 1.791759 46 +think 1 57 2.890372 2.890372 314 +mark 1 44 3.135494 3.135494 403 +brian 1 38 3.295837 3.295837 466 +bookmark 1 26 3.688879 3.688879 639 +pagebrian 1 4 5.568345 5.568345 3054 +swander 1 3 5.857933 5.857933 3440 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..26cc78da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +perform 1 143 1.945910 1.945910 74 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +object 1 138 1.945910 1.945910 79 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +analysi 1 124 2.079442 2.079442 98 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +peopl 1 96 2.302585 2.302585 132 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +need 1 98 2.302585 2.302585 135 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +april 1 77 2.564949 2.564949 196 +free 1 73 2.639057 2.639057 224 +differ 1 66 2.708050 2.708050 253 +street 1 63 2.772589 2.772589 293 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +colleg 1 61 2.833213 2.833213 300 +share 1 59 2.833213 2.833213 304 +thesi 1 57 2.890372 2.890372 327 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +cool 1 49 3.044522 3.044522 374 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +concurr 1 34 3.401197 3.401197 501 +multiprocessor 1 28 3.610918 3.610918 605 +enhanc 1 26 3.688879 3.688879 644 +supercomput 1 25 3.737670 3.737670 681 +highli 1 23 3.806662 3.806662 725 +els 1 19 4.007333 4.007333 843 +interconnect 1 17 4.110874 4.110874 937 +countri 1 15 4.248495 4.248495 1059 +fortran 1 15 4.248495 4.248495 1027 +block 1 13 4.382027 4.382027 1183 +wait 1 13 4.382027 4.382027 1168 +asynchron 1 12 4.465908 4.465908 1229 +typic 1 11 4.553877 4.553877 1360 +pose 1 9 4.753590 4.753590 1535 +paradyn 1 9 4.753590 4.753590 1614 +joke 1 8 4.875197 4.875197 1620 +attach 1 7 5.010635 5.010635 1785 +park 1 6 5.164786 5.164786 2218 +risc 1 6 5.164786 5.164786 2016 +matur 1 5 5.347108 5.347108 2269 +hate 1 5 5.347108 5.347108 2529 +sorri 1 4 5.568345 5.568345 3059 +eventu 1 4 5.568345 5.568345 3074 +cleaner 1 3 5.857933 5.857933 3775 +raid 1 3 5.857933 5.857933 4012 +angri 1 2 6.263398 6.263398 5088 +snowboard 1 2 6.263398 6.263398 5084 +greet 1 2 6.263398 6.263398 5903 +dude 1 2 6.263398 6.263398 4977 +callaghan 1 2 6.263398 6.263398 6128 +stripe 1 2 6.263398 6.263398 6106 +ariel 1 1 6.957497 6.957497 20234 +tamchesari 1 1 6.957497 6.957497 20237 +tamch 1 1 6.957497 6.957497 20233 +assistantemail 1 1 6.957497 6.957497 20238 +posei 1 1 6.957497 6.957497 20239 +sresearch 1 1 6.957497 6.957497 20240 +toolsstatu 1 1 6.957497 6.957497 20241 +toolsparallel 1 1 6.957497 6.957497 20242 +systemsbluesth 1 1 6.957497 6.957497 20243 +simpsonsseinfeldskiingskinetkeyston 1 1 6.957497 6.957497 20244 +vacum 1 1 6.957497 6.957497 20245 +dirt 1 1 6.957497 6.957497 20246 +whoa 1 1 6.957497 6.957497 20247 +municip 1 1 6.957497 6.957497 20235 +bond 1 1 6.957497 6.957497 20236 +incom 1 1 6.957497 6.957497 20248 +yahooespncpu 1 1 6.957497 6.957497 20249 +infoskinetoth 1 1 6.957497 6.957497 20250 +exokernel 1 1 6.957497 6.957497 20251 +zebra 1 1 6.957497 6.957497 20252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..3bc97eaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +high 1 130 2.079442 2.079442 101 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +pictur 1 89 2.397895 2.397895 160 +school 1 84 2.484907 2.484907 188 +academ 1 82 2.484907 2.484907 178 +know 1 80 2.564949 2.564949 198 +good 1 77 2.564949 2.564949 200 +resum 1 79 2.564949 2.564949 217 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +copi 1 63 2.772589 2.772589 284 +dept 1 64 2.772589 2.772589 291 +import 1 65 2.772589 2.772589 282 +juli 1 60 2.833213 2.833213 305 +think 1 57 2.890372 2.890372 314 +found 1 53 2.944439 2.944439 337 +still 1 50 3.044522 3.044522 362 +basic 1 50 3.044522 3.044522 360 +favorit 1 44 3.135494 3.135494 410 +show 1 43 3.178054 3.178054 417 +music 1 42 3.218876 3.218876 436 +movi 1 40 3.258097 3.258097 459 +least 1 35 3.401197 3.401197 516 +human 1 32 3.465736 3.465736 546 +turn 1 29 3.583519 3.583519 586 +jeff 1 25 3.737670 3.737670 673 +never 1 25 3.737670 3.737670 671 +wish 1 24 3.761200 3.761200 692 +miscellan 1 23 3.806662 3.806662 731 +voic 1 21 3.912023 3.912023 806 +newsgroup 1 21 3.912023 3.912023 783 +separ 1 19 4.007333 4.007333 844 +andrew 1 19 4.007333 4.007333 849 +coupl 1 17 4.110874 4.110874 939 +monitor 1 17 4.110874 4.110874 941 +seek 1 17 4.110874 4.110874 954 +took 1 16 4.174387 4.174387 1010 +sign 1 16 4.174387 4.174387 970 +choos 1 16 4.174387 4.174387 964 +hobbi 1 16 4.174387 4.174387 1009 +doesn 1 15 4.248495 4.248495 1055 +club 1 15 4.248495 4.248495 1058 +someon 1 13 4.382027 4.382027 1128 +night 1 11 4.553877 4.553877 1319 +song 1 11 4.553877 4.553877 1380 +sound 1 9 4.753590 4.753590 1605 +theme 1 8 4.875197 4.875197 1707 +henc 1 7 5.010635 5.010635 1805 +pace 1 6 5.164786 5.164786 2011 +plu 1 6 5.164786 5.164786 2004 +lampert 1 5 5.347108 5.347108 2398 +babi 1 5 5.347108 5.347108 2493 +heard 1 4 5.568345 5.568345 2895 +tick 1 4 5.568345 5.568345 2975 +fire 1 4 5.568345 5.568345 3001 +nota 1 3 5.857933 5.857933 3785 +crow 1 3 5.857933 5.857933 3845 +straight 1 3 5.857933 5.857933 3655 +forward 1 3 5.857933 5.857933 3784 +yearbook 1 2 6.263398 6.263398 6243 +cute 1 2 6.263398 6.263398 5108 +befound 1 2 6.263398 6.263398 5964 +sick 1 2 6.263398 6.263398 5773 +mstk 1 2 6.263398 6.263398 5501 +pagejeff 1 1 6.957497 6.957497 20253 +ricardo 1 1 6.957497 6.957497 20254 +montalban 1 1 6.957497 6.957497 20255 +foron 1 1 6.957497 6.957497 20256 +incrimin 1 1 6.957497 6.957497 20257 +aconvict 1 1 6.957497 6.957497 20258 +lasttim 1 1 6.957497 6.957497 20259 +threaten 1 1 6.957497 6.957497 20260 +intoa 1 1 6.957497 6.957497 20261 +dispens 1 1 6.957497 6.957497 20262 +anautograph 1 1 6.957497 6.957497 20263 +pictureappear 1 1 6.957497 6.957497 20264 +weasel 1 1 6.957497 6.957497 20265 +factswho 1 1 6.957497 6.957497 20266 +relatedwhat 1 1 6.957497 6.957497 20267 +entertainmentbook 1 1 6.957497 6.957497 20268 +subjectsfriendsno 1 1 6.957497 6.957497 20269 +organizationsgroup 1 1 6.957497 6.957497 20270 +inmi 1 1 6.957497 6.957497 20271 +linksugh 1 1 6.957497 6.957497 20272 +servo 1 1 6.957497 6.957497 20273 +eclect 1 1 6.957497 6.957497 20274 +paraphenaliai 1 1 6.957497 6.957497 20275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..bca0f2b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +area 1 144 1.945910 1.945910 80 +teach 1 108 2.197225 2.197225 112 +mathemat 1 108 2.197225 2.197225 123 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +todd 1 15 4.248495 4.248495 1051 +wealth 1 3 5.857933 5.857933 3353 +plug 1 2 6.263398 6.263398 5167 +tmunson 1 2 6.263398 6.263398 4809 +homepagetodd 1 1 6.957497 6.957497 20277 +homepagein 1 1 6.957497 6.957497 20278 +mathematicalprogram 1 1 6.957497 6.957497 20276 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..9d6b8c55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +thing 1 84 2.484907 2.484907 189 +life 1 50 3.044522 3.044522 375 +brian 1 38 3.295837 3.295837 466 +streetmadison 1 38 3.295837 3.295837 474 +departmentunivers 1 24 3.761200 3.761200 711 +accept 1 18 4.060443 4.060443 879 +stand 1 18 4.060443 4.060443 891 +chief 1 7 5.010635 5.010635 1829 +ground 1 7 5.010635 5.010635 1955 +pagebrian 1 4 5.568345 5.568345 3054 +bear 1 4 5.568345 5.568345 2651 +civil 1 3 5.857933 5.857933 3908 +medit 1 2 6.263398 6.263398 5777 +creatur 1 2 6.263398 6.263398 6107 +essenc 1 2 6.263398 6.263398 6150 +toonen 1 1 6.957497 6.957497 20279 +cswhatev 1 1 6.957497 6.957497 20280 +seattleth 1 1 6.957497 6.957497 20281 +tipi 1 1 6.957497 6.957497 20282 +itsmean 1 1 6.957497 6.957497 20283 +kinship 1 1 6.957497 6.957497 20284 +acknowledgingun 1 1 6.957497 6.957497 20285 +infus 1 1 6.957497 6.957497 20286 +thetru 1 1 6.957497 6.957497 20287 +luther 1 1 6.957497 6.957497 20288 +oglala 1 1 6.957497 6.957497 20289 +siouxlast 1 1 6.957497 6.957497 20290 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..6edda14e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +read 1 154 1.791759 1.791759 47 +site 1 106 2.197225 2.197225 119 +version 1 113 2.197225 2.197225 122 +browser 1 56 2.890372 2.890372 313 +netscap 1 44 3.135494 3.135494 395 +option 1 30 3.555348 3.555348 575 +enhanc 1 26 3.688879 3.688879 644 +latest 1 21 3.912023 3.912023 785 +thano 1 3 5.857933 5.857933 3424 +tsioli 1 3 5.857933 5.857933 3418 +shouldconsid 1 2 6.263398 6.263398 6061 +upgrad 1 1 6.957497 6.957497 20291 +ifthat 1 1 6.957497 6.957497 20292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..85df2c8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +section 1 94 2.397895 2.397895 149 +locat 1 59 2.833213 2.833213 303 +case 1 51 2.995732 2.995732 351 +keep 1 44 3.135494 3.135494 409 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +live 1 40 3.258097 3.258097 451 +origin 1 38 3.295837 3.295837 472 +computersci 1 30 3.555348 3.555348 562 +departmentunivers 1 24 3.761200 3.761200 711 +left 1 19 4.007333 4.007333 851 +todd 1 15 4.248495 4.248495 1051 +enough 1 15 4.248495 4.248495 1040 +awai 1 10 4.653960 4.653960 1447 +babylon 1 8 4.875197 4.875197 1731 +hold 1 8 4.875197 4.875197 1645 +judg 1 8 4.875197 4.875197 1644 +sciencesat 1 7 5.010635 5.010635 1968 +dougla 1 5 5.347108 5.347108 2471 +ohio 1 5 5.347108 5.347108 2447 +amus 1 5 5.347108 5.347108 2366 +turnidg 1 4 5.568345 5.568345 2829 +rep 1 4 5.568345 5.568345 3087 +western 1 4 5.568345 5.568345 3062 +kent 1 4 5.568345 5.568345 2744 +evid 1 4 5.568345 5.568345 2768 +shortcut 1 3 5.857933 5.857933 3932 +milton 1 2 6.263398 6.263398 6153 +axi 1 2 6.263398 6.263398 6069 +professorthoma 1 2 6.263398 6.263398 5053 +turnidgeschoolcomput 1 1 6.957497 6.957497 20293 +homemuppet 1 1 6.957497 6.957497 20294 +eyesightright 1 1 6.957497 6.957497 20295 +studyingprogram 1 1 6.957497 6.957497 20296 +mathematicsand 1 1 6.957497 6.957497 20297 +reserveunivers 1 1 6.957497 6.957497 20298 +cleveland 1 1 6.957497 6.957497 20299 +myfamili 1 1 6.957497 6.957497 20300 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..a34aa31d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +sinc 1 90 2.397895 2.397895 159 +june 1 79 2.564949 2.564949 214 +street 1 63 2.772589 2.772589 293 +visit 1 63 2.772589 2.772589 288 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +wang 1 21 3.912023 3.912023 790 +heavi 1 7 5.010635 5.010635 1841 +bldg 1 4 5.568345 5.568345 2983 +taxiao 1 2 6.263398 6.263398 4806 +twang 1 2 6.263398 6.263398 5730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..56197968 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +wisc 1 242 1.386294 1.386294 33 +start 1 83 2.484907 2.484907 173 +meet 1 72 2.639057 2.639057 229 +window 1 68 2.708050 2.708050 242 +microsoft 1 38 3.295837 3.295837 468 +trek 1 3 5.857933 5.857933 4025 +shaft 1 2 6.263398 6.263398 6186 +pageuri 1 1 6.957497 6.957497 20301 +pageemail 1 1 6.957497 6.957497 20302 +eduinterest 1 1 6.957497 6.957497 20303 +diversionsstart 1 1 6.957497 6.957497 20304 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..289ab836 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +look 1 107 2.197225 2.197225 115 +present 1 91 2.397895 2.397895 145 +homepag 1 93 2.397895 2.397895 148 +real 1 93 2.397895 2.397895 144 +info 1 85 2.484907 2.484907 176 +educ 1 86 2.484907 2.484907 191 +know 1 80 2.564949 2.564949 198 +want 1 79 2.564949 2.564949 199 +onlin 1 75 2.639057 2.639057 223 +januari 1 62 2.772589 2.772589 264 +undergradu 1 54 2.944439 2.944439 338 +basic 1 50 3.044522 3.044522 360 +past 1 42 3.218876 3.218876 428 +india 1 32 3.465736 3.465736 550 +hope 1 28 3.610918 3.610918 610 +sometim 1 24 3.761200 3.761200 696 +earlier 1 13 4.382027 4.382027 1140 +usaphon 1 9 4.753590 4.753590 1600 +madra 1 8 4.875197 4.875197 1770 +nativ 1 6 5.164786 5.164786 2192 +asha 1 3 5.857933 5.857933 4037 +venkatesh 1 2 6.263398 6.263398 6154 +andhra 1 2 6.263398 6.263398 5571 +pradesh 1 2 6.263398 6.263398 5572 +yearbook 1 2 6.263398 6.263398 6243 +till 1 2 6.263398 6.263398 5850 +btech 1 2 6.263398 6.263398 6123 +genesi 1 2 6.263398 6.263398 6011 +ganti 1 1 6.957497 6.957497 20305 +pagevenkatesh 1 1 6.957497 6.957497 20307 +vganti 1 1 6.957497 6.957497 20308 +studentoffic 1 1 6.957497 6.957497 20309 +kakinada 1 1 6.957497 6.957497 20310 +godav 1 1 6.957497 6.957497 20306 +hostel 1 1 6.957497 6.957497 20311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..ecf16124 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +symposium 1 72 2.639057 2.639057 238 +august 1 66 2.708050 2.708050 257 +street 1 63 2.772589 2.772589 293 +undergradu 1 54 2.944439 2.944439 338 +processor 1 54 2.944439 2.944439 335 +profession 1 51 2.995732 2.995732 345 +advisor 1 51 2.995732 2.995732 355 +annual 1 40 3.258097 3.258097 458 +submit 1 39 3.258097 3.258097 440 +go 1 33 3.433987 3.433987 529 +india 1 32 3.465736 3.465736 550 +dissert 1 32 3.465736 3.465736 549 +depend 1 29 3.583519 3.583519 583 +strategi 1 25 3.737670 3.737670 682 +doctor 1 24 3.761200 3.761200 709 +predict 1 19 4.007333 4.007333 855 +regist 1 17 4.110874 4.110874 938 +micro 1 15 4.248495 4.248495 1031 +side 1 15 4.248495 4.248495 1022 +affili 1 13 4.382027 4.382027 1194 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 1 6 5.164786 5.164786 2237 +microarchitectur 1 6 5.164786 5.164786 2238 +guri 1 5 5.347108 5.347108 2578 +vijai 1 4 5.568345 5.568345 2960 +vijaykumar 1 3 5.857933 5.857933 4011 +anatomi 1 3 5.857933 5.857933 4010 +breach 1 3 5.857933 5.857933 4009 +birla 1 2 6.263398 6.263398 6239 +pilani 1 2 6.263398 6.263398 6240 +architecturet 1 1 6.957497 6.957497 20312 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..4d00d722 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +site 1 106 2.197225 2.197225 119 +associ 1 93 2.397895 2.397895 151 +refer 1 78 2.564949 2.564949 203 +foundat 1 62 2.772589 2.772589 286 +laboratori 1 63 2.772589 2.772589 292 +telephon 1 50 3.044522 3.044522 373 +archiv 1 49 3.044522 3.044522 364 +physic 1 47 3.091042 3.091042 377 +theoret 1 39 3.258097 3.258097 446 +streetmadison 1 38 3.295837 3.295837 474 +approxim 1 35 3.401197 3.401197 509 +bibliographi 1 34 3.401197 3.401197 518 +random 1 34 3.401197 3.401197 511 +john 1 33 3.433987 3.433987 532 +proc 1 26 3.688879 3.688879 649 +departmentunivers 1 24 3.761200 3.761200 711 +daili 1 24 3.761200 3.761200 706 +theorem 1 21 3.912023 3.912023 786 +hypertext 1 19 4.007333 4.007333 865 +dimension 1 18 4.060443 4.060443 909 +element 1 18 4.060443 4.060443 895 +stanford 1 17 4.110874 4.110874 955 +fourth 1 16 4.174387 4.174387 999 +polynomi 1 14 4.317488 4.317488 1069 +automata 1 13 4.382027 4.382027 1135 +montreal 1 7 5.010635 5.010635 1961 +quantum 1 6 5.164786 5.164786 2214 +oxford 1 6 5.164786 5.164786 2121 +forecast 1 6 5.164786 5.164786 2171 +cellular 1 5 5.347108 5.347108 2433 +symp 1 5 5.347108 5.347108 2376 +particl 1 5 5.347108 5.347108 2436 +beam 1 5 5.347108 5.347108 2344 +ucla 1 5 5.347108 5.347108 2502 +webster 1 5 5.347108 5.347108 2468 +preprint 1 3 5.857933 5.857933 3481 +quotat 1 3 5.857933 5.857933 3121 +pagejohn 1 2 6.263398 6.263398 5603 +thesauru 1 2 6.263398 6.263398 6238 +isthmu 1 2 6.263398 6.263398 6152 +watrou 1 1 6.957497 6.957497 20313 +artin 1 1 6.957497 6.957497 20314 +whapl 1 1 6.957497 6.957497 20315 +canadiannumb 1 1 6.957497 6.957497 20316 +assort 1 1 6.957497 6.957497 20317 +lanl 1 1 6.957497 6.957497 20318 +hypatia 1 1 6.957497 6.957497 20319 +stylehypertext 1 1 6.957497 6.957497 20320 +interfaceroget 1 1 6.957497 6.957497 20321 +parasol 1 1 6.957497 6.957497 20322 +recordsplayst 1 1 6.957497 6.957497 20323 +linksweath 1 1 6.957497 6.957497 20324 +madisonth 1 1 6.957497 6.957497 20325 +pagemathemat 1 1 6.957497 6.957497 20326 +servermathematician 1 1 6.957497 6.957497 20327 +biographiesgeek 1 1 6.957497 6.957497 20328 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..1c9e8aa2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +pictur 1 89 2.397895 2.397895 160 +david 1 71 2.639057 2.639057 232 +test 1 66 2.708050 2.708050 252 +guid 1 63 2.772589 2.772589 267 +plai 1 60 2.833213 2.833213 307 +think 1 57 2.890372 2.890372 314 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +date 1 51 2.995732 2.995732 344 +friend 1 48 3.044522 3.044522 376 +possibl 1 47 3.091042 3.091042 378 +physic 1 47 3.091042 3.091042 377 +favorit 1 44 3.135494 3.135494 410 +around 1 43 3.178054 3.178054 415 +show 1 43 3.178054 3.178054 417 +review 1 42 3.218876 3.218876 425 +must 1 40 3.258097 3.258097 442 +littl 1 39 3.258097 3.258097 454 +late 1 40 3.258097 3.258097 439 +movi 1 40 3.258097 3.258097 459 +correct 1 38 3.295837 3.295837 462 +game 1 36 3.367296 3.367296 498 +least 1 35 3.401197 3.401197 516 +richard 1 31 3.496508 3.496508 559 +steve 1 29 3.583519 3.583519 594 +weather 1 28 3.610918 3.610918 618 +great 1 27 3.637586 3.637586 626 +sometim 1 24 3.761200 3.761200 696 +seri 1 24 3.761200 3.761200 708 +wish 1 24 3.761200 3.761200 692 +deal 1 22 3.850148 3.850148 736 +reduc 1 22 3.850148 3.850148 759 +alumni 1 21 3.912023 3.912023 807 +increas 1 20 3.951244 3.951244 829 +longer 1 20 3.951244 3.951244 816 +miss 1 19 4.007333 4.007333 866 +english 1 15 4.248495 4.248495 1033 +manner 1 14 4.317488 4.317488 1074 +someon 1 13 4.382027 4.382027 1128 +entertain 1 12 4.465908 4.465908 1286 +food 1 12 4.465908 4.465908 1285 +amount 1 12 4.465908 4.465908 1208 +player 1 11 4.553877 4.553877 1371 +recit 1 9 4.753590 4.753590 1475 +french 1 9 4.753590 4.753590 1511 +hockei 1 8 4.875197 4.875197 1760 +commit 1 6 5.164786 5.164786 2233 +affect 1 6 5.164786 5.164786 2044 +moder 1 6 5.164786 5.164786 2112 +postcard 1 6 5.164786 5.164786 2181 +forecast 1 6 5.164786 5.164786 2171 +speaker 1 5 5.347108 5.347108 2370 +ship 1 5 5.347108 5.347108 2534 +feet 1 5 5.347108 5.347108 2492 +circumst 1 5 5.347108 5.347108 2283 +peke 1 5 5.347108 5.347108 2539 +cell 1 5 5.347108 5.347108 2274 +lesson 1 5 5.347108 5.347108 2568 +insan 1 3 5.857933 5.857933 4006 +omit 1 3 5.857933 5.857933 3466 +letterman 1 3 5.857933 5.857933 3931 +truck 1 2 6.263398 6.263398 5713 +proportion 1 2 6.263398 6.263398 4091 +behaviour 1 2 6.263398 6.263398 4724 +studio 1 2 6.263398 6.263398 5838 +francais 1 2 6.263398 6.263398 6020 +weiru 1 1 6.957497 6.957497 20329 +eiru 1 1 6.957497 6.957497 20330 +ppppleas 1 1 6.957497 6.957497 20331 +asylum 1 1 6.957497 6.957497 20332 +verbal 1 1 6.957497 6.957497 20333 +cargo 1 1 6.957497 6.957497 20334 +havenos 1 1 6.957497 6.957497 20335 +smell 1 1 6.957497 6.957497 20336 +leder 1 1 6.957497 6.957497 20337 +beoffer 1 1 6.957497 6.957497 20338 +customari 1 1 6.957497 6.957497 20339 +begina 1 1 6.957497 6.957497 20340 +amountof 1 1 6.957497 6.957497 20341 +merest 1 1 6.957497 6.957497 20342 +ofaffect 1 1 6.957497 6.957497 20343 +excruciatingli 1 1 6.957497 6.957497 20344 +atmadison 1 1 6.957497 6.957497 20345 +grei 1 1 6.957497 6.957497 20346 +francai 1 1 6.957497 6.957497 20347 +dictionnairefrancai 1 1 6.957497 6.957497 20348 +anglai 1 1 6.957497 6.957497 20349 +dictionnair 1 1 6.957497 6.957497 20350 +relatif 1 1 6.957497 6.957497 20351 +lafrancophoni 1 1 6.957497 6.957497 20352 +degrammair 1 1 6.957497 6.957497 20353 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..24e207ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +manag 1 114 2.197225 2.197225 125 +peopl 1 96 2.302585 2.302585 132 +memori 1 101 2.302585 2.302585 139 +center 1 88 2.397895 2.397895 158 +educ 1 86 2.484907 2.484907 191 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +state 1 76 2.564949 2.564949 207 +differ 1 66 2.708050 2.708050 253 +written 1 63 2.772589 2.772589 278 +best 1 59 2.833213 2.833213 299 +variou 1 56 2.890372 2.890372 317 +tabl 1 51 2.995732 2.995732 346 +without 1 50 3.044522 3.044522 370 +california 1 46 3.091042 3.091042 388 +china 1 37 3.332205 3.332205 487 +game 1 36 3.367296 3.367296 498 +within 1 33 3.433987 3.433987 525 +platform 1 29 3.583519 3.583519 591 +quot 1 29 3.583519 3.583519 582 +framework 1 28 3.610918 3.610918 606 +mine 1 26 3.688879 3.688879 654 +corpor 1 21 3.912023 3.912023 802 +tenni 1 20 3.951244 3.951244 838 +expert 1 20 3.951244 3.951244 833 +beij 1 19 4.007333 4.007333 876 +ultim 1 17 4.110874 4.110874 943 +zhang 1 16 4.174387 4.174387 980 +jose 1 16 4.174387 4.174387 976 +tsinghua 1 13 4.382027 4.382027 1195 +nasa 1 13 4.382027 4.382027 1188 +solari 1 12 4.465908 4.465908 1238 +card 1 10 4.653960 4.653960 1435 +cheat 1 10 4.653960 4.653960 1395 +puzzl 1 5 5.347108 5.347108 2507 +shanghai 1 4 5.568345 5.568345 2925 +republ 1 4 5.568345 5.568345 3032 +hometown 1 3 5.857933 5.857933 3817 +am 1 3 5.857933 5.857933 3386 +weiz 1 2 6.263398 6.263398 4693 +eduwork 1 2 6.263398 6.263398 5813 +windowsnt 1 2 6.263398 6.263398 5440 +tandem 1 2 6.263398 6.263398 5027 +chinaemail 1 1 6.957497 6.957497 20354 +experiencecontractor 1 1 6.957497 6.957497 20355 +tuxedo 1 1 6.957497 6.957497 20356 +pathwai 1 1 6.957497 6.957497 20357 +sherpa 1 1 6.957497 6.957497 20358 +hobbiesma 1 1 6.957497 6.957497 20359 +jiangbridg 1 1 6.957497 6.957497 20360 +pingpong 1 1 6.957497 6.957497 20361 +joggingth 1 1 6.957497 6.957497 20362 +challengesolv 1 1 6.957497 6.957497 20363 +sweeper 1 1 6.957497 6.957497 20364 +dayth 1 1 6.957497 6.957497 20365 +ackowledgementthi 1 1 6.957497 6.957497 20366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..f0cea3fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +madison 1 165 1.791759 1.791759 55 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +provid 1 121 2.079442 2.079442 94 +need 1 98 2.302585 2.302585 135 +peopl 1 96 2.302585 2.302585 132 +pictur 1 89 2.397895 2.397895 160 +west 1 83 2.484907 2.484907 192 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +main 1 67 2.708050 2.708050 256 +explor 1 58 2.890372 2.890372 324 +still 1 50 3.044522 3.044522 362 +telephon 1 50 3.044522 3.044522 373 +streetmadison 1 38 3.295837 3.295837 474 +cluster 1 28 3.610918 3.610918 612 +departmentunivers 1 24 3.761200 3.761200 711 +definit 1 19 4.007333 4.007333 864 +miron 1 14 4.317488 4.317488 1110 +dbm 1 13 4.382027 4.382027 1136 +scan 1 12 4.465908 4.465908 1243 +devis 1 10 4.653960 4.653960 1451 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +wouldn 1 7 5.010635 5.010635 1970 +kent 1 4 5.568345 5.568345 2744 +edufing 1 4 5.568345 5.568345 2713 +wenger 1 3 5.857933 5.857933 4023 +agre 1 3 5.857933 5.857933 4007 +groupuw 1 3 5.857933 5.857933 3934 +preparedfor 1 2 6.263398 6.263398 5886 +workth 1 2 6.263398 6.263398 6137 +andvisu 1 2 6.263398 6.263398 6189 +pothol 1 1 6.957497 6.957497 20367 +wengerassoci 1 1 6.957497 6.957497 20368 +researchercomput 1 1 6.957497 6.957497 20369 +arecod 1 1 6.957497 6.957497 20370 +anddevis 1 1 6.957497 6.957497 20371 +acronym 1 1 6.957497 6.957497 20372 +importantpart 1 1 6.957497 6.957497 20373 +visualizationproduc 1 1 6.957497 6.957497 20374 +livnyraghu 1 1 6.957497 6.957497 20375 +ramakrishnanmor 1 1 6.957497 6.957497 20376 +pagewiscinfo 1 1 6.957497 6.957497 20377 +personallinksimageslast 1 1 6.957497 6.957497 20378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..b5a2a5ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +world 1 115 2.197225 2.197225 126 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +call 1 91 2.397895 2.397895 153 +level 1 87 2.484907 2.484907 180 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +wide 1 84 2.484907 2.484907 185 +refer 1 78 2.564949 2.564949 203 +interfac 1 79 2.564949 2.564949 209 +messag 1 76 2.564949 2.564949 212 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +logic 1 71 2.639057 2.639057 230 +name 1 72 2.639057 2.639057 220 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +space 1 57 2.890372 2.890372 310 +think 1 57 2.890372 2.890372 314 +overview 1 56 2.890372 2.890372 323 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +hardwar 1 51 2.995732 2.995732 350 +week 1 52 2.995732 2.995732 343 +approach 1 48 3.044522 3.044522 366 +adapt 1 46 3.091042 3.091042 387 +mark 1 44 3.135494 3.135494 403 +mechan 1 43 3.178054 3.178054 416 +futur 1 41 3.218876 3.218876 427 +combin 1 42 3.218876 3.218876 421 +programm 1 39 3.258097 3.258097 445 +slide 1 38 3.295837 3.295837 467 +origin 1 38 3.295837 3.295837 472 +bibliographi 1 34 3.401197 3.401197 518 +articl 1 33 3.433987 3.433987 530 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +propos 1 28 3.610918 3.610918 602 +pass 1 28 3.610918 3.610918 611 +cluster 1 28 3.610918 3.610918 612 +hill 1 25 3.737670 3.737670 670 +annot 1 21 3.912023 3.912023 775 +fund 1 21 3.912023 3.912023 805 +wind 1 18 4.060443 4.060443 908 +four 1 18 4.060443 4.060443 905 +seek 1 17 4.110874 4.110874 954 +massiv 1 15 4.248495 4.248495 1026 +hybrid 1 15 4.248495 4.248495 1057 +node 1 11 4.553877 4.553877 1326 +fpga 1 10 4.653960 4.653960 1433 +tunnel 1 9 4.753590 4.753590 1615 +paradyn 1 9 4.753590 4.753590 1614 +consensu 1 6 5.164786 5.164786 2080 +middl 1 5 5.347108 5.347108 2372 +tempest 1 5 5.347108 5.347108 2548 +hypothet 1 5 5.347108 5.347108 2474 +departmentat 1 5 5.347108 5.347108 2513 +aboutth 1 4 5.568345 5.568345 2720 +ofworkst 1 4 5.568345 5.568345 2679 +markhil 1 4 5.568345 5.568345 2819 +parallellanguag 1 3 5.857933 5.857933 4026 +evolutionari 1 3 5.857933 5.857933 3898 +uniformli 1 2 6.263398 6.263398 6202 +havebeen 1 2 6.263398 6.263398 5830 +snoop 1 2 6.263398 6.263398 5364 +contributor 1 2 6.263398 6.263398 6214 +projectmost 1 1 6.957497 6.957497 20379 +fromworkst 1 1 6.957497 6.957497 20380 +whichprocess 1 1 6.957497 6.957497 20381 +abovesystem 1 1 6.957497 6.957497 20382 +wascoop 1 1 6.957497 6.957497 20383 +toconvent 1 1 6.957497 6.957497 20384 +revolutionari 1 1 6.957497 6.957497 20385 +andprogram 1 1 6.957497 6.957497 20386 +transparentshar 1 1 6.957497 6.957497 20387 +developingimplement 1 1 6.957497 6.957497 20388 +wisconsincow 1 1 6.957497 6.957497 20389 +cowus 1 1 6.957497 6.957497 20390 +sram 1 1 6.957497 6.957497 20391 +collaboratingwith 1 1 6.957497 6.957497 20392 +overviewand 1 1 6.957497 6.957497 20393 +pageor 1 1 6.957497 6.957497 20394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..1af65554 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +like 1 132 1.945910 1.945910 81 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +thing 1 84 2.484907 2.484907 189 +appear 1 78 2.564949 2.564949 210 +creat 1 63 2.772589 2.772589 277 +back 1 60 2.833213 2.833213 297 +finger 1 52 2.995732 2.995732 354 +give 1 50 3.044522 3.044522 359 +featur 1 46 3.091042 3.091042 386 +could 1 46 3.091042 3.091042 383 +obtain 1 33 3.433987 3.433987 534 +product 1 33 3.433987 3.433987 527 +human 1 32 3.465736 3.465736 546 +anim 1 31 3.496508 3.496508 557 +pass 1 28 3.610918 3.610918 611 +team 1 27 3.637586 3.637586 625 +seri 1 24 3.761200 3.761200 708 +wish 1 24 3.761200 3.761200 692 +instead 1 22 3.850148 3.850148 756 +spend 1 19 4.007333 4.007333 850 +statu 1 18 4.060443 4.060443 885 +seem 1 18 4.060443 4.060443 899 +account 1 18 4.060443 4.060443 882 +whether 1 17 4.110874 4.110874 918 +across 1 16 4.174387 4.174387 974 +charact 1 15 4.248495 4.248495 1028 +save 1 14 4.317488 4.317488 1099 +shown 1 14 4.317488 4.317488 1080 +comic 1 14 4.317488 4.317488 1103 +charl 1 13 4.382027 4.382027 1149 +newspap 1 12 4.465908 4.465908 1280 +remov 1 12 4.465908 4.465908 1225 +magic 1 11 4.553877 4.553877 1358 +pick 1 9 4.753590 4.753590 1498 +didn 1 9 4.753590 4.753590 1563 +hundr 1 9 4.753590 4.753590 1528 +star 1 8 4.875197 4.875197 1717 +film 1 8 4.875197 4.875197 1761 +successfulli 1 7 5.010635 5.010635 1869 +famou 1 6 5.164786 5.164786 2185 +televis 1 6 5.164786 5.164786 2118 +strip 1 6 5.164786 5.164786 2203 +put 1 6 5.164786 5.164786 2017 +chat 1 6 5.164786 5.164786 2128 +keeper 1 5 5.347108 5.347108 2569 +accompani 1 4 5.568345 5.568345 2666 +transmit 1 4 5.568345 5.568345 2835 +somehow 1 4 5.568345 5.568345 2974 +trick 1 4 5.568345 5.568345 2967 +keyboard 1 4 5.568345 5.568345 2970 +rival 1 3 5.857933 5.857933 3583 +agre 1 3 5.857933 5.857933 4007 +advertis 1 3 5.857933 5.857933 3788 +felix 1 2 6.263398 6.263398 5103 +princ 1 2 6.263398 6.263398 4813 +wale 1 2 6.263398 6.263398 4827 +mascot 1 2 6.263398 6.263398 6060 +ear 1 2 6.263398 6.263398 5071 +hairbal 1 2 6.263398 6.263398 6237 +xuelin 1 1 6.957497 6.957497 20395 +otto 1 1 6.957497 6.957497 20396 +messmer 1 1 6.957497 6.957497 20397 +whichwa 1 1 6.957497 6.957497 20398 +chaplin 1 1 6.957497 6.957497 20399 +keaton 1 1 6.957497 6.957497 20400 +polo 1 1 6.957497 6.957497 20401 +lindbergh 1 1 6.957497 6.957497 20402 +theatlant 1 1 6.957497 6.957497 20403 +oneev 1 1 6.957497 6.957497 20404 +teeth 1 1 6.957497 6.957497 20405 +whisker 1 1 6.957497 6.957497 20406 +tail 1 1 6.957497 6.957497 20407 +sui 1 1 6.957497 6.957497 20408 +vritabl 1 1 6.957497 6.957497 20409 +partout 1 1 6.957497 6.957497 20410 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..870f83e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +file 1 132 1.945910 1.945910 70 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +mani 1 92 2.397895 2.397895 150 +associ 1 93 2.397895 2.397895 151 +imag 1 91 2.397895 2.397895 161 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +environ 1 84 2.484907 2.484907 177 +help 1 83 2.484907 2.484907 175 +contain 1 81 2.484907 2.484907 174 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +resourc 1 81 2.484907 2.484907 172 +interfac 1 79 2.564949 2.564949 209 +optim 1 79 2.564949 2.564949 197 +issu 1 78 2.564949 2.564949 211 +dynam 1 76 2.564949 2.564949 194 +decemb 1 80 2.564949 2.564949 215 +orient 1 80 2.564949 2.564949 205 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +involv 1 71 2.639057 2.639057 227 +symposium 1 72 2.639057 2.639057 238 +intellig 1 72 2.639057 2.639057 225 +workshop 1 71 2.639057 2.639057 239 +simul 1 66 2.708050 2.708050 255 +multimedia 1 68 2.708050 2.708050 258 +integr 1 67 2.708050 2.708050 245 +differ 1 66 2.708050 2.708050 253 +knowledg 1 67 2.708050 2.708050 243 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +complex 1 64 2.772589 2.772589 269 +evalu 1 64 2.772589 2.772589 266 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +foundat 1 62 2.772589 2.772589 286 +januari 1 62 2.772589 2.772589 264 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +march 1 61 2.833213 2.833213 295 +juli 1 60 2.833213 2.833213 305 +sever 1 56 2.890372 2.890372 322 +special 1 56 2.890372 2.890372 320 +variou 1 56 2.890372 2.890372 317 +major 1 56 2.890372 2.890372 315 +index 1 56 2.890372 2.890372 309 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +much 1 52 2.995732 2.995732 349 +investig 1 51 2.995732 2.995732 353 +maintain 1 51 2.995732 2.995732 342 +particular 1 51 2.995732 2.995732 352 +case 1 51 2.995732 2.995732 351 +visual 1 48 3.044522 3.044522 372 +life 1 50 3.044522 3.044522 375 +right 1 48 3.044522 3.044522 363 +format 1 48 3.044522 3.044522 356 +set 1 50 3.044522 3.044522 361 +natur 1 44 3.135494 3.135494 406 +futur 1 41 3.218876 3.218876 427 +edit 1 42 3.218876 3.218876 418 +york 1 41 3.218876 3.218876 435 +error 1 40 3.258097 3.258097 449 +must 1 40 3.258097 3.258097 442 +transact 1 39 3.258097 3.258097 438 +join 1 39 3.258097 3.258097 457 +multipl 1 39 3.258097 3.258097 453 +cost 1 37 3.332205 3.332205 480 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +multi 1 36 3.367296 3.367296 493 +random 1 34 3.401197 3.401197 511 +approxim 1 35 3.401197 3.401197 509 +survei 1 35 3.401197 3.401197 513 +statist 1 35 3.401197 3.401197 521 +queri 1 33 3.433987 3.433987 524 +india 1 32 3.465736 3.465736 550 +independ 1 32 3.465736 3.465736 548 +express 1 32 3.465736 3.465736 540 +scientist 1 31 3.496508 3.496508 560 +compon 1 30 3.555348 3.555348 570 +power 1 30 3.555348 3.555348 573 +graph 1 30 3.555348 3.555348 576 +rang 1 30 3.555348 3.555348 565 +focus 1 29 3.583519 3.583519 584 +limit 1 29 3.583519 3.583519 585 +ask 1 28 3.610918 3.610918 597 +load 1 28 3.610918 3.610918 601 +framework 1 28 3.610918 3.610918 606 +challeng 1 26 3.688879 3.688879 653 +altern 1 26 3.688879 3.688879 641 +repres 1 26 3.688879 3.688879 656 +effort 1 26 3.688879 3.688879 652 +proc 1 26 3.688879 3.688879 649 +valu 1 25 3.737670 3.737670 665 +primari 1 25 3.737670 3.737670 669 +although 1 25 3.737670 3.737670 667 +higher 1 24 3.761200 3.761200 690 +size 1 23 3.806662 3.806662 713 +sequenc 1 23 3.806662 3.806662 734 +displai 1 23 3.806662 3.806662 712 +try 1 22 3.850148 3.850148 764 +identifi 1 22 3.850148 3.850148 760 +properti 1 22 3.850148 3.850148 749 +thu 1 21 3.912023 3.912023 773 +among 1 21 3.912023 3.912023 781 +path 1 21 3.912023 3.912023 778 +flexibl 1 21 3.912023 3.912023 792 +sigmod 1 19 4.007333 4.007333 877 +boston 1 19 4.007333 4.007333 862 +concentr 1 18 4.060443 4.060443 906 +record 1 18 4.060443 4.060443 890 +dimension 1 18 4.060443 4.060443 909 +attempt 1 17 4.110874 4.110874 917 +estim 1 17 4.110874 4.110874 930 +former 1 17 4.110874 4.110874 956 +miller 1 17 4.110874 4.110874 949 +advantag 1 16 4.174387 4.174387 987 +ramakrishnan 1 16 4.174387 4.174387 972 +jose 1 16 4.174387 4.174387 976 +cambridg 1 16 4.174387 4.174387 1008 +livni 1 15 4.248495 4.248495 1053 +transit 1 15 4.248495 4.248495 1046 +heterogen 1 14 4.317488 4.317488 1090 +attribut 1 14 4.317488 4.317488 1092 +balanc 1 14 4.317488 4.317488 1112 +primarili 1 13 4.382027 4.382027 1185 +translat 1 13 4.382027 4.382027 1164 +canada 1 13 4.382027 4.382027 1158 +readi 1 12 4.465908 4.465908 1242 +gupta 1 12 4.465908 4.465908 1241 +extrem 1 11 4.553877 4.553877 1330 +cycl 1 11 4.553877 4.553877 1335 +itali 1 11 4.553877 4.553877 1378 +tradit 1 10 4.653960 4.653960 1404 +genet 1 10 4.653960 4.653960 1409 +desktop 1 10 4.653960 4.653960 1445 +vldb 1 10 4.653960 4.653960 1470 +pose 1 9 4.753590 4.753590 1535 +significantli 1 9 4.753590 4.753590 1508 +mode 1 9 4.753590 4.753590 1492 +latter 1 9 4.753590 4.753590 1522 +conferenceon 1 9 4.753590 4.753590 1595 +incomplet 1 9 4.753590 4.753590 1575 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +bridg 1 8 4.875197 4.875197 1764 +closur 1 8 4.875197 4.875197 1643 +solomon 1 8 4.875197 4.875197 1716 +unifi 1 8 4.875197 4.875197 1774 +databasesystem 1 8 4.875197 4.875197 1617 +paramet 1 7 5.010635 5.010635 1796 +parametr 1 7 5.010635 5.010635 1819 +aris 1 7 5.010635 5.010635 1924 +throughout 1 7 5.010635 5.010635 1871 +bombai 1 7 5.010635 5.010635 1972 +sweden 1 7 5.010635 5.010635 1885 +montreal 1 7 5.010635 5.010635 1961 +predic 1 7 5.010635 5.010635 1806 +serial 1 7 5.010635 5.010635 1975 +eduresearch 1 6 5.164786 5.164786 2205 +schema 1 6 5.164786 5.164786 1988 +divers 1 6 5.164786 5.164786 2232 +greec 1 6 5.164786 5.164786 2208 +travers 1 5 5.347108 5.347108 2363 +frog 1 5 5.347108 5.347108 2479 +desk 1 5 5.347108 5.347108 2297 +tsatalo 1 5 5.347108 5.347108 2581 +minneapoli 1 5 5.347108 5.347108 2480 +england 1 5 5.347108 5.347108 2557 +algorithmsfor 1 4 5.568345 5.568345 2748 +multimediasystem 1 4 5.568345 5.568345 2701 +forparallel 1 4 5.568345 5.568345 2703 +customiz 1 4 5.568345 5.568345 2966 +chile 1 4 5.568345 5.568345 3082 +ireland 1 4 5.568345 5.568345 2853 +inadequ 1 3 5.857933 5.857933 3730 +propag 1 3 5.857933 5.857933 3997 +histogram 1 3 5.857933 5.857933 3490 +disciplin 1 3 5.857933 5.857933 3392 +andsemant 1 3 5.857933 5.857933 3246 +metaphor 1 3 5.857933 5.857933 4038 +microscop 1 3 5.857933 5.857933 4035 +publicationsi 1 3 5.857933 5.857933 3827 +conjunct 1 3 5.857933 5.857933 3743 +stockholm 1 3 5.857933 5.857933 3715 +zurich 1 3 5.857933 5.857933 3550 +switzerland 1 3 5.857933 5.857933 3551 +santiago 1 3 5.857933 5.857933 4013 +interestsdatabas 1 2 6.263398 6.263398 6116 +andinform 1 2 6.263398 6.263398 5550 +scientificdata 1 2 6.263398 6.263398 6067 +queryoptim 1 2 6.263398 6.263398 4057 +algorithmsa 1 2 6.263398 6.263398 4487 +anneal 1 2 6.263398 6.263398 4136 +basedperform 1 2 6.263398 6.263398 6055 +spectroscopi 1 2 6.263398 6.263398 6206 +anniversari 1 2 6.263398 6.263398 4945 +garofalaki 1 2 6.263398 6.263398 6209 +performanceevalu 1 2 6.263398 6.263398 6052 +bermuda 1 2 6.263398 6.263398 5907 +poosala 1 2 6.263398 6.263398 6228 +turtl 1 2 6.263398 6.263398 4235 +haa 1 2 6.263398 6.263398 6115 +gmap 1 2 6.263398 6.263398 6241 +versatil 1 2 6.263398 6.263398 6242 +dublin 1 2 6.263398 6.263398 4883 +ioannidisyanni 1 1 6.957497 6.957497 20417 +toqueri 1 1 6.957497 6.957497 20418 +thanin 1 1 6.957497 6.957497 20419 +highera 1 1 6.957497 6.957497 20420 +tooptim 1 1 6.957497 6.957497 20421 +querywil 1 1 6.957497 6.957497 20422 +optimum 1 1 6.957497 6.957497 20423 +viabl 1 1 6.957497 6.957497 20424 +propertiesof 1 1 6.957497 6.957497 20425 +especiallythos 1 1 6.957497 6.957497 20426 +alsopart 1 1 6.957497 6.957497 20427 +appropriateinform 1 1 6.957497 6.957497 20428 +thepropag 1 1 6.957497 6.957497 20429 +ofoptim 1 1 6.957497 6.957497 20430 +inrel 1 1 6.957497 6.957497 20431 +manyexperi 1 1 6.957497 6.957497 20432 +aspectsthat 1 1 6.957497 6.957497 20433 +managementenviron 1 1 6.957497 6.957497 20434 +theirexperiment 1 1 6.957497 6.957497 20435 +arefor 1 1 6.957497 6.957497 20436 +scientistsso 1 1 6.957497 6.957497 20437 +facilitatetransl 1 1 6.957497 6.957497 20438 +experimentalscientif 1 1 6.957497 6.957497 20439 +specificproject 1 1 6.957497 6.957497 20440 +plantgrowth 1 1 6.957497 6.957497 20441 +issueon 1 1 6.957497 6.957497 20442 +beyondrel 1 1 6.957497 6.957497 20443 +tod 1 1 6.957497 6.957497 20413 +haber 1 1 6.957497 6.957497 20411 +forschema 1 1 6.957497 6.957497 20444 +tsangari 1 1 6.957497 6.957497 20445 +tkde 1 1 6.957497 6.957497 20446 +ofheterogen 1 1 6.957497 6.957497 20414 +christodoulaki 1 1 6.957497 6.957497 20447 +limitingworst 1 1 6.957497 6.957497 20448 +winger 1 1 6.957497 6.957497 20449 +algorithmsbas 1 1 6.957497 6.957497 20450 +ondatabas 1 1 6.957497 6.957497 20415 +databaseestim 1 1 6.957497 6.957497 20451 +ponnekanti 1 1 6.957497 6.957497 20452 +experimentmanag 1 1 6.957497 6.957497 20453 +itsappl 1 1 6.957497 6.957497 20454 +vldbconfer 1 1 6.957497 6.957497 20412 +anjur 1 1 6.957497 6.957497 20455 +bridgesbetween 1 1 6.957497 6.957497 20456 +shekita 1 1 6.957497 6.957497 20457 +forselect 1 1 6.957497 6.957497 20458 +internationalacm 1 1 6.957497 6.957497 20459 +layoutat 1 1 6.957497 6.957497 20460 +granular 1 1 6.957497 6.957497 20461 +advancedvisu 1 1 6.957497 6.957497 20462 +gubbio 1 1 6.957497 6.957497 20463 +opossum 1 1 6.957497 6.957497 20416 +managementthrough 1 1 6.957497 6.957497 20464 +practicalityfor 1 1 6.957497 6.957497 20465 +sigmodconfer 1 1 6.957497 6.957497 20466 +forphys 1 1 6.957497 6.957497 20467 +dexa 1 1 6.957497 6.957497 20468 +athen 1 1 6.957497 6.957497 20469 +lashkari 1 1 6.957497 6.957497 20470 +theirdisambigu 1 1 6.957497 6.957497 20471 +schemavisu 1 1 6.957497 6.957497 20472 +edbt 1 1 6.957497 6.957497 20473 +internationalvldb 1 1 6.957497 6.957497 20474 +capacityin 1 1 6.957497 6.957497 20475 +wiener 1 1 6.957497 6.957497 20476 +moos 1 1 6.957497 6.957497 20477 +withdata 1 1 6.957497 6.957497 20478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..eb414f0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +homepag 1 93 2.397895 2.397895 148 +kevin 1 9 4.753590 4.753590 1482 +zhongbin 1 1 6.957497 6.957497 20496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..2d0ea169 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +modifi 1 178 1.609438 1.609438 35 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +support 1 132 1.945910 1.945910 83 +welcom 1 122 2.079442 2.079442 99 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +person 1 111 2.197225 2.197225 117 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +info 1 85 2.484907 2.484907 176 +good 1 77 2.564949 2.564949 200 +collect 1 65 2.772589 2.772589 268 +organ 1 65 2.772589 2.772589 265 +give 1 50 3.044522 3.044522 359 +telephon 1 50 3.044522 3.044522 373 +visitor 1 49 3.044522 3.044522 371 +press 1 42 3.218876 3.218876 419 +littl 1 39 3.258097 3.258097 454 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +articl 1 33 3.433987 3.433987 530 +travel 1 30 3.555348 3.555348 579 +chines 1 29 3.583519 3.583519 595 +univ 1 28 3.610918 3.610918 617 +comp 1 26 3.688879 3.688879 650 +client 1 25 3.737670 3.737670 679 +sometim 1 24 3.761200 3.761200 696 +alumni 1 21 3.912023 3.912023 807 +wrote 1 20 3.951244 3.951244 830 +stat 1 17 4.110874 4.110874 924 +took 1 16 4.174387 4.174387 1010 +month 1 15 4.248495 4.248495 1025 +trip 1 14 4.317488 4.317488 1113 +employ 1 12 4.465908 4.465908 1291 +classmat 1 9 4.753590 4.753590 1516 +chicago 1 6 5.164786 5.164786 2149 +bldg 1 4 5.568345 5.568345 2983 +amaz 1 4 5.568345 5.568345 2600 +usathi 1 2 6.263398 6.263398 5951 +diari 1 2 6.263398 6.263398 4740 +linksmi 1 2 6.263398 6.263398 6215 +oversea 1 2 6.263398 6.263398 5781 +yinng 1 1 6.957497 6.957497 20479 +pageindexofyinongwei 1 1 6.957497 6.957497 20480 +spagehi 1 1 6.957497 6.957497 20481 +alsolink 1 1 6.957497 6.957497 20482 +inforesumehobbiestravel 1 1 6.957497 6.957497 20483 +pointersr 1 1 6.957497 6.957497 20484 +computingmacin 1 1 6.957497 6.957497 20485 +learningpattern 1 1 6.957497 6.957497 20486 +recognitioncomputatin 1 1 6.957497 6.957497 20487 +geometrydatabasevisionacadem 1 1 6.957497 6.957497 20488 +diarythi 1 1 6.957497 6.957497 20489 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 1 1 6.957497 6.957497 20490 +beida 1 1 6.957497 6.957497 20491 +classmatespek 1 1 6.957497 6.957497 20492 +ciumi 1 1 6.957497 6.957497 20493 +bookmarkcom 1 1 6.957497 6.957497 20494 +yinong 1 1 6.957497 6.957497 20495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..f538124b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +call 1 91 2.397895 2.397895 153 +room 1 59 2.833213 2.833213 301 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +california 1 46 3.091042 3.091042 388 +around 1 43 3.178054 3.178054 415 +strategi 1 25 3.737670 3.737670 682 +voic 1 21 3.912023 3.912023 806 +hous 1 21 3.912023 3.912023 801 +beauti 1 18 4.060443 4.060443 912 +drive 1 15 4.248495 4.248495 1052 +food 1 12 4.465908 4.465908 1285 +poor 1 8 4.875197 4.875197 1736 +matthew 1 6 5.164786 5.164786 2193 +parent 1 6 5.164786 5.164786 2204 +observatori 1 4 5.568345 5.568345 3070 +matt 1 3 5.857933 5.857933 3792 +beach 1 3 5.857933 5.857933 3782 +pageuntil 1 1 6.957497 6.957497 20499 +zeidenbergcent 1 1 6.957497 6.957497 20500 +gilson 1 1 6.957497 6.957497 20501 +zeiden 1 1 6.957497 6.957497 20502 +eduzeidenb 1 1 6.957497 6.957497 20503 +eduwhen 1 1 6.957497 6.957497 20504 +coho 1 1 6.957497 6.957497 20505 +huntington 1 1 6.957497 6.957497 20506 +convuls 1 1 6.957497 6.957497 20497 +breton 1 1 6.957497 6.957497 20507 +nadja 1 1 6.957497 6.957497 20508 +beaut 1 1 6.957497 6.957497 20509 +sera 1 1 6.957497 6.957497 20498 +saint 1 1 6.957497 6.957497 20510 +whyth 1 1 6.957497 6.957497 20511 +communist 1 1 6.957497 6.957497 20512 +helder 1 1 6.957497 6.957497 20513 +camara 1 1 6.957497 6.957497 20514 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..e24b1cbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +document 1 121 2.079442 2.079442 89 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +find 1 111 2.197225 2.197225 111 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +select 1 91 2.397895 2.397895 154 +larg 1 82 2.484907 2.484907 168 +resourc 1 81 2.484907 2.484907 172 +journal 1 83 2.484907 2.484907 183 +educ 1 86 2.484907 2.484907 191 +june 1 79 2.564949 2.564949 214 +method 1 80 2.564949 2.564949 213 +issu 1 78 2.564949 2.564949 211 +intellig 1 72 2.639057 2.639057 225 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +free 1 73 2.639057 2.639057 224 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +knowledg 1 67 2.708050 2.708050 243 +prof 1 64 2.772589 2.772589 273 +artifici 1 63 2.772589 2.772589 280 +dept 1 64 2.772589 2.772589 291 +function 1 62 2.772589 2.772589 275 +import 1 65 2.772589 2.772589 282 +interact 1 62 2.772589 2.772589 270 +plan 1 65 2.772589 2.772589 272 +organ 1 65 2.772589 2.772589 265 +room 1 59 2.833213 2.833213 301 +juli 1 60 2.833213 2.833213 305 +major 1 56 2.890372 2.890372 315 +thesi 1 57 2.890372 2.890372 327 +space 1 57 2.890372 2.890372 310 +run 1 51 2.995732 2.995732 347 +profession 1 51 2.995732 2.995732 345 +telephon 1 50 3.044522 3.044522 373 +fast 1 42 3.218876 3.218876 429 +submit 1 39 3.258097 3.258097 440 +probabl 1 40 3.258097 3.258097 455 +purpos 1 37 3.332205 3.332205 481 +china 1 37 3.332205 3.332205 487 +multi 1 36 3.367296 3.367296 493 +robot 1 36 3.367296 3.367296 497 +statist 1 35 3.401197 3.401197 521 +eduoffic 1 33 3.433987 3.433987 531 +given 1 32 3.465736 3.465736 538 +domain 1 30 3.555348 3.555348 564 +limit 1 29 3.583519 3.583519 585 +univ 1 28 3.610918 3.610918 617 +cluster 1 28 3.610918 3.610918 612 +manipul 1 27 3.637586 3.637586 624 +mine 1 26 3.688879 3.688879 654 +proc 1 26 3.688879 3.688879 649 +relev 1 26 3.688879 3.688879 637 +accur 1 25 3.737670 3.737670 680 +pattern 1 24 3.761200 3.761200 689 +motion 1 24 3.761200 3.761200 699 +compress 1 23 3.806662 3.806662 719 +recognit 1 23 3.806662 3.806662 723 +mobil 1 23 3.806662 3.806662 730 +identifi 1 22 3.850148 3.850148 760 +cooper 1 22 3.850148 3.850148 757 +divis 1 21 3.912023 3.912023 803 +path 1 21 3.912023 3.912023 778 +kernel 1 20 3.951244 3.951244 825 +region 1 19 4.007333 4.007333 875 +sigmod 1 19 4.007333 4.007333 877 +beij 1 19 4.007333 4.007333 876 +concentr 1 18 4.060443 4.060443 906 +dimension 1 18 4.060443 4.060443 909 +estim 1 17 4.110874 4.110874 930 +zhang 1 16 4.174387 4.174387 980 +ramakrishnan 1 16 4.174387 4.174387 972 +spars 1 16 4.174387 4.174387 989 +young 1 16 4.174387 4.174387 991 +livni 1 15 4.248495 4.248495 1053 +configur 1 15 4.248495 4.248495 1012 +miron 1 14 4.317488 4.317488 1110 +topolog 1 14 4.317488 4.317488 1089 +finit 1 14 4.317488 4.317488 1106 +joint 1 13 4.382027 4.382027 1130 +conf 1 13 4.382027 4.382027 1181 +canada 1 13 4.382027 4.382027 1158 +raghu 1 12 4.465908 4.465908 1212 +grow 1 12 4.465908 4.465908 1209 +amount 1 12 4.465908 4.465908 1208 +overal 1 12 4.465908 4.465908 1254 +branch 1 11 4.553877 4.553877 1318 +discov 1 9 4.753590 4.753590 1562 +classif 1 9 4.753590 4.753590 1586 +manufactur 1 8 4.875197 4.875197 1634 +dataset 1 7 5.010635 5.010635 1914 +densiti 1 7 5.010635 5.010635 1927 +discoveri 1 7 5.010635 5.010635 1915 +trend 1 7 5.010635 5.010635 1842 +dimens 1 7 5.010635 5.010635 1930 +reduct 1 7 5.010635 5.010635 1877 +financi 1 6 5.164786 5.164786 2197 +invest 1 6 5.164786 5.164786 2153 +exploratori 1 4 5.568345 5.568345 3073 +ling 1 4 5.568345 5.568345 3045 +ijcai 1 4 5.568345 5.568345 2901 +tian 1 3 5.857933 5.857933 3680 +ortool 1 2 6.263398 6.263398 4169 +birch 1 2 6.263398 6.263398 6136 +andmanufactur 1 2 6.263398 6.263398 6244 +collis 1 2 6.263398 6.263398 5956 +assistantadvisor 1 1 6.957497 6.957497 20516 +compilerminor 1 1 6.957497 6.957497 20517 +bankingoffic 1 1 6.957497 6.957497 20518 +intereststher 1 1 6.957497 6.957497 20519 +territori 1 1 6.957497 6.957497 20520 +densityanalysi 1 1 6.957497 6.957497 20521 +crowd 1 1 6.957497 6.957497 20522 +dataclassif 1 1 6.957497 6.957497 20523 +knowledgediscoveri 1 1 6.957497 6.957497 20524 +dimensionreduct 1 1 6.957497 6.957497 20525 +findpath 1 1 6.957497 6.957497 20526 +jianwei 1 1 6.957497 6.957497 20515 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..c46fb0e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +line 1 75 2.639057 2.639057 231 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +detail 1 57 2.890372 2.890372 321 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +graph 1 30 3.555348 3.555348 576 +chines 1 29 3.583519 3.583519 595 +mine 1 26 3.688879 3.688879 654 +jeff 1 25 3.737670 3.737670 673 +todai 1 25 3.737670 3.737670 672 +daili 1 24 3.761200 3.761200 706 +yahoo 1 24 3.761200 3.761200 707 +benchmark 1 19 4.007333 4.007333 859 +north 1 19 4.007333 4.007333 873 +sigmod 1 19 4.007333 4.007333 877 +lyco 1 19 4.007333 4.007333 871 +stock 1 16 4.174387 4.174387 1007 +taiwan 1 16 4.174387 4.174387 1006 +club 1 15 4.248495 4.248495 1058 +dbm 1 13 4.382027 4.382027 1136 +excit 1 11 4.553877 4.553877 1329 +surf 1 11 4.553877 4.553877 1301 +naughton 1 10 4.653960 4.653960 1450 +analyt 1 7 5.010635 5.010635 1913 +monei 1 7 5.010635 5.010635 1934 +advis 1 6 5.164786 5.164786 2173 +financi 1 6 5.164786 5.164786 2197 +carolina 1 6 5.164786 5.164786 2142 +maryland 1 6 5.164786 5.164786 2140 +chapel 1 5 5.347108 5.347108 2457 +zhao 1 4 5.568345 5.568345 2699 +ters 1 3 5.857933 5.857933 3297 +olap 1 2 6.263398 6.263398 6233 +arbor 1 2 6.263398 6.263398 6235 +molap 1 2 6.263398 6.263398 6217 +pathfind 1 2 6.263398 6.263398 6053 +yihong 1 1 6.957497 6.957497 20527 +educationb 1 1 6.957497 6.957497 20528 +hillm 1 1 6.957497 6.957497 20529 +wiscosin 1 1 6.957497 6.957497 20530 +datamin 1 1 6.957497 6.957497 20531 +microstrategi 1 1 6.957497 6.957497 20532 +rolap 1 1 6.957497 6.957497 20533 +lombard 1 1 6.957497 6.957497 20534 +kiwi 1 1 6.957497 6.957497 20535 +pgmo 1 1 6.957497 6.957497 20536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..f837a860 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +homepag 1 93 2.397895 2.397895 148 +street 1 63 2.772589 2.772589 293 +still 1 50 3.044522 3.044522 362 +offer 1 43 3.178054 3.178054 414 +wang 1 21 3.912023 3.912023 790 +johnson 1 13 4.382027 4.382027 1162 +zhewang 1 1 6.957497 6.957497 20537 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..87b151b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +texa 1 160 1.791759 1.791759 64 +develop 1 174 1.791759 1.791759 53 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +place 1 106 2.197225 2.197225 124 +version 1 113 2.197225 2.197225 122 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +environ 1 84 2.484907 2.484907 177 +issu 1 78 2.564949 2.564949 211 +orient 1 80 2.564949 2.564949 205 +nation 1 74 2.639057 2.639057 240 +html 1 75 2.639057 2.639057 235 +simul 1 66 2.708050 2.708050 255 +java 1 70 2.708050 2.708050 248 +evalu 1 64 2.772589 2.772589 266 +laboratori 1 63 2.772589 2.772589 292 +juli 1 60 2.833213 2.833213 305 +share 1 59 2.833213 2.833213 304 +think 1 57 2.890372 2.890372 314 +publish 1 57 2.890372 2.890372 326 +sever 1 56 2.890372 2.890372 322 +advisor 1 51 2.995732 2.995732 355 +friend 1 48 3.044522 3.044522 376 +press 1 42 3.218876 3.218876 419 +combin 1 42 3.218876 3.218876 421 +workstat 1 37 3.332205 3.332205 479 +field 1 37 3.332205 3.332205 482 +china 1 37 3.332205 3.332205 487 +jame 1 35 3.401197 3.401197 507 +award 1 34 3.401197 3.401197 523 +particip 1 29 3.583519 3.583519 589 +cluster 1 28 3.610918 3.610918 612 +detect 1 26 3.688879 3.688879 646 +supercomput 1 25 3.737670 3.737670 681 +benchmark 1 19 4.007333 4.007333 859 +predict 1 19 4.007333 4.007333 855 +asplo 1 17 4.110874 4.110874 948 +novel 1 15 4.248495 4.248495 1039 +paradyn 1 9 4.753590 4.753590 1614 +tunnel 1 9 4.753590 4.753590 1615 +andth 1 9 4.753590 4.753590 1481 +antonio 1 6 5.164786 5.164786 2186 +barton 1 5 5.347108 5.347108 2371 +ofparallel 1 5 5.347108 5.347108 2380 +departmentat 1 5 5.347108 5.347108 2513 +anddistribut 1 4 5.568345 5.568345 3031 +bottleneck 1 4 5.568345 5.568345 2769 +fudan 1 3 5.857933 5.857933 3707 +blizzard 1 2 6.263398 6.263398 6226 +levelprogram 1 2 6.263398 6.263398 5452 +zhichen 1 1 6.957497 6.957497 20538 +larusprofessor 1 1 6.957497 6.957497 20539 +millerawardbest 1 1 6.957497 6.957497 20540 +eliminateperform 1 1 6.957497 6.957497 20541 +toolwith 1 1 6.957497 6.957497 20542 +wisconsinwind 1 1 6.957497 6.957497 20543 +interestprogram 1 1 6.957497 6.957497 20544 +andimcrement 1 1 6.957497 6.957497 20545 +programjourn 1 1 6.957497 6.957497 20546 +researchchines 1 1 6.957497 6.957497 20547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..56ee7b1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +pictur 1 89 2.397895 2.397895 160 +west 1 83 2.484907 2.484907 192 +name 1 72 2.639057 2.639057 220 +street 1 63 2.772589 2.772589 293 +taken 1 31 3.496508 3.496508 555 +chen 1 21 3.912023 3.912023 791 +wang 1 21 3.912023 3.912023 790 +zhang 1 16 4.174387 4.174387 980 +tsinghua 1 13 4.382027 4.382027 1195 +hello 1 10 4.653960 4.653960 1407 +invit 1 10 4.653960 4.653960 1428 +restaur 1 6 5.164786 5.164786 2230 +theth 1 5 5.347108 5.347108 2325 +tong 1 3 5.857933 5.857933 3258 +supper 1 1 6.957497 6.957497 20548 +weihai 1 1 6.957497 6.957497 20549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..de8d37a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +number 1 130 2.079442 2.079442 97 +pictur 1 89 2.397895 2.397895 160 +visitor 1 49 3.044522 3.044522 371 +thank 1 23 3.806662 3.806662 721 +stop 1 17 4.110874 4.110874 942 +poland 1 3 5.857933 5.857933 3665 +inc 1 2 6.263398 6.263398 5914 +krzysztof 1 1 6.957497 6.957497 20550 +zmudzinskikrzysztof 1 1 6.957497 6.957497 20551 +zmudzinskispin 1 1 6.957497 6.957497 20552 +pole 1 1 6.957497 6.957497 20553 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..9a3a4837 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +hall 1 146 1.945910 1.945910 65 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +number 1 130 2.079442 2.079442 97 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +educ 1 86 2.484907 2.484907 191 +help 1 83 2.484907 2.484907 175 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +logic 1 71 2.639057 2.639057 230 +servic 1 72 2.639057 2.639057 236 +integr 1 67 2.708050 2.708050 245 +test 1 66 2.708050 2.708050 252 +simul 1 66 2.708050 2.708050 255 +goal 1 66 2.708050 2.708050 250 +view 1 70 2.708050 2.708050 254 +laboratori 1 63 2.772589 2.772589 292 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +best 1 59 2.833213 2.833213 299 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +extens 1 53 2.944439 2.944439 340 +digit 1 52 2.995732 2.995732 348 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +tabl 1 51 2.995732 2.995732 346 +anoth 1 45 3.135494 3.135494 408 +fridai 1 44 3.135494 3.135494 390 +combin 1 42 3.218876 3.218876 421 +programm 1 39 3.258097 3.258097 445 +littl 1 39 3.258097 3.258097 454 +continu 1 39 3.258097 3.258097 448 +credit 1 38 3.295837 3.295837 460 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +profil 1 30 3.555348 3.555348 581 +built 1 29 3.583519 3.583519 592 +arrai 1 27 3.637586 3.637586 627 +enhanc 1 26 3.688879 3.688879 644 +reliabl 1 25 3.737670 3.737670 674 +wai 1 25 3.737670 3.737670 662 +compress 1 23 3.806662 3.806662 719 +self 1 22 3.850148 3.850148 761 +vlsi 1 21 3.912023 3.912023 795 +hous 1 21 3.912023 3.912023 801 +facil 1 20 3.951244 3.951244 814 +concentr 1 18 4.060443 4.060443 906 +engineeringunivers 1 17 4.110874 4.110874 959 +modif 1 17 4.110874 4.110874 913 +monitor 1 17 4.110874 4.110874 941 +normal 1 16 4.174387 4.174387 995 +photograph 1 15 4.248495 4.248495 1056 +webmast 1 15 4.248495 4.248495 1045 +circuit 1 13 4.382027 4.382027 1131 +carri 1 13 4.382027 4.382027 1152 +station 1 13 4.382027 4.382027 1157 +engr 1 10 4.653960 4.653960 1427 +penalti 1 10 4.653960 4.653960 1405 +iowa 1 7 5.010635 5.010635 1971 +compact 1 7 5.010635 5.010635 1907 +asystem 1 4 5.568345 5.568345 2612 +termin 1 4 5.568345 5.568345 2852 +fountain 1 4 5.568345 5.568345 3069 +eduupd 1 4 5.568345 5.568345 3056 +saluja 1 3 5.857933 5.857933 3104 +eduportrait 1 3 5.857933 5.857933 4039 +fabric 1 3 5.857933 5.857933 3607 +consortia 1 3 5.857933 5.857933 4040 +cdtthi 1 3 5.857933 5.857933 4041 +kewal 1 2 6.263398 6.263398 4072 +drivemadison 1 2 6.263398 6.263398 6245 +testabl 1 2 6.263398 6.263398 5606 +andsequenti 1 2 6.263398 6.263398 4532 +salujaprofessor 1 1 6.957497 6.957497 20554 +jpgdepartmentselectr 1 1 6.957497 6.957497 20555 +engineeringcomput 1 1 6.957497 6.957497 20556 +interestsdesign 1 1 6.957497 6.957497 20557 +testableand 1 1 6.957497 6.957497 20558 +thisarea 1 1 6.957497 6.957497 20559 +theresearch 1 1 6.957497 6.957497 20560 +testgener 1 1 6.957497 6.957497 20561 +inself 1 1 6.957497 6.957497 20562 +andfault 1 1 6.957497 6.957497 20563 +methodsapplic 1 1 6.957497 6.957497 20564 +testenviron 1 1 6.957497 6.957497 20565 +regularstructur 1 1 6.957497 6.957497 20566 +ram 1 1 6.957497 6.957497 20567 +areinvestig 1 1 6.957497 6.957497 20568 +inhardwar 1 1 6.957497 6.957497 20569 +projectw 1 1 6.957497 6.957497 20570 +thatth 1 1 6.957497 6.957497 20571 +noimpact 1 1 6.957497 6.957497 20572 +digitalsystem 1 1 6.957497 6.957497 20573 +withcolor 1 1 6.957497 6.957497 20574 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..e0923fb6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +advanc 1 99 2.302585 2.302585 130 +center 1 88 2.397895 2.397895 158 +real 1 93 2.397895 2.397895 144 +associ 1 93 2.397895 2.397895 151 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +build 1 85 2.484907 2.484907 184 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +help 1 83 2.484907 2.484907 175 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +involv 1 71 2.639057 2.639057 227 +materi 1 75 2.639057 2.639057 221 +servic 1 72 2.639057 2.639057 236 +tuesdai 1 73 2.639057 2.639057 219 +integr 1 67 2.708050 2.708050 245 +test 1 66 2.708050 2.708050 252 +view 1 70 2.708050 2.708050 254 +guid 1 63 2.772589 2.772589 267 +complex 1 64 2.772589 2.772589 269 +evalu 1 64 2.772589 2.772589 266 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +tabl 1 51 2.995732 2.995732 346 +autom 1 41 3.218876 3.218876 434 +author 1 39 3.258097 3.258097 450 +close 1 38 3.295837 3.295837 465 +industri 1 38 3.295837 3.295837 464 +credit 1 38 3.295837 3.295837 460 +cost 1 37 3.332205 3.332205 480 +robot 1 36 3.367296 3.367296 497 +product 1 33 3.433987 3.433987 527 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +human 1 32 3.465736 3.465736 546 +profil 1 30 3.555348 3.555348 581 +hope 1 28 3.610918 3.610918 610 +scale 1 28 3.610918 3.610918 613 +experiment 1 26 3.688879 3.688879 645 +handl 1 24 3.761200 3.761200 685 +highli 1 23 3.806662 3.806662 725 +self 1 22 3.850148 3.850148 761 +finish 1 22 3.850148 3.850148 748 +reduc 1 22 3.850148 3.850148 759 +properti 1 22 3.850148 3.850148 749 +director 1 22 3.850148 3.850148 767 +flexibl 1 21 3.912023 3.912023 792 +fund 1 21 3.912023 3.912023 805 +increas 1 20 3.951244 3.951244 829 +feedback 1 19 4.007333 4.007333 854 +engineeringunivers 1 17 4.110874 4.110874 959 +precis 1 15 4.248495 4.248495 1023 +hierarch 1 15 4.248495 4.248495 1018 +photograph 1 15 4.248495 4.248495 1056 +webmast 1 15 4.248495 4.248495 1045 +incorpor 1 13 4.382027 4.382027 1163 +nasa 1 13 4.382027 4.382027 1188 +carri 1 13 4.382027 4.382027 1152 +engr 1 10 4.653960 4.653960 1427 +weld 1 9 4.753590 4.753590 1538 +factor 1 9 4.753590 4.753590 1544 +manufactur 1 8 4.875197 4.875197 1634 +sensor 1 7 5.010635 5.010635 1920 +explain 1 7 5.010635 5.010635 1816 +actuat 1 5 5.347108 5.347108 2442 +neil 1 4 5.568345 5.568345 2841 +fulli 1 4 5.568345 5.568345 2986 +emphas 1 4 5.568345 5.568345 2672 +fountain 1 4 5.568345 5.568345 3069 +eduupd 1 4 5.568345 5.568345 3056 +eduportrait 1 3 5.857933 5.857933 4039 +aerospac 1 3 5.857933 5.857933 3555 +consortia 1 3 5.857933 5.857933 4040 +cdtthi 1 3 5.857933 5.857933 4041 +duffi 1 2 6.263398 6.263398 4845 +drivemadison 1 2 6.263398 6.263398 6245 +telerobot 1 2 6.263398 6.263398 4847 +departmentsmechan 1 1 6.957497 6.957497 20575 +engineeringeducationb 1 1 6.957497 6.957497 20576 +madisonm 1 1 6.957497 6.957497 20577 +madisonphd 1 1 6.957497 6.957497 20578 +madisonresearch 1 1 6.957497 6.957497 20579 +interestsrobot 1 1 6.957497 6.957497 20580 +micromechanismscent 1 1 6.957497 6.957497 20581 +consortiamanufactur 1 1 6.957497 6.957497 20582 +programwisconsin 1 1 6.957497 6.957497 20583 +roboticsprofessor 1 1 6.957497 6.957497 20584 +inspect 1 1 6.957497 6.957497 20585 +mold 1 1 6.957497 6.957497 20586 +rework 1 1 6.957497 6.957497 20587 +agricultur 1 1 6.957497 6.957497 20588 +tactil 1 1 6.957497 6.957497 20589 +sensori 1 1 6.957497 6.957497 20590 +fatigu 1 1 6.957497 6.957497 20591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..2cd6f8a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +select 1 91 2.397895 2.397895 154 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +april 1 77 2.564949 2.564949 196 +appli 1 71 2.639057 2.639057 226 +servic 1 72 2.639057 2.639057 236 +simul 1 66 2.708050 2.708050 255 +practic 1 70 2.708050 2.708050 246 +thursdai 1 70 2.708050 2.708050 241 +view 1 70 2.708050 2.708050 254 +foundat 1 62 2.772589 2.772589 286 +function 1 62 2.772589 2.772589 275 +januari 1 62 2.772589 2.772589 264 +creat 1 63 2.772589 2.772589 277 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +simpl 1 60 2.833213 2.833213 298 +best 1 59 2.833213 2.833213 299 +space 1 57 2.890372 2.890372 310 +major 1 56 2.890372 2.890372 315 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +suggest 1 53 2.944439 2.944439 331 +maintain 1 51 2.995732 2.995732 342 +investig 1 51 2.995732 2.995732 353 +tabl 1 51 2.995732 2.995732 346 +california 1 46 3.091042 3.091042 388 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +mechan 1 43 3.178054 3.178054 416 +term 1 43 3.178054 3.178054 411 +http 1 41 3.218876 3.218876 420 +york 1 41 3.218876 3.218876 435 +autom 1 41 3.218876 3.218876 434 +transact 1 39 3.258097 3.258097 438 +form 1 39 3.258097 3.258097 443 +industri 1 38 3.295837 3.295837 464 +credit 1 38 3.295837 3.295837 460 +formal 1 37 3.332205 3.332205 478 +award 1 34 3.401197 3.401197 523 +represent 1 35 3.401197 3.401197 512 +product 1 33 3.433987 3.433987 527 +collabor 1 32 3.465736 3.465736 543 +focu 1 30 3.555348 3.555348 571 +profil 1 30 3.555348 3.555348 581 +manipul 1 27 3.637586 3.637586 624 +repres 1 26 3.688879 3.688879 656 +consist 1 26 3.688879 3.688879 651 +effort 1 26 3.688879 3.688879 652 +reliabl 1 25 3.737670 3.737670 674 +todai 1 25 3.737670 3.737670 672 +aspect 1 25 3.737670 3.737670 663 +fellow 1 24 3.761200 3.761200 701 +famili 1 23 3.806662 3.806662 735 +geometri 1 22 3.850148 3.850148 752 +deal 1 22 3.850148 3.850148 736 +thu 1 21 3.912023 3.912023 773 +basi 1 20 3.951244 3.951244 828 +geometr 1 19 4.007333 4.007333 852 +separ 1 19 4.007333 4.007333 844 +aid 1 18 4.060443 4.060443 904 +behavior 1 18 4.060443 4.060443 881 +engineeringunivers 1 17 4.110874 4.110874 959 +analyz 1 17 4.110874 4.110874 925 +seek 1 17 4.110874 4.110874 954 +novel 1 15 4.248495 4.248495 1039 +photograph 1 15 4.248495 4.248495 1056 +webmast 1 15 4.248495 4.248495 1045 +topolog 1 14 4.317488 4.317488 1089 +convert 1 13 4.382027 4.382027 1122 +cannot 1 13 4.382027 4.382027 1144 +discret 1 13 4.382027 4.382027 1165 +career 1 12 4.465908 4.465908 1287 +captur 1 12 4.465908 4.465908 1232 +abil 1 11 4.553877 4.553877 1341 +engr 1 10 4.653960 4.653960 1427 +decomposit 1 10 4.653960 4.653960 1439 +relationship 1 10 4.653960 4.653960 1383 +facilit 1 10 4.653960 4.653960 1412 +mainten 1 9 4.753590 4.753590 1543 +establish 1 9 4.753590 4.753590 1532 +shapiro 1 8 4.875197 4.875197 1686 +manufactur 1 8 4.875197 4.875197 1634 +convers 1 8 4.875197 4.875197 1673 +combinatori 1 8 4.875197 4.875197 1629 +competit 1 8 4.875197 4.875197 1635 +boundari 1 7 5.010635 5.010635 1929 +appar 1 7 5.010635 5.010635 1958 +ongo 1 6 5.164786 5.164786 2215 +lack 1 6 5.164786 5.164786 1994 +solid 1 5 5.347108 5.347108 2255 +rigid 1 5 5.347108 5.347108 2432 +chain 1 4 5.568345 5.568345 2712 +phenomena 1 4 5.568345 5.568345 2962 +languagesand 1 4 5.568345 5.568345 3071 +fountain 1 4 5.568345 5.568345 3069 +eduupd 1 4 5.568345 5.568345 3056 +eduportrait 1 3 5.857933 5.857933 4039 +motor 1 3 5.857933 5.857933 3909 +fabric 1 3 5.857933 5.857933 3607 +systemat 1 3 5.857933 5.857933 3781 +consortia 1 3 5.857933 5.857933 4040 +cdtthi 1 3 5.857933 5.857933 4041 +avenuemadison 1 2 6.263398 6.263398 4842 +interestscomput 1 2 6.263398 6.263398 6113 +palmer 1 2 6.263398 6.263398 5453 +artifact 1 2 6.263398 6.263398 5346 +methodsand 1 2 6.263398 6.263398 5779 +amajor 1 2 6.263398 6.263398 5343 +designand 1 2 6.263398 6.263398 6100 +andmanufactur 1 2 6.263398 6.263398 6244 +tomanufactur 1 2 6.263398 6.263398 6016 +ofnew 1 2 6.263398 6.263398 5881 +vadim 1 1 6.957497 6.957497 20592 +vshapiro 1 1 6.957497 6.957497 20593 +jpgurl 1 1 6.957497 6.957497 20594 +departmentscomput 1 1 6.957497 6.957497 20595 +sciencemechan 1 1 6.957497 6.957497 20596 +engineeringeducationba 1 1 6.957497 6.957497 20597 +universitym 1 1 6.957497 6.957497 20598 +angelesm 1 1 6.957497 6.957497 20599 +universityphd 1 1 6.957497 6.957497 20600 +univeristyresearch 1 1 6.957497 6.957497 20601 +automationcent 1 1 6.957497 6.957497 20602 +consortiamathemat 1 1 6.957497 6.957497 20603 +programmanufactur 1 1 6.957497 6.957497 20604 +programspati 1 1 6.957497 6.957497 20605 +laboratoryselect 1 1 6.957497 6.957497 20606 +honorsn 1 1 6.957497 6.957497 20607 +vossler 1 1 6.957497 6.957497 20608 +betweengeometri 1 1 6.957497 6.957497 20609 +bemodel 1 1 6.957497 6.957497 20610 +manufacturedbas 1 1 6.957497 6.957497 20611 +ofdistinct 1 1 6.957497 6.957497 20612 +technologicalbarri 1 1 6.957497 6.957497 20613 +undermin 1 1 6.957497 6.957497 20614 +commercialgeometr 1 1 6.957497 6.957497 20615 +eliminatingambigu 1 1 6.957497 6.957497 20616 +ofparametr 1 1 6.957497 6.957497 20617 +bedescrib 1 1 6.957497 6.957497 20618 +interactingprimit 1 1 6.957497 6.957497 20619 +roadblock 1 1 6.957497 6.957497 20620 +withtheoret 1 1 6.957497 6.957497 20621 +smoothintegr 1 1 6.957497 6.957497 20622 +thedesir 1 1 6.957497 6.957497 20623 +tounifi 1 1 6.957497 6.957497 20624 +theseand 1 1 6.957497 6.957497 20625 +physicalobject 1 1 6.957497 6.957497 20626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..b6625ac4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +theori 1 111 2.197225 2.197225 127 +part 1 98 2.302585 2.302585 129 +present 1 91 2.397895 2.397895 145 +method 1 80 2.564949 2.564949 213 +want 1 79 2.564949 2.564949 199 +logic 1 71 2.639057 2.639057 230 +appli 1 71 2.639057 2.639057 226 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +previou 1 62 2.772589 2.772589 290 +improv 1 62 2.772589 2.772589 289 +descript 1 64 2.772589 2.772589 271 +index 1 56 2.890372 2.890372 309 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +visitor 1 49 3.044522 3.044522 371 +electron 1 47 3.091042 3.091042 379 +done 1 47 3.091042 3.091042 381 +natur 1 44 3.135494 3.135494 406 +autom 1 41 3.218876 3.218876 434 +past 1 42 3.218876 3.218876 428 +continu 1 39 3.258097 3.258097 448 +late 1 40 3.258097 3.258097 439 +tech 1 35 3.401197 3.401197 515 +ad 1 32 3.465736 3.465736 544 +produc 1 30 3.555348 3.555348 572 +computersci 1 30 3.555348 3.555348 562 +robert 1 30 3.555348 3.555348 567 +profil 1 30 3.555348 3.555348 581 +intend 1 28 3.610918 3.610918 599 +higher 1 24 3.761200 3.761200 690 +seri 1 24 3.761200 3.761200 708 +other 1 24 3.761200 3.761200 697 +proof 1 23 3.806662 3.806662 720 +geometri 1 22 3.850148 3.850148 752 +theorem 1 21 3.912023 3.912023 786 +prove 1 19 4.007333 4.007333 848 +feedback 1 19 4.007333 4.007333 854 +primarili 1 13 4.382027 4.382027 1185 +deduct 1 12 4.465908 4.465908 1236 +benjamin 1 11 4.553877 4.553877 1296 +ataustin 1 9 4.753590 4.753590 1610 +incomplet 1 9 4.753590 4.753590 1575 +prover 1 8 4.875197 4.875197 1653 +boyer 1 6 5.164786 5.164786 2013 +inequ 1 6 5.164786 5.164786 2113 +groupth 1 5 5.347108 5.347108 2549 +systemsand 1 4 5.568345 5.568345 2804 +bledso 1 4 5.568345 5.568345 2999 +chou 1 4 5.568345 5.568345 3033 +analog 1 4 5.568345 5.568345 2875 +feng 1 3 5.857933 5.857933 3300 +intent 1 2 6.263398 6.263398 5768 +herei 1 2 6.263398 6.263398 6187 +woodi 1 2 6.263398 6.263398 5459 +hine 1 2 6.263398 6.263398 4475 +groupautom 1 1 6.957497 6.957497 20629 +techreport 1 1 6.957497 6.957497 20630 +reportseri 1 1 6.957497 6.957497 20631 +grouplarri 1 1 6.957497 6.957497 20632 +hinesmarti 1 1 6.957497 6.957497 20633 +mayberrybenjamin 1 1 6.957497 6.957497 20634 +shultsalumniprevi 1 1 6.957497 6.957497 20635 +robertboyerj 1 1 6.957497 6.957497 20636 +strother 1 1 6.957497 6.957497 20637 +moorethi 1 1 6.957497 6.957497 20638 +collaboratorswhat 1 1 6.957497 6.957497 20639 +implyth 1 1 6.957497 6.957497 20640 +proverstrivelarri 1 1 6.957497 6.957497 20641 +struvelarri 1 1 6.957497 6.957497 20642 +proverand 1 1 6.957497 6.957497 20643 +theretoinclud 1 1 6.957497 6.957497 20644 +mcphee 1 1 6.957497 6.957497 20645 +theoryimplement 1 1 6.957497 6.957497 20646 +hein 1 1 6.957497 6.957497 20627 +borel 1 1 6.957497 6.957497 20628 +theoremprecondit 1 1 6.957497 6.957497 20647 +proverbledso 1 1 6.957497 6.957497 20648 +theoremnqthmboy 1 1 6.957497 6.957497 20649 +andmoor 1 1 6.957497 6.957497 20650 +clinc 1 1 6.957497 6.957497 20651 +iprshult 1 1 6.957497 6.957497 20652 +relatedlinksdo 1 1 6.957497 6.957497 20653 +shult 1 1 6.957497 6.957497 20654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..c3ac4d9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +perman 1 11 4.553877 4.553877 1372 +moveddocu 1 2 6.263398 6.263398 6246 +movedthi 1 2 6.263398 6.263398 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..c3ac4d9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_bool/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 1 47 3.091042 3.091042 382 +perman 1 11 4.553877 4.553877 1372 +moveddocu 1 2 6.263398 6.263398 6246 +movedthi 1 2 6.263398 6.263398 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..3cdc291d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +structur 0 106 2.197225 0.000000 105 +solut 2 82 2.484907 4.969814 162 +homework 1 79 2.564949 2.564949 193 +dynam 0 76 2.564949 0.000000 194 +upson 1 71 2.639057 2.639057 218 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +new 0 64 2.772589 0.000000 262 +unix 0 58 2.890372 0.000000 308 +fridai 0 44 3.135494 0.000000 390 +static 0 27 3.637586 0.000000 619 +practicum 0 16 4.174387 0.000000 960 +pagec 0 15 4.248495 0.000000 1011 +prelim 1 12 4.465908 4.465908 1201 +huang 0 12 4.465908 0.000000 1202 +systemsc 0 11 4.553877 0.000000 1293 +cheng 0 10 4.653960 0.000000 1381 +lili 0 5 5.347108 0.000000 2240 +filesystem 0 4 5.568345 0.000000 2587 +groupcours 0 3 5.857933 0.000000 3092 +ychuang 0 3 5.857933 0.000000 3093 +budiu 1 2 6.263398 6.263398 4042 +systemkenneth 0 2 6.263398 0.000000 4043 +birmanc 0 2 6.263398 0.000000 4044 +syllabuslectur 0 2 6.263398 0.000000 4045 +taslili 0 2 6.263398 0.000000 4046 +mihai 0 2 6.263398 0.000000 4047 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..80cf6c29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +last 1 314 1.098612 1.098612 14 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +oper 1 180 1.609438 1.609438 34 +group 0 183 1.609438 0.000000 36 +schedul 0 119 2.079442 0.000000 85 +specif 0 106 2.197225 0.000000 106 +question 0 91 2.397895 0.000000 141 +chang 1 82 2.484907 2.484907 163 +exampl 0 77 2.564949 0.000000 195 +window 0 68 2.708050 0.000000 242 +handout 1 64 2.772589 2.772589 263 +maintain 0 51 2.995732 0.000000 342 +format 1 48 3.044522 3.044522 356 +principl 0 48 3.044522 0.000000 357 +answer 0 45 3.135494 0.000000 391 +tutori 0 39 3.258097 0.000000 437 +hand 0 37 3.332205 0.000000 475 +symbol 0 27 3.637586 0.000000 620 +displai 0 23 3.806662 0.000000 712 +chip 1 21 3.912023 3.912023 770 +practicum 0 16 4.174387 0.000000 960 +pagec 0 15 4.248495 0.000000 1011 +configur 0 15 4.248495 0.000000 1012 +systemsc 0 11 4.553877 0.000000 1293 +correspond 0 10 4.653960 0.000000 1382 +phase 0 6 5.164786 0.000000 1977 +hoca 1 5 5.347108 5.347108 2241 +lorenzo 0 4 5.568345 0.000000 2588 +penn 0 3 5.857933 0.000000 3094 +alvisi 0 3 5.857933 0.000000 3095 +consol 1 2 6.263398 6.263398 4048 +systemsselect 0 2 6.263398 0.000000 4049 +postcript 0 2 6.263398 0.000000 4050 +postcriptdocu 0 1 6.957497 0.000000 6248 +hocacours 0 1 6.957497 0.000000 6249 +broccoli 0 1 6.957497 0.000000 6250 +fileth 0 1 6.957497 0.000000 6251 +systemth 0 1 6.957497 0.000000 6252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..3deef895 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,241 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +read 2 154 1.791759 3.583518 47 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +relat 2 139 1.945910 3.891820 68 +model 2 145 1.945910 3.891820 69 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +note 0 142 1.945910 0.000000 67 +databas 2 122 2.079442 4.158884 86 +introduct 1 126 2.079442 2.079442 87 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +send 1 114 2.197225 2.197225 109 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +pleas 0 113 2.197225 0.000000 114 +part 1 98 2.302585 2.302585 129 +advanc 1 99 2.302585 2.302585 130 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +grade 2 90 2.397895 4.795790 142 +follow 1 92 2.397895 2.397895 143 +control 1 82 2.484907 2.484907 164 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +larg 0 82 2.484907 0.000000 168 +exam 0 86 2.484907 0.000000 169 +learn 0 86 2.484907 0.000000 170 +homework 3 79 2.564949 7.694847 193 +april 2 77 2.564949 5.129898 196 +optim 1 79 2.564949 2.564949 197 +know 0 80 2.564949 0.000000 198 +exampl 0 77 2.564949 0.000000 195 +want 0 79 2.564949 0.000000 199 +tuesdai 2 73 2.639057 5.278114 219 +name 1 72 2.639057 2.639057 220 +upson 1 71 2.639057 2.639057 218 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +thursdai 2 70 2.708050 5.416100 241 +knowledg 0 67 2.708050 0.000000 243 +receiv 0 66 2.708050 0.000000 244 +integr 0 67 2.708050 0.000000 245 +januari 1 62 2.772589 2.772589 264 +organ 0 65 2.772589 0.000000 265 +evalu 0 64 2.772589 0.000000 266 +wednesdai 0 64 2.772589 0.000000 261 +guid 0 63 2.772589 0.000000 267 +march 1 61 2.833213 2.833213 295 +type 0 61 2.833213 0.000000 296 +back 0 60 2.833213 0.000000 297 +index 1 56 2.890372 2.890372 309 +space 1 57 2.890372 2.890372 310 +summer 0 56 2.890372 0.000000 311 +semest 0 58 2.890372 0.000000 312 +februari 2 54 2.944439 5.888878 328 +cover 1 55 2.944439 2.944439 329 +three 0 54 2.944439 0.000000 330 +week 1 52 2.995732 2.995732 343 +date 0 51 2.995732 0.000000 344 +appoint 1 49 3.044522 3.044522 358 +principl 0 48 3.044522 0.000000 357 +physic 0 47 3.091042 0.000000 377 +possibl 0 47 3.091042 0.000000 378 +midterm 0 45 3.135494 0.000000 392 +even 0 45 3.135494 0.000000 393 +algebra 0 45 3.135494 0.000000 394 +term 1 43 3.178054 3.178054 411 +third 0 43 3.178054 0.000000 412 +edit 1 42 3.218876 3.218876 418 +press 0 42 3.218876 0.000000 419 +transact 1 39 3.258097 3.258097 438 +late 0 40 3.258097 0.000000 439 +submit 0 39 3.258097 0.000000 440 +credit 0 38 3.295837 0.000000 460 +concurr 1 34 3.401197 3.401197 501 +return 1 34 3.401197 3.401197 502 +queri 1 33 3.433987 3.433987 524 +within 0 33 3.433987 0.000000 525 +chapter 2 32 3.465736 6.931472 536 +concept 0 32 3.465736 0.000000 537 +computersci 0 30 3.555348 0.000000 562 +hard 0 30 3.555348 0.000000 563 +domain 0 30 3.555348 0.000000 564 +retriev 1 27 3.637586 3.637586 621 +though 0 27 3.637586 0.000000 622 +request 0 26 3.688879 0.000000 635 +constraint 0 26 3.688879 0.000000 636 +relev 0 26 3.688879 0.000000 637 +fundament 1 25 3.737670 3.737670 661 +deal 0 22 3.850148 0.000000 736 +recommend 0 22 3.850148 0.000000 737 +sort 0 22 3.850148 0.000000 738 +tent 0 22 3.850148 0.000000 739 +similar 0 21 3.912023 0.000000 771 +reserv 1 20 3.951244 3.951244 808 +nice 0 20 3.951244 0.000000 809 +minut 0 20 3.951244 0.000000 810 +entir 0 20 3.951244 0.000000 811 +break 0 20 3.951244 0.000000 812 +five 0 19 4.007333 0.000000 841 +along 0 18 4.060443 0.000000 878 +accept 0 18 4.060443 0.000000 879 +encourag 0 18 4.060443 0.000000 880 +vector 1 16 4.174387 4.174387 961 +permit 0 16 4.174387 0.000000 962 +alreadi 0 16 4.174387 0.000000 963 +pagec 0 15 4.248495 0.000000 1011 +indic 0 15 4.248495 0.000000 1013 +rank 0 14 4.317488 0.000000 1063 +latex 0 14 4.317488 0.000000 1064 +calculu 1 12 4.465908 4.465908 1203 +prelim 1 12 4.465908 4.465908 1201 +weight 0 12 4.465908 0.000000 1204 +worth 1 11 4.553877 4.553877 1294 +summar 0 11 4.553877 0.000000 1295 +benjamin 0 11 4.553877 0.000000 1296 +bill 0 11 4.553877 0.000000 1297 +relationship 1 10 4.653960 4.653960 1383 +forc 0 10 4.653960 0.000000 1384 +recoveri 1 9 4.753590 4.753590 1474 +crash 1 8 4.875197 4.875197 1616 +databasesystem 0 8 4.875197 0.000000 1617 +hash 0 8 4.875197 0.000000 1618 +cum 0 8 4.875197 0.000000 1619 +attach 0 7 5.010635 0.000000 1785 +silberschatz 1 6 5.164786 5.164786 1978 +textual 0 6 5.164786 0.000000 1979 +alphabet 0 6 5.164786 0.000000 1980 +phrase 0 5 5.347108 0.000000 2242 +ross 0 5 5.347108 0.000000 2243 +tupl 0 5 5.347108 0.000000 2244 +marco 0 4 5.568345 0.000000 2589 +clearli 0 4 5.568345 0.000000 2590 +entiti 1 3 5.857933 5.857933 3096 +roughli 0 3 5.857933 0.000000 3097 +singhal 0 3 5.857933 0.000000 3098 +sendmail 0 3 5.857933 0.000000 3099 +korth 1 2 6.263398 6.263398 4051 +aguilera 1 2 6.263398 6.263398 4052 +amith 1 2 6.263398 6.263398 4053 +thegroup 1 2 6.263398 6.263398 4054 +universityspr 0 2 6.263398 0.000000 4055 +introductionthi 0 2 6.263398 0.000000 4056 +queryoptim 0 2 6.263398 0.000000 4057 +prerequisitesc 0 2 6.263398 0.000000 4058 +elmasri 0 2 6.263398 0.000000 4059 +salton 0 2 6.263398 0.000000 4060 +amitsingh 0 2 6.263398 0.000000 4061 +yamasani 0 2 6.263398 0.000000 4062 +ofyour 0 2 6.263398 0.000000 4063 +ofcours 0 2 6.263398 0.000000 4064 +throughth 0 2 6.263398 0.000000 4065 +iti 0 2 6.263398 0.000000 4066 +dole 0 2 6.263398 0.000000 4067 +schedulethi 0 2 6.263398 0.000000 4068 +availablethursdai 1 1 6.957497 6.957497 6253 +duetuesdai 1 1 6.957497 6.957497 6254 +regrad 1 1 6.957497 6.957497 6255 +retrievalthursdai 1 1 6.957497 6.957497 6256 +retrievaldepart 0 1 6.957497 0.000000 6257 +gradeshav 0 1 6.957497 0.000000 6258 +twothird 0 1 6.957497 0.000000 6259 +systemsinclud 0 1 6.957497 0.000000 6260 +transactionprocess 0 1 6.957497 0.000000 6261 +usefulinform 0 1 6.957497 0.000000 6262 +willcov 0 1 6.957497 0.000000 6263 +invert 0 1 6.957497 0.000000 6264 +smartsystem 0 1 6.957497 0.000000 6265 +relevancefeedback 0 1 6.957497 0.000000 6266 +thesaurusconstruct 0 1 6.957497 0.000000 6267 +automatictext 0 1 6.957497 0.000000 6268 +placetuesdai 0 1 6.957497 0.000000 6269 +thurston 0 1 6.957497 0.000000 6270 +booksdatabas 0 1 6.957497 0.000000 6271 +mcgrawhil 0 1 6.957497 0.000000 6272 +andnavath 0 1 6.957497 0.000000 6273 +byullman 0 1 6.957497 0.000000 6274 +photocopiedmateri 0 1 6.957497 0.000000 6275 +sophia 0 1 6.957497 0.000000 6276 +georgiakaki 0 1 6.957497 0.000000 6277 +officehour 0 1 6.957497 0.000000 6278 +gradingexam 0 1 6.957497 0.000000 6279 +yourfin 0 1 6.957497 0.000000 6280 +policiesy 0 1 6.957497 0.000000 6281 +samegrad 0 1 6.957497 0.000000 6282 +tuesdayand 0 1 6.957497 0.000000 6283 +illeg 0 1 6.957497 0.000000 6284 +latexif 0 1 6.957497 0.000000 6285 +goodopportun 0 1 6.957497 0.000000 6286 +submissionpleas 0 1 6.957497 0.000000 6287 +clinton 0 1 6.957497 0.000000 6288 +perot 0 1 6.957497 0.000000 6289 +homeworksgrad 0 1 6.957497 0.000000 6290 +sortedalphabet 0 1 6.957497 0.000000 6291 +thecov 0 1 6.957497 0.000000 6292 +pagefollow 0 1 6.957497 0.000000 6293 +policyal 0 1 6.957497 0.000000 6294 +inwrit 0 1 6.957497 0.000000 6295 +referto 0 1 6.957497 0.000000 6296 +modelhomework 0 1 6.957497 0.000000 6297 +weightingthursdai 0 1 6.957497 0.000000 6298 +indexinghomework 0 1 6.957497 0.000000 6299 +evaluationtuesdai 0 1 6.957497 0.000000 6300 +feedbackthursdai 0 1 6.957497 0.000000 6301 +clusteringhomework 0 1 6.957497 0.000000 6302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..a15567f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +hour 1 165 1.791759 1.791759 46 +file 1 132 1.945910 1.945910 70 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +postscript 1 131 2.079442 2.079442 90 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +text 1 98 2.302585 2.302585 133 +peopl 0 96 2.302585 0.000000 132 +stuff 1 87 2.484907 2.484907 171 +resourc 0 81 2.484907 0.000000 172 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +come 0 78 2.564949 0.000000 202 +handout 0 64 2.772589 0.000000 263 +evalu 0 64 2.772589 0.000000 266 +collect 0 65 2.772589 0.000000 268 +new 0 64 2.772589 0.000000 262 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +suggest 0 53 2.944439 0.000000 331 +profession 0 51 2.995732 0.000000 345 +give 0 50 3.044522 0.000000 359 +electron 0 47 3.091042 0.000000 379 +submit 0 39 3.258097 0.000000 440 +respons 0 37 3.332205 0.000000 476 +procedur 0 36 3.367296 0.000000 488 +print 1 34 3.401197 3.401197 503 +quot 0 29 3.583519 0.000000 582 +ask 0 28 3.610918 0.000000 597 +wai 0 25 3.737670 0.000000 662 +leav 0 21 3.912023 0.000000 772 +break 0 20 3.951244 0.000000 812 +nice 0 20 3.951244 0.000000 809 +modif 0 17 4.110874 0.000000 913 +mayb 0 15 4.248495 0.000000 1014 +convert 1 13 4.382027 4.382027 1122 +social 0 13 4.382027 0.000000 1123 +misc 0 13 4.382027 0.000000 1124 +submiss 0 11 4.553877 0.000000 1298 +recit 1 9 4.753590 4.753590 1475 +admin 0 9 4.753590 0.000000 1476 +joke 0 8 4.875197 0.000000 1620 +printer 0 8 4.875197 0.000000 1621 +ethic 0 7 5.010635 0.000000 1786 +header 0 7 5.010635 0.000000 1787 +pfile 1 3 5.857933 5.857933 3100 +sumedh 0 3 5.857933 0.000000 3101 +enscript 1 2 6.263398 6.263398 4069 +incl 0 2 6.263398 0.000000 4070 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..c2892f40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +also 0 259 1.386294 0.000000 28 +distribut 1 162 1.791759 1.791759 51 +implement 0 152 1.791759 0.000000 52 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +real 0 93 2.397895 0.000000 144 +present 0 91 2.397895 0.000000 145 +comment 0 93 2.397895 0.000000 146 +practic 1 70 2.708050 2.708050 246 +complex 0 64 2.772589 0.000000 269 +interact 0 62 2.772589 0.000000 270 +descript 0 64 2.772589 0.000000 271 +plan 0 65 2.772589 0.000000 272 +simpl 0 60 2.833213 0.000000 298 +semest 0 58 2.890372 0.000000 312 +instruct 0 53 2.944439 0.000000 332 +tabl 0 51 2.995732 0.000000 346 +basic 0 50 3.044522 0.000000 360 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +rang 1 30 3.555348 3.555348 565 +depend 0 29 3.583519 0.000000 583 +progress 0 28 3.610918 0.000000 598 +aspect 0 25 3.737670 0.000000 663 +size 0 23 3.806662 0.000000 713 +varieti 0 22 3.850148 0.000000 740 +practicum 1 16 4.174387 4.174387 960 +choos 0 16 4.174387 0.000000 964 +signific 0 13 4.382027 0.000000 1125 +werner 0 10 4.653960 0.000000 1385 +vogel 0 8 4.875197 0.000000 1622 +earn 0 7 5.010635 0.000000 1788 +theywil 0 3 5.857933 0.000000 3102 +contentspag 0 3 5.857933 0.000000 3103 +offersa 0 2 6.263398 0.000000 4071 +systemsor 0 1 6.957497 0.000000 6303 +dirti 0 1 6.957497 0.000000 6304 +internetworkingto 0 1 6.957497 0.000000 6305 +teamsof 0 1 6.957497 0.000000 6306 +trough 0 1 6.957497 0.000000 6307 +complexityof 0 1 6.957497 0.000000 6308 +offcial 0 1 6.957497 0.000000 6309 +pageslink 0 1 6.957497 0.000000 6310 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..0e11d9b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +design 0 213 1.386294 0.000000 25 +fall 1 181 1.609438 1.609438 40 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +avail 1 169 1.791759 1.791759 48 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +problem 1 147 1.945910 1.945910 75 +professor 1 137 1.945910 1.945910 76 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +tool 1 117 2.079442 2.079442 93 +document 0 121 2.079442 0.000000 89 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +part 1 98 2.302585 2.302585 129 +need 0 98 2.302585 0.000000 135 +graphic 0 90 2.397895 0.000000 147 +follow 0 92 2.397895 0.000000 143 +homepag 0 93 2.397895 0.000000 148 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +solut 1 82 2.484907 2.484907 162 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +start 0 83 2.484907 0.000000 173 +refer 1 78 2.564949 2.564949 203 +homework 0 79 2.564949 0.000000 193 +server 0 76 2.564949 0.000000 204 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +septemb 0 65 2.772589 0.000000 274 +semest 0 58 2.890372 0.000000 312 +get 0 46 3.091042 0.000000 380 +announc 0 40 3.258097 0.000000 441 +electr 0 38 3.295837 0.000000 461 +manual 0 35 3.401197 0.000000 504 +intend 0 28 3.610918 0.000000 599 +outlin 1 17 4.110874 4.110874 914 +intro 0 17 4.110874 0.000000 915 +anyon 0 17 4.110874 0.000000 916 +conduct 0 14 4.317488 0.000000 1065 +train 0 14 4.317488 0.000000 1066 +valid 0 11 4.553877 0.000000 1299 +literatur 0 11 4.553877 0.000000 1300 +surf 0 11 4.553877 0.000000 1301 +andcomput 0 8 4.875197 0.000000 1623 +architect 0 8 4.875197 0.000000 1624 +theproject 0 6 5.164786 0.000000 1981 +gopher 0 6 5.164786 0.000000 1982 +mentor 1 4 5.568345 5.568345 2591 +sole 0 4 5.568345 0.000000 2592 +addition 0 4 5.568345 0.000000 2593 +saluja 1 3 5.857933 5.857933 3104 +duedat 0 3 5.857933 0.000000 3105 +wiscinfo 0 3 5.857933 0.000000 3106 +kewal 1 2 6.263398 6.263398 4072 +studentsenrol 0 2 6.263398 0.000000 4073 +sorin 1 1 6.957497 6.957497 6311 +generalinform 0 1 6.957497 0.000000 6312 +midtermsyllabu 0 1 6.957497 0.000000 6313 +midtermi 0 1 6.957497 0.000000 6314 +caeworkst 0 1 6.957497 0.000000 6315 +whomai 0 1 6.957497 0.000000 6316 +throughbold_brows 0 1 6.957497 0.000000 6317 +gettingstart 0 1 6.957497 0.000000 6318 +workbook 0 1 6.957497 0.000000 6319 +quicksim 0 1 6.957497 0.000000 6320 +trainingworkbook 0 1 6.957497 0.000000 6321 +exersis 0 1 6.957497 0.000000 6322 +thesedocu 0 1 6.957497 0.000000 6323 +uwengin 0 1 6.957497 0.000000 6324 +pmcst 0 1 6.957497 0.000000 6325 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..bb59f9c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +click 0 142 1.945910 0.000000 78 +schedul 0 119 2.079442 0.000000 85 +pleas 0 113 2.197225 0.000000 114 +grade 1 90 2.397895 2.397895 142 +homepag 0 93 2.397895 0.000000 148 +section 0 94 2.397895 0.000000 149 +info 1 85 2.484907 2.484907 176 +exam 1 86 2.484907 2.484907 169 +stuff 0 87 2.484907 0.000000 171 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +homework 1 79 2.564949 2.564949 193 +refer 0 78 2.564949 0.000000 203 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +browser 0 56 2.890372 0.000000 313 +tabl 1 51 2.995732 2.995732 346 +run 0 51 2.995732 0.000000 347 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +abl 0 30 3.555348 0.000000 566 +frame 0 24 3.761200 0.000000 684 +handl 0 24 3.761200 0.000000 685 +individu 0 13 4.382027 0.000000 1126 +criteria 0 9 4.753590 0.000000 1477 +preced 0 3 5.857933 0.000000 3107 +goofi 0 2 6.263398 0.000000 4074 +herelink 0 1 6.957497 0.000000 6326 +motw 0 1 6.957497 0.000000 6327 +stuffnot 0 1 6.957497 0.000000 6328 +edupag 0 1 6.957497 0.000000 6329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..2fbde088 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +cours 1 273 1.098612 1.098612 15 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +access 1 102 2.302585 2.302585 136 +user 0 104 2.302585 0.000000 137 +chang 1 82 2.484907 2.484907 163 +requir 0 81 2.484907 0.000000 167 +info 0 85 2.484907 0.000000 176 +materi 1 75 2.639057 2.639057 221 +window 1 68 2.708050 2.708050 242 +java 0 70 2.708050 0.000000 248 +browser 0 56 2.890372 0.000000 313 +netscap 0 44 3.135494 0.000000 395 +directori 0 45 3.135494 0.000000 396 +announc 0 40 3.258097 0.000000 441 +request 0 26 3.688879 0.000000 635 +interpret 1 24 3.761200 3.761200 686 +thu 0 21 3.912023 0.000000 773 +util 0 21 3.912023 0.000000 774 +behavior 0 18 4.060443 0.000000 881 +attempt 0 17 4.110874 0.000000 917 +pagec 0 15 4.248495 0.000000 1011 +correctli 0 9 4.753590 0.000000 1478 +dylan 1 8 4.875197 4.875197 1625 +password 0 4 5.568345 0.000000 2594 +parter 0 2 6.263398 0.000000 4075 +noodll 0 1 6.957497 0.000000 6330 +inconsist 0 1 6.957497 0.000000 6331 +partnerjoin 0 1 6.957497 0.000000 6332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..dbc1eb7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,360 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 0 299 1.098612 0.000000 13 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +email 0 220 1.386294 0.000000 29 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +assign 2 135 1.945910 3.891820 66 +problem 2 147 1.945910 3.891820 75 +object 2 138 1.945910 3.891820 79 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +note 0 142 1.945910 0.000000 67 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +site 2 106 2.197225 4.394450 119 +topic 1 114 2.197225 2.197225 110 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +person 1 111 2.197225 2.197225 117 +teach 0 108 2.197225 0.000000 112 +mathemat 0 108 2.197225 0.000000 123 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +structur 0 106 2.197225 0.000000 105 +techniqu 1 99 2.302585 2.302585 138 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +access 0 102 2.302585 0.000000 136 +peopl 0 96 2.302585 0.000000 132 +memori 0 101 2.302585 0.000000 139 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +comment 0 93 2.397895 0.000000 146 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +info 0 85 2.484907 0.000000 176 +contain 0 81 2.484907 0.000000 174 +chang 0 82 2.484907 0.000000 163 +learn 0 86 2.484907 0.000000 170 +solut 0 82 2.484907 0.000000 162 +environ 0 84 2.484907 0.000000 177 +orient 1 80 2.564949 2.564949 205 +mondai 1 77 2.564949 2.564949 206 +exampl 1 77 2.564949 2.564949 195 +state 1 76 2.564949 2.564949 207 +dynam 0 76 2.564949 0.000000 194 +good 0 77 2.564949 0.000000 200 +want 0 79 2.564949 0.000000 199 +complet 0 77 2.564949 0.000000 208 +come 0 78 2.564949 0.000000 202 +server 0 76 2.564949 0.000000 204 +optim 0 79 2.564949 0.000000 197 +tuesdai 1 73 2.639057 2.639057 219 +materi 1 75 2.639057 2.639057 221 +upson 1 71 2.639057 2.639057 218 +name 1 72 2.639057 2.639057 220 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +thursdai 1 70 2.708050 2.708050 241 +goal 0 66 2.708050 0.000000 250 +window 0 68 2.708050 0.000000 242 +would 0 67 2.708050 0.000000 251 +function 1 62 2.772589 2.772589 275 +evalu 1 64 2.772589 2.772589 266 +abstract 1 62 2.772589 2.772589 276 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +creat 0 63 2.772589 0.000000 277 +written 0 63 2.772589 0.000000 278 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +best 1 59 2.833213 2.833213 299 +colleg 0 61 2.833213 0.000000 300 +type 0 61 2.833213 0.000000 296 +think 1 57 2.890372 2.890372 314 +browser 1 56 2.890372 2.890372 313 +major 0 56 2.890372 0.000000 315 +direct 0 57 2.890372 0.000000 316 +variou 0 56 2.890372 0.000000 317 +reason 0 57 2.890372 0.000000 318 +cover 1 55 2.944439 2.944439 329 +allow 0 53 2.944439 0.000000 333 +instruct 0 53 2.944439 0.000000 332 +local 0 55 2.944439 0.000000 334 +date 1 51 2.995732 2.995732 344 +week 0 52 2.995732 0.000000 343 +run 0 51 2.995732 0.000000 347 +set 2 50 3.044522 6.089044 361 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +electron 0 47 3.091042 0.000000 379 +even 1 45 3.135494 3.135494 393 +textbook 0 44 3.135494 0.000000 397 +netscap 0 44 3.135494 0.000000 395 +long 0 43 3.178054 0.000000 413 +http 0 41 3.218876 0.000000 420 +combin 0 42 3.218876 0.000000 421 +howev 0 41 3.218876 0.000000 422 +late 1 40 3.258097 3.258097 439 +must 1 40 3.258097 3.258097 442 +submit 1 39 3.258097 3.258097 440 +correct 1 38 3.295837 3.295837 462 +credit 0 38 3.295837 0.000000 460 +respons 0 37 3.332205 0.000000 476 +hand 0 37 3.332205 0.000000 475 +staff 1 36 3.367296 3.367296 490 +procedur 1 36 3.367296 3.367296 488 +ofth 0 36 3.367296 0.000000 491 +download 0 36 3.367296 0.000000 489 +tree 0 36 3.367296 0.000000 492 +post 1 35 3.401197 3.401197 505 +either 0 35 3.401197 0.000000 506 +jame 0 35 3.401197 0.000000 507 +word 0 34 3.401197 0.000000 508 +approxim 0 35 3.401197 0.000000 509 +return 0 34 3.401197 0.000000 502 +singl 0 34 3.401197 0.000000 510 +random 0 34 3.401197 0.000000 511 +concept 0 32 3.465736 0.000000 537 +given 0 32 3.465736 0.000000 538 +extend 0 32 3.465736 0.000000 539 +express 0 32 3.465736 0.000000 540 +often 0 31 3.496508 0.000000 551 +rang 0 30 3.555348 0.000000 565 +computersci 0 30 3.555348 0.000000 562 +robert 0 30 3.555348 0.000000 567 +focus 0 29 3.583519 0.000000 584 +ask 0 28 3.610918 0.000000 597 +held 0 28 3.610918 0.000000 600 +campu 0 27 3.637586 0.000000 623 +symbol 0 27 3.637586 0.000000 620 +request 1 26 3.688879 3.688879 635 +rule 0 26 3.688879 0.000000 638 +background 0 25 3.737670 0.000000 664 +valu 0 25 3.737670 0.000000 665 +consult 1 24 3.761200 3.761200 687 +reach 1 24 3.761200 3.761200 688 +pattern 0 24 3.761200 0.000000 689 +interpret 0 24 3.761200 0.000000 686 +higher 0 24 3.761200 0.000000 690 +togeth 1 23 3.806662 3.806662 714 +variabl 0 23 3.806662 0.000000 715 +inth 0 22 3.850148 0.000000 741 +almost 0 22 3.850148 0.000000 742 +period 0 22 3.850148 0.000000 743 +thu 1 21 3.912023 3.912023 773 +annot 0 21 3.912023 0.000000 775 +half 0 21 3.912023 0.000000 776 +tell 0 21 3.912023 0.000000 777 +sure 1 20 3.951244 3.951244 813 +facil 1 20 3.951244 3.951244 814 +wonder 0 20 3.951244 0.000000 815 +break 0 20 3.951244 0.000000 812 +exercis 0 19 4.007333 0.000000 842 +els 0 19 4.007333 0.000000 843 +accept 1 18 4.060443 4.060443 879 +encourag 0 18 4.060443 0.000000 880 +account 0 18 4.060443 0.000000 882 +whether 0 17 4.110874 0.000000 918 +weekli 0 17 4.110874 0.000000 919 +macintosh 0 17 4.110874 0.000000 920 +outlin 0 17 4.110874 0.000000 914 +differenti 0 17 4.110874 0.000000 921 +match 0 16 4.174387 0.000000 965 +modern 0 16 4.174387 0.000000 966 +transfer 0 16 4.174387 0.000000 967 +earli 0 16 4.174387 0.000000 968 +easi 0 16 4.174387 0.000000 969 +stream 1 15 4.248495 4.248495 1015 +capabl 0 15 4.248495 0.000000 1016 +score 0 15 4.248495 0.000000 1017 +hierarch 0 15 4.248495 0.000000 1018 +borland 0 14 4.317488 0.000000 1067 +warn 0 14 4.317488 0.000000 1068 +polynomi 0 14 4.317488 0.000000 1069 +recurs 1 13 4.382027 4.382027 1127 +someon 1 13 4.382027 4.382027 1128 +suit 0 13 4.382027 0.000000 1129 +joint 0 13 4.382027 0.000000 1130 +skill 1 12 4.465908 4.465908 1205 +prelim 1 12 4.465908 4.465908 1201 +iter 0 12 4.465908 0.000000 1206 +broad 1 11 4.553877 4.553877 1302 +appl 0 11 4.553877 0.000000 1303 +induct 0 11 4.553877 0.000000 1304 +queue 1 10 4.653960 4.653960 1386 +sundai 0 10 4.653960 0.000000 1387 +arithmet 0 10 4.653960 0.000000 1388 +stack 0 10 4.653960 0.000000 1389 +introductori 1 9 4.753590 4.753590 1479 +recit 0 9 4.753590 0.000000 1475 +preliminari 0 9 4.753590 0.000000 1480 +andth 0 9 4.753590 0.000000 1481 +dylan 2 8 4.875197 9.750394 1625 +simpli 0 8 4.875197 0.000000 1626 +matter 0 8 4.875197 0.000000 1627 +on 0 8 4.875197 0.000000 1628 +justin 1 7 5.010635 5.010635 1789 +happen 0 7 5.010635 0.000000 1790 +dispatch 0 7 5.010635 0.000000 1791 +prioriti 0 7 5.010635 0.000000 1792 +huttenloch 1 6 5.164786 5.164786 1983 +chosen 0 6 5.164786 0.000000 1984 +contract 0 6 5.164786 0.000000 1985 +garbag 0 6 5.164786 0.000000 1986 +conot 1 5 5.347108 5.347108 2245 +hardcopi 0 5 5.347108 0.000000 2246 +substitut 0 5 5.347108 0.000000 2247 +variat 0 5 5.347108 0.000000 2248 +password 1 4 5.568345 5.568345 2594 +toth 1 4 5.568345 5.568345 2595 +infinit 1 4 5.568345 5.568345 2596 +clearli 0 4 5.568345 0.000000 2590 +wherea 0 4 5.568345 0.000000 2597 +exposur 0 4 5.568345 0.000000 2598 +midnight 0 4 5.568345 0.000000 2599 +amaz 0 4 5.568345 0.000000 2600 +thiscours 0 4 5.568345 0.000000 2601 +catch 0 4 5.568345 0.000000 2602 +illus 0 4 5.568345 0.000000 2603 +szewczyk 1 3 5.857933 5.857933 3108 +voskuhl 1 3 5.857933 5.857933 3109 +useth 0 3 5.857933 0.000000 3110 +programsand 0 3 5.857933 0.000000 3111 +toolbox 0 3 5.857933 0.000000 3112 +programmingtechniqu 0 3 5.857933 0.000000 3113 +kimbal 0 3 5.857933 0.000000 3114 +andon 0 3 5.857933 0.000000 3115 +requirementsstud 0 3 5.857933 0.000000 3116 +immedi 0 3 5.857933 0.000000 3117 +jointli 0 3 5.857933 0.000000 3118 +doubt 0 3 5.857933 0.000000 3119 +argument 0 3 5.857933 0.000000 3120 +quotat 0 3 5.857933 0.000000 3121 +inherit 0 3 5.857933 0.000000 3122 +heap 0 3 5.857933 0.000000 3123 +exit 0 3 5.857933 0.000000 3124 +rangeof 1 2 6.263398 6.263398 4076 +standalon 1 2 6.263398 6.263398 4077 +developedat 0 2 6.263398 0.000000 4078 +orientedlanguag 0 2 6.263398 0.000000 4079 +therewil 0 2 6.263398 0.000000 4080 +combinationof 0 2 6.263398 0.000000 4081 +programmingproblem 0 2 6.263398 0.000000 4082 +youwork 0 2 6.263398 0.000000 4083 +growth 0 2 6.263398 0.000000 4084 +ugrad 1 1 6.957497 6.957497 6333 +idand 1 1 6.957497 6.957497 6334 +tobia 1 1 6.957497 6.957497 6335 +mayr 1 1 6.957497 6.957497 6336 +hamblin 1 1 6.957497 6.957497 6337 +mutabl 1 1 6.957497 6.957497 6338 +informationaugust 0 1 6.957497 0.000000 6339 +courseabout 0 1 6.957497 0.000000 6340 +notationthat 0 1 6.957497 0.000000 6341 +takec 0 1 6.957497 0.000000 6342 +programmingparadigm 0 1 6.957497 0.000000 6343 +imperativeprogram 0 1 6.957497 0.000000 6344 +goodform 0 1 6.957497 0.000000 6345 +probablytak 0 1 6.957497 0.000000 6346 +questionsor 0 1 6.957497 0.000000 6347 +serverwhich 0 1 6.957497 0.000000 6348 +answersa 0 1 6.957497 0.000000 6349 +thisweek 0 1 6.957497 0.000000 6350 +edubut 0 1 6.957497 0.000000 6351 +aboutproblem 0 1 6.957497 0.000000 6352 +upsonjam 0 1 6.957497 0.000000 6353 +tarobert 0 1 6.957497 0.000000 6354 +tajustin 0 1 6.957497 0.000000 6355 +taandra 0 1 6.957497 0.000000 6356 +ferencz 0 1 6.957497 0.000000 6357 +melissa 0 1 6.957497 0.000000 6358 +consultantwhen 0 1 6.957497 0.000000 6359 +meetlectur 0 1 6.957497 0.000000 6360 +andrecit 0 1 6.957497 0.000000 6361 +recitationsexpand 0 1 6.957497 0.000000 6362 +opportunityto 0 1 6.957497 0.000000 6363 +eachproblem 0 1 6.957497 0.000000 6364 +setsdu 0 1 6.957497 0.000000 6365 +mondayeven 0 1 6.957497 0.000000 6366 +consultinghour 0 1 6.957497 0.000000 6367 +voskuhltba 0 1 6.957497 0.000000 6368 +materialsther 0 1 6.957497 0.000000 6369 +handoutsand 0 1 6.957497 0.000000 6370 +implementedin 0 1 6.957497 0.000000 6371 +downloadonto 0 1 6.957497 0.000000 6372 +ontoyour 0 1 6.957497 0.000000 6373 +recentvers 0 1 6.957497 0.000000 6374 +gradeswil 0 1 6.957497 0.000000 6375 +thetot 0 1 6.957497 0.000000 6376 +willgener 0 1 6.957497 0.000000 6377 +followingclass 0 1 6.957497 0.000000 6378 +sittingdown 0 1 6.957497 0.000000 6379 +sink 0 1 6.957497 0.000000 6380 +beforesit 0 1 6.957497 0.000000 6381 +workmuch 0 1 6.957497 0.000000 6382 +jointassign 0 1 6.957497 0.000000 6383 +circumstancesmai 0 1 6.957497 0.000000 6384 +yourown 0 1 6.957497 0.000000 6385 +yougot 0 1 6.957497 0.000000 6386 +whenpeopl 0 1 6.957497 0.000000 6387 +lifeunpleas 0 1 6.957497 0.000000 6388 +facilitiescit 0 1 6.957497 0.000000 6389 +andpc 0 1 6.957497 0.000000 6390 +upsonmac 0 1 6.957497 0.000000 6391 +datesal 0 1 6.957497 0.000000 6392 +mondaynight 0 1 6.957497 0.000000 6393 +submityour 0 1 6.957497 0.000000 6394 +multimethod 0 1 6.957497 0.000000 6395 +heapsort 0 1 6.957497 0.000000 6396 +metacircular 0 1 6.957497 0.000000 6397 +nonloc 0 1 6.957497 0.000000 6398 +throw 0 1 6.957497 0.000000 6399 +quicksort 0 1 6.957497 0.000000 6400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..f517ed52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +cours 2 273 1.098612 2.197224 15 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +email 1 220 1.386294 1.386294 29 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +includ 1 208 1.609438 1.609438 42 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +document 0 121 2.079442 0.000000 89 +send 1 114 2.197225 2.197225 109 +pleas 1 113 2.197225 2.197225 114 +check 0 115 2.197225 0.000000 118 +memori 0 101 2.302585 0.000000 139 +section 1 94 2.397895 2.397895 149 +follow 0 92 2.397895 0.000000 143 +help 1 83 2.484907 2.484907 175 +control 0 82 2.484907 0.000000 164 +start 0 83 2.484907 0.000000 173 +homework 1 79 2.564949 2.564949 193 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +materi 1 75 2.639057 2.639057 221 +organ 0 65 2.772589 0.000000 265 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +allow 1 53 2.944439 2.944439 333 +processor 0 54 2.944439 0.000000 335 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +date 1 51 2.995732 2.995732 344 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +made 0 44 3.135494 0.000000 398 +staff 0 36 3.367296 0.000000 490 +represent 0 35 3.401197 0.000000 512 +post 0 35 3.401197 0.000000 505 +bookmark 0 26 3.688879 0.000000 639 +request 0 26 3.688879 0.000000 635 +consult 0 24 3.761200 0.000000 687 +hierarchi 0 22 3.850148 0.000000 744 +sequenti 0 22 3.850148 0.000000 745 +annot 1 21 3.912023 3.912023 775 +path 0 21 3.912023 0.000000 778 +unit 0 21 3.912023 0.000000 779 +separ 0 19 4.007333 0.000000 844 +appropri 0 18 4.060443 0.000000 883 +account 0 18 4.060443 0.000000 882 +otherwis 0 17 4.110874 0.000000 922 +sign 0 16 4.174387 0.000000 970 +circuit 0 13 4.382027 0.000000 1131 +difficulti 0 13 4.382027 0.000000 1132 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +assembl 0 12 4.465908 0.000000 1207 +combinatori 0 8 4.875197 0.000000 1629 +interrupt 0 7 5.010635 0.000000 1793 +saturdai 0 7 5.010635 0.000000 1794 +hidden 0 6 5.164786 0.000000 1987 +conot 1 5 5.347108 5.347108 2245 +registr 1 5 5.347108 5.347108 2249 +microprogram 0 4 5.568345 0.000000 2604 +eickenfal 0 3 5.857933 0.000000 3125 +kimbal 0 3 5.857933 0.000000 3114 +helpif 0 3 5.857933 0.000000 3126 +mate 0 3 5.857933 0.000000 3127 +encount 0 3 5.857933 0.000000 3128 +btopic 0 2 6.263398 0.000000 4085 +organizationthorsten 0 1 6.957497 0.000000 6401 +materialsal 0 1 6.957497 0.000000 6402 +listlist 0 1 6.957497 0.000000 6403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..8df82188 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +hour 1 165 1.791759 1.791759 46 +file 1 132 1.945910 1.945910 70 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +postscript 1 131 2.079442 2.079442 90 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +text 1 98 2.302585 2.302585 133 +peopl 0 96 2.302585 0.000000 132 +stuff 1 87 2.484907 2.484907 171 +resourc 0 81 2.484907 0.000000 172 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +mondai 0 77 2.564949 0.000000 206 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +handout 0 64 2.772589 0.000000 263 +evalu 0 64 2.772589 0.000000 266 +collect 0 65 2.772589 0.000000 268 +new 0 64 2.772589 0.000000 262 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +suggest 0 53 2.944439 0.000000 331 +profession 0 51 2.995732 0.000000 345 +give 0 50 3.044522 0.000000 359 +electron 0 47 3.091042 0.000000 379 +submit 0 39 3.258097 0.000000 440 +respons 0 37 3.332205 0.000000 476 +procedur 0 36 3.367296 0.000000 488 +print 1 34 3.401197 3.401197 503 +quot 0 29 3.583519 0.000000 582 +held 0 28 3.610918 0.000000 600 +ask 0 28 3.610918 0.000000 597 +wai 0 25 3.737670 0.000000 662 +leav 0 21 3.912023 0.000000 772 +break 0 20 3.951244 0.000000 812 +nice 0 20 3.951244 0.000000 809 +modif 0 17 4.110874 0.000000 913 +mayb 0 15 4.248495 0.000000 1014 +floor 0 14 4.317488 0.000000 1070 +convert 1 13 4.382027 4.382027 1122 +social 0 13 4.382027 0.000000 1123 +misc 0 13 4.382027 0.000000 1124 +submiss 0 11 4.553877 0.000000 1298 +recit 1 9 4.753590 4.753590 1475 +admin 0 9 4.753590 0.000000 1476 +joke 0 8 4.875197 0.000000 1620 +printer 0 8 4.875197 0.000000 1621 +ethic 0 7 5.010635 0.000000 1786 +header 0 7 5.010635 0.000000 1787 +pfile 1 3 5.857933 5.857933 3100 +sumedh 0 3 5.857933 0.000000 3101 +enscript 1 2 6.263398 6.263398 4069 +incl 0 2 6.263398 0.000000 4070 +csuglab 0 1 6.957497 0.000000 6404 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..b8b124a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +hall 1 146 1.945910 1.945910 65 +note 0 142 1.945910 0.000000 67 +structur 0 106 2.197225 0.000000 105 +solut 2 82 2.484907 4.969814 162 +dynam 0 76 2.564949 0.000000 194 +upson 1 71 2.639057 2.639057 218 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +new 0 64 2.772589 0.000000 262 +unix 0 58 2.890372 0.000000 308 +fridai 0 44 3.135494 0.000000 390 +static 0 27 3.637586 0.000000 619 +practicum 0 16 4.174387 0.000000 960 +pagec 0 15 4.248495 0.000000 1011 +prelim 1 12 4.465908 4.465908 1201 +huang 0 12 4.465908 0.000000 1202 +systemsc 0 11 4.553877 0.000000 1293 +cheng 0 10 4.653960 0.000000 1381 +lili 0 5 5.347108 0.000000 2240 +filesystem 0 4 5.568345 0.000000 2587 +groupcours 0 3 5.857933 0.000000 3092 +ychuang 0 3 5.857933 0.000000 3093 +budiu 1 2 6.263398 6.263398 4042 +systemkenneth 0 2 6.263398 0.000000 4043 +birmanc 0 2 6.263398 0.000000 4044 +syllabuslectur 0 2 6.263398 0.000000 4045 +taslili 0 2 6.263398 0.000000 4046 +mihai 0 2 6.263398 0.000000 4047 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..aef64792 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +includ 0 208 1.609438 0.000000 42 +avail 1 169 1.791759 1.791759 48 +read 0 154 1.791759 0.000000 47 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +postscript 1 131 2.079442 2.079442 90 +welcom 0 122 2.079442 0.000000 99 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +look 0 107 2.197225 0.000000 115 +need 0 98 2.302585 0.000000 135 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +solut 1 82 2.484907 2.484907 162 +academ 0 82 2.484907 0.000000 178 +chang 0 82 2.484907 0.000000 163 +start 0 83 2.484907 0.000000 173 +novemb 0 81 2.484907 0.000000 179 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +complet 0 77 2.564949 0.000000 208 +materi 1 75 2.639057 2.639057 221 +intellig 0 72 2.639057 0.000000 225 +appli 0 71 2.639057 0.000000 226 +tuesdai 0 73 2.639057 0.000000 219 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +result 0 65 2.772589 0.000000 281 +special 0 56 2.890372 0.000000 320 +found 0 53 2.944439 0.000000 337 +date 0 51 2.995732 0.000000 344 +still 1 50 3.044522 3.044522 362 +right 0 48 3.044522 0.000000 363 +move 0 47 3.091042 0.000000 382 +get 0 46 3.091042 0.000000 380 +netscap 0 44 3.135494 0.000000 395 +midterm 0 45 3.135494 0.000000 392 +offer 1 43 3.178054 3.178054 414 +third 0 43 3.178054 0.000000 412 +announc 0 40 3.258097 0.000000 441 +either 0 35 3.401197 0.000000 506 +queri 0 33 3.433987 0.000000 524 +posit 0 31 3.496508 0.000000 552 +abl 0 30 3.555348 0.000000 566 +specifi 0 30 3.555348 0.000000 568 +exist 0 30 3.555348 0.000000 569 +limit 0 29 3.583519 0.000000 585 +load 0 28 3.610918 0.000000 601 +concern 0 25 3.737670 0.000000 666 +begin 0 23 3.806662 0.000000 716 +variabl 0 23 3.806662 0.000000 715 +defin 0 22 3.850148 0.000000 746 +fact 1 21 3.912023 3.912023 780 +longer 0 20 3.951244 0.000000 816 +assum 0 19 4.007333 0.000000 845 +scott 1 18 4.060443 4.060443 884 +account 0 18 4.060443 0.000000 882 +statu 0 18 4.060443 0.000000 885 +previous 0 17 4.110874 0.000000 923 +otherwis 0 17 4.110874 0.000000 922 +alreadi 0 16 4.174387 0.000000 963 +portion 0 16 4.174387 0.000000 971 +atth 0 15 4.248495 0.000000 1019 +success 0 10 4.653960 0.000000 1390 +guarante 0 10 4.653960 0.000000 1391 +kevin 1 9 4.753590 4.753590 1482 +slightli 0 7 5.010635 0.000000 1795 +paramet 0 7 5.010635 0.000000 1796 +planner 0 7 5.010635 0.000000 1797 +hunt 0 7 5.010635 0.000000 1798 +remind 0 7 5.010635 0.000000 1799 +schema 0 6 5.164786 0.000000 1988 +bind 1 5 5.347108 5.347108 2250 +constant 0 5 5.347108 0.000000 2251 +seriou 0 5 5.347108 0.000000 2252 +clarif 0 5 5.347108 0.000000 2253 +clair 1 4 5.568345 5.568345 2605 +shouldn 0 4 5.568345 0.000000 2606 +unless 0 4 5.568345 0.000000 2607 +meanwhil 0 3 5.857933 0.000000 3129 +worri 0 3 5.857933 0.000000 3130 +add 0 3 5.857933 0.000000 3131 +thec 0 3 5.857933 0.000000 3132 +pagesc 0 3 5.857933 0.000000 3133 +pagecsfound 0 2 6.263398 0.000000 4086 +dodg 1 1 6.957497 6.957497 6405 +notethat 0 1 6.957497 0.000000 6406 +rubix 0 1 6.957497 0.000000 6407 +thefunct 0 1 6.957497 0.000000 6408 +rearrang 0 1 6.957497 0.000000 6409 +appeas 0 1 6.957497 0.000000 6410 +var 0 1 6.957497 0.000000 6411 +bracket 0 1 6.957497 0.000000 6412 +youus 0 1 6.957497 0.000000 6413 +machinesshould 0 1 6.957497 0.000000 6414 +sbin 0 1 6.957497 0.000000 6415 +ksaunder 0 1 6.957497 0.000000 6416 +sbinfor 0 1 6.957497 0.000000 6417 +gremlin 0 1 6.957497 0.000000 6418 +codefor 0 1 6.957497 0.000000 6419 +andget 0 1 6.957497 0.000000 6420 +uponcomplet 0 1 6.957497 0.000000 6421 +thoseus 0 1 6.957497 0.000000 6422 +zeroon 0 1 6.957497 0.000000 6423 +asspecifi 0 1 6.957497 0.000000 6424 +oneassign 0 1 6.957497 0.000000 6425 +vanto 0 1 6.957497 0.000000 6426 +thisclarif 0 1 6.957497 0.000000 6427 +newhomework 0 1 6.957497 0.000000 6428 +coursemateri 0 1 6.957497 0.000000 6429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..94457dc3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +fall 1 181 1.609438 1.609438 40 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +note 1 142 1.945910 1.945910 67 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +studi 0 120 2.079442 0.000000 91 +pleas 0 113 2.197225 0.000000 114 +exam 0 86 2.484907 0.000000 169 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +homework 1 79 2.564949 2.564949 193 +integr 0 67 2.708050 0.000000 245 +room 0 59 2.833213 0.000000 301 +date 0 51 2.995732 0.000000 344 +set 0 50 3.044522 0.000000 361 +revis 0 26 3.688879 0.000000 640 +pagec 0 15 4.248495 0.000000 1011 +automata 0 13 4.382027 0.000000 1135 +prelim 1 12 4.465908 4.465908 1201 +hardcopi 0 5 5.347108 0.000000 2246 +incorrect 0 3 5.857933 0.000000 3134 +nikolai 0 2 6.263398 0.000000 4087 +theorywelcom 0 1 6.957497 0.000000 6430 +guideannounc 0 1 6.957497 0.000000 6431 +erratum 0 1 6.957497 0.000000 6432 +hourscod 0 1 6.957497 0.000000 6433 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..91b2c75b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 3 340 1.098612 3.295836 18 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +paper 2 205 1.609438 3.218876 38 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +includ 0 208 1.609438 0.000000 42 +develop 2 174 1.791759 3.583518 53 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +click 1 142 1.945910 1.945910 78 +assign 1 135 1.945910 1.945910 66 +area 1 144 1.945910 1.945910 80 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +databas 2 122 2.079442 4.158884 86 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +schedul 0 119 2.079442 0.000000 85 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +place 1 106 2.197225 2.197225 124 +person 1 111 2.197225 2.197225 117 +manag 1 114 2.197225 2.197225 125 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +part 1 98 2.302585 2.302585 129 +advanc 1 99 2.302585 2.302585 130 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +associ 1 93 2.397895 2.397895 151 +proceed 0 93 2.397895 0.000000 152 +homepag 0 93 2.397895 0.000000 148 +question 0 91 2.397895 0.000000 141 +exam 2 86 2.484907 4.969814 169 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +larg 1 82 2.484907 2.484907 168 +librari 1 87 2.484907 2.484907 181 +info 0 85 2.484907 0.000000 176 +activ 0 84 2.484907 0.000000 182 +journal 0 83 2.484907 0.000000 183 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +control 0 82 2.484907 0.000000 164 +resourc 0 81 2.484907 0.000000 172 +refer 2 78 2.564949 5.129898 203 +complet 1 77 2.564949 2.564949 208 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +homework 0 79 2.564949 0.000000 193 +appear 0 78 2.564949 0.000000 210 +come 0 78 2.564949 0.000000 202 +optim 0 79 2.564949 0.000000 197 +know 0 80 2.564949 0.000000 198 +materi 1 75 2.639057 2.639057 221 +upson 1 71 2.639057 2.639057 218 +involv 1 71 2.639057 2.639057 227 +write 1 72 2.639057 2.639057 222 +tuesdai 1 73 2.639057 2.639057 219 +addit 0 74 2.639057 0.000000 228 +meet 0 72 2.639057 0.000000 229 +free 0 73 2.639057 0.000000 224 +thursdai 1 70 2.708050 2.708050 241 +test 1 66 2.708050 2.708050 252 +goal 1 66 2.708050 2.708050 250 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +import 1 65 2.772589 2.772589 282 +result 1 65 2.772589 2.772589 281 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +evalu 0 64 2.772589 0.000000 266 +handout 0 64 2.772589 0.000000 263 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +interact 0 62 2.772589 0.000000 270 +collect 0 65 2.772589 0.000000 268 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +type 0 61 2.833213 0.000000 296 +detail 1 57 2.890372 2.890372 321 +variou 0 56 2.890372 0.000000 317 +semest 0 58 2.890372 0.000000 312 +think 0 57 2.890372 0.000000 314 +reason 0 57 2.890372 0.000000 318 +cover 1 55 2.944439 2.944439 329 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +much 0 52 2.995732 0.000000 349 +date 0 51 2.995732 0.000000 344 +basic 1 50 3.044522 3.044522 360 +archiv 0 49 3.044522 0.000000 364 +give 0 50 3.044522 0.000000 359 +principl 0 48 3.044522 0.000000 357 +standard 0 48 3.044522 0.000000 365 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +understand 0 47 3.091042 0.000000 384 +textbook 1 44 3.135494 3.135494 397 +discuss 1 45 3.135494 3.135494 399 +midterm 1 45 3.135494 3.135494 392 +answer 0 45 3.135494 0.000000 391 +made 0 44 3.135494 0.000000 398 +even 0 45 3.135494 0.000000 393 +term 1 43 3.178054 3.178054 411 +offer 0 43 3.178054 0.000000 414 +around 0 43 3.178054 0.000000 415 +long 0 43 3.178054 0.000000 413 +edit 1 42 3.218876 3.218876 418 +compani 0 41 3.218876 0.000000 423 +examin 0 42 3.218876 0.000000 424 +howev 0 41 3.218876 0.000000 422 +review 0 42 3.218876 0.000000 425 +might 0 41 3.218876 0.000000 426 +transact 1 39 3.258097 3.258097 438 +form 1 39 3.258097 3.258097 443 +realli 0 40 3.258097 0.000000 444 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +prototyp 1 38 3.295837 3.295837 463 +industri 0 38 3.295837 0.000000 464 +close 0 38 3.295837 0.000000 465 +mean 0 37 3.332205 0.000000 477 +hand 0 37 3.332205 0.000000 475 +survei 1 35 3.401197 3.401197 513 +singl 0 34 3.401197 0.000000 510 +michael 0 35 3.401197 0.000000 514 +concurr 0 34 3.401197 0.000000 501 +queri 1 33 3.433987 3.433987 524 +taught 1 33 3.433987 3.433987 526 +concept 1 32 3.465736 3.465736 537 +storag 0 31 3.496508 0.000000 553 +someth 0 31 3.496508 0.000000 554 +compon 1 30 3.555348 3.555348 570 +domain 0 30 3.555348 0.000000 564 +exist 0 30 3.555348 0.000000 569 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +produc 0 30 3.555348 0.000000 572 +turn 1 29 3.583519 3.583519 586 +depend 1 29 3.583519 3.583519 583 +semant 0 29 3.583519 0.000000 587 +propos 1 28 3.610918 3.610918 602 +becom 1 28 3.610918 3.610918 603 +intend 0 28 3.610918 0.000000 599 +actual 0 28 3.610918 0.000000 604 +progress 0 28 3.610918 0.000000 598 +manipul 0 27 3.637586 0.000000 624 +team 0 27 3.637586 0.000000 625 +altern 0 26 3.688879 0.000000 641 +background 1 25 3.737670 3.737670 664 +fundament 1 25 3.737670 3.737670 661 +although 0 25 3.737670 0.000000 667 +toward 0 25 3.737670 0.000000 668 +primari 0 25 3.737670 0.000000 669 +hill 0 25 3.737670 0.000000 670 +aspect 0 25 3.737670 0.000000 663 +alwai 0 24 3.761200 0.000000 691 +wish 0 24 3.761200 0.000000 692 +higher 0 24 3.761200 0.000000 690 +store 0 24 3.761200 0.000000 693 +begin 0 23 3.806662 0.000000 716 +initi 0 23 3.806662 0.000000 717 +lead 0 23 3.806662 0.000000 718 +tent 1 22 3.850148 3.850148 739 +disk 0 22 3.850148 0.000000 747 +recommend 0 22 3.850148 0.000000 737 +finish 0 22 3.850148 0.000000 748 +period 0 22 3.850148 0.000000 743 +among 0 21 3.912023 0.000000 781 +half 0 21 3.912023 0.000000 776 +prerequisit 0 19 4.007333 0.000000 846 +thur 0 19 4.007333 0.000000 847 +lower 1 18 4.060443 4.060443 886 +minim 0 18 4.060443 0.000000 887 +demo 0 18 4.060443 0.000000 888 +stat 1 17 4.110874 4.110874 924 +whether 1 17 4.110874 4.110874 918 +anyon 0 17 4.110874 0.000000 916 +choos 1 16 4.174387 4.174387 964 +ramakrishnan 0 16 4.174387 0.000000 972 +piec 1 15 4.248495 4.248495 1020 +contribut 0 15 4.248495 0.000000 1021 +hopefulli 1 14 4.317488 4.317488 1071 +convent 1 14 4.317488 4.317488 1072 +demand 0 14 4.317488 0.000000 1073 +manner 0 14 4.317488 0.000000 1074 +decid 0 14 4.317488 0.000000 1075 +dbm 1 13 4.382027 4.382027 1136 +essenti 0 13 4.382027 0.000000 1137 +signific 0 13 4.382027 0.000000 1125 +step 0 13 4.382027 0.000000 1138 +introduc 0 13 4.382027 0.000000 1139 +earlier 0 13 4.382027 0.000000 1140 +prelim 1 12 4.465908 4.465908 1201 +amount 1 12 4.465908 4.465908 1208 +grow 0 12 4.465908 0.000000 1209 +workload 0 12 4.465908 0.000000 1210 +buffer 0 12 4.465908 0.000000 1211 +raghu 0 12 4.465908 0.000000 1212 +sens 0 11 4.553877 0.000000 1305 +submiss 0 11 4.553877 0.000000 1298 +benjamin 0 11 4.553877 0.000000 1296 +modular 1 10 4.653960 4.653960 1392 +forc 0 10 4.653960 0.000000 1384 +certain 0 10 4.653960 0.000000 1393 +introductori 1 9 4.753590 4.753590 1479 +herefor 1 9 4.753590 4.753590 1483 +morgan 1 9 4.753590 4.753590 1484 +familiar 0 9 4.753590 0.000000 1485 +suitabl 0 9 4.753590 0.000000 1486 +rel 0 9 4.753590 0.000000 1487 +clear 0 9 4.753590 0.000000 1488 +recoveri 0 9 4.753590 0.000000 1474 +databasesystem 0 8 4.875197 0.000000 1617 +cum 0 8 4.875197 0.000000 1619 +awar 0 7 5.010635 0.000000 1800 +bug 0 7 5.010635 0.000000 1801 +fromth 0 7 5.010635 0.000000 1802 +seshadri 0 7 5.010635 0.000000 1803 +noon 0 7 5.010635 0.000000 1804 +consequ 1 6 5.164786 5.164786 1989 +neither 1 6 5.164786 5.164786 1990 +huge 0 6 5.164786 0.000000 1991 +ifyou 0 6 5.164786 0.000000 1992 +beta 0 6 5.164786 0.000000 1993 +silberschatz 0 6 5.164786 0.000000 1978 +lack 0 6 5.164786 0.000000 1994 +tobe 0 6 5.164786 0.000000 1995 +praveen 0 6 5.164786 0.000000 1996 +kaufmann 1 5 5.347108 5.347108 2254 +solid 0 5 5.347108 0.000000 2255 +valuabl 0 5 5.347108 0.000000 2256 +thrive 0 5 5.347108 0.000000 2257 +greater 0 5 5.347108 0.000000 2258 +fraction 0 5 5.347108 0.000000 2259 +interestedin 0 5 5.347108 0.000000 2260 +categori 0 5 5.347108 0.000000 2261 +mcgraw 0 5 5.347108 0.000000 2262 +minibas 1 4 5.568345 5.568345 2608 +surprisingli 0 4 5.568345 0.000000 2609 +behind 0 4 5.568345 0.000000 2610 +suppli 0 4 5.568345 0.000000 2611 +thiscours 0 4 5.568345 0.000000 2601 +asystem 0 4 5.568345 0.000000 2612 +enrol 0 4 5.568345 0.000000 2613 +twice 0 4 5.568345 0.000000 2614 +fold 0 4 5.568345 0.000000 2615 +thati 0 4 5.568345 0.000000 2616 +predat 1 3 5.857933 5.857933 3135 +comfort 1 3 5.857933 5.857933 3136 +giant 0 3 5.857933 0.000000 3137 +explos 0 3 5.857933 0.000000 3138 +alon 0 3 5.857933 0.000000 3139 +scratch 0 3 5.857933 0.000000 3140 +parser 0 3 5.857933 0.000000 3141 +aproject 0 3 5.857933 0.000000 3142 +bibl 0 3 5.857933 0.000000 3143 +confus 0 3 5.857933 0.000000 3144 +weitsang 1 2 6.263398 6.263398 4088 +databasemanag 0 2 6.263398 0.000000 4089 +certainli 0 2 6.263398 0.000000 4090 +proportion 0 2 6.263398 0.000000 4091 +thefirst 0 2 6.263398 0.000000 4092 +youto 0 2 6.263398 0.000000 4093 +builton 0 2 6.263398 0.000000 4094 +thehigh 0 2 6.263398 0.000000 4095 +korth 0 2 6.263398 0.000000 4051 +secondedit 0 2 6.263398 0.000000 4096 +ingr 0 2 6.263398 0.000000 4097 +elmasri 0 2 6.263398 0.000000 4059 +grai 0 2 6.263398 0.000000 4098 +reuter 0 2 6.263398 0.000000 4099 +likewis 0 2 6.263398 0.000000 4100 +confirm 0 2 6.263398 0.000000 4101 +outsidefirewal 1 1 6.957497 6.957497 6434 +stonebrak 1 1 6.957497 6.957497 6435 +samplequest 0 1 6.957497 0.000000 6436 +predatordbm 0 1 6.957497 0.000000 6437 +currentproject 0 1 6.957497 0.000000 6438 +coursedescript 0 1 6.957497 0.000000 6439 +intendedto 0 1 6.957497 0.000000 6440 +slargest 0 1 6.957497 0.000000 6441 +piecesof 0 1 6.957497 0.000000 6442 +knowledgeabledatabas 0 1 6.957497 0.000000 6443 +researchcommun 0 1 6.957497 0.000000 6444 +addressedbecaus 0 1 6.957497 0.000000 6445 +informedus 0 1 6.957497 0.000000 6446 +teller 0 1 6.957497 0.000000 6447 +newcours 0 1 6.957497 0.000000 6448 +quickreview 0 1 6.957497 0.000000 6449 +abreadth 0 1 6.957497 0.000000 6450 +advancedtop 0 1 6.957497 0.000000 6451 +thepurpos 0 1 6.957497 0.000000 6452 +coursei 0 1 6.957497 0.000000 6453 +weeksaft 0 1 6.957497 0.000000 6454 +requireread 0 1 6.957497 0.000000 6455 +engineeringlibrari 0 1 6.957497 0.000000 6456 +pursueaddit 0 1 6.957497 0.000000 6457 +forinform 0 1 6.957497 0.000000 6458 +examtim 0 1 6.957497 0.000000 6459 +developmentproject 0 1 6.957497 0.000000 6460 +involvea 0 1 6.957497 0.000000 6461 +wishto 0 1 6.957497 0.000000 6462 +willinvolv 0 1 6.957497 0.000000 6463 +andmodifi 0 1 6.957497 0.000000 6464 +andrar 0 1 6.957497 0.000000 6465 +luxuri 0 1 6.957497 0.000000 6466 +thediffer 0 1 6.957497 0.000000 6467 +inevit 0 1 6.957497 0.000000 6468 +varioussystem 0 1 6.957497 0.000000 6469 +buffermanag 0 1 6.957497 0.000000 6470 +enginethat 0 1 6.957497 0.000000 6471 +possibleproject 0 1 6.957497 0.000000 6472 +likecomplex 0 1 6.957497 0.000000 6473 +becauseth 0 1 6.957497 0.000000 6474 +betweenminibas 0 1 6.957497 0.000000 6475 +somegener 0 1 6.957497 0.000000 6476 +ideaon 0 1 6.957497 0.000000 6477 +advanceof 0 1 6.957497 0.000000 6478 +submitan 0 1 6.957497 0.000000 6479 +discussth 0 1 6.957497 0.000000 6480 +particularsystem 0 1 6.957497 0.000000 6481 +documentwil 0 1 6.957497 0.000000 6482 +picki 0 1 6.957497 0.000000 6483 +geton 0 1 6.957497 0.000000 6484 +oftest 0 1 6.957497 0.000000 6485 +coursetextbook 0 1 6.957497 0.000000 6486 +bookcontain 0 1 6.957497 0.000000 6487 +databasebook 0 1 6.957497 0.000000 6488 +thecampu 0 1 6.957497 0.000000 6489 +collectedand 0 1 6.957497 0.000000 6490 +postgr 0 1 6.957497 0.000000 6491 +andillustra 0 1 6.957497 0.000000 6492 +corearea 0 1 6.957497 0.000000 6493 +navath 0 1 6.957497 0.000000 6494 +tellsyou 0 1 6.957497 0.000000 6495 +wonderfulrefer 0 1 6.957497 0.000000 6496 +debuggingwith 0 1 6.957497 0.000000 6497 +gradingpolici 0 1 6.957497 0.000000 6498 +percentag 0 1 6.957497 0.000000 6499 +anextra 0 1 6.957497 0.000000 6500 +thefin 0 1 6.957497 0.000000 6501 +willfocu 0 1 6.957497 0.000000 6502 +coveredin 0 1 6.957497 0.000000 6503 +professorpraveen 0 1 6.957497 0.000000 6504 +teachingassist 0 1 6.957497 0.000000 6505 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..59859b79 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +languag 2 227 1.386294 2.772588 26 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +design 0 213 1.386294 0.000000 25 +class 1 199 1.609438 1.609438 37 +oper 0 180 1.609438 0.000000 34 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +contact 0 153 1.791759 0.000000 59 +base 0 165 1.791759 0.000000 50 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +relat 0 139 1.945910 0.000000 68 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +instructor 1 108 2.197225 2.197225 107 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +assist 0 112 2.197225 0.000000 113 +text 0 98 2.302585 0.000000 133 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +call 0 91 2.397895 0.000000 153 +environ 1 84 2.484907 2.484907 177 +learn 0 86 2.484907 0.000000 170 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +refer 1 78 2.564949 2.564949 203 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +upson 1 71 2.639057 2.639057 218 +logic 1 71 2.639057 2.639057 230 +line 0 75 2.639057 0.000000 231 +knowledg 1 67 2.708050 2.708050 243 +java 0 70 2.708050 0.000000 248 +goal 0 66 2.708050 0.000000 250 +descript 1 64 2.772589 2.772589 271 +experi 1 64 2.772589 2.772589 283 +function 1 62 2.772589 2.772589 275 +abstract 0 62 2.772589 0.000000 276 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +undergradu 1 54 2.944439 2.944439 338 +allow 0 53 2.944439 0.000000 333 +talk 0 53 2.944439 0.000000 336 +tabl 0 51 2.995732 0.000000 346 +basic 1 50 3.044522 3.044522 360 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +understand 0 47 3.091042 0.000000 384 +describ 1 45 3.135494 3.135494 400 +better 0 45 3.135494 0.000000 401 +textbook 0 44 3.135494 0.000000 397 +math 0 44 3.135494 0.000000 402 +mark 0 44 3.135494 0.000000 403 +mechan 1 43 3.178054 3.178054 416 +edit 0 42 3.218876 0.000000 418 +programm 0 39 3.258097 0.000000 445 +theoret 0 39 3.258097 0.000000 446 +must 0 40 3.258097 0.000000 442 +correct 0 38 3.295837 0.000000 462 +formal 0 37 3.332205 0.000000 478 +survei 0 35 3.401197 0.000000 513 +tech 0 35 3.401197 0.000000 515 +least 0 35 3.401197 0.000000 516 +concept 0 32 3.465736 0.000000 537 +specifi 0 30 3.555348 0.000000 568 +semant 1 29 3.583519 3.583519 587 +turn 0 29 3.583519 0.000000 586 +though 0 27 3.637586 0.000000 622 +rather 0 26 3.688879 0.000000 642 +relev 0 26 3.688879 0.000000 637 +concern 0 25 3.737670 0.000000 666 +demonstr 0 24 3.761200 0.000000 694 +greg 0 24 3.761200 0.000000 695 +compress 0 23 3.806662 0.000000 719 +lead 0 23 3.806662 0.000000 718 +proof 0 23 3.806662 0.000000 720 +properti 1 22 3.850148 3.850148 749 +programminglanguag 0 21 3.912023 0.000000 782 +newsgroup 0 21 3.912023 0.000000 783 +safeti 0 20 3.951244 0.000000 817 +scheme 0 20 3.951244 0.000000 818 +prove 1 19 4.007333 4.007333 848 +assum 1 19 4.007333 4.007333 845 +prerequisit 0 19 4.007333 0.000000 846 +andrew 0 19 4.007333 0.000000 849 +analyz 0 17 4.110874 0.000000 925 +side 1 15 4.248495 4.248495 1022 +precis 0 15 4.248495 0.000000 1023 +carl 0 15 4.248495 0.000000 1024 +conduct 0 14 4.317488 0.000000 1065 +directli 0 13 4.382027 0.000000 1141 +larri 0 13 4.382027 0.000000 1142 +recurs 0 13 4.382027 0.000000 1127 +emac 0 13 4.382027 0.000000 1143 +pascal 0 12 4.465908 0.000000 1213 +calculu 0 12 4.465908 0.000000 1203 +meng 0 12 4.465908 0.000000 1214 +broad 0 11 4.553877 0.000000 1302 +induct 0 11 4.553877 0.000000 1304 +linda 0 10 4.653960 0.000000 1394 +notat 1 9 4.753590 4.753590 1489 +entitl 0 9 4.753590 0.000000 1490 +prefer 0 9 4.753590 0.000000 1491 +suitabl 0 9 4.753590 0.000000 1486 +admin 0 9 4.753590 0.000000 1476 +mode 0 9 4.753590 0.000000 1492 +ideal 0 8 4.875197 0.000000 1630 +leon 0 8 4.875197 0.000000 1631 +cum 0 8 4.875197 0.000000 1619 +dispatch 0 7 5.010635 0.000000 1791 +henc 0 7 5.010635 0.000000 1805 +predic 0 7 5.010635 0.000000 1806 +ture 0 6 5.164786 0.000000 1997 +morrisett 0 5 5.347108 0.000000 2263 +gentl 0 5 5.347108 0.000000 2264 +witha 0 4 5.568345 0.000000 2617 +haskel 0 4 5.568345 0.000000 2618 +principlesof 0 3 5.857933 0.000000 3145 +deeper 0 3 5.857933 0.000000 3146 +denot 0 3 5.857933 0.000000 3147 +noteshomework 0 2 6.263398 0.000000 4102 +profici 0 2 6.263398 0.000000 4103 +andlog 0 2 6.263398 0.000000 4104 +competillo 0 2 6.263398 0.000000 4105 +lfar 0 2 6.263398 0.000000 4106 +erlingsson 0 2 6.263398 0.000000 4107 +indexdocument 0 2 6.263398 0.000000 4108 +toolsa 0 2 6.263398 0.000000 4109 +prerequisiteshandoutsscrib 0 1 6.957497 0.000000 6506 +assignmentscontact 0 1 6.957497 0.000000 6507 +informationrelev 0 1 6.957497 0.000000 6508 +goalof 0 1 6.957497 0.000000 6509 +multipleinherit 0 1 6.957497 0.000000 6510 +subsum 0 1 6.957497 0.000000 6511 +thestudi 0 1 6.957497 0.000000 6512 +abstractli 0 1 6.957497 0.000000 6513 +howprogram 0 1 6.957497 0.000000 6514 +asnot 0 1 6.957497 0.000000 6515 +preciser 0 1 6.957497 0.000000 6516 +forform 0 1 6.957497 0.000000 6517 +somethingabout 0 1 6.957497 0.000000 6518 +tomanipul 0 1 6.957497 0.000000 6519 +gunter 0 1 6.957497 0.000000 6520 +paulson 0 1 6.957497 0.000000 6521 +undergraduatemathemat 0 1 6.957497 0.000000 6522 +mathematicalmatur 0 1 6.957497 0.000000 6523 +anmeng 0 1 6.957497 0.000000 6524 +ifth 0 1 6.957497 0.000000 6525 +ulfar 0 1 6.957497 0.000000 6526 +pmrelev 0 1 6.957497 0.000000 6527 +comint 0 1 6.957497 0.000000 6528 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..0cd46740 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +schedul 0 119 2.079442 0.000000 85 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +info 0 85 2.484907 0.000000 176 +materi 0 75 2.639057 0.000000 221 +staff 0 36 3.367296 0.000000 490 +newsgroup 1 21 3.912023 3.912023 783 +rivl 0 8 4.875197 0.000000 1632 +systemscomput 0 3 5.857933 0.000000 3148 +janosi 0 3 5.857933 0.000000 3149 +pagecsmultimedia 0 2 6.263398 0.000000 4110 +anounc 0 2 6.263398 0.000000 4111 +bugcom 0 2 6.263398 0.000000 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..36d03ed3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +question 0 91 2.397895 0.000000 141 +info 0 85 2.484907 0.000000 176 +materi 0 75 2.639057 0.000000 221 +staff 0 36 3.367296 0.000000 490 +newsgroup 1 21 3.912023 3.912023 783 +rivl 0 8 4.875197 0.000000 1632 +systemscomput 0 3 5.857933 0.000000 3148 +janosi 0 3 5.857933 0.000000 3149 +pagecsmultimedia 0 2 6.263398 0.000000 4110 +anounc 0 2 6.263398 0.000000 4111 +bugcom 0 2 6.263398 0.000000 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..750289ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +last 2 314 1.098612 2.197224 14 +time 1 293 1.098612 1.098612 17 +cours 0 273 1.098612 0.000000 15 +design 1 213 1.386294 1.386294 25 +modifi 2 178 1.609438 3.218876 35 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +algorithm 1 162 1.791759 1.791759 57 +note 1 142 1.945910 1.945910 67 +lectur 0 135 1.945910 0.000000 73 +analysi 1 124 2.079442 2.079442 98 +instructor 0 108 2.197225 0.000000 107 +make 0 111 2.197225 0.000000 120 +text 1 98 2.302585 2.302585 133 +homepag 0 93 2.397895 0.000000 148 +solut 2 82 2.484907 4.969814 162 +exam 0 86 2.484907 0.000000 169 +homework 2 79 2.564949 5.129898 193 +refer 0 78 2.564949 0.000000 203 +upson 0 71 2.639057 0.000000 218 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +copi 0 63 2.772589 0.000000 284 +locat 0 59 2.833213 0.000000 303 +talk 0 53 2.944439 0.000000 336 +announc 1 40 3.258097 3.258097 441 +approxim 0 35 3.401197 0.000000 509 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +sheet 0 16 4.174387 0.000000 973 +cannot 0 13 4.382027 0.000000 1144 +cheat 0 10 4.653960 0.000000 1395 +evan 1 8 4.875197 4.875197 1633 +rubinfeld 0 6 5.164786 0.000000 1998 +ronitt 0 5 5.347108 0.000000 2265 +kozen 1 4 5.568345 5.568345 2619 +addendum 1 3 5.857933 5.857933 3150 +moran 0 3 5.857933 0.000000 3151 +rajeev 0 3 5.857933 0.000000 3152 +inupson 0 1 6.957497 0.000000 6529 +tome 0 1 6.957497 0.000000 6530 +reschedul 0 1 6.957497 0.000000 6531 +motwani 0 1 6.957497 0.000000 6532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..aeef5a1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +oper 1 180 1.609438 1.609438 34 +parallel 2 169 1.791759 3.583518 60 +algorithm 1 162 1.791759 1.791759 57 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +machin 2 129 2.079442 4.158884 95 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +topic 1 114 2.197225 2.197225 110 +look 0 107 2.197225 0.000000 115 +specif 0 106 2.197225 0.000000 106 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +second 0 81 2.484907 0.000000 166 +issu 1 78 2.564949 2.564949 211 +complet 0 77 2.564949 0.000000 208 +upson 0 71 2.639057 0.000000 218 +line 0 75 2.639057 0.000000 231 +integr 0 67 2.708050 0.000000 245 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +locat 0 59 2.833213 0.000000 303 +share 0 59 2.833213 0.000000 304 +point 0 58 2.890372 0.000000 319 +processor 1 54 2.944439 2.944439 335 +allow 0 53 2.944439 0.000000 333 +much 0 52 2.995732 0.000000 349 +week 0 52 2.995732 0.000000 343 +hardwar 0 51 2.995732 0.000000 350 +approach 0 48 3.044522 0.000000 366 +set 0 50 3.044522 0.000000 361 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +execut 0 45 3.135494 0.000000 404 +offer 1 43 3.178054 3.178054 414 +term 0 43 3.178054 0.000000 411 +howev 0 41 3.218876 0.000000 422 +futur 0 41 3.218876 0.000000 427 +past 0 42 3.218876 0.000000 428 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +workstat 1 37 3.332205 3.332205 479 +cost 0 37 3.332205 0.000000 480 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +product 0 33 3.433987 0.000000 527 +taken 0 31 3.496508 0.000000 555 +focu 1 30 3.555348 3.555348 571 +power 0 30 3.555348 0.000000 573 +multiprocessor 0 28 3.610918 0.000000 605 +altern 0 26 3.688879 0.000000 641 +although 0 25 3.737670 0.000000 667 +aspect 0 25 3.737670 0.000000 663 +almost 0 22 3.850148 0.000000 742 +sequenti 0 22 3.850148 0.000000 745 +fact 0 21 3.912023 0.000000 780 +busi 0 21 3.912023 0.000000 784 +latest 0 21 3.912023 0.000000 785 +portabl 0 20 3.951244 0.000000 819 +spend 0 19 4.007333 0.000000 850 +layer 0 17 4.110874 0.000000 926 +across 0 16 4.174387 0.000000 974 +month 0 15 4.248495 0.000000 1025 +massiv 0 15 4.248495 0.000000 1026 +consider 1 14 4.317488 4.317488 1076 +easili 0 14 4.317488 0.000000 1077 +split 0 14 4.317488 0.000000 1078 +thorsten 0 13 4.382027 0.000000 1133 +cannot 0 13 4.382027 0.000000 1144 +stai 0 12 4.465908 0.000000 1215 +workload 0 12 4.465908 0.000000 1210 +grant 0 12 4.465908 0.000000 1216 +host 0 11 4.553877 0.000000 1306 +debugg 0 9 4.753590 0.000000 1493 +manufactur 1 8 4.875197 4.875197 1634 +competit 0 8 4.875197 0.000000 1635 +depth 0 8 4.875197 0.000000 1636 +contrast 0 8 4.875197 0.000000 1637 +smile 0 7 5.010635 0.000000 1807 +microprocessor 0 7 5.010635 0.000000 1808 +core 0 7 5.010635 0.000000 1809 +usabl 0 7 5.010635 0.000000 1810 +price 1 6 5.164786 5.164786 1999 +quickli 0 6 5.164786 0.000000 2000 +vari 0 6 5.164786 0.000000 2001 +feder 0 5 5.347108 0.000000 2266 +eas 0 5 5.347108 0.000000 2267 +suffer 0 5 5.347108 0.000000 2268 +matur 0 5 5.347108 0.000000 2269 +vertic 0 5 5.347108 0.000000 2270 +focuss 0 5 5.347108 0.000000 2271 +cut 0 4 5.568345 0.000000 2620 +shelf 0 4 5.568345 0.000000 2621 +slice 0 4 5.568345 0.000000 2622 +eickenfal 0 3 5.857933 0.000000 3125 +leverag 0 3 5.857933 0.000000 3153 +heat 0 2 6.263398 0.000000 4113 +glorifi 0 2 6.263398 0.000000 4114 +farm 0 2 6.263398 0.000000 4115 +adequ 0 2 6.263398 0.000000 4116 +horizont 0 2 6.263398 0.000000 4117 +debat 1 1 6.957497 6.957497 6533 +pagefronti 0 1 6.957497 0.000000 6534 +pmoffic 0 1 6.957497 0.000000 6535 +pmcours 0 1 6.957497 0.000000 6536 +descriptionparallel 0 1 6.957497 0.000000 6537 +underscor 0 1 6.957497 0.000000 6538 +erad 0 1 6.957497 0.000000 6539 +competitor 0 1 6.957497 0.000000 6540 +dash 0 1 6.957497 0.000000 6541 +materialscours 0 1 6.957497 0.000000 6542 +formatlectur 0 1 6.957497 0.000000 6543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..0ec368f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +assign 0 135 1.945910 0.000000 66 +like 0 132 1.945910 0.000000 81 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +machin 0 129 2.079442 0.000000 95 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +question 1 91 2.397895 2.397895 141 +search 0 95 2.397895 0.000000 155 +start 1 83 2.484907 2.484907 173 +control 0 82 2.484907 0.000000 164 +help 0 83 2.484907 0.000000 175 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +exampl 1 77 2.564949 2.564949 195 +homework 0 79 2.564949 0.000000 193 +know 0 80 2.564949 0.000000 198 +refer 0 78 2.564949 0.000000 203 +come 0 78 2.564949 0.000000 202 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +david 0 71 2.639057 0.000000 232 +organ 1 65 2.772589 2.772589 265 +guid 0 63 2.772589 0.000000 267 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +sever 0 56 2.890372 0.000000 322 +allow 1 53 2.944439 2.944439 333 +processor 0 54 2.944439 0.000000 335 +talk 0 53 2.944439 0.000000 336 +digit 0 52 2.995732 0.000000 348 +case 0 51 2.995732 0.000000 351 +maintain 0 51 2.995732 0.000000 342 +frequent 0 49 3.044522 0.000000 367 +get 0 46 3.091042 0.000000 380 +video 0 44 3.135494 0.000000 405 +tutori 1 39 3.258097 3.258097 437 +small 0 39 3.258097 0.000000 447 +staff 0 36 3.367296 0.000000 490 +procedur 0 36 3.367296 0.000000 488 +common 0 30 3.555348 0.000000 574 +ask 0 28 3.610918 0.000000 597 +great 0 27 3.637586 0.000000 626 +never 0 25 3.737670 0.000000 671 +todai 0 25 3.737670 0.000000 672 +consult 0 24 3.761200 0.000000 687 +wish 0 24 3.761200 0.000000 692 +hierarchi 0 22 3.850148 0.000000 744 +annot 1 21 3.912023 3.912023 775 +path 0 21 3.912023 0.000000 778 +unit 0 21 3.912023 0.000000 779 +newsgroup 0 21 3.912023 0.000000 783 +lot 0 18 4.060443 0.000000 889 +otherwis 0 17 4.110874 0.000000 922 +fortran 0 15 4.248495 0.000000 1027 +thorsten 1 13 4.382027 4.382027 1133 +assembl 0 12 4.465908 0.000000 1207 +pascal 0 12 4.465908 0.000000 1213 +surf 0 11 4.553877 0.000000 1301 +combinatori 0 8 4.875197 0.000000 1629 +brain 0 8 4.875197 0.000000 1638 +interrupt 0 7 5.010635 0.000000 1793 +conot 1 5 5.347108 5.347108 2245 +eickenfal 0 3 5.857933 0.000000 3125 +kimbal 0 3 5.857933 0.000000 3114 +helpif 0 3 5.857933 0.000000 3126 +aproject 0 3 5.857933 0.000000 3142 +mate 0 3 5.857933 0.000000 3127 +cardiff 0 3 5.857933 0.000000 3154 +programsand 0 3 5.857933 0.000000 3111 +marshal 1 2 6.263398 6.263398 4118 +btopic 0 2 6.263398 0.000000 4085 +representationof 0 2 6.263398 0.000000 4119 +toon 0 2 6.263398 0.000000 4120 +pageintroduct 0 1 6.957497 0.000000 6544 +sequentialcircuit 0 1 6.957497 0.000000 6545 +andmicroprogram 0 1 6.957497 0.000000 6546 +theappropri 0 1 6.957497 0.000000 6547 +gethelp 0 1 6.957497 0.000000 6548 +informationcoursemateri 0 1 6.957497 0.000000 6549 +announcementsannounc 0 1 6.957497 0.000000 6550 +onlinean 0 1 6.957497 0.000000 6551 +forpeopl 0 1 6.957497 0.000000 6552 +cclass 0 1 6.957497 0.000000 6553 +learnc 0 1 6.957497 0.000000 6554 +theyahoo 0 1 6.957497 0.000000 6555 +ofmor 0 1 6.957497 0.000000 6556 +inansw 0 1 6.957497 0.000000 6557 +voneicken 0 1 6.957497 0.000000 6558 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..0317a1ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +welcom 0 122 2.079442 0.000000 99 +final 1 116 2.197225 2.197225 108 +code 0 108 2.197225 0.000000 116 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +grade 0 90 2.397895 0.000000 142 +academ 0 82 2.484907 0.000000 178 +exam 0 86 2.484907 0.000000 169 +upson 1 71 2.639057 2.639057 218 +intellig 0 72 2.639057 0.000000 225 +materi 0 75 2.639057 0.000000 221 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +date 0 51 2.995732 0.000000 344 +announc 0 40 3.258097 0.000000 441 +request 0 26 3.688879 0.000000 635 +altern 0 26 3.688879 0.000000 641 +sometim 0 24 3.761200 0.000000 696 +saturdai 0 7 5.010635 0.000000 1794 +clair 1 4 5.568345 5.568345 2605 +pagesc 0 3 5.857933 0.000000 3133 +pagecsfound 0 2 6.263398 0.000000 4086 +yourgrad 0 2 6.263398 0.000000 4121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..cc18d3d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +technolog 0 131 2.079442 0.000000 102 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +resourc 0 81 2.484907 0.000000 172 +stuff 0 87 2.484907 0.000000 171 +upson 1 71 2.639057 2.639057 218 +materi 0 75 2.639057 0.000000 221 +line 0 75 2.639057 0.000000 231 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +overview 1 56 2.890372 2.890372 323 +frequent 0 49 3.044522 0.000000 367 +staff 0 36 3.367296 0.000000 490 +ask 0 28 3.610918 0.000000 597 +edulast 0 17 4.110874 0.000000 927 +pagec 0 15 4.248495 0.000000 1011 +borland 0 14 4.317488 0.000000 1067 +recit 0 9 4.753590 0.000000 1475 +vineet 0 8 4.875197 0.000000 1639 +none 0 7 5.010635 0.000000 1811 +buch 1 5 5.347108 5.347108 2272 +samuel 1 3 5.857933 5.857933 3155 +weber 1 3 5.857933 5.857933 3156 +yaron 0 2 6.263398 0.000000 4122 +minski 0 2 6.263398 0.000000 4123 +remark 0 2 6.263398 0.000000 4124 +techniquescomput 0 1 6.957497 0.000000 6559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..0de4a544 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +introduct 1 126 2.079442 2.079442 87 +postscript 0 131 2.079442 0.000000 90 +homework 0 79 2.564949 0.000000 193 +logic 0 71 2.639057 0.000000 230 +guid 0 63 2.772589 0.000000 267 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +tabl 0 51 2.995732 0.000000 346 +brian 0 38 3.295837 0.000000 466 +slide 0 38 3.295837 0.000000 467 +procedur 0 36 3.367296 0.000000 488 +smith 0 20 3.951244 0.000000 820 +recurs 0 13 4.382027 0.000000 1127 +assembl 0 12 4.465908 0.000000 1207 +tour 0 11 4.553877 0.000000 1307 +stack 0 10 4.653960 0.000000 1389 +spec 0 8 4.875197 0.000000 1640 +interrupt 0 7 5.010635 0.000000 1793 +linker 0 3 5.857933 0.000000 3157 +loader 0 1 6.957497 0.000000 6560 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..fe95aeef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +lectur 0 135 1.945910 0.000000 73 +spring 1 131 2.079442 2.079442 88 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +exam 1 86 2.484907 2.484907 169 +wide 0 84 2.484907 0.000000 185 +april 1 77 2.564949 2.564949 196 +messag 0 76 2.564949 0.000000 212 +tuesdai 0 73 2.639057 0.000000 219 +materi 0 75 2.639057 0.000000 221 +import 0 65 2.772589 0.000000 282 +march 0 61 2.833213 0.000000 295 +februari 0 54 2.944439 0.000000 328 +get 0 46 3.091042 0.000000 380 +review 0 42 3.218876 0.000000 425 +held 0 28 3.610918 0.000000 600 +session 0 26 3.688879 0.000000 643 +thur 0 19 4.007333 0.000000 847 +prelim 1 12 4.465908 4.465908 1201 +rememb 0 12 4.465908 0.000000 1217 +tue 1 11 4.553877 4.553877 1308 +regard 0 11 4.553877 0.000000 1309 +baker 0 7 5.010635 0.000000 1812 +pierc 0 4 5.568345 0.000000 2623 +theworld 0 3 5.857933 0.000000 3158 +codewarrior 0 2 6.263398 0.000000 4125 +frequentlyfor 0 1 6.957497 0.000000 6561 +onsundai 0 1 6.957497 0.000000 6562 +personalmac 0 1 6.957497 0.000000 6563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..268c9793 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,261 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +link 2 247 1.386294 2.772588 24 +cornel 2 215 1.386294 2.772588 23 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +class 3 199 1.609438 4.828314 37 +list 2 201 1.609438 3.218876 39 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +hour 2 165 1.791759 3.583518 46 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +lectur 4 135 1.945910 7.783640 73 +assign 2 135 1.945910 3.891820 66 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +architectur 1 139 1.945910 1.945910 77 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +structur 1 106 2.197225 2.197225 105 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +site 1 106 2.197225 2.197225 119 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +text 1 98 2.302585 2.302585 133 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +section 1 94 2.397895 2.397895 149 +question 0 91 2.397895 0.000000 141 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +exam 1 86 2.484907 2.484907 169 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +server 1 76 2.564949 2.564949 204 +dynam 1 76 2.564949 2.564949 194 +mondai 1 77 2.564949 2.564949 206 +april 0 77 2.564949 0.000000 196 +orient 0 80 2.564949 0.000000 205 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +sourc 0 77 2.564949 0.000000 201 +upson 2 71 2.639057 5.278114 218 +david 1 71 2.639057 2.639057 232 +tuesdai 1 73 2.639057 2.639057 219 +name 0 72 2.639057 0.000000 220 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +window 0 68 2.708050 0.000000 242 +handout 2 64 2.772589 5.545178 263 +function 1 62 2.772589 2.772589 275 +descript 1 64 2.772589 2.772589 271 +abstract 0 62 2.772589 0.000000 276 +complex 0 64 2.772589 0.000000 269 +type 1 61 2.833213 2.833213 296 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +simpl 0 60 2.833213 0.000000 298 +point 1 58 2.890372 2.890372 319 +summer 0 56 2.890372 0.000000 311 +unix 0 58 2.890372 0.000000 308 +think 0 57 2.890372 0.000000 314 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +date 1 51 2.995732 2.995732 344 +tabl 0 51 2.995732 0.000000 346 +format 1 48 3.044522 3.044522 356 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +appoint 0 49 3.044522 0.000000 358 +set 0 50 3.044522 0.000000 361 +effect 0 46 3.091042 0.000000 385 +netscap 0 44 3.135494 0.000000 395 +fridai 0 44 3.135494 0.000000 390 +http 0 41 3.218876 0.000000 420 +correct 2 38 3.295837 6.591674 462 +industri 1 38 3.295837 3.295837 464 +credit 1 38 3.295837 3.295837 460 +microsoft 1 38 3.295837 3.295837 468 +tree 1 36 3.367296 3.367296 492 +staff 1 36 3.367296 3.367296 490 +soon 0 36 3.367296 0.000000 494 +word 1 34 3.401197 3.401197 508 +next 0 34 3.401197 0.000000 517 +print 0 34 3.401197 0.000000 503 +manual 0 35 3.401197 0.000000 504 +given 0 32 3.465736 0.000000 538 +storag 1 31 3.496508 3.496508 553 +framework 1 28 3.610918 3.610918 606 +held 0 28 3.610918 0.000000 600 +arrai 1 27 3.637586 3.637586 627 +session 0 26 3.688879 0.000000 643 +enhanc 0 26 3.688879 0.000000 644 +jeff 1 25 3.737670 3.737670 673 +consult 1 24 3.761200 3.761200 687 +other 0 24 3.761200 0.000000 697 +lab 0 24 3.761200 0.000000 698 +thank 1 23 3.806662 3.806662 721 +proof 0 23 3.806662 0.000000 720 +almost 0 22 3.850148 0.000000 742 +inth 0 22 3.850148 0.000000 741 +alloc 1 20 3.951244 3.951244 821 +fine 1 20 3.951244 3.951244 822 +binari 1 20 3.951244 3.951244 823 +prove 1 19 4.007333 4.007333 848 +prerequisit 0 19 4.007333 0.000000 846 +expand 1 17 4.110874 4.110874 928 +regular 0 17 4.110874 0.000000 929 +macintosh 0 17 4.110874 0.000000 920 +intro 0 17 4.110874 0.000000 915 +condit 1 16 4.174387 4.174387 975 +jose 1 16 4.174387 4.174387 976 +charact 0 15 4.248495 0.000000 1028 +piec 0 15 4.248495 0.000000 1020 +recurs 1 13 4.382027 4.382027 1127 +deriv 1 13 4.382027 4.382027 1145 +alan 0 13 4.382027 0.000000 1146 +prelim 1 12 4.465908 4.465908 1201 +iter 1 12 4.465908 4.465908 1206 +grant 0 12 4.465908 0.000000 1216 +onth 0 12 4.465908 0.000000 1218 +loop 1 11 4.553877 4.553877 1310 +chri 0 11 4.553877 0.000000 1311 +rich 1 10 4.653960 4.653960 1396 +princip 0 10 4.653960 0.000000 1397 +sundai 0 10 4.653960 0.000000 1387 +preliminari 1 9 4.753590 4.753590 1480 +notat 1 9 4.753590 4.753590 1489 +strength 1 9 4.753590 4.753590 1494 +plain 1 9 4.753590 4.753590 1495 +equival 0 9 4.753590 0.000000 1496 +intermedi 0 9 4.753590 0.000000 1497 +filter 1 8 4.875197 4.875197 1641 +printer 0 8 4.875197 0.000000 1621 +dictionari 0 8 4.875197 0.000000 1642 +none 1 7 5.010635 5.010635 1811 +noon 0 7 5.010635 0.000000 1804 +troubl 0 6 5.164786 0.000000 2002 +yale 0 6 5.164786 0.000000 2003 +contest 0 5 5.347108 0.000000 2273 +cell 0 5 5.347108 0.000000 2274 +ofprogram 0 4 5.568345 0.000000 2624 +flavor 0 4 5.568345 0.000000 2625 +haskel 0 4 5.568345 0.000000 2618 +cuinfo 0 4 5.568345 0.000000 2626 +foster 1 3 5.857933 5.857933 3159 +tripl 1 3 5.857933 5.857933 3160 +walker 1 3 5.857933 5.857933 3161 +tocomput 0 3 5.857933 0.000000 3162 +ghostview 0 3 5.857933 0.000000 3163 +maker 0 3 5.857933 0.000000 3164 +kwan 1 2 6.263398 6.263398 4126 +stuffit 1 2 6.263398 6.263398 4127 +codewarrior 1 2 6.263398 6.263398 4125 +thesecond 0 2 6.263398 0.000000 4128 +datatyp 0 2 6.263398 0.000000 4129 +csdepart 0 2 6.263398 0.000000 4130 +metrowerk 0 2 6.263398 0.000000 4131 +gofer 2 1 6.957497 13.914994 6564 +macgof 1 1 6.957497 6.957497 6565 +jfoster 1 1 6.957497 6.957497 6566 +hollist 1 1 6.957497 6.957497 6567 +binhqx 1 1 6.957497 6.957497 6568 +dynamicdata 1 1 6.957497 6.957497 6569 +curri 1 1 6.957497 6.957497 6570 +olin 1 1 6.957497 6.957497 6571 +ahal 1 1 6.957497 6.957497 6572 +walkerwednesdai 1 1 6.957497 6.957497 6573 +kaykylesteveericvasantha 1 1 6.957497 6.957497 6574 +danerickaychrisdan 1 1 6.957497 6.957497 6575 +earlyvers 0 1 6.957497 0.000000 6576 +announcetim 0 1 6.957497 0.000000 6577 +theprelim 0 1 6.957497 0.000000 6578 +wereannounc 0 1 6.957497 0.000000 6579 +lastnam 0 1 6.957497 0.000000 6580 +covereveryth 0 1 6.957497 0.000000 6581 +topicsconv 0 1 6.957497 0.000000 6582 +daywhenwherewhomondai 0 1 6.957497 0.000000 6583 +davetuesdai 0 1 6.957497 0.000000 6584 +jeffwednesdai 0 1 6.957497 0.000000 6585 +davethursdai 0 1 6.957497 0.000000 6586 +halfridai 0 1 6.957497 0.000000 6587 +halsaturdai 0 1 6.957497 0.000000 6588 +breview 0 1 6.957497 0.000000 6589 +chrisand 0 1 6.957497 0.000000 6590 +engrd 0 1 6.957497 0.000000 6591 +bothcom 0 1 6.957497 0.000000 6592 +programmingexperi 0 1 6.957497 0.000000 6593 +ofalgorithm 0 1 6.957497 0.000000 6594 +perkin 0 1 6.957497 0.000000 6595 +sectionsdaytimeroominstructortuesdai 0 1 6.957497 0.000000 6596 +perkinstuesdai 0 1 6.957497 0.000000 6597 +perkinswednesdai 0 1 6.957497 0.000000 6598 +walkerthursdai 0 1 6.957497 0.000000 6599 +fosterfridai 0 1 6.957497 0.000000 6600 +ofclass 0 1 6.957497 0.000000 6601 +consultingsundaymondaytuesdaywednesdaythursdayfridai 0 1 6.957497 0.000000 6602 +steveerickylechrisjpkyl 0 1 6.957497 0.000000 6603 +steveerickylechrisjpvasantha 0 1 6.957497 0.000000 6604 +josejosekayjosejpvasantha 0 1 6.957497 0.000000 6605 +josejosekayjosejp 0 1 6.957497 0.000000 6606 +macbinari 0 1 6.957497 0.000000 6607 +parseabl 0 1 6.957497 0.000000 6608 +waspost 0 1 6.957497 0.000000 6609 +foraladdin 0 1 6.957497 0.000000 6610 +armandonunez 0 1 6.957497 0.000000 6611 +anylas 0 1 6.957497 0.000000 6612 +applicationlik 0 1 6.957497 0.000000 6613 +ishaskel 0 1 6.957497 0.000000 6614 +systemsz 0 1 6.957497 0.000000 6615 +ofgof 0 1 6.957497 0.000000 6616 +itavail 0 1 6.957497 0.000000 6617 +enhance_assign 0 1 6.957497 0.000000 6618 +aladdin 0 1 6.957497 0.000000 6619 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..71752c4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +modifi 0 178 1.609438 0.000000 35 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +make 0 111 2.197225 0.000000 120 +section 0 94 2.397895 0.000000 149 +info 0 85 2.484907 0.000000 176 +chang 0 82 2.484907 0.000000 163 +exam 0 86 2.484907 0.000000 169 +materi 0 75 2.639057 0.000000 221 +room 0 59 2.833213 0.000000 301 +extens 0 53 2.944439 0.000000 340 +date 0 51 2.995732 0.000000 344 +announc 1 40 3.258097 3.258097 441 +staff 0 36 3.367296 0.000000 490 +demo 0 18 4.060443 0.000000 888 +pagec 0 15 4.248495 0.000000 1011 +happi 0 14 4.317488 0.000000 1079 +emac 0 13 4.382027 0.000000 1143 +prelim 0 12 4.465908 0.000000 1201 +departmentcornel 0 5 5.347108 0.000000 2275 +grader 0 3 5.857933 0.000000 3165 +universityspr 0 2 6.263398 0.000000 4055 +interpretationof 0 1 6.957497 0.000000 6620 +programscomput 0 1 6.957497 0.000000 6621 +macmarlai 0 1 6.957497 0.000000 6622 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..728f5b8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +implement 1 152 1.791759 1.791759 52 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +object 1 138 1.945910 1.945910 79 +construct 0 139 1.945910 0.000000 82 +document 1 121 2.079442 2.079442 89 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +make 0 111 2.197225 0.000000 120 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +environ 1 84 2.484907 2.484907 177 +level 0 87 2.484907 0.000000 180 +resourc 0 81 2.484907 0.000000 172 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +refer 1 78 2.564949 2.564949 203 +orient 0 80 2.564949 0.000000 205 +upson 1 71 2.639057 2.639057 218 +effici 0 73 2.639057 0.000000 233 +tuesdai 0 73 2.639057 0.000000 219 +line 0 75 2.639057 0.000000 231 +java 0 70 2.708050 0.000000 248 +thursdai 0 70 2.708050 0.000000 241 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +abstract 0 62 2.772589 0.000000 276 +collect 0 65 2.772589 0.000000 268 +function 0 62 2.772589 0.000000 275 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +featur 1 46 3.091042 3.091042 386 +mark 0 44 3.135494 0.000000 403 +continu 0 39 3.258097 0.000000 448 +survei 0 35 3.401197 0.000000 513 +kind 0 32 3.465736 0.000000 541 +focu 0 30 3.555348 0.000000 571 +synchron 0 29 3.583519 0.000000 588 +except 0 28 3.610918 0.000000 607 +greg 0 24 3.761200 0.000000 695 +thread 0 23 3.806662 0.000000 722 +separ 0 19 4.007333 0.000000 844 +andrew 0 19 4.007333 0.000000 849 +modern 1 16 4.174387 4.174387 966 +linda 0 10 4.653960 0.000000 1394 +admin 0 9 4.753590 0.000000 1476 +evan 1 8 4.875197 4.875197 1633 +dylan 0 8 4.875197 0.000000 1625 +closur 0 8 4.875197 0.000000 1643 +leon 0 8 4.875197 0.000000 1631 +cum 0 8 4.875197 0.000000 1619 +implementationof 0 7 5.010635 0.000000 1813 +garbag 0 6 5.164786 0.000000 1986 +notabl 0 5 5.347108 0.000000 2276 +morrisett 0 5 5.347108 0.000000 2263 +gentl 0 5 5.347108 0.000000 2264 +haskel 0 4 5.568345 0.000000 2618 +polymorph 0 4 5.568345 0.000000 2627 +administrivia 0 3 5.857933 0.000000 3166 +moran 0 3 5.857933 0.000000 3151 +competillo 0 2 6.263398 0.000000 4105 +indexdocument 0 2 6.263398 0.000000 4108 +toolsa 0 2 6.263398 0.000000 4109 +descriptionhandoutsadministriviaweb 0 1 6.957497 0.000000 6623 +ofmodern 0 1 6.957497 0.000000 6624 +connectionsto 0 1 6.957497 0.000000 6625 +pmweb 0 1 6.957497 0.000000 6626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..cef80af6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +postscript 1 131 2.079442 2.079442 90 +final 0 116 2.197225 0.000000 108 +homework 1 79 2.564949 2.564949 193 +practic 0 70 2.708050 0.000000 246 +overview 0 56 2.890372 0.000000 323 +examin 1 42 3.218876 3.218876 424 +bibliographi 0 34 3.401197 0.000000 518 +annot 1 21 3.912023 3.912023 775 +prepar 0 20 3.951244 0.000000 824 +necessari 0 13 4.382027 0.000000 1147 +registr 0 5 5.347108 0.000000 2249 +informationcours 0 3 5.857933 0.000000 3167 +systemspract 0 1 6.957497 0.000000 6627 +takingc 0 1 6.957497 0.000000 6628 +logist 0 1 6.957497 0.000000 6629 +homeworkshomework 0 1 6.957497 0.000000 6630 +amexaminationsmidterm 0 1 6.957497 0.000000 6631 +bibliographiesselect 0 1 6.957497 0.000000 6632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..3aa1d3b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +problem 1 147 1.945910 1.945910 75 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +report 1 131 2.079442 2.079442 92 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +need 0 98 2.302585 0.000000 135 +present 1 91 2.397895 2.397895 145 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +follow 0 92 2.397895 0.000000 143 +start 1 83 2.484907 2.484907 173 +solut 0 82 2.484907 0.000000 162 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +homework 1 79 2.564949 2.564949 193 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +server 0 76 2.564949 0.000000 204 +know 0 80 2.564949 0.000000 198 +upson 1 71 2.639057 2.639057 218 +solv 0 73 2.639057 0.000000 234 +order 0 69 2.708050 0.000000 249 +import 1 65 2.772589 2.772589 282 +result 0 65 2.772589 0.000000 281 +think 0 57 2.890372 0.000000 314 +instruct 1 53 2.944439 2.944439 332 +sampl 0 53 2.944439 0.000000 339 +give 0 50 3.044522 0.000000 359 +discuss 0 45 3.135494 0.000000 399 +futur 1 41 3.218876 3.218876 427 +might 0 41 3.218876 0.000000 426 +late 0 40 3.258097 0.000000 439 +must 0 40 3.258097 0.000000 442 +open 0 38 3.295837 0.000000 469 +everi 0 34 3.401197 0.000000 519 +board 1 33 3.433987 3.433987 528 +taken 0 31 3.496508 0.000000 555 +option 0 30 3.555348 0.000000 575 +limit 0 29 3.583519 0.000000 585 +particip 0 29 3.583519 0.000000 589 +consid 0 29 3.583519 0.000000 590 +usual 0 28 3.610918 0.000000 608 +measur 0 28 3.610918 0.000000 609 +session 1 26 3.688879 3.688879 643 +experiment 0 26 3.688879 0.000000 645 +begin 0 23 3.806662 0.000000 716 +minut 1 20 3.951244 3.951244 810 +left 0 19 4.007333 0.000000 851 +sign 0 16 4.174387 0.000000 970 +across 0 16 4.174387 0.000000 974 +contribut 0 15 4.248495 0.000000 1021 +split 1 14 4.317488 4.317488 1078 +consider 0 14 4.317488 0.000000 1076 +shown 0 14 4.317488 0.000000 1080 +thorsten 1 13 4.382027 4.382027 1133 +everyon 0 13 4.382027 0.000000 1148 +difficulti 0 13 4.382027 0.000000 1132 +eicken 0 13 4.382027 0.000000 1134 +outsid 0 12 4.465908 0.000000 1219 +pick 0 9 4.753590 0.000000 1498 +hang 0 9 4.753590 0.000000 1499 +andth 0 9 4.753590 0.000000 1481 +judg 1 8 4.875197 4.875197 1644 +hold 0 8 4.875197 0.000000 1645 +absolut 0 8 4.875197 0.000000 1646 +poster 1 7 5.010635 5.010635 1814 +noon 0 7 5.010635 0.000000 1804 +trade 0 7 5.010635 0.000000 1815 +explain 0 7 5.010635 0.000000 1816 +exactli 0 7 5.010635 0.000000 1817 +plu 0 6 5.164786 0.000000 2004 +willb 0 5 5.347108 0.000000 2277 +remain 0 5 5.347108 0.000000 2278 +gotten 0 4 5.568345 0.000000 2628 +chose 0 4 5.568345 0.000000 2629 +cuc 0 4 5.568345 0.000000 2630 +attack 0 3 5.857933 0.000000 3168 +memberof 0 3 5.857933 0.000000 3169 +off 0 3 5.857933 0.000000 3170 +arriv 1 2 6.263398 6.263398 4132 +subdirectori 0 2 6.263398 0.000000 4133 +thorough 0 2 6.263398 0.000000 4134 +programmingin 0 2 6.263398 0.000000 4135 +pagehigh 0 1 6.957497 0.000000 6633 +eickenspr 0 1 6.957497 0.000000 6634 +sessionthu 0 1 6.957497 0.000000 6635 +tbdpleas 0 1 6.957497 0.000000 6636 +willdetermin 0 1 6.957497 0.000000 6637 +postersess 0 1 6.957497 0.000000 6638 +cindywilliam 0 1 6.957497 0.000000 6639 +ithorizont 0 1 6.957497 0.000000 6640 +corridor 0 1 6.957497 0.000000 6641 +presentyour 0 1 6.957497 0.000000 6642 +asens 0 1 6.957497 0.000000 6643 +contempl 0 1 6.957497 0.000000 6644 +presentationswil 0 1 6.957497 0.000000 6645 +nativespeak 0 1 6.957497 0.000000 6646 +thelongest 0 1 6.957497 0.000000 6647 +tocom 0 1 6.957497 0.000000 6648 +finalreport 0 1 6.957497 0.000000 6649 +aretri 0 1 6.957497 0.000000 6650 +thesolut 0 1 6.957497 0.000000 6651 +youreject 0 1 6.957497 0.000000 6652 +webread 0 1 6.957497 0.000000 6653 +convic 0 1 6.957497 0.000000 6654 +bestsolut 0 1 6.957497 0.000000 6655 +showcas 0 1 6.957497 0.000000 6656 +ampl 0 1 6.957497 0.000000 6657 +goodexplan 0 1 6.957497 0.000000 6658 +whatyou 0 1 6.957497 0.000000 6659 +projectsproject 0 1 6.957497 0.000000 6660 +reportsproject 0 1 6.957497 0.000000 6661 +proposalsiniti 0 1 6.957497 0.000000 6662 +ideascours 0 1 6.957497 0.000000 6663 +materialshomework 0 1 6.957497 0.000000 6664 +pagebefor 0 1 6.957497 0.000000 6665 +introc 0 1 6.957497 0.000000 6666 +casec 0 1 6.957497 0.000000 6667 +technologyc 0 1 6.957497 0.000000 6668 +cachesc 0 1 6.957497 0.000000 6669 +netsc 0 1 6.957497 0.000000 6670 +spc 0 1 6.957497 0.000000 6671 +cyou 0 1 6.957497 0.000000 6672 +emdc 0 1 6.957497 0.000000 6673 +sortingc 0 1 6.957497 0.000000 6674 +spamc 0 1 6.957497 0.000000 6675 +msgpassc 0 1 6.957497 0.000000 6676 +mpic 0 1 6.957497 0.000000 6677 +cachecohc 0 1 6.957497 0.000000 6678 +locksc 0 1 6.957497 0.000000 6679 +threadsc 0 1 6.957497 0.000000 6680 +atmc 0 1 6.957497 0.000000 6681 +netc 0 1 6.957497 0.000000 6682 +scoreboardc 0 1 6.957497 0.000000 6683 +tomasuloc 0 1 6.957497 0.000000 6684 +predc 0 1 6.957497 0.000000 6685 +superscalarc 0 1 6.957497 0.000000 6686 +busesc 0 1 6.957497 0.000000 6687 +pentiummaintain 0 1 6.957497 0.000000 6688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..4ecb9d45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +class 0 199 1.609438 0.000000 37 +base 0 165 1.791759 0.000000 50 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +machin 1 129 2.079442 2.079442 95 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +april 2 77 2.564949 5.129898 196 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +simul 0 66 2.708050 0.000000 255 +januari 1 62 2.772589 2.772589 264 +march 1 61 2.833213 2.833213 295 +februari 2 54 2.944439 5.888878 328 +week 0 52 2.995732 0.000000 343 +set 0 50 3.044522 0.000000 361 +vision 1 41 3.218876 3.218876 430 +continu 0 39 3.258097 0.000000 448 +field 0 37 3.332205 0.000000 482 +staff 0 36 3.367296 0.000000 490 +random 0 34 3.401197 0.000000 511 +transform 1 32 3.465736 3.465736 542 +detect 0 26 3.688879 0.000000 646 +constraint 0 26 3.688879 0.000000 636 +motion 1 24 3.761200 3.761200 699 +flow 0 24 3.761200 0.000000 700 +recognit 0 23 3.806662 0.000000 723 +geometri 0 22 3.850148 0.000000 752 +geometr 0 19 4.007333 0.000000 852 +histori 0 19 4.007333 0.000000 853 +regular 1 17 4.110874 4.110874 929 +estim 0 17 4.110874 0.000000 930 +segment 0 17 4.110874 0.000000 931 +track 1 15 4.248495 4.248495 1029 +guest 1 12 4.465908 4.465908 1220 +calculu 0 12 4.465908 0.000000 1203 +optic 0 12 4.465908 0.000000 1221 +distanc 0 9 4.753590 0.000000 1500 +face 0 9 4.753590 0.000000 1501 +edg 0 8 4.875197 0.000000 1647 +stereo 1 7 5.010635 5.010635 1818 +parametr 1 7 5.010635 5.010635 1819 +ramin 0 7 5.010635 0.000000 1820 +justin 0 7 5.010635 0.000000 1789 +correl 1 5 5.347108 5.347108 2279 +variat 0 5 5.347108 0.000000 2248 +markov 0 5 5.347108 0.000000 2280 +snake 0 5 5.347108 0.000000 2281 +scribe 0 4 5.568345 0.000000 2631 +maximum 0 4 5.568345 0.000000 2632 +hausdorff 0 4 5.568345 0.000000 2633 +cont 1 3 5.857933 5.857933 3171 +likelihood 0 3 5.857933 0.000000 3172 +anneal 0 2 6.263398 0.000000 4136 +zabihteach 0 1 6.957497 0.000000 6689 +millerclass 0 1 6.957497 0.000000 6690 +phillip 0 1 6.957497 0.000000 6691 +suggestionsproblem 0 1 6.957497 0.000000 6692 +mestim 0 1 6.957497 0.000000 6693 +censu 0 1 6.957497 0.000000 6694 +eigenhausdorff 0 1 6.957497 0.000000 6695 +recognitionsect 0 1 6.957497 0.000000 6696 +equationoth 0 1 6.957497 0.000000 6697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..28babc93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +code 1 108 2.197225 2.197225 116 +pleas 0 113 2.197225 0.000000 114 +final 0 116 2.197225 0.000000 108 +site 0 106 2.197225 0.000000 119 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +contain 1 81 2.484907 2.484907 174 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +environ 0 84 2.484907 0.000000 177 +materi 0 75 2.639057 0.000000 221 +integr 0 67 2.708050 0.000000 245 +descript 0 64 2.772589 0.000000 271 +content 0 59 2.833213 0.000000 302 +variou 0 56 2.890372 0.000000 317 +talk 0 53 2.944439 0.000000 336 +archiv 0 49 3.044522 0.000000 364 +pointer 0 48 3.044522 0.000000 368 +directori 1 45 3.135494 3.135494 396 +natur 0 44 3.135494 0.000000 406 +execut 0 45 3.135494 0.000000 404 +small 0 39 3.258097 0.000000 447 +ofth 0 36 3.367296 0.000000 491 +compon 0 30 3.555348 0.000000 570 +turn 0 29 3.583519 0.000000 586 +variabl 0 23 3.806662 0.000000 715 +annot 1 21 3.912023 3.912023 775 +sure 0 20 3.951244 0.000000 813 +repositori 0 17 4.110874 0.000000 932 +brown 0 16 4.174387 0.000000 977 +speech 1 12 4.465908 4.465908 1222 +tag 0 7 5.010635 0.000000 1821 +corpu 1 5 5.347108 5.347108 2282 +penn 0 3 5.857933 0.000000 3094 +pagesc 0 3 5.857933 0.000000 3133 +brill 1 2 6.263398 6.263398 4137 +treebank 0 2 6.263398 0.000000 4138 +schedulewhat 0 2 6.263398 0.000000 4139 +wordnet 1 1 6.957497 6.957497 6698 +pagecsintroduct 0 1 6.957497 0.000000 6699 +understandingcomput 0 1 6.957497 0.000000 6700 +announcementsher 0 1 6.957497 0.000000 6701 +taggerbrown 0 1 6.957497 0.000000 6702 +withpart 0 1 6.957497 0.000000 6703 +wnsearchdir 0 1 6.957497 0.000000 6704 +dict 0 1 6.957497 0.000000 6705 +iicollect 0 1 6.957497 0.000000 6706 +canus 0 1 6.957497 0.000000 6707 +francisabout 0 1 6.957497 0.000000 6708 +computationallinguist 0 1 6.957497 0.000000 6709 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..ed20e3e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +cours 1 273 1.098612 1.098612 15 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +section 1 94 2.397895 2.397895 149 +follow 0 92 2.397895 0.000000 143 +info 0 85 2.484907 0.000000 176 +exam 0 86 2.484907 0.000000 169 +upson 1 71 2.639057 2.639057 218 +tuesdai 1 73 2.639057 2.639057 219 +meet 0 72 2.639057 0.000000 229 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 1 64 2.772589 2.772589 261 +room 0 59 2.833213 0.000000 301 +cover 0 55 2.944439 0.000000 329 +still 0 50 3.044522 0.000000 362 +close 0 38 3.295837 0.000000 465 +usual 0 28 3.610918 0.000000 608 +lab 1 24 3.761200 3.761200 698 +prelim 1 12 4.465908 4.465908 1201 +therefor 0 7 5.010635 0.000000 1822 +philip 0 6 5.164786 0.000000 2005 +circumst 0 5 5.347108 0.000000 2283 +materialcov 0 2 6.263398 0.000000 4140 +announcementsroom 0 1 6.957497 0.000000 6710 +unforseen 0 1 6.957497 0.000000 6711 +unableto 0 1 6.957497 0.000000 6712 +maclab 0 1 6.957497 0.000000 6713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..b8a69863 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +fall 1 181 1.609438 1.609438 40 +algorithm 1 162 1.791759 1.791759 57 +implement 0 152 1.791759 0.000000 52 +lectur 3 135 1.945910 5.837730 73 +professor 0 137 1.945910 0.000000 76 +find 1 111 2.197225 2.197225 111 +solut 0 82 2.484907 0.000000 162 +dynam 0 76 2.564949 0.000000 194 +explor 0 58 2.890372 0.000000 324 +tree 1 36 3.367296 3.367296 492 +graph 0 30 3.555348 0.000000 576 +theorem 0 21 3.912023 0.000000 786 +matrix 0 17 4.110874 0.000000 933 +closur 0 8 4.875197 0.000000 1643 +karp 0 5 5.347108 0.000000 2284 +union 1 4 5.568345 5.568345 2634 +push 1 4 5.568345 5.568345 2635 +ford 0 4 5.568345 0.000000 2636 +heap 1 3 5.857933 5.857933 3123 +dijkstra 0 3 5.857933 0.000000 3173 +monika 0 2 6.263398 0.000000 4141 +rauch 0 2 6.263398 0.000000 4142 +greedi 0 2 6.263398 0.000000 4143 +edmond 0 2 6.263398 0.000000 4144 +maxflow 2 1 6.957497 13.914994 6714 +matroid 1 1 6.957497 6.957497 6715 +binomi 1 1 6.957497 6.957497 6716 +preflow 1 1 6.957497 6.957497 6717 +henzingeremail 0 1 6.957497 0.000000 6718 +informationhomework 0 1 6.957497 0.000000 6719 +bellman 0 1 6.957497 0.000000 6720 +fibonacci 0 1 6.957497 0.000000 6721 +treap 0 1 6.957497 0.000000 6722 +randomizedsearch 0 1 6.957497 0.000000 6723 +mincut 0 1 6.957497 0.000000 6724 +dinitz 0 1 6.957497 0.000000 6725 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..94b17db4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,213 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +list 0 201 1.609438 0.000000 39 +oper 0 180 1.609438 0.000000 34 +hour 1 165 1.791759 1.791759 46 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +assign 2 135 1.945910 3.891820 66 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +file 1 132 1.945910 1.945910 70 +lectur 0 135 1.945910 0.000000 73 +introduct 1 126 2.079442 2.079442 87 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +final 2 116 2.197225 4.394450 108 +code 1 108 2.197225 2.197225 116 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +need 0 98 2.302585 0.000000 135 +grade 2 90 2.397895 4.795790 142 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +exam 1 86 2.484907 2.484907 169 +chang 1 82 2.484907 2.484907 163 +environ 0 84 2.484907 0.000000 177 +june 1 79 2.564949 2.564949 214 +sourc 1 77 2.564949 2.564949 201 +method 1 80 2.564949 2.564949 213 +exampl 1 77 2.564949 2.564949 195 +upson 2 71 2.639057 5.278114 218 +solv 1 73 2.639057 2.639057 234 +name 1 72 2.639057 2.639057 220 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +syllabu 1 67 2.708050 2.708050 247 +integr 0 67 2.708050 0.000000 245 +order 0 69 2.708050 0.000000 249 +handout 1 64 2.772589 2.772589 263 +function 1 62 2.772589 2.772589 275 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +plan 0 65 2.772589 0.000000 272 +juli 3 60 2.833213 8.499639 305 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +unix 1 58 2.890372 2.890372 308 +point 0 58 2.890372 0.000000 319 +scientif 1 53 2.944439 2.944439 341 +three 0 54 2.944439 0.000000 330 +numer 1 49 3.044522 3.044522 369 +appoint 1 49 3.044522 3.044522 358 +set 0 50 3.044522 0.000000 361 +done 1 47 3.091042 3.091042 381 +adapt 0 46 3.091042 0.000000 387 +midterm 1 45 3.135494 3.135494 392 +math 1 44 3.135494 3.135494 402 +linear 1 41 3.218876 3.218876 431 +review 1 42 3.218876 3.218876 425 +late 0 40 3.258097 0.000000 439 +error 0 40 3.258097 0.000000 449 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +least 1 35 3.401197 3.401197 516 +everi 0 34 3.401197 0.000000 519 +return 0 34 3.401197 0.000000 502 +given 0 32 3.465736 0.000000 538 +chapter 0 32 3.465736 0.000000 536 +administr 0 27 3.637586 0.000000 628 +though 0 27 3.637586 0.000000 622 +rule 0 26 3.688879 0.000000 638 +session 0 26 3.688879 0.000000 643 +reliabl 0 25 3.737670 0.000000 674 +concern 0 25 3.737670 0.000000 666 +valu 0 25 3.737670 0.000000 665 +lab 1 24 3.761200 3.761200 698 +equat 1 23 3.806662 3.806662 724 +begin 0 23 3.806662 0.000000 716 +variabl 0 23 3.806662 0.000000 715 +initi 0 23 3.806662 0.000000 717 +highli 0 23 3.806662 0.000000 725 +brows 0 23 3.806662 0.000000 726 +dai 0 22 3.850148 0.000000 753 +recommend 0 22 3.850148 0.000000 737 +instal 0 22 3.850148 0.000000 754 +viewer 0 21 3.912023 0.000000 787 +minim 1 18 4.060443 4.060443 887 +record 0 18 4.060443 0.000000 890 +account 0 18 4.060443 0.000000 882 +accept 0 18 4.060443 0.000000 879 +stand 0 18 4.060443 0.000000 891 +matrix 0 17 4.110874 0.000000 933 +macintosh 0 17 4.110874 0.000000 920 +adam 0 17 4.110874 0.000000 934 +vector 1 16 4.174387 4.174387 961 +score 1 15 4.248495 4.248495 1017 +purchas 0 15 4.248495 0.000000 1030 +matlab 2 14 4.317488 8.634976 1081 +squar 1 14 4.317488 4.317488 1082 +rank 0 14 4.317488 0.000000 1063 +polynomi 0 14 4.317488 0.000000 1069 +command 0 14 4.317488 0.000000 1083 +charl 0 13 4.382027 0.000000 1149 +composit 0 13 4.382027 0.000000 1150 +outsid 0 12 4.465908 0.000000 1219 +extra 0 11 4.553877 0.000000 1312 +total 1 10 4.653960 4.653960 1398 +matric 0 10 4.653960 0.000000 1399 +length 0 10 4.653960 0.000000 1400 +deadlin 1 9 4.753590 4.753590 1502 +pair 0 9 4.753590 0.000000 1503 +rel 0 9 4.753590 0.000000 1487 +float 0 9 4.753590 0.000000 1504 +partner 0 8 4.875197 0.000000 1648 +calendar 0 8 4.875197 0.000000 1649 +root 0 8 4.875197 0.000000 1650 +on 0 8 4.875197 0.000000 1628 +interpol 1 7 5.010635 5.010635 1823 +newton 1 7 5.010635 5.010635 1824 +elementari 0 7 5.010635 0.000000 1825 +accord 0 7 5.010635 0.000000 1826 +classroom 1 6 5.164786 5.164786 2006 +spline 0 6 5.164786 0.000000 2007 +drop 0 6 5.164786 0.000000 2008 +otherthan 0 6 5.164786 0.000000 2009 +fit 0 5 5.347108 0.000000 2285 +stabil 0 5 5.347108 0.000000 2286 +worst 0 5 5.347108 0.000000 2287 +ignor 0 5 5.347108 0.000000 2288 +registr 0 5 5.347108 0.000000 2249 +niko 1 4 5.568345 5.568345 2637 +backward 0 4 5.568345 0.000000 2638 +alon 1 3 5.857933 5.857933 3139 +euler 1 3 5.857933 5.857933 3174 +pitsiani 0 3 5.857933 0.000000 3175 +rack 0 3 5.857933 0.000000 3176 +duedat 0 3 5.857933 0.000000 3105 +uncompress 0 3 5.857933 0.000000 3177 +scientificcomput 0 2 6.263398 0.000000 4145 +stress 0 2 6.263398 0.000000 4146 +prerequisitesc 0 2 6.263398 0.000000 4058 +loan 0 2 6.263398 0.000000 4147 +renssela 0 2 6.263398 0.000000 4148 +examsther 0 2 6.263398 0.000000 4149 +hermit 0 2 6.263398 0.000000 4150 +multivari 0 2 6.263398 0.000000 4151 +folder 0 2 6.263398 0.000000 4152 +scmv 1 1 6.957497 6.957497 6726 +quadratur 1 1 6.957497 6.957497 6727 +ozan 1 1 6.957497 6.957497 6728 +siblei 1 1 6.957497 6.957497 6729 +martha 1 1 6.957497 6.957497 6730 +cubic 1 1 6.957497 6.957497 6731 +zcat 1 1 6.957497 6.957497 6732 +computationsumm 0 1 6.957497 0.000000 6733 +setsan 0 1 6.957497 0.000000 6734 +andnonlinear 0 1 6.957497 0.000000 6735 +ordinarydifferenti 0 1 6.957497 0.000000 6736 +informationstaff 0 1 6.957497 0.000000 6737 +hafizogullari 0 1 6.957497 0.000000 6738 +lecturesclass 0 1 6.957497 0.000000 6739 +administrationlauri 0 1 6.957497 0.000000 6740 +buck 0 1 6.957497 0.000000 6741 +addressedto 0 1 6.957497 0.000000 6742 +corequisit 0 1 6.957497 0.000000 6743 +materialstext 0 1 6.957497 0.000000 6744 +approachus 0 1 6.957497 0.000000 6745 +eitherth 0 1 6.957497 0.000000 6746 +labsthi 0 1 6.957497 0.000000 6747 +setsther 0 1 6.957497 0.000000 6748 +orfrom 0 1 6.957497 0.000000 6749 +computingproblem 0 1 6.957497 0.000000 6750 +behandl 0 1 6.957497 0.000000 6751 +gradefrom 0 1 6.957497 0.000000 6752 +printyour 0 1 6.957497 0.000000 6753 +firstpag 0 1 6.957497 0.000000 6754 +partnernam 0 1 6.957497 0.000000 6755 +gradingyour 0 1 6.957497 0.000000 6756 +beassign 0 1 6.957497 0.000000 6757 +onyour 0 1 6.957497 0.000000 6758 +vandermond 0 1 6.957497 0.000000 6759 +piecewis 0 1 6.957497 0.000000 6760 +cote 0 1 6.957497 0.000000 6761 +choleski 0 1 6.957497 0.000000 6762 +rung 0 1 6.957497 0.000000 6763 +kutta 0 1 6.957497 0.000000 6764 +computingat 0 1 6.957497 0.000000 6765 +rennselaerhal 0 1 6.957497 0.000000 6766 +untar 0 1 6.957497 0.000000 6767 +randperm 0 1 6.957497 0.000000 6768 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..bce5711c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +architectur 0 139 1.945910 0.000000 77 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +schedul 1 119 2.079442 2.079442 85 +final 1 116 2.197225 2.197225 108 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +memori 1 101 2.302585 2.302585 139 +book 1 99 2.302585 2.302585 131 +advanc 0 99 2.302585 0.000000 130 +text 0 98 2.302585 0.000000 133 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +comment 1 93 2.397895 2.397895 146 +follow 1 92 2.397895 2.397895 143 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +requir 1 81 2.484907 2.484907 167 +start 1 83 2.484907 2.484907 173 +environ 0 84 2.484907 0.000000 177 +chang 0 82 2.484907 0.000000 163 +wide 0 84 2.484907 0.000000 185 +second 0 81 2.484907 0.000000 166 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +complet 0 77 2.564949 0.000000 208 +homework 0 79 2.564949 0.000000 193 +mondai 0 77 2.564949 0.000000 206 +materi 2 75 2.639057 5.278114 221 +logic 0 71 2.639057 0.000000 230 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +tuesdai 0 73 2.639057 0.000000 219 +upson 0 71 2.639057 0.000000 218 +thursdai 1 70 2.708050 2.708050 241 +knowledg 0 67 2.708050 0.000000 243 +descript 1 64 2.772589 2.772589 271 +polici 1 64 2.772589 2.772589 279 +virtual 1 62 2.772589 2.772589 285 +organ 0 65 2.772589 0.000000 265 +copi 0 63 2.772589 0.000000 284 +wednesdai 0 64 2.772589 0.000000 261 +share 0 59 2.833213 0.000000 304 +content 0 59 2.833213 0.000000 302 +summer 1 56 2.890372 2.890372 311 +overview 0 56 2.890372 0.000000 323 +variou 0 56 2.890372 0.000000 317 +cover 1 55 2.944439 2.944439 329 +particular 1 51 2.995732 2.995732 352 +week 1 52 2.995732 2.995732 343 +maintain 0 51 2.995732 0.000000 342 +archiv 0 49 3.044522 0.000000 364 +understand 0 47 3.091042 0.000000 384 +discuss 1 45 3.135494 3.135494 399 +textbook 0 44 3.135494 0.000000 397 +fast 0 42 3.218876 0.000000 429 +form 0 39 3.258097 0.000000 443 +submit 0 39 3.258097 0.000000 440 +close 0 38 3.295837 0.000000 465 +purpos 0 37 3.332205 0.000000 481 +hand 0 37 3.332205 0.000000 475 +next 1 34 3.401197 3.401197 517 +concurr 0 34 3.401197 0.000000 501 +collabor 1 32 3.465736 3.465736 543 +secur 0 30 3.555348 0.000000 577 +depend 0 29 3.583519 0.000000 583 +synchron 0 29 3.583519 0.000000 588 +multiprocessor 0 28 3.610918 0.000000 605 +subject 1 26 3.688879 3.688879 647 +detect 0 26 3.688879 0.000000 646 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +programminglanguag 0 21 3.912023 0.000000 782 +kernel 0 20 3.951244 0.000000 825 +assum 0 19 4.007333 0.000000 845 +feedback 0 19 4.007333 0.000000 854 +outlin 0 17 4.110874 0.000000 914 +protect 0 17 4.110874 0.000000 935 +segment 0 17 4.110874 0.000000 931 +weekli 0 17 4.110874 0.000000 919 +micro 0 15 4.248495 0.000000 1031 +quizz 1 13 4.382027 4.382027 1151 +carri 0 13 4.382027 0.000000 1152 +assembl 0 12 4.465908 0.000000 1207 +statement 1 11 4.553877 4.553877 1313 +worth 1 11 4.553877 4.553877 1294 +evolut 0 11 4.553877 0.000000 1314 +multithread 0 11 4.553877 0.000000 1315 +peter 0 11 4.553877 0.000000 1316 +operatingsystem 0 10 4.653960 0.000000 1401 +princip 0 10 4.653960 0.000000 1397 +familiar 0 9 4.753590 0.000000 1485 +attent 0 8 4.875197 0.000000 1651 +remind 0 7 5.010635 0.000000 1799 +prevent 0 7 5.010635 0.000000 1827 +surpris 0 7 5.010635 0.000000 1828 +multiprogram 0 6 5.164786 0.000000 2010 +pace 0 6 5.164786 0.000000 2011 +ensur 0 6 5.164786 0.000000 2012 +silberschatz 0 6 5.164786 0.000000 1978 +indupraka 1 4 5.568345 5.568345 2639 +kodukula 1 4 5.568345 5.568345 2640 +deadlock 1 4 5.568345 5.568345 2641 +permiss 0 4 5.568345 0.000000 2642 +usedto 0 4 5.568345 0.000000 2643 +abraham 0 4 5.568345 0.000000 2644 +roughli 1 3 5.857933 5.857933 3097 +prereq 0 3 5.857933 0.000000 3178 +theimpact 0 3 5.857933 0.000000 3179 +audienc 0 3 5.857933 0.000000 3180 +serverless 0 3 5.857933 0.000000 3181 +todetermin 0 3 5.857933 0.000000 3182 +nawaaz 1 2 6.263398 6.263398 4153 +ahm 1 2 6.263398 6.263398 4154 +praka 0 2 6.263398 0.000000 4155 +anintroduct 0 2 6.263398 0.000000 4156 +emphasison 0 2 6.263398 0.000000 4157 +memorymanag 0 2 6.263398 0.000000 4158 +thetradit 0 2 6.263398 0.000000 4159 +galvin 0 2 6.263398 0.000000 4160 +motd 1 1 6.957497 6.957497 6769 +lldiscuss 1 1 6.957497 6.957497 6770 +prerequsit 0 1 6.957497 0.000000 6771 +processsynchron 0 1 6.957497 0.000000 6772 +requiringconst 0 1 6.957497 0.000000 6773 +prerequsitescomplet 0 1 6.957497 0.000000 6774 +inparticular 0 1 6.957497 0.000000 6775 +theintroductori 0 1 6.957497 0.000000 6776 +thatwil 0 1 6.957497 0.000000 6777 +outlineth 0 1 6.957497 0.000000 6778 +theorder 0 1 6.957497 0.000000 6779 +mutualexclus 0 1 6.957497 0.000000 6780 +timepermit 0 1 6.957497 0.000000 6781 +textbooksth 0 1 6.957497 0.000000 6782 +conceptsbook 0 1 6.957497 0.000000 6783 +distributeclass 0 1 6.957497 0.000000 6784 +noteswil 0 1 6.957497 0.000000 6785 +pageat 0 1 6.957497 0.000000 6786 +mondaythru 0 1 6.957497 0.000000 6787 +thesewil 0 1 6.957497 0.000000 6788 +thursdayat 0 1 6.957497 0.000000 6789 +gradingeach 0 1 6.957497 0.000000 6790 +weightag 0 1 6.957497 0.000000 6791 +combinedweightag 0 1 6.957497 0.000000 6792 +twomidterm 0 1 6.957497 0.000000 6793 +collaborationat 0 1 6.957497 0.000000 6794 +eachhomework 0 1 6.957497 0.000000 6795 +thehomework 0 1 6.957497 0.000000 6796 +closednot 0 1 6.957497 0.000000 6797 +induprakaskodukula 0 1 6.957497 0.000000 6798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..c7816c42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +lectur 3 135 1.945910 5.837730 73 +note 3 142 1.945910 5.837730 67 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +advanc 0 99 2.302585 0.000000 130 +octob 2 89 2.397895 4.795790 156 +novemb 2 81 2.484907 4.969814 179 +solut 2 82 2.484907 4.969814 162 +start 0 83 2.484907 0.000000 173 +decemb 2 80 2.564949 5.129898 215 +homework 2 79 2.564949 5.129898 193 +upson 0 71 2.639057 0.000000 218 +septemb 3 65 2.772589 8.317767 274 +appoint 1 49 3.044522 3.044522 358 +get 0 46 3.091042 0.000000 380 +midterm 0 45 3.135494 0.000000 392 +meta 0 9 4.753590 0.000000 1505 +scribe 0 4 5.568345 0.000000 2631 +csc 0 3 5.857933 0.000000 3183 +neal 0 3 5.857933 0.000000 3184 +languagesfal 0 2 6.263398 0.000000 4161 +glew 0 2 6.263398 0.000000 4162 +informationhandout 0 2 6.263398 0.000000 4163 +henzingerupson 0 1 6.957497 0.000000 6799 +glewupson 0 1 6.957497 0.000000 6800 +handoutshandout 0 1 6.957497 0.000000 6801 +mlhandout 0 1 6.957497 0.000000 6802 +lambdahomeworkshomework 0 1 6.957497 0.000000 6803 +grieshomework 0 1 6.957497 0.000000 6804 +notesraw 0 1 6.957497 0.000000 6805 +noteslectur 0 1 6.957497 0.000000 6806 +mllectur 0 1 6.957497 0.000000 6807 +grieslectur 0 1 6.957497 0.000000 6808 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..42fcb883 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +fall 2 181 1.609438 3.218876 40 +hour 1 165 1.791759 1.791759 46 +note 1 142 1.945910 1.945910 67 +assign 0 135 1.945910 0.000000 66 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +pleas 0 113 2.197225 0.000000 114 +advanc 0 99 2.302585 0.000000 130 +comment 0 93 2.397895 0.000000 146 +question 0 91 2.397895 0.000000 141 +mondai 0 77 2.564949 0.000000 206 +upson 1 71 2.639057 2.639057 218 +prof 0 64 2.772589 0.000000 273 +faculti 0 56 2.890372 0.000000 325 +suggest 0 53 2.944439 0.000000 331 +robert 0 30 3.555348 0.000000 567 +thur 0 19 4.007333 0.000000 847 +classic 0 14 4.317488 0.000000 1084 +nuprl 0 10 4.653960 0.000000 1402 +kumar 0 9 4.753590 0.000000 1506 +ravi 1 3 5.857933 5.857933 3185 +constabl 0 3 5.857933 0.000000 3186 +pavel 0 2 6.263398 0.000000 4164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..d95519e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +spring 0 131 2.079442 0.000000 88 +member 0 84 2.484907 0.000000 165 +addit 1 74 2.639057 2.639057 228 +maintain 0 51 2.995732 0.000000 342 +consult 0 24 3.761200 0.000000 687 +coursesc 0 4 5.568345 0.000000 2692 +individualfaculti 0 1 6.957497 0.000000 7418 +contactgloria 0 1 6.957497 0.000000 7419 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..1e1a3e48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +austin 0 168 1.791759 0.000000 63 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +analysi 1 124 2.079442 2.079442 98 +welcom 0 122 2.079442 0.000000 99 +instructor 1 108 2.197225 2.197225 107 +send 0 114 2.197225 0.000000 109 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +homework 0 79 2.564949 0.000000 193 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +new 0 64 2.772589 0.000000 262 +suggest 0 53 2.944439 0.000000 331 +announc 0 40 3.258097 0.000000 441 +tutori 0 39 3.258097 0.000000 437 +prepar 0 20 3.951244 0.000000 824 +yang 0 8 4.875197 0.000000 1652 +vicki 1 3 5.857933 5.857933 3187 +almstrum 1 2 6.263398 6.263398 4165 +linyuan 1 1 6.957497 6.957497 6809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..de946878 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +cours 0 273 1.098612 0.000000 15 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +tool 1 117 2.079442 2.079442 93 +number 1 130 2.079442 2.079442 97 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +schedul 0 119 2.079442 0.000000 85 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +final 1 116 2.197225 2.197225 108 +world 0 115 2.197225 0.000000 126 +instructor 0 108 2.197225 0.000000 107 +make 0 111 2.197225 0.000000 120 +text 0 98 2.302585 0.000000 133 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +grade 0 90 2.397895 0.000000 142 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +help 0 83 2.484907 0.000000 175 +april 1 77 2.564949 2.564949 196 +method 1 80 2.564949 2.564949 213 +exampl 0 77 2.564949 0.000000 195 +good 0 77 2.564949 0.000000 200 +logic 1 71 2.639057 2.639057 230 +order 1 69 2.708050 2.708050 249 +differ 0 66 2.708050 0.000000 253 +test 0 66 2.708050 0.000000 252 +foundat 1 62 2.772589 2.772589 286 +taylor 0 63 2.772589 0.000000 287 +interact 0 62 2.772589 0.000000 270 +copi 0 63 2.772589 0.000000 284 +type 0 61 2.833213 0.000000 296 +reason 1 57 2.890372 2.890372 318 +approach 0 48 3.044522 0.000000 366 +numer 0 49 3.044522 0.000000 369 +algebra 0 45 3.135494 0.000000 394 +mechan 0 43 3.178054 0.000000 416 +examin 1 42 3.218876 3.218876 424 +continu 0 39 3.258097 0.000000 448 +formal 1 37 3.332205 3.332205 478 +either 0 35 3.401197 0.000000 506 +specifi 0 30 3.555348 0.000000 568 +hope 0 28 3.610918 0.000000 610 +higher 0 24 3.761200 0.000000 690 +consult 0 24 3.761200 0.000000 687 +equat 0 23 3.806662 0.000000 724 +tent 0 22 3.850148 0.000000 739 +moor 0 17 4.110874 0.000000 936 +upon 1 16 4.174387 4.174387 978 +choos 0 16 4.174387 0.000000 964 +choic 0 16 4.174387 0.000000 979 +squar 0 14 4.317488 0.000000 1082 +recurs 0 13 4.382027 0.000000 1127 +guest 0 12 4.465908 0.000000 1220 +primit 0 11 4.553877 0.000000 1317 +arithmet 0 10 4.653960 0.000000 1388 +nuprl 0 10 4.653960 0.000000 1402 +pair 0 9 4.753590 0.000000 1503 +russel 0 9 4.753590 0.000000 1507 +prover 0 8 4.875197 0.000000 1653 +root 0 8 4.875197 0.000000 1650 +chief 0 7 5.010635 0.000000 1829 +boyer 0 6 5.164786 0.000000 2013 +freeli 0 6 5.164786 0.000000 2014 +backup 0 4 5.568345 0.000000 2645 +rick 0 4 5.568345 0.000000 2646 +lego 0 3 5.857933 0.000000 3188 +oral 0 3 5.857933 0.000000 3189 +samuel 0 3 5.857933 0.000000 3155 +sawada 0 3 5.857933 0.000000 3190 +otter 1 2 6.263398 6.263398 4166 +ofmathemat 0 2 6.263398 0.000000 4167 +nelson 0 2 6.263398 0.000000 4168 +ortool 0 2 6.263398 0.000000 4169 +bowen 0 2 6.263398 0.000000 4170 +guyer 0 2 6.263398 0.000000 4171 +blurb 0 1 6.957497 0.000000 6810 +theobject 0 1 6.957497 0.000000 6811 +formalizationof 0 1 6.957497 0.000000 6812 +creationof 0 1 6.957497 0.000000 6813 +systemsfor 0 1 6.957497 0.000000 6814 +formalmethod 0 1 6.957497 0.000000 6815 +suchsystem 0 1 6.957497 0.000000 6816 +imp 0 1 6.957497 0.000000 6817 +mizar 0 1 6.957497 0.000000 6818 +quaif 0 1 6.957497 0.000000 6819 +coqstud 0 1 6.957497 0.000000 6820 +aboutthes 0 1 6.957497 0.000000 6821 +projecthtml 0 1 6.957497 0.000000 6822 +theqe 0 1 6.957497 0.000000 6823 +manifestoplain 0 1 6.957497 0.000000 6824 +qedmanifestobowen 0 1 6.957497 0.000000 6825 +localform 0 1 6.957497 0.000000 6826 +tannei 0 1 6.957497 0.000000 6827 +trevor 0 1 6.957497 0.000000 6828 +hick 0 1 6.957497 0.000000 6829 +ruben 0 1 6.957497 0.000000 6830 +gamboa 0 1 6.957497 0.000000 6831 +circal 0 1 6.957497 0.000000 6832 +turpin 0 1 6.957497 0.000000 6833 +galoi 0 1 6.957497 0.000000 6834 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..6945187d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +design 1 213 1.386294 1.386294 25 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +architectur 1 139 1.945910 1.945910 77 +perform 0 143 1.945910 0.000000 74 +note 0 142 1.945910 0.000000 67 +analysi 1 124 2.079442 2.079442 98 +spring 1 131 2.079442 2.079442 88 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +select 0 91 2.397895 0.000000 154 +grade 0 90 2.397895 0.000000 142 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +exam 0 86 2.484907 0.000000 169 +orient 0 80 2.564949 0.000000 205 +homework 0 79 2.564949 0.000000 193 +dynam 0 76 2.564949 0.000000 194 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +differ 0 66 2.708050 0.000000 253 +written 0 63 2.772589 0.000000 278 +major 1 56 2.890372 2.890372 315 +variou 0 56 2.890372 0.000000 317 +space 0 57 2.890372 0.000000 310 +instruct 0 53 2.944439 0.000000 332 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +pointer 0 48 3.044522 0.000000 368 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +edit 1 42 3.218876 3.218876 418 +review 0 42 3.218876 0.000000 425 +cach 0 41 3.218876 0.000000 432 +compani 0 41 3.218876 0.000000 423 +cost 0 37 3.332205 0.000000 480 +tech 1 35 3.401197 3.401197 515 +bibliographi 0 34 3.401197 0.000000 518 +product 0 33 3.433987 0.000000 527 +compon 0 30 3.555348 0.000000 570 +focus 0 29 3.583519 0.000000 584 +particip 0 29 3.583519 0.000000 589 +limit 0 29 3.583519 0.000000 585 +administr 0 27 3.637586 0.000000 628 +static 0 27 3.637586 0.000000 619 +compar 0 26 3.688879 0.000000 648 +altern 0 26 3.688879 0.000000 641 +aspect 0 25 3.737670 0.000000 663 +input 0 23 3.806662 0.000000 727 +emphasi 0 22 3.850148 0.000000 755 +recommend 0 22 3.850148 0.000000 737 +disk 0 22 3.850148 0.000000 747 +output 0 21 3.912023 0.000000 788 +watch 0 21 3.912023 0.000000 789 +predict 0 19 4.007333 0.000000 855 +appropri 0 18 4.060443 0.000000 883 +interconnect 0 17 4.110874 0.000000 937 +attempt 0 17 4.110874 0.000000 917 +intro 0 17 4.110874 0.000000 915 +modern 0 16 4.174387 0.000000 966 +choic 0 16 4.174387 0.000000 979 +choos 0 16 4.174387 0.000000 964 +vector 0 16 4.174387 0.000000 961 +branch 1 11 4.553877 4.553877 1318 +errata 0 10 4.653960 0.000000 1403 +pair 1 9 4.753590 4.753590 1503 +significantli 0 9 4.753590 0.000000 1508 +admin 0 9 4.753590 0.000000 1476 +quantit 1 8 4.875197 4.875197 1654 +pipelin 1 7 5.010635 5.010635 1830 +metric 0 7 5.010635 0.000000 1831 +subsystem 0 6 5.164786 0.000000 2015 +hennessi 1 5 5.347108 5.347108 2289 +computerarchitectur 0 5 5.347108 0.000000 2290 +reveal 0 4 5.568345 0.000000 2647 +queu 0 4 5.568345 0.000000 2648 +buss 0 4 5.568345 0.000000 2649 +hazard 1 3 5.857933 5.857933 3191 +evaluationof 0 3 5.857933 0.000000 3192 +tertiari 0 3 5.857933 0.000000 3193 +mpp 0 3 5.857933 0.000000 3194 +insystem 0 2 6.263398 0.000000 4172 +dram 0 2 6.263398 0.000000 4173 +stedit 1 1 6.957497 6.957497 6835 +predictionch 1 1 6.957497 6.957497 6836 +revieww 1 1 6.957497 6.957497 6837 +architecturethi 0 1 6.957497 0.000000 6838 +benchmarksto 0 1 6.957497 0.000000 6839 +highperform 0 1 6.957497 0.000000 6840 +memoryhierarchi 0 1 6.957497 0.000000 6841 +studentswil 0 1 6.957497 0.000000 6842 +undertak 0 1 6.957497 0.000000 6843 +oftheir 0 1 6.957497 0.000000 6844 +informationuniqu 0 1 6.957497 0.000000 6845 +mikedahlinoffic 0 1 6.957497 0.000000 6846 +tbdtaoffic 0 1 6.957497 0.000000 6847 +tbdreadingstextbook 0 1 6.957497 0.000000 6848 +patteson 0 1 6.957497 0.000000 6849 +sheetfor 0 1 6.957497 0.000000 6850 +pattersonin 0 1 6.957497 0.000000 6851 +currentcomput 0 1 6.957497 0.000000 6852 +readinglist 0 1 6.957497 0.000000 6853 +scheduleweekdatetopicreadingduejan 0 1 6.957497 0.000000 6854 +perf 0 1 6.957497 0.000000 6855 +amdahl 0 1 6.957497 0.000000 6856 +trendsch 0 1 6.957497 0.000000 6857 +isa 0 1 6.957497 0.000000 6858 +mlkholidayf 0 1 6.957497 0.000000 6859 +proposalfeb 0 1 6.957497 0.000000 6860 +scoreboard 0 1 6.957497 0.000000 6861 +tomasulu 0 1 6.957497 0.000000 6862 +speculationch 0 1 6.957497 0.000000 6863 +processorsch 0 1 6.957497 0.000000 6864 +dfeb 0 1 6.957497 0.000000 6865 +hierarchych 0 1 6.957497 0.000000 6866 +surveyfeb 0 1 6.957497 0.000000 6867 +banksf 0 1 6.957497 0.000000 6868 +breakm 0 1 6.957497 0.000000 6869 +breakmar 0 1 6.957497 0.000000 6870 +raidch 0 1 6.957497 0.000000 6871 +networksf 0 1 6.957497 0.000000 6872 +networksch 0 1 6.957497 0.000000 6873 +checkpointapr 0 1 6.957497 0.000000 6874 +architecturesf 0 1 6.957497 0.000000 6875 +mppsch 0 1 6.957497 0.000000 6876 +preseantationsm 0 1 6.957497 0.000000 6877 +presentationsfri 0 1 6.957497 0.000000 6878 +classesm 0 1 6.957497 0.000000 6879 +reportaddit 0 1 6.957497 0.000000 6880 +resourcescours 0 1 6.957497 0.000000 6881 +reportsyahoo 0 1 6.957497 0.000000 6882 +businessand 0 1 6.957497 0.000000 6883 +economi 0 1 6.957497 0.000000 6884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..c94ec5c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +address 1 170 1.791759 1.791759 62 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +problem 0 147 1.945910 0.000000 75 +provid 1 121 2.079442 2.079442 94 +schedul 0 119 2.079442 0.000000 85 +report 0 131 2.079442 0.000000 92 +world 0 115 2.197225 0.000000 126 +final 0 116 2.197225 0.000000 108 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +internet 1 83 2.484907 2.484907 186 +wide 0 84 2.484907 0.000000 185 +resourc 0 81 2.484907 0.000000 172 +state 0 76 2.564949 0.000000 207 +refer 0 78 2.564949 0.000000 203 +solv 0 73 2.639057 0.000000 234 +goal 0 66 2.708050 0.000000 250 +syllabu 0 67 2.708050 0.000000 247 +organ 0 65 2.772589 0.000000 265 +talk 0 53 2.944439 0.000000 336 +protocol 1 45 3.135494 3.135494 407 +must 0 40 3.258097 0.000000 442 +purpos 0 37 3.332205 0.000000 481 +secur 0 30 3.555348 0.000000 577 +concern 0 25 3.737670 0.000000 666 +alloc 0 20 3.951244 0.000000 821 +longer 0 20 3.951244 0.000000 816 +verif 0 20 3.951244 0.000000 826 +less 0 18 4.060443 0.000000 892 +context 0 13 4.382027 0.000000 1153 +evolv 0 12 4.465908 0.000000 1223 +operatingsystem 0 10 4.653960 0.000000 1401 +tradit 0 10 4.653960 0.000000 1404 +guidelin 0 7 5.010635 0.000000 1832 +matur 0 5 5.347108 0.000000 2269 +hypothesi 0 4 5.568345 0.000000 2650 +behind 0 4 5.568345 0.000000 2610 +bear 0 4 5.568345 0.000000 2651 +explos 0 3 5.857933 0.000000 3138 +thetradit 0 2 6.263398 0.000000 4159 +interprocess 0 2 6.263398 0.000000 4174 +systemsuniqu 0 1 6.957497 0.000000 6885 +resultedin 0 1 6.957497 0.000000 6886 +contextof 0 1 6.957497 0.000000 6887 +understandingof 0 1 6.957497 0.000000 6888 +addressproblem 0 1 6.957497 0.000000 6889 +theissu 0 1 6.957497 0.000000 6890 +addressedin 0 1 6.957497 0.000000 6891 +occasionallyread 0 1 6.957497 0.000000 6892 +understandingcurr 0 1 6.957497 0.000000 6893 +reportspoint 0 1 6.957497 0.000000 6894 +rosterhandout 0 1 6.957497 0.000000 6895 +sslprotocol 0 1 6.957497 0.000000 6896 +proofsketch 0 1 6.957497 0.000000 6897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..49294487 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +last 2 314 1.098612 2.197224 14 +offic 2 299 1.098612 2.197224 13 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 0 343 1.098612 0.000000 19 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +class 2 199 1.609438 3.218876 37 +utexa 2 189 1.609438 3.218876 44 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +oper 1 180 1.609438 1.609438 34 +includ 0 208 1.609438 0.000000 42 +avail 2 169 1.791759 3.583518 48 +hour 2 165 1.791759 3.583518 46 +data 0 170 1.791759 0.000000 49 +austin 0 168 1.791759 0.000000 63 +object 2 138 1.945910 3.891820 79 +lectur 2 135 1.945910 3.891820 73 +problem 2 147 1.945910 3.891820 75 +note 1 142 1.945910 1.945910 67 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +model 1 145 1.945910 1.945910 69 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +final 1 116 2.197225 2.197225 108 +check 1 115 2.197225 2.197225 118 +place 1 106 2.197225 2.197225 124 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +part 2 98 2.302585 4.605170 129 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +real 0 93 2.397895 0.000000 144 +exam 3 86 2.484907 7.454721 169 +solut 3 82 2.484907 7.454721 162 +start 2 83 2.484907 4.969814 173 +info 1 85 2.484907 2.484907 176 +academ 1 82 2.484907 2.484907 178 +chang 1 82 2.484907 2.484907 163 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +homework 2 79 2.564949 5.129898 193 +exampl 2 77 2.564949 5.129898 195 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +meet 1 72 2.639057 2.639057 229 +appli 0 71 2.639057 0.000000 226 +free 0 73 2.639057 0.000000 224 +test 2 66 2.708050 5.416100 252 +practic 1 70 2.708050 2.708050 246 +thursdai 1 70 2.708050 2.708050 241 +syllabu 0 67 2.708050 0.000000 247 +handout 4 64 2.772589 11.090356 263 +wednesdai 1 64 2.772589 2.772589 261 +import 1 65 2.772589 2.772589 282 +organ 1 65 2.772589 2.772589 265 +new 0 64 2.772589 0.000000 262 +visit 0 63 2.772589 0.000000 288 +prof 0 64 2.772589 0.000000 273 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +polici 0 64 2.772589 0.000000 279 +guid 0 63 2.772589 0.000000 267 +content 2 59 2.833213 5.666426 302 +back 2 60 2.833213 5.666426 297 +locat 0 59 2.833213 0.000000 303 +automat 0 61 2.833213 0.000000 306 +reason 0 57 2.890372 0.000000 318 +overview 0 56 2.890372 0.000000 323 +instruct 1 53 2.944439 2.944439 332 +date 2 51 2.995732 5.991464 344 +maintain 0 51 2.995732 0.000000 342 +electron 1 47 3.091042 3.091042 379 +discuss 2 45 3.135494 6.270988 399 +fridai 1 44 3.135494 3.135494 390 +might 0 41 3.218876 0.000000 426 +late 2 40 3.258097 6.516194 439 +announc 1 40 3.258097 3.258097 441 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +slide 1 38 3.295837 3.295837 467 +correct 0 38 3.295837 0.000000 462 +procedur 0 36 3.367296 0.000000 488 +manual 1 35 3.401197 3.401197 504 +least 0 35 3.401197 0.000000 516 +post 0 35 3.401197 0.000000 505 +singl 0 34 3.401197 0.000000 510 +next 0 34 3.401197 0.000000 517 +jame 0 35 3.401197 0.000000 507 +go 0 33 3.433987 0.000000 529 +titl 0 31 3.496508 0.000000 556 +turn 1 29 3.583519 3.583519 586 +pass 1 28 3.610918 3.610918 611 +except 0 28 3.610918 0.000000 607 +held 0 28 3.610918 0.000000 600 +utc 0 27 3.637586 0.000000 629 +session 1 26 3.688879 3.688879 643 +proc 0 26 3.688879 0.000000 649 +valu 0 25 3.737670 0.000000 665 +toward 0 25 3.737670 0.000000 668 +begin 0 23 3.806662 0.000000 716 +thank 0 23 3.806662 0.000000 721 +disk 1 22 3.850148 3.850148 747 +period 1 22 3.850148 3.850148 743 +hierarchi 0 22 3.850148 0.000000 744 +output 1 21 3.912023 3.912023 788 +newsgroup 1 21 3.912023 3.912023 783 +thur 1 19 4.007333 4.007333 847 +prerequisit 0 19 4.007333 0.000000 846 +attend 0 18 4.060443 0.000000 893 +offici 0 18 4.060443 0.000000 894 +regist 1 17 4.110874 4.110874 938 +earli 2 16 4.174387 8.348774 968 +zhang 0 16 4.174387 0.000000 980 +letter 0 16 4.174387 0.000000 981 +ascii 0 15 4.248495 0.000000 1032 +charact 0 15 4.248495 0.000000 1028 +conduct 0 14 4.317488 0.000000 1065 +front 1 13 4.382027 4.382027 1154 +holidai 1 12 4.465908 4.465908 1224 +remov 1 12 4.465908 4.465908 1225 +pascal 1 12 4.465908 4.465908 1213 +tue 1 11 4.553877 4.553877 1308 +chri 0 11 4.553877 0.000000 1311 +extra 0 11 4.553877 0.000000 1312 +night 0 11 4.553877 0.000000 1319 +penalti 0 10 4.653960 0.000000 1405 +stack 0 10 4.653960 0.000000 1389 +cheat 0 10 4.653960 0.000000 1395 +pick 2 9 4.753590 9.507180 1498 +deadlin 0 9 4.753590 0.000000 1502 +calendar 0 8 4.875197 0.000000 1649 +fail 0 8 4.875197 0.000000 1655 +spec 0 8 4.875197 0.000000 1640 +noon 1 7 5.010635 5.010635 1804 +bit 1 7 5.010635 5.010635 1833 +paramet 1 7 5.010635 5.010635 1796 +saturdai 1 7 5.010635 5.010635 1794 +drop 1 6 5.164786 5.164786 2008 +risc 0 6 5.164786 0.000000 2016 +door 1 5 5.347108 5.347108 2291 +mac 1 5 5.347108 5.347108 2292 +circumst 0 5 5.347108 0.000000 2283 +registr 0 5 5.347108 0.000000 2249 +glanc 0 4 5.568345 0.000000 2652 +chart 0 4 5.568345 0.000000 2653 +turnin 0 4 5.568345 0.000000 2654 +labor 0 3 5.857933 0.000000 3195 +obsolet 0 3 5.857933 0.000000 3196 +yurkanan 1 2 6.263398 6.263398 4175 +dragon 1 2 6.263398 6.263398 4176 +yoonsuck 1 2 6.263398 6.263398 4177 +choe 1 2 6.263398 6.263398 4178 +yschoe 1 2 6.263398 6.263398 4179 +typo 1 2 6.263398 6.263398 4180 +folder 1 2 6.263398 6.263398 4152 +constantli 0 2 6.263398 0.000000 4181 +edmondson 0 2 6.263398 0.000000 4182 +gzhang 0 2 6.263398 0.000000 4183 +rare 0 2 6.263398 0.000000 4184 +thanksgiv 0 2 6.263398 0.000000 4185 +appeal 0 2 6.263398 0.000000 4186 +fantasm 2 1 6.957497 13.914994 6898 +p_global 2 1 6.957497 13.914994 6899 +bonu 1 1 6.957497 6.957497 6900 +electronc 1 1 6.957497 6.957497 6901 +macsbug 1 1 6.957497 6.957497 6902 +electoron 1 1 6.957497 6.957497 6903 +edum 1 1 6.957497 6.957497 6904 +withdraw 1 1 6.957497 6.957497 6905 +rightmost 1 1 6.957497 6.957497 6906 +procudur 1 1 6.957497 6.957497 6907 +powermac 1 1 6.957497 6.957497 6908 +quadra 1 1 6.957497 6.957497 6909 +onmon 0 1 6.957497 0.000000 6910 +cynthia 0 1 6.957497 0.000000 6911 +deepa 0 1 6.957497 0.000000 6912 +ramani 0 1 6.957497 0.000000 6913 +dparam 0 1 6.957497 0.000000 6914 +eduw 0 1 6.957497 0.000000 6915 +eduf 0 1 6.957497 0.000000 6916 +refund 0 1 6.957497 0.000000 6917 +extenu 0 1 6.957497 0.000000 6918 +boxin 0 1 6.957497 0.000000 6919 +endia 0 1 6.957497 0.000000 6920 +func 0 1 6.957497 0.000000 6921 +practiv 0 1 6.957497 0.000000 6922 +questionair 0 1 6.957497 0.000000 6923 +brett 0 1 6.957497 0.000000 6924 +subroutine_fil 0 1 6.957497 0.000000 6925 +exception_fil 0 1 6.957497 0.000000 6926 +avali 0 1 6.957497 0.000000 6927 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..a8265c97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +cours 0 273 1.098612 0.000000 15 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +includ 0 208 1.609438 0.000000 42 +avail 1 169 1.791759 1.791759 48 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +relat 1 139 1.945910 1.945910 68 +click 1 142 1.945910 1.945910 78 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +postscript 1 131 2.079442 2.079442 90 +studi 1 120 2.079442 2.079442 91 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +final 2 116 2.197225 4.394450 108 +version 1 113 2.197225 2.197225 122 +topic 1 114 2.197225 2.197225 110 +theori 0 111 2.197225 0.000000 127 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +structur 0 106 2.197225 0.000000 105 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +technic 0 100 2.302585 0.000000 140 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +question 2 91 2.397895 4.795790 141 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +section 0 94 2.397895 0.000000 149 +comment 0 93 2.397895 0.000000 146 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +activ 0 84 2.484907 0.000000 182 +good 1 77 2.564949 2.564949 200 +decemb 1 80 2.564949 2.564949 215 +come 0 78 2.564949 0.000000 202 +mondai 0 77 2.564949 0.000000 206 +html 1 75 2.639057 2.639057 235 +free 0 73 2.639057 0.000000 224 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +test 1 66 2.708050 2.708050 252 +thursdai 0 70 2.708050 0.000000 241 +new 1 64 2.772589 2.772589 262 +import 1 65 2.772589 2.772589 282 +guid 1 63 2.772589 2.772589 267 +complex 0 64 2.772589 0.000000 269 +descript 0 64 2.772589 0.000000 271 +room 1 59 2.833213 2.833213 301 +locat 0 59 2.833213 0.000000 303 +semest 1 58 2.890372 2.890372 312 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +sampl 0 53 2.944439 0.000000 339 +suggest 0 53 2.944439 0.000000 331 +week 1 52 2.995732 2.995732 343 +maintain 0 51 2.995732 0.000000 342 +right 1 48 3.044522 3.044522 363 +format 0 48 3.044522 0.000000 356 +frequent 0 49 3.044522 0.000000 367 +discuss 1 45 3.135494 3.135494 399 +midterm 1 45 3.135494 3.135494 392 +review 2 42 3.218876 6.437752 425 +howev 1 41 3.218876 3.218876 422 +might 0 41 3.218876 0.000000 426 +announc 1 40 3.258097 3.258097 441 +tutori 1 39 3.258097 3.258097 437 +programm 0 39 3.258097 0.000000 445 +slide 0 38 3.295837 0.000000 467 +feel 0 37 3.332205 0.000000 483 +soon 0 36 3.367296 0.000000 494 +download 0 36 3.367296 0.000000 489 +post 1 35 3.401197 3.401197 505 +next 1 34 3.401197 3.401197 517 +articl 1 33 3.433987 3.433987 530 +ad 0 32 3.465736 0.000000 544 +concept 0 32 3.465736 0.000000 537 +option 0 30 3.555348 0.000000 575 +becom 0 28 3.610918 0.000000 603 +progress 0 28 3.610918 0.000000 598 +held 0 28 3.610918 0.000000 600 +hope 0 28 3.610918 0.000000 610 +ask 0 28 3.610918 0.000000 597 +usual 0 28 3.610918 0.000000 608 +session 1 26 3.688879 3.688879 643 +relev 0 26 3.688879 0.000000 637 +comp 0 26 3.688879 0.000000 650 +todai 0 25 3.737670 0.000000 672 +lead 0 23 3.806662 0.000000 718 +dai 0 22 3.850148 0.000000 753 +almost 0 22 3.850148 0.000000 742 +newsgroup 1 21 3.912023 3.912023 783 +reserv 0 20 3.951244 0.000000 808 +item 0 19 4.007333 0.000000 856 +coupl 0 17 4.110874 0.000000 939 +whole 0 17 4.110874 0.000000 940 +sheet 0 16 4.174387 0.000000 973 +critic 0 16 4.174387 0.000000 982 +warn 0 14 4.317488 0.000000 1068 +someon 0 13 4.382027 0.000000 1128 +circuit 0 13 4.382027 0.000000 1131 +difficulti 0 13 4.382027 0.000000 1132 +prolog 0 13 4.382027 0.000000 1155 +menu 0 13 4.382027 0.000000 1156 +pascal 2 12 4.465908 8.931816 1213 +bruce 1 12 4.465908 4.465908 1226 +tune 0 12 4.465908 0.000000 1227 +uniqu 0 12 4.465908 0.000000 1228 +rememb 0 12 4.465908 0.000000 1217 +regard 0 11 4.553877 0.000000 1309 +summar 0 11 4.553877 0.000000 1295 +total 0 10 4.653960 0.000000 1398 +length 0 10 4.653960 0.000000 1400 +exact 0 9 4.753590 0.000000 1509 +prefer 0 9 4.753590 0.000000 1491 +bit 0 7 5.010635 0.000000 1833 +beyond 0 7 5.010635 0.000000 1834 +put 0 6 5.164786 0.000000 2017 +banerje 0 6 5.164786 0.000000 2018 +assignmentsprogram 0 6 5.164786 0.000000 2019 +porter 2 5 5.347108 10.694216 2293 +lang 1 5 5.347108 5.347108 2294 +door 0 5 5.347108 0.000000 2291 +rotat 0 5 5.347108 0.000000 2295 +scope 0 5 5.347108 0.000000 2296 +desk 0 5 5.347108 0.000000 2297 +caus 0 5 5.347108 0.000000 2298 +welch 1 4 5.568345 5.568345 2655 +coverag 0 4 5.568345 0.000000 2656 +glad 0 4 5.568345 0.000000 2657 +arora 0 4 5.568345 0.000000 2658 +somewhat 0 4 5.568345 0.000000 2659 +webpag 0 4 5.568345 0.000000 2660 +dwip 1 3 5.857933 5.857933 3197 +addendum 1 3 5.857933 5.857933 3150 +ansi 1 3 5.857933 5.857933 3198 +forthes 0 3 5.857933 0.000000 3199 +moreov 0 3 5.857933 0.000000 3200 +luck 0 3 5.857933 0.000000 3201 +boolean 0 3 5.857933 0.000000 3202 +experienc 0 3 5.857933 0.000000 3203 +painter 0 2 6.263398 0.000000 4187 +therewil 0 2 6.263398 0.000000 4080 +nimar 0 2 6.263398 0.000000 4188 +disregard 0 2 6.263398 0.000000 4189 +schedulec 0 2 6.263398 0.000000 4190 +newgroup 0 2 6.263398 0.000000 4191 +delphi 0 2 6.263398 0.000000 4192 +dell 0 2 6.263398 0.000000 4193 +turbo 1 1 6.957497 6.957497 6928 +andther 0 1 6.957497 0.000000 6929 +unabl 0 1 6.957497 0.000000 6930 +luckfor 0 1 6.957497 0.000000 6931 +dependon 0 1 6.957497 0.000000 6932 +availib 0 1 6.957497 0.000000 6933 +uptoth 0 1 6.957497 0.000000 6934 +resolutio 0 1 6.957497 0.000000 6935 +porterquest 0 1 6.957497 0.000000 6936 +thecont 0 1 6.957497 0.000000 6937 +atugl 0 1 6.957497 0.000000 6938 +sostai 0 1 6.957497 0.000000 6939 +iinstructorbruc 0 1 6.957497 0.000000 6940 +tasoffic 0 1 6.957497 0.000000 6941 +hourslab 0 1 6.957497 0.000000 6942 +descriptionclass 0 1 6.957497 0.000000 6943 +scheduleclass 0 1 6.957497 0.000000 6944 +articlesclass 0 1 6.957497 0.000000 6945 +newsgroupprogram 0 1 6.957497 0.000000 6946 +pascaltutori 0 1 6.957497 0.000000 6947 +faqyou 0 1 6.957497 0.000000 6948 +zipe 0 1 6.957497 0.000000 6949 +isocomp 0 1 6.957497 0.000000 6950 +maccomp 0 1 6.957497 0.000000 6951 +borlandcomp 0 1 6.957497 0.000000 6952 +misccomp 0 1 6.957497 0.000000 6953 +miscfj 0 1 6.957497 0.000000 6954 +serverto 0 1 6.957497 0.000000 6955 +importantstuff 0 1 6.957497 0.000000 6956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..f067a5be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +updat 1 191 1.609438 1.609438 41 +public 0 202 1.609438 0.000000 43 +assign 2 135 1.945910 3.891820 66 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +machin 0 129 2.079442 0.000000 95 +code 1 108 2.197225 2.197225 116 +instructor 0 108 2.197225 0.000000 107 +specif 0 106 2.197225 0.000000 106 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +center 0 88 2.397895 0.000000 158 +librari 1 87 2.484907 2.484907 181 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +exam 0 86 2.484907 0.000000 169 +contain 0 81 2.484907 0.000000 174 +exampl 1 77 2.564949 2.564949 195 +refer 0 78 2.564949 0.000000 203 +syllabu 0 67 2.708050 0.000000 247 +descript 0 64 2.772589 0.000000 271 +copi 0 63 2.772589 0.000000 284 +instruct 0 53 2.944439 0.000000 332 +sampl 0 53 2.944439 0.000000 339 +directori 0 45 3.135494 0.000000 396 +show 0 43 3.178054 0.000000 417 +submit 0 39 3.258097 0.000000 440 +workstat 0 37 3.332205 0.000000 479 +manual 0 35 3.401197 0.000000 504 +option 0 30 3.555348 0.000000 575 +platform 0 29 3.583519 0.000000 591 +turn 0 29 3.583519 0.000000 586 +utc 1 27 3.637586 3.637586 629 +wish 0 24 3.761200 0.000000 692 +higher 0 24 3.761200 0.000000 690 +instal 0 22 3.850148 0.000000 754 +score 0 15 4.248495 0.000000 1017 +donald 0 9 4.753590 0.000000 1510 +curv 0 8 4.875197 0.000000 1656 +driver 0 8 4.875197 0.000000 1657 +sciencesdepart 0 6 5.164786 0.000000 2020 +slate 0 6 5.164786 0.000000 2021 +opengl 1 5 5.347108 5.347108 2299 +fussel 0 5 5.347108 0.000000 2300 +ousterhout 0 5 5.347108 0.000000 2301 +hasbeen 0 4 5.568345 0.000000 2661 +makefil 0 4 5.568345 0.000000 2662 +welch 0 4 5.568345 0.000000 2655 +xlib 0 3 5.857933 0.000000 3204 +mesa 1 2 6.263398 6.263398 4194 +cscomput 0 2 6.263398 0.000000 4195 +anopengl 0 2 6.263398 0.000000 4196 +billthecat 0 2 6.263398 0.000000 4197 +graphicsspr 0 1 6.957497 0.000000 6957 +oneor 0 1 6.957497 0.000000 6958 +examwil 0 1 6.957497 0.000000 6959 +bothmai 0 1 6.957497 0.000000 6960 +willcount 0 1 6.957497 0.000000 6961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..3b8f4db7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +assign 1 135 1.945910 1.945910 66 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +machin 1 129 2.079442 2.079442 95 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +code 1 108 2.197225 2.197225 116 +instructor 0 108 2.197225 0.000000 107 +specif 0 106 2.197225 0.000000 106 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +center 0 88 2.397895 0.000000 158 +librari 1 87 2.484907 2.484907 181 +contain 0 81 2.484907 0.000000 174 +exampl 1 77 2.564949 2.564949 195 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +syllabu 0 67 2.708050 0.000000 247 +descript 0 64 2.772589 0.000000 271 +copi 0 63 2.772589 0.000000 284 +new 0 64 2.772589 0.000000 262 +instruct 0 53 2.944439 0.000000 332 +sampl 0 53 2.944439 0.000000 339 +directori 0 45 3.135494 0.000000 396 +workstat 0 37 3.332205 0.000000 479 +manual 0 35 3.401197 0.000000 504 +platform 0 29 3.583519 0.000000 591 +turn 0 29 3.583519 0.000000 586 +utc 1 27 3.637586 3.637586 629 +wish 0 24 3.761200 0.000000 692 +instal 0 22 3.850148 0.000000 754 +demo 0 18 4.060443 0.000000 888 +donald 0 9 4.753590 0.000000 1510 +driver 0 8 4.875197 0.000000 1657 +sciencesdepart 0 6 5.164786 0.000000 2020 +slate 0 6 5.164786 0.000000 2021 +opengl 1 5 5.347108 5.347108 2299 +fussel 0 5 5.347108 0.000000 2300 +ousterhout 0 5 5.347108 0.000000 2301 +welch 1 4 5.568345 5.568345 2655 +hasbeen 0 4 5.568345 0.000000 2661 +makefil 0 4 5.568345 0.000000 2662 +turnin 0 4 5.568345 0.000000 2654 +walker 1 3 5.857933 5.857933 3161 +xlib 0 3 5.857933 0.000000 3204 +mesa 1 2 6.263398 6.263398 4194 +anopengl 0 2 6.263398 0.000000 4196 +billthecat 0 2 6.263398 0.000000 4197 +repair 0 2 6.263398 0.000000 4198 +reinstal 1 1 6.957497 6.957497 6962 +gcomput 0 1 6.957497 0.000000 6963 +graphicsfal 0 1 6.957497 0.000000 6964 +libtcl 0 1 6.957497 0.000000 6965 +libtk 0 1 6.957497 0.000000 6966 +tclsh 0 1 6.957497 0.000000 6967 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..216c53da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +mathemat 0 108 2.197225 0.000000 123 +send 0 114 2.197225 0.000000 109 +comment 0 93 2.397895 0.000000 146 +chang 0 82 2.484907 0.000000 163 +homework 1 79 2.564949 2.564949 193 +appear 1 78 2.564949 2.564949 210 +david 0 71 2.639057 0.000000 232 +syllabu 0 67 2.708050 0.000000 247 +taylor 1 63 2.772589 2.772589 287 +septemb 0 65 2.772589 0.000000 274 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +howev 0 41 3.218876 0.000000 422 +word 0 34 3.401197 0.000000 508 +abl 0 30 3.555348 0.000000 566 +common 0 30 3.555348 0.000000 574 +background 0 25 3.737670 0.000000 664 +notic 0 25 3.737670 0.000000 675 +letter 0 16 4.174387 0.000000 981 +english 0 15 4.248495 0.000000 1033 +station 0 13 4.382027 0.000000 1157 +canada 0 13 4.382027 0.000000 1158 +french 1 9 4.753590 4.753590 1511 +cryptographi 0 9 4.753590 0.000000 1512 +recogn 0 5 5.347108 0.000000 2302 +basement 0 4 5.568345 0.000000 2663 +zuckerman 0 3 5.857933 0.000000 3205 +frequenc 0 3 5.857933 0.000000 3206 +hqliu 1 2 6.263398 6.263398 4199 +huiqun 0 2 6.263398 0.000000 4200 +drastic 0 2 6.263398 0.000000 4201 +ciphertext 0 1 6.957497 0.000000 6968 +digram 0 1 6.957497 0.000000 6969 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..82934856 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,315 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +class 2 199 1.609438 3.218876 37 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +distribut 2 162 1.791759 3.583518 51 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +algorithm 1 162 1.791759 1.791759 57 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +problem 2 147 1.945910 3.891820 75 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +hall 1 146 1.945910 1.945910 65 +lectur 0 135 1.945910 0.000000 73 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +machin 0 129 2.079442 0.000000 95 +studi 0 120 2.079442 0.000000 91 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +send 1 114 2.197225 2.197225 109 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +take 1 97 2.302585 2.302585 134 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +grade 0 90 2.397895 0.000000 142 +real 0 93 2.397895 0.000000 144 +follow 0 92 2.397895 0.000000 143 +question 0 91 2.397895 0.000000 141 +solut 2 82 2.484907 4.969814 162 +requir 1 81 2.484907 2.484907 167 +exam 1 86 2.484907 2.484907 169 +second 1 81 2.484907 2.484907 166 +build 0 85 2.484907 0.000000 184 +wide 0 84 2.484907 0.000000 185 +academ 0 82 2.484907 0.000000 178 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +librari 0 87 2.484907 0.000000 181 +mondai 1 77 2.564949 2.564949 206 +state 1 76 2.564949 2.564949 207 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +come 0 78 2.564949 0.000000 202 +solv 1 73 2.639057 2.639057 234 +tuesdai 0 73 2.639057 0.000000 219 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +logic 0 71 2.639057 0.000000 230 +servic 0 72 2.639057 0.000000 236 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +effici 0 73 2.639057 0.000000 233 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +thursdai 0 70 2.708050 0.000000 241 +order 0 69 2.708050 0.000000 249 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +improv 1 62 2.772589 2.772589 289 +taylor 0 63 2.772589 0.000000 287 +abstract 0 62 2.772589 0.000000 276 +visit 0 63 2.772589 0.000000 288 +locat 0 59 2.833213 0.000000 303 +content 0 59 2.833213 0.000000 302 +share 0 59 2.833213 0.000000 304 +point 1 58 2.890372 2.890372 319 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +suggest 1 53 2.944439 2.944439 331 +cover 1 55 2.944439 2.944439 329 +allow 1 53 2.944439 2.944439 333 +instruct 0 53 2.944439 0.000000 332 +three 0 54 2.944439 0.000000 330 +case 0 51 2.995732 0.000000 351 +week 0 52 2.995732 0.000000 343 +particular 0 51 2.995732 0.000000 352 +give 1 50 3.044522 3.044522 359 +set 1 50 3.044522 3.044522 361 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +right 0 48 3.044522 0.000000 363 +electron 0 47 3.091042 0.000000 379 +protocol 2 45 3.135494 6.270988 407 +textbook 1 44 3.135494 3.135494 397 +discuss 1 45 3.135494 3.135494 399 +midterm 1 45 3.135494 3.135494 392 +better 0 45 3.135494 0.000000 401 +describ 0 45 3.135494 0.000000 400 +show 1 43 3.178054 3.178054 417 +mechan 0 43 3.178054 0.000000 416 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +howev 0 41 3.218876 0.000000 422 +author 0 39 3.258097 0.000000 450 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +correct 0 38 3.295837 0.000000 462 +expect 1 37 3.332205 3.332205 484 +cost 1 37 3.332205 3.332205 480 +feel 0 37 3.332205 0.000000 483 +ofth 1 36 3.367296 3.367296 491 +global 1 34 3.401197 3.401197 520 +survei 1 35 3.401197 3.401197 513 +singl 0 34 3.401197 0.000000 510 +collabor 1 32 3.465736 3.465736 543 +given 0 32 3.465736 0.000000 538 +idea 0 32 3.465736 0.000000 545 +someth 1 31 3.496508 3.496508 554 +often 0 31 3.496508 0.000000 551 +secur 1 30 3.555348 3.555348 577 +robert 0 30 3.555348 0.000000 567 +option 0 30 3.555348 0.000000 575 +produc 0 30 3.555348 0.000000 572 +depend 1 29 3.583519 3.583519 583 +synchron 0 29 3.583519 0.000000 588 +consid 0 29 3.583519 0.000000 590 +ask 0 28 3.610918 0.000000 597 +team 0 27 3.637586 0.000000 625 +detect 1 26 3.688879 3.688879 646 +consist 1 26 3.688879 3.688879 651 +effort 0 26 3.688879 0.000000 652 +primari 0 25 3.737670 0.000000 669 +demonstr 0 24 3.761200 0.000000 694 +proof 1 23 3.806662 3.806662 720 +size 0 23 3.806662 0.000000 713 +properti 0 22 3.850148 0.000000 749 +newsgroup 1 21 3.912023 3.912023 783 +kernel 0 20 3.951244 0.000000 825 +prepar 0 20 3.951244 0.000000 824 +prove 1 19 4.007333 4.007333 848 +assum 1 19 4.007333 4.007333 845 +log 0 19 4.007333 0.000000 857 +encourag 1 18 4.060443 4.060443 880 +appropri 0 18 4.060443 0.000000 883 +moor 0 17 4.110874 0.000000 936 +whether 0 17 4.110874 0.000000 918 +previous 0 17 4.110874 0.000000 923 +otherwis 0 17 4.110874 0.000000 922 +monitor 0 17 4.110874 0.000000 941 +weslei 0 16 4.174387 0.000000 983 +vector 0 16 4.174387 0.000000 961 +precis 0 15 4.248495 0.000000 1023 +deriv 1 13 4.382027 4.382027 1145 +weak 0 13 4.382027 0.000000 1159 +cannot 0 13 4.382027 0.000000 1144 +central 0 13 4.382027 0.000000 1160 +asynchron 1 12 4.465908 4.465908 1229 +addison 0 12 4.465908 0.000000 1230 +replic 0 12 4.465908 0.000000 1231 +skill 0 12 4.465908 0.000000 1205 +onth 0 12 4.465908 0.000000 1218 +clock 1 11 4.553877 4.553877 1320 +imposs 0 9 4.753590 0.000000 1513 +clear 0 9 4.753590 0.000000 1488 +assumpt 0 9 4.753590 0.000000 1514 +matter 0 8 4.875197 0.000000 1627 +told 0 8 4.875197 0.000000 1658 +ideal 0 8 4.875197 0.000000 1630 +predic 1 7 5.010635 5.010635 1806 +encrypt 0 7 5.010635 0.000000 1835 +henc 0 7 5.010635 0.000000 1805 +guidelin 0 7 5.010635 0.000000 1832 +channel 0 7 5.010635 0.000000 1836 +distributedsystem 1 6 5.164786 5.164786 2022 +arrang 0 6 5.164786 0.000000 2023 +tobe 0 6 5.164786 0.000000 1995 +causal 0 6 5.164786 0.000000 2024 +wrong 0 6 5.164786 0.000000 2025 +constitut 0 6 5.164786 0.000000 2026 +snapshot 1 5 5.347108 5.347108 2303 +colleagu 1 5 5.347108 5.347108 2304 +multicast 0 5 5.347108 0.000000 2305 +authent 0 5 5.347108 0.000000 2306 +volunt 0 5 5.347108 0.000000 2307 +explicitli 0 5 5.347108 0.000000 2308 +stabl 0 5 5.347108 0.000000 2309 +exchang 0 5 5.347108 0.000000 2310 +lorenzo 1 4 5.568345 5.568345 2588 +deadlock 1 4 5.568345 5.568345 2641 +cut 0 4 5.568345 0.000000 2620 +disconnect 0 4 5.568345 0.000000 2664 +subsequ 0 4 5.568345 0.000000 2665 +accompani 0 4 5.568345 0.000000 2666 +unless 0 4 5.568345 0.000000 2607 +rajeev 1 3 5.857933 5.857933 3152 +agreement 1 3 5.857933 5.857933 3207 +pertain 0 3 5.857933 0.000000 3208 +alvisi 0 3 5.857933 0.000000 3095 +commerc 0 3 5.857933 0.000000 3209 +credibl 0 3 5.857933 0.000000 3210 +violat 0 3 5.857933 0.000000 3211 +urg 0 3 5.857933 0.000000 3212 +agener 0 3 5.857933 0.000000 3213 +conceptu 0 3 5.857933 0.000000 3214 +joshi 1 2 6.263398 6.263398 4202 +byzantin 1 2 6.263398 6.263398 4203 +requiredtextbook 0 2 6.263398 0.000000 4204 +checkpoint 0 2 6.263398 0.000000 4205 +replica 0 2 6.263398 0.000000 4206 +towrit 0 2 6.263398 0.000000 4207 +algorithmi 0 2 6.263398 0.000000 4208 +moreeffici 0 2 6.263398 0.000000 4209 +simpler 0 2 6.263398 0.000000 4210 +fifo 1 1 6.957497 6.957497 6970 +ispr 0 1 6.957497 0.000000 6971 +alvisiteach 0 1 6.957497 0.000000 6972 +joshicont 0 1 6.957497 0.000000 6973 +stafflorenzo 0 1 6.957497 0.000000 6974 +mechanicsi 0 1 6.957497 0.000000 6975 +remaind 0 1 6.957497 0.000000 6976 +classat 0 1 6.957497 0.000000 6977 +isutexa 0 1 6.957497 0.000000 6978 +mullend 0 1 6.957497 0.000000 6979 +acmpress 0 1 6.957497 0.000000 6980 +contentc 0 1 6.957497 0.000000 6981 +tomorrow 0 1 6.957497 0.000000 6982 +messagedeliveri 0 1 6.957497 0.000000 6983 +backupapproach 0 1 6.957497 0.000000 6984 +thepresent 0 1 6.957497 0.000000 6985 +exemplifi 0 1 6.957497 0.000000 6986 +principleshav 0 1 6.957497 0.000000 6987 +meor 0 1 6.957497 0.000000 6988 +apresent 0 1 6.957497 0.000000 6989 +networksgradingther 0 1 6.957497 0.000000 6990 +begrad 0 1 6.957497 0.000000 6991 +onbehalf 0 1 6.957497 0.000000 6992 +willrec 0 1 6.957497 0.000000 6993 +ispermit 0 1 6.957497 0.000000 6994 +acollabor 0 1 6.957497 0.000000 6995 +forgrad 0 1 6.957497 0.000000 6996 +collaborationswil 0 1 6.957497 0.000000 6997 +nocollabor 0 1 6.957497 0.000000 6998 +issuesthat 0 1 6.957497 0.000000 6999 +bedistribut 0 1 6.957497 0.000000 7000 +tocomplet 0 1 6.957497 0.000000 7001 +twolectur 0 1 6.957497 0.000000 7002 +choosethi 0 1 6.957497 0.000000 7003 +asingl 0 1 6.957497 0.000000 7004 +warmli 0 1 6.957497 0.000000 7005 +toconsid 0 1 6.957497 0.000000 7006 +excellentopportun 0 1 6.957497 0.000000 7007 +setsin 0 1 6.957497 0.000000 7008 +shouldconform 0 1 6.957497 0.000000 7009 +synonym 0 1 6.957497 0.000000 7010 +isrequir 0 1 6.957497 0.000000 7011 +thatmak 0 1 6.957497 0.000000 7012 +insuffici 0 1 6.957497 0.000000 7013 +ofcorrect 0 1 6.957497 0.000000 7014 +thetextbook 0 1 6.957497 0.000000 7015 +asnapshot 0 1 6.957497 0.000000 7016 +theprotocol 0 1 6.957497 0.000000 7017 +atmost 0 1 6.957497 0.000000 7018 +mattern 0 1 6.957497 0.000000 7019 +thatcontain 0 1 6.957497 0.000000 7020 +resist 0 1 6.957497 0.000000 7021 +thetempt 0 1 6.957497 0.000000 7022 +monitorprocess 0 1 6.957497 0.000000 7023 +basedsnapshot 0 1 6.957497 0.000000 7024 +nowonlin 0 1 6.957497 0.000000 7025 +filedescrib 0 1 6.957497 0.000000 7026 +examth 0 1 6.957497 0.000000 7027 +fridaymai 0 1 6.957497 0.000000 7028 +thepostscript 0 1 6.957497 0.000000 7029 +freeto 0 1 6.957497 0.000000 7030 +yoursuggest 0 1 6.957497 0.000000 7031 +edurajeev 0 1 6.957497 0.000000 7032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..ad6b24c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +austin 0 168 1.791759 0.000000 63 +note 1 142 1.945910 1.945910 67 +welcom 0 122 2.079442 0.000000 99 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +version 0 113 2.197225 0.000000 122 +look 0 107 2.197225 0.000000 115 +find 0 111 2.197225 0.000000 111 +technic 0 100 2.302585 0.000000 140 +homepag 0 93 2.397895 0.000000 148 +solut 0 82 2.484907 0.000000 162 +requir 0 81 2.484907 0.000000 167 +homework 0 79 2.564949 0.000000 193 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +differ 0 66 2.708050 0.000000 253 +practic 0 70 2.708050 0.000000 246 +syllabu 0 67 2.708050 0.000000 247 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +electron 0 47 3.091042 0.000000 379 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +taught 1 33 3.433987 3.433987 526 +chapter 0 32 3.465736 0.000000 536 +turn 0 29 3.583519 0.000000 586 +adam 0 17 4.110874 0.000000 934 +reflect 0 15 4.248495 0.000000 1034 +overhead 0 15 4.248495 0.000000 1035 +correspond 0 10 4.653960 0.000000 1382 +informationabout 0 9 4.753590 0.000000 1515 +scope 0 5 5.347108 0.000000 2296 +jacob 1 4 5.568345 5.568345 2667 +kornerup 1 3 5.857933 5.857933 3215 +bywil 0 1 6.957497 0.000000 7033 +linea 0 1 6.957497 0.000000 7034 +inhomework 0 1 6.957497 0.000000 7035 +crude 0 1 6.957497 0.000000 7036 +newsgrouputexa 0 1 6.957497 0.000000 7037 +takesplac 0 1 6.957497 0.000000 7038 +pascalprogramm 0 1 6.957497 0.000000 7039 +viewinginform 0 1 6.957497 0.000000 7040 +projecthow 0 1 6.957497 0.000000 7041 +examand 0 1 6.957497 0.000000 7042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..46830cea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +cours 2 273 1.098612 2.197224 15 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +professor 0 137 1.945910 0.000000 76 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +hall 0 146 1.945910 0.000000 65 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +introduct 0 126 2.079442 0.000000 87 +document 0 121 2.079442 0.000000 89 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +pleas 0 113 2.197225 0.000000 114 +text 1 98 2.302585 2.302585 133 +advanc 0 99 2.302585 0.000000 130 +follow 1 92 2.397895 2.397895 143 +associ 0 93 2.397895 0.000000 151 +librari 1 87 2.484907 2.484907 181 +solut 1 82 2.484907 2.484907 162 +internet 0 83 2.484907 0.000000 186 +institut 0 84 2.484907 0.000000 187 +info 0 85 2.484907 0.000000 176 +orient 1 80 2.564949 2.564949 205 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +mondai 0 77 2.564949 0.000000 206 +master 0 76 2.564949 0.000000 216 +refer 0 78 2.564949 0.000000 203 +know 0 80 2.564949 0.000000 198 +good 0 77 2.564949 0.000000 200 +write 0 72 2.639057 0.000000 222 +materi 0 75 2.639057 0.000000 221 +summari 0 73 2.639057 0.000000 237 +java 3 70 2.708050 8.124150 248 +wednesdai 0 64 2.772589 0.000000 261 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +think 0 57 2.890372 0.000000 314 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +standard 1 48 3.044522 3.044522 365 +archiv 1 49 3.044522 3.044522 364 +give 0 50 3.044522 0.000000 359 +discuss 0 45 3.135494 0.000000 399 +offer 0 43 3.178054 0.000000 414 +edit 0 42 3.218876 0.000000 418 +programm 0 39 3.258097 0.000000 445 +open 0 38 3.295837 0.000000 469 +manual 1 35 3.401197 3.401197 504 +tech 0 35 3.401197 0.000000 515 +eduoffic 1 33 3.433987 3.433987 531 +john 0 33 3.433987 0.000000 532 +product 0 33 3.433987 0.000000 527 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +intend 0 28 3.610918 0.000000 599 +comp 2 26 3.688879 7.377758 650 +challeng 0 26 3.688879 0.000000 653 +greg 0 24 3.761200 0.000000 695 +pattern 0 24 3.761200 0.000000 689 +fellow 0 24 3.761200 0.000000 701 +lab 0 24 3.761200 0.000000 698 +newsgroup 1 21 3.912023 3.912023 783 +annot 0 21 3.912023 0.000000 775 +applet 1 20 3.951244 3.951244 827 +element 0 18 4.060443 0.000000 895 +encourag 0 18 4.060443 0.000000 880 +event 0 18 4.060443 0.000000 896 +weslei 2 16 4.174387 8.348774 983 +alreadi 0 16 4.174387 0.000000 963 +style 1 15 4.248495 4.248495 1036 +pagec 0 15 4.248495 0.000000 1011 +rate 0 15 4.248495 0.000000 1037 +draft 1 14 4.317488 4.317488 1085 +manner 0 14 4.317488 0.000000 1074 +opportun 1 13 4.382027 4.382027 1161 +station 0 13 4.382027 0.000000 1157 +johnson 0 13 4.382027 0.000000 1162 +misc 0 13 4.382027 0.000000 1124 +addison 2 12 4.465908 8.931816 1230 +captur 0 12 4.465908 0.000000 1232 +evolut 0 11 4.553877 0.000000 1314 +denni 0 11 4.553877 0.000000 1321 +strongli 0 10 4.653960 0.000000 1406 +classmat 0 9 4.753590 0.000000 1516 +doug 0 9 4.753590 0.000000 1517 +virginia 0 8 4.875197 0.000000 1659 +irvin 0 8 4.875197 0.000000 1660 +bookstor 0 7 5.010635 0.000000 1837 +prentic 0 7 5.010635 0.000000 1838 +usenet 0 7 5.010635 0.000000 1839 +dead 0 7 5.010635 0.000000 1840 +forum 0 6 5.164786 0.000000 2027 +mirror 0 6 5.164786 0.000000 2028 +huge 0 6 5.164786 0.000000 1991 +lang 2 5 5.347108 10.694216 2294 +templat 1 5 5.347108 5.347108 2311 +appt 0 5 5.347108 0.000000 2312 +gokul 1 4 5.568345 5.568345 2668 +polymorph 0 4 5.568345 0.000000 2627 +wilei 0 4 5.568345 0.000000 2669 +faq 1 3 5.857933 5.857933 3216 +lavend 1 3 5.857933 5.857933 3217 +cline 0 3 5.857933 0.000000 3218 +gamma 0 3 5.857933 0.000000 3219 +hotjava 0 3 5.857933 0.000000 3220 +javascript 0 3 5.857933 0.000000 3221 +ansi 0 3 5.857933 0.000000 3198 +polytechn 0 3 5.857933 0.000000 3222 +jar 0 3 5.857933 0.000000 3223 +setup 1 2 6.263398 6.263398 4211 +infocours 0 2 6.263398 0.000000 4212 +noteshomework 0 2 6.263398 0.000000 4102 +profici 0 2 6.263398 0.000000 4103 +coop 0 2 6.263398 0.000000 4213 +materiali 0 2 6.263398 0.000000 4214 +drawn 0 2 6.263398 0.000000 4215 +elli 0 2 6.263398 0.000000 4216 +helm 0 2 6.263398 0.000000 4217 +reusabl 0 2 6.263398 0.000000 4218 +renssela 0 2 6.263398 0.000000 4148 +sourcesth 0 2 6.263398 0.000000 4219 +javasoft 0 2 6.263398 0.000000 4220 +gamelan 0 2 6.263398 0.000000 4221 +centr 0 2 6.263398 0.000000 4222 +stroustrup 1 1 6.957497 6.957497 7043 +libg 1 1 6.957497 6.957497 7044 +descriptionc 0 1 6.957497 0.000000 7045 +programminglast 0 1 6.957497 0.000000 7046 +rajaram 0 1 6.957497 0.000000 7047 +lavendercours 0 1 6.957497 0.000000 7048 +syllabusannouncementslectur 0 1 6.957497 0.000000 7049 +solutionsprogram 0 1 6.957497 0.000000 7050 +assignmentsgnu 0 1 6.957497 0.000000 7051 +manualsstandard 0 1 6.957497 0.000000 7052 +codesocket 0 1 6.957497 0.000000 7053 +manualdescript 0 1 6.957497 0.000000 7054 +anintroductori 0 1 6.957497 0.000000 7055 +reusablepattern 0 1 6.957497 0.000000 7056 +typehierarchi 0 1 6.957497 0.000000 7057 +professionallyus 0 1 6.957497 0.000000 7058 +horstmann 0 1 6.957497 0.000000 7059 +cargil 0 1 6.957497 0.000000 7060 +lomow 0 1 6.957497 0.000000 7061 +coplien 0 1 6.957497 0.000000 7062 +idiom 0 1 6.957497 0.000000 7063 +plauger 0 1 6.957497 0.000000 7064 +vlissid 0 1 6.957497 0.000000 7065 +announcementsabout 0 1 6.957497 0.000000 7066 +linediscuss 0 1 6.957497 0.000000 7067 +lavendery 0 1 6.957497 0.000000 7068 +helpjava 0 1 6.957497 0.000000 7069 +advocaci 0 1 6.957497 0.000000 7070 +oopth 0 1 6.957497 0.000000 7071 +objectspac 0 1 6.957497 0.000000 7072 +libstdc 0 1 6.957497 0.000000 7073 +mitgnu 0 1 6.957497 0.000000 7074 +cygnusgnu 0 1 6.957497 0.000000 7075 +ftpobject 0 1 6.957497 0.000000 7076 +developmentindex 0 1 6.957497 0.000000 7077 +librariesth 0 1 6.957497 0.000000 7078 +libraryindex 0 1 6.957497 0.000000 7079 +talig 0 1 6.957497 0.000000 7080 +frameworkjava 0 1 6.957497 0.000000 7081 +registri 0 1 6.957497 0.000000 7082 +espresso 0 1 6.957497 0.000000 7083 +kafura 0 1 6.957497 0.000000 7084 +techdoug 0 1 6.957497 0.000000 7085 +schmidt 0 1 6.957497 0.000000 7086 +universitydoug 0 1 6.957497 0.000000 7087 +sunyintroductori 0 1 6.957497 0.000000 7088 +groningen 0 1 6.957497 0.000000 7089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..fc760475 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +utexa 1 189 1.609438 1.609438 44 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +world 0 115 2.197225 0.000000 126 +memori 0 101 2.302585 0.000000 139 +commun 1 95 2.397895 2.397895 157 +exampl 1 77 2.564949 2.564949 195 +interfac 0 79 2.564949 0.000000 209 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +decemb 0 80 2.564949 0.000000 215 +tuesdai 1 73 2.639057 2.639057 219 +onlin 0 75 2.639057 0.000000 223 +thursdai 1 70 2.708050 2.708050 241 +practic 0 70 2.708050 0.000000 246 +taylor 0 63 2.772589 0.000000 287 +handout 0 64 2.772589 0.000000 263 +foundat 0 62 2.772589 0.000000 286 +share 0 59 2.833213 0.000000 304 +case 0 51 2.995732 0.000000 351 +adapt 0 46 3.091042 0.000000 387 +tutori 0 39 3.258097 0.000000 437 +copyright 0 36 3.367296 0.000000 495 +manual 1 35 3.401197 3.401197 504 +pass 0 28 3.610918 0.000000 611 +spent 0 25 3.737670 0.000000 676 +thread 0 23 3.806662 0.000000 722 +partit 0 16 4.174387 0.000000 984 +hello 0 10 4.653960 0.000000 1407 +calvin 1 9 4.753590 4.753590 1518 +compilersfal 0 2 6.263398 0.000000 4223 +tera 0 2 6.263398 0.000000 4224 +skeleton 0 2 6.263398 0.000000 4225 +ironman 0 2 6.263398 0.000000 4226 +logp 0 2 6.263398 0.000000 4227 +grid 0 2 6.263398 0.000000 4228 +compilerscst 0 1 6.957497 0.000000 7090 +posix 0 1 6.957497 0.000000 7091 +hierarchieslast 0 1 6.957497 0.000000 7092 +linlin 0 1 6.957497 0.000000 7093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..6382eb39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +mail 1 238 1.386294 1.386294 22 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +read 0 154 1.791759 0.000000 47 +assign 0 135 1.945910 0.000000 66 +send 1 114 2.197225 2.197225 109 +handout 0 64 2.772589 0.000000 263 +prof 0 64 2.772589 0.000000 273 +newsgroup 0 21 3.912023 0.000000 783 +csintroduct 0 1 6.957497 0.000000 7094 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..9f75234a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +topic 0 114 2.197225 0.000000 110 +thot 0 1 6.957497 0.000000 7095 +systemsfil 0 1 6.957497 0.000000 7096 +systemstopolog 0 1 6.957497 0.000000 7097 +systemselectron 0 1 6.957497 0.000000 7098 +commenrcefailur 0 1 6.957497 0.000000 7099 +detectorsdistribut 0 1 6.957497 0.000000 7100 +objectsconsistencysecuregroup 0 1 6.957497 0.000000 7101 +communicationlanguag 0 1 6.957497 0.000000 7102 +dsmmobil 0 1 6.957497 0.000000 7103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..4cc65402 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +project 0 340 1.098612 0.000000 18 +list 0 201 1.609438 0.000000 39 +databas 0 122 2.079442 0.000000 86 +materi 0 75 2.639057 0.000000 221 +term 0 43 3.178054 0.000000 411 +mine 1 26 3.688879 3.688879 654 +monitor 1 17 4.110874 4.110874 941 +daniel 0 12 4.465908 0.000000 1233 +databasesprof 0 1 6.957497 0.000000 7104 +mirankernew 0 1 6.957497 0.000000 7105 +seminarschedul 0 1 6.957497 0.000000 7106 +overviewtentativeread 0 1 6.957497 0.000000 7107 +homeworkproject 0 1 6.957497 0.000000 7108 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..54d9e4ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +updat 0 191 1.609438 0.000000 41 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +introduct 0 126 2.079442 0.000000 87 +code 1 108 2.197225 2.197225 116 +instructor 0 108 2.197225 0.000000 107 +intellig 0 72 2.639057 0.000000 225 +syllabu 0 67 2.708050 0.000000 247 +artifici 0 63 2.772589 0.000000 280 +taylor 0 63 2.772589 0.000000 287 +trace 0 25 3.737670 0.000000 677 +tuth 0 9 4.753590 0.000000 1519 +moonei 0 9 4.753590 0.000000 1520 +raymond 0 5 5.347108 0.000000 2313 +informationclick 0 3 5.857933 0.000000 3224 +networkfor 0 3 5.857933 0.000000 3225 +mooneytim 0 2 6.263398 0.000000 4229 +sheetand 0 2 6.263398 0.000000 4230 +placespr 0 1 6.957497 0.000000 7109 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..83e9181e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +file 0 132 1.945910 0.000000 70 +studi 0 120 2.079442 0.000000 91 +code 1 108 2.197225 2.197225 116 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +info 0 85 2.484907 0.000000 176 +homework 1 79 2.564949 2.564949 193 +intellig 0 72 2.639057 0.000000 225 +test 1 66 2.708050 2.708050 252 +syllabu 0 67 2.708050 0.000000 247 +artifici 0 63 2.772589 0.000000 280 +case 0 51 2.995732 0.000000 351 +common 0 30 3.555348 0.000000 574 +symbol 0 27 3.637586 0.000000 620 +trace 0 25 3.737670 0.000000 677 +lisp 1 18 4.060443 4.060443 897 +sheet 0 16 4.174387 0.000000 973 +moonei 0 9 4.753590 0.000000 1520 +raymond 0 5 5.347108 0.000000 2313 +allegro 0 5 5.347108 0.000000 2314 +sowmya 0 4 5.568345 0.000000 2670 +informationclick 0 3 5.857933 0.000000 3224 +networkfor 0 3 5.857933 0.000000 3225 +placetu 0 2 6.263398 0.000000 4231 +informationon 0 2 6.263398 0.000000 4232 +mooneyteach 0 1 6.957497 0.000000 7110 +ramachandrantim 0 1 6.957497 0.000000 7111 +alsout 0 1 6.957497 0.000000 7112 +textparadigm 0 1 6.957497 0.000000 7113 +lispassignmentsse 0 1 6.957497 0.000000 7114 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..a3cc7893 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +base 1 165 1.791759 1.791759 50 +network 0 168 1.791759 0.000000 61 +file 0 132 1.945910 0.000000 70 +machin 1 129 2.079442 2.079442 95 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +code 1 108 2.197225 2.197225 116 +instructor 0 108 2.197225 0.000000 107 +theori 0 111 2.197225 0.000000 127 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +learn 2 86 2.484907 4.969814 170 +homework 1 79 2.564949 2.564949 193 +logic 0 71 2.639057 0.000000 230 +syllabu 0 67 2.708050 0.000000 247 +order 0 69 2.708050 0.000000 249 +evalu 0 64 2.772589 0.000000 266 +suggest 0 53 2.944439 0.000000 331 +talk 0 53 2.944439 0.000000 336 +format 0 48 3.044522 0.000000 356 +slide 0 38 3.295837 0.000000 467 +tree 0 36 3.367296 0.000000 492 +concept 0 32 3.465736 0.000000 537 +neural 0 30 3.555348 0.000000 578 +cluster 0 28 3.610918 0.000000 612 +experiment 0 26 3.688879 0.000000 645 +rule 0 26 3.688879 0.000000 638 +trace 0 25 3.737670 0.000000 677 +decis 0 23 3.806662 0.000000 728 +outlin 0 17 4.110874 0.000000 914 +explan 0 16 4.174387 0.000000 985 +induct 0 11 4.553877 0.000000 1304 +instanc 0 11 4.553877 0.000000 1322 +moonei 0 9 4.753590 0.000000 1520 +raymond 0 5 5.347108 0.000000 2313 +bayesian 0 4 5.568345 0.000000 2671 +informationclick 0 3 5.857933 0.000000 3224 +networkfor 0 3 5.857933 0.000000 3225 +mooneytim 0 2 6.263398 0.000000 4229 +placetu 0 2 6.263398 0.000000 4231 +sheetand 0 2 6.263398 0.000000 4230 +unsupervis 0 2 6.263398 0.000000 4233 +textmachinelearninglectur 0 1 6.957497 0.000000 7115 +learningassignmentsse 0 1 6.957497 0.000000 7116 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..119ae3f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +softwar 0 220 1.386294 0.000000 30 +list 0 201 1.609438 0.000000 39 +implement 0 152 1.791759 0.000000 52 +data 0 170 1.791759 0.000000 49 +assign 1 135 1.945910 1.945910 66 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +studi 1 120 2.079442 2.079442 91 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +section 1 94 2.397895 2.397895 149 +call 0 91 2.397895 0.000000 153 +exam 1 86 2.484907 2.484907 169 +school 0 84 2.484907 0.000000 188 +chang 0 82 2.484907 0.000000 163 +requir 0 81 2.484907 0.000000 167 +syllabu 0 67 2.708050 0.000000 247 +guid 1 63 2.772589 2.772589 267 +foundat 1 62 2.772589 2.772589 286 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +plai 0 60 2.833213 0.000000 307 +major 0 56 2.890372 0.000000 315 +semest 0 58 2.890372 0.000000 312 +run 0 51 2.995732 0.000000 347 +basic 0 50 3.044522 0.000000 360 +move 0 47 3.091042 0.000000 382 +math 0 44 3.135494 0.000000 402 +directori 0 45 3.135494 0.000000 396 +next 0 34 3.401197 0.000000 517 +least 0 35 3.401197 0.000000 516 +concept 0 32 3.465736 0.000000 537 +express 0 32 3.465736 0.000000 540 +taken 0 31 3.496508 0.000000 555 +hard 0 30 3.555348 0.000000 563 +intend 0 28 3.610918 0.000000 599 +symbol 0 27 3.637586 0.000000 620 +never 0 25 3.737670 0.000000 671 +background 0 25 3.737670 0.000000 664 +instead 0 22 3.850148 0.000000 756 +scheme 1 20 3.951244 3.951244 818 +thur 0 19 4.007333 0.000000 847 +lisp 0 18 4.060443 0.000000 897 +hopefulli 0 14 4.317488 0.000000 1071 +draw 0 14 4.317488 0.000000 1086 +faster 0 11 4.553877 0.000000 1323 +surf 0 11 4.553877 0.000000 1301 +submiss 0 11 4.553877 0.000000 1298 +novak 0 9 4.753590 0.000000 1521 +strong 0 6 5.164786 0.000000 2029 +syntax 0 6 5.164786 0.000000 2030 +snow 0 6 5.164786 0.000000 2031 +gordon 0 6 5.164786 0.000000 2032 +assignmentsprogram 0 6 5.164786 0.000000 2019 +willb 0 5 5.347108 0.000000 2277 +porter 0 5 5.347108 0.000000 2293 +emphas 0 4 5.568345 0.000000 2672 +dialect 0 3 5.857933 0.000000 3226 +gambit 0 3 5.857933 0.000000 3227 +macintoshcomput 0 3 5.857933 0.000000 3228 +treasur 0 3 5.857933 0.000000 3229 +gradingmidterm 0 3 5.857933 0.000000 3230 +guidefin 0 3 5.857933 0.000000 3231 +peano 0 2 6.263398 0.000000 4234 +turtl 0 2 6.263398 0.000000 4235 +plot 0 2 6.263398 0.000000 4236 +sciencec 1 1 6.957497 6.957497 7117 +treesassign 1 1 6.957497 6.957497 7118 +atleast 0 1 6.957497 0.000000 7119 +programmingcours 0 1 6.957497 0.000000 7120 +precalculu 0 1 6.957497 0.000000 7121 +theschem 0 1 6.957497 0.000000 7122 +coursesand 0 1 6.957497 0.000000 7123 +learninga 0 1 6.957497 0.000000 7124 +tutorcopi 0 1 6.957497 0.000000 7125 +pcassign 0 1 6.957497 0.000000 7126 +simulationassign 0 1 6.957497 0.000000 7127 +webassign 0 1 6.957497 0.000000 7128 +schemeassign 0 1 6.957497 0.000000 7129 +gamblingassign 0 1 6.957497 0.000000 7130 +graphicsassign 0 1 6.957497 0.000000 7131 +manipulationstudi 0 1 6.957497 0.000000 7132 +vocabulari 0 1 6.957497 0.000000 7133 +thickensassign 0 1 6.957497 0.000000 7134 +huntassign 0 1 6.957497 0.000000 7135 +algebraassign 0 1 6.957497 0.000000 7136 +matricesstudi 0 1 6.957497 0.000000 7137 +unparsingassign 0 1 6.957497 0.000000 7138 +translationstudi 0 1 6.957497 0.000000 7139 +descriptionsprogram 0 1 6.957497 0.000000 7140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..70d973d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +file 1 132 1.945910 1.945910 70 +construct 0 139 1.945910 0.000000 82 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +take 0 97 2.302585 0.000000 134 +exam 0 86 2.484907 0.000000 169 +server 0 76 2.564949 0.000000 204 +write 0 72 2.639057 0.000000 222 +plan 0 65 2.772589 0.000000 272 +descript 0 64 2.772589 0.000000 271 +summer 1 56 2.890372 2.890372 311 +cover 0 55 2.944439 0.000000 329 +processor 0 54 2.944439 0.000000 335 +week 0 52 2.995732 0.000000 343 +directori 0 45 3.135494 0.000000 396 +live 0 40 3.258097 0.000000 451 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +programminglanguag 0 21 3.912023 0.000000 782 +chip 0 21 3.912023 0.000000 770 +five 0 19 4.007333 0.000000 841 +incorpor 0 13 4.382027 0.000000 1163 +pascal 0 12 4.465908 0.000000 1213 +workload 0 12 4.465908 0.000000 1210 +submiss 0 11 4.553877 0.000000 1298 +novak 0 9 4.753590 0.000000 1521 +heavi 0 7 5.010635 0.000000 1841 +assignmentsprogram 0 6 5.164786 0.000000 2019 +gradingmidterm 0 3 5.857933 0.000000 3230 +guidefin 0 3 5.857933 0.000000 3231 +compilersc 1 2 6.263398 6.263398 4237 +powerpc 1 2 6.263398 6.263398 4238 +syllabusprogram 0 2 6.263398 0.000000 4239 +codei 0 1 6.957497 0.000000 7141 +dedicatetheir 0 1 6.957497 0.000000 7142 +guidegordon 0 1 6.957497 0.000000 7143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..70ba126c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +studi 1 120 2.079442 2.079442 91 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +requir 0 81 2.484907 0.000000 167 +environ 0 84 2.484907 0.000000 177 +exam 0 86 2.484907 0.000000 169 +intellig 1 72 2.639057 2.639057 225 +logic 0 71 2.639057 0.000000 230 +artifici 1 63 2.772589 2.772589 280 +major 0 56 2.890372 0.000000 315 +natur 0 44 3.135494 0.000000 406 +vision 0 41 3.218876 0.000000 430 +survei 0 35 3.401197 0.000000 513 +represent 0 35 3.401197 0.000000 512 +bibliographi 0 34 3.401197 0.000000 518 +human 0 32 3.465736 0.000000 546 +defin 0 22 3.850148 0.000000 746 +behavior 0 18 4.060443 0.000000 881 +appropri 0 18 4.060443 0.000000 883 +attempt 0 17 4.110874 0.000000 917 +action 0 15 4.248495 0.000000 1038 +stori 1 14 4.317488 4.317488 1087 +achiev 0 14 4.317488 0.000000 1088 +calculu 0 12 4.465908 0.000000 1203 +thecomput 0 10 4.653960 0.000000 1408 +novak 0 9 4.753590 0.000000 1521 +brain 0 8 4.875197 0.000000 1638 +thegoal 0 6 5.164786 0.000000 2033 +assignmentsprogram 0 6 5.164786 0.000000 2019 +intelligencec 0 4 5.568345 0.000000 2673 +coverag 0 4 5.568345 0.000000 2656 +guidefin 0 3 5.857933 0.000000 3231 +actor 0 2 6.263398 0.000000 4240 +syllabusprogram 0 2 6.263398 0.000000 4239 +problemssolut 0 2 6.263398 0.000000 4241 +intelligenceartifici 0 1 6.957497 0.000000 7144 +todupl 0 1 6.957497 0.000000 7145 +connectspercept 0 1 6.957497 0.000000 7146 +andknowledg 0 1 6.957497 0.000000 7147 +withbrief 0 1 6.957497 0.000000 7148 +descriptionsmidterm 0 1 6.957497 0.000000 7149 +guidepred 0 1 6.957497 0.000000 7150 +problemsnot 0 1 6.957497 0.000000 7151 +braingordon 0 1 6.957497 0.000000 7152 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..33d41eae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +read 0 154 1.791759 0.000000 47 +lectur 1 135 1.945910 1.945910 73 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +compil 0 122 2.079442 0.000000 96 +specif 0 106 2.197225 0.000000 106 +present 0 91 2.397895 0.000000 145 +graphic 0 90 2.397895 0.000000 147 +level 0 87 2.484907 0.000000 180 +homework 0 79 2.564949 0.000000 193 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +materi 0 75 2.639057 0.000000 221 +automat 1 61 2.833213 2.833213 306 +semest 1 58 2.890372 2.890372 312 +sever 0 56 2.890372 0.000000 322 +cover 0 55 2.944439 0.000000 329 +done 0 47 3.091042 0.000000 381 +execut 0 45 3.135494 0.000000 404 +third 0 43 3.178054 0.000000 412 +long 0 43 3.178054 0.000000 413 +expect 0 37 3.332205 0.000000 484 +given 0 32 3.465736 0.000000 538 +kind 0 32 3.465736 0.000000 541 +consist 0 26 3.688879 0.000000 651 +higher 0 24 3.761200 0.000000 690 +literatur 0 11 4.553877 0.000000 1300 +latter 0 9 4.753590 0.000000 1522 +novak 0 9 4.753590 0.000000 1521 +programmingc 0 3 5.857933 0.000000 3232 +ordinari 0 3 5.857933 0.000000 3233 +programmingautomat 0 1 6.957497 0.000000 7153 +programsfrom 0 1 6.957497 0.000000 7154 +illustrateth 0 1 6.957497 0.000000 7155 +requirelearn 0 1 6.957497 0.000000 7156 +partof 0 1 6.957497 0.000000 7157 +syllabusbibliographyassign 0 1 6.957497 0.000000 7158 +handpattern 0 1 6.957497 0.000000 7159 +matchingobject 0 1 6.957497 0.000000 7160 +programmingintroduct 0 1 6.957497 0.000000 7161 +glispview 0 1 6.957497 0.000000 7162 +programminggordon 0 1 6.957497 0.000000 7163 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..557f6ca1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,676 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +us 2 329 1.098612 2.197224 16 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +last 0 314 1.098612 0.000000 14 +languag 2 227 1.386294 2.772588 26 +softwar 2 220 1.386294 2.772588 30 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 2 170 1.791759 3.583518 49 +parallel 2 169 1.791759 3.583518 60 +algorithm 2 162 1.791759 3.583518 57 +network 2 168 1.791759 3.583518 61 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +base 1 165 1.791759 1.791759 50 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +recent 0 167 1.791759 0.000000 58 +problem 2 147 1.945910 3.891820 75 +file 1 132 1.945910 1.945910 70 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +lectur 1 135 1.945910 1.945910 73 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +compil 2 122 2.079442 4.158884 96 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +well 1 109 2.197225 2.197225 121 +mathemat 1 108 2.197225 2.197225 123 +make 1 111 2.197225 2.197225 120 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +code 1 108 2.197225 2.197225 116 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +site 0 106 2.197225 0.000000 119 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +advanc 1 99 2.302585 2.302585 130 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +present 1 91 2.397895 2.397895 145 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +graphic 0 90 2.397895 0.000000 147 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +level 2 87 2.484907 4.969814 180 +librari 1 87 2.484907 2.484907 181 +learn 1 86 2.484907 2.484907 170 +wide 1 84 2.484907 2.484907 185 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +solut 1 82 2.484907 2.484907 162 +chang 0 82 2.484907 0.000000 163 +resourc 0 81 2.484907 0.000000 172 +contain 0 81 2.484907 0.000000 174 +control 0 82 2.484907 0.000000 164 +larg 0 82 2.484907 0.000000 168 +thing 0 84 2.484907 0.000000 189 +novemb 0 81 2.484907 0.000000 179 +interfac 1 79 2.564949 2.564949 209 +state 1 76 2.564949 2.564949 207 +sourc 1 77 2.564949 2.564949 201 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +server 1 76 2.564949 2.564949 204 +exampl 1 77 2.564949 2.564949 195 +mondai 0 77 2.564949 0.000000 206 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +orient 0 80 2.564949 0.000000 205 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +come 0 78 2.564949 0.000000 202 +logic 2 71 2.639057 5.278114 230 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +appli 1 71 2.639057 2.639057 226 +addit 0 74 2.639057 0.000000 228 +solv 0 73 2.639057 0.000000 234 +write 0 72 2.639057 0.000000 222 +differ 1 66 2.708050 2.708050 253 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +view 1 70 2.708050 2.708050 254 +main 1 67 2.708050 2.708050 256 +thursdai 0 70 2.708050 0.000000 241 +receiv 0 66 2.708050 0.000000 244 +knowledg 0 67 2.708050 0.000000 243 +order 0 69 2.708050 0.000000 249 +test 0 66 2.708050 0.000000 252 +practic 0 70 2.708050 0.000000 246 +abstract 1 62 2.772589 2.772589 276 +result 1 65 2.772589 2.772589 281 +evalu 1 64 2.772589 2.772589 266 +experi 1 64 2.772589 2.772589 283 +taylor 0 63 2.772589 0.000000 287 +organ 0 65 2.772589 0.000000 265 +foundat 0 62 2.772589 0.000000 286 +plan 0 65 2.772589 0.000000 272 +virtual 0 62 2.772589 0.000000 285 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +artifici 0 63 2.772589 0.000000 280 +previou 0 62 2.772589 0.000000 290 +written 0 63 2.772589 0.000000 278 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +best 0 59 2.833213 0.000000 299 +plai 0 60 2.833213 0.000000 307 +type 0 61 2.833213 0.000000 296 +special 1 56 2.890372 2.890372 320 +explor 1 58 2.890372 2.890372 324 +sever 1 56 2.890372 2.890372 322 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +reason 0 57 2.890372 0.000000 318 +direct 0 57 2.890372 0.000000 316 +major 0 56 2.890372 0.000000 315 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +undergradu 1 54 2.944439 2.944439 338 +extens 1 53 2.944439 2.944439 340 +local 0 55 2.944439 0.000000 334 +instruct 0 53 2.944439 0.000000 332 +three 0 54 2.944439 0.000000 330 +scientif 0 53 2.944439 0.000000 341 +run 0 51 2.995732 0.000000 347 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +case 0 51 2.995732 0.000000 351 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +give 0 50 3.044522 0.000000 359 +without 0 50 3.044522 0.000000 370 +standard 0 48 3.044522 0.000000 365 +adapt 1 46 3.091042 3.091042 387 +featur 1 46 3.091042 3.091042 386 +effect 1 46 3.091042 3.091042 385 +move 0 47 3.091042 0.000000 382 +describ 1 45 3.135494 3.135494 400 +natur 1 44 3.135494 3.135494 406 +execut 1 45 3.135494 3.135494 404 +even 1 45 3.135494 3.135494 393 +discuss 1 45 3.135494 3.135494 399 +better 0 45 3.135494 0.000000 401 +offer 1 43 3.178054 3.178054 414 +fast 1 42 3.218876 3.218876 429 +autom 0 41 3.218876 0.000000 434 +past 0 42 3.218876 0.000000 428 +linear 0 41 3.218876 0.000000 431 +examin 0 42 3.218876 0.000000 424 +continu 1 39 3.258097 3.258097 448 +form 1 39 3.258097 3.258097 443 +must 0 40 3.258097 0.000000 442 +programm 0 39 3.258097 0.000000 445 +map 0 39 3.258097 0.000000 452 +theoret 0 39 3.258097 0.000000 446 +open 1 38 3.295837 3.295837 469 +close 1 38 3.295837 3.295837 465 +seminar 0 38 3.295837 0.000000 470 +credit 0 38 3.295837 0.000000 460 +paul 0 38 3.295837 0.000000 471 +formal 1 37 3.332205 3.332205 478 +hand 1 37 3.332205 3.332205 475 +workstat 1 37 3.332205 3.332205 479 +feel 0 37 3.332205 0.000000 483 +cost 0 37 3.332205 0.000000 480 +mean 0 37 3.332205 0.000000 477 +robot 1 36 3.367296 3.367296 497 +procedur 1 36 3.367296 3.367296 488 +game 0 36 3.367296 0.000000 498 +represent 1 35 3.401197 3.401197 512 +singl 1 34 3.401197 3.401197 510 +least 0 35 3.401197 0.000000 516 +random 0 34 3.401197 0.000000 511 +toler 2 33 3.433987 6.867974 533 +queri 1 33 3.433987 3.433987 524 +within 1 33 3.433987 3.433987 525 +obtain 0 33 3.433987 0.000000 534 +fault 2 32 3.465736 6.931472 547 +idea 1 32 3.465736 3.465736 545 +human 0 32 3.465736 0.000000 546 +given 0 32 3.465736 0.000000 538 +ad 0 32 3.465736 0.000000 544 +taken 0 31 3.496508 0.000000 555 +often 0 31 3.496508 0.000000 551 +neural 1 30 3.555348 3.555348 578 +robert 1 30 3.555348 3.555348 567 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +abl 1 30 3.555348 3.555348 566 +graph 0 30 3.555348 0.000000 576 +domain 0 30 3.555348 0.000000 564 +rang 0 30 3.555348 0.000000 565 +common 0 30 3.555348 0.000000 574 +compon 0 30 3.555348 0.000000 570 +focu 0 30 3.555348 0.000000 571 +semant 1 29 3.583519 3.583519 587 +depend 1 29 3.583519 3.583519 583 +built 1 29 3.583519 3.583519 592 +limit 0 29 3.583519 0.000000 585 +turn 0 29 3.583519 0.000000 586 +platform 0 29 3.583519 0.000000 591 +consid 0 29 3.583519 0.000000 590 +cluster 1 28 3.610918 3.610918 612 +scale 1 28 3.610918 3.610918 613 +propos 0 28 3.610918 0.000000 602 +usual 0 28 3.610918 0.000000 608 +framework 0 28 3.610918 0.000000 606 +enabl 1 26 3.688879 3.688879 655 +consist 1 26 3.688879 3.688879 651 +detect 0 26 3.688879 0.000000 646 +request 0 26 3.688879 0.000000 635 +challeng 0 26 3.688879 0.000000 653 +effort 0 26 3.688879 0.000000 652 +mine 0 26 3.688879 0.000000 654 +repres 0 26 3.688879 0.000000 656 +task 1 25 3.737670 3.737670 678 +reliabl 1 25 3.737670 3.737670 674 +concern 1 25 3.737670 3.737670 666 +client 0 25 3.737670 0.000000 679 +known 1 24 3.761200 3.761200 702 +higher 1 24 3.761200 3.761200 690 +mike 0 24 3.761200 0.000000 703 +greg 0 24 3.761200 0.000000 695 +lab 0 24 3.761200 0.000000 698 +decis 1 23 3.806662 3.806662 728 +highli 1 23 3.806662 3.806662 725 +honor 0 23 3.806662 0.000000 729 +cooper 1 22 3.850148 3.850148 757 +hierarchi 1 22 3.850148 3.850148 744 +sequenti 1 22 3.850148 3.850148 745 +serv 1 22 3.850148 3.850148 758 +emphasi 0 22 3.850148 0.000000 755 +instead 0 22 3.850148 0.000000 756 +inth 0 22 3.850148 0.000000 741 +deal 0 22 3.850148 0.000000 736 +reduc 0 22 3.850148 0.000000 759 +among 1 21 3.912023 3.912023 781 +path 1 21 3.912023 3.912023 778 +busi 0 21 3.912023 0.000000 784 +fact 0 21 3.912023 0.000000 780 +output 0 21 3.912023 0.000000 788 +facil 1 20 3.951244 3.951244 814 +portabl 1 20 3.951244 3.951244 819 +basi 0 20 3.951244 0.000000 828 +increas 0 20 3.951244 0.000000 829 +runtim 1 19 4.007333 4.007333 858 +separ 0 19 4.007333 0.000000 844 +failur 1 18 4.060443 4.060443 898 +attend 0 18 4.060443 0.000000 893 +encourag 0 18 4.060443 0.000000 880 +seem 0 18 4.060443 0.000000 899 +appropri 0 18 4.060443 0.000000 883 +stop 0 17 4.110874 0.000000 942 +ultim 0 17 4.110874 0.000000 943 +previous 0 17 4.110874 0.000000 923 +modif 0 17 4.110874 0.000000 913 +cognit 1 16 4.174387 4.174387 986 +advantag 1 16 4.174387 4.174387 987 +spatial 1 16 4.174387 4.174387 988 +easi 1 16 4.174387 4.174387 969 +spars 0 16 4.174387 0.000000 989 +vector 0 16 4.174387 0.000000 961 +across 0 16 4.174387 0.000000 974 +action 1 15 4.248495 4.248495 1038 +novel 0 15 4.248495 0.000000 1039 +english 0 15 4.248495 0.000000 1033 +side 0 15 4.248495 0.000000 1022 +massiv 0 15 4.248495 0.000000 1026 +reflect 0 15 4.248495 0.000000 1034 +topolog 0 14 4.317488 0.000000 1089 +heterogen 0 14 4.317488 0.000000 1090 +convent 0 14 4.317488 0.000000 1072 +translat 1 13 4.382027 4.382027 1164 +discret 1 13 4.382027 4.382027 1165 +prolog 1 13 4.382027 4.382027 1155 +whose 0 13 4.382027 0.000000 1166 +central 0 13 4.382027 0.000000 1160 +signific 0 13 4.382027 0.000000 1125 +difficulti 0 13 4.382027 0.000000 1132 +infrastructur 0 12 4.465908 0.000000 1234 +grow 0 12 4.465908 0.000000 1209 +onth 0 12 4.465908 0.000000 1218 +amount 0 12 4.465908 0.000000 1208 +evolv 0 12 4.465908 0.000000 1223 +promot 0 12 4.465908 0.000000 1235 +buffer 0 12 4.465908 0.000000 1211 +deduct 0 12 4.465908 0.000000 1236 +minor 0 12 4.465908 0.000000 1237 +evolut 1 11 4.553877 4.553877 1314 +broad 1 11 4.553877 4.553877 1302 +vladimir 0 11 4.553877 0.000000 1324 +transpar 0 11 4.553877 0.000000 1325 +multithread 0 11 4.553877 0.000000 1315 +node 0 11 4.553877 0.000000 1326 +induct 0 11 4.553877 0.000000 1304 +fix 0 11 4.553877 0.000000 1327 +noth 0 11 4.553877 0.000000 1328 +genet 1 10 4.653960 4.653960 1409 +underli 1 10 4.653960 4.653960 1410 +guarante 0 10 4.653960 0.000000 1391 +reli 0 10 4.653960 0.000000 1411 +facilit 0 10 4.653960 0.000000 1412 +length 0 10 4.653960 0.000000 1400 +sentenc 0 10 4.653960 0.000000 1413 +custom 0 10 4.653960 0.000000 1414 +certain 0 10 4.653960 0.000000 1393 +tradit 0 10 4.653960 0.000000 1404 +modular 0 10 4.653960 0.000000 1392 +notat 1 9 4.753590 4.753590 1489 +risto 0 9 4.753590 0.000000 1523 +calvin 0 9 4.753590 0.000000 1518 +sister 0 9 4.753590 0.000000 1524 +explicit 0 9 4.753590 0.000000 1525 +declar 0 9 4.753590 0.000000 1526 +compos 0 9 4.753590 0.000000 1527 +meta 0 9 4.753590 0.000000 1505 +novak 0 9 4.753590 0.000000 1521 +assumpt 0 9 4.753590 0.000000 1514 +significantli 0 9 4.753590 0.000000 1508 +reus 1 8 4.875197 4.875197 1661 +simpli 1 8 4.875197 4.875197 1626 +fail 0 8 4.875197 0.000000 1655 +paradigm 0 8 4.875197 0.000000 1662 +isol 0 8 4.875197 0.000000 1663 +claim 0 8 4.875197 0.000000 1664 +realist 0 8 4.875197 0.000000 1665 +analys 0 8 4.875197 0.000000 1666 +beyond 1 7 5.010635 5.010635 1834 +trend 1 7 5.010635 5.010635 1842 +dedic 0 7 5.010635 0.000000 1843 +metric 0 7 5.010635 0.000000 1831 +usabl 0 7 5.010635 0.000000 1810 +converg 0 7 5.010635 0.000000 1844 +uniform 0 7 5.010635 0.000000 1845 +determinist 1 6 5.164786 5.164786 2034 +difficult 1 6 5.164786 5.164786 2035 +gordon 0 6 5.164786 0.000000 2032 +academia 0 6 5.164786 0.000000 2036 +promis 0 6 5.164786 0.000000 2037 +pace 0 6 5.164786 0.000000 2011 +emerg 0 6 5.164786 0.000000 2038 +causal 0 6 5.164786 0.000000 2024 +prefetch 0 6 5.164786 0.000000 2039 +infer 0 6 5.164786 0.000000 2040 +conflict 0 6 5.164786 0.000000 2041 +constrain 0 6 5.164786 0.000000 2042 +variant 0 6 5.164786 0.000000 2043 +affect 0 6 5.164786 0.000000 2044 +corpu 1 5 5.347108 5.347108 2282 +revolut 0 5 5.347108 0.000000 2315 +ofdistribut 0 5 5.347108 0.000000 2316 +scope 0 5 5.347108 0.000000 2296 +despit 0 5 5.347108 0.000000 2317 +unknown 0 5 5.347108 0.000000 2318 +stabl 0 5 5.347108 0.000000 2309 +distinct 0 5 5.347108 0.000000 2319 +corba 0 5 5.347108 0.000000 2320 +pars 0 5 5.347108 0.000000 2321 +fairli 0 5 5.347108 0.000000 2322 +ofinterest 0 5 5.347108 0.000000 2323 +blumoferdb 0 5 5.347108 0.000000 2324 +reinforc 1 4 5.568345 5.568345 2674 +resolv 1 4 5.568345 5.568345 2675 +asymptot 1 4 5.568345 5.568345 2676 +vijaya 0 4 5.568345 0.000000 2677 +lorenzo 0 4 5.568345 0.000000 2588 +expens 0 4 5.568345 0.000000 2678 +ofworkst 0 4 5.568345 0.000000 2679 +illus 0 4 5.568345 0.000000 2603 +algorithmsand 0 4 5.568345 0.000000 2680 +havedevelop 0 4 5.568345 0.000000 2681 +clearli 0 4 5.568345 0.000000 2590 +surprisingli 0 4 5.568345 0.000000 2609 +floyd 0 4 5.568345 0.000000 2682 +queu 0 4 5.568345 0.000000 2648 +conceptu 1 3 5.857933 5.857933 3214 +lightweight 1 3 5.857933 5.857933 3234 +popul 1 3 5.857933 5.857933 3235 +embodi 1 3 5.857933 5.857933 3236 +blumof 0 3 5.857933 0.000000 3237 +citizen 0 3 5.857933 0.000000 3238 +dramat 0 3 5.857933 0.000000 3239 +aggress 0 3 5.857933 0.000000 3240 +similarli 0 3 5.857933 0.000000 3241 +neighborhood 0 3 5.857933 0.000000 3242 +intra 0 3 5.857933 0.000000 3243 +enumer 0 3 5.857933 0.000000 3244 +compliant 0 3 5.857933 0.000000 3245 +andsemant 0 3 5.857933 0.000000 3246 +parser 0 3 5.857933 0.000000 3141 +surpass 0 3 5.857933 0.000000 3247 +agener 0 3 5.857933 0.000000 3213 +theworld 0 3 5.857933 0.000000 3158 +rivest 0 3 5.857933 0.000000 3248 +parallelalgorithm 0 3 5.857933 0.000000 3249 +rscheme 0 3 5.857933 0.000000 3250 +cilk 2 2 6.263398 12.526796 4242 +alamo 1 2 6.263398 6.263398 4243 +chill 1 2 6.263398 6.263398 4244 +theform 1 2 6.263398 6.263398 4245 +sciencefal 0 2 6.263398 0.000000 4246 +andresearch 0 2 6.263398 0.000000 4247 +government 0 2 6.263398 0.000000 4248 +andcollect 0 2 6.263398 0.000000 4249 +todramat 0 2 6.263398 0.000000 4250 +thedesign 0 2 6.263398 0.000000 4251 +har 0 2 6.263398 0.000000 4252 +undergo 0 2 6.263398 0.000000 4253 +applicationsto 0 2 6.263398 0.000000 4254 +offailur 0 2 6.263398 0.000000 4255 +idl 0 2 6.263398 0.000000 4256 +indistribut 0 2 6.263398 0.000000 4257 +andmap 0 2 6.263398 0.000000 4258 +equilibrium 0 2 6.263398 0.000000 4259 +trajectori 0 2 6.263398 0.000000 4260 +sufficientto 0 2 6.263398 0.000000 4261 +logicprogram 0 2 6.263398 0.000000 4262 +thesetechniqu 0 2 6.263398 0.000000 4263 +ofneur 0 2 6.263398 0.000000 4264 +neuro 0 2 6.263398 0.000000 4265 +resourcemanag 0 2 6.263398 0.000000 4266 +anobject 0 2 6.263398 0.000000 4267 +anticip 0 2 6.263398 0.000000 4268 +corpora 0 2 6.263398 0.000000 4269 +foidl 0 2 6.263398 0.000000 4270 +andanalysi 0 2 6.263398 0.000000 4271 +straightforward 0 2 6.263398 0.000000 4272 +lengthi 0 2 6.263398 0.000000 4273 +andform 0 2 6.263398 0.000000 4274 +succinctli 0 2 6.263398 0.000000 4275 +concret 0 2 6.263398 0.000000 4276 +analysisof 0 2 6.263398 0.000000 4277 +tarjan 0 2 6.263398 0.000000 4278 +maspar 0 2 6.263398 0.000000 4279 +workon 0 2 6.263398 0.000000 4280 +symbiot 1 1 6.957497 6.957497 7164 +pram 1 1 6.957497 6.957497 7165 +programmingoctob 1 1 6.957497 6.957497 7166 +wilsonextens 1 1 6.957497 6.957497 7167 +ramachandranth 1 1 6.957497 6.957497 7168 +theapplic 1 1 6.957497 6.957497 7169 +sane 1 1 6.957497 6.957497 7170 +datasourc 1 1 6.957497 6.957497 7171 +theabstract 1 1 6.957497 6.957497 7172 +sciencecst 0 1 6.957497 0.000000 7173 +apass 0 1 6.957497 0.000000 7174 +beregist 0 1 6.957497 0.000000 7175 +schedulespeakertitleseptemb 0 1 6.957497 0.000000 7176 +mirankeralamo 0 1 6.957497 0.000000 7177 +warehouseseptemb 0 1 6.957497 0.000000 7178 +kuipersth 0 1 6.957497 0.000000 7179 +humanand 0 1 6.957497 0.000000 7180 +mapsseptemb 0 1 6.957497 0.000000 7181 +blumofecilk 0 1 6.957497 0.000000 7182 +reliableparallel 0 1 6.957497 0.000000 7183 +workstationsseptemb 0 1 6.957497 0.000000 7184 +miikkulainenlearn 0 1 6.957497 0.000000 7185 +throughsymbiot 0 1 6.957497 0.000000 7186 +networksoctob 0 1 6.957497 0.000000 7187 +lifschitzmathemat 0 1 6.957497 0.000000 7188 +reflectionoctob 0 1 6.957497 0.000000 7189 +mooneylearn 0 1 6.957497 0.000000 7190 +usinginduct 0 1 6.957497 0.000000 7191 +dahlindistribut 0 1 6.957497 0.000000 7192 +internetsnovemb 0 1 6.957497 0.000000 7193 +novaksoftwar 0 1 6.957497 0.000000 7194 +genericprocedur 0 1 6.957497 0.000000 7195 +viewsnovemb 0 1 6.957497 0.000000 7196 +parallelalgorithmsnovemb 0 1 6.957497 0.000000 7197 +alvisilighweight 0 1 6.957497 0.000000 7198 +tolerancenovemb 0 1 6.957497 0.000000 7199 +linadapt 0 1 6.957497 0.000000 7200 +optimizationdecemb 0 1 6.957497 0.000000 7201 +plaxtonanalysi 0 1 6.957497 0.000000 7202 +algorithmslighweight 0 1 6.957497 0.000000 7203 +tolerancelorenzo 0 1 6.957497 0.000000 7204 +alvisidistribut 0 1 6.957497 0.000000 7205 +confin 0 1 6.957497 0.000000 7206 +revolution 0 1 6.957497 0.000000 7207 +beyondth 0 1 6.957497 0.000000 7208 +toleranttechniqu 0 1 6.957497 0.000000 7209 +willceas 0 1 6.957497 0.000000 7210 +exot 0 1 6.957497 0.000000 7211 +distributedinform 0 1 6.957497 0.000000 7212 +acompetit 0 1 6.957497 0.000000 7213 +criticalinform 0 1 6.957497 0.000000 7214 +engineerfault 0 1 6.957497 0.000000 7215 +negligibleimpact 0 1 6.957497 0.000000 7216 +communicatethrough 0 1 6.957497 0.000000 7217 +onnetwork 0 1 6.957497 0.000000 7218 +workstationsrobert 0 1 6.957497 0.000000 7219 +blumofethi 0 1 6.957497 0.000000 7220 +pronouncedsilk 0 1 6.957497 0.000000 7221 +andcilk 0 1 6.957497 0.000000 7222 +functionalsubset 0 1 6.957497 0.000000 7223 +providesadapt 0 1 6.957497 0.000000 7224 +tranpar 0 1 6.957497 0.000000 7225 +touser 0 1 6.957497 0.000000 7226 +shrinkdynam 0 1 6.957497 0.000000 7227 +cilkprogram 0 1 6.957497 0.000000 7228 +workstationscrash 0 1 6.957497 0.000000 7229 +andrecov 0 1 6.957497 0.000000 7230 +livedemonstr 0 1 6.957497 0.000000 7231 +internetsmik 0 1 6.957497 0.000000 7232 +dahlinthi 0 1 6.957497 0.000000 7233 +applicationsmotiv 0 1 6.957497 0.000000 7234 +inclust 0 1 6.957497 0.000000 7235 +servicei 0 1 6.957497 0.000000 7236 +nodesto 0 1 6.957497 0.000000 7237 +centralserv 0 1 6.957497 0.000000 7238 +goodperform 0 1 6.957497 0.000000 7239 +networkperform 0 1 6.957497 0.000000 7240 +projectwil 0 1 6.957497 0.000000 7241 +mapsbenjamin 0 1 6.957497 0.000000 7242 +kuipershuman 0 1 6.957497 0.000000 7243 +forlarg 0 1 6.957497 0.000000 7244 +ontolog 0 1 6.957497 0.000000 7245 +varietyof 0 1 6.957497 0.000000 7246 +cast 0 1 6.957497 0.000000 7247 +diverserepresent 0 1 6.957497 0.000000 7248 +spatialsemant 0 1 6.957497 0.000000 7249 +andassumpt 0 1 6.957497 0.000000 7250 +thecontrol 0 1 6.957497 0.000000 7251 +beabstract 0 1 6.957497 0.000000 7252 +givinga 0 1 6.957497 0.000000 7253 +causalgraph 0 1 6.957497 0.000000 7254 +topologicalnetwork 0 1 6.957497 0.000000 7255 +occupancygrid 0 1 6.957497 0.000000 7256 +theframework 0 1 6.957497 0.000000 7257 +ofglob 0 1 6.957497 0.000000 7258 +programmingvladimir 0 1 6.957497 0.000000 7259 +lifschitzlog 0 1 6.957497 0.000000 7260 +functionalprogram 0 1 6.957497 0.000000 7261 +notne 0 1 6.957497 0.000000 7262 +itcan 0 1 6.957497 0.000000 7263 +executedus 0 1 6.957497 0.000000 7264 +withdefin 0 1 6.957497 0.000000 7265 +thereason 0 1 6.957497 0.000000 7266 +thesound 0 1 6.957497 0.000000 7267 +optimizationcalvin 0 1 6.957497 0.000000 7268 +linthi 0 1 6.957497 0.000000 7269 +andtheir 0 1 6.957497 0.000000 7270 +differenthardwar 0 1 6.957497 0.000000 7271 +efficientand 0 1 6.957497 0.000000 7272 +suchlibrari 0 1 6.957497 0.000000 7273 +weexplain 0 1 6.957497 0.000000 7274 +networksristo 0 1 6.957497 0.000000 7275 +miikkulainena 0 1 6.957497 0.000000 7276 +neuronsthrough 0 1 6.957497 0.000000 7277 +anddiscourag 0 1 6.957497 0.000000 7278 +suboptim 0 1 6.957497 0.000000 7279 +toextract 0 1 6.957497 0.000000 7280 +sequentialdecis 0 1 6.957497 0.000000 7281 +warehousedan 0 1 6.957497 0.000000 7282 +mirankerth 0 1 6.957497 0.000000 7283 +andint 0 1 6.957497 0.000000 7284 +theuser 0 1 6.957497 0.000000 7285 +byqueri 0 1 6.957497 0.000000 7286 +interfacethat 0 1 6.957497 0.000000 7287 +ofabstract 0 1 6.957497 0.000000 7288 +clever 0 1 6.957497 0.000000 7289 +anddata 0 1 6.957497 0.000000 7290 +activedatabas 0 1 6.957497 0.000000 7291 +constructedus 0 1 6.957497 0.000000 7292 +databasefacil 0 1 6.957497 0.000000 7293 +thealamo 0 1 6.957497 0.000000 7294 +dataintegr 0 1 6.957497 0.000000 7295 +elementsof 0 1 6.957497 0.000000 7296 +furthercomposit 0 1 6.957497 0.000000 7297 +answerhigh 0 1 6.957497 0.000000 7298 +logicprogrammingraymond 0 1 6.957497 0.000000 7299 +mooneyinduct 0 1 6.957497 0.000000 7300 +learningprolog 0 1 6.957497 0.000000 7301 +offirst 0 1 6.957497 0.000000 7302 +learningmethod 0 1 6.957497 0.000000 7303 +areappli 0 1 6.957497 0.000000 7304 +believethi 0 1 6.957497 0.000000 7305 +richer 0 1 6.957497 0.000000 7306 +parsersfrom 0 1 6.957497 0.000000 7307 +superior 0 1 6.957497 0.000000 7308 +onsever 0 1 6.957497 0.000000 7309 +networkmethod 0 1 6.957497 0.000000 7310 +ati 0 1 6.957497 0.000000 7311 +ofairlin 0 1 6.957497 0.000000 7312 +automaticallydevelop 0 1 6.957497 0.000000 7313 +englishdatabas 0 1 6.957497 0.000000 7314 +moreaccur 0 1 6.957497 0.000000 7315 +smallgeograph 0 1 6.957497 0.000000 7316 +tens 0 1 6.957497 0.000000 7317 +treemethod 0 1 6.957497 0.000000 7318 +throughviewsgordon 0 1 6.957497 0.000000 7319 +toachiev 0 1 6.957497 0.000000 7320 +thesoftwar 0 1 6.957497 0.000000 7321 +typesus 0 1 6.957497 0.000000 7322 +specifyview 0 1 6.957497 0.000000 7323 +adesir 0 1 6.957497 0.000000 7324 +algorithmsgreg 0 1 6.957497 0.000000 7325 +plaxtona 0 1 6.957497 0.000000 7326 +forspecif 0 1 6.957497 0.000000 7327 +notuncommon 0 1 6.957497 0.000000 7328 +havelittl 0 1 6.957497 0.000000 7329 +suchpap 0 1 6.957497 0.000000 7330 +gapsinher 0 1 6.957497 0.000000 7331 +inadequatefor 0 1 6.957497 0.000000 7332 +straightforwardalgorithm 0 1 6.957497 0.000000 7333 +theconceptu 0 1 6.957497 0.000000 7334 +trivialclass 0 1 6.957497 0.000000 7335 +blum 0 1 6.957497 0.000000 7336 +pratt 0 1 6.957497 0.000000 7337 +algorithmsvijaya 0 1 6.957497 0.000000 7338 +forcombinatori 0 1 6.957497 0.000000 7339 +recentyear 0 1 6.957497 0.000000 7340 +willdescrib 0 1 6.957497 0.000000 7341 +thesealgorithm 0 1 6.957497 0.000000 7342 +thendescrib 0 1 6.957497 0.000000 7343 +wepropos 0 1 6.957497 0.000000 7344 +parallelshar 0 1 6.957497 0.000000 7345 +reflectionpaul 0 1 6.957497 0.000000 7346 +addnew 0 1 6.957497 0.000000 7347 +structureaccordingli 0 1 6.957497 0.000000 7348 +adapat 0 1 6.957497 0.000000 7349 +extensiblelanguag 0 1 6.957497 0.000000 7350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..1c594242 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +gener 0 220 1.386294 0.000000 27 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +implement 0 152 1.791759 0.000000 52 +problem 2 147 1.945910 3.891820 75 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +support 0 132 1.945910 0.000000 83 +file 0 132 1.945910 0.000000 70 +introduct 1 126 2.079442 2.079442 87 +topic 1 114 2.197225 2.197225 110 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +book 0 99 2.302585 0.000000 131 +octob 1 89 2.397895 2.397895 156 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +novemb 1 81 2.484907 2.484907 179 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +mondai 0 77 2.564949 0.000000 206 +free 0 73 2.639057 0.000000 224 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 1 70 2.708050 2.708050 241 +test 0 66 2.708050 0.000000 252 +septemb 1 65 2.772589 2.772589 274 +wednesdai 0 64 2.772589 0.000000 261 +taylor 0 63 2.772589 0.000000 287 +handout 0 64 2.772589 0.000000 263 +found 1 53 2.944439 2.944439 337 +cover 1 55 2.944439 2.944439 329 +date 0 51 2.995732 0.000000 344 +midterm 1 45 3.135494 3.135494 392 +long 0 43 3.178054 0.000000 413 +map 0 39 3.258097 0.000000 452 +feel 0 37 3.332205 0.000000 483 +least 0 35 3.401197 0.000000 516 +chapter 2 32 3.465736 6.931472 536 +robert 1 30 3.555348 3.555348 567 +except 1 28 3.610918 3.610918 607 +assum 0 19 4.007333 0.000000 845 +stop 0 17 4.110874 0.000000 942 +station 0 13 4.382027 0.000000 1157 +solari 0 12 4.465908 0.000000 1238 +systemsc 0 11 4.553877 0.000000 1293 +canb 0 7 5.010635 0.000000 1846 +encrypt 0 7 5.010635 0.000000 1835 +blumoferdb 0 5 5.347108 0.000000 2324 +systemsfal 0 4 5.568345 0.000000 2683 +blumof 0 3 5.857933 0.000000 3237 +gooti 1 2 6.263398 6.263398 4281 +subramanyam 0 2 6.263398 0.000000 4282 +crypt 1 1 6.957497 6.957497 7351 +multiplemap 0 1 6.957497 0.000000 7352 +themap 0 1 6.957497 0.000000 7353 +decrypt 0 1 6.957497 0.000000 7354 +solutionsread 0 1 6.957497 0.000000 7355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..97e64e20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +network 1 168 1.791759 1.791759 61 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +text 0 98 2.302585 0.000000 133 +select 0 91 2.397895 0.000000 154 +grade 0 90 2.397895 0.000000 142 +homework 0 79 2.564949 0.000000 193 +copi 0 63 2.772589 0.000000 284 +detail 0 57 2.890372 0.000000 321 +midterm 0 45 3.135494 0.000000 392 +slide 0 38 3.295837 0.000000 467 +neural 1 30 3.555348 3.555348 578 +fundament 0 25 3.737670 0.000000 661 +station 0 13 4.382027 0.000000 1157 +uniqu 0 12 4.465908 0.000000 1228 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +lauren 0 3 5.857933 0.000000 3251 +bednar 0 2 6.263398 0.000000 4283 +jbednar 0 2 6.263398 0.000000 4284 +ofneur 0 2 6.263398 0.000000 4264 +cliff 0 2 6.263398 0.000000 4285 +edusun 0 2 6.263398 0.000000 4286 +networksfal 0 1 6.957497 0.000000 7356 +fausett 0 1 6.957497 0.000000 7357 +englewood 0 1 6.957497 0.000000 7358 +prenticehal 0 1 6.957497 0.000000 7359 +schedulehomework 0 1 6.957497 0.000000 7360 +assignmentsexamsclass 0 1 6.957497 0.000000 7361 +resourcesa 0 1 6.957497 0.000000 7362 +versionof 0 1 6.957497 0.000000 7363 +syllabusristo 0 1 6.957497 0.000000 7364 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..3291e814 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +utexa 1 189 1.609438 1.609438 44 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +read 1 154 1.791759 1.791759 47 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +final 0 116 2.197225 0.000000 108 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +center 0 88 2.397895 0.000000 158 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +write 0 72 2.639057 0.000000 222 +dept 1 64 2.772589 2.772589 291 +taylor 0 63 2.772589 0.000000 287 +foundat 0 62 2.772589 0.000000 286 +detail 0 57 2.890372 0.000000 321 +pointer 0 48 3.044522 0.000000 368 +discuss 1 45 3.135494 3.135494 399 +submit 0 39 3.258097 0.000000 440 +short 1 36 3.367296 3.367296 499 +approxim 0 35 3.401197 0.000000 509 +collabor 0 32 3.465736 0.000000 543 +particip 0 29 3.583519 0.000000 589 +toward 0 25 3.737670 0.000000 668 +attend 0 18 4.060443 0.000000 893 +regular 0 17 4.110874 0.000000 929 +cognit 1 16 4.174387 4.174387 986 +critic 0 16 4.174387 0.000000 982 +philosophi 0 13 4.382027 0.000000 1167 +signific 0 13 4.382027 0.000000 1125 +count 0 12 4.465908 0.000000 1239 +packet 0 10 4.653960 0.000000 1415 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +appt 1 5 5.347108 5.347108 2312 +ofinterest 0 5 5.347108 0.000000 2323 +nichola 0 3 5.857933 0.000000 3252 +interv 0 3 5.857933 0.000000 3253 +thepap 0 3 5.857933 0.000000 3254 +sciencefal 0 2 6.263398 0.000000 4246 +commentari 0 2 6.263398 0.000000 4287 +edusun 0 2 6.263398 0.000000 4286 +asher 0 1 6.957497 0.000000 7365 +waggen 0 1 6.957497 0.000000 7366 +nasher 0 1 6.957497 0.000000 7367 +berti 0 1 6.957497 0.000000 7368 +posner 0 1 6.957497 0.000000 7369 +mitpress 0 1 6.957497 0.000000 7370 +withanoth 0 1 6.957497 0.000000 7371 +alsorequir 0 1 6.957497 0.000000 7372 +descriptioncours 0 1 6.957497 0.000000 7373 +schedulediscuss 0 1 6.957497 0.000000 7374 +notesperson 0 1 6.957497 0.000000 7375 +adscollabor 0 1 6.957497 0.000000 7376 +paperclass 0 1 6.957497 0.000000 7377 +resourcesstud 0 1 6.957497 0.000000 7378 +questionnaireus 0 1 6.957497 0.000000 7379 +sciencefaculti 0 1 6.957497 0.000000 7380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..97408b1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,191 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +time 0 293 1.098612 0.000000 17 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +schedul 1 119 2.079442 2.079442 85 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +assist 0 112 2.197225 0.000000 113 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +access 0 102 2.302585 0.000000 136 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +exam 1 86 2.484907 2.484907 169 +requir 1 81 2.484907 2.484907 167 +contain 0 81 2.484907 0.000000 174 +mondai 0 77 2.564949 0.000000 206 +exampl 0 77 2.564949 0.000000 195 +summari 0 73 2.639057 0.000000 237 +line 0 75 2.639057 0.000000 231 +materi 0 75 2.639057 0.000000 221 +syllabu 1 67 2.708050 2.708050 247 +foundat 0 62 2.772589 0.000000 286 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +copi 0 63 2.772589 0.000000 284 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +laboratori 0 63 2.772589 0.000000 292 +room 1 59 2.833213 2.833213 301 +detail 1 57 2.890372 2.890372 321 +semest 1 58 2.890372 2.890372 312 +sever 0 56 2.890372 0.000000 322 +three 0 54 2.944439 0.000000 330 +cover 0 55 2.944439 0.000000 329 +date 0 51 2.995732 0.000000 344 +give 0 50 3.044522 0.000000 359 +without 0 50 3.044522 0.000000 370 +frequent 0 49 3.044522 0.000000 367 +still 0 50 3.044522 0.000000 362 +get 0 46 3.091042 0.000000 380 +possibl 0 47 3.091042 0.000000 378 +could 0 46 3.091042 0.000000 383 +discuss 0 45 3.135494 0.000000 399 +even 0 45 3.135494 0.000000 393 +long 1 43 3.178054 3.178054 413 +howev 0 41 3.218876 0.000000 422 +late 0 40 3.258097 0.000000 439 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +form 0 39 3.258097 0.000000 443 +credit 0 38 3.295837 0.000000 460 +respons 1 37 3.332205 3.332205 476 +expect 0 37 3.332205 0.000000 484 +feel 0 37 3.332205 0.000000 483 +procedur 0 36 3.367296 0.000000 488 +ofth 0 36 3.367296 0.000000 491 +soon 0 36 3.367296 0.000000 494 +staff 0 36 3.367296 0.000000 490 +everi 0 34 3.401197 0.000000 519 +within 0 33 3.433987 0.000000 525 +articl 0 33 3.433987 0.000000 530 +chapter 0 32 3.465736 0.000000 536 +often 0 31 3.496508 0.000000 551 +limit 1 29 3.583519 3.583519 585 +depend 0 29 3.583519 0.000000 583 +turn 0 29 3.583519 0.000000 586 +becom 1 28 3.610918 3.610918 603 +challeng 0 26 3.688879 0.000000 653 +effort 0 26 3.688879 0.000000 652 +background 0 25 3.737670 0.000000 664 +highli 0 23 3.806662 0.000000 725 +begin 0 23 3.806662 0.000000 716 +identifi 0 22 3.850148 0.000000 760 +self 0 22 3.850148 0.000000 761 +half 0 21 3.912023 0.000000 776 +prepar 1 20 3.951244 3.951244 824 +break 0 20 3.951244 0.000000 812 +event 0 18 4.060443 0.000000 896 +attend 0 18 4.060443 0.000000 893 +less 0 18 4.060443 0.000000 892 +partial 0 18 4.060443 0.000000 900 +monitor 0 17 4.110874 0.000000 941 +debug 0 17 4.110874 0.000000 944 +thought 0 17 4.110874 0.000000 945 +quiz 1 16 4.174387 4.174387 990 +earli 0 16 4.174387 0.000000 968 +enough 0 15 4.248495 0.000000 1040 +warn 1 14 4.317488 4.317488 1068 +near 0 14 4.317488 0.000000 1091 +consider 0 14 4.317488 0.000000 1076 +wait 1 13 4.382027 4.382027 1168 +quizz 1 13 4.382027 4.382027 1151 +everyth 0 13 4.382027 0.000000 1169 +unfortun 0 13 4.382027 0.000000 1170 +everyon 0 13 4.382027 0.000000 1148 +individu 0 13 4.382027 0.000000 1126 +pascal 1 12 4.465908 4.465908 1213 +uniqu 0 12 4.465908 0.000000 1228 +excit 0 11 4.553877 0.000000 1329 +extrem 0 11 4.553877 0.000000 1330 +eight 0 11 4.553877 0.000000 1331 +deadlin 1 9 4.753590 4.753590 1502 +hundr 0 9 4.753590 0.000000 1528 +intellectu 0 7 5.010635 0.000000 1847 +delai 0 7 5.010635 0.000000 1848 +carefulli 0 6 5.164786 0.000000 2045 +difficult 0 6 5.164786 0.000000 2035 +apolog 0 6 5.164786 0.000000 2046 +nine 0 6 5.164786 0.000000 2047 +vari 0 6 5.164786 0.000000 2001 +pace 0 6 5.164786 0.000000 2011 +firm 0 4 5.568345 0.000000 2684 +thecours 0 4 5.568345 0.000000 2685 +behind 0 4 5.568345 0.000000 2610 +welch 0 4 5.568345 0.000000 2655 +theprogram 0 4 5.568345 0.000000 2686 +dale 0 4 5.568345 0.000000 2687 +wewil 0 4 5.568345 0.000000 2688 +thiscours 0 4 5.568345 0.000000 2601 +repli 0 4 5.568345 0.000000 2689 +guadalup 0 3 5.857933 0.000000 3255 +andyou 0 3 5.857933 0.000000 3256 +gripe 0 3 5.857933 0.000000 3257 +suzi 1 2 6.263398 6.263398 4288 +wella 0 2 6.263398 0.000000 4289 +foral 0 2 6.263398 0.000000 4290 +riski 0 2 6.263398 0.000000 4291 +nowher 0 2 6.263398 0.000000 4292 +gallagh 0 2 6.263398 0.000000 4293 +requiredtextbook 0 2 6.263398 0.000000 4204 +elicit 0 2 6.263398 0.000000 4294 +thenewsgroup 1 1 6.957497 6.957497 7381 +thetest 1 1 6.957497 6.957497 7382 +programmingcsp 0 1 6.957497 0.000000 7383 +pascalintroductori 0 1 6.957497 0.000000 7384 +programminginstructor 0 1 6.957497 0.000000 7385 +gallagherwelcom 0 1 6.957497 0.000000 7386 +cspi 0 1 6.957497 0.000000 7387 +andso 0 1 6.957497 0.000000 7388 +otherdeadlin 0 1 6.957497 0.000000 7389 +thesyllabu 0 1 6.957497 0.000000 7390 +jenn 0 1 6.957497 0.000000 7391 +takethi 0 1 6.957497 0.000000 7392 +courseeach 0 1 6.957497 0.000000 7393 +howwel 0 1 6.957497 0.000000 7394 +deadlineto 0 1 6.957497 0.000000 7395 +thursdayeven 0 1 6.957497 0.000000 7396 +intosmal 0 1 6.957497 0.000000 7397 +ateach 0 1 6.957497 0.000000 7398 +thatlaboratori 0 1 6.957497 0.000000 7399 +thatgrad 0 1 6.957497 0.000000 7400 +thattest 0 1 6.957497 0.000000 7401 +limitedand 0 1 6.957497 0.000000 7402 +foravail 0 1 6.957497 0.000000 7403 +proctor 0 1 6.957497 0.000000 7404 +hoursbefor 0 1 6.957497 0.000000 7405 +andquizz 0 1 6.957497 0.000000 7406 +betaken 0 1 6.957497 0.000000 7407 +prescrib 0 1 6.957497 0.000000 7408 +openedfor 0 1 6.957497 0.000000 7409 +yourstud 0 1 6.957497 0.000000 7410 +orsak 0 1 6.957497 0.000000 7411 +weem 0 1 6.957497 0.000000 7412 +liabl 0 1 6.957497 0.000000 7413 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..2b838036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +data 2 170 1.791759 3.583518 49 +implement 1 152 1.791759 1.791759 52 +read 0 154 1.791759 0.000000 47 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +databas 1 122 2.079442 2.079442 86 +pleas 1 113 2.197225 2.197225 114 +sinc 0 90 2.397895 0.000000 159 +comment 0 93 2.397895 0.000000 146 +solut 1 82 2.484907 2.484907 162 +homework 0 79 2.564949 0.000000 193 +test 1 66 2.708050 2.708050 252 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +syllabu 0 67 2.708050 0.000000 247 +dept 1 64 2.772589 2.772589 291 +reason 0 57 2.890372 0.000000 318 +sampl 1 53 2.944439 2.944439 339 +suggest 0 53 2.944439 0.000000 331 +run 0 51 2.995732 0.000000 347 +without 0 50 3.044522 0.000000 370 +anoth 0 45 3.135494 0.000000 408 +error 0 40 3.258097 0.000000 449 +transform 0 32 3.465736 0.000000 542 +turn 0 29 3.583519 0.000000 586 +pass 1 28 3.610918 3.610918 611 +measur 0 28 3.610918 0.000000 609 +retriev 0 27 3.637586 0.000000 621 +compar 0 26 3.688879 0.000000 648 +output 2 21 3.912023 7.824046 788 +wang 1 21 3.912023 3.912023 790 +wrote 0 20 3.951244 0.000000 830 +benchmark 1 19 4.007333 4.007333 859 +attribut 0 14 4.317488 0.000000 1092 +script 1 13 4.382027 4.382027 1171 +perl 0 11 4.553877 0.000000 1332 +recoveri 1 9 4.753590 4.753590 1474 +replac 0 8 4.875197 0.000000 1668 +fail 0 8 4.875197 0.000000 1655 +contest 1 5 5.347108 5.347108 2273 +tupl 1 5 5.347108 5.347108 2244 +batori 0 4 5.568345 0.000000 2690 +delet 0 4 5.568345 0.000000 2691 +tong 1 3 5.857933 5.857933 3258 +append 0 2 6.263398 0.000000 4295 +ret_into 0 1 6.957497 0.000000 7414 +mdb 0 1 6.957497 0.000000 7415 +diff 0 1 6.957497 0.000000 7416 +medec 0 1 6.957497 0.000000 7417 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..677ab0fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,303 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +time 2 293 1.098612 2.197224 17 +cours 2 273 1.098612 2.197224 15 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +oper 3 180 1.609438 4.828314 34 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +distribut 2 162 1.791759 3.583518 51 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +parallel 0 169 1.791759 0.000000 60 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +file 2 132 1.945910 3.891820 70 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +intern 1 108 2.197225 2.197225 128 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +techniqu 0 99 2.302585 0.000000 138 +proceed 2 93 2.397895 4.795790 152 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +call 1 91 2.397895 2.397895 153 +grade 0 90 2.397895 0.000000 142 +present 0 91 2.397895 0.000000 145 +octob 0 89 2.397895 0.000000 156 +ieee 1 86 2.484907 2.484907 190 +environ 1 84 2.484907 2.484907 177 +requir 1 81 2.484907 2.484907 167 +novemb 1 81 2.484907 2.484907 179 +control 0 82 2.484907 0.000000 164 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +decemb 2 80 2.564949 5.129898 215 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +august 1 66 2.708050 2.708050 257 +practic 0 70 2.708050 0.000000 246 +januari 1 62 2.772589 2.772589 264 +descript 1 64 2.772589 2.772589 271 +collect 0 65 2.772589 0.000000 268 +septemb 0 65 2.772589 0.000000 274 +foundat 0 62 2.772589 0.000000 286 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +content 0 59 2.833213 0.000000 302 +unix 1 58 2.890372 2.890372 308 +summer 1 56 2.890372 2.890372 311 +overview 0 56 2.890372 0.000000 323 +februari 1 54 2.944439 2.944439 328 +undergradu 0 54 2.944439 0.000000 338 +cover 0 55 2.944439 0.000000 329 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +principl 0 48 3.044522 0.000000 357 +basic 0 50 3.044522 0.000000 360 +textbook 1 44 3.135494 3.135494 397 +made 0 44 3.135494 0.000000 398 +execut 0 45 3.135494 0.000000 404 +offer 0 43 3.178054 0.000000 414 +review 2 42 3.218876 6.437752 425 +cach 1 41 3.218876 3.218876 432 +examin 0 42 3.218876 0.000000 424 +fast 0 42 3.218876 0.000000 429 +transact 1 39 3.258097 3.258097 438 +theoret 0 39 3.258097 0.000000 446 +programm 0 39 3.258097 0.000000 445 +origin 0 38 3.295837 0.000000 472 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +procedur 1 36 3.367296 3.367296 488 +survei 1 35 3.401197 3.401197 513 +concurr 0 34 3.401197 0.000000 501 +articl 0 33 3.433987 0.000000 530 +toler 0 33 3.433987 0.000000 533 +concept 1 32 3.465736 3.465736 537 +idea 1 32 3.465736 3.465736 545 +fault 1 32 3.465736 3.465736 547 +chapter 0 32 3.465736 0.000000 536 +titl 0 31 3.496508 0.000000 556 +robert 0 30 3.555348 0.000000 567 +hard 0 30 3.555348 0.000000 563 +art 0 29 3.583519 0.000000 593 +multiprocessor 1 28 3.610918 3.610918 605 +determin 0 27 3.637586 0.000000 630 +altern 0 26 3.688879 0.000000 641 +proc 0 26 3.688879 0.000000 649 +constraint 0 26 3.688879 0.000000 636 +reliabl 0 25 3.737670 0.000000 674 +mobil 1 23 3.806662 3.806662 730 +thread 1 23 3.806662 3.806662 722 +highli 0 23 3.806662 0.000000 725 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +inth 0 22 3.850148 0.000000 741 +kernel 1 20 3.951244 3.951244 825 +andrew 1 19 4.007333 4.007333 849 +anderson 1 19 4.007333 4.007333 860 +prerequisit 0 19 4.007333 0.000000 846 +stand 0 18 4.060443 0.000000 891 +thoma 0 18 4.060443 0.000000 901 +bershad 0 18 4.060443 0.000000 902 +event 0 18 4.060443 0.000000 896 +germani 1 17 4.110874 4.110874 946 +protect 1 17 4.110874 4.110874 935 +earli 1 16 4.174387 4.174387 968 +young 0 16 4.174387 0.000000 991 +remot 1 15 4.248495 4.248495 1041 +princeton 1 15 4.248495 4.248495 1042 +levi 1 14 4.317488 4.317488 1093 +joint 0 13 4.382027 0.000000 1130 +usenix 1 12 4.465908 4.465908 1240 +gupta 0 12 4.465908 0.000000 1241 +bruce 0 12 4.465908 0.000000 1226 +denni 0 11 4.553877 0.000000 1321 +reness 0 11 4.553877 0.000000 1333 +impact 0 11 4.553877 0.000000 1334 +sosp 1 10 4.653960 4.653960 1416 +operatingsystem 1 10 4.653960 4.653960 1401 +henri 0 10 4.653960 0.000000 1417 +black 0 10 4.653960 0.000000 1418 +hint 0 10 4.653960 0.000000 1419 +familiar 0 9 4.753590 0.000000 1485 +robbert 0 9 4.753590 0.000000 1529 +inter 0 9 4.753590 0.000000 1530 +birman 0 9 4.753590 0.000000 1531 +kumar 0 9 4.753590 0.000000 1506 +mach 1 8 4.875197 4.875197 1669 +inproceed 0 8 4.875197 0.000000 1670 +presenc 0 8 4.875197 0.000000 1671 +harrick 1 7 5.010635 5.010635 1849 +beyond 1 7 5.010635 5.010635 1834 +peterson 0 7 5.010635 0.000000 1850 +migrat 0 7 5.010635 0.000000 1851 +encrypt 0 7 5.010635 0.000000 1835 +onoper 1 6 5.164786 5.164786 2048 +multiprogram 0 6 5.164786 0.000000 2010 +thompson 0 6 5.164786 0.000000 2049 +silberschatz 0 6 5.164786 0.000000 1978 +edward 0 6 5.164786 0.000000 2050 +internationalconfer 0 6 5.164786 0.000000 2051 +distributedsystem 0 6 5.164786 0.000000 2022 +versu 0 6 5.164786 0.000000 2052 +ousterhout 1 5 5.347108 5.347108 2301 +ofdistribut 0 5 5.347108 0.000000 2316 +theth 0 5 5.347108 0.000000 2325 +oncomput 0 5 5.347108 0.000000 2326 +joseph 0 5 5.347108 0.000000 2327 +authent 0 5 5.347108 0.000000 2306 +steer 0 5 5.347108 0.000000 2328 +wireless 1 4 5.568345 5.568345 2693 +lazowska 1 4 5.568345 5.568345 2694 +breadth 0 4 5.568345 0.000000 2695 +implic 0 4 5.568345 0.000000 2696 +fora 0 4 5.568345 0.000000 2697 +theacm 0 4 5.568345 0.000000 2698 +zhao 0 4 5.568345 0.000000 2699 +cheriton 1 3 5.857933 5.857933 3259 +synopsi 0 3 5.857933 0.000000 3260 +formobil 0 3 5.857933 0.000000 3261 +requirementsstud 0 3 5.857933 0.000000 3116 +theperform 0 3 5.857933 0.000000 3262 +berlin 0 3 5.857933 0.000000 3263 +terri 0 3 5.857933 0.000000 3264 +golub 0 3 5.857933 0.000000 3265 +tokuda 0 3 5.857933 0.000000 3266 +kistler 0 3 5.857933 0.000000 3267 +synopsisc 0 2 6.263398 0.000000 4296 +systemdesign 0 2 6.263398 0.000000 4297 +theinstructor 0 2 6.263398 0.000000 4298 +anexperiment 0 2 6.263398 0.000000 4299 +afip 0 2 6.263398 0.000000 4300 +hansen 0 2 6.263398 0.000000 4301 +nucleu 0 2 6.263398 0.000000 4302 +bensoussan 0 2 6.263398 0.000000 4303 +multic 0 2 6.263398 0.000000 4304 +virtualmemori 0 2 6.263398 0.000000 4305 +ritchi 0 2 6.263398 0.000000 4306 +tucker 0 2 6.263398 0.000000 4307 +bunt 0 2 6.263398 0.000000 4308 +barrera 0 2 6.263398 0.000000 4309 +acmtransact 0 2 6.263398 0.000000 4310 +nelson 0 2 6.263398 0.000000 4168 +cristian 0 2 6.263398 0.000000 4311 +offailur 0 2 6.263398 0.000000 4255 +systemsr 0 2 6.263398 0.000000 4312 +goldberg 0 2 6.263398 0.000000 4313 +rosenblum 0 2 6.263398 0.000000 4314 +ieeetransact 0 2 6.263398 0.000000 4315 +oninform 0 2 6.263398 0.000000 4316 +baron 0 2 6.263398 0.000000 4317 +rashid 0 2 6.263398 0.000000 4318 +preemptiv 0 2 6.263398 0.000000 4319 +ondistribut 0 2 6.263398 0.000000 4320 +dalei 1 1 6.957497 6.957497 7420 +formultiprogram 1 1 6.957497 6.957497 7421 +karshmer 1 1 6.957497 6.957497 7422 +nehmer 1 1 6.957497 6.957497 7423 +schroeder 1 1 6.957497 6.957497 7424 +needham 1 1 6.957497 6.957497 7425 +trigger 1 1 6.957497 6.957497 7426 +prerequisitegradu 0 1 6.957497 0.000000 7427 +systemssuch 0 1 6.957497 0.000000 7428 +materialin 0 1 6.957497 0.000000 7429 +andsilberschatz 0 1 6.957497 0.000000 7430 +coveringboth 0 1 6.957497 0.000000 7431 +anemphasi 0 1 6.957497 0.000000 7432 +anddiscuss 0 1 6.957497 0.000000 7433 +aterm 0 1 6.957497 0.000000 7434 +systemsfernando 0 1 6.957497 0.000000 7435 +corbato 0 1 6.957497 0.000000 7436 +marjori 0 1 6.957497 0.000000 7437 +merwin 0 1 6.957497 0.000000 7438 +daggett 0 1 6.957497 0.000000 7439 +brinch 0 1 6.957497 0.000000 7440 +clingen 0 1 6.957497 0.000000 7441 +tannenbaum 0 1 6.957497 0.000000 7442 +andexampl 0 1 6.957497 0.000000 7443 +managementa 0 1 6.957497 0.000000 7444 +forshar 0 1 6.957497 0.000000 7445 +schedulingr 0 1 6.957497 0.000000 7446 +communicationj 0 1 6.957497 0.000000 7447 +birel 0 1 6.957497 0.000000 7448 +rpc 0 1 6.957497 0.000000 7449 +lightweightremot 0 1 6.957497 0.000000 7450 +migrationf 0 1 6.957497 0.000000 7451 +dougli 0 1 6.957497 0.000000 7452 +spriteoper 0 1 6.957497 0.000000 7453 +theimer 0 1 6.957497 0.000000 7454 +lantz 0 1 6.957497 0.000000 7455 +preemptabl 0 1 6.957497 0.000000 7456 +tolerancef 0 1 6.957497 0.000000 7457 +sand 0 1 6.957497 0.000000 7458 +sandberg 0 1 6.957497 0.000000 7459 +kleiman 0 1 6.957497 0.000000 7460 +ofsun 0 1 6.957497 0.000000 7461 +mckusick 0 1 6.957497 0.000000 7462 +leffler 0 1 6.957497 0.000000 7463 +fabri 0 1 6.957497 0.000000 7464 +fastfil 0 1 6.957497 0.000000 7465 +alog 0 1 6.957497 0.000000 7466 +systemsm 0 1 6.957497 0.000000 7467 +gifford 0 1 6.957497 0.000000 7468 +securityr 0 1 6.957497 0.000000 7469 +inlarg 0 1 6.957497 0.000000 7470 +butler 0 1 6.957497 0.000000 7471 +lampson 0 1 6.957497 0.000000 7472 +accetta 0 1 6.957497 0.000000 7473 +boloski 0 1 6.957497 0.000000 7474 +tevanian 0 1 6.957497 0.000000 7475 +systemsh 0 1 6.957497 0.000000 7476 +kopetz 0 1 6.957497 0.000000 7477 +timesystem 0 1 6.957497 0.000000 7478 +layland 0 1 6.957497 0.000000 7479 +ramamritham 0 1 6.957497 0.000000 7480 +stankov 0 1 6.957497 0.000000 7481 +schedulingund 0 1 6.957497 0.000000 7482 +mercer 0 1 6.957497 0.000000 7483 +computingb 0 1 6.957497 0.000000 7484 +badrinath 0 1 6.957497 0.000000 7485 +acharya 0 1 6.957497 0.000000 7486 +imielinski 0 1 6.957497 0.000000 7487 +satyanarayanan 0 1 6.957497 0.000000 7488 +okasaki 0 1 6.957497 0.000000 7489 +siegel 0 1 6.957497 0.000000 7490 +coda 0 1 6.957497 0.000000 7491 +distributedworkst 0 1 6.957497 0.000000 7492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..d45f0345 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +design 2 213 1.386294 2.772588 25 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +oper 2 180 1.609438 3.218876 34 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +network 2 168 1.791759 3.583518 61 +algorithm 2 162 1.791759 3.583518 57 +applic 1 170 1.791759 1.791759 56 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +phone 1 175 1.791759 1.791759 45 +base 1 165 1.791759 1.791759 50 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +austin 0 168 1.791759 0.000000 63 +data 0 170 1.791759 0.000000 49 +support 2 132 1.945910 3.891820 83 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +problem 1 147 1.945910 1.945910 75 +note 0 142 1.945910 0.000000 67 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +number 1 130 2.079442 2.079442 97 +spring 0 131 2.079442 0.000000 88 +databas 0 122 2.079442 0.000000 86 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +teach 1 108 2.197225 2.197225 112 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +assist 0 112 2.197225 0.000000 113 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +techniqu 2 99 2.302585 4.605170 138 +advanc 0 99 2.302585 0.000000 130 +proceed 3 93 2.397895 7.193685 152 +commun 2 95 2.397895 4.795790 157 +octob 2 89 2.397895 4.795790 156 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +follow 0 92 2.397895 0.000000 143 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +novemb 2 81 2.484907 4.969814 179 +ieee 2 86 2.484907 4.969814 190 +journal 1 83 2.484907 2.484907 183 +control 1 82 2.484907 2.484907 164 +requir 0 81 2.484907 0.000000 167 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +resourc 0 81 2.484907 0.000000 172 +second 0 81 2.484907 0.000000 166 +environ 0 84 2.484907 0.000000 177 +server 2 76 2.564949 5.129898 204 +issu 2 78 2.564949 5.129898 211 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +optim 1 79 2.564949 2.564949 197 +june 1 79 2.564949 2.564949 214 +dynam 1 76 2.564949 2.564949 194 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +servic 0 72 2.639057 0.000000 236 +multimedia 3 68 2.708050 8.124150 258 +august 1 66 2.708050 2.708050 257 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +septemb 2 65 2.772589 5.545178 274 +descript 1 64 2.772589 2.772589 271 +copi 1 63 2.772589 2.772589 284 +polici 1 64 2.772589 2.772589 279 +handout 0 64 2.772589 0.000000 263 +taylor 0 63 2.772589 0.000000 287 +collect 0 65 2.772589 0.000000 268 +januari 0 62 2.772589 0.000000 264 +content 0 59 2.833213 0.000000 302 +locat 0 59 2.833213 0.000000 303 +march 0 61 2.833213 0.000000 295 +simpl 0 60 2.833213 0.000000 298 +overview 1 56 2.890372 2.890372 323 +detail 0 57 2.890372 0.000000 321 +summer 0 56 2.890372 0.000000 311 +space 0 57 2.890372 0.000000 310 +cover 1 55 2.944439 2.944439 329 +processor 0 54 2.944439 0.000000 335 +digit 1 52 2.995732 2.995732 348 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +format 1 48 3.044522 3.044522 356 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +principl 1 48 3.044522 3.044522 357 +standard 1 48 3.044522 3.044522 365 +still 0 50 3.044522 0.000000 362 +possibl 0 47 3.091042 0.000000 378 +california 0 46 3.091042 0.000000 388 +adapt 0 46 3.091042 0.000000 387 +video 2 44 3.135494 6.270988 405 +protocol 1 45 3.135494 3.135494 407 +textbook 0 44 3.135494 0.000000 397 +discuss 0 45 3.135494 0.000000 399 +answer 0 45 3.135494 0.000000 391 +describ 0 45 3.135494 0.000000 400 +offer 0 43 3.178054 0.000000 414 +long 0 43 3.178054 0.000000 413 +third 0 43 3.178054 0.000000 412 +mechan 0 43 3.178054 0.000000 416 +review 1 42 3.218876 3.218876 425 +cach 1 41 3.218876 3.218876 432 +examin 0 42 3.218876 0.000000 424 +futur 0 41 3.218876 0.000000 427 +tutori 0 39 3.258097 0.000000 437 +multipl 0 39 3.258097 0.000000 453 +expect 1 37 3.332205 3.332205 484 +workstat 0 37 3.332205 0.000000 479 +multi 0 36 3.367296 0.000000 493 +ofth 0 36 3.367296 0.000000 491 +survei 1 35 3.401197 3.401197 513 +represent 0 35 3.401197 0.000000 512 +statist 0 35 3.401197 0.000000 521 +articl 0 33 3.433987 0.000000 530 +toler 0 33 3.433987 0.000000 533 +concept 1 32 3.465736 3.465736 537 +fault 0 32 3.465736 0.000000 547 +storag 1 31 3.496508 3.496508 553 +titl 0 31 3.496508 0.000000 556 +particip 0 29 3.583519 0.000000 589 +synchron 0 29 3.583519 0.000000 588 +scale 1 28 3.610918 3.610918 613 +framework 1 28 3.610918 3.610918 606 +packag 0 28 3.610918 0.000000 614 +retriev 1 27 3.637586 3.637586 621 +determin 1 27 3.637586 3.637586 630 +arrai 1 27 3.637586 3.637586 627 +session 0 26 3.688879 0.000000 643 +fundament 1 25 3.737670 3.737670 661 +magazin 1 24 3.761200 3.761200 704 +scalabl 0 24 3.761200 0.000000 705 +compress 2 23 3.806662 7.613324 719 +variabl 0 23 3.806662 0.000000 715 +disk 1 22 3.850148 3.850148 747 +emphasi 0 22 3.850148 0.000000 755 +reduc 0 22 3.850148 0.000000 759 +chen 1 21 3.912023 3.912023 791 +flexibl 0 21 3.912023 0.000000 792 +mpeg 1 20 3.951244 3.951244 831 +scheme 1 20 3.951244 3.951244 818 +sure 0 20 3.951244 0.000000 813 +qualiti 0 20 3.951244 0.000000 832 +media 1 19 4.007333 4.007333 861 +boston 1 19 4.007333 4.007333 862 +prerequisit 0 19 4.007333 0.000000 846 +comparison 0 19 4.007333 0.000000 863 +anderson 0 19 4.007333 0.000000 860 +stand 0 18 4.060443 0.000000 891 +failur 0 18 4.060443 0.000000 898 +layer 0 17 4.110874 0.000000 926 +zhang 1 16 4.174387 4.174387 980 +diego 1 16 4.174387 4.174387 992 +transfer 0 16 4.174387 0.000000 967 +hierarch 1 15 4.248495 4.248495 1018 +rate 1 15 4.248495 4.248495 1037 +contribut 0 15 4.248495 0.000000 1021 +stream 0 15 4.248495 0.000000 1015 +audio 1 14 4.317488 4.317488 1094 +francisco 1 14 4.317488 4.317488 1095 +incomput 0 14 4.317488 0.000000 1096 +demand 0 14 4.317488 0.000000 1073 +heterogen 0 14 4.317488 0.000000 1090 +consider 0 14 4.317488 0.000000 1076 +introduc 0 13 4.382027 0.000000 1139 +carri 0 13 4.382027 0.000000 1152 +resolut 0 13 4.382027 0.000000 1172 +sigmetr 0 13 4.382027 0.000000 1173 +buffer 1 12 4.465908 4.465908 1211 +uniqu 0 12 4.465908 0.000000 1228 +readi 0 12 4.465908 0.000000 1242 +scan 0 12 4.465908 0.000000 1243 +philadelphia 0 12 4.465908 0.000000 1244 +weight 0 12 4.465908 0.000000 1204 +placement 1 10 4.653960 4.653960 1420 +packet 1 10 4.653960 4.653960 1415 +traffic 0 10 4.653960 0.000000 1421 +operatingsystem 0 10 4.653960 0.000000 1401 +establish 0 9 4.753590 0.000000 1532 +familiar 0 9 4.753590 0.000000 1485 +recoveri 0 9 4.753590 0.000000 1474 +light 0 9 4.753590 0.000000 1533 +osdi 0 9 4.753590 0.000000 1534 +transport 1 8 4.875197 4.875197 1672 +convers 0 8 4.875197 0.000000 1673 +pacif 0 8 4.875197 0.000000 1674 +grove 0 8 4.875197 0.000000 1675 +harrick 1 7 5.010635 5.010635 1849 +trend 1 7 5.010635 5.010635 1842 +channel 1 7 5.010635 5.010635 1836 +keshav 1 7 5.010635 5.010635 1852 +peterson 1 7 5.010635 5.010635 1850 +chiang 0 7 5.010635 0.000000 1853 +supportfor 0 7 5.010635 0.000000 1854 +smooth 0 7 5.010635 0.000000 1855 +misra 0 7 5.010635 0.000000 1856 +conferenc 0 7 5.010635 0.000000 1857 +jpeg 1 6 5.164786 5.164786 2053 +internationalconfer 0 6 5.164786 0.000000 2051 +subsystem 0 6 5.164786 0.000000 2015 +symposiumon 0 6 5.164786 0.000000 2054 +sigcomm 1 5 5.347108 5.347108 2329 +row 0 5 5.347108 0.000000 2330 +proceedingsof 0 5 5.347108 0.000000 2331 +batch 1 4 5.568345 5.568345 2700 +multimediasystem 1 4 5.568345 5.568345 2701 +venkat 1 4 5.568345 5.568345 2702 +addition 0 4 5.568345 0.000000 2593 +forparallel 0 4 5.568345 0.000000 2703 +admiss 0 4 5.568345 0.000000 2704 +clark 0 4 5.568345 0.000000 2705 +floyd 0 4 5.568345 0.000000 2682 +buss 0 4 5.568345 0.000000 2649 +goyal 2 3 5.857933 11.715866 3268 +shenoi 1 3 5.857933 5.857933 3269 +rangan 1 3 5.857933 5.857933 3270 +anaheim 1 3 5.857933 5.857933 3271 +campbel 1 3 5.857933 5.857933 3272 +mccann 1 3 5.857933 5.857933 3273 +synopsi 0 3 5.857933 0.000000 3260 +informationcours 0 3 5.857933 0.000000 3167 +guadalup 0 3 5.857933 0.000000 3255 +multimediaappl 0 3 5.857933 0.000000 3274 +ftc 0 3 5.857933 0.000000 3275 +katz 0 3 5.857933 0.000000 3276 +reddi 0 3 5.857933 0.000000 3277 +deliveri 0 3 5.857933 0.000000 3278 +durham 0 3 5.857933 0.000000 3279 +hampshir 0 3 5.857933 0.000000 3280 +chow 0 3 5.857933 0.000000 3281 +london 0 3 5.857933 0.000000 3282 +kandlur 1 2 6.263398 6.263398 4321 +ofmultimedia 1 2 6.263398 6.263398 4322 +icmc 1 2 6.263398 6.263398 4323 +jacobson 1 2 6.263398 6.263398 4324 +prerequisitesgradu 0 2 6.263398 0.000000 4325 +synopsisc 0 2 6.263398 0.000000 4296 +madeavail 0 2 6.263398 0.000000 4326 +thetop 0 2 6.263398 0.000000 4327 +critiqu 0 2 6.263398 0.000000 4328 +ofpap 0 2 6.263398 0.000000 4329 +andclass 0 2 6.263398 0.000000 4330 +prashant 0 2 6.263398 0.000000 4331 +gemmel 0 2 6.263398 0.000000 4332 +ieeeintern 0 2 6.263398 0.000000 4333 +inmulti 0 2 6.263398 0.000000 4334 +annualintern 0 2 6.263398 0.000000 4335 +pasadena 0 2 6.263398 0.000000 4336 +multimediai 0 2 6.263398 0.000000 4337 +acmmultimedia 0 2 6.263398 0.000000 4338 +sanfrancisco 0 2 6.263398 0.000000 4339 +shenker 0 2 6.263398 0.000000 4340 +verma 0 2 6.263398 0.000000 4341 +delaybound 0 2 6.263398 0.000000 4342 +toappear 0 2 6.263398 0.000000 4343 +nossdav 0 2 6.263398 0.000000 4344 +acmsigcomm 0 2 6.263398 0.000000 4345 +andd 0 2 6.263398 0.000000 4346 +shepherd 0 2 6.263398 0.000000 4347 +basedcommun 0 2 6.263398 0.000000 4348 +incommun 0 2 6.263398 0.000000 4349 +govindan 0 2 6.263398 0.000000 4350 +forcontinu 0 2 6.263398 0.000000 4351 +formultimedia 0 2 6.263398 0.000000 4352 +zellweg 0 2 6.263398 0.000000 4353 +swinehart 0 2 6.263398 0.000000 4354 +etherphon 0 2 6.263398 0.000000 4355 +steinmetz 1 1 6.957497 6.957497 7493 +sitaram 1 1 6.957497 6.957497 7494 +coulson 1 1 6.957497 6.957497 7495 +descriptiongener 0 1 6.957497 0.000000 7496 +boththeoret 0 1 6.957497 0.000000 7497 +systemsupport 0 1 6.957497 0.000000 7498 +transportprotocol 0 1 6.957497 0.000000 7499 +designissu 0 1 6.957497 0.000000 7500 +textbooka 0 1 6.957497 0.000000 7501 +requirementsth 0 1 6.957497 0.000000 7502 +relatedpap 0 1 6.957497 0.000000 7503 +tounderstand 0 1 6.957497 0.000000 7504 +asemest 0 1 6.957497 0.000000 7505 +vintuesdai 0 1 6.957497 0.000000 7506 +assistantmr 0 1 6.957497 0.000000 7507 +eduread 0 1 6.957497 0.000000 7508 +cntain 0 1 6.957497 0.000000 7509 +theread 0 1 6.957497 0.000000 7510 +speedwai 0 1 6.957497 0.000000 7511 +dobi 0 1 6.957497 0.000000 7512 +mall 0 1 6.957497 0.000000 7513 +callthem 0 1 6.957497 0.000000 7514 +compressionr 0 1 6.957497 0.000000 7515 +wallac 0 1 6.957497 0.000000 7516 +gall 0 1 6.957497 0.000000 7517 +anastassi 0 1 6.957497 0.000000 7518 +digitaltelevis 0 1 6.957497 0.000000 7519 +serversoverview 0 1 6.957497 0.000000 7520 +serverdesign 0 1 6.957497 0.000000 7521 +chiueh 0 1 6.957497 0.000000 7522 +groupedsweep 0 1 6.957497 0.000000 7523 +ofthird 0 1 6.957497 0.000000 7524 +narasimha 0 1 6.957497 0.000000 7525 +wylli 0 1 6.957497 0.000000 7526 +admissioncontrol 0 1 6.957497 0.000000 7527 +designinglarg 0 1 6.957497 0.000000 7528 +inmultimedia 0 1 6.957497 0.000000 7529 +interactivevideo 0 1 6.957497 0.000000 7530 +playout 0 1 6.957497 0.000000 7531 +shahabuddin 0 1 6.957497 0.000000 7532 +foran 0 1 6.957497 0.000000 7533 +demandvideo 0 1 6.957497 0.000000 7534 +papadimitri 0 1 6.957497 0.000000 7535 +ramanathan 0 1 6.957497 0.000000 7536 +informationcach 0 1 6.957497 0.000000 7537 +homeentertain 0 1 6.957497 0.000000 7538 +multimedianetwork 0 1 6.957497 0.000000 7539 +ferrari 0 1 6.957497 0.000000 7540 +channelestablish 0 1 6.957497 0.000000 7541 +areasin 0 1 6.957497 0.000000 7542 +servicedisciplin 0 1 6.957497 0.000000 7543 +workshopon 0 1 6.957497 0.000000 7544 +losslesssmooth 0 1 6.957497 0.000000 7545 +salehi 0 1 6.957497 0.000000 7546 +kuros 0 1 6.957497 0.000000 7547 +towslei 0 1 6.957497 0.000000 7548 +storedvideo 0 1 6.957497 0.000000 7549 +requirementsthrough 0 1 6.957497 0.000000 7550 +grossglaus 0 1 6.957497 0.000000 7551 +rcbr 0 1 6.957497 0.000000 7552 +efficientservic 0 1 6.957497 0.000000 7553 +kanakia 0 1 6.957497 0.000000 7554 +reibman 0 1 6.957497 0.000000 7555 +congestioncontrol 0 1 6.957497 0.000000 7556 +tennenhous 0 1 6.957497 0.000000 7557 +newgener 0 1 6.957497 0.000000 7558 +hutchison 0 1 6.957497 0.000000 7559 +servicearchitectur 0 1 6.957497 0.000000 7560 +turner 0 1 6.957497 0.000000 7561 +reliablemulticast 0 1 6.957497 0.000000 7562 +levelfram 0 1 6.957497 0.000000 7563 +deffner 0 1 6.957497 0.000000 7564 +schulzrinn 0 1 6.957497 0.000000 7565 +blakowski 0 1 6.957497 0.000000 7566 +onselect 0 1 6.957497 0.000000 7567 +januaryoper 0 1 6.957497 0.000000 7568 +multimediag 0 1 6.957497 0.000000 7569 +robin 0 1 6.957497 0.000000 7570 +blair 0 1 6.957497 0.000000 7571 +papathoma 0 1 6.957497 0.000000 7572 +choru 0 1 6.957497 0.000000 7573 +druschel 0 1 6.957497 0.000000 7574 +abbott 0 1 6.957497 0.000000 7575 +pagel 0 1 6.957497 0.000000 7576 +systemssupport 0 1 6.957497 0.000000 7577 +conferencingh 0 1 6.957497 0.000000 7578 +venkatrangan 0 1 6.957497 0.000000 7579 +packetvideo 0 1 6.957497 0.000000 7580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..1eba3127 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,297 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +hour 0 165 1.791759 0.000000 46 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +data 0 170 1.791759 0.000000 49 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +high 0 130 2.079442 0.000000 101 +seattl 0 120 2.079442 0.000000 103 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +technic 1 100 2.302585 2.302585 140 +advanc 0 99 2.302585 0.000000 130 +proceed 2 93 2.397895 4.795790 152 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +pictur 1 89 2.397895 2.397895 160 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +imag 1 91 2.397895 2.397895 161 +octob 0 89 2.397895 0.000000 156 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +novemb 1 81 2.484907 2.484907 179 +resourc 1 81 2.484907 2.484907 172 +build 0 85 2.484907 0.000000 184 +control 0 82 2.484907 0.000000 164 +environ 0 84 2.484907 0.000000 177 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +larg 0 82 2.484907 0.000000 168 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +symposium 1 72 2.639057 2.639057 238 +servic 0 72 2.639057 0.000000 236 +meet 0 72 2.639057 0.000000 229 +effici 0 73 2.639057 0.000000 233 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +multimedia 2 68 2.708050 5.416100 258 +receiv 0 66 2.708050 0.000000 244 +simul 0 66 2.708050 0.000000 255 +august 0 66 2.708050 0.000000 257 +descript 1 64 2.772589 2.772589 271 +interact 1 62 2.772589 2.772589 270 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +share 1 59 2.833213 2.833213 304 +content 0 59 2.833213 0.000000 302 +locat 0 59 2.833213 0.000000 303 +semest 0 58 2.890372 0.000000 312 +processor 1 54 2.944439 2.944439 335 +februari 0 54 2.944439 0.000000 328 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +basic 0 50 3.044522 0.000000 360 +appoint 0 49 3.044522 0.000000 358 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +textbook 1 44 3.135494 3.135494 397 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +fridai 0 44 3.135494 0.000000 390 +made 0 44 3.135494 0.000000 398 +discuss 0 45 3.135494 0.000000 399 +mechan 0 43 3.178054 0.000000 416 +http 0 41 3.218876 0.000000 420 +transact 1 39 3.258097 3.258097 438 +submit 0 39 3.258097 0.000000 440 +paul 0 38 3.295837 0.000000 471 +cost 0 37 3.332205 0.000000 480 +connect 0 37 3.332205 0.000000 485 +tree 1 36 3.367296 3.367296 492 +multi 0 36 3.367296 0.000000 493 +ofth 0 36 3.367296 0.000000 491 +queri 1 33 3.433987 3.433987 524 +articl 0 33 3.433987 0.000000 530 +concept 0 32 3.465736 0.000000 537 +collabor 0 32 3.465736 0.000000 543 +extend 0 32 3.465736 0.000000 539 +titl 0 31 3.496508 0.000000 556 +domain 0 30 3.555348 0.000000 564 +particip 0 29 3.583519 0.000000 589 +semant 0 29 3.583519 0.000000 587 +framework 0 28 3.610918 0.000000 606 +retriev 0 27 3.637586 0.000000 621 +berkelei 0 26 3.688879 0.000000 657 +reliabl 1 25 3.737670 3.737670 674 +scalabl 1 24 3.761200 3.761200 705 +frame 0 24 3.761200 0.000000 684 +mobil 0 23 3.806662 0.000000 730 +emphasi 0 22 3.850148 0.000000 755 +self 0 22 3.850148 0.000000 761 +color 0 22 3.850148 0.000000 762 +rout 2 21 3.912023 7.824046 793 +scheme 0 20 3.951244 0.000000 818 +boston 1 19 4.007333 4.007333 862 +prerequisit 0 19 4.007333 0.000000 846 +anderson 0 19 4.007333 0.000000 860 +media 0 19 4.007333 0.000000 861 +predict 0 19 4.007333 0.000000 855 +stand 0 18 4.060443 0.000000 891 +letter 0 16 4.174387 0.000000 981 +vector 0 16 4.174387 0.000000 961 +diego 0 16 4.174387 0.000000 992 +zhang 0 16 4.174387 0.000000 980 +latenc 0 16 4.174387 0.000000 993 +hierarch 0 15 4.248495 0.000000 1018 +francisco 1 14 4.317488 4.317488 1095 +audio 0 14 4.317488 0.000000 1094 +gupta 1 12 4.465908 4.465908 1241 +replic 0 12 4.465908 0.000000 1231 +shape 0 12 4.465908 0.000000 1245 +clock 0 11 4.553877 0.000000 1320 +packet 1 10 4.653960 4.653960 1415 +queue 0 10 4.653960 0.000000 1386 +sosp 0 10 4.653960 0.000000 1416 +familiar 0 9 4.753590 0.000000 1485 +inter 0 9 4.753590 0.000000 1530 +distanc 0 9 4.753590 0.000000 1500 +light 0 9 4.753590 0.000000 1533 +osdi 0 9 4.753590 0.000000 1534 +face 0 9 4.753590 0.000000 1501 +paradigm 1 8 4.875197 4.875197 1662 +transport 0 8 4.875197 0.000000 1672 +parti 0 8 4.875197 0.000000 1676 +textur 0 8 4.875197 0.000000 1677 +conferenc 1 7 5.010635 5.010635 1857 +harrick 1 7 5.010635 5.010635 1849 +core 0 7 5.010635 0.000000 1809 +channel 0 7 5.010635 0.000000 1836 +determinist 0 6 5.164786 0.000000 2034 +onoper 0 6 5.164786 0.000000 2048 +multicast 2 5 5.347108 10.694216 2305 +sigcomm 2 5 5.347108 10.694216 2329 +jain 1 5 5.347108 5.347108 2332 +fair 0 5 5.347108 0.000000 2333 +consum 0 5 5.347108 0.000000 2334 +multimediasystem 0 4 5.568345 0.000000 2701 +venkat 0 4 5.568345 0.000000 2702 +andevalu 0 4 5.568345 0.000000 2706 +floyd 0 4 5.568345 0.000000 2682 +theacm 0 4 5.568345 0.000000 2698 +ofinform 0 4 5.568345 0.000000 2707 +bach 0 4 5.568345 0.000000 2708 +mccann 1 3 5.857933 5.857933 3273 +cheriton 1 3 5.857933 5.857933 3259 +infocom 1 3 5.857933 5.857933 3283 +weihl 1 3 5.857933 5.857933 3284 +synopsi 0 3 5.857933 0.000000 3260 +networkprotocol 0 3 5.857933 0.000000 3285 +requirementsstud 0 3 5.857933 0.000000 3116 +aswel 0 3 5.857933 0.000000 3286 +kistler 0 3 5.857933 0.000000 3267 +rangan 0 3 5.857933 0.000000 3270 +franci 0 3 5.857933 0.000000 3287 +singhal 0 3 5.857933 0.000000 3098 +axiomat 0 3 5.857933 0.000000 3288 +how 0 3 5.857933 0.000000 3289 +moran 0 3 5.857933 0.000000 3151 +nguyen 0 3 5.857933 0.000000 3290 +nearbi 0 3 5.857933 0.000000 3291 +ofoper 0 3 5.857933 0.000000 3292 +proport 0 3 5.857933 0.000000 3293 +qbic 0 3 5.857933 0.000000 3294 +acmmultimedia 1 2 6.263398 6.263398 4338 +jacobson 1 2 6.263398 6.263398 4324 +deer 1 2 6.263398 6.263398 4356 +jeffai 1 2 6.263398 6.263398 4357 +prerequisitesgradu 0 2 6.263398 0.000000 4325 +formultimedia 0 2 6.263398 0.000000 4352 +andresearch 0 2 6.263398 0.000000 4247 +theinstructor 0 2 6.263398 0.000000 4298 +studentsenrol 0 2 6.263398 0.000000 4073 +redel 0 2 6.263398 0.000000 4358 +zellweg 0 2 6.263398 0.000000 4353 +swinehart 0 2 6.263398 0.000000 4354 +etherphon 0 2 6.263398 0.000000 4355 +lan 0 2 6.263398 0.000000 4359 +computersystem 0 2 6.263398 0.000000 4360 +mbone 0 2 6.263398 0.000000 4361 +shenker 0 2 6.263398 0.000000 4340 +acmsigcomm 0 2 6.263398 0.000000 4345 +resourcemanag 0 2 6.263398 0.000000 4266 +govindan 0 2 6.263398 0.000000 4350 +forcontinu 0 2 6.263398 0.000000 4351 +monterei 0 2 6.263398 0.000000 4362 +timeoper 0 2 6.263398 0.000000 4363 +niblack 0 2 6.263398 0.000000 4364 +managementsystem 0 2 6.263398 0.000000 4365 +knowledgeand 0 2 6.263398 0.000000 4366 +onveri 0 2 6.263398 0.000000 4367 +crowcroft 1 1 6.957497 6.957497 7581 +warldersburg 1 1 6.957497 6.957497 7582 +synopsisthi 0 1 6.957497 0.000000 7583 +bediscuss 0 1 6.957497 0.000000 7584 +andmultimedia 0 1 6.957497 0.000000 7585 +multimediadatabas 0 1 6.957497 0.000000 7586 +determinedbas 0 1 6.957497 0.000000 7587 +orcarri 0 1 6.957497 0.000000 7588 +hoursfridai 0 1 6.957497 0.000000 7589 +flexibleframework 0 1 6.957497 0.000000 7590 +handlei 0 1 6.957497 0.000000 7591 +wakeman 0 1 6.957497 0.000000 7592 +controlchannel 0 1 6.957497 0.000000 7593 +cccp 0 1 6.957497 0.000000 7594 +conferencecontrol 0 1 6.957497 0.000000 7595 +gajewska 0 1 6.957497 0.000000 7596 +manass 0 1 6.957497 0.000000 7597 +argo 0 1 6.957497 0.000000 7598 +systemfor 0 1 6.957497 0.000000 7599 +gong 0 1 6.957497 0.000000 7600 +multipoint 0 1 6.957497 0.000000 7601 +basedmultimedia 0 1 6.957497 0.000000 7602 +ieeecomput 0 1 6.957497 0.000000 7603 +datagraminternetwork 0 1 6.957497 0.000000 7604 +ballardi 0 1 6.957497 0.000000 7605 +thyagarajan 0 1 6.957497 0.000000 7606 +widyono 0 1 6.957497 0.000000 7607 +msthesi 0 1 6.957497 0.000000 7608 +kompella 0 1 6.957497 0.000000 7609 +pasqual 0 1 6.957497 0.000000 7610 +polyzo 0 1 6.957497 0.000000 7611 +multimediacommun 0 1 6.957497 0.000000 7612 +weightsess 0 1 6.957497 0.000000 7613 +ofacm 0 1 6.957497 0.000000 7614 +holbrook 0 1 6.957497 0.000000 7615 +fordistribut 0 1 6.957497 0.000000 7616 +herzog 0 1 6.957497 0.000000 7617 +estrin 0 1 6.957497 0.000000 7618 +timecommun 0 1 6.957497 0.000000 7619 +servicesj 0 1 6.957497 0.000000 7620 +guyton 0 1 6.957497 0.000000 7621 +schwartz 0 1 6.957497 0.000000 7622 +mogul 0 1 6.957497 0.000000 7623 +forpersist 0 1 6.957497 0.000000 7624 +supportc 0 1 6.957497 0.000000 7625 +lotteri 0 1 6.957497 0.000000 7626 +flexibleproport 0 1 6.957497 0.000000 7627 +mangement 0 1 6.957497 0.000000 7628 +strideschedul 0 1 6.957497 0.000000 7629 +golestani 0 1 6.957497 0.000000 7630 +speedappl 0 1 6.957497 0.000000 7631 +timeproduc 0 1 6.957497 0.000000 7632 +ofeffici 0 1 6.957497 0.000000 7633 +sigapp 0 1 6.957497 0.000000 7634 +intim 0 1 6.957497 0.000000 7635 +databasesw 0 1 6.957497 0.000000 7636 +contentus 0 1 6.957497 0.000000 7637 +cawkel 0 1 6.957497 0.000000 7638 +weymouth 0 1 6.957497 0.000000 7639 +vimsi 0 1 6.957497 0.000000 7640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..a380a754 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +us 0 329 1.098612 0.000000 16 +updat 1 191 1.609438 1.609438 41 +fall 1 181 1.609438 1.609438 40 +algorithm 1 162 1.791759 1.791759 57 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +problem 1 147 1.945910 1.945910 75 +first 0 140 1.945910 0.000000 71 +note 0 142 1.945910 0.000000 67 +number 0 130 2.079442 0.000000 97 +final 1 116 2.197225 2.197225 108 +theori 1 111 2.197225 2.197225 127 +instructor 0 108 2.197225 0.000000 107 +well 0 109 2.197225 0.000000 121 +structur 0 106 2.197225 0.000000 105 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +question 1 91 2.397895 2.397895 141 +exam 1 86 2.484907 2.484907 169 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +decemb 1 80 2.564949 2.564949 215 +refer 0 78 2.564949 0.000000 203 +mondai 0 77 2.564949 0.000000 206 +receiv 1 66 2.708050 2.708050 244 +differ 0 66 2.708050 0.000000 253 +handout 1 64 2.772589 2.772589 263 +descript 0 64 2.772589 0.000000 271 +think 0 57 2.890372 0.000000 314 +instruct 0 53 2.944439 0.000000 332 +case 0 51 2.995732 0.000000 351 +right 1 48 3.044522 3.044522 363 +set 0 50 3.044522 0.000000 361 +answer 1 45 3.135494 3.135494 391 +fridai 0 44 3.135494 0.000000 390 +littl 0 39 3.258097 0.000000 454 +respons 0 37 3.332205 0.000000 476 +word 0 34 3.401197 0.000000 508 +either 0 35 3.401197 0.000000 506 +chapter 0 32 3.465736 0.000000 536 +quot 0 29 3.583519 0.000000 582 +known 0 24 3.761200 0.000000 702 +size 1 23 3.806662 3.806662 713 +inth 0 22 3.850148 0.000000 741 +sent 0 22 3.850148 0.000000 763 +cycl 1 11 4.553877 4.553877 1335 +true 1 10 4.653960 4.653960 1422 +sentenc 1 10 4.653960 4.653960 1413 +total 1 10 4.653960 4.653960 1398 +label 1 10 4.653960 4.653960 1423 +equal 0 10 4.653960 0.000000 1424 +pose 0 9 4.753590 0.000000 1535 +face 0 9 4.753590 0.000000 1501 +entri 0 8 4.875197 0.000000 1678 +largest 1 7 5.010635 5.010635 1858 +meant 0 6 5.164786 0.000000 2055 +vertic 1 5 5.347108 5.347108 2270 +worst 0 5 5.347108 0.000000 2287 +vijaya 0 4 5.568345 0.000000 2677 +disjoint 0 4 5.568345 0.000000 2709 +denot 0 3 5.857933 0.000000 3147 +omega 1 2 6.263398 6.263398 4368 +sigma 0 2 6.263398 0.000000 4369 +amort 0 2 6.263398 0.000000 4370 +submatrix 1 1 6.957497 6.957497 7641 +ramachandranuniqu 0 1 6.957497 0.000000 7642 +onsigma 0 1 6.957497 0.000000 7643 +oroth 0 1 6.957497 0.000000 7644 +isther 0 1 6.957497 0.000000 7645 +paragraphof 0 1 6.957497 0.000000 7646 +containdistinct 0 1 6.957497 0.000000 7647 +cancontain 0 1 6.957497 0.000000 7648 +unclear 0 1 6.957497 0.000000 7649 +somek 0 1 6.957497 0.000000 7650 +bepost 0 1 6.957497 0.000000 7651 +youhav 0 1 6.957497 0.000000 7652 +yourbest 0 1 6.957497 0.000000 7653 +judgment 0 1 6.957497 0.000000 7654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..f74446db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,228 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +us 2 329 1.098612 2.197224 16 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 0 201 1.609438 0.000000 39 +implement 2 152 1.791759 3.583518 52 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +data 0 170 1.791759 0.000000 49 +object 2 138 1.945910 3.891820 79 +note 2 142 1.945910 3.891820 67 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +construct 1 139 1.945910 1.945910 82 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +machin 2 129 2.079442 4.158884 95 +document 1 121 2.079442 2.079442 89 +version 2 113 2.197225 4.394450 122 +code 1 108 2.197225 2.197225 116 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +well 0 109 2.197225 0.000000 121 +structur 0 106 2.197225 0.000000 105 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +pictur 1 89 2.397895 2.397895 160 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +thing 1 84 2.484907 2.484907 189 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +stuff 0 87 2.484907 0.000000 171 +help 0 83 2.484907 0.000000 175 +second 0 81 2.484907 0.000000 166 +solut 0 82 2.484907 0.000000 162 +homework 1 79 2.564949 2.564949 193 +orient 1 80 2.564949 2.564949 205 +come 0 78 2.564949 0.000000 202 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +free 1 73 2.639057 2.639057 224 +html 1 75 2.639057 2.639057 235 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +logic 0 71 2.639057 0.000000 230 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +main 1 67 2.708050 2.708050 256 +window 1 68 2.708050 2.708050 242 +syllabu 0 67 2.708050 0.000000 247 +interact 0 62 2.772589 0.000000 270 +simpl 1 60 2.833213 2.833213 298 +plai 0 60 2.833213 0.000000 307 +type 0 61 2.833213 0.000000 296 +unix 1 58 2.890372 2.890372 308 +browser 1 56 2.890372 2.890372 313 +sever 1 56 2.890372 2.890372 322 +variou 1 56 2.890372 2.890372 317 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +cover 1 55 2.944439 2.944439 329 +suggest 0 53 2.944439 0.000000 331 +allow 0 53 2.944439 0.000000 333 +three 0 54 2.944439 0.000000 330 +run 2 51 2.995732 5.991464 347 +standard 1 48 3.044522 3.044522 365 +format 0 48 3.044522 0.000000 356 +get 1 46 3.091042 3.091042 380 +featur 0 46 3.091042 0.000000 386 +answer 1 45 3.135494 3.135494 391 +describ 1 45 3.135494 3.135494 400 +mark 1 44 3.135494 3.135494 403 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +show 0 43 3.178054 0.000000 417 +might 1 41 3.218876 3.218876 426 +past 0 42 3.218876 0.000000 428 +error 1 40 3.258097 3.258097 449 +tutori 0 39 3.258097 0.000000 437 +author 0 39 3.258097 0.000000 450 +littl 0 39 3.258097 0.000000 454 +correct 0 38 3.295837 0.000000 462 +especi 1 36 3.367296 3.367296 496 +procedur 0 36 3.367296 0.000000 488 +chapter 1 32 3.465736 3.465736 536 +express 0 32 3.465736 0.000000 540 +kind 0 32 3.465736 0.000000 541 +someth 1 31 3.496508 3.496508 554 +anim 0 31 3.496508 0.000000 557 +abl 1 30 3.555348 3.555348 566 +actual 0 28 3.610918 0.000000 604 +linux 1 27 3.637586 3.637586 631 +comp 1 26 3.688879 3.688879 650 +subject 0 26 3.688879 0.000000 647 +rather 0 26 3.688879 0.000000 642 +brows 1 23 3.806662 3.806662 726 +recommend 1 22 3.850148 3.850148 737 +instal 1 22 3.850148 3.850148 754 +sort 0 22 3.850148 0.000000 738 +self 0 22 3.850148 0.000000 761 +newsgroup 1 21 3.912023 3.912023 783 +theorem 0 21 3.912023 0.000000 786 +scheme 3 20 3.951244 11.853732 818 +sure 1 20 3.951244 3.951244 813 +definit 0 19 4.007333 0.000000 864 +lot 1 18 4.060443 4.060443 889 +along 0 18 4.060443 0.000000 878 +repositori 2 17 4.110874 8.221748 932 +regular 0 17 4.110874 0.000000 929 +quiz 1 16 4.174387 4.174387 990 +explan 1 16 4.174387 4.174387 985 +advantag 1 16 4.174387 4.174387 987 +later 1 15 4.248495 4.248495 1043 +pagec 0 15 4.248495 0.000000 1011 +goe 0 15 4.248495 0.000000 1044 +command 1 14 4.317488 4.317488 1083 +convent 0 14 4.317488 0.000000 1072 +draw 0 14 4.317488 0.000000 1086 +essenti 0 13 4.382027 0.000000 1137 +prolog 0 13 4.382027 0.000000 1155 +solari 1 12 4.465908 4.465908 1238 +reader 0 12 4.465908 0.000000 1246 +calculu 0 12 4.465908 0.000000 1203 +rice 1 11 4.553877 4.553877 1336 +see 0 11 4.553877 0.000000 1337 +instanc 0 11 4.553877 0.000000 1322 +subset 0 10 4.653960 0.000000 1425 +wilson 1 9 4.753590 4.753590 1536 +classifi 0 9 4.753590 0.000000 1537 +latter 0 9 4.753590 0.000000 1522 +illustr 1 8 4.875197 4.875197 1679 +marc 1 8 4.875197 4.875197 1680 +besid 1 8 4.875197 4.875197 1681 +reload 0 8 4.875197 0.000000 1682 +prover 0 8 4.875197 0.000000 1653 +feelei 1 7 5.010635 5.010635 1859 +sparc 1 7 5.010635 5.010635 1860 +bunch 1 7 5.010635 5.010635 1861 +exactli 0 7 5.010635 0.000000 1817 +merg 0 7 5.010635 0.000000 1862 +whichi 1 6 5.164786 5.164786 2056 +indiana 1 6 5.164786 5.164786 2057 +grammar 0 6 5.164786 0.000000 2058 +lang 1 5 5.347108 5.347108 2294 +default 1 5 5.347108 5.347108 2335 +pagethi 0 5 5.347108 0.000000 2336 +button 0 5 5.347108 0.000000 2337 +ahead 0 5 5.347108 0.000000 2338 +hardcopi 0 5 5.347108 0.000000 2246 +proposit 0 5 5.347108 0.000000 2339 +patch 1 4 5.568345 5.568345 2710 +devot 1 4 5.568345 5.568345 2711 +backward 0 4 5.568345 0.000000 2638 +chain 0 4 5.568345 0.000000 2712 +rscheme 2 3 5.857933 11.715866 3250 +inherit 1 3 5.857933 5.857933 3122 +qing 1 3 5.857933 5.857933 3295 +gambit 1 3 5.857933 5.857933 3227 +outof 0 3 5.857933 0.000000 3296 +ters 0 3 5.857933 0.000000 3297 +theoremprov 0 3 5.857933 0.000000 3298 +donovan 1 2 6.263398 6.263398 4371 +kolbl 1 2 6.263398 6.263398 4372 +youcan 1 2 6.263398 6.263398 4373 +indent 0 2 6.263398 0.000000 4374 +subtyp 0 2 6.263398 0.000000 4375 +meroon 1 1 6.957497 6.957497 7655 +runschem 1 1 6.957497 6.957497 7656 +orani 1 1 6.957497 6.957497 7657 +andinstal 1 1 6.957497 6.957497 7658 +itfrom 1 1 6.957497 6.957497 7659 +friendlier 1 1 6.957497 6.957497 7660 +fornewbi 1 1 6.957497 6.957497 7661 +gettinggambit 1 1 6.957497 6.957497 7662 +bestschem 1 1 6.957497 6.957497 7663 +guil 1 1 6.957497 6.957497 7664 +mzscheme 1 1 6.957497 6.957497 7665 +doingobject 1 1 6.957497 6.957497 7666 +tous 1 1 6.957497 6.957497 7667 +freeimplement 1 1 6.957497 6.957497 7668 +getinterest 1 1 6.957497 6.957497 7669 +paulwilson 0 1 6.957497 0.000000 7670 +yourbrows 0 1 6.957497 0.000000 7671 +mostrec 0 1 6.957497 0.000000 7672 +ondeclar 0 1 6.957497 0.000000 7673 +arereason 0 1 6.957497 0.000000 7674 +willchang 0 1 6.957497 0.000000 7675 +islik 0 1 6.957497 0.000000 7676 +adventur 0 1 6.957497 0.000000 7677 +usinga 0 1 6.957497 0.000000 7678 +throughchapt 0 1 6.957497 0.000000 7679 +sanoth 0 1 6.957497 0.000000 7680 +thanprint 0 1 6.957497 0.000000 7681 +weget 0 1 6.957497 0.000000 7682 +onlinebrows 0 1 6.957497 0.000000 7683 +coursenot 0 1 6.957497 0.000000 7684 +miscellanousfunct 0 1 6.957497 0.000000 7685 +shouldconsult 0 1 6.957497 0.000000 7686 +itsens 0 1 6.957497 0.000000 7687 +andnot 0 1 6.957497 0.000000 7688 +setofrul 0 1 6.957497 0.000000 7689 +ofanim 0 1 6.957497 0.000000 7690 +simpleobject 0 1 6.957497 0.000000 7691 +metaclass 0 1 6.957497 0.000000 7692 +circular 0 1 6.957497 0.000000 7693 +onclass 0 1 6.957497 0.000000 7694 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..97a63254 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +hour 0 165 1.791759 0.000000 46 +texa 0 160 1.791759 0.000000 64 +file 2 132 1.945910 3.891820 70 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +like 0 132 1.945910 0.000000 81 +assign 0 135 1.945910 0.000000 66 +postscript 2 131 2.079442 4.158884 90 +welcom 0 122 2.079442 0.000000 99 +provid 0 121 2.079442 0.000000 94 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +pleas 0 113 2.197225 0.000000 114 +need 0 98 2.302585 0.000000 135 +section 1 94 2.397895 2.397895 149 +homepag 0 93 2.397895 0.000000 148 +solut 2 82 2.484907 4.969814 162 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +homework 2 79 2.564949 5.129898 193 +sourc 0 77 2.564949 0.000000 201 +mondai 0 77 2.564949 0.000000 206 +tuesdai 1 73 2.639057 2.639057 219 +test 1 66 2.708050 2.708050 252 +thursdai 1 70 2.708050 2.708050 241 +view 0 70 2.708050 0.000000 254 +new 1 64 2.772589 2.772589 262 +guid 0 63 2.772589 0.000000 267 +locat 0 59 2.833213 0.000000 303 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +found 0 53 2.944439 0.000000 337 +tabl 0 51 2.995732 0.000000 346 +maintain 0 51 2.995732 0.000000 342 +format 1 48 3.044522 3.044522 356 +possibl 0 47 3.091042 0.000000 378 +midterm 1 45 3.135494 3.135494 392 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +made 0 44 3.135494 0.000000 398 +review 1 42 3.218876 3.218876 425 +announc 0 40 3.258097 0.000000 441 +realli 0 40 3.258097 0.000000 444 +slide 1 38 3.295837 3.295837 467 +sciencesunivers 0 37 3.332205 0.000000 486 +download 0 36 3.367296 0.000000 489 +print 1 34 3.401197 3.401197 503 +taught 0 33 3.433987 0.000000 526 +linux 0 27 3.637586 0.000000 631 +session 1 26 3.688879 3.688879 643 +half 0 21 3.912023 0.000000 776 +wang 0 21 3.912023 0.000000 790 +exercis 0 19 4.007333 0.000000 842 +weekli 1 17 4.110874 4.110874 919 +adam 0 17 4.110874 0.000000 934 +modif 0 17 4.110874 0.000000 913 +georg 0 16 4.174387 0.000000 994 +score 0 15 4.248495 0.000000 1017 +station 1 13 4.382027 4.382027 1157 +kumar 1 9 4.753590 4.753590 1506 +surpris 0 7 5.010635 0.000000 1828 +tip 0 7 5.010635 0.000000 1863 +ajit 1 3 5.857933 5.857933 3299 +feng 1 3 5.857933 5.857933 3300 +warren 0 3 5.857933 0.000000 3301 +edudepart 0 3 5.857933 0.000000 3302 +xfeng 1 2 6.263398 6.263398 4376 +natarajan 0 2 6.263398 0.000000 4377 +decimalinteg 1 1 6.957497 6.957497 7695 +hexinteg 1 1 6.957497 6.957497 7696 +octalinteg 1 1 6.957497 6.957497 7697 +xunnow 0 1 6.957497 0.000000 7698 +homeworksreview 0 1 6.957497 0.000000 7699 +slidesth 0 1 6.957497 0.000000 7700 +onlineif 0 1 6.957497 0.000000 7701 +updatedhomework 0 1 6.957497 0.000000 7702 +filemidterm 0 1 6.957497 0.000000 7703 +webta 0 1 6.957497 0.000000 7704 +timetableta 0 1 6.957497 0.000000 7705 +guana 0 1 6.957497 0.000000 7706 +eduxun 0 1 6.957497 0.000000 7707 +wordlist 0 1 6.957497 0.000000 7708 +wwang 0 1 6.957497 0.000000 7709 +afternoon 0 1 6.957497 0.000000 7710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..a3308afa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +softwar 0 220 1.386294 0.000000 30 +utexa 1 189 1.609438 1.609438 44 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +group 0 183 1.609438 0.000000 36 +hour 1 165 1.791759 1.791759 46 +contact 1 153 1.791759 1.791759 59 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +architectur 0 139 1.945910 0.000000 77 +welcom 0 122 2.079442 0.000000 99 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +section 0 94 2.397895 0.000000 149 +comment 0 93 2.397895 0.000000 146 +solut 1 82 2.484907 2.484907 162 +resourc 0 81 2.484907 0.000000 172 +syllabu 0 67 2.708050 0.000000 247 +august 0 66 2.708050 0.000000 257 +new 0 64 2.772589 0.000000 262 +creat 0 63 2.772589 0.000000 277 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +station 1 13 4.382027 4.382027 1157 +classmat 0 9 4.753590 0.000000 1516 +yang 1 8 4.875197 4.875197 1652 +pagei 0 8 4.875197 0.000000 1683 +schwetman 1 1 6.957497 6.957497 7711 +mesquit 1 1 6.957497 6.957497 7712 +yangyang 1 1 6.957497 6.957497 7713 +herb 0 1 6.957497 0.000000 7714 +appointmentcontact 0 1 6.957497 0.000000 7715 +statisticsassign 0 1 6.957497 0.000000 7716 +asga 0 1 6.957497 0.000000 7717 +statisticsyour 0 1 6.957497 0.000000 7718 +gradesect 0 1 6.957497 0.000000 7719 +microsparc 0 1 6.957497 0.000000 7720 +datasheetonlin 0 1 6.957497 0.000000 7721 +ruiliu 0 1 6.957497 0.000000 7722 +postmessag 0 1 6.957497 0.000000 7723 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..92db35ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +file 0 132 1.945910 0.000000 70 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +schedul 1 119 2.079442 2.079442 85 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +grade 0 90 2.397895 0.000000 142 +present 0 91 2.397895 0.000000 145 +internet 0 83 2.484907 0.000000 186 +solut 0 82 2.484907 0.000000 162 +info 0 85 2.484907 0.000000 176 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +java 0 70 2.708050 0.000000 248 +handout 2 64 2.772589 5.545178 263 +descript 0 64 2.772589 0.000000 271 +sampl 0 53 2.944439 0.000000 339 +protocol 1 45 3.135494 3.135494 407 +http 1 41 3.218876 3.218876 420 +tutori 0 39 3.258097 0.000000 437 +platform 0 29 3.583519 0.000000 591 +turn 0 29 3.583519 0.000000 586 +comp 0 26 3.688879 0.000000 650 +background 0 25 3.737670 0.000000 664 +mobil 0 23 3.806662 0.000000 730 +newsgroup 1 21 3.912023 3.912023 783 +rout 0 21 3.912023 0.000000 793 +alloc 0 20 3.951244 0.000000 821 +prerequisit 0 19 4.007333 0.000000 846 +configur 0 15 4.248495 0.000000 1012 +draft 1 14 4.317488 4.317488 1085 +station 0 13 4.382027 0.000000 1157 +individu 0 13 4.382027 0.000000 1126 +ring 0 8 4.875197 0.000000 1684 +digest 0 7 5.010635 0.000000 1864 +multicast 0 5 5.347108 0.000000 2305 +authent 0 5 5.347108 0.000000 2306 +edufing 0 4 5.568345 0.000000 2713 +csnet 1 1 6.957497 6.957497 7724 +wensdai 0 1 6.957497 0.000000 7725 +netsim 0 1 6.957497 0.000000 7726 +corejava 0 1 6.957497 0.000000 7727 +fengyufeng 0 1 6.957497 0.000000 7728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..b0879e6b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +read 1 154 1.791759 1.791759 47 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +assign 1 135 1.945910 1.945910 66 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +document 1 121 2.079442 2.079442 89 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +topic 0 114 2.197225 0.000000 110 +techniqu 0 99 2.302585 0.000000 138 +follow 0 92 2.397895 0.000000 143 +help 1 83 2.484907 2.484907 175 +wide 0 84 2.484907 0.000000 185 +chang 0 82 2.484907 0.000000 163 +resourc 0 81 2.484907 0.000000 172 +homework 1 79 2.564949 2.564949 193 +sourc 0 77 2.564949 0.000000 201 +html 0 75 2.639057 0.000000 235 +degre 1 69 2.708050 2.708050 259 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +colleg 1 61 2.833213 2.833213 300 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +visitor 0 49 3.044522 0.000000 371 +basic 0 50 3.044522 0.000000 360 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +offer 1 43 3.178054 3.178054 414 +announc 0 40 3.258097 0.000000 441 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +art 0 29 3.583519 0.000000 593 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +accur 0 25 3.737670 0.000000 680 +item 0 19 4.007333 0.000000 856 +hypertext 0 19 4.007333 0.000000 865 +offici 0 18 4.060443 0.000000 894 +charact 0 15 4.248495 0.000000 1028 +reprint 0 14 4.317488 0.000000 1097 +hypermedia 0 12 4.465908 0.000000 1247 +duli 0 12 4.465908 0.000000 1248 +regularli 0 11 4.553877 0.000000 1338 +nonprofit 0 11 4.553877 0.000000 1339 +mosaic 1 10 4.653960 4.653960 1426 +engr 1 10 4.653960 4.653960 1427 +weld 0 9 4.753590 0.000000 1538 +departmentof 0 9 4.753590 0.000000 1539 +uniform 0 7 5.010635 0.000000 1845 +markup 0 6 5.164786 0.000000 2059 +highlight 0 5 5.347108 0.000000 2340 +foracadem 0 5 5.347108 0.000000 2341 +whichcontain 0 4 5.568345 0.000000 2714 +bounti 0 4 5.568345 0.000000 2715 +mathematica 0 3 5.857933 0.000000 3303 +quotedand 0 3 5.857933 0.000000 3304 +quarterwelcom 0 2 6.263398 0.000000 4378 +thatthi 0 2 6.263398 0.000000 4379 +addedfrequ 0 2 6.263398 0.000000 4380 +personnel 0 2 6.263398 0.000000 4381 +mvi 0 2 6.263398 0.000000 4382 +usinglynx 0 2 6.263398 0.000000 4383 +pageclick 0 1 6.957497 0.000000 7729 +gradesoth 0 1 6.957497 0.000000 7730 +browserport 0 1 6.957497 0.000000 7731 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..82911817 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +relat 0 139 1.945910 0.000000 68 +document 1 121 2.079442 2.079442 89 +studi 1 120 2.079442 2.079442 91 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +send 1 114 2.197225 2.197225 109 +final 1 116 2.197225 2.197225 108 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +place 0 106 2.197225 0.000000 124 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +take 0 97 2.302585 0.000000 134 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +activ 1 84 2.484907 2.484907 182 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +info 0 85 2.484907 0.000000 176 +messag 1 76 2.564949 2.564949 212 +homework 0 79 2.564949 0.000000 193 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +onlin 0 75 2.639057 0.000000 223 +degre 1 69 2.708050 2.708050 259 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +guid 1 63 2.772589 2.772589 267 +previou 0 62 2.772589 0.000000 290 +colleg 1 61 2.833213 2.833213 300 +think 1 57 2.890372 2.890372 314 +special 0 56 2.890372 0.000000 320 +summer 0 56 2.890372 0.000000 311 +major 0 56 2.890372 0.000000 315 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +particular 0 51 2.995732 0.000000 352 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +adapt 0 46 3.091042 0.000000 387 +netscap 1 44 3.135494 3.135494 395 +keep 0 44 3.135494 0.000000 409 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +offer 1 43 3.178054 3.178054 414 +might 0 41 3.218876 0.000000 426 +slide 0 38 3.295837 0.000000 467 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +staff 0 36 3.367296 0.000000 490 +winter 0 36 3.367296 0.000000 500 +copyright 0 36 3.367296 0.000000 495 +word 0 34 3.401197 0.000000 508 +board 0 33 3.433987 0.000000 528 +ad 0 32 3.465736 0.000000 544 +autumn 1 31 3.496508 3.496508 558 +art 0 29 3.583519 0.000000 593 +consid 0 29 3.583519 0.000000 590 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +notic 0 25 3.737670 0.000000 675 +accur 0 25 3.737670 0.000000 680 +miscellan 0 23 3.806662 0.000000 731 +martin 0 21 3.912023 0.000000 794 +item 0 19 4.007333 0.000000 856 +demo 0 18 4.060443 0.000000 888 +less 0 18 4.060443 0.000000 892 +previous 0 17 4.110874 0.000000 923 +whole 0 17 4.110874 0.000000 940 +match 0 16 4.174387 0.000000 965 +portion 0 16 4.174387 0.000000 971 +webmast 1 15 4.248495 4.248495 1045 +reprint 0 14 4.317488 0.000000 1097 +earlier 0 13 4.382027 0.000000 1140 +menu 0 13 4.382027 0.000000 1156 +hypermedia 0 12 4.465908 0.000000 1247 +web 0 12 4.465908 0.000000 1249 +duli 0 12 4.465908 0.000000 1248 +regularli 0 11 4.553877 0.000000 1338 +instanc 0 11 4.553877 0.000000 1322 +nonprofit 0 11 4.553877 0.000000 1339 +engr 1 10 4.653960 4.653960 1427 +debugg 0 9 4.753590 0.000000 1493 +departmentof 0 9 4.753590 0.000000 1539 +tip 1 7 5.010635 5.010635 1863 +documentfor 0 7 5.010635 0.000000 1865 +theclass 0 6 5.164786 0.000000 2060 +handbook 0 6 5.164786 0.000000 2061 +newinform 0 5 5.347108 0.000000 2342 +highlight 0 5 5.347108 0.000000 2340 +mac 0 5 5.347108 0.000000 2292 +bulletin 0 5 5.347108 0.000000 2343 +foracadem 0 5 5.347108 0.000000 2341 +bounti 0 4 5.568345 0.000000 2715 +insensit 0 4 5.568345 0.000000 2716 +tompa 0 3 5.857933 0.000000 3305 +preview 0 3 5.857933 0.000000 3306 +quotedand 0 3 5.857933 0.000000 3304 +raini 1 2 6.263398 6.263398 4384 +intact 0 2 6.263398 0.000000 4385 +nonmajor 0 2 6.263398 0.000000 4386 +itemsund 0 2 6.263398 0.000000 4387 +balloon 0 2 6.263398 0.000000 4388 +dugan 0 1 6.957497 0.000000 7732 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..a384726e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +class 1 199 1.609438 1.609438 37 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +avail 0 169 1.791759 0.000000 48 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +spring 0 131 2.079442 0.000000 88 +place 1 106 2.197225 2.197225 124 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +code 0 108 2.197225 0.000000 116 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +look 0 107 2.197225 0.000000 115 +user 0 104 2.302585 0.000000 137 +comment 0 93 2.397895 0.000000 146 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +refer 0 78 2.564949 0.000000 203 +test 1 66 2.708050 2.708050 252 +degre 1 69 2.708050 2.708050 259 +syllabu 0 67 2.708050 0.000000 247 +guid 0 63 2.772589 0.000000 267 +colleg 1 61 2.833213 2.833213 300 +summer 1 56 2.890372 2.890372 311 +special 0 56 2.890372 0.000000 320 +major 0 56 2.890372 0.000000 315 +week 1 52 2.995732 2.995732 343 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +textbook 0 44 3.135494 0.000000 397 +examin 0 42 3.218876 0.000000 424 +might 0 41 3.218876 0.000000 426 +tutori 0 39 3.258097 0.000000 437 +slide 1 38 3.295837 3.295837 467 +short 0 36 3.367296 0.000000 499 +staff 0 36 3.367296 0.000000 490 +winter 0 36 3.367296 0.000000 500 +kind 0 32 3.465736 0.000000 541 +autumn 1 31 3.496508 3.496508 558 +richard 0 31 3.496508 0.000000 559 +art 0 29 3.583519 0.000000 593 +notic 0 25 3.737670 0.000000 675 +lab 0 24 3.761200 0.000000 698 +brows 0 23 3.806662 0.000000 726 +sort 0 22 3.850148 0.000000 738 +martin 0 21 3.912023 0.000000 794 +watch 0 21 3.912023 0.000000 789 +item 0 19 4.007333 0.000000 856 +demo 0 18 4.060443 0.000000 888 +less 0 18 4.060443 0.000000 892 +macintosh 0 17 4.110874 0.000000 920 +previous 0 17 4.110874 0.000000 923 +webmast 1 15 4.248495 4.248495 1045 +earlier 0 13 4.382027 0.000000 1140 +web 0 12 4.465908 0.000000 1249 +engr 1 10 4.653960 4.653960 1427 +invit 0 10 4.653960 0.000000 1428 +informationabout 0 9 4.753590 0.000000 1515 +andth 0 9 4.753590 0.000000 1481 +tip 0 7 5.010635 0.000000 1863 +ladner 0 6 5.164786 0.000000 2062 +highlight 0 5 5.347108 0.000000 2340 +bounti 0 4 5.568345 0.000000 2715 +moreinform 0 3 5.857933 0.000000 3307 +dickei 0 2 6.263398 0.000000 4389 +nonmajor 0 2 6.263398 0.000000 4386 +hypermediadocu 0 1 6.957497 0.000000 7733 +schedulesth 0 1 6.957497 0.000000 7734 +glanceweek 0 1 6.957497 0.000000 7735 +schedulecomput 0 1 6.957497 0.000000 7736 +includinglab 0 1 6.957497 0.000000 7737 +andta 0 1 6.957497 0.000000 7738 +audiofrom 0 1 6.957497 0.000000 7739 +midtermand 0 1 6.957497 0.000000 7740 +originallyschedul 0 1 6.957497 0.000000 7741 +andtim 0 1 6.957497 0.000000 7742 +usingth 0 1 6.957497 0.000000 7743 +intactand 0 1 6.957497 0.000000 7744 +forinst 0 1 6.957497 0.000000 7745 +andrel 0 1 6.957497 0.000000 7746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..03a3884c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +phone 0 175 1.791759 0.000000 45 +assign 2 135 1.945910 3.891820 66 +spring 1 131 2.079442 2.079442 88 +postscript 0 131 2.079442 0.000000 90 +seattl 0 120 2.079442 0.000000 103 +structur 0 106 2.197225 0.000000 105 +teach 0 108 2.197225 0.000000 112 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +question 0 91 2.397895 0.000000 141 +novemb 0 81 2.484907 0.000000 179 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 1 64 2.772589 2.772589 261 +handout 0 64 2.772589 0.000000 263 +previou 0 62 2.772589 0.000000 290 +browser 0 56 2.890372 0.000000 313 +sampl 0 53 2.944439 0.000000 339 +appoint 0 49 3.044522 0.000000 358 +set 0 50 3.044522 0.000000 361 +midterm 1 45 3.135494 3.135494 392 +winter 0 36 3.367296 0.000000 500 +autumn 1 31 3.496508 3.496508 558 +defin 0 22 3.850148 0.000000 746 +edulast 0 17 4.110874 0.000000 927 +discret 0 13 4.382027 0.000000 1165 +johnson 0 13 4.382027 0.000000 1162 +recurs 0 13 4.382027 0.000000 1127 +loew 0 12 4.465908 0.000000 1252 +reader 0 12 4.465908 0.000000 1246 +web 0 12 4.465908 0.000000 1249 +induct 0 11 4.553877 0.000000 1304 +leveson 0 9 4.753590 0.000000 1540 +acrobat 1 6 5.164786 5.164786 2063 +beam 1 5 5.347108 5.347108 2344 +karp 0 5 5.347108 0.000000 2284 +ruzzo 0 5 5.347108 0.000000 2345 +nowitz 1 2 6.263398 6.263398 4390 +ofyour 0 2 6.263398 0.000000 4063 +instructorpaul 0 1 6.957497 0.000000 7747 +edulectur 0 1 6.957497 0.000000 7748 +assistantjonathan 0 1 6.957497 0.000000 7749 +edusect 0 1 6.957497 0.000000 7750 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..8835ab90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +updat 1 191 1.609438 1.609438 41 +includ 0 208 1.609438 0.000000 42 +read 0 154 1.791759 0.000000 47 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +postscript 0 131 2.079442 0.000000 90 +final 1 116 2.197225 2.197225 108 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +text 2 98 2.302585 4.605170 133 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +mani 0 92 2.397895 0.000000 150 +solut 2 82 2.484907 4.969814 162 +exam 1 86 2.484907 2.484907 169 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +homework 2 79 2.564949 5.129898 193 +state 1 76 2.564949 2.564949 207 +messag 0 76 2.564949 0.000000 212 +html 1 75 2.639057 2.639057 235 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +syllabu 0 67 2.708050 0.000000 247 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +handout 1 64 2.772589 2.772589 263 +complex 0 64 2.772589 0.000000 269 +previou 0 62 2.772589 0.000000 290 +januari 0 62 2.772589 0.000000 264 +simpl 0 60 2.833213 0.000000 298 +browser 0 56 2.890372 0.000000 313 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +midterm 1 45 3.135494 3.135494 392 +math 0 44 3.135494 0.000000 402 +review 0 42 3.218876 0.000000 425 +origin 0 38 3.295837 0.000000 472 +formal 0 37 3.332205 0.000000 478 +winter 1 36 3.367296 3.367296 500 +express 0 32 3.465736 0.000000 540 +autumn 1 31 3.496508 3.496508 558 +richard 0 31 3.496508 0.000000 559 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +handl 0 24 3.761200 0.000000 685 +proof 0 23 3.806662 0.000000 720 +fact 0 21 3.912023 0.000000 780 +viewer 0 21 3.912023 0.000000 787 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +regular 1 17 4.110874 4.110874 929 +intro 0 17 4.110874 0.000000 915 +ascii 0 15 4.248495 0.000000 1032 +latex 3 14 4.317488 12.952464 1064 +command 0 14 4.317488 0.000000 1083 +convert 0 13 4.382027 0.000000 1122 +web 0 12 4.465908 0.000000 1249 +extra 0 11 4.553877 0.000000 1312 +regard 0 11 4.553877 0.000000 1309 +notat 0 9 4.753590 0.000000 1489 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +legibl 0 7 5.010635 0.000000 1866 +ghostscript 0 7 5.010635 0.000000 1867 +grammar 1 6 5.164786 5.164786 2058 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +ladner 0 6 5.164786 0.000000 2062 +diagram 1 5 5.347108 5.347108 2346 +pars 0 5 5.347108 0.000000 2321 +latexhtml 0 5 5.347108 0.000000 2347 +rambl 0 3 5.857933 0.000000 3308 +ladnerclass 0 1 6.957497 0.000000 7751 +construc 0 1 6.957497 0.000000 7752 +halt 0 1 6.957497 0.000000 7753 +undecidableexam 0 1 6.957497 0.000000 7754 +edufix 0 1 6.957497 0.000000 7755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..426782d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +includ 0 208 1.609438 0.000000 42 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +check 1 115 2.197225 2.197225 118 +send 0 114 2.197225 0.000000 109 +solut 0 82 2.484907 0.000000 162 +exam 0 86 2.484907 0.000000 169 +messag 1 76 2.564949 2.564949 212 +homework 0 79 2.564949 0.000000 193 +previou 1 62 2.772589 2.772589 290 +handout 0 64 2.772589 0.000000 263 +septemb 0 65 2.772589 0.000000 274 +content 0 59 2.833213 0.000000 302 +set 0 50 3.044522 0.000000 361 +pointer 0 48 3.044522 0.000000 368 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +formal 1 37 3.332205 3.332205 478 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +sent 0 22 3.850148 0.000000 763 +log 0 19 4.007333 0.000000 857 +intro 0 17 4.110874 0.000000 915 +web 0 12 4.465908 0.000000 1249 +regularli 0 11 4.553877 0.000000 1338 +subscrib 1 9 4.753590 4.753590 1541 +upcom 1 8 4.875197 4.875197 1685 +ann 0 6 5.164786 0.000000 2065 +majordomo 0 6 5.164786 0.000000 2066 +willb 0 5 5.347108 0.000000 2277 +condon 1 3 5.857933 5.857933 3309 +findhomework 0 1 6.957497 0.000000 7756 +userid 0 1 6.957497 0.000000 7757 +edukaye 0 1 6.957497 0.000000 7758 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..02001d2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +last 1 314 1.098612 1.098612 14 +cours 0 273 1.098612 0.000000 15 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +updat 1 191 1.609438 1.609438 41 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +spring 1 131 2.079442 2.079442 88 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +messag 0 76 2.564949 0.000000 212 +previou 0 62 2.772589 0.000000 290 +unix 0 58 2.890372 0.000000 308 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +request 0 26 3.688879 0.000000 635 +martin 0 21 3.912023 0.000000 794 +web 0 12 4.465908 0.000000 1249 +tompaclass 0 3 5.857933 0.000000 3310 +informationlab 0 1 6.957497 0.000000 7759 +technot 0 1 6.957497 0.000000 7760 +questionnaireloc 0 1 6.957497 0.000000 7761 +cdeletemin 0 1 6.957497 0.000000 7762 +treeshomework 0 1 6.957497 0.000000 7763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..1f67d2a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +click 0 142 1.945910 0.000000 78 +lectur 0 135 1.945910 0.000000 73 +spring 1 131 2.079442 2.079442 88 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +seattl 0 120 2.079442 0.000000 103 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +exam 0 86 2.484907 0.000000 169 +messag 1 76 2.564949 2.564949 212 +homework 0 79 2.564949 0.000000 193 +sourc 0 77 2.564949 0.000000 201 +suggest 0 53 2.944439 0.000000 331 +frequent 1 49 3.044522 3.044522 367 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +copyright 0 36 3.367296 0.000000 495 +taught 0 33 3.433987 0.000000 526 +ad 0 32 3.465736 0.000000 544 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +reprint 0 14 4.317488 0.000000 1097 +hypermedia 0 12 4.465908 0.000000 1247 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +departmentof 0 9 4.753590 0.000000 1539 +documentfor 0 7 5.010635 0.000000 1865 +ladner 1 6 5.164786 5.164786 2062 +theclass 0 6 5.164786 0.000000 2060 +newinform 0 5 5.347108 0.000000 2342 +foracadem 0 5 5.347108 0.000000 2341 +quotedand 0 3 5.857933 0.000000 3304 +fasulo 0 2 6.263398 0.000000 4391 +structuresrichard 0 1 6.957497 0.000000 7764 +instructordan 0 1 6.957497 0.000000 7765 +assistantthi 0 1 6.957497 0.000000 7766 +overheadsport 0 1 6.957497 0.000000 7767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..26678247 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +spring 1 131 2.079442 2.079442 88 +provid 0 121 2.079442 0.000000 94 +sourc 0 77 2.564949 0.000000 201 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +offer 1 43 3.178054 3.178054 414 +purpos 0 37 3.332205 0.000000 481 +winter 1 36 3.367296 3.367296 500 +copyright 0 36 3.367296 0.000000 495 +everi 0 34 3.401197 0.000000 519 +autumn 1 31 3.496508 3.496508 558 +computersci 0 30 3.555348 0.000000 562 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +webmast 0 15 4.248495 0.000000 1045 +reprint 0 14 4.317488 0.000000 1097 +scienceand 0 5 5.347108 0.000000 2348 +ofcs 1 2 6.263398 6.263398 4392 +languagesfal 0 2 6.263398 0.000000 4161 +informationth 0 2 6.263398 0.000000 4393 +listinfo 0 2 6.263398 0.000000 4394 +pagehom 0 2 6.263398 0.000000 4395 +engineeringport 0 2 6.263398 0.000000 4396 +academicnonprofit 0 2 6.263398 0.000000 4397 +dulycredit 0 2 6.263398 0.000000 4398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..e91f4954 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +last 2 314 1.098612 2.197224 14 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +updat 2 191 1.609438 3.218876 41 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +read 2 154 1.791759 3.583518 47 +hour 1 165 1.791759 1.791759 46 +note 3 142 1.945910 5.837730 67 +assign 2 135 1.945910 3.891820 66 +lectur 0 135 1.945910 0.000000 73 +relat 0 139 1.945910 0.000000 68 +postscript 2 131 2.079442 4.158884 90 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +code 2 108 2.197225 4.394450 116 +final 1 116 2.197225 2.197225 108 +instructor 0 108 2.197225 0.000000 107 +section 1 94 2.397895 2.397895 149 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +build 0 85 2.484907 0.000000 184 +april 2 77 2.564949 5.129898 196 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +interfac 0 79 2.564949 0.000000 209 +sourc 0 77 2.564949 0.000000 201 +html 2 75 2.639057 5.278114 235 +tuesdai 0 73 2.639057 0.000000 219 +sieg 1 69 2.708050 2.708050 260 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +new 0 64 2.772589 0.000000 262 +march 1 61 2.833213 2.833213 295 +unix 0 58 2.890372 0.000000 308 +reason 0 57 2.890372 0.000000 318 +suggest 2 53 2.944439 5.888878 331 +sampl 1 53 2.944439 2.944439 339 +quarter 0 47 3.091042 0.000000 389 +electron 0 47 3.091042 0.000000 379 +done 0 47 3.091042 0.000000 381 +adapt 0 46 3.091042 0.000000 387 +netscap 0 44 3.135494 0.000000 395 +offer 1 43 3.178054 3.178054 414 +review 0 42 3.218876 0.000000 425 +hand 0 37 3.332205 0.000000 475 +purpos 0 37 3.332205 0.000000 481 +winter 1 36 3.367296 3.367296 500 +copyright 0 36 3.367296 0.000000 495 +everi 0 34 3.401197 0.000000 519 +eduoffic 1 33 3.433987 3.433987 531 +autumn 0 31 3.496508 0.000000 558 +computersci 0 30 3.555348 0.000000 562 +steve 0 29 3.583519 0.000000 594 +quot 0 29 3.583519 0.000000 582 +full 0 28 3.610918 0.000000 615 +administr 0 27 3.637586 0.000000 628 +pagecs 0 26 3.688879 0.000000 658 +session 0 26 3.688879 0.000000 643 +accur 0 25 3.737670 0.000000 680 +daili 0 24 3.761200 0.000000 706 +newsgroup 0 21 3.912023 0.000000 783 +miss 0 19 4.007333 0.000000 866 +lisp 1 18 4.060443 4.060443 897 +figur 1 18 4.060443 4.060443 903 +partial 0 18 4.060443 0.000000 900 +quiz 1 16 4.174387 4.174387 990 +webmast 0 15 4.248495 0.000000 1045 +dave 1 14 4.317488 4.317488 1098 +save 0 14 4.317488 0.000000 1099 +reprint 0 14 4.317488 0.000000 1097 +prolog 1 13 4.382027 4.382027 1155 +emac 1 13 4.382027 4.382027 1143 +hank 0 12 4.465908 0.000000 1253 +submiss 0 11 4.553877 0.000000 1298 +grove 1 8 4.875197 4.875197 1675 +clip 0 7 5.010635 0.000000 1868 +transcript 2 6 5.164786 10.329572 2067 +scienceand 0 5 5.347108 0.000000 2348 +turnin 0 4 5.568345 0.000000 2654 +employe 0 4 5.568345 0.000000 2717 +ofcs 1 2 6.263398 6.263398 4392 +overviewcours 0 2 6.263398 0.000000 4399 +informationth 0 2 6.263398 0.000000 4393 +listinfo 0 2 6.263398 0.000000 4394 +pagehom 0 2 6.263398 0.000000 4395 +engineeringport 0 2 6.263398 0.000000 4396 +academicnonprofit 0 2 6.263398 0.000000 4397 +dulycredit 0 2 6.263398 0.000000 4398 +smalltalk 2 1 6.957497 13.914994 7768 +transcipt 1 1 6.957497 6.957497 7769 +htmlpostscript 1 1 6.957497 6.957497 7770 +languagesspr 0 1 6.957497 0.000000 7771 +hanks 0 1 6.957497 0.000000 7772 +documentsgeneralintroduct 0 1 6.957497 0.000000 7773 +relatedrun 0 1 6.957497 0.000000 7774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..326f03e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +comment 0 93 2.397895 0.000000 146 +academ 0 82 2.484907 0.000000 178 +sourc 0 77 2.564949 0.000000 201 +degre 1 69 2.708050 2.708050 259 +previou 0 62 2.772589 0.000000 290 +colleg 1 61 2.833213 2.833213 300 +index 1 56 2.890372 2.890372 309 +quarter 1 47 3.091042 3.091042 389 +adapt 0 46 3.091042 0.000000 387 +offer 1 43 3.178054 3.178054 414 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +art 0 29 3.583519 0.000000 593 +quot 0 29 3.583519 0.000000 582 +accur 0 25 3.737670 0.000000 680 +less 0 18 4.060443 0.000000 892 +portion 0 16 4.174387 0.000000 971 +webmast 1 15 4.248495 4.248495 1045 +reprint 0 14 4.317488 0.000000 1097 +earlier 0 13 4.382027 0.000000 1140 +web 0 12 4.465908 0.000000 1249 +duli 0 12 4.465908 0.000000 1248 +instanc 0 11 4.553877 0.000000 1322 +nonprofit 0 11 4.553877 0.000000 1339 +intact 0 2 6.263398 0.000000 4385 +pagecurr 0 1 6.957497 0.000000 7775 +quarterth 0 1 6.957497 0.000000 7776 +quarterscours 0 1 6.957497 0.000000 7777 +younotic 0 1 6.957497 0.000000 7778 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..e56d72d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +lectur 1 135 1.945910 1.945910 73 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +version 0 113 2.197225 0.000000 122 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +exam 1 86 2.484907 2.484907 169 +contain 0 81 2.484907 0.000000 174 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +sourc 0 77 2.564949 0.000000 201 +logic 1 71 2.639057 2.639057 230 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +sieg 1 69 2.708050 2.708050 260 +goal 0 66 2.708050 0.000000 250 +syllabu 0 67 2.708050 0.000000 247 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +previou 0 62 2.772589 0.000000 290 +publish 0 57 2.890372 0.000000 326 +think 0 57 2.890372 0.000000 314 +maintain 1 51 2.995732 2.995732 342 +digit 0 52 2.995732 0.000000 348 +frequent 0 49 3.044522 0.000000 367 +archiv 0 49 3.044522 0.000000 364 +quarter 1 47 3.091042 3.091042 389 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +textbook 0 44 3.135494 0.000000 397 +even 0 45 3.135494 0.000000 393 +announc 1 40 3.258097 3.258097 441 +author 0 39 3.258097 0.000000 450 +slide 0 38 3.295837 0.000000 467 +credit 0 38 3.295837 0.000000 460 +expect 0 37 3.332205 0.000000 484 +purpos 0 37 3.332205 0.000000 481 +especi 0 36 3.367296 0.000000 496 +copyright 0 36 3.367296 0.000000 495 +go 0 33 3.433987 0.000000 529 +ad 0 32 3.465736 0.000000 544 +collabor 0 32 3.465736 0.000000 543 +autumn 1 31 3.496508 3.496508 558 +quot 0 29 3.583519 0.000000 582 +administr 1 27 3.637586 3.637586 628 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +notic 0 25 3.737670 0.000000 675 +accur 0 25 3.737670 0.000000 680 +tell 0 21 3.912023 0.000000 777 +anderson 0 19 4.007333 0.000000 860 +feedback 0 19 4.007333 0.000000 854 +aid 0 18 4.060443 0.000000 904 +whole 0 17 4.110874 0.000000 940 +weekli 0 17 4.110874 0.000000 919 +weslei 1 16 4.174387 4.174387 983 +portion 0 16 4.174387 0.000000 971 +webmast 1 15 4.248495 4.248495 1045 +anonym 0 14 4.317488 0.000000 1100 +reprint 0 14 4.317488 0.000000 1097 +everyon 0 13 4.382027 0.000000 1148 +quizz 0 13 4.382027 0.000000 1151 +addison 1 12 4.465908 4.465908 1230 +workload 0 12 4.465908 0.000000 1210 +overal 0 12 4.465908 0.000000 1254 +duli 0 12 4.465908 0.000000 1248 +benjamin 1 11 4.553877 4.553877 1296 +evolut 0 11 4.553877 0.000000 1314 +nonprofit 0 11 4.553877 0.000000 1339 +cheat 0 10 4.653960 0.000000 1395 +desir 0 9 4.753590 0.000000 1542 +cum 1 8 4.875197 4.875197 1619 +bunch 0 7 5.010635 0.000000 1861 +gaetano 1 6 5.164786 5.164786 2068 +borriello 1 5 5.347108 5.347108 2349 +corei 1 4 5.568345 5.568345 2718 +contemporari 0 4 5.568345 0.000000 2719 +katz 1 3 5.857933 5.857933 3276 +corin 0 3 5.857933 0.000000 3311 +aweekli 0 3 5.857933 0.000000 3312 +andersonwelcom 0 2 6.263398 0.000000 4400 +tocs 0 2 6.263398 0.000000 4401 +messagess 0 2 6.263398 0.000000 4402 +synario 0 2 6.263398 0.000000 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..6d8cc1ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +updat 0 191 1.609438 0.000000 41 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +algorithm 0 162 1.791759 0.000000 57 +assign 1 135 1.945910 1.945910 66 +hall 1 146 1.945910 1.945910 65 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +structur 1 106 2.197225 2.197225 105 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +grade 1 90 2.397895 2.397895 142 +requir 0 81 2.484907 0.000000 167 +sieg 0 69 2.708050 0.000000 260 +practic 0 70 2.708050 0.000000 246 +polici 0 64 2.772589 0.000000 279 +previou 0 62 2.772589 0.000000 290 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +publish 0 57 2.890372 0.000000 326 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +basic 0 50 3.044522 0.000000 360 +appoint 0 49 3.044522 0.000000 358 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +keep 0 44 3.135494 0.000000 409 +offer 0 43 3.178054 0.000000 414 +late 1 40 3.258097 3.258097 439 +announc 0 40 3.258097 0.000000 441 +winter 0 36 3.367296 0.000000 500 +autumn 0 31 3.496508 0.000000 558 +option 1 30 3.555348 3.555348 575 +steve 0 29 3.583519 0.000000 594 +pagecs 0 26 3.688879 0.000000 658 +dai 0 22 3.850148 0.000000 753 +tent 0 22 3.850148 0.000000 739 +smith 0 20 3.951244 0.000000 820 +facil 0 20 3.951244 0.000000 814 +account 0 18 4.060443 0.000000 882 +lisp 0 18 4.060443 0.000000 897 +encourag 0 18 4.060443 0.000000 880 +deduct 0 12 4.465908 0.000000 1236 +tanimoto 1 10 4.653960 4.653960 1429 +penalti 0 10 4.653960 0.000000 1405 +prentic 0 7 5.010635 0.000000 1838 +aboutth 0 4 5.568345 0.000000 2720 +punctual 0 3 5.857933 0.000000 3313 +anhai 0 2 6.263398 0.000000 4404 +doan 0 2 6.263398 0.000000 4405 +mscc 0 2 6.263398 0.000000 4406 +breakdown 0 2 6.263398 0.000000 4407 +algorithmsautumn 0 1 6.957497 0.000000 7779 +shaffer 0 1 6.957497 0.000000 7780 +examinform 0 1 6.957497 0.000000 7781 +exambas 0 1 6.957497 0.000000 7782 +compilerassignmentssolut 0 1 6.957497 0.000000 7783 +assignmentsteach 0 1 6.957497 0.000000 7784 +informationscheduleweb 0 1 6.957497 0.000000 7785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..758e20a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +relat 0 139 1.945910 0.000000 68 +structur 0 106 2.197225 0.000000 105 +instructor 0 108 2.197225 0.000000 107 +exam 0 86 2.484907 0.000000 169 +help 0 83 2.484907 0.000000 175 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +materi 0 75 2.639057 0.000000 221 +degre 1 69 2.708050 2.708050 259 +syllabu 0 67 2.708050 0.000000 247 +colleg 1 61 2.833213 2.833213 300 +major 1 56 2.890372 2.890372 315 +offer 1 43 3.178054 3.178054 414 +art 0 29 3.583519 0.000000 593 +pagecs 0 26 3.688879 0.000000 658 +demo 0 18 4.060443 0.000000 888 +jonathan 0 13 4.382027 0.000000 1174 +mosaic 0 10 4.653960 0.000000 1426 +holden 1 3 5.857933 5.857933 3314 +alistair 0 3 5.857933 0.000000 3315 +nowitz 1 2 6.263398 6.263398 4390 +raini 0 2 6.263398 0.000000 4384 +algorithmsspr 0 1 6.957497 0.000000 7786 +funnowitz 0 1 6.957497 0.000000 7787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..35b74b23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +file 2 132 1.945910 3.891820 70 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +search 1 95 2.397895 2.397895 155 +follow 0 92 2.397895 0.000000 143 +homework 1 79 2.564949 2.564949 193 +state 0 76 2.564949 0.000000 207 +come 0 78 2.564949 0.000000 202 +meet 1 72 2.639057 2.639057 229 +line 0 75 2.639057 0.000000 231 +test 2 66 2.708050 5.416100 252 +sieg 0 69 2.708050 0.000000 260 +set 0 50 3.044522 0.000000 361 +answer 0 45 3.135494 0.000000 391 +long 0 43 3.178054 0.000000 413 +tree 0 36 3.367296 0.000000 492 +next 1 34 3.401197 3.401197 517 +eduoffic 1 33 3.433987 3.433987 531 +quot 1 29 3.583519 3.583519 582 +pagecs 0 26 3.688879 0.000000 658 +valu 0 25 3.737670 0.000000 665 +begin 0 23 3.806662 0.000000 716 +sheet 0 16 4.174387 0.000000 973 +indic 1 15 4.248495 4.248495 1013 +charact 0 15 4.248495 0.000000 1028 +denis 0 12 4.465908 0.000000 1255 +string 0 11 4.553877 0.000000 1340 +linda 0 10 4.653960 0.000000 1394 +length 0 10 4.653960 0.000000 1400 +shapiro 0 8 4.875197 0.000000 1686 +insert 0 8 4.875197 0.000000 1687 +integ 0 8 4.875197 0.000000 1688 +assignmentsprogram 0 6 5.164786 0.000000 2019 +assignmentshomework 0 4 5.568345 0.000000 2721 +enclos 1 1 6.957497 6.957497 7788 +siegtelephon 1 1 6.957497 6.957497 7789 +algorithmswint 0 1 6.957497 0.000000 7790 +shapirooffic 0 1 6.957497 0.000000 7791 +pinneloffic 0 1 6.957497 0.000000 7792 +denisep 0 1 6.957497 0.000000 7793 +syllabustransparencieshomework 0 1 6.957497 0.000000 7794 +inquot 0 1 6.957497 0.000000 7795 +associatedvalu 0 1 6.957497 0.000000 7796 +linebegin 0 1 6.957497 0.000000 7797 +isfollow 0 1 6.957497 0.000000 7798 +graphimag 0 1 6.957497 0.000000 7799 +graphreview 0 1 6.957497 0.000000 7800 +listsfin 0 1 6.957497 0.000000 7801 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..589ee86b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +modifi 0 178 1.609438 0.000000 35 +assign 0 135 1.945910 0.000000 66 +compil 0 122 2.079442 0.000000 96 +document 0 121 2.079442 0.000000 89 +world 0 115 2.197225 0.000000 126 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +frequent 0 49 3.044522 0.000000 367 +keep 0 44 3.135494 0.000000 409 +announc 0 40 3.258097 0.000000 441 +ad 0 32 3.465736 0.000000 544 +static 0 27 3.637586 0.000000 619 +hypermedia 0 12 4.465908 0.000000 1247 +admin 0 9 4.753590 0.000000 1476 +documentfor 0 7 5.010635 0.000000 1865 +willb 0 5 5.347108 0.000000 2277 +urgent 0 3 5.857933 0.000000 3316 +classhomethi 0 1 6.957497 0.000000 7802 +inmind 0 1 6.957497 0.000000 7803 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..afb492ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,235 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +project 2 340 1.098612 2.197224 18 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +softwar 2 220 1.386294 2.772588 30 +design 2 213 1.386294 2.772588 25 +also 1 259 1.386294 1.386294 28 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +includ 2 208 1.609438 3.218876 42 +class 2 199 1.609438 3.218876 37 +group 1 183 1.609438 1.609438 36 +updat 0 191 1.609438 0.000000 41 +develop 1 174 1.791759 1.791759 53 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +implement 0 152 1.791759 0.000000 52 +first 1 140 1.945910 1.945910 71 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +document 2 121 2.079442 4.158884 89 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +studi 0 120 2.079442 0.000000 91 +manag 2 114 2.197225 4.394450 125 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +make 1 111 2.197225 2.197225 120 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +user 1 104 2.302585 2.302585 137 +technic 1 100 2.302585 2.302585 140 +part 0 98 2.302585 0.000000 129 +take 0 97 2.302585 0.000000 134 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +search 0 95 2.397895 0.000000 155 +present 0 91 2.397895 0.000000 145 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +control 1 82 2.484907 2.484907 164 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +larg 0 82 2.484907 0.000000 168 +member 0 84 2.484907 0.000000 165 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +method 0 80 2.564949 0.000000 213 +master 0 76 2.564949 0.000000 216 +appear 0 78 2.564949 0.000000 210 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +write 1 72 2.639057 2.639057 222 +test 1 66 2.708050 2.708050 252 +sieg 0 69 2.708050 0.000000 260 +plan 2 65 2.772589 5.545178 272 +experi 1 64 2.772589 2.772589 283 +organ 1 65 2.772589 2.772589 265 +written 1 63 2.772589 2.772589 278 +interact 1 62 2.772589 2.772589 270 +evalu 1 64 2.772589 2.772589 266 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +creat 0 63 2.772589 0.000000 277 +guid 0 63 2.772589 0.000000 267 +sever 0 56 2.890372 0.000000 322 +reason 0 57 2.890372 0.000000 318 +allow 0 53 2.944439 0.000000 333 +profession 0 51 2.995732 0.000000 345 +hardwar 0 51 2.995732 0.000000 350 +set 1 50 3.044522 3.044522 361 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +quarter 1 47 3.091042 3.091042 389 +done 1 47 3.091042 3.091042 381 +possibl 0 47 3.091042 0.000000 378 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +discuss 0 45 3.135494 0.000000 399 +natur 0 44 3.135494 0.000000 406 +review 1 42 3.218876 3.218876 425 +howev 1 41 3.218876 3.218876 422 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +correct 0 38 3.295837 0.000000 462 +respons 2 37 3.332205 6.664410 476 +cost 0 37 3.332205 0.000000 480 +short 0 36 3.367296 0.000000 499 +ofth 0 36 3.367296 0.000000 491 +survei 0 35 3.401197 0.000000 513 +manual 0 35 3.401197 0.000000 504 +product 1 33 3.433987 3.433987 527 +concept 1 32 3.465736 3.465736 537 +human 1 32 3.465736 3.465736 546 +often 0 31 3.496508 0.000000 551 +posit 0 31 3.496508 0.000000 552 +secur 1 30 3.555348 3.555348 577 +produc 0 30 3.555348 0.000000 572 +hard 0 30 3.555348 0.000000 563 +exist 0 30 3.555348 0.000000 569 +particip 1 29 3.583519 3.583519 589 +platform 0 29 3.583519 0.000000 591 +usual 0 28 3.610918 0.000000 608 +held 0 28 3.610918 0.000000 600 +releas 0 28 3.610918 0.000000 616 +team 1 27 3.637586 3.637586 625 +administr 0 27 3.637586 0.000000 628 +determin 0 27 3.637586 0.000000 630 +experiment 1 26 3.688879 3.688879 645 +pagecs 0 26 3.688879 0.000000 658 +session 0 26 3.688879 0.000000 643 +consist 0 26 3.688879 0.000000 651 +primari 2 25 3.737670 7.475340 669 +reliabl 1 25 3.737670 3.737670 674 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +handl 0 24 3.761200 0.000000 685 +head 0 23 3.806662 0.000000 732 +try 0 22 3.850148 0.000000 764 +qualiti 1 20 3.951244 3.951244 832 +verif 0 20 3.951244 0.000000 826 +safeti 0 20 3.951244 0.000000 817 +sure 0 20 3.951244 0.000000 813 +feedback 0 19 4.007333 0.000000 854 +statu 0 18 4.060443 0.000000 885 +regular 0 17 4.110874 0.000000 929 +outlin 0 17 4.110874 0.000000 914 +estim 0 17 4.110874 0.000000 930 +normal 0 16 4.174387 0.000000 995 +configur 1 15 4.248495 4.248495 1012 +enough 1 15 4.248495 4.248495 1040 +track 0 15 4.248495 0.000000 1029 +transit 0 15 4.248495 0.000000 1046 +role 1 14 4.317488 4.317488 1101 +conduct 1 14 4.317488 4.317488 1065 +embed 0 14 4.317488 0.000000 1102 +everyon 1 13 4.382027 4.382027 1148 +essenti 0 13 4.382027 0.000000 1137 +sai 0 13 4.382027 0.000000 1175 +necessari 0 13 4.382027 0.000000 1147 +overal 1 12 4.465908 4.465908 1254 +nanci 0 12 4.465908 0.000000 1256 +skill 0 12 4.465908 0.000000 1205 +characterist 0 12 4.465908 0.000000 1257 +readabl 0 12 4.465908 0.000000 1258 +valid 0 11 4.553877 0.000000 1299 +evolut 0 11 4.553877 0.000000 1314 +princip 0 10 4.653960 0.000000 1397 +mainten 1 9 4.753590 4.753590 1543 +leveson 1 9 4.753590 4.753590 1540 +latter 0 9 4.753590 0.000000 1522 +factor 0 9 4.753590 0.000000 1544 +respect 0 9 4.753590 0.000000 1545 +realist 0 8 4.875197 0.000000 1665 +risk 0 8 4.875197 0.000000 1689 +reus 0 8 4.875197 0.000000 1661 +architect 0 8 4.875197 0.000000 1624 +successfulli 0 7 5.010635 0.000000 1869 +attach 0 7 5.010635 0.000000 1785 +metric 0 7 5.010635 0.000000 1831 +ethic 0 7 5.010635 0.000000 1786 +creation 1 6 5.164786 5.164786 2069 +ensur 1 6 5.164786 5.164786 2012 +lack 0 6 5.164786 0.000000 1994 +phase 0 6 5.164786 0.000000 1977 +theproject 0 6 5.164786 0.000000 1981 +deliv 0 6 5.164786 0.000000 2070 +augment 0 5 5.347108 0.000000 2350 +assur 1 4 5.568345 5.568345 2722 +isthat 0 4 5.568345 0.000000 2723 +assess 0 4 5.568345 0.000000 2724 +employe 0 4 5.568345 0.000000 2717 +duti 2 3 5.857933 11.715866 3317 +boe 1 3 5.857933 5.857933 3318 +specialist 1 3 5.857933 5.857933 3319 +leadership 1 3 5.857933 5.857933 3320 +expertis 1 3 5.857933 5.857933 3321 +oral 0 3 5.857933 0.000000 3189 +listof 0 3 5.857933 0.000000 3322 +proper 0 3 5.857933 0.000000 3323 +interview 0 3 5.857933 0.000000 3324 +portfolio 1 2 6.263398 6.263398 4408 +educours 0 2 6.263398 0.000000 4409 +terminolog 0 2 6.263398 0.000000 4410 +thegroup 0 2 6.263398 0.000000 4054 +beavoid 0 2 6.263398 0.000000 4411 +thenorm 0 2 6.263398 0.000000 4412 +clariti 0 2 6.263398 0.000000 4413 +deliver 1 1 6.957497 6.957497 7804 +mockup 1 1 6.957497 6.957497 7805 +descriptioninstruct 0 1 6.957497 0.000000 7806 +softwaresystem 0 1 6.957497 0.000000 7807 +tocreat 0 1 6.957497 0.000000 7808 +effectiveor 0 1 6.957497 0.000000 7809 +topicsar 0 1 6.957497 0.000000 7810 +employersand 0 1 6.957497 0.000000 7811 +realbo 0 1 6.957497 0.000000 7812 +largegroup 0 1 6.957497 0.000000 7813 +cannotlearn 0 1 6.957497 0.000000 7814 +devotedto 0 1 6.957497 0.000000 7815 +isto 0 1 6.957497 0.000000 7816 +effectivelytogeth 0 1 6.957497 0.000000 7817 +disast 0 1 6.957497 0.000000 7818 +worktogeth 0 1 6.957497 0.000000 7819 +requirementsanalysi 0 1 6.957497 0.000000 7820 +areal 0 1 6.957497 0.000000 7821 +engineeringinstitut 0 1 6.957497 0.000000 7822 +providedat 0 1 6.957497 0.000000 7823 +playthat 0 1 6.957497 0.000000 7824 +projectso 0 1 6.957497 0.000000 7825 +softwaredevelop 0 1 6.957497 0.000000 7826 +responsiblefor 0 1 6.957497 0.000000 7827 +duri 0 1 6.957497 0.000000 7828 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..372aef72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +project 0 340 1.098612 0.000000 18 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +includ 1 208 1.609438 1.609438 42 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +professor 0 137 1.945910 0.000000 76 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +document 0 121 2.079442 0.000000 89 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +specif 1 106 2.197225 2.197225 106 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +question 0 91 2.397895 0.000000 141 +requir 1 81 2.484907 2.484907 167 +larg 0 82 2.484907 0.000000 168 +mondai 0 77 2.564949 0.000000 206 +method 0 80 2.564949 0.000000 213 +sieg 1 69 2.708050 2.708050 260 +test 0 66 2.708050 0.000000 252 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +creat 0 63 2.772589 0.000000 277 +complex 0 64 2.772589 0.000000 269 +new 0 64 2.772589 0.000000 262 +locat 0 59 2.833213 0.000000 303 +sampl 0 53 2.944439 0.000000 339 +appoint 1 49 3.044522 3.044522 358 +standard 0 48 3.044522 0.000000 365 +fridai 0 44 3.135494 0.000000 390 +winter 1 36 3.367296 3.367296 500 +concept 0 32 3.465736 0.000000 537 +produc 0 30 3.555348 0.000000 572 +pagecs 0 26 3.688879 0.000000 658 +comp 0 26 3.688879 0.000000 650 +request 0 26 3.688879 0.000000 635 +fundament 0 25 3.737670 0.000000 661 +newsgroup 0 21 3.912023 0.000000 783 +feedback 0 19 4.007333 0.000000 854 +adam 0 17 4.110874 0.000000 934 +coupl 0 17 4.110874 0.000000 939 +essenti 0 13 4.382027 0.000000 1137 +loew 0 12 4.465908 0.000000 1252 +nanci 0 12 4.465908 0.000000 1256 +leveson 1 9 4.753590 4.753590 1540 +risk 0 8 4.875197 0.000000 1689 +successfulli 0 7 5.010635 0.000000 1869 +prentic 0 7 5.010635 0.000000 1838 +carlson 1 5 5.347108 5.347108 2351 +consum 0 5 5.347108 0.000000 2334 +interview 0 3 5.857933 0.000000 3324 +axiomat 0 3 5.857933 0.000000 3288 +mailinglist 0 3 5.857933 0.000000 3325 +militari 0 3 5.857933 0.000000 3326 +defens 0 3 5.857933 0.000000 3327 +educours 0 2 6.263398 0.000000 4409 +petri 0 2 6.263398 0.000000 4414 +engineeringmeet 0 1 6.957497 0.000000 7829 +eduta 0 1 6.957497 0.000000 7830 +descriptionthi 0 1 6.957497 0.000000 7831 +textbookghezzi 0 1 6.957497 0.000000 7832 +jazayeri 0 1 6.957497 0.000000 7833 +mandrioli 0 1 6.957497 0.000000 7834 +cohes 0 1 6.957497 0.000000 7835 +departmentsuggest 0 1 6.957497 0.000000 7836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..960bb675 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,347 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +cours 2 273 1.098612 2.197224 15 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +languag 2 227 1.386294 2.772588 26 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +group 0 183 1.609438 0.000000 36 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +assign 2 135 1.945910 3.891820 66 +hall 1 146 1.945910 1.945910 65 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +note 1 142 1.945910 1.945910 67 +click 1 142 1.945910 1.945910 78 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +number 1 130 2.079442 2.079442 97 +seattl 0 120 2.079442 0.000000 103 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +part 2 98 2.302585 4.605170 129 +book 1 99 2.302585 2.302585 131 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +mani 1 92 2.397895 2.397895 150 +center 1 88 2.397895 2.397895 158 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +grade 0 90 2.397895 0.000000 142 +select 0 91 2.397895 0.000000 154 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +pictur 0 89 2.397895 0.000000 160 +exam 2 86 2.484907 4.969814 169 +info 1 85 2.484907 2.484907 176 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +wide 1 84 2.484907 2.484907 185 +librari 0 87 2.484907 0.000000 181 +build 0 85 2.484907 0.000000 184 +internet 0 83 2.484907 0.000000 186 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +issu 1 78 2.564949 2.564949 211 +homework 1 79 2.564949 2.564949 193 +messag 1 76 2.564949 2.564949 212 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +complet 0 77 2.564949 0.000000 208 +tuesdai 2 73 2.639057 5.278114 219 +onlin 2 75 2.639057 5.278114 223 +materi 1 75 2.639057 2.639057 221 +intellig 1 72 2.639057 2.639057 225 +meet 1 72 2.639057 2.639057 229 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +sieg 2 69 2.708050 5.416100 260 +thursdai 2 70 2.708050 5.416100 241 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +test 0 66 2.708050 0.000000 252 +receiv 0 66 2.708050 0.000000 244 +januari 2 62 2.772589 5.545178 264 +copi 2 63 2.772589 5.545178 284 +artifici 1 63 2.772589 2.772589 280 +wednesdai 1 64 2.772589 2.772589 261 +descript 1 64 2.772589 2.772589 271 +guid 0 63 2.772589 0.000000 267 +new 0 64 2.772589 0.000000 262 +import 0 65 2.772589 0.000000 282 +march 2 61 2.833213 5.666426 295 +room 1 59 2.833213 2.833213 301 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +sever 1 56 2.890372 2.890372 322 +unix 1 58 2.890372 2.890372 308 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +point 0 58 2.890372 0.000000 319 +detail 0 57 2.890372 0.000000 321 +februari 2 54 2.944439 5.888878 328 +cover 1 55 2.944439 2.944439 329 +extens 1 53 2.944439 2.944439 340 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +run 0 51 2.995732 0.000000 347 +week 0 52 2.995732 0.000000 343 +archiv 1 49 3.044522 3.044522 364 +standard 1 48 3.044522 3.044522 365 +visual 0 48 3.044522 0.000000 372 +format 0 48 3.044522 0.000000 356 +get 1 46 3.091042 3.091042 380 +understand 0 47 3.091042 0.000000 384 +quarter 0 47 3.091042 0.000000 389 +move 0 47 3.091042 0.000000 382 +midterm 1 45 3.135494 3.135494 392 +fridai 1 44 3.135494 3.135494 390 +answer 0 45 3.135494 0.000000 391 +anoth 0 45 3.135494 0.000000 408 +even 0 45 3.135494 0.000000 393 +mark 0 44 3.135494 0.000000 403 +offer 1 43 3.178054 3.178054 414 +show 0 43 3.178054 0.000000 417 +examin 1 42 3.218876 3.218876 424 +review 1 42 3.218876 3.218876 425 +edit 1 42 3.218876 3.218876 418 +past 0 42 3.218876 0.000000 428 +combin 0 42 3.218876 0.000000 421 +compani 0 41 3.218876 0.000000 423 +editor 0 41 3.218876 0.000000 433 +howev 0 41 3.218876 0.000000 422 +might 0 41 3.218876 0.000000 426 +tutori 1 39 3.258097 3.258097 437 +announc 0 40 3.258097 0.000000 441 +must 0 40 3.258097 0.000000 442 +multipl 0 39 3.258097 0.000000 453 +form 0 39 3.258097 0.000000 443 +close 1 38 3.295837 3.295837 465 +credit 0 38 3.295837 0.000000 460 +cost 0 37 3.332205 0.000000 480 +winter 1 36 3.367296 3.367296 500 +download 1 36 3.367296 3.367296 489 +post 1 35 3.401197 3.401197 505 +either 0 35 3.401197 0.000000 506 +approxim 0 35 3.401197 0.000000 509 +go 0 33 3.433987 0.000000 529 +given 1 32 3.465736 3.465736 538 +chapter 0 32 3.465736 0.000000 536 +common 2 30 3.555348 7.110696 574 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +hard 0 30 3.555348 0.000000 563 +option 0 30 3.555348 0.000000 575 +turn 1 29 3.583519 3.583519 586 +steve 1 29 3.583519 3.583519 594 +particip 0 29 3.583519 0.000000 589 +limit 0 29 3.583519 0.000000 585 +except 0 28 3.610918 0.000000 607 +full 0 28 3.610918 0.000000 615 +packag 0 28 3.610918 0.000000 614 +held 0 28 3.610918 0.000000 600 +symbol 0 27 3.637586 0.000000 620 +session 1 26 3.688879 3.688879 643 +pagecs 0 26 3.688879 0.000000 658 +rather 0 26 3.688879 0.000000 642 +relev 0 26 3.688879 0.000000 637 +altern 0 26 3.688879 0.000000 641 +although 0 25 3.737670 0.000000 667 +todai 0 25 3.737670 0.000000 672 +alwai 0 24 3.761200 0.000000 691 +interpret 0 24 3.761200 0.000000 686 +wish 0 24 3.761200 0.000000 692 +demonstr 0 24 3.761200 0.000000 694 +displai 0 23 3.806662 0.000000 712 +tent 0 22 3.850148 0.000000 739 +try 0 22 3.850148 0.000000 764 +varieti 0 22 3.850148 0.000000 740 +instead 0 22 3.850148 0.000000 756 +sent 0 22 3.850148 0.000000 763 +programminglanguag 0 21 3.912023 0.000000 782 +path 0 21 3.912023 0.000000 778 +viewer 0 21 3.912023 0.000000 787 +facil 1 20 3.951244 3.951244 814 +expert 0 20 3.951244 0.000000 833 +entir 0 20 3.951244 0.000000 811 +particularli 0 19 4.007333 0.000000 867 +lisp 3 18 4.060443 12.181329 897 +element 1 18 4.060443 4.060443 895 +seem 0 18 4.060443 0.000000 899 +macintosh 2 17 4.110874 8.221748 920 +stat 1 17 4.110874 4.110874 924 +whole 0 17 4.110874 0.000000 940 +regular 0 17 4.110874 0.000000 929 +modif 0 17 4.110874 0.000000 913 +transfer 0 16 4.174387 0.000000 967 +normal 0 16 4.174387 0.000000 995 +choic 0 16 4.174387 0.000000 979 +purchas 1 15 4.248495 4.248495 1030 +micro 0 15 4.248495 0.000000 1031 +easili 0 14 4.317488 0.000000 1077 +prolog 0 13 4.382027 0.000000 1155 +difficulti 0 13 4.382027 0.000000 1132 +emac 0 13 4.382027 0.000000 1143 +introduc 0 13 4.382027 0.000000 1139 +convert 0 13 4.382027 0.000000 1122 +host 1 11 4.553877 4.553877 1306 +transpar 0 11 4.553877 0.000000 1325 +abil 0 11 4.553877 0.000000 1341 +string 0 11 4.553877 0.000000 1340 +sens 0 11 4.553877 0.000000 1305 +tanimoto 1 10 4.653960 4.653960 1429 +bring 1 10 4.653960 4.653960 1430 +catalog 0 10 4.653960 0.000000 1431 +mainli 0 10 4.653960 0.000000 1432 +tradit 0 10 4.653960 0.000000 1404 +entitl 0 9 4.753590 0.000000 1490 +deadlin 0 9 4.753590 0.000000 1502 +attent 0 8 4.875197 0.000000 1651 +ruth 1 7 5.010635 5.010635 1870 +throughout 0 7 5.010635 0.000000 1871 +bookstor 0 7 5.010635 0.000000 1837 +fromth 0 7 5.010635 0.000000 1802 +fortun 0 7 5.010635 0.000000 1872 +remind 0 7 5.010635 0.000000 1799 +thompson 1 6 5.164786 5.164786 2049 +grammar 0 6 5.164786 0.000000 2058 +trail 0 6 5.164786 0.000000 2071 +fred 0 6 5.164786 0.000000 2072 +classroom 0 6 5.164786 0.000000 2006 +plu 0 6 5.164786 0.000000 2004 +mac 1 5 5.347108 5.347108 2292 +writeup 1 5 5.347108 5.347108 2352 +hardcopi 0 5 5.347108 0.000000 2246 +shell 0 5 5.347108 0.000000 2353 +respond 0 5 5.347108 0.000000 2354 +supplement 0 5 5.347108 0.000000 2355 +allegro 0 5 5.347108 0.000000 2314 +attract 0 5 5.347108 0.000000 2356 +freeman 0 4 5.568345 0.000000 2725 +basement 0 4 5.568345 0.000000 2663 +websit 0 4 5.568345 0.000000 2726 +andit 0 3 5.857933 0.000000 3328 +contentspag 0 3 5.857933 0.000000 3103 +orpostscript 0 3 5.857933 0.000000 3329 +mathematica 0 3 5.857933 0.000000 3303 +beginn 0 3 5.857933 0.000000 3330 +insieg 0 3 5.857933 0.000000 3331 +mscc 2 2 6.263398 12.526796 4406 +token 1 2 6.263398 6.263398 4415 +mileston 1 2 6.263398 6.263398 4416 +ofread 0 2 6.263398 0.000000 4417 +glossari 0 2 6.263398 0.000000 4418 +referenceon 0 2 6.263398 0.000000 4419 +usingcommon 0 2 6.263398 0.000000 4420 +themathemat 0 2 6.263398 0.000000 4421 +yacc 0 2 6.263398 0.000000 4422 +franz 0 2 6.263398 0.000000 4423 +thelaboratori 0 2 6.263398 0.000000 4424 +onthursdai 0 2 6.263398 0.000000 4425 +pencil 0 2 6.263398 0.000000 4426 +inthompson 1 1 6.957497 6.957497 7837 +koch 1 1 6.957497 6.957497 7838 +andersonmeet 0 1 6.957497 0.000000 7839 +andpars 0 1 6.957497 0.000000 7840 +incommon 0 1 6.957497 0.000000 7841 +purchasedsepar 0 1 6.957497 0.000000 7842 +fordigitool 0 1 6.957497 0.000000 7843 +thatmaintain 0 1 6.957497 0.000000 7844 +currentinform 0 1 6.957497 0.000000 7845 +introductionto 0 1 6.957497 0.000000 7846 +thatdoesn 0 1 6.957497 0.000000 7847 +promptli 0 1 6.957497 0.000000 7848 +theirimplement 0 1 6.957497 0.000000 7849 +buildingprogram 0 1 6.957497 0.000000 7850 +tointepret 0 1 6.957497 0.000000 7851 +alsolook 0 1 6.957497 0.000000 7852 +programmingfacil 0 1 6.957497 0.000000 7853 +thebas 0 1 6.957497 0.000000 7854 +allegrocommon 0 1 6.957497 0.000000 7855 +powerfulenviron 0 1 6.957497 0.000000 7856 +graphicsand 0 1 6.957497 0.000000 7857 +machinesof 0 1 6.957497 0.000000 7858 +theirown 0 1 6.957497 0.000000 7859 +xlisp 0 1 6.957497 0.000000 7860 +theseresourc 0 1 6.957497 0.000000 7861 +thatxlisp 0 1 6.957497 0.000000 7862 +bare 0 1 6.957497 0.000000 7863 +bone 0 1 6.957497 0.000000 7864 +nothav 0 1 6.957497 0.000000 7865 +disadvantag 0 1 6.957497 0.000000 7866 +labunless 0 1 6.957497 0.000000 7867 +fromdigitool 0 1 6.957497 0.000000 7868 +dealallow 0 1 6.957497 0.000000 7869 +lispfor 0 1 6.957497 0.000000 7870 +thistim 0 1 6.957497 0.000000 7871 +regardingread 0 1 6.957497 0.000000 7872 +printout 0 1 6.957497 0.000000 7873 +becov 0 1 6.957497 0.000000 7874 +announcedearli 0 1 6.957497 0.000000 7875 +snowflak 0 1 6.957497 0.000000 7876 +projectgener 0 1 6.957497 0.000000 7877 +aboutdemonstr 0 1 6.957497 0.000000 7878 +onmondai 0 1 6.957497 0.000000 7879 +exercisestokenizerassign 0 1 6.957497 0.000000 7880 +andpart 0 1 6.957497 0.000000 7881 +parsertokenizerpart 0 1 6.957497 0.000000 7882 +snowflakeassign 0 1 6.957497 0.000000 7883 +ondemonstr 0 1 6.957497 0.000000 7884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..5fee2c08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +hour 1 165 1.791759 1.791759 46 +assign 2 135 1.945910 3.891820 66 +first 1 140 1.945910 1.945910 71 +professor 0 137 1.945910 0.000000 76 +note 0 142 1.945910 0.000000 67 +introduct 1 126 2.079442 2.079442 87 +postscript 1 131 2.079442 2.079442 90 +spring 0 131 2.079442 0.000000 88 +final 1 116 2.197225 2.197225 108 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +solut 1 82 2.484907 2.484907 162 +second 1 81 2.484907 2.484907 166 +exam 0 86 2.484907 0.000000 169 +april 1 77 2.564949 2.564949 196 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +june 0 79 2.564949 0.000000 214 +interfac 0 79 2.564949 0.000000 209 +intellig 1 72 2.639057 2.639057 225 +thursdai 0 70 2.708050 0.000000 241 +artifici 1 63 2.772589 2.772589 280 +type 0 61 2.833213 0.000000 296 +instruct 1 53 2.944439 2.944439 332 +sampl 0 53 2.944439 0.000000 339 +basic 0 50 3.044522 0.000000 360 +third 1 43 3.178054 3.178054 412 +download 0 36 3.367296 0.000000 489 +common 0 30 3.555348 0.000000 574 +lisp 0 18 4.060443 0.000000 897 +record 0 18 4.060443 0.000000 890 +outlin 0 17 4.110874 0.000000 914 +macintosh 0 17 4.110874 0.000000 920 +emac 1 13 4.382027 4.382027 1143 +rich 0 10 4.653960 0.000000 1396 +noon 0 7 5.010635 0.000000 1804 +thompson 0 6 5.164786 0.000000 2049 +allegro 1 5 5.347108 5.347108 2314 +gentl 0 5 5.347108 0.000000 2264 +csoffic 0 4 5.568345 0.000000 2727 +knight 0 4 5.568345 0.000000 2728 +turnin 0 4 5.568345 0.000000 2654 +holden 1 3 5.857933 5.857933 3314 +redston 1 3 5.857933 5.857933 3332 +alistair 0 3 5.857933 0.000000 3315 +joshua 0 3 5.857933 0.000000 3333 +noonta 0 2 6.263398 0.000000 4427 +secondedit 0 2 6.263398 0.000000 4096 +touretzki 0 2 6.263398 0.000000 4428 +refcard 1 1 6.957497 6.957497 7885 +intelligencecs 0 1 6.957497 0.000000 7886 +msoffic 0 1 6.957497 0.000000 7887 +symboliccomput 0 1 6.957497 0.000000 7888 +emacsinterfac 0 1 6.957497 0.000000 7889 +standalonelisp 0 1 6.957497 0.000000 7890 +gradesredston 0 1 6.957497 0.000000 7891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..1b0a3d39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +last 1 314 1.098612 1.098612 14 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +updat 1 191 1.609438 1.609438 41 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +read 0 154 1.791759 0.000000 47 +file 1 132 1.945910 1.945910 70 +assign 0 135 1.945910 0.000000 66 +support 0 132 1.945910 0.000000 83 +postscript 1 131 2.079442 2.079442 90 +provid 0 121 2.079442 0.000000 94 +check 0 115 2.197225 0.000000 118 +text 1 98 2.302585 2.302585 133 +book 0 99 2.302585 0.000000 131 +solut 1 82 2.484907 2.484907 162 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +free 1 73 2.639057 2.639057 224 +materi 0 75 2.639057 0.000000 221 +syllabu 1 67 2.708050 2.708050 247 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +previou 0 62 2.772589 0.000000 290 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +math 0 44 3.135494 0.000000 402 +winter 1 36 3.367296 3.367296 500 +print 0 34 3.401197 0.000000 503 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +viewer 1 21 3.912023 3.912023 787 +martin 0 21 3.912023 0.000000 794 +latest 0 21 3.912023 0.000000 785 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +render 0 17 4.110874 0.000000 947 +ascii 0 15 4.248495 0.000000 1032 +latex 1 14 4.317488 4.317488 1064 +command 0 14 4.317488 0.000000 1083 +larri 0 13 4.382027 0.000000 1142 +karlin 0 13 4.382027 0.000000 1176 +web 0 12 4.465908 0.000000 1249 +errata 0 10 4.653960 0.000000 1403 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +legibl 1 7 5.010635 5.010635 1866 +adob 1 7 5.010635 5.010635 1873 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 1 5 5.347108 5.347108 2345 +thecours 0 4 5.568345 0.000000 2685 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +tompaclass 0 3 5.857933 0.000000 3310 +tompa 0 3 5.857933 0.000000 3305 +aberman 0 2 6.263398 0.000000 4429 +midtem 1 1 6.957497 6.957497 7892 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..223d6d14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +assign 0 135 1.945910 0.000000 66 +support 0 132 1.945910 0.000000 83 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +final 1 116 2.197225 2.197225 108 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +text 0 98 2.302585 0.000000 133 +solut 0 82 2.484907 0.000000 162 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +sourc 0 77 2.564949 0.000000 201 +exampl 0 77 2.564949 0.000000 195 +free 1 73 2.639057 2.639057 224 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +syllabu 1 67 2.708050 2.708050 247 +sieg 1 69 2.708050 2.708050 260 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +math 0 44 3.135494 0.000000 402 +print 0 34 3.401197 0.000000 503 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +tent 0 22 3.850148 0.000000 739 +sent 0 22 3.850148 0.000000 763 +viewer 1 21 3.912023 3.912023 787 +latest 0 21 3.912023 0.000000 785 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +render 0 17 4.110874 0.000000 947 +ascii 0 15 4.248495 0.000000 1032 +latex 1 14 4.317488 4.317488 1064 +command 0 14 4.317488 0.000000 1083 +everyth 1 13 4.382027 4.382027 1169 +larri 1 13 4.382027 4.382027 1142 +web 0 12 4.465908 0.000000 1249 +errata 0 10 4.653960 0.000000 1403 +admin 0 9 4.753590 0.000000 1476 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +legibl 1 7 5.010635 5.010635 1866 +adob 1 7 5.010635 5.010635 1873 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 1 5 5.347108 5.347108 2345 +thecours 0 4 5.568345 0.000000 2685 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +sendmail 0 3 5.857933 0.000000 3099 +jayram 1 1 6.957497 6.957497 7893 +thathachar 0 1 6.957497 0.000000 7894 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..e1ddc2ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +link 1 247 1.386294 1.386294 24 +washington 0 236 1.386294 0.000000 32 +hour 1 165 1.791759 1.791759 46 +assign 0 135 1.945910 0.000000 66 +click 0 142 1.945910 0.000000 78 +databas 1 122 2.079442 2.079442 86 +introduct 1 126 2.079442 2.079442 87 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +imag 1 91 2.397895 2.397895 161 +homework 0 79 2.564949 0.000000 193 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +sieg 1 69 2.708050 2.708050 260 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +prof 0 64 2.772589 0.000000 273 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +telephon 0 50 3.044522 0.000000 373 +quarter 0 47 3.091042 0.000000 389 +announc 0 40 3.258097 0.000000 441 +probabl 0 40 3.258097 0.000000 455 +word 0 34 3.401197 0.000000 508 +queri 0 33 3.433987 0.000000 524 +request 0 26 3.688879 0.000000 635 +left 0 19 4.007333 0.000000 851 +save 0 14 4.317488 0.000000 1099 +dbm 0 13 4.382027 0.000000 1136 +linda 0 10 4.653960 0.000000 1394 +shapiro 1 8 4.875197 4.875197 1686 +potenti 0 8 4.875197 0.000000 1690 +shift 0 5 5.347108 0.000000 2357 +systemsfal 0 4 5.568345 0.000000 2683 +patrick 0 3 5.857933 0.000000 3334 +qbic 0 3 5.857933 0.000000 3294 +systemscs 0 1 6.957497 0.000000 7895 +crowlei 0 1 6.957497 0.000000 7896 +pcrowlei 0 1 6.957497 0.000000 7897 +unisql 0 1 6.957497 0.000000 7898 +webcs 0 1 6.957497 0.000000 7899 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..92393ba1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +lectur 1 135 1.945910 1.945910 73 +first 0 140 1.945910 0.000000 71 +note 0 142 1.945910 0.000000 67 +relat 0 139 1.945910 0.000000 68 +click 0 142 1.945910 0.000000 78 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +instructor 1 108 2.197225 2.197225 107 +person 1 111 2.197225 2.197225 117 +send 0 114 2.197225 0.000000 109 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +solut 1 82 2.484907 2.484907 162 +info 0 85 2.484907 0.000000 176 +messag 0 76 2.564949 0.000000 212 +appear 0 78 2.564949 0.000000 210 +materi 1 75 2.639057 2.639057 221 +sieg 0 69 2.708050 0.000000 260 +receiv 0 66 2.708050 0.000000 244 +handout 0 64 2.772589 0.000000 263 +space 0 57 2.890372 0.000000 310 +cover 0 55 2.944439 0.000000 329 +appoint 0 49 3.044522 0.000000 358 +archiv 0 49 3.044522 0.000000 364 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +brian 1 38 3.295837 3.295837 466 +slide 0 38 3.295837 0.000000 467 +word 0 34 3.401197 0.000000 508 +autumn 0 31 3.496508 0.000000 558 +scale 0 28 3.610918 0.000000 613 +actual 0 28 3.610918 0.000000 604 +administr 0 27 3.637586 0.000000 628 +wish 0 24 3.761200 0.000000 692 +sent 0 22 3.850148 0.000000 763 +feedback 1 19 4.007333 4.007333 854 +bershad 1 18 4.060443 4.060443 902 +intro 0 17 4.110874 0.000000 915 +outlin 0 17 4.110874 0.000000 914 +reflect 0 15 4.248495 0.000000 1034 +webmast 0 15 4.248495 0.000000 1045 +anonym 0 14 4.317488 0.000000 1100 +regularli 0 11 4.553877 0.000000 1338 +hint 0 10 4.653960 0.000000 1419 +sung 0 6 5.164786 0.000000 2075 +pace 0 6 5.164786 0.000000 2011 +carefulli 0 6 5.164786 0.000000 2045 +lost 0 5 5.347108 0.000000 2358 +choi 0 4 5.568345 0.000000 2732 +vital 0 4 5.568345 0.000000 2733 +surviv 0 4 5.568345 0.000000 2734 +aggress 0 3 5.857933 0.000000 3240 +andwil 0 3 5.857933 0.000000 3335 +wisdom 0 2 6.263398 0.000000 4430 +schedulewhat 0 2 6.263398 0.000000 4139 +adminth 0 1 6.957497 0.000000 7900 +andoth 0 1 6.957497 0.000000 7901 +projectsdescript 0 1 6.957497 0.000000 7902 +solutionsto 0 1 6.957497 0.000000 7903 +notesnot 0 1 6.957497 0.000000 7904 +watchthi 0 1 6.957497 0.000000 7905 +andgrad 0 1 6.957497 0.000000 7906 +onproject 0 1 6.957497 0.000000 7907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..0aa1e757 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +class 0 199 1.609438 0.000000 37 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +relat 0 139 1.945910 0.000000 68 +document 1 121 2.079442 2.079442 89 +spring 1 131 2.079442 2.079442 88 +schedul 0 119 2.079442 0.000000 85 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +grade 0 90 2.397895 0.000000 142 +help 1 83 2.484907 2.484907 175 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +exampl 0 77 2.564949 0.000000 195 +write 0 72 2.639057 0.000000 222 +degre 1 69 2.708050 2.708050 259 +guid 0 63 2.772589 0.000000 267 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +instruct 0 53 2.944439 0.000000 332 +frequent 0 49 3.044522 0.000000 367 +visitor 0 49 3.044522 0.000000 371 +quarter 1 47 3.091042 3.091042 389 +netscap 1 44 3.135494 3.135494 395 +mean 0 37 3.332205 0.000000 477 +winter 0 36 3.367296 0.000000 500 +ad 0 32 3.465736 0.000000 544 +autumn 1 31 3.496508 3.496508 558 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +hypermedia 0 12 4.465908 0.000000 1247 +baker 0 7 5.010635 0.000000 1812 +silicon 0 6 5.164786 0.000000 2076 +opengl 0 5 5.347108 0.000000 2299 +bounti 0 4 5.568345 0.000000 2715 +pighin 0 4 5.568345 0.000000 2735 +assignmentshomework 0 4 5.568345 0.000000 2721 +thisdocu 0 3 5.857933 0.000000 3336 +quarterwelcom 0 2 6.263398 0.000000 4378 +indi 0 2 6.263398 0.000000 4431 +keepin 0 1 6.957497 0.000000 7908 +informationwil 0 1 6.957497 0.000000 7909 +classpersonnel 0 1 6.957497 0.000000 7910 +syllabuscours 0 1 6.957497 0.000000 7911 +calendarta 0 1 6.957497 0.000000 7912 +hourshandout 0 1 6.957497 0.000000 7913 +assignmentslectur 0 1 6.957497 0.000000 7914 +notesread 0 1 6.957497 0.000000 7915 +assignmentsprojectsproject 0 1 6.957497 0.000000 7916 +handoutsproject 0 1 6.957497 0.000000 7917 +artifactsproject 0 1 6.957497 0.000000 7918 +sessionsproject 0 1 6.957497 0.000000 7919 +policyproject 0 1 6.957497 0.000000 7920 +upslibui 0 1 6.957497 0.000000 7921 +documentationoth 0 1 6.957497 0.000000 7922 +informationget 0 1 6.957497 0.000000 7923 +classhearn 0 1 6.957497 0.000000 7924 +erratath 0 1 6.957497 0.000000 7925 +labus 0 1 6.957497 0.000000 7926 +pagegraph 0 1 6.957497 0.000000 7927 +linkssgi 0 1 6.957497 0.000000 7928 +surfgrafica 0 1 6.957497 0.000000 7929 +obscurasiggraphgrailgraph 0 1 6.957497 0.000000 7930 +indexoth 0 1 6.957497 0.000000 7931 +linksmvi 0 1 6.957497 0.000000 7932 +departmentth 0 1 6.957497 0.000000 7933 +programth 0 1 6.957497 0.000000 7934 +programweb 0 1 6.957497 0.000000 7935 +helpbas 0 1 6.957497 0.000000 7936 +helpmosa 0 1 6.957497 0.000000 7937 +lynxus 0 1 6.957497 0.000000 7938 +indyspighin 0 1 6.957497 0.000000 7939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..823859a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +commun 0 95 2.397895 0.000000 157 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +html 1 75 2.639057 2.639057 235 +servic 1 72 2.639057 2.639057 236 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +sieg 0 69 2.708050 0.000000 260 +would 0 67 2.708050 0.000000 251 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +prof 0 64 2.772589 0.000000 273 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +browser 0 56 2.890372 0.000000 313 +cover 0 55 2.944439 0.000000 329 +three 0 54 2.944439 0.000000 330 +week 0 52 2.995732 0.000000 343 +format 1 48 3.044522 3.044522 356 +frequent 0 49 3.044522 0.000000 367 +discuss 0 45 3.135494 0.000000 399 +math 0 44 3.135494 0.000000 402 +might 0 41 3.218876 0.000000 426 +origin 0 38 3.295837 0.000000 472 +everi 0 34 3.401197 0.000000 519 +autumn 0 31 3.496508 0.000000 558 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +request 0 26 3.688879 0.000000 635 +consult 0 24 3.761200 0.000000 687 +handl 0 24 3.761200 0.000000 685 +thu 0 21 3.912023 0.000000 773 +fact 0 21 3.912023 0.000000 780 +viewer 0 21 3.912023 0.000000 787 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +letter 1 16 4.174387 4.174387 981 +indic 0 15 4.248495 0.000000 1013 +ascii 0 15 4.248495 0.000000 1032 +latex 1 14 4.317488 4.317488 1064 +command 0 14 4.317488 0.000000 1083 +convert 0 13 4.382027 0.000000 1122 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +larger 0 7 5.010635 0.000000 1875 +legibl 0 7 5.010635 0.000000 1866 +ghostscript 0 7 5.010635 0.000000 1867 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +latexhtml 0 5 5.347108 0.000000 2347 +arun 0 4 5.568345 0.000000 2736 +csoffic 0 4 5.568345 0.000000 2727 +accommod 1 3 5.857933 5.857933 3337 +somani 1 2 6.263398 6.263398 4432 +cslectur 0 2 6.263398 0.000000 4433 +havea 0 2 6.263398 0.000000 4434 +disabl 1 1 6.957497 6.957497 7940 +jari 1 1 6.957497 6.957497 7941 +networksautumn 0 1 6.957497 0.000000 7942 +eebphon 0 1 6.957497 0.000000 7943 +kristensen 0 1 6.957497 0.000000 7944 +tomatch 0 1 6.957497 0.000000 7945 +andprovid 0 1 6.957497 0.000000 7946 +timewindow 0 1 6.957497 0.000000 7947 +overheadshomeworksprojectsinterest 0 1 6.957497 0.000000 7948 +stuffattentionif 0 1 6.957497 0.000000 7949 +pleasecontact 0 1 6.957497 0.000000 7950 +schmitz 0 1 6.957497 0.000000 7951 +requiresacadem 0 1 6.957497 0.000000 7952 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..541c143f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +text 0 98 2.302585 0.000000 133 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +html 1 75 2.639057 2.639057 235 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +sieg 1 69 2.708050 2.708050 260 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +simpl 0 60 2.833213 0.000000 298 +browser 0 56 2.890372 0.000000 313 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +frequent 0 49 3.044522 0.000000 367 +math 0 44 3.135494 0.000000 402 +origin 0 38 3.295837 0.000000 472 +richard 0 31 3.496508 0.000000 559 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +handl 0 24 3.761200 0.000000 685 +william 0 22 3.850148 0.000000 765 +fact 0 21 3.912023 0.000000 780 +viewer 0 21 3.912023 0.000000 787 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +intro 0 17 4.110874 0.000000 915 +ascii 0 15 4.248495 0.000000 1032 +latex 1 14 4.317488 4.317488 1064 +command 0 14 4.317488 0.000000 1083 +convert 0 13 4.382027 0.000000 1122 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +chan 0 7 5.010635 0.000000 1876 +legibl 0 7 5.010635 0.000000 1866 +ghostscript 0 7 5.010635 0.000000 1867 +ladner 1 6 5.164786 5.164786 2062 +markup 0 6 5.164786 0.000000 2059 +strang 0 6 5.164786 0.000000 2064 +latexhtml 0 5 5.347108 0.000000 2347 +csoffic 0 4 5.568345 0.000000 2727 +wchan 0 3 5.857933 0.000000 3338 +cslectur 0 2 6.263398 0.000000 4433 +noonta 0 2 6.263398 0.000000 4427 +eduwchan 0 2 6.263398 0.000000 4435 +networksspr 0 1 6.957497 0.000000 7953 +overheadshomeworksprojectsabout 0 1 6.957497 0.000000 7954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..fda8a46f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +washington 0 236 1.386294 0.000000 32 +fall 1 181 1.609438 1.609438 40 +advanc 0 99 2.302585 0.000000 130 +found 0 53 2.944439 0.000000 337 +digit 0 52 2.995732 0.000000 348 +pagecs 0 26 3.688879 0.000000 658 +designt 0 2 6.263398 0.000000 4436 +kehl 0 2 6.263398 0.000000 4437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..bbb5990f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +recent 0 167 1.791759 0.000000 58 +assign 0 135 1.945910 0.000000 66 +welcom 0 122 2.079442 0.000000 99 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +final 1 116 2.197225 2.197225 108 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +book 1 99 2.302585 2.302585 131 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +chang 0 82 2.484907 0.000000 163 +exam 0 86 2.484907 0.000000 169 +resourc 0 81 2.484907 0.000000 172 +academ 0 82 2.484907 0.000000 178 +homework 1 79 2.564949 2.564949 193 +server 1 76 2.564949 2.564949 204 +state 0 76 2.564949 0.000000 207 +sourc 0 77 2.564949 0.000000 201 +logic 1 71 2.639057 2.639057 230 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +test 0 66 2.708050 0.000000 252 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +laboratori 0 63 2.772589 0.000000 292 +written 0 63 2.772589 0.000000 278 +handout 0 64 2.772589 0.000000 263 +collect 0 65 2.772589 0.000000 268 +march 1 61 2.833213 2.833213 295 +februari 1 54 2.944439 2.944439 328 +sampl 1 53 2.944439 2.944439 339 +cover 0 55 2.944439 0.000000 329 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +adapt 0 46 3.091042 0.000000 387 +fridai 1 44 3.135494 3.135494 390 +mark 0 44 3.135494 0.000000 403 +combin 1 42 3.218876 3.218876 421 +review 0 42 3.218876 0.000000 425 +announc 0 40 3.258097 0.000000 441 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +staff 0 36 3.367296 0.000000 490 +copyright 0 36 3.367296 0.000000 495 +richard 0 31 3.496508 0.000000 559 +option 1 30 3.555348 3.555348 575 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +sequenti 1 22 3.850148 3.850148 745 +vlsi 0 21 3.912023 0.000000 795 +thur 0 19 4.007333 0.000000 847 +scott 0 18 4.060443 0.000000 884 +dilbert 0 16 4.174387 0.000000 996 +sheet 0 16 4.174387 0.000000 973 +portion 0 16 4.174387 0.000000 971 +comic 0 14 4.317488 0.000000 1103 +reprint 0 14 4.317488 0.000000 1097 +johnson 0 13 4.382027 0.000000 1162 +care 0 13 4.382027 0.000000 1177 +duli 0 12 4.465908 0.000000 1248 +tue 0 11 4.553877 0.000000 1308 +stephen 0 11 4.553877 0.000000 1342 +nonprofit 0 11 4.553877 0.000000 1339 +fpga 1 10 4.653960 4.653960 1433 +motorola 0 9 4.753590 0.000000 1546 +gaetano 0 6 5.164786 0.000000 2068 +philip 0 6 5.164786 0.000000 2005 +writeup 0 5 5.347108 0.000000 2352 +borriello 0 5 5.347108 0.000000 2349 +midnight 1 4 5.568345 5.568345 2599 +murphi 0 4 5.568345 0.000000 2737 +semiconductor 0 3 5.857933 0.000000 3339 +semiconduct 0 3 5.857933 0.000000 3340 +micron 0 3 5.857933 0.000000 3341 +kehl 1 2 6.263398 6.263398 4437 +designt 0 2 6.263398 0.000000 4436 +aaron 0 2 6.263398 0.000000 4438 +comprehensivelist 0 2 6.263398 0.000000 4439 +icmanufactur 0 2 6.263398 0.000000 4440 +optionlab 1 1 6.957497 6.957497 7955 +syllabusschedul 0 1 6.957497 0.000000 7956 +savoi 0 1 6.957497 0.000000 7957 +savac 0 1 6.957497 0.000000 7958 +chinn 0 1 6.957497 0.000000 7959 +richin 0 1 6.957497 0.000000 7960 +howard 0 1 6.957497 0.000000 7961 +shchang 0 1 6.957497 0.000000 7962 +csjason 0 1 6.957497 0.000000 7963 +quarterhomework 0 1 6.957497 0.000000 7964 +assignmentsweb 0 1 6.957497 0.000000 7965 +duehomework 0 1 6.957497 0.000000 7966 +abel 0 1 6.957497 0.000000 7967 +fixtur 0 1 6.957497 0.000000 7968 +communicationoth 0 1 6.957497 0.000000 7969 +sheetsth 0 1 6.957497 0.000000 7970 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..1309a172 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +design 2 213 1.386294 2.772588 25 +mail 1 238 1.386294 1.386294 22 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +fall 1 181 1.609438 1.609438 40 +class 0 199 1.609438 0.000000 37 +read 2 154 1.791759 3.583518 47 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +avail 0 169 1.791759 0.000000 48 +model 0 145 1.945910 0.000000 69 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +postscript 3 131 2.079442 6.238326 90 +instructor 0 108 2.197225 0.000000 107 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +memori 0 101 2.302585 0.000000 139 +follow 0 92 2.397895 0.000000 143 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +mondai 2 77 2.564949 5.129898 206 +homework 1 79 2.564949 2.564949 193 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +html 0 75 2.639057 0.000000 235 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +free 0 73 2.639057 0.000000 224 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +test 0 66 2.708050 0.000000 252 +wednesdai 2 64 2.772589 5.545178 261 +organ 1 65 2.772589 2.772589 265 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +previou 0 62 2.772589 0.000000 290 +simpl 0 60 2.833213 0.000000 298 +sampl 1 53 2.944439 2.944439 339 +instruct 1 53 2.944439 2.944439 332 +hardwar 0 51 2.995732 0.000000 350 +appoint 0 49 3.044522 0.000000 358 +quarter 0 47 3.091042 0.000000 389 +fridai 2 44 3.135494 6.270988 390 +answer 1 45 3.135494 3.135494 391 +midterm 0 45 3.135494 0.000000 392 +review 1 42 3.218876 3.218876 425 +fast 0 42 3.218876 0.000000 429 +form 0 39 3.258097 0.000000 443 +robert 0 30 3.555348 0.000000 567 +compon 0 30 3.555348 0.000000 570 +common 0 30 3.555348 0.000000 574 +revis 0 26 3.688879 0.000000 640 +hierarchi 0 22 3.850148 0.000000 744 +color 0 22 3.850148 0.000000 762 +unit 0 21 3.912023 0.000000 779 +binari 1 20 3.951244 3.951244 823 +prerequisit 0 19 4.007333 0.000000 846 +segment 1 17 4.110874 4.110874 931 +regist 0 17 4.110874 0.000000 938 +interconnect 0 17 4.110874 0.000000 937 +sheet 1 16 4.174387 4.174387 973 +transfer 0 16 4.174387 0.000000 967 +larri 0 13 4.382027 0.000000 1142 +assembl 1 12 4.465908 4.465908 1207 +holidai 1 12 4.465908 4.465908 1224 +loew 0 12 4.465908 0.000000 1252 +catalog 0 10 4.653960 0.000000 1431 +arithmet 0 10 4.653960 0.000000 1388 +modul 0 10 4.653960 0.000000 1434 +card 0 10 4.653960 0.000000 1435 +watson 0 8 4.875197 0.000000 1691 +pipelin 1 7 5.010635 5.010635 1830 +snyder 1 5 5.347108 5.347108 2359 +mip 1 4 5.568345 5.568345 2738 +microprogram 0 4 5.568345 0.000000 2604 +appendix 0 4 5.568345 0.000000 2739 +prog 0 4 5.568345 0.000000 2740 +verilog 1 2 6.263398 6.263398 4441 +judi 0 2 6.263398 0.000000 4442 +andorgan 0 2 6.263398 0.000000 4443 +skim 1 1 6.957497 6.957497 7971 +jwatson 0 1 6.957497 0.000000 7972 +chenoffic 0 1 6.957497 0.000000 7973 +thursdays 0 1 6.957497 0.000000 7974 +chensg 0 1 6.957497 0.000000 7975 +laboratoryproject 0 1 6.957497 0.000000 7976 +setprocessor 0 1 6.957497 0.000000 7977 +chap 0 1 6.957497 0.000000 7978 +referencesthi 0 1 6.957497 0.000000 7979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..9455e293 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +class 2 199 1.609438 3.218876 37 +list 1 201 1.609438 1.609438 39 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +hour 1 165 1.791759 1.791759 46 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +assign 2 135 1.945910 3.891820 66 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +machin 1 129 2.079442 2.079442 95 +introduct 0 126 2.079442 0.000000 87 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +welcom 0 122 2.079442 0.000000 99 +report 0 131 2.079442 0.000000 92 +final 2 116 2.197225 4.394450 108 +topic 1 114 2.197225 2.197225 110 +send 1 114 2.197225 2.197225 109 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +world 0 115 2.197225 0.000000 126 +version 0 113 2.197225 0.000000 122 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +text 0 98 2.302585 0.000000 133 +take 0 97 2.302585 0.000000 134 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +search 1 95 2.397895 2.397895 155 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +exam 1 86 2.484907 2.484907 169 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +info 0 85 2.484907 0.000000 176 +wide 0 84 2.484907 0.000000 185 +learn 0 86 2.484907 0.000000 170 +solut 0 82 2.484907 0.000000 162 +april 2 77 2.564949 5.129898 196 +mondai 1 77 2.564949 2.564949 206 +interfac 1 79 2.564949 2.564949 209 +messag 1 76 2.564949 2.564949 212 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +know 0 80 2.564949 0.000000 198 +state 0 76 2.564949 0.000000 207 +june 0 79 2.564949 0.000000 214 +intellig 1 72 2.639057 2.639057 225 +tuesdai 1 73 2.639057 2.639057 219 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +meet 0 72 2.639057 0.000000 229 +sieg 2 69 2.708050 5.416100 260 +test 1 66 2.708050 2.708050 252 +window 1 68 2.708050 2.708050 242 +thursdai 0 70 2.708050 0.000000 241 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +wednesdai 1 64 2.772589 2.772589 261 +artifici 1 63 2.772589 2.772589 280 +creat 1 63 2.772589 2.772589 277 +new 1 64 2.772589 2.772589 262 +laboratori 0 63 2.772589 0.000000 292 +plan 0 65 2.772589 0.000000 272 +descript 0 64 2.772589 0.000000 271 +evalu 0 64 2.772589 0.000000 266 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +best 0 59 2.833213 0.000000 299 +reason 1 57 2.890372 2.890372 318 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +archiv 0 49 3.044522 0.000000 364 +standard 0 48 3.044522 0.000000 365 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +featur 0 46 3.091042 0.000000 386 +understand 0 47 3.091042 0.000000 384 +move 0 47 3.091042 0.000000 382 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +mark 1 44 3.135494 3.135494 403 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +examin 1 42 3.218876 3.218876 424 +review 1 42 3.218876 3.218876 425 +vision 0 41 3.218876 0.000000 430 +form 1 39 3.258097 3.258097 443 +multipl 0 39 3.258097 0.000000 453 +announc 0 40 3.258097 0.000000 441 +continu 0 39 3.258097 0.000000 448 +credit 0 38 3.295837 0.000000 460 +close 0 38 3.295837 0.000000 465 +download 0 36 3.367296 0.000000 489 +staff 0 36 3.367296 0.000000 490 +post 1 35 3.401197 3.401197 505 +represent 1 35 3.401197 3.401197 512 +least 0 35 3.401197 0.000000 516 +either 0 35 3.401197 0.000000 506 +go 0 33 3.433987 0.000000 529 +kind 0 32 3.465736 0.000000 541 +given 0 32 3.465736 0.000000 538 +common 1 30 3.555348 3.555348 574 +neural 1 30 3.555348 3.555348 578 +hard 0 30 3.555348 0.000000 563 +option 0 30 3.555348 0.000000 575 +steve 1 29 3.583519 3.583519 594 +particip 0 29 3.583519 0.000000 589 +turn 0 29 3.583519 0.000000 586 +held 0 28 3.610918 0.000000 600 +propos 0 28 3.610918 0.000000 602 +progress 0 28 3.610918 0.000000 598 +session 1 26 3.688879 3.688879 643 +pagecs 0 26 3.688879 0.000000 658 +rather 0 26 3.688879 0.000000 642 +demonstr 0 24 3.761200 0.000000 694 +tent 0 22 3.850148 0.000000 739 +self 0 22 3.850148 0.000000 761 +try 0 22 3.850148 0.000000 764 +newsgroup 2 21 3.912023 7.824046 783 +expert 1 20 3.951244 3.951244 833 +entir 0 20 3.951244 0.000000 811 +facil 0 20 3.951244 0.000000 814 +separ 1 19 4.007333 4.007333 844 +excel 0 19 4.007333 0.000000 868 +exercis 0 19 4.007333 0.000000 842 +lisp 2 18 4.060443 8.120886 897 +element 1 18 4.060443 4.060443 895 +demo 1 18 4.060443 4.060443 888 +seem 0 18 4.060443 0.000000 899 +statu 0 18 4.060443 0.000000 885 +regular 0 17 4.110874 0.000000 929 +advantag 0 16 4.174387 0.000000 987 +choic 0 16 4.174387 0.000000 979 +explan 0 16 4.174387 0.000000 985 +portion 0 16 4.174387 0.000000 971 +purchas 1 15 4.248495 4.248495 1030 +workload 0 12 4.465908 0.000000 1210 +rest 0 12 4.465908 0.000000 1259 +holidai 0 12 4.465908 0.000000 1224 +sens 1 11 4.553877 4.553877 1305 +probabilist 0 11 4.553877 0.000000 1343 +tanimoto 1 10 4.653960 4.653960 1429 +bring 1 10 4.653960 4.653960 1430 +preliminari 1 9 4.753590 4.753590 1480 +implementationof 0 7 5.010635 0.000000 1813 +reduct 0 7 5.010635 0.000000 1877 +pentium 0 6 5.164786 0.000000 2077 +approv 0 6 5.164786 0.000000 2078 +jeremi 1 5 5.347108 5.347108 2360 +allegro 1 5 5.347108 5.347108 2314 +hardcopi 0 5 5.347108 0.000000 2246 +forprogram 0 5 5.347108 0.000000 2361 +attract 0 5 5.347108 0.000000 2356 +net 1 4 5.568345 5.568345 2741 +peer 1 4 5.568345 5.568345 2742 +freeman 0 4 5.568345 0.000000 2725 +screenshot 0 4 5.568345 0.000000 2743 +andit 0 3 5.857933 0.000000 3328 +contentspag 0 3 5.857933 0.000000 3103 +orpostscript 0 3 5.857933 0.000000 3329 +programmingtechniqu 0 3 5.857933 0.000000 3113 +insieg 0 3 5.857933 0.000000 3331 +evaluationof 0 3 5.857933 0.000000 3192 +assignmentsassign 0 3 5.857933 0.000000 3342 +youdon 0 2 6.263398 0.000000 4444 +referenceon 0 2 6.263398 0.000000 4419 +usingcommon 0 2 6.263398 0.000000 4420 +franz 0 2 6.263398 0.000000 4423 +inour 0 2 6.263398 0.000000 4445 +ofproject 0 2 6.263398 0.000000 4446 +csor 1 1 6.957497 6.957497 7980 +pnew 1 1 6.957497 6.957497 7981 +baermeet 0 1 6.957497 0.000000 7982 +windowsimplement 0 1 6.957497 0.000000 7983 +programdevelop 0 1 6.957497 0.000000 7984 +theintel 0 1 6.957497 0.000000 7985 +isfor 0 1 6.957497 0.000000 7986 +bedownload 0 1 6.957497 0.000000 7987 +givenaccord 0 1 6.957497 0.000000 7988 +alist 0 1 6.957497 0.000000 7989 +coversboth 0 1 6.957497 0.000000 7990 +logicalreason 0 1 6.957497 0.000000 7991 +clo 0 1 6.957497 0.000000 7992 +programmingpart 0 1 6.957497 0.000000 7993 +ofhow 0 1 6.957497 0.000000 7994 +circul 0 1 6.957497 0.000000 7995 +orturn 0 1 6.957497 0.000000 7996 +wrap 0 1 6.957497 0.000000 7997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..b5cbf368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +offic 0 299 1.098612 0.000000 13 +washington 1 236 1.386294 1.386294 32 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +recent 0 167 1.791759 0.000000 58 +assign 0 135 1.945910 0.000000 66 +area 0 144 1.945910 0.000000 80 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +seattl 0 120 2.079442 0.000000 103 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +manag 0 114 2.197225 0.000000 125 +book 1 99 2.302585 2.302585 131 +info 0 85 2.484907 0.000000 176 +resourc 0 81 2.484907 0.000000 172 +academ 0 82 2.484907 0.000000 178 +sourc 1 77 2.564949 2.564949 201 +server 0 76 2.564949 0.000000 204 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +syllabu 0 67 2.708050 0.000000 247 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +laboratori 0 63 2.772589 0.000000 292 +collect 0 65 2.772589 0.000000 268 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +adapt 0 46 3.091042 0.000000 387 +societi 1 40 3.258097 3.258097 456 +announc 0 40 3.258097 0.000000 441 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +robot 1 36 3.367296 3.367296 497 +staff 0 36 3.367296 0.000000 490 +copyright 0 36 3.367296 0.000000 495 +steve 0 29 3.583519 0.000000 594 +quot 0 29 3.583519 0.000000 582 +pagecs 0 26 3.688879 0.000000 658 +accur 0 25 3.737670 0.000000 680 +smith 0 20 3.951244 0.000000 820 +nice 0 20 3.951244 0.000000 809 +anderson 0 19 4.007333 0.000000 860 +dilbert 0 16 4.174387 0.000000 996 +sheet 0 16 4.174387 0.000000 973 +portion 0 16 4.174387 0.000000 971 +comic 0 14 4.317488 0.000000 1103 +reprint 0 14 4.317488 0.000000 1097 +care 0 13 4.382027 0.000000 1177 +loew 0 12 4.465908 0.000000 1252 +duli 0 12 4.465908 0.000000 1248 +stephen 0 11 4.553877 0.000000 1342 +nonprofit 0 11 4.553877 0.000000 1339 +motorola 0 9 4.753590 0.000000 1546 +portland 0 7 5.010635 0.000000 1878 +fred 0 6 5.164786 0.000000 2072 +gaetano 0 6 5.164786 0.000000 2068 +philip 0 6 5.164786 0.000000 2005 +borriello 0 5 5.347108 0.000000 2349 +kent 0 4 5.568345 0.000000 2744 +murphi 0 4 5.568345 0.000000 2737 +comprehens 0 4 5.568345 0.000000 2745 +semiconductor 0 3 5.857933 0.000000 3339 +semiconduct 0 3 5.857933 0.000000 3340 +burn 1 2 6.263398 6.263398 4447 +serverth 0 2 6.263398 0.000000 4448 +designstev 0 1 6.957497 0.000000 7998 +casei 0 1 6.957497 0.000000 7999 +studentslab 0 1 6.957497 0.000000 8000 +mchc 0 1 6.957497 0.000000 8001 +martinrobot 0 1 6.957497 0.000000 8002 +societyoth 0 1 6.957497 0.000000 8003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..0267274d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +document 0 121 2.079442 0.000000 89 +schedul 0 119 2.079442 0.000000 85 +send 0 114 2.197225 0.000000 109 +final 0 116 2.197225 0.000000 108 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +handout 0 64 2.772589 0.000000 263 +march 0 61 2.833213 0.000000 295 +usual 0 28 3.610918 0.000000 608 +session 0 26 3.688879 0.000000 643 +pagewelcom 0 11 4.553877 0.000000 1344 +bug 0 7 5.010635 0.000000 1801 +encount 0 3 5.857933 0.000000 3128 +bevi 0 1 6.957497 0.000000 8004 +relatingto 0 1 6.957497 0.000000 8005 +frequentlychang 0 1 6.957497 0.000000 8006 +bswest 0 1 6.957497 0.000000 8007 +csif 0 1 6.957497 0.000000 8008 +classpersonnelsyllabuslectur 0 1 6.957497 0.000000 8009 +scheduleguest 0 1 6.957497 0.000000 8010 +scheduleoffic 0 1 6.957497 0.000000 8011 +hoursproject 0 1 6.957497 0.000000 8012 +projectoth 0 1 6.957497 0.000000 8013 +erratarefer 0 1 6.957497 0.000000 8014 +pagesmidterm 0 1 6.957497 0.000000 8015 +questionnairebswest 0 1 6.957497 0.000000 8016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..78b6ee43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +list 1 201 1.609438 1.609438 39 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +avail 2 169 1.791759 3.583518 48 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +implement 0 152 1.791759 0.000000 52 +year 1 148 1.945910 1.945910 84 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +file 0 132 1.945910 0.000000 70 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +version 1 113 2.197225 2.197225 122 +final 1 116 2.197225 2.197225 108 +instructor 0 108 2.197225 0.000000 107 +question 0 91 2.397895 0.000000 141 +exam 1 86 2.484907 2.484907 169 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +refer 1 78 2.564949 2.564949 203 +homework 0 79 2.564949 0.000000 193 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +html 0 75 2.639057 0.000000 235 +sieg 1 69 2.708050 2.708050 260 +test 0 66 2.708050 0.000000 252 +import 0 65 2.772589 0.000000 282 +handout 0 64 2.772589 0.000000 263 +descript 0 64 2.772589 0.000000 271 +previou 0 62 2.772589 0.000000 290 +simpl 0 60 2.833213 0.000000 298 +sampl 0 53 2.944439 0.000000 339 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +format 0 48 3.044522 0.000000 356 +standard 0 48 3.044522 0.000000 365 +quarter 1 47 3.091042 3.091042 389 +answer 1 45 3.135494 3.135494 391 +midterm 1 45 3.135494 3.135494 392 +slide 0 38 3.295837 0.000000 467 +manual 1 35 3.401197 3.401197 504 +kind 0 32 3.465736 0.000000 541 +full 0 28 3.610918 0.000000 615 +pagecs 0 26 3.688879 0.000000 658 +sent 0 22 3.850148 0.000000 763 +floor 0 14 4.317488 0.000000 1070 +front 0 13 4.382027 0.000000 1154 +cecil 1 9 4.753590 4.753590 1547 +chamber 1 8 4.875197 4.875197 1692 +leon 0 8 4.875197 0.000000 1631 +affect 0 6 5.164786 0.000000 2044 +textual 0 6 5.164786 0.000000 1979 +vortex 1 5 5.347108 5.347108 2362 +travers 1 5 5.347108 5.347108 2363 +litvinov 0 3 5.857933 0.000000 3343 +vass 1 2 6.263398 6.263398 4449 +informationmeet 0 2 6.263398 0.000000 4450 +cubicl 0 2 6.263398 0.000000 4451 +archivesslid 0 2 6.263398 0.000000 4452 +informationhandout 0 2 6.263398 0.000000 4163 +tutorialth 0 2 6.263398 0.000000 4453 +onmark 0 2 6.263398 0.000000 4454 +languageswint 0 1 6.957497 0.000000 8017 +craigchamb 0 1 6.957497 0.000000 8018 +archivedher 0 1 6.957497 0.000000 8019 +closedbook 0 1 6.957497 0.000000 8020 +wereask 0 1 6.957497 0.000000 8021 +tutorialsth 0 1 6.957497 0.000000 8022 +tutorialhow 0 1 6.957497 0.000000 8023 +enda 0 1 6.957497 0.000000 8024 +interestdead 0 1 6.957497 0.000000 8025 +elim 0 1 6.957497 0.000000 8026 +idfacfg 0 1 6.957497 0.000000 8027 +frameworkvortex 0 1 6.957497 0.000000 8028 +grammarcecil 0 1 6.957497 0.000000 8029 +documentationdocument 0 1 6.957497 0.000000 8030 +resourcesth 0 1 6.957497 0.000000 8031 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..9330a693 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +list 1 201 1.609438 1.609438 39 +class 0 199 1.609438 0.000000 37 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +object 0 138 1.945910 0.000000 79 +compil 1 122 2.079442 2.079442 96 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +instructor 0 108 2.197225 0.000000 107 +find 0 111 2.197225 0.000000 111 +come 0 78 2.564949 0.000000 202 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +sieg 1 69 2.708050 2.708050 260 +handout 0 64 2.772589 0.000000 263 +written 0 63 2.772589 0.000000 278 +room 0 59 2.833213 0.000000 301 +found 1 53 2.944439 2.944439 337 +archiv 0 49 3.044522 0.000000 364 +slide 0 38 3.295837 0.000000 467 +manual 0 35 3.401197 0.000000 504 +full 0 28 3.610918 0.000000 615 +pagecs 0 26 3.688879 0.000000 658 +jeff 0 25 3.737670 0.000000 673 +sent 0 22 3.850148 0.000000 763 +sort 0 22 3.850148 0.000000 738 +chateau 1 16 4.174387 4.174387 997 +dean 0 14 4.317488 0.000000 1104 +dave 0 14 4.317488 0.000000 1098 +cecil 1 9 4.753590 4.753590 1547 +chamber 1 8 4.875197 4.875197 1692 +grove 1 8 4.875197 4.875197 1675 +leon 0 8 4.875197 0.000000 1631 +craig 0 7 5.010635 0.000000 1879 +vortex 1 5 5.347108 5.347108 2362 +projectth 0 3 5.857933 0.000000 3344 +jdean 1 2 6.263398 6.263398 4455 +informationmeet 0 2 6.263398 0.000000 4450 +archivesslid 0 2 6.263398 0.000000 4452 +optimizingcompil 0 2 6.263398 0.000000 4456 +cecilproject 0 2 6.263398 0.000000 4457 +onmark 0 2 6.263398 0.000000 4454 +languagesimport 0 1 6.957497 0.000000 8032 +turori 0 1 6.957497 0.000000 8033 +andtransform 0 1 6.957497 0.000000 8034 +resourcesmor 0 1 6.957497 0.000000 8035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..f6b3df66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +softwar 0 220 1.386294 0.000000 30 +washington 0 236 1.386294 0.000000 32 +assign 1 135 1.945910 1.945910 66 +spring 0 131 2.079442 0.000000 88 +handout 0 64 2.772589 0.000000 263 +sampl 0 53 2.944439 0.000000 339 +pagecs 0 26 3.688879 0.000000 658 +introductori 0 9 4.753590 0.000000 1479 +notkin 0 3 5.857933 0.000000 3345 +engineeringdavid 0 1 6.957497 0.000000 8036 +kwic 0 1 6.957497 0.000000 8037 +projectsnotkin 0 1 6.957497 0.000000 8038 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..33636d6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +hour 1 165 1.791759 1.791759 46 +assign 0 135 1.945910 0.000000 66 +object 0 138 1.945910 0.000000 79 +postscript 0 131 2.079442 0.000000 90 +studi 0 120 2.079442 0.000000 91 +introduct 0 126 2.079442 0.000000 87 +send 1 114 2.197225 2.197225 109 +instructor 0 108 2.197225 0.000000 107 +question 0 91 2.397895 0.000000 141 +resourc 1 81 2.484907 2.484907 172 +info 0 85 2.484907 0.000000 176 +messag 1 76 2.564949 2.564949 212 +orient 1 80 2.564949 2.564949 205 +refer 0 78 2.564949 0.000000 203 +david 0 71 2.639057 0.000000 232 +html 0 75 2.639057 0.000000 235 +line 0 75 2.639057 0.000000 231 +sieg 0 69 2.708050 0.000000 260 +handout 1 64 2.772589 2.772589 263 +archiv 0 49 3.044522 0.000000 364 +standard 0 48 3.044522 0.000000 365 +mark 0 44 3.135494 0.000000 403 +singl 0 34 3.401197 0.000000 510 +concept 0 32 3.465736 0.000000 537 +administr 0 27 3.637586 0.000000 628 +pagecs 0 26 3.688879 0.000000 658 +subject 0 26 3.688879 0.000000 647 +wish 0 24 3.761200 0.000000 692 +yahoo 0 24 3.761200 0.000000 707 +thread 1 23 3.806662 3.806662 722 +sent 0 22 3.850148 0.000000 763 +self 0 22 3.850148 0.000000 761 +scheme 0 20 3.951244 0.000000 818 +excel 0 19 4.007333 0.000000 868 +previous 0 17 4.110874 0.000000 923 +floor 0 14 4.317488 0.000000 1070 +bodi 0 13 4.382027 0.000000 1178 +mellon 0 13 4.382027 0.000000 1179 +calculu 1 12 4.465908 4.465908 1203 +loew 0 12 4.465908 0.000000 1252 +carnegi 0 12 4.465908 0.000000 1260 +appl 0 11 4.553877 0.000000 1303 +subscrib 1 9 4.753590 4.753590 1541 +kurt 0 9 4.753590 0.000000 1548 +introductori 0 9 4.753590 0.000000 1479 +cecil 0 9 4.753590 0.000000 1547 +leon 0 8 4.875197 0.000000 1631 +dylan 0 8 4.875197 0.000000 1625 +majordomo 0 6 5.164786 0.000000 2066 +gentl 0 5 5.347108 0.000000 2264 +notkin 1 3 5.857933 5.857933 3345 +partridg 0 3 5.857933 0.000000 3346 +lambda 1 2 6.263398 6.263398 4458 +kepart 0 2 6.263398 0.000000 4459 +monash 0 2 6.263398 0.000000 4460 +languagesautumn 0 1 6.957497 0.000000 8039 +byappoint 0 1 6.957497 0.000000 8040 +cubiclescours 0 1 6.957497 0.000000 8041 +readingsmail 0 1 6.957497 0.000000 8042 +archivesw 0 1 6.957497 0.000000 8043 +instructionalpurpos 0 1 6.957497 0.000000 8044 +emailto 0 1 6.957497 0.000000 8045 +csegener 0 1 6.957497 0.000000 8046 +pagesprogram 0 1 6.957497 0.000000 8047 +critiquesgari 0 1 6.957497 0.000000 8048 +leaven 0 1 6.957497 0.000000 8049 +pagefunct 0 1 6.957497 0.000000 8050 +resourcesmit 0 1 6.957497 0.000000 8051 +pagecmu 0 1 6.957497 0.000000 8052 +pagea 0 1 6.957497 0.000000 8053 +mlhaskel 0 1 6.957497 0.000000 8054 +universityobject 0 1 6.957497 0.000000 8055 +geneva 0 1 6.957497 0.000000 8056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..caf5633e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +gener 0 220 1.386294 0.000000 27 +class 1 199 1.609438 1.609438 37 +list 0 201 1.609438 0.000000 39 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +base 0 165 1.791759 0.000000 50 +lectur 1 135 1.945910 1.945910 73 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +theori 0 111 2.197225 0.000000 127 +book 0 99 2.302585 0.000000 131 +question 0 91 2.397895 0.000000 141 +solut 2 82 2.484907 4.969814 162 +exam 1 86 2.484907 2.484907 169 +learn 0 86 2.484907 0.000000 170 +homework 2 79 2.564949 5.129898 193 +mondai 1 77 2.564949 2.564949 206 +know 0 80 2.564949 0.000000 198 +write 1 72 2.639057 2.639057 222 +tuesdai 0 73 2.639057 0.000000 219 +materi 0 75 2.639057 0.000000 221 +solv 0 73 2.639057 0.000000 234 +sieg 0 69 2.708050 0.000000 260 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +written 0 63 2.772589 0.000000 278 +room 1 59 2.833213 2.833213 301 +march 0 61 2.833213 0.000000 295 +undergradu 0 54 2.944439 0.000000 338 +suggest 0 53 2.944439 0.000000 331 +cover 0 55 2.944439 0.000000 329 +set 1 50 3.044522 3.044522 361 +appoint 0 49 3.044522 0.000000 358 +possibl 0 47 3.091042 0.000000 378 +could 0 46 3.091042 0.000000 383 +discuss 1 45 3.135494 3.135494 399 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +realli 0 40 3.258097 0.000000 444 +probabl 0 40 3.258097 0.000000 455 +must 0 40 3.258097 0.000000 442 +close 0 38 3.295837 0.000000 465 +winter 0 36 3.367296 0.000000 500 +soon 0 36 3.367296 0.000000 494 +short 0 36 3.367296 0.000000 499 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +richard 0 31 3.496508 0.000000 559 +particip 0 29 3.583519 0.000000 589 +rule 0 26 3.688879 0.000000 638 +background 0 25 3.737670 0.000000 664 +togeth 0 23 3.806662 0.000000 714 +william 0 22 3.850148 0.000000 765 +half 0 21 3.912023 0.000000 776 +anderson 1 19 4.007333 4.007333 860 +els 0 19 4.007333 0.000000 843 +prerequisit 0 19 4.007333 0.000000 846 +assum 0 19 4.007333 0.000000 845 +chateau 0 16 4.174387 0.000000 997 +alreadi 0 16 4.174387 0.000000 963 +quiz 0 16 4.174387 0.000000 990 +upon 0 16 4.174387 0.000000 978 +anyth 0 16 4.174387 0.000000 998 +floor 0 14 4.317488 0.000000 1070 +script 0 13 4.382027 0.000000 1171 +verifi 0 12 4.465908 0.000000 1261 +island 1 11 4.553877 4.553877 1345 +errata 0 10 4.653960 0.000000 1403 +classmat 0 9 4.753590 0.000000 1516 +equival 0 9 4.753590 0.000000 1496 +told 0 8 4.875197 0.000000 1658 +chan 0 7 5.010635 0.000000 1876 +wrong 0 6 5.164786 0.000000 2025 +lack 0 6 5.164786 0.000000 1994 +invok 0 6 5.164786 0.000000 2079 +understood 0 5 5.347108 0.000000 2364 +cancel 0 4 5.568345 0.000000 2746 +episod 0 4 5.568345 0.000000 2747 +wchan 0 3 5.857933 0.000000 3338 +preview 0 3 5.857933 0.000000 3306 +algorithmscs 0 2 6.263398 0.000000 4461 +seig 0 2 6.263398 0.000000 4462 +cubicl 0 2 6.263398 0.000000 4451 +somebodi 0 2 6.263398 0.000000 4463 +outer 0 2 6.263398 0.000000 4464 +okai 0 2 6.263398 0.000000 4465 +eduwchan 0 2 6.263398 0.000000 4435 +gilligan 1 1 6.957497 6.957497 8057 +readingtextbook 0 1 6.957497 0.000000 8058 +sapplet 0 1 6.957497 0.000000 8059 +willconsist 0 1 6.957497 0.000000 8060 +bureaucrat 0 1 6.957497 0.000000 8061 +stuffgrad 0 1 6.957497 0.000000 8062 +homeworkproblem 0 1 6.957497 0.000000 8063 +upindepend 0 1 6.957497 0.000000 8064 +betweenani 0 1 6.957497 0.000000 8065 +mustwatch 0 1 6.957497 0.000000 8066 +thatan 0 1 6.957497 0.000000 8067 +reboot 0 1 6.957497 0.000000 8068 +thatsurv 0 1 6.957497 0.000000 8069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..49d5bf61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +gener 1 220 1.386294 1.386294 27 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +parallel 2 169 1.791759 3.583518 60 +algorithm 1 162 1.791759 1.791759 57 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +year 1 148 1.945910 1.945910 84 +assign 0 135 1.945910 0.000000 66 +relat 0 139 1.945910 0.000000 68 +machin 1 129 2.079442 2.079442 95 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +introduct 0 126 2.079442 0.000000 87 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +topic 2 114 2.197225 4.394450 110 +theori 1 111 2.197225 2.197225 127 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +code 0 108 2.197225 0.000000 116 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +specif 0 106 2.197225 0.000000 106 +memori 1 101 2.302585 2.302585 139 +book 1 99 2.302585 2.302585 131 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +text 0 98 2.302585 0.000000 133 +real 1 93 2.397895 2.397895 144 +select 0 91 2.397895 0.000000 154 +section 0 94 2.397895 0.000000 149 +present 0 91 2.397895 0.000000 145 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +requir 0 81 2.484907 0.000000 167 +homework 2 79 2.564949 5.129898 193 +april 1 77 2.564949 2.564949 196 +refer 0 78 2.564949 0.000000 203 +come 0 78 2.564949 0.000000 202 +effici 1 73 2.639057 2.639057 233 +meet 0 72 2.639057 0.000000 229 +addit 0 74 2.639057 0.000000 228 +tuesdai 0 73 2.639057 0.000000 219 +write 0 72 2.639057 0.000000 222 +thursdai 1 70 2.708050 2.708050 241 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +would 0 67 2.708050 0.000000 251 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +descript 1 64 2.772589 2.772589 271 +collect 0 65 2.772589 0.000000 268 +copi 0 63 2.772589 0.000000 284 +plan 0 65 2.772589 0.000000 272 +result 0 65 2.772589 0.000000 281 +share 1 59 2.833213 2.833213 304 +content 0 59 2.833213 0.000000 302 +major 0 56 2.890372 0.000000 315 +special 0 56 2.890372 0.000000 320 +think 0 57 2.890372 0.000000 314 +cover 0 55 2.944439 0.000000 329 +three 0 54 2.944439 0.000000 330 +particular 0 51 2.995732 0.000000 352 +pointer 1 48 3.044522 3.044522 368 +approach 0 48 3.044522 0.000000 366 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +could 1 46 3.091042 3.091042 383 +term 0 43 3.178054 0.000000 411 +howev 1 41 3.218876 3.218876 422 +fast 0 42 3.218876 0.000000 429 +probabl 0 40 3.258097 0.000000 455 +correct 0 38 3.295837 0.000000 462 +close 0 38 3.295837 0.000000 465 +origin 0 38 3.295837 0.000000 472 +open 0 38 3.295837 0.000000 469 +connect 1 37 3.332205 3.332205 485 +expect 1 37 3.332205 3.332205 484 +cost 0 37 3.332205 0.000000 480 +feel 0 37 3.332205 0.000000 483 +next 0 34 3.401197 0.000000 517 +go 1 33 3.433987 3.433987 529 +taught 0 33 3.433987 0.000000 526 +richard 0 31 3.496508 0.000000 559 +titl 0 31 3.496508 0.000000 556 +graph 0 30 3.555348 0.000000 576 +compon 0 30 3.555348 0.000000 570 +exist 0 30 3.555348 0.000000 569 +consid 1 29 3.583519 3.583519 590 +limit 0 29 3.583519 0.000000 585 +progress 0 28 3.610918 0.000000 598 +quit 0 27 3.637586 0.000000 633 +mind 0 27 3.637586 0.000000 632 +challeng 0 26 3.688879 0.000000 653 +although 1 25 3.737670 3.737670 667 +fundament 0 25 3.737670 0.000000 661 +supercomput 0 25 3.737670 0.000000 681 +other 0 24 3.761200 0.000000 697 +sort 0 22 3.850148 0.000000 738 +emphasi 0 22 3.850148 0.000000 755 +instead 0 22 3.850148 0.000000 756 +theorem 0 21 3.912023 0.000000 786 +flexibl 0 21 3.912023 0.000000 792 +half 0 21 3.912023 0.000000 776 +nice 1 20 3.951244 3.951244 809 +anderson 1 19 4.007333 4.007333 860 +prerequisit 0 19 4.007333 0.000000 846 +spend 0 19 4.007333 0.000000 850 +prove 0 19 4.007333 0.000000 848 +four 0 18 4.060443 0.000000 905 +matrix 0 17 4.110874 0.000000 933 +interconnect 0 17 4.110874 0.000000 937 +upon 0 16 4.174387 0.000000 978 +choic 0 16 4.174387 0.000000 979 +mayb 0 15 4.248495 0.000000 1014 +indic 0 15 4.248495 0.000000 1013 +purchas 0 15 4.248495 0.000000 1030 +rank 0 14 4.317488 0.000000 1063 +latex 0 14 4.317488 0.000000 1064 +topolog 0 14 4.317488 0.000000 1089 +consider 0 14 4.317488 0.000000 1076 +happi 0 14 4.317488 0.000000 1079 +insid 0 12 4.465908 0.000000 1262 +asynchron 0 12 4.465908 0.000000 1229 +transpar 0 11 4.553877 0.000000 1325 +sens 0 11 4.553877 0.000000 1305 +motiv 0 11 4.553877 0.000000 1346 +volum 0 11 4.553877 0.000000 1347 +catalog 0 10 4.653960 0.000000 1431 +arithmet 0 10 4.653960 0.000000 1388 +tradit 0 10 4.653960 0.000000 1404 +equival 0 9 4.753590 0.000000 1496 +routin 0 9 4.753590 0.000000 1549 +foc 0 7 5.010635 0.000000 1880 +uniform 0 7 5.010635 0.000000 1845 +plu 0 6 5.164786 0.000000 2004 +consensu 0 6 5.164786 0.000000 2080 +situat 0 5 5.347108 0.000000 2365 +volunt 0 5 5.347108 0.000000 2307 +algorithmsfor 0 4 5.568345 0.000000 2748 +ullman 0 4 5.568345 0.000000 2749 +union 0 4 5.568345 0.000000 2634 +buss 0 4 5.568345 0.000000 2649 +manuscript 0 4 5.568345 0.000000 2750 +wewil 0 4 5.568345 0.000000 2688 +cheap 0 4 5.568345 0.000000 2751 +chose 0 4 5.568345 0.000000 2629 +rambl 0 3 5.857933 0.000000 3308 +crew 0 3 5.857933 0.000000 3347 +impli 0 3 5.857933 0.000000 3348 +pertain 0 3 5.857933 0.000000 3208 +andyou 0 3 5.857933 0.000000 3256 +parallelalgorithm 0 3 5.857933 0.000000 3249 +influenc 0 3 5.857933 0.000000 3349 +algorithmscs 0 2 6.263398 0.000000 4461 +algorithmi 0 2 6.263398 0.000000 4208 +simpler 0 2 6.263398 0.000000 4210 +swap 0 2 6.263398 0.000000 4466 +exception 0 2 6.263398 0.000000 4467 +bake 0 2 6.263398 0.000000 4468 +ideason 0 2 6.263398 0.000000 4469 +appointment 0 1 6.957497 0.000000 8070 +developingfast 0 1 6.957497 0.000000 8071 +theirefficaci 0 1 6.957497 0.000000 8072 +commentsabout 0 1 6.957497 0.000000 8073 +analysisfor 0 1 6.957497 0.000000 8074 +referencesfor 0 1 6.957497 0.000000 8075 +erew 0 1 6.957497 0.000000 8076 +yannakaki 0 1 6.957497 0.000000 8077 +certifi 0 1 6.957497 0.000000 8078 +likelysometh 0 1 6.957497 0.000000 8079 +martel 0 1 6.957497 0.000000 8080 +whim 0 1 6.957497 0.000000 8081 +smpc 0 1 6.957497 0.000000 8082 +lookingat 0 1 6.957497 0.000000 8083 +isnon 0 1 6.957497 0.000000 8084 +notconsid 0 1 6.957497 0.000000 8085 +indevelop 0 1 6.957497 0.000000 8086 +algorithmswhich 0 1 6.957497 0.000000 8087 +conceiv 0 1 6.957497 0.000000 8088 +goingto 0 1 6.957497 0.000000 8089 +outsidework 0 1 6.957497 0.000000 8090 +befollow 0 1 6.957497 0.000000 8091 +youcould 0 1 6.957497 0.000000 8092 +textwould 0 1 6.957497 0.000000 8093 +artof 0 1 6.957497 0.000000 8094 +mychoic 0 1 6.957497 0.000000 8095 +interestingor 0 1 6.957497 0.000000 8096 +uninterest 0 1 6.957497 0.000000 8097 +aseith 0 1 6.957497 0.000000 8098 +researchcont 0 1 6.957497 0.000000 8099 +turninto 0 1 6.957497 0.000000 8100 +andenergi 0 1 6.957497 0.000000 8101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..b48f741a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +updat 0 191 1.609438 0.000000 41 +provid 0 121 2.079442 0.000000 94 +comment 0 93 2.397895 0.000000 146 +sourc 0 77 2.564949 0.000000 201 +complex 0 64 2.772589 0.000000 269 +move 0 47 3.091042 0.000000 382 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +autumn 1 31 3.496508 3.496508 558 +ofwashington 0 22 3.850148 0.000000 766 +portion 0 16 4.174387 0.000000 971 +webmast 0 15 4.248495 0.000000 1045 +reprint 0 14 4.317488 0.000000 1097 +automata 0 13 4.382027 0.000000 1135 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +foracadem 0 5 5.347108 0.000000 2341 +accuratelyquot 0 2 6.263398 0.000000 4470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..0b6a235c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +washington 0 236 1.386294 0.000000 32 +fall 0 181 1.609438 0.000000 40 +postscript 1 131 2.079442 2.079442 90 +welcom 0 122 2.079442 0.000000 99 +instructor 0 108 2.197225 0.000000 107 +world 0 115 2.197225 0.000000 126 +final 0 116 2.197225 0.000000 108 +wide 0 84 2.484907 0.000000 185 +exam 0 86 2.484907 0.000000 169 +paul 0 38 3.295837 0.000000 471 +short 0 36 3.367296 0.000000 499 +quiz 1 16 4.174387 4.174387 990 +latex 0 14 4.317488 0.000000 1064 +hypermedia 0 12 4.465908 0.000000 1247 +documentfor 0 7 5.010635 0.000000 1865 +beam 1 5 5.347108 5.347108 2344 +automataautumn 0 1 6.957497 0.000000 8102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..f279c235 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +file 1 132 1.945910 1.945910 70 +assign 0 135 1.945910 0.000000 66 +support 0 132 1.945910 0.000000 83 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +comment 0 93 2.397895 0.000000 146 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +sourc 1 77 2.564949 2.564949 201 +messag 0 76 2.564949 0.000000 212 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +free 1 73 2.639057 2.639057 224 +materi 0 75 2.639057 0.000000 221 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +window 0 68 2.708050 0.000000 242 +organ 1 65 2.772589 2.772589 265 +complex 1 64 2.772589 2.772589 269 +handout 0 64 2.772589 0.000000 263 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +three 0 54 2.944439 0.000000 330 +format 1 48 3.044522 3.044522 356 +adapt 0 46 3.091042 0.000000 387 +midterm 1 45 3.135494 3.135494 392 +textbook 0 44 3.135494 0.000000 397 +math 0 44 3.135494 0.000000 402 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +copyright 0 36 3.367296 0.000000 495 +print 0 34 3.401197 0.000000 503 +collabor 1 32 3.465736 3.465736 543 +autumn 1 31 3.496508 3.496508 558 +hard 0 30 3.555348 0.000000 563 +quit 0 27 3.637586 0.000000 633 +linux 0 27 3.637586 0.000000 631 +sent 0 22 3.850148 0.000000 763 +ofwashington 0 22 3.850148 0.000000 766 +viewer 1 21 3.912023 3.912023 787 +latest 0 21 3.912023 0.000000 785 +figur 0 18 4.060443 0.000000 903 +render 0 17 4.110874 0.000000 947 +portion 0 16 4.174387 0.000000 971 +ascii 0 15 4.248495 0.000000 1032 +webmast 0 15 4.248495 0.000000 1045 +latex 1 14 4.317488 4.317488 1064 +command 0 14 4.317488 0.000000 1083 +reprint 0 14 4.317488 0.000000 1097 +larri 1 13 4.382027 4.382027 1142 +automata 0 13 4.382027 0.000000 1135 +web 0 12 4.465908 0.000000 1249 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +errata 0 10 4.653960 0.000000 1403 +tuth 0 9 4.753590 0.000000 1519 +plain 0 9 4.753590 0.000000 1495 +imposs 0 9 4.753590 0.000000 1513 +perhap 0 8 4.875197 0.000000 1693 +legibl 1 7 5.010635 5.010635 1866 +adob 1 7 5.010635 5.010635 1873 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 1 5 5.347108 5.347108 2345 +foracadem 0 5 5.347108 0.000000 2341 +sharma 0 4 5.568345 0.000000 2752 +thecours 0 4 5.568345 0.000000 2685 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +administrivia 0 3 5.857933 0.000000 3166 +ghostview 0 3 5.857933 0.000000 3163 +accuratelyquot 0 2 6.263398 0.000000 4470 +nitin 1 1 6.957497 6.957497 8103 +staffnameemailphoneoffic 0 1 6.957497 0.000000 8104 +csmw 0 1 6.957497 0.000000 8105 +acroread 0 1 6.957497 0.000000 8106 +aavail 0 1 6.957497 0.000000 8107 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..325df010 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +implement 0 152 1.791759 0.000000 52 +first 1 140 1.945910 1.945910 71 +process 0 142 1.945910 0.000000 72 +well 1 109 2.197225 2.197225 121 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +search 1 95 2.397895 2.397895 155 +thing 0 84 2.484907 0.000000 189 +issu 1 78 2.564949 2.564949 211 +good 0 77 2.564949 0.000000 200 +june 0 79 2.564949 0.000000 214 +logic 1 71 2.639057 2.639057 230 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +involv 0 71 2.639057 0.000000 227 +test 1 66 2.708050 2.708050 252 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +thursdai 0 70 2.708050 0.000000 241 +complex 1 64 2.772589 2.772589 269 +talk 0 53 2.944439 0.000000 336 +give 0 50 3.044522 0.000000 359 +even 0 45 3.135494 0.000000 393 +autom 0 41 3.218876 0.000000 434 +examin 0 42 3.218876 0.000000 424 +theoret 1 39 3.258097 3.258097 446 +paul 0 38 3.295837 0.000000 471 +slide 0 38 3.295837 0.000000 467 +survei 0 35 3.401197 0.000000 513 +within 0 33 3.433987 0.000000 525 +autumn 0 31 3.496508 0.000000 558 +often 0 31 3.496508 0.000000 551 +consid 0 29 3.583519 0.000000 590 +compar 0 26 3.688879 0.000000 648 +strategi 1 25 3.737670 3.737670 682 +higher 1 24 3.761200 3.761200 690 +interpret 0 24 3.761200 0.000000 686 +proof 1 23 3.806662 3.806662 720 +instal 1 22 3.850148 3.850148 754 +varieti 0 22 3.850148 0.000000 740 +theorem 1 21 3.912023 3.912023 786 +vlsi 0 21 3.912023 0.000000 795 +verif 0 20 3.951244 0.000000 826 +prove 1 19 4.007333 4.007333 848 +concentr 1 18 4.060443 4.060443 906 +aid 0 18 4.060443 0.000000 904 +attempt 0 17 4.110874 0.000000 917 +moor 0 17 4.110874 0.000000 936 +choic 0 16 4.174387 0.000000 979 +side 0 15 4.248495 0.000000 1022 +anywai 0 15 4.248495 0.000000 1047 +decid 0 14 4.317488 0.000000 1075 +consider 0 14 4.317488 0.000000 1076 +loew 0 12 4.465908 0.000000 1252 +statement 0 11 4.553877 0.000000 1313 +rel 0 9 4.753590 0.000000 1487 +satisfi 1 8 4.875197 4.875197 1694 +prover 1 8 4.875197 4.875197 1653 +proposit 1 5 5.347108 5.347108 2339 +beam 0 5 5.347108 0.000000 2344 +amus 0 5 5.347108 0.000000 2366 +andsoftwar 0 4 5.568345 0.000000 2753 +tester 0 4 5.568345 0.000000 2754 +theoremprov 0 3 5.857933 0.000000 3298 +theoryand 0 3 5.857933 0.000000 3350 +scatter 0 3 5.857933 0.000000 3351 +truthof 0 1 6.957497 0.000000 8108 +casea 0 1 6.957497 0.000000 8109 +flip 0 1 6.957497 0.000000 8110 +oftheorem 0 1 6.957497 0.000000 8111 +finitedomain 0 1 6.957497 0.000000 8112 +thesequest 0 1 6.957497 0.000000 8113 +complexityand 0 1 6.957497 0.000000 8114 +anumb 0 1 6.957497 0.000000 8115 +urquhart 0 1 6.957497 0.000000 8116 +sato 0 1 6.957497 0.000000 8117 +andboy 0 1 6.957497 0.000000 8118 +gsat 0 1 6.957497 0.000000 8119 +thedirectori 0 1 6.957497 0.000000 8120 +proversther 0 1 6.957497 0.000000 8121 +ofinstal 0 1 6.957497 0.000000 8122 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..ef30697a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +confer 0 126 2.079442 0.000000 100 +topic 0 114 2.197225 0.000000 110 +solut 0 82 2.484907 0.000000 162 +mondai 0 77 2.564949 0.000000 206 +meet 0 72 2.639057 0.000000 229 +wednesdai 0 64 2.772589 0.000000 261 +fridai 0 44 3.135494 0.000000 390 +packag 0 28 3.610918 0.000000 614 +measur 0 28 3.610918 0.000000 609 +pagecs 0 26 3.688879 0.000000 658 +sigmetr 0 13 4.382027 0.000000 1173 +loew 0 12 4.465908 0.000000 1252 +host 0 11 4.553877 0.000000 1306 +queue 0 10 4.653960 0.000000 1386 +systemperform 0 1 6.957497 0.000000 8123 +modelingspr 0 1 6.957497 0.000000 8124 +lazowskaandmaryvernonwelcom 0 1 6.957497 0.000000 8125 +performancemodel 0 1 6.957497 0.000000 8126 +hourstent 0 1 6.957497 0.000000 8127 +schedulecom 0 1 6.957497 0.000000 8128 +goingsassignmentsproject 0 1 6.957497 0.000000 8129 +informationmap 0 1 6.957497 0.000000 8130 +emailoth 0 1 6.957497 0.000000 8131 +computersystemsuw 0 1 6.957497 0.000000 8132 +engineeringlazowska 0 1 6.957497 0.000000 8133 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..3cc1d251 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +applic 0 170 1.791759 0.000000 56 +architectur 1 139 1.945910 1.945910 77 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +postscript 1 131 2.079442 2.079442 90 +tool 1 117 2.079442 2.079442 93 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +code 0 108 2.197225 0.000000 116 +user 0 104 2.302585 0.000000 137 +center 0 88 2.397895 0.000000 158 +info 1 85 2.484907 2.484907 176 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +simul 1 66 2.708050 2.708050 255 +sieg 1 69 2.708050 2.708050 260 +test 0 66 2.708050 0.000000 252 +previou 0 62 2.772589 0.000000 290 +overview 0 56 2.890372 0.000000 323 +instruct 0 53 2.944439 0.000000 332 +local 0 55 2.944439 0.000000 334 +processor 0 54 2.944439 0.000000 335 +hardwar 0 51 2.995732 0.000000 350 +set 0 50 3.044522 0.000000 361 +execut 0 45 3.135494 0.000000 404 +cach 0 41 3.218876 0.000000 432 +futur 0 41 3.218876 0.000000 427 +continu 0 39 3.258097 0.000000 448 +close 0 38 3.295837 0.000000 465 +manual 0 35 3.401197 0.000000 504 +multiprocessor 0 28 3.610918 0.000000 605 +binari 0 20 3.951244 0.000000 823 +histori 0 19 4.007333 0.000000 853 +benchmark 0 19 4.007333 0.000000 859 +analyz 0 17 4.110874 0.000000 925 +monitor 0 17 4.110874 0.000000 941 +rate 0 15 4.248495 0.000000 1037 +driven 0 15 4.248495 0.000000 1048 +neat 0 12 4.465908 0.000000 1263 +alpha 1 11 4.553877 4.553877 1348 +tuth 0 9 4.753590 0.000000 1519 +egger 1 8 4.875197 4.875197 1695 +uniprocessor 1 8 4.875197 4.875197 1696 +spec 0 8 4.875197 0.000000 1640 +sparc 1 7 5.010635 5.010635 1860 +shade 0 7 5.010635 0.000000 1881 +pentium 1 6 5.164786 5.164786 2077 +tullsen 0 6 5.164786 0.000000 2081 +superscalar 0 6 5.164786 0.000000 2082 +rewrit 0 5 5.347108 0.000000 2367 +etch 0 4 5.568345 0.000000 2755 +redston 1 3 5.857933 5.857933 3332 +specmark 0 2 6.263398 0.000000 4471 +atom 0 2 6.263398 0.000000 4472 +multiflow 0 2 6.263398 0.000000 4473 +powerpc 0 2 6.263398 0.000000 4238 +architecturewint 0 1 6.957497 0.000000 8134 +instructorsusan 0 1 6.957497 0.000000 8135 +tajoshua 0 1 6.957497 0.000000 8136 +instuct 0 1 6.957497 0.000000 8137 +pixi 0 1 6.957497 0.000000 8138 +dinero 0 1 6.957497 0.000000 8139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..105a2229 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +mail 1 238 1.386294 1.386294 22 +washington 0 236 1.386294 0.000000 32 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +spring 0 131 2.079442 0.000000 88 +confer 0 126 2.079442 0.000000 100 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +document 0 121 2.079442 0.000000 89 +instructor 1 108 2.197225 2.197225 107 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +april 1 77 2.564949 2.564949 196 +meet 0 72 2.639057 0.000000 229 +room 0 59 2.833213 0.000000 301 +frequent 0 49 3.044522 0.000000 367 +keep 0 44 3.135494 0.000000 409 +announc 0 40 3.258097 0.000000 441 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +ad 0 32 3.465736 0.000000 544 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +pagecs 0 26 3.688879 0.000000 658 +unit 0 21 3.912023 0.000000 779 +chateau 0 16 4.174387 0.000000 997 +levi 0 14 4.317488 0.000000 1093 +hank 0 12 4.465908 0.000000 1253 +hypermedia 0 12 4.465908 0.000000 1247 +readi 0 12 4.465908 0.000000 1242 +pighin 1 4 5.568345 5.568345 2735 +thisdocu 1 3 5.857933 5.857933 3336 +freder 0 3 5.857933 0.000000 3352 +iti 0 2 6.263398 0.000000 4066 +forcs 0 1 6.957497 0.000000 8140 +classmessag 0 1 6.957497 0.000000 8141 +projectlevi 0 1 6.957497 0.000000 8142 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..5b15aab5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +read 0 154 1.791759 0.000000 47 +base 0 165 1.791759 0.000000 50 +year 1 148 1.945910 1.945910 84 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +professor 0 137 1.945910 0.000000 76 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +document 1 121 2.079442 2.079442 89 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +grade 0 90 2.397895 0.000000 142 +imag 0 91 2.397895 0.000000 161 +follow 0 92 2.397895 0.000000 143 +help 1 83 2.484907 2.484907 175 +wide 0 84 2.484907 0.000000 185 +solut 0 82 2.484907 0.000000 162 +resourc 0 81 2.484907 0.000000 172 +homework 0 79 2.564949 0.000000 193 +addit 0 74 2.639057 0.000000 228 +html 0 75 2.639057 0.000000 235 +degre 1 69 2.708050 2.708050 259 +syllabu 0 67 2.708050 0.000000 247 +test 0 66 2.708050 0.000000 252 +written 0 63 2.772589 0.000000 278 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +colleg 1 61 2.833213 2.833213 300 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +browser 0 56 2.890372 0.000000 313 +instruct 0 53 2.944439 0.000000 332 +cool 0 49 3.044522 0.000000 374 +visitor 0 49 3.044522 0.000000 371 +basic 0 50 3.044522 0.000000 360 +quarter 0 47 3.091042 0.000000 389 +get 0 46 3.091042 0.000000 380 +keep 0 44 3.135494 0.000000 409 +offer 1 43 3.178054 3.178054 414 +art 0 29 3.583519 0.000000 593 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +pagecs 0 26 3.688879 0.000000 658 +hypertext 0 19 4.007333 0.000000 865 +charact 0 15 4.248495 0.000000 1028 +hypermedia 0 12 4.465908 0.000000 1247 +mosaic 1 10 4.653960 4.653960 1426 +uniform 0 7 5.010635 0.000000 1845 +markup 0 6 5.164786 0.000000 2059 +whichcontain 0 4 5.568345 0.000000 2714 +wealth 0 3 5.857933 0.000000 3353 +thatthi 0 2 6.263398 0.000000 4379 +addedfrequ 0 2 6.263398 0.000000 4380 +deros 0 2 6.263398 0.000000 4474 +indi 0 2 6.263398 0.000000 4431 +mvi 0 2 6.263398 0.000000 4382 +usinglynx 0 2 6.263398 0.000000 4383 +graphicsautumn 0 1 6.957497 0.000000 8143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..b3231378 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +class 2 199 1.609438 3.218876 37 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +recent 0 167 1.791759 0.000000 58 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +site 0 106 2.197225 0.000000 119 +book 1 99 2.302585 2.302585 131 +text 0 98 2.302585 0.000000 133 +memori 0 101 2.302585 0.000000 139 +commun 0 95 2.397895 0.000000 157 +exam 0 86 2.484907 0.000000 169 +academ 0 82 2.484907 0.000000 178 +homework 2 79 2.564949 5.129898 193 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +logic 1 71 2.639057 2.639057 230 +summari 0 73 2.639057 0.000000 237 +tuesdai 0 73 2.639057 0.000000 219 +nation 0 74 2.639057 0.000000 240 +sieg 1 69 2.708050 2.708050 260 +syllabu 0 67 2.708050 0.000000 247 +thursdai 0 70 2.708050 0.000000 241 +simul 0 66 2.708050 0.000000 255 +wednesdai 1 64 2.772589 2.772589 261 +import 0 65 2.772589 0.000000 282 +handout 0 64 2.772589 0.000000 263 +cover 0 55 2.944439 0.000000 329 +digit 0 52 2.995732 0.000000 348 +principl 0 48 3.044522 0.000000 357 +still 0 50 3.044522 0.000000 362 +quarter 0 47 3.091042 0.000000 389 +adapt 0 46 3.091042 0.000000 387 +fridai 1 44 3.135494 3.135494 390 +combin 1 42 3.218876 3.218876 421 +review 0 42 3.218876 0.000000 425 +announc 0 40 3.258097 0.000000 441 +paul 1 38 3.295837 3.295837 471 +credit 0 38 3.295837 0.000000 460 +hand 0 37 3.332205 0.000000 475 +purpos 0 37 3.332205 0.000000 481 +staff 1 36 3.367296 3.367296 490 +copyright 0 36 3.367296 0.000000 495 +board 0 33 3.433987 0.000000 528 +quot 0 29 3.583519 0.000000 582 +accur 0 25 3.737670 0.000000 680 +begin 2 23 3.806662 7.613324 716 +sequenti 1 22 3.850148 3.850148 745 +vlsi 0 21 3.912023 0.000000 795 +synthesi 0 20 3.951244 0.000000 834 +mostli 0 19 4.007333 0.000000 869 +sheet 1 16 4.174387 4.174387 973 +dilbert 0 16 4.174387 0.000000 996 +portion 0 16 4.174387 0.000000 971 +carl 1 15 4.248495 4.248495 1024 +comic 0 14 4.317488 0.000000 1103 +reprint 0 14 4.317488 0.000000 1097 +larri 1 13 4.382027 4.382027 1142 +loew 0 12 4.465908 0.000000 1252 +duli 0 12 4.465908 0.000000 1248 +nonprofit 0 11 4.553877 0.000000 1339 +fpga 1 10 4.653960 4.653960 1433 +franklin 0 10 4.653960 0.000000 1436 +motorola 0 9 4.753590 0.000000 1546 +mother 0 6 5.164786 0.000000 2083 +philip 0 6 5.164786 0.000000 2005 +ebel 1 4 5.568345 5.568345 2756 +mcmurchi 0 4 5.568345 0.000000 2757 +murphi 0 4 5.568345 0.000000 2737 +semiconductor 0 3 5.857933 0.000000 3339 +semiconduct 0 3 5.857933 0.000000 3340 +micron 0 3 5.857933 0.000000 3341 +hine 0 2 6.263398 0.000000 4475 +guru 0 2 6.263398 0.000000 4476 +comprehensivelist 0 2 6.263398 0.000000 4439 +icmanufactur 0 2 6.263398 0.000000 4440 +hineskj 0 1 6.957497 0.000000 8144 +pamett 0 1 6.957497 0.000000 8145 +groupsfin 0 1 6.957497 0.000000 8146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..dbc122b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +address 0 170 1.791759 0.000000 62 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +topic 0 114 2.197225 0.000000 110 +question 1 91 2.397895 2.397895 141 +select 0 91 2.397895 0.000000 154 +search 0 95 2.397895 0.000000 155 +build 0 85 2.484907 0.000000 184 +messag 0 76 2.564949 0.000000 212 +intellig 0 72 2.639057 0.000000 225 +sieg 1 69 2.708050 2.708050 260 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +plan 0 65 2.772589 0.000000 272 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +archiv 0 49 3.044522 0.000000 364 +quarter 0 47 3.091042 0.000000 389 +past 0 42 3.218876 0.000000 428 +staff 0 36 3.367296 0.000000 490 +represent 0 35 3.401197 0.000000 512 +pagecs 0 26 3.688879 0.000000 658 +fundament 0 25 3.737670 0.000000 661 +methodolog 0 23 3.806662 0.000000 733 +outlin 0 17 4.110874 0.000000 914 +nick 0 13 4.382027 0.000000 1180 +pose 0 9 4.753590 0.000000 1535 +depth 0 8 4.875197 0.000000 1636 +marc 0 8 4.875197 0.000000 1680 +uncertainti 0 7 5.010635 0.000000 1882 +machinelearn 0 6 5.164786 0.000000 2084 +anin 0 3 5.857933 0.000000 3354 +assignmentsassign 0 3 5.857933 0.000000 3342 +mailinglist 0 3 5.857933 0.000000 3325 +intelligencefal 0 2 6.263398 0.000000 4477 +andchalleng 0 2 6.263398 0.000000 4478 +intelligentmachin 0 2 6.263398 0.000000 4479 +agentarchitectur 0 2 6.263398 0.000000 4480 +weldweld 0 2 6.263398 0.000000 4481 +friedmanfriedman 0 2 6.263398 0.000000 4482 +kushmericknick 0 2 6.263398 0.000000 4483 +examsgradingresourcesth 0 2 6.263398 0.000000 4484 +topicsread 0 1 6.957497 0.000000 8147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..341ae736 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +email 0 220 1.386294 0.000000 29 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +updat 0 191 1.609438 0.000000 41 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +postscript 0 131 2.079442 0.000000 90 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +book 1 99 2.302585 2.302585 131 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +graphic 0 90 2.397895 0.000000 147 +librari 1 87 2.484907 2.484907 181 +member 0 84 2.484907 0.000000 165 +requir 0 81 2.484907 0.000000 167 +refer 1 78 2.564949 2.564949 203 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +intellig 1 72 2.639057 2.639057 225 +summari 1 73 2.639057 2.639057 237 +materi 0 75 2.639057 0.000000 221 +logic 0 71 2.639057 0.000000 230 +html 0 75 2.639057 0.000000 235 +sieg 0 69 2.708050 0.000000 260 +artifici 0 63 2.772589 0.000000 280 +collect 0 65 2.772589 0.000000 268 +foundat 0 62 2.772589 0.000000 286 +copi 0 63 2.772589 0.000000 284 +written 0 63 2.772589 0.000000 278 +juli 0 60 2.833213 0.000000 305 +reason 1 57 2.890372 2.890372 318 +sever 1 56 2.890372 2.890372 322 +cover 0 55 2.944439 0.000000 329 +appoint 0 49 3.044522 0.000000 358 +without 0 50 3.044522 0.000000 370 +math 1 44 3.135494 3.135494 402 +around 0 43 3.178054 0.000000 415 +edit 0 42 3.218876 0.000000 418 +probabl 1 40 3.258097 3.258097 455 +winter 0 36 3.367296 0.000000 500 +statist 0 35 3.401197 0.000000 521 +go 0 33 3.433987 0.000000 529 +chapter 0 32 3.465736 0.000000 536 +steve 0 29 3.583519 0.000000 594 +though 0 27 3.637586 0.000000 622 +request 0 26 3.688879 0.000000 635 +decis 1 23 3.806662 3.806662 728 +nice 1 20 3.951244 3.951244 809 +expert 0 20 3.951244 0.000000 833 +definit 0 19 4.007333 0.000000 864 +goe 0 15 4.248495 0.000000 1044 +signific 0 13 4.382027 0.000000 1125 +hank 1 12 4.465908 4.465908 1253 +probabilist 1 11 4.553877 4.553877 1343 +extrem 0 11 4.553877 0.000000 1330 +worth 0 11 4.553877 0.000000 1294 +perspect 0 10 4.653960 0.000000 1437 +uncertainti 1 7 5.010635 5.010635 1882 +whenev 0 7 5.010635 0.000000 1883 +heavi 0 7 5.010635 0.000000 1841 +secondari 0 7 5.010635 0.000000 1884 +histor 1 6 5.164786 5.164786 2085 +arrang 0 6 5.164786 0.000000 2023 +overlap 0 5 5.347108 0.000000 2368 +uncertain 0 4 5.568345 0.000000 2758 +cash 0 3 5.857933 0.000000 3355 +grail 0 3 5.857933 0.000000 3356 +alon 0 3 5.857933 0.000000 3139 +pearl 1 2 6.263398 6.263398 4485 +bui 0 2 6.263398 0.000000 4486 +algorithmsa 0 2 6.263398 0.000000 4487 +systemsthi 0 1 6.957497 0.000000 8148 +strappedfor 0 1 6.957497 0.000000 8149 +shafer 0 1 6.957497 0.000000 8150 +reasoningthi 0 1 6.957497 0.000000 8151 +jayn 0 1 6.957497 0.000000 8152 +fragmentari 0 1 6.957497 0.000000 8153 +foundationsof 0 1 6.957497 0.000000 8154 +beautifulli 0 1 6.957497 0.000000 8155 +neapolitan 0 1 6.957497 0.000000 8156 +propagationalgorithm 0 1 6.957497 0.000000 8157 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..cbb5fe48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,272 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +cours 2 273 1.098612 2.197224 15 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +class 1 199 1.609438 1.609438 37 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +applic 1 170 1.791759 1.791759 56 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +assign 2 135 1.945910 3.891820 66 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +problem 0 147 1.945910 0.000000 75 +document 1 121 2.079442 2.079442 89 +machin 0 129 2.079442 0.000000 95 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +topic 0 114 2.197225 0.000000 110 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +part 1 98 2.302585 2.302585 129 +take 1 97 2.302585 2.302585 134 +need 0 98 2.302585 0.000000 135 +imag 1 91 2.397895 2.397895 161 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +environ 1 84 2.484907 2.484907 177 +help 1 83 2.484907 2.484907 175 +contain 1 81 2.484907 2.484907 174 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +wide 0 84 2.484907 0.000000 185 +second 0 81 2.484907 0.000000 166 +educ 0 86 2.484907 0.000000 191 +level 0 87 2.484907 0.000000 180 +learn 0 86 2.484907 0.000000 170 +requir 0 81 2.484907 0.000000 167 +librari 0 87 2.484907 0.000000 181 +resourc 0 81 2.484907 0.000000 172 +mondai 2 77 2.564949 5.129898 206 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +messag 0 76 2.564949 0.000000 212 +know 0 80 2.564949 0.000000 198 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +name 0 72 2.639057 0.000000 220 +sieg 1 69 2.708050 2.708050 260 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +wednesdai 1 64 2.772589 2.772589 261 +copi 1 63 2.772589 2.772589 284 +experi 1 64 2.772589 2.772589 283 +plan 1 65 2.772589 2.772589 272 +import 0 65 2.772589 0.000000 282 +laboratori 0 63 2.772589 0.000000 292 +march 1 61 2.833213 2.833213 295 +room 0 59 2.833213 0.000000 301 +back 0 60 2.833213 0.000000 297 +unix 0 58 2.890372 0.000000 308 +index 0 56 2.890372 0.000000 309 +local 1 55 2.944439 2.944439 334 +undergradu 1 54 2.944439 2.944439 338 +three 0 54 2.944439 0.000000 330 +cover 0 55 2.944439 0.000000 329 +week 1 52 2.995732 2.995732 343 +run 1 51 2.995732 2.995732 347 +frequent 0 49 3.044522 0.000000 367 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +fridai 2 44 3.135494 6.270988 390 +midterm 1 45 3.135494 3.135494 392 +keep 0 44 3.135494 0.000000 409 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +review 0 42 3.218876 0.000000 425 +vision 0 41 3.218876 0.000000 430 +tutori 1 39 3.258097 3.258097 437 +announc 0 40 3.258097 0.000000 441 +form 0 39 3.258097 0.000000 443 +correct 0 38 3.295837 0.000000 462 +slide 0 38 3.295837 0.000000 467 +workstat 0 37 3.332205 0.000000 479 +copyright 1 36 3.367296 3.367296 495 +short 0 36 3.367296 0.000000 499 +especi 0 36 3.367296 0.000000 496 +ofth 0 36 3.367296 0.000000 491 +next 1 34 3.401197 3.401197 517 +least 0 35 3.401197 0.000000 516 +post 0 35 3.401197 0.000000 505 +either 0 35 3.401197 0.000000 506 +articl 1 33 3.433987 3.433987 530 +chapter 1 32 3.465736 3.465736 536 +ad 0 32 3.465736 0.000000 544 +transform 0 32 3.465736 0.000000 542 +turn 0 29 3.583519 0.000000 586 +univ 0 28 3.610918 0.000000 617 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +determin 0 27 3.637586 0.000000 630 +pagecs 0 26 3.688879 0.000000 658 +effort 0 26 3.688879 0.000000 652 +notic 0 25 3.737670 0.000000 675 +pattern 0 24 3.761200 0.000000 689 +store 0 24 3.761200 0.000000 693 +displai 0 23 3.806662 0.000000 712 +variabl 0 23 3.806662 0.000000 715 +recognit 0 23 3.806662 0.000000 723 +instal 1 22 3.850148 3.850148 754 +period 0 22 3.850148 0.000000 743 +path 1 21 3.912023 3.912023 778 +navig 0 21 3.912023 0.000000 796 +exercis 0 19 4.007333 0.000000 842 +comparison 0 19 4.007333 0.000000 863 +account 1 18 4.060443 4.060443 882 +appropri 0 18 4.060443 0.000000 883 +statu 0 18 4.060443 0.000000 885 +outlin 1 17 4.110874 4.110874 914 +regular 0 17 4.110874 0.000000 929 +intro 0 17 4.110874 0.000000 915 +sign 1 16 4.174387 4.174387 970 +georg 0 16 4.174387 0.000000 994 +spatial 0 16 4.174387 0.000000 988 +fourth 0 16 4.174387 0.000000 999 +permit 0 16 4.174387 0.000000 962 +alreadi 0 16 4.174387 0.000000 963 +earli 0 16 4.174387 0.000000 968 +overhead 0 15 4.248495 0.000000 1035 +floor 0 14 4.317488 0.000000 1070 +menu 1 13 4.382027 4.382027 1156 +resolut 0 13 4.382027 0.000000 1172 +introduc 0 13 4.382027 0.000000 1139 +care 0 13 4.382027 0.000000 1177 +hypermedia 0 12 4.465908 0.000000 1247 +noth 0 11 4.553877 0.000000 1328 +transpar 0 11 4.553877 0.000000 1325 +fill 0 11 4.553877 0.000000 1349 +thedepart 0 11 4.553877 0.000000 1350 +sundai 0 10 4.653960 0.000000 1387 +packet 0 10 4.653960 0.000000 1415 +prior 0 10 4.653960 0.000000 1438 +card 0 10 4.653960 0.000000 1435 +mosaic 0 10 4.653960 0.000000 1426 +login 1 9 4.753590 4.753590 1550 +classifi 0 9 4.753590 0.000000 1537 +pick 0 9 4.753590 0.000000 1498 +lock 0 9 4.753590 0.000000 1551 +ideal 0 8 4.875197 0.000000 1630 +evan 0 8 4.875197 0.000000 1633 +documentfor 0 7 5.010635 0.000000 1865 +remind 0 7 5.010635 0.000000 1799 +reed 1 6 5.164786 5.164786 2086 +arrang 1 6 5.164786 5.164786 2023 +theclass 0 6 5.164786 0.000000 2060 +mock 0 6 5.164786 0.000000 2087 +otherthan 0 6 5.164786 0.000000 2009 +conveni 0 6 5.164786 0.000000 2088 +onto 0 6 5.164786 0.000000 2089 +approv 0 6 5.164786 0.000000 2078 +temporari 0 6 5.164786 0.000000 2090 +contract 0 6 5.164786 0.000000 1985 +pentium 0 6 5.164786 0.000000 2077 +newinform 0 5 5.347108 0.000000 2342 +subjectto 0 5 5.347108 0.000000 2369 +ahead 0 5 5.347108 0.000000 2338 +cshrc 0 4 5.568345 0.000000 2759 +assignmentsand 0 4 5.568345 0.000000 2760 +cvpr 0 4 5.568345 0.000000 2761 +net 0 4 5.568345 0.000000 2741 +accompani 0 4 5.568345 0.000000 2666 +password 0 4 5.568345 0.000000 2594 +kept 0 4 5.568345 0.000000 2762 +insieg 0 3 5.857933 0.000000 3331 +weekend 0 3 5.857933 0.000000 3357 +khoro 2 2 6.263398 12.526796 4488 +cantata 1 2 6.263398 6.263398 4489 +sun 1 2 6.263398 6.263398 4490 +setenv 1 2 6.263398 6.263398 4491 +pmin 0 2 6.263398 0.000000 4492 +combinationof 0 2 6.263398 0.000000 4081 +includingth 0 2 6.263398 0.000000 4493 +onthursdai 0 2 6.263398 0.000000 4425 +itemsund 0 2 6.263398 0.000000 4387 +balloon 0 2 6.263398 0.000000 4388 +khoros_hom 1 1 6.957497 6.957497 8158 +msvc 1 1 6.957497 6.957497 8159 +rene 1 1 6.957497 6.957497 8160 +understandingwelcom 0 1 6.957497 0.000000 8161 +doexercis 0 1 6.957497 0.000000 8162 +torun 0 1 6.957497 0.000000 8163 +aslillith 0 1 6.957497 0.000000 8164 +containxhost 0 1 6.957497 0.000000 8165 +lilliththen 0 1 6.957497 0.000000 8166 +manpath 0 1 6.957497 0.000000 8167 +rlogin 0 1 6.957497 0.000000 8168 +lillith 0 1 6.957497 0.000000 8169 +rhost 0 1 6.957497 0.000000 8170 +typecantata 0 1 6.957497 0.000000 8171 +prompt 0 1 6.957497 0.000000 8172 +haskhoro 0 1 6.957497 0.000000 8173 +wwwhttp 0 1 6.957497 0.000000 8174 +htmland 0 1 6.957497 0.000000 8175 +itscours 0 1 6.957497 0.000000 8176 +twotop 0 1 6.957497 0.000000 8177 +pagesand 0 1 6.957497 0.000000 8178 +huerta 0 1 6.957497 0.000000 8179 +andnevatia 0 1 6.957497 0.000000 8180 +tolook 0 1 6.957497 0.000000 8181 +wolff 0 1 6.957497 0.000000 8182 +onneur 0 1 6.957497 0.000000 8183 +trainabl 0 1 6.957497 0.000000 8184 +ofmatlab 0 1 6.957497 0.000000 8185 +requirethat 0 1 6.957497 0.000000 8186 +mclain 0 1 6.957497 0.000000 8187 +documentexplain 0 1 6.957497 0.000000 8188 +withkhoro 0 1 6.957497 0.000000 8189 +accesskhoro 0 1 6.957497 0.000000 8190 +youraccount 0 1 6.957497 0.000000 8191 +itov 0 1 6.957497 0.000000 8192 +arelimit 0 1 6.957497 0.000000 8193 +andsh 0 1 6.957497 0.000000 8194 +knock 0 1 6.957497 0.000000 8195 +orhav 0 1 6.957497 0.000000 8196 +willhav 0 1 6.957497 0.000000 8197 +delft 0 1 6.957497 0.000000 8198 +brochur 0 1 6.957497 0.000000 8199 +brochuremosa 0 1 6.957497 0.000000 8200 +macmosa 0 1 6.957497 0.000000 8201 +itemund 0 1 6.957497 0.000000 8202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..c929df0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +avail 0 169 1.791759 0.000000 48 +architectur 1 139 1.945910 1.945910 77 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +schedul 1 119 2.079442 2.079442 85 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +present 0 91 2.397895 0.000000 145 +center 0 88 2.397895 0.000000 158 +novemb 2 81 2.484907 4.969814 179 +start 1 83 2.484907 2.484907 173 +exam 1 86 2.484907 2.484907 169 +wide 0 84 2.484907 0.000000 185 +contain 0 81 2.484907 0.000000 174 +librari 0 87 2.484907 0.000000 181 +info 0 85 2.484907 0.000000 176 +resourc 0 81 2.484907 0.000000 172 +decemb 1 80 2.564949 2.564949 215 +complet 1 77 2.564949 2.564949 208 +good 0 77 2.564949 0.000000 200 +orient 0 80 2.564949 0.000000 205 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +thursdai 1 70 2.708050 2.708050 241 +view 0 70 2.708050 0.000000 254 +sieg 0 69 2.708050 0.000000 260 +prof 0 64 2.772589 0.000000 273 +virtual 0 62 2.772589 0.000000 285 +written 0 63 2.772589 0.000000 278 +descript 0 64 2.772589 0.000000 271 +collect 0 65 2.772589 0.000000 268 +import 0 65 2.772589 0.000000 282 +wednesdai 0 64 2.772589 0.000000 261 +room 0 59 2.833213 0.000000 301 +overview 1 56 2.890372 2.890372 323 +found 0 53 2.944439 0.000000 337 +week 1 52 2.995732 2.995732 343 +digit 0 52 2.995732 0.000000 348 +approach 0 48 3.044522 0.000000 366 +keep 0 44 3.135494 0.000000 409 +discuss 0 45 3.135494 0.000000 399 +made 0 44 3.135494 0.000000 398 +fridai 0 44 3.135494 0.000000 390 +term 1 43 3.178054 3.178054 411 +review 0 42 3.218876 0.000000 425 +error 0 40 3.258097 0.000000 449 +close 0 38 3.295837 0.000000 465 +hand 0 37 3.332205 0.000000 475 +copyright 1 36 3.367296 3.367296 495 +short 0 36 3.367296 0.000000 499 +ofth 0 36 3.367296 0.000000 491 +ad 0 32 3.465736 0.000000 544 +titl 0 31 3.496508 0.000000 556 +neural 1 30 3.555348 3.555348 578 +scale 0 28 3.610918 0.000000 613 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +symbol 0 27 3.637586 0.000000 620 +arrai 0 27 3.637586 0.000000 627 +pagecs 0 26 3.688879 0.000000 658 +session 0 26 3.688879 0.000000 643 +supercomput 1 25 3.737670 3.737670 681 +notic 0 25 3.737670 0.000000 675 +begin 1 23 3.806662 3.806662 716 +recognit 0 23 3.806662 0.000000 723 +dai 0 22 3.850148 0.000000 753 +instal 0 22 3.850148 0.000000 754 +half 0 21 3.912023 0.000000 776 +theunivers 0 21 3.912023 0.000000 797 +demo 0 18 4.060443 0.000000 888 +segment 0 17 4.110874 0.000000 931 +intel 1 16 4.174387 4.174387 1000 +normal 1 16 4.174387 4.174387 995 +brief 0 16 4.174387 0.000000 1001 +permit 0 16 4.174387 0.000000 962 +embed 1 14 4.317488 4.317488 1102 +hong 0 14 4.317488 0.000000 1105 +heterogen 0 14 4.317488 0.000000 1090 +canada 0 13 4.382027 0.000000 1158 +guest 1 12 4.465908 4.465908 1220 +hypermedia 0 12 4.465908 0.000000 1247 +onth 0 12 4.465908 0.000000 1218 +mesh 0 11 4.553877 0.000000 1351 +simon 0 8 4.875197 0.000000 1697 +documentfor 0 7 5.010635 0.000000 1865 +sweden 0 7 5.010635 0.000000 1885 +friedman 0 7 5.010635 0.000000 1886 +theclass 0 6 5.164786 0.000000 2060 +conveni 0 6 5.164786 0.000000 2088 +otherthan 0 6 5.164786 0.000000 2009 +speaker 1 5 5.347108 5.347108 2370 +newinform 0 5 5.347108 0.000000 2342 +templat 0 5 5.347108 0.000000 2311 +subjectto 0 5 5.347108 0.000000 2369 +tennesse 0 4 5.568345 0.000000 2763 +pyramid 1 3 5.857933 5.857933 3358 +paragon 1 3 5.857933 5.857933 3359 +simd 0 3 5.857933 0.000000 3360 +mimd 0 3 5.857933 0.000000 3361 +icon 0 3 5.857933 0.000000 3362 +neal 0 3 5.857933 0.000000 3184 +maspar 1 2 6.263398 6.263398 4279 +informationon 0 2 6.263398 0.000000 4232 +burt 0 2 6.263398 0.000000 4494 +rosenfeld 0 2 6.263398 0.000000 4495 +inon 0 2 6.263398 0.000000 4496 +processingwelcom 0 1 6.957497 0.000000 8203 +hourearli 0 1 6.957497 0.000000 8204 +nian 0 1 6.957497 0.000000 8205 +fraser 0 1 6.957497 0.000000 8206 +burnabi 0 1 6.957497 0.000000 8207 +bharath 0 1 6.957497 0.000000 8208 +modayur 0 1 6.957497 0.000000 8209 +invariantoper 0 1 6.957497 0.000000 8210 +hierarchicalrelax 0 1 6.957497 0.000000 8211 +isodata 0 1 6.957497 0.000000 8212 +treatment 0 1 6.957497 0.000000 8213 +topicsdur 0 1 6.957497 0.000000 8214 +activelyexplor 0 1 6.957497 0.000000 8215 +writeupsi 0 1 6.957497 0.000000 8216 +resourcespvm 0 1 6.957497 0.000000 8217 +virtualmachin 0 1 6.957497 0.000000 8218 +layear 0 1 6.957497 0.000000 8219 +aviru 0 1 6.957497 0.000000 8220 +moreworkst 0 1 6.957497 0.000000 8221 +studydistribut 0 1 6.957497 0.000000 8222 +technicalpubl 0 1 6.957497 0.000000 8223 +paragonparallel 0 1 6.957497 0.000000 8224 +variousvendor 0 1 6.957497 0.000000 8225 +correctionsto 0 1 6.957497 0.000000 8226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..c7aa1752 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +paper 1 205 1.609438 1.609438 38 +modifi 0 178 1.609438 0.000000 35 +data 0 170 1.791759 0.000000 49 +find 0 111 2.197225 0.000000 111 +graphic 1 90 2.397895 2.397895 147 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +optim 1 79 2.564949 2.564949 197 +exampl 1 77 2.564949 2.564949 195 +solv 0 73 2.639057 0.000000 234 +differ 0 66 2.708050 0.000000 253 +numer 0 49 3.044522 0.000000 369 +linear 1 41 3.218876 3.218876 431 +global 0 34 3.401197 0.000000 520 +valu 0 25 3.737670 0.000000 665 +mike 0 24 3.761200 0.000000 703 +equat 1 23 3.806662 3.806662 724 +properti 0 22 3.850148 0.000000 749 +definit 0 19 4.007333 0.000000 864 +eric 0 19 4.007333 0.000000 870 +element 0 18 4.060443 0.000000 895 +intro 1 17 4.110874 4.110874 915 +matrix 1 17 4.110874 4.110874 933 +differenti 0 17 4.110874 0.000000 921 +adam 0 17 4.110874 0.000000 934 +finit 1 14 4.317488 4.317488 1106 +nonlinear 0 14 4.317488 0.000000 1107 +chuck 0 14 4.317488 0.000000 1108 +discret 0 13 4.382027 0.000000 1165 +jonathan 0 13 4.382027 0.000000 1174 +brad 1 12 4.465908 4.465908 1264 +daniel 0 12 4.465908 0.000000 1233 +decomposit 0 10 4.653960 0.000000 1439 +arithmet 0 10 4.653960 0.000000 1388 +kevin 0 9 4.753590 0.000000 1482 +joel 0 8 4.875197 0.000000 1698 +root 0 8 4.875197 0.000000 1650 +constrain 0 6 5.164786 0.000000 2042 +fred 0 6 5.164786 0.000000 2072 +fit 0 5 5.347108 0.000000 2285 +invers 0 4 5.568345 0.000000 2764 +corei 0 4 5.568345 0.000000 2718 +eigenvalu 0 3 5.857933 0.000000 3364 +eigenvector 0 3 5.857933 0.000000 3365 +singular 0 3 5.857933 0.000000 3366 +conclus 0 3 5.857933 0.000000 3367 +ordinari 0 3 5.857933 0.000000 3233 +interv 0 3 5.857933 0.000000 3253 +quadrat 1 2 6.263398 6.263398 4497 +shuichi 0 2 6.263398 0.000000 4498 +unconstrain 0 2 6.263398 0.000000 4499 +kari 0 2 6.263398 0.000000 4500 +regress 0 2 6.263398 0.000000 4501 +calibr 0 2 6.263398 0.000000 4502 +joanna 0 2 6.263398 0.000000 4503 +radios 0 2 6.263398 0.000000 4504 +pde 0 2 6.263398 0.000000 4505 +seminarc 0 1 6.957497 0.000000 8228 +rspring 0 1 6.957497 0.000000 8229 +ronen 0 1 6.957497 0.000000 8230 +troi 0 1 6.957497 0.000000 8231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..b1c5a0c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +process 1 142 1.945910 1.945910 72 +relat 0 139 1.945910 0.000000 68 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +site 0 106 2.197225 0.000000 119 +specif 0 106 2.197225 0.000000 106 +imag 1 91 2.397895 2.397895 161 +educ 1 86 2.484907 2.484907 191 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +materi 0 75 2.639057 0.000000 221 +view 0 70 2.708050 0.000000 254 +experi 1 64 2.772589 2.772589 283 +septemb 0 65 2.772589 0.000000 274 +special 0 56 2.890372 0.000000 320 +explor 0 58 2.890372 0.000000 324 +quarter 1 47 3.091042 3.091042 389 +seminar 0 38 3.295837 0.000000 470 +copyright 1 36 3.367296 3.367296 495 +winter 0 36 3.367296 0.000000 500 +collabor 0 32 3.465736 0.000000 543 +autumn 1 31 3.496508 3.496508 558 +notic 0 25 3.737670 0.000000 675 +instal 0 22 3.850148 0.000000 754 +varieti 0 22 3.850148 0.000000 740 +tanimoto 1 10 4.653960 4.653960 1429 +transcript 0 6 5.164786 0.000000 2067 +otherthan 0 6 5.164786 0.000000 2009 +subjectto 0 5 5.347108 0.000000 2369 +useof 0 3 5.857933 0.000000 3368 +quarterscs 0 1 6.957497 0.000000 8232 +topicssteven 0 1 6.957497 0.000000 8233 +instructorcs 0 1 6.957497 0.000000 8234 +varyfrom 0 1 6.957497 0.000000 8235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..38f472e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +paper 2 205 1.609438 3.218876 38 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +read 2 154 1.791759 3.583518 47 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +first 1 140 1.945910 1.945910 71 +model 0 145 1.945910 0.000000 69 +problem 0 147 1.945910 0.000000 75 +document 0 121 2.079442 0.000000 89 +technolog 0 131 2.079442 0.000000 102 +world 1 115 2.197225 2.197225 126 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +comment 0 93 2.397895 0.000000 146 +novemb 1 81 2.484907 2.484907 179 +educ 1 86 2.484907 2.484907 191 +wide 1 84 2.484907 2.484907 185 +contain 0 81 2.484907 0.000000 174 +environ 0 84 2.484907 0.000000 177 +second 0 81 2.484907 0.000000 166 +internet 0 83 2.484907 0.000000 186 +state 0 76 2.564949 0.000000 207 +html 1 75 2.639057 2.639057 235 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +onlin 0 75 2.639057 0.000000 223 +line 0 75 2.639057 0.000000 231 +degre 1 69 2.708050 2.708050 259 +view 0 70 2.708050 0.000000 254 +descript 0 64 2.772589 0.000000 271 +import 0 65 2.772589 0.000000 282 +share 0 59 2.833213 0.000000 304 +possibl 1 47 3.091042 3.091042 378 +discuss 1 45 3.135494 3.135494 399 +keep 0 44 3.135494 0.000000 409 +describ 0 45 3.135494 0.000000 400 +netscap 0 44 3.135494 0.000000 395 +protocol 0 45 3.135494 0.000000 407 +third 0 43 3.178054 0.000000 412 +term 0 43 3.178054 0.000000 411 +combin 0 42 3.218876 0.000000 421 +vision 0 41 3.218876 0.000000 430 +futur 0 41 3.218876 0.000000 427 +map 1 39 3.258097 3.258097 452 +littl 0 39 3.258097 0.000000 454 +tutori 0 39 3.258097 0.000000 437 +paul 0 38 3.295837 0.000000 471 +respons 0 37 3.332205 0.000000 476 +copyright 1 36 3.367296 3.367296 495 +short 0 36 3.367296 0.000000 499 +either 1 35 3.401197 3.401197 506 +michael 0 35 3.401197 0.000000 514 +john 0 33 3.433987 0.000000 532 +concept 1 32 3.465736 3.465736 537 +ad 0 32 3.465736 0.000000 544 +autumn 1 31 3.496508 3.496508 558 +someth 0 31 3.496508 0.000000 554 +option 0 30 3.555348 0.000000 575 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +enhanc 0 26 3.688879 0.000000 644 +notic 0 25 3.737670 0.000000 675 +brows 1 23 3.806662 3.806662 726 +instal 0 22 3.850148 0.000000 754 +geometri 0 22 3.850148 0.000000 752 +annot 0 21 3.912023 0.000000 775 +navig 0 21 3.912023 0.000000 796 +toolkit 1 20 3.951244 3.951244 835 +anderson 0 19 4.007333 0.000000 860 +particularli 0 19 4.007333 0.000000 867 +lower 0 18 4.060443 0.000000 886 +layer 1 17 4.110874 4.110874 926 +adam 0 17 4.110874 0.000000 934 +choic 0 16 4.174387 0.000000 979 +piec 0 15 4.248495 0.000000 1020 +achiev 0 14 4.317488 0.000000 1088 +hong 0 14 4.317488 0.000000 1105 +central 0 13 4.382027 0.000000 1160 +promot 1 12 4.465908 4.465908 1235 +hypermedia 0 12 4.465908 0.000000 1247 +infrastructur 0 12 4.465908 0.000000 1234 +noth 0 11 4.553877 0.000000 1328 +smart 0 11 4.553877 0.000000 1352 +baer 0 11 4.553877 0.000000 1353 +mosaic 0 10 4.653960 0.000000 1426 +tutor 0 9 4.753590 0.000000 1552 +beyond 1 7 5.010635 5.010635 1834 +documentfor 0 7 5.010635 0.000000 1865 +davi 0 7 5.010635 0.000000 1888 +baker 0 7 5.010635 0.000000 1812 +transcript 0 6 5.164786 0.000000 2067 +theclass 0 6 5.164786 0.000000 2060 +otherthan 0 6 5.164786 0.000000 2009 +plu 0 6 5.164786 0.000000 2004 +trail 0 6 5.164786 0.000000 2071 +newinform 0 5 5.347108 0.000000 2342 +subjectto 0 5 5.347108 0.000000 2369 +barton 0 5 5.347108 0.000000 2371 +jeremi 0 5 5.347108 0.000000 2360 +carlson 0 5 5.347108 0.000000 2351 +sandi 0 4 5.568345 0.000000 2765 +increasingli 0 4 5.568345 0.000000 2766 +ncsa 0 4 5.568345 0.000000 2767 +ward 1 2 6.263398 6.263398 4506 +tessa 0 2 6.263398 0.000000 4507 +learner 0 2 6.263398 0.000000 4508 +uiuc 0 2 6.263398 0.000000 4509 +marla 0 2 6.263398 0.000000 4510 +soap 0 2 6.263398 0.000000 4511 +presentor 1 1 6.957497 6.957497 8236 +labord 1 1 6.957497 6.957497 8237 +wwwwelcom 0 1 6.957497 0.000000 8238 +mccalla 0 1 6.957497 0.000000 8239 +importanceof 0 1 6.957497 0.000000 8240 +youngquist 0 1 6.957497 0.000000 8241 +aboutinternet 0 1 6.957497 0.000000 8242 +microworld 0 1 6.957497 0.000000 8243 +tointellig 0 1 6.957497 0.000000 8244 +bartel 0 1 6.957497 0.000000 8245 +mathematicsconnect 0 1 6.957497 0.000000 8246 +gari 0 1 6.957497 0.000000 8247 +ambiti 0 1 6.957497 0.000000 8248 +thethem 0 1 6.957497 0.000000 8249 +moresophist 0 1 6.957497 0.000000 8250 +elabor 0 1 6.957497 0.000000 8251 +ofwww 0 1 6.957497 0.000000 8252 +intechn 0 1 6.957497 0.000000 8253 +couldmak 0 1 6.957497 0.000000 8254 +applicationsthat 0 1 6.957497 0.000000 8255 +webhttp 0 1 6.957497 0.000000 8256 +empow 0 1 6.957497 0.000000 8257 +agehttp 0 1 6.957497 0.000000 8258 +communitieshttp 0 1 6.957497 0.000000 8259 +dietz 0 1 6.957497 0.000000 8260 +serviceshttp 0 1 6.957497 0.000000 8261 +dcewebkit 0 1 6.957497 0.000000 8262 +zhumeet 0 1 6.957497 0.000000 8263 +aboutcurriculum 0 1 6.957497 0.000000 8264 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..3ca5a680 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +current 1 284 1.098612 1.098612 21 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +technolog 1 131 2.079442 2.079442 102 +schedul 1 119 2.079442 2.079442 85 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +site 0 106 2.197225 0.000000 119 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +present 0 91 2.397895 0.000000 145 +learn 1 86 2.484907 2.484907 170 +internet 0 83 2.484907 0.000000 186 +school 0 84 2.484907 0.000000 188 +come 0 78 2.564949 0.000000 202 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +view 0 70 2.708050 0.000000 254 +descript 0 64 2.772589 0.000000 271 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +explor 1 58 2.890372 2.890372 324 +cover 0 55 2.944439 0.000000 329 +visual 0 48 3.044522 0.000000 372 +possibl 0 47 3.091042 0.000000 378 +move 0 47 3.091042 0.000000 382 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +seminar 0 38 3.295837 0.000000 470 +respons 0 37 3.332205 0.000000 476 +copyright 1 36 3.367296 3.367296 495 +collabor 0 32 3.465736 0.000000 543 +autumn 1 31 3.496508 3.496508 558 +particip 0 29 3.583519 0.000000 589 +depend 0 29 3.583519 0.000000 583 +notic 0 25 3.737670 0.000000 675 +wai 0 25 3.737670 0.000000 662 +togeth 0 23 3.806662 0.000000 714 +instal 0 22 3.850148 0.000000 754 +decid 0 14 4.317488 0.000000 1075 +context 0 13 4.382027 0.000000 1153 +subset 0 10 4.653960 0.000000 1425 +tanimoto 0 10 4.653960 0.000000 1429 +otherthan 0 6 5.164786 0.000000 2009 +middl 1 5 5.347108 5.347108 2372 +subjectto 0 5 5.347108 0.000000 2369 +evid 0 4 5.568345 0.000000 2768 +innew 0 2 6.263398 0.000000 4512 +learningwelcom 0 1 6.957497 0.000000 8265 +methodologiesfor 0 1 6.957497 0.000000 8266 +forcollabor 0 1 6.957497 0.000000 8267 +willtak 0 1 6.957497 0.000000 8268 +ofthes 0 1 6.957497 0.000000 8269 +ofai 0 1 6.957497 0.000000 8270 +ofstud 0 1 6.957497 0.000000 8271 +intopeopl 0 1 6.957497 0.000000 8272 +meani 0 1 6.957497 0.000000 8273 +schoolmai 0 1 6.957497 0.000000 8274 +participatingstud 0 1 6.957497 0.000000 8275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..43f4fc97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +us 1 329 1.098612 1.098612 16 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +paper 0 205 1.609438 0.000000 38 +parallel 2 169 1.791759 3.583518 60 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +schedul 2 119 2.079442 4.158884 85 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +memori 1 101 2.302585 2.302585 139 +techniqu 0 99 2.302585 0.000000 138 +technic 0 100 2.302585 0.000000 140 +follow 0 92 2.397895 0.000000 143 +octob 0 89 2.397895 0.000000 156 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +ieee 0 86 2.484907 0.000000 190 +environ 0 84 2.484907 0.000000 177 +requir 0 81 2.484907 0.000000 167 +activ 0 84 2.484907 0.000000 182 +dynam 1 76 2.564949 2.564949 194 +april 0 77 2.564949 0.000000 196 +server 0 76 2.564949 0.000000 204 +june 0 79 2.564949 0.000000 214 +appear 0 78 2.564949 0.000000 210 +workshop 1 71 2.639057 2.639057 239 +polici 1 64 2.772589 2.772589 279 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +share 1 59 2.833213 2.833213 304 +processor 2 54 2.944439 5.888878 335 +scientif 0 53 2.944439 0.000000 341 +cover 0 55 2.944439 0.000000 329 +principl 0 48 3.044522 0.000000 357 +set 0 50 3.044522 0.000000 361 +adapt 0 46 3.091042 0.000000 387 +discuss 0 45 3.135494 0.000000 399 +tutori 1 39 3.258097 3.258097 437 +transact 0 39 3.258097 0.000000 438 +theoret 0 39 3.258097 0.000000 446 +open 0 38 3.295837 0.000000 469 +workstat 0 37 3.332205 0.000000 479 +ofth 0 36 3.367296 0.000000 491 +global 0 34 3.401197 0.000000 520 +concurr 0 34 3.401197 0.000000 501 +product 0 33 3.433987 0.000000 527 +richard 0 31 3.496508 0.000000 559 +multiprocessor 1 28 3.610918 3.610918 605 +measur 1 28 3.610918 3.610918 609 +univ 0 28 3.610918 0.000000 617 +proc 2 26 3.688879 7.377758 649 +strategi 1 25 3.737670 3.737670 682 +supercomput 1 25 3.737670 3.737670 681 +ofwashington 0 22 3.850148 0.000000 766 +william 0 22 3.850148 0.000000 765 +programminglanguag 0 21 3.912023 0.000000 782 +alloc 2 20 3.951244 7.902488 821 +smith 0 20 3.951244 0.000000 820 +runtim 1 19 4.007333 4.007333 858 +eric 1 19 4.007333 4.007333 870 +anderson 0 19 4.007333 0.000000 860 +thoma 0 18 4.060443 0.000000 901 +scott 0 18 4.060443 0.000000 884 +expand 0 17 4.110874 0.000000 928 +miller 0 17 4.110874 0.000000 949 +asplo 0 17 4.110874 0.000000 948 +partit 0 16 4.174387 0.000000 984 +jose 0 16 4.174387 0.000000 976 +demand 1 14 4.317488 4.317488 1073 +conf 1 13 4.382027 4.382027 1181 +sigmetr 1 13 4.382027 4.382027 1173 +coordin 1 13 4.382027 4.382027 1182 +karlin 0 13 4.382027 0.000000 1176 +workload 1 12 4.465908 4.465908 1210 +mari 1 12 4.465908 4.465908 1266 +gupta 1 12 4.465908 4.465908 1241 +kenneth 1 12 4.465908 4.465908 1265 +characterist 0 12 4.465908 0.000000 1257 +philadelphia 0 12 4.465908 0.000000 1244 +bill 0 11 4.553877 0.000000 1297 +impact 0 11 4.553877 0.000000 1334 +wood 0 11 4.553877 0.000000 1355 +santa 1 10 4.653960 4.653960 1441 +ofcomput 1 10 4.653960 4.653960 1442 +patterson 0 9 4.753590 0.000000 1554 +vernon 0 9 4.753590 0.000000 1556 +job 0 8 4.875197 0.000000 1702 +migrat 1 7 5.010635 5.010635 1851 +burger 0 7 5.010635 0.000000 1889 +multiprogram 1 6 5.164786 5.164786 2010 +chandra 0 6 5.164786 0.000000 2091 +symp 1 5 5.347108 5.347108 2376 +ousterhout 0 5 5.347108 0.000000 2301 +leblanc 0 5 5.347108 0.000000 2377 +affin 0 5 5.347108 0.000000 2378 +parallelprogram 0 5 5.347108 0.000000 2379 +ofparallel 0 5 5.347108 0.000000 2380 +culler 0 5 5.347108 0.000000 2381 +hyder 0 4 5.568345 0.000000 2772 +anoop 0 4 5.568345 0.000000 2770 +identif 0 4 5.568345 0.000000 2773 +barbara 1 3 5.857933 5.857933 3380 +ipp 1 3 5.857933 5.857933 3381 +dusseau 1 3 5.857933 5.857933 3382 +nguyen 1 3 5.857933 5.857933 3290 +zahorjan 1 3 5.857933 5.857933 3383 +mccann 1 3 5.857933 5.857933 3273 +tran 0 3 5.857933 0.000000 3384 +saltz 0 3 5.857933 0.000000 3385 +am 0 3 5.857933 0.000000 3386 +patrick 0 3 5.857933 0.000000 3334 +weihl 0 3 5.857933 0.000000 3284 +tradeoff 0 3 5.857933 0.000000 3387 +parson 1 2 6.263398 6.263398 4528 +memorymultiprocessor 1 2 6.263398 6.263398 4529 +gang 0 2 6.263398 0.000000 4530 +inrd 0 2 6.263398 0.000000 4531 +andsequenti 0 2 6.263398 0.000000 4532 +tucker 0 2 6.263398 0.000000 4307 +shun 0 2 6.263398 0.000000 4533 +leung 0 2 6.263398 0.000000 4534 +han 0 2 6.263398 0.000000 4535 +agraw 0 2 6.263398 0.000000 4536 +derek 0 2 6.263398 0.000000 4537 +bunt 0 2 6.263398 0.000000 4308 +rosenblum 0 2 6.263398 0.000000 4314 +tera 0 2 6.263398 0.000000 4224 +computersystem 0 2 6.263398 0.000000 4360 +arpaci 1 1 6.957497 6.957497 8345 +vaswani 1 1 6.957497 6.957497 8346 +sevcik 1 1 6.957497 6.957497 8347 +feitelson 1 1 6.957497 6.957497 8348 +coschedul 1 1 6.957497 6.957497 8349 +mvmv 0 1 6.957497 0.000000 8350 +systemsprofessor 0 1 6.957497 0.000000 8351 +vernontim 0 1 6.957497 0.000000 8352 +pmlocat 0 1 6.957497 0.000000 8353 +now 0 1 6.957497 0.000000 8354 +vahdat 0 1 6.957497 0.000000 8355 +equi 0 1 6.957497 0.000000 8356 +issuesfor 0 1 6.957497 0.000000 8357 +workloadcharacterist 0 1 6.957497 0.000000 8358 +evangelo 0 1 6.957497 0.000000 8359 +markato 0 1 6.957497 0.000000 8360 +loopschedul 0 1 6.957497 0.000000 8361 +iniee 0 1 6.957497 0.000000 8362 +zima 0 1 6.957497 0.000000 8363 +chapman 0 1 6.957497 0.000000 8364 +edjlali 0 1 6.957497 0.000000 8365 +sussman 0 1 6.957497 0.000000 8366 +comparisonsshikharesh 0 1 6.957497 0.000000 8367 +majumdar 0 1 6.957497 0.000000 8368 +eager 0 1 6.957497 0.000000 8369 +variabilityservic 0 1 6.957497 0.000000 8370 +dror 0 1 6.957497 0.000000 8371 +nitzberg 0 1 6.957497 0.000000 8372 +thenasa 0 1 6.957497 0.000000 8373 +ipsc 0 1 6.957497 0.000000 8374 +leutenegg 0 1 6.957497 0.000000 8375 +sobalvarro 0 1 6.957497 0.000000 8376 +rohit 0 1 6.957497 0.000000 8377 +devin 0 1 6.957497 0.000000 8378 +verghes 0 1 6.957497 0.000000 8379 +mendel 0 1 6.957497 0.000000 8380 +multiprocessorcomput 0 1 6.957497 0.000000 8381 +alverson 0 1 6.957497 0.000000 8382 +kahan 0 1 6.957497 0.000000 8383 +korri 0 1 6.957497 0.000000 8384 +effectivedistribut 0 1 6.957497 0.000000 8385 +rudolph 0 1 6.957497 0.000000 8386 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..1668a8e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +last 1 314 1.098612 1.098612 14 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +updat 1 191 1.609438 1.609438 41 +algorithm 0 162 1.791759 0.000000 57 +file 1 132 1.945910 1.945910 70 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +provid 0 121 2.079442 0.000000 94 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +materi 0 75 2.639057 0.000000 221 +syllabu 1 67 2.708050 2.708050 247 +window 0 68 2.708050 0.000000 242 +handout 0 64 2.772589 0.000000 263 +automat 0 61 2.833213 0.000000 306 +sever 0 56 2.890372 0.000000 322 +format 1 48 3.044522 3.044522 356 +fast 0 42 3.218876 0.000000 429 +slide 0 38 3.295837 0.000000 467 +origin 0 38 3.295837 0.000000 472 +winter 1 36 3.367296 3.367296 500 +print 0 34 3.401197 0.000000 503 +titl 1 31 3.496508 3.496508 556 +richard 0 31 3.496508 0.000000 559 +usual 1 28 3.610918 3.610918 608 +load 0 28 3.610918 0.000000 601 +administr 0 27 3.637586 0.000000 628 +linux 0 27 3.637586 0.000000 631 +viewer 1 21 3.912023 3.912023 787 +martin 0 21 3.912023 0.000000 794 +latest 0 21 3.912023 0.000000 785 +render 0 17 4.110874 0.000000 947 +biologi 0 15 4.248495 0.000000 1049 +draft 0 14 4.317488 0.000000 1085 +latex 0 14 4.317488 0.000000 1064 +translat 1 13 4.382027 4.382027 1164 +larri 0 13 4.382027 0.000000 1142 +readabl 0 12 4.465908 0.000000 1258 +adob 1 7 5.010635 5.010635 1873 +molecular 0 7 5.010635 0.000000 1887 +ghostscript 0 7 5.010635 0.000000 1867 +smaller 0 7 5.010635 0.000000 1874 +legibl 0 7 5.010635 0.000000 1866 +acrobat 1 6 5.164786 5.164786 2063 +greatest 0 6 5.164786 0.000000 2073 +fewer 0 6 5.164786 0.000000 2074 +ruzzo 1 5 5.347108 5.347108 2345 +karp 0 5 5.347108 0.000000 2284 +isprefer 0 4 5.568345 0.000000 2729 +isfast 0 4 5.568345 0.000000 2730 +ghostscriptcan 0 4 5.568345 0.000000 2731 +tompaclass 0 3 5.857933 0.000000 3310 +faith 0 3 5.857933 0.000000 3363 +bboard 0 1 6.957497 0.000000 8227 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..917bee07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +mail 1 238 1.386294 1.386294 22 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +paper 2 205 1.609438 3.218876 38 +list 1 201 1.609438 1.609438 39 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +architectur 1 139 1.945910 1.945910 77 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +machin 1 129 2.079442 2.079442 95 +report 0 131 2.079442 0.000000 92 +welcom 0 122 2.079442 0.000000 99 +send 0 114 2.197225 0.000000 109 +memori 1 101 2.302585 2.302585 139 +technic 0 100 2.302585 0.000000 140 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +octob 0 89 2.397895 0.000000 156 +member 0 84 2.484907 0.000000 165 +start 0 83 2.484907 0.000000 173 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +tuesdai 2 73 2.639057 5.278114 219 +line 1 75 2.639057 2.639057 231 +meet 1 72 2.639057 2.639057 229 +workshop 0 71 2.639057 0.000000 239 +summari 0 73 2.639057 0.000000 237 +intellig 0 72 2.639057 0.000000 225 +differ 0 66 2.708050 0.000000 253 +integr 0 67 2.708050 0.000000 245 +receiv 0 66 2.708050 0.000000 244 +organ 1 65 2.772589 2.772589 265 +previou 1 62 2.772589 2.772589 290 +copi 0 63 2.772589 0.000000 284 +prof 0 64 2.772589 0.000000 273 +juli 0 60 2.833213 0.000000 305 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +processor 1 54 2.944439 2.944439 335 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +week 1 52 2.995732 2.995732 343 +case 0 51 2.995732 0.000000 351 +format 1 48 3.044522 3.044522 356 +quarter 1 47 3.091042 3.091042 389 +discuss 1 45 3.135494 3.135494 399 +execut 0 45 3.135494 0.000000 404 +might 0 41 3.218876 0.000000 426 +continu 0 39 3.258097 0.000000 448 +author 0 39 3.258097 0.000000 450 +credit 1 38 3.295837 3.295837 460 +slide 0 38 3.295837 0.000000 467 +formal 0 37 3.332205 0.000000 478 +ofth 1 36 3.367296 3.367296 491 +short 0 36 3.367296 0.000000 499 +either 0 35 3.401197 0.000000 506 +bibliographi 0 34 3.401197 0.000000 518 +jame 0 35 3.401197 0.000000 507 +posit 1 31 3.496508 3.496508 552 +progress 0 28 3.610918 0.000000 598 +usual 0 28 3.610918 0.000000 608 +load 0 28 3.610918 0.000000 601 +subject 0 26 3.688879 0.000000 647 +valu 1 25 3.737670 3.737670 665 +lead 1 23 3.806662 3.806662 718 +begin 0 23 3.806662 0.000000 716 +thank 0 23 3.806662 0.000000 721 +variabl 0 23 3.806662 0.000000 715 +chip 0 21 3.912023 0.000000 770 +leav 0 21 3.912023 0.000000 772 +anderson 1 19 4.007333 4.007333 860 +predict 0 19 4.007333 0.000000 855 +miss 0 19 4.007333 0.000000 866 +asplo 1 17 4.110874 4.110874 948 +brown 0 16 4.174387 0.000000 977 +micro 0 15 4.248495 0.000000 1031 +hopefulli 0 14 4.317488 0.000000 1071 +sai 0 13 4.382027 0.000000 1175 +rememb 0 12 4.465908 0.000000 1217 +literatur 0 11 4.553877 0.000000 1300 +isca 0 11 4.553877 0.000000 1354 +baer 0 11 4.553877 0.000000 1353 +jean 0 10 4.653960 0.000000 1440 +subscrib 1 9 4.753590 4.753590 1541 +herefor 0 9 4.753590 0.000000 1483 +wall 0 9 4.753590 0.000000 1553 +patterson 0 9 4.753590 0.000000 1554 +readm 0 8 4.875197 0.000000 1699 +burger 1 7 5.010635 5.010635 1889 +ruth 0 7 5.010635 0.000000 1870 +multicomput 0 7 5.010635 0.000000 1890 +goodman 0 7 5.010635 0.000000 1891 +fortun 0 7 5.010635 0.000000 1872 +tobe 0 6 5.164786 0.000000 1995 +ifyou 0 6 5.164786 0.000000 1992 +majordomo 0 6 5.164786 0.000000 2066 +philipos 0 5 5.347108 0.000000 2373 +appreci 0 5 5.347108 0.000000 2374 +volunt 0 5 5.347108 0.000000 2307 +andrea 0 5 5.347108 0.000000 2375 +lunch 1 3 5.857933 5.857933 3369 +shen 0 3 5.857933 0.000000 3370 +pong 0 3 5.857933 0.000000 3371 +stefano 0 3 5.857933 0.000000 3372 +kaxira 0 3 5.857933 0.000000 3373 +yelick 0 3 5.857933 0.000000 3374 +shortli 0 3 5.857933 0.000000 3375 +heat 0 2 6.263398 0.000000 4113 +gershoni 0 2 6.263398 0.000000 4513 +matthai 0 2 6.263398 0.000000 4514 +tabular 0 2 6.263398 0.000000 4515 +guru 0 2 6.263398 0.000000 4476 +wilkerson 0 2 6.263398 0.000000 4516 +dalli 0 2 6.263398 0.000000 4517 +datascalar 0 2 6.263398 0.000000 4518 +spsd 0 2 6.263398 0.000000 4519 +iram 0 2 6.263398 0.000000 4520 +cseg 1 1 6.957497 6.957497 8276 +lunchcs 0 1 6.957497 0.000000 8277 +lunchcours 0 1 6.957497 0.000000 8278 +loupbaermeet 0 1 6.957497 0.000000 8279 +withalmost 0 1 6.957497 0.000000 8280 +discussedat 0 1 6.957497 0.000000 8281 +byesteem 0 1 6.957497 0.000000 8282 +mostlyw 0 1 6.957497 0.000000 8283 +discussionson 0 1 6.957497 0.000000 8284 +quartersi 0 1 6.957497 0.000000 8285 +fromparticip 0 1 6.957497 0.000000 8286 +oncrit 0 1 6.957497 0.000000 8287 +hereread 0 1 6.957497 0.000000 8288 +morethem 0 1 6.957497 0.000000 8289 +molli 0 1 6.957497 0.000000 8290 +thestud 0 1 6.957497 0.000000 8291 +informallyor 0 1 6.957497 0.000000 8292 +lipasti 0 1 6.957497 0.000000 8293 +advanceprogrami 0 1 6.957497 0.000000 8294 +thesaulsburi 0 1 6.957497 0.000000 8295 +readashlei 0 1 6.957497 0.000000 8296 +saulsburi 0 1 6.957497 0.000000 8297 +fong 0 1 6.957497 0.000000 8298 +nowatzyk 0 1 6.957497 0.000000 8299 +fillo 0 1 6.957497 0.000000 8300 +keckler 0 1 6.957497 0.000000 8301 +machinelink 0 1 6.957497 0.000000 8302 +readdoug 0 1 6.957497 0.000000 8303 +neton 0 1 6.957497 0.000000 8304 +cardwel 0 1 6.957497 0.000000 8305 +fromm 0 1 6.957497 0.000000 8306 +keeton 0 1 6.957497 0.000000 8307 +kozyraki 0 1 6.957497 0.000000 8308 +thomasand 0 1 6.957497 0.000000 8309 +availableher 0 1 6.957497 0.000000 8310 +themajordomo 0 1 6.957497 0.000000 8311 +shouldinclud 0 1 6.957497 0.000000 8312 +lineblank 0 1 6.957497 0.000000 8313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..635ef698 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +cours 0 273 1.098612 0.000000 15 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +spring 0 131 2.079442 0.000000 88 +interact 0 62 2.772589 0.000000 270 +offer 0 43 3.178054 0.000000 414 +human 0 32 3.465736 0.000000 546 +pagecs 0 26 3.688879 0.000000 658 +experiment 0 26 3.688879 0.000000 645 +born 0 21 3.912023 0.000000 798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..80f5ebac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +data 1 170 1.791759 1.791759 49 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +compil 1 122 2.079442 2.079442 96 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +welcom 0 122 2.079442 0.000000 99 +specif 0 106 2.197225 0.000000 106 +send 0 114 2.197225 0.000000 109 +memori 1 101 2.302585 2.302585 139 +second 0 81 2.484907 0.000000 166 +optim 1 79 2.564949 2.564949 197 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +line 1 75 2.639057 2.639057 231 +meet 0 72 2.639057 0.000000 229 +practic 0 70 2.708050 0.000000 246 +organ 0 65 2.772589 0.000000 265 +wednesdai 0 64 2.772589 0.000000 261 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +special 1 56 2.890372 2.890372 320 +week 2 52 2.995732 5.991464 343 +approach 0 48 3.044522 0.000000 366 +without 0 50 3.044522 0.000000 370 +cost 0 37 3.332205 0.000000 480 +represent 0 35 3.401197 0.000000 512 +michael 0 35 3.401197 0.000000 514 +graph 0 30 3.555348 0.000000 576 +depend 0 29 3.583519 0.000000 583 +framework 0 28 3.610918 0.000000 606 +arrai 0 27 3.637586 0.000000 627 +subject 0 26 3.688879 0.000000 647 +valu 0 25 3.737670 0.000000 665 +flow 0 24 3.761200 0.000000 700 +leav 0 21 3.912023 0.000000 772 +alloc 0 20 3.951244 0.000000 821 +anderson 1 19 4.007333 4.007333 860 +andrew 0 19 4.007333 0.000000 849 +offici 0 18 4.060443 0.000000 894 +thoma 0 18 4.060443 0.000000 901 +regist 1 17 4.110874 4.110874 938 +analyz 0 17 4.110874 0.000000 925 +georg 0 16 4.174387 0.000000 994 +susan 0 15 4.248495 0.000000 1050 +todd 0 15 4.248495 0.000000 1051 +floor 0 14 4.317488 0.000000 1070 +dean 0 14 4.317488 0.000000 1104 +charl 1 13 4.382027 4.382027 1149 +sai 0 13 4.382027 0.000000 1175 +gupta 1 12 4.465908 4.465908 1241 +loew 0 12 4.465908 0.000000 1252 +iter 0 12 4.465908 0.000000 1206 +kenneth 0 12 4.465908 0.000000 1265 +grant 0 12 4.465908 0.000000 1216 +minimum 0 9 4.753590 0.000000 1555 +strength 0 9 4.753590 0.000000 1494 +subscrib 0 9 4.753590 0.000000 1541 +paradigm 0 8 4.875197 0.000000 1662 +lewi 0 8 4.875197 0.000000 1700 +erik 0 8 4.875197 0.000000 1701 +grove 0 8 4.875197 0.000000 1675 +roger 1 7 5.010635 5.010635 1892 +multicomput 0 7 5.010635 0.000000 1890 +fischer 0 7 5.010635 0.000000 1893 +reduct 0 7 5.010635 0.000000 1877 +ruth 0 7 5.010635 0.000000 1870 +banerje 0 6 5.164786 0.000000 2018 +mock 0 6 5.164786 0.000000 2087 +tullsen 0 6 5.164786 0.000000 2081 +majordomo 0 6 5.164786 0.000000 2066 +bottleneck 0 4 5.568345 0.000000 2769 +anoop 0 4 5.568345 0.000000 2770 +interprocedur 0 4 5.568345 0.000000 2771 +noel 0 3 5.857933 0.000000 3376 +crew 0 3 5.857933 0.000000 3347 +litvinov 0 3 5.857933 0.000000 3343 +garrett 0 3 5.857933 0.000000 3377 +jen 0 3 5.857933 0.000000 3378 +blank 0 3 5.857933 0.000000 3379 +seminarcs 0 2 6.263398 0.000000 4521 +eggersand 0 2 6.263398 0.000000 4522 +francoi 0 2 6.263398 0.000000 4523 +taxat 0 2 6.263398 0.000000 4524 +ernst 0 2 6.263398 0.000000 4525 +secoski 0 2 6.263398 0.000000 4526 +lazi 0 2 6.263398 0.000000 4527 +seminarcours 0 1 6.957497 0.000000 8314 +craigchambersmeet 0 1 6.957497 0.000000 8315 +butreal 0 1 6.957497 0.000000 8316 +atrium 0 1 6.957497 0.000000 8317 +scheduleweek 0 1 6.957497 0.000000 8318 +memspi 0 1 6.957497 0.000000 8319 +margaretmartonosi 0 1 6.957497 0.000000 8320 +consel 0 1 6.957497 0.000000 8321 +itsus 0 1 6.957497 0.000000 8322 +evelyn 0 1 6.957497 0.000000 8323 +duesterwald 0 1 6.957497 0.000000 8324 +rajiv 0 1 6.957497 0.000000 8325 +maryl 0 1 6.957497 0.000000 8326 +soffa 0 1 6.957497 0.000000 8327 +danielweis 0 1 6.957497 0.000000 8328 +bjarn 0 1 6.957497 0.000000 8329 +steensgaard 0 1 6.957497 0.000000 8330 +coalesc 0 1 6.957497 0.000000 8331 +appel 0 1 6.957497 0.000000 8332 +hooverand 0 1 6.957497 0.000000 8333 +zadeck 0 1 6.957497 0.000000 8334 +byprivthviraj 0 1 6.957497 0.000000 8335 +stevenkurland 0 1 6.957497 0.000000 8336 +knoblock 0 1 6.957497 0.000000 8337 +knoop 0 1 6.957497 0.000000 8338 +oliv 0 1 6.957497 0.000000 8339 +andbernhard 0 1 6.957497 0.000000 8340 +steffen 0 1 6.957497 0.000000 8341 +subscribecsek 0 1 6.957497 0.000000 8342 +shortlyrec 0 1 6.957497 0.000000 8343 +melodi 0 1 6.957497 0.000000 8344 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..fa636f61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +parallel 2 169 1.791759 3.583518 60 +data 2 170 1.791759 3.583518 49 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +object 0 138 1.945910 0.000000 79 +compil 2 122 2.079442 4.158884 96 +analysi 2 124 2.079442 4.158884 98 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +tool 0 117 2.079442 0.000000 93 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +mathemat 0 108 2.197225 0.000000 123 +access 0 102 2.302585 0.000000 136 +commun 1 95 2.397895 2.397895 157 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +environ 1 84 2.484907 2.484907 177 +librari 0 87 2.484907 0.000000 181 +larg 0 82 2.484907 0.000000 168 +optim 1 79 2.564949 2.564949 197 +messag 1 76 2.564949 2.564949 212 +mondai 0 77 2.564949 0.000000 206 +orient 0 80 2.564949 0.000000 205 +effici 0 73 2.639057 0.000000 233 +tuesdai 0 73 2.639057 0.000000 219 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +septemb 0 65 2.772589 0.000000 274 +local 1 55 2.944439 2.944439 334 +still 0 50 3.044522 0.000000 362 +numer 0 49 3.044522 0.000000 369 +quarter 1 47 3.091042 3.091042 389 +discuss 0 45 3.135494 0.000000 399 +cach 1 41 3.218876 3.218876 432 +live 0 40 3.258097 0.000000 451 +seminar 0 38 3.295837 0.000000 470 +open 0 38 3.295837 0.000000 469 +expect 0 37 3.332205 0.000000 484 +global 1 34 3.401197 3.401197 520 +task 1 25 3.737670 3.737670 678 +strategi 1 25 3.737670 3.737670 682 +supercomput 0 25 3.737670 0.000000 681 +scalabl 1 24 3.761200 3.761200 705 +input 1 23 3.806662 3.806662 727 +thread 1 23 3.806662 3.806662 722 +cooper 1 22 3.850148 3.850148 757 +output 1 21 3.912023 3.912023 788 +util 1 21 3.912023 3.912023 774 +exploit 1 20 3.951244 3.951244 836 +portabl 0 20 3.951244 0.000000 819 +eric 0 19 4.007333 0.000000 870 +runtim 0 19 4.007333 0.000000 858 +attend 0 18 4.060443 0.000000 893 +debug 0 17 4.110874 0.000000 944 +sign 0 16 4.174387 0.000000 970 +driven 0 15 4.248495 0.000000 1048 +stream 0 15 4.248495 0.000000 1015 +matlab 1 14 4.317488 4.317488 1081 +hopefulli 0 14 4.317488 0.000000 1071 +block 1 13 4.382027 4.382027 1183 +everyon 0 13 4.382027 0.000000 1148 +bodi 0 13 4.382027 0.000000 1178 +gupta 1 12 4.465908 4.465908 1241 +characterist 1 12 4.465908 4.465908 1257 +iter 1 12 4.465908 4.465908 1206 +loew 0 12 4.465908 0.000000 1252 +holidai 0 12 4.465908 0.000000 1224 +brad 0 12 4.465908 0.000000 1264 +loop 1 11 4.553877 4.553877 1310 +multithread 0 11 4.553877 0.000000 1315 +subscrib 0 9 4.753590 0.000000 1541 +rel 0 9 4.753590 0.000000 1487 +cross 1 8 4.875197 4.875197 1703 +reus 1 8 4.875197 4.875197 1661 +pldi 1 8 4.875197 4.875197 1704 +sean 0 8 4.875197 0.000000 1705 +core 1 7 5.010635 5.010635 1809 +spot 0 7 5.010635 0.000000 1894 +ruth 0 7 5.010635 0.000000 1870 +banerje 1 6 5.164786 5.164786 2018 +sung 1 6 5.164786 5.164786 2075 +reed 1 6 5.164786 5.164786 2086 +majordomo 0 6 5.164786 0.000000 2066 +zhou 0 6 5.164786 0.000000 2092 +icpp 1 5 5.347108 5.347108 2382 +cyclic 1 5 5.347108 5.347108 2383 +ppopp 1 4 5.568345 5.568345 2774 +choi 1 4 5.568345 5.568345 2732 +restructur 0 4 5.568345 0.000000 2775 +randal 0 4 5.568345 0.000000 2776 +ipp 1 3 5.857933 5.857933 3381 +atmospher 0 3 5.857933 0.000000 3388 +andwil 0 3 5.857933 0.000000 3335 +thepap 0 3 5.857933 0.000000 3254 +jason 0 3 5.857933 0.000000 3389 +blumof 0 3 5.857933 0.000000 3237 +foster 0 3 5.857933 0.000000 3159 +lcpc 1 2 6.263398 6.263398 4538 +kennedi 1 2 6.263398 6.263398 4539 +adv 1 2 6.263398 6.263398 4540 +chien 1 2 6.263398 6.263398 4541 +casual 0 2 6.263398 0.000000 4542 +subscribeto 0 2 6.263398 0.000000 4543 +deros 0 2 6.263398 0.000000 4474 +padua 0 2 6.263398 0.000000 4544 +kale 0 2 6.263398 0.000000 4545 +cilk 0 2 6.263398 0.000000 4242 +fritzson 0 2 6.263398 0.000000 4546 +potpourri 0 2 6.263398 0.000000 4547 +scalapack 1 1 6.957497 6.957497 8387 +ramaswami 1 1 6.957497 6.957497 8388 +hodg 1 1 6.957497 6.957497 8389 +mcintosh 1 1 6.957497 6.957497 8390 +chakarabarti 1 1 6.957497 6.957497 8391 +integer 1 1 6.957497 6.957497 8392 +crandal 1 1 6.957497 6.957497 8393 +aydt 1 1 6.957497 6.957497 8394 +bordawekar 1 1 6.957497 6.957497 8395 +choudahari 1 1 6.957497 6.957497 8396 +koelbel 1 1 6.957497 6.957497 8397 +paleczni 1 1 6.957497 6.957497 8398 +midkiff 1 1 6.957497 6.957497 8399 +fahring 1 1 6.957497 6.957497 8400 +hain 1 1 6.957497 6.957497 8401 +mehrotra 1 1 6.957497 6.957497 8402 +environmentslarri 0 1 6.957497 0.000000 8403 +snyderautumn 0 1 6.957497 0.000000 8404 +ten 0 1 6.957497 0.000000 8405 +ignit 0 1 6.957497 0.000000 8406 +hurri 0 1 6.957497 0.000000 8407 +cseo 0 1 6.957497 0.000000 8408 +datepaperpresentor 0 1 6.957497 0.000000 8409 +falcon 0 1 6.957497 0.000000 8410 +gallivan 0 1 6.957497 0.000000 8411 +gallopoulo 0 1 6.957497 0.000000 8412 +marsolf 0 1 6.957497 0.000000 8413 +ramkumar 0 1 6.957497 0.000000 8414 +forb 0 1 6.957497 0.000000 8415 +gotwal 0 1 6.957497 0.000000 8416 +sriniva 0 1 6.957497 0.000000 8417 +gannon 0 1 6.957497 0.000000 8418 +joerg 0 1 6.957497 0.000000 8419 +kuszmaul 0 1 6.957497 0.000000 8420 +leiserson 0 1 6.957497 0.000000 8421 +andersson 0 1 6.957497 0.000000 8422 +realign 0 1 6.957497 0.000000 8423 +kamachi 0 1 6.957497 0.000000 8424 +kusano 0 1 6.957497 0.000000 8425 +suehiro 0 1 6.957497 0.000000 8426 +tamura 0 1 6.957497 0.000000 8427 +sakon 0 1 6.957497 0.000000 8428 +rinard 0 1 6.957497 0.000000 8429 +abramson 0 1 6.957497 0.000000 8430 +michalak 0 1 6.957497 0.000000 8431 +sosic 0 1 6.957497 0.000000 8432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..a32399b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +mail 1 238 1.386294 1.386294 22 +list 1 201 1.609438 1.609438 39 +spring 0 131 2.079442 0.000000 88 +send 0 114 2.197225 0.000000 109 +need 0 98 2.302585 0.000000 135 +messag 0 76 2.564949 0.000000 212 +line 0 75 2.639057 0.000000 231 +variou 0 56 2.890372 0.000000 317 +summer 0 56 2.890372 0.000000 311 +week 0 52 2.995732 0.000000 343 +seminar 1 38 3.295837 3.295837 470 +winter 0 36 3.367296 0.000000 500 +autumn 1 31 3.496508 3.496508 558 +request 0 26 3.688879 0.000000 635 +alreadi 0 16 4.174387 0.000000 963 +bodi 0 13 4.382027 0.000000 1178 +web 0 12 4.465908 0.000000 1249 +subscrib 0 9 4.753590 0.000000 1541 +bit 0 7 5.010635 0.000000 1833 +crucial 0 5 5.347108 0.000000 2384 +ofinform 0 4 5.568345 0.000000 2707 +cancel 0 4 5.568345 0.000000 2746 +preliminariesif 0 1 6.957497 0.000000 8433 +besent 0 1 6.957497 0.000000 8434 +systemsin 0 1 6.957497 0.000000 8435 +quarterli 0 1 6.957497 0.000000 8436 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..860ade74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +read 1 154 1.791759 1.791759 47 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +file 1 132 1.945910 1.945910 70 +perform 1 143 1.945910 1.945910 74 +architectur 0 139 1.945910 0.000000 77 +final 0 116 2.197225 0.000000 108 +pleas 0 113 2.197225 0.000000 114 +manag 0 114 2.197225 0.000000 125 +memori 1 101 2.302585 2.302585 139 +access 0 102 2.302585 0.000000 136 +present 2 91 2.397895 4.795790 145 +appear 0 78 2.564949 0.000000 210 +meet 1 72 2.639057 2.639057 229 +symposium 0 72 2.639057 0.000000 238 +interact 0 62 2.772589 0.000000 270 +virtual 0 62 2.772589 0.000000 285 +summer 0 56 2.890372 0.000000 311 +principl 0 48 3.044522 0.000000 357 +quarter 1 47 3.091042 3.091042 389 +fridai 0 44 3.135494 0.000000 390 +discuss 0 45 3.135494 0.000000 399 +cach 0 41 3.218876 0.000000 432 +workstat 0 37 3.332205 0.000000 479 +connect 0 37 3.332205 0.000000 485 +global 0 34 3.401197 0.000000 520 +return 0 34 3.401197 0.000000 502 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +storag 0 31 3.496508 0.000000 553 +cluster 0 28 3.610918 0.000000 612 +mobil 0 23 3.806662 0.000000 730 +exploit 0 20 3.951244 0.000000 836 +log 0 19 4.007333 0.000000 857 +anderson 0 19 4.007333 0.000000 860 +hierarch 0 15 4.248495 0.000000 1018 +coher 0 14 4.317488 0.000000 1109 +weak 0 13 4.382027 0.000000 1159 +loew 0 12 4.465908 0.000000 1252 +impact 0 11 4.553877 0.000000 1334 +sosp 0 10 4.653960 0.000000 1416 +franklin 0 10 4.653960 0.000000 1436 +voelker 0 9 4.753590 0.000000 1557 +romer 0 8 4.875197 0.000000 1706 +feelei 0 7 5.010635 0.000000 1859 +chan 0 7 5.010635 0.000000 1876 +trend 0 7 5.010635 0.000000 1842 +wolman 0 6 5.164786 0.000000 2093 +tiwari 0 5 5.347108 0.000000 2385 +philipos 0 5 5.347108 0.000000 2373 +wewil 0 4 5.568345 0.000000 2688 +savag 0 4 5.568345 0.000000 2777 +serverless 0 3 5.857933 0.000000 3181 +litvinov 0 3 5.857933 0.000000 3343 +fiuczynski 0 3 5.857933 0.000000 3390 +wilk 0 2 6.263398 0.000000 4548 +hypervisor 0 2 6.263398 0.000000 4549 +sriram 0 2 6.263398 0.000000 4550 +quarterw 0 1 6.957497 0.000000 8437 +upcomingacm 0 1 6.957497 0.000000 8438 +havean 0 1 6.957497 0.000000 8439 +scheduleoct 0 1 6.957497 0.000000 8440 +autoraid 0 1 6.957497 0.000000 8441 +montgomeri 0 1 6.957497 0.000000 8442 +stackabl 0 1 6.957497 0.000000 8443 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..8b171ad6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +interest 0 384 0.693147 0.000000 11 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +perform 1 143 1.945910 1.945910 74 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +high 1 130 2.079442 2.079442 101 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +welcom 0 122 2.079442 0.000000 99 +document 0 121 2.079442 0.000000 89 +send 1 114 2.197225 2.197225 109 +pleas 0 113 2.197225 0.000000 114 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +version 0 113 2.197225 0.000000 122 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +follow 1 92 2.397895 2.397895 143 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +learn 0 86 2.484907 0.000000 170 +messag 1 76 2.564949 2.564949 212 +want 0 79 2.564949 0.000000 199 +state 0 76 2.564949 0.000000 207 +write 1 72 2.639057 2.639057 222 +materi 0 75 2.639057 0.000000 221 +appli 0 71 2.639057 0.000000 226 +sieg 0 69 2.708050 0.000000 260 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 1 64 2.772589 2.772589 261 +written 0 63 2.772589 0.000000 278 +guid 0 63 2.772589 0.000000 267 +import 0 65 2.772589 0.000000 282 +index 0 56 2.890372 0.000000 309 +unix 0 58 2.890372 0.000000 308 +faculti 0 56 2.890372 0.000000 325 +scientif 2 53 2.944439 5.888878 341 +allow 0 53 2.944439 0.000000 333 +cover 0 55 2.944439 0.000000 329 +found 0 53 2.944439 0.000000 337 +run 0 51 2.995732 0.000000 347 +quarter 0 47 3.091042 0.000000 389 +effect 0 46 3.091042 0.000000 385 +fast 1 42 3.218876 3.218876 429 +join 0 39 3.258097 0.000000 457 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +close 0 38 3.295837 0.000000 465 +credit 0 38 3.295837 0.000000 460 +workstat 1 37 3.332205 3.332205 479 +staff 0 36 3.367296 0.000000 490 +ad 0 32 3.465736 0.000000 544 +scientist 1 31 3.496508 3.496508 560 +rang 0 30 3.555348 0.000000 565 +semant 0 29 3.583519 0.000000 587 +platform 0 29 3.583519 0.000000 591 +releas 1 28 3.610918 3.610918 616 +arrai 0 27 3.637586 0.000000 627 +supercomput 1 25 3.737670 3.737670 681 +variabl 0 23 3.806662 0.000000 715 +dai 0 22 3.850148 0.000000 753 +exploit 0 20 3.951244 0.000000 836 +kernel 0 20 3.951244 0.000000 825 +assum 0 19 4.007333 0.000000 845 +account 0 18 4.060443 0.000000 882 +previous 0 17 4.110874 0.000000 923 +debug 0 17 4.110874 0.000000 944 +whole 0 17 4.110874 0.000000 940 +modern 1 16 4.174387 4.174387 966 +fortran 0 15 4.248495 0.000000 1027 +easili 0 14 4.317488 0.000000 1077 +matlab 0 14 4.317488 0.000000 1081 +bodi 1 13 4.382027 4.382027 1178 +block 0 13 4.382027 0.000000 1183 +loew 0 12 4.465908 0.000000 1252 +grant 0 12 4.465908 0.000000 1216 +loop 1 11 4.553877 4.553877 1310 +faster 0 11 4.553877 0.000000 1323 +reli 0 10 4.653960 0.000000 1411 +subscrib 1 9 4.753590 4.753590 1541 +suitabl 1 9 4.753590 4.753590 1486 +informationabout 0 9 4.753590 0.000000 1515 +elimin 0 9 4.753590 0.000000 1558 +simpli 0 8 4.875197 0.000000 1626 +migrat 0 7 5.010635 0.000000 1851 +largest 0 7 5.010635 0.000000 1858 +majordomo 1 6 5.164786 5.164786 2066 +syntax 0 6 5.164786 0.000000 2030 +sung 0 6 5.164786 0.000000 2075 +snyder 0 5 5.347108 0.000000 2359 +toth 0 4 5.568345 0.000000 2595 +ncsa 0 4 5.568345 0.000000 2767 +choi 0 4 5.568345 0.000000 2732 +audit 0 3 5.857933 0.000000 3391 +disciplin 0 3 5.857933 0.000000 3392 +informationcours 0 3 5.857933 0.000000 3167 +subscribeto 0 2 6.263398 0.000000 4543 +inner 0 2 6.263398 0.000000 4551 +zphigh 0 1 6.957497 0.000000 8444 +zpllarri 0 1 6.957497 0.000000 8445 +teamautumn 0 1 6.957497 0.000000 8446 +csezpl 0 1 6.957497 0.000000 8447 +usersmail 0 1 6.957497 0.000000 8448 +librarai 0 1 6.957497 0.000000 8449 +relatedinform 0 1 6.957497 0.000000 8450 +descriptionzpl 0 1 6.957497 0.000000 8451 +scientificprogram 0 1 6.957497 0.000000 8452 +infortran 0 1 6.957497 0.000000 8453 +dramaticallysimplifi 0 1 6.957497 0.000000 8454 +nuisanc 0 1 6.957497 0.000000 8455 +andtrivi 0 1 6.957497 0.000000 8456 +byrecompil 0 1 6.957497 0.000000 8457 +wysiwyg 0 1 6.957497 0.000000 8458 +booknon 0 1 6.957497 0.000000 8459 +onin 0 1 6.957497 0.000000 8460 +zplprogram 0 1 6.957497 0.000000 8461 +prerequisitesfamiliar 0 1 6.957497 0.000000 8462 +ormatlab 0 1 6.957497 0.000000 8463 +remotezpl 0 1 6.957497 0.000000 8464 +compileroth 0 1 6.957497 0.000000 8465 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..52b57db4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +lectur 1 135 1.945910 1.945910 73 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +final 1 116 2.197225 2.197225 108 +topic 1 114 2.197225 2.197225 110 +version 0 113 2.197225 0.000000 122 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +exam 1 86 2.484907 2.484907 169 +contain 0 81 2.484907 0.000000 174 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +messag 0 76 2.564949 0.000000 212 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +logic 1 71 2.639057 2.639057 230 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +sieg 1 69 2.708050 2.708050 260 +goal 0 66 2.708050 0.000000 250 +syllabu 0 67 2.708050 0.000000 247 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +evalu 0 64 2.772589 0.000000 266 +previou 0 62 2.772589 0.000000 290 +publish 0 57 2.890372 0.000000 326 +think 0 57 2.890372 0.000000 314 +maintain 1 51 2.995732 2.995732 342 +digit 0 52 2.995732 0.000000 348 +frequent 0 49 3.044522 0.000000 367 +archiv 0 49 3.044522 0.000000 364 +quarter 1 47 3.091042 3.091042 389 +adapt 0 46 3.091042 0.000000 387 +keep 0 44 3.135494 0.000000 409 +textbook 0 44 3.135494 0.000000 397 +even 0 45 3.135494 0.000000 393 +announc 1 40 3.258097 3.258097 441 +author 0 39 3.258097 0.000000 450 +slide 0 38 3.295837 0.000000 467 +credit 0 38 3.295837 0.000000 460 +expect 0 37 3.332205 0.000000 484 +purpos 0 37 3.332205 0.000000 481 +especi 0 36 3.367296 0.000000 496 +copyright 0 36 3.367296 0.000000 495 +go 0 33 3.433987 0.000000 529 +ad 0 32 3.465736 0.000000 544 +collabor 0 32 3.465736 0.000000 543 +autumn 1 31 3.496508 3.496508 558 +quot 0 29 3.583519 0.000000 582 +administr 1 27 3.637586 3.637586 628 +mind 0 27 3.637586 0.000000 632 +static 0 27 3.637586 0.000000 619 +notic 0 25 3.737670 0.000000 675 +accur 0 25 3.737670 0.000000 680 +tell 0 21 3.912023 0.000000 777 +anderson 0 19 4.007333 0.000000 860 +feedback 0 19 4.007333 0.000000 854 +aid 0 18 4.060443 0.000000 904 +whole 0 17 4.110874 0.000000 940 +weekli 0 17 4.110874 0.000000 919 +weslei 1 16 4.174387 4.174387 983 +portion 0 16 4.174387 0.000000 971 +webmast 1 15 4.248495 4.248495 1045 +anonym 0 14 4.317488 0.000000 1100 +reprint 0 14 4.317488 0.000000 1097 +everyon 0 13 4.382027 0.000000 1148 +quizz 0 13 4.382027 0.000000 1151 +addison 1 12 4.465908 4.465908 1230 +workload 0 12 4.465908 0.000000 1210 +overal 0 12 4.465908 0.000000 1254 +duli 0 12 4.465908 0.000000 1248 +benjamin 1 11 4.553877 4.553877 1296 +evolut 0 11 4.553877 0.000000 1314 +nonprofit 0 11 4.553877 0.000000 1339 +cheat 0 10 4.653960 0.000000 1395 +desir 0 9 4.753590 0.000000 1542 +cum 1 8 4.875197 4.875197 1619 +bunch 0 7 5.010635 0.000000 1861 +gaetano 1 6 5.164786 5.164786 2068 +borriello 1 5 5.347108 5.347108 2349 +corei 1 4 5.568345 5.568345 2718 +contemporari 0 4 5.568345 0.000000 2719 +katz 1 3 5.857933 5.857933 3276 +corin 0 3 5.857933 0.000000 3311 +aweekli 0 3 5.857933 0.000000 3312 +andersonwelcom 0 2 6.263398 0.000000 4400 +tocs 0 2 6.263398 0.000000 4401 +messagess 0 2 6.263398 0.000000 4402 +synario 0 2 6.263398 0.000000 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..c5cc7482 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +class 0 199 1.609438 0.000000 37 +avail 0 169 1.791759 0.000000 48 +instructor 0 108 2.197225 0.000000 107 +present 0 91 2.397895 0.000000 145 +internet 0 83 2.484907 0.000000 186 +journal 0 83 2.484907 0.000000 183 +nation 0 74 2.639057 0.000000 240 +sieg 0 69 2.708050 0.000000 260 +futur 0 41 3.218876 0.000000 427 +societi 1 40 3.258097 3.258097 456 +focu 0 30 3.555348 0.000000 571 +relev 0 26 3.688879 0.000000 637 +born 0 21 3.912023 0.000000 798 +thur 0 19 4.007333 0.000000 847 +social 0 13 4.382027 0.000000 1123 +econom 0 13 4.382027 0.000000 1184 +alan 0 13 4.382027 0.000000 1146 +tue 0 11 4.553877 0.000000 1308 +ethic 0 7 5.010635 0.000000 1786 +legal 0 6 5.164786 0.000000 2094 +highwai 0 6 5.164786 0.000000 2095 +implic 0 4 5.568345 0.000000 2696 +societycs 0 1 6.957497 0.000000 8466 +societywelcom 0 1 6.957497 0.000000 8467 +wintercs 0 1 6.957497 0.000000 8468 +andglob 0 1 6.957497 0.000000 8469 +syllabusclass 0 1 6.957497 0.000000 8470 +schedulelink 0 1 6.957497 0.000000 8471 +sitesbook 0 1 6.957497 0.000000 8472 +referenceassignmentsassign 0 1 6.957497 0.000000 8473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..647863a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +cours 0 273 1.098612 0.000000 15 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +address 0 170 1.791759 0.000000 62 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +topic 0 114 2.197225 0.000000 110 +question 1 91 2.397895 2.397895 141 +select 0 91 2.397895 0.000000 154 +search 0 95 2.397895 0.000000 155 +build 0 85 2.484907 0.000000 184 +messag 0 76 2.564949 0.000000 212 +intellig 0 72 2.639057 0.000000 225 +sieg 1 69 2.708050 2.708050 260 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +plan 0 65 2.772589 0.000000 272 +index 0 56 2.890372 0.000000 309 +reason 0 57 2.890372 0.000000 318 +archiv 0 49 3.044522 0.000000 364 +quarter 0 47 3.091042 0.000000 389 +past 0 42 3.218876 0.000000 428 +staff 0 36 3.367296 0.000000 490 +represent 0 35 3.401197 0.000000 512 +pagecs 0 26 3.688879 0.000000 658 +fundament 0 25 3.737670 0.000000 661 +methodolog 0 23 3.806662 0.000000 733 +outlin 0 17 4.110874 0.000000 914 +nick 0 13 4.382027 0.000000 1180 +pose 0 9 4.753590 0.000000 1535 +depth 0 8 4.875197 0.000000 1636 +marc 0 8 4.875197 0.000000 1680 +uncertainti 0 7 5.010635 0.000000 1882 +machinelearn 0 6 5.164786 0.000000 2084 +anin 0 3 5.857933 0.000000 3354 +assignmentsassign 0 3 5.857933 0.000000 3342 +mailinglist 0 3 5.857933 0.000000 3325 +intelligencefal 0 2 6.263398 0.000000 4477 +andchalleng 0 2 6.263398 0.000000 4478 +intelligentmachin 0 2 6.263398 0.000000 4479 +agentarchitectur 0 2 6.263398 0.000000 4480 +weldweld 0 2 6.263398 0.000000 4481 +friedmanfriedman 0 2 6.263398 0.000000 4482 +kushmericknick 0 2 6.263398 0.000000 4483 +examsgradingresourcesth 0 2 6.263398 0.000000 4484 +topicsprojectread 0 1 6.957497 0.000000 8474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..66edb542 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +paper 0 205 1.609438 0.000000 38 +read 1 154 1.791759 1.791759 47 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +provid 1 121 2.079442 2.079442 94 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +check 2 115 2.197225 4.394450 118 +make 1 111 2.197225 2.197225 120 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +user 1 104 2.302585 2.302585 137 +peopl 0 96 2.302585 0.000000 132 +comment 1 93 2.397895 2.397895 146 +follow 0 92 2.397895 0.000000 143 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +info 0 85 2.484907 0.000000 176 +chang 0 82 2.484907 0.000000 163 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +decemb 0 80 2.564949 0.000000 215 +html 1 75 2.639057 2.639057 235 +intellig 1 72 2.639057 2.639057 225 +meet 0 72 2.639057 0.000000 229 +tuesdai 0 73 2.639057 0.000000 219 +summari 0 73 2.639057 0.000000 237 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +back 1 60 2.833213 2.833213 297 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +index 1 56 2.890372 2.890372 309 +detail 0 57 2.890372 0.000000 321 +anoth 1 45 3.135494 3.135494 408 +execut 0 45 3.135494 0.000000 404 +discuss 0 45 3.135494 0.000000 399 +show 1 43 3.178054 3.178054 417 +futur 0 41 3.218876 0.000000 427 +review 0 42 3.218876 0.000000 425 +paul 1 38 3.295837 3.295837 471 +winter 0 36 3.367296 0.000000 500 +short 0 36 3.367296 0.000000 499 +manual 0 35 3.401197 0.000000 504 +articl 0 33 3.433987 0.000000 530 +ad 0 32 3.465736 0.000000 544 +idea 0 32 3.465736 0.000000 545 +releas 0 28 3.610918 0.000000 616 +except 0 28 3.610918 0.000000 607 +rather 0 26 3.688879 0.000000 642 +wai 0 25 3.737670 0.000000 662 +mike 0 24 3.761200 0.000000 703 +displai 0 23 3.806662 0.000000 712 +miscellan 0 23 3.806662 0.000000 731 +instal 0 22 3.850148 0.000000 754 +sent 0 22 3.850148 0.000000 763 +offici 0 18 4.060443 0.000000 894 +statu 0 18 4.060443 0.000000 885 +regist 1 17 4.110874 4.110874 938 +anyon 0 17 4.110874 0.000000 916 +side 0 15 4.248495 0.000000 1022 +someon 0 13 4.382027 0.000000 1128 +nick 0 13 4.382027 0.000000 1180 +rememb 1 12 4.465908 4.465908 1217 +usenix 0 12 4.465908 0.000000 1240 +extra 0 11 4.553877 0.000000 1312 +guess 0 10 4.653960 0.000000 1443 +mosaic 0 10 4.653960 0.000000 1426 +bring 0 10 4.653960 0.000000 1430 +perspect 0 10 4.653960 0.000000 1437 +kurt 0 9 4.753590 0.000000 1548 +filter 1 8 4.875197 4.875197 1641 +on 0 8 4.875197 0.000000 1628 +guidelin 0 7 5.010635 0.000000 1832 +impress 0 6 5.164786 0.000000 2096 +begun 0 5 5.347108 0.000000 2386 +older 0 5 5.347108 0.000000 2387 +cacm 0 5 5.347108 0.000000 2388 +glimps 1 4 5.568345 5.568345 2778 +ics 0 4 5.568345 0.000000 2779 +rambl 0 3 5.857933 0.000000 3308 +vagu 0 3 5.857933 0.000000 3393 +towrit 0 2 6.263398 0.000000 4207 +phoenix 0 2 6.263398 0.000000 4552 +belief 0 2 6.263398 0.000000 4553 +zephyr 1 1 6.957497 6.957497 8475 +siegcreat 0 1 6.957497 0.000000 8476 +scriptspleas 0 1 6.957497 0.000000 8477 +tothem 0 1 6.957497 0.000000 8478 +zwhere 0 1 6.957497 0.000000 8479 +mosiac 0 1 6.957497 0.000000 8480 +znol 0 1 6.957497 0.000000 8481 +zwatch 0 1 6.957497 0.000000 8482 +zlocat 0 1 6.957497 0.000000 8483 +releg 0 1 6.957497 0.000000 8484 +grumbl 0 1 6.957497 0.000000 8485 +luddit 0 1 6.957497 0.000000 8486 +itout 0 1 6.957497 0.000000 8487 +withci 0 1 6.957497 0.000000 8488 +theentir 0 1 6.957497 0.000000 8489 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..0ac92380 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +studi 0 120 2.079442 0.000000 91 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +instructor 0 108 2.197225 0.000000 107 +memori 0 101 2.302585 0.000000 139 +commun 0 95 2.397895 0.000000 157 +control 0 82 2.484907 0.000000 164 +info 0 85 2.484907 0.000000 176 +involv 0 71 2.639057 0.000000 227 +organ 0 65 2.772589 0.000000 265 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +small 0 39 3.258097 0.000000 447 +open 1 38 3.295837 3.295837 469 +credit 0 38 3.295837 0.000000 460 +taken 0 31 3.496508 0.000000 555 +prerequisit 0 19 4.007333 0.000000 846 +devic 0 16 4.174387 0.000000 1002 +consent 0 5 5.347108 0.000000 2389 +semesterli 0 4 5.568345 0.000000 2780 +freshmen 0 2 6.263398 0.000000 4554 +computerhardwar 0 1 6.957497 0.000000 8490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..9b5f16ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +design 1 213 1.386294 1.386294 25 +fall 0 181 1.609438 0.000000 40 +address 0 170 1.791759 0.000000 62 +architectur 0 139 1.945910 0.000000 77 +introduct 0 126 2.079442 0.000000 87 +structur 1 106 2.197225 2.197225 105 +manag 0 114 2.197225 0.000000 125 +memori 1 101 2.302585 2.302585 139 +control 0 82 2.484907 0.000000 164 +info 0 85 2.484907 0.000000 176 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +cach 0 41 3.218876 0.000000 432 +credit 0 38 3.295837 0.000000 460 +compon 0 30 3.555348 0.000000 570 +hierarchi 0 22 3.850148 0.000000 744 +prerequisit 0 19 4.007333 0.000000 846 +interrupt 0 7 5.010635 0.000000 1793 +microprogram 0 4 5.568345 0.000000 2604 +semesterli 0 4 5.568345 0.000000 2780 +andc 0 1 6.957497 0.000000 8491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..d0209967 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +fall 0 181 1.609438 0.000000 40 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +architectur 1 139 1.945910 1.945910 77 +process 0 142 1.945910 0.000000 72 +high 0 130 2.079442 0.000000 101 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +credit 0 38 3.295837 0.000000 460 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +multiprocessor 0 28 3.610918 0.000000 605 +flow 0 24 3.761200 0.000000 700 +prerequisit 0 19 4.007333 0.000000 846 +semesterli 0 4 5.568345 0.000000 2780 +andpipelin 0 1 6.957497 0.000000 8492 +performancemachin 0 1 6.957497 0.000000 8493 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..5d7bda2b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +fall 0 181 1.609438 0.000000 40 +parallel 1 169 1.791759 1.791759 60 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +architectur 0 139 1.945910 0.000000 77 +machin 1 129 2.079442 2.079442 95 +instructor 0 108 2.197225 0.000000 107 +advanc 0 99 2.302585 0.000000 130 +info 0 85 2.484907 0.000000 176 +special 0 56 2.890372 0.000000 320 +principl 0 48 3.044522 0.000000 357 +credit 0 38 3.295837 0.000000 460 +multi 0 36 3.367296 0.000000 493 +detect 0 26 3.688879 0.000000 646 +prerequisit 0 19 4.007333 0.000000 846 +interconnect 0 17 4.110874 0.000000 937 +coher 0 14 4.317488 0.000000 1109 +dataflow 0 5 5.347108 0.000000 2390 +consent 0 5 5.347108 0.000000 2389 +semesterli 0 4 5.568345 0.000000 2780 +simd 0 3 5.857933 0.000000 3360 +mimd 0 3 5.857933 0.000000 3361 +vectorizingcompil 0 1 6.957497 0.000000 8494 +processorsynchron 0 1 6.957497 0.000000 8495 +purposeprocessor 0 1 6.957497 0.000000 8496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..1c2b90f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,244 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 2 329 1.098612 2.197224 16 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +softwar 0 220 1.386294 0.000000 30 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +avail 2 169 1.791759 3.583518 48 +read 1 154 1.791759 1.791759 47 +hour 1 165 1.791759 1.791759 46 +wisconsin 0 169 1.791759 0.000000 54 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +assign 2 135 1.945910 3.891820 66 +problem 2 147 1.945910 3.891820 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +final 2 116 2.197225 4.394450 108 +topic 1 114 2.197225 2.197225 110 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +manag 0 114 2.197225 0.000000 125 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +book 1 99 2.302585 2.302585 131 +advanc 1 99 2.302585 2.302585 130 +section 2 94 2.397895 4.795790 149 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +real 0 93 2.397895 0.000000 144 +question 0 91 2.397895 0.000000 141 +learn 1 86 2.484907 2.484907 170 +exam 1 86 2.484907 2.484907 169 +start 0 83 2.484907 0.000000 173 +second 0 81 2.484907 0.000000 166 +activ 0 84 2.484907 0.000000 182 +solut 0 82 2.484907 0.000000 162 +chang 0 82 2.484907 0.000000 163 +april 1 77 2.564949 2.564949 196 +mondai 1 77 2.564949 2.564949 206 +orient 1 80 2.564949 2.564949 205 +messag 1 76 2.564949 2.564949 212 +come 0 78 2.564949 0.000000 202 +homework 0 79 2.564949 0.000000 193 +dynam 0 76 2.564949 0.000000 194 +tuesdai 1 73 2.639057 2.639057 219 +solv 0 73 2.639057 0.000000 234 +goal 1 66 2.708050 2.708050 250 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +differ 0 66 2.708050 0.000000 253 +receiv 0 66 2.708050 0.000000 244 +window 0 68 2.708050 0.000000 242 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +organ 0 65 2.772589 0.000000 265 +copi 0 63 2.772589 0.000000 284 +import 0 65 2.772589 0.000000 282 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +handout 0 64 2.772589 0.000000 263 +virtual 0 62 2.772589 0.000000 285 +march 1 61 2.833213 2.833213 295 +room 0 59 2.833213 0.000000 301 +unix 1 58 2.890372 2.890372 308 +semest 1 58 2.890372 2.890372 312 +detail 0 57 2.890372 0.000000 321 +variou 0 56 2.890372 0.000000 317 +overview 0 56 2.890372 0.000000 323 +februari 2 54 2.944439 5.888878 328 +three 1 54 2.944439 2.944439 330 +week 2 52 2.995732 5.991464 343 +hardwar 0 51 2.995732 0.000000 350 +date 0 51 2.995732 0.000000 344 +run 0 51 2.995732 0.000000 347 +set 1 50 3.044522 3.044522 361 +appoint 0 49 3.044522 0.000000 358 +done 0 47 3.091042 0.000000 381 +could 0 46 3.091042 0.000000 383 +discuss 1 45 3.135494 3.135494 399 +answer 1 45 3.135494 3.135494 391 +fridai 1 44 3.135494 3.135494 390 +favorit 0 44 3.135494 0.000000 410 +midterm 0 45 3.135494 0.000000 392 +mark 0 44 3.135494 0.000000 403 +around 0 43 3.178054 0.000000 415 +past 1 42 3.218876 3.218876 428 +late 1 40 3.258097 3.258097 439 +probabl 1 40 3.258097 3.258097 455 +credit 0 38 3.295837 0.000000 460 +hand 1 37 3.332205 3.332205 475 +workstat 1 37 3.332205 3.332205 479 +ofth 0 36 3.367296 0.000000 491 +post 0 35 3.401197 0.000000 505 +print 0 34 3.401197 0.000000 503 +concurr 0 34 3.401197 0.000000 501 +eduoffic 1 33 3.433987 3.433987 531 +taught 0 33 3.433987 0.000000 526 +independ 0 32 3.465736 0.000000 548 +option 0 30 3.555348 0.000000 575 +rang 0 30 3.555348 0.000000 565 +secur 0 30 3.555348 0.000000 577 +turn 0 29 3.583519 0.000000 586 +though 0 27 3.637586 0.000000 622 +trace 0 25 3.737670 0.000000 677 +strategi 0 25 3.737670 0.000000 682 +store 0 24 3.761200 0.000000 693 +dai 1 22 3.850148 3.850148 753 +inth 0 22 3.850148 0.000000 741 +tent 0 22 3.850148 0.000000 739 +cooper 0 22 3.850148 0.000000 757 +disk 0 22 3.850148 0.000000 747 +leav 0 21 3.912023 0.000000 772 +alloc 1 20 3.951244 3.951244 821 +sure 0 20 3.951244 0.000000 813 +minut 0 20 3.951244 0.000000 810 +entir 0 20 3.951244 0.000000 811 +break 0 20 3.951244 0.000000 812 +attend 0 18 4.060443 0.000000 893 +accept 0 18 4.060443 0.000000 879 +account 0 18 4.060443 0.000000 882 +weekli 0 17 4.110874 0.000000 919 +monitor 0 17 4.110874 0.000000 941 +analyz 0 17 4.110874 0.000000 925 +regist 0 17 4.110874 0.000000 938 +debug 0 17 4.110874 0.000000 944 +segment 0 17 4.110874 0.000000 931 +quiz 1 16 4.174387 4.174387 990 +modern 0 16 4.174387 0.000000 966 +devic 0 16 4.174387 0.000000 1002 +purchas 0 15 4.248495 0.000000 1030 +drive 0 15 4.248495 0.000000 1052 +driven 0 15 4.248495 0.000000 1048 +happi 0 14 4.317488 0.000000 1079 +quizz 2 13 4.382027 8.764054 1151 +jonathan 0 13 4.382027 0.000000 1174 +cannot 0 13 4.382027 0.000000 1144 +solari 1 12 4.465908 4.465908 1238 +readi 0 12 4.465908 0.000000 1242 +count 0 12 4.465908 0.000000 1239 +extra 0 11 4.553877 0.000000 1312 +mainli 0 10 4.653960 0.000000 1432 +penalti 0 10 4.653960 0.000000 1405 +bart 1 9 4.753590 4.753590 1559 +recit 0 9 4.753590 0.000000 1475 +quantit 0 8 4.875197 0.000000 1654 +absolut 0 8 4.875197 0.000000 1646 +partner 0 8 4.875197 0.000000 1648 +replac 0 8 4.875197 0.000000 1668 +dispatch 0 7 5.010635 0.000000 1791 +whatev 0 6 5.164786 0.000000 2097 +transcript 0 6 5.164786 0.000000 2067 +drop 0 6 5.164786 0.000000 2008 +averag 0 6 5.164786 0.000000 2098 +madisoncomput 0 5 5.347108 0.000000 2391 +poorli 0 4 5.568345 0.000000 2781 +maximum 0 4 5.568345 0.000000 2632 +csphone 1 3 5.857933 5.857933 3394 +departmentc 0 3 5.857933 0.000000 3395 +millerc 0 3 5.857933 0.000000 3396 +tanenbaum 0 3 5.857933 0.000000 3397 +programmingassign 0 3 5.857933 0.000000 3398 +ofobject 0 3 5.857933 0.000000 3399 +thrash 0 3 5.857933 0.000000 3400 +bybart 0 3 5.857933 0.000000 3401 +semaphor 1 2 6.263398 6.263398 4555 +milleremail 0 2 6.263398 0.000000 4556 +noonor 0 2 6.263398 0.000000 4557 +weyer 0 2 6.263398 0.000000 4558 +notesar 0 2 6.263398 0.000000 4559 +materialcov 0 2 6.263398 0.000000 4140 +youdon 0 2 6.263398 0.000000 4444 +problemssolut 0 2 6.263398 0.000000 4241 +theproblem 0 2 6.263398 0.000000 4560 +andlook 0 2 6.263398 0.000000 4561 +havethre 0 2 6.263398 0.000000 4562 +daysof 0 2 6.263398 0.000000 4563 +eachof 0 2 6.263398 0.000000 4564 +lowest 0 2 6.263398 0.000000 4565 +breakweek 0 2 6.263398 0.000000 4566 +systemsnew 0 1 6.957497 0.000000 8497 +stufffin 0 1 6.957497 0.000000 8498 +staffinstructor 0 1 6.957497 0.000000 8499 +karuna 0 1 6.957497 0.000000 8500 +muthiahemail 0 1 6.957497 0.000000 8501 +muthiah 0 1 6.957497 0.000000 8502 +weyersemail 0 1 6.957497 0.000000 8503 +materialsth 0 1 6.957497 0.000000 8504 +thelectur 0 1 6.957497 0.000000 8505 +textbookmodern 0 1 6.957497 0.000000 8506 +pohl 0 1 6.957497 0.000000 8507 +systemsandobject 0 1 6.957497 0.000000 8508 +sectionslectur 0 1 6.957497 0.000000 8509 +sciencesdiscuss 0 1 6.957497 0.000000 8510 +nolandnot 0 1 6.957497 0.000000 8511 +occas 0 1 6.957497 0.000000 8512 +quizzesther 0 1 6.957497 0.000000 8513 +thediscuss 0 1 6.957497 0.000000 8514 +usetrac 0 1 6.957497 0.000000 8515 +setsdur 0 1 6.957497 0.000000 8516 +severalwritten 0 1 6.957497 0.000000 8517 +synchronizationprimit 0 1 6.957497 0.000000 8518 +workassign 0 1 6.957497 0.000000 8519 +assignmentthat 0 1 6.957497 0.000000 8520 +weekof 0 1 6.957497 0.000000 8521 +cheatingprogram 0 1 6.957497 0.000000 8522 +cheater 0 1 6.957497 0.000000 8523 +receivingan 0 1 6.957497 0.000000 8524 +facilitiesw 0 1 6.957497 0.000000 8525 +policyif 0 1 6.957497 0.000000 8526 +beno 0 1 6.957497 0.000000 8527 +scheduleth 0 1 6.957497 0.000000 8528 +processesweek 0 1 6.957497 0.000000 8529 +creationweek 0 1 6.957497 0.000000 8530 +synchronizationweek 0 1 6.957497 0.000000 8531 +semaphoresweek 0 1 6.957497 0.000000 8532 +monitorsweek 0 1 6.957497 0.000000 8533 +deadlocksweek 0 1 6.957497 0.000000 8534 +relocationweek 0 1 6.957497 0.000000 8535 +tlbsweek 0 1 6.957497 0.000000 8536 +filesweek 0 1 6.957497 0.000000 8537 +directoriesweek 0 1 6.957497 0.000000 8538 +protectionweek 0 1 6.957497 0.000000 8539 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..420930f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +file 0 132 1.945910 0.000000 70 +spring 1 131 2.079442 2.079442 88 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +follow 0 92 2.397895 0.000000 143 +imag 0 91 2.397895 0.000000 161 +pictur 0 89 2.397895 0.000000 160 +internet 1 83 2.484907 2.484907 186 +requir 0 81 2.484907 0.000000 167 +chang 0 82 2.484907 0.000000 163 +april 1 77 2.564949 2.564949 196 +mondai 0 77 2.564949 0.000000 206 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +januari 1 62 2.772589 2.772589 264 +wednesdai 0 64 2.772589 0.000000 261 +written 0 63 2.772589 0.000000 278 +march 1 61 2.833213 2.833213 295 +februari 1 54 2.944439 2.944439 328 +week 1 52 2.995732 2.995732 343 +appoint 0 49 3.044522 0.000000 358 +right 0 48 3.044522 0.000000 363 +could 0 46 3.091042 0.000000 383 +fridai 0 44 3.135494 0.000000 390 +discuss 0 45 3.135494 0.000000 399 +procedur 0 36 3.367296 0.000000 488 +eduoffic 0 33 3.433987 0.000000 531 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +client 0 25 3.737670 0.000000 679 +honor 1 23 3.806662 3.806662 729 +mostli 0 19 4.007333 0.000000 869 +eric 0 19 4.007333 0.000000 870 +miller 1 17 4.110874 4.110874 949 +remot 0 15 4.248495 0.000000 1041 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +larri 0 13 4.382027 0.000000 1142 +wood 0 11 4.553877 0.000000 1355 +bart 1 9 4.753590 4.753590 1559 +laru 0 9 4.753590 0.000000 1560 +madisoncomput 0 5 5.347108 0.000000 2391 +bach 0 4 5.568345 0.000000 2708 +departmentc 0 3 5.857933 0.000000 3395 +millerc 0 3 5.857933 0.000000 3396 +csphone 0 3 5.857933 0.000000 3394 +landweb 0 3 5.857933 0.000000 3402 +bybart 0 3 5.857933 0.000000 3401 +milleremail 0 2 6.263398 0.000000 4556 +noonor 0 2 6.263398 0.000000 4557 +breakweek 0 2 6.263398 0.000000 4566 +seminarunivers 0 1 6.957497 0.000000 8540 +seminarinstructor 0 1 6.957497 0.000000 8541 +lectureslectur 0 1 6.957497 0.000000 8542 +sciencesclass 0 1 6.957497 0.000000 8543 +schedulether 0 1 6.957497 0.000000 8544 +attendal 0 1 6.957497 0.000000 8545 +overviewweek 0 1 6.957497 0.000000 8546 +protocolsweek 0 1 6.957497 0.000000 8547 +callsweek 0 1 6.957497 0.000000 8548 +securityweek 0 1 6.957497 0.000000 8549 +encryptionweek 0 1 6.957497 0.000000 8550 +netweek 0 1 6.957497 0.000000 8551 +systemsweek 0 1 6.957497 0.000000 8552 +supercomputerweek 0 1 6.957497 0.000000 8553 +javaweek 0 1 6.957497 0.000000 8554 +discussionslast 0 1 6.957497 0.000000 8555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..2b6846bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +paper 2 205 1.609438 3.218876 38 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +modifi 0 178 1.609438 0.000000 35 +read 2 154 1.791759 3.583518 47 +wisconsin 0 169 1.791759 0.000000 54 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +hour 0 165 1.791759 0.000000 46 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +schedul 0 119 2.079442 0.000000 85 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +structur 0 106 2.197225 0.000000 105 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +advanc 1 99 2.302585 2.302585 130 +memori 0 101 2.302585 0.000000 139 +text 0 98 2.302585 0.000000 133 +part 0 98 2.302585 0.000000 129 +comment 1 93 2.397895 2.397895 146 +grade 1 90 2.397895 2.397895 142 +proceed 0 93 2.397895 0.000000 152 +second 1 81 2.484907 2.484907 166 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +activ 0 84 2.484907 0.000000 182 +exam 0 86 2.484907 0.000000 169 +issu 0 78 2.564949 0.000000 211 +good 0 77 2.564949 0.000000 200 +write 1 72 2.639057 2.639057 222 +tuesdai 1 73 2.639057 2.639057 219 +name 0 72 2.639057 0.000000 220 +meet 0 72 2.639057 0.000000 229 +involv 0 71 2.639057 0.000000 227 +summari 0 73 2.639057 0.000000 237 +thursdai 1 70 2.708050 2.708050 241 +import 1 65 2.772589 2.772589 282 +handout 0 64 2.772589 0.000000 263 +detail 1 57 2.890372 2.890372 321 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +talk 0 53 2.944439 0.000000 336 +extens 0 53 2.944439 0.000000 340 +week 1 52 2.995732 2.995732 343 +give 1 50 3.044522 3.044522 359 +discuss 2 45 3.135494 6.270988 399 +textbook 0 44 3.135494 0.000000 397 +anoth 0 45 3.135494 0.000000 408 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +review 1 42 3.218876 3.218876 425 +form 1 39 3.258097 3.258097 443 +realli 0 40 3.258097 0.000000 444 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +short 0 36 3.367296 0.000000 499 +post 1 35 3.401197 3.401197 505 +least 0 35 3.401197 0.000000 516 +articl 0 33 3.433987 0.000000 530 +idea 1 32 3.465736 3.465736 545 +secur 0 30 3.555348 0.000000 577 +abl 0 30 3.555348 0.000000 566 +particip 1 29 3.583519 3.583519 589 +synchron 0 29 3.583519 0.000000 588 +intend 0 28 3.610918 0.000000 599 +pass 0 28 3.610918 0.000000 611 +relev 0 26 3.688879 0.000000 637 +revis 0 26 3.688879 0.000000 640 +daili 0 24 3.761200 0.000000 706 +try 0 22 3.850148 0.000000 764 +busi 0 21 3.912023 0.000000 784 +kernel 0 20 3.951244 0.000000 825 +longer 0 20 3.951244 0.000000 816 +els 0 19 4.007333 0.000000 843 +listen 0 18 4.060443 0.000000 907 +protect 0 17 4.110874 0.000000 935 +sheet 0 16 4.174387 0.000000 973 +choos 0 16 4.174387 0.000000 964 +critic 0 16 4.174387 0.000000 982 +purchas 0 15 4.248495 0.000000 1030 +score 0 15 4.248495 0.000000 1017 +doit 0 14 4.317488 0.000000 1111 +someon 0 13 4.382027 0.000000 1128 +rest 0 12 4.465908 0.000000 1259 +reader 0 12 4.465908 0.000000 1246 +broad 0 11 4.553877 0.000000 1302 +literatur 0 11 4.553877 0.000000 1300 +success 0 10 4.653960 0.000000 1390 +bart 0 9 4.753590 0.000000 1559 +classmat 0 9 4.753590 0.000000 1516 +theme 0 8 4.875197 0.000000 1707 +opinion 0 8 4.875197 0.000000 1708 +refere 0 7 5.010635 0.000000 1895 +fromth 0 7 5.010635 0.000000 1802 +carefulli 0 6 5.164786 0.000000 2045 +madisoncomput 0 5 5.347108 0.000000 2391 +understood 0 5 5.347108 0.000000 2364 +twice 1 4 5.568345 5.568345 2614 +exposur 0 4 5.568345 0.000000 2598 +will 0 4 5.568345 0.000000 2782 +writer 0 4 5.568345 0.000000 2783 +csoffic 0 4 5.568345 0.000000 2727 +departmentc 0 3 5.857933 0.000000 3395 +millerc 0 3 5.857933 0.000000 3396 +advancedoper 0 3 5.857933 0.000000 3403 +focal 0 3 5.857933 0.000000 3404 +formula 0 3 5.857933 0.000000 3405 +thepap 0 3 5.857933 0.000000 3254 +andon 0 3 5.857933 0.000000 3115 +bybart 0 3 5.857933 0.000000 3401 +satisfactori 0 2 6.263398 0.000000 4567 +andconfer 0 2 6.263398 0.000000 4568 +willinstead 0 2 6.263398 0.000000 4569 +adiscuss 0 2 6.263398 0.000000 4570 +geta 0 2 6.263398 0.000000 4571 +quietli 0 2 6.263398 0.000000 4572 +examsther 0 2 6.263398 0.000000 4149 +assignmenti 0 2 6.263398 0.000000 4573 +availbl 1 1 6.957497 6.957497 8556 +systemssummarythi 0 1 6.957497 0.000000 8557 +textther 0 1 6.957497 0.000000 8558 +operatingsystemsclass 0 1 6.957497 0.000000 8559 +meetonc 0 1 6.957497 0.000000 8560 +listaccord 0 1 6.957497 0.000000 8561 +papersindepend 0 1 6.957497 0.000000 8562 +identifyth 0 1 6.957497 0.000000 8563 +discussionsclass 0 1 6.957497 0.000000 8564 +besupport 0 1 6.957497 0.000000 8565 +beveri 0 1 6.957497 0.000000 8566 +unhappi 0 1 6.957497 0.000000 8567 +papersdur 0 1 6.957497 0.000000 8568 +paperwil 0 1 6.957497 0.000000 8569 +facilityand 0 1 6.957497 0.000000 8570 +summaryof 0 1 6.957497 0.000000 8571 +aselect 0 1 6.957497 0.000000 8572 +topicsfrom 0 1 6.957497 0.000000 8573 +fellowstud 0 1 6.957497 0.000000 8574 +giveth 0 1 6.957497 0.000000 8575 +gradesscor 0 1 6.957497 0.000000 8576 +proposalsi 0 1 6.957497 0.000000 8577 +gradesar 0 1 6.957497 0.000000 8578 +detailstim 0 1 6.957497 0.000000 8579 +noonlast 0 1 6.957497 0.000000 8580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..f6bf81e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +click 2 142 1.945910 3.891820 78 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +compil 0 122 2.079442 0.000000 96 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +question 2 91 2.397895 4.795790 141 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +search 0 95 2.397895 0.000000 155 +solut 2 82 2.484907 4.969814 162 +requir 0 81 2.484907 0.000000 167 +school 0 84 2.484907 0.000000 188 +contain 0 81 2.484907 0.000000 174 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +materi 0 75 2.639057 0.000000 221 +window 1 68 2.708050 2.708050 242 +knowledg 0 67 2.708050 0.000000 243 +receiv 0 66 2.708050 0.000000 244 +syllabu 0 67 2.708050 0.000000 247 +copi 1 63 2.772589 2.772589 284 +experi 0 64 2.772589 0.000000 283 +descript 0 64 2.772589 0.000000 271 +import 0 65 2.772589 0.000000 282 +polici 0 64 2.772589 0.000000 279 +function 0 62 2.772589 0.000000 275 +simpl 0 60 2.833213 0.000000 298 +march 0 61 2.833213 0.000000 295 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +cover 1 55 2.944439 2.944439 329 +instruct 0 53 2.944439 0.000000 332 +week 2 52 2.995732 5.991464 343 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +done 0 47 3.091042 0.000000 381 +even 0 45 3.135494 0.000000 393 +textbook 0 44 3.135494 0.000000 397 +netscap 0 44 3.135494 0.000000 395 +long 0 43 3.178054 0.000000 413 +howev 1 41 3.218876 3.218876 422 +littl 0 39 3.258097 0.000000 454 +announc 0 40 3.258097 0.000000 441 +probabl 0 40 3.258097 0.000000 455 +microsoft 1 38 3.295837 3.295837 468 +credit 0 38 3.295837 0.000000 460 +open 0 38 3.295837 0.000000 469 +copyright 0 36 3.367296 0.000000 495 +statist 0 35 3.401197 0.000000 521 +taught 0 33 3.433987 0.000000 526 +depend 0 29 3.583519 0.000000 583 +intend 1 28 3.610918 3.610918 599 +except 0 28 3.610918 0.000000 607 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +enabl 0 26 3.688879 0.000000 655 +lab 0 24 3.761200 0.000000 698 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +prepar 0 20 3.951244 0.000000 824 +entir 0 20 3.951244 0.000000 811 +exercis 1 19 4.007333 4.007333 842 +assum 0 19 4.007333 0.000000 845 +lyco 0 19 4.007333 0.000000 871 +across 0 16 4.174387 0.000000 974 +dilbert 0 16 4.174387 0.000000 996 +fortran 2 15 4.248495 8.496990 1027 +pagec 0 15 4.248495 0.000000 1011 +psycholog 0 15 4.248495 0.000000 1054 +purchas 0 15 4.248495 0.000000 1030 +comic 0 14 4.317488 0.000000 1103 +primarili 1 13 4.382027 4.382027 1185 +menu 0 13 4.382027 0.000000 1156 +vectra 1 12 4.465908 4.465908 1267 +overal 0 12 4.465908 0.000000 1254 +holidai 0 12 4.465908 0.000000 1224 +insid 0 12 4.465908 0.000000 1262 +keyword 0 11 4.553877 0.000000 1356 +night 0 11 4.553877 0.000000 1319 +prior 0 10 4.653960 0.000000 1438 +packard 0 10 4.653960 0.000000 1444 +certain 0 10 4.653960 0.000000 1393 +seven 0 9 4.753590 0.000000 1561 +hewlett 0 8 4.875197 0.000000 1709 +printer 0 8 4.875197 0.000000 1621 +elementari 1 7 5.010635 5.010635 1825 +bestor 1 6 5.164786 5.164786 2099 +gareth 1 5 5.347108 5.347108 2392 +relief 0 4 5.568345 0.000000 2784 +punctual 0 3 5.857933 0.000000 3313 +labyou 0 3 5.857933 0.000000 3406 +dorm 0 3 5.857933 0.000000 3407 +lahei 0 3 5.857933 0.000000 3408 +programmingsect 0 2 6.263398 0.000000 4574 +disturb 0 2 6.263398 0.000000 4575 +subroutin 0 2 6.263398 0.000000 4576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..b69c2cda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 3 374 0.693147 2.079441 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +algorithm 0 162 1.791759 0.000000 57 +click 2 142 1.945910 3.891820 78 +assign 2 135 1.945910 3.891820 66 +problem 2 147 1.945910 3.891820 75 +lectur 2 135 1.945910 3.891820 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +perform 0 143 1.945910 0.000000 74 +hall 0 146 1.945910 0.000000 65 +compil 1 122 2.079442 2.079442 96 +final 2 116 2.197225 4.394450 108 +pleas 1 113 2.197225 2.197225 114 +instructor 1 108 2.197225 2.197225 107 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +topic 0 114 2.197225 0.000000 110 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +text 1 98 2.302585 2.302585 133 +need 1 98 2.302585 2.302585 135 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +question 2 91 2.397895 4.795790 141 +grade 2 90 2.397895 4.795790 142 +section 1 94 2.397895 2.397895 149 +follow 0 92 2.397895 0.000000 143 +search 0 95 2.397895 0.000000 155 +exam 2 86 2.484907 4.969814 169 +solut 2 82 2.484907 4.969814 162 +help 1 83 2.484907 2.484907 175 +learn 1 86 2.484907 2.484907 170 +academ 0 82 2.484907 0.000000 178 +requir 0 81 2.484907 0.000000 167 +contain 0 81 2.484907 0.000000 174 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +come 1 78 2.564949 2.564949 202 +want 1 79 2.564949 2.564949 199 +exampl 0 77 2.564949 0.000000 195 +messag 0 76 2.564949 0.000000 212 +solv 2 73 2.639057 5.278114 234 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +onlin 0 75 2.639057 0.000000 223 +name 0 72 2.639057 0.000000 220 +receiv 1 66 2.708050 2.708050 244 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +would 0 67 2.708050 0.000000 251 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +copi 1 63 2.772589 2.772589 284 +wednesdai 1 64 2.772589 2.772589 261 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +room 0 59 2.833213 0.000000 301 +locat 0 59 2.833213 0.000000 303 +major 0 56 2.890372 0.000000 315 +semest 0 58 2.890372 0.000000 312 +think 0 57 2.890372 0.000000 314 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +cover 1 55 2.944439 2.944439 329 +februari 0 54 2.944439 0.000000 328 +three 0 54 2.944439 0.000000 330 +week 3 52 2.995732 8.987196 343 +particular 1 51 2.995732 2.995732 352 +run 0 51 2.995732 0.000000 347 +pointer 1 48 3.044522 3.044522 368 +without 0 50 3.044522 0.000000 370 +appoint 0 49 3.044522 0.000000 358 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +understand 1 47 3.091042 3.091042 384 +fridai 1 44 3.135494 3.135494 390 +even 1 45 3.135494 3.135494 393 +answer 1 45 3.135494 3.135494 391 +netscap 1 44 3.135494 3.135494 395 +algebra 0 45 3.135494 0.000000 394 +directori 0 45 3.135494 0.000000 396 +textbook 0 44 3.135494 0.000000 397 +term 1 43 3.178054 3.178054 411 +long 1 43 3.178054 3.178054 413 +show 0 43 3.178054 0.000000 417 +howev 1 41 3.218876 3.218876 422 +edit 0 42 3.218876 0.000000 418 +must 1 40 3.258097 3.258097 442 +error 1 40 3.258097 3.258097 449 +announc 0 40 3.258097 0.000000 441 +realli 0 40 3.258097 0.000000 444 +late 0 40 3.258097 0.000000 439 +programm 0 39 3.258097 0.000000 445 +small 0 39 3.258097 0.000000 447 +probabl 0 40 3.258097 0.000000 455 +microsoft 1 38 3.295837 3.295837 468 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +hand 1 37 3.332205 3.332205 475 +mean 0 37 3.332205 0.000000 477 +respons 0 37 3.332205 0.000000 476 +short 0 36 3.367296 0.000000 499 +copyright 0 36 3.367296 0.000000 495 +approxim 1 35 3.401197 3.401197 509 +everi 0 34 3.401197 0.000000 519 +statist 0 35 3.401197 0.000000 521 +taught 0 33 3.433987 0.000000 526 +board 0 33 3.433987 0.000000 528 +dissert 0 32 3.465736 0.000000 549 +someth 0 31 3.496508 0.000000 554 +abl 1 30 3.555348 3.555348 566 +rang 0 30 3.555348 0.000000 565 +depend 0 29 3.583519 0.000000 583 +pass 1 28 3.610918 3.610918 611 +except 1 28 3.610918 3.610918 607 +intend 0 28 3.610918 0.000000 599 +though 1 27 3.637586 3.637586 622 +campu 0 27 3.637586 0.000000 623 +relev 0 26 3.688879 0.000000 637 +consult 1 24 3.761200 3.761200 687 +lab 0 24 3.761200 0.000000 698 +tent 0 22 3.850148 0.000000 739 +instead 0 22 3.850148 0.000000 756 +try 0 22 3.850148 0.000000 764 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +entir 0 20 3.951244 0.000000 811 +longer 0 20 3.951244 0.000000 816 +exercis 1 19 4.007333 4.007333 842 +thur 0 19 4.007333 0.000000 847 +lyco 0 19 4.007333 0.000000 871 +attempt 0 17 4.110874 0.000000 917 +weekli 0 17 4.110874 0.000000 919 +anyth 0 16 4.174387 0.000000 998 +misconduct 0 16 4.174387 0.000000 1003 +explan 0 16 4.174387 0.000000 985 +normal 0 16 4.174387 0.000000 995 +across 0 16 4.174387 0.000000 974 +dilbert 0 16 4.174387 0.000000 996 +fortran 2 15 4.248495 8.496990 1027 +contribut 1 15 4.248495 4.248495 1021 +pagec 0 15 4.248495 0.000000 1011 +psycholog 0 15 4.248495 0.000000 1054 +score 0 15 4.248495 0.000000 1017 +overhead 0 15 4.248495 0.000000 1035 +doesn 0 15 4.248495 0.000000 1055 +purchas 0 15 4.248495 0.000000 1030 +easili 0 14 4.317488 0.000000 1077 +comic 0 14 4.317488 0.000000 1103 +primarili 1 13 4.382027 4.382027 1185 +menu 0 13 4.382027 0.000000 1156 +wait 0 13 4.382027 0.000000 1168 +necessari 0 13 4.382027 0.000000 1147 +step 0 13 4.382027 0.000000 1138 +skill 1 12 4.465908 4.465908 1205 +vectra 1 12 4.465908 4.465908 1267 +stai 0 12 4.465908 0.000000 1215 +calcul 0 12 4.465908 0.000000 1268 +outsid 0 12 4.465908 0.000000 1219 +overal 0 12 4.465908 0.000000 1254 +holidai 0 12 4.465908 0.000000 1224 +insid 0 12 4.465908 0.000000 1262 +extra 0 11 4.553877 0.000000 1312 +regularli 0 11 4.553877 0.000000 1338 +distinguish 0 11 4.553877 0.000000 1357 +keyword 0 11 4.553877 0.000000 1356 +night 0 11 4.553877 0.000000 1319 +bring 0 10 4.653960 0.000000 1430 +cheat 0 10 4.653960 0.000000 1395 +packard 0 10 4.653960 0.000000 1444 +certain 0 10 4.653960 0.000000 1393 +seven 1 9 4.753590 4.753590 1561 +login 1 9 4.753590 4.753590 1550 +discov 0 9 4.753590 0.000000 1562 +didn 0 9 4.753590 0.000000 1563 +familiar 0 9 4.753590 0.000000 1485 +curv 1 8 4.875197 4.875197 1656 +printer 1 8 4.875197 4.875197 1621 +risk 0 8 4.875197 0.000000 1689 +fail 0 8 4.875197 0.000000 1655 +matter 0 8 4.875197 0.000000 1627 +hewlett 0 8 4.875197 0.000000 1709 +friedman 0 7 5.010635 0.000000 1886 +zero 0 7 5.010635 0.000000 1896 +tag 0 7 5.010635 0.000000 1821 +therefor 0 7 5.010635 0.000000 1822 +bestor 1 6 5.164786 5.164786 2099 +constitut 0 6 5.164786 0.000000 2026 +ensur 0 6 5.164786 0.000000 2012 +syntax 0 6 5.164786 0.000000 2030 +gareth 1 5 5.347108 5.347108 2392 +substitut 0 5 5.347108 0.000000 2247 +handin 0 5 5.347108 0.000000 2393 +identif 0 4 5.568345 0.000000 2773 +wear 0 4 5.568345 0.000000 2785 +trivial 0 4 5.568345 0.000000 2786 +relief 0 4 5.568345 0.000000 2784 +punctual 0 3 5.857933 0.000000 3313 +projector 0 3 5.857933 0.000000 3409 +duti 0 3 5.857933 0.000000 3317 +labyou 0 3 5.857933 0.000000 3406 +dorm 0 3 5.857933 0.000000 3407 +lahei 0 3 5.857933 0.000000 3408 +programmingsect 0 2 6.263398 0.000000 4574 +disturb 0 2 6.263398 0.000000 4575 +lowest 0 2 6.263398 0.000000 4565 +regardless 0 2 6.263398 0.000000 4577 +pencil 0 2 6.263398 0.000000 4426 +subroutin 0 2 6.263398 0.000000 4576 +elig 1 1 6.957497 6.957497 8581 +amclick 0 1 6.957497 0.000000 8582 +unsur 0 1 6.957497 0.000000 8583 +notestext 0 1 6.957497 0.000000 8584 +koffman 0 1 6.957497 0.000000 8585 +assignmentsther 0 1 6.957497 0.000000 8586 +gradesheet 0 1 6.957497 0.000000 8587 +pmhow 0 1 6.957497 0.000000 8588 +modem 0 1 6.957497 0.000000 8589 +exerciseson 0 1 6.957497 0.000000 8590 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..b6a6b976 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +note 0 142 1.945910 0.000000 67 +object 0 138 1.945910 0.000000 79 +section 1 94 2.397895 2.397895 149 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +name 0 72 2.639057 0.000000 220 +solv 0 73 2.639057 0.000000 234 +thursdai 0 70 2.708050 0.000000 241 +handout 0 64 2.772589 0.000000 263 +publish 0 57 2.890372 0.000000 326 +week 0 52 2.995732 0.000000 343 +algebra 0 45 3.135494 0.000000 394 +compani 0 41 3.218876 0.000000 423 +announc 0 40 3.258097 0.000000 441 +error 0 40 3.258097 0.000000 449 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +eduoffic 0 33 3.433987 0.000000 531 +ask 0 28 3.610918 0.000000 597 +valu 0 25 3.737670 0.000000 665 +known 0 24 3.761200 0.000000 702 +greg 0 24 3.761200 0.000000 695 +dai 0 22 3.850148 0.000000 753 +output 0 21 3.912023 0.000000 788 +walter 0 17 4.110874 0.000000 950 +quiz 0 16 4.174387 0.000000 990 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +dave 1 14 4.317488 4.317488 1098 +scan 0 12 4.465908 0.000000 1243 +informationemail 0 9 4.753590 0.000000 1564 +sharp 0 6 5.164786 0.000000 2100 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +prog 0 4 5.568345 0.000000 2740 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +objectivesvectra 0 3 5.857933 0.000000 3410 +homeclass 0 3 5.857933 0.000000 3411 +policyl 0 3 5.857933 0.000000 3412 +policyacadem 0 3 5.857933 0.000000 3413 +burnett 1 2 6.263398 6.263398 4578 +consultantssyllabuswork 0 2 6.263398 0.000000 4579 +archivepolici 0 2 6.263398 0.000000 4580 +eggleston 0 2 6.263398 0.000000 4581 +egglestonemail 0 1 6.957497 0.000000 8591 +hourlywork 0 1 6.957497 0.000000 8592 +classread 0 1 6.957497 0.000000 8593 +gradeshomeworkexam 0 1 6.957497 0.000000 8594 +quizzesmiscellan 0 1 6.957497 0.000000 8595 +policytextproblem 0 1 6.957497 0.000000 8596 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..943d8d96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +paper 2 205 1.609438 3.218876 38 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +list 0 201 1.609438 0.000000 39 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +distribut 0 162 1.791759 0.000000 51 +hour 0 165 1.791759 0.000000 46 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +file 0 132 1.945910 0.000000 70 +problem 0 147 1.945910 0.000000 75 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +topic 1 114 2.197225 2.197225 110 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +text 1 98 2.302585 2.302585 133 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +need 0 98 2.302585 0.000000 135 +peopl 0 96 2.302585 0.000000 132 +grade 1 90 2.397895 2.397895 142 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +requir 0 81 2.484907 0.000000 167 +solut 0 82 2.484907 0.000000 162 +info 0 85 2.484907 0.000000 176 +activ 0 84 2.484907 0.000000 182 +exam 0 86 2.484907 0.000000 169 +second 0 81 2.484907 0.000000 166 +come 0 78 2.564949 0.000000 202 +involv 1 71 2.639057 2.639057 227 +tuesdai 1 73 2.639057 2.639057 219 +summari 0 73 2.639057 0.000000 237 +thursdai 0 70 2.708050 0.000000 241 +differ 0 66 2.708050 0.000000 253 +window 0 68 2.708050 0.000000 242 +import 0 65 2.772589 0.000000 282 +previou 0 62 2.772589 0.000000 290 +copi 0 63 2.772589 0.000000 284 +semest 1 58 2.890372 2.890372 312 +detail 0 57 2.890372 0.000000 321 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +cover 0 55 2.944439 0.000000 329 +suggest 0 53 2.944439 0.000000 331 +allow 0 53 2.944439 0.000000 333 +case 0 51 2.995732 0.000000 351 +give 0 50 3.044522 0.000000 359 +appoint 0 49 3.044522 0.000000 358 +discuss 1 45 3.135494 3.135494 399 +review 0 42 3.218876 0.000000 425 +slide 1 38 3.295837 3.295837 467 +prototyp 0 38 3.295837 0.000000 463 +hand 0 37 3.332205 0.000000 475 +either 0 35 3.401197 0.000000 506 +compon 0 30 3.555348 0.000000 570 +secur 0 30 3.555348 0.000000 577 +particip 1 29 3.583519 3.583519 589 +synchron 0 29 3.583519 0.000000 588 +propos 1 28 3.610918 3.610918 602 +intend 0 28 3.610918 0.000000 599 +measur 0 28 3.610918 0.000000 609 +linux 0 27 3.637586 0.000000 631 +team 0 27 3.637586 0.000000 625 +relev 0 26 3.688879 0.000000 637 +instead 0 22 3.850148 0.000000 756 +tent 0 22 3.850148 0.000000 739 +benchmark 0 19 4.007333 0.000000 859 +encourag 0 18 4.060443 0.000000 880 +protect 0 17 4.110874 0.000000 935 +choos 0 16 4.174387 0.000000 964 +purchas 0 15 4.248495 0.000000 1030 +classic 0 14 4.317488 0.000000 1084 +doit 0 14 4.317488 0.000000 1111 +suit 0 13 4.382027 0.000000 1129 +count 1 12 4.465908 4.465908 1239 +solari 0 12 4.465908 0.000000 1238 +broad 0 11 4.553877 0.000000 1302 +strongli 0 10 4.653960 0.000000 1406 +total 0 10 4.653960 0.000000 1398 +theme 0 8 4.875197 0.000000 1707 +formerli 0 5 5.347108 0.000000 2397 +exposur 0 4 5.568345 0.000000 2598 +suno 0 4 5.568345 0.000000 2790 +advancedoper 0 3 5.857933 0.000000 3403 +focal 0 3 5.857933 0.000000 3404 +macc 0 3 5.857933 0.000000 3414 +ofvari 0 2 6.263398 0.000000 4582 +anexperiment 0 2 6.263398 0.000000 4299 +halloffic 0 2 6.263398 0.000000 4583 +deskfor 0 2 6.263398 0.000000 4584 +performanceof 0 2 6.263398 0.000000 4585 +topicsinclud 0 1 6.957497 0.000000 8597 +tochoos 0 1 6.957497 0.000000 8598 +rathera 0 1 6.957497 0.000000 8599 +manya 0 1 6.957497 0.000000 8600 +assig 0 1 6.957497 0.000000 8601 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..84d28264 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +design 0 213 1.386294 0.000000 25 +fall 1 181 1.609438 1.609438 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +lectur 2 135 1.945910 3.891820 73 +problem 0 147 1.945910 0.000000 75 +first 0 140 1.945910 0.000000 71 +introduct 0 126 2.079442 0.000000 87 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +materi 1 75 2.639057 2.639057 221 +write 0 72 2.639057 0.000000 222 +solv 0 73 2.639057 0.000000 234 +simpl 0 60 2.833213 0.000000 298 +cover 1 55 2.944439 2.944439 329 +basic 0 50 3.044522 0.000000 360 +credit 0 38 3.295837 0.000000 460 +michael 1 35 3.401197 3.401197 514 +jeff 1 25 3.737670 3.737670 673 +half 0 21 3.912023 0.000000 776 +martin 0 21 3.912023 0.000000 794 +prepar 0 20 3.951244 0.000000 824 +fortran 1 15 4.248495 4.248495 1027 +essenti 0 13 4.382027 0.000000 1137 +russel 1 9 4.753590 4.753590 1507 +suffici 0 7 5.010635 0.000000 1897 +lampert 1 5 5.347108 5.347108 2398 +birk 1 4 5.568345 5.568345 2791 +anthoni 0 4 5.568345 0.000000 2792 +toni 1 3 5.857933 5.857933 3415 +hummert 1 3 5.857933 5.857933 3416 +man 1 3 5.857933 5.857933 3417 +silva 1 2 6.263398 6.263398 4586 +sidnei 1 2 6.263398 6.263398 4587 +programmingstructur 0 1 6.957497 0.000000 8602 +elementaryengin 0 1 6.957497 0.000000 8603 +enableth 0 1 6.957497 0.000000 8604 +inelementari 0 1 6.957497 0.000000 8605 +reameslast 0 1 6.957497 0.000000 8606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..0a551c62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +offic 3 299 1.098612 3.295836 13 +us 0 329 1.098612 0.000000 16 +wisc 2 242 1.386294 2.772588 33 +email 2 220 1.386294 2.772588 29 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +list 1 201 1.609438 1.609438 39 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 2 165 1.791759 3.583518 46 +phone 2 175 1.791759 3.583518 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +databas 0 122 2.079442 0.000000 86 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +teach 0 108 2.197225 0.000000 112 +find 0 111 2.197225 0.000000 111 +section 2 94 2.397895 4.795790 149 +grade 2 90 2.397895 4.795790 142 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +mondai 0 77 2.564949 0.000000 206 +name 2 72 2.639057 5.278114 220 +wednesdai 0 64 2.772589 0.000000 261 +dept 0 64 2.772589 0.000000 291 +virtual 0 62 2.772589 0.000000 285 +creat 0 63 2.772589 0.000000 277 +explor 0 58 2.890372 0.000000 324 +maintain 1 51 2.995732 2.995732 342 +hardwar 0 51 2.995732 0.000000 350 +appoint 0 49 3.044522 0.000000 358 +cool 0 49 3.044522 0.000000 374 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +microsoft 0 38 3.295837 0.000000 468 +origin 0 38 3.295837 0.000000 472 +especi 0 36 3.367296 0.000000 496 +jame 0 35 3.401197 0.000000 507 +yahoo 0 24 3.761200 0.000000 707 +andrew 0 19 4.007333 0.000000 849 +lyco 0 19 4.007333 0.000000 871 +excel 0 19 4.007333 0.000000 868 +intel 0 16 4.174387 0.000000 1000 +novel 0 15 4.248495 0.000000 1039 +borland 0 14 4.317488 0.000000 1067 +whose 0 13 4.382027 0.000000 1166 +gupta 0 12 4.465908 0.000000 1241 +packard 0 10 4.653960 0.000000 1444 +classifi 0 9 4.753590 0.000000 1537 +hewlett 0 8 4.875197 0.000000 1709 +tourist 0 8 4.875197 0.000000 1710 +chiang 0 7 5.010635 0.000000 1853 +sciencesoffic 0 6 5.164786 0.000000 2101 +mother 0 6 5.164786 0.000000 2083 +alphabet 0 6 5.164786 0.000000 1980 +teitelbaum 0 6 5.164786 0.000000 2102 +categori 0 5 5.347108 0.000000 2261 +lookup 0 5 5.347108 0.000000 2399 +kelli 1 4 5.568345 5.568345 2793 +nathan 0 4 5.568345 0.000000 2794 +tsioli 1 3 5.857933 5.857933 3418 +ratliff 1 3 5.857933 5.857933 3419 +bockrath 1 3 5.857933 5.857933 3420 +ashraf 1 3 5.857933 5.857933 3421 +geeri 1 3 5.857933 5.857933 3422 +jyothi 1 3 5.857933 5.857933 3423 +thano 1 3 5.857933 5.857933 3424 +fink 0 3 5.857933 0.000000 3425 +aboulnaga 0 3 5.857933 0.000000 3426 +jherro 0 3 5.857933 0.000000 3427 +abhinav 0 3 5.857933 0.000000 3428 +agupta 0 3 5.857933 0.000000 3429 +suhui 0 3 5.857933 0.000000 3430 +enorm 0 3 5.857933 0.000000 3431 +rehnuma 1 2 6.263398 6.263398 4588 +keyinstructorprofessor 0 2 6.263398 0.000000 4589 +desautelsoffic 0 2 6.263398 0.000000 4590 +assistantsfollow 0 2 6.263398 0.000000 4591 +rahman 0 2 6.263398 0.000000 4592 +jaim 0 2 6.263398 0.000000 4593 +jfink 0 2 6.263398 0.000000 4594 +herro 0 2 6.263398 0.000000 4595 +krothap 0 2 6.263398 0.000000 4596 +gradesexplor 0 2 6.263398 0.000000 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..0a551c62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +offic 3 299 1.098612 3.295836 13 +us 0 329 1.098612 0.000000 16 +wisc 2 242 1.386294 2.772588 33 +email 2 220 1.386294 2.772588 29 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +list 1 201 1.609438 1.609438 39 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +hour 2 165 1.791759 3.583518 46 +phone 2 175 1.791759 3.583518 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +databas 0 122 2.079442 0.000000 86 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +teach 0 108 2.197225 0.000000 112 +find 0 111 2.197225 0.000000 111 +section 2 94 2.397895 4.795790 149 +grade 2 90 2.397895 4.795790 142 +search 0 95 2.397895 0.000000 155 +exam 0 86 2.484907 0.000000 169 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +mondai 0 77 2.564949 0.000000 206 +name 2 72 2.639057 5.278114 220 +wednesdai 0 64 2.772589 0.000000 261 +dept 0 64 2.772589 0.000000 291 +virtual 0 62 2.772589 0.000000 285 +creat 0 63 2.772589 0.000000 277 +explor 0 58 2.890372 0.000000 324 +maintain 1 51 2.995732 2.995732 342 +hardwar 0 51 2.995732 0.000000 350 +appoint 0 49 3.044522 0.000000 358 +cool 0 49 3.044522 0.000000 374 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +microsoft 0 38 3.295837 0.000000 468 +origin 0 38 3.295837 0.000000 472 +especi 0 36 3.367296 0.000000 496 +jame 0 35 3.401197 0.000000 507 +yahoo 0 24 3.761200 0.000000 707 +andrew 0 19 4.007333 0.000000 849 +lyco 0 19 4.007333 0.000000 871 +excel 0 19 4.007333 0.000000 868 +intel 0 16 4.174387 0.000000 1000 +novel 0 15 4.248495 0.000000 1039 +borland 0 14 4.317488 0.000000 1067 +whose 0 13 4.382027 0.000000 1166 +gupta 0 12 4.465908 0.000000 1241 +packard 0 10 4.653960 0.000000 1444 +classifi 0 9 4.753590 0.000000 1537 +hewlett 0 8 4.875197 0.000000 1709 +tourist 0 8 4.875197 0.000000 1710 +chiang 0 7 5.010635 0.000000 1853 +sciencesoffic 0 6 5.164786 0.000000 2101 +mother 0 6 5.164786 0.000000 2083 +alphabet 0 6 5.164786 0.000000 1980 +teitelbaum 0 6 5.164786 0.000000 2102 +categori 0 5 5.347108 0.000000 2261 +lookup 0 5 5.347108 0.000000 2399 +kelli 1 4 5.568345 5.568345 2793 +nathan 0 4 5.568345 0.000000 2794 +tsioli 1 3 5.857933 5.857933 3418 +ratliff 1 3 5.857933 5.857933 3419 +bockrath 1 3 5.857933 5.857933 3420 +ashraf 1 3 5.857933 5.857933 3421 +geeri 1 3 5.857933 5.857933 3422 +jyothi 1 3 5.857933 5.857933 3423 +thano 1 3 5.857933 5.857933 3424 +fink 0 3 5.857933 0.000000 3425 +aboulnaga 0 3 5.857933 0.000000 3426 +jherro 0 3 5.857933 0.000000 3427 +abhinav 0 3 5.857933 0.000000 3428 +agupta 0 3 5.857933 0.000000 3429 +suhui 0 3 5.857933 0.000000 3430 +enorm 0 3 5.857933 0.000000 3431 +rehnuma 1 2 6.263398 6.263398 4588 +keyinstructorprofessor 0 2 6.263398 0.000000 4589 +desautelsoffic 0 2 6.263398 0.000000 4590 +assistantsfollow 0 2 6.263398 0.000000 4591 +rahman 0 2 6.263398 0.000000 4592 +jaim 0 2 6.263398 0.000000 4593 +jfink 0 2 6.263398 0.000000 4594 +herro 0 2 6.263398 0.000000 4595 +krothap 0 2 6.263398 0.000000 4596 +gradesexplor 0 2 6.263398 0.000000 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..9b642445 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +lectur 2 135 1.945910 3.891820 73 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +databas 1 122 2.079442 2.079442 86 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +topic 1 114 2.197225 2.197225 110 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +teach 0 108 2.197225 0.000000 112 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +info 1 85 2.484907 2.484907 176 +exam 1 86 2.484907 2.484907 169 +wide 0 84 2.484907 0.000000 185 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +issu 0 78 2.564949 0.000000 211 +tuesdai 0 73 2.639057 0.000000 219 +intellig 0 72 2.639057 0.000000 225 +addit 0 74 2.639057 0.000000 228 +syllabu 1 67 2.708050 2.708050 247 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +point 0 58 2.890372 0.000000 319 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +electron 0 47 3.091042 0.000000 379 +discuss 1 45 3.135494 3.135494 399 +netscap 0 44 3.135494 0.000000 395 +term 0 43 3.178054 0.000000 411 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +word 1 34 3.401197 3.401197 508 +eduoffic 0 33 3.433987 0.000000 531 +taught 0 33 3.433987 0.000000 526 +storag 0 31 3.496508 0.000000 553 +compon 0 30 3.555348 0.000000 570 +held 0 28 3.610918 0.000000 600 +background 1 25 3.737670 3.737670 664 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +newsgroup 0 21 3.912023 0.000000 783 +expert 0 20 3.951244 0.000000 833 +qualiti 0 20 3.951244 0.000000 832 +excel 0 19 4.007333 0.000000 868 +macintosh 1 17 4.110874 4.110874 920 +regular 0 17 4.110874 0.000000 929 +devic 0 16 4.174387 0.000000 1002 +draw 0 14 4.317488 0.000000 1086 +quizz 1 13 4.382027 4.382027 1151 +social 0 13 4.382027 0.000000 1123 +necessari 0 13 4.382027 0.000000 1147 +skill 0 12 4.465908 0.000000 1205 +desktop 0 10 4.653960 0.000000 1445 +rich 0 10 4.653960 0.000000 1396 +telecommun 0 9 4.753590 0.000000 1565 +zero 0 7 5.010635 0.000000 1896 +shot 0 7 5.010635 0.000000 1898 +necessarili 0 7 5.010635 0.000000 1899 +thegoal 0 6 5.164786 0.000000 2033 +lloyd 0 6 5.164786 0.000000 2103 +paint 0 5 5.347108 0.000000 2400 +bodner 0 5 5.347108 0.000000 2401 +chart 0 4 5.568345 0.000000 2653 +assignmentsand 0 4 5.568345 0.000000 2760 +glanc 0 4 5.568345 0.000000 2652 +salli 1 3 5.857933 5.857933 3432 +facstaff 0 3 5.857933 0.000000 3433 +drag 0 3 5.857933 0.000000 3434 +crack 0 3 5.857933 0.000000 3435 +macintoshcomput 0 3 5.857933 0.000000 3228 +iici 0 3 5.857933 0.000000 3436 +scanner 0 3 5.857933 0.000000 3437 +leavi 0 3 5.857933 0.000000 3438 +sharenow 0 3 5.857933 0.000000 3439 +swander 0 3 5.857933 0.000000 3440 +thayer 0 3 5.857933 0.000000 3441 +varghes 0 3 5.857933 0.000000 3442 +weinberg 0 3 5.857933 0.000000 3443 +spreadsheet 1 2 6.263398 6.263398 4598 +aldu 1 2 6.263398 6.263398 4599 +computersinstructor 0 2 6.263398 0.000000 4600 +petersonoffic 0 2 6.263398 0.000000 4601 +sciencephon 0 2 6.263398 0.000000 4602 +slpeter 0 2 6.263398 0.000000 4603 +appointmentvit 0 2 6.263398 0.000000 4604 +halllectur 0 2 6.263398 0.000000 4605 +laudon 0 2 6.263398 0.000000 4606 +traver 0 2 6.263398 0.000000 4607 +laudonlab 0 2 6.263398 0.000000 4608 +petersoncours 0 2 6.263398 0.000000 4609 +computersto 0 2 6.263398 0.000000 4610 +throughcolleg 0 2 6.263398 0.000000 4611 +arena 0 2 6.263398 0.000000 4612 +csuse 0 2 6.263398 0.000000 4613 +experienceon 0 2 6.263398 0.000000 4614 +eudora 0 2 6.263398 0.000000 4615 +superpaint 0 2 6.263398 0.000000 4616 +filemak 0 2 6.263398 0.000000 4617 +hypercard 0 2 6.263398 0.000000 4618 +pagemak 0 2 6.263398 0.000000 4619 +educationalexperi 0 2 6.263398 0.000000 4620 +namesectiontimedai 0 2 6.263398 0.000000 4621 +mwnick 0 2 6.263398 0.000000 4622 +mwtrshannon 0 2 6.263398 0.000000 4623 +trtrjeff 0 2 6.263398 0.000000 4624 +reminga 0 2 6.263398 0.000000 4625 +mwfmwira 0 2 6.263398 0.000000 4626 +trtrbrian 0 2 6.263398 0.000000 4627 +mwfmwfbrad 0 2 6.263398 0.000000 4628 +mwfmwfjoe 0 2 6.263398 0.000000 4629 +trtrgeoff 0 2 6.263398 0.000000 4630 +mwftrmaria 0 2 6.263398 0.000000 4631 +yuin 0 2 6.263398 0.000000 4632 +mwfmwrecommend 0 2 6.263398 0.000000 4633 +nitti 0 2 6.263398 0.000000 4634 +gritti 0 2 6.263398 0.000000 4635 +superpaintassign 0 2 6.263398 0.000000 4636 +excellast 0 2 6.263398 0.000000 4637 +jonbodn 0 2 6.263398 0.000000 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..9b642445 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +lectur 2 135 1.945910 3.891820 73 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +relat 0 139 1.945910 0.000000 68 +process 0 142 1.945910 0.000000 72 +databas 1 122 2.079442 2.079442 86 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +topic 1 114 2.197225 2.197225 110 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +teach 0 108 2.197225 0.000000 112 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +octob 0 89 2.397895 0.000000 156 +info 1 85 2.484907 2.484907 176 +exam 1 86 2.484907 2.484907 169 +wide 0 84 2.484907 0.000000 185 +learn 0 86 2.484907 0.000000 170 +contain 0 81 2.484907 0.000000 174 +issu 0 78 2.564949 0.000000 211 +tuesdai 0 73 2.639057 0.000000 219 +intellig 0 72 2.639057 0.000000 225 +addit 0 74 2.639057 0.000000 228 +syllabu 1 67 2.708050 2.708050 247 +thursdai 0 70 2.708050 0.000000 241 +knowledg 0 67 2.708050 0.000000 243 +order 0 69 2.708050 0.000000 249 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +point 0 58 2.890372 0.000000 319 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +detail 0 57 2.890372 0.000000 321 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +electron 0 47 3.091042 0.000000 379 +discuss 1 45 3.135494 3.135494 399 +netscap 0 44 3.135494 0.000000 395 +term 0 43 3.178054 0.000000 411 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +word 1 34 3.401197 3.401197 508 +eduoffic 0 33 3.433987 0.000000 531 +taught 0 33 3.433987 0.000000 526 +storag 0 31 3.496508 0.000000 553 +compon 0 30 3.555348 0.000000 570 +held 0 28 3.610918 0.000000 600 +background 1 25 3.737670 3.737670 664 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +newsgroup 0 21 3.912023 0.000000 783 +expert 0 20 3.951244 0.000000 833 +qualiti 0 20 3.951244 0.000000 832 +excel 0 19 4.007333 0.000000 868 +macintosh 1 17 4.110874 4.110874 920 +regular 0 17 4.110874 0.000000 929 +devic 0 16 4.174387 0.000000 1002 +draw 0 14 4.317488 0.000000 1086 +quizz 1 13 4.382027 4.382027 1151 +social 0 13 4.382027 0.000000 1123 +necessari 0 13 4.382027 0.000000 1147 +skill 0 12 4.465908 0.000000 1205 +desktop 0 10 4.653960 0.000000 1445 +rich 0 10 4.653960 0.000000 1396 +telecommun 0 9 4.753590 0.000000 1565 +zero 0 7 5.010635 0.000000 1896 +shot 0 7 5.010635 0.000000 1898 +necessarili 0 7 5.010635 0.000000 1899 +thegoal 0 6 5.164786 0.000000 2033 +lloyd 0 6 5.164786 0.000000 2103 +paint 0 5 5.347108 0.000000 2400 +bodner 0 5 5.347108 0.000000 2401 +chart 0 4 5.568345 0.000000 2653 +assignmentsand 0 4 5.568345 0.000000 2760 +glanc 0 4 5.568345 0.000000 2652 +salli 1 3 5.857933 5.857933 3432 +facstaff 0 3 5.857933 0.000000 3433 +drag 0 3 5.857933 0.000000 3434 +crack 0 3 5.857933 0.000000 3435 +macintoshcomput 0 3 5.857933 0.000000 3228 +iici 0 3 5.857933 0.000000 3436 +scanner 0 3 5.857933 0.000000 3437 +leavi 0 3 5.857933 0.000000 3438 +sharenow 0 3 5.857933 0.000000 3439 +swander 0 3 5.857933 0.000000 3440 +thayer 0 3 5.857933 0.000000 3441 +varghes 0 3 5.857933 0.000000 3442 +weinberg 0 3 5.857933 0.000000 3443 +spreadsheet 1 2 6.263398 6.263398 4598 +aldu 1 2 6.263398 6.263398 4599 +computersinstructor 0 2 6.263398 0.000000 4600 +petersonoffic 0 2 6.263398 0.000000 4601 +sciencephon 0 2 6.263398 0.000000 4602 +slpeter 0 2 6.263398 0.000000 4603 +appointmentvit 0 2 6.263398 0.000000 4604 +halllectur 0 2 6.263398 0.000000 4605 +laudon 0 2 6.263398 0.000000 4606 +traver 0 2 6.263398 0.000000 4607 +laudonlab 0 2 6.263398 0.000000 4608 +petersoncours 0 2 6.263398 0.000000 4609 +computersto 0 2 6.263398 0.000000 4610 +throughcolleg 0 2 6.263398 0.000000 4611 +arena 0 2 6.263398 0.000000 4612 +csuse 0 2 6.263398 0.000000 4613 +experienceon 0 2 6.263398 0.000000 4614 +eudora 0 2 6.263398 0.000000 4615 +superpaint 0 2 6.263398 0.000000 4616 +filemak 0 2 6.263398 0.000000 4617 +hypercard 0 2 6.263398 0.000000 4618 +pagemak 0 2 6.263398 0.000000 4619 +educationalexperi 0 2 6.263398 0.000000 4620 +namesectiontimedai 0 2 6.263398 0.000000 4621 +mwnick 0 2 6.263398 0.000000 4622 +mwtrshannon 0 2 6.263398 0.000000 4623 +trtrjeff 0 2 6.263398 0.000000 4624 +reminga 0 2 6.263398 0.000000 4625 +mwfmwira 0 2 6.263398 0.000000 4626 +trtrbrian 0 2 6.263398 0.000000 4627 +mwfmwfbrad 0 2 6.263398 0.000000 4628 +mwfmwfjoe 0 2 6.263398 0.000000 4629 +trtrgeoff 0 2 6.263398 0.000000 4630 +mwftrmaria 0 2 6.263398 0.000000 4631 +yuin 0 2 6.263398 0.000000 4632 +mwfmwrecommend 0 2 6.263398 0.000000 4633 +nitti 0 2 6.263398 0.000000 4634 +gritti 0 2 6.263398 0.000000 4635 +superpaintassign 0 2 6.263398 0.000000 4636 +excellast 0 2 6.263398 0.000000 4637 +jonbodn 0 2 6.263398 0.000000 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..740898f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +introduct 1 126 2.079442 2.079442 87 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +section 1 94 2.397895 2.397895 149 +comment 0 93 2.397895 0.000000 146 +question 0 91 2.397895 0.000000 141 +academ 0 82 2.484907 0.000000 178 +window 1 68 2.708050 2.708050 242 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +creat 0 63 2.772589 0.000000 277 +overview 0 56 2.890372 0.000000 323 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +algebra 0 45 3.135494 0.000000 394 +netscap 0 44 3.135494 0.000000 395 +offer 0 43 3.178054 0.000000 414 +microsoft 0 38 3.295837 0.000000 468 +ask 0 28 3.610918 0.000000 597 +jeff 0 25 3.737670 0.000000 673 +consult 1 24 3.761200 3.761200 687 +feedback 0 19 4.007333 0.000000 854 +misconduct 0 16 4.174387 0.000000 1003 +borland 1 14 4.317488 4.317488 1067 +coordin 1 13 4.382027 4.382027 1182 +savitch 0 12 4.465908 0.000000 1269 +mainli 0 10 4.653960 0.000000 1432 +hint 0 10 4.653960 0.000000 1419 +tutor 0 9 4.753590 0.000000 1552 +pagecomput 0 7 5.010635 0.000000 1900 +skrentni 1 6 5.164786 5.164786 2104 +lampert 0 5 5.347108 0.000000 2398 +complaint 0 4 5.568345 0.000000 2795 +microcomput 0 3 5.857933 0.000000 3444 +instructorsw 0 2 6.263398 0.000000 4639 +csinform 0 2 6.263398 0.000000 4640 +subdirectoriesc 0 2 6.263398 0.000000 4641 +environmentfortran 0 2 6.263398 0.000000 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..431141b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +languag 1 227 1.386294 1.386294 26 +algorithm 0 162 1.791759 0.000000 57 +construct 0 139 1.945910 0.000000 82 +problem 0 147 1.945910 0.000000 75 +high 0 130 2.079442 0.000000 101 +mathemat 1 108 2.197225 2.197225 123 +instructor 0 108 2.197225 0.000000 107 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +school 0 84 2.484907 0.000000 188 +orient 0 80 2.564949 0.000000 205 +solv 0 73 2.639057 0.000000 234 +logic 0 71 2.639057 0.000000 230 +experi 0 64 2.772589 0.000000 283 +colleg 0 61 2.833213 0.000000 300 +undergradu 0 54 2.944439 0.000000 338 +instruct 0 53 2.944439 0.000000 332 +open 0 38 3.295837 0.000000 469 +procedur 0 36 3.367296 0.000000 488 +least 0 35 3.401197 0.000000 516 +survei 0 35 3.401197 0.000000 513 +statist 0 35 3.401197 0.000000 521 +prepar 0 20 3.951244 0.000000 824 +fortran 0 15 4.248495 0.000000 1027 +pascal 0 12 4.465908 0.000000 1213 +consent 0 5 5.347108 0.000000 2389 +prereq 0 3 5.857933 0.000000 3178 +infocours 0 2 6.263398 0.000000 4212 +guidebook 0 2 6.263398 0.000000 4643 +cscours 0 1 6.957497 0.000000 8607 +descriptionfrom 0 1 6.957497 0.000000 8608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..740898f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +introduct 1 126 2.079442 2.079442 87 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +section 1 94 2.397895 2.397895 149 +comment 0 93 2.397895 0.000000 146 +question 0 91 2.397895 0.000000 141 +academ 0 82 2.484907 0.000000 178 +window 1 68 2.708050 2.708050 242 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +creat 0 63 2.772589 0.000000 277 +overview 0 56 2.890372 0.000000 323 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +algebra 0 45 3.135494 0.000000 394 +netscap 0 44 3.135494 0.000000 395 +offer 0 43 3.178054 0.000000 414 +microsoft 0 38 3.295837 0.000000 468 +ask 0 28 3.610918 0.000000 597 +jeff 0 25 3.737670 0.000000 673 +consult 1 24 3.761200 3.761200 687 +feedback 0 19 4.007333 0.000000 854 +misconduct 0 16 4.174387 0.000000 1003 +borland 1 14 4.317488 4.317488 1067 +coordin 1 13 4.382027 4.382027 1182 +savitch 0 12 4.465908 0.000000 1269 +mainli 0 10 4.653960 0.000000 1432 +hint 0 10 4.653960 0.000000 1419 +tutor 0 9 4.753590 0.000000 1552 +pagecomput 0 7 5.010635 0.000000 1900 +skrentni 1 6 5.164786 5.164786 2104 +lampert 0 5 5.347108 0.000000 2398 +complaint 0 4 5.568345 0.000000 2795 +microcomput 0 3 5.857933 0.000000 3444 +instructorsw 0 2 6.263398 0.000000 4639 +csinform 0 2 6.263398 0.000000 4640 +subdirectoriesc 0 2 6.263398 0.000000 4641 +environmentfortran 0 2 6.263398 0.000000 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..c3413628 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +includ 1 208 1.609438 1.609438 42 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +assign 1 135 1.945910 1.945910 66 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +check 1 115 2.197225 2.197225 118 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +exam 0 86 2.484907 0.000000 169 +servic 1 72 2.639057 2.639057 236 +solv 0 73 2.639057 0.000000 234 +syllabu 0 67 2.708050 0.000000 247 +copi 1 63 2.772589 2.772589 284 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +direct 0 57 2.890372 0.000000 316 +local 1 55 2.944439 2.944439 334 +suggest 0 53 2.944439 0.000000 331 +directori 0 45 3.135494 0.000000 396 +examin 0 42 3.218876 0.000000 424 +past 0 42 3.218876 0.000000 428 +obtain 0 33 3.433987 0.000000 534 +lab 0 24 3.761200 0.000000 698 +viewer 0 21 3.912023 0.000000 787 +explan 0 16 4.174387 0.000000 985 +readm 0 8 4.875197 0.000000 1699 +ghost 0 2 6.263398 0.000000 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..c3413628 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +includ 1 208 1.609438 1.609438 42 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +assign 1 135 1.945910 1.945910 66 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +check 1 115 2.197225 2.197225 118 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +mani 0 92 2.397895 0.000000 150 +section 0 94 2.397895 0.000000 149 +exam 0 86 2.484907 0.000000 169 +servic 1 72 2.639057 2.639057 236 +solv 0 73 2.639057 0.000000 234 +syllabu 0 67 2.708050 0.000000 247 +copi 1 63 2.772589 2.772589 284 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +direct 0 57 2.890372 0.000000 316 +local 1 55 2.944439 2.944439 334 +suggest 0 53 2.944439 0.000000 331 +directori 0 45 3.135494 0.000000 396 +examin 0 42 3.218876 0.000000 424 +past 0 42 3.218876 0.000000 428 +obtain 0 33 3.433987 0.000000 534 +lab 0 24 3.761200 0.000000 698 +viewer 0 21 3.912023 0.000000 787 +explan 0 16 4.174387 0.000000 985 +readm 0 8 4.875197 0.000000 1699 +ghost 0 2 6.263398 0.000000 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..72ddaa46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +class 2 199 1.609438 3.218876 37 +fall 2 181 1.609438 3.218876 40 +updat 0 191 1.609438 0.000000 41 +hour 2 165 1.791759 3.583518 46 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +assign 2 135 1.945910 3.891820 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +instructor 1 108 2.197225 2.197225 107 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +book 0 99 2.302585 0.000000 131 +section 2 94 2.397895 4.795790 149 +grade 1 90 2.397895 2.397895 142 +octob 1 89 2.397895 2.397895 156 +question 0 91 2.397895 0.000000 141 +graphic 0 90 2.397895 0.000000 147 +exam 2 86 2.484907 4.969814 169 +novemb 1 81 2.484907 2.484907 179 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +homework 1 79 2.564949 2.564949 193 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +interfac 0 79 2.564949 0.000000 209 +html 2 75 2.639057 5.278114 235 +simul 1 66 2.708050 2.708050 255 +syllabu 1 67 2.708050 2.708050 247 +septemb 1 65 2.772589 2.772589 274 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +previou 0 62 2.772589 0.000000 290 +overview 0 56 2.890372 0.000000 323 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +case 0 51 2.995732 0.000000 351 +frequent 0 49 3.044522 0.000000 367 +format 0 48 3.044522 0.000000 356 +featur 0 46 3.091042 0.000000 386 +fridai 2 44 3.135494 6.270988 390 +answer 1 45 3.135494 3.135494 391 +midterm 0 45 3.135494 0.000000 392 +programm 0 39 3.258097 0.000000 445 +probabl 0 40 3.258097 0.000000 455 +open 0 38 3.295837 0.000000 469 +procedur 0 36 3.367296 0.000000 488 +manual 0 35 3.401197 0.000000 504 +represent 0 35 3.401197 0.000000 512 +chapter 2 32 3.465736 6.931472 536 +ask 0 28 3.610918 0.000000 597 +except 0 28 3.610918 0.000000 607 +revis 0 26 3.688879 0.000000 640 +miller 1 17 4.110874 4.110874 949 +regist 0 17 4.110874 0.000000 938 +quiz 2 16 4.174387 8.348774 990 +psycholog 0 15 4.248495 0.000000 1054 +quizz 0 13 4.382027 0.000000 1151 +calcul 0 12 4.465908 0.000000 1268 +assembl 0 12 4.465908 0.000000 1207 +arithmet 1 10 4.653960 4.653960 1388 +tuth 1 9 4.753590 4.753590 1519 +float 0 9 4.753590 0.000000 1504 +integ 0 8 4.875197 0.000000 1688 +lookup 0 5 5.347108 0.000000 2399 +karen 1 4 5.568345 5.568345 2796 +cancel 1 4 5.568345 5.568345 2746 +jerri 1 3 5.857933 5.857933 3445 +suen 0 3 5.857933 0.000000 3446 +asgarian 0 3 5.857933 0.000000 3447 +architecur 0 3 5.857933 0.000000 3448 +tusch 0 2 6.263398 0.000000 4645 +tutsch 0 2 6.263398 0.000000 4646 +execpc 0 2 6.263398 0.000000 4647 +nolandsect 0 2 6.263398 0.000000 4648 +smoler 0 2 6.263398 0.000000 4649 +sunlung 0 2 6.263398 0.000000 4650 +ssuen 0 2 6.263398 0.000000 4651 +edusridevi 0 2 6.263398 0.000000 4652 +bhamidipati 0 2 6.263398 0.000000 4653 +bsri 0 2 6.263398 0.000000 4654 +edumohammad 0 2 6.263398 0.000000 4655 +programs 0 2 6.263398 0.000000 4656 +examsal 0 2 6.263398 0.000000 4657 +noteskaren 0 2 6.263398 0.000000 4658 +updatedmondai 0 2 6.263398 0.000000 4659 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..36c59e2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +class 2 199 1.609438 3.218876 37 +fall 2 181 1.609438 3.218876 40 +updat 0 191 1.609438 0.000000 41 +hour 2 165 1.791759 3.583518 46 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +assign 2 135 1.945910 3.891820 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +final 1 116 2.197225 2.197225 108 +instructor 1 108 2.197225 2.197225 107 +structur 0 106 2.197225 0.000000 105 +book 0 99 2.302585 0.000000 131 +advanc 0 99 2.302585 0.000000 130 +section 3 94 2.397895 7.193685 149 +grade 1 90 2.397895 2.397895 142 +octob 1 89 2.397895 2.397895 156 +question 0 91 2.397895 0.000000 141 +graphic 0 90 2.397895 0.000000 147 +exam 2 86 2.484907 4.969814 169 +novemb 1 81 2.484907 2.484907 179 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +homework 1 79 2.564949 2.564949 193 +decemb 1 80 2.564949 2.564949 215 +mondai 0 77 2.564949 0.000000 206 +interfac 0 79 2.564949 0.000000 209 +html 2 75 2.639057 5.278114 235 +simul 1 66 2.708050 2.708050 255 +syllabu 1 67 2.708050 2.708050 247 +thursdai 0 70 2.708050 0.000000 241 +septemb 1 65 2.772589 2.772589 274 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +previou 0 62 2.772589 0.000000 290 +overview 0 56 2.890372 0.000000 323 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +case 0 51 2.995732 0.000000 351 +frequent 0 49 3.044522 0.000000 367 +format 0 48 3.044522 0.000000 356 +featur 0 46 3.091042 0.000000 386 +fridai 2 44 3.135494 6.270988 390 +answer 1 45 3.135494 3.135494 391 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +programm 0 39 3.258097 0.000000 445 +probabl 0 40 3.258097 0.000000 455 +open 0 38 3.295837 0.000000 469 +procedur 0 36 3.367296 0.000000 488 +manual 0 35 3.401197 0.000000 504 +represent 0 35 3.401197 0.000000 512 +chapter 2 32 3.465736 6.931472 536 +option 0 30 3.555348 0.000000 575 +ask 0 28 3.610918 0.000000 597 +except 0 28 3.610918 0.000000 607 +revis 0 26 3.688879 0.000000 640 +miller 1 17 4.110874 4.110874 949 +regist 0 17 4.110874 0.000000 938 +quiz 2 16 4.174387 8.348774 990 +sign 0 16 4.174387 0.000000 970 +psycholog 0 15 4.248495 0.000000 1054 +quizz 0 13 4.382027 0.000000 1151 +calcul 0 12 4.465908 0.000000 1268 +assembl 0 12 4.465908 0.000000 1207 +arithmet 1 10 4.653960 4.653960 1388 +tuth 1 9 4.753590 4.753590 1519 +float 0 9 4.753590 0.000000 1504 +rais 0 8 4.875197 0.000000 1711 +integ 0 8 4.875197 0.000000 1688 +difficult 0 6 5.164786 0.000000 2035 +lookup 0 5 5.347108 0.000000 2399 +karen 1 4 5.568345 5.568345 2796 +cancel 1 4 5.568345 5.568345 2746 +jerri 1 3 5.857933 5.857933 3445 +suen 0 3 5.857933 0.000000 3446 +asgarian 0 3 5.857933 0.000000 3447 +architecur 0 3 5.857933 0.000000 3448 +tusch 0 2 6.263398 0.000000 4645 +tutsch 0 2 6.263398 0.000000 4646 +execpc 0 2 6.263398 0.000000 4647 +nolandsect 0 2 6.263398 0.000000 4648 +smoler 0 2 6.263398 0.000000 4649 +sunlung 0 2 6.263398 0.000000 4650 +ssuen 0 2 6.263398 0.000000 4651 +edusridevi 0 2 6.263398 0.000000 4652 +bhamidipati 0 2 6.263398 0.000000 4653 +bsri 0 2 6.263398 0.000000 4654 +edumohammad 0 2 6.263398 0.000000 4655 +programs 0 2 6.263398 0.000000 4656 +examsal 0 2 6.263398 0.000000 4657 +noteskaren 0 2 6.263398 0.000000 4658 +updatedmondai 0 2 6.263398 0.000000 4659 +programa 0 1 6.957497 0.000000 8609 +programb 0 1 6.957497 0.000000 8610 +cumul 0 1 6.957497 0.000000 8611 +desperateto 0 1 6.957497 0.000000 8612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..5ce82e89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +list 1 201 1.609438 1.609438 39 +updat 0 191 1.609438 0.000000 41 +oper 0 180 1.609438 0.000000 34 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +recent 0 167 1.791759 0.000000 58 +implement 0 152 1.791759 0.000000 52 +lectur 2 135 1.945910 3.891820 73 +assign 1 135 1.945910 1.945910 66 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +code 1 108 2.197225 2.197225 116 +place 1 106 2.197225 2.197225 124 +structur 0 106 2.197225 0.000000 105 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +need 0 98 2.302585 0.000000 135 +memori 0 101 2.302585 0.000000 139 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +dynam 0 76 2.564949 0.000000 194 +onlin 1 75 2.639057 2.639057 223 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +simul 0 66 2.708050 0.000000 255 +copi 1 63 2.772589 2.772589 284 +creat 0 63 2.772589 0.000000 277 +wednesdai 0 64 2.772589 0.000000 261 +locat 0 59 2.833213 0.000000 303 +browser 0 56 2.890372 0.000000 313 +unix 0 58 2.890372 0.000000 308 +sampl 2 53 2.944439 5.888878 339 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +tabl 1 51 2.995732 2.995732 346 +basic 1 50 3.044522 3.044522 360 +pointer 0 48 3.044522 0.000000 368 +get 1 46 3.091042 3.091042 380 +done 0 47 3.091042 0.000000 381 +directori 0 45 3.135494 0.000000 396 +discuss 0 45 3.135494 0.000000 399 +futur 1 41 3.218876 3.218876 427 +cach 0 41 3.218876 0.000000 432 +announc 1 40 3.258097 3.258097 441 +error 1 40 3.258097 3.258097 449 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +tree 1 36 3.367296 3.367296 492 +either 0 35 3.401197 0.000000 506 +jame 0 35 3.401197 0.000000 507 +chapter 2 32 3.465736 6.931472 536 +common 0 30 3.555348 0.000000 574 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +becom 0 28 3.610918 0.000000 603 +lab 1 24 3.761200 3.761200 698 +sort 1 22 3.850148 3.850148 738 +binari 0 20 3.951244 0.000000 823 +reserv 0 20 3.951244 0.000000 808 +alloc 0 20 3.951244 0.000000 821 +thur 1 19 4.007333 4.007333 847 +comparison 0 19 4.007333 0.000000 863 +attend 1 18 4.060443 4.060443 893 +otherwis 0 17 4.110874 0.000000 922 +balanc 0 14 4.317488 0.000000 1112 +recurs 0 13 4.382027 0.000000 1127 +magic 0 11 4.553877 0.000000 1358 +queue 1 10 4.653960 4.653960 1386 +stack 1 10 4.653960 4.653960 1389 +wendt 0 10 4.653960 0.000000 1446 +cheng 0 10 4.653960 0.000000 1381 +kurt 0 9 4.753590 0.000000 1548 +unusu 0 9 4.753590 0.000000 1566 +forget 0 8 4.875197 0.000000 1712 +reload 0 8 4.875197 0.000000 1682 +hash 0 8 4.875197 0.000000 1618 +skrentni 1 6 5.164786 5.164786 2104 +skip 1 5 5.347108 5.347108 2402 +handin 0 5 5.347108 0.000000 2393 +overload 0 5 5.347108 0.000000 2403 +billi 0 5 5.347108 0.000000 2404 +outdat 0 4 5.568345 0.000000 2797 +appendix 0 4 5.568345 0.000000 2739 +makeup 0 3 5.857933 0.000000 3449 +vega 0 3 5.857933 0.000000 3450 +stale 1 2 6.263398 6.263398 4660 +lec 0 2 6.263398 0.000000 4661 +structureslectur 0 2 6.263398 0.000000 4662 +psychologylectur 0 2 6.263398 0.000000 4663 +psychologycours 0 2 6.263398 0.000000 4664 +baicheng 0 2 6.263398 0.000000 4665 +liao 0 2 6.263398 0.000000 4666 +bail 0 2 6.263398 0.000000 4667 +jiacheng 0 2 6.263398 0.000000 4668 +pmcopyright 0 2 6.263398 0.000000 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..352aedd3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,549 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +cours 2 273 1.098612 2.197224 15 +time 2 293 1.098612 2.197224 17 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +data 2 170 1.791759 3.583518 49 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +phone 0 175 1.791759 0.000000 45 +base 0 165 1.791759 0.000000 50 +madison 0 165 1.791759 0.000000 55 +contact 0 153 1.791759 0.000000 59 +assign 2 135 1.945910 3.891820 66 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +construct 0 139 1.945910 0.000000 82 +document 2 121 2.079442 4.158884 89 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +dayton 1 119 2.079442 2.079442 104 +provid 1 121 2.079442 2.079442 94 +studi 0 120 2.079442 0.000000 91 +welcom 0 122 2.079442 0.000000 99 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +topic 1 114 2.197225 2.197225 110 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +send 0 114 2.197225 0.000000 109 +pleas 0 113 2.197225 0.000000 114 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +book 1 99 2.302585 2.302585 131 +user 1 104 2.302585 2.302585 137 +peopl 0 96 2.302585 0.000000 132 +comment 2 93 2.397895 4.795790 146 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +exam 2 86 2.484907 4.969814 169 +help 2 83 2.484907 4.969814 175 +chang 1 82 2.484907 2.484907 163 +requir 1 81 2.484907 2.484907 167 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +contain 0 81 2.484907 0.000000 174 +stuff 0 87 2.484907 0.000000 171 +thing 0 84 2.484907 0.000000 189 +learn 0 86 2.484907 0.000000 170 +level 0 87 2.484907 0.000000 180 +environ 0 84 2.484907 0.000000 177 +want 1 79 2.564949 2.564949 199 +complet 1 77 2.564949 2.564949 208 +know 1 80 2.564949 2.564949 198 +sourc 0 77 2.564949 0.000000 201 +decemb 0 80 2.564949 0.000000 215 +good 0 77 2.564949 0.000000 200 +state 0 76 2.564949 0.000000 207 +appear 0 78 2.564949 0.000000 210 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +name 1 72 2.639057 2.639057 220 +materi 1 75 2.639057 2.639057 221 +tuesdai 1 73 2.639057 2.639057 219 +effici 1 73 2.639057 2.639057 233 +line 1 75 2.639057 2.639057 231 +write 1 72 2.639057 2.639057 222 +addit 1 74 2.639057 2.639057 228 +html 0 75 2.639057 0.000000 235 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +appli 0 71 2.639057 0.000000 226 +would 1 67 2.708050 2.708050 251 +test 1 66 2.708050 2.708050 252 +main 1 67 2.708050 2.708050 256 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +experi 1 64 2.772589 2.772589 283 +descript 1 64 2.772589 2.772589 271 +new 0 64 2.772589 0.000000 262 +polici 0 64 2.772589 0.000000 279 +abstract 0 62 2.772589 0.000000 276 +written 0 63 2.772589 0.000000 278 +street 0 63 2.772589 0.000000 293 +creat 0 63 2.772589 0.000000 277 +handout 0 64 2.772589 0.000000 263 +wednesdai 0 64 2.772589 0.000000 261 +previou 0 62 2.772589 0.000000 290 +result 0 65 2.772589 0.000000 281 +room 1 59 2.833213 2.833213 301 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +share 0 59 2.833213 0.000000 304 +best 0 59 2.833213 0.000000 299 +type 0 61 2.833213 0.000000 296 +unix 1 58 2.890372 2.890372 308 +semest 1 58 2.890372 2.890372 312 +major 1 56 2.890372 2.890372 315 +faculti 0 56 2.890372 0.000000 325 +detail 0 57 2.890372 0.000000 321 +sever 0 56 2.890372 0.000000 322 +reason 0 57 2.890372 0.000000 318 +special 0 56 2.890372 0.000000 320 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +cover 1 55 2.944439 2.944439 329 +suggest 1 53 2.944439 2.944439 331 +sampl 0 53 2.944439 0.000000 339 +undergradu 0 54 2.944439 0.000000 338 +talk 0 53 2.944439 0.000000 336 +local 0 55 2.944439 0.000000 334 +instruct 0 53 2.944439 0.000000 332 +case 1 51 2.995732 2.995732 351 +run 0 51 2.995732 0.000000 347 +week 0 52 2.995732 0.000000 343 +much 0 52 2.995732 0.000000 349 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +appoint 0 49 3.044522 0.000000 358 +basic 0 50 3.044522 0.000000 360 +right 0 48 3.044522 0.000000 363 +numer 0 49 3.044522 0.000000 369 +format 0 48 3.044522 0.000000 356 +possibl 1 47 3.091042 3.091042 378 +done 1 47 3.091042 3.091042 381 +understand 1 47 3.091042 3.091042 384 +electron 1 47 3.091042 3.091042 379 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +describ 1 45 3.135494 3.135494 400 +midterm 0 45 3.135494 0.000000 392 +anoth 0 45 3.135494 0.000000 408 +even 0 45 3.135494 0.000000 393 +answer 0 45 3.135494 0.000000 391 +made 0 44 3.135494 0.000000 398 +long 1 43 3.178054 3.178054 413 +show 0 43 3.178054 0.000000 417 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +howev 0 41 3.218876 0.000000 422 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +continu 1 39 3.258097 3.258097 448 +error 1 40 3.258097 3.258097 449 +littl 0 39 3.258097 0.000000 454 +multipl 0 39 3.258097 0.000000 453 +tutori 0 39 3.258097 0.000000 437 +correct 1 38 3.295837 3.295837 462 +open 0 38 3.295837 0.000000 469 +mean 0 37 3.332205 0.000000 477 +field 0 37 3.332205 0.000000 482 +respons 0 37 3.332205 0.000000 476 +expect 0 37 3.332205 0.000000 484 +purpos 0 37 3.332205 0.000000 481 +short 0 36 3.367296 0.000000 499 +download 0 36 3.367296 0.000000 489 +soon 0 36 3.367296 0.000000 494 +print 1 34 3.401197 3.401197 503 +statist 0 35 3.401197 0.000000 521 +either 0 35 3.401197 0.000000 506 +approxim 0 35 3.401197 0.000000 509 +everi 0 34 3.401197 0.000000 519 +singl 0 34 3.401197 0.000000 510 +next 0 34 3.401197 0.000000 517 +product 0 33 3.433987 0.000000 527 +within 0 33 3.433987 0.000000 525 +go 0 33 3.433987 0.000000 529 +express 0 32 3.465736 0.000000 540 +ad 0 32 3.465736 0.000000 544 +given 0 32 3.465736 0.000000 538 +often 1 31 3.496508 3.496508 551 +posit 0 31 3.496508 0.000000 552 +computersci 0 30 3.555348 0.000000 562 +abl 0 30 3.555348 0.000000 566 +hard 0 30 3.555348 0.000000 563 +turn 1 29 3.583519 3.583519 586 +limit 1 29 3.583519 3.583519 585 +actual 0 28 3.610918 0.000000 604 +load 0 28 3.610918 0.000000 601 +except 0 28 3.610918 0.000000 607 +intend 0 28 3.610918 0.000000 599 +full 0 28 3.610918 0.000000 615 +becom 0 28 3.610918 0.000000 603 +held 0 28 3.610918 0.000000 600 +manipul 0 27 3.637586 0.000000 624 +determin 0 27 3.637586 0.000000 630 +administr 0 27 3.637586 0.000000 628 +quit 0 27 3.637586 0.000000 633 +consist 1 26 3.688879 3.688879 651 +subject 0 26 3.688879 0.000000 647 +bound 0 26 3.688879 0.000000 659 +comp 0 26 3.688879 0.000000 650 +session 0 26 3.688879 0.000000 643 +valu 1 25 3.737670 3.737670 665 +reliabl 0 25 3.737670 0.000000 674 +never 0 25 3.737670 0.000000 671 +aspect 0 25 3.737670 0.000000 663 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +handl 0 24 3.761200 0.000000 685 +sometim 0 24 3.761200 0.000000 696 +wish 0 24 3.761200 0.000000 692 +variabl 1 23 3.806662 3.806662 715 +begin 1 23 3.806662 3.806662 716 +size 0 23 3.806662 0.000000 713 +input 0 23 3.806662 0.000000 727 +identifi 1 22 3.850148 3.850148 760 +period 0 22 3.850148 0.000000 743 +recommend 0 22 3.850148 0.000000 737 +almost 0 22 3.850148 0.000000 742 +sort 0 22 3.850148 0.000000 738 +inth 0 22 3.850148 0.000000 741 +dai 0 22 3.850148 0.000000 753 +avoid 1 21 3.912023 3.912023 799 +tell 1 21 3.912023 3.912023 777 +output 0 21 3.912023 0.000000 788 +binari 1 20 3.951244 3.951244 823 +sure 1 20 3.951244 3.951244 813 +prepar 0 20 3.951244 0.000000 824 +grad 0 20 3.951244 0.000000 837 +minut 0 20 3.951244 0.000000 810 +qualiti 0 20 3.951244 0.000000 832 +scheme 0 20 3.951244 0.000000 818 +break 0 20 3.951244 0.000000 812 +assum 1 19 4.007333 4.007333 845 +item 0 19 4.007333 0.000000 856 +ever 0 19 4.007333 0.000000 872 +separ 0 19 4.007333 0.000000 844 +exercis 0 19 4.007333 0.000000 842 +log 0 19 4.007333 0.000000 857 +five 0 19 4.007333 0.000000 841 +particularli 0 19 4.007333 0.000000 867 +account 1 18 4.060443 4.060443 882 +encourag 1 18 4.060443 4.060443 880 +record 0 18 4.060443 0.000000 890 +accept 0 18 4.060443 0.000000 879 +along 0 18 4.060443 0.000000 878 +behavior 0 18 4.060443 0.000000 881 +four 0 18 4.060443 0.000000 905 +element 0 18 4.060443 0.000000 895 +attend 0 18 4.060443 0.000000 893 +debug 1 17 4.110874 4.110874 944 +segment 1 17 4.110874 4.110874 931 +outlin 0 17 4.110874 0.000000 914 +macintosh 0 17 4.110874 0.000000 920 +women 1 16 4.174387 4.174387 1004 +earli 1 16 4.174387 4.174387 968 +normal 0 16 4.174387 0.000000 995 +easi 0 16 4.174387 0.000000 969 +style 1 15 4.248495 4.248495 1036 +later 1 15 4.248495 4.248495 1043 +purchas 0 15 4.248495 0.000000 1030 +capabl 0 15 4.248495 0.000000 1016 +doit 1 14 4.317488 4.317488 1111 +near 1 14 4.317488 4.317488 1091 +incomput 0 14 4.317488 0.000000 1096 +consider 0 14 4.317488 0.000000 1076 +floor 0 14 4.317488 0.000000 1070 +manner 0 14 4.317488 0.000000 1074 +easili 0 14 4.317488 0.000000 1077 +convent 0 14 4.317488 0.000000 1072 +necessari 1 13 4.382027 4.382027 1147 +menu 0 13 4.382027 0.000000 1156 +someon 0 13 4.382027 0.000000 1128 +wait 0 13 4.382027 0.000000 1168 +incorpor 0 13 4.382027 0.000000 1163 +dewitt 1 12 4.465908 4.465908 1270 +readi 1 12 4.465908 4.465908 1242 +pascal 1 12 4.465908 4.465908 1213 +grow 0 12 4.465908 0.000000 1209 +onth 0 12 4.465908 0.000000 1218 +weight 0 12 4.465908 0.000000 1204 +count 0 12 4.465908 0.000000 1239 +robust 0 12 4.465908 0.000000 1271 +reader 0 12 4.465908 0.000000 1246 +cycl 1 11 4.553877 4.553877 1335 +extra 1 11 4.553877 4.553877 1312 +statement 1 11 4.553877 4.553877 1313 +extrem 0 11 4.553877 0.000000 1330 +arbitrari 0 11 4.553877 0.000000 1359 +loop 0 11 4.553877 0.000000 1310 +typic 0 11 4.553877 0.000000 1360 +summar 0 11 4.553877 0.000000 1295 +submiss 0 11 4.553877 0.000000 1298 +true 1 10 4.653960 4.653960 1422 +cheat 1 10 4.653960 4.653960 1395 +modular 1 10 4.653960 4.653960 1392 +equal 0 10 4.653960 0.000000 1424 +hint 0 10 4.653960 0.000000 1419 +awai 0 10 4.653960 0.000000 1447 +certain 0 10 4.653960 0.000000 1393 +thecomput 0 10 4.653960 0.000000 1408 +label 0 10 4.653960 0.000000 1423 +strongli 0 10 4.653960 0.000000 1406 +wall 1 9 4.753590 4.753590 1553 +notat 1 9 4.753590 4.753590 1489 +assumpt 1 9 4.753590 4.753590 1514 +declar 1 9 4.753590 4.753590 1526 +end 0 9 4.753590 0.000000 1567 +frank 0 9 4.753590 0.000000 1568 +mention 0 9 4.753590 0.000000 1569 +andth 0 9 4.753590 0.000000 1481 +familiar 0 9 4.753590 0.000000 1485 +equival 0 9 4.753590 0.000000 1496 +prefer 0 9 4.753590 0.000000 1491 +criteria 0 9 4.753590 0.000000 1477 +correctli 0 9 4.753590 0.000000 1478 +informationabout 0 9 4.753590 0.000000 1515 +unusu 0 9 4.753590 0.000000 1566 +clear 0 9 4.753590 0.000000 1488 +pick 0 9 4.753590 0.000000 1498 +yanni 1 8 4.875197 4.875197 1713 +simpli 1 8 4.875197 4.875197 1626 +ioannidi 0 8 4.875197 0.000000 1714 +paramet 1 7 5.010635 5.010635 1796 +explain 1 7 5.010635 5.010635 1816 +header 1 7 5.010635 5.010635 1787 +isbn 0 7 5.010635 0.000000 1901 +exactli 0 7 5.010635 0.000000 1817 +pursu 0 7 5.010635 0.000000 1902 +whenev 0 7 5.010635 0.000000 1883 +bug 0 7 5.010635 0.000000 1801 +extern 1 6 5.164786 5.164786 2105 +mirror 1 6 5.164786 5.164786 2028 +sciencesoffic 0 6 5.164786 0.000000 2101 +notifi 0 6 5.164786 0.000000 2106 +wrong 0 6 5.164786 0.000000 2025 +approv 0 6 5.164786 0.000000 2078 +troubl 0 6 5.164786 0.000000 2002 +desk 1 5 5.347108 5.347108 2297 +situat 1 5 5.347108 5.347108 2365 +supplement 0 5 5.347108 0.000000 2355 +chemistri 0 5 5.347108 0.000000 2405 +sparcstat 0 5 5.347108 0.000000 2406 +caus 0 5 5.347108 0.000000 2298 +respond 0 5 5.347108 0.000000 2354 +blow 0 5 5.347108 0.000000 2407 +skip 0 5 5.347108 0.000000 2402 +thiscours 1 4 5.568345 5.568345 2601 +expens 0 4 5.568345 0.000000 2678 +repeat 0 4 5.568345 0.000000 2798 +suppli 0 4 5.568345 0.000000 2611 +tire 0 4 5.568345 0.000000 2799 +thec 1 3 5.857933 5.857933 3132 +neg 1 3 5.857933 5.857933 3451 +ghostview 0 3 5.857933 0.000000 3163 +eduand 0 3 5.857933 0.000000 3452 +tremend 0 3 5.857933 0.000000 3453 +narr 0 3 5.857933 0.000000 3454 +gradingther 0 3 5.857933 0.000000 3455 +programmingassign 0 3 5.857933 0.000000 3398 +thesear 0 3 5.857933 0.000000 3456 +duedat 0 3 5.857933 0.000000 3105 +helpif 0 3 5.857933 0.000000 3126 +confus 0 3 5.857933 0.000000 3144 +ineffici 0 3 5.857933 0.000000 3457 +useof 0 3 5.857933 0.000000 3368 +meaning 0 3 5.857933 0.000000 3458 +argument 0 3 5.857933 0.000000 3120 +briefli 0 3 5.857933 0.000000 3459 +urg 0 3 5.857933 0.000000 3212 +comfort 0 3 5.857933 0.000000 3136 +pain 0 3 5.857933 0.000000 3460 +clariti 1 2 6.263398 6.263398 4413 +behav 1 2 6.263398 6.263398 4670 +indent 1 2 6.263398 6.263398 4374 +amoffic 0 2 6.263398 0.000000 4671 +femal 0 2 6.263398 0.000000 4672 +wic 0 2 6.263398 0.000000 4673 +oneof 0 2 6.263398 0.000000 4674 +tomak 0 2 6.263398 0.000000 4675 +startup 0 2 6.263398 0.000000 4676 +textth 0 2 6.263398 0.000000 4677 +carrano 0 2 6.263398 0.000000 4678 +lecturenot 0 2 6.263398 0.000000 4679 +notesar 0 2 6.263398 0.000000 4559 +invalu 0 2 6.263398 0.000000 4680 +nonetheless 0 2 6.263398 0.000000 4681 +thatyou 0 2 6.263398 0.000000 4682 +youwork 0 2 6.263398 0.000000 4083 +provis 0 2 6.263398 0.000000 4683 +excus 0 2 6.263398 0.000000 4684 +datastructur 0 2 6.263398 0.000000 4685 +tovisit 0 2 6.263398 0.000000 4686 +andlog 0 2 6.263398 0.000000 4104 +facet 0 2 6.263398 0.000000 4687 +unnecessarili 0 2 6.263398 0.000000 4688 +liter 0 2 6.263398 0.000000 4689 +convei 0 2 6.263398 0.000000 4690 +beavoid 0 2 6.263398 0.000000 4411 +thefirst 0 2 6.263398 0.000000 4092 +outputfil 1 1 6.957497 6.957497 8613 +suzan 1 1 6.957497 6.957497 8614 +inputfil 1 1 6.957497 6.957497 8615 +structuresfal 0 1 6.957497 0.000000 8616 +htmlinstructor 0 1 6.957497 0.000000 8617 +newsassign 0 1 6.957497 0.000000 8618 +statisticssom 0 1 6.957497 0.000000 8619 +median 0 1 6.957497 0.000000 8620 +midterma 0 1 6.957497 0.000000 8621 +oldmidterm 0 1 6.957497 0.000000 8622 +ownmidterm 0 1 6.957497 0.000000 8623 +searchth 0 1 6.957497 0.000000 8624 +filemenu 0 1 6.957497 0.000000 8625 +andchoos 0 1 6.957497 0.000000 8626 +sciencesom 0 1 6.957497 0.000000 8627 +haveform 0 1 6.957497 0.000000 8628 +becomecomput 0 1 6.957497 0.000000 8629 +thisclass 0 1 6.957497 0.000000 8630 +withtheir 0 1 6.957497 0.000000 8631 +classwork 0 1 6.957497 0.000000 8632 +stodder 0 1 6.957497 0.000000 8633 +theodd 0 1 6.957497 0.000000 8634 +statementi 0 1 6.957497 0.000000 8635 +aniniti 0 1 6.957497 0.000000 8636 +exceptionsy 0 1 6.957497 0.000000 8637 +isdata 0 1 6.957497 0.000000 8638 +notnecessari 0 1 6.957497 0.000000 8639 +isveri 0 1 6.957497 0.000000 8640 +whichar 0 1 6.957497 0.000000 8641 +entranceof 0 1 6.957497 0.000000 8642 +needsom 0 1 6.957497 0.000000 8643 +handoutc 0 1 6.957497 0.000000 8644 +althoughi 0 1 6.957497 0.000000 8645 +courseof 0 1 6.957497 0.000000 8646 +apoint 0 1 6.957497 0.000000 8647 +prerequisitecours 0 1 6.957497 0.000000 8648 +certainrestrict 0 1 6.957497 0.000000 8649 +emailand 0 1 6.957497 0.000000 8650 +toyour 0 1 6.957497 0.000000 8651 +runwith 0 1 6.957497 0.000000 8652 +inassign 0 1 6.957497 0.000000 8653 +allelectron 0 1 6.957497 0.000000 8654 +policyno 0 1 6.957497 0.000000 8655 +coincid 0 1 6.957497 0.000000 8656 +oneach 0 1 6.957497 0.000000 8657 +thelast 0 1 6.957497 0.000000 8658 +cheatingth 0 1 6.957497 0.000000 8659 +linest 0 1 6.957497 0.000000 8660 +tocommun 0 1 6.957497 0.000000 8661 +butther 0 1 6.957497 0.000000 8662 +obei 0 1 6.957497 0.000000 8663 +policiesgovern 0 1 6.957497 0.000000 8664 +policiesif 0 1 6.957497 0.000000 8665 +currenthard 0 1 6.957497 0.000000 8666 +conceptsthat 0 1 6.957497 0.000000 8667 +emailsever 0 1 6.957497 0.000000 8668 +gradingprogram 0 1 6.957497 0.000000 8669 +typicalinput 0 1 6.957497 0.000000 8670 +projectspecif 0 1 6.957497 0.000000 8671 +shoulddemonstr 0 1 6.957497 0.000000 8672 +includingunusu 0 1 6.957497 0.000000 8673 +considerationof 0 1 6.957497 0.000000 8674 +orcomplex 0 1 6.957497 0.000000 8675 +definedconst 0 1 6.957497 0.000000 8676 +thosevalu 0 1 6.957497 0.000000 8677 +styleus 0 1 6.957497 0.000000 8678 +variable_nam 0 1 6.957497 0.000000 8679 +function_nam 0 1 6.957497 0.000000 8680 +const 0 1 6.957497 0.000000 8681 +defined_const 0 1 6.957497 0.000000 8682 +enum 0 1 6.957497 0.000000 8683 +enumtyp 0 1 6.957497 0.000000 8684 +classnam 0 1 6.957497 0.000000 8685 +notesfor 0 1 6.957497 0.000000 8686 +meaningfulli 0 1 6.957497 0.000000 8687 +documentationthi 0 1 6.957497 0.000000 8688 +yourprogram 0 1 6.957497 0.000000 8689 +someonewho 0 1 6.957497 0.000000 8690 +superfici 0 1 6.957497 0.000000 8691 +unawar 0 1 6.957497 0.000000 8692 +descriptionne 0 1 6.957497 0.000000 8693 +thensuffici 0 1 6.957497 0.000000 8694 +documentationther 0 1 6.957497 0.000000 8695 +structuresshould 0 1 6.957497 0.000000 8696 +membershould 0 1 6.957497 0.000000 8697 +sname 0 1 6.957497 0.000000 8698 +withoutmak 0 1 6.957497 0.000000 8699 +stackyou 0 1 6.957497 0.000000 8700 +tricki 0 1 6.957497 0.000000 8701 +opaqu 0 1 6.957497 0.000000 8702 +commentcan 0 1 6.957497 0.000000 8703 +clarifi 0 1 6.957497 0.000000 8704 +outlineof 0 1 6.957497 0.000000 8705 +vimani 0 1 6.957497 0.000000 8706 +becomecomfort 0 1 6.957497 0.000000 8707 +youronli 0 1 6.957497 0.000000 8708 +macpasc 0 1 6.957497 0.000000 8709 +withunix 0 1 6.957497 0.000000 8710 +wellspent 0 1 6.957497 0.000000 8711 +thefollow 0 1 6.957497 0.000000 8712 +tbayou 0 1 6.957497 0.000000 8713 +goto 0 1 6.957497 0.000000 8714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..adde9f6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +inform 1 412 0.693147 0.693147 8 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +fall 1 181 1.609438 1.609438 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +data 2 170 1.791759 3.583518 49 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +base 0 165 1.791759 0.000000 50 +assign 3 135 1.945910 5.837730 66 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +first 1 140 1.945910 1.945910 71 +object 0 138 1.945910 0.000000 79 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +introduct 1 126 2.079442 2.079442 87 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +provid 0 121 2.079442 0.000000 94 +structur 2 106 2.197225 4.394450 105 +teach 1 108 2.197225 2.197225 112 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +assist 0 112 2.197225 0.000000 113 +well 0 109 2.197225 0.000000 121 +text 2 98 2.302585 4.605170 133 +book 1 99 2.302585 2.302585 131 +need 0 98 2.302585 0.000000 135 +advanc 0 99 2.302585 0.000000 130 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +select 0 91 2.397895 0.000000 154 +question 0 91 2.397895 0.000000 141 +search 0 95 2.397895 0.000000 155 +pictur 0 89 2.397895 0.000000 160 +exam 2 86 2.484907 4.969814 169 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +second 1 81 2.484907 2.484907 166 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +larg 0 82 2.484907 0.000000 168 +learn 0 86 2.484907 0.000000 170 +stuff 0 87 2.484907 0.000000 171 +school 0 84 2.484907 0.000000 188 +homework 0 79 2.564949 0.000000 193 +mondai 0 77 2.564949 0.000000 206 +complet 0 77 2.564949 0.000000 208 +want 0 79 2.564949 0.000000 199 +appear 0 78 2.564949 0.000000 210 +know 0 80 2.564949 0.000000 198 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +materi 1 75 2.639057 2.639057 221 +tuesdai 1 73 2.639057 2.639057 219 +write 1 72 2.639057 2.639057 222 +html 1 75 2.639057 2.639057 235 +line 1 75 2.639057 2.639057 231 +addit 1 74 2.639057 2.639057 228 +appli 0 71 2.639057 0.000000 226 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +free 0 73 2.639057 0.000000 224 +name 0 72 2.639057 0.000000 220 +thursdai 1 70 2.708050 2.708050 241 +abstract 1 62 2.772589 2.772589 276 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +street 0 63 2.772589 0.000000 293 +creat 0 63 2.772589 0.000000 277 +handout 0 64 2.772589 0.000000 263 +complex 0 64 2.772589 0.000000 269 +function 0 62 2.772589 0.000000 275 +previou 0 62 2.772589 0.000000 290 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +detail 1 57 2.890372 2.890372 321 +unix 0 58 2.890372 0.000000 308 +semest 0 58 2.890372 0.000000 312 +index 0 56 2.890372 0.000000 309 +summer 0 56 2.890372 0.000000 311 +cover 1 55 2.944439 2.944439 329 +maintain 1 51 2.995732 2.995732 342 +run 0 51 2.995732 0.000000 347 +tabl 0 51 2.995732 0.000000 346 +principl 1 48 3.044522 3.044522 357 +give 0 50 3.044522 0.000000 359 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +electron 1 47 3.091042 3.091042 379 +even 1 45 3.135494 3.135494 393 +fridai 1 44 3.135494 3.135494 390 +answer 0 45 3.135494 0.000000 391 +textbook 0 44 3.135494 0.000000 397 +anoth 0 45 3.135494 0.000000 408 +describ 0 45 3.135494 0.000000 400 +long 0 43 3.178054 0.000000 413 +http 1 41 3.218876 3.218876 420 +edit 0 42 3.218876 0.000000 418 +littl 1 39 3.258097 3.258097 454 +must 0 40 3.258097 0.000000 442 +correct 0 38 3.295837 0.000000 462 +close 0 38 3.295837 0.000000 465 +connect 0 37 3.332205 0.000000 485 +feel 0 37 3.332205 0.000000 483 +respons 0 37 3.332205 0.000000 476 +short 0 36 3.367296 0.000000 499 +tree 0 36 3.367296 0.000000 492 +jame 1 35 3.401197 3.401197 507 +approxim 0 35 3.401197 0.000000 509 +taught 0 33 3.433987 0.000000 526 +concept 1 32 3.465736 3.465736 537 +given 0 32 3.465736 0.000000 538 +often 1 31 3.496508 3.496508 551 +photo 0 31 3.496508 0.000000 561 +abl 0 30 3.555348 0.000000 566 +produc 0 30 3.555348 0.000000 572 +turn 0 29 3.583519 0.000000 586 +determin 1 27 3.637586 3.637586 630 +manipul 0 27 3.637586 0.000000 624 +revis 0 26 3.688879 0.000000 640 +bound 0 26 3.688879 0.000000 659 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +background 0 25 3.737670 0.000000 664 +alwai 0 24 3.761200 0.000000 691 +size 0 23 3.806662 0.000000 713 +sequenc 0 23 3.806662 0.000000 734 +identifi 0 22 3.850148 0.000000 760 +emphasi 0 22 3.850148 0.000000 755 +recommend 0 22 3.850148 0.000000 737 +sort 0 22 3.850148 0.000000 738 +color 0 22 3.850148 0.000000 762 +basi 0 20 3.951244 0.000000 828 +binari 0 20 3.951244 0.000000 823 +tenni 0 20 3.951244 0.000000 838 +exercis 1 19 4.007333 4.007333 842 +separ 0 19 4.007333 0.000000 844 +log 0 19 4.007333 0.000000 857 +assum 0 19 4.007333 0.000000 845 +five 0 19 4.007333 0.000000 841 +appropri 0 18 4.060443 0.000000 883 +wind 0 18 4.060443 0.000000 908 +account 0 18 4.060443 0.000000 882 +attend 0 18 4.060443 0.000000 893 +record 0 18 4.060443 0.000000 890 +debug 0 17 4.110874 0.000000 944 +outlin 0 17 4.110874 0.000000 914 +white 0 17 4.110874 0.000000 951 +zhang 0 16 4.174387 0.000000 980 +portion 0 16 4.174387 0.000000 971 +purchas 0 15 4.248495 0.000000 1030 +psycholog 0 15 4.248495 0.000000 1054 +later 0 15 4.248495 0.000000 1043 +photograph 0 15 4.248495 0.000000 1056 +score 0 15 4.248495 0.000000 1017 +doit 1 14 4.317488 4.317488 1111 +happi 0 14 4.317488 0.000000 1079 +trip 0 14 4.317488 0.000000 1113 +necessari 1 13 4.382027 4.382027 1147 +block 0 13 4.382027 0.000000 1183 +introduc 0 13 4.382027 0.000000 1139 +essenti 0 13 4.382027 0.000000 1137 +dewitt 1 12 4.465908 4.465908 1270 +weight 0 12 4.465908 0.000000 1204 +count 0 12 4.465908 0.000000 1239 +regularli 1 11 4.553877 4.553877 1338 +modular 0 10 4.653960 0.000000 1392 +sundai 0 10 4.653960 0.000000 1387 +true 0 10 4.653960 0.000000 1422 +strongli 0 10 4.653960 0.000000 1406 +hint 0 10 4.653960 0.000000 1419 +equal 0 10 4.653960 0.000000 1424 +card 0 10 4.653960 0.000000 1435 +black 0 10 4.653960 0.000000 1418 +laru 1 9 4.753590 4.753590 1560 +clear 0 9 4.753590 0.000000 1488 +wall 0 9 4.753590 0.000000 1553 +frank 0 9 4.753590 0.000000 1568 +mention 0 9 4.753590 0.000000 1569 +debugg 0 9 4.753590 0.000000 1493 +notat 0 9 4.753590 0.000000 1489 +login 0 9 4.753590 0.000000 1550 +absolut 0 8 4.875197 0.000000 1646 +integ 0 8 4.875197 0.000000 1688 +hash 0 8 4.875197 0.000000 1618 +isbn 0 7 5.010635 0.000000 1901 +scout 0 7 5.010635 0.000000 1903 +sciencesoffic 1 6 5.164786 5.164786 2101 +strong 0 6 5.164786 0.000000 2029 +troubl 0 6 5.164786 0.000000 2002 +mirror 0 6 5.164786 0.000000 2028 +notifi 0 6 5.164786 0.000000 2106 +skrentni 0 6 5.164786 0.000000 2104 +difficult 0 6 5.164786 0.000000 2035 +rough 0 6 5.164786 0.000000 2107 +byte 0 6 5.164786 0.000000 2108 +chin 1 5 5.347108 5.347108 2408 +tang 1 5 5.347108 5.347108 2409 +desk 1 5 5.347108 5.347108 2297 +situat 0 5 5.347108 0.000000 2365 +skip 0 5 5.347108 0.000000 2402 +crucial 0 5 5.347108 0.000000 2384 +chemistri 0 5 5.347108 0.000000 2405 +girl 0 5 5.347108 0.000000 2410 +assignmentsand 0 4 5.568345 0.000000 2760 +unless 0 4 5.568345 0.000000 2607 +birthdai 0 4 5.568345 0.000000 2800 +fora 0 4 5.568345 0.000000 2697 +reiter 0 3 5.857933 0.000000 3461 +narr 0 3 5.857933 0.000000 3454 +gradingther 0 3 5.857933 0.000000 3455 +freshman 0 3 5.857933 0.000000 3462 +cchin 1 2 6.263398 6.263398 4691 +compuer 0 2 6.263398 0.000000 4692 +weiz 0 2 6.263398 0.000000 4693 +amoffic 0 2 6.263398 0.000000 4671 +textth 0 2 6.263398 0.000000 4677 +carrano 0 2 6.263398 0.000000 4678 +needless 0 2 6.263398 0.000000 4694 +sophomor 0 2 6.263398 0.000000 4695 +databaseof 0 2 6.263398 0.000000 4696 +larusinstructor 0 1 6.957497 0.000000 8715 +laruslaru 0 1 6.957497 0.000000 8716 +amcontentsteach 0 1 6.957497 0.000000 8717 +assistantstextlectur 0 1 6.957497 0.000000 8718 +informationelectron 0 1 6.957497 0.000000 8719 +mailth 0 1 6.957497 0.000000 8720 +languagegradingexamscours 0 1 6.957497 0.000000 8721 +scheduleassign 0 1 6.957497 0.000000 8722 +assignmentscours 0 1 6.957497 0.000000 8723 +objectivesc 0 1 6.957497 0.000000 8724 +assistantswei 0 1 6.957497 0.000000 8725 +forthi 0 1 6.957497 0.000000 8726 +theassign 0 1 6.957497 0.000000 8727 +zhangoffic 0 1 6.957497 0.000000 8728 +entranc 0 1 6.957497 0.000000 8729 +maili 0 1 6.957497 0.000000 8730 +gdbthere 0 1 6.957497 0.000000 8731 +administrationbas 0 1 6.957497 0.000000 8732 +storagelectur 0 1 6.957497 0.000000 8733 +listslectur 0 1 6.957497 0.000000 8734 +stackslectur 0 1 6.957497 0.000000 8735 +queueslectur 0 1 6.957497 0.000000 8736 +hashinglectur 0 1 6.957497 0.000000 8737 +recursionlectur 0 1 6.957497 0.000000 8738 +treesbinari 0 1 6.957497 0.000000 8739 +searchlectur 0 1 6.957497 0.000000 8740 +treesgraphslectur 0 1 6.957497 0.000000 8741 +sortinglectur 0 1 6.957497 0.000000 8742 +tbaassign 0 1 6.957497 0.000000 8743 +nameyear 0 1 6.957497 0.000000 8744 +coursesprevi 0 1 6.957497 0.000000 8745 +experiencerec 0 1 6.957497 0.000000 8746 +tournament 0 1 6.957497 0.000000 8747 +aconcord 0 1 6.957497 0.000000 8748 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..71796a1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +list 1 201 1.609438 1.609438 39 +updat 0 191 1.609438 0.000000 41 +oper 0 180 1.609438 0.000000 34 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +recent 0 167 1.791759 0.000000 58 +lectur 2 135 1.945910 3.891820 73 +assign 1 135 1.945910 1.945910 66 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +code 1 108 2.197225 2.197225 116 +place 1 106 2.197225 2.197225 124 +structur 0 106 2.197225 0.000000 105 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +need 0 98 2.302585 0.000000 135 +memori 0 101 2.302585 0.000000 139 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +dynam 0 76 2.564949 0.000000 194 +onlin 1 75 2.639057 2.639057 223 +materi 0 75 2.639057 0.000000 221 +addit 0 74 2.639057 0.000000 228 +line 0 75 2.639057 0.000000 231 +simul 0 66 2.708050 0.000000 255 +copi 1 63 2.772589 2.772589 284 +creat 0 63 2.772589 0.000000 277 +wednesdai 0 64 2.772589 0.000000 261 +locat 0 59 2.833213 0.000000 303 +browser 0 56 2.890372 0.000000 313 +unix 0 58 2.890372 0.000000 308 +sampl 2 53 2.944439 5.888878 339 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +basic 1 50 3.044522 3.044522 360 +pointer 0 48 3.044522 0.000000 368 +get 1 46 3.091042 3.091042 380 +done 0 47 3.091042 0.000000 381 +directori 0 45 3.135494 0.000000 396 +cach 0 41 3.218876 0.000000 432 +futur 0 41 3.218876 0.000000 427 +announc 1 40 3.258097 3.258097 441 +error 1 40 3.258097 3.258097 449 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +tree 0 36 3.367296 0.000000 492 +either 0 35 3.401197 0.000000 506 +jame 0 35 3.401197 0.000000 507 +chapter 2 32 3.465736 6.931472 536 +common 0 30 3.555348 0.000000 574 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +becom 0 28 3.610918 0.000000 603 +lab 1 24 3.761200 3.761200 698 +sort 1 22 3.850148 3.850148 738 +reserv 0 20 3.951244 0.000000 808 +alloc 0 20 3.951244 0.000000 821 +thur 1 19 4.007333 4.007333 847 +attend 1 18 4.060443 4.060443 893 +otherwis 0 17 4.110874 0.000000 922 +recurs 0 13 4.382027 0.000000 1127 +magic 0 11 4.553877 0.000000 1358 +queue 1 10 4.653960 4.653960 1386 +stack 1 10 4.653960 4.653960 1389 +wendt 0 10 4.653960 0.000000 1446 +cheng 0 10 4.653960 0.000000 1381 +kurt 0 9 4.753590 0.000000 1548 +unusu 0 9 4.753590 0.000000 1566 +forget 0 8 4.875197 0.000000 1712 +reload 0 8 4.875197 0.000000 1682 +hash 0 8 4.875197 0.000000 1618 +skrentni 1 6 5.164786 5.164786 2104 +skip 1 5 5.347108 5.347108 2402 +handin 0 5 5.347108 0.000000 2393 +overload 0 5 5.347108 0.000000 2403 +billi 0 5 5.347108 0.000000 2404 +outdat 0 4 5.568345 0.000000 2797 +appendix 0 4 5.568345 0.000000 2739 +makeup 0 3 5.857933 0.000000 3449 +vega 0 3 5.857933 0.000000 3450 +stale 1 2 6.263398 6.263398 4660 +lec 0 2 6.263398 0.000000 4661 +structureslectur 0 2 6.263398 0.000000 4662 +psychologylectur 0 2 6.263398 0.000000 4663 +psychologycours 0 2 6.263398 0.000000 4664 +baicheng 0 2 6.263398 0.000000 4665 +liao 0 2 6.263398 0.000000 4666 +bail 0 2 6.263398 0.000000 4667 +jiacheng 0 2 6.263398 0.000000 4668 +pmcopyright 0 2 6.263398 0.000000 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..daaf5b4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +hour 1 165 1.791759 1.791759 46 +madison 1 165 1.791759 1.791759 55 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +schedul 0 119 2.079442 0.000000 85 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +final 0 116 2.197225 0.000000 108 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +mondai 0 77 2.564949 0.000000 206 +state 0 76 2.564949 0.000000 207 +david 0 71 2.639057 0.000000 232 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +written 0 63 2.772589 0.000000 278 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +archiv 0 49 3.044522 0.000000 364 +fridai 0 44 3.135494 0.000000 390 +term 0 43 3.178054 0.000000 411 +examin 1 42 3.218876 3.218876 424 +theoret 1 39 3.258097 3.258097 446 +brian 1 38 3.295837 3.295837 466 +john 0 33 3.433987 0.000000 532 +hill 0 25 3.737670 0.000000 670 +tent 0 22 3.850148 0.000000 739 +martin 0 21 3.912023 0.000000 794 +north 0 19 4.007333 0.000000 873 +isbn 0 7 5.010635 0.000000 1901 +mcgraw 0 5 5.347108 0.000000 2262 +clarif 0 5 5.347108 0.000000 2253 +sundaram 1 3 5.857933 5.857933 3463 +cole 1 2 6.263398 6.263398 4697 +stukel 0 2 6.263398 0.000000 4698 +dakota 0 1 6.957497 0.000000 8749 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..9c0d27cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +project 0 340 1.098612 0.000000 18 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 0 247 1.386294 0.000000 24 +fall 1 181 1.609438 1.609438 40 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +madison 0 165 1.791759 0.000000 55 +lectur 0 135 1.945910 0.000000 73 +schedul 0 119 2.079442 0.000000 85 +databas 0 122 2.079442 0.000000 86 +final 1 116 2.197225 2.197225 108 +site 1 106 2.197225 2.197225 119 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +mathemat 0 108 2.197225 0.000000 123 +book 1 99 2.302585 2.302585 131 +octob 2 89 2.397895 4.795790 156 +exam 2 86 2.484907 4.969814 169 +solut 1 82 2.484907 2.484907 162 +novemb 1 81 2.484907 2.484907 179 +librari 0 87 2.484907 0.000000 181 +homework 2 79 2.564949 5.129898 193 +decemb 1 80 2.564949 2.564949 215 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +septemb 1 65 2.772589 2.772589 274 +wednesdai 1 64 2.772589 2.772589 261 +march 1 61 2.833213 2.833213 295 +locat 1 59 2.833213 2.833213 303 +semest 1 58 2.890372 2.890372 312 +overview 0 56 2.890372 0.000000 323 +variou 0 56 2.890372 0.000000 317 +sampl 1 53 2.944439 2.944439 339 +date 1 51 2.995732 2.995732 344 +telephon 0 50 3.044522 0.000000 373 +midterm 2 45 3.135494 6.270988 392 +math 1 44 3.135494 3.135494 402 +textbook 0 44 3.135494 0.000000 397 +mechan 1 43 3.178054 3.178054 416 +linear 1 41 3.218876 3.218876 431 +open 1 38 3.295837 3.295837 469 +comp 1 26 3.688879 3.688879 650 +relev 0 26 3.688879 0.000000 637 +period 0 22 3.850148 0.000000 743 +reserv 0 20 3.951244 0.000000 808 +item 0 19 4.007333 0.000000 856 +stat 1 17 4.110874 4.110874 924 +matlab 1 14 4.317488 4.317488 1081 +doit 0 14 4.317488 0.000000 1111 +wendt 0 10 4.653960 0.000000 1446 +mangasarian 1 9 4.753590 4.753590 1570 +preliminari 0 9 4.753590 0.000000 1480 +kurt 0 9 4.753590 0.000000 1548 +ferri 0 8 4.875197 0.000000 1715 +olvi 1 6 5.164786 5.164786 2109 +setup 0 2 6.263398 0.000000 4211 +bibliograph 0 2 6.263398 0.000000 4699 +programmingfal 0 1 6.957497 0.000000 8750 +pphone 0 1 6.957497 0.000000 8751 +searchabl 0 1 6.957497 0.000000 8752 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..3072b448 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 0 329 1.098612 0.000000 16 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +techniqu 0 99 2.302585 0.000000 138 +grade 1 90 2.397895 2.397895 142 +octob 0 89 2.397895 0.000000 156 +librari 0 87 2.484907 0.000000 181 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +overview 0 56 2.890372 0.000000 323 +date 0 51 2.995732 0.000000 344 +telephon 1 50 3.044522 3.044522 373 +appoint 1 49 3.044522 3.044522 358 +principl 0 48 3.044522 0.000000 357 +get 0 46 3.091042 0.000000 380 +fridai 0 44 3.135494 0.000000 390 +examin 0 42 3.218876 0.000000 424 +late 0 40 3.258097 0.000000 439 +comp 0 26 3.688879 0.000000 650 +reserv 0 20 3.951244 0.000000 808 +stat 0 17 4.110874 0.000000 924 +month 0 15 4.248495 0.000000 1025 +psycholog 0 15 4.248495 0.000000 1054 +susan 0 15 4.248495 0.000000 1050 +stori 0 14 4.317488 0.000000 1087 +regularli 0 11 4.553877 0.000000 1338 +wendt 0 10 4.653960 0.000000 1446 +tuth 0 9 4.753590 0.000000 1519 +recit 0 9 4.753590 0.000000 1475 +fischer 0 7 5.010635 0.000000 1893 +horwitz 1 5 5.347108 5.347108 2411 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +ullman 0 4 5.568345 0.000000 2749 +rahul 1 3 5.857933 5.857933 3464 +compilersspr 0 2 6.263398 0.000000 4700 +kapoor 0 2 6.263398 0.000000 4701 +sethi 0 2 6.263398 0.000000 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..b9d32b65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,527 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +project 3 340 1.098612 3.295836 18 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +languag 2 227 1.386294 2.772588 26 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 0 201 1.609438 0.000000 39 +avail 2 169 1.791759 3.583518 48 +implement 2 152 1.791759 3.583518 52 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +network 0 168 1.791759 0.000000 61 +file 2 132 1.945910 3.891820 70 +process 2 142 1.945910 3.891820 72 +assign 2 135 1.945910 3.891820 66 +lectur 2 135 1.945910 3.891820 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +schedul 2 119 2.079442 4.158884 85 +introduct 2 126 2.079442 4.158884 87 +number 1 130 2.079442 2.079442 97 +compil 0 122 2.079442 0.000000 96 +provid 0 121 2.079442 0.000000 94 +specif 2 106 2.197225 4.394450 106 +manag 2 114 2.197225 4.394450 125 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +make 1 111 2.197225 2.197225 120 +structur 1 106 2.197225 2.197225 105 +look 0 107 2.197225 0.000000 115 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +memori 2 101 2.302585 4.605170 139 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +section 2 94 2.397895 4.795790 149 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +follow 1 92 2.397895 2.397895 143 +octob 1 89 2.397895 2.397895 156 +comment 0 93 2.397895 0.000000 146 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +exam 2 86 2.484907 4.969814 169 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +contain 1 81 2.484907 2.484907 174 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +larg 0 82 2.484907 0.000000 168 +chang 0 82 2.484907 0.000000 163 +control 0 82 2.484907 0.000000 164 +member 0 84 2.484907 0.000000 165 +wide 0 84 2.484907 0.000000 185 +level 0 87 2.484907 0.000000 180 +librari 0 87 2.484907 0.000000 181 +learn 0 86 2.484907 0.000000 170 +messag 1 76 2.564949 2.564949 212 +refer 1 78 2.564949 2.564949 203 +issu 1 78 2.564949 2.564949 211 +sourc 1 77 2.564949 2.564949 201 +mondai 1 77 2.564949 2.564949 206 +exampl 0 77 2.564949 0.000000 195 +orient 0 80 2.564949 0.000000 205 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +decemb 0 80 2.564949 0.000000 215 +state 0 76 2.564949 0.000000 207 +come 0 78 2.564949 0.000000 202 +summari 1 73 2.639057 2.639057 237 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +addit 1 74 2.639057 2.639057 228 +line 0 75 2.639057 0.000000 231 +name 0 72 2.639057 0.000000 220 +materi 0 75 2.639057 0.000000 221 +free 0 73 2.639057 0.000000 224 +write 0 72 2.639057 0.000000 222 +java 3 70 2.708050 8.124150 248 +differ 1 66 2.708050 2.708050 253 +receiv 1 66 2.708050 2.708050 244 +thursdai 1 70 2.708050 2.708050 241 +main 1 67 2.708050 2.708050 256 +view 1 70 2.708050 2.708050 254 +order 0 69 2.708050 0.000000 249 +test 0 66 2.708050 0.000000 252 +simul 0 66 2.708050 0.000000 255 +knowledg 0 67 2.708050 0.000000 243 +creat 1 63 2.772589 2.772589 277 +wednesdai 1 64 2.772589 2.772589 261 +collect 1 65 2.772589 2.772589 268 +new 1 64 2.772589 2.772589 262 +import 1 65 2.772589 2.772589 282 +virtual 1 62 2.772589 2.772589 285 +copi 0 63 2.772589 0.000000 284 +room 1 59 2.833213 2.833213 301 +type 1 61 2.833213 2.833213 296 +back 1 60 2.833213 2.833213 297 +content 0 59 2.833213 0.000000 302 +share 0 59 2.833213 0.000000 304 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +detail 1 57 2.890372 2.890372 321 +unix 1 58 2.890372 2.890372 308 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +think 0 57 2.890372 0.000000 314 +sever 0 56 2.890372 0.000000 322 +reason 0 57 2.890372 0.000000 318 +three 1 54 2.944439 2.944439 330 +processor 1 54 2.944439 2.944439 335 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +local 1 55 2.944439 2.944439 334 +run 1 51 2.995732 2.995732 347 +date 0 51 2.995732 0.000000 344 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +archiv 0 49 3.044522 0.000000 364 +set 0 50 3.044522 0.000000 361 +pointer 0 48 3.044522 0.000000 368 +standard 0 48 3.044522 0.000000 365 +frequent 0 49 3.044522 0.000000 367 +right 0 48 3.044522 0.000000 363 +get 1 46 3.091042 3.091042 380 +move 0 47 3.091042 0.000000 382 +electron 0 47 3.091042 0.000000 379 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +midterm 2 45 3.135494 6.270988 392 +discuss 1 45 3.135494 3.135494 399 +directori 1 45 3.135494 3.135494 396 +answer 1 45 3.135494 3.135494 391 +describ 0 45 3.135494 0.000000 400 +even 0 45 3.135494 0.000000 393 +long 0 43 3.178054 0.000000 413 +show 0 43 3.178054 0.000000 417 +term 0 43 3.178054 0.000000 411 +edit 0 42 3.218876 0.000000 418 +fast 0 42 3.218876 0.000000 429 +howev 0 41 3.218876 0.000000 422 +late 1 40 3.258097 3.258097 439 +error 1 40 3.258097 3.258097 449 +tutori 1 39 3.258097 3.258097 437 +must 1 40 3.258097 3.258097 442 +littl 0 39 3.258097 0.000000 454 +author 0 39 3.258097 0.000000 450 +programm 0 39 3.258097 0.000000 445 +correct 1 38 3.295837 3.295837 462 +seminar 1 38 3.295837 3.295837 470 +credit 0 38 3.295837 0.000000 460 +hand 1 37 3.332205 3.332205 475 +workstat 1 37 3.332205 3.332205 479 +respons 0 37 3.332205 0.000000 476 +feel 0 37 3.332205 0.000000 483 +procedur 0 36 3.367296 0.000000 488 +short 0 36 3.367296 0.000000 499 +copyright 0 36 3.367296 0.000000 495 +post 0 35 3.401197 0.000000 505 +either 0 35 3.401197 0.000000 506 +least 0 35 3.401197 0.000000 516 +jame 0 35 3.401197 0.000000 507 +random 0 34 3.401197 0.000000 511 +concurr 0 34 3.401197 0.000000 501 +manual 0 35 3.401197 0.000000 504 +statist 0 35 3.401197 0.000000 521 +go 0 33 3.433987 0.000000 529 +chapter 1 32 3.465736 3.465736 536 +ad 0 32 3.465736 0.000000 544 +kind 0 32 3.465736 0.000000 541 +storag 0 31 3.496508 0.000000 553 +graph 1 30 3.555348 3.555348 576 +secur 1 30 3.555348 3.555348 577 +specifi 0 30 3.555348 0.000000 568 +option 0 30 3.555348 0.000000 575 +focu 0 30 3.555348 0.000000 571 +synchron 1 29 3.583519 3.583519 588 +built 0 29 3.583519 0.000000 592 +becom 1 28 3.610918 3.610918 603 +intend 0 28 3.610918 0.000000 599 +packag 0 28 3.610918 0.000000 614 +except 0 28 3.610918 0.000000 607 +quit 1 27 3.637586 3.637586 633 +determin 0 27 3.637586 0.000000 630 +arrai 0 27 3.637586 0.000000 627 +comp 1 26 3.688879 3.688879 650 +request 0 26 3.688879 0.000000 635 +session 0 26 3.688879 0.000000 643 +bound 0 26 3.688879 0.000000 659 +detect 0 26 3.688879 0.000000 646 +primari 0 25 3.737670 0.000000 669 +although 0 25 3.737670 0.000000 667 +other 1 24 3.761200 3.761200 697 +thank 1 23 3.806662 3.806662 721 +initi 1 23 3.806662 3.806662 717 +begin 1 23 3.806662 3.806662 716 +input 0 23 3.806662 0.000000 727 +variabl 0 23 3.806662 0.000000 715 +thread 0 23 3.806662 0.000000 722 +togeth 0 23 3.806662 0.000000 714 +disk 1 22 3.850148 3.850148 747 +dai 1 22 3.850148 3.850148 753 +finish 1 22 3.850148 3.850148 748 +sent 0 22 3.850148 0.000000 763 +recommend 0 22 3.850148 0.000000 737 +varieti 0 22 3.850148 0.000000 740 +tent 0 22 3.850148 0.000000 739 +avoid 1 21 3.912023 3.912023 799 +path 1 21 3.912023 3.912023 778 +watch 0 21 3.912023 0.000000 789 +latest 0 21 3.912023 0.000000 785 +theunivers 0 21 3.912023 0.000000 797 +among 0 21 3.912023 0.000000 781 +output 0 21 3.912023 0.000000 788 +programminglanguag 0 21 3.912023 0.000000 782 +entir 0 20 3.951244 0.000000 811 +alloc 0 20 3.951244 0.000000 821 +reserv 0 20 3.951244 0.000000 808 +thur 1 19 4.007333 4.007333 847 +histori 1 19 4.007333 4.007333 853 +definit 0 19 4.007333 0.000000 864 +separ 0 19 4.007333 0.000000 844 +andrew 0 19 4.007333 0.000000 849 +five 0 19 4.007333 0.000000 841 +exercis 0 19 4.007333 0.000000 842 +runtim 0 19 4.007333 0.000000 858 +excel 0 19 4.007333 0.000000 868 +assum 0 19 4.007333 0.000000 845 +lot 0 18 4.060443 0.000000 889 +behavior 0 18 4.060443 0.000000 881 +encourag 0 18 4.060443 0.000000 880 +four 0 18 4.060443 0.000000 905 +sept 2 17 4.110874 8.221748 952 +monitor 1 17 4.110874 4.110874 941 +protect 1 17 4.110874 4.110874 935 +anyon 1 17 4.110874 4.110874 916 +regist 0 17 4.110874 0.000000 938 +weekli 0 17 4.110874 0.000000 919 +segment 0 17 4.110874 0.000000 931 +outlin 0 17 4.110874 0.000000 914 +devic 1 16 4.174387 4.174387 1002 +zhang 0 16 4.174387 0.000000 980 +modern 0 16 4.174387 0.000000 966 +weslei 0 16 4.174387 0.000000 983 +transfer 0 16 4.174387 0.000000 967 +easi 0 16 4.174387 0.000000 969 +choos 0 16 4.174387 0.000000 964 +condit 0 16 4.174387 0.000000 975 +critic 0 16 4.174387 0.000000 982 +later 1 15 4.248495 4.248495 1043 +todd 1 15 4.248495 4.248495 1051 +score 0 15 4.248495 0.000000 1017 +enough 0 15 4.248495 0.000000 1040 +demand 0 14 4.317488 0.000000 1073 +warn 0 14 4.317488 0.000000 1068 +shown 0 14 4.317488 0.000000 1080 +topolog 0 14 4.317488 0.000000 1089 +central 0 13 4.382027 0.000000 1160 +directli 0 13 4.382027 0.000000 1141 +forth 0 13 4.382027 0.000000 1186 +everyth 0 13 4.382027 0.000000 1169 +count 1 12 4.465908 4.465908 1239 +solari 1 12 4.465908 4.465908 1238 +minor 0 12 4.465908 0.000000 1237 +rememb 0 12 4.465908 0.000000 1217 +readi 0 12 4.465908 0.000000 1242 +addison 0 12 4.465908 0.000000 1230 +grow 0 12 4.465908 0.000000 1209 +readabl 0 12 4.465908 0.000000 1258 +buffer 0 12 4.465908 0.000000 1211 +string 1 11 4.553877 4.553877 1340 +fix 1 11 4.553877 4.553877 1327 +tue 0 11 4.553877 0.000000 1308 +regard 0 11 4.553877 0.000000 1309 +extrem 0 11 4.553877 0.000000 1330 +faster 0 11 4.553877 0.000000 1323 +market 0 11 4.553877 0.000000 1361 +placement 1 10 4.653960 4.653960 1420 +strongli 1 10 4.653960 4.653960 1406 +grain 0 10 4.653960 0.000000 1448 +paragraph 0 10 4.653960 0.000000 1449 +hint 0 10 4.653960 0.000000 1419 +cheat 0 10 4.653960 0.000000 1395 +recoveri 1 9 4.753590 4.753590 1474 +familiar 1 9 4.753590 4.753590 1485 +pair 1 9 4.753590 4.753590 1503 +correctli 0 9 4.753590 0.000000 1478 +mention 0 9 4.753590 0.000000 1569 +said 0 9 4.753590 0.000000 1571 +introductori 0 9 4.753590 0.000000 1479 +solomon 1 8 4.875197 4.875197 1716 +star 1 8 4.875197 4.875197 1717 +replac 0 8 4.875197 0.000000 1668 +simpli 0 8 4.875197 0.000000 1626 +rais 0 8 4.875197 0.000000 1711 +partner 0 8 4.875197 0.000000 1648 +crash 0 8 4.875197 0.000000 1616 +switch 0 8 4.875197 0.000000 1718 +gather 0 8 4.875197 0.000000 1719 +peterson 1 7 5.010635 5.010635 1850 +philosoph 1 7 5.010635 5.010635 1904 +bookstor 1 7 5.010635 5.010635 1837 +prevent 1 7 5.010635 5.010635 1827 +bug 0 7 5.010635 0.000000 1801 +slightli 0 7 5.010635 0.000000 1795 +chan 0 7 5.010635 0.000000 1876 +occasion 0 7 5.010635 0.000000 1905 +awar 0 7 5.010635 0.000000 1800 +prentic 0 7 5.010635 0.000000 1838 +spot 0 7 5.010635 0.000000 1894 +fortun 0 7 5.010635 0.000000 1872 +bottom 0 7 5.010635 0.000000 1906 +compact 0 7 5.010635 0.000000 1907 +theproject 1 6 5.164786 5.164786 1981 +sciencesoffic 1 6 5.164786 5.164786 2101 +garbag 1 6 5.164786 5.164786 1986 +notifi 0 6 5.164786 0.000000 2106 +nine 0 6 5.164786 0.000000 2047 +mistak 0 6 5.164786 0.000000 2110 +creation 0 6 5.164786 0.000000 2069 +handi 0 6 5.164786 0.000000 2111 +neither 0 6 5.164786 0.000000 1990 +caus 1 5 5.347108 5.347108 2298 +salt 0 5 5.347108 0.000000 2413 +forprogram 0 5 5.347108 0.000000 2361 +sparcstat 0 5 5.347108 0.000000 2406 +favor 0 5 5.347108 0.000000 2414 +commod 0 5 5.347108 0.000000 2415 +eas 0 5 5.347108 0.000000 2267 +anda 0 5 5.347108 0.000000 2416 +remain 0 5 5.347108 0.000000 2278 +race 0 5 5.347108 0.000000 2417 +deadlock 2 4 5.568345 11.136690 2641 +fork 1 4 5.568345 5.568345 2801 +makefil 1 4 5.568345 5.568345 2662 +popular 1 4 5.568345 5.568345 2802 +cshrc 1 4 5.568345 5.568345 2759 +theprogram 0 4 5.568345 0.000000 2686 +multitask 0 4 5.568345 0.000000 2803 +systemsand 0 4 5.568345 0.000000 2804 +usedto 0 4 5.568345 0.000000 2643 +subsequ 0 4 5.568345 0.000000 2665 +withth 0 4 5.568345 0.000000 2805 +marvin 0 4 5.568345 0.000000 2806 +argument 1 3 5.857933 5.857933 3120 +caught 1 3 5.857933 5.857933 3465 +omit 0 3 5.857933 0.000000 3466 +offset 0 3 5.857933 0.000000 3467 +urgent 0 3 5.857933 0.000000 3316 +listof 0 3 5.857933 0.000000 3322 +sendmail 0 3 5.857933 0.000000 3099 +tanenbaum 0 3 5.857933 0.000000 3397 +dialect 0 3 5.857933 0.000000 3226 +acquaint 0 3 5.857933 0.000000 3468 +subscript 0 3 5.857933 0.000000 3469 +easier 0 3 5.857933 0.000000 3470 +timet 0 3 5.857933 0.000000 3471 +dine 0 3 5.857933 0.000000 3472 +avaiabl 1 2 6.263398 6.263398 4703 +thejava 1 2 6.263398 6.263398 4704 +swap 1 2 6.263398 6.263398 4466 +arnold 1 2 6.263398 6.263398 4705 +semaphor 1 2 6.263398 6.263398 4555 +troffic 0 2 6.263398 0.000000 4706 +mellencamp 0 2 6.263398 0.000000 4707 +mellen 0 2 6.263398 0.000000 4708 +breakdown 0 2 6.263398 0.000000 4407 +typo 0 2 6.263398 0.000000 4180 +tung 0 2 6.263398 0.000000 4709 +preemptiv 0 2 6.263398 0.000000 4319 +colloquia 0 2 6.263398 0.000000 4710 +sciencesand 0 2 6.263398 0.000000 4711 +tutorialth 0 2 6.263398 0.000000 4453 +designedto 0 2 6.263398 0.000000 4712 +havethre 0 2 6.263398 0.000000 4562 +daysof 0 2 6.263398 0.000000 4563 +eachof 0 2 6.263398 0.000000 4564 +congeni 0 2 6.263398 0.000000 4713 +null 0 2 6.263398 0.000000 4714 +mysteri 0 2 6.263398 0.000000 4715 +char 0 2 6.263398 0.000000 4716 +trendi 0 2 6.263398 0.000000 4717 +coursewil 0 2 6.263398 0.000000 4718 +primer 0 2 6.263398 0.000000 4719 +manualfor 0 2 6.263398 0.000000 4720 +yourgrad 0 2 6.263398 0.000000 4121 +terminolog 0 2 6.263398 0.000000 4410 +eduthu 0 2 6.263398 0.000000 4721 +threadschedul 1 1 6.957497 6.957497 8753 +forproject 1 1 6.957497 6.957497 8754 +graphcontain 1 1 6.957497 6.957497 8755 +sched 1 1 6.957497 6.957497 8756 +substr 1 1 6.957497 6.957497 8757 +thejavaprogram 1 1 6.957497 6.957497 8758 +languagebi 1 1 6.957497 6.957497 8759 +gosl 1 1 6.957497 6.957497 8760 +systemssect 0 1 6.957497 0.000000 8761 +instructormarvin 0 1 6.957497 0.000000 8762 +tarob 0 1 6.957497 0.000000 8763 +mwfoffic 0 1 6.957497 0.000000 8764 +distributioni 0 1 6.957497 0.000000 8765 +typograph 0 1 6.957497 0.000000 8766 +importantli 0 1 6.957497 0.000000 8767 +arraywa 0 1 6.957497 0.000000 8768 +isavail 0 1 6.957497 0.000000 8769 +courseus 0 1 6.957497 0.000000 8770 +likelyb 0 1 6.957497 0.000000 8771 +presentedin 0 1 6.957497 0.000000 8772 +givefork 0 1 6.957497 0.000000 8773 +specificationshould 0 1 6.957497 0.000000 8774 +garbl 0 1 6.957497 0.000000 8775 +jake 0 1 6.957497 0.000000 8776 +dawlei 0 1 6.957497 0.000000 8777 +carr 0 1 6.957497 0.000000 8778 +detailssect 0 1 6.957497 0.000000 8779 +lipe 0 1 6.957497 0.000000 8780 +srccontain 0 1 6.957497 0.000000 8781 +javacontain 0 1 6.957497 0.000000 8782 +classgraphdescrib 0 1 6.957497 0.000000 8783 +petersoncycl 0 1 6.957497 0.000000 8784 +notacycl 0 1 6.957497 0.000000 8785 +petersonacycl 0 1 6.957497 0.000000 8786 +acycl 0 1 6.957497 0.000000 8787 +sharingfork 0 1 6.957497 0.000000 8788 +jenner 0 1 6.957497 0.000000 8789 +maxthink 0 1 6.957497 0.000000 8790 +maxeat 0 1 6.957497 0.000000 8791 +versionha 0 1 6.957497 0.000000 8792 +argumenti 0 1 6.957497 0.000000 8793 +charactersin 0 1 6.957497 0.000000 8794 +franco 0 1 6.957497 0.000000 8795 +maketo 0 1 6.957497 0.000000 8796 +compilewithout 0 1 6.957497 0.000000 8797 +computershav 0 1 6.957497 0.000000 8798 +tutoriali 0 1 6.957497 0.000000 8799 +onthread 0 1 6.957497 0.000000 8800 +checkth 0 1 6.957497 0.000000 8801 +ajava 0 1 6.957497 0.000000 8802 +afil 0 1 6.957497 0.000000 8803 +onelin 0 1 6.957497 0.000000 8804 +localor 0 1 6.957497 0.000000 8805 +csmon 0 1 6.957497 0.000000 8806 +cslast 0 1 6.957497 0.000000 8807 +beprocess 0 1 6.957497 0.000000 8808 +replacementalgorithm 0 1 6.957497 0.000000 8809 +statisticsdiscuss 0 1 6.957497 0.000000 8810 +psychologyth 0 1 6.957497 0.000000 8811 +anyquest 0 1 6.957497 0.000000 8812 +thetext 0 1 6.957497 0.000000 8813 +systemsbi 0 1 6.957497 0.000000 8814 +specificationjava 0 1 6.957497 0.000000 8815 +documentationwatch 0 1 6.957497 0.000000 8816 +unixoper 0 1 6.957497 0.000000 8817 +anycomput 0 1 6.957497 0.000000 8818 +requireddata 0 1 6.957497 0.000000 8819 +involveprocess 0 1 6.957497 0.000000 8820 +butyou 0 1 6.957497 0.000000 8821 +vigor 0 1 6.957497 0.000000 8822 +punish 0 1 6.957497 0.000000 8823 +dateind 0 1 6.957497 0.000000 8824 +uniniti 0 1 6.957497 0.000000 8825 +runtimerath 0 1 6.957497 0.000000 8826 +byproduct 0 1 6.957497 0.000000 8827 +withlanguag 0 1 6.957497 0.000000 8828 +alwaysa 0 1 6.957497 0.000000 8829 +disloc 0 1 6.957497 0.000000 8830 +thetransit 0 1 6.957497 0.000000 8831 +amazingli 0 1 6.957497 0.000000 8832 +youalreadi 0 1 6.957497 0.000000 8833 +arefer 0 1 6.957497 0.000000 8834 +manuali 0 1 6.957497 0.000000 8835 +wayfrom 0 1 6.957497 0.000000 8836 +sophisticatedprogram 0 1 6.957497 0.000000 8837 +ofoth 0 1 6.957497 0.000000 8838 +niceonlin 0 1 6.957497 0.000000 8839 +tutorialabout 0 1 6.957497 0.000000 8840 +javaoct 0 1 6.957497 0.000000 8841 +synchronizationoct 0 1 6.957497 0.000000 8842 +schedulingoct 0 1 6.957497 0.000000 8843 +schedulingdec 0 1 6.957497 0.000000 8844 +systemsdec 0 1 6.957497 0.000000 8845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..d38e070c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +file 1 132 1.945910 1.945910 70 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +object 0 138 1.945910 0.000000 79 +introduct 1 126 2.079442 2.079442 87 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +manag 1 114 2.197225 2.197225 125 +instructor 0 108 2.197225 0.000000 107 +memori 1 101 2.302585 2.302585 139 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +call 0 91 2.397895 0.000000 153 +solut 0 82 2.484907 0.000000 162 +tuesdai 0 73 2.639057 0.000000 219 +name 0 72 2.639057 0.000000 220 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +virtual 0 62 2.772589 0.000000 285 +space 0 57 2.890372 0.000000 310 +date 0 51 2.995732 0.000000 344 +discuss 1 45 3.135494 3.135494 399 +fridai 1 44 3.135494 3.135494 390 +textbook 0 44 3.135494 0.000000 397 +cach 0 41 3.218876 0.000000 432 +review 0 42 3.218876 0.000000 425 +procedur 0 36 3.367296 0.000000 488 +concurr 1 34 3.401197 3.401197 501 +survei 0 35 3.401197 0.000000 513 +global 0 34 3.401197 0.000000 520 +chapter 0 32 3.465736 0.000000 536 +secur 0 30 3.555348 0.000000 577 +synchron 0 29 3.583519 0.000000 588 +thread 1 23 3.806662 3.806662 722 +cooper 0 22 3.850148 0.000000 757 +protect 1 17 4.110874 4.110874 935 +monitor 0 17 4.110874 0.000000 941 +quiz 0 16 4.174387 0.000000 990 +pagec 0 15 4.248495 0.000000 1011 +remot 0 15 4.248495 0.000000 1041 +demand 0 14 4.317488 0.000000 1073 +quizz 0 13 4.382027 0.000000 1151 +translat 0 13 4.382027 0.000000 1164 +host 0 11 4.553877 0.000000 1306 +vernon 0 9 4.753590 0.000000 1556 +core 0 7 5.010635 0.000000 1809 +mutual 0 5 5.347108 0.000000 2418 +systemsfal 0 4 5.568345 0.000000 2683 +deadlock 0 4 5.568345 0.000000 2641 +thanksgiv 0 2 6.263398 0.000000 4185 +maryvernon 0 1 6.957497 0.000000 8846 +andkarunamuthiah 0 1 6.957497 0.000000 8847 +beinterchang 0 1 6.957497 0.000000 8848 +archiveapproxim 0 1 6.957497 0.000000 8849 +topicsweek 0 1 6.957497 0.000000 8850 +oftopicsreadingsep 0 1 6.957497 0.000000 8851 +processeschapt 0 1 6.957497 0.000000 8852 +threadschapt 0 1 6.957497 0.000000 8853 +exclusioncont 0 1 6.957497 0.000000 8854 +semaphorescont 0 1 6.957497 0.000000 8855 +summarycont 0 1 6.957497 0.000000 8856 +doct 0 1 6.957497 0.000000 8857 +schedulingchapt 0 1 6.957497 0.000000 8858 +tlbschapter 0 1 6.957497 0.000000 8859 +memorycont 0 1 6.957497 0.000000 8860 +systemschapt 0 1 6.957497 0.000000 8861 +directorieschapt 0 1 6.957497 0.000000 8862 +methodstbanov 0 1 6.957497 0.000000 8863 +reviewchapt 0 1 6.957497 0.000000 8864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..00c907bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +assign 2 135 1.945910 3.891820 66 +lectur 0 135 1.945910 0.000000 73 +spring 1 131 2.079442 2.079442 88 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +instructor 1 108 2.197225 2.197225 107 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +select 1 91 2.397895 2.397895 154 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +section 0 94 2.397895 0.000000 149 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +start 1 83 2.484907 2.484907 173 +help 0 83 2.484907 0.000000 175 +decemb 0 80 2.564949 0.000000 215 +tuesdai 1 73 2.639057 2.639057 219 +david 1 71 2.639057 2.639057 232 +thursdai 1 70 2.708050 2.708050 241 +simul 0 66 2.708050 0.000000 255 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +descript 1 64 2.772589 2.772589 271 +previou 0 62 2.772589 0.000000 290 +room 1 59 2.833213 2.833213 301 +locat 0 59 2.833213 0.000000 303 +sampl 0 53 2.944439 0.000000 339 +appoint 1 49 3.044522 3.044522 358 +get 1 46 3.091042 3.091042 380 +midterm 2 45 3.135494 6.270988 392 +answer 1 45 3.135494 3.135494 391 +error 0 40 3.258097 0.000000 449 +correct 0 38 3.295837 0.000000 462 +demonstr 0 24 3.761200 0.000000 694 +wood 0 11 4.553877 0.000000 1355 +deadlin 0 9 4.753590 0.000000 1502 +phil 0 5 5.347108 0.000000 2419 +mentor 0 4 5.568345 0.000000 2591 +atkinson 1 2 6.263398 6.263398 4722 +vhdl 1 1 6.957497 6.957497 8865 +mentorassign 0 1 6.957497 0.000000 8866 +projectthi 0 1 6.957497 0.000000 8867 +examsth 0 1 6.957497 0.000000 8868 +endterm 0 1 6.957497 0.000000 8869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..b8c53ea6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +languag 0 227 1.386294 0.000000 26 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +assign 2 135 1.945910 3.891820 66 +year 1 148 1.945910 1.945910 84 +lectur 1 135 1.945910 1.945910 73 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +postscript 2 131 2.079442 4.158884 90 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +version 0 113 2.197225 0.000000 122 +pleas 0 113 2.197225 0.000000 114 +topic 0 114 2.197225 0.000000 110 +check 0 115 2.197225 0.000000 118 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +text 0 98 2.302585 0.000000 133 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +solut 1 82 2.484907 2.484907 162 +chang 0 82 2.484907 0.000000 163 +help 0 83 2.484907 0.000000 175 +resourc 0 81 2.484907 0.000000 172 +issu 0 78 2.564949 0.000000 211 +html 1 75 2.639057 2.639057 235 +handout 1 64 2.772589 2.772589 263 +import 1 65 2.772589 2.772589 282 +polici 0 64 2.772589 0.000000 279 +experi 0 64 2.772589 0.000000 283 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +date 0 51 2.995732 0.000000 344 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +discuss 0 45 3.135494 0.000000 399 +tutori 0 39 3.258097 0.000000 437 +tree 1 36 3.367296 3.367296 492 +print 0 34 3.401197 0.000000 503 +chapter 1 32 3.465736 3.465736 536 +yahoo 0 24 3.761200 0.000000 707 +instead 0 22 3.850148 0.000000 756 +exercis 1 19 4.007333 4.007333 842 +prerequisit 0 19 4.007333 0.000000 846 +thur 0 19 4.007333 0.000000 847 +sept 0 17 4.110874 0.000000 952 +ramakrishnan 0 16 4.174387 0.000000 972 +convent 0 14 4.317488 0.000000 1072 +raghu 1 12 4.465908 4.465908 1212 +tue 0 11 4.553877 0.000000 1308 +debugg 0 9 4.753590 0.000000 1493 +entri 0 8 4.875197 0.000000 1678 +minibas 0 4 5.568345 0.000000 2608 +dont 0 3 5.857933 0.000000 3473 +sybas 1 2 6.263398 6.263398 4723 +xbao 1 1 6.957497 6.957497 8870 +implementationc 0 1 6.957497 0.000000 8871 +implementationcours 0 1 6.957497 0.000000 8872 +assignmentoth 0 1 6.957497 0.000000 8873 +ingraham 0 1 6.957497 0.000000 8874 +xuemei 0 1 6.957497 0.000000 8875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..377a473f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +lectur 2 135 1.945910 3.891820 73 +assign 1 135 1.945910 1.945910 66 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +place 1 106 2.197225 2.197225 124 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +thing 0 84 2.484907 0.000000 189 +chang 0 82 2.484907 0.000000 163 +exam 0 86 2.484907 0.000000 169 +know 1 80 2.564949 2.564949 198 +meet 0 72 2.639057 0.000000 229 +import 0 65 2.772589 0.000000 282 +wednesdai 0 64 2.772589 0.000000 261 +semest 0 58 2.890372 0.000000 312 +particular 0 51 2.995732 0.000000 352 +still 0 50 3.044522 0.000000 362 +discuss 1 45 3.135494 3.135494 399 +fridai 0 44 3.135494 0.000000 390 +probabl 0 40 3.258097 0.000000 455 +close 0 38 3.295837 0.000000 465 +taught 0 33 3.433987 0.000000 526 +option 1 30 3.555348 3.555348 575 +progress 0 28 3.610918 0.000000 598 +jeff 0 25 3.737670 0.000000 673 +lab 0 24 3.761200 0.000000 698 +cooper 0 22 3.850148 0.000000 757 +fact 0 21 3.912023 0.000000 780 +psycholog 1 15 4.248495 4.248495 1054 +naughton 0 10 4.653960 0.000000 1450 +russel 1 9 4.753590 4.753590 1507 +minibas 0 4 5.568345 0.000000 2608 +obvious 0 3 5.857933 0.000000 3474 +addinginform 0 1 6.957497 0.000000 8876 +meetingroom 0 1 6.957497 0.000000 8877 +labsfor 0 1 6.957497 0.000000 8878 +beenmov 0 1 6.957497 0.000000 8879 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..1906fe9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +introduct 1 126 2.079442 2.079442 87 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +instructor 0 108 2.197225 0.000000 107 +book 0 99 2.302585 0.000000 131 +exam 0 86 2.484907 0.000000 169 +solut 0 82 2.484907 0.000000 162 +homework 1 79 2.564949 2.564949 193 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +descript 0 64 2.772589 0.000000 271 +organ 0 65 2.772589 0.000000 265 +archiv 0 49 3.044522 0.000000 364 +midterm 0 45 3.135494 0.000000 392 +graph 0 30 3.555348 0.000000 576 +reserv 0 20 3.951244 0.000000 808 +eric 0 19 4.007333 0.000000 870 +bill 0 11 4.553877 0.000000 1297 +appt 0 5 5.347108 0.000000 2312 +bach 1 4 5.568345 5.568345 2708 +fractal 0 3 5.857933 0.000000 3475 +behaviour 0 2 6.263398 0.000000 4724 +raji 1 1 6.957497 6.957497 8880 +donaldson 0 1 6.957497 0.000000 8881 +gopalakrishnan 0 1 6.957497 0.000000 8882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..bc080b7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +fall 1 181 1.609438 1.609438 40 +class 0 199 1.609438 0.000000 37 +network 2 168 1.791759 3.583518 61 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +implement 0 152 1.791759 0.000000 52 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +intern 0 108 2.197225 0.000000 128 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +advanc 1 99 2.302585 2.302585 130 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +select 0 91 2.397895 0.000000 154 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +exam 1 86 2.484907 2.484907 169 +refer 1 78 2.564949 2.564949 203 +interfac 1 79 2.564949 2.564949 209 +mondai 0 77 2.564949 0.000000 206 +complet 0 77 2.564949 0.000000 208 +free 1 73 2.639057 2.639057 224 +tuesdai 0 73 2.639057 0.000000 219 +html 0 75 2.639057 0.000000 235 +syllabu 1 67 2.708050 2.708050 247 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +evalu 0 64 2.772589 0.000000 266 +content 0 59 2.833213 0.000000 302 +overview 0 56 2.890372 0.000000 323 +unix 0 58 2.890372 0.000000 308 +tabl 0 51 2.995732 0.000000 346 +archiv 1 49 3.044522 3.044522 364 +appoint 1 49 3.044522 3.044522 358 +adapt 0 46 3.091042 0.000000 387 +midterm 1 45 3.135494 3.135494 392 +offer 0 43 3.178054 0.000000 414 +term 0 43 3.178054 0.000000 411 +review 0 42 3.218876 0.000000 425 +form 1 39 3.258097 3.258097 443 +error 0 40 3.258097 0.000000 449 +slide 1 38 3.295837 3.295837 467 +feel 1 37 3.332205 3.332205 483 +connect 0 37 3.332205 0.000000 485 +eduoffic 1 33 3.433987 3.433987 531 +richard 0 31 3.496508 0.000000 559 +option 0 30 3.555348 0.000000 575 +packag 0 28 3.610918 0.000000 614 +comp 1 26 3.688879 3.688879 650 +reliabl 0 25 3.737670 0.000000 674 +latest 0 21 3.912023 0.000000 785 +annot 0 21 3.912023 0.000000 775 +partial 0 18 4.060443 0.000000 900 +layer 1 17 4.110874 4.110874 926 +steven 0 17 4.110874 0.000000 953 +warn 0 14 4.317488 0.000000 1068 +prior 0 10 4.653960 0.000000 1438 +criteria 0 9 4.753590 0.000000 1477 +lawrenc 0 7 5.010635 0.000000 1908 +prentic 0 7 5.010635 0.000000 1838 +isbn 0 7 5.010635 0.000000 1901 +conveni 1 6 5.164786 5.164786 2088 +moder 0 6 5.164786 0.000000 2112 +landweb 0 3 5.857933 0.000000 3402 +hereto 0 3 5.857933 0.000000 3476 +gradingmidterm 0 3 5.857933 0.000000 3230 +socket 1 2 6.263398 6.263398 4725 +statphon 0 2 6.263398 0.000000 4726 +ipng 0 2 6.263398 0.000000 4727 +powerpoint 1 1 6.957497 6.957497 8883 +networksintroduct 0 1 6.957497 0.000000 8884 +readingsclick 0 1 6.957497 0.000000 8885 +networkingcours 0 1 6.957497 0.000000 8886 +madisoncours 0 1 6.957497 0.000000 8887 +informationlecturetim 0 1 6.957497 0.000000 8888 +mwfplace 0 1 6.957497 0.000000 8889 +statclass 0 1 6.957497 0.000000 8890 +listinstructor 0 1 6.957497 0.000000 8891 +landweberoffic 0 1 6.957497 0.000000 8892 +srinivasa 0 1 6.957497 0.000000 8893 +narayananoffic 0 1 6.957497 0.000000 8894 +teitelbaumoffic 0 1 6.957497 0.000000 8895 +naemail 0 1 6.957497 0.000000 8896 +garbler 0 1 6.957497 0.000000 8897 +bibliographyread 0 1 6.957497 0.000000 8898 +icmp 0 1 6.957497 0.000000 8899 +ospf 0 1 6.957497 0.000000 8900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..75652780 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +fall 1 181 1.609438 1.609438 40 +hour 0 165 1.791759 0.000000 46 +note 1 142 1.945910 1.945910 67 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +part 1 98 2.302585 2.302585 129 +solut 1 82 2.484907 2.484907 162 +homework 1 79 2.564949 2.564949 193 +descript 0 64 2.772589 0.000000 271 +robert 0 30 3.555348 0.000000 567 +option 0 30 3.555348 0.000000 575 +comp 0 26 3.688879 0.000000 650 +meyer 0 2 6.263398 0.000000 4728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..6868ce76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +algorithm 0 162 1.791759 0.000000 57 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +theori 1 111 2.197225 2.197225 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +text 1 98 2.302585 2.302585 133 +book 0 99 2.302585 0.000000 131 +grade 1 90 2.397895 2.397895 142 +second 1 81 2.484907 2.484907 166 +librari 0 87 2.484907 0.000000 181 +novemb 0 81 2.484907 0.000000 179 +homework 1 79 2.564949 2.564949 193 +optim 1 79 2.564949 2.564949 197 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +order 1 69 2.708050 2.708050 249 +thursdai 0 70 2.708050 0.000000 241 +function 1 62 2.772589 2.772589 275 +wednesdai 1 64 2.772589 2.772589 261 +publish 0 57 2.890372 0.000000 326 +overview 0 56 2.890372 0.000000 323 +semest 0 58 2.890372 0.000000 312 +scientif 0 53 2.944439 0.000000 341 +week 0 52 2.995732 0.000000 343 +telephon 1 50 3.044522 3.044522 373 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +midterm 0 45 3.135494 0.000000 392 +examin 1 42 3.218876 3.218876 424 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +york 0 41 3.218876 0.000000 435 +linear 0 41 3.218876 0.000000 431 +michael 0 35 3.401197 0.000000 514 +altern 0 26 3.688879 0.000000 641 +period 0 22 3.850148 0.000000 743 +siam 0 21 3.912023 0.000000 800 +theorem 0 21 3.912023 0.000000 786 +reserv 0 20 3.951244 0.000000 808 +differenti 1 17 4.110874 4.110874 921 +stat 0 17 4.110874 0.000000 924 +condit 0 16 4.174387 0.000000 975 +nonlinear 1 14 4.317488 4.317488 1107 +philadelphia 0 12 4.465908 0.000000 1244 +penalti 0 10 4.653960 0.000000 1405 +wendt 0 10 4.653960 0.000000 1446 +criteria 1 9 4.753590 4.753590 1477 +mangasarian 0 9 4.753590 0.000000 1570 +exact 0 9 4.753590 0.000000 1509 +kurt 0 9 4.753590 0.000000 1548 +ferri 1 8 4.875197 4.875197 1715 +olvi 0 6 5.164786 0.000000 2109 +inequ 0 6 5.164786 0.000000 2113 +augment 0 5 5.347108 0.000000 2350 +convex 1 4 5.568345 5.568345 2807 +concav 1 4 5.568345 5.568345 2808 +wilei 0 4 5.568345 0.000000 2669 +bertseka 0 3 5.857933 0.000000 3477 +lagrangian 0 3 5.857933 0.000000 3478 +gradient 0 3 5.857933 0.000000 3479 +applicationsfal 0 2 6.263398 0.000000 4729 +bazaraa 0 2 6.263398 0.000000 4730 +sherali 0 2 6.263398 0.000000 4731 +shetti 0 2 6.263398 0.000000 4732 +athena 0 2 6.263398 0.000000 4733 +saddlepoint 0 2 6.263398 0.000000 4734 +dualiti 0 2 6.263398 0.000000 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..6868ce76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +algorithm 0 162 1.791759 0.000000 57 +assign 1 135 1.945910 1.945910 66 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +theori 1 111 2.197225 2.197225 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +text 1 98 2.302585 2.302585 133 +book 0 99 2.302585 0.000000 131 +grade 1 90 2.397895 2.397895 142 +second 1 81 2.484907 2.484907 166 +librari 0 87 2.484907 0.000000 181 +novemb 0 81 2.484907 0.000000 179 +homework 1 79 2.564949 2.564949 193 +optim 1 79 2.564949 2.564949 197 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +order 1 69 2.708050 2.708050 249 +thursdai 0 70 2.708050 0.000000 241 +function 1 62 2.772589 2.772589 275 +wednesdai 1 64 2.772589 2.772589 261 +publish 0 57 2.890372 0.000000 326 +overview 0 56 2.890372 0.000000 323 +semest 0 58 2.890372 0.000000 312 +scientif 0 53 2.944439 0.000000 341 +week 0 52 2.995732 0.000000 343 +telephon 1 50 3.044522 3.044522 373 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +midterm 0 45 3.135494 0.000000 392 +examin 1 42 3.218876 3.218876 424 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +york 0 41 3.218876 0.000000 435 +linear 0 41 3.218876 0.000000 431 +michael 0 35 3.401197 0.000000 514 +altern 0 26 3.688879 0.000000 641 +period 0 22 3.850148 0.000000 743 +siam 0 21 3.912023 0.000000 800 +theorem 0 21 3.912023 0.000000 786 +reserv 0 20 3.951244 0.000000 808 +differenti 1 17 4.110874 4.110874 921 +stat 0 17 4.110874 0.000000 924 +condit 0 16 4.174387 0.000000 975 +nonlinear 1 14 4.317488 4.317488 1107 +philadelphia 0 12 4.465908 0.000000 1244 +penalti 0 10 4.653960 0.000000 1405 +wendt 0 10 4.653960 0.000000 1446 +criteria 1 9 4.753590 4.753590 1477 +mangasarian 0 9 4.753590 0.000000 1570 +exact 0 9 4.753590 0.000000 1509 +kurt 0 9 4.753590 0.000000 1548 +ferri 1 8 4.875197 4.875197 1715 +olvi 0 6 5.164786 0.000000 2109 +inequ 0 6 5.164786 0.000000 2113 +augment 0 5 5.347108 0.000000 2350 +convex 1 4 5.568345 5.568345 2807 +concav 1 4 5.568345 5.568345 2808 +wilei 0 4 5.568345 0.000000 2669 +bertseka 0 3 5.857933 0.000000 3477 +lagrangian 0 3 5.857933 0.000000 3478 +gradient 0 3 5.857933 0.000000 3479 +applicationsfal 0 2 6.263398 0.000000 4729 +bazaraa 0 2 6.263398 0.000000 4730 +sherali 0 2 6.263398 0.000000 4731 +shetti 0 2 6.263398 0.000000 4732 +athena 0 2 6.263398 0.000000 4733 +saddlepoint 0 2 6.263398 0.000000 4734 +dualiti 0 2 6.263398 0.000000 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..8cbb7994 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +public 0 202 1.609438 0.000000 43 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +avail 0 169 1.791759 0.000000 48 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +professor 0 137 1.945910 0.000000 76 +postscript 1 131 2.079442 2.079442 90 +pleas 1 113 2.197225 2.197225 114 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +text 1 98 2.302585 2.302585 133 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +comment 0 93 2.397895 0.000000 146 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +exampl 0 77 2.564949 0.000000 195 +html 1 75 2.639057 2.639057 235 +onlin 0 75 2.639057 0.000000 223 +evalu 0 64 2.772589 0.000000 266 +new 0 64 2.772589 0.000000 262 +instruct 0 53 2.944439 0.000000 332 +suggest 0 53 2.944439 0.000000 331 +tutori 0 39 3.258097 0.000000 437 +manual 0 35 3.401197 0.000000 504 +print 0 34 3.401197 0.000000 503 +least 0 35 3.401197 0.000000 516 +initi 0 23 3.806662 0.000000 717 +half 0 21 3.912023 0.000000 776 +sept 1 17 4.110874 4.110874 952 +livni 0 15 4.248495 0.000000 1053 +miron 1 14 4.317488 4.317488 1110 +devis 0 10 4.653960 0.000000 1451 +chan 0 7 5.010635 0.000000 1876 +yong 0 4 5.568345 0.000000 2809 +chee 0 3 5.857933 0.000000 3480 +mimic 1 2 6.263398 6.263398 4736 +cychan 1 2 6.263398 6.263398 4737 +qnet 0 1 6.957497 0.000000 8901 +devc 0 1 6.957497 0.000000 8902 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..e0ae5548 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +design 0 213 1.386294 0.000000 25 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +lectur 0 135 1.945910 0.000000 73 +relat 0 139 1.945910 0.000000 68 +assign 0 135 1.945910 0.000000 66 +postscript 1 131 2.079442 2.079442 90 +technolog 0 131 2.079442 0.000000 102 +schedul 0 119 2.079442 0.000000 85 +find 1 111 2.197225 2.197225 111 +world 0 115 2.197225 0.000000 126 +topic 0 114 2.197225 0.000000 110 +part 1 98 2.302585 2.302585 129 +techniqu 0 99 2.302585 0.000000 138 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +resourc 1 81 2.484907 2.484907 172 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +complet 0 77 2.564949 0.000000 208 +knowledg 0 67 2.708050 0.000000 243 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +room 0 59 2.833213 0.000000 301 +overview 1 56 2.890372 2.890372 323 +major 0 56 2.890372 0.000000 315 +suggest 0 53 2.944439 0.000000 331 +much 0 52 2.995732 0.000000 349 +understand 0 47 3.091042 0.000000 384 +represent 0 35 3.401197 0.000000 512 +taken 0 31 3.496508 0.000000 555 +consid 0 29 3.583519 0.000000 590 +univ 0 28 3.610918 0.000000 617 +retriev 1 27 3.637586 3.637586 621 +tent 0 22 3.850148 0.000000 739 +minut 0 20 3.951244 0.000000 810 +thur 0 19 4.007333 0.000000 847 +seek 0 17 4.110874 0.000000 954 +sept 0 17 4.110874 0.000000 952 +onth 0 12 4.465908 0.000000 1218 +excit 0 11 4.553877 0.000000 1329 +underli 0 10 4.653960 0.000000 1410 +occur 0 9 4.753590 0.000000 1572 +compos 0 9 4.753590 0.000000 1527 +digest 0 7 5.010635 0.000000 1864 +machinelearn 0 6 5.164786 0.000000 2084 +proce 0 6 5.164786 0.000000 2114 +polit 0 6 5.164786 0.000000 2115 +anda 0 5 5.347108 0.000000 2416 +aboutth 0 4 5.568345 0.000000 2720 +thesear 0 3 5.857933 0.000000 3456 +uwisc 0 2 6.263398 0.000000 4738 +belew 0 2 6.263398 0.000000 4739 +knowledgerichard 0 1 6.957497 0.000000 8903 +belewvisit 0 1 6.957497 0.000000 8904 +professorc 0 1 6.957497 0.000000 8905 +departmentfal 0 1 6.957497 0.000000 8906 +acal 0 1 6.957497 0.000000 8907 +engrthi 0 1 6.957497 0.000000 8908 +coures 0 1 6.957497 0.000000 8909 +canse 0 1 6.957497 0.000000 8910 +asyllabu 0 1 6.957497 0.000000 8911 +mapof 0 1 6.957497 0.000000 8912 +semesterwil 0 1 6.957497 0.000000 8913 +infidel 0 1 6.957497 0.000000 8914 +hypermai 0 1 6.957497 0.000000 8915 +classrel 0 1 6.957497 0.000000 8916 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..e1f25998 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +file 0 132 1.945910 0.000000 70 +relat 0 139 1.945910 0.000000 68 +introduct 1 126 2.079442 2.079442 87 +machin 0 129 2.079442 0.000000 95 +well 1 109 2.197225 2.197225 121 +look 1 107 2.197225 2.197225 115 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +text 0 98 2.302585 0.000000 133 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +question 1 91 2.397895 2.397895 141 +sinc 0 90 2.397895 0.000000 159 +present 0 91 2.397895 0.000000 145 +grade 0 90 2.397895 0.000000 142 +chang 2 82 2.484907 4.969814 163 +contain 0 81 2.484907 0.000000 174 +solut 0 82 2.484907 0.000000 162 +method 0 80 2.564949 0.000000 213 +orient 0 80 2.564949 0.000000 205 +good 0 77 2.564949 0.000000 200 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +name 1 72 2.639057 2.639057 220 +syllabu 1 67 2.708050 2.708050 247 +order 1 69 2.708050 2.708050 249 +complex 0 64 2.772589 0.000000 269 +organ 0 65 2.772589 0.000000 265 +plan 0 65 2.772589 0.000000 272 +handout 0 64 2.772589 0.000000 263 +simpl 0 60 2.833213 0.000000 298 +unix 1 58 2.890372 2.890372 308 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +numer 1 49 3.044522 3.044522 369 +frequent 0 49 3.044522 0.000000 367 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +edit 1 42 3.218876 3.218876 418 +might 0 41 3.218876 0.000000 426 +error 0 40 3.258097 0.000000 449 +tutori 0 39 3.258097 0.000000 437 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +approxim 0 35 3.401197 0.000000 509 +word 0 34 3.401197 0.000000 508 +chapter 0 32 3.465736 0.000000 536 +ask 0 28 3.610918 0.000000 597 +mind 0 27 3.637586 0.000000 632 +though 0 27 3.637586 0.000000 622 +session 1 26 3.688879 3.688879 643 +subject 0 26 3.688879 0.000000 647 +rather 0 26 3.688879 0.000000 642 +concern 1 25 3.737670 3.737670 666 +todai 0 25 3.737670 0.000000 672 +sometim 0 24 3.761200 0.000000 696 +wish 0 24 3.761200 0.000000 692 +begin 1 23 3.806662 3.806662 716 +tent 1 22 3.850148 3.850148 739 +cooper 0 22 3.850148 0.000000 757 +lower 0 18 4.060443 0.000000 886 +four 0 18 4.060443 0.000000 905 +condit 0 16 4.174387 0.000000 975 +score 1 15 4.248495 4.248495 1017 +carl 0 15 4.248495 0.000000 1024 +fortran 0 15 4.248495 0.000000 1027 +matlab 1 14 4.317488 4.317488 1081 +squar 0 14 4.317488 0.000000 1082 +doit 0 14 4.317488 0.000000 1111 +conduct 0 14 4.317488 0.000000 1065 +total 0 10 4.653960 0.000000 1398 +errata 0 10 4.653960 0.000000 1403 +mention 1 9 4.753590 4.753590 1569 +smile 0 7 5.010635 0.000000 1807 +slightli 0 7 5.010635 0.000000 1795 +awar 0 7 5.010635 0.000000 1800 +supplement 0 5 5.347108 0.000000 2355 +rick 0 4 5.568345 0.000000 2646 +areavail 0 4 5.568345 0.000000 2810 +andp 0 4 5.568345 0.000000 2811 +preprint 0 3 5.857933 0.000000 3481 +diari 1 2 6.263398 6.263398 4740 +residu 0 2 6.263398 0.000000 4741 +kermit 0 2 6.263398 0.000000 4742 +primer 0 2 6.263398 0.000000 4719 +overviewcours 0 2 6.263398 0.000000 4399 +linksyou 0 2 6.263398 0.000000 4743 +csdepart 0 2 6.263398 0.000000 4130 +telnet 1 1 6.957497 6.957497 8917 +methodsthi 0 1 6.957497 0.000000 8918 +orderli 0 1 6.957497 0.000000 8919 +assignmentson 0 1 6.957497 0.000000 8920 +numericalanalysi 0 1 6.957497 0.000000 8921 +foremostmathematician 0 1 6.957497 0.000000 8922 +trickytop 0 1 6.957497 0.000000 8923 +textmai 0 1 6.957497 0.000000 8924 +byaddit 0 1 6.957497 0.000000 8925 +capitallett 0 1 6.957497 0.000000 8926 +caselett 0 1 6.957497 0.000000 8927 +sigmon 0 1 6.957497 0.000000 8928 +reaction 0 1 6.957497 0.000000 8929 +winor 0 1 6.957497 0.000000 8930 +referenceviva 0 1 6.957497 0.000000 8931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..a848e8f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +email 1 220 1.386294 1.386294 29 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +fall 0 181 1.609438 0.000000 40 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +hour 0 165 1.791759 0.000000 46 +relat 1 139 1.945910 1.945910 68 +note 0 142 1.945910 0.000000 67 +assign 0 135 1.945910 0.000000 66 +analysi 0 124 2.079442 0.000000 98 +version 0 113 2.197225 0.000000 122 +well 0 109 2.197225 0.000000 121 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +question 0 91 2.397895 0.000000 141 +chang 0 82 2.484907 0.000000 163 +contain 0 81 2.484907 0.000000 174 +homework 0 79 2.564949 0.000000 193 +line 0 75 2.639057 0.000000 231 +function 1 62 2.772589 2.772589 275 +copi 0 63 2.772589 0.000000 284 +locat 0 59 2.833213 0.000000 303 +index 0 56 2.890372 0.000000 309 +explor 0 58 2.890372 0.000000 324 +numer 1 49 3.044522 3.044522 369 +math 0 44 3.135494 0.000000 402 +directori 0 45 3.135494 0.000000 396 +might 0 41 3.218876 0.000000 426 +announc 0 40 3.258097 0.000000 441 +post 0 35 3.401197 0.000000 505 +hard 0 30 3.555348 0.000000 563 +concern 0 25 3.737670 0.000000 666 +wish 0 24 3.761200 0.000000 692 +stat 0 17 4.110874 0.000000 924 +carl 0 15 4.248495 0.000000 1024 +doit 0 14 4.317488 0.000000 1111 +none 0 7 5.010635 0.000000 1811 +boor 0 3 5.857933 0.000000 3482 +deboor 0 2 6.263398 0.000000 4744 +linksyou 0 2 6.263398 0.000000 4743 +analysisthi 0 1 6.957497 0.000000 8932 +statlectur 0 1 6.957497 0.000000 8933 +classnot 0 1 6.957497 0.000000 8934 +viii 0 1 6.957497 0.000000 8935 +courseoff 0 1 6.957497 0.000000 8936 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..64fa3e63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +note 0 142 1.945910 0.000000 67 +spring 0 131 2.079442 0.000000 88 +theori 0 111 2.197225 0.000000 127 +version 0 113 2.197225 0.000000 122 +chang 0 82 2.484907 0.000000 163 +contain 0 81 2.484907 0.000000 174 +math 0 44 3.135494 0.000000 402 +approxim 1 35 3.401197 3.401197 509 +theorythi 0 1 6.957497 0.000000 8937 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..eafed542 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +introduct 1 126 2.079442 2.079442 87 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +section 0 94 2.397895 0.000000 149 +grade 0 90 2.397895 0.000000 142 +solut 1 82 2.484907 2.484907 162 +academ 0 82 2.484907 0.000000 178 +tuesdai 1 73 2.639057 2.639057 219 +solv 0 73 2.639057 0.000000 234 +window 1 68 2.708050 2.708050 242 +thursdai 1 70 2.708050 2.708050 241 +syllabu 0 67 2.708050 0.000000 247 +handout 1 64 2.772589 2.772589 263 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +semest 0 58 2.890372 0.000000 312 +appoint 0 49 3.044522 0.000000 358 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +statist 1 35 3.401197 3.401197 521 +specifi 0 30 3.555348 0.000000 568 +comp 0 26 3.688879 0.000000 650 +tent 0 22 3.850148 0.000000 739 +sept 1 17 4.110874 4.110874 952 +walter 0 17 4.110874 0.000000 950 +stat 0 17 4.110874 0.000000 924 +quiz 1 16 4.174387 4.174387 990 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +quizz 0 13 4.382027 0.000000 1151 +savitch 1 12 4.465908 4.465908 1269 +criteria 0 9 4.753590 0.000000 1477 +noland 0 5 5.347108 0.000000 2420 +anthoni 0 4 5.568345 0.000000 2792 +toni 1 3 5.857933 5.857933 3415 +textbookproblem 0 3 5.857933 0.000000 3483 +timet 0 3 5.857933 0.000000 3471 +windowshint 0 3 5.857933 0.000000 3484 +compilersth 0 3 5.857933 0.000000 3485 +systememailmosaicnetscap 0 3 5.857933 0.000000 3486 +languageth 0 3 5.857933 0.000000 3487 +silva 1 2 6.263398 6.263398 4586 +chamberlin 0 2 6.263398 0.000000 4745 +dsilva 1 1 6.957497 6.957497 8938 +sectioncsm 0 1 6.957497 0.000000 8939 +firstdai 0 1 6.957497 0.000000 8940 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..7540b6b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,433 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +us 2 329 1.098612 2.197224 16 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 0 178 1.609438 0.000000 35 +oper 0 180 1.609438 0.000000 34 +read 2 154 1.791759 3.583518 47 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +develop 0 174 1.791759 0.000000 53 +process 2 142 1.945910 3.891820 72 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +file 1 132 1.945910 1.945910 70 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +relat 0 139 1.945910 0.000000 68 +tool 2 117 2.079442 4.158884 93 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +databas 1 122 2.079442 2.079442 86 +schedul 0 119 2.079442 0.000000 85 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +send 1 114 2.197225 2.197225 109 +code 1 108 2.197225 2.197225 116 +instructor 1 108 2.197225 2.197225 107 +well 1 109 2.197225 2.197225 121 +topic 1 114 2.197225 2.197225 110 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +part 0 98 2.302585 0.000000 129 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +imag 3 91 2.397895 7.193685 161 +graphic 1 90 2.397895 2.397895 147 +grade 1 90 2.397895 2.397895 142 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +follow 1 92 2.397895 2.397895 143 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +exam 2 86 2.484907 4.969814 169 +environ 1 84 2.484907 2.484907 177 +activ 1 84 2.484907 2.484907 182 +novemb 1 81 2.484907 2.484907 179 +start 1 83 2.484907 2.484907 173 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +journal 0 83 2.484907 0.000000 183 +thing 0 84 2.484907 0.000000 189 +learn 0 86 2.484907 0.000000 170 +chang 0 82 2.484907 0.000000 163 +contain 0 81 2.484907 0.000000 174 +ieee 0 86 2.484907 0.000000 190 +help 0 83 2.484907 0.000000 175 +solut 0 82 2.484907 0.000000 162 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +method 2 80 2.564949 5.129898 213 +homework 2 79 2.564949 5.129898 193 +exampl 1 77 2.564949 2.564949 195 +want 1 79 2.564949 2.564949 199 +mondai 0 77 2.564949 0.000000 206 +good 0 77 2.564949 0.000000 200 +decemb 0 80 2.564949 0.000000 215 +interfac 0 79 2.564949 0.000000 209 +line 1 75 2.639057 2.639057 231 +onlin 1 75 2.639057 2.639057 223 +free 1 73 2.639057 2.639057 224 +appli 1 71 2.639057 2.639057 226 +addit 1 74 2.639057 2.639057 228 +tuesdai 0 73 2.639057 0.000000 219 +html 0 75 2.639057 0.000000 235 +name 0 72 2.639057 0.000000 220 +test 1 66 2.708050 2.708050 252 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +differ 1 66 2.708050 2.708050 253 +syllabu 0 67 2.708050 0.000000 247 +order 0 69 2.708050 0.000000 249 +main 0 67 2.708050 0.000000 256 +result 1 65 2.772589 2.772589 281 +handout 1 64 2.772589 2.772589 263 +interact 1 62 2.772589 2.772589 270 +collect 1 65 2.772589 2.772589 268 +wednesdai 0 64 2.772589 0.000000 261 +copi 0 63 2.772589 0.000000 284 +function 0 62 2.772589 0.000000 275 +improv 0 62 2.772589 0.000000 289 +evalu 0 64 2.772589 0.000000 266 +experi 0 64 2.772589 0.000000 283 +abstract 0 62 2.772589 0.000000 276 +virtual 0 62 2.772589 0.000000 285 +locat 1 59 2.833213 2.833213 303 +type 1 61 2.833213 2.833213 296 +room 1 59 2.833213 2.833213 301 +simpl 0 60 2.833213 0.000000 298 +point 1 58 2.890372 2.890372 319 +space 1 57 2.890372 2.890372 310 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +unix 0 58 2.890372 0.000000 308 +three 1 54 2.944439 2.944439 330 +found 0 53 2.944439 0.000000 337 +cover 0 55 2.944439 0.000000 329 +talk 0 53 2.944439 0.000000 336 +digit 1 52 2.995732 2.995732 348 +date 0 51 2.995732 0.000000 344 +case 0 51 2.995732 0.000000 351 +finger 0 52 2.995732 0.000000 354 +format 1 48 3.044522 3.044522 356 +basic 1 50 3.044522 3.044522 360 +numer 1 49 3.044522 3.044522 369 +principl 0 48 3.044522 0.000000 357 +right 0 48 3.044522 0.000000 363 +approach 0 48 3.044522 0.000000 366 +visual 0 48 3.044522 0.000000 372 +get 0 46 3.091042 0.000000 380 +understand 0 47 3.091042 0.000000 384 +done 0 47 3.091042 0.000000 381 +directori 1 45 3.135494 3.135494 396 +execut 1 45 3.135494 3.135494 404 +fridai 0 44 3.135494 0.000000 390 +algebra 0 45 3.135494 0.000000 394 +midterm 0 45 3.135494 0.000000 392 +textbook 0 44 3.135494 0.000000 397 +video 0 44 3.135494 0.000000 405 +around 0 43 3.178054 0.000000 415 +long 0 43 3.178054 0.000000 413 +vision 3 41 3.218876 9.656628 430 +might 1 41 3.218876 3.218876 426 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +linear 0 41 3.218876 0.000000 431 +york 0 41 3.218876 0.000000 435 +editor 0 41 3.218876 0.000000 433 +edit 0 42 3.218876 0.000000 418 +howev 0 41 3.218876 0.000000 422 +probabl 0 40 3.258097 0.000000 455 +small 0 39 3.258097 0.000000 447 +origin 1 38 3.295837 3.295837 472 +correct 0 38 3.295837 0.000000 462 +prototyp 0 38 3.295837 0.000000 463 +hand 1 37 3.332205 3.332205 475 +feel 0 37 3.332205 0.000000 483 +especi 0 36 3.367296 0.000000 496 +print 1 34 3.401197 3.401197 503 +least 1 35 3.401197 3.401197 516 +manual 1 35 3.401197 3.401197 504 +committe 0 34 3.401197 0.000000 522 +eduoffic 1 33 3.433987 3.433987 531 +board 0 33 3.433987 0.000000 528 +product 0 33 3.433987 0.000000 527 +chapter 2 32 3.465736 6.931472 536 +transform 0 32 3.465736 0.000000 542 +idea 0 32 3.465736 0.000000 545 +photo 0 31 3.496508 0.000000 561 +posit 0 31 3.496508 0.000000 552 +titl 0 31 3.496508 0.000000 556 +focu 1 30 3.555348 3.555348 571 +option 0 30 3.555348 0.000000 575 +produc 0 30 3.555348 0.000000 572 +particip 0 29 3.583519 0.000000 589 +except 0 28 3.610918 0.000000 607 +packag 0 28 3.610918 0.000000 614 +held 0 28 3.610918 0.000000 600 +ask 0 28 3.610918 0.000000 597 +determin 0 27 3.637586 0.000000 630 +detect 1 26 3.688879 3.688879 646 +enhanc 1 26 3.688879 3.688879 644 +altern 0 26 3.688879 0.000000 641 +relev 0 26 3.688879 0.000000 637 +fundament 0 25 3.737670 0.000000 661 +hill 0 25 3.737670 0.000000 670 +although 0 25 3.737670 0.000000 667 +pattern 1 24 3.761200 3.761200 689 +motion 0 24 3.761200 0.000000 699 +wish 0 24 3.761200 0.000000 692 +store 0 24 3.761200 0.000000 693 +other 0 24 3.761200 0.000000 697 +displai 1 23 3.806662 3.806662 712 +recognit 0 23 3.806662 0.000000 723 +head 0 23 3.806662 0.000000 732 +compress 0 23 3.806662 0.000000 719 +proof 0 23 3.806662 0.000000 720 +highli 0 23 3.806662 0.000000 725 +miscellan 0 23 3.806662 0.000000 731 +color 1 22 3.850148 3.850148 762 +varieti 1 22 3.850148 3.850148 740 +defin 0 22 3.850148 0.000000 746 +instead 0 22 3.850148 0.000000 756 +william 0 22 3.850148 0.000000 765 +disk 0 22 3.850148 0.000000 747 +sent 0 22 3.850148 0.000000 763 +recommend 0 22 3.850148 0.000000 737 +output 1 21 3.912023 3.912023 788 +tell 0 21 3.912023 0.000000 777 +fact 0 21 3.912023 0.000000 780 +wang 0 21 3.912023 0.000000 790 +sure 1 20 3.951244 3.951244 813 +entir 0 20 3.951244 0.000000 811 +toolkit 0 20 3.951244 0.000000 835 +prerequisit 0 19 4.007333 0.000000 846 +citi 0 19 4.007333 0.000000 874 +comparison 0 19 4.007333 0.000000 863 +boston 0 19 4.007333 0.000000 862 +account 1 18 4.060443 4.060443 882 +dimension 1 18 4.060443 4.060443 909 +four 1 18 4.060443 4.060443 905 +demo 1 18 4.060443 4.060443 888 +segment 1 17 4.110874 4.110874 931 +modif 1 17 4.110874 4.110874 913 +matrix 1 17 4.110874 4.110874 933 +stanford 1 17 4.110874 4.110874 955 +estim 0 17 4.110874 0.000000 930 +regular 0 17 4.110874 0.000000 929 +condit 1 16 4.174387 4.174387 975 +zhang 0 16 4.174387 0.000000 980 +earli 0 16 4.174387 0.000000 968 +sheet 0 16 4.174387 0.000000 973 +track 0 15 4.248495 0.000000 1029 +score 0 15 4.248495 0.000000 1017 +transit 0 15 4.248495 0.000000 1046 +goe 0 15 4.248495 0.000000 1044 +side 0 15 4.248495 0.000000 1022 +rate 0 15 4.248495 0.000000 1037 +doit 2 14 4.317488 8.634976 1111 +chuck 1 14 4.317488 4.317488 1108 +scene 1 14 4.317488 4.317488 1114 +matlab 1 14 4.317488 4.317488 1081 +save 0 14 4.317488 0.000000 1099 +manner 0 14 4.317488 0.000000 1074 +consider 0 14 4.317488 0.000000 1076 +command 0 14 4.317488 0.000000 1083 +block 1 13 4.382027 4.382027 1183 +primarili 0 13 4.382027 0.000000 1185 +convert 0 13 4.382027 0.000000 1122 +emac 0 13 4.382027 0.000000 1143 +everyon 0 13 4.382027 0.000000 1148 +shape 1 12 4.465908 4.465908 1245 +calculu 0 12 4.465908 0.000000 1203 +overal 0 12 4.465908 0.000000 1254 +count 0 12 4.465908 0.000000 1239 +optic 0 12 4.465908 0.000000 1221 +realiti 0 12 4.465908 0.000000 1272 +qualit 0 11 4.553877 0.000000 1362 +appl 0 11 4.553877 0.000000 1303 +vista 2 10 4.653960 9.307920 1452 +mosaic 1 10 4.653960 4.653960 1426 +modul 1 10 4.653960 4.653960 1434 +hint 1 10 4.653960 4.653960 1419 +queue 1 10 4.653960 4.653960 1386 +rapid 0 10 4.653960 0.000000 1453 +bring 0 10 4.653960 0.000000 1430 +dyer 1 9 4.753590 4.753590 1573 +face 1 9 4.753590 4.753590 1501 +distanc 1 9 4.753590 4.753590 1500 +recoveri 0 9 4.753590 0.000000 1474 +printer 1 8 4.875197 4.875197 1621 +depth 1 8 4.875197 4.875197 1636 +convers 1 8 4.875197 4.875197 1673 +edg 0 8 4.875197 0.000000 1647 +contrast 0 8 4.875197 0.000000 1637 +job 0 8 4.875197 0.000000 1702 +virginia 0 8 4.875197 0.000000 1659 +shade 1 7 5.010635 5.010635 1881 +stereo 1 7 5.010635 5.010635 1818 +prevent 0 7 5.010635 0.000000 1827 +corner 0 7 5.010635 0.000000 1909 +header 0 7 5.010635 0.000000 1787 +compact 0 7 5.010635 0.000000 1907 +signal 0 7 5.010635 0.000000 1910 +sweden 0 7 5.010635 0.000000 1885 +spline 1 6 5.164786 5.164786 2007 +viewpoint 0 6 5.164786 0.000000 2116 +gzip 0 6 5.164786 0.000000 2117 +invok 0 6 5.164786 0.000000 2079 +classroom 0 6 5.164786 0.000000 2006 +televis 0 6 5.164786 0.000000 2118 +spie 0 6 5.164786 0.000000 2119 +rotat 1 5 5.347108 5.347108 2295 +snake 1 5 5.347108 5.347108 2281 +bryan 0 5 5.347108 0.000000 2421 +jain 0 5 5.347108 0.000000 2332 +mcgraw 0 5 5.347108 0.000000 2262 +adjust 0 5 5.347108 0.000000 2422 +button 0 5 5.347108 0.000000 2337 +constant 0 5 5.347108 0.000000 2251 +multiresolut 0 5 5.347108 0.000000 2423 +sparcstat 0 5 5.347108 0.000000 2406 +shortest 0 5 5.347108 0.000000 2424 +grand 0 5 5.347108 0.000000 2425 +contour 1 4 5.568345 5.568345 2812 +sold 1 4 5.568345 5.568345 2813 +delet 1 4 5.568345 5.568345 2691 +assignmentshomework 0 4 5.568345 0.000000 2721 +shah 0 4 5.568345 0.000000 2814 +thin 1 3 5.857933 5.857933 3488 +pyramid 1 3 5.857933 5.857933 3358 +tran 1 3 5.857933 5.857933 3384 +faq 1 3 5.857933 5.857933 3216 +visionc 0 3 5.857933 0.000000 3489 +histogram 0 3 5.857933 0.000000 3490 +portrait 0 3 5.857933 0.000000 3491 +gradient 0 3 5.857933 0.000000 3479 +surround 0 3 5.857933 0.000000 3492 +suen 0 3 5.857933 0.000000 3446 +toolbox 0 3 5.857933 0.000000 3112 +quicktim 0 3 5.857933 0.000000 3493 +qbic 0 3 5.857933 0.000000 3294 +cardiff 0 3 5.857933 0.000000 3154 +khoro 1 2 6.263398 6.263398 4488 +comm 1 2 6.263398 6.263398 4746 +laser 1 2 6.263398 6.263398 4747 +skeleton 1 2 6.263398 6.263398 4225 +disappear 1 2 6.263398 6.263398 4748 +burt 1 2 6.263398 6.263398 4494 +visionfal 0 2 6.263398 0.000000 4749 +shoulder 0 2 6.263398 0.000000 4750 +altogeth 0 2 6.263398 0.000000 4751 +supplementari 0 2 6.263398 0.000000 4752 +quota 0 2 6.263398 0.000000 4753 +caution 0 2 6.263398 0.000000 4754 +cantata 0 2 6.263398 0.000000 4489 +panoram 0 2 6.263398 0.000000 4755 +royal 0 2 6.263398 0.000000 4756 +adelson 1 1 6.957497 6.957497 8941 +csstelephon 1 1 6.957497 6.957497 8942 +ubyt 1 1 6.957497 6.957497 8943 +imgstar 1 1 6.957497 6.957497 8944 +hdtv 1 1 6.957497 6.957497 8945 +atsc 1 1 6.957497 6.957497 8946 +dyeroffic 0 1 6.957497 0.000000 8947 +appointmentteach 0 1 6.957497 0.000000 8948 +sooffic 0 1 6.957497 0.000000 8949 +appointmentstud 0 1 6.957497 0.000000 8950 +informationfundament 0 1 6.957497 0.000000 8951 +featuredetect 0 1 6.957497 0.000000 8952 +forreconstruct 0 1 6.957497 0.000000 8953 +usingtechniqu 0 1 6.957497 0.000000 8954 +asshap 0 1 6.957497 0.000000 8955 +andocclud 0 1 6.957497 0.000000 8956 +kasturi 0 1 6.957497 0.000000 8957 +schunck 0 1 6.957497 0.000000 8958 +readingsfrom 0 1 6.957497 0.000000 8959 +batchessupplementari 0 1 6.957497 0.000000 8960 +sourcesonlin 0 1 6.957497 0.000000 8961 +informationmost 0 1 6.957497 0.000000 8962 +urlhttp 0 1 6.957497 0.000000 8963 +byfirst 0 1 6.957497 0.000000 8964 +crop 0 1 6.957497 0.000000 8965 +theintens 0 1 6.957497 0.000000 8966 +thewindow 0 1 6.957497 0.000000 8967 +colorif 0 1 6.957497 0.000000 8968 +grayscal 0 1 6.957497 0.000000 8969 +transformationsav 0 1 6.957497 0.000000 8970 +andput 0 1 6.957497 0.000000 8971 +whereth 0 1 6.957497 0.000000 8972 +whatintens 0 1 6.957497 0.000000 8973 +qualityof 0 1 6.957497 0.000000 8974 +ownweb 0 1 6.957497 0.000000 8975 +infin 0 1 6.957497 0.000000 8976 +chessboard 0 1 6.957497 0.000000 8977 +vconvert 0 1 6.957497 0.000000 8978 +clean 0 1 6.957497 0.000000 8979 +repn 0 1 6.957497 0.000000 8980 +component_interp 0 1 6.957497 0.000000 8981 +low_threshold 0 1 6.957497 0.000000 8982 +high_threshold 0 1 6.957497 0.000000 8983 +vlink 0 1 6.957497 0.000000 8984 +vsegedg 0 1 6.957497 0.000000 8985 +laplacian 0 1 6.957497 0.000000 8986 +kass 0 1 6.957497 0.000000 8987 +witkin 0 1 6.957497 0.000000 8988 +terzopoulo 0 1 6.957497 0.000000 8989 +curvatur 0 1 6.957497 0.000000 8990 +laserprint 0 1 6.957497 0.000000 8991 +netpbm 0 1 6.957497 0.000000 8992 +pbmplu 0 1 6.957497 0.000000 8993 +wandel 0 1 6.957497 0.000000 8994 +allianc 0 1 6.957497 0.000000 8995 +panoramix 0 1 6.957497 0.000000 8996 +decfac 0 1 6.957497 0.000000 8997 +synthet 0 1 6.957497 0.000000 8998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..67bb79f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +fall 1 181 1.609438 1.609438 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +base 0 165 1.791759 0.000000 50 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +number 0 130 2.079442 0.000000 97 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +section 1 94 2.397895 2.397895 149 +octob 0 89 2.397895 0.000000 156 +novemb 0 81 2.484907 0.000000 179 +meet 0 72 2.639057 0.000000 229 +solv 0 73 2.639057 0.000000 234 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +publish 0 57 2.890372 0.000000 326 +digit 0 52 2.995732 0.000000 348 +algebra 0 45 3.135494 0.000000 394 +fridai 0 44 3.135494 0.000000 390 +compani 0 41 3.218876 0.000000 423 +known 0 24 3.761200 0.000000 702 +greg 0 24 3.761200 0.000000 695 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +style 0 15 4.248495 0.000000 1036 +dave 1 14 4.317488 4.317488 1098 +readi 0 12 4.465908 0.000000 1242 +informationemail 0 9 4.753590 0.000000 1564 +sharp 1 6 5.164786 5.164786 2100 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +objectivesvectra 0 3 5.857933 0.000000 3410 +homeclass 0 3 5.857933 0.000000 3411 +policyl 0 3 5.857933 0.000000 3412 +policyacadem 0 3 5.857933 0.000000 3413 +consultantssyllabuswork 0 2 6.263398 0.000000 4579 +referenc 0 2 6.263398 0.000000 4757 +dzimm 1 1 6.957497 6.957497 8999 +zimmermannemail 0 1 6.957497 0.000000 9000 +educlass 0 1 6.957497 0.000000 9001 +nolandoffic 0 1 6.957497 0.000000 9002 +announcementsprogram 0 1 6.957497 0.000000 9003 +handoutsprogramsexam 0 1 6.957497 0.000000 9004 +quizzeslectur 0 1 6.957497 0.000000 9005 +notesgreg 0 1 6.957497 0.000000 9006 +guidegrad 0 1 6.957497 0.000000 9007 +quizzesprogramsexam 0 1 6.957497 0.000000 9008 +policytext 0 1 6.957497 0.000000 9009 +zimmermann 0 1 6.957497 0.000000 9010 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..e8f1bc75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +fall 1 181 1.609438 1.609438 40 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +spring 1 131 2.079442 2.079442 88 +instructor 0 108 2.197225 0.000000 107 +theori 0 111 2.197225 0.000000 127 +larg 0 82 2.484907 0.000000 168 +method 1 80 2.564949 2.564949 213 +januari 0 62 2.772589 0.000000 264 +semest 0 58 2.890372 0.000000 312 +variou 0 56 2.890372 0.000000 317 +offer 0 43 3.178054 0.000000 414 +linear 1 41 3.218876 3.218876 431 +michael 0 35 3.401197 0.000000 514 +flow 0 24 3.761200 0.000000 700 +spars 0 16 4.174387 0.000000 989 +nonlinear 1 14 4.317488 4.317488 1107 +mangasarian 0 9 4.753590 0.000000 1570 +ferri 0 8 4.875197 0.000000 1715 +integ 0 8 4.875197 0.000000 1688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..e41e639a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +lectur 0 135 1.945910 0.000000 73 +hall 0 146 1.945910 0.000000 65 +machin 1 129 2.079442 2.079442 95 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +text 1 98 2.302585 2.302585 133 +user 1 104 2.302585 2.302585 137 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +grade 1 90 2.397895 2.397895 142 +question 0 91 2.397895 0.000000 141 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +homework 2 79 2.564949 5.129898 193 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +orient 1 80 2.564949 2.564949 205 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 1 70 2.708050 2.708050 241 +knowledg 0 67 2.708050 0.000000 243 +wednesdai 1 64 2.772589 2.772589 261 +januari 0 62 2.772589 0.000000 264 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +unix 1 58 2.890372 2.890372 308 +overview 0 56 2.890372 0.000000 323 +local 1 55 2.944439 2.944439 334 +februari 0 54 2.944439 0.000000 328 +allow 0 53 2.944439 0.000000 333 +week 1 52 2.995732 2.995732 343 +telephon 1 50 3.044522 3.044522 373 +set 0 50 3.044522 0.000000 361 +fridai 2 44 3.135494 6.270988 390 +directori 0 45 3.135494 0.000000 396 +linear 1 41 3.218876 3.218876 431 +press 0 42 3.218876 0.000000 419 +examin 0 42 3.218876 0.000000 424 +close 0 38 3.295837 0.000000 465 +cost 0 37 3.332205 0.000000 480 +workstat 0 37 3.332205 0.000000 479 +tree 0 36 3.367296 0.000000 492 +michael 0 35 3.401197 0.000000 514 +jame 0 35 3.401197 0.000000 507 +except 0 28 3.610918 0.000000 607 +held 0 28 3.610918 0.000000 600 +session 1 26 3.688879 3.688879 643 +repres 0 26 3.688879 0.000000 656 +flow 1 24 3.761200 3.761200 700 +path 1 21 3.912023 3.912023 778 +minut 0 20 3.951244 0.000000 810 +prerequisit 0 19 4.007333 0.000000 846 +thoma 0 18 4.060443 0.000000 901 +appropri 0 18 4.060443 0.000000 883 +previous 0 17 4.110874 0.000000 923 +sheet 0 16 4.174387 0.000000 973 +solari 0 12 4.465908 0.000000 1238 +cycl 0 11 4.553877 0.000000 1335 +login 0 9 4.753590 0.000000 1550 +ferri 1 8 4.875197 4.875197 1715 +prentic 0 7 5.010635 0.000000 1838 +relax 0 6 5.164786 0.000000 2120 +shortest 0 5 5.347108 0.000000 2424 +cshrc 1 4 5.568345 5.568345 2759 +freeman 0 4 5.568345 0.000000 2725 +convex 0 4 5.568345 0.000000 2807 +novic 0 4 5.568345 0.000000 2815 +ahuja 0 3 5.857933 0.000000 3494 +bertseka 0 3 5.857933 0.000000 3477 +lagrangian 0 3 5.857933 0.000000 3478 +gam 1 2 6.263398 6.263398 4758 +leei 0 2 6.263398 0.000000 4759 +equilibria 0 2 6.263398 0.000000 4760 +multicommod 0 2 6.263398 0.000000 4761 +flowsspr 0 1 6.957497 0.000000 9011 +ravindra 0 1 6.957497 0.000000 9012 +magnanti 0 1 6.957497 0.000000 9013 +orlin 0 1 6.957497 0.000000 9014 +chvatal 0 1 6.957497 0.000000 9015 +simplex 0 1 6.957497 0.000000 9016 +alter 0 1 6.957497 0.000000 9017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..72dda265 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +class 1 199 1.609438 1.609438 37 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +hour 1 165 1.791759 1.791759 46 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +lectur 0 135 1.945910 0.000000 73 +introduct 1 126 2.079442 2.079442 87 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +instructor 1 108 2.197225 2.197225 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +user 1 104 2.302585 2.302585 137 +techniqu 0 99 2.302585 0.000000 138 +book 0 99 2.302585 0.000000 131 +grade 1 90 2.397895 2.397895 142 +question 0 91 2.397895 0.000000 141 +second 1 81 2.484907 2.484907 166 +larg 0 82 2.484907 0.000000 168 +ieee 0 86 2.484907 0.000000 190 +homework 2 79 2.564949 5.129898 193 +mondai 2 77 2.564949 5.129898 206 +april 1 77 2.564949 2.564949 196 +method 1 80 2.564949 2.564949 213 +orient 1 80 2.564949 2.564949 205 +optim 0 79 2.564949 0.000000 197 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +januari 0 62 2.772589 0.000000 264 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +unix 1 58 2.890372 2.890372 308 +direct 0 57 2.890372 0.000000 316 +space 0 57 2.890372 0.000000 310 +overview 0 56 2.890372 0.000000 323 +februari 1 54 2.944439 2.944439 328 +local 0 55 2.944439 0.000000 334 +allow 0 53 2.944439 0.000000 333 +instruct 0 53 2.944439 0.000000 332 +week 1 52 2.995732 2.995732 343 +telephon 1 50 3.044522 3.044522 373 +fridai 2 44 3.135494 6.270988 390 +math 1 44 3.135494 3.135494 402 +textbook 0 44 3.135494 0.000000 397 +press 0 42 3.218876 0.000000 419 +edit 0 42 3.218876 0.000000 418 +linear 0 41 3.218876 0.000000 431 +examin 0 42 3.218876 0.000000 424 +error 0 40 3.258097 0.000000 449 +close 0 38 3.295837 0.000000 465 +workstat 0 37 3.332205 0.000000 479 +michael 0 35 3.401197 0.000000 514 +least 0 35 3.401197 0.000000 516 +john 0 33 3.433987 0.000000 532 +storag 0 31 3.496508 0.000000 553 +except 0 28 3.610918 0.000000 607 +held 0 28 3.610918 0.000000 600 +session 1 26 3.688879 3.688879 643 +repres 0 26 3.688879 0.000000 656 +strategi 0 25 3.737670 0.000000 682 +equat 0 23 3.806662 0.000000 724 +recommend 0 22 3.850148 0.000000 737 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +scheme 0 20 3.951244 0.000000 818 +minut 0 20 3.951244 0.000000 810 +prerequisit 0 19 4.007333 0.000000 846 +dimension 0 18 4.060443 0.000000 909 +matrix 1 17 4.110874 4.110874 933 +modif 0 17 4.110874 0.000000 913 +previous 0 17 4.110874 0.000000 923 +spars 1 16 4.174387 4.174387 989 +vector 0 16 4.174387 0.000000 961 +sheet 0 16 4.174387 0.000000 973 +finit 0 14 4.317488 0.000000 1106 +squar 0 14 4.317488 0.000000 1082 +nonlinear 0 14 4.317488 0.000000 1107 +matlab 0 14 4.317488 0.000000 1081 +iter 0 12 4.465908 0.000000 1206 +matric 0 10 4.653960 0.000000 1399 +arithmet 0 10 4.653960 0.000000 1388 +elimin 0 9 4.753590 0.000000 1558 +ferri 1 8 4.875197 4.875197 1715 +solver 0 7 5.010635 0.000000 1911 +oxford 0 6 5.164786 0.000000 2121 +dens 0 6 5.164786 0.000000 2122 +pivot 0 5 5.347108 0.000000 2426 +consent 0 5 5.347108 0.000000 2389 +novic 0 4 5.568345 0.000000 2815 +golub 0 3 5.857933 0.000000 3265 +eigenvalu 0 3 5.857933 0.000000 3364 +eigenvector 0 3 5.857933 0.000000 3365 +systemsspr 0 2 6.263398 0.000000 4762 +leei 0 2 6.263398 0.000000 4759 +loan 0 2 6.263398 0.000000 4147 +gaussian 0 2 6.263398 0.000000 4763 +hopkinsunivers 0 1 6.957497 0.000000 9018 +duff 0 1 6.957497 0.000000 9019 +erisman 0 1 6.957497 0.000000 9020 +reid 0 1 6.957497 0.000000 9021 +halmo 0 1 6.957497 0.000000 9022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..8a07be1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 0 329 1.098612 0.000000 16 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +lectur 1 135 1.945910 1.945910 73 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +note 0 142 1.945910 0.000000 67 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +instructor 0 108 2.197225 0.000000 107 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +grade 0 90 2.397895 0.000000 142 +start 0 83 2.484907 0.000000 173 +mondai 1 77 2.564949 2.564949 206 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +wednesdai 1 64 2.772589 2.772589 261 +handout 0 64 2.772589 0.000000 263 +overview 0 56 2.890372 0.000000 323 +date 0 51 2.995732 0.000000 344 +telephon 1 50 3.044522 3.044522 373 +appoint 1 49 3.044522 3.044522 358 +get 0 46 3.091042 0.000000 380 +fridai 1 44 3.135494 3.135494 390 +examin 0 42 3.218876 0.000000 424 +richard 0 31 3.496508 0.000000 559 +charl 1 13 4.382027 4.382027 1149 +benjamin 0 11 4.553877 0.000000 1296 +regularli 0 11 4.553877 0.000000 1338 +tuth 0 9 4.753590 0.000000 1519 +cum 0 8 4.875197 0.000000 1619 +fischer 1 7 5.010635 5.010635 1893 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +krishna 0 3 5.857933 0.000000 3495 +weyer 1 2 6.263398 6.263398 4558 +compilersfal 0 2 6.263398 0.000000 4223 +csst 0 2 6.263398 0.000000 4764 +krisna 0 2 6.263398 0.000000 4765 +kunchithapadam 0 1 6.957497 0.000000 9023 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..87b2e216 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +fall 1 181 1.609438 1.609438 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +object 0 138 1.945910 0.000000 79 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +section 1 94 2.397895 2.397895 149 +solv 0 73 2.639057 0.000000 234 +html 0 75 2.639057 0.000000 235 +polici 0 64 2.772589 0.000000 279 +publish 0 57 2.890372 0.000000 326 +algebra 0 45 3.135494 0.000000 394 +compani 0 41 3.218876 0.000000 423 +http 0 41 3.218876 0.000000 420 +must 0 40 3.258097 0.000000 442 +eduoffic 0 33 3.433987 0.000000 531 +greg 1 24 3.761200 3.761200 695 +known 0 24 3.761200 0.000000 702 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +informationemail 0 9 4.753590 0.000000 1564 +appt 0 5 5.347108 0.000000 2312 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +grader 0 3 5.857933 0.000000 3165 +krishna 0 3 5.857933 0.000000 3495 +objectivesvectra 0 3 5.857933 0.000000 3410 +policyl 0 3 5.857933 0.000000 3412 +policyacadem 0 3 5.857933 0.000000 3413 +sharpemail 0 2 6.263398 0.000000 4766 +krisna 0 2 6.263398 0.000000 4765 +archivepolici 0 2 6.263398 0.000000 4580 +sharpgreg 0 2 6.263398 0.000000 4767 +kunchithapadamemail 0 1 6.957497 0.000000 9024 +edugener 0 1 6.957497 0.000000 9025 +consultantssyllabuscours 0 1 6.957497 0.000000 9026 +difficultywork 0 1 6.957497 0.000000 9027 +homenewsstartup 0 1 6.957497 0.000000 9028 +informationclass 0 1 6.957497 0.000000 9029 +noteshomeworkexam 0 1 6.957497 0.000000 9030 +quizzesstyl 0 1 6.957497 0.000000 9031 +guideemail 0 1 6.957497 0.000000 9032 +textproblem 0 1 6.957497 0.000000 9033 +porgrammingwalt 0 1 6.957497 0.000000 9034 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..93f1dff2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,160 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +class 2 199 1.609438 3.218876 37 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +read 1 154 1.791759 1.791759 47 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +compil 1 122 2.079442 2.079442 96 +introduct 1 126 2.079442 2.079442 87 +version 1 113 2.197225 2.197225 122 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +code 0 108 2.197225 0.000000 116 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +part 0 98 2.302585 0.000000 129 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +comment 0 93 2.397895 0.000000 146 +mani 0 92 2.397895 0.000000 150 +chang 1 82 2.484907 2.484907 163 +help 0 83 2.484907 0.000000 175 +start 0 83 2.484907 0.000000 173 +requir 0 81 2.484907 0.000000 167 +stuff 0 87 2.484907 0.000000 171 +academ 0 82 2.484907 0.000000 178 +second 0 81 2.484907 0.000000 166 +want 1 79 2.564949 2.564949 199 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +name 1 72 2.639057 2.639057 220 +line 1 75 2.639057 2.639057 231 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +test 1 66 2.708050 2.708050 252 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +copi 1 63 2.772589 2.772589 284 +result 0 65 2.772589 0.000000 281 +function 0 62 2.772589 0.000000 275 +descript 0 64 2.772589 0.000000 271 +handout 0 64 2.772589 0.000000 263 +type 1 61 2.833213 2.833213 296 +semest 0 58 2.890372 0.000000 312 +publish 0 57 2.890372 0.000000 326 +sampl 1 53 2.944439 2.944439 339 +suggest 0 53 2.944439 0.000000 331 +basic 0 50 3.044522 0.000000 360 +understand 0 47 3.091042 0.000000 384 +directori 1 45 3.135494 3.135494 396 +algebra 0 45 3.135494 0.000000 394 +midterm 0 45 3.135494 0.000000 392 +answer 0 45 3.135494 0.000000 391 +might 0 41 3.218876 0.000000 426 +compani 0 41 3.218876 0.000000 423 +announc 0 40 3.258097 0.000000 441 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +mean 1 37 3.332205 3.332205 477 +either 0 35 3.401197 0.000000 506 +within 0 33 3.433987 0.000000 525 +ad 0 32 3.465736 0.000000 544 +someth 0 31 3.496508 0.000000 554 +hard 0 30 3.555348 0.000000 563 +great 0 27 3.637586 0.000000 626 +administr 0 27 3.637586 0.000000 628 +consist 1 26 3.688879 3.688879 651 +valu 0 25 3.737670 0.000000 665 +consult 0 24 3.761200 0.000000 687 +togeth 0 23 3.806662 0.000000 714 +defin 0 22 3.850148 0.000000 746 +tent 0 22 3.850148 0.000000 739 +sure 0 20 3.951244 0.000000 813 +entir 0 20 3.951244 0.000000 811 +fine 0 20 3.951244 0.000000 822 +prepar 0 20 3.951244 0.000000 824 +definit 0 19 4.007333 0.000000 864 +attend 0 18 4.060443 0.000000 893 +attempt 0 17 4.110874 0.000000 917 +walter 0 17 4.110874 0.000000 950 +earli 0 16 4.174387 0.000000 968 +weslei 0 16 4.174387 0.000000 983 +misconduct 0 16 4.174387 0.000000 1003 +piec 0 15 4.248495 0.000000 1020 +style 0 15 4.248495 0.000000 1036 +shown 0 14 4.317488 0.000000 1080 +borland 0 14 4.317488 0.000000 1067 +everyth 1 13 4.382027 4.382027 1169 +quizz 1 13 4.382027 4.382027 1151 +bodi 0 13 4.382027 0.000000 1178 +outsid 0 12 4.465908 0.000000 1219 +insid 0 12 4.465908 0.000000 1262 +readi 0 12 4.465908 0.000000 1242 +savitch 0 12 4.465908 0.000000 1269 +addison 0 12 4.465908 0.000000 1230 +vectra 0 12 4.465908 0.000000 1267 +tue 0 11 4.553877 0.000000 1308 +chri 0 11 4.553877 0.000000 1311 +noth 0 11 4.553877 0.000000 1328 +extra 0 11 4.553877 0.000000 1312 +correspond 0 10 4.653960 0.000000 1382 +bring 0 10 4.653960 0.000000 1430 +declar 0 9 4.753590 0.000000 1526 +lane 1 8 4.875197 4.875197 1720 +matter 0 8 4.875197 0.000000 1627 +bottom 0 7 5.010635 0.000000 1906 +throughout 0 7 5.010635 0.000000 1871 +sharp 1 6 5.164786 5.164786 2100 +recogn 0 5 5.347108 0.000000 2302 +crucial 0 5 5.347108 0.000000 2384 +prog 2 4 5.568345 11.136690 2740 +shouldn 0 4 5.568345 0.000000 2606 +thumb 0 4 5.568345 0.000000 2816 +enumer 1 3 5.857933 5.857933 3244 +privat 1 3 5.857933 5.857933 3496 +bump 0 3 5.857933 0.000000 3497 +obsolet 0 3 5.857933 0.000000 3196 +freshman 0 3 5.857933 0.000000 3462 +chad 1 2 6.263398 6.263398 4768 +forgot 0 2 6.263398 0.000000 4769 +weaver 0 2 6.263398 0.000000 4770 +freshmen 0 2 6.263398 0.000000 4554 +disregard 0 2 6.263398 0.000000 4189 +tribbl 1 1 6.957497 6.957497 9035 +randomintinrang 0 1 6.957497 0.000000 9036 +uppercas 0 1 6.957497 0.000000 9037 +overwrit 0 1 6.957497 0.000000 9038 +discrep 0 1 6.957497 0.000000 9039 +solutionscours 0 1 6.957497 0.000000 9040 +vleck 0 1 6.957497 0.000000 9041 +guidelast 0 1 6.957497 0.000000 9042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..3072b448 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +us 0 329 1.098612 0.000000 16 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +hour 1 165 1.791759 1.791759 46 +read 0 154 1.791759 0.000000 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +techniqu 0 99 2.302585 0.000000 138 +grade 1 90 2.397895 2.397895 142 +octob 0 89 2.397895 0.000000 156 +librari 0 87 2.484907 0.000000 181 +exam 0 86 2.484907 0.000000 169 +start 0 83 2.484907 0.000000 173 +mondai 0 77 2.564949 0.000000 206 +homework 0 79 2.564949 0.000000 193 +tuesdai 0 73 2.639057 0.000000 219 +wednesdai 0 64 2.772589 0.000000 261 +polici 0 64 2.772589 0.000000 279 +overview 0 56 2.890372 0.000000 323 +date 0 51 2.995732 0.000000 344 +telephon 1 50 3.044522 3.044522 373 +appoint 1 49 3.044522 3.044522 358 +principl 0 48 3.044522 0.000000 357 +get 0 46 3.091042 0.000000 380 +fridai 0 44 3.135494 0.000000 390 +examin 0 42 3.218876 0.000000 424 +late 0 40 3.258097 0.000000 439 +comp 0 26 3.688879 0.000000 650 +reserv 0 20 3.951244 0.000000 808 +stat 0 17 4.110874 0.000000 924 +month 0 15 4.248495 0.000000 1025 +psycholog 0 15 4.248495 0.000000 1054 +susan 0 15 4.248495 0.000000 1050 +stori 0 14 4.317488 0.000000 1087 +regularli 0 11 4.553877 0.000000 1338 +wendt 0 10 4.653960 0.000000 1446 +tuth 0 9 4.753590 0.000000 1519 +recit 0 9 4.753590 0.000000 1475 +fischer 0 7 5.010635 0.000000 1893 +horwitz 1 5 5.347108 5.347108 2411 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +ullman 0 4 5.568345 0.000000 2749 +rahul 1 3 5.857933 5.857933 3464 +compilersspr 0 2 6.263398 0.000000 4700 +kapoor 0 2 6.263398 0.000000 4701 +sethi 0 2 6.263398 0.000000 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..26700731 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +introduct 1 126 2.079442 2.079442 87 +instructor 0 108 2.197225 0.000000 107 +text 0 98 2.302585 0.000000 133 +grade 1 90 2.397895 2.397895 142 +section 0 94 2.397895 0.000000 149 +academ 0 82 2.484907 0.000000 178 +mondai 0 77 2.564949 0.000000 206 +solv 0 73 2.639057 0.000000 234 +window 1 68 2.708050 2.708050 242 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +semest 0 58 2.890372 0.000000 312 +announc 0 40 3.258097 0.000000 441 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +statist 1 35 3.401197 3.401197 521 +tent 0 22 3.850148 0.000000 739 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +savitch 1 12 4.465908 4.465908 1269 +criteria 0 9 4.753590 0.000000 1477 +hummert 1 3 5.857933 5.857933 3416 +psych 1 3 5.857933 5.857933 3498 +textbookproblem 0 3 5.857933 0.000000 3483 +windowshint 0 3 5.857933 0.000000 3484 +compilersth 0 3 5.857933 0.000000 3485 +systememailmosaicnetscap 0 3 5.857933 0.000000 3486 +languageth 0 3 5.857933 0.000000 3487 +sectionsc 0 1 6.957497 0.000000 9043 +viewgraph 0 1 6.957497 0.000000 9044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..be15cee2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +madison 0 165 1.791759 0.000000 55 +click 1 142 1.945910 1.945910 78 +hall 0 146 1.945910 0.000000 65 +file 0 132 1.945910 0.000000 70 +number 0 130 2.079442 0.000000 97 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +need 0 98 2.302585 0.000000 135 +section 1 94 2.397895 2.397895 149 +question 0 91 2.397895 0.000000 141 +grade 0 90 2.397895 0.000000 142 +info 0 85 2.484907 0.000000 176 +thing 0 84 2.484907 0.000000 189 +mondai 0 77 2.564949 0.000000 206 +name 0 72 2.639057 0.000000 220 +thursdai 0 70 2.708050 0.000000 241 +copi 0 63 2.772589 0.000000 284 +guid 0 63 2.772589 0.000000 267 +septemb 0 65 2.772589 0.000000 274 +keep 0 44 3.135494 0.000000 409 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +mind 0 27 3.637586 0.000000 632 +stop 0 17 4.110874 0.000000 942 +choos 0 16 4.174387 0.000000 964 +doit 0 14 4.317488 0.000000 1111 +menu 0 13 4.382027 0.000000 1156 +bodner 0 5 5.347108 0.000000 2401 +eduher 0 3 5.857933 0.000000 3499 +jonb 1 2 6.263398 6.263398 4771 +infoc 0 2 6.263398 0.000000 4772 +mound 0 2 6.263398 0.000000 4773 +bodnersect 0 1 6.957497 0.000000 9045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..b2da2854 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +network 0 168 1.791759 0.000000 61 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +like 0 132 1.945910 0.000000 81 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +introduct 1 126 2.079442 2.079442 87 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +code 0 108 2.197225 0.000000 116 +teach 0 108 2.197225 0.000000 112 +book 1 99 2.302585 2.302585 131 +section 1 94 2.397895 2.397895 149 +search 1 95 2.397895 2.397895 155 +sinc 1 90 2.397895 2.397895 159 +grade 0 90 2.397895 0.000000 142 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +question 0 91 2.397895 0.000000 141 +exam 2 86 2.484907 4.969814 169 +learn 0 86 2.484907 0.000000 170 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +refer 1 78 2.564949 2.564949 203 +appear 0 78 2.564949 0.000000 210 +line 1 75 2.639057 2.639057 231 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +artifici 1 63 2.772589 2.772589 280 +previou 1 62 2.772589 2.772589 290 +plan 0 65 2.772589 0.000000 272 +plai 0 60 2.833213 0.000000 307 +best 0 59 2.833213 0.000000 299 +detail 0 57 2.890372 0.000000 321 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +cover 0 55 2.944439 0.000000 329 +appoint 0 49 3.044522 0.000000 358 +approach 0 48 3.044522 0.000000 366 +give 0 50 3.044522 0.000000 359 +still 0 50 3.044522 0.000000 362 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +directori 1 45 3.135494 3.135494 396 +discuss 1 45 3.135494 3.135494 399 +natur 0 44 3.135494 0.000000 406 +anoth 0 45 3.135494 0.000000 408 +textbook 0 44 3.135494 0.000000 397 +answer 0 45 3.135494 0.000000 391 +edit 0 42 3.218876 0.000000 418 +review 0 42 3.218876 0.000000 425 +late 1 40 3.258097 3.258097 439 +probabl 1 40 3.258097 3.258097 455 +game 0 36 3.367296 0.000000 498 +eduoffic 0 33 3.433987 0.000000 531 +within 0 33 3.433987 0.000000 525 +common 1 30 3.555348 3.555348 574 +neural 0 30 3.555348 0.000000 578 +turn 0 29 3.583519 0.000000 586 +usual 0 28 3.610918 0.000000 608 +session 0 26 3.688879 0.000000 643 +notic 0 25 3.737670 0.000000 675 +begin 0 23 3.806662 0.000000 716 +entir 0 20 3.951244 0.000000 811 +minut 0 20 3.951244 0.000000 810 +lisp 2 18 4.060443 8.120886 897 +four 0 18 4.060443 0.000000 905 +attend 0 18 4.060443 0.000000 893 +intro 0 17 4.110874 0.000000 915 +stat 0 17 4.110874 0.000000 924 +ultim 0 17 4.110874 0.000000 943 +modern 0 16 4.174387 0.000000 966 +later 0 15 4.248495 0.000000 1043 +prolog 0 13 4.382027 0.000000 1155 +count 1 12 4.465908 4.465908 1239 +deduct 0 12 4.465908 0.000000 1236 +alpha 0 11 4.553877 0.000000 1348 +engr 0 10 4.653960 0.000000 1427 +hint 0 10 4.653960 0.000000 1419 +recit 1 9 4.753590 4.753590 1475 +russel 0 9 4.753590 0.000000 1507 +beta 0 6 5.164786 0.000000 1993 +supplement 0 5 5.347108 0.000000 2355 +older 0 5 5.347108 0.000000 2387 +midnight 0 4 5.568345 0.000000 2599 +uncertain 0 4 5.568345 0.000000 2758 +graham 0 4 5.568345 0.000000 2817 +steel 0 4 5.568345 0.000000 2818 +kunen 1 3 5.857933 5.857933 3500 +ansi 0 3 5.857933 0.000000 3198 +psych 0 3 5.857933 0.000000 3498 +coursewil 1 2 6.263398 6.263398 4718 +loos 0 2 6.263398 0.000000 4774 +buti 0 2 6.263398 0.000000 4775 +assignmenti 0 2 6.263398 0.000000 4573 +sun 0 2 6.263398 0.000000 4490 +kunenoffic 0 1 6.957497 0.000000 9046 +buildingtelephon 0 1 6.957497 0.000000 9047 +thirdexam 0 1 6.957497 0.000000 9048 +thedai 0 1 6.957497 0.000000 9049 +usefulto 0 1 6.957497 0.000000 9050 +lecturesand 0 1 6.957497 0.000000 9051 +manypaperback 0 1 6.957497 0.000000 9052 +lispcraft 0 1 6.957497 0.000000 9053 +wilenski 0 1 6.957497 0.000000 9054 +norvig 0 1 6.957497 0.000000 9055 +essentiallli 0 1 6.957497 0.000000 9056 +alpha_beta 0 1 6.957497 0.000000 9057 +astar 0 1 6.957497 0.000000 9058 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..a1bd7ceb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +parallel 1 169 1.791759 1.791759 60 +hour 1 165 1.791759 1.791759 46 +wisconsin 0 169 1.791759 0.000000 54 +assign 2 135 1.945910 3.891820 66 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +spring 1 131 2.079442 2.079442 88 +introduct 0 126 2.079442 0.000000 87 +report 0 131 2.079442 0.000000 92 +instructor 0 108 2.197225 0.000000 107 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +part 1 98 2.302585 2.302585 129 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +question 0 91 2.397895 0.000000 141 +solut 1 82 2.484907 2.484907 162 +level 1 87 2.484907 2.484907 180 +novemb 0 81 2.484907 0.000000 179 +exam 0 86 2.484907 0.000000 169 +wide 0 84 2.484907 0.000000 185 +homework 2 79 2.564949 5.129898 193 +decemb 1 80 2.564949 2.564949 215 +mondai 0 77 2.564949 0.000000 206 +sourc 0 77 2.564949 0.000000 201 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +content 1 59 2.833213 2.833213 302 +instruct 1 53 2.944439 2.944439 332 +talk 1 53 2.944439 2.944439 336 +tabl 1 51 2.995732 2.995732 346 +appoint 1 49 3.044522 3.044522 358 +give 1 50 3.044522 3.044522 359 +basic 1 50 3.044522 3.044522 360 +set 0 50 3.044522 0.000000 361 +mark 0 44 3.135494 0.000000 403 +fridai 0 44 3.135494 0.000000 390 +midterm 0 45 3.135494 0.000000 392 +offer 0 43 3.178054 0.000000 414 +cach 0 41 3.218876 0.000000 432 +edit 0 42 3.218876 0.000000 418 +seminar 0 38 3.295837 0.000000 470 +cost 0 37 3.332205 0.000000 480 +eduoffic 1 33 3.433987 3.433987 531 +chapter 2 32 3.465736 6.931472 536 +hard 0 30 3.555348 0.000000 563 +full 1 28 3.610918 3.610918 615 +propos 0 28 3.610918 0.000000 602 +comp 1 26 3.688879 3.688879 650 +hill 0 25 3.737670 0.000000 670 +input 0 23 3.806662 0.000000 727 +output 0 21 3.912023 0.000000 788 +interconnect 0 17 4.110874 0.000000 937 +doit 1 14 4.317488 4.317488 1111 +reader 1 12 4.465908 4.465908 1246 +patterson 0 9 4.753590 0.000000 1554 +qualifi 0 8 4.875197 0.000000 1721 +pipelin 1 7 5.010635 5.010635 1830 +hennessi 0 5 5.347108 0.000000 2289 +markhil 0 4 5.568345 0.000000 2819 +talluri 0 4 5.568345 0.000000 2820 +ifal 0 2 6.263398 0.000000 4776 +statphon 0 2 6.263398 0.000000 4726 +hilloffic 0 1 6.957497 0.000000 9059 +statemail 0 1 6.957497 0.000000 9060 +shenoffic 0 1 6.957497 0.000000 9061 +mshen 0 1 6.957497 0.000000 9062 +miscellaneawhat 0 1 6.957497 0.000000 9063 +talksread 0 1 6.957497 0.000000 9064 +solutionproject 0 1 6.957497 0.000000 9065 +noonmiscellanea 0 1 6.957497 0.000000 9066 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..6eb6c9df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +assign 1 135 1.945910 1.945910 66 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +introduct 1 126 2.079442 2.079442 87 +number 0 130 2.079442 0.000000 97 +instructor 0 108 2.197225 0.000000 107 +structur 0 106 2.197225 0.000000 105 +text 0 98 2.302585 0.000000 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +academ 0 82 2.484907 0.000000 178 +exampl 1 77 2.564949 2.564949 195 +dynam 1 76 2.564949 2.564949 194 +solv 0 73 2.639057 0.000000 234 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +complex 1 64 2.772589 2.772589 269 +polici 0 64 2.772589 0.000000 279 +room 0 59 2.833213 0.000000 301 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +appoint 0 49 3.044522 0.000000 358 +standard 0 48 3.044522 0.000000 365 +anoth 0 45 3.135494 0.000000 408 +late 0 40 3.258097 0.000000 439 +tutori 0 39 3.258097 0.000000 437 +microsoft 0 38 3.295837 0.000000 468 +procedur 0 36 3.367296 0.000000 488 +michael 0 35 3.401197 0.000000 514 +administr 0 27 3.637586 0.000000 628 +comp 1 26 3.688879 3.688879 650 +repres 0 26 3.688879 0.000000 656 +consult 0 24 3.761200 0.000000 687 +size 0 23 3.806662 0.000000 713 +alloc 1 20 3.951244 3.951244 821 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +psycholog 0 15 4.248495 0.000000 1054 +borland 0 14 4.317488 0.000000 1067 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +string 0 11 4.553877 0.000000 1340 +cheat 0 10 4.653960 0.000000 1395 +float 0 9 4.753590 0.000000 1504 +debugg 0 9 4.753590 0.000000 1493 +overload 1 5 5.347108 5.347108 2403 +handin 0 5 5.347108 0.000000 2393 +ration 0 5 5.347108 0.000000 2427 +birk 0 4 5.568345 0.000000 2791 +struct 0 4 5.568345 0.000000 2821 +mbirk 1 3 5.857933 5.857933 3501 +intstack 0 2 6.263398 0.000000 4777 +unlimit 0 2 6.263398 0.000000 4778 +classinfo 0 2 6.263398 0.000000 4779 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..82e75ff5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 0 412 0.693147 0.000000 8 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +avail 1 169 1.791759 1.791759 48 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +data 0 170 1.791759 0.000000 49 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +number 1 130 2.079442 2.079442 97 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +text 0 98 2.302585 0.000000 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +octob 0 89 2.397895 0.000000 156 +exam 1 86 2.484907 2.484907 169 +second 0 81 2.484907 0.000000 166 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +exampl 1 77 2.564949 2.564949 195 +dynam 1 76 2.564949 2.564949 194 +mondai 0 77 2.564949 0.000000 206 +come 0 78 2.564949 0.000000 202 +onlin 0 75 2.639057 0.000000 223 +meet 0 72 2.639057 0.000000 229 +solv 0 73 2.639057 0.000000 234 +tuesdai 0 73 2.639057 0.000000 219 +syllabu 1 67 2.708050 2.708050 247 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +window 0 68 2.708050 0.000000 242 +polici 1 64 2.772589 2.772589 279 +complex 1 64 2.772589 2.772589 269 +room 1 59 2.833213 2.833213 301 +simpl 0 60 2.833213 0.000000 298 +point 0 58 2.890372 0.000000 319 +instruct 1 53 2.944439 2.944439 332 +week 1 52 2.995732 2.995732 343 +case 0 51 2.995732 0.000000 351 +digit 0 52 2.995732 0.000000 348 +appoint 0 49 3.044522 0.000000 358 +format 0 48 3.044522 0.000000 356 +standard 0 48 3.044522 0.000000 365 +discuss 0 45 3.135494 0.000000 399 +anoth 0 45 3.135494 0.000000 408 +past 0 42 3.218876 0.000000 428 +late 1 40 3.258097 3.258097 439 +announc 0 40 3.258097 0.000000 441 +tutori 0 39 3.258097 0.000000 437 +microsoft 0 38 3.295837 0.000000 468 +soon 0 36 3.367296 0.000000 494 +procedur 0 36 3.367296 0.000000 488 +michael 0 35 3.401197 0.000000 514 +print 0 34 3.401197 0.000000 503 +administr 0 27 3.637586 0.000000 628 +comp 1 26 3.688879 3.688879 650 +repres 0 26 3.688879 0.000000 656 +consult 0 24 3.761200 0.000000 687 +size 0 23 3.806662 0.000000 713 +output 0 21 3.912023 0.000000 788 +alloc 1 20 3.951244 3.951244 821 +four 0 18 4.060443 0.000000 905 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +rank 0 14 4.317488 0.000000 1063 +borland 0 14 4.317488 0.000000 1067 +outsid 0 12 4.465908 0.000000 1219 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +eight 1 11 4.553877 4.553877 1331 +string 0 11 4.553877 0.000000 1340 +cheat 0 10 4.653960 0.000000 1395 +float 0 9 4.753590 0.000000 1504 +debugg 0 9 4.753590 0.000000 1493 +overload 1 5 5.347108 5.347108 2403 +handin 0 5 5.347108 0.000000 2393 +ration 0 5 5.347108 0.000000 2427 +birk 0 4 5.568345 0.000000 2791 +struct 0 4 5.568345 0.000000 2821 +mbirk 1 3 5.857933 5.857933 3501 +intstack 0 2 6.263398 0.000000 4777 +unlimit 0 2 6.263398 0.000000 4778 +classinfo 0 2 6.263398 0.000000 4779 +dice 0 1 6.957497 0.000000 9067 +hangman 0 1 6.957497 0.000000 9068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..fac82207 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +assign 1 135 1.945910 1.945910 66 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +text 0 98 2.302585 0.000000 133 +section 2 94 2.397895 4.795790 149 +grade 0 90 2.397895 0.000000 142 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +help 1 83 2.484907 2.484907 175 +chang 0 82 2.484907 0.000000 163 +start 0 83 2.484907 0.000000 173 +academ 0 82 2.484907 0.000000 178 +solut 0 82 2.484907 0.000000 162 +refer 0 78 2.564949 0.000000 203 +david 1 71 2.639057 2.639057 232 +meet 1 72 2.639057 2.639057 229 +solv 0 73 2.639057 0.000000 234 +materi 0 75 2.639057 0.000000 221 +window 0 68 2.708050 0.000000 242 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +handout 0 64 2.772589 0.000000 263 +share 0 59 2.833213 0.000000 304 +archiv 1 49 3.044522 3.044522 364 +frequent 0 49 3.044522 0.000000 367 +get 0 46 3.091042 0.000000 380 +press 0 42 3.218876 0.000000 419 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +statist 0 35 3.401197 0.000000 521 +often 0 31 3.496508 0.000000 551 +rule 0 26 3.688879 0.000000 638 +daili 0 24 3.761200 0.000000 706 +consult 0 24 3.761200 0.000000 687 +walter 0 17 4.110874 0.000000 950 +alreadi 0 16 4.174387 0.000000 963 +misconduct 0 16 4.174387 0.000000 1003 +psycholog 0 15 4.248495 0.000000 1054 +dave 0 14 4.317488 0.000000 1098 +floor 0 14 4.317488 0.000000 1070 +essenti 0 13 4.382027 0.000000 1137 +savitch 0 12 4.465908 0.000000 1269 +addison 0 12 4.465908 0.000000 1230 +andth 0 9 4.753590 0.000000 1481 +tutor 0 9 4.753590 0.000000 1552 +reload 0 8 4.875197 0.000000 1682 +attach 0 7 5.010635 0.000000 1785 +rough 0 6 5.164786 0.000000 2107 +button 0 5 5.347108 0.000000 2337 +noland 0 5 5.347108 0.000000 2420 +thumb 0 4 5.568345 0.000000 2816 +melski 1 2 6.263398 6.263398 4780 +melskithes 0 1 6.957497 0.000000 9069 +uncomfort 0 1 6.957497 0.000000 9070 +wesleypublish 0 1 6.957497 0.000000 9071 +usingborland 0 1 6.957497 0.000000 9072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..695abe25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +class 2 199 1.609438 3.218876 37 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +updat 0 191 1.609438 0.000000 41 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +file 1 132 1.945910 1.945910 70 +assign 0 135 1.945910 0.000000 66 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +introduct 0 126 2.079442 0.000000 87 +code 2 108 2.197225 4.394450 116 +check 1 115 2.197225 2.197225 118 +version 1 113 2.197225 2.197225 122 +instructor 0 108 2.197225 0.000000 107 +place 0 106 2.197225 0.000000 124 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +octob 2 89 2.397895 4.795790 156 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +homepag 1 93 2.397895 2.397895 148 +grade 1 90 2.397895 2.397895 142 +novemb 2 81 2.484907 4.969814 179 +solut 2 82 2.484907 4.969814 162 +exam 1 86 2.484907 2.484907 169 +chang 0 82 2.484907 0.000000 163 +academ 0 82 2.484907 0.000000 178 +requir 0 81 2.484907 0.000000 167 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +good 0 77 2.564949 0.000000 200 +html 1 75 2.639057 2.639057 235 +tuesdai 1 73 2.639057 2.639057 219 +solv 0 73 2.639057 0.000000 234 +thursdai 0 70 2.708050 0.000000 241 +test 0 66 2.708050 0.000000 252 +syllabu 0 67 2.708050 0.000000 247 +main 0 67 2.708050 0.000000 256 +wednesdai 2 64 2.772589 5.545178 261 +septemb 1 65 2.772589 2.772589 274 +guid 1 63 2.772589 2.772589 267 +function 1 62 2.772589 2.772589 275 +creat 0 63 2.772589 0.000000 277 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +publish 0 57 2.890372 0.000000 326 +case 0 51 2.995732 0.000000 351 +standard 1 48 3.044522 3.044522 365 +appoint 0 49 3.044522 0.000000 358 +frequent 0 49 3.044522 0.000000 367 +fridai 1 44 3.135494 3.135494 390 +algebra 0 45 3.135494 0.000000 394 +textbook 0 44 3.135494 0.000000 397 +compani 0 41 3.218876 0.000000 423 +form 0 39 3.258097 0.000000 443 +respons 0 37 3.332205 0.000000 476 +survei 0 35 3.401197 0.000000 513 +eduoffic 0 33 3.433987 0.000000 531 +ad 1 32 3.465736 3.465736 544 +given 0 32 3.465736 0.000000 538 +often 0 31 3.496508 0.000000 551 +titl 0 31 3.496508 0.000000 556 +specifi 0 30 3.555348 0.000000 568 +turn 0 29 3.583519 0.000000 586 +releas 0 28 3.610918 0.000000 616 +todai 0 25 3.737670 0.000000 672 +valu 0 25 3.737670 0.000000 665 +consult 1 24 3.761200 3.761200 687 +known 0 24 3.761200 0.000000 702 +input 1 23 3.806662 3.806662 727 +martin 1 21 3.912023 3.912023 794 +latest 0 21 3.912023 0.000000 785 +sure 0 20 3.951244 0.000000 813 +coupl 0 17 4.110874 0.000000 939 +walter 0 17 4.110874 0.000000 950 +quiz 2 16 4.174387 8.348774 990 +sheet 0 16 4.174387 0.000000 973 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +score 2 15 4.248495 8.496990 1017 +style 0 15 4.248495 0.000000 1036 +introduc 0 13 4.382027 0.000000 1139 +onth 0 12 4.465908 0.000000 1218 +vectra 0 12 4.465908 0.000000 1267 +errata 0 10 4.653960 0.000000 1403 +minimum 0 9 4.753590 0.000000 1555 +remind 0 7 5.010635 0.000000 1799 +ethic 0 7 5.010635 0.000000 1786 +savitchaddison 0 5 5.347108 0.000000 2396 +struct 0 4 5.568345 0.000000 2821 +maximum 0 4 5.568345 0.000000 2632 +toth 0 4 5.568345 0.000000 2595 +beginn 0 3 5.857933 0.000000 3330 +milo 1 2 6.263398 6.263398 4781 +viru 0 2 6.263398 0.000000 4782 +bankaccount 2 1 6.957497 13.914994 9073 +withprompt 1 1 6.957497 6.957497 9074 +psychologyinstructor 0 1 6.957497 0.000000 9075 +announcementsthi 0 1 6.957497 0.000000 9076 +scheduledfor 0 1 6.957497 0.000000 9077 +haseveryth 0 1 6.957497 0.000000 9078 +isaccur 0 1 6.957497 0.000000 9079 +withinform 0 1 6.957497 0.000000 9080 +linksar 0 1 6.957497 0.000000 9081 +onfridai 0 1 6.957497 0.000000 9082 +policyclass 0 1 6.957497 0.000000 9083 +minmax 0 1 6.957497 0.000000 9084 +findth 0 1 6.957497 0.000000 9085 +enteredfrom 0 1 6.957497 0.000000 9086 +stdin 0 1 6.957497 0.000000 9087 +formlett 0 1 6.957497 0.000000 9088 +theopen_fil 0 1 6.957497 0.000000 9089 +hoax 0 1 6.957497 0.000000 9090 +andprofession 0 1 6.957497 0.000000 9091 +conductassign 0 1 6.957497 0.000000 9092 +questionar 0 1 6.957497 0.000000 9093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..3a78b8f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +code 0 108 2.197225 0.000000 116 +instructor 0 108 2.197225 0.000000 107 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +exam 1 86 2.484907 2.484907 169 +contain 0 81 2.484907 0.000000 174 +april 1 77 2.564949 2.564949 196 +messag 0 76 2.564949 0.000000 212 +tuesdai 2 73 2.639057 5.278114 219 +solv 0 73 2.639057 0.000000 234 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +thursdai 1 70 2.708050 2.708050 241 +window 0 68 2.708050 0.000000 242 +syllabu 0 67 2.708050 0.000000 247 +januari 0 62 2.772589 0.000000 264 +polici 0 64 2.772589 0.000000 279 +march 1 61 2.833213 2.833213 295 +share 0 59 2.833213 0.000000 304 +semest 1 58 2.890372 2.890372 312 +detail 0 57 2.890372 0.000000 321 +februari 1 54 2.944439 2.944439 328 +talk 0 53 2.944439 0.000000 336 +run 0 51 2.995732 0.000000 347 +appoint 0 49 3.044522 0.000000 358 +algebra 0 45 3.135494 0.000000 394 +made 0 44 3.135494 0.000000 398 +late 0 40 3.258097 0.000000 439 +statist 0 35 3.401197 0.000000 521 +rule 0 26 3.688879 0.000000 638 +sent 0 22 3.850148 0.000000 763 +tent 0 22 3.850148 0.000000 739 +martin 1 21 3.912023 3.912023 794 +walter 0 17 4.110874 0.000000 950 +former 0 17 4.110874 0.000000 956 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +extra 0 11 4.553877 0.000000 1312 +criteria 0 9 4.753590 0.000000 1477 +absolut 0 8 4.875197 0.000000 1646 +calendar 0 8 4.875197 0.000000 1649 +noland 1 5 5.347108 5.347108 2420 +informationc 0 5 5.347108 0.000000 2394 +thumb 0 4 5.568345 0.000000 2816 +textbookproblem 0 3 5.857933 0.000000 3483 +ream 1 2 6.263398 6.263398 4783 +mream 1 2 6.263398 6.263398 4784 +csst 0 2 6.263398 0.000000 4764 +classc 0 1 6.957497 0.000000 9094 +programmingspr 0 1 6.957497 0.000000 9095 +nothingeverydai 0 1 6.957497 0.000000 9096 +pagescommon 0 1 6.957497 0.000000 9097 +programmingmistakesarch 0 1 6.957497 0.000000 9098 +placeto 0 1 6.957497 0.000000 9099 +announcedcours 0 1 6.957497 0.000000 9100 +andborland 0 1 6.957497 0.000000 9101 +academicmisconduct 0 1 6.957497 0.000000 9102 +anyform 0 1 6.957497 0.000000 9103 +bigtodd 0 1 6.957497 0.000000 9104 +thielwendi 0 1 6.957497 0.000000 9105 +staatsabout 0 1 6.957497 0.000000 9106 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..90996168 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +fall 1 181 1.609438 1.609438 40 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +read 1 154 1.791759 1.791759 47 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +note 1 142 1.945910 1.945910 67 +object 0 138 1.945910 0.000000 79 +check 1 115 2.197225 2.197225 118 +pleas 0 113 2.197225 0.000000 114 +find 0 111 2.197225 0.000000 111 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +novemb 1 81 2.484907 2.484907 179 +stuff 0 87 2.484907 0.000000 171 +academ 0 82 2.484907 0.000000 178 +exampl 1 77 2.564949 2.564949 195 +refer 0 78 2.564949 0.000000 203 +tuesdai 1 73 2.639057 2.639057 219 +line 0 75 2.639057 0.000000 231 +solv 0 73 2.639057 0.000000 234 +syllabu 0 67 2.708050 0.000000 247 +polici 1 64 2.772589 2.772589 279 +import 0 65 2.772589 0.000000 282 +function 0 62 2.772589 0.000000 275 +publish 0 57 2.890372 0.000000 326 +sampl 0 53 2.944439 0.000000 339 +week 0 52 2.995732 0.000000 343 +appoint 0 49 3.044522 0.000000 358 +understand 0 47 3.091042 0.000000 384 +algebra 0 45 3.135494 0.000000 394 +even 0 45 3.135494 0.000000 393 +compani 0 41 3.218876 0.000000 423 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +hand 0 37 3.332205 0.000000 475 +eduoffic 0 33 3.433987 0.000000 531 +extend 0 32 3.465736 0.000000 539 +taken 0 31 3.496508 0.000000 555 +comp 1 26 3.688879 3.688879 650 +mike 1 24 3.761200 3.761200 703 +greg 0 24 3.761200 0.000000 695 +known 0 24 3.761200 0.000000 702 +miss 0 19 4.007333 0.000000 866 +stat 1 17 4.110874 4.110874 924 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +weslei 0 16 4.174387 0.000000 983 +near 0 14 4.317488 0.000000 1091 +borland 0 14 4.317488 0.000000 1067 +everyth 0 13 4.382027 0.000000 1169 +rememb 0 12 4.465908 0.000000 1217 +vectra 0 12 4.465908 0.000000 1267 +fill 0 11 4.553877 0.000000 1349 +deadlin 1 9 4.753590 4.753590 1502 +didn 0 9 4.753590 0.000000 1563 +informationemail 0 9 4.753590 0.000000 1564 +bottom 0 7 5.010635 0.000000 1906 +ifyou 0 6 5.164786 0.000000 1992 +sharp 0 6 5.164786 0.000000 2100 +clarif 0 5 5.347108 0.000000 2253 +informationc 0 5 5.347108 0.000000 2394 +pagecours 0 5 5.347108 0.000000 2395 +savitchaddison 0 5 5.347108 0.000000 2396 +labc 0 4 5.568345 0.000000 2787 +policygrad 0 4 5.568345 0.000000 2788 +erratalast 0 4 5.568345 0.000000 2789 +steel 0 4 5.568345 0.000000 2818 +programmingassign 0 3 5.857933 0.000000 3398 +homeclass 0 3 5.857933 0.000000 3411 +nolandinstructor 0 2 6.263398 0.000000 4785 +msteel 1 1 6.957497 6.957497 9107 +steeleemail 0 1 6.957497 0.000000 9108 +buildingoffic 0 1 6.957497 0.000000 9109 +soffic 0 1 6.957497 0.000000 9110 +announcementsi 0 1 6.957497 0.000000 9111 +thenew 0 1 6.957497 0.000000 9112 +informationmidterm 0 1 6.957497 0.000000 9113 +pastfew 0 1 6.957497 0.000000 9114 +gloss 0 1 6.957497 0.000000 9115 +makefulli 0 1 6.957497 0.000000 9116 +objectivesabout 0 1 6.957497 0.000000 9117 +consultantscours 0 1 6.957497 0.000000 9118 +assignmentsnot 0 1 6.957497 0.000000 9119 +handoutsprogram 0 1 6.957497 0.000000 9120 +assignmentsexam 0 1 6.957497 0.000000 9121 +quizzessom 0 1 6.957497 0.000000 9122 +examplespolici 0 1 6.957497 0.000000 9123 +policyus 0 1 6.957497 0.000000 9124 +pagesintroduct 0 1 6.957497 0.000000 9125 +windowsintroduct 0 1 6.957497 0.000000 9126 +styleguid 0 1 6.957497 0.000000 9127 +codetextproblem 0 1 6.957497 0.000000 9128 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..f26b5265 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +read 0 154 1.791759 0.000000 47 +assign 1 135 1.945910 1.945910 66 +file 0 132 1.945910 0.000000 70 +like 0 132 1.945910 0.000000 81 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +introduct 1 126 2.079442 2.079442 87 +welcom 0 122 2.079442 0.000000 99 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +instructor 0 108 2.197225 0.000000 107 +pleas 0 113 2.197225 0.000000 114 +topic 0 114 2.197225 0.000000 110 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +code 0 108 2.197225 0.000000 116 +text 1 98 2.302585 2.302585 133 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +mani 0 92 2.397895 0.000000 150 +info 1 85 2.484907 2.484907 176 +exam 0 86 2.484907 0.000000 169 +level 0 87 2.484907 0.000000 180 +requir 0 81 2.484907 0.000000 167 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +second 0 81 2.484907 0.000000 166 +good 0 77 2.564949 0.000000 200 +decemb 0 80 2.564949 0.000000 215 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +tuesdai 0 73 2.639057 0.000000 219 +solv 0 73 2.639057 0.000000 234 +test 1 66 2.708050 2.708050 252 +knowledg 0 67 2.708050 0.000000 243 +syllabu 0 67 2.708050 0.000000 247 +window 0 68 2.708050 0.000000 242 +polici 1 64 2.772589 2.772589 279 +copi 0 63 2.772589 0.000000 284 +result 0 65 2.772589 0.000000 281 +import 0 65 2.772589 0.000000 282 +guid 0 63 2.772589 0.000000 267 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +think 0 57 2.890372 0.000000 314 +sampl 0 53 2.944439 0.000000 339 +archiv 1 49 3.044522 3.044522 364 +give 0 50 3.044522 0.000000 359 +done 0 47 3.091042 0.000000 381 +made 0 44 3.135494 0.000000 398 +anoth 0 45 3.135494 0.000000 408 +mark 0 44 3.135494 0.000000 403 +answer 0 45 3.135494 0.000000 391 +error 0 40 3.258097 0.000000 449 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +feel 0 37 3.332205 0.000000 483 +statist 1 35 3.401197 3.401197 521 +eduoffic 1 33 3.433987 3.433987 531 +ad 0 32 3.465736 0.000000 544 +idea 0 32 3.465736 0.000000 545 +rang 0 30 3.555348 0.000000 565 +administr 0 27 3.637586 0.000000 628 +session 0 26 3.688879 0.000000 643 +todai 1 25 3.737670 3.737670 672 +notic 0 25 3.737670 0.000000 675 +consult 1 24 3.761200 3.761200 687 +tent 0 22 3.850148 0.000000 739 +sure 0 20 3.951244 0.000000 813 +andrew 0 19 4.007333 0.000000 849 +five 0 19 4.007333 0.000000 841 +seem 0 18 4.060443 0.000000 899 +modif 0 17 4.110874 0.000000 913 +walter 0 17 4.110874 0.000000 950 +quiz 1 16 4.174387 4.174387 990 +alreadi 0 16 4.174387 0.000000 963 +score 0 15 4.248495 0.000000 1017 +style 0 15 4.248495 0.000000 1036 +borland 0 14 4.317488 0.000000 1067 +everyon 0 13 4.382027 0.000000 1148 +verifi 0 12 4.465908 0.000000 1261 +minor 0 12 4.465908 0.000000 1237 +rememb 0 12 4.465908 0.000000 1217 +savitch 0 12 4.465908 0.000000 1269 +vectra 0 12 4.465908 0.000000 1267 +extra 0 11 4.553877 0.000000 1312 +calendar 0 8 4.875197 0.000000 1649 +carefulli 0 6 5.164786 0.000000 2045 +ahead 0 5 5.347108 0.000000 2338 +crazi 1 4 5.568345 5.568345 2822 +webpag 0 4 5.568345 0.000000 2660 +prock 1 2 6.263398 6.263398 4786 +infoc 0 2 6.263398 0.000000 4772 +sessionalgebra 0 1 6.957497 0.000000 9129 +prockemail 0 1 6.957497 0.000000 9130 +thgrader 0 1 6.957497 0.000000 9131 +haihong 0 1 6.957497 0.000000 9132 +wangemail 0 1 6.957497 0.000000 9133 +mtwrannounc 0 1 6.957497 0.000000 9134 +gotton 0 1 6.957497 0.000000 9135 +perus 0 1 6.957497 0.000000 9136 +assing 0 1 6.957497 0.000000 9137 +misconductcours 0 1 6.957497 0.000000 9138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..5ce7b989 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +also 0 259 1.386294 0.000000 28 +avail 0 169 1.791759 0.000000 48 +file 0 132 1.945910 0.000000 70 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +need 1 98 2.302585 2.302585 135 +grade 0 90 2.397895 0.000000 142 +search 0 95 2.397895 0.000000 155 +info 2 85 2.484907 4.969814 176 +wide 0 84 2.484907 0.000000 185 +stuff 0 87 2.484907 0.000000 171 +resourc 0 81 2.484907 0.000000 172 +internet 0 83 2.484907 0.000000 186 +messag 1 76 2.564949 2.564949 212 +exampl 0 77 2.564949 0.000000 195 +addit 0 74 2.639057 0.000000 228 +function 1 62 2.772589 2.772589 275 +copi 0 63 2.772589 0.000000 284 +virtual 0 62 2.772589 0.000000 285 +visit 0 63 2.772589 0.000000 288 +type 0 61 2.833213 0.000000 296 +best 0 59 2.833213 0.000000 299 +semest 0 58 2.890372 0.000000 312 +index 0 56 2.890372 0.000000 309 +space 0 57 2.890372 0.000000 310 +maintain 0 51 2.995732 0.000000 342 +archiv 1 49 3.044522 3.044522 364 +without 0 50 3.044522 0.000000 370 +might 0 41 3.218876 0.000000 426 +everi 0 34 3.401197 0.000000 519 +word 0 34 3.401197 0.000000 508 +post 0 35 3.401197 0.000000 505 +quot 0 29 3.583519 0.000000 582 +usual 0 28 3.610918 0.000000 608 +compress 0 23 3.806662 0.000000 719 +sent 0 22 3.850148 0.000000 763 +disk 0 22 3.850148 0.000000 747 +try 0 22 3.850148 0.000000 764 +command 0 14 4.317488 0.000000 1083 +keyword 0 11 4.553877 0.000000 1356 +enter 0 10 4.653960 0.000000 1454 +paragraph 0 10 4.653960 0.000000 1449 +tourist 0 8 4.875197 0.000000 1710 +usenet 1 7 5.010635 5.010635 1839 +nine 0 6 5.164786 0.000000 2047 +ignor 0 5 5.347108 0.000000 2288 +clickher 0 5 5.347108 0.000000 2428 +kelli 1 4 5.568345 5.568345 2793 +backup 0 4 5.568345 0.000000 2645 +oracl 0 4 5.568345 0.000000 2823 +ratliff 0 3 5.857933 0.000000 3419 +badger 0 3 5.857933 0.000000 3502 +sharewar 0 3 5.857933 0.000000 3503 +freewar 0 3 5.857933 0.000000 3504 +shuttl 0 2 6.263398 0.000000 4787 +clickabl 0 2 6.263398 0.000000 4788 +herald 0 2 6.263398 0.000000 4789 +biggest 0 2 6.263398 0.000000 4790 +desautel 0 2 6.263398 0.000000 4791 +simtel 1 1 6.957497 6.957497 9139 +wildcard 0 1 6.957497 0.000000 9140 +filesviru 0 1 6.957497 0.000000 9141 +faqfun 0 1 6.957497 0.000000 9142 +mapth 0 1 6.957497 0.000000 9143 +comicshumor 0 1 6.957497 0.000000 9144 +abort 0 1 6.957497 0.000000 9145 +retri 0 1 6.957497 0.000000 9146 +usersfin 0 1 6.957497 0.000000 9147 +weeklab 0 1 6.957497 0.000000 9148 +jokesget 0 1 6.957497 0.000000 9149 +tryingsom 0 1 6.957497 0.000000 9150 +somecompress 0 1 6.957497 0.000000 9151 +unpack 0 1 6.957497 0.000000 9152 +reviewsom 0 1 6.957497 0.000000 9153 +minclud 0 1 6.957497 0.000000 9154 +infocompress 0 1 6.957497 0.000000 9155 +infofavorit 0 1 6.957497 0.000000 9156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..60fc7be9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +like 0 132 1.945910 0.000000 81 +lectur 0 135 1.945910 0.000000 73 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +assign 0 135 1.945910 0.000000 66 +introduct 1 126 2.079442 2.079442 87 +instructor 0 108 2.197225 0.000000 107 +find 0 111 2.197225 0.000000 111 +final 0 116 2.197225 0.000000 108 +grade 0 90 2.397895 0.000000 142 +section 0 94 2.397895 0.000000 149 +novemb 1 81 2.484907 2.484907 179 +build 0 85 2.484907 0.000000 184 +academ 0 82 2.484907 0.000000 178 +come 0 78 2.564949 0.000000 202 +mondai 0 77 2.564949 0.000000 206 +solv 0 73 2.639057 0.000000 234 +window 1 68 2.708050 2.708050 242 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 0 64 2.772589 0.000000 261 +handout 0 64 2.772589 0.000000 263 +polici 0 64 2.772589 0.000000 279 +import 0 65 2.772589 0.000000 282 +room 1 59 2.833213 2.833213 301 +semest 0 58 2.890372 0.000000 312 +keep 0 44 3.135494 0.000000 409 +textbook 0 44 3.135494 0.000000 397 +compani 0 41 3.218876 0.000000 423 +late 0 40 3.258097 0.000000 439 +microsoft 0 38 3.295837 0.000000 468 +game 0 36 3.367296 0.000000 498 +statist 0 35 3.401197 0.000000 521 +eduoffic 0 33 3.433987 0.000000 531 +except 0 28 3.610918 0.000000 607 +although 0 25 3.737670 0.000000 667 +walter 0 17 4.110874 0.000000 950 +misconduct 0 16 4.174387 0.000000 1003 +borland 0 14 4.317488 0.000000 1067 +quizz 0 13 4.382027 0.000000 1151 +savitch 1 12 4.465908 4.465908 1269 +vectra 0 12 4.465908 0.000000 1267 +sundai 0 10 4.653960 0.000000 1387 +russel 0 9 4.753590 0.000000 1507 +saturdai 0 7 5.010635 0.000000 1794 +footbal 0 7 5.010635 0.000000 1912 +prioriti 0 7 5.010635 0.000000 1792 +none 0 7 5.010635 0.000000 1811 +basement 0 4 5.568345 0.000000 2663 +man 0 3 5.857933 0.000000 3417 +csc 0 3 5.857933 0.000000 3183 +windowshint 0 3 5.857933 0.000000 3484 +compilersth 0 3 5.857933 0.000000 3485 +systememailmosaicnetscap 0 3 5.857933 0.000000 3486 +languageth 0 3 5.857933 0.000000 3487 +russ 0 1 6.957497 0.000000 9157 +manningemail 0 1 6.957497 0.000000 9158 +rman 0 1 6.957497 0.000000 9159 +scienceoffic 0 1 6.957497 0.000000 9160 +rotc 0 1 6.957497 0.000000 9161 +textold 0 1 6.957497 0.000000 9162 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..c743d6a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +modifi 0 178 1.609438 0.000000 35 +network 1 168 1.791759 1.791759 61 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +topic 0 114 2.197225 0.000000 110 +instructor 0 108 2.197225 0.000000 107 +techniqu 0 99 2.302585 0.000000 138 +search 1 95 2.397895 2.397895 155 +section 0 94 2.397895 0.000000 149 +learn 1 86 2.484907 2.484907 170 +academ 0 82 2.484907 0.000000 178 +exam 0 86 2.484907 0.000000 169 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +intellig 1 72 2.639057 2.639057 225 +solv 1 73 2.639057 2.639057 234 +logic 0 71 2.639057 0.000000 230 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +best 0 59 2.833213 0.000000 299 +plai 0 60 2.833213 0.000000 307 +content 0 59 2.833213 0.000000 302 +semest 0 58 2.890372 0.000000 312 +variou 0 56 2.890372 0.000000 317 +cover 0 55 2.944439 0.000000 329 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +principl 0 48 3.044522 0.000000 357 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +natur 0 44 3.135494 0.000000 406 +offer 0 43 3.178054 0.000000 414 +vision 1 41 3.218876 3.218876 430 +autom 0 41 3.218876 0.000000 434 +seminar 0 38 3.295837 0.000000 470 +robot 1 36 3.367296 3.367296 497 +game 0 36 3.367296 0.000000 498 +represent 0 35 3.401197 0.000000 512 +semant 0 29 3.583519 0.000000 587 +rule 0 26 3.688879 0.000000 638 +frame 0 24 3.761200 0.000000 684 +motion 0 24 3.761200 0.000000 699 +expert 0 20 3.951244 0.000000 833 +mostli 0 19 4.007333 0.000000 869 +lisp 0 18 4.060443 0.000000 897 +biologi 0 15 4.248495 0.000000 1049 +chuck 0 14 4.317488 0.000000 1108 +prolog 0 13 4.382027 0.000000 1155 +deduct 1 12 4.465908 4.465908 1236 +readabl 0 12 4.465908 0.000000 1258 +alpha 0 11 4.553877 0.000000 1348 +wendt 0 10 4.653960 0.000000 1446 +dyer 0 9 4.753590 0.000000 1573 +qualifi 0 8 4.875197 0.000000 1721 +predic 0 7 5.010635 0.000000 1806 +jude 1 6 5.164786 5.164786 2123 +beta 0 6 5.164786 0.000000 1993 +extern 0 6 5.164786 0.000000 2105 +shavlik 1 5 5.347108 5.347108 2429 +connectionist 0 5 5.347108 0.000000 2430 +bryan 0 5 5.347108 0.000000 2421 +sabbat 0 4 5.568345 0.000000 2824 +kunen 0 3 5.857933 0.000000 3500 +thefal 0 1 6.957497 0.000000 9163 +assumedprerequisit 0 1 6.957497 0.000000 9164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..bf794d84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,217 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 2 242 1.386294 2.772588 33 +link 1 247 1.386294 1.386294 24 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +read 2 154 1.791759 3.583518 47 +network 2 168 1.791759 3.583518 61 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +assign 2 135 1.945910 3.891820 66 +relat 1 139 1.945910 1.945910 68 +lectur 0 135 1.945910 0.000000 73 +spring 2 131 2.079442 4.158884 88 +postscript 2 131 2.079442 4.158884 90 +machin 1 129 2.079442 2.079442 95 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +databas 0 122 2.079442 0.000000 86 +theori 1 111 2.197225 2.197225 127 +instructor 0 108 2.197225 0.000000 107 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +person 0 111 2.197225 0.000000 117 +make 0 111 2.197225 0.000000 120 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +code 0 108 2.197225 0.000000 116 +peopl 0 96 2.302585 0.000000 132 +question 1 91 2.397895 2.397895 141 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +learn 2 86 2.484907 4.969814 170 +journal 2 83 2.484907 4.969814 183 +librari 1 87 2.484907 2.484907 181 +resourc 1 81 2.484907 2.484907 172 +institut 1 84 2.484907 2.484907 187 +help 1 83 2.484907 2.484907 175 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +start 0 83 2.484907 0.000000 173 +exam 0 86 2.484907 0.000000 169 +stuff 0 87 2.484907 0.000000 171 +ieee 0 86 2.484907 0.000000 190 +homework 2 79 2.564949 5.129898 193 +april 2 77 2.564949 5.129898 196 +mondai 2 77 2.564949 5.129898 206 +server 1 76 2.564949 2.564949 204 +refer 1 78 2.564949 2.564949 203 +free 1 73 2.639057 2.639057 224 +logic 0 71 2.639057 0.000000 230 +summari 0 73 2.639057 0.000000 237 +materi 0 75 2.639057 0.000000 221 +line 0 75 2.639057 0.000000 231 +write 0 72 2.639057 0.000000 222 +knowledg 1 67 2.708050 2.708050 243 +syllabu 0 67 2.708050 0.000000 247 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +polici 1 64 2.772589 2.772589 279 +artifici 0 63 2.772589 0.000000 280 +creat 0 63 2.772589 0.000000 277 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +march 1 61 2.833213 2.833213 295 +best 0 59 2.833213 0.000000 299 +content 0 59 2.833213 0.000000 302 +space 1 57 2.890372 2.890372 310 +semest 1 58 2.890372 2.890372 312 +overview 0 56 2.890372 0.000000 323 +sever 0 56 2.890372 0.000000 322 +februari 1 54 2.944439 2.944439 328 +local 1 55 2.944439 2.944439 334 +suggest 0 53 2.944439 0.000000 331 +cover 0 55 2.944439 0.000000 329 +week 0 52 2.995732 0.000000 343 +tabl 0 51 2.995732 0.000000 346 +archiv 1 49 3.044522 3.044522 364 +frequent 1 49 3.044522 3.044522 367 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +adapt 0 46 3.091042 0.000000 387 +textbook 2 44 3.135494 6.270988 397 +answer 0 45 3.135494 0.000000 391 +math 0 44 3.135494 0.000000 402 +edit 0 42 3.218876 0.000000 418 +author 2 39 3.258097 6.516194 450 +late 1 40 3.258097 3.258097 439 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +connect 0 37 3.332205 0.000000 485 +workstat 0 37 3.332205 0.000000 479 +tree 1 36 3.367296 3.367296 492 +bibliographi 1 34 3.401197 3.401197 518 +print 1 34 3.401197 3.401197 503 +next 0 34 3.401197 0.000000 517 +manual 0 35 3.401197 0.000000 504 +articl 1 33 3.433987 3.433987 530 +chapter 2 32 3.465736 6.931472 536 +concept 1 32 3.465736 3.465736 537 +idea 0 32 3.465736 0.000000 545 +neural 2 30 3.555348 7.110696 578 +common 1 30 3.555348 3.555348 574 +ask 1 28 3.610918 3.610918 597 +measur 0 28 3.610918 0.000000 609 +progress 0 28 3.610918 0.000000 598 +compar 1 26 3.688879 3.688879 648 +experiment 1 26 3.688879 3.688879 645 +comp 0 26 3.688879 0.000000 650 +lab 0 24 3.761200 0.000000 698 +decis 1 23 3.806662 3.806662 728 +lead 0 23 3.806662 0.000000 718 +methodolog 0 23 3.806662 0.000000 733 +instead 0 22 3.850148 0.000000 756 +dai 0 22 3.850148 0.000000 753 +sure 0 20 3.951244 0.000000 813 +department 0 20 3.951244 0.000000 839 +feedback 2 19 4.007333 8.014666 854 +definit 0 19 4.007333 0.000000 864 +five 0 19 4.007333 0.000000 841 +mostli 0 19 4.007333 0.000000 869 +benchmark 0 19 4.007333 0.000000 859 +lisp 1 18 4.060443 4.060443 897 +accept 0 18 4.060443 0.000000 879 +behavior 0 18 4.060443 0.000000 881 +stat 0 17 4.110874 0.000000 924 +analyz 0 17 4.110874 0.000000 925 +previous 0 17 4.110874 0.000000 923 +sheet 0 16 4.174387 0.000000 973 +explan 0 16 4.174387 0.000000 985 +biologi 0 15 4.248495 0.000000 1049 +train 1 14 4.317488 4.317488 1066 +emac 0 13 4.382027 0.000000 1143 +readabl 1 12 4.465908 4.465908 1258 +refin 0 11 4.553877 0.000000 1363 +summar 0 11 4.553877 0.000000 1295 +council 0 11 4.553877 0.000000 1364 +genet 0 10 4.653960 0.000000 1409 +sentenc 0 10 4.653960 0.000000 1413 +penalti 0 10 4.653960 0.000000 1405 +wendt 0 10 4.653960 0.000000 1446 +moonei 1 9 4.753590 4.753590 1520 +debugg 0 9 4.753590 0.000000 1493 +empir 1 8 4.875197 4.875197 1722 +irvin 0 8 4.875197 0.000000 1660 +printer 0 8 4.875197 0.000000 1621 +noon 1 7 5.010635 5.010635 1804 +tip 1 7 5.010635 5.010635 1863 +analyt 0 7 5.010635 0.000000 1913 +migrat 0 7 5.010635 0.000000 1851 +dataset 0 7 5.010635 0.000000 1914 +discoveri 0 7 5.010635 0.000000 1915 +jude 1 6 5.164786 5.164786 2123 +geoff 0 6 5.164786 0.000000 2124 +highwai 0 6 5.164786 0.000000 2095 +heurist 0 6 5.164786 0.000000 2125 +extern 0 6 5.164786 0.000000 2105 +shavlik 1 5 5.347108 5.347108 2429 +reinforc 1 4 5.568345 5.568345 2674 +basement 0 4 5.568345 0.000000 2663 +exhaust 0 4 5.568345 0.000000 2825 +novic 0 4 5.568345 0.000000 2815 +steel 0 4 5.568345 0.000000 2818 +weinberg 0 3 5.857933 0.000000 3443 +geoffrei 0 3 5.857933 0.000000 3505 +soar 0 3 5.857933 0.000000 3506 +backpropag 0 3 5.857933 0.000000 3507 +weekend 0 3 5.857933 0.000000 3357 +canadian 0 3 5.857933 0.000000 3508 +mitchel 2 2 6.263398 12.526796 4792 +towel 1 2 6.263398 6.263398 4793 +fisher 1 2 6.263398 6.263398 4794 +induc 1 2 6.263398 6.263398 4795 +akcl 1 2 6.263398 6.263398 4796 +quinlan 0 2 6.263398 0.000000 4797 +unsupervis 0 2 6.263398 0.000000 4233 +cogsci 0 2 6.263398 0.000000 4798 +tractabl 0 2 6.263398 0.000000 4799 +rumelhart 1 1 6.957497 6.957497 9165 +backprop 1 1 6.957497 6.957497 9166 +cobweb 1 1 6.957497 6.957497 9167 +austrian 1 1 6.957497 6.957497 9168 +chunk 0 1 6.957497 0.000000 9169 +laird 0 1 6.957497 0.000000 9170 +rosenbloom 0 1 6.957497 0.000000 9171 +newel 0 1 6.957497 0.000000 9172 +dietterich 0 1 6.957497 0.000000 9173 +zipser 0 1 6.957497 0.000000 9174 +lenat 0 1 6.957497 0.000000 9175 +kibler 0 1 6.957497 0.000000 9176 +kbann 0 1 6.957497 0.000000 9177 +brr 0 1 6.957497 0.000000 9178 +ineedagoodicon 0 1 6.957497 0.000000 9179 +nip 0 1 6.957497 0.000000 9180 +premier 0 1 6.957497 0.000000 9181 +shavlikshavlik 0 1 6.957497 0.000000 9182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..b2a88439 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,248 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +paper 2 205 1.609438 3.218876 38 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +group 1 183 1.609438 1.609438 36 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +lectur 1 135 1.945910 1.945910 73 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +schedul 1 119 2.079442 2.079442 85 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +manag 1 114 2.197225 2.197225 125 +place 0 106 2.197225 0.000000 124 +version 0 113 2.197225 0.000000 122 +structur 0 106 2.197225 0.000000 105 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +memori 1 101 2.302585 2.302585 139 +text 1 98 2.302585 2.302585 133 +advanc 0 99 2.302585 0.000000 130 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +octob 0 89 2.397895 0.000000 156 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +exam 1 86 2.484907 2.484907 169 +activ 1 84 2.484907 2.484907 182 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +requir 0 81 2.484907 0.000000 167 +mondai 1 77 2.564949 2.564949 206 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +good 0 77 2.564949 0.000000 200 +complet 0 77 2.564949 0.000000 208 +summari 1 73 2.639057 2.639057 237 +write 1 72 2.639057 2.639057 222 +tuesdai 0 73 2.639057 0.000000 219 +involv 0 71 2.639057 0.000000 227 +meet 0 72 2.639057 0.000000 229 +thursdai 0 70 2.708050 0.000000 241 +simul 0 66 2.708050 0.000000 255 +new 1 64 2.772589 2.772589 262 +copi 1 63 2.772589 2.772589 284 +wednesdai 0 64 2.772589 0.000000 261 +previou 0 62 2.772589 0.000000 290 +result 0 65 2.772589 0.000000 281 +room 1 59 2.833213 2.833213 301 +content 1 59 2.833213 2.833213 302 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +space 0 57 2.890372 0.000000 310 +major 0 56 2.890372 0.000000 315 +point 0 58 2.890372 0.000000 319 +suggest 1 53 2.944439 2.944439 331 +sampl 0 53 2.944439 0.000000 339 +cover 0 55 2.944439 0.000000 329 +week 0 52 2.995732 0.000000 343 +much 0 52 2.995732 0.000000 349 +case 0 51 2.995732 0.000000 351 +give 0 50 3.044522 0.000000 359 +standard 0 48 3.044522 0.000000 365 +done 0 47 3.091042 0.000000 381 +midterm 1 45 3.135494 3.135494 392 +discuss 1 45 3.135494 3.135494 399 +fridai 0 44 3.135494 0.000000 390 +answer 0 45 3.135494 0.000000 391 +textbook 0 44 3.135494 0.000000 397 +term 1 43 3.178054 3.178054 411 +around 0 43 3.178054 0.000000 415 +past 0 42 3.218876 0.000000 428 +review 0 42 3.218876 0.000000 425 +must 1 40 3.258097 3.258097 442 +realli 0 40 3.258097 0.000000 444 +expect 0 37 3.332205 0.000000 484 +statist 1 35 3.401197 3.401197 521 +survei 0 35 3.401197 0.000000 513 +approxim 0 35 3.401197 0.000000 509 +jame 0 35 3.401197 0.000000 507 +michael 0 35 3.401197 0.000000 514 +articl 0 33 3.433987 0.000000 530 +richard 0 31 3.496508 0.000000 559 +secur 0 30 3.555348 0.000000 577 +compon 0 30 3.555348 0.000000 570 +particip 1 29 3.583519 3.583519 589 +synchron 0 29 3.583519 0.000000 588 +intend 0 28 3.610918 0.000000 599 +measur 0 28 3.610918 0.000000 609 +experiment 1 26 3.688879 3.688879 645 +relev 0 26 3.688879 0.000000 637 +daili 0 24 3.761200 0.000000 706 +tent 0 22 3.850148 0.000000 739 +wang 1 21 3.912023 3.912023 790 +chen 1 21 3.912023 3.912023 791 +watch 0 21 3.912023 0.000000 789 +latest 0 21 3.912023 0.000000 785 +similar 0 21 3.912023 0.000000 771 +basi 0 20 3.951244 0.000000 828 +qualiti 0 20 3.951244 0.000000 832 +assum 0 19 4.007333 0.000000 845 +andrew 0 19 4.007333 0.000000 849 +eric 0 19 4.007333 0.000000 870 +encourag 1 18 4.060443 4.060443 880 +figur 0 18 4.060443 0.000000 903 +listen 0 18 4.060443 0.000000 907 +protect 0 17 4.110874 0.000000 935 +normal 0 16 4.174387 0.000000 995 +zhang 0 16 4.174387 0.000000 980 +purchas 0 15 4.248495 0.000000 1030 +todd 0 15 4.248495 0.000000 1051 +classic 0 14 4.317488 0.000000 1084 +doit 0 14 4.317488 0.000000 1111 +individu 0 13 4.382027 0.000000 1126 +readabl 0 12 4.465908 0.000000 1258 +verifi 0 12 4.465908 0.000000 1261 +literatur 1 11 4.553877 4.553877 1300 +broad 0 11 4.553877 0.000000 1302 +worth 0 11 4.553877 0.000000 1294 +summar 0 11 4.553877 0.000000 1295 +strongli 1 10 4.653960 4.653960 1406 +operatingsystem 0 10 4.653960 0.000000 1401 +total 0 10 4.653960 0.000000 1398 +exact 0 9 4.753590 0.000000 1509 +informationabout 0 9 4.753590 0.000000 1515 +equival 0 9 4.753590 0.000000 1496 +herefor 0 9 4.753590 0.000000 1483 +solomon 1 8 4.875197 4.875197 1716 +theme 0 8 4.875197 0.000000 1707 +noon 0 7 5.010635 0.000000 1804 +suffici 0 7 5.010635 0.000000 1897 +larger 0 7 5.010635 0.000000 1875 +smaller 0 7 5.010635 0.000000 1874 +craig 0 7 5.010635 0.000000 1879 +sciencesoffic 0 6 5.164786 0.000000 2101 +onoper 0 6 5.164786 0.000000 2048 +carefulli 0 6 5.164786 0.000000 2045 +approv 0 6 5.164786 0.000000 2078 +prasad 0 6 5.164786 0.000000 2126 +formerli 0 5 5.347108 0.000000 2397 +deshpand 0 5 5.347108 0.000000 2431 +systemsfal 0 4 5.568345 0.000000 2683 +marvin 0 4 5.568345 0.000000 2806 +exposur 0 4 5.568345 0.000000 2598 +ident 0 4 5.568345 0.000000 2826 +will 0 4 5.568345 0.000000 2782 +raman 0 4 5.568345 0.000000 2827 +advancedoper 0 3 5.857933 0.000000 3403 +macc 0 3 5.857933 0.000000 3414 +focal 0 3 5.857933 0.000000 3404 +gradingther 0 3 5.857933 0.000000 3455 +franci 0 3 5.857933 0.000000 3287 +pang 0 3 5.857933 0.000000 3509 +avinash 0 3 5.857933 0.000000 3510 +rajesh 0 3 5.857933 0.000000 3511 +troffic 0 2 6.263398 0.000000 4706 +pmin 0 2 6.263398 0.000000 4492 +avaiabl 0 2 6.263398 0.000000 4703 +multic 0 2 6.263398 0.000000 4304 +interprocess 0 2 6.263398 0.000000 4174 +satisfactori 0 2 6.263398 0.000000 4567 +usea 0 2 6.263398 0.000000 4800 +andconfer 0 2 6.263398 0.000000 4568 +deskfor 0 2 6.263398 0.000000 4584 +youto 0 2 6.263398 0.000000 4093 +willinstead 0 2 6.263398 0.000000 4569 +adiscuss 0 2 6.263398 0.000000 4570 +geta 0 2 6.263398 0.000000 4571 +quietli 0 2 6.263398 0.000000 4572 +thoroughli 0 2 6.263398 0.000000 4801 +salmon 0 2 6.263398 0.000000 4802 +chien 0 2 6.263398 0.000000 4541 +sodani 0 2 6.263398 0.000000 4803 +basnei 0 2 6.263398 0.000000 4804 +biswadeep 0 2 6.263398 0.000000 4805 +taxiao 0 2 6.263398 0.000000 4806 +sridhar 0 2 6.263398 0.000000 4807 +eduthu 0 2 6.263398 0.000000 4721 +inroom 0 1 6.957497 0.000000 9183 +examtogeth 0 1 6.957497 0.000000 9184 +bedetermin 0 1 6.957497 0.000000 9185 +inconsider 0 1 6.957497 0.000000 9186 +sciencestextther 0 1 6.957497 0.000000 9187 +papersa 0 1 6.957497 0.000000 9188 +thoseof 0 1 6.957497 0.000000 9189 +lessout 0 1 6.957497 0.000000 9190 +projecty 0 1 6.957497 0.000000 9191 +implementationsof 0 1 6.957497 0.000000 9192 +unvalid 0 1 6.957497 0.000000 9193 +ashort 0 1 6.957497 0.000000 9194 +presentationabout 0 1 6.957497 0.000000 9195 +presentationsher 0 1 6.957497 0.000000 9196 +presen 0 1 6.957497 0.000000 9197 +manyan 0 1 6.957497 0.000000 9198 +stubb 0 1 6.957497 0.000000 9199 +bigg 0 1 6.957497 0.000000 9200 +gunawan 0 1 6.957497 0.000000 9201 +agu 0 1 6.957497 0.000000 9202 +qingmin 0 1 6.957497 0.000000 9203 +larsen 0 1 6.957497 0.000000 9204 +conroi 0 1 6.957497 0.000000 9205 +fritz 0 1 6.957497 0.000000 9206 +jordan 0 1 6.957497 0.000000 9207 +yanm 0 1 6.957497 0.000000 9208 +xinyu 0 1 6.957497 0.000000 9209 +munson 0 1 6.957497 0.000000 9210 +wenjun 0 1 6.957497 0.000000 9211 +xinyi 0 1 6.957497 0.000000 9212 +yufei 0 1 6.957497 0.000000 9213 +zeyu 0 1 6.957497 0.000000 9214 +gopal 0 1 6.957497 0.000000 9215 +leesolomon 0 1 6.957497 0.000000 9216 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..59a9f4be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +time 0 293 1.098612 0.000000 17 +homepag 1 93 2.397895 2.397895 148 +chiang 0 7 5.010635 0.000000 1853 +gradesgo 0 1 6.957497 0.000000 9217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..51c6a06b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +modifi 1 178 1.609438 1.609438 35 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +machin 1 129 2.079442 2.079442 95 +compil 0 122 2.079442 0.000000 96 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +solut 1 82 2.484907 2.484907 162 +contain 0 81 2.484907 0.000000 174 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +want 0 79 2.564949 0.000000 199 +mondai 0 77 2.564949 0.000000 206 +write 1 72 2.639057 2.639057 222 +solv 0 73 2.639057 0.000000 234 +window 1 68 2.708050 2.708050 242 +copi 1 63 2.772589 2.772589 284 +room 1 59 2.833213 2.833213 301 +locat 0 59 2.833213 0.000000 303 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +cover 0 55 2.944439 0.000000 329 +particular 1 51 2.995732 2.995732 352 +run 0 51 2.995732 0.000000 347 +week 0 52 2.995732 0.000000 343 +pointer 0 48 3.044522 0.000000 368 +even 0 45 3.135494 0.000000 393 +textbook 0 44 3.135494 0.000000 397 +netscap 0 44 3.135494 0.000000 395 +long 0 43 3.178054 0.000000 413 +howev 1 41 3.218876 3.218876 422 +must 0 40 3.258097 0.000000 442 +probabl 0 40 3.258097 0.000000 455 +microsoft 1 38 3.295837 3.295837 468 +open 0 38 3.295837 0.000000 469 +hand 0 37 3.332205 0.000000 475 +copyright 0 36 3.367296 0.000000 495 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +either 0 35 3.401197 0.000000 506 +go 0 33 3.433987 0.000000 529 +depend 0 29 3.583519 0.000000 583 +except 0 28 3.610918 0.000000 607 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +toward 0 25 3.737670 0.000000 668 +jeff 0 25 3.737670 0.000000 673 +lab 0 24 3.761200 0.000000 698 +dai 0 22 3.850148 0.000000 753 +avoid 0 21 3.912023 0.000000 799 +increas 0 20 3.951244 0.000000 829 +exercis 1 19 4.007333 4.007333 842 +left 0 19 4.007333 0.000000 851 +lyco 0 19 4.007333 0.000000 871 +along 0 18 4.060443 0.000000 878 +across 0 16 4.174387 0.000000 974 +dilbert 0 16 4.174387 0.000000 996 +fortran 2 15 4.248495 8.496990 1027 +configur 0 15 4.248495 0.000000 1012 +purchas 0 15 4.248495 0.000000 1030 +comic 0 14 4.317488 0.000000 1103 +primarili 0 13 4.382027 0.000000 1185 +vectra 1 12 4.465908 4.465908 1267 +overal 0 12 4.465908 0.000000 1254 +holidai 0 12 4.465908 0.000000 1224 +insid 0 12 4.465908 0.000000 1262 +keyword 0 11 4.553877 0.000000 1356 +night 0 11 4.553877 0.000000 1319 +packard 0 10 4.653960 0.000000 1444 +certain 0 10 4.653960 0.000000 1393 +seven 0 9 4.753590 0.000000 1561 +prefer 0 9 4.753590 0.000000 1491 +correctli 0 9 4.753590 0.000000 1478 +wall 0 9 4.753590 0.000000 1553 +hewlett 0 8 4.875197 0.000000 1709 +printer 0 8 4.875197 0.000000 1621 +bestor 1 6 5.164786 5.164786 2099 +lampert 0 5 5.347108 0.000000 2398 +gareth 0 5 5.347108 0.000000 2392 +closest 0 4 5.568345 0.000000 2828 +relief 0 4 5.568345 0.000000 2784 +labyou 0 3 5.857933 0.000000 3406 +aren 0 3 5.857933 0.000000 3512 +dorm 0 3 5.857933 0.000000 3407 +lahei 0 3 5.857933 0.000000 3408 +quota 1 2 6.263398 6.263398 4753 +exce 0 1 6.957497 0.000000 9218 +bewar 0 1 6.957497 0.000000 9219 +outsidehallwai 0 1 6.957497 0.000000 9220 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..345e6a48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +document 0 121 2.079442 0.000000 89 +check 0 115 2.197225 0.000000 118 +homepag 1 93 2.397895 2.397895 148 +section 0 94 2.397895 0.000000 149 +sinc 0 90 2.397895 0.000000 159 +chang 0 82 2.484907 0.000000 163 +solv 0 73 2.639057 0.000000 234 +frequent 0 49 3.044522 0.000000 367 +textbook 0 44 3.135494 0.000000 397 +purpos 0 37 3.332205 0.000000 481 +eduoffic 0 33 3.433987 0.000000 531 +often 0 31 3.496508 0.000000 551 +walter 0 17 4.110874 0.000000 950 +todd 0 15 4.248495 0.000000 1051 +assignmentsprogram 0 6 5.164786 0.000000 2019 +assignmentshomework 0 4 5.568345 0.000000 2721 +pertain 0 3 5.857933 0.000000 3208 +homepagewelcom 0 2 6.263398 0.000000 4808 +tmunson 0 2 6.263398 0.000000 4809 +statisticsoffic 0 2 6.263398 0.000000 4810 +homepagec 0 1 6.957497 0.000000 9221 +responsibilityto 0 1 6.957497 0.000000 9222 +informationinstructor 0 1 6.957497 0.000000 9223 +munsonemail 0 1 6.957497 0.000000 9224 +appointmentsect 0 1 6.957497 0.000000 9225 +savitchclass 0 1 6.957497 0.000000 9226 +informationexpectationssyllabusexam 0 1 6.957497 0.000000 9227 +schedule 0 1 6.957497 0.000000 9228 +mailgradingl 0 1 6.957497 0.000000 9229 +assignmentsextra 0 1 6.957497 0.000000 9230 +creditpoliciesconsult 0 1 6.957497 0.000000 9231 +responsibilitiesacadem 0 1 6.957497 0.000000 9232 +misconductoth 0 1 6.957497 0.000000 9233 +informationdaili 0 1 6.957497 0.000000 9234 +classoth 0 1 6.957497 0.000000 9235 +resourcesc 0 1 6.957497 0.000000 9236 +homepagetmunson 0 1 6.957497 0.000000 9237 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..954144ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +assign 1 135 1.945910 1.945910 66 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +place 0 106 2.197225 0.000000 124 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +instructor 0 108 2.197225 0.000000 107 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 0 90 2.397895 0.000000 142 +start 1 83 2.484907 2.484907 173 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +syllabu 1 67 2.708050 2.708050 247 +window 0 68 2.708050 0.000000 242 +handout 1 64 2.772589 2.772589 263 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +date 0 51 2.995732 0.000000 344 +get 1 46 3.091042 3.091042 380 +netscap 0 44 3.135494 0.000000 395 +consult 0 24 3.761200 0.000000 687 +tent 0 22 3.850148 0.000000 739 +facil 0 20 3.951244 0.000000 814 +whole 0 17 4.110874 0.000000 940 +todd 1 15 4.248495 4.248495 1051 +difficulti 0 13 4.382027 0.000000 1132 +tutor 0 9 4.753590 0.000000 1552 +turnidg 1 4 5.568345 5.568345 2829 +struct 0 4 5.568345 0.000000 2821 +nolandinstructor 0 2 6.263398 0.000000 4785 +muchinform 0 2 6.263398 0.000000 4811 +turnidgeoffic 0 1 6.957497 0.000000 9238 +tbalab 0 1 6.957497 0.000000 9239 +tbaannouncementsclass 0 1 6.957497 0.000000 9240 +classa 0 1 6.957497 0.000000 9241 +byother 0 1 6.957497 0.000000 9242 +gregorysharp 0 1 6.957497 0.000000 9243 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..e6ad5cdb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +read 0 154 1.791759 0.000000 47 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +assign 1 135 1.945910 1.945910 66 +first 0 140 1.945910 0.000000 71 +problem 0 147 1.945910 0.000000 75 +object 0 138 1.945910 0.000000 79 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +instructor 0 108 2.197225 0.000000 107 +code 0 108 2.197225 0.000000 116 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +info 0 85 2.484907 0.000000 176 +chang 0 82 2.484907 0.000000 163 +sourc 0 77 2.564949 0.000000 201 +homework 0 79 2.564949 0.000000 193 +exampl 0 77 2.564949 0.000000 195 +solv 0 73 2.639057 0.000000 234 +thursdai 0 70 2.708050 0.000000 241 +syllabu 0 67 2.708050 0.000000 247 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +locat 0 59 2.833213 0.000000 303 +week 1 52 2.995732 2.995732 343 +appoint 0 49 3.044522 0.000000 358 +still 0 50 3.044522 0.000000 362 +algebra 0 45 3.135494 0.000000 394 +announc 0 40 3.258097 0.000000 441 +late 0 40 3.258097 0.000000 439 +statist 1 35 3.401197 3.401197 521 +everi 0 34 3.401197 0.000000 519 +print 0 34 3.401197 0.000000 503 +consult 0 24 3.761200 0.000000 687 +dai 0 22 3.850148 0.000000 753 +walter 0 17 4.110874 0.000000 950 +zhang 0 16 4.174387 0.000000 980 +weslei 0 16 4.174387 0.000000 983 +misconduct 0 16 4.174387 0.000000 1003 +quiz 0 16 4.174387 0.000000 990 +style 0 15 4.248495 0.000000 1036 +vectra 0 12 4.465908 0.000000 1267 +savitch 0 12 4.465908 0.000000 1269 +addison 0 12 4.465908 0.000000 1230 +chri 1 11 4.553877 4.553877 1311 +statement 0 11 4.553877 0.000000 1313 +errata 0 10 4.653960 0.000000 1403 +login 0 9 4.753590 0.000000 1550 +seven 0 9 4.753590 0.000000 1561 +reload 0 8 4.875197 0.000000 1682 +isbn 0 7 5.010635 0.000000 1901 +guidelin 0 7 5.010635 0.000000 1832 +rough 0 6 5.164786 0.000000 2107 +noland 1 5 5.347108 5.347108 2420 +psych 0 3 5.857933 0.000000 3498 +grader 0 3 5.857933 0.000000 3165 +weaver 1 2 6.263398 6.263398 4770 +kei 0 2 6.263398 0.000000 4812 +staf 0 1 6.957497 0.000000 9244 +policyassign 0 1 6.957497 0.000000 9245 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..023d9609 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +paper 1 205 1.609438 1.609438 38 +list 0 201 1.609438 0.000000 39 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 0 170 1.791759 0.000000 62 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +architectur 1 139 1.945910 1.945910 77 +hall 1 146 1.945910 1.945910 65 +perform 0 143 1.945910 0.000000 74 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +final 1 116 2.197225 2.197225 108 +instructor 0 108 2.197225 0.000000 107 +part 1 98 2.302585 2.302585 129 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +homework 2 79 2.564949 5.129898 193 +summari 0 73 2.639057 0.000000 237 +main 1 67 2.708050 2.708050 256 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +content 1 59 2.833213 2.833213 302 +special 0 56 2.890372 0.000000 320 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +instruct 0 53 2.944439 0.000000 332 +tabl 1 51 2.995732 2.995732 346 +set 0 50 3.044522 0.000000 361 +midterm 1 45 3.135494 3.135494 392 +offer 0 43 3.178054 0.000000 414 +cach 1 41 3.218876 3.218876 432 +review 0 42 3.218876 0.000000 425 +cost 0 37 3.332205 0.000000 480 +jame 0 35 3.401197 0.000000 507 +limit 0 29 3.583519 0.000000 585 +consid 0 29 3.583519 0.000000 590 +full 1 28 3.610918 3.610918 615 +multiprocessor 1 28 3.610918 3.610918 605 +arrai 1 27 3.637586 3.637586 627 +trace 0 25 3.737670 0.000000 677 +miscellan 0 23 3.806662 0.000000 731 +disk 1 22 3.850148 3.850148 747 +rout 1 21 3.912023 3.912023 793 +smith 0 20 3.951244 0.000000 820 +reserv 0 20 3.951244 0.000000 808 +thur 0 19 4.007333 0.000000 847 +interconnect 1 17 4.110874 4.110874 937 +vector 0 16 4.174387 0.000000 961 +doit 1 14 4.317488 4.317488 1111 +station 0 13 4.382027 0.000000 1157 +tue 0 11 4.553877 0.000000 1308 +clock 0 11 4.553877 0.000000 1320 +pipelin 1 7 5.010635 5.010635 1830 +biochemistri 0 3 5.857933 0.000000 3513 +vliw 0 3 5.857933 0.000000 3514 +harm 0 3 5.857933 0.000000 3515 +princ 0 2 6.263398 0.000000 4813 +specmark 0 2 6.263398 0.000000 4471 +princeoffic 0 1 6.957497 0.000000 9246 +miscellaneousnew 0 1 6.957497 0.000000 9247 +soln 0 1 6.957497 0.000000 9248 +pmread 0 1 6.957497 0.000000 9249 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..cb57548f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,245 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 2 273 1.098612 2.197224 15 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +cornel 2 215 1.386294 2.772588 23 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +data 1 170 1.791759 1.791759 49 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +first 0 140 1.945910 0.000000 71 +lectur 0 135 1.945910 0.000000 73 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +welcom 0 122 2.079442 0.000000 99 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +make 0 111 2.197225 0.000000 120 +instructor 0 108 2.197225 0.000000 107 +part 0 98 2.302585 0.000000 129 +graphic 2 90 2.397895 4.795790 147 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +imag 0 91 2.397895 0.000000 161 +exam 1 86 2.484907 2.484907 169 +level 0 87 2.484907 0.000000 180 +requir 0 81 2.484907 0.000000 167 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +homework 2 79 2.564949 5.129898 193 +dynam 1 76 2.564949 2.564949 194 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +state 0 76 2.564949 0.000000 207 +upson 1 71 2.639057 2.639057 218 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +tuesdai 0 73 2.639057 0.000000 219 +onlin 0 75 2.639057 0.000000 223 +view 1 70 2.708050 2.708050 254 +test 1 66 2.708050 2.708050 252 +main 0 67 2.708050 0.000000 256 +practic 0 70 2.708050 0.000000 246 +simul 0 66 2.708050 0.000000 255 +order 0 69 2.708050 0.000000 249 +complex 0 64 2.772589 0.000000 269 +guid 0 63 2.772589 0.000000 267 +march 1 61 2.833213 2.833213 295 +content 1 59 2.833213 2.833213 302 +simpl 0 60 2.833213 0.000000 298 +colleg 0 61 2.833213 0.000000 300 +space 1 57 2.890372 2.890372 310 +faculti 0 56 2.890372 0.000000 325 +scientif 1 53 2.944439 2.944439 341 +cover 0 55 2.944439 0.000000 329 +visual 1 48 3.044522 3.044522 372 +principl 0 48 3.044522 0.000000 357 +standard 0 48 3.044522 0.000000 365 +california 0 46 3.091042 0.000000 388 +math 1 44 3.135494 3.135494 402 +textbook 1 44 3.135494 3.135494 397 +vision 1 41 3.218876 3.218876 430 +combin 0 42 3.218876 0.000000 421 +examin 0 42 3.218876 0.000000 424 +error 1 40 3.258097 3.258097 449 +map 1 39 3.258097 3.258097 452 +form 0 39 3.258097 0.000000 443 +prototyp 0 38 3.295837 0.000000 463 +field 1 37 3.332205 3.332205 482 +connect 0 37 3.332205 0.000000 485 +mean 0 37 3.332205 0.000000 477 +staff 1 36 3.367296 3.367296 490 +copyright 0 36 3.367296 0.000000 495 +taught 0 33 3.433987 0.000000 526 +product 0 33 3.433987 0.000000 527 +transform 1 32 3.465736 3.465736 542 +human 1 32 3.465736 3.465736 546 +anim 1 31 3.496508 3.496508 557 +focus 0 29 3.583519 0.000000 584 +limit 0 29 3.583519 0.000000 585 +relev 0 26 3.688879 0.000000 637 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +other 0 24 3.761200 0.000000 697 +begin 0 23 3.806662 0.000000 716 +equat 0 23 3.806662 0.000000 724 +color 1 22 3.850148 3.850148 762 +deal 0 22 3.850148 0.000000 736 +properti 0 22 3.850148 0.000000 749 +serv 0 22 3.850148 0.000000 758 +period 0 22 3.850148 0.000000 743 +sent 0 22 3.850148 0.000000 763 +among 0 21 3.912023 0.000000 781 +viewer 0 21 3.912023 0.000000 787 +break 0 20 3.951244 0.000000 812 +prepar 0 20 3.951244 0.000000 824 +geometr 1 19 4.007333 4.007333 852 +miss 0 19 4.007333 0.000000 866 +dimension 1 18 4.060443 4.060443 909 +figur 0 18 4.060443 0.000000 903 +render 1 17 4.110874 4.110874 947 +differenti 0 17 4.110874 0.000000 921 +modif 0 17 4.110874 0.000000 913 +vector 1 16 4.174387 4.174387 961 +normal 0 16 4.174387 0.000000 995 +devic 0 16 4.174387 0.000000 1002 +atth 1 15 4.248495 4.248495 1019 +hierarch 1 15 4.248495 4.248495 1018 +style 0 15 4.248495 0.000000 1036 +camera 1 14 4.317488 4.317488 1115 +hopefulli 0 14 4.317488 0.000000 1071 +scene 0 14 4.317488 0.000000 1114 +alan 1 13 4.382027 4.382027 1146 +composit 0 13 4.382027 0.000000 1150 +coordin 0 13 4.382027 0.000000 1182 +automata 0 13 4.382027 0.000000 1135 +difficulti 0 13 4.382027 0.000000 1132 +opportun 0 13 4.382027 0.000000 1161 +prelim 1 12 4.465908 4.465908 1201 +skill 1 12 4.465908 4.465908 1205 +holidai 1 12 4.465908 4.465908 1224 +bruce 1 12 4.465908 4.465908 1226 +land 1 12 4.465908 4.465908 1273 +huang 1 12 4.465908 4.465908 1202 +evolv 0 12 4.465908 0.000000 1223 +optic 0 12 4.465908 0.000000 1221 +remov 0 12 4.465908 0.000000 1225 +buffer 0 12 4.465908 0.000000 1211 +scan 0 12 4.465908 0.000000 1243 +volum 1 11 4.553877 4.553877 1347 +transpar 0 11 4.553877 0.000000 1325 +statement 0 11 4.553877 0.000000 1313 +forc 0 10 4.653960 0.000000 1384 +perspect 0 10 4.653960 0.000000 1437 +facilit 0 10 4.653960 0.000000 1412 +surfac 2 9 4.753590 9.507180 1574 +light 1 9 4.753590 4.753590 1533 +incomplet 0 9 4.753590 0.000000 1575 +leader 0 9 4.753590 0.000000 1576 +explicit 0 9 4.753590 0.000000 1525 +screen 0 9 4.753590 0.000000 1577 +wall 0 9 4.753590 0.000000 1553 +observ 0 9 4.753590 0.000000 1578 +rhode 0 9 4.753590 0.000000 1579 +polygon 1 8 4.875197 4.875197 1723 +textur 1 8 4.875197 4.875197 1677 +convers 0 8 4.875197 0.000000 1673 +parametr 1 7 5.010635 5.010635 1819 +clip 1 7 5.010635 5.010635 1868 +shade 1 7 5.010635 5.010635 1881 +pagecomput 0 7 5.010635 0.000000 1900 +stereo 0 7 5.010635 0.000000 1818 +channel 0 7 5.010635 0.000000 1836 +justin 0 7 5.010635 0.000000 1789 +davi 0 7 5.010635 0.000000 1888 +artist 0 6 5.164786 0.000000 2127 +onto 0 6 5.164786 0.000000 2089 +hidden 0 6 5.164786 0.000000 1987 +notifi 0 6 5.164786 0.000000 2106 +rotat 0 5 5.347108 0.000000 2295 +rigid 0 5 5.347108 0.000000 2432 +cellular 0 5 5.347108 0.000000 2433 +anti 0 5 5.347108 0.000000 2434 +hyper 0 5 5.347108 0.000000 2435 +particl 0 5 5.347108 0.000000 2436 +oregon 0 5 5.347108 0.000000 2437 +implicit 0 4 5.568345 0.000000 2830 +invers 0 4 5.568345 0.000000 2764 +pixel 0 4 5.568345 0.000000 2831 +contour 0 4 5.568345 0.000000 2812 +administrivia 1 3 5.857933 5.857933 3166 +kinemat 1 3 5.857933 5.857933 3516 +computergraph 0 3 5.857933 0.000000 3517 +wave 0 3 5.857933 0.000000 3518 +shadow 0 3 5.857933 0.000000 3519 +bump 0 3 5.857933 0.000000 3497 +arrow 0 3 5.857933 0.000000 3520 +makeup 0 3 5.857933 0.000000 3449 +jing 0 3 5.857933 0.000000 3521 +mccune 0 3 5.857933 0.000000 3522 +waterloo 0 3 5.857933 0.000000 3523 +cardiff 0 3 5.857933 0.000000 3154 +watt 1 2 6.263398 6.263398 4814 +scalar 1 2 6.263398 6.263398 4815 +religi 1 2 6.263398 6.263398 4816 +folei 0 2 6.263398 0.000000 4817 +bruceland 0 2 6.263398 0.000000 4818 +designedto 0 2 6.263398 0.000000 4712 +illumin 0 2 6.263398 0.000000 4819 +blobbi 0 2 6.263398 0.000000 4820 +homogen 0 2 6.263398 0.000000 4821 +mimic 0 2 6.263398 0.000000 4736 +phong 0 2 6.263398 0.000000 4822 +alias 0 2 6.263398 0.000000 4823 +tomak 0 2 6.263398 0.000000 4675 +belief 0 2 6.263398 0.000000 4553 +inord 0 2 6.263398 0.000000 4824 +absent 0 2 6.263398 0.000000 4825 +deviat 0 2 6.263398 0.000000 4826 +wale 0 2 6.263398 0.000000 4827 +manchest 0 2 6.263398 0.000000 4828 +todoc 0 2 6.263398 0.000000 4829 +quadric 1 1 6.957497 6.957497 9250 +swept 0 1 6.957497 0.000000 9251 +tensor 0 1 6.957497 0.000000 9252 +tessel 0 1 6.957497 0.000000 9253 +gourand 0 1 6.957497 0.000000 9254 +vernier 0 1 6.957497 0.000000 9255 +acuiti 0 1 6.957497 0.000000 9256 +mispercept 0 1 6.957497 0.000000 9257 +advect 0 1 6.957497 0.000000 9258 +multiparamet 0 1 6.957497 0.000000 9259 +educationlaw 0 1 6.957497 0.000000 9260 +mandat 0 1 6.957497 0.000000 9261 +intendingto 0 1 6.957497 0.000000 9262 +requestedto 0 1 6.957497 0.000000 9263 +jmccune 0 1 6.957497 0.000000 9264 +csrelev 0 1 6.957497 0.000000 9265 +universityrel 0 1 6.957497 0.000000 9266 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..188a1e8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +base 0 165 1.791759 0.000000 50 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +first 0 140 1.945910 0.000000 71 +object 0 138 1.945910 0.000000 79 +note 0 142 1.945910 0.000000 67 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +welcom 0 122 2.079442 0.000000 99 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +site 0 106 2.197225 0.000000 119 +place 0 106 2.197225 0.000000 124 +final 0 116 2.197225 0.000000 108 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +contain 0 81 2.484907 0.000000 174 +level 0 87 2.484907 0.000000 180 +start 0 83 2.484907 0.000000 173 +build 0 85 2.484907 0.000000 184 +refer 0 78 2.564949 0.000000 203 +materi 1 75 2.639057 2.639057 221 +onlin 0 75 2.639057 0.000000 223 +main 0 67 2.708050 0.000000 256 +practic 0 70 2.708050 0.000000 246 +order 0 69 2.708050 0.000000 249 +laboratori 1 63 2.772589 2.772589 292 +result 0 65 2.772589 0.000000 281 +virtual 0 62 2.772589 0.000000 285 +scientif 1 53 2.944439 2.944439 341 +visual 1 48 3.044522 3.044522 372 +principl 0 48 3.044522 0.000000 357 +get 0 46 3.091042 0.000000 380 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +mark 1 44 3.135494 3.135494 403 +map 0 39 3.258097 0.000000 452 +procedur 1 36 3.367296 3.367296 488 +copyright 0 36 3.367296 0.000000 495 +taught 0 33 3.433987 0.000000 526 +transform 1 32 3.465736 3.465736 542 +anim 1 31 3.496508 3.496508 557 +deal 0 22 3.850148 0.000000 736 +sent 0 22 3.850148 0.000000 763 +facil 0 20 3.951244 0.000000 814 +exercis 1 19 4.007333 4.007333 842 +atth 0 15 4.248495 0.000000 1019 +camera 1 14 4.317488 4.317488 1115 +train 0 14 4.317488 0.000000 1066 +land 0 12 4.465908 0.000000 1273 +statement 0 11 4.553877 0.000000 1313 +perspect 0 10 4.653960 0.000000 1437 +surfac 1 9 4.753590 4.753590 1574 +leader 0 9 4.753590 0.000000 1576 +light 0 9 4.753590 0.000000 1533 +textur 1 8 4.875197 4.875197 1677 +competit 0 8 4.875197 0.000000 1635 +polygon 0 8 4.875197 0.000000 1723 +pagecomput 0 7 5.010635 0.000000 1900 +parametr 0 7 5.010635 0.000000 1819 +chat 0 6 5.164786 0.000000 2128 +restrict 0 6 5.164786 0.000000 2129 +implicit 0 4 5.568345 0.000000 2830 +enrol 0 4 5.568345 0.000000 2613 +computergraph 1 3 5.857933 5.857933 3517 +bump 0 3 5.857933 0.000000 3497 +folei 0 2 6.263398 0.000000 4817 +watt 0 2 6.263398 0.000000 4814 +bruceland 0 2 6.263398 0.000000 4818 +todoc 0 2 6.263398 0.000000 4829 +exercisesthi 0 1 6.957497 0.000000 9267 +universityundergradu 0 1 6.957497 0.000000 9268 +dcomput 0 1 6.957497 0.000000 9269 +sigucc 0 1 6.957497 0.000000 9270 +basededuc 0 1 6.957497 0.000000 9271 +areinclud 0 1 6.957497 0.000000 9272 +aboutc 0 1 6.957497 0.000000 9273 +semesteraccess 0 1 6.957497 0.000000 9274 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..c39108bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +paper 0 205 1.609438 0.000000 38 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +model 1 145 1.945910 1.945910 69 +lectur 0 135 1.945910 0.000000 73 +document 0 121 2.079442 0.000000 89 +topic 1 114 2.197225 2.197225 110 +code 0 108 2.197225 0.000000 116 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +window 0 68 2.708050 0.000000 242 +content 0 59 2.833213 0.000000 302 +sampl 0 53 2.944439 0.000000 339 +visual 0 48 3.044522 0.000000 372 +video 0 44 3.135494 0.000000 405 +michael 0 35 3.401197 0.000000 514 +human 0 32 3.465736 0.000000 546 +express 0 32 3.465736 0.000000 540 +retriev 0 27 3.637586 0.000000 621 +appropri 0 18 4.060443 0.000000 883 +spatial 0 16 4.174387 0.000000 988 +huang 0 12 4.465908 0.000000 1202 +explicit 0 9 4.753590 0.000000 1525 +sean 0 8 4.875197 0.000000 1705 +justin 0 7 5.010635 0.000000 1789 +deliv 0 6 5.164786 0.000000 2070 +chosen 0 6 5.164786 0.000000 1984 +alex 0 6 5.164786 0.000000 2130 +facial 0 5 5.347108 0.000000 2438 +interior 0 5 5.347108 0.000000 2439 +particl 0 5 5.347108 0.000000 2436 +chose 0 4 5.568345 0.000000 2629 +arun 0 4 5.568345 0.000000 2736 +computergraph 0 3 5.857933 0.000000 3517 +hung 0 3 5.857933 0.000000 3524 +mccune 0 3 5.857933 0.000000 3522 +landscap 0 3 5.857933 0.000000 3525 +landi 0 2 6.263398 0.000000 4830 +tsai 0 2 6.263398 0.000000 4831 +stochast 0 2 6.263398 0.000000 4832 +semestereach 0 1 6.957497 0.000000 9275 +anddocu 0 1 6.957497 0.000000 9276 +metabal 0 1 6.957497 0.000000 9277 +arcuri 0 1 6.957497 0.000000 9278 +benton 0 1 6.957497 0.000000 9279 +interdepend 0 1 6.957497 0.000000 9280 +diffus 0 1 6.957497 0.000000 9281 +pollut 0 1 6.957497 0.000000 9282 +modelsfu 0 1 6.957497 0.000000 9283 +antialias 0 1 6.957497 0.000000 9284 +vermach 0 1 6.957497 0.000000 9285 +hsun 0 1 6.957497 0.000000 9286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..35c27805 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +applic 0 170 1.791759 0.000000 56 +hall 0 146 1.945910 0.000000 65 +area 0 144 1.945910 0.000000 80 +problem 0 147 1.945910 0.000000 75 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +center 0 88 2.397895 0.000000 158 +member 0 84 2.484907 0.000000 165 +solut 0 82 2.484907 0.000000 162 +resum 0 79 2.564949 0.000000 217 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +nation 0 74 2.639057 0.000000 240 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +goal 0 66 2.708050 0.000000 250 +laboratori 0 63 2.772589 0.000000 292 +thesi 1 57 2.890372 2.890372 327 +scientif 1 53 2.944439 2.944439 341 +advisor 0 51 2.995732 0.000000 355 +numer 0 49 3.044522 0.000000 369 +least 1 35 3.401197 3.401197 516 +committe 0 34 3.401197 0.000000 522 +tech 0 35 3.401197 0.000000 515 +steve 0 29 3.583519 0.000000 594 +hous 0 21 3.912023 0.000000 801 +siam 0 21 3.912023 0.000000 800 +matrix 0 17 4.110874 0.000000 933 +squar 1 14 4.317488 4.317488 1082 +weight 0 12 4.465908 0.000000 1204 +statement 0 11 4.553877 0.000000 1313 +decomposit 0 10 4.653960 0.000000 1439 +rhode 0 9 4.753590 0.000000 1579 +juan 0 9 4.753590 0.000000 1580 +postdoc 0 8 4.875197 0.000000 1724 +cornellunivers 0 7 5.010635 0.000000 1916 +whichi 0 6 5.164786 0.000000 2056 +stabl 0 5 5.347108 0.000000 2309 +interior 0 5 5.347108 0.000000 2439 +orthogon 0 4 5.568345 0.000000 2832 +vavasi 1 3 5.857933 5.857933 3526 +hough 0 3 5.857933 0.000000 3527 +linearalgebra 0 2 6.263398 0.000000 4833 +anal 0 2 6.263398 0.000000 4834 +pointmethod 0 2 6.263398 0.000000 4835 +sandia 1 1 6.957497 6.957497 9287 +livermor 1 1 6.957497 6.957497 9288 +patti 0 1 6.957497 0.000000 9289 +houghpatti 0 1 6.957497 0.000000 9290 +frankh 0 1 6.957497 0.000000 9291 +nicktrefethen 0 1 6.957497 0.000000 9292 +schatz 0 1 6.957497 0.000000 9293 +optimizationi 0 1 6.957497 0.000000 9294 +meza 0 1 6.957497 0.000000 9295 +nationallaboratori 0 1 6.957497 0.000000 9296 +ofweight 0 1 6.957497 0.000000 9297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..f2d1cf55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +applic 0 170 1.791759 0.000000 56 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +hall 0 146 1.945910 0.000000 65 +postscript 1 131 2.079442 2.079442 90 +mathemat 1 108 2.197225 2.197225 123 +theori 0 111 2.197225 0.000000 127 +center 0 88 2.397895 0.000000 158 +activ 0 84 2.484907 0.000000 182 +dynam 1 76 2.564949 2.564949 194 +april 0 77 2.564949 0.000000 196 +exampl 0 77 2.564949 0.000000 195 +appli 0 71 2.639057 0.000000 226 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +ithaca 0 65 2.772589 0.000000 294 +type 0 61 2.833213 0.000000 296 +thesi 1 57 2.890372 2.890372 327 +summer 0 56 2.890372 0.000000 311 +detail 0 57 2.890372 0.000000 321 +scientif 0 53 2.944439 0.000000 341 +physic 1 47 3.091042 3.091042 377 +mechan 0 43 3.178054 0.000000 416 +linear 0 41 3.218876 0.000000 431 +submit 1 39 3.258097 3.258097 440 +continu 0 39 3.258097 0.000000 448 +vita 0 38 3.295837 0.000000 473 +expect 0 37 3.332205 0.000000 484 +next 0 34 3.401197 0.000000 517 +curriculum 0 33 3.433987 0.000000 535 +titl 0 31 3.496508 0.000000 556 +posit 0 31 3.496508 0.000000 552 +propos 0 28 3.610918 0.000000 602 +jeff 0 25 3.737670 0.000000 673 +background 0 25 3.737670 0.000000 664 +hill 0 25 3.737670 0.000000 670 +equat 0 23 3.806662 0.000000 724 +finish 0 22 3.850148 0.000000 748 +supervis 0 20 3.951244 0.000000 840 +mostli 0 19 4.007333 0.000000 869 +dimension 0 18 4.060443 0.000000 909 +seek 0 17 4.110874 0.000000 954 +outlin 0 17 4.110874 0.000000 914 +coupl 0 17 4.110874 0.000000 939 +normal 0 16 4.174387 0.000000 995 +transit 1 15 4.248495 4.248495 1046 +nick 0 13 4.382027 0.000000 1180 +misc 0 13 4.382027 0.000000 1124 +frank 0 9 4.753590 0.000000 1568 +rhode 0 9 4.753590 0.000000 1579 +unusu 0 9 4.753590 0.000000 1566 +sixth 0 7 5.010635 0.000000 1917 +atcornel 0 6 5.164786 0.000000 2131 +versu 0 6 5.164786 0.000000 2052 +fluid 1 5 5.347108 5.347108 2440 +stabil 0 5 5.347108 0.000000 2286 +satish 0 4 5.568345 0.000000 2833 +trefethen 1 3 5.857933 5.857933 3528 +exponenti 0 3 5.857933 0.000000 3529 +driscol 0 2 6.263398 0.000000 4836 +spectral 0 2 6.263398 0.000000 4837 +baggett 1 1 6.957497 6.957497 9298 +turbul 1 1 6.957497 6.957497 9299 +baggettjeff 0 1 6.957497 0.000000 9300 +hydrodynam 0 1 6.957497 0.000000 9301 +blend 0 1 6.957497 0.000000 9302 +iwould 0 1 6.957497 0.000000 9303 +abscissa 0 1 6.957497 0.000000 9304 +andphillip 0 1 6.957497 0.000000 9305 +subcrit 0 1 6.957497 0.000000 9306 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..3e29246c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +report 1 131 2.079442 2.079442 92 +document 1 121 2.079442 2.079442 89 +specif 0 106 2.197225 0.000000 106 +send 0 114 2.197225 0.000000 109 +technic 1 100 2.302585 2.302585 140 +search 1 95 2.397895 2.397895 155 +institut 1 84 2.484907 2.484907 187 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +want 0 79 2.564949 0.000000 199 +goal 0 66 2.708050 0.000000 250 +collect 1 65 2.772589 2.772589 268 +laboratori 0 63 2.772589 0.000000 292 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +sever 1 56 2.890372 2.890372 322 +allow 0 53 2.944439 0.000000 333 +made 0 44 3.135494 0.000000 398 +form 0 39 3.258097 0.000000 443 +author 0 39 3.258097 0.000000 450 +join 0 39 3.258097 0.000000 457 +industri 0 38 3.295837 0.000000 464 +field 1 37 3.332205 3.332205 482 +word 1 34 3.401197 3.401197 508 +titl 0 31 3.496508 0.000000 556 +limit 0 29 3.583519 0.000000 585 +particip 0 29 3.583519 0.000000 589 +packag 0 28 3.610918 0.000000 614 +background 0 25 3.737670 0.000000 664 +brows 1 23 3.806662 3.806662 726 +among 0 21 3.912023 0.000000 781 +tell 0 21 3.912023 0.000000 777 +offici 0 18 4.060443 0.000000 894 +commerci 0 16 4.174387 0.000000 1005 +whose 0 13 4.382027 0.000000 1166 +enter 0 10 4.653960 0.000000 1454 +govern 0 9 4.753590 0.000000 1581 +pronounc 0 7 5.010635 0.000000 1918 +ncstrl 2 3 5.857933 11.715866 3530 +interoper 0 2 6.263398 0.000000 4838 +andorgan 0 2 6.263398 0.000000 4443 +bibliograph 0 2 6.263398 0.000000 4699 +libraryncstrl 0 1 6.957497 0.000000 9307 +ancestr 0 1 6.957497 0.000000 9308 +internationalcollect 0 1 6.957497 0.000000 9309 +departmentsand 0 1 6.957497 0.000000 9310 +availablefor 0 1 6.957497 0.000000 9311 +eduat 0 1 6.957497 0.000000 9312 +ncstrlcollect 0 1 6.957497 0.000000 9313 +serversoper 0 1 6.957497 0.000000 9314 +participatinginstitut 0 1 6.957497 0.000000 9315 +ncstrlpress 0 1 6.957497 0.000000 9316 +theparticip 0 1 6.957497 0.000000 9317 +moreread 0 1 6.957497 0.000000 9318 +forinstitut 0 1 6.957497 0.000000 9319 +informationfind 0 1 6.957497 0.000000 9320 +snew 0 1 6.957497 0.000000 9321 +totech 0 1 6.957497 0.000000 9322 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..39c6ff48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^dri.cornell.edu @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +design 1 213 1.386294 1.386294 25 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +file 0 132 1.945910 0.000000 70 +process 0 142 1.945910 0.000000 72 +report 0 131 2.079442 0.000000 92 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +technic 1 100 2.302585 2.302585 140 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +institut 1 84 2.484907 2.484907 187 +server 0 76 2.564949 0.000000 204 +integr 0 67 2.708050 0.000000 245 +anoth 0 45 3.135494 0.000000 408 +mike 0 24 3.761200 0.000000 703 +brows 0 23 3.806662 0.000000 726 +enterpris 0 2 6.263398 0.000000 4839 +informationand 0 2 6.263398 0.000000 4840 +instituteabout 0 1 6.957497 0.000000 9323 +researchersat 0 1 6.957497 0.000000 9324 +searchal 0 1 6.957497 0.000000 9325 +reportssearch 0 1 6.957497 0.000000 9326 +ipic 0 1 6.957497 0.000000 9327 +itisingapor 0 1 6.957497 0.000000 9328 +altavistaforum 0 1 6.957497 0.000000 9329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..77b4898e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +group 1 183 1.609438 1.609438 36 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +note 0 142 1.945910 0.000000 67 +report 1 131 2.079442 2.079442 92 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +tool 0 117 2.079442 0.000000 93 +structur 1 106 2.197225 2.197225 105 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +memori 1 101 2.302585 2.302585 139 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +question 1 91 2.397895 2.397895 141 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +resourc 1 81 2.484907 2.484907 172 +institut 1 84 2.484907 2.484907 187 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +learn 0 86 2.484907 0.000000 170 +messag 0 76 2.564949 0.000000 212 +server 0 76 2.564949 0.000000 204 +sourc 0 77 2.564949 0.000000 201 +resum 0 79 2.564949 0.000000 217 +write 0 72 2.639057 0.000000 222 +onlin 0 75 2.639057 0.000000 223 +knowledg 1 67 2.708050 2.708050 243 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +improv 0 62 2.772589 0.000000 289 +copi 0 63 2.772589 0.000000 284 +collect 0 65 2.772589 0.000000 268 +share 1 59 2.833213 2.833213 304 +content 0 59 2.833213 0.000000 302 +explor 0 58 2.890372 0.000000 324 +run 0 51 2.995732 0.000000 347 +investig 0 51 2.995732 0.000000 353 +set 0 50 3.044522 0.000000 361 +without 0 50 3.044522 0.000000 370 +understand 0 47 3.091042 0.000000 384 +answer 1 45 3.135494 3.135494 391 +even 0 45 3.135494 0.000000 393 +made 0 44 3.135494 0.000000 398 +natur 0 44 3.135494 0.000000 406 +futur 1 41 3.218876 3.218876 427 +might 0 41 3.218876 0.000000 426 +author 0 39 3.258097 0.000000 450 +mean 1 37 3.332205 3.332205 477 +staff 1 36 3.367296 3.367296 490 +ofth 0 36 3.367296 0.000000 491 +especi 0 36 3.367296 0.000000 496 +product 1 33 3.433987 3.433987 527 +obtain 0 33 3.433987 0.000000 534 +collabor 0 32 3.465736 0.000000 543 +often 0 31 3.496508 0.000000 551 +produc 0 30 3.555348 0.000000 572 +abl 0 30 3.555348 0.000000 566 +ask 0 28 3.610918 0.000000 597 +manipul 0 27 3.637586 0.000000 624 +effort 0 26 3.688879 0.000000 652 +sport 0 25 3.737670 0.000000 683 +decis 0 23 3.806662 0.000000 728 +initi 0 23 3.806662 0.000000 717 +thank 0 23 3.806662 0.000000 721 +reduc 0 22 3.850148 0.000000 759 +corpor 1 21 3.912023 3.912023 802 +among 0 21 3.912023 0.000000 781 +annot 0 21 3.912023 0.000000 775 +increas 1 20 3.951244 3.951244 829 +qualiti 0 20 3.951244 0.000000 832 +fine 0 20 3.951244 0.000000 822 +media 0 19 4.007333 0.000000 861 +feedback 0 19 4.007333 0.000000 854 +seem 1 18 4.060443 4.060443 899 +agent 0 18 4.060443 0.000000 910 +whether 1 17 4.110874 4.110874 918 +remot 1 15 4.248495 4.248495 1041 +believ 0 13 4.382027 0.000000 1187 +captur 0 12 4.465908 0.000000 1232 +safe 0 12 4.465908 0.000000 1274 +market 0 11 4.553877 0.000000 1361 +end 0 9 4.753590 0.000000 1567 +correctli 0 9 4.753590 0.000000 1478 +risk 0 8 4.875197 0.000000 1689 +xerox 0 8 4.875197 0.000000 1725 +davi 1 7 5.010635 5.010635 1888 +intellectu 0 7 5.010635 0.000000 1847 +dead 0 7 5.010635 0.000000 1840 +foreign 0 7 5.010635 0.000000 1919 +edumi 0 6 5.164786 0.000000 2132 +sponsor 0 6 5.164786 0.000000 2133 +whichi 0 6 5.164786 0.000000 2056 +contract 0 6 5.164786 0.000000 1985 +huttenloch 0 6 5.164786 0.000000 1983 +begun 0 5 5.347108 0.000000 2386 +clarif 0 5 5.347108 0.000000 2253 +medium 0 4 5.568345 0.000000 2834 +transmit 0 4 5.568345 0.000000 2835 +lawyer 0 4 5.568345 0.000000 2836 +evid 0 4 5.568345 0.000000 2768 +isthat 0 4 5.568345 0.000000 2723 +owner 0 3 5.857933 0.000000 3531 +narr 0 3 5.857933 0.000000 3454 +worker 0 2 6.263398 0.000000 4841 +institutejim 0 1 6.957497 0.000000 9330 +davisxerox 0 1 6.957497 0.000000 9331 +corporationphd 0 1 6.957497 0.000000 9332 +improvecommun 0 1 6.957497 0.000000 9333 +andcont 0 1 6.957497 0.000000 9334 +reformat 0 1 6.957497 0.000000 9335 +inhypertext 0 1 6.957497 0.000000 9336 +thecstr 0 1 6.957497 0.000000 9337 +anarpa 0 1 6.957497 0.000000 9338 +moreeasili 0 1 6.957497 0.000000 9339 +electronicsystem 0 1 6.957497 0.000000 9340 +ofor 0 1 6.957497 0.000000 9341 +memoryinclud 0 1 6.957497 0.000000 9342 +sscreenplai 0 1 6.957497 0.000000 9343 +producedth 0 1 6.957497 0.000000 9344 +andjustif 0 1 6.957497 0.000000 9345 +developingcorpor 0 1 6.957497 0.000000 9346 +sharedannot 0 1 6.957497 0.000000 9347 +howpeopl 0 1 6.957497 0.000000 9348 +inelectron 0 1 6.957497 0.000000 9349 +prototypeimplement 0 1 6.957497 0.000000 9350 +shareddocu 0 1 6.957497 0.000000 9351 +nnotat 0 1 6.957497 0.000000 9352 +berequest 0 1 6.957497 0.000000 9353 +orcorrect 0 1 6.957497 0.000000 9354 +aus 0 1 6.957497 0.000000 9355 +willfind 0 1 6.957497 0.000000 9356 +whetherstud 0 1 6.957497 0.000000 9357 +usefulmean 0 1 6.957497 0.000000 9358 +designof 0 1 6.957497 0.000000 9359 +proxi 0 1 6.957497 0.000000 9360 +reliablycarri 0 1 6.957497 0.000000 9361 +toeither 0 1 6.957497 0.000000 9362 +alsopap 0 1 6.957497 0.000000 9363 +publicatiion 0 1 6.957497 0.000000 9364 +thedrimi 0 1 6.957497 0.000000 9365 +meprofession 0 1 6.957497 0.000000 9366 +historythi 0 1 6.957497 0.000000 9367 +improvisationi 0 1 6.957497 0.000000 9368 +resumeno 0 1 6.957497 0.000000 9369 +likeit 0 1 6.957497 0.000000 9370 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..01f8ae14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +person 0 111 2.197225 0.000000 117 +search 1 95 2.397895 2.397895 155 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +result 0 65 2.772589 0.000000 281 +right 0 48 3.044522 0.000000 363 +word 0 34 3.401197 0.000000 508 +relev 0 26 3.688879 0.000000 637 +greg 1 24 3.761200 3.761200 695 +reserv 0 20 3.951244 0.000000 808 +configur 0 15 4.248495 0.000000 1012 +metacrawl 1 10 4.653960 4.653960 1455 +erik 1 8 4.875197 4.875197 1701 +oren 1 6 5.164786 5.164786 2134 +etzioni 0 6 5.164786 0.000000 2135 +selberg 1 5 5.347108 5.347108 2441 +phrase 0 5 5.347108 0.000000 2242 +ahoi 0 3 5.857933 0.000000 3532 +searchingmetacrawlerbi 0 1 6.957497 0.000000 9371 +lauckhartand 0 1 6.957497 0.000000 9372 +etzioniif 0 1 6.957497 0.000000 9373 +wordssort 0 1 6.957497 0.000000 9374 +locationcontrol 0 1 6.957497 0.000000 9375 +problemswebmast 0 1 6.957497 0.000000 9376 +comcopyright 0 1 6.957497 0.000000 9377 +lauckhart 0 1 6.957497 0.000000 9378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..08b77842 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +link 1 247 1.386294 1.386294 24 +wisc 0 242 1.386294 0.000000 33 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +wisconsin 0 169 1.791759 0.000000 54 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +relat 1 139 1.945910 1.945910 68 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +peopl 0 96 2.302585 0.000000 132 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +ieee 1 86 2.484907 2.484907 190 +activ 0 84 2.484907 0.000000 182 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +resourc 0 81 2.484907 0.000000 172 +dynam 0 76 2.564949 0.000000 194 +server 0 76 2.564949 0.000000 204 +intellig 0 72 2.639057 0.000000 225 +simul 0 66 2.708050 0.000000 255 +plan 1 65 2.772589 2.772589 272 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +special 0 56 2.890372 0.000000 320 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +frequent 0 49 3.044522 0.000000 367 +effect 0 46 3.091042 0.000000 385 +mechan 0 43 3.178054 0.000000 416 +review 0 42 3.218876 0.000000 425 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +societi 0 40 3.258097 0.000000 456 +seminar 0 38 3.295837 0.000000 470 +robot 2 36 3.367296 6.734592 497 +tech 0 35 3.401197 0.000000 515 +committe 0 34 3.401197 0.000000 522 +human 0 32 3.465736 0.000000 546 +robert 0 30 3.555348 0.000000 567 +ask 0 28 3.610918 0.000000 597 +motion 1 24 3.761200 3.761200 699 +compress 0 23 3.806662 0.000000 719 +director 0 22 3.850148 0.000000 767 +geometri 0 22 3.850148 0.000000 752 +path 0 21 3.912023 0.000000 778 +brief 0 16 4.174387 0.000000 1001 +chuck 0 14 4.317488 0.000000 1108 +nasa 0 13 4.382027 0.000000 1188 +vladimir 0 11 4.553877 0.000000 1324 +sens 0 11 4.553877 0.000000 1305 +errata 0 10 4.653960 0.000000 1403 +dyer 0 9 4.753590 0.000000 1573 +sensit 0 8 4.875197 0.000000 1726 +manufactur 0 8 4.875197 0.000000 1634 +sensor 1 7 5.010635 5.010635 1920 +jude 0 6 5.164786 0.000000 2123 +actuat 0 5 5.347108 0.000000 2442 +shavlik 0 5 5.347108 0.000000 2429 +lumelski 0 4 5.568345 0.000000 2837 +underwat 0 4 5.568345 0.000000 2838 +redund 0 4 5.568345 0.000000 2839 +skin 0 4 5.568345 0.000000 2840 +neil 0 4 5.568345 0.000000 2841 +kinemat 1 3 5.857933 5.857933 3516 +avenuemadison 0 2 6.263398 0.000000 4842 +maze 0 2 6.263398 0.000000 4843 +tether 0 2 6.263398 0.000000 4844 +duffi 0 2 6.263398 0.000000 4845 +lorenz 0 2 6.263398 0.000000 4846 +telerobot 0 2 6.263398 0.000000 4847 +hert 0 2 6.263398 0.000000 4848 +jogger 0 1 6.957497 0.000000 9379 +decentr 0 1 6.957497 0.000000 9380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..48a3e012 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +project 0 340 1.098612 0.000000 18 +link 1 247 1.386294 1.386294 24 +graduat 0 215 1.386294 0.000000 31 +wisconsin 0 169 1.791759 0.000000 54 +mathemat 1 108 2.197225 2.197225 123 +center 0 88 2.397895 0.000000 158 +ieee 1 86 2.484907 2.484907 190 +institut 0 84 2.484907 0.000000 187 +dept 1 64 2.772589 2.772589 291 +plan 0 65 2.772589 0.000000 272 +colleg 0 61 2.833213 0.000000 300 +mechan 0 43 3.178054 0.000000 416 +autom 0 41 3.218876 0.000000 434 +societi 0 40 3.258097 0.000000 456 +electr 0 38 3.295837 0.000000 461 +robot 1 36 3.367296 3.367296 497 +global 0 34 3.401197 0.000000 520 +tech 0 35 3.401197 0.000000 515 +committe 0 34 3.401197 0.000000 522 +human 0 32 3.465736 0.000000 546 +motion 0 24 3.761200 0.000000 699 +geometri 0 22 3.850148 0.000000 752 +path 0 21 3.912023 0.000000 778 +grant 0 12 4.465908 0.000000 1216 +vladimir 1 11 4.553877 4.553877 1324 +sensit 0 8 4.875197 0.000000 1726 +lumelski 0 4 5.568345 0.000000 2837 +underwat 0 4 5.568345 0.000000 2838 +redund 0 4 5.568345 0.000000 2839 +skin 0 4 5.568345 0.000000 2840 +kinemat 0 3 5.857933 0.000000 3516 +mace 0 2 6.263398 0.000000 4849 +lumelskyprofessormechan 0 1 6.957497 0.000000 9381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..36a92f1d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +public 0 202 1.609438 0.000000 43 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +phone 0 175 1.791759 0.000000 45 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +high 2 130 2.079442 4.158884 101 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +find 0 111 2.197225 0.000000 111 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +structur 0 106 2.197225 0.000000 105 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +real 1 93 2.397895 2.397895 144 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +requir 0 81 2.484907 0.000000 167 +thing 0 84 2.484907 0.000000 189 +wide 0 84 2.484907 0.000000 185 +ieee 0 86 2.484907 0.000000 190 +messag 1 76 2.564949 2.564949 212 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +method 0 80 2.564949 0.000000 213 +want 0 79 2.564949 0.000000 199 +decemb 0 80 2.564949 0.000000 215 +upson 0 71 2.639057 0.000000 218 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +integr 0 67 2.708050 0.000000 245 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +septemb 1 65 2.772589 2.772589 274 +ithaca 0 65 2.772589 0.000000 294 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +function 0 62 2.772589 0.000000 275 +virtual 0 62 2.772589 0.000000 285 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +think 1 57 2.890372 2.890372 314 +major 0 56 2.890372 0.000000 315 +reason 0 57 2.890372 0.000000 318 +explor 0 58 2.890372 0.000000 324 +adapt 0 46 3.091042 0.000000 387 +done 0 47 3.091042 0.000000 381 +protocol 1 45 3.135494 3.135494 407 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +video 0 44 3.135494 0.000000 405 +mark 0 44 3.135494 0.000000 403 +mechan 1 43 3.178054 3.178054 416 +third 0 43 3.178054 0.000000 412 +small 0 39 3.258097 0.000000 447 +brian 0 38 3.295837 0.000000 466 +respons 0 37 3.332205 0.000000 476 +abl 1 30 3.555348 3.555348 566 +focu 0 30 3.555348 0.000000 571 +cluster 1 28 3.610918 3.610918 612 +pass 0 28 3.610918 0.000000 611 +packag 0 28 3.610918 0.000000 614 +detect 1 26 3.688879 3.688879 646 +bound 0 26 3.688879 0.000000 659 +reliabl 0 25 3.737670 0.000000 674 +reach 0 24 3.761200 0.000000 688 +pattern 0 24 3.761200 0.000000 689 +highli 1 23 3.806662 3.806662 725 +lead 0 23 3.806662 0.000000 718 +sciencecornel 0 22 3.850148 0.000000 768 +deal 0 22 3.850148 0.000000 736 +cooper 0 22 3.850148 0.000000 757 +tell 0 21 3.912023 0.000000 777 +flexibl 0 21 3.912023 0.000000 792 +exploit 0 20 3.951244 0.000000 836 +smith 0 20 3.951244 0.000000 820 +left 1 19 4.007333 4.007333 851 +predict 0 19 4.007333 0.000000 855 +miss 0 19 4.007333 0.000000 866 +speed 1 18 4.060443 4.060443 911 +failur 1 18 4.060443 4.060443 898 +anyon 0 17 4.110874 0.000000 916 +latenc 1 16 4.174387 4.174387 993 +transfer 0 16 4.174387 0.000000 967 +devic 0 16 4.174387 0.000000 1002 +practicum 0 16 4.174387 0.000000 960 +horu 1 14 4.317488 4.317488 1116 +achiev 0 14 4.317488 0.000000 1088 +demand 0 14 4.317488 0.000000 1073 +eicken 1 13 4.382027 4.382027 1134 +thorsten 1 13 4.382027 4.382027 1133 +kenneth 0 12 4.465908 0.000000 1265 +brad 0 12 4.465908 0.000000 1264 +reness 1 11 4.553877 4.553877 1333 +noth 1 11 4.553877 4.553877 1328 +bandwidth 0 11 4.553877 0.000000 1365 +node 0 11 4.553877 0.000000 1326 +werner 2 10 4.653960 9.307920 1385 +guarante 1 10 4.653960 4.653960 1391 +awai 0 10 4.653960 0.000000 1447 +mountain 0 10 4.653960 0.000000 1456 +robbert 1 9 4.753590 4.753590 1529 +birman 1 9 4.753590 4.753590 1531 +desir 0 9 4.753590 0.000000 1542 +deadlin 0 9 4.753590 0.000000 1502 +vogel 2 8 4.875197 9.750394 1622 +sigop 1 8 4.875197 4.875197 1727 +extract 0 8 4.875197 0.000000 1728 +vineet 0 8 4.875197 0.000000 1639 +perfect 0 7 5.010635 0.000000 1921 +gave 0 7 5.010635 0.000000 1922 +synchroni 0 7 5.010635 0.000000 1923 +implementationof 0 7 5.010635 0.000000 1813 +deliv 0 6 5.164786 0.000000 2070 +subsystem 0 6 5.164786 0.000000 2015 +alex 0 6 5.164786 0.000000 2130 +situat 0 5 5.347108 0.000000 2365 +scope 0 5 5.347108 0.000000 2296 +buch 0 5 5.347108 0.000000 2272 +myresearch 0 4 5.568345 0.000000 2842 +behind 0 4 5.568345 0.000000 2610 +basu 0 4 5.568345 0.000000 2843 +hayden 0 4 5.568345 0.000000 2844 +hickei 0 4 5.568345 0.000000 2845 +vaysburd 0 4 5.568345 0.000000 2846 +concert 0 3 5.857933 0.000000 3533 +interfacefor 0 3 5.857933 0.000000 3534 +anindya 0 3 5.857933 0.000000 3535 +copper 0 3 5.857933 0.000000 3536 +glade 0 3 5.857933 0.000000 3537 +takako 0 3 5.857933 0.000000 3538 +amwork 0 2 6.263398 0.000000 4850 +regardless 0 2 6.263398 0.000000 4577 +katherin 0 2 6.263398 0.000000 4851 +dalia 0 2 6.263398 0.000000 4852 +malki 0 2 6.263398 0.000000 4853 +workshopconnamoran 1 1 6.957497 6.957497 9382 +ierland 1 1 6.957497 6.957497 9383 +researchera 0 1 6.957497 0.000000 9384 +halldept 0 1 6.957497 0.000000 9385 +thehorusand 0 1 6.957497 0.000000 9386 +bandwith 0 1 6.957497 0.000000 9387 +horuswith 0 1 6.957497 0.000000 9388 +fallen 0 1 6.957497 0.000000 9389 +latencyfor 0 1 6.957497 0.000000 9390 +protocolsar 0 1 6.957497 0.000000 9391 +structureand 0 1 6.957497 0.000000 9392 +guarant 0 1 6.957497 0.000000 9393 +acur 0 1 6.957497 0.000000 9394 +aglob 0 1 6.957497 0.000000 9395 +supportfailur 0 1 6.957497 0.000000 9396 +suspis 0 1 6.957497 0.000000 9397 +workwith 0 1 6.957497 0.000000 9398 +middlewar 0 1 6.957497 0.000000 9399 +brainchild 0 1 6.957497 0.000000 9400 +andken 0 1 6.957497 0.000000 9401 +withthorsten 0 1 6.957497 0.000000 9402 +horusexperi 0 1 6.957497 0.000000 9403 +lectureson 0 1 6.957497 0.000000 9404 +virtuallysynchron 0 1 6.957497 0.000000 9405 +princpl 0 1 6.957497 0.000000 9406 +hpc 0 1 6.957497 0.000000 9407 +kati 0 1 6.957497 0.000000 9408 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..b86c7d2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +time 0 293 1.098612 0.000000 17 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +provid 0 121 2.079442 0.000000 94 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +version 1 113 2.197225 2.197225 122 +manag 0 114 2.197225 0.000000 125 +theori 0 111 2.197225 0.000000 127 +code 0 108 2.197225 0.000000 116 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +final 0 116 2.197225 0.000000 108 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +commun 1 95 2.397895 2.397895 157 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +follow 0 92 2.397895 0.000000 143 +present 0 91 2.397895 0.000000 145 +comment 0 93 2.397895 0.000000 146 +requir 1 81 2.484907 2.484907 167 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +practic 0 70 2.708050 0.000000 246 +java 0 70 2.708050 0.000000 248 +multimedia 0 68 2.708050 0.000000 258 +collect 0 65 2.772589 0.000000 268 +virtual 0 62 2.772589 0.000000 285 +written 0 63 2.772589 0.000000 278 +type 0 61 2.833213 0.000000 296 +special 0 56 2.890372 0.000000 320 +overview 0 56 2.890372 0.000000 323 +publish 0 57 2.890372 0.000000 326 +found 1 53 2.944439 2.944439 337 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +scientif 0 53 2.944439 0.000000 341 +much 0 52 2.995732 0.000000 349 +life 1 50 3.044522 3.044522 375 +set 0 50 3.044522 0.000000 361 +standard 0 48 3.044522 0.000000 365 +effect 0 46 3.091042 0.000000 385 +made 1 44 3.135494 3.135494 398 +describ 0 45 3.135494 0.000000 400 +better 0 45 3.135494 0.000000 401 +compani 1 41 3.218876 3.218876 423 +cach 0 41 3.218876 0.000000 432 +origin 0 38 3.295837 0.000000 472 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +purpos 1 37 3.332205 3.332205 481 +cost 0 37 3.332205 0.000000 480 +workstat 0 37 3.332205 0.000000 479 +ofth 0 36 3.367296 0.000000 491 +everi 0 34 3.401197 0.000000 519 +word 0 34 3.401197 0.000000 508 +toler 1 33 3.433987 3.433987 533 +within 0 33 3.433987 0.000000 525 +articl 0 33 3.433987 0.000000 530 +fault 0 32 3.465736 0.000000 547 +independ 0 32 3.465736 0.000000 548 +collabor 0 32 3.465736 0.000000 543 +idea 0 32 3.465736 0.000000 545 +secur 0 30 3.555348 0.000000 577 +exist 0 30 3.555348 0.000000 569 +power 0 30 3.555348 0.000000 573 +platform 0 29 3.583519 0.000000 591 +framework 1 28 3.610918 3.610918 606 +cluster 0 28 3.610918 0.000000 612 +american 0 27 3.637586 0.000000 634 +effort 1 26 3.688879 3.688879 652 +toward 0 25 3.737670 0.000000 668 +never 0 25 3.737670 0.000000 671 +reliabl 0 25 3.737670 0.000000 674 +wish 0 24 3.761200 0.000000 692 +seri 0 24 3.761200 0.000000 708 +initi 0 23 3.806662 0.000000 717 +varieti 0 22 3.850148 0.000000 740 +unit 0 21 3.912023 0.000000 779 +thu 0 21 3.912023 0.000000 773 +born 0 21 3.912023 0.000000 798 +runtim 0 19 4.007333 0.000000 858 +minim 0 18 4.060443 0.000000 887 +earli 0 16 4.174387 0.000000 968 +style 0 15 4.248495 0.000000 1036 +contribut 0 15 4.248495 0.000000 1021 +horu 2 14 4.317488 8.634976 1116 +attribut 0 14 4.317488 0.000000 1092 +coher 0 14 4.317488 0.000000 1109 +becam 0 14 4.317488 0.000000 1117 +whose 1 13 4.382027 4.382027 1166 +bodi 1 13 4.382027 4.382027 1178 +brother 0 13 4.382027 0.000000 1189 +replic 1 12 4.465908 4.465908 1231 +kenneth 1 12 4.465908 4.465908 1265 +robust 0 12 4.465908 0.000000 1271 +evolv 0 12 4.465908 0.000000 1223 +weight 0 12 4.465908 0.000000 1204 +rest 0 12 4.465908 0.000000 1259 +reness 1 11 4.553877 4.553877 1333 +faster 0 11 4.553877 0.000000 1323 +volum 0 11 4.553877 0.000000 1347 +death 1 10 4.653960 4.653960 1457 +modular 0 10 4.653960 0.000000 1392 +modul 0 10 4.653960 0.000000 1434 +length 0 10 4.653960 0.000000 1400 +sentenc 0 10 4.653960 0.000000 1413 +werner 0 10 4.653960 0.000000 1385 +birman 1 9 4.753590 4.753590 1531 +robbert 1 9 4.753590 4.753590 1529 +light 0 9 4.753590 0.000000 1533 +heart 0 8 4.875197 0.000000 1729 +gain 0 8 4.875197 0.000000 1730 +vogel 0 8 4.875197 0.000000 1622 +dead 1 7 5.010635 5.010635 1840 +aris 0 7 5.010635 0.000000 1924 +exactli 0 7 5.010635 0.000000 1817 +synchroni 0 7 5.010635 0.000000 1923 +usabl 0 7 5.010635 0.000000 1810 +conferenc 0 7 5.010635 0.000000 1857 +brought 0 7 5.010635 0.000000 1925 +restrict 0 6 5.164786 0.000000 2129 +outstand 0 6 5.164786 0.000000 2136 +mother 0 6 5.164786 0.000000 2083 +greatest 0 6 5.164786 0.000000 2073 +isi 1 5 5.347108 5.347108 2443 +elsewher 0 5 5.347108 0.000000 2444 +circumst 0 5 5.347108 0.000000 2283 +knew 0 5 5.347108 0.000000 2445 +hair 0 5 5.347108 0.000000 2446 +firm 0 4 5.568345 0.000000 2684 +areavail 0 4 5.568345 0.000000 2810 +projectth 0 3 5.857933 0.000000 3344 +woman 0 3 5.857933 0.000000 3539 +redesign 0 3 5.857933 0.000000 3540 +greatli 0 3 5.857933 0.000000 3541 +child 0 3 5.857933 0.000000 3542 +london 0 3 5.857933 0.000000 3282 +ensembl 1 2 6.263398 6.263398 4854 +oppos 1 2 6.263398 6.263398 4855 +egypt 0 2 6.263398 0.000000 4856 +groupwar 0 2 6.263398 0.000000 4857 +toconstruct 0 2 6.263398 0.000000 4858 +communicationarchitectur 0 2 6.263398 0.000000 4859 +ofreleas 0 2 6.263398 0.000000 4860 +transi 0 2 6.263398 0.000000 4861 +froma 0 2 6.263398 0.000000 4862 +mighti 0 2 6.263398 0.000000 4863 +wing 0 2 6.263398 0.000000 4864 +stir 0 2 6.263398 0.000000 4865 +lament 0 2 6.263398 0.000000 4866 +papersand 0 2 6.263398 0.000000 4867 +silvano 0 2 6.263398 0.000000 4868 +mytholog 0 2 6.263398 0.000000 4869 +court 0 2 6.263398 0.000000 4870 +osiri 1 1 6.957497 6.957497 9409 +egyptian 1 1 6.957497 6.957497 9410 +god 1 1 6.957497 6.957497 9411 +rejoic 1 1 6.957497 6.957497 9412 +groupcommun 1 1 6.957497 6.957497 9413 +triumphant 0 1 6.957497 0.000000 9414 +ofisi 0 1 6.957497 0.000000 9415 +heir 0 1 6.957497 0.000000 9416 +appealedstrongli 0 1 6.957497 0.000000 9417 +becausein 0 1 6.957497 0.000000 9418 +possess 0 1 6.957497 0.000000 9419 +renew 0 1 6.957497 0.000000 9420 +movementa 0 1 6.957497 0.000000 9421 +inact 0 1 6.957497 0.000000 9422 +applicationsbas 0 1 6.957497 0.000000 9423 +infault 0 1 6.957497 0.000000 9424 +thatexploit 0 1 6.957497 0.000000 9425 +theoveral 0 1 6.957497 0.000000 9426 +applicationprotocol 0 1 6.957497 0.000000 9427 +applicationrequir 0 1 6.957497 0.000000 9428 +launch 0 1 6.957497 0.000000 9429 +theisi 0 1 6.957497 0.000000 9430 +robustdistribut 0 1 6.957497 0.000000 9431 +unsuit 0 1 6.957497 0.000000 9432 +asappl 0 1 6.957497 0.000000 9433 +besidesth 0 1 6.957497 0.000000 9434 +usedfor 0 1 6.957497 0.000000 9435 +sametim 0 1 6.957497 0.000000 9436 +lighter 0 1 6.957497 0.000000 9437 +beus 0 1 6.957497 0.000000 9438 +commericalright 0 1 6.957497 0.000000 9439 +manyoth 0 1 6.957497 0.000000 9440 +nofe 0 1 6.957497 0.000000 9441 +ensemblewil 0 1 6.957497 0.000000 9442 +groupwareappl 0 1 6.957497 0.000000 9443 +differentclass 0 1 6.957497 0.000000 9444 +onnext 0 1 6.957497 0.000000 9445 +speedcommun 0 1 6.957497 0.000000 9446 +systemsproject 0 1 6.957497 0.000000 9447 +navtech 0 1 6.957497 0.000000 9448 +stormcast 0 1 6.957497 0.000000 9449 +tacomaproject 0 1 6.957497 0.000000 9450 +thesepag 0 1 6.957497 0.000000 9451 +begotten 0 1 6.957497 0.000000 9452 +sorrow 0 1 6.957497 0.000000 9453 +herhusband 0 1 6.957497 0.000000 9454 +goddess 0 1 6.957497 0.000000 9455 +distress 0 1 6.957497 0.000000 9456 +equippedwith 0 1 6.957497 0.000000 9457 +utter 0 1 6.957497 0.000000 9458 +mighthav 0 1 6.957497 0.000000 9459 +secret 0 1 6.957497 0.000000 9460 +suckl 0 1 6.957497 0.000000 9461 +rear 0 1 6.957497 0.000000 9462 +horusvisit 0 1 6.957497 0.000000 9463 +abstractpag 0 1 6.957497 0.000000 9464 +relatedto 0 1 6.957497 0.000000 9465 +maffei 0 1 6.957497 0.000000 9466 +flexiblegroup 0 1 6.957497 0.000000 9467 +hyme 0 1 6.957497 0.000000 9468 +osirisfrom 0 1 6.957497 0.000000 9469 +papyru 0 1 6.957497 0.000000 9470 +walli 0 1 6.957497 0.000000 9471 +budg 0 1 6.957497 0.000000 9472 +studiesin 0 1 6.957497 0.000000 9473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..ac74f31d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +read 0 154 1.791759 0.000000 47 +report 1 131 2.079442 2.079442 92 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +site 0 106 2.197225 0.000000 119 +person 0 111 2.197225 0.000000 117 +peopl 1 96 2.302585 2.302585 132 +technic 0 100 2.302585 0.000000 140 +comment 0 93 2.397895 0.000000 146 +info 1 85 2.484907 2.484907 176 +academ 0 82 2.484907 0.000000 178 +activ 0 84 2.484907 0.000000 182 +know 1 80 2.564949 2.564949 198 +server 1 76 2.564949 2.564949 204 +master 0 76 2.564949 0.000000 216 +appear 0 78 2.564949 0.000000 210 +free 0 73 2.639057 0.000000 224 +degre 0 69 2.708050 0.000000 259 +locat 0 59 2.833213 0.000000 303 +faculti 1 56 2.890372 2.890372 325 +semest 0 58 2.890372 0.000000 312 +direct 0 57 2.890372 0.000000 316 +undergradu 0 54 2.944439 0.000000 338 +standard 0 48 3.044522 0.000000 365 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +annual 1 40 3.258097 3.258097 458 +feel 0 37 3.332205 0.000000 483 +staff 0 36 3.367296 0.000000 490 +tech 0 35 3.401197 0.000000 515 +go 1 33 3.433987 3.433987 529 +taught 0 33 3.433987 0.000000 526 +collabor 0 32 3.465736 0.000000 543 +team 0 27 3.637586 0.000000 625 +doctor 0 24 3.761200 0.000000 709 +brows 0 23 3.806662 0.000000 726 +size 0 23 3.806662 0.000000 713 +offici 0 18 4.060443 0.000000 894 +anonym 0 14 4.317488 0.000000 1100 +outstand 0 6 5.164786 0.000000 2136 +disclaim 0 4 5.568345 0.000000 2847 +projector 0 3 5.857933 0.000000 3409 +universitydepart 0 2 6.263398 0.000000 4871 +infoget 0 1 6.957497 0.000000 9474 +contactswithin 0 1 6.957497 0.000000 9475 +facultyfind 0 1 6.957497 0.000000 9476 +ortheir 0 1 6.957497 0.000000 9477 +researchcheck 0 1 6.957497 0.000000 9478 +aboutour 0 1 6.957497 0.000000 9479 +publicationsfind 0 1 6.957497 0.000000 9480 +researcherseith 0 1 6.957497 0.000000 9481 +degreeslook 0 1 6.957497 0.000000 9482 +orundergradu 0 1 6.957497 0.000000 9483 +academicsrefer 0 1 6.957497 0.000000 9484 +webfor 0 1 6.957497 0.000000 9485 +generalcoursedescript 0 1 6.957497 0.000000 9486 +peopleget 0 1 6.957497 0.000000 9487 +directorylist 0 1 6.957497 0.000000 9488 +activitiesfind 0 1 6.957497 0.000000 9489 +theassoci 0 1 6.957497 0.000000 9490 +excellenthockei 0 1 6.957497 0.000000 9491 +serverscheck 0 1 6.957497 0.000000 9492 +gopherserv 0 1 6.957497 0.000000 9493 +ftpserver 0 1 6.957497 0.000000 9494 +sitesquest 0 1 6.957497 0.000000 9495 +informationpres 0 1 6.957497 0.000000 9496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..0e49ac7e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +model 0 145 1.945910 0.000000 69 +analysi 1 124 2.079442 2.079442 98 +document 1 121 2.079442 2.079442 89 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +seattl 1 120 2.079442 2.079442 103 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +pleas 0 113 2.197225 0.000000 114 +text 3 98 2.302585 6.907755 133 +user 1 104 2.302585 2.302585 137 +access 0 102 2.302585 0.000000 136 +search 1 95 2.397895 2.397895 155 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +associ 1 93 2.397895 2.397895 151 +section 0 94 2.397895 0.000000 149 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +larg 1 82 2.484907 2.484907 168 +novemb 1 81 2.484907 2.484907 179 +librari 1 87 2.484907 2.484907 181 +environ 0 84 2.484907 0.000000 177 +institut 0 84 2.484907 0.000000 187 +june 1 79 2.564949 2.564949 214 +method 0 80 2.564949 0.000000 213 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +nation 1 74 2.639057 2.639057 240 +addit 0 74 2.639057 0.000000 228 +servic 0 72 2.639057 0.000000 236 +meet 0 72 2.639057 0.000000 229 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +free 0 73 2.639057 0.000000 224 +main 1 67 2.708050 2.708050 256 +differ 1 66 2.708050 2.708050 253 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +would 0 67 2.708050 0.000000 251 +degre 0 69 2.708050 0.000000 259 +test 0 66 2.708050 0.000000 252 +multimedia 0 68 2.708050 0.000000 258 +collect 1 65 2.772589 2.772589 268 +import 0 65 2.772589 0.000000 282 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +copi 0 63 2.772589 0.000000 284 +automat 2 61 2.833213 5.666426 306 +colleg 0 61 2.833213 0.000000 300 +unix 0 58 2.890372 0.000000 308 +publish 0 57 2.890372 0.000000 326 +sever 0 56 2.890372 0.000000 322 +faculti 0 56 2.890372 0.000000 325 +extens 0 53 2.944439 0.000000 340 +cover 0 55 2.944439 0.000000 329 +februari 0 54 2.944439 0.000000 328 +without 0 50 3.044522 0.000000 370 +approach 0 48 3.044522 0.000000 366 +done 0 47 3.091042 0.000000 381 +electron 0 47 3.091042 0.000000 379 +natur 0 44 3.135494 0.000000 406 +made 0 44 3.135494 0.000000 398 +third 0 43 3.178054 0.000000 412 +around 0 43 3.178054 0.000000 415 +york 1 41 3.218876 3.218876 435 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +annual 1 40 3.258097 3.258097 458 +continu 0 39 3.258097 0.000000 448 +transact 0 39 3.258097 0.000000 438 +microsoft 0 38 3.295837 0.000000 468 +purpos 1 37 3.332205 3.332205 481 +field 0 37 3.332205 0.000000 482 +mean 0 37 3.332205 0.000000 477 +respons 0 37 3.332205 0.000000 476 +word 0 34 3.401197 0.000000 508 +statist 0 35 3.401197 0.000000 521 +represent 0 35 3.401197 0.000000 512 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +queri 1 33 3.433987 3.433987 524 +articl 0 33 3.433987 0.000000 530 +express 0 32 3.465736 0.000000 540 +storag 0 31 3.496508 0.000000 553 +exist 1 30 3.555348 3.555348 569 +abl 0 30 3.555348 0.000000 566 +semant 0 29 3.583519 0.000000 587 +full 1 28 3.610918 3.610918 615 +progress 0 28 3.610918 0.000000 598 +retriev 2 27 3.637586 7.275172 621 +determin 1 27 3.637586 3.637586 630 +subject 1 26 3.688879 3.688879 647 +consist 1 26 3.688879 3.688879 651 +store 0 24 3.761200 0.000000 693 +handl 0 24 3.761200 0.000000 685 +size 0 23 3.806662 0.000000 713 +brows 0 23 3.806662 0.000000 726 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +flexibl 0 21 3.912023 0.000000 792 +corpor 0 21 3.912023 0.000000 802 +department 0 20 3.951244 0.000000 839 +hypertext 1 19 4.007333 4.007333 865 +media 0 19 4.007333 0.000000 861 +item 0 19 4.007333 0.000000 856 +appropri 0 18 4.060443 0.000000 883 +germani 1 17 4.110874 4.110874 946 +expand 0 17 4.110874 0.000000 928 +analyz 0 17 4.110874 0.000000 925 +moor 0 17 4.110874 0.000000 936 +vector 0 16 4.174387 0.000000 961 +capabl 1 15 4.248495 4.248495 1016 +piec 0 15 4.248495 0.000000 1020 +demand 0 14 4.317488 0.000000 1073 +conduct 0 14 4.317488 0.000000 1065 +rank 0 14 4.317488 0.000000 1063 +context 0 13 4.382027 0.000000 1153 +station 0 13 4.382027 0.000000 1157 +denis 1 12 4.465908 4.465908 1255 +readabl 0 12 4.465908 0.000000 1258 +optic 0 12 4.465908 0.000000 1221 +hypermedia 0 12 4.465908 0.000000 1247 +realiti 0 12 4.465908 0.000000 1272 +pageif 0 12 4.465908 0.000000 1275 +smart 1 11 4.553877 4.553877 1352 +refin 0 11 4.553877 0.000000 1363 +probabilist 0 11 4.553877 0.000000 1343 +rapid 0 10 4.653960 0.000000 1453 +paragraph 0 10 4.653960 0.000000 1449 +sentenc 0 10 4.653960 0.000000 1413 +equip 0 10 4.653960 0.000000 1459 +criteria 0 9 4.753590 0.000000 1477 +charg 0 9 4.753590 0.000000 1582 +hundr 0 9 4.753590 0.000000 1528 +mass 0 8 4.875197 0.000000 1732 +formul 0 8 4.875197 0.000000 1733 +matter 0 8 4.875197 0.000000 1627 +colloquium 0 8 4.875197 0.000000 1734 +harvard 0 7 5.010635 0.000000 1926 +densiti 0 7 5.010635 0.000000 1927 +vehicl 0 7 5.010635 0.000000 1928 +sparc 0 7 5.010635 0.000000 1860 +maryland 1 6 5.164786 5.164786 2140 +furthermor 0 6 5.164786 0.000000 2141 +restrict 0 6 5.164786 0.000000 2129 +ohio 1 5 5.347108 5.347108 2447 +corpu 1 5 5.347108 5.347108 2282 +medicin 1 5 5.347108 5.347108 2448 +eduph 0 5 5.347108 0.000000 2449 +accuraci 0 5 5.347108 0.000000 2450 +feder 0 5 5.347108 0.000000 2266 +travers 0 5 5.347108 0.000000 2363 +allan 1 4 5.568345 5.568345 2849 +rapidli 1 4 5.568345 5.568345 2850 +machineri 1 4 5.568345 5.568345 2851 +termin 0 4 5.568345 0.000000 2852 +ireland 0 4 5.568345 0.000000 2853 +sophist 1 3 5.857933 5.857933 3545 +genom 1 3 5.857933 5.857933 3546 +trec 0 3 5.857933 0.000000 3547 +gigabyt 0 3 5.857933 0.000000 3548 +activitiesmemb 0 3 5.857933 0.000000 3549 +zurich 0 3 5.857933 0.000000 3550 +switzerland 0 3 5.857933 0.000000 3551 +vega 0 3 5.857933 0.000000 3450 +softwareth 0 3 5.857933 0.000000 3552 +sigir 1 2 6.263398 6.263398 4873 +bucklei 1 2 6.263398 6.263398 4874 +nevada 1 2 6.263398 6.263398 4875 +gerard 0 2 6.263398 0.000000 4876 +decreas 0 2 6.263398 0.000000 4877 +absenc 0 2 6.263398 0.000000 4878 +unrestrict 0 2 6.263398 0.000000 4879 +excerpt 0 2 6.263398 0.000000 4880 +activitiesassoci 0 2 6.263398 0.000000 4881 +systemsprogram 0 2 6.263398 0.000000 4882 +dublin 0 2 6.263398 0.000000 4883 +moscow 0 2 6.263398 0.000000 4884 +encyclopedia 1 1 6.957497 6.957497 9505 +bethesda 1 1 6.957497 6.957497 9506 +columbu 1 1 6.957497 6.957497 9507 +saltongerard 0 1 6.957497 0.000000 9508 +saltonprofessorg 0 1 6.957497 0.000000 9509 +cheapli 0 1 6.957497 0.000000 9510 +funk 0 1 6.957497 0.000000 9511 +wagnal 0 1 6.957497 0.000000 9512 +committeeprofession 0 1 6.957497 0.000000 9513 +seventeenth 0 1 6.957497 0.000000 9514 +darmstadt 0 1 6.957497 0.000000 9515 +lecturesautomat 0 1 6.957497 0.000000 9516 +konstanz 0 1 6.957497 0.000000 9517 +asi 0 1 6.957497 0.000000 9518 +publicationsapproach 0 1 6.957497 0.000000 9519 +passag 0 1 6.957497 0.000000 9520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..5288b707 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +includ 1 208 1.609438 1.609438 42 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +problem 1 147 1.945910 1.945910 75 +confer 1 126 2.079442 2.079442 100 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +world 0 115 2.197225 0.000000 126 +theori 0 111 2.197225 0.000000 127 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +proceed 0 93 2.397895 0.000000 152 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +journal 0 83 2.484907 0.000000 183 +novemb 0 81 2.484907 0.000000 179 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +solv 1 73 2.639057 2.639057 234 +appli 0 71 2.639057 0.000000 226 +august 0 66 2.708050 0.000000 257 +function 0 62 2.772589 0.000000 275 +scientif 1 53 2.944439 2.944439 341 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +numer 0 49 3.044522 0.000000 369 +approach 0 48 3.044522 0.000000 366 +annual 0 40 3.258097 0.000000 458 +china 0 37 3.332205 0.000000 487 +global 0 34 3.401197 0.000000 520 +return 0 34 3.401197 0.000000 502 +scientist 1 31 3.496508 3.496508 560 +exist 0 30 3.555348 0.000000 569 +chines 1 29 3.583519 3.583519 595 +consid 0 29 3.583519 0.000000 590 +scale 0 28 3.610918 0.000000 613 +subject 1 26 3.688879 3.688879 647 +bound 1 26 3.688879 3.688879 659 +enhanc 0 26 3.688879 0.000000 644 +siam 0 21 3.912023 0.000000 800 +department 0 20 3.951244 0.000000 839 +region 1 19 4.007333 4.007333 875 +beij 0 19 4.007333 0.000000 876 +minim 1 18 4.060443 4.060443 887 +accept 0 18 4.060443 0.000000 879 +moor 0 17 4.110874 0.000000 936 +young 1 16 4.174387 4.174387 991 +condit 0 16 4.174387 0.000000 975 +reflect 0 15 4.248495 0.000000 1034 +nonlinear 1 14 4.317488 4.317488 1107 +denis 1 12 4.465908 4.465908 1255 +pageif 0 12 4.465908 0.000000 1275 +penalti 0 10 4.653960 0.000000 1405 +trust 1 9 4.753590 4.753590 1583 +exact 0 9 4.753590 0.000000 1509 +converg 1 7 5.010635 5.010635 1844 +constrain 1 6 5.164786 5.164786 2042 +eduph 0 5 5.347108 0.000000 2449 +affin 0 5 5.347108 0.000000 2378 +interior 0 5 5.347108 0.000000 2439 +waterloo 0 3 5.857933 0.000000 3523 +unconstrain 0 2 6.263398 0.000000 4499 +publicationsa 0 2 6.263398 0.000000 4885 +nonlinearli 1 1 6.957497 6.957497 9521 +yui 0 1 6.957497 0.000000 9522 +liyui 0 1 6.957497 0.000000 9523 +liresearch 0 1 6.957497 0.000000 9524 +associateyui 0 1 6.957497 0.000000 9525 +lecturesan 0 1 6.957497 0.000000 9526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..108d07a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +algorithm 1 162 1.791759 1.791759 57 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +studi 1 120 2.079442 2.079442 91 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +machin 0 129 2.079442 0.000000 95 +introduct 0 126 2.079442 0.000000 87 +report 0 131 2.079442 0.000000 92 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +code 1 108 2.197225 2.197225 116 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +present 1 91 2.397895 2.397895 145 +call 0 91 2.397895 0.000000 153 +center 0 88 2.397895 0.000000 158 +proceed 0 93 2.397895 0.000000 152 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +journal 1 83 2.484907 2.484907 183 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +novemb 0 81 2.484907 0.000000 179 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +orient 1 80 2.564949 2.564949 205 +method 0 80 2.564949 0.000000 213 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +workshop 1 71 2.639057 2.639057 239 +meet 1 72 2.639057 2.639057 229 +symposium 1 72 2.639057 2.639057 238 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +januari 1 62 2.772589 2.772589 264 +function 1 62 2.772589 2.772589 275 +septemb 1 65 2.772589 2.772589 274 +ithaca 0 65 2.772589 0.000000 294 +type 0 61 2.833213 0.000000 296 +special 0 56 2.890372 0.000000 320 +space 0 57 2.890372 0.000000 310 +reason 0 57 2.890372 0.000000 318 +publish 0 57 2.890372 0.000000 326 +scientif 1 53 2.944439 2.944439 341 +allow 0 53 2.944439 0.000000 333 +profession 0 51 2.995732 0.000000 345 +numer 0 49 3.044522 0.000000 369 +principl 0 48 3.044522 0.000000 357 +done 0 47 3.091042 0.000000 381 +effect 0 46 3.091042 0.000000 385 +algebra 1 45 3.135494 3.135494 394 +mechan 0 43 3.178054 0.000000 416 +york 1 41 3.218876 3.218876 435 +autom 0 41 3.218876 0.000000 434 +review 0 42 3.218876 0.000000 425 +societi 1 40 3.258097 3.258097 456 +transact 0 39 3.258097 0.000000 438 +error 0 40 3.258097 0.000000 449 +annual 0 40 3.258097 0.000000 458 +electr 1 38 3.295837 3.295837 461 +correct 0 38 3.295837 0.000000 462 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +extend 0 32 3.465736 0.000000 539 +richard 0 31 3.496508 0.000000 559 +scientist 0 31 3.496508 0.000000 560 +specifi 0 30 3.555348 0.000000 568 +compon 0 30 3.555348 0.000000 570 +common 0 30 3.555348 0.000000 574 +focus 1 29 3.583519 3.583519 584 +symbol 1 27 3.637586 3.637586 620 +american 1 27 3.637586 3.637586 634 +challeng 0 26 3.688879 0.000000 653 +constraint 0 26 3.688879 0.000000 636 +equat 1 23 3.806662 3.806662 724 +toolkit 1 20 3.951244 3.951244 835 +department 0 20 3.951244 0.000000 839 +region 1 19 4.007333 4.007333 875 +north 0 19 4.007333 0.000000 873 +boston 0 19 4.007333 0.000000 862 +lisp 0 18 4.060443 0.000000 897 +differenti 1 17 4.110874 4.110874 921 +layer 0 17 4.110874 0.000000 926 +moor 0 17 4.110874 0.000000 936 +modern 0 16 4.174387 0.000000 966 +vector 0 16 4.174387 0.000000 961 +susan 1 15 4.248495 4.248495 1050 +polynomi 1 14 4.317488 4.317488 1069 +dean 1 14 4.317488 4.317488 1104 +massachusett 0 14 4.317488 0.000000 1118 +joint 1 13 4.382027 4.382027 1130 +convert 0 13 4.382027 0.000000 1122 +opportun 0 13 4.382027 0.000000 1161 +denis 1 12 4.465908 4.465908 1255 +calcul 0 12 4.465908 0.000000 1268 +deduct 0 12 4.465908 0.000000 1236 +pageif 0 12 4.465908 0.000000 1275 +israel 1 11 4.553877 4.553877 1366 +vista 1 10 4.653960 4.653960 1452 +matric 0 10 4.653960 0.000000 1399 +decomposit 0 10 4.653960 0.000000 1439 +modular 0 10 4.653960 0.000000 1392 +factor 0 9 4.753590 0.000000 1544 +ring 0 8 4.875197 0.000000 1684 +ideal 0 8 4.875197 0.000000 1630 +aris 0 7 5.010635 0.000000 1924 +boundari 0 7 5.010635 0.000000 1929 +pursu 0 7 5.010635 0.000000 1902 +refere 0 7 5.010635 0.000000 1895 +interpol 0 7 5.010635 0.000000 1823 +rubinfeld 1 6 5.164786 5.164786 1998 +carolina 0 6 5.164786 0.000000 2142 +kluwer 0 6 5.164786 0.000000 2143 +fluid 1 5 5.347108 5.347108 2440 +ronitt 1 5 5.347108 5.347108 2265 +eduph 0 5 5.347108 0.000000 2449 +colleagu 0 5 5.347108 0.000000 2304 +ration 0 5 5.347108 0.000000 2427 +synthes 0 5 5.347108 0.000000 2451 +suni 0 5 5.347108 0.000000 2452 +weyl 1 4 5.568345 5.568345 2854 +dexter 1 4 5.568345 5.568345 2855 +kozen 1 4 5.568345 5.568345 2619 +technion 1 4 5.568345 5.568345 2856 +suppli 0 4 5.568345 0.000000 2611 +substrat 0 4 5.568345 0.000000 2857 +weizmann 0 4 5.568345 0.000000 2858 +syracus 1 3 5.857933 5.857933 3553 +haifa 1 3 5.857933 5.857933 3554 +aerospac 0 3 5.857933 0.000000 3555 +durham 0 3 5.857933 0.000000 3279 +dawson 1 2 6.263398 6.263398 4886 +microstorag 1 2 6.263398 6.263398 4887 +activitieseditori 0 2 6.263398 0.000000 4888 +softwareprogram 0 2 6.263398 0.000000 4889 +irreduc 0 2 6.263398 0.000000 4890 +rehovot 0 2 6.263398 0.000000 4891 +albani 0 2 6.263398 0.000000 4892 +multivari 0 2 6.263398 0.000000 4151 +landau 1 1 6.957497 6.957497 9527 +zippelrichard 0 1 6.957497 0.000000 9528 +zippelsenior 0 1 6.957497 0.000000 9529 +associaterz 0 1 6.957497 0.000000 9530 +computationlecturesalgebra 0 1 6.957497 0.000000 9531 +publicationseffect 0 1 6.957497 0.000000 9532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..f122a038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +languag 0 227 1.386294 0.000000 26 +group 1 183 1.609438 1.609438 36 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +distribut 2 162 1.791759 3.583518 51 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +support 1 132 1.945910 1.945910 83 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +provid 0 121 2.079442 0.000000 94 +report 0 131 2.079442 0.000000 92 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +commun 1 95 2.397895 2.397895 157 +real 1 93 2.397895 2.397895 144 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +start 0 83 2.484907 0.000000 173 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +academ 0 82 2.484907 0.000000 178 +journal 0 83 2.484907 0.000000 183 +novemb 0 81 2.484907 0.000000 179 +dynam 0 76 2.564949 0.000000 194 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +refer 0 78 2.564949 0.000000 203 +orient 0 80 2.564949 0.000000 205 +master 0 76 2.564949 0.000000 216 +decemb 0 80 2.564949 0.000000 215 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +nation 0 74 2.639057 0.000000 240 +practic 1 70 2.708050 2.708050 246 +goal 0 66 2.708050 0.000000 250 +multimedia 0 68 2.708050 0.000000 258 +integr 0 67 2.708050 0.000000 245 +virtual 1 62 2.772589 2.772589 285 +import 0 65 2.772589 0.000000 282 +collect 0 65 2.772589 0.000000 268 +polici 0 64 2.772589 0.000000 279 +reason 0 57 2.890372 0.000000 318 +special 0 56 2.890372 0.000000 320 +major 0 56 2.890372 0.000000 315 +faculti 0 56 2.890372 0.000000 325 +extens 1 53 2.944439 2.944439 340 +much 0 52 2.995732 0.000000 349 +profession 0 51 2.995732 0.000000 345 +set 0 50 3.044522 0.000000 361 +basic 0 50 3.044522 0.000000 360 +approach 0 48 3.044522 0.000000 366 +california 1 46 3.091042 3.091042 388 +featur 1 46 3.091042 3.091042 386 +done 0 47 3.091042 0.000000 381 +execut 0 45 3.135494 0.000000 404 +mark 0 44 3.135494 0.000000 403 +protocol 0 45 3.135494 0.000000 407 +combin 0 42 3.218876 0.000000 421 +press 0 42 3.218876 0.000000 419 +theoret 0 39 3.258097 0.000000 446 +transact 0 39 3.258097 0.000000 438 +societi 0 40 3.258097 0.000000 456 +annual 0 40 3.258097 0.000000 458 +origin 0 38 3.295837 0.000000 472 +brian 0 38 3.295837 0.000000 466 +purpos 0 37 3.332205 0.000000 481 +committe 1 34 3.401197 3.401197 522 +singl 0 34 3.401197 0.000000 510 +return 0 34 3.401197 0.000000 502 +toler 1 33 3.433987 3.433987 533 +within 0 33 3.433987 0.000000 525 +fault 1 32 3.465736 3.465736 547 +collabor 1 32 3.465736 3.465736 543 +idea 0 32 3.465736 0.000000 545 +ad 0 32 3.465736 0.000000 544 +scientist 0 31 3.496508 0.000000 560 +secur 1 30 3.555348 3.555348 577 +focu 0 30 3.555348 0.000000 571 +option 0 30 3.555348 0.000000 575 +specifi 0 30 3.555348 0.000000 568 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +univ 0 28 3.610918 0.000000 617 +intend 0 28 3.610918 0.000000 599 +actual 0 28 3.610918 0.000000 604 +packag 0 28 3.610918 0.000000 614 +effort 1 26 3.688879 3.688879 652 +consist 1 26 3.688879 3.688879 651 +berkelei 0 26 3.688879 0.000000 657 +reliabl 1 25 3.737670 3.737670 674 +concern 0 25 3.737670 0.000000 666 +although 0 25 3.737670 0.000000 667 +supercomput 0 25 3.737670 0.000000 681 +fundament 0 25 3.737670 0.000000 661 +aspect 0 25 3.737670 0.000000 663 +mike 0 24 3.761200 0.000000 703 +head 1 23 3.806662 3.806662 732 +cooper 0 22 3.850148 0.000000 757 +properti 0 22 3.850148 0.000000 749 +flexibl 0 21 3.912023 0.000000 792 +toolkit 1 20 3.951244 3.951244 835 +exploit 0 20 3.951244 0.000000 836 +smith 0 20 3.951244 0.000000 820 +department 0 20 3.951244 0.000000 839 +prove 1 19 4.007333 4.007333 848 +runtim 0 19 4.007333 0.000000 858 +failur 0 18 4.060443 0.000000 898 +speed 0 18 4.060443 0.000000 911 +element 0 18 4.060443 0.000000 895 +layer 1 17 4.110874 4.110874 926 +seek 0 17 4.110874 0.000000 954 +former 0 17 4.110874 0.000000 956 +moor 0 17 4.110874 0.000000 936 +upon 0 16 4.174387 0.000000 978 +permit 0 16 4.174387 0.000000 962 +critic 0 16 4.174387 0.000000 982 +side 1 15 4.248495 4.248495 1022 +action 0 15 4.248495 0.000000 1038 +horu 2 14 4.317488 8.634976 1116 +becam 0 14 4.317488 0.000000 1117 +coordin 0 13 4.382027 0.000000 1182 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +denis 1 12 4.465908 4.465908 1255 +kenneth 0 12 4.465908 0.000000 1265 +replic 0 12 4.465908 0.000000 1231 +robust 0 12 4.465908 0.000000 1271 +infrastructur 0 12 4.465908 0.000000 1234 +pageif 0 12 4.465908 0.000000 1275 +reness 1 11 4.553877 4.553877 1333 +broad 0 11 4.553877 0.000000 1302 +probabilist 0 11 4.553877 0.000000 1343 +primit 0 11 4.553877 0.000000 1317 +certain 0 10 4.653960 0.000000 1393 +nuprl 0 10 4.653960 0.000000 1402 +guarante 0 10 4.653960 0.000000 1391 +werner 0 10 4.653960 0.000000 1385 +unusu 0 9 4.753590 0.000000 1566 +latter 0 9 4.753590 0.000000 1522 +correctli 0 9 4.753590 0.000000 1478 +robbert 0 9 4.753590 0.000000 1529 +birman 0 9 4.753590 0.000000 1531 +vogel 0 8 4.875197 0.000000 1622 +synchroni 1 7 5.010635 5.010635 1923 +friedman 1 7 5.010635 5.010635 1886 +chief 1 7 5.010635 5.010635 1829 +privaci 1 6 5.164786 5.164786 2144 +emerg 0 6 5.164786 0.000000 2038 +recruit 0 6 5.164786 0.000000 2145 +isi 1 5 5.347108 5.347108 2443 +notabl 1 5 5.347108 5.347108 2276 +broadcast 0 5 5.347108 0.000000 2453 +activitieseditor 0 5 5.347108 0.000000 2454 +popular 0 4 5.568345 0.000000 2802 +hayden 0 4 5.568345 0.000000 2844 +publicationsth 0 4 5.568345 0.000000 2859 +reconfigur 1 3 5.857933 5.857933 3556 +leverag 0 3 5.857933 0.000000 3153 +embodi 0 3 5.857933 0.000000 3236 +reiter 0 3 5.857933 0.000000 3461 +constabl 0 3 5.857933 0.000000 3186 +act 0 3 5.857933 0.000000 3557 +leadership 0 3 5.857933 0.000000 3320 +alamito 0 3 5.857933 0.000000 3558 +glade 0 3 5.857933 0.000000 3537 +benign 0 2 6.263398 0.000000 4893 +activitieschair 0 2 6.263398 0.000000 4894 +isat 0 2 6.263398 0.000000 4895 +birmankenneth 0 1 6.957497 0.000000 9533 +birmanprofessorphd 0 1 6.957497 0.000000 9534 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..2db42924 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +model 2 145 1.945910 3.891820 69 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +technolog 1 131 2.079442 2.079442 102 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +topic 1 114 2.197225 2.197225 110 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +graphic 2 90 2.397895 4.795790 147 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +environ 1 84 2.484907 2.484907 177 +institut 1 84 2.484907 2.484907 187 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +control 0 82 2.484907 0.000000 164 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +journal 0 83 2.484907 0.000000 183 +novemb 0 81 2.484907 0.000000 179 +method 1 80 2.564949 2.564949 213 +dynam 0 76 2.564949 0.000000 194 +nation 1 74 2.639057 2.639057 240 +involv 1 71 2.639057 2.639057 227 +effici 0 73 2.639057 0.000000 233 +differ 0 66 2.708050 0.000000 253 +complex 1 64 2.772589 2.772589 269 +foundat 1 62 2.772589 2.772589 286 +prof 0 64 2.772589 0.000000 273 +descript 0 64 2.772589 0.000000 271 +evalu 0 64 2.772589 0.000000 266 +creat 0 63 2.772589 0.000000 277 +laboratori 0 63 2.772589 0.000000 292 +interact 0 62 2.772589 0.000000 270 +improv 0 62 2.772589 0.000000 289 +scientif 1 53 2.944439 2.944439 341 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +visual 1 48 3.044522 3.044522 372 +california 0 46 3.091042 0.000000 388 +made 0 44 3.135494 0.000000 398 +past 0 42 3.218876 0.000000 428 +annual 0 40 3.258097 0.000000 458 +multi 0 36 3.367296 0.000000 493 +within 0 33 3.433987 0.000000 525 +board 0 33 3.433987 0.000000 528 +anim 0 31 3.496508 0.000000 557 +rang 0 30 3.555348 0.000000 565 +focu 0 30 3.555348 0.000000 571 +particip 0 29 3.583519 0.000000 589 +progress 0 28 3.610918 0.000000 598 +determin 0 27 3.637586 0.000000 630 +american 0 27 3.637586 0.000000 634 +constraint 0 26 3.688879 0.000000 636 +strategi 1 25 3.737670 3.737670 682 +trace 0 25 3.737670 0.000000 677 +hill 0 25 3.737670 0.000000 670 +fellow 1 24 3.761200 3.761200 701 +motion 0 24 3.761200 0.000000 699 +displai 1 23 3.806662 3.806662 712 +input 1 23 3.806662 3.806662 727 +director 1 22 3.850148 3.850148 767 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +geometri 0 22 3.850148 0.000000 752 +flexibl 0 21 3.912023 0.000000 792 +synthesi 0 20 3.951244 0.000000 834 +facil 0 20 3.951244 0.000000 814 +department 0 20 3.951244 0.000000 839 +geometr 0 19 4.007333 0.000000 852 +media 0 19 4.007333 0.000000 861 +north 0 19 4.007333 0.000000 873 +dimension 0 18 4.060443 0.000000 909 +medic 1 17 4.110874 4.110874 958 +previous 0 17 4.110874 0.000000 923 +render 0 17 4.110874 0.000000 947 +moor 0 17 4.110874 0.000000 936 +spatial 0 16 4.174387 0.000000 988 +brown 0 16 4.174387 0.000000 977 +reflect 1 15 4.248495 4.248495 1034 +micro 0 15 4.248495 0.000000 1031 +conduct 1 14 4.317488 4.317488 1065 +denis 1 12 4.465908 4.465908 1255 +pageif 0 12 4.465908 0.000000 1275 +host 0 11 4.553877 0.000000 1306 +volum 0 11 4.553877 0.000000 1347 +modular 0 10 4.653960 0.000000 1392 +surfac 1 9 4.753590 4.753590 1574 +donald 0 9 4.753590 0.000000 1510 +routin 0 9 4.753590 0.000000 1549 +tempor 0 9 4.753590 0.000000 1584 +light 0 9 4.753590 0.000000 1533 +utah 0 9 4.753590 0.000000 1585 +realist 1 8 4.875197 4.875197 1665 +polygon 0 8 4.875197 0.000000 1723 +textur 0 8 4.875197 0.000000 1677 +academi 0 8 4.875197 0.000000 1735 +clip 0 7 5.010635 0.000000 1868 +parametr 0 7 5.010635 0.000000 1819 +suffici 0 7 5.010635 0.000000 1897 +core 0 7 5.010635 0.000000 1809 +hidden 0 6 5.164786 0.000000 1987 +photographi 0 6 5.164786 0.000000 2146 +carolina 0 6 5.164786 0.000000 2142 +biolog 0 6 5.164786 0.000000 2147 +decad 0 5 5.347108 0.000000 2455 +testb 0 5 5.347108 0.000000 2456 +anti 0 5 5.347108 0.000000 2434 +chapel 0 5 5.347108 0.000000 2457 +jacob 0 4 5.568345 0.000000 2667 +gould 0 3 5.857933 0.000000 3559 +alias 1 2 6.263398 6.263398 4823 +activitieseditori 0 2 6.263398 0.000000 4888 +greenbergdonald 0 1 6.957497 0.000000 9535 +greenberg 0 1 6.957497 0.000000 9536 +schurman 0 1 6.957497 0.000000 9537 +perceptu 0 1 6.957497 0.000000 9538 +activitiesdirector 0 1 6.957497 0.000000 9539 +visualizationprofession 0 1 6.957497 0.000000 9540 +acmreturn 0 1 6.957497 0.000000 9541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..8bb89262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +class 1 199 1.609438 1.609438 37 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +professor 0 137 1.945910 0.000000 76 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +report 0 131 2.079442 0.000000 92 +intern 1 108 2.197225 2.197225 128 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +structur 0 106 2.197225 0.000000 105 +world 0 115 2.197225 0.000000 126 +pleas 0 113 2.197225 0.000000 114 +need 1 98 2.302585 2.302585 135 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +associ 1 93 2.397895 2.397895 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +journal 2 83 2.484907 4.969814 183 +resourc 1 81 2.484907 2.484907 172 +member 1 84 2.484907 2.484907 165 +novemb 1 81 2.484907 2.484907 179 +institut 0 84 2.484907 0.000000 187 +requir 0 81 2.484907 0.000000 167 +school 0 84 2.484907 0.000000 188 +chang 0 82 2.484907 0.000000 163 +state 0 76 2.564949 0.000000 207 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +solv 1 73 2.639057 2.639057 234 +nation 1 74 2.639057 2.639057 240 +logic 0 71 2.639057 0.000000 230 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +degre 0 69 2.708050 0.000000 259 +august 0 66 2.708050 0.000000 257 +complex 2 64 2.772589 5.545178 269 +foundat 1 62 2.772589 2.772589 286 +guid 0 63 2.772589 0.000000 267 +interact 0 62 2.772589 0.000000 270 +visit 0 63 2.772589 0.000000 288 +variou 1 56 2.890372 2.890372 317 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +februari 1 54 2.944439 2.944439 328 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +profession 0 51 2.995732 0.000000 345 +advisor 0 51 2.995732 0.000000 355 +electron 1 47 3.091042 3.091042 379 +california 0 46 3.091042 0.000000 388 +understand 0 47 3.091042 0.000000 384 +physic 0 47 3.091042 0.000000 377 +natur 1 44 3.135494 3.135494 406 +york 0 41 3.218876 0.000000 435 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +futur 0 41 3.218876 0.000000 427 +theoret 1 39 3.258097 3.258097 446 +annual 0 40 3.258097 0.000000 458 +committe 1 34 3.401197 3.401197 522 +random 1 34 3.401197 3.401197 511 +award 1 34 3.401197 3.401197 523 +return 0 34 3.401197 0.000000 502 +board 1 33 3.433987 3.433987 528 +within 0 33 3.433987 0.000000 525 +given 0 32 3.465736 0.000000 538 +richard 0 31 3.496508 0.000000 559 +hard 0 30 3.555348 0.000000 563 +art 0 29 3.583519 0.000000 593 +american 1 27 3.637586 3.637586 634 +consist 0 26 3.688879 0.000000 651 +bound 0 26 3.688879 0.000000 659 +fellow 1 24 3.761200 3.761200 701 +seri 1 24 3.761200 3.761200 708 +doctor 0 24 3.761200 0.000000 709 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +sequenti 0 22 3.850148 0.000000 745 +director 0 22 3.850148 0.000000 767 +siam 0 21 3.912023 0.000000 800 +divis 0 21 3.912023 0.000000 803 +exploit 0 20 3.951244 0.000000 836 +department 0 20 3.951244 0.000000 839 +walter 0 17 4.110874 0.000000 950 +germani 0 17 4.110874 0.000000 946 +moor 0 17 4.110874 0.000000 936 +georg 0 16 4.174387 0.000000 994 +brown 0 16 4.174387 0.000000 977 +contribut 0 15 4.248495 0.000000 1021 +topolog 0 14 4.317488 0.000000 1089 +essenti 0 13 4.382027 0.000000 1137 +denis 1 12 4.465908 4.465908 1255 +grow 0 12 4.465908 0.000000 1209 +amount 0 12 4.465908 0.000000 1208 +speech 0 12 4.465908 0.000000 1222 +weight 0 12 4.465908 0.000000 1204 +pageif 0 12 4.465908 0.000000 1275 +council 1 11 4.553877 4.553877 1364 +distinguish 1 11 4.553877 4.553877 1357 +rice 0 11 4.553877 0.000000 1336 +govern 0 9 4.753590 0.000000 1581 +classifi 0 9 4.753590 0.000000 1537 +classif 0 9 4.753590 0.000000 1586 +telecommun 0 9 4.753590 0.000000 1565 +observ 0 9 4.753590 0.000000 1578 +academi 1 8 4.875197 4.875197 1735 +quantit 0 8 4.875197 0.000000 1654 +gain 0 8 4.875197 0.000000 1730 +attent 0 8 4.875197 0.000000 1651 +virginia 0 8 4.875197 0.000000 1659 +trade 0 7 5.010635 0.000000 1815 +foreign 0 7 5.010635 0.000000 1919 +dimens 0 7 5.010635 0.000000 1930 +ture 1 6 5.164786 5.164786 1997 +advisori 1 6 5.164786 5.164786 2148 +chicago 1 6 5.164786 5.164786 2149 +recruit 0 6 5.164786 0.000000 2145 +prize 0 6 5.164786 0.000000 2150 +yield 0 5 5.347108 0.000000 2458 +activitieseditor 0 5 5.347108 0.000000 2454 +ifip 0 5 5.347108 0.000000 2459 +houston 0 5 5.347108 0.000000 2460 +scope 0 5 5.347108 0.000000 2296 +bulletin 0 5 5.347108 0.000000 2343 +comprehens 0 4 5.568345 0.000000 2745 +monograph 0 4 5.568345 0.000000 2860 +peer 0 4 5.568345 0.000000 2742 +tennesse 0 4 5.568345 0.000000 2763 +oracl 0 4 5.568345 0.000000 2823 +hypothesi 0 4 5.568345 0.000000 2650 +fals 0 4 5.568345 0.000000 2861 +hausdorff 0 4 5.568345 0.000000 2633 +explos 0 3 5.857933 0.000000 3138 +deeper 0 3 5.857933 0.000000 3146 +off 0 3 5.857933 0.000000 3170 +nondeterminist 0 3 5.857933 0.000000 3560 +activitiesmemb 0 3 5.857933 0.000000 3549 +law 0 2 6.263398 0.000000 4896 +har 0 2 6.263398 0.000000 4252 +aaa 0 2 6.263398 0.000000 4897 +banquet 0 2 6.263398 0.000000 4898 +publicationson 0 2 6.263398 0.000000 4899 +johan 0 2 6.263398 0.000000 4900 +eatc 1 1 6.957497 6.957497 9542 +juri 0 1 6.957497 0.000000 9543 +hartmanisjuri 0 1 6.957497 0.000000 9544 +hartmani 0 1 6.957497 0.000000 9545 +strateg 0 1 6.957497 0.000000 9546 +representativeschair 0 1 6.957497 0.000000 9547 +committeehonorsacm 0 1 6.957497 0.000000 9548 +stearn 0 1 6.957497 0.000000 9549 +latvian 0 1 6.957497 0.000000 9550 +charter 0 1 6.957497 0.000000 9551 +presseditori 0 1 6.957497 0.000000 9552 +sciencegoedel 0 1 6.957497 0.000000 9553 +awardshonorari 0 1 6.957497 0.000000 9554 +dortmund 0 1 6.957497 0.000000 9555 +lecturessom 0 1 6.957497 0.000000 9556 +benni 0 1 6.957497 0.000000 9557 +chor 0 1 6.957497 0.000000 9558 +od 0 1 6.957497 0.000000 9559 +goldreich 0 1 6.957497 0.000000 9560 +hastad 0 1 6.957497 0.000000 9561 +desh 0 1 6.957497 0.000000 9562 +ranjan 0 1 6.957497 0.000000 9563 +pankaj 0 1 6.957497 0.000000 9564 +rohatgi 0 1 6.957497 0.000000 9565 +kolmogorov 0 1 6.957497 0.000000 9566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..433b1262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +last 0 314 1.098612 0.000000 14 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +modifi 0 178 1.609438 0.000000 35 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +technolog 1 131 2.079442 2.079442 102 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +intern 1 108 2.197225 2.197225 128 +well 0 109 2.197225 0.000000 121 +manag 0 114 2.197225 0.000000 125 +mathemat 0 108 2.197225 0.000000 123 +pleas 0 113 2.197225 0.000000 114 +access 1 102 2.302585 2.302585 136 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +associ 1 93 2.397895 2.397895 151 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +academ 0 82 2.484907 0.000000 178 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +interfac 0 79 2.564949 0.000000 209 +state 0 76 2.564949 0.000000 207 +nation 1 74 2.639057 2.639057 240 +involv 0 71 2.639057 0.000000 227 +david 0 71 2.639057 0.000000 232 +simul 0 66 2.708050 0.000000 255 +multimedia 0 68 2.708050 0.000000 258 +knowledg 0 67 2.708050 0.000000 243 +januari 0 62 2.772589 0.000000 264 +organ 0 65 2.772589 0.000000 265 +colleg 1 61 2.833213 2.833213 300 +variou 0 56 2.890372 0.000000 317 +scientif 1 53 2.944439 2.944439 341 +profession 0 51 2.995732 0.000000 345 +appoint 0 49 3.044522 0.000000 358 +physic 0 47 3.091042 0.000000 377 +electron 0 47 3.091042 0.000000 379 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +continu 0 39 3.258097 0.000000 448 +annual 0 40 3.258097 0.000000 458 +electr 0 38 3.295837 0.000000 461 +represent 0 35 3.401197 0.000000 512 +committe 0 34 3.401197 0.000000 522 +board 1 33 3.433987 3.433987 528 +john 0 33 3.433987 0.000000 532 +within 0 33 3.433987 0.000000 525 +collabor 1 32 3.465736 3.465736 543 +storag 0 31 3.496508 0.000000 553 +art 0 29 3.583519 0.000000 593 +american 1 27 3.637586 3.637586 634 +supercomput 0 25 3.737670 0.000000 681 +fellow 1 24 3.761200 3.761200 701 +seri 0 24 3.761200 0.000000 708 +geometri 0 22 3.850148 0.000000 752 +unit 1 21 3.912023 3.912023 779 +among 0 21 3.912023 0.000000 781 +siam 0 21 3.912023 0.000000 800 +department 0 20 3.951244 0.000000 839 +geometr 0 19 4.007333 0.000000 852 +stanford 0 17 4.110874 0.000000 955 +moor 0 17 4.110874 0.000000 936 +remot 0 15 4.248495 0.000000 1041 +dean 1 14 4.317488 4.317488 1104 +heterogen 0 14 4.317488 0.000000 1090 +discret 0 13 4.382027 0.000000 1165 +captur 1 12 4.465908 4.465908 1232 +denis 1 12 4.465908 4.465908 1255 +robust 0 12 4.465908 0.000000 1271 +pageif 0 12 4.465908 0.000000 1275 +persist 0 11 4.553877 0.000000 1367 +council 0 11 4.553877 0.000000 1364 +facilit 0 10 4.653960 0.000000 1412 +packard 0 10 4.653960 0.000000 1444 +fellowship 0 10 4.653960 0.000000 1460 +academi 1 8 4.875197 4.875197 1735 +advisori 1 6 5.164786 5.164786 2148 +oxford 0 6 5.164786 0.000000 2121 +compris 0 4 5.568345 0.000000 2862 +activitiesmemb 0 3 5.857933 0.000000 3549 +defens 0 3 5.857933 0.000000 3327 +algorithmica 0 3 5.857933 0.000000 3561 +commiss 0 2 6.263398 0.000000 4901 +engineeringfellow 0 2 6.263398 0.000000 4902 +sciencesfellow 0 2 6.263398 0.000000 4903 +aaa 0 2 6.263398 0.000000 4897 +hopcroftjohn 0 1 6.957497 0.000000 9567 +hopcroftjoseph 0 1 6.957497 0.000000 9568 +silbert 0 1 6.957497 0.000000 9569 +engineeringprofessor 0 1 6.957497 0.000000 9570 +sciencephd 0 1 6.957497 0.000000 9571 +overse 0 1 6.957497 0.000000 9572 +applicationsmemb 0 1 6.957497 0.000000 9573 +boardmemb 0 1 6.957497 0.000000 9574 +forcememb 0 1 6.957497 0.000000 9575 +machinerychairman 0 1 6.957497 0.000000 9576 +trusteesmemb 0 1 6.957497 0.000000 9577 +lucil 0 1 6.957497 0.000000 9578 +foundationmemb 0 1 6.957497 0.000000 9579 +sloan 0 1 6.957497 0.000000 9580 +committeeadvisori 0 1 6.957497 0.000000 9581 +analysiseditor 0 1 6.957497 0.000000 9582 +geometryassoci 0 1 6.957497 0.000000 9583 +sciencesreturn 0 1 6.957497 0.000000 9584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..ea7d319e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +paper 1 205 1.609438 1.609438 38 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +parallel 2 169 1.791759 3.583518 60 +develop 2 174 1.791759 3.583518 53 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +like 1 132 1.945910 1.945910 81 +perform 0 143 1.945910 0.000000 74 +assign 0 135 1.945910 0.000000 66 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +compil 2 122 2.079442 4.158884 96 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +code 1 108 2.197225 2.197225 116 +structur 1 106 2.197225 2.197225 105 +intern 0 108 2.197225 0.000000 128 +pleas 0 113 2.197225 0.000000 114 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +question 1 91 2.397895 2.397895 141 +mani 0 92 2.397895 0.000000 150 +call 0 91 2.397895 0.000000 153 +proceed 0 93 2.397895 0.000000 152 +comment 0 93 2.397895 0.000000 146 +control 1 82 2.484907 2.484907 164 +journal 1 83 2.484907 2.484907 183 +larg 1 82 2.484907 2.484907 168 +librari 1 87 2.484907 2.484907 181 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +solut 0 82 2.484907 0.000000 162 +requir 0 81 2.484907 0.000000 167 +build 0 85 2.484907 0.000000 184 +institut 0 84 2.484907 0.000000 187 +novemb 0 81 2.484907 0.000000 179 +optim 1 79 2.564949 2.564949 197 +refer 1 78 2.564949 2.564949 203 +june 1 79 2.564949 2.564949 214 +method 0 80 2.564949 0.000000 213 +good 0 77 2.564949 0.000000 200 +messag 0 76 2.564949 0.000000 212 +exampl 0 77 2.564949 0.000000 195 +state 0 76 2.564949 0.000000 207 +involv 1 71 2.639057 2.639057 227 +solv 0 73 2.639057 0.000000 234 +line 0 75 2.639057 0.000000 231 +symposium 0 72 2.639057 0.000000 238 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +david 0 71 2.639057 0.000000 232 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +test 0 66 2.708050 0.000000 252 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +experi 0 64 2.772589 0.000000 283 +organ 0 65 2.772589 0.000000 265 +januari 0 62 2.772589 0.000000 264 +laboratori 0 63 2.772589 0.000000 292 +ithaca 0 65 2.772589 0.000000 294 +best 1 59 2.833213 2.833213 299 +automat 0 61 2.833213 0.000000 306 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +faculti 0 56 2.890372 0.000000 325 +local 1 55 2.944439 2.944439 334 +scientif 0 53 2.944439 0.000000 341 +processor 0 54 2.944439 0.000000 335 +februari 0 54 2.944439 0.000000 328 +investig 1 51 2.995732 2.995732 353 +profession 0 51 2.995732 0.000000 345 +approach 1 48 3.044522 3.044522 366 +numer 0 49 3.044522 0.000000 369 +principl 0 48 3.044522 0.000000 357 +adapt 0 46 3.091042 0.000000 387 +possibl 0 47 3.091042 0.000000 378 +algebra 1 45 3.135494 3.135494 394 +made 0 44 3.135494 0.000000 398 +better 0 45 3.135494 0.000000 401 +answer 0 45 3.135494 0.000000 391 +math 0 44 3.135494 0.000000 402 +show 0 43 3.178054 0.000000 417 +linear 1 41 3.218876 3.218876 431 +review 0 42 3.218876 0.000000 425 +must 1 40 3.258097 3.258097 442 +annual 1 40 3.258097 3.258097 458 +small 0 39 3.258097 0.000000 447 +form 0 39 3.258097 0.000000 443 +transact 0 39 3.258097 0.000000 438 +microsoft 1 38 3.295837 3.295837 468 +open 0 38 3.295837 0.000000 469 +paul 0 38 3.295837 0.000000 471 +hand 0 37 3.332205 0.000000 475 +award 1 34 3.401197 3.401197 523 +singl 0 34 3.401197 0.000000 510 +return 0 34 3.401197 0.000000 502 +product 0 33 3.433987 0.000000 527 +board 0 33 3.433987 0.000000 528 +transform 1 32 3.465736 3.465736 542 +extend 0 32 3.465736 0.000000 539 +independ 0 32 3.465736 0.000000 548 +produc 1 30 3.555348 3.555348 572 +exist 0 30 3.555348 0.000000 569 +graph 0 30 3.555348 0.000000 576 +depend 1 29 3.583519 3.583519 583 +multiprocessor 0 28 3.610918 0.000000 605 +framework 0 28 3.610918 0.000000 606 +static 0 27 3.637586 0.000000 619 +enabl 0 26 3.688879 0.000000 655 +supercomput 0 25 3.737670 0.000000 681 +lab 1 24 3.761200 3.761200 698 +alwai 0 24 3.761200 0.000000 691 +known 0 24 3.761200 0.000000 702 +flow 0 24 3.761200 0.000000 700 +consult 0 24 3.761200 0.000000 687 +equat 1 23 3.806662 3.806662 724 +togeth 0 23 3.806662 0.000000 714 +almost 1 22 3.850148 3.850148 742 +deal 0 22 3.850148 0.000000 736 +sequenti 0 22 3.850148 0.000000 745 +corpor 1 21 3.912023 3.912023 802 +increas 0 20 3.951244 0.000000 829 +toolkit 0 20 3.951244 0.000000 835 +department 0 20 3.951244 0.000000 839 +partial 0 18 4.060443 0.000000 900 +matrix 1 17 4.110874 4.110874 933 +asplo 1 17 4.110874 4.110874 948 +differenti 0 17 4.110874 0.000000 921 +moor 0 17 4.110874 0.000000 936 +spars 1 16 4.174387 4.174387 989 +transfer 1 16 4.174387 4.174387 967 +young 1 16 4.174387 4.174387 991 +match 0 16 4.174387 0.000000 965 +normal 0 16 4.174387 0.000000 995 +permit 0 16 4.174387 0.000000 962 +intel 0 16 4.174387 0.000000 1000 +novel 0 15 4.248495 0.000000 1039 +fortran 0 15 4.248495 0.000000 1027 +massachusett 0 14 4.317488 0.000000 1118 +earlier 1 13 4.382027 4.382027 1140 +block 1 13 4.382027 4.382027 1183 +incorpor 1 13 4.382027 4.382027 1163 +unfortun 0 13 4.382027 0.000000 1170 +sigplan 0 13 4.382027 0.000000 1190 +denis 1 12 4.465908 4.465908 1255 +pageif 0 12 4.465908 0.000000 1275 +loop 1 11 4.553877 4.553877 1310 +mesh 0 11 4.553877 0.000000 1351 +refin 0 11 4.553877 0.000000 1363 +faster 0 11 4.553877 0.000000 1323 +summar 0 11 4.553877 0.000000 1295 +michigan 0 11 4.553877 0.000000 1368 +vladimir 0 11 4.553877 0.000000 1324 +matric 1 10 4.653960 4.653960 1399 +packard 1 10 4.653960 4.653960 1444 +preliminari 0 9 4.753590 0.000000 1480 +prefer 0 9 4.753590 0.000000 1491 +jersei 0 9 4.753590 0.000000 1587 +hewlett 1 8 4.875197 4.875197 1709 +poor 0 8 4.875197 0.000000 1736 +competit 0 8 4.875197 0.000000 1635 +potenti 0 8 4.875197 0.000000 1690 +uniprocessor 0 8 4.875197 0.000000 1696 +presidenti 0 8 4.875197 0.000000 1737 +wayn 0 8 4.875197 0.000000 1738 +pldi 0 8 4.875197 0.000000 1704 +keshav 0 7 5.010635 0.000000 1852 +solver 0 7 5.010635 0.000000 1911 +uniform 0 7 5.010635 0.000000 1845 +refere 0 7 5.010635 0.000000 1895 +elementari 0 7 5.010635 0.000000 1825 +dens 1 6 5.164786 5.164786 2122 +prize 1 6 5.164786 5.164786 2150 +ensur 0 6 5.164786 0.000000 2012 +nest 0 6 5.164786 0.000000 2151 +argonn 0 5 5.347108 0.000000 2461 +compet 0 5 5.347108 0.000000 2462 +decad 0 5 5.347108 0.000000 2455 +panel 0 5 5.347108 0.000000 2463 +seventh 0 5 5.347108 0.000000 2464 +restructur 1 4 5.568345 5.568345 2775 +align 1 4 5.568345 5.568345 2863 +flavor 0 4 5.568345 0.000000 2625 +indupraka 0 4 5.568345 0.000000 2639 +kodukula 0 4 5.568345 0.000000 2640 +stodghil 0 4 5.568345 0.000000 2864 +armi 0 3 5.857933 0.000000 3562 +topla 0 3 5.857933 0.000000 3563 +chelmsford 0 3 5.857933 0.000000 3564 +detroit 0 3 5.857933 0.000000 3565 +rutger 0 3 5.857933 0.000000 3566 +brunswick 0 3 5.857933 0.000000 3567 +redmond 0 3 5.857933 0.000000 3568 +professorphd 0 2 6.263398 0.000000 4904 +numa 0 2 6.263398 0.000000 4905 +lambda 0 2 6.263398 0.000000 4458 +knit 0 2 6.263398 0.000000 4906 +lcpc 0 2 6.263398 0.000000 4538 +kotlyar 0 2 6.263398 0.000000 4907 +pingalikeshav 0 1 6.957497 0.000000 9585 +pingaliassoci 0 1 6.957497 0.000000 9586 +radic 0 1 6.957497 0.000000 9587 +sparsiti 0 1 6.957497 0.000000 9588 +krylov 0 1 6.957497 0.000000 9589 +petsc 0 1 6.957497 0.000000 9590 +activitiespanel 0 1 6.957497 0.000000 9591 +ballist 0 1 6.957497 0.000000 9592 +odyssei 0 1 6.957497 0.000000 9593 +computereditori 0 1 6.957497 0.000000 9594 +awardsn 0 1 6.957497 0.000000 9595 +lecturesfast 0 1 6.957497 0.000000 9596 +publicationssolv 0 1 6.957497 0.000000 9597 +gianfranco 0 1 6.957497 0.000000 9598 +bilardi 0 1 6.957497 0.000000 9599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..af098d56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,367 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +time 2 293 1.098612 2.197224 17 +project 1 340 1.098612 1.098612 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +implement 2 152 1.791759 3.583518 52 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +avail 0 169 1.791759 0.000000 48 +read 0 154 1.791759 0.000000 47 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +lectur 1 135 1.945910 1.945910 73 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +confer 1 126 2.079442 2.079442 100 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +intern 2 108 2.197225 4.394450 128 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +site 0 106 2.197225 0.000000 119 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +pleas 0 113 2.197225 0.000000 114 +techniqu 2 99 2.302585 4.605170 138 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +access 0 102 2.302585 0.000000 136 +text 0 98 2.302585 0.000000 133 +real 2 93 2.397895 4.795790 144 +center 1 88 2.397895 2.397895 158 +proceed 1 93 2.397895 2.397895 152 +associ 1 93 2.397895 2.397895 151 +commun 0 95 2.397895 0.000000 157 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +member 2 84 2.484907 4.969814 165 +environ 1 84 2.484907 2.484907 177 +school 1 84 2.484907 2.484907 188 +build 1 85 2.484907 2.484907 184 +ieee 1 86 2.484907 2.484907 190 +educ 1 86 2.484907 2.484907 191 +second 0 81 2.484907 0.000000 166 +start 0 83 2.484907 0.000000 173 +control 0 82 2.484907 0.000000 164 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +novemb 0 81 2.484907 0.000000 179 +state 1 76 2.564949 2.564949 207 +complet 1 77 2.564949 2.564949 208 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +messag 0 76 2.564949 0.000000 212 +workshop 1 71 2.639057 2.639057 239 +logic 1 71 2.639057 2.639057 230 +symposium 1 72 2.639057 2.639057 238 +meet 1 72 2.639057 2.639057 229 +involv 0 71 2.639057 0.000000 227 +appli 0 71 2.639057 0.000000 226 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +write 0 72 2.639057 0.000000 222 +materi 0 75 2.639057 0.000000 221 +summari 0 73 2.639057 0.000000 237 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +differ 0 66 2.708050 0.000000 253 +view 0 70 2.708050 0.000000 254 +virtual 1 62 2.772589 2.772589 285 +polici 1 64 2.772589 2.772589 279 +import 0 65 2.772589 0.000000 282 +evalu 0 64 2.772589 0.000000 266 +function 0 62 2.772589 0.000000 275 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +septemb 0 65 2.772589 0.000000 274 +foundat 0 62 2.772589 0.000000 286 +march 1 61 2.833213 2.833213 295 +juli 1 60 2.833213 2.833213 305 +reason 1 57 2.890372 2.890372 318 +summer 1 56 2.890372 2.890372 311 +publish 0 57 2.890372 0.000000 326 +instruct 1 53 2.944439 2.944439 332 +processor 0 54 2.944439 0.000000 335 +allow 0 53 2.944439 0.000000 333 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +investig 1 51 2.995732 2.995732 353 +run 0 51 2.995732 0.000000 347 +hardwar 0 51 2.995732 0.000000 350 +particular 0 51 2.995732 0.000000 352 +profession 0 51 2.995732 0.000000 345 +standard 0 48 3.044522 0.000000 365 +approach 0 48 3.044522 0.000000 366 +understand 0 47 3.091042 0.000000 384 +physic 0 47 3.091042 0.000000 377 +move 0 47 3.091042 0.000000 382 +could 0 46 3.091042 0.000000 383 +possibl 0 47 3.091042 0.000000 378 +protocol 1 45 3.135494 3.135494 407 +execut 1 45 3.135494 3.135494 404 +textbook 0 44 3.135494 0.000000 397 +term 1 43 3.178054 3.178054 411 +third 0 43 3.178054 0.000000 412 +past 1 42 3.218876 3.218876 428 +york 1 41 3.218876 3.218876 435 +editor 0 41 3.218876 0.000000 433 +continu 0 39 3.258097 0.000000 448 +programm 0 39 3.258097 0.000000 445 +transact 0 39 3.258097 0.000000 438 +annual 0 40 3.258097 0.000000 458 +prototyp 0 38 3.295837 0.000000 463 +origin 0 38 3.295837 0.000000 472 +formal 1 37 3.332205 3.332205 478 +respons 0 37 3.332205 0.000000 476 +hand 0 37 3.332205 0.000000 475 +committe 2 34 3.401197 6.802394 522 +concurr 1 34 3.401197 3.401197 501 +least 0 35 3.401197 0.000000 516 +singl 0 34 3.401197 0.000000 510 +everi 0 34 3.401197 0.000000 519 +global 0 34 3.401197 0.000000 520 +return 0 34 3.401197 0.000000 502 +toler 2 33 3.433987 6.867974 533 +fault 2 32 3.465736 6.931472 547 +given 1 32 3.465736 3.465736 538 +ad 1 32 3.465736 3.465736 544 +exist 0 30 3.555348 0.000000 569 +depend 1 29 3.583519 3.583519 583 +synchron 1 29 3.583519 3.583519 588 +focus 0 29 3.583519 0.000000 584 +art 0 29 3.583519 0.000000 593 +becom 1 28 3.610918 3.610918 603 +pass 1 28 3.610918 3.610918 611 +univ 0 28 3.610918 0.000000 617 +american 0 27 3.637586 0.000000 634 +detect 1 26 3.688879 3.688879 646 +subject 0 26 3.688879 0.000000 647 +rule 0 26 3.688879 0.000000 638 +relev 0 26 3.688879 0.000000 637 +compar 0 26 3.688879 0.000000 648 +hill 1 25 3.737670 3.737670 670 +concern 0 25 3.737670 0.000000 666 +task 0 25 3.737670 0.000000 678 +handl 0 24 3.761200 0.000000 685 +seri 0 24 3.761200 0.000000 708 +proof 1 23 3.806662 3.806662 720 +equat 1 23 3.806662 3.806662 724 +mobil 1 23 3.806662 3.806662 730 +sequenc 0 23 3.806662 0.000000 734 +methodolog 0 23 3.806662 0.000000 733 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +reduc 0 22 3.850148 0.000000 759 +leav 0 21 3.912023 0.000000 772 +avoid 0 21 3.912023 0.000000 799 +exploit 1 20 3.951244 3.951244 836 +verif 1 20 3.951244 3.951244 826 +safeti 0 20 3.951244 0.000000 817 +department 0 20 3.951244 0.000000 839 +north 1 19 4.007333 4.007333 873 +agent 1 18 4.060443 4.060443 910 +partial 1 18 4.060443 4.060443 900 +along 0 18 4.060443 0.000000 878 +thoma 0 18 4.060443 0.000000 901 +scott 0 18 4.060443 0.000000 884 +germani 2 17 4.110874 8.221748 946 +sept 1 17 4.110874 4.110874 952 +outlin 1 17 4.110874 4.110874 914 +analyz 0 17 4.110874 0.000000 925 +otherwis 0 17 4.110874 0.000000 922 +whether 0 17 4.110874 0.000000 918 +moor 0 17 4.110874 0.000000 936 +critic 1 16 4.174387 4.174387 982 +georg 0 16 4.174387 0.000000 994 +letter 0 16 4.174387 0.000000 981 +hybrid 1 15 4.248495 4.248495 1057 +coordin 1 13 4.382027 4.382027 1182 +carri 0 13 4.382027 0.000000 1152 +nasa 0 13 4.382027 0.000000 1188 +discret 0 13 4.382027 0.000000 1165 +asynchron 1 12 4.465908 4.465908 1229 +verifi 1 12 4.465908 4.465908 1261 +denis 1 12 4.465908 4.465908 1255 +replic 0 12 4.465908 0.000000 1231 +speech 0 12 4.465908 0.000000 1222 +pageif 0 12 4.465908 0.000000 1275 +reness 1 11 4.553877 4.553877 1333 +refin 1 11 4.553877 4.553877 1363 +israel 1 11 4.553877 4.553877 1366 +bandwidth 0 11 4.553877 0.000000 1365 +typic 0 11 4.553877 0.000000 1360 +distinguish 0 11 4.553877 0.000000 1357 +arpa 0 11 4.553877 0.000000 1369 +volum 0 11 4.553877 0.000000 1347 +island 0 11 4.553877 0.000000 1345 +forc 1 10 4.653960 4.653960 1384 +resid 0 10 4.653960 0.000000 1461 +tradit 0 10 4.653960 0.000000 1404 +invit 0 10 4.653960 0.000000 1428 +traffic 0 10 4.653960 0.000000 1421 +robbert 1 9 4.753590 4.753590 1529 +compos 0 9 4.753590 0.000000 1527 +realiz 1 8 4.875197 4.875197 1739 +filter 0 8 4.875197 0.000000 1641 +satisfi 0 8 4.875197 0.000000 1694 +virginia 0 8 4.875197 0.000000 1659 +colloquium 0 8 4.875197 0.000000 1734 +paradigm 0 8 4.875197 0.000000 1662 +fifth 1 7 5.010635 5.010635 1931 +predic 0 7 5.010635 0.000000 1806 +chief 0 7 5.010635 0.000000 1829 +merg 0 7 5.010635 0.000000 1862 +carolina 1 6 5.164786 5.164786 2142 +moder 1 6 5.164786 5.164786 2112 +fred 0 6 5.164786 0.000000 2072 +brook 0 6 5.164786 0.000000 2152 +infer 0 6 5.164786 0.000000 2040 +risc 0 6 5.164786 0.000000 2016 +ensur 0 6 5.164786 0.000000 2012 +mistak 0 6 5.164786 0.000000 2110 +kluwer 0 6 5.164786 0.000000 2143 +causal 0 6 5.164786 0.000000 2024 +mission 1 5 5.347108 5.347108 2465 +ifip 1 5 5.347108 5.347108 2459 +chapel 1 5 5.347108 5.347108 2457 +attract 0 5 5.347108 0.000000 2356 +activitieseditor 0 5 5.347108 0.000000 2454 +merit 0 5 5.347108 0.000000 2466 +speaker 0 5 5.347108 0.000000 2370 +panel 0 5 5.347108 0.000000 2463 +proposit 0 5 5.347108 0.000000 2339 +bulletin 0 5 5.347108 0.000000 2343 +sigcs 1 4 5.568345 5.568345 2865 +stoller 1 4 5.568345 5.568345 2866 +nashvil 1 4 5.568345 5.568345 2867 +tennesse 1 4 5.568345 5.568345 2763 +schneider 0 4 5.568345 0.000000 2868 +increasingli 0 4 5.568345 0.000000 2766 +ident 0 4 5.568345 0.000000 2826 +suffic 0 4 5.568345 0.000000 2869 +conserv 0 4 5.568345 0.000000 2870 +monograph 0 4 5.568345 0.000000 2860 +assur 0 4 5.568345 0.000000 2722 +dagstuhl 0 4 5.568345 0.000000 2871 +technion 0 4 5.568345 0.000000 2856 +aircraft 0 4 5.568345 0.000000 2872 +newslett 0 4 5.568345 0.000000 2873 +gri 1 3 5.857933 5.857933 3569 +defens 1 3 5.857933 5.857933 3327 +munich 1 3 5.857933 5.857933 3570 +stoni 0 3 5.857933 0.000000 3571 +heavili 0 3 5.857933 0.000000 3572 +streamlin 0 3 5.857933 0.000000 3573 +jointli 0 3 5.857933 0.000000 3118 +dimac 0 3 5.857933 0.000000 3574 +reactiv 0 3 5.857933 0.000000 3575 +am 0 3 5.857933 0.000000 3386 +haifa 0 3 5.857933 0.000000 3554 +successor 0 3 5.857933 0.000000 3576 +hoto 0 3 5.857933 0.000000 3577 +orca 0 3 5.857933 0.000000 3578 +hypervisor 1 2 6.263398 6.263398 4549 +replica 1 2 6.263398 6.263398 4206 +norwai 1 2 6.263398 6.263398 4908 +systemsprogram 1 2 6.263398 6.263398 4882 +tacoma 1 2 6.263398 6.263398 4909 +isat 1 2 6.263398 6.263398 4895 +warfar 1 2 6.263398 6.263398 4910 +professorphd 0 2 6.263398 0.000000 4904 +widespread 0 2 6.263398 0.000000 4911 +annal 0 2 6.263398 0.000000 4912 +hebrew 0 2 6.263398 0.000000 4913 +banquet 0 2 6.263398 0.000000 4898 +grante 0 2 6.263398 0.000000 4914 +contractor 0 2 6.263398 0.000000 4915 +mason 0 2 6.263398 0.000000 4916 +airplan 0 2 6.263398 0.000000 4917 +jerusalem 0 2 6.263398 0.000000 4918 +marzullo 0 2 6.263398 0.000000 4919 +trom 1 1 6.957497 6.957497 9600 +marktoberdorf 1 1 6.957497 6.957497 9601 +lubeck 1 1 6.957497 6.957497 9602 +johansen 1 1 6.957497 6.957497 9603 +engineeringeditor 1 1 6.957497 6.957497 9604 +schneiderfr 0 1 6.957497 0.000000 9605 +assert 0 1 6.957497 0.000000 9606 +undefin 0 1 6.957497 0.000000 9607 +bressoud 0 1 6.957497 0.000000 9608 +instantli 0 1 6.957497 0.000000 9609 +freed 0 1 6.957497 0.000000 9610 +roam 0 1 6.957497 0.000000 9611 +activitiessabbat 0 1 6.957497 0.000000 9612 +computingeditor 0 1 6.957497 0.000000 9613 +letterseditor 0 1 6.957497 0.000000 9614 +systemseditor 0 1 6.957497 0.000000 9615 +surveysco 0 1 6.957497 0.000000 9616 +verlagprogram 0 1 6.957497 0.000000 9617 +constructionprogram 0 1 6.957497 0.000000 9618 +resili 0 1 6.957497 0.000000 9619 +applicationsprogram 0 1 6.957497 0.000000 9620 +sixteenth 0 1 6.957497 0.000000 9621 +symposiumprogram 0 1 6.957497 0.000000 9622 +systemsst 0 1 6.957497 0.000000 9623 +chissa 0 1 6.957497 0.000000 9624 +technologymemb 0 1 6.957497 0.000000 9625 +agencyreview 0 1 6.957497 0.000000 9626 +leibniz 0 1 6.957497 0.000000 9627 +universitymemb 0 1 6.957497 0.000000 9628 +awardsfellow 0 1 6.957497 0.000000 9629 +sciencefellow 0 1 6.957497 0.000000 9630 +machinerylecturesproof 0 1 6.957497 0.000000 9631 +afosr 0 1 6.957497 0.000000 9632 +panelist 0 1 6.957497 0.000000 9633 +publicationsreason 0 1 6.957497 0.000000 9634 +icalp 0 1 6.957497 0.000000 9635 +boll 0 1 6.957497 0.000000 9636 +limor 0 1 6.957497 0.000000 9637 +ultradepend 0 1 6.957497 0.000000 9638 +dehn 0 1 6.957497 0.000000 9639 +primu 0 1 6.957497 0.000000 9640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..7692adf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +last 1 314 1.098612 1.098612 14 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +pleas 0 113 2.197225 0.000000 114 +text 0 98 2.302585 0.000000 133 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +member 1 84 2.484907 2.484907 165 +journal 1 83 2.484907 2.484907 183 +build 1 85 2.484907 2.484907 184 +control 0 82 2.484907 0.000000 164 +novemb 0 81 2.484907 0.000000 179 +method 1 80 2.564949 2.564949 213 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +solv 1 73 2.639057 2.639057 234 +januari 1 62 2.772589 2.772589 264 +descript 1 64 2.772589 2.772589 271 +result 0 65 2.772589 0.000000 281 +import 0 65 2.772589 0.000000 282 +organ 0 65 2.772589 0.000000 265 +plai 0 60 2.833213 0.000000 307 +variou 1 56 2.890372 2.890372 317 +semest 0 58 2.890372 0.000000 312 +undergradu 1 54 2.944439 2.944439 338 +particular 0 51 2.995732 0.000000 352 +profession 0 51 2.995732 0.000000 345 +adapt 0 46 3.091042 0.000000 387 +fast 0 42 3.218876 0.000000 429 +continu 0 39 3.258097 0.000000 448 +multipl 0 39 3.258097 0.000000 453 +societi 0 40 3.258097 0.000000 456 +annual 0 40 3.258097 0.000000 458 +close 0 38 3.295837 0.000000 465 +committe 1 34 3.401197 3.401197 522 +return 0 34 3.401197 0.000000 502 +product 1 33 3.433987 3.433987 527 +curriculum 0 33 3.433987 0.000000 535 +transform 0 32 3.465736 0.000000 542 +art 1 29 3.583519 3.583519 593 +chair 0 29 3.583519 0.000000 596 +subject 0 26 3.688879 0.000000 647 +constraint 0 26 3.688879 0.000000 636 +repres 0 26 3.688879 0.000000 656 +siam 1 21 3.912023 3.912023 800 +department 0 20 3.951244 0.000000 839 +four 0 18 4.060443 0.000000 905 +matrix 0 17 4.110874 0.000000 933 +moor 0 17 4.110874 0.000000 936 +role 0 14 4.317488 0.000000 1101 +charl 0 13 4.382027 0.000000 1149 +deriv 0 13 4.382027 0.000000 1145 +translat 0 13 4.382027 0.000000 1164 +optic 1 12 4.465908 4.465908 1221 +denis 1 12 4.465908 4.465908 1255 +reader 0 12 4.465908 0.000000 1246 +pageif 0 12 4.465908 0.000000 1275 +michigan 0 11 4.553877 0.000000 1368 +loop 0 11 4.553877 0.000000 1310 +bandwidth 0 11 4.553877 0.000000 1365 +america 0 11 4.553877 0.000000 1370 +matric 0 10 4.653960 0.000000 1399 +factor 0 9 4.753590 0.000000 1544 +sweden 1 7 5.010635 5.010635 1885 +signal 0 7 5.010635 0.000000 1910 +prize 1 6 5.164786 5.164786 2150 +proce 0 6 5.164786 0.000000 2114 +ohio 1 5 5.347108 5.347108 2447 +markov 0 5 5.347108 0.000000 2280 +activitieseditor 0 5 5.347108 0.000000 2454 +wavelet 0 4 5.568345 0.000000 2874 +analog 0 4 5.568345 0.000000 2875 +admiss 0 4 5.568345 0.000000 2704 +wilkinson 0 3 5.857933 0.000000 3579 +pitsiani 0 3 5.857933 0.000000 3175 +household 1 2 6.263398 6.263398 4920 +intuit 1 2 6.263398 6.263398 4921 +nearest 0 2 6.263398 0.000000 4922 +anticip 0 2 6.263398 0.000000 4268 +activitiescomput 0 2 6.263398 0.000000 4923 +kroneck 1 1 6.957497 6.957497 9641 +umea 1 1 6.957497 6.957497 9642 +loancharl 0 1 6.957497 0.000000 9643 +loanprofessorphd 0 1 6.957497 0.000000 9644 +inhomogen 0 1 6.957497 0.000000 9645 +committeedepart 0 1 6.957497 0.000000 9646 +meetingfreshman 0 1 6.957497 0.000000 9647 +analysismemb 0 1 6.957497 0.000000 9648 +diprima 0 1 6.957497 0.000000 9649 +lecturesappl 0 1 6.957497 0.000000 9650 +linkop 0 1 6.957497 0.000000 9651 +publicationsoptim 0 1 6.957497 0.000000 9652 +ellerbroek 0 1 6.957497 0.000000 9653 +plemmon 0 1 6.957497 0.000000 9654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..f839e219 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +report 0 131 2.079442 0.000000 92 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +mathemat 0 108 2.197225 0.000000 123 +pleas 0 113 2.197225 0.000000 114 +associ 1 93 2.397895 2.397895 151 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +level 0 87 2.484907 0.000000 180 +institut 0 84 2.484907 0.000000 187 +novemb 0 81 2.484907 0.000000 179 +logic 0 71 2.639057 0.000000 230 +undergradu 0 54 2.944439 0.000000 338 +profession 0 51 2.995732 0.000000 345 +electron 0 47 3.091042 0.000000 379 +annual 0 40 3.258097 0.000000 458 +electr 0 38 3.295837 0.000000 461 +respons 0 37 3.332205 0.000000 476 +committe 0 34 3.401197 0.000000 522 +return 0 34 3.401197 0.000000 502 +curriculum 1 33 3.433987 3.433987 535 +symbol 0 27 3.637586 0.000000 620 +revis 0 26 3.688879 0.000000 640 +primari 0 25 3.737670 0.000000 669 +other 0 24 3.761200 0.000000 697 +prepar 0 20 3.951244 0.000000 824 +department 0 20 3.951244 0.000000 839 +lower 0 18 4.060443 0.000000 886 +moor 0 17 4.110874 0.000000 936 +women 0 16 4.174387 0.000000 1004 +denis 1 12 4.465908 4.465908 1255 +pageif 0 12 4.465908 0.000000 1275 +introductori 0 9 4.753590 0.000000 1479 +machineri 0 4 5.568345 0.000000 2851 +activitiescomput 0 2 6.263398 0.000000 4923 +activitiesassoci 0 2 6.263398 0.000000 4881 +catherin 0 1 6.957497 0.000000 9655 +wagnercatherin 0 1 6.957497 0.000000 9656 +wagnersenior 0 1 6.957497 0.000000 9657 +lecturerphd 0 1 6.957497 0.000000 9658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..a62bddb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,351 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 3 431 0.693147 2.079441 10 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +project 0 340 1.098612 0.000000 18 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +data 2 170 1.791759 3.583518 49 +network 2 168 1.791759 3.583518 61 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +process 2 142 1.945910 3.891820 72 +first 1 140 1.945910 1.945910 71 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +tool 0 117 2.079442 0.000000 93 +databas 0 122 2.079442 0.000000 86 +intern 1 108 2.197225 2.197225 128 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +teach 0 108 2.197225 0.000000 112 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +commun 1 95 2.397895 2.397895 157 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +select 1 91 2.397895 2.397895 154 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +proceed 0 93 2.397895 0.000000 152 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +resourc 1 81 2.484907 2.484907 172 +novemb 1 81 2.484907 2.484907 179 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +second 0 81 2.484907 0.000000 166 +ieee 0 86 2.484907 0.000000 190 +server 2 76 2.564949 5.129898 204 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +sourc 0 77 2.564949 0.000000 201 +onlin 1 75 2.639057 2.639057 223 +workshop 1 71 2.639057 2.639057 239 +servic 1 72 2.639057 2.639057 236 +html 1 75 2.639057 2.639057 235 +upson 0 71 2.639057 0.000000 218 +nation 0 74 2.639057 0.000000 240 +materi 0 75 2.639057 0.000000 221 +line 0 75 2.639057 0.000000 231 +david 0 71 2.639057 0.000000 232 +name 0 72 2.639057 0.000000 220 +symposium 0 72 2.639057 0.000000 238 +multimedia 2 68 2.708050 5.416100 258 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +august 0 66 2.708050 0.000000 257 +laboratori 1 63 2.772589 2.772589 292 +experi 0 64 2.772589 0.000000 283 +previou 0 62 2.772589 0.000000 290 +copi 0 63 2.772589 0.000000 284 +septemb 0 65 2.772589 0.000000 274 +best 1 59 2.833213 2.833213 299 +type 1 61 2.833213 2.833213 296 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +locat 0 59 2.833213 0.000000 303 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +faculti 0 56 2.890372 0.000000 325 +semest 0 58 2.890372 0.000000 312 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +sever 0 56 2.890372 0.000000 322 +explor 0 58 2.890372 0.000000 324 +talk 1 53 2.944439 2.944439 336 +local 1 55 2.944439 2.944439 334 +found 0 53 2.944439 0.000000 337 +undergradu 0 54 2.944439 0.000000 338 +februari 0 54 2.944439 0.000000 328 +hardwar 1 51 2.995732 2.995732 350 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +digit 0 52 2.995732 0.000000 348 +approach 1 48 3.044522 3.044522 366 +format 1 48 3.044522 3.044522 356 +without 0 50 3.044522 0.000000 370 +california 1 46 3.091042 3.091042 388 +electron 1 47 3.091042 3.091042 379 +possibl 0 47 3.091042 0.000000 378 +effect 0 46 3.091042 0.000000 385 +video 3 44 3.135494 9.406482 405 +protocol 1 45 3.135494 3.135494 407 +describ 1 45 3.135494 3.135494 400 +anoth 0 45 3.135494 0.000000 408 +made 0 44 3.135494 0.000000 398 +third 1 43 3.178054 3.178054 412 +around 0 43 3.178054 0.000000 415 +review 1 42 3.218876 3.218876 425 +music 0 42 3.218876 0.000000 436 +fast 0 42 3.218876 0.000000 429 +continu 1 39 3.258097 3.258097 448 +must 1 40 3.258097 3.258097 442 +brian 2 38 3.295837 6.591674 466 +slide 0 38 3.295837 0.000000 467 +workstat 1 37 3.332205 3.332205 479 +connect 1 37 3.332205 3.332205 485 +represent 0 35 3.401197 0.000000 512 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +queri 0 33 3.433987 0.000000 524 +independ 1 32 3.465736 3.465736 548 +ad 1 32 3.465736 3.465736 544 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +common 1 30 3.555348 3.555348 574 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +domain 0 30 3.555348 0.000000 564 +built 0 29 3.583519 0.000000 592 +load 1 28 3.610918 3.610918 601 +retriev 0 27 3.637586 0.000000 621 +manipul 0 27 3.637586 0.000000 624 +proc 1 26 3.688879 3.688879 649 +effort 1 26 3.688879 3.688879 652 +berkelei 1 26 3.688879 3.688879 657 +compar 0 26 3.688879 0.000000 648 +client 2 25 3.737670 7.475340 679 +toward 0 25 3.737670 0.000000 668 +store 0 24 3.761200 0.000000 693 +scalabl 0 24 3.761200 0.000000 705 +motion 0 24 3.761200 0.000000 699 +compress 1 23 3.806662 3.806662 719 +initi 1 23 3.806662 3.806662 717 +lead 0 23 3.806662 0.000000 718 +decis 0 23 3.806662 0.000000 728 +famili 0 23 3.806662 0.000000 735 +serv 0 22 3.850148 0.000000 758 +almost 0 22 3.850148 0.000000 742 +sent 0 22 3.850148 0.000000 763 +util 0 21 3.912023 0.000000 774 +programminglanguag 0 21 3.912023 0.000000 782 +thu 0 21 3.912023 0.000000 773 +smith 2 20 3.951244 7.902488 820 +qualiti 1 20 3.951244 3.951244 832 +mpeg 1 20 3.951244 3.951244 831 +reserv 0 20 3.951244 0.000000 808 +basi 0 20 3.951244 0.000000 828 +media 1 19 4.007333 4.007333 861 +thur 0 19 4.007333 0.000000 847 +appropri 0 18 4.060443 0.000000 883 +speed 0 18 4.060443 0.000000 911 +across 1 16 4.174387 4.174387 974 +advantag 0 16 4.174387 0.000000 987 +earli 0 16 4.174387 0.000000 968 +jose 0 16 4.174387 0.000000 976 +diego 0 16 4.174387 0.000000 992 +driven 0 15 4.248495 0.000000 1048 +audio 1 14 4.317488 4.317488 1094 +francisco 1 14 4.317488 4.317488 1095 +balanc 0 14 4.317488 0.000000 1112 +heterogen 0 14 4.317488 0.000000 1090 +camera 0 14 4.317488 0.000000 1115 +suit 1 13 4.382027 4.382027 1129 +jonathan 1 13 4.382027 4.382027 1174 +resolut 1 13 4.382027 4.382027 1172 +central 0 13 4.382027 0.000000 1160 +translat 0 13 4.382027 0.000000 1164 +canada 0 13 4.382027 0.000000 1158 +misc 0 13 4.382027 0.000000 1124 +verifi 0 12 4.465908 0.000000 1261 +infrastructur 0 12 4.465908 0.000000 1234 +promot 0 12 4.465908 0.000000 1235 +remov 0 12 4.465908 0.000000 1225 +amount 0 12 4.465908 0.000000 1208 +readi 0 12 4.465908 0.000000 1242 +tue 0 11 4.553877 0.000000 1308 +peter 0 11 4.553877 0.000000 1316 +stephen 0 11 4.553877 0.000000 1342 +player 0 11 4.553877 0.000000 1371 +interestsmi 0 10 4.653960 0.000000 1462 +operatingsystem 0 10 4.653960 0.000000 1401 +equal 0 10 4.653960 0.000000 1424 +transmiss 1 9 4.753590 4.753590 1588 +establish 0 9 4.753590 0.000000 1532 +charg 0 9 4.753590 0.000000 1582 +xerox 1 8 4.875197 4.875197 1725 +contrast 1 8 4.875197 4.875197 1637 +transport 0 8 4.875197 0.000000 1672 +rivl 0 8 4.875197 0.000000 1632 +theme 0 8 4.875197 0.000000 1707 +colloquium 0 8 4.875197 0.000000 1734 +lawrenc 1 7 5.010635 5.010635 1908 +usabl 0 7 5.010635 0.000000 1810 +suffici 0 7 5.010635 0.000000 1897 +pronounc 0 7 5.010635 0.000000 1918 +prioriti 0 7 5.010635 0.000000 1792 +supportfor 0 7 5.010635 0.000000 1854 +invest 1 6 5.164786 5.164786 2153 +jpeg 1 6 5.164786 5.164786 2053 +patel 1 6 5.164786 5.164786 2154 +simultan 0 6 5.164786 0.000000 2155 +quickli 0 6 5.164786 0.000000 2000 +toronto 0 6 5.164786 0.000000 2156 +spie 0 6 5.164786 0.000000 2119 +internationalconfer 0 6 5.164786 0.000000 2051 +row 1 5 5.347108 5.347108 2330 +cyclic 1 5 5.347108 5.347108 2383 +adopt 0 5 5.347108 0.000000 2467 +consum 0 5 5.347108 0.000000 2334 +computerarchitectur 0 5 5.347108 0.000000 2290 +webster 0 5 5.347108 0.000000 2468 +minnesota 0 5 5.347108 0.000000 2469 +ofworkst 1 4 5.568345 5.568345 2679 +publicationsresearch 0 4 5.568345 0.000000 2876 +isthat 0 4 5.568345 0.000000 2723 +hypothesi 0 4 5.568345 0.000000 2650 +poorli 0 4 5.568345 0.000000 2781 +commonli 0 4 5.568345 0.000000 2877 +fold 0 4 5.568345 0.000000 2615 +swartz 0 4 5.568345 0.000000 2878 +zeno 1 3 5.857933 5.857933 3580 +networkprotocol 0 3 5.857933 0.000000 3285 +thetim 0 3 5.857933 0.000000 3581 +magnitud 0 3 5.857933 0.000000 3582 +rival 0 3 5.857933 0.000000 3583 +quicktim 0 3 5.857933 0.000000 3493 +anaheim 0 3 5.857933 0.000000 3271 +intereststeachingselect 0 2 6.263398 0.000000 4924 +andprocess 0 2 6.263398 0.000000 4925 +withlarg 0 2 6.263398 0.000000 4926 +needto 0 2 6.263398 0.000000 4927 +thecommun 0 2 6.263398 0.000000 4928 +builton 0 2 6.263398 0.000000 4094 +availableonlin 0 2 6.263398 0.000000 4929 +aredevelop 0 2 6.263398 0.000000 4930 +todramat 0 2 6.263398 0.000000 4250 +animplement 0 2 6.263398 0.000000 4931 +insoftwar 0 2 6.263398 0.000000 4932 +asif 0 2 6.263398 0.000000 4933 +ghia 0 2 6.263398 0.000000 4934 +chamberlin 0 2 6.263398 0.000000 4745 +hum 0 2 6.263398 0.000000 4935 +sanfrancisco 0 2 6.263398 0.000000 4339 +decod 0 2 6.263398 0.000000 4936 +fileserv 1 1 6.957497 6.957497 9659 +playback 1 1 6.957497 6.957497 9660 +decompress 1 1 6.957497 6.957497 9661 +transcod 1 1 6.957497 6.957497 9662 +ketan 1 1 6.957497 6.957497 9663 +bsmith 0 1 6.957497 0.000000 9664 +talksmisc 0 1 6.957497 0.000000 9665 +linksresearch 0 1 6.957497 0.000000 9666 +ourcomput 0 1 6.957497 0.000000 9667 +commercialand 0 1 6.957497 0.000000 9668 +aredesign 0 1 6.957497 0.000000 9669 +premis 0 1 6.957497 0.000000 9670 +infrastructurei 0 1 6.957497 0.000000 9671 +andappl 0 1 6.957497 0.000000 9672 +workingsystem 0 1 6.957497 0.000000 9673 +zenodistribut 0 1 6.957497 0.000000 9674 +anethernet 0 1 6.957497 0.000000 9675 +serverof 0 1 6.957497 0.000000 9676 +videostor 0 1 6.957497 0.000000 9677 +severalserv 0 1 6.957497 0.000000 9678 +effortdeliveri 0 1 6.957497 0.000000 9679 +resourcereserv 0 1 6.957497 0.000000 9680 +communicationinfrastructur 0 1 6.957497 0.000000 9681 +forbandwidth 0 1 6.957497 0.000000 9682 +networkenviron 0 1 6.957497 0.000000 9683 +accessiblebi 0 1 6.957497 0.000000 9684 +latterenviron 0 1 6.957497 0.000000 9685 +datagram 0 1 6.957497 0.000000 9686 +audioand 0 1 6.957497 0.000000 9687 +metropolitan 0 1 6.957497 0.000000 9688 +andwid 0 1 6.957497 0.000000 9689 +todeliv 0 1 6.957497 0.000000 9690 +compressedrepresent 0 1 6.957497 0.000000 9691 +reducesth 0 1 6.957497 0.000000 9692 +indicatesthat 0 1 6.957497 0.000000 9693 +fasterthan 0 1 6.957497 0.000000 9694 +currentlyextend 0 1 6.957497 0.000000 9695 +onecompress 0 1 6.957497 0.000000 9696 +compresseddomain 0 1 6.957497 0.000000 9697 +simplifyexperiment 0 1 6.957497 0.000000 9698 +calledrivl 0 1 6.957497 0.000000 9699 +allowsvideo 0 1 6.957497 0.000000 9700 +resolutionand 0 1 6.957497 0.000000 9701 +whatpostscript 0 1 6.957497 0.000000 9702 +resolutionindepend 0 1 6.957497 0.000000 9703 +sameprogram 0 1 6.957497 0.000000 9704 +whileedit 0 1 6.957497 0.000000 9705 +qualityfinish 0 1 6.957497 0.000000 9706 +bepreview 0 1 6.957497 0.000000 9707 +dpiprint 0 1 6.957497 0.000000 9708 +onvideo 0 1 6.957497 0.000000 9709 +videous 0 1 6.957497 0.000000 9710 +teachingat 0 1 6.957497 0.000000 9711 +logan 0 1 6.957497 0.000000 9712 +ontario 0 1 6.957497 0.000000 9713 +annett 0 1 6.957497 0.000000 9714 +hanna 0 1 6.957497 0.000000 9715 +mmcn 0 1 6.957497 0.000000 9716 +documentationth 0 1 6.957497 0.000000 9717 +priceweb 0 1 6.957497 0.000000 9718 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..722393f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,280 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +us 2 329 1.098612 2.197224 16 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +studi 1 120 2.079442 2.079442 91 +provid 0 121 2.079442 0.000000 94 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +theori 2 111 2.197225 4.394450 127 +mathemat 1 108 2.197225 2.197225 123 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +final 0 116 2.197225 0.000000 108 +intern 0 108 2.197225 0.000000 128 +user 0 104 2.302585 0.000000 137 +associ 0 93 2.397895 0.000000 151 +build 1 85 2.484907 2.484907 184 +wide 1 84 2.484907 2.484907 185 +journal 1 83 2.484907 2.484907 183 +second 0 81 2.484907 0.000000 166 +academ 0 82 2.484907 0.000000 178 +control 0 82 2.484907 0.000000 164 +method 1 80 2.564949 2.564949 213 +decemb 1 80 2.564949 2.564949 215 +state 1 76 2.564949 2.564949 207 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +logic 2 71 2.639057 5.278114 230 +involv 1 71 2.639057 2.639057 227 +line 1 75 2.639057 2.639057 231 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +write 0 72 2.639057 0.000000 222 +symposium 0 72 2.639057 0.000000 238 +meet 0 72 2.639057 0.000000 229 +appli 0 71 2.639057 0.000000 226 +differ 0 66 2.708050 0.000000 253 +januari 1 62 2.772589 2.772589 264 +improv 0 62 2.772589 0.000000 289 +descript 0 64 2.772589 0.000000 271 +experi 0 64 2.772589 0.000000 283 +type 1 61 2.833213 2.833213 296 +march 1 61 2.833213 2.833213 295 +content 0 59 2.833213 0.000000 302 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +three 0 54 2.944439 0.000000 330 +scientif 0 53 2.944439 0.000000 341 +hardwar 0 51 2.995732 0.000000 350 +profession 0 51 2.995732 0.000000 345 +principl 0 48 3.044522 0.000000 357 +effect 0 46 3.091042 0.000000 385 +possibl 0 47 3.091042 0.000000 378 +natur 0 44 3.135494 0.000000 406 +mark 0 44 3.135494 0.000000 403 +algebra 0 45 3.135494 0.000000 394 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +mechan 0 43 3.178054 0.000000 416 +futur 1 41 3.218876 3.218876 427 +york 1 41 3.218876 3.218876 435 +past 0 42 3.218876 0.000000 428 +review 0 42 3.218876 0.000000 425 +theoret 1 39 3.258097 3.258097 446 +continu 0 39 3.258097 0.000000 448 +annual 0 40 3.258097 0.000000 458 +paul 0 38 3.295837 0.000000 471 +electr 0 38 3.295837 0.000000 461 +formal 1 37 3.332205 3.332205 478 +connect 1 37 3.332205 3.332205 485 +hand 0 37 3.332205 0.000000 475 +expect 0 37 3.332205 0.000000 484 +especi 1 36 3.367296 3.367296 496 +soon 0 36 3.367296 0.000000 494 +committe 1 34 3.401197 3.401197 522 +transform 0 32 3.465736 0.000000 542 +collabor 0 32 3.465736 0.000000 543 +richard 1 31 3.496508 3.496508 559 +robert 0 30 3.555348 0.000000 567 +power 0 30 3.555348 0.000000 573 +compon 0 30 3.555348 0.000000 570 +domain 0 30 3.555348 0.000000 564 +chair 1 29 3.583519 3.583519 596 +built 0 29 3.583519 0.000000 592 +hope 1 28 3.610918 3.610918 610 +packag 0 28 3.610918 0.000000 614 +symbol 1 27 3.637586 3.637586 620 +great 0 27 3.637586 0.000000 626 +american 0 27 3.637586 0.000000 634 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +effort 0 26 3.688879 0.000000 652 +fundament 1 25 3.737670 3.737670 661 +wai 0 25 3.737670 0.000000 662 +aspect 0 25 3.737670 0.000000 663 +seri 0 24 3.761200 0.000000 708 +proof 0 23 3.806662 0.000000 720 +togeth 0 23 3.806662 0.000000 714 +varieti 1 22 3.850148 3.850148 740 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +william 0 22 3.850148 0.000000 765 +deal 0 22 3.850148 0.000000 736 +inth 0 22 3.850148 0.000000 741 +theorem 1 21 3.912023 3.912023 786 +divis 0 21 3.912023 0.000000 803 +entir 0 20 3.951244 0.000000 811 +synthesi 0 20 3.951244 0.000000 834 +verif 0 20 3.951244 0.000000 826 +expert 0 20 3.951244 0.000000 833 +facil 0 20 3.951244 0.000000 814 +prove 1 19 4.007333 4.007333 848 +definit 0 19 4.007333 0.000000 864 +north 0 19 4.007333 0.000000 873 +minim 0 18 4.060443 0.000000 887 +explan 0 16 4.174387 0.000000 985 +stream 0 15 4.248495 0.000000 1015 +contribut 0 15 4.248495 0.000000 1021 +style 0 15 4.248495 0.000000 1036 +consider 0 14 4.317488 0.000000 1076 +incomput 0 14 4.317488 0.000000 1096 +near 0 14 4.317488 0.000000 1091 +circuit 1 13 4.382027 4.382027 1131 +joint 1 13 4.382027 4.382027 1130 +weak 0 13 4.382027 0.000000 1159 +canada 0 13 4.382027 0.000000 1158 +captur 0 12 4.465908 0.000000 1232 +franc 0 12 4.465908 0.000000 1276 +philadelphia 0 12 4.465908 0.000000 1244 +israel 1 11 4.553877 4.553877 1366 +excit 0 11 4.553877 0.000000 1329 +refin 0 11 4.553877 0.000000 1363 +nuprl 2 10 4.653960 9.307920 1402 +modular 0 10 4.653960 0.000000 1392 +devis 0 10 4.653960 0.000000 1451 +suitabl 0 9 4.753590 0.000000 1486 +colloquium 0 8 4.875197 0.000000 1734 +pennsylvania 1 7 5.010635 5.010635 1932 +beyond 0 7 5.010635 0.000000 1834 +feasibl 0 6 5.164786 0.000000 2157 +handbook 0 6 5.164786 0.000000 2061 +oxford 0 6 5.164786 0.000000 2121 +recruit 0 6 5.164786 0.000000 2145 +pari 0 6 5.164786 0.000000 2158 +indiana 0 6 5.164786 0.000000 2057 +allen 1 5 5.347108 5.347108 2470 +eduph 0 5 5.347108 0.000000 2449 +dougla 0 5 5.347108 0.000000 2471 +begun 0 5 5.347108 0.000000 2386 +activitieseditor 0 5 5.347108 0.000000 2454 +weyl 1 4 5.568345 5.568345 2854 +zippel 1 4 5.568345 5.568345 2879 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +stuart 1 3 5.857933 5.857933 3584 +how 1 3 5.857933 5.857933 3289 +gri 1 3 5.857933 5.857933 3569 +theoremprov 0 3 5.857933 0.000000 3298 +moreov 0 3 5.857933 0.000000 3200 +predecessor 0 3 5.857933 0.000000 3585 +jackson 0 3 5.857933 0.000000 3586 +boolean 0 3 5.857933 0.000000 3202 +nato 0 3 5.857933 0.000000 3587 +engag 1 2 6.263398 6.263398 4937 +ventur 1 2 6.263398 6.263398 4938 +polya 1 2 6.263398 6.263398 4939 +programmingand 0 2 6.263398 0.000000 4940 +theform 0 2 6.263398 0.000000 4245 +aitken 0 2 6.263398 0.000000 4941 +possibleto 0 2 6.263398 0.000000 4942 +aprogram 0 2 6.263398 0.000000 4943 +thiswil 0 2 6.263398 0.000000 4944 +activitieschair 0 2 6.263398 0.000000 4894 +anniversari 0 2 6.263398 0.000000 4945 +celebr 0 2 6.263398 0.000000 4946 +buffalo 0 2 6.263398 0.000000 4947 +bensoussan 0 2 6.263398 0.000000 4303 +andmathemat 0 2 6.263398 0.000000 4948 +manfr 0 2 6.263398 0.000000 4949 +leeser 1 1 6.957497 6.957497 9719 +eaton 1 1 6.957497 6.957497 9720 +computationeditor 1 1 6.957497 6.957497 9721 +betweencomput 1 1 6.957497 6.957497 9722 +constablerobert 0 1 6.957497 0.000000 9723 +constabledepart 0 1 6.957497 0.000000 9724 +professorrc 0 1 6.957497 0.000000 9725 +researchw 0 1 6.957497 0.000000 9726 +providemechan 0 1 6.957497 0.000000 9727 +implementedthre 0 1 6.957497 0.000000 9728 +lispprogram 0 1 6.957497 0.000000 9729 +sucha 0 1 6.957497 0.000000 9730 +canexpress 0 1 6.957497 0.000000 9731 +asmetalevel 0 1 6.957497 0.000000 9732 +canevalu 0 1 6.957497 0.000000 9733 +nuprli 0 1 6.957497 0.000000 9734 +fomal 0 1 6.957497 0.000000 9735 +iscal 0 1 6.957497 0.000000 9736 +termeditor 0 1 6.957497 0.000000 9737 +itsintern 0 1 6.957497 0.000000 9738 +hedefinit 0 1 6.957497 0.000000 9739 +inconstruct 0 1 6.957497 0.000000 9740 +mechanismha 0 1 6.957497 0.000000 9741 +rebuilt 0 1 6.957497 0.000000 9742 +thework 0 1 6.957497 0.000000 9743 +isan 0 1 6.957497 0.000000 9744 +builtprincip 0 1 6.957497 0.000000 9745 +internaldescript 0 1 6.957497 0.000000 9746 +withmiriam 0 1 6.957497 0.000000 9747 +davidgri 0 1 6.957497 0.000000 9748 +richardzippel 0 1 6.957497 0.000000 9749 +withless 0 1 6.957497 0.000000 9750 +aagard 0 1 6.957497 0.000000 9751 +thecorrect 0 1 6.957497 0.000000 9752 +bedrocsystem 0 1 6.957497 0.000000 9753 +widelyus 0 1 6.957497 0.000000 9754 +efforttaught 0 1 6.957497 0.000000 9755 +themann 0 1 6.957497 0.000000 9756 +programmingprocess 0 1 6.957497 0.000000 9757 +givn 0 1 6.957497 0.000000 9758 +ofpolya 0 1 6.957497 0.000000 9759 +tryingto 0 1 6.957497 0.000000 9760 +thepolya 0 1 6.957497 0.000000 9761 +conal 0 1 6.957497 0.000000 9762 +mannion 0 1 6.957497 0.000000 9763 +ofus 0 1 6.957497 0.000000 9764 +discussingproblem 0 1 6.957497 0.000000 9765 +ssymbol 0 1 6.957497 0.000000 9766 +computingsoftwar 0 1 6.957497 0.000000 9767 +isbuild 0 1 6.957497 0.000000 9768 +presseditor 0 1 6.957497 0.000000 9769 +pressgener 0 1 6.957497 0.000000 9770 +licsprogram 0 1 6.957497 0.000000 9771 +jumelageprogram 0 1 6.957497 0.000000 9772 +softwarerefere 0 1 6.957497 0.000000 9773 +nserc 0 1 6.957497 0.000000 9774 +scienceunivers 0 1 6.957497 0.000000 9775 +committeecomput 0 1 6.957497 0.000000 9776 +committeeprovost 0 1 6.957497 0.000000 9777 +mathematicslecturesform 0 1 6.957497 0.000000 9778 +inria 0 1 6.957497 0.000000 9779 +bengurion 0 1 6.957497 0.000000 9780 +sheva 0 1 6.957497 0.000000 9781 +aviv 0 1 6.957497 0.000000 9782 +metaprogram 0 1 6.957497 0.000000 9783 +engineeringworkshop 0 1 6.957497 0.000000 9784 +publicationsform 0 1 6.957497 0.000000 9785 +tendenc 0 1 6.957497 0.000000 9786 +verju 0 1 6.957497 0.000000 9787 +metalevel 0 1 6.957497 0.000000 9788 +broi 0 1 6.957497 0.000000 9789 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..6c219357 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +analysi 0 124 2.079442 0.000000 98 +find 0 111 2.197225 0.000000 111 +follow 0 92 2.397895 0.000000 143 +comment 0 93 2.397895 0.000000 146 +upson 0 71 2.639057 0.000000 218 +simul 0 66 2.708050 0.000000 255 +ithaca 0 65 2.772589 0.000000 294 +numer 0 49 3.044522 0.000000 369 +correct 0 38 3.295837 0.000000 462 +adam 1 17 4.110874 4.110874 934 +sept 0 17 4.110874 0.000000 952 +incomput 0 14 4.317488 0.000000 1096 +athlet 0 7 5.010635 0.000000 1933 +atcornel 0 6 5.164786 0.000000 2131 +florenc 1 2 6.263398 6.263398 4950 +aflorenc 0 1 6.957497 0.000000 9790 +professionalinterest 0 1 6.957497 0.000000 9791 +academicsresearchworkinterest 0 1 6.957497 0.000000 9792 +mewith 0 1 6.957497 0.000000 9793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..1887e3d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +check 0 115 2.197225 0.000000 118 +second 0 81 2.484907 0.000000 166 +solv 0 73 2.639057 0.000000 234 +ithaca 0 65 2.772589 0.000000 294 +suggest 0 53 2.944439 0.000000 331 +approach 0 48 3.044522 0.000000 366 +detect 0 26 3.688879 0.000000 646 +failur 0 18 4.060443 0.000000 898 +hybrid 0 15 4.248495 0.000000 1057 +warn 0 14 4.317488 0.000000 1068 +tour 0 11 4.553877 0.000000 1307 +perman 0 11 4.553877 0.000000 1372 +marco 1 4 5.568345 5.568345 2589 +aguilera 1 2 6.263398 6.263398 4052 +kawazo 1 1 6.957497 6.957497 9794 +algorithmsrandom 0 1 6.957497 0.000000 9795 +consensusgo 0 1 6.957497 0.000000 9796 +brazil 0 1 6.957497 0.000000 9797 +constructionmarco 0 1 6.957497 0.000000 9798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..70dc2287 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +site 0 106 2.197225 0.000000 119 +check 0 115 2.197225 0.000000 118 +requir 0 81 2.484907 0.000000 167 +stuff 0 87 2.484907 0.000000 171 +upson 0 71 2.639057 0.000000 218 +new 1 64 2.772589 2.772589 262 +ithaca 0 65 2.772589 0.000000 294 +get 0 46 3.091042 0.000000 380 +chines 1 29 3.583519 3.583519 595 +annot 0 21 3.912023 0.000000 775 +taiwan 0 16 4.174387 0.000000 1006 +hong 1 14 4.317488 4.317488 1105 +essenti 0 13 4.382027 0.000000 1137 +usaoffic 0 6 5.164786 0.000000 2159 +corba 0 5 5.347108 0.000000 2320 +alfr 1 4 5.568345 5.568345 2882 +sinanet 0 4 5.568345 0.000000 2883 +worthwhil 0 2 6.263398 0.000000 4951 +dizzi 0 1 6.957497 0.000000 9799 +nandonet 0 1 6.957497 0.000000 9800 +sunworld 0 1 6.957497 0.000000 9801 +javaworldcours 0 1 6.957497 0.000000 9802 +bibliographyc 0 1 6.957497 0.000000 9803 +reportalfr 0 1 6.957497 0.000000 9804 +ahong 0 1 6.957497 0.000000 9805 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..3fd41d45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +network 1 168 1.791759 1.791759 61 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +object 0 138 1.945910 0.000000 79 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +final 1 116 2.197225 2.197225 108 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +academ 0 82 2.484907 0.000000 178 +orient 0 80 2.564949 0.000000 205 +resum 0 79 2.564949 0.000000 217 +html 0 75 2.639057 0.000000 235 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +automata 0 13 4.382027 0.000000 1135 +vineet 0 8 4.875197 0.000000 1639 +capac 0 8 4.875197 0.000000 1740 +engg 0 4 5.568345 0.000000 2884 +ahuja 0 3 5.857933 0.000000 3494 +coursework 0 3 5.857933 0.000000 3588 +hasbrouck 0 2 6.263398 0.000000 4952 +pagevineet 0 1 6.957497 0.000000 9806 +ahujam 0 1 6.957497 0.000000 9807 +apt 0 1 6.957497 0.000000 9808 +reportfal 0 1 6.957497 0.000000 9809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..27e98809 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +address 0 170 1.791759 0.000000 62 +construct 0 139 1.945910 0.000000 82 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +telephon 0 50 3.044522 0.000000 373 +still 0 50 3.044522 0.000000 362 +pair 0 9 4.753590 0.000000 1503 +ching 1 1 6.957497 6.957497 9810 +chinglan 0 1 6.957497 0.000000 9811 +edumast 0 1 6.957497 0.000000 9812 +beau 0 1 6.957497 0.000000 9813 +seneca 0 1 6.957497 0.000000 9814 +examplegraph 0 1 6.957497 0.000000 9815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..f133bc27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cornel 2 215 1.386294 2.772588 23 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +distribut 1 162 1.791759 1.791759 51 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +report 1 131 2.079442 2.079442 92 +tool 0 117 2.079442 0.000000 93 +machin 0 129 2.079442 0.000000 95 +technic 1 100 2.302585 2.302585 140 +commun 0 95 2.397895 0.000000 157 +search 0 95 2.397895 0.000000 155 +larg 0 82 2.484907 0.000000 168 +wide 0 84 2.484907 0.000000 185 +orient 0 80 2.564949 0.000000 205 +state 0 76 2.564949 0.000000 207 +new 1 64 2.772589 2.772589 262 +ithaca 0 65 2.772589 0.000000 294 +direct 0 57 2.890372 0.000000 316 +directori 0 45 3.135494 0.000000 396 +music 0 42 3.218876 0.000000 436 +staff 0 36 3.367296 0.000000 490 +within 0 33 3.433987 0.000000 525 +art 0 29 3.583519 0.000000 593 +weather 1 28 3.610918 3.610918 618 +scale 0 28 3.610918 0.000000 613 +seri 0 24 3.761200 0.000000 708 +lead 0 23 3.806662 0.000000 718 +lyco 0 19 4.007333 0.000000 871 +event 0 18 4.060443 0.000000 896 +condit 0 16 4.174387 0.000000 975 +brief 0 16 4.174387 0.000000 1001 +horu 0 14 4.317488 0.000000 1116 +replic 0 12 4.465908 0.000000 1231 +reness 0 11 4.553877 0.000000 1333 +birman 0 9 4.753590 0.000000 1531 +friedman 0 7 5.010635 0.000000 1886 +gopher 0 6 5.164786 0.000000 1982 +broadcast 0 5 5.347108 0.000000 2453 +vaysburd 0 4 5.568345 0.000000 2846 +cuinfo 0 4 5.568345 0.000000 2626 +androbbert 0 2 6.263398 0.000000 4953 +partition 0 2 6.263398 0.000000 4954 +lausann 0 2 6.263398 0.000000 4955 +hebrew 0 2 6.263398 0.000000 4913 +transi 0 2 6.263398 0.000000 4861 +alexei 1 1 6.957497 6.957497 9816 +pagealexei 0 1 6.957497 0.000000 9817 +vaysburdalexei 0 1 6.957497 0.000000 9818 +andobject 0 1 6.957497 0.000000 9819 +ecol 0 1 6.957497 0.000000 9820 +polytechniqu 0 1 6.957497 0.000000 9821 +federal 0 1 6.957497 0.000000 9822 +cornellcornel 0 1 6.957497 0.000000 9823 +directorycornel 0 1 6.957497 0.000000 9824 +directorycours 0 1 6.957497 0.000000 9825 +examscornel 0 1 6.957497 0.000000 9826 +calendarcornel 0 1 6.957497 0.000000 9827 +musicbailei 0 1 6.957497 0.000000 9828 +concertscornel 0 1 6.957497 0.000000 9829 +ithacaworld 0 1 6.957497 0.000000 9830 +odessa 0 1 6.957497 0.000000 9831 +odessaweb 0 1 6.957497 0.000000 9832 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..eef8f83c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +softwar 1 220 1.386294 1.386294 30 +cornel 0 215 1.386294 0.000000 23 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +includ 1 208 1.609438 1.609438 42 +data 1 170 1.791759 1.791759 49 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +look 0 107 2.197225 0.000000 115 +memori 1 101 2.302585 2.302585 139 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +present 0 91 2.397895 0.000000 145 +environ 1 84 2.484907 2.484907 177 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +second 0 81 2.484907 0.000000 166 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +involv 0 71 2.639057 0.000000 227 +simul 1 66 2.708050 2.708050 255 +view 0 70 2.708050 0.000000 254 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +descript 0 64 2.772589 0.000000 271 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +written 0 63 2.772589 0.000000 278 +content 0 59 2.833213 0.000000 302 +processor 0 54 2.944439 0.000000 335 +sampl 0 53 2.944439 0.000000 339 +give 0 50 3.044522 0.000000 359 +california 0 46 3.091042 0.000000 388 +done 0 47 3.091042 0.000000 381 +better 1 45 3.135494 3.135494 401 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +might 0 41 3.218876 0.000000 426 +past 0 42 3.218876 0.000000 428 +futur 0 41 3.218876 0.000000 427 +game 1 36 3.367296 3.367296 498 +ofth 0 36 3.367296 0.000000 491 +kind 0 32 3.465736 0.000000 541 +propos 0 28 3.610918 0.000000 602 +hope 0 28 3.610918 0.000000 610 +though 0 27 3.637586 0.000000 622 +bookmark 0 26 3.688879 0.000000 639 +trace 0 25 3.737670 0.000000 677 +pattern 1 24 3.761200 3.761200 689 +store 0 24 3.761200 0.000000 693 +compress 1 23 3.806662 3.806662 719 +chip 1 21 3.912023 3.912023 770 +watch 0 21 3.912023 0.000000 789 +vlsi 0 21 3.912023 0.000000 795 +mpeg 0 20 3.951244 0.000000 831 +speed 0 18 4.060443 0.000000 911 +render 0 17 4.110874 0.000000 947 +track 0 15 4.248495 0.000000 1029 +stream 0 15 4.248495 0.000000 1015 +rate 0 15 4.248495 0.000000 1037 +transit 0 15 4.248495 0.000000 1046 +train 1 14 4.317488 4.317488 1066 +station 0 13 4.382027 0.000000 1157 +suit 0 13 4.382027 0.000000 1129 +avenu 1 12 4.465908 4.465908 1277 +realiti 0 12 4.465908 0.000000 1272 +death 0 10 4.653960 0.000000 1457 +earth 0 10 4.653960 0.000000 1463 +mountain 0 10 4.653960 0.000000 1456 +santa 0 10 4.653960 0.000000 1441 +wall 0 9 4.753590 0.000000 1553 +occur 0 9 4.753590 0.000000 1572 +routin 0 9 4.753590 0.000000 1549 +screen 0 9 4.753590 0.000000 1577 +ride 1 8 4.875197 4.875197 1741 +switch 0 8 4.875197 0.000000 1718 +accord 0 7 5.010635 0.000000 1826 +monei 0 7 5.010635 0.000000 1934 +stereo 0 7 5.010635 0.000000 1818 +microsystem 1 6 5.164786 5.164786 2160 +railroad 0 6 5.164786 0.000000 2161 +silicon 0 6 5.164786 0.000000 2076 +extern 0 6 5.164786 0.000000 2105 +byte 0 6 5.164786 0.000000 2108 +snapshot 0 5 5.347108 0.000000 2303 +hell 1 4 5.568345 5.568345 2885 +mess 0 4 5.568345 0.000000 2886 +engg 0 4 5.568345 0.000000 2884 +heaven 1 3 5.857933 5.857933 3589 +landscap 1 3 5.857933 5.857933 3525 +hindu 0 3 5.857933 0.000000 3590 +xlib 0 3 5.857933 0.000000 3204 +fernandez 0 3 5.857933 0.000000 3591 +cupertino 1 2 6.263398 6.263398 4956 +javasoft 1 2 6.263398 6.263398 4220 +pyramania 1 2 6.263398 6.263398 4957 +mytholog 0 2 6.263398 0.000000 4869 +clara 0 2 6.263398 0.000000 4958 +amith 0 2 6.263398 0.000000 4053 +yamasani 0 2 6.263398 0.000000 4062 +cscomput 0 2 6.263398 0.000000 4195 +cave 0 2 6.263398 0.000000 4959 +softwarei 0 2 6.263398 0.000000 4960 +nano 0 2 6.263398 0.000000 4961 +snap 0 2 6.263398 0.000000 4962 +thed 0 2 6.263398 0.000000 4963 +pal 0 2 6.263398 0.000000 4964 +joselui 0 2 6.263398 0.000000 4965 +yama 1 1 6.957497 6.957497 9833 +coursesvlsi 1 1 6.957497 6.957497 9834 +downto 0 1 6.957497 0.000000 9835 +imparti 0 1 6.957497 0.000000 9836 +amithyamasanim 0 1 6.957497 0.000000 9837 +yorki 0 1 6.957497 0.000000 9838 +garcia 0 1 6.957497 0.000000 9839 +mailstop 0 1 6.957497 0.000000 9840 +ucup 0 1 6.957497 0.000000 9841 +warburton 0 1 6.957497 0.000000 9842 +comi 0 1 6.957497 0.000000 9843 +currentlyemploi 0 1 6.957497 0.000000 9844 +workingin 0 1 6.957497 0.000000 9845 +javamedia 0 1 6.957497 0.000000 9846 +groupeducationfal 0 1 6.957497 0.000000 9847 +cssoftwar 0 1 6.957497 0.000000 9848 +csspring 0 1 6.957497 0.000000 9849 +csproject 0 1 6.957497 0.000000 9850 +railroadsystem 0 1 6.957497 0.000000 9851 +onyx 0 1 6.957497 0.000000 9852 +openinventord 0 1 6.957497 0.000000 9853 +through 0 1 6.957497 0.000000 9854 +documentimag 0 1 6.957497 0.000000 9855 +chipdevelop 0 1 6.957497 0.000000 9856 +basicallycompress 0 1 6.957497 0.000000 9857 +thisalgorithm 0 1 6.957497 0.000000 9858 +cachecam 0 1 6.957497 0.000000 9859 +inputstream 0 1 6.957497 0.000000 9860 +capableof 0 1 6.957497 0.000000 9861 +nowai 0 1 6.957497 0.000000 9862 +rivlproposalpresentationand 0 1 6.957497 0.000000 9863 +dthi 0 1 6.957497 0.000000 9864 +itswritten 0 1 6.957497 0.000000 9865 +parallelomania 0 1 6.957497 0.000000 9866 +resumehtmlpostscript 0 1 6.957497 0.000000 9867 +satyaprasad 0 1 6.957497 0.000000 9868 +avinashgupta 0 1 6.957497 0.000000 9869 +kartikh 0 1 6.957497 0.000000 9870 +kapadia 0 1 6.957497 0.000000 9871 +hrishikeshdixit 0 1 6.957497 0.000000 9872 +vineetahuja 0 1 6.957497 0.000000 9873 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..96eaff61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +person 0 111 2.197225 0.000000 117 +assist 0 112 2.197225 0.000000 113 +homepag 0 93 2.397895 0.000000 148 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +real 0 93 2.397895 0.000000 144 +school 0 84 2.484907 0.000000 188 +wide 0 84 2.484907 0.000000 185 +state 0 76 2.564949 0.000000 207 +david 0 71 2.639057 0.000000 232 +multimedia 1 68 2.708050 2.708050 258 +order 0 69 2.708050 0.000000 249 +virtual 1 62 2.772589 2.772589 285 +street 0 63 2.772589 0.000000 293 +ithaca 0 65 2.772589 0.000000 294 +prof 0 64 2.772589 0.000000 273 +interact 0 62 2.772589 0.000000 270 +summer 0 56 2.890372 0.000000 311 +understand 0 47 3.091042 0.000000 384 +video 0 44 3.135494 0.000000 405 +vita 0 38 3.295837 0.000000 473 +brian 0 38 3.295837 0.000000 466 +industri 0 38 3.295837 0.000000 464 +bibliographi 0 34 3.401197 0.000000 518 +curriculum 0 33 3.433987 0.000000 535 +independ 0 32 3.465736 0.000000 548 +photo 0 31 3.496508 0.000000 561 +common 0 30 3.555348 0.000000 574 +platform 0 29 3.583519 0.000000 591 +limit 0 29 3.583519 0.000000 585 +request 0 26 3.688879 0.000000 635 +annot 0 21 3.912023 0.000000 775 +smith 0 20 3.951244 0.000000 820 +toolkit 0 20 3.951244 0.000000 835 +kernel 0 20 3.951244 0.000000 825 +mellon 0 13 4.382027 0.000000 1179 +realiti 1 12 4.465908 4.465908 1272 +carnegi 0 12 4.465908 0.000000 1260 +total 0 10 4.653960 0.000000 1398 +patel 1 6 5.164786 5.164786 2154 +causal 0 6 5.164786 0.000000 2024 +east 0 5 5.347108 0.000000 2472 +corba 0 5 5.347108 0.000000 2320 +dale 0 4 5.568345 0.000000 2687 +zeno 0 3 5.857933 0.000000 3580 +cheriton 0 3 5.857933 0.000000 3259 +ankit 1 2 6.263398 6.263398 4966 +endpoint 0 2 6.263398 0.000000 4967 +broker 0 2 6.263398 0.000000 4968 +critiqu 0 2 6.263398 0.000000 4328 +apatel 0 1 6.957497 0.000000 9874 +galleria 0 1 6.957497 0.000000 9875 +chronologia 0 1 6.957497 0.000000 9876 +universityresumedepart 0 1 6.957497 0.000000 9877 +enrolledgradu 0 1 6.957497 0.000000 9878 +canvasd 0 1 6.957497 0.000000 9879 +conferencingmultimedia 0 1 6.957497 0.000000 9880 +assignmentsproject 0 1 6.957497 0.000000 9881 +netan 0 1 6.957497 0.000000 9882 +skeen 0 1 6.957497 0.000000 9883 +scienceworld 0 1 6.957497 0.000000 9884 +multimediamaharaja 0 1 6.957497 0.000000 9885 +sayajirao 0 1 6.957497 0.000000 9886 +academicsfriend 0 1 6.957497 0.000000 9887 +techoreli 0 1 6.957497 0.000000 9888 +limitedjob 0 1 6.957497 0.000000 9889 +profilelif 0 1 6.957497 0.000000 9890 +relianc 0 1 6.957497 0.000000 9891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..f7d481ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +languag 0 227 1.386294 0.000000 26 +databas 0 122 2.079442 0.000000 86 +softwareengin 0 6 5.164786 0.000000 2162 +ashish 0 5 5.347108 0.000000 2473 +sciencemast 0 2 6.263398 0.000000 4969 +jhaveriashish 0 1 6.957497 0.000000 9892 +jhaveridepart 0 1 6.957497 0.000000 9893 +engineeringresumehtmlpost 0 1 6.957497 0.000000 9894 +scriptcourseworkadvanc 0 1 6.957497 0.000000 9895 +systemscsmultimedia 0 1 6.957497 0.000000 9896 +systemscsengin 0 1 6.957497 0.000000 9897 +networkscsprogram 0 1 6.957497 0.000000 9898 +csashish 0 1 6.957497 0.000000 9899 +jhaveri 0 1 6.957497 0.000000 9900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..82528d81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +languag 0 227 1.386294 0.000000 26 +phone 1 175 1.791759 1.791759 45 +hall 0 146 1.945910 0.000000 65 +person 1 111 2.197225 2.197225 117 +pictur 0 89 2.397895 0.000000 160 +server 0 76 2.564949 0.000000 204 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +guid 0 63 2.772589 0.000000 267 +life 0 50 3.044522 0.000000 375 +eduoffic 0 33 3.433987 0.000000 531 +transform 1 32 3.465736 3.465736 542 +famili 0 23 3.806662 0.000000 735 +record 0 18 4.060443 0.000000 890 +drive 0 15 4.248495 0.000000 1052 +club 0 15 4.248495 0.000000 1058 +danc 1 12 4.465908 4.465908 1278 +jean 0 10 4.653960 0.000000 1440 +atcornel 0 6 5.164786 0.000000 2131 +swing 1 4 5.568345 5.568345 2887 +album 0 4 5.568345 0.000000 2888 +dutch 0 3 5.857933 0.000000 3592 +berg 1 2 6.263398 6.263398 4970 +aswin 1 1 6.957497 6.957497 9901 +skyacr 0 1 6.957497 0.000000 9902 +systemmi 0 1 6.957497 0.000000 9903 +annek 0 1 6.957497 0.000000 9904 +deejay 0 1 6.957497 0.000000 9905 +isdn 0 1 6.957497 0.000000 9906 +hop 0 1 6.957497 0.000000 9907 +nederlands 0 1 6.957497 0.000000 9908 +clubi 0 1 6.957497 0.000000 9909 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..93bfbd91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,123 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +gener 1 220 1.386294 1.386294 27 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +like 1 132 1.945910 1.945910 81 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +make 0 111 2.197225 0.000000 120 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +requir 1 81 2.484907 2.484907 167 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +interfac 1 79 2.564949 2.564949 209 +appli 0 71 2.639057 0.000000 226 +multimedia 1 68 2.708050 2.708050 258 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +virtual 0 62 2.772589 0.000000 285 +guid 0 63 2.772589 0.000000 267 +point 1 58 2.890372 2.890372 319 +space 1 57 2.890372 2.890372 310 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +still 0 50 3.044522 0.000000 362 +friend 0 48 3.044522 0.000000 376 +featur 0 46 3.091042 0.000000 386 +effect 0 46 3.091042 0.000000 385 +even 0 45 3.135494 0.000000 393 +protocol 0 45 3.135494 0.000000 407 +multipl 0 39 3.258097 0.000000 453 +mean 0 37 3.332205 0.000000 477 +game 1 36 3.367296 3.367296 498 +next 1 34 3.401197 3.401197 517 +either 0 35 3.401197 0.000000 506 +everi 0 34 3.401197 0.000000 519 +board 0 33 3.433987 0.000000 528 +abl 0 30 3.555348 0.000000 566 +full 0 28 3.610918 0.000000 615 +subject 1 26 3.688879 3.688879 647 +universityithaca 0 24 3.761200 0.000000 710 +magazin 0 24 3.761200 0.000000 704 +reach 0 24 3.761200 0.000000 688 +brows 0 23 3.806662 0.000000 726 +sciencecornel 0 22 3.850148 0.000000 768 +almost 0 22 3.850148 0.000000 742 +chip 1 21 3.912023 3.912023 770 +five 1 19 4.007333 4.007333 841 +sign 0 16 4.174387 0.000000 970 +piec 1 15 4.248495 4.248495 1020 +transit 0 15 4.248495 0.000000 1046 +stream 0 15 4.248495 0.000000 1015 +hierarch 0 15 4.248495 0.000000 1018 +scene 0 14 4.317488 0.000000 1114 +skill 0 12 4.465908 0.000000 1205 +player 1 11 4.553877 4.553877 1371 +earth 0 10 4.653960 0.000000 1463 +pair 0 9 4.753590 0.000000 1503 +rivl 0 8 4.875197 0.000000 1632 +pronounc 0 7 5.010635 0.000000 1918 +earn 0 7 5.010635 0.000000 1788 +hoca 1 5 5.347108 5.347108 2241 +hypothet 0 5 5.347108 0.000000 2474 +vertic 0 5 5.347108 0.000000 2270 +guestbook 0 5 5.347108 0.000000 2475 +engg 0 4 5.568345 0.000000 2884 +multitask 0 4 5.568345 0.000000 2803 +screenshot 0 4 5.568345 0.000000 2743 +avinash 0 3 5.857933 0.000000 3510 +win 0 3 5.857933 0.000000 3593 +ipng 1 2 6.263398 6.263398 4727 +resumemi 0 2 6.263398 0.000000 4971 +hodja 0 2 6.263398 0.000000 4972 +fledg 0 2 6.263398 0.000000 4973 +horizont 0 2 6.263398 0.000000 4117 +diagon 0 2 6.263398 0.000000 4974 +caveat 0 2 6.263398 0.000000 4975 +trap 1 1 6.957497 6.957497 9910 +welcomeavinash 0 1 6.957497 0.000000 9911 +guptam 0 1 6.957497 0.000000 9912 +streetcambridg 0 1 6.957497 0.000000 9913 +thecia 0 1 6.957497 0.000000 9914 +presentationpent 0 1 6.957497 0.000000 9915 +skillpent 0 1 6.957497 0.000000 9916 +oppon 0 1 6.957497 0.000000 9917 +gamedownload 0 1 6.957497 0.000000 9918 +ipvimpl 0 1 6.957497 0.000000 9919 +proposalprogress 0 1 6.957497 0.000000 9920 +reportsam 0 1 6.957497 0.000000 9921 +pageon 0 1 6.957497 0.000000 9922 +internethytelnetth 0 1 6.957497 0.000000 9923 +catalogeinet 0 1 6.957497 0.000000 9924 +galaxyplanet 0 1 6.957497 0.000000 9925 +pagejoel 0 1 6.957497 0.000000 9926 +indexyahoo 0 1 6.957497 0.000000 9927 +wwwwebcrawlerlycosmi 0 1 6.957497 0.000000 9928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..6d9f24cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +high 1 130 2.079442 2.079442 101 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +final 0 116 2.197225 0.000000 108 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +peopl 2 96 2.302585 4.605170 132 +need 1 98 2.302585 2.302585 135 +take 0 97 2.302585 0.000000 134 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +chang 0 82 2.484907 0.000000 163 +school 0 84 2.484907 0.000000 188 +good 1 77 2.564949 2.564949 200 +master 1 76 2.564949 2.564949 216 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +upson 1 71 2.639057 2.639057 218 +write 1 72 2.639057 2.639057 222 +name 0 72 2.639057 0.000000 220 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +would 0 67 2.708050 0.000000 251 +window 0 68 2.708050 0.000000 242 +prof 0 64 2.772589 0.000000 273 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +previou 0 62 2.772589 0.000000 290 +locat 1 59 2.833213 2.833213 303 +plai 0 60 2.833213 0.000000 307 +share 0 59 2.833213 0.000000 304 +room 0 59 2.833213 0.000000 301 +summer 1 56 2.890372 2.890372 311 +semest 1 58 2.890372 2.890372 312 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +digit 0 52 2.995732 0.000000 348 +friend 1 48 3.044522 3.044522 376 +cool 0 49 3.044522 0.000000 374 +still 0 50 3.044522 0.000000 362 +electron 0 47 3.091042 0.000000 379 +favorit 1 44 3.135494 3.135494 410 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +keep 1 44 3.135494 3.135494 409 +algebra 0 45 3.135494 0.000000 394 +futur 1 41 3.218876 3.218876 427 +past 1 42 3.218876 3.218876 428 +linear 0 41 3.218876 0.000000 431 +live 1 40 3.258097 3.258097 451 +continu 0 39 3.258097 0.000000 448 +form 0 39 3.258097 0.000000 443 +realli 0 40 3.258097 0.000000 444 +hand 0 37 3.332205 0.000000 475 +feel 0 37 3.332205 0.000000 483 +soon 0 36 3.367296 0.000000 494 +least 0 35 3.401197 0.000000 516 +product 1 33 3.433987 3.433987 527 +taught 0 33 3.433987 0.000000 526 +kind 0 32 3.465736 0.000000 541 +concept 0 32 3.465736 0.000000 537 +independ 0 32 3.465736 0.000000 548 +taken 1 31 3.496508 3.496508 555 +computersci 0 30 3.555348 0.000000 562 +hard 0 30 3.555348 0.000000 563 +depend 0 29 3.583519 0.000000 583 +consid 0 29 3.583519 0.000000 590 +usual 0 28 3.610918 0.000000 608 +framework 0 28 3.610918 0.000000 606 +actual 0 28 3.610918 0.000000 604 +campu 0 27 3.637586 0.000000 623 +session 0 26 3.688879 0.000000 643 +spent 0 25 3.737670 0.000000 676 +fundament 0 25 3.737670 0.000000 661 +greg 0 24 3.761200 0.000000 695 +frame 0 24 3.761200 0.000000 684 +brows 0 23 3.806662 0.000000 726 +finish 1 22 3.850148 3.850148 748 +dai 0 22 3.850148 0.000000 753 +hous 1 21 3.912023 3.912023 801 +half 0 21 3.912023 0.000000 776 +mpeg 0 20 3.951244 0.000000 831 +nice 0 20 3.951244 0.000000 809 +media 0 19 4.007333 0.000000 861 +offici 0 18 4.060443 0.000000 894 +coupl 0 17 4.110874 0.000000 939 +thought 0 17 4.110874 0.000000 945 +otherwis 0 17 4.110874 0.000000 922 +georg 1 16 4.174387 4.174387 994 +intel 0 16 4.174387 0.000000 1000 +goe 1 15 4.248495 4.248495 1044 +fortran 0 15 4.248495 0.000000 1027 +stream 0 15 4.248495 0.000000 1015 +trip 0 14 4.317488 0.000000 1113 +hopefulli 0 14 4.317488 0.000000 1071 +camera 0 14 4.317488 0.000000 1115 +decid 0 14 4.317488 0.000000 1075 +dave 0 14 4.317488 0.000000 1098 +jonathan 1 13 4.382027 4.382027 1174 +believ 0 13 4.382027 0.000000 1187 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +resolut 0 13 4.382027 0.000000 1172 +unfortun 0 13 4.382027 0.000000 1170 +pretti 0 13 4.382027 0.000000 1191 +went 0 12 4.465908 0.000000 1279 +pageif 0 12 4.465908 0.000000 1275 +lake 0 11 4.553877 0.000000 1373 +bandwidth 0 11 4.553877 0.000000 1365 +smart 0 11 4.553877 0.000000 1352 +perman 0 11 4.553877 0.000000 1372 +road 0 11 4.553877 0.000000 1374 +sentenc 0 10 4.653960 0.000000 1413 +town 0 10 4.653960 0.000000 1458 +packet 0 10 4.653960 0.000000 1415 +bring 0 10 4.653960 0.000000 1430 +forc 0 10 4.653960 0.000000 1384 +undergrad 0 9 4.753590 0.000000 1589 +jersei 0 9 4.753590 0.000000 1587 +introductori 0 9 4.753590 0.000000 1479 +trust 0 9 4.753590 0.000000 1583 +grew 0 8 4.875197 0.000000 1742 +mile 0 8 4.875197 0.000000 1743 +filter 0 8 4.875197 0.000000 1641 +rivl 0 8 4.875197 0.000000 1632 +encrypt 1 7 5.010635 5.010635 1835 +cornellunivers 0 7 5.010635 0.000000 1916 +portland 0 7 5.010635 0.000000 1878 +conveni 0 6 5.164786 0.000000 2088 +corp 0 6 5.164786 0.000000 2139 +lucki 0 6 5.164786 0.000000 2163 +oregon 1 5 5.347108 5.347108 2437 +ahead 0 5 5.347108 0.000000 2338 +clarif 0 5 5.347108 0.000000 2253 +fork 0 4 5.568345 0.000000 2801 +skin 0 4 5.568345 0.000000 2840 +cheap 0 4 5.568345 0.000000 2751 +ultra 0 4 5.568345 0.000000 2889 +height 0 4 5.568345 0.000000 2890 +gear 0 4 5.568345 0.000000 2891 +birth 0 3 5.857933 0.000000 3594 +greek 0 3 5.857933 0.000000 3595 +labor 0 3 5.857933 0.000000 3195 +weber 0 3 5.857933 0.000000 3156 +bright 0 3 5.857933 0.000000 3596 +pack 0 3 5.857933 0.000000 3597 +urg 0 3 5.857933 0.000000 3212 +sugata 1 2 6.263398 6.263398 4976 +dude 0 2 6.263398 0.000000 4977 +felt 0 2 6.263398 0.000000 4978 +fratern 0 2 6.263398 0.000000 4979 +border 0 2 6.263398 0.000000 4980 +mukhopadhyai 0 2 6.263398 0.000000 4981 +surfer 0 2 6.263398 0.000000 4982 +captain 0 2 6.263398 0.000000 4983 +barber 1 1 6.957497 6.957497 9929 +bulli 1 1 6.957497 6.957497 9930 +ponch 1 1 6.957497 6.957497 9931 +inde 1 1 6.957497 6.957497 9932 +bush 1 1 6.957497 6.957497 9933 +fleshpooooooooooooooch 0 1 6.957497 0.000000 9934 +inclin 0 1 6.957497 0.000000 9935 +callm 0 1 6.957497 0.000000 9936 +orpooch 0 1 6.957497 0.000000 9937 +guppi 0 1 6.957497 0.000000 9938 +mama 0 1 6.957497 0.000000 9939 +phin 0 1 6.957497 0.000000 9940 +attendedmontgomeri 0 1 6.957497 0.000000 9941 +collegetown 0 1 6.957497 0.000000 9942 +adjac 0 1 6.957497 0.000000 9943 +sublet 0 1 6.957497 0.000000 9944 +oncolleg 0 1 6.957497 0.000000 9945 +radiu 0 1 6.957497 0.000000 9946 +epsilon 0 1 6.957497 0.000000 9947 +cayuga 0 1 6.957497 0.000000 9948 +thefilt 0 1 6.957497 0.000000 9949 +ofc 0 1 6.957497 0.000000 9950 +intereststhi 0 1 6.957497 0.000000 9951 +rivlan 0 1 6.957497 0.000000 9952 +tracker 0 1 6.957497 0.000000 9953 +rivli 0 1 6.957497 0.000000 9954 +smpd 0 1 6.957497 0.000000 9955 +generatorfor 0 1 6.957497 0.000000 9956 +webar 0 1 6.957497 0.000000 9957 +buddi 0 1 6.957497 0.000000 9958 +resourceful 0 1 6.957497 0.000000 9959 +pipe 0 1 6.957497 0.000000 9960 +meanth 0 1 6.957497 0.000000 9961 +comrad 0 1 6.957497 0.000000 9962 +ofhi 0 1 6.957497 0.000000 9963 +swirl 0 1 6.957497 0.000000 9964 +nefari 0 1 6.957497 0.000000 9965 +toilet 0 1 6.957497 0.000000 9966 +mukhopadyai 0 1 6.957497 0.000000 9967 +bonei 0 1 6.957497 0.000000 9968 +magoo 0 1 6.957497 0.000000 9969 +fletop 0 1 6.957497 0.000000 9970 +bigro 0 1 6.957497 0.000000 9971 +koster 0 1 6.957497 0.000000 9972 +bot 0 1 6.957497 0.000000 9973 +tffl 0 1 6.957497 0.000000 9974 +pageuuencod 0 1 6.957497 0.000000 9975 +pagetar 0 1 6.957497 0.000000 9976 +zip 0 1 6.957497 0.000000 9977 +downloadsgraphicsbarb 0 1 6.957497 0.000000 9978 +gifponch 0 1 6.957497 0.000000 9979 +htmlres_htmlres_curemmittemmitt 0 1 6.957497 0.000000 9980 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..aa71b8c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +look 0 107 2.197225 0.000000 115 +final 0 116 2.197225 0.000000 108 +user 0 104 2.302585 0.000000 137 +commun 1 95 2.397895 2.397895 157 +homepag 1 93 2.397895 2.397895 148 +proceed 1 93 2.397895 2.397895 152 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +activ 1 84 2.484907 2.484907 182 +stuff 1 87 2.484907 2.484907 171 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +level 0 87 2.484907 0.000000 180 +messag 1 76 2.564949 2.564949 212 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +good 0 77 2.564949 0.000000 200 +state 0 76 2.564949 0.000000 207 +interfac 0 79 2.564949 0.000000 209 +upson 0 71 2.639057 0.000000 218 +onlin 0 75 2.639057 0.000000 223 +addit 0 74 2.639057 0.000000 228 +david 0 71 2.639057 0.000000 232 +would 1 67 2.708050 2.708050 251 +differ 0 66 2.708050 0.000000 253 +integr 0 67 2.708050 0.000000 245 +ithaca 1 65 2.772589 2.772589 294 +locat 0 59 2.833213 0.000000 303 +back 0 60 2.833213 0.000000 297 +think 0 57 2.890372 0.000000 314 +summer 0 56 2.890372 0.000000 311 +februari 0 54 2.944439 0.000000 328 +advisor 0 51 2.995732 0.000000 355 +run 0 51 2.995732 0.000000 347 +cool 0 49 3.044522 0.000000 374 +archiv 0 49 3.044522 0.000000 364 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +fast 1 42 3.218876 3.218876 429 +review 0 42 3.218876 0.000000 425 +realli 1 40 3.258097 3.258097 444 +live 0 40 3.258097 0.000000 451 +especi 0 36 3.367296 0.000000 496 +singl 0 34 3.401197 0.000000 510 +collabor 0 32 3.465736 0.000000 543 +someth 0 31 3.496508 0.000000 554 +turn 0 29 3.583519 0.000000 586 +cluster 1 28 3.610918 3.610918 612 +hope 0 28 3.610918 0.000000 610 +team 0 27 3.637586 0.000000 625 +compar 0 26 3.688879 0.000000 648 +berkelei 0 26 3.688879 0.000000 657 +enabl 0 26 3.688879 0.000000 655 +known 0 24 3.761200 0.000000 702 +size 0 23 3.806662 0.000000 713 +indian 0 22 3.850148 0.000000 769 +love 1 21 3.912023 3.912023 804 +listen 0 18 4.060443 0.000000 907 +layer 1 17 4.110874 4.110874 926 +segment 0 17 4.110874 0.000000 931 +interconnect 0 17 4.110874 0.000000 937 +latenc 1 16 4.174387 4.174387 993 +photograph 1 15 4.248495 4.248495 1056 +micro 0 15 4.248495 0.000000 1031 +split 0 14 4.317488 0.000000 1078 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +avenu 0 12 4.465908 0.000000 1277 +went 0 12 4.465908 0.000000 1279 +philadelphia 0 12 4.465908 0.000000 1244 +scienceat 0 11 4.553877 0.000000 1375 +see 0 11 4.553877 0.000000 1337 +motiv 0 11 4.553877 0.000000 1346 +cook 0 10 4.653960 0.000000 1464 +werner 0 10 4.653960 0.000000 1385 +sosp 0 10 4.653960 0.000000 1416 +calvin 0 9 4.753590 0.000000 1518 +trust 0 9 4.753590 0.000000 1583 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +vineet 1 8 4.875197 4.875197 1639 +kanpur 0 8 4.875197 0.000000 1744 +realiz 0 8 4.875197 0.000000 1739 +gold 0 8 4.875197 0.000000 1745 +coast 0 8 4.875197 0.000000 1746 +vogel 0 8 4.875197 0.000000 1622 +centuri 0 7 5.010635 0.000000 1935 +happen 0 7 5.010635 0.000000 1790 +rock 0 6 5.164786 0.000000 2164 +dream 0 6 5.164786 0.000000 2165 +tri 0 6 5.164786 0.000000 2166 +south 0 6 5.164786 0.000000 2167 +goldstein 0 6 5.164786 0.000000 2168 +buch 1 5 5.347108 5.347108 2272 +truli 0 5 5.347108 0.000000 2476 +aim 0 5 5.347108 0.000000 2477 +culler 0 5 5.347108 0.000000 2381 +symp 0 5 5.347108 0.000000 2376 +australia 0 5 5.347108 0.000000 2478 +basu 1 4 5.568345 5.568345 2843 +thecornel 0 4 5.568345 0.000000 2892 +hobb 0 4 5.568345 0.000000 2893 +writer 0 4 5.568345 0.000000 2783 +cuinfo 0 4 5.568345 0.000000 2626 +ofworkst 0 4 5.568345 0.000000 2679 +withth 0 4 5.568345 0.000000 2805 +anindya 2 3 5.857933 11.715866 3535 +experienc 0 3 5.857933 0.000000 3203 +asian 0 3 5.857933 0.000000 3598 +mpp 0 3 5.857933 0.000000 3194 +schauser 0 3 5.857933 0.000000 3599 +avula 0 3 5.857933 0.000000 3600 +mugshot 0 2 6.263398 0.000000 4984 +goof 0 2 6.263398 0.000000 4985 +projectwith 0 2 6.263398 0.000000 4986 +thegreat 0 2 6.263398 0.000000 4987 +pelham 0 2 6.263398 0.000000 4988 +grenvil 0 2 6.263398 0.000000 4989 +wodehous 0 2 6.263398 0.000000 4990 +metallica 0 2 6.263398 0.000000 4991 +fanci 0 2 6.263398 0.000000 4992 +monti 0 2 6.263398 0.000000 4993 +python 0 2 6.263398 0.000000 4994 +beavi 0 2 6.263398 0.000000 4995 +meiko 0 2 6.263398 0.000000 4996 +untrust 0 2 6.263398 0.000000 4997 +seth 0 2 6.263398 0.000000 4998 +klau 0 2 6.263398 0.000000 4999 +veena 0 2 6.263398 0.000000 5000 +homepagelast 0 2 6.263398 0.000000 5001 +delawar 0 1 6.957497 0.000000 9981 +eduwhat 0 1 6.957497 0.000000 9982 +musicor 0 1 6.957497 0.000000 9983 +coollik 0 1 6.957497 0.000000 9984 +indiawho 0 1 6.957497 0.000000 9985 +hardpink 0 1 6.957497 0.000000 9986 +floydfanat 0 1 6.957497 0.000000 9987 +childhood 0 1 6.957497 0.000000 9988 +livelast 0 1 6.957497 0.000000 9989 +plum 0 1 6.957497 0.000000 9990 +unwash 0 1 6.957497 0.000000 9991 +attendedwoodstock 0 1 6.957497 0.000000 9992 +onlinewoodstock 0 1 6.957497 0.000000 9993 +woodstock 0 1 6.957497 0.000000 9994 +thesocc 0 1 6.957497 0.000000 9995 +worldcup 0 1 6.957497 0.000000 9996 +butunfortun 0 1 6.957497 0.000000 9997 +putsomerecip 0 1 6.957497 0.000000 9998 +connoisseurof 0 1 6.957497 0.000000 9999 +whiski 0 1 6.957497 0.000000 10000 +malt 0 1 6.957497 0.000000 10001 +cheer 0 1 6.957497 0.000000 10002 +buttheadoth 0 1 6.957497 0.000000 10003 +gopherand 0 1 6.957497 0.000000 10004 +projectwhich 0 1 6.957497 0.000000 10005 +acheiv 0 1 6.957497 0.000000 10006 +passinglay 0 1 6.957497 0.000000 10007 +thatshow 0 1 6.957497 0.000000 10008 +saturateth 0 1 6.957497 0.000000 10009 +fibr 0 1 6.957497 0.000000 10010 +specificationfor 0 1 6.957497 0.000000 10011 +processesboth 0 1 6.957497 0.000000 10012 +abridgedvers 0 1 6.957497 0.000000 10013 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..4b3b58e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +gener 0 220 1.386294 0.000000 27 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +area 1 144 1.945910 1.945910 80 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +studi 0 120 2.079442 0.000000 91 +check 1 115 2.197225 2.197225 118 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +intern 0 108 2.197225 0.000000 128 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +stuff 0 87 2.484907 0.000000 171 +upson 0 71 2.639057 0.000000 218 +line 0 75 2.639057 0.000000 231 +view 0 70 2.708050 0.000000 254 +interact 1 62 2.772589 2.772589 270 +ithaca 0 65 2.772589 0.000000 294 +dept 0 64 2.772589 0.000000 291 +simpl 0 60 2.833213 0.000000 298 +overview 0 56 2.890372 0.000000 323 +processor 0 54 2.944439 0.000000 335 +local 0 55 2.944439 0.000000 334 +scientif 0 53 2.944439 0.000000 341 +advisor 0 51 2.995732 0.000000 355 +life 0 50 3.044522 0.000000 375 +music 1 42 3.218876 3.218876 436 +movi 0 40 3.258097 0.000000 459 +map 0 39 3.258097 0.000000 452 +tutori 0 39 3.258097 0.000000 437 +seminar 0 38 3.295837 0.000000 470 +global 0 34 3.401197 0.000000 520 +richard 0 31 3.496508 0.000000 559 +weather 1 28 3.610918 3.610918 618 +magazin 0 24 3.761200 0.000000 704 +famili 0 23 3.806662 0.000000 735 +geometri 0 22 3.850148 0.000000 752 +navig 1 21 3.912023 3.912023 796 +synthesi 0 20 3.951244 0.000000 834 +georg 0 16 4.174387 0.000000 994 +stock 0 16 4.174387 0.000000 1007 +massiv 0 15 4.248495 0.000000 1026 +incomput 0 14 4.317488 0.000000 1096 +francisco 0 14 4.317488 0.000000 1095 +levi 0 14 4.317488 0.000000 1093 +forth 0 13 4.382027 0.000000 1186 +galleri 0 13 4.382027 0.000000 1192 +insid 0 12 4.465908 0.000000 1262 +newspap 0 12 4.465908 0.000000 1280 +neat 0 12 4.465908 0.000000 1263 +congress 0 9 4.753590 0.000000 1592 +forget 0 8 4.875197 0.000000 1712 +wire 0 8 4.875197 0.000000 1747 +microsystem 0 6 5.164786 0.000000 2160 +peek 0 6 5.164786 0.000000 2169 +frog 0 5 5.347108 0.000000 2479 +hallithaca 0 4 5.568345 0.000000 2894 +zippel 0 4 5.568345 0.000000 2879 +heard 0 4 5.568345 0.000000 2895 +aboutth 0 4 5.568345 0.000000 2720 +wander 0 4 5.568345 0.000000 2896 +educornel 0 3 5.857933 0.000000 3601 +universitydept 0 3 5.857933 0.000000 3602 +galaxi 0 3 5.857933 0.000000 3603 +underground 0 3 5.857933 0.000000 3604 +spider 0 3 5.857933 0.000000 3605 +intertext 0 2 6.263398 0.000000 5002 +solar 0 2 6.263398 0.000000 5003 +martial 0 2 6.263398 0.000000 5004 +jpop 1 1 6.957497 6.957497 10014 +homepageben 0 1 6.957497 0.000000 10015 +haogradu 0 1 6.957497 0.000000 10016 +studentbhao 0 1 6.957497 0.000000 10017 +flea 0 1 6.957497 0.000000 10018 +taylorwhen 0 1 6.957497 0.000000 10019 +itsgorg 0 1 6.957497 0.000000 10020 +cornellwhat 0 1 6.957497 0.000000 10021 +dissectionmagazin 0 1 6.957497 0.000000 10022 +magazinea 0 1 6.957497 0.000000 10023 +shoemak 0 1 6.957497 0.000000 10024 +weblouvr 0 1 6.957497 0.000000 10025 +xmorphia 0 1 6.957497 0.000000 10026 +kaleidospac 0 1 6.957497 0.000000 10027 +bonsai 0 1 6.957497 0.000000 10028 +seiyuu 0 1 6.957497 0.000000 10029 +archivenetwork 0 1 6.957497 0.000000 10030 +edgelibrari 0 1 6.957497 0.000000 10031 +infonih 0 1 6.957497 0.000000 10032 +courseth 0 1 6.957497 0.000000 10033 +guidecern 0 1 6.957497 0.000000 10034 +bhao 0 1 6.957497 0.000000 10035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..90f0e1d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +network 0 168 1.791759 0.000000 61 +ithaca 0 65 2.772589 0.000000 294 +york 0 41 3.218876 0.000000 435 +apart 0 7 5.010635 0.000000 1936 +aastha 0 2 6.263398 0.000000 5005 +sciencemast 0 2 6.263398 0.000000 4969 +hasbrouck 0 2 6.263398 0.000000 4952 +pageaastha 0 1 6.957497 0.000000 10036 +bhardwajdepart 0 1 6.957497 0.000000 10037 +ofengineeeringresumehtmlpost 0 1 6.957497 0.000000 10038 +scriptcourseworkadvanceddatabas 0 1 6.957497 0.000000 10039 +csmultimediasystem 0 1 6.957497 0.000000 10040 +csengineeringcomput 0 1 6.957497 0.000000 10041 +cssoftwareengin 0 1 6.957497 0.000000 10042 +cscontact 0 1 6.957497 0.000000 10043 +bhardwaj 0 1 6.957497 0.000000 10044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..95ac4b8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,284 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +cornel 2 215 1.386294 2.772588 23 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +paper 2 205 1.609438 3.218876 38 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +algorithm 2 162 1.791759 3.583518 57 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +click 1 142 1.945910 1.945910 78 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +confer 1 126 2.079442 2.079442 100 +report 0 131 2.079442 0.000000 92 +tool 0 117 2.079442 0.000000 93 +intern 2 108 2.197225 4.394450 128 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +access 0 102 2.302585 0.000000 136 +pictur 1 89 2.397895 2.397895 160 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +search 0 95 2.397895 0.000000 155 +ieee 2 86 2.484907 4.969814 190 +journal 1 83 2.484907 2.484907 183 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +librari 0 87 2.484907 0.000000 181 +stuff 0 87 2.484907 0.000000 171 +level 0 87 2.484907 0.000000 180 +april 1 77 2.564949 2.564949 196 +optim 1 79 2.564949 2.564949 197 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +workshop 1 71 2.639057 2.639057 239 +intellig 1 72 2.639057 2.639057 225 +nation 0 74 2.639057 0.000000 240 +write 0 72 2.639057 0.000000 222 +symposium 0 72 2.639057 0.000000 238 +onlin 0 75 2.639057 0.000000 223 +view 0 70 2.708050 0.000000 254 +plan 1 65 2.772589 2.772589 272 +laboratori 1 63 2.772589 2.772589 292 +foundat 1 62 2.772589 2.772589 286 +artifici 0 63 2.772589 0.000000 280 +experi 0 64 2.772589 0.000000 283 +copi 0 63 2.772589 0.000000 284 +ithaca 0 65 2.772589 0.000000 294 +plai 1 60 2.833213 2.833213 307 +automat 0 61 2.833213 0.000000 306 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +found 0 53 2.944439 0.000000 337 +februari 0 54 2.944439 0.000000 328 +without 0 50 3.044522 0.000000 370 +visual 0 48 3.044522 0.000000 372 +california 1 46 3.091042 3.091042 388 +could 0 46 3.091042 0.000000 383 +move 0 47 3.091042 0.000000 382 +video 1 44 3.135494 3.135494 405 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +around 0 43 3.178054 0.000000 415 +vision 2 41 3.218876 6.437752 430 +autom 1 41 3.218876 3.218876 434 +press 0 42 3.218876 0.000000 419 +programm 1 39 3.258097 3.258097 445 +small 1 39 3.258097 3.258097 447 +societi 0 40 3.258097 0.000000 456 +open 0 38 3.295837 0.000000 469 +field 1 37 3.332205 3.332205 482 +robot 3 36 3.367296 10.101888 497 +approxim 1 35 3.401197 3.401197 509 +singl 0 34 3.401197 0.000000 510 +post 0 35 3.401197 0.000000 505 +tech 0 35 3.401197 0.000000 515 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +built 0 29 3.583519 0.000000 592 +weather 0 28 3.610918 0.000000 618 +scale 0 28 3.610918 0.000000 613 +arrai 1 27 3.637586 3.637586 627 +manipul 1 27 3.637586 3.637586 624 +team 1 27 3.637586 3.637586 625 +bound 1 26 3.688879 3.688879 659 +proc 1 26 3.688879 3.688879 649 +experiment 1 26 3.688879 3.688879 645 +detect 0 26 3.688879 0.000000 646 +revis 0 26 3.688879 0.000000 640 +task 1 25 3.737670 3.737670 678 +motion 0 24 3.761200 0.000000 699 +sometim 0 24 3.761200 0.000000 696 +mobil 1 23 3.806662 3.806662 730 +famili 0 23 3.806662 0.000000 735 +lead 0 23 3.806662 0.000000 718 +cooper 0 22 3.850148 0.000000 757 +chip 1 21 3.912023 3.912023 770 +vlsi 0 21 3.912023 0.000000 795 +department 0 20 3.951244 0.000000 839 +mpeg 0 20 3.951244 0.000000 831 +scheme 0 20 3.951244 0.000000 818 +boston 0 19 4.007333 0.000000 862 +demo 1 18 4.060443 4.060443 888 +offici 0 18 4.060443 0.000000 894 +lower 0 18 4.060443 0.000000 886 +minim 0 18 4.060443 0.000000 887 +agent 0 18 4.060443 0.000000 910 +stanford 0 17 4.110874 0.000000 955 +vector 1 16 4.174387 4.174387 961 +diego 1 16 4.174387 4.174387 992 +explan 0 16 4.174387 0.000000 985 +micro 1 15 4.248495 4.248495 1031 +massiv 1 15 4.248495 4.248495 1026 +track 0 15 4.248495 0.000000 1029 +configur 0 15 4.248495 0.000000 1012 +researchmi 0 14 4.317488 0.000000 1119 +draft 0 14 4.317488 0.000000 1085 +train 0 14 4.317488 0.000000 1066 +anonym 0 14 4.317488 0.000000 1100 +cannot 1 13 4.382027 4.382027 1144 +jonathan 0 13 4.382027 0.000000 1174 +franc 1 12 4.465908 4.465908 1276 +bruce 0 12 4.465908 0.000000 1226 +walk 0 12 4.465908 0.000000 1281 +target 0 12 4.465908 0.000000 1282 +peter 0 11 4.553877 0.000000 1316 +donald 1 9 4.753590 4.753590 1510 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +face 0 9 4.753590 0.000000 1501 +classif 0 9 4.753590 0.000000 1586 +entitl 0 9 4.753590 0.000000 1490 +wilson 0 9 4.753590 0.000000 1536 +herefor 0 9 4.753590 0.000000 1483 +invari 1 8 4.875197 4.875197 1748 +autonom 0 8 4.875197 0.000000 1749 +sensor 1 7 5.010635 5.010635 1920 +morph 0 7 5.010635 0.000000 1937 +pittsburgh 0 7 5.010635 0.000000 1938 +beyond 0 7 5.010635 0.000000 1834 +huttenloch 1 6 5.164786 5.164786 1983 +silicon 0 6 5.164786 0.000000 2076 +lili 1 5 5.347108 5.347108 2240 +actuat 1 5 5.347108 5.347108 2442 +minneapoli 1 5 5.347108 5.347108 2480 +minnesota 1 5 5.347108 5.347108 2469 +eduph 0 5 5.347108 0.000000 2449 +upper 0 5 5.347108 0.000000 2481 +these 0 5 5.347108 0.000000 2482 +rotat 0 5 5.347108 0.000000 2295 +poem 0 5 5.347108 0.000000 2483 +clickher 0 5 5.347108 0.000000 2428 +randal 1 4 5.568345 5.568345 2776 +thecornel 1 4 5.568345 5.568345 2892 +chase 1 4 5.568345 5.568345 2897 +decoupl 0 4 5.568345 0.000000 2898 +chain 0 4 5.568345 0.000000 2712 +push 0 4 5.568345 0.000000 2635 +alfr 0 4 5.568345 0.000000 2882 +bhringer 1 3 5.857933 5.857933 3606 +jen 1 3 5.857933 5.857933 3378 +hereto 1 3 5.857933 5.857933 3476 +noel 1 3 5.857933 5.857933 3376 +fabric 1 3 5.857933 5.857933 3607 +algorithmica 1 3 5.857933 5.857933 3561 +artificialintellig 1 3 5.857933 5.857933 3608 +scream 0 3 5.857933 0.000000 3609 +microfabr 0 3 5.857933 0.000000 3610 +daniela 0 3 5.857933 0.000000 3611 +portrait 0 3 5.857933 0.000000 3491 +harm 0 3 5.857933 0.000000 3515 +macdonald 1 2 6.263398 6.263398 5006 +mem 1 2 6.263398 6.263398 5007 +brigg 1 2 6.263398 6.263398 5008 +ree 1 2 6.263398 6.263398 5009 +nanofabr 0 2 6.263398 0.000000 5010 +toconstruct 0 2 6.263398 0.000000 4858 +ofmobil 0 2 6.263398 0.000000 5011 +internationalworkshop 0 2 6.263398 0.000000 5012 +crystal 0 2 6.263398 0.000000 5013 +electro 0 2 6.263398 0.000000 5014 +reif 0 2 6.263398 0.000000 5015 +furnitur 0 2 6.263398 0.000000 5016 +actuatorarrai 0 2 6.263398 0.000000 5017 +mihailovich 0 2 6.263398 0.000000 5018 +automationnic 0 2 6.263398 0.000000 5019 +andj 0 2 6.263398 0.000000 5020 +latomb 0 2 6.263398 0.000000 5021 +doc 0 2 6.263398 0.000000 5022 +catalogc 0 2 6.263398 0.000000 5023 +apictur 0 2 6.263398 0.000000 5024 +drawn 0 2 6.263398 0.000000 4215 +swallow 0 2 6.263398 0.000000 5025 +tommi 1 1 6.957497 6.957497 10045 +feeder 1 1 6.957497 6.957497 10046 +vibratori 1 1 6.957497 6.957497 10047 +kinodynam 1 1 6.957497 6.957497 10048 +xavier 1 1 6.957497 6.957497 10049 +ourlab 1 1 6.957497 6.957497 10050 +toulous 1 1 6.957497 6.957497 10051 +icra 1 1 6.957497 6.957497 10052 +provablygood 1 1 6.957497 6.957497 10053 +couch 1 1 6.957497 6.957497 10054 +donaldbruc 0 1 6.957497 0.000000 10055 +donaldassoci 0 1 6.957497 0.000000 10056 +professorbrd 0 1 6.957497 0.000000 10057 +laboratorydan 0 1 6.957497 0.000000 10058 +microactu 0 1 6.957497 0.000000 10059 +arrayi 0 1 6.957497 0.000000 10060 +squarecentemet 0 1 6.957497 0.000000 10061 +sensoryfeedback 0 1 6.957497 0.000000 10062 +buildself 0 1 6.957497 0.000000 10063 +propel 0 1 6.957497 0.000000 10064 +amybrigg 0 1 6.957497 0.000000 10065 +surveil 0 1 6.957497 0.000000 10066 +andintercept 0 1 6.957497 0.000000 10067 +developedbi 0 1 6.957497 0.000000 10068 +informationalon 0 1 6.957497 0.000000 10069 +andlow 0 1 6.957497 0.000000 10070 +memsand 0 1 6.957497 0.000000 10071 +thealgorithm 0 1 6.957497 0.000000 10072 +robustgeometr 0 1 6.957497 0.000000 10073 +andimprov 0 1 6.957497 0.000000 10074 +partsfeed 0 1 6.957497 0.000000 10075 +partii 0 1 6.957497 0.000000 10076 +robotswith 0 1 6.957497 0.000000 10077 +forcartesian 0 1 6.957497 0.000000 10078 +canni 0 1 6.957497 0.000000 10079 +inpress 0 1 6.957497 0.000000 10080 +supermodular 0 1 6.957497 0.000000 10081 +andtheoret 0 1 6.957497 0.000000 10082 +jetai 0 1 6.957497 0.000000 10083 +firstquart 0 1 6.957497 0.000000 10084 +inminim 0 1 6.957497 0.000000 10085 +iser 0 1 6.957497 0.000000 10086 +automon 0 1 6.957497 0.000000 10087 +ofjapan 0 1 6.957497 0.000000 10088 +iro 0 1 6.957497 0.000000 10089 +sensorlessmanipul 0 1 6.957497 0.000000 10090 +andautom 0 1 6.957497 0.000000 10091 +ofrobot 0 1 6.957497 0.000000 10092 +otherpubl 0 1 6.957497 0.000000 10093 +dinesh 0 1 6.957497 0.000000 10094 +aval 0 1 6.957497 0.000000 10095 +indexobtain 0 1 6.957497 0.000000 10096 +paperscopi 0 1 6.957497 0.000000 10097 +teamof 0 1 6.957497 0.000000 10098 +movefurnitur 0 1 6.957497 0.000000 10099 +mobot 0 1 6.957497 0.000000 10100 +loretta 0 1 6.957497 0.000000 10101 +pompilio 0 1 6.957497 0.000000 10102 +discoverychannel 0 1 6.957497 0.000000 10103 +funa 0 1 6.957497 0.000000 10104 +moreoth 0 1 6.957497 0.000000 10105 +tallest 0 1 6.957497 0.000000 10106 +darkest 0 1 6.957497 0.000000 10107 +hollywood 0 1 6.957497 0.000000 10108 +merian 0 1 6.957497 0.000000 10109 +wrai 0 1 6.957497 0.000000 10110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..a1347035 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +languag 2 227 1.386294 2.772588 26 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +area 1 144 1.945910 1.945910 80 +professor 0 137 1.945910 0.000000 76 +perform 0 143 1.945910 0.000000 74 +lectur 0 135 1.945910 0.000000 73 +file 0 132 1.945910 0.000000 70 +machin 2 129 2.079442 4.158884 95 +confer 2 126 2.079442 4.158884 100 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +databas 0 122 2.079442 0.000000 86 +specif 1 106 2.197225 2.197225 106 +intern 1 108 2.197225 2.197225 128 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +text 1 98 2.302585 2.302585 133 +user 0 104 2.302585 0.000000 137 +proceed 2 93 2.397895 4.795790 152 +select 1 91 2.397895 2.397895 154 +associ 1 93 2.397895 2.397895 151 +present 0 91 2.397895 0.000000 145 +learn 2 86 2.484907 4.969814 170 +contain 1 81 2.484907 2.484907 174 +build 0 85 2.484907 0.000000 184 +activ 0 84 2.484907 0.000000 182 +resourc 0 81 2.484907 0.000000 172 +method 0 80 2.564949 0.000000 213 +intellig 2 72 2.639057 5.278114 225 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +addit 0 74 2.639057 0.000000 228 +knowledg 1 67 2.708050 2.708050 243 +goal 0 66 2.708050 0.000000 250 +artifici 2 63 2.772589 5.545178 280 +guid 1 63 2.772589 2.772589 267 +import 0 65 2.772589 0.000000 282 +evalu 0 64 2.772589 0.000000 266 +foundat 0 62 2.772589 0.000000 286 +improv 0 62 2.772589 0.000000 289 +content 0 59 2.833213 0.000000 302 +thesi 1 57 2.890372 2.890372 327 +reason 0 57 2.890372 0.000000 318 +variou 0 56 2.890372 0.000000 317 +case 1 51 2.995732 2.995732 351 +finger 0 52 2.995732 0.000000 354 +investig 0 51 2.995732 0.000000 353 +approach 1 48 3.044522 3.044522 366 +archiv 0 49 3.044522 0.000000 364 +pointer 0 48 3.044522 0.000000 368 +understand 1 47 3.091042 3.091042 384 +featur 1 46 3.091042 3.091042 386 +effect 0 46 3.091042 0.000000 385 +natur 2 44 3.135494 6.270988 406 +mechan 0 43 3.178054 0.000000 416 +offer 0 43 3.178054 0.000000 414 +press 2 42 3.218876 6.437752 419 +autom 0 41 3.218876 0.000000 434 +combin 0 42 3.218876 0.000000 421 +annual 1 40 3.258097 3.258097 458 +societi 1 40 3.258097 3.258097 456 +seminar 0 38 3.295837 0.000000 470 +origin 0 38 3.295837 0.000000 472 +expect 0 37 3.332205 0.000000 484 +cost 0 37 3.332205 0.000000 480 +tree 0 36 3.367296 0.000000 492 +statist 0 35 3.401197 0.000000 521 +print 0 34 3.401197 0.000000 503 +within 0 33 3.433987 0.000000 525 +chapter 1 32 3.465736 3.465736 536 +extend 0 32 3.465736 0.000000 539 +domain 1 30 3.555348 3.555348 564 +focu 1 30 3.555348 3.555348 571 +compon 0 30 3.555348 0.000000 570 +focus 0 29 3.583519 0.000000 584 +semant 0 29 3.583519 0.000000 587 +framework 0 28 3.610918 0.000000 606 +symbol 0 27 3.637586 0.000000 620 +determin 0 27 3.637586 0.000000 630 +task 0 25 3.737670 0.000000 678 +handl 0 24 3.761200 0.000000 685 +brows 0 23 3.806662 0.000000 726 +decis 0 23 3.806662 0.000000 728 +springer 0 22 3.850148 0.000000 750 +stat 0 17 4.110874 0.000000 924 +analyz 0 17 4.110874 0.000000 925 +repositori 0 17 4.110874 0.000000 932 +cognit 1 16 4.174387 4.174387 986 +jose 1 16 4.174387 4.174387 976 +condit 0 16 4.174387 0.000000 975 +practicum 0 16 4.174387 0.000000 960 +cambridg 0 16 4.174387 0.000000 1008 +massachusett 1 14 4.317488 4.317488 1118 +train 0 14 4.317488 0.000000 1066 +embed 0 14 4.317488 0.000000 1102 +primarili 0 13 4.382027 0.000000 1185 +context 0 13 4.382027 0.000000 1153 +robust 0 12 4.465908 0.000000 1271 +speech 0 12 4.465908 0.000000 1222 +lake 0 11 4.553877 0.000000 1373 +tour 0 11 4.553877 0.000000 1307 +acquisit 1 10 4.653960 4.653960 1465 +sentenc 1 10 4.653960 4.653960 1413 +underli 0 10 4.653960 0.000000 1410 +prior 0 10 4.653960 0.000000 1438 +linguist 1 9 4.753590 4.753590 1593 +rel 1 9 4.753590 4.753590 1487 +softbal 0 9 4.753590 0.000000 1594 +conferenceon 0 9 4.753590 0.000000 1595 +introductori 0 9 4.753590 0.000000 1479 +morgan 0 9 4.753590 0.000000 1484 +aaai 1 8 4.875197 4.875197 1750 +extract 1 8 4.875197 4.875197 1728 +entri 0 8 4.875197 0.000000 1678 +span 0 8 4.875197 0.000000 1751 +empir 0 8 4.875197 0.000000 1722 +tag 1 7 5.010635 5.010635 1821 +larger 1 7 5.010635 5.010635 1875 +lawrenc 1 7 5.010635 5.010635 1908 +pennsylvania 0 7 5.010635 0.000000 1932 +machinelearn 0 6 5.164786 0.000000 2084 +heurist 0 6 5.164786 0.000000 2125 +syntax 0 6 5.164786 0.000000 2030 +amherst 1 5 5.347108 5.347108 2484 +connectionist 0 5 5.347108 0.000000 2430 +kaufmann 0 5 5.347108 0.000000 2254 +corpu 0 5 5.347108 0.000000 2282 +disambigu 1 4 5.568345 5.568345 2899 +clair 0 4 5.568345 0.000000 2605 +hallphon 0 4 5.568345 0.000000 2900 +compris 0 4 5.568345 0.000000 2862 +ijcai 0 4 5.568345 0.000000 2901 +complic 0 4 5.568345 0.000000 2902 +educlick 0 3 5.857933 0.000000 3612 +tosupport 0 3 5.857933 0.000000 3613 +teachingc 0 3 5.857933 0.000000 3614 +agener 0 3 5.857933 0.000000 3213 +conceptu 0 3 5.857933 0.000000 3214 +fourteenth 0 3 5.857933 0.000000 3615 +ninth 0 3 5.857933 0.000000 3616 +anaheim 0 3 5.857933 0.000000 3271 +citat 0 3 5.857933 0.000000 3617 +penn 0 3 5.857933 0.000000 3094 +corpora 1 2 6.263398 6.263398 4269 +interestscours 0 2 6.263398 0.000000 5026 +tandem 0 2 6.263398 0.000000 5027 +learningtechniqu 0 2 6.263398 0.000000 5028 +gabriel 0 2 6.263398 0.000000 5029 +jointconfer 0 2 6.263398 0.000000 5030 +eleventh 0 2 6.263398 0.000000 5031 +newark 0 2 6.263398 0.000000 5032 +bias 0 2 6.263398 0.000000 5033 +bloomington 0 2 6.263398 0.000000 5034 +twelfth 0 2 6.263398 0.000000 5035 +treebank 0 2 6.263398 0.000000 4138 +cardi 2 1 6.957497 13.914994 10111 +kenmor 1 1 6.957497 6.957497 10112 +pronoun 1 1 6.957497 6.957497 10113 +naturallanguag 1 1 6.957497 6.957497 10114 +knowledgeacquisit 1 1 6.957497 6.957497 10115 +riloff 1 1 6.957497 6.957497 10116 +tenth 1 1 6.957497 6.957497 10117 +erlbaumassoci 1 1 6.957497 6.957497 10118 +lehnert 1 1 6.957497 6.957497 10119 +cardieclair 0 1 6.957497 0.000000 10120 +teachselect 0 1 6.957497 0.000000 10121 +publicationsnlp 0 1 6.957497 0.000000 10122 +amalgam 0 1 6.957497 0.000000 10123 +westi 0 1 6.957497 0.000000 10124 +interestsalthough 0 1 6.957497 0.000000 10125 +subfield 0 1 6.957497 0.000000 10126 +cognitivemodel 0 1 6.957497 0.000000 10127 +forexplor 0 1 6.957497 0.000000 10128 +tworel 0 1 6.957497 0.000000 10129 +reliablyextract 0 1 6.957497 0.000000 10130 +cstr 0 1 6.957497 0.000000 10131 +kenmoreacquir 0 1 6.957497 0.000000 10132 +tworeal 0 1 6.957497 0.000000 10133 +andconcept 0 1 6.957497 0.000000 10134 +anteced 0 1 6.957497 0.000000 10135 +disambiguationtask 0 1 6.957497 0.000000 10136 +learningcompon 0 1 6.957497 0.000000 10137 +isembed 0 1 6.957497 0.000000 10138 +inartifici 0 1 6.957497 0.000000 10139 +understandingselect 0 1 6.957497 0.000000 10140 +publicationsautom 0 1 6.957497 0.000000 10141 +wermter 0 1 6.957497 0.000000 10142 +scheler 0 1 6.957497 0.000000 10143 +andsymbol 0 1 6.957497 0.000000 10144 +tolearn 0 1 6.957497 0.000000 10145 +conceptualsent 0 1 6.957497 0.000000 10146 +cmpsci 0 1 6.957497 0.000000 10147 +onconstrain 0 1 6.957497 0.000000 10148 +plausibl 0 1 6.957497 0.000000 10149 +linkscomput 0 1 6.957497 0.000000 10150 +linguistics 0 1 6.957497 0.000000 10151 +aclspeci 0 1 6.957497 0.000000 10152 +learningmachin 0 1 6.957497 0.000000 10153 +digestmachinelearn 0 1 6.957497 0.000000 10154 +researchersmachin 0 1 6.957497 0.000000 10155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..540dc259 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +applic 1 170 1.791759 1.791759 56 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +click 0 142 1.945910 0.000000 78 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +high 0 130 2.079442 0.000000 101 +mathemat 1 108 2.197225 2.197225 123 +version 0 113 2.197225 0.000000 122 +make 0 111 2.197225 0.000000 120 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +level 1 87 2.484907 2.484907 180 +environ 0 84 2.484907 0.000000 177 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +java 0 70 2.708050 0.000000 248 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +creat 1 63 2.772589 2.772589 277 +plan 0 65 2.772589 0.000000 272 +ithaca 0 65 2.772589 0.000000 294 +automat 0 61 2.833213 0.000000 306 +browser 0 56 2.890372 0.000000 313 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +natur 0 44 3.135494 0.000000 406 +vision 0 41 3.218876 0.000000 430 +paul 1 38 3.295837 3.295837 471 +tech 0 35 3.401197 0.000000 515 +concept 0 32 3.465736 0.000000 537 +transform 0 32 3.465736 0.000000 542 +express 0 32 3.465736 0.000000 540 +specifi 0 30 3.555348 0.000000 568 +symbol 0 27 3.637586 0.000000 620 +primari 0 25 3.737670 0.000000 669 +motion 0 24 3.761200 0.000000 699 +equat 0 23 3.806662 0.000000 724 +emphasi 0 22 3.850148 0.000000 755 +thu 0 21 3.912023 0.000000 773 +applet 0 20 3.951244 0.000000 827 +geometr 1 19 4.007333 4.007333 852 +comparison 0 19 4.007333 0.000000 863 +senior 0 14 4.317488 0.000000 1120 +shape 0 12 4.465908 0.000000 1245 +mesh 1 11 4.553877 4.553877 1351 +sens 0 11 4.553877 0.000000 1305 +purdu 0 10 4.653960 0.000000 1466 +rhode 0 9 4.753590 0.000000 1579 +rais 0 8 4.875197 0.000000 1711 +canb 0 7 5.010635 0.000000 1846 +beta 0 6 5.164786 0.000000 1993 +compat 0 5 5.347108 0.000000 2485 +diagram 0 5 5.347108 0.000000 2346 +triangul 0 4 5.568345 0.000000 2903 +chew 1 3 5.857933 5.857933 3618 +delaunai 1 3 5.857933 5.857933 3619 +implicitli 0 3 5.857933 0.000000 3620 +voronoi 0 2 6.263398 0.000000 5036 +agenda 0 2 6.263398 0.000000 5037 +scientificsoftwar 0 2 6.263398 0.000000 5038 +acollect 0 2 6.263398 0.000000 5039 +associatephd 0 1 6.957497 0.000000 10156 +eduappletsy 0 1 6.957497 0.000000 10157 +asnetscap 0 1 6.957497 0.000000 10158 +avoronoi 0 1 6.957497 0.000000 10159 +onpract 0 1 6.957497 0.000000 10160 +includedplac 0 1 6.957497 0.000000 10161 +thataris 0 1 6.957497 0.000000 10162 +isspecifi 0 1 6.957497 0.000000 10163 +ofphys 0 1 6.957497 0.000000 10164 +techniquesar 0 1 6.957497 0.000000 10165 +effectiveprogram 0 1 6.957497 0.000000 10166 +myonlin 0 1 6.957497 0.000000 10167 +reportscornel 0 1 6.957497 0.000000 10168 +computerscienceth 0 1 6.957497 0.000000 10169 +simlabprojectaddress 0 1 6.957497 0.000000 10170 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..c0fc800a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +network 1 168 1.791759 1.791759 61 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +perform 0 143 1.945910 0.000000 74 +number 0 130 2.079442 0.000000 97 +compil 0 122 2.079442 0.000000 96 +world 0 115 2.197225 0.000000 126 +commun 0 95 2.397895 0.000000 157 +chang 1 82 2.484907 2.484907 163 +activ 1 84 2.484907 2.484907 182 +stuff 0 87 2.484907 0.000000 171 +messag 0 76 2.564949 0.000000 212 +server 0 76 2.564949 0.000000 204 +effici 0 73 2.639057 0.000000 233 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +guid 0 63 2.772589 0.000000 267 +faculti 0 56 2.890372 0.000000 325 +summer 0 56 2.890372 0.000000 311 +processor 0 54 2.944439 0.000000 335 +advisor 0 51 2.995732 0.000000 355 +cool 0 49 3.044522 0.000000 374 +multipl 0 39 3.258097 0.000000 453 +microsoft 0 38 3.295837 0.000000 468 +concurr 0 34 3.401197 0.000000 501 +toward 0 25 3.737670 0.000000 668 +sport 0 25 3.737670 0.000000 683 +latest 0 21 3.912023 0.000000 785 +runtim 0 19 4.007333 0.000000 858 +anyon 0 17 4.110874 0.000000 916 +latenc 0 16 4.174387 0.000000 993 +matlab 0 14 4.317488 0.000000 1081 +edui 0 13 4.382027 0.000000 1193 +eicken 0 13 4.382027 0.000000 1134 +composit 0 13 4.382027 0.000000 1150 +thedepart 0 11 4.553877 0.000000 1350 +scienceat 0 11 4.553877 0.000000 1375 +soccer 1 8 4.875197 4.875197 1752 +chao 1 8 4.875197 4.875197 1753 +risc 0 6 5.164786 0.000000 2016 +chess 0 5 5.347108 0.000000 2486 +andoper 0 3 5.857933 0.000000 3621 +messageslow 0 2 6.263398 0.000000 5040 +homepagelast 0 2 6.263398 0.000000 5001 +chichao 1 1 6.957497 6.957497 10171 +isthorsten 0 1 6.957497 0.000000 10172 +addressesand 0 1 6.957497 0.000000 10173 +overheterogen 0 1 6.957497 0.000000 10174 +tham 0 1 6.957497 0.000000 10175 +multimatlab 0 1 6.957497 0.000000 10176 +newsbraziliansocc 0 1 6.957497 0.000000 10177 +portugues 0 1 6.957497 0.000000 10178 +andhomepagesoliv 0 1 6.957497 0.000000 10179 +lubrasa 0 1 6.957497 0.000000 10180 +luso 0 1 6.957497 0.000000 10181 +brazilian 0 1 6.957497 0.000000 10182 +associationu 0 1 6.957497 0.000000 10183 +centerjorn 0 1 6.957497 0.000000 10184 +brasilmi 0 1 6.957497 0.000000 10185 +carstockmasterjayhawk 0 1 6.957497 0.000000 10186 +basketballwww 0 1 6.957497 0.000000 10187 +tennisserverback 0 1 6.957497 0.000000 10188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..0d55c4a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +avail 0 169 1.791759 0.000000 48 +construct 0 139 1.945910 0.000000 82 +databas 0 122 2.079442 0.000000 86 +technolog 0 131 2.079442 0.000000 102 +find 0 111 2.197225 0.000000 111 +advanc 0 99 2.302585 0.000000 130 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +master 0 76 2.564949 0.000000 216 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +ithaca 1 65 2.772589 2.772589 294 +improv 0 62 2.772589 0.000000 289 +foundat 0 62 2.772589 0.000000 286 +artifici 0 63 2.772589 0.000000 280 +programm 0 39 3.258097 0.000000 445 +small 0 39 3.258097 0.000000 447 +soon 0 36 3.367296 0.000000 494 +edulast 0 17 4.110874 0.000000 927 +countri 0 15 4.248495 0.000000 1059 +hopefulli 0 14 4.317488 0.000000 1071 +forth 0 13 4.382027 0.000000 1186 +avenu 0 12 4.465908 0.000000 1277 +mapl 0 11 4.553877 0.000000 1376 +apolog 0 6 5.164786 0.000000 2046 +singapor 1 5 5.347108 5.347108 2487 +intelligencec 0 4 5.568345 0.000000 2673 +engineeringc 0 4 5.568345 0.000000 2904 +chee 1 3 5.857933 5.857933 3480 +tokyo 1 3 5.857933 5.857933 3622 +keong 1 1 6.957497 6.957497 10189 +liau 0 1 6.957497 0.000000 10190 +liauwelcom 0 1 6.957497 0.000000 10191 +networksc 0 1 6.957497 0.000000 10192 +systemsbaccalaur 0 1 6.957497 0.000000 10193 +japanhomei 0 1 6.957497 0.000000 10194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..76c6729b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +paper 0 205 1.609438 0.000000 38 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +problem 1 147 1.945910 1.945910 75 +hall 0 146 1.945910 0.000000 65 +tool 0 117 2.079442 0.000000 93 +mathemat 1 108 2.197225 2.197225 123 +theori 0 111 2.197225 0.000000 127 +final 0 116 2.197225 0.000000 108 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +center 1 88 2.397895 2.397895 158 +imag 1 91 2.397895 2.397895 161 +associ 0 93 2.397895 0.000000 151 +larg 1 82 2.484907 2.484907 168 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +activ 0 84 2.484907 0.000000 182 +optim 1 79 2.564949 2.564949 197 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +numer 0 49 3.044522 0.000000 369 +understand 0 47 3.091042 0.000000 384 +linear 1 41 3.218876 3.218876 431 +http 0 41 3.218876 0.000000 420 +york 0 41 3.218876 0.000000 435 +continu 1 39 3.258097 3.258097 448 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +scale 1 28 3.610918 3.610918 613 +constraint 0 26 3.688879 0.000000 636 +concern 0 25 3.737670 0.000000 666 +primari 0 25 3.737670 0.000000 669 +director 0 22 3.850148 0.000000 767 +thoma 1 18 4.060443 4.060443 901 +minim 0 18 4.060443 0.000000 887 +differenti 0 17 4.110874 0.000000 921 +former 0 17 4.110874 0.000000 956 +match 0 16 4.174387 0.000000 965 +nonlinear 0 14 4.317488 0.000000 1107 +affili 0 13 4.382027 0.000000 1194 +discret 0 13 4.382027 0.000000 1165 +equal 0 10 4.653960 0.000000 1424 +rhode 0 9 4.753590 0.000000 1579 +postdoc 1 8 4.875197 4.875197 1724 +strong 0 6 5.164786 0.000000 2029 +reconstruct 0 6 5.164786 0.000000 2170 +inequ 0 6 5.164786 0.000000 2113 +biomed 0 4 5.568345 0.000000 2905 +coleman 1 2 6.263398 6.263398 5041 +colemanthoma 0 1 6.957497 0.000000 10195 +colemancornel 0 1 6.957497 0.000000 10196 +universityi 0 1 6.957497 0.000000 10197 +professcp 0 1 6.957497 0.000000 10198 +ccop 0 1 6.957497 0.000000 10199 +broadfield 0 1 6.957497 0.000000 10200 +programmi 0 1 6.957497 0.000000 10201 +computationalmethod 0 1 6.957497 0.000000 10202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..a76ca8ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +languag 2 227 1.386294 2.772588 26 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +implement 1 152 1.791759 1.791759 52 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +compil 1 122 2.079442 2.079442 96 +report 0 131 2.079442 0.000000 92 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +resourc 0 81 2.484907 0.000000 172 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +name 0 72 2.639057 0.000000 220 +logic 0 71 2.639057 0.000000 230 +onlin 0 75 2.639057 0.000000 223 +view 1 70 2.708050 2.708050 254 +practic 0 70 2.708050 0.000000 246 +import 1 65 2.772589 2.772589 282 +function 0 62 2.772589 0.000000 275 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +collect 0 65 2.772589 0.000000 268 +guid 0 63 2.772589 0.000000 267 +type 2 61 2.833213 5.666426 296 +maintain 0 51 2.995732 0.000000 342 +set 0 50 3.044522 0.000000 361 +standard 0 48 3.044522 0.000000 365 +life 0 50 3.044522 0.000000 375 +featur 0 46 3.091042 0.000000 386 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +answer 0 45 3.135494 0.000000 391 +mark 0 44 3.135494 0.000000 403 +combin 0 42 3.218876 0.000000 421 +theoret 1 39 3.258097 3.258097 446 +map 0 39 3.258097 0.000000 452 +form 0 39 3.258097 0.000000 443 +origin 0 38 3.295837 0.000000 472 +close 0 38 3.295837 0.000000 465 +formal 0 37 3.332205 0.000000 478 +approxim 1 35 3.401197 3.401197 509 +committe 0 34 3.401197 0.000000 522 +eduoffic 0 33 3.433987 0.000000 531 +often 1 31 3.496508 3.496508 551 +semant 1 29 3.583519 3.583519 587 +mind 0 27 3.637586 0.000000 632 +consist 0 26 3.688879 0.000000 651 +primari 0 25 3.737670 0.000000 669 +strategi 0 25 3.737670 0.000000 682 +seri 0 24 3.761200 0.000000 708 +interpret 0 24 3.761200 0.000000 686 +greg 0 24 3.761200 0.000000 695 +emphasi 0 22 3.850148 0.000000 755 +programminglanguag 1 21 3.912023 3.912023 782 +love 1 21 3.912023 3.912023 804 +martin 0 21 3.912023 0.000000 794 +safeti 0 20 3.951244 0.000000 817 +grad 0 20 3.951244 0.000000 837 +particularli 0 19 4.007333 0.000000 867 +lower 0 18 4.060443 0.000000 886 +whole 0 17 4.110874 0.000000 940 +embed 0 14 4.317488 0.000000 1102 +command 0 14 4.317488 0.000000 1083 +translat 1 13 4.382027 4.382027 1164 +care 0 13 4.382027 0.000000 1177 +speak 0 12 4.465908 0.000000 1283 +calculu 0 12 4.465908 0.000000 1203 +israel 0 11 4.553877 0.000000 1366 +modular 0 10 4.653960 0.000000 1392 +relationship 0 10 4.653960 0.000000 1383 +correspond 0 10 4.653960 0.000000 1382 +guarante 0 10 4.653960 0.000000 1391 +nuprl 0 10 4.653960 0.000000 1402 +intermedi 1 9 4.753590 4.753590 1497 +andth 0 9 4.753590 0.000000 1481 +strength 0 9 4.753590 0.000000 1494 +formul 1 8 4.875197 4.875197 1733 +convers 0 8 4.875197 0.000000 1673 +paradigm 0 8 4.875197 0.000000 1662 +invari 0 8 4.875197 0.000000 1748 +leon 0 8 4.875197 0.000000 1631 +babylon 0 8 4.875197 0.000000 1731 +heart 0 8 4.875197 0.000000 1729 +pageth 0 7 5.010635 0.000000 1939 +hear 0 7 5.010635 0.000000 1940 +understood 0 5 5.347108 0.000000 2364 +stage 0 5 5.347108 0.000000 2488 +morrisett 0 5 5.347108 0.000000 2263 +lord 1 4 5.568345 5.568345 2906 +dexter 0 4 5.568345 0.000000 2855 +kozen 0 4 5.568345 0.000000 2619 +ofprogram 0 4 5.568345 0.000000 2624 +soul 0 4 5.568345 0.000000 2907 +karl 0 3 5.857933 0.000000 3623 +constabl 0 3 5.857933 0.000000 3186 +jesu 0 3 5.857933 0.000000 3624 +atyp 1 2 6.263398 6.263398 5042 +tractabl 1 2 6.263398 6.263398 4799 +pagekarl 0 2 6.263398 0.000000 5043 +halloffic 0 2 6.263398 0.000000 4583 +subtyp 0 2 6.263398 0.000000 4375 +intract 0 2 6.263398 0.000000 5044 +anapproxim 0 2 6.263398 0.000000 5045 +unavail 0 2 6.263398 0.000000 5046 +thenuprl 0 2 6.263398 0.000000 5047 +hereat 0 2 6.263398 0.000000 5048 +papersoth 0 2 6.263398 0.000000 5049 +lurker 0 2 6.263398 0.000000 5050 +andwith 0 2 6.263398 0.000000 5051 +thesecond 0 2 6.263398 0.000000 4128 +pagedepart 0 2 6.263398 0.000000 5052 +calculi 1 1 6.957497 6.957497 10203 +crari 0 1 6.957497 0.000000 10204 +crarycrari 0 1 6.957497 0.000000 10205 +researchbroadli 0 1 6.957497 0.000000 10206 +implementationand 0 1 6.957497 0.000000 10207 +kmlwhich 0 1 6.957497 0.000000 10208 +richworld 0 1 6.957497 0.000000 10209 +newprogram 0 1 6.957497 0.000000 10210 +aminterest 0 1 6.957497 0.000000 10211 +deepen 0 1 6.957497 0.000000 10212 +mitig 0 1 6.957497 0.000000 10213 +modelallow 0 1 6.957497 0.000000 10214 +allowsth 0 1 6.957497 0.000000 10215 +andcorrect 0 1 6.957497 0.000000 10216 +additionaloptim 0 1 6.957497 0.000000 10217 +automatedreason 0 1 6.957497 0.000000 10218 +ofrobert 0 1 6.957497 0.000000 10219 +jasonhickei 0 1 6.957497 0.000000 10220 +linksmark 0 1 6.957497 0.000000 10221 +cansearch 0 1 6.957497 0.000000 10222 +biblestudi 0 1 6.957497 0.000000 10223 +thelord 0 1 6.957497 0.000000 10224 +neighbor 0 1 6.957497 0.000000 10225 +commandmentgreat 0 1 6.957497 0.000000 10226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..c8e75358 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +parallel 2 169 1.791759 3.583518 60 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +data 0 170 1.791759 0.000000 49 +phone 0 175 1.791759 0.000000 45 +problem 2 147 1.945910 3.891820 75 +process 0 142 1.945910 0.000000 72 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +welcom 0 122 2.079442 0.000000 99 +theori 1 111 2.197225 2.197225 127 +mathemat 0 108 2.197225 0.000000 123 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +proceed 1 93 2.397895 2.397895 152 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +select 0 91 2.397895 0.000000 154 +solut 2 82 2.484907 4.969814 162 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +larg 1 82 2.484907 2.484907 168 +contain 0 81 2.484907 0.000000 174 +second 0 81 2.484907 0.000000 166 +optim 1 79 2.564949 2.564949 197 +state 0 76 2.564949 0.000000 207 +decemb 0 80 2.564949 0.000000 215 +septemb 0 65 2.772589 0.000000 274 +ithaca 0 65 2.772589 0.000000 294 +scientif 2 53 2.944439 5.888878 341 +februari 0 54 2.944439 0.000000 328 +numer 1 49 3.044522 3.044522 369 +algebra 0 45 3.135494 0.000000 394 +linear 2 41 3.218876 6.437752 431 +map 0 39 3.258097 0.000000 452 +close 0 38 3.295837 0.000000 465 +tree 1 36 3.367296 3.367296 492 +least 2 35 3.401197 6.802394 516 +posit 0 31 3.496508 0.000000 552 +multiprocessor 1 28 3.610918 3.610918 605 +packag 0 28 3.610918 0.000000 614 +scale 0 28 3.610918 0.000000 613 +bound 0 26 3.688879 0.000000 659 +deal 0 22 3.850148 0.000000 736 +siam 2 21 3.912023 7.824046 800 +definit 0 19 4.007333 0.000000 864 +matrix 1 17 4.110874 4.110874 933 +spars 2 16 4.174387 8.348774 989 +squar 2 14 4.317488 8.634976 1082 +rank 0 14 4.317488 0.000000 1063 +francisco 0 14 4.317488 0.000000 1095 +affili 0 13 4.382027 0.000000 1194 +philadelphia 1 12 4.465908 4.465908 1244 +matric 0 10 4.653960 0.000000 1399 +factor 1 9 4.753590 4.753590 1544 +conferenceon 0 9 4.753590 0.000000 1595 +simon 0 8 4.875197 0.000000 1697 +watson 0 8 4.875197 0.000000 1691 +univeristi 0 8 4.875197 0.000000 1754 +pennsylvania 0 7 5.010635 0.000000 1932 +cornellunivers 0 7 5.010635 0.000000 1916 +sixth 0 7 5.010635 0.000000 1917 +fifth 0 7 5.010635 0.000000 1931 +compact 0 7 5.010635 0.000000 1907 +dens 1 6 5.164786 5.164786 2122 +reed 0 6 5.164786 0.000000 2086 +row 1 5 5.347108 5.347108 2330 +seventh 1 5 5.347108 5.347108 2464 +orthogon 1 4 5.568345 5.568345 2832 +thecornel 0 4 5.568345 0.000000 2892 +symmetr 0 4 5.568345 0.000000 2908 +ctctr 1 3 5.857933 5.857933 3625 +parallelprocess 0 3 5.857933 0.000000 3626 +coleman 1 2 6.263398 6.263398 5041 +professorthoma 0 2 6.263398 0.000000 5053 +defici 0 2 6.263398 0.000000 5054 +idaho 0 2 6.263398 0.000000 5055 +ondistribut 0 2 6.263398 0.000000 4320 +solutionof 0 2 6.263398 0.000000 5056 +key 0 2 6.263398 0.000000 5057 +dongarra 0 2 6.263398 0.000000 5058 +kennedi 0 2 6.263398 0.000000 4539 +multifront 1 1 6.957497 6.957497 10227 +pothen 1 1 6.957497 6.957497 10228 +chunguang 1 1 6.957497 6.957497 10229 +processingfor 1 1 6.957497 6.957497 10230 +cliqu 1 1 6.957497 6.957497 10231 +sunchunguang 0 1 6.957497 0.000000 10232 +sunphd 0 1 6.957497 0.000000 10233 +ppcx 0 1 6.957497 0.000000 10234 +pssl 0 1 6.957497 0.000000 10235 +psspd 0 1 6.957497 0.000000 10236 +systemsrec 0 1 6.957497 0.000000 10237 +lecturesparallel 0 1 6.957497 0.000000 10238 +coeur 0 1 6.957497 0.000000 10239 +alen 0 1 6.957497 0.000000 10240 +bailei 0 1 6.957497 0.000000 10241 +bjorstad 0 1 6.957497 0.000000 10242 +gilbert 0 1 6.957497 0.000000 10243 +mascagni 0 1 6.957497 0.000000 10244 +schreiber 0 1 6.957497 0.000000 10245 +torczon 0 1 6.957497 0.000000 10246 +choleskyfactor 0 1 6.957497 0.000000 10247 +matriceson 0 1 6.957497 0.000000 10248 +sinovec 0 1 6.957497 0.000000 10249 +leuz 0 1 6.957497 0.000000 10250 +petzold 0 1 6.957497 0.000000 10251 +messina 0 1 6.957497 0.000000 10252 +sorensen 0 1 6.957497 0.000000 10253 +voigt 0 1 6.957497 0.000000 10254 +structuresin 0 1 6.957497 0.000000 10255 +csun 0 1 6.957497 0.000000 10256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..92c09041 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +group 2 183 1.609438 3.218876 36 +public 0 202 1.609438 0.000000 43 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +process 2 142 1.945910 3.891820 72 +architectur 2 139 1.945910 3.891820 77 +first 1 140 1.945910 1.945910 71 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +problem 0 147 1.945910 0.000000 75 +provid 1 121 2.079442 2.079442 94 +tool 0 117 2.079442 0.000000 93 +send 1 114 2.197225 2.197225 109 +make 0 111 2.197225 0.000000 120 +version 0 113 2.197225 0.000000 122 +user 1 104 2.302585 2.302585 137 +commun 1 95 2.397895 2.397895 157 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +proceed 0 93 2.397895 0.000000 152 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +ieee 0 86 2.484907 0.000000 190 +messag 1 76 2.564949 2.564949 212 +server 1 76 2.564949 2.564949 204 +exampl 0 77 2.564949 0.000000 195 +david 1 71 2.639057 2.639057 232 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +upson 0 71 2.639057 0.000000 218 +addit 0 74 2.639057 0.000000 228 +symposium 0 72 2.639057 0.000000 238 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +august 0 66 2.708050 0.000000 257 +virtual 1 62 2.772589 2.772589 285 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +improv 0 62 2.772589 0.000000 289 +type 1 61 2.833213 2.833213 296 +locat 0 59 2.833213 0.000000 303 +thesi 1 57 2.890372 2.890372 327 +allow 1 53 2.944439 2.944439 333 +februari 0 54 2.944439 0.000000 328 +maintain 1 51 2.995732 2.995732 342 +set 1 50 3.044522 3.044522 361 +basic 0 50 3.044522 0.000000 360 +keep 0 44 3.135494 0.000000 409 +made 0 44 3.135494 0.000000 398 +protocol 0 45 3.135494 0.000000 407 +howev 1 41 3.218876 3.218876 422 +might 0 41 3.218876 0.000000 426 +join 1 39 3.258097 3.258097 457 +origin 1 38 3.295837 3.295837 472 +respons 0 37 3.332205 0.000000 476 +within 1 33 3.433987 3.433987 525 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +dissert 0 32 3.465736 0.000000 549 +secur 1 30 3.555348 3.555348 577 +semant 1 29 3.583519 3.583519 587 +limit 0 29 3.583519 0.000000 585 +propos 1 28 3.610918 3.610918 602 +static 0 27 3.637586 0.000000 619 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +client 1 25 3.737670 3.737670 679 +wish 1 24 3.761200 3.761200 692 +mike 0 24 3.761200 0.000000 703 +higher 0 24 3.761200 0.000000 690 +mobil 1 23 3.806662 3.806662 730 +cooper 1 22 3.850148 3.850148 757 +scheme 1 20 3.951244 3.951244 818 +accept 0 18 4.060443 0.000000 879 +along 0 18 4.060443 0.000000 878 +layer 0 17 4.110874 0.000000 926 +devic 1 16 4.174387 4.174387 1002 +permit 1 16 4.174387 4.174387 962 +overhead 0 15 4.248495 0.000000 1035 +horu 1 14 4.317488 4.317488 1116 +command 0 14 4.317488 0.000000 1083 +necessari 0 13 4.382027 0.000000 1147 +whose 0 13 4.382027 0.000000 1166 +carri 0 13 4.382027 0.000000 1152 +kenneth 1 12 4.465908 4.465908 1265 +arbitrari 0 11 4.553877 0.000000 1359 +ofcomput 0 10 4.653960 0.000000 1442 +trust 1 9 4.753590 4.753590 1583 +birman 1 9 4.753590 4.753590 1531 +desir 1 9 4.753590 4.753590 1542 +assumpt 0 9 4.753590 0.000000 1514 +screen 0 9 4.753590 0.000000 1577 +informationabout 0 9 4.753590 0.000000 1515 +crash 0 8 4.875197 0.000000 1616 +accomplish 0 8 4.875197 0.000000 1755 +synchroni 1 7 5.010635 5.010635 1923 +prevent 1 7 5.010635 5.010635 1827 +fromth 0 7 5.010635 0.000000 1802 +slightli 0 7 5.010635 0.000000 1795 +encrypt 0 7 5.010635 0.000000 1835 +privaci 1 6 5.164786 5.164786 2144 +extern 0 6 5.164786 0.000000 2105 +authent 1 5 5.347108 5.347108 2306 +trivial 0 4 5.568345 0.000000 2786 +witha 0 4 5.568345 0.000000 2617 +complic 0 4 5.568345 0.000000 2902 +ofinform 0 4 5.568345 0.000000 2707 +reveal 0 4 5.568345 0.000000 2647 +wireless 0 4 5.568345 0.000000 2693 +anthoni 0 4 5.568345 0.000000 2792 +privat 1 3 5.857933 5.857933 3496 +reiter 0 3 5.857933 0.000000 3461 +proper 0 3 5.857933 0.000000 3323 +recipi 0 3 5.857933 0.000000 3627 +owner 0 3 5.857933 0.000000 3531 +attack 0 3 5.857933 0.000000 3168 +preserv 0 3 5.857933 0.000000 3628 +mobilecomput 0 3 5.857933 0.000000 3629 +untrust 1 2 6.263398 6.263398 4997 +postdoctor 0 2 6.263398 0.000000 5059 +honest 0 2 6.263398 0.000000 5060 +ofvirtu 0 2 6.263398 0.000000 5061 +communicatewith 0 2 6.263398 0.000000 5062 +unlik 0 2 6.263398 0.000000 5063 +sender 0 2 6.263398 0.000000 5064 +adversari 0 2 6.263398 0.000000 5065 +ofmobil 0 2 6.263398 0.000000 5011 +securityand 0 2 6.263398 0.000000 5066 +relationshipsamong 1 1 6.957497 6.957497 10257 +dcooper 0 1 6.957497 0.000000 10258 +securityarchitectur 0 1 6.957497 0.000000 10259 +horuswhich 0 1 6.957497 0.000000 10260 +kerberosnetwork 0 1 6.957497 0.000000 10261 +cryptograph 0 1 6.957497 0.000000 10262 +toprovid 0 1 6.957497 0.000000 10263 +originalimplement 0 1 6.957497 0.000000 10264 +failuremodel 0 1 6.957497 0.000000 10265 +anyprocess 0 1 6.957497 0.000000 10266 +isposs 0 1 6.957497 0.000000 10267 +weaker 0 1 6.957497 0.000000 10268 +untrustedprocess 0 1 6.957497 0.000000 10269 +clientsto 0 1 6.957497 0.000000 10270 +horussecur 0 1 6.957497 0.000000 10271 +keymanag 0 1 6.957497 0.000000 10272 +impersonateanoth 0 1 6.957497 0.000000 10273 +achieveth 0 1 6.957497 0.000000 10274 +asclient 0 1 6.957497 0.000000 10275 +inherentin 0 1 6.957497 0.000000 10276 +contentsof 0 1 6.957497 0.000000 10277 +hiddenwith 0 1 6.957497 0.000000 10278 +outsidersfrom 0 1 6.957497 0.000000 10279 +maintainingth 0 1 6.957497 0.000000 10280 +unlink 0 1 6.957497 0.000000 10281 +chaum 0 1 6.957497 0.000000 10282 +severaloth 0 1 6.957497 0.000000 10283 +staticnetwork 0 1 6.957497 0.000000 10284 +mobilecommun 0 1 6.957497 0.000000 10285 +themessag 0 1 6.957497 0.000000 10286 +advisorken 0 1 6.957497 0.000000 10287 +internaland 0 1 6.957497 0.000000 10288 +apriv 0 1 6.957497 0.000000 10289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..fceae1e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +project 1 340 1.098612 1.098612 18 +model 1 145 1.945910 1.945910 69 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +question 0 91 2.397895 0.000000 141 +mani 0 92 2.397895 0.000000 150 +resum 0 79 2.564949 0.000000 217 +would 0 67 2.708050 0.000000 251 +type 0 61 2.833213 0.000000 296 +space 0 57 2.890372 0.000000 310 +direct 0 57 2.890372 0.000000 316 +digit 0 52 2.995732 0.000000 348 +hand 0 37 3.332205 0.000000 475 +field 0 37 3.332205 0.000000 482 +game 0 36 3.367296 0.000000 498 +word 0 34 3.401197 0.000000 508 +express 0 32 3.465736 0.000000 540 +fault 0 32 3.465736 0.000000 547 +human 0 32 3.465736 0.000000 546 +scientist 0 31 3.496508 0.000000 560 +travel 0 30 3.555348 0.000000 579 +hope 1 28 3.610918 3.610918 610 +releas 0 28 3.610918 0.000000 616 +mine 1 26 3.688879 3.688879 654 +never 0 25 3.737670 0.000000 671 +reach 0 24 3.761200 0.000000 688 +instead 1 22 3.850148 3.850148 756 +love 1 21 3.912023 3.912023 804 +born 0 21 3.912023 0.000000 798 +ever 1 19 4.007333 4.007333 872 +brief 0 16 4.174387 0.000000 1001 +role 0 14 4.317488 0.000000 1101 +sai 0 13 4.382027 0.000000 1175 +pretti 0 13 4.382027 0.000000 1191 +cannot 0 13 4.382027 0.000000 1144 +rest 0 12 4.465908 0.000000 1259 +surf 0 11 4.553877 0.000000 1301 +shore 0 11 4.553877 0.000000 1377 +night 0 11 4.553877 0.000000 1319 +road 0 11 4.553877 0.000000 1374 +true 0 10 4.653960 0.000000 1422 +poetri 0 9 4.753590 0.000000 1596 +told 0 8 4.875197 0.000000 1658 +heart 0 8 4.875197 0.000000 1729 +hold 0 8 4.875197 0.000000 1645 +prize 0 6 5.164786 0.000000 2150 +gentl 0 5 5.347108 0.000000 2264 +facial 0 5 5.347108 0.000000 2438 +unknown 0 5 5.347108 0.000000 2318 +favor 0 5 5.347108 0.000000 2414 +suffer 0 5 5.347108 0.000000 2268 +cyber 1 4 5.568345 5.568345 2909 +lawyer 0 4 5.568345 0.000000 2836 +uncertain 0 4 5.568345 0.000000 2758 +fals 0 4 5.568345 0.000000 2861 +dark 0 4 5.568345 0.000000 2910 +soul 0 4 5.568345 0.000000 2907 +fear 0 4 5.568345 0.000000 2911 +faith 1 3 5.857933 5.857933 3363 +dread 1 3 5.857933 5.857933 3630 +wise 0 3 5.857933 0.000000 3631 +romanc 0 3 5.857933 0.000000 3632 +passion 0 3 5.857933 0.000000 3633 +tortur 0 3 5.857933 0.000000 3634 +diseas 0 3 5.857933 0.000000 3635 +pain 0 3 5.857933 0.000000 3460 +blame 0 3 5.857933 0.000000 3636 +cold 0 3 5.857933 0.000000 3637 +burn 1 2 6.263398 6.263398 4447 +blink 0 2 6.263398 0.000000 5067 +ey 0 2 6.263398 0.000000 5068 +kei 0 2 6.263398 0.000000 4812 +mice 0 2 6.263398 0.000000 5069 +autobiographi 0 2 6.263398 0.000000 5070 +concret 0 2 6.263398 0.000000 4276 +ear 0 2 6.263398 0.000000 5071 +soft 0 2 6.263398 0.000000 5072 +belov 0 2 6.263398 0.000000 5073 +broken 0 2 6.263398 0.000000 5074 +horror 0 2 6.263398 0.000000 5075 +tear 0 2 6.263398 0.000000 5076 +deed 0 2 6.263398 0.000000 5077 +frozen 0 2 6.263398 0.000000 5078 +deidr 1 1 6.957497 6.957497 10290 +pandora 1 1 6.957497 6.957497 10291 +abodedan 0 1 6.957497 0.000000 10292 +abodegreet 0 1 6.957497 0.000000 10293 +humbl 0 1 6.957497 0.000000 10294 +prithe 0 1 6.957497 0.000000 10295 +teari 0 1 6.957497 0.000000 10296 +weari 0 1 6.957497 0.000000 10297 +thyselv 0 1 6.957497 0.000000 10298 +abod 0 1 6.957497 0.000000 10299 +emot 0 1 6.957497 0.000000 10300 +simnet 0 1 6.957497 0.000000 10301 +builder 0 1 6.957497 0.000000 10302 +faiththei 0 1 6.957497 0.000000 10303 +hardli 0 1 6.957497 0.000000 10304 +ferro 0 1 6.957497 0.000000 10305 +scorn 0 1 6.957497 0.000000 10306 +bend 0 1 6.957497 0.000000 10307 +tone 0 1 6.957497 0.000000 10308 +unseen 0 1 6.957497 0.000000 10309 +unheard 0 1 6.957497 0.000000 10310 +untouch 0 1 6.957497 0.000000 10311 +silenc 0 1 6.957497 0.000000 10312 +yearn 0 1 6.957497 0.000000 10313 +lordlovewarm 0 1 6.957497 0.000000 10314 +friendship 0 1 6.957497 0.000000 10315 +mindless 0 1 6.957497 0.000000 10316 +infatu 0 1 6.957497 0.000000 10317 +sensual 0 1 6.957497 0.000000 10318 +sigh 0 1 6.957497 0.000000 10319 +hopemyth 0 1 6.957497 0.000000 10320 +beauteou 0 1 6.957497 0.000000 10321 +demon 0 1 6.957497 0.000000 10322 +astrai 0 1 6.957497 0.000000 10323 +glimmer 0 1 6.957497 0.000000 10324 +tread 0 1 6.957497 0.000000 10325 +amidst 0 1 6.957497 0.000000 10326 +thorn 0 1 6.957497 0.000000 10327 +filthi 0 1 6.957497 0.000000 10328 +miseri 0 1 6.957497 0.000000 10329 +etern 0 1 6.957497 0.000000 10330 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..d58fd367 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +network 1 168 1.791759 1.791759 61 +tool 1 117 2.079442 2.079442 93 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +site 0 106 2.197225 0.000000 119 +intern 0 108 2.197225 0.000000 128 +access 0 102 2.302585 0.000000 136 +part 0 98 2.302585 0.000000 129 +search 1 95 2.397895 2.397895 155 +select 0 91 2.397895 0.000000 154 +imag 0 91 2.397895 0.000000 161 +librari 1 87 2.484907 2.484907 181 +institut 0 84 2.484907 0.000000 187 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +server 1 76 2.564949 2.564949 204 +refer 1 78 2.564949 2.564949 203 +collect 0 65 2.772589 0.000000 268 +ithaca 0 65 2.772589 0.000000 294 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +digit 1 52 2.995732 2.995732 348 +understand 0 47 3.091042 0.000000 384 +directori 0 45 3.135494 0.000000 396 +music 0 42 3.218876 0.000000 436 +global 0 34 3.401197 0.000000 520 +secur 0 30 3.555348 0.000000 577 +weather 1 28 3.610918 3.610918 618 +berkelei 0 26 3.688879 0.000000 657 +recognit 0 23 3.806662 0.000000 723 +togeth 0 23 3.806662 0.000000 714 +head 0 23 3.806662 0.000000 732 +siam 0 21 3.912023 0.000000 800 +navig 0 21 3.912023 0.000000 796 +lyco 0 19 4.007333 0.000000 871 +excel 0 19 4.007333 0.000000 868 +stanford 0 17 4.110874 0.000000 955 +whole 0 17 4.110874 0.000000 940 +charact 0 15 4.248495 0.000000 1028 +dean 0 14 4.317488 0.000000 1104 +anonym 0 14 4.317488 0.000000 1100 +audio 0 14 4.317488 0.000000 1094 +captur 0 12 4.465908 0.000000 1232 +michigan 0 11 4.553877 0.000000 1368 +earth 0 10 4.653960 0.000000 1463 +catalog 0 10 4.653960 0.000000 1431 +folk 0 9 4.753590 0.000000 1597 +illinoi 0 7 5.010635 0.000000 1941 +gatewai 0 7 5.010635 0.000000 1942 +scout 0 7 5.010635 0.000000 1903 +gopher 1 6 5.164786 5.164786 1982 +legal 0 6 5.164786 0.000000 2094 +forecast 0 6 5.164786 0.000000 2171 +elsewher 0 5 5.347108 0.000000 2444 +cuinfo 0 4 5.568345 0.000000 2626 +planet 0 4 5.568345 0.000000 2912 +gear 0 4 5.568345 0.000000 2891 +krafft 0 3 5.857933 0.000000 3638 +archi 0 3 5.857933 0.000000 3639 +cern 0 2 6.263398 0.000000 5079 +urlsdean 0 1 6.957497 0.000000 10331 +interestcornel 0 1 6.957497 0.000000 10332 +dimund 0 1 6.957497 0.000000 10333 +librarysearch 0 1 6.957497 0.000000 10334 +veronica 0 1 6.957497 0.000000 10335 +faqsvari 0 1 6.957497 0.000000 10336 +folkbook 0 1 6.957497 0.000000 10337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..b0d48560 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +avail 1 169 1.791759 1.791759 48 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +report 1 131 2.079442 2.079442 92 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +technic 1 100 2.302585 2.302585 140 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +select 0 91 2.397895 0.000000 154 +build 0 85 2.484907 0.000000 184 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +member 0 84 2.484907 0.000000 165 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +issu 0 78 2.564949 0.000000 211 +upson 0 71 2.639057 0.000000 218 +servic 0 72 2.639057 0.000000 236 +nation 0 74 2.639057 0.000000 240 +line 0 75 2.639057 0.000000 231 +materi 0 75 2.639057 0.000000 221 +investig 0 51 2.995732 0.000000 353 +protocol 0 45 3.135494 0.000000 407 +secur 0 30 3.555348 0.000000 577 +intend 0 28 3.610918 0.000000 599 +administr 0 27 3.637586 0.000000 628 +universityithaca 0 24 3.761200 0.000000 710 +initi 0 23 3.806662 0.000000 717 +director 0 22 3.850148 0.000000 767 +sciencecornel 0 22 3.850148 0.000000 768 +serv 0 22 3.850148 0.000000 758 +inth 0 22 3.850148 0.000000 741 +fund 0 21 3.912023 0.000000 805 +similar 0 21 3.912023 0.000000 771 +facil 1 20 3.951244 3.951244 814 +break 0 20 3.951244 0.000000 812 +five 0 19 4.007333 0.000000 841 +side 0 15 4.248495 0.000000 1022 +carl 0 15 4.248495 0.000000 1024 +dean 1 14 4.317488 4.317488 1104 +emploi 0 12 4.465908 0.000000 1284 +arpa 0 11 4.553877 0.000000 1369 +eight 0 11 4.553877 0.000000 1331 +consortium 1 10 4.653960 4.653960 1467 +princip 0 10 4.653960 0.000000 1397 +rapid 0 10 4.653960 0.000000 1453 +researchi 0 8 4.875197 0.000000 1756 +xerox 0 8 4.875197 0.000000 1725 +davi 0 7 5.010635 0.000000 1888 +sciencedepart 0 6 5.164786 0.000000 2172 +interestedin 0 5 5.347108 0.000000 2260 +employe 0 4 5.568345 0.000000 2717 +krafft 1 3 5.857933 5.857933 3638 +dienst 1 3 5.857933 5.857933 3640 +halldepart 0 3 5.857933 0.000000 3641 +dissemin 0 2 6.263398 0.000000 5080 +thedesign 0 2 6.263398 0.000000 4251 +lagoz 0 2 6.263398 0.000000 5081 +facilitiesaddress 0 1 6.957497 0.000000 10338 +guis 0 1 6.957497 0.000000 10339 +anadministr 0 1 6.957497 0.000000 10340 +andworri 0 1 6.957497 0.000000 10341 +spart 0 1 6.957497 0.000000 10342 +thecorpor 0 1 6.957497 0.000000 10343 +cnri 0 1 6.957497 0.000000 10344 +technicalresearch 0 1 6.957497 0.000000 10345 +theexist 0 1 6.957497 0.000000 10346 +disseminationov 0 1 6.957497 0.000000 10347 +atechn 0 1 6.957497 0.000000 10348 +ondienst 0 1 6.957497 0.000000 10349 +togethera 0 1 6.957497 0.000000 10350 +url 0 1 6.957497 0.000000 10351 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..c9476823 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +address 0 170 1.791759 0.000000 62 +hall 0 146 1.945910 0.000000 65 +area 0 144 1.945910 0.000000 80 +analysi 1 124 2.079442 2.079442 98 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +good 0 77 2.564949 0.000000 200 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +numer 0 49 3.044522 0.000000 369 +advis 0 6 5.164786 0.000000 2173 +divakar 0 1 6.957497 0.000000 10352 +pagedivakar 0 1 6.957497 0.000000 10353 +viswanathdivakar 0 1 6.957497 0.000000 10354 +isnumer 0 1 6.957497 0.000000 10355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..71a9e944 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +advanc 0 99 2.302585 0.000000 130 +institut 0 84 2.484907 0.000000 187 +know 0 80 2.564949 0.000000 198 +master 0 76 2.564949 0.000000 216 +want 0 79 2.564949 0.000000 199 +appli 0 71 2.639057 0.000000 226 +html 0 75 2.639057 0.000000 235 +receiv 1 66 2.708050 2.708050 244 +practic 0 70 2.708050 0.000000 246 +ithaca 1 65 2.772589 2.772589 294 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +locat 0 59 2.833213 0.000000 303 +friend 0 48 3.044522 0.000000 376 +physic 1 47 3.091042 3.091042 377 +york 0 41 3.218876 0.000000 435 +littl 0 39 3.258097 0.000000 454 +live 0 40 3.258097 0.000000 451 +china 1 37 3.332205 3.332205 487 +winter 0 36 3.367296 0.000000 500 +except 0 28 3.610918 0.000000 607 +campu 0 27 3.637586 0.000000 623 +beij 0 19 4.007333 0.000000 876 +miss 0 19 4.007333 0.000000 866 +beauti 0 18 4.060443 0.000000 912 +miller 0 17 4.110874 0.000000 949 +practicum 0 16 4.174387 0.000000 960 +mayb 0 15 4.248495 0.000000 1014 +anywai 0 15 4.248495 0.000000 1047 +translat 0 13 4.382027 0.000000 1164 +central 0 13 4.382027 0.000000 1160 +tsinghua 0 13 4.382027 0.000000 1195 +realiti 0 12 4.465908 0.000000 1272 +jersei 1 9 4.753590 4.753590 1587 +brought 0 7 5.010635 0.000000 1925 +railroad 0 6 5.164786 0.000000 2161 +coursesc 0 4 5.568345 0.000000 2692 +engineeringc 0 4 5.568345 0.000000 2904 +doubt 0 3 5.857933 0.000000 3119 +gorgeou 0 2 6.263398 0.000000 5082 +newark 0 2 6.263398 0.000000 5032 +diyu 1 1 6.957497 6.957497 10356 +pagediyu 0 1 6.957497 0.000000 10357 +daisi 0 1 6.957497 0.000000 10358 +translatorsfal 0 1 6.957497 0.000000 10359 +systemc 0 1 6.957497 0.000000 10360 +systemsel 0 1 6.957497 0.000000 10361 +telecommunicationsm 0 1 6.957497 0.000000 10362 +projectorigin 0 1 6.957497 0.000000 10363 +projectsinc 0 1 6.957497 0.000000 10364 +unviers 0 1 6.957497 0.000000 10365 +linksjava 0 1 6.957497 0.000000 10366 +tkfavorit 0 1 6.957497 0.000000 10367 +sitestimecnnlondon 0 1 6.957497 0.000000 10368 +timeswashington 0 1 6.957497 0.000000 10369 +postchines 0 1 6.957497 0.000000 10370 +digestchina 0 1 6.957497 0.000000 10371 +digestfeng 0 1 6.957497 0.000000 10372 +yuanxin 0 1 6.957497 0.000000 10373 +siart 0 1 6.957497 0.000000 10374 +chinaloc 0 1 6.957497 0.000000 10375 +connectionsctc 0 1 6.957497 0.000000 10376 +sunlabweathermovi 0 1 6.957497 0.000000 10377 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..1e8f4215 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +area 1 144 1.945910 1.945910 80 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +problem 0 147 1.945910 0.000000 75 +document 1 121 2.079442 2.079442 89 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +teach 1 108 2.197225 2.197225 112 +structur 0 106 2.197225 0.000000 105 +techniqu 0 99 2.302585 0.000000 138 +imag 1 91 2.397895 2.397895 161 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +activ 1 84 2.484907 2.484907 182 +wide 0 84 2.484907 0.000000 185 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +method 1 80 2.564949 2.564949 213 +june 0 79 2.564949 0.000000 214 +main 0 67 2.708050 0.000000 256 +view 0 70 2.708050 0.000000 254 +evalu 0 64 2.772589 0.000000 266 +type 0 61 2.833213 0.000000 296 +share 0 59 2.833213 0.000000 304 +index 0 56 2.890372 0.000000 309 +profession 0 51 2.995732 0.000000 345 +investig 0 51 2.995732 0.000000 353 +visual 1 48 3.044522 3.044522 372 +format 0 48 3.044522 0.000000 356 +without 0 50 3.044522 0.000000 370 +cool 0 49 3.044522 0.000000 374 +electron 1 47 3.091042 3.091042 379 +video 1 44 3.135494 3.135494 405 +favorit 0 44 3.135494 0.000000 410 +offer 0 43 3.178054 0.000000 414 +vision 1 41 3.218876 3.218876 430 +fast 0 42 3.218876 0.000000 429 +theoret 0 39 3.258097 0.000000 446 +author 0 39 3.258097 0.000000 450 +small 0 39 3.258097 0.000000 447 +brian 0 38 3.295837 0.000000 466 +approxim 0 35 3.401197 0.000000 509 +collabor 1 32 3.465736 3.465736 543 +rang 0 30 3.555348 0.000000 565 +chair 0 29 3.583519 0.000000 596 +held 0 28 3.610918 0.000000 600 +compar 0 26 3.688879 0.000000 648 +sport 0 25 3.737670 0.000000 683 +pattern 0 24 3.761200 0.000000 689 +recognit 1 23 3.806662 3.806662 723 +highli 0 23 3.806662 0.000000 725 +geometri 0 22 3.850148 0.000000 752 +smith 0 20 3.951244 0.000000 820 +geometr 0 19 4.007333 0.000000 852 +monitor 1 17 4.110874 4.110874 941 +match 1 16 4.174387 4.174387 965 +remot 1 15 4.248495 4.248495 1041 +track 0 15 4.248495 0.000000 1029 +matlab 0 14 4.317488 0.000000 1081 +daniel 0 12 4.465908 0.000000 1233 +target 0 12 4.465908 0.000000 1282 +extrem 0 11 4.553877 0.000000 1330 +mountain 0 10 4.653960 0.000000 1456 +bike 0 10 4.653960 0.000000 1468 +juan 0 9 4.753590 0.000000 1580 +xerox 0 8 4.875197 0.000000 1725 +compact 0 7 5.010635 0.000000 1907 +huttenloch 1 6 5.164786 5.164786 1983 +fraction 0 5 5.347108 0.000000 2259 +conot 0 5 5.347108 0.000000 2245 +stupid 0 5 5.347108 0.000000 2489 +hausdorff 1 4 5.568345 5.568345 2633 +identif 0 4 5.568345 0.000000 2773 +cvpr 0 4 5.568345 0.000000 2761 +geek 0 2 6.263398 0.000000 5083 +snowboard 0 2 6.263398 0.000000 5084 +professordph 0 1 6.957497 0.000000 10378 +eigenspac 0 1 6.957497 0.000000 10379 +digipap 0 1 6.957497 0.000000 10380 +viewabl 0 1 6.957497 0.000000 10381 +parc 0 1 6.957497 0.000000 10382 +attitud 0 1 6.957497 0.000000 10383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..404807a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +process 2 142 1.945910 3.891820 72 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +technolog 0 131 2.079442 0.000000 102 +specif 1 106 2.197225 2.197225 106 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +mani 0 92 2.397895 0.000000 150 +present 0 91 2.397895 0.000000 145 +wide 0 84 2.484907 0.000000 185 +method 0 80 2.564949 0.000000 213 +exampl 0 77 2.564949 0.000000 195 +june 0 79 2.564949 0.000000 214 +addit 0 74 2.639057 0.000000 228 +order 0 69 2.708050 0.000000 249 +written 0 63 2.772589 0.000000 278 +result 0 65 2.772589 0.000000 281 +foundat 0 62 2.772589 0.000000 286 +simpl 0 60 2.833213 0.000000 298 +thesi 0 57 2.890372 0.000000 327 +investig 0 51 2.995732 0.000000 353 +basic 0 50 3.044522 0.000000 360 +understand 0 47 3.091042 0.000000 384 +algebra 2 45 3.135494 6.270988 394 +better 0 45 3.135494 0.000000 401 +form 0 39 3.258097 0.000000 443 +theoret 0 39 3.258097 0.000000 446 +prototyp 0 38 3.295837 0.000000 463 +concurr 0 34 3.401197 0.000000 501 +express 1 32 3.465736 3.465736 540 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +computersci 0 30 3.555348 0.000000 562 +semant 0 29 3.583519 0.000000 587 +becom 0 28 3.610918 0.000000 603 +full 0 28 3.610918 0.000000 615 +effort 0 26 3.688879 0.000000 652 +compar 0 26 3.688879 0.000000 648 +input 0 23 3.806662 0.000000 727 +verif 0 20 3.951244 0.000000 826 +aid 0 18 4.060443 0.000000 904 +former 0 17 4.110874 0.000000 956 +edui 0 13 4.382027 0.000000 1193 +calculu 0 12 4.465908 0.000000 1203 +verifi 0 12 4.465908 0.000000 1261 +custom 0 10 4.653960 0.000000 1414 +latter 0 9 4.753590 0.000000 1522 +bloom 0 4 5.568345 0.000000 2913 +commonli 0 4 5.568345 0.000000 2877 +metatheori 0 3 5.857933 0.000000 3642 +allevi 0 3 5.857933 0.000000 3643 +checker 0 3 5.857933 0.000000 3644 +lnc 1 2 6.263398 6.263398 5085 +theproblem 0 2 6.263398 0.000000 4560 +inher 0 2 6.263398 0.000000 5086 +dsouza 1 1 6.957497 6.957497 10384 +ashvin 1 1 6.957497 6.957497 10385 +bard 0 1 6.957497 0.000000 10386 +oftool 0 1 6.957497 0.000000 10387 +andverif 0 1 6.957497 0.000000 10388 +withrespect 0 1 6.957497 0.000000 10389 +immediatelyavail 0 1 6.957497 0.000000 10390 +duplic 0 1 6.957497 0.000000 10391 +gso 0 1 6.957497 0.000000 10392 +loto 0 1 6.957497 0.000000 10393 +exploringappl 0 1 6.957497 0.000000 10394 +bdd 0 1 6.957497 0.000000 10395 +algebraterm 0 1 6.957497 0.000000 10396 +postscipt 0 1 6.957497 0.000000 10397 +lite 0 1 6.957497 0.000000 10398 +presentedth 0 1 6.957497 0.000000 10399 +compass 0 1 6.957497 0.000000 10400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..b5294513 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +cornel 1 215 1.386294 1.386294 23 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +sinc 0 90 2.397895 0.000000 159 +stuff 0 87 2.484907 0.000000 171 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +improv 0 62 2.772589 0.000000 289 +dept 0 64 2.772589 0.000000 291 +prof 0 64 2.772589 0.000000 273 +written 0 63 2.772589 0.000000 278 +visit 0 63 2.772589 0.000000 288 +might 0 41 3.218876 0.000000 426 +origin 0 38 3.295837 0.000000 472 +art 0 29 3.583519 0.000000 593 +alwai 0 24 3.761200 0.000000 691 +fine 0 20 3.951244 0.000000 822 +ever 0 19 4.007333 0.000000 872 +warn 0 14 4.317488 0.000000 1068 +minor 0 12 4.465908 0.000000 1237 +undergrad 0 9 4.753590 0.000000 1589 +risk 0 8 4.875197 0.000000 1689 +heavi 0 7 5.010635 0.000000 1841 +rubinfeld 0 6 5.164786 0.000000 1998 +ronitt 0 5 5.347108 0.000000 2265 +paint 0 5 5.347108 0.000000 2400 +turkei 1 4 5.568345 5.568345 2914 +funda 1 3 5.857933 5.857933 3645 +ergun 1 2 6.263398 6.263398 5087 +angri 0 2 6.263398 0.000000 5088 +dog 0 2 6.263398 0.000000 5089 +pagefunda 0 1 6.957497 0.000000 10401 +ergn 0 1 6.957497 0.000000 10402 +eduhi 0 1 6.957497 0.000000 10403 +studentin 0 1 6.957497 0.000000 10404 +programcheck 0 1 6.957497 0.000000 10405 +researchpag 0 1 6.957497 0.000000 10406 +izmir 0 1 6.957497 0.000000 10407 +bilkentunivers 0 1 6.957497 0.000000 10408 +ankara 0 1 6.957497 0.000000 10409 +encounterpag 0 1 6.957497 0.000000 10410 +turkish 0 1 6.957497 0.000000 10411 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..a326eec3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +includ 0 208 1.609438 0.000000 42 +algorithm 2 162 1.791759 3.583518 57 +network 1 168 1.791759 1.791759 61 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +problem 2 147 1.945910 3.891820 75 +professor 0 137 1.945910 0.000000 76 +click 0 142 1.945910 0.000000 78 +assign 0 135 1.945910 0.000000 66 +high 0 130 2.079442 0.000000 101 +theori 2 111 2.197225 4.394450 127 +version 2 113 2.197225 4.394450 122 +mathemat 0 108 2.197225 0.000000 123 +find 0 111 2.197225 0.000000 111 +intern 0 108 2.197225 0.000000 128 +proceed 2 93 2.397895 4.795790 152 +mani 0 92 2.397895 0.000000 150 +activ 1 84 2.484907 2.484907 182 +school 1 84 2.484907 2.484907 188 +journal 1 83 2.484907 2.484907 183 +ieee 0 86 2.484907 0.000000 190 +appear 2 78 2.564949 5.129898 210 +optim 1 79 2.564949 2.564949 197 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +symposium 2 72 2.639057 5.278114 238 +upson 0 71 2.639057 0.000000 218 +practic 0 70 2.708050 0.000000 246 +januari 1 62 2.772589 2.772589 264 +complex 1 64 2.772589 2.772589 269 +improv 1 62 2.772589 2.772589 289 +foundat 0 62 2.772589 0.000000 286 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +summer 0 56 2.890372 0.000000 311 +cover 0 55 2.944439 0.000000 329 +particular 0 51 2.995732 0.000000 352 +fast 1 42 3.218876 3.218876 429 +linear 0 41 3.218876 0.000000 431 +annual 2 40 3.258097 6.516194 458 +industri 0 38 3.295837 0.000000 464 +approxim 2 35 3.401197 6.802394 509 +concurr 0 34 3.401197 0.000000 501 +survei 0 35 3.401197 0.000000 513 +bibliographi 0 34 3.401197 0.000000 518 +graph 1 30 3.555348 3.555348 576 +computersci 0 30 3.555348 0.000000 562 +bound 0 26 3.688879 0.000000 659 +proc 0 26 3.688879 0.000000 649 +aspect 0 25 3.737670 0.000000 663 +flow 2 24 3.761200 7.522400 700 +universityithaca 0 24 3.761200 0.000000 710 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +siam 1 21 3.912023 3.912023 800 +path 1 21 3.912023 3.912023 778 +theorem 1 21 3.912023 3.912023 786 +unit 0 21 3.912023 0.000000 779 +rout 0 21 3.912023 0.000000 793 +vlsi 0 21 3.912023 0.000000 795 +annot 0 21 3.912023 0.000000 775 +north 1 19 4.007333 4.007333 873 +separ 0 19 4.007333 0.000000 844 +sept 0 17 4.110874 0.000000 952 +spars 0 16 4.174387 0.000000 989 +polynomi 1 14 4.317488 4.317488 1069 +embed 0 14 4.317488 0.000000 1102 +discret 1 13 4.382027 4.382027 1165 +speak 0 12 4.465908 0.000000 1283 +arbitrari 0 11 4.553877 0.000000 1359 +itali 0 11 4.553877 0.000000 1378 +packet 0 10 4.653960 0.000000 1415 +strongli 0 10 4.653960 0.000000 1406 +preliminari 1 9 4.753590 4.753590 1480 +congress 0 9 4.753590 0.000000 1592 +combinatori 1 8 4.875197 4.875197 1629 +hallcornel 0 8 4.875197 0.000000 1757 +integ 0 8 4.875197 0.000000 1688 +capac 0 8 4.875197 0.000000 1740 +switch 0 8 4.875197 0.000000 1718 +daughter 0 7 5.010635 0.000000 1943 +maxim 0 7 5.010635 0.000000 1944 +handbook 1 6 5.164786 5.164786 2061 +rebecca 0 6 5.164786 0.000000 2174 +dens 0 6 5.164786 0.000000 2122 +inequ 0 6 5.164786 0.000000 2113 +holland 1 5 5.347108 5.347108 2490 +oncomput 0 5 5.347108 0.000000 2326 +stoc 0 5 5.347108 0.000000 2491 +fraction 0 5 5.347108 0.000000 2259 +proceedingsof 0 5 5.347108 0.000000 2331 +combinator 1 4 5.568345 5.568345 2915 +disjoint 1 4 5.568345 5.568345 2709 +graham 1 4 5.568345 5.568345 2817 +cut 0 4 5.568345 0.000000 2620 +stein 1 3 5.857933 5.857933 3646 +planar 1 3 5.857933 5.857933 3647 +thegener 0 3 5.857933 0.000000 3648 +combinatorica 0 3 5.857933 0.000000 3649 +pack 0 3 5.857933 0.000000 3597 +violat 0 3 5.857933 0.000000 3211 +tokyo 0 3 5.857933 0.000000 3622 +netherland 0 3 5.857933 0.000000 3650 +tardo 3 2 6.263398 18.790194 5090 +multicommod 1 2 6.263398 6.263398 4761 +lovasz 1 2 6.263398 6.263398 5091 +goldberg 1 2 6.263398 6.263398 4313 +hopp 1 2 6.263398 6.263398 5092 +kleinberg 1 2 6.263398 6.263398 5093 +julia 0 2 6.263398 0.000000 5094 +broadli 0 2 6.263398 0.000000 5095 +programmingproblem 0 2 6.263398 0.000000 4082 +appearedin 0 2 6.263398 0.000000 5096 +leighton 0 2 6.263398 0.000000 5097 +inmathemat 0 2 6.263398 0.000000 5098 +hasappear 0 2 6.263398 0.000000 5099 +goeman 0 2 6.263398 0.000000 5100 +williamson 0 2 6.263398 0.000000 5101 +diamet 0 2 6.263398 0.000000 5102 +tarjan 0 2 6.263398 0.000000 4278 +ori 2 1 6.957497 13.914994 10412 +shmoi 1 1 6.957497 6.957497 10413 +plotkin 1 1 6.957497 6.957497 10414 +approximationalgorithm 1 1 6.957497 6.957497 10415 +klein 1 1 6.957497 6.957497 10416 +grotschel 1 1 6.957497 6.957497 10417 +tardosassoci 0 1 6.957497 0.000000 10418 +engineeringphon 0 1 6.957497 0.000000 10419 +researchrec 0 1 6.957497 0.000000 10420 +mostlywork 0 1 6.957497 0.000000 10421 +networkproblem 0 1 6.957497 0.000000 10422 +paperssurvei 0 1 6.957497 0.000000 10423 +cutratio 0 1 6.957497 0.000000 10424 +fasterapproxim 0 1 6.957497 0.000000 10425 +problemwith 0 1 6.957497 0.000000 10426 +makedon 0 1 6.957497 0.000000 10427 +tragouda 0 1 6.957497 0.000000 10428 +flowproblem 0 1 6.957497 0.000000 10429 +annualacm 0 1 6.957497 0.000000 10430 +thefound 0 1 6.957497 0.000000 10431 +designproblem 0 1 6.957497 0.000000 10432 +discretealgorithm 0 1 6.957497 0.000000 10433 +someevacu 0 1 6.957497 0.000000 10434 +ondiscret 0 1 6.957497 0.000000 10435 +quickest 0 1 6.957497 0.000000 10436 +transship 0 1 6.957497 0.000000 10437 +theproceed 0 1 6.957497 0.000000 10438 +steiner 0 1 6.957497 0.000000 10439 +multicut 0 1 6.957497 0.000000 10440 +pathsproblem 0 1 6.957497 0.000000 10441 +annualiee 0 1 6.957497 0.000000 10442 +rabani 0 1 6.957497 0.000000 10443 +fleischer 0 1 6.957497 0.000000 10444 +comb 0 1 6.957497 0.000000 10445 +ipco 0 1 6.957497 0.000000 10446 +kort 0 1 6.957497 0.000000 10447 +lovaszand 0 1 6.957497 0.000000 10448 +schrijver 0 1 6.957497 0.000000 10449 +inoptim 0 1 6.957497 0.000000 10450 +ofmathematician 0 1 6.957497 0.000000 10451 +kyoto 0 1 6.957497 0.000000 10452 +inproc 0 1 6.957497 0.000000 10453 +maastricht 0 1 6.957497 0.000000 10454 +networkoptim 0 1 6.957497 0.000000 10455 +netflow 0 1 6.957497 0.000000 10456 +miniato 0 1 6.957497 0.000000 10457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..c3898e70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +cornel 0 215 1.386294 0.000000 23 +california 0 46 3.091042 0.000000 388 +departmentcornel 0 5 5.347108 0.000000 2275 +franci 1 3 5.857933 5.857933 3287 +universitycomput 0 3 5.857933 0.000000 3651 +berkeleymathemat 0 1 6.957497 0.000000 10458 +departmentcomput 0 1 6.957497 0.000000 10459 +departmenthumorfcc 0 1 6.957497 0.000000 10460 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..37f2ab02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +world 0 115 2.197225 0.000000 126 +felix 0 2 6.263398 0.000000 5103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..f87be2ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +click 0 142 1.945910 0.000000 78 +report 1 131 2.079442 2.079442 92 +introduct 0 126 2.079442 0.000000 87 +analysi 0 124 2.079442 0.000000 98 +person 0 111 2.197225 0.000000 117 +technic 1 100 2.302585 2.302585 140 +take 0 97 2.302585 0.000000 134 +homepag 1 93 2.397895 2.397895 148 +complet 0 77 2.564949 0.000000 208 +april 0 77 2.564949 0.000000 196 +upson 0 71 2.639057 0.000000 218 +test 1 66 2.708050 2.708050 252 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +complex 0 64 2.772589 0.000000 269 +juli 0 60 2.833213 0.000000 305 +algebra 1 45 3.135494 3.135494 394 +math 0 44 3.135494 0.000000 402 +semant 0 29 3.583519 0.000000 587 +universityithaca 0 24 3.761200 0.000000 710 +greg 0 24 3.761200 0.000000 695 +sciencecornel 0 22 3.850148 0.000000 768 +smith 1 20 3.951244 3.951244 820 +grad 0 20 3.951244 0.000000 837 +decid 0 14 4.317488 0.000000 1075 +food 0 12 4.465908 0.000000 1285 +sundai 0 10 4.653960 0.000000 1387 +morrisett 0 5 5.347108 0.000000 2263 +dexter 1 4 5.568345 5.568345 2855 +kozen 1 4 5.568345 5.568345 2619 +catch 0 4 5.568345 0.000000 2602 +halldepart 0 3 5.857933 0.000000 3641 +cohen 0 3 5.857933 0.000000 3652 +erni 0 2 6.263398 0.000000 5104 +epicuri 0 2 6.263398 0.000000 5105 +frederick 1 1 6.957497 6.957497 10461 +kleen 1 1 6.957497 6.957497 10462 +homepagefrederick 0 1 6.957497 0.000000 10463 +zine 0 1 6.957497 0.000000 10464 +cartalk 0 1 6.957497 0.000000 10465 +clack 0 1 6.957497 0.000000 10466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..c93871cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +phone 0 175 1.791759 0.000000 45 +associ 0 93 2.397895 0.000000 151 +institut 0 84 2.484907 0.000000 187 +improv 0 62 2.772589 0.000000 289 +electron 0 47 3.091042 0.000000 379 +either 0 35 3.401197 0.000000 506 +post 0 35 3.401197 0.000000 505 +actual 0 28 3.610918 0.000000 604 +doctor 0 24 3.761200 0.000000 709 +happi 0 14 4.317488 0.000000 1079 +frank 2 9 4.753590 9.507180 1568 +matter 0 8 4.875197 0.000000 1627 +xerox 0 8 4.875197 0.000000 1725 +planet 0 4 5.568345 0.000000 2912 +adelstein 0 1 6.957497 0.000000 10467 +checkout 0 1 6.957497 0.000000 10468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..823abb25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +model 0 145 1.945910 0.000000 69 +year 0 148 1.945910 0.000000 84 +postscript 0 131 2.079442 0.000000 90 +theori 1 111 2.197225 2.197225 127 +version 0 113 2.197225 0.000000 122 +peopl 0 96 2.302585 0.000000 132 +center 1 88 2.397895 2.397895 158 +section 0 94 2.397895 0.000000 149 +resum 0 79 2.564949 0.000000 217 +appear 0 78 2.564949 0.000000 210 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +previou 0 62 2.772589 0.000000 290 +visual 1 48 3.044522 3.044522 372 +http 1 41 3.218876 3.218876 420 +soon 1 36 3.367296 3.367296 494 +next 0 34 3.401197 0.000000 517 +anim 0 31 3.496508 0.000000 557 +berkelei 0 26 3.688879 0.000000 657 +other 0 24 3.761200 0.000000 697 +sciencecornel 0 22 3.850148 0.000000 768 +hobbi 0 16 4.174387 0.000000 1009 +mayb 0 15 4.248495 0.000000 1014 +drive 0 15 4.248495 0.000000 1052 +shown 0 14 4.317488 0.000000 1080 +massachusett 0 14 4.317488 0.000000 1118 +affili 1 13 4.382027 4.382027 1194 +hewlett 0 8 4.875197 0.000000 1709 +guitar 0 8 4.875197 0.000000 1758 +lawrenc 0 7 5.010635 0.000000 1908 +fred 2 6 5.164786 10.329572 2072 +photographi 0 6 5.164786 0.000000 2146 +feet 0 5 5.347108 0.000000 2492 +snail 0 4 5.568345 0.000000 2916 +yuan 1 3 5.857933 5.857933 3653 +chelmsford 1 3 5.857933 5.857933 3564 +binghamton 0 3 5.857933 0.000000 3544 +apollo 1 1 6.957497 6.957497 10469 +scramo 0 1 6.957497 0.000000 10470 +midi 0 1 6.957497 0.000000 10471 +choreograph 0 1 6.957497 0.000000 10472 +vpla 0 1 6.957497 0.000000 10473 +animationlink 0 1 6.957497 0.000000 10474 +packardlink 0 1 6.957497 0.000000 10475 +laboratoryinterest 0 1 6.957497 0.000000 10476 +cello 0 1 6.957497 0.000000 10477 +aquarium 0 1 6.957497 0.000000 10478 +burl 0 1 6.957497 0.000000 10479 +fredhsu 0 1 6.957497 0.000000 10480 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..e2607271 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +postscript 0 131 2.079442 0.000000 90 +compil 0 122 2.079442 0.000000 96 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +final 0 116 2.197225 0.000000 108 +part 1 98 2.302585 2.302585 129 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +school 1 84 2.484907 2.484907 188 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +novemb 0 81 2.484907 0.000000 179 +chang 0 82 2.484907 0.000000 163 +master 1 76 2.564949 2.564949 216 +resum 0 79 2.564949 0.000000 217 +complet 0 77 2.564949 0.000000 208 +june 0 79 2.564949 0.000000 214 +want 0 79 2.564949 0.000000 199 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +intellig 0 72 2.639057 0.000000 225 +involv 0 71 2.639057 0.000000 227 +name 0 72 2.639057 0.000000 220 +nation 0 74 2.639057 0.000000 240 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +degre 1 69 2.708050 2.708050 259 +prof 1 64 2.772589 2.772589 273 +artifici 0 63 2.772589 0.000000 280 +back 1 60 2.833213 2.833213 297 +colleg 1 61 2.833213 2.833213 300 +major 1 56 2.890372 2.890372 315 +semest 0 58 2.890372 0.000000 312 +detail 0 57 2.890372 0.000000 321 +undergradu 1 54 2.944439 2.944439 338 +still 0 50 3.044522 0.000000 362 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +get 0 46 3.091042 0.000000 380 +better 0 45 3.135494 0.000000 401 +anoth 0 45 3.135494 0.000000 408 +long 1 43 3.178054 3.178054 413 +littl 0 39 3.258097 0.000000 454 +probabl 0 40 3.258097 0.000000 455 +brian 0 38 3.295837 0.000000 466 +mean 0 37 3.332205 0.000000 477 +michael 0 35 3.401197 0.000000 514 +approxim 0 35 3.401197 0.000000 509 +india 1 32 3.465736 3.465736 550 +taken 0 31 3.496508 0.000000 555 +actual 1 28 3.610918 3.610918 604 +hope 0 28 3.610918 0.000000 610 +never 0 25 3.737670 0.000000 671 +sciencecornel 0 22 3.850148 0.000000 768 +deal 0 22 3.850148 0.000000 736 +born 1 21 3.912023 3.912023 798 +leav 0 21 3.912023 0.000000 772 +smith 0 20 3.951244 0.000000 820 +wonder 0 20 3.951244 0.000000 815 +region 0 19 4.007333 0.000000 875 +miss 0 19 4.007333 0.000000 866 +four 1 18 4.060443 4.060443 905 +upon 0 16 4.174387 0.000000 978 +took 0 16 4.174387 0.000000 1010 +match 0 16 4.174387 0.000000 965 +goe 0 15 4.248495 0.000000 1044 +precis 0 15 4.248495 0.000000 1023 +earlier 0 13 4.382027 0.000000 1140 +someon 0 13 4.382027 0.000000 1128 +land 0 12 4.465908 0.000000 1273 +outsid 0 12 4.465908 0.000000 1219 +went 0 12 4.465908 0.000000 1279 +holidai 0 12 4.465908 0.000000 1224 +noth 0 11 4.553877 0.000000 1328 +light 0 9 4.753590 0.000000 1533 +vineet 0 8 4.875197 0.000000 1639 +pursu 1 7 5.010635 5.010635 1902 +seshadri 0 7 5.010635 0.000000 1803 +keshav 0 7 5.010635 0.000000 1852 +lucki 1 6 5.164786 5.164786 2163 +praveen 0 6 5.164786 0.000000 1996 +srinivasan 0 6 5.164786 0.000000 2175 +somewher 0 6 5.164786 0.000000 2176 +babi 0 5 5.347108 0.000000 2493 +interfer 0 5 5.347108 0.000000 2494 +greater 0 5 5.347108 0.000000 2258 +ashish 0 5 5.347108 0.000000 2473 +engineeringdepart 0 4 5.568345 0.000000 2917 +dive 0 3 5.857933 0.000000 3654 +straight 0 3 5.857933 0.000000 3655 +indira 0 3 5.857933 0.000000 3656 +karnataka 0 2 6.263398 0.000000 5106 +bharat 0 2 6.263398 0.000000 5107 +cute 0 2 6.263398 0.000000 5108 +incident 0 2 6.263398 0.000000 5109 +bangalor 0 2 6.263398 0.000000 5110 +that 0 2 6.263398 0.000000 5111 +conquer 0 2 6.263398 0.000000 5112 +aastha 0 2 6.263398 0.000000 5005 +ankit 0 2 6.263398 0.000000 4966 +deepak 1 1 6.957497 6.957497 10481 +balakrishna 1 1 6.957497 6.957497 10482 +balakrishnamast 0 1 6.957497 0.000000 10483 +resumeeducationcoursesperson 0 1 6.957497 0.000000 10484 +surathk 0 1 6.957497 0.000000 10485 +specialis 0 1 6.957497 0.000000 10486 +godfrei 0 1 6.957497 0.000000 10487 +chubbi 0 1 6.957497 0.000000 10488 +weigh 0 1 6.957497 0.000000 10489 +pound 0 1 6.957497 0.000000 10490 +divin 0 1 6.957497 0.000000 10491 +aishwarya 0 1 6.957497 0.000000 10492 +miniscul 0 1 6.957497 0.000000 10493 +krec 0 1 6.957497 0.000000 10494 +here 0 1 6.957497 0.000000 10495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..5b3d9e3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +includ 0 208 1.609438 0.000000 42 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +sinc 0 90 2.397895 0.000000 159 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +solut 0 82 2.484907 0.000000 162 +educ 0 86 2.484907 0.000000 191 +master 0 76 2.564949 0.000000 216 +good 0 77 2.564949 0.000000 200 +onlin 0 75 2.639057 0.000000 223 +multimedia 0 68 2.708050 0.000000 258 +degre 0 69 2.708050 0.000000 259 +plan 0 65 2.772589 0.000000 272 +unix 0 58 2.890372 0.000000 308 +music 0 42 3.218876 0.000000 436 +respons 0 37 3.332205 0.000000 476 +global 0 34 3.401197 0.000000 520 +administr 0 27 3.637586 0.000000 628 +leav 0 21 3.912023 0.000000 772 +bachelor 0 17 4.110874 0.000000 957 +warn 0 14 4.317488 0.000000 1068 +hopefulli 0 14 4.317488 0.000000 1071 +cricket 0 7 5.010635 0.000000 1945 +publicationsth 0 4 5.568345 0.000000 2859 +ghia 1 2 6.263398 6.263398 4934 +asif 0 2 6.263398 0.000000 4933 +mywww 0 2 6.263398 0.000000 5113 +uddin 1 1 6.957497 6.957497 10496 +ghiasasif 0 1 6.957497 0.000000 10497 +constructioni 0 1 6.957497 0.000000 10498 +karachi 0 1 6.957497 0.000000 10499 +pakistan 0 1 6.957497 0.000000 10500 +installationso 0 1 6.957497 0.000000 10501 +astronomyasif 0 1 6.957497 0.000000 10502 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..eff4146a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +visit 0 63 2.772589 0.000000 288 +move 0 47 3.091042 0.000000 382 +http 0 41 3.218876 0.000000 420 +berkelei 0 26 3.688879 0.000000 657 +million 0 5 5.347108 0.000000 2495 +dglaser 0 1 6.957497 0.000000 10503 +htmlpleas 0 1 6.957497 0.000000 10504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..77979a9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,334 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +softwar 0 220 1.386294 0.000000 30 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +implement 0 152 1.791759 0.000000 52 +read 0 154 1.791759 0.000000 47 +hour 0 165 1.791759 0.000000 46 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +relat 0 139 1.945910 0.000000 68 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +like 0 132 1.945910 0.000000 81 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +teach 1 108 2.197225 2.197225 112 +topic 1 114 2.197225 2.197225 110 +assist 0 112 2.197225 0.000000 113 +make 0 111 2.197225 0.000000 120 +text 1 98 2.302585 2.302585 133 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +call 0 91 2.397895 0.000000 153 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +help 1 83 2.484907 2.484907 175 +level 0 87 2.484907 0.000000 180 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +know 1 80 2.564949 2.564949 198 +master 0 76 2.564949 0.000000 216 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +good 0 77 2.564949 0.000000 200 +complet 0 77 2.564949 0.000000 208 +logic 1 71 2.639057 2.639057 230 +david 1 71 2.639057 2.639057 232 +symposium 1 72 2.639057 2.639057 238 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +effici 0 73 2.639057 0.000000 233 +servic 0 72 2.639057 0.000000 236 +receiv 1 66 2.708050 2.708050 244 +goal 0 66 2.708050 0.000000 250 +written 0 63 2.772589 0.000000 278 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +foundat 0 62 2.772589 0.000000 286 +import 0 65 2.772589 0.000000 282 +colleg 1 61 2.833213 2.833213 300 +best 1 59 2.833213 2.833213 299 +plai 0 60 2.833213 0.000000 307 +content 0 59 2.833213 0.000000 302 +thesi 0 57 2.890372 0.000000 327 +three 0 54 2.944439 0.000000 330 +tabl 1 51 2.995732 2.995732 346 +particular 0 51 2.995732 0.000000 352 +numer 0 49 3.044522 0.000000 369 +telephon 0 50 3.044522 0.000000 373 +give 0 50 3.044522 0.000000 359 +move 1 47 3.091042 3.091042 382 +understand 0 47 3.091042 0.000000 384 +effect 0 46 3.091042 0.000000 385 +math 1 44 3.135494 3.135494 402 +made 1 44 3.135494 3.135494 398 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +editor 1 41 3.218876 3.218876 433 +york 0 41 3.218876 0.000000 435 +edit 0 42 3.218876 0.000000 418 +howev 0 41 3.218876 0.000000 422 +announc 0 40 3.258097 0.000000 441 +programm 0 39 3.258097 0.000000 445 +societi 0 40 3.258097 0.000000 456 +author 0 39 3.258097 0.000000 450 +late 0 40 3.258097 0.000000 439 +paul 1 38 3.295837 3.295837 471 +vita 0 38 3.295837 0.000000 473 +correct 0 38 3.295837 0.000000 462 +open 0 38 3.295837 0.000000 469 +formal 1 37 3.332205 3.332205 478 +respons 0 37 3.332205 0.000000 476 +china 0 37 3.332205 0.000000 487 +short 0 36 3.367296 0.000000 499 +award 2 34 3.401197 6.802394 523 +survei 1 35 3.401197 3.401197 513 +return 1 34 3.401197 3.401197 502 +curriculum 0 33 3.433987 0.000000 535 +go 0 33 3.433987 0.000000 529 +articl 0 33 3.433987 0.000000 530 +board 0 33 3.433987 0.000000 528 +obtain 0 33 3.433987 0.000000 534 +dissert 0 32 3.465736 0.000000 549 +concept 0 32 3.465736 0.000000 537 +taken 0 31 3.496508 0.000000 555 +abl 0 30 3.555348 0.000000 566 +produc 0 30 3.555348 0.000000 572 +chair 1 29 3.583519 3.583519 596 +semant 0 29 3.583519 0.000000 587 +art 0 29 3.583519 0.000000 593 +weather 1 28 3.610918 3.610918 618 +full 0 28 3.610918 0.000000 615 +usual 0 28 3.610918 0.000000 608 +mine 0 26 3.688879 0.000000 654 +repres 0 26 3.688879 0.000000 656 +compar 0 26 3.688879 0.000000 648 +enjoi 0 26 3.688879 0.000000 660 +concern 0 25 3.737670 0.000000 666 +spent 0 25 3.737670 0.000000 676 +toward 0 25 3.737670 0.000000 668 +aspect 0 25 3.737670 0.000000 663 +sport 0 25 3.737670 0.000000 683 +universityithaca 0 24 3.761200 0.000000 710 +doctor 0 24 3.761200 0.000000 709 +known 0 24 3.761200 0.000000 702 +interpret 0 24 3.761200 0.000000 686 +methodolog 0 23 3.806662 0.000000 733 +proof 0 23 3.806662 0.000000 720 +serv 1 22 3.850148 3.850148 758 +period 1 22 3.850148 3.850148 743 +almost 0 22 3.850148 0.000000 742 +william 0 22 3.850148 0.000000 765 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +born 1 21 3.912023 3.912023 798 +programminglanguag 0 21 3.912023 0.000000 782 +fact 0 21 3.912023 0.000000 780 +busi 0 21 3.912023 0.000000 784 +hous 0 21 3.912023 0.000000 801 +tenni 0 20 3.951244 0.000000 838 +item 0 19 4.007333 0.000000 856 +left 0 19 4.007333 0.000000 851 +ever 0 19 4.007333 0.000000 872 +figur 0 18 4.060443 0.000000 903 +four 0 18 4.060443 0.000000 905 +stand 0 18 4.060443 0.000000 891 +stanford 1 17 4.110874 4.110874 955 +germani 0 17 4.110874 0.000000 946 +whether 0 17 4.110874 0.000000 918 +alreadi 0 16 4.174387 0.000000 963 +later 1 15 4.248495 4.248495 1043 +susan 0 15 4.248495 0.000000 1050 +contribut 0 15 4.248495 0.000000 1021 +rate 0 15 4.248495 0.000000 1037 +becam 0 14 4.317488 0.000000 1117 +latex 0 14 4.317488 0.000000 1064 +manner 0 14 4.317488 0.000000 1074 +conduct 0 14 4.317488 0.000000 1065 +wife 1 13 4.382027 4.382027 1196 +essenti 0 13 4.382027 0.000000 1137 +individu 0 13 4.382027 0.000000 1126 +believ 0 13 4.382027 0.000000 1187 +went 1 12 4.465908 4.465908 1279 +speak 0 12 4.465908 0.000000 1283 +grant 0 12 4.465908 0.000000 1216 +excit 0 11 4.553877 0.000000 1329 +fellowship 0 10 4.653960 0.000000 1460 +thecomput 0 10 4.653960 0.000000 1408 +end 0 9 4.753590 0.000000 1567 +respect 0 9 4.753590 0.000000 1545 +softbal 0 9 4.753590 0.000000 1594 +volleybal 0 9 4.753590 0.000000 1598 +swim 0 9 4.753590 0.000000 1599 +mention 0 9 4.753590 0.000000 1569 +lewi 0 8 4.875197 0.000000 1700 +hallcornel 0 8 4.875197 0.000000 1757 +guggenheim 0 8 4.875197 0.000000 1759 +told 0 8 4.875197 0.000000 1658 +joke 0 8 4.875197 0.000000 1620 +wire 0 8 4.875197 0.000000 1747 +illinoi 1 7 5.010635 5.010635 1941 +pagecomput 0 7 5.010635 0.000000 1900 +marri 0 7 5.010635 0.000000 1946 +notion 0 7 5.010635 0.000000 1947 +explain 0 7 5.010635 0.000000 1816 +snow 0 6 5.164786 0.000000 2031 +outstand 0 6 5.164786 0.000000 2136 +spare 0 6 5.164786 0.000000 2177 +golf 0 6 5.164786 0.000000 2178 +truth 0 6 5.164786 0.000000 2179 +elain 1 5 5.347108 5.347108 2496 +joseph 0 5 5.347108 0.000000 2327 +these 0 5 5.347108 0.000000 2482 +plant 0 5 5.347108 0.000000 2497 +feder 0 5 5.347108 0.000000 2266 +ofparallel 0 5 5.347108 0.000000 2380 +interfer 0 5 5.347108 0.000000 2494 +began 0 5 5.347108 0.000000 2498 +rewrit 0 5 5.347108 0.000000 2367 +sing 0 5 5.347108 0.000000 2499 +yield 0 5 5.347108 0.000000 2458 +proud 1 4 5.568345 5.568345 2918 +raman 1 4 5.568345 5.568345 2827 +queen 0 4 5.568345 0.000000 2919 +naval 0 4 5.568345 0.000000 2920 +birthdai 0 4 5.568345 0.000000 2800 +bloom 0 4 5.568345 0.000000 2913 +substanti 0 4 5.568345 0.000000 2921 +schneider 0 4 5.568345 0.000000 2868 +ping 0 4 5.568345 0.000000 2922 +gri 1 3 5.857933 5.857933 3569 +munich 1 3 5.857933 5.857933 3570 +twin 1 3 5.857933 5.857933 3657 +biographi 1 3 5.857933 5.857933 3658 +laugh 1 3 5.857933 5.857933 3659 +freshman 0 3 5.857933 0.000000 3462 +dimac 0 3 5.857933 0.000000 3574 +assistantship 0 3 5.857933 0.000000 3660 +langaug 0 3 5.857933 0.000000 3661 +blind 0 3 5.857933 0.000000 3662 +serious 0 3 5.857933 0.000000 3663 +researchassoci 0 3 5.857933 0.000000 3664 +pong 0 3 5.857933 0.000000 3371 +audienc 0 3 5.857933 0.000000 3180 +pagedavid 0 2 6.263398 0.000000 5114 +sophomor 0 2 6.263398 0.000000 4695 +polya 0 2 6.263398 0.000000 4939 +weapon 0 2 6.263398 0.000000 5115 +degreein 0 2 6.263398 0.000000 5116 +manfr 0 2 6.263398 0.000000 4949 +bauer 0 2 6.263398 0.000000 5117 +cake 0 2 6.263398 0.000000 5118 +booth 0 2 6.263398 0.000000 5119 +theamerican 0 2 6.263398 0.000000 5120 +afip 0 2 6.263398 0.000000 4300 +andt 0 2 6.263398 0.000000 5121 +spoken 0 2 6.263398 0.000000 5122 +researchinterest 0 2 6.263398 0.000000 5123 +acta 0 2 6.263398 0.000000 5124 +informatica 0 2 6.263398 0.000000 5125 +andtool 0 2 6.263398 0.000000 5126 +grieswilliam 0 1 6.957497 0.000000 10505 +engineeringdr 0 1 6.957497 0.000000 10506 +formaldevelop 0 1 6.957497 0.000000 10507 +asinterest 0 1 6.957497 0.000000 10508 +researchin 0 1 6.957497 0.000000 10509 +taughta 0 1 6.957497 0.000000 10510 +anoverrid 0 1 6.957497 0.000000 10511 +edushort 0 1 6.957497 0.000000 10512 +griesi 0 1 6.957497 0.000000 10513 +flush 0 1 6.957497 0.000000 10514 +iescap 0 1 6.957497 0.000000 10515 +workfor 0 1 6.957497 0.000000 10516 +civilian 0 1 6.957497 0.000000 10517 +amathematician 0 1 6.957497 0.000000 10518 +fewmonth 0 1 6.957497 0.000000 10519 +twogerman 0 1 6.957497 0.000000 10520 +ruedig 0 1 6.957497 0.000000 10521 +wiehl 0 1 6.957497 0.000000 10522 +algol 0 1 6.957497 0.000000 10523 +compilerfor 0 1 6.957497 0.000000 10524 +implementrecurs 0 1 6.957497 0.000000 10525 +stoer 0 1 6.957497 0.000000 10526 +wasin 0 1 6.957497 0.000000 10527 +notyet 0 1 6.957497 0.000000 10528 +kosher 0 1 6.957497 0.000000 10529 +thebirthdai 0 1 6.957497 0.000000 10530 +intown 0 1 6.957497 0.000000 10531 +whichha 0 1 6.957497 0.000000 10532 +wasdepart 0 1 6.957497 0.000000 10533 +lewisprofessor 0 1 6.957497 0.000000 10534 +contentsi 0 1 6.957497 0.000000 10535 +mytext 0 1 6.957497 0.000000 10536 +writingand 0 1 6.957497 0.000000 10537 +thewond 0 1 6.957497 0.000000 10538 +wherey 0 1 6.957497 0.000000 10539 +contributionsto 0 1 6.957497 0.000000 10540 +sigcseaward 0 1 6.957497 0.000000 10541 +clarkaward 0 1 6.957497 0.000000 10542 +advise 0 1 6.957497 0.000000 10543 +susanowicki 0 1 6.957497 0.000000 10544 +laid 0 1 6.957497 0.000000 10545 +freeness 0 1 6.957497 0.000000 10546 +bestpap 0 1 6.957497 0.000000 10547 +sthesi 0 1 6.957497 0.000000 10548 +designedand 0 1 6.957497 0.000000 10549 +printedor 0 1 6.957497 0.000000 10550 +speakmathemat 0 1 6.957497 0.000000 10551 +audiocassett 0 1 6.957497 0.000000 10552 +officein 0 1 6.957497 0.000000 10553 +taulbe 0 1 6.957497 0.000000 10554 +responsesfrom 0 1 6.957497 0.000000 10555 +noother 0 1 6.957497 0.000000 10556 +itrequir 0 1 6.957497 0.000000 10557 +sendin 0 1 6.957497 0.000000 10558 +questionnair 0 1 6.957497 0.000000 10559 +forchair 0 1 6.957497 0.000000 10560 +andrespons 0 1 6.957497 0.000000 10561 +takean 0 1 6.957497 0.000000 10562 +willsuggest 0 1 6.957497 0.000000 10563 +servewher 0 1 6.957497 0.000000 10564 +fredb 0 1 6.957497 0.000000 10565 +andmonograph 0 1 6.957497 0.000000 10566 +isplit 0 1 6.957497 0.000000 10567 +pant 0 1 6.957497 0.000000 10568 +alectur 0 1 6.957497 0.000000 10569 +turnedaround 0 1 6.957497 0.000000 10570 +spoke 0 1 6.957497 0.000000 10571 +everyonelaugh 0 1 6.957497 0.000000 10572 +justsaid 0 1 6.957497 0.000000 10573 +barbershop 0 1 6.957497 0.000000 10574 +andgilbert 0 1 6.957497 0.000000 10575 +sullivan 0 1 6.957497 0.000000 10576 +carpentri 0 1 6.957497 0.000000 10577 +remodel 0 1 6.957497 0.000000 10578 +considerablesatisfact 0 1 6.957497 0.000000 10579 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..5778679b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +first 0 140 1.945910 0.000000 71 +learn 0 86 2.484907 0.000000 170 +html 0 75 2.639057 0.000000 235 +week 0 52 2.995732 0.000000 343 +express 0 32 3.465736 0.000000 540 +sciencecornel 0 22 3.850148 0.000000 768 +shop 0 10 4.653960 0.000000 1469 +bore 0 7 5.010635 0.000000 1948 +alex 0 6 5.164786 0.000000 2130 +grinzayd 1 1 6.957497 6.957497 10580 +homepagealex 0 1 6.957497 0.000000 10581 +grinzaydm 0 1 6.957497 0.000000 10582 +universitytel 0 1 6.957497 0.000000 10583 +necx 0 1 6.957497 0.000000 10584 +directinternet 0 1 6.957497 0.000000 10585 +networkcomput 0 1 6.957497 0.000000 10586 +damarkwarn 0 1 6.957497 0.000000 10587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..07986e2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +homepag 0 93 2.397895 0.000000 148 +second 0 81 2.484907 0.000000 166 +activ 0 84 2.484907 0.000000 182 +novemb 0 81 2.484907 0.000000 179 +complet 0 77 2.564949 0.000000 208 +master 0 76 2.564949 0.000000 216 +messag 0 76 2.564949 0.000000 212 +involv 0 71 2.639057 0.000000 227 +degre 0 69 2.708050 0.000000 259 +ithaca 0 65 2.772589 0.000000 294 +sever 0 56 2.890372 0.000000 322 +advisor 0 51 2.995732 0.000000 355 +york 0 41 3.218876 0.000000 435 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +split 0 14 4.317488 0.000000 1078 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +scienceat 0 11 4.553877 0.000000 1375 +charg 0 9 4.753590 0.000000 1582 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 0 4 5.568345 0.000000 2924 +cuc 0 4 5.568345 0.000000 2630 +poland 0 3 5.857933 0.000000 3665 +grze 1 1 6.957497 6.957497 10588 +czajkowskidepart 0 1 6.957497 0.000000 10589 +krakow 0 1 6.957497 0.000000 10590 +administ 0 1 6.957497 0.000000 10591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..8248e2a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +like 0 132 1.945910 0.000000 81 +number 0 130 2.079442 0.000000 97 +studi 0 120 2.079442 0.000000 91 +teach 1 108 2.197225 2.197225 112 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +mathemat 0 108 2.197225 0.000000 123 +check 0 115 2.197225 0.000000 118 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +activ 0 84 2.484907 0.000000 182 +resum 0 79 2.564949 0.000000 217 +upson 0 71 2.639057 0.000000 218 +knowledg 1 67 2.708050 2.708050 243 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +septemb 0 65 2.772589 0.000000 274 +best 0 59 2.833213 0.000000 299 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +talk 1 53 2.944439 2.944439 336 +case 0 51 2.995732 0.000000 351 +give 1 50 3.044522 3.044522 359 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +describ 0 45 3.135494 0.000000 400 +continu 0 39 3.258097 0.000000 448 +probabl 0 40 3.258097 0.000000 455 +field 0 37 3.332205 0.000000 482 +game 0 36 3.367296 0.000000 498 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +scientist 0 31 3.496508 0.000000 560 +focus 0 29 3.583519 0.000000 584 +semant 0 29 3.583519 0.000000 587 +subject 0 26 3.688879 0.000000 647 +although 0 25 3.737670 0.000000 667 +princeton 0 15 4.248495 0.000000 1042 +econom 0 13 4.382027 0.000000 1184 +someon 0 13 4.382027 0.000000 1128 +mainli 0 10 4.653960 0.000000 1432 +sentenc 0 10 4.653960 0.000000 1413 +uncertainti 1 7 5.010635 5.010635 1882 +boundari 0 7 5.010635 0.000000 1929 +gave 0 7 5.010635 0.000000 1922 +philosoph 0 7 5.010635 0.000000 1904 +li 0 5 5.347108 0.000000 2500 +hallithaca 0 4 5.568345 0.000000 2894 +universitycomput 0 3 5.857933 0.000000 3651 +halpern 1 1 6.957497 6.957497 10592 +pagejoseph 0 1 6.957497 0.000000 10593 +professorcornel 0 1 6.957497 0.000000 10594 +economist 0 1 6.957497 0.000000 10595 +abouta 0 1 6.957497 0.000000 10596 +sequel 0 1 6.957497 0.000000 10597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..0b1bf2a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +cornel 0 215 1.386294 0.000000 23 +construct 0 139 1.945910 0.000000 82 +tsuneshi 1 1 6.957497 6.957497 10598 +hashimoto 1 1 6.957497 6.957497 10599 +hashimototsuneshi 0 1 6.957497 0.000000 10600 +hashimotothi 0 1 6.957497 0.000000 10601 +cstsuneshi 0 1 6.957497 0.000000 10602 +hashi 0 1 6.957497 0.000000 10603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..2334e542 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +develop 0 174 1.791759 0.000000 53 +teach 0 108 2.197225 0.000000 112 +commun 1 95 2.397895 2.397895 157 +novemb 0 81 2.484907 0.000000 179 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +unix 0 58 2.890372 0.000000 308 +mark 1 44 3.135494 3.135494 403 +proof 0 23 3.806662 0.000000 720 +horu 0 14 4.317488 0.000000 1116 +nuprl 0 10 4.653960 0.000000 1402 +hockei 0 8 4.875197 0.000000 1760 +hayden 1 4 5.568345 5.568345 2844 +tast 0 3 5.857933 0.000000 3666 +ensembl 0 2 6.263398 0.000000 4854 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..8100a368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +improv 0 62 2.772589 0.000000 289 +cyber 0 4 5.568345 0.000000 2909 +pond 0 2 6.263398 0.000000 5127 +heji 1 1 6.957497 6.957497 10604 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..acb95efd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +year 0 148 1.945910 0.000000 84 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +faculti 0 56 2.890372 0.000000 325 +undergradu 0 54 2.944439 0.000000 338 +advisor 0 51 2.995732 0.000000 355 +third 0 43 3.178054 0.000000 412 +china 0 37 3.332205 0.000000 487 +berkelei 0 26 3.688879 0.000000 657 +born 0 21 3.912023 0.000000 798 +thorsten 0 13 4.382027 0.000000 1133 +eicken 0 13 4.382027 0.000000 1134 +hallithaca 0 4 5.568345 0.000000 2894 +shanghai 0 4 5.568345 0.000000 2925 +universitydept 0 3 5.857933 0.000000 3602 +deyu 1 1 6.957497 6.957497 10606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..857d360d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +machin 0 129 2.079442 0.000000 95 +mathemat 0 108 2.197225 0.000000 123 +check 0 115 2.197225 0.000000 118 +imag 0 91 2.397895 0.000000 161 +center 0 88 2.397895 0.000000 158 +homepag 0 93 2.397895 0.000000 148 +academ 0 82 2.484907 0.000000 178 +learn 0 86 2.484907 0.000000 170 +resourc 0 81 2.484907 0.000000 172 +master 0 76 2.564949 0.000000 216 +optim 0 79 2.564949 0.000000 197 +upson 0 71 2.639057 0.000000 218 +appli 0 71 2.639057 0.000000 226 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +back 0 60 2.833213 0.000000 297 +video 0 44 3.135494 0.000000 405 +vision 1 41 3.218876 3.218876 430 +bibliographi 0 34 3.401197 0.000000 518 +chines 1 29 3.583519 3.583519 595 +retriev 0 27 3.637586 0.000000 621 +motion 0 24 3.761200 0.000000 699 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +annot 0 21 3.912023 0.000000 775 +beij 0 19 4.007333 0.000000 876 +track 0 15 4.248495 0.000000 1029 +tsinghua 0 13 4.382027 0.000000 1195 +huang 1 12 4.465908 4.465908 1202 +thedepart 0 11 4.553877 0.000000 1350 +scienceat 0 11 4.553877 0.000000 1375 +fellowship 1 10 4.653960 4.653960 1460 +christian 1 7 5.010635 5.010635 1949 +ramin 0 7 5.010635 0.000000 1820 +zabih 0 6 5.164786 0.000000 2138 +mission 0 5 5.347108 0.000000 2465 +jing 1 3 5.857933 5.857933 3521 +bachelorand 0 2 6.263398 0.000000 5128 +chinami 0 2 6.263398 0.000000 5129 +evangel 0 1 6.957497 0.000000 10605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..d3a4e369 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +databas 0 122 2.079442 0.000000 86 +manag 0 114 2.197225 0.000000 125 +graphic 0 90 2.397895 0.000000 147 +resum 0 79 2.564949 0.000000 217 +practic 1 70 2.708050 2.708050 246 +ithaca 0 65 2.772589 0.000000 294 +new 0 64 2.772589 0.000000 262 +china 0 37 3.332205 0.000000 487 +chen 1 21 3.912023 3.912023 791 +taiwan 0 16 4.174387 0.000000 1006 +practicum 0 16 4.174387 0.000000 960 +mapl 0 11 4.553877 0.000000 1376 +perman 0 11 4.553877 0.000000 1372 +sung 0 6 5.164786 0.000000 2075 +chin 1 5 5.347108 5.347108 2408 +taipei 0 4 5.568345 0.000000 2926 +album 0 4 5.568345 0.000000 2888 +icchen 0 1 6.957497 0.000000 10607 +nctu 0 1 6.957497 0.000000 10608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..feba5412 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +network 0 168 1.791759 0.000000 61 +construct 0 139 1.945910 0.000000 82 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +advanc 0 99 2.302585 0.000000 130 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +multimedia 0 68 2.708050 0.000000 258 +visit 0 63 2.772589 0.000000 288 +post 0 35 3.401197 0.000000 505 +script 0 13 4.382027 0.000000 1171 +indira 0 3 5.857933 0.000000 3656 +malik 0 1 6.957497 0.000000 10609 +imalik 0 1 6.957497 0.000000 10610 +tap 0 1 6.957497 0.000000 10611 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..ab1f54f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +oper 1 180 1.609438 1.609438 34 +updat 0 191 1.609438 0.000000 41 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +graphic 1 90 2.397895 2.397895 147 +homepag 0 93 2.397895 0.000000 148 +java 1 70 2.708050 2.708050 248 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +dept 0 64 2.772589 0.000000 291 +ithaca 0 65 2.772589 0.000000 294 +polici 0 64 2.772589 0.000000 279 +colleg 0 61 2.833213 0.000000 300 +cool 0 49 3.044522 0.000000 374 +directori 0 45 3.135494 0.000000 396 +india 0 32 3.465736 0.000000 550 +independ 0 32 3.465736 0.000000 548 +taken 0 31 3.496508 0.000000 555 +anim 0 31 3.496508 0.000000 557 +sciencecornel 0 22 3.850148 0.000000 768 +indian 0 22 3.850148 0.000000 769 +applet 1 20 3.951244 3.951244 827 +toolkit 0 20 3.951244 0.000000 835 +practicum 0 16 4.174387 0.000000 960 +drive 0 15 4.248495 0.000000 1052 +camera 1 14 4.317488 4.317488 1115 +audio 0 14 4.317488 0.000000 1094 +galleri 0 13 4.382027 0.000000 1192 +magic 0 11 4.553877 0.000000 1358 +wood 0 11 4.553877 0.000000 1355 +perspect 0 10 4.653960 0.000000 1437 +hoca 0 5 5.347108 0.000000 2241 +hobb 0 4 5.568345 0.000000 2893 +nashvil 0 4 5.568345 0.000000 2867 +tennesse 0 4 5.568345 0.000000 2763 +indira 1 3 5.857933 5.857933 3656 +engineeringclass 0 3 5.857933 0.000000 3667 +recip 0 3 5.857933 0.000000 3668 +coimbator 0 2 6.263398 0.000000 5130 +cornelluniversityfal 0 2 6.263398 0.000000 5131 +cspracticum 0 2 6.263398 0.000000 5132 +carpet 0 2 6.263398 0.000000 5133 +colloqium 0 2 6.263398 0.000000 5134 +manageri 0 2 6.263398 0.000000 5135 +vidyaprakash 0 1 6.957497 0.000000 10612 +vidyaprakashmast 0 1 6.957497 0.000000 10613 +universitywelcom 0 1 6.957497 0.000000 10614 +financesumm 0 1 6.957497 0.000000 10615 +tracingin 0 1 6.957497 0.000000 10616 +perspectivetransform 0 1 6.957497 0.000000 10617 +myresumeclick 0 1 6.957497 0.000000 10618 +transformssom 0 1 6.957497 0.000000 10619 +sgamelan 0 1 6.957497 0.000000 10620 +calvinand 0 1 6.957497 0.000000 10621 +gif 0 1 6.957497 0.000000 10622 +chicker 0 1 6.957497 0.000000 10623 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..7db996fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +includ 0 208 1.609438 0.000000 42 +base 1 165 1.791759 1.791759 50 +parallel 0 169 1.791759 0.000000 60 +file 1 132 1.945910 1.945910 70 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +code 0 108 2.197225 0.000000 116 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +user 0 104 2.302585 0.000000 137 +section 0 94 2.397895 0.000000 149 +environ 0 84 2.484907 0.000000 177 +start 0 83 2.484907 0.000000 173 +solut 0 82 2.484907 0.000000 162 +come 1 78 2.564949 2.564949 202 +good 0 77 2.564949 0.000000 200 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +homework 0 79 2.564949 0.000000 193 +write 1 72 2.639057 2.639057 222 +test 1 66 2.708050 2.708050 252 +knowledg 0 67 2.708050 0.000000 243 +prof 0 64 2.772589 0.000000 273 +descript 0 64 2.772589 0.000000 271 +virtual 0 62 2.772589 0.000000 285 +detail 1 57 2.890372 2.890372 321 +index 0 56 2.890372 0.000000 309 +instruct 0 53 2.944439 0.000000 332 +extens 0 53 2.944439 0.000000 340 +cover 0 55 2.944439 0.000000 329 +much 0 52 2.995732 0.000000 349 +week 0 52 2.995732 0.000000 343 +set 0 50 3.044522 0.000000 361 +right 0 48 3.044522 0.000000 363 +get 0 46 3.091042 0.000000 380 +directori 0 45 3.135494 0.000000 396 +video 0 44 3.135494 0.000000 405 +protocol 0 45 3.135494 0.000000 407 +http 0 41 3.218876 0.000000 420 +programm 0 39 3.258097 0.000000 445 +tutori 0 39 3.258097 0.000000 437 +brian 0 38 3.295837 0.000000 466 +prototyp 0 38 3.295837 0.000000 463 +short 1 36 3.367296 3.367296 499 +multi 0 36 3.367296 0.000000 493 +manual 1 35 3.401197 3.401197 504 +packag 1 28 3.610918 3.610918 614 +togeth 0 23 3.806662 0.000000 714 +try 0 22 3.850148 0.000000 764 +smith 0 20 3.951244 0.000000 820 +mpeg 0 20 3.951244 0.000000 831 +media 0 19 4.007333 0.000000 861 +repositori 0 17 4.110874 0.000000 932 +doesn 0 15 4.248495 0.000000 1055 +remot 0 15 4.248495 0.000000 1041 +script 0 13 4.382027 0.000000 1171 +suit 0 13 4.382027 0.000000 1129 +realiti 0 12 4.465908 0.000000 1272 +guidelin 0 7 5.010635 0.000000 1832 +conferenc 0 7 5.010635 0.000000 1857 +put 0 6 5.164786 0.000000 2017 +valuabl 0 5 5.347108 0.000000 2256 +templat 0 5 5.347108 0.000000 2311 +spam 0 4 5.568345 0.000000 2927 +knowledgebas 0 2 6.263398 0.000000 5136 +pageioi 1 1 6.957497 6.957497 10624 +homeless 0 1 6.957497 0.000000 10625 +lamioi 0 1 6.957497 0.000000 10626 +multim 0 1 6.957497 0.000000 10627 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..2514f24d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +relat 0 139 1.945910 0.000000 68 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +introduct 0 126 2.079442 0.000000 87 +theori 1 111 2.197225 2.197225 127 +intern 0 108 2.197225 0.000000 128 +world 0 115 2.197225 0.000000 126 +make 0 111 2.197225 0.000000 120 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +proceed 1 93 2.397895 2.397895 152 +commun 0 95 2.397895 0.000000 157 +info 0 85 2.484907 0.000000 176 +environ 0 84 2.484907 0.000000 177 +wide 0 84 2.484907 0.000000 185 +method 0 80 2.564949 0.000000 213 +june 0 79 2.564949 0.000000 214 +upson 0 71 2.639057 0.000000 218 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +workshop 0 71 2.639057 0.000000 239 +summari 0 73 2.639057 0.000000 237 +abstract 1 62 2.772589 2.772589 276 +ithaca 0 65 2.772589 0.000000 294 +collect 0 65 2.772589 0.000000 268 +type 0 61 2.833213 0.000000 296 +thesi 1 57 2.890372 2.890372 327 +explor 0 58 2.890372 0.000000 324 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +hardwar 1 51 2.995732 2.995732 350 +format 1 48 3.044522 3.044522 356 +basic 0 50 3.044522 0.000000 360 +still 0 50 3.044522 0.000000 362 +algebra 1 45 3.135494 3.135494 394 +editor 1 41 3.218876 3.218876 433 +http 0 41 3.218876 0.000000 420 +live 0 40 3.258097 0.000000 451 +paul 1 38 3.295837 3.295837 471 +correct 0 38 3.295837 0.000000 462 +formal 0 37 3.332205 0.000000 478 +next 1 34 3.401197 3.401197 517 +post 0 35 3.401197 0.000000 505 +full 1 28 3.610918 3.610918 615 +load 0 28 3.610918 0.000000 601 +enhanc 0 26 3.688879 0.000000 644 +session 0 26 3.688879 0.000000 643 +doctor 0 24 3.761200 0.000000 709 +sometim 0 24 3.761200 0.000000 696 +proof 1 23 3.806662 3.806662 720 +methodolog 0 23 3.806662 0.000000 733 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +dai 0 22 3.850148 0.000000 753 +theorem 0 21 3.912023 0.000000 786 +synthesi 0 20 3.951244 0.000000 834 +toolkit 0 20 3.951244 0.000000 835 +prove 0 19 4.007333 0.000000 848 +north 0 19 4.007333 0.000000 873 +els 0 19 4.007333 0.000000 843 +hypertext 0 19 4.007333 0.000000 865 +definit 0 19 4.007333 0.000000 864 +coupl 0 17 4.110874 0.000000 939 +month 0 15 4.248495 0.000000 1025 +circuit 1 13 4.382027 4.382027 1131 +someon 0 13 4.382027 0.000000 1128 +moment 0 11 4.553877 0.000000 1379 +nuprl 1 10 4.653960 4.653960 1402 +usaphon 0 9 4.753590 0.000000 1600 +entitl 0 9 4.753590 0.000000 1490 +inter 0 9 4.753590 0.000000 1530 +float 0 9 4.753590 0.000000 1504 +prover 0 8 4.875197 0.000000 1653 +attent 0 8 4.875197 0.000000 1651 +ifip 0 5 5.347108 0.000000 2459 +holland 0 5 5.347108 0.000000 2490 +jackson 2 3 5.857933 11.715866 3586 +pagepaul 0 3 5.857933 0.000000 3669 +bout 0 3 5.857933 0.000000 3670 +elsevi 0 3 5.857933 0.000000 3671 +pai 0 3 5.857933 0.000000 3672 +shouldb 0 3 5.857933 0.000000 3673 +associatecornel 0 2 6.263398 0.000000 5137 +eduwww 0 2 6.263398 0.000000 5138 +linkag 0 2 6.263398 0.000000 5139 +thenuprl 0 2 6.263398 0.000000 5047 +workon 0 2 6.263398 0.000000 4280 +htmladdress 0 1 6.957497 0.000000 10628 +intereststheorem 0 1 6.957497 0.000000 10629 +andhardwar 0 1 6.957497 0.000000 10630 +informationmi 0 1 6.957497 0.000000 10631 +developmentsystem 0 1 6.957497 0.000000 10632 +bundi 0 1 6.957497 0.000000 10633 +automateddeduct 0 1 6.957497 0.000000 10634 +artif 0 1 6.957497 0.000000 10635 +stavrid 0 1 6.957497 0.000000 10636 +melham 0 1 6.957497 0.000000 10637 +transactionsa 0 1 6.957497 0.000000 10638 +theadvanc 0 1 6.957497 0.000000 10639 +nuprlth 0 1 6.957497 0.000000 10640 +getround 0 1 6.957497 0.000000 10641 +thetheori 0 1 6.957497 0.000000 10642 +foreach 0 1 6.957497 0.000000 10643 +andtheorem 0 1 6.957497 0.000000 10644 +thepolynomi 0 1 6.957497 0.000000 10645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..d21826d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +hani 0 2 6.263398 0.000000 5140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..49f1919c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +site 0 106 2.197225 0.000000 119 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +perman 0 11 4.553877 0.000000 1372 +usaoffic 0 6 5.164786 0.000000 2159 +janosi 0 3 5.857933 0.000000 3149 +mywww 0 2 6.263398 0.000000 5113 +tibor 0 1 6.957497 0.000000 10646 +jnositibor 0 1 6.957497 0.000000 10647 +jnosiwelcom 0 1 6.957497 0.000000 10648 +constructionoffic 0 1 6.957497 0.000000 10649 +zenotibor 0 1 6.957497 0.000000 10650 +jnosi 0 1 6.957497 0.000000 10651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..8c91e64b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 0 215 1.386294 0.000000 23 +address 0 170 1.791759 0.000000 62 +master 0 76 2.564949 0.000000 216 +ithaca 0 65 2.772589 0.000000 294 +telephon 0 50 3.044522 0.000000 373 +avenu 0 12 4.465908 0.000000 1277 +mapl 0 11 4.553877 0.000000 1376 +janwun 1 1 6.957497 6.957497 10652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..c0fae560 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +languag 2 227 1.386294 2.772588 26 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +implement 1 152 1.791759 1.791759 52 +phone 1 175 1.791759 1.791759 45 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +construct 1 139 1.945910 1.945910 82 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +version 1 113 2.197225 2.197225 122 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +technic 1 100 2.302585 2.302585 140 +memori 1 101 2.302585 2.302585 139 +advanc 0 99 2.302585 0.000000 130 +proceed 1 93 2.397895 2.397895 152 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +level 1 87 2.484907 2.484907 180 +resourc 0 81 2.484907 0.000000 172 +member 0 84 2.484907 0.000000 165 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +state 0 76 2.564949 0.000000 207 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +upson 0 71 2.639057 0.000000 218 +line 0 75 2.639057 0.000000 231 +practic 0 70 2.708050 0.000000 246 +ithaca 1 65 2.772589 2.772589 294 +abstract 1 62 2.772589 2.772589 276 +evalu 0 64 2.772589 0.000000 266 +septemb 0 65 2.772589 0.000000 274 +function 0 62 2.772589 0.000000 275 +januari 0 62 2.772589 0.000000 264 +type 1 61 2.833213 2.833213 296 +content 0 59 2.833213 0.000000 302 +juli 0 60 2.833213 0.000000 305 +publish 1 57 2.890372 2.890372 326 +direct 1 57 2.890372 2.890372 316 +faculti 0 56 2.890372 0.000000 325 +thesi 0 57 2.890372 0.000000 327 +tabl 0 51 2.995732 0.000000 346 +standard 1 48 3.044522 3.044522 365 +principl 1 48 3.044522 3.044522 357 +mark 0 44 3.135494 0.000000 403 +late 0 40 3.258097 0.000000 439 +annual 0 40 3.258097 0.000000 458 +bibliographi 0 34 3.401197 0.000000 518 +extend 1 32 3.465736 3.465736 539 +ad 0 32 3.465736 0.000000 544 +robert 1 30 3.555348 3.555348 567 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +semant 1 29 3.583519 3.583519 587 +focus 0 29 3.583519 0.000000 584 +platform 0 29 3.583519 0.000000 591 +proc 1 26 3.688879 3.688879 649 +primari 0 25 3.737670 0.000000 669 +greg 2 24 3.761200 7.522400 695 +interpret 0 24 3.761200 0.000000 686 +store 0 24 3.761200 0.000000 693 +thread 0 23 3.806662 0.000000 722 +cooper 0 22 3.850148 0.000000 757 +portabl 1 20 3.951244 3.951244 819 +andrew 1 19 4.007333 4.007333 849 +particularli 0 19 4.007333 0.000000 867 +eric 0 19 4.007333 0.000000 870 +concentr 0 18 4.060443 0.000000 906 +less 0 18 4.060443 0.000000 892 +partial 0 18 4.060443 0.000000 900 +fourth 0 16 4.174387 0.000000 999 +diego 0 16 4.174387 0.000000 992 +princeton 0 15 4.248495 0.000000 1042 +francisco 0 14 4.317488 0.000000 1095 +sigplan 1 13 4.382027 4.382027 1190 +conf 0 13 4.382027 0.000000 1181 +mellon 0 13 4.382027 0.000000 1179 +onth 0 12 4.465908 0.000000 1218 +carnegi 0 12 4.465908 0.000000 1260 +faster 0 11 4.553877 0.000000 1323 +refin 0 11 4.553877 0.000000 1363 +road 0 11 4.553877 0.000000 1374 +cheng 1 10 4.653960 4.653960 1381 +interestsmi 0 10 4.653960 0.000000 1462 +operatingsystem 0 10 4.653960 0.000000 1401 +bring 0 10 4.653960 0.000000 1430 +jersei 1 9 4.753590 4.753590 1587 +lock 0 9 4.753590 0.000000 1551 +closur 0 8 4.875197 0.000000 1643 +convers 0 8 4.875197 0.000000 1673 +andcomput 0 8 4.875197 0.000000 1623 +leon 0 8 4.875197 0.000000 1631 +hack 0 7 5.010635 0.000000 1950 +bit 0 7 5.010635 0.000000 1833 +gzip 0 6 5.164786 0.000000 2117 +morrisett 2 5 5.347108 10.694216 2263 +interestedin 0 5 5.347108 0.000000 2260 +consum 0 5 5.347108 0.000000 2334 +optimist 0 5 5.347108 0.000000 2501 +gregori 1 4 5.568345 5.568345 2928 +polymorph 1 4 5.568345 5.568345 2627 +kept 0 4 5.568345 0.000000 2762 +stone 1 3 5.857933 5.857933 3674 +informationresearch 0 3 5.857933 0.000000 3675 +teachingc 0 3 5.857933 0.000000 3614 +denmark 0 3 5.857933 0.000000 3676 +warren 0 3 5.857933 0.000000 3301 +harper 1 2 6.263398 6.263398 5141 +multiprocess 1 2 6.263398 6.263398 5142 +intereststeachingselect 0 2 6.263398 0.000000 4924 +linksperson 0 2 6.263398 0.000000 5143 +herlihi 0 2 6.263398 0.000000 5144 +copenhagen 0 2 6.263398 0.000000 5145 +tarditi 1 1 6.957497 6.957497 10653 +tolmach 1 1 6.957497 6.957497 10654 +papersrel 0 1 6.957497 0.000000 10655 +ofadvanc 0 1 6.957497 0.000000 10656 +forbuild 0 1 6.957497 0.000000 10657 +safelanguag 0 1 6.957497 0.000000 10658 +toolsfrom 0 1 6.957497 0.000000 10659 +systemssoftwar 0 1 6.957497 0.000000 10660 +paperssemant 0 1 6.957497 0.000000 10661 +safetythrough 0 1 6.957497 0.000000 10662 +yasuhiko 0 1 6.957497 0.000000 10663 +minamid 0 1 6.957497 0.000000 10664 +matthia 0 1 6.957497 0.000000 10665 +felleisen 0 1 6.957497 0.000000 10666 +reportcmu 0 1 6.957497 0.000000 10667 +notecmu 0 1 6.957497 0.000000 10668 +intensionaltyp 0 1 6.957497 0.000000 10669 +parallelizationgreg 0 1 6.957497 0.000000 10670 +mauric 0 1 6.957497 0.000000 10671 +scienceperson 0 1 6.957497 0.000000 10672 +informationhom 0 1 6.957497 0.000000 10673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..51d88e87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +current 0 284 1.098612 0.000000 21 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +address 1 170 1.791759 1.791759 62 +resum 0 79 2.564949 0.000000 217 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +taiwan 0 16 4.174387 0.000000 1006 +avenu 0 12 4.465908 0.000000 1277 +mapl 0 11 4.553877 0.000000 1376 +perman 0 11 4.553877 0.000000 1372 +taipei 0 4 5.568345 0.000000 2926 +shing 0 2 6.263398 0.000000 5146 +jiun 1 1 6.957497 6.957497 10674 +jhlin 0 1 6.957497 0.000000 10675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..d7707cfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +homepag 0 93 2.397895 0.000000 148 +jerri 1 3 5.857933 5.857933 3445 +edujerri 0 1 6.957497 0.000000 10676 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..ca1d9aa0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +student 1 343 1.098612 1.098612 19 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +phone 1 175 1.791759 1.791759 45 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +homepag 1 93 2.397895 2.397895 148 +pictur 0 89 2.397895 0.000000 160 +internet 1 83 2.484907 2.484907 186 +educ 0 86 2.484907 0.000000 191 +school 0 84 2.484907 0.000000 188 +ieee 0 86 2.484907 0.000000 190 +state 0 76 2.564949 0.000000 207 +nation 1 74 2.639057 2.639057 240 +servic 1 72 2.639057 2.639057 236 +upson 0 71 2.639057 0.000000 218 +degre 0 69 2.708050 0.000000 259 +window 0 68 2.708050 0.000000 242 +new 1 64 2.772589 2.772589 262 +foundat 0 62 2.772589 0.000000 286 +guid 0 63 2.772589 0.000000 267 +virtual 0 62 2.772589 0.000000 285 +best 0 59 2.833213 0.000000 299 +cool 0 49 3.044522 0.000000 374 +directori 0 45 3.135494 0.000000 396 +music 1 42 3.218876 3.218876 436 +york 0 41 3.218876 0.000000 435 +review 0 42 3.218876 0.000000 425 +futur 0 41 3.218876 0.000000 427 +societi 0 40 3.258097 0.000000 456 +china 2 37 3.332205 6.664410 487 +award 0 34 3.401197 0.000000 523 +chines 2 29 3.583519 7.167038 595 +art 0 29 3.583519 0.000000 593 +weather 0 28 3.610918 0.000000 618 +magazin 1 24 3.761200 3.761200 704 +daili 0 24 3.761200 0.000000 706 +yahoo 0 24 3.761200 0.000000 707 +honor 0 23 3.806662 0.000000 729 +famili 0 23 3.806662 0.000000 735 +wang 0 21 3.912023 0.000000 790 +beij 0 19 4.007333 0.000000 876 +histori 0 19 4.007333 0.000000 853 +taiwan 1 16 4.174387 4.174387 1006 +transfer 0 16 4.174387 0.000000 967 +rank 1 14 4.317488 4.317488 1063 +incomput 0 14 4.317488 0.000000 1096 +entertain 0 12 4.465908 0.000000 1286 +tour 0 11 4.553877 0.000000 1307 +america 0 11 4.553877 0.000000 1370 +fellowship 1 10 4.653960 4.653960 1460 +sister 1 9 4.753590 4.753590 1524 +film 0 8 4.875197 0.000000 1761 +tourist 0 8 4.875197 0.000000 1710 +digest 1 7 5.010635 5.010635 1864 +cultur 0 7 5.010635 0.000000 1951 +monei 0 7 5.010635 0.000000 1934 +peterson 0 7 5.010635 0.000000 1850 +christian 0 7 5.010635 0.000000 1949 +scholar 0 6 5.164786 0.000000 2180 +forum 0 6 5.164786 0.000000 2027 +postcard 0 6 5.164786 0.000000 2181 +brook 0 6 5.164786 0.000000 2152 +suni 1 5 5.347108 5.347108 2452 +ucla 0 5 5.347108 0.000000 2502 +hallithaca 0 4 5.568345 0.000000 2894 +binghamton 1 3 5.857933 5.857933 3544 +twin 1 3 5.857933 5.857933 3657 +hongkong 0 3 5.857933 0.000000 3677 +stamp 0 3 5.857933 0.000000 3678 +ryan 0 3 5.857933 0.000000 3679 +tian 0 3 5.857933 0.000000 3680 +stoni 0 3 5.857933 0.000000 3571 +nankai 0 2 6.263398 0.000000 5147 +tianjin 0 2 6.263398 0.000000 5148 +barri 0 2 6.263398 0.000000 5149 +sciencefound 0 2 6.263398 0.000000 5150 +chinaand 0 2 6.263398 0.000000 5151 +sceneri 0 2 6.263398 0.000000 5152 +sheng 0 2 6.263398 0.000000 5153 +liber 0 2 6.263398 0.000000 5154 +wangphd 0 1 6.957497 0.000000 10677 +jiawang 0 1 6.957497 0.000000 10678 +goldwat 0 1 6.957497 0.000000 10679 +cbnet 0 1 6.957497 0.000000 10680 +chinanet 0 1 6.957497 0.000000 10681 +chinesecalendar 0 1 6.957497 0.000000 10682 +mediainform 0 1 6.957497 0.000000 10683 +hongkonglaserdisccent 0 1 6.957497 0.000000 10684 +internetdistribut 0 1 6.957497 0.000000 10685 +multilingu 0 1 6.957497 0.000000 10686 +smovieplex 0 1 6.957497 0.000000 10687 +diwww 0 1 6.957497 0.000000 10688 +thesenior 0 1 6.957497 0.000000 10689 +worldmap 0 1 6.957497 0.000000 10690 +mandarin 0 1 6.957497 0.000000 10691 +cssa 0 1 6.957497 0.000000 10692 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..cdbb30b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +process 0 142 1.945910 0.000000 72 +machin 0 129 2.079442 0.000000 95 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +mani 1 92 2.397895 2.397895 150 +imag 0 91 2.397895 0.000000 161 +level 0 87 2.484907 0.000000 180 +info 0 85 2.484907 0.000000 176 +master 0 76 2.564949 0.000000 216 +prof 0 64 2.772589 0.000000 273 +colleg 0 61 2.833213 0.000000 300 +semest 0 58 2.890372 0.000000 312 +found 0 53 2.944439 0.000000 337 +long 0 43 3.178054 0.000000 413 +vision 1 41 3.218876 3.218876 430 +robot 0 36 3.367296 0.000000 497 +primari 0 25 3.737670 0.000000 669 +particularli 0 19 4.007333 0.000000 867 +miller 1 17 4.110874 4.110874 949 +night 0 11 4.553877 0.000000 1319 +justin 1 7 5.010635 5.010635 1789 +uniform 0 7 5.010635 0.000000 1845 +ramin 0 7 5.010635 0.000000 1820 +zabih 0 6 5.164786 0.000000 2138 +csrvl 0 3 5.857933 0.000000 3543 +navi 0 2 6.263398 0.000000 5155 +com 0 2 6.263398 0.000000 5156 +ofengin 0 1 6.957497 0.000000 10693 +assistantwork 0 1 6.957497 0.000000 10694 +ismachin 0 1 6.957497 0.000000 10695 +informationsom 0 1 6.957497 0.000000 10696 +rant 0 1 6.957497 0.000000 10697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..001d2214 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +high 1 130 2.079442 2.079442 101 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +present 0 91 2.397895 0.000000 145 +sinc 0 90 2.397895 0.000000 159 +school 1 84 2.484907 2.484907 188 +west 0 83 2.484907 0.000000 192 +internet 0 83 2.484907 0.000000 186 +resum 0 79 2.564949 0.000000 217 +optim 0 79 2.564949 0.000000 197 +april 0 77 2.564949 0.000000 196 +free 0 73 2.639057 0.000000 224 +receiv 0 66 2.708050 0.000000 244 +practic 0 70 2.708050 0.000000 246 +multimedia 0 68 2.708050 0.000000 258 +main 0 67 2.708050 0.000000 256 +new 1 64 2.772589 2.772589 262 +creat 0 63 2.772589 0.000000 277 +street 0 63 2.772589 0.000000 293 +januari 0 62 2.772589 0.000000 264 +cool 1 49 3.044522 3.044522 374 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +seminar 0 38 3.295837 0.000000 470 +formal 0 37 3.332205 0.000000 478 +game 1 36 3.367296 3.367296 498 +product 0 33 3.433987 0.000000 527 +campu 0 27 3.637586 0.000000 623 +jeff 0 25 3.737670 0.000000 673 +daili 0 24 3.761200 0.000000 706 +divis 0 21 3.912023 0.000000 803 +mpeg 1 20 3.951244 3.951244 831 +anyon 0 17 4.110874 0.000000 916 +intel 0 16 4.174387 0.000000 1000 +practicum 0 16 4.174387 0.000000 960 +jose 0 16 4.174387 0.000000 976 +francisco 0 14 4.317488 0.000000 1095 +went 0 12 4.465908 0.000000 1279 +entertain 0 12 4.465908 0.000000 1286 +newspap 0 12 4.465908 0.000000 1280 +systemsc 0 11 4.553877 0.000000 1293 +mapl 0 11 4.553877 0.000000 1376 +purdu 0 10 4.653960 0.000000 1466 +sundai 0 10 4.653960 0.000000 1387 +leader 0 9 4.753590 0.000000 1576 +sister 0 9 4.753590 0.000000 1524 +portland 0 7 5.010635 0.000000 1878 +chronicl 0 7 5.010635 0.000000 1952 +indiana 0 6 5.164786 0.000000 2057 +oregon 0 5 5.347108 0.000000 2437 +thrive 0 5 5.347108 0.000000 2257 +revolut 0 5 5.347108 0.000000 2315 +encod 1 4 5.568345 5.568345 2929 +dalla 0 4 5.568345 0.000000 2930 +classesc 0 3 5.857933 0.000000 3681 +detroit 0 3 5.857933 0.000000 3565 +counti 0 3 5.857933 0.000000 3682 +cornellopoli 0 2 6.263398 0.000000 5157 +techniquec 0 2 6.263398 0.000000 5158 +methodsc 0 2 6.263398 0.000000 5159 +colloquiumc 0 2 6.263398 0.000000 5160 +magazinepc 0 2 6.263398 0.000000 5161 +morn 0 2 6.263398 0.000000 5162 +orang 0 2 6.263398 0.000000 5163 +herald 0 2 6.263398 0.000000 4789 +hillsboro 1 1 6.957497 6.957497 10698 +moorejeff 0 1 6.957497 0.000000 10699 +moorewel 0 1 6.957497 0.000000 10700 +mastersof 0 1 6.957497 0.000000 10701 +lafayett 0 1 6.957497 0.000000 10702 +suburb 0 1 6.957497 0.000000 10703 +employmentmi 0 1 6.957497 0.000000 10704 +classesnba 0 1 6.957497 0.000000 10705 +sectorc 0 1 6.957497 0.000000 10706 +researchfal 0 1 6.957497 0.000000 10707 +paperc 0 1 6.957497 0.000000 10708 +opendoc 0 1 6.957497 0.000000 10709 +mfcoptim 0 1 6.957497 0.000000 10710 +researchsoftwar 0 1 6.957497 0.000000 10711 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 0 1 6.957497 0.000000 10712 +companiesintelsilicon 0 1 6.957497 0.000000 10713 +graphicsibmsunapplemagazinespc 0 1 6.957497 0.000000 10714 +weekpc 0 1 6.957497 0.000000 10715 +computingcomput 0 1 6.957497 0.000000 10716 +shopperwindow 0 1 6.957497 0.000000 10717 +sourcescomput 0 1 6.957497 0.000000 10718 +lifemacusermacweekinteract 0 1 6.957497 0.000000 10719 +weekfamili 0 1 6.957497 0.000000 10720 +pccomput 0 1 6.957497 0.000000 10721 +worldelectron 0 1 6.957497 0.000000 10722 +newspapersusa 0 1 6.957497 0.000000 10723 +todaywal 0 1 6.957497 0.000000 10724 +journalnew 0 1 6.957497 0.000000 10725 +timesphiladelphia 0 1 6.957497 0.000000 10726 +onlineth 0 1 6.957497 0.000000 10727 +worldwideth 0 1 6.957497 0.000000 10728 +opinionsth 0 1 6.957497 0.000000 10729 +gopherth 0 1 6.957497 0.000000 10730 +knoxvil 0 1 6.957497 0.000000 10731 +sentinelth 0 1 6.957497 0.000000 10732 +onlinelat 0 1 6.957497 0.000000 10733 +serviceth 0 1 6.957497 0.000000 10734 +nugget 0 1 6.957497 0.000000 10735 +oregonrworld 0 1 6.957497 0.000000 10736 +registerth 0 1 6.957497 0.000000 10737 +examinersan 0 1 6.957497 0.000000 10738 +mercuryth 0 1 6.957497 0.000000 10739 +timesnando 0 1 6.957497 0.000000 10740 +netusa 0 1 6.957497 0.000000 10741 +todayboston 0 1 6.957497 0.000000 10742 +globeportland 0 1 6.957497 0.000000 10743 +telegramvisitor 0 1 6.957497 0.000000 10744 +fdithaca 0 1 6.957497 0.000000 10745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..6fb6e306 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +distribut 1 162 1.791759 1.791759 51 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +click 0 142 1.945910 0.000000 78 +report 0 131 2.079442 0.000000 92 +number 0 130 2.079442 0.000000 97 +code 0 108 2.197225 0.000000 116 +text 0 98 2.302585 0.000000 133 +memori 0 101 2.302585 0.000000 139 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +resum 0 79 2.564949 0.000000 217 +exampl 0 77 2.564949 0.000000 195 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +view 1 70 2.708050 2.708050 254 +multimedia 0 68 2.708050 0.000000 258 +simul 0 66 2.708050 0.000000 255 +virtual 0 62 2.772589 0.000000 285 +browser 1 56 2.890372 2.890372 313 +direct 0 57 2.890372 0.000000 316 +friend 0 48 3.044522 0.000000 376 +visitor 0 49 3.044522 0.000000 371 +video 1 44 3.135494 3.135494 405 +better 0 45 3.135494 0.000000 401 +movi 0 40 3.258097 0.000000 459 +vita 0 38 3.295837 0.000000 473 +game 0 36 3.367296 0.000000 498 +curriculum 0 33 3.433987 0.000000 535 +anim 1 31 3.496508 3.496508 557 +enabl 1 26 3.688879 3.688879 655 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +jose 0 16 4.174387 0.000000 976 +sign 0 16 4.174387 0.000000 970 +hobbi 0 16 4.174387 0.000000 1009 +transit 0 15 4.248495 0.000000 1046 +avenu 0 12 4.465908 0.000000 1277 +meng 0 12 4.465908 0.000000 1214 +clock 1 11 4.553877 4.553877 1320 +mapl 0 11 4.553877 0.000000 1376 +bill 0 11 4.553877 0.000000 1297 +rivl 0 8 4.875197 0.000000 1632 +autonom 0 8 4.875197 0.000000 1749 +vehicl 0 7 5.010635 0.000000 1928 +courtesi 0 7 5.010635 0.000000 1953 +photographi 0 6 5.164786 0.000000 2146 +recruit 0 6 5.164786 0.000000 2145 +hoca 0 5 5.347108 0.000000 2241 +multitask 0 4 5.568345 0.000000 2803 +crazi 0 4 5.568345 0.000000 2822 +fernandez 0 3 5.857933 0.000000 3591 +lui 1 2 6.263398 6.263398 5164 +joselui 0 2 6.263398 0.000000 4965 +pyramania 0 2 6.263398 0.000000 4957 +actor 0 2 6.263398 0.000000 4240 +pagejos 0 1 6.957497 0.000000 10746 +fernandezjos 0 1 6.957497 0.000000 10747 +fernandezmast 0 1 6.957497 0.000000 10748 +ebithaca 0 1 6.957497 0.000000 10749 +scroll 0 1 6.957497 0.000000 10750 +presentationc 0 1 6.957497 0.000000 10751 +spaceship 0 1 6.957497 0.000000 10752 +battl 0 1 6.957497 0.000000 10753 +picturesmusiccomputerswrit 0 1 6.957497 0.000000 10754 +giel 0 1 6.957497 0.000000 10755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..ee1f15c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +distribut 0 162 1.791759 0.000000 51 +click 0 142 1.945910 0.000000 78 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +simul 0 66 2.708050 0.000000 255 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +administr 0 27 3.637586 0.000000 628 +busi 0 21 3.912023 0.000000 784 +johnson 0 13 4.382027 0.000000 1162 +linda 0 10 4.653960 0.000000 1394 +autonom 0 8 4.875197 0.000000 1749 +vehicl 0 7 5.010635 0.000000 1928 +hurtado 1 1 6.957497 6.957497 10756 +julin 0 1 6.957497 0.000000 10757 +pagejulin 0 1 6.957497 0.000000 10758 +universitymast 0 1 6.957497 0.000000 10759 +managementmast 0 1 6.957497 0.000000 10760 +science 0 1 6.957497 0.000000 10761 +colombia 0 1 6.957497 0.000000 10762 +er 0 1 6.957497 0.000000 10763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..98432308 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +group 0 183 1.609438 0.000000 36 +texa 1 160 1.791759 1.791759 64 +welcom 0 122 2.079442 0.000000 99 +send 0 114 2.197225 0.000000 109 +thing 0 84 2.484907 0.000000 189 +complet 0 77 2.564949 0.000000 208 +resum 0 79 2.564949 0.000000 217 +august 0 66 2.708050 0.000000 257 +septemb 0 65 2.772589 0.000000 274 +favorit 0 44 3.135494 0.000000 410 +join 0 39 3.258097 0.000000 457 +ad 0 32 3.465736 0.000000 544 +instrument 0 7 5.010635 0.000000 1954 +edumi 0 6 5.164786 0.000000 2132 +dalla 0 4 5.568345 0.000000 2930 +janeen 0 1 6.957497 0.000000 10764 +homepagejaneen 0 1 6.957497 0.000000 10765 +reich 0 1 6.957497 0.000000 10766 +jreich 0 1 6.957497 0.000000 10767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..f9a9c9bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +list 1 201 1.609438 1.609438 39 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +high 1 130 2.079442 2.079442 101 +machin 0 129 2.079442 0.000000 95 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +master 1 76 2.564949 2.564949 216 +come 1 78 2.564949 2.564949 202 +resum 0 79 2.564949 0.000000 217 +dynam 0 76 2.564949 0.000000 194 +receiv 0 66 2.708050 0.000000 244 +multimedia 0 68 2.708050 0.000000 258 +window 0 68 2.708050 0.000000 242 +main 0 67 2.708050 0.000000 256 +septemb 1 65 2.772589 2.772589 274 +januari 0 62 2.772589 0.000000 264 +visit 0 63 2.772589 0.000000 288 +still 0 50 3.044522 0.000000 362 +get 0 46 3.091042 0.000000 380 +cost 0 37 3.332205 0.000000 480 +synchron 0 29 3.583519 0.000000 588 +particip 0 29 3.583519 0.000000 589 +although 0 25 3.737670 0.000000 667 +recognit 0 23 3.806662 0.000000 723 +chip 0 21 3.912023 0.000000 770 +portabl 0 20 3.951244 0.000000 819 +speed 0 18 4.060443 0.000000 911 +stock 1 16 4.174387 4.174387 1007 +massachusett 0 14 4.317488 0.000000 1118 +bodi 1 13 4.382027 4.382027 1178 +meng 0 12 4.465908 0.000000 1214 +speech 0 12 4.465908 0.000000 1222 +grant 0 12 4.465908 0.000000 1216 +systemsc 0 11 4.553877 0.000000 1293 +desktop 0 10 4.653960 0.000000 1445 +capac 0 8 4.875197 0.000000 1740 +filter 0 8 4.875197 0.000000 1641 +mile 0 8 4.875197 0.000000 1743 +ground 0 7 5.010635 0.000000 1955 +facial 1 5 5.347108 5.347108 2438 +amherst 0 5 5.347108 0.000000 2484 +thrive 0 5 5.347108 0.000000 2257 +stage 0 5 5.347108 0.000000 2488 +steer 0 5 5.347108 0.000000 2328 +car 1 4 5.568345 5.568345 2931 +ford 1 4 5.568345 5.568345 2636 +sold 0 4 5.568345 0.000000 2813 +exhaust 0 4 5.568345 0.000000 2825 +gear 0 4 5.568345 0.000000 2891 +visionc 0 3 5.857933 0.000000 3489 +obvious 0 3 5.857933 0.000000 3474 +memberof 0 3 5.857933 0.000000 3169 +bought 0 2 6.263398 0.000000 5165 +accel 0 2 6.263398 0.000000 5166 +plug 0 2 6.263398 0.000000 5167 +camaro 1 1 6.957497 6.957497 10768 +chevi 1 1 6.957497 6.957497 10769 +jodi 0 1 6.957497 0.000000 10770 +shapirojodi 0 1 6.957497 0.000000 10771 +shapiroeduc 0 1 6.957497 0.000000 10772 +engineeringe 0 1 6.957497 0.000000 10773 +telecommunicationc 0 1 6.957497 0.000000 10774 +researchspr 0 1 6.957497 0.000000 10775 +systemse 0 1 6.957497 0.000000 10776 +networksnba 0 1 6.957497 0.000000 10777 +revolutionc 0 1 6.957497 0.000000 10778 +researchma 0 1 6.957497 0.000000 10779 +automot 0 1 6.957497 0.000000 10780 +engineeringinterest 0 1 6.957497 0.000000 10781 +animationlow 0 1 6.957497 0.000000 10782 +videoconferenc 0 1 6.957497 0.000000 10783 +recognitioninterest 0 1 6.957497 0.000000 10784 +firebird 0 1 6.957497 0.000000 10785 +yourselfelectron 0 1 6.957497 0.000000 10786 +fuel 0 1 6.957497 0.000000 10787 +inject 0 1 6.957497 0.000000 10788 +alwayshav 0 1 6.957497 0.000000 10789 +designingan 0 1 6.957497 0.000000 10790 +pageefi 0 1 6.957497 0.000000 10791 +pagethes 0 1 6.957497 0.000000 10792 +gearsmodif 0 1 6.957497 0.000000 10793 +hypertech 0 1 6.957497 0.000000 10794 +flowmast 0 1 6.957497 0.000000 10795 +hurst 0 1 6.957497 0.000000 10796 +shifter 0 1 6.957497 0.000000 10797 +wheel 0 1 6.957497 0.000000 10798 +mustang 0 1 6.957497 0.000000 10799 +speedmodif 0 1 6.957497 0.000000 10800 +motorsport 0 1 6.957497 0.000000 10801 +wiresbest 0 1 6.957497 0.000000 10802 +mphbest 0 1 6.957497 0.000000 10803 +pagenumb 0 1 6.957497 0.000000 10804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..1c05eca4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +cornel 0 215 1.386294 0.000000 23 +continu 0 39 3.258097 0.000000 448 +eduto 0 7 5.010635 0.000000 1956 +julia 0 2 6.263398 0.000000 5094 +pagejulia 0 1 6.957497 0.000000 10805 +komissarchik 0 1 6.957497 0.000000 10806 +juliak 0 1 6.957497 0.000000 10807 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..d458d65c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +paper 0 205 1.609438 0.000000 38 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +document 0 121 2.079442 0.000000 89 +high 0 130 2.079442 0.000000 101 +theori 1 111 2.197225 2.197225 127 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +techniqu 1 99 2.302585 2.302585 138 +need 0 98 2.302585 0.000000 135 +center 1 88 2.397895 2.397895 158 +pictur 0 89 2.397895 0.000000 160 +search 0 95 2.397895 0.000000 155 +wide 0 84 2.484907 0.000000 185 +master 0 76 2.564949 0.000000 216 +complet 0 77 2.564949 0.000000 208 +server 0 76 2.564949 0.000000 204 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +addit 0 74 2.639057 0.000000 228 +ithaca 0 65 2.772589 0.000000 294 +hardwar 0 51 2.995732 0.000000 350 +pointer 0 48 3.044522 0.000000 368 +video 1 44 3.135494 3.135494 405 +better 0 45 3.135494 0.000000 401 +describ 0 45 3.135494 0.000000 400 +fast 1 42 3.218876 3.218876 429 +http 0 41 3.218876 0.000000 420 +realli 0 40 3.258097 0.000000 444 +workstat 1 37 3.332205 3.332205 479 +global 0 34 3.401197 0.000000 520 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +independ 0 32 3.465736 0.000000 548 +produc 0 30 3.555348 0.000000 572 +platform 0 29 3.583519 0.000000 591 +full 1 28 3.610918 3.610918 615 +becom 0 28 3.610918 0.000000 603 +cluster 0 28 3.610918 0.000000 612 +campu 0 27 3.637586 0.000000 623 +administr 0 27 3.637586 0.000000 628 +compress 0 23 3.806662 0.000000 719 +color 0 22 3.850148 0.000000 762 +toolkit 0 20 3.951244 0.000000 835 +increas 0 20 3.951244 0.000000 829 +commerci 0 16 4.174387 0.000000 1005 +critic 0 16 4.174387 0.000000 982 +topolog 0 14 4.317488 0.000000 1089 +demand 0 14 4.317488 0.000000 1073 +horu 0 14 4.317488 0.000000 1116 +achiev 0 14 4.317488 0.000000 1088 +grow 1 12 4.465908 4.465908 1209 +faster 0 11 4.553877 0.000000 1323 +screen 0 9 4.753590 0.000000 1577 +hallcornel 0 8 4.875197 0.000000 1757 +capit 0 7 5.010635 0.000000 1957 +thegoal 0 6 5.164786 0.000000 2033 +sparcstat 0 5 5.347108 0.000000 2406 +fulfil 0 4 5.568345 0.000000 2932 +innov 0 4 5.568345 0.000000 2933 +emilio 0 3 5.857933 0.000000 3683 +summit 0 3 5.857933 0.000000 3684 +adress 0 2 6.263398 0.000000 5168 +occup 0 2 6.263398 0.000000 5169 +fulltim 0 2 6.263398 0.000000 5170 +ethernet 0 2 6.263398 0.000000 5171 +blast 0 2 6.263398 0.000000 5172 +julian 1 1 6.957497 6.957497 10808 +pelenur 1 1 6.957497 6.957497 10809 +centerithaca 0 1 6.957497 0.000000 10810 +wfinger 0 1 6.957497 0.000000 10811 +cyberserv 0 1 6.957497 0.000000 10812 +httpserver 0 1 6.957497 0.000000 10813 +prvf 0 1 6.957497 0.000000 10814 +poss 0 1 6.957497 0.000000 10815 +screenmot 0 1 6.957497 0.000000 10816 +showthat 0 1 6.957497 0.000000 10817 +snarf 0 1 6.957497 0.000000 10818 +transferwith 0 1 6.957497 0.000000 10819 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..e874a7ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +relat 0 139 1.945910 0.000000 68 +perform 0 143 1.945910 0.000000 74 +tool 0 117 2.079442 0.000000 93 +schedul 0 119 2.079442 0.000000 85 +theori 1 111 2.197225 2.197225 127 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +technic 0 100 2.302585 0.000000 140 +take 0 97 2.302585 0.000000 134 +center 0 88 2.397895 0.000000 158 +resourc 1 81 2.484907 2.484907 172 +info 0 85 2.484907 0.000000 176 +want 0 79 2.564949 0.000000 199 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +practic 0 70 2.708050 0.000000 246 +abstract 0 62 2.772589 0.000000 276 +type 1 61 2.833213 2.833213 296 +back 0 60 2.833213 0.000000 297 +publish 0 57 2.890372 0.000000 326 +overview 0 56 2.890372 0.000000 323 +talk 0 53 2.944439 0.000000 336 +done 0 47 3.091042 0.000000 381 +slide 1 38 3.295837 3.295837 467 +seminar 0 38 3.295837 0.000000 470 +especi 0 36 3.367296 0.000000 496 +bibliographi 0 34 3.401197 0.000000 518 +art 1 29 3.583519 3.583519 593 +great 0 27 3.637586 0.000000 626 +mine 0 26 3.688879 0.000000 654 +higher 0 24 3.761200 0.000000 690 +sequenc 0 23 3.806662 0.000000 734 +fine 1 20 3.951244 3.951244 822 +supervis 0 20 3.951244 0.000000 840 +verif 0 20 3.951244 0.000000 826 +statu 0 18 4.060443 0.000000 885 +pretti 0 13 4.382027 0.000000 1191 +forth 0 13 4.382027 0.000000 1186 +nuprl 1 10 4.653960 4.653960 1402 +mainli 0 10 4.653960 0.000000 1432 +equip 0 10 4.653960 0.000000 1459 +hockei 1 8 4.875197 4.875197 1760 +forum 0 6 5.164786 0.000000 2027 +czar 1 5 5.347108 5.347108 2503 +hickei 0 4 5.568345 0.000000 2845 +identif 0 4 5.568345 0.000000 2773 +jason 0 3 5.857933 0.000000 3389 +orth 0 3 5.857933 0.000000 3685 +backcountri 0 3 5.857933 0.000000 3686 +publicli 0 3 5.857933 0.000000 3687 +theatr 1 2 6.263398 6.263398 5173 +universitydepart 0 2 6.263398 0.000000 4871 +bellcor 0 2 6.263398 0.000000 5174 +robertconst 0 1 6.957497 0.000000 10820 +thefox 0 1 6.957497 0.000000 10821 +markleon 0 1 6.957497 0.000000 10822 +formalsystem 0 1 6.957497 0.000000 10823 +levelmodul 0 1 6.957497 0.000000 10824 +cornella 0 1 6.957497 0.000000 10825 +publishedat 0 1 6.957497 0.000000 10826 +havegiven 0 1 6.957497 0.000000 10827 +theygiv 0 1 6.957497 0.000000 10828 +galleryof 0 1 6.957497 0.000000 10829 +tryth 0 1 6.957497 0.000000 10830 +fineart 0 1 6.957497 0.000000 10831 +cucshockei 0 1 6.957497 0.000000 10832 +thebackcountri 0 1 6.957497 0.000000 10833 +maintainedsoftwar 0 1 6.957497 0.000000 10834 +hockeyfor 0 1 6.957497 0.000000 10835 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..6211b9df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +thing 0 84 2.484907 0.000000 189 +ithaca 0 65 2.772589 0.000000 294 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +go 0 33 3.433987 0.000000 529 +reach 0 24 3.761200 0.000000 688 +accept 0 18 4.060443 0.000000 879 +english 1 15 4.248495 4.248495 1033 +japan 1 8 4.875197 4.875197 1762 +japanes 1 4 5.568345 5.568345 2934 +sell 0 4 5.568345 0.000000 2935 +sold 0 4 5.568345 0.000000 2813 +sale 0 3 5.857933 0.000000 3688 +kamijo 1 1 6.957497 6.957497 10836 +koichi 1 1 6.957497 6.957497 10837 +kamijokoichi 0 1 6.957497 0.000000 10838 +papershometownseduc 0 1 6.957497 0.000000 10839 +experienceskoichi 0 1 6.957497 0.000000 10840 +muriel 0 1 6.957497 0.000000 10841 +kkamijoh 0 1 6.957497 0.000000 10842 +vnet 0 1 6.957497 0.000000 10843 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..b4f76998 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +document 0 121 2.079442 0.000000 89 +confer 0 126 2.079442 0.000000 100 +structur 0 106 2.197225 0.000000 105 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +upson 0 71 2.639057 0.000000 218 +laboratori 1 63 2.772589 2.772589 292 +dept 0 64 2.772589 0.000000 291 +ithaca 0 65 2.772589 0.000000 294 +previou 0 62 2.772589 0.000000 290 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +investig 0 51 2.995732 0.000000 353 +advisor 0 51 2.995732 0.000000 355 +better 0 45 3.135494 0.000000 401 +video 0 44 3.135494 0.000000 405 +autom 0 41 3.218876 0.000000 434 +vision 0 41 3.218876 0.000000 430 +york 0 41 3.218876 0.000000 435 +announc 0 40 3.258097 0.000000 441 +close 0 38 3.295837 0.000000 465 +field 0 37 3.332205 0.000000 482 +robot 1 36 3.367296 3.367296 497 +next 0 34 3.401197 0.000000 517 +articl 0 33 3.433987 0.000000 530 +collabor 0 32 3.465736 0.000000 543 +anim 0 31 3.496508 0.000000 557 +graph 0 30 3.555348 0.000000 576 +manipul 1 27 3.637586 3.637586 624 +arrai 0 27 3.637586 0.000000 627 +strategi 1 25 3.737670 3.737670 682 +handl 0 24 3.761200 0.000000 685 +magazin 0 24 3.761200 0.000000 704 +higher 0 24 3.761200 0.000000 690 +director 0 22 3.850148 0.000000 767 +navig 0 21 3.912023 0.000000 796 +facil 0 20 3.951244 0.000000 814 +stanford 1 17 4.110874 4.110874 955 +germani 0 17 4.110874 0.000000 946 +devic 0 16 4.174387 0.000000 1002 +vector 0 16 4.174387 0.000000 961 +micro 1 15 4.248495 4.248495 1031 +club 0 15 4.248495 0.000000 1058 +earlier 0 13 4.382027 0.000000 1140 +forc 0 10 4.653960 0.000000 1384 +donald 0 9 4.753590 0.000000 1510 +frank 0 9 4.753590 0.000000 1568 +wall 0 9 4.753590 0.000000 1553 +wire 0 8 4.875197 0.000000 1747 +gate 0 6 5.164786 0.000000 2182 +layout 0 6 5.164786 0.000000 2183 +lloyd 0 6 5.164786 0.000000 2103 +educurr 0 5 5.347108 0.000000 2504 +actuat 0 5 5.347108 0.000000 2442 +climb 0 4 5.568345 0.000000 2936 +karl 1 3 5.857933 5.857933 3623 +bhringer 1 3 5.857933 5.857933 3606 +karlsruh 1 3 5.857933 5.857933 3689 +microfabr 1 3 5.857933 5.857933 3610 +noel 0 3 5.857933 0.000000 3376 +kwon 0 3 5.857933 0.000000 3690 +deeper 0 3 5.857933 0.000000 3146 +friedrich 1 2 6.263398 6.263398 5175 +nanofabr 0 2 6.263398 0.000000 5010 +innew 0 2 6.263398 0.000000 4512 +sculptur 0 2 6.263398 0.000000 5176 +wright 0 2 6.263398 0.000000 5177 +nano 0 2 6.263398 0.000000 4961 +pagekarl 0 2 6.263398 0.000000 5043 +andassembl 1 1 6.957497 6.957497 10844 +dipl 0 1 6.957497 0.000000 10845 +implementmicro 0 1 6.957497 0.000000 10846 +withprogramm 0 1 6.957497 0.000000 10847 +professorbruc 0 1 6.957497 0.000000 10848 +founder 0 1 6.957497 0.000000 10849 +macdonaldand 0 1 6.957497 0.000000 10850 +hisresearch 0 1 6.957497 0.000000 10851 +invis 0 1 6.957497 0.000000 10852 +cantilev 0 1 6.957497 0.000000 10853 +fallingwat 0 1 6.957497 0.000000 10854 +outin 0 1 6.957497 0.000000 10855 +lindseth 0 1 6.957497 0.000000 10856 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..42f6a480 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +includ 1 208 1.609438 1.609438 42 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +implement 0 152 1.791759 0.000000 52 +avail 0 169 1.791759 0.000000 48 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +hall 0 146 1.945910 0.000000 65 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +provid 1 121 2.079442 2.079442 94 +high 0 130 2.079442 0.000000 101 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +mathemat 1 108 2.197225 2.197225 123 +site 1 106 2.197225 2.197225 119 +look 0 107 2.197225 0.000000 115 +make 0 111 2.197225 0.000000 120 +world 0 115 2.197225 0.000000 126 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +user 0 104 2.302585 0.000000 137 +commun 1 95 2.397895 2.397895 157 +call 0 91 2.397895 0.000000 153 +mani 0 92 2.397895 0.000000 150 +wide 1 84 2.484907 2.484907 185 +help 0 83 2.484907 0.000000 175 +control 0 82 2.484907 0.000000 164 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +larg 0 82 2.484907 0.000000 168 +activ 0 84 2.484907 0.000000 182 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +messag 1 76 2.564949 2.564949 212 +method 0 80 2.564949 0.000000 213 +appear 0 78 2.564949 0.000000 210 +exampl 0 77 2.564949 0.000000 195 +june 0 79 2.564949 0.000000 214 +david 1 71 2.639057 2.639057 232 +html 1 75 2.639057 2.639057 235 +upson 0 71 2.639057 0.000000 218 +logic 0 71 2.639057 0.000000 230 +write 0 72 2.639057 0.000000 222 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +differ 0 66 2.708050 0.000000 253 +ithaca 0 65 2.772589 0.000000 294 +import 0 65 2.772589 0.000000 282 +experi 0 64 2.772589 0.000000 283 +simpl 0 60 2.833213 0.000000 298 +explor 0 58 2.890372 0.000000 324 +browser 0 56 2.890372 0.000000 313 +profession 0 51 2.995732 0.000000 345 +give 0 50 3.044522 0.000000 359 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +protocol 2 45 3.135494 6.270988 407 +even 0 45 3.135494 0.000000 393 +execut 0 45 3.135494 0.000000 404 +offer 0 43 3.178054 0.000000 414 +might 0 41 3.218876 0.000000 426 +programm 0 39 3.258097 0.000000 445 +multipl 0 39 3.258097 0.000000 453 +correct 0 38 3.295837 0.000000 462 +field 0 37 3.332205 0.000000 482 +cost 0 37 3.332205 0.000000 480 +feel 0 37 3.332205 0.000000 483 +respons 0 37 3.332205 0.000000 476 +download 0 36 3.367296 0.000000 489 +statist 1 35 3.401197 3.401197 521 +concurr 0 34 3.401197 0.000000 501 +random 0 34 3.401197 0.000000 511 +dissert 0 32 3.465736 0.000000 549 +kind 0 32 3.465736 0.000000 541 +specifi 1 30 3.555348 3.555348 568 +abl 0 30 3.555348 0.000000 566 +depend 0 29 3.583519 0.000000 583 +intend 0 28 3.610918 0.000000 599 +propos 0 28 3.610918 0.000000 602 +becom 0 28 3.610918 0.000000 603 +progress 0 28 3.610918 0.000000 598 +measur 0 28 3.610918 0.000000 609 +load 0 28 3.610918 0.000000 601 +except 0 28 3.610918 0.000000 607 +though 0 27 3.637586 0.000000 622 +consist 1 26 3.688879 3.688879 651 +revis 0 26 3.688879 0.000000 640 +reliabl 0 25 3.737670 0.000000 674 +fundament 0 25 3.737670 0.000000 661 +wai 0 25 3.737670 0.000000 662 +demonstr 0 24 3.761200 0.000000 694 +initi 0 23 3.806662 0.000000 717 +properti 2 22 3.850148 7.700296 749 +sciencecornel 0 22 3.850148 0.000000 768 +avoid 0 21 3.912023 0.000000 799 +verif 1 20 3.951244 3.951244 826 +applet 1 20 3.951244 3.951244 827 +basi 0 20 3.951244 0.000000 828 +assum 0 19 4.007333 0.000000 845 +hypertext 0 19 4.007333 0.000000 865 +concentr 1 18 4.060443 4.060443 906 +failur 0 18 4.060443 0.000000 898 +behavior 0 18 4.060443 0.000000 881 +encourag 0 18 4.060443 0.000000 880 +layer 2 17 4.110874 8.221748 926 +ultim 0 17 4.110874 0.000000 943 +condit 0 16 4.174387 0.000000 975 +portion 0 16 4.174387 0.000000 971 +action 0 15 4.248495 0.000000 1038 +horu 2 14 4.317488 8.634976 1116 +weak 1 13 4.382027 4.382027 1159 +whose 1 13 4.382027 4.382027 1166 +edui 0 13 4.382027 0.000000 1193 +suit 0 13 4.382027 0.000000 1129 +verifi 1 12 4.465908 4.465908 1261 +kenneth 0 12 4.465908 0.000000 1265 +minor 0 12 4.465908 0.000000 1237 +emploi 0 12 4.465908 0.000000 1284 +calcul 0 12 4.465908 0.000000 1268 +scienceat 0 11 4.553877 0.000000 1375 +reness 0 11 4.553877 0.000000 1333 +host 0 11 4.553877 0.000000 1306 +stack 2 10 4.653960 9.307920 1389 +guarante 1 10 4.653960 4.653960 1391 +certain 0 10 4.653960 0.000000 1393 +equal 0 10 4.653960 0.000000 1424 +desir 1 9 4.753590 4.753590 1542 +robbert 0 9 4.753590 0.000000 1529 +tempor 0 9 4.753590 0.000000 1584 +crash 0 8 4.875197 0.000000 1616 +notion 0 7 5.010635 0.000000 1947 +appar 0 7 5.010635 0.000000 1958 +dedic 0 7 5.010635 0.000000 1843 +hack 0 7 5.010635 0.000000 1950 +furthermor 0 6 5.164786 0.000000 2141 +rough 0 6 5.164786 0.000000 2107 +studentdepart 0 5 5.347108 0.000000 2505 +unnecessari 0 5 5.347108 0.000000 2506 +lost 0 5 5.347108 0.000000 2358 +ofdistribut 0 5 5.347108 0.000000 2316 +notabl 0 5 5.347108 0.000000 2276 +puzzl 0 5 5.347108 0.000000 2507 +disconnect 1 4 5.568345 5.568345 2664 +clearli 0 4 5.568345 0.000000 2590 +formula 0 3 5.857933 0.000000 3405 +omit 0 3 5.857933 0.000000 3466 +confid 0 3 5.857933 0.000000 3691 +temporarili 0 3 5.857933 0.000000 3692 +parallelmachin 0 3 5.857933 0.000000 3693 +lego 0 3 5.857933 0.000000 3188 +prone 1 2 6.263398 6.263398 5178 +thehoru 0 2 6.263398 0.000000 5179 +withprofessor 0 2 6.263398 0.000000 5180 +ofhoru 0 2 6.263398 0.000000 5181 +straightforward 0 2 6.263398 0.000000 4272 +thetop 0 2 6.263398 0.000000 4327 +atyp 0 2 6.263398 0.000000 5042 +sufficientto 0 2 6.263398 0.000000 4261 +haswork 0 2 6.263398 0.000000 5182 +distributedenviron 0 2 6.263398 0.000000 5183 +toi 0 2 6.263398 0.000000 5184 +linksfor 0 2 6.263398 0.000000 5185 +karr 1 1 6.957497 6.957497 10857 +karrdavid 0 1 6.957497 0.000000 10858 +karrphd 0 1 6.957497 0.000000 10859 +birmananddr 0 1 6.957497 0.000000 10860 +protocolsmi 0 1 6.957497 0.000000 10861 +formalspecif 0 1 6.957497 0.000000 10862 +variousinterest 0 1 6.957497 0.000000 10863 +usedin 0 1 6.957497 0.000000 10864 +stylefor 0 1 6.957497 0.000000 10865 +itsinterfac 0 1 6.957497 0.000000 10866 +andbelow 0 1 6.957497 0.000000 10867 +agiven 0 1 6.957497 0.000000 10868 +unusualcombin 0 1 6.957497 0.000000 10869 +systemsshould 0 1 6.957497 0.000000 10870 +constructcustom 0 1 6.957497 0.000000 10871 +theirassoci 0 1 6.957497 0.000000 10872 +thesecur 0 1 6.957497 0.000000 10873 +harden 0 1 6.957497 0.000000 10874 +ofverifi 0 1 6.957497 0.000000 10875 +stem 0 1 6.957497 0.000000 10876 +thepromis 0 1 6.957497 0.000000 10877 +variousguarante 0 1 6.957497 0.000000 10878 +passingenviron 0 1 6.957497 0.000000 10879 +delayedor 0 1 6.957497 0.000000 10880 +componentswer 0 1 6.957497 0.000000 10881 +considerablepromis 0 1 6.957497 0.000000 10882 +consistencywhil 0 1 6.957497 0.000000 10883 +filesin 0 1 6.957497 0.000000 10884 +partitionedinto 0 1 6.957497 0.000000 10885 +wouldallow 0 1 6.957497 0.000000 10886 +performancemi 0 1 6.957497 0.000000 10887 +andeffici 0 1 6.957497 0.000000 10888 +ofsystem 0 1 6.957497 0.000000 10889 +suscept 0 1 6.957497 0.000000 10890 +javath 0 1 6.957497 0.000000 10891 +applicationwith 0 1 6.957497 0.000000 10892 +tonavig 0 1 6.957497 0.000000 10893 +myweb 0 1 6.957497 0.000000 10894 +anetscap 0 1 6.957497 0.000000 10895 +abirthdai 0 1 6.957497 0.000000 10896 +forverifi 0 1 6.957497 0.000000 10897 +affiliationsi 0 1 6.957497 0.000000 10898 +andmaa 0 1 6.957497 0.000000 10899 +informationseemi 0 1 6.957497 0.000000 10900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..6e8d5a3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +click 1 142 1.945910 1.945910 78 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +postscript 1 131 2.079442 2.079442 90 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +code 1 108 2.197225 2.197225 116 +site 0 106 2.197225 0.000000 119 +memori 0 101 2.302585 0.000000 139 +take 0 97 2.302585 0.000000 134 +graphic 1 90 2.397895 2.397895 147 +present 0 91 2.397895 0.000000 145 +help 0 83 2.484907 0.000000 175 +interfac 1 79 2.564949 2.564949 209 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +master 0 76 2.564949 0.000000 216 +dynam 0 76 2.564949 0.000000 194 +multimedia 1 68 2.708050 2.708050 258 +view 1 70 2.708050 2.708050 254 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +simul 0 66 2.708050 0.000000 255 +virtual 1 62 2.772589 2.772589 285 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +scientif 0 53 2.944439 0.000000 341 +cool 1 49 3.044522 3.044522 374 +visual 0 48 3.044522 0.000000 372 +effect 1 46 3.091042 3.091042 385 +california 0 46 3.091042 0.000000 388 +featur 0 46 3.091042 0.000000 386 +favorit 1 44 3.135494 3.135494 410 +video 0 44 3.135494 0.000000 405 +combin 0 42 3.218876 0.000000 421 +music 0 42 3.218876 0.000000 436 +brian 0 38 3.295837 0.000000 466 +open 0 38 3.295837 0.000000 469 +game 1 36 3.367296 3.367296 498 +singl 0 34 3.401197 0.000000 510 +independ 0 32 3.465736 0.000000 548 +platform 0 29 3.583519 0.000000 591 +full 0 28 3.610918 0.000000 615 +enhanc 0 26 3.688879 0.000000 644 +aspect 0 25 3.737670 0.000000 663 +universityithaca 0 24 3.761200 0.000000 710 +motion 0 24 3.761200 0.000000 699 +sciencecornel 0 22 3.850148 0.000000 768 +chip 0 21 3.912023 0.000000 770 +break 1 20 3.951244 3.951244 812 +smith 0 20 3.951244 0.000000 820 +facil 0 20 3.951244 0.000000 814 +mpeg 0 20 3.951244 0.000000 831 +excel 0 19 4.007333 0.000000 868 +lot 1 18 4.060443 4.060443 889 +stand 0 18 4.060443 0.000000 891 +took 1 16 4.174387 4.174387 1010 +diego 0 16 4.174387 0.000000 992 +transit 1 15 4.248495 4.248495 1046 +drive 0 15 4.248495 0.000000 1052 +track 0 15 4.248495 0.000000 1029 +scene 1 14 4.317488 4.317488 1114 +resolut 0 13 4.382027 0.000000 1172 +jonathan 0 13 4.382027 0.000000 1174 +incorpor 0 13 4.382027 0.000000 1163 +entertain 0 12 4.465908 0.000000 1286 +captur 0 12 4.465908 0.000000 1232 +realiti 0 12 4.465908 0.000000 1272 +player 0 11 4.553877 0.000000 1371 +primit 0 11 4.553877 0.000000 1317 +screen 0 9 4.753590 0.000000 1577 +rivl 1 8 4.875197 4.875197 1632 +capac 0 8 4.875197 0.000000 1740 +star 0 8 4.875197 0.000000 1717 +pronounc 0 7 5.010635 0.000000 1918 +vehicl 0 7 5.010635 0.000000 1928 +clip 0 7 5.010635 0.000000 1868 +quick 0 6 5.164786 0.000000 2184 +railroad 0 6 5.164786 0.000000 2161 +hypothet 0 5 5.347108 0.000000 2474 +opengl 0 5 5.347108 0.000000 2299 +screenshot 1 4 5.568345 5.568345 2743 +multitask 0 4 5.568345 0.000000 2803 +enjoy 0 4 5.568345 0.000000 2937 +swartz 0 4 5.568345 0.000000 2878 +engineeringclass 0 3 5.857933 0.000000 3667 +lai 0 3 5.857933 0.000000 3694 +inventor 0 3 5.857933 0.000000 3695 +hodja 0 2 6.263398 0.000000 4972 +fledg 0 2 6.263398 0.000000 4973 +resumesom 0 2 6.263398 0.000000 5186 +hogman 1 1 6.957497 6.957497 10901 +qualcomm 1 1 6.957497 6.957497 10902 +pagekartik 0 1 6.957497 0.000000 10903 +kapadiamast 0 1 6.957497 0.000000 10904 +dabnei 0 1 6.957497 0.000000 10905 +kkapadia 0 1 6.957497 0.000000 10906 +comcurr 0 1 6.957497 0.000000 10907 +incorporatedmi 0 1 6.957497 0.000000 10908 +projectshoca 0 1 6.957497 0.000000 10909 +chiphoca 0 1 6.957497 0.000000 10910 +gameboard 0 1 6.957497 0.000000 10911 +rivlrivl 0 1 6.957497 0.000000 10912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..2d808d97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +high 0 130 2.079442 0.000000 101 +sinc 0 90 2.397895 0.000000 159 +center 0 88 2.397895 0.000000 158 +thing 1 84 2.484907 2.484907 189 +school 0 84 2.484907 0.000000 188 +wide 0 84 2.484907 0.000000 185 +write 1 72 2.639057 2.639057 222 +onlin 0 75 2.639057 0.000000 223 +written 0 63 2.772589 0.000000 278 +back 0 60 2.833213 0.000000 297 +publish 1 57 2.890372 2.890372 326 +think 0 57 2.890372 0.000000 314 +much 0 52 2.995732 0.000000 349 +keep 0 44 3.135494 0.000000 409 +press 0 42 3.218876 0.000000 419 +howev 0 41 3.218876 0.000000 422 +editor 0 41 3.218876 0.000000 433 +go 0 33 3.433987 0.000000 529 +mine 0 26 3.688879 0.000000 654 +rather 0 26 3.688879 0.000000 642 +other 0 24 3.761200 0.000000 697 +wrote 1 20 3.951244 3.951244 830 +wonder 0 20 3.951244 0.000000 815 +letter 1 16 4.174387 4.174387 981 +went 0 12 4.465908 0.000000 1279 +newspap 0 12 4.465908 0.000000 1280 +true 0 10 4.653960 0.000000 1422 +rapid 0 10 4.653960 0.000000 1453 +poetri 0 9 4.753590 0.000000 1596 +mile 0 8 4.875197 0.000000 1743 +conflict 0 6 5.164786 0.000000 2041 +famou 0 6 5.164786 0.000000 2185 +grand 0 5 5.347108 0.000000 2425 +mess 0 4 5.568345 0.000000 2886 +flame 0 3 5.857933 0.000000 3696 +arm 0 3 5.857933 0.000000 3697 +argu 0 3 5.857933 0.000000 3698 +dread 0 3 5.857933 0.000000 3630 +suspect 1 2 6.263398 6.263398 5187 +anyhow 0 2 6.263398 0.000000 5188 +mathematician 0 2 6.263398 0.000000 5189 +terrorist 0 2 6.263398 0.000000 5190 +writingsi 0 1 6.957497 0.000000 10913 +proudof 0 1 6.957497 0.000000 10914 +morethought 0 1 6.957497 0.000000 10915 +conscienti 0 1 6.957497 0.000000 10916 +objector 0 1 6.957497 0.000000 10917 +myfirst 0 1 6.957497 0.000000 10918 +fewyear 0 1 6.957497 0.000000 10919 +gulf 0 1 6.957497 0.000000 10920 +vestart 0 1 6.957497 0.000000 10921 +lest 0 1 6.957497 0.000000 10922 +dprobabl 0 1 6.957497 0.000000 10923 +essayist 0 1 6.957497 0.000000 10924 +byron 0 1 6.957497 0.000000 10925 +asuburb 0 1 6.957497 0.000000 10926 +unabomb 0 1 6.957497 0.000000 10927 +mathematiciansar 0 1 6.957497 0.000000 10928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..01f6a552 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,269 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +hall 1 146 1.945910 1.945910 65 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +file 0 132 1.945910 0.000000 70 +report 1 131 2.079442 2.079442 92 +databas 1 122 2.079442 2.079442 86 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +confer 0 126 2.079442 0.000000 100 +place 1 106 2.197225 2.197225 124 +structur 0 106 2.197225 0.000000 105 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +imag 1 91 2.397895 2.397895 161 +octob 0 89 2.397895 0.000000 156 +search 0 95 2.397895 0.000000 155 +info 1 85 2.484907 2.484907 176 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +school 1 84 2.484907 2.484907 188 +institut 0 84 2.484907 0.000000 187 +librari 0 87 2.484907 0.000000 181 +internet 0 83 2.484907 0.000000 186 +appear 1 78 2.564949 2.564949 210 +sourc 1 77 2.564949 2.564949 201 +interfac 0 79 2.564949 0.000000 209 +refer 0 78 2.564949 0.000000 203 +html 1 75 2.639057 2.639057 235 +upson 0 71 2.639057 0.000000 218 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +david 0 71 2.639057 0.000000 232 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +ithaca 1 65 2.772589 2.772589 294 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +dept 0 64 2.772589 0.000000 291 +plan 0 65 2.772589 0.000000 272 +new 0 64 2.772589 0.000000 262 +street 0 63 2.772589 0.000000 293 +march 0 61 2.833213 0.000000 295 +simpl 0 60 2.833213 0.000000 298 +content 0 59 2.833213 0.000000 302 +direct 0 57 2.890372 0.000000 316 +explor 0 58 2.890372 0.000000 324 +overview 0 56 2.890372 0.000000 323 +index 0 56 2.890372 0.000000 309 +tabl 0 51 2.995732 0.000000 346 +cool 0 49 3.044522 0.000000 374 +pointer 0 48 3.044522 0.000000 368 +life 0 50 3.044522 0.000000 375 +electron 1 47 3.091042 3.091042 379 +mark 0 44 3.135494 0.000000 403 +netscap 0 44 3.135494 0.000000 395 +music 1 42 3.218876 3.218876 436 +map 0 39 3.258097 0.000000 452 +industri 1 38 3.295837 3.295837 464 +open 0 38 3.295837 0.000000 469 +china 0 37 3.332205 0.000000 487 +global 0 34 3.401197 0.000000 520 +common 0 30 3.555348 0.000000 574 +synchron 0 29 3.583519 0.000000 588 +weather 0 28 3.610918 0.000000 618 +bound 0 26 3.688879 0.000000 659 +reliabl 1 25 3.737670 3.737670 674 +sport 0 25 3.737670 0.000000 683 +other 1 24 3.761200 3.761200 697 +scalabl 0 24 3.761200 0.000000 705 +yahoo 0 24 3.761200 0.000000 707 +flexibl 0 21 3.912023 0.000000 792 +navig 0 21 3.912023 0.000000 796 +kernel 0 20 3.951244 0.000000 825 +binari 0 20 3.951244 0.000000 823 +lyco 0 19 4.007333 0.000000 871 +women 1 16 4.174387 4.174387 1004 +commerci 0 16 4.174387 0.000000 1005 +brief 0 16 4.174387 0.000000 1001 +stock 0 16 4.174387 0.000000 1007 +horu 1 14 4.317488 4.317488 1116 +canada 0 13 4.382027 0.000000 1158 +alan 0 13 4.382027 0.000000 1146 +brad 1 12 4.465908 4.465908 1264 +weight 0 12 4.465908 0.000000 1204 +kenneth 0 12 4.465908 0.000000 1265 +reness 1 11 4.553877 4.553877 1333 +transpar 0 11 4.553877 0.000000 1325 +lake 0 11 4.553877 0.000000 1373 +itali 0 11 4.553877 0.000000 1378 +werner 1 10 4.653960 4.653960 1385 +cook 1 10 4.653960 4.653960 1464 +consortium 0 10 4.653960 0.000000 1467 +mosaic 0 10 4.653960 0.000000 1426 +sosp 0 10 4.653960 0.000000 1416 +mountain 0 10 4.653960 0.000000 1456 +jean 0 10 4.653960 0.000000 1440 +cheng 0 10 4.653960 0.000000 1381 +robbert 1 9 4.753590 4.753590 1529 +birman 1 9 4.753590 4.753590 1531 +wall 1 9 4.753590 4.753590 1553 +light 0 9 4.753590 0.000000 1533 +motorola 0 9 4.753590 0.000000 1546 +vogel 1 8 4.875197 4.875197 1622 +sigop 1 8 4.875197 4.875197 1727 +european 0 8 4.875197 0.000000 1763 +heart 0 8 4.875197 0.000000 1729 +grove 0 8 4.875197 0.000000 1675 +wire 0 8 4.875197 0.000000 1747 +edg 0 8 4.875197 0.000000 1647 +synchroni 0 7 5.010635 0.000000 1923 +gatewai 0 7 5.010635 0.000000 1942 +discoveri 0 7 5.010635 0.000000 1915 +hunt 0 7 5.010635 0.000000 1798 +scout 0 7 5.010635 0.000000 1903 +antonio 0 6 5.164786 0.000000 2186 +symposiumon 0 6 5.164786 0.000000 2054 +alex 0 6 5.164786 0.000000 2130 +theproject 0 6 5.164786 0.000000 1981 +postcard 0 6 5.164786 0.000000 2181 +atlant 0 5 5.347108 0.000000 2508 +advic 0 5 5.347108 0.000000 2509 +colorado 1 4 5.568345 5.568345 2938 +ncsa 1 4 5.568345 5.568345 2767 +ireland 0 4 5.568345 0.000000 2853 +hayden 0 4 5.568345 0.000000 2844 +hickei 0 4 5.568345 0.000000 2845 +vaysburd 0 4 5.568345 0.000000 2846 +insur 0 4 5.568345 0.000000 2939 +glade 1 3 5.857933 5.857933 3537 +fashion 1 3 5.857933 5.857933 3699 +educornel 0 3 5.857933 0.000000 3601 +universitydept 0 3 5.857933 0.000000 3602 +takako 0 3 5.857933 0.000000 3538 +systemscomput 0 3 5.857933 0.000000 3148 +arizona 0 3 5.857933 0.000000 3700 +beginn 0 3 5.857933 0.000000 3330 +hotjava 0 3 5.857933 0.000000 3220 +ftc 0 3 5.857933 0.000000 3275 +elsevi 0 3 5.857933 0.000000 3671 +copper 0 3 5.857933 0.000000 3536 +summit 0 3 5.857933 0.000000 3684 +counti 0 3 5.857933 0.000000 3682 +crew 0 3 5.857933 0.000000 3347 +underground 0 3 5.857933 0.000000 3604 +spider 0 3 5.857933 0.000000 3605 +katherin 1 2 6.263398 6.263398 4851 +ofvirtu 0 2 6.263398 0.000000 5061 +lui 0 2 6.263398 0.000000 5164 +dalia 0 2 6.263398 0.000000 4852 +malki 0 2 6.263398 0.000000 4853 +uiuc 0 2 6.263398 0.000000 4509 +cern 0 2 6.263398 0.000000 5079 +icdc 0 2 6.263398 0.000000 5191 +ucsd 0 2 6.263398 0.000000 5192 +amazon 0 2 6.263398 0.000000 5193 +dessert 0 2 6.263398 0.000000 5194 +intertext 0 2 6.263398 0.000000 5002 +infoth 0 2 6.263398 0.000000 5195 +infodistribut 1 1 6.957497 6.957497 10929 +infocompani 1 1 6.957497 6.957497 10930 +lisboa 1 1 6.957497 6.957497 10931 +httpd 1 1 6.957497 6.957497 10932 +xmosaic 1 1 6.957497 6.957497 10933 +guokguo 0 1 6.957497 0.000000 10934 +multicastprotocol 0 1 6.957497 0.000000 10935 +publicationskatherin 0 1 6.957497 0.000000 10936 +connemara 0 1 6.957497 0.000000 10937 +rodrigu 0 1 6.957497 0.000000 10938 +sargento 0 1 6.957497 0.000000 10939 +paulo 0 1 6.957497 0.000000 10940 +verisimo 0 1 6.957497 0.000000 10941 +niagara 0 1 6.957497 0.000000 10942 +networkscool 0 1 6.957497 0.000000 10943 +toolsbibliographyconferencesjournalsacademia 0 1 6.957497 0.000000 10944 +infoschool 0 1 6.957497 0.000000 10945 +infojob 0 1 6.957497 0.000000 10946 +searchinterest 0 1 6.957497 0.000000 10947 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 0 1 6.957497 0.000000 10948 +inforesearch 0 1 6.957497 0.000000 10949 +systempointershoru 0 1 6.957497 0.000000 10950 +productspringtotemtransisx 0 1 6.957497 0.000000 10951 +microsystemslab 0 1 6.957497 0.000000 10952 +networksmulticast 0 1 6.957497 0.000000 10953 +protocolsn 0 1 6.957497 0.000000 10954 +fromlblgun 0 1 6.957497 0.000000 10955 +sguid 0 1 6.957497 0.000000 10956 +quickrefer 0 1 6.957497 0.000000 10957 +htmldocument 0 1 6.957497 0.000000 10958 +finder 0 1 6.957497 0.000000 10959 +bibliographybibliographi 0 1 6.957497 0.000000 10960 +oldindex 0 1 6.957497 0.000000 10961 +hpdc 0 1 6.957497 0.000000 10962 +srd 0 1 6.957497 0.000000 10963 +jsac 0 1 6.957497 0.000000 10964 +scienceacademia 0 1 6.957497 0.000000 10965 +openingsibmdelltandemtiapplebel 0 1 6.957497 0.000000 10966 +gradschool 0 1 6.957497 0.000000 10967 +gradjob 0 1 6.957497 0.000000 10968 +ukinterest 0 1 6.957497 0.000000 10969 +moviesbailei 0 1 6.957497 0.000000 10970 +concertslibrari 0 1 6.957497 0.000000 10971 +hightechin 0 1 6.957497 0.000000 10972 +inesc 0 1 6.957497 0.000000 10973 +resort 0 1 6.957497 0.000000 10974 +coloradooth 0 1 6.957497 0.000000 10975 +infoart 0 1 6.957497 0.000000 10976 +weblouvreth 0 1 6.957497 0.000000 10977 +linebook 0 1 6.957497 0.000000 10978 +calvinhobb 0 1 6.957497 0.000000 10979 +archivecardsmagicchinaart 0 1 6.957497 0.000000 10980 +gourmetl 0 1 6.957497 0.000000 10981 +cordonbleu 0 1 6.957497 0.000000 10982 +fashional 0 1 6.957497 0.000000 10983 +linksa 0 1 6.957497 0.000000 10984 +cjlutz 0 1 6.957497 0.000000 10985 +wwweb 0 1 6.957497 0.000000 10986 +pagewith 0 1 6.957497 0.000000 10987 +tmexpressfirst 0 1 6.957497 0.000000 10988 +wireirc 0 1 6.957497 0.000000 10989 +faqfashion 0 1 6.957497 0.000000 10990 +nethair 0 1 6.957497 0.000000 10991 +diesel 0 1 6.957497 0.000000 10992 +guessfriend 0 1 6.957497 0.000000 10993 +deng 0 1 6.957497 0.000000 10994 +shiji 0 1 6.957497 0.000000 10995 +edulibrari 0 1 6.957497 0.000000 10996 +congressmagazin 0 1 6.957497 0.000000 10997 +timegeorg 0 1 6.957497 0.000000 10998 +gilder 0 1 6.957497 0.000000 10999 +archivesinanet 0 1 6.957497 0.000000 11000 +newsworld 0 1 6.957497 0.000000 11001 +olymp 0 1 6.957497 0.000000 11002 +streetheadlin 0 1 6.957497 0.000000 11003 +weatherhunt 0 1 6.957497 0.000000 11004 +informationglob 0 1 6.957497 0.000000 11005 +navigatorhom 0 1 6.957497 0.000000 11006 +wanderersand 0 1 6.957497 0.000000 11007 +kguo 0 1 6.957497 0.000000 11008 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..9d9c2a10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +object 0 138 1.945910 0.000000 79 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +user 0 104 2.302585 0.000000 137 +graphic 0 90 2.397895 0.000000 147 +learn 0 86 2.484907 0.000000 170 +environ 0 84 2.484907 0.000000 177 +chang 0 82 2.484907 0.000000 163 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +java 1 70 2.708050 2.708050 248 +creat 1 63 2.772589 2.772589 277 +written 0 63 2.772589 0.000000 278 +interact 0 62 2.772589 0.000000 270 +simpl 0 60 2.833213 0.000000 298 +understand 0 47 3.091042 0.000000 384 +form 0 39 3.258097 0.000000 443 +multi 0 36 3.367296 0.000000 493 +power 0 30 3.555348 0.000000 573 +platform 0 29 3.583519 0.000000 591 +input 0 23 3.806662 0.000000 727 +portabl 0 20 3.951244 0.000000 819 +applet 0 20 3.951244 0.000000 827 +safe 0 12 4.465908 0.000000 1274 +polygon 0 8 4.875197 0.000000 1723 +rotat 1 5 5.347108 5.347108 2295 +vertic 0 5 5.347108 0.000000 2270 +cube 0 4 5.568345 0.000000 2940 +introductionthi 0 2 6.263398 0.000000 4056 +tetra 0 2 6.263398 0.000000 5196 +wirefram 1 1 6.957497 6.957497 11009 +desgin 0 1 6.957497 0.000000 11010 +threader 0 1 6.957497 0.000000 11011 +speific 0 1 6.957497 0.000000 11012 +react 0 1 6.957497 0.000000 11013 +cone 0 1 6.957497 0.000000 11014 +cylind 0 1 6.957497 0.000000 11015 +toru 0 1 6.957497 0.000000 11016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..5aa028fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +algorithm 2 162 1.791759 3.583518 57 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +tool 0 117 2.079442 0.000000 93 +theori 2 111 2.197225 4.394450 127 +assist 0 112 2.197225 0.000000 113 +technic 1 100 2.302585 2.302585 140 +book 0 99 2.302585 0.000000 131 +associ 1 93 2.397895 2.397895 151 +homepag 0 93 2.397895 0.000000 148 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +ieee 1 86 2.484907 2.484907 190 +resourc 1 81 2.484907 2.484907 172 +academ 1 82 2.484907 2.484907 178 +internet 1 83 2.484907 2.484907 186 +librari 0 87 2.484907 0.000000 181 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +master 0 76 2.564949 0.000000 216 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +symposium 2 72 2.639057 5.278114 238 +line 1 75 2.639057 2.639057 231 +david 0 71 2.639057 0.000000 232 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +foundat 2 62 2.772589 5.545178 286 +ithaca 0 65 2.772589 0.000000 294 +visit 0 63 2.772589 0.000000 288 +result 0 65 2.772589 0.000000 281 +virtual 0 62 2.772589 0.000000 285 +interact 0 62 2.772589 0.000000 270 +content 0 59 2.833213 0.000000 302 +juli 0 60 2.833213 0.000000 305 +point 1 58 2.890372 2.890372 319 +thesi 0 57 2.890372 0.000000 327 +cover 0 55 2.944439 0.000000 329 +local 0 55 2.944439 0.000000 334 +talk 0 53 2.944439 0.000000 336 +approach 0 48 3.044522 0.000000 366 +without 0 50 3.044522 0.000000 370 +set 0 50 3.044522 0.000000 361 +protocol 1 45 3.135494 3.135494 407 +math 0 44 3.135494 0.000000 402 +combin 0 42 3.218876 0.000000 421 +error 0 40 3.258097 0.000000 449 +connect 0 37 3.332205 0.000000 485 +short 0 36 3.367296 0.000000 499 +robot 0 36 3.367296 0.000000 497 +approxim 1 35 3.401197 3.401197 509 +bibliographi 1 34 3.401197 3.401197 518 +singl 0 34 3.401197 0.000000 510 +posit 0 31 3.496508 0.000000 552 +graph 1 30 3.555348 3.555348 576 +secur 1 30 3.555348 3.555348 577 +computersci 0 30 3.555348 0.000000 562 +proc 2 26 3.688879 7.377758 649 +bound 0 26 3.688879 0.000000 659 +compar 0 26 3.688879 0.000000 648 +berkelei 0 26 3.688879 0.000000 657 +jeff 0 25 3.737670 0.000000 673 +flow 0 24 3.761200 0.000000 700 +motion 0 24 3.761200 0.000000 699 +yahoo 0 24 3.761200 0.000000 707 +universityithaca 0 24 3.761200 0.000000 710 +geometri 1 22 3.850148 3.850148 752 +emphasi 0 22 3.850148 0.000000 755 +siam 1 21 3.912023 3.912023 800 +path 1 21 3.912023 3.912023 778 +rout 1 21 3.912023 3.912023 793 +vlsi 0 21 3.912023 0.000000 795 +tenni 0 20 3.951244 0.000000 838 +geometr 1 19 4.007333 4.007333 852 +definit 1 19 4.007333 4.007333 864 +spend 0 19 4.007333 0.000000 850 +andrew 0 19 4.007333 0.000000 849 +hypertext 0 19 4.007333 0.000000 865 +lower 0 18 4.060443 0.000000 886 +dimension 0 18 4.060443 0.000000 909 +analyz 0 17 4.110874 0.000000 925 +expand 0 17 4.110874 0.000000 928 +segment 0 17 4.110874 0.000000 931 +stanford 0 17 4.110874 0.000000 955 +letter 1 16 4.174387 4.174387 981 +latenc 0 16 4.174387 0.000000 993 +biologi 1 15 4.248495 4.248495 1049 +princeton 0 15 4.248495 0.000000 1042 +embed 0 14 4.317488 0.000000 1102 +discret 1 13 4.382027 4.382027 1165 +resolut 0 13 4.382027 0.000000 1172 +safe 0 12 4.465908 0.000000 1274 +probabilist 0 11 4.553877 0.000000 1343 +node 0 11 4.553877 0.000000 1326 +mesh 0 11 4.553877 0.000000 1351 +israel 0 11 4.553877 0.000000 1366 +arbitrari 0 11 4.553877 0.000000 1359 +excit 0 11 4.553877 0.000000 1329 +queue 1 10 4.653960 4.653960 1386 +minimum 1 9 4.753590 4.753590 1555 +assumpt 0 9 4.753590 0.000000 1514 +distanc 0 9 4.753590 0.000000 1500 +yellow 0 9 4.753590 0.000000 1601 +cryptographi 0 9 4.753590 0.000000 1512 +combinatori 1 8 4.875197 4.875197 1629 +hallcornel 0 8 4.875197 0.000000 1757 +molecular 1 7 5.010635 5.010635 1887 +trade 1 7 5.010635 5.010635 1815 +foc 0 7 5.010635 0.000000 1880 +huttenloch 1 6 5.164786 5.164786 1983 +rubinfeld 0 6 5.164786 0.000000 1998 +dens 0 6 5.164786 0.000000 2122 +layout 0 6 5.164786 0.000000 2183 +relax 0 6 5.164786 0.000000 2120 +consensu 0 6 5.164786 0.000000 2080 +reconstruct 0 6 5.164786 0.000000 2170 +plane 0 6 5.164786 0.000000 2187 +symposiumon 0 6 5.164786 0.000000 2054 +infoseek 0 6 5.164786 0.000000 2188 +soda 0 6 5.164786 0.000000 2189 +corp 0 6 5.164786 0.000000 2139 +semi 1 5 5.347108 5.347108 2510 +almaden 0 5 5.347108 0.000000 2511 +stabil 0 5 5.347108 0.000000 2286 +diagram 0 5 5.347108 0.000000 2346 +stoc 0 5 5.347108 0.000000 2491 +chess 0 5 5.347108 0.000000 2486 +conform 0 4 5.568345 0.000000 2941 +disjoint 0 4 5.568345 0.000000 2709 +ratio 0 4 5.568345 0.000000 2942 +hausdorff 0 4 5.568345 0.000000 2633 +glimps 0 4 5.568345 0.000000 2778 +planar 0 3 5.857933 0.000000 3647 +formobil 0 3 5.857933 0.000000 3261 +fernandez 0 3 5.857933 0.000000 3591 +deliveri 0 3 5.857933 0.000000 3278 +onprincipl 0 3 5.857933 0.000000 3701 +berger 0 3 5.857933 0.000000 3702 +universitycomput 0 3 5.857933 0.000000 3651 +ncstrl 0 3 5.857933 0.000000 3530 +jone 0 3 5.857933 0.000000 3703 +rivest 0 3 5.857933 0.000000 3248 +kleinberg 3 2 6.263398 18.790194 5093 +adversari 1 2 6.263398 6.263398 5065 +tardo 1 2 6.263398 6.263398 5090 +williamson 1 2 6.263398 6.263398 5101 +goeman 1 2 6.263398 6.263398 5100 +leighton 1 2 6.263398 6.263398 5097 +diamet 0 2 6.263398 0.000000 5102 +lovasz 0 2 6.263398 0.000000 5091 +greedi 0 2 6.263398 0.000000 4143 +attiya 0 2 6.263398 0.000000 5197 +voronoi 0 2 6.263398 0.000000 5036 +euclidean 0 2 6.263398 0.000000 5198 +sdsc 0 2 6.263398 0.000000 5199 +kleinber 1 1 6.957497 6.957497 11017 +disjointpath 1 1 6.957497 6.957497 11018 +anddisjoint 0 1 6.957497 0.000000 11019 +stabilityof 0 1 6.957497 0.000000 11020 +particularlyth 0 1 6.957497 0.000000 11021 +seeselect 0 1 6.957497 0.000000 11022 +publicationsmiscellan 0 1 6.957497 0.000000 11023 +linkspapersapproxim 0 1 6.957497 0.000000 11024 +unsplitt 0 1 6.957497 0.000000 11025 +aggarw 0 1 6.957497 0.000000 11026 +improvedapproxim 0 1 6.957497 0.000000 11027 +thetafunct 0 1 6.957497 0.000000 11028 +vertex 0 1 6.957497 0.000000 11029 +simplepolygon 0 1 6.957497 0.000000 11030 +serverbalanc 0 1 6.957497 0.000000 11031 +yaniv 0 1 6.957497 0.000000 11032 +serveralgorithm 0 1 6.957497 0.000000 11033 +robotnavig 0 1 6.957497 0.000000 11034 +awerbuch 0 1 6.957497 0.000000 11035 +borodin 0 1 6.957497 0.000000 11036 +raghavan 0 1 6.957497 0.000000 11037 +sudan 0 1 6.957497 0.000000 11038 +lynch 0 1 6.957497 0.000000 11039 +offsbetween 0 1 6.957497 0.000000 11040 +quiesc 0 1 6.957497 0.000000 11041 +managementprotocol 0 1 6.957497 0.000000 11042 +mullainathan 0 1 6.957497 0.000000 11043 +boundsand 0 1 6.957497 0.000000 11044 +athre 0 1 6.957497 0.000000 11045 +kedem 0 1 6.957497 0.000000 11046 +pointset 0 1 6.957497 0.000000 11047 +invariantsof 0 1 6.957497 0.000000 11048 +linkssearch 0 1 6.957497 0.000000 11049 +bibliographiesaltavista 0 1 6.957497 0.000000 11050 +nynex 0 1 6.957497 0.000000 11051 +sitescornel 0 1 6.957497 0.000000 11052 +computingtc 0 1 6.957497 0.000000 11053 +crescenzi 0 1 6.957497 0.000000 11054 +kann 0 1 6.957497 0.000000 11055 +compendium 0 1 6.957497 0.000000 11056 +biologycomput 0 1 6.957497 0.000000 11057 +carb 0 1 6.957497 0.000000 11058 +biocomput 0 1 6.957497 0.000000 11059 +geometrydavid 0 1 6.957497 0.000000 11060 +eppstein 0 1 6.957497 0.000000 11061 +junkyard 0 1 6.957497 0.000000 11062 +erickson 0 1 6.957497 0.000000 11063 +securitymitr 0 1 6.957497 0.000000 11064 +miscellaneousnetscap 0 1 6.957497 0.000000 11065 +intellicast 0 1 6.957497 0.000000 11066 +kleinbergdepart 0 1 6.957497 0.000000 11067 +scienceupson 0 1 6.957497 0.000000 11068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..6a369cb6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +construct 0 139 1.945910 0.000000 82 +assign 0 135 1.945910 0.000000 66 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +homepag 1 93 2.397895 2.397895 148 +pictur 0 89 2.397895 0.000000 160 +start 0 83 2.484907 0.000000 173 +thing 0 84 2.484907 0.000000 189 +master 0 76 2.564949 0.000000 216 +come 0 78 2.564949 0.000000 202 +want 0 79 2.564949 0.000000 199 +html 0 75 2.639057 0.000000 235 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +back 0 60 2.833213 0.000000 297 +march 0 61 2.833213 0.000000 295 +think 0 57 2.890372 0.000000 314 +februari 0 54 2.944439 0.000000 328 +move 0 47 3.091042 0.000000 382 +music 0 42 3.218876 0.000000 436 +electr 0 38 3.295837 0.000000 461 +return 0 34 3.401197 0.000000 502 +corpor 0 21 3.912023 0.000000 802 +worth 0 11 4.553877 0.000000 1294 +forc 0 10 4.653960 0.000000 1384 +japan 1 8 4.875197 4.875197 1762 +cornellunivers 0 7 5.010635 0.000000 1916 +superhighwai 0 4 5.568345 0.000000 2943 +sell 0 4 5.568345 0.000000 2935 +tokyo 0 3 5.857933 0.000000 3622 +acquaint 0 3 5.857933 0.000000 3468 +sale 0 3 5.857933 0.000000 3688 +melco 0 2 6.263398 0.000000 5200 +advert 0 2 6.263398 0.000000 5201 +kazushi 0 1 6.957497 0.000000 11069 +otakota 0 1 6.957497 0.000000 11070 +edukazushi 0 1 6.957497 0.000000 11071 +mitusbishi 0 1 6.957497 0.000000 11072 +isund 0 1 6.957497 0.000000 11073 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..5c515bbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +algorithm 1 162 1.791759 1.791759 57 +avail 0 169 1.791759 0.000000 48 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +technic 0 100 2.302585 0.000000 140 +pictur 0 89 2.397895 0.000000 160 +logic 1 71 2.639057 2.639057 230 +complex 1 64 2.772589 2.772589 269 +type 0 61 2.833213 0.000000 296 +faculti 0 56 2.890372 0.000000 325 +algebra 1 45 3.135494 3.135494 394 +york 0 41 3.218876 0.000000 435 +constraint 0 26 3.688879 0.000000 636 +interpret 0 24 3.761200 0.000000 686 +universityithaca 0 24 3.761200 0.000000 710 +decis 0 23 3.806662 0.000000 728 +automata 0 13 4.382027 0.000000 1135 +interestsmi 0 10 4.653960 0.000000 1462 +hallcornel 0 8 4.875197 0.000000 1757 +newton 0 7 5.010635 0.000000 1824 +infer 0 6 5.164786 0.000000 2040 +dexter 0 4 5.568345 0.000000 2855 +andsemant 0 3 5.857933 0.000000 3246 +kozendext 0 1 6.957497 0.000000 11074 +kozenjoseph 0 1 6.957497 0.000000 11075 +engineeringphd 0 1 6.957497 0.000000 11076 +especiallycomplex 0 1 6.957497 0.000000 11077 +onlinekleen 0 1 6.957497 0.000000 11078 +algebraautomata 0 1 6.957497 0.000000 11079 +logicbibliographylist 0 1 6.957497 0.000000 11080 +reportscours 0 1 6.957497 0.000000 11081 +notesc 0 1 6.957497 0.000000 11082 +programsc 0 1 6.957497 0.000000 11083 +theoryfun 0 1 6.957497 0.000000 11084 +stufffamili 0 1 6.957497 0.000000 11085 +rugbi 0 1 6.957497 0.000000 11086 +effectcomput 0 1 6.957497 0.000000 11087 +departmentupson 0 1 6.957497 0.000000 11088 +usakozen 0 1 6.957497 0.000000 11089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..ef0f468e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +theori 1 111 2.197225 2.197225 127 +topic 0 114 2.197225 0.000000 110 +teach 0 108 2.197225 0.000000 112 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +associ 0 93 2.397895 0.000000 151 +learn 0 86 2.484907 0.000000 170 +novemb 0 81 2.484907 0.000000 179 +upson 0 71 2.639057 0.000000 218 +ithaca 0 65 2.772589 0.000000 294 +type 0 61 2.833213 0.000000 296 +autom 0 41 3.218876 0.000000 434 +soon 1 36 3.367296 3.367296 494 +synthesi 0 20 3.951244 0.000000 834 +deduct 0 12 4.465908 0.000000 1236 +german 0 6 5.164786 0.000000 2190 +christoph 1 5 5.347108 5.347108 2512 +kreitz 1 1 6.957497 6.957497 11090 +lehr 0 1 6.957497 0.000000 11091 +lernen 0 1 6.957497 0.000000 11092 +vorlesungsskript 0 1 6.957497 0.000000 11093 +medienunterst 0 1 6.957497 0.000000 11094 +uumltzt 0 1 6.957497 0.000000 11095 +lehren 0 1 6.957497 0.000000 11096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..8baf4f72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +place 0 106 2.197225 0.000000 124 +master 0 76 2.564949 0.000000 216 +multimedia 0 68 2.708050 0.000000 258 +would 0 67 2.708050 0.000000 251 +street 0 63 2.772589 0.000000 293 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +telephon 0 50 3.044522 0.000000 373 +still 0 50 3.044522 0.000000 362 +enjoi 0 26 3.688879 0.000000 660 +daili 0 24 3.761200 0.000000 706 +taiwan 0 16 4.174387 0.000000 1006 +countri 0 15 4.248495 0.000000 1059 +grove 0 8 4.875197 0.000000 1675 +newton 0 7 5.010635 0.000000 1824 +isi 0 5 5.347108 0.000000 2443 +heng 1 2 6.263398 6.263398 5202 +kuen 1 1 6.957497 6.957497 11097 +myproject 0 1 6.957497 0.000000 11098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..eecfa2f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cornel 2 215 1.386294 2.772588 23 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +oper 0 180 1.609438 0.000000 34 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +contact 0 153 1.791759 0.000000 59 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +hall 0 146 1.945910 0.000000 65 +report 2 131 2.079442 4.158884 92 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +final 1 116 2.197225 2.197225 108 +manag 0 114 2.197225 0.000000 125 +intern 0 108 2.197225 0.000000 128 +find 0 111 2.197225 0.000000 111 +site 0 106 2.197225 0.000000 119 +technic 2 100 2.302585 4.605170 140 +part 1 98 2.302585 2.302585 129 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +pictur 0 89 2.397895 0.000000 160 +librari 2 87 2.484907 4.969814 181 +wide 1 84 2.484907 2.484907 185 +internet 0 83 2.484907 0.000000 186 +build 0 85 2.484907 0.000000 184 +member 0 84 2.484907 0.000000 165 +server 1 76 2.564949 2.564949 204 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +know 0 80 2.564949 0.000000 198 +workshop 1 71 2.639057 2.639057 239 +servic 1 72 2.639057 2.639057 236 +meet 1 72 2.639057 2.639057 229 +upson 0 71 2.639057 0.000000 218 +involv 0 71 2.639057 0.000000 227 +nation 0 74 2.639057 0.000000 240 +publish 0 57 2.890372 0.000000 326 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +digit 2 52 2.995732 5.991464 348 +maintain 0 51 2.995732 0.000000 342 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +life 1 50 3.044522 3.044522 375 +give 0 50 3.044522 0.000000 359 +protocol 1 45 3.135494 3.135494 407 +fast 0 42 3.218876 0.000000 429 +author 1 39 3.258097 3.258097 450 +littl 0 39 3.258097 0.000000 454 +open 0 38 3.295837 0.000000 469 +mean 0 37 3.332205 0.000000 477 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +extend 1 32 3.465736 3.465736 539 +chapter 0 32 3.465736 0.000000 536 +collabor 0 32 3.465736 0.000000 543 +posit 0 31 3.496508 0.000000 552 +secur 1 30 3.555348 3.555348 577 +framework 1 28 3.610918 3.610918 606 +releas 0 28 3.610918 0.000000 616 +hope 0 28 3.610918 0.000000 610 +enabl 0 26 3.688879 0.000000 655 +rule 0 26 3.688879 0.000000 638 +challeng 0 26 3.688879 0.000000 653 +never 1 25 3.737670 3.737670 671 +primari 0 25 3.737670 0.000000 669 +universityithaca 0 24 3.761200 0.000000 710 +store 0 24 3.761200 0.000000 693 +magazin 0 24 3.761200 0.000000 704 +sometim 0 24 3.761200 0.000000 696 +lead 0 23 3.806662 0.000000 718 +mobil 0 23 3.806662 0.000000 730 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +defin 0 22 3.850148 0.000000 746 +corpor 0 21 3.912023 0.000000 802 +fund 0 21 3.912023 0.000000 805 +tell 0 21 3.912023 0.000000 777 +spend 0 19 4.007333 0.000000 850 +ever 0 19 4.007333 0.000000 872 +beauti 0 18 4.060443 0.000000 912 +repositori 1 17 4.110874 4.110874 932 +carl 1 15 4.248495 4.248495 1024 +charact 0 15 4.248495 0.000000 1028 +edui 0 13 4.382027 0.000000 1193 +joint 0 13 4.382027 0.000000 1130 +infrastructur 0 12 4.465908 0.000000 1234 +road 0 11 4.553877 0.000000 1374 +consortium 0 10 4.653960 0.000000 1467 +bike 0 10 4.653960 0.000000 1468 +leader 0 9 4.753590 0.000000 1576 +desir 0 9 4.753590 0.000000 1542 +poor 0 8 4.875197 0.000000 1736 +pagei 0 8 4.875197 0.000000 1683 +davi 0 7 5.010635 0.000000 1888 +daughter 0 7 5.010635 0.000000 1943 +drop 0 6 5.164786 0.000000 2008 +trail 0 6 5.164786 0.000000 2071 +substitut 1 5 5.347108 5.347108 2247 +departmentat 0 5 5.347108 0.000000 2513 +constant 0 5 5.347108 0.000000 2251 +outdoor 0 5 5.347108 0.000000 2514 +interfer 0 5 5.347108 0.000000 2494 +darpa 0 4 5.568345 0.000000 2944 +metadata 0 4 5.568345 0.000000 2945 +breath 0 4 5.568345 0.000000 2946 +dienst 1 3 5.857933 5.857933 3640 +ncstrl 1 3 5.857933 5.857933 3530 +worldwid 0 3 5.857933 0.000000 3704 +luci 0 3 5.857933 0.000000 3705 +fresh 0 3 5.857933 0.000000 3706 +lagoz 1 2 6.263398 6.263398 5081 +interoper 1 2 6.263398 6.263398 4838 +quiet 1 2 6.263398 6.263398 5203 +protocolsfor 0 2 6.263398 0.000000 5204 +developeda 0 2 6.263398 0.000000 5205 +interfacesand 0 2 6.263398 0.000000 5206 +cano 0 2 6.263398 0.000000 5207 +joi 0 2 6.263398 0.000000 5208 +fight 0 2 6.263398 0.000000 5209 +groupin 0 1 6.957497 0.000000 11099 +ourgroup 0 1 6.957497 0.000000 11100 +adistribut 0 1 6.957497 0.000000 11101 +collaborateson 0 1 6.957497 0.000000 11102 +thedienstsoftwar 0 1 6.957497 0.000000 11103 +providesdistribut 0 1 6.957497 0.000000 11104 +initiativesto 0 1 6.957497 0.000000 11105 +iso 0 1 6.957497 0.000000 11106 +dlib 0 1 6.957497 0.000000 11107 +dlibwork 0 1 6.957497 0.000000 11108 +iiin 0 1 6.957497 0.000000 11109 +warwick 0 1 6.957497 0.000000 11110 +amveri 0 1 6.957497 0.000000 11111 +distributedobject 0 1 6.957497 0.000000 11112 +paperfor 0 1 6.957497 0.000000 11113 +codeworkshop 0 1 6.957497 0.000000 11114 +meetm 0 1 6.957497 0.000000 11115 +moreabout 0 1 6.957497 0.000000 11116 +outsideof 0 1 6.957497 0.000000 11117 +toddler 0 1 6.957497 0.000000 11118 +lucyg 0 1 6.957497 0.000000 11119 +avid 0 1 6.957497 0.000000 11120 +movingwat 0 1 6.957497 0.000000 11121 +lakeand 0 1 6.957497 0.000000 11122 +itch 0 1 6.957497 0.000000 11123 +ridingalong 0 1 6.957497 0.000000 11124 +backwood 0 1 6.957497 0.000000 11125 +sparehour 0 1 6.957497 0.000000 11126 +shoe 0 1 6.957497 0.000000 11127 +deeplyth 0 1 6.957497 0.000000 11128 +physicalnor 0 1 6.957497 0.000000 11129 +itspreserv 0 1 6.957497 0.000000 11130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..820cd0f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +applic 0 170 1.791759 0.000000 56 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +report 0 131 2.079442 0.000000 92 +document 0 121 2.079442 0.000000 89 +tool 0 117 2.079442 0.000000 93 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +homepag 2 93 2.397895 4.795790 148 +center 0 88 2.397895 0.000000 158 +internet 1 83 2.484907 2.484907 186 +level 0 87 2.484907 0.000000 180 +school 0 84 2.484907 0.000000 188 +materi 1 75 2.639057 2.639057 221 +servic 1 72 2.639057 2.639057 236 +onlin 0 75 2.639057 0.000000 223 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +guid 1 63 2.772589 2.772589 267 +colleg 0 61 2.833213 0.000000 300 +back 0 60 2.833213 0.000000 297 +local 0 55 2.944439 0.000000 334 +friend 1 48 3.044522 3.044522 376 +standard 0 48 3.044522 0.000000 365 +basic 0 50 3.044522 0.000000 360 +author 0 39 3.258097 0.000000 450 +tutori 0 39 3.258097 0.000000 437 +open 0 38 3.295837 0.000000 469 +robot 0 36 3.367296 0.000000 497 +concept 0 32 3.465736 0.000000 537 +secur 1 30 3.555348 3.555348 577 +travel 0 30 3.555348 0.000000 579 +chines 0 29 3.583519 0.000000 595 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +grad 0 20 3.951244 0.000000 837 +ultim 0 17 4.110874 0.000000 943 +cambridg 0 16 4.174387 0.000000 1008 +massiv 0 15 4.248495 0.000000 1026 +rank 0 14 4.317488 0.000000 1063 +opportun 0 13 4.382027 0.000000 1161 +career 1 12 4.465908 4.465908 1287 +safe 0 12 4.465908 0.000000 1274 +classmat 0 9 4.753590 0.000000 1516 +yellow 0 9 4.753590 0.000000 1601 +sigop 0 8 4.875197 0.000000 1727 +soccer 0 8 4.875197 0.000000 1752 +zhou 1 6 5.164786 5.164786 2092 +legal 0 6 5.164786 0.000000 2094 +authent 0 5 5.347108 0.000000 2306 +exclus 0 4 5.568345 0.000000 2947 +insur 0 4 5.568345 0.000000 2939 +surviv 0 4 5.568345 0.000000 2734 +fudan 1 3 5.857933 5.857933 3707 +legion 0 3 5.857933 0.000000 3708 +automobil 0 3 5.857933 0.000000 3709 +buyer 1 2 6.263398 6.263398 5210 +resours 0 2 6.263398 0.000000 5211 +sunris 0 2 6.263398 0.000000 5212 +edmund 0 2 6.263398 0.000000 5213 +succe 0 2 6.263398 0.000000 5214 +lidong 1 1 6.957497 6.957497 11131 +auto 1 1 6.957497 6.957497 11132 +oasi 0 1 6.957497 0.000000 11133 +adag 0 1 6.957497 0.000000 11134 +sirac 0 1 6.957497 0.000000 11135 +kerbero 0 1 6.957497 0.000000 11136 +ocaml 0 1 6.957497 0.000000 11137 +jobtrak 0 1 6.957497 0.000000 11138 +hunter 0 1 6.957497 0.000000 11139 +careermosa 0 1 6.957497 0.000000 11140 +jobweb 0 1 6.957497 0.000000 11141 +xjob 0 1 6.957497 0.000000 11142 +yingjun 0 1 6.957497 0.000000 11143 +isso 0 1 6.957497 0.000000 11144 +autosit 0 1 6.957497 0.000000 11145 +agenc 0 1 6.957497 0.000000 11146 +indexlast 0 1 6.957497 0.000000 11147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..7fb41d0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +link 1 247 1.386294 1.386294 24 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +object 1 138 1.945910 1.945910 79 +construct 0 139 1.945910 0.000000 82 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +final 1 116 2.197225 2.197225 108 +site 1 106 2.197225 2.197225 119 +world 0 115 2.197225 0.000000 126 +look 0 107 2.197225 0.000000 115 +book 0 99 2.302585 0.000000 131 +take 0 97 2.302585 0.000000 134 +octob 0 89 2.397895 0.000000 156 +info 0 85 2.484907 0.000000 176 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +publish 0 57 2.890372 0.000000 326 +digit 0 52 2.995732 0.000000 348 +advisor 0 51 2.995732 0.000000 355 +cool 1 49 3.044522 3.044522 374 +still 0 50 3.044522 0.000000 362 +visual 0 48 3.044522 0.000000 372 +move 0 47 3.091042 0.000000 382 +video 0 44 3.135494 0.000000 405 +natur 0 44 3.135494 0.000000 406 +vision 0 41 3.218876 0.000000 430 +autom 0 41 3.218876 0.000000 434 +york 0 41 3.218876 0.000000 435 +small 0 39 3.258097 0.000000 447 +annual 0 40 3.258097 0.000000 458 +movi 0 40 3.258097 0.000000 459 +connect 0 37 3.332205 0.000000 485 +chapter 0 32 3.465736 0.000000 536 +anim 1 31 3.496508 3.496508 557 +quot 0 29 3.583519 0.000000 582 +chines 0 29 3.583519 0.000000 595 +releas 0 28 3.610918 0.000000 616 +linux 0 27 3.637586 0.000000 631 +berkelei 0 26 3.688879 0.000000 657 +wang 0 21 3.912023 0.000000 790 +born 0 21 3.912023 0.000000 798 +annot 0 21 3.912023 0.000000 775 +wind 0 18 4.060443 0.000000 908 +edulast 0 17 4.110874 0.000000 927 +taiwan 1 16 4.174387 4.174387 1006 +practicum 0 16 4.174387 0.000000 960 +track 0 15 4.248495 0.000000 1029 +scene 0 14 4.317488 0.000000 1114 +classic 0 14 4.317488 0.000000 1084 +hong 0 14 4.317488 0.000000 1105 +galleri 0 13 4.382027 0.000000 1192 +remov 1 12 4.465908 4.465908 1225 +scienceat 1 11 4.553877 4.553877 1375 +moment 0 11 4.553877 0.000000 1379 +alpha 0 11 4.553877 0.000000 1348 +earth 0 10 4.653960 0.000000 1463 +kong 0 9 4.753590 0.000000 1602 +jump 0 9 4.753590 0.000000 1603 +coast 0 8 4.875197 0.000000 1746 +edg 0 8 4.875197 0.000000 1647 +bridg 0 8 4.875197 0.000000 1764 +ramin 0 7 5.010635 0.000000 1820 +bookstor 0 7 5.010635 0.000000 1837 +vallei 0 7 5.010635 0.000000 1959 +southern 0 6 5.164786 0.000000 2191 +ohio 0 5 5.347108 0.000000 2447 +carlo 0 5 5.347108 0.000000 2515 +swartz 1 4 5.568345 5.568345 2878 +sinanet 0 4 5.568345 0.000000 2883 +sell 0 4 5.568345 0.000000 2935 +japanes 0 4 5.568345 0.000000 2934 +classesc 1 3 5.857933 5.857933 3681 +visionc 0 3 5.857933 0.000000 3489 +audit 0 3 5.857933 0.000000 3391 +headlin 0 3 5.857933 0.000000 3710 +hongkong 0 3 5.857933 0.000000 3677 +hero 0 3 5.857933 0.000000 3711 +ming 0 3 5.857933 0.000000 3712 +villag 0 2 6.263398 0.000000 5215 +computingc 0 2 6.263398 0.000000 5216 +linksfor 0 2 6.263398 0.000000 5185 +castl 0 2 6.263398 0.000000 5217 +nausicaa 0 2 6.263398 0.000000 5218 +galact 0 2 6.263398 0.000000 5219 +hsian 1 1 6.957497 6.957497 11148 +orwel 1 1 6.957497 6.957497 11149 +totoro 1 1 6.957497 6.957497 11150 +wangthi 0 1 6.957497 0.000000 11151 +constructionlin 0 1 6.957497 0.000000 11152 +fangliao 0 1 6.957497 0.000000 11153 +videoe 0 1 6.957497 0.000000 11154 +networkse 0 1 6.957497 0.000000 11155 +amidonc 0 1 6.957497 0.000000 11156 +transcrib 0 1 6.957497 0.000000 11157 +zabihspr 0 1 6.957497 0.000000 11158 +processingc 0 1 6.957497 0.000000 11159 +managementc 0 1 6.957497 0.000000 11160 +colloquimc 0 1 6.957497 0.000000 11161 +webspac 0 1 6.957497 0.000000 11162 +stuffscornel 0 1 6.957497 0.000000 11163 +reportiee 0 1 6.957497 0.000000 11164 +societytaiwan 0 1 6.957497 0.000000 11165 +comth 0 1 6.957497 0.000000 11166 +musicmovi 0 1 6.957497 0.000000 11167 +movieweb 0 1 6.957497 0.000000 11168 +moviemania 0 1 6.957497 0.000000 11169 +picturesth 0 1 6.957497 0.000000 11170 +linkstcl 0 1 6.957497 0.000000 11171 +hacksth 0 1 6.957497 0.000000 11172 +pagemiscellan 0 1 6.957497 0.000000 11173 +cja 0 1 6.957497 0.000000 11174 +calanimag 0 1 6.957497 0.000000 11175 +pagelaputa 0 1 6.957497 0.000000 11176 +conan 0 1 6.957497 0.000000 11177 +slump 0 1 6.957497 0.000000 11178 +kiki 0 1 6.957497 0.000000 11179 +legend 0 1 6.957497 0.000000 11180 +pagecampu 0 1 6.957497 0.000000 11181 +uptown 0 1 6.957497 0.000000 11182 +eithaca 0 1 6.957497 0.000000 11183 +linhsian 0 1 6.957497 0.000000 11184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..dedaf1ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +updat 0 191 1.609438 0.000000 41 +read 0 154 1.791759 0.000000 47 +look 1 107 2.197225 2.197225 115 +check 0 115 2.197225 0.000000 118 +take 1 97 2.302585 2.302585 134 +thing 0 84 2.484907 0.000000 189 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +collect 0 65 2.772589 0.000000 268 +septemb 0 65 2.772589 0.000000 274 +digit 0 52 2.995732 0.000000 348 +video 0 44 3.135494 0.000000 405 +show 1 43 3.178054 3.178054 417 +http 0 41 3.218876 0.000000 420 +littl 0 39 3.258097 0.000000 454 +download 0 36 3.367296 0.000000 489 +thought 1 17 4.110874 4.110874 945 +sept 0 17 4.110874 0.000000 952 +pagewelcom 0 11 4.553877 0.000000 1344 +song 0 11 4.553877 0.000000 1380 +theme 0 8 4.875197 0.000000 1707 +counter 0 8 4.875197 0.000000 1765 +clip 0 7 5.010635 0.000000 1868 +courtesi 0 7 5.010635 0.000000 1953 +essai 1 4 5.568345 5.568345 2948 +libbi 1 1 6.957497 6.957497 11185 +lista 0 1 6.957497 0.000000 11186 +projectemail 0 1 6.957497 0.000000 11187 +mehit 0 1 6.957497 0.000000 11188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..b4b1d7e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +oper 0 180 1.609438 0.000000 34 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +take 0 97 2.302585 0.000000 134 +name 0 72 2.639057 0.000000 220 +upson 0 71 2.639057 0.000000 218 +lili 0 5 5.347108 0.000000 2240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..e6302b5f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +recent 1 167 1.791759 1.791759 58 +applic 0 170 1.791759 0.000000 56 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +analysi 0 124 2.079442 0.000000 98 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +center 0 88 2.397895 0.000000 158 +method 0 80 2.564949 0.000000 213 +appli 0 71 2.639057 0.000000 226 +view 0 70 2.708050 0.000000 254 +differ 0 66 2.708050 0.000000 253 +scientif 0 53 2.944439 0.000000 341 +much 0 52 2.995732 0.000000 349 +numer 1 49 3.044522 3.044522 369 +appoint 0 49 3.044522 0.000000 358 +algebra 1 45 3.135494 3.135494 394 +textbook 1 44 3.135494 3.135494 397 +mechan 0 43 3.178054 0.000000 416 +linear 1 41 3.218876 3.218876 431 +map 0 39 3.258097 0.000000 452 +multipl 0 39 3.258097 0.000000 453 +field 0 37 3.332205 0.000000 482 +mean 0 37 3.332205 0.000000 477 +approxim 0 35 3.401197 0.000000 509 +bibliographi 0 34 3.401197 0.000000 518 +jeff 0 25 3.737670 0.000000 673 +siam 0 21 3.912023 0.000000 800 +walter 0 17 4.110874 0.000000 950 +normal 0 16 4.174387 0.000000 995 +matlab 0 14 4.317488 0.000000 1081 +affili 0 13 4.382027 0.000000 1194 +whose 0 13 4.382027 0.000000 1166 +nick 0 13 4.382027 0.000000 1180 +iter 0 12 4.465908 0.000000 1206 +peter 0 11 4.553877 0.000000 1316 +matric 1 10 4.653960 4.653960 1399 +thecomput 0 10 4.653960 0.000000 1408 +lloyd 1 6 5.164786 5.164786 2103 +edumi 0 6 5.164786 0.000000 2132 +fluid 0 5 5.347108 0.000000 2440 +thecornel 0 4 5.568345 0.000000 2892 +conform 0 4 5.568345 0.000000 2941 +hasbeen 0 4 5.568345 0.000000 2661 +trefethen 1 3 5.857933 5.857933 3528 +eigenvector 0 3 5.857933 0.000000 3365 +vicki 0 3 5.857933 0.000000 3187 +reddi 0 3 5.857933 0.000000 3277 +havea 0 2 6.263398 0.000000 4434 +spectral 0 2 6.263398 0.000000 4837 +papersoth 0 2 6.263398 0.000000 5049 +loui 0 2 6.263398 0.000000 5220 +trefethenprofessorlnt 0 1 6.957497 0.000000 11189 +thecent 0 1 6.957497 0.000000 11190 +numericalsolut 0 1 6.957497 0.000000 11191 +notorthogon 0 1 6.957497 0.000000 11192 +textbooksfinit 0 1 6.957497 0.000000 11193 +papersmultimatlab 0 1 6.957497 0.000000 11194 +processorsmatrix 0 1 6.957497 0.000000 11195 +gap 0 1 6.957497 0.000000 11196 +betweenpotenti 0 1 6.957497 0.000000 11197 +convergencepseudospectra 0 1 6.957497 0.000000 11198 +operatorssom 0 1 6.957497 0.000000 11199 +itemsclass 0 1 6.957497 0.000000 11200 +analysiscurriculum 0 1 6.957497 0.000000 11201 +vitaepseudospectra 0 1 6.957497 0.000000 11202 +alfeldcurr 0 1 6.957497 0.000000 11203 +howlegubjrn 0 1 6.957497 0.000000 11204 +jnsson 0 1 6.957497 0.000000 11205 +yohan 0 1 6.957497 0.000000 11206 +kimdivakar 0 1 6.957497 0.000000 11207 +viswanathprevi 0 1 6.957497 0.000000 11208 +baggetttobi 0 1 6.957497 0.000000 11209 +driscollalan 0 1 6.957497 0.000000 11210 +edelman 0 1 6.957497 0.000000 11211 +howel 0 1 6.957497 0.000000 11212 +mascarenhasnoel 0 1 6.957497 0.000000 11213 +nachtigalsatish 0 1 6.957497 0.000000 11214 +chuan 0 1 6.957497 0.000000 11215 +tohsom 0 1 6.957497 0.000000 11216 +colleaguesjim 0 1 6.957497 0.000000 11217 +demmelann 0 1 6.957497 0.000000 11218 +greenbaummartin 0 1 6.957497 0.000000 11219 +gutknechtd 0 1 6.957497 0.000000 11220 +highamann 0 1 6.957497 0.000000 11221 +trefethenandr 0 1 6.957497 0.000000 11222 +weideman 0 1 6.957497 0.000000 11223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..b2aa282a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +welcom 0 122 2.079442 0.000000 99 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +manag 1 114 2.197225 2.197225 125 +site 1 106 2.197225 2.197225 119 +topic 0 114 2.197225 0.000000 110 +person 0 111 2.197225 0.000000 117 +search 0 95 2.397895 0.000000 155 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +librari 0 87 2.484907 0.000000 181 +resum 0 79 2.564949 0.000000 217 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +new 0 64 2.772589 0.000000 262 +archiv 0 49 3.044522 0.000000 364 +favorit 0 44 3.135494 0.000000 410 +netscap 0 44 3.135494 0.000000 395 +music 1 42 3.218876 3.218876 436 +compani 0 41 3.218876 0.000000 423 +microsoft 0 38 3.295837 0.000000 468 +china 1 37 3.332205 3.332205 487 +connect 0 37 3.332205 0.000000 485 +product 0 33 3.433987 0.000000 527 +travel 1 30 3.555348 3.555348 579 +chines 1 29 3.583519 3.583519 595 +programminglanguag 0 21 3.912023 0.000000 782 +busi 0 21 3.912023 0.000000 784 +hobbi 0 16 4.174387 0.000000 1009 +stock 0 16 4.174387 0.000000 1007 +practicum 0 16 4.174387 0.000000 960 +photograph 0 15 4.248495 0.000000 1056 +novel 0 15 4.248495 0.000000 1039 +misc 0 13 4.382027 0.000000 1124 +galleri 0 13 4.382027 0.000000 1192 +tune 0 12 4.465908 0.000000 1227 +catalog 0 10 4.653960 0.000000 1431 +swim 0 9 4.753590 0.000000 1599 +corba 0 5 5.347108 0.000000 2320 +ping 0 4 5.568345 0.000000 2922 +vrml 0 4 5.568345 0.000000 2949 +cube 0 4 5.568345 0.000000 2940 +luci 1 3 5.857933 5.857933 3705 +pong 0 3 5.857933 0.000000 3371 +underground 0 3 5.857933 0.000000 3604 +badminton 0 2 6.263398 0.000000 5221 +silvano 0 2 6.263398 0.000000 4868 +sunlab 0 2 6.263398 0.000000 5222 +caltech 0 2 6.263398 0.000000 5223 +whiz 0 1 6.957497 0.000000 11224 +systemscontact 0 1 6.957497 0.000000 11225 +yuwu 0 1 6.957497 0.000000 11226 +tkcgi 0 1 6.957497 0.000000 11227 +securitypc 0 1 6.957497 0.000000 11228 +lube 0 1 6.957497 0.000000 11229 +ipngip_atmcomput 0 1 6.957497 0.000000 11230 +sapient 0 1 6.957497 0.000000 11231 +jobtrack 0 1 6.957497 0.000000 11232 +artvark 0 1 6.957497 0.000000 11233 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..f4bd97b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +network 1 168 1.791759 1.791759 61 +click 0 142 1.945910 0.000000 78 +sinc 0 90 2.397895 0.000000 159 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +receiv 0 66 2.708050 0.000000 244 +main 0 67 2.708050 0.000000 256 +window 0 68 2.708050 0.000000 242 +multimedia 0 68 2.708050 0.000000 258 +digit 0 52 2.995732 0.000000 348 +electron 0 47 3.091042 0.000000 379 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +layer 0 17 4.110874 0.000000 926 +massachusett 0 14 4.317488 0.000000 1118 +linda 0 10 4.653960 0.000000 1394 +equip 0 10 4.653960 0.000000 1459 +stack 0 10 4.653960 0.000000 1389 +corp 0 6 5.164786 0.000000 2139 +nativ 0 6 5.164786 0.000000 2192 +multicast 0 5 5.347108 0.000000 2305 +commerc 0 3 5.857933 0.000000 3209 +lowel 0 2 6.263398 0.000000 5224 +coursesfal 0 2 6.263398 0.000000 5225 +universitylinda 0 1 6.957497 0.000000 11234 +lxwu 0 1 6.957497 0.000000 11235 +univsers 0 1 6.957497 0.000000 11236 +banyan 0 1 6.957497 0.000000 11237 +mulitimedia 0 1 6.957497 0.000000 11238 +kramer 0 1 6.957497 0.000000 11239 +mart 0 1 6.957497 0.000000 11240 +photoesus 0 1 6.957497 0.000000 11241 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..6d176bec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +last 0 314 1.098612 0.000000 14 +cornel 0 215 1.386294 0.000000 23 +updat 0 191 1.609438 0.000000 41 +upson 0 71 2.639057 0.000000 218 +juli 0 60 2.833213 0.000000 305 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +halldepart 0 3 5.857933 0.000000 3641 +nikolai 0 2 6.263398 0.000000 4087 +mateevnikolai 0 1 6.957497 0.000000 11242 +mateevgradu 0 1 6.957497 0.000000 11243 +studentmateev 0 1 6.957497 0.000000 11244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..8a0763dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +year 0 148 1.945910 0.000000 84 +databas 1 122 2.079442 2.079442 86 +document 0 121 2.079442 0.000000 89 +theori 0 111 2.197225 0.000000 127 +manag 0 114 2.197225 0.000000 125 +select 0 91 2.397895 0.000000 154 +academ 0 82 2.484907 0.000000 178 +come 0 78 2.564949 0.000000 202 +multimedia 0 68 2.708050 0.000000 258 +visit 0 63 2.772589 0.000000 288 +laboratori 0 63 2.772589 0.000000 292 +summer 0 56 2.890372 0.000000 311 +soon 0 36 3.367296 0.000000 494 +scientist 0 31 3.496508 0.000000 560 +universityithaca 0 24 3.761200 0.000000 710 +fellow 0 24 3.761200 0.000000 701 +fund 0 21 3.912023 0.000000 805 +supervis 0 20 3.951244 0.000000 840 +stop 0 17 4.110874 0.000000 942 +heterogen 0 14 4.317488 0.000000 1090 +arpa 1 11 4.553877 4.553877 1369 +princip 0 10 4.653960 0.000000 1397 +xerox 0 8 4.875197 0.000000 1725 +matthew 0 6 5.164786 0.000000 2193 +chat 0 6 5.164786 0.000000 2128 +metadata 0 4 5.568345 0.000000 2945 +morgenstern 1 1 6.957497 6.957497 11245 +pagematthew 0 1 6.957497 0.000000 11246 +morgensternresearch 0 1 6.957497 0.000000 11247 +leaderaddress 0 1 6.957497 0.000000 11248 +centerxerox 0 1 6.957497 0.000000 11249 +institutecornel 0 1 6.957497 0.000000 11250 +edustatu 0 1 6.957497 0.000000 11251 +scienceproject 0 1 6.957497 0.000000 11252 +fundedresearch 0 1 6.957497 0.000000 11253 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..346b08c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +recent 0 167 1.791759 0.000000 58 +structur 1 106 2.197225 2.197225 105 +theori 0 111 2.197225 0.000000 127 +homepag 0 93 2.397895 0.000000 148 +dynam 1 76 2.564949 2.564949 194 +digit 0 52 2.995732 0.000000 348 +especi 0 36 3.367296 0.000000 496 +random 0 34 3.401197 0.000000 511 +committe 0 34 3.401197 0.000000 522 +graph 1 30 3.555348 3.555348 576 +bound 0 26 3.688879 0.000000 659 +universityithaca 0 24 3.761200 0.000000 710 +leav 0 21 3.912023 0.000000 772 +corpor 0 21 3.912023 0.000000 802 +lower 0 18 4.060443 0.000000 886 +eduphon 0 15 4.248495 0.000000 1060 +equip 0 10 4.653960 0.000000 1459 +soda 0 6 5.164786 0.000000 2189 +departmentcornel 0 5 5.347108 0.000000 2275 +stoc 0 5 5.347108 0.000000 2491 +henzing 0 3 5.857933 0.000000 3713 +professorcomput 0 3 5.857933 0.000000 3714 +monika 0 2 6.263398 0.000000 4141 +rauch 0 2 6.263398 0.000000 4142 +homepagemonika 0 1 6.957497 0.000000 11254 +henzingerassist 0 1 6.957497 0.000000 11255 +centerhomepageresearch 0 1 6.957497 0.000000 11256 +interestscombinatori 0 1 6.957497 0.000000 11257 +pageprogram 0 1 6.957497 0.000000 11258 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..dd8e4968 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +paper 1 205 1.609438 1.609438 38 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +area 1 144 1.945910 1.945910 80 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +final 0 116 2.197225 0.000000 108 +user 1 104 2.302585 2.302585 137 +technic 0 100 2.302585 0.000000 140 +exam 0 86 2.484907 0.000000 169 +level 0 87 2.484907 0.000000 180 +master 0 76 2.564949 0.000000 216 +interfac 0 79 2.564949 0.000000 209 +integr 0 67 2.708050 0.000000 245 +plan 1 65 2.772589 2.772589 272 +ithaca 0 65 2.772589 0.000000 294 +collect 0 65 2.772589 0.000000 268 +share 1 59 2.833213 2.833213 304 +movi 0 40 3.258097 0.000000 459 +field 0 37 3.332205 0.000000 482 +travel 0 30 3.555348 0.000000 579 +tenni 0 20 3.951244 0.000000 838 +bachelor 0 17 4.110874 0.000000 957 +horu 0 14 4.317488 0.000000 1116 +huang 1 12 4.465908 4.465908 1202 +reness 0 11 4.553877 0.000000 1333 +werner 0 10 4.653960 0.000000 1385 +ride 0 8 4.875197 0.000000 1741 +vogel 0 8 4.875197 0.000000 1622 +sheldon 0 2 6.263398 0.000000 5226 +stanlei 1 1 6.957497 6.957497 11259 +kentucki 1 1 6.957497 6.957497 11260 +huangmast 0 1 6.957497 0.000000 11261 +studentmhuang 0 1 6.957497 0.000000 11262 +courtcornel 0 1 6.957497 0.000000 11263 +systemsdistribut 0 1 6.957497 0.000000 11264 +systemsdatabas 0 1 6.957497 0.000000 11265 +retrievalgraph 0 1 6.957497 0.000000 11266 +interfacesoth 0 1 6.957497 0.000000 11267 +horse_back 0 1 6.957497 0.000000 11268 +myadvisor 0 1 6.957497 0.000000 11269 +robbertvan 0 1 6.957497 0.000000 11270 +planplan 0 1 6.957497 0.000000 11271 +distributionplan 0 1 6.957497 0.000000 11272 +updateplan 0 1 6.957497 0.000000 11273 +faqhorusc 0 1 6.957497 0.000000 11274 +memorydistribut 0 1 6.957497 0.000000 11275 +memorysom 0 1 6.957497 0.000000 11276 +communicationsnapshotu 0 1 6.957497 0.000000 11277 +architecturejobscar 0 1 6.957497 0.000000 11278 +pathbai 0 1 6.957497 0.000000 11279 +jobscyberezumescar 0 1 6.957497 0.000000 11280 +opportunitiesus 0 1 6.957497 0.000000 11281 +stufftechn 0 1 6.957497 0.000000 11282 +searchbel 0 1 6.957497 0.000000 11283 +labsspbsd 0 1 6.957497 0.000000 11284 +sourcesjavarfclast 0 1 6.957497 0.000000 11285 +mhuang 0 1 6.957497 0.000000 11286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..107f62db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +list 1 201 1.609438 1.609438 39 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +develop 0 174 1.791759 0.000000 53 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +person 1 111 2.197225 2.197225 117 +find 0 111 2.197225 0.000000 111 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +homepag 1 93 2.397895 2.397895 148 +pictur 0 89 2.397895 0.000000 160 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +comment 0 93 2.397895 0.000000 146 +second 0 81 2.484907 0.000000 166 +west 0 83 2.484907 0.000000 192 +know 0 80 2.564949 0.000000 198 +copi 0 63 2.772589 0.000000 284 +copyright 0 36 3.367296 0.000000 495 +express 0 32 3.465736 0.000000 540 +abl 0 30 3.555348 0.000000 566 +challeng 0 26 3.688879 0.000000 653 +never 0 25 3.737670 0.000000 671 +universityithaca 0 24 3.761200 0.000000 710 +decis 0 23 3.806662 0.000000 728 +sciencecornel 0 22 3.850148 0.000000 768 +protect 0 17 4.110874 0.000000 935 +differenti 0 17 4.110874 0.000000 921 +precis 0 15 4.248495 0.000000 1023 +script 0 13 4.382027 0.000000 1171 +speech 0 12 4.465908 0.000000 1222 +holidai 0 12 4.465908 0.000000 1224 +mass 0 8 4.875197 0.000000 1732 +parti 0 8 4.875197 0.000000 1676 +cat 0 6 5.164786 0.000000 2194 +rebecca 0 6 5.164786 0.000000 2174 +highest 0 4 5.568345 0.000000 2950 +cuc 0 4 5.568345 0.000000 2630 +government 0 2 6.263398 0.000000 4248 +aclu 0 2 6.263398 0.000000 5227 +reno 0 2 6.263398 0.000000 5228 +millett 1 1 6.957497 6.957497 11287 +lynett 1 1 6.957497 6.957497 11288 +millettdepart 0 1 6.957497 0.000000 11289 +participatoryform 0 1 6.957497 0.000000 11290 +internetdeserv 0 1 6.957497 0.000000 11291 +intrus 0 1 6.957497 0.000000 11292 +skit 0 1 6.957497 0.000000 11293 +femin 0 1 6.957497 0.000000 11294 +feminist 0 1 6.957497 0.000000 11295 +whenver 0 1 6.957497 0.000000 11296 +sentiment 0 1 6.957497 0.000000 11297 +doormat 0 1 6.957497 0.000000 11298 +prostitut 0 1 6.957497 0.000000 11299 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..84cbebe5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +link 1 247 1.386294 1.386294 24 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +network 0 168 1.791759 0.000000 61 +perform 0 143 1.945910 0.000000 74 +high 1 130 2.079442 2.079442 101 +machin 0 129 2.079442 0.000000 95 +databas 0 122 2.079442 0.000000 86 +place 0 106 2.197225 0.000000 124 +manag 0 114 2.197225 0.000000 125 +check 0 115 2.197225 0.000000 118 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +involv 0 71 2.639057 0.000000 227 +servic 0 72 2.639057 0.000000 236 +practic 0 70 2.708050 0.000000 246 +ithaca 0 65 2.772589 0.000000 294 +new 0 64 2.772589 0.000000 262 +major 0 56 2.890372 0.000000 315 +reason 0 57 2.890372 0.000000 318 +extens 0 53 2.944439 0.000000 340 +cool 0 49 3.044522 0.000000 374 +video 0 44 3.135494 0.000000 405 +offer 0 43 3.178054 0.000000 414 +vision 0 41 3.218876 0.000000 430 +electr 0 38 3.295837 0.000000 461 +soon 0 36 3.367296 0.000000 494 +computersci 0 30 3.555348 0.000000 562 +hope 0 28 3.610918 0.000000 610 +weather 0 28 3.610918 0.000000 618 +latest 0 21 3.912023 0.000000 785 +sure 0 20 3.951244 0.000000 813 +ever 0 19 4.007333 0.000000 872 +practicum 0 16 4.174387 0.000000 960 +stock 0 16 4.174387 0.000000 1007 +menu 0 13 4.382027 0.000000 1156 +everyth 0 13 4.382027 0.000000 1169 +meng 0 12 4.465908 0.000000 1214 +earth 0 10 4.653960 0.000000 1463 +transmiss 0 9 4.753590 0.000000 1588 +andcomput 0 8 4.875197 0.000000 1623 +capac 0 8 4.875197 0.000000 1740 +film 0 8 4.875197 0.000000 1761 +temporari 0 6 5.164786 0.000000 2090 +wrong 0 6 5.164786 0.000000 2025 +conot 0 5 5.347108 0.000000 2245 +doubl 0 4 5.568345 0.000000 2951 +festiv 0 4 5.568345 0.000000 2952 +polytechn 0 3 5.857933 0.000000 3222 +educornel 0 3 5.857933 0.000000 3601 +coolest 0 2 6.263398 0.000000 5229 +newgroup 0 2 6.263398 0.000000 4191 +pagemi 0 2 6.263398 0.000000 5230 +nerd 0 2 6.263398 0.000000 5231 +newgroupc 1 1 6.957497 6.957497 11300 +mishaal 1 1 6.957497 6.957497 11301 +pagemisha 0 1 6.957497 0.000000 11302 +kuwaiti 0 1 6.957497 0.000000 11303 +mengc 0 1 6.957497 0.000000 11304 +worcest 0 1 6.957497 0.000000 11305 +inworcest 0 1 6.957497 0.000000 11306 +bearaccess 0 1 6.957497 0.000000 11307 +newgroupnba 0 1 6.957497 0.000000 11308 +newgroupoptim 0 1 6.957497 0.000000 11309 +kuwait 0 1 6.957497 0.000000 11310 +quotescool 0 1 6.957497 0.000000 11311 +cann 0 1 6.957497 0.000000 11312 +accus 0 1 6.957497 0.000000 11313 +almashanmisha 0 1 6.957497 0.000000 11314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..1ffc57bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +last 1 314 1.098612 1.098612 14 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +list 1 201 1.609438 1.609438 39 +class 0 199 1.609438 0.000000 37 +recent 0 167 1.791759 0.000000 58 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +construct 0 139 1.945910 0.000000 82 +relat 0 139 1.945910 0.000000 68 +number 0 130 2.079442 0.000000 97 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +version 0 113 2.197225 0.000000 122 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +site 0 106 2.197225 0.000000 119 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +call 0 91 2.397895 0.000000 153 +pictur 0 89 2.397895 0.000000 160 +graphic 0 90 2.397895 0.000000 147 +homepag 0 93 2.397895 0.000000 148 +imag 0 91 2.397895 0.000000 161 +stuff 1 87 2.484907 2.484907 171 +thing 1 84 2.484907 2.484907 189 +member 0 84 2.484907 0.000000 165 +internet 0 83 2.484907 0.000000 186 +know 1 80 2.564949 2.564949 198 +complet 1 77 2.564949 2.564949 208 +want 0 79 2.564949 0.000000 199 +come 0 78 2.564949 0.000000 202 +name 1 72 2.639057 2.639057 220 +onlin 0 75 2.639057 0.000000 223 +line 0 75 2.639057 0.000000 231 +would 1 67 2.708050 2.708050 251 +result 0 65 2.772589 0.000000 281 +plai 0 60 2.833213 0.000000 307 +browser 0 56 2.890372 0.000000 313 +date 1 51 2.995732 2.995732 344 +much 0 52 2.995732 0.000000 349 +cool 1 49 3.044522 3.044522 374 +right 0 48 3.044522 0.000000 363 +visitor 0 49 3.044522 0.000000 371 +better 0 45 3.135494 0.000000 401 +mark 0 44 3.135494 0.000000 403 +made 0 44 3.135494 0.000000 398 +keep 0 44 3.135494 0.000000 409 +realli 1 40 3.258097 3.258097 444 +movi 0 40 3.258097 0.000000 459 +join 0 39 3.258097 0.000000 457 +connect 0 37 3.332205 0.000000 485 +hand 0 37 3.332205 0.000000 475 +statist 0 35 3.401197 0.000000 521 +random 0 34 3.401197 0.000000 511 +idea 0 32 3.465736 0.000000 545 +anim 0 31 3.496508 0.000000 557 +someth 0 31 3.496508 0.000000 554 +quit 0 27 3.637586 0.000000 633 +mike 1 24 3.761200 3.761200 703 +other 0 24 3.761200 0.000000 697 +togeth 0 23 3.806662 0.000000 714 +thank 0 23 3.806662 0.000000 721 +love 0 21 3.912023 0.000000 804 +mpeg 0 20 3.951244 0.000000 831 +andrew 0 19 4.007333 0.000000 849 +coupl 1 17 4.110874 4.110874 939 +stop 0 17 4.110874 0.000000 942 +expand 0 17 4.110874 0.000000 928 +whole 0 17 4.110874 0.000000 940 +sign 0 16 4.174387 0.000000 970 +anyth 0 16 4.174387 0.000000 998 +pretti 1 13 4.382027 4.382027 1191 +count 0 12 4.465908 0.000000 1239 +danc 0 12 4.465908 0.000000 1278 +guess 0 10 4.653960 0.000000 1443 +card 0 10 4.653960 0.000000 1435 +kevin 0 9 4.753590 0.000000 1482 +pick 0 9 4.753590 0.000000 1498 +opinion 0 8 4.875197 0.000000 1708 +attent 0 8 4.875197 0.000000 1651 +chanc 0 7 5.010635 0.000000 1960 +brought 0 7 5.010635 0.000000 1925 +bunch 0 7 5.010635 0.000000 1861 +golf 1 6 5.164786 5.164786 2178 +yeah 0 6 5.164786 0.000000 2195 +kid 1 5 5.347108 5.347108 2516 +pagethi 0 5 5.347108 0.000000 2336 +frog 0 5 5.347108 0.000000 2479 +exchang 0 5 5.347108 0.000000 2310 +sing 0 5 5.347108 0.000000 2499 +everybodi 0 5 5.347108 0.000000 2517 +dark 0 4 5.568345 0.000000 2910 +vote 0 4 5.568345 0.000000 2953 +maria 0 4 5.568345 0.000000 2954 +amaz 0 4 5.568345 0.000000 2600 +stockholm 0 3 5.857933 0.000000 3715 +ryan 0 3 5.857933 0.000000 3679 +nicknam 0 3 5.857933 0.000000 3716 +lame 0 3 5.857933 0.000000 3717 +beavi 1 2 6.263398 6.263398 4995 +suck 0 2 6.263398 0.000000 5232 +donnel 0 2 6.263398 0.000000 5233 +spirit 0 2 6.263398 0.000000 5234 +harmoni 0 2 6.263398 0.000000 5235 +reset 0 2 6.263398 0.000000 5236 +korbi 1 1 6.957497 6.957497 11315 +myguestbook 0 1 6.957497 0.000000 11316 +poll 0 1 6.957497 0.000000 11317 +vitya 0 1 6.957497 0.000000 11318 +corbett 0 1 6.957497 0.000000 11319 +eryn 0 1 6.957497 0.000000 11320 +crave 0 1 6.957497 0.000000 11321 +guttermouth 0 1 6.957497 0.000000 11322 +byjust 0 1 6.957497 0.000000 11323 +peic 0 1 6.957497 0.000000 11324 +accuar 0 1 6.957497 0.000000 11325 +atmak 0 1 6.957497 0.000000 11326 +edubas 0 1 6.957497 0.000000 11327 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..279726f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +fall 0 181 1.609438 0.000000 40 +construct 0 139 1.945910 0.000000 82 +spring 0 131 2.079442 0.000000 88 +graphic 0 90 2.397895 0.000000 147 +multimedia 0 68 2.708050 0.000000 258 +semest 1 58 2.890372 2.890372 312 +effect 0 46 3.091042 0.000000 385 +made 0 44 3.135494 0.000000 398 +anim 0 31 3.496508 0.000000 557 +titl 0 31 3.496508 0.000000 556 +jpeg 0 6 5.164786 0.000000 2053 +nobuhiko 1 1 6.957497 6.957497 11328 +mukai 1 1 6.957497 6.957497 11329 +mukainobuhiko 0 1 6.957497 0.000000 11330 +compressionon 0 1 6.957497 0.000000 11331 +magicon 0 1 6.957497 0.000000 11332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..e25416ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +person 0 111 2.197225 0.000000 117 +homepag 0 93 2.397895 0.000000 148 +imag 0 91 2.397895 0.000000 161 +info 0 85 2.484907 0.000000 176 +resum 0 79 2.564949 0.000000 217 +refer 0 78 2.564949 0.000000 203 +upson 0 71 2.639057 0.000000 218 +eduoffic 0 33 3.433987 0.000000 531 +photo 0 31 3.496508 0.000000 561 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +studentdepart 0 5 5.347108 0.000000 2505 +nichola 1 3 5.857933 5.857933 3252 +how 1 3 5.857933 5.857933 3289 +nihow 0 1 6.957497 0.000000 11333 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..39f7a5e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +cornel 1 215 1.386294 1.386294 23 +public 0 202 1.609438 0.000000 43 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +sinc 0 90 2.397895 0.000000 159 +upson 0 71 2.639057 0.000000 218 +java 0 70 2.708050 0.000000 248 +dept 0 64 2.772589 0.000000 291 +ithaca 0 65 2.772589 0.000000 294 +pointer 0 48 3.044522 0.000000 368 +visitor 0 49 3.044522 0.000000 371 +vita 0 38 3.295837 0.000000 473 +niko 1 4 5.568345 5.568345 2637 +pitsiani 0 3 5.857933 0.000000 3175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..cdbab9e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +public 1 202 1.609438 1.609438 43 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +parallel 2 169 1.791759 3.583518 60 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +base 0 165 1.791759 0.000000 50 +phone 0 175 1.791759 0.000000 45 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +hall 0 146 1.945910 0.000000 65 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +machin 0 129 2.079442 0.000000 95 +intern 1 108 2.197225 2.197225 128 +mathemat 1 108 2.197225 2.197225 123 +well 0 109 2.197225 0.000000 121 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +advanc 1 99 2.302585 2.302585 130 +proceed 1 93 2.397895 2.397895 152 +mani 1 92 2.397895 2.397895 150 +center 1 88 2.397895 2.397895 158 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +method 1 80 2.564949 2.564949 213 +master 1 76 2.564949 2.564949 216 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +state 0 76 2.564949 0.000000 207 +solv 0 73 2.639057 0.000000 234 +workshop 0 71 2.639057 0.000000 239 +appli 0 71 2.639057 0.000000 226 +nation 0 74 2.639057 0.000000 240 +symposium 0 72 2.639057 0.000000 238 +simul 1 66 2.708050 2.708050 255 +foundat 0 62 2.772589 0.000000 286 +ithaca 0 65 2.772589 0.000000 294 +back 0 60 2.833213 0.000000 297 +room 0 59 2.833213 0.000000 301 +special 0 56 2.890372 0.000000 320 +undergradu 0 54 2.944439 0.000000 338 +scientif 0 53 2.944439 0.000000 341 +numer 1 49 3.044522 3.044522 369 +adapt 1 46 3.091042 3.091042 387 +map 1 39 3.258097 3.258097 452 +field 1 37 3.332205 3.332205 482 +domain 1 30 3.555348 3.555348 564 +compon 0 30 3.555348 0.000000 570 +load 1 28 3.610918 3.610918 601 +challeng 0 26 3.688879 0.000000 653 +altern 0 26 3.688879 0.000000 641 +task 0 25 3.737670 0.000000 678 +known 0 24 3.761200 0.000000 702 +scalabl 0 24 3.761200 0.000000 705 +equat 0 23 3.806662 0.000000 724 +geometri 0 22 3.850148 0.000000 752 +siam 0 21 3.912023 0.000000 800 +wang 0 21 3.912023 0.000000 790 +portabl 0 20 3.951244 0.000000 819 +binari 0 20 3.951244 0.000000 823 +toolkit 0 20 3.951244 0.000000 835 +runtim 1 19 4.007333 4.007333 858 +partial 1 18 4.060443 4.060443 900 +former 0 17 4.110874 0.000000 956 +differenti 0 17 4.110874 0.000000 921 +partit 0 16 4.174387 0.000000 984 +balanc 1 14 4.317488 4.317488 1112 +incomput 0 14 4.317488 0.000000 1096 +menu 0 13 4.382027 0.000000 1156 +iter 1 12 4.465908 4.465908 1206 +touch 0 12 4.465908 0.000000 1288 +characterist 0 12 4.465908 0.000000 1257 +rice 1 11 4.553877 4.553877 1336 +multithread 0 11 4.553877 0.000000 1315 +black 1 10 4.653960 4.653960 1418 +purdu 0 10 4.653960 0.000000 1466 +decomposit 0 10 4.653960 0.000000 1439 +rhode 0 9 4.753590 0.000000 1579 +sensit 0 8 4.875197 0.000000 1726 +vineet 0 8 4.875197 0.000000 1639 +watson 0 8 4.875197 0.000000 1691 +yang 0 8 4.875197 0.000000 1652 +univeristi 0 8 4.875197 0.000000 1754 +multicomput 1 7 5.010635 5.010635 1890 +solver 1 7 5.010635 5.010635 1911 +thompson 0 6 5.164786 0.000000 2049 +heurist 0 6 5.164786 0.000000 2125 +hole 1 5 5.347108 5.347108 2518 +fluid 1 5 5.347108 5.347108 2440 +grand 0 5 5.347108 0.000000 2425 +niko 1 4 5.568345 5.568345 2637 +bernoulli 0 4 5.568345 0.000000 2955 +kodukula 0 4 5.568345 0.000000 2640 +indupraka 0 4 5.568345 0.000000 2639 +pingali 0 4 5.568345 0.000000 2956 +contemporari 0 4 5.568345 0.000000 2719 +colorado 0 4 5.568345 0.000000 2938 +knight 0 4 5.568345 0.000000 2728 +richter 0 4 5.568345 0.000000 2957 +architecur 0 3 5.857933 0.000000 3448 +ahuja 0 3 5.857933 0.000000 3494 +ctctr 0 3 5.857933 0.000000 3625 +imac 0 3 5.857933 0.000000 3718 +brunswick 0 3 5.857933 0.000000 3567 +mimd 0 3 5.857933 0.000000 3361 +chrisochoid 2 2 6.263398 12.526796 5237 +grid 1 2 6.263398 6.263398 4228 +facet 0 2 6.263398 0.000000 4687 +prema 0 2 6.263398 0.000000 5238 +kale 0 2 6.263398 0.000000 4545 +key 0 2 6.263398 0.000000 5057 +aiaa 0 2 6.263398 0.000000 5239 +moscow 0 2 6.263398 0.000000 4884 +programmingenviron 0 2 6.263398 0.000000 5240 +and 0 2 6.263398 0.000000 5241 +nikosc 0 2 6.263398 0.000000 5242 +housti 1 1 6.957497 6.957497 11334 +ellpack 1 1 6.957497 6.957497 11335 +sukup 1 1 6.957497 6.957497 11336 +mississippi 1 1 6.957497 6.957497 11337 +papachi 1 1 6.957497 6.957497 11338 +florian 0 1 6.957497 0.000000 11339 +reza 0 1 6.957497 0.000000 11340 +behforooz 0 1 6.957497 0.000000 11341 +animesh 0 1 6.957497 0.000000 11342 +chatterje 0 1 6.957497 0.000000 11343 +rajani 0 1 6.957497 0.000000 11344 +vaidyanathan 0 1 6.957497 0.000000 11345 +bowyer 0 1 6.957497 0.000000 11346 +offifth 0 1 6.957497 0.000000 11347 +kohl 0 1 6.957497 0.000000 11348 +yellick 0 1 6.957497 0.000000 11349 +unstructur 0 1 6.957497 0.000000 11350 +collid 0 1 6.957497 0.000000 11351 +haupt 0 1 6.957497 0.000000 11352 +scalableparallel 0 1 6.957497 0.000000 11353 +engineeringresearch 0 1 6.957497 0.000000 11354 +parallelhardwar 0 1 6.957497 0.000000 11355 +differentialequ 0 1 6.957497 0.000000 11356 +vichnevetski 0 1 6.957497 0.000000 11357 +decompos 0 1 6.957497 0.000000 11358 +kortesi 0 1 6.957497 0.000000 11359 +domaindecomposit 0 1 6.957497 0.000000 11360 +ussr 0 1 6.957497 0.000000 11361 +glowinski 0 1 6.957497 0.000000 11362 +karathanas 0 1 6.957497 0.000000 11363 +samartzi 0 1 6.957497 0.000000 11364 +vavali 0 1 6.957497 0.000000 11365 +weerawarana 0 1 6.957497 0.000000 11366 +onsupercomput 0 1 6.957497 0.000000 11367 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..1c36a9a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +cornel 1 215 1.386294 1.386294 23 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +parallel 1 169 1.791759 1.791759 60 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +relat 0 139 1.945910 0.000000 68 +compil 1 122 2.079442 2.079442 96 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +center 0 88 2.397895 0.000000 158 +environ 1 84 2.484907 2.484907 177 +institut 0 84 2.484907 0.000000 187 +build 0 85 2.484907 0.000000 184 +journal 0 83 2.484907 0.000000 183 +dynam 1 76 2.564949 2.564949 194 +solv 0 73 2.639057 0.000000 234 +appli 0 71 2.639057 0.000000 226 +copi 0 63 2.772589 0.000000 284 +automat 0 61 2.833213 0.000000 306 +share 0 59 2.833213 0.000000 304 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +scientif 0 53 2.944439 0.000000 341 +numer 0 49 3.044522 0.000000 369 +adapt 0 46 3.091042 0.000000 387 +execut 0 45 3.135494 0.000000 404 +mechan 0 43 3.178054 0.000000 416 +multi 0 36 3.367296 0.000000 493 +copyright 0 36 3.367296 0.000000 495 +global 0 34 3.401197 0.000000 520 +load 1 28 3.610918 3.610918 601 +task 0 25 3.737670 0.000000 678 +thread 0 23 3.806662 0.000000 722 +varieti 0 22 3.850148 0.000000 740 +portabl 1 20 3.951244 3.951244 819 +runtim 1 19 4.007333 4.007333 858 +style 0 15 4.248495 0.000000 1036 +balanc 1 14 4.317488 4.317488 1112 +target 0 12 4.465908 0.000000 1282 +multithread 0 11 4.553877 0.000000 1315 +consortium 0 10 4.653960 0.000000 1467 +port 1 8 4.875197 4.875197 1766 +multicomput 1 7 5.010635 5.010635 1890 +niko 1 4 5.568345 5.568345 2637 +ctctr 0 3 5.857933 0.000000 3625 +prema 1 2 6.263398 6.263398 5238 +chrisochoid 1 2 6.263398 6.263398 5237 +suppot 0 2 6.263398 0.000000 5243 +nikosc 0 2 6.263398 0.000000 5242 +andproblem 0 1 6.957497 0.000000 11368 +computingappl 0 1 6.957497 0.000000 11369 +pdecomput 0 1 6.957497 0.000000 11370 +pcrc 0 1 6.957497 0.000000 11371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..d507e68c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +welcom 0 122 2.079442 0.000000 99 +look 1 107 2.197225 2.197225 115 +world 0 115 2.197225 0.000000 126 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +real 0 93 2.397895 0.000000 144 +learn 0 86 2.484907 0.000000 170 +java 0 70 2.708050 0.000000 248 +visit 0 63 2.772589 0.000000 288 +written 0 63 2.772589 0.000000 278 +locat 0 59 2.833213 0.000000 303 +game 0 36 3.367296 0.000000 498 +photo 0 31 3.496508 0.000000 561 +galleri 0 13 4.382027 0.000000 1192 +rest 0 12 4.465908 0.000000 1259 +invit 0 10 4.653960 0.000000 1428 +nuprl 0 10 4.653960 0.000000 1402 +sundai 0 10 4.653960 0.000000 1387 +tire 0 4 5.568345 0.000000 2799 +cyberspac 0 3 5.857933 0.000000 3719 +pavel 0 2 6.263398 0.000000 4164 +cinema 0 2 6.263398 0.000000 5244 +naumov 0 1 6.957497 0.000000 11372 +orplai 0 1 6.957497 0.000000 11373 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..d0b8d2c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,195 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +oper 1 180 1.609438 1.609438 34 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +algorithm 2 162 1.791759 3.583518 57 +parallel 2 169 1.791759 3.583518 60 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +architectur 1 139 1.945910 1.945910 77 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +intern 1 108 2.197225 2.197225 128 +find 1 111 2.197225 2.197225 111 +structur 1 106 2.197225 2.197225 105 +place 0 106 2.197225 0.000000 124 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +need 0 98 2.302585 0.000000 135 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +select 0 91 2.397895 0.000000 154 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +control 1 82 2.484907 2.484907 164 +requir 0 81 2.484907 0.000000 167 +wide 0 84 2.484907 0.000000 185 +thing 0 84 2.484907 0.000000 189 +chang 0 82 2.484907 0.000000 163 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +materi 0 75 2.639057 0.000000 221 +workshop 0 71 2.639057 0.000000 239 +upson 0 71 2.639057 0.000000 218 +goal 0 66 2.708050 0.000000 250 +practic 0 70 2.708050 0.000000 246 +simul 0 66 2.708050 0.000000 255 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +simpl 0 60 2.833213 0.000000 298 +publish 1 57 2.890372 2.890372 326 +thesi 0 57 2.890372 0.000000 327 +direct 0 57 2.890372 0.000000 316 +detail 0 57 2.890372 0.000000 321 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +investig 0 51 2.995732 0.000000 353 +hardwar 0 51 2.995732 0.000000 350 +set 0 50 3.044522 0.000000 361 +physic 0 47 3.091042 0.000000 377 +could 0 46 3.091042 0.000000 383 +video 0 44 3.135494 0.000000 405 +long 0 43 3.178054 0.000000 413 +offer 0 43 3.178054 0.000000 414 +vision 1 41 3.218876 3.218876 430 +fast 1 42 3.218876 3.218876 429 +linear 1 41 3.218876 3.218876 431 +futur 0 41 3.218876 0.000000 427 +press 0 42 3.218876 0.000000 419 +york 0 41 3.218876 0.000000 435 +theoret 1 39 3.258097 3.258097 446 +realli 1 40 3.258097 3.258097 444 +must 0 40 3.258097 0.000000 442 +societi 0 40 3.258097 0.000000 456 +transact 0 39 3.258097 0.000000 438 +connect 1 37 3.332205 3.332205 485 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +game 0 36 3.367296 0.000000 498 +tree 0 36 3.367296 0.000000 492 +singl 1 34 3.401197 3.401197 510 +tech 1 35 3.401197 3.401197 515 +global 1 34 3.401197 3.401197 520 +taken 0 31 3.496508 0.000000 555 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +multiprocessor 1 28 3.610918 3.610918 605 +becom 0 28 3.610918 0.000000 603 +campu 0 27 3.637586 0.000000 623 +challeng 0 26 3.688879 0.000000 653 +notic 0 25 3.737670 0.000000 675 +scalabl 0 24 3.761200 0.000000 705 +universityithaca 0 24 3.761200 0.000000 710 +highli 0 23 3.806662 0.000000 725 +sequenti 0 22 3.850148 0.000000 745 +chip 0 21 3.912023 0.000000 770 +vlsi 0 21 3.912023 0.000000 795 +alloc 1 20 3.951244 3.951244 821 +prepar 0 20 3.951244 0.000000 824 +exploit 0 20 3.951244 0.000000 836 +region 1 19 4.007333 4.007333 875 +dimension 0 18 4.060443 0.000000 909 +speed 0 18 4.060443 0.000000 911 +element 0 18 4.060443 0.000000 895 +regist 1 17 4.110874 4.110874 938 +estim 0 17 4.110874 0.000000 930 +sept 0 17 4.110874 0.000000 952 +spatial 0 16 4.174387 0.000000 988 +reflect 0 15 4.248495 0.000000 1034 +near 0 14 4.317488 0.000000 1091 +polynomi 0 14 4.317488 0.000000 1069 +believ 1 13 4.382027 4.382027 1187 +johnson 1 13 4.382027 4.382027 1162 +sigplan 1 13 4.382027 4.382027 1190 +mesh 1 11 4.553877 4.553877 1351 +desktop 0 10 4.653960 0.000000 1445 +placement 0 10 4.653960 0.000000 1420 +cryptographi 0 9 4.753590 0.000000 1512 +realiz 1 8 4.875197 4.875197 1739 +perhap 0 8 4.875197 0.000000 1693 +attent 0 8 4.875197 0.000000 1651 +character 0 8 4.875197 0.000000 1767 +entri 0 8 4.875197 0.000000 1678 +pldi 0 8 4.875197 0.000000 1704 +irregular 0 8 4.875197 0.000000 1768 +hallcornel 0 8 4.875197 0.000000 1757 +maxim 1 7 5.010635 5.010635 1944 +henc 0 7 5.010635 0.000000 1805 +pursu 0 7 5.010635 0.000000 1902 +sensor 0 7 5.010635 0.000000 1920 +hidden 0 6 5.164786 0.000000 1987 +feasibl 0 6 5.164786 0.000000 2157 +cellular 0 5 5.347108 0.000000 2433 +grand 0 5 5.347108 0.000000 2425 +pingali 1 4 5.568345 5.568345 2956 +sold 0 4 5.568345 0.000000 2813 +compcon 0 4 5.568345 0.000000 2958 +zippel 0 4 5.568345 0.000000 2879 +neumann 1 3 5.857933 5.857933 3720 +simd 1 3 5.857933 5.857933 3360 +lattic 0 3 5.857933 0.000000 3721 +parallelmachin 0 3 5.857933 0.000000 3693 +arm 0 3 5.857933 0.000000 3697 +exit 0 3 5.857933 0.000000 3124 +usaemail 0 3 5.857933 0.000000 3722 +pearson 2 2 6.263398 12.526796 5245 +crystal 0 2 6.263398 0.000000 5013 +molecul 0 2 6.263398 0.000000 5246 +succe 0 2 6.263398 0.000000 5214 +vazirani 1 1 6.957497 6.957497 11374 +bipartit 1 1 6.957497 6.957497 11375 +consistingof 0 1 6.957497 0.000000 11376 +theubiquit 0 1 6.957497 0.000000 11377 +heed 0 1 6.957497 0.000000 11378 +lawsof 0 1 6.957497 0.000000 11379 +layoutand 0 1 6.957497 0.000000 11380 +accomplishedbi 0 1 6.957497 0.000000 11381 +ihav 0 1 6.957497 0.000000 11382 +couldb 0 1 6.957497 0.000000 11383 +thisarchitectur 0 1 6.957497 0.000000 11384 +designfor 0 1 6.957497 0.000000 11385 +proteinstructur 0 1 6.957497 0.000000 11386 +parallelcomput 0 1 6.957497 0.000000 11387 +commodityand 0 1 6.957497 0.000000 11388 +architectureand 0 1 6.957497 0.000000 11389 +hideth 0 1 6.957497 0.000000 11390 +underlyingvon 0 1 6.957497 0.000000 11391 +architectureha 0 1 6.957497 0.000000 11392 +easyto 0 1 6.957497 0.000000 11393 +dunten 0 1 6.957497 0.000000 11394 +kiewit 0 1 6.957497 0.000000 11395 +pillai 0 1 6.957497 0.000000 11396 +irregularli 0 1 6.957497 0.000000 11397 +allerton 0 1 6.957497 0.000000 11398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..77f9245c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +cornel 2 215 1.386294 2.772588 23 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +contact 1 153 1.791759 1.791759 59 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +manag 0 114 2.197225 0.000000 125 +pleas 0 113 2.197225 0.000000 114 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +part 0 98 2.302585 0.000000 129 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +school 0 84 2.484907 0.000000 188 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +master 1 76 2.564949 2.564949 216 +server 1 76 2.564949 2.564949 204 +resum 0 79 2.564949 0.000000 217 +complet 0 77 2.564949 0.000000 208 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +java 1 70 2.708050 2.708050 248 +receiv 1 66 2.708050 2.708050 244 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +knowledg 0 67 2.708050 0.000000 243 +practic 0 70 2.708050 0.000000 246 +window 0 68 2.708050 0.000000 242 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +content 1 59 2.833213 2.833213 302 +colleg 1 61 2.833213 2.833213 300 +best 0 59 2.833213 0.000000 299 +semest 1 58 2.890372 2.890372 312 +found 0 53 2.944439 0.000000 337 +februari 0 54 2.944439 0.000000 328 +run 0 51 2.995732 0.000000 347 +case 0 51 2.995732 0.000000 351 +visitor 0 49 3.044522 0.000000 371 +still 0 50 3.044522 0.000000 362 +possibl 0 47 3.091042 0.000000 378 +favorit 1 44 3.135494 3.135494 410 +offer 0 43 3.178054 0.000000 414 +compani 0 41 3.218876 0.000000 423 +soon 0 36 3.367296 0.000000 494 +chapter 1 32 3.465736 3.465736 536 +taken 0 31 3.496508 0.000000 555 +abl 0 30 3.555348 0.000000 566 +quot 0 29 3.583519 0.000000 582 +becom 0 28 3.610918 0.000000 603 +hope 0 28 3.610918 0.000000 610 +administr 0 27 3.637586 0.000000 628 +request 0 26 3.688879 0.000000 635 +enabl 0 26 3.688879 0.000000 655 +enhanc 0 26 3.688879 0.000000 644 +client 1 25 3.737670 3.737670 679 +reach 0 24 3.761200 0.000000 688 +alwai 0 24 3.761200 0.000000 691 +cooper 0 22 3.850148 0.000000 757 +busi 0 21 3.912023 0.000000 784 +applet 1 20 3.951244 3.951244 827 +mpeg 0 20 3.951244 0.000000 831 +wonder 0 20 3.951244 0.000000 815 +log 0 19 4.007333 0.000000 857 +stock 0 16 4.174387 0.000000 1007 +todd 1 15 4.248495 4.248495 1051 +joint 0 13 4.382027 0.000000 1130 +johnson 0 13 4.382027 0.000000 1162 +brother 0 13 4.382027 0.000000 1189 +meng 1 12 4.465908 4.465908 1214 +round 0 8 4.875197 0.000000 1769 +presid 0 6 5.164786 0.000000 2196 +quickli 0 6 5.164786 0.000000 2000 +classroom 0 6 5.164786 0.000000 2006 +microsystem 0 6 5.164786 0.000000 2160 +junior 0 5 5.347108 0.000000 2519 +supplement 0 5 5.347108 0.000000 2355 +suppli 0 4 5.568345 0.000000 2611 +tape 0 4 5.568345 0.000000 2959 +permiss 0 4 5.568345 0.000000 2642 +jointli 0 3 5.857933 0.000000 3118 +eduand 0 3 5.857933 0.000000 3452 +roll 0 3 5.857933 0.000000 3723 +espn 0 3 5.857933 0.000000 3724 +borrow 0 3 5.857933 0.000000 3725 +fratern 0 2 6.263398 0.000000 4979 +throughth 0 2 6.263398 0.000000 4065 +ticker 0 2 6.263398 0.000000 5247 +peskin 1 1 6.957497 6.957497 11399 +acacia 1 1 6.957497 6.957497 11400 +andyour 0 1 6.957497 0.000000 11401 +workeda 0 1 6.957497 0.000000 11402 +cornellundergradu 0 1 6.957497 0.000000 11403 +theirfield 0 1 6.957497 0.000000 11404 +isrun 0 1 6.957497 0.000000 11405 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..8d32849c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +offic 0 299 1.098612 0.000000 13 +cornel 1 215 1.386294 1.386294 23 +list 0 201 1.609438 0.000000 39 +read 1 154 1.791759 1.791759 47 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +mani 0 92 2.397895 0.000000 150 +stuff 1 87 2.484907 2.484907 171 +second 0 81 2.484907 0.000000 166 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +david 1 71 2.639057 2.639057 232 +upson 0 71 2.639057 0.000000 218 +ithaca 1 65 2.772589 2.772589 294 +without 0 50 3.044522 0.000000 370 +favorit 0 44 3.135494 0.000000 410 +howev 0 41 3.218876 0.000000 422 +approxim 0 35 3.401197 0.000000 509 +go 0 33 3.433987 0.000000 529 +quot 0 29 3.583519 0.000000 582 +great 0 27 3.637586 0.000000 626 +although 0 25 3.737670 0.000000 667 +citi 0 19 4.007333 0.000000 874 +otherwis 0 17 4.110874 0.000000 922 +alreadi 0 16 4.174387 0.000000 963 +month 0 15 4.248495 0.000000 1025 +dave 0 14 4.317488 0.000000 1098 +philadelphia 0 12 4.465908 0.000000 1244 +resid 0 10 4.653960 0.000000 1461 +shop 0 10 4.653960 0.000000 1469 +imposs 0 9 4.753590 0.000000 1513 +pittsburgh 1 7 5.010635 5.010635 1938 +pennsylvania 1 7 5.010635 5.010635 1932 +famou 0 6 5.164786 0.000000 2185 +pierc 1 4 5.568345 5.568345 2623 +outlet 0 2 6.263398 0.000000 5248 +valentin 0 1 6.957497 0.000000 11406 +familycurr 0 1 6.957497 0.000000 11407 +halfwai 0 1 6.957497 0.000000 11408 +andharrisburg 0 1 6.957497 0.000000 11409 +younev 0 1 6.957497 0.000000 11410 +sinceit 0 1 6.957497 0.000000 11411 +throughpittsburgh 0 1 6.957497 0.000000 11412 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..36333609 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +applic 1 170 1.791759 1.791759 56 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +compil 1 122 2.079442 2.079442 96 +technolog 0 131 2.079442 0.000000 102 +theori 1 111 2.197225 2.197225 127 +well 0 109 2.197225 0.000000 121 +code 0 108 2.197225 0.000000 116 +teach 0 108 2.197225 0.000000 112 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +center 1 88 2.397895 2.397895 158 +present 1 91 2.397895 2.397895 145 +imag 0 91 2.397895 0.000000 161 +octob 0 89 2.397895 0.000000 156 +member 0 84 2.484907 0.000000 165 +institut 0 84 2.484907 0.000000 187 +level 0 87 2.484907 0.000000 180 +info 0 85 2.484907 0.000000 176 +april 0 77 2.564949 0.000000 196 +multimedia 0 68 2.708050 0.000000 258 +ithaca 0 65 2.772589 0.000000 294 +prof 0 64 2.772589 0.000000 273 +summer 1 56 2.890372 2.890372 311 +talk 1 53 2.944439 2.944439 336 +undergradu 0 54 2.944439 0.000000 338 +extens 0 53 2.944439 0.000000 340 +maintain 0 51 2.995732 0.000000 342 +paul 0 38 3.295837 0.000000 471 +seminar 0 38 3.295837 0.000000 470 +multi 0 36 3.367296 0.000000 493 +random 0 34 3.401197 0.000000 511 +taught 0 33 3.433987 0.000000 526 +transform 1 32 3.465736 3.465736 542 +given 0 32 3.465736 0.000000 538 +abl 1 30 3.555348 3.555348 566 +packag 1 28 3.610918 3.610918 614 +framework 0 28 3.610918 0.000000 606 +seri 0 24 3.761200 0.000000 708 +handl 0 24 3.761200 0.000000 685 +lab 0 24 3.761200 0.000000 698 +deal 0 22 3.850148 0.000000 736 +instal 0 22 3.850148 0.000000 754 +runtim 0 19 4.007333 0.000000 858 +affili 0 13 4.382027 0.000000 1194 +deriv 0 13 4.382027 0.000000 1145 +block 0 13 4.382027 0.000000 1183 +loop 1 11 4.553877 4.553877 1310 +vladimir 0 11 4.553877 0.000000 1324 +regard 0 11 4.553877 0.000000 1309 +prior 0 10 4.653960 0.000000 1438 +tradit 0 10 4.653960 0.000000 1404 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +madra 0 8 4.875197 0.000000 1770 +watson 0 8 4.875197 0.000000 1691 +keshav 0 7 5.010635 0.000000 1852 +tip 0 7 5.010635 0.000000 1863 +nest 1 6 5.164786 5.164786 2151 +dens 0 6 5.164786 0.000000 2122 +handi 0 6 5.164786 0.000000 2111 +czar 0 5 5.347108 0.000000 2503 +licens 0 5 5.347108 0.000000 2520 +indupraka 1 4 5.568345 5.568345 2639 +kodukula 1 4 5.568345 5.568345 2640 +bernoulli 0 4 5.568345 0.000000 2955 +pingali 0 4 5.568345 0.000000 2956 +vijai 0 4 5.568345 0.000000 2960 +stodghil 0 4 5.568345 0.000000 2864 +trivial 0 4 5.568345 0.000000 2786 +dagstuhl 0 4 5.568345 0.000000 2871 +vliw 1 3 5.857933 5.857933 3514 +interplai 0 3 5.857933 0.000000 3726 +chelmsford 0 3 5.857933 0.000000 3564 +schloss 0 3 5.857933 0.000000 3727 +useof 0 3 5.857933 0.000000 3368 +andoper 0 3 5.857933 0.000000 3621 +praka 0 2 6.263398 0.000000 4155 +nawaaz 0 2 6.263398 0.000000 4153 +ahm 0 2 6.263398 0.000000 4154 +kotlyar 0 2 6.263398 0.000000 4907 +menon 0 2 6.263398 0.000000 5249 +imperfectli 1 1 6.957497 6.957497 11413 +tothat 0 1 6.957497 0.000000 11414 +andmultiprocessor 0 1 6.957497 0.000000 11415 +fromscientif 0 1 6.957497 0.000000 11416 +withibm 0 1 6.957497 0.000000 11417 +hasinterest 0 1 6.957497 0.000000 11418 +athp 0 1 6.957497 0.000000 11419 +wasabout 0 1 6.957497 0.000000 11420 +necess 0 1 6.957497 0.000000 11421 +looptransform 0 1 6.957497 0.000000 11422 +loopparallel 0 1 6.957497 0.000000 11423 +regardingdata 0 1 6.957497 0.000000 11424 +centric 0 1 6.957497 0.000000 11425 +availableund 0 1 6.957497 0.000000 11426 +departmentmachin 0 1 6.957497 0.000000 11427 +andfind 0 1 6.957497 0.000000 11428 +alsofind 0 1 6.957497 0.000000 11429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..188dd901 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +data 1 170 1.791759 1.791759 49 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +databas 1 122 2.079442 2.079442 86 +postscript 0 131 2.079442 0.000000 90 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +know 0 80 2.564949 0.000000 198 +upson 0 71 2.639057 0.000000 218 +order 0 69 2.708050 0.000000 249 +ithaca 1 65 2.772589 2.772589 294 +abstract 0 62 2.772589 0.000000 276 +type 0 61 2.833213 0.000000 296 +thesi 0 57 2.890372 0.000000 327 +case 0 51 2.995732 0.000000 351 +profession 0 51 2.995732 0.000000 345 +format 0 48 3.044522 0.000000 356 +tree 0 36 3.367296 0.000000 492 +enhanc 0 26 3.688879 0.000000 644 +sequenc 0 23 3.806662 0.000000 734 +sigmod 0 19 4.007333 0.000000 877 +save 0 14 4.317488 0.000000 1099 +dbm 0 13 4.382027 0.000000 1136 +submiss 0 11 4.553877 0.000000 1298 +road 0 11 4.553877 0.000000 1374 +seshadri 1 7 5.010635 5.010635 1803 +praveen 1 6 5.164786 5.164786 1996 +green 0 4 5.568345 0.000000 2848 +predat 0 3 5.857933 0.000000 3135 +warren 0 3 5.857933 0.000000 3301 +packer 0 3 5.857933 0.000000 3728 +adt 0 1 6.957497 0.000000 11430 +ranjani 0 1 6.957497 0.000000 11431 +ramamurthi 0 1 6.957497 0.000000 11432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..4b108036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +project 2 340 1.098612 2.197224 18 +us 2 329 1.098612 2.197224 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +design 0 213 1.386294 0.000000 25 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +modifi 0 178 1.609438 0.000000 35 +data 3 170 1.791759 5.375277 49 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +relat 2 139 1.945910 3.891820 68 +model 2 145 1.945910 3.891820 69 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +databas 2 122 2.079442 4.158884 86 +confer 1 126 2.079442 2.079442 100 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +call 1 91 2.397895 2.397895 153 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +requir 1 81 2.484907 2.484907 167 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +ieee 0 86 2.484907 0.000000 190 +optim 2 79 2.564949 5.129898 197 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +exampl 1 77 2.564949 2.564949 195 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +order 2 69 2.708050 5.416100 249 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +knowledg 0 67 2.708050 0.000000 243 +practic 0 70 2.708050 0.000000 246 +evalu 1 64 2.772589 2.772589 266 +import 1 65 2.772589 2.772589 282 +complex 1 64 2.772589 2.772589 269 +plan 1 65 2.772589 2.772589 272 +collect 1 65 2.772589 2.772589 268 +previou 1 62 2.772589 2.772589 290 +result 1 65 2.772589 2.772589 281 +abstract 0 62 2.772589 0.000000 276 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +street 0 63 2.772589 0.000000 293 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +march 0 61 2.833213 0.000000 295 +variou 1 56 2.890372 2.890372 317 +detail 1 57 2.890372 2.890372 321 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +sever 0 56 2.890372 0.000000 322 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +case 0 51 2.995732 0.000000 351 +set 1 50 3.044522 3.044522 361 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +could 1 46 3.091042 3.091042 383 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +answer 1 45 3.135494 3.135494 391 +natur 0 44 3.135494 0.000000 406 +algebra 0 45 3.135494 0.000000 394 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +even 0 45 3.135494 0.000000 393 +combin 0 42 3.218876 0.000000 421 +howev 0 41 3.218876 0.000000 422 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +form 1 39 3.258097 3.258097 443 +join 1 39 3.258097 3.258097 457 +map 1 39 3.258097 3.258097 452 +probabl 1 40 3.258097 3.258097 455 +theoret 0 39 3.258097 0.000000 446 +littl 0 39 3.258097 0.000000 454 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +mean 0 37 3.332205 0.000000 477 +cost 0 37 3.332205 0.000000 480 +multi 0 36 3.367296 0.000000 493 +next 1 34 3.401197 3.401197 517 +singl 1 34 3.401197 3.401197 510 +either 0 35 3.401197 0.000000 506 +michael 0 35 3.401197 0.000000 514 +queri 3 33 3.433987 10.301961 524 +express 1 32 3.465736 3.465736 540 +kind 1 32 3.465736 3.465736 541 +idea 0 32 3.465736 0.000000 545 +given 0 32 3.465736 0.000000 538 +extend 0 32 3.465736 0.000000 539 +transform 0 32 3.465736 0.000000 542 +posit 1 31 3.496508 3.496508 552 +storag 1 31 3.496508 3.496508 553 +scientist 0 31 3.496508 0.000000 560 +domain 1 30 3.555348 3.555348 564 +exist 1 30 3.555348 3.555348 569 +specifi 0 30 3.555348 0.000000 568 +built 1 29 3.583519 3.583519 592 +consid 0 29 3.583519 0.000000 590 +propos 1 28 3.610918 3.610918 602 +weather 0 28 3.610918 0.000000 618 +ask 0 28 3.610918 0.000000 597 +scale 0 28 3.610918 0.000000 613 +except 0 28 3.610918 0.000000 607 +framework 0 28 3.610918 0.000000 606 +client 1 25 3.737670 3.737670 679 +valu 0 25 3.737670 0.000000 665 +wai 0 25 3.737670 0.000000 662 +strategi 0 25 3.737670 0.000000 682 +demonstr 1 24 3.761200 3.761200 694 +store 1 24 3.761200 3.761200 693 +daili 0 24 3.761200 0.000000 706 +sequenc 3 23 3.806662 11.419986 734 +input 0 23 3.806662 0.000000 727 +thread 0 23 3.806662 0.000000 722 +defin 1 22 3.850148 3.850148 746 +sequenti 1 22 3.850148 3.850148 745 +sort 1 22 3.850148 3.850148 738 +serv 0 22 3.850148 0.000000 758 +identifi 0 22 3.850148 0.000000 760 +disk 0 22 3.850148 0.000000 747 +deal 0 22 3.850148 0.000000 736 +instead 0 22 3.850148 0.000000 756 +similar 1 21 3.912023 3.912023 771 +util 0 21 3.912023 0.000000 774 +sigmod 0 19 4.007333 0.000000 877 +record 2 18 4.060443 8.120886 890 +statu 0 18 4.060443 0.000000 885 +event 0 18 4.060443 0.000000 896 +account 0 18 4.060443 0.000000 882 +expand 1 17 4.110874 4.110874 928 +medic 0 17 4.110874 0.000000 958 +monitor 0 17 4.110874 0.000000 941 +weekli 0 17 4.110874 0.000000 919 +estim 0 17 4.110874 0.000000 930 +ramakrishnan 1 16 4.174387 4.174387 972 +advantag 1 16 4.174387 4.174387 987 +easi 0 16 4.174387 0.000000 969 +livni 1 15 4.248495 4.248495 1053 +indic 0 15 4.248495 0.000000 1013 +stream 0 15 4.248495 0.000000 1015 +miron 1 14 4.317488 4.317488 1110 +manner 1 14 4.317488 4.317488 1074 +embed 1 14 4.317488 4.317488 1102 +convent 0 14 4.317488 0.000000 1072 +econom 0 13 4.382027 0.000000 1184 +social 0 13 4.382027 0.000000 1123 +opportun 0 13 4.382027 0.000000 1161 +composit 0 13 4.382027 0.000000 1150 +step 0 13 4.382027 0.000000 1138 +front 0 13 4.382027 0.000000 1154 +raghu 1 12 4.465908 4.465908 1212 +scan 1 12 4.465908 4.465908 1243 +buffer 1 12 4.465908 4.465908 1211 +insid 1 12 4.465908 4.465908 1262 +amount 0 12 4.465908 0.000000 1208 +uniqu 0 12 4.465908 0.000000 1228 +shore 1 11 4.553877 4.553877 1377 +regard 0 11 4.553877 0.000000 1309 +motiv 0 11 4.553877 0.000000 1346 +instanc 0 11 4.553877 0.000000 1322 +devis 1 10 4.653960 4.653960 1451 +relationship 0 10 4.653960 0.000000 1383 +reli 0 10 4.653960 0.000000 1411 +subset 0 10 4.653960 0.000000 1425 +vldb 0 10 4.653960 0.000000 1470 +cheng 0 10 4.653960 0.000000 1381 +declar 1 9 4.753590 4.753590 1526 +tempor 1 9 4.753590 4.753590 1584 +strength 1 9 4.753590 4.753590 1494 +compos 0 9 4.753590 0.000000 1527 +vice 0 9 4.753590 0.000000 1604 +lock 0 9 4.753590 0.000000 1551 +respect 0 9 4.753590 0.000000 1545 +meta 0 9 4.753590 0.000000 1505 +intermedi 0 9 4.753590 0.000000 1497 +observ 0 9 4.753590 0.000000 1578 +mode 0 9 4.753590 0.000000 1492 +pose 0 9 4.753590 0.000000 1535 +seshadri 1 7 5.010635 5.010635 1803 +pageth 0 7 5.010635 0.000000 1939 +notion 0 7 5.010635 0.000000 1947 +merg 0 7 5.010635 0.000000 1862 +whenev 0 7 5.010635 0.000000 1883 +therefor 0 7 5.010635 0.000000 1822 +supportfor 0 7 5.010635 0.000000 1854 +praveen 1 6 5.164786 5.164786 1996 +nest 1 6 5.164786 5.164786 2151 +feasibl 1 6 5.164786 5.164786 2157 +financi 0 6 5.164786 0.000000 2197 +histor 0 6 5.164786 0.000000 2085 +consequ 0 6 5.164786 0.000000 1989 +temporari 0 6 5.164786 0.000000 2090 +greater 1 5 5.347108 5.347108 2258 +treat 0 5 5.347108 0.000000 2521 +correl 0 5 5.347108 0.000000 2279 +dual 0 5 5.347108 0.000000 2522 +distinct 0 5 5.347108 0.000000 2319 +overlap 0 5 5.347108 0.000000 2368 +complementari 0 5 5.347108 0.000000 2523 +educomput 0 5 5.347108 0.000000 2524 +zoom 1 4 5.568345 5.568345 2961 +phenomena 1 4 5.568345 5.568345 2962 +flavor 1 4 5.568345 5.568345 2625 +richter 0 4 5.568345 0.000000 2957 +collaps 1 3 5.857933 5.857933 3729 +inadequ 0 3 5.857933 0.000000 3730 +tediou 0 3 5.857933 0.000000 3731 +ineffici 0 3 5.857933 0.000000 3457 +megabyt 0 3 5.857933 0.000000 3732 +claus 0 3 5.857933 0.000000 3733 +offset 0 3 5.857933 0.000000 3467 +hourli 0 3 5.857933 0.000000 3734 +thathav 0 3 5.857933 0.000000 3735 +serverarchitectur 0 3 5.857933 0.000000 3736 +comad 0 3 5.857933 0.000000 3737 +informationfor 0 3 5.857933 0.000000 3738 +sequin 2 2 6.263398 12.526796 5250 +earthquak 1 2 6.263398 6.263398 5251 +volcano 1 2 6.263398 6.263398 5252 +meteorolog 1 2 6.263398 6.263398 5253 +aredescrib 1 2 6.263398 6.263398 5254 +objectivescurr 0 2 6.263398 0.000000 5255 +statusmotiv 0 2 6.263398 0.000000 5256 +exampleseq 0 2 6.263398 0.000000 5257 +languageoptim 0 2 6.263398 0.000000 5258 +techniquesseq 0 2 6.263398 0.000000 5259 +developmentpublicationsrel 0 2 6.263398 0.000000 5260 +workcontact 0 2 6.263398 0.000000 5261 +informationproject 0 2 6.263398 0.000000 5262 +processingof 0 2 6.263398 0.000000 5263 +theseappl 0 2 6.263398 0.000000 5264 +metereolog 0 2 6.263398 0.000000 5265 +andbiolog 0 2 6.263398 0.000000 5266 +semanticstak 0 2 6.263398 0.000000 5267 +evaluationintegr 0 2 6.263398 0.000000 5268 +canstor 0 2 6.263398 0.000000 5269 +sequencesthes 0 2 6.263398 0.000000 5270 +themost 0 2 6.263398 0.000000 5271 +statusth 0 2 6.263398 0.000000 5272 +algebraicqueri 0 2 6.263398 0.000000 5273 +analogousto 0 2 6.263398 0.000000 5274 +candeclar 0 2 6.263398 0.000000 5275 +likesql 0 2 6.263398 0.000000 5276 +versa 0 2 6.263398 0.000000 5277 +querya 0 2 6.263398 0.000000 5278 +occurr 0 2 6.263398 0.000000 5279 +erupt 0 2 6.263398 0.000000 5280 +didth 0 2 6.263398 0.000000 5281 +groupbi 0 2 6.263398 0.000000 5282 +subqueri 0 2 6.263398 0.000000 5283 +aggregatefunct 0 2 6.263398 0.000000 5284 +sequencesord 0 2 6.263398 0.000000 5285 +modelth 0 2 6.263398 0.000000 5286 +gist 0 2 6.263398 0.000000 5287 +ordereddomain 0 2 6.263398 0.000000 5288 +andposit 0 2 6.263398 0.000000 5289 +recordsmap 0 2 6.263398 0.000000 5290 +rise 0 2 6.263398 0.000000 5291 +relationaloper 0 2 6.263398 0.000000 5292 +andaggreg 0 2 6.263398 0.000000 5293 +researchersin 0 2 6.263398 0.000000 5294 +movingaggreg 0 2 6.263398 0.000000 5295 +worldsitu 0 2 6.263398 0.000000 5296 +extensionof 0 2 6.263398 0.000000 5297 +ofseq 0 2 6.263398 0.000000 5298 +languagew 0 2 6.263398 0.000000 5299 +usingwhich 0 2 6.263398 0.000000 5300 +languagei 0 2 6.263398 0.000000 5301 +queriesa 0 2 6.263398 0.000000 5302 +techniquesw 0 2 6.263398 0.000000 5303 +developmentth 0 2 6.263398 0.000000 5304 +viaa 0 2 6.263398 0.000000 5305 +ontop 0 2 6.263398 0.000000 5306 +languageswhich 0 2 6.263398 0.000000 5307 +arbitrarylevel 0 2 6.263398 0.000000 5308 +viceversa 0 2 6.263398 0.000000 5309 +detailson 0 2 6.263398 0.000000 5310 +publicationssequ 0 2 6.263398 0.000000 5311 +datapraveen 0 2 6.263398 0.000000 5312 +systempraveen 0 2 6.263398 0.000000 5313 +queriesraghu 0 2 6.263398 0.000000 5314 +workthedevis 0 2 6.263398 0.000000 5315 +visualizationenviron 0 2 6.263398 0.000000 5316 +servercontact 0 2 6.263398 0.000000 5317 +eduraghu 0 2 6.263398 0.000000 5318 +edumiron 0 2 6.263398 0.000000 5319 +seshadripraveen 0 2 6.263398 0.000000 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..37f0139b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +analysi 0 124 2.079442 0.000000 98 +advanc 0 99 2.302585 0.000000 130 +member 0 84 2.484907 0.000000 165 +upson 0 71 2.639057 0.000000 218 +august 0 66 2.708050 0.000000 257 +knowledg 0 67 2.708050 0.000000 243 +reason 0 57 2.890372 0.000000 318 +taken 0 31 3.496508 0.000000 555 +fellow 1 24 3.761200 3.761200 701 +germani 0 17 4.110874 0.000000 946 +stori 0 14 4.317488 0.000000 1087 +german 0 6 5.164786 0.000000 2190 +exchang 0 5 5.347108 0.000000 2310 +fulbright 0 4 5.568345 0.000000 2963 +karlsruh 0 3 5.857933 0.000000 3689 +ralph 1 1 6.957497 6.957497 11433 +benzingerralph 0 1 6.957497 0.000000 11434 +benzingerw 0 1 6.957497 0.000000 11435 +sich 0 1 6.957497 0.000000 11436 +seinen 0 1 6.957497 0.000000 11437 +lorbeeren 0 1 6.957497 0.000000 11438 +ausruht 0 1 6.957497 0.000000 11439 +trgt 0 1 6.957497 0.000000 11440 +derfalschen 0 1 6.957497 0.000000 11441 +stell 0 1 6.957497 0.000000 11442 +studienstiftung 0 1 6.957497 0.000000 11443 +deutschen 0 1 6.957497 0.000000 11444 +volk 0 1 6.957497 0.000000 11445 +siemen 0 1 6.957497 0.000000 11446 +international 0 1 6.957497 0.000000 11447 +studentenkrei 0 1 6.957497 0.000000 11448 +alumnusat 0 1 6.957497 0.000000 11449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..9082d4d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,292 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +us 2 329 1.098612 2.197224 16 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +first 1 140 1.945910 1.945910 71 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +construct 0 139 1.945910 0.000000 82 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +postscript 4 131 2.079442 8.317768 90 +document 1 121 2.079442 2.079442 89 +structur 1 106 2.197225 2.197225 105 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +specif 1 106 2.197225 2.197225 106 +look 0 107 2.197225 0.000000 115 +place 0 106 2.197225 0.000000 124 +well 0 109 2.197225 0.000000 121 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +peopl 0 96 2.302585 0.000000 132 +take 0 97 2.302585 0.000000 134 +section 3 94 2.397895 7.193685 149 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +imag 0 91 2.397895 0.000000 161 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +contain 1 81 2.484907 2.484907 174 +school 0 84 2.484907 0.000000 188 +exampl 2 77 2.564949 5.129898 195 +refer 1 78 2.564949 2.564949 203 +want 0 79 2.564949 0.000000 199 +state 0 76 2.564949 0.000000 207 +complet 0 77 2.564949 0.000000 208 +good 0 77 2.564949 0.000000 200 +interfac 0 79 2.564949 0.000000 209 +name 1 72 2.639057 2.639057 220 +integr 1 67 2.708050 2.708050 245 +would 0 67 2.708050 0.000000 251 +complex 1 64 2.772589 2.772589 269 +written 1 63 2.772589 2.772589 278 +guid 0 63 2.772589 0.000000 267 +abstract 0 62 2.772589 0.000000 276 +interact 0 62 2.772589 0.000000 270 +simpl 1 60 2.833213 2.833213 298 +space 1 57 2.890372 2.890372 310 +thesi 0 57 2.890372 0.000000 327 +overview 0 56 2.890372 0.000000 323 +sever 0 56 2.890372 0.000000 322 +talk 1 53 2.944439 2.944439 336 +allow 1 53 2.944439 2.944439 333 +three 1 54 2.944439 2.944439 330 +suggest 0 53 2.944439 0.000000 331 +digit 0 52 2.995732 0.000000 348 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +format 1 48 3.044522 3.044522 356 +visual 1 48 3.044522 3.044522 372 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +still 0 50 3.044522 0.000000 362 +numer 0 49 3.044522 0.000000 369 +effect 1 46 3.091042 3.091042 385 +move 1 47 3.091042 3.091042 382 +featur 0 46 3.091042 0.000000 386 +made 1 44 3.135494 3.135494 398 +algebra 1 45 3.135494 3.135494 394 +even 0 45 3.135494 0.000000 393 +show 0 43 3.178054 0.000000 417 +long 0 43 3.178054 0.000000 413 +continu 1 39 3.258097 3.258097 448 +probabl 0 40 3.258097 0.000000 455 +error 0 40 3.258097 0.000000 449 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +short 0 36 3.367296 0.000000 499 +tree 0 36 3.367296 0.000000 492 +soon 0 36 3.367296 0.000000 494 +approxim 0 35 3.401197 0.000000 509 +print 0 34 3.401197 0.000000 503 +singl 0 34 3.401197 0.000000 510 +express 2 32 3.465736 6.931472 540 +independ 1 32 3.465736 3.465736 548 +concept 0 32 3.465736 0.000000 537 +human 0 32 3.465736 0.000000 546 +taken 1 31 3.496508 3.496508 555 +titl 0 31 3.496508 0.000000 556 +produc 2 30 3.555348 7.110696 572 +power 1 30 3.555348 3.555348 573 +hard 1 30 3.555348 3.555348 563 +compon 0 30 3.555348 0.000000 570 +limit 0 29 3.583519 0.000000 585 +progress 1 28 3.610918 3.610918 598 +load 0 28 3.610918 0.000000 601 +full 0 28 3.610918 0.000000 615 +symbol 0 27 3.637586 0.000000 620 +determin 0 27 3.637586 0.000000 630 +enhanc 0 26 3.688879 0.000000 644 +rule 0 26 3.688879 0.000000 638 +enabl 0 26 3.688879 0.000000 655 +challeng 0 26 3.688879 0.000000 653 +request 0 26 3.688879 0.000000 635 +constraint 0 26 3.688879 0.000000 636 +notic 2 25 3.737670 7.475340 675 +demonstr 1 24 3.761200 3.761200 694 +interpret 1 24 3.761200 3.761200 686 +seri 0 24 3.761200 0.000000 708 +variabl 1 23 3.806662 3.806662 715 +input 0 23 3.806662 0.000000 727 +initi 0 23 3.806662 0.000000 717 +brows 0 23 3.806662 0.000000 726 +equat 0 23 3.806662 0.000000 724 +size 0 23 3.806662 0.000000 713 +head 0 23 3.806662 0.000000 732 +begin 0 23 3.806662 0.000000 716 +sequenti 0 22 3.850148 0.000000 745 +defin 0 22 3.850148 0.000000 746 +reduc 0 22 3.850148 0.000000 759 +identifi 0 22 3.850148 0.000000 760 +output 1 21 3.912023 3.912023 788 +voic 0 21 3.912023 0.000000 806 +util 0 21 3.912023 0.000000 774 +hypertext 0 19 4.007333 0.000000 865 +left 0 19 4.007333 0.000000 851 +separ 0 19 4.007333 0.000000 844 +listen 1 18 4.060443 4.060443 907 +demo 1 18 4.060443 4.060443 888 +along 1 18 4.060443 4.060443 878 +record 0 18 4.060443 0.000000 890 +dimension 0 18 4.060443 0.000000 909 +element 0 18 4.060443 0.000000 895 +lower 0 18 4.060443 0.000000 886 +render 2 17 4.110874 8.221748 947 +matrix 0 17 4.110874 0.000000 933 +choic 0 16 4.174387 0.000000 979 +cognit 0 16 4.174387 0.000000 986 +took 0 16 4.174387 0.000000 1010 +upon 0 16 4.174387 0.000000 978 +piec 0 15 4.248495 0.000000 1020 +later 0 15 4.248495 0.000000 1043 +audio 4 14 4.317488 17.269952 1094 +latex 4 14 4.317488 17.269952 1064 +attribut 0 14 4.317488 0.000000 1092 +squar 0 14 4.317488 0.000000 1082 +shown 0 14 4.317488 0.000000 1080 +context 1 13 4.382027 4.382027 1153 +emac 0 13 4.382027 0.000000 1143 +directli 0 13 4.382027 0.000000 1141 +reader 1 12 4.465908 4.465908 1246 +speak 1 12 4.465908 4.465908 1283 +calcul 1 12 4.465908 4.465908 1268 +speech 0 12 4.465908 0.000000 1222 +shape 0 12 4.465908 0.000000 1245 +typic 0 11 4.553877 0.000000 1360 +ofcomput 0 10 4.653960 0.000000 1442 +donald 0 9 4.753590 0.000000 1510 +notat 0 9 4.753590 0.000000 1489 +imposs 0 9 4.753590 0.000000 1513 +distanc 0 9 4.753590 0.000000 1500 +equival 0 9 4.753590 0.000000 1496 +cross 1 8 4.875197 4.875197 1703 +root 0 8 4.875197 0.000000 1650 +wire 0 8 4.875197 0.000000 1747 +illustr 0 8 4.875197 0.000000 1679 +forget 0 8 4.875197 0.000000 1712 +replac 0 8 4.875197 0.000000 1668 +dimens 1 7 5.010635 5.010635 1930 +stereo 1 7 5.010635 5.010635 1818 +hear 1 7 5.010635 5.010635 1940 +dedic 0 7 5.010635 0.000000 1843 +notion 0 7 5.010635 0.000000 1947 +nest 1 6 5.164786 5.164786 2151 +vari 1 6 5.164786 5.164786 2001 +difficult 0 6 5.164786 0.000000 2035 +quick 0 6 5.164786 0.000000 2184 +heurist 0 6 5.164786 0.000000 2125 +chosen 0 6 5.164786 0.000000 1984 +meant 0 6 5.164786 0.000000 2055 +fraction 1 5 5.347108 5.347108 2259 +recogn 1 5 5.347108 5.347108 2302 +quantifi 1 5 5.347108 5.347108 2525 +substitut 1 5 5.347108 5.347108 2247 +mutual 0 5 5.347108 0.000000 2418 +raman 1 4 5.568345 5.568345 2827 +inlin 0 4 5.568345 0.000000 2964 +encod 0 4 5.568345 0.000000 2929 +paus 0 4 5.568345 0.000000 2965 +orthogon 0 4 5.568345 0.000000 2832 +compris 0 4 5.568345 0.000000 2862 +vital 0 4 5.568345 0.000000 2733 +ident 0 4 5.568345 0.000000 2826 +customiz 0 4 5.568345 0.000000 2966 +trick 0 4 5.568345 0.000000 2967 +thati 0 4 5.568345 0.000000 2616 +heard 0 4 5.568345 0.000000 2895 +formula 1 3 5.857933 5.857933 3405 +percept 1 3 5.857933 5.857933 3739 +subscript 1 3 5.857933 5.857933 3469 +tripl 1 3 5.857933 5.857933 3160 +meaning 1 3 5.857933 5.857933 3458 +blind 0 3 5.857933 0.000000 3662 +forthes 0 3 5.857933 0.000000 3199 +experienc 0 3 5.857933 0.000000 3203 +exponenti 0 3 5.857933 0.000000 3529 +thetim 0 3 5.857933 0.000000 3581 +proper 0 3 5.857933 0.000000 3323 +orpostscript 0 3 5.857933 0.000000 3329 +convei 2 2 6.263398 12.526796 4690 +spoken 1 2 6.263398 6.263398 5122 +succinctli 1 2 6.263398 6.263398 4275 +monoton 1 2 6.263398 6.263398 5321 +logarithm 1 2 6.263398 6.263398 5322 +expon 0 2 6.263398 0.000000 5323 +absenc 0 2 6.263398 0.000000 4878 +oppos 0 2 6.263398 0.000000 4855 +ofintegr 0 2 6.263398 0.000000 5324 +summat 0 2 6.263398 0.000000 5325 +referenc 0 2 6.263398 0.000000 4757 +justa 0 2 6.263398 0.000000 5326 +glori 0 2 6.263398 0.000000 5327 +aster 3 1 6.957497 20.872491 11450 +bruno 1 1 6.957497 6.957497 11451 +superscript 1 1 6.957497 6.957497 11452 +knuth 1 1 6.957497 6.957497 11453 +unambigu 1 1 6.957497 6.957497 11454 +inton 1 1 6.957497 6.957497 11455 +intermix 1 1 6.957497 6.957497 11456 +demonstrationi 0 1 6.957497 0.000000 11457 +forrend 0 1 6.957497 0.000000 11458 +myphd 0 1 6.957497 0.000000 11459 +dectalk 0 1 6.957497 0.000000 11460 +mulaw 0 1 6.957497 0.000000 11461 +mono 0 1 6.957497 0.000000 11462 +dvip 0 1 6.957497 0.000000 11463 +andround 0 1 6.957497 0.000000 11464 +faad 0 1 6.957497 0.000000 11465 +casey 0 1 6.957497 0.000000 11466 +examplessinc 0 1 6.957497 0.000000 11467 +inflect 0 1 6.957497 0.000000 11468 +toconvei 0 1 6.957497 0.000000 11469 +renderingsub 0 1 6.957497 0.000000 11470 +audiost 0 1 6.957497 0.000000 11471 +dimensionus 0 1 6.957497 0.000000 11472 +verbatim 0 1 6.957497 0.000000 11473 +layoutoper 0 1 6.957497 0.000000 11474 +verydiffer 0 1 6.957497 0.000000 11475 +monotonicchang 0 1 6.957497 0.000000 11476 +trigonometr 0 1 6.957497 0.000000 11477 +ambigu 0 1 6.957497 0.000000 11478 +parenthesi 0 1 6.957497 0.000000 11479 +asexpon 0 1 6.957497 0.000000 11480 +isfulli 0 1 6.957497 0.000000 11481 +innocu 0 1 6.957497 0.000000 11482 +mostdifficult 0 1 6.957497 0.000000 11483 +theintegr 0 1 6.957497 0.000000 11484 +ofhuman 0 1 6.957497 0.000000 11485 +ofcross 0 1 6.957497 0.000000 11486 +referenceableobject 0 1 6.957497 0.000000 11487 +latercross 0 1 6.957497 0.000000 11488 +followingdeepli 0 1 6.957497 0.000000 11489 +fledgedsymbol 0 1 6.957497 0.000000 11490 +thematrix 0 1 6.957497 0.000000 11491 +commenc 0 1 6.957497 0.000000 11492 +aseach 0 1 6.957497 0.000000 11493 +secondsto 0 1 6.957497 0.000000 11494 +spacenot 0 1 6.957497 0.000000 11495 +changeth 0 1 6.957497 0.000000 11496 +techniquefor 0 1 6.957497 0.000000 11497 +renderingsconvei 0 1 6.957497 0.000000 11498 +thesub 0 1 6.957497 0.000000 11499 +denomin 0 1 6.957497 0.000000 11500 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..a3fcd858 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +cornel 1 215 1.386294 1.386294 23 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +parallel 1 169 1.791759 1.791759 60 +process 1 142 1.945910 1.945910 72 +confer 1 126 2.079442 2.079442 100 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +check 1 115 2.197225 2.197225 118 +theori 1 111 2.197225 2.197225 127 +intern 0 108 2.197225 0.000000 128 +octob 1 89 2.397895 2.397895 156 +learn 1 86 2.484907 2.484907 170 +ieee 1 86 2.484907 2.484907 190 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +effici 0 73 2.639057 0.000000 233 +test 1 66 2.708050 2.708050 252 +august 0 66 2.708050 0.000000 257 +foundat 1 62 2.772589 2.772589 286 +ithaca 0 65 2.772589 0.000000 294 +function 0 62 2.772589 0.000000 275 +juli 0 60 2.833213 0.000000 305 +extens 0 53 2.944439 0.000000 340 +without 0 50 3.044522 0.000000 370 +linear 0 41 3.218876 0.000000 431 +theoret 0 39 3.258097 0.000000 446 +correct 0 38 3.295837 0.000000 462 +approxim 1 35 3.401197 3.401197 509 +bound 0 26 3.688879 0.000000 659 +scalabl 0 24 3.761200 0.000000 705 +equat 0 23 3.806662 0.000000 724 +self 1 22 3.850148 3.850148 761 +polynomi 0 14 4.317488 0.000000 1069 +squar 0 14 4.317488 0.000000 1082 +branch 0 11 4.553877 0.000000 1318 +kumar 1 9 4.753590 4.753590 1506 +russel 0 9 4.753590 0.000000 1507 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +combinator 1 4 5.568345 5.568345 2915 +bottleneck 0 4 5.568345 0.000000 2769 +shah 0 4 5.568345 0.000000 2814 +ravi 1 3 5.857933 5.857933 3185 +funda 1 3 5.857933 5.857933 3645 +recurr 0 3 5.857933 0.000000 3740 +latin 0 3 5.857933 0.000000 3741 +sundaram 0 3 5.857933 0.000000 3463 +ramachandran 0 3 5.857933 0.000000 3742 +lnc 1 2 6.263398 6.263398 5085 +width 0 2 6.263398 0.000000 5328 +alexand 0 2 6.263398 0.000000 5329 +uumln 1 1 6.957497 6.957497 11501 +sivakumar 1 1 6.957497 6.957497 11502 +jeyakumar 0 1 6.957497 0.000000 11503 +muthukumarasami 0 1 6.957497 0.000000 11504 +umakishor 0 1 6.957497 0.000000 11505 +gautam 0 1 6.957497 0.000000 11506 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..08bfee5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +also 1 259 1.386294 1.386294 28 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +note 0 142 1.945910 0.000000 67 +lectur 0 135 1.945910 0.000000 73 +confer 1 126 2.079442 2.079442 100 +introduct 1 126 2.079442 2.079442 87 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +teach 1 108 2.197225 2.197225 112 +access 0 102 2.302585 0.000000 136 +imag 1 91 2.397895 2.397895 161 +search 0 95 2.397895 0.000000 155 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +librari 0 87 2.484907 0.000000 181 +method 0 80 2.564949 0.000000 213 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +workshop 1 71 2.639057 2.639057 239 +free 0 73 2.639057 0.000000 224 +multimedia 1 68 2.708050 2.708050 258 +differ 0 66 2.708050 0.000000 253 +organ 0 65 2.772589 0.000000 265 +content 1 59 2.833213 2.833213 302 +march 0 61 2.833213 0.000000 295 +think 0 57 2.890372 0.000000 314 +variou 0 56 2.890372 0.000000 317 +undergradu 1 54 2.944439 2.944439 338 +local 0 55 2.944439 0.000000 334 +profession 0 51 2.995732 0.000000 345 +format 0 48 3.044522 0.000000 356 +visual 0 48 3.044522 0.000000 372 +electron 0 47 3.091042 0.000000 379 +featur 0 46 3.091042 0.000000 386 +california 0 46 3.091042 0.000000 388 +video 0 44 3.135494 0.000000 405 +third 1 43 3.178054 3.178054 412 +vision 1 41 3.218876 3.218876 430 +committe 0 34 3.401197 0.000000 522 +john 0 33 3.433987 0.000000 532 +taught 0 33 3.433987 0.000000 526 +transform 0 32 3.465736 0.000000 542 +pass 1 28 3.610918 3.610918 611 +held 1 28 3.610918 3.610918 600 +retriev 0 27 3.637586 0.000000 621 +subject 0 26 3.688879 0.000000 647 +compar 0 26 3.688879 0.000000 648 +detect 0 26 3.688879 0.000000 646 +greg 1 24 3.761200 3.761200 695 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +break 0 20 3.951244 0.000000 812 +spend 0 19 4.007333 0.000000 850 +boston 0 19 4.007333 0.000000 862 +scott 0 18 4.060443 0.000000 884 +miller 1 17 4.110874 4.110874 949 +vector 0 16 4.174387 0.000000 961 +fourth 0 16 4.174387 0.000000 999 +researchmi 0 14 4.317488 0.000000 1119 +coher 0 14 4.317488 0.000000 1109 +massachusett 0 14 4.317488 0.000000 1118 +scene 0 14 4.317488 0.000000 1114 +francisco 0 14 4.317488 0.000000 1095 +econom 0 13 4.382027 0.000000 1184 +huang 0 12 4.465908 0.000000 1202 +amount 0 12 4.465908 0.000000 1208 +reader 0 12 4.465908 0.000000 1246 +impact 0 11 4.553877 0.000000 1334 +refin 0 11 4.553877 0.000000 1363 +princip 0 10 4.653960 0.000000 1397 +correspond 0 10 4.653960 0.000000 1382 +observ 0 9 4.753590 0.000000 1578 +classifi 0 9 4.753590 0.000000 1537 +kevin 0 9 4.753590 0.000000 1482 +juan 0 9 4.753590 0.000000 1580 +european 0 8 4.875197 0.000000 1763 +ramin 1 7 5.010635 5.010635 1820 +justin 1 7 5.010635 5.010635 1789 +adob 0 7 5.010635 0.000000 1873 +parametr 0 7 5.010635 0.000000 1819 +sweden 0 7 5.010635 0.000000 1885 +courtesi 0 7 5.010635 0.000000 1953 +zabih 1 6 5.164786 5.164786 2138 +freeli 0 6 5.164786 0.000000 2014 +price 0 6 5.164786 0.000000 1999 +acrobat 0 6 5.164786 0.000000 2063 +phil 0 5 5.347108 0.000000 2419 +fair 0 5 5.347108 0.000000 2333 +florida 0 5 5.347108 0.000000 2526 +cvpr 1 4 5.568345 5.568345 2761 +essai 0 4 5.568345 0.000000 2948 +newslett 0 4 5.568345 0.000000 2873 +scribe 0 4 5.568345 0.000000 2631 +jing 0 3 5.857933 0.000000 3521 +voskuhl 0 3 5.857933 0.000000 3109 +szewczyk 0 3 5.857933 0.000000 3108 +histogram 0 3 5.857933 0.000000 3490 +stockholm 0 3 5.857933 0.000000 3715 +conjunct 0 3 5.857933 0.000000 3743 +cytacki 0 2 6.263398 0.000000 5330 +pageramin 0 1 6.957497 0.000000 11507 +zabihassist 0 1 6.957497 0.000000 11508 +professorrdz 0 1 6.957497 0.000000 11509 +agr 0 1 6.957497 0.000000 11510 +studentsi 0 1 6.957497 0.000000 11511 +vera 0 1 6.957497 0.000000 11512 +kettnak 0 1 6.957497 0.000000 11513 +olga 0 1 6.957497 0.000000 11514 +veksler 0 1 6.957497 0.000000 11515 +publicationsmost 0 1 6.957497 0.000000 11516 +sarasota 0 1 6.957497 0.000000 11517 +woodfil 0 1 6.957497 0.000000 11518 +teachingi 0 1 6.957497 0.000000 11519 +activitiesi 0 1 6.957497 0.000000 11520 +comitte 0 1 6.957497 0.000000 11521 +acknowledgementsthi 0 1 6.957497 0.000000 11522 +huttenlocherlast 0 1 6.957497 0.000000 11523 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..2c7a8779 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +roderick 0 1 6.957497 0.000000 11524 +moten 0 1 6.957497 0.000000 11525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..8faa249b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +fall 1 181 1.609438 1.609438 40 +paper 0 205 1.609438 0.000000 38 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +homepag 0 93 2.397895 0.000000 148 +upson 0 71 2.639057 0.000000 218 +result 0 65 2.772589 0.000000 281 +talk 0 53 2.944439 0.000000 336 +telephon 0 50 3.044522 0.000000 373 +describ 0 45 3.135494 0.000000 400 +york 0 41 3.218876 0.000000 435 +random 0 34 3.401197 0.000000 511 +universityithaca 0 24 3.761200 0.000000 710 +kumar 0 9 4.753590 0.000000 1506 +hallcornel 0 8 4.875197 0.000000 1757 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +fair 0 5 5.347108 0.000000 2333 +funda 0 3 5.857933 0.000000 3645 +ravi 0 3 5.857933 0.000000 3185 +ergun 0 2 6.263398 0.000000 5087 +wasserman 0 2 6.263398 0.000000 5331 +nephew 0 2 6.263398 0.000000 5332 +homepageronitt 0 1 6.957497 0.000000 11526 +rubinfeldi 0 1 6.957497 0.000000 11527 +rubinfeldcomput 0 1 6.957497 0.000000 11528 +edupictur 0 1 6.957497 0.000000 11529 +eitan 0 1 6.957497 0.000000 11530 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..0b7d12ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +machin 0 129 2.079442 0.000000 95 +technic 1 100 2.302585 2.302585 140 +memori 0 101 2.302585 0.000000 139 +associ 1 93 2.397895 2.397895 151 +commun 0 95 2.397895 0.000000 157 +institut 1 84 2.484907 2.484907 187 +appear 0 78 2.564949 0.000000 210 +state 0 76 2.564949 0.000000 207 +involv 0 71 2.639057 0.000000 227 +receiv 0 66 2.708050 0.000000 244 +share 0 59 2.833213 0.000000 304 +thesi 0 57 2.890372 0.000000 327 +advisor 0 51 2.995732 0.000000 355 +post 0 35 3.401197 0.000000 505 +titl 0 31 3.496508 0.000000 556 +full 0 28 3.610918 0.000000 615 +consist 0 26 3.688879 0.000000 651 +reliabl 0 25 3.737670 0.000000 674 +doctor 1 24 3.761200 3.761200 709 +condit 0 16 4.174387 0.000000 975 +edui 0 13 4.382027 0.000000 1193 +replic 0 12 4.465908 0.000000 1231 +thedepart 1 11 4.553877 4.553877 1350 +israel 1 11 4.553877 4.553877 1366 +mainli 0 10 4.653960 0.000000 1432 +birman 1 9 4.753590 4.753590 1531 +friedman 1 7 5.010635 5.010635 1886 +cornellunivers 1 7 5.010635 5.010635 1916 +trade 0 7 5.010635 0.000000 1815 +clickher 0 5 5.347108 0.000000 2428 +vaysburd 0 4 5.568345 0.000000 2846 +tina 0 3 5.857933 0.000000 3744 +scienceatcornel 0 2 6.263398 0.000000 5333 +withken 0 2 6.263398 0.000000 5334 +androbbert 0 2 6.263398 0.000000 4953 +thehoru 0 2 6.263398 0.000000 5179 +attiya 0 2 6.263398 0.000000 5197 +partition 0 2 6.263398 0.000000 4954 +thetechnion 1 1 6.957497 6.957497 11531 +friedmanroi 0 1 6.957497 0.000000 11532 +friedmanpost 0 1 6.957497 0.000000 11533 +universityroi 0 1 6.957497 0.000000 11534 +rennessein 0 1 6.957497 0.000000 11535 +washagit 0 1 6.957497 0.000000 11536 +wasconsist 0 1 6.957497 0.000000 11537 +themilliped 0 1 6.957497 0.000000 11538 +withassaf 0 1 6.957497 0.000000 11539 +schuster 0 1 6.957497 0.000000 11540 +papersr 0 1 6.957497 0.000000 11541 +scalabledistribut 0 1 6.957497 0.000000 11542 +coprocessor 0 1 6.957497 0.000000 11543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..0b2e1048 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +cornel 1 215 1.386294 1.386294 23 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +report 0 131 2.079442 0.000000 92 +version 0 113 2.197225 0.000000 122 +associ 0 93 2.397895 0.000000 151 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +upson 0 71 2.639057 0.000000 218 +onlin 0 75 2.639057 0.000000 223 +simul 0 66 2.708050 0.000000 255 +ithaca 0 65 2.772589 0.000000 294 +dept 0 64 2.772589 0.000000 291 +tech 0 35 3.401197 0.000000 515 +photograph 0 15 4.248495 0.000000 1056 +daniela 1 3 5.857933 5.857933 3611 +catalogc 0 2 6.263398 0.000000 5023 +infodesign 0 1 6.957497 0.000000 11544 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..0376207b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +oper 0 180 1.609438 0.000000 34 +network 1 168 1.791759 1.791759 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +version 1 113 2.197225 2.197225 122 +place 0 106 2.197225 0.000000 124 +commun 1 95 2.397895 2.397895 157 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +contain 0 81 2.484907 0.000000 174 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +html 0 75 2.639057 0.000000 235 +ithaca 1 65 2.772589 2.772589 294 +virtual 0 62 2.772589 0.000000 285 +complex 0 64 2.772589 0.000000 269 +scientif 0 53 2.944439 0.000000 341 +advisor 0 51 2.995732 0.000000 355 +protocol 1 45 3.135494 3.135494 407 +multi 0 36 3.367296 0.000000 493 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +secur 0 30 3.555348 0.000000 577 +framework 0 28 3.610918 0.000000 606 +american 0 27 3.637586 0.000000 634 +reliabl 0 25 3.737670 0.000000 674 +flow 0 24 3.761200 0.000000 700 +mobil 0 23 3.806662 0.000000 730 +hous 0 21 3.912023 0.000000 801 +flexibl 0 21 3.912023 0.000000 792 +applet 0 20 3.951244 0.000000 827 +media 0 19 4.007333 0.000000 861 +agent 0 18 4.060443 0.000000 910 +club 0 15 4.248495 0.000000 1058 +horu 2 14 4.317488 8.634976 1116 +senior 0 14 4.317488 0.000000 1120 +composit 1 13 4.382027 4.382027 1150 +edui 0 13 4.382027 0.000000 1193 +incorpor 0 13 4.382027 0.000000 1163 +weak 0 13 4.382027 0.000000 1159 +danc 0 12 4.465908 0.000000 1278 +market 0 11 4.553877 0.000000 1361 +interestsmi 0 10 4.653960 0.000000 1462 +robbert 0 9 4.753590 0.000000 1529 +guitar 0 8 4.875197 0.000000 1758 +synchroni 0 7 5.010635 0.000000 1923 +band 0 6 5.164786 0.000000 2198 +strong 0 6 5.164786 0.000000 2029 +jazz 1 5 5.347108 5.347108 2527 +babi 0 5 5.347108 0.000000 2493 +girl 0 5 5.347108 0.000000 2410 +swing 0 4 5.568345 0.000000 2887 +dutch 1 3 5.857933 5.857933 3592 +lightweight 1 3 5.857933 5.857933 3234 +tanenbaum 0 3 5.857933 0.000000 3397 +sharewar 0 3 5.857933 0.000000 3503 +netherland 0 3 5.857933 0.000000 3650 +associatecornel 0 2 6.263398 0.000000 5137 +scienceatcornel 0 2 6.263398 0.000000 5333 +withken 0 2 6.263398 0.000000 5334 +tacoma 0 2 6.263398 0.000000 4909 +brand 1 1 6.957497 6.957497 11545 +renesserobbert 0 1 6.957497 0.000000 11546 +renessesenior 0 1 6.957497 0.000000 11547 +universityrvr 0 1 6.957497 0.000000 11548 +universityinithaca 0 1 6.957497 0.000000 11549 +birmanin 0 1 6.957497 0.000000 11550 +wasandi 0 1 6.957497 0.000000 11551 +caml 0 1 6.957497 0.000000 11552 +nynetth 0 1 6.957497 0.000000 11553 +ageless 0 1 6.957497 0.000000 11554 +accordion 0 1 6.957497 0.000000 11555 +stuffcornel 0 1 6.957497 0.000000 11556 +ithacaithacanet 0 1 6.957497 0.000000 11557 +spinner 0 1 6.957497 0.000000 11558 +paperssoftwar 0 1 6.957497 0.000000 11559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..a7344f4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +cornel 1 215 1.386294 1.386294 23 +public 0 202 1.609438 0.000000 43 +distribut 1 162 1.791759 1.791759 51 +algorithm 0 162 1.791759 0.000000 57 +click 1 142 1.945910 1.945910 78 +process 0 142 1.945910 0.000000 72 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +version 1 113 2.197225 2.197225 122 +final 0 116 2.197225 0.000000 108 +intern 0 108 2.197225 0.000000 128 +technic 1 100 2.302585 2.302585 140 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +appear 1 78 2.564949 2.564949 210 +june 0 79 2.564949 0.000000 214 +symposium 1 72 2.639057 2.639057 238 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +august 0 66 2.708050 0.000000 257 +copi 1 63 2.772589 2.772589 284 +experi 0 64 2.772589 0.000000 283 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +februari 0 54 2.944439 0.000000 328 +sampl 0 53 2.944439 0.000000 339 +profession 0 51 2.995732 0.000000 345 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +electron 0 47 3.091042 0.000000 379 +answer 0 45 3.135494 0.000000 391 +submit 0 39 3.258097 0.000000 440 +annual 0 40 3.258097 0.000000 458 +especi 0 36 3.367296 0.000000 496 +approxim 1 35 3.401197 3.401197 509 +award 0 34 3.401197 0.000000 523 +survei 0 35 3.401197 0.000000 513 +detect 1 26 3.688879 3.688879 646 +revis 0 26 3.688879 0.000000 640 +consist 0 26 3.688879 0.000000 651 +reliabl 0 25 3.737670 0.000000 674 +doctor 0 24 3.761200 0.000000 709 +seri 0 24 3.761200 0.000000 708 +finish 0 22 3.850148 0.000000 748 +properti 0 22 3.850148 0.000000 749 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +failur 1 18 4.060443 4.060443 898 +expand 0 17 4.110874 0.000000 928 +diego 0 16 4.174387 0.000000 992 +letter 0 16 4.174387 0.000000 981 +asynchron 1 12 4.465908 4.465908 1229 +elect 0 8 4.875197 0.000000 1771 +perfect 1 7 5.010635 5.010635 1921 +chandra 0 6 5.164786 0.000000 2091 +consensu 0 6 5.164786 0.000000 2080 +prize 0 6 5.164786 0.000000 2150 +keith 1 5 5.347108 5.347108 2528 +stabl 0 5 5.347108 0.000000 2309 +blow 0 5 5.347108 0.000000 2407 +spam 0 4 5.568345 0.000000 2927 +bean 0 4 5.568345 0.000000 2968 +detector 1 3 5.857933 5.857933 3745 +horizon 0 3 5.857933 0.000000 3746 +zone 0 3 5.857933 0.000000 3747 +cash 0 3 5.857933 0.000000 3355 +marzullo 1 2 6.263398 6.263398 4919 +formor 0 2 6.263398 0.000000 5335 +distributedcomput 0 2 6.263398 0.000000 5336 +lecturenot 0 2 6.263398 0.000000 4679 +sabel 1 1 6.957497 6.957497 11560 +laura 1 1 6.957497 6.957497 11561 +asynchronousdistribut 1 1 6.957497 6.957497 11562 +jelli 1 1 6.957497 6.957497 11563 +bingo 1 1 6.957497 6.957497 11564 +professorkeith 0 1 6.957497 0.000000 11565 +tushar 0 1 6.957497 0.000000 11566 +sfailur 0 1 6.957497 0.000000 11567 +subcut 0 1 6.957497 0.000000 11568 +wdag 0 1 6.957497 0.000000 11569 +cow 0 1 6.957497 0.000000 11570 +strawberri 0 1 6.957497 0.000000 11571 +tart 0 1 6.957497 0.000000 11572 +torch 0 1 6.957497 0.000000 11573 +alpacanet 0 1 6.957497 0.000000 11574 +gourmet 0 1 6.957497 0.000000 11575 +thebobbi 0 1 6.957497 0.000000 11576 +belli 0 1 6.957497 0.000000 11577 +canplai 0 1 6.957497 0.000000 11578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..f12110c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +distribut 2 162 1.791759 3.583518 51 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +object 2 138 1.945910 3.891820 79 +process 1 142 1.945910 1.945910 72 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +databas 1 122 2.079442 2.079442 86 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +well 0 109 2.197225 0.000000 121 +memori 1 101 2.302585 2.302585 139 +need 0 98 2.302585 0.000000 135 +proceed 1 93 2.397895 2.397895 152 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +journal 1 83 2.484907 2.484907 183 +level 1 87 2.484907 2.484907 180 +ieee 0 86 2.484907 0.000000 190 +messag 1 76 2.564949 2.564949 212 +optim 0 79 2.564949 0.000000 197 +free 2 73 2.639057 5.278114 224 +solv 1 73 2.639057 2.639057 234 +symposium 1 72 2.639057 2.639057 238 +effici 0 73 2.639057 0.000000 233 +august 1 66 2.708050 2.708050 257 +goal 0 66 2.708050 0.000000 250 +simul 0 66 2.708050 0.000000 255 +knowledg 0 67 2.708050 0.000000 243 +result 1 65 2.772589 2.772589 281 +foundat 0 62 2.772589 0.000000 286 +share 1 59 2.833213 2.833213 304 +type 1 61 2.833213 2.833213 296 +automat 0 61 2.833213 0.000000 306 +explor 1 58 2.890372 2.890372 324 +faculti 0 56 2.890372 0.000000 325 +major 0 56 2.890372 0.000000 315 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +maintain 0 51 2.995732 0.000000 342 +principl 1 48 3.044522 3.044522 357 +even 0 45 3.135494 0.000000 393 +protocol 0 45 3.135494 0.000000 407 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +show 0 43 3.178054 0.000000 417 +theoret 0 39 3.258097 0.000000 446 +continu 0 39 3.258097 0.000000 448 +transact 0 39 3.258097 0.000000 438 +correct 1 38 3.295837 3.295837 462 +respons 0 37 3.332205 0.000000 476 +least 0 35 3.401197 0.000000 516 +concurr 0 34 3.401197 0.000000 501 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +collabor 0 32 3.465736 0.000000 543 +abl 0 30 3.555348 0.000000 566 +exist 0 30 3.555348 0.000000 569 +common 0 30 3.555348 0.000000 574 +synchron 1 29 3.583519 3.583519 588 +pass 1 28 3.610918 3.610918 611 +determin 1 27 3.637586 3.637586 630 +consist 0 26 3.688879 0.000000 651 +fundament 0 25 3.737670 0.000000 661 +known 0 24 3.761200 0.000000 702 +methodolog 0 23 3.806662 0.000000 733 +hierarchi 1 22 3.850148 3.850148 744 +thu 0 21 3.912023 0.000000 773 +increas 0 20 3.951244 0.000000 829 +prove 0 19 4.007333 0.000000 848 +failur 1 18 4.060443 4.060443 898 +whether 0 17 4.110874 0.000000 918 +partit 0 16 4.174387 0.000000 984 +princeton 0 15 4.248495 0.000000 1042 +shown 0 14 4.317488 0.000000 1080 +wait 2 13 4.382027 8.764054 1168 +canada 1 13 4.382027 4.382027 1158 +cannot 0 13 4.382027 0.000000 1144 +difficulti 0 13 4.382027 0.000000 1132 +necessari 0 13 4.382027 0.000000 1147 +asynchron 1 12 4.465908 4.465908 1229 +robust 0 12 4.465908 0.000000 1271 +replic 0 12 4.465908 0.000000 1231 +clock 1 11 4.553877 4.553877 1320 +abil 0 11 4.553877 0.000000 1341 +interestsmi 0 10 4.653960 0.000000 1462 +reli 0 10 4.653960 0.000000 1411 +guarante 0 10 4.653960 0.000000 1391 +informationabout 0 9 4.753590 0.000000 1515 +crash 1 8 4.875197 4.875197 1616 +paradigm 0 8 4.875197 0.000000 1662 +bridg 0 8 4.875197 0.000000 1764 +exactli 0 7 5.010635 0.000000 1817 +suffici 0 7 5.010635 0.000000 1897 +montreal 0 7 5.010635 0.000000 1961 +pittsburgh 0 7 5.010635 0.000000 1938 +pennsylvania 0 7 5.010635 0.000000 1932 +consensu 1 6 5.164786 5.164786 2080 +chandra 1 6 5.164786 5.164786 2091 +prasad 1 6 5.164786 5.164786 2126 +mistak 0 6 5.164786 0.000000 2110 +broadcast 0 5 5.347108 0.000000 2453 +infinit 0 4 5.568345 0.000000 2596 +detector 1 3 5.857933 5.857933 3745 +forfault 0 3 5.857933 0.000000 3748 +ofobject 0 3 5.857933 0.000000 3399 +toueg 2 2 6.263398 12.526796 5339 +inher 0 2 6.263398 0.000000 5086 +mere 0 2 6.263398 0.000000 5340 +slow 0 2 6.263398 0.000000 5341 +inour 0 2 6.263398 0.000000 4445 +systemswith 0 2 6.263398 0.000000 5342 +muchinform 0 2 6.263398 0.000000 4811 +amajor 0 2 6.263398 0.000000 5343 +herlihi 0 2 6.263398 0.000000 5144 +anobject 0 2 6.263398 0.000000 4267 +hadzilaco 0 2 6.263398 0.000000 5338 +jayanti 1 1 6.957497 6.957497 11583 +failuredetector 1 1 6.957497 6.957497 11584 +unreli 1 1 6.957497 6.957497 11585 +weakest 1 1 6.957497 6.957497 11586 +neiger 1 1 6.957497 6.957497 11587 +professorph 0 1 6.957497 0.000000 11588 +toleranceand 0 1 6.957497 0.000000 11589 +andshar 0 1 6.957497 0.000000 11590 +gapbetween 0 1 6.957497 0.000000 11591 +practicalsolut 0 1 6.957497 0.000000 11592 +withtushar 0 1 6.957497 0.000000 11593 +chandraand 0 1 6.957497 0.000000 11594 +onunreli 0 1 6.957497 0.000000 11595 +computingst 0 1 6.957497 0.000000 11596 +adeterminist 0 1 6.957497 0.000000 11597 +impossibilityresult 0 1 6.957497 0.000000 11598 +aprocess 0 1 6.957497 0.000000 11599 +wefirst 0 1 6.957497 0.000000 11600 +canmak 0 1 6.957497 0.000000 11601 +solveconsensu 0 1 6.957497 0.000000 11602 +practicalityof 0 1 6.957497 0.000000 11603 +theircorrect 0 1 6.957497 0.000000 11604 +sharedobject 0 1 6.957497 0.000000 11605 +accessesthi 0 1 6.957497 0.000000 11606 +otherprocess 0 1 6.957497 0.000000 11607 +thatcorrespond 0 1 6.957497 0.000000 11608 +atani 0 1 6.957497 0.000000 11609 +whetherrobust 0 1 6.957497 0.000000 11610 +bracha 0 1 6.957497 0.000000 11611 +srikanth 0 1 6.957497 0.000000 11612 +abbadi 0 1 6.957497 0.000000 11613 +detectorfor 0 1 6.957497 0.000000 11614 +vancouv 0 1 6.957497 0.000000 11615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..258dc869 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +public 1 202 1.609438 1.609438 43 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +distribut 1 162 1.791759 1.791759 51 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +professor 1 137 1.945910 1.945910 76 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +compil 1 122 2.079442 2.079442 96 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +technolog 0 131 2.079442 0.000000 102 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +assist 1 112 2.197225 2.197225 113 +specif 0 106 2.197225 0.000000 106 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +technic 1 100 2.302585 2.302585 140 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +proceed 1 93 2.397895 2.397895 152 +journal 0 83 2.484907 0.000000 183 +master 1 76 2.564949 2.564949 216 +messag 1 76 2.564949 2.564949 212 +upson 0 71 2.639057 0.000000 218 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +complex 1 64 2.772589 2.772589 269 +foundat 0 62 2.772589 0.000000 286 +septemb 0 65 2.772589 0.000000 274 +thesi 1 57 2.890372 2.890372 327 +algebra 1 45 3.135494 3.135494 394 +editor 1 41 3.218876 3.218876 433 +submit 1 39 3.258097 3.258097 440 +small 0 39 3.258097 0.000000 447 +formal 0 37 3.332205 0.000000 478 +concurr 0 34 3.401197 0.000000 501 +semant 1 29 3.583519 3.583519 587 +bound 0 26 3.688879 0.000000 659 +director 0 22 3.850148 0.000000 767 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +vlsi 0 21 3.912023 0.000000 795 +verif 0 20 3.951244 0.000000 826 +scheme 0 20 3.951244 0.000000 818 +binari 0 20 3.951244 0.000000 823 +exercis 0 19 4.007333 0.000000 842 +failur 0 18 4.060443 0.000000 898 +brown 1 16 4.174387 4.174387 977 +circuit 0 13 4.382027 0.000000 1131 +sigplan 0 13 4.382027 0.000000 1190 +verifi 1 12 4.465908 4.465908 1261 +calculu 0 12 4.465908 0.000000 1203 +meta 0 9 4.753590 0.000000 1505 +crash 0 8 4.875197 0.000000 1616 +delai 0 7 5.010635 0.000000 1848 +cornellunivers 0 7 5.010635 0.000000 1916 +seshadri 0 7 5.010635 0.000000 1803 +silicon 1 6 5.164786 5.164786 2076 +toronto 0 6 5.164786 0.000000 2156 +educurr 0 5 5.347108 0.000000 2504 +bloom 1 4 5.568345 5.568345 2913 +hallphon 0 4 5.568345 0.000000 2900 +insensit 0 4 5.568345 0.000000 2716 +knight 0 4 5.568345 0.000000 2728 +savag 0 4 5.568345 0.000000 2777 +weber 2 3 5.857933 11.715866 3156 +samuel 1 3 5.857933 5.857933 3155 +act 1 3 5.857933 5.857933 3557 +agreement 1 3 5.857933 5.857933 3207 +metatheori 0 3 5.857933 0.000000 3642 +byzantin 1 2 6.263398 6.263398 4203 +bakker 0 2 6.263398 0.000000 5337 +hadzilaco 0 2 6.263398 0.000000 5338 +roever 0 1 6.957497 0.000000 11579 +rozenberg 0 1 6.957497 0.000000 11580 +amdur 0 1 6.957497 0.000000 11581 +wortman 0 1 6.957497 0.000000 11582 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..afbb832d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +modifi 0 178 1.609438 0.000000 35 +base 1 165 1.791759 1.791759 50 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +object 1 138 1.945910 1.945910 79 +databas 0 122 2.079442 0.000000 86 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +technolog 0 131 2.079442 0.000000 102 +final 0 116 2.197225 0.000000 108 +version 0 113 2.197225 0.000000 122 +topic 0 114 2.197225 0.000000 110 +person 0 111 2.197225 0.000000 117 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +graphic 1 90 2.397895 2.397895 147 +imag 0 91 2.397895 0.000000 161 +educ 0 86 2.484907 0.000000 191 +master 1 76 2.564949 2.564949 216 +orient 1 80 2.564949 2.564949 205 +resum 0 79 2.564949 0.000000 217 +window 1 68 2.708050 2.708050 242 +content 0 59 2.833213 0.000000 302 +plai 0 60 2.833213 0.000000 307 +sampl 0 53 2.944439 0.000000 339 +favorit 0 44 3.135494 0.000000 410 +past 0 42 3.218876 0.000000 428 +combin 0 42 3.218876 0.000000 421 +product 0 33 3.433987 0.000000 527 +releas 0 28 3.610918 0.000000 616 +team 1 27 3.637586 3.637586 625 +retriev 0 27 3.637586 0.000000 621 +request 0 26 3.688879 0.000000 635 +reach 0 24 3.761200 0.000000 688 +lead 0 23 3.806662 0.000000 718 +color 0 22 3.850148 0.000000 762 +divis 0 21 3.912023 0.000000 803 +render 1 17 4.110874 4.110874 947 +analyz 0 17 4.110874 0.000000 925 +sheet 0 16 4.174387 0.000000 973 +draw 1 14 4.317488 4.317488 1086 +systemsc 0 11 4.553877 0.000000 1293 +ski 0 10 4.653960 0.000000 1471 +card 0 10 4.653960 0.000000 1435 +softbal 0 9 4.753590 0.000000 1594 +sean 1 8 4.875197 4.875197 1705 +golf 0 6 5.164786 0.000000 2178 +isi 1 5 5.347108 5.347108 2443 +interior 0 5 5.347108 0.000000 2439 +basebal 1 4 5.568345 5.568345 2969 +percept 0 3 5.857933 0.000000 3739 +compliant 0 3 5.857933 0.000000 3245 +landi 1 2 6.263398 6.263398 4830 +clickherefor 0 2 6.263398 0.000000 5344 +stratu 0 2 6.263398 0.000000 5345 +broker 0 2 6.263398 0.000000 4968 +orbix 1 1 6.957497 6.957497 11616 +landissean 0 1 6.957497 0.000000 11617 +sciencewelcom 0 1 6.957497 0.000000 11618 +weanalyz 0 1 6.957497 0.000000 11619 +patternsprofession 0 1 6.957497 0.000000 11620 +acorba 0 1 6.957497 0.000000 11621 +iona 0 1 6.957497 0.000000 11622 +alpin 0 1 6.957497 0.000000 11623 +collectingi 0 1 6.957497 0.000000 11624 +comeduc 0 1 6.957497 0.000000 11625 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..6249f7c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +oper 1 180 1.609438 1.609438 34 +updat 0 191 1.609438 0.000000 41 +distribut 0 162 1.791759 0.000000 51 +model 0 145 1.945910 0.000000 69 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +version 0 113 2.197225 0.000000 122 +graphic 1 90 2.397895 2.397895 147 +june 0 79 2.564949 0.000000 214 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +ithaca 1 65 2.772589 2.772589 294 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +summer 0 56 2.890372 0.000000 311 +york 0 41 3.218876 0.000000 435 +tech 0 35 3.401197 0.000000 515 +post 0 35 3.401197 0.000000 505 +india 0 32 3.465736 0.000000 550 +taken 0 31 3.496508 0.000000 555 +anim 0 31 3.496508 0.000000 557 +equat 0 23 3.806662 0.000000 724 +sciencecornel 0 22 3.850148 0.000000 768 +viewer 0 21 3.912023 0.000000 787 +practicum 0 16 4.174387 0.000000 960 +magic 0 11 4.553877 0.000000 1358 +mapl 0 11 4.553877 0.000000 1376 +reduct 0 7 5.010635 0.000000 1877 +parametr 0 7 5.010635 0.000000 1819 +jpeg 0 6 5.164786 0.000000 2053 +myresum 0 6 5.164786 0.000000 2199 +hoca 0 5 5.347108 0.000000 2241 +engineeringclass 0 3 5.857933 0.000000 3667 +kerala 0 3 5.857933 0.000000 3749 +cornelluniversityfal 0 2 6.263398 0.000000 5131 +artifact 0 2 6.263398 0.000000 5346 +cspracticum 0 2 6.263398 0.000000 5132 +carpet 0 2 6.263398 0.000000 5133 +colloqium 0 2 6.263398 0.000000 5134 +seena 1 1 6.957497 6.957497 11626 +cherangara 0 1 6.957497 0.000000 11627 +cherangaramast 0 1 6.957497 0.000000 11628 +homepagecurr 0 1 6.957497 0.000000 11629 +trivandrum 0 1 6.957497 0.000000 11630 +processingalgorithm 0 1 6.957497 0.000000 11631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..59962bba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,239 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +us 2 329 1.098612 2.197224 16 +student 1 343 1.098612 1.098612 19 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +network 0 168 1.791759 0.000000 61 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +first 0 140 1.945910 0.000000 71 +postscript 1 131 2.079442 2.079442 90 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +make 1 111 2.197225 2.197225 120 +site 0 106 2.197225 0.000000 119 +structur 0 106 2.197225 0.000000 105 +intern 0 108 2.197225 0.000000 128 +look 0 107 2.197225 0.000000 115 +part 1 98 2.302585 2.302585 129 +text 1 98 2.302585 2.302585 133 +peopl 1 96 2.302585 2.302585 132 +access 0 102 2.302585 0.000000 136 +imag 2 91 2.397895 4.795790 161 +call 1 91 2.397895 2.397895 153 +question 0 91 2.397895 0.000000 141 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +start 0 83 2.484907 0.000000 173 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +method 0 80 2.564949 0.000000 213 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +multimedia 0 68 2.708050 0.000000 258 +test 0 66 2.708050 0.000000 252 +window 0 68 2.708050 0.000000 242 +written 1 63 2.772589 2.772589 278 +virtual 0 62 2.772589 0.000000 285 +dept 0 64 2.772589 0.000000 291 +creat 0 63 2.772589 0.000000 277 +type 0 61 2.833213 0.000000 296 +sever 0 56 2.890372 0.000000 322 +thesi 0 57 2.890372 0.000000 327 +instruct 0 53 2.944439 0.000000 332 +allow 0 53 2.944439 0.000000 333 +cool 1 49 3.044522 3.044522 374 +give 1 50 3.044522 3.044522 359 +format 0 48 3.044522 0.000000 356 +friend 0 48 3.044522 0.000000 376 +quarter 0 47 3.091042 0.000000 389 +video 2 44 3.135494 6.270988 405 +protocol 1 45 3.135494 3.135494 407 +directori 0 45 3.135494 0.000000 396 +term 0 43 3.178054 0.000000 411 +compani 0 41 3.218876 0.000000 423 +fast 0 42 3.218876 0.000000 429 +live 0 40 3.258097 0.000000 451 +small 0 39 3.258097 0.000000 447 +prototyp 0 38 3.295837 0.000000 463 +slide 0 38 3.295837 0.000000 467 +microsoft 0 38 3.295837 0.000000 468 +mean 1 37 3.332205 3.332205 477 +statist 0 35 3.401197 0.000000 521 +transform 0 32 3.465736 0.000000 542 +given 0 32 3.465736 0.000000 538 +someth 0 31 3.496508 0.000000 554 +often 0 31 3.496508 0.000000 551 +steve 0 29 3.583519 0.000000 594 +limit 0 29 3.583519 0.000000 585 +consid 0 29 3.583519 0.000000 590 +semant 0 29 3.583519 0.000000 587 +chines 0 29 3.583519 0.000000 595 +releas 1 28 3.610918 3.610918 616 +ask 0 28 3.610918 0.000000 597 +manipul 1 27 3.637586 3.637586 624 +altern 0 26 3.688879 0.000000 641 +enhanc 0 26 3.688879 0.000000 644 +spent 0 25 3.737670 0.000000 676 +client 0 25 3.737670 0.000000 679 +interpret 1 24 3.761200 3.761200 686 +motion 0 24 3.761200 0.000000 699 +initi 0 23 3.806662 0.000000 717 +recognit 0 23 3.806662 0.000000 723 +variabl 0 23 3.806662 0.000000 715 +sequenc 0 23 3.806662 0.000000 734 +displai 0 23 3.806662 0.000000 712 +indian 1 22 3.850148 3.850148 769 +fact 0 21 3.912023 0.000000 780 +similar 0 21 3.912023 0.000000 771 +viewer 0 21 3.912023 0.000000 787 +wonder 0 20 3.951244 0.000000 815 +media 1 19 4.007333 4.007333 861 +ever 0 19 4.007333 0.000000 872 +record 1 18 4.060443 4.060443 890 +event 0 18 4.060443 0.000000 896 +lot 0 18 4.060443 0.000000 889 +stanford 1 17 4.110874 4.110874 955 +coupl 0 17 4.110874 0.000000 939 +layer 0 17 4.110874 0.000000 926 +segment 0 17 4.110874 0.000000 931 +bachelor 0 17 4.110874 0.000000 957 +upon 0 16 4.174387 0.000000 978 +portion 0 16 4.174387 0.000000 971 +stream 1 15 4.248495 4.248495 1015 +remot 0 15 4.248495 0.000000 1041 +charact 0 15 4.248495 0.000000 1028 +rate 0 15 4.248495 0.000000 1037 +audio 0 14 4.317488 0.000000 1094 +script 1 13 4.382027 4.382027 1171 +came 0 13 4.382027 0.000000 1197 +asynchron 0 12 4.465908 0.000000 1229 +gupta 0 12 4.465908 0.000000 1241 +optic 0 12 4.465908 0.000000 1221 +bill 0 11 4.553877 0.000000 1297 +player 0 11 4.553877 0.000000 1371 +fix 0 11 4.553877 0.000000 1327 +smart 0 11 4.553877 0.000000 1352 +mode 1 9 4.753590 4.753590 1492 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +leader 0 9 4.753590 0.000000 1576 +claim 1 8 4.875197 4.875197 1664 +character 0 8 4.875197 0.000000 1767 +vallei 0 7 5.010635 0.000000 1959 +signal 0 7 5.010635 0.000000 1910 +keshav 0 7 5.010635 0.000000 1852 +conferenc 0 7 5.010635 0.000000 1857 +nativ 1 6 5.164786 5.164786 2192 +classroom 0 6 5.164786 0.000000 2006 +televis 0 6 5.164786 0.000000 2118 +silicon 0 6 5.164786 0.000000 2076 +mix 0 6 5.164786 0.000000 2200 +multicast 1 5 5.347108 5.347108 2305 +shell 1 5 5.347108 5.347108 2353 +sigcomm 0 5 5.347108 0.000000 2329 +affin 0 5 5.347108 0.000000 2378 +hate 0 5 5.347108 0.000000 2529 +hole 0 5 5.347108 0.000000 2518 +stupid 0 5 5.347108 0.000000 2489 +sharma 2 4 5.568345 11.136690 2752 +anoop 0 4 5.568345 0.000000 2770 +height 0 4 5.568345 0.000000 2890 +deploi 0 3 5.857933 0.000000 3750 +greatli 0 3 5.857933 0.000000 3541 +deliveri 0 3 5.857933 0.000000 3278 +membership 0 3 5.857933 0.000000 3751 +predecessor 0 3 5.857933 0.000000 3585 +motif 0 3 5.857933 0.000000 3752 +fractal 0 3 5.857933 0.000000 3475 +hindi 0 3 5.857933 0.000000 3753 +nicknam 0 3 5.857933 0.000000 3716 +widget 1 2 6.263398 6.263398 5347 +hors 1 2 6.263398 6.263398 5348 +stumbl 0 2 6.263398 0.000000 5349 +leadto 0 2 6.263398 0.000000 5350 +navin 0 2 6.263398 0.000000 5351 +agarw 0 2 6.263398 0.000000 5352 +deer 0 2 6.263398 0.000000 4356 +width 0 2 6.263398 0.000000 5328 +yacc 0 2 6.263398 0.000000 4422 +coolest 0 2 6.263398 0.000000 5229 +fool 0 2 6.263398 0.000000 5353 +frozen 0 2 6.263398 0.000000 5078 +rosen 2 1 6.957497 13.914994 11632 +sharmila 1 1 6.957497 6.957497 11633 +vxtreme 1 1 6.957497 6.957497 11634 +imagefram 1 1 6.957497 6.957497 11635 +modifiedigmp 0 1 6.957497 0.000000 11636 +unicast 0 1 6.957497 0.000000 11637 +sitn 0 1 6.957497 0.000000 11638 +microwav 0 1 6.957497 0.000000 11639 +chaddha 0 1 6.957497 0.000000 11640 +avneesh 0 1 6.957497 0.000000 11641 +asilomar 0 1 6.957497 0.000000 11642 +igmp 0 1 6.957497 0.000000 11643 +internetdraft 0 1 6.957497 0.000000 11644 +fenner 0 1 6.957497 0.000000 11645 +niten 0 1 6.957497 0.000000 11646 +malhan 0 1 6.957497 0.000000 11647 +delhiunpublish 0 1 6.957497 0.000000 11648 +preform 0 1 6.957497 0.000000 11649 +blur 0 1 6.957497 0.000000 11650 +speckl 0 1 6.957497 0.000000 11651 +subband 0 1 6.957497 0.000000 11652 +estmat 0 1 6.957497 0.000000 11653 +writen 0 1 6.957497 0.000000 11654 +flavour 0 1 6.957497 0.000000 11655 +ifram 0 1 6.957497 0.000000 11656 +nodisplai 0 1 6.957497 0.000000 11657 +filenam 0 1 6.957497 0.000000 11658 +putimageincanva 0 1 6.957497 0.000000 11659 +dummi 0 1 6.957497 0.000000 11660 +snooper 0 1 6.957497 0.000000 11661 +doesnt 0 1 6.957497 0.000000 11662 +replai 0 1 6.957497 0.000000 11663 +kludg 0 1 6.957497 0.000000 11664 +dissalow 0 1 6.957497 0.000000 11665 +gaveth 0 1 6.957497 0.000000 11666 +tongu 0 1 6.957497 0.000000 11667 +sharm 0 1 6.957497 0.000000 11668 +shyness 0 1 6.957497 0.000000 11669 +actress 0 1 6.957497 0.000000 11670 +tagor 0 1 6.957497 0.000000 11671 +ealri 0 1 6.957497 0.000000 11672 +jewish 0 1 6.957497 0.000000 11673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..632e44f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +version 1 113 2.197225 2.197225 122 +final 0 116 2.197225 0.000000 108 +check 0 115 2.197225 0.000000 118 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +graphic 0 90 2.397895 0.000000 147 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +homepag 0 93 2.397895 0.000000 148 +school 0 84 2.484907 0.000000 188 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +view 1 70 2.708050 2.708050 254 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +abstract 0 62 2.772589 0.000000 276 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +friend 1 48 3.044522 3.044522 376 +still 0 50 3.044522 0.000000 362 +california 0 46 3.091042 0.000000 388 +favorit 0 44 3.135494 0.000000 410 +music 1 42 3.218876 3.218876 436 +futur 0 41 3.218876 0.000000 427 +movi 1 40 3.258097 3.258097 459 +soon 0 36 3.367296 0.000000 494 +john 0 33 3.433987 0.000000 532 +transform 0 32 3.465736 0.000000 542 +sciencecornel 0 22 3.850148 0.000000 768 +love 0 21 3.912023 0.000000 804 +watch 0 21 3.912023 0.000000 789 +eric 1 19 4.007333 4.007333 870 +citi 0 19 4.007333 0.000000 874 +miss 0 19 4.007333 0.000000 866 +listen 0 18 4.060443 0.000000 907 +young 0 16 4.174387 0.000000 991 +camera 0 14 4.317488 0.000000 1115 +hong 0 14 4.317488 0.000000 1105 +near 0 14 4.317488 0.000000 1091 +meng 1 12 4.465908 4.465908 1214 +went 0 12 4.465908 0.000000 1279 +pagewelcom 0 11 4.553877 0.000000 1344 +french 0 9 4.753590 0.000000 1511 +guitar 0 8 4.875197 0.000000 1758 +mile 0 8 4.875197 0.000000 1743 +instrument 0 7 5.010635 0.000000 1954 +davi 0 7 5.010635 0.000000 1888 +piano 0 6 5.164786 0.000000 2201 +antonio 0 6 5.164786 0.000000 2186 +jazz 1 5 5.347108 5.347108 2527 +carlo 0 5 5.347108 0.000000 2515 +middl 0 5 5.347108 0.000000 2372 +keyboard 0 4 5.568345 0.000000 2970 +korea 0 4 5.568345 0.000000 2971 +cyberspac 0 3 5.857933 0.000000 3719 +korean 1 2 6.263398 6.263398 5354 +acoust 1 2 6.263398 6.263398 5355 +kwan 1 2 6.263398 6.263398 4126 +sang 0 2 6.263398 0.000000 5356 +onthi 0 2 6.263398 0.000000 5357 +chopin 0 2 6.263398 0.000000 5358 +cinema 0 2 6.263398 0.000000 5244 +miser 0 2 6.263398 0.000000 5359 +melco 0 2 6.263398 0.000000 5200 +kang 0 2 6.263398 0.000000 5360 +shim 1 1 6.957497 6.957497 11674 +shimmast 0 1 6.957497 0.000000 11675 +dryden 0 1 6.957497 0.000000 11676 +irvinestudi 0 1 6.957497 0.000000 11677 +classi 0 1 6.957497 0.000000 11678 +stan 0 1 6.957497 0.000000 11679 +getz 0 1 6.957497 0.000000 11680 +jobim 0 1 6.957497 0.000000 11681 +coltran 0 1 6.957497 0.000000 11682 +earl 0 1 6.957497 0.000000 11683 +klugh 0 1 6.957497 0.000000 11684 +metheni 0 1 6.957497 0.000000 11685 +archemi 0 1 6.957497 0.000000 11686 +paradiso 0 1 6.957497 0.000000 11687 +kiss 0 1 6.957497 0.000000 11688 +saigon 0 1 6.957497 0.000000 11689 +newswant 0 1 6.957497 0.000000 11690 +anybodi 0 1 6.957497 0.000000 11691 +hana 0 1 6.957497 0.000000 11692 +jung 0 1 6.957497 0.000000 11693 +hwan 0 1 6.957497 0.000000 11694 +victor 0 1 6.957497 0.000000 11695 +jiyang 0 1 6.957497 0.000000 11696 +timessinc 0 1 6.957497 0.000000 11697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..b6147504 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +process 1 142 1.945910 1.945910 72 +area 0 144 1.945910 0.000000 80 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +document 1 121 2.079442 2.079442 89 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +postscript 0 131 2.079442 0.000000 90 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +structur 1 106 2.197225 2.197225 105 +text 2 98 2.302585 4.605170 133 +techniqu 1 99 2.302585 2.302585 138 +need 0 98 2.302585 0.000000 135 +select 1 91 2.397895 2.397895 154 +proceed 0 93 2.397895 0.000000 152 +associ 0 93 2.397895 0.000000 151 +sinc 0 90 2.397895 0.000000 159 +help 0 83 2.484907 0.000000 175 +appear 1 78 2.564949 2.564949 210 +resum 0 79 2.564949 0.000000 217 +come 0 78 2.564949 0.000000 202 +june 0 79 2.564949 0.000000 214 +master 0 76 2.564949 0.000000 216 +involv 0 71 2.639057 0.000000 227 +david 0 71 2.639057 0.000000 232 +prof 1 64 2.772589 2.772589 273 +copi 0 63 2.772589 0.000000 284 +improv 0 62 2.772589 0.000000 289 +evalu 0 64 2.772589 0.000000 266 +collect 0 65 2.772589 0.000000 268 +visit 0 63 2.772589 0.000000 288 +automat 2 61 2.833213 5.666426 306 +best 0 59 2.833213 0.000000 299 +thesi 1 57 2.890372 2.890372 327 +variou 0 56 2.890372 0.000000 317 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +advisor 0 51 2.995732 0.000000 355 +approach 0 48 3.044522 0.000000 366 +visitor 0 49 3.044522 0.000000 371 +effect 0 46 3.091042 0.000000 385 +term 0 43 3.178054 0.000000 411 +show 0 43 3.178054 0.000000 417 +third 0 43 3.178054 0.000000 412 +late 0 40 3.258097 0.000000 439 +field 1 37 3.332205 3.332205 482 +soon 0 36 3.367296 0.000000 494 +ofth 0 36 3.367296 0.000000 491 +jame 1 35 3.401197 3.401197 507 +queri 0 33 3.433987 0.000000 524 +independ 0 32 3.465736 0.000000 548 +exist 0 30 3.555348 0.000000 569 +propos 1 28 3.610918 3.610918 602 +usual 0 28 3.610918 0.000000 608 +retriev 2 27 3.637586 7.275172 621 +relev 0 26 3.688879 0.000000 637 +other 0 24 3.761200 0.000000 697 +size 1 23 3.806662 3.806662 713 +brows 0 23 3.806662 0.000000 726 +thank 0 23 3.806662 0.000000 721 +hypertext 0 19 4.007333 0.000000 865 +segment 0 17 4.110874 0.000000 931 +normal 1 16 4.174387 4.174387 995 +eduphon 0 15 4.248495 0.000000 1060 +senior 0 14 4.317488 0.000000 1120 +weight 0 12 4.465908 0.000000 1204 +reader 0 12 4.465908 0.000000 1246 +chri 2 11 4.553877 9.107754 1311 +smart 1 11 4.553877 4.553877 1352 +arpa 0 11 4.553877 0.000000 1369 +summar 0 11 4.553877 0.000000 1295 +length 1 10 4.653960 4.653960 1400 +decomposit 1 10 4.653960 4.653960 1439 +theme 1 8 4.875197 4.875197 1707 +counter 0 8 4.875197 0.000000 1765 +vari 0 6 5.164786 0.000000 2001 +pivot 1 5 5.347108 5.347108 2426 +fairli 0 5 5.347108 0.000000 2322 +yield 0 5 5.347108 0.000000 2458 +testb 0 5 5.347108 0.000000 2456 +circumst 0 5 5.347108 0.000000 2283 +proceedingsof 0 5 5.347108 0.000000 2331 +dual 0 5 5.347108 0.000000 2522 +travers 0 5 5.347108 0.000000 2363 +amit 2 4 5.568345 11.136690 2972 +allan 1 4 5.568345 5.568345 2849 +nist 1 4 5.568345 5.568345 2973 +commonli 0 4 5.568345 0.000000 2877 +substanti 0 4 5.568345 0.000000 2921 +singhal 2 3 5.857933 11.715866 3098 +trec 1 3 5.857933 5.857933 3547 +supervisor 0 3 5.857933 0.000000 3754 +likelihood 0 3 5.857933 0.000000 3172 +expans 0 3 5.857933 0.000000 3755 +gerard 2 2 6.263398 12.526796 4876 +salton 2 2 6.263398 12.526796 4060 +bucklei 2 2 6.263398 12.526796 4874 +foremost 0 2 6.263398 0.000000 5361 +excerpt 0 2 6.263398 0.000000 4880 +degrad 0 2 6.263398 0.000000 5362 +amitsingh 0 2 6.263398 0.000000 4061 +slowli 0 2 6.263398 0.000000 5363 +mandar 1 1 6.957497 6.957497 11698 +gerardsalton 1 1 6.957497 6.957497 11699 +lengthnorm 1 1 6.957497 6.957497 11700 +mandarmitra 1 1 6.957497 6.957497 11701 +mitra 1 1 6.957497 6.957497 11702 +pageamit 0 1 6.957497 0.000000 11703 +singhaldepart 0 1 6.957497 0.000000 11704 +universitysingh 0 1 6.957497 0.000000 11705 +andtext 0 1 6.957497 0.000000 11706 +clairecardieher 0 1 6.957497 0.000000 11707 +beenon 0 1 6.957497 0.000000 11708 +informationretriev 0 1 6.957497 0.000000 11709 +thirti 0 1 6.957497 0.000000 11710 +thateffect 0 1 6.957497 0.000000 11711 +chancessimilar 0 1 6.957497 0.000000 11712 +normalizationfunct 0 1 6.957497 0.000000 11713 +retrievaleffect 0 1 6.957497 0.000000 11714 +normalizationtechniqu 0 1 6.957497 0.000000 11715 +trecparticipationtext 0 1 6.957497 0.000000 11716 +sponsoredeffort 0 1 6.957497 0.000000 11717 +retrievaltechniqu 0 1 6.957497 0.000000 11718 +hasconsist 0 1 6.957497 0.000000 11719 +somepap 0 1 6.957497 0.000000 11720 +summarizationnon 0 1 6.957497 0.000000 11721 +expositori 0 1 6.957497 0.000000 11722 +tocov 0 1 6.957497 0.000000 11723 +selectiveaccess 0 1 6.957497 0.000000 11724 +toanalyz 0 1 6.957497 0.000000 11725 +texttravers 0 1 6.957497 0.000000 11726 +papersnorm 0 1 6.957497 0.000000 11727 +documentlength 0 1 6.957497 0.000000 11728 +mitraand 0 1 6.957497 0.000000 11729 +usingsmart 0 1 6.957497 0.000000 11730 +textthem 0 1 6.957497 0.000000 11731 +andmanag 0 1 6.957497 0.000000 11732 +vectorspac 0 1 6.957497 0.000000 11733 +machineread 0 1 6.957497 0.000000 11734 +groupmemb 0 1 6.957497 0.000000 11735 +fluctuat 0 1 6.957497 0.000000 11736 +iinstal 0 1 6.957497 0.000000 11737 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..73544334 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,154 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +paper 1 205 1.609438 1.609438 38 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +postscript 0 131 2.079442 0.000000 90 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +peopl 1 96 2.302585 2.302585 132 +part 0 98 2.302585 0.000000 129 +sinc 1 90 2.397895 2.397895 159 +real 1 93 2.397895 2.397895 144 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +build 1 85 2.484907 2.484907 184 +wide 0 84 2.484907 0.000000 185 +internet 0 83 2.484907 0.000000 186 +level 0 87 2.484907 0.000000 180 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +name 2 72 2.639057 5.278114 220 +upson 0 71 2.639057 0.000000 218 +write 0 72 2.639057 0.000000 222 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +virtual 0 62 2.772589 0.000000 285 +reason 0 57 2.890372 0.000000 318 +talk 0 53 2.944439 0.000000 336 +maintain 0 51 2.995732 0.000000 342 +format 0 48 3.044522 0.000000 356 +still 0 50 3.044522 0.000000 362 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +littl 0 39 3.258097 0.000000 454 +everi 0 34 3.401197 0.000000 519 +least 0 35 3.401197 0.000000 516 +word 0 34 3.401197 0.000000 508 +independ 0 32 3.465736 0.000000 548 +idea 0 32 3.465736 0.000000 545 +india 0 32 3.465736 0.000000 550 +anim 0 31 3.496508 0.000000 557 +domain 0 30 3.555348 0.000000 564 +built 0 29 3.583519 0.000000 592 +quot 0 29 3.583519 0.000000 582 +actual 0 28 3.610918 0.000000 604 +releas 0 28 3.610918 0.000000 616 +sometim 0 24 3.761200 0.000000 696 +head 0 23 3.806662 0.000000 732 +instal 0 22 3.850148 0.000000 754 +latest 0 21 3.912023 0.000000 785 +thu 0 21 3.912023 0.000000 773 +driven 0 15 4.248495 0.000000 1048 +remot 0 15 4.248495 0.000000 1041 +goe 0 15 4.248495 0.000000 1044 +squar 0 14 4.317488 0.000000 1082 +edui 0 13 4.382027 0.000000 1193 +directli 0 13 4.382027 0.000000 1141 +circuit 0 13 4.382027 0.000000 1131 +unfortun 0 13 4.382027 0.000000 1170 +speak 0 12 4.465908 0.000000 1283 +song 0 11 4.553877 0.000000 1380 +equip 0 10 4.653960 0.000000 1459 +stack 0 10 4.653960 0.000000 1389 +packet 0 10 4.653960 0.000000 1415 +custom 0 10 4.653960 0.000000 1414 +mode 1 9 4.753590 4.753590 1492 +lane 0 8 4.875197 0.000000 1720 +router 0 8 4.875197 0.000000 1772 +switch 0 8 4.875197 0.000000 1718 +span 0 8 4.875197 0.000000 1751 +round 0 8 4.875197 0.000000 1769 +keshav 1 7 5.010635 5.010635 1852 +signal 0 7 5.010635 0.000000 1910 +canb 0 7 5.010635 0.000000 1846 +hear 0 7 5.010635 0.000000 1940 +nativ 1 6 5.164786 5.164786 2192 +south 0 6 5.164786 0.000000 2167 +srinivasan 0 6 5.164786 0.000000 2175 +christoph 0 5 5.347108 0.000000 2512 +delhi 0 5 5.347108 0.000000 2530 +facial 0 5 5.347108 0.000000 2438 +hole 0 5 5.347108 0.000000 2518 +poem 0 5 5.347108 0.000000 2483 +systemsand 0 4 5.568345 0.000000 2804 +scratch 0 3 5.857933 0.000000 3140 +compliant 0 3 5.857933 0.000000 3245 +district 0 3 5.857933 0.000000 3756 +father 0 3 5.857933 0.000000 3757 +johann 0 3 5.857933 0.000000 3758 +goof 0 2 6.263398 0.000000 4985 +snoop 0 2 6.263398 0.000000 5364 +mbone 0 2 6.263398 0.000000 4361 +pagemi 0 2 6.263398 0.000000 5230 +villag 0 2 6.263398 0.000000 5215 +ought 0 2 6.263398 0.000000 5365 +goeth 0 2 6.263398 0.000000 5366 +skeshav 1 1 6.957497 6.957497 11738 +idlinet 1 1 6.957497 6.957497 11739 +keshavemail 0 1 6.957497 0.000000 11740 +spentfiv 0 1 6.957497 0.000000 11741 +xunet 0 1 6.957497 0.000000 11742 +incollabor 0 1 6.957497 0.000000 11743 +fore 0 1 6.957497 0.000000 11744 +zeitnet 0 1 6.957497 0.000000 11745 +idlinetsourc 0 1 6.957497 0.000000 11746 +applicationget 0 1 6.957497 0.000000 11747 +linkspapersher 0 1 6.957497 0.000000 11748 +linkto 0 1 6.957497 0.000000 11749 +reali 0 1 6.957497 0.000000 11750 +beout 0 1 6.957497 0.000000 11751 +native_mod 0 1 6.957497 0.000000 11752 +namein 0 1 6.957497 0.000000 11753 +thanjavur 0 1 6.957497 0.000000 11754 +beprecis 0 1 6.957497 0.000000 11755 +prefix 0 1 6.957497 0.000000 11756 +sonli 0 1 6.957497 0.000000 11757 +surnam 0 1 6.957497 0.000000 11758 +myfath 0 1 6.957497 0.000000 11759 +intoth 0 1 6.957497 0.000000 11760 +beconfus 0 1 6.957497 0.000000 11761 +quotabl 0 1 6.957497 0.000000 11762 +wolfgang 0 1 6.957497 0.000000 11763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..7657adcd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +construct 0 139 1.945910 0.000000 82 +place 0 106 2.197225 0.000000 124 +sinc 0 90 2.397895 0.000000 159 +prof 0 64 2.772589 0.000000 273 +electr 0 38 3.295837 0.000000 461 +alwai 0 24 3.761200 0.000000 691 +came 0 13 4.382027 0.000000 1197 +kenneth 1 12 4.465908 4.465908 1265 +meng 0 12 4.465908 0.000000 1214 +road 0 11 4.553877 0.000000 1374 +success 0 10 4.653960 0.000000 1390 +zabih 0 6 5.164786 0.000000 2138 +sunlab 0 2 6.263398 0.000000 5222 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..ecc3713a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +cornel 1 215 1.386294 1.386294 23 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +read 1 154 1.791759 1.791759 47 +recent 0 167 1.791759 0.000000 58 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +object 0 138 1.945910 0.000000 79 +high 0 130 2.079442 0.000000 101 +well 1 109 2.197225 2.197225 121 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +teach 0 108 2.197225 0.000000 112 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +pictur 1 89 2.397895 2.397895 160 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +commun 0 95 2.397895 0.000000 157 +start 1 83 2.484907 2.484907 173 +school 1 84 2.484907 2.484907 188 +internet 1 83 2.484907 2.484907 186 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +thing 0 84 2.484907 0.000000 189 +novemb 0 81 2.484907 0.000000 179 +come 1 78 2.564949 2.564949 202 +want 0 79 2.564949 0.000000 199 +complet 0 77 2.564949 0.000000 208 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +june 0 79 2.564949 0.000000 214 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +visit 1 63 2.772589 2.772589 288 +plan 0 65 2.772589 0.000000 272 +written 0 63 2.772589 0.000000 278 +dept 0 64 2.772589 0.000000 291 +best 0 59 2.833213 0.000000 299 +summer 1 56 2.890372 2.890372 311 +direct 0 57 2.890372 0.000000 316 +point 0 58 2.890372 0.000000 319 +think 0 57 2.890372 0.000000 314 +reason 0 57 2.890372 0.000000 318 +talk 1 53 2.944439 2.944439 336 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +week 0 52 2.995732 0.000000 343 +friend 1 48 3.044522 3.044522 376 +right 0 48 3.044522 0.000000 363 +still 0 50 3.044522 0.000000 362 +could 0 46 3.091042 0.000000 383 +featur 0 46 3.091042 0.000000 386 +better 1 45 3.135494 3.135494 401 +even 0 45 3.135494 0.000000 393 +around 1 43 3.178054 3.178054 415 +music 0 42 3.218876 0.000000 436 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +must 0 40 3.258097 0.000000 442 +late 0 40 3.258097 0.000000 439 +theoret 0 39 3.258097 0.000000 446 +live 0 40 3.258097 0.000000 451 +brian 0 38 3.295837 0.000000 466 +connect 0 37 3.332205 0.000000 485 +tree 1 36 3.367296 3.367296 492 +especi 1 36 3.367296 3.367296 496 +winter 0 36 3.367296 0.000000 500 +next 1 34 3.401197 3.401197 517 +random 0 34 3.401197 0.000000 511 +either 0 35 3.401197 0.000000 506 +go 0 33 3.433987 0.000000 529 +kind 0 32 3.465736 0.000000 541 +power 0 30 3.555348 0.000000 573 +actual 0 28 3.610918 0.000000 604 +quit 1 27 3.637586 3.637586 633 +great 0 27 3.637586 0.000000 626 +rather 0 26 3.688879 0.000000 642 +never 0 25 3.737670 0.000000 671 +spent 0 25 3.737670 0.000000 676 +magazin 0 24 3.761200 0.000000 704 +alwai 0 24 3.761200 0.000000 691 +pattern 0 24 3.761200 0.000000 689 +sometim 0 24 3.761200 0.000000 696 +togeth 0 23 3.806662 0.000000 714 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +inth 0 22 3.850148 0.000000 741 +instead 0 22 3.850148 0.000000 756 +leav 1 21 3.912023 3.912023 772 +half 1 21 3.912023 3.912023 776 +busi 1 21 3.912023 3.912023 784 +path 0 21 3.912023 0.000000 778 +hous 0 21 3.912023 0.000000 801 +tell 0 21 3.912023 0.000000 777 +theorem 0 21 3.912023 0.000000 786 +nice 0 20 3.951244 0.000000 809 +left 1 19 4.007333 4.007333 851 +mostli 0 19 4.007333 0.000000 869 +seem 2 18 4.060443 8.120886 899 +figur 1 18 4.060443 4.060443 903 +listen 0 18 4.060443 0.000000 907 +coupl 0 17 4.110874 0.000000 939 +stop 0 17 4.110874 0.000000 942 +sign 0 16 4.174387 0.000000 970 +modern 0 16 4.174387 0.000000 966 +brown 0 16 4.174387 0.000000 977 +side 0 15 4.248495 0.000000 1022 +score 0 15 4.248495 0.000000 1017 +english 0 15 4.248495 0.000000 1033 +indic 0 15 4.248495 0.000000 1013 +near 0 14 4.317488 0.000000 1091 +warn 0 14 4.317488 0.000000 1068 +stori 0 14 4.317488 0.000000 1087 +sai 1 13 4.382027 4.382027 1175 +someon 1 13 4.382027 4.382027 1128 +step 0 13 4.382027 0.000000 1138 +front 0 13 4.382027 0.000000 1154 +everyth 0 13 4.382027 0.000000 1169 +nick 0 13 4.382027 0.000000 1180 +insid 1 12 4.465908 4.465908 1262 +walk 0 12 4.465908 0.000000 1281 +tune 0 12 4.465908 0.000000 1227 +went 0 12 4.465908 0.000000 1279 +daniel 0 12 4.465908 0.000000 1233 +neat 0 12 4.465908 0.000000 1263 +america 0 11 4.553877 0.000000 1370 +label 1 10 4.653960 4.653960 1423 +mountain 0 10 4.653960 0.000000 1456 +traffic 0 10 4.653960 0.000000 1421 +hello 0 10 4.653960 0.000000 1407 +paragraph 0 10 4.653960 0.000000 1449 +awai 0 10 4.653960 0.000000 1447 +clear 1 9 4.753590 4.753590 1488 +sound 1 9 4.753590 4.753590 1605 +distanc 0 9 4.753590 0.000000 1500 +ring 1 8 4.875197 4.875197 1684 +realiz 0 8 4.875197 0.000000 1739 +perhap 0 8 4.875197 0.000000 1693 +contrast 0 8 4.875197 0.000000 1637 +gold 0 8 4.875197 0.000000 1745 +switch 0 8 4.875197 0.000000 1718 +manufactur 0 8 4.875197 0.000000 1634 +japan 0 8 4.875197 0.000000 1762 +ideal 0 8 4.875197 0.000000 1630 +hear 0 7 5.010635 0.000000 1940 +throughout 0 7 5.010635 0.000000 1871 +corner 0 7 5.010635 0.000000 1909 +smile 0 7 5.010635 0.000000 1807 +golden 0 7 5.010635 0.000000 1962 +centuri 0 7 5.010635 0.000000 1935 +intellectu 0 7 5.010635 0.000000 1847 +occasion 0 7 5.010635 0.000000 1905 +pronounc 0 7 5.010635 0.000000 1918 +largest 0 7 5.010635 0.000000 1858 +happen 0 7 5.010635 0.000000 1790 +snow 1 6 5.164786 5.164786 2031 +rain 0 6 5.164786 0.000000 2137 +strang 0 6 5.164786 0.000000 2064 +mother 0 6 5.164786 0.000000 2083 +seen 0 6 5.164786 0.000000 2202 +ifyou 0 6 5.164786 0.000000 1992 +polit 0 6 5.164786 0.000000 2115 +hair 1 5 5.347108 5.347108 2446 +fairli 0 5 5.347108 0.000000 2322 +respond 0 5 5.347108 0.000000 2354 +door 0 5 5.347108 0.000000 2291 +own 0 5 5.347108 0.000000 2531 +isth 0 5 5.347108 0.000000 2532 +sing 0 5 5.347108 0.000000 2499 +wear 1 4 5.568345 5.568345 2785 +somewhat 1 4 5.568345 5.568345 2659 +somehow 0 4 5.568345 0.000000 2974 +hell 0 4 5.568345 0.000000 2885 +glanc 0 4 5.568345 0.000000 2652 +tick 0 4 5.568345 0.000000 2975 +heard 0 4 5.568345 0.000000 2895 +paus 0 4 5.568345 0.000000 2965 +dark 0 4 5.568345 0.000000 2910 +usedto 0 4 5.568345 0.000000 2643 +gotten 0 4 5.568345 0.000000 2628 +hire 0 4 5.568345 0.000000 2976 +couldn 0 4 5.568345 0.000000 2977 +glass 1 3 5.857933 5.857933 3759 +vagu 1 3 5.857933 5.857933 3393 +surround 0 3 5.857933 0.000000 3492 +stone 0 3 5.857933 0.000000 3674 +scatter 0 3 5.857933 0.000000 3351 +worri 0 3 5.857933 0.000000 3130 +thin 0 3 5.857933 0.000000 3488 +bright 0 3 5.857933 0.000000 3596 +twentieth 0 3 5.857933 0.000000 3760 +wave 0 3 5.857933 0.000000 3518 +europ 0 3 5.857933 0.000000 3761 +child 0 3 5.857933 0.000000 3542 +blame 0 3 5.857933 0.000000 3636 +dread 0 3 5.857933 0.000000 3630 +wise 0 3 5.857933 0.000000 3631 +pyramid 0 3 5.857933 0.000000 3358 +maker 0 3 5.857933 0.000000 3164 +obviou 1 2 6.263398 6.263398 5367 +forest 0 2 6.263398 0.000000 5368 +shack 0 2 6.263398 0.000000 5369 +withno 0 2 6.263398 0.000000 5370 +pile 0 2 6.263398 0.000000 5371 +hum 0 2 6.263398 0.000000 4935 +purpl 0 2 6.263398 0.000000 5372 +silk 0 2 6.263398 0.000000 5373 +silver 0 2 6.263398 0.000000 5374 +ocean 0 2 6.263398 0.000000 5375 +andlook 0 2 6.263398 0.000000 4561 +altogeth 0 2 6.263398 0.000000 4751 +nowadai 0 2 6.263398 0.000000 5376 +pointcast 0 2 6.263398 0.000000 5377 +inner 0 2 6.263398 0.000000 4551 +furnitur 0 2 6.263398 0.000000 5016 +haveth 0 2 6.263398 0.000000 5378 +hani 0 2 6.263398 0.000000 5140 +neededto 0 2 6.263398 0.000000 5379 +agood 0 2 6.263398 0.000000 5380 +presum 1 1 6.957497 6.957497 11764 +heha 1 1 6.957497 6.957497 11765 +beaver 1 1 6.957497 6.957497 11766 +perri 1 1 6.957497 6.957497 11767 +clearinglook 0 1 6.957497 0.000000 11768 +thanyou 0 1 6.957497 0.000000 11769 +onal 0 1 6.957497 0.000000 11770 +theweath 0 1 6.957497 0.000000 11771 +overcast 0 1 6.957497 0.000000 11772 +ifit 0 1 6.957497 0.000000 11773 +quiteclear 0 1 6.957497 0.000000 11774 +bird 0 1 6.957497 0.000000 11775 +chirp 0 1 6.957497 0.000000 11776 +theymai 0 1 6.957497 0.000000 11777 +louder 0 1 6.957497 0.000000 11778 +nearbywaterfal 0 1 6.957497 0.000000 11779 +gotta 0 1 6.957497 0.000000 11780 +apath 0 1 6.957497 0.000000 11781 +asign 0 1 6.957497 0.000000 11782 +hillschool 0 1 6.957497 0.000000 11783 +wormhol 0 1 6.957497 0.000000 11784 +nearbyhous 0 1 6.957497 0.000000 11785 +clearinginsid 0 1 6.957497 0.000000 11786 +offand 0 1 6.957497 0.000000 11787 +theclear 0 1 6.957497 0.000000 11788 +rhyme 0 1 6.957497 0.000000 11789 +reasonto 0 1 6.957497 0.000000 11790 +sortsof 0 1 6.957497 0.000000 11791 +betteridea 0 1 6.957497 0.000000 11792 +itseem 0 1 6.957497 0.000000 11793 +importantth 0 1 6.957497 0.000000 11794 +clearingh 0 1 6.957497 0.000000 11795 +oftendescrib 0 1 6.957497 0.000000 11796 +mostdistinct 0 1 6.957497 0.000000 11797 +quitelong 0 1 6.957497 0.000000 11798 +elfin 0 1 6.957497 0.000000 11799 +peoplebefor 0 1 6.957497 0.000000 11800 +theresoon 0 1 6.957497 0.000000 11801 +startstel 0 1 6.957497 0.000000 11802 +whynichola 0 1 6.957497 0.000000 11803 +negropont 0 1 6.957497 0.000000 11804 +moron 0 1 6.957497 0.000000 11805 +thenh 0 1 6.957497 0.000000 11806 +obscur 0 1 6.957497 0.000000 11807 +hetend 0 1 6.957497 0.000000 11808 +appearanceinstead 0 1 6.957497 0.000000 11809 +turquois 0 1 6.957497 0.000000 11810 +linen 0 1 6.957497 0.000000 11811 +imageof 0 1 6.957497 0.000000 11812 +fromhim 0 1 6.957497 0.000000 11813 +pewter 0 1 6.957497 0.000000 11814 +pentacl 0 1 6.957497 0.000000 11815 +neck 0 1 6.957497 0.000000 11816 +hippi 0 1 6.957497 0.000000 11817 +asclass 0 1 6.957497 0.000000 11818 +clearingdan 0 1 6.957497 0.000000 11819 +briani 0 1 6.957497 0.000000 11820 +anundergrad 0 1 6.957497 0.000000 11821 +newsprovid 0 1 6.957497 0.000000 11822 +ancamosoiu 0 1 6.957497 0.000000 11823 +schwa 0 1 6.957497 0.000000 11824 +backwhen 0 1 6.957497 0.000000 11825 +twoand 0 1 6.957497 0.000000 11826 +wegradu 0 1 6.957497 0.000000 11827 +shejust 0 1 6.957497 0.000000 11828 +onewav 0 1 6.957497 0.000000 11829 +issomeon 0 1 6.957497 0.000000 11830 +severalmonth 0 1 6.957497 0.000000 11831 +becamemuch 0 1 6.957497 0.000000 11832 +eedepart 0 1 6.957497 0.000000 11833 +multimediastud 0 1 6.957497 0.000000 11834 +friendof 0 1 6.957497 0.000000 11835 +finlei 0 1 6.957497 0.000000 11836 +notanymor 0 1 6.957497 0.000000 11837 +steelcas 0 1 6.957497 0.000000 11838 +dserver 0 1 6.957497 0.000000 11839 +kinda 0 1 6.957497 0.000000 11840 +cheesi 0 1 6.957497 0.000000 11841 +thebuild 0 1 6.957497 0.000000 11842 +ius 0 1 6.957497 0.000000 11843 +programcal 0 1 6.957497 0.000000 11844 +graduatedfrom 0 1 6.957497 0.000000 11845 +cuter 0 1 6.957497 0.000000 11846 +thanth 0 1 6.957497 0.000000 11847 +blurri 0 1 6.957497 0.000000 11848 +dreamer 0 1 6.957497 0.000000 11849 +ofdream 0 1 6.957497 0.000000 11850 +aphex 0 1 6.957497 0.000000 11851 +twindan 0 1 6.957497 0.000000 11852 +snowman 0 1 6.957497 0.000000 11853 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..6046a2c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +inform 0 412 0.693147 0.000000 8 +cornel 0 215 1.386294 0.000000 23 +take 0 97 2.302585 0.000000 134 +cool 0 49 3.044522 0.000000 374 +keep 0 44 3.135494 0.000000 409 +long 0 43 3.178054 0.000000 413 +soon 0 36 3.367296 0.000000 494 +lot 0 18 4.060443 0.000000 889 +rate 0 15 4.248495 0.000000 1037 +ashish 0 5 5.347108 0.000000 2473 +ultra 0 4 5.568345 0.000000 2889 +autobiographi 0 2 6.263398 0.000000 5070 +lookin 1 1 6.957497 6.957497 11854 +upkeep 0 1 6.957497 0.000000 11855 +setuup 0 1 6.957497 0.000000 11856 +doingajaymanishanujmom 0 1 6.957497 0.000000 11857 +daddepart 0 1 6.957497 0.000000 11858 +sciencesearch 0 1 6.957497 0.000000 11859 +netentertain 0 1 6.957497 0.000000 11860 +weeklycricket 0 1 6.957497 0.000000 11861 +soni 0 1 6.957497 0.000000 11862 +sonia 0 1 6.957497 0.000000 11863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..541aeb33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +would 0 67 2.708050 0.000000 251 +browser 0 56 2.890372 0.000000 313 +better 0 45 3.135494 0.000000 401 +form 0 39 3.258097 0.000000 443 +scott 0 18 4.060443 0.000000 884 +capabl 0 15 4.248495 0.000000 1016 +dawson 0 2 6.263398 0.000000 4886 +padif 0 1 6.957497 0.000000 11864 +scottdawson 0 1 6.957497 0.000000 11865 +shomebas 0 1 6.957497 0.000000 11866 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..bb680f65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +cornel 1 215 1.386294 1.386294 23 +hall 0 146 1.945910 0.000000 65 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +center 0 88 2.397895 0.000000 158 +institut 0 84 2.484907 0.000000 187 +paul 0 38 3.295837 0.000000 471 +scheme 0 20 3.951244 0.000000 818 +ultim 0 17 4.110874 0.000000 943 +affili 0 13 4.382027 0.000000 1194 +rhode 0 9 4.753590 0.000000 1579 +hockei 0 8 4.875197 0.000000 1760 +atcornel 0 6 5.164786 0.000000 2131 +stodghil 0 4 5.568345 0.000000 2864 +bernoulli 0 4 5.568345 0.000000 2955 +pagepaul 0 3 5.857933 0.000000 3669 +stodghillstodghil 0 1 6.957497 0.000000 11867 +acri 0 1 6.957497 0.000000 11868 +projectinterest 0 1 6.957497 0.000000 11869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..40754e08 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +updat 0 191 1.609438 0.000000 41 +august 0 66 2.708050 0.000000 257 +move 0 47 3.091042 0.000000 382 +http 0 41 3.218876 0.000000 420 +scott 0 18 4.060443 0.000000 884 +former 1 17 4.110874 4.110874 956 +indiana 0 6 5.164786 0.000000 2057 +stoller 1 4 5.568345 5.568345 2866 +pagescott 0 4 5.568345 0.000000 2978 +hyplan 0 1 6.957497 0.000000 11870 +htmllast 0 1 6.957497 0.000000 11871 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..3ec33fad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +perform 1 143 1.945910 1.945910 74 +architectur 0 139 1.945910 0.000000 77 +high 1 130 2.079442 2.079442 101 +welcom 0 122 2.079442 0.000000 99 +spring 0 131 2.079442 0.000000 88 +compil 0 122 2.079442 0.000000 96 +person 0 111 2.197225 0.000000 117 +theori 0 111 2.197225 0.000000 127 +take 1 97 2.302585 2.302585 134 +advanc 0 99 2.302585 0.000000 130 +multimedia 0 68 2.708050 0.000000 258 +prof 0 64 2.772589 0.000000 273 +previou 0 62 2.772589 0.000000 290 +semest 0 58 2.890372 0.000000 312 +brian 0 38 3.295837 0.000000 466 +seminar 0 38 3.295837 0.000000 470 +option 0 30 3.555348 0.000000 575 +progress 0 28 3.610918 0.000000 598 +smith 0 20 3.951244 0.000000 820 +wonder 0 20 3.951244 0.000000 815 +ofcomput 0 10 4.653960 0.000000 1442 +earth 0 10 4.653960 0.000000 1463 +marri 0 7 5.010635 0.000000 1946 +price 0 6 5.164786 0.000000 1999 +czar 0 5 5.347108 0.000000 2503 +mehom 0 4 5.568345 0.000000 2979 +eduaddress 0 3 5.857933 0.000000 3762 +sugata 1 2 6.263398 6.263398 4976 +mukhopadhyai 1 2 6.263398 6.263398 4981 +ritu 0 1 6.957497 0.000000 11872 +mailsugata 0 1 6.957497 0.000000 11873 +hichori 0 1 6.957497 0.000000 11874 +estat 0 1 6.957497 0.000000 11875 +owego 0 1 6.957497 0.000000 11876 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..b34744bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +need 0 98 2.302585 0.000000 135 +present 0 91 2.397895 0.000000 145 +resum 0 79 2.564949 0.000000 217 +java 0 70 2.708050 0.000000 248 +view 0 70 2.708050 0.000000 254 +ithaca 0 65 2.772589 0.000000 294 +browser 0 56 2.890372 0.000000 313 +life 0 50 3.044522 0.000000 375 +physic 0 47 3.091042 0.000000 377 +paul 1 38 3.295837 3.295837 471 +anim 0 31 3.496508 0.000000 557 +background 0 25 3.737670 0.000000 664 +rout 0 21 3.912023 0.000000 793 +capabl 0 15 4.248495 0.000000 1016 +philosophi 0 13 4.382027 0.000000 1167 +clock 0 11 4.553877 0.000000 1320 +tick 0 4 5.568345 0.000000 2975 +carleton 0 2 6.263398 0.000000 5381 +sukhpal 1 1 6.957497 6.957497 11877 +sanghera 1 1 6.957497 6.957497 11878 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..031117c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +read 1 154 1.791759 1.791759 47 +first 1 140 1.945910 1.945910 71 +high 0 130 2.079442 0.000000 101 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +part 1 98 2.302585 2.302585 129 +peopl 0 96 2.302585 0.000000 132 +mani 0 92 2.397895 0.000000 150 +want 0 79 2.564949 0.000000 199 +orient 0 80 2.564949 0.000000 205 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +summer 0 56 2.890372 0.000000 311 +space 0 57 2.890372 0.000000 310 +found 0 53 2.944439 0.000000 337 +week 0 52 2.995732 0.000000 343 +run 0 51 2.995732 0.000000 347 +understand 0 47 3.091042 0.000000 384 +video 0 44 3.135494 0.000000 405 +show 0 43 3.178054 0.000000 417 +long 0 43 3.178054 0.000000 413 +compani 0 41 3.218876 0.000000 423 +realli 0 40 3.258097 0.000000 444 +microsoft 1 38 3.295837 3.295837 468 +industri 0 38 3.295837 0.000000 464 +feel 0 37 3.332205 0.000000 483 +hand 0 37 3.332205 0.000000 475 +post 0 35 3.401197 0.000000 505 +print 0 34 3.401197 0.000000 503 +everi 0 34 3.401197 0.000000 519 +within 0 33 3.433987 0.000000 525 +kind 0 32 3.465736 0.000000 541 +progress 0 28 3.610918 0.000000 598 +enjoi 0 26 3.688879 0.000000 660 +session 0 26 3.688879 0.000000 643 +notic 0 25 3.737670 0.000000 675 +never 0 25 3.737670 0.000000 671 +toward 0 25 3.737670 0.000000 668 +seri 0 24 3.761200 0.000000 708 +recommend 0 22 3.850148 0.000000 737 +corpor 0 21 3.912023 0.000000 802 +stand 0 18 4.060443 0.000000 891 +weekli 1 17 4.110874 4.110874 919 +whether 0 17 4.110874 0.000000 918 +attempt 0 17 4.110874 0.000000 917 +sign 0 16 4.174387 0.000000 970 +anywai 0 15 4.248495 0.000000 1047 +comic 0 14 4.317488 0.000000 1103 +social 0 13 4.382027 0.000000 1123 +everyon 0 13 4.382027 0.000000 1148 +regularli 0 11 4.553877 0.000000 1338 +fill 0 11 4.553877 0.000000 1349 +strongli 0 10 4.653960 0.000000 1406 +didn 0 9 4.753590 0.000000 1563 +parti 1 8 4.875197 4.875197 1676 +empir 0 8 4.875197 0.000000 1722 +illustr 0 8 4.875197 0.000000 1679 +told 0 8 4.875197 0.000000 1658 +strip 1 6 5.164786 5.164786 2203 +tri 0 6 5.164786 0.000000 2166 +viewpoint 0 6 5.164786 0.000000 2116 +gate 0 6 5.164786 0.000000 2182 +begun 0 5 5.347108 0.000000 2386 +suffer 0 5 5.347108 0.000000 2268 +fit 0 5 5.347108 0.000000 2285 +episod 1 4 5.568345 5.568345 2747 +lord 1 4 5.568345 5.568345 2906 +employe 1 4 5.568345 5.568345 2717 +drew 1 4 5.568345 5.568345 2980 +theintern 0 4 5.568345 0.000000 2981 +newslett 0 4 5.568345 0.000000 2873 +sumedh 0 3 5.857933 0.000000 3101 +thesumm 0 3 5.857933 0.000000 3763 +redmond 0 3 5.857933 0.000000 3568 +internship 0 3 5.857933 0.000000 3764 +flame 0 3 5.857933 0.000000 3696 +galact 1 2 6.263398 6.263398 5219 +eduthi 0 2 6.263398 0.000000 5382 +alia 0 2 6.263398 0.000000 5383 +arriv 0 2 6.263398 0.000000 4132 +persuad 0 2 6.263398 0.000000 5384 +declin 0 2 6.263398 0.000000 5385 +portrai 0 2 6.263398 0.000000 5386 +bitter 0 2 6.263398 0.000000 5387 +rebel 0 2 6.263398 0.000000 5388 +imperi 0 2 6.263398 0.000000 5389 +gater 1 1 6.957497 6.957497 11879 +bilth 1 1 6.957497 6.957497 11880 +empirewritten 0 1 6.957497 0.000000 11881 +kanetkaremail 0 1 6.957497 0.000000 11882 +kanetkar 0 1 6.957497 0.000000 11883 +atmicrosoft 0 1 6.957497 0.000000 11884 +artwork 0 1 6.957497 0.000000 11885 +leak 0 1 6.957497 0.000000 11886 +theful 0 1 6.957497 0.000000 11887 +stripi 0 1 6.957497 0.000000 11888 +themicrosoft 0 1 6.957497 0.000000 11889 +perceiv 0 1 6.957497 0.000000 11890 +problemand 0 1 6.957497 0.000000 11891 +evilempir 0 1 6.957497 0.000000 11892 +comicstrip 0 1 6.957497 0.000000 11893 +theoutsid 0 1 6.957497 0.000000 11894 +eitherbil 0 1 6.957497 0.000000 11895 +heck 0 1 6.957497 0.000000 11896 +summersof 0 1 6.957497 0.000000 11897 +anyoneinterest 0 1 6.957497 0.000000 11898 +thateveri 0 1 6.957497 0.000000 11899 +theyshow 0 1 6.957497 0.000000 11900 +trivia 0 1 6.957497 0.000000 11901 +makey 0 1 6.957497 0.000000 11902 +disclosur 0 1 6.957497 0.000000 11903 +agreeement 0 1 6.957497 0.000000 11904 +theymad 0 1 6.957497 0.000000 11905 +theirheart 0 1 6.957497 0.000000 11906 +pledg 0 1 6.957497 0.000000 11907 +alleig 0 1 6.957497 0.000000 11908 +everydesk 0 1 6.957497 0.000000 11909 +roosterepisod 0 1 6.957497 0.000000 11910 +threatepisod 0 1 6.957497 0.000000 11911 +insigniaepisod 0 1 6.957497 0.000000 11912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..42dc8ff1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +cornel 1 215 1.386294 1.386294 23 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +paper 1 205 1.609438 1.609438 38 +group 0 183 1.609438 0.000000 36 +address 0 170 1.791759 0.000000 62 +algorithm 0 162 1.791759 0.000000 57 +support 1 132 1.945910 1.945910 83 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +problem 0 147 1.945910 0.000000 75 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +document 2 121 2.079442 4.158884 89 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +structur 1 106 2.197225 2.197225 105 +version 0 113 2.197225 0.000000 122 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +technic 1 100 2.302585 2.302585 140 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +memori 0 101 2.302585 0.000000 139 +proceed 1 93 2.397895 2.397895 152 +section 0 94 2.397895 0.000000 149 +question 0 91 2.397895 0.000000 141 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +issu 1 78 2.564949 2.564949 211 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +logic 1 71 2.639057 2.639057 230 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +workshop 0 71 2.639057 0.000000 239 +goal 1 66 2.708050 2.708050 250 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +august 0 66 2.708050 0.000000 257 +evalu 0 64 2.772589 0.000000 266 +previou 0 62 2.772589 0.000000 290 +descript 0 64 2.772589 0.000000 271 +experi 0 64 2.772589 0.000000 283 +type 1 61 2.833213 2.833213 296 +best 0 59 2.833213 0.000000 299 +summer 1 56 2.890372 2.890372 311 +index 1 56 2.890372 2.890372 309 +space 0 57 2.890372 0.000000 310 +publish 0 57 2.890372 0.000000 326 +digit 0 52 2.995732 0.000000 348 +principl 0 48 3.044522 0.000000 357 +electron 1 47 3.091042 3.091042 379 +term 0 43 3.178054 0.000000 411 +autom 1 41 3.218876 3.218876 434 +editor 0 41 3.218876 0.000000 433 +theoret 0 39 3.258097 0.000000 446 +author 0 39 3.258097 0.000000 450 +close 0 38 3.295837 0.000000 465 +correct 0 38 3.295837 0.000000 462 +return 0 34 3.401197 0.000000 502 +given 0 32 3.465736 0.000000 538 +chapter 0 32 3.465736 0.000000 536 +abl 0 30 3.555348 0.000000 566 +compon 0 30 3.555348 0.000000 570 +limit 0 29 3.583519 0.000000 585 +retriev 1 27 3.637586 3.637586 621 +relev 0 26 3.688879 0.000000 637 +enabl 0 26 3.688879 0.000000 655 +primari 1 25 3.737670 3.737670 669 +task 0 25 3.737670 0.000000 678 +toward 0 25 3.737670 0.000000 668 +magazin 0 24 3.761200 0.000000 704 +handl 0 24 3.761200 0.000000 685 +seri 0 24 3.761200 0.000000 708 +brows 1 23 3.806662 3.806662 726 +input 0 23 3.806662 0.000000 727 +head 0 23 3.806662 0.000000 732 +recognit 0 23 3.806662 0.000000 723 +identifi 0 22 3.850148 0.000000 760 +hierarchi 0 22 3.850148 0.000000 744 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +flexibl 1 21 3.912023 3.912023 792 +output 0 21 3.912023 0.000000 788 +busi 0 21 3.912023 0.000000 784 +geometr 0 19 4.007333 0.000000 852 +boston 0 19 4.007333 0.000000 862 +segment 0 17 4.110874 0.000000 931 +stop 0 17 4.110874 0.000000 942 +adam 0 17 4.110874 0.000000 934 +white 0 17 4.110874 0.000000 951 +letter 0 16 4.174387 0.000000 981 +piec 1 15 4.248495 4.248495 1020 +hierarch 0 15 4.248495 0.000000 1018 +near 0 14 4.317488 0.000000 1091 +bodi 0 13 4.382027 0.000000 1178 +johnson 0 13 4.382027 0.000000 1162 +captur 0 12 4.465908 0.000000 1232 +scan 0 12 4.465908 0.000000 1243 +bruce 0 12 4.465908 0.000000 1226 +arbitrari 0 11 4.553877 0.000000 1359 +paragraph 0 10 4.653960 0.000000 1449 +discov 0 9 4.753590 0.000000 1562 +donald 0 9 4.753590 0.000000 1510 +classif 0 9 4.753590 0.000000 1586 +rais 0 8 4.875197 0.000000 1711 +tobe 0 6 5.164786 0.000000 1995 +textual 0 6 5.164786 0.000000 1979 +superhighwai 0 4 5.568345 0.000000 2943 +similarli 0 3 5.857933 0.000000 3241 +categor 0 3 5.857933 0.000000 3765 +daniela 0 3 5.857933 0.000000 3611 +recipi 0 3 5.857933 0.000000 3627 +andclassif 0 2 6.263398 0.000000 5390 +cue 0 2 6.263398 0.000000 5391 +bharat 0 2 6.263398 0.000000 5107 +forthcom 0 2 6.263398 0.000000 5392 +dartmouth 0 2 6.263398 0.000000 5393 +montral 0 2 6.263398 0.000000 5394 +kristen 1 1 6.957497 6.957497 11913 +divid 1 1 6.957497 6.957497 11914 +accessresearch 0 1 6.957497 0.000000 11915 +mylong 0 1 6.957497 0.000000 11916 +forsophist 0 1 6.957497 0.000000 11917 +manipulationtool 0 1 6.957497 0.000000 11918 +logicalstructur 0 1 6.957497 0.000000 11919 +documentrepresent 0 1 6.957497 0.000000 11920 +hierarchyof 0 1 6.957497 0.000000 11921 +postscriptvers 0 1 6.957497 0.000000 11922 +croft 0 1 6.957497 0.000000 11923 +determininglog 0 1 6.957497 0.000000 11924 +soin 0 1 6.957497 0.000000 11925 +ofmultipl 0 1 6.957497 0.000000 11926 +browsingco 0 1 6.957497 0.000000 11927 +nabil 0 1 6.957497 0.000000 11928 +bhargava 0 1 6.957497 0.000000 11929 +yelena 0 1 6.957497 0.000000 11930 +yesha 0 1 6.957497 0.000000 11931 +seeheim 0 1 6.957497 0.000000 11932 +podp 0 1 6.957497 0.000000 11933 +taxonomi 0 1 6.957497 0.000000 11934 +structureselectron 0 1 6.957497 0.000000 11935 +dag 0 1 6.957497 0.000000 11936 +scholaraward 0 1 6.957497 0.000000 11937 +wordless 0 1 6.957497 0.000000 11938 +analysisand 0 1 6.957497 0.000000 11939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..1310dfb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +oper 1 180 1.609438 1.609438 34 +fall 0 181 1.609438 0.000000 40 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +model 0 145 1.945910 0.000000 69 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +structur 0 106 2.197225 0.000000 105 +site 0 106 2.197225 0.000000 119 +manag 0 114 2.197225 0.000000 125 +techniqu 0 99 2.302585 0.000000 138 +graphic 1 90 2.397895 2.397895 147 +optim 0 79 2.564949 0.000000 197 +resum 0 79 2.564949 0.000000 217 +java 1 70 2.708050 2.708050 248 +would 0 67 2.708050 0.000000 251 +simul 0 66 2.708050 0.000000 255 +multimedia 0 68 2.708050 0.000000 258 +polici 0 64 2.772589 0.000000 279 +browser 0 56 2.890372 0.000000 313 +summer 0 56 2.890372 0.000000 311 +case 0 51 2.995732 0.000000 351 +probabl 0 40 3.258097 0.000000 455 +prototyp 0 38 3.295837 0.000000 463 +statist 0 35 3.401197 0.000000 521 +independ 0 32 3.465736 0.000000 548 +displai 0 23 3.806662 0.000000 712 +applet 0 20 3.951244 0.000000 827 +telecommun 0 9 4.753590 0.000000 1565 +polygon 0 8 4.875197 0.000000 1723 +thrive 0 5 5.347108 0.000000 2257 +revolut 0 5 5.347108 0.000000 2315 +sector 0 3 5.857933 0.000000 3766 +suppot 0 2 6.263398 0.000000 5243 +stochast 0 2 6.263398 0.000000 4832 +masafumi 1 1 6.957497 6.957497 11940 +suzukither 0 1 6.957497 0.000000 11941 +suzukisuzuki 0 1 6.957497 0.000000 11942 +educlassesfal 0 1 6.957497 0.000000 11943 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..e3463276 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +modifi 0 178 1.609438 0.000000 35 +phone 0 175 1.791759 0.000000 45 +process 0 142 1.945910 0.000000 72 +number 0 130 2.079442 0.000000 97 +multimedia 0 68 2.708050 0.000000 258 +littl 0 39 3.258097 0.000000 454 +movi 0 40 3.258097 0.000000 459 +spend 0 19 4.007333 0.000000 850 +jonathan 1 13 4.382027 4.382027 1174 +edui 0 13 4.382027 0.000000 1193 +departmentof 0 9 4.753590 0.000000 1539 +humor 0 5 5.347108 0.000000 2533 +swartz 1 4 5.568345 5.568345 2878 +heredevelopingrivl 0 1 6.957497 0.000000 11944 +myaddress 0 1 6.957497 0.000000 11945 +brighten 0 1 6.957497 0.000000 11946 +dayjon 0 1 6.957497 0.000000 11947 +connectioncool 0 1 6.957497 0.000000 11948 +siteslast 0 1 6.957497 0.000000 11949 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..74e3aa21 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +class 1 199 1.609438 1.609438 37 +person 0 111 2.197225 0.000000 117 +send 0 114 2.197225 0.000000 109 +question 0 91 2.397895 0.000000 141 +academ 0 82 2.484907 0.000000 178 +pagewelcom 0 11 4.553877 0.000000 1344 +studentcomput 0 7 5.010635 0.000000 1963 +departmentcornel 0 5 5.347108 0.000000 2275 +srivastava 0 2 6.263398 0.000000 5395 +sunil 1 1 6.957497 6.957497 11950 +srivastavamast 0 1 6.957497 0.000000 11951 +linkscom 0 1 6.957497 0.000000 11952 +sxsriva 0 1 6.957497 0.000000 11953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..4647a17c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +avail 0 169 1.791759 0.000000 48 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +like 1 132 1.945910 1.945910 81 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +find 0 111 2.197225 0.000000 111 +version 0 113 2.197225 0.000000 122 +check 0 115 2.197225 0.000000 118 +part 0 98 2.302585 0.000000 129 +mani 1 92 2.397895 2.397895 150 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +graphic 0 90 2.397895 0.000000 147 +octob 0 89 2.397895 0.000000 156 +build 1 85 2.484907 2.484907 184 +second 0 81 2.484907 0.000000 166 +internet 0 83 2.484907 0.000000 186 +thing 0 84 2.484907 0.000000 189 +want 1 79 2.564949 2.564949 199 +good 0 77 2.564949 0.000000 200 +orient 0 80 2.564949 0.000000 205 +master 0 76 2.564949 0.000000 216 +involv 1 71 2.639057 2.639057 227 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +practic 0 70 2.708050 0.000000 246 +import 0 65 2.772589 0.000000 282 +septemb 0 65 2.772589 0.000000 274 +street 0 63 2.772589 0.000000 293 +colleg 0 61 2.833213 0.000000 300 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +unix 0 58 2.890372 0.000000 308 +found 1 53 2.944439 2.944439 337 +three 0 54 2.944439 0.000000 330 +run 0 51 2.995732 0.000000 347 +life 1 50 3.044522 3.044522 375 +effect 0 46 3.091042 0.000000 385 +made 0 44 3.135494 0.000000 398 +natur 0 44 3.135494 0.000000 406 +anoth 0 45 3.135494 0.000000 408 +around 0 43 3.178054 0.000000 415 +long 0 43 3.178054 0.000000 413 +music 0 42 3.218876 0.000000 436 +editor 0 41 3.218876 0.000000 433 +live 0 40 3.258097 0.000000 451 +mean 0 37 3.332205 0.000000 477 +field 0 37 3.332205 0.000000 482 +survei 0 35 3.401197 0.000000 513 +human 0 32 3.465736 0.000000 546 +independ 0 32 3.465736 0.000000 548 +becom 0 28 3.610918 0.000000 603 +quit 1 27 3.637586 3.637586 633 +though 0 27 3.637586 0.000000 622 +linux 0 27 3.637586 0.000000 631 +rather 0 26 3.688879 0.000000 642 +higher 0 24 3.761200 0.000000 690 +other 0 24 3.761200 0.000000 697 +wish 0 24 3.761200 0.000000 692 +almost 0 22 3.850148 0.000000 742 +self 0 22 3.850148 0.000000 761 +thu 1 21 3.912023 3.912023 773 +love 1 21 3.912023 3.912023 804 +watch 1 21 3.912023 3.912023 789 +nice 0 20 3.951244 0.000000 809 +supervis 0 20 3.951244 0.000000 840 +ever 0 19 4.007333 0.000000 872 +four 0 18 4.060443 0.000000 905 +listen 0 18 4.060443 0.000000 907 +concentr 0 18 4.060443 0.000000 906 +steven 1 17 4.110874 4.110874 953 +taiwan 0 16 4.174387 0.000000 1006 +hobbi 0 16 4.174387 0.000000 1009 +anyth 0 16 4.174387 0.000000 998 +intel 0 16 4.174387 0.000000 1000 +later 0 15 4.248495 0.000000 1043 +enough 0 15 4.248495 0.000000 1040 +becam 0 14 4.317488 0.000000 1117 +stori 0 14 4.317488 0.000000 1087 +draw 0 14 4.317488 0.000000 1086 +happi 0 14 4.317488 0.000000 1079 +audio 0 14 4.317488 0.000000 1094 +everyth 0 13 4.382027 0.000000 1169 +resolut 0 13 4.382027 0.000000 1172 +huang 1 12 4.465908 4.465908 1202 +career 1 12 4.465908 4.465908 1287 +promot 0 12 4.465908 0.000000 1235 +bruce 0 12 4.465908 0.000000 1226 +land 0 12 4.465908 0.000000 1273 +island 0 11 4.553877 0.000000 1345 +night 0 11 4.553877 0.000000 1319 +host 0 11 4.553877 0.000000 1306 +literatur 0 11 4.553877 0.000000 1300 +broad 0 11 4.553877 0.000000 1302 +ofcomput 1 10 4.653960 4.653960 1442 +rich 0 10 4.653960 0.000000 1396 +seven 1 9 4.753590 4.753590 1561 +calvin 0 9 4.753590 0.000000 1518 +face 0 9 4.753590 0.000000 1501 +ideal 0 8 4.875197 0.000000 1630 +film 0 8 4.875197 0.000000 1761 +exactli 0 7 5.010635 0.000000 1817 +hunt 0 7 5.010635 0.000000 1798 +brought 0 7 5.010635 0.000000 1925 +migrat 0 7 5.010635 0.000000 1851 +discoveri 0 7 5.010635 0.000000 1915 +channel 0 7 5.010635 0.000000 1836 +truth 0 6 5.164786 0.000000 2179 +parent 0 6 5.164786 0.000000 2204 +south 0 6 5.164786 0.000000 2167 +lucki 0 6 5.164786 0.000000 2163 +chat 0 6 5.164786 0.000000 2128 +railroad 0 6 5.164786 0.000000 2161 +myresum 0 6 5.164786 0.000000 2199 +freeli 0 6 5.164786 0.000000 2014 +ship 0 5 5.347108 0.000000 2534 +tang 0 5 5.347108 0.000000 2409 +knew 0 5 5.347108 0.000000 2445 +ofinterest 0 5 5.347108 0.000000 2323 +champion 0 4 5.568345 0.000000 2982 +proud 0 4 5.568345 0.000000 2918 +assur 0 4 5.568345 0.000000 2722 +somewhat 0 4 5.568345 0.000000 2659 +fulfil 0 4 5.568345 0.000000 2932 +hobb 0 4 5.568345 0.000000 2893 +children 0 3 5.857933 0.000000 3767 +talent 0 3 5.857933 0.000000 3768 +peac 0 3 5.857933 0.000000 3769 +pai 0 3 5.857933 0.000000 3672 +asid 0 3 5.857933 0.000000 3770 +lego 0 3 5.857933 0.000000 3188 +dick 0 2 6.263398 0.000000 5396 +tender 0 2 6.263398 0.000000 5397 +tropic 0 2 6.263398 0.000000 5398 +fifteen 0 2 6.263398 0.000000 5399 +marvel 0 2 6.263398 0.000000 5400 +defeat 0 2 6.263398 0.000000 5401 +reward 0 2 6.263398 0.000000 5402 +andwork 0 2 6.263398 0.000000 5403 +relai 0 2 6.263398 0.000000 5404 +weapon 0 2 6.263398 0.000000 5115 +spectrum 0 2 6.263398 0.000000 5405 +blobbi 0 2 6.263398 0.000000 4820 +huangszu 0 1 6.957497 0.000000 11954 +defend 0 1 6.957497 0.000000 11955 +justic 0 1 6.957497 0.000000 11956 +nevermind 0 1 6.957497 0.000000 11957 +iarriv 0 1 6.957497 0.000000 11958 +soundslik 0 1 6.957497 0.000000 11959 +mobi 0 1 6.957497 0.000000 11960 +nointent 0 1 6.957497 0.000000 11961 +whale 0 1 6.957497 0.000000 11962 +digress 0 1 6.957497 0.000000 11963 +wholefamili 0 1 6.957497 0.000000 11964 +philippin 0 1 6.957497 0.000000 11965 +aroundsix 0 1 6.957497 0.000000 11966 +fluentli 0 1 6.957497 0.000000 11967 +bilingu 0 1 6.957497 0.000000 11968 +thepoetri 0 1 6.957497 0.000000 11969 +dynasti 0 1 6.957497 0.000000 11970 +arabian 0 1 6.957497 0.000000 11971 +doveright 0 1 6.957497 0.000000 11972 +ienter 0 1 6.957497 0.000000 11973 +philippineswith 0 1 6.957497 0.000000 11974 +unabashedli 0 1 6.957497 0.000000 11975 +alsoin 0 1 6.957497 0.000000 11976 +whirlwind 0 1 6.957497 0.000000 11977 +awoman 0 1 6.957497 0.000000 11978 +effortlessli 0 1 6.957497 0.000000 11979 +eek 0 1 6.957497 0.000000 11980 +blunt 0 1 6.957497 0.000000 11981 +ienrol 0 1 6.957497 0.000000 11982 +segreg 0 1 6.957497 0.000000 11983 +everydaygeek 0 1 6.957497 0.000000 11984 +fromactu 0 1 6.957497 0.000000 11985 +happili 0 1 6.957497 0.000000 11986 +myspar 0 1 6.957497 0.000000 11987 +linuxnet 0 1 6.957497 0.000000 11988 +plastic 0 1 6.957497 0.000000 11989 +suspens 0 1 6.957497 0.000000 11990 +thriller 0 1 6.957497 0.000000 11991 +sting 0 1 6.957497 0.000000 11992 +sesam 0 1 6.957497 0.000000 11993 +offend 0 1 6.957497 0.000000 11994 +bysom 0 1 6.957497 0.000000 11995 +blatant 0 1 6.957497 0.000000 11996 +highlyinterest 0 1 6.957497 0.000000 11997 +compatiblecomput 0 1 6.957497 0.000000 11998 +metaballsund 0 1 6.957497 0.000000 11999 +techniquesin 0 1 6.957497 0.000000 12000 +andport 0 1 6.957497 0.000000 12001 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..93af6248 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +technolog 0 131 2.079442 0.000000 102 +move 0 47 3.091042 0.000000 382 +hybrid 0 15 4.248495 0.000000 1057 +henzing 0 3 5.857933 0.000000 3713 +hytechhytech 0 1 6.957497 0.000000 12002 +toolw 0 1 6.957497 0.000000 12003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..55a90da8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +analysi 1 124 2.079442 2.079442 98 +advanc 0 99 2.302585 0.000000 130 +real 1 93 2.397895 2.397895 144 +control 0 82 2.484907 0.000000 164 +logic 0 71 2.639057 0.000000 230 +septemb 0 65 2.772589 0.000000 274 +linear 0 41 3.218876 0.000000 431 +formal 1 37 3.332205 3.332205 478 +concurr 1 34 3.401197 3.401197 501 +symbol 0 27 3.637586 0.000000 620 +universityithaca 0 24 3.761200 0.000000 710 +methodolog 1 23 3.806662 3.806662 733 +verif 0 20 3.951244 0.000000 826 +aid 0 18 4.060443 0.000000 904 +hybrid 1 15 4.248495 4.248495 1057 +eduphon 0 15 4.248495 0.000000 1060 +transit 0 15 4.248495 0.000000 1046 +embed 1 14 4.317488 4.317488 1102 +automata 1 13 4.382027 4.382027 1135 +modul 0 10 4.653960 0.000000 1434 +departmentcornel 0 5 5.347108 0.000000 2275 +henzing 0 3 5.857933 0.000000 3713 +professorcomput 0 3 5.857933 0.000000 3714 +worldwid 0 3 5.857933 0.000000 3704 +checker 0 3 5.857933 0.000000 3644 +systemsr 0 2 6.263398 0.000000 4312 +bibtex 0 2 6.263398 0.000000 5406 +henzingerthoma 0 1 6.957497 0.000000 12004 +movedassist 0 1 6.957497 0.000000 12005 +researchform 0 1 6.957497 0.000000 12006 +researchat 0 1 6.957497 0.000000 12007 +cornelland 0 1 6.957497 0.000000 12008 +resumepublicationsreact 0 1 6.957497 0.000000 12009 +systemsclock 0 1 6.957497 0.000000 12010 +systemshybrid 0 1 6.957497 0.000000 12011 +systemsbibliographi 0 1 6.957497 0.000000 12012 +publicationstoolshytech 0 1 6.957497 0.000000 12013 +systemscoursesc 0 1 6.957497 0.000000 12014 +languagesconferenceshybrid 0 1 6.957497 0.000000 12015 +systemscav 0 1 6.957497 0.000000 12016 +verificationlast 0 1 6.957497 0.000000 12017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..0a8779e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +manag 0 114 2.197225 0.000000 125 +homepag 0 93 2.397895 0.000000 148 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +upson 0 71 2.639057 0.000000 218 +previou 0 62 2.772589 0.000000 290 +life 0 50 3.044522 0.000000 375 +eduoffic 0 33 3.433987 0.000000 531 +quot 0 29 3.583519 0.000000 582 +psycholog 0 15 4.248495 0.000000 1054 +horu 0 14 4.317488 0.000000 1116 +social 0 13 4.382027 0.000000 1123 +reness 0 11 4.553877 0.000000 1333 +hockei 0 8 4.875197 0.000000 1760 +atcornel 0 6 5.164786 0.000000 2131 +advis 0 6 5.164786 0.000000 2173 +hickei 1 4 5.568345 5.568345 2845 +hallphon 0 4 5.568345 0.000000 2900 +schneider 0 4 5.568345 0.000000 2868 +takako 1 3 5.857933 5.857933 3538 +backcountri 0 3 5.857933 0.000000 3686 +byrobbert 0 1 6.957497 0.000000 12018 +andfr 0 1 6.957497 0.000000 12019 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..d665555f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +professor 0 137 1.945910 0.000000 76 +compil 0 122 2.079442 0.000000 96 +environ 0 84 2.484907 0.000000 177 +editor 0 41 3.218876 0.000000 433 +transform 0 32 3.465736 0.000000 542 +attribut 0 14 4.317488 0.000000 1092 +eduresearch 0 6 5.164786 0.000000 2205 +increment 0 6 5.164786 0.000000 2206 +grammar 0 6 5.164786 0.000000 2058 +tim_teitelbaum 0 1 6.957497 0.000000 12020 +teitelbaumassoci 0 1 6.957497 0.000000 12021 +adavita 0 1 6.957497 0.000000 12022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..71a37343 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +parallel 2 169 1.791759 3.583518 60 +network 1 168 1.791759 1.791759 61 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +architectur 2 139 1.945910 3.891820 77 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +lectur 0 135 1.945910 0.000000 73 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +analysi 0 124 2.079442 0.000000 98 +version 1 113 2.197225 2.197225 122 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +techniqu 0 99 2.302585 0.000000 138 +proceed 1 93 2.397895 2.397895 152 +commun 1 95 2.397895 2.397895 157 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +novemb 1 81 2.484907 2.484907 179 +control 1 82 2.484907 2.484907 164 +ieee 0 86 2.484907 0.000000 190 +journal 0 83 2.484907 0.000000 183 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +upson 0 71 2.639057 0.000000 218 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +abstract 1 62 2.772589 2.772589 276 +evalu 0 64 2.772589 0.000000 266 +function 0 62 2.772589 0.000000 275 +simpl 0 60 2.833213 0.000000 298 +share 0 59 2.833213 0.000000 304 +automat 0 61 2.833213 0.000000 306 +juli 0 60 2.833213 0.000000 305 +sever 1 56 2.890372 2.890372 322 +think 0 57 2.890372 0.000000 314 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +talk 1 53 2.944439 2.944439 336 +run 0 51 2.995732 0.000000 347 +digit 0 52 2.995732 0.000000 348 +without 0 50 3.044522 0.000000 370 +principl 0 48 3.044522 0.000000 357 +california 0 46 3.091042 0.000000 388 +mechan 1 43 3.178054 3.178054 416 +offer 0 43 3.178054 0.000000 414 +annual 1 40 3.258097 3.258097 458 +slide 1 38 3.295837 3.295837 467 +workstat 1 37 3.332205 3.332205 479 +extend 0 32 3.465736 0.000000 539 +platform 0 29 3.583519 0.000000 591 +limit 0 29 3.583519 0.000000 585 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 0 28 3.610918 0.000000 605 +proc 1 26 3.688879 3.688879 649 +berkelei 1 26 3.688879 3.688879 657 +supercomput 0 25 3.737670 0.000000 681 +toward 0 25 3.737670 0.000000 668 +fundament 0 25 3.737670 0.000000 661 +magazin 0 24 3.761200 0.000000 704 +lead 0 23 3.806662 0.000000 718 +thread 0 23 3.806662 0.000000 722 +fine 1 20 3.951244 3.951244 822 +department 0 20 3.951244 0.000000 839 +speed 0 18 4.060443 0.000000 911 +interconnect 1 17 4.110874 4.110874 937 +diego 1 16 4.174387 4.174387 992 +latenc 0 16 4.174387 0.000000 993 +fourth 0 16 4.174387 0.000000 999 +cambridg 0 16 4.174387 0.000000 1008 +month 0 15 4.248495 0.000000 1025 +micro 0 15 4.248495 0.000000 1031 +split 1 14 4.317488 4.317488 1078 +eicken 2 13 4.382027 8.764054 1134 +thorsten 1 13 4.382027 4.382027 1133 +conf 1 13 4.382027 4.382027 1181 +sigplan 0 13 4.382027 0.000000 1190 +guest 0 12 4.465908 0.000000 1220 +multithread 1 11 4.553877 4.553877 1315 +bandwidth 0 11 4.553877 0.000000 1365 +grain 1 10 4.653960 4.653960 1448 +werner 0 10 4.653960 0.000000 1385 +santa 0 10 4.653960 0.000000 1441 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +patterson 0 9 4.753590 0.000000 1554 +port 1 8 4.875197 4.875197 1766 +entri 1 8 4.875197 4.875197 1678 +gold 1 8 4.875197 4.875197 1745 +coast 1 8 4.875197 4.875197 1746 +vineet 0 8 4.875197 0.000000 1639 +vogel 0 8 4.875197 0.000000 1622 +andcomput 0 8 4.875197 0.000000 1623 +realist 0 8 4.875197 0.000000 1665 +goldstein 1 6 5.164786 5.164786 2168 +forum 1 6 5.164786 5.164786 2027 +fish 0 6 5.164786 0.000000 2207 +greec 0 6 5.164786 0.000000 2208 +culler 2 5 5.347108 10.694216 2381 +symp 1 5 5.347108 5.347108 2376 +buch 1 5 5.347108 5.347108 2272 +australia 1 5 5.347108 5.347108 2478 +water 0 5 5.347108 0.000000 2535 +plant 0 5 5.347108 0.000000 2497 +dataflow 0 5 5.347108 0.000000 2390 +karp 0 5 5.347108 0.000000 2284 +ifip 0 5 5.347108 0.000000 2459 +basu 1 4 5.568345 5.568345 2843 +hallphon 0 4 5.568345 0.000000 2900 +coursesc 0 4 5.568345 0.000000 2692 +tire 0 4 5.568345 0.000000 2799 +password 0 4 5.568345 0.000000 2594 +medium 0 4 5.568345 0.000000 2834 +schauser 2 3 5.857933 11.715866 3599 +interfacefor 0 3 5.857933 0.000000 3534 +frontier 0 3 5.857933 0.000000 3771 +anindya 0 3 5.857933 0.000000 3535 +avula 0 3 5.857933 0.000000 3600 +abridg 0 3 5.857933 0.000000 3772 +dusseau 0 3 5.857933 0.000000 3382 +yelick 0 3 5.857933 0.000000 3374 +crete 0 3 5.857933 0.000000 3773 +lan 0 2 6.263398 0.000000 4359 +includingth 0 2 6.263398 0.000000 4493 +pond 0 2 6.263398 0.000000 5127 +firewal 0 2 6.263398 0.000000 5407 +distributedcomput 0 2 6.263398 0.000000 5336 +communicationarchitectur 0 2 6.263398 0.000000 4859 +krishnamurthi 0 2 6.263398 0.000000 5408 +lumetta 0 2 6.263398 0.000000 5409 +dalli 0 2 6.263398 0.000000 4517 +logp 0 2 6.263398 0.000000 4227 +orlando 0 2 6.263398 0.000000 5410 +clara 0 2 6.263398 0.000000 4958 +barrera 0 2 6.263398 0.000000 4309 +departement 1 1 6.957497 6.957497 12023 +eickenassist 0 1 6.957497 0.000000 12024 +eduprojectsth 0 1 6.957497 0.000000 12025 +architectureprovid 0 1 6.957497 0.000000 12026 +latencyand 0 1 6.957497 0.000000 12027 +currentimplement 0 1 6.957497 0.000000 12028 +tonon 0 1 6.957497 0.000000 12029 +spmd 0 1 6.957497 0.000000 12030 +extensionto 0 1 6.957497 0.000000 12031 +newplatform 0 1 6.957497 0.000000 12032 +multprocessor 0 1 6.957497 0.000000 12033 +computerorgan 0 1 6.957497 0.000000 12034 +maynd 0 1 6.957497 0.000000 12035 +pagestv 0 1 6.957497 0.000000 12036 +macpppwhich 0 1 6.957497 0.000000 12037 +everhav 0 1 6.957497 0.000000 12038 +passwordssuddenli 0 1 6.957497 0.000000 12039 +installationinstruct 0 1 6.957497 0.000000 12040 +publicationsu 0 1 6.957497 0.000000 12041 +atmnetwork 0 1 6.957497 0.000000 12042 +controlledthread 0 1 6.957497 0.000000 12043 +spertu 0 1 6.957497 0.000000 12044 +modelof 0 1 6.957497 0.000000 12045 +sahai 0 1 6.957497 0.000000 12046 +santo 0 1 6.957497 0.000000 12047 +subramonian 0 1 6.957497 0.000000 12048 +dataflowmultiprocess 0 1 6.957497 0.000000 12049 +forintegr 0 1 6.957497 0.000000 12050 +forleni 0 1 6.957497 0.000000 12051 +minimalhardwar 0 1 6.957497 0.000000 12052 +wawrzynek 0 1 6.957497 0.000000 12053 +architecturesfor 0 1 6.957497 0.000000 12054 +saavedra 0 1 6.957497 0.000000 12055 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..b07a9661 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +contact 0 153 1.791759 0.000000 59 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +pleas 0 113 2.197225 0.000000 114 +real 0 93 2.397895 0.000000 144 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +appli 0 71 2.639057 0.000000 226 +date 0 51 2.995732 0.000000 344 +done 0 47 3.091042 0.000000 381 +often 0 31 3.496508 0.000000 551 +actual 0 28 3.610918 0.000000 604 +enjoi 0 26 3.688879 0.000000 660 +assum 0 19 4.007333 0.000000 845 +incomput 0 14 4.317488 0.000000 1096 +touch 0 12 4.465908 0.000000 1288 +moment 0 11 4.553877 0.000000 1379 +apart 0 7 5.010635 0.000000 1936 +somewhat 0 4 5.568345 0.000000 2659 +disclaim 0 4 5.568345 0.000000 2847 +erlingsson 1 2 6.263398 6.263398 4107 +lfar 1 2 6.263398 6.263398 4106 +pagelfar 0 1 6.957497 0.000000 12056 +specificationi 0 1 6.957497 0.000000 12057 +incongruousiceland 0 1 6.957497 0.000000 12058 +implementationbackgroundwher 0 1 6.957497 0.000000 12059 +activitieswhat 0 1 6.957497 0.000000 12060 +schedulewher 0 1 6.957497 0.000000 12061 +researchwhat 0 1 6.957497 0.000000 12062 +interestswhat 0 1 6.957497 0.000000 12063 +acquaintancesthos 0 1 6.957497 0.000000 12064 +infohow 0 1 6.957497 0.000000 12065 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..37bccd65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +relat 0 139 1.945910 0.000000 68 +model 0 145 1.945910 0.000000 69 +center 0 88 2.397895 0.000000 158 +novemb 0 81 2.484907 0.000000 179 +resourc 0 81 2.484907 0.000000 172 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +ithaca 0 65 2.772589 0.000000 294 +back 0 60 2.833213 0.000000 297 +unix 0 58 2.890372 0.000000 308 +run 0 51 2.995732 0.000000 347 +robert 0 30 3.555348 0.000000 567 +packag 1 28 3.610918 3.610918 614 +releas 1 28 3.610918 3.610918 616 +geometri 0 22 3.850148 0.000000 752 +geometr 1 19 4.007333 4.007333 852 +element 1 18 4.060443 4.060443 895 +finit 1 14 4.317488 4.317488 1106 +jonathan 0 13 4.382027 0.000000 1174 +mesh 1 11 4.553877 4.553877 1351 +stephen 0 11 4.553877 0.000000 1342 +solver 0 7 5.010635 0.000000 1911 +minnesota 0 5 5.347108 0.000000 2469 +websit 0 4 5.568345 0.000000 2726 +schneider 0 4 5.568345 0.000000 2868 +vavasi 1 3 5.857933 5.857933 3526 +threedimens 0 1 6.957497 0.000000 12066 +themesh 0 1 6.957497 0.000000 12067 +softwaredownload 0 1 6.957497 0.000000 12068 +andqmg 0 1 6.957497 0.000000 12069 +mcphedran 0 1 6.957497 0.000000 12070 +offinit 0 1 6.957497 0.000000 12071 +ofsoftwar 0 1 6.957497 0.000000 12072 +computationalgeometri 0 1 6.957497 0.000000 12073 +shewchuk 0 1 6.957497 0.000000 12074 +triangl 0 1 6.957497 0.000000 12075 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..2e7a36a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +algorithm 1 162 1.791759 1.791759 57 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +base 0 165 1.791759 0.000000 50 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +note 0 142 1.945910 0.000000 67 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +object 0 138 1.945910 0.000000 79 +analysi 1 124 2.079442 2.079442 98 +document 0 121 2.079442 0.000000 89 +code 1 108 2.197225 2.197225 116 +well 1 109 2.197225 2.197225 121 +pleas 0 113 2.197225 0.000000 114 +specif 0 106 2.197225 0.000000 106 +intern 0 108 2.197225 0.000000 128 +associ 0 93 2.397895 0.000000 151 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +level 1 87 2.484907 2.484907 180 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +optim 0 79 2.564949 0.000000 197 +line 1 75 2.639057 2.639057 231 +nation 0 74 2.639057 0.000000 240 +solv 0 73 2.639057 0.000000 234 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +ithaca 0 65 2.772589 0.000000 294 +laboratori 0 63 2.772589 0.000000 292 +complex 0 64 2.772589 0.000000 269 +creat 0 63 2.772589 0.000000 277 +unix 0 58 2.890372 0.000000 308 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +numer 1 49 3.044522 3.044522 369 +effect 0 46 3.091042 0.000000 385 +featur 0 46 3.091042 0.000000 386 +map 0 39 3.258097 0.000000 452 +annual 0 40 3.258097 0.000000 458 +microsoft 0 38 3.295837 0.000000 468 +least 0 35 3.401197 0.000000 516 +domain 0 30 3.555348 0.000000 564 +depend 0 29 3.583519 0.000000 583 +packag 1 28 3.610918 3.610918 614 +releas 0 28 3.610918 0.000000 616 +bound 0 26 3.688879 0.000000 659 +valu 0 25 3.737670 0.000000 665 +aspect 0 25 3.737670 0.000000 663 +period 0 22 3.850148 0.000000 743 +divis 0 21 3.912023 0.000000 803 +grad 0 20 3.951244 0.000000 837 +geometr 0 19 4.007333 0.000000 852 +element 0 18 4.060443 0.000000 895 +scott 0 18 4.060443 0.000000 884 +matrix 0 17 4.110874 0.000000 933 +anonym 1 14 4.317488 4.317488 1100 +matlab 1 14 4.317488 4.317488 1081 +squar 0 14 4.317488 0.000000 1082 +finit 0 14 4.317488 0.000000 1106 +topolog 0 14 4.317488 0.000000 1089 +whose 0 13 4.382027 0.000000 1166 +forth 0 13 4.382027 0.000000 1186 +weight 0 12 4.465908 0.000000 1204 +mesh 1 11 4.553877 4.553877 1351 +stephen 1 11 4.553877 4.553877 1342 +faster 0 11 4.553877 0.000000 1323 +rhode 0 9 4.753590 0.000000 1579 +cross 0 8 4.875197 0.000000 1703 +boundari 1 7 5.010635 5.010635 1929 +aris 0 7 5.010635 0.000000 1924 +dimens 0 7 5.010635 0.000000 1930 +argonn 1 5 5.347108 5.347108 2461 +colleagu 0 5 5.347108 0.000000 2304 +dual 0 5 5.347108 0.000000 2522 +hole 0 5 5.347108 0.000000 2518 +compat 0 5 5.347108 0.000000 2485 +ratio 1 4 5.568345 5.568345 2942 +triangul 1 4 5.568345 5.568345 2903 +bldg 0 4 5.568345 0.000000 2983 +manuscript 0 4 5.568345 0.000000 2750 +orthogon 0 4 5.568345 0.000000 2832 +conform 0 4 5.568345 0.000000 2941 +vrml 0 4 5.568345 0.000000 2949 +vavasi 2 3 5.857933 11.715866 3526 +aren 0 3 5.857933 0.000000 3512 +trefethen 0 3 5.857933 0.000000 3528 +hough 0 3 5.857933 0.000000 3527 +delaunai 0 3 5.857933 0.000000 3619 +ellipt 0 3 5.857933 0.000000 3774 +cleaner 0 3 5.857933 0.000000 3775 +mitchel 1 2 6.263398 6.263398 4792 +acceler 0 2 6.263398 0.000000 5411 +driscol 0 2 6.263398 0.000000 4836 +polyhedr 0 2 6.263398 0.000000 5412 +andautomat 0 2 6.263398 0.000000 5413 +onsabbat 0 1 6.957497 0.000000 12076 +cass 0 1 6.957497 0.000000 12077 +tsure 0 1 6.957497 0.000000 12078 +essaybi 0 1 6.957497 0.000000 12079 +issuesnumer 0 1 6.957497 0.000000 12080 +problemsgeometr 0 1 6.957497 0.000000 12081 +computingspars 0 1 6.957497 0.000000 12082 +computationsi 0 1 6.957497 0.000000 12083 +primal 0 1 6.957497 0.000000 12084 +interiorpoint 0 1 6.957497 0.000000 12085 +decompositionfor 0 1 6.957497 0.000000 12086 +gridcut 0 1 6.957497 0.000000 12087 +hyperplan 0 1 6.957497 0.000000 12088 +packagei 0 1 6.957497 0.000000 12089 +verycompl 0 1 6.957497 0.000000 12090 +unstructuredtetrahedr 0 1 6.957497 0.000000 12091 +boundaryvalu 0 1 6.957497 0.000000 12092 +iswritten 0 1 6.957497 0.000000 12093 +distributedfor 0 1 6.957497 0.000000 12094 +distributionbegan 0 1 6.957497 0.000000 12095 +manyimprov 0 1 6.957497 0.000000 12096 +compatibilitywith 0 1 6.957497 0.000000 12097 +pleasese 0 1 6.957497 0.000000 12098 +reportback 0 1 6.957497 0.000000 12099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..b8f38261 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +support 0 132 1.945910 0.000000 83 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +browser 0 56 2.890372 0.000000 313 +netscap 0 44 3.135494 0.000000 395 +frame 0 24 3.761200 0.000000 684 +higher 0 24 3.761200 0.000000 690 +arun 0 4 5.568345 0.000000 2736 +verma 0 2 6.263398 0.000000 4341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..13275550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +us 0 329 1.098612 0.000000 16 +read 0 154 1.791759 0.000000 47 +browser 0 56 2.890372 0.000000 313 +date 0 51 2.995732 0.000000 344 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +thank 0 23 3.806662 0.000000 721 +vinc 0 2 6.263398 0.000000 5414 +suck 0 2 6.263398 0.000000 5232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..de31ef95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +engin 0 297 1.098612 0.000000 20 +cornel 0 215 1.386294 0.000000 23 +databas 0 122 2.079442 0.000000 86 +advanc 0 99 2.302585 0.000000 130 +start 0 83 2.484907 0.000000 173 +thing 0 84 2.484907 0.000000 189 +internet 0 83 2.484907 0.000000 186 +multimedia 0 68 2.708050 0.000000 258 +give 0 50 3.044522 0.000000 359 +break 0 20 3.951244 0.000000 812 +pagec 1 15 4.248495 4.248495 1011 +pageer 0 3 5.857933 0.000000 3776 +vitrano 1 1 6.957497 6.957497 12100 +pagehei 0 1 6.957497 0.000000 12101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..72dd9e97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +cornel 0 215 1.386294 0.000000 23 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +file 0 132 1.945910 0.000000 70 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +specif 1 106 2.197225 2.197225 106 +send 0 114 2.197225 0.000000 109 +world 0 115 2.197225 0.000000 126 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +info 0 85 2.484907 0.000000 176 +larg 0 82 2.484907 0.000000 168 +know 1 80 2.564949 2.564949 198 +good 0 77 2.564949 0.000000 200 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +window 0 68 2.708050 0.000000 242 +function 0 62 2.772589 0.000000 275 +collect 0 65 2.772589 0.000000 268 +visit 0 63 2.772589 0.000000 288 +back 0 60 2.833213 0.000000 297 +browser 1 56 2.890372 2.890372 313 +think 0 57 2.890372 0.000000 314 +maintain 0 51 2.995732 0.000000 342 +right 0 48 3.044522 0.000000 363 +visitor 0 49 3.044522 0.000000 371 +life 0 50 3.044522 0.000000 375 +effect 0 46 3.091042 0.000000 385 +favorit 0 44 3.135494 0.000000 410 +made 0 44 3.135494 0.000000 398 +around 0 43 3.178054 0.000000 415 +might 0 41 3.218876 0.000000 426 +small 0 39 3.258097 0.000000 447 +respons 0 37 3.332205 0.000000 476 +expect 0 37 3.332205 0.000000 484 +copyright 0 36 3.367296 0.000000 495 +word 0 34 3.401197 0.000000 508 +given 0 32 3.465736 0.000000 538 +anim 1 31 3.496508 3.496508 557 +someth 0 31 3.496508 0.000000 554 +held 0 28 3.610918 0.000000 600 +relev 0 26 3.688879 0.000000 637 +reach 1 24 3.761200 3.761200 688 +yahoo 0 24 3.761200 0.000000 707 +wonder 1 20 3.951244 3.951244 815 +reserv 0 20 3.951244 0.000000 808 +applet 0 20 3.951244 0.000000 827 +qualiti 0 20 3.951244 0.000000 832 +els 0 19 4.007333 0.000000 843 +capabl 0 15 4.248495 0.000000 1016 +happi 0 14 4.317488 0.000000 1079 +deriv 0 13 4.382027 0.000000 1145 +clock 0 11 4.553877 0.000000 1320 +regard 0 11 4.553877 0.000000 1309 +vista 0 10 4.653960 0.000000 1452 +ground 0 7 5.010635 0.000000 1955 +usabl 0 7 5.010635 0.000000 1810 +heavi 0 7 5.010635 0.000000 1841 +usag 0 6 5.164786 0.000000 2209 +vivek 0 6 5.164786 0.000000 2210 +promis 0 6 5.164786 0.000000 2037 +million 0 5 5.347108 0.000000 2495 +settimeout 0 5 5.347108 0.000000 2536 +wast 0 5 5.347108 0.000000 2537 +seed 1 4 5.568345 5.568345 2984 +timertwo 0 4 5.568345 0.000000 2985 +transmit 0 4 5.568345 0.000000 2835 +fulli 0 4 5.568345 0.000000 2986 +dont 1 3 5.857933 5.857933 3473 +impli 0 3 5.857933 0.000000 3348 +kolla 1 1 6.957497 6.957497 12102 +scrollit 1 1 6.957497 6.957497 12103 +unwant 0 1 6.957497 0.000000 12104 +warrante 0 1 6.957497 0.000000 12105 +zillion 0 1 6.957497 0.000000 12106 +thoughtsfriend 0 1 6.957497 0.000000 12107 +foeslinksa 0 1 6.957497 0.000000 12108 +tryalta 0 1 6.957497 0.000000 12109 +theinktomiresumein 0 1 6.957497 0.000000 12110 +htmlin 0 1 6.957497 0.000000 12111 +postscriptin 0 1 6.957497 0.000000 12112 +perfectin 0 1 6.957497 0.000000 12113 +asciith 0 1 6.957497 0.000000 12114 +wanna 0 1 6.957497 0.000000 12115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..5b3615f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +professor 1 137 1.945910 1.945910 76 +like 1 132 1.945910 1.945910 81 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +number 1 130 2.079442 2.079442 97 +compil 0 122 2.079442 0.000000 96 +high 0 130 2.079442 0.000000 101 +find 1 111 2.197225 2.197225 111 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +part 0 98 2.302585 0.000000 129 +comment 0 93 2.397895 0.000000 146 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +member 0 84 2.484907 0.000000 165 +solut 0 82 2.484907 0.000000 162 +david 0 71 2.639057 0.000000 232 +differ 0 66 2.708050 0.000000 253 +prof 0 64 2.772589 0.000000 273 +function 0 62 2.772589 0.000000 275 +simpl 0 60 2.833213 0.000000 298 +particular 0 51 2.995732 0.000000 352 +friend 1 48 3.044522 3.044522 376 +might 0 41 3.218876 0.000000 426 +paul 0 38 3.295837 0.000000 471 +everi 1 34 3.401197 3.401197 519 +either 0 35 3.401197 0.000000 506 +seem 0 18 4.060443 0.000000 899 +matrix 0 17 4.110874 0.000000 933 +spars 0 16 4.174387 0.000000 989 +difficulti 1 13 4.382027 4.382027 1132 +guess 0 10 4.653960 0.000000 1443 +henri 0 10 4.653960 0.000000 1417 +said 1 9 4.753590 4.753590 1571 +govern 0 9 4.753590 0.000000 1581 +judg 0 8 4.875197 0.000000 1644 +cornellunivers 0 7 5.010635 0.000000 1916 +keshav 0 7 5.010635 0.000000 1852 +remind 0 7 5.010635 0.000000 1799 +encrypt 0 7 5.010635 0.000000 1835 +legal 0 6 5.164786 0.000000 2094 +privaci 0 6 5.164786 0.000000 2144 +strong 0 6 5.164786 0.000000 2029 +lawyer 1 4 5.568345 5.568345 2836 +pingali 0 4 5.568345 0.000000 2956 +bernoulli 0 4 5.568345 0.000000 2955 +stodghil 0 4 5.568345 0.000000 2864 +lord 0 4 5.568345 0.000000 2906 +wherea 0 4 5.568345 0.000000 2597 +functionof 0 2 6.263398 0.000000 5415 +todayth 0 2 6.263398 0.000000 5416 +vlad 0 1 6.957497 0.000000 12116 +pagevladimir 0 1 6.957497 0.000000 12117 +kotlyarvladimir 0 1 6.957497 0.000000 12118 +wereteach 0 1 6.957497 0.000000 12119 +andindu 0 1 6.957497 0.000000 12120 +kodukulapubl 0 1 6.957497 0.000000 12121 +kissing 0 1 6.957497 0.000000 12122 +profess 0 1 6.957497 0.000000 12123 +abritish 0 1 6.957497 0.000000 12124 +sveri 0 1 6.957497 0.000000 12125 +den 0 1 6.957497 0.000000 12126 +asolut 0 1 6.957497 0.000000 12127 +outpac 0 1 6.957497 0.000000 12128 +ofsolut 0 1 6.957497 0.000000 12129 +hardenough 0 1 6.957497 0.000000 12130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..7bdb0581 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +address 1 170 1.791759 1.791759 62 +hall 0 146 1.945910 0.000000 65 +august 0 66 2.708050 0.000000 257 +ithaca 1 65 2.772589 2.772589 294 +mapl 0 11 4.553877 0.000000 1376 +rhode 0 9 4.753590 0.000000 1579 +vijai 0 4 5.568345 0.000000 2960 +menon 0 2 6.263398 0.000000 5249 +menonvijai 0 1 6.957497 0.000000 12131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..2dca0419 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +professor 0 137 1.945910 0.000000 76 +academ 0 82 2.484907 0.000000 178 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +upson 0 71 2.639057 0.000000 218 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +august 0 66 2.708050 0.000000 257 +third 0 43 3.178054 0.000000 412 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +computersci 0 30 3.555348 0.000000 562 +detect 0 26 3.688879 0.000000 646 +bookmark 0 26 3.688879 0.000000 639 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +chen 1 21 3.912023 3.912023 791 +beij 0 19 4.007333 0.000000 876 +failur 0 18 4.060443 0.000000 898 +tsinghua 0 13 4.382027 0.000000 1195 +soccer 0 8 4.875197 0.000000 1752 +spare 1 6 5.164786 5.164786 2177 +distributedsystem 0 6 5.164786 0.000000 2022 +membership 0 3 5.857933 0.000000 3751 +bachelorand 0 2 6.263398 0.000000 5128 +chinami 0 2 6.263398 0.000000 5129 +toueg 0 2 6.263398 0.000000 5339 +pagewei 0 1 6.957497 0.000000 12132 +weichen 0 1 6.957497 0.000000 12133 +inpartition 0 1 6.957497 0.000000 12134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..2db41f63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +site 0 106 2.197225 0.000000 119 +homepag 0 93 2.397895 0.000000 148 +window 0 68 2.708050 0.000000 242 +movi 0 40 3.258097 0.000000 459 +newspap 0 12 4.465908 0.000000 1280 +weitsang 0 2 6.263398 0.000000 4088 +lwhere 0 1 6.957497 0.000000 12135 +fromwhat 0 1 6.957497 0.000000 12136 +watchwhat 0 1 6.957497 0.000000 12137 +likec 0 1 6.957497 0.000000 12138 +wrotepictur 0 1 6.957497 0.000000 12139 +drawa 0 1 6.957497 0.000000 12140 +motifcomput 0 1 6.957497 0.000000 12141 +theoryhom 0 1 6.957497 0.000000 12142 +vimi 0 1 6.957497 0.000000 12143 +tsearch 0 1 6.957497 0.000000 12144 +webcoolest 0 1 6.957497 0.000000 12145 +sitessharewar 0 1 6.957497 0.000000 12146 +archivem 0 1 6.957497 0.000000 12147 +onlineunivers 0 1 6.957497 0.000000 12148 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..ea098726 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +develop 0 174 1.791759 0.000000 53 +relat 0 139 1.945910 0.000000 68 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +final 0 116 2.197225 0.000000 108 +technic 0 100 2.302585 0.000000 140 +graphic 0 90 2.397895 0.000000 147 +academ 0 82 2.484907 0.000000 178 +resum 0 79 2.564949 0.000000 217 +nation 0 74 2.639057 0.000000 240 +multimedia 1 68 2.708050 2.708050 258 +window 0 68 2.708050 0.000000 242 +java 0 70 2.708050 0.000000 248 +ithaca 0 65 2.772589 0.000000 294 +plan 0 65 2.772589 0.000000 272 +result 0 65 2.772589 0.000000 281 +extens 0 53 2.944439 0.000000 340 +tabl 0 51 2.995732 0.000000 346 +telephon 0 50 3.044522 0.000000 373 +still 0 50 3.044522 0.000000 362 +understand 0 47 3.091042 0.000000 384 +favorit 1 44 3.135494 3.135494 410 +vision 0 41 3.218876 0.000000 430 +field 0 37 3.332205 0.000000 482 +photo 0 31 3.496508 0.000000 561 +except 0 28 3.610918 0.000000 607 +team 0 27 3.637586 0.000000 625 +background 0 25 3.737670 0.000000 664 +sport 0 25 3.737670 0.000000 683 +other 0 24 3.761200 0.000000 697 +tenni 1 20 3.951244 3.951244 838 +partial 0 18 4.060443 0.000000 900 +taiwan 0 16 4.174387 0.000000 1006 +drive 0 15 4.248495 0.000000 1052 +avenu 0 12 4.465908 0.000000 1277 +basketbal 0 12 4.465908 0.000000 1289 +danc 0 12 4.465908 0.000000 1278 +skill 0 12 4.465908 0.000000 1205 +calcul 0 12 4.465908 0.000000 1268 +meng 0 12 4.465908 0.000000 1214 +mapl 0 11 4.553877 0.000000 1376 +magic 0 11 4.553877 0.000000 1358 +player 0 11 4.553877 0.000000 1371 +market 0 11 4.553877 0.000000 1361 +swim 0 9 4.753590 0.000000 1599 +volleybal 0 9 4.753590 0.000000 1598 +rivl 0 8 4.875197 0.000000 1632 +job 0 8 4.875197 0.000000 1702 +morph 0 7 5.010635 0.000000 1937 +financi 0 6 5.164786 0.000000 2197 +sing 0 5 5.347108 0.000000 2499 +hung 1 3 5.857933 5.857933 3524 +habit 0 3 5.857933 0.000000 3777 +atlanta 0 3 5.857933 0.000000 3778 +bowl 0 2 6.263398 0.000000 5417 +orlando 0 2 6.263398 0.000000 5410 +glavin 1 1 6.957497 6.957497 12149 +billiard 0 1 6.957497 0.000000 12150 +brave 0 1 6.957497 0.000000 12151 +anferne 0 1 6.957497 0.000000 12152 +hardawai 0 1 6.957497 0.000000 12153 +warp 0 1 6.957497 0.000000 12154 +webpaint 0 1 6.957497 0.000000 12155 +whkao 0 1 6.957497 0.000000 12156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..d313813f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +mathemat 0 108 2.197225 0.000000 123 +place 0 106 2.197225 0.000000 124 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +present 0 91 2.397895 0.000000 145 +search 0 95 2.397895 0.000000 155 +school 1 84 2.484907 2.484907 188 +west 0 83 2.484907 0.000000 192 +optim 1 79 2.564949 2.564949 197 +master 0 76 2.564949 0.000000 216 +write 0 72 2.639057 0.000000 222 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +practic 0 70 2.708050 0.000000 246 +collect 1 65 2.772589 2.772589 268 +copi 0 63 2.772589 0.000000 284 +plai 0 60 2.833213 0.000000 307 +type 0 61 2.833213 0.000000 296 +reason 0 57 2.890372 0.000000 318 +week 0 52 2.995732 0.000000 343 +visitor 0 49 3.044522 0.000000 371 +cool 0 49 3.044522 0.000000 374 +could 0 46 3.091042 0.000000 383 +get 0 46 3.091042 0.000000 380 +even 0 45 3.135494 0.000000 393 +york 0 41 3.218876 0.000000 435 +probabl 1 40 3.258097 3.258097 455 +live 0 40 3.258097 0.000000 451 +seminar 0 38 3.295837 0.000000 470 +formal 0 37 3.332205 0.000000 478 +robot 0 36 3.367296 0.000000 497 +campu 0 27 3.637586 0.000000 623 +enjoi 0 26 3.688879 0.000000 660 +spent 0 25 3.737670 0.000000 676 +never 0 25 3.737670 0.000000 671 +william 1 22 3.850148 3.850148 765 +mpeg 1 20 3.951244 3.951244 831 +tenni 1 20 3.951244 3.951244 838 +minut 0 20 3.951244 0.000000 810 +wrote 0 20 3.951244 0.000000 830 +beauti 0 18 4.060443 0.000000 912 +weekli 0 17 4.110874 0.000000 919 +match 0 16 4.174387 0.000000 965 +practicum 0 16 4.174387 0.000000 960 +princeton 0 15 4.248495 0.000000 1042 +mellon 0 13 4.382027 0.000000 1179 +scienceat 0 11 4.553877 0.000000 1375 +systemsc 0 11 4.553877 0.000000 1293 +mapl 0 11 4.553877 0.000000 1376 +awai 0 10 4.653960 0.000000 1447 +guess 0 10 4.653960 0.000000 1443 +jersei 0 9 4.753590 0.000000 1587 +motorola 0 9 4.753590 0.000000 1546 +besid 0 8 4.875197 0.000000 1681 +partner 0 8 4.875197 0.000000 1648 +parti 0 8 4.875197 0.000000 1676 +on 0 8 4.875197 0.000000 1628 +south 1 6 5.164786 5.164786 2167 +piano 1 6 5.164786 5.164786 2201 +sleep 0 6 5.164786 0.000000 2211 +florida 1 5 5.347108 5.347108 2526 +compet 0 5 5.347108 0.000000 2462 +coral 0 5 5.347108 0.000000 2538 +quantifi 0 5 5.347108 0.000000 2525 +thrive 0 5 5.347108 0.000000 2257 +revolut 0 5 5.347108 0.000000 2315 +encod 1 4 5.568345 5.568345 2929 +somehow 0 4 5.568345 0.000000 2974 +essai 0 4 5.568345 0.000000 2948 +wart 0 4 5.568345 0.000000 2987 +classesc 1 3 5.857933 5.857933 3681 +exit 0 3 5.857933 0.000000 3124 +engineeringand 0 3 5.857933 0.000000 3779 +hpux 0 3 5.857933 0.000000 3780 +sector 0 3 5.857933 0.000000 3766 +cornellopoli 1 2 6.263398 6.263398 5157 +chopin 0 2 6.263398 0.000000 5358 +reject 0 2 6.263398 0.000000 5418 +techniquec 0 2 6.263398 0.000000 5158 +methodsc 0 2 6.263398 0.000000 5159 +colloquiumc 0 2 6.263398 0.000000 5160 +computingc 0 2 6.263398 0.000000 5216 +concerto 1 1 6.957497 6.957497 12157 +sciencefrom 0 1 6.957497 0.000000 12158 +carneig 0 1 6.957497 0.000000 12159 +didresearch 0 1 6.957497 0.000000 12160 +institu 0 1 6.957497 0.000000 12161 +xsro 0 1 6.957497 0.000000 12162 +atft 0 1 6.957497 0.000000 12163 +lauderdal 0 1 6.957497 0.000000 12164 +usta 0 1 6.957497 0.000000 12165 +tournment 0 1 6.957497 0.000000 12166 +faviorit 0 1 6.957497 0.000000 12167 +boca 0 1 6.957497 0.000000 12168 +ratonkei 0 1 6.957497 0.000000 12169 +beethoven 0 1 6.957497 0.000000 12170 +gershwin 0 1 6.957497 0.000000 12171 +liszt 0 1 6.957497 0.000000 12172 +mendelssohn 0 1 6.957497 0.000000 12173 +mozart 0 1 6.957497 0.000000 12174 +rachmaninoff 0 1 6.957497 0.000000 12175 +ravel 0 1 6.957497 0.000000 12176 +tchaikovski 0 1 6.957497 0.000000 12177 +violinconcerto 0 1 6.957497 0.000000 12178 +purifi 0 1 6.957497 0.000000 12179 +computerc 0 1 6.957497 0.000000 12180 +sectorcool 0 1 6.957497 0.000000 12181 +links_leap 0 1 6.957497 0.000000 12182 +frogski 0 1 6.957497 0.000000 12183 +serverident 0 1 6.957497 0.000000 12184 +crisi 0 1 6.957497 0.000000 12185 +testweath 0 1 6.957497 0.000000 12186 +undergroundinktomi 0 1 6.957497 0.000000 12187 +enginequest 0 1 6.957497 0.000000 12188 +archiveslast 0 1 6.957497 0.000000 12189 +ecithaca 0 1 6.957497 0.000000 12190 +wwlee 0 1 6.957497 0.000000 12191 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..49eb25b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +network 1 168 1.791759 1.791759 61 +hall 0 146 1.945910 0.000000 65 +welcom 1 122 2.079442 2.079442 99 +spring 0 131 2.079442 0.000000 88 +high 0 130 2.079442 0.000000 101 +databas 0 122 2.079442 0.000000 86 +site 0 106 2.197225 0.000000 119 +manag 0 114 2.197225 0.000000 125 +graphic 0 90 2.397895 0.000000 147 +school 0 84 2.484907 0.000000 188 +master 1 76 2.564949 2.564949 216 +upson 0 71 2.639057 0.000000 218 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +multimedia 0 68 2.708050 0.000000 258 +china 0 37 3.332205 0.000000 487 +taken 0 31 3.496508 0.000000 555 +universityithaca 0 24 3.761200 0.000000 710 +sciencecornel 0 22 3.850148 0.000000 768 +alumni 0 21 3.912023 0.000000 807 +bachelor 0 17 4.110874 0.000000 957 +edui 0 13 4.382027 0.000000 1193 +systemsc 0 11 4.553877 0.000000 1293 +capac 0 8 4.875197 0.000000 1740 +shade 1 7 5.010635 5.010635 1881 +atcornel 0 6 5.164786 0.000000 2131 +engineeringc 0 4 5.568345 0.000000 2904 +phong 0 2 6.263398 0.000000 4822 +xichun 1 1 6.957497 6.957497 12192 +zhejiang 1 1 6.957497 6.957497 12193 +jennif 1 1 6.957497 6.957497 12194 +hangzhou 0 1 6.957497 0.000000 12195 +javaworldsunhigh 0 1 6.957497 0.000000 12196 +alumnimeng 0 1 6.957497 0.000000 12197 +gouraud 0 1 6.957497 0.000000 12198 +systeme 0 1 6.957497 0.000000 12199 +communicationby 0 1 6.957497 0.000000 12200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..179ebe45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 0 384 0.693147 0.000000 11 +topic 0 114 2.197225 0.000000 110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..b670334b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +cornel 0 215 1.386294 0.000000 23 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +base 1 165 1.791759 1.791759 50 +relat 0 139 1.945910 0.000000 68 +analysi 1 124 2.079442 2.079442 98 +confer 0 126 2.079442 0.000000 100 +techniqu 0 99 2.302585 0.000000 138 +proceed 1 93 2.397895 2.397895 152 +select 0 91 2.397895 0.000000 154 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +june 0 79 2.564949 0.000000 214 +optim 0 79 2.564949 0.000000 197 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +knowledg 0 67 2.708050 0.000000 243 +improv 1 62 2.772589 2.772589 289 +interact 1 62 2.772589 2.772589 270 +written 0 63 2.772589 0.000000 278 +function 0 62 2.772589 0.000000 275 +result 0 65 2.772589 0.000000 281 +evalu 0 64 2.772589 0.000000 266 +januari 0 62 2.772589 0.000000 264 +juli 0 60 2.833213 0.000000 305 +februari 0 54 2.944439 0.000000 328 +principl 1 48 3.044522 3.044522 357 +approach 0 48 3.044522 0.000000 366 +california 0 46 3.091042 0.000000 388 +cach 0 41 3.218876 0.000000 432 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +societi 0 40 3.258097 0.000000 456 +transform 1 32 3.465736 3.465736 542 +semant 0 29 3.583519 0.000000 587 +manipul 0 27 3.637586 0.000000 624 +boston 0 19 4.007333 0.000000 862 +partial 0 18 4.060443 0.000000 900 +attribut 1 14 4.317488 4.317488 1092 +massachusett 0 14 4.317488 0.000000 1118 +deriv 1 13 4.382027 4.382027 1145 +sigplan 1 13 4.382027 4.382027 1190 +intermedi 0 9 4.753590 0.000000 1497 +discov 0 9 4.753590 0.000000 1562 +strength 0 9 4.753590 0.000000 1494 +reduct 0 7 5.010635 0.000000 1877 +increment 2 6 5.164786 10.329572 2206 +teitelbaum 1 6 5.164786 5.164786 2102 +sigact 0 6 5.164786 0.000000 2212 +florida 0 5 5.347108 0.000000 2526 +jolla 0 4 5.568345 0.000000 2988 +stoller 0 4 5.568345 0.000000 2866 +petersburg 0 4 5.568345 0.000000 2989 +systemat 1 3 5.857933 5.857933 3781 +beach 0 3 5.857933 0.000000 3782 +cachet 1 2 6.263398 6.263398 5419 +anni 1 2 6.263398 6.263398 5420 +auxiliari 0 2 6.263398 0.000000 5421 +yanhong 0 2 6.263398 0.000000 5422 +computationderiv 0 1 6.957497 0.000000 12201 +programsa 0 1 6.957497 0.000000 12202 +themeprogram 0 1 6.957497 0.000000 12203 +usessystemat 0 1 6.957497 0.000000 12204 +deriveincrement 0 1 6.957497 0.000000 12205 +peoplei 0 1 6.957497 0.000000 12206 +liutim 0 1 6.957497 0.000000 12207 +teitelbaumkeyword 0 1 6.957497 0.000000 12208 +cacheti 0 1 6.957497 0.000000 12209 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..e0fc6065 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +public 1 202 1.609438 1.609438 43 +updat 0 191 1.609438 0.000000 41 +base 2 165 1.791759 3.583518 50 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +problem 1 147 1.945910 1.945910 75 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +report 2 131 2.079442 4.158884 92 +confer 2 126 2.079442 4.158884 100 +analysi 1 124 2.079442 2.079442 98 +compil 0 122 2.079442 0.000000 96 +seattl 0 120 2.079442 0.000000 103 +document 0 121 2.079442 0.000000 89 +intern 2 108 2.197225 4.394450 128 +world 0 115 2.197225 0.000000 126 +technic 2 100 2.302585 4.605170 140 +techniqu 1 99 2.302585 2.302585 138 +peopl 0 96 2.302585 0.000000 132 +proceed 2 93 2.397895 4.795790 152 +select 1 91 2.397895 2.397895 154 +center 1 88 2.397895 2.397895 158 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +present 0 91 2.397895 0.000000 145 +novemb 1 81 2.484907 2.484907 179 +institut 1 84 2.484907 2.484907 187 +ieee 0 86 2.484907 0.000000 190 +build 0 85 2.484907 0.000000 184 +optim 1 79 2.564949 2.564949 197 +june 1 79 2.564949 2.564949 214 +appear 0 78 2.564949 0.000000 210 +dynam 0 76 2.564949 0.000000 194 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +upson 0 71 2.639057 0.000000 218 +html 0 75 2.639057 0.000000 235 +august 1 66 2.708050 2.708050 257 +knowledg 1 67 2.708050 2.708050 243 +test 0 66 2.708050 0.000000 252 +improv 1 62 2.772589 2.772589 289 +ithaca 1 65 2.772589 2.772589 294 +result 1 65 2.772589 2.772589 281 +januari 1 62 2.772589 2.772589 264 +evalu 1 64 2.772589 2.772589 266 +interact 1 62 2.772589 2.772589 270 +descript 0 64 2.772589 0.000000 271 +septemb 0 65 2.772589 0.000000 274 +laboratori 0 63 2.772589 0.000000 292 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +automat 0 61 2.833213 0.000000 306 +reason 1 57 2.890372 2.890372 318 +publish 0 57 2.890372 0.000000 326 +explor 0 58 2.890372 0.000000 324 +februari 0 54 2.944439 0.000000 328 +talk 0 53 2.944439 0.000000 336 +profession 0 51 2.995732 0.000000 345 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +move 0 47 3.091042 0.000000 382 +york 2 41 3.218876 6.437752 435 +cach 1 41 3.218876 3.218876 432 +press 1 42 3.218876 3.218876 419 +combin 1 42 3.218876 3.218876 421 +annual 1 40 3.258097 3.258097 458 +societi 0 40 3.258097 0.000000 456 +map 0 39 3.258097 0.000000 452 +author 0 39 3.258097 0.000000 450 +seminar 0 38 3.295837 0.000000 470 +china 1 37 3.332205 3.332205 487 +formal 0 37 3.332205 0.000000 478 +multi 1 36 3.367296 3.367296 493 +post 0 35 3.401197 0.000000 505 +concurr 0 34 3.401197 0.000000 501 +survei 0 35 3.401197 0.000000 513 +manual 0 35 3.401197 0.000000 504 +obtain 0 33 3.433987 0.000000 534 +transform 1 32 3.465736 3.465736 542 +dissert 0 32 3.465736 0.000000 549 +scientist 1 31 3.496508 3.496508 560 +compon 0 30 3.555348 0.000000 570 +semant 1 29 3.583519 3.583519 587 +manipul 1 27 3.637586 3.637586 624 +revis 0 26 3.688879 0.000000 640 +doctor 0 24 3.761200 0.000000 709 +universityithaca 0 24 3.761200 0.000000 710 +wang 1 21 3.912023 3.912023 790 +hous 0 21 3.912023 0.000000 801 +expert 1 20 3.951244 3.951244 833 +break 0 20 3.951244 0.000000 812 +department 0 20 3.951244 0.000000 839 +beij 1 19 4.007333 4.007333 876 +boston 1 19 4.007333 4.007333 862 +partial 1 18 4.060443 4.060443 900 +germani 0 17 4.110874 0.000000 946 +young 1 16 4.174387 4.174387 991 +zhang 1 16 4.174387 4.174387 980 +partit 0 16 4.174387 0.000000 984 +attribut 1 14 4.317488 4.317488 1092 +massachusett 1 14 4.317488 4.317488 1118 +deriv 2 13 4.382027 8.764054 1145 +sigplan 1 13 4.382027 4.382027 1190 +tsinghua 0 13 4.382027 0.000000 1195 +huang 0 12 4.465908 0.000000 1202 +qualit 1 11 4.553877 4.553877 1362 +song 0 11 4.553877 0.000000 1380 +ofcomput 0 10 4.653960 0.000000 1442 +intermedi 1 9 4.753590 4.753590 1497 +discov 1 9 4.753590 4.753590 1562 +factor 1 9 4.753590 4.753590 1544 +mainten 0 9 4.753590 0.000000 1543 +congress 0 9 4.753590 0.000000 1592 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +compos 0 9 4.753590 0.000000 1527 +quantit 1 8 4.875197 4.875197 1654 +xerox 1 8 4.875197 4.875197 1725 +hallcornel 0 8 4.875197 0.000000 1757 +refere 0 7 5.010635 0.000000 1895 +uncertainti 0 7 5.010635 0.000000 1882 +increment 3 6 5.164786 15.494358 2206 +teitelbaum 2 6 5.164786 10.329572 2102 +sigact 1 6 5.164786 5.164786 2212 +indiana 1 6 5.164786 5.164786 2057 +usag 0 6 5.164786 0.000000 2209 +webster 1 5 5.347108 5.347108 2468 +florida 1 5 5.347108 5.347108 2526 +peke 0 5 5.347108 0.000000 2539 +petersburg 1 4 5.568345 5.568345 2989 +jolla 1 4 5.568345 5.568345 2988 +dagstuhl 1 4 5.568345 5.568345 2871 +stoller 0 4 5.568345 0.000000 2866 +kestrel 0 4 5.568345 0.000000 2990 +systemat 2 3 5.857933 11.715866 3781 +beach 1 3 5.857933 5.857933 3782 +schloss 0 3 5.857933 0.000000 3727 +tocomput 0 3 5.857933 0.000000 3162 +yanhong 1 2 6.263398 6.263398 5422 +cachet 1 2 6.263398 6.263398 5419 +anni 1 2 6.263398 6.263398 5420 +auxiliari 1 2 6.263398 6.263398 5421 +fuzzi 0 2 6.263398 0.000000 5423 +eduhttp 0 2 6.263398 0.000000 5424 +pageyanhong 0 1 6.957497 0.000000 12210 +forincrement 0 1 6.957497 0.000000 12211 +interactivesystem 0 1 6.957497 0.000000 12212 +systemorgan 0 1 6.957497 0.000000 12213 +talksph 0 1 6.957497 0.000000 12214 +basedsystemat 0 1 6.957497 0.000000 12215 +abstractjourn 0 1 6.957497 0.000000 12216 +inexact 0 1 6.957497 0.000000 12217 +wakayama 0 1 6.957497 0.000000 12218 +oggeb 0 1 6.957497 0.000000 12219 +basin 0 1 6.957497 0.000000 12220 +ri 0 1 6.957497 0.000000 12221 +tshinghua 0 1 6.957497 0.000000 12222 +lindlei 0 1 6.957497 0.000000 12223 +hallindiana 0 1 6.957497 0.000000 12224 +universitybloomington 0 1 6.957497 0.000000 12225 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..b5cfea67 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 0 343 1.098612 0.000000 19 +cornel 1 215 1.386294 1.386294 23 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +hall 0 146 1.945910 0.000000 65 +upson 0 71 2.639057 0.000000 218 +onlin 0 75 2.639057 0.000000 223 +ithaca 0 65 2.772589 0.000000 294 +favorit 0 44 3.135494 0.000000 410 +edui 0 13 4.382027 0.000000 1193 +huang 1 12 4.465908 4.465908 1202 +cheng 1 10 4.653960 4.653960 1381 +ychuang 1 3 5.857933 5.857933 3093 +huangyi 0 1 6.957497 0.000000 12226 +documentscoursesprojectaccess 0 1 6.957497 0.000000 12227 +byvisitorslast 0 1 6.957497 0.000000 12228 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..97ee2564 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +email 1 220 1.386294 1.386294 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +like 1 132 1.945910 1.945910 81 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +construct 0 139 1.945910 0.000000 82 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +world 0 115 2.197225 0.000000 126 +version 0 113 2.197225 0.000000 122 +person 0 111 2.197225 0.000000 117 +search 1 95 2.397895 2.397895 155 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +school 2 84 2.484907 4.969814 188 +chang 1 82 2.484907 2.484907 163 +thing 0 84 2.484907 0.000000 189 +wide 0 84 2.484907 0.000000 185 +novemb 0 81 2.484907 0.000000 179 +april 1 77 2.564949 2.564949 196 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +onlin 0 75 2.639057 0.000000 223 +java 1 70 2.708050 2.708050 248 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +plai 1 60 2.833213 2.833213 307 +automat 0 61 2.833213 0.000000 306 +browser 0 56 2.890372 0.000000 313 +life 0 50 3.044522 0.000000 375 +still 0 50 3.044522 0.000000 362 +better 0 45 3.135494 0.000000 401 +music 1 42 3.218876 3.218876 436 +programm 0 39 3.258097 0.000000 445 +origin 0 38 3.295837 0.000000 472 +word 0 34 3.401197 0.000000 508 +kind 0 32 3.465736 0.000000 541 +titl 0 31 3.496508 0.000000 556 +actual 0 28 3.610918 0.000000 604 +brows 0 23 3.806662 0.000000 726 +sciencecornel 0 22 3.850148 0.000000 768 +love 0 21 3.912023 0.000000 804 +wonder 0 20 3.951244 0.000000 815 +beauti 0 18 4.060443 0.000000 912 +listen 0 18 4.060443 0.000000 907 +women 0 16 4.174387 0.000000 1004 +georg 0 16 4.174387 0.000000 994 +classic 0 14 4.317488 0.000000 1084 +came 1 13 4.382027 4.382027 1197 +forth 0 13 4.382027 0.000000 1186 +went 0 12 4.465908 0.000000 1279 +meng 0 12 4.465908 0.000000 1214 +america 1 11 4.553877 4.553877 1370 +pagewelcom 0 11 4.553877 0.000000 1344 +moment 0 11 4.553877 0.000000 1379 +virginia 1 8 4.875197 4.875197 1659 +chung 0 7 5.010635 0.000000 1964 +elementari 0 7 5.010635 0.000000 1825 +marri 0 7 5.010635 0.000000 1946 +perfect 0 7 5.010635 0.000000 1921 +piano 1 6 5.164786 5.164786 2201 +sung 0 6 5.164786 0.000000 2075 +emerg 0 6 5.164786 0.000000 2038 +junior 1 5 5.347108 5.347108 2519 +everybodi 0 5 5.347108 0.000000 2517 +korea 1 4 5.568345 5.568345 2971 +keyboard 1 4 5.568345 5.568345 2970 +moon 0 4 5.568345 0.000000 2991 +hire 0 4 5.568345 0.000000 2976 +seoul 0 3 5.857933 0.000000 3783 +forward 0 3 5.857933 0.000000 3784 +korean 1 2 6.263398 6.263398 5354 +sang 0 2 6.263398 0.000000 5356 +kang 0 2 6.263398 0.000000 5360 +mason 0 2 6.263398 0.000000 4916 +infom 0 2 6.263398 0.000000 5425 +ilbo 1 1 6.957497 6.957497 12229 +myoung 1 1 6.957497 6.957497 12230 +husband 1 1 6.957497 6.957497 12231 +chungyou 0 1 6.957497 0.000000 12232 +thvisitor 0 1 6.957497 0.000000 12233 +universitywher 0 1 6.957497 0.000000 12234 +kindergarten 0 1 6.957497 0.000000 12235 +universityin 0 1 6.957497 0.000000 12236 +happiest 0 1 6.957497 0.000000 12237 +forsaic 0 1 6.957497 0.000000 12238 +shin 0 1 6.957497 0.000000 12239 +seung 0 1 6.957497 0.000000 12240 +hoon 0 1 6.957497 0.000000 12241 +newpap 0 1 6.957497 0.000000 12242 +hangook 0 1 6.957497 0.000000 12243 +chosun 0 1 6.957497 0.000000 12244 +joongang 0 1 6.957497 0.000000 12245 +appletyoosun 0 1 6.957497 0.000000 12246 +triphamm 0 1 6.957497 0.000000 12247 +sbithaca 0 1 6.957497 0.000000 12248 +ychung 0 1 6.957497 0.000000 12249 +yooschung 0 1 6.957497 0.000000 12250 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..f9fd4da4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,222 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +public 1 202 1.609438 1.609438 43 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +hour 0 165 1.791759 0.000000 46 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +place 1 106 2.197225 2.197225 124 +make 1 111 2.197225 2.197225 120 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +version 0 113 2.197225 0.000000 122 +take 1 97 2.302585 2.302585 134 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +follow 0 92 2.397895 0.000000 143 +comment 0 93 2.397895 0.000000 146 +school 1 84 2.484907 2.484907 188 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +internet 0 83 2.484907 0.000000 186 +west 0 83 2.484907 0.000000 192 +thing 0 84 2.484907 0.000000 189 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +want 0 79 2.564949 0.000000 199 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +upson 0 71 2.639057 0.000000 218 +free 0 73 2.639057 0.000000 224 +order 0 69 2.708050 0.000000 249 +ithaca 1 65 2.772589 2.772589 294 +virtual 0 62 2.772589 0.000000 285 +plai 1 60 2.833213 2.833213 307 +simpl 0 60 2.833213 0.000000 298 +unix 0 58 2.890372 0.000000 308 +direct 0 57 2.890372 0.000000 316 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +much 1 52 2.995732 2.995732 349 +particular 0 51 2.995732 0.000000 352 +date 0 51 2.995732 0.000000 344 +run 0 51 2.995732 0.000000 347 +maintain 0 51 2.995732 0.000000 342 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +right 0 48 3.044522 0.000000 363 +effect 0 46 3.091042 0.000000 385 +better 1 45 3.135494 3.135494 401 +execut 0 45 3.135494 0.000000 404 +howev 0 41 3.218876 0.000000 422 +york 0 41 3.218876 0.000000 435 +compani 0 41 3.218876 0.000000 423 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +microsoft 0 38 3.295837 0.000000 468 +expect 0 37 3.332205 0.000000 484 +game 1 36 3.367296 3.367296 498 +download 0 36 3.367296 0.000000 489 +everi 1 34 3.401197 3.401197 519 +michael 0 35 3.401197 0.000000 514 +toler 1 33 3.433987 3.433987 533 +go 0 33 3.433987 0.000000 529 +within 0 33 3.433987 0.000000 525 +often 0 31 3.496508 0.000000 551 +someth 0 31 3.496508 0.000000 554 +computersci 0 30 3.555348 0.000000 562 +focus 0 29 3.583519 0.000000 584 +hope 0 28 3.610918 0.000000 610 +great 1 27 3.637586 3.637586 626 +linux 1 27 3.637586 3.637586 631 +though 0 27 3.637586 0.000000 622 +rule 0 26 3.688879 0.000000 638 +compar 0 26 3.688879 0.000000 648 +notic 0 25 3.737670 0.000000 675 +strategi 0 25 3.737670 0.000000 682 +other 0 24 3.761200 0.000000 697 +yahoo 0 24 3.761200 0.000000 707 +magazin 0 24 3.761200 0.000000 704 +highli 0 23 3.806662 0.000000 725 +begin 0 23 3.806662 0.000000 716 +recommend 0 22 3.850148 0.000000 737 +almost 0 22 3.850148 0.000000 742 +instal 0 22 3.850148 0.000000 754 +love 1 21 3.912023 3.912023 804 +longer 1 20 3.951244 3.951244 816 +agent 0 18 4.060443 0.000000 910 +medic 1 17 4.110874 4.110874 958 +attempt 0 17 4.110874 0.000000 917 +intro 0 17 4.110874 0.000000 915 +thought 0 17 4.110874 0.000000 945 +easi 1 16 4.174387 4.174387 969 +critic 0 16 4.174387 0.000000 982 +rate 1 15 4.248495 4.248495 1037 +save 0 14 4.317488 0.000000 1099 +block 1 13 4.382027 4.382027 1183 +front 0 13 4.382027 0.000000 1154 +wife 0 13 4.382027 0.000000 1196 +brother 0 13 4.382027 0.000000 1189 +emac 0 13 4.382027 0.000000 1143 +uniqu 0 12 4.465908 0.000000 1228 +town 0 10 4.653960 0.000000 1458 +yellow 1 9 4.753590 4.753590 1601 +seven 0 9 4.753590 0.000000 1561 +crash 0 8 4.875197 0.000000 1616 +satisfi 0 8 4.875197 0.000000 1694 +qualifi 0 8 4.875197 0.000000 1721 +contrast 0 8 4.875197 0.000000 1637 +fail 0 8 4.875197 0.000000 1655 +slightli 0 7 5.010635 0.000000 1795 +marri 0 7 5.010635 0.000000 1946 +perfect 0 7 5.010635 0.000000 1921 +bookstor 0 7 5.010635 0.000000 1837 +tri 1 6 5.164786 5.164786 2166 +seen 0 6 5.164786 0.000000 2202 +impress 0 6 5.164786 0.000000 2096 +plu 0 6 5.164786 0.000000 2004 +slate 0 6 5.164786 0.000000 2021 +suni 0 5 5.347108 0.000000 2452 +twenti 0 5 5.347108 0.000000 2540 +fairli 0 5 5.347108 0.000000 2322 +hate 0 5 5.347108 0.000000 2529 +solid 0 5 5.347108 0.000000 2255 +webpag 0 4 5.568345 0.000000 2660 +complic 0 4 5.568345 0.000000 2902 +trivial 0 4 5.568345 0.000000 2786 +closest 0 4 5.568345 0.000000 2828 +syracus 1 3 5.857933 5.857933 3553 +edudepart 0 3 5.857933 0.000000 3302 +forfault 0 3 5.857933 0.000000 3748 +outof 0 3 5.857933 0.000000 3296 +nota 0 3 5.857933 0.000000 3785 +newli 0 3 5.857933 0.000000 3786 +health 0 3 5.857933 0.000000 3787 +advertis 0 3 5.857933 0.000000 3788 +yaron 1 2 6.263398 6.263398 4122 +minski 1 2 6.263398 6.263398 4123 +veggi 0 2 6.263398 0.000000 5426 +coop 0 2 6.263398 0.000000 4213 +nowadai 0 2 6.263398 0.000000 5376 +lisa 0 2 6.263398 0.000000 5427 +theidea 0 2 6.263398 0.000000 5428 +resumesom 0 2 6.263398 0.000000 5186 +miser 0 2 6.263398 0.000000 5359 +admit 0 2 6.263398 0.000000 5429 +amazon 0 2 6.263398 0.000000 5193 +flapdragon 1 1 6.957497 6.957497 12251 +yminski 0 1 6.957497 0.000000 12252 +comstock 0 1 6.957497 0.000000 12253 +onfault 0 1 6.957497 0.000000 12254 +thetacoma 0 1 6.957497 0.000000 12255 +livether 0 1 6.957497 0.000000 12256 +anopen 0 1 6.957497 0.000000 12257 +recommendit 0 1 6.957497 0.000000 12258 +ancientchines 0 1 6.957497 0.000000 12259 +extremlysimpl 0 1 6.957497 0.000000 12260 +thannoth 0 1 6.957497 0.000000 12261 +cgoban 0 1 6.957497 0.000000 12262 +nicest 0 1 6.957497 0.000000 12263 +goboard 0 1 6.957497 0.000000 12264 +thenet 0 1 6.957497 0.000000 12265 +minutesof 0 1 6.957497 0.000000 12266 +favoritepoem 0 1 6.957497 0.000000 12267 +lafiglia 0 1 6.957497 0.000000 12268 +piang 0 1 6.957497 0.000000 12269 +advicefor 0 1 6.957497 0.000000 12270 +interestinglink 0 1 6.957497 0.000000 12271 +alarmingli 0 1 6.957497 0.000000 12272 +firefli 0 1 6.957497 0.000000 12273 +bakeri 0 1 6.957497 0.000000 12274 +bigbook 0 1 6.957497 0.000000 12275 +bigyellow 0 1 6.957497 0.000000 12276 +kinslei 0 1 6.957497 0.000000 12277 +discount 0 1 6.957497 0.000000 12278 +booksel 0 1 6.957497 0.000000 12279 +mailcrypt 0 1 6.957497 0.000000 12280 +interfacemqbtazgjohoaaaedalfhlgjmdg 0 1 6.957497 0.000000 12281 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 0 1 6.957497 0.000000 12282 +rbylf 0 1 6.957497 0.000000 12283 +zwqujcioczoecv 0 1 6.957497 0.000000 12284 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 0 1 6.957497 0.000000 12285 +gkgarsokrinnoazihja 0 1 6.957497 0.000000 12286 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 0 1 6.957497 0.000000 12287 +wumjgzsnvispwkrvzgdrojswmc 0 1 6.957497 0.000000 12288 +eigsqsb 0 1 6.957497 0.000000 12289 +bsbpw 0 1 6.957497 0.000000 12290 +jcwz 0 1 6.957497 0.000000 12291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..68ba2e97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +address 0 170 1.791759 0.000000 62 +introduct 1 126 2.079442 2.079442 87 +compil 1 122 2.079442 2.079442 96 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +welcom 0 122 2.079442 0.000000 99 +theori 0 111 2.197225 0.000000 127 +look 0 107 2.197225 0.000000 115 +peopl 1 96 2.302585 2.302585 132 +info 1 85 2.484907 2.484907 176 +novemb 0 81 2.484907 0.000000 179 +state 0 76 2.564949 0.000000 207 +resum 0 79 2.564949 0.000000 217 +html 0 75 2.639057 0.000000 235 +artifici 1 63 2.772589 2.772589 280 +foundat 0 62 2.772589 0.000000 286 +semest 0 58 2.890372 0.000000 312 +format 1 48 3.044522 3.044522 356 +http 0 41 3.218876 0.000000 420 +unit 0 21 3.912023 0.000000 779 +modif 0 17 4.110874 0.000000 913 +practicum 1 16 4.174387 4.174387 960 +systemsc 0 11 4.553877 0.000000 1293 +pagecours 0 5 5.347108 0.000000 2395 +intelligencec 1 4 5.568345 5.568345 2673 +visionfal 0 2 6.263398 0.000000 4749 +eduhttp 0 2 6.263398 0.000000 5424 +yuichi 1 1 6.957497 6.957497 12292 +tsuchimoto 1 1 6.957497 6.957497 12293 +translatorsc 1 1 6.957497 6.957497 12294 +pageyuichi 0 1 6.957497 0.000000 12295 +workfal 0 1 6.957497 0.000000 12296 +engineeringspr 0 1 6.957497 0.000000 12297 +computingi 0 1 6.957497 0.000000 12298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..12e4a9f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +implement 2 152 1.791759 3.583518 52 +network 2 168 1.791759 3.583518 61 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +document 0 121 2.079442 0.000000 89 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +specif 1 106 2.197225 2.197225 106 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +well 0 109 2.197225 0.000000 121 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +commun 2 95 2.397895 4.795790 157 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +proceed 0 93 2.397895 0.000000 152 +activ 2 84 2.484907 4.969814 182 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +build 0 85 2.484907 0.000000 184 +ieee 0 86 2.484907 0.000000 190 +level 0 87 2.484907 0.000000 180 +control 0 82 2.484907 0.000000 164 +larg 0 82 2.484907 0.000000 168 +messag 3 76 2.564949 7.694847 212 +know 1 80 2.564949 2.564949 198 +appear 1 78 2.564949 2.564949 210 +interfac 1 79 2.564949 2.564949 209 +sourc 0 77 2.564949 0.000000 201 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +state 0 76 2.564949 0.000000 207 +dynam 0 76 2.564949 0.000000 194 +order 1 69 2.708050 2.708050 249 +differ 0 66 2.708050 0.000000 253 +integr 0 67 2.708050 0.000000 245 +abstract 1 62 2.772589 2.772589 276 +evalu 1 64 2.772589 2.772589 266 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +share 0 59 2.833213 0.000000 304 +detail 1 57 2.890372 2.890372 321 +major 0 56 2.890372 0.000000 315 +direct 0 57 2.890372 0.000000 316 +think 0 57 2.890372 0.000000 314 +thesi 0 57 2.890372 0.000000 327 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +februari 0 54 2.944439 0.000000 328 +talk 0 53 2.944439 0.000000 336 +investig 0 51 2.995732 0.000000 353 +hardwar 0 51 2.995732 0.000000 350 +standard 1 48 3.044522 3.044522 365 +without 1 50 3.044522 3.044522 370 +adapt 0 46 3.091042 0.000000 387 +effect 0 46 3.091042 0.000000 385 +california 0 46 3.091042 0.000000 388 +describ 1 45 3.135494 3.135494 400 +made 0 44 3.135494 0.000000 398 +even 0 45 3.135494 0.000000 393 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +offer 0 43 3.178054 0.000000 414 +fast 0 42 3.218876 0.000000 429 +examin 0 42 3.218876 0.000000 424 +slide 0 38 3.295837 0.000000 467 +prototyp 0 38 3.295837 0.000000 463 +cost 1 37 3.332205 3.332205 480 +workstat 1 37 3.332205 3.332205 479 +connect 0 37 3.332205 0.000000 485 +word 0 34 3.401197 0.000000 508 +toler 0 33 3.433987 0.000000 533 +concept 0 32 3.465736 0.000000 537 +someth 0 31 3.496508 0.000000 554 +power 1 30 3.555348 3.555348 573 +compon 0 30 3.555348 0.000000 570 +exist 0 30 3.555348 0.000000 569 +rang 0 30 3.555348 0.000000 565 +releas 1 28 3.610918 3.610918 616 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +pass 1 28 3.610918 3.610918 611 +packag 0 28 3.610918 0.000000 614 +intend 0 28 3.610918 0.000000 599 +progress 0 28 3.610918 0.000000 598 +propos 0 28 3.610918 0.000000 602 +measur 0 28 3.610918 0.000000 609 +scale 0 28 3.610918 0.000000 613 +becom 0 28 3.610918 0.000000 603 +though 0 27 3.637586 0.000000 622 +compar 1 26 3.688879 3.688879 648 +berkelei 1 26 3.688879 3.688879 657 +altern 0 26 3.688879 0.000000 641 +challeng 0 26 3.688879 0.000000 653 +reliabl 0 25 3.737670 0.000000 674 +concern 0 25 3.737670 0.000000 666 +demonstr 1 24 3.761200 3.761200 694 +higher 0 24 3.761200 0.000000 690 +magazin 0 24 3.761200 0.000000 704 +flow 0 24 3.761200 0.000000 700 +reduc 1 22 3.850148 3.850148 759 +instal 0 22 3.850148 0.000000 754 +defin 0 22 3.850148 0.000000 746 +varieti 0 22 3.850148 0.000000 740 +flexibl 0 21 3.912023 0.000000 792 +portabl 0 20 3.951244 0.000000 819 +benchmark 1 19 4.007333 4.007333 859 +comparison 0 19 4.007333 0.000000 863 +lower 0 18 4.060443 0.000000 886 +layer 1 17 4.110874 4.110874 926 +interconnect 1 17 4.110874 4.110874 937 +whether 0 17 4.110874 0.000000 918 +protect 0 17 4.110874 0.000000 935 +outlin 0 17 4.110874 0.000000 914 +latenc 2 16 4.174387 8.348774 993 +commerci 0 16 4.174387 0.000000 1005 +across 0 16 4.174387 0.000000 974 +overhead 1 15 4.248495 4.248495 1035 +driven 1 15 4.248495 4.248495 1048 +micro 0 15 4.248495 0.000000 1031 +split 1 14 4.317488 4.317488 1078 +trip 1 14 4.317488 4.317488 1113 +achiev 0 14 4.317488 0.000000 1088 +eicken 2 13 4.382027 8.764054 1134 +thorsten 1 13 4.382027 4.382027 1133 +block 0 13 4.382027 0.000000 1183 +directli 0 13 4.382027 0.000000 1141 +signific 0 13 4.382027 0.000000 1125 +earlier 0 13 4.382027 0.000000 1140 +carri 0 13 4.382027 0.000000 1152 +coordin 0 13 4.382027 0.000000 1182 +introduc 0 13 4.382027 0.000000 1139 +characterist 1 12 4.465908 4.465908 1257 +onth 0 12 4.465908 0.000000 1218 +buffer 0 12 4.465908 0.000000 1211 +bandwidth 1 11 4.553877 4.553877 1365 +chri 0 11 4.553877 0.000000 1311 +primit 0 11 4.553877 0.000000 1317 +underli 0 10 4.653960 0.000000 1410 +equip 0 10 4.653960 0.000000 1459 +equival 0 9 4.753590 0.000000 1496 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +transmiss 0 9 4.753590 0.000000 1588 +significantli 0 9 4.753590 0.000000 1508 +desir 0 9 4.753590 0.000000 1542 +chao 1 8 4.875197 4.875197 1753 +spec 1 8 4.875197 4.875197 1640 +round 1 8 4.875197 4.875197 1769 +readm 0 8 4.875197 0.000000 1699 +ring 0 8 4.875197 0.000000 1684 +vineet 0 8 4.875197 0.000000 1639 +andcomput 0 8 4.875197 0.000000 1623 +gold 0 8 4.875197 0.000000 1745 +coast 0 8 4.875197 0.000000 1746 +poor 0 8 4.875197 0.000000 1736 +pittsburgh 0 7 5.010635 0.000000 1938 +larger 0 7 5.010635 0.000000 1875 +suffici 0 7 5.010635 0.000000 1897 +freeli 0 6 5.164786 0.000000 2014 +lack 0 6 5.164786 0.000000 1994 +affect 0 6 5.164786 0.000000 2044 +goldstein 0 6 5.164786 0.000000 2168 +phase 0 6 5.164786 0.000000 1977 +older 0 5 5.347108 0.000000 2387 +buch 0 5 5.347108 0.000000 2272 +culler 0 5 5.347108 0.000000 2381 +symp 0 5 5.347108 0.000000 2376 +australia 0 5 5.347108 0.000000 2478 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +conform 1 4 5.568345 5.568345 2941 +theus 0 4 5.568345 0.000000 2992 +throughput 0 4 5.568345 0.000000 2993 +andevalu 0 4 5.568345 0.000000 2706 +asymptot 0 4 5.568345 0.000000 2676 +basu 0 4 5.568345 0.000000 2843 +forparallel 0 4 5.568345 0.000000 2703 +mpp 1 3 5.857933 5.857933 3194 +neta 0 3 5.857933 0.000000 3789 +thegener 0 3 5.857933 0.000000 3648 +moreinform 0 3 5.857933 0.000000 3307 +let 0 3 5.857933 0.000000 3790 +avula 0 3 5.857933 0.000000 3600 +abridg 0 3 5.857933 0.000000 3772 +magnitud 0 3 5.857933 0.000000 3582 +roughli 0 3 5.857933 0.000000 3097 +schauser 0 3 5.857933 0.000000 3599 +tremend 0 3 5.857933 0.000000 3453 +thegam 0 2 6.263398 0.000000 5430 +differencebetween 0 2 6.263398 0.000000 5431 +pleaseclick 0 2 6.263398 0.000000 5432 +messageslow 0 2 6.263398 0.000000 5040 +meiko 0 2 6.263398 0.000000 4996 +focuseson 0 2 6.263398 0.000000 5433 +veena 0 2 6.263398 0.000000 5000 +thecommun 0 2 6.263398 0.000000 4928 +thesetechniqu 0 2 6.263398 0.000000 4263 +thenetwork 0 2 6.263398 0.000000 5434 +incommun 0 2 6.263398 0.000000 4349 +microsecond 0 2 6.263398 0.000000 5435 +tominim 0 2 6.263398 0.000000 5436 +unnecessarili 0 2 6.263398 0.000000 4688 +mainstream 0 2 6.263398 0.000000 5437 +contactthorsten 0 2 6.263398 0.000000 5438 +activemessag 1 1 6.957497 6.957497 12299 +secondpart 1 1 6.957497 6.957497 12300 +messagescornel 0 1 6.957497 0.000000 12301 +implementationsact 0 1 6.957497 0.000000 12302 +codereleas 0 1 6.957497 0.000000 12303 +instructionson 0 1 6.957497 0.000000 12304 +releasenot 0 1 6.957497 0.000000 12305 +fileto 0 1 6.957497 0.000000 12306 +currentvers 0 1 6.957497 0.000000 12307 +libmpci 0 1 6.957497 0.000000 12308 +thedistribut 0 1 6.957497 0.000000 12309 +fordetail 0 1 6.957497 0.000000 12310 +briefnot 0 1 6.957497 0.000000 12311 +ibmrisc 0 1 6.957497 0.000000 12312 +hawblitzel 0 1 6.957497 0.000000 12313 +ieeesupercomput 0 1 6.957497 0.000000 12314 +spiteof 0 1 6.957497 0.000000 12315 +scommun 0 1 6.957497 0.000000 12316 +inferior 0 1 6.957497 0.000000 12317 +tmccm 0 1 6.957497 0.000000 12318 +standardmessag 0 1 6.957497 0.000000 12319 +tooffer 0 1 6.957497 0.000000 12320 +networkadapt 0 1 6.957497 0.000000 12321 +yieldsa 0 1 6.957497 0.000000 12322 +communicationsubstr 0 1 6.957497 0.000000 12323 +cbenchmark 0 1 6.957497 0.000000 12324 +lowmessag 0 1 6.957497 0.000000 12325 +compens 0 1 6.957497 0.000000 12326 +networklat 0 1 6.957497 0.000000 12327 +availablempich 0 1 6.957497 0.000000 12328 +implementationbenchmark 0 1 6.957497 0.000000 12329 +firmwar 0 1 6.957497 0.000000 12330 +butdo 0 1 6.957497 0.000000 12331 +assumefamiliar 0 1 6.957497 0.000000 12332 +mainperform 0 1 6.957497 0.000000 12333 +timeof 0 1 6.957497 0.000000 12334 +smessag 0 1 6.957497 0.000000 12335 +theu 0 1 6.957497 0.000000 12336 +themeiko 0 1 6.957497 0.000000 12337 +thehpam 0 1 6.957497 0.000000 12338 +fddi 0 1 6.957497 0.000000 12339 +theparagon 0 1 6.957497 0.000000 12340 +thesp 0 1 6.957497 0.000000 12341 +networksus 0 1 6.957497 0.000000 12342 +anyndia 0 1 6.957497 0.000000 12343 +ascompar 0 1 6.957497 0.000000 12344 +anatm 0 1 6.957497 0.000000 12345 +systemsoftwar 0 1 6.957497 0.000000 12346 +streamcommun 0 1 6.957497 0.000000 12347 +flowcontrol 0 1 6.957497 0.000000 12348 +builtfrom 0 1 6.957497 0.000000 12349 +artmultiprocessor 0 1 6.957497 0.000000 12350 +systemcoordin 0 1 6.957497 0.000000 12351 +andrequir 0 1 6.957497 0.000000 12352 +clusterinterconnect 0 1 6.957497 0.000000 12353 +showappl 0 1 6.957497 0.000000 12354 +smallmessag 0 1 6.957497 0.000000 12355 +messagesimplement 0 1 6.957497 0.000000 12356 +abstractth 0 1 6.957497 0.000000 12357 +overlapcomput 0 1 6.957497 0.000000 12358 +sacrificingprocessor 0 1 6.957497 0.000000 12359 +passingmultiprocessor 0 1 6.957497 0.000000 12360 +researchprototyp 0 1 6.957497 0.000000 12361 +communicationoverhead 0 1 6.957497 0.000000 12362 +simplecommun 0 1 6.957497 0.000000 12363 +isintrins 0 1 6.957497 0.000000 12364 +thehardwar 0 1 6.957497 0.000000 12365 +ncube 0 1 6.957497 0.000000 12366 +memoryextens 0 1 6.957497 0.000000 12367 +messagesar 0 1 6.957497 0.000000 12368 +forwhich 0 1 6.957497 0.000000 12369 +hardwaresupport 0 1 6.957497 0.000000 12370 +ofenhanc 0 1 6.957497 0.000000 12371 +efficientcommun 0 1 6.957497 0.000000 12372 +sitesact 0 1 6.957497 0.000000 12373 +messagesin 0 1 6.957497 0.000000 12374 +projectfor 0 1 6.957497 0.000000 12375 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..0968fb0a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +applic 2 170 1.791759 3.583518 56 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +network 0 168 1.791759 0.000000 61 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +provid 0 121 2.079442 0.000000 94 +manag 0 114 2.197225 0.000000 125 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +user 1 104 2.302585 2.302585 137 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +commun 1 95 2.397895 2.397895 157 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +wide 1 84 2.484907 2.484907 185 +chang 1 82 2.484907 2.484907 163 +resourc 0 81 2.484907 0.000000 172 +view 0 70 2.708050 0.000000 254 +would 0 67 2.708050 0.000000 251 +integr 0 67 2.708050 0.000000 245 +plan 1 65 2.772589 2.772589 272 +virtual 1 62 2.772589 2.772589 285 +interact 0 62 2.772589 0.000000 270 +creat 0 63 2.772589 0.000000 277 +dept 0 64 2.772589 0.000000 291 +space 1 57 2.890372 2.890372 310 +sever 0 56 2.890372 0.000000 322 +explor 0 58 2.890372 0.000000 324 +direct 0 57 2.890372 0.000000 316 +detail 0 57 2.890372 0.000000 321 +variou 0 56 2.890372 0.000000 317 +approach 0 48 3.044522 0.000000 366 +featur 1 46 3.091042 3.091042 386 +electron 0 47 3.091042 0.000000 379 +possibl 0 47 3.091042 0.000000 378 +offer 0 43 3.178054 0.000000 414 +futur 1 41 3.218876 3.218876 427 +combin 0 42 3.218876 0.000000 421 +might 0 41 3.218876 0.000000 426 +autom 0 41 3.218876 0.000000 434 +howev 0 41 3.218876 0.000000 422 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +expect 0 37 3.332205 0.000000 484 +multi 0 36 3.367296 0.000000 493 +manual 0 35 3.401197 0.000000 504 +next 0 34 3.401197 0.000000 517 +toler 1 33 3.433987 3.433987 533 +within 0 33 3.433987 0.000000 525 +fault 1 32 3.465736 3.465736 547 +extend 0 32 3.465736 0.000000 539 +rang 1 30 3.555348 3.555348 565 +exist 0 30 3.555348 0.000000 569 +option 0 30 3.555348 0.000000 575 +compon 0 30 3.555348 0.000000 570 +secur 0 30 3.555348 0.000000 577 +becom 0 28 3.610918 0.000000 603 +hope 0 28 3.610918 0.000000 610 +effort 1 26 3.688879 3.688879 652 +enhanc 0 26 3.688879 0.000000 644 +reliabl 0 25 3.737670 0.000000 674 +higher 0 24 3.761200 0.000000 690 +demonstr 0 24 3.761200 0.000000 694 +initi 0 23 3.806662 0.000000 717 +varieti 0 22 3.850148 0.000000 740 +thu 1 21 3.912023 3.912023 773 +similar 0 21 3.912023 0.000000 771 +flexibl 0 21 3.912023 0.000000 792 +among 0 21 3.912023 0.000000 781 +toolkit 0 20 3.951244 0.000000 835 +media 1 19 4.007333 4.007333 861 +element 0 18 4.060443 0.000000 895 +speed 0 18 4.060443 0.000000 911 +failur 0 18 4.060443 0.000000 898 +commerci 0 16 4.174387 0.000000 1005 +latenc 0 16 4.174387 0.000000 993 +stock 0 16 4.174387 0.000000 1007 +permit 0 16 4.174387 0.000000 962 +remot 1 15 4.248495 4.248495 1041 +novel 0 15 4.248495 0.000000 1039 +capabl 0 15 4.248495 0.000000 1016 +transit 0 15 4.248495 0.000000 1046 +action 0 15 4.248495 0.000000 1038 +horu 2 14 4.317488 8.634976 1116 +demand 1 14 4.317488 4.317488 1073 +floor 0 14 4.317488 0.000000 1070 +signific 0 13 4.382027 0.000000 1125 +coordin 0 13 4.382027 0.000000 1182 +kenneth 0 12 4.465908 0.000000 1265 +branch 1 11 4.553877 4.553877 1318 +reness 0 11 4.553877 0.000000 1333 +extrem 0 11 4.553877 0.000000 1330 +market 0 11 4.553877 0.000000 1361 +impact 0 11 4.553877 0.000000 1334 +prior 1 10 4.653960 4.653960 1438 +success 1 10 4.653960 4.653960 1390 +traffic 0 10 4.653960 0.000000 1421 +birman 0 9 4.753590 0.000000 1531 +robbert 0 9 4.753590 0.000000 1529 +telecommun 0 9 4.753590 0.000000 1565 +govern 0 9 4.753590 0.000000 1581 +occur 0 9 4.753590 0.000000 1572 +manufactur 0 8 4.875197 0.000000 1634 +illustr 0 8 4.875197 0.000000 1679 +synchroni 0 7 5.010635 0.000000 1923 +ground 0 7 5.010635 0.000000 1955 +privaci 0 6 5.164786 0.000000 2144 +benefit 0 6 5.164786 0.000000 2213 +isi 2 5 5.347108 10.694216 2443 +matur 0 5 5.347108 0.000000 2269 +isth 0 5 5.347108 0.000000 2532 +licens 0 5 5.347108 0.000000 2520 +mission 0 5 5.347108 0.000000 2465 +respond 0 5 5.347108 0.000000 2354 +substanti 0 4 5.568345 0.000000 2921 +visibl 0 4 5.568345 0.000000 2994 +naval 0 4 5.568345 0.000000 2920 +rapidli 0 4 5.568345 0.000000 2850 +militari 1 3 5.857933 5.857933 3326 +reconfigur 0 3 5.857933 0.000000 3556 +ofhoru 0 2 6.263398 0.000000 5181 +offersa 0 2 6.263398 0.000000 4071 +securityand 0 2 6.263398 0.000000 5066 +retain 0 2 6.263398 0.000000 5443 +basedcommun 0 2 6.263398 0.000000 4348 +stratu 0 2 6.263398 0.000000 5345 +isdescrib 0 2 6.263398 0.000000 5444 +groupwar 0 2 6.263398 0.000000 4857 +theatr 0 2 6.263398 0.000000 5173 +environmenthoru 0 1 6.957497 0.000000 12408 +shoru 0 1 6.957497 0.000000 12409 +reliabledistribut 0 1 6.957497 0.000000 12410 +demonstrategroupwar 0 1 6.957497 0.000000 12411 +foundto 0 1 6.957497 0.000000 12412 +synchronousprocess 0 1 6.957497 0.000000 12413 +importantresearch 0 1 6.957497 0.000000 12414 +performancer 0 1 6.957497 0.000000 12415 +calledact 0 1 6.957497 0.000000 12416 +messageswith 0 1 6.957497 0.000000 12417 +playbacksystem 0 1 6.957497 0.000000 12418 +calledcontinu 0 1 6.957497 0.000000 12419 +multimediaserv 0 1 6.957497 0.000000 12420 +telemedicin 0 1 6.957497 0.000000 12421 +videoon 0 1 6.957497 0.000000 12422 +andsecur 0 1 6.957497 0.000000 12423 +expectrapid 0 1 6.957497 0.000000 12424 +uptak 0 1 6.957497 0.000000 12425 +spana 0 1 6.957497 0.000000 12426 +financialtrad 0 1 6.957497 0.000000 12427 +factori 0 1 6.957497 0.000000 12428 +fordiscret 0 1 6.957497 0.000000 12429 +beingexplor 0 1 6.957497 0.000000 12430 +othernon 0 1 6.957497 0.000000 12431 +hiper 0 1 6.957497 0.000000 12432 +systemthat 0 1 6.957497 0.000000 12433 +aegi 0 1 6.957497 0.000000 12434 +battleradar 0 1 6.957497 0.000000 12435 +benefitfrom 0 1 6.957497 0.000000 12436 +migrateisi 0 1 6.957497 0.000000 12437 +communityin 0 1 6.957497 0.000000 12438 +agreementswith 0 1 6.957497 0.000000 12439 +subsidiari 0 1 6.957497 0.000000 12440 +mixtur 0 1 6.957497 0.000000 12441 +technologieswil 0 1 6.957497 0.000000 12442 +beseen 0 1 6.957497 0.000000 12443 +belowshow 0 1 6.957497 0.000000 12444 +andus 0 1 6.957497 0.000000 12445 +asset 0 1 6.957497 0.000000 12446 +thissort 0 1 6.957497 0.000000 12447 +utmost 0 1 6.957497 0.000000 12448 +whilealso 0 1 6.957497 0.000000 12449 +civilianand 0 1 6.957497 0.000000 12450 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..73192106 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +group 1 183 1.609438 1.609438 36 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +data 0 170 1.791759 0.000000 49 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +architectur 0 139 1.945910 0.000000 77 +high 1 130 2.079442 2.079442 101 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +user 1 104 2.302585 2.302585 137 +access 0 102 2.302585 0.000000 136 +commun 1 95 2.397895 2.397895 157 +level 1 87 2.484907 2.484907 180 +build 0 85 2.484907 0.000000 184 +multimedia 1 68 2.708050 2.708050 258 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +improv 1 62 2.772589 2.772589 289 +approach 0 48 3.044522 0.000000 366 +adapt 0 46 3.091042 0.000000 387 +video 1 44 3.135494 3.135494 405 +combin 1 42 3.218876 3.218876 421 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +brian 0 38 3.295837 0.000000 466 +workstat 0 37 3.332205 0.000000 479 +secur 0 30 3.555348 0.000000 577 +platform 0 29 3.583519 0.000000 591 +cluster 0 28 3.610918 0.000000 612 +reliabl 1 25 3.737670 3.737670 674 +store 0 24 3.761200 0.000000 693 +flexibl 0 21 3.912023 0.000000 792 +fund 0 21 3.912023 0.000000 805 +toolkit 1 20 3.951244 3.951244 835 +portabl 0 20 3.951244 0.000000 819 +smith 0 20 3.951244 0.000000 820 +media 0 19 4.007333 0.000000 861 +critic 0 16 4.174387 0.000000 982 +commerci 0 16 4.174387 0.000000 1005 +horu 1 14 4.317488 4.317488 1116 +audio 0 14 4.317488 0.000000 1094 +eicken 0 13 4.382027 0.000000 1134 +primit 0 11 4.553877 0.000000 1317 +facilit 1 10 4.653960 4.653960 1412 +rapid 0 10 4.653960 0.000000 1453 +strength 0 9 4.753590 0.000000 1494 +transport 0 8 4.875197 0.000000 1672 +fromth 0 7 5.010635 0.000000 1802 +contract 0 6 5.164786 0.000000 1985 +testb 0 5 5.347108 0.000000 2456 +darpa 0 4 5.568345 0.000000 2944 +dramat 0 3 5.857933 0.000000 3239 +magnitud 0 3 5.857933 0.000000 3582 +militari 0 3 5.857933 0.000000 3326 +multimediaappl 0 3 5.857933 0.000000 3274 +todevelop 0 2 6.263398 0.000000 5448 +communicationprimit 0 2 6.263398 0.000000 5449 +thorstenvon 0 2 6.263398 0.000000 5450 +medianet 1 1 6.957497 6.957497 12468 +projectmedianet 0 1 6.957497 0.000000 12469 +protocolsth 0 1 6.957497 0.000000 12470 +communicationmak 0 1 6.957497 0.000000 12471 +foradvanc 0 1 6.957497 0.000000 12472 +includeaudio 0 1 6.957497 0.000000 12473 +technologyofficefor 0 1 6.957497 0.000000 12474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..623b9122 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +introduct 0 126 2.079442 0.000000 87 +document 0 121 2.079442 0.000000 89 +theori 1 111 2.197225 2.197225 127 +user 0 104 2.302585 0.000000 137 +mani 0 92 2.397895 0.000000 150 +help 0 83 2.484907 0.000000 175 +main 0 67 2.708050 0.000000 256 +written 0 63 2.772589 0.000000 278 +browser 1 56 2.890372 2.890372 313 +reason 0 57 2.890372 0.000000 318 +index 0 56 2.890372 0.000000 309 +suggest 0 53 2.944439 0.000000 331 +autom 0 41 3.218876 0.000000 434 +announc 0 40 3.258097 0.000000 441 +articl 1 33 3.433987 3.433987 530 +linux 0 27 3.637586 0.000000 631 +theorem 0 21 3.912023 0.000000 786 +feedback 0 19 4.007333 0.000000 854 +nuprl 2 10 4.653960 9.307920 1402 +curiou 0 5 5.347108 0.000000 2541 +vaughn 0 1 6.957497 0.000000 12475 +askaltavista 0 1 6.957497 0.000000 12476 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..6fd02498 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +includ 1 208 1.609438 1.609438 42 +oper 0 180 1.609438 0.000000 34 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +file 2 132 1.945910 3.891820 70 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +like 0 132 1.945910 0.000000 81 +assign 0 135 1.945910 0.000000 66 +process 0 142 1.945910 0.000000 72 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +specif 0 106 2.197225 0.000000 106 +theori 0 111 2.197225 0.000000 127 +make 0 111 2.197225 0.000000 120 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +commun 1 95 2.397895 2.397895 157 +follow 1 92 2.397895 2.397895 143 +call 0 91 2.397895 0.000000 153 +center 0 88 2.397895 0.000000 158 +section 0 94 2.397895 0.000000 149 +mani 0 92 2.397895 0.000000 150 +activ 1 84 2.484907 2.484907 182 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +contain 0 81 2.484907 0.000000 174 +messag 2 76 2.564949 5.129898 212 +exampl 1 77 2.564949 2.564949 195 +sourc 1 77 2.564949 2.564949 201 +want 0 79 2.564949 0.000000 199 +master 0 76 2.564949 0.000000 216 +interfac 0 79 2.564949 0.000000 209 +addit 0 74 2.639057 0.000000 228 +main 0 67 2.708050 0.000000 256 +experi 1 64 2.772589 2.772589 283 +creat 0 63 2.772589 0.000000 277 +previou 0 62 2.772589 0.000000 290 +locat 1 59 2.833213 2.833213 303 +type 1 61 2.833213 2.833213 296 +simpl 0 60 2.833213 0.000000 298 +space 0 57 2.890372 0.000000 310 +variou 0 56 2.890372 0.000000 317 +local 1 55 2.944439 2.944439 334 +found 1 53 2.944439 2.944439 337 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +sampl 0 53 2.944439 0.000000 339 +processor 0 54 2.944439 0.000000 335 +hardwar 0 51 2.995732 0.000000 350 +run 0 51 2.995732 0.000000 347 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +directori 1 45 3.135494 3.135494 396 +execut 1 45 3.135494 3.135494 404 +describ 0 45 3.135494 0.000000 400 +offer 0 43 3.178054 0.000000 414 +programm 0 39 3.258097 0.000000 445 +must 0 40 3.258097 0.000000 442 +continu 0 39 3.258097 0.000000 448 +open 0 38 3.295837 0.000000 469 +return 1 34 3.401197 3.401197 502 +global 0 34 3.401197 0.000000 520 +word 0 34 3.401197 0.000000 508 +built 0 29 3.583519 0.000000 592 +releas 0 28 3.610918 0.000000 616 +pass 0 28 3.610918 0.000000 611 +though 0 27 3.637586 0.000000 622 +proc 0 26 3.688879 0.000000 649 +frame 0 24 3.761200 0.000000 684 +displai 0 23 3.806662 0.000000 712 +instal 1 22 3.850148 3.850148 754 +instead 0 22 3.850148 0.000000 756 +path 0 21 3.912023 0.000000 778 +portabl 0 20 3.951244 0.000000 819 +sure 0 20 3.951244 0.000000 813 +debug 1 17 4.110874 4.110874 944 +regular 0 17 4.110874 0.000000 929 +stop 0 17 4.110874 0.000000 942 +layer 0 17 4.110874 0.000000 926 +remot 1 15 4.248495 4.248495 1041 +configur 0 15 4.248495 0.000000 1012 +overhead 0 15 4.248495 0.000000 1035 +fortran 0 15 4.248495 0.000000 1027 +split 2 14 4.317488 8.634976 1078 +command 1 14 4.317488 4.317488 1083 +matlab 0 14 4.317488 0.000000 1081 +script 1 13 4.382027 4.382027 1171 +step 0 13 4.382027 0.000000 1138 +emac 0 13 4.382027 0.000000 1143 +difficulti 0 13 4.382027 0.000000 1132 +remov 0 12 4.465908 0.000000 1225 +insid 0 12 4.465908 0.000000 1262 +characterist 0 12 4.465908 0.000000 1257 +replic 0 12 4.465908 0.000000 1231 +node 2 11 4.553877 9.107754 1326 +statement 1 11 4.553877 4.553877 1313 +eight 0 11 4.553877 0.000000 1331 +bandwidth 0 11 4.553877 0.000000 1365 +enter 0 10 4.653960 0.000000 1454 +stack 0 10 4.653960 0.000000 1389 +login 1 9 4.753590 4.753590 1550 +informationabout 0 9 4.753590 0.000000 1515 +readm 0 8 4.875197 0.000000 1699 +job 0 8 4.875197 0.000000 1702 +insert 0 8 4.875197 0.000000 1687 +round 0 8 4.875197 0.000000 1769 +header 1 7 5.010635 5.010635 1787 +hit 0 7 5.010635 0.000000 1965 +attach 0 7 5.010635 0.000000 1785 +exactli 0 7 5.010635 0.000000 1817 +usag 0 6 5.164786 0.000000 2209 +neither 0 6 5.164786 0.000000 1990 +phase 0 6 5.164786 0.000000 1977 +onto 0 6 5.164786 0.000000 2089 +proce 0 6 5.164786 0.000000 2114 +nativ 0 6 5.164786 0.000000 2192 +whichi 0 6 5.164786 0.000000 2056 +shell 1 5 5.347108 5.347108 2353 +overlap 0 5 5.347108 0.000000 2368 +theth 0 5 5.347108 0.000000 2325 +czar 0 5 5.347108 0.000000 2503 +cuc 1 4 5.568345 5.568345 2630 +makefil 1 4 5.568345 5.568345 2662 +spam 1 4 5.568345 5.568345 2927 +arch 0 4 5.568345 0.000000 2995 +forparallel 0 4 5.568345 0.000000 2703 +hide 0 4 5.568345 0.000000 2996 +commonli 0 4 5.568345 0.000000 2877 +asymptot 0 4 5.568345 0.000000 2676 +grzegorz 0 4 5.568345 0.000000 2923 +czajkowski 0 4 5.568345 0.000000 2924 +andyou 0 3 5.857933 0.000000 3256 +setenv 1 2 6.263398 6.263398 4491 +haveth 0 2 6.263398 0.000000 5378 +properli 0 2 6.263398 0.000000 5454 +animplement 0 2 6.263398 0.000000 4931 +granita 1 1 6.957497 6.957497 12493 +bench 1 1 6.957497 6.957497 12494 +am_run 1 1 6.957497 6.957497 12495 +tcsh 1 1 6.957497 6.957497 12496 +bash 1 1 6.957497 6.957497 12497 +gmake 1 1 6.957497 6.957497 12498 +ampicc 1 1 6.957497 6.957497 12499 +granitathrough 0 1 6.957497 0.000000 12500 +asinteract 0 1 6.957497 0.000000 12501 +problemsdur 0 1 6.957497 0.000000 12502 +stufffrom 0 1 6.957497 0.000000 12503 +unam 0 1 6.957497 0.000000 12504 +manyou 0 1 6.957497 0.000000 12505 +infoexplor 0 1 6.957497 0.000000 12506 +commandsand 0 1 6.957497 0.000000 12507 +activemassag 0 1 6.957497 0.000000 12508 +peor 0 1 6.957497 0.000000 12509 +messagesor 0 1 6.957497 0.000000 12510 +homegrown 0 1 6.957497 0.000000 12511 +softwarein 0 1 6.957497 0.000000 12512 +besur 0 1 6.957497 0.000000 12513 +csplit 0 1 6.957497 0.000000 12514 +globalpoint 0 1 6.957497 0.000000 12515 +dereferenc 0 1 6.957497 0.000000 12516 +latencyof 0 1 6.957497 0.000000 12517 +shellsshould 0 1 6.957497 0.000000 12518 +asact 0 1 6.957497 0.000000 12519 +scriptsloc 0 1 6.957497 0.000000 12520 +programfoo 0 1 6.957497 0.000000 12521 +foodebug 0 1 6.957497 0.000000 12522 +splitc_debug 0 1 6.957497 0.000000 12523 +aftersplitc_main 0 1 6.957497 0.000000 12524 +ongranita 0 1 6.957497 0.000000 12525 +youwant 0 1 6.957497 0.000000 12526 +thenattach 0 1 6.957497 0.000000 12527 +breakpoint 0 1 6.957497 0.000000 12528 +messagesact 0 1 6.957497 0.000000 12529 +layerthat 0 1 6.957497 0.000000 12530 +triplat 0 1 6.957497 0.000000 12531 +libspgam 0 1 6.957497 0.000000 12532 +aand 0 1 6.957497 0.000000 12533 +beforerun 0 1 6.957497 0.000000 12534 +runningprgm 0 1 6.957497 0.000000 12535 +mpimpi 0 1 6.957497 0.000000 12536 +popularmessag 0 1 6.957497 0.000000 12537 +mpich 0 1 6.957497 0.000000 12538 +overact 0 1 6.957497 0.000000 12539 +easiest 0 1 6.957497 0.000000 12540 +fooyou 0 1 6.957497 0.000000 12541 +lookat 0 1 6.957497 0.000000 12542 +examplesin 0 1 6.957497 0.000000 12543 +ampi 0 1 6.957497 0.000000 12544 +likeordinari 0 1 6.957497 0.000000 12545 +softwaresoftwar 0 1 6.957497 0.000000 12546 +xpdbx 0 1 6.957497 0.000000 12547 +bison 0 1 6.957497 0.000000 12548 +problemsif 0 1 6.957497 0.000000 12549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..6c41b18d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +model 1 145 1.945910 1.945910 69 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +version 1 113 2.197225 2.197225 122 +code 0 108 2.197225 0.000000 116 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +peopl 0 96 2.302585 0.000000 132 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +commun 0 95 2.397895 0.000000 157 +activ 0 84 2.484907 0.000000 182 +environ 0 84 2.484907 0.000000 177 +method 0 80 2.564949 0.000000 213 +simul 1 66 2.708050 2.708050 255 +creat 1 63 2.772589 2.772589 277 +overview 0 56 2.890372 0.000000 323 +direct 0 57 2.890372 0.000000 316 +scientif 1 53 2.944439 2.944439 341 +hardwar 0 51 2.995732 0.000000 350 +numer 0 49 3.044522 0.000000 369 +physic 0 47 3.091042 0.000000 377 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +discuss 0 45 3.135494 0.000000 399 +paul 0 38 3.295837 0.000000 471 +collabor 1 32 3.465736 3.465736 543 +transform 0 32 3.465736 0.000000 542 +richard 0 31 3.496508 0.000000 559 +compon 0 30 3.555348 0.000000 570 +semant 0 29 3.583519 0.000000 587 +propos 0 28 3.610918 0.000000 602 +effort 1 26 3.688879 3.688879 652 +defin 0 22 3.850148 0.000000 746 +geometri 0 22 3.850148 0.000000 752 +reduc 0 22 3.850148 0.000000 759 +qualiti 0 20 3.951244 0.000000 832 +longer 0 20 3.951244 0.000000 816 +particularli 0 19 4.007333 0.000000 867 +geometr 0 19 4.007333 0.000000 852 +exercis 0 19 4.007333 0.000000 842 +brief 0 16 4.174387 0.000000 1001 +topolog 0 14 4.317488 0.000000 1089 +mesh 0 11 4.553877 0.000000 1351 +guarante 0 10 4.653960 0.000000 1391 +rais 0 8 4.875197 0.000000 1711 +manufactur 0 8 4.875197 0.000000 1634 +thegoal 0 6 5.164786 0.000000 2033 +proce 0 6 5.164786 0.000000 2114 +synthes 0 5 5.347108 0.000000 2451 +weyl 0 4 5.568345 0.000000 2854 +substrat 0 4 5.568345 0.000000 2857 +nist 0 4 5.568345 0.000000 2973 +zippel 0 4 5.568345 0.000000 2879 +theus 0 4 5.568345 0.000000 2992 +rick 0 4 5.568345 0.000000 2646 +chew 1 3 5.857933 5.857933 3618 +enorm 0 3 5.857933 0.000000 3431 +expend 0 2 6.263398 0.000000 5451 +scientificsoftwar 0 2 6.263398 0.000000 5038 +andform 0 2 6.263398 0.000000 4274 +levelprogram 0 2 6.263398 0.000000 5452 +insystem 0 2 6.263398 0.000000 4172 +ideason 0 2 6.263398 0.000000 4469 +microstorag 0 2 6.263398 0.000000 4887 +palmer 0 2 6.263398 0.000000 5453 +simlab 1 1 6.957497 6.957497 12477 +oncomplex 0 1 6.957497 0.000000 12478 +bringingtogeth 0 1 6.957497 0.000000 12479 +symbolicmathemat 0 1 6.957497 0.000000 12480 +levelat 0 1 6.957497 0.000000 12481 +softwarepackag 0 1 6.957497 0.000000 12482 +microstoragearchitectur 0 1 6.957497 0.000000 12483 +computeralgebra 0 1 6.957497 0.000000 12484 +thechain 0 1 6.957497 0.000000 12485 +thearpa 0 1 6.957497 0.000000 12486 +madefast 0 1 6.957497 0.000000 12487 +ofnon 0 1 6.957497 0.000000 12488 +contemporan 0 1 6.957497 0.000000 12489 +chainsprogram 0 1 6.957497 0.000000 12490 +complextopolog 0 1 6.957497 0.000000 12491 +numericalalgorithm 0 1 6.957497 0.000000 12492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..cfc70e95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +cornel 1 215 1.386294 1.386294 23 +public 0 202 1.609438 0.000000 43 +contact 0 153 1.791759 0.000000 59 +code 1 108 2.197225 2.197225 116 +memori 0 101 2.302585 0.000000 139 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +sourc 1 77 2.564949 2.564949 201 +share 0 59 2.833213 0.000000 304 +releas 1 28 3.610918 3.610918 616 +multiprocessor 0 28 3.610918 0.000000 605 +berkelei 0 26 3.688879 0.000000 657 +supercomput 0 25 3.737670 0.000000 681 +prepar 1 20 3.951244 3.951244 824 +split 2 14 4.317488 8.634976 1078 +eicken 1 13 4.382027 4.382027 1134 +thorsten 0 13 4.382027 0.000000 1133 +chao 0 8 4.875197 0.000000 1753 +goldstein 0 6 5.164786 0.000000 2168 +culler 0 5 5.347108 0.000000 2381 +spam 0 4 5.568345 0.000000 2927 +grzegorz 0 4 5.568345 0.000000 2923 +czajkowski 0 4 5.568345 0.000000 2924 +neta 0 3 5.857933 0.000000 3789 +dusseau 0 3 5.857933 0.000000 3382 +yelick 0 3 5.857933 0.000000 3374 +thorstenvon 0 2 6.263398 0.000000 5450 +multiprocessorsa 0 2 6.263398 0.000000 5455 +krishnamurthi 0 2 6.263398 0.000000 5408 +lumetta 0 2 6.263398 0.000000 5409 +contactthorsten 0 2 6.263398 0.000000 5438 +ccornel 0 1 6.957497 0.000000 12550 +implementationssplit 0 1 6.957497 0.000000 12551 +isimpl 0 1 6.957497 0.000000 12552 +messagesfor 0 1 6.957497 0.000000 12553 +ofsplit 0 1 6.957497 0.000000 12554 +distr 0 1 6.957497 0.000000 12555 +implementedon 0 1 6.957497 0.000000 12556 +contactchi 0 1 6.957497 0.000000 12557 +runningsolari 0 1 6.957497 0.000000 12558 +mattwelsh 0 1 6.957497 0.000000 12559 +cparallel 0 1 6.957497 0.000000 12560 +abstractproject 0 1 6.957497 0.000000 12561 +sitessplit 0 1 6.957497 0.000000 12562 +chome 0 1 6.957497 0.000000 12563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..39d5cfb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +cornel 0 215 1.386294 0.000000 23 +second 0 81 2.484907 0.000000 166 +html 0 75 2.639057 0.000000 235 +browser 0 56 2.890372 0.000000 313 +move 0 47 3.091042 0.000000 382 +http 0 41 3.218876 0.000000 420 +default 0 5 5.347108 0.000000 2335 +redirect 0 1 6.957497 0.000000 12564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..dbb4648e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,221 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +cornel 2 215 1.386294 2.772588 23 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +public 0 202 1.609438 0.000000 43 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +professor 1 137 1.945910 1.945910 76 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +intern 1 108 2.197225 2.197225 128 +pleas 0 113 2.197225 0.000000 114 +theori 0 111 2.197225 0.000000 127 +structur 0 106 2.197225 0.000000 105 +assist 0 112 2.197225 0.000000 113 +technic 1 100 2.302585 2.302585 140 +proceed 1 93 2.397895 2.397895 152 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +mani 1 92 2.397895 2.397895 150 +associ 1 93 2.397895 2.397895 151 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +octob 0 89 2.397895 0.000000 156 +search 0 95 2.397895 0.000000 155 +ieee 1 86 2.484907 2.484907 190 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +control 0 82 2.484907 0.000000 164 +server 1 76 2.564949 2.564949 204 +sourc 0 77 2.564949 0.000000 201 +master 0 76 2.564949 0.000000 216 +dynam 0 76 2.564949 0.000000 194 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +laboratori 1 63 2.772589 2.772589 292 +prof 1 64 2.772589 2.772589 273 +foundat 1 62 2.772589 2.772589 286 +ithaca 0 65 2.772589 0.000000 294 +complex 0 64 2.772589 0.000000 269 +artifici 0 63 2.772589 0.000000 280 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +abstract 0 62 2.772589 0.000000 276 +automat 1 61 2.833213 2.833213 306 +locat 0 59 2.833213 0.000000 303 +type 0 61 2.833213 0.000000 296 +direct 1 57 2.890372 2.890372 316 +browser 0 56 2.890372 0.000000 313 +unix 0 58 2.890372 0.000000 308 +index 0 56 2.890372 0.000000 309 +three 0 54 2.944439 0.000000 330 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +approach 0 48 3.044522 0.000000 366 +done 1 47 3.091042 3.091042 381 +move 1 47 3.091042 3.091042 382 +video 1 44 3.135494 3.135494 405 +discuss 0 45 3.135494 0.000000 399 +mechan 1 43 3.178054 3.178054 416 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +submit 0 39 3.258097 0.000000 440 +author 0 39 3.258097 0.000000 450 +microsoft 0 38 3.295837 0.000000 468 +robot 2 36 3.367296 6.734592 497 +global 0 34 3.401197 0.000000 520 +tech 0 35 3.401197 0.000000 515 +queri 0 33 3.433987 0.000000 524 +extend 0 32 3.465736 0.000000 539 +titl 0 31 3.496508 0.000000 556 +hard 0 30 3.555348 0.000000 563 +full 0 28 3.610918 0.000000 615 +cluster 0 28 3.610918 0.000000 612 +multiprocessor 0 28 3.610918 0.000000 605 +hope 0 28 3.610918 0.000000 610 +pass 0 28 3.610918 0.000000 611 +manipul 1 27 3.637586 3.637586 624 +arrai 0 27 3.637586 0.000000 627 +proc 1 26 3.688879 3.688879 649 +revis 1 26 3.688879 3.688879 640 +detect 0 26 3.688879 0.000000 646 +task 0 25 3.737670 0.000000 678 +greg 1 24 3.761200 3.761200 695 +motion 0 24 3.761200 0.000000 699 +frame 0 24 3.761200 0.000000 684 +thank 0 23 3.806662 0.000000 721 +mobil 0 23 3.806662 0.000000 730 +break 1 20 3.951244 3.951244 812 +supervis 0 20 3.951244 0.000000 840 +mpeg 0 20 3.951244 0.000000 831 +scheme 0 20 3.951244 0.000000 818 +boston 0 19 4.007333 0.000000 862 +scott 0 18 4.060443 0.000000 884 +miller 1 17 4.110874 4.110874 949 +match 1 16 4.174387 4.174387 965 +diego 1 16 4.174387 4.174387 992 +micro 1 15 4.248495 4.248495 1031 +configur 0 15 4.248495 0.000000 1012 +massiv 0 15 4.248495 0.000000 1026 +scene 0 14 4.317488 0.000000 1114 +split 0 14 4.317488 0.000000 1078 +anonym 0 14 4.317488 0.000000 1100 +canada 0 13 4.382027 0.000000 1158 +daniel 1 12 4.465908 4.465908 1233 +bruce 1 12 4.465908 4.465908 1226 +franc 0 12 4.465908 0.000000 1276 +tour 0 11 4.553877 0.000000 1307 +peter 0 11 4.553877 0.000000 1316 +keyword 0 11 4.553877 0.000000 1356 +donald 2 9 4.753590 9.507180 1510 +transmiss 0 9 4.753590 0.000000 1588 +juan 0 9 4.753590 0.000000 1580 +wilson 0 9 4.753590 0.000000 1536 +invari 1 8 4.875197 4.875197 1748 +potenti 0 8 4.875197 0.000000 1690 +japan 0 8 4.875197 0.000000 1762 +siggraph 0 8 4.875197 0.000000 1773 +justin 1 7 5.010635 5.010635 1789 +ramin 1 7 5.010635 5.010635 1820 +sensor 0 7 5.010635 0.000000 1920 +zabih 1 6 5.164786 5.164786 2138 +huttenloch 1 6 5.164786 5.164786 1983 +actuat 0 5 5.347108 0.000000 2442 +solid 0 5 5.347108 0.000000 2255 +symmetr 0 4 5.568345 0.000000 2908 +publicationsth 0 4 5.568345 0.000000 2859 +triangul 0 4 5.568345 0.000000 2903 +csrvl 1 3 5.857933 5.857933 3543 +bhringer 1 3 5.857933 5.857933 3606 +microfabr 1 3 5.857933 5.857933 3610 +jen 0 3 5.857933 0.000000 3378 +ryan 0 3 5.857933 0.000000 3679 +michel 0 3 5.857933 0.000000 3791 +szewczyk 0 3 5.857933 0.000000 3108 +voskuhl 0 3 5.857933 0.000000 3109 +matt 0 3 5.857933 0.000000 3792 +electro 1 2 6.263398 6.263398 5014 +mihailovich 1 2 6.263398 6.263398 5018 +macdonald 1 2 6.263398 6.263398 5006 +laboratorywelcom 0 2 6.263398 0.000000 5439 +mem 0 2 6.263398 0.000000 5007 +andclassif 0 2 6.263398 0.000000 5390 +windowsnt 0 2 6.263398 0.000000 5440 +ree 0 2 6.263398 0.000000 5009 +automationnic 0 2 6.263398 0.000000 5019 +homolog 0 2 6.263398 0.000000 5441 +andj 0 2 6.263398 0.000000 5020 +latomb 0 2 6.263398 0.000000 5021 +brigg 0 2 6.263398 0.000000 5008 +actuatorarrai 0 2 6.263398 0.000000 5017 +hing 0 2 6.263398 0.000000 5442 +montral 0 2 6.263398 0.000000 5394 +cytacki 0 2 6.263398 0.000000 5330 +aaron 0 2 6.263398 0.000000 4438 +csrvlcornel 0 1 6.957497 0.000000 12376 +nich 0 1 6.957497 0.000000 12377 +rrentli 0 1 6.957497 0.000000 12378 +ofresearch 0 1 6.957497 0.000000 12379 +pictor 0 1 6.957497 0.000000 12380 +projectsth 0 1 6.957497 0.000000 12381 +byramin 0 1 6.957497 0.000000 12382 +allowingscen 0 1 6.957497 0.000000 12383 +onplatform 0 1 6.957497 0.000000 12384 +nynet 0 1 6.957497 0.000000 12385 +foru 0 1 6.957497 0.000000 12386 +sproject 0 1 6.957497 0.000000 12387 +currentlyconsid 0 1 6.957497 0.000000 12388 +theissuesher 0 1 6.957497 0.000000 12389 +thecsrvl 0 1 6.957497 0.000000 12390 +serverar 0 1 6.957497 0.000000 12391 +sensorless 0 1 6.957497 0.000000 12392 +oiso 0 1 6.957497 0.000000 12393 +micromechan 0 1 6.957497 0.000000 12394 +quebc 0 1 6.957497 0.000000 12395 +authorthes 0 1 6.957497 0.000000 12396 +pedro 0 1 6.957497 0.000000 12397 +felzenszwalb 0 1 6.957497 0.000000 12398 +lilien 0 1 6.957497 0.000000 12399 +maharbiz 0 1 6.957497 0.000000 12400 +scharstein 0 1 6.957497 0.000000 12401 +stump 0 1 6.957497 0.000000 12402 +fernando 0 1 6.957497 0.000000 12403 +viton 0 1 6.957497 0.000000 12404 +wayt 0 1 6.957497 0.000000 12405 +welsh 0 1 6.957497 0.000000 12406 +whelan 0 1 6.957497 0.000000 12407 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..a1609b28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +cornel 1 215 1.386294 1.386294 23 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +group 1 183 1.609438 1.609438 36 +fall 0 181 1.609438 0.000000 40 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +construct 1 139 1.945910 1.945910 82 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +structur 1 106 2.197225 2.197225 105 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +manag 0 114 2.197225 0.000000 125 +access 1 102 2.302585 2.302585 136 +user 0 104 2.302585 0.000000 137 +text 0 98 2.302585 0.000000 133 +technic 0 100 2.302585 0.000000 140 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +librari 0 87 2.484907 0.000000 181 +internet 0 83 2.484907 0.000000 186 +activ 0 84 2.484907 0.000000 182 +build 0 85 2.484907 0.000000 184 +server 0 76 2.564949 0.000000 204 +materi 1 75 2.639057 2.639057 221 +onlin 0 75 2.639057 0.000000 223 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +order 0 69 2.708050 0.000000 249 +test 0 66 2.708050 0.000000 252 +knowledg 0 67 2.708050 0.000000 243 +collect 1 65 2.772589 2.772589 268 +locat 0 59 2.833213 0.000000 303 +overview 0 56 2.890372 0.000000 323 +found 0 53 2.944439 0.000000 337 +undergradu 0 54 2.944439 0.000000 338 +digit 0 52 2.995732 0.000000 348 +form 0 39 3.258097 0.000000 443 +john 0 33 3.433987 0.000000 532 +computersci 0 30 3.555348 0.000000 562 +determin 0 27 3.637586 0.000000 630 +consist 0 26 3.688879 0.000000 651 +util 0 21 3.912023 0.000000 774 +increas 0 20 3.951244 0.000000 829 +longer 0 20 3.951244 0.000000 816 +ever 0 19 4.007333 0.000000 872 +dean 0 14 4.317488 0.000000 1104 +captur 1 12 4.465908 4.465908 1232 +volum 0 11 4.553877 0.000000 1347 +explicit 0 9 4.753590 0.000000 1525 +extract 0 8 4.875197 0.000000 1728 +begun 0 5 5.347108 0.000000 2386 +allan 0 4 5.568345 0.000000 2849 +thisdocu 0 3 5.857933 0.000000 3336 +krafft 0 3 5.857933 0.000000 3638 +waysthat 0 2 6.263398 0.000000 5445 +tabular 0 2 6.263398 0.000000 4515 +informationag 0 2 6.263398 0.000000 5446 +hyperlink 0 2 6.263398 0.000000 5447 +oninform 0 2 6.263398 0.000000 4316 +projectinform 0 1 6.957497 0.000000 12451 +accessth 0 1 6.957497 0.000000 12452 +ofonlin 0 1 6.957497 0.000000 12453 +forhuman 0 1 6.957497 0.000000 12454 +hopcroft 0 1 6.957497 0.000000 12455 +davisin 0 1 6.957497 0.000000 12456 +researchextract 0 1 6.957497 0.000000 12457 +thestructur 0 1 6.957497 0.000000 12458 +extractinginform 0 1 6.957497 0.000000 12459 +collectionsof 0 1 6.957497 0.000000 12460 +nationwid 0 1 6.957497 0.000000 12461 +sciencetechn 0 1 6.957497 0.000000 12462 +moreaccess 0 1 6.957497 0.000000 12463 +toit 0 1 6.957497 0.000000 12464 +visitingscientist 0 1 6.957497 0.000000 12465 +jimdavi 0 1 6.957497 0.000000 12466 +jrdpublicationsjam 0 1 6.957497 0.000000 12467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..11b09b5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +project 0 340 1.098612 0.000000 18 +cornel 0 215 1.386294 0.000000 23 +softwar 0 220 1.386294 0.000000 30 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +develop 0 174 1.791759 0.000000 53 +peopl 0 96 2.302585 0.000000 132 +multimedia 1 68 2.708050 2.708050 258 +direct 0 57 2.890372 0.000000 316 +mission 0 5 5.347108 0.000000 2465 +zeno 0 3 5.857933 0.000000 3580 +potpourri 0 2 6.263398 0.000000 4547 +groupzeno 0 1 6.957497 0.000000 12565 +curricula 0 1 6.957497 0.000000 12566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..acab9e5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +topic 0 114 2.197225 0.000000 110 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +control 0 82 2.484907 0.000000 164 +stuff 0 87 2.484907 0.000000 171 +come 1 78 2.564949 2.564949 202 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +master 0 76 2.564949 0.000000 216 +degre 1 69 2.708050 2.708050 259 +receiv 0 66 2.708050 0.000000 244 +ithaca 0 65 2.772589 0.000000 294 +plan 0 65 2.772589 0.000000 272 +prof 0 64 2.772589 0.000000 273 +januari 0 62 2.772589 0.000000 264 +back 1 60 2.833213 2.833213 297 +date 0 51 2.995732 0.000000 344 +still 0 50 3.044522 0.000000 362 +california 0 46 3.091042 0.000000 388 +video 1 44 3.135494 3.135494 405 +long 0 43 3.178054 0.000000 413 +york 0 41 3.218876 0.000000 435 +vision 0 41 3.218876 0.000000 430 +compani 0 41 3.218876 0.000000 423 +robot 0 36 3.367296 0.000000 497 +game 0 36 3.367296 0.000000 498 +actual 0 28 3.610918 0.000000 604 +quit 0 27 3.637586 0.000000 633 +challeng 0 26 3.688879 0.000000 653 +hill 0 25 3.737670 0.000000 670 +frame 0 24 3.761200 0.000000 684 +motion 0 24 3.761200 0.000000 699 +viewer 0 21 3.912023 0.000000 787 +leav 0 21 3.912023 0.000000 772 +martin 0 21 3.912023 0.000000 794 +left 0 19 4.007333 0.000000 851 +miss 0 19 4.007333 0.000000 866 +wind 0 18 4.060443 0.000000 908 +bachelor 0 17 4.110874 0.000000 957 +segment 0 17 4.110874 0.000000 931 +jose 0 16 4.174387 0.000000 976 +month 0 15 4.248495 0.000000 1025 +mayb 0 15 4.248495 0.000000 1014 +enough 0 15 4.248495 0.000000 1040 +anywai 0 15 4.248495 0.000000 1047 +decid 0 14 4.317488 0.000000 1075 +land 0 12 4.465908 0.000000 1273 +meng 0 12 4.465908 0.000000 1214 +pagewelcom 0 11 4.553877 0.000000 1344 +see 0 11 4.553877 0.000000 1337 +island 0 11 4.553877 0.000000 1345 +santa 1 10 4.653960 4.653960 1441 +town 0 10 4.653960 0.000000 1458 +babylon 0 8 4.875197 0.000000 1731 +ramin 0 7 5.010635 0.000000 1820 +rain 1 6 5.164786 5.164786 2137 +snow 0 6 5.164786 0.000000 2031 +zabih 0 6 5.164786 0.000000 2138 +corp 0 6 5.164786 0.000000 2139 +green 0 4 5.568345 0.000000 2848 +barbara 1 3 5.857933 5.857933 3380 +csrvl 0 3 5.857933 0.000000 3543 +binghamton 0 3 5.857933 0.000000 3544 +season 0 2 6.263398 0.000000 4872 +syosset 0 1 6.957497 0.000000 9497 +californialockhe 0 1 6.957497 0.000000 9498 +yorkaltera 0 1 6.957497 0.000000 9499 +californiafun 0 1 6.957497 0.000000 9500 +domainvth 0 1 6.957497 0.000000 9501 +siteoth 0 1 6.957497 0.000000 9502 +worldcareermosaictop 0 1 6.957497 0.000000 9503 +kmai 0 1 6.957497 0.000000 9504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..383ec768 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +link 0 247 1.386294 0.000000 24 +public 1 202 1.609438 1.609438 43 +group 0 183 1.609438 0.000000 36 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +relat 0 139 1.945910 0.000000 68 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +peopl 1 96 2.302585 2.302585 132 +academ 0 82 2.484907 0.000000 178 +requir 0 81 2.484907 0.000000 167 +descript 0 64 2.772589 0.000000 271 +organ 0 65 2.772589 0.000000 265 +faculti 1 56 2.890372 2.890372 325 +overview 0 56 2.890372 0.000000 323 +talk 0 53 2.944439 0.000000 336 +visitor 0 49 3.044522 0.000000 371 +seminar 0 38 3.295837 0.000000 470 +staff 0 36 3.367296 0.000000 490 +utc 1 27 3.637586 3.637586 629 +alumni 0 21 3.912023 0.000000 807 +facil 0 20 3.951244 0.000000 814 +event 0 18 4.060443 0.000000 896 +catalog 0 10 4.653960 0.000000 1431 +upcom 0 8 4.875197 0.000000 1685 +calendar 0 8 4.875197 0.000000 1649 +recruit 0 6 5.164786 0.000000 2145 +admiss 0 4 5.568345 0.000000 2704 +pagegener 0 1 6.957497 0.000000 12567 +schedulespag 0 1 6.957497 0.000000 12568 +directoryth 0 1 6.957497 0.000000 12569 +universitywww 0 1 6.957497 0.000000 12570 +informationgrip 0 1 6.957497 0.000000 12571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..ae83cc58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +professor 0 137 1.945910 0.000000 76 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +mathemat 1 108 2.197225 2.197225 123 +check 0 115 2.197225 0.000000 118 +well 0 109 2.197225 0.000000 121 +associ 0 93 2.397895 0.000000 151 +journal 0 83 2.484907 0.000000 183 +learn 0 86 2.484907 0.000000 170 +exampl 0 77 2.564949 0.000000 195 +intellig 1 72 2.639057 2.639057 225 +servic 1 72 2.639057 2.639057 236 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +artifici 1 63 2.772589 2.772589 280 +previou 0 62 2.772589 0.000000 290 +automat 0 61 2.833213 0.000000 306 +index 0 56 2.890372 0.000000 309 +profession 0 51 2.995732 0.000000 345 +california 0 46 3.091042 0.000000 388 +third 0 43 3.178054 0.000000 412 +autom 1 41 3.218876 3.218876 434 +editor 0 41 3.218876 0.000000 433 +societi 0 40 3.258097 0.000000 456 +award 1 34 3.401197 3.401197 523 +next 0 34 3.401197 0.000000 517 +board 1 33 3.433987 3.433987 528 +profil 1 30 3.555348 3.555348 581 +chair 1 29 3.583519 3.583519 596 +focus 0 29 3.583519 0.000000 584 +american 0 27 3.637586 0.000000 634 +berkelei 0 26 3.688879 0.000000 657 +higher 0 24 3.761200 0.000000 690 +honor 0 23 3.806662 0.000000 729 +theorem 1 21 3.912023 3.912023 786 +prove 1 19 4.007333 4.007333 848 +citi 0 19 4.007333 0.000000 874 +researchmi 0 14 4.317488 0.000000 1119 +joint 1 13 4.382027 4.382027 1130 +lake 0 11 4.553877 0.000000 1373 +distinguish 0 11 4.553877 0.000000 1357 +utah 0 9 4.753590 0.000000 1585 +centenni 0 7 5.010635 0.000000 1967 +presid 0 6 5.164786 0.000000 2196 +heurist 0 6 5.164786 0.000000 2125 +emeritu 1 5 5.347108 5.347108 2544 +salt 0 5 5.347108 0.000000 2413 +analog 1 4 5.568345 5.568345 2875 +bledso 0 4 5.568345 0.000000 2999 +presentarea 0 4 5.568345 0.000000 3026 +artificialintellig 1 3 5.857933 5.857933 3608 +truste 1 3 5.857933 5.857933 3900 +alsointerest 0 3 5.857933 0.000000 3813 +donnel 0 2 6.263398 0.000000 5233 +mileston 0 2 6.263398 0.000000 4416 +jointconfer 0 2 6.263398 0.000000 5030 +woodrow 1 1 6.957497 6.957497 14287 +bledsoepet 0 1 6.957497 0.000000 14288 +americanmathemat 0 1 6.957497 0.000000 14289 +onartifici 0 1 6.957497 0.000000 14290 +interestautomat 0 1 6.957497 0.000000 14291 +theoremproof 0 1 6.957497 0.000000 14292 +levelplan 0 1 6.957497 0.000000 14293 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..33eb8d65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +softwar 1 220 1.386294 1.386294 30 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +oper 1 180 1.609438 1.609438 34 +parallel 2 169 1.791759 3.583518 60 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +confer 1 126 2.079442 2.079442 100 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +code 0 108 2.197225 0.000000 116 +technic 0 100 2.302585 0.000000 140 +proceed 1 93 2.397895 2.397895 152 +associ 1 93 2.397895 2.397895 151 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +environ 1 84 2.484907 2.484907 177 +activ 0 84 2.484907 0.000000 182 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +august 1 66 2.708050 2.708050 257 +practic 0 70 2.708050 0.000000 246 +interact 0 62 2.772589 0.000000 270 +previou 0 62 2.772589 0.000000 290 +direct 1 57 2.890372 2.890372 316 +index 0 56 2.890372 0.000000 309 +local 0 55 2.944439 0.000000 334 +profession 0 51 2.995732 0.000000 345 +set 0 50 3.044522 0.000000 361 +york 0 41 3.218876 0.000000 435 +societi 0 40 3.258097 0.000000 456 +multipl 0 39 3.258097 0.000000 453 +formal 0 37 3.332205 0.000000 478 +committe 0 34 3.401197 0.000000 522 +concurr 0 34 3.401197 0.000000 501 +next 0 34 3.401197 0.000000 517 +board 1 33 3.433987 3.433987 528 +john 1 33 3.433987 3.433987 532 +scientist 0 31 3.496508 0.000000 560 +profil 1 30 3.555348 3.555348 581 +computersci 0 30 3.555348 0.000000 562 +chair 1 29 3.583519 3.583519 596 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +debug 0 17 4.110874 0.000000 944 +brown 1 16 4.174387 4.174387 977 +researchmi 0 14 4.317488 0.000000 1119 +incomput 0 14 4.317488 0.000000 1096 +vice 0 9 4.753590 0.000000 1604 +unifi 0 8 4.875197 0.000000 1774 +newton 0 7 5.010635 0.000000 1824 +softwareengin 0 6 5.164786 0.000000 2162 +jain 1 5 5.347108 5.347108 2332 +parallelprogram 0 5 5.347108 0.000000 2379 +werth 2 4 5.568345 11.136690 3004 +andimplement 0 4 5.568345 0.000000 3029 +hyder 0 4 5.568345 0.000000 2772 +interestparallel 0 3 5.857933 0.000000 3806 +publicationss 0 2 6.263398 0.000000 5732 +thirteenth 0 2 6.263398 0.000000 5733 +werthsenior 0 1 6.957497 0.000000 14294 +emori 0 1 6.957497 0.000000 14295 +accredit 0 1 6.957497 0.000000 14296 +sobek 0 1 6.957497 0.000000 14297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..f79b6e82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +distribut 1 162 1.791759 1.791759 51 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +person 0 111 2.197225 0.000000 117 +member 0 84 2.484907 0.000000 165 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +colleg 0 61 2.833213 0.000000 300 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +index 0 56 2.890372 0.000000 309 +algebra 0 45 3.135494 0.000000 394 +multi 0 36 3.367296 0.000000 493 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +strategi 0 25 3.737670 0.000000 682 +researchmi 0 14 4.317488 0.000000 1119 +fromindividu 0 12 4.465908 0.000000 1290 +oxford 0 6 5.164786 0.000000 2121 +regent 0 5 5.347108 0.000000 2551 +emeritu 0 5 5.347108 0.000000 2544 +england 0 5 5.347108 0.000000 2557 +alfr 1 4 5.568345 5.568345 2882 +dale 0 4 5.568345 0.000000 2687 +crow 0 3 5.857933 0.000000 3845 +trammel 0 2 6.263398 0.000000 5562 +andmap 0 2 6.263398 0.000000 4258 +daleno 0 1 6.957497 0.000000 14298 +exet 0 1 6.957497 0.000000 14299 +interestdatabas 0 1 6.957497 0.000000 14300 +stagei 0 1 6.957497 0.000000 14301 +studiedinclud 0 1 6.957497 0.000000 14302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..8a1bd800 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +distribut 1 162 1.791759 1.791759 51 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +parallel 0 169 1.791759 0.000000 60 +austin 0 168 1.791759 0.000000 63 +texa 0 160 1.791759 0.000000 64 +model 1 145 1.945910 1.945910 69 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +databas 1 122 2.079442 2.079442 86 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +mathemat 1 108 2.197225 2.197225 123 +teach 0 108 2.197225 0.000000 112 +intern 0 108 2.197225 0.000000 128 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +solut 0 82 2.484907 0.000000 162 +environ 0 84 2.484907 0.000000 177 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +orient 0 80 2.564949 0.000000 205 +decemb 0 80 2.564949 0.000000 215 +symposium 1 72 2.639057 2.639057 238 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +visual 0 48 3.044522 0.000000 372 +math 0 44 3.135494 0.000000 402 +natur 0 44 3.135494 0.000000 406 +respons 0 37 3.332205 0.000000 476 +award 1 34 3.401197 3.401197 523 +concurr 0 34 3.401197 0.000000 501 +obtain 0 33 3.433987 0.000000 534 +exist 0 30 3.555348 0.000000 569 +profil 0 30 3.555348 0.000000 581 +propos 0 28 3.610918 0.000000 602 +repres 0 26 3.688879 0.000000 656 +honor 0 23 3.806662 0.000000 729 +excel 0 19 4.007333 0.000000 868 +miller 0 17 4.110874 0.000000 949 +role 0 14 4.317488 0.000000 1101 +fromindividu 0 12 4.465908 0.000000 1290 +queue 1 10 4.653960 4.653960 1386 +purdu 0 10 4.653960 0.000000 1466 +length 0 10 4.653960 0.000000 1400 +jeffrei 1 9 4.753590 4.753590 1612 +modula 0 9 4.753590 0.000000 1613 +researchi 0 8 4.875197 0.000000 1756 +inproceed 0 8 4.875197 0.000000 1670 +distributedsystem 0 6 5.164786 0.000000 2022 +ofparallel 0 5 5.347108 0.000000 2380 +bulletin 0 5 5.347108 0.000000 2343 +sigcs 1 4 5.568345 5.568345 2865 +throughput 0 4 5.568345 0.000000 2993 +richter 0 4 5.568345 0.000000 2957 +chou 0 4 5.568345 0.000000 3033 +georgia 0 3 5.857933 0.000000 3834 +publicationsj 0 3 5.857933 0.000000 3808 +shen 0 3 5.857933 0.000000 3370 +performanceof 0 2 6.263398 0.000000 4585 +modelingof 0 2 6.263398 0.000000 5734 +loui 0 2 6.263398 0.000000 5220 +brumfield 1 1 6.957497 6.957497 14303 +brumfieldsenior 0 1 6.957497 0.000000 14304 +interestperform 0 1 6.957497 0.000000 14305 +designersof 0 1 6.957497 0.000000 14306 +eachresourc 0 1 6.957497 0.000000 14307 +tasksawait 0 1 6.957497 0.000000 14308 +computationof 0 1 6.957497 0.000000 14309 +graf 0 1 6.957497 0.000000 14310 +verdi 0 1 6.957497 0.000000 14311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..6ac97c43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +softwar 1 220 1.386294 1.386294 30 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +area 0 144 1.945910 0.000000 80 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +tool 0 117 2.079442 0.000000 93 +schedul 0 119 2.079442 0.000000 85 +mathemat 2 108 2.197225 4.394450 123 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +journal 1 83 2.484907 2.484907 183 +solut 0 82 2.484907 0.000000 162 +member 0 84 2.484907 0.000000 165 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +appli 1 71 2.639057 2.639057 226 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +special 0 56 2.890372 0.000000 320 +explor 0 58 2.890372 0.000000 324 +major 0 56 2.890372 0.000000 315 +scientif 1 53 2.944439 2.944439 341 +three 0 54 2.944439 0.000000 330 +profession 1 51 2.995732 2.995732 345 +particular 0 51 2.995732 0.000000 352 +numer 1 49 3.044522 3.044522 369 +editor 1 41 3.218876 3.218876 433 +transact 1 39 3.258097 3.258097 438 +statist 0 35 3.401197 0.000000 521 +board 0 33 3.433987 0.000000 528 +obtain 0 33 3.433987 0.000000 534 +transform 0 32 3.465736 0.000000 542 +profil 0 30 3.555348 0.000000 581 +packag 0 28 3.610918 0.000000 614 +symbol 0 27 3.637586 0.000000 620 +methodolog 0 23 3.806662 0.000000 733 +director 1 22 3.850148 3.850148 767 +siam 1 21 3.912023 3.912023 800 +rout 0 21 3.912023 0.000000 793 +expert 0 20 3.951244 0.000000 833 +region 0 19 4.007333 0.000000 875 +north 0 19 4.007333 0.000000 873 +dimension 0 18 4.060443 0.000000 909 +estim 1 17 4.110874 4.110874 930 +matrix 0 17 4.110874 0.000000 933 +condit 1 16 4.174387 4.174387 975 +alan 1 13 4.382027 4.382027 1146 +emploi 0 12 4.465908 0.000000 1284 +fromindividu 0 12 4.465908 0.000000 1290 +michigan 0 11 4.553877 0.000000 1368 +node 0 11 4.553877 0.000000 1326 +editori 0 9 4.753590 0.000000 1611 +hundr 0 9 4.753590 0.000000 1528 +researchi 0 8 4.875197 0.000000 1756 +curv 0 8 4.875197 0.000000 1656 +coast 0 8 4.875197 0.000000 1746 +presenc 0 8 4.875197 0.000000 1671 +counter 0 8 4.875197 0.000000 1765 +centenni 0 7 5.010635 0.000000 1967 +constrain 1 6 5.164786 5.164786 2042 +southern 0 6 5.164786 0.000000 2191 +spline 0 6 5.164786 0.000000 2007 +fit 1 5 5.347108 5.347108 2285 +holland 0 5 5.347108 0.000000 2490 +triangul 1 4 5.568345 5.568345 2903 +closest 0 4 5.568345 0.000000 2828 +cline 1 3 5.857933 5.857933 3218 +interestmathemat 0 3 5.857933 0.000000 3860 +scatter 0 3 5.857933 0.000000 3351 +delaunai 0 3 5.857933 0.000000 3619 +imac 0 3 5.857933 0.000000 3718 +wilkinson 0 3 5.857933 0.000000 3579 +subprogram 0 2 6.263398 0.000000 5618 +andsurfac 0 2 6.263398 0.000000 5735 +publicationsr 0 2 6.263398 0.000000 5736 +king 0 2 6.263398 0.000000 5737 +meyer 0 2 6.263398 0.000000 4728 +guard 0 2 6.263398 0.000000 5738 +tender 0 2 6.263398 0.000000 5397 +stewart 0 2 6.263398 0.000000 5739 +renka 1 1 6.957497 6.957497 14312 +clinedavid 0 1 6.957497 0.000000 14313 +bruton 0 1 6.957497 0.000000 14314 +statisticalcomput 0 1 6.957497 0.000000 14315 +socialrespons 0 1 6.957497 0.000000 14316 +whichcan 0 1 6.957497 0.000000 14317 +constructionof 0 1 6.957497 0.000000 14318 +formathemat 0 1 6.957497 0.000000 14319 +developmentha 0 1 6.957497 0.000000 14320 +tension 0 1 6.957497 0.000000 14321 +buoi 0 1 6.957497 0.000000 14322 +barrier 0 1 6.957497 0.000000 14323 +moler 0 1 6.957497 0.000000 14324 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..cdc9f144 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +list 0 201 1.609438 0.000000 39 +algorithm 0 162 1.791759 0.000000 57 +area 0 144 1.945910 0.000000 80 +mathemat 1 108 2.197225 2.197225 123 +memori 0 101 2.302585 0.000000 139 +techniqu 0 99 2.302585 0.000000 138 +member 1 84 2.484907 2.484907 165 +good 0 77 2.564949 0.000000 200 +addit 0 74 2.639057 0.000000 228 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +reason 0 57 2.890372 0.000000 318 +particular 0 51 2.995732 0.000000 352 +physic 1 47 3.091042 3.091042 377 +theoret 0 39 3.258097 0.000000 446 +correct 0 38 3.295837 0.000000 462 +award 1 34 3.401197 3.401197 523 +obtain 0 33 3.433987 0.000000 534 +power 0 30 3.555348 0.000000 573 +profil 0 30 3.555348 0.000000 581 +art 1 29 3.583519 3.583519 593 +chair 0 29 3.583519 0.000000 596 +focus 0 29 3.583519 0.000000 584 +american 0 27 3.637586 0.000000 634 +fellow 0 24 3.761200 0.000000 701 +doctor 0 24 3.761200 0.000000 709 +methodolog 0 23 3.806662 0.000000 733 +increas 0 20 3.951244 0.000000 829 +fromindividu 0 12 4.465908 0.000000 1290 +academi 1 8 4.875197 4.875197 1735 +centenni 0 7 5.010635 0.000000 1967 +foreign 0 7 5.010635 0.000000 1919 +ture 0 6 5.164786 0.000000 1997 +british 0 5 5.347108 0.000000 2546 +harri 0 4 5.568345 0.000000 3034 +queen 0 4 5.568345 0.000000 2919 +dijkstra 0 3 5.857933 0.000000 3173 +netherland 0 3 5.857933 0.000000 3650 +streamlin 0 3 5.857933 0.000000 3573 +edsger 1 2 6.263398 6.263398 5740 +honorari 0 2 6.263398 0.000000 5741 +sciencesmemb 0 2 6.263398 0.000000 5742 +royal 0 2 6.263398 0.000000 4756 +wybe 1 1 6.957497 6.957497 14325 +dijkstraschlumberg 0 1 6.957497 0.000000 14326 +sciencesprofessor 0 1 6.957497 0.000000 14327 +mathematicskandidaatsexamen 0 1 6.957497 0.000000 14328 +doctora 0 1 6.957497 0.000000 14329 +examen 0 1 6.957497 0.000000 14330 +leydenph 0 1 6.957497 0.000000 14331 +amsterdamhonor 0 1 6.957497 0.000000 14332 +awardsacm 0 1 6.957497 0.000000 14333 +sciencesdistinguish 0 1 6.957497 0.000000 14334 +societyafip 0 1 6.957497 0.000000 14335 +honori 0 1 6.957497 0.000000 14336 +causa 0 1 6.957497 0.000000 14337 +belfastarea 0 1 6.957497 0.000000 14338 +systemssummari 0 1 6.957497 0.000000 14339 +argumentso 0 1 6.957497 0.000000 14340 +ofform 0 1 6.957497 0.000000 14341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..fd228e8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +network 1 168 1.791759 1.791759 61 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +databas 0 122 2.079442 0.000000 86 +high 0 130 2.079442 0.000000 101 +mathemat 0 108 2.197225 0.000000 123 +manag 0 114 2.197225 0.000000 125 +specif 0 106 2.197225 0.000000 106 +theori 0 111 2.197225 0.000000 127 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +educ 0 86 2.484907 0.000000 191 +larg 0 82 2.484907 0.000000 168 +internet 0 83 2.484907 0.000000 186 +member 0 84 2.484907 0.000000 165 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +profession 0 51 2.995732 0.000000 345 +protocol 1 45 3.135494 3.135494 407 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +mobil 1 23 3.806662 3.806662 730 +speed 0 18 4.060443 0.000000 911 +researchmi 0 14 4.317488 0.000000 1119 +fromindividu 0 12 4.465908 0.000000 1290 +chri 1 11 4.553877 4.553877 1311 +secretari 0 8 4.875197 0.000000 1775 +inproceed 0 8 4.875197 0.000000 1670 +sigcomm 0 5 5.347108 0.000000 2329 +gouda 0 4 5.568345 0.000000 3021 +treasur 0 3 5.857933 0.000000 3229 +andm 0 3 5.857933 0.000000 3901 +edmondson 1 2 6.263398 6.263398 4182 +yurkanan 1 2 6.263398 6.263398 4175 +interestcomput 0 2 6.263398 0.000000 5743 +yurkananlectur 0 1 6.957497 0.000000 14342 +internetwork 0 1 6.957497 0.000000 14343 +cobb 0 1 6.957497 0.000000 14344 +informaticsconfer 0 1 6.957497 0.000000 14345 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..2f288dfc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +list 0 201 1.609438 0.000000 39 +area 1 144 1.945910 1.945910 80 +process 0 142 1.945910 0.000000 72 +confer 1 126 2.079442 2.079442 100 +techniqu 0 99 2.302585 0.000000 138 +educ 1 86 2.484907 2.484907 191 +academ 0 82 2.484907 0.000000 178 +librari 0 87 2.484907 0.000000 181 +school 0 84 2.484907 0.000000 188 +member 0 84 2.484907 0.000000 165 +servic 1 72 2.639057 2.639057 236 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +improv 0 62 2.772589 0.000000 289 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +local 0 55 2.944439 0.000000 334 +profession 0 51 2.995732 0.000000 345 +committe 1 34 3.401197 3.401197 522 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +retriev 0 27 3.637586 0.000000 621 +women 0 16 4.174387 0.000000 1004 +researchmi 0 14 4.317488 0.000000 1119 +coordin 0 13 4.382027 0.000000 1182 +minor 0 12 4.465908 0.000000 1237 +fromindividu 0 12 4.465908 0.000000 1290 +secondari 1 7 5.010635 5.010635 1884 +recruit 0 6 5.164786 0.000000 2145 +sigcs 0 4 5.568345 0.000000 2865 +louisiana 0 3 5.857933 0.000000 3902 +suzi 1 2 6.263398 6.263398 4288 +gallagh 0 2 6.263398 0.000000 4293 +southwestern 0 2 6.263398 0.000000 5744 +interestcomput 0 2 6.263398 0.000000 5743 +gallagherlectur 0 1 6.957497 0.000000 14346 +loyola 0 1 6.957497 0.000000 14347 +necc 0 1 6.957497 0.000000 14348 +andretent 0 1 6.957497 0.000000 14349 +scienceeduc 0 1 6.957497 0.000000 14350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..a8ddf6ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +design 0 213 1.386294 0.000000 25 +list 0 201 1.609438 0.000000 39 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +develop 0 174 1.791759 0.000000 53 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +confer 1 126 2.079442 2.079442 100 +machin 0 129 2.079442 0.000000 95 +intern 1 108 2.197225 2.197225 128 +techniqu 0 99 2.302585 0.000000 138 +proceed 1 93 2.397895 2.397895 152 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +ieee 1 86 2.484907 2.484907 190 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +state 0 76 2.564949 0.000000 207 +june 0 79 2.564949 0.000000 214 +decemb 0 80 2.564949 0.000000 215 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +integr 0 67 2.708050 0.000000 245 +guid 0 63 2.772589 0.000000 267 +januari 0 62 2.772589 0.000000 264 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +special 0 56 2.890372 0.000000 320 +processor 1 54 2.944439 2.944439 335 +press 0 42 3.218876 0.000000 419 +transact 1 39 3.258097 3.258097 438 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +toler 1 33 3.433987 3.433987 533 +obtain 0 33 3.433987 0.000000 534 +fault 1 32 3.465736 3.465736 547 +profil 0 30 3.555348 0.000000 581 +focus 0 29 3.583519 0.000000 584 +scale 1 28 3.610918 3.610918 613 +measur 1 28 3.610918 3.610918 609 +multiprocessor 0 28 3.610918 0.000000 605 +repres 0 26 3.688879 0.000000 656 +strategi 0 25 3.737670 0.000000 682 +reliabl 0 25 3.737670 0.000000 674 +methodolog 1 23 3.806662 3.806662 733 +lead 0 23 3.806662 0.000000 718 +properti 0 22 3.850148 0.000000 749 +scheme 0 20 3.951244 0.000000 818 +qualiti 0 20 3.951244 0.000000 832 +failur 0 18 4.060443 0.000000 898 +appropri 0 18 4.060443 0.000000 883 +interconnect 1 17 4.110874 4.110874 937 +across 0 16 4.174387 0.000000 974 +precis 0 15 4.248495 0.000000 1023 +senior 0 14 4.317488 0.000000 1120 +researchmi 0 14 4.317488 0.000000 1119 +johnson 0 13 4.382027 0.000000 1162 +optic 1 12 4.465908 4.465908 1221 +fromindividu 0 12 4.465908 0.000000 1290 +abil 0 11 4.553877 0.000000 1341 +impact 0 11 4.553877 0.000000 1334 +success 0 10 4.653960 0.000000 1390 +devis 0 10 4.653960 0.000000 1451 +true 0 10 4.653960 0.000000 1422 +contrast 0 8 4.875197 0.000000 1637 +multicomput 0 7 5.010635 0.000000 1890 +predic 0 7 5.010635 0.000000 1806 +nest 1 6 5.164786 5.164786 2151 +chemistri 1 5 5.347108 5.347108 2405 +orlean 0 5 5.347108 0.000000 2550 +buss 0 4 5.568345 0.000000 2649 +louisiana 0 3 5.857933 0.000000 3902 +thedevelop 0 3 5.857933 0.000000 3903 +wave 0 3 5.857933 0.000000 3518 +campbel 0 3 5.857933 0.000000 3272 +laser 0 2 6.263398 0.000000 4747 +beinginvestig 0 2 6.263398 0.000000 5745 +parallelsystem 0 2 6.263398 0.000000 5746 +publicationsr 0 2 6.263398 0.000000 5736 +jenevein 2 1 6.957497 13.914994 14351 +wafer 1 1 6.957497 6.957497 14352 +menez 1 1 6.957497 6.957497 14353 +malek 1 1 6.957497 6.957497 14354 +interestinterconnect 0 1 6.957497 0.000000 14355 +interconnectionnetwork 0 1 6.957497 0.000000 14356 +restsin 0 1 6.957497 0.000000 14357 +interconnectionstructur 0 1 6.957497 0.000000 14358 +kindof 0 1 6.957497 0.000000 14359 +beingappli 0 1 6.957497 0.000000 14360 +communicationswitch 0 1 6.957497 0.000000 14361 +iscontinu 0 1 6.957497 0.000000 14362 +performanceport 0 1 6.957497 0.000000 14363 +tobenchmark 0 1 6.957497 0.000000 14364 +memorysystem 0 1 6.957497 0.000000 14365 +kyklo 0 1 6.957497 0.000000 14366 +laranjeira 0 1 6.957497 0.000000 14367 +ullah 0 1 6.957497 0.000000 14368 +metrix 0 1 6.957497 0.000000 14369 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..f12893c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +applic 0 170 1.791759 0.000000 56 +architectur 1 139 1.945910 1.945910 77 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +structur 0 106 2.197225 0.000000 105 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +center 0 88 2.397895 0.000000 158 +activ 0 84 2.484907 0.000000 182 +member 0 84 2.484907 0.000000 165 +complet 0 77 2.564949 0.000000 208 +logic 1 71 2.639057 2.639057 230 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +abstract 0 62 2.772589 0.000000 276 +function 0 62 2.772589 0.000000 275 +back 0 60 2.833213 0.000000 297 +space 1 57 2.890372 2.890372 310 +faculti 1 56 2.890372 2.890372 325 +california 0 46 3.091042 0.000000 388 +especi 0 36 3.367296 0.000000 496 +obtain 0 33 3.433987 0.000000 534 +profil 0 30 3.555348 0.000000 581 +valu 0 25 3.737670 0.000000 665 +martin 0 21 3.912023 0.000000 794 +concentr 0 18 4.060443 0.000000 906 +track 0 15 4.248495 0.000000 1029 +researchmi 0 14 4.317488 0.000000 1119 +classic 0 14 4.317488 0.000000 1084 +philosophi 1 13 4.382027 4.382027 1167 +deduct 0 12 4.465908 0.000000 1236 +scan 0 12 4.465908 0.000000 1243 +fromindividu 0 12 4.465908 0.000000 1290 +closur 1 8 4.875197 4.875197 1643 +angel 0 8 4.875197 0.000000 1779 +notion 0 7 5.010635 0.000000 1947 +vehicl 0 7 5.010635 0.000000 1928 +delai 0 7 5.010635 0.000000 1848 +chicago 0 6 5.164786 0.000000 2149 +emeritu 1 5 5.347108 5.347108 2544 +interestmathemat 0 3 5.857933 0.000000 3860 +andon 0 3 5.857933 0.000000 3115 +metatheori 0 3 5.857933 0.000000 3642 +norman 1 1 6.957497 6.957497 14370 +martinprofessor 0 1 6.957497 0.000000 14371 +ofphilosophi 0 1 6.957497 0.000000 14372 +asinterpret 0 1 6.957497 0.000000 14373 +whichexploit 0 1 6.957497 0.000000 14374 +intension 0 1 6.957497 0.000000 14375 +significantearli 0 1 6.957497 0.000000 14376 +missil 0 1 6.957497 0.000000 14377 +trackingalgorithm 0 1 6.957497 0.000000 14378 +radar 0 1 6.957497 0.000000 14379 +inmani 0 1 6.957497 0.000000 14380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..87094e81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +time 3 293 1.098612 3.295836 17 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +develop 1 174 1.791759 1.791759 53 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +analysi 0 124 2.079442 0.000000 98 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +technic 1 100 2.302585 2.302585 140 +techniqu 0 99 2.302585 0.000000 138 +real 2 93 2.397895 4.795790 144 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +journal 1 83 2.484907 2.484907 183 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +academ 0 82 2.484907 0.000000 178 +member 0 84 2.484907 0.000000 165 +method 1 80 2.564949 2.564949 213 +decemb 1 80 2.564949 2.564949 215 +symposium 1 72 2.639057 2.639057 238 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +addit 0 74 2.639057 0.000000 228 +goal 0 66 2.708050 0.000000 250 +foundat 0 62 2.772589 0.000000 286 +automat 0 61 2.833213 0.000000 306 +juli 0 60 2.833213 0.000000 305 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +publish 0 57 2.890372 0.000000 326 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +understand 0 47 3.091042 0.000000 384 +adapt 0 46 3.091042 0.000000 387 +mechan 0 43 3.178054 0.000000 416 +autom 1 41 3.218876 3.218876 434 +editor 0 41 3.218876 0.000000 433 +electr 0 38 3.295837 0.000000 461 +formal 1 37 3.332205 3.332205 478 +respons 1 37 3.332205 3.332205 476 +robot 0 36 3.367296 0.000000 497 +committe 1 34 3.401197 3.401197 522 +board 0 33 3.433987 0.000000 528 +toler 0 33 3.433987 0.000000 533 +obtain 0 33 3.433987 0.000000 534 +hard 0 30 3.555348 0.000000 563 +profil 0 30 3.555348 0.000000 581 +chair 1 29 3.583519 3.583519 596 +framework 0 28 3.610918 0.000000 606 +load 0 28 3.610918 0.000000 601 +constraint 0 26 3.688879 0.000000 636 +bound 0 26 3.688879 0.000000 659 +fundament 0 25 3.737670 0.000000 661 +primari 0 25 3.737670 0.000000 669 +concern 0 25 3.737670 0.000000 666 +toward 0 25 3.737670 0.000000 668 +fellow 0 24 3.761200 0.000000 701 +highli 0 23 3.806662 0.000000 725 +wang 1 21 3.912023 3.912023 790 +fund 0 21 3.912023 0.000000 805 +synthesi 0 20 3.951244 0.000000 834 +expert 0 20 3.951244 0.000000 833 +aid 1 18 4.060443 4.060443 904 +critic 0 16 4.174387 0.000000 982 +taiwan 0 16 4.174387 0.000000 1006 +brown 0 16 4.174387 0.000000 977 +massachusett 0 14 4.317488 0.000000 1118 +conduct 0 14 4.317488 0.000000 1065 +nasa 0 13 4.382027 0.000000 1188 +robust 1 12 4.465908 4.465908 1271 +asynchron 0 12 4.465908 0.000000 1229 +fromindividu 0 12 4.465908 0.000000 1290 +editori 0 9 4.753590 0.000000 1611 +vice 0 9 4.753590 0.000000 1604 +researchi 0 8 4.875197 0.000000 1756 +fifth 0 7 5.010635 0.000000 1931 +montreal 0 7 5.010635 0.000000 1961 +kluwer 0 6 5.164786 0.000000 2143 +antonio 0 6 5.164786 0.000000 2186 +ofdistribut 0 5 5.347108 0.000000 2316 +emerson 0 5 5.347108 0.000000 2547 +adjust 0 5 5.347108 0.000000 2422 +orlean 0 5 5.347108 0.000000 2550 +presentarea 0 4 5.568345 0.000000 3026 +avion 0 4 5.568345 0.000000 3018 +melbourn 0 4 5.568345 0.000000 3035 +sigsoft 0 4 5.568345 0.000000 3036 +aloysiu 1 3 5.857933 5.857933 3829 +systemdesign 0 2 6.263398 0.000000 4297 +stringent 0 2 6.263398 0.000000 5523 +areasinclud 0 2 6.263398 0.000000 5747 +publicationsa 0 2 6.263398 0.000000 4885 +clement 0 2 6.263398 0.000000 5526 +tsou 0 2 6.263398 0.000000 5525 +mokassoci 0 1 6.957497 0.000000 14381 +professorfaculti 0 1 6.957497 0.000000 14382 +federationof 0 1 6.957497 0.000000 14383 +interestfault 0 1 6.957497 0.000000 14384 +includespecif 0 1 6.957497 0.000000 14385 +forguarante 0 1 6.957497 0.000000 14386 +thetrad 0 1 6.957497 0.000000 14387 +criticalsystem 0 1 6.957497 0.000000 14388 +theanalysi 0 1 6.957497 0.000000 14389 +industrialprocess 0 1 6.957497 0.000000 14390 +ofnav 0 1 6.957497 0.000000 14391 +forreal 0 1 6.957497 0.000000 14392 +tilborg 0 1 6.957497 0.000000 14393 +heitmey 0 1 6.957497 0.000000 14394 +labaw 0 1 6.957497 0.000000 14395 +aptl 0 1 6.957497 0.000000 14396 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..64d1794f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +list 0 201 1.609438 0.000000 39 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +process 1 142 1.945910 1.945910 72 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +teach 0 108 2.197225 0.000000 112 +book 0 99 2.302585 0.000000 131 +real 0 93 2.397895 0.000000 144 +member 0 84 2.484907 0.000000 165 +state 0 76 2.564949 0.000000 207 +orient 0 80 2.564949 0.000000 205 +appli 0 71 2.639057 0.000000 226 +addit 0 74 2.639057 0.000000 228 +function 1 62 2.772589 2.772589 275 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +reason 0 57 2.890372 0.000000 318 +undergradu 0 54 2.944439 0.000000 338 +maintain 0 51 2.995732 0.000000 342 +physic 0 47 3.091042 0.000000 377 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +editor 0 41 3.218876 0.000000 433 +formal 0 37 3.332205 0.000000 478 +concurr 1 34 3.401197 3.401197 501 +obtain 0 33 3.433987 0.000000 534 +richard 1 31 3.496508 3.496508 559 +profil 0 30 3.555348 0.000000 581 +seri 0 24 3.761200 0.000000 708 +longer 0 20 3.951244 0.000000 816 +stand 0 18 4.060443 0.000000 891 +stanford 0 17 4.110874 0.000000 955 +weslei 0 16 4.174387 0.000000 983 +permit 0 16 4.174387 0.000000 962 +senior 0 14 4.317488 0.000000 1120 +addison 0 12 4.465908 0.000000 1230 +fromindividu 0 12 4.465908 0.000000 1290 +suitabl 0 9 4.753590 0.000000 1486 +harvard 0 7 5.010635 0.000000 1926 +iowa 0 7 5.010635 0.000000 1971 +implementationof 0 7 5.010635 0.000000 1813 +microcomput 0 3 5.857933 0.000000 3444 +hamilton 1 2 6.263398 6.263398 5719 +collegem 0 2 6.263398 0.000000 5563 +astronaut 0 2 6.263398 0.000000 5748 +universityph 0 2 6.263398 0.000000 5604 +lecturerb 0 1 6.957497 0.000000 14397 +aero 0 1 6.957497 0.000000 14398 +universityprofession 0 1 6.957497 0.000000 14399 +servicecoordin 0 1 6.957497 0.000000 14400 +vol 0 1 6.957497 0.000000 14401 +educationsummari 0 1 6.957497 0.000000 14402 +potentialfor 0 1 6.957497 0.000000 14403 +infal 0 1 6.957497 0.000000 14404 +sectionof 0 1 6.957497 0.000000 14405 +onfunct 0 1 6.957497 0.000000 14406 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..fe8d2c2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +gener 0 220 1.386294 0.000000 27 +paper 1 205 1.609438 1.609438 38 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +model 0 145 1.945910 0.000000 69 +databas 2 122 2.079442 4.158884 86 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +ieee 1 86 2.484907 2.484907 190 +larg 1 82 2.484907 2.484907 168 +journal 0 83 2.484907 0.000000 183 +member 0 84 2.484907 0.000000 165 +server 0 76 2.564949 0.000000 204 +june 0 79 2.564949 0.000000 214 +nation 0 74 2.639057 0.000000 240 +intellig 0 72 2.639057 0.000000 225 +workshop 0 71 2.639057 0.000000 239 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +knowledg 1 67 2.708050 2.708050 243 +main 1 67 2.708050 2.708050 256 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +septemb 1 65 2.772589 2.772589 274 +foundat 0 62 2.772589 0.000000 286 +organ 0 65 2.772589 0.000000 265 +back 0 60 2.833213 0.000000 297 +faculti 1 56 2.890372 2.890372 325 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +profession 0 51 2.995732 0.000000 345 +principl 0 48 3.044522 0.000000 357 +continu 1 39 3.258097 3.258097 448 +transact 1 39 3.258097 3.258097 438 +societi 0 40 3.258097 0.000000 456 +map 0 39 3.258097 0.000000 452 +movi 0 40 3.258097 0.000000 459 +cost 0 37 3.332205 0.000000 480 +robot 0 36 3.367296 0.000000 497 +multi 0 36 3.367296 0.000000 493 +award 1 34 3.401197 3.401197 523 +committe 0 34 3.401197 0.000000 522 +concurr 0 34 3.401197 0.000000 501 +obtain 0 33 3.433987 0.000000 534 +storag 1 31 3.496508 3.496508 553 +profil 0 30 3.555348 0.000000 581 +chair 1 29 3.583519 3.583519 596 +framework 0 28 3.610918 0.000000 606 +retriev 0 27 3.637586 0.000000 621 +divis 0 21 3.912023 0.000000 803 +media 1 19 4.007333 4.007333 861 +sigmod 0 19 4.007333 0.000000 877 +concentr 0 18 4.060443 0.000000 906 +demand 0 14 4.317488 0.000000 1073 +resolut 0 13 4.382027 0.000000 1172 +fromindividu 0 12 4.465908 0.000000 1290 +invit 0 10 4.653960 0.000000 1428 +conferenceon 0 9 4.753590 0.000000 1595 +databasesystem 0 8 4.875197 0.000000 1617 +silberschatz 1 6 5.164786 5.164786 1978 +outstand 0 6 5.164786 0.000000 2136 +advisori 0 6 5.164786 0.000000 2148 +sigact 0 6 5.164786 0.000000 2212 +symposiumon 0 6 5.164786 0.000000 2054 +distributedsystem 0 6 5.164786 0.000000 2022 +internationalconfer 0 6 5.164786 0.000000 2051 +seventh 0 5 5.347108 0.000000 2464 +multiresolut 0 5 5.347108 0.000000 2423 +fussel 0 5 5.347108 0.000000 2300 +abraham 1 4 5.568345 5.568345 2644 +ullman 0 4 5.568345 0.000000 2749 +stoni 0 3 5.857933 0.000000 3571 +sudarshan 0 3 5.857933 0.000000 3885 +ozden 1 2 6.263398 6.263398 5749 +eighth 0 2 6.263398 0.000000 5750 +publicationss 0 2 6.263398 0.000000 5732 +knowledgeand 0 2 6.263398 0.000000 4366 +onveri 0 2 6.263398 0.000000 4367 +rastogi 1 1 6.957497 6.957497 14407 +pod 1 1 6.957497 6.957497 14408 +silberschatzprofessorship 0 1 6.957497 0.000000 14409 +sciencesm 0 1 6.957497 0.000000 14410 +brookhonor 0 1 6.957497 0.000000 14411 +serviceiee 0 1 6.957497 0.000000 14412 +futureof 0 1 6.957497 0.000000 14413 +basedsystemssummari 0 1 6.957497 0.000000 14414 +recentresearch 0 1 6.957497 0.000000 14415 +multidatabas 0 1 6.957497 0.000000 14416 +transactionmanag 0 1 6.957497 0.000000 14417 +ganguli 0 1 6.957497 0.000000 14418 +tsur 0 1 6.957497 0.000000 14419 +datalog 0 1 6.957497 0.000000 14420 +programexecut 0 1 6.957497 0.000000 14421 +jagadish 0 1 6.957497 0.000000 14422 +lieuwen 0 1 6.957497 0.000000 14423 +dali 0 1 6.957497 0.000000 14424 +biliri 0 1 6.957497 0.000000 14425 +storageserv 0 1 6.957497 0.000000 14426 +storageand 0 1 6.957497 0.000000 14427 +relationaldata 0 1 6.957497 0.000000 14428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..926f1c96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +list 0 201 1.609438 0.000000 39 +professor 0 137 1.945910 0.000000 76 +novemb 0 81 2.484907 0.000000 179 +back 0 60 2.833213 0.000000 297 +faculti 0 56 2.890372 0.000000 325 +robert 1 30 3.555348 3.555348 567 +profil 0 30 3.555348 0.000000 581 +rememb 0 12 4.465908 0.000000 1217 +centenni 0 7 5.010635 0.000000 1967 +emeritu 0 5 5.347108 0.000000 2544 +bledso 0 4 5.568345 0.000000 2999 +simmon 1 2 6.263398 6.263398 5460 +simmonsquinci 0 1 6.957497 0.000000 14429 +professoremeritu 0 1 6.957497 0.000000 14430 +psychologymai 0 1 6.957497 0.000000 14431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..335d37a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +email 1 220 1.386294 1.386294 29 +oper 0 180 1.609438 0.000000 34 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +austin 0 168 1.791759 0.000000 63 +hour 0 165 1.791759 0.000000 46 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +read 0 154 1.791759 0.000000 47 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +postscript 0 131 2.079442 0.000000 90 +number 0 130 2.079442 0.000000 97 +check 0 115 2.197225 0.000000 118 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +new 0 64 2.772589 0.000000 262 +type 0 61 2.833213 0.000000 296 +thesi 0 57 2.890372 0.000000 327 +think 0 57 2.890372 0.000000 314 +undergradu 0 54 2.944439 0.000000 338 +could 0 46 3.091042 0.000000 383 +semant 0 29 3.583519 0.000000 587 +progress 0 28 3.610918 0.000000 598 +rule 0 26 3.688879 0.000000 638 +altern 0 26 3.688879 0.000000 641 +yahoo 0 24 3.761200 0.000000 707 +log 0 19 4.007333 0.000000 857 +adam 1 17 4.110874 4.110874 934 +happi 0 14 4.317488 0.000000 1079 +touch 0 12 4.465908 0.000000 1288 +claim 0 8 4.875197 0.000000 1664 +core 0 7 5.010635 0.000000 1809 +gzip 1 6 5.164786 5.164786 2117 +gradual 0 4 5.568345 0.000000 2997 +vrml 0 4 5.568345 0.000000 2949 +aweekli 0 3 5.857933 0.000000 3312 +fame 0 3 5.857933 0.000000 3793 +knowwhat 0 2 6.263398 0.000000 5456 +junki 0 2 6.263398 0.000000 5457 +nando 0 2 6.263398 0.000000 5458 +seligman 1 1 6.957497 6.957497 12572 +pageadam 0 1 6.957497 0.000000 12573 +specifiedth 0 1 6.957497 0.000000 12574 +fileor 0 1 6.957497 0.000000 12575 +pagemart 0 1 6.957497 0.000000 12576 +fromreut 0 1 6.957497 0.000000 12577 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..239b7f8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +machin 0 129 2.079442 0.000000 95 +specif 0 106 2.197225 0.000000 106 +learn 0 86 2.484907 0.000000 170 +colleg 0 61 2.833213 0.000000 300 +california 0 46 3.091042 0.000000 388 +eduphon 0 15 4.248495 0.000000 1060 +hopefulli 0 14 4.317488 0.000000 1071 +station 0 13 4.382027 0.000000 1157 +acquisit 0 10 4.653960 0.000000 1465 +santa 0 10 4.653960 0.000000 1441 +commonsens 0 4 5.568345 0.000000 2998 +barbara 0 3 5.857933 0.000000 3380 +chill 0 2 6.263398 0.000000 4244 +agapito 1 1 6.957497 6.957497 12578 +sustaita 1 1 6.957497 6.957497 12579 +austincognit 0 1 6.957497 0.000000 12580 +connection 0 1 6.957497 0.000000 12581 +reasoningschoolingph 0 1 6.957497 0.000000 12582 +miscellaneouspost 0 1 6.957497 0.000000 12583 +addressth 0 1 6.957497 0.000000 12584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..cd098525 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +base 1 165 1.791759 1.791759 50 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +report 1 131 2.079442 2.079442 92 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +world 0 115 2.197225 0.000000 126 +technic 1 100 2.302585 2.302585 140 +larg 0 82 2.484907 0.000000 168 +learn 0 86 2.484907 0.000000 170 +intellig 1 72 2.639057 2.639057 225 +nation 0 74 2.639057 0.000000 240 +logic 0 71 2.639057 0.000000 230 +solv 0 73 2.639057 0.000000 234 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +laboratori 0 63 2.772589 0.000000 292 +new 0 64 2.772589 0.000000 262 +foundat 0 62 2.772589 0.000000 286 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +faculti 1 56 2.890372 2.890372 325 +reason 1 57 2.890372 2.890372 318 +physic 0 47 3.091042 0.000000 377 +directori 0 45 3.135494 0.000000 396 +autom 1 41 3.218876 3.218876 434 +close 0 38 3.295837 0.000000 465 +multi 0 36 3.367296 0.000000 493 +robert 1 30 3.555348 3.555348 567 +neural 0 30 3.555348 0.000000 578 +rule 0 26 3.688879 0.000000 638 +lab 0 24 3.761200 0.000000 698 +theorem 1 21 3.912023 3.912023 786 +fund 0 21 3.912023 0.000000 805 +prove 1 19 4.007333 4.007333 848 +histori 0 19 4.007333 0.000000 853 +excel 0 19 4.007333 0.000000 868 +atth 0 15 4.248495 0.000000 1019 +action 0 15 4.248495 0.000000 1038 +rank 0 14 4.317488 0.000000 1063 +bruce 0 12 4.465908 0.000000 1226 +distinguish 0 11 4.553877 0.000000 1357 +benjamin 0 11 4.553877 0.000000 1296 +qualit 0 11 4.553877 0.000000 1362 +vladimir 0 11 4.553877 0.000000 1324 +peter 0 11 4.553877 0.000000 1316 +novak 1 9 4.753590 4.753590 1521 +moonei 1 9 4.753590 4.753590 1520 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +aaai 0 8 4.875197 0.000000 1750 +presidenti 0 8 4.875197 0.000000 1737 +postdoc 0 8 4.875197 0.000000 1724 +philosoph 0 7 5.010635 0.000000 1904 +boyer 0 6 5.164786 0.000000 2013 +gordon 0 6 5.164786 0.000000 2032 +dream 0 6 5.164786 0.000000 2165 +lifschitz 0 5 5.347108 0.000000 2542 +mirank 0 5 5.347108 0.000000 2543 +porter 0 5 5.347108 0.000000 2293 +emeritu 0 5 5.347108 0.000000 2544 +bledso 0 4 5.568345 0.000000 2999 +clark 0 4 5.568345 0.000000 2705 +kuiper 1 3 5.857933 5.857933 3794 +souther 0 3 5.857933 0.000000 3795 +woodi 0 2 6.263398 0.000000 5459 +simmon 0 2 6.263398 0.000000 5460 +laboratoryut 0 1 6.957497 0.000000 12585 +laboratoryth 0 1 6.957497 0.000000 12586 +austinha 0 1 6.957497 0.000000 12587 +andgradu 0 1 6.957497 0.000000 12588 +causei 0 1 6.957497 0.000000 12589 +deceas 0 1 6.957497 0.000000 12590 +memoriam 0 1 6.957497 0.000000 12591 +porterpoint 0 1 6.957497 0.000000 12592 +agenciescontact 0 1 6.957497 0.000000 12593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..bcd0d3e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +compil 0 122 2.079442 0.000000 96 +code 0 108 2.197225 0.000000 116 +want 0 79 2.564949 0.000000 199 +main 0 67 2.708050 0.000000 256 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +advisor 0 51 2.995732 0.000000 355 +john 0 33 3.433987 0.000000 532 +postal 0 30 3.555348 0.000000 580 +constraint 0 26 3.688879 0.000000 636 +translat 0 13 4.382027 0.000000 1164 +usavoic 0 13 4.382027 0.000000 1198 +hello 0 10 4.653960 0.000000 1407 +candid 0 9 4.753590 0.000000 1606 +routin 0 9 4.753590 0.000000 1549 +ajita 0 2 6.263398 0.000000 5461 +papersmi 0 2 6.263398 0.000000 5462 +johnajita 0 1 6.957497 0.000000 12594 +programmingframework 0 1 6.957497 0.000000 12595 +parallelprocedur 0 1 6.957497 0.000000 12596 +brownemi 0 1 6.957497 0.000000 12597 +ajohn 0 1 6.957497 0.000000 12598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..14351086 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +technolog 1 131 2.079442 2.079442 102 +confer 0 126 2.079442 0.000000 100 +mathemat 1 108 2.197225 2.197225 123 +teach 1 108 2.197225 2.197225 112 +topic 0 114 2.197225 0.000000 110 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +pictur 0 89 2.397895 0.000000 160 +associ 0 93 2.397895 0.000000 151 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +institut 0 84 2.484907 0.000000 187 +method 1 80 2.564949 2.564949 213 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +integr 0 67 2.708050 0.000000 245 +main 0 67 2.708050 0.000000 256 +evalu 0 64 2.772589 0.000000 266 +organ 0 65 2.772589 0.000000 265 +special 1 56 2.890372 2.890372 320 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +direct 0 57 2.890372 0.000000 316 +maintain 0 51 2.995732 0.000000 342 +profession 0 51 2.995732 0.000000 345 +understand 1 47 3.091042 3.091042 384 +electron 0 47 3.091042 0.000000 379 +electr 0 38 3.295837 0.000000 461 +formal 0 37 3.332205 0.000000 478 +field 0 37 3.332205 0.000000 482 +connect 0 37 3.332205 0.000000 485 +scientist 0 31 3.496508 0.000000 560 +computersci 0 30 3.555348 0.000000 562 +travel 0 30 3.555348 0.000000 579 +limit 0 29 3.583519 0.000000 585 +utc 0 27 3.637586 0.000000 629 +other 0 24 3.761200 0.000000 697 +alwai 0 24 3.761200 0.000000 691 +leav 0 21 3.912023 0.000000 772 +particularli 0 19 4.007333 0.000000 867 +excel 0 19 4.007333 0.000000 868 +encourag 0 18 4.060443 0.000000 880 +social 0 13 4.382027 0.000000 1123 +jump 0 9 4.753590 0.000000 1603 +sweden 0 7 5.010635 0.000000 1885 +interestedin 0 5 5.347108 0.000000 2260 +craft 0 5 5.347108 0.000000 2412 +camp 0 5 5.347108 0.000000 2545 +suffer 0 5 5.347108 0.000000 2268 +novic 0 4 5.568345 0.000000 2815 +mentor 0 4 5.568345 0.000000 2591 +sigcs 0 4 5.568345 0.000000 2865 +vicki 1 3 5.857933 5.857933 3187 +mathematicallog 0 3 5.857933 0.000000 3796 +belong 0 3 5.857933 0.000000 3797 +almstrum 1 2 6.263398 6.263398 4165 +woodwork 0 2 6.263398 0.000000 5463 +spurt 0 2 6.263398 0.000000 5464 +plenti 0 2 6.263398 0.000000 5465 +uppsala 1 1 6.957497 6.957497 12599 +almstrumabout 0 1 6.957497 0.000000 12600 +doctoralresearch 0 1 6.957497 0.000000 12601 +ispent 0 1 6.957497 0.000000 12602 +pagether 0 1 6.957497 0.000000 12603 +garden 0 1 6.957497 0.000000 12604 +sew 0 1 6.957497 0.000000 12605 +hubbi 0 1 6.957497 0.000000 12606 +torgni 0 1 6.957497 0.000000 12607 +stadler 0 1 6.957497 0.000000 12608 +itics 0 1 6.957497 0.000000 12609 +educationjun 0 1 6.957497 0.000000 12610 +swedenoth 0 1 6.957497 0.000000 12611 +frenzi 0 1 6.957497 0.000000 12612 +educationsigsoft 0 1 6.957497 0.000000 12613 +engineeringacm 0 1 6.957497 0.000000 12614 +machineryieeeth 0 1 6.957497 0.000000 12615 +engineerscpsrcomput 0 1 6.957497 0.000000 12616 +responsibilityconnect 0 1 6.957497 0.000000 12617 +elsewhereto 0 1 6.957497 0.000000 12618 +seldom 0 1 6.957497 0.000000 12619 +forewarn 0 1 6.957497 0.000000 12620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..eaded61b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +hour 0 165 1.791759 0.000000 46 +compil 0 122 2.079442 0.000000 96 +mondai 0 77 2.564949 0.000000 206 +wednesdai 0 64 2.772589 0.000000 261 +anthoni 1 4 5.568345 5.568345 2792 +pang 1 3 5.857933 5.857933 3509 +hung 0 3 5.857933 0.000000 3524 +hing 1 2 6.263398 6.263398 5442 +pagehung 0 1 6.957497 0.000000 12621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..e698b1fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +fall 1 181 1.609438 1.609438 40 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +studi 0 120 2.079442 0.000000 91 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +come 0 78 2.564949 0.000000 202 +taylor 0 63 2.772589 0.000000 287 +experi 0 64 2.772589 0.000000 283 +colleg 0 61 2.833213 0.000000 300 +sciencesunivers 0 37 3.332205 0.000000 486 +bachelor 0 17 4.110874 0.000000 957 +aruna 1 1 6.957497 6.957497 12622 +addalacurr 0 1 6.957497 0.000000 12623 +studentth 0 1 6.957497 0.000000 12624 +sciencess 0 1 6.957497 0.000000 12625 +engineeringmysorework 0 1 6.957497 0.000000 12626 +mysoreindiai 0 1 6.957497 0.000000 12627 +mysor 0 1 6.957497 0.000000 12628 +cityindiato 0 1 6.957497 0.000000 12629 +eduvoic 0 1 6.957497 0.000000 12630 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..f37041e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +pleas 0 113 2.197225 0.000000 114 +main 0 67 2.708050 0.000000 256 +get 0 46 3.091042 0.000000 380 +exist 0 30 3.555348 0.000000 569 +known 0 24 3.761200 0.000000 702 +usavoic 0 13 4.382027 0.000000 1198 +mepost 0 10 4.653960 0.000000 1472 +round 0 8 4.875197 0.000000 1769 +let 0 3 5.857933 0.000000 3790 +patienc 0 2 6.263398 0.000000 5466 +ashi 1 1 6.957497 6.957497 12631 +tarafdarashi 0 1 6.957497 0.000000 12632 +tarafdarabout 0 1 6.957497 0.000000 12633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..da8d32e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +mail 1 238 1.386294 1.386294 22 +also 0 259 1.386294 0.000000 28 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +activ 0 84 2.484907 0.000000 182 +complet 0 77 2.564949 0.000000 208 +line 0 75 2.639057 0.000000 231 +solv 0 73 2.639057 0.000000 234 +prof 0 64 2.772589 0.000000 273 +dept 0 64 2.772589 0.000000 291 +taylor 0 63 2.772589 0.000000 287 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +date 0 51 2.995732 0.000000 344 +advisor 0 51 2.995732 0.000000 355 +electr 0 38 3.295837 0.000000 461 +expect 0 37 3.332205 0.000000 484 +within 0 33 3.433987 0.000000 525 +queri 0 33 3.433987 0.000000 524 +hard 0 30 3.555348 0.000000 563 +campu 0 27 3.637586 0.000000 623 +mine 0 26 3.688879 0.000000 654 +constraint 0 26 3.688879 0.000000 636 +expert 0 20 3.951244 0.000000 833 +toolkit 0 20 3.951244 0.000000 835 +histori 0 19 4.007333 0.000000 853 +along 0 18 4.060443 0.000000 878 +coordin 0 13 4.382027 0.000000 1182 +daniel 0 12 4.465908 0.000000 1233 +instanc 0 11 4.553877 0.000000 1322 +candid 0 9 4.753590 0.000000 1606 +mirank 0 5 5.347108 0.000000 2543 +bayardo 1 2 6.263398 6.263398 5467 +roberto 0 2 6.263398 0.000000 5468 +exception 0 2 6.263398 0.000000 4467 +pageroberto 0 1 6.957497 0.000000 12634 +infosleuth 0 1 6.957497 0.000000 12635 +satisfactionmi 0 1 6.957497 0.000000 12636 +generatingand 0 1 6.957497 0.000000 12637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..6451ad8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +model 1 145 1.945910 1.945910 69 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +abstract 0 62 2.772589 0.000000 276 +taylor 0 63 2.772589 0.000000 287 +reason 0 57 2.890372 0.000000 318 +physic 0 47 3.091042 0.000000 377 +past 0 42 3.218876 0.000000 428 +ofth 0 36 3.367296 0.000000 491 +dissert 0 32 3.465736 0.000000 549 +retriev 0 27 3.637586 0.000000 621 +behavior 0 18 4.060443 0.000000 881 +month 1 15 4.248495 4.248495 1025 +refin 1 11 4.553877 4.553877 1363 +entitl 0 9 4.753590 0.000000 1490 +drink 0 9 4.753590 0.000000 1607 +informationemail 0 9 4.753590 0.000000 1564 +overviewof 0 2 6.263398 0.000000 5469 +bert 1 1 6.957497 6.957497 12638 +imprecis 1 1 6.957497 6.957497 12639 +kayresearch 0 1 6.957497 0.000000 12640 +vitami 0 1 6.957497 0.000000 12641 +stuffsonia 0 1 6.957497 0.000000 12642 +andnina 0 1 6.957497 0.000000 12643 +springbank 0 1 6.957497 0.000000 12644 +scotchdrinksof 0 1 6.957497 0.000000 12645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..9717e5eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +project 0 340 1.098612 0.000000 18 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +architectur 0 139 1.945910 0.000000 77 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +introduct 0 126 2.079442 0.000000 87 +databas 0 122 2.079442 0.000000 86 +mathemat 0 108 2.197225 0.000000 123 +manag 0 114 2.197225 0.000000 125 +homepag 0 93 2.397895 0.000000 148 +school 0 84 2.484907 0.000000 188 +logic 0 71 2.639057 0.000000 230 +multimedia 0 68 2.708050 0.000000 258 +visit 0 63 2.772589 0.000000 288 +semest 0 58 2.890372 0.000000 312 +visitor 0 49 3.044522 0.000000 371 +third 0 43 3.178054 0.000000 412 +term 0 43 3.178054 0.000000 411 +vladimir 0 11 4.553877 0.000000 1324 +harrick 0 7 5.010635 0.000000 1849 +lifschitz 0 5 5.347108 0.000000 2542 +coursesc 0 4 5.568345 0.000000 2692 +vinc 0 2 6.263398 0.000000 5414 +bhanu 1 1 6.957497 6.957497 12646 +homepagethi 0 1 6.957497 0.000000 12647 +akhil 0 1 6.957497 0.000000 12648 +reddythank 0 1 6.957497 0.000000 12649 +austinm 0 1 6.957497 0.000000 12650 +datacommun 0 1 6.957497 0.000000 12651 +anitish 0 1 6.957497 0.000000 12652 +barua 0 1 6.957497 0.000000 12653 +schwetmani 0 1 6.957497 0.000000 12654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..d71dbb80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,229 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +updat 1 191 1.609438 1.609438 41 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +list 0 201 1.609438 0.000000 39 +austin 1 168 1.791759 1.791759 63 +read 0 154 1.791759 0.000000 47 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +number 1 130 2.079442 2.079442 97 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +final 0 116 2.197225 0.000000 108 +theori 0 111 2.197225 0.000000 127 +send 0 114 2.197225 0.000000 109 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +peopl 2 96 2.302585 4.605170 132 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +pictur 0 89 2.397895 0.000000 160 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +real 0 93 2.397895 0.000000 144 +level 0 87 2.484907 0.000000 180 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +complet 0 77 2.564949 0.000000 208 +involv 0 71 2.639057 0.000000 227 +free 0 73 2.639057 0.000000 224 +would 1 67 2.708050 2.708050 251 +differ 0 66 2.708050 0.000000 253 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +written 0 63 2.772589 0.000000 278 +colleg 0 61 2.833213 0.000000 300 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +telephon 1 50 3.044522 3.044522 373 +physic 0 47 3.091042 0.000000 377 +quarter 0 47 3.091042 0.000000 389 +made 0 44 3.135494 0.000000 398 +answer 0 45 3.135494 0.000000 391 +anoth 0 45 3.135494 0.000000 408 +even 0 45 3.135494 0.000000 393 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +probabl 1 40 3.258097 3.258097 455 +error 0 40 3.258097 0.000000 449 +correct 0 38 3.295837 0.000000 462 +feel 1 37 3.332205 3.332205 483 +expect 0 37 3.332205 0.000000 484 +connect 0 37 3.332205 0.000000 485 +soon 0 36 3.367296 0.000000 494 +michael 0 35 3.401197 0.000000 514 +return 0 34 3.401197 0.000000 502 +either 0 35 3.401197 0.000000 506 +articl 0 33 3.433987 0.000000 530 +human 1 32 3.465736 3.465736 546 +taken 0 31 3.496508 0.000000 555 +hard 0 30 3.555348 0.000000 563 +postal 0 30 3.555348 0.000000 580 +depend 0 29 3.583519 0.000000 583 +built 0 29 3.583519 0.000000 592 +quot 0 29 3.583519 0.000000 582 +intend 0 28 3.610918 0.000000 599 +ask 0 28 3.610918 0.000000 597 +symbol 0 27 3.637586 0.000000 620 +altern 0 26 3.688879 0.000000 641 +although 0 25 3.737670 0.000000 667 +accur 0 25 3.737670 0.000000 680 +sometim 1 24 3.761200 3.761200 696 +frame 0 24 3.761200 0.000000 684 +interpret 0 24 3.761200 0.000000 686 +decis 1 23 3.806662 3.806662 728 +lead 0 23 3.806662 0.000000 718 +almost 0 22 3.850148 0.000000 742 +util 0 21 3.912023 0.000000 774 +theorem 0 21 3.912023 0.000000 786 +prepar 0 20 3.951244 0.000000 824 +assum 0 19 4.007333 0.000000 845 +accept 0 18 4.060443 0.000000 879 +account 0 18 4.060443 0.000000 882 +estim 0 17 4.110874 0.000000 930 +differenti 0 17 4.110874 0.000000 921 +choic 0 16 4.174387 0.000000 979 +transfer 0 16 4.174387 0.000000 967 +cognit 0 16 4.174387 0.000000 986 +psycholog 0 15 4.248495 0.000000 1054 +mayb 0 15 4.248495 0.000000 1014 +save 1 14 4.317488 4.317488 1099 +manner 0 14 4.317488 0.000000 1074 +balanc 0 14 4.317488 0.000000 1112 +stori 0 14 4.317488 0.000000 1087 +econom 0 13 4.382027 0.000000 1184 +rememb 0 12 4.465908 0.000000 1217 +sound 1 9 4.753590 4.753590 1605 +unusu 0 9 4.753590 0.000000 1566 +exact 0 9 4.753590 0.000000 1509 +charg 0 9 4.753590 0.000000 1582 +risk 1 8 4.875197 4.875197 1689 +brain 1 8 4.875197 4.875197 1638 +formul 0 8 4.875197 0.000000 1733 +wire 0 8 4.875197 0.000000 1747 +simpli 0 8 4.875197 0.000000 1626 +prover 0 8 4.875197 0.000000 1653 +insert 0 8 4.875197 0.000000 1687 +filter 0 8 4.875197 0.000000 1641 +explain 1 7 5.010635 5.010635 1816 +maxim 0 7 5.010635 0.000000 1944 +prevent 0 7 5.010635 0.000000 1827 +gave 0 7 5.010635 0.000000 1922 +remind 0 7 5.010635 0.000000 1799 +wrong 1 6 5.164786 5.164786 2025 +consequ 0 6 5.164786 0.000000 1989 +biolog 0 6 5.164786 0.000000 2147 +mistak 0 6 5.164786 0.000000 2110 +postcard 0 6 5.164786 0.000000 2181 +promis 0 6 5.164786 0.000000 2037 +hidden 0 6 5.164786 0.000000 1987 +quantum 0 6 5.164786 0.000000 2214 +adopt 1 5 5.347108 5.347108 2467 +amherst 0 5 5.347108 0.000000 2484 +fair 0 5 5.347108 0.000000 2333 +respond 0 5 5.347108 0.000000 2354 +favor 0 5 5.347108 0.000000 2414 +ahead 0 5 5.347108 0.000000 2338 +puzzl 0 5 5.347108 0.000000 2507 +stupid 0 5 5.347108 0.000000 2489 +analog 1 4 5.568345 5.568345 2875 +kill 0 4 5.568345 0.000000 3000 +fire 0 4 5.568345 0.000000 3001 +hypothesi 0 4 5.568345 0.000000 2650 +suppos 0 4 5.568345 0.000000 3002 +neuron 1 3 5.857933 5.857933 3798 +coin 1 3 5.857933 5.857933 3799 +diseas 1 3 5.857933 5.857933 3635 +scream 0 3 5.857933 0.000000 3609 +wasn 0 3 5.857933 0.000000 3800 +incorrect 0 3 5.857933 0.000000 3134 +cogsci 1 2 6.263398 6.263398 4798 +toss 1 2 6.263398 6.263398 5470 +reject 1 2 6.263398 6.263398 5418 +bogu 1 2 6.263398 6.263398 5471 +advert 0 2 6.263398 0.000000 5201 +belov 0 2 6.263398 0.000000 5073 +imagin 0 2 6.263398 0.000000 5472 +combat 0 2 6.263398 0.000000 5473 +nobodi 0 2 6.263398 0.000000 5474 +voltag 0 2 6.263398 0.000000 5475 +invalid 0 2 6.263398 0.000000 5476 +append 0 2 6.263398 0.000000 4295 +informationthi 0 2 6.263398 0.000000 5477 +empti 0 2 6.263398 0.000000 5478 +bogon 1 1 6.957497 6.957497 12655 +avers 1 1 6.957497 6.957497 12656 +outcom 1 1 6.957497 6.957497 12657 +bogo 1 1 6.957497 6.957497 12658 +bogomolnymichael 0 1 6.957497 0.000000 12659 +bogomolni 0 1 6.957497 0.000000 12660 +interestsnot 0 1 6.957497 0.000000 12661 +jenef 0 1 6.957497 0.000000 12662 +husman 0 1 6.957497 0.000000 12663 +bet 0 1 6.957497 0.000000 12664 +diminish 0 1 6.957497 0.000000 12665 +tverski 0 1 6.957497 0.000000 12666 +kahneman 0 1 6.957497 0.000000 12667 +verbatimfrom 0 1 6.957497 0.000000 12668 +outbreak 0 1 6.957497 0.000000 12669 +beenpropos 0 1 6.957497 0.000000 12670 +programsar 0 1 6.957497 0.000000 12671 +besav 0 1 6.957497 0.000000 12672 +digitalif 0 1 6.957497 0.000000 12673 +electrochem 0 1 6.957497 0.000000 12674 +axon 0 1 6.957497 0.000000 12675 +shaki 0 1 6.957497 0.000000 12676 +inaccur 0 1 6.957497 0.000000 12677 +subtract 0 1 6.957497 0.000000 12678 +checkbook 0 1 6.957497 0.000000 12679 +nevertheless 0 1 6.957497 0.000000 12680 +misfir 0 1 6.957497 0.000000 12681 +italic 0 1 6.957497 0.000000 12682 +researchcognit 0 1 6.957497 0.000000 12683 +sciencearitifici 0 1 6.957497 0.000000 12684 +intelligencemathemat 0 1 6.957497 0.000000 12685 +logictopolog 0 1 6.957497 0.000000 12686 +ghrist 0 1 6.957497 0.000000 12687 +wilshir 0 1 6.957497 0.000000 12688 +parkwai 0 1 6.957497 0.000000 12689 +talentsdefinit 0 1 6.957497 0.000000 12690 +bogodynamicsdefinit 0 1 6.957497 0.000000 12691 +sortwhil 0 1 6.957497 0.000000 12692 +bogos 0 1 6.957497 0.000000 12693 +bogomet 0 1 6.957497 0.000000 12694 +flux 0 1 6.957497 0.000000 12695 +bogotifi 0 1 6.957497 0.000000 12696 +autobogotiphobia 0 1 6.957497 0.000000 12697 +blinkenlight 0 1 6.957497 0.000000 12698 +lasher 0 1 6.957497 0.000000 12699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..0f2c5468 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +time 1 293 1.098612 1.098612 17 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +note 0 142 1.945910 0.000000 67 +mathemat 1 108 2.197225 2.197225 123 +teach 0 108 2.197225 0.000000 112 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +educ 0 86 2.484907 0.000000 191 +build 0 85 2.484907 0.000000 184 +state 0 76 2.564949 0.000000 207 +method 0 80 2.564949 0.000000 213 +decemb 0 80 2.564949 0.000000 215 +logic 0 71 2.639057 0.000000 230 +view 0 70 2.708050 0.000000 254 +dept 0 64 2.772589 0.000000 291 +result 0 65 2.772589 0.000000 281 +polici 0 64 2.772589 0.000000 279 +share 0 59 2.833213 0.000000 304 +detail 0 57 2.890372 0.000000 321 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +undergradu 0 54 2.944439 0.000000 338 +found 0 53 2.944439 0.000000 337 +much 0 52 2.995732 0.000000 349 +approach 0 48 3.044522 0.000000 366 +set 0 50 3.044522 0.000000 361 +physic 0 47 3.091042 0.000000 377 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +natur 0 44 3.135494 0.000000 406 +press 0 42 3.218876 0.000000 419 +close 0 38 3.295837 0.000000 465 +formal 0 37 3.332205 0.000000 478 +mean 0 37 3.332205 0.000000 477 +short 0 36 3.367296 0.000000 499 +articl 1 33 3.433987 3.433987 530 +john 0 33 3.433987 0.000000 532 +taken 1 31 3.496508 3.496508 555 +photo 0 31 3.496508 0.000000 561 +posit 0 31 3.496508 0.000000 552 +richard 0 31 3.496508 0.000000 559 +scientist 0 31 3.496508 0.000000 560 +robert 1 30 3.555348 3.555348 567 +univ 0 28 3.610918 0.000000 617 +pass 0 28 3.610918 0.000000 611 +symbol 1 27 3.637586 3.637586 620 +repres 0 26 3.688879 0.000000 656 +bound 0 26 3.688879 0.000000 659 +fundament 1 25 3.737670 3.737670 661 +reach 0 24 3.761200 0.000000 688 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +recommend 0 22 3.850148 0.000000 737 +properti 0 22 3.850148 0.000000 749 +divis 0 21 3.912023 0.000000 803 +theorem 0 21 3.912023 0.000000 786 +fund 0 21 3.912023 0.000000 805 +verif 0 20 3.951244 0.000000 826 +wonder 0 20 3.951244 0.000000 815 +wrote 0 20 3.951244 0.000000 830 +offici 0 18 4.060443 0.000000 894 +moor 0 17 4.110874 0.000000 936 +everyth 0 13 4.382027 0.000000 1169 +introduc 0 13 4.382027 0.000000 1139 +shape 0 12 4.465908 0.000000 1245 +stephen 0 11 4.553877 0.000000 1342 +smart 0 11 4.553877 0.000000 1352 +noth 0 11 4.553877 0.000000 1328 +peter 0 11 4.553877 0.000000 1316 +instanc 0 11 4.553877 0.000000 1322 +rice 0 11 4.553877 0.000000 1336 +death 0 10 4.653960 0.000000 1457 +govern 1 9 4.753590 4.753590 1581 +float 0 9 4.753590 0.000000 1504 +end 0 9 4.753590 0.000000 1567 +said 0 9 4.753590 0.000000 1571 +ball 0 9 4.753590 0.000000 1608 +prover 0 8 4.875197 0.000000 1653 +joke 0 8 4.875197 0.000000 1620 +pageth 0 7 5.010635 0.000000 1939 +microprocessor 0 7 5.010635 0.000000 1808 +zero 0 7 5.010635 0.000000 1896 +intellectu 0 7 5.010635 0.000000 1847 +discoveri 0 7 5.010635 0.000000 1915 +boyer 1 6 5.164786 5.164786 2013 +furthermor 0 6 5.164786 0.000000 2141 +licens 1 5 5.347108 5.347108 2520 +own 0 5 5.347108 0.000000 2531 +despit 0 5 5.347108 0.000000 2317 +oftexa 1 4 5.568345 5.568345 3003 +vote 0 4 5.568345 0.000000 2953 +disclaim 0 4 5.568345 0.000000 2847 +fire 0 4 5.568345 0.000000 3001 +subsequ 0 4 5.568345 0.000000 2665 +usaemail 0 3 5.857933 0.000000 3722 +mccune 0 3 5.857933 0.000000 3522 +enumer 0 3 5.857933 0.000000 3244 +tenur 0 3 5.857933 0.000000 3801 +shouldb 0 3 5.857933 0.000000 3673 +deutsch 0 3 5.857933 0.000000 3802 +harold 0 3 5.857933 0.000000 3803 +carbon 0 3 5.857933 0.000000 3804 +loss 0 3 5.857933 0.000000 3805 +edufax 0 2 6.263398 0.000000 5479 +knowna 0 2 6.263398 0.000000 5480 +webth 0 2 6.263398 0.000000 5481 +projectmi 0 2 6.263398 0.000000 5482 +andsom 0 2 6.263398 0.000000 5483 +thegreat 0 2 6.263398 0.000000 4987 +thereof 0 2 6.263398 0.000000 5484 +steal 0 2 6.263398 0.000000 5485 +riski 0 2 6.263398 0.000000 4291 +peano 0 2 6.263398 0.000000 4234 +rebel 0 2 6.263398 0.000000 5388 +amor 0 2 6.263398 0.000000 5486 +congeni 0 2 6.263398 0.000000 4713 +rudi 0 2 6.263398 0.000000 5487 +verg 0 2 6.263398 0.000000 5488 +atom 0 2 6.263398 0.000000 4472 +lament 0 2 6.263398 0.000000 4866 +texan 0 2 6.263398 0.000000 5489 +boyerhom 0 1 6.957497 0.000000 12700 +philosophydepart 0 1 6.957497 0.000000 12701 +austinhow 0 1 6.957497 0.000000 12702 +mepap 0 1 6.957497 0.000000 12703 +locationsclassescurriculum 0 1 6.957497 0.000000 12704 +vitaeperson 0 1 6.957497 0.000000 12705 +dataeducationpublicationshonorsjobsgradu 0 1 6.957497 0.000000 12706 +studentsth 0 1 6.957497 0.000000 12707 +nqthm 0 1 6.957497 0.000000 12708 +mccarthi 0 1 6.957497 0.000000 12709 +moffett 0 1 6.957497 0.000000 12710 +controversyni 0 1 6.957497 0.000000 12711 +robbin 0 1 6.957497 0.000000 12712 +permitsth 0 1 6.957497 0.000000 12713 +administrativeoverhead 0 1 6.957497 0.000000 12714 +howthi 0 1 6.957497 0.000000 12715 +confess 0 1 6.957497 0.000000 12716 +acanon 0 1 6.957497 0.000000 12717 +thumper 0 1 6.957497 0.000000 12718 +universitiesstandard 0 1 6.957497 0.000000 12719 +aweb 0 1 6.957497 0.000000 12720 +anind 0 1 6.957497 0.000000 12721 +endors 0 1 6.957497 0.000000 12722 +habitu 0 1 6.957497 0.000000 12723 +hislectur 0 1 6.957497 0.000000 12724 +militaryacademi 0 1 6.957497 0.000000 12725 +incens 0 1 6.957497 0.000000 12726 +hisformalist 0 1 6.957497 0.000000 12727 +hispromis 0 1 6.957497 0.000000 12728 +turin 0 1 6.957497 0.000000 12729 +sincomplet 0 1 6.957497 0.000000 12730 +rucker 0 1 6.957497 0.000000 12731 +extinct 0 1 6.957497 0.000000 12732 +kroto 0 1 6.957497 0.000000 12733 +britain 0 1 6.957497 0.000000 12734 +sussex 0 1 6.957497 0.000000 12735 +chemistrypr 0 1 6.957497 0.000000 12736 +curl 0 1 6.957497 0.000000 12737 +smallei 0 1 6.957497 0.000000 12738 +inhouston 0 1 6.957497 0.000000 12739 +asocc 0 1 6.957497 0.000000 12740 +upup 0 1 6.957497 0.000000 12741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..87524b4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,157 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +engin 1 297 1.098612 1.098612 20 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +parallel 3 169 1.791759 5.375277 60 +texa 1 160 1.791759 1.791759 64 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +distribut 0 162 1.791759 0.000000 51 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +process 1 142 1.945910 1.945910 72 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +area 0 144 1.945910 0.000000 80 +file 0 132 1.945910 0.000000 70 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +code 2 108 2.197225 4.394450 116 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +technic 0 100 2.302585 0.000000 140 +proceed 1 93 2.397895 2.397895 152 +graphic 1 90 2.397895 2.397895 147 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +environ 2 84 2.484907 4.969814 177 +level 1 87 2.484907 2.484907 180 +ieee 1 86 2.484907 2.484907 190 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +refer 0 78 2.564949 0.000000 203 +april 0 77 2.564949 0.000000 196 +good 0 77 2.564949 0.000000 200 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +logic 0 71 2.639057 0.000000 230 +intellig 0 72 2.639057 0.000000 225 +august 1 66 2.708050 2.708050 257 +integr 1 67 2.708050 2.708050 245 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +interact 0 62 2.772589 0.000000 270 +juli 0 60 2.833213 0.000000 305 +major 0 56 2.890372 0.000000 315 +three 0 54 2.944439 0.000000 330 +visual 1 48 3.044522 3.044522 372 +physic 1 47 3.091042 3.091042 377 +california 0 46 3.091042 0.000000 388 +describ 0 45 3.135494 0.000000 400 +societi 1 40 3.258097 3.258097 456 +electr 0 38 3.295837 0.000000 461 +prototyp 0 38 3.295837 0.000000 463 +formal 0 37 3.332205 0.000000 478 +ofth 0 36 3.367296 0.000000 491 +jame 1 35 3.401197 3.401197 507 +award 0 34 3.401197 0.000000 523 +concurr 0 34 3.401197 0.000000 501 +extend 0 32 3.465736 0.000000 539 +idea 0 32 3.465736 0.000000 545 +focu 0 30 3.555348 0.000000 571 +specifi 0 30 3.555348 0.000000 568 +graph 0 30 3.555348 0.000000 576 +chair 0 29 3.583519 0.000000 596 +univ 0 28 3.610918 0.000000 617 +american 0 27 3.637586 0.000000 634 +compar 1 26 3.688879 3.688879 648 +experiment 0 26 3.688879 0.000000 645 +rule 0 26 3.688879 0.000000 638 +proc 0 26 3.688879 0.000000 649 +supercomput 0 25 3.737670 0.000000 681 +fellow 1 24 3.761200 3.761200 701 +flow 0 24 3.761200 0.000000 700 +displai 0 23 3.806662 0.000000 712 +siam 0 21 3.912023 0.000000 800 +longer 0 20 3.951244 0.000000 816 +debug 1 17 4.110874 4.110874 944 +moor 1 17 4.110874 4.110874 936 +brown 2 16 4.174387 8.348774 977 +partit 0 16 4.174387 0.000000 984 +brief 0 16 4.174387 0.000000 1001 +conf 0 13 4.382027 0.000000 1181 +evolv 0 12 4.465908 0.000000 1223 +robust 0 12 4.465908 0.000000 1271 +volum 0 11 4.553877 0.000000 1347 +broad 0 11 4.553877 0.000000 1302 +declar 0 9 4.753590 0.000000 1526 +notat 0 9 4.753590 0.000000 1489 +researchi 0 8 4.875197 0.000000 1756 +unifi 0 8 4.875197 0.000000 1774 +newton 1 7 5.010635 5.010635 1824 +henc 1 7 5.010635 5.010635 1805 +ongo 0 6 5.164786 0.000000 2215 +british 0 5 5.347108 0.000000 2546 +jain 0 5 5.347108 0.000000 2332 +mirank 0 5 5.347108 0.000000 2543 +remain 0 5 5.347108 0.000000 2278 +hyder 1 4 5.568345 5.568345 2772 +werth 1 4 5.568345 5.568345 3004 +interestparallel 0 3 5.857933 0.000000 3806 +narrow 0 3 5.857933 0.000000 3807 +publicationsj 0 3 5.857933 0.000000 3808 +baltimor 0 3 5.857933 0.000000 3809 +dongarra 1 2 6.263398 6.263398 5058 +hendrix 0 2 6.263398 0.000000 5490 +anabstract 0 2 6.263398 0.000000 5491 +brownereg 0 1 6.957497 0.000000 12742 +collegeph 0 1 6.957497 0.000000 12743 +austinhonor 0 1 6.957497 0.000000 12744 +societyarea 0 1 6.957497 0.000000 12745 +sciencewith 0 1 6.957497 0.000000 12746 +tenyear 0 1 6.957497 0.000000 12747 +computation 0 1 6.957497 0.000000 12748 +includesmethod 0 1 6.957497 0.000000 12749 +highlevel 0 1 6.957497 0.000000 12750 +throughdata 0 1 6.957497 0.000000 12751 +compositionalapproach 0 1 6.957497 0.000000 12752 +intelligenceprocess 0 1 6.957497 0.000000 12753 +fluiddynam 0 1 6.957497 0.000000 12754 +domaincompil 0 1 6.957497 0.000000 12755 +basedlanguag 0 1 6.957497 0.000000 12756 +timedecis 0 1 6.957497 0.000000 12757 +andpract 0 1 6.957497 0.000000 12758 +fourthworkshop 0 1 6.957497 0.000000 12759 +santacruz 0 1 6.957497 0.000000 12760 +theeffect 0 1 6.957497 0.000000 12761 +parallelizingcompil 0 1 6.957497 0.000000 12762 +kleyn 0 1 6.957497 0.000000 12763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..1bf2cc82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +link 1 247 1.386294 1.386294 24 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +recent 0 167 1.791759 0.000000 58 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +peopl 0 96 2.302585 0.000000 132 +comment 1 93 2.397895 2.397895 146 +follow 0 92 2.397895 0.000000 143 +member 0 84 2.484907 0.000000 165 +chang 0 82 2.484907 0.000000 163 +wide 0 84 2.484907 0.000000 185 +ieee 0 86 2.484907 0.000000 190 +institut 0 84 2.484907 0.000000 187 +issu 0 78 2.564949 0.000000 211 +logic 0 71 2.639057 0.000000 230 +prof 0 64 2.772589 0.000000 273 +abstract 0 62 2.772589 0.000000 276 +special 0 56 2.890372 0.000000 320 +found 0 53 2.944439 0.000000 337 +electron 0 47 3.091042 0.000000 379 +autom 0 41 3.218876 0.000000 434 +electr 0 38 3.295837 0.000000 461 +rang 0 30 3.555348 0.000000 565 +utc 0 27 3.637586 0.000000 629 +trace 0 25 3.737670 0.000000 677 +vlsi 1 21 3.912023 3.912023 795 +martin 0 21 3.912023 0.000000 794 +chen 0 21 3.912023 0.000000 791 +rout 0 21 3.912023 0.000000 793 +supervis 0 20 3.951244 0.000000 840 +synthesi 0 20 3.951244 0.000000 834 +partit 0 16 4.174387 0.000000 984 +fpga 1 10 4.653960 4.653960 1433 +placement 0 10 4.653960 0.000000 1420 +wong 0 9 4.753590 0.000000 1609 +classifi 0 9 4.753590 0.000000 1537 +chung 0 7 5.010635 0.000000 1964 +zhou 0 6 5.164786 0.000000 2092 +ping 1 4 5.568345 5.568345 2922 +ming 0 3 5.857933 0.000000 3712 +researchth 0 2 6.263398 0.000000 5492 +broadli 0 2 6.263398 0.000000 5095 +sigda 0 2 6.263398 0.000000 5493 +thakur 1 1 6.957497 6.957497 12764 +addressdepart 0 1 6.957497 0.000000 12765 +chenyao 0 1 6.957497 0.000000 12766 +yung 0 1 6.957497 0.000000 12767 +fang 0 1 6.957497 0.000000 12768 +shashidhar 0 1 6.957497 0.000000 12769 +groupcan 0 1 6.957497 0.000000 12770 +austinclick 0 1 6.957497 0.000000 12771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..8b35bfbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +avail 0 169 1.791759 0.000000 48 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +professor 0 137 1.945910 0.000000 76 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +resum 0 79 2.564949 0.000000 217 +april 0 77 2.564949 0.000000 196 +prof 0 64 2.772589 0.000000 273 +variou 0 56 2.890372 0.000000 317 +talk 0 53 2.944439 0.000000 336 +hardwar 0 51 2.995732 0.000000 350 +give 0 50 3.044522 0.000000 359 +cool 0 49 3.044522 0.000000 374 +done 0 47 3.091042 0.000000 381 +slide 0 38 3.295837 0.000000 467 +photo 0 31 3.496508 0.000000 561 +travel 0 30 3.555348 0.000000 579 +built 0 29 3.583519 0.000000 592 +effort 0 26 3.688879 0.000000 652 +highli 0 23 3.806662 0.000000 725 +thank 0 23 3.806662 0.000000 721 +divis 0 21 3.912023 0.000000 803 +verif 0 20 3.951244 0.000000 826 +women 0 16 4.174387 0.000000 1004 +todd 0 15 4.248495 0.000000 1051 +wife 0 13 4.382027 0.000000 1196 +land 0 12 4.465908 0.000000 1273 +speak 0 12 4.465908 0.000000 1283 +bill 0 11 4.553877 0.000000 1297 +peter 0 11 4.553877 0.000000 1316 +label 0 10 4.653960 0.000000 1423 +poetri 0 9 4.753590 0.000000 1596 +andth 0 9 4.753590 0.000000 1481 +mach 0 8 4.875197 0.000000 1669 +daughter 0 7 5.010635 0.000000 1943 +ruth 0 7 5.010635 0.000000 1870 +prioriti 0 7 5.010635 0.000000 1792 +foreign 0 7 5.010635 0.000000 1919 +beer 1 6 5.164786 5.164786 2216 +somewher 0 6 5.164786 0.000000 2176 +approv 0 6 5.164786 0.000000 2078 +humor 1 5 5.347108 5.347108 2533 +allen 0 5 5.347108 0.000000 2470 +emerson 0 5 5.347108 0.000000 2547 +li 0 5 5.347108 0.000000 2500 +substitut 0 5 5.347108 0.000000 2247 +ti 0 4 5.568345 0.000000 3005 +clair 0 4 5.568345 0.000000 2605 +enjoy 0 4 5.568345 0.000000 2937 +guangtian 0 3 5.857933 0.000000 3810 +haiku 0 3 5.857933 0.000000 3811 +cristian 0 2 6.263398 0.000000 4311 +sourcesth 0 2 6.263398 0.000000 4219 +disinform 0 2 6.263398 0.000000 5494 +dole 0 2 6.263398 0.000000 4067 +canfieldhom 0 1 6.957497 0.000000 12772 +businessmi 0 1 6.957497 0.000000 12773 +flaviu 0 1 6.957497 0.000000 12774 +ther 0 1 6.957497 0.000000 12775 +pleasuredomest 0 1 6.957497 0.000000 12776 +bliss 0 1 6.957497 0.000000 12777 +carla 0 1 6.957497 0.000000 12778 +newborn 0 1 6.957497 0.000000 12779 +parenthood 0 1 6.957497 0.000000 12780 +struck 0 1 6.957497 0.000000 12781 +peel 0 1 6.957497 0.000000 12782 +bottl 0 1 6.957497 0.000000 12783 +sofaspher 0 1 6.957497 0.000000 12784 +olestra 0 1 6.957497 0.000000 12785 +canfield 0 1 6.957497 0.000000 12786 +peterst 0 1 6.957497 0.000000 12787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..684c294a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +professor 1 137 1.945910 1.945910 76 +hall 0 146 1.945910 0.000000 65 +introduct 0 126 2.079442 0.000000 87 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +question 0 91 2.397895 0.000000 141 +real 0 93 2.397895 0.000000 144 +member 0 84 2.484907 0.000000 165 +wide 0 84 2.484907 0.000000 185 +order 0 69 2.708050 0.000000 249 +function 0 62 2.772589 0.000000 275 +taylor 0 63 2.772589 0.000000 287 +suggest 0 53 2.944439 0.000000 331 +autom 0 41 3.218876 0.000000 434 +extend 0 32 3.465736 0.000000 539 +express 0 32 3.465736 0.000000 540 +computersci 0 30 3.555348 0.000000 562 +usual 0 28 3.610918 0.000000 608 +progress 0 28 3.610918 0.000000 598 +theorem 0 21 3.912023 0.000000 786 +supervis 0 20 3.951244 0.000000 840 +safeti 0 20 3.951244 0.000000 817 +prove 0 19 4.007333 0.000000 848 +partial 0 18 4.060443 0.000000 900 +finit 0 14 4.317488 0.000000 1106 +candid 0 9 4.753590 0.000000 1606 +misra 1 7 5.010635 5.010635 1856 +jayadev 0 4 5.568345 0.000000 3006 +uniti 1 3 5.857933 5.857933 3812 +alsointerest 0 3 5.857933 0.000000 3813 +carruth 1 2 6.263398 6.263398 5495 +mydissert 0 2 6.263398 0.000000 5496 +carruthpleas 0 1 6.957497 0.000000 12788 +boundson 0 1 6.957497 0.000000 12789 +ordersemant 0 1 6.957497 0.000000 12790 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..0fe6e6b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +hour 0 165 1.791759 0.000000 46 +problem 0 147 1.945910 0.000000 75 +schedul 1 119 2.079442 2.079442 85 +intern 0 108 2.197225 0.000000 128 +send 0 114 2.197225 0.000000 109 +grade 0 90 2.397895 0.000000 142 +exam 0 86 2.484907 0.000000 169 +homework 0 79 2.564949 0.000000 193 +syllabu 0 67 2.708050 0.000000 247 +polici 0 64 2.772589 0.000000 279 +new 0 64 2.772589 0.000000 262 +improv 0 62 2.772589 0.000000 289 +locat 0 59 2.833213 0.000000 303 +summer 0 56 2.890372 0.000000 311 +suggest 0 53 2.944439 0.000000 331 +idea 0 32 3.465736 0.000000 545 +chen 1 21 3.912023 3.912023 791 +exercis 0 19 4.007333 0.000000 842 +intel 0 16 4.174387 0.000000 1000 +meng 0 12 4.465908 0.000000 1214 +chung 1 7 5.010635 5.010635 1964 +ping 1 4 5.568345 5.568345 2922 +fiance 0 2 6.263398 0.000000 5497 +tsai 0 2 6.263398 0.000000 4831 +bufferinsert 0 1 6.957497 0.000000 12791 +syllabustopicschung 0 1 6.957497 0.000000 12792 +clen 0 1 6.957497 0.000000 12793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..2388f53d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +well 0 109 2.197225 0.000000 121 +homepag 0 93 2.397895 0.000000 148 +main 0 67 2.708050 0.000000 256 +anoth 1 45 3.135494 3.135494 408 +third 0 43 3.178054 0.000000 412 +chen 0 21 3.912023 0.000000 791 +break 0 20 3.951244 0.000000 812 +item 0 19 4.007333 0.000000 856 +easi 0 16 4.174387 0.000000 969 +lake 0 11 4.553877 0.000000 1373 +paragraph 1 10 4.653960 4.653960 1449 +mepost 0 10 4.653960 0.000000 1472 +usaphon 0 9 4.753590 0.000000 1600 +forget 0 8 4.875197 0.000000 1712 +shanghai 0 4 5.568345 0.000000 2925 +blvd 0 4 5.568345 0.000000 3007 +deji 1 2 6.263398 6.263398 5498 +chenabout 0 2 6.263398 0.000000 5499 +bullet 0 2 6.263398 0.000000 5500 +mehello 0 1 6.957497 0.000000 12794 +tongji 0 1 6.957497 0.000000 12795 +chinaa 0 1 6.957497 0.000000 12796 +usahom 0 1 6.957497 0.000000 12797 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..c00d99ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +softwar 0 220 1.386294 0.000000 30 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +peopl 0 96 2.302585 0.000000 132 +educ 1 86 2.484907 2.484907 191 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +school 0 84 2.484907 0.000000 188 +write 1 72 2.639057 2.639057 222 +simul 0 66 2.708050 0.000000 255 +artifici 0 63 2.772589 0.000000 280 +new 0 64 2.772589 0.000000 262 +plai 1 60 2.833213 2.833213 307 +visual 0 48 3.044522 0.000000 372 +life 0 50 3.044522 0.000000 375 +archiv 0 49 3.044522 0.000000 364 +programm 0 39 3.258097 0.000000 445 +movi 0 40 3.258097 0.000000 459 +microsoft 0 38 3.295837 0.000000 468 +mean 0 37 3.332205 0.000000 477 +robot 0 36 3.367296 0.000000 497 +represent 0 35 3.401197 0.000000 512 +common 0 30 3.555348 0.000000 574 +symbol 1 27 3.637586 3.637586 620 +spent 0 25 3.737670 0.000000 676 +client 0 25 3.737670 0.000000 679 +corpor 0 21 3.912023 0.000000 802 +martin 0 21 3.912023 0.000000 794 +watch 0 21 3.912023 0.000000 789 +voic 0 21 3.912023 0.000000 806 +portabl 0 20 3.951244 0.000000 819 +scheme 0 20 3.951244 0.000000 818 +left 0 19 4.007333 0.000000 851 +lisp 0 18 4.060443 0.000000 897 +listen 0 18 4.060443 0.000000 907 +macintosh 1 17 4.110874 4.110874 920 +anywai 0 15 4.248495 0.000000 1047 +emploi 0 12 4.465908 0.000000 1284 +tour 0 11 4.553877 0.000000 1307 +wood 0 11 4.553877 0.000000 1355 +road 0 11 4.553877 0.000000 1374 +bike 0 10 4.653960 0.000000 1468 +hang 0 9 4.753590 0.000000 1499 +brain 0 8 4.875197 0.000000 1638 +ride 0 8 4.875197 0.000000 1741 +sleep 0 6 5.164786 0.000000 2211 +dream 0 6 5.164786 0.000000 2165 +fiction 0 6 5.164786 0.000000 2217 +emerg 0 6 5.164786 0.000000 2038 +hair 0 5 5.347108 0.000000 2446 +worst 0 5 5.347108 0.000000 2287 +webster 0 5 5.347108 0.000000 2468 +phrase 0 5 5.347108 0.000000 2242 +austindepart 0 4 5.568345 0.000000 3008 +catch 0 4 5.568345 0.000000 2602 +sciencestaylor 0 3 5.857933 0.000000 3814 +republican 0 3 5.857933 0.000000 3815 +softwareth 0 3 5.857933 0.000000 3552 +stone 0 3 5.857933 0.000000 3674 +cliff 1 2 6.263398 6.263398 4285 +mstk 1 2 6.263398 6.263398 5501 +northwestern 0 2 6.263398 0.000000 5502 +captain 0 2 6.263398 0.000000 4983 +webth 0 2 6.263398 0.000000 5481 +weird 0 2 6.263398 0.000000 5503 +chaputcliff 0 1 6.957497 0.000000 12798 +chaputth 0 1 6.957497 0.000000 12799 +robotlab 0 1 6.957497 0.000000 12800 +dullchaput 0 1 6.957497 0.000000 12801 +gothimself 0 1 6.957497 0.000000 12802 +anemail 0 1 6.957497 0.000000 12803 +odesta 0 1 6.957497 0.000000 12804 +thelearn 0 1 6.957497 0.000000 12805 +hewrot 0 1 6.957497 0.000000 12806 +trane 0 1 6.957497 0.000000 12807 +thenimpl 0 1 6.957497 0.000000 12808 +studentscal 0 1 6.957497 0.000000 12809 +gamesproject 0 1 6.957497 0.000000 12810 +labannoi 0 1 6.957497 0.000000 12811 +farka 0 1 6.957497 0.000000 12812 +medeski 0 1 6.957497 0.000000 12813 +rerun 0 1 6.957497 0.000000 12814 +korg 0 1 6.957497 0.000000 12815 +turnon 0 1 6.957497 0.000000 12816 +breakfast 0 1 6.957497 0.000000 12817 +raspi 0 1 6.957497 0.000000 12818 +starfleet 0 1 6.957497 0.000000 12819 +turnoff 0 1 6.957497 0.000000 12820 +hangov 0 1 6.957497 0.000000 12821 +fave 0 1 6.957497 0.000000 12822 +eventsdaili 0 1 6.957497 0.000000 12823 +reutersintellicast 0 1 6.957497 0.000000 12824 +weatheraustin 0 1 6.957497 0.000000 12825 +txchicago 0 1 6.957497 0.000000 12826 +ilperiodicalssucksalonmirski 0 1 6.957497 0.000000 12827 +onionmacweekmacuserreferencehypertext 0 1 6.957497 0.000000 12828 +interfaceyahooalta 0 1 6.957497 0.000000 12829 +vistacardiff 0 1 6.957497 0.000000 12830 +databaselyco 0 1 6.957497 0.000000 12831 +mapalt 0 1 6.957497 0.000000 12832 +culturemacintosh 0 1 6.957497 0.000000 12833 +dataappl 0 1 6.957497 0.000000 12834 +computercyberdogquicktimequickdraw 0 1 6.957497 0.000000 12835 +dappl 0 1 6.957497 0.000000 12836 +supportmacintouchmacintosh 0 1 6.957497 0.000000 12837 +resourcecyberdog 0 1 6.957497 0.000000 12838 +poundinfo 0 1 6.957497 0.000000 12839 +rootcool 0 1 6.957497 0.000000 12840 +stufffringewareth 0 1 6.957497 0.000000 12841 +actlabpbsnprnow 0 1 6.957497 0.000000 12842 +catalogpap 0 1 6.957497 0.000000 12843 +rsumsymbol 0 1 6.957497 0.000000 12844 +groundingrobotmap 0 1 6.957497 0.000000 12845 +peopledav 0 1 6.957497 0.000000 12846 +falooncharl 0 1 6.957497 0.000000 12847 +lewisjeff 0 1 6.957497 0.000000 12848 +lindjeff 0 1 6.957497 0.000000 12849 +sherwoodbrian 0 1 6.957497 0.000000 12850 +slatorsandi 0 1 6.957497 0.000000 12851 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..02a66bbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +cours 0 273 1.098612 0.000000 15 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +read 0 154 1.791759 0.000000 47 +like 1 132 1.945910 1.945910 81 +welcom 0 122 2.079442 0.000000 99 +place 1 106 2.197225 2.197225 124 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +search 1 95 2.397895 2.397895 155 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +chang 0 82 2.484907 0.000000 163 +thing 0 84 2.484907 0.000000 189 +internet 0 83 2.484907 0.000000 186 +journal 0 83 2.484907 0.000000 183 +come 0 78 2.564949 0.000000 202 +orient 0 80 2.564949 0.000000 205 +know 0 80 2.564949 0.000000 198 +complet 0 77 2.564949 0.000000 208 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +view 1 70 2.708050 2.708050 254 +would 0 67 2.708050 0.000000 251 +detail 0 57 2.890372 0.000000 321 +unix 0 58 2.890372 0.000000 308 +without 0 50 3.044522 0.000000 370 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +origin 0 38 3.295837 0.000000 472 +china 1 37 3.332205 3.332205 487 +word 0 34 3.401197 0.000000 508 +return 0 34 3.401197 0.000000 502 +hard 0 30 3.555348 0.000000 563 +univ 0 28 3.610918 0.000000 617 +mind 0 27 3.637586 0.000000 632 +enjoi 0 26 3.688879 0.000000 660 +magazin 0 24 3.761200 0.000000 704 +wang 1 21 3.912023 3.912023 790 +among 0 21 3.912023 0.000000 781 +break 0 20 3.951244 0.000000 812 +fine 0 20 3.951244 0.000000 822 +thought 0 17 4.110874 0.000000 945 +countri 0 15 4.248495 0.000000 1059 +decid 0 14 4.317488 0.000000 1075 +came 0 13 4.382027 0.000000 1197 +tsinghua 0 13 4.382027 0.000000 1195 +captur 0 12 4.465908 0.000000 1232 +moment 0 11 4.553877 0.000000 1379 +surf 0 11 4.553877 0.000000 1301 +earth 0 10 4.653960 0.000000 1463 +end 0 9 4.753590 0.000000 1567 +jump 0 9 4.753590 0.000000 1603 +unusu 0 9 4.753590 0.000000 1566 +opinion 0 8 4.875197 0.000000 1708 +philosoph 0 7 5.010635 0.000000 1904 +televis 0 6 5.164786 0.000000 2118 +rock 0 6 5.164786 0.000000 2164 +million 0 5 5.347108 0.000000 2495 +provinc 0 4 5.568345 0.000000 3009 +gloriou 0 3 5.857933 0.000000 3816 +hometown 0 3 5.857933 0.000000 3817 +tower 0 3 5.857933 0.000000 3818 +fresh 0 3 5.857933 0.000000 3706 +nifti 0 2 6.263398 0.000000 5504 +numb 0 2 6.263398 0.000000 5505 +pope 0 2 6.263398 0.000000 5506 +chuanjun 1 1 6.957497 6.957497 12852 +diamond 1 1 6.957497 6.957497 12853 +stun 0 1 6.957497 0.000000 12854 +hubei 0 1 6.957497 0.000000 12855 +beautifulunivers 0 1 6.957497 0.000000 12856 +faceless 0 1 6.957497 0.000000 12857 +brilliant 0 1 6.957497 0.000000 12858 +miner 0 1 6.957497 0.000000 12859 +unemploi 0 1 6.957497 0.000000 12860 +dobb 0 1 6.957497 0.000000 12861 +prose 0 1 6.957497 0.000000 12862 +porsch 0 1 6.957497 0.000000 12863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..b9667cc6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +updat 0 191 1.609438 0.000000 41 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +process 1 142 1.945910 1.945910 72 +model 0 145 1.945910 0.000000 69 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +assist 0 112 2.197225 0.000000 113 +person 0 111 2.197225 0.000000 117 +user 1 104 2.302585 2.302585 137 +book 0 99 2.302585 0.000000 131 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +master 0 76 2.564949 0.000000 216 +interfac 0 79 2.564949 0.000000 209 +nation 0 74 2.639057 0.000000 240 +line 0 75 2.639057 0.000000 231 +degre 0 69 2.708050 0.000000 259 +knowledg 0 67 2.708050 0.000000 243 +dept 0 64 2.772589 0.000000 291 +improv 0 62 2.772589 0.000000 289 +result 0 65 2.772589 0.000000 281 +automat 0 61 2.833213 0.000000 306 +major 0 56 2.890372 0.000000 315 +allow 0 53 2.944439 0.000000 333 +tabl 0 51 2.995732 0.000000 346 +visitor 0 49 3.044522 0.000000 371 +natur 0 44 3.135494 0.000000 406 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +continu 0 39 3.258097 0.000000 448 +word 0 34 3.401197 0.000000 508 +human 0 32 3.465736 0.000000 546 +chines 0 29 3.583519 0.000000 595 +reach 0 24 3.761200 0.000000 688 +proof 0 23 3.806662 0.000000 720 +tenni 0 20 3.951244 0.000000 838 +ever 0 19 4.007333 0.000000 872 +segment 0 17 4.110874 0.000000 931 +taiwan 1 16 4.174387 4.174387 1006 +capabl 0 15 4.248495 0.000000 1016 +train 0 14 4.317488 0.000000 1066 +huang 0 12 4.465908 0.000000 1202 +basketbal 0 12 4.465908 0.000000 1289 +literatur 0 11 4.553877 0.000000 1300 +tag 1 7 5.010635 5.010635 1821 +academia 0 6 5.164786 0.000000 2036 +chin 0 5 5.347108 0.000000 2408 +categori 0 5 5.347108 0.000000 2261 +markov 0 5 5.347108 0.000000 2280 +accuraci 0 5 5.347108 0.000000 2450 +atlant 0 5 5.347108 0.000000 2508 +taipei 0 4 5.568345 0.000000 2926 +worki 0 4 5.568345 0.000000 3010 +basebal 0 4 5.568345 0.000000 2969 +edufing 0 4 5.568345 0.000000 2713 +sinica 0 3 5.857933 0.000000 3819 +expans 0 3 5.857933 0.000000 3755 +instituteof 0 2 6.263398 0.000000 5507 +pinbal 0 2 6.263398 0.000000 5508 +todayth 0 2 6.263398 0.000000 5416 +tser 0 1 6.957497 0.000000 12864 +systemsexperiencei 0 1 6.957497 0.000000 12865 +usinghidden 0 1 6.957497 0.000000 12866 +friendli 0 1 6.957497 0.000000 12867 +toexecut 0 1 6.957497 0.000000 12868 +automatictag 0 1 6.957497 0.000000 12869 +improvedbecaus 0 1 6.957497 0.000000 12870 +interestsmovi 0 1 6.957497 0.000000 12871 +semiolog 0 1 6.957497 0.000000 12872 +siteschina 0 1 6.957497 0.000000 12873 +timesminsheng 0 1 6.957497 0.000000 12874 +dailyth 0 1 6.957497 0.000000 12875 +timesusa 0 1 6.957497 0.000000 12876 +economistth 0 1 6.957497 0.000000 12877 +monthlymak 0 1 6.957497 0.000000 12878 +chuang 0 1 6.957497 0.000000 12879 +meyou 0 1 6.957497 0.000000 12880 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..09ae684e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +august 0 66 2.708050 0.000000 257 +robert 0 30 3.555348 0.000000 567 +runtim 0 19 4.007333 0.000000 858 +multithread 0 11 4.553877 0.000000 1315 +pronounc 0 7 5.010635 0.000000 1918 +blumoferdb 0 5 5.347108 0.000000 2324 +silk 0 2 6.263398 0.000000 5373 +inthi 0 2 6.263398 0.000000 5509 +cilkcilkcilk 0 1 6.957497 0.000000 12881 +languageand 0 1 6.957497 0.000000 12882 +thecilk 0 1 6.957497 0.000000 12883 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..56706f8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +high 0 130 2.079442 0.000000 101 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +school 0 84 2.484907 0.000000 188 +plan 0 65 2.772589 0.000000 272 +complex 0 64 2.772589 0.000000 269 +thesi 0 57 2.890372 0.000000 327 +theoret 0 39 3.258097 0.000000 446 +sciencesunivers 0 37 3.332205 0.000000 486 +connect 0 37 3.332205 0.000000 485 +hong 0 14 4.317488 0.000000 1105 +kong 0 9 4.753590 0.000000 1602 +chung 1 7 5.010635 5.010635 1964 +austinaustin 0 7 5.010635 0.000000 1966 +edumi 0 6 5.164786 0.000000 2132 +fish 0 6 5.164786 0.000000 2207 +poon 0 3 5.857933 0.000000 3820 +ckpoon 0 2 6.263398 0.000000 5510 +hungri 0 2 6.263398 0.000000 5511 +keung 1 1 6.957497 6.957497 12884 +poondepart 0 1 6.957497 0.000000 12885 +askvinc 0 1 6.957497 0.000000 12886 +gogan 0 1 6.957497 0.000000 12887 +problemsom 0 1 6.957497 0.000000 12888 +harmonica 0 1 6.957497 0.000000 12889 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..374919b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +link 1 247 1.386294 1.386294 24 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +relat 0 139 1.945910 0.000000 68 +send 0 114 2.197225 0.000000 109 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +member 0 84 2.484907 0.000000 165 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +dept 0 64 2.772589 0.000000 291 +secur 1 30 3.555348 3.555348 577 +chines 0 29 3.583519 0.000000 595 +linux 0 27 3.637586 0.000000 631 +head 0 23 3.806662 0.000000 732 +divis 0 21 3.912023 0.000000 803 +role 0 14 4.317488 0.000000 1101 +hong 0 14 4.317488 0.000000 1105 +usavoic 0 13 4.382027 0.000000 1198 +thedepart 0 11 4.553877 0.000000 1350 +cryptographi 0 9 4.753590 0.000000 1512 +kong 0 9 4.753590 0.000000 1602 +simon 0 8 4.875197 0.000000 1697 +chung 0 7 5.010635 0.000000 1964 +park 0 6 5.164786 0.000000 2218 +mission 0 5 5.347108 0.000000 2465 +nist 0 4 5.568345 0.000000 2973 +church 0 4 5.568345 0.000000 3011 +rivest 0 3 5.857933 0.000000 3248 +meemail 0 3 5.857933 0.000000 3821 +edupost 0 3 5.857933 0.000000 3822 +thenetwork 0 2 6.263398 0.000000 5434 +byprof 0 2 6.263398 0.000000 5512 +wongchung 0 1 6.957497 0.000000 12890 +wonglast 0 1 6.957497 0.000000 12891 +labwhich 0 1 6.957497 0.000000 12892 +clearinghous 0 1 6.957497 0.000000 12893 +rbac 0 1 6.957497 0.000000 12894 +ckwong 0 1 6.957497 0.000000 12895 +hyde 0 1 6.957497 0.000000 12896 +baptist 0 1 6.957497 0.000000 12897 +netbsd 0 1 6.957497 0.000000 12898 +freebsd 0 1 6.957497 0.000000 12899 +openbsd 0 1 6.957497 0.000000 12900 +tockwong 0 1 6.957497 0.000000 12901 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..a1e5608a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +austin 1 168 1.791759 1.791759 63 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +techniqu 1 99 2.302585 2.302585 138 +real 0 93 2.397895 0.000000 144 +larg 1 82 2.484907 2.484907 168 +requir 0 81 2.484907 0.000000 167 +build 0 85 2.484907 0.000000 184 +dynam 0 76 2.564949 0.000000 194 +issu 0 78 2.564949 0.000000 211 +appli 0 71 2.639057 0.000000 226 +simul 1 66 2.708050 2.708050 255 +knowledg 1 67 2.708050 2.708050 243 +integr 0 67 2.708050 0.000000 245 +abstract 1 62 2.772589 2.772589 276 +complex 0 64 2.772589 0.000000 269 +result 0 65 2.772589 0.000000 281 +descript 0 64 2.772589 0.000000 271 +taylor 0 63 2.772589 0.000000 287 +automat 1 61 2.833213 2.833213 306 +reason 0 57 2.890372 0.000000 318 +detail 0 57 2.890372 0.000000 321 +particular 0 51 2.995732 0.000000 352 +finger 0 52 2.995732 0.000000 354 +possibl 0 47 3.091042 0.000000 378 +netscap 0 44 3.135494 0.000000 395 +vita 0 38 3.295837 0.000000 473 +focu 0 30 3.555348 0.000000 571 +scale 0 28 3.610918 0.000000 613 +retriev 0 27 3.637586 0.000000 621 +constraint 0 26 3.688879 0.000000 636 +variabl 0 23 3.806662 0.000000 715 +behavior 1 18 4.060443 4.060443 881 +hotlist 0 13 4.382027 0.000000 1199 +qualit 1 11 4.553877 4.553877 1362 +facilit 0 10 4.653960 0.000000 1412 +incomplet 0 9 4.753590 0.000000 1575 +elimin 0 9 4.753590 0.000000 1558 +informationemail 0 9 4.753590 0.000000 1564 +aggreg 1 6 5.164786 5.164786 2219 +irrelev 0 3 5.857933 0.000000 3823 +descriptionof 0 2 6.263398 0.000000 5513 +intract 0 2 6.263398 0.000000 5044 +thiswil 0 2 6.263398 0.000000 4944 +withlarg 0 2 6.263398 0.000000 4926 +followingtechniqu 0 2 6.263398 0.000000 5514 +clanci 1 1 6.957497 6.957497 12902 +clancyresearch 0 1 6.957497 0.000000 12903 +containinga 0 1 6.957497 0.000000 12904 +frequentlyi 0 1 6.957497 0.000000 12905 +incomprehens 0 1 6.957497 0.000000 12906 +simulationto 0 1 6.957497 0.000000 12907 +distinctionsof 0 1 6.957497 0.000000 12908 +whichaddress 0 1 6.957497 0.000000 12909 +abstractiontechniqu 0 1 6.957497 0.000000 12910 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..3a21a650 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +three 0 54 2.944439 0.000000 330 +five 0 19 4.007333 0.000000 841 +four 0 18 4.060443 0.000000 905 +eight 0 11 4.553877 0.000000 1331 +seven 0 9 4.753590 0.000000 1561 +nine 0 6 5.164786 0.000000 2047 +eleven 0 3 5.857933 0.000000 3824 +jimbo 0 1 6.957497 0.000000 12911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..f5c59d56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +call 0 91 2.397895 0.000000 153 +taylor 0 63 2.772589 0.000000 287 +januari 0 62 2.772589 0.000000 264 +author 0 39 3.258097 0.000000 450 +china 0 37 3.332205 0.000000 487 +photo 0 31 3.496508 0.000000 561 +chines 1 29 3.583519 3.583519 595 +campu 0 27 3.637586 0.000000 623 +edulast 0 17 4.110874 0.000000 927 +hong 0 14 4.317488 0.000000 1105 +chri 1 11 4.553877 4.553877 1311 +fellowship 0 10 4.653960 0.000000 1460 +kong 0 9 4.753590 0.000000 1602 +christian 0 7 5.010635 0.000000 1949 +church 0 4 5.568345 0.000000 3011 +chuwelcom 0 1 6.957497 0.000000 12912 +myselfmi 0 1 6.957497 0.000000 12913 +chuemail 0 1 6.957497 0.000000 12914 +cnchu 0 1 6.957497 0.000000 12915 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..4872eb58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,180 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +parallel 2 169 1.791759 3.583518 60 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +texa 0 160 1.791759 0.000000 64 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +click 1 142 1.945910 1.945910 78 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +construct 0 139 1.945910 0.000000 82 +file 0 132 1.945910 0.000000 70 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +postscript 1 131 2.079442 2.079442 90 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +code 3 108 2.197225 6.591675 116 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +specif 0 106 2.197225 0.000000 106 +manag 0 114 2.197225 0.000000 125 +user 1 104 2.302585 2.302585 137 +mani 0 92 2.397895 0.000000 150 +comment 0 93 2.397895 0.000000 146 +member 1 84 2.484907 2.484907 165 +journal 0 83 2.484907 0.000000 183 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +interfac 1 79 2.564949 2.564949 209 +come 0 78 2.564949 0.000000 202 +refer 0 78 2.564949 0.000000 203 +dynam 0 76 2.564949 0.000000 194 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +previou 1 62 2.772589 2.772589 290 +written 0 63 2.772589 0.000000 278 +improv 0 62 2.772589 0.000000 289 +januari 0 62 2.772589 0.000000 264 +automat 0 61 2.833213 0.000000 306 +direct 0 57 2.890372 0.000000 316 +major 0 56 2.890372 0.000000 315 +browser 0 56 2.890372 0.000000 313 +publish 0 57 2.890372 0.000000 326 +faculti 0 56 2.890372 0.000000 325 +overview 0 56 2.890372 0.000000 323 +allow 0 53 2.944439 0.000000 333 +visual 1 48 3.044522 3.044522 372 +still 0 50 3.044522 0.000000 362 +featur 1 46 3.091042 3.091042 386 +directori 0 45 3.135494 0.000000 396 +made 0 44 3.135494 0.000000 398 +edit 0 42 3.218876 0.000000 418 +tutori 1 39 3.258097 3.258097 437 +announc 1 40 3.258097 3.258097 441 +multipl 0 39 3.258097 0.000000 453 +join 0 39 3.258097 0.000000 457 +form 0 39 3.258097 0.000000 443 +connect 0 37 3.332205 0.000000 485 +download 1 36 3.367296 3.367296 489 +manual 1 35 3.401197 3.401197 504 +singl 0 34 3.401197 0.000000 510 +jame 0 35 3.401197 0.000000 507 +articl 1 33 3.433987 3.433987 530 +john 1 33 3.433987 3.433987 532 +independ 0 32 3.465736 0.000000 548 +kind 0 32 3.465736 0.000000 541 +ad 0 32 3.465736 0.000000 544 +produc 1 30 3.555348 3.555348 572 +graph 0 30 3.555348 0.000000 576 +compon 0 30 3.555348 0.000000 570 +releas 1 28 3.610918 3.610918 616 +quit 0 27 3.637586 0.000000 633 +repres 0 26 3.688879 0.000000 656 +revis 0 26 3.688879 0.000000 640 +constraint 0 26 3.688879 0.000000 636 +flow 0 24 3.761200 0.000000 700 +compress 0 23 3.806662 0.000000 719 +sequenti 0 22 3.850148 0.000000 745 +varieti 0 22 3.850148 0.000000 740 +hierarchi 0 22 3.850148 0.000000 744 +alumni 0 21 3.912023 0.000000 807 +prepar 0 20 3.951244 0.000000 824 +brown 0 16 4.174387 0.000000 977 +partit 0 16 4.174387 0.000000 984 +enough 0 15 4.248495 0.000000 1040 +affili 0 13 4.382027 0.000000 1194 +incorpor 0 13 4.382027 0.000000 1163 +instanc 0 11 4.553877 0.000000 1322 +node 0 11 4.553877 0.000000 1326 +fill 0 11 4.553877 0.000000 1349 +regard 0 11 4.553877 0.000000 1309 +screen 1 9 4.753590 4.753590 1577 +compos 0 9 4.753590 0.000000 1527 +entitl 0 9 4.753590 0.000000 1490 +shot 1 7 5.010635 5.010635 1898 +notifi 0 6 5.164786 0.000000 2106 +banerje 0 6 5.164786 0.000000 2018 +parallelprogram 0 5 5.347108 0.000000 2379 +stage 0 5 5.347108 0.000000 2488 +despit 0 5 5.347108 0.000000 2317 +alfr 0 4 5.568345 0.000000 2882 +lord 0 4 5.568345 0.000000 2906 +crai 0 4 5.568345 0.000000 3012 +prospect 0 4 5.568345 0.000000 3013 +snail 0 4 5.568345 0.000000 2916 +werth 0 4 5.568345 0.000000 3004 +preced 0 3 5.857933 0.000000 3107 +sophist 0 3 5.857933 0.000000 3545 +easier 0 3 5.857933 0.000000 3470 +pleasant 0 3 5.857933 0.000000 3825 +informationfor 0 3 5.857933 0.000000 3738 +berger 0 3 5.857933 0.000000 3702 +dwip 0 3 5.857933 0.000000 3197 +emeri 1 2 6.263398 6.263398 5515 +wilder 0 2 6.263398 0.000000 5516 +symmetri 0 2 6.263398 0.000000 5517 +newest 0 2 6.263398 0.000000 5518 +reproduc 0 2 6.263398 0.000000 5519 +publicationscod 0 2 6.263398 0.000000 5520 +ajita 0 2 6.263398 0.000000 5461 +systemmast 0 1 6.957497 0.000000 12916 +lawless 0 1 6.957497 0.000000 12917 +codeless 0 1 6.957497 0.000000 12918 +myriad 0 1 6.957497 0.000000 12919 +tennysoncod 0 1 6.957497 0.000000 12920 +sequentialprogram 0 1 6.957497 0.000000 12921 +wheredata 0 1 6.957497 0.000000 12922 +arc 0 1 6.957497 0.000000 12923 +thesequenti 0 1 6.957497 0.000000 12924 +sequent 0 1 6.957497 0.000000 12925 +smp 0 1 6.957497 0.000000 12926 +macdraw 0 1 6.957497 0.000000 12927 +subgraph 0 1 6.957497 0.000000 12928 +hpcwire 0 1 6.957497 0.000000 12929 +backend 0 1 6.957497 0.000000 12930 +xcodelib 0 1 6.957497 0.000000 12931 +lieu 0 1 6.957497 0.000000 12932 +groupgroup 0 1 6.957497 0.000000 12933 +leaderprofessor 0 1 6.957497 0.000000 12934 +bergerstud 0 1 6.957497 0.000000 12935 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..94bf3d2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +mail 1 238 1.386294 1.386294 22 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +base 0 165 1.791759 0.000000 50 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +site 0 106 2.197225 0.000000 119 +search 1 95 2.397895 2.397895 155 +addit 0 74 2.639057 0.000000 228 +knowledg 0 67 2.708050 0.000000 243 +taylor 0 63 2.772589 0.000000 287 +tech 0 35 3.401197 0.000000 515 +steve 0 29 3.583519 0.000000 594 +hotlist 0 13 4.382027 0.000000 1199 +correl 1 5 5.347108 5.347108 2279 +multifunct 0 3 5.857933 0.000000 3826 +correlstev 0 1 6.957497 0.000000 12936 +correlresearchph 0 1 6.957497 0.000000 12937 +reportcontact 0 1 6.957497 0.000000 12938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..6950b75d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +hall 0 146 1.945910 0.000000 65 +check 1 115 2.197225 2.197225 118 +specif 0 106 2.197225 0.000000 106 +part 0 98 2.302585 0.000000 129 +real 1 93 2.397895 2.397895 144 +control 0 82 2.484907 0.000000 164 +method 0 80 2.564949 0.000000 213 +written 0 63 2.772589 0.000000 278 +taylor 0 63 2.772589 0.000000 287 +origin 0 38 3.295837 0.000000 472 +respons 0 37 3.332205 0.000000 476 +formal 0 37 3.332205 0.000000 478 +robot 1 36 3.367296 3.367296 497 +synchron 0 29 3.583519 0.000000 588 +packag 0 28 3.610918 0.000000 614 +utc 0 27 3.637586 0.000000 629 +linux 0 27 3.637586 0.000000 631 +properti 0 22 3.850148 0.000000 749 +inth 0 22 3.850148 0.000000 741 +half 0 21 3.912023 0.000000 776 +latest 0 21 3.912023 0.000000 785 +wrote 0 20 3.951244 0.000000 830 +lot 0 18 4.060443 0.000000 889 +devic 0 16 4.174387 0.000000 1002 +driver 1 8 4.875197 4.875197 1657 +carlo 0 5 5.347108 0.000000 2515 +tempest 0 5 5.347108 0.000000 2548 +theth 0 5 5.347108 0.000000 2325 +toolset 0 4 5.568345 0.000000 3014 +austindepart 0 4 5.568345 0.000000 3008 +provinc 0 4 5.568345 0.000000 3009 +reactiv 0 3 5.857933 0.000000 3575 +publicationsi 0 3 5.857933 0.000000 3827 +softwareth 0 3 5.857933 0.000000 3552 +sciencesaustin 0 3 5.857933 0.000000 3828 +grabber 0 2 6.263398 0.000000 5521 +spain 0 2 6.263398 0.000000 5522 +esterel 1 1 6.957497 6.957497 12939 +pucholcarlo 0 1 6.957497 0.000000 12940 +pucholresearch 0 1 6.957497 0.000000 12941 +mawl 0 1 6.957497 0.000000 12942 +forbrows 0 1 6.957497 0.000000 12943 +verifyingsafeti 0 1 6.957497 0.000000 12944 +thequantavisionfram 0 1 6.957497 0.000000 12945 +thejoystickdevic 0 1 6.957497 0.000000 12946 +informationoffic 0 1 6.957497 0.000000 12947 +dreal 0 1 6.957497 0.000000 12948 +phun 0 1 6.957497 0.000000 12949 +interestsmemb 0 1 6.957497 0.000000 12950 +interesti 0 1 6.957497 0.000000 12951 +fromgandia 0 1 6.957497 0.000000 12952 +valencia 0 1 6.957497 0.000000 12953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..a90113a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +time 2 293 1.098612 2.197224 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +group 1 183 1.609438 1.609438 36 +paper 0 205 1.609438 0.000000 38 +base 0 165 1.791759 0.000000 50 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +compil 0 122 2.079442 0.000000 96 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +real 2 93 2.397895 4.795790 144 +follow 0 92 2.397895 0.000000 143 +build 0 85 2.484907 0.000000 184 +member 0 84 2.484907 0.000000 165 +logic 0 71 2.639057 0.000000 230 +simul 0 66 2.708050 0.000000 255 +foundat 1 62 2.772589 2.772589 286 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +past 0 42 3.218876 0.000000 428 +editor 0 41 3.218876 0.000000 433 +theoret 0 39 3.258097 0.000000 446 +paul 0 38 3.295837 0.000000 471 +ofth 0 36 3.367296 0.000000 491 +utc 0 27 3.637586 0.000000 629 +constraint 0 26 3.688879 0.000000 636 +toward 0 25 3.737670 0.000000 668 +head 0 23 3.806662 0.000000 732 +properti 1 22 3.850148 3.850148 749 +wang 1 21 3.912023 3.912023 790 +chen 0 21 3.912023 0.000000 791 +verif 0 20 3.951244 0.000000 826 +synthesi 0 20 3.951244 0.000000 834 +precis 0 15 4.248495 0.000000 1023 +verifi 0 12 4.465908 0.000000 1261 +establish 0 9 4.753590 0.000000 1532 +doug 0 9 4.753590 0.000000 1517 +formul 0 8 4.875197 0.000000 1733 +canb 0 7 5.010635 0.000000 1846 +chung 0 7 5.010635 0.000000 1964 +groupth 0 5 5.347108 0.000000 2549 +carlo 0 5 5.347108 0.000000 2515 +firm 0 4 5.568345 0.000000 2684 +systemsand 0 4 5.568345 0.000000 2804 +toolset 0 4 5.568345 0.000000 3014 +aloysiu 0 3 5.857933 0.000000 3829 +lai 0 3 5.857933 0.000000 3694 +categor 0 3 5.857933 0.000000 3765 +stuart 0 3 5.857933 0.000000 3584 +guangtian 0 3 5.857933 0.000000 3810 +byprof 0 2 6.263398 0.000000 5512 +stringent 0 2 6.263398 0.000000 5523 +scenario 0 2 6.263398 0.000000 5524 +availableonlin 0 2 6.263398 0.000000 4929 +deji 0 2 6.263398 0.000000 5498 +tsou 0 2 6.263398 0.000000 5525 +clement 0 2 6.263398 0.000000 5526 +modechart 1 1 6.957497 6.957497 12954 +groundworkfor 0 1 6.957497 0.000000 12955 +enforc 0 1 6.957497 0.000000 12956 +timetool 0 1 6.957497 0.000000 12957 +languagepublicationsabstract 0 1 6.957497 0.000000 12958 +puchol 0 1 6.957497 0.000000 12959 +yangalumni 0 1 6.957497 0.000000 12960 +chih 0 1 6.957497 0.000000 12961 +farn 0 1 6.957497 0.000000 12962 +supoj 0 1 6.957497 0.000000 12963 +suthandavibul 0 1 6.957497 0.000000 12964 +farnam 0 1 6.957497 0.000000 12965 +jahanian 0 1 6.957497 0.000000 12966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..c671b743 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +also 1 259 1.386294 1.386294 28 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +machin 1 129 2.079442 2.079442 95 +spring 0 131 2.079442 0.000000 88 +specif 0 106 2.197225 0.000000 106 +associ 1 93 2.397895 2.397895 151 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +learn 1 86 2.484907 2.484907 170 +resourc 1 81 2.484907 2.484907 172 +internet 1 83 2.484907 2.484907 186 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +educ 0 86 2.484907 0.000000 191 +start 0 83 2.484907 0.000000 173 +journal 0 83 2.484907 0.000000 183 +stuff 0 87 2.484907 0.000000 171 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +intellig 1 72 2.639057 2.639057 225 +would 0 67 2.708050 0.000000 251 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +collect 1 65 2.772589 2.772589 268 +taylor 0 63 2.772589 0.000000 287 +laboratori 0 63 2.772589 0.000000 292 +evalu 0 64 2.772589 0.000000 266 +thesi 0 57 2.890372 0.000000 327 +semest 0 58 2.890372 0.000000 312 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +finger 0 52 2.995732 0.000000 354 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +robot 1 36 3.367296 3.367296 497 +bibliographi 0 34 3.401197 0.000000 518 +tech 0 35 3.401197 0.000000 515 +produc 0 30 3.555348 0.000000 572 +postal 0 30 3.555348 0.000000 580 +particip 0 29 3.583519 0.000000 589 +propos 0 28 3.610918 0.000000 602 +rule 0 26 3.688879 0.000000 638 +task 0 25 3.737670 0.000000 678 +input 0 23 3.806662 0.000000 727 +mobil 0 23 3.806662 0.000000 730 +miscellan 0 23 3.806662 0.000000 731 +increas 0 20 3.951244 0.000000 829 +wrote 0 20 3.951244 0.000000 830 +expert 0 20 3.951244 0.000000 833 +north 0 19 4.007333 0.000000 873 +agent 0 18 4.060443 0.000000 910 +repositori 0 17 4.110874 0.000000 932 +expand 0 17 4.110874 0.000000 928 +women 1 16 4.174387 4.174387 1004 +across 0 16 4.174387 0.000000 974 +cognit 0 16 4.174387 0.000000 986 +researchmi 0 14 4.317488 0.000000 1119 +hotlist 0 13 4.382027 0.000000 1199 +misc 0 13 4.382027 0.000000 1124 +acquisit 1 10 4.653960 4.653960 1465 +sentenc 0 10 4.653960 0.000000 1413 +consortium 0 10 4.653960 0.000000 1467 +suitabl 0 9 4.753590 0.000000 1486 +linguist 0 9 4.753590 0.000000 1593 +folk 0 9 4.753590 0.000000 1597 +awar 0 7 5.010635 0.000000 1800 +carolina 0 6 5.164786 0.000000 2142 +truth 0 6 5.164786 0.000000 2179 +interestedin 0 5 5.347108 0.000000 2260 +corpu 0 5 5.347108 0.000000 2282 +cindi 0 3 5.857933 0.000000 3830 +groupunivers 0 3 5.857933 0.000000 3831 +primarilyin 0 3 5.857933 0.000000 3832 +diagnost 0 3 5.857933 0.000000 3833 +georgia 0 3 5.857933 0.000000 3834 +horizon 0 3 5.857933 0.000000 3746 +austini 0 2 6.263398 0.000000 5527 +deep 0 2 6.263398 0.000000 5528 +exhibit 0 2 6.263398 0.000000 5529 +cthomp 0 2 6.263398 0.000000 5530 +lexic 1 1 6.957497 6.957497 12967 +thompsoncindi 0 1 6.957497 0.000000 12968 +thompsonmachin 0 1 6.957497 0.000000 12969 +candlelight 0 1 6.957497 0.000000 12970 +vigil 0 1 6.957497 0.000000 12971 +internetto 0 1 6.957497 0.000000 12972 +violenc 0 1 6.957497 0.000000 12973 +semanticrepresent 0 1 6.957497 0.000000 12974 +atrobofest 0 1 6.957497 0.000000 12975 +wolv 0 1 6.957497 0.000000 12976 +counsel 0 1 6.957497 0.000000 12977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..e9b74758 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +utexa 0 189 1.609438 0.000000 44 +support 0 132 1.945910 0.000000 83 +welcom 0 122 2.079442 0.000000 99 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +browser 0 56 2.890372 0.000000 313 +friend 0 48 3.044522 0.000000 376 +netscap 0 44 3.135494 0.000000 395 +frame 0 24 3.761200 0.000000 684 +famili 0 23 3.806662 0.000000 735 +latest 0 21 3.912023 0.000000 785 +seem 0 18 4.060443 0.000000 899 +doesn 0 15 4.248495 0.000000 1055 +xingshan 1 1 6.957497 6.957497 12978 +downloadth 0 1 6.957497 0.000000 12979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..9fb1d852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +oper 1 180 1.609438 1.609438 34 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +hall 1 146 1.945910 1.945910 65 +professor 0 137 1.945910 0.000000 76 +file 0 132 1.945910 0.000000 70 +technolog 0 131 2.079442 0.000000 102 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +memori 0 101 2.302585 0.000000 139 +internet 0 83 2.484907 0.000000 186 +want 0 79 2.564949 0.000000 199 +taylor 1 63 2.772589 2.772589 287 +probabl 0 40 3.258097 0.000000 455 +electr 0 38 3.295837 0.000000 461 +postal 0 30 3.555348 0.000000 580 +berkelei 1 26 3.688879 3.688879 657 +experiment 0 26 3.688879 0.000000 645 +mike 1 24 3.761200 3.761200 703 +disk 0 22 3.850148 0.000000 747 +less 0 18 4.060443 0.000000 892 +seem 0 18 4.060443 0.000000 899 +classic 0 14 4.317488 0.000000 1084 +rice 0 11 4.553877 0.000000 1336 +operatingsystem 0 10 4.653960 0.000000 1401 +architect 0 8 4.875197 0.000000 1624 +gather 0 8 4.875197 0.000000 1719 +capac 0 8 4.875197 0.000000 1740 +root 0 8 4.875197 0.000000 1650 +trend 1 7 5.010635 5.010635 1842 +bore 0 7 5.010635 0.000000 1948 +austinaustin 0 7 5.010635 0.000000 1966 +price 1 6 5.164786 5.164786 1999 +pagethi 0 5 5.347108 0.000000 2336 +serverless 0 3 5.857933 0.000000 3181 +systemsth 0 3 5.857933 0.000000 3835 +informationtechnolog 0 3 5.857933 0.000000 3836 +informationassist 0 2 6.263398 0.000000 5531 +teachingfal 0 2 6.263398 0.000000 5532 +systemsspr 0 2 6.263398 0.000000 4762 +dahlin 1 1 6.957497 6.957497 12980 +dahlingener 0 1 6.957497 0.000000 12981 +architectureeveryon 0 1 6.957497 0.000000 12982 +researchxf 0 1 6.957497 0.000000 12983 +systemweb 0 1 6.957497 0.000000 12984 +pagesummar 0 1 6.957497 0.000000 12985 +compter 0 1 6.957497 0.000000 12986 +includinghistor 0 1 6.957497 0.000000 12987 +informationif 0 1 6.957497 0.000000 12988 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..9cb4c2b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +distribut 1 162 1.791759 1.791759 51 +read 0 154 1.791759 0.000000 47 +parallel 0 169 1.791759 0.000000 60 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +activ 0 84 2.484907 0.000000 182 +servic 0 72 2.639057 0.000000 236 +laboratori 0 63 2.772589 0.000000 292 +dept 0 64 2.772589 0.000000 291 +probabl 0 40 3.258097 0.000000 455 +feel 0 37 3.332205 0.000000 483 +honor 0 23 3.806662 0.000000 729 +busi 0 21 3.912023 0.000000 784 +anyth 0 16 4.174387 0.000000 998 +regularli 0 11 4.553877 0.000000 1338 +tradit 0 10 4.653960 0.000000 1404 +meant 0 6 5.164786 0.000000 2055 +phrase 0 5 5.347108 0.000000 2242 +suffic 0 4 5.568345 0.000000 2869 +crazi 0 4 5.568345 0.000000 2822 +sytem 0 4 5.568345 0.000000 3015 +vijai 0 4 5.568345 0.000000 2960 +mehom 0 4 5.568345 0.000000 2979 +lazi 0 2 6.263398 0.000000 4527 +appeal 0 2 6.263398 0.000000 4186 +garg 0 2 6.263398 0.000000 5533 +damani 1 1 6.957497 6.957497 12989 +howdi 0 1 6.957497 0.000000 12990 +pagal 0 1 6.957497 0.000000 12991 +dekho 0 1 6.957497 0.000000 12992 +updateth 0 1 6.957497 0.000000 12993 +oblig 0 1 6.957497 0.000000 12994 +providesometh 0 1 6.957497 0.000000 12995 +guadulp 0 1 6.957497 0.000000 12996 +austinphon 0 1 6.957497 0.000000 12997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..47430da9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +model 0 145 1.945910 0.000000 69 +area 0 144 1.945910 0.000000 80 +machin 0 129 2.079442 0.000000 95 +make 0 111 2.197225 0.000000 120 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +school 0 84 2.484907 0.000000 188 +appli 0 71 2.639057 0.000000 226 +view 1 70 2.708050 2.708050 254 +main 0 67 2.708050 0.000000 256 +complex 0 64 2.772589 0.000000 269 +laboratori 0 63 2.772589 0.000000 292 +autom 0 41 3.218876 0.000000 434 +tree 0 36 3.367296 0.000000 492 +global 0 34 3.401197 0.000000 520 +equat 0 23 3.806662 0.000000 724 +nice 0 20 3.951244 0.000000 809 +render 0 17 4.110874 0.000000 947 +happi 0 14 4.317488 0.000000 1079 +station 0 13 4.382027 0.000000 1157 +mainten 0 9 4.753590 0.000000 1543 +plane 0 6 5.164786 0.000000 2187 +multiresolut 0 5 5.347108 0.000000 2423 +escap 0 4 5.568345 0.000000 3016 +unrel 0 3 5.857933 0.000000 3837 +dane 1 2 6.263398 6.263398 5534 +pinbal 1 2 6.263398 6.263398 5508 +marshal 0 2 6.263398 0.000000 4118 +illumin 0 2 6.263398 0.000000 4819 +probe 0 2 6.263398 0.000000 5535 +marshalldan 0 1 6.957497 0.000000 12998 +electromechan 0 1 6.957497 0.000000 12999 +thelogist 0 1 6.957497 0.000000 13000 +attractor 0 1 6.957497 0.000000 13001 +burnet 0 1 6.957497 0.000000 13002 +pastur 0 1 6.957497 0.000000 13003 +jupit 0 1 6.957497 0.000000 13004 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..fcf70920 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +us 1 329 1.098612 1.098612 16 +link 1 247 1.386294 1.386294 24 +gener 0 220 1.386294 0.000000 27 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +provid 1 121 2.079442 2.079442 94 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +well 1 109 2.197225 2.197225 121 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +access 0 102 2.302585 0.000000 136 +know 0 80 2.564949 0.000000 198 +good 0 77 2.564949 0.000000 200 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +archiv 0 49 3.044522 0.000000 364 +keep 0 44 3.135494 0.000000 409 +video 0 44 3.135494 0.000000 405 +go 0 33 3.433987 0.000000 529 +idea 0 32 3.465736 0.000000 545 +someth 0 31 3.496508 0.000000 554 +sort 0 22 3.850148 0.000000 738 +sure 1 20 3.951244 3.951244 813 +easi 0 16 4.174387 0.000000 969 +save 1 14 4.317488 4.317488 1099 +manner 0 14 4.317488 0.000000 1074 +pagewelcom 0 11 4.553877 0.000000 1344 +guess 0 10 4.653960 0.000000 1443 +doug 0 9 4.753590 0.000000 1517 +perhap 0 8 4.875197 0.000000 1693 +fiction 0 6 5.164786 0.000000 2217 +latexhtml 0 5 5.347108 0.000000 2347 +orlean 0 5 5.347108 0.000000 2550 +bear 0 4 5.568345 0.000000 2651 +stuart 1 3 5.857933 5.857933 3584 +justa 0 2 6.263398 0.000000 5326 +pagedoug 0 1 6.957497 0.000000 13005 +oflinksto 0 1 6.957497 0.000000 13006 +aboutsport 0 1 6.957497 0.000000 13007 +booksin 0 1 6.957497 0.000000 13008 +fewjok 0 1 6.957497 0.000000 13009 +testof 0 1 6.957497 0.000000 13010 +aweath 0 1 6.957497 0.000000 13011 +mapandcondit 0 1 6.957497 0.000000 13012 +austinandnew 0 1 6.957497 0.000000 13013 +amgraci 0 1 6.957497 0.000000 13014 +puttingit 0 1 6.957497 0.000000 13015 +webbrows 0 1 6.957497 0.000000 13016 +thisi 0 1 6.957497 0.000000 13017 +stuffmom 0 1 6.957497 0.000000 13018 +calendarlink 0 1 6.957497 0.000000 13019 +fictionbooksjokessportsfoodvideout 0 1 6.957497 0.000000 13020 +libraryresumelast 0 1 6.957497 0.000000 13021 +dasdastuart 0 1 6.957497 0.000000 13022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..fb5ea75d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +doug 0 9 4.753590 0.000000 1517 +swhich 0 1 6.957497 0.000000 13023 +annoi 0 1 6.957497 0.000000 13024 +thisorthi 0 1 6.957497 0.000000 13025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..d9e1eaa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +link 1 247 1.386294 1.386294 24 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +taylor 0 63 2.772589 0.000000 287 +local 0 55 2.944439 0.000000 334 +archiv 0 49 3.044522 0.000000 364 +robot 0 36 3.367296 0.000000 497 +neural 1 30 3.555348 3.555348 578 +postal 0 30 3.555348 0.000000 580 +art 0 29 3.583519 0.000000 593 +utc 1 27 3.637586 3.637586 629 +symbol 0 27 3.637586 0.000000 620 +fine 0 20 3.951244 0.000000 822 +agent 0 18 4.060443 0.000000 910 +universityof 1 15 4.248495 4.248495 1061 +evolv 0 12 4.465908 0.000000 1223 +literatur 0 11 4.553877 0.000000 1300 +michigan 0 11 4.553877 0.000000 1368 +genet 1 10 4.653960 4.653960 1409 +santa 0 10 4.653960 0.000000 1441 +ground 0 7 5.010635 0.000000 1955 +digest 0 7 5.010635 0.000000 1864 +theus 0 4 5.568345 0.000000 2992 +spanish 0 4 5.568345 0.000000 3017 +intereststh 0 3 5.857933 0.000000 3838 +dian 0 2 6.263398 0.000000 5536 +lawdian 0 1 6.957497 0.000000 13026 +problemnavig 0 1 6.957497 0.000000 13027 +washingtonst 0 1 6.957497 0.000000 13028 +stateunivers 0 1 6.957497 0.000000 13029 +dianelaw 0 1 6.957497 0.000000 13030 +gann 0 1 6.957497 0.000000 13031 +illig 0 1 6.957497 0.000000 13032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..189df085 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +resourc 0 81 2.484907 0.000000 172 +homework 0 79 2.564949 0.000000 193 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +undergradu 0 54 2.944439 0.000000 338 +mine 0 26 3.688879 0.000000 654 +alwai 0 24 3.761200 0.000000 691 +unit 0 21 3.912023 0.000000 779 +monitor 0 17 4.110874 0.000000 941 +athlet 0 7 5.010635 0.000000 1933 +greec 1 6 5.164786 5.164786 2208 +informat 0 3 5.857933 0.000000 3839 +patra 1 2 6.263398 6.263398 5537 +reasearch 0 2 6.263398 0.000000 5538 +dionisi 1 1 6.957497 6.957497 13033 +papadopoulosdionisi 0 1 6.957497 0.000000 13034 +papadopoulosabout 0 1 6.957497 0.000000 13035 +medionisi 0 1 6.957497 0.000000 13036 +papadopoulo 0 1 6.957497 0.000000 13037 +panhellen 0 1 6.957497 0.000000 13038 +associationpanathinaiko 0 1 6.957497 0.000000 13039 +clubgreek 0 1 6.957497 0.000000 13040 +newshellen 0 1 6.957497 0.000000 13041 +networkeveryth 0 1 6.957497 0.000000 13042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..f375519f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +public 1 202 1.609438 1.609438 43 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +contact 0 153 1.791759 0.000000 59 +recent 0 167 1.791759 0.000000 58 +construct 1 139 1.945910 1.945910 82 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +problem 0 147 1.945910 0.000000 75 +report 1 131 2.079442 2.079442 92 +high 0 130 2.079442 0.000000 101 +version 2 113 2.197225 4.394450 122 +structur 1 106 2.197225 2.197225 105 +theori 0 111 2.197225 0.000000 127 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +appear 1 78 2.564949 2.564949 210 +sourc 1 77 2.564949 2.564949 201 +complet 1 77 2.564949 2.564949 208 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +simul 1 66 2.708050 2.708050 255 +complex 1 64 2.772589 2.772589 269 +taylor 0 63 2.772589 0.000000 287 +descript 0 64 2.772589 0.000000 271 +abstract 0 62 2.772589 0.000000 276 +visit 0 63 2.772589 0.000000 288 +space 0 57 2.890372 0.000000 310 +sampl 0 53 2.944439 0.000000 339 +cover 0 55 2.944439 0.000000 329 +local 0 55 2.944439 0.000000 334 +finger 0 52 2.995732 0.000000 354 +linear 0 41 3.218876 0.000000 431 +annual 0 40 3.258097 0.000000 458 +multipl 0 39 3.258097 0.000000 453 +small 0 39 3.258097 0.000000 447 +correct 0 38 3.295837 0.000000 462 +random 2 34 3.401197 6.802394 511 +product 0 33 3.433987 0.000000 527 +graph 1 30 3.555348 3.555348 576 +postal 0 30 3.555348 0.000000 580 +load 0 28 3.610918 0.000000 601 +utc 0 27 3.637586 0.000000 629 +revis 1 26 3.688879 3.688879 640 +bound 1 26 3.688879 3.688879 659 +lower 0 18 4.060443 0.000000 886 +expand 1 17 4.110874 4.110874 928 +role 0 14 4.317488 0.000000 1101 +balanc 0 14 4.317488 0.000000 1112 +weak 1 13 4.382027 4.382027 1159 +walk 0 12 4.465908 0.000000 1281 +paragraph 0 10 4.653960 0.000000 1449 +preliminari 1 9 4.753590 4.753590 1480 +cryptographi 0 9 4.753590 0.000000 1512 +leader 0 9 4.753590 0.000000 1576 +explicit 0 9 4.753590 0.000000 1525 +insert 0 8 4.875197 0.000000 1687 +elect 0 8 4.875197 0.000000 1771 +analys 0 8 4.875197 0.000000 1666 +combinatori 0 8 4.875197 0.000000 1629 +foc 1 7 5.010635 5.010635 1880 +hit 0 7 5.010635 0.000000 1965 +dimens 0 7 5.010635 0.000000 1930 +soda 0 6 5.164786 0.000000 2189 +determinist 0 6 5.164786 0.000000 2034 +stoc 1 5 5.347108 5.347108 2491 +mutual 0 5 5.347108 0.000000 2418 +asymptot 0 4 5.568345 0.000000 2676 +delet 0 4 5.568345 0.000000 2691 +exclus 0 4 5.568345 0.000000 2947 +combinatorica 1 3 5.857933 5.857933 3649 +intereststh 0 3 5.857933 0.000000 3838 +algorithmica 0 3 5.857933 0.000000 3561 +beat 0 3 5.857933 0.000000 3840 +eigenvalu 0 3 5.857933 0.000000 3364 +sicomp 1 1 6.957497 6.957497 13043 +zuckermandavid 0 1 6.957497 0.000000 13044 +zuckermanassist 0 1 6.957497 0.000000 13045 +cryptographyresearch 0 1 6.957497 0.000000 13046 +myprofil 0 1 6.957497 0.000000 13047 +transposit 0 1 6.957497 0.000000 13048 +extractor 0 1 6.957497 0.000000 13049 +jcss 0 1 6.957497 0.000000 13050 +logspac 0 1 6.957497 0.000000 13051 +tight 0 1 6.957497 0.000000 13052 +derandom 0 1 6.957497 0.000000 13053 +constructionand 0 1 6.957497 0.000000 13054 +setfor 0 1 6.957497 0.000000 13055 +rectangl 0 1 6.957497 0.000000 13056 +unapproxim 0 1 6.957497 0.000000 13057 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..c96ca8a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +design 0 213 1.386294 0.000000 25 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +distribut 1 162 1.791759 1.791759 51 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 0 168 1.791759 0.000000 61 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +report 0 131 2.079442 0.000000 92 +send 0 114 2.197225 0.000000 109 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +wide 0 84 2.484907 0.000000 185 +member 0 84 2.484907 0.000000 165 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +nation 0 74 2.639057 0.000000 240 +multimedia 2 68 2.708050 5.416100 258 +main 0 67 2.708050 0.000000 256 +would 0 67 2.708050 0.000000 251 +laboratori 1 63 2.772589 2.772589 292 +foundat 1 62 2.772589 2.772589 286 +content 0 59 2.833213 0.000000 302 +variou 0 56 2.890372 0.000000 317 +suggest 0 53 2.944439 0.000000 331 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +tabl 0 51 2.995732 0.000000 346 +protocol 0 45 3.135494 0.000000 407 +video 0 44 3.135494 0.000000 405 +industri 0 38 3.295837 0.000000 464 +microsoft 0 38 3.295837 0.000000 468 +electr 0 38 3.295837 0.000000 461 +storag 0 31 3.496508 0.000000 553 +focus 0 29 3.583519 0.000000 584 +relev 0 26 3.688879 0.000000 637 +intel 0 16 4.174387 0.000000 1000 +audio 0 14 4.317488 0.000000 1094 +carri 0 13 4.382027 0.000000 1152 +nasa 0 13 4.382027 0.000000 1188 +departmentof 0 9 4.753590 0.000000 1539 +transport 0 8 4.875197 0.000000 1672 +hear 0 7 5.010635 0.000000 1940 +sponsor 1 6 5.164786 5.164786 2133 +multimediacomput 1 3 5.857933 5.857933 3841 +mitsubishi 0 3 5.857933 0.000000 3842 +merl 0 3 5.857933 0.000000 3843 +rangeof 0 2 6.263398 0.000000 4076 +federalinstitut 0 2 6.263398 0.000000 5539 +agenda 0 2 6.263398 0.000000 5037 +currentresearch 0 1 6.957497 0.000000 13058 +andmultiresolut 0 1 6.957497 0.000000 13059 +dmcl 0 1 6.957497 0.000000 13060 +microsystemsinc 0 1 6.957497 0.000000 13061 +yourcom 0 1 6.957497 0.000000 13062 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..2a822c8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +specif 1 106 2.197225 2.197225 106 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +build 1 85 2.484907 2.484907 184 +larg 0 82 2.484907 0.000000 168 +orient 0 80 2.564949 0.000000 205 +appli 0 71 2.639057 0.000000 226 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +java 0 70 2.708050 0.000000 248 +would 0 67 2.708050 0.000000 251 +improv 0 62 2.772589 0.000000 289 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +extens 1 53 2.944439 2.944439 340 +investig 0 51 2.995732 0.000000 353 +programm 0 39 3.258097 0.000000 445 +microsoft 0 38 3.295837 0.000000 468 +cost 0 37 3.332205 0.000000 480 +product 0 33 3.433987 0.000000 527 +domain 1 30 3.555348 3.555348 564 +compon 0 30 3.555348 0.000000 570 +postal 0 30 3.555348 0.000000 580 +scale 0 28 3.610918 0.000000 613 +framework 0 28 3.610918 0.000000 606 +enhanc 0 26 3.688879 0.000000 644 +subject 0 26 3.688879 0.000000 647 +wai 0 25 3.737670 0.000000 662 +pattern 0 24 3.761200 0.000000 689 +methodolog 0 23 3.806662 0.000000 733 +reduc 0 22 3.850148 0.000000 759 +fund 0 21 3.912023 0.000000 805 +synthesi 0 20 3.951244 0.000000 834 +mainten 0 9 4.753590 0.000000 1543 +realiz 0 8 4.875197 0.000000 1739 +span 0 8 4.875197 0.000000 1751 +avion 0 4 5.568345 0.000000 3018 +darpa 0 4 5.568345 0.000000 2944 +batori 0 4 5.568345 0.000000 2690 +preprocessor 0 3 5.857933 0.000000 3844 +parameter 0 2 6.263398 0.000000 5540 +encapsul 0 2 6.263398 0.000000 5541 +jakarta 1 1 6.957497 6.957497 13063 +batorydon 0 1 6.957497 0.000000 13064 +batorysoftwar 0 1 6.957497 0.000000 13065 +pluggabl 0 1 6.957497 0.000000 13066 +schlumberg 0 1 6.957497 0.000000 13067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..f1fd5c78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +parallel 1 169 1.791759 1.791759 60 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +process 0 142 1.945910 0.000000 72 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +present 0 91 2.397895 0.000000 145 +info 1 85 2.484907 2.484907 176 +know 0 80 2.564949 0.000000 198 +symposium 0 72 2.639057 0.000000 238 +main 0 67 2.708050 0.000000 256 +favorit 0 44 3.135494 0.000000 410 +methodolog 0 23 3.806662 0.000000 733 +partit 0 16 4.174387 0.000000 984 +photograph 0 15 4.248495 0.000000 1056 +usavoic 1 13 4.382027 4.382027 1198 +road 0 11 4.553877 0.000000 1374 +insert 0 8 4.875197 0.000000 1687 +banerje 0 6 5.164786 0.000000 2018 +theintern 0 4 5.568345 0.000000 2981 +dwip 1 3 5.857933 5.857933 3197 +banerjeeabout 0 1 6.957497 0.000000 13068 +methi 0 1 6.957497 0.000000 13069 +programminggroup 0 1 6.957497 0.000000 13070 +graphicalparallel 0 1 6.957497 0.000000 13071 +departmentpost 0 1 6.957497 0.000000 13072 +homepost 0 1 6.957497 0.000000 13073 +enfield 0 1 6.957497 0.000000 13074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..3c7680ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +make 0 111 2.197225 0.000000 120 +thing 0 84 2.484907 0.000000 189 +come 0 78 2.564949 0.000000 202 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +main 0 67 2.708050 0.000000 256 +previou 0 62 2.772589 0.000000 290 +januari 0 62 2.772589 0.000000 264 +taylor 0 63 2.772589 0.000000 287 +summer 0 56 2.890372 0.000000 311 +think 0 57 2.890372 0.000000 314 +set 0 50 3.044522 0.000000 361 +right 0 48 3.044522 0.000000 363 +anoth 1 45 3.135494 3.135494 408 +third 0 43 3.178054 0.000000 412 +multipl 0 39 3.258097 0.000000 453 +finish 0 22 3.850148 0.000000 748 +break 0 20 3.951244 0.000000 812 +els 0 19 4.007333 0.000000 843 +left 0 19 4.007333 0.000000 851 +item 0 19 4.007333 0.000000 856 +usavoic 0 13 4.382027 0.000000 1198 +paragraph 1 10 4.653960 4.653960 1449 +mepost 0 10 4.653960 0.000000 1472 +deadlin 0 9 4.753590 0.000000 1502 +siggraph 0 8 4.875197 0.000000 1773 +forget 0 8 4.875197 0.000000 1712 +promis 1 6 5.164786 5.164786 2037 +complaint 0 4 5.568345 0.000000 2795 +emilio 1 3 5.857933 5.857933 3683 +bout 0 3 5.857933 0.000000 3670 +credibl 0 3 5.857933 0.000000 3210 +decent 0 2 6.263398 0.000000 5542 +excus 0 2 6.263398 0.000000 4684 +camahort 1 1 6.957497 6.957497 13075 +gurrea 1 1 6.957497 6.957497 13076 +mmmmm 0 1 6.957497 0.000000 13077 +lose 0 1 6.957497 0.000000 13078 +ecamahor 0 1 6.957497 0.000000 13079 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..24d1450c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +support 0 132 1.945910 0.000000 83 +summari 0 73 2.639057 0.000000 237 +view 1 70 2.708050 2.708050 254 +multimedia 0 68 2.708050 0.000000 258 +laboratori 0 63 2.772589 0.000000 292 +greg 0 24 3.761200 0.000000 695 +head 0 23 3.806662 0.000000 732 +supervis 0 20 3.951244 0.000000 840 +eduphon 0 15 4.248495 0.000000 1060 +consortium 0 10 4.653960 0.000000 1467 +harrick 0 7 5.010635 0.000000 1849 +multimediacomput 0 3 5.857933 0.000000 3841 +lavend 0 3 5.857933 0.000000 3217 +posnak 1 1 6.957497 6.957497 13080 +isod 0 1 6.957497 0.000000 13081 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..ce80e31d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +time 1 293 1.098612 1.098612 17 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +theori 0 111 2.197225 0.000000 127 +real 1 93 2.397895 2.397895 144 +journal 1 83 2.484907 2.484907 183 +ieee 0 86 2.484907 0.000000 190 +method 0 80 2.564949 0.000000 213 +logic 1 71 2.639057 2.639057 230 +effici 0 73 2.639057 0.000000 233 +symposium 0 72 2.639057 0.000000 238 +practic 0 70 2.708050 0.000000 246 +taylor 0 63 2.772589 0.000000 287 +foundat 0 62 2.772589 0.000000 286 +direct 0 57 2.890372 0.000000 316 +reason 0 57 2.890372 0.000000 318 +york 0 41 3.218876 0.000000 435 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +theoret 0 39 3.258097 0.000000 446 +formal 0 37 3.332205 0.000000 478 +tree 0 36 3.367296 0.000000 492 +concurr 0 34 3.401197 0.000000 501 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +verif 0 20 3.951244 0.000000 826 +aid 0 18 4.060443 0.000000 904 +cambridg 0 16 4.174387 0.000000 1008 +automata 1 13 4.382027 4.382027 1135 +calculu 1 12 4.465908 4.465908 1203 +tempor 1 9 4.753590 4.753590 1584 +juan 0 9 4.753590 0.000000 1580 +secretari 0 8 4.875197 0.000000 1775 +quantit 0 8 4.875197 0.000000 1654 +satisfi 0 8 4.875197 0.000000 1694 +mass 0 8 4.875197 0.000000 1732 +centenni 0 7 5.010635 0.000000 1967 +foc 0 7 5.010635 0.000000 1880 +srinivasan 1 6 5.164786 5.164786 2175 +handbook 0 6 5.164786 0.000000 2061 +emerson 2 5 5.347108 10.694216 2547 +allen 1 5 5.347108 5.347108 2470 +infinit 0 4 5.568345 0.000000 2596 +elsevi 0 3 5.857933 0.000000 3671 +systemsselect 0 2 6.263398 0.000000 4049 +bakker 0 2 6.263398 0.000000 5337 +leeuwen 0 2 6.263398 0.000000 5543 +emersonbruton 0 1 6.957497 0.000000 13082 +publications 0 1 6.957497 0.000000 13083 +sistla 0 1 6.957497 0.000000 13084 +sadler 0 1 6.957497 0.000000 13085 +jutla 0 1 6.957497 0.000000 13086 +determinaci 0 1 6.957497 0.000000 13087 +modal 0 1 6.957497 0.000000 13088 +amsterdam 0 1 6.957497 0.000000 13089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..207c2b20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +object 0 138 1.945910 0.000000 79 +problem 0 147 1.945910 0.000000 75 +tool 0 117 2.079442 0.000000 93 +databas 0 122 2.079442 0.000000 86 +postscript 0 131 2.079442 0.000000 90 +code 1 108 2.197225 2.197225 116 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +user 0 104 2.302585 0.000000 137 +search 0 95 2.397895 0.000000 155 +select 0 91 2.397895 0.000000 154 +graphic 0 90 2.397895 0.000000 147 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +info 1 85 2.484907 2.484907 176 +academ 0 82 2.484907 0.000000 178 +orient 0 80 2.564949 0.000000 205 +materi 1 75 2.639057 2.639057 221 +name 0 72 2.639057 0.000000 220 +html 0 75 2.639057 0.000000 235 +view 0 70 2.708050 0.000000 254 +function 1 62 2.772589 2.772589 275 +dept 0 64 2.772589 0.000000 291 +taylor 0 63 2.772589 0.000000 287 +abstract 0 62 2.772589 0.000000 276 +type 0 61 2.833213 0.000000 296 +unix 0 58 2.890372 0.000000 308 +extens 0 53 2.944439 0.000000 340 +visual 1 48 3.044522 3.044522 372 +describ 0 45 3.135494 0.000000 400 +textbook 0 44 3.135494 0.000000 397 +http 0 41 3.218876 0.000000 420 +join 0 39 3.258097 0.000000 457 +concept 0 32 3.465736 0.000000 537 +framework 0 28 3.610918 0.000000 606 +repres 0 26 3.688879 0.000000 656 +known 0 24 3.761200 0.000000 702 +compress 0 23 3.806662 0.000000 719 +lyco 0 19 4.007333 0.000000 871 +analyz 0 17 4.110874 0.000000 925 +macintosh 0 17 4.110874 0.000000 920 +near 0 14 4.317488 0.000000 1091 +composit 1 13 4.382027 4.382027 1150 +affili 0 13 4.382027 0.000000 1194 +brother 0 13 4.382027 0.000000 1189 +believ 0 13 4.382027 0.000000 1187 +doug 1 9 4.753590 4.753590 1517 +pure 0 8 4.875197 0.000000 1776 +accomplish 0 8 4.875197 0.000000 1755 +mirror 0 6 5.164786 0.000000 2028 +artist 0 6 5.164786 0.000000 2127 +haskel 1 4 5.568345 5.568345 2618 +aspir 0 4 5.568345 0.000000 3019 +berger 1 3 5.857933 5.857933 3702 +add 0 3 5.857933 0.000000 3131 +emeri 1 2 6.263398 6.263398 5515 +groupi 1 2 6.263398 6.263398 5544 +linksth 0 2 6.263398 0.000000 5545 +analyst 1 1 6.957497 6.957497 13090 +ticam 1 1 6.957497 6.957497 13091 +evangelist 1 1 6.957497 6.957497 13092 +pageemeri 0 1 6.957497 0.000000 13093 +randomli 0 1 6.957497 0.000000 13094 +uttr 0 1 6.957497 0.000000 13095 +othermi 0 1 6.957497 0.000000 13096 +youngest 0 1 6.957497 0.000000 13097 +handiwork 0 1 6.957497 0.000000 13098 +systemtexbook 0 1 6.957497 0.000000 13099 +exchangegrac 0 1 6.957497 0.000000 13100 +macaddict 0 1 6.957497 0.000000 13101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..c73768b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +welcom 0 122 2.079442 0.000000 99 +studi 0 120 2.079442 0.000000 91 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +find 0 111 2.197225 0.000000 111 +intern 0 108 2.197225 0.000000 128 +part 0 98 2.302585 0.000000 129 +librari 1 87 2.484907 2.484907 181 +solut 0 82 2.484907 0.000000 162 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +come 0 78 2.564949 0.000000 202 +nation 0 74 2.639057 0.000000 240 +onlin 0 75 2.639057 0.000000 223 +august 0 66 2.708050 0.000000 257 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +visit 0 63 2.772589 0.000000 288 +januari 0 62 2.772589 0.000000 264 +semest 0 58 2.890372 0.000000 312 +date 0 51 2.995732 0.000000 344 +compani 0 41 3.218876 0.000000 423 +small 0 39 3.258097 0.000000 447 +programm 0 39 3.258097 0.000000 445 +tutori 0 39 3.258097 0.000000 437 +industri 0 38 3.295837 0.000000 464 +china 1 37 3.332205 3.332205 487 +chines 0 29 3.583519 0.000000 595 +repres 0 26 3.688879 0.000000 656 +try 0 22 3.850148 0.000000 764 +alumni 0 21 3.912023 0.000000 807 +love 0 21 3.912023 0.000000 804 +nice 0 20 3.951244 0.000000 809 +media 0 19 4.007333 0.000000 861 +attend 0 18 4.060443 0.000000 893 +lot 0 18 4.060443 0.000000 889 +jose 0 16 4.174387 0.000000 976 +club 0 15 4.248495 0.000000 1058 +fortran 0 15 4.248495 0.000000 1027 +eduphon 0 15 4.248495 0.000000 1060 +becam 0 14 4.317488 0.000000 1117 +francisco 0 14 4.317488 0.000000 1095 +opportun 0 13 4.382027 0.000000 1161 +market 0 11 4.553877 0.000000 1361 +branch 0 11 4.553877 0.000000 1318 +enter 0 10 4.653960 0.000000 1454 +telecommun 0 9 4.753590 0.000000 1565 +surpris 0 7 5.010635 0.000000 1828 +capit 0 7 5.010635 0.000000 1957 +vallei 0 7 5.010635 0.000000 1959 +south 0 6 5.164786 0.000000 2167 +deliv 0 6 5.164786 0.000000 2070 +silicon 0 6 5.164786 0.000000 2076 +girl 0 5 5.347108 0.000000 2410 +orlean 0 5 5.347108 0.000000 2550 +houston 0 5 5.347108 0.000000 2460 +planet 0 4 5.568345 0.000000 2912 +immedi 0 3 5.857933 0.000000 3117 +peac 0 3 5.857933 0.000000 3769 +emma 1 2 6.263398 6.263398 5546 +zhongshan 1 2 6.263398 6.263398 5547 +mini 0 2 6.263398 0.000000 5548 +magazinepc 0 2 6.263398 0.000000 5161 +wuabout 0 1 6.957497 0.000000 13102 +myselfhi 0 1 6.957497 0.000000 13103 +inibm 0 1 6.957497 0.000000 13104 +costom 0 1 6.957497 0.000000 13105 +manyalumni 0 1 6.957497 0.000000 13106 +instrumentsinc 0 1 6.957497 0.000000 13107 +computingmanag 0 1 6.957497 0.000000 13108 +informationautomat 0 1 6.957497 0.000000 13109 +baseyahoogalaxi 0 1 6.957497 0.000000 13110 +universityyellow 0 1 6.957497 0.000000 13111 +infoleisur 0 1 6.957497 0.000000 13112 +timenewspagepeopl 0 1 6.957497 0.000000 13113 +dailyartstim 0 1 6.957497 0.000000 13114 +magazinechines 0 1 6.957497 0.000000 13115 +magazinec 0 1 6.957497 0.000000 13116 +antoniosan 0 1 6.957497 0.000000 13117 +franciscomarina 0 1 6.957497 0.000000 13118 +citysan 0 1 6.957497 0.000000 13119 +pointemail 0 1 6.957497 0.000000 13120 +emmawu 0 1 6.957497 0.000000 13121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..36a37889 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +offic 1 299 1.098612 1.098612 13 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +first 1 140 1.945910 1.945910 71 +well 0 109 2.197225 0.000000 121 +main 0 67 2.708050 0.000000 256 +anoth 1 45 3.135494 3.135494 408 +third 0 43 3.178054 0.000000 412 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +break 0 20 3.951244 0.000000 812 +item 0 19 4.007333 0.000000 856 +easi 0 16 4.174387 0.000000 969 +usavoic 0 13 4.382027 0.000000 1198 +paragraph 1 10 4.653960 4.653960 1449 +mepost 0 10 4.653960 0.000000 1472 +forget 0 8 4.875197 0.000000 1712 +emilio 1 3 5.857933 5.857933 3683 +mehi 0 2 6.263398 0.000000 5549 +bullet 0 2 6.263398 0.000000 5500 +remolinaemilio 0 1 6.957497 0.000000 13122 +remolinaabout 0 1 6.957497 0.000000 13123 +eremolin 0 1 6.957497 0.000000 13124 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..d6c4bfc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +locat 0 59 2.833213 0.000000 303 +http 0 41 3.218876 0.000000 420 +reach 0 24 3.761200 0.000000 688 +thank 0 23 3.806662 0.000000 721 +former 0 17 4.110874 0.000000 956 +universityof 0 15 4.248495 0.000000 1061 +sciencesat 0 7 5.010635 0.000000 1968 +levent 1 1 6.957497 6.957497 13125 +sayfasi 0 1 6.957497 0.000000 13126 +erkok 0 1 6.957497 0.000000 13127 +inturkei 0 1 6.957497 0.000000 13128 +ceng 0 1 6.957497 0.000000 13129 +metu 0 1 6.957497 0.000000 13130 +erkokto 0 1 6.957497 0.000000 13131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..88cd123a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +area 0 144 1.945910 0.000000 80 +machin 0 129 2.079442 0.000000 95 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +homepag 0 93 2.397895 0.000000 148 +educ 0 86 2.484907 0.000000 191 +logic 0 71 2.639057 0.000000 230 +reason 1 57 2.890372 2.890372 318 +postal 0 30 3.555348 0.000000 580 +mind 0 27 3.637586 0.000000 632 +voic 0 21 3.912023 0.000000 806 +cognit 0 16 4.174387 0.000000 986 +thedepart 0 11 4.553877 0.000000 1350 +turkei 0 4 5.568345 0.000000 2914 +commonsens 0 4 5.568345 0.000000 2998 +children 0 3 5.857933 0.000000 3767 +monoton 1 2 6.263398 6.263398 5321 +andinform 0 2 6.263398 0.000000 5550 +esra 1 1 6.957497 6.957497 13132 +erdem 1 1 6.957497 6.957497 13133 +bilkent 0 1 6.957497 0.000000 13134 +learninginduct 0 1 6.957497 0.000000 13135 +sciencelearningreason 0 1 6.957497 0.000000 13136 +reasoningknowledg 0 1 6.957497 0.000000 13137 +representationemotionsphilosophi 0 1 6.957497 0.000000 13138 +mindcontact 0 1 6.957497 0.000000 13139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..35a5154c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 0 284 1.098612 0.000000 21 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +perform 0 143 1.945910 0.000000 74 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +thing 0 84 2.484907 0.000000 189 +wide 0 84 2.484907 0.000000 185 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +degre 0 69 2.708050 0.000000 259 +main 0 67 2.708050 0.000000 256 +collect 0 65 2.772589 0.000000 268 +interact 0 62 2.772589 0.000000 270 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +extens 0 53 2.944439 0.000000 340 +life 0 50 3.044522 0.000000 375 +describ 0 45 3.135494 0.000000 400 +random 0 34 3.401197 0.000000 511 +art 0 29 3.583519 0.000000 593 +toward 0 25 3.737670 0.000000 668 +sport 0 25 3.737670 0.000000 683 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +scheme 0 20 3.951244 0.000000 818 +item 0 19 4.007333 0.000000 856 +carl 1 15 4.248495 4.248495 1024 +believ 0 13 4.382027 0.000000 1187 +usavoic 0 13 4.382027 0.000000 1198 +rice 1 11 4.553877 4.553877 1336 +worth 0 11 4.553877 0.000000 1294 +chronicl 0 7 5.010635 0.000000 1952 +athlet 0 7 5.010635 0.000000 1933 +myresum 0 6 5.164786 0.000000 2199 +snow 0 6 5.164786 0.000000 2031 +houston 0 5 5.347108 0.000000 2460 +wasn 0 3 5.857933 0.000000 3800 +dust 0 2 6.263398 0.000000 5551 +syntact 0 2 6.263398 0.000000 5552 +linksth 0 2 6.263398 0.000000 5545 +household 0 2 6.263398 0.000000 4920 +peak 0 2 6.263398 0.000000 5553 +pagestephen 0 1 6.957497 0.000000 13140 +carlpardon 0 1 6.957497 0.000000 13141 +planmi 0 1 6.957497 0.000000 13142 +interestsa 0 1 6.957497 0.000000 13143 +psuedo 0 1 6.957497 0.000000 13144 +dose 0 1 6.957497 0.000000 13145 +pike 0 1 6.957497 0.000000 13146 +bandget 0 1 6.957497 0.000000 13147 +touchpost 0 1 6.957497 0.000000 13148 +esteban 0 1 6.957497 0.000000 13149 +edureturn 0 1 6.957497 0.000000 13150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..3eb2065d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +schedul 0 119 2.079442 0.000000 85 +machin 0 129 2.079442 0.000000 95 +check 0 115 2.197225 0.000000 118 +techniqu 0 99 2.302585 0.000000 138 +learn 1 86 2.484907 2.484907 170 +solut 0 82 2.484907 0.000000 162 +control 0 82 2.484907 0.000000 164 +educ 0 86 2.484907 0.000000 191 +method 0 80 2.564949 0.000000 213 +effici 0 73 2.639057 0.000000 233 +knowledg 0 67 2.708050 0.000000 243 +improv 1 62 2.772589 2.772589 289 +plan 0 65 2.772589 0.000000 272 +descript 0 64 2.772589 0.000000 271 +taylor 0 63 2.772589 0.000000 287 +detail 0 57 2.890372 0.000000 321 +combin 0 42 3.218876 0.000000 421 +vita 0 38 3.295837 0.000000 473 +postal 0 30 3.555348 0.000000 580 +accur 0 25 3.737670 0.000000 680 +induct 0 11 4.553877 0.000000 1304 +solver 0 7 5.010635 0.000000 1911 +analyt 0 7 5.010635 0.000000 1913 +machinelearn 0 6 5.164786 0.000000 2084 +groupth 0 5 5.347108 0.000000 2549 +myresearch 0 4 5.568345 0.000000 2842 +theperform 0 3 5.857933 0.000000 3262 +estlin 1 2 6.263398 6.263398 5554 +tara 0 2 6.263398 0.000000 5555 +researchinvolv 0 2 6.263398 0.000000 5556 +acquir 0 2 6.263398 0.000000 5557 +amparticularli 0 2 6.263398 0.000000 5558 +tulan 0 2 6.263398 0.000000 5559 +estlintara 0 1 6.957497 0.000000 13151 +estlinmachin 0 1 6.957497 0.000000 13152 +austinresearchcontrol 0 1 6.957497 0.000000 13153 +byguid 0 1 6.957497 0.000000 13154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..a7d6d485 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +mail 0 238 1.386294 0.000000 22 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +machin 0 129 2.079442 0.000000 95 +find 0 111 2.197225 0.000000 111 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +theori 0 111 2.197225 0.000000 127 +user 0 104 2.302585 0.000000 137 +real 0 93 2.397895 0.000000 144 +issu 0 78 2.564949 0.000000 211 +would 0 67 2.708050 0.000000 251 +street 0 63 2.772589 0.000000 293 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +suggest 0 53 2.944439 0.000000 331 +directori 0 45 3.135494 0.000000 396 +join 0 39 3.258097 0.000000 457 +chines 0 29 3.583519 0.000000 595 +utc 1 27 3.637586 3.637586 629 +todai 1 25 3.737670 3.737670 672 +watch 0 21 3.912023 0.000000 789 +voic 0 21 3.912023 0.000000 806 +assum 0 19 4.007333 0.000000 845 +stop 0 17 4.110874 0.000000 942 +women 0 16 4.174387 0.000000 1004 +action 0 15 4.248495 0.000000 1038 +trip 0 14 4.317488 0.000000 1113 +central 1 13 4.382027 4.382027 1160 +suit 0 13 4.382027 0.000000 1129 +food 0 12 4.465908 0.000000 1285 +basketbal 0 12 4.465908 0.000000 1289 +market 1 11 4.553877 4.553877 1361 +noth 0 11 4.553877 0.000000 1328 +black 0 10 4.653960 0.000000 1418 +shop 0 10 4.653960 0.000000 1469 +mepost 0 10 4.653960 0.000000 1472 +swim 0 9 4.753590 0.000000 1599 +japan 0 8 4.875197 0.000000 1762 +foreign 0 7 5.010635 0.000000 1919 +troubl 0 6 5.164786 0.000000 2002 +knew 0 5 5.347108 0.000000 2445 +pack 0 3 5.857933 0.000000 3597 +legion 0 3 5.857933 0.000000 3708 +guadalup 0 3 5.857933 0.000000 3255 +francoi 1 2 6.263398 6.263398 4523 +polic 1 2 6.263398 6.263398 5560 +forest 0 2 6.263398 0.000000 5368 +cake 0 2 6.263398 0.000000 5118 +forthcom 0 2 6.263398 0.000000 5392 +crawl 0 2 6.263398 0.000000 5561 +wisdom 0 2 6.263398 0.000000 4430 +barbanson 1 1 6.957497 6.957497 13155 +tank 1 1 6.957497 6.957497 13156 +versionhom 0 1 6.957497 0.000000 13157 +versionthi 0 1 6.957497 0.000000 13158 +spool 0 1 6.957497 0.000000 13159 +francoisabout 0 1 6.957497 0.000000 13160 +mecurr 0 1 6.957497 0.000000 13161 +genuin 0 1 6.957497 0.000000 13162 +pastri 0 1 6.957497 0.000000 13163 +fruit 0 1 6.957497 0.000000 13164 +mouss 0 1 6.957497 0.000000 13165 +groceri 0 1 6.957497 0.000000 13166 +shed 0 1 6.957497 0.000000 13167 +lighton 0 1 6.957497 0.000000 13168 +hyogo 0 1 6.957497 0.000000 13169 +atdominion 0 1 6.957497 0.000000 13170 +hqcheck 0 1 6.957497 0.000000 13171 +dilberti 0 1 6.957497 0.000000 13172 +mentionthat 0 1 6.957497 0.000000 13173 +edufrancoi 0 1 6.957497 0.000000 13174 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..a9845784 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 0 175 1.791759 0.000000 45 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +technolog 1 131 2.079442 2.079442 102 +databas 0 122 2.079442 0.000000 86 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +mathemat 1 108 2.197225 2.197225 123 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +graphic 1 90 2.397895 2.397895 147 +center 0 88 2.397895 0.000000 158 +member 0 84 2.484907 0.000000 165 +institut 0 84 2.484907 0.000000 187 +journal 0 83 2.484907 0.000000 183 +appli 1 71 2.639057 2.639057 226 +laboratori 0 63 2.772589 0.000000 292 +http 0 41 3.218876 0.000000 420 +autom 0 41 3.218876 0.000000 434 +electr 0 38 3.295837 0.000000 461 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +progress 0 28 3.610918 0.000000 598 +director 0 22 3.850148 0.000000 767 +divis 0 21 3.912023 0.000000 803 +former 0 17 4.110874 0.000000 956 +social 0 13 4.382027 0.000000 1123 +donald 1 9 4.753590 4.753590 1510 +fussel 1 5 5.347108 5.347108 2300 +regent 0 5 5.347108 0.000000 2551 +dalla 0 4 5.568345 0.000000 2930 +crow 0 3 5.857933 0.000000 3845 +trammel 0 2 6.263398 0.000000 5562 +dartmouth 0 2 6.263398 0.000000 5393 +collegem 0 2 6.263398 0.000000 5563 +eduinform 0 1 6.957497 0.000000 13175 +fussellb 0 1 6.957497 0.000000 13176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..c7e599c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +address 0 170 1.791759 0.000000 62 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +file 1 132 1.945910 1.945910 70 +construct 0 139 1.945910 0.000000 82 +document 0 121 2.079442 0.000000 89 +find 0 111 2.197225 0.000000 111 +start 0 83 2.484907 0.000000 173 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +go 0 33 3.433987 0.000000 529 +someth 0 31 3.496508 0.000000 554 +anyth 0 16 4.174387 0.000000 998 +georg 0 16 4.174387 0.000000 994 +lane 0 8 4.875197 0.000000 1720 +pagethi 0 5 5.347108 0.000000 2336 +ajit 1 3 5.857933 5.857933 3299 +eduher 0 3 5.857933 0.000000 3499 +useless 1 2 6.263398 6.263398 5564 +odd 0 2 6.263398 0.000000 5565 +georgemi 0 1 6.957497 0.000000 13177 +youand 0 1 6.957497 0.000000 13178 +goodthat 0 1 6.957497 0.000000 13179 +wickersham 0 1 6.957497 0.000000 13180 +gajit 0 1 6.957497 0.000000 13181 +foundus 0 1 6.957497 0.000000 13182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..4f8afd78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +technolog 0 131 2.079442 0.000000 102 +institut 0 84 2.484907 0.000000 187 +still 0 50 3.044522 0.000000 362 +india 0 32 3.465736 0.000000 550 +art 0 29 3.583519 0.000000 593 +try 0 22 3.850148 0.000000 764 +indian 0 22 3.850148 0.000000 769 +medic 0 17 4.110874 0.000000 958 +undergrad 0 9 4.753590 0.000000 1589 +kanpur 0 8 4.875197 0.000000 1744 +river 0 6 5.164786 0.000000 2220 +arora 0 4 5.568345 0.000000 2658 +mehom 0 4 5.568345 0.000000 2979 +oak 0 2 6.263398 0.000000 5566 +geeta 1 1 6.957497 6.957497 13183 +tofigur 0 1 6.957497 0.000000 13184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..f85f53cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +last 0 314 1.098612 0.000000 14 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +click 1 142 1.945910 1.945910 78 +number 0 130 2.079442 0.000000 97 +send 1 114 2.197225 2.197225 109 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +comment 1 93 2.397895 2.397895 146 +help 0 83 2.484907 0.000000 175 +novemb 0 81 2.484907 0.000000 179 +good 0 77 2.564949 0.000000 200 +want 0 79 2.564949 0.000000 199 +receiv 0 66 2.708050 0.000000 244 +suggest 0 53 2.944439 0.000000 331 +visitor 0 49 3.044522 0.000000 371 +could 0 46 3.091042 0.000000 383 +kind 0 32 3.465736 0.000000 541 +art 0 29 3.583519 0.000000 593 +actual 0 28 3.610918 0.000000 604 +quit 0 27 3.637586 0.000000 633 +wish 0 24 3.761200 0.000000 692 +head 0 23 3.806662 0.000000 732 +medic 0 17 4.110874 0.000000 958 +critic 1 16 4.174387 4.174387 982 +easi 0 16 4.174387 0.000000 969 +mayb 0 15 4.248495 0.000000 1014 +decid 0 14 4.317488 0.000000 1075 +earlier 0 13 4.382027 0.000000 1140 +minimum 0 9 4.753590 0.000000 1555 +mass 0 8 4.875197 0.000000 1732 +risk 0 8 4.875197 0.000000 1689 +sleep 0 6 5.164786 0.000000 2211 +merit 0 5 5.347108 0.000000 2466 +gokul 1 4 5.568345 5.568345 2668 +countless 0 4 5.568345 0.000000 3020 +flame 1 3 5.857933 5.857933 3696 +gripe 0 3 5.857933 0.000000 3257 +democrat 0 2 6.263398 0.000000 5567 +plakal 0 2 6.263398 0.000000 5568 +perfectli 0 2 6.263398 0.000000 5569 +theexcess 0 1 6.957497 0.000000 13185 +verbos 0 1 6.957497 0.000000 13186 +thecollect 0 1 6.957497 0.000000 13187 +putonli 0 1 6.957497 0.000000 13188 +barest 0 1 6.957497 0.000000 13189 +adieu 0 1 6.957497 0.000000 13190 +outpour 0 1 6.957497 0.000000 13191 +hag 0 1 6.957497 0.000000 13192 +hopey 0 1 6.957497 0.000000 13193 +untroubl 0 1 6.957497 0.000000 13194 +conscienc 0 1 6.957497 0.000000 13195 +untim 0 1 6.957497 0.000000 13196 +demis 0 1 6.957497 0.000000 13197 +vitriol 0 1 6.957497 0.000000 13198 +reinstat 0 1 6.957497 0.000000 13199 +signin 0 1 6.957497 0.000000 13200 +lesscrit 0 1 6.957497 0.000000 13201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..524eff46 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +like 2 132 1.945910 3.891820 81 +year 0 148 1.945910 0.000000 84 +number 0 130 2.079442 0.000000 97 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +check 1 115 2.197225 2.197225 118 +make 0 111 2.197225 0.000000 120 +pleas 0 113 2.197225 0.000000 114 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +text 0 98 2.302585 0.000000 133 +present 1 91 2.397895 2.397895 145 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +thing 1 84 2.484907 2.484907 189 +start 0 83 2.484907 0.000000 173 +school 0 84 2.484907 0.000000 188 +control 0 82 2.484907 0.000000 164 +know 1 80 2.564949 2.564949 198 +state 0 76 2.564949 0.000000 207 +master 0 76 2.564949 0.000000 216 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +order 0 69 2.708050 0.000000 249 +would 0 67 2.708050 0.000000 251 +plai 1 60 2.833213 2.833213 307 +colleg 0 61 2.833213 0.000000 300 +best 0 59 2.833213 0.000000 299 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +finger 0 52 2.995732 0.000000 354 +friend 1 48 3.044522 3.044522 376 +life 1 50 3.044522 3.044522 375 +visitor 0 49 3.044522 0.000000 371 +cool 0 49 3.044522 0.000000 374 +without 0 50 3.044522 0.000000 370 +made 1 44 3.135494 3.135494 398 +keep 0 44 3.135494 0.000000 409 +anoth 0 45 3.135494 0.000000 408 +around 1 43 3.178054 3.178054 415 +futur 1 41 3.218876 3.218876 427 +past 0 42 3.218876 0.000000 428 +continu 0 39 3.258097 0.000000 448 +join 0 39 3.258097 0.000000 457 +game 1 36 3.367296 3.367296 498 +winter 0 36 3.367296 0.000000 500 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +board 0 33 3.433987 0.000000 528 +india 0 32 3.465736 0.000000 550 +travel 0 30 3.555348 0.000000 579 +option 0 30 3.555348 0.000000 575 +power 0 30 3.555348 0.000000 573 +art 0 29 3.583519 0.000000 593 +except 0 28 3.610918 0.000000 607 +weather 0 28 3.610918 0.000000 618 +usual 0 28 3.610918 0.000000 608 +propos 0 28 3.610918 0.000000 602 +great 1 27 3.637586 3.637586 626 +american 0 27 3.637586 0.000000 634 +background 0 25 3.737670 0.000000 664 +todai 0 25 3.737670 0.000000 672 +alwai 1 24 3.761200 3.761200 691 +famili 0 23 3.806662 0.000000 735 +love 1 21 3.912023 3.912023 804 +born 0 21 3.912023 0.000000 798 +watch 0 21 3.912023 0.000000 789 +tenni 1 20 3.951244 3.951244 838 +els 0 19 4.007333 0.000000 843 +ever 0 19 4.007333 0.000000 872 +spend 0 19 4.007333 0.000000 850 +listen 0 18 4.060443 0.000000 907 +intro 1 17 4.110874 4.110874 915 +sept 0 17 4.110874 0.000000 952 +bachelor 0 17 4.110874 0.000000 957 +thought 0 17 4.110874 0.000000 945 +medic 0 17 4.110874 0.000000 958 +transfer 0 16 4.174387 0.000000 967 +sign 0 16 4.174387 0.000000 970 +goe 0 15 4.248495 0.000000 1044 +hopefulli 0 14 4.317488 0.000000 1071 +becam 0 14 4.317488 0.000000 1117 +attribut 0 14 4.317488 0.000000 1092 +came 1 13 4.382027 4.382027 1197 +care 0 13 4.382027 0.000000 1177 +sai 0 13 4.382027 0.000000 1175 +guest 0 12 4.465908 0.000000 1220 +song 0 11 4.553877 0.000000 1380 +fill 0 11 4.553877 0.000000 1349 +success 0 10 4.653960 0.000000 1390 +hang 0 9 4.753590 0.000000 1499 +kumar 0 9 4.753590 0.000000 1506 +ball 0 9 4.753590 0.000000 1608 +gold 0 8 4.875197 0.000000 1745 +soccer 0 8 4.875197 0.000000 1752 +footbal 0 7 5.010635 0.000000 1912 +cricket 0 7 5.010635 0.000000 1945 +bore 0 7 5.010635 0.000000 1948 +golden 0 7 5.010635 0.000000 1962 +happen 0 7 5.010635 0.000000 1790 +southern 0 6 5.164786 0.000000 2191 +vari 0 6 5.164786 0.000000 2001 +alphabet 0 6 5.164786 0.000000 1980 +chat 0 6 5.164786 0.000000 2128 +curiou 0 5 5.347108 0.000000 2541 +chemic 0 5 5.347108 0.000000 2552 +valuabl 0 5 5.347108 0.000000 2256 +proud 0 4 5.568345 0.000000 2918 +batch 0 4 5.568345 0.000000 2700 +enrol 0 4 5.568345 0.000000 2613 +abraham 0 4 5.568345 0.000000 2644 +gokul 0 4 5.568345 0.000000 2668 +hide 0 4 5.568345 0.000000 2996 +bold 0 3 5.857933 0.000000 3846 +cold 0 3 5.857933 0.000000 3637 +acad 0 3 5.857933 0.000000 3847 +hindi 0 3 5.857933 0.000000 3753 +narrow 0 3 5.857933 0.000000 3807 +gooti 1 2 6.263398 6.263398 4281 +subramanyam 1 2 6.263398 6.263398 4282 +somebodi 0 2 6.263398 0.000000 4463 +hyderabad 0 2 6.263398 0.000000 5570 +andhra 0 2 6.263398 0.000000 5571 +pradesh 0 2 6.263398 0.000000 5572 +osmania 0 2 6.263398 0.000000 5573 +nebraska 0 2 6.263398 0.000000 5574 +lincoln 0 2 6.263398 0.000000 5575 +addict 0 2 6.263398 0.000000 5576 +neeraj 0 2 6.263398 0.000000 5577 +shailesh 0 2 6.263398 0.000000 5578 +vipin 0 2 6.263398 0.000000 5579 +hideout 0 1 6.957497 0.000000 13202 +wont 0 1 6.957497 0.000000 13203 +disappoint 0 1 6.957497 0.000000 13204 +geographi 0 1 6.957497 0.000000 13205 +gala 0 1 6.957497 0.000000 13206 +wasjust 0 1 6.957497 0.000000 13207 +mehul 0 1 6.957497 0.000000 13208 +shantanu 0 1 6.957497 0.000000 13209 +likechess 0 1 6.957497 0.000000 13210 +carrom 0 1 6.957497 0.000000 13211 +racquet 0 1 6.957497 0.000000 13212 +definetli 0 1 6.957497 0.000000 13213 +horoscop 0 1 6.957497 0.000000 13214 +compatabil 0 1 6.957497 0.000000 13215 +destini 0 1 6.957497 0.000000 13216 +hardwork 0 1 6.957497 0.000000 13217 +dispos 0 1 6.957497 0.000000 13218 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..8ec37616 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +utexa 1 189 1.609438 1.609438 44 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +austin 0 168 1.791759 0.000000 63 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +first 0 140 1.945910 0.000000 71 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +tool 0 117 2.079442 0.000000 93 +provid 0 121 2.079442 0.000000 94 +specif 1 106 2.197225 2.197225 106 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +part 0 98 2.302585 0.000000 129 +octob 1 89 2.397895 2.397895 156 +associ 0 93 2.397895 0.000000 151 +decemb 1 80 2.564949 2.564949 215 +june 0 79 2.564949 0.000000 214 +sourc 0 77 2.564949 0.000000 201 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +copi 1 63 2.772589 2.772589 284 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +date 1 51 2.995732 2.995732 344 +without 0 50 3.044522 0.000000 370 +protocol 1 45 3.135494 3.135494 407 +made 0 44 3.135494 0.000000 398 +term 0 43 3.178054 0.000000 411 +http 1 41 3.218876 3.218876 420 +must 0 40 3.258097 0.000000 442 +credit 0 38 3.295837 0.000000 460 +formal 0 37 3.332205 0.000000 478 +copyright 1 36 3.367296 3.367296 495 +survei 1 35 3.401197 3.401197 513 +word 0 34 3.401197 0.000000 508 +post 0 35 3.401197 0.000000 505 +posit 0 31 3.496508 0.000000 552 +profil 0 30 3.555348 0.000000 581 +hard 0 30 3.555348 0.000000 563 +full 0 28 3.610918 0.000000 615 +utc 0 27 3.637586 0.000000 629 +revis 0 26 3.688879 0.000000 640 +request 0 26 3.688879 0.000000 635 +notic 0 25 3.737670 0.000000 675 +other 0 24 3.761200 0.000000 697 +honor 0 23 3.806662 0.000000 729 +accept 0 18 4.060443 0.000000 879 +otherwis 0 17 4.110874 0.000000 922 +commerci 0 16 4.174387 0.000000 1005 +advantag 0 16 4.174387 0.000000 987 +permit 0 16 4.174387 0.000000 962 +evolv 0 12 4.465908 0.000000 1223 +grant 0 12 4.465908 0.000000 1216 +statement 1 11 4.553877 4.553877 1313 +submiss 0 11 4.553877 0.000000 1298 +exact 1 9 4.753590 4.753590 1509 +phrase 0 5 5.347108 0.000000 2242 +own 0 5 5.347108 0.000000 2531 +permiss 1 4 5.568345 5.568345 2642 +gouda 1 4 5.568345 5.568345 3021 +machineri 0 4 5.568345 0.000000 2851 +citat 1 3 5.857933 5.857933 3617 +sciencesaustin 0 3 5.857933 0.000000 3828 +argu 0 3 5.857933 0.000000 3698 +networkprotocol 0 3 5.857933 0.000000 3285 +moham 0 3 5.857933 0.000000 3848 +fornetwork 0 2 6.263398 0.000000 5580 +ordistribut 0 2 6.263398 0.000000 5581 +redistribut 0 2 6.263398 0.000000 5582 +pragmat 1 1 6.957497 6.957497 13219 +goudanetwork 1 1 6.957497 6.957497 13220 +goudaacm 0 1 6.957497 0.000000 13221 +surveysa 0 1 6.957497 0.000000 13222 +specificationsand 0 1 6.957497 0.000000 13223 +implementationsmoham 0 1 6.957497 0.000000 13224 +goudath 0 1 6.957497 0.000000 13225 +usagouda 0 1 6.957497 0.000000 13226 +htmlabstract 0 1 6.957497 0.000000 13227 +bridgeth 0 1 6.957497 0.000000 13228 +implementationsaddit 0 1 6.957497 0.000000 13229 +methodologypubl 0 1 6.957497 0.000000 13230 +digitalor 0 1 6.957497 0.000000 13231 +classroomus 0 1 6.957497 0.000000 13232 +profit 0 1 6.957497 0.000000 13233 +bearthi 0 1 6.957497 0.000000 13234 +forcompon 0 1 6.957497 0.000000 13235 +torepublish 0 1 6.957497 0.000000 13236 +requiresprior 0 1 6.957497 0.000000 13237 +frompubl 0 1 6.957497 0.000000 13238 +orpermiss 0 1 6.957497 0.000000 13239 +goudagouda 0 1 6.957497 0.000000 13240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..fde25c37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +class 0 199 1.609438 0.000000 37 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +data 0 170 1.791759 0.000000 49 +assign 1 135 1.945910 1.945910 66 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +report 1 131 2.079442 2.079442 92 +look 1 107 2.197225 2.197225 115 +code 0 108 2.197225 0.000000 116 +check 0 115 2.197225 0.000000 118 +take 0 97 2.302585 0.000000 134 +imag 0 91 2.397895 0.000000 161 +meet 0 72 2.639057 0.000000 229 +would 0 67 2.708050 0.000000 251 +test 0 66 2.708050 0.000000 252 +plan 1 65 2.772589 2.772589 272 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +talk 0 53 2.944439 0.000000 336 +case 1 51 2.995732 2.995732 351 +much 0 52 2.995732 0.000000 349 +friend 0 48 3.044522 0.000000 376 +visitor 0 49 3.044522 0.000000 371 +long 0 43 3.178054 0.000000 413 +author 0 39 3.258097 0.000000 450 +connect 0 37 3.332205 0.000000 485 +john 1 33 3.433987 3.433987 532 +product 0 33 3.433987 0.000000 527 +except 0 28 3.610918 0.000000 607 +minut 0 20 3.951244 0.000000 810 +log 0 19 4.007333 0.000000 857 +less 0 18 4.060443 0.000000 892 +matrix 0 17 4.110874 0.000000 933 +spars 0 16 4.174387 0.000000 989 +doesn 0 15 4.248495 0.000000 1055 +central 0 13 4.382027 0.000000 1160 +land 0 12 4.465908 0.000000 1273 +rememb 0 12 4.465908 0.000000 1217 +guess 0 10 4.653960 0.000000 1443 +latter 0 9 4.753590 0.000000 1522 +bore 0 7 5.010635 0.000000 1948 +oregon 0 5 5.347108 0.000000 2437 +glimps 0 4 5.568345 0.000000 2778 +plapack 0 3 5.857933 0.000000 3849 +redmond 0 3 5.857933 0.000000 3568 +mysteri 0 2 6.263398 0.000000 4715 +hail 0 2 6.263398 0.000000 5583 +gunnel 1 1 6.957497 6.957497 13241 +transpos 1 1 6.957497 6.957497 13242 +drank 0 1 6.957497 0.000000 13243 +depict 0 1 6.957497 0.000000 13244 +computationsif 0 1 6.957497 0.000000 13245 +pageam 0 1 6.957497 0.000000 13246 +towni 0 1 6.957497 0.000000 13247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..1cae1a23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +student 1 343 1.098612 1.098612 19 +graduat 0 215 1.386294 0.000000 31 +utexa 1 189 1.609438 1.609438 44 +oper 0 180 1.609438 0.000000 34 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +year 0 148 1.945910 0.000000 84 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +undergradu 0 54 2.944439 0.000000 338 +curriculum 0 33 3.433987 0.000000 535 +mellon 0 13 4.382027 0.000000 1179 +frank 1 9 4.753590 4.753590 1568 +junior 0 5 5.347108 0.000000 2519 +tropschuhfrank 0 1 6.957497 0.000000 13248 +tropschuh 0 1 6.957497 0.000000 13249 +gunther 0 1 6.957497 0.000000 13250 +schweiz 0 1 6.957497 0.000000 13251 +clayton 0 1 6.957497 0.000000 13252 +waldhofstrass 0 1 6.957497 0.000000 13253 +rheinfelden 0 1 6.957497 0.000000 13254 +vitaeenglishdeutschlinkscarnegi 0 1 6.957497 0.000000 13255 +universitterlangen 0 1 6.957497 0.000000 13256 +nrnberg 0 1 6.957497 0.000000 13257 +abroad 0 1 6.957497 0.000000 13258 +mathematisch 0 1 6.957497 0.000000 13259 +maschinen 0 1 6.957497 0.000000 13260 +datenverarbeitung 0 1 6.957497 0.000000 13261 +tropschuhgunth 0 1 6.957497 0.000000 13262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..72ffd61b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +place 0 106 2.197225 0.000000 124 +master 0 76 2.564949 0.000000 216 +name 0 72 2.639057 0.000000 220 +servic 0 72 2.639057 0.000000 236 +degre 0 69 2.708050 0.000000 259 +juli 0 60 2.833213 0.000000 305 +tabl 0 51 2.995732 0.000000 346 +directori 0 45 3.135494 0.000000 396 +china 0 37 3.332205 0.000000 487 +mine 0 26 3.688879 0.000000 654 +background 0 25 3.737670 0.000000 664 +hobbi 0 16 4.174387 0.000000 1009 +grant 0 12 4.465908 0.000000 1216 +birthdai 0 4 5.568345 0.000000 2800 +birth 0 3 5.857933 0.000000 3594 +addresspictur 0 2 6.263398 0.000000 5584 +ceremoni 0 2 6.263398 0.000000 5585 +jiangsu 0 2 6.263398 0.000000 5586 +yongxiang 1 1 6.957497 6.957497 13263 +pagemerri 0 1 6.957497 0.000000 13264 +christmashappi 0 1 6.957497 0.000000 13265 +homepagegao 0 1 6.957497 0.000000 13266 +yongxiangsever 0 1 6.957497 0.000000 13267 +pointsto 0 1 6.957497 0.000000 13268 +chinadepart 0 1 6.957497 0.000000 13269 +male 0 1 6.957497 0.000000 13270 +huanan 0 1 6.957497 0.000000 13271 +tenniseduc 0 1 6.957497 0.000000 13272 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..147f0f54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +distribut 0 162 1.791759 0.000000 51 +databas 0 122 2.079442 0.000000 86 +manag 1 114 2.197225 2.197225 125 +second 0 81 2.484907 0.000000 166 +term 0 43 3.178054 0.000000 411 +queri 0 33 3.433987 0.000000 524 +stop 0 17 4.110874 0.000000 942 +zhang 1 16 4.174387 4.174387 980 +gzhang 0 2 6.263398 0.000000 4183 +schoolth 0 1 6.957497 0.000000 13273 +semestercoursesc 0 1 6.957497 0.000000 13274 +linc 0 1 6.957497 0.000000 13275 +alvis 0 1 6.957497 0.000000 13276 +mirankerfil 0 1 6.957497 0.000000 13277 +databs 0 1 6.957497 0.000000 13278 +formthank 0 1 6.957497 0.000000 13279 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..80523ff1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +design 1 213 1.386294 1.386294 25 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +applic 0 170 1.791759 0.000000 56 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +find 0 111 2.197225 0.000000 111 +mathemat 0 108 2.197225 0.000000 123 +techniqu 0 99 2.302585 0.000000 138 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +institut 1 84 2.484907 2.484907 187 +ieee 1 86 2.484907 2.484907 190 +academ 0 82 2.484907 0.000000 178 +internet 0 83 2.484907 0.000000 186 +optim 1 79 2.564949 2.564949 197 +refer 1 78 2.564949 2.564949 203 +new 0 64 2.772589 0.000000 262 +complex 0 64 2.772589 0.000000 269 +prof 0 64 2.772589 0.000000 273 +visit 0 63 2.772589 0.000000 288 +room 1 59 2.833213 2.833213 301 +undergradu 0 54 2.944439 0.000000 338 +even 0 45 3.135494 0.000000 393 +directori 0 45 3.135494 0.000000 396 +theoret 0 39 3.258097 0.000000 446 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +industri 0 38 3.295837 0.000000 464 +china 0 37 3.332205 0.000000 487 +sciencesunivers 0 37 3.332205 0.000000 486 +staff 0 36 3.367296 0.000000 490 +award 0 34 3.401197 0.000000 523 +given 0 32 3.465736 0.000000 538 +focus 0 29 3.583519 0.000000 584 +chines 0 29 3.583519 0.000000 595 +campu 1 27 3.637586 3.637586 623 +constraint 0 26 3.688879 0.000000 636 +head 0 23 3.806662 0.000000 732 +size 0 23 3.806662 0.000000 713 +period 0 22 3.850148 0.000000 743 +vlsi 1 21 3.912023 3.912023 795 +martin 0 21 3.912023 0.000000 794 +rout 0 21 3.912023 0.000000 793 +chen 0 21 3.912023 0.000000 791 +voic 0 21 3.912023 0.000000 806 +aid 1 18 4.060443 4.060443 904 +edulast 0 17 4.110874 0.000000 927 +jose 1 16 4.174387 4.174387 976 +tsinghua 0 13 4.382027 0.000000 1195 +israel 0 11 4.553877 0.000000 1366 +wong 1 9 4.753590 4.753590 1609 +classmat 0 9 4.753590 0.000000 1516 +combinatori 0 8 4.875197 0.000000 1629 +wire 0 8 4.875197 0.000000 1747 +uniform 0 7 5.010635 0.000000 1845 +delai 0 7 5.010635 0.000000 1848 +zhou 1 6 5.164786 5.164786 2092 +ture 0 6 5.164786 0.000000 1997 +alex 0 6 5.164786 0.000000 2130 +internationalconfer 0 6 5.164786 0.000000 2051 +bulletin 0 5 5.347108 0.000000 2343 +weizmann 0 4 5.568345 0.000000 2858 +kept 0 4 5.568345 0.000000 2762 +zhao 0 4 5.568345 0.000000 2699 +headlin 0 3 5.857933 0.000000 3710 +amir 0 3 5.857933 0.000000 3850 +mathematicallog 0 3 5.857933 0.000000 3796 +pagealan 0 2 6.263398 0.000000 5587 +compuer 0 2 6.263398 0.000000 4692 +researchgroup 0 2 6.263398 0.000000 5588 +pnueli 0 1 6.957497 0.000000 13280 +aprofessor 0 1 6.957497 0.000000 13281 +prestig 0 1 6.957497 0.000000 13282 +incompletelist 0 1 6.957497 0.000000 13283 +publicationshai 0 1 6.957497 0.000000 13284 +forriv 0 1 6.957497 0.000000 13285 +crosstalk 0 1 6.957497 0.000000 13286 +optimalnon 0 1 6.957497 0.000000 13287 +elmor 0 1 6.957497 0.000000 13288 +acmintern 0 1 6.957497 0.000000 13289 +austintaylor 0 1 6.957497 0.000000 13290 +staustin 0 1 6.957497 0.000000 13291 +haizhou 0 1 6.957497 0.000000 13292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..1c771bc6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +welcom 0 122 2.079442 0.000000 99 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +educ 0 86 2.484907 0.000000 191 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +nation 0 74 2.639057 0.000000 240 +dept 1 64 2.772589 2.772589 291 +visitor 0 49 3.044522 0.000000 371 +china 1 37 3.332205 3.332205 487 +univ 1 28 3.610918 3.610918 617 +alumni 0 21 3.912023 0.000000 807 +sept 0 17 4.110874 0.000000 952 +universityof 0 15 4.248495 0.000000 1061 +wait 0 13 4.382027 0.000000 1168 +departmentof 0 9 4.753590 0.000000 1539 +hear 0 7 5.010635 0.000000 1940 +wuhan 1 2 6.263398 6.263398 5589 +pal 0 2 6.263398 0.000000 4964 +myselfnow 0 1 6.957497 0.000000 13293 +pre 0 1 6.957497 0.000000 13294 +alumnihom 0 1 6.957497 0.000000 13295 +pagecontact 0 1 6.957497 0.000000 13296 +haosun 0 1 6.957497 0.000000 13297 +edunow 0 1 6.957497 0.000000 13298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..ad87d269 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +first 1 140 1.945910 1.945910 71 +click 0 142 1.945910 0.000000 78 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +intern 0 108 2.197225 0.000000 128 +call 0 91 2.397895 0.000000 153 +educ 0 86 2.484907 0.000000 191 +want 0 79 2.564949 0.000000 199 +nation 0 74 2.639057 0.000000 240 +view 0 70 2.708050 0.000000 254 +visit 0 63 2.772589 0.000000 288 +taylor 0 63 2.772589 0.000000 287 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +might 0 41 3.218876 0.000000 426 +author 0 39 3.258097 0.000000 450 +open 1 38 3.295837 3.295837 469 +electr 0 38 3.295837 0.000000 461 +utc 0 27 3.637586 0.000000 629 +honor 1 23 3.806662 3.806662 729 +finish 1 22 3.850148 3.850148 748 +divis 1 21 3.912023 3.912023 803 +grad 1 20 3.951244 3.951244 837 +minut 0 20 3.951244 0.000000 810 +thoma 0 18 4.060443 0.000000 901 +edulast 0 17 4.110874 0.000000 927 +fourth 0 16 4.174387 0.000000 999 +universityof 1 15 4.248495 4.248495 1061 +club 0 15 4.248495 0.000000 1058 +central 0 13 4.382027 0.000000 1160 +volleybal 1 9 4.753590 4.753590 1598 +departmentof 0 9 4.753590 0.000000 1539 +motorola 0 9 4.753590 0.000000 1546 +competit 0 8 4.875197 0.000000 1635 +contest 0 5 5.347108 0.000000 2273 +ioanni 0 5 5.347108 0.000000 2553 +champion 2 4 5.568345 11.136690 2982 +lanc 0 4 5.568345 0.000000 3022 +educlick 0 3 5.857933 0.000000 3612 +sawada 0 3 5.857933 0.000000 3190 +smaragdaki 0 3 5.857933 0.000000 3851 +tower 0 3 5.857933 0.000000 3818 +myfavorit 0 3 5.857933 0.000000 3852 +intramur 1 2 6.263398 6.263398 5590 +kansa 1 2 6.263398 6.263398 5591 +bowl 1 2 6.263398 6.263398 5417 +marathon 0 2 6.263398 0.000000 5592 +micheal 1 1 6.957497 6.957497 13299 +hewett 1 1 6.957497 6.957497 13300 +hewetthewett 0 1 6.957497 0.000000 13301 +fingerm 0 1 6.957497 0.000000 13302 +stanfordunivers 0 1 6.957497 0.000000 13303 +washburnunivers 0 1 6.957497 0.000000 13304 +collegiateprogram 0 1 6.957497 0.000000 13305 +wahlutc 0 1 6.957497 0.000000 13306 +hanoi 0 1 6.957497 0.000000 13307 +tokudaut 0 1 6.957497 0.000000 13308 +locatem 0 1 6.957497 0.000000 13309 +learnabout 0 1 6.957497 0.000000 13310 +downloadmi 0 1 6.957497 0.000000 13311 +learnmor 0 1 6.957497 0.000000 13312 +timefax 0 1 6.957497 0.000000 13313 +hewettemail 0 1 6.957497 0.000000 13314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..2b4c8b1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +research 0 431 0.693147 0.000000 10 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +like 1 132 1.945910 1.945910 81 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +databas 1 122 2.079442 2.079442 86 +compil 0 122 2.079442 0.000000 96 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +graphic 1 90 2.397895 2.397895 147 +center 0 88 2.397895 0.000000 158 +real 0 93 2.397895 0.000000 144 +search 0 95 2.397895 0.000000 155 +activ 1 84 2.484907 2.484907 182 +solut 1 82 2.484907 2.484907 162 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +educ 0 86 2.484907 0.000000 191 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +novemb 0 81 2.484907 0.000000 179 +resum 1 79 2.564949 2.564949 217 +state 0 76 2.564949 0.000000 207 +orient 0 80 2.564949 0.000000 205 +interfac 0 79 2.564949 0.000000 209 +onlin 0 75 2.639057 0.000000 223 +write 0 72 2.639057 0.000000 222 +java 1 70 2.708050 2.708050 248 +receiv 1 66 2.708050 2.708050 244 +simul 0 66 2.708050 0.000000 255 +window 0 68 2.708050 0.000000 242 +main 0 67 2.708050 0.000000 256 +function 1 62 2.772589 2.772589 275 +written 1 63 2.772589 2.772589 278 +virtual 0 62 2.772589 0.000000 285 +best 0 59 2.833213 0.000000 299 +detail 0 57 2.890372 0.000000 321 +explor 0 58 2.890372 0.000000 324 +allow 0 53 2.944439 0.000000 333 +processor 0 54 2.944439 0.000000 335 +life 0 50 3.044522 0.000000 375 +video 0 44 3.135494 0.000000 405 +natur 0 44 3.135494 0.000000 406 +http 1 41 3.218876 3.218876 420 +might 0 41 3.218876 0.000000 426 +fast 0 42 3.218876 0.000000 429 +live 0 40 3.258097 0.000000 451 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +robot 1 36 3.367296 3.367296 497 +game 1 36 3.367296 3.367296 498 +especi 0 36 3.367296 0.000000 496 +product 0 33 3.433987 0.000000 527 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +anim 0 31 3.496508 0.000000 557 +common 1 30 3.555348 3.555348 574 +rang 0 30 3.555348 0.000000 565 +platform 0 29 3.583519 0.000000 591 +packag 1 28 3.610918 3.610918 614 +american 0 27 3.637586 0.000000 634 +client 0 25 3.737670 0.000000 679 +flow 0 24 3.761200 0.000000 700 +methodolog 0 23 3.806662 0.000000 733 +born 0 21 3.912023 0.000000 798 +unit 0 21 3.912023 0.000000 779 +busi 0 21 3.912023 0.000000 784 +theunivers 0 21 3.912023 0.000000 797 +five 0 19 4.007333 0.000000 841 +lisp 1 18 4.060443 4.060443 897 +seek 0 17 4.110874 0.000000 954 +edulast 0 17 4.110874 0.000000 927 +easi 0 16 4.174387 0.000000 969 +reflect 0 15 4.248495 0.000000 1034 +came 0 13 4.382027 0.000000 1197 +usavoic 0 13 4.382027 0.000000 1198 +assembl 0 12 4.465908 0.000000 1207 +realiti 0 12 4.465908 0.000000 1272 +resid 0 10 4.653960 0.000000 1461 +placement 0 10 4.653960 0.000000 1420 +mepost 0 10 4.653960 0.000000 1472 +novak 0 9 4.753590 0.000000 1521 +poetri 0 9 4.753590 0.000000 1596 +port 1 8 4.875197 4.875197 1766 +cross 0 8 4.875197 0.000000 1703 +realist 0 8 4.875197 0.000000 1665 +textur 0 8 4.875197 0.000000 1677 +spot 0 7 5.010635 0.000000 1894 +contract 1 6 5.164786 5.164786 1985 +gordon 0 6 5.164786 0.000000 2032 +ousterhout 0 5 5.347108 0.000000 2301 +outdoor 0 5 5.347108 0.000000 2514 +havedevelop 0 4 5.568345 0.000000 2681 +vrml 0 4 5.568345 0.000000 2949 +legion 0 3 5.857933 0.000000 3708 +flat 0 3 5.857933 0.000000 3853 +moredetail 0 3 5.857933 0.000000 3854 +expertis 0 3 5.857933 0.000000 3321 +leverag 0 3 5.857933 0.000000 3153 +vietnames 0 2 6.263398 0.000000 5593 +occup 0 2 6.263398 0.000000 5169 +akcl 0 2 6.263398 0.000000 4796 +standalon 0 2 6.263398 0.000000 4077 +researchwith 0 2 6.263398 0.000000 5594 +sdsc 0 2 6.263398 0.000000 5199 +hiep 1 1 6.957497 6.957497 13315 +xwindow 1 1 6.957497 6.957497 13316 +gunu 1 1 6.957497 6.957497 13317 +netrek 1 1 6.957497 6.957497 13318 +factoryx 1 1 6.957497 6.957497 13319 +nguyenhiep 0 1 6.957497 0.000000 13320 +nguyenabout 0 1 6.957497 0.000000 13321 +meabout 0 1 6.957497 0.000000 13322 +texasfor 0 1 6.957497 0.000000 13323 +providinghigh 0 1 6.957497 0.000000 13324 +hypertextresum 0 1 6.957497 0.000000 13325 +con 0 1 6.957497 0.000000 13326 +nsplace 0 1 6.957497 0.000000 13327 +rexi 0 1 6.957497 0.000000 13328 +emptiv 0 1 6.957497 0.000000 13329 +gdraw 0 1 6.957497 0.000000 13330 +specular 0 1 6.957497 0.000000 13331 +sonar 0 1 6.957497 0.000000 13332 +xgcl 0 1 6.957497 0.000000 13333 +xakcl 0 1 6.957497 0.000000 13334 +anonlin 0 1 6.957497 0.000000 13335 +andmaintain 0 1 6.957497 0.000000 13336 +currentlyact 0 1 6.957497 0.000000 13337 +internetsoftwar 0 1 6.957497 0.000000 13338 +mappingroutin 0 1 6.957497 0.000000 13339 +potteri 0 1 6.957497 0.000000 13340 +vrmlto 0 1 6.957497 0.000000 13341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..c673868b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +tool 0 117 2.079442 0.000000 93 +welcom 0 122 2.079442 0.000000 99 +world 1 115 2.197225 2.197225 126 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +book 1 99 2.302585 2.302585 131 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +internet 1 83 2.484907 2.484907 186 +member 0 84 2.484907 0.000000 165 +stuff 0 87 2.484907 0.000000 171 +ieee 0 86 2.484907 0.000000 190 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +java 1 70 2.708050 2.708050 248 +guid 0 63 2.772589 0.000000 267 +new 0 64 2.772589 0.000000 262 +virtual 0 62 2.772589 0.000000 285 +collect 0 65 2.772589 0.000000 268 +taylor 0 63 2.772589 0.000000 287 +unix 0 58 2.890372 0.000000 308 +directori 0 45 3.135494 0.000000 396 +societi 0 40 3.258097 0.000000 456 +expect 0 37 3.332205 0.000000 484 +manual 0 35 3.401197 0.000000 504 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +bookmark 0 26 3.688879 0.000000 639 +yahoo 0 24 3.761200 0.000000 707 +vlsi 0 21 3.912023 0.000000 795 +martin 0 21 3.912023 0.000000 794 +nice 0 20 3.951244 0.000000 809 +citi 0 19 4.007333 0.000000 874 +career 1 12 4.465908 4.465908 1287 +entertain 0 12 4.465908 0.000000 1286 +tour 0 11 4.553877 0.000000 1307 +perl 0 11 4.553877 0.000000 1332 +mosaic 0 10 4.653960 0.000000 1426 +wong 0 9 4.753590 0.000000 1609 +infoseek 0 6 5.164786 0.000000 2188 +giant 0 3 5.857933 0.000000 3137 +huiqun 1 2 6.263398 6.263398 4200 +rosett 1 2 6.263398 6.263398 5595 +hqliu 1 2 6.263398 6.263398 4199 +sunris 0 2 6.263398 0.000000 5212 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..2be23f0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 2 168 1.791759 3.583518 63 +avail 1 169 1.791759 1.791759 48 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +postscript 0 131 2.079442 0.000000 90 +librari 0 87 2.484907 0.000000 181 +onlin 1 75 2.639057 2.639057 223 +knowledg 0 67 2.708050 0.000000 243 +main 0 67 2.708050 0.000000 256 +colleg 0 61 2.833213 0.000000 300 +thesi 0 57 2.890372 0.000000 327 +reason 0 57 2.890372 0.000000 318 +advisor 0 51 2.995732 0.000000 355 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +expect 0 37 3.332205 0.000000 484 +represent 0 35 3.401197 0.000000 512 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +rule 0 26 3.688879 0.000000 638 +action 0 15 4.248495 0.000000 1038 +english 0 15 4.248495 0.000000 1033 +draft 0 14 4.317488 0.000000 1085 +philosophi 0 13 4.382027 0.000000 1167 +usavoic 0 13 4.382027 0.000000 1198 +vladimir 0 11 4.553877 0.000000 1324 +mepost 0 10 4.653960 0.000000 1472 +tempor 0 9 4.753590 0.000000 1584 +colloquium 0 8 4.875197 0.000000 1734 +sciencesat 0 7 5.010635 0.000000 1968 +infer 0 6 5.164786 0.000000 2040 +causal 0 6 5.164786 0.000000 2024 +lifschitz 0 5 5.347108 0.000000 2542 +nonmonoton 0 4 5.568345 0.000000 3023 +interestscommonsens 0 2 6.263398 0.000000 5596 +actionlog 0 2 6.263398 0.000000 5597 +reasoningmi 0 2 6.263398 0.000000 5598 +hudson 1 1 6.957497 6.957497 13342 +pagehudson 0 1 6.957497 0.000000 13343 +turnerphd 0 1 6.957497 0.000000 13344 +ofcommonsens 0 1 6.957497 0.000000 13345 +msc 0 1 6.957497 0.000000 13346 +mli 0 1 6.957497 0.000000 13347 +linkseuropean 0 1 6.957497 0.000000 13348 +spatialand 0 1 6.957497 0.000000 13349 +reasoningto 0 1 6.957497 0.000000 13350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..d38280de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +postscript 0 131 2.079442 0.000000 90 +number 0 130 2.079442 0.000000 97 +world 1 115 2.197225 2.197225 126 +send 0 114 2.197225 0.000000 109 +part 0 98 2.302585 0.000000 129 +center 0 88 2.397895 0.000000 158 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +comment 0 93 2.397895 0.000000 146 +ieee 0 86 2.484907 0.000000 190 +help 0 83 2.484907 0.000000 175 +internet 0 83 2.484907 0.000000 186 +librari 0 87 2.484907 0.000000 181 +resum 0 79 2.564949 0.000000 217 +onlin 0 75 2.639057 0.000000 223 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +format 0 48 3.044522 0.000000 356 +compani 1 41 3.218876 3.218876 423 +littl 0 39 3.258097 0.000000 454 +travel 0 30 3.555348 0.000000 579 +full 1 28 3.610918 3.610918 615 +campu 0 27 3.637586 0.000000 623 +left 0 19 4.007333 0.000000 851 +beauti 0 18 4.060443 0.000000 912 +seek 0 17 4.110874 0.000000 954 +zhang 1 16 4.174387 4.174387 980 +career 0 12 4.465908 0.000000 1287 +lake 0 11 4.553877 0.000000 1373 +allen 0 5 5.347108 0.000000 2470 +blvd 0 4 5.568345 0.000000 3007 +yanbin 1 2 6.263398 6.263398 5599 +hyanbin 1 1 6.957497 6.957497 13351 +cutti 0 1 6.957497 0.000000 13352 +webmuseum 0 1 6.957497 0.000000 13353 +homeland 0 1 6.957497 0.000000 13354 +tarlor 0 1 6.957497 0.000000 13355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..c5f7c9fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +utexa 1 189 1.609438 1.609438 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +construct 0 139 1.945910 0.000000 82 +tool 0 117 2.079442 0.000000 93 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +graphic 0 90 2.397895 0.000000 147 +master 0 76 2.564949 0.000000 216 +decemb 0 80 2.564949 0.000000 215 +degre 0 69 2.708050 0.000000 259 +creat 0 63 2.772589 0.000000 277 +summer 0 56 2.890372 0.000000 311 +undergradu 0 54 2.944439 0.000000 338 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +http 0 41 3.218876 0.000000 420 +compani 0 41 3.218876 0.000000 423 +small 0 39 3.258097 0.000000 447 +framework 0 28 3.610918 0.000000 606 +trace 0 25 3.737670 0.000000 677 +geometri 0 22 3.850148 0.000000 752 +concentr 0 18 4.060443 0.000000 906 +modular 0 10 4.653960 0.000000 1392 +univeristi 0 8 4.875197 0.000000 1754 +mass 0 8 4.875197 0.000000 1732 +solid 0 5 5.347108 0.000000 2255 +babi 0 5 5.347108 0.000000 2493 +isaac 1 3 5.857933 5.857933 3855 +coursework 0 3 5.857933 0.000000 3588 +sheldon 1 2 6.263398 6.263398 5226 +reciev 0 2 6.263398 0.000000 5600 +lowel 0 2 6.263398 0.000000 5224 +isheldon 1 1 6.957497 6.957497 13356 +reciv 0 1 6.957497 0.000000 13357 +unives 0 1 6.957497 0.000000 13358 +schlaeor 0 1 6.957497 0.000000 13359 +mellor 0 1 6.957497 0.000000 13360 +bsptree 0 1 6.957497 0.000000 13361 +butt 0 1 6.957497 0.000000 13362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..8234957c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +book 0 99 2.302585 0.000000 131 +homepag 0 93 2.397895 0.000000 148 +internet 0 83 2.484907 0.000000 186 +taylor 1 63 2.772589 2.772589 287 +back 1 60 2.833213 2.833213 297 +friend 0 48 3.044522 0.000000 376 +get 0 46 3.091042 0.000000 380 +live 1 40 3.258097 3.258097 451 +jame 0 35 3.401197 0.000000 507 +john 1 33 3.433987 3.433987 532 +steve 0 29 3.583519 0.000000 594 +consult 0 24 3.761200 0.000000 687 +doctor 0 24 3.761200 0.000000 709 +alumni 0 21 3.912023 0.000000 807 +white 0 17 4.110874 0.000000 951 +carl 0 15 4.248495 0.000000 1024 +wife 0 13 4.382027 0.000000 1196 +rice 0 11 4.553877 0.000000 1336 +evan 0 8 4.875197 0.000000 1633 +matthew 0 6 5.164786 0.000000 2193 +fish 0 6 5.164786 0.000000 2207 +holli 0 2 6.263398 0.000000 5601 +dejanew 0 2 6.263398 0.000000 5602 +adair 1 1 6.957497 6.957497 13363 +crinkum 0 1 6.957497 0.000000 13364 +crankum 0 1 6.957497 0.000000 13365 +compound 0 1 6.957497 0.000000 13366 +eileen 0 1 6.957497 0.000000 13367 +mengerink 0 1 6.957497 0.000000 13368 +fanat 0 1 6.957497 0.000000 13369 +traylen 0 1 6.957497 0.000000 13370 +jadair 0 1 6.957497 0.000000 13371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..f4a6957b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +oper 1 180 1.609438 1.609438 34 +paper 0 205 1.609438 0.000000 38 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +physic 0 47 3.091042 0.000000 377 +vita 0 38 3.295837 0.000000 473 +john 0 33 3.433987 0.000000 532 +chamber 0 8 4.875197 0.000000 1692 +yale 0 6 5.164786 0.000000 2003 +pagejohn 0 2 6.263398 0.000000 5603 +universityph 0 2 6.263398 0.000000 5604 +chamberssenior 0 1 6.957497 0.000000 13372 +specialistb 0 1 6.957497 0.000000 13373 +paso 0 1 6.957497 0.000000 13374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..c5810cd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,165 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +austin 2 168 1.791759 3.583518 63 +texa 2 160 1.791759 3.583518 64 +avail 1 169 1.791759 1.791759 48 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +area 0 144 1.945910 0.000000 80 +file 0 132 1.945910 0.000000 70 +postscript 1 131 2.079442 2.079442 90 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +make 1 111 2.197225 2.197225 120 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +advanc 1 99 2.302585 2.302585 130 +call 1 91 2.397895 2.397895 153 +level 0 87 2.484907 0.000000 180 +decemb 0 80 2.564949 0.000000 215 +master 0 76 2.564949 0.000000 216 +complet 0 77 2.564949 0.000000 208 +orient 0 80 2.564949 0.000000 205 +resum 0 79 2.564949 0.000000 217 +line 0 75 2.639057 0.000000 231 +appli 0 71 2.639057 0.000000 226 +simul 1 66 2.708050 2.708050 255 +goal 0 66 2.708050 0.000000 250 +test 0 66 2.708050 0.000000 252 +receiv 0 66 2.708050 0.000000 244 +differ 0 66 2.708050 0.000000 253 +organ 1 65 2.772589 2.772589 265 +result 1 65 2.772589 2.772589 281 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +dept 0 64 2.772589 0.000000 291 +artifici 0 63 2.772589 0.000000 280 +abstract 0 62 2.772589 0.000000 276 +juli 0 60 2.833213 0.000000 305 +thesi 1 57 2.890372 2.890372 327 +sever 0 56 2.890372 0.000000 322 +detail 0 57 2.890372 0.000000 321 +extens 0 53 2.944439 0.000000 340 +particular 0 51 2.995732 0.000000 352 +case 0 51 2.995732 0.000000 351 +finger 0 52 2.995732 0.000000 354 +visual 2 48 3.044522 6.089044 372 +principl 0 48 3.044522 0.000000 357 +format 0 48 3.044522 0.000000 356 +understand 0 47 3.091042 0.000000 384 +effect 0 46 3.091042 0.000000 385 +long 0 43 3.178054 0.000000 413 +offer 0 43 3.178054 0.000000 414 +past 0 42 3.218876 0.000000 428 +vision 0 41 3.218876 0.000000 430 +might 0 41 3.218876 0.000000 426 +examin 0 42 3.218876 0.000000 424 +probabl 0 40 3.258097 0.000000 455 +electr 0 38 3.295837 0.000000 461 +soon 0 36 3.367296 0.000000 494 +ofth 0 36 3.367296 0.000000 491 +human 1 32 3.465736 3.465736 546 +neural 0 30 3.555348 0.000000 578 +power 0 30 3.555348 0.000000 573 +domain 0 30 3.555348 0.000000 564 +propos 1 28 3.610918 3.610918 602 +measur 0 28 3.610918 0.000000 609 +mind 0 27 3.637586 0.000000 632 +enabl 0 26 3.688879 0.000000 655 +rather 0 26 3.688879 0.000000 642 +primari 1 25 3.737670 3.737670 669 +demonstr 0 24 3.761200 0.000000 694 +doctor 0 24 3.761200 0.000000 709 +input 0 23 3.806662 0.000000 727 +begin 0 23 3.806662 0.000000 716 +self 1 22 3.850148 3.850148 761 +serv 0 22 3.850148 0.000000 758 +thu 0 21 3.912023 0.000000 773 +similar 0 21 3.912023 0.000000 771 +basi 0 20 3.951244 0.000000 828 +predict 0 19 4.007333 0.000000 855 +log 0 19 4.007333 0.000000 857 +concentr 0 18 4.060443 0.000000 906 +failur 0 18 4.060443 0.000000 898 +figur 0 18 4.060443 0.000000 903 +seek 0 17 4.110874 0.000000 954 +thought 0 17 4.110874 0.000000 945 +explan 0 16 4.174387 0.000000 985 +spatial 0 16 4.174387 0.000000 988 +later 1 15 4.248495 4.248495 1043 +drive 0 15 4.248495 0.000000 1052 +ascii 0 15 4.248495 0.000000 1032 +command 0 14 4.317488 0.000000 1083 +philosophi 0 13 4.382027 0.000000 1167 +necessari 0 13 4.382027 0.000000 1147 +carri 0 13 4.382027 0.000000 1152 +incorpor 0 13 4.382027 0.000000 1163 +overal 0 12 4.465908 0.000000 1254 +qualit 0 11 4.553877 0.000000 1362 +equal 0 10 4.653960 0.000000 1424 +candid 0 9 4.753590 0.000000 1606 +preliminari 0 9 4.753590 0.000000 1480 +brain 1 8 4.875197 4.875197 1638 +realist 1 8 4.875197 4.875197 1665 +empir 0 8 4.875197 0.000000 1722 +miikkulainen 0 8 4.875197 0.000000 1667 +quantit 0 8 4.875197 0.000000 1654 +centuri 0 7 5.010635 0.000000 1935 +appar 0 7 5.010635 0.000000 1958 +aris 0 7 5.010635 0.000000 1924 +biolog 0 6 5.164786 0.000000 2147 +illus 1 4 5.568345 5.568345 2603 +insight 0 4 5.568345 0.000000 3024 +outdat 0 4 5.568345 0.000000 2797 +cortex 1 3 5.857933 5.857933 3856 +dramat 1 3 5.857933 5.857933 3239 +useth 0 3 5.857933 0.000000 3110 +cortic 0 3 5.857933 0.000000 3857 +neuron 0 3 5.857933 0.000000 3798 +frequenc 0 3 5.857933 0.000000 3206 +lissom 1 2 6.263398 6.263398 5605 +jbednar 1 2 6.263398 6.263398 4284 +bednar 0 2 6.263398 0.000000 4283 +testabl 0 2 6.263398 0.000000 5606 +hypothes 0 2 6.263398 0.000000 5607 +nearli 0 2 6.263398 0.000000 5608 +sirosh 0 2 6.263398 0.000000 5609 +aftereffect 2 1 6.957497 13.914994 13375 +tilt 1 1 6.957497 6.957497 13376 +bednarjim 0 1 6.957497 0.000000 13377 +ofcognit 0 1 6.957497 0.000000 13378 +fewdecad 0 1 6.957497 0.000000 13379 +thehuman 0 1 6.957497 0.000000 13380 +beavail 0 1 6.957497 0.000000 13381 +becomingpract 0 1 6.957497 0.000000 13382 +refut 0 1 6.957497 0.000000 13383 +makecognit 0 1 6.957497 0.000000 13384 +purelyphilosoph 0 1 6.957497 0.000000 13385 +psychologist 0 1 6.957497 0.000000 13386 +inhibit 0 1 6.957497 0.000000 13387 +indirect 0 1 6.957497 0.000000 13388 +visualbehavior 0 1 6.957497 0.000000 13389 +departmentmi 0 1 6.957497 0.000000 13390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..42d6e9aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +network 0 168 1.791759 0.000000 61 +construct 1 139 1.945910 1.945910 82 +professor 0 137 1.945910 0.000000 76 +assign 0 135 1.945910 0.000000 66 +file 0 132 1.945910 0.000000 70 +like 0 132 1.945910 0.000000 81 +document 1 121 2.079442 2.079442 89 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +number 0 130 2.079442 0.000000 97 +topic 1 114 2.197225 2.197225 110 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +take 0 97 2.302585 0.000000 134 +homepag 1 93 2.397895 2.397895 148 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +help 1 83 2.484907 2.484907 175 +academ 0 82 2.484907 0.000000 178 +stuff 0 87 2.484907 0.000000 171 +learn 0 86 2.484907 0.000000 170 +start 0 83 2.484907 0.000000 173 +resum 0 79 2.564949 0.000000 217 +sourc 0 77 2.564949 0.000000 201 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +html 1 75 2.639057 2.639057 235 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +test 1 66 2.708050 2.708050 252 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +guid 0 63 2.772589 0.000000 267 +special 0 56 2.890372 0.000000 320 +unix 0 58 2.890372 0.000000 308 +summer 0 56 2.890372 0.000000 311 +format 0 48 3.044522 0.000000 356 +visitor 0 49 3.044522 0.000000 371 +tutori 0 39 3.258097 0.000000 437 +domain 0 30 3.555348 0.000000 564 +packag 0 28 3.610918 0.000000 614 +linux 0 27 3.637586 0.000000 631 +utc 0 27 3.637586 0.000000 629 +administr 0 27 3.637586 0.000000 628 +jeff 0 25 3.737670 0.000000 673 +frame 0 24 3.761200 0.000000 684 +applet 0 20 3.951244 0.000000 827 +north 0 19 4.007333 0.000000 873 +excel 0 19 4.007333 0.000000 868 +debug 0 17 4.110874 0.000000 944 +transfer 0 16 4.174387 0.000000 967 +ascii 0 15 4.248495 0.000000 1032 +latex 0 14 4.317488 0.000000 1064 +pretti 0 13 4.382027 0.000000 1191 +america 0 11 4.553877 0.000000 1370 +catalog 0 10 4.653960 0.000000 1431 +novak 0 9 4.753590 0.000000 1521 +largest 0 7 5.010635 0.000000 1858 +privaci 0 6 5.164786 0.000000 2144 +ross 0 5 5.347108 0.000000 2243 +florida 0 5 5.347108 0.000000 2526 +automobil 0 3 5.857933 0.000000 3709 +ethernet 0 2 6.263398 0.000000 5171 +edmund 0 2 6.263398 0.000000 5213 +buyer 0 2 6.263398 0.000000 5210 +sceneri 0 2 6.263398 0.000000 5152 +ofjunfanghi 0 1 6.957497 0.000000 13391 +sysadm 0 1 6.957497 0.000000 13392 +unixish 0 1 6.957497 0.000000 13393 +kristina 0 1 6.957497 0.000000 13394 +jfang 0 1 6.957497 0.000000 13395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..f03a350b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +start 0 83 2.484907 0.000000 173 +good 1 77 2.564949 2.564949 200 +resum 0 79 2.564949 0.000000 217 +knowledg 0 67 2.708050 0.000000 243 +long 0 43 3.178054 0.000000 413 +probabl 0 40 3.258097 0.000000 455 +john 1 33 3.433987 3.433987 532 +someth 0 31 3.496508 0.000000 554 +chip 0 21 3.912023 0.000000 770 +enough 0 15 4.248495 0.000000 1040 +beer 1 6 5.164786 5.164786 2216 +sleep 0 6 5.164786 0.000000 2211 +chew 0 3 5.857933 0.000000 3618 +dog 0 2 6.263398 0.000000 5089 +swallow 0 2 6.263398 0.000000 5025 +jprior 1 1 6.957497 6.957497 13396 +priorjohn 0 1 6.957497 0.000000 13397 +priormi 0 1 6.957497 0.000000 13398 +accumul 0 1 6.957497 0.000000 13399 +hurt 0 1 6.957497 0.000000 13400 +nacho 0 1 6.957497 0.000000 13401 +swisher 0 1 6.957497 0.000000 13402 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..a56236f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +technolog 0 131 2.079442 0.000000 102 +site 0 106 2.197225 0.000000 119 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +internet 0 83 2.484907 0.000000 186 +laboratori 0 63 2.772589 0.000000 292 +jeff 1 25 3.737670 3.737670 673 +thoma 1 18 4.060443 4.060443 901 +ultim 0 17 4.110874 0.000000 943 +keyword 0 11 4.553877 0.000000 1356 +trade 0 7 5.010635 0.000000 1815 +homepagejeff 0 1 6.957497 0.000000 13403 +homepagecontact 0 1 6.957497 0.000000 13404 +informationpublicationssoftwar 0 1 6.957497 0.000000 13405 +groupphoto 0 1 6.957497 0.000000 13406 +albumfavorit 0 1 6.957497 0.000000 13407 +sitesuniversityof 0 1 6.957497 0.000000 13408 +departmentappliedresearch 0 1 6.957497 0.000000 13409 +electricaland 0 1 6.957497 0.000000 13410 +departmentedsfinanci 0 1 6.957497 0.000000 13411 +fttc 0 1 6.957497 0.000000 13412 +utacademiccalendarsut 0 1 6.957497 0.000000 13413 +sportshook 0 1 6.957497 0.000000 13414 +longhorn 0 1 6.957497 0.000000 13415 +utfootbal 0 1 6.957497 0.000000 13416 +scheduleaustintexa 0 1 6.957497 0.000000 13417 +jthoma 0 1 6.957497 0.000000 13418 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..a7f198be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +octob 0 89 2.397895 0.000000 156 +stuff 0 87 2.484907 0.000000 171 +journal 0 83 2.484907 0.000000 183 +collect 0 65 2.772589 0.000000 268 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +china 1 37 3.332205 3.332205 487 +weather 0 28 3.610918 0.000000 618 +todai 0 25 3.737670 0.000000 672 +highli 0 23 3.806662 0.000000 725 +alumni 0 21 3.912023 0.000000 807 +beij 0 19 4.007333 0.000000 876 +lot 0 18 4.060443 0.000000 889 +excit 0 11 4.553877 0.000000 1329 +perl 0 11 4.553877 0.000000 1332 +ataustin 0 9 4.753590 0.000000 1610 +pagecomput 0 7 5.010635 0.000000 1900 +peke 1 5 5.347108 5.347108 2539 +appreci 0 5 5.347108 0.000000 2374 +meyour 0 3 5.857933 0.000000 3858 +homepagewelcom 0 2 6.263398 0.000000 4808 +novelschines 0 2 6.263398 0.000000 5610 +registrar 0 2 6.263398 0.000000 5611 +gradaut 0 2 6.263398 0.000000 5612 +studiesut 0 2 6.263398 0.000000 5613 +magzin 0 2 6.263398 0.000000 5614 +technicalreport 0 2 6.263398 0.000000 5615 +visitorsinc 0 2 6.263398 0.000000 5616 +jiani 1 1 6.957497 6.957497 13419 +indepart 0 1 6.957497 0.000000 13420 +ofpek 0 1 6.957497 0.000000 13421 +chinesechines 0 1 6.957497 0.000000 13422 +scenerychines 0 1 6.957497 0.000000 13423 +classicschines 0 1 6.957497 0.000000 13424 +magazineschines 0 1 6.957497 0.000000 13425 +newspapersus 0 1 6.957497 0.000000 13426 +libraryut 0 1 6.957497 0.000000 13427 +campusutaccessabout 0 1 6.957497 0.000000 13428 +citylimit 0 1 6.957497 0.000000 13429 +miscellaneousyahoojava 0 1 6.957497 0.000000 13430 +sunjavascript 0 1 6.957497 0.000000 13431 +netscapeth 0 1 6.957497 0.000000 13432 +associationcomput 0 1 6.957497 0.000000 13433 +webnetwork 0 1 6.957497 0.000000 13434 +libraryth 0 1 6.957497 0.000000 13435 +bibliographiesintern 0 1 6.957497 0.000000 13436 +jyluo 0 1 6.957497 0.000000 13437 +suggestionswould 0 1 6.957497 0.000000 13438 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..2378eba0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +know 0 80 2.564949 0.000000 198 +logic 0 71 2.639057 0.000000 230 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +would 0 67 2.708050 0.000000 251 +street 0 63 2.772589 0.000000 293 +reason 0 57 2.890372 0.000000 318 +advisor 0 51 2.995732 0.000000 355 +concurr 1 34 3.401197 3.401197 501 +semant 0 29 3.583519 0.000000 587 +todai 0 25 3.737670 0.000000 672 +doctor 0 24 3.761200 0.000000 709 +thank 0 23 3.806662 0.000000 721 +indian 0 22 3.850148 0.000000 769 +wonder 0 20 3.951244 0.000000 815 +lot 0 18 4.060443 0.000000 889 +bachelor 0 17 4.110874 0.000000 957 +came 0 13 4.382027 0.000000 1197 +tempor 0 9 4.753590 0.000000 1584 +madra 1 8 4.875197 4.875197 1770 +allen 0 5 5.347108 0.000000 2470 +emerson 0 5 5.347108 0.000000 2547 +mehi 0 2 6.263398 0.000000 5549 +kedar 0 1 6.957497 0.000000 13439 +namjoshiabout 0 1 6.957497 0.000000 13440 +distributedalgorithm 0 1 6.957497 0.000000 13441 +automatatheori 0 1 6.957497 0.000000 13442 +amul 0 1 6.957497 0.000000 13443 +adkedar 0 1 6.957497 0.000000 13444 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..3542b079 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +updat 0 191 1.609438 0.000000 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 0 146 1.945910 0.000000 65 +provid 0 121 2.079442 0.000000 94 +world 0 115 2.197225 0.000000 126 +academ 0 82 2.484907 0.000000 178 +stuff 0 87 2.484907 0.000000 171 +wide 0 84 2.484907 0.000000 185 +resum 0 79 2.564949 0.000000 217 +servic 0 72 2.639057 0.000000 236 +view 1 70 2.708050 2.708050 254 +taylor 0 63 2.772589 0.000000 287 +faculti 0 56 2.890372 0.000000 325 +author 0 39 3.258097 0.000000 450 +respons 0 37 3.332205 0.000000 476 +staff 0 36 3.367296 0.000000 490 +board 0 33 3.433987 0.000000 528 +express 0 32 3.465736 0.000000 540 +linux 0 27 3.637586 0.000000 631 +facil 0 20 3.951244 0.000000 814 +reflect 0 15 4.248495 0.000000 1034 +kenneth 1 12 4.465908 4.465908 1265 +guest 0 12 4.465908 0.000000 1220 +opinion 1 8 4.875197 4.875197 1708 +babylon 0 8 4.875197 0.000000 1731 +necessarili 0 7 5.010635 0.000000 1899 +polit 0 6 5.164786 0.000000 2115 +regent 0 5 5.347108 0.000000 2551 +radio 0 4 5.568345 0.000000 3025 +sole 0 4 5.568345 0.000000 2592 +cyberspac 0 3 5.857933 0.000000 3719 +harker 1 1 6.957497 6.957497 13445 +kharker 1 1 6.957497 6.957497 13446 +amateur 0 1 6.957497 0.000000 13447 +rocketri 0 1 6.957497 0.000000 13448 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..0001c711 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +softwar 1 220 1.386294 1.386294 30 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +area 1 144 1.945910 1.945910 80 +perform 0 143 1.945910 0.000000 74 +analysi 1 124 2.079442 2.079442 98 +high 0 130 2.079442 0.000000 101 +world 1 115 2.197225 2.197225 126 +mathemat 1 108 2.197225 2.197225 123 +topic 0 114 2.197225 0.000000 110 +technic 0 100 2.302585 0.000000 140 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +second 0 81 2.484907 0.000000 166 +method 0 80 2.564949 0.000000 213 +david 1 71 2.639057 2.639057 232 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +degre 0 69 2.708050 0.000000 259 +organ 0 65 2.772589 0.000000 265 +sever 0 56 2.890372 0.000000 322 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +numer 1 49 3.044522 3.044522 369 +basic 0 50 3.044522 0.000000 360 +algebra 1 45 3.135494 3.135494 394 +anoth 0 45 3.135494 0.000000 408 +linear 1 41 3.218876 3.218876 431 +award 0 34 3.401197 0.000000 523 +committe 0 34 3.401197 0.000000 522 +focus 0 29 3.583519 0.000000 584 +session 0 26 3.688879 0.000000 643 +equat 1 23 3.806662 3.806662 724 +honor 0 23 3.806662 0.000000 729 +recognit 0 23 3.806662 0.000000 723 +variabl 0 23 3.806662 0.000000 715 +director 0 22 3.850148 0.000000 767 +partial 0 18 4.060443 0.000000 900 +differenti 0 17 4.110874 0.000000 921 +young 1 16 4.174387 4.174387 991 +spars 0 16 4.174387 0.000000 989 +senior 0 14 4.317488 0.000000 1120 +researchmi 0 14 4.317488 0.000000 1119 +polynomi 0 14 4.317488 0.000000 1069 +nasa 0 13 4.382027 0.000000 1188 +iter 0 12 4.465908 0.000000 1206 +matric 0 10 4.653960 0.000000 1399 +congress 1 9 4.753590 4.753590 1592 +jersei 0 9 4.753590 0.000000 1587 +creativ 0 8 4.875197 0.000000 1777 +pacif 0 8 4.875197 0.000000 1674 +grove 0 8 4.875197 0.000000 1675 +edg 0 8 4.875197 0.000000 1647 +aris 0 7 5.010635 0.000000 1924 +brook 0 6 5.164786 0.000000 2152 +river 0 6 5.164786 0.000000 2220 +imac 1 3 5.857933 5.857933 3718 +certif 0 3 5.857933 0.000000 3859 +interestmathemat 0 3 5.857933 0.000000 3860 +ellipt 0 3 5.857933 0.000000 3774 +atlanta 0 3 5.857933 0.000000 3778 +stationari 0 3 5.857933 0.000000 3861 +kincaid 1 2 6.263398 6.263398 5617 +subprogram 0 2 6.263398 0.000000 5618 +cole 0 2 6.263398 0.000000 4697 +itpack 0 2 6.263398 0.000000 5619 +rassia 0 2 6.263398 0.000000 5620 +lecturerassoci 0 1 6.957497 0.000000 13449 +lamar 0 1 6.957497 0.000000 13450 +technicalinnov 0 1 6.957497 0.000000 13451 +andappli 0 1 6.957497 0.000000 13452 +coeffici 0 1 6.957497 0.000000 13453 +publicationsw 0 1 6.957497 0.000000 13454 +chenei 0 1 6.957497 0.000000 13455 +hay 0 1 6.957497 0.000000 13456 +coput 0 1 6.957497 0.000000 13457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..072fd3f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +parallel 1 169 1.791759 1.791759 60 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +process 0 142 1.945910 0.000000 72 +analysi 0 124 2.079442 0.000000 98 +number 0 130 2.079442 0.000000 97 +mathemat 0 108 2.197225 0.000000 123 +academ 1 82 2.484907 2.484907 178 +school 0 84 2.484907 0.000000 188 +master 0 76 2.564949 0.000000 216 +prof 1 64 2.772589 2.772589 273 +collect 0 65 2.772589 0.000000 268 +juli 0 60 2.833213 0.000000 305 +numer 0 49 3.044522 0.000000 369 +visitor 0 49 3.044522 0.000000 371 +york 1 41 3.218876 3.218876 435 +press 0 42 3.218876 0.000000 419 +linear 0 41 3.218876 0.000000 431 +random 0 34 3.401197 0.000000 511 +administr 0 27 3.637586 0.000000 628 +mike 1 24 3.761200 3.761200 703 +busi 0 21 3.912023 0.000000 784 +particularli 0 19 4.007333 0.000000 867 +commerci 0 16 4.174387 0.000000 1005 +alan 0 13 4.382027 0.000000 1146 +emploi 0 12 4.465908 0.000000 1284 +walk 0 12 4.465908 0.000000 1281 +thedepart 0 11 4.553877 0.000000 1350 +ataustin 0 9 4.753590 0.000000 1610 +interestsi 0 7 5.010635 0.000000 1969 +misra 0 7 5.010635 0.000000 1856 +jayadev 0 4 5.568345 0.000000 3006 +kistler 1 3 5.857933 5.857933 3267 +syracus 1 3 5.857933 5.857933 3553 +cline 0 3 5.857933 0.000000 3218 +coursesfal 0 2 6.263398 0.000000 5225 +theperson 0 1 6.957497 0.000000 13458 +productsdivis 0 1 6.957497 0.000000 13459 +backgroundba 0 1 6.957497 0.000000 13460 +susquehanna 0 1 6.957497 0.000000 13461 +selinsgrov 0 1 6.957497 0.000000 13462 +stern 0 1 6.957497 0.000000 13463 +businessnew 0 1 6.957497 0.000000 13464 +iwith 0 1 6.957497 0.000000 13465 +algebrawith 0 1 6.957497 0.000000 13466 +pflugervil 0 1 6.957497 0.000000 13467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..09e6af53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +time 0 293 1.098612 0.000000 17 +welcom 0 122 2.079442 0.000000 99 +sinc 0 90 2.397895 0.000000 159 +march 0 61 2.833213 0.000000 295 +jacob 1 4 5.568345 5.568345 2667 +kornerup 1 3 5.857933 5.857933 3215 +kornerupjacob 0 1 6.957497 0.000000 13468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..fc3fdc89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +spring 1 131 2.079442 2.079442 88 +mathemat 1 108 2.197225 2.197225 123 +topic 0 114 2.197225 0.000000 110 +place 0 106 2.197225 0.000000 124 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +build 1 85 2.484907 2.484907 184 +intellig 1 72 2.639057 2.639057 225 +logic 0 71 2.639057 0.000000 230 +knowledg 1 67 2.708050 2.708050 243 +simul 1 66 2.708050 2.708050 255 +plan 0 65 2.772589 0.000000 272 +prof 0 64 2.772589 0.000000 273 +colleg 0 61 2.833213 0.000000 300 +reason 1 57 2.890372 2.890372 318 +detail 0 57 2.890372 0.000000 321 +explor 0 58 2.890372 0.000000 324 +finger 0 52 2.995732 0.000000 354 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +describ 0 45 3.135494 0.000000 400 +press 0 42 3.218876 0.000000 419 +map 0 39 3.258097 0.000000 452 +robot 0 36 3.367296 0.000000 497 +represent 1 35 3.401197 3.401197 512 +limit 0 29 3.583519 0.000000 585 +strategi 0 25 3.737670 0.000000 682 +recognit 0 23 3.806662 0.000000 723 +emphasi 0 22 3.850148 0.000000 755 +expert 0 20 3.951244 0.000000 833 +agent 1 18 4.060443 4.060443 910 +spatial 0 16 4.174387 0.000000 988 +cognit 0 16 4.174387 0.000000 986 +cambridg 0 16 4.174387 0.000000 1008 +consider 0 14 4.317488 0.000000 1076 +benjamin 1 11 4.553877 4.553877 1296 +qualit 1 11 4.553877 4.553877 1362 +tour 0 11 4.553877 0.000000 1307 +incomplet 1 9 4.753590 4.753590 1575 +accomplish 0 8 4.875197 0.000000 1755 +centenni 0 7 5.010635 0.000000 1967 +distinct 0 5 5.347108 0.000000 2319 +commonsens 1 4 5.568345 5.568345 2998 +kuiper 1 3 5.857933 5.857933 3794 +qsim 0 3 5.857933 0.000000 3862 +swarthmor 0 2 6.263398 0.000000 5621 +thequalit 0 2 6.263398 0.000000 5622 +kuipersbenjamin 0 1 6.957497 0.000000 13469 +kuipersbruton 0 1 6.957497 0.000000 13470 +withparticular 0 1 6.957497 0.000000 13471 +grouphom 0 1 6.957497 0.000000 13472 +andavail 0 1 6.957497 0.000000 13473 +qualitativereason 0 1 6.957497 0.000000 13474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..d2410f1a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +email 1 220 1.386294 1.386294 29 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +utexa 1 189 1.609438 1.609438 44 +fall 0 181 1.609438 0.000000 40 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +postscript 1 131 2.079442 2.079442 90 +spring 0 131 2.079442 0.000000 88 +assist 1 112 2.197225 2.197225 113 +ieee 0 86 2.484907 0.000000 190 +internet 0 83 2.484907 0.000000 186 +taylor 0 63 2.772589 0.000000 287 +laboratori 0 63 2.772589 0.000000 292 +februari 0 54 2.944439 0.000000 328 +electron 0 47 3.091042 0.000000 379 +transact 0 39 3.258097 0.000000 438 +sciencesunivers 0 37 3.332205 0.000000 486 +photo 0 31 3.496508 0.000000 561 +profil 0 30 3.555348 0.000000 581 +turn 0 29 3.583519 0.000000 586 +campu 0 27 3.637586 0.000000 623 +administr 0 27 3.637586 0.000000 628 +american 0 27 3.637586 0.000000 634 +compress 1 23 3.806662 3.806662 719 +eduphon 1 15 4.248495 4.248495 1060 +front 0 13 4.382027 0.000000 1154 +tune 0 12 4.465908 0.000000 1227 +editori 0 9 4.753590 0.000000 1611 +simon 1 8 4.875197 4.875197 1697 +clip 0 7 5.010635 0.000000 1868 +sciencesdepart 0 6 5.164786 0.000000 2020 +carbon 0 3 5.857933 0.000000 3804 +cont 0 3 5.857933 0.000000 3171 +toss 0 2 6.263398 0.000000 5470 +kata 1 1 6.957497 6.957497 13475 +submissionnew 0 1 6.957497 0.000000 13476 +empt 0 1 6.957497 0.000000 13477 +statesman 0 1 6.957497 0.000000 13478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..2ed31fbb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +perform 1 143 1.945910 1.945910 74 +architectur 0 139 1.945910 0.000000 77 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +commun 0 95 2.397895 0.000000 157 +activ 0 84 2.484907 0.000000 182 +chang 0 82 2.484907 0.000000 163 +novemb 0 81 2.484907 0.000000 179 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +test 0 66 2.708050 0.000000 252 +integr 0 67 2.708050 0.000000 245 +laboratori 1 63 2.772589 2.772589 292 +foundat 1 62 2.772589 2.772589 286 +protocol 1 45 3.135494 3.135494 407 +secur 0 30 3.555348 0.000000 577 +fund 0 21 3.912023 0.000000 805 +entir 0 20 3.951244 0.000000 811 +verif 0 20 3.951244 0.000000 826 +supervis 0 20 3.951244 0.000000 840 +tune 0 12 4.465908 0.000000 1227 +cycl 0 11 4.553877 0.000000 1335 +underli 0 10 4.653960 0.000000 1410 +span 0 8 4.875197 0.000000 1751 +simon 0 8 4.875197 0.000000 1697 +lockhe 0 3 5.857933 0.000000 3863 +currentinterest 0 1 6.957497 0.000000 13479 +nsaunivers 0 1 6.957497 0.000000 13480 +videoservic 0 1 6.957497 0.000000 13481 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..0d1bb227 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +octob 0 89 2.397895 0.000000 156 +ofth 0 36 3.367296 0.000000 491 +robert 0 30 3.555348 0.000000 567 +edulast 0 17 4.110874 0.000000 927 +stori 0 14 4.317488 0.000000 1087 +convent 0 14 4.317488 0.000000 1072 +council 0 11 4.553877 0.000000 1364 +mountain 0 10 4.653960 0.000000 1456 +awai 0 10 4.653960 0.000000 1447 +christian 1 7 5.010635 5.010635 1949 +pageth 0 7 5.010635 0.000000 1939 +gordon 0 6 5.164786 0.000000 2032 +graham 0 4 5.568345 0.000000 2817 +republican 0 3 5.857933 0.000000 3815 +backbon 0 2 6.263398 0.000000 5623 +landrum 1 1 6.957497 6.957497 13482 +viruspictur 0 1 6.957497 0.000000 13483 +empirepch 0 1 6.957497 0.000000 13484 +retreattexa 0 1 6.957497 0.000000 13485 +rockrsumfamilyinterest 0 1 6.957497 0.000000 13486 +councillandrum 0 1 6.957497 0.000000 13487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..80cb910e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +welcom 0 122 2.079442 0.000000 99 +assist 0 112 2.197225 0.000000 113 +activ 0 84 2.484907 0.000000 182 +improv 0 62 2.772589 0.000000 289 +electr 0 38 3.295837 0.000000 461 +greg 1 24 3.761200 3.761200 695 +recommend 0 22 3.850148 0.000000 737 +lavend 1 3 5.857933 5.857933 3217 +professordepart 0 2 6.263398 0.000000 5624 +biograph 0 2 6.263398 0.000000 5625 +austinr 0 1 6.957497 0.000000 13488 +lavenderadjunct 0 1 6.957497 0.000000 13489 +anddepart 0 1 6.957497 0.000000 13490 +engineeringth 0 1 6.957497 0.000000 13491 +informationsuggest 0 1 6.957497 0.000000 13492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..c6cbb21d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +welcom 1 122 2.079442 2.079442 99 +look 0 107 2.197225 0.000000 115 +structur 0 106 2.197225 0.000000 105 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +internet 0 83 2.484907 0.000000 186 +activ 0 84 2.484907 0.000000 182 +come 1 78 2.564949 2.564949 202 +master 0 76 2.564949 0.000000 216 +know 0 80 2.564949 0.000000 198 +nation 1 74 2.639057 2.639057 240 +degre 0 69 2.708050 0.000000 259 +taylor 0 63 2.772589 0.000000 287 +locat 0 59 2.833213 0.000000 303 +much 0 52 2.995732 0.000000 349 +small 0 39 3.258097 0.000000 447 +jame 1 35 3.401197 3.401197 507 +particip 0 29 3.583519 0.000000 589 +campu 0 27 3.637586 0.000000 623 +equat 0 23 3.806662 0.000000 724 +vlsi 0 21 3.912023 0.000000 795 +north 0 19 4.007333 0.000000 873 +bachelor 0 17 4.110874 0.000000 957 +normal 0 16 4.174387 0.000000 995 +atth 0 15 4.248495 0.000000 1019 +month 0 15 4.248495 0.000000 1025 +hong 0 14 4.317488 0.000000 1105 +wife 0 13 4.382027 0.000000 1196 +island 1 11 4.553877 4.553877 1345 +kong 0 9 4.753590 0.000000 1602 +sciencesat 0 7 5.010635 0.000000 1968 +smile 0 7 5.010635 0.000000 1807 +singapor 1 5 5.347108 5.347108 2487 +aliv 0 3 5.857933 0.000000 3864 +disc 0 2 6.263398 0.000000 5626 +tropic 0 2 6.263398 0.000000 5398 +aboutthi 0 2 6.263398 0.000000 5627 +addr 0 2 6.263398 0.000000 5628 +pageyeap 0 1 6.957497 0.000000 13493 +designalgorithm 0 1 6.957497 0.000000 13494 +communityi 0 1 6.957497 0.000000 13495 +lovesto 0 1 6.957497 0.000000 13496 +leekk 0 1 6.957497 0.000000 13497 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..3fac2be4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +softwar 1 220 1.386294 1.386294 30 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +parallel 0 169 1.791759 0.000000 60 +austin 0 168 1.791759 0.000000 63 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +high 0 130 2.079442 0.000000 101 +decemb 0 80 2.564949 0.000000 215 +main 0 67 2.708050 0.000000 256 +laboratori 1 63 2.772589 2.772589 292 +investig 0 51 2.995732 0.000000 353 +seminar 0 38 3.295837 0.000000 470 +computersci 0 30 3.555348 0.000000 562 +robert 0 30 3.555348 0.000000 567 +experiment 1 26 3.688879 3.688879 645 +wai 0 25 3.737670 0.000000 662 +less 1 18 4.060443 4.060443 892 +apart 0 7 5.010635 0.000000 1936 +distributedsystem 0 6 5.164786 0.000000 2022 +blumoferdb 0 5 5.347108 0.000000 2324 +oftexa 0 4 5.568345 0.000000 3003 +buildreli 0 1 6.957497 0.000000 13498 +projectsmemb 0 1 6.957497 0.000000 13499 +lablessss 0 1 6.957497 0.000000 13500 +seriessponsorslast 0 1 6.957497 0.000000 13501 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..f1e55abd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +intern 1 108 2.197225 2.197225 128 +mathemat 0 108 2.197225 0.000000 123 +memori 0 101 2.302585 0.000000 139 +proceed 1 93 2.397895 2.397895 152 +select 0 91 2.397895 0.000000 154 +thing 0 84 2.484907 0.000000 189 +journal 0 83 2.484907 0.000000 183 +april 0 77 2.564949 0.000000 196 +symposium 0 72 2.639057 0.000000 238 +import 0 65 2.772589 0.000000 282 +taylor 0 63 2.772589 0.000000 287 +plai 0 60 2.833213 0.000000 307 +simpl 0 60 2.833213 0.000000 298 +share 0 59 2.833213 0.000000 304 +scientif 0 53 2.944439 0.000000 341 +postal 0 30 3.555348 0.000000 580 +multiprocessor 0 28 3.610918 0.000000 605 +arrai 0 27 3.637586 0.000000 627 +supercomput 0 25 3.737670 0.000000 681 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +portabl 1 20 3.951244 3.951244 819 +comparison 0 19 4.007333 0.000000 863 +novel 0 15 4.248495 0.000000 1039 +conf 0 13 4.382027 0.000000 1181 +decomposit 0 10 4.653960 0.000000 1439 +calvin 0 9 4.753590 0.000000 1518 +austinaustin 0 7 5.010635 0.000000 1966 +banerje 0 6 5.164786 0.000000 2018 +snyder 1 5 5.347108 5.347108 2359 +explicitli 0 5 5.347108 0.000000 2308 +parallelprogram 0 5 5.347108 0.000000 2379 +publicationsth 0 4 5.568345 0.000000 2859 +polymorph 0 4 5.568345 0.000000 2627 +pete 0 3 5.857933 0.000000 3865 +accommod 0 3 5.857933 0.000000 3337 +parallelprocess 0 3 5.857933 0.000000 3626 +performanceanalysi 0 2 6.263398 0.000000 5629 +padua 0 2 6.263398 0.000000 4544 +sciencesth 1 1 6.957497 6.957497 13502 +lincalvin 0 1 6.957497 0.000000 13503 +linassist 0 1 6.957497 0.000000 13504 +iswhat 0 1 6.957497 0.000000 13505 +_study_ 0 1 6.957497 0.000000 13506 +_play_ 0 1 6.957497 0.000000 13507 +carrilresearch 0 1 6.957497 0.000000 13508 +interestscompil 0 1 6.957497 0.000000 13509 +biologyalgorithm 0 1 6.957497 0.000000 13510 +dikaiako 0 1 6.957497 0.000000 13511 +manoussaki 0 1 6.957497 0.000000 13512 +woodward 0 1 6.957497 0.000000 13513 +internationalparallel 0 1 6.957497 0.000000 13514 +sublanguag 0 1 6.957497 0.000000 13515 +compilersfor 0 1 6.957497 0.000000 13516 +gelernt 0 1 6.957497 0.000000 13517 +nicolau 0 1 6.957497 0.000000 13518 +withl 0 1 6.957497 0.000000 13519 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..08ccf39e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +construct 0 139 1.945910 0.000000 82 +professor 0 137 1.945910 0.000000 76 +perform 0 143 1.945910 0.000000 74 +welcom 0 122 2.079442 0.000000 99 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +real 1 93 2.397895 2.397895 144 +comment 0 93 2.397895 0.000000 146 +member 0 84 2.484907 0.000000 165 +good 0 77 2.564949 0.000000 200 +knowledg 0 67 2.708050 0.000000 243 +view 0 70 2.708050 0.000000 254 +result 0 65 2.772589 0.000000 281 +content 0 59 2.833213 0.000000 302 +summer 0 56 2.890372 0.000000 311 +mine 0 26 3.688879 0.000000 654 +theunivers 0 21 3.912023 0.000000 797 +permit 0 16 4.174387 0.000000 962 +replic 0 12 4.465908 0.000000 1231 +incomplet 0 9 4.753590 0.000000 1575 +researchi 0 8 4.875197 0.000000 1756 +apolog 0 6 5.164786 0.000000 2046 +guangtian 1 3 5.857933 5.857933 3810 +inconveni 0 3 5.857933 0.000000 3866 +internship 0 3 5.857933 0.000000 3764 +liugt 1 1 6.957497 6.957497 13520 +homepagehi 0 1 6.957497 0.000000 13521 +timeschedul 0 1 6.957497 0.000000 13522 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..1fc6fe61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +cornel 1 215 1.386294 1.386294 23 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +distribut 1 162 1.791759 1.791759 51 +phone 0 175 1.791759 0.000000 45 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +topic 0 114 2.197225 0.000000 110 +taylor 0 63 2.772589 0.000000 287 +locat 0 59 2.833213 0.000000 303 +special 0 56 2.890372 0.000000 320 +physic 0 47 3.091042 0.000000 377 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +photo 0 31 3.496508 0.000000 561 +emphasi 0 22 3.850148 0.000000 755 +itali 0 11 4.553877 0.000000 1378 +interestsi 0 7 5.010635 0.000000 1969 +lorenzo 1 4 5.568345 5.568345 2588 +sytem 0 4 5.568345 0.000000 3015 +maria 0 4 5.568345 0.000000 2954 +alvisi 1 3 5.857933 5.857933 3095 +universit 0 2 6.263398 0.000000 5630 +bologna 0 2 6.263398 0.000000 5631 +laurea 0 1 6.957497 0.000000 13523 +agrav 0 1 6.957497 0.000000 13524 +taylorhal 0 1 6.957497 0.000000 13525 +campusshow 0 1 6.957497 0.000000 13526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..ca919191 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +current 1 284 1.098612 1.098612 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +link 1 247 1.386294 1.386294 24 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +area 0 144 1.945910 0.000000 80 +site 0 106 2.197225 0.000000 119 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +internet 0 83 2.484907 0.000000 186 +ieee 0 86 2.484907 0.000000 190 +come 0 78 2.564949 0.000000 202 +onlin 0 75 2.639057 0.000000 223 +dept 0 64 2.772589 0.000000 291 +undergradu 0 54 2.944439 0.000000 338 +cool 0 49 3.044522 0.000000 374 +compani 0 41 3.218876 0.000000 423 +music 0 42 3.218876 0.000000 436 +live 0 40 3.258097 0.000000 451 +microsoft 1 38 3.295837 3.295837 468 +china 0 37 3.332205 0.000000 487 +chines 0 29 3.583519 0.000000 595 +weather 0 28 3.610918 0.000000 618 +univ 0 28 3.610918 0.000000 617 +campu 0 27 3.637586 0.000000 623 +thank 0 23 3.806662 0.000000 721 +corpor 0 21 3.912023 0.000000 802 +sigmod 0 19 4.007333 0.000000 877 +tsinghua 0 13 4.382027 0.000000 1195 +shop 0 10 4.653960 0.000000 1469 +siggraph 0 8 4.875197 0.000000 1773 +dictionari 0 8 4.875197 0.000000 1642 +hunt 0 7 5.010635 0.000000 1798 +sigcomm 0 5 5.347108 0.000000 2329 +sigir 0 2 6.263398 0.000000 4873 +addr 0 2 6.263398 0.000000 5628 +luxu 1 1 6.957497 6.957497 13527 +networksoth 0 1 6.957497 0.000000 13528 +studyut 0 1 6.957497 0.000000 13529 +universityaustin 0 1 6.957497 0.000000 13530 +siglink 0 1 6.957497 0.000000 13531 +sigmm 0 1 6.957497 0.000000 13532 +newsjob 0 1 6.957497 0.000000 13533 +forcast 0 1 6.957497 0.000000 13534 +xuelu 0 1 6.957497 0.000000 13535 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..cd56dd13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 2 297 1.098612 2.197224 20 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +softwar 2 220 1.386294 2.772588 30 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +develop 1 174 1.791759 1.791759 53 +hour 0 165 1.791759 0.000000 46 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +texa 0 160 1.791759 0.000000 64 +object 1 138 1.945910 1.945910 79 +area 0 144 1.945910 0.000000 80 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +process 0 142 1.945910 0.000000 72 +tool 1 117 2.079442 2.079442 93 +confer 0 126 2.079442 0.000000 100 +technic 0 100 2.302585 0.000000 140 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +journal 0 83 2.484907 0.000000 183 +orient 1 80 2.564949 2.564949 205 +issu 0 78 2.564949 0.000000 211 +interfac 0 79 2.564949 0.000000 209 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +august 0 66 2.708050 0.000000 257 +taylor 0 63 2.772589 0.000000 287 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +direct 1 57 2.890372 2.890372 316 +semest 0 58 2.890372 0.000000 312 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +transact 0 39 3.258097 0.000000 438 +industri 0 38 3.295837 0.000000 464 +committe 1 34 3.401197 3.401197 522 +john 0 33 3.433987 0.000000 532 +human 0 32 3.465736 0.000000 546 +chair 1 29 3.583519 3.583519 596 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +qualiti 0 20 3.951244 0.000000 832 +macintosh 0 17 4.110874 0.000000 920 +cognit 0 16 4.174387 0.000000 986 +researchmi 0 14 4.317488 0.000000 1119 +strength 0 9 4.753590 0.000000 1494 +metric 0 7 5.010635 0.000000 1831 +educurr 0 5 5.347108 0.000000 2504 +werth 1 4 5.568345 5.568345 3004 +engineeringc 0 4 5.568345 0.000000 2904 +contemporari 0 4 5.568345 0.000000 2719 +presentarea 0 4 5.568345 0.000000 3026 +andsoftwar 0 4 5.568345 0.000000 2753 +assur 0 4 5.568345 0.000000 2722 +ics 0 4 5.568345 0.000000 2779 +lauri 0 3 5.857933 0.000000 3867 +honour 1 2 6.263398 6.263398 5632 +werthlauri 0 1 6.957497 0.000000 13536 +werthlectur 0 1 6.957497 0.000000 13537 +lwerth 0 1 6.957497 0.000000 13538 +scienceprofession 0 1 6.957497 0.000000 13539 +servicevic 0 1 6.957497 0.000000 13540 +presentco 0 1 6.957497 0.000000 13541 +interestsoftwar 0 1 6.957497 0.000000 13542 +andenviron 0 1 6.957497 0.000000 13543 +publicationsl 0 1 6.957497 0.000000 13544 +tomayko 0 1 6.957497 0.000000 13545 +pagefaculti 0 1 6.957497 0.000000 13546 +profilesc 0 1 6.957497 0.000000 13547 +classeslast 0 1 6.957497 0.000000 13548 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..fe64dd09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +offic 1 299 1.098612 1.098612 13 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +hall 0 146 1.945910 0.000000 65 +theori 0 111 2.197225 0.000000 127 +site 0 106 2.197225 0.000000 119 +info 0 85 2.484907 0.000000 176 +taylor 0 63 2.772589 0.000000 287 +dept 0 64 2.772589 0.000000 291 +complex 0 64 2.772589 0.000000 269 +interact 0 62 2.772589 0.000000 270 +author 0 39 3.258097 0.000000 450 +india 0 32 3.465736 0.000000 550 +univ 0 28 3.610918 0.000000 617 +comp 0 26 3.688879 0.000000 650 +reach 0 24 3.761200 0.000000 688 +offici 1 18 4.060443 4.060443 894 +avenu 0 12 4.465908 0.000000 1277 +madra 0 8 4.875197 0.000000 1770 +colloquium 0 8 4.875197 0.000000 1734 +cricket 0 7 5.010635 0.000000 1945 +oncomput 0 5 5.347108 0.000000 2326 +reddi 1 3 5.857933 5.857933 3277 +worldwid 0 3 5.857933 0.000000 3704 +madhukar 1 2 6.263398 6.263398 5633 +espnet 0 2 6.263398 0.000000 5634 +korupoluwelcom 0 1 6.957497 0.000000 13549 +ahom 0 1 6.957497 0.000000 13550 +madrashomepag 0 1 6.957497 0.000000 13551 +ganga 0 1 6.957497 0.000000 13552 +alumniclass 0 1 6.957497 0.000000 13553 +utalgorithm 0 1 6.957497 0.000000 13554 +sportszon 0 1 6.957497 0.000000 13555 +batchu 0 1 6.957497 0.000000 13556 +korupoluemail 0 1 6.957497 0.000000 13557 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..5142b4e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +taylor 0 63 2.772589 0.000000 287 +simpl 0 60 2.833213 0.000000 298 +natur 0 44 3.135494 0.000000 406 +richard 0 31 3.496508 0.000000 559 +produc 0 30 3.555348 0.000000 572 +explan 0 16 4.174387 0.000000 985 +mallori 0 2 6.263398 0.000000 5635 +malloryrichard 0 1 6.957497 0.000000 13558 +malloryresearchthesi 0 1 6.957497 0.000000 13559 +quasi 0 1 6.957497 0.000000 13560 +qsimsimul 0 1 6.957497 0.000000 13561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..86e43764 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +time 1 293 1.098612 1.098612 17 +offic 0 299 1.098612 0.000000 13 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +austin 1 168 1.791759 1.791759 63 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +confer 0 126 2.079442 0.000000 100 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +memori 0 101 2.302585 0.000000 139 +proceed 1 93 2.397895 2.397895 152 +real 0 93 2.397895 0.000000 144 +octob 0 89 2.397895 0.000000 156 +academ 0 82 2.484907 0.000000 178 +novemb 0 81 2.484907 0.000000 179 +journal 0 83 2.484907 0.000000 183 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +internet 0 83 2.484907 0.000000 186 +state 1 76 2.564949 2.564949 207 +appear 1 78 2.564949 2.564949 210 +workshop 1 71 2.639057 2.639057 239 +symposium 0 72 2.639057 0.000000 238 +taylor 0 63 2.772589 0.000000 287 +march 0 61 2.833213 0.000000 295 +publish 0 57 2.890372 0.000000 326 +talk 0 53 2.944439 0.000000 336 +particular 0 51 2.995732 0.000000 352 +principl 0 48 3.044522 0.000000 357 +protocol 0 45 3.135494 0.000000 407 +execut 0 45 3.135494 0.000000 404 +third 1 43 3.178054 3.178054 412 +author 1 39 3.258097 3.258097 450 +annual 0 40 3.258097 0.000000 458 +submit 0 39 3.258097 0.000000 440 +correct 0 38 3.295837 0.000000 462 +vita 0 38 3.295837 0.000000 473 +respons 1 37 3.332205 3.332205 476 +tree 1 36 3.367296 3.367296 492 +survei 0 35 3.401197 0.000000 513 +toler 0 33 3.433987 0.000000 533 +fault 1 32 3.465736 3.465736 547 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +postal 0 30 3.555348 0.000000 580 +consid 0 29 3.583519 0.000000 590 +intend 0 28 3.610918 0.000000 599 +toward 0 25 3.737670 0.000000 668 +flow 1 24 3.761200 3.761200 700 +initi 0 23 3.806662 0.000000 717 +decis 0 23 3.806662 0.000000 728 +self 1 22 3.850148 3.850148 761 +identifi 0 22 3.850148 0.000000 760 +rout 1 21 3.912023 3.912023 793 +prepar 1 20 3.951244 3.951244 824 +finit 0 14 4.317488 0.000000 1106 +step 1 13 4.382027 4.382027 1138 +earlier 0 13 4.382027 0.000000 1140 +joint 0 13 4.382027 0.000000 1130 +stai 0 12 4.465908 0.000000 1215 +label 0 10 4.653960 0.000000 1423 +guarante 0 10 4.653960 0.000000 1391 +invit 0 10 4.653960 0.000000 1428 +minimum 1 9 4.753590 4.753590 1555 +candid 0 9 4.753590 0.000000 1606 +occur 0 9 4.753590 0.000000 1572 +said 0 9 4.753590 0.000000 1571 +depth 0 8 4.875197 0.000000 1636 +span 0 8 4.875197 0.000000 1751 +converg 0 7 5.010635 0.000000 1844 +kluwer 0 6 5.164786 0.000000 2143 +stabil 2 5 5.347108 10.694216 2286 +gouda 1 4 5.568345 5.568345 3021 +marco 1 4 5.568345 5.568345 2589 +maximum 1 4 5.568345 5.568345 2632 +implicit 0 4 5.568345 0.000000 2830 +arora 0 4 5.568345 0.000000 2658 +moham 1 3 5.857933 5.857933 3848 +fifteenth 0 3 5.857933 0.000000 3868 +forev 0 2 6.263398 0.000000 5636 +legitim 1 1 6.957497 6.957497 13562 +illegitim 1 1 6.957497 6.957497 13563 +schneidermarco 0 1 6.957497 0.000000 13564 +schneiderph 0 1 6.957497 0.000000 13565 +austinresearchth 0 1 6.957497 0.000000 13566 +itsstat 0 1 6.957497 0.000000 13567 +whenregardless 0 1 6.957497 0.000000 13568 +systemwhich 0 1 6.957497 0.000000 13569 +tolerantr 0 1 6.957497 0.000000 13570 +anish 0 1 6.957497 0.000000 13571 +silent 0 1 6.957497 0.000000 13572 +shlomi 0 1 6.957497 0.000000 13573 +dolev 0 1 6.957497 0.000000 13574 +ctaylor 0 1 6.957497 0.000000 13575 +usamarco 0 1 6.957497 0.000000 13576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..b42d679d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +class 1 199 1.609438 1.609438 37 +utexa 0 189 1.609438 0.000000 44 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +avail 1 169 1.791759 1.791759 48 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +hall 1 146 1.945910 1.945910 65 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +postscript 0 131 2.079442 0.000000 90 +pleas 1 113 2.197225 2.197225 114 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +member 0 84 2.484907 0.000000 165 +build 0 85 2.484907 0.000000 184 +librari 0 87 2.484907 0.000000 181 +stuff 0 87 2.484907 0.000000 171 +orient 0 80 2.564949 0.000000 205 +addit 1 74 2.639057 2.639057 228 +taylor 1 63 2.772589 2.772589 287 +copi 0 63 2.772589 0.000000 284 +descript 0 64 2.772589 0.000000 271 +best 0 59 2.833213 0.000000 299 +semest 0 58 2.890372 0.000000 312 +allow 0 53 2.944439 0.000000 333 +finger 0 52 2.995732 0.000000 354 +run 0 51 2.995732 0.000000 347 +mark 1 44 3.135494 3.135494 403 +compani 0 41 3.218876 0.000000 423 +taught 1 33 3.433987 3.433987 526 +postal 0 30 3.555348 0.000000 580 +usual 0 28 3.610918 0.000000 608 +full 0 28 3.610918 0.000000 615 +linux 0 27 3.637586 0.000000 631 +reach 0 24 3.761200 0.000000 688 +alloc 0 20 3.951244 0.000000 821 +along 0 18 4.060443 0.000000 878 +brief 0 16 4.174387 0.000000 1001 +intel 0 16 4.174387 0.000000 1000 +precis 0 15 4.248495 0.000000 1023 +appl 1 11 4.553877 4.553877 1303 +routin 1 9 4.753590 4.753590 1549 +motorola 0 9 4.753590 0.000000 1546 +oop 0 8 4.875197 0.000000 1778 +pentium 0 6 5.164786 0.000000 2077 +glenn 0 3 5.857933 0.000000 3869 +down 0 3 5.857933 0.000000 3870 +informationi 0 3 5.857933 0.000000 3871 +listof 0 3 5.857933 0.000000 3322 +publicli 0 3 5.857933 0.000000 3687 +isvia 0 2 6.263398 0.000000 5637 +johnston 0 2 6.263398 0.000000 5638 +somerset 0 2 6.263398 0.000000 5639 +johnstonemark 0 1 6.957497 0.000000 13577 +johnstonecontact 0 1 6.957497 0.000000 13578 +markj 0 1 6.957497 0.000000 13579 +byrichard 0 1 6.957497 0.000000 13580 +brice 0 1 6.957497 0.000000 13581 +analysisclass 0 1 6.957497 0.000000 13582 +somersetdesign 0 1 6.957497 0.000000 13583 +centerresearch 0 1 6.957497 0.000000 13584 +garbagecollector 0 1 6.957497 0.000000 13585 +ofstudi 0 1 6.957497 0.000000 13586 +dissertationpropos 0 1 6.957497 0.000000 13587 +timingof 0 1 6.957497 0.000000 13588 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..d53d689a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +click 0 142 1.945910 0.000000 78 +visit 0 63 2.772589 0.000000 288 +point 0 58 2.890372 0.000000 319 +friend 0 48 3.044522 0.000000 376 +mark 0 44 3.135494 0.000000 403 +markng 0 1 6.957497 0.000000 13589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..622a0103 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +link 1 247 1.386294 1.386294 24 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +hall 1 146 1.945910 1.945910 65 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +place 1 106 2.197225 2.197225 124 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +member 0 84 2.484907 0.000000 165 +internet 0 83 2.484907 0.000000 186 +wide 0 84 2.484907 0.000000 185 +state 0 76 2.564949 0.000000 207 +logic 0 71 2.639057 0.000000 230 +prof 1 64 2.772589 2.772589 273 +taylor 1 63 2.772589 2.772589 287 +result 0 65 2.772589 0.000000 281 +reason 0 57 2.890372 0.000000 318 +thesi 0 57 2.890372 0.000000 327 +archiv 1 49 3.044522 3.044522 364 +postal 0 30 3.555348 0.000000 580 +macintosh 0 17 4.110874 0.000000 920 +permit 0 16 4.174387 0.000000 962 +finit 0 14 4.317488 0.000000 1106 +verifi 0 12 4.465908 0.000000 1261 +worth 0 11 4.553877 0.000000 1294 +appl 0 11 4.553877 0.000000 1303 +incomplet 0 9 4.753590 0.000000 1575 +entri 0 8 4.875197 0.000000 1678 +researchi 0 8 4.875197 0.000000 1756 +misra 0 7 5.010635 0.000000 1856 +emerson 0 5 5.347108 0.000000 2547 +proposit 0 5 5.347108 0.000000 2339 +comprehens 0 4 5.568345 0.000000 2745 +marku 1 3 5.857933 5.857933 3872 +uniti 1 3 5.857933 5.857933 3812 +andwil 0 3 5.857933 0.000000 3335 +inconveni 0 3 5.857933 0.000000 3866 +groupand 0 3 5.857933 0.000000 3873 +isalso 0 2 6.263398 0.000000 5640 +kaltenbachmarku 0 1 6.957497 0.000000 13590 +kaltenbachintroductionwelcom 0 1 6.957497 0.000000 13591 +iapolog 0 1 6.957497 0.000000 13592 +spsp 0 1 6.957497 0.000000 13593 +stempor 0 1 6.957497 0.000000 13594 +checkerfor 0 1 6.957497 0.000000 13595 +avisit 0 1 6.957497 0.000000 13596 +theut 0 1 6.957497 0.000000 13597 +departmenthom 0 1 6.957497 0.000000 13598 +archivefor 0 1 6.957497 0.000000 13599 +sworld 0 1 6.957497 0.000000 13600 +supporthom 0 1 6.957497 0.000000 13601 +actansit 0 1 6.957497 0.000000 13602 +theatt 0 1 6.957497 0.000000 13603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..a4c265a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +read 0 154 1.791759 0.000000 47 +hall 0 146 1.945910 0.000000 65 +click 0 142 1.945910 0.000000 78 +homepag 0 93 2.397895 0.000000 148 +stuff 0 87 2.484907 0.000000 171 +educ 0 86 2.484907 0.000000 191 +resum 0 79 2.564949 0.000000 217 +taylor 0 63 2.772589 0.000000 287 +guid 0 63 2.772589 0.000000 267 +virtual 0 62 2.772589 0.000000 285 +local 0 55 2.944439 0.000000 334 +math 0 44 3.135494 0.000000 402 +kind 0 32 3.465736 0.000000 541 +postal 0 30 3.555348 0.000000 580 +neural 0 30 3.555348 0.000000 578 +utc 1 27 3.637586 3.637586 629 +demonstr 0 24 3.761200 0.000000 694 +daili 0 24 3.761200 0.000000 706 +displai 0 23 3.806662 0.000000 712 +applet 0 20 3.951244 0.000000 827 +hotlist 0 13 4.382027 0.000000 1199 +paus 0 4 5.568345 0.000000 2965 +mayberri 0 2 6.263398 0.000000 5641 +downtown 0 2 6.263398 0.000000 5642 +texan 0 2 6.263398 0.000000 5489 +memarti 0 1 6.957497 0.000000 13604 +researchal 0 1 6.957497 0.000000 13605 +martym 0 1 6.957497 0.000000 13606 +anywher 0 1 6.957497 0.000000 13607 +virtualc 0 1 6.957497 0.000000 13608 +internetrestaur 0 1 6.957497 0.000000 13609 +tnstechnolog 0 1 6.957497 0.000000 13610 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..db2aec29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +postscript 0 131 2.079442 0.000000 90 +onlin 1 75 2.639057 2.639057 223 +main 0 67 2.708050 0.000000 256 +colleg 0 61 2.833213 0.000000 300 +reason 1 57 2.890372 2.890372 318 +thesi 0 57 2.890372 0.000000 327 +advisor 0 51 2.995732 0.000000 355 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +expect 0 37 3.332205 0.000000 484 +titl 0 31 3.496508 0.000000 556 +action 0 15 4.248495 0.000000 1038 +philosophi 0 13 4.382027 0.000000 1167 +usavoic 0 13 4.382027 0.000000 1198 +vladimir 0 11 4.553877 0.000000 1324 +mepost 0 10 4.653960 0.000000 1472 +sciencesat 0 7 5.010635 0.000000 1968 +baker 0 7 5.010635 0.000000 1812 +causal 0 6 5.164786 0.000000 2024 +lifschitz 0 5 5.347108 0.000000 2542 +commonsens 0 4 5.568345 0.000000 2998 +nonmonoton 0 4 5.568345 0.000000 3023 +norm 1 2 6.263398 6.263398 5643 +kansa 0 2 6.263398 0.000000 5591 +interestscommonsens 0 2 6.263398 0.000000 5596 +actionlog 0 2 6.263398 0.000000 5597 +reasoningmi 0 2 6.263398 0.000000 5598 +mccain 1 1 6.957497 6.957497 13611 +mccainabout 0 1 6.957497 0.000000 13612 +mephd 0 1 6.957497 0.000000 13613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..e343caaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +check 0 115 2.197225 0.000000 118 +learn 1 86 2.484907 2.484907 170 +info 0 85 2.484907 0.000000 176 +educ 0 86 2.484907 0.000000 191 +logic 0 71 2.639057 0.000000 230 +taylor 0 63 2.772589 0.000000 287 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +postal 0 30 3.555348 0.000000 580 +english 1 15 4.248495 4.248495 1033 +mari 0 12 4.465908 0.000000 1266 +acquisit 0 10 4.653960 0.000000 1465 +elain 1 5 5.347108 5.347108 2496 +groupunivers 0 3 5.857933 0.000000 3831 +austinresearchmi 0 2 6.263398 0.000000 5644 +formor 0 2 6.263398 0.000000 5335 +mecaliff 0 2 6.263398 0.000000 5645 +baylor 1 1 6.957497 6.957497 13614 +califfmari 0 1 6.957497 0.000000 13615 +califfmachin 0 1 6.957497 0.000000 13616 +especiallyinduct 0 1 6.957497 0.000000 13617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..458281b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +base 2 165 1.791759 3.583518 50 +develop 1 174 1.791759 1.791759 53 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +perform 1 143 1.945910 1.945910 74 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +technolog 0 131 2.079442 0.000000 102 +assist 1 112 2.197225 2.197225 113 +structur 0 106 2.197225 0.000000 105 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +world 0 115 2.197225 0.000000 126 +text 1 98 2.302585 2.302585 133 +part 0 98 2.302585 0.000000 129 +question 1 91 2.397895 2.397895 141 +sinc 0 90 2.397895 0.000000 159 +select 0 91 2.397895 0.000000 154 +help 1 83 2.484907 2.484907 175 +larg 1 82 2.484907 2.484907 168 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +method 1 80 2.564949 2.564949 213 +complet 0 77 2.564949 0.000000 208 +addit 0 74 2.639057 0.000000 228 +knowledg 2 67 2.708050 5.416100 243 +would 1 67 2.708050 2.708050 251 +test 1 66 2.708050 2.708050 252 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +simul 0 66 2.708050 0.000000 255 +result 1 65 2.772589 2.772589 281 +plan 1 65 2.772589 2.772589 272 +improv 0 62 2.772589 0.000000 289 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +collect 0 65 2.772589 0.000000 268 +automat 1 61 2.833213 2.833213 306 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +reason 0 57 2.890372 0.000000 318 +detail 0 57 2.890372 0.000000 321 +found 0 53 2.944439 0.000000 337 +extens 0 53 2.944439 0.000000 340 +numer 0 49 3.044522 0.000000 369 +pointer 0 48 3.044522 0.000000 368 +answer 1 45 3.135494 3.135494 391 +anoth 0 45 3.135494 0.000000 408 +natur 0 44 3.135494 0.000000 406 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +past 0 42 3.218876 0.000000 428 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +multipl 1 39 3.258097 3.258097 453 +littl 0 39 3.258097 0.000000 454 +ofth 0 36 3.367296 0.000000 491 +especi 0 36 3.367296 0.000000 496 +jame 1 35 3.401197 3.401197 507 +represent 0 35 3.401197 0.000000 512 +concept 0 32 3.465736 0.000000 537 +kind 0 32 3.465736 0.000000 541 +express 0 32 3.465736 0.000000 540 +extend 0 32 3.465736 0.000000 539 +domain 1 30 3.555348 3.555348 564 +steve 1 29 3.583519 3.583519 594 +built 1 29 3.583519 3.583519 592 +retriev 0 27 3.637586 0.000000 621 +task 1 25 3.737670 3.737670 678 +concern 1 25 3.737670 3.737670 666 +jeff 1 25 3.737670 3.737670 673 +begin 0 23 3.806662 0.000000 716 +varieti 1 22 3.850148 3.850148 740 +fact 0 21 3.912023 0.000000 780 +similar 0 21 3.912023 0.000000 771 +alumni 0 21 3.912023 0.000000 807 +expert 0 20 3.951244 0.000000 833 +predict 1 19 4.007333 4.007333 855 +encourag 0 18 4.060443 0.000000 880 +event 0 18 4.060443 0.000000 896 +appropri 0 18 4.060443 0.000000 883 +aid 0 18 4.060443 0.000000 904 +expand 0 17 4.110874 0.000000 928 +otherwis 0 17 4.110874 0.000000 922 +explan 1 16 4.174387 4.174387 985 +normal 0 16 4.174387 0.000000 995 +carl 1 15 4.248495 4.248495 1024 +biologi 1 15 4.248495 4.248495 1049 +english 0 15 4.248495 0.000000 1033 +shown 0 14 4.317488 0.000000 1080 +charl 1 13 4.382027 4.382027 1149 +composit 1 13 4.382027 4.382027 1150 +bruce 1 12 4.465908 4.465908 1226 +brad 0 12 4.465908 0.000000 1264 +peter 1 11 4.553877 4.553877 1316 +eight 0 11 4.553877 0.000000 1331 +qualit 0 11 4.553877 0.000000 1362 +rich 1 10 4.653960 4.653960 1396 +custom 0 10 4.653960 0.000000 1414 +significantli 0 9 4.753590 0.000000 1508 +tutor 0 9 4.753590 0.000000 1552 +mainten 0 9 4.753590 0.000000 1543 +herefor 0 9 4.753590 0.000000 1483 +erik 0 8 4.875197 0.000000 1701 +largest 0 7 5.010635 0.000000 1858 +fred 1 6 5.164786 5.164786 2072 +viewpoint 1 6 5.164786 5.164786 2116 +biolog 0 6 5.164786 0.000000 2147 +ongo 0 6 5.164786 0.000000 2215 +porter 1 5 5.347108 5.347108 2293 +correl 1 5 5.347108 5.347108 2279 +desk 1 5 5.347108 5.347108 2297 +oncomput 0 5 5.347108 0.000000 2326 +notabl 0 5 5.347108 0.000000 2276 +colleagu 0 5 5.347108 0.000000 2304 +focuss 0 5 5.347108 0.000000 2271 +clark 1 4 5.568345 5.568345 2705 +knight 0 4 5.568345 0.000000 2728 +souther 1 3 5.857933 5.857933 3795 +karl 1 3 5.857933 5.857933 3623 +multifunct 0 3 5.857933 0.000000 3826 +implicitli 0 3 5.857933 0.000000 3620 +qsim 0 3 5.857933 0.000000 3862 +proport 0 3 5.857933 0.000000 3293 +boe 0 3 5.857933 0.000000 3318 +mallori 1 2 6.263398 6.263398 5635 +bareiss 1 2 6.263398 6.263398 5646 +murrai 1 2 6.263398 6.263398 5647 +rickel 1 2 6.263398 6.263398 5648 +forconstruct 0 2 6.263398 0.000000 5649 +inon 0 2 6.263398 0.000000 4496 +arealso 0 2 6.263398 0.000000 5650 +knowledgebas 0 2 6.263398 0.000000 5136 +adequ 0 2 6.263398 0.000000 4116 +lexicon 0 2 6.263398 0.000000 5651 +brant 0 2 6.263398 0.000000 5652 +aroundth 0 2 6.263398 0.000000 5653 +prado 1 1 6.957497 6.957497 13618 +lester 1 1 6.957497 6.957497 13619 +callawai 1 1 6.957497 6.957497 13620 +andersen 1 1 6.957497 6.957497 13621 +acker 1 1 6.957497 6.957497 13622 +eilert 1 1 6.957497 6.957497 13623 +groupknowledg 0 1 6.957497 0.000000 13624 +overviewour 0 1 6.957497 0.000000 13625 +atuniv 0 1 6.957497 0.000000 13626 +currentexpert 0 1 6.957497 0.000000 13627 +broadknowledg 0 1 6.957497 0.000000 13628 +toexplain 0 1 6.957497 0.000000 13629 +answeringa 0 1 6.957497 0.000000 13630 +formallyrepres 0 1 6.957497 0.000000 13631 +thebiolog 0 1 6.957497 0.000000 13632 +andthos 0 1 6.957497 0.000000 13633 +beanswer 0 1 6.957497 0.000000 13634 +jeffrickel 0 1 6.957497 0.000000 13635 +taskof 0 1 6.957497 0.000000 13636 +thesimplest 0 1 6.957497 0.000000 13637 +dauntingrequir 0 1 6.957497 0.000000 13638 +manymodel 0 1 6.957497 0.000000 13639 +compilerand 0 1 6.957497 0.000000 13640 +bybuild 0 1 6.957497 0.000000 13641 +computingenviron 0 1 6.957497 0.000000 13642 +deskassist 0 1 6.957497 0.000000 13643 +squestion 0 1 6.957497 0.000000 13644 +projectsour 0 1 6.957497 0.000000 13645 +kned 0 1 6.957497 0.000000 13646 +kastl 0 1 6.957497 0.000000 13647 +fare 0 1 6.957497 0.000000 13648 +lex 0 1 6.957497 0.000000 13649 +tripel 0 1 6.957497 0.000000 13650 +theorist 0 1 6.957497 0.000000 13651 +searcher 0 1 6.957497 0.000000 13652 +alumna 0 1 6.957497 0.000000 13653 +lian 0 1 6.957497 0.000000 13654 +blumenth 0 1 6.957497 0.000000 13655 +eolu 0 1 6.957497 0.000000 13656 +uwyo 0 1 6.957497 0.000000 13657 +clarkp 0 1 6.957497 0.000000 13658 +redwood 0 1 6.957497 0.000000 13659 +ncsu 0 1 6.957497 0.000000 13660 +publicationsclick 0 1 6.957497 0.000000 13661 +projectsclick 0 1 6.957497 0.000000 13662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..b1b3eae4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +algorithm 1 162 1.791759 1.791759 57 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +note 0 142 1.945910 0.000000 67 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +text 1 98 2.302585 2.302585 133 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +david 0 71 2.639057 0.000000 232 +goal 0 66 2.708050 0.000000 250 +knowledg 0 67 2.708050 0.000000 243 +execut 0 45 3.135494 0.000000 404 +past 1 42 3.218876 3.218876 428 +soon 0 36 3.367296 0.000000 494 +either 0 35 3.401197 0.000000 506 +bibliographi 0 34 3.401197 0.000000 518 +queri 0 33 3.433987 0.000000 524 +someth 0 31 3.496508 0.000000 554 +robert 0 30 3.555348 0.000000 567 +rule 1 26 3.688879 3.688879 638 +constraint 0 26 3.688879 0.000000 636 +sometim 0 24 3.761200 0.000000 696 +finish 0 22 3.850148 0.000000 748 +basi 0 20 3.951244 0.000000 828 +render 0 17 4.110874 0.000000 947 +match 1 16 4.174387 4.174387 965 +warn 0 14 4.317488 0.000000 1068 +daniel 1 12 4.465908 4.465908 1233 +evolv 0 12 4.465908 0.000000 1223 +candid 1 9 4.753590 4.753590 1606 +presenc 0 8 4.875197 0.000000 1671 +hold 0 8 4.875197 0.000000 1645 +lane 0 8 4.875197 0.000000 1720 +yang 0 8 4.875197 0.000000 1652 +wouldn 0 7 5.010635 0.000000 1970 +srinivasan 0 6 5.164786 0.000000 2175 +mirank 1 5 5.347108 5.347108 2543 +treat 1 5 5.347108 5.347108 2521 +breath 0 4 5.568345 0.000000 2946 +lanc 0 4 5.568345 0.000000 3022 +obsolet 0 3 5.857933 0.000000 3196 +byth 0 3 5.857933 0.000000 3874 +archi 0 3 5.857933 0.000000 3639 +ming 0 3 5.857933 0.000000 3712 +bibtex 1 2 6.263398 6.263398 5406 +leap 0 2 6.263398 0.000000 5654 +venu 0 2 6.263398 0.000000 5655 +usea 0 2 6.263398 0.000000 4800 +satisfact 0 2 6.263398 0.000000 5656 +roberto 0 2 6.263398 0.000000 5468 +bayardo 0 2 6.263398 0.000000 5467 +obermey 0 2 6.263398 0.000000 5657 +vaidyaraman 0 2 6.263398 0.000000 5658 +warshaw 0 2 6.263398 0.000000 5659 +rete 1 1 6.957497 6.957497 13663 +belat 0 1 6.957497 0.000000 13664 +fashionwai 0 1 6.957497 0.000000 13665 +itscomparison 0 1 6.957497 0.000000 13666 +encompass 0 1 6.957497 0.000000 13667 +fundamentalcomput 0 1 6.957497 0.000000 13668 +corollari 0 1 6.957497 0.000000 13669 +thatgoal 0 1 6.957497 0.000000 13670 +gadboi 0 1 6.957497 0.000000 13671 +vasili 0 1 6.957497 0.000000 13672 +samoladi 0 1 6.957497 0.000000 13673 +schrag 0 1 6.957497 0.000000 13674 +andrewsdavid 0 1 6.957497 0.000000 13675 +brantchin 0 1 6.957497 0.000000 13676 +kuoshiow 0 1 6.957497 0.000000 13677 +salvator 0 1 6.957497 0.000000 13678 +stolfo 0 1 6.957497 0.000000 13679 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..e07eb9ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +design 1 213 1.386294 1.386294 25 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +recent 0 167 1.791759 0.000000 58 +process 1 142 1.945910 1.945910 72 +hall 0 146 1.945910 0.000000 65 +technolog 0 131 2.079442 0.000000 102 +specif 0 106 2.197225 0.000000 106 +structur 0 106 2.197225 0.000000 105 +access 0 102 2.302585 0.000000 136 +select 0 91 2.397895 0.000000 154 +homepag 0 93 2.397895 0.000000 148 +institut 0 84 2.484907 0.000000 187 +ieee 0 86 2.484907 0.000000 190 +method 0 80 2.564949 0.000000 213 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +appli 0 71 2.639057 0.000000 226 +practic 0 70 2.708050 0.000000 246 +januari 0 62 2.772589 0.000000 264 +foundat 0 62 2.772589 0.000000 286 +reason 0 57 2.890372 0.000000 318 +profession 0 51 2.995732 0.000000 345 +electron 0 47 3.091042 0.000000 379 +futur 0 41 3.218876 0.000000 427 +formal 1 37 3.332205 3.332205 478 +tech 0 35 3.401197 0.000000 515 +award 0 34 3.401197 0.000000 523 +john 1 33 3.433987 3.433987 532 +chair 1 29 3.583519 3.583519 596 +synchron 1 29 3.583519 3.583519 588 +mind 0 27 3.637586 0.000000 632 +aspect 0 25 3.737670 0.000000 663 +fellow 1 24 3.761200 3.761200 701 +honor 1 23 3.806662 3.806662 729 +equat 0 23 3.806662 0.000000 724 +indian 0 22 3.850148 0.000000 769 +inth 0 22 3.850148 0.000000 741 +particularli 0 19 4.007333 0.000000 867 +north 0 19 4.007333 0.000000 873 +coupl 0 17 4.110874 0.000000 939 +letter 0 16 4.174387 0.000000 981 +weslei 0 16 4.174387 0.000000 983 +researchmi 0 14 4.317488 0.000000 1119 +classic 0 14 4.317488 0.000000 1084 +recurs 0 13 4.382027 0.000000 1127 +addison 0 12 4.465908 0.000000 1230 +kanpur 0 8 4.875197 0.000000 1744 +simon 0 8 4.875197 0.000000 1697 +guggenheim 0 8 4.875197 0.000000 1759 +misra 1 7 5.010635 5.010635 1856 +prentic 0 7 5.010635 0.000000 1838 +phase 0 6 5.164786 0.000000 1977 +holland 0 5 5.347108 0.000000 2490 +jayadev 1 4 5.568345 5.568345 3006 +essai 0 4 5.568345 0.000000 2948 +interestparallel 0 3 5.857933 0.000000 3806 +publicationsj 0 3 5.857933 0.000000 3808 +hoar 0 3 5.857933 0.000000 3875 +nondeterminist 0 3 5.857933 0.000000 3560 +powerlist 0 2 6.263398 0.000000 5660 +loos 0 2 6.263398 0.000000 4774 +chandi 0 2 6.263398 0.000000 5661 +seuss 0 2 6.263398 0.000000 5662 +misrareg 0 1 6.957497 0.000000 13680 +hopkin 0 1 6.957497 0.000000 13681 +fellowarea 0 1 6.957497 0.000000 13682 +asynchronoussystem 0 1 6.957497 0.000000 13683 +otherpap 0 1 6.957497 0.000000 13684 +anoverview 0 1 6.957497 0.000000 13685 +apostscript 0 1 6.957497 0.000000 13686 +versionaccess 0 1 6.957497 0.000000 13687 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..fbf650ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +group 2 183 1.609438 3.218876 36 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +base 1 165 1.791759 1.791759 50 +texa 1 160 1.791759 1.791759 64 +data 1 170 1.791759 1.791759 49 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +austin 0 168 1.791759 0.000000 63 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +model 1 145 1.945910 1.945910 69 +first 1 140 1.945910 1.945910 71 +professor 0 137 1.945910 0.000000 76 +click 0 142 1.945910 0.000000 78 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +problem 0 147 1.945910 0.000000 75 +machin 2 129 2.079442 4.158884 95 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +site 0 106 2.197225 0.000000 119 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +associ 1 93 2.397895 2.397895 151 +pictur 0 89 2.397895 0.000000 160 +search 0 95 2.397895 0.000000 155 +learn 2 86 2.484907 4.969814 170 +journal 1 83 2.484907 2.484907 183 +control 0 82 2.484907 0.000000 164 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +intellig 2 72 2.639057 5.278114 225 +logic 1 71 2.639057 2.639057 230 +nation 0 74 2.639057 0.000000 240 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +order 1 69 2.708050 2.708050 249 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +special 1 56 2.890372 2.890372 320 +index 1 56 2.890372 2.890372 309 +reason 0 57 2.890372 0.000000 318 +scientif 0 53 2.944439 0.000000 341 +standard 0 48 3.044522 0.000000 365 +archiv 0 49 3.044522 0.000000 364 +natur 1 44 3.135494 3.135494 406 +combin 0 42 3.218876 0.000000 421 +autom 0 41 3.218876 0.000000 434 +form 0 39 3.258097 0.000000 443 +paul 0 38 3.295837 0.000000 471 +john 0 33 3.433987 0.000000 532 +queri 0 33 3.433987 0.000000 524 +ad 0 32 3.465736 0.000000 544 +richard 0 31 3.496508 0.000000 559 +utc 0 27 3.637586 0.000000 629 +american 0 27 3.637586 0.000000 634 +revis 1 26 3.688879 3.688879 640 +rule 1 26 3.688879 3.688879 638 +experiment 0 26 3.688879 0.000000 645 +subject 0 26 3.688879 0.000000 647 +jeff 0 25 3.737670 0.000000 673 +decis 0 23 3.806662 0.000000 728 +alumni 0 21 3.912023 0.000000 807 +supervis 0 20 3.951244 0.000000 840 +comparison 0 19 4.007333 0.000000 863 +partial 0 18 4.060443 0.000000 900 +repositori 0 17 4.110874 0.000000 932 +fourth 0 16 4.174387 0.000000 999 +explan 0 16 4.174387 0.000000 985 +atth 0 15 4.248495 0.000000 1019 +prolog 0 13 4.382027 0.000000 1155 +joint 0 13 4.382027 0.000000 1130 +mellon 0 13 4.382027 0.000000 1179 +mari 0 12 4.465908 0.000000 1266 +carnegi 0 12 4.465908 0.000000 1260 +induct 2 11 4.553877 9.107754 1304 +qualit 0 11 4.553877 0.000000 1362 +refin 0 11 4.553877 0.000000 1363 +acquisit 1 10 4.653960 4.653960 1465 +linguist 1 9 4.753590 4.753590 1593 +moonei 0 9 4.753590 0.000000 1520 +ataustin 0 9 4.753590 0.000000 1610 +tutor 0 9 4.753590 0.000000 1552 +classif 0 9 4.753590 0.000000 1586 +aaai 1 8 4.875197 4.875197 1750 +european 1 8 4.875197 4.875197 1763 +empir 0 8 4.875197 0.000000 1722 +irvin 0 8 4.875197 0.000000 1660 +illinoi 1 7 5.010635 5.010635 1941 +predic 0 7 5.010635 0.000000 1806 +planner 0 7 5.010635 0.000000 1797 +thompson 0 6 5.164786 0.000000 2049 +neither 0 6 5.164786 0.000000 1990 +machinelearn 0 6 5.164786 0.000000 2084 +oxford 0 6 5.164786 0.000000 2121 +bradlei 1 5 5.347108 5.347108 2554 +elain 0 5 5.347108 0.000000 2496 +proposit 0 5 5.347108 0.000000 2339 +sowmya 1 4 5.568345 5.568345 2670 +diagnosi 0 4 5.568345 0.000000 3027 +uncertain 0 4 5.568345 0.000000 2758 +invent 0 4 5.568345 0.000000 3028 +ijcai 0 4 5.568345 0.000000 2901 +hermjakob 0 3 5.857933 0.000000 3876 +ramachandran 0 3 5.857933 0.000000 3742 +cindi 0 3 5.857933 0.000000 3830 +acad 0 3 5.857933 0.000000 3847 +signll 0 3 5.857933 0.000000 3877 +ucpop 0 3 5.857933 0.000000 3878 +estlin 1 2 6.263398 6.263398 5554 +abduct 1 2 6.263398 6.263398 5663 +focuseson 0 2 6.263398 0.000000 5433 +califf 0 2 6.263398 0.000000 5664 +mecaliff 0 2 6.263398 0.000000 5645 +tara 0 2 6.263398 0.000000 5555 +cthomp 0 2 6.263398 0.000000 5530 +dirk 0 2 6.263398 0.000000 5665 +subramanian 0 2 6.263398 0.000000 5666 +georgetown 0 2 6.263398 0.000000 5667 +drake 0 2 6.263398 0.000000 5668 +accel 0 2 6.263398 0.000000 5166 +foidl 0 2 6.263398 0.000000 4270 +icml 0 2 6.263398 0.000000 5669 +quinlan 0 2 6.263398 0.000000 4797 +learner 0 2 6.263398 0.000000 4508 +prodigi 0 2 6.263398 0.000000 5670 +baff 1 1 6.957497 6.957497 13688 +mahonei 1 1 6.957497 6.957497 13689 +speedup 1 1 6.957497 6.957497 13690 +knowledgerefin 0 1 6.957497 0.000000 13691 +scicomp 0 1 6.957497 0.000000 13692 +firstadvisor 0 1 6.957497 0.000000 13693 +hwee 0 1 6.957497 0.000000 13694 +nhweetou 0 1 6.957497 0.000000 13695 +trantor 0 1 6.957497 0.000000 13696 +ourston 0 1 6.957497 0.000000 13697 +dirk_ourston 0 1 6.957497 0.000000 13698 +cpqm 0 1 6.957497 0.000000 13699 +saic 0 1 6.957497 0.000000 13700 +furtwangen 0 1 6.957497 0.000000 13701 +siddarth 0 1 6.957497 0.000000 13702 +zell 0 1 6.957497 0.000000 13703 +reasoningher 0 1 6.957497 0.000000 13704 +fort 0 1 6.957497 0.000000 13705 +chillin 0 1 6.957497 0.000000 13706 +dolphin 0 1 6.957497 0.000000 13707 +ilpnet 0 1 6.957497 0.000000 13708 +sigart 0 1 6.957497 0.000000 13709 +aritfici 0 1 6.957497 0.000000 13710 +biblio 0 1 6.957497 0.000000 13711 +jair 0 1 6.957497 0.000000 13712 +foil 0 1 6.957497 0.000000 13713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..31ef6407 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +machin 1 129 2.079442 2.079442 95 +theori 0 111 2.197225 0.000000 127 +search 0 95 2.397895 0.000000 155 +learn 1 86 2.484907 2.484907 170 +control 0 82 2.484907 0.000000 164 +start 0 83 2.484907 0.000000 173 +exampl 1 77 2.564949 2.564949 195 +decemb 0 80 2.564949 0.000000 215 +complet 0 77 2.564949 0.000000 208 +intellig 1 72 2.639057 2.639057 225 +logic 0 71 2.639057 0.000000 230 +effici 0 73 2.639057 0.000000 233 +knowledg 0 67 2.708050 0.000000 243 +degre 0 69 2.708050 0.000000 259 +artifici 1 63 2.772589 2.772589 280 +improv 0 62 2.772589 0.000000 289 +plan 0 65 2.772589 0.000000 272 +prof 0 64 2.772589 0.000000 273 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +thesi 0 57 2.890372 0.000000 327 +direct 0 57 2.890372 0.000000 316 +finger 0 52 2.995732 0.000000 354 +natur 0 44 3.135494 0.000000 406 +combin 0 42 3.218876 0.000000 421 +small 0 39 3.258097 0.000000 447 +vita 0 38 3.295837 0.000000 473 +word 0 34 3.401197 0.000000 508 +obtain 0 33 3.433987 0.000000 534 +posit 0 31 3.496508 0.000000 552 +computersci 0 30 3.555348 0.000000 562 +neural 0 30 3.555348 0.000000 578 +postal 0 30 3.555348 0.000000 580 +symbol 1 27 3.637586 3.637586 620 +rule 0 26 3.688879 0.000000 638 +compar 0 26 3.688879 0.000000 648 +qualiti 0 20 3.951244 0.000000 832 +lisp 0 18 4.060443 0.000000 897 +attend 0 18 4.060443 0.000000 893 +explan 0 16 4.174387 0.000000 985 +prolog 0 13 4.382027 0.000000 1155 +went 0 12 4.465908 0.000000 1279 +sens 0 11 4.553877 0.000000 1305 +induct 0 11 4.553877 0.000000 1304 +refin 0 11 4.553877 0.000000 1363 +acquisit 1 10 4.653960 4.653960 1465 +interestsmi 0 10 4.653960 0.000000 1462 +town 0 10 4.653960 0.000000 1458 +moonei 1 9 4.753590 4.753590 1520 +extract 0 8 4.875197 0.000000 1728 +empir 0 8 4.875197 0.000000 1722 +grew 0 8 4.875197 0.000000 1742 +illinoi 1 7 5.010635 5.010635 1941 +raymond 0 5 5.347108 0.000000 2313 +began 0 5 5.347108 0.000000 2498 +disambigu 0 4 5.568345 0.000000 2899 +bayesian 0 4 5.568345 0.000000 2671 +urbana 1 3 5.857933 5.857933 3879 +primarilyin 0 3 5.857933 0.000000 3832 +parser 0 3 5.857933 0.000000 3141 +myph 0 3 5.857933 0.000000 3880 +champaign 1 2 6.263398 6.263398 5671 +lexicon 0 2 6.263398 0.000000 5651 +highschool 0 2 6.263398 0.000000 5672 +homepageraymond 0 1 6.957497 0.000000 13714 +mooneyassoci 0 1 6.957497 0.000000 13715 +informationfal 0 1 6.957497 0.000000 13716 +learningspr 0 1 6.957497 0.000000 13717 +iiperson 0 1 6.957497 0.000000 13718 +historyi 0 1 6.957497 0.000000 13719 +fallon 0 1 6.957497 0.000000 13720 +wherestart 0 1 6.957497 0.000000 13721 +fallontownship 0 1 6.957497 0.000000 13722 +urbanato 0 1 6.957497 0.000000 13723 +learninggroup 0 1 6.957497 0.000000 13724 +gerald 0 1 6.957497 0.000000 13725 +dejong 0 1 6.957497 0.000000 13726 +meadowfir 0 1 6.957497 0.000000 13727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..c29490f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +link 1 247 1.386294 1.386294 24 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +number 0 130 2.079442 0.000000 97 +world 0 115 2.197225 0.000000 126 +specif 0 106 2.197225 0.000000 106 +select 1 91 2.397895 2.397895 154 +mani 0 92 2.397895 0.000000 150 +real 0 93 2.397895 0.000000 144 +follow 0 92 2.397895 0.000000 143 +homepag 0 93 2.397895 0.000000 148 +control 1 82 2.484907 2.484907 164 +resourc 0 81 2.484907 0.000000 172 +learn 0 86 2.484907 0.000000 170 +educ 0 86 2.484907 0.000000 191 +appear 0 78 2.564949 0.000000 210 +dynam 0 76 2.564949 0.000000 194 +state 0 76 2.564949 0.000000 207 +intellig 0 72 2.639057 0.000000 225 +practic 0 70 2.708050 0.000000 246 +knowledg 0 67 2.708050 0.000000 243 +taylor 0 63 2.772589 0.000000 287 +plai 0 60 2.833213 0.000000 307 +local 0 55 2.944439 0.000000 334 +visitor 0 49 3.044522 0.000000 371 +must 0 40 3.258097 0.000000 442 +game 0 36 3.367296 0.000000 498 +return 1 34 3.401197 3.401197 502 +obtain 0 33 3.433987 0.000000 534 +neural 1 30 3.555348 3.555348 578 +domain 0 30 3.555348 0.000000 564 +postal 0 30 3.555348 0.000000 580 +utc 1 27 3.637586 3.637586 629 +task 1 25 3.737670 3.737670 678 +sport 0 25 3.737670 0.000000 683 +decis 1 23 3.806662 3.806662 728 +sequenc 0 23 3.806662 0.000000 734 +sequenti 0 22 3.850148 0.000000 745 +rout 0 21 3.912023 0.000000 793 +alloc 0 20 3.951244 0.000000 821 +agent 1 18 4.060443 4.060443 910 +upon 0 16 4.174387 0.000000 978 +action 1 15 4.248495 4.248495 1038 +universityof 0 15 4.248495 0.000000 1061 +dave 0 14 4.317488 0.000000 1098 +finit 0 14 4.317488 0.000000 1106 +misc 0 13 4.382027 0.000000 1124 +evolv 0 12 4.465908 0.000000 1223 +enter 0 10 4.653960 0.000000 1454 +total 0 10 4.653960 0.000000 1398 +observ 0 9 4.753590 0.000000 1578 +character 0 8 4.875197 0.000000 1767 +canb 0 7 5.010635 0.000000 1846 +highest 0 4 5.568345 0.000000 2950 +thesystem 0 3 5.857933 0.000000 3881 +scenario 0 2 6.263398 0.000000 5524 +geneticalgorithm 0 2 6.263398 0.000000 5673 +amparticularli 0 2 6.263398 0.000000 5558 +unavail 0 2 6.263398 0.000000 5046 +tulan 0 2 6.263398 0.000000 5559 +moriarti 1 1 6.957497 6.957497 13728 +moriartydav 0 1 6.957497 0.000000 13729 +researchsequenti 0 1 6.957497 0.000000 13730 +problemsinclud 0 1 6.957497 0.000000 13731 +stateof 0 1 6.957497 0.000000 13732 +selectanoth 0 1 6.957497 0.000000 13733 +payoff 0 1 6.957497 0.000000 13734 +madeor 0 1 6.957497 0.000000 13735 +thesequ 0 1 6.957497 0.000000 13736 +cumulativepayoff 0 1 6.957497 0.000000 13737 +iscurr 0 1 6.957497 0.000000 13738 +costli 0 1 6.957497 0.000000 13739 +havestudi 0 1 6.957497 0.000000 13740 +constraintsatisfact 0 1 6.957497 0.000000 13741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..7176b14a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +educ 0 86 2.484907 0.000000 191 +mondai 0 77 2.564949 0.000000 206 +decemb 0 80 2.564949 0.000000 215 +map 0 39 3.258097 0.000000 452 +campu 0 27 3.637586 0.000000 623 +mine 1 26 3.688879 3.688879 654 +reach 0 24 3.761200 0.000000 688 +edulast 0 17 4.110874 0.000000 927 +utah 1 9 4.753590 4.753590 1585 +eduresearch 0 6 5.164786 0.000000 2205 +trail 0 6 5.164786 0.000000 2071 +mehom 0 4 5.568345 0.000000 2979 +wade 1 1 6.957497 6.957497 13742 +mwbarn 1 1 6.957497 6.957497 13743 +barnesm 0 1 6.957497 0.000000 13744 +barnesmwbarn 0 1 6.957497 0.000000 13745 +workhelp 0 1 6.957497 0.000000 13746 +pagestyp 0 1 6.957497 0.000000 13747 +literatureliteratur 0 1 6.957497 0.000000 13748 +notesclassesbackground 0 1 6.957497 0.000000 13749 +informationph 0 1 6.957497 0.000000 13750 +tanglebriar 0 1 6.957497 0.000000 13751 +yete 0 1 6.957497 0.000000 13752 +eduauthor 0 1 6.957497 0.000000 13753 +barnesemail 0 1 6.957497 0.000000 13754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..bf94fc7a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +professor 1 137 1.945910 1.945910 76 +lectur 0 135 1.945910 0.000000 73 +spring 0 131 2.079442 0.000000 88 +document 0 121 2.079442 0.000000 89 +teach 1 108 2.197225 2.197225 112 +pleas 1 113 2.197225 2.197225 114 +person 0 111 2.197225 0.000000 117 +assist 0 112 2.197225 0.000000 113 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +sinc 0 90 2.397895 0.000000 159 +contain 1 81 2.484907 2.484907 174 +resum 0 79 2.564949 0.000000 217 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +window 0 68 2.708050 0.000000 242 +receiv 0 66 2.708050 0.000000 244 +abstract 0 62 2.772589 0.000000 276 +creat 0 63 2.772589 0.000000 277 +room 1 59 2.833213 2.833213 301 +summer 1 56 2.890372 2.890372 311 +faculti 0 56 2.890372 0.000000 325 +direct 0 57 2.890372 0.000000 316 +profession 0 51 2.995732 0.000000 345 +right 0 48 3.044522 0.000000 363 +author 1 39 3.258097 3.258097 450 +vita 0 38 3.295837 0.000000 473 +feel 0 37 3.332205 0.000000 483 +bibliographi 0 34 3.401197 0.000000 518 +curriculum 0 33 3.433987 0.000000 535 +travel 0 30 3.555348 0.000000 579 +chair 0 29 3.583519 0.000000 596 +full 0 28 3.610918 0.000000 615 +load 0 28 3.610918 0.000000 601 +reach 0 24 3.761200 0.000000 688 +brows 0 23 3.806662 0.000000 726 +reserv 0 20 3.951244 0.000000 808 +els 0 19 4.007333 0.000000 843 +spend 0 19 4.007333 0.000000 850 +account 0 18 4.060443 0.000000 882 +senior 0 14 4.317488 0.000000 1120 +settimeout 0 5 5.347108 0.000000 2536 +dale 1 4 5.568345 5.568345 2687 +seed 1 4 5.568345 5.568345 2984 +websit 0 4 5.568345 0.000000 2726 +timertwo 0 4 5.568345 0.000000 2985 +oftexa 0 4 5.568345 0.000000 3003 +whichcontain 0 4 5.568345 0.000000 2714 +scrollit_rl 0 3 5.857933 0.000000 3882 +retir 1 2 6.263398 6.263398 5674 +nell 1 1 6.957497 6.957497 13755 +pagesunivers 0 1 6.957497 0.000000 13756 +departmentwelcom 0 1 6.957497 0.000000 13757 +utaustin 0 1 6.957497 0.000000 13758 +fromful 0 1 6.957497 0.000000 13759 +falland 0 1 6.957497 0.000000 13760 +ofdissert 0 1 6.957497 0.000000 13761 +memento 0 1 6.957497 0.000000 13762 +nontechn 0 1 6.957497 0.000000 13763 +anycorrespond 0 1 6.957497 0.000000 13764 +ndale 0 1 6.957497 0.000000 13765 +profilepublicationsresearch 0 1 6.957497 0.000000 13766 +interestsperson 0 1 6.957497 0.000000 13767 +interestsnel 0 1 6.957497 0.000000 13768 +westlak 0 1 6.957497 0.000000 13769 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..44773d92 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +design 0 213 1.386294 0.000000 25 +support 0 132 1.945910 0.000000 83 +note 0 142 1.945910 0.000000 67 +pleas 0 113 2.197225 0.000000 114 +view 1 70 2.708050 2.708050 254 +browser 1 56 2.890372 2.890372 313 +without 0 50 3.044522 0.000000 370 +netscap 1 44 3.135494 3.135494 395 +keep 0 44 3.135494 0.000000 409 +download 0 36 3.367296 0.000000 489 +either 0 35 3.401197 0.000000 506 +mind 0 27 3.637586 0.000000 632 +background 0 25 3.737670 0.000000 664 +frame 1 24 3.761200 3.761200 684 +color 1 22 3.850148 3.850148 762 +navig 0 21 3.912023 0.000000 796 +choos 0 16 4.174387 0.000000 964 +pretti 0 13 4.382027 0.000000 1191 +latter 0 9 4.753590 0.000000 1522 +chosen 0 6 5.164786 0.000000 1984 +blame 0 3 5.857933 0.000000 3636 +neeraj 0 2 6.263398 0.000000 5577 +obnoxi 0 1 6.957497 0.000000 13770 +chartreus 0 1 6.957497 0.000000 13771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..a0491a77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +sciencesunivers 0 37 3.332205 0.000000 486 +log 0 19 4.007333 0.000000 857 +kumar 0 9 4.753590 0.000000 1506 +sciencedepart 0 6 5.164786 0.000000 2172 +natarajan 0 2 6.263398 0.000000 4377 +austini 0 2 6.263398 0.000000 5527 +gnan 0 1 6.957497 0.000000 13772 +pagegnana 0 1 6.957497 0.000000 13773 +edufind 0 1 6.957497 0.000000 13774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..796ca574 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +click 0 142 1.945910 0.000000 78 +number 0 130 2.079442 0.000000 97 +look 1 107 2.197225 2.197225 115 +pictur 0 89 2.397895 0.000000 160 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +visitor 0 49 3.044522 0.000000 371 +term 0 43 3.178054 0.000000 411 +queri 0 33 3.433987 0.000000 524 +art 0 29 3.583519 0.000000 593 +quit 0 27 3.637586 0.000000 633 +altern 0 26 3.688879 0.000000 641 +output 0 21 3.912023 0.000000 788 +medic 0 17 4.110874 0.000000 958 +doesn 0 15 4.248495 0.000000 1055 +score 0 15 4.248495 0.000000 1017 +typic 0 11 4.553877 0.000000 1360 +hit 0 7 5.010635 0.000000 1965 +arora 1 4 5.568345 5.568345 2658 +ters 0 3 5.857933 0.000000 3297 +nimar 1 2 6.263398 6.263398 4188 +singh 0 2 6.263398 0.000000 5675 +knowwhat 0 2 6.263398 0.000000 5456 +clearer 0 2 6.263398 0.000000 5676 +bookmarksto 0 1 6.957497 0.000000 13775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..543b18bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +network 0 168 1.791759 0.000000 61 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +machin 0 129 2.079442 0.000000 95 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +associ 0 93 2.397895 0.000000 151 +learn 1 86 2.484907 2.484907 170 +second 0 81 2.484907 0.000000 166 +novemb 0 81 2.484907 0.000000 179 +resum 0 79 2.564949 0.000000 217 +meet 1 72 2.639057 2.639057 229 +prof 1 64 2.772589 2.772589 273 +wednesdai 0 64 2.772589 0.000000 261 +januari 0 62 2.772589 0.000000 264 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +week 0 52 2.995732 0.000000 343 +archiv 0 49 3.044522 0.000000 364 +natur 1 44 3.135494 3.135494 406 +discuss 1 45 3.135494 3.135494 399 +third 0 43 3.178054 0.000000 412 +probabl 0 40 3.258097 0.000000 455 +close 0 38 3.295837 0.000000 465 +ofth 0 36 3.367296 0.000000 491 +everi 0 34 3.401197 0.000000 519 +print 0 34 3.401197 0.000000 503 +neural 0 30 3.555348 0.000000 578 +particip 0 29 3.583519 0.000000 589 +usual 0 28 3.610918 0.000000 608 +propos 0 28 3.610918 0.000000 602 +utc 1 27 3.637586 3.637586 629 +previous 0 17 4.110874 0.000000 923 +coordin 0 13 4.382027 0.000000 1182 +mari 0 12 4.465908 0.000000 1266 +acquisit 0 10 4.653960 0.000000 1465 +moonei 0 9 4.753590 0.000000 1520 +risto 0 9 4.753590 0.000000 1523 +linguist 0 9 4.753590 0.000000 1593 +miikkulainen 0 8 4.875197 0.000000 1667 +thompson 0 6 5.164786 0.000000 2049 +elain 0 5 5.347108 0.000000 2496 +tang 0 5 5.347108 0.000000 2409 +hermjakob 1 3 5.857933 5.857933 3876 +poon 0 3 5.857933 0.000000 3820 +cindi 0 3 5.857933 0.000000 3830 +signll 0 3 5.857933 0.000000 3877 +groupat 0 2 6.263398 0.000000 5677 +bobbi 0 2 6.263398 0.000000 5678 +califf 0 2 6.263398 0.000000 5664 +marti 0 2 6.263398 0.000000 5679 +mayberri 0 2 6.263398 0.000000 5641 +rupert 0 2 6.263398 0.000000 5680 +acquist 0 1 6.957497 0.000000 13776 +groupnatur 0 1 6.957497 0.000000 13777 +austinw 0 1 6.957497 0.000000 13778 +acquisitionand 0 1 6.957497 0.000000 13779 +havedrawn 0 1 6.957497 0.000000 13780 +bryant 0 1 6.957497 0.000000 13781 +ataustinlast 0 1 6.957497 0.000000 13782 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..9a5a44cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +structur 0 106 2.197225 0.000000 105 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +book 0 99 2.302585 0.000000 131 +sourc 0 77 2.564949 0.000000 201 +intellig 1 72 2.639057 2.639057 225 +artifici 1 63 2.772589 2.772589 280 +prof 0 64 2.772589 0.000000 273 +organ 0 65 2.772589 0.000000 265 +interact 0 62 2.772589 0.000000 270 +function 0 62 2.772589 0.000000 275 +detail 0 57 2.890372 0.000000 321 +visitor 0 49 3.044522 0.000000 371 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +neural 1 30 3.555348 3.555348 578 +utc 1 27 3.637586 3.637586 629 +decis 0 23 3.806662 0.000000 728 +self 0 22 3.850148 0.000000 761 +alumni 0 21 3.912023 0.000000 807 +newsgroup 0 21 3.912023 0.000000 783 +supervis 0 20 3.951244 0.000000 840 +hypertext 0 19 4.007333 0.000000 865 +concentr 0 18 4.060443 0.000000 906 +demo 0 18 4.060443 0.000000 888 +later 0 15 4.248495 0.000000 1043 +evolv 0 12 4.465908 0.000000 1223 +genet 0 10 4.653960 0.000000 1409 +ataustin 0 9 4.753590 0.000000 1610 +risto 0 9 4.753590 0.000000 1523 +miikkulainen 0 8 4.875197 0.000000 1667 +poster 0 7 5.010635 0.000000 1814 +schema 0 6 5.164786 0.000000 1988 +groupth 0 5 5.347108 0.000000 2549 +net 1 4 5.568345 5.568345 2741 +episod 0 4 5.568345 0.000000 2747 +cortic 0 3 5.857933 0.000000 3857 +privat 0 3 5.857933 0.000000 3496 +andcognit 0 2 6.263398 0.000000 5681 +ristomiikkulainen 0 1 6.957497 0.000000 13783 +basedvis 0 1 6.957497 0.000000 13784 +mapbelow 0 1 6.957497 0.000000 13785 +thecortex 0 1 6.957497 0.000000 13786 +linkswusagemartym 0 1 6.957497 0.000000 13787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..a0affdfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +problem 1 147 1.945910 1.945910 75 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +graphic 1 90 2.397895 2.397895 147 +associ 0 93 2.397895 0.000000 151 +internet 0 83 2.484907 0.000000 186 +activ 0 84 2.484907 0.000000 182 +server 1 76 2.564949 2.564949 204 +interfac 0 79 2.564949 0.000000 209 +state 0 76 2.564949 0.000000 207 +free 0 73 2.639057 0.000000 224 +write 0 72 2.639057 0.000000 222 +solv 0 73 2.639057 0.000000 234 +interact 1 62 2.772589 2.772589 270 +artifici 1 63 2.772589 2.772589 280 +creat 0 63 2.772589 0.000000 277 +automat 1 61 2.833213 2.833213 306 +physic 1 47 3.091042 3.091042 377 +vita 0 38 3.295837 0.000000 473 +connect 0 37 3.332205 0.000000 485 +common 0 30 3.555348 0.000000 574 +measur 0 28 3.610918 0.000000 609 +univ 0 28 3.610918 0.000000 617 +honor 0 23 3.806662 0.000000 729 +director 0 22 3.850148 0.000000 767 +unit 1 21 3.912023 3.912023 779 +expert 0 20 3.951244 0.000000 833 +lisp 1 18 4.060443 4.060443 897 +demo 1 18 4.060443 4.060443 888 +atth 0 15 4.248495 0.000000 1019 +english 0 15 4.248495 0.000000 1033 +draw 1 14 4.317488 4.317488 1086 +convert 0 13 4.382027 0.000000 1122 +speech 0 12 4.465908 0.000000 1222 +ofcomput 0 10 4.653960 0.000000 1442 +novak 1 9 4.753590 4.753590 1521 +reus 0 8 4.875197 0.000000 1661 +convers 0 8 4.875197 0.000000 1673 +gordon 1 6 5.164786 5.164786 2032 +shell 0 5 5.347108 0.000000 2353 +diagram 0 5 5.347108 0.000000 2346 +highest 0 4 5.568345 0.000000 2950 +intelligencec 0 4 5.568345 0.000000 2673 +isaac 0 3 5.857933 0.000000 3855 +compilersc 0 2 6.263398 0.000000 4237 +intelligencelaboratori 0 1 6.957497 0.000000 13788 +genericalgorithmssolv 0 1 6.957497 0.000000 13789 +specifiedinformallyartifici 0 1 6.957497 0.000000 13790 +intelligencecurriculum 0 1 6.957497 0.000000 13791 +publicationsemploymentgrantsprofession 0 1 6.957497 0.000000 13792 +honorscurriculum 0 1 6.957497 0.000000 13793 +vitaefre 0 1 6.957497 0.000000 13794 +tmycin 0 1 6.957497 0.000000 13795 +emycin 0 1 6.957497 0.000000 13796 +lispconvers 0 1 6.957497 0.000000 13797 +measurementsoftwar 0 1 6.957497 0.000000 13798 +schemec 0 1 6.957497 0.000000 13799 +programmingweb 0 1 6.957497 0.000000 13800 +linksweatheraddress 0 1 6.957497 0.000000 13801 +ctai 0 1 6.957497 0.000000 13802 +austinaustintexa 0 1 6.957497 0.000000 13803 +faxnovak 0 1 6.957497 0.000000 13804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..3f88597f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +welcom 0 122 2.079442 0.000000 99 +pleas 0 113 2.197225 0.000000 114 +mani 0 92 2.397895 0.000000 150 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +come 0 78 2.564949 0.000000 202 +want 0 79 2.564949 0.000000 199 +resum 0 79 2.564949 0.000000 217 +free 0 73 2.639057 0.000000 224 +august 0 66 2.708050 0.000000 257 +date 0 51 2.995732 0.000000 344 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +get 0 46 3.091042 0.000000 380 +paul 0 38 3.295837 0.000000 471 +feel 0 37 3.332205 0.000000 483 +download 0 36 3.367296 0.000000 489 +hope 0 28 3.610918 0.000000 610 +except 0 28 3.610918 0.000000 607 +thank 0 23 3.806662 0.000000 721 +size 0 23 3.806662 0.000000 713 +sent 0 22 3.850148 0.000000 763 +beauti 0 18 4.060443 0.000000 912 +anyon 0 17 4.110874 0.000000 916 +stock 0 16 4.174387 0.000000 1007 +wait 0 13 4.382027 0.000000 1168 +remov 0 12 4.465908 0.000000 1225 +enter 0 10 4.653960 0.000000 1454 +chanc 0 7 5.010635 0.000000 1960 +poster 0 7 5.010635 0.000000 1814 +hear 0 7 5.010635 0.000000 1940 +marri 0 7 5.010635 0.000000 1946 +feet 0 5 5.347108 0.000000 2492 +blow 0 5 5.347108 0.000000 2407 +complaint 0 4 5.568345 0.000000 2795 +queen 0 4 5.568345 0.000000 2919 +laugh 0 3 5.857933 0.000000 3659 +panic 1 2 6.263398 6.263398 5682 +gorgeou 0 2 6.263398 0.000000 5082 +meghan 1 1 6.957497 6.957497 13805 +insult 1 1 6.957497 6.957497 13806 +brienhi 0 1 6.957497 0.000000 13807 +wipe 0 1 6.957497 0.000000 13808 +crappi 0 1 6.957497 0.000000 13809 +obrien 0 1 6.957497 0.000000 13810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..1e1aa9b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +databas 0 122 2.079442 0.000000 86 +theunivers 0 21 3.912023 0.000000 797 +systemsth 0 3 5.857933 0.000000 3835 +oguer 1 1 6.957497 6.957497 13811 +gutierrezogu 0 1 6.957497 0.000000 13812 +gutierrezth 0 1 6.957497 0.000000 13813 +austinprojectsomioswwhlinksconfer 0 1 6.957497 0.000000 13814 +worldemail 0 1 6.957497 0.000000 13815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..36325a9f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +graduat 0 215 1.386294 0.000000 31 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +utexa 0 189 1.609438 0.000000 44 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +object 1 138 1.945910 1.945910 79 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +like 0 132 1.945910 0.000000 81 +compil 1 122 2.079442 2.079442 96 +introduct 1 126 2.079442 2.079442 87 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +manag 1 114 2.197225 2.197225 125 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +memori 2 101 2.302585 4.605170 139 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +real 1 93 2.397895 2.397895 144 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +larg 1 82 2.484907 2.484907 168 +info 0 85 2.484907 0.000000 176 +contain 0 81 2.484907 0.000000 174 +thing 0 84 2.484907 0.000000 189 +master 1 76 2.564949 2.564949 216 +sourc 1 77 2.564949 2.564949 201 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +good 1 77 2.564949 2.564949 200 +dynam 0 76 2.564949 0.000000 194 +refer 0 78 2.564949 0.000000 203 +effici 1 73 2.639057 2.639057 233 +write 0 72 2.639057 0.000000 222 +onlin 0 75 2.639057 0.000000 223 +materi 0 75 2.639057 0.000000 221 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +integr 0 67 2.708050 0.000000 245 +collect 1 65 2.772589 2.772589 268 +virtual 1 62 2.772589 2.772589 285 +descript 1 64 2.772589 2.772589 271 +prof 0 64 2.772589 0.000000 273 +written 0 63 2.772589 0.000000 278 +improv 0 62 2.772589 0.000000 289 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +thesi 1 57 2.890372 2.890372 327 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +extens 1 53 2.944439 2.944439 340 +three 0 54 2.944439 0.000000 330 +local 0 55 2.944439 0.000000 334 +hardwar 0 51 2.995732 0.000000 350 +much 0 52 2.995732 0.000000 349 +pointer 1 48 3.044522 3.044522 368 +standard 1 48 3.044522 3.044522 365 +basic 0 50 3.044522 0.000000 360 +done 0 47 3.091042 0.000000 381 +adapt 0 46 3.091042 0.000000 387 +mark 0 44 3.135494 0.000000 403 +keep 0 44 3.135494 0.000000 409 +anoth 0 45 3.135494 0.000000 408 +cach 0 41 3.218876 0.000000 432 +small 0 39 3.258097 0.000000 447 +form 0 39 3.258097 0.000000 443 +paul 1 38 3.295837 3.295837 471 +open 0 38 3.295837 0.000000 469 +especi 1 36 3.367296 3.367296 496 +survei 1 35 3.401197 3.401197 513 +michael 0 35 3.401197 0.000000 514 +bibliographi 0 34 3.401197 0.000000 518 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +extend 0 32 3.465736 0.000000 539 +storag 0 31 3.496508 0.000000 553 +hard 0 30 3.555348 0.000000 563 +travel 0 30 3.555348 0.000000 579 +releas 0 28 3.610918 0.000000 616 +progress 0 28 3.610918 0.000000 598 +great 0 27 3.637586 0.000000 626 +although 0 25 3.737670 0.000000 667 +store 1 24 3.761200 3.761200 693 +interpret 1 24 3.761200 3.761200 686 +mike 0 24 3.761200 0.000000 703 +highli 0 23 3.806662 0.000000 725 +thread 0 23 3.806662 0.000000 722 +brows 0 23 3.806662 0.000000 726 +hierarchi 0 22 3.850148 0.000000 744 +try 0 22 3.850148 0.000000 764 +scheme 2 20 3.951244 7.902488 818 +alloc 1 20 3.951244 3.951244 821 +supervis 0 20 3.951244 0.000000 840 +portabl 0 20 3.951244 0.000000 819 +mostli 0 19 4.007333 0.000000 869 +scott 0 18 4.060443 0.000000 884 +behavior 0 18 4.060443 0.000000 881 +debug 0 17 4.110874 0.000000 944 +coupl 0 17 4.110874 0.000000 939 +intro 0 17 4.110874 0.000000 915 +georg 0 16 4.174387 0.000000 994 +stock 0 16 4.174387 0.000000 1007 +brief 0 16 4.174387 0.000000 1001 +carl 1 15 4.248495 4.248495 1024 +anywai 0 15 4.248495 0.000000 1047 +ascii 0 15 4.248495 0.000000 1032 +draft 1 14 4.317488 4.317488 1085 +anonym 0 14 4.317488 0.000000 1100 +stephen 1 11 4.553877 4.553877 1342 +persist 1 11 4.553877 4.553877 1367 +smart 0 11 4.553877 0.000000 1352 +alpha 0 11 4.553877 0.000000 1348 +henri 0 10 4.653960 0.000000 1417 +wilson 1 9 4.753590 4.753590 1536 +oop 1 8 4.875197 4.875197 1778 +besid 0 8 4.875197 0.000000 1681 +readm 0 8 4.875197 0.000000 1699 +baker 0 7 5.010635 0.000000 1812 +garbag 1 6 5.164786 5.164786 1986 +oopsla 0 6 5.164786 0.000000 2221 +dougla 0 5 5.347108 0.000000 2471 +decad 0 5 5.347108 0.000000 2455 +overload 0 5 5.347108 0.000000 2403 +whichcontain 1 4 5.568345 5.568345 2714 +ajit 0 3 5.857933 0.000000 3299 +qing 0 3 5.857933 0.000000 3295 +swizzl 0 3 5.857933 0.000000 3883 +andoper 0 3 5.857933 0.000000 3621 +forfault 0 3 5.857933 0.000000 3748 +rscheme 0 3 5.857933 0.000000 3250 +tosupport 0 3 5.857933 0.000000 3613 +providesa 0 3 5.857933 0.000000 3884 +heap 0 3 5.857933 0.000000 3123 +collector 1 2 6.263398 6.263398 5683 +sheetal 1 2 6.263398 6.263398 5684 +kakkad 1 2 6.263398 6.263398 5685 +donovan 1 2 6.263398 6.263398 4371 +kolbl 1 2 6.263398 6.263398 4372 +macro 1 2 6.263398 6.263398 5686 +johnston 0 2 6.263398 0.000000 5638 +repair 0 2 6.263398 0.000000 4198 +damag 0 2 6.263398 0.000000 5687 +checkpoint 0 2 6.263398 0.000000 4205 +programmingsystem 0 2 6.263398 0.000000 5688 +socket 0 2 6.263398 0.000000 4725 +materiali 0 2 6.263398 0.000000 4214 +subdirectori 0 2 6.263398 0.000000 4133 +han 0 2 6.263398 0.000000 4535 +neeli 1 1 6.957497 6.957497 13816 +groupoop 0 1 6.957497 0.000000 13817 +groupthi 0 1 6.957497 0.000000 13818 +studentsin 0 1 6.957497 0.000000 13819 +kaplan 0 1 6.957497 0.000000 13820 +wieren 0 1 6.957497 0.000000 13821 +toimplement 0 1 6.957497 0.000000 13822 +whichattempt 0 1 6.957497 0.000000 13823 +unsoundstudi 0 1 6.957497 0.000000 13824 +generationaland 0 1 6.957497 0.000000 13825 +ongarbag 0 1 6.957497 0.000000 13826 +managementfor 0 1 6.957497 0.000000 13827 +andcompress 0 1 6.957497 0.000000 13828 +noteson 0 1 6.957497 0.000000 13829 +rawascii 0 1 6.957497 0.000000 13830 +andrschemear 0 1 6.957497 0.000000 13831 +thesiscontain 0 1 6.957497 0.000000 13832 +whicharen 0 1 6.957497 0.000000 13833 +sometimesoon 0 1 6.957497 0.000000 13834 +htmlformat 0 1 6.957497 0.000000 13835 +materialfrom 0 1 6.957497 0.000000 13836 +expandedpresent 0 1 6.957497 0.000000 13837 +texinfo 0 1 6.957497 0.000000 13838 +metaobject 0 1 6.957497 0.000000 13839 +backgroundread 0 1 6.957497 0.000000 13840 +fortexa 0 1 6.957497 0.000000 13841 +sftp 0 1 6.957497 0.000000 13842 +notb 0 1 6.957497 0.000000 13843 +boehm 0 1 6.957497 0.000000 13844 +severalgarbag 0 1 6.957497 0.000000 13845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..bdf8ed15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +phone 0 175 1.791759 0.000000 45 +robert 0 30 3.555348 0.000000 567 +otuomagieaddress 0 1 6.957497 0.000000 13846 +emailotu 0 1 6.957497 0.000000 13847 +eduuniververs 0 1 6.957497 0.000000 13848 +infouniversityth 0 1 6.957497 0.000000 13849 +txa 0 1 6.957497 0.000000 13850 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..b331684c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +class 1 199 1.609438 1.609438 37 +utexa 0 189 1.609438 0.000000 44 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +austin 1 168 1.791759 1.791759 63 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +file 2 132 1.945910 3.891820 70 +construct 1 139 1.945910 1.945910 82 +professor 0 137 1.945910 0.000000 76 +compil 0 122 2.079442 0.000000 96 +technolog 0 131 2.079442 0.000000 102 +postscript 0 131 2.079442 0.000000 90 +specif 1 106 2.197225 2.197225 106 +follow 1 92 2.397895 2.397895 143 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +control 1 82 2.484907 2.484907 164 +environ 0 84 2.484907 0.000000 177 +contain 0 81 2.484907 0.000000 174 +requir 0 81 2.484907 0.000000 167 +interfac 1 79 2.564949 2.564949 209 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +view 1 70 2.708050 2.708050 254 +window 0 68 2.708050 0.000000 242 +virtual 2 62 2.772589 5.545178 285 +creat 0 63 2.772589 0.000000 277 +variou 0 56 2.890372 0.000000 317 +investig 0 51 2.995732 0.000000 353 +physic 1 47 3.091042 3.091042 377 +featur 0 46 3.091042 0.000000 386 +prototyp 0 38 3.295837 0.000000 463 +microsoft 0 38 3.295837 0.000000 468 +manual 0 35 3.401197 0.000000 504 +dissert 0 32 3.465736 0.000000 549 +compon 1 30 3.555348 3.555348 570 +domain 1 30 3.555348 3.555348 564 +focu 0 30 3.555348 0.000000 571 +specifi 0 30 3.555348 0.000000 568 +effort 0 26 3.688879 0.000000 652 +reduc 0 22 3.850148 0.000000 759 +thu 0 21 3.912023 0.000000 773 +devic 2 16 4.174387 8.348774 1002 +brown 0 16 4.174387 0.000000 977 +draft 0 14 4.317488 0.000000 1085 +transpar 0 11 4.553877 0.000000 1325 +devis 0 10 4.653960 0.000000 1451 +researchi 0 8 4.875197 0.000000 1756 +driver 0 8 4.875197 0.000000 1657 +counter 0 8 4.875197 0.000000 1765 +creation 0 6 5.164786 0.000000 2069 +andimplement 0 4 5.568345 0.000000 3029 +multifunct 0 3 5.857933 0.000000 3826 +meemail 0 3 5.857933 0.000000 3821 +edupost 0 3 5.857933 0.000000 3822 +export 1 2 6.263398 6.263398 5689 +manualfor 0 2 6.263398 0.000000 4720 +padgett 1 1 6.957497 6.957497 13851 +padgettdon 0 1 6.957497 0.000000 13852 +softar 0 1 6.957497 0.000000 13853 +powerpointvers 0 1 6.957497 0.000000 13854 +usafax 0 1 6.957497 0.000000 13855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..3d836e81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +softwar 1 220 1.386294 1.386294 30 +gener 0 220 1.386294 0.000000 27 +utexa 1 189 1.609438 1.609438 44 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +model 1 145 1.945910 1.945910 69 +file 0 132 1.945910 0.000000 70 +area 0 144 1.945910 0.000000 80 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +person 0 111 2.197225 0.000000 117 +access 0 102 2.302585 0.000000 136 +imag 0 91 2.397895 0.000000 161 +graphic 0 90 2.397895 0.000000 147 +librari 1 87 2.484907 2.484907 181 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +server 0 76 2.564949 0.000000 204 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +window 0 68 2.708050 0.000000 242 +main 0 67 2.708050 0.000000 256 +plan 1 65 2.772589 2.772589 272 +automat 0 61 2.833213 0.000000 306 +space 0 57 2.890372 0.000000 310 +standard 0 48 3.044522 0.000000 365 +cool 0 49 3.044522 0.000000 374 +get 0 46 3.091042 0.000000 380 +execut 0 45 3.135494 0.000000 404 +realli 0 40 3.258097 0.000000 444 +game 1 36 3.367296 3.367296 498 +tree 1 36 3.367296 3.367296 492 +go 0 33 3.433987 0.000000 529 +handl 0 24 3.761200 0.000000 685 +inth 0 22 3.850148 0.000000 741 +binari 1 20 3.951244 3.951244 823 +wrote 0 20 3.951244 0.000000 830 +geometr 1 19 4.007333 4.007333 852 +partit 0 16 4.174387 0.000000 984 +unfortun 0 13 4.382027 0.000000 1170 +solari 0 12 4.465908 0.000000 1238 +guess 0 10 4.653960 0.000000 1443 +elimin 0 9 4.753590 0.000000 1558 +fail 0 8 4.875197 0.000000 1655 +port 0 8 4.875197 0.000000 1766 +reus 0 8 4.875197 0.000000 1661 +bug 0 7 5.010635 0.000000 1801 +philip 0 6 5.164786 0.000000 2005 +templat 0 5 5.347108 0.000000 2311 +anda 0 5 5.347108 0.000000 2416 +suno 0 4 5.568345 0.000000 2790 +screenshot 0 4 5.568345 0.000000 2743 +campbel 0 3 5.857933 0.000000 3272 +meemail 0 3 5.857933 0.000000 3821 +edupost 0 3 5.857933 0.000000 3822 +everywher 0 2 6.263398 0.000000 5690 +curli 0 2 6.263398 0.000000 5691 +battlebal 1 1 6.957497 6.957497 13856 +hardinphilip 0 1 6.957497 0.000000 13857 +hardinabout 0 1 6.957497 0.000000 13858 +fallback 0 1 6.957497 0.000000 13859 +multiplay 0 1 6.957497 0.000000 13860 +runsund 0 1 6.957497 0.000000 13861 +graphicssoftwar 0 1 6.957497 0.000000 13862 +programmingto 0 1 6.957497 0.000000 13863 +pahardin 0 1 6.957497 0.000000 13864 +usanetrek 0 1 6.957497 0.000000 13865 +pita 0 1 6.957497 0.000000 13866 +digitaldisast 0 1 6.957497 0.000000 13867 +plaster 0 1 6.957497 0.000000 13868 +congradul 0 1 6.957497 0.000000 13869 +smartest 0 1 6.957497 0.000000 13870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..37e81b7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +utexa 0 189 1.609438 0.000000 44 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +schedul 0 119 2.079442 0.000000 85 +check 0 115 2.197225 0.000000 118 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +need 0 98 2.302585 0.000000 135 +learn 1 86 2.484907 2.484907 170 +librari 0 87 2.484907 0.000000 181 +exampl 0 77 2.564949 0.000000 195 +addit 0 74 2.639057 0.000000 228 +intellig 0 72 2.639057 0.000000 225 +onlin 0 75 2.639057 0.000000 223 +knowledg 0 67 2.708050 0.000000 243 +interact 0 62 2.772589 0.000000 270 +taylor 0 63 2.772589 0.000000 287 +artifici 0 63 2.772589 0.000000 280 +think 0 57 2.890372 0.000000 314 +pointer 0 48 3.044522 0.000000 368 +natur 0 44 3.135494 0.000000 406 +mechan 0 43 3.178054 0.000000 416 +might 0 41 3.218876 0.000000 426 +paul 0 38 3.295837 0.000000 471 +postal 0 30 3.555348 0.000000 580 +neural 0 30 3.555348 0.000000 578 +usual 0 28 3.610918 0.000000 608 +head 0 23 3.806662 0.000000 732 +reflect 0 15 4.248495 0.000000 1034 +dave 0 14 4.317488 0.000000 1098 +hotlist 0 13 4.382027 0.000000 1199 +pascal 0 12 4.465908 0.000000 1213 +evolut 1 11 4.553877 4.553877 1314 +surf 0 11 4.553877 0.000000 1301 +death 0 10 4.653960 0.000000 1457 +handi 0 6 5.164786 0.000000 2111 +mix 0 6 5.164786 0.000000 2200 +studentdepart 0 5 5.347108 0.000000 2505 +explicitli 0 5 5.347108 0.000000 2308 +seriou 0 5 5.347108 0.000000 2252 +wast 0 5 5.347108 0.000000 2537 +austindepart 0 4 5.568345 0.000000 3008 +websit 0 4 5.568345 0.000000 2726 +neuro 0 2 6.263398 0.000000 4265 +mcquestenpaul 0 1 6.957497 0.000000 13871 +mcquestenphd 0 1 6.957497 0.000000 13872 +bepract 0 1 6.957497 0.000000 13873 +paulmcq 0 1 6.957497 0.000000 13874 +forcsp 0 1 6.957497 0.000000 13875 +programmingmor 0 1 6.957497 0.000000 13876 +inmoriarti 0 1 6.957497 0.000000 13877 +atcnr 0 1 6.957497 0.000000 13878 +rome 0 1 6.957497 0.000000 13879 +tout 0 1 6.957497 0.000000 13880 +winer 0 1 6.957497 0.000000 13881 +cynb 0 1 6.957497 0.000000 13882 +humong 0 1 6.957497 0.000000 13883 +knick 0 1 6.957497 0.000000 13884 +knack 0 1 6.957497 0.000000 13885 +nut 0 1 6.957497 0.000000 13886 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..ad72865f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +utexa 1 189 1.609438 1.609438 44 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +network 0 168 1.791759 0.000000 61 +file 0 132 1.945910 0.000000 70 +check 0 115 2.197225 0.000000 118 +comment 0 93 2.397895 0.000000 146 +summari 0 73 2.639057 0.000000 237 +multimedia 1 68 2.708050 2.708050 258 +finger 0 52 2.995732 0.000000 354 +get 0 46 3.091042 0.000000 380 +protocol 0 45 3.135494 0.000000 407 +log 0 19 4.007333 0.000000 857 +affili 0 13 4.382027 0.000000 1194 +touch 0 12 4.465908 0.000000 1288 +goyal 0 3 5.857933 0.000000 3268 +pawang 1 1 6.957497 6.957497 13887 +pawan 0 1 6.957497 0.000000 13888 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..e6cc67a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +paper 0 205 1.609438 0.000000 38 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +parallel 1 169 1.791759 1.791759 60 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +theori 1 111 2.197225 2.197225 127 +center 0 88 2.397895 0.000000 158 +journal 0 83 2.484907 0.000000 183 +complet 0 77 2.564949 0.000000 208 +master 0 76 2.564949 0.000000 216 +want 0 79 2.564949 0.000000 199 +main 0 67 2.708050 0.000000 256 +previou 0 62 2.772589 0.000000 290 +explor 1 58 2.890372 2.890372 324 +thesi 0 57 2.890372 0.000000 327 +publish 0 57 2.890372 0.000000 326 +scientif 1 53 2.944439 2.944439 341 +three 0 54 2.944439 0.000000 330 +advisor 1 51 2.995732 2.995732 355 +numer 1 49 3.044522 3.044522 369 +visitor 0 49 3.044522 0.000000 371 +physic 1 47 3.091042 3.091042 377 +algebra 0 45 3.135494 0.000000 394 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +societi 0 40 3.258097 0.000000 456 +open 0 38 3.295837 0.000000 469 +vita 0 38 3.295837 0.000000 473 +field 0 37 3.332205 0.000000 482 +represent 1 35 3.401197 3.401197 512 +print 0 34 3.401197 0.000000 503 +obtain 1 33 3.433987 3.433987 534 +curriculum 0 33 3.433987 0.000000 535 +dissert 1 32 3.465736 3.465736 549 +posit 0 31 3.496508 0.000000 552 +spent 0 25 3.737670 0.000000 676 +finish 0 22 3.850148 0.000000 748 +sequenti 0 22 3.850148 0.000000 745 +half 0 21 3.912023 0.000000 776 +previous 0 17 4.110874 0.000000 923 +germani 0 17 4.110874 0.000000 946 +jose 1 16 4.174387 4.174387 976 +georg 0 16 4.174387 0.000000 994 +joint 0 13 4.382027 0.000000 1130 +econom 0 13 4.382027 0.000000 1184 +mellon 0 13 4.382027 0.000000 1179 +usavoic 0 13 4.382027 0.000000 1198 +calcul 0 12 4.465908 0.000000 1268 +carnegi 0 12 4.465908 0.000000 1260 +fill 0 11 4.553877 0.000000 1349 +cryptographi 0 9 4.753590 0.000000 1512 +rel 0 9 4.753590 0.000000 1487 +invari 1 8 4.875197 4.875197 1748 +pittsburgh 0 7 5.010635 0.000000 1938 +quantum 1 6 5.164786 5.164786 2214 +particl 0 5 5.347108 0.000000 2436 +invers 0 4 5.568345 0.000000 2764 +sudarshan 0 3 5.857933 0.000000 3885 +supervisor 0 3 5.857933 0.000000 3754 +quantiz 0 2 6.263398 0.000000 5692 +irreduc 0 2 6.263398 0.000000 4890 +lemk 0 2 6.263398 0.000000 5693 +thephys 0 2 6.263398 0.000000 5694 +symmetri 0 2 6.263398 0.000000 5517 +pecina 1 1 6.957497 6.957497 13889 +orpecina 1 1 6.957497 6.957497 13890 +pecinaabout 0 1 6.957497 0.000000 13891 +innuclear 0 1 6.957497 0.000000 13892 +workedinvestig 0 1 6.957497 0.000000 13893 +gaug 0 1 6.957497 0.000000 13894 +graviti 0 1 6.957497 0.000000 13895 +gravit 0 1 6.957497 0.000000 13896 +unitari 0 1 6.957497 0.000000 13897 +yuval 0 1 6.957497 0.000000 13898 +eman 0 1 6.957497 0.000000 13899 +jurgen 0 1 6.957497 0.000000 13900 +fromcologn 0 1 6.957497 0.000000 13901 +bureau 0 1 6.957497 0.000000 13902 +geologi 0 1 6.957497 0.000000 13903 +seismic 0 1 6.957497 0.000000 13904 +tomographi 0 1 6.957497 0.000000 13905 +hardag 0 1 6.957497 0.000000 13906 +geophys 0 1 6.957497 0.000000 13907 +geophysicist 0 1 6.957497 0.000000 13908 +comerci 0 1 6.957497 0.000000 13909 +solutionsin 0 1 6.957497 0.000000 13910 +chromodynamicsmi 0 1 6.957497 0.000000 13911 +defo 0 1 6.957497 0.000000 13912 +phy 0 1 6.957497 0.000000 13913 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..84ab545d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +file 0 132 1.945910 0.000000 70 +welcom 0 122 2.079442 0.000000 99 +find 1 111 2.197225 2.197225 111 +make 0 111 2.197225 0.000000 120 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +stuff 0 87 2.484907 0.000000 171 +chang 0 82 2.484907 0.000000 163 +know 1 80 2.564949 2.564949 198 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +would 1 67 2.708050 2.708050 251 +window 0 68 2.708050 0.000000 242 +unix 1 58 2.890372 2.890372 308 +think 0 57 2.890372 0.000000 314 +cool 0 49 3.044522 0.000000 374 +could 0 46 3.091042 0.000000 383 +realli 0 40 3.258097 0.000000 444 +mean 0 37 3.332205 0.000000 477 +short 0 36 3.367296 0.000000 499 +staff 0 36 3.367296 0.000000 490 +experiment 0 26 3.688879 0.000000 645 +instal 0 22 3.850148 0.000000 754 +increas 0 20 3.951244 0.000000 829 +anyon 0 17 4.110874 0.000000 916 +protect 0 17 4.110874 0.000000 935 +drive 0 15 4.248495 0.000000 1052 +floor 0 14 4.317488 0.000000 1070 +stori 0 14 4.317488 0.000000 1087 +comic 0 14 4.317488 0.000000 1103 +neat 0 12 4.465908 0.000000 1263 +true 0 10 4.653960 0.000000 1422 +vista 0 10 4.653960 0.000000 1452 +chanc 0 7 5.010635 0.000000 1960 +escap 0 4 5.568345 0.000000 3016 +meyour 0 3 5.857933 0.000000 3858 +blah 0 2 6.263398 0.000000 5695 +drastic 0 2 6.263398 0.000000 4201 +woof 1 1 6.957497 6.957497 13914 +nettl 0 1 6.957497 0.000000 13915 +cornerinfolik 0 1 6.957497 0.000000 13916 +blahblah 0 1 6.957497 0.000000 13917 +eeek 0 1 6.957497 0.000000 13918 +ibm 0 1 6.957497 0.000000 13919 +afteri 0 1 6.957497 0.000000 13920 +theinnoc 0 1 6.957497 0.000000 13921 +buena 0 1 6.957497 0.000000 13922 +movieplex 0 1 6.957497 0.000000 13923 +employan 0 1 6.957497 0.000000 13924 +improb 0 1 6.957497 0.000000 13925 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..bfda7b7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +hall 1 146 1.945910 1.945910 65 +report 0 131 2.079442 0.000000 92 +decemb 0 80 2.564949 0.000000 215 +taylor 0 63 2.772589 0.000000 287 +annual 0 40 3.258097 0.000000 458 +postal 0 30 3.555348 0.000000 580 +greg 1 24 3.761200 3.761200 695 +eduphon 0 15 4.248495 0.000000 1060 +informationemail 0 9 4.753590 0.000000 1564 +austinaustin 0 7 5.010635 0.000000 1966 +plaxton 0 3 5.857933 0.000000 3886 +plaxtongreg 1 1 6.957497 6.957497 13926 +plaxtoncontact 0 1 6.957497 0.000000 13927 +sciencetaylor 0 1 6.957497 0.000000 13928 +profilepubl 0 1 6.957497 0.000000 13929 +plaxtonplaxton 0 1 6.957497 0.000000 13930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..fb76afcd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +public 1 202 1.609438 1.609438 43 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +base 1 165 1.791759 1.791759 50 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +machin 1 129 2.079442 2.079442 95 +postscript 0 131 2.079442 0.000000 90 +theori 0 111 2.197225 0.000000 127 +site 0 106 2.197225 0.000000 119 +search 1 95 2.397895 2.397895 155 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +learn 1 86 2.484907 2.484907 170 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +method 0 80 2.564949 0.000000 213 +intellig 1 72 2.639057 2.639057 225 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +knowledg 1 67 2.708050 2.708050 243 +abstract 1 62 2.772589 2.772589 276 +artifici 0 63 2.772589 0.000000 280 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +investig 0 51 2.995732 0.000000 353 +case 0 51 2.995732 0.000000 351 +california 0 46 3.091042 0.000000 388 +answer 1 45 3.135494 3.135494 391 +editor 0 41 3.218876 0.000000 433 +autom 0 41 3.218876 0.000000 434 +press 0 42 3.218876 0.000000 419 +award 0 34 3.401197 0.000000 523 +tech 0 35 3.401197 0.000000 515 +concept 0 32 3.465736 0.000000 537 +domain 0 30 3.555348 0.000000 564 +scale 0 28 3.610918 0.000000 613 +rule 0 26 3.688879 0.000000 638 +fellow 0 24 3.761200 0.000000 701 +honor 0 23 3.806662 0.000000 729 +predict 0 19 4.007333 0.000000 855 +young 0 16 4.174387 0.000000 991 +cambridg 0 16 4.174387 0.000000 1008 +weak 0 13 4.382027 0.000000 1159 +hotlist 0 13 4.382027 0.000000 1199 +bruce 1 12 4.465908 4.465908 1226 +classif 0 9 4.753590 0.000000 1586 +aaai 1 8 4.875197 4.875197 1750 +irvin 0 8 4.875197 0.000000 1660 +presidenti 0 8 4.875197 0.000000 1737 +boundari 0 7 5.010635 0.000000 1929 +heurist 0 6 5.164786 0.000000 2125 +porter 1 5 5.347108 5.347108 2293 +complementari 0 5 5.347108 0.000000 2523 +presentarea 0 4 5.568345 0.000000 3026 +thetim 0 3 5.857933 0.000000 3581 +preced 0 3 5.857933 0.000000 3107 +researchinterest 0 2 6.263398 0.000000 5123 +rickel 0 2 6.263398 0.000000 5648 +andpostscript 0 2 6.263398 0.000000 5696 +brant 0 2 6.263398 0.000000 5652 +warrant 0 2 6.263398 0.000000 5697 +bareiss 0 2 6.263398 0.000000 5646 +porterassoci 0 1 6.957497 0.000000 13931 +interestartifici 0 1 6.957497 0.000000 13932 +researchhead 0 1 6.957497 0.000000 13933 +basesand 0 1 6.957497 0.000000 13934 +aait 0 1 6.957497 0.000000 13935 +holt 0 1 6.957497 0.000000 13936 +abstractand 0 1 6.957497 0.000000 13937 +reportport 0 1 6.957497 0.000000 13938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..54af0112 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +link 0 247 1.386294 0.000000 24 +languag 0 227 1.386294 0.000000 26 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +model 0 145 1.945910 0.000000 69 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +introduct 1 126 2.079442 2.079442 87 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +specif 0 106 2.197225 0.000000 106 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +book 1 99 2.302585 2.302585 131 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +sinc 0 90 2.397895 0.000000 159 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +member 0 84 2.484907 0.000000 165 +refer 0 78 2.564949 0.000000 203 +state 0 76 2.564949 0.000000 207 +exampl 0 77 2.564949 0.000000 195 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +logic 1 71 2.639057 2.639057 230 +write 0 72 2.639057 0.000000 222 +effici 0 73 2.639057 0.000000 233 +differ 1 66 2.708050 2.708050 253 +written 1 63 2.772589 2.772589 278 +foundat 0 62 2.772589 0.000000 286 +result 0 65 2.772589 0.000000 281 +improv 0 62 2.772589 0.000000 289 +simpl 0 60 2.833213 0.000000 298 +reason 0 57 2.890372 0.000000 318 +variou 0 56 2.890372 0.000000 317 +sever 0 56 2.890372 0.000000 322 +detail 0 57 2.890372 0.000000 321 +overview 0 56 2.890372 0.000000 323 +thesi 0 57 2.890372 0.000000 327 +found 0 53 2.944439 0.000000 337 +give 0 50 3.044522 0.000000 359 +basic 0 50 3.044522 0.000000 360 +possibl 0 47 3.091042 0.000000 378 +electron 0 47 3.091042 0.000000 379 +understand 0 47 3.091042 0.000000 384 +mark 0 44 3.135494 0.000000 403 +made 0 44 3.135494 0.000000 398 +describ 0 45 3.135494 0.000000 400 +fast 0 42 3.218876 0.000000 429 +map 0 39 3.258097 0.000000 452 +correct 1 38 3.295837 3.295837 462 +concurr 0 34 3.401197 0.000000 501 +extend 0 32 3.465736 0.000000 539 +transform 0 32 3.465736 0.000000 542 +chapter 0 32 3.465736 0.000000 536 +specifi 1 30 3.555348 3.555348 568 +power 0 30 3.555348 0.000000 573 +compon 0 30 3.555348 0.000000 570 +synchron 0 29 3.583519 0.000000 588 +aspect 0 25 3.737670 0.000000 663 +seri 0 24 3.761200 0.000000 708 +proof 1 23 3.806662 3.806662 720 +emphasi 0 22 3.850148 0.000000 755 +inth 0 22 3.850148 0.000000 741 +sort 0 22 3.850148 0.000000 738 +divis 0 21 3.912023 0.000000 803 +safeti 0 20 3.951244 0.000000 817 +assum 0 19 4.007333 0.000000 845 +prove 0 19 4.007333 0.000000 848 +stand 0 18 4.060443 0.000000 891 +adam 1 17 4.110874 4.110874 934 +former 0 17 4.110874 0.000000 956 +weslei 0 16 4.174387 0.000000 983 +reflect 0 15 4.248495 0.000000 1034 +hybrid 0 15 4.248495 0.000000 1057 +manner 0 14 4.317488 0.000000 1074 +balanc 0 14 4.317488 0.000000 1112 +circuit 1 13 4.382027 4.382027 1131 +recurs 0 13 4.382027 0.000000 1127 +composit 0 13 4.382027 0.000000 1150 +addison 0 12 4.465908 0.000000 1230 +verifi 0 12 4.465908 0.000000 1261 +summar 0 11 4.553877 0.000000 1295 +length 0 10 4.653960 0.000000 1400 +equal 0 10 4.653960 0.000000 1424 +arithmet 0 10 4.653960 0.000000 1388 +notat 1 9 4.753590 4.753590 1489 +ataustin 0 9 4.753590 0.000000 1610 +tempor 0 9 4.753590 0.000000 1584 +misra 2 7 5.010635 10.021270 1856 +restrict 0 6 5.164786 0.000000 2129 +multiprogram 0 6 5.164786 0.000000 2010 +jayadev 1 4 5.568345 5.568345 3006 +jacob 1 4 5.568345 5.568345 2667 +rigor 0 4 5.568345 0.000000 3030 +anddistribut 0 4 5.568345 0.000000 3031 +ofprogram 0 4 5.568345 0.000000 2624 +monograph 0 4 5.568345 0.000000 2860 +uniti 2 3 5.857933 11.715866 3812 +kornerup 1 3 5.857933 5.857933 3215 +marku 1 3 5.857933 5.857933 3872 +cohen 0 3 5.857933 0.000000 3652 +rajeev 0 3 5.857933 0.000000 3152 +checker 0 3 5.857933 0.000000 3644 +parallelalgorithm 0 3 5.857933 0.000000 3249 +alsoavail 0 3 5.857933 0.000000 3887 +powerlist 1 2 6.263398 6.263398 5660 +seuss 1 2 6.263398 6.263398 5662 +carruth 1 2 6.263398 6.263398 5495 +ofpap 1 2 6.263398 6.263398 4329 +erni 0 2 6.263398 0.000000 5104 +joshi 0 2 6.263398 0.000000 4202 +chandi 0 2 6.263398 0.000000 5661 +fourier 0 2 6.263398 0.000000 5698 +offspr 0 2 6.263398 0.000000 5699 +froma 0 2 6.263398 0.000000 4862 +basedprogram 0 2 6.263398 0.000000 5700 +kaltenbach 1 1 6.957497 6.957497 13939 +thepowerlist 1 1 6.957497 6.957497 13940 +austinpsp 0 1 6.957497 0.000000 13941 +austinthi 0 1 6.957497 0.000000 13942 +deriveparallel 0 1 6.957497 0.000000 13943 +issupervis 0 1 6.957497 0.000000 13944 +groupinclud 0 1 6.957497 0.000000 13945 +edgar 0 1 6.957497 0.000000 13946 +knapp 0 1 6.957497 0.000000 13947 +ingolf 0 1 6.957497 0.000000 13948 +krger 0 1 6.957497 0.000000 13949 +josyula 0 1 6.957497 0.000000 13950 +staskauska 0 1 6.957497 0.000000 13951 +publicationsbelow 0 1 6.957497 0.000000 13952 +wherev 0 1 6.957497 0.000000 13953 +topap 0 1 6.957497 0.000000 13954 +thenot 0 1 6.957497 0.000000 13955 +inchandi 0 1 6.957497 0.000000 13956 +amanuscript 0 1 6.957497 0.000000 13957 +newun 0 1 6.957497 0.000000 13958 +operatorco 0 1 6.957497 0.000000 13959 +forrefer 0 1 6.957497 0.000000 13960 +asymbol 0 1 6.957497 0.000000 13961 +forfinit 0 1 6.957497 0.000000 13962 +unityverifi 0 1 6.957497 0.000000 13963 +toinclud 0 1 6.957497 0.000000 13964 +twodiffer 0 1 6.957497 0.000000 13965 +succinct 0 1 6.957497 0.000000 13966 +givesnumer 0 1 6.957497 0.000000 13967 +batcher 0 1 6.957497 0.000000 13968 +asadd 0 1 6.957497 0.000000 13969 +multipli 0 1 6.957497 0.000000 13970 +addercircuit 0 1 6.957497 0.000000 13971 +programscan 0 1 6.957497 0.000000 13972 +speciallyhypercub 0 1 6.957497 0.000000 13973 +caninterfer 0 1 6.957497 0.000000 13974 +adisciplin 0 1 6.957497 0.000000 13975 +genrat 0 1 6.957497 0.000000 13976 +callsfor 0 1 6.957497 0.000000 13977 +anexperi 0 1 6.957497 0.000000 13978 +ingolfkrg 0 1 6.957497 0.000000 13979 +thepsp 0 1 6.957497 0.000000 13980 +sitejacob 0 1 6.957497 0.000000 13981 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..24f7f6d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +phone 0 175 1.791759 0.000000 45 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +construct 0 139 1.945910 0.000000 82 +spring 0 131 2.079442 0.000000 88 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +commun 0 95 2.397895 0.000000 157 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +server 0 76 2.564949 0.000000 204 +appli 0 71 2.639057 0.000000 226 +line 0 75 2.639057 0.000000 231 +name 0 72 2.639057 0.000000 220 +visit 0 63 2.772589 0.000000 288 +room 0 59 2.833213 0.000000 301 +march 0 61 2.833213 0.000000 295 +electr 0 38 3.295837 0.000000 461 +sciencesunivers 0 37 3.332205 0.000000 486 +connect 0 37 3.332205 0.000000 485 +client 0 25 3.737670 0.000000 679 +sign 0 16 4.174387 0.000000 970 +guest 1 12 4.465908 4.465908 1220 +huang 1 12 4.465908 4.465908 1202 +card 0 10 4.653960 0.000000 1435 +placement 0 10 4.653960 0.000000 1420 +telecommun 0 9 4.753590 0.000000 1565 +job 0 8 4.875197 0.000000 1702 +postcard 0 6 5.164786 0.000000 2181 +attract 0 5 5.347108 0.000000 2356 +hawaii 0 3 5.857933 0.000000 3888 +csc 0 3 5.857933 0.000000 3183 +nankai 0 2 6.263398 0.000000 5147 +tianjin 0 2 6.263398 0.000000 5148 +junk 0 2 6.263398 0.000000 5701 +qime 1 1 6.957497 6.957497 13982 +edureceiv 0 1 6.957497 0.000000 13983 +univeris 0 1 6.957497 0.000000 13984 +manoa 0 1 6.957497 0.000000 13985 +hawaiiwork 0 1 6.957497 0.000000 13986 +austincours 0 1 6.957497 0.000000 13987 +teamweb 0 1 6.957497 0.000000 13988 +utcssadaili 0 1 6.957497 0.000000 13989 +texanstock 0 1 6.957497 0.000000 13990 +picturesimageschines 0 1 6.957497 0.000000 13991 +popsend 0 1 6.957497 0.000000 13992 +jobtrakut 0 1 6.957497 0.000000 13993 +gopherftp 0 1 6.957497 0.000000 13994 +newstelnet 0 1 6.957497 0.000000 13995 +cschen 0 1 6.957497 0.000000 13996 +staffyour 0 1 6.957497 0.000000 13997 +commentsguest 0 1 6.957497 0.000000 13998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..8c379c1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +applic 0 170 1.791759 0.000000 56 +texa 0 160 1.791759 0.000000 64 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +professor 0 137 1.945910 0.000000 76 +document 0 121 2.079442 0.000000 89 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +user 0 104 2.302585 0.000000 137 +part 0 98 2.302585 0.000000 129 +book 0 99 2.302585 0.000000 131 +logic 0 71 2.639057 0.000000 230 +intellig 0 72 2.639057 0.000000 225 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +abstract 0 62 2.772589 0.000000 276 +visit 0 63 2.772589 0.000000 288 +reason 2 57 2.890372 5.780744 318 +index 1 56 2.890372 2.890372 309 +sever 0 56 2.890372 0.000000 322 +pointer 0 48 3.044522 0.000000 368 +visitor 0 49 3.044522 0.000000 371 +directori 1 45 3.135494 3.135494 396 +robot 0 36 3.367296 0.000000 497 +bibliographi 1 34 3.401197 3.401197 518 +represent 0 35 3.401197 0.000000 512 +dissert 1 32 3.465736 3.465736 549 +limit 0 29 3.583519 0.000000 585 +alumni 0 21 3.912023 0.000000 807 +supervis 0 20 3.951244 0.000000 840 +spatial 0 16 4.174387 0.000000 988 +atth 0 15 4.248495 0.000000 1019 +easili 0 14 4.317488 0.000000 1077 +qualit 1 11 4.553877 4.553877 1362 +tour 0 11 4.553877 0.000000 1307 +ataustin 0 9 4.753590 0.000000 1610 +yellow 0 9 4.753590 0.000000 1601 +qsim 1 3 5.857933 5.857933 3862 +kuiper 0 3 5.857933 0.000000 3794 +thephys 0 2 6.263398 0.000000 5694 +ourresearch 1 1 6.957497 6.957497 13999 +utexasqualit 0 1 6.957497 0.000000 14000 +utexasth 0 1 6.957497 0.000000 14001 +intelligentrobot 0 1 6.957497 0.000000 14002 +knowledgerepresent 0 1 6.957497 0.000000 14003 +algernon 0 1 6.957497 0.000000 14004 +benjaminkuip 0 1 6.957497 0.000000 14005 +areadescript 0 1 6.957497 0.000000 14006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..b6705c13 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +click 0 142 1.945910 0.000000 78 +welcom 1 122 2.079442 2.079442 99 +introduct 0 126 2.079442 0.000000 87 +pleas 1 113 2.197225 2.197225 114 +technic 0 100 2.302585 0.000000 140 +comment 0 93 2.397895 0.000000 146 +sinc 0 90 2.397895 0.000000 159 +graphic 0 90 2.397895 0.000000 147 +institut 0 84 2.484907 0.000000 187 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +free 0 73 2.639057 0.000000 224 +window 1 68 2.708050 2.708050 242 +java 0 70 2.708050 0.000000 248 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +back 0 60 2.833213 0.000000 297 +unix 0 58 2.890372 0.000000 308 +suggest 0 53 2.944439 0.000000 331 +still 0 50 3.044522 0.000000 362 +around 0 43 3.178054 0.000000 415 +feel 0 37 3.332205 0.000000 483 +game 0 36 3.367296 0.000000 498 +copyright 0 36 3.367296 0.000000 495 +john 1 33 3.433987 3.433987 532 +linux 0 27 3.637586 0.000000 631 +sport 0 25 3.737670 0.000000 683 +thank 1 23 3.806662 3.806662 721 +brows 0 23 3.806662 0.000000 726 +self 0 22 3.850148 0.000000 761 +leav 0 21 3.912023 0.000000 772 +els 0 19 4.007333 0.000000 843 +modif 0 17 4.110874 0.000000 913 +joke 0 8 4.875197 0.000000 1620 +settimeout 0 5 5.347108 0.000000 2536 +guestbook 0 5 5.347108 0.000000 2475 +seed 1 4 5.568345 5.568345 2984 +timertwo 0 4 5.568345 0.000000 2985 +scrollit_rl 0 3 5.857933 0.000000 3882 +underconstruct 0 3 5.857933 0.000000 3889 +com 0 2 6.263398 0.000000 5156 +qiang 1 1 6.957497 6.957497 14007 +seriousjunk 0 1 6.957497 0.000000 14008 +realjunk 0 1 6.957497 0.000000 14009 +struggleforliv 0 1 6.957497 0.000000 14010 +qzuo 0 1 6.957497 0.000000 14011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..70f82c42 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +last 1 314 1.098612 1.098612 14 +offic 0 299 1.098612 0.000000 13 +gener 1 220 1.386294 1.386294 27 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +texa 1 160 1.791759 1.791759 64 +austin 0 168 1.791759 0.000000 63 +avail 0 169 1.791759 0.000000 48 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +technolog 1 131 2.079442 2.079442 102 +compil 0 122 2.079442 0.000000 96 +document 0 121 2.079442 0.000000 89 +spring 0 131 2.079442 0.000000 88 +teach 0 108 2.197225 0.000000 112 +institut 1 84 2.484907 2.484907 187 +decemb 0 80 2.564949 0.000000 215 +name 0 72 2.639057 0.000000 220 +laboratori 0 63 2.772589 0.000000 292 +abstract 0 62 2.772589 0.000000 276 +taylor 0 63 2.772589 0.000000 287 +type 0 61 2.833213 0.000000 296 +semest 0 58 2.890372 0.000000 312 +directori 0 45 3.135494 0.000000 396 +robert 1 30 3.555348 3.555348 567 +experiment 0 26 3.688879 0.000000 645 +runtim 0 19 4.007333 0.000000 858 +less 0 18 4.060443 0.000000 892 +brown 0 16 4.174387 0.000000 977 +eduphon 0 15 4.248495 0.000000 1060 +massachusett 1 14 4.317488 4.317488 1118 +ofcomput 0 10 4.653960 0.000000 1442 +informationemail 0 9 4.753590 0.000000 1564 +pronounc 0 7 5.010635 0.000000 1918 +austinaustin 0 7 5.010635 0.000000 1966 +blumoferdb 0 5 5.347108 0.000000 2324 +bloom 0 4 5.568345 0.000000 2913 +sciencestaylor 0 3 5.857933 0.000000 3814 +bobbi 0 2 6.263398 0.000000 5678 +informationassist 0 2 6.263398 0.000000 5531 +blumoferobert 0 1 6.957497 0.000000 14012 +blumofei 0 1 6.957497 0.000000 14013 +cilkmultithread 0 1 6.957497 0.000000 14014 +hallpost 0 1 6.957497 0.000000 14015 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..cfe9e489 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +construct 0 139 1.945910 0.000000 82 +analysi 0 124 2.079442 0.000000 98 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +homepag 0 93 2.397895 0.000000 148 +real 0 93 2.397895 0.000000 144 +comment 0 93 2.397895 0.000000 146 +intellig 0 72 2.639057 0.000000 225 +artifici 0 63 2.772589 0.000000 280 +content 0 59 2.833213 0.000000 302 +rule 0 26 3.688879 0.000000 638 +brows 0 23 3.806662 0.000000 726 +wang 0 21 3.912023 0.000000 790 +theunivers 0 21 3.912023 0.000000 797 +permit 0 16 4.174387 0.000000 962 +candid 0 9 4.753590 0.000000 1606 +publicationsi 0 3 5.857933 0.000000 3827 +wangwelcom 0 1 6.957497 0.000000 14016 +rhwang 0 1 6.957497 0.000000 14017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..a373e63f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +texa 0 160 1.791759 0.000000 64 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +spring 1 131 2.079442 2.079442 88 +technolog 0 131 2.079442 0.000000 102 +mathemat 0 108 2.197225 0.000000 123 +memori 0 101 2.302585 0.000000 139 +intellig 1 72 2.639057 2.639057 225 +appli 0 71 2.639057 0.000000 226 +solv 0 73 2.639057 0.000000 234 +goal 0 66 2.708050 0.000000 250 +artifici 1 63 2.772589 2.772589 280 +organ 0 65 2.772589 0.000000 265 +taylor 0 63 2.772589 0.000000 287 +automat 0 61 2.833213 0.000000 306 +detail 0 57 2.890372 0.000000 321 +undergradu 1 54 2.944439 2.944439 338 +finger 0 52 2.995732 0.000000 354 +visual 0 48 3.044522 0.000000 372 +vision 0 41 3.218876 0.000000 430 +seminar 1 38 3.295837 3.295837 470 +robot 0 36 3.367296 0.000000 497 +neural 1 30 3.555348 3.555348 578 +computersci 0 30 3.555348 0.000000 562 +postal 0 30 3.555348 0.000000 580 +utc 0 27 3.637586 0.000000 629 +strategi 0 25 3.737670 0.000000 682 +decis 0 23 3.806662 0.000000 728 +self 0 22 3.850148 0.000000 761 +concentr 0 18 4.060443 0.000000 906 +cognit 1 16 4.174387 4.174387 986 +evolv 0 12 4.465908 0.000000 1223 +genet 0 10 4.653960 0.000000 1409 +risto 1 9 4.753590 4.753590 1523 +schema 0 6 5.164786 0.000000 1988 +ucla 0 5 5.347108 0.000000 2502 +oftexa 0 4 5.568345 0.000000 3003 +episod 0 4 5.568345 0.000000 2747 +intereststh 0 3 5.857933 0.000000 3838 +cortex 0 3 5.857933 0.000000 3856 +helsinki 0 2 6.263398 0.000000 5702 +miikkulainenristo 0 1 6.957497 0.000000 14018 +miikkulainenassoci 0 1 6.957497 0.000000 14019 +processeswith 0 1 6.957497 0.000000 14020 +languageacquisit 0 1 6.957497 0.000000 14021 +networkswith 0 1 6.957497 0.000000 14022 +discoversequenti 0 1 6.957497 0.000000 14023 +classessumm 0 1 6.957497 0.000000 14024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..cbc8a545 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +rong 0 2 6.263398 0.000000 5703 +bigfoot 0 1 6.957497 0.000000 14025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..e56fd06c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +algorithm 1 162 1.791759 1.791759 57 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +spring 0 131 2.079442 0.000000 88 +theori 0 111 2.197225 0.000000 127 +member 0 84 2.484907 0.000000 165 +complet 0 77 2.564949 0.000000 208 +onlin 0 75 2.639057 0.000000 223 +plan 0 65 2.772589 0.000000 272 +vita 0 38 3.295837 0.000000 473 +random 0 34 3.401197 0.000000 511 +curriculum 0 33 3.433987 0.000000 535 +postal 0 30 3.555348 0.000000 580 +particularli 0 19 4.007333 0.000000 867 +atth 0 15 4.248495 0.000000 1019 +ataustin 0 9 4.753590 0.000000 1610 +andcomput 0 8 4.875197 0.000000 1623 +supervisor 0 3 5.857933 0.000000 3754 +rajaraman 1 2 6.263398 6.263398 5704 +rraj 1 2 6.263398 6.263398 5705 +rajmohan 0 2 6.263398 0.000000 5706 +mydissert 0 2 6.263398 0.000000 5496 +mypubl 0 2 6.263398 0.000000 5707 +linkscontact 0 2 6.263398 0.000000 5708 +pagerajmohan 0 1 6.957497 0.000000 14026 +gregplaxton 0 1 6.957497 0.000000 14027 +incombinator 0 1 6.957497 0.000000 14028 +sciencemiscellan 0 1 6.957497 0.000000 14029 +ephon 0 1 6.957497 0.000000 14030 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..9570e2a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +fall 0 181 1.609438 0.000000 40 +oper 0 180 1.609438 0.000000 34 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +introduct 1 126 2.079442 2.079442 87 +technolog 0 131 2.079442 0.000000 102 +final 0 116 2.197225 0.000000 108 +mathemat 0 108 2.197225 0.000000 123 +peopl 0 96 2.302585 0.000000 132 +homepag 1 93 2.397895 2.397895 148 +librari 0 87 2.484907 0.000000 181 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +colleg 0 61 2.833213 0.000000 300 +telephon 0 50 3.044522 0.000000 373 +movi 1 40 3.258097 3.258097 459 +live 0 40 3.258097 0.000000 451 +ofth 0 36 3.367296 0.000000 491 +computersci 0 30 3.555348 0.000000 562 +weather 0 28 3.610918 0.000000 618 +spent 0 25 3.737670 0.000000 676 +yahoo 0 24 3.761200 0.000000 707 +beij 0 19 4.007333 0.000000 876 +five 0 19 4.007333 0.000000 841 +citi 0 19 4.007333 0.000000 874 +tsinghua 0 13 4.382027 0.000000 1195 +replic 0 12 4.465908 0.000000 1231 +entertain 0 12 4.465908 0.000000 1286 +catalog 0 10 4.653960 0.000000 1431 +login 0 9 4.753590 0.000000 1550 +film 0 8 4.875197 0.000000 1761 +capit 1 7 5.010635 5.010635 1957 +apart 0 7 5.010635 0.000000 1936 +chronicl 0 7 5.010635 0.000000 1952 +river 0 6 5.164786 0.000000 2220 +provinc 0 4 5.568345 0.000000 3009 +republ 0 4 5.568345 0.000000 3032 +sinanet 0 4 5.568345 0.000000 2883 +rong 1 2 6.263398 6.263398 5703 +zhai 0 2 6.263398 0.000000 5709 +utaccess 1 1 6.957497 6.957497 14031 +homepagea 0 1 6.957497 0.000000 14032 +chinachina 0 1 6.957497 0.000000 14033 +jinan 0 1 6.957497 0.000000 14034 +myhometown 0 1 6.957497 0.000000 14035 +shandong 0 1 6.957497 0.000000 14036 +gotmi 0 1 6.957497 0.000000 14037 +ofchina 0 1 6.957497 0.000000 14038 +texasaustin 0 1 6.957497 0.000000 14039 +rent 0 1 6.957497 0.000000 14040 +utcsth 0 1 6.957497 0.000000 14041 +utnetcat 0 1 6.957497 0.000000 14042 +browsabl 0 1 6.957497 0.000000 14043 +austininform 0 1 6.957497 0.000000 14044 +utcat 0 1 6.957497 0.000000 14045 +systemsdynam 0 1 6.957497 0.000000 14046 +graphicsc 0 1 6.957497 0.000000 14047 +logicc 0 1 6.957497 0.000000 14048 +moviesaustin 0 1 6.957497 0.000000 14049 +filmsmicrosoft 0 1 6.957497 0.000000 14050 +cinemania 0 1 6.957497 0.000000 14051 +onlineal 0 1 6.957497 0.000000 14052 +guidehollywood 0 1 6.957497 0.000000 14053 +onlineinternet 0 1 6.957497 0.000000 14054 +databaserog 0 1 6.957497 0.000000 14055 +ebert 0 1 6.957497 0.000000 14056 +moviesvisit 0 1 6.957497 0.000000 14057 +contactmail 0 1 6.957497 0.000000 14058 +aaustin 0 1 6.957497 0.000000 14059 +emailrtan 0 1 6.957497 0.000000 14060 +fingerclick 0 1 6.957497 0.000000 14061 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..51169c03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +machin 0 129 2.079442 0.000000 95 +teach 0 108 2.197225 0.000000 112 +need 0 98 2.302585 0.000000 135 +academ 0 82 2.484907 0.000000 178 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +know 0 80 2.564949 0.000000 198 +meet 0 72 2.639057 0.000000 229 +servic 0 72 2.639057 0.000000 236 +would 1 67 2.708050 2.708050 251 +differ 0 66 2.708050 0.000000 253 +dept 0 64 2.772589 0.000000 291 +think 1 57 2.890372 2.890372 314 +much 0 52 2.995732 0.000000 349 +life 1 50 3.044522 3.044522 375 +cool 0 49 3.044522 0.000000 374 +littl 1 39 3.258097 3.258097 454 +realli 0 40 3.258097 0.000000 444 +actual 0 28 3.610918 0.000000 604 +altern 0 26 3.688879 0.000000 641 +alwai 1 24 3.761200 3.761200 691 +wish 0 24 3.761200 0.000000 692 +almost 0 22 3.850148 0.000000 742 +entir 0 20 3.951244 0.000000 811 +nice 0 20 3.951244 0.000000 809 +modern 0 16 4.174387 0.000000 966 +anyth 0 16 4.174387 0.000000 998 +choos 0 16 4.174387 0.000000 964 +opportun 0 13 4.382027 0.000000 1161 +besid 0 8 4.875197 0.000000 1681 +tang 1 5 5.347108 5.347108 2409 +plant 0 5 5.347108 0.000000 2497 +aspir 0 4 5.568345 0.000000 3019 +fear 0 4 5.568345 0.000000 2911 +freedom 0 3 5.857933 0.000000 3890 +rupert 1 2 6.263398 6.263398 5680 +miracl 0 2 6.263398 0.000000 5710 +holi 0 2 6.263398 0.000000 5711 +stimul 0 2 6.263398 0.000000 5712 +empti 0 2 6.263398 0.000000 5478 +truck 0 2 6.263398 0.000000 5713 +wash 0 2 6.263398 0.000000 5714 +strangl 0 1 6.957497 0.000000 14062 +curious 0 1 6.957497 0.000000 14063 +inquiri 0 1 6.957497 0.000000 14064 +delic 0 1 6.957497 0.000000 14065 +depriv 0 1 6.957497 0.000000 14066 +distast 0 1 6.957497 0.000000 14067 +deni 0 1 6.957497 0.000000 14068 +duress 0 1 6.957497 0.000000 14069 +fate 0 1 6.957497 0.000000 14070 +messi 0 1 6.957497 0.000000 14071 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..08d1be6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +homepag 0 93 2.397895 0.000000 148 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 +ruwei 0 1 6.957497 0.000000 14072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..b618161e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +develop 1 174 1.791759 1.791759 53 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +analysi 0 124 2.079442 0.000000 98 +introduct 0 126 2.079442 0.000000 87 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +schedul 0 119 2.079442 0.000000 85 +mathemat 1 108 2.197225 2.197225 123 +well 0 109 2.197225 0.000000 121 +techniqu 1 99 2.302585 2.302585 138 +user 0 104 2.302585 0.000000 137 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +institut 0 84 2.484907 0.000000 187 +environ 0 84 2.484907 0.000000 177 +journal 0 83 2.484907 0.000000 183 +method 1 80 2.564949 2.564949 213 +april 0 77 2.564949 0.000000 196 +appli 1 71 2.639057 2.639057 226 +summari 0 73 2.639057 0.000000 237 +workshop 0 71 2.639057 0.000000 239 +meet 0 72 2.639057 0.000000 229 +evalu 0 64 2.772589 0.000000 266 +colleg 0 61 2.833213 0.000000 300 +variou 0 56 2.890372 0.000000 317 +major 0 56 2.890372 0.000000 315 +scientif 0 53 2.944439 0.000000 341 +case 1 51 2.995732 2.995732 351 +numer 1 49 3.044522 3.044522 369 +better 0 45 3.135494 0.000000 401 +http 0 41 3.218876 0.000000 420 +continu 0 39 3.258097 0.000000 448 +tutori 0 39 3.258097 0.000000 437 +robert 1 30 3.555348 3.555348 567 +effort 0 26 3.688879 0.000000 652 +supercomput 0 25 3.737670 0.000000 681 +famili 0 23 3.806662 0.000000 735 +prove 0 19 4.007333 0.000000 848 +former 0 17 4.110874 0.000000 956 +easili 0 14 4.317488 0.000000 1077 +infrastructur 0 12 4.465908 0.000000 1234 +forc 0 10 4.653960 0.000000 1384 +maryland 0 6 5.164786 0.000000 2140 +park 0 6 5.164786 0.000000 2218 +plapack 0 3 5.857933 0.000000 3849 +geijn 0 2 6.263398 0.000000 5715 +appliedmathemat 0 2 6.263398 0.000000 5716 +interestnumer 0 2 6.263398 0.000000 5717 +researchth 0 2 6.263398 0.000000 5492 +rvdg 1 1 6.957497 6.957497 14073 +geijnassoci 0 1 6.957497 0.000000 14074 +oftradit 0 1 6.957497 0.000000 14075 +sequentialmachin 0 1 6.957497 0.000000 14076 +inoth 0 1 6.957497 0.000000 14077 +researchconcentr 0 1 6.957497 0.000000 14078 +forimpl 0 1 6.957497 0.000000 14079 +allowssuch 0 1 6.957497 0.000000 14080 +parallelprocessor 0 1 6.957497 0.000000 14081 +intercom 0 1 6.957497 0.000000 14082 +sl_librari 0 1 6.957497 0.000000 14083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..3a28fb5c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +contact 0 153 1.791759 0.000000 59 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +know 0 80 2.564949 0.000000 198 +window 0 68 2.708050 0.000000 242 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +undergradu 0 54 2.944439 0.000000 338 +visitor 0 49 3.044522 0.000000 371 +india 1 32 3.465736 3.465736 550 +computersci 0 30 3.555348 0.000000 562 +els 0 19 4.007333 0.000000 843 +region 0 19 4.007333 0.000000 875 +universityof 0 15 4.248495 0.000000 1061 +countri 0 15 4.248495 0.000000 1059 +settimeout 0 5 5.347108 0.000000 2536 +seed 1 4 5.568345 5.568345 2984 +abraham 0 4 5.568345 0.000000 2644 +timertwo 0 4 5.568345 0.000000 2985 +engg 0 4 5.568345 0.000000 2884 +scrollit_rl 0 3 5.857933 0.000000 3882 +kerala 0 3 5.857933 0.000000 3749 +sciencesand 0 2 6.263398 0.000000 4711 +hail 0 2 6.263398 0.000000 5583 +sundeep 0 1 6.957497 0.000000 14084 +sundeepabraham 0 1 6.957497 0.000000 14085 +calicut 0 1 6.957497 0.000000 14086 +tinkerwith 0 1 6.957497 0.000000 14087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..7da8d86a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +distribut 0 162 1.791759 0.000000 51 +construct 1 139 1.945910 1.945910 82 +hall 0 146 1.945910 0.000000 65 +schedul 0 119 2.079442 0.000000 85 +compil 0 122 2.079442 0.000000 96 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +world 1 115 2.197225 2.197225 126 +make 0 111 2.197225 0.000000 120 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +peopl 0 96 2.302585 0.000000 132 +advanc 0 99 2.302585 0.000000 130 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +activ 0 84 2.484907 0.000000 182 +info 0 85 2.484907 0.000000 176 +refer 1 78 2.564949 2.564949 203 +state 0 76 2.564949 0.000000 207 +server 0 76 2.564949 0.000000 204 +free 0 73 2.639057 0.000000 224 +intellig 0 72 2.639057 0.000000 225 +html 0 75 2.639057 0.000000 235 +view 0 70 2.708050 0.000000 254 +prof 1 64 2.772589 2.772589 273 +artifici 0 63 2.772589 0.000000 280 +experi 0 64 2.772589 0.000000 283 +virtual 0 62 2.772589 0.000000 285 +guid 0 63 2.772589 0.000000 267 +taylor 0 63 2.772589 0.000000 287 +right 0 48 3.044522 0.000000 363 +friend 0 48 3.044522 0.000000 376 +algebra 0 45 3.135494 0.000000 394 +press 0 42 3.218876 0.000000 419 +linear 0 41 3.218876 0.000000 431 +map 0 39 3.258097 0.000000 452 +express 0 32 3.465736 0.000000 540 +robert 0 30 3.555348 0.000000 567 +weather 0 28 3.610918 0.000000 618 +packag 0 28 3.610918 0.000000 614 +great 0 27 3.637586 0.000000 626 +mine 0 26 3.688879 0.000000 654 +repres 0 26 3.688879 0.000000 656 +jeff 0 25 3.737670 0.000000 673 +hill 0 25 3.737670 0.000000 670 +yahoo 0 24 3.761200 0.000000 707 +famili 0 23 3.806662 0.000000 735 +miscellan 0 23 3.806662 0.000000 731 +unit 0 21 3.912023 0.000000 779 +lyco 0 19 4.007333 0.000000 871 +exercis 0 19 4.007333 0.000000 842 +hypertext 0 19 4.007333 0.000000 865 +boston 0 19 4.007333 0.000000 862 +white 0 17 4.110874 0.000000 951 +sign 0 16 4.174387 0.000000 970 +hotlist 0 13 4.382027 0.000000 1199 +entertain 0 12 4.465908 0.000000 1286 +america 0 11 4.553877 0.000000 1370 +consortium 0 10 4.653960 0.000000 1467 +swim 1 9 4.753590 4.753590 1599 +yellow 1 9 4.753590 4.753590 1601 +congress 0 9 4.753590 0.000000 1592 +respect 0 9 4.753590 0.000000 1545 +establish 0 9 4.753590 0.000000 1532 +govern 0 9 4.753590 0.000000 1581 +calvin 0 9 4.753590 0.000000 1518 +yanni 0 8 4.875197 0.000000 1713 +hockei 0 8 4.875197 0.000000 1760 +opinion 0 8 4.875197 0.000000 1708 +chronicl 0 7 5.010635 0.000000 1952 +necessarili 0 7 5.010635 0.000000 1899 +altavista 0 6 5.164786 0.000000 2222 +constitut 0 6 5.164786 0.000000 2026 +quick 0 6 5.164786 0.000000 2184 +andrea 0 5 5.347108 0.000000 2375 +shall 0 3 5.857933 0.000000 3891 +abridg 0 3 5.857933 0.000000 3772 +freedom 0 3 5.857933 0.000000 3890 +plapack 0 3 5.857933 0.000000 3849 +health 0 3 5.857933 0.000000 3787 +concert 0 3 5.857933 0.000000 3533 +thereof 0 2 6.263398 0.000000 5484 +geijn 0 2 6.263398 0.000000 5715 +musician 0 2 6.263398 0.000000 5718 +hamilton 0 2 6.263398 0.000000 5719 +guyer 0 2 6.263398 0.000000 4171 +northwestern 0 2 6.263398 0.000000 5502 +nate 0 2 6.263398 0.000000 5720 +dell 0 2 6.263398 0.000000 4193 +fring 0 2 6.263398 0.000000 5721 +sammi 1 1 6.957497 6.957497 14088 +startingpoint 0 1 6.957497 0.000000 14089 +religion 0 1 6.957497 0.000000 14090 +orprohibit 0 1 6.957497 0.000000 14091 +ofspeech 0 1 6.957497 0.000000 14092 +peaceabl 0 1 6.957497 0.000000 14093 +toassembl 0 1 6.957497 0.000000 14094 +petit 0 1 6.957497 0.000000 14095 +redress 0 1 6.957497 0.000000 14096 +grievanc 0 1 6.957497 0.000000 14097 +herbarium 0 1 6.957497 0.000000 14098 +anagram 0 1 6.957497 0.000000 14099 +nil 0 1 6.957497 0.000000 14100 +reker 0 1 6.957497 0.000000 14101 +pop 0 1 6.957497 0.000000 14102 +anthropolog 0 1 6.957497 0.000000 14103 +kate 0 1 6.957497 0.000000 14104 +showbiz 0 1 6.957497 0.000000 14105 +pollstar 0 1 6.957497 0.000000 14106 +ryder 0 1 6.957497 0.000000 14107 +laptop 0 1 6.957497 0.000000 14108 +traveloc 0 1 6.957497 0.000000 14109 +eduth 0 1 6.957497 0.000000 14110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..9433e5e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 0 299 1.098612 0.000000 13 +time 0 293 1.098612 0.000000 17 +mail 1 238 1.386294 1.386294 22 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +utexa 0 189 1.609438 0.000000 44 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +file 1 132 1.945910 1.945910 70 +hall 0 146 1.945910 0.000000 65 +report 0 131 2.079442 0.000000 92 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +method 1 80 2.564949 2.564949 213 +logic 0 71 2.639057 0.000000 230 +main 0 67 2.708050 0.000000 256 +taylor 0 63 2.772589 0.000000 287 +abstract 0 62 2.772589 0.000000 276 +frequent 0 49 3.044522 0.000000 367 +around 0 43 3.178054 0.000000 415 +edit 0 42 3.218876 0.000000 418 +formal 1 37 3.332205 3.332205 478 +dissert 0 32 3.465736 0.000000 549 +common 0 30 3.555348 0.000000 574 +propos 1 28 3.610918 3.610918 602 +univ 0 28 3.610918 0.000000 617 +fellow 0 24 3.761200 0.000000 701 +lisp 0 18 4.060443 0.000000 897 +boyer 0 6 5.164786 0.000000 2013 +sawada 0 3 5.857933 0.000000 3190 +oral 0 3 5.857933 0.000000 3189 +teacher 0 3 5.857933 0.000000 3892 +supplementari 0 2 6.263398 0.000000 4752 +bowen 0 2 6.263398 0.000000 4170 +sawadajun 0 1 6.957497 0.000000 14111 +sawadacontact 0 1 6.957497 0.000000 14112 +wooten 0 1 6.957497 0.000000 14113 +kbresourc 0 1 6.957497 0.000000 14114 +pvsother 0 1 6.957497 0.000000 14115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..95d9313a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +softwar 2 220 1.386294 2.772588 30 +gener 2 220 1.386294 2.772588 27 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +avail 0 169 1.791759 0.000000 48 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +like 0 132 1.945910 0.000000 81 +object 0 138 1.945910 0.000000 79 +tool 0 117 2.079442 0.000000 93 +databas 0 122 2.079442 0.000000 86 +postscript 0 131 2.079442 0.000000 90 +look 1 107 2.197225 2.197225 115 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +specif 0 106 2.197225 0.000000 106 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +take 0 97 2.302585 0.000000 134 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +mani 1 92 2.397895 2.397895 150 +question 0 91 2.397895 0.000000 141 +follow 0 92 2.397895 0.000000 143 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +librari 0 87 2.484907 0.000000 181 +contain 0 81 2.484907 0.000000 174 +decemb 1 80 2.564949 2.564949 215 +orient 0 80 2.564949 0.000000 205 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +order 1 69 2.708050 2.708050 249 +differ 0 66 2.708050 0.000000 253 +simul 0 66 2.708050 0.000000 255 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +visit 0 63 2.772589 0.000000 288 +creat 0 63 2.772589 0.000000 277 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +index 0 56 2.890372 0.000000 309 +overview 0 56 2.890372 0.000000 323 +basic 1 50 3.044522 3.044522 360 +approach 0 48 3.044522 0.000000 366 +get 1 46 3.091042 3.091042 380 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +term 0 43 3.178054 0.000000 411 +must 0 40 3.258097 0.000000 442 +tutori 0 39 3.258097 0.000000 437 +feel 0 37 3.332205 0.000000 483 +product 1 33 3.433987 3.433987 527 +independ 0 32 3.465736 0.000000 548 +transform 0 32 3.465736 0.000000 542 +titl 0 31 3.496508 0.000000 556 +compon 1 30 3.555348 3.555348 570 +domain 1 30 3.555348 3.555348 564 +scale 0 28 3.610918 0.000000 613 +releas 0 28 3.610918 0.000000 616 +utc 0 27 3.637586 0.000000 629 +manipul 0 27 3.637586 0.000000 624 +subject 0 26 3.688879 0.000000 647 +pattern 1 24 3.761200 3.761200 689 +demonstr 0 24 3.761200 0.000000 694 +scalabl 0 24 3.761200 0.000000 705 +equat 0 23 3.806662 0.000000 724 +compress 0 23 3.806662 0.000000 719 +defin 0 22 3.850148 0.000000 746 +recommend 0 22 3.850148 0.000000 737 +period 0 22 3.850148 0.000000 743 +thu 0 21 3.912023 0.000000 773 +reflect 0 15 4.248495 0.000000 1034 +goe 0 15 4.248495 0.000000 1044 +composit 0 13 4.382027 0.000000 1150 +dbm 0 13 4.382027 0.000000 1136 +assembl 0 12 4.465908 0.000000 1207 +refin 1 11 4.553877 4.553877 1363 +pagewelcom 0 11 4.553877 0.000000 1344 +typic 0 11 4.553877 0.000000 1360 +valid 0 11 4.553877 0.000000 1299 +evolut 0 11 4.553877 0.000000 1314 +modul 0 10 4.653960 0.000000 1434 +relationship 0 10 4.653960 0.000000 1383 +bart 0 9 4.753590 0.000000 1559 +reus 0 8 4.875197 0.000000 1661 +successfulli 0 7 5.010635 0.000000 1869 +beyond 0 7 5.010635 0.000000 1834 +deliv 0 6 5.164786 0.000000 2070 +batori 1 4 5.568345 5.568345 2690 +avion 0 4 5.568345 0.000000 3018 +substanti 0 4 5.568345 0.000000 2921 +metadata 0 4 5.568345 0.000000 2945 +breadth 0 4 5.568345 0.000000 2695 +interchang 0 3 5.857933 0.000000 3893 +tokuda 0 3 5.857933 0.000000 3266 +smaragdaki 0 3 5.857933 0.000000 3851 +lightweight 0 3 5.857933 0.000000 3234 +encapsul 1 2 6.263398 6.263398 5541 +reusabl 0 2 6.263398 0.000000 4218 +marti 0 2 6.263398 0.000000 5679 +genvoca 1 1 6.957497 6.957497 14116 +ssgrg 0 1 6.957497 0.000000 14117 +professorangela 0 1 6.957497 0.000000 14118 +dappert 0 1 6.957497 0.000000 14119 +studentguillermo 0 1 6.957497 0.000000 14120 +jimenez 0 1 6.957497 0.000000 14121 +perezph 0 1 6.957497 0.000000 14122 +studentjeff 0 1 6.957497 0.000000 14123 +thomasph 0 1 6.957497 0.000000 14124 +studentl 0 1 6.957497 0.000000 14125 +studentyanni 0 1 6.957497 0.000000 14126 +studentk 0 1 6.957497 0.000000 14127 +shepherdresearch 0 1 6.957497 0.000000 14128 +associateform 0 1 6.957497 0.000000 14129 +datesdinesh 0 1 6.957497 0.000000 14130 +dasph 0 1 6.957497 0.000000 14131 +milli 0 1 6.957497 0.000000 14132 +villarrealph 0 1 6.957497 0.000000 14133 +geracipostdoc 0 1 6.957497 0.000000 14134 +sirkinph 0 1 6.957497 0.000000 14135 +sankar 0 1 6.957497 0.000000 14136 +dasarim 0 1 6.957497 0.000000 14137 +starter 0 1 6.957497 0.000000 14138 +reengin 0 1 6.957497 0.000000 14139 +generatorsautom 0 1 6.957497 0.000000 14140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..c77b5929 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +gener 0 220 1.386294 0.000000 27 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +utexa 0 189 1.609438 0.000000 44 +includ 0 208 1.609438 0.000000 42 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +like 1 132 1.945910 1.945910 81 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +machin 0 129 2.079442 0.000000 95 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +check 1 115 2.197225 2.197225 118 +instructor 0 108 2.197225 0.000000 107 +well 0 109 2.197225 0.000000 121 +send 0 114 2.197225 0.000000 109 +peopl 1 96 2.302585 2.302585 132 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +stuff 1 87 2.484907 2.484907 171 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +want 1 79 2.564949 2.564949 199 +orient 1 80 2.564949 2.564949 205 +good 0 77 2.564949 0.000000 200 +name 0 72 2.639057 0.000000 220 +free 0 73 2.639057 0.000000 224 +differ 0 66 2.708050 0.000000 253 +would 0 67 2.708050 0.000000 251 +order 0 69 2.708050 0.000000 249 +guid 0 63 2.772589 0.000000 267 +import 0 65 2.772589 0.000000 282 +colleg 1 61 2.833213 2.833213 300 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +think 1 57 2.890372 2.890372 314 +summer 0 56 2.890372 0.000000 311 +publish 0 57 2.890372 0.000000 326 +undergradu 0 54 2.944439 0.000000 338 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +maintain 0 51 2.995732 0.000000 342 +basic 1 50 3.044522 3.044522 360 +right 1 48 3.044522 3.044522 363 +cool 0 49 3.044522 0.000000 374 +without 0 50 3.044522 0.000000 370 +still 0 50 3.044522 0.000000 362 +could 0 46 3.091042 0.000000 383 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +futur 0 41 3.218876 0.000000 427 +might 0 41 3.218876 0.000000 426 +realli 1 40 3.258097 3.258097 444 +littl 1 39 3.258097 3.258097 454 +probabl 0 40 3.258097 0.000000 455 +mean 0 37 3.332205 0.000000 477 +expect 0 37 3.332205 0.000000 484 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +within 0 33 3.433987 0.000000 525 +kind 0 32 3.465736 0.000000 541 +actual 0 28 3.610918 0.000000 604 +load 0 28 3.610918 0.000000 601 +utc 0 27 3.637586 0.000000 629 +great 0 27 3.637586 0.000000 626 +linux 0 27 3.637586 0.000000 631 +spent 1 25 3.737670 3.737670 676 +miscellan 0 23 3.806662 0.000000 731 +try 1 22 3.850148 3.850148 764 +finish 0 22 3.850148 0.000000 748 +dai 0 22 3.850148 0.000000 753 +longer 0 20 3.951244 0.000000 816 +sure 0 20 3.951244 0.000000 813 +minut 0 20 3.951244 0.000000 810 +spend 0 19 4.007333 0.000000 850 +item 0 19 4.007333 0.000000 856 +five 0 19 4.007333 0.000000 841 +scott 0 18 4.060443 0.000000 884 +hobbi 0 16 4.174387 0.000000 1009 +doesn 1 15 4.248495 4.248495 1055 +wait 0 13 4.382027 0.000000 1168 +neat 1 12 4.465908 4.465908 1263 +appl 0 11 4.553877 0.000000 1303 +noth 0 11 4.553877 0.000000 1328 +santa 0 10 4.653960 0.000000 1441 +didn 0 9 4.753590 0.000000 1563 +oop 0 8 4.875197 0.000000 1778 +forget 0 8 4.875197 0.000000 1712 +perfect 0 7 5.010635 0.000000 1921 +pageth 0 7 5.010635 0.000000 1939 +encrypt 0 7 5.010635 0.000000 1835 +squash 0 6 5.164786 0.000000 2223 +beer 0 6 5.164786 0.000000 2216 +sharp 0 6 5.164786 0.000000 2100 +amherst 1 5 5.347108 5.347108 2484 +humor 0 5 5.347108 0.000000 2533 +amaz 1 4 5.568345 5.568345 2600 +pagescott 0 4 5.568345 0.000000 2978 +slight 0 3 5.857933 0.000000 3894 +glenn 0 3 5.857933 0.000000 3869 +down 0 3 5.857933 0.000000 3870 +wine 0 3 5.857933 0.000000 3895 +maker 0 3 5.857933 0.000000 3164 +dine 0 3 5.857933 0.000000 3472 +citizen 0 3 5.857933 0.000000 3238 +iici 0 3 5.857933 0.000000 3436 +bright 0 3 5.857933 0.000000 3596 +fanci 0 2 6.263398 0.000000 4992 +unpredict 0 2 6.263398 0.000000 5722 +stuffit 0 2 6.263398 0.000000 4127 +invalu 0 2 6.263398 0.000000 4680 +forev 0 2 6.263398 0.000000 5636 +grab 0 2 6.263398 0.000000 5723 +pageokai 0 1 6.957497 0.000000 14141 +overdu 0 1 6.957497 0.000000 14142 +mead 0 1 6.957497 0.000000 14143 +psion 0 1 6.957497 0.000000 14144 +palmtop 0 1 6.957497 0.000000 14145 +anastasi 0 1 6.957497 0.000000 14146 +poke 0 1 6.957497 0.000000 14147 +ala 0 1 6.957497 0.000000 14148 +bebox 0 1 6.957497 0.000000 14149 +sfkaplan 0 1 6.957497 0.000000 14150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..818df75c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +comment 0 93 2.397895 0.000000 146 +build 0 85 2.484907 0.000000 184 +onlin 0 75 2.639057 0.000000 223 +main 1 67 2.708050 2.708050 256 +multimedia 0 68 2.708050 0.000000 258 +finger 0 52 2.995732 0.000000 354 +get 0 46 3.091042 0.000000 380 +log 0 19 4.007333 0.000000 857 +floor 0 14 4.317488 0.000000 1070 +touch 0 12 4.465908 0.000000 1288 +river 0 6 5.164786 0.000000 2220 +shenoi 1 3 5.857933 5.857933 3269 +tower 0 3 5.857933 0.000000 3818 +prashant 1 2 6.263398 6.263398 4331 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..2391db69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +final 0 116 2.197225 0.000000 108 +make 0 111 2.197225 0.000000 120 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +tabl 0 51 2.995732 0.000000 346 +finger 0 52 2.995732 0.000000 354 +campu 0 27 3.637586 0.000000 623 +vlsi 0 21 3.912023 0.000000 795 +citi 0 19 4.007333 0.000000 874 +vallei 0 7 5.010635 0.000000 1959 +ongo 0 6 5.164786 0.000000 2215 +coffe 0 5 5.347108 0.000000 2556 +pleasant 0 3 5.857933 0.000000 3825 +bookshelf 0 2 6.263398 0.000000 5724 +shaob 1 1 6.957497 6.957497 14151 +cyberhom 1 1 6.957497 6.957497 14152 +hardvar 0 1 6.957497 0.000000 14153 +verifc 0 1 6.957497 0.000000 14154 +shma 0 1 6.957497 0.000000 14155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..db0a943f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +object 0 138 1.945910 0.000000 79 +machin 1 129 2.079442 2.079442 95 +confer 0 126 2.079442 0.000000 100 +tool 0 117 2.079442 0.000000 93 +mathemat 0 108 2.197225 0.000000 123 +proceed 0 93 2.397895 0.000000 152 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +dynam 0 76 2.564949 0.000000 194 +intellig 0 72 2.639057 0.000000 225 +logic 0 71 2.639057 0.000000 230 +line 0 75 2.639057 0.000000 231 +appli 0 71 2.639057 0.000000 226 +symposium 0 72 2.639057 0.000000 238 +artifici 1 63 2.772589 2.772589 280 +processor 0 54 2.944439 0.000000 335 +life 0 50 3.044522 0.000000 375 +adapt 0 46 3.091042 0.000000 387 +music 0 42 3.218876 0.000000 436 +annual 0 40 3.258097 0.000000 458 +india 0 32 3.465736 0.000000 550 +neural 1 30 3.555348 3.555348 578 +qualiti 0 20 3.951244 0.000000 832 +massiv 0 15 4.248495 0.000000 1026 +nonlinear 0 14 4.317488 0.000000 1107 +affili 0 13 4.382027 0.000000 1194 +automata 0 13 4.382027 0.000000 1135 +avenu 0 12 4.465908 0.000000 1277 +itali 0 11 4.553877 0.000000 1378 +evolut 0 11 4.553877 0.000000 1314 +genet 1 10 4.653960 4.653960 1409 +kumar 1 9 4.753590 4.753590 1506 +risto 0 9 4.753590 0.000000 1523 +chao 0 8 4.875197 0.000000 1753 +miikkulainen 0 8 4.875197 0.000000 1667 +signal 0 7 5.010635 0.000000 1910 +edumi 0 6 5.164786 0.000000 2132 +cellular 0 5 5.347108 0.000000 2433 +dual 0 5 5.347108 0.000000 2522 +austindepart 0 4 5.568345 0.000000 3008 +reinforc 0 4 5.568345 0.000000 2674 +snail 0 4 5.568345 0.000000 2916 +sciencestaylor 0 3 5.857933 0.000000 3814 +patrick 0 3 5.857933 0.000000 3334 +shailesh 1 2 6.263398 6.263398 5578 +fuzzi 0 2 6.263398 0.000000 5423 +publicationson 0 2 6.263398 0.000000 4899 +singh 0 2 6.263398 0.000000 5675 +kumarshailesh 0 1 6.957497 0.000000 14156 +kumarth 0 1 6.957497 0.000000 14157 +skumar 0 1 6.957497 0.000000 14158 +resumeresearch 0 1 6.957497 0.000000 14159 +publicationscontact 0 1 6.957497 0.000000 14160 +mesrcm 0 1 6.957497 0.000000 14161 +spiritu 0 1 6.957497 0.000000 14162 +offersom 0 1 6.957497 0.000000 14163 +linkscognit 0 1 6.957497 0.000000 14164 +scienceutc 0 1 6.957497 0.000000 14165 +researchutc 0 1 6.957497 0.000000 14166 +groupresearch 0 1 6.957497 0.000000 14167 +neuroevolut 0 1 6.957497 0.000000 14168 +predistort 0 1 6.957497 0.000000 14169 +goetz 0 1 6.957497 0.000000 14170 +bari 0 1 6.957497 0.000000 14171 +bord 0 1 6.957497 0.000000 14172 +aprl 0 1 6.957497 0.000000 14173 +whiti 0 1 6.957497 0.000000 14174 +offernet 0 1 6.957497 0.000000 14175 +assistancesearch 0 1 6.957497 0.000000 14176 +institutewww 0 1 6.957497 0.000000 14177 +infoindia 0 1 6.957497 0.000000 14178 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..a816b29b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +applic 0 170 1.791759 0.000000 56 +texa 0 160 1.791759 0.000000 64 +phone 0 175 1.791759 0.000000 45 +area 0 144 1.945910 0.000000 80 +make 0 111 2.197225 0.000000 120 +world 0 115 2.197225 0.000000 126 +main 0 67 2.708050 0.000000 256 +plan 0 65 2.772589 0.000000 272 +favorit 0 44 3.135494 0.000000 410 +posit 0 31 3.496508 0.000000 552 +someth 0 31 3.496508 0.000000 554 +photo 0 31 3.496508 0.000000 561 +turn 0 29 3.583519 0.000000 586 +utc 1 27 3.637586 3.637586 629 +lead 0 23 3.806662 0.000000 718 +scheme 0 20 3.951244 0.000000 818 +particularli 0 19 4.007333 0.000000 867 +success 0 10 4.653960 0.000000 1390 +meta 0 9 4.753590 0.000000 1505 +yanni 1 8 4.875197 4.875197 1713 +gold 0 8 4.875197 0.000000 1745 +dictionari 0 8 4.875197 0.000000 1642 +moder 0 6 5.164786 0.000000 2112 +arrang 0 6 5.164786 0.000000 2023 +webster 0 5 5.347108 0.000000 2468 +album 0 4 5.568345 0.000000 2888 +smaragdaki 1 3 5.857933 5.857933 3851 +serious 0 3 5.857933 0.000000 3663 +alchemi 0 1 6.957497 0.000000 14179 +sitessmaragd 0 1 6.957497 0.000000 14180 +eduyanni 0 1 6.957497 0.000000 14181 +smaragdakisunivers 0 1 6.957497 0.000000 14182 +departmenttai 0 1 6.957497 0.000000 14183 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..c92c3ce8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 0 384 0.693147 0.000000 11 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +fall 0 181 1.609438 0.000000 40 +utexa 0 189 1.609438 0.000000 44 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +like 1 132 1.945910 1.945910 81 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +note 0 142 1.945910 0.000000 67 +pleas 1 113 2.197225 2.197225 114 +site 0 106 2.197225 0.000000 119 +person 0 111 2.197225 0.000000 117 +send 0 114 2.197225 0.000000 109 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +center 0 88 2.397895 0.000000 158 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +addit 0 74 2.639057 0.000000 228 +experi 0 64 2.772589 0.000000 283 +previou 0 62 2.772589 0.000000 290 +life 0 50 3.044522 0.000000 375 +visitor 0 49 3.044522 0.000000 371 +mean 1 37 3.332205 3.332205 477 +field 0 37 3.332205 0.000000 482 +effort 0 26 3.688879 0.000000 652 +tell 0 21 3.912023 0.000000 777 +basi 0 20 3.951244 0.000000 828 +ever 0 19 4.007333 0.000000 872 +wind 0 18 4.060443 0.000000 908 +stream 0 15 4.248495 0.000000 1015 +trip 0 14 4.317488 0.000000 1113 +bodi 0 13 4.382027 0.000000 1178 +danc 0 12 4.465908 0.000000 1278 +duli 0 12 4.465908 0.000000 1248 +absolut 0 8 4.875197 0.000000 1646 +wouldn 0 7 5.010635 0.000000 1970 +edward 0 6 5.164786 0.000000 2050 +greatest 0 6 5.164786 0.000000 2073 +ignor 0 5 5.347108 0.000000 2288 +shadow 0 3 5.857933 0.000000 3519 +haiku 0 3 5.857933 0.000000 3811 +eddi 0 3 5.857933 0.000000 3896 +danger 0 2 6.263398 0.000000 5725 +strictli 0 2 6.263398 0.000000 5726 +stimul 0 2 6.263398 0.000000 5712 +minion 1 1 6.957497 6.957497 14184 +asphalt 0 1 6.957497 0.000000 14185 +moonlight 0 1 6.957497 0.000000 14186 +nerv 0 1 6.957497 0.000000 14187 +dy 0 1 6.957497 0.000000 14188 +pania 0 1 6.957497 0.000000 14189 +leaf 0 1 6.957497 0.000000 14190 +afloat 0 1 6.957497 0.000000 14191 +waterfal 0 1 6.957497 0.000000 14192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..e160177a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +email 1 220 1.386294 1.386294 29 +mail 0 238 1.386294 0.000000 22 +utexa 1 189 1.609438 1.609438 44 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +site 0 106 2.197225 0.000000 119 +search 1 95 2.397895 2.397895 155 +member 0 84 2.484907 0.000000 165 +knowledg 0 67 2.708050 0.000000 243 +tech 0 35 3.401197 0.000000 515 +hotlist 0 13 4.382027 0.000000 1199 +souther 0 3 5.857933 0.000000 3795 +southerart 0 1 6.957497 0.000000 14193 +southerresearchbuild 0 1 6.957497 0.000000 14194 +reportsouth 0 1 6.957497 0.000000 14195 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..f8860467 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +area 0 144 1.945910 0.000000 80 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +theori 0 111 2.197225 0.000000 127 +techniqu 0 99 2.302585 0.000000 138 +learn 1 86 2.484907 2.484907 170 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +exampl 0 77 2.564949 0.000000 195 +resum 0 79 2.564949 0.000000 217 +intellig 0 72 2.639057 0.000000 225 +appli 0 71 2.639057 0.000000 226 +creat 0 63 2.772589 0.000000 277 +taylor 0 63 2.772589 0.000000 287 +approach 0 48 3.044522 0.000000 366 +field 0 37 3.332205 0.000000 482 +tech 0 35 3.401197 0.000000 515 +india 0 32 3.465736 0.000000 550 +postal 0 30 3.555348 0.000000 580 +symbol 0 27 3.637586 0.000000 620 +challeng 0 26 3.688879 0.000000 653 +revis 0 26 3.688879 0.000000 640 +variabl 0 23 3.806662 0.000000 715 +indian 0 22 3.850148 0.000000 769 +madra 0 8 4.875197 0.000000 1770 +connectionist 0 5 5.347108 0.000000 2430 +sowmya 1 4 5.568345 5.568345 2670 +bayesian 0 4 5.568345 0.000000 2671 +groupunivers 0 3 5.857933 0.000000 3831 +multimediaappl 0 3 5.857933 0.000000 3274 +rutger 0 3 5.857933 0.000000 3566 +austinresearchmi 0 2 6.263398 0.000000 5644 +ramachandransowmya 0 1 6.957497 0.000000 14196 +ramachandranmachin 0 1 6.957497 0.000000 14197 +ofartif 0 1 6.957497 0.000000 14198 +learningbayesian 0 1 6.957497 0.000000 14199 +withhidden 0 1 6.957497 0.000000 14200 +thisproblem 0 1 6.957497 0.000000 14201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..9beba49a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +design 0 213 1.386294 0.000000 25 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +implement 0 152 1.791759 0.000000 52 +click 1 142 1.945910 1.945910 78 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +comment 0 93 2.397895 0.000000 146 +involv 0 71 2.639057 0.000000 227 +free 0 73 2.639057 0.000000 224 +multimedia 0 68 2.708050 0.000000 258 +prof 0 64 2.772589 0.000000 273 +locat 0 59 2.833213 0.000000 303 +advisor 0 51 2.995732 0.000000 355 +hill 0 25 3.737670 0.000000 670 +countri 0 15 4.248495 0.000000 1059 +central 0 13 4.382027 0.000000 1160 +herefor 0 9 4.753590 0.000000 1483 +informationabout 0 9 4.753590 0.000000 1515 +austinaustin 0 7 5.010635 0.000000 1966 +capit 0 7 5.010635 0.000000 1957 +sciencesdepart 0 6 5.164786 0.000000 2020 +isth 0 5 5.347108 0.000000 2532 +edudepart 0 3 5.857933 0.000000 3302 +sriram 1 2 6.263398 6.263398 4550 +multimediai 0 2 6.263398 0.000000 4337 +raocurr 0 1 6.957497 0.000000 14202 +systemoper 0 1 6.957497 0.000000 14203 +multimediagroup 0 1 6.957497 0.000000 14204 +harrickvinpublicationsminegroupcontact 0 1 6.957497 0.000000 14205 +informationofficetai 0 1 6.957497 0.000000 14206 +miscellaneousotherinterest 0 1 6.957497 0.000000 14207 +pagespicturesof 0 1 6.957497 0.000000 14208 +toweraustin 0 1 6.957497 0.000000 14209 +kannada 0 1 6.957497 0.000000 14210 +koota 0 1 6.957497 0.000000 14211 +tamil 0 1 6.957497 0.000000 14212 +sangam 0 1 6.957497 0.000000 14213 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..ebcbb818 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +challeng 0 26 3.688879 0.000000 653 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 +tiger 0 3 5.857933 0.000000 3897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..97554fe9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +click 1 142 1.945910 1.945910 78 +construct 0 139 1.945910 0.000000 82 +machin 0 129 2.079442 0.000000 95 +assist 0 112 2.197225 0.000000 113 +present 0 91 2.397895 0.000000 145 +real 0 93 2.397895 0.000000 144 +educ 0 86 2.484907 0.000000 191 +master 0 76 2.564949 0.000000 216 +nation 0 74 2.639057 0.000000 240 +august 0 66 2.708050 0.000000 257 +experi 0 64 2.772589 0.000000 283 +prof 0 64 2.772589 0.000000 273 +septemb 0 65 2.772589 0.000000 274 +finger 0 52 2.995732 0.000000 354 +author 0 39 3.258097 0.000000 450 +utc 1 27 3.637586 3.637586 629 +administr 0 27 3.637586 0.000000 628 +log 0 19 4.007333 0.000000 857 +edulast 0 17 4.110874 0.000000 927 +chemic 0 5 5.347108 0.000000 2552 +korea 0 4 5.568345 0.000000 2971 +seoul 1 3 5.857933 5.857933 3783 +aloysiu 0 3 5.857933 0.000000 3829 +choiwelcom 0 2 6.263398 0.000000 5727 +sunghe 1 1 6.957497 6.957497 14214 +choisunghe 0 1 6.957497 0.000000 14215 +nuec 0 1 6.957497 0.000000 14216 +choiemail 0 1 6.957497 0.000000 14217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..edd2b12d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +implement 0 152 1.791759 0.000000 52 +hall 1 146 1.945910 1.945910 65 +support 0 132 1.945910 0.000000 83 +provid 0 121 2.079442 0.000000 94 +postscript 0 131 2.079442 0.000000 90 +pleas 0 113 2.197225 0.000000 114 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +call 1 91 2.397895 2.397895 153 +center 0 88 2.397895 0.000000 158 +member 0 84 2.484907 0.000000 165 +larg 0 82 2.484907 0.000000 168 +effici 0 73 2.639057 0.000000 233 +taylor 1 63 2.772589 2.772589 287 +descript 0 64 2.772589 0.000000 271 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +best 0 59 2.833213 0.000000 299 +finger 0 52 2.995732 0.000000 354 +hardwar 0 51 2.995732 0.000000 350 +pointer 0 48 3.044522 0.000000 368 +standard 0 48 3.044522 0.000000 365 +postal 0 30 3.555348 0.000000 580 +usual 0 28 3.610918 0.000000 608 +full 0 28 3.610918 0.000000 615 +reach 0 24 3.761200 0.000000 688 +inth 0 22 3.850148 0.000000 741 +along 0 18 4.060443 0.000000 878 +easi 0 16 4.174387 0.000000 969 +brief 0 16 4.174387 0.000000 1001 +novel 0 15 4.248495 0.000000 1039 +persist 1 11 4.553877 4.553877 1367 +motorola 0 9 4.753590 0.000000 1546 +oop 0 8 4.875197 0.000000 1778 +myresum 0 6 5.164786 0.000000 2199 +informationi 0 3 5.857933 0.000000 3871 +swizzl 0 3 5.857933 0.000000 3883 +sheetal 1 2 6.263398 6.263398 5684 +isvia 0 2 6.263398 0.000000 5637 +mypubl 0 2 6.263398 0.000000 5707 +somerset 0 2 6.263398 0.000000 5639 +kakkad 0 2 6.263398 0.000000 5685 +kakkadsheet 0 1 6.957497 0.000000 14218 +kakkadcontact 0 1 6.957497 0.000000 14219 +storagesystem 0 1 6.957497 0.000000 14220 +faulttim 0 1 6.957497 0.000000 14221 +whilefinish 0 1 6.957497 0.000000 14222 +svkakkad 0 1 6.957497 0.000000 14223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..675e6cd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +check 0 115 2.197225 0.000000 118 +comment 0 93 2.397895 0.000000 146 +octob 0 89 2.397895 0.000000 156 +school 0 84 2.484907 0.000000 188 +want 0 79 2.564949 0.000000 199 +know 0 80 2.564949 0.000000 198 +suggest 0 53 2.944439 0.000000 331 +finger 0 52 2.995732 0.000000 354 +china 0 37 3.332205 0.000000 487 +chines 1 29 3.583519 3.583519 595 +art 0 29 3.583519 0.000000 593 +weather 0 28 3.610918 0.000000 618 +campu 0 27 3.637586 0.000000 623 +todai 0 25 3.737670 0.000000 672 +highli 0 23 3.806662 0.000000 725 +voic 0 21 3.912023 0.000000 806 +item 0 19 4.007333 0.000000 856 +medic 0 17 4.110874 0.000000 958 +academi 0 8 4.875197 0.000000 1735 +scholar 0 6 5.164786 0.000000 2180 +appreci 0 5 5.347108 0.000000 2374 +sale 0 3 5.857933 0.000000 3688 +meyour 0 3 5.857933 0.000000 3858 +registrar 0 2 6.263398 0.000000 5611 +gradaut 0 2 6.263398 0.000000 5612 +studiesut 0 2 6.263398 0.000000 5613 +novelschines 0 2 6.263398 0.000000 5610 +visitorsinc 0 2 6.263398 0.000000 5616 +shengm 1 1 6.957497 6.957497 14224 +homepageabout 0 1 6.957497 0.000000 14225 +classmatesclass 0 1 6.957497 0.000000 14226 +ustc 0 1 6.957497 0.000000 14227 +sciencesus 0 1 6.957497 0.000000 14228 +linksut 0 1 6.957497 0.000000 14229 +libraryutaccesschines 0 1 6.957497 0.000000 14230 +associationchina 0 1 6.957497 0.000000 14231 +chinesechinainternet 0 1 6.957497 0.000000 14232 +magazinestsinghua 0 1 6.957497 0.000000 14233 +bbsncic 0 1 6.957497 0.000000 14234 +bbschines 0 1 6.957497 0.000000 14235 +classicsabout 0 1 6.957497 0.000000 14236 +austinwhat 0 1 6.957497 0.000000 14237 +citylimitsclassifi 0 1 6.957497 0.000000 14238 +austinto 0 1 6.957497 0.000000 14239 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..d37012f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +frame 1 24 3.761200 3.761200 684 +wang 0 21 3.912023 0.000000 790 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..fafc6052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..0e1b1c8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +site 0 106 2.197225 0.000000 119 +send 0 114 2.197225 0.000000 109 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +name 0 72 2.639057 0.000000 220 +multimedia 0 68 2.708050 0.000000 258 +dept 0 64 2.772589 0.000000 291 +plai 0 60 2.833213 0.000000 307 +done 0 47 3.091042 0.000000 381 +option 0 30 3.555348 0.000000 575 +watson 0 8 4.875197 0.000000 1691 +bore 0 7 5.010635 0.000000 1948 +internship 0 3 5.857933 0.000000 3764 +renu 0 1 6.957497 0.000000 14240 +tewarirenu 0 1 6.957497 0.000000 14241 +tewariwhat 0 1 6.957497 0.000000 14242 +addresshom 0 1 6.957497 0.000000 14243 +tewari 0 1 6.957497 0.000000 14244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..4af96528 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +email 0 220 1.386294 0.000000 29 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +updat 0 191 1.609438 0.000000 41 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +austin 0 168 1.791759 0.000000 63 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +studi 0 120 2.079442 0.000000 91 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +present 0 91 2.397895 0.000000 145 +sinc 0 90 2.397895 0.000000 159 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +stuff 0 87 2.484907 0.000000 171 +resourc 0 81 2.484907 0.000000 172 +come 0 78 2.564949 0.000000 202 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +messag 0 76 2.564949 0.000000 212 +state 0 76 2.564949 0.000000 207 +resum 0 79 2.564949 0.000000 217 +logic 0 71 2.639057 0.000000 230 +addit 0 74 2.639057 0.000000 228 +appli 0 71 2.639057 0.000000 226 +html 0 75 2.639057 0.000000 235 +laboratori 0 63 2.772589 0.000000 292 +taylor 0 63 2.772589 0.000000 287 +investig 0 51 2.995732 0.000000 353 +format 0 48 3.044522 0.000000 356 +done 0 47 3.091042 0.000000 381 +formal 1 37 3.332205 3.332205 478 +mean 0 37 3.332205 0.000000 477 +soon 0 36 3.367296 0.000000 494 +random 0 34 3.401197 0.000000 511 +photo 0 31 3.496508 0.000000 561 +secur 0 30 3.555348 0.000000 577 +postal 0 30 3.555348 0.000000 580 +client 0 25 3.737670 0.000000 679 +synthesi 1 20 3.951244 3.951244 834 +verif 0 20 3.951244 0.000000 826 +analyz 0 17 4.110874 0.000000 925 +draft 0 14 4.317488 0.000000 1085 +finit 0 14 4.317488 0.000000 1106 +stai 0 12 4.465908 0.000000 1215 +tune 0 12 4.465908 0.000000 1227 +genet 0 10 4.653960 0.000000 1409 +metric 0 7 5.010635 0.000000 1831 +photographi 0 6 5.164786 0.000000 2146 +pierc 1 4 5.568345 5.568345 2623 +queu 0 4 5.568345 0.000000 2648 +blvd 0 4 5.568345 0.000000 3007 +evolutionari 0 3 5.857933 0.000000 3898 +amwork 0 2 6.263398 0.000000 4850 +communicatewith 0 2 6.263398 0.000000 5062 +tumlin 1 1 6.957497 6.957497 14245 +brenda 0 1 6.957497 0.000000 14246 +ladd 0 1 6.957497 0.000000 14247 +authenticationprotocol 0 1 6.957497 0.000000 14248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..6bca5458 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +design 0 213 1.386294 0.000000 25 +fall 1 181 1.609438 1.609438 40 +oper 0 180 1.609438 0.000000 34 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +texa 0 160 1.791759 0.000000 64 +algorithm 0 162 1.791759 0.000000 57 +perform 1 143 1.945910 1.945910 74 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +postscript 0 131 2.079442 0.000000 90 +introduct 0 126 2.079442 0.000000 87 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +theori 1 111 2.197225 2.197225 127 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +techniqu 0 99 2.302585 0.000000 138 +book 0 99 2.302585 0.000000 131 +associ 1 93 2.397895 2.397895 151 +homepag 0 93 2.397895 0.000000 148 +commun 0 95 2.397895 0.000000 157 +graphic 0 90 2.397895 0.000000 147 +present 0 91 2.397895 0.000000 145 +real 0 93 2.397895 0.000000 144 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +school 0 84 2.484907 0.000000 188 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +resum 0 79 2.564949 0.000000 217 +april 0 77 2.564949 0.000000 196 +know 0 80 2.564949 0.000000 198 +html 0 75 2.639057 0.000000 235 +java 1 70 2.708050 2.708050 248 +view 0 70 2.708050 0.000000 254 +multimedia 0 68 2.708050 0.000000 258 +differ 0 66 2.708050 0.000000 253 +prof 2 64 2.772589 5.545178 273 +visit 1 63 2.772589 2.772589 288 +written 0 63 2.772589 0.000000 278 +result 0 65 2.772589 0.000000 281 +plan 0 65 2.772589 0.000000 272 +laboratori 0 63 2.772589 0.000000 292 +creat 0 63 2.772589 0.000000 277 +plai 0 60 2.833213 0.000000 307 +unix 1 58 2.890372 2.890372 308 +semest 0 58 2.890372 0.000000 312 +sampl 0 53 2.944439 0.000000 339 +maintain 0 51 2.995732 0.000000 342 +friend 1 48 3.044522 3.044522 376 +format 1 48 3.044522 3.044522 356 +life 0 50 3.044522 0.000000 375 +standard 0 48 3.044522 0.000000 365 +still 0 50 3.044522 0.000000 362 +visitor 0 49 3.044522 0.000000 371 +netscap 1 44 3.135494 3.135494 395 +anoth 0 45 3.135494 0.000000 408 +protocol 0 45 3.135494 0.000000 407 +made 0 44 3.135494 0.000000 398 +favorit 0 44 3.135494 0.000000 410 +compani 1 41 3.218876 3.218876 423 +music 0 42 3.218876 0.000000 436 +form 0 39 3.258097 0.000000 443 +movi 0 40 3.258097 0.000000 459 +tutori 0 39 3.258097 0.000000 437 +china 1 37 3.332205 3.332205 487 +robot 0 36 3.367296 0.000000 497 +copyright 0 36 3.367296 0.000000 495 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +kind 0 32 3.465736 0.000000 541 +anim 1 31 3.496508 3.496508 557 +compon 0 30 3.555348 0.000000 570 +chines 1 29 3.583519 3.583519 595 +synchron 0 29 3.583519 0.000000 588 +full 0 28 3.610918 0.000000 615 +quit 0 27 3.637586 0.000000 633 +never 0 25 3.737670 0.000000 671 +background 0 25 3.737670 0.000000 664 +reach 0 24 3.761200 0.000000 688 +mobil 0 23 3.806662 0.000000 730 +thank 0 23 3.806662 0.000000 721 +recommend 1 22 3.850148 3.850148 737 +wang 1 21 3.912023 3.912023 790 +viewer 1 21 3.912023 3.912023 787 +chen 0 21 3.912023 0.000000 791 +leav 0 21 3.912023 0.000000 772 +mpeg 1 20 3.951244 3.951244 831 +applet 0 20 3.951244 0.000000 827 +beij 0 19 4.007333 0.000000 876 +demo 1 18 4.060443 4.060443 888 +listen 0 18 4.060443 0.000000 907 +seek 0 17 4.110874 0.000000 954 +normal 0 16 4.174387 0.000000 995 +tsinghua 1 13 4.382027 4.382027 1195 +misc 0 13 4.382027 0.000000 1124 +clock 0 11 4.553877 0.000000 1320 +host 0 11 4.553877 0.000000 1306 +player 0 11 4.553877 0.000000 1371 +perl 0 11 4.553877 0.000000 1332 +lake 0 11 4.553877 0.000000 1373 +song 0 11 4.553877 0.000000 1380 +jersei 0 9 4.753590 0.000000 1587 +sound 0 9 4.753590 0.000000 1605 +trust 0 9 4.753590 0.000000 1583 +pure 0 8 4.875197 0.000000 1776 +univeristi 0 8 4.875197 0.000000 1754 +counter 0 8 4.875197 0.000000 1765 +misra 1 7 5.010635 5.010635 1856 +clip 0 7 5.010635 0.000000 1868 +attach 0 7 5.010635 0.000000 1785 +accord 0 7 5.010635 0.000000 1826 +bell 0 6 5.164786 0.000000 2224 +troubl 0 6 5.164786 0.000000 2002 +fussel 0 5 5.347108 0.000000 2300 +opengl 0 5 5.347108 0.000000 2299 +lang 0 5 5.347108 0.000000 2294 +shanghai 1 4 5.568345 5.568345 2925 +republ 0 4 5.568345 0.000000 3032 +restructur 0 4 5.568345 0.000000 2775 +gouda 0 4 5.568345 0.000000 3021 +batori 0 4 5.568345 0.000000 2690 +blvd 0 4 5.568345 0.000000 3007 +tong 1 3 5.857933 5.857933 3258 +zuckerman 0 3 5.857933 0.000000 3205 +underconstruct 0 3 5.857933 0.000000 3889 +nanj 1 2 6.263398 6.263398 5728 +mini 0 2 6.263398 0.000000 5548 +decod 0 2 6.263398 0.000000 4936 +zodiac 0 2 6.263398 0.000000 5729 +twang 0 2 6.263398 0.000000 5730 +eagl 0 2 6.263398 0.000000 5731 +jiao 1 1 6.957497 6.957497 14249 +lucent 1 1 6.957497 6.957497 14250 +nank 0 1 6.957497 0.000000 14251 +summerluc 0 1 6.957497 0.000000 14252 +thissumm 0 1 6.957497 0.000000 14253 +plexton 0 1 6.957497 0.000000 14254 +libari 0 1 6.957497 0.000000 14255 +glut 0 1 6.957497 0.000000 14256 +mariah 0 1 6.957497 0.000000 14257 +boyz 0 1 6.957497 0.000000 14258 +babyfac 0 1 6.957497 0.000000 14259 +haiq 0 1 6.957497 0.000000 14260 +shenfeng 0 1 6.957497 0.000000 14261 +deskmat 0 1 6.957497 0.000000 14262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..a988fca2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +graduat 0 215 1.386294 0.000000 31 +group 1 183 1.609438 1.609438 36 +updat 0 191 1.609438 0.000000 41 +texa 0 160 1.791759 0.000000 64 +base 0 165 1.791759 0.000000 50 +austin 0 168 1.791759 0.000000 63 +address 0 170 1.791759 0.000000 62 +machin 1 129 2.079442 2.079442 95 +welcom 0 122 2.079442 0.000000 99 +make 0 111 2.197225 0.000000 120 +place 0 106 2.197225 0.000000 124 +well 0 109 2.197225 0.000000 121 +associ 1 93 2.397895 2.397895 151 +search 0 95 2.397895 0.000000 155 +learn 1 86 2.484907 2.484907 170 +activ 0 84 2.484907 0.000000 182 +thing 0 84 2.484907 0.000000 189 +resourc 0 81 2.484907 0.000000 172 +orient 0 80 2.564949 0.000000 205 +prof 0 64 2.772589 0.000000 273 +new 0 64 2.772589 0.000000 262 +januari 0 62 2.772589 0.000000 264 +special 0 56 2.890372 0.000000 320 +archiv 0 49 3.044522 0.000000 364 +natur 1 44 3.135494 3.135494 406 +york 0 41 3.218876 0.000000 435 +print 0 34 3.401197 0.000000 503 +dissert 0 32 3.465736 0.000000 549 +focu 0 30 3.555348 0.000000 571 +consid 0 29 3.583519 0.000000 590 +weather 0 28 3.610918 0.000000 618 +utc 1 27 3.637586 3.637586 629 +yahoo 0 24 3.761200 0.000000 707 +decis 0 23 3.806662 0.000000 728 +voic 0 21 3.912023 0.000000 806 +supervis 0 20 3.951244 0.000000 840 +lyco 0 19 4.007333 0.000000 871 +context 0 13 4.382027 0.000000 1153 +perman 0 11 4.553877 0.000000 1372 +acquisit 0 10 4.653960 0.000000 1465 +moonei 0 9 4.753590 0.000000 1520 +linguist 0 9 4.753590 0.000000 1593 +european 0 8 4.875197 0.000000 1763 +altavista 0 6 5.164786 0.000000 2222 +infoseek 0 6 5.164786 0.000000 2188 +pars 0 5 5.347108 0.000000 2321 +raymond 0 5 5.347108 0.000000 2313 +hermjakob 0 3 5.857933 0.000000 3876 +groupand 0 3 5.857933 0.000000 3873 +signll 0 3 5.857933 0.000000 3877 +galaxi 0 3 5.857933 0.000000 3603 +deutsch 0 3 5.857933 0.000000 3802 +pageulf 0 1 6.957497 0.000000 14263 +hermjakobhello 0 1 6.957497 0.000000 14264 +thedept 0 1 6.957497 0.000000 14265 +austinand 0 1 6.957497 0.000000 14266 +aboutexampl 0 1 6.957497 0.000000 14267 +translationund 0 1 6.957497 0.000000 14268 +einet 0 1 6.957497 0.000000 14269 +dernir 0 1 6.957497 0.000000 14270 +nouvel 0 1 6.957497 0.000000 14271 +alsac 0 1 6.957497 0.000000 14272 +spiegel 0 1 6.957497 0.000000 14273 +svenska 0 1 6.957497 0.000000 14274 +dagbladet 0 1 6.957497 0.000000 14275 +tagesspiegel 0 1 6.957497 0.000000 14276 +vanguardia 0 1 6.957497 0.000000 14277 +welt 0 1 6.957497 0.000000 14278 +zeitplusacm 0 1 6.957497 0.000000 14279 +moltkestr 0 1 6.957497 0.000000 14280 +bnde 0 1 6.957497 0.000000 14281 +germanyphon 0 1 6.957497 0.000000 14282 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..d0fe765e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +softwar 1 220 1.386294 1.386294 30 +gener 0 220 1.386294 0.000000 27 +group 0 183 1.609438 0.000000 36 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +schedul 1 119 2.079442 2.079442 85 +peopl 0 96 2.302585 0.000000 132 +refer 0 78 2.564949 0.000000 203 +new 0 64 2.772589 0.000000 262 +organ 0 65 2.772589 0.000000 265 +taylor 0 63 2.772589 0.000000 287 +street 0 63 2.772589 0.000000 293 +directori 0 45 3.135494 0.000000 396 +sport 1 25 3.737670 3.737670 683 +entertain 0 12 4.465908 0.000000 1286 +magic 0 11 4.553877 0.000000 1358 +perman 0 11 4.553877 0.000000 1372 +gather 0 8 4.875197 0.000000 1719 +lanc 1 4 5.568345 5.568345 3022 +champion 0 4 5.568345 0.000000 2982 +tokuda 1 3 5.857933 5.857933 3266 +twelv 0 3 5.857933 0.000000 3899 +hawaii 0 3 5.857933 0.000000 3888 +intramur 1 2 6.263398 6.263398 5590 +unicron 0 1 6.957497 0.000000 14283 +financ 0 1 6.957497 0.000000 14284 +heeia 0 1 6.957497 0.000000 14285 +kaneoh 0 1 6.957497 0.000000 14286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..68b4957f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +hall 1 146 1.945910 1.945910 65 +click 0 142 1.945910 0.000000 78 +send 0 114 2.197225 0.000000 109 +find 0 111 2.197225 0.000000 111 +taylor 0 63 2.772589 0.000000 287 +finger 0 52 2.995732 0.000000 354 +telephon 0 50 3.044522 0.000000 373 +eduoffic 0 33 3.433987 0.000000 531 +postal 0 30 3.555348 0.000000 580 +log 0 19 4.007333 0.000000 857 +account 0 18 4.060443 0.000000 882 +whether 0 17 4.110874 0.000000 918 +informationemail 0 9 4.753590 0.000000 1564 +painter 0 2 6.263398 0.000000 4187 +balayoghanv 0 1 6.957497 0.000000 14432 +balayoghancontact 0 1 6.957497 0.000000 14433 +ineosdi 0 1 6.957497 0.000000 14434 +bookmarksvbb 0 1 6.957497 0.000000 14435 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..82107587 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +oper 1 180 1.609438 1.609438 34 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +utexa 0 189 1.609438 0.000000 44 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +file 0 132 1.945910 0.000000 70 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +intern 1 108 2.197225 2.197225 128 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +advanc 0 99 2.302585 0.000000 130 +proceed 1 93 2.397895 2.397895 152 +commun 1 95 2.397895 2.397895 157 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +ieee 1 86 2.484907 2.484907 190 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +novemb 0 81 2.484907 0.000000 179 +second 0 81 2.484907 0.000000 166 +wide 0 84 2.484907 0.000000 185 +larg 0 82 2.484907 0.000000 168 +server 1 76 2.564949 2.564949 204 +state 0 76 2.564949 0.000000 207 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +optim 0 79 2.564949 0.000000 197 +april 0 77 2.564949 0.000000 196 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +multimedia 2 68 2.708050 5.416100 258 +main 0 67 2.708050 0.000000 256 +integr 0 67 2.708050 0.000000 245 +laboratori 1 63 2.772589 2.772589 292 +taylor 0 63 2.772589 0.000000 287 +march 0 61 2.833213 0.000000 295 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +profession 0 51 2.995732 0.000000 345 +digit 0 52 2.995732 0.000000 348 +california 1 46 3.091042 3.091042 388 +video 1 44 3.135494 3.135494 405 +third 0 43 3.178054 0.000000 412 +tutori 0 39 3.258097 0.000000 437 +industri 0 38 3.295837 0.000000 464 +china 0 37 3.332205 0.000000 487 +award 2 34 3.401197 6.802394 523 +committe 1 34 3.401197 3.401197 522 +tech 0 35 3.401197 0.000000 515 +board 0 33 3.433987 0.000000 528 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +storag 0 31 3.496508 0.000000 553 +rang 0 30 3.555348 0.000000 565 +chair 1 29 3.583519 3.583519 596 +scale 0 28 3.610918 0.000000 613 +arrai 0 27 3.637586 0.000000 627 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +supercomput 0 25 3.737670 0.000000 681 +initi 1 23 3.806662 3.806662 717 +honor 0 23 3.806662 0.000000 729 +disk 1 22 3.850148 3.850148 747 +director 0 22 3.850148 0.000000 767 +indian 0 22 3.850148 0.000000 769 +beij 0 19 4.007333 0.000000 876 +speed 0 18 4.060443 0.000000 911 +failur 0 18 4.060443 0.000000 898 +germani 0 17 4.110874 0.000000 946 +diego 1 16 4.174387 4.174387 992 +taiwan 0 16 4.174387 0.000000 1006 +intel 0 16 4.174387 0.000000 1000 +atth 0 15 4.248495 0.000000 1019 +audio 1 14 4.317488 4.317488 1094 +heterogen 0 14 4.317488 0.000000 1090 +nasa 0 13 4.382027 0.000000 1188 +career 1 12 4.465908 4.465908 1287 +placement 0 10 4.653960 0.000000 1420 +ataustin 0 9 4.753590 0.000000 1610 +editori 0 9 4.753590 0.000000 1611 +vice 0 9 4.753590 0.000000 1604 +transmiss 0 9 4.753590 0.000000 1588 +recoveri 0 9 4.753590 0.000000 1474 +creativ 0 8 4.875197 0.000000 1777 +harrick 0 7 5.010635 0.000000 1849 +bombai 0 7 5.010635 0.000000 1972 +sponsor 1 6 5.164786 5.164786 2133 +internationalconfer 0 6 5.164786 0.000000 2051 +microsystem 0 6 5.164786 0.000000 2160 +ofdistribut 0 5 5.347108 0.000000 2316 +row 0 5 5.347108 0.000000 2330 +colorado 0 4 5.568345 0.000000 2938 +innov 0 4 5.568345 0.000000 2933 +multimediasystem 0 4 5.568345 0.000000 2701 +venkat 0 4 5.568345 0.000000 2702 +goyal 1 3 5.857933 5.857933 3268 +multimediacomput 0 3 5.857933 0.000000 3841 +mobilecomput 0 3 5.857933 0.000000 3629 +shenoi 0 3 5.857933 0.000000 3269 +ftc 0 3 5.857933 0.000000 3275 +rangan 0 3 5.857933 0.000000 3270 +durham 0 3 5.857933 0.000000 3279 +hampshir 0 3 5.857933 0.000000 3280 +mitsubishi 0 3 5.857933 0.000000 3842 +merl 0 3 5.857933 0.000000 3843 +andnetwork 1 2 6.263398 6.263398 5751 +icdc 0 2 6.263398 0.000000 5191 +protocolsfor 0 2 6.263398 0.000000 5204 +inmulti 0 2 6.263398 0.000000 4334 +annualintern 0 2 6.263398 0.000000 4335 +pasadena 0 2 6.263398 0.000000 4336 +gemmel 0 2 6.263398 0.000000 4332 +kandlur 0 2 6.263398 0.000000 4321 +ofmultimedia 0 2 6.263398 0.000000 4322 +ieeeintern 0 2 6.263398 0.000000 4333 +icmc 0 2 6.263398 0.000000 4323 +delaybound 0 2 6.263398 0.000000 4342 +fordigit 0 2 6.263398 0.000000 5752 +nossdav 0 2 6.263398 0.000000 4344 +federalinstitut 0 2 6.263398 0.000000 5539 +eurograph 1 1 6.957497 6.957497 14436 +vinharrick 0 1 6.957497 0.000000 14437 +electronicimag 0 1 6.957497 0.000000 14438 +kaohsiung 0 1 6.957497 0.000000 14439 +rostock 0 1 6.957497 0.000000 14440 +interestmultimedia 0 1 6.957497 0.000000 14441 +anend 0 1 6.957497 0.000000 14442 +thintern 0 1 6.957497 0.000000 14443 +designingmultimedia 0 1 6.957497 0.000000 14444 +foundationresearch 0 1 6.957497 0.000000 14445 +electricresearch 0 1 6.957497 0.000000 14446 +electrospacesystem 0 1 6.957497 0.000000 14447 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..b8c387a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +number 1 130 2.079442 2.079442 97 +report 0 131 2.079442 0.000000 92 +pleas 1 113 2.197225 2.197225 114 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +comment 0 93 2.397895 0.000000 146 +thing 0 84 2.484907 0.000000 189 +activ 0 84 2.484907 0.000000 182 +academ 0 82 2.484907 0.000000 178 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +street 0 63 2.772589 0.000000 293 +semest 0 58 2.890372 0.000000 312 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +give 0 50 3.044522 0.000000 359 +visitor 0 49 3.044522 0.000000 371 +soon 0 36 3.367296 0.000000 494 +go 0 33 3.433987 0.000000 529 +india 0 32 3.465736 0.000000 550 +art 0 29 3.583519 0.000000 593 +pass 0 28 3.610918 0.000000 611 +though 0 27 3.637586 0.000000 622 +never 0 25 3.737670 0.000000 671 +color 0 22 3.850148 0.000000 762 +increas 0 20 3.951244 0.000000 829 +sure 0 20 3.951244 0.000000 813 +log 0 19 4.007333 0.000000 857 +less 0 18 4.060443 0.000000 892 +medic 0 17 4.110874 0.000000 958 +match 0 16 4.174387 0.000000 965 +sign 0 16 4.174387 0.000000 970 +guest 0 12 4.465908 0.000000 1220 +incomplet 0 9 4.753590 0.000000 1575 +risk 0 8 4.875197 0.000000 1689 +yeah 0 6 5.164786 0.000000 2195 +put 0 6 5.164786 0.000000 2017 +guestbook 1 5 5.347108 5.347108 2475 +delhi 0 5 5.347108 0.000000 2530 +haven 1 4 5.568345 5.568345 3037 +shall 0 3 5.857933 0.000000 3891 +vipin 1 2 6.263398 6.263398 5579 +interestscours 0 2 6.263398 0.000000 5026 +reset 0 2 6.263398 0.000000 5236 +decreas 0 2 6.263398 0.000000 4877 +undergraduatefrom 0 1 6.957497 0.000000 14448 +interestsreportsy 0 1 6.957497 0.000000 14449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..53070551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,156 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +paper 1 205 1.609438 1.609438 38 +utexa 0 189 1.609438 0.000000 44 +austin 0 168 1.791759 0.000000 63 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +texa 0 160 1.791759 0.000000 64 +note 1 142 1.945910 1.945910 67 +object 0 138 1.945910 0.000000 79 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +lectur 0 135 1.945910 0.000000 73 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +hall 0 146 1.945910 0.000000 65 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +mathemat 1 108 2.197225 2.197225 123 +place 0 106 2.197225 0.000000 124 +world 0 115 2.197225 0.000000 126 +intern 0 108 2.197225 0.000000 128 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +associ 0 93 2.397895 0.000000 151 +institut 0 84 2.484907 0.000000 187 +school 0 84 2.484907 0.000000 188 +state 0 76 2.564949 0.000000 207 +line 0 75 2.639057 0.000000 231 +new 1 64 2.772589 2.772589 262 +import 0 65 2.772589 0.000000 282 +taylor 0 63 2.772589 0.000000 287 +reason 0 57 2.890372 0.000000 318 +three 0 54 2.944439 0.000000 330 +profession 0 51 2.995732 0.000000 345 +right 0 48 3.044522 0.000000 363 +california 0 46 3.091042 0.000000 388 +better 0 45 3.135494 0.000000 401 +favorit 0 44 3.135494 0.000000 410 +live 0 40 3.258097 0.000000 451 +paul 0 38 3.295837 0.000000 471 +close 0 38 3.295837 0.000000 465 +feel 1 37 3.332205 3.332205 483 +sciencesunivers 0 37 3.332205 0.000000 486 +survei 0 35 3.401197 0.000000 513 +human 1 32 3.465736 3.465736 546 +dissert 0 32 3.465736 0.000000 549 +taken 0 31 3.496508 0.000000 555 +scientist 0 31 3.496508 0.000000 560 +postal 0 30 3.555348 0.000000 580 +turn 0 29 3.583519 0.000000 586 +quot 0 29 3.583519 0.000000 582 +mind 0 27 3.637586 0.000000 632 +fellow 0 24 3.761200 0.000000 701 +famili 0 23 3.806662 0.000000 735 +initi 0 23 3.806662 0.000000 717 +finish 0 22 3.850148 0.000000 748 +wang 0 21 3.912023 0.000000 790 +appropri 0 18 4.060443 0.000000 883 +germani 1 17 4.110874 4.110874 946 +white 0 17 4.110874 0.000000 951 +precis 0 15 4.248495 0.000000 1023 +countri 0 15 4.248495 0.000000 1059 +incomput 0 14 4.317488 0.000000 1096 +becam 0 14 4.317488 0.000000 1117 +stori 0 14 4.317488 0.000000 1087 +daniel 0 12 4.465908 0.000000 1233 +vladimir 1 11 4.553877 4.553877 1324 +america 0 11 4.553877 0.000000 1370 +black 1 10 4.653960 4.653960 1418 +sentenc 0 10 4.653960 0.000000 1413 +gain 0 8 4.875197 0.000000 1730 +secretari 0 8 4.875197 0.000000 1775 +elect 0 8 4.875197 0.000000 1771 +joke 0 8 4.875197 0.000000 1620 +centenni 0 7 5.010635 0.000000 1967 +sciencesat 0 7 5.010635 0.000000 1968 +austinaustin 0 7 5.010635 0.000000 1966 +race 1 5 5.347108 5.347108 2417 +lifschitz 0 5 5.347108 0.000000 2542 +ortega 0 5 5.347108 0.000000 2559 +lost 0 5 5.347108 0.000000 2358 +petersburg 1 4 5.568345 5.568345 2989 +insight 0 4 5.568345 0.000000 3024 +nonmonoton 0 4 5.568345 0.000000 3023 +evid 0 4 5.568345 0.000000 2768 +ratio 0 4 5.568345 0.000000 2942 +silli 0 4 5.568345 0.000000 3038 +dijkstra 0 3 5.857933 0.000000 3173 +armi 0 3 5.857933 0.000000 3562 +prison 0 3 5.857933 0.000000 3907 +tortur 0 3 5.857933 0.000000 3634 +district 0 3 5.857933 0.000000 3756 +civil 0 3 5.857933 0.000000 3908 +russia 1 2 6.263398 6.263398 5756 +spirit 0 2 6.263398 0.000000 5234 +theamerican 0 2 6.263398 0.000000 5120 +programmingand 0 2 6.263398 0.000000 4940 +edsger 0 2 6.263398 0.000000 5740 +convoc 0 2 6.263398 0.000000 5757 +nomin 0 2 6.263398 0.000000 5758 +helm 0 2 6.263398 0.000000 4217 +burton 0 2 6.263398 0.000000 5759 +polic 0 2 6.263398 0.000000 5560 +democrat 0 2 6.263398 0.000000 5567 +admit 0 2 6.263398 0.000000 5429 +neutral 0 2 6.263398 0.000000 5760 +lifschitzwhen 0 1 6.957497 0.000000 14488 +burden 0 1 6.957497 0.000000 14489 +downcast 0 1 6.957497 0.000000 14490 +gladli 0 1 6.957497 0.000000 14491 +therealm 0 1 6.957497 0.000000 14492 +lucid 0 1 6.957497 0.000000 14493 +grasp 0 1 6.957497 0.000000 14494 +isobtain 0 1 6.957497 0.000000 14495 +pleasantli 0 1 6.957497 0.000000 14496 +conceptform 0 1 6.957497 0.000000 14497 +bernai 0 1 6.957497 0.000000 14498 +lifschitzgottesman 0 1 6.957497 0.000000 14499 +texasat 0 1 6.957497 0.000000 14500 +forartifici 0 1 6.957497 0.000000 14501 +intelligenceb 0 1 6.957497 0.000000 14502 +branchof 0 1 6.957497 0.000000 14503 +steklov 0 1 6.957497 0.000000 14504 +interesttempor 0 1 6.957497 0.000000 14505 +reasoningand 0 1 6.957497 0.000000 14506 +aboutactionslog 0 1 6.957497 0.000000 14507 +reasoningteachingoth 0 1 6.957497 0.000000 14508 +activitiespap 0 1 6.957497 0.000000 14509 +bylifschitz 0 1 6.957497 0.000000 14510 +studentsrecommend 0 1 6.957497 0.000000 14511 +speechgood 0 1 6.957497 0.000000 14512 +madelein 0 1 6.957497 0.000000 14513 +albright 0 1 6.957497 0.000000 14514 +regain 0 1 6.957497 0.000000 14515 +soviet 0 1 6.957497 0.000000 14516 +recycl 0 1 6.957497 0.000000 14517 +actbad 0 1 6.957497 0.000000 14518 +sequest 0 1 6.957497 0.000000 14519 +archeolog 0 1 6.957497 0.000000 14520 +societynot 0 1 6.957497 0.000000 14521 +redrawn 0 1 6.957497 0.000000 14522 +basisoth 0 1 6.957497 0.000000 14523 +amnesti 0 1 6.957497 0.000000 14524 +monthcontact 0 1 6.957497 0.000000 14525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..0d936011 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +utexa 0 189 1.609438 0.000000 44 +algorithm 1 162 1.791759 1.791759 57 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +hall 0 146 1.945910 0.000000 65 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +theori 0 111 2.197225 0.000000 127 +access 0 102 2.302585 0.000000 136 +sinc 0 90 2.397895 0.000000 159 +complet 0 77 2.564949 0.000000 208 +april 0 77 2.564949 0.000000 196 +effici 0 73 2.639057 0.000000 233 +evalu 1 64 2.772589 2.772589 266 +copi 0 63 2.772589 0.000000 284 +taylor 0 63 2.772589 0.000000 287 +visit 0 63 2.772589 0.000000 288 +faculti 0 56 2.890372 0.000000 325 +vita 0 38 3.295837 0.000000 473 +profil 0 30 3.555348 0.000000 581 +postal 0 30 3.555348 0.000000 580 +experiment 0 26 3.688879 0.000000 645 +mine 0 26 3.688879 0.000000 654 +sequenti 0 22 3.850148 0.000000 745 +offici 0 18 4.060443 0.000000 894 +princeton 0 15 4.248495 0.000000 1042 +interestsmi 0 10 4.653960 0.000000 1462 +regent 0 5 5.347108 0.000000 2551 +vijaya 0 4 5.568345 0.000000 2677 +primarilyin 0 3 5.857933 0.000000 3832 +ramachandranvijaya 0 1 6.957497 0.000000 14450 +ramachandranblakemor 0 1 6.957497 0.000000 14451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..3cf234c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +mail 2 238 1.386294 2.772588 22 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +utexa 2 189 1.609438 3.218876 44 +list 2 201 1.609438 3.218876 39 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +fall 0 181 1.609438 0.000000 40 +algorithm 2 162 1.791759 3.583518 57 +parallel 1 169 1.791759 1.791759 60 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +machin 0 129 2.079442 0.000000 95 +spring 0 131 2.079442 0.000000 88 +schedul 0 119 2.079442 0.000000 85 +confer 0 126 2.079442 0.000000 100 +theori 2 111 2.197225 4.394450 127 +send 1 114 2.197225 2.197225 109 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +mani 0 92 2.397895 0.000000 150 +activ 1 84 2.484907 2.484907 182 +ieee 1 86 2.484907 2.484907 190 +resourc 0 81 2.484907 0.000000 172 +novemb 0 81 2.484907 0.000000 179 +member 0 84 2.484907 0.000000 165 +messag 1 76 2.564949 2.564949 212 +state 1 76 2.564949 2.564949 207 +method 0 80 2.564949 0.000000 213 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +meet 1 72 2.639057 2.639057 229 +solv 0 73 2.639057 0.000000 234 +david 0 71 2.639057 0.000000 232 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +differ 1 66 2.708050 2.708050 253 +complex 1 64 2.772589 2.772589 269 +foundat 1 62 2.772589 2.772589 286 +result 0 65 2.772589 0.000000 281 +organ 0 65 2.772589 0.000000 265 +import 0 65 2.772589 0.000000 282 +virtual 0 62 2.772589 0.000000 285 +dept 0 64 2.772589 0.000000 291 +locat 1 59 2.833213 2.833213 303 +faculti 1 56 2.890372 2.890372 325 +sever 1 56 2.890372 2.890372 322 +major 0 56 2.890372 0.000000 315 +space 0 57 2.890372 0.000000 310 +semest 0 58 2.890372 0.000000 312 +special 0 56 2.890372 0.000000 320 +processor 0 54 2.944439 0.000000 335 +talk 0 53 2.944439 0.000000 336 +pointer 0 48 3.044522 0.000000 368 +electron 1 47 3.091042 3.091042 379 +execut 0 45 3.135494 0.000000 404 +term 0 43 3.178054 0.000000 411 +theoret 1 39 3.258097 3.258097 446 +announc 1 40 3.258097 3.258097 441 +seminar 0 38 3.295837 0.000000 470 +random 1 34 3.401197 3.401197 511 +bibliographi 1 34 3.401197 3.401197 518 +post 0 35 3.401197 0.000000 505 +next 0 34 3.401197 0.000000 517 +committe 0 34 3.401197 0.000000 522 +express 0 32 3.465736 0.000000 540 +ad 0 32 3.465736 0.000000 544 +often 0 31 3.496508 0.000000 551 +scientist 0 31 3.496508 0.000000 560 +graph 1 30 3.555348 3.555348 576 +focu 0 30 3.555348 0.000000 571 +focus 0 29 3.583519 0.000000 584 +measur 0 28 3.610918 0.000000 609 +held 0 28 3.610918 0.000000 600 +becom 0 28 3.610918 0.000000 603 +bound 0 26 3.688879 0.000000 659 +berkelei 0 26 3.688879 0.000000 657 +request 0 26 3.688879 0.000000 635 +consist 0 26 3.688879 0.000000 651 +fundament 0 25 3.737670 0.000000 661 +greg 1 24 3.761200 3.761200 695 +sent 1 22 3.850148 3.850148 763 +serv 0 22 3.850148 0.000000 758 +alumni 0 21 3.912023 0.000000 807 +siam 0 21 3.912023 0.000000 800 +region 0 19 4.007333 0.000000 875 +north 0 19 4.007333 0.000000 873 +hypertext 0 19 4.007333 0.000000 865 +stand 1 18 4.060443 4.060443 891 +lower 0 18 4.060443 0.000000 886 +attend 0 18 4.060443 0.000000 893 +affili 0 13 4.382027 0.000000 1194 +discret 0 13 4.382027 0.000000 1165 +remov 1 12 4.465908 4.465908 1225 +walk 0 12 4.465908 0.000000 1281 +outsid 0 12 4.465908 0.000000 1219 +distinguish 1 11 4.553877 4.553877 1357 +probabilist 0 11 4.553877 0.000000 1343 +typic 0 11 4.553877 0.000000 1360 +regard 0 11 4.553877 0.000000 1309 +cryptographi 0 9 4.753590 0.000000 1512 +folk 0 9 4.753590 0.000000 1597 +postdoc 0 8 4.875197 0.000000 1724 +elect 0 8 4.875197 0.000000 1771 +calendar 0 8 4.875197 0.000000 1649 +colloquium 0 8 4.875197 0.000000 1734 +bit 0 7 5.010635 0.000000 1833 +foc 0 7 5.010635 0.000000 1880 +sigact 1 6 5.164786 5.164786 2212 +zhou 0 6 5.164786 0.000000 2092 +forum 0 6 5.164786 0.000000 2027 +southern 0 6 5.164786 0.000000 2191 +pool 0 6 5.164786 0.000000 2225 +arrang 0 6 5.164786 0.000000 2023 +sponsor 0 6 5.164786 0.000000 2133 +soda 0 6 5.164786 0.000000 2189 +groupth 0 5 5.347108 0.000000 2549 +provabl 0 5 5.347108 0.000000 2558 +phil 0 5 5.347108 0.000000 2419 +speaker 0 5 5.347108 0.000000 2370 +stoc 0 5 5.347108 0.000000 2491 +vijaya 1 4 5.568345 5.568345 2677 +dalla 1 4 5.568345 5.568345 2930 +combinator 0 4 5.568345 0.000000 2915 +twice 0 4 5.568345 0.000000 2614 +algorithmsand 0 4 5.568345 0.000000 2680 +warm 2 3 5.857933 11.715866 3904 +plaxton 1 3 5.857933 5.857933 3886 +ramachandran 1 3 5.857933 5.857933 3742 +louisiana 1 3 5.857933 5.857933 3902 +zuckerman 0 3 5.857933 0.000000 3205 +sinica 0 3 5.857933 0.000000 3819 +poon 0 3 5.857933 0.000000 3820 +dozen 0 3 5.857933 0.000000 3905 +gripe 0 3 5.857933 0.000000 3257 +surround 0 3 5.857933 0.000000 3492 +spaa 0 3 5.857933 0.000000 3906 +madhukar 1 2 6.263398 6.263398 5633 +baruah 0 2 6.263398 0.000000 5753 +sheng 0 2 6.263398 0.000000 5153 +ckpoon 0 2 6.263398 0.000000 5510 +rajmohan 0 2 6.263398 0.000000 5706 +rajaraman 0 2 6.263398 0.000000 5704 +rraj 0 2 6.263398 0.000000 5705 +sinha 0 2 6.263398 0.000000 5754 +southwestern 0 2 6.263398 0.000000 5744 +andarchitectur 0 2 6.263398 0.000000 5755 +sanjoi 1 1 6.957497 6.957497 14452 +kelsen 1 1 6.957497 6.957497 14453 +ramgop 1 1 6.957497 6.957497 14454 +suel 1 1 6.957497 6.957497 14455 +yuke 1 1 6.957497 6.957497 14456 +grouput 0 1 6.957497 0.000000 14457 +emba 0 1 6.957497 0.000000 14458 +tsan 0 1 6.957497 0.000000 14459 +tshsu 0 1 6.957497 0.000000 14460 +pierr 0 1 6.957497 0.000000 14461 +korupolu 0 1 6.957497 0.000000 14462 +mackenzi 0 1 6.957497 0.000000 14463 +philmac 0 1 6.957497 0.000000 14464 +idbsu 0 1 6.957497 0.000000 14465 +mettu 0 1 6.957497 0.000000 14466 +santanu 0 1 6.957497 0.000000 14467 +ssinha 0 1 6.957497 0.000000 14468 +torsten 0 1 6.957497 0.000000 14469 +lowvolum 0 1 6.957497 0.000000 14470 +themidsouth 0 1 6.957497 0.000000 14471 +midsouthwest 0 1 6.957497 0.000000 14472 +keynot 0 1 6.957497 0.000000 14473 +atut 0 1 6.957497 0.000000 14474 +organizedanoth 0 1 6.957497 0.000000 14475 +methodist 0 1 6.957497 0.000000 14476 +oklahoma 0 1 6.957497 0.000000 14477 +beheld 0 1 6.957497 0.000000 14478 +algorithmsmail 0 1 6.957497 0.000000 14479 +usuallytri 0 1 6.957497 0.000000 14480 +ofaustin 0 1 6.957497 0.000000 14481 +thatinclud 0 1 6.957497 0.000000 14482 +sponsorsth 0 1 6.957497 0.000000 14483 +interestar 0 1 6.957497 0.000000 14484 +thesigact 0 1 6.957497 0.000000 14485 +eccc 0 1 6.957497 0.000000 14486 +rolodex 0 1 6.957497 0.000000 14487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..fcebfcc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,8 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +email 0 220 1.386294 0.000000 29 +utexa 0 189 1.609438 0.000000 44 +phone 1 175 1.791759 1.791759 45 +srinivasan 1 6 5.164786 5.164786 2175 +vaidyaraman 1 2 6.263398 6.263398 5658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..944ed9ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +problem 0 147 1.945910 0.000000 75 +architectur 0 139 1.945910 0.000000 77 +topic 0 114 2.197225 0.000000 110 +theori 0 111 2.197225 0.000000 127 +memori 0 101 2.302585 0.000000 139 +search 0 95 2.397895 0.000000 155 +learn 0 86 2.484907 0.000000 170 +intellig 0 72 2.639057 0.000000 225 +solv 0 73 2.639057 0.000000 234 +order 0 69 2.708050 0.000000 249 +knowledg 0 67 2.708050 0.000000 243 +artifici 0 63 2.772589 0.000000 280 +visual 0 48 3.044522 0.000000 372 +approach 0 48 3.044522 0.000000 366 +understand 0 47 3.091042 0.000000 384 +term 0 43 3.178054 0.000000 411 +represent 0 35 3.401197 0.000000 512 +neural 0 30 3.555348 0.000000 578 +retriev 0 27 3.637586 0.000000 621 +background 0 25 3.737670 0.000000 664 +cognit 0 16 4.174387 0.000000 986 +usavoic 0 13 4.382027 0.000000 1198 +mepost 0 10 4.653960 0.000000 1472 +attent 0 8 4.875197 0.000000 1651 +interestsi 0 7 5.010635 0.000000 1969 +connectionist 0 5 5.347108 0.000000 2430 +howto 0 2 6.263398 0.000000 5761 +vurgun 1 1 6.957497 6.957497 14526 +sengul 0 1 6.957497 0.000000 14527 +sengulvurgun 0 1 6.957497 0.000000 14528 +ammainli 0 1 6.957497 0.000000 14529 +evolutionaryalgorithm 0 1 6.957497 0.000000 14530 +ofprefer 0 1 6.957497 0.000000 14531 +skillacquisit 0 1 6.957497 0.000000 14532 +mindto 0 1 6.957497 0.000000 14533 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..aa440f75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +person 0 111 2.197225 0.000000 117 +server 0 76 2.564949 0.000000 204 +locat 0 59 2.833213 0.000000 303 +chuck 0 14 4.317488 0.000000 1108 +enterpris 0 2 6.263398 0.000000 4839 +walbourn 0 1 6.957497 0.000000 14534 +walbournmi 0 1 6.957497 0.000000 14535 +charybdi 0 1 6.957497 0.000000 14536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..1e2ddc27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +fall 0 181 1.609438 0.000000 40 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +avail 0 169 1.791759 0.000000 48 +professor 1 137 1.945910 1.945910 76 +lectur 0 135 1.945910 0.000000 73 +year 0 148 1.945910 0.000000 84 +mathemat 1 108 2.197225 2.197225 123 +follow 0 92 2.397895 0.000000 143 +member 0 84 2.484907 0.000000 165 +activ 0 84 2.484907 0.000000 182 +academ 0 82 2.484907 0.000000 178 +complet 0 77 2.564949 0.000000 208 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +creat 0 63 2.772589 0.000000 277 +colleg 1 61 2.833213 2.833213 300 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +summer 0 56 2.890372 0.000000 311 +profession 0 51 2.995732 0.000000 345 +appoint 0 49 3.044522 0.000000 358 +math 1 44 3.135494 3.135494 402 +http 0 41 3.218876 0.000000 420 +formal 0 37 3.332205 0.000000 478 +revis 0 26 3.688879 0.000000 640 +period 0 22 3.850148 0.000000 743 +regular 0 17 4.110874 0.000000 929 +photograph 0 15 4.248495 0.000000 1056 +senior 0 14 4.317488 0.000000 1120 +henri 1 10 4.653960 4.653960 1417 +jack 0 8 4.875197 0.000000 1780 +walker 1 3 5.857933 5.857933 3161 +tenur 0 3 5.857933 0.000000 3801 +mackai 1 2 6.263398 6.263398 5762 +grinnel 1 2 6.263398 6.263398 5763 +edua 0 2 6.263398 0.000000 5764 +grin 1 1 6.957497 6.957497 14537 +professorwalk 0 1 6.957497 0.000000 14538 +teachand 0 1 6.957497 0.000000 14539 +atgrinnel 0 1 6.957497 0.000000 14540 +robertson 0 1 6.957497 0.000000 14541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..51d8908f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +graduat 1 215 1.386294 1.386294 31 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +base 1 165 1.791759 1.791759 50 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +area 0 144 1.945910 0.000000 80 +first 0 140 1.945910 0.000000 71 +studi 1 120 2.079442 2.079442 91 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +manag 0 114 2.197225 0.000000 125 +follow 0 92 2.397895 0.000000 143 +school 0 84 2.484907 0.000000 188 +activ 0 84 2.484907 0.000000 182 +appli 1 71 2.639057 2.639057 226 +knowledg 0 67 2.708050 0.000000 243 +laboratori 0 63 2.772589 0.000000 292 +case 1 51 2.995732 2.995732 351 +maintain 0 51 2.995732 0.000000 342 +cool 0 49 3.044522 0.000000 374 +anoth 0 45 3.135494 0.000000 408 +third 0 43 3.178054 0.000000 412 +posit 0 31 3.496508 0.000000 552 +rule 1 26 3.688879 3.688879 638 +mike 0 24 3.761200 0.000000 703 +basi 1 20 3.951244 3.951244 828 +item 0 19 4.007333 0.000000 856 +accept 0 18 4.060443 0.000000 879 +senior 0 14 4.317488 0.000000 1120 +usavoic 0 13 4.382027 0.000000 1198 +modul 1 10 4.653960 4.653960 1434 +mepost 0 10 4.653960 0.000000 1472 +declar 1 9 4.753590 4.753590 1526 +lane 0 8 4.875197 0.000000 1720 +unpublish 0 6 5.164786 0.000000 2226 +mirank 1 5 5.347108 5.347108 2543 +lanc 0 4 5.568345 0.000000 3022 +warshaw 1 2 6.263398 6.263398 5659 +venu 1 2 6.263398 6.263398 5655 +developedat 0 2 6.263398 0.000000 4078 +obermey 0 2 6.263398 0.000000 5657 +warshawlan 0 1 6.957497 0.000000 14542 +laboratoryinvolv 0 1 6.957497 0.000000 14543 +andat 0 1 6.957497 0.000000 14544 +arlut 0 1 6.957497 0.000000 14545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..ce035543 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +analysi 0 124 2.079442 0.000000 98 +mathemat 1 108 2.197225 2.197225 123 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +book 0 99 2.302585 0.000000 131 +center 0 88 2.397895 0.000000 158 +comment 0 93 2.397895 0.000000 146 +master 0 76 2.564949 0.000000 216 +decemb 0 80 2.564949 0.000000 215 +refer 0 78 2.564949 0.000000 203 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +java 1 70 2.708050 2.708050 248 +august 0 66 2.708050 0.000000 257 +dept 0 64 2.772589 0.000000 291 +juli 0 60 2.833213 0.000000 305 +unix 0 58 2.890372 0.000000 308 +sampl 0 53 2.944439 0.000000 339 +numer 0 49 3.044522 0.000000 369 +math 0 44 3.135494 0.000000 402 +china 0 37 3.332205 0.000000 487 +expect 0 37 3.332205 0.000000 484 +manual 0 35 3.401197 0.000000 504 +common 0 30 3.555348 0.000000 574 +load 0 28 3.610918 0.000000 601 +chen 1 21 3.912023 3.912023 791 +demo 0 18 4.060443 0.000000 888 +perl 0 11 4.553877 0.000000 1332 +gatewai 0 7 5.010635 0.000000 1942 +fudan 0 3 5.857933 0.000000 3707 +rosett 1 2 6.263398 6.263398 5595 +wchen 1 1 6.957497 6.957497 14546 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..937feb14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +group 1 183 1.609438 1.609438 36 +utexa 0 189 1.609438 0.000000 44 +fall 0 181 1.609438 0.000000 40 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +implement 0 152 1.791759 0.000000 52 +hall 1 146 1.945910 1.945910 65 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +memori 0 101 2.302585 0.000000 139 +section 0 94 2.397895 0.000000 149 +orient 0 80 2.564949 0.000000 205 +taylor 1 63 2.772589 2.772589 287 +best 0 59 2.833213 0.000000 299 +paul 1 38 3.295837 3.295837 471 +postal 0 30 3.555348 0.000000 580 +usual 0 28 3.610918 0.000000 608 +reach 0 24 3.761200 0.000000 688 +lead 0 23 3.806662 0.000000 718 +thought 0 17 4.110874 0.000000 945 +wilson 1 9 4.753590 4.753590 1536 +oop 1 8 4.875197 4.875197 1778 +cross 0 8 4.875197 0.000000 1703 +informationi 0 3 5.857933 0.000000 3871 +novelti 0 2 6.263398 0.000000 5765 +ltwilson 0 1 6.957497 0.000000 14547 +headshot 0 1 6.957497 0.000000 14548 +workson 0 1 6.957497 0.000000 14549 +teachingin 0 1 6.957497 0.000000 14550 +sciencesnot 0 1 6.957497 0.000000 14551 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..ca53edd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +also 1 259 1.386294 1.386294 28 +class 0 199 1.609438 0.000000 37 +utexa 0 189 1.609438 0.000000 44 +relat 0 139 1.945910 0.000000 68 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +need 0 98 2.302585 0.000000 135 +homepag 0 93 2.397895 0.000000 148 +school 0 84 2.484907 0.000000 188 +thing 0 84 2.484907 0.000000 189 +start 0 83 2.484907 0.000000 173 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +come 0 78 2.564949 0.000000 202 +differ 0 66 2.708050 0.000000 253 +think 1 57 2.890372 2.890372 314 +major 0 56 2.890372 0.000000 315 +life 1 50 3.044522 3.044522 375 +friend 0 48 3.044522 0.000000 376 +without 0 50 3.044522 0.000000 370 +give 0 50 3.044522 0.000000 359 +made 0 44 3.135494 0.000000 398 +howev 1 41 3.218876 3.218876 422 +past 0 42 3.218876 0.000000 428 +live 0 40 3.258097 0.000000 451 +realli 0 40 3.258097 0.000000 444 +mean 0 37 3.332205 0.000000 477 +ofth 0 36 3.367296 0.000000 491 +john 0 33 3.433987 0.000000 532 +abl 1 30 3.555348 3.555348 566 +becom 1 28 3.610918 3.610918 603 +though 1 27 3.637586 3.637586 622 +quit 0 27 3.637586 0.000000 633 +decis 0 23 3.806662 0.000000 728 +lead 0 23 3.806662 0.000000 718 +dai 0 22 3.850148 0.000000 753 +born 0 21 3.912023 0.000000 798 +attend 0 18 4.060443 0.000000 893 +record 0 18 4.060443 0.000000 890 +listen 0 18 4.060443 0.000000 907 +accept 0 18 4.060443 0.000000 879 +thought 1 17 4.110874 4.110874 945 +seek 1 17 4.110874 4.110874 954 +whether 0 17 4.110874 0.000000 918 +whole 0 17 4.110874 0.000000 940 +earli 0 16 4.174387 0.000000 968 +month 0 15 4.248495 0.000000 1025 +enough 0 15 4.248495 0.000000 1040 +becam 1 14 4.317488 4.317488 1117 +hong 0 14 4.317488 0.000000 1105 +decid 0 14 4.317488 0.000000 1075 +believ 0 13 4.382027 0.000000 1187 +came 0 13 4.382027 0.000000 1197 +weak 0 13 4.382027 0.000000 1159 +opportun 0 13 4.382027 0.000000 1161 +count 0 12 4.465908 0.000000 1239 +true 1 10 4.653960 4.653960 1422 +reli 0 10 4.653960 0.000000 1411 +strength 1 9 4.753590 4.753590 1494 +kong 0 9 4.753590 0.000000 1602 +clear 0 9 4.753590 0.000000 1488 +trust 0 9 4.753590 0.000000 1583 +said 0 9 4.753590 0.000000 1571 +matter 0 8 4.875197 0.000000 1627 +realiz 0 8 4.875197 0.000000 1739 +christian 2 7 5.010635 10.021270 1949 +therefor 0 7 5.010635 0.000000 1822 +wrong 1 6 5.164786 5.164786 2025 +matthew 0 6 5.164786 0.000000 2193 +church 0 4 5.568345 0.000000 3011 +jesu 1 3 5.857933 5.857933 3624 +faith 1 3 5.857933 5.857933 3363 +bibl 0 3 5.857933 0.000000 3143 +credibl 0 3 5.857933 0.000000 3210 +shouldb 0 3 5.857933 0.000000 3673 +theywil 0 3 5.857933 0.000000 3102 +doubt 0 3 5.857933 0.000000 3119 +nota 0 3 5.857933 0.000000 3785 +holi 1 2 6.263398 6.263398 5711 +christ 1 2 6.263398 6.263398 5766 +arthur 0 2 6.263398 0.000000 5767 +religi 0 2 6.263398 0.000000 4816 +intent 0 2 6.263398 0.000000 5768 +stumbl 0 2 6.263398 0.000000 5349 +hei 0 2 6.263398 0.000000 5769 +forgiv 0 2 6.263398 0.000000 5770 +andto 0 2 6.263398 0.000000 5771 +differencebetween 0 2 6.263398 0.000000 5431 +deed 0 2 6.263398 0.000000 5077 +wedo 0 2 6.263398 0.000000 5772 +sick 0 2 6.263398 0.000000 5773 +ought 0 2 6.263398 0.000000 5365 +hesit 0 2 6.263398 0.000000 5774 +sin 1 1 6.957497 6.957497 14552 +cent 0 1 6.957497 0.000000 14553 +christiani 0 1 6.957497 0.000000 14554 +alittl 0 1 6.957497 0.000000 14555 +totallyunexpect 0 1 6.957497 0.000000 14556 +compulsori 0 1 6.957497 0.000000 14557 +thechristian 0 1 6.957497 0.000000 14558 +tobecom 0 1 6.957497 0.000000 14559 +slife 0 1 6.957497 0.000000 14560 +deepli 0 1 6.957497 0.000000 14561 +mylif 0 1 6.957497 0.000000 14562 +misconcept 0 1 6.957497 0.000000 14563 +christianwa 0 1 6.957497 0.000000 14564 +christianand 0 1 6.957497 0.000000 14565 +lovedeveri 0 1 6.957497 0.000000 14566 +achristian 0 1 6.957497 0.000000 14567 +virtuou 0 1 6.957497 0.000000 14568 +thefellowship 0 1 6.957497 0.000000 14569 +flesh 0 1 6.957497 0.000000 14570 +sinless 0 1 6.957497 0.000000 14571 +sympath 0 1 6.957497 0.000000 14572 +weconfess 0 1 6.957497 0.000000 14573 +cleans 0 1 6.957497 0.000000 14574 +unright 0 1 6.957497 0.000000 14575 +astheir 0 1 6.957497 0.000000 14576 +saviour 0 1 6.957497 0.000000 14577 +gratefulli 0 1 6.957497 0.000000 14578 +redempt 0 1 6.957497 0.000000 14579 +fortheir 0 1 6.957497 0.000000 14580 +justifi 0 1 6.957497 0.000000 14581 +roman 0 1 6.957497 0.000000 14582 +thecontrari 0 1 6.957497 0.000000 14583 +givesu 0 1 6.957497 0.000000 14584 +physician 0 1 6.957497 0.000000 14585 +onour 0 1 6.957497 0.000000 14586 +thetruth 0 1 6.957497 0.000000 14587 +thankgod 0 1 6.957497 0.000000 14588 +wkmak 0 1 6.957497 0.000000 14589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..38560b2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +email 1 220 1.386294 1.386294 29 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +network 1 168 1.791759 1.791759 61 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +area 0 144 1.945910 0.000000 80 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +machin 1 129 2.079442 2.079442 95 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +topic 1 114 2.197225 2.197225 110 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +well 0 109 2.197225 0.000000 121 +user 0 104 2.302585 0.000000 137 +comment 1 93 2.397895 2.397895 146 +pictur 1 89 2.397895 2.397895 160 +graphic 0 90 2.397895 0.000000 147 +real 0 93 2.397895 0.000000 144 +question 0 91 2.397895 0.000000 141 +homepag 0 93 2.397895 0.000000 148 +associ 0 93 2.397895 0.000000 151 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +educ 1 86 2.484907 2.484907 191 +control 1 82 2.484907 2.484907 164 +member 1 84 2.484907 2.484907 165 +start 0 83 2.484907 0.000000 173 +wide 0 84 2.484907 0.000000 185 +interfac 1 79 2.564949 2.564949 209 +state 0 76 2.564949 0.000000 207 +intellig 1 72 2.639057 2.639057 225 +onlin 1 75 2.639057 2.639057 223 +multimedia 0 68 2.708050 0.000000 258 +artifici 0 63 2.772589 0.000000 280 +organ 0 65 2.772589 0.000000 265 +taylor 0 63 2.772589 0.000000 287 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +explor 1 58 2.890372 2.890372 324 +reason 1 57 2.890372 2.890372 318 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +approach 0 48 3.044522 0.000000 366 +frequent 0 49 3.044522 0.000000 367 +physic 0 47 3.091042 0.000000 377 +vision 0 41 3.218876 0.000000 430 +fast 0 42 3.218876 0.000000 429 +movi 0 40 3.258097 0.000000 459 +robot 3 36 3.367296 10.101888 497 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +rang 1 30 3.555348 3.555348 565 +neural 0 30 3.555348 0.000000 578 +common 0 30 3.555348 0.000000 574 +built 1 29 3.583519 3.583519 592 +semant 1 29 3.583519 3.583519 587 +art 1 29 3.583519 3.583519 593 +ask 0 28 3.610918 0.000000 597 +manipul 1 27 3.637586 3.637586 624 +doctor 0 24 3.761200 0.000000 709 +yahoo 0 24 3.761200 0.000000 707 +mobil 1 23 3.806662 3.806662 730 +miscellan 0 23 3.806662 0.000000 731 +hierarchi 1 22 3.850148 3.850148 744 +navig 1 21 3.912023 3.912023 796 +unit 0 21 3.912023 0.000000 779 +love 0 21 3.912023 0.000000 804 +offici 0 18 4.060443 0.000000 894 +spatial 1 16 4.174387 4.174387 988 +commerci 0 16 4.174387 0.000000 1005 +remot 0 15 4.248495 0.000000 1041 +embed 0 14 4.317488 0.000000 1102 +hotlist 1 13 4.382027 4.382027 1199 +primarili 0 13 4.382027 0.000000 1185 +qualit 1 11 4.553877 4.553877 1362 +player 0 11 4.553877 0.000000 1371 +hello 0 10 4.653960 0.000000 1407 +catalog 0 10 4.653960 0.000000 1431 +meta 0 9 4.753590 0.000000 1505 +ring 1 8 4.875197 4.875197 1684 +guitar 1 8 4.875197 4.875197 1758 +autonom 0 8 4.875197 0.000000 1749 +sensor 1 7 5.010635 5.010635 1920 +spot 0 7 5.010635 0.000000 1894 +usenet 0 7 5.010635 0.000000 1839 +race 0 5 5.347108 0.000000 2417 +car 1 4 5.568345 5.568345 2931 +worki 0 4 5.568345 0.000000 3010 +fora 0 4 5.568345 0.000000 2697 +ncsa 0 4 5.568345 0.000000 2767 +motor 0 3 5.857933 0.000000 3909 +badminton 1 2 6.263398 6.263398 5221 +martial 1 2 6.263398 6.263398 5004 +worm 0 2 6.263398 0.000000 5775 +eduperson 0 2 6.263398 0.000000 5776 +ultrason 1 1 6.957497 6.957497 14590 +rhino 1 1 6.957497 6.957497 14591 +robokreta 1 1 6.957497 6.957497 14592 +wyle 1 1 6.957497 6.957497 14593 +clarinet 1 1 6.957497 6.957497 14594 +mobilerobot 0 1 6.957497 0.000000 14595 +rover 0 1 6.957497 0.000000 14596 +tall 0 1 6.957497 0.000000 14597 +robocac 0 1 6.957497 0.000000 14598 +robofest 0 1 6.957497 0.000000 14599 +besar 0 1 6.957497 0.000000 14600 +kicik 0 1 6.957497 0.000000 14601 +chassi 0 1 6.957497 0.000000 14602 +andqualit 0 1 6.957497 0.000000 14603 +malaysia 0 1 6.957497 0.000000 14604 +interestsavid 0 1 6.957497 0.000000 14605 +usba 0 1 6.957497 0.000000 14606 +miscellaneousinterest 0 1 6.957497 0.000000 14607 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..3b92530e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +utexa 0 189 1.609438 0.000000 44 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +austin 0 168 1.791759 0.000000 63 +west 0 83 2.484907 0.000000 192 +xfeng 0 2 6.263398 0.000000 4376 +qaustin 0 1 6.957497 0.000000 14608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..62776409 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +welcom 0 122 2.079442 0.000000 99 +well 1 109 2.197225 2.197225 121 +find 0 111 2.197225 0.000000 111 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +pictur 1 89 2.397895 2.397895 160 +search 1 95 2.397895 2.397895 155 +present 0 91 2.397895 0.000000 145 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +nation 0 74 2.639057 0.000000 240 +line 0 75 2.639057 0.000000 231 +multimedia 0 68 2.708050 0.000000 258 +creat 1 63 2.772589 2.772589 277 +major 0 56 2.890372 0.000000 315 +visual 0 48 3.044522 0.000000 372 +right 0 48 3.044522 0.000000 363 +frequent 0 49 3.044522 0.000000 367 +pointer 0 48 3.044522 0.000000 368 +around 0 43 3.178054 0.000000 415 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +realli 0 40 3.258097 0.000000 444 +feel 0 37 3.332205 0.000000 483 +china 0 37 3.332205 0.000000 487 +soon 0 36 3.367296 0.000000 494 +express 0 32 3.465736 0.000000 540 +travel 1 30 3.555348 3.555348 579 +hard 0 30 3.555348 0.000000 563 +american 0 27 3.637586 0.000000 634 +sport 0 25 3.737670 0.000000 683 +head 0 23 3.806662 0.000000 732 +watch 0 21 3.912023 0.000000 789 +unit 0 21 3.912023 0.000000 779 +goe 0 15 4.248495 0.000000 1044 +club 0 15 4.248495 0.000000 1058 +classic 0 14 4.317488 0.000000 1084 +audio 0 14 4.317488 0.000000 1094 +rank 0 14 4.317488 0.000000 1063 +hopefulli 0 14 4.317488 0.000000 1071 +pretti 0 13 4.382027 0.000000 1191 +walk 0 12 4.465908 0.000000 1281 +newspap 0 12 4.465908 0.000000 1280 +string 0 11 4.553877 0.000000 1340 +keyword 0 11 4.553877 0.000000 1356 +vista 0 10 4.653960 0.000000 1452 +card 0 10 4.653960 0.000000 1435 +hang 0 9 4.753590 0.000000 1499 +soccer 0 8 4.875197 0.000000 1752 +surpris 1 7 5.010635 5.010635 1828 +harrick 0 7 5.010635 0.000000 1849 +photographi 0 6 5.164786 0.000000 2146 +infoseek 0 6 5.164786 0.000000 2188 +financi 0 6 5.164786 0.000000 2197 +atlant 0 5 5.347108 0.000000 2508 +alta 0 4 5.568345 0.000000 3039 +leagu 0 4 5.568345 0.000000 3040 +aswel 0 3 5.857933 0.000000 3286 +serious 0 3 5.857933 0.000000 3663 +monthli 0 3 5.857933 0.000000 3910 +imagin 0 2 6.263398 0.000000 5472 +clearer 0 2 6.263398 0.000000 5676 +marvel 0 2 6.263398 0.000000 5400 +morn 0 2 6.263398 0.000000 5162 +xingang 1 1 6.957497 6.957497 14609 +delight 0 1 6.957497 0.000000 14610 +temporaryresort 0 1 6.957497 0.000000 14611 +llgradual 0 1 6.957497 0.000000 14612 +havesometh 0 1 6.957497 0.000000 14613 +foliag 0 1 6.957497 0.000000 14614 +miata 0 1 6.957497 0.000000 14615 +xguo 0 1 6.957497 0.000000 14616 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..13d6be03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +address 0 170 1.791759 0.000000 62 +geoffrei 0 3 5.857933 0.000000 3505 +pagemov 0 1 6.957497 0.000000 14617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..90315019 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +challeng 0 26 3.688879 0.000000 653 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +yang 1 8 4.875197 4.875197 1652 +alert 0 5 5.347108 0.000000 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..a29735d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +also 0 259 1.386294 0.000000 28 +utexa 0 189 1.609438 0.000000 44 +austin 1 168 1.791759 1.791759 63 +contact 0 153 1.791759 0.000000 59 +texa 0 160 1.791759 0.000000 64 +first 0 140 1.945910 0.000000 71 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +make 0 111 2.197225 0.000000 120 +sinc 0 90 2.397895 0.000000 159 +homepag 0 93 2.397895 0.000000 148 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +david 0 71 2.639057 0.000000 232 +nation 0 74 2.639057 0.000000 240 +street 0 63 2.772589 0.000000 293 +still 0 50 3.044522 0.000000 362 +life 0 50 3.044522 0.000000 375 +get 0 46 3.091042 0.000000 380 +favorit 0 44 3.135494 0.000000 410 +must 0 40 3.258097 0.000000 442 +word 0 34 3.401197 0.000000 508 +quot 0 29 3.583519 0.000000 582 +art 0 29 3.583519 0.000000 593 +hope 0 28 3.610918 0.000000 610 +utc 1 27 3.637586 3.637586 629 +team 0 27 3.637586 0.000000 625 +daili 0 24 3.761200 0.000000 706 +busi 0 21 3.912023 0.000000 784 +tell 0 21 3.912023 0.000000 777 +beauti 0 18 4.060443 0.000000 912 +sept 0 17 4.110874 0.000000 952 +took 0 16 4.174387 0.000000 1010 +classic 1 14 4.317488 4.317488 1084 +hong 0 14 4.317488 0.000000 1105 +stori 0 14 4.317488 0.000000 1087 +introduc 0 13 4.382027 0.000000 1139 +outsid 0 12 4.465908 0.000000 1219 +franc 0 12 4.465908 0.000000 1276 +hello 0 10 4.653960 0.000000 1407 +rich 0 10 4.653960 0.000000 1396 +label 0 10 4.653960 0.000000 1423 +establish 0 9 4.753590 0.000000 1532 +kong 0 9 4.753590 0.000000 1602 +french 0 9 4.753590 0.000000 1511 +guitar 1 8 4.875197 4.875197 1758 +grew 0 8 4.875197 0.000000 1742 +corner 1 7 5.010635 5.010635 1909 +footbal 0 7 5.010635 0.000000 1912 +whatev 0 6 5.164786 0.000000 2097 +artist 0 6 5.164786 0.000000 2127 +seriou 0 5 5.347108 0.000000 2252 +christoph 0 5 5.347108 0.000000 2512 +festiv 0 4 5.568345 0.000000 2952 +tire 0 4 5.568345 0.000000 2799 +align 0 4 5.568345 0.000000 2863 +concert 0 3 5.857933 0.000000 3533 +byth 0 3 5.857933 0.000000 3874 +jesu 0 3 5.857933 0.000000 3624 +passion 0 3 5.857933 0.000000 3633 +michel 0 3 5.857933 0.000000 3791 +medit 0 2 6.263398 0.000000 5777 +retir 0 2 6.263398 0.000000 5674 +christ 0 2 6.263398 0.000000 5766 +wwwdavid 0 1 6.957497 0.000000 14618 +assad 0 1 6.957497 0.000000 14619 +brothersin 0 1 6.957497 0.000000 14620 +parkeningi 0 1 6.957497 0.000000 14621 +guitarist 0 1 6.957497 0.000000 14622 +ofconcert 0 1 6.957497 0.000000 14623 +reconcili 0 1 6.957497 0.000000 14624 +rekindl 0 1 6.957497 0.000000 14625 +theamsterdam 0 1 6.957497 0.000000 14626 +trio 0 1 6.957497 0.000000 14627 +flair 0 1 6.957497 0.000000 14628 +platini 0 1 6.957497 0.000000 14629 +magazinepublish 0 1 6.957497 0.000000 14630 +minist 0 1 6.957497 0.000000 14631 +absmiddl 0 1 6.957497 0.000000 14632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..564184fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +document 0 121 2.079442 0.000000 89 +version 0 113 2.197225 0.000000 122 +messag 0 76 2.564949 0.000000 212 +browser 0 56 2.890372 0.000000 313 +could 0 46 3.091042 0.000000 383 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +challeng 0 26 3.688879 0.000000 653 +frame 1 24 3.761200 3.761200 684 +navig 0 21 3.912023 0.000000 796 +see 0 11 4.553877 0.000000 1337 +alert 0 5 5.347108 0.000000 2555 +yanbin 0 2 6.263398 0.000000 5599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..3d99fc56 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +us 0 329 1.098612 0.000000 16 +click 0 142 1.945910 0.000000 78 +pleas 0 113 2.197225 0.000000 114 +browser 0 56 2.890372 0.000000 313 +continu 0 39 3.258097 0.000000 448 +oop 0 8 4.875197 0.000000 1778 +yuan 0 3 5.857933 0.000000 3653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..d33d1a74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +network 0 168 1.791759 0.000000 61 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +number 0 130 2.079442 0.000000 97 +mathemat 0 108 2.197225 0.000000 123 +place 0 106 2.197225 0.000000 124 +sinc 0 90 2.397895 0.000000 159 +state 0 76 2.564949 0.000000 207 +servic 1 72 2.639057 2.639057 236 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +china 1 37 3.332205 3.332205 487 +travel 0 30 3.555348 0.000000 579 +postal 0 30 3.555348 0.000000 580 +unit 1 21 3.912023 3.912023 779 +beij 1 19 4.007333 4.007333 876 +beauti 0 18 4.060443 0.000000 912 +wife 0 13 4.382027 0.000000 1196 +tsinghua 0 13 4.382027 0.000000 1195 +stai 0 12 4.465908 0.000000 1215 +jersei 0 9 4.753590 0.000000 1587 +heavi 0 7 5.010635 0.000000 1841 +river 0 6 5.164786 0.000000 2220 +yong 1 4 5.568345 5.568345 2809 +rutger 0 3 5.857933 0.000000 3566 +brunswick 0 3 5.857933 0.000000 3567 +settl 0 2 6.263398 0.000000 5778 +homepageto 0 1 6.957497 0.000000 14633 +homepagey 0 1 6.957497 0.000000 14634 +milanitalian 0 1 6.957497 0.000000 14635 +soccerk 0 1 6.957497 0.000000 14636 +soccernba 0 1 6.957497 0.000000 14637 +sitefox 0 1 6.957497 0.000000 14638 +sportschicago 0 1 6.957497 0.000000 14639 +bullsmichael 0 1 6.957497 0.000000 14640 +jordannflnhlc 0 1 6.957497 0.000000 14641 +rankingmarri 0 1 6.957497 0.000000 14642 +childrenseinfeldcomput 0 1 6.957497 0.000000 14643 +sciencesutilitieshtml 0 1 6.957497 0.000000 14644 +convertersimag 0 1 6.957497 0.000000 14645 +collectionssystemshtmllatexcgitcl 0 1 6.957497 0.000000 14646 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 0 1 6.957497 0.000000 14647 +tmiscinternet 0 1 6.957497 0.000000 14648 +parcel 0 1 6.957497 0.000000 14649 +usp 0 1 6.957497 0.000000 14650 +fedexus 0 1 6.957497 0.000000 14651 +guidefun 0 1 6.957497 0.000000 14652 +todayu 0 1 6.957497 0.000000 14653 +newsstarwavesupermodel 0 1 6.957497 0.000000 14654 +yonglu 0 1 6.957497 0.000000 14655 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..7f895c29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +professor 1 137 1.945910 1.945910 76 +architectur 0 139 1.945910 0.000000 77 +area 0 144 1.945910 0.000000 80 +analysi 1 124 2.079442 2.079442 98 +high 0 130 2.079442 0.000000 101 +mathemat 1 108 2.197225 2.197225 123 +topic 0 114 2.197225 0.000000 110 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +part 0 98 2.302585 0.000000 129 +memori 0 101 2.302585 0.000000 139 +associ 1 93 2.397895 2.397895 151 +search 1 95 2.397895 2.397895 155 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +journal 1 83 2.484907 2.484907 183 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +second 0 81 2.484907 0.000000 166 +academ 0 82 2.484907 0.000000 178 +level 0 87 2.484907 0.000000 180 +method 1 80 2.564949 2.564949 213 +david 1 71 2.639057 2.639057 232 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +servic 0 72 2.639057 0.000000 236 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +differ 0 66 2.708050 0.000000 253 +degre 0 69 2.708050 0.000000 259 +previou 0 62 2.772589 0.000000 290 +share 0 59 2.833213 0.000000 304 +sever 1 56 2.890372 2.890372 322 +special 0 56 2.890372 0.000000 320 +index 0 56 2.890372 0.000000 309 +scientif 0 53 2.944439 0.000000 341 +profession 0 51 2.995732 0.000000 345 +numer 1 49 3.044522 3.044522 369 +algebra 1 45 3.135494 3.135494 394 +linear 1 41 3.218876 3.218876 431 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +press 0 42 3.218876 0.000000 419 +procedur 0 36 3.367296 0.000000 488 +award 1 34 3.401197 3.401197 523 +committe 0 34 3.401197 0.000000 522 +next 0 34 3.401197 0.000000 517 +board 0 33 3.433987 0.000000 528 +john 0 33 3.433987 0.000000 532 +profil 1 30 3.555348 3.555348 581 +chair 0 29 3.583519 0.000000 596 +focus 0 29 3.583519 0.000000 584 +packag 0 28 3.610918 0.000000 614 +american 1 27 3.637586 3.637586 634 +supercomput 0 25 3.737670 0.000000 681 +fellow 0 24 3.761200 0.000000 701 +equat 1 23 3.806662 3.806662 724 +honor 0 23 3.806662 0.000000 729 +variabl 0 23 3.806662 0.000000 715 +director 0 22 3.850148 0.000000 767 +siam 0 21 3.912023 0.000000 800 +smith 0 20 3.951244 0.000000 820 +partial 0 18 4.060443 0.000000 900 +differenti 0 17 4.110874 0.000000 921 +young 1 16 4.174387 4.174387 991 +spars 1 16 4.174387 4.174387 989 +vector 0 16 4.174387 0.000000 961 +contribut 0 15 4.248495 0.000000 1021 +researchmi 0 14 4.317488 0.000000 1119 +finit 0 14 4.317488 0.000000 1106 +polynomi 0 14 4.317488 0.000000 1069 +iter 1 12 4.465908 4.465908 1206 +matric 1 10 4.653960 4.653960 1399 +suitabl 0 9 4.753590 0.000000 1486 +carei 1 8 4.875197 4.875197 1781 +harvard 0 7 5.010635 0.000000 1926 +converg 0 7 5.010635 0.000000 1844 +solver 0 7 5.010635 0.000000 1911 +outstand 0 6 5.164786 0.000000 2136 +argonn 0 5 5.347108 0.000000 2461 +singapor 0 5 5.347108 0.000000 2487 +ration 0 5 5.347108 0.000000 2427 +minneapoli 0 5 5.347108 0.000000 2480 +crai 1 4 5.568345 5.568345 3012 +naval 0 4 5.568345 0.000000 2920 +rapidli 0 4 5.568345 0.000000 2850 +graham 0 4 5.568345 0.000000 2817 +wilei 0 4 5.568345 0.000000 2669 +truste 0 3 5.857933 0.000000 3900 +stationari 0 3 5.857933 0.000000 3861 +kincaid 1 2 6.263398 6.263398 5617 +ofmathemat 0 2 6.263398 0.000000 4167 +interestnumer 0 2 6.263398 0.000000 5717 +methodsand 0 2 6.263398 0.000000 5779 +itpack 0 2 6.263398 0.000000 5619 +rassia 0 2 6.263398 0.000000 5620 +omega 0 2 6.263398 0.000000 4368 +pde 0 2 6.263398 0.000000 4505 +youngashbel 0 1 6.957497 0.000000 14656 +webb 0 1 6.957497 0.000000 14657 +issueded 0 1 6.957497 0.000000 14658 +mathematicalsocieti 0 1 6.957497 0.000000 14659 +matrixappl 0 1 6.957497 0.000000 14660 +numericallinear 0 1 6.957497 0.000000 14661 +partialdifferenti 0 1 6.957497 0.000000 14662 +oflinear 0 1 6.957497 0.000000 14663 +andspars 0 1 6.957497 0.000000 14664 +basedon 0 1 6.957497 0.000000 14665 +beingextend 0 1 6.957497 0.000000 14666 +distributedmemori 0 1 6.957497 0.000000 14667 +methodsbas 0 1 6.957497 0.000000 14668 +multilevel 0 1 6.957497 0.000000 14669 +beingdevelop 0 1 6.957497 0.000000 14670 +publicationsd 0 1 6.957497 0.000000 14671 +srivasiava 0 1 6.957497 0.000000 14672 +yanushauska 0 1 6.957497 0.000000 14673 +publ 0 1 6.957497 0.000000 14674 +vona 0 1 6.957497 0.000000 14675 +sepehrnoori 0 1 6.957497 0.000000 14676 +son 0 1 6.957497 0.000000 14677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..7701a95b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +email 0 220 1.386294 0.000000 29 +mail 0 238 1.386294 0.000000 22 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +report 1 131 2.079442 2.079442 92 +welcom 0 122 2.079442 0.000000 99 +structur 1 106 2.197225 2.197225 105 +check 0 115 2.197225 0.000000 118 +find 0 111 2.197225 0.000000 111 +book 1 99 2.302585 2.302585 131 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +call 0 91 2.397895 0.000000 153 +comment 0 93 2.397895 0.000000 146 +decemb 0 80 2.564949 0.000000 215 +appear 0 78 2.564949 0.000000 210 +master 0 76 2.564949 0.000000 216 +html 0 75 2.639057 0.000000 235 +summari 0 73 2.639057 0.000000 237 +august 1 66 2.708050 2.708050 257 +differ 0 66 2.708050 0.000000 253 +organ 1 65 2.772589 2.772589 265 +written 1 63 2.772589 2.772589 278 +prof 1 64 2.772589 2.772589 273 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +dept 0 64 2.772589 0.000000 291 +septemb 0 65 2.772589 0.000000 274 +thesi 0 57 2.890372 0.000000 327 +digit 1 52 2.995732 2.995732 348 +maintain 0 51 2.995732 0.000000 342 +visitor 0 49 3.044522 0.000000 371 +featur 1 46 3.091042 3.091042 386 +electron 0 47 3.091042 0.000000 379 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +press 0 42 3.218876 0.000000 419 +multipl 0 39 3.258097 0.000000 453 +map 0 39 3.258097 0.000000 452 +hand 1 37 3.332205 3.332205 475 +connect 0 37 3.332205 0.000000 485 +within 0 33 3.433987 0.000000 525 +ad 0 32 3.465736 0.000000 544 +extend 0 32 3.465736 0.000000 539 +photo 0 31 3.496508 0.000000 561 +neural 1 30 3.555348 3.555348 578 +synchron 0 29 3.583519 0.000000 588 +utc 1 27 3.637586 3.637586 629 +repres 1 26 3.688879 3.688879 656 +recognit 1 23 3.806662 3.806662 723 +self 1 22 3.850148 3.850148 761 +newsgroup 0 21 3.912023 0.000000 783 +event 0 18 4.060443 0.000000 896 +interconnect 1 17 4.110874 4.110874 937 +segment 1 17 4.110874 4.110874 931 +outlin 0 17 4.110874 0.000000 914 +cambridg 0 16 4.174387 0.000000 1008 +later 1 15 4.248495 4.248495 1043 +total 0 10 4.653960 0.000000 1398 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +isbn 0 7 5.010635 0.000000 1901 +bunch 0 7 5.010635 0.000000 1861 +joseph 1 5 5.347108 5.347108 2327 +korea 0 4 5.568345 0.000000 2971 +cortex 1 3 5.857933 5.857933 3856 +seoul 0 3 5.857933 0.000000 3783 +cortic 0 3 5.857933 0.000000 3857 +neuron 0 3 5.857933 0.000000 3798 +yoonsuck 2 2 6.263398 12.526796 4177 +choe 2 2 6.263398 12.526796 4178 +lissom 1 2 6.263398 6.263398 5605 +sirosh 1 2 6.263398 6.263398 5609 +yschoe 1 2 6.263398 6.263398 4179 +touretzki 0 2 6.263398 0.000000 4428 +spike 1 1 6.957497 6.957497 14678 +yonsei 0 1 6.957497 0.000000 14679 +systembas 0 1 6.957497 0.000000 14680 +laterali 0 1 6.957497 0.000000 14681 +synerget 0 1 6.957497 0.000000 14682 +actualspik 0 1 6.957497 0.000000 14683 +slissom 0 1 6.957497 0.000000 14684 +beself 0 1 6.957497 0.000000 14685 +retinabi 0 1 6.957497 0.000000 14686 +desynchron 0 1 6.957497 0.000000 14687 +mozer 0 1 6.957497 0.000000 14688 +hasselmo 0 1 6.957497 0.000000 14689 +handwritten 0 1 6.957497 0.000000 14690 +techic 0 1 6.957497 0.000000 14691 +unord 0 1 6.957497 0.000000 14692 +interestingcontact 0 1 6.957497 0.000000 14693 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..eda813cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +student 0 343 1.098612 0.000000 19 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +updat 0 191 1.609438 0.000000 41 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +develop 0 174 1.791759 0.000000 53 +object 0 138 1.945910 0.000000 79 +spring 0 131 2.079442 0.000000 88 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +mathemat 0 108 2.197225 0.000000 123 +select 0 91 2.397895 0.000000 154 +associ 0 93 2.397895 0.000000 151 +institut 0 84 2.484907 0.000000 187 +orient 0 80 2.564949 0.000000 205 +server 0 76 2.564949 0.000000 204 +interfac 0 79 2.564949 0.000000 209 +logic 0 71 2.639057 0.000000 230 +java 1 70 2.708050 2.708050 248 +previou 0 62 2.772589 0.000000 290 +prof 0 64 2.772589 0.000000 273 +semest 0 58 2.890372 0.000000 312 +publish 0 57 2.890372 0.000000 326 +tabl 0 51 2.995732 0.000000 346 +china 1 37 3.332205 3.332205 487 +common 0 30 3.555348 0.000000 574 +chines 0 29 3.583519 0.000000 595 +client 0 25 3.737670 0.000000 679 +yahoo 0 24 3.761200 0.000000 707 +famili 1 23 3.806662 3.806662 735 +alumni 0 21 3.912023 0.000000 807 +wang 0 21 3.912023 0.000000 790 +chen 0 21 3.912023 0.000000 791 +break 0 20 3.951244 0.000000 812 +synthesi 0 20 3.951244 0.000000 834 +beij 0 19 4.007333 0.000000 876 +brown 0 16 4.174387 0.000000 977 +universityof 0 15 4.248495 0.000000 1061 +hong 0 14 4.317488 0.000000 1105 +nick 0 13 4.382027 0.000000 1180 +perl 0 11 4.553877 0.000000 1332 +engr 0 10 4.653960 0.000000 1427 +academi 0 8 4.875197 0.000000 1735 +yang 0 8 4.875197 0.000000 1652 +gatewai 0 7 5.010635 0.000000 1942 +munich 0 3 5.857933 0.000000 3570 +yuan 0 3 5.857933 0.000000 3653 +manchest 1 2 6.263398 6.263398 4828 +addresspictur 0 2 6.263398 0.000000 5584 +schedulec 0 2 6.263398 0.000000 4190 +gang 0 2 6.263398 0.000000 4530 +yuanj 0 1 6.957497 0.000000 14694 +xuwint 0 1 6.957497 0.000000 14695 +aziz 0 1 6.957497 0.000000 14696 +pagechines 0 1 6.957497 0.000000 14697 +hefei 0 1 6.957497 0.000000 14698 +chinaunivers 0 1 6.957497 0.000000 14699 +atmunich 0 1 6.957497 0.000000 14700 +germanyunivers 0 1 6.957497 0.000000 14701 +higham 0 1 6.957497 0.000000 14702 +lifan 0 1 6.957497 0.000000 14703 +guizhongustc 0 1 6.957497 0.000000 14704 +hailiang 0 1 6.957497 0.000000 14705 +yuhongfriend 0 1 6.957497 0.000000 14706 +linsoftwar 0 1 6.957497 0.000000 14707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..ec16bb8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +last 0 314 1.098612 0.000000 14 +utexa 0 189 1.609438 0.000000 44 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +number 0 130 2.079442 0.000000 97 +sinc 0 90 2.397895 0.000000 159 +visitor 0 49 3.044522 0.000000 371 +ring 0 8 4.875197 0.000000 1684 +edufing 0 4 5.568345 0.000000 2713 +feng 1 3 5.857933 5.857933 3300 +yufeng 0 1 6.957497 0.000000 14708 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..beef8f17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +fall 1 181 1.609438 1.609438 40 +utexa 0 189 1.609438 0.000000 44 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 0 160 1.791759 0.000000 64 +contact 0 153 1.791759 0.000000 59 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +spring 1 131 2.079442 2.079442 88 +pleas 0 113 2.197225 0.000000 114 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +peopl 0 96 2.302585 0.000000 132 +associ 1 93 2.397895 2.397895 151 +sinc 0 90 2.397895 0.000000 159 +wide 0 84 2.484907 0.000000 185 +info 0 85 2.484907 0.000000 176 +resum 1 79 2.564949 2.564949 217 +master 0 76 2.564949 0.000000 216 +view 0 70 2.708050 0.000000 254 +differ 0 66 2.708050 0.000000 253 +creat 0 63 2.772589 0.000000 277 +maintain 0 51 2.995732 0.000000 342 +format 0 48 3.044522 0.000000 356 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +still 0 50 3.044522 0.000000 362 +visitor 0 49 3.044522 0.000000 371 +china 1 37 3.332205 3.332205 487 +copyright 0 36 3.367296 0.000000 495 +john 0 33 3.433987 0.000000 532 +kind 0 32 3.465736 0.000000 541 +anim 1 31 3.496508 3.496508 557 +chines 1 29 3.583519 3.583519 595 +full 0 28 3.610918 0.000000 615 +thank 0 23 3.806662 0.000000 721 +chen 1 21 3.912023 3.912023 791 +els 0 19 4.007333 0.000000 843 +seek 0 17 4.110874 0.000000 954 +misc 0 13 4.382027 0.000000 1124 +calculu 0 12 4.465908 0.000000 1203 +counter 0 8 4.875197 0.000000 1765 +accord 0 7 5.010635 0.000000 1826 +republ 0 4 5.568345 0.000000 3032 +zhongshan 1 2 6.263398 6.263398 5547 +chenabout 0 2 6.263398 0.000000 5499 +postcript 0 2 6.263398 0.000000 4050 +zodiac 0 2 6.263398 0.000000 5729 +burton 0 2 6.263398 0.000000 5759 +zhii 1 1 6.957497 6.957497 14709 +guangzhou 1 1 6.957497 6.957497 14710 +pagezhii 0 1 6.957497 0.000000 14711 +mefrom 0 1 6.957497 0.000000 14712 +canton 0 1 6.957497 0.000000 14713 +dong 0 1 6.957497 0.000000 14714 +zchen 0 1 6.957497 0.000000 14715 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..b4e25c1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +modifi 0 178 1.609438 0.000000 35 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +algorithm 0 162 1.791759 0.000000 57 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +phone 0 175 1.791759 0.000000 45 +address 0 170 1.791759 0.000000 62 +process 0 142 1.945910 0.000000 72 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +welcom 0 122 2.079442 0.000000 99 +world 1 115 2.197225 2.197225 126 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +commun 0 95 2.397895 0.000000 157 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +internet 1 83 2.484907 2.484907 186 +educ 0 86 2.484907 0.000000 191 +ieee 0 86 2.484907 0.000000 190 +librari 0 87 2.484907 0.000000 181 +build 0 85 2.484907 0.000000 184 +onlin 1 75 2.639057 2.639057 223 +html 0 75 2.639057 0.000000 235 +multimedia 0 68 2.708050 0.000000 258 +main 0 67 2.708050 0.000000 256 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +guid 0 63 2.772589 0.000000 267 +taylor 0 63 2.772589 0.000000 287 +room 0 59 2.833213 0.000000 301 +life 0 50 3.044522 0.000000 375 +directori 0 45 3.135494 0.000000 396 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +http 0 41 3.218876 0.000000 420 +movi 0 40 3.258097 0.000000 459 +societi 0 40 3.258097 0.000000 456 +china 1 37 3.332205 3.332205 487 +chines 0 29 3.583519 0.000000 595 +campu 1 27 3.637586 3.637586 623 +magazin 1 24 3.761200 3.761200 704 +daili 0 24 3.761200 0.000000 706 +yahoo 0 24 3.761200 0.000000 707 +beij 0 19 4.007333 0.000000 876 +lyco 0 19 4.007333 0.000000 871 +beauti 0 18 4.060443 0.000000 912 +sept 0 17 4.110874 0.000000 952 +script 0 13 4.382027 0.000000 1171 +land 0 12 4.465908 0.000000 1273 +entertain 0 12 4.465908 0.000000 1286 +career 0 12 4.465908 0.000000 1287 +sciencesat 0 7 5.010635 0.000000 1968 +digest 0 7 5.010635 0.000000 1864 +zhou 1 6 5.164786 5.164786 2092 +assistantship 0 3 5.857933 0.000000 3660 +stamp 0 3 5.857933 0.000000 3678 +giant 0 3 5.857933 0.000000 3137 +galaxi 0 3 5.857933 0.000000 3603 +kaleidoscop 0 2 6.263398 0.000000 5780 +zhai 0 2 6.263398 0.000000 5709 +zhouxiao 1 1 6.957497 6.957497 14716 +maggi 1 1 6.957497 6.957497 14717 +xiao 1 1 6.957497 6.957497 14718 +buaa 0 1 6.957497 0.000000 14719 +newspag 0 1 6.957497 0.000000 14720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..b032f02c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +utexa 0 189 1.609438 0.000000 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +like 0 132 1.945910 0.000000 81 +construct 0 139 1.945910 0.000000 82 +document 1 121 2.079442 2.079442 89 +welcom 0 122 2.079442 0.000000 99 +number 0 130 2.079442 0.000000 97 +site 1 106 2.197225 2.197225 119 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +peopl 0 96 2.302585 0.000000 132 +homepag 1 93 2.397895 2.397895 148 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +dynam 1 76 2.564949 2.564949 194 +know 1 80 2.564949 2.564949 198 +meet 0 72 2.639057 0.000000 229 +html 0 75 2.639057 0.000000 235 +degre 0 69 2.708050 0.000000 259 +best 0 59 2.833213 0.000000 299 +maintain 0 51 2.995732 0.000000 342 +friend 1 48 3.044522 3.044522 376 +visitor 0 49 3.044522 0.000000 371 +netscap 1 44 3.135494 3.135494 395 +anoth 0 45 3.135494 0.000000 408 +live 0 40 3.258097 0.000000 451 +china 1 37 3.332205 3.332205 487 +titl 0 31 3.496508 0.000000 556 +travel 0 30 3.555348 0.000000 579 +enjoi 0 26 3.688879 0.000000 660 +inth 0 22 3.850148 0.000000 741 +born 0 21 3.912023 0.000000 798 +alumni 0 21 3.912023 0.000000 807 +beij 0 19 4.007333 0.000000 876 +citi 0 19 4.007333 0.000000 874 +lot 0 18 4.060443 0.000000 889 +former 1 17 4.110874 4.110874 956 +bachelor 0 17 4.110874 0.000000 957 +danc 0 12 4.465908 0.000000 1278 +classmat 1 9 4.753590 4.753590 1516 +capit 0 7 5.010635 0.000000 1957 +peek 0 6 5.164786 0.000000 2169 +peke 1 5 5.347108 5.347108 2539 +valuabl 0 5 5.347108 0.000000 2256 +qing 0 3 5.857933 0.000000 3295 +ofmi 0 3 5.857933 0.000000 3911 +sciencesaustin 0 3 5.857933 0.000000 3828 +oversea 0 2 6.263398 0.000000 5781 +informationand 0 2 6.263398 0.000000 4840 +clike 1 1 6.957497 6.957497 14721 +qinghi 0 1 6.957497 0.000000 14722 +scinc 0 1 6.957497 0.000000 14723 +findmor 0 1 6.957497 0.000000 14724 +pekingunivers 0 1 6.957497 0.000000 14725 +professionalinternetpc 0 1 6.957497 0.000000 14726 +relatedmac 0 1 6.957497 0.000000 14727 +relatedaft 0 1 6.957497 0.000000 14728 +worknetscap 0 1 6.957497 0.000000 14729 +testtwin 0 1 6.957497 0.000000 14730 +eldertwin 0 1 6.957497 0.000000 14731 +youngernetscap 0 1 6.957497 0.000000 14732 +testanim 0 1 6.957497 0.000000 14733 +titledanc 0 1 6.957497 0.000000 14734 +testanoth 0 1 6.957497 0.000000 14735 +testyet 0 1 6.957497 0.000000 14736 +testfriendsthi 0 1 6.957497 0.000000 14737 +xiaohai 0 1 6.957497 0.000000 14738 +shan 0 1 6.957497 0.000000 14739 +shinan 0 1 6.957497 0.000000 14740 +qingunivers 0 1 6.957497 0.000000 14741 +zhuqe 0 1 6.957497 0.000000 14742 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..b6efedb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +cours 0 273 1.098612 0.000000 15 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +click 0 142 1.945910 0.000000 78 +technolog 0 131 2.079442 0.000000 102 +seattl 0 120 2.079442 0.000000 103 +intern 0 108 2.197225 0.000000 128 +peopl 0 96 2.302585 0.000000 132 +comment 0 93 2.397895 0.000000 146 +educ 0 86 2.484907 0.000000 191 +refer 0 78 2.564949 0.000000 203 +nation 0 74 2.639057 0.000000 240 +receiv 0 66 2.708050 0.000000 244 +organ 0 65 2.772589 0.000000 265 +laboratori 0 63 2.772589 0.000000 292 +faculti 1 56 2.890372 2.890372 325 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +visitor 0 49 3.044522 0.000000 371 +live 0 40 3.258097 0.000000 451 +staff 0 36 3.367296 0.000000 490 +autumn 0 31 3.496508 0.000000 558 +univ 0 28 3.610918 0.000000 617 +progress 0 28 3.610918 0.000000 598 +handl 0 24 3.761200 0.000000 685 +half 0 21 3.912023 0.000000 776 +voic 0 21 3.912023 0.000000 806 +region 1 19 4.007333 4.007333 875 +intel 0 16 4.174387 0.000000 1000 +webmast 0 15 4.248495 0.000000 1045 +desktop 0 10 4.653960 0.000000 1445 +perspect 0 10 4.653960 0.000000 1437 +deadlin 0 9 4.753590 0.000000 1502 +pacif 0 8 4.875197 0.000000 1674 +centuri 0 7 5.010635 0.000000 1935 +elsewher 0 5 5.347108 0.000000 2444 +highlight 0 5 5.347108 0.000000 2340 +medal 0 3 5.857933 0.000000 3912 +theimpact 0 3 5.857933 0.000000 3179 +exponenti 0 3 5.857933 0.000000 3529 +organizationsinclud 0 1 6.957497 0.000000 14743 +ouraffili 0 1 6.957497 0.000000 14744 +regioninclud 0 1 6.957497 0.000000 14745 +spotlightuwwin 0 1 6.957497 0.000000 14746 +programmingcontesttwovideo 0 1 6.957497 0.000000 14747 +initiativesourcolloquia 0 1 6.957497 0.000000 14748 +mbonemajordon 0 1 6.957497 0.000000 14749 +corporationdickkarp 0 1 6.957497 0.000000 14750 +scienceprofessionalmast 0 1 6.957497 0.000000 14751 +departmentoverview 0 1 6.957497 0.000000 14752 +staffposit 0 1 6.957497 0.000000 14753 +newscan 0 1 6.957497 0.000000 14754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..e9bbeb15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +data 0 170 1.791759 0.000000 49 +avail 0 169 1.791759 0.000000 48 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +postscript 1 131 2.079442 2.079442 90 +confer 0 126 2.079442 0.000000 100 +specif 0 106 2.197225 0.000000 106 +structur 0 106 2.197225 0.000000 105 +proceed 0 93 2.397895 0.000000 152 +imag 0 91 2.397895 0.000000 161 +appear 0 78 2.564949 0.000000 210 +effici 0 73 2.639057 0.000000 233 +unix 0 58 2.890372 0.000000 308 +special 0 56 2.890372 0.000000 320 +format 1 48 3.044522 3.044522 356 +fast 0 42 3.218876 0.000000 429 +multipl 0 39 3.258097 0.000000 453 +winter 0 36 3.367296 0.000000 500 +approxim 0 35 3.401197 0.000000 509 +measur 0 28 3.610918 0.000000 609 +retriev 0 27 3.637586 0.000000 621 +bookmark 0 26 3.688879 0.000000 639 +miscellan 0 23 3.806662 0.000000 731 +andrew 1 19 4.007333 4.007333 849 +beauti 0 18 4.060443 0.000000 912 +protect 0 17 4.110874 0.000000 935 +match 0 16 4.174387 0.000000 965 +wife 0 13 4.382027 0.000000 1196 +usenix 0 12 4.465908 0.000000 1240 +linda 0 10 4.653960 0.000000 1394 +distanc 0 9 4.753590 0.000000 1500 +erik 0 8 4.875197 0.000000 1701 +shapiro 0 8 4.875197 0.000000 1686 +daughter 0 7 5.010635 0.000000 1943 +spie 0 6 5.164786 0.000000 2119 +educomput 0 5 5.347108 0.000000 2524 +selberg 0 5 5.347108 0.000000 2441 +stupid 0 5 5.347108 0.000000 2489 +aberman 0 2 6.263398 0.000000 4429 +bourassa 0 2 6.263398 0.000000 5782 +virgil 0 2 6.263398 0.000000 5783 +melani 0 2 6.263398 0.000000 5784 +berman 1 1 6.957497 6.957497 14755 +debbi 1 1 6.957497 6.957497 14756 +pageandrew 0 1 6.957497 0.000000 14757 +tron 0 1 6.957497 0.000000 14758 +bothpostscript 0 1 6.957497 0.000000 14759 +andhtml 0 1 6.957497 0.000000 14760 +poison 0 1 6.957497 0.000000 14761 +donut 0 1 6.957497 0.000000 14762 +stupidmi 0 1 6.957497 0.000000 14763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..78ad31c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +recent 1 167 1.791759 1.791759 58 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +seattl 1 120 2.079442 2.079442 103 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +specif 0 106 2.197225 0.000000 106 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +call 0 91 2.397895 0.000000 153 +pictur 0 89 2.397895 0.000000 160 +start 0 83 2.484907 0.000000 173 +good 0 77 2.564949 0.000000 200 +view 0 70 2.708050 0.000000 254 +visit 1 63 2.772589 2.772589 288 +plan 0 65 2.772589 0.000000 272 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +undergradu 0 54 2.944439 0.000000 338 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +cool 0 49 3.044522 0.000000 374 +quarter 0 47 3.091042 0.000000 389 +physic 0 47 3.091042 0.000000 377 +made 1 44 3.135494 3.135494 398 +video 0 44 3.135494 0.000000 405 +live 0 40 3.258097 0.000000 451 +join 0 39 3.258097 0.000000 457 +post 0 35 3.401197 0.000000 505 +least 0 35 3.401197 0.000000 516 +someth 0 31 3.496508 0.000000 554 +photo 0 31 3.496508 0.000000 561 +taken 0 31 3.496508 0.000000 555 +pass 0 28 3.610918 0.000000 611 +team 0 27 3.637586 0.000000 625 +great 0 27 3.637586 0.000000 626 +doctor 0 24 3.761200 0.000000 709 +finish 0 22 3.850148 0.000000 748 +hous 0 21 3.912023 0.000000 801 +wrote 0 20 3.951244 0.000000 830 +excel 0 19 4.007333 0.000000 868 +adam 1 17 4.110874 4.110874 934 +ultim 0 17 4.110874 0.000000 943 +earli 0 16 4.174387 0.000000 968 +stock 0 16 4.174387 0.000000 1007 +across 0 16 4.174387 0.000000 974 +bodi 0 13 4.382027 0.000000 1178 +menu 0 13 4.382027 0.000000 1156 +night 0 11 4.553877 0.000000 1319 +thecomput 0 10 4.653960 0.000000 1408 +drink 0 9 4.753590 0.000000 1607 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +trade 0 7 5.010635 0.000000 1815 +occasion 0 7 5.010635 0.000000 1905 +coffe 1 5 5.347108 5.347108 2556 +hair 0 5 5.347108 0.000000 2446 +formerli 0 5 5.347108 0.000000 2397 +multiresolut 0 5 5.347108 0.000000 2423 +frisbe 0 5 5.347108 0.000000 2560 +glass 0 3 5.857933 0.000000 3759 +swarthmor 0 2 6.263398 0.000000 5621 +comet 0 2 6.263398 0.000000 5785 +sculptur 0 2 6.263398 0.000000 5176 +mona 0 2 6.263398 0.000000 5786 +gothic 0 2 6.263398 0.000000 5787 +finkelstein 1 1 6.957497 6.957497 14764 +photocopi 1 1 6.957497 6.957497 14765 +cup 0 1 6.957497 0.000000 14766 +limp 0 1 6.957497 0.000000 14767 +andlack 0 1 6.957497 0.000000 14768 +atprinceton 0 1 6.957497 0.000000 14769 +tibco 0 1 6.957497 0.000000 14770 +teknekron 0 1 6.957497 0.000000 14771 +alarg 0 1 6.957497 0.000000 14772 +calledumatata 0 1 6.957497 0.000000 14773 +thehilari 0 1 6.957497 0.000000 14774 +caff 0 1 6.957497 0.000000 14775 +lardo 0 1 6.957497 0.000000 14776 +chilli 0 1 6.957497 0.000000 14777 +snoqualmi 0 1 6.957497 0.000000 14778 +hyakutak 0 1 6.957497 0.000000 14779 +marcu 0 1 6.957497 0.000000 14780 +dither 0 1 6.957497 0.000000 14781 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..0594dfb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +data 2 170 1.791759 3.583518 49 +parallel 2 169 1.791759 3.583518 60 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +databas 1 122 2.079442 2.079442 86 +machin 1 129 2.079442 2.079442 95 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +provid 0 121 2.079442 0.000000 94 +manag 1 114 2.197225 2.197225 125 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +need 0 98 2.302585 0.000000 135 +present 1 91 2.397895 2.397895 145 +proceed 0 93 2.397895 0.000000 152 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +environ 0 84 2.484907 0.000000 177 +second 0 81 2.484907 0.000000 166 +novemb 0 81 2.484907 0.000000 179 +requir 0 81 2.484907 0.000000 167 +larg 0 82 2.484907 0.000000 168 +method 1 80 2.564949 2.564949 213 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +nation 0 74 2.639057 0.000000 240 +workshop 0 71 2.639057 0.000000 239 +involv 0 71 2.639057 0.000000 227 +august 0 66 2.708050 0.000000 257 +experi 1 64 2.772589 2.772589 283 +laboratori 1 63 2.772589 2.772589 292 +improv 1 62 2.772589 2.772589 289 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +share 0 59 2.833213 0.000000 304 +simpl 0 60 2.833213 0.000000 298 +scientif 1 53 2.944439 2.944439 341 +februari 0 54 2.944439 0.000000 328 +found 0 53 2.944439 0.000000 337 +visual 1 48 3.044522 3.044522 372 +set 1 50 3.044522 3.044522 361 +effect 1 46 3.091042 3.091042 385 +possibl 0 47 3.091042 0.000000 378 +describ 1 45 3.135494 3.135494 400 +show 0 43 3.178054 0.000000 417 +vision 1 41 3.218876 3.218876 430 +fast 1 42 3.218876 3.218876 429 +cost 1 37 3.332205 3.332205 480 +jame 2 35 3.401197 6.802394 507 +post 0 35 3.401197 0.000000 505 +queri 0 33 3.433987 0.000000 524 +scientist 0 31 3.496508 0.000000 560 +load 2 28 3.610918 7.221836 601 +intend 0 28 3.610918 0.000000 599 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +supercomput 0 25 3.737670 0.000000 681 +handl 0 24 3.761200 0.000000 685 +highli 0 23 3.806662 0.000000 725 +displai 0 23 3.806662 0.000000 712 +ofwashington 0 22 3.850148 0.000000 766 +avoid 0 21 3.912023 0.000000 799 +facil 0 20 3.951244 0.000000 814 +toolkit 0 20 3.951244 0.000000 835 +longer 0 20 3.951244 0.000000 816 +aid 0 18 4.060443 0.000000 904 +render 1 17 4.110874 4.110874 947 +steven 0 17 4.110874 0.000000 953 +massiv 0 15 4.248495 0.000000 1026 +balanc 2 14 4.317488 8.634976 1112 +save 0 14 4.317488 0.000000 1099 +charl 1 13 4.382027 4.382027 1149 +directli 0 13 4.382027 0.000000 1141 +promot 0 12 4.465908 0.000000 1235 +target 0 12 4.465908 0.000000 1282 +amount 0 12 4.465908 0.000000 1208 +typic 0 11 4.553877 0.000000 1360 +motiv 0 11 4.553877 0.000000 1346 +extrem 0 11 4.553877 0.000000 1330 +mesh 0 11 4.553877 0.000000 1351 +linda 0 10 4.653960 0.000000 1394 +tanimoto 0 10 4.653960 0.000000 1429 +occur 0 9 4.753590 0.000000 1572 +factor 0 9 4.753590 0.000000 1544 +frank 0 9 4.753590 0.000000 1568 +transmiss 0 9 4.753590 0.000000 1588 +polygon 1 8 4.875197 4.875197 1723 +shapiro 0 8 4.875197 0.000000 1686 +lewi 0 8 4.875197 0.000000 1700 +unifi 0 8 4.875197 0.000000 1774 +dataset 1 7 5.010635 5.010635 1914 +shade 1 7 5.010635 5.010635 1881 +huge 0 6 5.164786 0.000000 1991 +unnecessari 0 5 5.347108 0.000000 2506 +ortega 0 5 5.347108 0.000000 2559 +icpp 0 5 5.347108 0.000000 2382 +loss 0 3 5.857933 0.000000 3805 +jakobovit 0 3 5.857933 0.000000 3913 +lara 0 3 5.857933 0.000000 3914 +ahren 2 2 6.263398 12.526796 5788 +redistribut 1 2 6.263398 6.263398 5582 +hansen 1 2 6.263398 6.263398 4301 +alamo 0 2 6.263398 0.000000 4243 +degrad 0 2 6.263398 0.000000 5362 +brinklei 0 2 6.263398 0.000000 5789 +notebook 0 2 6.263398 0.000000 5790 +arbitrarili 0 2 6.263398 0.000000 5791 +onunbalanc 1 1 6.957497 6.957497 14782 +whichperform 0 1 6.957497 0.000000 14783 +outweighth 0 1 6.957497 0.000000 14784 +polygonrender 0 1 6.957497 0.000000 14785 +percent 0 1 6.957497 0.000000 14786 +onbalanc 0 1 6.957497 0.000000 14787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..54a2cf62 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +implement 0 152 1.791759 0.000000 52 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +support 0 132 1.945910 0.000000 83 +databas 2 122 2.079442 4.158884 86 +technolog 1 131 2.079442 2.079442 102 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +manag 1 114 2.197225 2.197225 125 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +proceed 0 93 2.397895 0.000000 152 +present 0 91 2.397895 0.000000 145 +environ 1 84 2.484907 2.484907 177 +level 1 87 2.484907 2.484907 180 +contain 0 81 2.484907 0.000000 174 +interfac 1 79 2.564949 2.564949 209 +symposium 1 72 2.639057 2.639057 238 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +differ 0 66 2.708050 0.000000 253 +experi 1 64 2.772589 2.772589 283 +interact 0 62 2.772589 0.000000 270 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +share 0 59 2.833213 0.000000 304 +overview 1 56 2.890372 2.890372 323 +februari 1 54 2.944439 2.944439 328 +scientif 1 53 2.944439 2.944439 341 +investig 0 51 2.995732 0.000000 353 +visual 1 48 3.044522 3.044522 372 +electron 1 47 3.091042 3.091042 379 +describ 0 45 3.135494 0.000000 400 +vision 2 41 3.218876 6.437752 430 +multi 1 36 3.367296 3.367296 493 +jame 1 35 3.401197 3.401197 507 +queri 1 33 3.433987 3.433987 524 +experiment 0 26 3.688879 0.000000 645 +store 0 24 3.761200 0.000000 693 +highli 0 23 3.806662 0.000000 725 +among 0 21 3.912023 0.000000 781 +definit 0 19 4.007333 0.000000 864 +aid 0 18 4.060443 0.000000 904 +steven 1 17 4.110874 4.110874 953 +hierarch 0 15 4.248495 0.000000 1018 +attribut 0 14 4.317488 0.000000 1092 +motiv 0 11 4.553877 0.000000 1346 +linda 1 10 4.653960 4.653960 1394 +tanimoto 1 10 4.653960 4.653960 1429 +princip 0 10 4.653960 0.000000 1397 +shapiro 1 8 4.875197 4.875197 1686 +lewi 1 8 4.875197 4.875197 1700 +databasesystem 0 8 4.875197 0.000000 1617 +spie 1 6 5.164786 5.164786 2119 +schema 0 6 5.164786 0.000000 1988 +scienceand 0 5 5.347108 0.000000 2348 +jakobovit 1 3 5.857933 5.857933 3913 +lara 1 3 5.857933 5.857933 3914 +entiti 1 3 5.857933 5.857933 3096 +ahren 1 2 6.263398 6.263398 5788 +brinklei 1 2 6.263398 6.263398 5789 +notebook 0 2 6.263398 0.000000 5790 +databaseenviron 0 2 6.263398 0.000000 5792 +datastructur 0 2 6.263398 0.000000 4685 +devr 1 1 6.957497 6.957497 14788 +wasdesign 0 1 6.957497 0.000000 14789 +andintend 0 1 6.957497 0.000000 14790 +unifieddata 0 1 6.957497 0.000000 14791 +queryfacil 0 1 6.957497 0.000000 14792 +andpromot 0 1 6.957497 0.000000 14793 +ofproperti 0 1 6.957497 0.000000 14794 +thepart 0 1 6.957497 0.000000 14795 +buildinst 0 1 6.957497 0.000000 14796 +inmodel 0 1 6.957497 0.000000 14797 +secondcad 0 1 6.957497 0.000000 14798 +flexibledata 0 1 6.957497 0.000000 14799 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..34094fd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +construct 0 139 1.945910 0.000000 82 +relat 0 139 1.945910 0.000000 68 +graphic 0 90 2.397895 0.000000 147 +center 0 88 2.397895 0.000000 158 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +nation 0 74 2.639057 0.000000 240 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +back 0 60 2.833213 0.000000 297 +allow 0 53 2.944439 0.000000 333 +undergradu 0 54 2.944439 0.000000 338 +still 0 50 3.044522 0.000000 362 +principl 0 48 3.044522 0.000000 357 +cool 0 49 3.044522 0.000000 374 +quarter 0 47 3.091042 0.000000 389 +math 0 44 3.135494 0.000000 402 +past 0 42 3.218876 0.000000 428 +seminar 0 38 3.295837 0.000000 470 +autumn 0 31 3.496508 0.000000 558 +great 1 27 3.637586 3.637586 626 +william 0 22 3.850148 0.000000 765 +programminglanguag 0 21 3.912023 0.000000 782 +spend 0 19 4.007333 0.000000 850 +scott 0 18 4.060443 0.000000 884 +ultim 0 17 4.110874 0.000000 943 +women 1 16 4.174387 4.174387 1004 +front 0 13 4.382027 0.000000 1154 +automata 0 13 4.382027 0.000000 1135 +opportun 0 13 4.382027 0.000000 1161 +undergrad 0 9 4.753590 0.000000 1589 +christian 0 7 5.010635 0.000000 1949 +peterson 0 7 5.010635 0.000000 1850 +park 0 6 5.164786 0.000000 2218 +outdoor 1 5 5.347108 5.347108 2514 +frisbe 0 5 5.347108 0.000000 2560 +mentor 0 4 5.568345 0.000000 2591 +pile 0 2 6.263398 0.000000 5371 +blast 0 2 6.263398 0.000000 5172 +femal 0 2 6.263398 0.000000 4672 +pagelast 0 2 6.263398 0.000000 5793 +mentorship 1 1 6.957497 6.957497 14800 +bernheim 0 1 6.957497 0.000000 14801 +ofdigit 0 1 6.957497 0.000000 14802 +gorp 0 1 6.957497 0.000000 14803 +guideto 0 1 6.957497 0.000000 14804 +recreationfun 0 1 6.957497 0.000000 14805 +abig 0 1 6.957497 0.000000 14806 +scoobi 0 1 6.957497 0.000000 14807 +dooeduc 0 1 6.957497 0.000000 14808 +summerwork 0 1 6.957497 0.000000 14809 +highlyrecommend 0 1 6.957497 0.000000 14810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..3cb1a572 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +algorithm 1 162 1.791759 1.791759 57 +recent 0 167 1.791759 0.000000 58 +like 1 132 1.945910 1.945910 81 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +postscript 0 131 2.079442 0.000000 90 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +part 0 98 2.302585 0.000000 129 +graphic 0 90 2.397895 0.000000 147 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +educ 0 86 2.484907 0.000000 191 +learn 0 86 2.484907 0.000000 170 +master 1 76 2.564949 2.564949 216 +optim 0 79 2.564949 0.000000 197 +write 0 72 2.639057 0.000000 222 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +would 0 67 2.708050 0.000000 251 +experi 0 64 2.772589 0.000000 283 +result 0 65 2.772589 0.000000 281 +simpl 0 60 2.833213 0.000000 298 +thesi 0 57 2.890372 0.000000 327 +explor 0 58 2.890372 0.000000 324 +publish 0 57 2.890372 0.000000 326 +undergradu 0 54 2.944439 0.000000 338 +found 0 53 2.944439 0.000000 337 +particular 0 51 2.995732 0.000000 352 +better 0 45 3.135494 0.000000 401 +might 0 41 3.218876 0.000000 426 +error 1 40 3.258097 3.258097 449 +tree 0 36 3.367296 0.000000 492 +obtain 0 33 3.433987 0.000000 534 +scientist 1 31 3.496508 3.496508 560 +anim 0 31 3.496508 0.000000 557 +often 0 31 3.496508 0.000000 551 +built 0 29 3.583519 0.000000 592 +consid 0 29 3.583519 0.000000 590 +wai 1 25 3.737670 3.737670 662 +proof 1 23 3.806662 3.806662 720 +compress 0 23 3.806662 0.000000 719 +theorem 0 21 3.912023 0.000000 786 +avoid 0 21 3.912023 0.000000 799 +binari 0 20 3.951244 0.000000 823 +wrote 0 20 3.951244 0.000000 830 +seem 0 18 4.060443 0.000000 899 +whether 0 17 4.110874 0.000000 918 +universityof 0 15 4.248495 0.000000 1061 +hierarch 0 15 4.248495 0.000000 1018 +style 0 15 4.248495 0.000000 1036 +balanc 0 14 4.317488 0.000000 1112 +believ 0 13 4.382027 0.000000 1187 +unfortun 0 13 4.382027 0.000000 1170 +care 0 13 4.382027 0.000000 1177 +motiv 1 11 4.553877 4.553877 1346 +scienceat 0 11 4.553877 0.000000 1375 +ring 0 8 4.875197 0.000000 1684 +told 0 8 4.875197 0.000000 1658 +refere 0 7 5.010635 0.000000 1895 +toronto 0 6 5.164786 0.000000 2156 +meant 0 6 5.164786 0.000000 2055 +fewer 0 6 5.164786 0.000000 2074 +mistak 0 6 5.164786 0.000000 2110 +broadcast 0 5 5.347108 0.000000 2453 +shift 0 5 5.347108 0.000000 2357 +tend 0 4 5.568345 0.000000 3041 +conserv 0 4 5.568345 0.000000 2870 +will 0 4 5.568345 0.000000 2782 +glad 0 4 5.568345 0.000000 2657 +fear 0 4 5.568345 0.000000 2911 +reveal 0 4 5.568345 0.000000 2647 +complic 0 4 5.568345 0.000000 2902 +amir 1 3 5.857933 5.857933 3850 +hereto 0 3 5.857933 0.000000 3476 +quotat 0 3 5.857933 0.000000 3121 +theywil 0 3 5.857933 0.000000 3102 +incorrect 0 3 5.857933 0.000000 3134 +caught 0 3 5.857933 0.000000 3465 +obvious 0 3 5.857933 0.000000 3474 +hoar 0 3 5.857933 0.000000 3875 +mathematician 1 2 6.263398 6.263398 5189 +defici 1 2 6.263398 6.263398 5054 +persuad 0 2 6.263398 0.000000 5384 +obviou 0 2 6.263398 0.000000 5367 +michail 0 1 6.957497 0.000000 14811 +michailgradu 0 1 6.957497 0.000000 14812 +studenti 0 1 6.957497 0.000000 14813 +followingarea 0 1 6.957497 0.000000 14814 +summationfor 0 1 6.957497 0.000000 14815 +opsi 0 1 6.957497 0.000000 14816 +appletdesign 0 1 6.957497 0.000000 14817 +combinesprogram 0 1 6.957497 0.000000 14818 +lunar 0 1 6.957497 0.000000 14819 +lander 0 1 6.957497 0.000000 14820 +gamethat 0 1 6.957497 0.000000 14821 +unwillingto 0 1 6.957497 0.000000 14822 +embarrass 0 1 6.957497 0.000000 14823 +publishedincorrect 0 1 6.957497 0.000000 14824 +unconvent 0 1 6.957497 0.000000 14825 +proofstyl 0 1 6.957497 0.000000 14826 +theyhav 0 1 6.957497 0.000000 14827 +wasnot 0 1 6.957497 0.000000 14828 +stylethat 0 1 6.957497 0.000000 14829 +lesli 0 1 6.957497 0.000000 14830 +lamport 0 1 6.957497 0.000000 14831 +wayi 0 1 6.957497 0.000000 14832 +theother 0 1 6.957497 0.000000 14833 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..9084c1ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +algorithm 1 162 1.791759 1.791759 57 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +teach 0 108 2.197225 0.000000 112 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +institut 1 84 2.484907 2.484907 187 +academ 0 82 2.484907 0.000000 178 +resum 0 79 2.564949 0.000000 217 +receiv 0 66 2.708050 0.000000 244 +main 0 67 2.708050 0.000000 256 +visit 1 63 2.772589 2.772589 288 +evalu 0 64 2.772589 0.000000 266 +colleg 0 61 2.833213 0.000000 300 +scientif 0 53 2.944439 0.000000 341 +join 0 39 3.258097 0.000000 457 +award 0 34 3.401197 0.000000 523 +india 0 32 3.465736 0.000000 550 +richard 1 31 3.496508 3.496508 559 +travel 1 30 3.555348 3.555348 579 +progress 0 28 3.610918 0.000000 598 +spent 0 25 3.737670 0.000000 676 +indian 0 22 3.850148 0.000000 769 +geometri 0 22 3.850148 0.000000 752 +anderson 1 19 4.007333 4.007333 860 +postdoc 0 8 4.875197 0.000000 1724 +presidenti 0 8 4.875197 0.000000 1737 +qualifi 0 8 4.875197 0.000000 1721 +tourist 0 8 4.875197 0.000000 1710 +implementationof 0 7 5.010635 0.000000 1813 +reed 0 6 5.164786 0.000000 2086 +inmathemat 0 2 6.263398 0.000000 5098 +younginvestig 0 2 6.263398 0.000000 5794 +bangalor 0 2 6.263398 0.000000 5110 +theindian 0 2 6.263398 0.000000 5795 +stanfordin 0 1 6.957497 0.000000 14834 +aon 0 1 6.957497 0.000000 14835 +inberkelei 0 1 6.957497 0.000000 14836 +yeara 0 1 6.957497 0.000000 14837 +talksanderson 0 1 6.957497 0.000000 14838 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..28682b65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +last 0 314 1.098612 0.000000 14 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +first 1 140 1.945910 1.945910 71 +process 0 142 1.945910 0.000000 72 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +theori 0 111 2.197225 0.000000 127 +teach 0 108 2.197225 0.000000 112 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +librari 1 87 2.484907 2.484907 181 +school 0 84 2.484907 0.000000 188 +start 0 83 2.484907 0.000000 173 +academ 0 82 2.484907 0.000000 178 +thing 0 84 2.484907 0.000000 189 +educ 0 86 2.484907 0.000000 191 +name 1 72 2.639057 2.639057 220 +intellig 0 72 2.639057 0.000000 225 +write 0 72 2.639057 0.000000 222 +receiv 0 66 2.708050 0.000000 244 +knowledg 0 67 2.708050 0.000000 243 +plan 1 65 2.772589 2.772589 272 +artifici 0 63 2.772589 0.000000 280 +content 0 59 2.833213 0.000000 302 +locat 0 59 2.833213 0.000000 303 +investig 0 51 2.995732 0.000000 353 +life 0 50 3.044522 0.000000 375 +made 0 44 3.135494 0.000000 398 +show 0 43 3.178054 0.000000 417 +music 1 42 3.218876 3.218876 436 +combin 0 42 3.218876 0.000000 421 +theoret 0 39 3.258097 0.000000 446 +vita 0 38 3.295837 0.000000 473 +mean 0 37 3.332205 0.000000 477 +purpos 0 37 3.332205 0.000000 481 +soon 0 36 3.367296 0.000000 494 +represent 0 35 3.401197 0.000000 512 +award 0 34 3.401197 0.000000 523 +curriculum 0 33 3.433987 0.000000 535 +travel 1 30 3.555348 3.555348 579 +art 0 29 3.583519 0.000000 593 +decis 1 23 3.806662 3.806662 728 +honor 0 23 3.806662 0.000000 729 +finish 0 22 3.850148 0.000000 748 +ofwashington 0 22 3.850148 0.000000 766 +born 0 21 3.912023 0.000000 798 +mostli 1 19 4.007333 4.007333 869 +histori 0 19 4.007333 0.000000 853 +listen 0 18 4.060443 0.000000 907 +thought 0 17 4.110874 0.000000 945 +brother 0 13 4.382027 0.000000 1189 +employ 0 12 4.465908 0.000000 1291 +qualit 0 11 4.553877 0.000000 1362 +probabilist 0 11 4.553877 0.000000 1343 +literatur 0 11 4.553877 0.000000 1300 +creativ 0 8 4.875197 0.000000 1777 +switch 0 8 4.875197 0.000000 1718 +brought 0 7 5.010635 0.000000 1925 +gave 0 7 5.010635 0.000000 1922 +foreign 0 7 5.010635 0.000000 1919 +reconstruct 0 6 5.164786 0.000000 2170 +blue 0 6 5.164786 0.000000 2227 +mother 0 6 5.164786 0.000000 2083 +markov 0 5 5.347108 0.000000 2280 +jazz 0 5 5.347108 0.000000 2527 +paint 0 5 5.347108 0.000000 2400 +andengin 0 4 5.568345 0.000000 3042 +ofmi 1 3 5.857933 5.857933 3911 +revisit 0 3 5.857933 0.000000 3915 +father 0 3 5.857933 0.000000 3757 +birth 0 3 5.857933 0.000000 3594 +affair 0 3 5.857933 0.000000 3916 +anhai 1 2 6.263398 6.263398 4404 +doan 1 2 6.263398 6.263398 4405 +andscienc 0 2 6.263398 0.000000 5796 +milwauke 0 2 6.263398 0.000000 5797 +amcurr 0 2 6.263398 0.000000 5798 +vietnames 0 2 6.263398 0.000000 5593 +syllabl 1 1 6.957497 6.957497 14839 +hungari 1 1 6.957497 6.957497 14840 +birthplac 1 1 6.957497 6.957497 14841 +homepageanhai 0 1 6.957497 0.000000 14842 +vietnam 0 1 6.957497 0.000000 14843 +iwent 0 1 6.957497 0.000000 14844 +kossuth 0 1 6.957497 0.000000 14845 +lajo 0 1 6.957497 0.000000 14846 +debrecen 0 1 6.957497 0.000000 14847 +underuncertainti 0 1 6.957497 0.000000 14848 +calm 0 1 6.957497 0.000000 14849 +invietnames 0 1 6.957497 0.000000 14850 +nghean 0 1 6.957497 0.000000 14851 +haiphong 0 1 6.957497 0.000000 14852 +folkswer 0 1 6.957497 0.000000 14853 +younger 0 1 6.957497 0.000000 14854 +theysimpli 0 1 6.957497 0.000000 14855 +namehaian 0 1 6.957497 0.000000 14856 +comtemporari 0 1 6.957497 0.000000 14857 +snapshotsanhai 0 1 6.957497 0.000000 14858 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..2adcb7c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +click 0 142 1.945910 0.000000 78 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +spring 0 131 2.079442 0.000000 88 +code 0 108 2.197225 0.000000 116 +site 0 106 2.197225 0.000000 119 +mathemat 0 108 2.197225 0.000000 123 +version 0 113 2.197225 0.000000 122 +final 0 116 2.197225 0.000000 108 +find 0 111 2.197225 0.000000 111 +place 0 106 2.197225 0.000000 124 +graphic 1 90 2.397895 2.397895 147 +pictur 0 89 2.397895 0.000000 160 +follow 0 92 2.397895 0.000000 143 +control 0 82 2.484907 0.000000 164 +dynam 1 76 2.564949 2.564949 194 +complet 0 77 2.564949 0.000000 208 +resum 0 79 2.564949 0.000000 217 +optim 0 79 2.564949 0.000000 197 +workshop 0 71 2.639057 0.000000 239 +sieg 0 69 2.708050 0.000000 260 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +automat 1 61 2.833213 2.833213 306 +februari 0 54 2.944439 0.000000 328 +without 1 50 3.044522 3.044522 370 +quarter 0 47 3.091042 0.000000 389 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +mark 0 44 3.135494 0.000000 403 +better 0 45 3.135494 0.000000 401 +show 0 43 3.178054 0.000000 417 +fast 0 42 3.218876 0.000000 429 +small 0 39 3.258097 0.000000 447 +origin 0 38 3.295837 0.000000 472 +slide 0 38 3.295837 0.000000 467 +seminar 0 38 3.295837 0.000000 470 +short 0 36 3.367296 0.000000 499 +anim 1 31 3.496508 3.496508 557 +autumn 0 31 3.496508 0.000000 558 +hope 0 28 3.610918 0.000000 610 +univ 0 28 3.610918 0.000000 617 +valu 0 25 3.737670 0.000000 665 +motion 1 24 3.761200 3.761200 699 +magazin 0 24 3.761200 0.000000 704 +compress 0 23 3.806662 0.000000 719 +sequenc 0 23 3.806662 0.000000 734 +togeth 0 23 3.806662 0.000000 714 +try 0 22 3.850148 0.000000 764 +mpeg 0 20 3.951244 0.000000 831 +synthesi 0 20 3.951244 0.000000 834 +figur 1 18 4.060443 4.060443 903 +bershad 0 18 4.060443 0.000000 902 +event 0 18 4.060443 0.000000 896 +speed 0 18 4.060443 0.000000 911 +brown 0 16 4.174387 0.000000 977 +took 0 16 4.174387 0.000000 1010 +qual 0 15 4.248495 0.000000 1062 +goe 0 15 4.248495 0.000000 1044 +piec 0 15 4.248495 0.000000 1020 +consider 0 14 4.317488 0.000000 1076 +benjamin 0 11 4.553877 0.000000 1296 +decomposit 0 10 4.653960 0.000000 1439 +jump 0 9 4.753590 0.000000 1603 +joel 1 8 4.875197 4.875197 1698 +chamber 1 8 4.875197 4.875197 1692 +egger 1 8 4.875197 4.875197 1695 +pldi 0 8 4.875197 0.000000 1704 +switch 0 8 4.875197 0.000000 1718 +dispatch 0 7 5.010635 0.000000 1791 +gave 0 7 5.010635 0.000000 1922 +mock 0 6 5.164786 0.000000 2087 +philipos 1 5 5.347108 5.347108 2373 +engineeringat 0 5 5.347108 0.000000 2561 +andp 0 4 5.568345 0.000000 2811 +pardyak 0 4 5.568345 0.000000 3043 +doubl 0 4 5.568345 0.000000 2951 +ausland 2 3 5.857933 11.715866 3917 +super 0 3 5.857933 0.000000 3918 +singular 0 3 5.857933 0.000000 3366 +multiflow 0 2 6.263398 0.000000 4473 +articul 0 2 6.263398 0.000000 5799 +acmtransact 0 2 6.263398 0.000000 4310 +wilkerson 0 2 6.263398 0.000000 4516 +mywork 0 2 6.263398 0.000000 5800 +orang 0 2 6.263398 0.000000 5163 +onit 0 1 6.957497 0.000000 14859 +andb 0 1 6.957497 0.000000 14860 +inextens 0 1 6.957497 0.000000 14861 +compilersupport 0 1 6.957497 0.000000 14862 +synthesisfor 0 1 6.957497 0.000000 14863 +fukunaga 0 1 6.957497 0.000000 14864 +partovi 0 1 6.957497 0.000000 14865 +christensen 0 1 6.957497 0.000000 14866 +reiss 0 1 6.957497 0.000000 14867 +shuman 0 1 6.957497 0.000000 14868 +leapfrog 0 1 6.957497 0.000000 14869 +lossili 0 1 6.957497 0.000000 14870 +animationthat 0 1 6.957497 0.000000 14871 +cartwheel 0 1 6.957497 0.000000 14872 +andshuffl 0 1 6.957497 0.000000 14873 +andcollaps 0 1 6.957497 0.000000 14874 +isjust 0 1 6.957497 0.000000 14875 +tosmooth 0 1 6.957497 0.000000 14876 +thetalk 0 1 6.957497 0.000000 14877 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..63f24d51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +number 0 130 2.079442 0.000000 97 +look 1 107 2.197225 2.197225 115 +site 0 106 2.197225 0.000000 119 +find 0 111 2.197225 0.000000 111 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +search 0 95 2.397895 0.000000 155 +info 1 85 2.484907 2.484907 176 +second 0 81 2.484907 0.000000 166 +institut 0 84 2.484907 0.000000 187 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +refer 0 78 2.564949 0.000000 203 +complet 0 77 2.564949 0.000000 208 +name 1 72 2.639057 2.639057 220 +onlin 0 75 2.639057 0.000000 223 +sieg 0 69 2.708050 0.000000 260 +view 0 70 2.708050 0.000000 254 +test 0 66 2.708050 0.000000 252 +visit 0 63 2.772589 0.000000 288 +dept 0 64 2.772589 0.000000 291 +locat 0 59 2.833213 0.000000 303 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +friend 0 48 3.044522 0.000000 376 +get 0 46 3.091042 0.000000 380 +favorit 1 44 3.135494 3.135494 410 +edit 0 42 3.218876 0.000000 418 +past 0 42 3.218876 0.000000 428 +brian 1 38 3.295837 3.295837 466 +origin 0 38 3.295837 0.000000 472 +random 0 34 3.401197 0.000000 511 +idea 0 32 3.465736 0.000000 545 +quot 0 29 3.583519 0.000000 582 +actual 0 28 3.610918 0.000000 604 +stop 0 17 4.110874 0.000000 942 +alreadi 0 16 4.174387 0.000000 963 +doesn 0 15 4.248495 0.000000 1055 +hotlist 0 13 4.382027 0.000000 1199 +touch 0 12 4.465908 0.000000 1288 +brad 0 12 4.465908 0.000000 1264 +artist 0 6 5.164786 0.000000 2127 +band 0 6 5.164786 0.000000 2198 +girl 0 5 5.347108 0.000000 2410 +worst 0 5 5.347108 0.000000 2287 +poem 0 5 5.347108 0.000000 2483 +guestbook 0 5 5.347108 0.000000 2475 +snapshot 0 5 5.347108 0.000000 2303 +washingtonseattl 0 4 5.568345 0.000000 3044 +ling 0 4 5.568345 0.000000 3045 +confus 0 3 5.857933 0.000000 3144 +thrash 0 3 5.857933 0.000000 3400 +mossi 0 2 6.263398 0.000000 5801 +liber 0 2 6.263398 0.000000 5154 +glorifi 0 2 6.263398 0.000000 4114 +mental 0 2 6.263398 0.000000 5802 +stolen 0 2 6.263398 0.000000 5803 +boinge 1 1 6.957497 6.957497 14878 +michalowskidepart 0 1 6.957497 0.000000 14879 +engineeringmail 0 1 6.957497 0.000000 14880 +bitsthank 0 1 6.957497 0.000000 14881 +headscapewhenev 0 1 6.957497 0.000000 14882 +gradstud 0 1 6.957497 0.000000 14883 +inlinguist 0 1 6.957497 0.000000 14884 +ultrahotlist 0 1 6.957497 0.000000 14885 +ofal 0 1 6.957497 0.000000 14886 +forsometh 0 1 6.957497 0.000000 14887 +thave 0 1 6.957497 0.000000 14888 +urouletteto 0 1 6.957497 0.000000 14889 +ofwhich 0 1 6.957497 0.000000 14890 +songsand 0 1 6.957497 0.000000 14891 +fictiti 0 1 6.957497 0.000000 14892 +puriti 0 1 6.957497 0.000000 14893 +tokeep 0 1 6.957497 0.000000 14894 +pagesfrom 0 1 6.957497 0.000000 14895 +aslfingerspel 0 1 6.957497 0.000000 14896 +blatantli 0 1 6.957497 0.000000 14897 +chamberlain 0 1 6.957497 0.000000 14898 +michalowski 0 1 6.957497 0.000000 14899 +sanityerad 0 1 6.957497 0.000000 14900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..e5532a88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +paper 0 205 1.609438 0.000000 38 +recent 1 167 1.791759 1.791759 58 +distribut 0 162 1.791759 0.000000 51 +professor 0 137 1.945910 0.000000 76 +mathemat 0 108 2.197225 0.000000 123 +theori 0 111 2.197225 0.000000 127 +associ 1 93 2.397895 2.397895 151 +receiv 1 66 2.708050 2.708050 244 +complex 0 64 2.772589 0.000000 269 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +join 0 39 3.258097 0.000000 457 +theoret 0 39 3.258097 0.000000 446 +paul 1 38 3.295837 3.295837 471 +connect 0 37 3.332205 0.000000 485 +post 0 35 3.401197 0.000000 505 +award 0 34 3.401197 0.000000 523 +autumn 0 31 3.496508 0.000000 558 +enjoi 0 26 3.688879 0.000000 660 +concern 0 25 3.737670 0.000000 666 +aspect 0 25 3.737670 0.000000 663 +sport 0 25 3.737670 0.000000 683 +doctor 0 24 3.761200 0.000000 709 +proof 1 23 3.806662 3.806662 720 +theunivers 0 21 3.912023 0.000000 797 +concentr 0 18 4.060443 0.000000 906 +qual 0 15 4.248495 0.000000 1062 +primarili 0 13 4.382027 0.000000 1185 +thedepart 0 11 4.553877 0.000000 1350 +softbal 0 9 4.753590 0.000000 1594 +toronto 0 6 5.164786 0.000000 2156 +squash 0 6 5.164786 0.000000 2223 +lack 0 6 5.164786 0.000000 1994 +beam 1 5 5.347108 5.347108 2344 +engineeringat 0 5 5.347108 0.000000 2561 +talent 0 3 5.857933 0.000000 3768 +sciencein 0 2 6.263398 0.000000 5804 +paralleland 0 2 6.263398 0.000000 5805 +beamepaul 1 1 6.957497 6.957497 14901 +computationalcomplex 1 1 6.957497 6.957497 14902 +academicyear 0 1 6.957497 0.000000 14903 +presidentialyoung 0 1 6.957497 0.000000 14904 +inproposit 0 1 6.957497 0.000000 14905 +enthusiasm 0 1 6.957497 0.000000 14906 +cancompens 0 1 6.957497 0.000000 14907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..21468966 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +oper 0 180 1.609438 0.000000 34 +develop 0 174 1.791759 0.000000 53 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +real 0 93 2.397895 0.000000 144 +info 0 85 2.484907 0.000000 176 +stuff 0 87 2.484907 0.000000 171 +level 0 87 2.484907 0.000000 180 +build 0 85 2.484907 0.000000 184 +environ 0 84 2.484907 0.000000 177 +librari 0 87 2.484907 0.000000 181 +optim 0 79 2.564949 0.000000 197 +david 0 71 2.639057 0.000000 232 +line 0 75 2.639057 0.000000 231 +free 0 73 2.639057 0.000000 224 +function 0 62 2.772589 0.000000 275 +colleg 0 61 2.833213 0.000000 300 +plai 0 60 2.833213 0.000000 307 +much 0 52 2.995732 0.000000 349 +done 0 47 3.091042 0.000000 381 +mark 0 44 3.135494 0.000000 403 +keep 0 44 3.135494 0.000000 409 +favorit 0 44 3.135494 0.000000 410 +respons 0 37 3.332205 0.000000 476 +platform 0 29 3.583519 0.000000 591 +measur 0 28 3.610918 0.000000 609 +team 0 27 3.637586 0.000000 625 +sport 1 25 3.737670 3.737670 683 +grad 0 20 3.951244 0.000000 837 +particularli 0 19 4.007333 0.000000 867 +ultim 1 17 4.110874 4.110874 943 +devic 1 16 4.174387 4.174387 1002 +goe 0 15 4.248495 0.000000 1044 +spin 0 14 4.317488 0.000000 1121 +econom 0 13 4.382027 0.000000 1184 +jump 0 9 4.753590 0.000000 1603 +volleybal 0 9 4.753590 0.000000 1598 +trust 0 9 4.753590 0.000000 1583 +spot 0 7 5.010635 0.000000 1894 +bunch 0 7 5.010635 0.000000 1861 +athlet 0 7 5.010635 0.000000 1933 +tri 0 6 5.164786 0.000000 2166 +railroad 0 6 5.164786 0.000000 2161 +frisbe 1 5 5.347108 5.347108 2560 +minnesota 0 5 5.347108 0.000000 2469 +anti 0 5 5.347108 0.000000 2434 +champion 1 4 5.568345 5.568345 2982 +skate 0 4 5.568345 0.000000 3046 +borrow 0 3 5.857933 0.000000 3725 +somedai 0 3 5.857933 0.000000 3919 +tripl 0 3 5.857933 0.000000 3160 +bank 0 3 5.857933 0.000000 3920 +getto 0 2 6.263398 0.000000 5806 +locomot 0 2 6.263398 0.000000 5807 +beckerdavid 0 1 6.957497 0.000000 14908 +beckercontact 0 1 6.957497 0.000000 14909 +makingspina 0 1 6.957497 0.000000 14910 +drvier 0 1 6.957497 0.000000 14911 +bethel 0 1 6.957497 0.000000 14912 +men 0 1 6.957497 0.000000 14913 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 0 1 6.957497 0.000000 14914 +handbal 0 1 6.957497 0.000000 14915 +playracquetballgolftenni 0 1 6.957497 0.000000 14916 +bridgecampingcanoeingdisc 0 1 6.957497 0.000000 14917 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 0 1 6.957497 0.000000 14918 +skiingweightliftingwhitewat 0 1 6.957497 0.000000 14919 +raftinghorseback 0 1 6.957497 0.000000 14920 +ridingmountain 0 1 6.957497 0.000000 14921 +bikingin 0 1 6.957497 0.000000 14922 +currenc 0 1 6.957497 0.000000 14923 +ssto 0 1 6.957497 0.000000 14924 +rlv 0 1 6.957497 0.000000 14925 +theologi 0 1 6.957497 0.000000 14926 +centurai 0 1 6.957497 0.000000 14927 +boot 0 1 6.957497 0.000000 14928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..8dfbbe14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,281 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 0 247 1.386294 0.000000 24 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +implement 0 152 1.791759 0.000000 52 +perform 2 143 1.945910 3.891820 74 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +professor 0 137 1.945910 0.000000 76 +click 0 142 1.945910 0.000000 78 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +assist 0 112 2.197225 0.000000 113 +look 0 107 2.197225 0.000000 115 +memori 1 101 2.302585 2.302585 139 +octob 1 89 2.397895 2.397895 156 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +sinc 0 90 2.397895 0.000000 159 +larg 1 82 2.484907 2.484907 168 +ieee 0 86 2.484907 0.000000 190 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +master 1 76 2.564949 2.564949 216 +issu 0 78 2.564949 0.000000 211 +messag 0 76 2.564949 0.000000 212 +workshop 2 71 2.639057 5.278114 239 +symposium 1 72 2.639057 2.639057 238 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +street 0 63 2.772589 0.000000 293 +experi 0 64 2.772589 0.000000 283 +polici 0 64 2.772589 0.000000 279 +virtual 0 62 2.772589 0.000000 285 +evalu 0 64 2.772589 0.000000 266 +plai 0 60 2.833213 0.000000 307 +automat 0 61 2.833213 0.000000 306 +share 0 59 2.833213 0.000000 304 +publish 0 57 2.890372 0.000000 326 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +extens 2 53 2.944439 5.888878 340 +februari 1 54 2.944439 2.944439 328 +local 0 55 2.944439 0.000000 334 +hardwar 1 51 2.995732 2.995732 350 +run 0 51 2.995732 0.000000 347 +life 0 50 3.044522 0.000000 375 +standard 0 48 3.044522 0.000000 365 +effect 0 46 3.091042 0.000000 385 +protocol 1 45 3.135494 3.135494 407 +fast 1 42 3.218876 3.218876 429 +cach 1 41 3.218876 3.218876 432 +map 0 39 3.258097 0.000000 452 +multipl 0 39 3.258097 0.000000 453 +continu 0 39 3.258097 0.000000 448 +brian 2 38 3.295837 6.591674 466 +industri 0 38 3.295837 0.000000 464 +winter 1 36 3.367296 3.367296 500 +post 0 35 3.401197 0.000000 505 +return 0 34 3.401197 0.000000 502 +concurr 0 34 3.401197 0.000000 501 +detect 0 26 3.688879 0.000000 646 +consist 0 26 3.688879 0.000000 651 +although 0 25 3.737670 0.000000 667 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +interpret 0 24 3.761200 0.000000 686 +greg 0 24 3.761200 0.000000 695 +mobil 1 23 3.806662 3.806662 730 +thread 0 23 3.806662 0.000000 722 +ofwashington 1 22 3.850148 3.850148 766 +reduc 0 22 3.850148 0.000000 759 +inth 0 22 3.850148 0.000000 741 +chen 1 21 3.912023 3.912023 791 +avoid 0 21 3.912023 0.000000 799 +hous 0 21 3.912023 0.000000 801 +kernel 1 20 3.951244 3.951244 825 +binari 0 20 3.951244 0.000000 823 +safeti 0 20 3.951244 0.000000 817 +increas 0 20 3.951244 0.000000 829 +comparison 0 19 4.007333 0.000000 863 +andrew 0 19 4.007333 0.000000 849 +miss 0 19 4.007333 0.000000 866 +citi 0 19 4.007333 0.000000 874 +bershad 3 18 4.060443 12.181329 902 +seem 0 18 4.060443 0.000000 899 +event 0 18 4.060443 0.000000 896 +asplo 1 17 4.110874 4.110874 948 +stock 1 16 4.174387 4.174387 1007 +took 0 16 4.174387 0.000000 1010 +brief 0 16 4.174387 0.000000 1001 +qual 1 15 4.248495 4.248495 1062 +driven 0 15 4.248495 0.000000 1048 +overhead 0 15 4.248495 0.000000 1035 +micro 0 15 4.248495 0.000000 1031 +spin 1 14 4.317488 4.317488 1121 +save 0 14 4.317488 0.000000 1099 +levi 0 14 4.317488 0.000000 1093 +consider 0 14 4.317488 0.000000 1076 +dean 0 14 4.317488 0.000000 1104 +karlin 1 13 4.382027 4.382027 1176 +mellon 0 13 4.382027 0.000000 1179 +charl 0 13 4.382027 0.000000 1149 +block 0 13 4.382027 0.000000 1183 +usenix 1 12 4.465908 4.465908 1240 +carnegi 0 12 4.465908 0.000000 1260 +anna 0 12 4.465908 0.000000 1292 +mari 0 12 4.465908 0.000000 1266 +safe 0 12 4.465908 0.000000 1274 +promot 0 12 4.465908 0.000000 1235 +isca 1 11 4.553877 4.553877 1354 +denni 0 11 4.553877 0.000000 1321 +baer 0 11 4.553877 0.000000 1353 +systemsc 0 11 4.553877 0.000000 1293 +impact 0 11 4.553877 0.000000 1334 +primit 0 11 4.553877 0.000000 1317 +arpa 0 11 4.553877 0.000000 1369 +sosp 1 10 4.653960 4.653960 1416 +jean 0 10 4.653960 0.000000 1440 +henri 0 10 4.653960 0.000000 1417 +packet 0 10 4.653960 0.000000 1415 +decomposit 0 10 4.653960 0.000000 1439 +osdi 1 9 4.753590 4.753590 1534 +voelker 1 9 4.753590 4.753590 1557 +hang 0 9 4.753590 0.000000 1499 +patterson 0 9 4.753590 0.000000 1554 +wong 0 9 4.753590 0.000000 1609 +modula 0 9 4.753590 0.000000 1613 +wilson 0 9 4.753590 0.000000 1536 +rel 0 9 4.753590 0.000000 1487 +romer 1 8 4.875197 4.875197 1706 +marc 1 8 4.875197 4.875197 1680 +sigop 1 8 4.875197 4.875197 1727 +chamber 1 8 4.875197 4.875197 1692 +egger 1 8 4.875197 4.875197 1695 +mach 1 8 4.875197 4.875197 1669 +besid 0 8 4.875197 0.000000 1681 +wayn 0 8 4.875197 0.000000 1738 +watson 0 8 4.875197 0.000000 1691 +uniprocessor 0 8 4.875197 0.000000 1696 +hash 0 8 4.875197 0.000000 1618 +cultur 0 7 5.010635 0.000000 1951 +northwest 0 7 5.010635 0.000000 1973 +instrument 0 7 5.010635 0.000000 1954 +dispatch 0 7 5.010635 0.000000 1791 +interrupt 0 7 5.010635 0.000000 1793 +prioriti 0 7 5.010635 0.000000 1792 +onoper 1 6 5.164786 5.164786 2048 +squash 0 6 5.164786 0.000000 2223 +prefetch 0 6 5.164786 0.000000 2039 +edward 0 6 5.164786 0.000000 2050 +wolman 0 6 5.164786 0.000000 2093 +loup 0 6 5.164786 0.000000 2228 +mock 0 6 5.164786 0.000000 2087 +conflict 0 6 5.164786 0.000000 2041 +rain 0 6 5.164786 0.000000 2137 +philipos 1 5 5.347108 5.347108 2373 +coffe 0 5 5.347108 0.000000 2556 +toc 0 5 5.347108 0.000000 2562 +bind 0 5 5.347108 0.000000 2250 +alec 0 5 5.347108 0.000000 2563 +ohlrich 0 5 5.347108 0.000000 2564 +mutual 0 5 5.347108 0.000000 2418 +pardyak 1 4 5.568345 5.568345 3043 +microkernel 1 4 5.568345 5.568345 3047 +savag 1 4 5.568345 5.568345 2777 +rocki 0 4 5.568345 0.000000 3048 +etch 0 4 5.568345 0.000000 2755 +compcon 0 4 5.568345 0.000000 2958 +exclus 0 4 5.568345 0.000000 2947 +fiuczynski 1 3 5.857933 5.857933 3390 +ausland 1 3 5.857933 5.857933 3917 +stefan 1 3 5.857933 5.857933 3921 +northeast 0 3 5.857933 0.000000 3922 +cachingtraci 0 3 5.857933 0.000000 3923 +kimbrel 0 3 5.857933 0.000000 3924 +felten 0 3 5.857933 0.000000 3925 +geoffrei 0 3 5.857933 0.000000 3505 +dynamiccompil 0 3 5.857933 0.000000 3926 +garrett 0 3 5.857933 0.000000 3377 +mobisa 0 3 5.857933 0.000000 3927 +mappedcach 0 3 5.857933 0.000000 3928 +forappl 0 3 5.857933 0.000000 3929 +irrelev 0 3 5.857933 0.000000 3823 +golub 0 3 5.857933 0.000000 3265 +alien 0 3 5.857933 0.000000 3930 +przemyslaw 1 2 6.263398 6.263398 5808 +implemen 1 2 6.263398 6.263398 5809 +emin 1 2 6.263398 6.263398 5810 +sirer 1 2 6.263398 6.263398 5811 +wwo 1 2 6.263398 6.263398 5812 +eduwork 0 2 6.263398 0.000000 5813 +hasappear 0 2 6.263398 0.000000 5099 +tomkin 0 2 6.263398 0.000000 5814 +hugo 0 2 6.263398 0.000000 5815 +garth 0 2 6.263398 0.000000 5816 +gibson 0 2 6.263398 0.000000 5817 +hsieh 0 2 6.263398 0.000000 5818 +onlinesuperpag 0 2 6.263398 0.000000 5819 +appearedin 0 2 6.263398 0.000000 5096 +endpoint 0 2 6.263398 0.000000 4967 +moss 0 2 6.263398 0.000000 5820 +redel 0 2 6.263398 0.000000 4358 +elli 0 2 6.263398 0.000000 4216 +baron 0 2 6.263398 0.000000 4317 +microbenchmark 0 2 6.263398 0.000000 5821 +rashid 0 2 6.263398 0.000000 4318 +abduct 0 2 6.263398 0.000000 5663 +maeda 1 1 6.957497 6.957497 14929 +midwai 1 1 6.957497 6.957497 14930 +zekauska 1 1 6.957497 6.957497 14931 +sawdon 1 1 6.957497 6.957497 14932 +machnix 1 1 6.957497 6.957497 14933 +drave 1 1 6.957497 6.957497 14934 +forin 1 1 6.957497 6.957497 14935 +respit 0 1 6.957497 0.000000 14936 +asigmetr 0 1 6.957497 0.000000 14937 +thestairmast 0 1 6.957497 0.000000 14938 +extensibleoper 0 1 6.957497 0.000000 14939 +parallelnetwork 0 1 6.957497 0.000000 14940 +thesequel 0 1 6.957497 0.000000 14941 +optimizationcours 0 1 6.957497 0.000000 14942 +youmight 0 1 6.957497 0.000000 14943 +extensiblesystem 0 1 6.957497 0.000000 14944 +theodor 0 1 6.957497 0.000000 14945 +implementationj 0 1 6.957497 0.000000 14946 +defouw 0 1 6.957497 0.000000 14947 +alapat 0 1 6.957497 0.000000 14948 +becker 0 1 6.957497 0.000000 14949 +sharedmemori 0 1 6.957497 0.000000 14950 +conflictresolut 0 1 6.957497 0.000000 14951 +uwtechn 0 1 6.957497 0.000000 14952 +demultiplex 0 1 6.957497 0.000000 14953 +yuhara 0 1 6.957497 0.000000 14954 +andmostli 0 1 6.957497 0.000000 14955 +moblic 0 1 6.957497 0.000000 14956 +wheeler 0 1 6.957497 0.000000 14957 +ginsburg 0 1 6.957497 0.000000 14958 +inoper 0 1 6.957497 0.000000 14959 +harrier 0 1 6.957497 0.000000 14960 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..7a9d8af3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,153 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +also 0 259 1.386294 0.000000 28 +list 1 201 1.609438 1.609438 39 +network 0 168 1.791759 0.000000 61 +hall 0 146 1.945910 0.000000 65 +seattl 1 120 2.079442 2.079442 103 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +world 1 115 2.197225 2.197225 126 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +person 0 111 2.197225 0.000000 117 +text 0 98 2.302585 0.000000 133 +search 1 95 2.397895 2.397895 155 +select 0 91 2.397895 0.000000 154 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +good 0 77 2.564949 0.000000 200 +david 0 71 2.639057 0.000000 232 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +sieg 0 69 2.708050 0.000000 260 +view 0 70 2.708050 0.000000 254 +new 1 64 2.772589 2.772589 262 +previou 0 62 2.772589 0.000000 290 +collect 0 65 2.772589 0.000000 268 +guid 0 63 2.772589 0.000000 267 +thesi 0 57 2.890372 0.000000 327 +index 0 56 2.890372 0.000000 309 +scientif 0 53 2.944439 0.000000 341 +particular 0 51 2.995732 0.000000 352 +pointer 1 48 3.044522 3.044522 368 +cool 0 49 3.044522 0.000000 374 +telephon 0 50 3.044522 0.000000 373 +life 0 50 3.044522 0.000000 375 +directori 1 45 3.135494 3.135494 396 +past 0 42 3.218876 0.000000 428 +open 0 38 3.295837 0.000000 469 +hand 0 37 3.332205 0.000000 475 +post 0 35 3.401197 0.000000 505 +scientist 0 31 3.496508 0.000000 560 +steve 0 29 3.583519 0.000000 594 +american 1 27 3.637586 3.637586 634 +todai 1 25 3.737670 3.737670 672 +sport 1 25 3.737670 3.737670 683 +yahoo 1 24 3.761200 3.761200 707 +daili 0 24 3.761200 0.000000 706 +magazin 0 24 3.761200 0.000000 704 +miscellan 0 23 3.806662 0.000000 731 +voic 0 21 3.912023 0.000000 806 +hous 0 21 3.912023 0.000000 801 +lyco 0 19 4.007333 0.000000 871 +boston 0 19 4.007333 0.000000 862 +agent 0 18 4.060443 0.000000 910 +white 1 17 4.110874 4.110874 951 +engineeringunivers 0 17 4.110874 0.000000 959 +repositori 0 17 4.110874 0.000000 932 +dilbert 0 16 4.174387 0.000000 996 +hierarch 0 15 4.248495 0.000000 1018 +balanc 0 14 4.317488 0.000000 1112 +washingtonbox 0 13 4.382027 0.000000 1200 +suit 0 13 4.382027 0.000000 1129 +canada 0 13 4.382027 0.000000 1158 +social 0 13 4.382027 0.000000 1123 +hank 0 12 4.465908 0.000000 1253 +excit 0 11 4.553877 0.000000 1329 +arpa 0 11 4.553877 0.000000 1369 +clock 0 11 4.553877 0.000000 1320 +shop 0 10 4.653960 0.000000 1469 +metacrawl 0 10 4.653960 0.000000 1455 +vista 0 10 4.653960 0.000000 1452 +yellow 1 9 4.753590 4.753590 1601 +weld 0 9 4.753590 0.000000 1538 +meta 0 9 4.753590 0.000000 1505 +govern 0 9 4.753590 0.000000 1581 +congress 0 9 4.753590 0.000000 1592 +entitl 0 9 4.753590 0.000000 1490 +postdoc 0 8 4.875197 0.000000 1724 +span 0 8 4.875197 0.000000 1751 +upcom 0 8 4.875197 0.000000 1685 +softbot 0 7 5.010635 0.000000 1974 +pittsburgh 0 7 5.010635 0.000000 1938 +strip 0 6 5.164786 0.000000 2203 +oren 0 6 5.164786 0.000000 2134 +etzioni 0 6 5.164786 0.000000 2135 +gopher 0 6 5.164786 0.000000 1982 +infoseek 0 6 5.164786 0.000000 2188 +slate 0 6 5.164786 0.000000 2021 +atlant 0 5 5.347108 0.000000 2508 +feder 0 5 5.347108 0.000000 2266 +union 0 4 5.568345 0.000000 2634 +alta 0 4 5.568345 0.000000 3039 +birthdai 0 4 5.568345 0.000000 2800 +letterman 0 3 5.857933 0.000000 3931 +shortcut 0 3 5.857933 0.000000 3932 +soar 0 3 5.857933 0.000000 3506 +headlin 0 3 5.857933 0.000000 3710 +monthli 0 3 5.857933 0.000000 3910 +espn 0 3 5.857933 0.000000 3724 +zone 0 3 5.857933 0.000000 3747 +museum 0 3 5.857933 0.000000 3933 +comedi 0 2 6.263398 0.000000 5822 +geeki 0 2 6.263398 0.000000 5823 +shopbot 0 2 6.263398 0.000000 5824 +sigma 0 2 6.263398 0.000000 4369 +magellan 0 2 6.263398 0.000000 5825 +reuter 0 2 6.263398 0.000000 4099 +cafe 0 2 6.263398 0.000000 5826 +salon 0 2 6.263398 0.000000 5827 +harper 0 2 6.263398 0.000000 5141 +commiss 0 2 6.263398 0.000000 4901 +reform 0 2 6.263398 0.000000 5828 +budget 1 1 6.957497 6.957497 14961 +doorenbo 1 1 6.957497 6.957497 14962 +pagebob 0 1 6.957497 0.000000 14963 +bobd 0 1 6.957497 0.000000 14964 +netbot 0 1 6.957497 0.000000 14965 +boffo 0 1 6.957497 0.000000 14966 +zdnet 0 1 6.957497 0.000000 14967 +anchordesk 0 1 6.957497 0.000000 14968 +savvysearch 0 1 6.957497 0.000000 14969 +inktomi 0 1 6.957497 0.000000 14970 +crawler 0 1 6.957497 0.000000 14971 +hotbot 0 1 6.957497 0.000000 14972 +pointcom 0 1 6.957497 0.000000 14973 +switchboard 0 1 6.957497 0.000000 14974 +cnnfn 0 1 6.957497 0.000000 14975 +newshour 0 1 6.957497 0.000000 14976 +globe 0 1 6.957497 0.000000 14977 +feed 0 1 6.957497 0.000000 14978 +fedworld 0 1 6.957497 0.000000 14979 +deficit 0 1 6.957497 0.000000 14980 +debt 0 1 6.957497 0.000000 14981 +concord 0 1 6.957497 0.000000 14982 +coalit 0 1 6.957497 0.000000 14983 +bipartisan 0 1 6.957497 0.000000 14984 +andfun 0 1 6.957497 0.000000 14985 +pagebobd 0 1 6.957497 0.000000 14986 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..0e3ea362 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +recent 1 167 1.791759 1.791759 58 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +seattl 0 120 2.079442 0.000000 103 +code 0 108 2.197225 0.000000 116 +mathemat 0 108 2.197225 0.000000 123 +associ 0 93 2.397895 0.000000 151 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +orient 1 80 2.564949 2.564949 205 +logic 0 71 2.639057 0.000000 230 +intellig 0 72 2.639057 0.000000 225 +solv 0 73 2.639057 0.000000 234 +receiv 1 66 2.708050 2.708050 244 +degre 0 69 2.708050 0.000000 259 +interact 0 62 2.772589 0.000000 270 +artifici 0 63 2.772589 0.000000 280 +dept 0 64 2.772589 0.000000 291 +colleg 0 61 2.833213 0.000000 300 +done 0 47 3.091042 0.000000 381 +mechan 0 43 3.178054 0.000000 416 +http 0 41 3.218876 0.000000 420 +societi 1 40 3.258097 3.258097 456 +join 0 39 3.258097 0.000000 457 +winter 0 36 3.367296 0.000000 500 +post 0 35 3.401197 0.000000 505 +taught 0 33 3.433987 0.000000 526 +human 1 32 3.465736 3.465736 546 +idea 0 32 3.465736 0.000000 545 +concept 0 32 3.465736 0.000000 537 +dissert 0 32 3.465736 0.000000 549 +autumn 1 31 3.496508 3.496508 558 +constraint 1 26 3.688879 3.688879 636 +spent 1 25 3.737670 3.737670 676 +concern 0 25 3.737670 0.000000 666 +born 1 21 3.912023 3.912023 798 +media 0 19 4.007333 0.000000 861 +cambridg 0 16 4.174387 0.000000 1008 +alan 0 13 4.382027 0.000000 1146 +xerox 1 8 4.875197 4.875197 1725 +pagei 0 8 4.875197 0.000000 1683 +grew 0 8 4.875197 0.000000 1742 +reed 0 6 5.164786 0.000000 2086 +england 0 5 5.347108 0.000000 2557 +scotland 0 4 5.568345 0.000000 3049 +sabbat 0 4 5.568345 0.000000 2824 +groupuw 0 3 5.857933 0.000000 3934 +atstanford 0 3 5.857933 0.000000 3935 +pagealan 0 2 6.263398 0.000000 5587 +computerinteract 0 2 6.263398 0.000000 5829 +satisfact 0 2 6.263398 0.000000 5656 +idaho 0 2 6.263398 0.000000 5055 +havebeen 0 2 6.263398 0.000000 5830 +eduwww 0 2 6.263398 0.000000 5138 +principalresearch 0 1 6.957497 0.000000 14987 +activitiesuwconstraint 0 1 6.957497 0.000000 14988 +domainsourc 0 1 6.957497 0.000000 14989 +democraci 0 1 6.957497 0.000000 14990 +qualsproject 0 1 6.957497 0.000000 14991 +teachingher 0 1 6.957497 0.000000 14992 +informationhistori 0 1 6.957497 0.000000 14993 +paloalto 0 1 6.957497 0.000000 14994 +simulationlaboratori 0 1 6.957497 0.000000 14995 +doctoralfellow 0 1 6.957497 0.000000 14996 +ofedinburgh 0 1 6.957497 0.000000 14997 +symbolicalgebra 0 1 6.957497 0.000000 14998 +andexcept 0 1 6.957497 0.000000 14999 +europarc 0 1 6.957497 0.000000 15000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..dce8bff7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +address 0 170 1.791759 0.000000 62 +like 0 132 1.945910 0.000000 81 +thing 1 84 2.484907 2.484907 189 +probabl 0 40 3.258097 0.000000 455 +credit 0 38 3.295837 0.000000 460 +ofth 0 36 3.367296 0.000000 491 +ad 0 32 3.465736 0.000000 544 +mike 0 24 3.761200 0.000000 703 +less 0 18 4.060443 0.000000 892 +care 0 13 4.382027 0.000000 1177 +brad 1 12 4.465908 4.465908 1264 +subset 0 10 4.653960 0.000000 1425 +couldn 0 4 5.568345 0.000000 2977 +pagebrad 0 1 6.957497 0.000000 15001 +chamberlainphoto 0 1 6.957497 0.000000 15002 +perkowitzth 0 1 6.957497 0.000000 15003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..f8713e10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +us 1 329 1.098612 1.098612 16 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +fall 1 181 1.609438 1.609438 40 +paper 0 205 1.609438 0.000000 38 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +problem 1 147 1.945910 1.945910 75 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +like 0 132 1.945910 0.000000 81 +first 0 140 1.945910 0.000000 71 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +look 0 107 2.197225 0.000000 115 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +mathemat 0 108 2.197225 0.000000 123 +version 0 113 2.197225 0.000000 122 +part 0 98 2.302585 0.000000 129 +present 1 91 2.397895 2.397895 145 +call 0 91 2.397895 0.000000 153 +academ 0 82 2.484907 0.000000 178 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +html 1 75 2.639057 2.639057 235 +appli 0 71 2.639057 0.000000 226 +onlin 0 75 2.639057 0.000000 223 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +written 0 63 2.772589 0.000000 278 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +reason 0 57 2.890372 0.000000 318 +physic 1 47 3.091042 3.091042 377 +done 0 47 3.091042 0.000000 381 +math 0 44 3.135494 0.000000 402 +around 0 43 3.178054 0.000000 415 +map 1 39 3.258097 3.258097 452 +theoret 0 39 3.258097 0.000000 446 +error 0 40 3.258097 0.000000 449 +expect 0 37 3.332205 0.000000 484 +tree 0 36 3.367296 0.000000 492 +survei 0 35 3.401197 0.000000 513 +curriculum 0 33 3.433987 0.000000 535 +scientist 0 31 3.496508 0.000000 560 +photo 0 31 3.496508 0.000000 561 +power 0 30 3.555348 0.000000 573 +produc 0 30 3.555348 0.000000 572 +cluster 0 28 3.610918 0.000000 612 +hope 0 28 3.610918 0.000000 610 +challeng 0 26 3.688879 0.000000 653 +bound 0 26 3.688879 0.000000 659 +bookmark 0 26 3.688879 0.000000 639 +aspect 0 25 3.737670 0.000000 663 +sometim 0 24 3.761200 0.000000 696 +theunivers 0 21 3.912023 0.000000 797 +siam 0 21 3.912023 0.000000 800 +binari 0 20 3.951244 0.000000 823 +lower 0 18 4.060443 0.000000 886 +biologi 0 15 4.248495 0.000000 1049 +incomput 0 14 4.317488 0.000000 1096 +galleri 1 13 4.382027 4.382027 1192 +speak 0 12 4.465908 0.000000 1283 +readabl 0 12 4.465908 0.000000 1258 +scienceat 0 11 4.553877 0.000000 1375 +moment 0 11 4.553877 0.000000 1379 +cycl 0 11 4.553877 0.000000 1335 +enter 0 10 4.653960 0.000000 1454 +ski 0 10 4.653960 0.000000 1471 +drink 0 9 4.753590 0.000000 1607 +bridg 0 8 4.875197 0.000000 1764 +interestsi 0 7 5.010635 0.000000 1969 +poster 0 7 5.010635 0.000000 1814 +rough 0 6 5.164786 0.000000 2107 +quickli 0 6 5.164786 0.000000 2000 +alphabet 0 6 5.164786 0.000000 1980 +soda 0 6 5.164786 0.000000 2189 +overlap 0 5 5.347108 0.000000 2368 +upper 0 5 5.347108 0.000000 2481 +latexhtml 0 5 5.347108 0.000000 2347 +older 0 5 5.347108 0.000000 2387 +fulfil 0 4 5.568345 0.000000 2932 +climb 0 4 5.568345 0.000000 2936 +genom 0 3 5.857933 0.000000 3546 +astrophys 0 3 5.857933 0.000000 3936 +dimac 0 3 5.857933 0.000000 3574 +edufor 0 2 6.263398 0.000000 5831 +hpcc 0 2 6.263398 0.000000 5832 +clone 0 2 6.263398 0.000000 5833 +ismb 0 2 6.263398 0.000000 5834 +probe 0 2 6.263398 0.000000 5535 +mumei 1 1 6.957497 6.957497 15004 +brendan 1 1 6.957497 6.957497 15005 +pagebrendan 0 1 6.957497 0.000000 15006 +mumeyi 0 1 6.957497 0.000000 15007 +information 0 1 6.957497 0.000000 15008 +vitaein 0 1 6.957497 0.000000 15009 +htmlorpostscriptformat 0 1 6.957497 0.000000 15010 +landmark 0 1 6.957497 0.000000 15011 +tosolv 0 1 6.957497 0.000000 15012 +groupher 0 1 6.957497 0.000000 15013 +papersb 0 1 6.957497 0.000000 15014 +candidaci 0 1 6.957497 0.000000 15015 +klaw 0 1 6.957497 0.000000 15016 +ofdiscret 0 1 6.957497 0.000000 15017 +containsom 0 1 6.957497 0.000000 15018 +recreationhik 0 1 6.957497 0.000000 15019 +coffeeto 0 1 6.957497 0.000000 15020 +sailingand 0 1 6.957497 0.000000 15021 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..42865a87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +year 0 148 1.945910 0.000000 84 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +spring 0 131 2.079442 0.000000 88 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +final 0 116 2.197225 0.000000 108 +need 0 98 2.302585 0.000000 135 +user 0 104 2.302585 0.000000 137 +graphic 1 90 2.397895 2.397895 147 +imag 0 91 2.397895 0.000000 161 +learn 1 86 2.484907 2.484907 170 +info 0 85 2.484907 0.000000 176 +school 0 84 2.484907 0.000000 188 +method 0 80 2.564949 0.000000 213 +interfac 0 79 2.564949 0.000000 209 +mondai 0 77 2.564949 0.000000 206 +june 0 79 2.564949 0.000000 214 +goal 0 66 2.708050 0.000000 250 +interact 0 62 2.772589 0.000000 270 +extens 0 53 2.944439 0.000000 340 +particular 0 51 2.995732 0.000000 352 +quarter 0 47 3.091042 0.000000 389 +even 0 45 3.135494 0.000000 393 +ask 0 28 3.610918 0.000000 597 +rather 0 26 3.688879 0.000000 642 +enjoi 0 26 3.688879 0.000000 660 +although 0 25 3.737670 0.000000 667 +self 0 22 3.850148 0.000000 761 +busi 0 21 3.912023 0.000000 784 +hobbi 0 16 4.174387 0.000000 1009 +excit 0 11 4.553877 0.000000 1329 +junior 0 5 5.347108 0.000000 2519 +kid 0 5 5.347108 0.000000 2516 +fairli 0 5 5.347108 0.000000 2322 +writeup 0 5 5.347108 0.000000 2352 +bricker 1 4 5.568345 5.568345 3050 +asystem 0 4 5.568345 0.000000 2612 +lauren 0 3 5.857933 0.000000 3251 +metip 0 3 5.857933 0.000000 3937 +workin 0 3 5.857933 0.000000 3938 +groupi 0 2 6.263398 0.000000 5544 +stevetanimoto 0 2 6.263398 0.000000 5835 +ofthi 0 2 6.263398 0.000000 5836 +cscl 0 2 6.263398 0.000000 5837 +inthi 0 2 6.263398 0.000000 5509 +studio 0 2 6.263398 0.000000 5838 +brickerlauren 0 1 6.957497 0.000000 15022 +clue 0 1 6.957497 0.000000 15023 +primarli 0 1 6.957497 0.000000 15024 +userinterfac 0 1 6.957497 0.000000 15025 +proclaim 0 1 6.957497 0.000000 15026 +mathematicsexperi 0 1 6.957497 0.000000 15027 +usingexploratori 0 1 6.957497 0.000000 15028 +rote 0 1 6.957497 0.000000 15029 +minterest 0 1 6.957497 0.000000 15030 +supportedcollabor 0 1 6.957497 0.000000 15031 +lawk 0 1 6.957497 0.000000 15032 +dawg 0 1 6.957497 0.000000 15033 +interfacea 0 1 6.957497 0.000000 15034 +resumeschool 0 1 6.957497 0.000000 15035 +dazethi 0 1 6.957497 0.000000 15036 +quarterdoth 0 1 6.957497 0.000000 15037 +quartershuman 0 1 6.957497 0.000000 15038 +writeupwhat 0 1 6.957497 0.000000 15039 +insocieti 0 1 6.957497 0.000000 15040 +lifesportscookingpotteri 0 1 6.957497 0.000000 15041 +garag 0 1 6.957497 0.000000 15042 +stuffbecaus 0 1 6.957497 0.000000 15043 +itaddress 0 1 6.957497 0.000000 15044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..1b0dc4f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +washington 1 236 1.386294 1.386294 32 +adam 0 17 4.110874 0.000000 934 +carlson 1 5 5.347108 5.347108 2351 +carlsonadam 0 1 6.957497 0.000000 15045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..e2055b0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +paper 0 205 1.609438 0.000000 38 +model 1 145 1.945910 1.945910 69 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +look 1 107 2.197225 2.197225 115 +follow 0 92 2.397895 0.000000 143 +build 0 85 2.484907 0.000000 184 +requir 0 81 2.484907 0.000000 167 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +direct 0 57 2.890372 0.000000 316 +browser 0 56 2.890372 0.000000 313 +give 0 50 3.044522 0.000000 359 +netscap 0 44 3.135494 0.000000 395 +workstat 0 37 3.332205 0.000000 479 +download 1 36 3.367296 3.367296 489 +viewer 1 21 3.912023 3.912023 787 +similar 0 21 3.912023 0.000000 771 +andrew 1 19 4.007333 4.007333 849 +fix 0 11 4.553877 0.000000 1327 +certain 1 10 4.653960 4.653960 1393 +werner 0 10 4.653960 0.000000 1385 +salesin 0 4 5.568345 0.000000 3051 +tonyderos 0 2 6.263398 0.000000 5839 +stuetzl 0 2 6.263398 0.000000 5840 +duchamp 0 2 6.263398 0.000000 5841 +jovan 0 2 6.263398 0.000000 5842 +theview 0 1 6.957497 0.000000 15046 +popov 0 1 6.957497 0.000000 15047 +scanningproject 0 1 6.957497 0.000000 15048 +sgigraph 0 1 6.957497 0.000000 15049 +shouldalso 0 1 6.957497 0.000000 15050 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..c78950bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +fall 1 181 1.609438 1.609438 40 +list 0 201 1.609438 0.000000 39 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +first 0 140 1.945910 0.000000 71 +relat 0 139 1.945910 0.000000 68 +like 0 132 1.945910 0.000000 81 +welcom 0 122 2.079442 0.000000 99 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +school 1 84 2.484907 2.484907 188 +info 0 85 2.484907 0.000000 176 +requir 0 81 2.484907 0.000000 167 +line 1 75 2.639057 2.639057 231 +workshop 0 71 2.639057 0.000000 239 +java 1 70 2.708050 2.708050 248 +best 0 59 2.833213 0.000000 299 +think 0 57 2.890372 0.000000 314 +browser 0 56 2.890372 0.000000 313 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +quarter 0 47 3.091042 0.000000 389 +favorit 0 44 3.135494 0.000000 410 +netscap 0 44 3.135494 0.000000 395 +open 0 38 3.295837 0.000000 469 +computersci 0 30 3.555348 0.000000 562 +chines 0 29 3.583519 0.000000 595 +though 0 27 3.637586 0.000000 622 +enjoi 0 26 3.688879 0.000000 660 +experiment 0 26 3.688879 0.000000 645 +enabl 0 26 3.688879 0.000000 655 +instal 0 22 3.850148 0.000000 754 +grad 0 20 3.951244 0.000000 837 +applet 0 20 3.951244 0.000000 827 +taiwan 0 16 4.174387 0.000000 1006 +charact 0 15 4.248495 0.000000 1028 +promot 0 12 4.465908 0.000000 1235 +rice 0 11 4.553877 0.000000 1336 +cook 0 10 4.653960 0.000000 1464 +yellow 0 9 4.753590 0.000000 1601 +ball 0 9 4.753590 0.000000 1608 +absolut 0 8 4.875197 0.000000 1646 +chinook 0 6 5.164786 0.000000 2229 +greec 0 6 5.164786 0.000000 2208 +restaur 0 6 5.164786 0.000000 2230 +ross 0 5 5.347108 0.000000 2243 +chou 1 4 5.568345 5.568345 3033 +recip 0 3 5.857933 0.000000 3668 +infoth 0 2 6.263398 0.000000 5195 +projectmi 0 2 6.263398 0.000000 5482 +schedulemi 0 2 6.263398 0.000000 5843 +publicationscod 0 2 6.263398 0.000000 5520 +stir 0 2 6.263398 0.000000 4865 +fri 0 2 6.263398 0.000000 5844 +geek 0 2 6.263398 0.000000 5083 +toi 0 2 6.263398 0.000000 5184 +font 0 2 6.263398 0.000000 5845 +purpl 0 2 6.263398 0.000000 5372 +scari 0 1 6.957497 0.000000 15051 +codesignpersonalperson 0 1 6.957497 0.000000 15052 +resumefoodi 0 1 6.957497 0.000000 15053 +ofpeopl 0 1 6.957497 0.000000 15054 +dish 0 1 6.957497 0.000000 15055 +noodl 0 1 6.957497 0.000000 15056 +beefskew 0 1 6.957497 0.000000 15057 +toysb 0 1 6.957497 0.000000 15058 +computersand 0 1 6.957497 0.000000 15059 +taiwanesei 0 1 6.957497 0.000000 15060 +taiwaneselanguag 0 1 6.957497 0.000000 15061 +taiwanes 0 1 6.957497 0.000000 15062 +sureto 0 1 6.957497 0.000000 15063 +taiwanesedictionari 0 1 6.957497 0.000000 15064 +viewedif 0 1 6.957497 0.000000 15065 +beabl 0 1 6.957497 0.000000 15066 +bouncingov 0 1 6.957497 0.000000 15067 +barnei 0 1 6.957497 0.000000 15068 +dynosaur 0 1 6.957497 0.000000 15069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..36dff2a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +read 0 154 1.791759 0.000000 47 +problem 1 147 1.945910 1.945910 75 +year 0 148 1.945910 0.000000 84 +spring 1 131 2.079442 2.079442 88 +schedul 0 119 2.079442 0.000000 85 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +search 1 95 2.397895 2.397895 155 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +info 0 85 2.484907 0.000000 176 +help 0 83 2.484907 0.000000 175 +refer 1 78 2.564949 2.564949 203 +april 0 77 2.564949 0.000000 196 +good 0 77 2.564949 0.000000 200 +mondai 0 77 2.564949 0.000000 206 +html 1 75 2.639057 2.639057 235 +meet 1 72 2.639057 2.639057 229 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +tuesdai 0 73 2.639057 0.000000 219 +august 0 66 2.708050 0.000000 257 +thursdai 0 70 2.708050 0.000000 241 +result 1 65 2.772589 2.772589 281 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +guid 0 63 2.772589 0.000000 267 +wednesdai 0 64 2.772589 0.000000 261 +automat 0 61 2.833213 0.000000 306 +plai 0 60 2.833213 0.000000 307 +colleg 0 61 2.833213 0.000000 300 +found 0 53 2.944439 0.000000 337 +set 1 50 3.044522 3.044522 361 +archiv 0 49 3.044522 0.000000 364 +done 1 47 3.091042 3.091042 381 +adapt 0 46 3.091042 0.000000 387 +featur 0 46 3.091042 0.000000 386 +math 1 44 3.135494 3.135494 402 +netscap 0 44 3.135494 0.000000 395 +fridai 0 44 3.135494 0.000000 390 +term 0 43 3.178054 0.000000 411 +review 0 42 3.218876 0.000000 425 +late 0 40 3.258097 0.000000 439 +programm 0 39 3.258097 0.000000 445 +correct 0 38 3.295837 0.000000 462 +open 0 38 3.295837 0.000000 469 +statist 0 35 3.401197 0.000000 521 +tech 0 35 3.401197 0.000000 515 +chapter 0 32 3.465736 0.000000 536 +autumn 0 31 3.496508 0.000000 558 +linux 1 27 3.637586 3.637586 631 +lab 0 24 3.761200 0.000000 698 +color 1 22 3.850148 3.850148 762 +self 0 22 3.850148 0.000000 761 +hous 0 21 3.912023 0.000000 801 +region 0 19 4.007333 0.000000 875 +macintosh 0 17 4.110874 0.000000 920 +dilbert 0 16 4.174387 0.000000 996 +month 0 15 4.248495 0.000000 1025 +contribut 0 15 4.248495 0.000000 1021 +brother 0 13 4.382027 0.000000 1189 +tune 0 12 4.465908 0.000000 1227 +insid 0 12 4.465908 0.000000 1262 +fpga 0 10 4.653960 0.000000 1433 +pacif 0 8 4.875197 0.000000 1674 +babylon 0 8 4.875197 0.000000 1731 +duke 0 6 5.164786 0.000000 2231 +usag 0 6 5.164786 0.000000 2209 +peek 0 6 5.164786 0.000000 2169 +contest 1 5 5.347108 5.347108 2273 +compet 0 5 5.347108 0.000000 2462 +corei 0 4 5.568345 0.000000 2718 +wavelet 0 4 5.568345 0.000000 2874 +percept 0 3 5.857933 0.000000 3739 +rsum 0 3 5.857933 0.000000 3939 +zone 0 3 5.857933 0.000000 3747 +corin 0 3 5.857933 0.000000 3311 +induc 0 2 6.263398 0.000000 4795 +lurker 0 2 6.263398 0.000000 5050 +andersoncorei 0 1 6.957497 0.000000 15070 +andersonth 0 1 6.957497 0.000000 15071 +localtalk 0 1 6.957497 0.000000 15072 +collegi 0 1 6.957497 0.000000 15073 +univser 0 1 6.957497 0.000000 15074 +highlin 0 1 6.957497 0.000000 15075 +polli 0 1 6.957497 0.000000 15076 +treasuri 0 1 6.957497 0.000000 15077 +providercool 0 1 6.957497 0.000000 15078 +sunsit 0 1 6.957497 0.000000 15079 +pageus 0 1 6.957497 0.000000 15080 +washinton 0 1 6.957497 0.000000 15081 +uwtv 0 1 6.957497 0.000000 15082 +notesmi 0 1 6.957497 0.000000 15083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..14e943d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +address 1 170 1.791759 1.791759 62 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +well 0 109 2.197225 0.000000 121 +text 0 98 2.302585 0.000000 133 +homepag 0 93 2.397895 0.000000 148 +second 0 81 2.484907 0.000000 166 +start 0 83 2.484907 0.000000 173 +know 0 80 2.564949 0.000000 198 +experi 1 64 2.772589 2.772589 283 +locat 0 59 2.833213 0.000000 303 +found 1 53 2.944439 2.944439 337 +visitor 0 49 3.044522 0.000000 371 +express 0 32 3.465736 0.000000 540 +photo 0 31 3.496508 0.000000 561 +repres 0 26 3.688879 0.000000 656 +never 0 25 3.737670 0.000000 671 +grad 0 20 3.951244 0.000000 837 +wonder 0 20 3.951244 0.000000 815 +minut 0 20 3.951244 0.000000 810 +appropri 0 18 4.060443 0.000000 883 +anyon 0 17 4.110874 0.000000 916 +choos 0 16 4.174387 0.000000 964 +universityof 0 15 4.248495 0.000000 1061 +near 0 14 4.317488 0.000000 1091 +cannot 0 13 4.382027 0.000000 1144 +sai 0 13 4.382027 0.000000 1175 +ball 1 9 4.753590 4.753590 1608 +occur 0 9 4.753590 0.000000 1572 +didn 0 9 4.753590 0.000000 1563 +craig 0 7 5.010635 0.000000 1879 +fromth 0 7 5.010635 0.000000 1802 +saturdai 0 7 5.010635 0.000000 1794 +parent 0 6 5.164786 0.000000 2204 +situat 0 5 5.347108 0.000000 2365 +curiou 0 5 5.347108 0.000000 2541 +enjoy 1 4 5.568345 5.568345 2937 +waterloo 0 3 5.857933 0.000000 3523 +tomi 0 2 6.263398 0.000000 5846 +convoc 0 2 6.263398 0.000000 5757 +honour 0 2 6.263398 0.000000 5632 +felt 0 2 6.263398 0.000000 4978 +incident 0 2 6.263398 0.000000 5109 +ceremoni 0 2 6.263398 0.000000 5585 +forgiv 0 2 6.263398 0.000000 5770 +valedictorian 1 1 6.957497 6.957497 15084 +experiencecraig 0 1 6.957497 0.000000 15085 +kaplancurr 0 1 6.957497 0.000000 15086 +copyof 0 1 6.957497 0.000000 15087 +undergraduatewa 0 1 6.957497 0.000000 15088 +cskaplan 0 1 6.957497 0.000000 15089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..79776613 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +welcom 1 122 2.079442 2.079442 99 +peopl 0 96 2.302585 0.000000 132 +take 0 97 2.302585 0.000000 134 +sinc 0 90 2.397895 0.000000 159 +thing 0 84 2.484907 0.000000 189 +java 0 70 2.708050 0.000000 248 +plan 0 65 2.772589 0.000000 272 +think 0 57 2.890372 0.000000 314 +visitor 0 49 3.044522 0.000000 371 +numer 0 49 3.044522 0.000000 369 +futur 0 41 3.218876 0.000000 427 +vision 0 41 3.218876 0.000000 430 +mean 0 37 3.332205 0.000000 477 +anim 0 31 3.496508 0.000000 557 +load 0 28 3.610918 0.000000 601 +applet 0 20 3.951244 0.000000 827 +agent 0 18 4.060443 0.000000 910 +wind 0 18 4.060443 0.000000 908 +figur 0 18 4.060443 0.000000 903 +thought 0 17 4.110874 0.000000 945 +edui 0 13 4.382027 0.000000 1193 +weld 0 9 4.753590 0.000000 1538 +vallei 1 7 5.010635 5.010635 1959 +chung 0 7 5.010635 0.000000 1964 +softbot 0 7 5.010635 0.000000 1974 +etzioni 0 6 5.164786 0.000000 2135 +andsoftwar 0 4 5.568345 0.000000 2753 +arch 0 4 5.568345 0.000000 2995 +codi 1 3 5.857933 5.857933 3940 +kwok 1 3 5.857933 5.857933 3941 +aliv 0 3 5.857933 0.000000 3864 +nausicaa 1 2 6.263398 6.263398 5218 +ingram 0 2 6.263398 0.000000 5847 +castl 0 2 6.263398 0.000000 5217 +doom 0 2 6.263398 0.000000 5848 +sanctuari 1 1 6.957497 6.957497 15090 +asami 0 1 6.957497 0.000000 15091 +chiaki 0 1 6.957497 0.000000 15092 +ctkwok 0 1 6.957497 0.000000 15093 +andoren 0 1 6.957497 0.000000 15094 +aiuw 0 1 6.957497 0.000000 15095 +informationleisur 0 1 6.957497 0.000000 15096 +windlaputa 0 1 6.957497 0.000000 15097 +skyhyp 0 1 6.957497 0.000000 15098 +gunnm 0 1 6.957497 0.000000 15099 +vile 0 1 6.957497 0.000000 15100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..10c3cc12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +updat 0 191 1.609438 0.000000 41 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +postscript 0 131 2.079442 0.000000 90 +homepag 0 93 2.397895 0.000000 148 +resum 1 79 2.564949 2.564949 217 +complet 0 77 2.564949 0.000000 208 +html 0 75 2.639057 0.000000 235 +plan 0 65 2.772589 0.000000 272 +vita 0 38 3.295837 0.000000 473 +curriculum 1 33 3.433987 3.433987 535 +employ 0 12 4.465908 0.000000 1291 +rest 0 12 4.465908 0.000000 1259 +darren 1 5 5.347108 5.347108 2565 +cronquist 1 3 5.857933 5.857933 3942 +myph 0 3 5.857933 0.000000 3880 +underconstruct 0 3 5.857933 0.000000 3889 +darrenc 0 1 6.957497 0.000000 15101 +vitaperson 0 1 6.957497 0.000000 15102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..7af7f36d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +read 0 154 1.791759 0.000000 47 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +studi 1 120 2.079442 2.079442 91 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +find 0 111 2.197225 0.000000 111 +user 1 104 2.302585 2.302585 137 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +graphic 0 90 2.397895 0.000000 147 +second 1 81 2.484907 2.484907 166 +activ 1 84 2.484907 2.484907 182 +build 1 85 2.484907 2.484907 184 +control 0 82 2.484907 0.000000 164 +novemb 0 81 2.484907 0.000000 179 +thing 0 84 2.484907 0.000000 189 +school 0 84 2.484907 0.000000 188 +librari 0 87 2.484907 0.000000 181 +help 0 83 2.484907 0.000000 175 +interfac 1 79 2.564949 2.564949 209 +appear 0 78 2.564949 0.000000 210 +david 1 71 2.639057 2.639057 232 +intellig 0 72 2.639057 0.000000 225 +line 0 75 2.639057 0.000000 231 +symposium 0 72 2.639057 0.000000 238 +nation 0 74 2.639057 0.000000 240 +knowledg 0 67 2.708050 0.000000 243 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +plan 1 65 2.772589 2.772589 272 +interact 1 62 2.772589 2.772589 270 +experi 0 64 2.772589 0.000000 283 +import 0 65 2.772589 0.000000 282 +automat 1 61 2.833213 2.833213 306 +simpl 1 60 2.833213 2.833213 298 +variou 0 56 2.890372 0.000000 317 +local 1 55 2.944439 2.944439 334 +undergradu 0 54 2.944439 0.000000 338 +week 0 52 2.995732 0.000000 343 +even 0 45 3.135494 0.000000 393 +favorit 0 44 3.135494 0.000000 410 +third 0 43 3.178054 0.000000 412 +fast 0 42 3.218876 0.000000 429 +programm 0 39 3.258097 0.000000 445 +live 0 40 3.258097 0.000000 451 +map 0 39 3.258097 0.000000 452 +realli 0 40 3.258097 0.000000 444 +prototyp 0 38 3.295837 0.000000 463 +feel 0 37 3.332205 0.000000 483 +respons 0 37 3.332205 0.000000 476 +game 0 36 3.367296 0.000000 498 +represent 0 35 3.401197 0.000000 512 +michael 0 35 3.401197 0.000000 514 +everi 0 34 3.401197 0.000000 519 +manual 0 35 3.401197 0.000000 504 +product 0 33 3.433987 0.000000 527 +human 0 32 3.465736 0.000000 546 +collabor 0 32 3.465736 0.000000 543 +domain 0 30 3.555348 0.000000 564 +built 0 29 3.583519 0.000000 592 +rather 0 26 3.688879 0.000000 642 +client 0 25 3.737670 0.000000 679 +spent 0 25 3.737670 0.000000 676 +store 0 24 3.761200 0.000000 693 +demonstr 0 24 3.761200 0.000000 694 +initi 0 23 3.806662 0.000000 717 +recognit 0 23 3.806662 0.000000 723 +famili 0 23 3.806662 0.000000 735 +william 0 22 3.850148 0.000000 765 +theunivers 0 21 3.912023 0.000000 797 +navig 0 21 3.912023 0.000000 796 +watch 0 21 3.912023 0.000000 789 +anderson 1 19 4.007333 4.007333 860 +boston 0 19 4.007333 0.000000 862 +agent 0 18 4.060443 0.000000 910 +adam 0 17 4.110874 0.000000 934 +match 0 16 4.174387 0.000000 965 +atth 0 15 4.248495 0.000000 1019 +mayb 0 15 4.248495 0.000000 1014 +camera 1 14 4.317488 4.317488 1115 +dave 1 14 4.317488 4.317488 1098 +senior 0 14 4.317488 0.000000 1120 +context 0 13 4.382027 0.000000 1153 +whose 0 13 4.382027 0.000000 1166 +sai 0 13 4.382027 0.000000 1175 +touch 0 12 4.465908 0.000000 1288 +grow 0 12 4.465908 0.000000 1209 +career 0 12 4.465908 0.000000 1287 +appl 0 11 4.553877 0.000000 1303 +michigan 0 11 4.553877 0.000000 1368 +surf 0 11 4.553877 0.000000 1301 +shop 1 10 4.653960 4.653960 1469 +death 0 10 4.653960 0.000000 1457 +weld 1 9 4.753590 4.753590 1538 +russel 0 9 4.753590 0.000000 1507 +debugg 0 9 4.753590 0.000000 1493 +declar 0 9 4.753590 0.000000 1526 +leader 0 9 4.753590 0.000000 1576 +juan 0 9 4.753590 0.000000 1580 +sean 0 8 4.875197 0.000000 1705 +aaai 0 8 4.875197 0.000000 1750 +edg 0 8 4.875197 0.000000 1647 +planner 0 7 5.010635 0.000000 1797 +sensor 0 7 5.010635 0.000000 1920 +fortun 0 7 5.010635 0.000000 1872 +spare 0 6 5.164786 0.000000 2177 +mix 0 6 5.164786 0.000000 2200 +chicago 0 6 5.164786 0.000000 2149 +babi 0 5 5.347108 0.000000 2493 +pars 0 5 5.347108 0.000000 2321 +darren 0 5 5.347108 0.000000 2565 +compet 0 5 5.347108 0.000000 2462 +salesin 1 4 5.568345 5.568345 3051 +midnight 0 4 5.568345 0.000000 2599 +gotten 0 4 5.568345 0.000000 2628 +cut 0 4 5.568345 0.000000 2620 +tick 0 4 5.568345 0.000000 2975 +cohen 1 3 5.857933 5.857933 3652 +workin 0 3 5.857933 0.000000 3938 +harold 0 3 5.857933 0.000000 3803 +gloriou 0 3 5.857933 0.000000 3816 +hero 0 3 5.857933 0.000000 3711 +fame 0 3 5.857933 0.000000 3793 +straight 0 3 5.857933 0.000000 3655 +evil 0 3 5.857933 0.000000 3943 +christianson 1 2 6.263398 6.263398 5849 +till 1 2 6.263398 6.263398 5850 +nowher 0 2 6.263398 0.000000 4292 +shopbot 0 2 6.263398 0.000000 5824 +chicken 0 2 6.263398 0.000000 5851 +theanim 0 2 6.263398 0.000000 5852 +thechateau 0 2 6.263398 0.000000 5853 +cynic 0 2 6.263398 0.000000 5854 +duel 0 2 6.263398 0.000000 5855 +christiansondbc 0 1 6.957497 0.000000 15103 +inaiand 0 1 6.957497 0.000000 15104 +graphicsa 0 1 6.957497 0.000000 15105 +directedbehavior 0 1 6.957497 0.000000 15106 +buzzwordacquisit 0 1 6.957497 0.000000 15107 +bobdoorenbo 0 1 6.957497 0.000000 15108 +somehowintegr 0 1 6.957497 0.000000 15109 +applicationthat 0 1 6.957497 0.000000 15110 +basket 0 1 6.957497 0.000000 15111 +determinewhat 0 1 6.957497 0.000000 15112 +moviethat 0 1 6.957497 0.000000 15113 +technologyinto 0 1 6.957497 0.000000 15114 +perpetr 0 1 6.957497 0.000000 15115 +theucpop 0 1 6.957497 0.000000 15116 +isher 0 1 6.957497 0.000000 15117 +carboload 0 1 6.957497 0.000000 15118 +publicationschristianson 0 1 6.957497 0.000000 15119 +cinematographi 0 1 6.957497 0.000000 15120 +firbi 0 1 6.957497 0.000000 15121 +mcdougal 0 1 6.957497 0.000000 15122 +fusion 0 1 6.957497 0.000000 15123 +withfreder 0 1 6.957497 0.000000 15124 +judo 0 1 6.957497 0.000000 15125 +sibl 0 1 6.957497 0.000000 15126 +sisterjust 0 1 6.957497 0.000000 15127 +supercollid 0 1 6.957497 0.000000 15128 +slack 0 1 6.957497 0.000000 15129 +mirski 0 1 6.957497 0.000000 15130 +youth 0 1 6.957497 0.000000 15131 +wwwf 0 1 6.957497 0.000000 15132 +grudg 0 1 6.957497 0.000000 15133 +doomgat 0 1 6.957497 0.000000 15134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..ad436b2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +modifi 0 178 1.609438 0.000000 35 +seattl 0 120 2.079442 0.000000 103 +assist 0 112 2.197225 0.000000 113 +look 0 107 2.197225 0.000000 115 +take 1 97 2.302585 2.302585 134 +activ 0 84 2.484907 0.000000 182 +mondai 0 77 2.564949 0.000000 206 +david 0 71 2.639057 0.000000 232 +creat 0 63 2.772589 0.000000 277 +give 0 50 3.044522 0.000000 359 +tutori 0 39 3.258097 0.000000 437 +form 0 39 3.258097 0.000000 443 +togeth 0 23 3.806662 0.000000 714 +navig 0 21 3.912023 0.000000 796 +hypertext 0 19 4.007333 0.000000 865 +quiz 1 16 4.174387 4.174387 990 +dave 0 14 4.317488 0.000000 1098 +johnson 1 13 4.382027 4.382027 1162 +script 0 13 4.382027 0.000000 1171 +basketbal 0 12 4.465908 0.000000 1289 +softbal 0 9 4.753590 0.000000 1594 +golf 0 6 5.164786 0.000000 2178 +fit 0 5 5.347108 0.000000 2285 +racquetbal 0 4 5.568345 0.000000 3052 +assess 0 4 5.568345 0.000000 2724 +readersproject 0 1 6.957497 0.000000 15135 +theracquetbal 0 1 6.957497 0.000000 15136 +thecreat 0 1 6.957497 0.000000 15137 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..961e6ce8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 0 374 0.693147 0.000000 7 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +first 1 140 1.945910 1.945910 71 +like 0 132 1.945910 0.000000 81 +year 0 148 1.945910 0.000000 84 +construct 0 139 1.945910 0.000000 82 +perform 0 143 1.945910 0.000000 74 +problem 0 147 1.945910 0.000000 75 +seattl 1 120 2.079442 2.079442 103 +studi 0 120 2.079442 0.000000 91 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +manag 0 114 2.197225 0.000000 125 +site 0 106 2.197225 0.000000 119 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +call 0 91 2.397895 0.000000 153 +commun 0 95 2.397895 0.000000 157 +homepag 0 93 2.397895 0.000000 148 +section 0 94 2.397895 0.000000 149 +real 0 93 2.397895 0.000000 144 +level 1 87 2.484907 2.484907 180 +stuff 1 87 2.484907 2.484907 171 +academ 0 82 2.484907 0.000000 178 +environ 0 84 2.484907 0.000000 177 +help 0 83 2.484907 0.000000 175 +institut 0 84 2.484907 0.000000 187 +learn 0 86 2.484907 0.000000 170 +server 1 76 2.564949 2.564949 204 +dynam 1 76 2.564949 2.564949 194 +know 0 80 2.564949 0.000000 198 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +david 1 71 2.639057 2.639057 232 +solv 0 73 2.639057 0.000000 234 +line 0 75 2.639057 0.000000 231 +view 0 70 2.708050 0.000000 254 +main 0 67 2.708050 0.000000 256 +visit 1 63 2.772589 2.772589 288 +written 0 63 2.772589 0.000000 278 +previou 0 62 2.772589 0.000000 290 +guid 0 63 2.772589 0.000000 267 +unix 1 58 2.890372 2.890372 308 +major 1 56 2.890372 2.890372 315 +think 0 57 2.890372 0.000000 314 +reason 0 57 2.890372 0.000000 318 +extens 1 53 2.944439 2.944439 340 +undergradu 1 54 2.944439 2.944439 338 +maintain 0 51 2.995732 0.000000 342 +run 0 51 2.995732 0.000000 347 +date 0 51 2.995732 0.000000 344 +life 1 50 3.044522 3.044522 375 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +featur 0 46 3.091042 0.000000 386 +netscap 0 44 3.135494 0.000000 395 +around 0 43 3.178054 0.000000 415 +past 0 42 3.218876 0.000000 428 +movi 1 40 3.258097 3.258097 459 +littl 0 39 3.258097 0.000000 454 +brian 0 38 3.295837 0.000000 466 +respons 0 37 3.332205 0.000000 476 +soon 0 36 3.367296 0.000000 494 +manual 0 35 3.401197 0.000000 504 +extend 0 32 3.465736 0.000000 539 +weather 0 28 3.610918 0.000000 618 +campu 0 27 3.637586 0.000000 623 +team 0 27 3.637586 0.000000 625 +primari 0 25 3.737670 0.000000 669 +sport 0 25 3.737670 0.000000 683 +todai 0 25 3.737670 0.000000 672 +other 0 24 3.761200 0.000000 697 +dai 0 22 3.850148 0.000000 753 +recommend 0 22 3.850148 0.000000 737 +kernel 1 20 3.951244 3.951244 825 +safeti 0 20 3.951244 0.000000 817 +fine 0 20 3.951244 0.000000 822 +region 0 19 4.007333 0.000000 875 +excel 0 19 4.007333 0.000000 868 +bershad 0 18 4.060443 0.000000 902 +statu 0 18 4.060443 0.000000 885 +protect 0 17 4.110874 0.000000 935 +debug 0 17 4.110874 0.000000 944 +condit 1 16 4.174387 4.174387 975 +anyth 0 16 4.174387 0.000000 998 +dilbert 0 16 4.174387 0.000000 996 +mayb 0 15 4.248495 0.000000 1014 +spin 1 14 4.317488 4.317488 1121 +achiev 0 14 4.317488 0.000000 1088 +stai 1 12 4.465908 4.465908 1215 +touch 1 12 4.465908 4.465908 1288 +rest 0 12 4.465908 0.000000 1259 +surf 1 11 4.553877 4.553877 1301 +traffic 1 10 4.653960 4.653960 1421 +shop 0 10 4.653960 0.000000 1469 +modula 1 9 4.753590 4.753590 1613 +mach 1 8 4.875197 4.875197 1669 +transport 1 8 4.875197 4.875197 1672 +claim 0 8 4.875197 0.000000 1664 +hold 0 8 4.875197 0.000000 1645 +wouldn 0 7 5.010635 0.000000 1970 +occasion 0 7 5.010635 0.000000 1905 +athlet 0 7 5.010635 0.000000 1933 +yeah 0 6 5.164786 0.000000 2195 +impress 0 6 5.164786 0.000000 2096 +variant 0 6 5.164786 0.000000 2043 +subsystem 0 6 5.164786 0.000000 2015 +band 0 6 5.164786 0.000000 2198 +restaur 0 6 5.164786 0.000000 2230 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +afraid 0 4 5.568345 0.000000 3053 +breath 0 4 5.568345 0.000000 2946 +countless 0 4 5.568345 0.000000 3020 +racquetbal 0 4 5.568345 0.000000 3052 +leagu 0 4 5.568345 0.000000 3040 +basebal 0 4 5.568345 0.000000 2969 +bean 0 4 5.568345 0.000000 2968 +slight 0 3 5.857933 0.000000 3894 +emul 0 3 5.857933 0.000000 3944 +meanwhil 0 3 5.857933 0.000000 3129 +distract 0 3 5.857933 0.000000 3945 +trumpet 0 3 5.857933 0.000000 3946 +espn 0 3 5.857933 0.000000 3724 +marin 0 3 5.857933 0.000000 3947 +dion 1 2 6.263398 6.263398 5856 +okai 0 2 6.263398 0.000000 4465 +occupi 0 2 6.263398 0.000000 5857 +ladder 0 2 6.263398 0.000000 5858 +outlet 0 2 6.263398 0.000000 5248 +infam 0 2 6.263398 0.000000 5859 +ddion 0 1 6.957497 0.000000 15138 +thespinoper 0 1 6.957497 0.000000 15139 +intercept 0 1 6.957497 0.000000 15140 +havework 0 1 6.957497 0.000000 15141 +ipromis 0 1 6.957497 0.000000 15142 +enhancedthi 0 1 6.957497 0.000000 15143 +vast 0 1 6.957497 0.000000 15144 +sportzon 0 1 6.957497 0.000000 15145 +eateri 0 1 6.957497 0.000000 15146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..3cb37052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +fall 1 181 1.609438 1.609438 40 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +address 0 170 1.791759 0.000000 62 +perform 1 143 1.945910 1.945910 74 +tool 1 117 2.079442 2.079442 93 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +topic 1 114 2.197225 2.197225 110 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 0 116 2.197225 0.000000 108 +person 0 111 2.197225 0.000000 117 +advanc 1 99 2.302585 2.302585 130 +commun 1 95 2.397895 2.397895 157 +mani 0 92 2.397895 0.000000 150 +center 0 88 2.397895 0.000000 158 +environ 0 84 2.484907 0.000000 177 +start 0 83 2.484907 0.000000 173 +server 0 76 2.564949 0.000000 204 +issu 0 78 2.564949 0.000000 211 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +would 0 67 2.708050 0.000000 251 +experi 1 64 2.772589 2.772589 283 +collect 1 65 2.772589 2.772589 268 +interact 0 62 2.772589 0.000000 270 +colleg 1 61 2.833213 2.833213 300 +share 0 59 2.833213 0.000000 304 +index 0 56 2.890372 0.000000 309 +summer 0 56 2.890372 0.000000 311 +extens 1 53 2.944439 2.944439 340 +undergradu 0 54 2.944439 0.000000 338 +run 0 51 2.995732 0.000000 347 +protocol 0 45 3.135494 0.000000 407 +offer 1 43 3.178054 3.178054 414 +around 0 43 3.178054 0.000000 415 +howev 0 41 3.218876 0.000000 422 +field 0 37 3.332205 0.000000 482 +award 0 34 3.401197 0.000000 523 +curriculum 0 33 3.433987 0.000000 535 +independ 0 32 3.465736 0.000000 548 +dissert 0 32 3.465736 0.000000 549 +often 0 31 3.496508 0.000000 551 +administr 1 27 3.637586 3.637586 628 +campu 0 27 3.637586 0.000000 623 +arrai 0 27 3.637586 0.000000 627 +challeng 0 26 3.688879 0.000000 653 +task 0 25 3.737670 0.000000 678 +portabl 1 20 3.951244 3.951244 819 +beauti 0 18 4.060443 0.000000 912 +event 0 18 4.060443 0.000000 896 +sheet 0 16 4.174387 0.000000 973 +women 0 16 4.174387 0.000000 1004 +career 0 12 4.465908 0.000000 1287 +skill 0 12 4.465908 0.000000 1205 +asynchron 0 12 4.465908 0.000000 1229 +host 0 11 4.553877 0.000000 1306 +typic 0 11 4.553877 0.000000 1360 +extra 0 11 4.553877 0.000000 1312 +interestsmi 0 10 4.653960 0.000000 1462 +tutor 0 9 4.753590 0.000000 1552 +heart 0 8 4.875197 0.000000 1729 +ring 0 8 4.875197 0.000000 1684 +on 0 8 4.875197 0.000000 1628 +pursu 0 7 5.010635 0.000000 1902 +divers 0 6 5.164786 0.000000 2232 +averag 0 6 5.164786 0.000000 2098 +conveni 0 6 5.164786 0.000000 2088 +li 0 5 5.347108 0.000000 2500 +suffer 0 5 5.347108 0.000000 2268 +spaa 0 3 5.857933 0.000000 3906 +certif 0 3 5.857933 0.000000 3859 +token 0 2 6.263398 0.000000 4415 +foremost 0 2 6.263398 0.000000 5361 +ordistribut 0 2 6.263398 0.000000 5581 +distributedenviron 0 2 6.263398 0.000000 5183 +comm 0 2 6.263398 0.000000 4746 +newslet 0 2 6.263398 0.000000 5860 +derrick 1 1 6.957497 6.957497 15147 +weathersbi 0 1 6.957497 0.000000 15148 +bullssupersonicsi 0 1 6.957497 0.000000 15149 +phdin 0 1 6.957497 0.000000 15150 +ofseattl 0 1 6.957497 0.000000 15151 +prei 0 1 6.957497 0.000000 15152 +therebyextend 0 1 6.957497 0.000000 15153 +interestssignific 0 1 6.957497 0.000000 15154 +securityresearch 0 1 6.957497 0.000000 15155 +challengespres 0 1 6.957497 0.000000 15156 +theseenviron 0 1 6.957497 0.000000 15157 +daunt 0 1 6.957497 0.000000 15158 +projectacadem 0 1 6.957497 0.000000 15159 +achievementsinstructor 0 1 6.957497 0.000000 15160 +collegeinstructor 0 1 6.957497 0.000000 15161 +minoritystud 0 1 6.957497 0.000000 15162 +engineeringoutstand 0 1 6.957497 0.000000 15163 +cnnfinanciala 0 1 6.957497 0.000000 15164 +javaw 0 1 6.957497 0.000000 15165 +weathersbyderrick 0 1 6.957497 0.000000 15166 +edutu 0 1 6.957497 0.000000 15167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..7dcd85e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +develop 0 174 1.791759 0.000000 53 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +postscript 1 131 2.079442 2.079442 90 +provid 0 121 2.079442 0.000000 94 +pleas 1 113 2.197225 2.197225 114 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +need 1 98 2.302585 2.302585 135 +book 0 99 2.302585 0.000000 131 +imag 0 91 2.397895 0.000000 161 +mani 0 92 2.397895 0.000000 150 +octob 0 89 2.397895 0.000000 156 +know 0 80 2.564949 0.000000 198 +june 0 79 2.564949 0.000000 214 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +html 0 75 2.639057 0.000000 235 +tuesdai 0 73 2.639057 0.000000 219 +goal 0 66 2.708050 0.000000 250 +plai 0 60 2.833213 0.000000 307 +think 0 57 2.890372 0.000000 314 +summer 0 56 2.890372 0.000000 311 +finger 0 52 2.995732 0.000000 354 +much 0 52 2.995732 0.000000 349 +format 0 48 3.044522 0.000000 356 +frequent 0 49 3.044522 0.000000 367 +possibl 0 47 3.091042 0.000000 378 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +york 0 41 3.218876 0.000000 435 +late 0 40 3.258097 0.000000 439 +brian 1 38 3.295837 3.295837 466 +china 1 37 3.332205 3.332205 487 +feel 0 37 3.332205 0.000000 483 +game 1 36 3.367296 3.367296 498 +soon 0 36 3.367296 0.000000 494 +ofth 0 36 3.367296 0.000000 491 +statist 1 35 3.401197 3.401197 521 +return 0 34 3.401197 0.000000 502 +bibliographi 0 34 3.401197 0.000000 518 +anim 0 31 3.496508 0.000000 557 +progress 0 28 3.610918 0.000000 598 +color 0 22 3.850148 0.000000 762 +period 0 22 3.850148 0.000000 743 +feedback 0 19 4.007333 0.000000 854 +histori 0 19 4.007333 0.000000 853 +encourag 0 18 4.060443 0.000000 880 +edulast 0 17 4.110874 0.000000 927 +letter 0 16 4.174387 0.000000 981 +alreadi 0 16 4.174387 0.000000 963 +draft 1 14 4.317488 4.317488 1085 +hopefulli 1 14 4.317488 4.317488 1071 +trip 0 14 4.317488 0.000000 1113 +near 0 14 4.317488 0.000000 1091 +carri 1 13 4.382027 4.382027 1152 +readabl 0 12 4.465908 0.000000 1258 +bike 0 10 4.653960 0.000000 1468 +death 0 10 4.653960 0.000000 1457 +drink 0 9 4.753590 0.000000 1607 +ride 0 8 4.875197 0.000000 1741 +blue 0 6 5.164786 0.000000 2227 +seen 0 6 5.164786 0.000000 2202 +scienceand 0 5 5.347108 0.000000 2348 +atlant 0 5 5.347108 0.000000 2508 +semi 0 5 5.347108 0.000000 2510 +chaotic 0 5 5.347108 0.000000 2566 +ireland 1 4 5.568345 5.568345 2853 +pagebrian 0 4 5.568345 0.000000 3054 +myfavorit 0 3 5.857933 0.000000 3852 +interview 0 3 5.857933 0.000000 3324 +fascin 0 3 5.857933 0.000000 3948 +northern 1 2 6.263398 6.263398 5861 +terrorist 1 2 6.263398 6.263398 5190 +thorough 1 2 6.263398 6.263398 4134 +ocean 0 2 6.263398 0.000000 5375 +shoulder 0 2 6.263398 0.000000 4750 +thecurr 0 2 6.263398 0.000000 5862 +addict 0 2 6.263398 0.000000 5576 +dewei 1 1 6.957497 6.957497 15168 +deweyabout 0 1 6.957497 0.000000 15169 +doyou 0 1 6.957497 0.000000 15170 +ilov 0 1 6.957497 0.000000 15171 +oldroomm 0 1 6.957497 0.000000 15172 +irelandi 0 1 6.957497 0.000000 15173 +belfast 0 1 6.957497 0.000000 15174 +sixti 0 1 6.957497 0.000000 15175 +pagesof 0 1 6.957497 0.000000 15176 +luggag 0 1 6.957497 0.000000 15177 +getthos 0 1 6.957497 0.000000 15178 +enlighteningformat 0 1 6.957497 0.000000 15179 +thisproject 0 1 6.957497 0.000000 15180 +sinn 0 1 6.957497 0.000000 15181 +fein 0 1 6.957497 0.000000 15182 +injuri 0 1 6.957497 0.000000 15183 +recuri 0 1 6.957497 0.000000 15184 +ancient 0 1 6.957497 0.000000 15185 +imageek 0 1 6.957497 0.000000 15186 +cuni 0 1 6.957497 0.000000 15187 +jansteen 0 1 6.957497 0.000000 15188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..ea15db5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +seattl 1 120 2.079442 2.079442 103 +theori 1 111 2.197225 2.197225 127 +mathemat 0 108 2.197225 0.000000 123 +find 0 111 2.197225 0.000000 111 +person 0 111 2.197225 0.000000 117 +look 0 107 2.197225 0.000000 115 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +graphic 0 90 2.397895 0.000000 147 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +homepag 0 93 2.397895 0.000000 148 +thing 1 84 2.484907 2.484907 189 +contain 0 81 2.484907 0.000000 174 +academ 0 82 2.484907 0.000000 178 +appli 0 71 2.639057 0.000000 226 +write 0 72 2.639057 0.000000 222 +would 0 67 2.708050 0.000000 251 +order 0 69 2.708050 0.000000 249 +goal 0 66 2.708050 0.000000 250 +written 0 63 2.772589 0.000000 278 +abstract 0 62 2.772589 0.000000 276 +plai 0 60 2.833213 0.000000 307 +publish 0 57 2.890372 0.000000 326 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +friend 0 48 3.044522 0.000000 376 +favorit 0 44 3.135494 0.000000 410 +better 0 45 3.135494 0.000000 401 +third 0 43 3.178054 0.000000 412 +press 0 42 3.218876 0.000000 419 +futur 0 41 3.218876 0.000000 427 +probabl 0 40 3.258097 0.000000 455 +random 1 34 3.401197 3.401197 511 +given 0 32 3.465736 0.000000 538 +taken 0 31 3.496508 0.000000 555 +computersci 0 30 3.555348 0.000000 562 +depend 0 29 3.583519 0.000000 583 +ask 0 28 3.610918 0.000000 597 +mine 0 26 3.688879 0.000000 654 +seri 0 24 3.761200 0.000000 708 +fellow 0 24 3.761200 0.000000 701 +recommend 1 22 3.850148 3.850148 737 +ofwashington 0 22 3.850148 0.000000 766 +identifi 0 22 3.850148 0.000000 760 +william 0 22 3.850148 0.000000 765 +fact 0 21 3.912023 0.000000 780 +tenni 0 20 3.951244 0.000000 838 +grad 0 20 3.951244 0.000000 837 +ever 0 19 4.007333 0.000000 872 +otherwis 0 17 4.110874 0.000000 922 +anyon 0 17 4.110874 0.000000 916 +former 0 17 4.110874 0.000000 956 +whether 0 17 4.110874 0.000000 918 +chateau 0 16 4.174387 0.000000 997 +explan 0 16 4.174387 0.000000 985 +biologi 0 15 4.248495 0.000000 1049 +charact 0 15 4.248495 0.000000 1028 +role 0 14 4.317488 0.000000 1101 +pagewelcom 0 11 4.553877 0.000000 1344 +poetri 0 9 4.753590 0.000000 1596 +distanc 0 9 4.753590 0.000000 1500 +illustr 0 8 4.875197 0.000000 1679 +babylon 0 8 4.875197 0.000000 1731 +creativ 0 8 4.875197 0.000000 1777 +absolut 0 8 4.875197 0.000000 1646 +sean 0 8 4.875197 0.000000 1705 +roger 0 7 5.010635 0.000000 1892 +chronicl 0 7 5.010635 0.000000 1952 +athlet 0 7 5.010635 0.000000 1933 +fiction 1 6 5.164786 5.164786 2217 +cat 0 6 5.164786 0.000000 2194 +fantasi 0 4 5.568345 0.000000 3055 +sandi 0 4 5.568345 0.000000 2765 +portrait 0 3 5.857933 0.000000 3491 +slight 0 3 5.857933 0.000000 3894 +kwon 0 3 5.857933 0.000000 3690 +woman 0 3 5.857933 0.000000 3539 +fasulo 0 2 6.263398 0.000000 4391 +honest 0 2 6.263398 0.000000 5060 +alumnu 0 2 6.263398 0.000000 5863 +wendi 0 2 6.263398 0.000000 5864 +belluomini 0 2 6.263398 0.000000 5865 +worthwhil 0 2 6.263398 0.000000 4951 +dfasulo 1 1 6.957497 6.957497 15189 +amber 1 1 6.957497 6.957497 15190 +williamscolleg 0 1 6.957497 0.000000 15191 +inaccuraci 0 1 6.957497 0.000000 15192 +eastlak 0 1 6.957497 0.000000 15193 +merlin 0 1 6.957497 0.000000 15194 +corwin 0 1 6.957497 0.000000 15195 +zelazni 0 1 6.957497 0.000000 15196 +drpg 0 1 6.957497 0.000000 15197 +phage 0 1 6.957497 0.000000 15198 +dress 0 1 6.957497 0.000000 15199 +dogbert 0 1 6.957497 0.000000 15200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..c336629d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +washington 0 236 1.386294 0.000000 32 +seattl 0 120 2.079442 0.000000 103 +site 0 106 2.197225 0.000000 119 +internet 0 83 2.484907 0.000000 186 +resum 0 79 2.564949 0.000000 217 +tuesdai 0 73 2.639057 0.000000 219 +java 0 70 2.708050 0.000000 248 +septemb 0 65 2.772589 0.000000 274 +plai 0 60 2.833213 0.000000 307 +favorit 0 44 3.135494 0.000000 410 +autumn 0 31 3.496508 0.000000 558 +martin 1 21 3.912023 3.912023 794 +engineeringunivers 0 17 4.110874 0.000000 959 +weekli 0 17 4.110874 0.000000 919 +script 0 13 4.382027 0.000000 1171 +engr 0 10 4.653960 0.000000 1427 +sister 0 9 4.753590 0.000000 1524 +coffe 0 5 5.347108 0.000000 2556 +eduupd 0 4 5.568345 0.000000 3056 +dickei 1 2 6.263398 6.263398 4389 +garg 0 2 6.263398 0.000000 5533 +dickeycomput 0 1 6.957497 0.000000 15201 +washingtonwelcom 0 1 6.957497 0.000000 15202 +schedulenarr 0 1 6.957497 0.000000 15203 +blurbcs 0 1 6.957497 0.000000 15204 +housesfavorit 0 1 6.957497 0.000000 15205 +bookspirograph 0 1 6.957497 0.000000 15206 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..485269c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,188 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +paper 1 205 1.609438 1.609438 38 +recent 0 167 1.791759 0.000000 58 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +year 0 148 1.945910 0.000000 84 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +report 0 131 2.079442 0.000000 92 +provid 0 121 2.079442 0.000000 94 +manag 0 114 2.197225 0.000000 125 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +assist 0 112 2.197225 0.000000 113 +peopl 1 96 2.302585 2.302585 132 +search 0 95 2.397895 0.000000 155 +present 0 91 2.397895 0.000000 145 +larg 0 82 2.484907 0.000000 168 +internet 0 83 2.484907 0.000000 186 +thing 0 84 2.484907 0.000000 189 +help 0 83 2.484907 0.000000 175 +issu 1 78 2.564949 2.564949 211 +dynam 1 76 2.564949 2.564949 194 +good 1 77 2.564949 2.564949 200 +come 0 78 2.564949 0.000000 202 +meet 0 72 2.639057 0.000000 229 +materi 0 75 2.639057 0.000000 221 +would 1 67 2.708050 2.708050 251 +differ 0 66 2.708050 0.000000 253 +sieg 0 69 2.708050 0.000000 260 +polici 1 64 2.772589 2.772589 279 +guid 0 63 2.772589 0.000000 267 +plan 0 65 2.772589 0.000000 272 +juli 0 60 2.833213 0.000000 305 +back 0 60 2.833213 0.000000 297 +direct 0 57 2.890372 0.000000 316 +index 0 56 2.890372 0.000000 309 +point 0 58 2.890372 0.000000 319 +explor 0 58 2.890372 0.000000 324 +instruct 1 53 2.944439 2.944439 332 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +pointer 0 48 3.044522 0.000000 368 +appoint 0 49 3.044522 0.000000 358 +still 0 50 3.044522 0.000000 362 +without 0 50 3.044522 0.000000 370 +effect 0 46 3.091042 0.000000 385 +move 0 47 3.091042 0.000000 382 +could 0 46 3.091042 0.000000 383 +execut 0 45 3.135494 0.000000 404 +around 0 43 3.178054 0.000000 415 +show 0 43 3.178054 0.000000 417 +cach 1 41 3.218876 3.218876 432 +map 0 39 3.258097 0.000000 452 +realli 0 40 3.258097 0.000000 444 +live 0 40 3.258097 0.000000 451 +brian 0 38 3.295837 0.000000 466 +next 0 34 3.401197 0.000000 517 +go 0 33 3.433987 0.000000 529 +taught 0 33 3.433987 0.000000 526 +idea 0 32 3.465736 0.000000 545 +consid 0 29 3.583519 0.000000 590 +mind 0 27 3.637586 0.000000 632 +rather 0 26 3.688879 0.000000 642 +enjoi 0 26 3.688879 0.000000 660 +primari 0 25 3.737670 0.000000 669 +concern 0 25 3.737670 0.000000 666 +never 0 25 3.737670 0.000000 671 +alwai 1 24 3.761200 3.761200 691 +yahoo 0 24 3.761200 0.000000 707 +begin 1 23 3.806662 3.806662 716 +thread 0 23 3.806662 0.000000 722 +initi 0 23 3.806662 0.000000 717 +decis 0 23 3.806662 0.000000 728 +almost 0 22 3.850148 0.000000 742 +sort 0 22 3.850148 0.000000 738 +chen 1 21 3.912023 3.912023 791 +avoid 0 21 3.912023 0.000000 799 +among 0 21 3.912023 0.000000 781 +miss 0 19 4.007333 0.000000 866 +lyco 0 19 4.007333 0.000000 871 +definit 0 19 4.007333 0.000000 864 +bershad 1 18 4.060443 4.060443 902 +four 0 18 4.060443 0.000000 905 +element 0 18 4.060443 0.000000 895 +event 0 18 4.060443 0.000000 896 +asplo 0 17 4.110874 0.000000 948 +otherwis 0 17 4.110874 0.000000 922 +whole 0 17 4.110874 0.000000 940 +choic 0 16 4.174387 0.000000 979 +stream 0 15 4.248495 0.000000 1015 +draw 0 14 4.317488 0.000000 1086 +manner 0 14 4.317488 0.000000 1074 +alan 0 13 4.382027 0.000000 1146 +hotlist 0 13 4.382027 0.000000 1199 +brad 1 12 4.465908 4.465908 1264 +denni 1 11 4.553877 4.553877 1321 +baer 1 11 4.553877 4.553877 1353 +smart 0 11 4.553877 0.000000 1352 +isca 0 11 4.553877 0.000000 1354 +moment 0 11 4.553877 0.000000 1379 +magic 0 11 4.553877 0.000000 1358 +occur 1 9 4.753590 4.753590 1572 +osdi 0 9 4.753590 0.000000 1534 +yellow 0 9 4.753590 0.000000 1601 +russel 0 9 4.753590 0.000000 1507 +romer 1 8 4.875197 4.875197 1706 +qualifi 0 8 4.875197 0.000000 1721 +entri 0 8 4.875197 0.000000 1678 +gold 0 8 4.875197 0.000000 1745 +rais 0 8 4.875197 0.000000 1711 +baker 0 7 5.010635 0.000000 1812 +chanc 0 7 5.010635 0.000000 1960 +commit 1 6 5.164786 5.164786 2233 +conflict 1 6 5.164786 5.164786 2041 +dream 1 6 5.164786 5.164786 2165 +loup 0 6 5.164786 0.000000 2228 +presid 0 6 5.164786 0.000000 2196 +truth 0 6 5.164786 0.000000 2179 +whatev 0 6 5.164786 0.000000 2097 +fetch 0 5 5.347108 0.000000 2567 +million 0 5 5.347108 0.000000 2495 +lesson 0 5 5.347108 0.000000 2568 +ignor 0 5 5.347108 0.000000 2288 +favor 0 5 5.347108 0.000000 2414 +vote 1 4 5.568345 5.568345 2953 +ford 0 4 5.568345 0.000000 2636 +kill 0 4 5.568345 0.000000 3000 +countless 0 4 5.568345 0.000000 3020 +dlee 1 3 5.857933 5.857933 3949 +energi 0 3 5.857933 0.000000 3950 +specul 0 3 5.857933 0.000000 3951 +mappedcach 0 3 5.857933 0.000000 3928 +reorder 0 3 5.857933 0.000000 3952 +evil 0 3 5.857933 0.000000 3943 +act 0 3 5.857933 0.000000 3557 +researchwith 0 2 6.263398 0.000000 5594 +eustac 0 2 6.263398 0.000000 5866 +dirk 0 2 6.263398 0.000000 5665 +andt 0 2 6.263398 0.000000 5121 +resolutionon 0 2 6.263398 0.000000 5867 +nixon 0 2 6.263398 0.000000 5868 +court 0 2 6.263398 0.000000 4870 +silver 0 2 6.263398 0.000000 5374 +theblack 0 2 6.263398 0.000000 5869 +hesit 0 2 6.263398 0.000000 5774 +incid 0 2 6.263398 0.000000 5870 +goeth 0 2 6.263398 0.000000 5366 +calder 1 1 6.957497 6.957497 15207 +grunwald 1 1 6.957497 6.957497 15208 +huberthumphrei 1 1 6.957497 6.957497 15209 +likejean 0 1 6.957497 0.000000 15210 +enginefor 0 1 6.957497 0.000000 15211 +sensibl 0 1 6.957497 0.000000 15212 +conced 0 1 6.957497 0.000000 15213 +thatpolit 0 1 6.957497 0.000000 15214 +lesser 0 1 6.957497 0.000000 15215 +tweedledumand 0 1 6.957497 0.000000 15216 +tweedlede 0 1 6.957497 0.000000 15217 +abstain 0 1 6.957497 0.000000 15218 +theyar 0 1 6.957497 0.000000 15219 +torummag 0 1 6.957497 0.000000 15220 +allth 0 1 6.957497 0.000000 15221 +stew 0 1 6.957497 0.000000 15222 +humphrei 0 1 6.957497 0.000000 15223 +suprem 0 1 6.957497 0.000000 15224 +whentricia 0 1 6.957497 0.000000 15225 +flummeri 0 1 6.957497 0.000000 15226 +ineffect 0 1 6.957497 0.000000 15227 +splendid 0 1 6.957497 0.000000 15228 +unforeseen 0 1 6.957497 0.000000 15229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..aa3e24f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +class 0 199 1.609438 0.000000 37 +contact 1 153 1.791759 1.791759 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +databas 0 122 2.079442 0.000000 86 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +user 0 104 2.302585 0.000000 137 +homepag 1 93 2.397895 2.397895 148 +member 0 84 2.484907 0.000000 165 +novemb 0 81 2.484907 0.000000 179 +state 0 76 2.564949 0.000000 207 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +test 0 66 2.708050 0.000000 252 +thursdai 0 70 2.708050 0.000000 241 +much 0 52 2.995732 0.000000 349 +date 0 51 2.995732 0.000000 344 +anoth 0 45 3.135494 0.000000 408 +math 0 44 3.135494 0.000000 402 +execut 0 45 3.135494 0.000000 404 +edit 0 42 3.218876 0.000000 418 +realli 0 40 3.258097 0.000000 444 +feel 0 37 3.332205 0.000000 483 +actual 1 28 3.610918 3.610918 604 +usual 0 28 3.610918 0.000000 608 +intend 0 28 3.610918 0.000000 599 +todai 1 25 3.737670 3.737670 672 +background 0 25 3.737670 0.000000 664 +brows 0 23 3.806662 0.000000 726 +ofwashington 0 22 3.850148 0.000000 766 +sure 0 20 3.951244 0.000000 813 +grad 0 20 3.951244 0.000000 837 +anywai 0 15 4.248495 0.000000 1047 +club 0 15 4.248495 0.000000 1058 +player 0 11 4.553877 0.000000 1371 +michigan 0 11 4.553877 0.000000 1368 +strongli 0 10 4.653960 0.000000 1406 +death 0 10 4.653960 0.000000 1457 +doug 1 9 4.753590 4.753590 1517 +mention 0 9 4.753590 0.000000 1569 +unusu 0 9 4.753590 0.000000 1566 +vice 0 9 4.753590 0.000000 1604 +charg 0 9 4.753590 0.000000 1582 +screen 0 9 4.753590 0.000000 1577 +sister 0 9 4.753590 0.000000 1524 +ground 0 7 5.010635 0.000000 1955 +hit 0 7 5.010635 0.000000 1965 +lucki 0 6 5.164786 0.000000 2163 +presid 0 6 5.164786 0.000000 2196 +emerg 0 6 5.164786 0.000000 2038 +highwai 0 6 5.164786 0.000000 2095 +wast 1 5 5.347108 5.347108 2537 +unnecessari 0 5 5.347108 0.000000 2506 +keeper 0 5 5.347108 0.000000 2569 +bryan 0 5 5.347108 0.000000 2421 +worst 0 5 5.347108 0.000000 2287 +exhaust 0 4 5.568345 0.000000 2825 +engineeringdepart 0 4 5.568345 0.000000 2917 +backup 0 4 5.568345 0.000000 2645 +gear 0 4 5.568345 0.000000 2891 +bold 0 3 5.857933 0.000000 3846 +treasur 0 3 5.857933 0.000000 3229 +sit 0 3 5.857933 0.000000 3953 +tast 0 3 5.857933 0.000000 3666 +labor 0 3 5.857933 0.000000 3195 +sarcasm 1 2 6.263398 6.263398 5871 +casual 0 2 6.263398 0.000000 4542 +buti 0 2 6.263398 0.000000 4775 +meth 0 2 6.263398 0.000000 5872 +useless 0 2 6.263398 0.000000 5564 +caveat 0 2 6.263398 0.000000 4975 +apprentic 0 2 6.263398 0.000000 5873 +cart 0 2 6.263398 0.000000 5874 +stolen 0 2 6.263398 0.000000 5803 +zongker 1 1 6.957497 6.957497 15230 +pagececi 0 1 6.957497 0.000000 15231 +noless 0 1 6.957497 0.000000 15232 +classeshow 0 1 6.957497 0.000000 15233 +toxic 0 1 6.957497 0.000000 15234 +custard 0 1 6.957497 0.000000 15235 +filesth 0 1 6.957497 0.000000 15236 +mento 0 1 6.957497 0.000000 15237 +galleryvisit 0 1 6.957497 0.000000 15238 +supercolliderth 0 1 6.957497 0.000000 15239 +cron 0 1 6.957497 0.000000 15240 +avirtu 0 1 6.957497 0.000000 15241 +trove 0 1 6.957497 0.000000 15242 +whichmai 0 1 6.957497 0.000000 15243 +imajor 0 1 6.957497 0.000000 15244 +andminor 0 1 6.957497 0.000000 15245 +dubiou 0 1 6.957497 0.000000 15246 +honorsjunior 0 1 6.957497 0.000000 15247 +brotherhood 0 1 6.957497 0.000000 15248 +crouton 0 1 6.957497 0.000000 15249 +pizzicato 0 1 6.957497 0.000000 15250 +clicker 0 1 6.957497 0.000000 15251 +cruis 0 1 6.957497 0.000000 15252 +inhigh 0 1 6.957497 0.000000 15253 +buttstar 0 1 6.957497 0.000000 15254 +wheremi 0 1 6.957497 0.000000 15255 +dougz 0 1 6.957497 0.000000 15256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..fe4e5a5d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +oper 2 180 1.609438 3.218876 34 +modifi 0 178 1.609438 0.000000 35 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +perform 1 143 1.945910 1.945910 74 +schedul 1 119 2.079442 2.079442 85 +seattl 0 120 2.079442 0.000000 103 +databas 0 122 2.079442 0.000000 86 +person 0 111 2.197225 0.000000 117 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +activ 1 84 2.484907 2.484907 182 +help 0 83 2.484907 0.000000 175 +level 0 87 2.484907 0.000000 180 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +come 0 78 2.564949 0.000000 202 +addit 0 74 2.639057 0.000000 228 +java 1 70 2.708050 2.708050 248 +differ 0 66 2.708050 0.000000 253 +interact 1 62 2.772589 2.772589 270 +polici 0 64 2.772589 0.000000 279 +experi 0 64 2.772589 0.000000 283 +virtual 0 62 2.772589 0.000000 285 +improv 0 62 2.772589 0.000000 289 +thesi 0 57 2.890372 0.000000 327 +allow 0 53 2.944439 0.000000 333 +investig 0 51 2.995732 0.000000 353 +done 0 47 3.091042 0.000000 381 +quarter 0 47 3.091042 0.000000 389 +mechan 0 43 3.178054 0.000000 416 +seminar 0 38 3.295837 0.000000 470 +slide 0 38 3.295837 0.000000 467 +winter 0 36 3.367296 0.000000 500 +jame 0 35 3.401197 0.000000 507 +idea 0 32 3.465736 0.000000 545 +built 0 29 3.583519 0.000000 592 +demonstr 0 24 3.761200 0.000000 694 +thread 0 23 3.806662 0.000000 722 +lead 0 23 3.806662 0.000000 718 +kernel 0 20 3.951244 0.000000 825 +concentr 0 18 4.060443 0.000000 906 +spin 0 14 4.317488 0.000000 1121 +carri 0 13 4.382027 0.000000 1152 +signific 0 13 4.382027 0.000000 1125 +folk 0 9 4.753590 0.000000 1597 +voelker 0 9 4.753590 0.000000 1557 +dylan 1 8 4.875197 4.875197 1625 +slightli 0 7 5.010635 0.000000 1795 +dedic 0 7 5.010635 0.000000 1843 +gave 0 7 5.010635 0.000000 1922 +geoff 0 6 5.164786 0.000000 2124 +caus 0 5 5.347108 0.000000 2298 +commod 0 5 5.347108 0.000000 2415 +poorli 0 4 5.568345 0.000000 2781 +opal 0 4 5.568345 0.000000 3057 +oodb 1 3 5.857933 5.857933 3954 +mcname 1 2 6.263398 6.263398 5875 +properli 0 2 6.263398 0.000000 5454 +architecturethat 0 2 6.263398 0.000000 5876 +applicationsand 0 1 6.957497 0.000000 15257 +mechanismthat 0 1 6.957497 0.000000 15258 +replacementpolici 0 1 6.957497 0.000000 15259 +machoper 0 1 6.957497 0.000000 15260 +thathelp 0 1 6.957497 0.000000 15261 +kernelthread 0 1 6.957497 0.000000 15262 +tailor 0 1 6.957497 0.000000 15263 +betweenobject 0 1 6.957497 0.000000 15264 +achiv 0 1 6.957497 0.000000 15265 +improvementscan 0 1 6.957497 0.000000 15266 +betterserv 0 1 6.957497 0.000000 15267 +paperscv 0 1 6.957497 0.000000 15268 +lectureintroduc 0 1 6.957497 0.000000 15269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..9dce6c7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +modifi 0 178 1.609438 0.000000 35 +hour 0 165 1.791759 0.000000 46 +seattl 0 120 2.079442 0.000000 103 +make 0 111 2.197225 0.000000 120 +mondai 0 77 2.564949 0.000000 206 +sieg 0 69 2.708050 0.000000 260 +dept 0 64 2.772589 0.000000 291 +wednesdai 0 64 2.772589 0.000000 261 +septemb 0 65 2.772589 0.000000 274 +could 0 46 3.091042 0.000000 383 +tent 0 22 3.850148 0.000000 739 +thur 0 19 4.007333 0.000000 847 +hello 0 10 4.653960 0.000000 1407 +lewi 1 8 4.875197 4.875197 1700 +christoph 1 5 5.347108 5.347108 2512 +glad 0 4 5.568345 0.000000 2657 +echri 0 1 6.957497 0.000000 15270 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..c80defce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +washington 0 236 1.386294 0.000000 32 +blank 0 3 5.857933 0.000000 3379 +ecrock 0 1 6.957497 0.000000 15271 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..d94d504c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +mail 1 238 1.386294 1.386294 22 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +includ 1 208 1.609438 1.609438 42 +list 0 201 1.609438 0.000000 39 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +compil 1 122 2.079442 2.079442 96 +seattl 0 120 2.079442 0.000000 103 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +specif 0 106 2.197225 0.000000 106 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +peopl 1 96 2.302585 2.302585 132 +text 0 98 2.302585 0.000000 133 +access 0 102 2.302585 0.000000 136 +book 0 99 2.302585 0.000000 131 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +help 0 83 2.484907 0.000000 175 +know 1 80 2.564949 2.564949 198 +want 1 79 2.564949 2.564949 199 +resum 0 79 2.564949 0.000000 217 +dynam 0 76 2.564949 0.000000 194 +come 0 78 2.564949 0.000000 202 +line 0 75 2.639057 0.000000 231 +free 0 73 2.639057 0.000000 224 +name 0 72 2.639057 0.000000 220 +august 0 66 2.708050 0.000000 257 +plan 0 65 2.772589 0.000000 272 +creat 0 63 2.772589 0.000000 277 +visit 0 63 2.772589 0.000000 288 +import 0 65 2.772589 0.000000 282 +type 0 61 2.833213 0.000000 296 +room 0 59 2.833213 0.000000 301 +variou 0 56 2.890372 0.000000 317 +think 0 57 2.890372 0.000000 314 +found 0 53 2.944439 0.000000 337 +give 0 50 3.044522 0.000000 359 +friend 0 48 3.044522 0.000000 376 +anoth 0 45 3.135494 0.000000 408 +keep 0 44 3.135494 0.000000 409 +better 0 45 3.135494 0.000000 401 +howev 0 41 3.218876 0.000000 422 +littl 0 39 3.258097 0.000000 454 +form 0 39 3.258097 0.000000 443 +theoret 0 39 3.258097 0.000000 446 +industri 0 38 3.295837 0.000000 464 +hand 0 37 3.332205 0.000000 475 +power 0 30 3.555348 0.000000 573 +domain 0 30 3.555348 0.000000 564 +sometim 0 24 3.761200 0.000000 696 +alwai 0 24 3.761200 0.000000 691 +busi 0 21 3.912023 0.000000 784 +fact 0 21 3.912023 0.000000 780 +stand 0 18 4.060443 0.000000 891 +macintosh 0 17 4.110874 0.000000 920 +qual 0 15 4.248495 0.000000 1062 +countri 0 15 4.248495 0.000000 1059 +hong 0 14 4.317488 0.000000 1105 +karlin 0 13 4.382027 0.000000 1176 +anna 0 12 4.465908 0.000000 1292 +appl 0 11 4.553877 0.000000 1303 +guess 0 10 4.653960 0.000000 1443 +traffic 0 10 4.653960 0.000000 1421 +plain 0 9 4.753590 0.000000 1495 +sister 0 9 4.753590 0.000000 1524 +chamber 0 8 4.875197 0.000000 1692 +craig 0 7 5.010635 0.000000 1879 +happen 0 7 5.010635 0.000000 1790 +histor 0 6 5.164786 0.000000 2085 +mac 0 5 5.347108 0.000000 2292 +advic 0 5 5.347108 0.000000 2509 +insight 0 4 5.568345 0.000000 3024 +eddi 0 3 5.857933 0.000000 3896 +studentat 0 2 6.263398 0.000000 5877 +seig 0 2 6.263398 0.000000 4462 +commentari 0 2 6.263398 0.000000 4287 +bias 0 2 6.263398 0.000000 5033 +tosai 0 1 6.957497 0.000000 15272 +hadto 0 1 6.957497 0.000000 15273 +postcriptand 0 1 6.957497 0.000000 15274 +workingon 0 1 6.957497 0.000000 15275 +fordynam 0 1 6.957497 0.000000 15276 +activit 0 1 6.957497 0.000000 15277 +vine 0 1 6.957497 0.000000 15278 +branchesmi 0 1 6.957497 0.000000 15279 +knowof 0 1 6.957497 0.000000 15280 +daveneti 0 1 6.957497 0.000000 15281 +towardslik 0 1 6.957497 0.000000 15282 +eveneasi 0 1 6.957497 0.000000 15283 +freewai 0 1 6.957497 0.000000 15284 +worldher 0 1 6.957497 0.000000 15285 +edhong 0 1 6.957497 0.000000 15286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..b21761e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +architectur 1 139 1.945910 1.945910 77 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +seattl 0 120 2.079442 0.000000 103 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +memori 0 101 2.302585 0.000000 139 +call 0 91 2.397895 0.000000 153 +homepag 0 93 2.397895 0.000000 148 +dynam 1 76 2.564949 2.564949 194 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +sieg 0 69 2.708050 0.000000 260 +degre 0 69 2.708050 0.000000 259 +new 0 64 2.772589 0.000000 262 +share 1 59 2.833213 2.833213 304 +back 0 60 2.833213 0.000000 297 +thesi 0 57 2.890372 0.000000 327 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +http 0 41 3.218876 0.000000 420 +cach 0 41 3.218876 0.000000 432 +might 0 41 3.218876 0.000000 426 +committe 0 34 3.401197 0.000000 522 +computersci 0 30 3.555348 0.000000 562 +miscellan 0 23 3.806662 0.000000 731 +begin 0 23 3.806662 0.000000 716 +emphasi 0 22 3.850148 0.000000 755 +reduc 0 22 3.850148 0.000000 759 +voic 0 21 3.912023 0.000000 806 +asplo 1 17 4.110874 4.110874 948 +susan 1 15 4.248495 4.248495 1050 +qual 0 15 4.248495 0.000000 1062 +coher 0 14 4.317488 0.000000 1109 +workload 0 12 4.465908 0.000000 1210 +multithread 0 11 4.553877 0.000000 1315 +egger 1 8 4.875197 4.875197 1695 +prefetch 0 6 5.164786 0.000000 2039 +fals 0 4 5.568345 0.000000 2861 +amast 0 3 5.857933 0.000000 3955 +onexperiment 0 1 6.957497 0.000000 15287 +incompil 0 1 6.957497 0.000000 15288 +optimizationsand 0 1 6.957497 0.000000 15289 +multithreadedarchitectur 0 1 6.957497 0.000000 15290 +spinprevi 0 1 6.957497 0.000000 15291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..7b8a4977 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +parallel 0 169 1.791759 0.000000 60 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +object 0 138 1.945910 0.000000 79 +first 0 140 1.945910 0.000000 71 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +seattl 0 120 2.079442 0.000000 103 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +code 1 108 2.197225 2.197225 116 +specif 1 106 2.197225 2.197225 106 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +call 1 91 2.397895 2.397895 153 +center 0 88 2.397895 0.000000 158 +present 0 91 2.397895 0.000000 145 +follow 0 92 2.397895 0.000000 143 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +activ 0 84 2.484907 0.000000 182 +educ 0 86 2.484907 0.000000 191 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +want 0 79 2.564949 0.000000 199 +server 0 76 2.564949 0.000000 204 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +servic 1 72 2.639057 2.639057 236 +workshop 1 71 2.639057 2.639057 239 +write 0 72 2.639057 0.000000 222 +simul 1 66 2.708050 2.708050 255 +receiv 0 66 2.708050 0.000000 244 +goal 0 66 2.708050 0.000000 250 +main 0 67 2.708050 0.000000 256 +prof 0 64 2.772589 0.000000 273 +experi 0 64 2.772589 0.000000 283 +share 1 59 2.833213 2.833213 304 +march 0 61 2.833213 0.000000 295 +summer 1 56 2.890372 2.890372 311 +extens 2 53 2.944439 5.888878 340 +instruct 0 53 2.944439 0.000000 332 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +adapt 0 46 3.091042 0.000000 387 +possibl 0 47 3.091042 0.000000 378 +describ 1 45 3.135494 3.135494 400 +mechan 1 43 3.178054 3.178054 416 +third 0 43 3.178054 0.000000 412 +http 0 41 3.218876 0.000000 420 +review 0 42 3.218876 0.000000 425 +brian 0 38 3.295837 0.000000 466 +prototyp 0 38 3.295837 0.000000 463 +slide 0 38 3.295837 0.000000 467 +mean 0 37 3.332205 0.000000 477 +fault 0 32 3.465736 0.000000 547 +independ 0 32 3.465736 0.000000 548 +posit 0 31 3.496508 0.000000 552 +domain 1 30 3.555348 3.555348 564 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +load 0 28 3.610918 0.000000 601 +measur 0 28 3.610918 0.000000 609 +symbol 0 27 3.637586 0.000000 620 +compar 0 26 3.688879 0.000000 648 +aspect 1 25 3.737670 3.737670 663 +toward 0 25 3.737670 0.000000 668 +spent 0 25 3.737670 0.000000 676 +thread 1 23 3.806662 3.806662 722 +displai 0 23 3.806662 0.000000 712 +miscellan 0 23 3.806662 0.000000 731 +reduc 0 22 3.850148 0.000000 759 +path 0 21 3.912023 0.000000 778 +kernel 1 20 3.951244 3.951244 825 +wrote 1 20 3.951244 3.951244 830 +fine 1 20 3.951244 3.951244 822 +safeti 0 20 3.951244 0.000000 817 +benchmark 0 19 4.007333 0.000000 859 +bershad 0 18 4.060443 0.000000 902 +protect 1 17 4.110874 4.110874 935 +coupl 0 17 4.110874 0.000000 939 +latenc 0 16 4.174387 0.000000 993 +princeton 1 15 4.248495 4.248495 1042 +novel 1 15 4.248495 4.248495 1039 +overhead 0 15 4.248495 0.000000 1035 +enough 0 15 4.248495 0.000000 1040 +spin 2 14 4.317488 8.634976 1121 +achiev 0 14 4.317488 0.000000 1088 +senior 0 14 4.317488 0.000000 1120 +opportun 0 13 4.382027 0.000000 1161 +safe 1 12 4.465908 4.465908 1274 +robust 0 12 4.465908 0.000000 1271 +arbitrari 0 11 4.553877 0.000000 1359 +grain 1 10 4.653960 4.653960 1448 +sosp 0 10 4.653960 0.000000 1416 +ski 0 10 4.653960 0.000000 1471 +jersei 1 9 4.753590 4.753590 1587 +modula 1 9 4.753590 4.753590 1613 +grew 0 8 4.875197 0.000000 1742 +mach 0 8 4.875197 0.000000 1669 +isol 0 8 4.875197 0.000000 1663 +spec 0 8 4.875197 0.000000 1640 +sigop 0 8 4.875197 0.000000 1727 +european 0 8 4.875197 0.000000 1763 +dylan 0 8 4.875197 0.000000 1625 +interrupt 0 7 5.010635 0.000000 1793 +bell 0 6 5.164786 0.000000 2224 +subsystem 0 6 5.164786 0.000000 2015 +simultan 0 6 5.164786 0.000000 2155 +spinproject 0 5 5.347108 0.000000 2570 +compat 0 5 5.347108 0.000000 2485 +sail 0 5 5.347108 0.000000 2571 +outdoor 0 5 5.347108 0.000000 2514 +microkernel 1 4 5.568345 5.568345 3047 +turkei 0 4 5.568345 0.000000 2914 +hide 0 4 5.568345 0.000000 2996 +mip 0 4 5.568345 0.000000 2738 +wcsss 1 3 5.857933 5.857933 3956 +thesumm 0 3 5.857933 0.000000 3763 +namespac 0 3 5.857933 0.000000 3957 +hoto 0 3 5.857933 0.000000 3577 +arizona 0 3 5.857933 0.000000 3700 +dive 0 3 5.857933 0.000000 3654 +emin 1 2 6.263398 6.263398 5810 +sirer 0 2 6.263398 0.000000 5811 +backgroundi 0 2 6.263398 0.000000 5878 +andsom 0 2 6.263398 0.000000 5483 +schedulingpolici 0 2 6.263398 0.000000 5879 +strand 0 2 6.263398 0.000000 5880 +ofnew 0 2 6.263398 0.000000 5881 +mipsi 0 2 6.263398 0.000000 5882 +tucson 0 2 6.263398 0.000000 5883 +cloth 0 2 6.263398 0.000000 5884 +sirereg 0 1 6.957497 0.000000 15292 +istanbul 0 1 6.957497 0.000000 15293 +labswork 0 1 6.957497 0.000000 15294 +theplan 0 1 6.957497 0.000000 15295 +thevesta 0 1 6.957497 0.000000 15296 +projectsmi 0 1 6.957497 0.000000 15297 +andprotect 0 1 6.957497 0.000000 15298 +specificaspect 0 1 6.957497 0.000000 15299 +alarm 0 1 6.957497 0.000000 15300 +ofextend 0 1 6.957497 0.000000 15301 +allowsu 0 1 6.957497 0.000000 15302 +isdesign 0 1 6.957497 0.000000 15303 +allowsisol 0 1 6.957497 0.000000 15304 +withconflict 0 1 6.957497 0.000000 15305 +beassur 0 1 6.957497 0.000000 15306 +clincher 0 1 6.957497 0.000000 15307 +extensionsthat 0 1 6.957497 0.000000 15308 +protectionenforc 0 1 6.957497 0.000000 15309 +performanceweb 0 1 6.957497 0.000000 15310 +networkingstack 0 1 6.957497 0.000000 15311 +andminim 0 1 6.957497 0.000000 15312 +calledmipsi 0 1 6.957497 0.000000 15313 +researchplatform 0 1 6.957497 0.000000 15314 +featuresand 0 1 6.957497 0.000000 15315 +talkslanguag 0 1 6.957497 0.000000 15316 +interestswhenev 0 1 6.957497 0.000000 15317 +windsurf 0 1 6.957497 0.000000 15318 +bikingmak 0 1 6.957497 0.000000 15319 +andhik 0 1 6.957497 0.000000 15320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..b6c1670d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +recent 1 167 1.791759 1.791759 58 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +like 1 132 1.945910 1.945910 81 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +seattl 1 120 2.079442 2.079442 103 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +final 0 116 2.197225 0.000000 108 +take 0 97 2.302585 0.000000 134 +text 0 98 2.302585 0.000000 133 +pictur 0 89 2.397895 0.000000 160 +graphic 0 90 2.397895 0.000000 147 +section 0 94 2.397895 0.000000 149 +solut 1 82 2.484907 2.484907 162 +method 1 80 2.564949 2.564949 213 +june 0 79 2.564949 0.000000 214 +master 0 76 2.564949 0.000000 216 +state 0 76 2.564949 0.000000 207 +solv 1 73 2.639057 2.639057 234 +write 0 72 2.639057 0.000000 222 +involv 0 71 2.639057 0.000000 227 +materi 0 75 2.639057 0.000000 221 +sieg 0 69 2.708050 0.000000 260 +simul 0 66 2.708050 0.000000 255 +prof 1 64 2.772589 2.772589 273 +street 0 63 2.772589 0.000000 293 +automat 0 61 2.833213 0.000000 306 +thesi 0 57 2.890372 0.000000 327 +particular 0 51 2.995732 0.000000 352 +still 0 50 3.044522 0.000000 362 +numer 0 49 3.044522 0.000000 369 +could 0 46 3.091042 0.000000 383 +third 0 43 3.178054 0.000000 412 +music 0 42 3.218876 0.000000 436 +probabl 0 40 3.258097 0.000000 455 +realli 0 40 3.258097 0.000000 444 +small 0 39 3.258097 0.000000 447 +mean 0 37 3.332205 0.000000 477 +idea 0 32 3.465736 0.000000 545 +richard 0 31 3.496508 0.000000 559 +power 0 30 3.555348 0.000000 573 +weather 0 28 3.610918 0.000000 618 +mind 0 27 3.637586 0.000000 632 +honor 0 23 3.806662 0.000000 729 +famili 0 23 3.806662 0.000000 735 +try 0 22 3.850148 0.000000 764 +finish 0 22 3.850148 0.000000 748 +fact 0 21 3.912023 0.000000 780 +longer 1 20 3.951244 3.951244 816 +applet 0 20 3.951244 0.000000 827 +eric 1 19 4.007333 4.007333 870 +els 1 19 4.007333 4.007333 843 +anderson 0 19 4.007333 0.000000 860 +mostli 0 19 4.007333 0.000000 869 +prove 0 19 4.007333 0.000000 848 +matrix 0 17 4.110874 0.000000 933 +sept 0 17 4.110874 0.000000 952 +fourth 0 16 4.174387 0.000000 999 +spatial 0 16 4.174387 0.000000 988 +spars 0 16 4.174387 0.000000 989 +qual 0 15 4.248495 0.000000 1062 +nonlinear 0 14 4.317488 0.000000 1107 +step 0 13 4.382027 0.000000 1138 +care 0 13 4.382027 0.000000 1177 +resolut 0 13 4.382027 0.000000 1172 +necessari 0 13 4.382027 0.000000 1147 +bodi 0 13 4.382027 0.000000 1178 +karlin 0 13 4.382027 0.000000 1176 +brother 0 13 4.382027 0.000000 1189 +speech 0 12 4.465908 0.000000 1222 +anna 0 12 4.465908 0.000000 1292 +black 1 10 4.653960 4.653960 1418 +henri 0 10 4.653960 0.000000 1417 +explicit 0 9 4.753590 0.000000 1525 +signal 1 7 5.010635 5.010635 1910 +newton 0 7 5.010635 0.000000 1824 +commit 0 6 5.164786 0.000000 2233 +duke 0 6 5.164786 0.000000 2231 +restrict 0 6 5.164786 0.000000 2129 +transcript 0 6 5.164786 0.000000 2067 +everybodi 1 5 5.347108 5.347108 2517 +stabil 0 5 5.347108 0.000000 2286 +aim 0 5 5.347108 0.000000 2477 +isth 0 5 5.347108 0.000000 2532 +snapshot 0 5 5.347108 0.000000 2303 +implicit 1 4 5.568345 5.568345 2830 +backward 0 4 5.568345 0.000000 2638 +freedom 0 3 5.857933 0.000000 3890 +euler 0 3 5.857933 0.000000 3174 +interplai 0 3 5.857933 0.000000 3726 +astrophys 0 3 5.857933 0.000000 3936 +aclu 0 2 6.263398 0.000000 5227 +reno 0 2 6.263398 0.000000 5228 +mere 0 2 6.263398 0.000000 5340 +panic 0 2 6.263398 0.000000 5682 +criterion 0 2 6.263398 0.000000 5885 +acoust 0 2 6.263398 0.000000 5355 +musician 0 2 6.263398 0.000000 5718 +preparedfor 0 2 6.263398 0.000000 5886 +meander 0 2 6.263398 0.000000 5887 +andersonwher 0 1 6.957497 0.000000 15321 +decisionin 0 1 6.957497 0.000000 15322 +thedecis 0 1 6.957497 0.000000 15323 +interim 0 1 6.957497 0.000000 15324 +feloni 0 1 6.957497 0.000000 15325 +themarketplac 0 1 6.957497 0.000000 15326 +imostli 0 1 6.957497 0.000000 15327 +greensideof 0 1 6.957497 0.000000 15328 +onsteadi 0 1 6.957497 0.000000 15329 +biharmon 0 1 6.957497 0.000000 15330 +timesteppingmethod 0 1 6.957497 0.000000 15331 +analysisissu 0 1 6.957497 0.000000 15332 +nonlinearequ 0 1 6.957497 0.000000 15333 +newtonstep 0 1 6.957497 0.000000 15334 +spiffi 0 1 6.957497 0.000000 15335 +structuresbi 0 1 6.957497 0.000000 15336 +andersoni 0 1 6.957497 0.000000 15337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..f640ed32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +paper 1 205 1.609438 1.609438 38 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +wisconsin 0 169 1.791759 0.000000 54 +recent 0 167 1.791759 0.000000 58 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +hall 0 146 1.945910 0.000000 65 +first 0 140 1.945910 0.000000 71 +machin 1 129 2.079442 2.079442 95 +databas 1 122 2.079442 2.079442 86 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +high 0 130 2.079442 0.000000 101 +postscript 0 131 2.079442 0.000000 90 +world 1 115 2.197225 2.197225 126 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +search 1 95 2.397895 2.397895 155 +select 0 91 2.397895 0.000000 154 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +learn 2 86 2.484907 4.969814 170 +internet 1 83 2.484907 2.484907 186 +contain 1 81 2.484907 2.484907 174 +second 0 81 2.484907 0.000000 166 +wide 0 84 2.484907 0.000000 185 +ieee 0 86 2.484907 0.000000 190 +resourc 0 81 2.484907 0.000000 172 +master 1 76 2.564949 2.564949 216 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +interfac 0 79 2.564949 0.000000 209 +decemb 0 80 2.564949 0.000000 215 +servic 1 72 2.639057 2.639057 236 +effici 1 73 2.639057 2.639057 233 +intellig 1 72 2.639057 2.639057 225 +html 0 75 2.639057 0.000000 235 +addit 0 74 2.639057 0.000000 228 +knowledg 1 67 2.708050 2.708050 243 +sieg 0 69 2.708050 0.000000 260 +august 0 66 2.708050 0.000000 257 +plan 1 65 2.772589 2.772589 272 +juli 1 60 2.833213 2.833213 305 +room 0 59 2.833213 0.000000 301 +thesi 1 57 2.890372 2.890372 327 +reason 0 57 2.890372 0.000000 318 +unix 0 58 2.890372 0.000000 308 +undergradu 0 54 2.944439 0.000000 338 +without 0 50 3.044522 0.000000 370 +understand 1 47 3.091042 3.091042 384 +move 0 47 3.091042 0.000000 382 +error 0 40 3.258097 0.000000 449 +close 0 38 3.295837 0.000000 465 +field 0 37 3.332205 0.000000 482 +multi 1 36 3.367296 3.367296 493 +robot 1 36 3.367296 3.367296 497 +statist 0 35 3.401197 0.000000 521 +human 0 32 3.465736 0.000000 546 +photo 0 31 3.496508 0.000000 561 +robert 0 30 3.555348 0.000000 567 +neural 0 30 3.555348 0.000000 578 +determin 0 27 3.637586 0.000000 630 +enabl 1 26 3.688879 3.688879 655 +relev 1 26 3.688879 3.688879 637 +request 0 26 3.688879 0.000000 635 +magazin 0 24 3.761200 0.000000 704 +greg 0 24 3.761200 0.000000 695 +ofwashington 0 22 3.850148 0.000000 766 +william 0 22 3.850148 0.000000 765 +voic 0 21 3.912023 0.000000 806 +fact 0 21 3.912023 0.000000 780 +expert 0 20 3.951244 0.000000 833 +comparison 1 19 4.007333 4.007333 863 +agent 1 18 4.060443 4.060443 910 +repositori 1 17 4.110874 4.110874 932 +white 0 17 4.110874 0.000000 951 +analyz 0 17 4.110874 0.000000 925 +brief 0 16 4.174387 0.000000 1001 +choic 0 16 4.174387 0.000000 979 +indic 0 15 4.248495 0.000000 1013 +jonathan 0 13 4.382027 0.000000 1174 +food 0 12 4.465908 0.000000 1285 +bruce 0 12 4.465908 0.000000 1226 +count 0 12 4.465908 0.000000 1239 +stephen 0 11 4.553877 0.000000 1342 +induct 0 11 4.553877 0.000000 1304 +metacrawl 1 10 4.653960 4.653960 1455 +packard 1 10 4.653960 4.653960 1444 +shop 0 10 4.653960 0.000000 1469 +sound 0 9 4.753590 0.000000 1605 +incomplet 0 9 4.753590 0.000000 1575 +aaai 1 8 4.875197 4.875197 1750 +hewlett 1 8 4.875197 4.875197 1709 +irvin 1 8 4.875197 4.875197 1660 +accomplish 0 8 4.875197 0.000000 1755 +satisfi 0 8 4.875197 0.000000 1694 +gather 0 8 4.875197 0.000000 1719 +autonom 0 8 4.875197 0.000000 1749 +erik 0 8 4.875197 0.000000 1701 +softbot 2 7 5.010635 10.021270 1974 +discoveri 1 7 5.010635 5.010635 1915 +sparc 0 7 5.010635 0.000000 1860 +foc 0 7 5.010635 0.000000 1880 +planner 0 7 5.010635 0.000000 1797 +golden 0 7 5.010635 0.000000 1962 +hunt 0 7 5.010635 0.000000 1798 +illinoi 0 7 5.010635 0.000000 1941 +usenet 0 7 5.010635 0.000000 1839 +etzioni 1 6 5.164786 5.164786 2135 +oren 1 6 5.164786 5.164786 2134 +advis 1 6 5.164786 5.164786 2173 +fiction 0 6 5.164786 0.000000 2217 +forecast 0 6 5.164786 0.000000 2171 +brook 0 6 5.164786 0.000000 2152 +german 0 6 5.164786 0.000000 2190 +accuraci 0 5 5.347108 0.000000 2450 +cacm 0 5 5.347108 0.000000 2388 +keith 0 5 5.347108 0.000000 2528 +selberg 0 5 5.347108 0.000000 2441 +amherst 0 5 5.347108 0.000000 2484 +disambigu 0 4 5.568345 0.000000 2899 +innov 0 4 5.568345 0.000000 2933 +chain 0 4 5.568345 0.000000 2712 +repli 0 4 5.568345 0.000000 2689 +ijcai 0 4 5.568345 0.000000 2901 +sophist 0 3 5.857933 0.000000 3545 +ahoi 0 3 5.857933 0.000000 3532 +deploi 0 3 5.857933 0.000000 3750 +neal 0 3 5.857933 0.000000 3184 +lockhe 0 3 5.857933 0.000000 3863 +faq 0 3 5.857933 0.000000 3216 +pageoren 0 2 6.263398 0.000000 5888 +pagedepart 0 2 6.263398 0.000000 5052 +anddynam 0 2 6.263398 0.000000 5889 +finalist 0 2 6.263398 0.000000 5890 +discoveraward 0 2 6.263398 0.000000 5891 +brute 0 2 6.263398 0.000000 5892 +hypothes 0 2 6.263398 0.000000 5607 +ascal 0 2 6.263398 0.000000 5893 +toappear 0 2 6.263398 0.000000 4343 +bernard 0 2 6.263398 0.000000 5894 +lesh 0 2 6.263398 0.000000 5895 +goan 0 2 6.263398 0.000000 5896 +zamir 0 2 6.263398 0.000000 5897 +shake 0 2 6.263398 0.000000 5898 +umass 0 2 6.263398 0.000000 5899 +bioand 0 1 6.957497 0.000000 15338 +heor 0 1 6.957497 0.000000 15339 +searchmultipl 0 1 6.957497 0.000000 15340 +pruningopt 0 1 6.957497 0.000000 15341 +netrecommend 0 1 6.957497 0.000000 15342 +locatesindividu 0 1 6.957497 0.000000 15343 +bruteforc 0 1 6.957497 0.000000 15344 +whenrun 0 1 6.957497 0.000000 15345 +theweb 0 1 6.957497 0.000000 15346 +richardseg 0 1 6.957497 0.000000 15347 +fileretriev 0 1 6.957497 0.000000 15348 +universalquantif 0 1 6.957497 0.000000 15349 +terranc 0 1 6.957497 0.000000 15350 +mikeperkowitz 0 1 6.957497 0.000000 15351 +soderland 0 1 6.957497 0.000000 15352 +roomi 0 1 6.957497 0.000000 15353 +lesourd 0 1 6.957497 0.000000 15354 +spiger 0 1 6.957497 0.000000 15355 +alford 0 1 6.957497 0.000000 15356 +fitchenholtz 0 1 6.957497 0.000000 15357 +guido 0 1 6.957497 0.000000 15358 +dymitr 0 1 6.957497 0.000000 15359 +mozdyniewicz 0 1 6.957497 0.000000 15360 +quark 0 1 6.957497 0.000000 15361 +minecontain 0 1 6.957497 0.000000 15362 +neuroprosearch 0 1 6.957497 0.000000 15363 +statlib 0 1 6.957497 0.000000 15364 +learningtoolbox 0 1 6.957497 0.000000 15365 +bonn 0 1 6.957497 0.000000 15366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..9a94fc37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +manag 0 114 2.197225 0.000000 125 +memori 0 101 2.302585 0.000000 139 +juli 0 60 2.833213 0.000000 305 +thesi 0 57 2.890372 0.000000 327 +faculti 0 56 2.890372 0.000000 325 +done 0 47 3.091042 0.000000 381 +join 0 39 3.258097 0.000000 457 +soon 0 36 3.367296 0.000000 494 +global 0 34 3.401197 0.000000 520 +concern 0 25 3.737670 0.000000 666 +mike 1 24 3.761200 3.761200 703 +finish 0 22 3.850148 0.000000 748 +feelei 1 7 5.010635 5.010635 1859 +british 0 5 5.347108 0.000000 2546 +columbia 0 2 6.263398 0.000000 5900 +papersmi 0 2 6.263398 0.000000 5462 +pagemik 0 1 6.957497 0.000000 15367 +workstationclust 0 1 6.957497 0.000000 15368 +opalproject 0 1 6.957497 0.000000 15369 +injanuari 0 1 6.957497 0.000000 15370 +summarycvsoutheast 0 1 6.957497 0.000000 15371 +idaholast 0 1 6.957497 0.000000 15372 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..3084b891 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +seattl 0 120 2.079442 0.000000 103 +pictur 0 89 2.397895 0.000000 160 +sieg 0 69 2.708050 0.000000 260 +room 0 59 2.833213 0.000000 301 +return 0 34 3.401197 0.000000 502 +voic 0 21 3.912023 0.000000 806 +chri 1 11 4.553877 4.553877 1311 +fisher 1 2 6.263398 6.263398 4794 +fisherdepart 0 1 6.957497 0.000000 15373 +engineeringbox 0 1 6.957497 0.000000 15374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..7853c1e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +cours 0 273 1.098612 0.000000 15 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +read 0 154 1.791759 0.000000 47 +algorithm 0 162 1.791759 0.000000 57 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +place 1 106 2.197225 2.197225 124 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +site 0 106 2.197225 0.000000 119 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +exam 0 86 2.484907 0.000000 169 +librari 0 87 2.484907 0.000000 181 +thing 0 84 2.484907 0.000000 189 +server 0 76 2.564949 0.000000 204 +html 0 75 2.639057 0.000000 235 +sieg 0 69 2.708050 0.000000 260 +main 0 67 2.708050 0.000000 256 +found 0 53 2.944439 0.000000 337 +quarter 0 47 3.091042 0.000000 389 +around 0 43 3.178054 0.000000 415 +might 1 41 3.218876 3.218876 426 +music 0 42 3.218876 0.000000 436 +paul 0 38 3.295837 0.000000 471 +autumn 0 31 3.496508 0.000000 558 +mind 0 27 3.637586 0.000000 632 +rather 0 26 3.688879 0.000000 642 +geometri 0 22 3.850148 0.000000 752 +fund 0 21 3.912023 0.000000 805 +nice 0 20 3.951244 0.000000 809 +listen 0 18 4.060443 0.000000 907 +engineeringunivers 0 17 4.110874 0.000000 959 +otherwis 0 17 4.110874 0.000000 922 +chateau 0 16 4.174387 0.000000 997 +drive 0 15 4.248495 0.000000 1052 +galleri 0 13 4.382027 0.000000 1192 +captur 0 12 4.465908 0.000000 1232 +denni 0 11 4.553877 0.000000 1321 +moment 0 11 4.553877 0.000000 1379 +vista 0 10 4.653960 0.000000 1452 +seven 0 9 4.753590 0.000000 1561 +somewher 0 6 5.164786 0.000000 2176 +wolman 0 6 5.164786 0.000000 2093 +alec 0 5 5.347108 0.000000 2563 +lost 0 5 5.347108 0.000000 2358 +ta 0 4 5.568345 0.000000 3058 +soul 0 4 5.568345 0.000000 2907 +luci 0 3 5.857933 0.000000 3705 +schedulethi 0 2 6.263398 0.000000 4068 +meander 0 2 6.263398 0.000000 5887 +booth 0 2 6.263398 0.000000 5119 +scheduleto 0 1 6.957497 0.000000 15375 +probablyb 0 1 6.957497 0.000000 15376 +activitiesmi 0 1 6.957497 0.000000 15377 +areasof 0 1 6.957497 0.000000 15378 +outta 0 1 6.957497 0.000000 15379 +pea 0 1 6.957497 0.000000 15380 +mofo 0 1 6.957497 0.000000 15381 +peach 0 1 6.957497 0.000000 15382 +ruel 0 1 6.957497 0.000000 15383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..429187ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +cornel 0 215 1.386294 0.000000 23 +languag 0 227 1.386294 0.000000 26 +public 0 202 1.609438 0.000000 43 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +model 1 145 1.945910 1.945910 69 +hall 0 146 1.945910 0.000000 65 +relat 0 139 1.945910 0.000000 68 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +welcom 0 122 2.079442 0.000000 99 +machin 0 129 2.079442 0.000000 95 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +mathemat 0 108 2.197225 0.000000 123 +user 0 104 2.302585 0.000000 137 +real 0 93 2.397895 0.000000 144 +activ 1 84 2.484907 2.484907 182 +educ 0 86 2.484907 0.000000 191 +interfac 0 79 2.564949 0.000000 209 +onlin 0 75 2.639057 0.000000 223 +summari 0 73 2.639057 0.000000 237 +symposium 0 72 2.639057 0.000000 238 +line 0 75 2.639057 0.000000 231 +sieg 0 69 2.708050 0.000000 260 +main 0 67 2.708050 0.000000 256 +august 0 66 2.708050 0.000000 257 +interact 0 62 2.772589 0.000000 270 +polici 0 64 2.772589 0.000000 279 +previou 0 62 2.772589 0.000000 290 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +profession 0 51 2.995732 0.000000 345 +basic 0 50 3.044522 0.000000 360 +formal 1 37 3.332205 3.332205 478 +concurr 0 34 3.401197 0.000000 501 +human 1 32 3.465736 3.465736 546 +collabor 0 32 3.465736 0.000000 543 +chair 1 29 3.583519 3.583519 596 +demonstr 0 24 3.761200 0.000000 694 +verif 0 20 3.951244 0.000000 826 +safeti 0 20 3.951244 0.000000 817 +engineeringunivers 0 17 4.110874 0.000000 959 +anyth 0 16 4.174387 0.000000 998 +mellon 1 13 4.382027 4.382027 1179 +washingtonbox 0 13 4.382027 0.000000 1200 +opportun 0 13 4.382027 0.000000 1161 +carnegi 1 12 4.465908 4.465908 1260 +ski 0 10 4.653960 0.000000 1471 +cook 0 10 4.653960 0.000000 1464 +cultur 0 7 5.010635 0.000000 1951 +spanish 0 4 5.568345 0.000000 3017 +ofmi 0 3 5.857933 0.000000 3911 +uist 0 2 6.263398 0.000000 5901 +vegetarian 0 2 6.263398 0.000000 5902 +greet 0 2 6.263398 0.000000 5903 +modugno 1 1 6.957497 6.957497 15384 +francesmari 0 1 6.957497 0.000000 15385 +pagefrancesmari 0 1 6.957497 0.000000 15386 +algorthim 0 1 6.957497 0.000000 15387 +includecycl 0 1 6.957497 0.000000 15388 +previouslyitalian 0 1 6.957497 0.000000 15389 +elleri 0 1 6.957497 0.000000 15390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..61198026 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +compil 0 122 2.079442 0.000000 96 +pictur 0 89 2.397895 0.000000 160 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +optim 0 79 2.564949 0.000000 197 +free 0 73 2.639057 0.000000 224 +receiv 0 66 2.708050 0.000000 244 +written 0 63 2.772589 0.000000 278 +dept 0 64 2.772589 0.000000 291 +finger 0 52 2.995732 0.000000 354 +netscap 0 44 3.135494 0.000000 395 +live 0 40 3.258097 0.000000 451 +word 0 34 3.401197 0.000000 508 +someth 0 31 3.496508 0.000000 554 +anim 0 31 3.496508 0.000000 557 +weather 0 28 3.610918 0.000000 618 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +pattern 0 24 3.761200 0.000000 689 +mobil 0 23 3.806662 0.000000 730 +georg 1 16 4.174387 4.174387 994 +match 0 16 4.174387 0.000000 965 +script 0 13 4.382027 0.000000 1171 +song 0 11 4.553877 0.000000 1380 +debugg 0 9 4.753590 0.000000 1493 +pagei 0 8 4.875197 0.000000 1683 +handi 0 6 5.164786 0.000000 2111 +puzzl 0 5 5.347108 0.000000 2507 +water 0 5 5.347108 0.000000 2535 +forman 1 2 6.263398 6.263398 5904 +hyperlink 0 2 6.263398 0.000000 5447 +pagegeorg 0 1 6.957497 0.000000 15391 +ariadn 0 1 6.957497 0.000000 15392 +gforman 0 1 6.957497 0.000000 15393 +comhom 0 1 6.957497 0.000000 15394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..e50cbd52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +cours 0 273 1.098612 0.000000 15 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +file 0 132 1.945910 0.000000 70 +tool 0 117 2.079442 0.000000 93 +sinc 0 90 2.397895 0.000000 159 +chang 0 82 2.484907 0.000000 163 +refer 0 78 2.564949 0.000000 203 +intellig 0 72 2.639057 0.000000 225 +artifici 0 63 2.772589 0.000000 280 +life 0 50 3.044522 0.000000 375 +visitor 0 49 3.044522 0.000000 371 +favorit 0 44 3.135494 0.000000 410 +netscap 0 44 3.135494 0.000000 395 +everi 0 34 3.401197 0.000000 519 +collabor 0 32 3.465736 0.000000 543 +quot 0 29 3.583519 0.000000 582 +bookmark 0 26 3.688879 0.000000 639 +applet 0 20 3.951244 0.000000 827 +agent 0 18 4.060443 0.000000 910 +english 1 15 4.248495 4.248495 1033 +trip 0 14 4.317488 0.000000 1113 +bike 0 10 4.653960 0.000000 1468 +poetri 0 9 4.753590 0.000000 1596 +weld 0 9 4.753590 0.000000 1538 +marc 1 8 4.875197 4.875197 1680 +dictionari 0 8 4.875197 0.000000 1642 +gather 0 8 4.875197 0.000000 1719 +friedman 1 7 5.010635 5.010635 1886 +golden 0 7 5.010635 0.000000 1962 +planner 0 7 5.010635 0.000000 1797 +keith 1 5 5.347108 5.347108 2528 +camp 0 5 5.347108 0.000000 2545 +elsewher 0 5 5.347108 0.000000 2444 +spanish 0 4 5.568345 0.000000 3017 +codi 0 3 5.857933 0.000000 3940 +kwok 0 3 5.857933 0.000000 3941 +ucpop 0 3 5.857933 0.000000 3878 +watercolor 0 1 6.957497 0.000000 15395 +checklist 0 1 6.957497 0.000000 15396 +occam 0 1 6.957497 0.000000 15397 +wordbot 0 1 6.957497 0.000000 15398 +nietzschein 0 1 6.957497 0.000000 15399 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..1b48e5d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +address 0 170 1.791759 0.000000 62 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +file 0 132 1.945910 0.000000 70 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +graphic 0 90 2.397895 0.000000 147 +member 0 84 2.484907 0.000000 165 +line 0 75 2.639057 0.000000 231 +plai 0 60 2.833213 0.000000 307 +game 0 36 3.367296 0.000000 498 +neural 0 30 3.555348 0.000000 578 +ofwashington 0 22 3.850148 0.000000 766 +audio 0 14 4.317488 0.000000 1094 +genet 1 10 4.653960 4.653960 1409 +cecil 0 9 4.753590 0.000000 1547 +garrett 1 3 5.857933 5.857933 3377 +charli 1 2 6.263398 6.263398 5905 +bookshelf 0 2 6.263398 0.000000 5724 +algorithmspap 0 1 6.957497 0.000000 15400 +algorithmsformerli 0 1 6.957497 0.000000 15401 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..a1e48fcd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +seattl 0 120 2.079442 0.000000 103 +techniqu 0 99 2.302585 0.000000 138 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +follow 0 92 2.397895 0.000000 143 +david 0 71 2.639057 0.000000 232 +appli 0 71 2.639057 0.000000 226 +multimedia 0 68 2.708050 0.000000 258 +laboratori 0 63 2.772589 0.000000 292 +thesi 0 57 2.890372 0.000000 327 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +prototyp 0 38 3.295837 0.000000 463 +deal 0 22 3.850148 0.000000 736 +supervis 0 20 3.951244 0.000000 840 +dimension 0 18 4.060443 0.000000 909 +render 1 17 4.110874 4.110874 947 +engineeringunivers 0 17 4.110874 0.000000 959 +georg 1 16 4.174387 4.174387 994 +eduphon 0 15 4.248495 0.000000 1060 +washingtonbox 0 13 4.382027 0.000000 1200 +galleri 0 13 4.382027 0.000000 1192 +wife 0 13 4.382027 0.000000 1196 +tradit 0 10 4.653960 0.000000 1404 +illustr 0 8 4.875197 0.000000 1679 +salesin 0 4 5.568345 0.000000 3051 +grail 0 3 5.857933 0.000000 3356 +winkenbach 0 1 6.957497 0.000000 15402 +winkenbachdepart 0 1 6.957497 0.000000 15403 +georgew 0 1 6.957497 0.000000 15404 +doneund 0 1 6.957497 0.000000 15405 +theautomat 0 1 6.957497 0.000000 15406 +imagescr 0 1 6.957497 0.000000 15407 +taweewan 0 1 6.957497 0.000000 15408 +siwadun 0 1 6.957497 0.000000 15409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..64cb642a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,212 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +updat 0 191 1.609438 0.000000 41 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +recent 1 167 1.791759 1.791759 58 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +file 1 132 1.945910 1.945910 70 +first 1 140 1.945910 1.945910 71 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +mathemat 1 108 2.197225 2.197225 123 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +part 1 98 2.302585 2.302585 129 +user 0 104 2.302585 0.000000 137 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +commun 0 95 2.397895 0.000000 157 +imag 0 91 2.397895 0.000000 161 +stuff 0 87 2.484907 0.000000 171 +contain 0 81 2.484907 0.000000 174 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +complet 0 77 2.564949 0.000000 208 +issu 0 78 2.564949 0.000000 211 +server 0 76 2.564949 0.000000 204 +nation 1 74 2.639057 2.639057 240 +free 0 73 2.639057 0.000000 224 +write 0 72 2.639057 0.000000 222 +html 0 75 2.639057 0.000000 235 +materi 0 75 2.639057 0.000000 221 +degre 0 69 2.708050 0.000000 259 +practic 0 70 2.708050 0.000000 246 +view 0 70 2.708050 0.000000 254 +foundat 1 62 2.772589 2.772589 286 +creat 0 63 2.772589 0.000000 277 +virtual 0 62 2.772589 0.000000 285 +back 0 60 2.833213 0.000000 297 +locat 0 59 2.833213 0.000000 303 +best 0 59 2.833213 0.000000 299 +simpl 0 60 2.833213 0.000000 298 +plai 0 60 2.833213 0.000000 307 +major 0 56 2.890372 0.000000 315 +variou 0 56 2.890372 0.000000 317 +unix 0 58 2.890372 0.000000 308 +index 0 56 2.890372 0.000000 309 +sampl 0 53 2.944439 0.000000 339 +date 0 51 2.995732 0.000000 344 +archiv 1 49 3.044522 3.044522 364 +numer 0 49 3.044522 0.000000 369 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +keep 0 44 3.135494 0.000000 409 +show 0 43 3.178054 0.000000 417 +compani 0 41 3.218876 0.000000 423 +music 0 42 3.218876 0.000000 436 +small 0 39 3.258097 0.000000 447 +author 0 39 3.258097 0.000000 450 +origin 0 38 3.295837 0.000000 472 +seminar 0 38 3.295837 0.000000 470 +microsoft 0 38 3.295837 0.000000 468 +feel 0 37 3.332205 0.000000 483 +connect 0 37 3.332205 0.000000 485 +especi 1 36 3.367296 3.367296 496 +winter 0 36 3.367296 0.000000 500 +game 0 36 3.367296 0.000000 498 +procedur 0 36 3.367296 0.000000 488 +random 0 34 3.401197 0.000000 511 +global 0 34 3.401197 0.000000 520 +articl 1 33 3.433987 3.433987 530 +express 0 32 3.465736 0.000000 540 +scientist 0 31 3.496508 0.000000 560 +computersci 0 30 3.555348 0.000000 562 +full 0 28 3.610918 0.000000 615 +progress 0 28 3.610918 0.000000 598 +univ 0 28 3.610918 0.000000 617 +linux 0 27 3.637586 0.000000 631 +pagecs 1 26 3.688879 3.688879 658 +greg 1 24 3.761200 3.761200 695 +alwai 0 24 3.761200 0.000000 691 +magazin 0 24 3.761200 0.000000 704 +yahoo 0 24 3.761200 0.000000 707 +daili 0 24 3.761200 0.000000 706 +ofwashington 0 22 3.850148 0.000000 766 +recommend 0 22 3.850148 0.000000 737 +busi 0 21 3.912023 0.000000 784 +corpor 0 21 3.912023 0.000000 802 +navig 0 21 3.912023 0.000000 796 +tenni 0 20 3.951244 0.000000 838 +feedback 0 19 4.007333 0.000000 854 +lyco 0 19 4.007333 0.000000 871 +hobbi 0 16 4.174387 0.000000 1009 +devic 0 16 4.174387 0.000000 1002 +upon 0 16 4.174387 0.000000 978 +configur 1 15 4.248495 4.248495 1012 +reflect 0 15 4.248495 0.000000 1034 +incomput 0 14 4.317488 0.000000 1096 +senior 0 14 4.317488 0.000000 1120 +emac 1 13 4.382027 4.382027 1143 +philosophi 0 13 4.382027 0.000000 1167 +misc 0 13 4.382027 0.000000 1124 +newspap 1 12 4.465908 4.465908 1280 +emploi 0 12 4.465908 0.000000 1284 +basketbal 0 12 4.465908 0.000000 1289 +magic 0 11 4.553877 0.000000 1358 +perl 0 11 4.553877 0.000000 1332 +hello 0 10 4.653960 0.000000 1407 +desktop 0 10 4.653960 0.000000 1445 +ski 0 10 4.653960 0.000000 1471 +fellowship 0 10 4.653960 0.000000 1460 +volleybal 0 9 4.753590 0.000000 1598 +inter 0 9 4.753590 0.000000 1530 +competit 1 8 4.875197 4.875197 1635 +readm 1 8 4.875197 4.875197 1699 +joel 1 8 4.875197 4.875197 1698 +entri 0 8 4.875197 0.000000 1678 +extract 0 8 4.875197 0.000000 1728 +opinion 0 8 4.875197 0.000000 1708 +chronicl 1 7 5.010635 5.010635 1952 +gatewai 0 7 5.010635 0.000000 1942 +necessarili 0 7 5.010635 0.000000 1899 +duke 1 6 5.164786 5.164786 2231 +piano 0 6 5.164786 0.000000 2201 +histor 0 6 5.164786 0.000000 2085 +vertic 0 5 5.347108 0.000000 2270 +hole 0 5 5.347108 0.000000 2518 +billi 0 5 5.347108 0.000000 2404 +doubl 0 4 5.568345 0.000000 2951 +patch 0 4 5.568345 0.000000 2710 +drew 0 4 5.568345 0.000000 2980 +cube 0 4 5.568345 0.000000 2940 +jackson 1 3 5.857933 5.857933 3586 +eduaddress 0 3 5.857933 0.000000 3762 +rsum 0 3 5.857933 0.000000 3939 +freewar 0 3 5.857933 0.000000 3504 +hotjava 0 3 5.857933 0.000000 3220 +seinfeld 0 3 5.857933 0.000000 3958 +conclus 0 3 5.857933 0.000000 3367 +pagegreg 0 2 6.263398 0.000000 5906 +amcurr 0 2 6.263398 0.000000 5798 +bermuda 0 2 6.263398 0.000000 5907 +seminarcs 0 2 6.263398 0.000000 4521 +geneticalgorithm 0 2 6.263398 0.000000 5673 +ncaa 0 2 6.263398 0.000000 5908 +unoffici 0 2 6.263398 0.000000 5909 +unif 0 2 6.263398 0.000000 5910 +badro 1 1 6.957497 6.957497 15410 +zshell 1 1 6.957497 6.957497 15411 +nesbit 0 1 6.957497 0.000000 15412 +isuppos 0 1 6.957497 0.000000 15413 +excitingfeatur 0 1 6.957497 0.000000 15414 +dukeunivers 0 1 6.957497 0.000000 15415 +fortransworld 0 1 6.957497 0.000000 15416 +indurham 0 1 6.957497 0.000000 15417 +headquart 0 1 6.957497 0.000000 15418 +myapart 0 1 6.957497 0.000000 15419 +newer 0 1 6.957497 0.000000 15420 +fvwm 0 1 6.957497 0.000000 15421 +redhat 0 1 6.957497 0.000000 15422 +transworldnumer 0 1 6.957497 0.000000 15423 +ieeenat 0 1 6.957497 0.000000 15424 +victori 0 1 6.957497 0.000000 15425 +bycomput 0 1 6.957497 0.000000 15426 +canterburi 0 1 6.957497 0.000000 15427 +definitelynot 0 1 6.957497 0.000000 15428 +juggl 0 1 6.957497 0.000000 15429 +rubik 0 1 6.957497 0.000000 15430 +sarahmclachlan 0 1 6.957497 0.000000 15431 +parliamentari 0 1 6.957497 0.000000 15432 +sgml 0 1 6.957497 0.000000 15433 +sitcom 0 1 6.957497 0.000000 15434 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..3c94e103 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,295 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +model 1 145 1.945910 1.945910 69 +year 1 148 1.945910 1.945910 84 +first 0 140 1.945910 0.000000 71 +professor 0 137 1.945910 0.000000 76 +file 0 132 1.945910 0.000000 70 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +check 0 115 2.197225 0.000000 118 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +intern 0 108 2.197225 0.000000 128 +book 1 99 2.302585 2.302585 131 +user 1 104 2.302585 2.302585 137 +techniqu 0 99 2.302585 0.000000 138 +imag 1 91 2.397895 2.397895 161 +search 0 95 2.397895 0.000000 155 +real 0 93 2.397895 0.000000 144 +associ 0 93 2.397895 0.000000 151 +graphic 0 90 2.397895 0.000000 147 +call 0 91 2.397895 0.000000 153 +learn 1 86 2.484907 2.484907 170 +school 0 84 2.484907 0.000000 188 +start 0 83 2.484907 0.000000 173 +environ 0 84 2.484907 0.000000 177 +requir 0 81 2.484907 0.000000 167 +thing 0 84 2.484907 0.000000 189 +second 0 81 2.484907 0.000000 166 +stuff 0 87 2.484907 0.000000 171 +info 0 85 2.484907 0.000000 176 +complet 0 77 2.564949 0.000000 208 +decemb 0 80 2.564949 0.000000 215 +resum 0 79 2.564949 0.000000 217 +sourc 0 77 2.564949 0.000000 201 +addit 0 74 2.639057 0.000000 228 +servic 0 72 2.639057 0.000000 236 +appli 0 71 2.639057 0.000000 226 +java 2 70 2.708050 5.416100 248 +goal 0 66 2.708050 0.000000 250 +artifici 1 63 2.772589 2.772589 280 +function 0 62 2.772589 0.000000 275 +foundat 0 62 2.772589 0.000000 286 +evalu 0 64 2.772589 0.000000 266 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +best 0 59 2.833213 0.000000 299 +major 0 56 2.890372 0.000000 315 +publish 0 57 2.890372 0.000000 326 +browser 0 56 2.890372 0.000000 313 +summer 0 56 2.890372 0.000000 311 +thesi 0 57 2.890372 0.000000 327 +reason 0 57 2.890372 0.000000 318 +found 1 53 2.944439 2.944439 337 +allow 0 53 2.944439 0.000000 333 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +cool 1 49 3.044522 3.044522 374 +life 1 50 3.044522 3.044522 375 +give 0 50 3.044522 0.000000 359 +archiv 0 49 3.044522 0.000000 364 +california 0 46 3.091042 0.000000 388 +could 0 46 3.091042 0.000000 383 +made 1 44 3.135494 3.135494 398 +even 0 45 3.135494 0.000000 393 +third 1 43 3.178054 3.178054 412 +show 0 43 3.178054 0.000000 417 +autom 0 41 3.218876 0.000000 434 +linear 0 41 3.218876 0.000000 431 +might 0 41 3.218876 0.000000 426 +movi 1 40 3.258097 3.258097 459 +submit 1 39 3.258097 3.258097 440 +multipl 0 39 3.258097 0.000000 453 +prototyp 0 38 3.295837 0.000000 463 +origin 0 38 3.295837 0.000000 472 +expect 0 37 3.332205 0.000000 484 +tree 1 36 3.367296 3.367296 492 +staff 1 36 3.367296 3.367296 490 +ofth 0 36 3.367296 0.000000 491 +return 0 34 3.401197 0.000000 502 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +concept 0 32 3.465736 0.000000 537 +posit 1 31 3.496508 3.496508 552 +scientist 0 31 3.496508 0.000000 560 +anim 0 31 3.496508 0.000000 557 +travel 1 30 3.555348 3.555348 579 +graph 0 30 3.555348 0.000000 576 +neural 0 30 3.555348 0.000000 578 +quit 0 27 3.637586 0.000000 633 +though 0 27 3.637586 0.000000 622 +manipul 0 27 3.637586 0.000000 624 +constraint 0 26 3.688879 0.000000 636 +toward 0 25 3.737670 0.000000 668 +client 0 25 3.737670 0.000000 679 +trace 0 25 3.737670 0.000000 677 +greg 1 24 3.761200 3.761200 695 +demonstr 0 24 3.761200 0.000000 694 +pattern 0 24 3.761200 0.000000 689 +famili 0 23 3.806662 0.000000 735 +displai 0 23 3.806662 0.000000 712 +brows 0 23 3.806662 0.000000 726 +highli 0 23 3.806662 0.000000 725 +thread 0 23 3.806662 0.000000 722 +honor 0 23 3.806662 0.000000 729 +compress 0 23 3.806662 0.000000 719 +ofwashington 0 22 3.850148 0.000000 766 +instead 0 22 3.850148 0.000000 756 +love 0 21 3.912023 0.000000 804 +leav 0 21 3.912023 0.000000 772 +avoid 0 21 3.912023 0.000000 799 +applet 2 20 3.951244 7.902488 827 +qualiti 1 20 3.951244 3.951244 832 +wrote 0 20 3.951244 0.000000 830 +repositori 1 17 4.110874 4.110874 932 +thought 0 17 4.110874 0.000000 945 +adam 0 17 4.110874 0.000000 934 +diego 0 16 4.174387 0.000000 992 +earli 0 16 4.174387 0.000000 968 +spatial 0 16 4.174387 0.000000 988 +anyth 0 16 4.174387 0.000000 998 +dilbert 0 16 4.174387 0.000000 996 +cognit 0 16 4.174387 0.000000 986 +chateau 0 16 4.174387 0.000000 997 +reflect 1 15 4.248495 4.248495 1034 +rate 0 15 4.248495 0.000000 1037 +capabl 0 15 4.248495 0.000000 1016 +enough 0 15 4.248495 0.000000 1040 +doesn 0 15 4.248495 0.000000 1055 +draw 0 14 4.317488 0.000000 1086 +econom 0 13 4.382027 0.000000 1184 +iter 1 12 4.465908 4.465908 1206 +buffer 0 12 4.465908 0.000000 1211 +entertain 0 12 4.465908 0.000000 1286 +hank 0 12 4.465908 0.000000 1253 +assembl 0 12 4.465908 0.000000 1207 +transpar 1 11 4.553877 4.553877 1325 +keyword 0 11 4.553877 0.000000 1356 +enter 1 10 4.653960 4.653960 1454 +thecomput 0 10 4.653960 0.000000 1408 +awai 0 10 4.653960 0.000000 1447 +metacrawl 0 10 4.653960 0.000000 1455 +pick 1 9 4.753590 4.753590 1498 +prefer 1 9 4.753590 4.753590 1491 +congress 0 9 4.753590 0.000000 1592 +oop 0 8 4.875197 0.000000 1778 +realist 0 8 4.875197 0.000000 1665 +judg 0 8 4.875197 0.000000 1644 +guggenheim 0 8 4.875197 0.000000 1759 +hit 0 7 5.010635 0.000000 1965 +occasion 0 7 5.010635 0.000000 1905 +polit 1 6 5.164786 5.164786 2115 +impress 0 6 5.164786 0.000000 2096 +unpublish 0 6 5.164786 0.000000 2226 +stage 0 5 5.347108 0.000000 2488 +contest 0 5 5.347108 0.000000 2273 +particl 0 5 5.347108 0.000000 2436 +carlson 0 5 5.347108 0.000000 2351 +chess 0 5 5.347108 0.000000 2486 +annex 0 5 5.347108 0.000000 2572 +doubl 1 4 5.568345 5.568345 2951 +sorri 1 4 5.568345 5.568345 3059 +gradual 0 4 5.568345 0.000000 2997 +mess 0 4 5.568345 0.000000 2886 +jar 1 3 5.857933 5.857933 3223 +prison 1 3 5.857933 5.857933 3907 +slave 0 3 5.857933 0.000000 3959 +emul 0 3 5.857933 0.000000 3944 +cleaner 0 3 5.857933 0.000000 3775 +civil 0 3 5.857933 0.000000 3908 +evolutionari 0 3 5.857933 0.000000 3898 +boolean 0 3 5.857933 0.000000 3202 +recurr 0 3 5.857933 0.000000 3740 +sujai 0 3 5.857933 0.000000 3960 +parekh 0 3 5.857933 0.000000 3961 +shadow 0 3 5.857933 0.000000 3519 +inventor 0 3 5.857933 0.000000 3695 +quicktim 0 3 5.857933 0.000000 3493 +gamelan 1 2 6.263398 6.263398 4221 +elicit 1 2 6.263398 6.263398 4294 +flight 1 2 6.263398 6.263398 5911 +ucsd 1 2 6.263398 6.263398 5192 +scienceher 0 2 6.263398 0.000000 5912 +thejava 0 2 6.263398 0.000000 4704 +certainli 0 2 6.263398 0.000000 4090 +belew 0 2 6.263398 0.000000 4739 +lesh 0 2 6.263398 0.000000 5895 +tracer 0 2 6.263398 0.000000 5913 +inc 0 2 6.263398 0.000000 5914 +duel 0 2 6.263398 0.000000 5855 +rai 0 2 6.263398 0.000000 5915 +theanim 0 2 6.263398 0.000000 5852 +thed 0 2 6.263398 0.000000 4963 +mbquicktim 0 2 6.263398 0.000000 5916 +linden 1 1 6.957497 6.957497 15435 +webview 1 1 6.957497 6.957497 15436 +wasrat 1 1 6.957497 6.957497 15437 +andwa 1 1 6.957497 6.957497 15438 +neuralnetwork 1 1 6.957497 6.957497 15439 +headless 1 1 6.957497 6.957497 15440 +horseman 1 1 6.957497 6.957497 15441 +lindenmi 0 1 6.957497 0.000000 15442 +wifecorina 0 1 6.957497 0.000000 15443 +lofti 0 1 6.957497 0.000000 15444 +undergraduatedegre 0 1 6.957497 0.000000 15445 +anodd 0 1 6.957497 0.000000 15446 +mactiv 0 1 6.957497 0.000000 15447 +orset 0 1 6.957497 0.000000 15448 +altavistawebviewand 0 1 6.957497 0.000000 15449 +metawebview 0 1 6.957497 0.000000 15450 +foraltavista 0 1 6.957497 0.000000 15451 +searchservic 0 1 6.957497 0.000000 15452 +dialog 0 1 6.957497 0.000000 15453 +travelag 0 1 6.957497 0.000000 15454 +whileallow 0 1 6.957497 0.000000 15455 +andjar 0 1 6.957497 0.000000 15456 +altavistawebview 0 1 6.957497 0.000000 15457 +winner 0 1 6.957497 0.000000 15458 +walsh 0 1 6.957497 0.000000 15459 +meilleur 0 1 6.957497 0.000000 15460 +ballet 0 1 6.957497 0.000000 15461 +flicker 0 1 6.957497 0.000000 15462 +standardsto 0 1 6.957497 0.000000 15463 +mylgramm 0 1 6.957497 0.000000 15464 +lgrammer 0 1 6.957497 0.000000 15465 +theparticletre 0 1 6.957497 0.000000 15466 +thejar 0 1 6.957497 0.000000 15467 +dawn 0 1 6.957497 0.000000 15468 +ademonstr 0 1 6.957497 0.000000 15469 +cansuccessfulli 0 1 6.957497 0.000000 15470 +myriadsoftwar 0 1 6.957497 0.000000 15471 +filippo 0 1 6.957497 0.000000 15472 +menzer 0 1 6.957497 0.000000 15473 +latentenergi 0 1 6.957497 0.000000 15474 +developingartifici 0 1 6.957497 0.000000 15475 +enviro 0 1 6.957497 0.000000 15476 +theautom 0 1 6.957497 0.000000 15477 +assit 0 1 6.957497 0.000000 15478 +majeski 0 1 6.957497 0.000000 15479 +spitzer 0 1 6.957497 0.000000 15480 +localizedinteract 0 1 6.957497 0.000000 15481 +dilemma 0 1 6.957497 0.000000 15482 +krishnamoorthi 0 1 6.957497 0.000000 15483 +paturi 0 1 6.957497 0.000000 15484 +blume 0 1 6.957497 0.000000 15485 +liden 0 1 6.957497 0.000000 15486 +esen 0 1 6.957497 0.000000 15487 +hardwaretradeoff 0 1 6.957497 0.000000 15488 +sdilemma 0 1 6.957497 0.000000 15489 +funrai 0 1 6.957497 0.000000 15490 +closeup 0 1 6.957497 0.000000 15491 +sphere 0 1 6.957497 0.000000 15492 +withreflect 0 1 6.957497 0.000000 15493 +adaptivesampl 0 1 6.957497 0.000000 15494 +thespher 0 1 6.957497 0.000000 15495 +causingth 0 1 6.957497 0.000000 15496 +refract 0 1 6.957497 0.000000 15497 +surfaceand 0 1 6.957497 0.000000 15498 +strike 0 1 6.957497 0.000000 15499 +alow 0 1 6.957497 0.000000 15500 +anyfurth 0 1 6.957497 0.000000 15501 +resembl 0 1 6.957497 0.000000 15502 +glinden 0 1 6.957497 0.000000 15503 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..ef71b024 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +relat 0 139 1.945910 0.000000 68 +compil 0 122 2.079442 0.000000 96 +octob 0 89 2.397895 0.000000 156 +homework 0 79 2.564949 0.000000 193 +brian 1 38 3.295837 3.295837 466 +computersci 0 30 3.555348 0.000000 562 +trip 0 14 4.317488 0.000000 1113 +grant 1 12 4.465908 4.465908 1216 +awai 0 10 4.653960 0.000000 1447 +daughter 0 7 5.010635 0.000000 1943 +pagebrian 0 4 5.568345 0.000000 3054 +groupuw 0 3 5.857933 0.000000 3934 +kri 1 1 6.957497 6.957497 15504 +infowork 0 1 6.957497 0.000000 15505 +backgrounduwdynam 0 1 6.957497 0.000000 15506 +engineeringperson 0 1 6.957497 0.000000 15507 +stuffperson 0 1 6.957497 0.000000 15508 +backgroundmi 0 1 6.957497 0.000000 15509 +isismi 0 1 6.957497 0.000000 15510 +singaporemi 0 1 6.957497 0.000000 15511 +bookmarksmi 0 1 6.957497 0.000000 15512 +keylast 0 1 6.957497 0.000000 15513 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..d7499f0f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +paper 0 205 1.609438 0.000000 38 +object 0 138 1.945910 0.000000 79 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +compil 0 122 2.079442 0.000000 96 +number 0 130 2.079442 0.000000 97 +manag 0 114 2.197225 0.000000 125 +pictur 0 89 2.397895 0.000000 160 +environ 0 84 2.484907 0.000000 177 +orient 0 80 2.564949 0.000000 205 +optim 0 79 2.564949 0.000000 197 +complet 0 77 2.564949 0.000000 208 +sieg 0 69 2.708050 0.000000 260 +integr 0 67 2.708050 0.000000 245 +plai 1 60 2.833213 2.833213 307 +back 0 60 2.833213 0.000000 297 +summer 0 56 2.890372 0.000000 311 +much 0 52 2.995732 0.000000 349 +frequent 0 49 3.044522 0.000000 367 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +cool 0 49 3.044522 0.000000 374 +anoth 0 45 3.135494 0.000000 408 +author 1 39 3.258097 3.258097 450 +littl 0 39 3.258097 0.000000 454 +close 0 38 3.295837 0.000000 465 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +taken 0 31 3.496508 0.000000 555 +someth 0 31 3.496508 0.000000 554 +actual 0 28 3.610918 0.000000 604 +team 0 27 3.637586 0.000000 625 +consult 0 24 3.761200 0.000000 687 +sometim 0 24 3.761200 0.000000 696 +spend 1 19 4.007333 4.007333 850 +less 0 18 4.060443 0.000000 892 +along 0 18 4.060443 0.000000 878 +attempt 0 17 4.110874 0.000000 917 +white 0 17 4.110874 0.000000 951 +chateau 0 16 4.174387 0.000000 997 +took 0 16 4.174387 0.000000 1010 +month 0 15 4.248495 0.000000 1025 +dave 1 14 4.317488 4.317488 1098 +trip 1 14 4.317488 4.317488 1113 +council 0 11 4.553877 0.000000 1364 +cecil 1 9 4.753590 4.753590 1547 +hang 0 9 4.753590 0.000000 1499 +grove 1 8 4.875197 4.875197 1675 +pure 0 8 4.875197 0.000000 1776 +vehicl 0 7 5.010635 0.000000 1928 +wouldn 0 7 5.010635 0.000000 1970 +footbal 0 7 5.010635 0.000000 1912 +strip 0 6 5.164786 0.000000 2203 +toronto 0 6 5.164786 0.000000 2156 +spinproject 0 5 5.347108 0.000000 2570 +gui 0 5 5.347108 0.000000 2573 +water 0 5 5.347108 0.000000 2535 +worki 0 4 5.568345 0.000000 3010 +fantasi 0 4 5.568345 0.000000 3055 +silli 0 4 5.568345 0.000000 3038 +raft 0 4 5.568345 0.000000 3060 +langaug 0 3 5.857933 0.000000 3661 +hampshir 0 3 5.857933 0.000000 3280 +kick 0 3 5.857933 0.000000 3962 +hord 0 2 6.263398 0.000000 5917 +aroundth 0 2 6.263398 0.000000 5653 +fring 0 2 6.263398 0.000000 5721 +boi 0 2 6.263398 0.000000 5918 +toseattl 0 2 6.263398 0.000000 5919 +soonish 0 1 6.957497 0.000000 15514 +dilbertfix 0 1 6.957497 0.000000 15515 +thathit 0 1 6.957497 0.000000 15516 +underacheiv 0 1 6.957497 0.000000 15517 +scoutreserv 0 1 6.957497 0.000000 15518 +greaterlowel 0 1 6.957497 0.000000 15519 +casunset 0 1 6.957497 0.000000 15520 +cabin 0 1 6.957497 0.000000 15521 +drove 0 1 6.957497 0.000000 15522 +detour 0 1 6.957497 0.000000 15523 +somehihglight 0 1 6.957497 0.000000 15524 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..beec91f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +public 0 202 1.609438 0.000000 43 +develop 1 174 1.791759 1.791759 53 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +architectur 1 139 1.945910 1.945910 77 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +seattl 0 120 2.079442 0.000000 103 +well 1 109 2.197225 2.197225 121 +person 0 111 2.197225 0.000000 117 +level 1 87 2.484907 2.484907 180 +educ 0 86 2.484907 0.000000 191 +june 0 79 2.564949 0.000000 214 +simul 0 66 2.708050 0.000000 255 +experi 0 64 2.772589 0.000000 283 +improv 0 62 2.772589 0.000000 289 +hardwar 0 51 2.995732 0.000000 350 +prototyp 1 38 3.295837 3.295837 463 +multi 1 36 3.367296 3.367296 493 +survei 0 35 3.401197 0.000000 513 +board 1 33 3.433987 3.433987 528 +curriculum 0 33 3.433987 0.000000 535 +methodolog 0 23 3.806662 0.000000 733 +rout 0 21 3.912023 0.000000 793 +synthesi 0 20 3.951244 0.000000 834 +scott 1 18 4.060443 4.060443 884 +commerci 0 16 4.174387 0.000000 1005 +partit 0 16 4.174387 0.000000 984 +topolog 0 14 4.317488 0.000000 1089 +embed 0 14 4.317488 0.000000 1102 +circuit 1 13 4.382027 4.382027 1131 +asynchron 1 12 4.465908 4.465908 1229 +fpga 2 10 4.653960 9.307920 1433 +rapid 1 10 4.653960 4.653960 1453 +densiti 0 7 5.010635 0.000000 1927 +chinook 0 6 5.164786 0.000000 2229 +triptych 1 4 5.568345 5.568345 3061 +biographi 0 3 5.857933 0.000000 3658 +hauck 1 2 6.263398 6.263398 5920 +montag 1 2 6.263398 6.263398 5921 +springbok 0 2 6.263398 0.000000 5922 +thoughi 0 1 6.957497 0.000000 15525 +vitaeresearch 0 1 6.957497 0.000000 15526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..5e0fdc4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +kevin 0 9 4.753590 0.000000 1482 +hinshaw 0 1 6.957497 0.000000 15527 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..48b422e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +also 1 259 1.386294 1.386294 28 +washington 0 236 1.386294 0.000000 32 +place 1 106 2.197225 2.197225 124 +part 0 98 2.302585 0.000000 129 +imag 0 91 2.397895 0.000000 161 +stuff 1 87 2.484907 2.484907 171 +chang 0 82 2.484907 0.000000 163 +decemb 0 80 2.564949 0.000000 215 +master 0 76 2.564949 0.000000 216 +name 0 72 2.639057 0.000000 220 +html 0 75 2.639057 0.000000 235 +view 0 70 2.708050 0.000000 254 +dept 0 64 2.772589 0.000000 291 +back 0 60 2.833213 0.000000 297 +thesi 0 57 2.890372 0.000000 327 +week 1 52 2.995732 2.995732 343 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +pointer 0 48 3.044522 0.000000 368 +possibl 0 47 3.091042 0.000000 378 +around 0 43 3.178054 0.000000 415 +profil 0 30 3.555348 0.000000 581 +pass 0 28 3.610918 0.000000 611 +univ 0 28 3.610918 0.000000 617 +subject 0 26 3.688879 0.000000 647 +notic 0 25 3.737670 0.000000 675 +head 0 23 3.806662 0.000000 732 +brows 0 23 3.806662 0.000000 726 +half 0 21 3.912023 0.000000 776 +busi 0 21 3.912023 0.000000 784 +unfortun 0 13 4.382027 0.000000 1170 +rememb 0 12 4.465908 0.000000 1217 +neat 0 12 4.465908 0.000000 1263 +daughter 0 7 5.010635 0.000000 1943 +chinook 0 6 5.164786 0.000000 2229 +upper 0 5 5.347108 0.000000 2481 +wast 0 5 5.347108 0.000000 2537 +silli 0 4 5.568345 0.000000 3038 +washingtonseattl 0 4 5.568345 0.000000 3044 +macduff 1 2 6.263398 6.263398 5923 +emma 0 2 6.263398 0.000000 5546 +obsess 0 2 6.263398 0.000000 5924 +ultrasound 0 1 6.957497 0.000000 15528 +elspeth 0 1 6.957497 0.000000 15529 +unborn 0 1 6.957497 0.000000 15530 +fromconcept 0 1 6.957497 0.000000 15531 +ripe 0 1 6.957497 0.000000 15532 +inmid 0 1 6.957497 0.000000 15533 +ly 0 1 6.957497 0.000000 15534 +lookingup 0 1 6.957497 0.000000 15535 +torso 0 1 6.957497 0.000000 15536 +theleft 0 1 6.957497 0.000000 15537 +impend 0 1 6.957497 0.000000 15538 +fatherhood 0 1 6.957497 0.000000 15539 +myspam 0 1 6.957497 0.000000 15540 +usingwebcrawl 0 1 6.957497 0.000000 15541 +frogstv 0 1 6.957497 0.000000 15542 +nationpenn 0 1 6.957497 0.000000 15543 +tellermus 0 1 6.957497 0.000000 15544 +lyricsian 0 1 6.957497 0.000000 15545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..6d4c055b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,146 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +softwar 0 220 1.386294 0.000000 30 +oper 0 180 1.609438 0.000000 34 +network 2 168 1.791759 3.583518 61 +distribut 1 162 1.791759 1.791759 51 +phone 0 175 1.791759 0.000000 45 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +object 1 138 1.945910 1.945910 79 +hall 0 146 1.945910 0.000000 65 +file 0 132 1.945910 0.000000 70 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +area 0 144 1.945910 0.000000 80 +high 1 130 2.079442 2.079442 101 +provid 1 121 2.079442 2.079442 94 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +postscript 0 131 2.079442 0.000000 90 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +memori 2 101 2.302585 4.605170 139 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +pictur 1 89 2.397895 2.397895 160 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +imag 0 91 2.397895 0.000000 161 +environ 1 84 2.484907 2.484907 177 +level 0 87 2.484907 0.000000 180 +orient 1 80 2.564949 2.564949 205 +decemb 0 80 2.564949 0.000000 215 +name 0 72 2.639057 0.000000 220 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +sieg 0 69 2.708050 0.000000 260 +main 0 67 2.708050 0.000000 256 +order 0 69 2.708050 0.000000 249 +septemb 0 65 2.772589 0.000000 274 +virtual 0 62 2.772589 0.000000 285 +visit 0 63 2.772589 0.000000 288 +back 0 60 2.833213 0.000000 297 +march 0 61 2.833213 0.000000 295 +thesi 0 57 2.890372 0.000000 327 +local 0 55 2.944439 0.000000 334 +processor 0 54 2.944439 0.000000 335 +cach 1 41 3.218876 3.218876 432 +small 0 39 3.258097 0.000000 447 +map 0 39 3.258097 0.000000 452 +workstat 0 37 3.332205 0.000000 479 +cost 0 37 3.332205 0.000000 480 +mean 0 37 3.332205 0.000000 477 +global 1 34 3.401197 3.401197 520 +cluster 0 28 3.610918 0.000000 612 +load 0 28 3.610918 0.000000 601 +primari 0 25 3.737670 0.000000 669 +fundament 0 25 3.737670 0.000000 661 +mike 0 24 3.761200 0.000000 703 +store 0 24 3.761200 0.000000 693 +size 1 23 3.806662 3.806662 713 +reduc 1 22 3.850148 3.850148 759 +disk 1 22 3.850148 3.850148 747 +hierarchi 0 22 3.850148 0.000000 744 +unit 0 21 3.912023 0.000000 779 +thu 0 21 3.912023 0.000000 773 +increas 0 20 3.951244 0.000000 829 +speed 1 18 4.060443 4.060443 911 +encourag 0 18 4.060443 0.000000 880 +engineeringunivers 0 17 4.110874 0.000000 959 +debug 0 17 4.110874 0.000000 944 +transfer 1 16 4.174387 4.174387 967 +latenc 1 16 4.174387 4.174387 993 +modern 0 16 4.174387 0.000000 966 +remot 1 15 4.248495 4.248495 1041 +levi 1 14 4.317488 4.317488 1093 +karlin 1 13 4.382027 4.382027 1176 +washingtonbox 0 13 4.382027 0.000000 1200 +introduc 0 13 4.382027 0.000000 1139 +unfortun 0 13 4.382027 0.000000 1170 +galleri 0 13 4.382027 0.000000 1192 +hank 0 12 4.465908 0.000000 1253 +mari 0 12 4.465908 0.000000 1266 +anna 0 12 4.465908 0.000000 1292 +franc 0 12 4.465908 0.000000 1276 +node 1 11 4.553877 4.553877 1326 +extrem 0 11 4.553877 0.000000 1330 +vernon 1 9 4.753590 4.753590 1556 +voelker 1 9 4.753590 4.753590 1557 +factor 0 9 4.753590 0.000000 1544 +postdoc 0 8 4.875197 0.000000 1724 +evan 0 8 4.875197 0.000000 1633 +inproceed 0 8 4.875197 0.000000 1670 +feelei 1 7 5.010635 5.010635 1859 +trend 0 7 5.010635 0.000000 1842 +geoff 0 6 5.164786 0.000000 2124 +temporari 0 6 5.164786 0.000000 2090 +li 0 5 5.347108 0.000000 2500 +seventh 0 5 5.347108 0.000000 2464 +joseph 0 5 5.347108 0.000000 2327 +coverag 0 4 5.568345 0.000000 2656 +greatli 0 3 5.857933 0.000000 3541 +europ 0 3 5.857933 0.000000 3761 +jamrozik 1 2 6.263398 6.263398 5925 +subpag 1 2 6.263398 6.263398 5926 +amort 0 2 6.263398 0.000000 4370 +odd 0 2 6.263398 0.000000 5565 +theuniversit 0 2 6.263398 0.000000 5927 +fourier 0 2 6.263398 0.000000 5698 +grenobl 0 2 6.263398 0.000000 5928 +laboratoir 0 2 6.263398 0.000000 5929 +herv 0 1 6.957497 0.000000 15546 +jamrozikherv 0 1 6.957497 0.000000 15547 +memoi 0 1 6.957497 0.000000 15548 +therebi 0 1 6.957497 0.000000 15549 +intens 0 1 6.957497 0.000000 15550 +lightli 0 1 6.957497 0.000000 15551 +guideproject 0 1 6.957497 0.000000 15552 +bull 0 1 6.957497 0.000000 15553 +imaginstitut 0 1 6.957497 0.000000 15554 +snot 0 1 6.957497 0.000000 15555 +louvr 0 1 6.957497 0.000000 15556 +somefamili 0 1 6.957497 0.000000 15557 +somefriend 0 1 6.957497 0.000000 15558 +eduv 0 1 6.957497 0.000000 15559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..4f679a17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +modifi 0 178 1.609438 0.000000 35 +hall 0 146 1.945910 0.000000 65 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +frequent 0 49 3.044522 0.000000 367 +weather 1 28 3.610918 3.610918 618 +channel 0 7 5.010635 0.000000 1836 +forecast 0 6 5.164786 0.000000 2171 +jason 1 3 5.857933 5.857933 3389 +eduaddress 0 3 5.857933 0.000000 3762 +secoski 1 2 6.263398 6.263398 4526 +pagejason 0 1 6.957497 0.000000 15560 +cunivers 0 1 6.957497 0.000000 15561 +boxseattl 0 1 6.957497 0.000000 15562 +projectseattl 0 1 6.957497 0.000000 15563 +secoskylast 0 1 6.957497 0.000000 15564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..2ce3d0f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +look 0 107 2.197225 0.000000 115 +question 0 91 2.397895 0.000000 141 +educ 1 86 2.484907 2.484907 191 +stuff 1 87 2.484907 2.484907 171 +school 0 84 2.484907 0.000000 188 +intellig 0 72 2.639057 0.000000 225 +multimedia 0 68 2.708050 0.000000 258 +java 0 70 2.708050 0.000000 248 +artifici 0 63 2.772589 0.000000 280 +virtual 0 62 2.772589 0.000000 285 +copi 0 63 2.772589 0.000000 284 +cool 0 49 3.044522 0.000000 374 +effect 0 46 3.091042 0.000000 385 +mark 0 44 3.135494 0.000000 403 +made 0 44 3.135494 0.000000 398 +music 0 42 3.218876 0.000000 436 +realli 0 40 3.258097 0.000000 444 +littl 0 39 3.258097 0.000000 454 +industri 0 38 3.295837 0.000000 464 +feel 0 37 3.332205 0.000000 483 +download 0 36 3.367296 0.000000 489 +human 0 32 3.465736 0.000000 546 +travel 0 30 3.555348 0.000000 579 +progress 0 28 3.610918 0.000000 598 +static 0 27 3.637586 0.000000 619 +mine 0 26 3.688879 0.000000 654 +experiment 0 26 3.688879 0.000000 645 +never 0 25 3.737670 0.000000 671 +william 0 22 3.850148 0.000000 765 +applet 1 20 3.951244 3.951244 827 +wrote 0 20 3.951244 0.000000 830 +spend 0 19 4.007333 0.000000 850 +demo 0 18 4.060443 0.000000 888 +layer 0 17 4.110874 0.000000 926 +macintosh 0 17 4.110874 0.000000 920 +signific 0 13 4.382027 0.000000 1125 +baer 1 11 4.553877 4.553877 1353 +scienceat 0 11 4.553877 0.000000 1375 +eight 0 11 4.553877 0.000000 1331 +metacrawl 0 10 4.653960 0.000000 1455 +creativ 0 8 4.875197 0.000000 1777 +dream 0 6 5.164786 0.000000 2165 +jeremi 1 5 5.347108 5.347108 2360 +interfer 0 5 5.347108 0.000000 2494 +puzzl 0 5 5.347108 0.000000 2507 +silli 0 4 5.568345 0.000000 3038 +thati 0 4 5.568345 0.000000 2616 +museum 0 3 5.857933 0.000000 3933 +computerinteract 0 2 6.263398 0.000000 5829 +stress 0 2 6.263398 0.000000 4146 +baerjeremi 0 1 6.957497 0.000000 15565 +twain 0 1 6.957497 0.000000 15566 +shakespearei 0 1 6.957497 0.000000 15567 +engineeringtool 0 1 6.957497 0.000000 15568 +pierian 0 1 6.957497 0.000000 15569 +softwareoregon 0 1 6.957497 0.000000 15570 +omsi 0 1 6.957497 0.000000 15571 +pomona 0 1 6.957497 0.000000 15572 +collegeher 0 1 6.957497 0.000000 15573 +searchcopyright 0 1 6.957497 0.000000 15574 +jbaer 0 1 6.957497 0.000000 15575 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..22c6448b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +phone 1 175 1.791759 1.791759 45 +address 0 170 1.791759 0.000000 62 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +note 0 142 1.945910 0.000000 67 +postscript 0 131 2.079442 0.000000 90 +control 1 82 2.484907 2.484907 164 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +come 0 78 2.564949 0.000000 202 +sieg 0 69 2.708050 0.000000 260 +import 0 65 2.772589 0.000000 282 +foundat 0 62 2.772589 0.000000 286 +browser 1 56 2.890372 2.890372 313 +undergradu 0 54 2.944439 0.000000 338 +finger 0 52 2.995732 0.000000 354 +electron 0 47 3.091042 0.000000 379 +tree 0 36 3.367296 0.000000 492 +soon 0 36 3.367296 0.000000 494 +return 0 34 3.401197 0.000000 502 +statu 0 18 4.060443 0.000000 885 +attempt 0 17 4.110874 0.000000 917 +latex 0 14 4.317488 0.000000 1064 +rice 0 11 4.553877 0.000000 1336 +transmiss 0 9 4.753590 0.000000 1588 +jeremi 1 5 5.347108 5.347108 2360 +adjust 0 5 5.347108 0.000000 2422 +frontier 0 3 5.857933 0.000000 3771 +alma 0 3 5.857933 0.000000 3963 +schedulemi 0 2 6.263398 0.000000 5843 +mater 0 2 6.263398 0.000000 5930 +buhler 1 1 6.957497 6.957497 15576 +jbuhler 1 1 6.957497 6.957497 15577 +pagejeremi 0 1 6.957497 0.000000 15578 +pagedo 0 1 6.957497 0.000000 15579 +tako 0 1 6.957497 0.000000 15580 +stufflectur 0 1 6.957497 0.000000 15581 +suffix 0 1 6.957497 0.000000 15582 +keycyb 0 1 6.957497 0.000000 15583 +grinsrecommend 0 1 6.957497 0.000000 15584 +readingmi 0 1 6.957497 0.000000 15585 +universityquot 0 1 6.957497 0.000000 15586 +quotesmi 0 1 6.957497 0.000000 15587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..c2798fa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,271 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +languag 2 227 1.386294 2.772588 26 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +base 0 165 1.791759 0.000000 50 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +object 1 138 1.945910 1.945910 79 +area 0 144 1.945910 0.000000 80 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +compil 2 122 2.079442 4.158884 96 +seattl 1 120 2.079442 2.079442 103 +postscript 1 131 2.079442 2.079442 90 +analysi 1 124 2.079442 2.079442 98 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +code 0 108 2.197225 0.000000 116 +look 0 107 2.197225 0.000000 115 +techniqu 1 99 2.302585 2.302585 138 +access 0 102 2.302585 0.000000 136 +part 0 98 2.302585 0.000000 129 +real 0 93 2.397895 0.000000 144 +larg 1 82 2.484907 2.484907 168 +build 0 85 2.484907 0.000000 184 +environ 0 84 2.484907 0.000000 177 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +level 0 87 2.484907 0.000000 180 +contain 0 81 2.484907 0.000000 174 +optim 2 79 2.564949 5.129898 197 +orient 1 80 2.564949 2.564949 205 +exampl 1 77 2.564949 2.564949 195 +dynam 0 76 2.564949 0.000000 194 +line 1 75 2.639057 2.639057 231 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +meet 0 72 2.639057 0.000000 229 +would 1 67 2.708050 2.708050 251 +view 1 70 2.708050 2.708050 254 +integr 1 67 2.708050 2.708050 245 +sieg 0 69 2.708050 0.000000 260 +goal 0 66 2.708050 0.000000 250 +receiv 0 66 2.708050 0.000000 244 +guid 1 63 2.772589 2.772589 267 +plan 0 65 2.772589 0.000000 272 +laboratori 0 63 2.772589 0.000000 292 +experi 0 64 2.772589 0.000000 283 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +interact 0 62 2.772589 0.000000 270 +result 0 65 2.772589 0.000000 281 +back 0 60 2.833213 0.000000 297 +explor 1 58 2.890372 2.890372 324 +summer 0 56 2.890372 0.000000 311 +direct 0 57 2.890372 0.000000 316 +extens 0 53 2.944439 0.000000 340 +three 0 54 2.944439 0.000000 330 +much 0 52 2.995732 0.000000 349 +particular 0 51 2.995732 0.000000 352 +run 0 51 2.995732 0.000000 347 +adapt 0 46 3.091042 0.000000 387 +quarter 0 47 3.091042 0.000000 389 +featur 0 46 3.091042 0.000000 386 +keep 0 44 3.135494 0.000000 409 +made 0 44 3.135494 0.000000 398 +better 0 45 3.135494 0.000000 401 +even 0 45 3.135494 0.000000 393 +around 0 43 3.178054 0.000000 415 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +author 1 39 3.258097 3.258097 450 +join 0 39 3.258097 0.000000 457 +littl 0 39 3.258097 0.000000 454 +probabl 0 40 3.258097 0.000000 455 +realli 0 40 3.258097 0.000000 444 +vita 0 38 3.295837 0.000000 473 +seminar 0 38 3.295837 0.000000 470 +feel 0 37 3.332205 0.000000 483 +singl 0 34 3.401197 0.000000 510 +approxim 0 35 3.401197 0.000000 509 +word 0 34 3.401197 0.000000 508 +curriculum 0 33 3.433987 0.000000 535 +obtain 0 33 3.433987 0.000000 534 +kind 0 32 3.465736 0.000000 541 +independ 0 32 3.465736 0.000000 548 +someth 0 31 3.496508 0.000000 554 +exist 0 30 3.555348 0.000000 569 +profil 0 30 3.555348 0.000000 581 +specifi 0 30 3.555348 0.000000 568 +travel 0 30 3.555348 0.000000 579 +pass 1 28 3.610918 3.610918 611 +scale 0 28 3.610918 0.000000 613 +becom 0 28 3.610918 0.000000 603 +framework 0 28 3.610918 0.000000 606 +effort 0 26 3.688879 0.000000 652 +consist 0 26 3.688879 0.000000 651 +enjoi 0 26 3.688879 0.000000 660 +rather 0 26 3.688879 0.000000 642 +jeff 1 25 3.737670 3.737670 673 +spent 1 25 3.737670 3.737670 676 +concern 0 25 3.737670 0.000000 666 +toward 0 25 3.737670 0.000000 668 +wai 0 25 3.737670 0.000000 662 +client 0 25 3.737670 0.000000 679 +never 0 25 3.737670 0.000000 671 +highli 0 23 3.806662 0.000000 725 +defin 0 22 3.850148 0.000000 746 +recommend 0 22 3.850148 0.000000 737 +hous 0 21 3.912023 0.000000 801 +programminglanguag 0 21 3.912023 0.000000 782 +flexibl 0 21 3.912023 0.000000 792 +love 0 21 3.912023 0.000000 804 +entir 0 20 3.951244 0.000000 811 +nice 0 20 3.951244 0.000000 809 +minut 0 20 3.951244 0.000000 810 +predict 0 19 4.007333 0.000000 855 +four 0 18 4.060443 0.000000 905 +whole 1 17 4.110874 4.110874 940 +permit 1 16 4.174387 4.174387 962 +chateau 0 16 4.174387 0.000000 997 +letter 0 16 4.174387 0.000000 981 +anyth 0 16 4.174387 0.000000 998 +took 0 16 4.174387 0.000000 1010 +track 0 15 4.248495 0.000000 1029 +enough 0 15 4.248495 0.000000 1040 +doesn 0 15 4.248495 0.000000 1055 +dean 1 14 4.317488 4.317488 1104 +spin 0 14 4.317488 0.000000 1121 +split 0 14 4.317488 0.000000 1078 +wife 1 13 4.382027 4.382027 1196 +primarili 0 13 4.382027 0.000000 1185 +composit 0 13 4.382027 0.000000 1150 +hotlist 0 13 4.382027 0.000000 1199 +uniqu 0 12 4.465908 0.000000 1228 +iter 0 12 4.465908 0.000000 1206 +food 0 12 4.465908 0.000000 1285 +walk 0 12 4.465908 0.000000 1281 +valid 0 11 4.553877 0.000000 1299 +moment 0 11 4.553877 0.000000 1379 +true 0 10 4.653960 0.000000 1422 +guess 0 10 4.653960 0.000000 1443 +cecil 1 9 4.753590 4.753590 1547 +palo 0 9 4.753590 0.000000 1590 +alto 0 9 4.753590 0.000000 1591 +hang 0 9 4.753590 0.000000 1499 +inter 0 9 4.753590 0.000000 1530 +compos 0 9 4.753590 0.000000 1527 +sound 0 9 4.753590 0.000000 1605 +ride 1 8 4.875197 4.875197 1741 +pure 0 8 4.875197 0.000000 1776 +isol 0 8 4.875197 0.000000 1663 +closur 0 8 4.875197 0.000000 1643 +bug 0 7 5.010635 0.000000 1801 +dead 0 7 5.010635 0.000000 1840 +daughter 0 7 5.010635 0.000000 1943 +affect 1 6 5.164786 5.164786 2044 +park 0 6 5.164786 0.000000 2218 +increment 0 6 5.164786 0.000000 2206 +creation 0 6 5.164786 0.000000 2069 +vortex 1 5 5.347108 5.347108 2362 +spinproject 0 5 5.347108 0.000000 2570 +unnecessari 0 5 5.347108 0.000000 2506 +lesson 0 5 5.347108 0.000000 2568 +western 0 4 5.568345 0.000000 3062 +usedto 0 4 5.568345 0.000000 2643 +inlin 0 4 5.568345 0.000000 2964 +enjoy 0 4 5.568345 0.000000 2937 +insur 0 4 5.568345 0.000000 2939 +coverag 0 4 5.568345 0.000000 2656 +nearbi 0 3 5.857933 0.000000 3291 +langaug 0 3 5.857933 0.000000 3661 +stillmaintain 0 3 5.857933 0.000000 3964 +ofobject 0 3 5.857933 0.000000 3399 +forobject 0 3 5.857933 0.000000 3965 +kick 0 3 5.857933 0.000000 3962 +habit 0 3 5.857933 0.000000 3777 +somedai 0 3 5.857933 0.000000 3919 +fantast 0 3 5.857933 0.000000 3966 +flight 1 2 6.263398 6.263398 5911 +bought 0 2 6.263398 0.000000 5165 +projectsi 0 2 6.263398 0.000000 5931 +andto 0 2 6.263398 0.000000 5771 +vortexcompil 0 2 6.263398 0.000000 5932 +interfacesand 0 2 6.263398 0.000000 5206 +andhow 0 2 6.263398 0.000000 5933 +intraprocedur 0 2 6.263398 0.000000 5934 +coke 0 2 6.263398 0.000000 5935 +caffein 0 2 6.263398 0.000000 5936 +galvin 0 2 6.263398 0.000000 4160 +fly 0 2 6.263398 0.000000 5937 +anymor 0 2 6.263398 0.000000 5938 +downtown 0 2 6.263398 0.000000 5642 +wing 0 2 6.263398 0.000000 4864 +lengthi 0 2 6.263398 0.000000 4273 +jdean 0 2 6.263398 0.000000 4455 +biplan 1 1 6.957497 6.957497 15588 +dang 0 1 6.957497 0.000000 15589 +weren 0 1 6.957497 0.000000 15590 +plansi 0 1 6.957497 0.000000 15591 +sunni 0 1 6.957497 0.000000 15592 +menlo 0 1 6.957497 0.000000 15593 +avehicl 0 1 6.957497 0.000000 15594 +weintend 0 1 6.957497 0.000000 15595 +codein 0 1 6.957497 0.000000 15596 +systemmicrokernel 0 1 6.957497 0.000000 15597 +especiallyprofil 0 1 6.957497 0.000000 15598 +howwhol 0 1 6.957497 0.000000 15599 +assumedthat 0 1 6.957497 0.000000 15600 +manycompromis 0 1 6.957497 0.000000 15601 +wholeprogram 0 1 6.957497 0.000000 15602 +underlyingimplement 0 1 6.957497 0.000000 15603 +principaldesign 0 1 6.957497 0.000000 15604 +independentintermedi 0 1 6.957497 0.000000 15605 +ishigh 0 1 6.957497 0.000000 15606 +messagesend 0 1 6.957497 0.000000 15607 +wayof 0 1 6.957497 0.000000 15608 +repeatedli 0 1 6.957497 0.000000 15609 +passessepar 0 1 6.957497 0.000000 15610 +classanalysi 0 1 6.957497 0.000000 15611 +aliasanalysi 0 1 6.957497 0.000000 15612 +structuringoptim 0 1 6.957497 0.000000 15613 +stillallow 0 1 6.957497 0.000000 15614 +eachoth 0 1 6.957497 0.000000 15615 +flowanalys 0 1 6.957497 0.000000 15616 +withrel 0 1 6.957497 0.000000 15617 +assignmentelimin 0 1 6.957497 0.000000 15618 +publicationssom 0 1 6.957497 0.000000 15619 +personali 0 1 6.957497 0.000000 15620 +spici 0 1 6.957497 0.000000 15621 +mild 0 1 6.957497 0.000000 15622 +heidi 0 1 6.957497 0.000000 15623 +victoria 0 1 6.957497 0.000000 15624 +honeymoon 0 1 6.957497 0.000000 15625 +kauai 0 1 6.957497 0.000000 15626 +hurrican 0 1 6.957497 0.000000 15627 +iniki 0 1 6.957497 0.000000 15628 +puget 0 1 6.957497 0.000000 15629 +dare 0 1 6.957497 0.000000 15630 +sadli 0 1 6.957497 0.000000 15631 +passeng 0 1 6.957497 0.000000 15632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..8eeab4af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +softwar 2 220 1.386294 2.772588 30 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +design 0 213 1.386294 0.000000 25 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +phone 0 175 1.791759 0.000000 45 +process 1 142 1.945910 1.945910 72 +problem 0 147 1.945910 0.000000 75 +analysi 2 124 2.079442 4.158884 98 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +machin 0 129 2.079442 0.000000 95 +confer 0 126 2.079442 0.000000 100 +document 0 121 2.079442 0.000000 89 +high 0 130 2.079442 0.000000 101 +specif 1 106 2.197225 2.197225 106 +place 1 106 2.197225 2.197225 124 +make 1 111 2.197225 2.197225 120 +structur 0 106 2.197225 0.000000 105 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +techniqu 0 99 2.302585 0.000000 138 +commun 0 95 2.397895 0.000000 157 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +requir 2 81 2.484907 4.969814 167 +control 1 82 2.484907 2.484907 164 +ieee 1 86 2.484907 2.484907 190 +academ 0 82 2.484907 0.000000 178 +school 0 84 2.484907 0.000000 188 +state 1 76 2.564949 2.564949 207 +exampl 1 77 2.564949 2.564949 195 +come 0 78 2.564949 0.000000 202 +dynam 0 76 2.564949 0.000000 194 +name 0 72 2.639057 0.000000 220 +html 0 75 2.639057 0.000000 235 +write 0 72 2.639057 0.000000 222 +workshop 0 71 2.639057 0.000000 239 +integr 0 67 2.708050 0.000000 245 +complex 0 64 2.772589 0.000000 269 +improv 0 62 2.772589 0.000000 289 +septemb 0 65 2.772589 0.000000 274 +januari 0 62 2.772589 0.000000 264 +experi 0 64 2.772589 0.000000 283 +best 0 59 2.833213 0.000000 299 +reason 0 57 2.890372 0.000000 318 +thesi 0 57 2.890372 0.000000 327 +three 0 54 2.944439 0.000000 330 +februari 0 54 2.944439 0.000000 328 +hardwar 0 51 2.995732 0.000000 350 +possibl 0 47 3.091042 0.000000 378 +california 0 46 3.091042 0.000000 388 +discuss 0 45 3.135494 0.000000 399 +transact 1 39 3.258097 3.258097 438 +industri 1 38 3.295837 3.295837 464 +expect 0 37 3.332205 0.000000 484 +especi 1 36 3.367296 3.367296 496 +procedur 1 36 3.367296 3.367296 488 +articl 0 33 3.433987 0.000000 530 +dissert 1 32 3.465736 3.465736 549 +concept 0 32 3.465736 0.000000 537 +rang 0 30 3.555348 0.000000 565 +specifi 0 30 3.555348 0.000000 568 +semant 1 29 3.583519 3.583519 587 +becom 0 28 3.610918 0.000000 603 +great 0 27 3.637586 0.000000 626 +doctor 1 24 3.761200 3.761200 709 +interpret 0 24 3.761200 0.000000 686 +displai 0 23 3.806662 0.000000 712 +flexibl 0 21 3.912023 0.000000 792 +safeti 1 20 3.951244 3.951244 817 +histori 0 19 4.007333 0.000000 853 +less 0 18 4.060443 0.000000 892 +behavior 0 18 4.060443 0.000000 881 +concentr 0 18 4.060443 0.000000 906 +engineeringunivers 0 17 4.110874 0.000000 959 +steven 0 17 4.110874 0.000000 953 +critic 1 16 4.174387 4.174387 982 +advantag 1 16 4.174387 4.174387 987 +chateau 0 16 4.174387 0.000000 997 +devic 0 16 4.174387 0.000000 1002 +convent 0 14 4.317488 0.000000 1072 +draft 0 14 4.317488 0.000000 1085 +washingtonbox 0 13 4.382027 0.000000 1200 +difficulti 0 13 4.382027 0.000000 1132 +signific 0 13 4.382027 0.000000 1125 +nanci 0 12 4.465908 0.000000 1256 +island 0 11 4.553877 0.000000 1345 +valid 0 11 4.553877 0.000000 1299 +summar 0 11 4.553877 0.000000 1295 +alpha 0 11 4.553877 0.000000 1348 +rice 0 11 4.553877 0.000000 1336 +itali 0 11 4.553877 0.000000 1378 +success 0 10 4.653960 0.000000 1390 +kurt 1 9 4.753590 4.753590 1548 +leveson 1 9 4.753590 4.753590 1540 +respect 0 9 4.753590 0.000000 1545 +linguist 0 9 4.753590 0.000000 1593 +guggenheim 0 8 4.875197 0.000000 1759 +fail 0 8 4.875197 0.000000 1655 +perhap 0 8 4.875197 0.000000 1693 +mile 0 8 4.875197 0.000000 1743 +sean 0 8 4.875197 0.000000 1705 +irvin 0 8 4.875197 0.000000 1660 +curv 0 8 4.875197 0.000000 1656 +awar 0 7 5.010635 0.000000 1800 +henc 0 7 5.010635 0.000000 1805 +sixth 0 7 5.010635 0.000000 1917 +price 0 6 5.164786 0.000000 1999 +emerg 0 6 5.164786 0.000000 2038 +transcript 0 6 5.164786 0.000000 2067 +variant 0 6 5.164786 0.000000 2043 +annex 0 5 5.347108 0.000000 2572 +caus 0 5 5.347108 0.000000 2298 +stage 0 5 5.347108 0.000000 2488 +colleagu 0 5 5.347108 0.000000 2304 +ortega 0 5 5.347108 0.000000 2559 +expens 0 4 5.568345 0.000000 2678 +avion 0 4 5.568345 0.000000 3018 +invent 0 4 5.568345 0.000000 3028 +sandi 0 4 5.568345 0.000000 2765 +rsml 1 3 5.857933 5.857933 3967 +hazard 1 3 5.857933 5.857933 3191 +partridg 1 3 5.857933 5.857933 3346 +diagnos 0 3 5.857933 0.000000 3968 +borrow 0 3 5.857933 0.000000 3725 +publicli 0 3 5.857933 0.000000 3687 +diagnost 0 3 5.857933 0.000000 3833 +deviat 2 2 6.263398 12.526796 4826 +rees 1 2 6.263398 6.263398 5939 +heimdahl 1 2 6.263398 6.263398 5940 +unpredict 0 2 6.263398 0.000000 5722 +incid 0 2 6.263398 0.000000 5870 +tca 0 2 6.263398 0.000000 5941 +mat 0 2 6.263398 0.000000 5942 +holli 0 2 6.263398 0.000000 5601 +damon 1 1 6.957497 6.957497 15633 +jdrees 1 1 6.957497 6.957497 15634 +hazop 1 1 6.957497 6.957497 15635 +waxahachi 1 1 6.957497 6.957497 15636 +hildreth 1 1 6.957497 6.957497 15637 +pagejon 0 1 6.957497 0.000000 15638 +reesepost 0 1 6.957497 0.000000 15639 +groupdepart 0 1 6.957497 0.000000 15640 +catastroph 0 1 6.957497 0.000000 15641 +wider 0 1 6.957497 0.000000 15642 +siang 0 1 6.957497 0.000000 15643 +dolin 0 1 6.957497 0.000000 15644 +statechart 0 1 6.957497 0.000000 15645 +como 0 1 6.957497 0.000000 15646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..9b6863e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +architectur 1 139 1.945910 1.945910 77 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +compil 2 122 2.079442 4.158884 96 +postscript 1 131 2.079442 2.079442 90 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +level 1 87 2.484907 2.484907 180 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +dynam 1 76 2.564949 2.564949 194 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +abstract 1 62 2.772589 2.772589 276 +written 1 63 2.772589 2.772589 278 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +juli 0 60 2.833213 0.000000 305 +room 0 59 2.833213 0.000000 301 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +california 0 46 3.091042 0.000000 388 +examin 1 42 3.218876 3.218876 424 +submit 0 39 3.258097 0.000000 440 +annual 0 40 3.258097 0.000000 458 +static 1 27 3.637586 3.637586 619 +compar 0 26 3.688879 0.000000 648 +thread 0 23 3.806662 0.000000 722 +exploit 0 20 3.951244 0.000000 836 +increas 0 20 3.951244 0.000000 829 +stanford 0 17 4.110874 0.000000 955 +coupl 0 17 4.110874 0.000000 939 +choic 0 16 4.174387 0.000000 979 +susan 1 15 4.248495 4.248495 1050 +levi 1 14 4.317488 4.317488 1093 +balanc 1 14 4.317488 4.317488 1112 +dean 0 14 4.317488 0.000000 1104 +convert 0 13 4.382027 0.000000 1122 +sigplan 0 13 4.382027 0.000000 1190 +philadelphia 0 12 4.465908 0.000000 1244 +multithread 1 11 4.553877 4.553877 1315 +henri 1 10 4.653960 4.653960 1417 +franklin 0 10 4.653960 0.000000 1436 +jack 2 8 4.875197 9.750394 1780 +egger 1 8 4.875197 4.875197 1695 +joel 1 8 4.875197 4.875197 1698 +qualifi 0 8 4.875197 0.000000 1721 +simultan 1 6 5.164786 5.164786 2155 +tullsen 1 6 5.164786 5.164786 2081 +rebecca 1 6 5.164786 5.164786 2174 +superscalar 1 6 5.164786 5.164786 2082 +fetch 0 5 5.347108 0.000000 2567 +jolla 0 4 5.568345 0.000000 2988 +emer 1 3 5.857933 5.857933 3969 +stamm 1 3 5.857933 5.857933 3970 +vliw 0 3 5.857933 0.000000 3514 +lojlo 0 2 6.263398 0.000000 5943 +suif 0 2 6.263398 0.000000 5944 +anddean 1 1 6.957497 6.957497 15647 +lojack 0 1 6.957497 0.000000 15648 +loph 0 1 6.957497 0.000000 15649 +eseattl 0 1 6.957497 0.000000 15650 +orsieg 0 1 6.957497 0.000000 15651 +paintbal 0 1 6.957497 0.000000 15652 +yahoojlo 0 1 6.957497 0.000000 15653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..ccb80e01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +topic 0 114 2.197225 0.000000 110 +send 0 114 2.197225 0.000000 109 +user 1 104 2.302585 2.302585 137 +activ 0 84 2.484907 0.000000 182 +interfac 1 79 2.564949 2.564949 209 +want 0 79 2.564949 0.000000 199 +automat 0 61 2.833213 0.000000 306 +browser 0 56 2.890372 0.000000 313 +local 0 55 2.944439 0.000000 334 +suggest 0 53 2.944439 0.000000 331 +visual 0 48 3.044522 0.000000 372 +directori 0 45 3.135494 0.000000 396 +might 0 41 3.218876 0.000000 426 +survei 0 35 3.401197 0.000000 513 +navig 0 21 3.912023 0.000000 796 +engineeringunivers 0 17 4.110874 0.000000 959 +washingtonbox 0 13 4.382027 0.000000 1200 +impress 0 6 5.164786 0.000000 2096 +sherman 0 1 6.957497 0.000000 15654 +shermanjoebob 0 1 6.957497 0.000000 15655 +usami 0 1 6.957497 0.000000 15656 +designinform 0 1 6.957497 0.000000 15657 +useclass 0 1 6.957497 0.000000 15658 +hcreat 0 1 6.957497 0.000000 15659 +pagequ 0 1 6.957497 0.000000 15660 +sarahsoftballstuff 0 1 6.957497 0.000000 15661 +pagesif 0 1 6.957497 0.000000 15662 +tojoebob 0 1 6.957497 0.000000 15663 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..3a5cd118 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +current 1 284 1.098612 1.098612 21 +washington 0 236 1.386294 0.000000 32 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +year 0 148 1.945910 0.000000 84 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +present 0 91 2.397895 0.000000 145 +learn 0 86 2.484907 0.000000 170 +start 0 83 2.484907 0.000000 173 +test 0 66 2.708050 0.000000 252 +abstract 0 62 2.772589 0.000000 276 +organ 0 65 2.772589 0.000000 265 +colleg 0 61 2.833213 0.000000 300 +sever 1 56 2.890372 2.890372 322 +discuss 0 45 3.135494 0.000000 399 +futur 0 41 3.218876 0.000000 427 +expect 0 37 3.332205 0.000000 484 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +neural 0 30 3.555348 0.000000 578 +travel 0 30 3.555348 0.000000 579 +task 0 25 3.737670 0.000000 678 +demonstr 0 24 3.761200 0.000000 694 +theunivers 0 21 3.912023 0.000000 797 +thought 0 17 4.110874 0.000000 945 +cognit 0 16 4.174387 0.000000 986 +achiev 0 14 4.317488 0.000000 1088 +context 0 13 4.382027 0.000000 1153 +accomplish 0 8 4.875197 0.000000 1755 +potenti 0 8 4.875197 0.000000 1690 +creativ 0 8 4.875197 0.000000 1777 +successfulli 0 7 5.010635 0.000000 1869 +earn 0 7 5.010635 0.000000 1788 +biolog 0 6 5.164786 0.000000 2147 +slate 0 6 5.164786 0.000000 2021 +addition 0 4 5.568345 0.000000 2593 +joshua 1 3 5.857933 5.857933 3333 +blank 0 3 5.857933 0.000000 3379 +emul 0 3 5.857933 0.000000 3944 +josh 1 2 6.263398 6.263398 5945 +overviewof 0 2 6.263398 0.000000 5469 +seim 1 1 6.957497 6.957497 15664 +begunin 0 1 6.957497 0.000000 15665 +lockean 0 1 6.957497 0.000000 15666 +observedbehavior 0 1 6.957497 0.000000 15667 +graduatingfrom 0 1 6.957497 0.000000 15668 +volit 0 1 6.957497 0.000000 15669 +taskw 0 1 6.957497 0.000000 15670 +ambulatori 0 1 6.957497 0.000000 15671 +academichierarchi 0 1 6.957497 0.000000 15672 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..6ea08ba6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +move 0 47 3.091042 0.000000 382 +jovan 1 2 6.263398 6.263398 5842 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..5de51ef2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +recent 0 167 1.791759 0.000000 58 +site 0 106 2.197225 0.000000 119 +graphic 1 90 2.397895 2.397895 147 +imag 0 91 2.397895 0.000000 161 +proceed 0 93 2.397895 0.000000 152 +real 0 93 2.397895 0.000000 144 +academ 1 82 2.484907 2.484907 178 +school 0 84 2.484907 0.000000 188 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +david 0 71 2.639057 0.000000 232 +main 1 67 2.708050 2.708050 256 +major 0 56 2.890372 0.000000 315 +york 0 41 3.218876 0.000000 435 +especi 0 36 3.367296 0.000000 496 +power 0 30 3.555348 0.000000 573 +color 0 22 3.850148 0.000000 762 +grad 0 20 3.951244 0.000000 837 +eric 0 19 4.007333 0.000000 870 +women 0 16 4.174387 0.000000 1004 +biologi 1 15 4.248495 4.248495 1049 +comic 0 14 4.317488 0.000000 1103 +jonathan 0 13 4.382027 0.000000 1174 +brad 0 12 4.465908 0.000000 1264 +interestsmi 0 10 4.653960 0.000000 1462 +genet 0 10 4.653960 0.000000 1409 +gain 0 8 4.875197 0.000000 1730 +siggraph 0 8 4.875197 0.000000 1773 +sean 0 8 4.875197 0.000000 1705 +molecular 0 7 5.010635 0.000000 1887 +cat 0 6 5.164786 0.000000 2194 +salesin 0 4 5.568345 0.000000 3051 +alma 1 3 5.857933 5.857933 3963 +joanna 1 2 6.263398 6.263398 4503 +reproduc 0 2 6.263398 0.000000 5519 +powerjoanna 0 1 6.957497 0.000000 15673 +pagehi 0 1 6.957497 0.000000 15674 +uwneat 0 1 6.957497 0.000000 15675 +matercool 0 1 6.957497 0.000000 15676 +shadegraph 0 1 6.957497 0.000000 15677 +uwduoton 0 1 6.957497 0.000000 15678 +reproductionmi 0 1 6.957497 0.000000 15679 +matermost 0 1 6.957497 0.000000 15680 +employmentpubl 0 1 6.957497 0.000000 15681 +stollnitz 0 1 6.957497 0.000000 15682 +duoton 0 1 6.957497 0.000000 15683 +lifepast 0 1 6.957497 0.000000 15684 +homesdiversionsgend 0 1 6.957497 0.000000 15685 +issuesstatu 0 1 6.957497 0.000000 15686 +sciencenow 0 1 6.957497 0.000000 15687 +pagefeminist 0 1 6.957497 0.000000 15688 +onlineultim 0 1 6.957497 0.000000 15689 +frisbeefun 0 1 6.957497 0.000000 15690 +stufffroggi 0 1 6.957497 0.000000 15691 +quotesbrad 0 1 6.957497 0.000000 15692 +musicevan 0 1 6.957497 0.000000 15693 +jokes 0 1 6.957497 0.000000 15694 +pagesmi 0 1 6.957497 0.000000 15695 +herojpow 0 1 6.957497 0.000000 15696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..2138089a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +updat 0 191 1.609438 0.000000 41 +hall 0 146 1.945910 0.000000 65 +seattl 0 120 2.079442 0.000000 103 +homepag 0 93 2.397895 0.000000 148 +sieg 0 69 2.708050 0.000000 260 +august 0 66 2.708050 0.000000 257 +jonathan 1 13 4.382027 4.382027 1174 +ahoi 0 3 5.857933 0.000000 3532 +shake 1 2 6.263398 6.263398 5898 +finderresumlinkslast 0 1 6.957497 0.000000 15697 +jshake 0 1 6.957497 0.000000 15698 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..cf4951ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +juan 0 9 4.753590 0.000000 1580 +alemanyjuan 0 1 6.957497 0.000000 15699 +alemani 0 1 6.957497 0.000000 15700 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..8d71fadf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +class 1 199 1.609438 1.609438 37 +modifi 0 178 1.609438 0.000000 35 +data 1 170 1.791759 1.791759 49 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +year 1 148 1.945910 1.945910 84 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +note 0 142 1.945910 0.000000 67 +hall 0 146 1.945910 0.000000 65 +report 1 131 2.079442 2.079442 92 +seattl 1 120 2.079442 2.079442 103 +find 1 111 2.197225 2.197225 111 +mathemat 0 108 2.197225 0.000000 123 +topic 0 114 2.197225 0.000000 110 +technic 1 100 2.302585 2.302585 140 +graphic 1 90 2.397895 2.397895 147 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +sieg 0 69 2.708050 0.000000 260 +function 0 62 2.772589 0.000000 275 +understand 0 47 3.091042 0.000000 384 +third 0 43 3.178054 0.000000 412 +vision 1 41 3.218876 3.218876 430 +combin 0 42 3.218876 0.000000 421 +examin 0 42 3.218876 0.000000 424 +multipl 0 39 3.258097 0.000000 453 +slide 1 38 3.295837 3.295837 467 +microsoft 0 38 3.295837 0.000000 468 +statist 0 35 3.401197 0.000000 521 +taught 1 33 3.433987 3.433987 526 +obtain 0 33 3.433987 0.000000 534 +rang 0 30 3.555348 0.000000 565 +actual 0 28 3.610918 0.000000 604 +pass 0 28 3.610918 0.000000 611 +aspect 0 25 3.737670 0.000000 663 +ofwashington 0 22 3.850148 0.000000 766 +try 0 22 3.850148 0.000000 764 +geometri 0 22 3.850148 0.000000 752 +left 0 19 4.007333 0.000000 851 +steven 0 17 4.110874 0.000000 953 +qual 0 15 4.248495 0.000000 1062 +universityof 0 15 4.248495 0.000000 1061 +reflect 0 15 4.248495 0.000000 1034 +remov 0 12 4.465908 0.000000 1225 +werner 0 10 4.653960 0.000000 1385 +linda 0 10 4.653960 0.000000 1394 +tanimoto 0 10 4.653960 0.000000 1429 +surfac 1 9 4.753590 4.753590 1574 +folk 0 9 4.753590 0.000000 1597 +siggraph 0 8 4.875197 0.000000 1773 +theclass 1 6 5.164786 5.164786 2060 +speaker 0 5 5.347108 0.000000 2370 +engineeringdepart 0 4 5.568345 0.000000 2917 +closest 0 4 5.568345 0.000000 2828 +addition 0 4 5.568345 0.000000 2593 +rick 0 4 5.568345 0.000000 2646 +wavelet 0 4 5.568345 0.000000 2874 +union 0 4 5.568345 0.000000 2634 +kari 1 2 6.263398 6.263398 4500 +andmathemat 0 2 6.263398 0.000000 4948 +tonyderos 0 2 6.263398 0.000000 5839 +stuetzl 0 2 6.263398 0.000000 5840 +duchamp 0 2 6.263398 0.000000 5841 +hopp 0 2 6.263398 0.000000 5092 +sketch 0 2 6.263398 0.000000 5946 +getto 0 2 6.263398 0.000000 5806 +herear 0 2 6.263398 0.000000 5947 +pulli 1 1 6.957497 6.957497 15701 +antero 1 1 6.957497 6.957497 15702 +subdivis 1 1 6.957497 6.957497 15703 +pagekari 0 1 6.957497 0.000000 15704 +pullii 0 1 6.957497 0.000000 15705 +thesedisciplin 0 1 6.957497 0.000000 15706 +uwfor 0 1 6.957497 0.000000 15707 +pixar 0 1 6.957497 0.000000 15708 +lindashapiro 0 1 6.957497 0.000000 15709 +andjohn 0 1 6.957497 0.000000 15710 +mcdonald 0 1 6.957497 0.000000 15711 +andhugu 0 1 6.957497 0.000000 15712 +szeliski 0 1 6.957497 0.000000 15713 +tribor 0 1 6.957497 0.000000 15714 +triplet 0 1 6.957497 0.000000 15715 +recognitionsystem 0 1 6.957497 0.000000 15716 +surfacereconstruct 0 1 6.957497 0.000000 15717 +baselin 0 1 6.957497 0.000000 15718 +camerasystem 0 1 6.957497 0.000000 15719 +waveletanalysi 0 1 6.957497 0.000000 15720 +rigidregistr 0 1 6.957497 0.000000 15721 +architecturesystem 0 1 6.957497 0.000000 15722 +susanegg 0 1 6.957497 0.000000 15723 +brianbershad 0 1 6.957497 0.000000 15724 +eacutesum 0 1 6.957497 0.000000 15725 +eacut 0 1 6.957497 0.000000 15726 +kapu 0 1 6.957497 0.000000 15727 +takavainionti 0 1 6.957497 0.000000 15728 +oulu 0 1 6.957497 0.000000 15729 +finland 0 1 6.957497 0.000000 15730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..b8ca82be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +professor 0 137 1.945910 0.000000 76 +seattl 0 120 2.079442 0.000000 103 +anna 0 12 4.465908 0.000000 1292 +karlinanna 0 1 6.957497 0.000000 15731 +rochel 0 1 6.957497 0.000000 15732 +karlinassoci 0 1 6.957497 0.000000 15733 +sincejuli 0 1 6.957497 0.000000 15734 +paperskarlin 0 1 6.957497 0.000000 15735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..e0ff9bc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +email 0 220 1.386294 0.000000 29 +back 0 60 2.833213 0.000000 297 +yeunghom 0 1 6.957497 0.000000 15736 +yeungperson 0 1 6.957497 0.000000 15737 +infomi 0 1 6.957497 0.000000 15738 +picturemi 0 1 6.957497 0.000000 15739 +researchtelnet 0 1 6.957497 0.000000 15740 +machinessend 0 1 6.957497 0.000000 15741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..2b423711 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +area 0 144 1.945910 0.000000 80 +postscript 1 131 2.079442 2.079442 90 +confer 0 126 2.079442 0.000000 100 +seattl 0 120 2.079442 0.000000 103 +specif 1 106 2.197225 2.197225 106 +user 0 104 2.302585 0.000000 137 +environ 1 84 2.484907 2.484907 177 +academ 0 82 2.484907 0.000000 178 +start 0 83 2.484907 0.000000 173 +school 0 84 2.484907 0.000000 188 +method 0 80 2.564949 0.000000 213 +interfac 0 79 2.564949 0.000000 209 +complet 0 77 2.564949 0.000000 208 +html 0 75 2.639057 0.000000 235 +name 0 72 2.639057 0.000000 220 +java 0 70 2.708050 0.000000 248 +interact 1 62 2.772589 2.772589 270 +visual 0 48 3.044522 0.000000 372 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +describ 0 45 3.135494 0.000000 400 +live 0 40 3.258097 0.000000 451 +formal 0 37 3.332205 0.000000 478 +human 0 32 3.465736 0.000000 546 +manipul 0 27 3.637586 0.000000 624 +berkelei 0 26 3.688879 0.000000 657 +background 0 25 3.737670 0.000000 664 +other 0 24 3.761200 0.000000 697 +love 0 21 3.912023 0.000000 804 +voic 0 21 3.912023 0.000000 806 +safeti 1 20 3.951244 3.951244 817 +binari 0 20 3.951244 0.000000 823 +qualiti 0 20 3.951244 0.000000 832 +critic 0 16 4.174387 0.000000 982 +nasa 0 13 4.382027 0.000000 1188 +readabl 0 12 4.465908 0.000000 1258 +nanci 0 12 4.465908 0.000000 1256 +kurt 1 9 4.753590 4.753590 1548 +leveson 0 9 4.753590 0.000000 1540 +sister 0 9 4.753590 0.000000 1524 +wayn 0 8 4.875197 0.000000 1738 +poster 1 7 5.010635 5.010635 1814 +usabl 0 7 5.010635 0.000000 1810 +corner 0 7 5.010635 0.000000 1909 +vivek 0 6 5.164786 0.000000 2210 +parent 0 6 5.164786 0.000000 2204 +ohlrich 0 5 5.347108 0.000000 2564 +humor 0 5 5.347108 0.000000 2533 +partridg 1 3 5.857933 5.857933 3346 +dabbl 0 3 5.857933 0.000000 3971 +preview 0 3 5.857933 0.000000 3306 +bauer 0 2 6.263398 0.000000 5117 +mat 0 2 6.263398 0.000000 5942 +heimdahl 0 2 6.263398 0.000000 5940 +ratan 0 2 6.263398 0.000000 5948 +rees 0 2 6.263398 0.000000 5939 +thousand 0 2 6.263398 0.000000 5949 +oak 0 2 6.263398 0.000000 5566 +kepart 0 2 6.263398 0.000000 4459 +bddtcl 0 1 6.957497 0.000000 15742 +decisiondiagram 0 1 6.957497 0.000000 15743 +suburban 0 1 6.957497 0.000000 15744 +oti 0 1 6.957497 0.000000 15745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..59940977 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +list 0 201 1.609438 0.000000 39 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +stuff 0 87 2.484907 0.000000 171 +complet 0 77 2.564949 0.000000 208 +advisor 0 51 2.995732 0.000000 355 +friend 0 48 3.044522 0.000000 376 +natur 0 44 3.135494 0.000000 406 +vita 0 38 3.295837 0.000000 473 +collabor 0 32 3.465736 0.000000 543 +suit 0 13 4.382027 0.000000 1129 +tour 0 11 4.553877 0.000000 1307 +ofcomput 0 10 4.653960 0.000000 1442 +weld 0 9 4.753590 0.000000 1538 +dictionari 0 8 4.875197 0.000000 1642 +golden 1 7 5.010635 5.010635 1962 +photographi 0 6 5.164786 0.000000 2146 +oren 0 6 5.164786 0.000000 2134 +etzioni 0 6 5.164786 0.000000 2135 +keith 1 5 5.347108 5.347108 2528 +paint 0 5 5.347108 0.000000 2400 +coffe 0 5 5.347108 0.000000 2556 +lawyer 0 4 5.568345 0.000000 2836 +car 0 4 5.568345 0.000000 2931 +bicycl 0 2 6.263398 0.000000 5950 +questa 0 1 6.957497 0.000000 15746 +pagina 0 1 6.957497 0.000000 15747 +anch 0 1 6.957497 0.000000 15748 +italiano 0 1 6.957497 0.000000 15749 +researchsoftbotsplanningkrselect 0 1 6.957497 0.000000 15750 +publicationscurriculum 0 1 6.957497 0.000000 15751 +inpostscriptrandom 0 1 6.957497 0.000000 15752 +hackingwordbot 0 1 6.957497 0.000000 15753 +godless 0 1 6.957497 0.000000 15754 +pinko 0 1 6.957497 0.000000 15755 +dislik 0 1 6.957497 0.000000 15756 +ellenmarcruben 0 1 6.957497 0.000000 15757 +laurennickrich 0 1 6.957497 0.000000 15758 +joannavivek 0 1 6.957497 0.000000 15759 +keithgolden 0 1 6.957497 0.000000 15760 +kgolden 0 1 6.957497 0.000000 15761 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..ca41b9ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,144 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +softwar 2 220 1.386294 2.772588 30 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +schedul 0 119 2.079442 0.000000 85 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +technolog 0 131 2.079442 0.000000 102 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +manag 0 114 2.197225 0.000000 125 +version 0 113 2.197225 0.000000 122 +intern 0 108 2.197225 0.000000 128 +site 0 106 2.197225 0.000000 119 +world 0 115 2.197225 0.000000 126 +technic 1 100 2.302585 2.302585 140 +text 0 98 2.302585 0.000000 133 +book 0 99 2.302585 0.000000 131 +proceed 0 93 2.397895 0.000000 152 +center 0 88 2.397895 0.000000 158 +sinc 0 90 2.397895 0.000000 159 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +wide 0 84 2.484907 0.000000 185 +school 0 84 2.484907 0.000000 188 +resum 1 79 2.564949 2.564949 217 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +server 0 76 2.564949 0.000000 204 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +workshop 1 71 2.639057 2.639057 239 +onlin 1 75 2.639057 2.639057 223 +servic 0 72 2.639057 0.000000 236 +polici 0 64 2.772589 0.000000 279 +new 0 64 2.772589 0.000000 262 +visit 0 63 2.772589 0.000000 288 +content 0 59 2.833213 0.000000 302 +automat 0 61 2.833213 0.000000 306 +march 0 61 2.833213 0.000000 295 +suggest 0 53 2.944439 0.000000 331 +tabl 0 51 2.995732 0.000000 346 +advisor 0 51 2.995732 0.000000 355 +date 0 51 2.995732 0.000000 344 +format 0 48 3.044522 0.000000 356 +editor 0 41 3.218876 0.000000 433 +movi 1 40 3.258097 3.258097 459 +respons 1 37 3.332205 3.332205 476 +china 0 37 3.332205 0.000000 487 +manual 0 35 3.401197 0.000000 504 +transform 1 32 3.465736 3.465736 542 +dissert 0 32 3.465736 0.000000 549 +specifi 0 30 3.555348 0.000000 568 +quot 1 29 3.583519 3.583519 582 +chines 0 29 3.583519 0.000000 595 +campu 0 27 3.637586 0.000000 623 +revis 0 26 3.688879 0.000000 640 +experiment 0 26 3.688879 0.000000 645 +alwai 0 24 3.761200 0.000000 691 +william 0 22 3.850148 0.000000 765 +wang 0 21 3.912023 0.000000 790 +watch 0 21 3.912023 0.000000 789 +fund 0 21 3.912023 0.000000 805 +qualiti 0 20 3.951244 0.000000 832 +minut 0 20 3.951244 0.000000 810 +citi 0 19 4.007333 0.000000 874 +thoma 0 18 4.060443 0.000000 901 +stock 0 16 4.174387 0.000000 1007 +driven 0 15 4.248495 0.000000 1048 +style 0 15 4.248495 0.000000 1036 +hong 2 14 4.317488 8.634976 1105 +asynchron 1 12 4.465908 4.465908 1229 +readi 0 12 4.465908 0.000000 1242 +evolut 1 11 4.553877 4.553877 1314 +market 0 11 4.553877 0.000000 1361 +kong 1 9 4.753590 4.753590 1602 +mainten 0 9 4.753590 0.000000 1543 +upcom 0 8 4.875197 0.000000 1685 +bridg 0 8 4.875197 0.000000 1764 +pacif 0 8 4.875197 0.000000 1674 +delai 0 7 5.010635 0.000000 1848 +highwai 0 6 5.164786 0.000000 2095 +invest 0 6 5.164786 0.000000 2153 +educomput 0 5 5.347108 0.000000 2524 +semi 0 5 5.347108 0.000000 2510 +these 0 5 5.347108 0.000000 2482 +mutual 0 5 5.347108 0.000000 2418 +commod 0 5 5.347108 0.000000 2415 +ics 0 4 5.568345 0.000000 2779 +chart 0 4 5.568345 0.000000 2653 +chow 1 3 5.857933 5.857933 3281 +notkin 1 3 5.857933 5.857933 3345 +polytechn 0 3 5.857933 0.000000 3222 +usathi 0 2 6.263398 0.000000 5951 +glossari 0 2 6.263398 0.000000 4418 +asia 0 2 6.263398 0.000000 5952 +alumnu 0 2 6.263398 0.000000 5863 +kingsum 2 1 6.957497 13.914994 15762 +pcct 1 1 6.957497 6.957497 15763 +feedbackresearchmi 0 1 6.957497 0.000000 15764 +toolspap 0 1 6.957497 0.000000 15765 +icsm 0 1 6.957497 0.000000 15766 +griswold 0 1 6.957497 0.000000 15767 +sorcererpcct 0 1 6.957497 0.000000 15768 +terrenc 0 1 6.957497 0.000000 15769 +parr 0 1 6.957497 0.000000 15770 +newbiesresumepleasedrop 0 1 6.957497 0.000000 15771 +mailto 0 1 6.957497 0.000000 15772 +kongchines 0 1 6.957497 0.000000 15773 +kongsingapor 0 1 6.957497 0.000000 15774 +sitessingapor 0 1 6.957497 0.000000 15775 +websom 0 1 6.957497 0.000000 15776 +friendstom 0 1 6.957497 0.000000 15777 +liew 0 1 6.957497 0.000000 15778 +fook 0 1 6.957497 0.000000 15779 +jiang 0 1 6.957497 0.000000 15780 +weidongu 0 1 6.957497 0.000000 15781 +relatedunivers 0 1 6.957497 0.000000 15782 +webserv 0 1 6.957497 0.000000 15783 +storeinvestmentsfre 0 1 6.957497 0.000000 15784 +analysismisc 0 1 6.957497 0.000000 15785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..2d9ac688 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +parallel 0 169 1.791759 0.000000 60 +like 0 132 1.945910 0.000000 81 +professor 0 137 1.945910 0.000000 76 +seattl 1 120 2.079442 2.079442 103 +high 0 130 2.079442 0.000000 101 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +assist 0 112 2.197225 0.000000 113 +teach 0 108 2.197225 0.000000 112 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +search 0 95 2.397895 0.000000 155 +real 0 93 2.397895 0.000000 144 +build 0 85 2.484907 0.000000 184 +want 0 79 2.564949 0.000000 199 +previou 0 62 2.772589 0.000000 290 +written 0 63 2.772589 0.000000 278 +visit 0 63 2.772589 0.000000 288 +profession 0 51 2.995732 0.000000 345 +case 0 51 2.995732 0.000000 351 +life 0 50 3.044522 0.000000 375 +archiv 0 49 3.044522 0.000000 364 +get 0 46 3.091042 0.000000 380 +adapt 0 46 3.091042 0.000000 387 +anoth 0 45 3.135494 0.000000 408 +form 0 39 3.258097 0.000000 443 +electr 1 38 3.295837 3.295837 461 +ofth 0 36 3.367296 0.000000 491 +photo 0 31 3.496508 0.000000 561 +rather 0 26 3.688879 0.000000 642 +task 0 25 3.737670 0.000000 678 +ofwashington 0 22 3.850148 0.000000 766 +rout 1 21 3.912023 3.912023 793 +spend 0 19 4.007333 0.000000 850 +speed 0 18 4.060443 0.000000 911 +minim 0 18 4.060443 0.000000 887 +took 0 16 4.174387 0.000000 1010 +doesn 0 15 4.248495 0.000000 1055 +signific 0 13 4.382027 0.000000 1125 +kevin 1 9 4.753590 4.753590 1482 +suitabl 0 9 4.753590 0.000000 1486 +pacif 1 8 4.875197 4.875197 1674 +root 0 8 4.875197 0.000000 1650 +brain 0 8 4.875197 0.000000 1638 +router 0 8 4.875197 0.000000 1772 +spot 0 7 5.010635 0.000000 1894 +explain 0 7 5.010635 0.000000 1816 +multicomput 0 7 5.010635 0.000000 1890 +rock 0 6 5.164786 0.000000 2164 +chaotic 1 5 5.347108 5.347108 2566 +coral 0 5 5.347108 0.000000 2538 +engineeringat 0 5 5.347108 0.000000 2561 +wander 0 4 5.568345 0.000000 2896 +chaoticrout 0 4 5.568345 0.000000 3063 +bold 0 3 5.857933 0.000000 3846 +tenur 0 3 5.857933 0.000000 3801 +researchassoci 0 3 5.857933 0.000000 3664 +nervou 0 2 6.263398 0.000000 5953 +conscious 0 2 6.263398 0.000000 5954 +boldingkwb 0 1 6.957497 0.000000 15786 +juvenil 0 1 6.957497 0.000000 15787 +squirt 0 1 6.957497 0.000000 15788 +hunk 0 1 6.957497 0.000000 15789 +cling 0 1 6.957497 0.000000 15790 +rudimentari 0 1 6.957497 0.000000 15791 +eat 0 1 6.957497 0.000000 15792 +dennett 0 1 6.957497 0.000000 15793 +latencylan 0 1 6.957497 0.000000 15794 +researchha 0 1 6.957497 0.000000 15795 +formass 0 1 6.957497 0.000000 15796 +comethyakutak 0 1 6.957497 0.000000 15797 +moustach 0 1 6.957497 0.000000 15798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..f30cf648 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +seattl 0 120 2.079442 0.000000 103 +person 0 111 2.197225 0.000000 117 +structur 0 106 2.197225 0.000000 105 +commun 0 95 2.397895 0.000000 157 +sieg 0 69 2.708050 0.000000 260 +room 0 59 2.833213 0.000000 301 +quarter 1 47 3.091042 3.091042 389 +formal 0 37 3.332205 0.000000 478 +short 0 36 3.367296 0.000000 499 +winter 0 36 3.367296 0.000000 500 +richard 0 31 3.496508 0.000000 559 +ladner 1 6 5.164786 5.164786 2062 +ladnerrichard 0 1 6.957497 0.000000 15799 +ladnerprofessor 0 1 6.957497 0.000000 15800 +biographyresearch 0 1 6.957497 0.000000 15801 +studentsteachingcomput 0 1 6.957497 0.000000 15802 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..6bb41ca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +develop 1 174 1.791759 1.791759 53 +recent 0 167 1.791759 0.000000 58 +area 1 144 1.945910 1.945910 80 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +code 0 108 2.197225 0.000000 116 +technic 0 100 2.302585 0.000000 140 +sinc 0 90 2.397895 0.000000 159 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +integr 1 67 2.708050 2.708050 245 +sieg 0 69 2.708050 0.000000 260 +test 0 66 2.708050 0.000000 252 +laboratori 1 63 2.772589 2.772589 292 +function 1 62 2.772589 2.772589 275 +evalu 0 64 2.772589 0.000000 266 +creat 0 63 2.772589 0.000000 277 +room 0 59 2.833213 0.000000 301 +allow 0 53 2.944439 0.000000 333 +small 0 39 3.258097 0.000000 447 +join 0 39 3.258097 0.000000 457 +purpos 0 37 3.332205 0.000000 481 +cost 0 37 3.332205 0.000000 480 +staff 1 36 3.367296 3.367296 490 +ofth 0 36 3.367296 0.000000 491 +return 0 34 3.401197 0.000000 502 +board 0 33 3.433987 0.000000 528 +articl 0 33 3.433987 0.000000 530 +focu 0 30 3.555348 0.000000 571 +packag 0 28 3.610918 0.000000 614 +symbol 0 27 3.637586 0.000000 620 +primari 0 25 3.737670 0.000000 669 +ofwashington 0 22 3.850148 0.000000 766 +director 0 22 3.850148 0.000000 767 +chip 1 21 3.912023 3.912023 770 +voic 0 21 3.912023 0.000000 806 +vlsi 0 21 3.912023 0.000000 795 +supervis 0 20 3.951244 0.000000 840 +spars 0 16 4.174387 0.000000 989 +later 0 15 4.248495 0.000000 1043 +driven 0 15 4.248495 0.000000 1048 +larri 1 13 4.382027 4.382027 1142 +calcul 0 12 4.465908 0.000000 1268 +captur 0 12 4.465908 0.000000 1232 +fpga 1 10 4.653960 4.653960 1433 +matric 0 10 4.653960 0.000000 1399 +router 0 8 4.875197 0.000000 1772 +upcom 0 8 4.875197 0.000000 1685 +northwest 0 7 5.010635 0.000000 1973 +densiti 0 7 5.010635 0.000000 1927 +quantum 0 6 5.164786 0.000000 2214 +chemistri 1 5 5.347108 5.347108 2405 +mcmurchi 1 4 5.568345 5.568345 2757 +western 0 4 5.568345 0.000000 3062 +comprehens 0 4 5.568345 0.000000 2745 +andengin 0 4 5.568345 0.000000 3042 +coauthor 0 4 5.568345 0.000000 3064 +tester 0 4 5.568345 0.000000 2754 +triptych 0 4 5.568345 0.000000 3061 +mactest 1 3 5.857933 5.857933 3972 +ofintegr 0 2 6.263398 0.000000 5324 +gaussian 0 2 6.263398 0.000000 4763 +molecul 0 2 6.263398 0.000000 5246 +representationof 0 2 6.263398 0.000000 4119 +andha 0 2 6.263398 0.000000 5955 +mcmurchiedepart 0 1 6.957497 0.000000 15803 +integratedsystem 0 1 6.957497 0.000000 15804 +hework 0 1 6.957497 0.000000 15805 +theconstruct 0 1 6.957497 0.000000 15806 +hamiltonian 0 1 6.957497 0.000000 15807 +coauthorof 0 1 6.957497 0.000000 15808 +meld 0 1 6.957497 0.000000 15809 +abinitio 0 1 6.957497 0.000000 15810 +wirec 0 1 6.957497 0.000000 15811 +aschemat 0 1 6.957497 0.000000 15812 +withschemat 0 1 6.957497 0.000000 15813 +concis 0 1 6.957497 0.000000 15814 +parameteriz 0 1 6.957497 0.000000 15815 +andcommerci 0 1 6.957497 0.000000 15816 +hardwareenviron 0 1 6.957497 0.000000 15817 +andsubsystem 0 1 6.957497 0.000000 15818 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..d2c40a6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 0 293 1.098612 0.000000 17 +softwar 2 220 1.386294 2.772588 30 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +read 0 154 1.791759 0.000000 47 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +model 1 145 1.945910 1.945910 69 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +hall 0 146 1.945910 0.000000 65 +problem 0 147 1.945910 0.000000 75 +like 0 132 1.945910 0.000000 81 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +pleas 0 113 2.197225 0.000000 114 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +real 1 93 2.397895 2.397895 144 +search 0 95 2.397895 0.000000 155 +question 0 91 2.397895 0.000000 141 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +start 0 83 2.484907 0.000000 173 +learn 0 86 2.484907 0.000000 170 +ieee 0 86 2.484907 0.000000 190 +control 0 82 2.484907 0.000000 164 +interfac 1 79 2.564949 2.564949 209 +come 0 78 2.564949 0.000000 202 +state 0 76 2.564949 0.000000 207 +issu 0 78 2.564949 0.000000 211 +nation 1 74 2.639057 2.639057 240 +appli 1 71 2.639057 2.639057 226 +involv 0 71 2.639057 0.000000 227 +sieg 0 69 2.708050 0.000000 260 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +goal 0 66 2.708050 0.000000 250 +practic 0 70 2.708050 0.000000 246 +result 0 65 2.772589 0.000000 281 +polici 0 64 2.772589 0.000000 279 +evalu 0 64 2.772589 0.000000 266 +copi 0 63 2.772589 0.000000 284 +interact 0 62 2.772589 0.000000 270 +space 1 57 2.890372 2.890372 310 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +publish 0 57 2.890372 0.000000 326 +finger 1 52 2.995732 2.995732 354 +life 1 50 3.044522 3.044522 375 +principl 0 48 3.044522 0.000000 357 +california 1 46 3.091042 3.091042 388 +favorit 0 44 3.135494 0.000000 410 +autom 0 41 3.218876 0.000000 434 +editor 0 41 3.218876 0.000000 433 +join 0 39 3.258097 0.000000 457 +form 0 39 3.258097 0.000000 443 +late 0 40 3.258097 0.000000 439 +transact 0 39 3.258097 0.000000 438 +field 0 37 3.332205 0.000000 482 +tree 0 36 3.367296 0.000000 492 +committe 0 34 3.401197 0.000000 522 +award 0 34 3.401197 0.000000 523 +toler 0 33 3.433987 0.000000 533 +board 0 33 3.433987 0.000000 528 +human 1 32 3.465736 3.465736 546 +fault 1 32 3.465736 3.465736 547 +express 0 32 3.465736 0.000000 540 +titl 0 31 3.496508 0.000000 556 +specifi 1 30 3.555348 3.555348 568 +produc 0 30 3.555348 0.000000 572 +chair 0 29 3.583519 0.000000 596 +except 0 28 3.610918 0.000000 607 +actual 0 28 3.610918 0.000000 604 +though 0 27 3.637586 0.000000 622 +determin 0 27 3.637586 0.000000 630 +spent 0 25 3.737670 0.000000 676 +concern 0 25 3.737670 0.000000 666 +never 0 25 3.737670 0.000000 671 +wai 0 25 3.737670 0.000000 662 +fellow 0 24 3.761200 0.000000 701 +properti 0 22 3.850148 0.000000 749 +director 0 22 3.850148 0.000000 767 +avoid 1 21 3.912023 3.912023 799 +fact 0 21 3.912023 0.000000 780 +safeti 2 20 3.951244 7.902488 817 +verif 0 20 3.951244 0.000000 826 +citi 0 19 4.007333 0.000000 874 +failur 0 18 4.060443 0.000000 898 +seem 0 18 4.060443 0.000000 899 +behavior 0 18 4.060443 0.000000 881 +engineeringunivers 0 17 4.110874 0.000000 959 +analyz 0 17 4.110874 0.000000 925 +anyth 1 16 4.174387 4.174387 998 +advantag 0 16 4.174387 0.000000 987 +commerci 0 16 4.174387 0.000000 1005 +weslei 0 16 4.174387 0.000000 983 +anywai 0 15 4.248495 0.000000 1047 +contribut 0 15 4.248495 0.000000 1021 +qual 0 15 4.248495 0.000000 1062 +style 0 15 4.248495 0.000000 1036 +train 0 14 4.317488 0.000000 1066 +deriv 1 13 4.382027 4.382027 1145 +washingtonbox 0 13 4.382027 0.000000 1200 +conf 0 13 4.382027 0.000000 1181 +nanci 1 12 4.465908 4.465908 1256 +safe 1 12 4.465908 4.465908 1274 +addison 0 12 4.465908 0.000000 1230 +council 1 11 4.553877 4.553877 1364 +valid 0 11 4.553877 0.000000 1299 +leveson 2 9 4.753590 9.507180 1540 +mode 0 9 4.753590 0.000000 1492 +irvin 0 8 4.875197 0.000000 1660 +matter 0 8 4.875197 0.000000 1627 +claim 0 8 4.875197 0.000000 1664 +elect 0 8 4.875197 0.000000 1771 +analys 0 8 4.875197 0.000000 1666 +perhap 0 8 4.875197 0.000000 1693 +chief 0 7 5.010635 0.000000 1829 +awar 0 7 5.010635 0.000000 1800 +rain 0 6 5.164786 0.000000 2137 +highwai 0 6 5.164786 0.000000 2095 +softwareengin 0 6 5.164786 0.000000 2162 +ucla 0 5 5.347108 0.000000 2502 +lesson 0 5 5.347108 0.000000 2568 +adopt 0 5 5.347108 0.000000 2467 +aircraft 1 4 5.568345 5.568345 2872 +melbourn 0 4 5.568345 0.000000 3035 +rsml 1 3 5.857933 5.857933 3967 +loss 0 3 5.857933 0.000000 3805 +automobil 0 3 5.857933 0.000000 3709 +aerospac 0 3 5.857933 0.000000 3555 +hazard 0 3 5.857933 0.000000 3191 +tca 1 2 6.263398 6.263398 5941 +collis 1 2 6.263398 6.263398 5956 +nobodi 0 2 6.263398 0.000000 5474 +thatyou 0 2 6.263398 0.000000 4682 +computingresearch 0 2 6.263398 0.000000 5957 +shuttl 0 2 6.263398 0.000000 4787 +aiaa 0 2 6.263398 0.000000 5239 +aeronaut 0 2 6.263398 0.000000 5958 +andscienc 0 2 6.263398 0.000000 5796 +safewar 0 2 6.263398 0.000000 5959 +isalso 0 2 6.263398 0.000000 5640 +pressur 0 2 6.263398 0.000000 5960 +accid 0 2 6.263398 0.000000 5961 +airport 0 2 6.263398 0.000000 5962 +levesondepart 0 1 6.957497 0.000000 15819 +mathand 0 1 6.957497 0.000000 15820 +misanthrop 0 1 6.957497 0.000000 15821 +aform 0 1 6.957497 0.000000 15822 +airspac 0 1 6.957497 0.000000 15823 +theiroffici 0 1 6.957497 0.000000 15824 +safetyresearch 0 1 6.957497 0.000000 15825 +subtop 0 1 6.957497 0.000000 15826 +commissionon 0 1 6.957497 0.000000 15827 +levesoni 0 1 6.957497 0.000000 15828 +systemsaward 0 1 6.957497 0.000000 15829 +promotingrespons 0 1 6.957497 0.000000 15830 +propertyar 0 1 6.957497 0.000000 15831 +stake 0 1 6.957497 0.000000 15832 +keynoteaddress 0 1 6.957497 0.000000 15833 +steam 0 1 6.957497 0.000000 15834 +hazardanalysi 0 1 6.957497 0.000000 15835 +writtenin 0 1 6.957497 0.000000 15836 +newrequir 0 1 6.957497 0.000000 15837 +cockpit 0 1 6.957497 0.000000 15838 +problemsand 0 1 6.957497 0.000000 15839 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..836d34c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +data 0 170 1.791759 0.000000 49 +architectur 2 139 1.945910 3.891820 77 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +tool 0 117 2.079442 0.000000 93 +seattl 0 120 2.079442 0.000000 103 +confer 0 126 2.079442 0.000000 100 +postscript 0 131 2.079442 0.000000 90 +intern 1 108 2.197225 2.197225 128 +manag 0 114 2.197225 0.000000 125 +memori 1 101 2.302585 2.302585 139 +book 0 99 2.302585 0.000000 131 +octob 1 89 2.397895 2.397895 156 +select 0 91 2.397895 0.000000 154 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +environ 1 84 2.484907 2.484907 177 +novemb 1 81 2.484907 2.484907 179 +academ 0 82 2.484907 0.000000 178 +help 0 83 2.484907 0.000000 175 +control 0 82 2.484907 0.000000 164 +optim 0 79 2.564949 0.000000 197 +come 0 78 2.564949 0.000000 202 +master 0 76 2.564949 0.000000 216 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +symposium 1 72 2.639057 2.639057 238 +effici 0 73 2.639057 0.000000 233 +integr 0 67 2.708050 0.000000 245 +plai 0 60 2.833213 0.000000 307 +share 0 59 2.833213 0.000000 304 +space 1 57 2.890372 2.890372 310 +faculti 0 56 2.890372 0.000000 325 +special 0 56 2.890372 0.000000 320 +major 0 56 2.890372 0.000000 315 +sampl 0 53 2.944439 0.000000 339 +instruct 0 53 2.944439 0.000000 332 +processor 0 54 2.944439 0.000000 335 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +principl 1 48 3.044522 3.044522 357 +numer 0 49 3.044522 0.000000 369 +execut 0 45 3.135494 0.000000 404 +join 0 39 3.258097 0.000000 457 +author 0 39 3.258097 0.000000 450 +annual 0 40 3.258097 0.000000 458 +transact 0 39 3.258097 0.000000 438 +workstat 1 37 3.332205 3.332205 479 +michael 1 35 3.401197 3.401197 514 +singl 1 34 3.401197 3.401197 510 +global 1 34 3.401197 3.401197 520 +award 0 34 3.401197 0.000000 523 +posit 0 31 3.496508 0.000000 552 +focu 0 30 3.555348 0.000000 571 +produc 0 30 3.555348 0.000000 572 +rang 0 30 3.555348 0.000000 565 +chair 1 29 3.583519 3.583519 596 +held 0 28 3.610918 0.000000 600 +usual 0 28 3.610918 0.000000 608 +cluster 0 28 3.610918 0.000000 612 +except 0 28 3.610918 0.000000 607 +team 0 27 3.637586 0.000000 625 +proc 1 26 3.688879 3.688879 649 +consult 0 24 3.761200 0.000000 687 +fellow 0 24 3.761200 0.000000 701 +lab 0 24 3.761200 0.000000 698 +handl 0 24 3.761200 0.000000 685 +lead 0 23 3.806662 0.000000 718 +deal 0 22 3.850148 0.000000 736 +william 0 22 3.850148 0.000000 765 +corpor 0 21 3.912023 0.000000 802 +chip 0 21 3.912023 0.000000 770 +binari 0 20 3.951244 0.000000 823 +supervis 0 20 3.951244 0.000000 840 +tenni 0 20 3.951244 0.000000 838 +exploit 0 20 3.951244 0.000000 836 +particularli 0 19 4.007333 0.000000 867 +separ 0 19 4.007333 0.000000 844 +four 0 18 4.060443 0.000000 905 +asplo 1 17 4.110874 4.110874 948 +former 0 17 4.110874 0.000000 956 +protect 0 17 4.110874 0.000000 935 +latenc 0 16 4.174387 0.000000 993 +choic 0 16 4.174387 0.000000 979 +transfer 0 16 4.174387 0.000000 967 +susan 1 15 4.248495 4.248495 1050 +levi 2 14 4.317488 8.634976 1093 +dean 1 14 4.317488 4.317488 1104 +coher 0 14 4.317488 0.000000 1109 +karlin 1 13 4.382027 4.382027 1176 +conf 1 13 4.382027 4.382027 1181 +mellon 0 13 4.382027 0.000000 1179 +hank 1 12 4.465908 4.465908 1253 +carnegi 0 12 4.465908 0.000000 1260 +anna 0 12 4.465908 0.000000 1292 +multithread 1 11 4.553877 4.553877 1315 +thedepart 0 11 4.553877 0.000000 1350 +henri 1 10 4.653960 4.653960 1417 +equip 0 10 4.653960 0.000000 1459 +ski 0 10 4.653960 0.000000 1471 +bike 0 10 4.653960 0.000000 1468 +death 0 10 4.653960 0.000000 1457 +jeffrei 1 9 4.753590 4.753590 1612 +softbal 0 9 4.753590 0.000000 1594 +voelker 0 9 4.753590 0.000000 1557 +vernon 0 9 4.753590 0.000000 1556 +morgan 0 9 4.753590 0.000000 1484 +egger 1 8 4.875197 4.875197 1695 +sigop 0 8 4.875197 0.000000 1727 +hold 0 8 4.875197 0.000000 1645 +span 0 8 4.875197 0.000000 1751 +evan 0 8 4.875197 0.000000 1633 +inproceed 0 8 4.875197 0.000000 1670 +jack 0 8 4.875197 0.000000 1780 +feelei 1 7 5.010635 5.010635 1859 +instrument 0 7 5.010635 0.000000 1954 +smile 0 7 5.010635 0.000000 1807 +maxim 0 7 5.010635 0.000000 1944 +simultan 1 6 5.164786 5.164786 2155 +tullsen 1 6 5.164786 5.164786 2081 +outstand 0 6 5.164786 0.000000 2136 +onoper 0 6 5.164786 0.000000 2048 +tobe 0 6 5.164786 0.000000 1995 +scholar 0 6 5.164786 0.000000 2180 +nine 0 6 5.164786 0.000000 2047 +rebecca 0 6 5.164786 0.000000 2174 +vivek 0 6 5.164786 0.000000 2210 +theth 0 5 5.347108 0.000000 2325 +seventh 0 5 5.347108 0.000000 2464 +fetch 0 5 5.347108 0.000000 2567 +chase 1 4 5.568345 5.568345 2897 +lazowska 1 4 5.568345 5.568345 2694 +arch 1 4 5.568345 5.568345 2995 +prog 1 4 5.568345 5.568345 2740 +opal 0 4 5.568345 0.000000 3057 +fulbright 0 4 5.568345 0.000000 2963 +escap 0 4 5.568345 0.000000 3016 +pighin 0 4 5.568345 0.000000 2735 +narasayya 0 4 5.568345 0.000000 3065 +thekkath 1 3 5.857933 5.857933 3973 +recipi 0 3 5.857933 0.000000 3627 +eleven 0 3 5.857933 0.000000 3824 +freder 0 3 5.857933 0.000000 3352 +emer 0 3 5.857933 0.000000 3969 +stamm 0 3 5.857933 0.000000 3970 +dessert 1 2 6.263398 6.263398 5194 +projecti 0 2 6.263398 0.000000 5963 +befound 0 2 6.263398 0.000000 5964 +infam 0 2 6.263398 0.000000 5859 +subpag 0 2 6.263398 0.000000 5926 +jamrozik 0 2 6.263398 0.000000 5925 +chandramohan 0 2 6.263398 0.000000 5965 +projectcal 0 1 6.957497 0.000000 15840 +theetch 0 1 6.957497 0.000000 15841 +consecutiveacm 0 1 6.957497 0.000000 15842 +symposia 0 1 6.957497 0.000000 15843 +universityand 0 1 6.957497 0.000000 15844 +machineryand 0 1 6.957497 0.000000 15845 +survivedlevi 0 1 6.957497 0.000000 15846 +haveal 0 1 6.957497 0.000000 15847 +glu 0 1 6.957497 0.000000 15848 +potato 0 1 6.957497 0.000000 15849 +parlor 0 1 6.957497 0.000000 15850 +publicationsreduc 0 1 6.957497 0.000000 15851 +implementablesimultan 0 1 6.957497 0.000000 15852 +joen 0 1 6.957497 0.000000 15853 +edwardd 0 1 6.957497 0.000000 15854 +recover 0 1 6.957497 0.000000 15855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..73de461c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +paper 0 205 1.609438 0.000000 38 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +avail 0 169 1.791759 0.000000 48 +relat 1 139 1.945910 1.945910 68 +object 0 138 1.945910 0.000000 79 +perform 0 143 1.945910 0.000000 74 +provid 0 121 2.079442 0.000000 94 +structur 1 106 2.197225 2.197225 105 +code 0 108 2.197225 0.000000 116 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +larg 1 82 2.484907 2.484907 168 +chang 0 82 2.484907 0.000000 163 +member 0 84 2.484907 0.000000 165 +dynam 0 76 2.564949 0.000000 194 +sourc 0 77 2.564949 0.000000 201 +differ 0 66 2.708050 0.000000 253 +complex 1 64 2.772589 2.772589 269 +virtual 0 62 2.772589 0.000000 285 +result 0 65 2.772589 0.000000 281 +organ 0 65 2.772589 0.000000 265 +share 1 59 2.833213 2.833213 304 +space 1 57 2.890372 2.890372 310 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +much 0 52 2.995732 0.000000 349 +right 1 48 3.044522 3.044522 363 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +archiv 0 49 3.044522 0.000000 364 +execut 0 45 3.135494 0.000000 404 +anoth 0 45 3.135494 0.000000 408 +prototyp 0 38 3.295837 0.000000 463 +ofth 0 36 3.367296 0.000000 491 +singl 1 34 3.401197 3.401197 510 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +storag 0 31 3.496508 0.000000 553 +exist 0 30 3.555348 0.000000 569 +option 0 30 3.555348 0.000000 575 +depend 0 29 3.583519 0.000000 583 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +manipul 0 27 3.637586 0.000000 624 +enhanc 0 26 3.688879 0.000000 644 +jeff 0 25 3.737670 0.000000 673 +interpret 0 24 3.761200 0.000000 686 +mike 0 24 3.761200 0.000000 703 +thread 0 23 3.806662 0.000000 722 +cooper 1 22 3.850148 3.850148 757 +defin 0 22 3.850148 0.000000 746 +thu 0 21 3.912023 0.000000 773 +flexibl 0 21 3.912023 0.000000 792 +protect 1 17 4.110874 4.110874 935 +permit 0 16 4.174387 0.000000 962 +easili 0 14 4.317488 0.000000 1077 +levi 0 14 4.317488 0.000000 1093 +directli 0 13 4.382027 0.000000 1141 +translat 0 13 4.382027 0.000000 1164 +believ 0 13 4.382027 0.000000 1187 +uniqu 0 12 4.465908 0.000000 1228 +hank 0 12 4.465908 0.000000 1253 +alpha 1 11 4.553877 4.553877 1348 +persist 0 11 4.553877 0.000000 1367 +trust 0 9 4.753590 0.000000 1583 +parti 0 8 4.875197 0.000000 1676 +mach 0 8 4.875197 0.000000 1669 +dylan 0 8 4.875197 0.000000 1625 +secondari 0 7 5.010635 0.000000 1884 +feelei 0 7 5.010635 0.000000 1859 +huge 0 6 5.164786 0.000000 1991 +bestor 0 6 5.164786 0.000000 2099 +risc 0 6 5.164786 0.000000 2016 +duke 0 6 5.164786 0.000000 2231 +vivek 0 6 5.164786 0.000000 2210 +tiwari 0 5 5.347108 0.000000 2385 +opal 2 4 5.568345 11.136690 3057 +simplifi 0 4 5.568345 0.000000 3066 +mip 0 4 5.568345 0.000000 2738 +transmit 0 4 5.568345 0.000000 2835 +lazowska 0 4 5.568345 0.000000 2694 +chase 0 4 5.568345 0.000000 2897 +narasayya 0 4 5.568345 0.000000 3065 +databaseof 0 2 6.263398 0.000000 4696 +ashutosh 0 2 6.263398 0.000000 5966 +mcname 0 2 6.263398 0.000000 5875 +projectop 0 1 6.957497 0.000000 15856 +tunedto 0 1 6.957497 0.000000 15857 +numberof 0 1 6.957497 0.000000 15858 +andcooper 0 1 6.957497 0.000000 15859 +directlycommun 0 1 6.957497 0.000000 15860 +addressspac 0 1 6.957497 0.000000 15861 +domainthat 0 1 6.957497 0.000000 15862 +oneprocess 0 1 6.957497 0.000000 15863 +protectionstructur 0 1 6.957497 0.000000 15864 +relationshipbetween 0 1 6.957497 0.000000 15865 +canimprov 0 1 6.957497 0.000000 15866 +cooperatingappl 0 1 6.957497 0.000000 15867 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..0dc2b3fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +implement 1 152 1.791759 1.791759 52 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +object 1 138 1.945910 1.945910 79 +hall 0 146 1.945910 0.000000 65 +year 0 148 1.945910 0.000000 84 +note 0 142 1.945910 0.000000 67 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +high 0 130 2.079442 0.000000 101 +topic 1 114 2.197225 2.197225 110 +well 0 109 2.197225 0.000000 121 +advanc 1 99 2.302585 2.302585 130 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +octob 0 89 2.397895 0.000000 156 +school 0 84 2.484907 0.000000 188 +orient 1 80 2.564949 2.564949 205 +issu 0 78 2.564949 0.000000 211 +meet 0 72 2.639057 0.000000 229 +sieg 0 69 2.708050 0.000000 260 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +virtual 0 62 2.772589 0.000000 285 +previou 0 62 2.772589 0.000000 290 +juli 0 60 2.833213 0.000000 305 +march 0 61 2.833213 0.000000 295 +direct 0 57 2.890372 0.000000 316 +cover 0 55 2.944439 0.000000 329 +archiv 0 49 3.044522 0.000000 364 +electron 1 47 3.091042 3.091042 379 +california 0 46 3.091042 0.000000 388 +editor 0 41 3.218876 0.000000 433 +past 0 42 3.218876 0.000000 428 +tutori 2 39 3.258097 6.516194 437 +submit 0 39 3.258097 0.000000 440 +vita 0 38 3.295837 0.000000 473 +brian 0 38 3.295837 0.000000 466 +industri 0 38 3.295837 0.000000 464 +respons 0 37 3.332205 0.000000 476 +curriculum 0 33 3.433987 0.000000 535 +chair 1 29 3.583519 3.583519 596 +consid 0 29 3.583519 0.000000 590 +propos 1 28 3.610918 3.610918 602 +constraint 1 26 3.688879 3.688879 636 +request 0 26 3.688879 0.000000 635 +aspect 0 25 3.737670 0.000000 663 +seri 0 24 3.761200 0.000000 708 +known 0 24 3.761200 0.000000 702 +lead 0 23 3.806662 0.000000 718 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +born 1 21 3.912023 3.912023 798 +qualiti 0 20 3.951244 0.000000 832 +accept 1 18 4.060443 4.060443 879 +encourag 1 18 4.060443 4.060443 880 +anyon 0 17 4.110874 0.000000 916 +jose 0 16 4.174387 0.000000 976 +alan 1 13 4.382027 4.382027 1146 +speak 0 12 4.465908 0.000000 1283 +readi 0 12 4.465908 0.000000 1242 +submiss 1 11 4.553877 4.553877 1298 +itali 0 11 4.553877 0.000000 1378 +inproceed 1 8 4.875197 4.875197 1670 +european 0 8 4.875197 0.000000 1763 +upcom 0 8 4.875197 0.000000 1685 +depth 0 8 4.875197 0.000000 1636 +edg 0 8 4.875197 0.000000 1647 +star 0 8 4.875197 0.000000 1717 +portland 0 7 5.010635 0.000000 1878 +oopsla 1 6 5.164786 5.164786 2221 +oregon 0 5 5.347108 0.000000 2437 +imper 1 4 5.568345 5.568345 3067 +freeman 1 4 5.568345 5.568345 2725 +ident 0 4 5.568345 0.000000 2826 +andsoftwar 0 4 5.568345 0.000000 2753 +breadth 0 4 5.568345 0.000000 2695 +green 0 4 5.568345 0.000000 2848 +nato 0 3 5.857933 0.000000 3587 +kaleidoscop 1 2 6.263398 6.263398 5780 +bologna 0 2 6.263398 0.000000 5631 +programmingsystem 0 2 6.263398 0.000000 5688 +hotlin 0 2 6.263398 0.000000 5967 +hendrix 0 2 6.263398 0.000000 5490 +grave 0 2 6.263398 0.000000 5968 +war 0 2 6.263398 0.000000 5969 +collector 0 2 6.263398 0.000000 5683 +lopez 1 1 6.957497 6.957497 15868 +bjorn 1 1 6.957497 6.957497 15869 +benson 1 1 6.957497 6.957497 15870 +lopezgu 0 1 6.957497 0.000000 15871 +lopezlopez 0 1 6.957497 0.000000 15872 +dissertationresearch 0 1 6.957497 0.000000 15873 +publicationsgu 0 1 6.957497 0.000000 15874 +mayoh 0 1 6.957497 0.000000 15875 +tougu 0 1 6.957497 0.000000 15876 +jann 0 1 6.957497 0.000000 15877 +penjam 0 1 6.957497 0.000000 15878 +constraintprogram 0 1 6.957497 0.000000 15879 +instituteseri 0 1 6.957497 0.000000 15880 +publisheda 0 1 6.957497 0.000000 15881 +tutorialsi 0 1 6.957497 0.000000 15882 +conferencein 0 1 6.957497 0.000000 15883 +itsextens 0 1 6.957497 0.000000 15884 +tutorialshav 0 1 6.957497 0.000000 15885 +introductorysurvei 0 1 6.957497 0.000000 15886 +academicresearch 0 1 6.957497 0.000000 15887 +attende 0 1 6.957497 0.000000 15888 +weespeci 0 1 6.957497 0.000000 15889 +requestguidelin 0 1 6.957497 0.000000 15890 +theoopsla 0 1 6.957497 0.000000 15891 +enthusiast 0 1 6.957497 0.000000 15892 +proposalswithout 0 1 6.957497 0.000000 15893 +notif 0 1 6.957497 0.000000 15894 +withcamera 0 1 6.957497 0.000000 15895 +jimi 0 1 6.957497 0.000000 15896 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..ca25e1d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +year 0 148 1.945910 0.000000 84 +like 0 132 1.945910 0.000000 81 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +look 0 107 2.197225 0.000000 115 +graphic 0 90 2.397895 0.000000 147 +academ 0 82 2.484907 0.000000 178 +want 0 79 2.564949 0.000000 199 +best 0 59 2.833213 0.000000 299 +browser 0 56 2.890372 0.000000 313 +life 0 50 3.044522 0.000000 375 +keep 0 44 3.135494 0.000000 409 +citi 0 19 4.007333 0.000000 874 +chateau 0 16 4.174387 0.000000 997 +fourth 0 16 4.174387 0.000000 999 +countri 0 15 4.248495 0.000000 1059 +suit 0 13 4.382027 0.000000 1129 +touch 0 12 4.465908 0.000000 1288 +curiou 0 5 5.347108 0.000000 2541 +areasinclud 0 2 6.263398 0.000000 5747 +omid 1 1 6.957497 6.957497 15897 +madani 1 1 6.957497 6.957497 15898 +bhello 0 1 6.957497 0.000000 15899 +enjoytheori 0 1 6.957497 0.000000 15900 +islamicarchitectur 0 1 6.957497 0.000000 15901 +isfahan 0 1 6.957497 0.000000 15902 +nomine 0 1 6.957497 0.000000 15903 +iran 0 1 6.957497 0.000000 15904 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..6ded1052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +washington 0 236 1.386294 0.000000 32 +area 0 144 1.945910 0.000000 80 +written 0 63 2.772589 0.000000 278 +music 0 42 3.218876 0.000000 436 +mike 1 24 3.761200 3.761200 703 +goe 0 15 4.248495 0.000000 1044 +creativ 0 8 4.875197 0.000000 1777 +academia 0 6 5.164786 0.000000 2036 +perkowitz 1 2 6.263398 6.263398 5970 +perkowitznewsflash 0 1 6.957497 0.000000 15905 +blond 0 1 6.957497 0.000000 15906 +randomfavorit 0 1 6.957497 0.000000 15907 +sheba 0 1 6.957497 0.000000 15908 +voyeur 0 1 6.957497 0.000000 15909 +grooveneedl 0 1 6.957497 0.000000 15910 +espressoresumemik 0 1 6.957497 0.000000 15911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..a9b8b190 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +program 0 374 0.693147 0.000000 7 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +check 0 115 2.197225 0.000000 118 +pleas 0 113 2.197225 0.000000 114 +access 0 102 2.302585 0.000000 136 +follow 0 92 2.397895 0.000000 143 +resum 0 79 2.564949 0.000000 217 +sieg 0 69 2.708050 0.000000 260 +visit 0 63 2.772589 0.000000 288 +septemb 0 65 2.772589 0.000000 274 +back 0 60 2.833213 0.000000 297 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +http 0 41 3.218876 0.000000 420 +short 0 36 3.367296 0.000000 499 +spent 0 25 3.737670 0.000000 676 +finish 0 22 3.850148 0.000000 748 +theunivers 0 21 3.912023 0.000000 797 +germani 0 17 4.110874 0.000000 946 +marc 0 8 4.875197 0.000000 1680 +german 0 6 5.164786 0.000000 2190 +langheinrich 1 1 6.957497 6.957497 15912 +bielefeld 1 1 6.957497 6.957497 15913 +marclang 1 1 6.957497 6.957497 15914 +homepagemarc 1 1 6.957497 6.957497 15915 +langheinrichuniversitt 0 1 6.957497 0.000000 15916 +washingtontechnisch 0 1 6.957497 0.000000 15917 +fakultt 0 1 6.957497 0.000000 15918 +scienceemail 0 1 6.957497 0.000000 15919 +imlangh 0 1 6.957497 0.000000 15920 +techfak 0 1 6.957497 0.000000 15921 +eduabout 0 1 6.957497 0.000000 15922 +myselfi 0 1 6.957497 0.000000 15923 +thefulbright 0 1 6.957497 0.000000 15924 +depthinform 0 1 6.957497 0.000000 15925 +biopost 0 1 6.957497 0.000000 15926 +addressa 0 1 6.957497 0.000000 15927 +mastersat 0 1 6.957497 0.000000 15928 +homeschoolgermanyringstra 0 1 6.957497 0.000000 15929 +maintalphon 0 1 6.957497 0.000000 15930 +paulusplatz 0 1 6.957497 0.000000 15931 +bielefeldphon 0 1 6.957497 0.000000 15932 +woodlawn 0 1 6.957497 0.000000 15933 +formatmarc 0 1 6.957497 0.000000 15934 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..e0e590a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +applic 0 170 1.791759 0.000000 56 +object 1 138 1.945910 1.945910 79 +support 0 132 1.945910 0.000000 83 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +check 0 115 2.197225 0.000000 118 +user 1 104 2.302585 2.302585 137 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +graphic 0 90 2.397895 0.000000 147 +proceed 0 93 2.397895 0.000000 152 +octob 0 89 2.397895 0.000000 156 +imag 0 91 2.397895 0.000000 161 +educ 1 86 2.484907 2.484907 191 +journal 1 83 2.484907 2.484907 183 +larg 1 82 2.484907 2.484907 168 +academ 0 82 2.484907 0.000000 178 +learn 0 86 2.484907 0.000000 170 +activ 0 84 2.484907 0.000000 182 +ieee 0 86 2.484907 0.000000 190 +environ 0 84 2.484907 0.000000 177 +interfac 1 79 2.564949 2.564949 209 +resum 0 79 2.564949 0.000000 217 +june 0 79 2.564949 0.000000 214 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +order 0 69 2.708050 0.000000 249 +goal 0 66 2.708050 0.000000 250 +differ 0 66 2.708050 0.000000 253 +sieg 0 69 2.708050 0.000000 260 +interact 1 62 2.772589 2.772589 270 +septemb 0 65 2.772589 0.000000 274 +share 0 59 2.833213 0.000000 304 +march 0 61 2.833213 0.000000 295 +explor 0 58 2.890372 0.000000 324 +space 0 57 2.890372 0.000000 310 +special 0 56 2.890372 0.000000 320 +investig 0 51 2.995732 0.000000 353 +visual 2 48 3.044522 6.089044 372 +editor 0 41 3.218876 0.000000 433 +compani 0 41 3.218876 0.000000 423 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +tutori 0 39 3.258097 0.000000 437 +collabor 1 32 3.465736 3.465736 543 +human 0 32 3.465736 0.000000 546 +given 0 32 3.465736 0.000000 538 +transform 0 32 3.465736 0.000000 542 +scale 0 28 3.610918 0.000000 613 +manipul 0 27 3.637586 0.000000 624 +wai 1 25 3.737670 3.737670 662 +displai 0 23 3.806662 0.000000 712 +cooper 0 22 3.850148 0.000000 757 +geometr 0 19 4.007333 0.000000 852 +hierarch 0 15 4.248495 0.000000 1018 +stephen 1 11 4.553877 4.553877 1342 +fill 0 11 4.553877 0.000000 1349 +itali 0 11 4.553877 0.000000 1378 +packard 0 10 4.653960 0.000000 1444 +classif 0 9 4.753590 0.000000 1586 +partner 0 8 4.875197 0.000000 1648 +yang 0 8 4.875197 0.000000 1652 +baker 2 7 5.010635 10.021270 1812 +chief 0 7 5.010635 0.000000 1829 +bell 0 6 5.164786 0.000000 2224 +carlson 0 5 5.347108 0.000000 2351 +patent 0 5 5.347108 0.000000 2574 +bricker 0 4 5.568345 0.000000 3050 +assess 0 4 5.568345 0.000000 2724 +lauren 0 3 5.857933 0.000000 3251 +metip 0 3 5.857933 0.000000 3937 +marla 2 2 6.263398 12.526796 4510 +eick 1 2 6.263398 6.263398 5971 +burnett 1 2 6.263398 6.263398 4578 +crime 0 2 6.263398 0.000000 5972 +cscl 0 2 6.263398 0.000000 5837 +stevetanimoto 0 2 6.263398 0.000000 5835 +bentlei 0 1 6.957497 0.000000 15935 +interestsgraph 0 1 6.957497 0.000000 15936 +coimag 0 1 6.957497 0.000000 15937 +devleop 0 1 6.957497 0.000000 15938 +contol 0 1 6.957497 0.000000 15939 +cansimultan 0 1 6.957497 0.000000 15940 +publicationsbak 0 1 6.957497 0.000000 15941 +bohu 0 1 6.957497 0.000000 15942 +margaret 0 1 6.957497 0.000000 15943 +sorento 0 1 6.957497 0.000000 15944 +apparatu 0 1 6.957497 0.000000 15945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..49bc6683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +applic 0 170 1.791759 0.000000 56 +phone 0 175 1.791759 0.000000 45 +implement 0 152 1.791759 0.000000 52 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +architectur 0 139 1.945910 0.000000 77 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +confer 0 126 2.079442 0.000000 100 +code 1 108 2.197225 2.197225 116 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +stuff 1 87 2.484907 2.484907 171 +dynam 1 76 2.564949 2.564949 194 +optim 0 79 2.564949 0.000000 197 +good 0 77 2.564949 0.000000 200 +workshop 0 71 2.639057 0.000000 239 +java 0 70 2.708050 0.000000 248 +plai 0 60 2.833213 0.000000 307 +think 0 57 2.890372 0.000000 314 +browser 0 56 2.890372 0.000000 313 +processor 0 54 2.944439 0.000000 335 +extens 0 53 2.944439 0.000000 340 +februari 0 54 2.944439 0.000000 328 +local 0 55 2.944439 0.000000 334 +effect 0 46 3.091042 0.000000 385 +fast 0 42 3.218876 0.000000 429 +past 0 42 3.218876 0.000000 428 +produc 0 30 3.555348 0.000000 572 +constraint 0 26 3.688879 0.000000 636 +bookmark 0 26 3.688879 0.000000 639 +interpret 0 24 3.761200 0.000000 686 +runtim 1 19 4.007333 4.007333 858 +figur 0 18 4.060443 0.000000 903 +bershad 0 18 4.060443 0.000000 902 +event 0 18 4.060443 0.000000 896 +modern 0 16 4.174387 0.000000 966 +side 0 15 4.248495 0.000000 1022 +goe 0 15 4.248495 0.000000 1044 +susan 0 15 4.248495 0.000000 1050 +black 0 10 4.653960 0.000000 1418 +chamber 1 8 4.875197 4.875197 1692 +egger 1 8 4.875197 4.875197 1695 +wire 0 8 4.875197 0.000000 1747 +craig 0 7 5.010635 0.000000 1879 +dispatch 0 7 5.010635 0.000000 1791 +mock 0 6 5.164786 0.000000 2087 +blue 0 6 5.164786 0.000000 2227 +philipos 1 5 5.347108 5.347108 2373 +asystem 0 4 5.568345 0.000000 2612 +andp 0 4 5.568345 0.000000 2811 +pardyak 0 4 5.568345 0.000000 3043 +ausland 1 3 5.857933 5.857933 3917 +matthai 1 2 6.263398 6.263398 4514 +withprofessor 0 2 6.263398 0.000000 5180 +eggersand 0 2 6.263398 0.000000 4522 +ribbon 0 2 6.263398 0.000000 5973 +compileri 0 1 6.957497 0.000000 15946 +beast 0 1 6.957497 0.000000 15947 +shortterm 0 1 6.957497 0.000000 15948 +basedsystem 0 1 6.957497 0.000000 15949 +canbenefit 0 1 6.957497 0.000000 15950 +onprogram 0 1 6.957497 0.000000 15951 +automaticdynam 0 1 6.957497 0.000000 15952 +frequentlymiscellan 0 1 6.957497 0.000000 15953 +importancefrom 0 1 6.957497 0.000000 15954 +abuwhi 0 1 6.957497 0.000000 15955 +campaign 0 1 6.957497 0.000000 15956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..23413c30 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +student 0 343 1.098612 0.000000 19 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +list 0 201 1.609438 0.000000 39 +contact 1 153 1.791759 1.791759 59 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +phone 0 175 1.791759 0.000000 45 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +parallel 0 169 1.791759 0.000000 60 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +person 1 111 2.197225 2.197225 117 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +place 0 106 2.197225 0.000000 124 +user 0 104 2.302585 0.000000 137 +memori 0 101 2.302585 0.000000 139 +real 0 93 2.397895 0.000000 144 +call 0 91 2.397895 0.000000 153 +proceed 0 93 2.397895 0.000000 152 +pictur 0 89 2.397895 0.000000 160 +octob 0 89 2.397895 0.000000 156 +contain 0 81 2.484907 0.000000 174 +messag 1 76 2.564949 2.564949 212 +interfac 1 79 2.564949 2.564949 209 +exampl 0 77 2.564949 0.000000 195 +involv 0 71 2.639057 0.000000 227 +free 0 73 2.639057 0.000000 224 +workshop 0 71 2.639057 0.000000 239 +laboratori 0 63 2.772589 0.000000 292 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +juli 0 60 2.833213 0.000000 305 +index 0 56 2.890372 0.000000 309 +faculti 0 56 2.890372 0.000000 325 +summer 0 56 2.890372 0.000000 311 +advisor 0 51 2.995732 0.000000 355 +maintain 0 51 2.995732 0.000000 342 +adapt 0 46 3.091042 0.000000 387 +answer 0 45 3.135494 0.000000 391 +past 1 42 3.218876 3.218876 428 +futur 0 41 3.218876 0.000000 427 +live 0 40 3.258097 0.000000 451 +electr 0 38 3.295837 0.000000 461 +industri 0 38 3.295837 0.000000 464 +game 0 36 3.367296 0.000000 498 +dissert 0 32 3.465736 0.000000 549 +graph 0 30 3.555348 0.000000 576 +produc 0 30 3.555348 0.000000 572 +pass 1 28 3.610918 3.610918 611 +bookmark 0 26 3.688879 0.000000 639 +head 0 23 3.806662 0.000000 732 +rout 1 21 3.912023 3.912023 793 +chip 0 21 3.912023 0.000000 770 +hous 0 21 3.912023 0.000000 801 +fine 0 20 3.951244 0.000000 822 +render 0 17 4.110874 0.000000 947 +medic 0 17 4.110874 0.000000 958 +cambridg 0 16 4.174387 0.000000 1008 +carl 1 15 4.248495 4.248495 1024 +countri 0 15 4.248495 0.000000 1059 +princeton 0 15 4.248495 0.000000 1042 +floor 0 14 4.317488 0.000000 1070 +massachusett 0 14 4.317488 0.000000 1118 +larri 1 13 4.382027 4.382027 1142 +menu 0 13 4.382027 0.000000 1156 +canada 0 13 4.382027 0.000000 1158 +speak 0 12 4.465908 0.000000 1283 +volum 0 11 4.553877 0.000000 1347 +mesh 0 11 4.553877 0.000000 1351 +packet 0 10 4.653960 0.000000 1415 +label 0 10 4.653960 0.000000 1423 +purdu 0 10 4.653960 0.000000 1466 +coast 1 8 4.875197 4.875197 1746 +angel 1 8 4.875197 4.875197 1779 +mile 0 8 4.875197 0.000000 1743 +creativ 0 8 4.875197 0.000000 1777 +virginia 0 8 4.875197 0.000000 1659 +shot 1 7 5.010635 5.010635 1898 +marri 0 7 5.010635 0.000000 1946 +adob 0 7 5.010635 0.000000 1873 +layout 0 6 5.164786 0.000000 2183 +east 1 5 5.347108 5.347108 2472 +chaotic 1 5 5.347108 5.347108 2566 +snyder 0 5 5.347108 0.000000 2359 +remain 0 5 5.347108 0.000000 2278 +amus 0 5 5.347108 0.000000 2366 +neil 1 4 5.568345 5.568345 2841 +ebel 0 4 5.568345 0.000000 2756 +tester 0 4 5.568345 0.000000 2754 +mcmurchi 0 4 5.568345 0.000000 2757 +gregori 0 4 5.568345 0.000000 2928 +merl 1 3 5.857933 5.857933 3843 +dine 0 3 5.857933 0.000000 3472 +mitsubishi 0 3 5.857933 0.000000 3842 +mactest 0 3 5.857933 0.000000 3972 +fashion 0 3 5.857933 0.000000 3699 +neighborhood 0 3 5.857933 0.000000 3242 +jar 0 3 5.857933 0.000000 3223 +mckenzi 2 2 6.263398 12.526796 5974 +gemini 1 2 6.263398 6.263398 5975 +andwork 0 2 6.263398 0.000000 5403 +projectsi 0 2 6.263398 0.000000 5931 +isomorph 0 2 6.263398 0.000000 5976 +knowna 0 2 6.263398 0.000000 5480 +shirt 0 2 6.263398 0.000000 5977 +farm 0 2 6.263398 0.000000 4115 +broadwai 0 1 6.957497 0.000000 15957 +projectsgonna 0 1 6.957497 0.000000 15958 +teenag 0 1 6.957497 0.000000 15959 +lobotomi 0 1 6.957497 0.000000 15960 +ramonesi 0 1 6.957497 0.000000 15961 +projectconcern 0 1 6.957497 0.000000 15962 +copiou 0 1 6.957497 0.000000 15963 +expatri 0 1 6.957497 0.000000 15964 +onchaot 0 1 6.957497 0.000000 15965 +routingwith 0 1 6.957497 0.000000 15966 +torusnetwork 0 1 6.957497 0.000000 15967 +thecranium 0 1 6.957497 0.000000 15968 +compatiblewith 0 1 6.957497 0.000000 15969 +netlist 0 1 6.957497 0.000000 15970 +calledgemini 0 1 6.957497 0.000000 15971 +schemat 0 1 6.957497 0.000000 15972 +cranium 0 1 6.957497 0.000000 15973 +packetrout 0 1 6.957497 0.000000 15974 +andcommun 0 1 6.957497 0.000000 15975 +tomactest 0 1 6.957497 0.000000 15976 +arlington 0 1 6.957497 0.000000 15977 +livein 0 1 6.957497 0.000000 15978 +ofballard 0 1 6.957497 0.000000 15979 +artworkcr 0 1 6.957497 0.000000 15980 +photoshop 0 1 6.957497 0.000000 15981 +ownedthi 0 1 6.957497 0.000000 15982 +onlyth 0 1 6.957497 0.000000 15983 +correctlyguess 0 1 6.957497 0.000000 15984 +toriddl 0 1 6.957497 0.000000 15985 +jour 0 1 6.957497 0.000000 15986 +honei 0 1 6.957497 0.000000 15987 +myuncl 0 1 6.957497 0.000000 15988 +edmonton 0 1 6.957497 0.000000 15989 +alberta 0 1 6.957497 0.000000 15990 +linkschairman 0 1 6.957497 0.000000 15991 +linksnorm 0 1 6.957497 0.000000 15992 +halcyon 0 1 6.957497 0.000000 15993 +eugen 0 1 6.957497 0.000000 15994 +spafford 0 1 6.957497 0.000000 15995 +randi 0 1 6.957497 0.000000 15996 +pausch 0 1 6.957497 0.000000 15997 +wallach 0 1 6.957497 0.000000 15998 +scool 0 1 6.957497 0.000000 15999 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..7f1ac80a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +data 0 170 1.791759 0.000000 49 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +analysi 1 124 2.079442 2.079442 98 +seattl 0 120 2.079442 0.000000 103 +report 0 131 2.079442 0.000000 92 +specif 1 106 2.197225 2.197225 106 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +manag 0 114 2.197225 0.000000 125 +topic 0 114 2.197225 0.000000 110 +proceed 1 93 2.397895 2.397895 152 +ieee 1 86 2.484907 2.484907 190 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +decemb 0 80 2.564949 0.000000 215 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +servic 1 72 2.639057 2.639057 236 +symposium 0 72 2.639057 0.000000 238 +workshop 0 71 2.639057 0.000000 239 +receiv 0 66 2.708050 0.000000 244 +creat 1 63 2.772589 2.772589 277 +improv 0 62 2.772589 0.000000 289 +sever 0 56 2.890372 0.000000 322 +summer 0 56 2.890372 0.000000 311 +point 0 58 2.890372 0.000000 319 +extens 2 53 2.944439 5.888878 340 +processor 0 54 2.944439 0.000000 335 +allow 0 53 2.944439 0.000000 333 +run 1 51 2.995732 2.995732 347 +hardwar 0 51 2.995732 0.000000 350 +telephon 1 50 3.044522 3.044522 373 +principl 0 48 3.044522 0.000000 357 +adapt 0 46 3.091042 0.000000 387 +protocol 1 45 3.135494 3.135494 407 +describ 1 45 3.135494 3.135494 400 +mechan 0 43 3.178054 0.000000 416 +http 1 41 3.218876 3.218876 420 +transact 0 39 3.258097 0.000000 438 +winter 0 36 3.367296 0.000000 500 +toler 0 33 3.433987 0.000000 533 +fault 0 32 3.465736 0.000000 547 +posit 0 31 3.496508 0.000000 552 +rang 0 30 3.555348 0.000000 565 +graph 0 30 3.555348 0.000000 576 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +load 0 28 3.610918 0.000000 601 +compar 1 26 3.688879 3.688879 648 +request 1 26 3.688879 3.688879 635 +enabl 0 26 3.688879 0.000000 655 +spent 1 25 3.737670 3.737670 676 +primari 0 25 3.737670 0.000000 669 +demonstr 1 24 3.761200 3.761200 694 +lab 0 24 3.761200 0.000000 698 +flow 0 24 3.761200 0.000000 700 +deal 0 22 3.850148 0.000000 736 +corpor 0 21 3.912023 0.000000 802 +similar 0 21 3.912023 0.000000 771 +kernel 1 20 3.951244 3.951244 825 +safeti 0 20 3.951244 0.000000 817 +spend 0 19 4.007333 0.000000 850 +region 0 19 4.007333 0.000000 875 +protect 1 17 4.110874 4.110874 935 +germani 0 17 4.110874 0.000000 946 +anyon 0 17 4.110874 0.000000 916 +commerci 0 16 4.174387 0.000000 1005 +princeton 0 15 4.248495 0.000000 1042 +contribut 0 15 4.248495 0.000000 1021 +spin 1 14 4.317488 4.317488 1121 +achiev 1 14 4.317488 4.317488 1088 +near 0 14 4.317488 0.000000 1091 +happi 0 14 4.317488 0.000000 1079 +pretti 0 13 4.382027 0.000000 1191 +safe 1 12 4.465908 4.465908 1274 +usenix 0 12 4.465908 0.000000 1240 +abil 0 11 4.553877 0.000000 1341 +custom 0 10 4.653960 0.000000 1414 +elimin 0 9 4.753590 0.000000 1558 +marc 1 8 4.875197 4.875197 1680 +hack 0 7 5.010635 0.000000 1950 +fifth 0 7 5.010635 0.000000 1931 +bell 0 6 5.164786 0.000000 2224 +isth 0 5 5.347108 0.000000 2532 +sole 0 4 5.568345 0.000000 2592 +sell 0 4 5.568345 0.000000 2935 +fiuczynski 1 3 5.857933 5.857933 3390 +forappl 1 3 5.857933 5.857933 3929 +scratch 0 3 5.857933 0.000000 3140 +anin 0 3 5.857933 0.000000 3354 +fifteenth 0 3 5.857933 0.000000 3868 +linker 0 3 5.857933 0.000000 3157 +namespac 0 3 5.857933 0.000000 3957 +shortcom 1 2 6.263398 6.263398 5978 +backgroundi 0 2 6.263398 0.000000 5878 +highschool 0 2 6.263398 0.000000 5672 +ofproject 0 2 6.263398 0.000000 4446 +inord 0 2 6.263398 0.000000 4824 +linkabl 0 2 6.263398 0.000000 5979 +andcollect 0 2 6.263398 0.000000 4249 +contacthttp 1 1 6.957497 6.957497 16000 +grewup 0 1 6.957497 0.000000 16001 +sseldorf 0 1 6.957497 0.000000 16002 +fromrutg 0 1 6.957497 0.000000 16003 +mitr 0 1 6.957497 0.000000 16004 +proprietor 0 1 6.957497 0.000000 16005 +companythat 0 1 6.957497 0.000000 16006 +setof 0 1 6.957497 0.000000 16007 +chasi 0 1 6.957497 0.000000 16008 +univoic 0 1 6.957497 0.000000 16009 +cardsand 0 1 6.957497 0.000000 16010 +vxwork 0 1 6.957497 0.000000 16011 +compellingperform 0 1 6.957497 0.000000 16012 +tosimilar 0 1 6.957497 0.000000 16013 +anextens 0 1 6.957497 0.000000 16014 +betterperform 0 1 6.957497 0.000000 16015 +conventionaloper 0 1 6.957497 0.000000 16016 +technicalconfer 0 1 6.957497 0.000000 16017 +describeshow 0 1 6.957497 0.000000 16018 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..ebf6d461 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +last 0 314 1.098612 0.000000 14 +washington 0 236 1.386294 0.000000 32 +modifi 0 178 1.609438 0.000000 35 +algorithm 1 162 1.791759 1.791759 57 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +model 1 145 1.945910 1.945910 69 +perform 0 143 1.945910 0.000000 74 +machin 0 129 2.079442 0.000000 95 +real 0 93 2.397895 0.000000 144 +help 0 83 2.484907 0.000000 175 +requir 0 81 2.484907 0.000000 167 +method 0 80 2.564949 0.000000 213 +practic 0 70 2.708050 0.000000 246 +compar 0 26 3.688879 0.000000 648 +bound 0 26 3.688879 0.000000 659 +sort 0 22 3.850148 0.000000 738 +rout 1 21 3.912023 3.912023 793 +predict 0 19 4.007333 0.000000 855 +minim 1 18 4.060443 4.060443 887 +lower 0 18 4.060443 0.000000 886 +topolog 0 14 4.317488 0.000000 1089 +mesh 0 11 4.553877 0.000000 1351 +router 0 8 4.875197 0.000000 1772 +versu 0 6 5.164786 0.000000 2052 +upper 0 5 5.347108 0.000000 2481 +melani 0 2 6.263398 0.000000 5784 +deflect 1 1 6.957497 6.957497 16019 +fulgham 0 1 6.957497 0.000000 16020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..ed490309 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +year 0 148 1.945910 0.000000 84 +problem 0 147 1.945910 0.000000 75 +databas 0 122 2.079442 0.000000 86 +provid 0 121 2.079442 0.000000 94 +take 0 97 2.302585 0.000000 134 +imag 1 91 2.397895 2.397895 161 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +stuff 0 87 2.484907 0.000000 171 +exampl 0 77 2.564949 0.000000 195 +nation 0 74 2.639057 0.000000 240 +main 0 67 2.708050 0.000000 256 +guid 0 63 2.772589 0.000000 267 +plan 0 65 2.772589 0.000000 272 +visit 0 63 2.772589 0.000000 288 +done 0 47 3.091042 0.000000 381 +kind 0 32 3.465736 0.000000 541 +transform 0 32 3.465736 0.000000 542 +retriev 0 27 3.637586 0.000000 621 +try 0 22 3.850148 0.000000 764 +similar 0 21 3.912023 0.000000 771 +fact 0 21 3.912023 0.000000 780 +engineeringunivers 0 17 4.110874 0.000000 959 +commerci 0 16 4.174387 0.000000 1005 +english 0 15 4.248495 0.000000 1033 +trip 0 14 4.317488 0.000000 1113 +meng 1 12 4.465908 4.465908 1214 +newspap 0 12 4.465908 0.000000 1280 +scienceat 0 11 4.553877 0.000000 1375 +island 0 11 4.553877 0.000000 1345 +undergrad 0 9 4.753590 0.000000 1589 +charg 0 9 4.753590 0.000000 1582 +pennsylvania 0 7 5.010635 0.000000 1932 +interestsi 0 7 5.010635 0.000000 1969 +huge 0 6 5.164786 0.000000 1991 +singapor 1 5 5.347108 5.347108 2487 +snapshot 0 5 5.347108 0.000000 2303 +washingtonseattl 0 4 5.568345 0.000000 3044 +heng 0 2 6.263398 0.000000 5202 +strait 0 2 6.263398 0.000000 5980 +homepagemenghe 0 1 6.957497 0.000000 16021 +edubox 0 1 6.957497 0.000000 16022 +findimag 0 1 6.957497 0.000000 16023 +virag 0 1 6.957497 0.000000 16024 +andqbicar 0 1 6.957497 0.000000 16025 +singaporesingapor 0 1 6.957497 0.000000 16026 +infomap 0 1 6.957497 0.000000 16027 +andstatist 0 1 6.957497 0.000000 16028 +singaporeonlin 0 1 6.957497 0.000000 16029 +boardi 0 1 6.957497 0.000000 16030 +anintellig 0 1 6.957497 0.000000 16031 +menghe 0 1 6.957497 0.000000 16032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..2884cd2e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +includ 1 208 1.609438 1.609438 42 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +list 0 201 1.609438 0.000000 39 +parallel 0 169 1.791759 0.000000 60 +lectur 0 135 1.945910 0.000000 73 +area 0 144 1.945910 0.000000 80 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +code 0 108 2.197225 0.000000 116 +theori 0 111 2.197225 0.000000 127 +manag 0 114 2.197225 0.000000 125 +technic 0 100 2.302585 0.000000 140 +real 0 93 2.397895 0.000000 144 +optim 0 79 2.564949 0.000000 197 +workshop 1 71 2.639057 2.639057 239 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +maintain 1 51 2.995732 2.995732 342 +frequent 0 49 3.044522 0.000000 367 +life 0 50 3.044522 0.000000 375 +microsoft 0 38 3.295837 0.000000 468 +game 0 36 3.367296 0.000000 498 +michael 1 35 3.401197 3.401197 514 +represent 0 35 3.401197 0.000000 512 +chair 0 29 3.583519 0.000000 596 +static 0 27 3.637586 0.000000 619 +properti 0 22 3.850148 0.000000 749 +particularli 0 19 4.007333 0.000000 867 +previous 0 17 4.110874 0.000000 923 +debug 0 17 4.110874 0.000000 944 +philosophi 0 13 4.382027 0.000000 1167 +carri 0 13 4.382027 0.000000 1152 +awai 0 10 4.653960 0.000000 1447 +intermedi 0 9 4.753590 0.000000 1497 +cryptographi 0 9 4.753590 0.000000 1512 +serial 0 7 5.010635 0.000000 1975 +intellectu 0 7 5.010635 0.000000 1847 +occasion 0 7 5.010635 0.000000 1905 +sciencedepart 0 6 5.164786 0.000000 2172 +slice 0 4 5.568345 0.000000 2622 +popl 0 4 5.568345 0.000000 3068 +denot 0 3 5.857933 0.000000 3147 +ernst 1 2 6.263398 6.263398 4525 +eec 0 2 6.263398 0.000000 5981 +pagemichael 0 1 6.957497 0.000000 16033 +ernsti 0 1 6.957497 0.000000 16034 +riceunivers 0 1 6.957497 0.000000 16035 +programanalysi 0 1 6.957497 0.000000 16036 +coloc 0 1 6.957497 0.000000 16037 +semanticsi 0 1 6.957497 0.000000 16038 +resourcesfor 0 1 6.957497 0.000000 16039 +slip 0 1 6.957497 0.000000 16040 +possibleinterest 0 1 6.957497 0.000000 16041 +mernst 0 1 6.957497 0.000000 16042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..18e2426a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +compil 1 122 2.079442 2.079442 96 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +check 0 115 2.197225 0.000000 118 +well 0 109 2.197225 0.000000 121 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +search 1 95 2.397895 2.397895 155 +sinc 0 90 2.397895 0.000000 159 +stuff 1 87 2.484907 2.484907 171 +journal 0 83 2.484907 0.000000 183 +come 1 78 2.564949 2.564949 202 +state 0 76 2.564949 0.000000 207 +orient 0 80 2.564949 0.000000 205 +dynam 0 76 2.564949 0.000000 194 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +main 0 67 2.708050 0.000000 256 +view 0 70 2.708050 0.000000 254 +still 0 50 3.044522 0.000000 362 +cool 0 49 3.044522 0.000000 374 +even 0 45 3.135494 0.000000 393 +live 1 40 3.258097 3.258097 451 +movi 0 40 3.258097 0.000000 459 +electr 0 38 3.295837 0.000000 461 +expect 0 37 3.332205 0.000000 484 +field 0 37 3.332205 0.000000 482 +obtain 0 33 3.433987 0.000000 534 +travel 0 30 3.555348 0.000000 579 +quot 0 29 3.583519 0.000000 582 +consid 0 29 3.583519 0.000000 590 +american 0 27 3.637586 0.000000 634 +arrai 0 27 3.637586 0.000000 627 +spent 0 25 3.737670 0.000000 676 +grad 0 20 3.951244 0.000000 837 +left 0 19 4.007333 0.000000 851 +els 0 19 4.007333 0.000000 843 +event 0 18 4.060443 0.000000 896 +squar 0 14 4.317488 0.000000 1082 +spin 0 14 4.317488 0.000000 1121 +danc 0 12 4.465908 0.000000 1278 +grow 0 12 4.465908 0.000000 1209 +metacrawl 0 10 4.653960 0.000000 1455 +grew 0 8 4.875197 0.000000 1742 +solomon 0 8 4.875197 0.000000 1716 +cultur 0 7 5.010635 0.000000 1951 +bit 0 7 5.010635 0.000000 1833 +wouldn 0 7 5.010635 0.000000 1970 +mock 1 6 5.164786 5.164786 2087 +whichi 0 6 5.164786 0.000000 2056 +oopsla 0 6 5.164786 0.000000 2221 +altavista 0 6 5.164786 0.000000 2222 +neither 0 6 5.164786 0.000000 1990 +matthew 0 6 5.164786 0.000000 2193 +chess 1 5 5.347108 5.347108 2486 +upper 0 5 5.347108 0.000000 2481 +volunt 0 5 5.347108 0.000000 2307 +lili 0 5 5.347108 0.000000 2240 +fulbright 0 4 5.568345 0.000000 2963 +spanish 0 4 5.568345 0.000000 3017 +marku 1 3 5.857933 5.857933 3872 +district 0 3 5.857933 0.000000 3756 +karlsruh 0 3 5.857933 0.000000 3689 +latin 0 3 5.857933 0.000000 3741 +deutsch 0 3 5.857933 0.000000 3802 +diplom 0 2 6.263398 0.000000 5982 +umass 0 2 6.263398 0.000000 5899 +grante 0 2 6.263398 0.000000 4914 +labyrinth 0 2 6.263398 0.000000 5983 +mainstream 0 2 6.263398 0.000000 5437 +salsa 0 2 6.263398 0.000000 5984 +colloquia 0 2 6.263398 0.000000 4710 +mossi 0 2 6.263398 0.000000 5801 +rttemberg 0 1 6.957497 0.000000 16043 +anotherpart 0 1 6.957497 0.000000 16044 +biberach 0 1 6.957497 0.000000 16045 +swabia 0 1 6.957497 0.000000 16046 +oberschwaben 0 1 6.957497 0.000000 16047 +solitud 0 1 6.957497 0.000000 16048 +dieangst 0 1 6.957497 0.000000 16049 +torwart 0 1 6.957497 0.000000 16050 +beim 0 1 6.957497 0.000000 16051 +elfmet 0 1 6.957497 0.000000 16052 +handk 0 1 6.957497 0.000000 16053 +merengu 0 1 6.957497 0.000000 16054 +publicationssepar 0 1 6.957497 0.000000 16055 +olympiad 0 1 6.957497 0.000000 16056 +yerewan 0 1 6.957497 0.000000 16057 +csek 0 1 6.957497 0.000000 16058 +csebi 0 1 6.957497 0.000000 16059 +cse 0 1 6.957497 0.000000 16060 +studentsimag 0 1 6.957497 0.000000 16061 +engineeringy 0 1 6.957497 0.000000 16062 +realaudio 0 1 6.957497 0.000000 16063 +linksand 0 1 6.957497 0.000000 16064 +toil 0 1 6.957497 0.000000 16065 +unto 0 1 6.957497 0.000000 16066 +glorywa 0 1 6.957497 0.000000 16067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..3ac8f507 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +person 0 111 2.197225 0.000000 117 +vivek 1 6 5.164786 5.164786 2210 +narasayya 1 4 5.568345 5.568345 3065 +informationresearch 0 3 5.857933 0.000000 3675 +nara 0 1 6.957497 0.000000 16068 +interestspap 0 1 6.957497 0.000000 16069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..d2284672 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +seattl 0 120 2.079442 0.000000 103 +place 0 106 2.197225 0.000000 124 +take 0 97 2.302585 0.000000 134 +search 0 95 2.397895 0.000000 155 +internet 1 83 2.484907 2.484907 186 +name 1 72 2.639057 2.639057 220 +intellig 0 72 2.639057 0.000000 225 +sieg 0 69 2.708050 0.000000 260 +order 0 69 2.708050 0.000000 249 +artifici 0 63 2.772589 0.000000 280 +collect 0 65 2.772589 0.000000 268 +digit 0 52 2.995732 0.000000 348 +finger 0 52 2.995732 0.000000 354 +much 0 52 2.995732 0.000000 349 +principl 0 48 3.044522 0.000000 357 +friend 0 48 3.044522 0.000000 376 +quarter 0 47 3.091042 0.000000 389 +favorit 0 44 3.135494 0.000000 410 +movi 0 40 3.258097 0.000000 459 +tech 0 35 3.401197 0.000000 515 +india 1 32 3.465736 3.465736 550 +autumn 0 31 3.496508 0.000000 558 +travel 0 30 3.555348 0.000000 579 +finish 0 22 3.850148 0.000000 748 +stop 0 17 4.110874 0.000000 942 +adam 0 17 4.110874 0.000000 934 +cook 0 10 4.653960 0.000000 1464 +sound 0 9 4.753590 0.000000 1605 +heavi 0 7 5.010635 0.000000 1841 +alphabet 0 6 5.164786 0.000000 1980 +dougla 0 5 5.347108 0.000000 2471 +delhi 0 5 5.347108 0.000000 2530 +radio 0 4 5.568345 0.000000 3025 +skate 0 4 5.568345 0.000000 3046 +terri 0 3 5.857933 0.000000 3264 +impli 0 3 5.857933 0.000000 3348 +astronomi 0 3 5.857933 0.000000 3974 +coin 0 3 5.857933 0.000000 3799 +pelham 0 2 6.263398 0.000000 4988 +grenvil 0 2 6.263398 0.000000 4989 +himanshu 1 1 6.957497 6.957497 16070 +nautiy 1 1 6.957497 6.957497 16071 +pagehimanshu 0 1 6.957497 0.000000 16072 +nautiyalthi 0 1 6.957497 0.000000 16073 +nautiyaldept 0 1 6.957497 0.000000 16074 +edugod 0 1 6.957497 0.000000 16075 +gift 0 1 6.957497 0.000000 16076 +personkind 0 1 6.957497 0.000000 16077 +pratchett 0 1 6.957497 0.000000 16078 +wodehouseth 0 1 6.957497 0.000000 16079 +aviat 0 1 6.957497 0.000000 16080 +numismat 0 1 6.957497 0.000000 16081 +profound 0 1 6.957497 0.000000 16082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..d66b68d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +mail 0 238 1.386294 0.000000 22 +paper 0 205 1.609438 0.000000 38 +avail 1 169 1.791759 1.791759 48 +contact 0 153 1.791759 0.000000 59 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +technolog 1 131 2.079442 2.079442 102 +provid 0 121 2.079442 0.000000 94 +number 0 130 2.079442 0.000000 97 +seattl 0 120 2.079442 0.000000 103 +version 0 113 2.197225 0.000000 122 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +need 1 98 2.302585 2.302585 135 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +comment 0 93 2.397895 0.000000 146 +resourc 0 81 2.484907 0.000000 172 +stuff 0 87 2.484907 0.000000 171 +know 0 80 2.564949 0.000000 198 +intellig 0 72 2.639057 0.000000 225 +servic 0 72 2.639057 0.000000 236 +line 0 75 2.639057 0.000000 231 +involv 0 71 2.639057 0.000000 227 +artifici 0 63 2.772589 0.000000 280 +automat 0 61 2.833213 0.000000 306 +sever 0 56 2.890372 0.000000 322 +browser 0 56 2.890372 0.000000 313 +date 0 51 2.995732 0.000000 344 +week 0 52 2.995732 0.000000 343 +favorit 0 44 3.135494 0.000000 410 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +random 0 34 3.401197 0.000000 511 +return 0 34 3.401197 0.000000 502 +great 0 27 3.637586 0.000000 626 +enabl 0 26 3.688879 0.000000 655 +bookmark 0 26 3.688879 0.000000 639 +alwai 0 24 3.761200 0.000000 691 +color 0 22 3.850148 0.000000 762 +tell 0 21 3.912023 0.000000 777 +anonym 0 14 4.317488 0.000000 1100 +easili 0 14 4.317488 0.000000 1077 +preliminari 0 9 4.753590 0.000000 1480 +wilson 0 9 4.753590 0.000000 1536 +awar 0 7 5.010635 0.000000 1800 +guidelin 0 7 5.010635 0.000000 1832 +divers 0 6 5.164786 0.000000 2232 +handi 0 6 5.164786 0.000000 2111 +lost 0 5 5.347108 0.000000 2358 +alsointerest 0 3 5.857933 0.000000 3813 +shortli 0 3 5.857933 0.000000 3375 +surgeri 0 3 5.857933 0.000000 3975 +javascript 0 3 5.857933 0.000000 3221 +republican 0 3 5.857933 0.000000 3815 +miscellani 0 3 5.857933 0.000000 3976 +nichola 0 3 5.857933 0.000000 3252 +uwcs 0 3 5.857933 0.000000 3977 +labyrinth 0 2 6.263398 0.000000 5983 +cynic 0 2 6.263398 0.000000 5854 +andcognit 0 2 6.263398 0.000000 5681 +temperatur 0 2 6.263398 0.000000 5985 +bitter 0 2 6.263398 0.000000 5387 +ironi 0 2 6.263398 0.000000 5986 +nonetheless 0 2 6.263398 0.000000 4681 +madeavail 0 2 6.263398 0.000000 4326 +mediocr 0 1 6.957497 0.000000 16083 +bemoan 0 1 6.957497 0.000000 16084 +hype 0 1 6.957497 0.000000 16085 +skeptic 0 1 6.957497 0.000000 16086 +automaticconstruct 0 1 6.957497 0.000000 16087 +wrapper 0 1 6.957497 0.000000 16088 +beeninvolv 0 1 6.957497 0.000000 16089 +glbal 0 1 6.957497 0.000000 16090 +infrmatin 0 1 6.957497 0.000000 16091 +sperhighwai 0 1 6.957497 0.000000 16092 +meter 0 1 6.957497 0.000000 16093 +ronald 0 1 6.957497 0.000000 16094 +reagan 0 1 6.957497 0.000000 16095 +wendel 0 1 6.957497 0.000000 16096 +berri 0 1 6.957497 0.000000 16097 +constitutesgood 0 1 6.957497 0.000000 16098 +kushmerick 0 1 6.957497 0.000000 16099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..bc992e75 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +seattl 1 120 2.079442 2.079442 103 +analysi 0 124 2.079442 0.000000 98 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +memori 1 101 2.302585 2.302585 139 +octob 0 89 2.397895 0.000000 156 +contain 1 81 2.484907 2.484907 174 +onlin 0 75 2.639057 0.000000 223 +test 0 66 2.708050 0.000000 252 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +march 0 61 2.833213 0.000000 295 +sever 0 56 2.890372 0.000000 322 +summer 0 56 2.890372 0.000000 311 +local 0 55 2.944439 0.000000 334 +found 0 53 2.944439 0.000000 337 +brian 0 38 3.295837 0.000000 466 +game 0 36 3.367296 0.000000 498 +known 0 24 3.761200 0.000000 702 +reduc 0 22 3.850148 0.000000 759 +sort 0 22 3.850148 0.000000 738 +safeti 0 20 3.951244 0.000000 817 +wonder 0 20 3.951244 0.000000 815 +bershad 1 18 4.060443 4.060443 902 +chateau 0 16 4.174387 0.000000 997 +overhead 0 15 4.248495 0.000000 1035 +karlin 1 13 4.382027 4.382027 1176 +nanci 0 12 4.465908 0.000000 1256 +anna 0 12 4.465908 0.000000 1292 +promot 0 12 4.465908 0.000000 1235 +isca 1 11 4.553877 4.553877 1354 +itali 0 11 4.553877 0.000000 1378 +leveson 1 9 4.753590 4.753590 1540 +wayn 1 8 4.875197 4.875197 1738 +romer 1 8 4.875197 4.875197 1706 +guggenheim 0 8 4.875197 0.000000 1759 +invest 1 6 5.164786 5.164786 2153 +spare 0 6 5.164786 0.000000 2177 +ohlrich 1 5 5.347108 5.347108 2564 +annex 0 5 5.347108 0.000000 2572 +sytem 0 4 5.568345 0.000000 3015 +superpag 0 3 5.857933 0.000000 3978 +damag 0 2 6.263398 0.000000 5687 +debut 0 1 6.957497 0.000000 16100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..7cd1b309 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 0 213 1.386294 0.000000 25 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +hour 0 165 1.791759 0.000000 46 +year 1 148 1.945910 1.945910 84 +like 0 132 1.945910 0.000000 81 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +seattl 1 120 2.079442 2.079442 103 +welcom 0 122 2.079442 0.000000 99 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +mathemat 0 108 2.197225 0.000000 123 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +resum 0 79 2.564949 0.000000 217 +summari 0 73 2.639057 0.000000 237 +sieg 1 69 2.708050 2.708050 260 +practic 0 70 2.708050 0.000000 246 +august 0 66 2.708050 0.000000 257 +wednesdai 0 64 2.772589 0.000000 261 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +plai 0 60 2.833213 0.000000 307 +summer 1 56 2.890372 2.890372 311 +three 0 54 2.944439 0.000000 330 +cool 0 49 3.044522 0.000000 374 +california 1 46 3.091042 3.091042 388 +move 0 47 3.091042 0.000000 382 +quarter 0 47 3.091042 0.000000 389 +made 0 44 3.135494 0.000000 398 +york 0 41 3.218876 0.000000 435 +live 1 40 3.258097 3.258097 451 +origin 0 38 3.295837 0.000000 472 +seminar 0 38 3.295837 0.000000 470 +option 0 30 3.555348 0.000000 575 +usual 0 28 3.610918 0.000000 608 +berkelei 1 26 3.688879 3.688879 657 +departmentunivers 0 24 3.761200 0.000000 711 +daili 0 24 3.761200 0.000000 706 +ofwashington 0 22 3.850148 0.000000 766 +whole 0 17 4.110874 0.000000 940 +took 0 16 4.174387 0.000000 1010 +basketbal 0 12 4.465908 0.000000 1289 +employ 0 12 4.465908 0.000000 1291 +realiti 0 12 4.465908 0.000000 1272 +israel 1 11 4.553877 4.553877 1366 +seven 0 9 4.753590 0.000000 1561 +angel 0 8 4.875197 0.000000 1779 +potenti 0 8 4.875197 0.000000 1690 +studentcomput 0 7 5.010635 0.000000 1963 +bunch 0 7 5.010635 0.000000 1861 +hike 0 6 5.164786 0.000000 2234 +northeast 0 3 5.857933 0.000000 3922 +haifa 0 3 5.857933 0.000000 3554 +kwon 0 3 5.857933 0.000000 3690 +gershoni 1 2 6.263398 6.263398 4513 +shirt 0 2 6.263398 0.000000 5977 +washingtonoffic 0 1 6.957497 0.000000 16101 +fouryear 0 1 6.957497 0.000000 16102 +lancast 0 1 6.957497 0.000000 16103 +holon 0 1 6.957497 0.000000 16104 +amta 0 1 6.957497 0.000000 16105 +aremondai 0 1 6.957497 0.000000 16106 +tose 0 1 6.957497 0.000000 16107 +graphicsprogram 0 1 6.957497 0.000000 16108 +riderlink 0 1 6.957497 0.000000 16109 +seattletransport 0 1 6.957497 0.000000 16110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..c00015e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +tool 0 117 2.079442 0.000000 93 +teach 0 108 2.197225 0.000000 112 +check 0 115 2.197225 0.000000 118 +find 0 111 2.197225 0.000000 111 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +section 0 94 2.397895 0.000000 149 +pictur 0 89 2.397895 0.000000 160 +school 0 84 2.484907 0.000000 188 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +educ 0 86 2.484907 0.000000 191 +would 1 67 2.708050 2.708050 251 +sieg 0 69 2.708050 0.000000 260 +experi 0 64 2.772589 0.000000 283 +advisor 0 51 2.995732 0.000000 355 +profession 0 51 2.995732 0.000000 345 +realli 0 40 3.258097 0.000000 444 +go 0 33 3.433987 0.000000 529 +sometim 0 24 3.761200 0.000000 696 +head 0 23 3.806662 0.000000 732 +color 1 22 3.850148 3.850148 762 +try 0 22 3.850148 0.000000 764 +leav 0 21 3.912023 0.000000 772 +boston 0 19 4.007333 0.000000 862 +offici 0 18 4.060443 0.000000 894 +decid 0 14 4.317488 0.000000 1075 +embed 0 14 4.317488 0.000000 1102 +came 0 13 4.382027 0.000000 1197 +jean 0 10 4.653960 0.000000 1440 +northwest 0 7 5.010635 0.000000 1973 +accord 0 7 5.010635 0.000000 1826 +hack 0 7 5.010635 0.000000 1950 +chinook 1 6 5.164786 5.164786 2229 +gaetano 0 6 5.164786 0.000000 2068 +beer 0 6 5.164786 0.000000 2216 +german 0 6 5.164786 0.000000 2190 +myresum 0 6 5.164786 0.000000 2199 +ortega 1 5 5.347108 5.347108 2559 +ross 1 5 5.347108 5.347108 2243 +knew 0 5 5.347108 0.000000 2445 +borriello 0 5 5.347108 0.000000 2349 +wear 0 4 5.568345 0.000000 2785 +albert 0 2 6.263398 0.000000 5987 +unoffici 0 2 6.263398 0.000000 5909 +brew 0 2 6.263398 0.000000 5988 +shepherd 0 2 6.263398 0.000000 4347 +behav 0 2 6.263398 0.000000 4670 +tequila 1 1 6.957497 6.957497 16111 +einstein 0 1 6.957497 0.000000 16112 +pageaft 0 1 6.957497 0.000000 16113 +puppi 0 1 6.957497 0.000000 16114 +updatedthu 0 1 6.957497 0.000000 16115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..51b3cf29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +class 0 199 1.609438 0.000000 37 +list 0 201 1.609438 0.000000 39 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +relat 0 139 1.945910 0.000000 68 +seattl 1 120 2.079442 2.079442 103 +document 0 121 2.079442 0.000000 89 +tool 0 117 2.079442 0.000000 93 +site 1 106 2.197225 2.197225 119 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +need 0 98 2.302585 0.000000 135 +take 0 97 2.302585 0.000000 134 +graphic 0 90 2.397895 0.000000 147 +search 0 95 2.397895 0.000000 155 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +free 0 73 2.639057 0.000000 224 +nation 0 74 2.639057 0.000000 240 +simul 0 66 2.708050 0.000000 255 +share 0 59 2.833213 0.000000 304 +think 1 57 2.890372 2.890372 314 +thesi 0 57 2.890372 0.000000 327 +instruct 0 53 2.944439 0.000000 332 +week 0 52 2.995732 0.000000 343 +particular 0 51 2.995732 0.000000 352 +featur 0 46 3.091042 0.000000 386 +show 1 43 3.178054 3.178054 417 +around 0 43 3.178054 0.000000 415 +http 0 41 3.218876 0.000000 420 +movi 0 40 3.258097 0.000000 459 +everi 0 34 3.401197 0.000000 519 +consid 0 29 3.583519 0.000000 590 +weather 1 28 3.610918 3.610918 618 +though 0 27 3.637586 0.000000 622 +background 0 25 3.737670 0.000000 664 +trace 0 25 3.737670 0.000000 677 +sometim 1 24 3.761200 3.761200 696 +disk 0 22 3.850148 0.000000 747 +item 1 19 4.007333 4.007333 856 +particularli 0 19 4.007333 0.000000 867 +runtim 0 19 4.007333 0.000000 858 +log 0 19 4.007333 0.000000 857 +engineeringunivers 0 17 4.110874 0.000000 959 +white 0 17 4.110874 0.000000 951 +regular 0 17 4.110874 0.000000 929 +letter 0 16 4.174387 0.000000 981 +month 0 15 4.248495 0.000000 1025 +doesn 0 15 4.248495 0.000000 1055 +stori 0 14 4.317488 0.000000 1087 +washingtonbox 0 13 4.382027 0.000000 1200 +weak 0 13 4.382027 0.000000 1159 +minor 0 12 4.465908 0.000000 1237 +newspap 0 12 4.465908 0.000000 1280 +bill 0 11 4.553877 0.000000 1297 +black 0 10 4.653960 0.000000 1418 +telecommun 0 9 4.753590 0.000000 1565 +transport 0 8 4.875197 0.000000 1672 +film 0 8 4.875197 0.000000 1761 +ethic 0 7 5.010635 0.000000 1786 +courtesi 0 7 5.010635 0.000000 1953 +blue 0 6 5.164786 0.000000 2227 +quick 0 6 5.164786 0.000000 2184 +famou 0 6 5.164786 0.000000 2185 +legal 0 6 5.164786 0.000000 2094 +privaci 0 6 5.164786 0.000000 2144 +everybodi 0 5 5.347108 0.000000 2517 +oncomput 0 5 5.347108 0.000000 2326 +truli 0 5 5.347108 0.000000 2476 +festiv 0 4 5.568345 0.000000 2952 +flat 0 3 5.857933 0.000000 3853 +icon 0 3 5.857933 0.000000 3362 +unrel 0 3 5.857933 0.000000 3837 +ribbon 0 2 6.263398 0.000000 5973 +quiet 0 2 6.263398 0.000000 5203 +likewis 0 2 6.263398 0.000000 4100 +blink 0 2 6.263398 0.000000 5067 +anymor 0 2 6.263398 0.000000 5938 +gross 0 2 6.263398 0.000000 5989 +beth 1 1 6.957497 6.957497 16116 +pardo 1 1 6.957497 6.957497 16117 +courtesei 1 1 6.957497 6.957497 16118 +untitl 0 1 6.957497 0.000000 16119 +morri 0 1 6.957497 0.000000 16120 +pardodepart 0 1 6.957497 0.000000 16121 +washingtonusapardo 0 1 6.957497 0.000000 16122 +edunot 0 1 6.957497 0.000000 16123 +asimgsrc 0 1 6.957497 0.000000 16124 +blueribbon 0 1 6.957497 0.000000 16125 +rib_trn_plain_sm 0 1 6.957497 0.000000 16126 +opposit 0 1 6.957497 0.000000 16127 +speechprohibit 0 1 6.957497 0.000000 16128 +academicsom 0 1 6.957497 0.000000 16129 +papersi 0 1 6.957497 0.000000 16130 +rtcg 0 1 6.957497 0.000000 16131 +architectureandcompil 0 1 6.957497 0.000000 16132 +otherpeopl 0 1 6.957497 0.000000 16133 +stylenon 0 1 6.957497 0.000000 16134 +academicfeatur 0 1 6.957497 0.000000 16135 +itemsbicyclesbusinessescomputersfoodhumori 0 1 6.957497 0.000000 16136 +weirdnesslinux 0 1 6.957497 0.000000 16137 +journalmusicgoofi 0 1 6.957497 0.000000 16138 +politicssci 0 1 6.957497 0.000000 16139 +dant 0 1 6.957497 0.000000 16140 +trepan 0 1 6.957497 0.000000 16141 +wors 0 1 6.957497 0.000000 16142 +newhous 0 1 6.957497 0.000000 16143 +yesterdai 0 1 6.957497 0.000000 16144 +stuffpardo 0 1 6.957497 0.000000 16145 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..2999c38d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +phone 1 175 1.791759 1.791759 45 +distribut 0 162 1.791759 0.000000 51 +base 0 165 1.791759 0.000000 50 +relat 1 139 1.945910 1.945910 68 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +seattl 1 120 2.079442 2.079442 103 +compil 0 122 2.079442 0.000000 96 +schedul 0 119 2.079442 0.000000 85 +find 0 111 2.197225 0.000000 111 +book 0 99 2.302585 0.000000 131 +commun 0 95 2.397895 0.000000 157 +school 1 84 2.484907 2.484907 188 +activ 0 84 2.484907 0.000000 182 +resourc 0 81 2.484907 0.000000 172 +resum 0 79 2.564949 0.000000 217 +descript 0 64 2.772589 0.000000 271 +variou 0 56 2.890372 0.000000 317 +extens 0 53 2.944439 0.000000 340 +week 0 52 2.995732 0.000000 343 +life 0 50 3.044522 0.000000 375 +mechan 1 43 3.178054 3.178054 416 +third 0 43 3.178054 0.000000 412 +music 0 42 3.218876 0.000000 436 +brian 0 38 3.295837 0.000000 466 +connect 0 37 3.332205 0.000000 485 +short 0 36 3.367296 0.000000 499 +built 0 29 3.583519 0.000000 592 +enjoi 0 26 3.688879 0.000000 660 +mine 0 26 3.688879 0.000000 654 +miscellan 0 23 3.806662 0.000000 731 +theunivers 0 21 3.912023 0.000000 797 +busi 0 21 3.912023 0.000000 784 +grad 1 20 3.951244 3.951244 837 +bershad 0 18 4.060443 0.000000 902 +coupl 0 17 4.110874 0.000000 939 +later 0 15 4.248495 0.000000 1043 +fill 0 11 4.553877 0.000000 1349 +besid 0 8 4.875197 0.000000 1681 +hike 0 6 5.164786 0.000000 2234 +outdoor 0 5 5.347108 0.000000 2514 +pardyak 1 4 5.568345 5.568345 3043 +withth 0 4 5.568345 0.000000 2805 +outdat 0 4 5.568345 0.000000 2797 +somehow 0 4 5.568345 0.000000 2974 +ofoper 0 3 5.857933 0.000000 3292 +gloriou 0 3 5.857933 0.000000 3816 +leadership 0 3 5.857933 0.000000 3320 +emerald 0 3 5.857933 0.000000 3979 +poland 0 3 5.857933 0.000000 3665 +unrel 0 3 5.857933 0.000000 3837 +przemyslaw 0 2 6.263398 0.000000 5808 +basedprogram 0 2 6.263398 0.000000 5700 +pardi 1 1 6.957497 6.957497 16146 +drizzl 1 1 6.957497 6.957497 16147 +przemek 0 1 6.957497 0.000000 16148 +interast 0 1 6.957497 0.000000 16149 +notbusi 0 1 6.957497 0.000000 16150 +happenswhen 0 1 6.957497 0.000000 16151 +projectsspinan 0 1 6.957497 0.000000 16152 +systemsgroup 0 1 6.957497 0.000000 16153 +polish 0 1 6.957497 0.000000 16154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..8e277896 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +graduat 1 215 1.386294 1.386294 31 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +contact 0 153 1.791759 0.000000 59 +recent 0 167 1.791759 0.000000 58 +year 2 148 1.945910 3.891820 84 +first 1 140 1.945910 1.945910 71 +professor 0 137 1.945910 0.000000 76 +like 0 132 1.945910 0.000000 81 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +schedul 0 119 2.079442 0.000000 85 +document 0 121 2.079442 0.000000 89 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +school 1 84 2.484907 2.484907 188 +second 1 81 2.484907 2.484907 166 +stuff 0 87 2.484907 0.000000 171 +come 0 78 2.564949 0.000000 202 +april 0 77 2.564949 0.000000 196 +june 0 79 2.564949 0.000000 214 +know 0 80 2.564949 0.000000 198 +html 1 75 2.639057 2.639057 235 +sieg 0 69 2.708050 0.000000 260 +receiv 0 66 2.708050 0.000000 244 +collect 0 65 2.772589 0.000000 268 +best 0 59 2.833213 0.000000 299 +march 0 61 2.833213 0.000000 295 +variou 0 56 2.890372 0.000000 317 +undergradu 0 54 2.944439 0.000000 338 +friend 0 48 3.044522 0.000000 376 +move 0 47 3.091042 0.000000 382 +done 0 47 3.091042 0.000000 381 +anoth 0 45 3.135494 0.000000 408 +better 0 45 3.135494 0.000000 401 +made 0 44 3.135494 0.000000 398 +around 1 43 3.178054 3.178054 415 +might 0 41 3.218876 0.000000 426 +live 0 40 3.258097 0.000000 451 +annual 0 40 3.258097 0.000000 458 +paul 0 38 3.295837 0.000000 471 +electr 0 38 3.295837 0.000000 461 +jame 0 35 3.401197 0.000000 507 +return 0 34 3.401197 0.000000 502 +everi 0 34 3.401197 0.000000 519 +taught 1 33 3.433987 3.433987 526 +express 0 32 3.465736 0.000000 540 +photo 0 31 3.496508 0.000000 561 +someth 0 31 3.496508 0.000000 554 +travel 0 30 3.555348 0.000000 579 +univ 0 28 3.610918 0.000000 617 +usual 0 28 3.610918 0.000000 608 +hope 0 28 3.610918 0.000000 610 +concern 0 25 3.737670 0.000000 666 +hill 0 25 3.737670 0.000000 670 +never 0 25 3.737670 0.000000 671 +togeth 0 23 3.806662 0.000000 714 +dai 1 22 3.850148 3.850148 753 +inth 0 22 3.850148 0.000000 741 +love 0 21 3.912023 0.000000 804 +tell 0 21 3.912023 0.000000 777 +rout 0 21 3.912023 0.000000 793 +longer 0 20 3.951244 0.000000 816 +north 0 19 4.007333 0.000000 873 +particularli 0 19 4.007333 0.000000 867 +offici 0 18 4.060443 0.000000 894 +lot 0 18 4.060443 0.000000 889 +took 0 16 4.174387 0.000000 1010 +easi 0 16 4.174387 0.000000 969 +prolog 1 13 4.382027 4.382027 1155 +someon 0 13 4.382027 0.000000 1128 +everyon 0 13 4.382027 0.000000 1148 +brother 0 13 4.382027 0.000000 1189 +scan 0 12 4.465908 0.000000 1243 +rememb 0 12 4.465908 0.000000 1217 +rest 0 12 4.465908 0.000000 1259 +danc 0 12 4.465908 0.000000 1278 +chri 1 11 4.553877 4.553877 1311 +bike 1 10 4.653960 4.653960 1468 +franklin 1 10 4.653960 4.653960 1436 +rapid 0 10 4.653960 0.000000 1453 +town 0 10 4.653960 0.000000 1458 +folk 0 9 4.753590 0.000000 1597 +pagei 0 8 4.875197 0.000000 1683 +character 0 8 4.875197 0.000000 1767 +on 0 8 4.875197 0.000000 1628 +andcomput 0 8 4.875197 0.000000 1623 +partner 0 8 4.875197 0.000000 1648 +ride 0 8 4.875197 0.000000 1741 +davi 1 7 5.010635 5.010635 1888 +fortun 0 7 5.010635 0.000000 1872 +necessarili 0 7 5.010635 0.000000 1899 +throughout 0 7 5.010635 0.000000 1871 +portland 0 7 5.010635 0.000000 1878 +header 0 7 5.010635 0.000000 1787 +somewher 0 6 5.164786 0.000000 2176 +restaur 0 6 5.164786 0.000000 2230 +drop 0 6 5.164786 0.000000 2008 +poem 0 5 5.347108 0.000000 2483 +exchang 0 5 5.347108 0.000000 2310 +hire 0 4 5.568345 0.000000 2976 +glad 0 4 5.568345 0.000000 2657 +tend 0 4 5.568345 0.000000 3041 +kept 0 4 5.568345 0.000000 2762 +pagepaul 0 3 5.857933 0.000000 3669 +thesumm 0 3 5.857933 0.000000 3763 +matt 0 3 5.857933 0.000000 3792 +flat 0 3 5.857933 0.000000 3853 +aboutthi 0 2 6.263398 0.000000 5627 +oneof 0 2 6.263398 0.000000 4674 +diploma 0 2 6.263398 0.000000 5990 +bergen 0 2 6.263398 0.000000 5991 +itin 0 2 6.263398 0.000000 5992 +toseattl 0 2 6.263398 0.000000 5919 +myoffic 0 1 6.957497 0.000000 16155 +iliv 0 1 6.957497 0.000000 16156 +norwegian 0 1 6.957497 0.000000 16157 +likea 0 1 6.957497 0.000000 16158 +localchines 0 1 6.957497 0.000000 16159 +mundan 0 1 6.957497 0.000000 16160 +stuffi 0 1 6.957497 0.000000 16161 +hotlink 0 1 6.957497 0.000000 16162 +pagesstuff 0 1 6.957497 0.000000 16163 +maintainmi 0 1 6.957497 0.000000 16164 +mewher 0 1 6.957497 0.000000 16165 +inmorgan 0 1 6.957497 0.000000 16166 +fromuc 0 1 6.957497 0.000000 16167 +andy 0 1 6.957497 0.000000 16168 +ididn 0 1 6.957497 0.000000 16169 +ialso 0 1 6.957497 0.000000 16170 +stuffwhil 0 1 6.957497 0.000000 16171 +relatedact 0 1 6.957497 0.000000 16172 +evengot 0 1 6.957497 0.000000 16173 +marriag 0 1 6.957497 0.000000 16174 +joann 0 1 6.957497 0.000000 16175 +anexcus 0 1 6.957497 0.000000 16176 +ofbergen 0 1 6.957497 0.000000 16177 +hillier 0 1 6.957497 0.000000 16178 +returnedto 0 1 6.957497 0.000000 16179 +rollerblad 0 1 6.957497 0.000000 16180 +wasnow 0 1 6.957497 0.000000 16181 +hewlettpackard 0 1 6.957497 0.000000 16182 +vengeanc 0 1 6.957497 0.000000 16183 +intwo 0 1 6.957497 0.000000 16184 +inseason 0 1 6.957497 0.000000 16185 +justcommut 0 1 6.957497 0.000000 16186 +lindyhop 0 1 6.957497 0.000000 16187 +linethat 0 1 6.957497 0.000000 16188 +doctyp 0 1 6.957497 0.000000 16189 +ietf 0 1 6.957497 0.000000 16190 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..233dc620 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +washington 0 236 1.386294 0.000000 32 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +implement 0 152 1.791759 0.000000 52 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +memori 0 101 2.302585 0.000000 139 +take 0 97 2.302585 0.000000 134 +pictur 1 89 2.397895 2.397895 160 +graphic 0 90 2.397895 0.000000 147 +proceed 0 93 2.397895 0.000000 152 +decemb 0 80 2.564949 0.000000 215 +name 0 72 2.639057 0.000000 220 +symposium 0 72 2.639057 0.000000 238 +collect 0 65 2.772589 0.000000 268 +found 0 53 2.944439 0.000000 337 +much 0 52 2.995732 0.000000 349 +principl 0 48 3.044522 0.000000 357 +quarter 0 47 3.091042 0.000000 389 +even 0 45 3.135494 0.000000 393 +might 1 41 3.218876 3.218876 426 +live 1 40 3.258097 3.258097 451 +movi 0 40 3.258097 0.000000 459 +workstat 0 37 3.332205 0.000000 479 +everi 1 34 3.401197 3.401197 519 +global 0 34 3.401197 0.000000 520 +michael 0 35 3.401197 0.000000 514 +taught 0 33 3.433987 0.000000 526 +often 0 31 3.496508 0.000000 551 +cluster 0 28 3.610918 0.000000 612 +although 0 25 3.737670 0.000000 667 +william 0 22 3.850148 0.000000 765 +minut 1 20 3.951244 3.951244 810 +wonder 0 20 3.951244 0.000000 815 +mpeg 0 20 3.951244 0.000000 831 +supervis 0 20 3.951244 0.000000 840 +citi 0 19 4.007333 0.000000 874 +five 0 19 4.007333 0.000000 841 +otherwis 0 17 4.110874 0.000000 922 +action 1 15 4.248495 4.248495 1038 +qual 0 15 4.248495 0.000000 1062 +comic 0 14 4.317488 0.000000 1103 +levi 0 14 4.317488 0.000000 1093 +squar 0 14 4.317488 0.000000 1082 +karlin 1 13 4.382027 4.382027 1176 +nick 0 13 4.382027 0.000000 1180 +anna 1 12 4.465908 4.465908 1292 +rest 0 12 4.465908 0.000000 1259 +song 0 11 4.553877 0.000000 1380 +henri 0 10 4.653960 0.000000 1417 +swim 0 9 4.753590 0.000000 1599 +french 0 9 4.753590 0.000000 1511 +morgan 0 9 4.753590 0.000000 1484 +juan 0 9 4.753590 0.000000 1580 +tourist 0 8 4.875197 0.000000 1710 +surpris 0 7 5.010635 0.000000 1828 +feelei 0 7 5.010635 0.000000 1859 +rain 1 6 5.164786 5.164786 2137 +cat 0 6 5.164786 0.000000 2194 +pari 0 6 5.164786 0.000000 2158 +lucki 0 6 5.164786 0.000000 2163 +gui 0 5 5.347108 0.000000 2573 +british 0 5 5.347108 0.000000 2546 +formerli 0 5 5.347108 0.000000 2397 +door 0 5 5.347108 0.000000 2291 +darren 0 5 5.347108 0.000000 2565 +pighin 1 4 5.568345 5.568345 2735 +ta 0 4 5.568345 0.000000 3058 +breath 0 4 5.568345 0.000000 2946 +dark 0 4 5.568345 0.000000 2910 +berlin 0 3 5.857933 0.000000 3263 +marin 0 3 5.857933 0.000000 3947 +freder 0 3 5.857933 0.000000 3352 +thekkath 0 3 5.857933 0.000000 3973 +monti 0 2 6.263398 0.000000 4993 +python 0 2 6.263398 0.000000 4994 +cave 0 2 6.263398 0.000000 4959 +italian 0 2 6.263398 0.000000 5993 +simpson 0 2 6.263398 0.000000 5994 +chandramohan 0 2 6.263398 0.000000 5965 +refresh 1 1 6.957497 6.957497 16191 +frdric 0 1 6.957497 0.000000 16192 +lcommun 0 1 6.957497 0.000000 16193 +dani 0 1 6.957497 0.000000 16194 +corto 0 1 6.957497 0.000000 16195 +maltes 0 1 6.957497 0.000000 16196 +venis 0 1 6.957497 0.000000 16197 +traditionn 0 1 6.957497 0.000000 16198 +systemher 0 1 6.957497 0.000000 16199 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..356c3954 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +map 0 39 3.258097 0.000000 452 +anderson 1 19 4.007333 4.007333 860 +brother 0 13 4.382027 0.000000 1189 +ruth 1 7 5.010635 5.010635 1870 +wxyc 0 1 6.957497 0.000000 16200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..57f47b7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +redston 1 3 5.857933 5.857933 3332 +joshua 0 3 5.857933 0.000000 3333 +josh 0 2 6.263398 0.000000 5945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..294353c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,163 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +updat 0 191 1.609438 0.000000 41 +base 1 165 1.791759 1.791759 50 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +professor 1 137 1.945910 1.945910 76 +construct 0 139 1.945910 0.000000 82 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +seattl 1 120 2.079442 2.079442 103 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +site 0 106 2.197225 0.000000 119 +make 0 111 2.197225 0.000000 120 +part 0 98 2.302585 0.000000 129 +advanc 0 99 2.302585 0.000000 130 +book 0 99 2.302585 0.000000 131 +imag 1 91 2.397895 2.397895 161 +pictur 0 89 2.397895 0.000000 160 +build 1 85 2.484907 2.484907 184 +librari 1 87 2.484907 2.484907 181 +good 0 77 2.564949 0.000000 200 +onlin 1 75 2.639057 2.639057 223 +line 0 75 2.639057 0.000000 231 +write 0 72 2.639057 0.000000 222 +knowledg 0 67 2.708050 0.000000 243 +would 0 67 2.708050 0.000000 251 +java 0 70 2.708050 0.000000 248 +interact 0 62 2.772589 0.000000 270 +visit 0 63 2.772589 0.000000 288 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +local 0 55 2.944439 0.000000 334 +extens 0 53 2.944439 0.000000 340 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +run 0 51 2.995732 0.000000 347 +profession 0 51 2.995732 0.000000 345 +get 0 46 3.091042 0.000000 380 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +live 1 40 3.258097 3.258097 451 +probabl 0 40 3.258097 0.000000 455 +map 0 39 3.258097 0.000000 452 +movi 0 40 3.258097 0.000000 459 +multi 0 36 3.367296 0.000000 493 +everi 0 34 3.401197 0.000000 519 +jame 0 35 3.401197 0.000000 507 +word 0 34 3.401197 0.000000 508 +taught 0 33 3.433987 0.000000 526 +human 0 32 3.465736 0.000000 546 +framework 0 28 3.610918 0.000000 606 +team 0 27 3.637586 0.000000 625 +great 0 27 3.637586 0.000000 626 +todai 0 25 3.737670 0.000000 672 +alwai 0 24 3.761200 0.000000 691 +daili 0 24 3.761200 0.000000 706 +famili 0 23 3.806662 0.000000 735 +ofwashington 0 22 3.850148 0.000000 766 +color 0 22 3.850148 0.000000 762 +wonder 0 20 3.951244 0.000000 815 +nice 0 20 3.951244 0.000000 809 +toolkit 0 20 3.951244 0.000000 835 +expert 0 20 3.951244 0.000000 833 +wrote 0 20 3.951244 0.000000 830 +citi 0 19 4.007333 0.000000 874 +media 0 19 4.007333 0.000000 861 +els 0 19 4.007333 0.000000 843 +beauti 0 18 4.060443 0.000000 912 +stand 0 18 4.060443 0.000000 891 +repositori 0 17 4.110874 0.000000 932 +stat 0 17 4.110874 0.000000 924 +stock 0 16 4.174387 0.000000 1007 +psycholog 1 15 4.248495 4.248495 1054 +camera 1 14 4.317488 4.317488 1115 +happi 0 14 4.317488 0.000000 1079 +bodi 0 13 4.382027 0.000000 1178 +script 0 13 4.382027 0.000000 1171 +step 0 13 4.382027 0.000000 1138 +outsid 0 12 4.465908 0.000000 1219 +perl 0 11 4.553877 0.000000 1332 +magic 0 11 4.553877 0.000000 1358 +market 0 11 4.553877 0.000000 1361 +town 0 10 4.653960 0.000000 1458 +traffic 0 10 4.653960 0.000000 1421 +departmentof 0 9 4.753590 0.000000 1539 +clear 0 9 4.753590 0.000000 1488 +brain 0 8 4.875197 0.000000 1638 +rais 0 8 4.875197 0.000000 1711 +leon 0 8 4.875197 0.000000 1631 +chanc 0 7 5.010635 0.000000 1960 +vallei 0 7 5.010635 0.000000 1959 +footbal 0 7 5.010635 0.000000 1912 +rain 0 6 5.164786 0.000000 2137 +peek 0 6 5.164786 0.000000 2169 +camp 0 5 5.347108 0.000000 2545 +poem 0 5 5.347108 0.000000 2483 +fountain 0 4 5.568345 0.000000 3069 +catch 0 4 5.568345 0.000000 2602 +glimps 0 4 5.568345 0.000000 2778 +proud 0 4 5.568345 0.000000 2918 +leagu 0 4 5.568345 0.000000 3040 +fantasi 0 4 5.568345 0.000000 3055 +jakobovit 1 3 5.857933 5.857933 3913 +hawaii 1 3 5.857933 5.857933 3888 +foster 0 3 5.857933 0.000000 3159 +eddi 0 3 5.857933 0.000000 3896 +drag 0 3 5.857933 0.000000 3434 +mount 0 2 6.263398 0.000000 5995 +youcan 0 2 6.263398 0.000000 4373 +glori 0 2 6.263398 0.000000 5327 +consol 0 2 6.263398 0.000000 4048 +atla 0 2 6.263398 0.000000 5996 +databaseenviron 0 2 6.263398 0.000000 5792 +persistentprogram 0 2 6.263398 0.000000 5997 +creator 0 2 6.263398 0.000000 5998 +ladder 0 2 6.263398 0.000000 5858 +newslet 0 2 6.263398 0.000000 5860 +dian 0 2 6.263398 0.000000 5536 +judi 0 2 6.263398 0.000000 4442 +usai 0 1 6.957497 0.000000 16201 +thisup 0 1 6.957497 0.000000 16202 +weatherreport 0 1 6.957497 0.000000 16203 +sneak 0 1 6.957497 0.000000 16204 +drumhel 0 1 6.957497 0.000000 16205 +rainier 0 1 6.957497 0.000000 16206 +cleardai 0 1 6.957497 0.000000 16207 +astructur 0 1 6.957497 0.000000 16208 +anatomist 0 1 6.957497 0.000000 16209 +internetracquetbal 0 1 6.957497 0.000000 16210 +rotisseriebasebal 0 1 6.957497 0.000000 16211 +fromusa 0 1 6.957497 0.000000 16212 +africancichlid 0 1 6.957497 0.000000 16213 +honolulu 0 1 6.957497 0.000000 16214 +kalalau 0 1 6.957497 0.000000 16215 +gambl 0 1 6.957497 0.000000 16216 +darn 0 1 6.957497 0.000000 16217 +javafamili 0 1 6.957497 0.000000 16218 +mydad 0 1 6.957497 0.000000 16219 +whoi 0 1 6.957497 0.000000 16220 +polem 0 1 6.957497 0.000000 16221 +emanuel 0 1 6.957497 0.000000 16222 +swedenborg 0 1 6.957497 0.000000 16223 +nahl 0 1 6.957497 0.000000 16224 +whoprovid 0 1 6.957497 0.000000 16225 +realtor 0 1 6.957497 0.000000 16226 +uncl 0 1 6.957497 0.000000 16227 +bioscienc 0 1 6.957497 0.000000 16228 +bookmarksif 0 1 6.957497 0.000000 16229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..a3d22938 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +welcom 0 122 2.079442 0.000000 99 +right 0 48 3.044522 0.000000 363 +robert 0 30 3.555348 0.000000 567 +head 0 23 3.806662 0.000000 732 +reserv 0 20 3.951244 0.000000 808 +five 0 19 4.007333 0.000000 841 +photograph 0 15 4.248495 0.000000 1056 +galleri 0 13 4.382027 0.000000 1192 +twenti 0 5 5.347108 0.000000 2540 +twilight 0 1 6.957497 0.000000 16230 +grimm 0 1 6.957497 0.000000 16231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..3eb2d53c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +perform 1 143 1.945910 1.945910 74 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +memori 1 101 2.302585 2.302585 139 +peopl 0 96 2.302585 0.000000 132 +larg 0 82 2.484907 0.000000 168 +learn 0 86 2.484907 0.000000 170 +journal 0 83 2.484907 0.000000 183 +dynam 1 76 2.564949 2.564949 194 +appear 0 78 2.564949 0.000000 210 +addit 0 74 2.639057 0.000000 228 +sieg 0 69 2.708050 0.000000 260 +would 0 67 2.708050 0.000000 251 +polici 0 64 2.772589 0.000000 279 +abstract 0 62 2.772589 0.000000 276 +experi 0 64 2.772589 0.000000 283 +result 0 65 2.772589 0.000000 281 +direct 0 57 2.890372 0.000000 316 +three 0 54 2.944439 0.000000 330 +hardwar 0 51 2.995732 0.000000 350 +friend 1 48 3.044522 3.044522 376 +standard 0 48 3.044522 0.000000 365 +could 0 46 3.091042 0.000000 383 +physic 0 47 3.091042 0.000000 377 +mark 0 44 3.135494 0.000000 403 +cach 0 41 3.218876 0.000000 432 +edit 0 42 3.218876 0.000000 418 +realli 0 40 3.258097 0.000000 444 +map 0 39 3.258097 0.000000 452 +origin 0 38 3.295837 0.000000 472 +bibliographi 0 34 3.401197 0.000000 518 +eduoffic 0 33 3.433987 0.000000 531 +scientist 1 31 3.496508 3.496508 560 +travel 0 30 3.555348 0.000000 579 +american 0 27 3.637586 0.000000 634 +symbol 0 27 3.637586 0.000000 620 +subject 0 26 3.688879 0.000000 647 +hill 0 25 3.737670 0.000000 670 +interpret 0 24 3.761200 0.000000 686 +togeth 0 23 3.806662 0.000000 714 +reduc 0 22 3.850148 0.000000 759 +chen 1 21 3.912023 3.912023 791 +avoid 0 21 3.912023 0.000000 799 +hous 0 21 3.912023 0.000000 801 +wrote 0 20 3.951244 0.000000 830 +miss 0 19 4.007333 0.000000 866 +els 0 19 4.007333 0.000000 843 +bershad 1 18 4.060443 4.060443 902 +attend 0 18 4.060443 0.000000 893 +asplo 1 17 4.110874 4.110874 948 +thought 0 17 4.110874 0.000000 945 +chateau 0 16 4.174387 0.000000 997 +took 0 16 4.174387 0.000000 1010 +overhead 0 15 4.248495 0.000000 1035 +countri 0 15 4.248495 0.000000 1059 +levi 0 14 4.317488 0.000000 1093 +conduct 0 14 4.317488 0.000000 1065 +karlin 1 13 4.382027 4.382027 1176 +alan 0 13 4.382027 0.000000 1146 +brad 0 12 4.465908 0.000000 1264 +anna 0 12 4.465908 0.000000 1292 +promot 0 12 4.465908 0.000000 1235 +food 0 12 4.465908 0.000000 1285 +speak 0 12 4.465908 0.000000 1283 +isca 1 11 4.553877 4.553877 1354 +smart 0 11 4.553877 0.000000 1352 +denni 0 11 4.553877 0.000000 1321 +baer 0 11 4.553877 0.000000 1353 +wong 1 9 4.753590 4.753590 1609 +osdi 0 9 4.753590 0.000000 1534 +voelker 0 9 4.753590 0.000000 1557 +said 0 9 4.753590 0.000000 1571 +didn 0 9 4.753590 0.000000 1563 +romer 2 8 4.875197 9.750394 1706 +wayn 0 8 4.875197 0.000000 1738 +judg 0 8 4.875197 0.000000 1644 +interestsi 0 7 5.010635 0.000000 1969 +supportfor 0 7 5.010635 0.000000 1854 +tip 0 7 5.010635 0.000000 1863 +conflict 1 6 5.164786 5.164786 2041 +theproject 0 6 5.164786 0.000000 1981 +wolman 0 6 5.164786 0.000000 2093 +edward 0 6 5.164786 0.000000 2050 +ohlrich 1 5 5.347108 5.347108 2564 +hair 0 5 5.347108 0.000000 2446 +ration 0 5 5.347108 0.000000 2427 +unknown 0 5 5.347108 0.000000 2318 +tuft 0 5 5.347108 0.000000 2575 +couldn 0 4 5.568345 0.000000 2977 +accompani 0 4 5.568345 0.000000 2666 +mappedcach 0 3 5.857933 0.000000 3928 +knee 0 3 5.857933 0.000000 3980 +surgeri 0 3 5.857933 0.000000 3975 +europ 0 3 5.857933 0.000000 3761 +lunch 0 3 5.857933 0.000000 3369 +father 0 3 5.857933 0.000000 3757 +systemswith 0 2 6.263398 0.000000 5342 +eustac 0 2 6.263398 0.000000 5866 +onlinesuperpag 0 2 6.263398 0.000000 5819 +resolutionon 0 2 6.263398 0.000000 5867 +stuffa 0 2 6.263398 0.000000 5999 +rai 0 2 6.263398 0.000000 5915 +ticker 0 2 6.263398 0.000000 5247 +likebrian 0 1 6.957497 0.000000 16232 +andwayn 0 1 6.957497 0.000000 16233 +ofinterpret 0 1 6.957497 0.000000 16234 +rockyhom 0 1 6.957497 0.000000 16235 +lobo 0 1 6.957497 0.000000 16236 +listrandom 0 1 6.957497 0.000000 16237 +limb 0 1 6.957497 0.000000 16238 +arthroscop 0 1 6.957497 0.000000 16239 +wrist 0 1 6.957497 0.000000 16240 +dylansaid 0 1 6.957497 0.000000 16241 +flowbe 0 1 6.957497 0.000000 16242 +beingexperiment 0 1 6.957497 0.000000 16243 +somepictur 0 1 6.957497 0.000000 16244 +eatsomeon 0 1 6.957497 0.000000 16245 +sincer 0 1 6.957497 0.000000 16246 +forexampl 0 1 6.957497 0.000000 16247 +leftth 0 1 6.957497 0.000000 16248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..153a4eaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,158 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +washington 2 236 1.386294 2.772588 32 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +recent 0 167 1.791759 0.000000 58 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +construct 1 139 1.945910 1.945910 82 +problem 0 147 1.945910 0.000000 75 +architectur 0 139 1.945910 0.000000 77 +report 1 131 2.079442 2.079442 92 +seattl 0 120 2.079442 0.000000 103 +welcom 0 122 2.079442 0.000000 99 +intern 1 108 2.197225 2.197225 128 +look 0 107 2.197225 0.000000 115 +code 0 108 2.197225 0.000000 116 +memori 2 101 2.302585 4.605170 139 +techniqu 0 99 2.302585 0.000000 138 +follow 0 92 2.397895 0.000000 143 +larg 0 82 2.484907 0.000000 168 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +sourc 0 77 2.564949 0.000000 201 +refer 0 78 2.564949 0.000000 203 +good 0 77 2.564949 0.000000 200 +master 0 76 2.564949 0.000000 216 +onlin 1 75 2.639057 2.639057 223 +order 0 69 2.708050 0.000000 249 +simul 0 66 2.708050 0.000000 255 +would 0 67 2.708050 0.000000 251 +differ 0 66 2.708050 0.000000 253 +polici 1 64 2.772589 2.772589 279 +improv 1 62 2.772589 2.772589 289 +descript 1 64 2.772589 2.772589 271 +collect 0 65 2.772589 0.000000 268 +result 0 65 2.772589 0.000000 281 +copi 0 63 2.772589 0.000000 284 +simpl 0 60 2.833213 0.000000 298 +explor 0 58 2.890372 0.000000 324 +sever 0 56 2.890372 0.000000 322 +space 0 57 2.890372 0.000000 310 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +cover 0 55 2.944439 0.000000 329 +instruct 0 53 2.944439 0.000000 332 +hardwar 1 51 2.995732 2.995732 350 +investig 0 51 2.995732 0.000000 353 +without 0 50 3.044522 0.000000 370 +standard 0 48 3.044522 0.000000 365 +featur 0 46 3.091042 0.000000 386 +adapt 0 46 3.091042 0.000000 387 +effect 0 46 3.091042 0.000000 385 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +combin 0 42 3.218876 0.000000 421 +futur 0 41 3.218876 0.000000 427 +small 1 39 3.258097 3.258097 447 +map 1 39 3.258097 3.258097 452 +brian 0 38 3.295837 0.000000 466 +cost 1 37 3.332205 3.332205 480 +bibliographi 0 34 3.401197 0.000000 518 +compon 0 30 3.555348 0.000000 570 +pattern 0 24 3.761200 0.000000 689 +size 1 23 3.806662 3.806662 713 +identifi 1 22 3.850148 3.850148 760 +reduc 1 22 3.850148 3.850148 759 +chen 1 21 3.912023 3.912023 791 +avoid 0 21 3.912023 0.000000 799 +miss 1 19 4.007333 4.007333 866 +region 1 19 4.007333 4.007333 875 +runtim 0 19 4.007333 0.000000 858 +comparison 0 19 4.007333 0.000000 863 +bershad 1 18 4.060443 4.060443 902 +behavior 0 18 4.060443 0.000000 881 +monitor 1 17 4.110874 4.110874 941 +modif 0 17 4.110874 0.000000 913 +asplo 0 17 4.110874 0.000000 948 +overhead 1 15 4.248495 4.248495 1035 +qual 1 15 4.248495 4.248495 1062 +karlin 1 13 4.382027 4.382027 1176 +whose 0 13 4.382027 0.000000 1166 +someon 0 13 4.382027 0.000000 1128 +resolut 0 13 4.382027 0.000000 1172 +promot 1 12 4.465908 4.465908 1235 +overal 0 12 4.465908 0.000000 1254 +anna 0 12 4.465908 0.000000 1292 +isca 1 11 4.553877 4.553877 1354 +denni 0 11 4.553877 0.000000 1321 +alpha 0 11 4.553877 0.000000 1348 +operatingsystem 0 10 4.653960 0.000000 1401 +reli 0 10 4.653960 0.000000 1411 +wong 1 9 4.753590 4.753590 1609 +significantli 0 9 4.753590 0.000000 1508 +osdi 0 9 4.753590 0.000000 1534 +romer 2 8 4.875197 9.750394 1706 +wayn 1 8 4.875197 4.875197 1738 +poor 0 8 4.875197 0.000000 1736 +potenti 0 8 4.875197 0.000000 1690 +delai 0 7 5.010635 0.000000 1848 +larger 0 7 5.010635 0.000000 1875 +reduct 0 7 5.010635 0.000000 1877 +conflict 1 6 5.164786 5.164786 2041 +benefit 0 6 5.164786 0.000000 2213 +ohlrich 1 5 5.347108 5.347108 2564 +resolv 0 4 5.568345 0.000000 2675 +bottleneck 0 4 5.568345 0.000000 2769 +mip 0 4 5.568345 0.000000 2738 +superpag 1 3 5.857933 5.857933 3978 +peoplefaculti 0 3 5.857933 0.000000 3981 +dlee 0 3 5.857933 0.000000 3949 +waynew 0 3 5.857933 0.000000 3982 +reorder 0 3 5.857933 0.000000 3952 +fragment 1 2 6.263398 6.263398 6000 +contigu 0 2 6.263398 0.000000 6001 +warrant 0 2 6.263398 0.000000 5697 +washingtonmemori 0 1 6.957497 0.000000 16249 +researchdepart 0 1 6.957497 0.000000 16250 +sharesth 0 1 6.957497 0.000000 16251 +incur 0 1 6.957497 0.000000 16252 +monitorappl 0 1 6.957497 0.000000 16253 +resolvetlb 0 1 6.957497 0.000000 16254 +tlbi 0 1 6.957497 0.000000 16255 +severalmodern 0 1 6.957497 0.000000 16256 +amultipl 0 1 6.957497 0.000000 16257 +tlbperform 0 1 6.957497 0.000000 16258 +ofwast 0 1 6.957497 0.000000 16259 +todiffer 0 1 6.957497 0.000000 16260 +constructingsuperpag 0 1 6.957497 0.000000 16261 +ofmemori 0 1 6.957497 0.000000 16262 +balancesth 0 1 6.957497 0.000000 16263 +tlbmiss 0 1 6.957497 0.000000 16264 +memorycopi 0 1 6.957497 0.000000 16265 +misspattern 0 1 6.957497 0.000000 16266 +attain 0 1 6.957497 0.000000 16267 +largepag 0 1 6.957497 0.000000 16268 +makea 0 1 6.957497 0.000000 16269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..2f3cb644 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +washington 2 236 1.386294 2.772588 32 +languag 1 227 1.386294 1.386294 26 +updat 0 191 1.609438 0.000000 41 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +applic 0 170 1.791759 0.000000 56 +read 0 154 1.791759 0.000000 47 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +year 0 148 1.945910 0.000000 84 +file 0 132 1.945910 0.000000 70 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +document 0 121 2.079442 0.000000 89 +intern 1 108 2.197225 2.197225 128 +structur 0 106 2.197225 0.000000 105 +part 0 98 2.302585 0.000000 129 +techniqu 0 99 2.302585 0.000000 138 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +environ 1 84 2.484907 2.484907 177 +build 0 85 2.484907 0.000000 184 +appear 0 78 2.564949 0.000000 210 +sourc 0 77 2.564949 0.000000 201 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +differ 0 66 2.708050 0.000000 253 +collect 1 65 2.772589 2.772589 268 +descript 0 64 2.772589 0.000000 271 +evalu 0 64 2.772589 0.000000 266 +juli 0 60 2.833213 0.000000 305 +sever 1 56 2.890372 2.890372 322 +variou 0 56 2.890372 0.000000 317 +processor 0 54 2.944439 0.000000 335 +execut 0 45 3.135494 0.000000 404 +examin 0 42 3.218876 0.000000 424 +brian 0 38 3.295837 0.000000 466 +becom 0 28 3.610918 0.000000 603 +measur 0 28 3.610918 0.000000 609 +strategi 0 25 3.737670 0.000000 682 +trace 0 25 3.737670 0.000000 677 +interpret 2 24 3.761200 7.522400 686 +util 0 21 3.912023 0.000000 774 +similar 0 21 3.912023 0.000000 771 +portabl 0 20 3.951244 0.000000 819 +safeti 0 20 3.951244 0.000000 817 +basi 0 20 3.951244 0.000000 828 +binari 0 20 3.951244 0.000000 823 +benchmark 1 19 4.007333 4.007333 859 +bershad 1 18 4.060443 4.060443 902 +asplo 0 17 4.110874 0.000000 948 +levi 1 14 4.317488 4.317488 1093 +demand 0 14 4.317488 0.000000 1073 +characterist 0 12 4.465908 0.000000 1257 +baer 1 11 4.553877 4.553877 1353 +perl 0 11 4.553877 0.000000 1332 +denni 0 11 4.553877 0.000000 1321 +perspect 0 10 4.653960 0.000000 1437 +jean 0 10 4.653960 0.000000 1440 +henri 0 10 4.653960 0.000000 1417 +voelker 1 9 4.753590 4.753590 1557 +wong 1 9 4.753590 4.753590 1609 +romer 1 8 4.875197 4.875197 1706 +gain 0 8 4.875197 0.000000 1730 +wayn 0 8 4.875197 0.000000 1738 +instrument 0 7 5.010635 0.000000 1954 +wolman 1 6 5.164786 5.164786 2093 +loup 0 6 5.164786 0.000000 2228 +geoff 0 6 5.164786 0.000000 2124 +eas 0 5 5.347108 0.000000 2267 +alec 0 5 5.347108 0.000000 2563 +rewrit 0 5 5.347108 0.000000 2367 +rocki 1 4 5.568345 5.568345 3048 +etch 1 4 5.568345 5.568345 2755 +increasingli 0 4 5.568345 0.000000 2766 +popular 0 4 5.568345 0.000000 2802 +insight 0 4 5.568345 0.000000 3024 +peoplefaculti 0 3 5.857933 0.000000 3981 +dlee 0 3 5.857933 0.000000 3949 +waynew 0 3 5.857933 0.000000 3982 +microbenchmark 0 2 6.263398 0.000000 5821 +mipsi 0 2 6.263398 0.000000 5882 +papersrom 0 1 6.957497 0.000000 16270 +abstractpostscriptjava 0 1 6.957497 0.000000 16271 +xjava 0 1 6.957497 0.000000 16272 +benchmarkstoolsto 0 1 6.957497 0.000000 16273 +vebeen 0 1 6.957497 0.000000 16274 +yetpublicli 0 1 6.957497 0.000000 16275 +etchhom 0 1 6.957497 0.000000 16276 +documentationproject 0 1 6.957497 0.000000 16277 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..ba5cc6eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +phone 1 175 1.791759 1.791759 45 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +process 1 142 1.945910 1.945910 72 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +machin 0 129 2.079442 0.000000 95 +compil 0 122 2.079442 0.000000 96 +databas 0 122 2.079442 0.000000 86 +imag 1 91 2.397895 2.397895 161 +commun 0 95 2.397895 0.000000 157 +center 0 88 2.397895 0.000000 158 +grade 0 90 2.397895 0.000000 142 +librari 0 87 2.484907 0.000000 181 +help 0 83 2.484907 0.000000 175 +educ 0 86 2.484907 0.000000 191 +intellig 1 72 2.639057 2.639057 225 +nation 1 74 2.639057 2.639057 240 +sieg 0 69 2.708050 0.000000 260 +simul 0 66 2.708050 0.000000 255 +order 0 69 2.708050 0.000000 249 +foundat 0 62 2.772589 0.000000 286 +best 0 59 2.833213 0.000000 299 +summer 0 56 2.890372 0.000000 311 +februari 0 54 2.944439 0.000000 328 +basic 0 50 3.044522 0.000000 360 +offer 0 43 3.178054 0.000000 414 +long 0 43 3.178054 0.000000 413 +obtain 0 33 3.433987 0.000000 534 +richard 1 31 3.496508 3.496508 559 +produc 0 30 3.555348 0.000000 572 +arrai 0 27 3.637586 0.000000 627 +departmentunivers 0 24 3.761200 0.000000 711 +recognit 0 23 3.806662 0.000000 723 +director 0 22 3.850148 0.000000 767 +fund 0 21 3.912023 0.000000 805 +facil 0 20 3.951244 0.000000 814 +increas 0 20 3.951244 0.000000 829 +beauti 0 18 4.060443 0.000000 912 +chateau 0 16 4.174387 0.000000 997 +massiv 0 15 4.248495 0.000000 1026 +charact 0 15 4.248495 0.000000 1028 +optic 0 12 4.465908 0.000000 1221 +minor 0 12 4.465908 0.000000 1237 +island 0 11 4.553877 0.000000 1345 +length 0 10 4.653960 0.000000 1400 +juan 0 9 4.753590 0.000000 1580 +extract 0 8 4.875197 0.000000 1728 +roger 0 7 5.010635 0.000000 1892 +northwest 0 7 5.010635 0.000000 1973 +usaoffic 0 6 5.164786 0.000000 2159 +layout 0 6 5.164786 0.000000 2183 +camp 1 5 5.347108 5.347108 2545 +educomput 0 5 5.347108 0.000000 2524 +cellular 0 5 5.347108 0.000000 2433 +girl 0 5 5.347108 0.000000 2410 +snake 0 5 5.347108 0.000000 2281 +radio 0 4 5.568345 0.000000 3025 +bake 0 2 6.263398 0.000000 4468 +scam 1 1 6.957497 6.957497 16278 +splash 1 1 6.957497 6.957497 16279 +rogersrrog 0 1 6.957497 0.000000 16280 +laboratri 0 1 6.957497 0.000000 16281 +systol 0 1 6.957497 0.000000 16282 +morpholog 0 1 6.957497 0.000000 16283 +groundtruth 0 1 6.957497 0.000000 16284 +environment 0 1 6.957497 0.000000 16285 +ncee 0 1 6.957497 0.000000 16286 +ag 0 1 6.957497 0.000000 16287 +corn 0 1 6.957497 0.000000 16288 +jessica 0 1 6.957497 0.000000 16289 +squishi 0 1 6.957497 0.000000 16290 +kuow 0 1 6.957497 0.000000 16291 +stationi 0 1 6.957497 0.000000 16292 +pecan 0 1 6.957497 0.000000 16293 +seattlelast 0 1 6.957497 0.000000 16294 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..e89a40d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +seattl 0 120 2.079442 0.000000 103 +school 0 84 2.484907 0.000000 188 +stuff 0 87 2.484907 0.000000 171 +sieg 0 69 2.708050 0.000000 260 +friend 0 48 3.044522 0.000000 376 +mike 1 24 3.761200 3.761200 703 +chateau 0 16 4.174387 0.000000 997 +usaoffic 0 6 5.164786 0.000000 2159 +salisburysalisbur 0 1 6.957497 0.000000 16295 +lifehistori 0 1 6.957497 0.000000 16296 +vitacool 0 1 6.957497 0.000000 16297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..21576f6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 3 443 0.693147 2.079441 6 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +languag 0 227 1.386294 0.000000 26 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +implement 0 152 1.791759 0.000000 52 +first 1 140 1.945910 1.945910 71 +support 1 132 1.945910 1.945910 83 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +construct 0 139 1.945910 0.000000 82 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +provid 0 121 2.079442 0.000000 94 +version 1 113 2.197225 2.197225 122 +specif 1 106 2.197225 2.197225 106 +find 0 111 2.197225 0.000000 111 +code 0 108 2.197225 0.000000 116 +topic 0 114 2.197225 0.000000 110 +intern 0 108 2.197225 0.000000 128 +manag 0 114 2.197225 0.000000 125 +need 1 98 2.302585 2.302585 135 +technic 0 100 2.302585 0.000000 140 +user 0 104 2.302585 0.000000 137 +proceed 2 93 2.397895 4.795790 152 +real 1 93 2.397895 2.397895 144 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +control 0 82 2.484907 0.000000 164 +thing 0 84 2.484907 0.000000 189 +novemb 0 81 2.484907 0.000000 179 +ieee 0 86 2.484907 0.000000 190 +issu 1 78 2.564949 2.564949 211 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +decemb 0 80 2.564949 0.000000 215 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +servic 1 72 2.639057 2.639057 236 +write 0 72 2.639057 0.000000 222 +multimedia 1 68 2.708050 2.708050 258 +integr 0 67 2.708050 0.000000 245 +abstract 1 62 2.772589 2.772589 276 +januari 1 62 2.772589 2.772589 264 +march 0 61 2.833213 0.000000 295 +best 0 59 2.833213 0.000000 299 +direct 0 57 2.890372 0.000000 316 +extens 1 53 2.944439 2.944439 340 +talk 1 53 2.944439 2.944439 336 +processor 1 54 2.944439 2.944439 335 +sampl 0 53 2.944439 0.000000 339 +right 0 48 3.044522 0.000000 363 +principl 0 48 3.044522 0.000000 357 +frequent 0 49 3.044522 0.000000 367 +adapt 0 46 3.091042 0.000000 387 +third 0 43 3.178054 0.000000 412 +review 0 42 3.218876 0.000000 425 +music 0 42 3.218876 0.000000 436 +slide 1 38 3.295837 3.295837 467 +industri 1 38 3.295837 3.295837 464 +brian 0 38 3.295837 0.000000 466 +workstat 0 37 3.332205 0.000000 479 +winter 0 36 3.367296 0.000000 500 +tech 1 35 3.401197 3.401197 515 +post 0 35 3.401197 0.000000 505 +independ 0 32 3.465736 0.000000 548 +platform 0 29 3.583519 0.000000 591 +american 1 27 3.637586 3.637586 634 +quit 0 27 3.637586 0.000000 633 +arrai 0 27 3.637586 0.000000 627 +disk 0 22 3.850148 0.000000 747 +similar 0 21 3.912023 0.000000 771 +reserv 1 20 3.951244 3.951244 808 +safeti 0 20 3.951244 0.000000 817 +longer 0 20 3.951244 0.000000 816 +histori 0 19 4.007333 0.000000 853 +boston 0 19 4.007333 0.000000 862 +bershad 1 18 4.060443 4.060443 902 +protect 0 17 4.110874 0.000000 935 +modern 0 16 4.174387 0.000000 966 +match 0 16 4.174387 0.000000 965 +diego 0 16 4.174387 0.000000 992 +fourth 0 16 4.174387 0.000000 999 +spin 1 14 4.317488 4.317488 1121 +rank 0 14 4.317488 0.000000 1063 +mellon 0 13 4.382027 0.000000 1179 +usenix 1 12 4.465908 4.465908 1240 +rest 0 12 4.465908 0.000000 1259 +carnegi 0 12 4.465908 0.000000 1260 +statement 0 11 4.553877 0.000000 1313 +island 0 11 4.553877 0.000000 1345 +rich 0 10 4.653960 0.000000 1396 +sosp 0 10 4.653960 0.000000 1416 +mountain 0 10 4.653960 0.000000 1456 +santa 0 10 4.653960 0.000000 1441 +inter 0 9 4.753590 0.000000 1530 +modula 0 9 4.753590 0.000000 1613 +osdi 0 9 4.753590 0.000000 1534 +capac 1 8 4.875197 4.875197 1740 +mach 1 8 4.875197 4.875197 1669 +ride 0 8 4.875197 0.000000 1741 +isol 0 8 4.875197 0.000000 1663 +sigop 0 8 4.875197 0.000000 1727 +european 0 8 4.875197 0.000000 1763 +cultur 0 7 5.010635 0.000000 1951 +migrat 0 7 5.010635 0.000000 1851 +centuri 0 7 5.010635 0.000000 1935 +microprocessor 0 7 5.010635 0.000000 1808 +trend 0 7 5.010635 0.000000 1842 +fifth 0 7 5.010635 0.000000 1931 +sixth 0 7 5.010635 0.000000 1917 +band 0 6 5.164786 0.000000 2198 +unpublish 0 6 5.164786 0.000000 2226 +usag 0 6 5.164786 0.000000 2209 +favor 0 5 5.347108 0.000000 2414 +panel 0 5 5.347108 0.000000 2463 +savag 1 4 5.568345 5.568345 2777 +microkernel 1 4 5.568345 5.568345 3047 +afraid 1 4 5.568345 5.568345 3053 +gradual 0 4 5.568345 0.000000 2997 +witha 0 4 5.568345 0.000000 2617 +firm 0 4 5.568345 0.000000 2684 +peer 0 4 5.568345 0.000000 2742 +andimplement 0 4 5.568345 0.000000 3029 +redund 0 4 5.568345 0.000000 2839 +stefan 1 3 5.857933 5.857933 3921 +wcsss 1 3 5.857933 5.857933 3956 +caught 0 3 5.857933 0.000000 3465 +irrelev 0 3 5.857933 0.000000 3823 +inconveni 0 3 5.857933 0.000000 3866 +distract 0 3 5.857933 0.000000 3945 +stillmaintain 0 3 5.857933 0.000000 3964 +copper 0 3 5.857933 0.000000 3536 +hoto 0 3 5.857933 0.000000 3577 +orca 0 3 5.857933 0.000000 3578 +tucson 1 2 6.263398 6.263398 5883 +fool 0 2 6.263398 0.000000 5353 +ofappl 0 2 6.263398 0.000000 6002 +whichsupport 0 2 6.263398 0.000000 6003 +monterei 0 2 6.263398 0.000000 4362 +wwo 0 2 6.263398 0.000000 5812 +export 0 2 6.263398 0.000000 5689 +pittsburghfor 0 1 6.957497 0.000000 16298 +mnow 0 1 6.957497 0.000000 16299 +strongbackground 0 1 6.957497 0.000000 16300 +trash 0 1 6.957497 0.000000 16301 +tocqeuvil 0 1 6.957497 0.000000 16302 +tiresom 0 1 6.957497 0.000000 16303 +exercisepolit 0 1 6.957497 0.000000 16304 +tocurr 0 1 6.957497 0.000000 16305 +merri 0 1 6.957497 0.000000 16306 +onan 0 1 6.957497 0.000000 16307 +projectsspinspin 0 1 6.957497 0.000000 16308 +omnifemtokernel 0 1 6.957497 0.000000 16309 +writingspin 0 1 6.957497 0.000000 16310 +napa 0 1 6.957497 0.000000 16311 +timer 0 1 6.957497 0.000000 16312 +hikingthi 0 1 6.957497 0.000000 16313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..cd654326 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +david 0 71 2.639057 0.000000 232 +juli 0 60 2.833213 0.000000 305 +revis 0 26 3.688879 0.000000 640 +sean 1 8 4.875197 4.875197 1705 +sandi 1 4 5.568345 5.568345 2765 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..be243470 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +washington 0 236 1.386294 0.000000 32 +public 0 202 1.609438 0.000000 43 +seattl 0 120 2.079442 0.000000 103 +postscript 0 131 2.079442 0.000000 90 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +internet 0 83 2.484907 0.000000 186 +overview 0 56 2.890372 0.000000 323 +better 0 45 3.135494 0.000000 401 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +richard 1 31 3.496508 3.496508 559 +famili 0 23 3.806662 0.000000 735 +half 0 21 3.912023 0.000000 776 +washingtonbox 0 13 4.382027 0.000000 1200 +ski 0 10 4.653960 0.000000 1471 +softbal 0 9 4.753590 0.000000 1594 +softbot 0 7 5.010635 0.000000 1974 +amus 0 5 5.347108 0.000000 2366 +racquetbal 0 4 5.568345 0.000000 3052 +biographi 0 3 5.857933 0.000000 3658 +brute 0 2 6.263398 0.000000 5892 +bicycl 0 2 6.263398 0.000000 5950 +segal 1 1 6.957497 6.957497 16314 +segaldepart 0 1 6.957497 0.000000 16315 +archeri 0 1 6.957497 0.000000 16316 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..2b771d2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +email 0 220 1.386294 0.000000 29 +phone 1 175 1.791759 1.791759 45 +implement 1 152 1.791759 1.791759 52 +address 0 170 1.791759 0.000000 62 +algorithm 0 162 1.791759 0.000000 57 +avail 0 169 1.791759 0.000000 48 +hall 0 146 1.945910 0.000000 65 +like 0 132 1.945910 0.000000 81 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +spring 0 131 2.079442 0.000000 88 +machin 0 129 2.079442 0.000000 95 +studi 0 120 2.079442 0.000000 91 +compil 0 122 2.079442 0.000000 96 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +code 0 108 2.197225 0.000000 116 +peopl 0 96 2.302585 0.000000 132 +pictur 1 89 2.397895 2.397895 160 +activ 1 84 2.484907 2.484907 182 +school 1 84 2.484907 2.484907 188 +solut 1 82 2.484907 2.484907 162 +contain 0 81 2.484907 0.000000 174 +complet 1 77 2.564949 2.564949 208 +come 1 78 2.564949 2.564949 202 +sourc 0 77 2.564949 0.000000 201 +resum 0 79 2.564949 0.000000 217 +line 0 75 2.639057 0.000000 231 +receiv 1 66 2.708050 2.708050 244 +main 1 67 2.708050 2.708050 256 +sieg 0 69 2.708050 0.000000 260 +share 1 59 2.833213 2.833213 304 +content 0 59 2.833213 0.000000 302 +hardwar 1 51 2.995732 2.995732 350 +finger 0 52 2.995732 0.000000 354 +date 0 51 2.995732 0.000000 344 +without 0 50 3.044522 0.000000 370 +format 0 48 3.044522 0.000000 356 +done 0 47 3.091042 0.000000 381 +even 0 45 3.135494 0.000000 393 +around 0 43 3.178054 0.000000 415 +past 1 42 3.218876 3.218876 428 +linear 0 41 3.218876 0.000000 431 +field 0 37 3.332205 0.000000 482 +expect 0 37 3.332205 0.000000 484 +print 1 34 3.401197 3.401197 503 +return 0 34 3.401197 0.000000 502 +weather 0 28 3.610918 0.000000 618 +bookmark 0 26 3.688879 0.000000 639 +toward 0 25 3.737670 0.000000 668 +sometim 0 24 3.761200 0.000000 696 +honor 0 23 3.806662 0.000000 729 +sort 0 22 3.850148 0.000000 738 +born 0 21 3.912023 0.000000 798 +unit 0 21 3.912023 0.000000 779 +els 1 19 4.007333 4.007333 843 +comparison 1 19 4.007333 4.007333 863 +north 0 19 4.007333 0.000000 873 +germani 0 17 4.110874 0.000000 946 +bachelor 0 17 4.110874 0.000000 957 +qual 0 15 4.248495 0.000000 1062 +squar 0 14 4.317488 0.000000 1082 +warn 0 14 4.317488 0.000000 1068 +excit 0 11 4.553877 0.000000 1329 +moment 0 11 4.553877 0.000000 1379 +didn 0 9 4.753590 0.000000 1563 +exact 0 9 4.753590 0.000000 1509 +reduct 0 7 5.010635 0.000000 1877 +byte 1 6 5.164786 5.164786 2108 +indiana 0 6 5.164786 0.000000 2057 +trail 0 6 5.164786 0.000000 2071 +fewer 0 6 5.164786 0.000000 2074 +fals 1 4 5.568345 5.568345 2861 +raft 0 4 5.568345 0.000000 3060 +crazi 0 4 5.568345 0.000000 2822 +shouldn 0 4 5.568345 0.000000 2606 +stefan 1 3 5.857933 5.857933 3921 +char 1 2 6.263398 6.263398 4716 +berg 0 2 6.263398 0.000000 4970 +reif 0 2 6.263398 0.000000 5015 +diploma 0 2 6.263398 0.000000 5990 +bloomington 0 2 6.263398 0.000000 5034 +itin 0 2 6.263398 0.000000 5992 +printf 1 1 6.957497 6.957497 16317 +cologn 1 1 6.957497 6.957497 16318 +putchar 1 1 6.957497 6.957497 16319 +bergstefan 0 1 6.957497 0.000000 16320 +sgberg 0 1 6.957497 0.000000 16321 +mittler 0 1 6.957497 0.000000 16322 +thgrade 0 1 6.957497 0.000000 16323 +schillergymnasium 0 1 6.957497 0.000000 16324 +statesto 0 1 6.957497 0.000000 16325 +distinctionin 0 1 6.957497 0.000000 16326 +fromindiana 0 1 6.957497 0.000000 16327 +momenth 0 1 6.957497 0.000000 16328 +thiscenturi 0 1 6.957497 0.000000 16329 +yourselfsometh 0 1 6.957497 0.000000 16330 +particularsolut 0 1 6.957497 0.000000 16331 +sall 0 1 6.957497 0.000000 16332 +carriag 0 1 6.957497 0.000000 16333 +inpostscript 0 1 6.957497 0.000000 16334 +andtex 0 1 6.957497 0.000000 16335 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..dc309b58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +recent 0 167 1.791759 0.000000 58 +contact 0 153 1.791759 0.000000 59 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +schedul 0 119 2.079442 0.000000 85 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +make 0 111 2.197225 0.000000 120 +pictur 1 89 2.397895 2.397895 160 +follow 0 92 2.397895 0.000000 143 +mani 0 92 2.397895 0.000000 150 +graphic 0 90 2.397895 0.000000 147 +imag 0 91 2.397895 0.000000 161 +thing 1 84 2.484907 2.484907 189 +info 0 85 2.484907 0.000000 176 +wide 0 84 2.484907 0.000000 185 +differ 0 66 2.708050 0.000000 253 +interact 0 62 2.772589 0.000000 270 +complex 0 64 2.772589 0.000000 269 +laboratori 0 63 2.772589 0.000000 292 +plan 0 65 2.772589 0.000000 272 +browser 0 56 2.890372 0.000000 313 +get 0 46 3.091042 0.000000 380 +done 0 47 3.091042 0.000000 381 +third 0 43 3.178054 0.000000 412 +least 0 35 3.401197 0.000000 516 +go 0 33 3.433987 0.000000 529 +anim 0 31 3.496508 0.000000 557 +travel 0 30 3.555348 0.000000 579 +daili 0 24 3.761200 0.000000 706 +grad 0 20 3.951244 0.000000 837 +left 0 19 4.007333 0.000000 851 +lower 0 18 4.060443 0.000000 886 +lot 0 18 4.060443 0.000000 889 +scene 0 14 4.317488 0.000000 1114 +island 0 11 4.553877 0.000000 1345 +siggraph 0 8 4.875197 0.000000 1773 +corner 0 7 5.010635 0.000000 1909 +pixel 0 4 5.568345 0.000000 2831 +shortcut 0 3 5.857933 0.000000 3932 +ward 0 2 6.263398 0.000000 4506 +hereat 0 2 6.263398 0.000000 5048 +shadegreet 0 1 6.957497 0.000000 16336 +salut 0 1 6.957497 0.000000 16337 +dubcs 0 1 6.957497 0.000000 16338 +renderingof 0 1 6.957497 0.000000 16339 +walkthruproject 0 1 6.957497 0.000000 16340 +amonglot 0 1 6.957497 0.000000 16341 +paperdescrib 0 1 6.957497 0.000000 16342 +thepictur 0 1 6.957497 0.000000 16343 +aspectsof 0 1 6.957497 0.000000 16344 +thegraph 0 1 6.957497 0.000000 16345 +scrunch 0 1 6.957497 0.000000 16346 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..a824a83c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +seattl 0 120 2.079442 0.000000 103 +summari 0 73 2.639057 0.000000 237 +prof 0 64 2.772589 0.000000 273 +januari 0 62 2.772589 0.000000 264 +pointer 0 48 3.044522 0.000000 368 +vita 0 38 3.295837 0.000000 473 +curriculum 0 33 3.433987 0.000000 535 +request 0 26 3.688879 0.000000 635 +ofwashington 0 22 3.850148 0.000000 766 +upon 0 16 4.174387 0.000000 978 +andengin 0 4 5.568345 0.000000 3042 +shun 1 2 6.263398 6.263398 4533 +leung 1 2 6.263398 6.263398 4534 +johnzahorjan 0 2 6.263398 0.000000 6004 +leungshun 0 1 6.957497 0.000000 16347 +shuntak 0 1 6.957497 0.000000 16348 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..ced4f836 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +relat 0 139 1.945910 0.000000 68 +studi 1 120 2.079442 2.079442 91 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +mathemat 0 108 2.197225 0.000000 123 +look 0 107 2.197225 0.000000 115 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +call 0 91 2.397895 0.000000 153 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +start 0 83 2.484907 0.000000 173 +info 0 85 2.484907 0.000000 176 +interfac 0 79 2.564949 0.000000 209 +involv 1 71 2.639057 2.639057 227 +name 0 72 2.639057 0.000000 220 +degre 0 69 2.708050 0.000000 259 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +origin 0 38 3.295837 0.000000 472 +mean 0 37 3.332205 0.000000 477 +dissert 0 32 3.465736 0.000000 549 +quit 0 27 3.637586 0.000000 633 +notic 0 25 3.737670 0.000000 675 +departmentunivers 0 24 3.761200 0.000000 711 +ofwashington 0 22 3.850148 0.000000 766 +hypertext 0 19 4.007333 0.000000 865 +figur 0 18 4.060443 0.000000 903 +less 0 18 4.060443 0.000000 892 +universityof 0 15 4.248495 0.000000 1061 +anywai 0 15 4.248495 0.000000 1047 +neat 0 12 4.465908 0.000000 1263 +virginia 0 8 4.875197 0.000000 1659 +studentcomput 0 7 5.010635 0.000000 1963 +foreign 0 7 5.010635 0.000000 1919 +smaller 0 7 5.010635 0.000000 1874 +hunt 0 7 5.010635 0.000000 1798 +bug 0 7 5.010635 0.000000 1801 +slate 0 6 5.164786 0.000000 2021 +sciencedepart 0 6 5.164786 0.000000 2172 +haven 1 4 5.568345 5.568345 3037 +asian 1 3 5.857933 5.857933 3598 +heavili 0 3 5.857933 0.000000 3572 +groupand 0 3 5.857933 0.000000 3873 +shuichi 1 2 6.263398 6.263398 4498 +myqual 0 2 6.263398 0.000000 6005 +degreein 0 2 6.263398 0.000000 5116 +koga 1 1 6.957497 6.957497 16349 +skoga 1 1 6.957497 6.957497 16350 +bynow 0 1 6.957497 0.000000 16351 +alsoheavili 0 1 6.957497 0.000000 16352 +andgovern 0 1 6.957497 0.000000 16353 +alic 0 1 6.957497 0.000000 16354 +destroi 0 1 6.957497 0.000000 16355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..b242db44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +year 0 148 1.945910 0.000000 84 +professor 0 137 1.945910 0.000000 76 +schedul 0 119 2.079442 0.000000 85 +seattl 0 120 2.079442 0.000000 103 +site 0 106 2.197225 0.000000 119 +school 0 84 2.484907 0.000000 188 +educ 0 86 2.484907 0.000000 191 +dept 1 64 2.772589 2.772589 291 +previou 0 62 2.772589 0.000000 290 +experi 0 64 2.772589 0.000000 283 +profession 0 51 2.995732 0.000000 345 +littl 0 39 3.258097 0.000000 454 +photo 0 31 3.496508 0.000000 561 +busi 0 21 3.912023 0.000000 784 +vlsi 0 21 3.912023 0.000000 795 +weekli 0 17 4.110874 0.000000 919 +circuit 0 13 4.382027 0.000000 1131 +galleri 0 13 4.382027 0.000000 1192 +chao 0 8 4.875197 0.000000 1753 +patent 0 5 5.347108 0.000000 2574 +soha 1 2 6.263398 6.263398 6006 +hassoun 0 2 6.263398 0.000000 6007 +retim 0 2 6.263398 0.000000 6008 +hassounit 0 1 6.957497 0.000000 16356 +whoturn 0 1 6.957497 0.000000 16357 +onarchitectur 0 1 6.957497 0.000000 16358 +carlebel 0 1 6.957497 0.000000 16359 +deede 0 1 6.957497 0.000000 16360 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..5a53f9e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +cornel 0 215 1.386294 0.000000 23 +class 0 199 1.609438 0.000000 37 +contact 0 153 1.791759 0.000000 59 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +hall 0 146 1.945910 0.000000 65 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +seattl 1 120 2.079442 2.079442 103 +report 0 131 2.079442 0.000000 92 +pleas 0 113 2.197225 0.000000 114 +person 0 111 2.197225 0.000000 117 +search 0 95 2.397895 0.000000 155 +associ 0 93 2.397895 0.000000 151 +control 0 82 2.484907 0.000000 164 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +west 0 83 2.484907 0.000000 192 +issu 0 78 2.564949 0.000000 211 +resum 0 79 2.564949 0.000000 217 +sieg 0 69 2.708050 0.000000 260 +evalu 0 64 2.772589 0.000000 266 +particular 0 51 2.995732 0.000000 352 +right 0 48 3.044522 0.000000 363 +quarter 0 47 3.091042 0.000000 389 +done 0 47 3.091042 0.000000 381 +favorit 1 44 3.135494 3.135494 410 +keep 0 44 3.135494 0.000000 409 +long 0 43 3.178054 0.000000 413 +music 0 42 3.218876 0.000000 436 +seminar 1 38 3.295837 3.295837 470 +procedur 0 36 3.367296 0.000000 488 +random 0 34 3.401197 0.000000 511 +bookmark 0 26 3.688879 0.000000 639 +sport 0 25 3.737670 0.000000 683 +sort 0 22 3.850148 0.000000 738 +fund 0 21 3.912023 0.000000 805 +corpor 0 21 3.912023 0.000000 802 +tenni 0 20 3.951244 0.000000 838 +chateau 0 16 4.174387 0.000000 997 +cognit 0 16 4.174387 0.000000 986 +action 0 15 4.248495 0.000000 1038 +track 0 15 4.248495 0.000000 1029 +psycholog 0 15 4.248495 0.000000 1054 +convent 0 14 4.317488 0.000000 1072 +philosophi 0 13 4.382027 0.000000 1167 +danc 1 12 4.465908 4.465908 1278 +food 1 12 4.465908 4.465908 1285 +emploi 0 12 4.465908 0.000000 1284 +remov 0 12 4.465908 0.000000 1225 +island 0 11 4.553877 0.000000 1345 +peter 0 11 4.553877 0.000000 1316 +french 1 9 4.753590 4.753590 1511 +volleybal 0 9 4.753590 0.000000 1598 +soccer 1 8 4.875197 4.875197 1752 +guggenheim 0 8 4.875197 0.000000 1759 +simon 0 8 4.875197 0.000000 1697 +coast 0 8 4.875197 0.000000 1746 +softbot 1 7 5.010635 5.010635 1974 +squash 0 6 5.164786 0.000000 2223 +rock 0 6 5.164786 0.000000 2164 +annex 0 5 5.347108 0.000000 2572 +sail 0 5 5.347108 0.000000 2571 +east 0 5 5.347108 0.000000 2472 +phil 0 5 5.347108 0.000000 2419 +oracl 0 4 5.568345 0.000000 2823 +swing 0 4 5.568345 0.000000 2887 +floyd 0 4 5.568345 0.000000 2682 +sujai 1 3 5.857933 5.857933 3960 +parekh 1 3 5.857933 5.857933 3961 +ballroom 0 3 5.857933 0.000000 3983 +spud 0 2 6.263398 0.000000 6009 +chicken 0 2 6.263398 0.000000 5851 +tango 0 2 6.263398 0.000000 6010 +salsa 0 2 6.263398 0.000000 5984 +strait 0 2 6.263398 0.000000 5980 +genesi 0 2 6.263398 0.000000 6011 +gabriel 0 2 6.263398 0.000000 5029 +simultaneousmultithread 0 1 6.957497 0.000000 16361 +tomultithread 0 1 6.957497 0.000000 16362 +controlsystem 0 1 6.957497 0.000000 16363 +patio 0 1 6.957497 0.000000 16364 +workspac 0 1 6.957497 0.000000 16365 +stottler 0 1 6.957497 0.000000 16366 +henk 0 1 6.957497 0.000000 16367 +oondhiu 0 1 6.957497 0.000000 16368 +mango 0 1 6.957497 0.000000 16369 +phad 0 1 6.957497 0.000000 16370 +thai 0 1 6.957497 0.000000 16371 +kung 0 1 6.957497 0.000000 16372 +beverag 0 1 6.957497 0.000000 16373 +screwdriv 0 1 6.957497 0.000000 16374 +scotch 0 1 6.957497 0.000000 16375 +ic 0 1 6.957497 0.000000 16376 +dire 0 1 6.957497 0.000000 16377 +pink 0 1 6.957497 0.000000 16378 +collin 0 1 6.957497 0.000000 16379 +petti 0 1 6.957497 0.000000 16380 +sparekh 0 1 6.957497 0.000000 16381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..1d7da619 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +offic 0 299 1.098612 0.000000 13 +washington 0 236 1.386294 0.000000 32 +link 0 247 1.386294 0.000000 24 +address 0 170 1.791759 0.000000 62 +machin 0 129 2.079442 0.000000 95 +peopl 0 96 2.302585 0.000000 132 +academ 0 82 2.484907 0.000000 178 +name 0 72 2.639057 0.000000 220 +improv 0 62 2.772589 0.000000 289 +colleg 0 61 2.833213 0.000000 300 +friend 0 48 3.044522 0.000000 376 +compani 0 41 3.218876 0.000000 423 +live 0 40 3.258097 0.000000 451 +power 0 30 3.555348 0.000000 573 +sport 0 25 3.737670 0.000000 683 +famili 0 23 3.806662 0.000000 735 +almost 0 22 3.850148 0.000000 742 +hous 0 21 3.912023 0.000000 801 +speed 1 18 4.060443 4.060443 911 +ultim 0 17 4.110874 0.000000 943 +drive 0 15 4.248495 0.000000 1052 +comic 0 14 4.317488 0.000000 1103 +food 0 12 4.465908 0.000000 1285 +mari 0 12 4.465908 0.000000 1266 +lake 0 11 4.553877 0.000000 1373 +mountain 0 10 4.653960 0.000000 1456 +bike 0 10 4.653960 0.000000 1468 +ski 0 10 4.653960 0.000000 1471 +drink 0 9 4.753590 0.000000 1607 +softbal 0 9 4.753590 0.000000 1594 +utah 0 9 4.753590 0.000000 1585 +erik 1 8 4.875197 4.875197 1701 +lewi 0 8 4.875197 0.000000 1700 +star 0 8 4.875197 0.000000 1717 +brain 0 8 4.875197 0.000000 1638 +babylon 0 8 4.875197 0.000000 1731 +cultur 0 7 5.010635 0.000000 1951 +fish 0 6 5.164786 0.000000 2207 +selberg 0 5 5.347108 0.000000 2441 +salt 0 5 5.347108 0.000000 2413 +bean 0 4 5.568345 0.000000 2968 +lara 0 3 5.857933 0.000000 3914 +disc 0 2 6.263398 0.000000 5626 +spud 0 2 6.263398 0.000000 6009 +raquetbal 0 2 6.263398 0.000000 6012 +pepper 0 2 6.263398 0.000000 6013 +war 0 2 6.263398 0.000000 5969 +toon 0 2 6.263398 0.000000 4120 +fishcam 0 1 6.957497 0.000000 16382 +memorialhappi 0 1 6.957497 0.000000 16383 +kay 0 1 6.957497 0.000000 16384 +pasti 0 1 6.957497 0.000000 16385 +ur 0 1 6.957497 0.000000 16386 +pro 0 1 6.957497 0.000000 16387 +wedgwood 0 1 6.957497 0.000000 16388 +diet 0 1 6.957497 0.000000 16389 +roast 0 1 6.957497 0.000000 16390 +bagel 0 1 6.957497 0.000000 16391 +racer 0 1 6.957497 0.000000 16392 +tini 0 1 6.957497 0.000000 16393 +pinki 0 1 6.957497 0.000000 16394 +phantom 0 1 6.957497 0.000000 16395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..dd318d1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +includ 0 208 1.609438 0.000000 42 +parallel 1 169 1.791759 1.791759 60 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +read 0 154 1.791759 0.000000 47 +like 1 132 1.945910 1.945910 81 +year 0 148 1.945910 0.000000 84 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +seattl 0 120 2.079442 0.000000 103 +final 0 116 2.197225 0.000000 108 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +world 0 115 2.197225 0.000000 126 +take 1 97 2.302585 2.302585 134 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +graphic 0 90 2.397895 0.000000 147 +sinc 0 90 2.397895 0.000000 159 +second 0 81 2.484907 0.000000 166 +good 1 77 2.564949 2.564949 200 +optim 0 79 2.564949 0.000000 197 +come 0 78 2.564949 0.000000 202 +involv 0 71 2.639057 0.000000 227 +addit 0 74 2.639057 0.000000 228 +simul 1 66 2.708050 2.708050 255 +goal 0 66 2.708050 0.000000 250 +would 0 67 2.708050 0.000000 251 +experi 0 64 2.772589 0.000000 283 +improv 0 62 2.772589 0.000000 289 +result 0 65 2.772589 0.000000 281 +plai 1 60 2.833213 2.833213 307 +colleg 0 61 2.833213 0.000000 300 +think 0 57 2.890372 0.000000 314 +processor 0 54 2.944439 0.000000 335 +visual 0 48 3.044522 0.000000 372 +quarter 0 47 3.091042 0.000000 389 +anoth 0 45 3.135494 0.000000 408 +late 0 40 3.258097 0.000000 439 +littl 0 39 3.258097 0.000000 454 +movi 0 40 3.258097 0.000000 459 +must 0 40 3.258097 0.000000 442 +game 0 36 3.367296 0.000000 498 +independ 0 32 3.465736 0.000000 548 +quit 0 27 3.637586 0.000000 633 +enjoi 0 26 3.688879 0.000000 660 +primari 0 25 3.737670 0.000000 669 +cooper 0 22 3.850148 0.000000 757 +instead 0 22 3.850148 0.000000 756 +try 0 22 3.850148 0.000000 764 +watch 1 21 3.912023 3.912023 789 +divis 1 21 3.912023 3.912023 803 +mostli 0 19 4.007333 0.000000 869 +exercis 0 19 4.007333 0.000000 842 +left 0 19 4.007333 0.000000 851 +listen 0 18 4.060443 0.000000 907 +front 0 13 4.382027 0.000000 1154 +came 0 13 4.382027 0.000000 1197 +unfortun 0 13 4.382027 0.000000 1170 +weight 0 12 4.465908 0.000000 1204 +hang 0 9 4.753590 0.000000 1499 +drink 0 9 4.753590 0.000000 1607 +swim 0 9 4.753590 0.000000 1599 +router 0 8 4.875197 0.000000 1772 +soccer 0 8 4.875197 0.000000 1752 +sung 1 6 5.164786 5.164786 2075 +superscalar 0 6 5.164786 0.000000 2082 +seen 0 6 5.164786 0.000000 2202 +yeah 0 6 5.164786 0.000000 2195 +ta 0 4 5.568345 0.000000 3058 +choi 0 4 5.568345 0.000000 2732 +astronomi 0 3 5.857933 0.000000 3974 +comfort 0 3 5.857933 0.000000 3136 +wine 0 3 5.857933 0.000000 3895 +knee 0 3 5.857933 0.000000 3980 +choiwelcom 0 2 6.263398 0.000000 5727 +ironman 0 2 6.263398 0.000000 4226 +vegetarian 0 2 6.263398 0.000000 5902 +season 0 2 6.263398 0.000000 4872 +scrub 1 1 6.957497 6.957497 16396 +thehomepag 0 1 6.957497 0.000000 16397 +ofsung 0 1 6.957497 0.000000 16398 +eunchoi 0 1 6.957497 0.000000 16399 +myschoollifemi 0 1 6.957497 0.000000 16400 +zplcompil 0 1 6.957497 0.000000 16401 +beenspend 0 1 6.957497 0.000000 16402 +communicationgener 0 1 6.957497 0.000000 16403 +architechtur 0 1 6.957497 0.000000 16404 +communicationlibrari 0 1 6.957497 0.000000 16405 +programson 0 1 6.957497 0.000000 16406 +nodeperform 0 1 6.957497 0.000000 16407 +alsobeen 0 1 6.957497 0.000000 16408 +chaosrout 0 1 6.957497 0.000000 16409 +thatexperi 0 1 6.957497 0.000000 16410 +inzpl 0 1 6.957497 0.000000 16411 +myjunior 0 1 6.957497 0.000000 16412 +dinner 0 1 6.957497 0.000000 16413 +samewithout 0 1 6.957497 0.000000 16414 +twosoccerteam 0 1 6.957497 0.000000 16415 +cousin 0 1 6.957497 0.000000 16416 +recdivis 0 1 6.957497 0.000000 16417 +andcoop 0 1 6.957497 0.000000 16418 +sacrifiedmi 0 1 6.957497 0.000000 16419 +usualstep 0 1 6.957497 0.000000 16420 +aerobicsclass 0 1 6.957497 0.000000 16421 +trainingclass 0 1 6.957497 0.000000 16422 +abit 0 1 6.957497 0.000000 16423 +shakespear 0 1 6.957497 0.000000 16424 +publictelevis 0 1 6.957497 0.000000 16425 +classicalmus 0 1 6.957497 0.000000 16426 +myotherlif 0 1 6.957497 0.000000 16427 +sungeun 0 1 6.957497 0.000000 16428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..6138b31a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +support 0 132 1.945910 0.000000 83 +schedul 1 119 2.079442 2.079442 85 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +world 0 115 2.197225 0.000000 126 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +real 0 93 2.397895 0.000000 144 +help 0 83 2.484907 0.000000 175 +build 0 85 2.484907 0.000000 184 +complet 0 77 2.564949 0.000000 208 +advisor 0 51 2.995732 0.000000 355 +run 0 51 2.995732 0.000000 347 +visual 0 48 3.044522 0.000000 372 +execut 0 45 3.135494 0.000000 404 +workstat 0 37 3.332205 0.000000 479 +secur 0 30 3.555348 0.000000 577 +measur 0 28 3.610918 0.000000 609 +decis 0 23 3.806662 0.000000 728 +runtim 1 19 4.007333 4.007333 858 +partial 0 18 4.060443 0.000000 900 +engineeringunivers 0 17 4.110874 0.000000 959 +washingtonbox 0 13 4.382027 0.000000 1200 +characterist 0 12 4.465908 0.000000 1257 +multiprogram 0 6 5.164786 0.000000 2010 +nguyen 1 3 5.857933 5.857933 3290 +andparallel 0 2 6.263398 0.000000 6014 +johnzahorjan 0 2 6.263398 0.000000 6004 +soft 0 2 6.263398 0.000000 5072 +idl 0 2 6.263398 0.000000 4256 +ofappl 0 2 6.263398 0.000000 6002 +tominim 0 2 6.263398 0.000000 5436 +multiprocessorsenviron 1 1 6.957497 6.957497 16429 +frommi 0 1 6.957497 0.000000 16430 +timeappl 0 1 6.957497 0.000000 16431 +innow 0 1 6.957497 0.000000 16432 +uniprogram 0 1 6.957497 0.000000 16433 +goodglob 0 1 6.957497 0.000000 16434 +cvpublic 0 1 6.957497 0.000000 16435 +worldvietnameseresourc 0 1 6.957497 0.000000 16436 +netcyclingplayground 0 1 6.957497 0.000000 16437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..5a33ba8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +click 0 142 1.945910 0.000000 78 +part 0 98 2.302585 0.000000 129 +homepag 0 93 2.397895 0.000000 148 +html 0 75 2.639057 0.000000 235 +browser 0 56 2.890372 0.000000 313 +standard 0 48 3.044522 0.000000 365 +even 0 45 3.135494 0.000000 393 +frame 1 24 3.761200 3.761200 684 +yellow 0 9 4.753590 0.000000 1601 +turkei 0 4 5.568345 0.000000 2914 +tian 0 3 5.857933 0.000000 3680 +homepageyour 0 1 6.957497 0.000000 16438 +rusti 0 1 6.957497 0.000000 16439 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..3e8dcfeb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +work 2 380 0.693147 1.386294 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +oper 1 180 1.609438 1.609438 34 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +distribut 2 162 1.791759 3.583518 51 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +parallel 0 169 1.791759 0.000000 60 +object 2 138 1.945910 3.891820 79 +area 1 144 1.945910 1.945910 80 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +databas 1 122 2.079442 2.079442 86 +seattl 0 120 2.079442 0.000000 103 +technolog 0 131 2.079442 0.000000 102 +machin 0 129 2.079442 0.000000 95 +intern 0 108 2.197225 0.000000 128 +user 1 104 2.302585 2.302585 137 +techniqu 0 99 2.302585 0.000000 138 +octob 1 89 2.397895 2.397895 156 +follow 0 92 2.397895 0.000000 143 +proceed 0 93 2.397895 0.000000 152 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +environ 0 84 2.484907 0.000000 177 +orient 1 80 2.564949 2.564949 205 +interfac 0 79 2.564949 0.000000 209 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +workshop 2 71 2.639057 5.278114 239 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +organ 1 65 2.772589 2.772589 265 +creat 0 63 2.772589 0.000000 277 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +evalu 0 64 2.772589 0.000000 266 +juli 0 60 2.833213 0.000000 305 +space 1 57 2.890372 2.890372 310 +sever 0 56 2.890372 0.000000 322 +advisor 1 51 2.995732 2.995732 355 +profession 0 51 2.995732 0.000000 345 +past 0 42 3.218876 0.000000 428 +multipl 0 39 3.258097 0.000000 453 +close 0 38 3.295837 0.000000 465 +singl 1 34 3.401197 3.401197 510 +global 0 34 3.401197 0.000000 520 +measur 1 28 3.610918 3.610918 609 +except 1 28 3.610918 3.610918 607 +full 0 28 3.610918 0.000000 615 +proc 0 26 3.688879 0.000000 649 +jeff 0 25 3.737670 0.000000 673 +primari 0 25 3.737670 0.000000 669 +handl 1 24 3.761200 3.761200 685 +basi 0 20 3.951244 0.000000 828 +benchmark 1 19 4.007333 4.007333 859 +mostli 0 19 4.007333 0.000000 869 +behavior 1 18 4.060443 4.060443 881 +fourth 0 16 4.174387 0.000000 999 +across 0 16 4.174387 0.000000 974 +levi 1 14 4.317488 4.317488 1093 +opportun 0 13 4.382027 0.000000 1161 +workload 1 12 4.465908 4.465908 1210 +infrastructur 0 12 4.465908 0.000000 1234 +hank 0 12 4.465908 0.000000 1253 +career 0 12 4.465908 0.000000 1287 +persist 1 11 4.553877 4.553877 1367 +architect 0 8 4.875197 0.000000 1624 +character 0 8 4.875197 0.000000 1767 +oop 0 8 4.875197 0.000000 1778 +oopsla 1 6 5.164786 5.164786 2221 +spare 0 6 5.164786 0.000000 2177 +tiwari 2 5 5.347108 10.694216 2385 +opal 1 4 5.568345 5.568345 3057 +chase 1 4 5.568345 5.568345 2897 +narasayya 0 4 5.568345 0.000000 3065 +boe 0 3 5.857933 0.000000 3318 +addendum 0 3 5.857933 0.000000 3150 +ashutosh 1 2 6.263398 6.263398 5966 +projectsopali 0 1 6.957497 0.000000 16440 +thisexperi 0 1 6.957497 0.000000 16441 +distrbut 0 1 6.957497 0.000000 16442 +ecoop 0 1 6.957497 0.000000 16443 +bosch 0 1 6.957497 0.000000 16444 +messeng 0 1 6.957497 0.000000 16445 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..6517b5c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +current 1 284 1.098612 1.098612 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +avail 0 169 1.791759 0.000000 48 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +seattl 0 120 2.079442 0.000000 103 +person 0 111 2.197225 0.000000 117 +take 0 97 2.302585 0.000000 134 +advanc 0 99 2.302585 0.000000 130 +search 0 95 2.397895 0.000000 155 +pictur 0 89 2.397895 0.000000 160 +second 0 81 2.484907 0.000000 166 +requir 0 81 2.484907 0.000000 167 +onlin 0 75 2.639057 0.000000 223 +java 1 70 2.708050 2.708050 248 +share 0 59 2.833213 0.000000 304 +plai 0 60 2.833213 0.000000 307 +simpl 0 60 2.833213 0.000000 298 +found 0 53 2.944439 0.000000 337 +digit 0 52 2.995732 0.000000 348 +maintain 0 51 2.995732 0.000000 342 +still 0 50 3.044522 0.000000 362 +frequent 0 49 3.044522 0.000000 367 +quarter 0 47 3.091042 0.000000 389 +anoth 0 45 3.135494 0.000000 408 +game 0 36 3.367296 0.000000 498 +copyright 0 36 3.367296 0.000000 495 +platform 0 29 3.583519 0.000000 591 +linux 1 27 3.637586 3.637586 631 +great 0 27 3.637586 0.000000 626 +known 0 24 3.761200 0.000000 702 +honor 0 23 3.806662 0.000000 729 +sort 0 22 3.850148 0.000000 738 +applet 0 20 3.951244 0.000000 827 +histori 0 19 4.007333 0.000000 853 +seek 0 17 4.110874 0.000000 954 +attempt 0 17 4.110874 0.000000 917 +qual 0 15 4.248495 0.000000 1062 +pretti 0 13 4.382027 0.000000 1191 +employ 0 12 4.465908 0.000000 1291 +gain 0 8 4.875197 0.000000 1730 +dead 0 7 5.010635 0.000000 1840 +apart 0 7 5.010635 0.000000 1936 +appar 0 7 5.010635 0.000000 1958 +bookstor 0 7 5.010635 0.000000 1837 +myresum 0 6 5.164786 0.000000 2199 +cat 0 6 5.164786 0.000000 2194 +commit 0 6 5.164786 0.000000 2233 +curiou 0 5 5.347108 0.000000 2541 +superhighwai 0 4 5.568345 0.000000 2943 +scotland 0 4 5.568345 0.000000 3049 +fulfil 0 4 5.568345 0.000000 2932 +breadth 0 4 5.568345 0.000000 2695 +gambit 0 3 5.857933 0.000000 3227 +tessa 1 2 6.263398 6.263398 4507 +yeargradu 0 2 6.263398 0.000000 6015 +maze 0 2 6.263398 0.000000 4843 +knit 0 2 6.263398 0.000000 4906 +relatedgoodi 0 1 6.957497 0.000000 16446 +clio 0 1 6.957497 0.000000 16447 +andbrows 0 1 6.957497 0.000000 16448 +kittyi 0 1 6.957497 0.000000 16449 +siames 0 1 6.957497 0.000000 16450 +memor 0 1 6.957497 0.000000 16451 +therear 0 1 6.957497 0.000000 16452 +tofind 0 1 6.957497 0.000000 16453 +alsor 0 1 6.957497 0.000000 16454 +classesi 0 1 6.957497 0.000000 16455 +ofeight 0 1 6.957497 0.000000 16456 +seminarlinux 0 1 6.957497 0.000000 16457 +gameseverybodi 0 1 6.957497 0.000000 16458 +gametom 0 1 6.957497 0.000000 16459 +coolgam 0 1 6.957497 0.000000 16460 +sleepingi 0 1 6.957497 0.000000 16461 +crochet 0 1 6.957497 0.000000 16462 +tlau 0 1 6.957497 0.000000 16463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..4ca4ae7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +phone 0 175 1.791759 0.000000 45 +recent 0 167 1.791759 0.000000 58 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +seattl 0 120 2.079442 0.000000 103 +provid 0 121 2.079442 0.000000 94 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +build 0 85 2.484907 0.000000 184 +educ 0 86 2.484907 0.000000 191 +resourc 0 81 2.484907 0.000000 172 +finger 0 52 2.995732 0.000000 354 +electron 0 47 3.091042 0.000000 379 +articl 0 33 3.433987 0.000000 530 +collabor 0 32 3.465736 0.000000 543 +photo 0 31 3.496508 0.000000 561 +martin 1 21 3.912023 3.912023 794 +among 0 21 3.912023 0.000000 781 +across 0 16 4.174387 0.000000 974 +photograph 0 15 4.248495 0.000000 1056 +holidai 0 12 4.465908 0.000000 1224 +lane 0 8 4.875197 0.000000 1720 +courtesi 0 7 5.010635 0.000000 1953 +moon 0 4 5.568345 0.000000 2991 +pierc 0 4 5.568345 0.000000 2623 +tompa 1 3 5.857933 5.857933 3305 +health 0 3 5.857933 0.000000 3787 +trajectori 0 2 6.263398 0.000000 4260 +pearl 0 2 6.263398 0.000000 4485 +wash 0 2 6.263398 0.000000 5714 +receptionist 0 1 6.957497 0.000000 16464 +thelma 0 1 6.957497 0.000000 16465 +louis 0 1 6.957497 0.000000 16466 +oyster 0 1 6.957497 0.000000 16467 +surrealist 0 1 6.957497 0.000000 16468 +propheci 0 1 6.957497 0.000000 16469 +carol 0 1 6.957497 0.000000 16470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..f152b610 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +list 0 201 1.609438 0.000000 39 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +base 0 165 1.791759 0.000000 50 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +seattl 1 120 2.079442 2.079442 103 +confer 0 126 2.079442 0.000000 100 +version 0 113 2.197225 0.000000 122 +associ 0 93 2.397895 0.000000 151 +proceed 0 93 2.397895 0.000000 152 +institut 0 84 2.484907 0.000000 187 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +appear 1 78 2.564949 2.564949 210 +optim 0 79 2.564949 0.000000 197 +free 0 73 2.639057 0.000000 224 +symposium 0 72 2.639057 0.000000 238 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +foundat 0 62 2.772589 0.000000 286 +abstract 0 62 2.772589 0.000000 276 +detail 0 57 2.890372 0.000000 321 +without 0 50 3.044522 0.000000 370 +move 0 47 3.091042 0.000000 382 +done 0 47 3.091042 0.000000 381 +cach 0 41 3.218876 0.000000 432 +vita 0 38 3.295837 0.000000 473 +brian 0 38 3.295837 0.000000 466 +return 0 34 3.401197 0.000000 502 +random 0 34 3.401197 0.000000 511 +curriculum 0 33 3.433987 0.000000 535 +product 0 33 3.433987 0.000000 527 +extend 0 32 3.465736 0.000000 539 +held 1 28 3.610918 3.610918 600 +trace 0 25 3.737670 0.000000 677 +longer 0 20 3.951244 0.000000 816 +histori 0 19 4.007333 0.000000 853 +comparison 0 19 4.007333 0.000000 863 +andrew 0 19 4.007333 0.000000 849 +bershad 0 18 4.060443 0.000000 902 +matrix 0 17 4.110874 0.000000 933 +letter 0 16 4.174387 0.000000 981 +driven 0 15 4.248495 0.000000 1048 +near 0 14 4.317488 0.000000 1091 +squar 0 14 4.317488 0.000000 1082 +karlin 1 13 4.382027 4.382027 1176 +sigmetr 0 13 4.382027 0.000000 1173 +anna 1 12 4.465908 4.465908 1292 +usenix 0 12 4.465908 0.000000 1240 +verifi 0 12 4.465908 0.000000 1261 +statement 0 11 4.553877 0.000000 1313 +probabilist 0 11 4.553877 0.000000 1343 +forc 1 10 4.653960 4.653960 1384 +charg 0 9 4.753590 0.000000 1582 +patterson 0 9 4.753590 0.000000 1554 +kumar 0 9 4.753590 0.000000 1506 +sigop 0 8 4.875197 0.000000 1727 +bit 0 7 5.010635 0.000000 1833 +prefetch 1 6 5.164786 5.164786 2039 +edward 1 6 5.164786 5.164786 2050 +promis 0 6 5.164786 0.000000 2037 +onoper 0 6 5.164786 0.000000 2048 +escap 0 4 5.568345 0.000000 3016 +kimbrel 2 3 5.857933 11.715866 3924 +traci 1 3 5.857933 5.857933 3984 +prison 1 3 5.857933 5.857933 3907 +cachingtraci 1 3 5.857933 5.857933 3923 +felten 1 3 5.857933 5.857933 3925 +eduher 0 3 5.857933 0.000000 3499 +tomanufactur 0 2 6.263398 0.000000 6016 +airplan 0 2 6.263398 0.000000 4917 +tomkin 0 2 6.263398 0.000000 5814 +hugo 0 2 6.263398 0.000000 5815 +garth 0 2 6.263398 0.000000 5816 +gibson 0 2 6.263398 0.000000 5817 +implemen 0 2 6.263398 0.000000 5809 +rakesh 0 2 6.263398 0.000000 6017 +sinha 0 2 6.263398 0.000000 5754 +imprison 1 1 6.957497 6.957497 16471 +captor 1 1 6.957497 6.957497 16472 +washingtonsinc 0 1 6.957497 0.000000 16473 +trial 0 1 6.957497 0.000000 16474 +toanoth 0 1 6.957497 0.000000 16475 +inmat 0 1 6.957497 0.000000 16476 +wasrecaptur 0 1 6.957497 0.000000 16477 +hisplight 0 1 6.957497 0.000000 16478 +rescu 0 1 6.957497 0.000000 16479 +ofwhat 0 1 6.957497 0.000000 16480 +tracyk 0 1 6.957497 0.000000 16481 +ieeesymposium 0 1 6.957497 0.000000 16482 +measurementand 0 1 6.957497 0.000000 16483 +usingo 0 1 6.957497 0.000000 16484 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..5aea6406 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +modifi 0 178 1.609438 0.000000 35 +seattl 0 120 2.079442 0.000000 103 +take 0 97 2.302585 0.000000 134 +real 1 93 2.397895 2.397895 144 +help 0 83 2.484907 0.000000 175 +run 0 51 2.995732 0.000000 347 +quarter 0 47 3.091042 0.000000 389 +understand 0 47 3.091042 0.000000 384 +keep 0 44 3.135494 0.000000 409 +mechan 0 43 3.178054 0.000000 416 +cach 0 41 3.218876 0.000000 432 +press 0 42 3.218876 0.000000 419 +dissert 0 32 3.465736 0.000000 549 +half 0 21 3.912023 0.000000 776 +corpor 0 21 3.912023 0.000000 802 +latest 0 21 3.912023 0.000000 785 +predict 0 19 4.007333 0.000000 855 +statu 0 18 4.060443 0.000000 885 +side 0 15 4.248495 0.000000 1022 +spin 0 14 4.317488 0.000000 1121 +lock 0 9 4.753590 0.000000 1551 +craig 1 7 5.010635 5.010635 1879 +pool 0 6 5.164786 0.000000 2225 +consum 0 5 5.347108 0.000000 2334 +queu 0 4 5.568345 0.000000 2648 +travi 1 3 5.857933 5.857933 3985 +motor 0 3 5.857933 0.000000 3909 +submarin 0 2 6.263398 0.000000 6018 +restor 0 1 6.957497 0.000000 16485 +arctic 0 1 6.957497 0.000000 16486 +esca 0 1 6.957497 0.000000 16487 +volvo 0 1 6.957497 0.000000 16488 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..e1dbaa74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +download 0 36 3.367296 0.000000 489 +bibliographi 0 34 3.401197 0.000000 518 +hobbi 0 16 4.174387 0.000000 1009 +dean 1 14 4.317488 4.317488 1104 +tullsen 1 6 5.164786 5.164786 2081 +biograph 0 2 6.263398 0.000000 5625 +resumemi 0 2 6.263398 0.000000 4971 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..6fd8ec82 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,236 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 2 380 0.693147 1.386294 9 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +group 1 183 1.609438 1.609438 36 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +analysi 0 124 2.079442 0.000000 98 +machin 0 129 2.079442 0.000000 95 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +send 0 114 2.197225 0.000000 109 +look 0 107 2.197225 0.000000 115 +part 1 98 2.302585 2.302585 129 +user 0 104 2.302585 0.000000 137 +advanc 0 99 2.302585 0.000000 130 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +call 0 91 2.397895 0.000000 153 +imag 0 91 2.397895 0.000000 161 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +novemb 1 81 2.484907 2.484907 179 +librari 0 87 2.484907 0.000000 181 +thing 0 84 2.484907 0.000000 189 +activ 0 84 2.484907 0.000000 182 +larg 0 82 2.484907 0.000000 168 +come 1 78 2.564949 2.564949 202 +messag 0 76 2.564949 0.000000 212 +orient 0 80 2.564949 0.000000 205 +symposium 1 72 2.639057 2.639057 238 +workshop 0 71 2.639057 0.000000 239 +solv 0 73 2.639057 0.000000 234 +free 0 73 2.639057 0.000000 224 +would 0 67 2.708050 0.000000 251 +degre 0 69 2.708050 0.000000 259 +differ 0 66 2.708050 0.000000 253 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +improv 0 62 2.772589 0.000000 289 +visit 0 63 2.772589 0.000000 288 +locat 1 59 2.833213 2.833213 303 +summer 1 56 2.890372 2.890372 311 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +extens 0 53 2.944439 0.000000 340 +talk 0 53 2.944439 0.000000 336 +hardwar 1 51 2.995732 2.995732 350 +life 0 50 3.044522 0.000000 375 +still 0 50 3.044522 0.000000 362 +quarter 0 47 3.091042 0.000000 389 +could 0 46 3.091042 0.000000 383 +get 0 46 3.091042 0.000000 380 +made 0 44 3.135494 0.000000 398 +anoth 0 45 3.135494 0.000000 408 +fridai 0 44 3.135494 0.000000 390 +around 0 43 3.178054 0.000000 415 +compani 0 41 3.218876 0.000000 423 +programm 1 39 3.258097 3.258097 445 +continu 1 39 3.258097 3.258097 448 +littl 0 39 3.258097 0.000000 454 +realli 0 40 3.258097 0.000000 444 +small 0 39 3.258097 0.000000 447 +winter 0 36 3.367296 0.000000 500 +staff 0 36 3.367296 0.000000 490 +soon 0 36 3.367296 0.000000 494 +michael 1 35 3.401197 3.401197 514 +post 0 35 3.401197 0.000000 505 +within 0 33 3.433987 0.000000 525 +taught 0 33 3.433987 0.000000 526 +product 0 33 3.433987 0.000000 527 +kind 0 32 3.465736 0.000000 541 +chapter 0 32 3.465736 0.000000 536 +taken 0 31 3.496508 0.000000 555 +photo 0 31 3.496508 0.000000 561 +particip 1 29 3.583519 3.583519 589 +steve 0 29 3.583519 0.000000 594 +enjoi 1 26 3.688879 3.688879 660 +subject 0 26 3.688879 0.000000 647 +valu 0 25 3.737670 0.000000 665 +mike 2 24 3.761200 7.522400 703 +other 0 24 3.761200 0.000000 697 +doctor 0 24 3.761200 0.000000 709 +thank 1 23 3.806662 3.806662 721 +begin 0 23 3.806662 0.000000 716 +finish 0 22 3.850148 0.000000 748 +director 0 22 3.850148 0.000000 767 +born 0 21 3.912023 0.000000 798 +wrote 1 20 3.951244 3.951244 830 +wonder 0 20 3.951244 0.000000 815 +citi 1 19 4.007333 4.007333 874 +eric 0 19 4.007333 0.000000 870 +lot 0 18 4.060443 0.000000 889 +demo 0 18 4.060443 0.000000 888 +debug 0 17 4.110874 0.000000 944 +took 0 16 4.174387 0.000000 1010 +contribut 0 15 4.248495 0.000000 1021 +countri 0 15 4.248495 0.000000 1059 +hopefulli 0 14 4.317488 0.000000 1071 +wife 0 13 4.382027 0.000000 1196 +front 0 13 4.382027 0.000000 1154 +earlier 0 13 4.382027 0.000000 1140 +forth 0 13 4.382027 0.000000 1186 +stai 0 12 4.465908 0.000000 1215 +franc 0 12 4.465908 0.000000 1276 +skill 0 12 4.465908 0.000000 1205 +bill 1 11 4.553877 4.553877 1297 +fix 0 11 4.553877 0.000000 1327 +america 0 11 4.553877 0.000000 1370 +motiv 0 11 4.553877 0.000000 1346 +chri 0 11 4.553877 0.000000 1311 +lake 0 11 4.553877 0.000000 1373 +prior 0 10 4.653960 0.000000 1438 +acquisit 0 10 4.653960 0.000000 1465 +ski 0 10 4.653960 0.000000 1471 +correctli 0 9 4.753590 0.000000 1478 +doug 0 9 4.753590 0.000000 1517 +mention 0 9 4.753590 0.000000 1569 +french 0 9 4.753590 0.000000 1511 +folk 0 9 4.753590 0.000000 1597 +screen 0 9 4.753590 0.000000 1577 +swim 0 9 4.753590 0.000000 1599 +cross 0 8 4.875197 0.000000 1703 +harvard 1 7 5.010635 5.010635 1926 +brought 0 7 5.010635 0.000000 1925 +poster 0 7 5.010635 0.000000 1814 +earn 0 7 5.010635 0.000000 1788 +iowa 0 7 5.010635 0.000000 1971 +oopsla 1 6 5.164786 5.164786 2221 +pari 1 6 5.164786 5.164786 2158 +nativ 0 6 5.164786 0.000000 2192 +south 0 6 5.164786 0.000000 2167 +hike 0 6 5.164786 0.000000 2234 +truli 0 5 5.347108 0.000000 2476 +sail 0 5 5.347108 0.000000 2571 +observatori 0 4 5.568345 0.000000 3070 +countless 0 4 5.568345 0.000000 3020 +theintern 0 4 5.568345 0.000000 2981 +theacm 0 4 5.568345 0.000000 2698 +sigsoft 0 4 5.568345 0.000000 3036 +ti 0 4 5.568345 0.000000 3005 +marco 0 4 5.568345 0.000000 2589 +luck 0 3 5.857933 0.000000 3201 +immedi 0 3 5.857933 0.000000 3117 +motif 0 3 5.857933 0.000000 3752 +astrophys 0 3 5.857933 0.000000 3936 +schwarz 0 3 5.857933 0.000000 3986 +talent 0 3 5.857933 0.000000 3768 +traci 0 3 5.857933 0.000000 3984 +harold 0 3 5.857933 0.000000 3803 +scanner 0 3 5.857933 0.000000 3437 +eduperson 0 2 6.263398 0.000000 5776 +contractor 0 2 6.263398 0.000000 4915 +widget 0 2 6.263398 0.000000 5347 +convinc 0 2 6.263398 0.000000 6019 +calibr 0 2 6.263398 0.000000 4502 +francais 0 2 6.263398 0.000000 6020 +uist 0 2 6.263398 0.000000 5901 +grinnel 0 2 6.263398 0.000000 5763 +alexand 0 2 6.263398 0.000000 5329 +smithsonian 1 1 6.957497 6.957497 16489 +uwin 1 1 6.957497 6.957497 16490 +vanhilst 1 1 6.957497 6.957497 16491 +angela 1 1 6.957497 6.957497 16492 +vanhilstmichael 0 1 6.957497 0.000000 16493 +vanhilstvanhilst 0 1 6.957497 0.000000 16494 +edumvh 0 1 6.957497 0.000000 16495 +usaclick 0 1 6.957497 0.000000 16496 +personalmik 0 1 6.957497 0.000000 16497 +theend 0 1 6.957497 0.000000 16498 +udub 0 1 6.957497 0.000000 16499 +atibm 0 1 6.957497 0.000000 16500 +unterfac 0 1 6.957497 0.000000 16501 +sdata 0 1 6.957497 0.000000 16502 +maintainingcomput 0 1 6.957497 0.000000 16503 +saoimagewhich 0 1 6.957497 0.000000 16504 +astronom 0 1 6.957497 0.000000 16505 +saoimag 0 1 6.957497 0.000000 16506 +gnudistribut 0 1 6.957497 0.000000 16507 +wyatt 0 1 6.957497 0.000000 16508 +mandel 0 1 6.957497 0.000000 16509 +minkfor 0 1 6.957497 0.000000 16510 +seismologistsin 0 1 6.957497 0.000000 16511 +theallianc 0 1 6.957497 0.000000 16512 +colombiain 0 1 6.957497 0.000000 16513 +studentsbrows 0 1 6.957497 0.000000 16514 +pine 0 1 6.957497 0.000000 16515 +shirei 0 1 6.957497 0.000000 16516 +stenvik 0 1 6.957497 0.000000 16517 +frommicrosoft 0 1 6.957497 0.000000 16518 +sacrif 0 1 6.957497 0.000000 16519 +isota 0 1 6.957497 0.000000 16520 +inarchitectur 0 1 6.957497 0.000000 16521 +wooden 0 1 6.957497 0.000000 16522 +planningfrom 0 1 6.957497 0.000000 16523 +mitand 0 1 6.957497 0.000000 16524 +visualdesign 0 1 6.957497 0.000000 16525 +andkayak 0 1 6.957497 0.000000 16526 +bronson 0 1 6.957497 0.000000 16527 +sebastien 0 1 6.957497 0.000000 16528 +hilst 0 1 6.957497 0.000000 16529 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..f19e34a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +paper 0 205 1.609438 0.000000 38 +data 1 170 1.791759 1.791759 49 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +object 0 138 1.945910 0.000000 79 +relat 0 139 1.945910 0.000000 68 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +databas 0 122 2.079442 0.000000 86 +person 0 111 2.197225 0.000000 117 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +check 0 115 2.197225 0.000000 118 +access 0 102 2.302585 0.000000 136 +peopl 0 96 2.302585 0.000000 132 +sinc 0 90 2.397895 0.000000 159 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +come 0 78 2.564949 0.000000 202 +orient 0 80 2.564949 0.000000 205 +appear 0 78 2.564949 0.000000 210 +write 0 72 2.639057 0.000000 222 +html 0 75 2.639057 0.000000 235 +collect 1 65 2.772589 2.772589 268 +written 0 63 2.772589 0.000000 278 +best 0 59 2.833213 0.000000 299 +room 0 59 2.833213 0.000000 301 +overview 0 56 2.890372 0.000000 323 +cool 0 49 3.044522 0.000000 374 +visitor 0 49 3.044522 0.000000 371 +even 0 45 3.135494 0.000000 393 +http 0 41 3.218876 0.000000 420 +live 0 40 3.258097 0.000000 451 +realli 0 40 3.258097 0.000000 444 +join 0 39 3.258097 0.000000 457 +slide 0 38 3.295837 0.000000 467 +staff 1 36 3.367296 3.367296 490 +download 0 36 3.367296 0.000000 489 +short 0 36 3.367296 0.000000 499 +graph 1 30 3.555348 3.555348 576 +quit 0 27 3.637586 0.000000 633 +arrai 0 27 3.637586 0.000000 627 +repres 0 26 3.688879 0.000000 656 +handl 0 24 3.761200 0.000000 685 +thu 0 21 3.912023 0.000000 773 +expand 1 17 4.110874 4.110874 928 +young 0 16 4.174387 0.000000 991 +mayb 0 15 4.248495 0.000000 1014 +qual 0 15 4.248495 0.000000 1062 +infrastructur 0 12 4.465908 0.000000 1234 +hello 1 10 4.653960 4.653960 1407 +mosaic 0 10 4.653960 0.000000 1426 +cecil 0 9 4.753590 0.000000 1547 +pure 0 8 4.875197 0.000000 1776 +irregular 0 8 4.875197 0.000000 1768 +mirror 0 6 5.164786 0.000000 2028 +shortest 0 5 5.347108 0.000000 2424 +writeup 0 5 5.347108 0.000000 2352 +vass 1 2 6.263398 6.263398 4449 +pageuw 0 2 6.263398 0.000000 6021 +pagerec 0 2 6.263398 0.000000 6022 +cecilproject 0 2 6.263398 0.000000 4457 +cooler 0 2 6.263398 0.000000 6023 +anddynam 0 2 6.263398 0.000000 5889 +myqual 0 2 6.263398 0.000000 6005 +closer 0 2 6.263398 0.000000 6024 +vassilylong 0 1 6.957497 0.000000 16530 +linki 0 1 6.957497 0.000000 16531 +fewfil 0 1 6.957497 0.000000 16532 +thisstuff 0 1 6.957497 0.000000 16533 +quotesrussian 0 1 6.957497 0.000000 16534 +pagesvari 0 1 6.957497 0.000000 16535 +linksguid 0 1 6.957497 0.000000 16536 +formsoth 0 1 6.957497 0.000000 16537 +pagencsa 0 1 6.957497 0.000000 16538 +andvortex 0 1 6.957497 0.000000 16539 +befast 0 1 6.957497 0.000000 16540 +themvi 0 1 6.957497 0.000000 16541 +ourdepartment 0 1 6.957497 0.000000 16542 +beenupgrad 0 1 6.957497 0.000000 16543 +thezpl 0 1 6.957497 0.000000 16544 +languageto 0 1 6.957497 0.000000 16545 +repartit 0 1 6.957497 0.000000 16546 +theslidesfrom 0 1 6.957497 0.000000 16547 +toresourc 0 1 6.957497 0.000000 16548 +eduobject 0 1 6.957497 0.000000 16549 +pastor 0 1 6.957497 0.000000 16550 +vybrasyvalsya 0 1 6.957497 0.000000 16551 +okna 0 1 6.957497 0.000000 16552 +pyatyi 0 1 6.957497 0.000000 16553 +deystvov 0 1 6.957497 0.000000 16554 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..c097fbe6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +washington 2 236 1.386294 2.772588 32 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +seattl 1 120 2.079442 2.079442 103 +present 0 91 2.397895 0.000000 145 +educ 0 86 2.484907 0.000000 191 +resum 0 79 2.564949 0.000000 217 +servic 0 72 2.639057 0.000000 236 +receiv 0 66 2.708050 0.000000 244 +profession 0 51 2.995732 0.000000 345 +join 0 39 3.258097 0.000000 457 +electr 0 38 3.295837 0.000000 461 +scientist 0 31 3.496508 0.000000 560 +recommend 0 22 3.850148 0.000000 737 +theunivers 1 21 3.912023 3.912023 797 +divis 0 21 3.912023 0.000000 803 +histori 0 19 4.007333 0.000000 853 +letter 0 16 4.174387 0.000000 981 +achiev 0 14 4.317488 0.000000 1088 +evan 0 8 4.875197 0.000000 1633 +patent 0 5 5.347108 0.000000 2574 +invent 0 4 5.568345 0.000000 3028 +arizona 0 3 5.857933 0.000000 3700 +electricalengin 0 3 5.857933 0.000000 3987 +expertis 0 3 5.857933 0.000000 3321 +virgil 1 2 6.263398 6.263398 5783 +bourassa 1 2 6.263398 6.263398 5782 +uwvirgil 0 1 6.957497 0.000000 16555 +bourassavirgil 0 1 6.957497 0.000000 16556 +interestsinclud 0 1 6.957497 0.000000 16557 +boeingin 0 1 6.957497 0.000000 16558 +scienceorgan 0 1 6.957497 0.000000 16559 +bellevu 0 1 6.957497 0.000000 16560 +arizonast 0 1 6.957497 0.000000 16561 +temp 0 1 6.957497 0.000000 16562 +accesswhat 0 1 6.957497 0.000000 16563 +statusoccasion 0 1 6.957497 0.000000 16564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..8181de0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +email 0 220 1.386294 0.000000 29 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +avail 2 169 1.791759 3.583518 48 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +read 0 154 1.791759 0.000000 47 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +area 0 144 1.945910 0.000000 80 +high 1 130 2.079442 2.079442 101 +seattl 1 120 2.079442 2.079442 103 +provid 0 121 2.079442 0.000000 94 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +look 1 107 2.197225 2.197225 115 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +pleas 0 113 2.197225 0.000000 114 +place 0 106 2.197225 0.000000 124 +intern 0 108 2.197225 0.000000 128 +topic 0 114 2.197225 0.000000 110 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +follow 0 92 2.397895 0.000000 143 +center 0 88 2.397895 0.000000 158 +learn 1 86 2.484907 2.484907 170 +academ 0 82 2.484907 0.000000 178 +activ 0 84 2.484907 0.000000 182 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +issu 1 78 2.564949 2.564949 211 +server 0 76 2.564949 0.000000 204 +state 0 76 2.564949 0.000000 207 +involv 1 71 2.639057 2.639057 227 +nation 1 74 2.639057 2.639057 240 +servic 0 72 2.639057 0.000000 236 +simul 1 66 2.708050 2.708050 255 +receiv 0 66 2.708050 0.000000 244 +written 0 63 2.772589 0.000000 278 +organ 0 65 2.772589 0.000000 265 +collect 0 65 2.772589 0.000000 268 +visit 0 63 2.772589 0.000000 288 +plai 1 60 2.833213 2.833213 307 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +detail 1 57 2.890372 2.890372 321 +found 1 53 2.944439 2.944439 337 +undergradu 0 54 2.944439 0.000000 338 +particular 0 51 2.995732 0.000000 352 +much 0 52 2.995732 0.000000 349 +standard 0 48 3.044522 0.000000 365 +right 0 48 3.044522 0.000000 363 +physic 0 47 3.091042 0.000000 377 +directori 0 45 3.135494 0.000000 396 +math 0 44 3.135494 0.000000 402 +mechan 0 43 3.178054 0.000000 416 +continu 0 39 3.258097 0.000000 448 +game 1 36 3.367296 3.367296 498 +tree 0 36 3.367296 0.000000 492 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +toler 0 33 3.433987 0.000000 533 +fault 1 32 3.465736 3.465736 547 +chapter 1 32 3.465736 3.465736 536 +india 0 32 3.465736 0.000000 550 +taken 0 31 3.496508 0.000000 555 +often 0 31 3.496508 0.000000 551 +secur 0 30 3.555348 0.000000 577 +particip 0 29 3.583519 0.000000 589 +held 0 28 3.610918 0.000000 600 +effort 0 26 3.688879 0.000000 652 +enhanc 0 26 3.688879 0.000000 644 +session 0 26 3.688879 0.000000 643 +highli 0 23 3.806662 0.000000 725 +methodolog 0 23 3.806662 0.000000 733 +head 0 23 3.806662 0.000000 732 +period 0 22 3.850148 0.000000 743 +unit 1 21 3.912023 3.912023 779 +leav 0 21 3.912023 0.000000 772 +born 0 21 3.912023 0.000000 798 +half 0 21 3.912023 0.000000 776 +safeti 1 20 3.951244 3.951244 817 +scheme 0 20 3.951244 0.000000 818 +tenni 0 20 3.951244 0.000000 838 +exploit 0 20 3.951244 0.000000 836 +failur 0 18 4.060443 0.000000 898 +attend 0 18 4.060443 0.000000 893 +english 0 15 4.248495 0.000000 1033 +came 0 13 4.382027 0.000000 1197 +danc 1 12 4.465908 4.465908 1278 +replic 0 12 4.465908 0.000000 1231 +nanci 0 12 4.465908 0.000000 1256 +eight 0 11 4.553877 0.000000 1331 +council 0 11 4.553877 0.000000 1364 +literatur 0 11 4.553877 0.000000 1300 +leveson 0 9 4.753590 0.000000 1540 +poetri 0 9 4.753590 0.000000 1596 +simpli 0 8 4.875197 0.000000 1626 +presenc 0 8 4.875197 0.000000 1671 +coast 0 8 4.875197 0.000000 1746 +cricket 1 7 5.010635 5.010635 1945 +brought 0 7 5.010635 0.000000 1925 +whenev 0 7 5.010635 0.000000 1883 +occasion 0 7 5.010635 0.000000 1905 +saturdai 0 7 5.010635 0.000000 1794 +throughout 0 7 5.010635 0.000000 1871 +vivek 1 6 5.164786 5.164786 2210 +squash 0 6 5.164786 0.000000 2223 +band 0 6 5.164786 0.000000 2198 +corba 0 5 5.347108 0.000000 2320 +focuss 0 5 5.347108 0.000000 2271 +cell 0 5 5.347108 0.000000 2274 +nuclear 0 5 5.347108 0.000000 2576 +toolset 0 4 5.568345 0.000000 3014 +murphi 0 4 5.568345 0.000000 2737 +racquetbal 0 4 5.568345 0.000000 3052 +suffic 0 4 5.568345 0.000000 2869 +ultra 0 4 5.568345 0.000000 2889 +swing 0 4 5.568345 0.000000 2887 +restructur 0 4 5.568345 0.000000 2775 +tend 0 4 5.568345 0.000000 3041 +rsml 1 3 5.857933 5.857933 3967 +wesleyan 0 3 5.857933 0.000000 3988 +marin 0 3 5.857933 0.000000 3947 +ballroom 0 3 5.857933 0.000000 3983 +dabbl 0 3 5.857933 0.000000 3971 +bank 0 3 5.857933 0.000000 3920 +mirza 0 3 5.857933 0.000000 3989 +bellcor 1 2 6.263398 6.263398 5174 +ratan 1 2 6.263398 6.263398 5948 +adher 0 2 6.263398 0.000000 6025 +tango 0 2 6.263398 0.000000 6010 +reform 0 2 6.263398 0.000000 5828 +growth 0 2 6.263398 0.000000 4084 +angelo 1 1 6.957497 6.957497 16565 +scientistat 0 1 6.957497 0.000000 16566 +morristown 0 1 6.957497 0.000000 16567 +researchwork 0 1 6.957497 0.000000 16568 +distributedsoftwar 0 1 6.957497 0.000000 16569 +anatida 0 1 6.957497 0.000000 16570 +indc 0 1 6.957497 0.000000 16571 +foundher 0 1 6.957497 0.000000 16572 +integrationof 0 1 6.957497 0.000000 16573 +bydr 0 1 6.957497 0.000000 16574 +fromrequir 0 1 6.957497 0.000000 16575 +middletown 0 1 6.957497 0.000000 16576 +purus 0 1 6.957497 0.000000 16577 +lesserext 0 1 6.957497 0.000000 16578 +ardent 0 1 6.957497 0.000000 16579 +folow 0 1 6.957497 0.000000 16580 +superson 0 1 6.957497 0.000000 16581 +cowboi 0 1 6.957497 0.000000 16582 +keen 0 1 6.957497 0.000000 16583 +waltz 0 1 6.957497 0.000000 16584 +foxtrot 0 1 6.957497 0.000000 16585 +chacha 0 1 6.957497 0.000000 16586 +rhumba 0 1 6.957497 0.000000 16587 +mambo 0 1 6.957497 0.000000 16588 +ecosoc 0 1 6.957497 0.000000 16589 +rapidpopul 0 1 6.957497 0.000000 16590 +prolifer 0 1 6.957497 0.000000 16591 +ghalib 0 1 6.957497 0.000000 16592 +centuryindian 0 1 6.957497 0.000000 16593 +poet 0 1 6.957497 0.000000 16594 +romant 0 1 6.957497 0.000000 16595 +victorian 0 1 6.957497 0.000000 16596 +obligatori 0 1 6.957497 0.000000 16597 +sitesthat 0 1 6.957497 0.000000 16598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..d00cb684 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +call 0 91 2.397895 0.000000 153 +master 0 76 2.564949 0.000000 216 +window 1 68 2.708050 2.708050 242 +thesi 1 57 2.890372 2.890372 327 +mobil 0 23 3.806662 0.000000 730 +avoid 0 21 3.912023 0.000000 799 +emac 0 13 4.382027 0.000000 1143 +voelker 1 9 4.753590 4.753590 1557 +guggenheim 0 8 4.875197 0.000000 1759 +geoff 1 6 5.164786 5.164786 2124 +annex 0 5 5.347108 0.000000 2572 +wireless 0 4 5.568345 0.000000 2693 +washingtonseattl 0 4 5.568345 0.000000 3044 +mobisa 0 3 5.857933 0.000000 3927 +inseattl 0 2 6.263398 0.000000 6026 +whati 0 2 6.263398 0.000000 6027 +andbuild 0 2 6.263398 0.000000 6028 +settl 0 2 6.263398 0.000000 5778 +skywhoi 0 1 6.957497 0.000000 16599 +wherechateau 0 1 6.957497 0.000000 16600 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..08340daf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +washington 1 236 1.386294 1.386294 32 +also 0 259 1.386294 0.000000 28 +list 1 201 1.609438 1.609438 39 +paper 0 205 1.609438 0.000000 38 +perform 0 143 1.945910 0.000000 74 +seattl 0 120 2.079442 0.000000 103 +look 1 107 2.197225 2.197225 115 +version 0 113 2.197225 0.000000 122 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +memori 0 101 2.302585 0.000000 139 +peopl 0 96 2.302585 0.000000 132 +thing 1 84 2.484907 2.484907 189 +stuff 0 87 2.484907 0.000000 171 +test 0 66 2.708050 0.000000 252 +organ 0 65 2.772589 0.000000 265 +particular 0 51 2.995732 0.000000 352 +investig 0 51 2.995732 0.000000 353 +cool 0 49 3.044522 0.000000 374 +keep 0 44 3.135494 0.000000 409 +howev 0 41 3.218876 0.000000 422 +littl 0 39 3.258097 0.000000 454 +actual 0 28 3.610918 0.000000 604 +interpret 0 24 3.761200 0.000000 686 +other 0 24 3.761200 0.000000 697 +earli 0 16 4.174387 0.000000 968 +baer 0 11 4.553877 0.000000 1353 +denni 0 11 4.553877 0.000000 1321 +alpha 0 11 4.553877 0.000000 1348 +jean 0 10 4.653960 0.000000 1440 +jump 0 9 4.753590 0.000000 1603 +wong 0 9 4.753590 0.000000 1609 +wayn 0 8 4.875197 0.000000 1738 +loup 0 6 5.164786 0.000000 2228 +geoff 0 6 5.164786 0.000000 2124 +fish 0 6 5.164786 0.000000 2207 +alec 0 5 5.347108 0.000000 2563 +rocki 0 4 5.568345 0.000000 3048 +waynew 1 3 5.857933 5.857933 3982 +differentmemori 0 1 6.957497 0.000000 16601 +beingdon 0 1 6.957497 0.000000 16602 +rightnow 0 1 6.957497 0.000000 16603 +peoplewho 0 1 6.957497 0.000000 16604 +testwayn 0 1 6.957497 0.000000 16605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..d6d45d83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +time 1 293 1.098612 1.098612 17 +washington 0 236 1.386294 0.000000 32 +william 0 22 3.850148 0.000000 765 +spend 0 19 4.007333 0.000000 850 +hang 0 9 4.753590 0.000000 1499 +pagei 0 8 4.875197 0.000000 1683 +chan 1 7 5.010635 5.010635 1876 +spare 0 6 5.164786 0.000000 2177 +hell 0 4 5.568345 0.000000 2885 +heaven 0 3 5.857933 0.000000 3589 +wchan 0 3 5.857933 0.000000 3338 +pagewilliam 0 1 6.957497 0.000000 16606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..4d39d7fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +group 0 183 1.609438 0.000000 36 +base 1 165 1.791759 1.791759 50 +data 0 170 1.791759 0.000000 49 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +professor 0 137 1.945910 0.000000 76 +hall 0 146 1.945910 0.000000 65 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +report 0 131 2.079442 0.000000 92 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +world 0 115 2.197225 0.000000 126 +book 0 99 2.302585 0.000000 131 +technic 0 100 2.302585 0.000000 140 +associ 0 93 2.397895 0.000000 151 +mani 0 92 2.397895 0.000000 150 +select 0 91 2.397895 0.000000 154 +journal 0 83 2.484907 0.000000 183 +control 0 82 2.484907 0.000000 164 +wide 0 84 2.484907 0.000000 185 +internet 0 83 2.484907 0.000000 186 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +intellig 1 72 2.639057 2.639057 225 +nation 0 74 2.639057 0.000000 240 +receiv 1 66 2.708050 2.708050 244 +degre 0 69 2.708050 0.000000 259 +sieg 0 69 2.708050 0.000000 260 +august 0 66 2.708050 0.000000 257 +plan 1 65 2.772589 2.772589 272 +artifici 1 63 2.772589 2.772589 280 +dept 0 64 2.772589 0.000000 291 +januari 0 62 2.772589 0.000000 264 +foundat 0 62 2.772589 0.000000 286 +visit 0 63 2.772589 0.000000 288 +juli 0 60 2.833213 0.000000 305 +plai 0 60 2.833213 0.000000 307 +publish 0 57 2.890372 0.000000 326 +found 0 53 2.944439 0.000000 337 +investig 0 51 2.995732 0.000000 353 +electron 0 47 3.091042 0.000000 379 +favorit 0 44 3.135494 0.000000 410 +editor 0 41 3.218876 0.000000 433 +edit 0 42 3.218876 0.000000 418 +past 0 42 3.218876 0.000000 428 +winter 0 36 3.367296 0.000000 500 +award 1 34 3.401197 3.401197 523 +least 0 35 3.401197 0.000000 516 +board 0 33 3.433987 0.000000 528 +travel 0 30 3.555348 0.000000 579 +chair 0 29 3.583519 0.000000 596 +repres 0 26 3.688879 0.000000 656 +enjoi 0 26 3.688879 0.000000 660 +reach 0 24 3.761200 0.000000 688 +magazin 0 24 3.761200 0.000000 704 +ofwashington 0 22 3.850148 0.000000 766 +almost 0 22 3.850148 0.000000 742 +comparison 0 19 4.007333 0.000000 863 +agent 1 18 4.060443 4.060443 910 +bachelor 0 17 4.110874 0.000000 957 +adam 0 17 4.110874 0.000000 934 +young 0 16 4.174387 0.000000 991 +action 0 15 4.248495 0.000000 1038 +photograph 0 15 4.248495 0.000000 1056 +role 0 14 4.317488 0.000000 1101 +stori 0 14 4.317488 0.000000 1087 +galleri 0 13 4.382027 0.000000 1192 +daniel 1 12 4.465908 4.465908 1233 +land 0 12 4.465908 0.000000 1273 +guest 0 12 4.465908 0.000000 1220 +infrastructur 0 12 4.465908 0.000000 1234 +sens 0 11 4.553877 0.000000 1305 +shop 0 10 4.653960 0.000000 1469 +invit 0 10 4.653960 0.000000 1428 +weld 2 9 4.753590 9.507180 1538 +hundr 0 9 4.753590 0.000000 1528 +aaai 1 8 4.875197 4.875197 1750 +presidenti 0 8 4.875197 0.000000 1737 +gather 0 8 4.875197 0.000000 1719 +pacif 0 8 4.875197 0.000000 1674 +illustr 0 8 4.875197 0.000000 1679 +planner 0 7 5.010635 0.000000 1797 +ground 0 7 5.010635 0.000000 1955 +softbot 0 7 5.010635 0.000000 1974 +northwest 0 7 5.010635 0.000000 1973 +yale 0 6 5.164786 0.000000 2003 +commit 0 6 5.164786 0.000000 2233 +engineeringat 0 5 5.347108 0.000000 2561 +middl 0 5 5.347108 0.000000 2372 +cacm 0 5 5.347108 0.000000 2388 +allegro 0 5 5.347108 0.000000 2314 +naval 0 4 5.568345 0.000000 2920 +climb 0 4 5.568345 0.000000 2936 +biochemistri 0 3 5.857933 0.000000 3513 +ucpop 0 3 5.857933 0.000000 3878 +revisit 0 3 5.857933 0.000000 3915 +recreat 0 3 5.857933 0.000000 3990 +theworld 0 3 5.857933 0.000000 3158 +twin 0 3 5.857933 0.000000 3657 +younginvestig 0 2 6.263398 0.000000 5794 +ascal 0 2 6.263398 0.000000 5893 +anintroduct 0 2 6.263398 0.000000 4156 +absent 0 2 6.263398 0.000000 4825 +cafe 0 2 6.263398 0.000000 5826 +boi 0 2 6.263398 0.000000 5918 +wilder 0 2 6.263398 0.000000 5516 +theadvisori 0 1 6.957497 0.000000 16607 +airesearch 0 1 6.957497 0.000000 16608 +ofintellig 0 1 6.957497 0.000000 16609 +isco 0 1 6.957497 0.000000 16610 +scad 0 1 6.957497 0.000000 16611 +seattlewa 0 1 6.957497 0.000000 16612 +sitesworldwid 0 1 6.957497 0.000000 16613 +arehi 0 1 6.957497 0.000000 16614 +aip 0 1 6.957497 0.000000 16615 +exhaustivelist 0 1 6.957497 0.000000 16616 +stormymountain 0 1 6.957497 0.000000 16617 +galen 0 1 6.957497 0.000000 16618 +desert 0 1 6.957497 0.000000 16619 +morocco 0 1 6.957497 0.000000 16620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..66a6c113 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +master 0 76 2.564949 0.000000 216 +univ 0 28 3.610918 0.000000 617 +utah 0 9 4.753590 0.000000 1585 +wendi 1 2 6.263398 6.263398 5864 +belluomini 1 2 6.263398 6.263398 5865 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..2729d897 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +engin 1 297 1.098612 1.098612 20 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +washington 1 236 1.386294 1.386294 32 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +architectur 0 139 1.945910 0.000000 77 +compil 1 122 2.079442 2.079442 96 +seattl 0 120 2.079442 0.000000 103 +select 0 91 2.397895 0.000000 154 +member 0 84 2.484907 0.000000 165 +chang 0 82 2.484907 0.000000 163 +receiv 0 66 2.708050 0.000000 244 +sieg 0 69 2.708050 0.000000 260 +interact 0 62 2.772589 0.000000 270 +advisor 0 51 2.995732 0.000000 355 +move 0 47 3.091042 0.000000 382 +electr 0 38 3.295837 0.000000 461 +theunivers 0 21 3.912023 0.000000 797 +among 0 21 3.912023 0.000000 781 +programminglanguag 0 21 3.912023 0.000000 782 +voic 0 21 3.912023 0.000000 806 +runtim 0 19 4.007333 0.000000 858 +thedepart 1 11 4.553877 4.553877 1350 +wilson 0 9 4.753590 0.000000 1536 +postdoc 0 8 4.875197 0.000000 1724 +myresearch 0 4 5.568345 0.000000 2842 +weihl 0 3 5.857933 0.000000 3284 +inseattl 0 2 6.263398 0.000000 6026 +thespin 0 2 6.263398 0.000000 6029 +sciencein 0 2 6.263398 0.000000 5804 +thelaboratori 0 2 6.263398 0.000000 4424 +linksperson 0 2 6.263398 0.000000 5143 +hsieh 0 2 6.263398 0.000000 5818 +hsiehwilson 0 1 6.957497 0.000000 16621 +hsiehi 0 1 6.957497 0.000000 16622 +theschool 0 1 6.957497 0.000000 16623 +engineeringatmit 0 1 6.957497 0.000000 16624 +werefran 0 1 6.957497 0.000000 16625 +kaashoekandbil 0 1 6.957497 0.000000 16626 +publicationsselect 0 1 6.957497 0.000000 16627 +interestswilson 0 1 6.957497 0.000000 16628 +numberha 0 1 6.957497 0.000000 16629 +whsieh 0 1 6.957497 0.000000 16630 +keyoctob 0 1 6.957497 0.000000 16631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..a1fc0f6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +network 1 168 1.791759 1.791759 61 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +perform 1 143 1.945910 1.945910 74 +architectur 0 139 1.945910 0.000000 77 +seattl 1 120 2.079442 2.079442 103 +analysi 0 124 2.079442 0.000000 98 +structur 0 106 2.197225 0.000000 105 +school 0 84 2.484907 0.000000 188 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +line 0 75 2.639057 0.000000 231 +plai 0 60 2.833213 0.000000 307 +summer 0 56 2.890372 0.000000 311 +realli 0 40 3.258097 0.000000 444 +winter 0 36 3.367296 0.000000 500 +idea 0 32 3.465736 0.000000 545 +interpret 1 24 3.761200 3.761200 686 +departmentunivers 0 24 3.761200 0.000000 711 +scalabl 0 24 3.761200 0.000000 705 +binari 0 20 3.951244 0.000000 823 +bershad 0 18 4.060443 0.000000 902 +asplo 0 17 4.110874 0.000000 948 +cambridg 0 16 4.174387 0.000000 1008 +latenc 0 16 4.174387 0.000000 993 +levi 0 14 4.317488 0.000000 1093 +washingtonbox 0 13 4.382027 0.000000 1200 +usenix 1 12 4.465908 4.465908 1240 +baer 0 11 4.553877 0.000000 1353 +thecomput 0 10 4.653960 0.000000 1408 +equip 0 10 4.653960 0.000000 1459 +voelker 1 9 4.753590 4.753590 1557 +wong 0 9 4.753590 0.000000 1609 +romer 0 8 4.875197 0.000000 1706 +guitar 0 8 4.875197 0.000000 1758 +instrument 0 7 5.010635 0.000000 1954 +wolman 2 6 5.164786 10.329572 2093 +corp 0 6 5.164786 0.000000 2139 +strang 0 6 5.164786 0.000000 2064 +alec 1 5 5.347108 5.347108 2563 +departmentat 0 5 5.347108 0.000000 2513 +treat 0 5 5.347108 0.000000 2521 +gradual 0 4 5.568345 0.000000 2997 +etch 0 4 5.568345 0.000000 2755 +thekkath 0 3 5.857933 0.000000 3973 +habit 0 3 5.857933 0.000000 3777 +thechateau 0 2 6.263398 0.000000 5853 +fordigit 0 2 6.263398 0.000000 5752 +firewal 0 2 6.263398 0.000000 5407 +relai 0 2 6.263398 0.000000 5404 +hungri 0 2 6.263398 0.000000 5511 +otter 0 2 6.263398 0.000000 4166 +nervou 0 2 6.263398 0.000000 5953 +pressur 0 2 6.263398 0.000000 5960 +wolmanwolman 0 1 6.957497 0.000000 16632 +eduworkcomput 0 1 6.957497 0.000000 16633 +isroom 0 1 6.957497 0.000000 16634 +executablesrocki 0 1 6.957497 0.000000 16635 +performanceon 0 1 6.957497 0.000000 16636 +trees 0 1 6.957497 0.000000 16637 +fixha 0 1 6.957497 0.000000 16638 +hallwolman 0 1 6.957497 0.000000 16639 +diseasewolman 0 1 6.957497 0.000000 16640 +lumber 0 1 6.957497 0.000000 16641 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..d6aa52ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +engin 1 297 1.098612 1.098612 20 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +phone 0 175 1.791759 0.000000 45 +base 0 165 1.791759 0.000000 50 +implement 0 152 1.791759 0.000000 52 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +confer 1 126 2.079442 2.079442 100 +seattl 0 120 2.079442 0.000000 103 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +intern 0 108 2.197225 0.000000 128 +proceed 1 93 2.397895 2.397895 152 +school 0 84 2.484907 0.000000 188 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +stuff 0 87 2.484907 0.000000 171 +method 0 80 2.564949 0.000000 213 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +symposium 0 72 2.639057 0.000000 238 +simul 1 66 2.708050 2.708050 255 +sieg 0 69 2.708050 0.000000 260 +goal 0 66 2.708050 0.000000 250 +evalu 1 64 2.772589 2.772589 266 +dept 0 64 2.772589 0.000000 291 +visual 0 48 3.044522 0.000000 372 +possibl 0 47 3.091042 0.000000 378 +term 0 43 3.178054 0.000000 411 +cach 0 41 3.218876 0.000000 432 +submit 0 39 3.258097 0.000000 440 +short 0 36 3.367296 0.000000 499 +soon 0 36 3.367296 0.000000 494 +award 0 34 3.401197 0.000000 523 +tech 0 35 3.401197 0.000000 515 +photo 0 31 3.496508 0.000000 561 +graph 0 30 3.555348 0.000000 576 +multiprocessor 1 28 3.610918 3.610918 605 +cluster 0 28 3.610918 0.000000 612 +univ 0 28 3.610918 0.000000 617 +compar 0 26 3.688879 0.000000 648 +trace 1 25 3.737670 3.737670 677 +predict 0 19 4.007333 0.000000 855 +monitor 0 17 4.110874 0.000000 941 +zhang 1 16 4.174387 4.174387 980 +driven 1 15 4.248495 4.248495 1048 +coher 0 14 4.317488 0.000000 1109 +baer 1 11 4.553877 4.553877 1353 +jean 0 10 4.653960 0.000000 1440 +explicit 0 9 4.753590 0.000000 1525 +loup 0 6 5.164786 0.000000 2228 +optimist 1 5 5.347108 5.347108 2501 +conserv 0 4 5.568345 0.000000 2870 +tran 0 3 5.857933 0.000000 3384 +communicationprimit 0 2 6.263398 0.000000 5449 +hpca 0 2 6.263398 0.000000 6030 +toolfor 0 2 6.263398 0.000000 6031 +numa 0 2 6.263398 0.000000 4905 +xiaohan 1 1 6.957497 6.957497 16642 +xqin 0 1 6.957497 0.000000 16643 +basedmultiprocessor 0 1 6.957497 0.000000 16644 +nalluri 0 1 6.957497 0.000000 16645 +processingon 0 1 6.957497 0.000000 16646 +chinaread 0 1 6.957497 0.000000 16647 +chinesesearch 0 1 6.957497 0.000000 16648 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..7f5ab708 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +graduat 0 215 1.386294 0.000000 31 +oper 0 180 1.609438 0.000000 34 +address 0 170 1.791759 0.000000 62 +year 0 148 1.945910 0.000000 84 +seattl 0 120 2.079442 0.000000 103 +document 0 121 2.079442 0.000000 89 +schedul 0 119 2.079442 0.000000 85 +intern 0 108 2.197225 0.000000 128 +text 0 98 2.302585 0.000000 133 +info 1 85 2.484907 2.484907 176 +second 0 81 2.484907 0.000000 166 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +servic 0 72 2.639057 0.000000 236 +type 0 61 2.833213 0.000000 296 +index 1 56 2.890372 2.890372 309 +talk 0 53 2.944439 0.000000 336 +finger 0 52 2.995732 0.000000 354 +transact 0 39 3.258097 0.000000 438 +brian 0 38 3.295837 0.000000 466 +connect 0 37 3.332205 0.000000 485 +tech 0 35 3.401197 0.000000 515 +survei 0 35 3.401197 0.000000 513 +random 0 34 3.401197 0.000000 511 +linux 0 27 3.637586 0.000000 631 +yahoo 0 24 3.761200 0.000000 707 +lyco 0 19 4.007333 0.000000 871 +bershad 0 18 4.060443 0.000000 902 +qual 0 15 4.248495 0.000000 1062 +spin 1 14 4.317488 4.317488 1121 +touch 0 12 4.465908 0.000000 1288 +perl 0 11 4.553877 0.000000 1332 +desktop 1 10 4.653960 4.653960 1445 +metacrawl 0 10 4.653960 0.000000 1455 +vista 0 10 4.653960 0.000000 1452 +meta 0 9 4.753590 0.000000 1505 +modula 0 9 4.753590 0.000000 1613 +japan 0 8 4.875197 0.000000 1762 +gatewai 0 7 5.010635 0.000000 1942 +lesson 0 5 5.347108 0.000000 2568 +alta 0 4 5.568345 0.000000 3039 +japanes 0 4 5.568345 0.000000 2934 +patch 0 4 5.568345 0.000000 2710 +archi 0 3 5.857933 0.000000 3639 +javascript 0 3 5.857933 0.000000 3221 +thespin 0 2 6.263398 0.000000 6029 +apprentic 0 2 6.263398 0.000000 5873 +yasushi 1 1 6.957497 6.957497 16649 +saitoyasushi 0 1 6.957497 0.000000 16650 +saito 0 1 6.957497 0.000000 16651 +atdepart 0 1 6.957497 0.000000 16652 +workingwith 0 1 6.957497 0.000000 16653 +andperson 0 1 6.957497 0.000000 16654 +sightse 0 1 6.957497 0.000000 16655 +trainer 0 1 6.957497 0.000000 16656 +dvorak 0 1 6.957497 0.000000 16657 +trycanva 0 1 6.957497 0.000000 16658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..d4095aff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +like 0 132 1.945910 0.000000 81 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +mathemat 0 108 2.197225 0.000000 123 +part 0 98 2.302585 0.000000 129 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +resum 0 79 2.564949 0.000000 217 +intellig 0 72 2.639057 0.000000 225 +line 0 75 2.639057 0.000000 231 +servic 0 72 2.639057 0.000000 236 +degre 0 69 2.708050 0.000000 259 +artifici 0 63 2.772589 0.000000 280 +result 0 65 2.772589 0.000000 281 +basic 0 50 3.044522 0.000000 360 +physic 0 47 3.091042 0.000000 377 +done 0 47 3.091042 0.000000 381 +field 0 37 3.332205 0.000000 482 +idea 0 32 3.465736 0.000000 545 +computersci 0 30 3.555348 0.000000 562 +cluster 0 28 3.610918 0.000000 612 +retriev 0 27 3.637586 0.000000 621 +along 0 18 4.060443 0.000000 878 +engineeringunivers 0 17 4.110874 0.000000 959 +chateau 0 16 4.174387 0.000000 997 +trip 0 14 4.317488 0.000000 1113 +washingtonbox 0 13 4.382027 0.000000 1200 +edui 0 13 4.382027 0.000000 1193 +israel 0 11 4.553877 0.000000 1366 +metacrawl 0 10 4.653960 0.000000 1455 +ski 0 10 4.653960 0.000000 1471 +hundr 0 9 4.753590 0.000000 1528 +erik 0 8 4.875197 0.000000 1701 +oren 0 6 5.164786 0.000000 2134 +softwareengin 0 6 5.164786 0.000000 2162 +selberg 0 5 5.347108 0.000000 2441 +algorithmsfor 0 4 5.568345 0.000000 2748 +worki 0 4 5.568345 0.000000 3010 +raft 0 4 5.568345 0.000000 3060 +dive 0 3 5.857933 0.000000 3654 +zamir 1 2 6.263398 6.263398 5897 +pageoren 0 2 6.263398 0.000000 5888 +jerusalem 0 2 6.263398 0.000000 4918 +isra 0 1 6.957497 0.000000 16659 +myundergradu 0 1 6.957497 0.000000 16660 +hebrewunivers 0 1 6.957497 0.000000 16661 +userwith 0 1 6.957497 0.000000 16662 +orenetzioni 0 1 6.957497 0.000000 16663 +sinai 0 1 6.957497 0.000000 16664 +jeeptour 0 1 6.957497 0.000000 16665 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..a374362f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +also 1 259 1.386294 1.386294 28 +washington 0 236 1.386294 0.000000 32 +languag 0 227 1.386294 0.000000 26 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +paper 0 205 1.609438 0.000000 38 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +recent 0 167 1.791759 0.000000 58 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +process 0 142 1.945910 0.000000 72 +lectur 0 135 1.945910 0.000000 73 +year 0 148 1.945910 0.000000 84 +perform 0 143 1.945910 0.000000 74 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +look 0 107 2.197225 0.000000 115 +present 0 91 2.397895 0.000000 145 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +member 0 84 2.484907 0.000000 165 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +nation 0 74 2.639057 0.000000 240 +involv 0 71 2.639057 0.000000 227 +simul 1 66 2.708050 2.708050 255 +receiv 0 66 2.708050 0.000000 244 +improv 0 62 2.772589 0.000000 289 +direct 0 57 2.890372 0.000000 316 +digit 0 52 2.995732 0.000000 348 +hardwar 0 51 2.995732 0.000000 350 +approach 0 48 3.044522 0.000000 366 +protocol 1 45 3.135494 3.135494 407 +textbook 0 44 3.135494 0.000000 397 +execut 0 45 3.135494 0.000000 404 +cach 1 41 3.218876 3.218876 432 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +author 1 39 3.258097 3.258097 450 +join 0 39 3.258097 0.000000 457 +electr 0 38 3.295837 0.000000 461 +industri 0 38 3.295837 0.000000 464 +ofth 0 36 3.367296 0.000000 491 +singl 0 34 3.401197 0.000000 510 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +chair 0 29 3.583519 0.000000 596 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 0 28 3.610918 0.000000 605 +although 0 25 3.737670 0.000000 667 +trace 0 25 3.737670 0.000000 677 +fellow 1 24 3.761200 3.761200 701 +serv 1 22 3.850148 3.850148 758 +comparison 0 19 4.007333 0.000000 863 +asplo 0 17 4.110874 0.000000 948 +driven 0 15 4.248495 0.000000 1048 +coher 0 14 4.317488 0.000000 1109 +difficulti 0 13 4.382027 0.000000 1132 +block 0 13 4.382027 0.000000 1183 +franc 0 12 4.465908 0.000000 1276 +baer 1 11 4.553877 4.553877 1353 +cycl 0 11 4.553877 0.000000 1335 +primit 0 11 4.553877 0.000000 1317 +isca 0 11 4.553877 0.000000 1354 +impact 0 11 4.553877 0.000000 1334 +denni 0 11 4.553877 0.000000 1321 +jean 0 10 4.653960 0.000000 1440 +prior 0 10 4.653960 0.000000 1438 +french 0 9 4.753590 0.000000 1511 +guggenheim 0 8 4.875197 0.000000 1759 +uniprocessor 0 8 4.875197 0.000000 1696 +prefetch 1 6 5.164786 5.164786 2039 +loup 0 6 5.164786 0.000000 2228 +ucla 1 5 5.347108 5.347108 2502 +icpp 0 5 5.347108 0.000000 2382 +anddistribut 0 4 5.568345 0.000000 3031 +coauthor 0 4 5.568345 0.000000 3064 +conserv 0 4 5.568345 0.000000 2870 +chairman 1 3 5.857933 5.857933 3991 +electricalengin 0 3 5.857933 0.000000 3987 +parallelprocess 0 3 5.857933 0.000000 3626 +twelv 0 3 5.857933 0.000000 3899 +specul 0 3 5.857933 0.000000 3951 +grenobl 1 2 6.263398 6.263398 5928 +internationalsymposium 1 2 6.263398 6.263398 6032 +adjunct 0 2 6.263398 0.000000 6033 +diplom 0 2 6.263398 0.000000 5982 +theuniversit 0 2 6.263398 0.000000 5927 +laboratoir 0 2 6.263398 0.000000 5929 +universit 0 2 6.263398 0.000000 5630 +retain 0 2 6.263398 0.000000 5443 +hpca 0 2 6.263398 0.000000 6030 +professorand 0 1 6.957497 0.000000 16666 +ingnieur 0 1 6.957497 0.000000 16667 +doctorat 0 1 6.957497 0.000000 16668 +decalcul 0 1 6.957497 0.000000 16669 +technologygroup 0 1 6.957497 0.000000 16670 +thesearea 0 1 6.957497 0.000000 16671 +distinguishedvisitor 0 1 6.957497 0.000000 16672 +asprogram 0 1 6.957497 0.000000 16673 +sigarch 0 1 6.957497 0.000000 16674 +eighteen 0 1 6.957497 0.000000 16675 +professorba 0 1 6.957497 0.000000 16676 +laboratoriesand 0 1 6.957497 0.000000 16677 +inacademia 0 1 6.957497 0.000000 16678 +hashad 0 1 6.957497 0.000000 16679 +accent 0 1 6.957497 0.000000 16680 +comparisonwith 0 1 6.957497 0.000000 16681 +andisca 0 1 6.957497 0.000000 16682 +optimisticapproach 0 1 6.957497 0.000000 16683 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..b6561808 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +applic 1 170 1.791759 1.791759 56 +implement 0 152 1.791759 0.000000 52 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +object 1 138 1.945910 1.945910 79 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +area 0 144 1.945910 0.000000 80 +hall 0 146 1.945910 0.000000 65 +high 0 130 2.079442 0.000000 101 +seattl 0 120 2.079442 0.000000 103 +assist 0 112 2.197225 0.000000 113 +advanc 0 99 2.302585 0.000000 130 +member 1 84 2.484907 2.484907 165 +control 0 82 2.484907 0.000000 164 +level 0 87 2.484907 0.000000 180 +requir 0 81 2.484907 0.000000 167 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +method 0 80 2.564949 0.000000 213 +dynam 0 76 2.564949 0.000000 194 +interfac 0 79 2.564949 0.000000 209 +april 0 77 2.564949 0.000000 196 +effici 0 73 2.639057 0.000000 233 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +sieg 0 69 2.708050 0.000000 260 +guid 0 63 2.772589 0.000000 267 +street 0 63 2.772589 0.000000 293 +type 0 61 2.833213 0.000000 296 +room 0 59 2.833213 0.000000 301 +faculti 0 56 2.890372 0.000000 325 +direct 0 57 2.890372 0.000000 316 +extens 1 53 2.944439 2.944439 340 +undergradu 0 54 2.944439 0.000000 338 +investig 1 51 2.995732 2.995732 353 +maintain 0 51 2.995732 0.000000 342 +still 0 50 3.044522 0.000000 362 +pointer 0 48 3.044522 0.000000 368 +featur 0 46 3.091042 0.000000 386 +adapt 0 46 3.091042 0.000000 387 +join 0 39 3.258097 0.000000 457 +multi 0 36 3.367296 0.000000 493 +express 0 32 3.465736 0.000000 540 +profil 0 30 3.555348 0.000000 581 +static 1 27 3.637586 3.637586 619 +lead 0 23 3.806662 0.000000 718 +self 0 22 3.850148 0.000000 761 +programminglanguag 0 21 3.912023 0.000000 782 +util 0 21 3.912023 0.000000 774 +kernel 0 20 3.951244 0.000000 825 +fine 0 20 3.951244 0.000000 822 +stanford 0 17 4.110874 0.000000 955 +previous 0 17 4.110874 0.000000 923 +engineeringunivers 0 17 4.110874 0.000000 959 +spin 1 14 4.317488 4.317488 1121 +achiev 0 14 4.317488 0.000000 1088 +incorpor 1 13 4.382027 4.382027 1163 +washingtonbox 0 13 4.382027 0.000000 1200 +safe 0 12 4.465908 0.000000 1274 +modul 0 10 4.653960 0.000000 1434 +reli 0 10 4.653960 0.000000 1411 +cecil 1 9 4.753590 4.753590 1547 +end 0 9 4.753590 0.000000 1567 +modula 0 9 4.753590 0.000000 1613 +herefor 0 9 4.753590 0.000000 1483 +chamber 1 8 4.875197 4.875197 1692 +pure 0 8 4.875197 0.000000 1776 +analys 0 8 4.875197 0.000000 1666 +isol 0 8 4.875197 0.000000 1663 +craig 1 7 5.010635 5.010635 1879 +implementationof 0 7 5.010635 0.000000 1813 +vehicl 0 7 5.010635 0.000000 1928 +vortex 1 5 5.347108 5.347108 2362 +spinproject 0 5 5.347108 0.000000 2570 +despit 0 5 5.347108 0.000000 2317 +languagesand 0 4 5.568345 0.000000 3071 +microkernel 0 4 5.568345 0.000000 3047 +intra 0 3 5.857933 0.000000 3243 +dialect 0 3 5.857933 0.000000 3226 +dynamiccompil 0 3 5.857933 0.000000 3926 +programmingenviron 0 2 6.263398 0.000000 5240 +whichsupport 0 2 6.263398 0.000000 6003 +ceciland 0 1 6.957497 0.000000 16684 +languageserv 0 1 6.957497 0.000000 16685 +compilersystem 0 1 6.957497 0.000000 16686 +andinterprocedur 0 1 6.957497 0.000000 16687 +withfront 0 1 6.957497 0.000000 16688 +chamberswa 0 1 6.957497 0.000000 16689 +implementationsund 0 1 6.957497 0.000000 16690 +systemintegr 0 1 6.957497 0.000000 16691 +themodula 0 1 6.957497 0.000000 16692 +spinalso 0 1 6.957497 0.000000 16693 +grainedextens 0 1 6.957497 0.000000 16694 +researchproject 0 1 6.957497 0.000000 16695 +informationprof 0 1 6.957497 0.000000 16696 +chambersdepart 0 1 6.957497 0.000000 16697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..6b38aaaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +student 0 343 1.098612 0.000000 19 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +algorithm 0 162 1.791759 0.000000 57 +hour 0 165 1.791759 0.000000 46 +hall 0 146 1.945910 0.000000 65 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +high 0 130 2.079442 0.000000 101 +confer 0 126 2.079442 0.000000 100 +advanc 0 99 2.302585 0.000000 130 +associ 0 93 2.397895 0.000000 151 +graphic 0 90 2.397895 0.000000 147 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +journal 0 83 2.484907 0.000000 183 +method 0 80 2.564949 0.000000 213 +optim 0 79 2.564949 0.000000 197 +mondai 0 77 2.564949 0.000000 206 +april 0 77 2.564949 0.000000 196 +involv 0 71 2.639057 0.000000 227 +logic 0 71 2.639057 0.000000 230 +workshop 0 71 2.639057 0.000000 239 +sieg 0 69 2.708050 0.000000 260 +thursdai 0 70 2.708050 0.000000 241 +integr 0 67 2.708050 0.000000 245 +laboratori 0 63 2.772589 0.000000 292 +room 0 59 2.833213 0.000000 301 +digit 0 52 2.995732 0.000000 348 +physic 0 47 3.091042 0.000000 377 +join 0 39 3.258097 0.000000 457 +programm 0 39 3.258097 0.000000 445 +paul 0 38 3.295837 0.000000 471 +field 0 37 3.332205 0.000000 482 +articl 0 33 3.433987 0.000000 530 +focu 0 30 3.555348 0.000000 571 +travel 0 30 3.555348 0.000000 579 +arrai 0 27 3.637586 0.000000 627 +ofwashington 0 22 3.850148 0.000000 766 +voic 0 21 3.912023 0.000000 806 +vlsi 0 21 3.912023 0.000000 795 +chip 0 21 3.912023 0.000000 770 +rout 0 21 3.912023 0.000000 793 +particularli 0 19 4.007333 0.000000 867 +aid 0 18 4.060443 0.000000 904 +carl 1 15 4.248495 4.248495 1024 +draw 0 14 4.317488 0.000000 1086 +circuit 0 13 4.382027 0.000000 1131 +carnegi 0 12 4.465908 0.000000 1260 +fpga 1 10 4.653960 4.653960 1433 +franklin 0 10 4.653960 0.000000 1436 +chao 1 8 4.875197 4.875197 1753 +curv 0 8 4.875197 0.000000 1656 +sensit 0 8 4.875197 0.000000 1726 +router 0 8 4.875197 0.000000 1772 +multicomput 0 7 5.010635 0.000000 1890 +northwest 0 7 5.010635 0.000000 1973 +densiti 0 7 5.010635 0.000000 1927 +southern 0 6 5.164786 0.000000 2191 +spline 0 6 5.164786 0.000000 2007 +gate 0 6 5.164786 0.000000 2182 +categori 0 5 5.347108 0.000000 2261 +darren 0 5 5.347108 0.000000 2565 +ebel 1 4 5.568345 5.568345 2756 +triptych 1 4 5.568345 5.568345 3061 +neil 0 4 5.568345 0.000000 2841 +theperform 0 3 5.857933 0.000000 3262 +cronquist 0 3 5.857933 0.000000 3942 +haswork 0 2 6.263398 0.000000 5182 +andsurfac 0 2 6.263398 0.000000 5735 +hei 0 2 6.263398 0.000000 5769 +latch 0 2 6.263398 0.000000 6034 +soha 0 2 6.263398 0.000000 6006 +hassoun 0 2 6.263398 0.000000 6007 +mckenzi 0 2 6.263398 0.000000 5974 +ebelingdepart 0 1 6.957497 0.000000 16698 +wheatoncolleg 0 1 6.957497 0.000000 16699 +illinoisunivers 0 1 6.957497 0.000000 16700 +mellonunivers 0 1 6.957497 0.000000 16701 +vlsiarchitectur 0 1 6.957497 0.000000 16702 +hitech 0 1 6.957497 0.000000 16703 +chessmachin 0 1 6.957497 0.000000 16704 +apex 0 1 6.957497 0.000000 16705 +routingnetwork 0 1 6.957497 0.000000 16706 +placementand 0 1 6.957497 0.000000 16707 +teachingspr 0 1 6.957497 0.000000 16708 +designoffic 0 1 6.957497 0.000000 16709 +fccm 0 1 6.957497 0.000000 16710 +napamai 0 1 6.957497 0.000000 16711 +burlington 0 1 6.957497 0.000000 16712 +chicagojun 0 1 6.957497 0.000000 16713 +vegasresearch 0 1 6.957497 0.000000 16714 +amara 0 1 6.957497 0.000000 16715 +galleryelan 0 1 6.957497 0.000000 16716 +galleryebel 0 1 6.957497 0.000000 16717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..a5e5ac61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +last 0 314 1.098612 0.000000 14 +link 1 247 1.386294 1.386294 24 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +seattl 1 120 2.079442 2.079442 103 +schedul 1 119 2.079442 2.079442 85 +spring 0 131 2.079442 0.000000 88 +confer 0 126 2.079442 0.000000 100 +server 0 76 2.564949 0.000000 204 +new 0 64 2.772589 0.000000 262 +photo 0 31 3.496508 0.000000 561 +steve 0 29 3.583519 0.000000 594 +magazin 0 24 3.761200 0.000000 704 +tenni 0 20 3.951244 0.000000 838 +agent 0 18 4.060443 0.000000 910 +hank 0 12 4.465908 0.000000 1253 +uncertainti 1 7 5.010635 5.010635 1882 +restaur 0 6 5.164786 0.000000 2230 +seriou 0 5 5.347108 0.000000 2252 +carlo 0 5 5.347108 0.000000 2515 +maria 0 4 5.568345 0.000000 2954 +wine 0 3 5.857933 0.000000 3895 +hanksunivers 0 1 6.957497 0.000000 16718 +washingtondepart 0 1 6.957497 0.000000 16719 +architecturesai 0 1 6.957497 0.000000 16720 +symphoni 0 1 6.957497 0.000000 16721 +opera 0 1 6.957497 0.000000 16722 +edita 0 1 6.957497 0.000000 16723 +gruberova 0 1 6.957497 0.000000 16724 +giulini 0 1 6.957497 0.000000 16725 +discographi 0 1 6.957497 0.000000 16726 +sumac 0 1 6.957497 0.000000 16727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..2f340c02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +group 1 183 1.609438 1.609438 36 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +take 0 97 2.302585 0.000000 134 +school 0 84 2.484907 0.000000 188 +intellig 0 72 2.639057 0.000000 225 +degre 1 69 2.708050 2.708050 259 +knowledg 0 67 2.708050 0.000000 243 +integr 0 67 2.708050 0.000000 245 +artifici 0 63 2.772589 0.000000 280 +colleg 0 61 2.833213 0.000000 300 +faculti 0 56 2.890372 0.000000 325 +understand 0 47 3.091042 0.000000 384 +math 0 44 3.135494 0.000000 402 +electr 0 38 3.295837 0.000000 461 +origin 0 38 3.295837 0.000000 472 +within 0 33 3.433987 0.000000 525 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +neural 0 30 3.555348 0.000000 578 +symbol 0 27 3.637586 0.000000 620 +spent 1 25 3.737670 3.737670 676 +initi 0 23 3.806662 0.000000 717 +divis 1 21 3.912023 3.912023 803 +corpor 0 21 3.912023 0.000000 802 +verif 0 20 3.951244 0.000000 826 +expert 0 20 3.951244 0.000000 833 +mostli 0 19 4.007333 0.000000 869 +aid 0 18 4.060443 0.000000 904 +speech 0 12 4.465908 0.000000 1222 +fellowship 0 10 4.653960 0.000000 1460 +yale 0 6 5.164786 0.000000 2003 +british 0 5 5.347108 0.000000 2546 +broadcast 0 5 5.347108 0.000000 2453 +began 0 5 5.347108 0.000000 2498 +scotland 0 4 5.568345 0.000000 3049 +withth 0 4 5.568345 0.000000 2805 +alistair 0 3 5.857933 0.000000 3315 +holden 0 3 5.857933 0.000000 3314 +london 0 3 5.857933 0.000000 3282 +imperi 0 2 6.263398 0.000000 5389 +highland 0 1 6.957497 0.000000 16728 +receivedhi 0 1 6.957497 0.000000 16729 +glasgow 0 1 6.957497 0.000000 16730 +graduateapprentic 0 1 6.957497 0.000000 16731 +edison 0 1 6.957497 0.000000 16732 +phddegre 0 1 6.957497 0.000000 16733 +learningin 0 1 6.957497 0.000000 16734 +coursefrom 0 1 6.957497 0.000000 16735 +colin 0 1 6.957497 0.000000 16736 +cherri 0 1 6.957497 0.000000 16737 +thebbc 0 1 6.957497 0.000000 16738 +theuw 0 1 6.957497 0.000000 16739 +departmentsform 0 1 6.957497 0.000000 16740 +netmethodolog 0 1 6.957497 0.000000 16741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..91cee5bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +algorithm 1 162 1.791759 1.791759 57 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +professor 1 137 1.945910 1.945910 76 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +mathemat 1 108 2.197225 2.197225 123 +manag 1 114 2.197225 2.197225 125 +teach 0 108 2.197225 0.000000 112 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +advanc 1 99 2.302585 2.302585 130 +part 0 98 2.302585 0.000000 129 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +academ 0 82 2.484907 0.000000 178 +journal 0 83 2.484907 0.000000 183 +nation 1 74 2.639057 2.639057 240 +effici 0 73 2.639057 0.000000 233 +complex 1 64 2.772589 2.772589 269 +improv 0 62 2.772589 0.000000 289 +faculti 0 56 2.890372 0.000000 325 +profession 0 51 2.995732 0.000000 345 +press 0 42 3.218876 0.000000 419 +societi 1 40 3.258097 3.258097 456 +theoret 0 39 3.258097 0.000000 446 +industri 0 38 3.295837 0.000000 464 +respons 0 37 3.332205 0.000000 476 +award 1 34 3.401197 3.401197 523 +random 1 34 3.401197 3.401197 511 +board 1 33 3.433987 3.433987 528 +john 1 33 3.433987 3.433987 532 +travel 1 30 3.555348 3.555348 579 +art 0 29 3.583519 0.000000 593 +chair 0 29 3.583519 0.000000 596 +held 0 28 3.610918 0.000000 600 +american 1 27 3.637586 3.637586 634 +berkelei 1 26 3.688879 3.688879 657 +fellow 0 24 3.761200 0.000000 701 +doctor 0 24 3.761200 0.000000 709 +flow 0 24 3.761200 0.000000 700 +ofwashington 0 22 3.850148 0.000000 766 +reduc 0 22 3.850148 0.000000 759 +among 0 21 3.912023 0.000000 781 +miller 0 17 4.110874 0.000000 949 +match 0 16 4.174387 0.000000 965 +partit 0 16 4.174387 0.000000 984 +massachusett 0 14 4.317488 0.000000 1118 +america 1 11 4.553877 4.553877 1370 +probabilist 0 11 4.553877 0.000000 1343 +minimum 0 9 4.753590 0.000000 1555 +academi 1 8 4.875197 4.875197 1735 +combinatori 0 8 4.875197 0.000000 1629 +pennsylvania 0 7 5.010635 0.000000 1932 +perfect 0 7 5.010635 0.000000 1921 +prize 1 6 5.164786 5.164786 2150 +ture 0 6 5.164786 0.000000 1997 +advisori 0 6 5.164786 0.000000 2148 +plane 0 6 5.164786 0.000000 2187 +karp 1 5 5.347108 5.347108 2284 +weyl 0 4 5.568345 0.000000 2854 +technion 0 4 5.568345 0.000000 2856 +weizmann 0 4 5.568345 0.000000 2858 +combinator 0 4 5.568345 0.000000 2915 +theacm 0 4 5.568345 0.000000 2698 +neumann 1 3 5.857933 5.857933 3720 +medal 0 3 5.857933 0.000000 3912 +truste 0 3 5.857933 0.000000 3900 +combinatorica 0 3 5.857933 0.000000 3649 +ofoper 0 3 5.857933 0.000000 3292 +dick 0 2 6.263398 0.000000 5396 +sciencesmemb 0 2 6.263398 0.000000 5742 +engineeringfellow 0 2 6.263398 0.000000 4902 +sciencesfellow 0 2 6.263398 0.000000 4903 +appliedmathemat 0 2 6.263398 0.000000 5716 +honorari 0 2 6.263398 0.000000 5741 +georgetown 0 2 6.263398 0.000000 5667 +wigderson 0 2 6.263398 0.000000 6035 +fornetwork 0 2 6.263398 0.000000 5580 +edmond 0 2 6.263398 0.000000 4144 +plenum 0 2 6.263398 0.000000 6036 +presentmemb 1 1 6.957497 6.957497 16742 +salesman 1 1 6.957497 6.957497 16743 +karprichard 0 1 6.957497 0.000000 16744 +karpprofessor 0 1 6.957497 0.000000 16745 +ofcomputersci 0 1 6.957497 0.000000 16746 +andadjunct 0 1 6.957497 0.000000 16747 +ofmolecularbiotechnologyunivers 0 1 6.957497 0.000000 16748 +eduaward 0 1 6.957497 0.000000 16749 +membershipsn 0 1 6.957497 0.000000 16750 +babbag 0 1 6.957497 0.000000 16751 +sciencedistinguish 0 1 6.957497 0.000000 16752 +senat 0 1 6.957497 0.000000 16753 +berkeleylanchest 0 1 6.957497 0.000000 16754 +fulkerson 0 1 6.957497 0.000000 16755 +hermann 0 1 6.957497 0.000000 16756 +forsoci 0 1 6.957497 0.000000 16757 +governor 0 1 6.957497 0.000000 16758 +scienceinstitut 0 1 6.957497 0.000000 16759 +presentselect 0 1 6.957497 0.000000 16760 +turingaward 0 1 6.957497 0.000000 16761 +upfal 0 1 6.957497 0.000000 16762 +spanningtre 0 1 6.957497 0.000000 16763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..e03e86cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,251 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +washington 2 236 1.386294 2.772588 32 +graduat 1 215 1.386294 1.386294 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +recent 1 167 1.791759 1.791759 58 +data 0 170 1.791759 0.000000 49 +lectur 1 135 1.945910 1.945910 73 +perform 1 143 1.945910 1.945910 74 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +professor 0 137 1.945910 0.000000 76 +technolog 1 131 2.079442 2.079442 102 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +person 1 111 2.197225 2.197225 117 +version 0 113 2.197225 0.000000 122 +memori 1 101 2.302585 2.302585 139 +technic 0 100 2.302585 0.000000 140 +select 1 91 2.397895 2.397895 154 +grade 1 90 2.397895 2.397895 142 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +member 1 84 2.484907 2.484907 165 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +complet 1 77 2.564949 2.564949 208 +master 0 76 2.564949 0.000000 216 +issu 0 78 2.564949 0.000000 211 +april 0 77 2.564949 0.000000 196 +nation 1 74 2.639057 2.639057 240 +servic 0 72 2.639057 0.000000 236 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +test 0 66 2.708050 0.000000 252 +degre 0 69 2.708050 0.000000 259 +polici 0 64 2.772589 0.000000 279 +foundat 0 62 2.772589 0.000000 286 +colleg 0 61 2.833213 0.000000 300 +faculti 1 56 2.890372 2.890372 325 +reason 0 57 2.890372 0.000000 318 +major 0 56 2.890372 0.000000 315 +think 0 57 2.890372 0.000000 314 +talk 0 53 2.944439 0.000000 336 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +profession 0 51 2.995732 0.000000 345 +frequent 0 49 3.044522 0.000000 367 +visitor 0 49 3.044522 0.000000 371 +principl 0 48 3.044522 0.000000 357 +california 1 46 3.091042 3.091042 388 +execut 0 45 3.135494 0.000000 404 +review 1 42 3.218876 3.218876 425 +examin 0 42 3.218876 0.000000 424 +http 0 41 3.218876 0.000000 420 +transact 0 39 3.258097 0.000000 438 +annual 0 40 3.258097 0.000000 458 +industri 1 38 3.295837 3.295837 464 +electr 0 38 3.295837 0.000000 461 +field 0 37 3.332205 0.000000 482 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +committe 2 34 3.401197 6.802394 522 +award 1 34 3.401197 3.401197 523 +board 1 33 3.433987 3.433987 528 +product 0 33 3.433987 0.000000 527 +chair 2 29 3.583519 7.167038 596 +chines 1 29 3.583519 3.583519 595 +intend 0 28 3.610918 0.000000 599 +berkelei 1 26 3.688879 3.688879 657 +concern 1 25 3.737670 3.737670 666 +doctor 0 24 3.761200 0.000000 709 +sometim 0 24 3.761200 0.000000 696 +miscellan 0 23 3.806662 0.000000 731 +famili 0 23 3.806662 0.000000 735 +serv 1 22 3.850148 3.850148 758 +director 1 22 3.850148 3.850148 767 +corpor 0 21 3.912023 0.000000 802 +fund 0 21 3.912023 0.000000 805 +theunivers 0 21 3.912023 0.000000 797 +hous 0 21 3.912023 0.000000 801 +region 0 19 4.007333 0.000000 875 +seem 1 18 4.060443 4.060443 899 +appropri 0 18 4.060443 0.000000 883 +lot 0 18 4.060443 0.000000 889 +stand 0 18 4.060443 0.000000 891 +record 0 18 4.060443 0.000000 890 +demo 0 18 4.060443 0.000000 888 +universityof 1 15 4.248495 4.248495 1061 +drive 0 15 4.248495 0.000000 1052 +club 0 15 4.248495 0.000000 1058 +trip 1 14 4.317488 4.317488 1113 +essenti 0 13 4.382027 0.000000 1137 +sigmetr 0 13 4.382027 0.000000 1173 +forth 0 13 4.382027 0.000000 1186 +speech 1 12 4.465908 4.465908 1222 +thedepart 1 11 4.553877 4.553877 1350 +council 1 11 4.553877 4.553877 1364 +host 0 11 4.553877 0.000000 1306 +player 0 11 4.553877 0.000000 1371 +cook 1 10 4.653960 4.653960 1464 +perspect 0 10 4.653960 0.000000 1437 +congress 0 9 4.753590 0.000000 1592 +pick 0 9 4.753590 0.000000 1498 +govern 0 9 4.753590 0.000000 1581 +telecommun 0 9 4.753590 0.000000 1565 +andth 0 9 4.753590 0.000000 1481 +vice 0 9 4.753590 0.000000 1604 +lane 0 8 4.875197 0.000000 1720 +mile 0 8 4.875197 0.000000 1743 +virginia 0 8 4.875197 0.000000 1659 +driver 0 8 4.875197 0.000000 1657 +centuri 1 7 5.010635 5.010635 1935 +surpris 0 7 5.010635 0.000000 1828 +molecular 0 7 5.010635 0.000000 1887 +advisori 1 6 5.164786 5.164786 2148 +ture 0 6 5.164786 0.000000 1997 +brook 0 6 5.164786 0.000000 2152 +deliv 0 6 5.164786 0.000000 2070 +highwai 0 6 5.164786 0.000000 2095 +presid 0 6 5.164786 0.000000 2196 +duke 0 6 5.164786 0.000000 2231 +lazowska 2 4 5.568345 11.136690 2694 +invent 1 4 5.568345 5.568345 3028 +machineri 1 4 5.568345 5.568345 2851 +push 0 4 5.568345 0.000000 2635 +andengin 0 4 5.568345 0.000000 3042 +rack 0 3 5.857933 0.000000 3176 +researchassoci 0 3 5.857933 0.000000 3664 +affair 0 3 5.857933 0.000000 3916 +belong 0 3 5.857933 0.000000 3797 +atstanford 0 3 5.857933 0.000000 3935 +hongkong 0 3 5.857933 0.000000 3677 +theimpact 0 3 5.857933 0.000000 3179 +uwcs 0 3 5.857933 0.000000 3977 +informationtechnolog 0 3 5.857933 0.000000 3836 +down 0 3 5.857933 0.000000 3870 +celebr 1 2 6.263398 6.263398 4946 +onthi 0 2 6.263398 0.000000 5357 +mbquicktim 0 2 6.263398 0.000000 5916 +advisorycommitte 0 2 6.263398 0.000000 6037 +ofdata 0 2 6.263398 0.000000 6038 +ventur 0 2 6.263398 0.000000 4938 +andha 0 2 6.263398 0.000000 5955 +theinstitut 0 2 6.263398 0.000000 6039 +hpcc 0 2 6.263398 0.000000 5832 +pagerec 0 2 6.263398 0.000000 6022 +ahalf 1 1 6.957497 6.957497 16764 +exponentialprogress 1 1 6.957497 6.957497 16765 +annualfaculti 1 1 6.957497 6.957497 16766 +vicepresid 1 1 6.957497 6.957497 16767 +gore 1 1 6.957497 6.957497 16768 +eniac 1 1 6.957497 6.957497 16769 +thanniversari 1 1 6.957497 6.957497 16770 +nathanmyhrvold 1 1 6.957497 6.957497 16771 +joinsedlazowska 1 1 6.957497 6.957497 16772 +theuwcs 1 1 6.957497 6.957497 16773 +testimonyto 1 1 6.957497 6.957497 16774 +georgejetson 0 1 6.957497 0.000000 16775 +forfr 0 1 6.957497 0.000000 16776 +flintston 0 1 6.957497 0.000000 16777 +mostlywearsti 0 1 6.957497 0.000000 16778 +flier 0 1 6.957497 0.000000 16779 +healso 0 1 6.957497 0.000000 16780 +havefunnynos 0 1 6.957497 0.000000 16781 +allgradu 0 1 6.957497 0.000000 16782 +laboratoriesin 0 1 6.957497 0.000000 16783 +ofcra 0 1 6.957497 0.000000 16784 +scomputersci 0 1 6.957497 0.000000 16785 +formicrosoft 0 1 6.957497 0.000000 16786 +personnationalsemiconductor 0 1 6.957497 0.000000 16787 +academicadvisori 0 1 6.957497 0.000000 16788 +forcabl 0 1 6.957497 0.000000 16789 +hows 0 1 6.957497 0.000000 16790 +cascadia 0 1 6.957497 0.000000 16791 +committeesfor 0 1 6.957497 0.000000 16792 +eecsat 0 1 6.957497 0.000000 16793 +councilpanel 0 1 6.957497 0.000000 16794 +agencyhigh 0 1 6.957497 0.000000 16795 +computingand 0 1 6.957497 0.000000 16796 +sutherland 0 1 6.957497 0.000000 16797 +examinersfor 0 1 6.957497 0.000000 16798 +sspecial 0 1 6.957497 0.000000 16799 +chairof 0 1 6.957497 0.000000 16800 +andeditor 0 1 6.957497 0.000000 16801 +servinga 0 1 6.957497 0.000000 16802 +onacadem 0 1 6.957497 0.000000 16803 +thecommitte 0 1 6.957497 0.000000 16804 +deanship 0 1 6.957497 0.000000 16805 +artsand 0 1 6.957497 0.000000 16806 +biotechnolog 0 1 6.957497 0.000000 16807 +amemb 0 1 6.957497 0.000000 16808 +deanof 0 1 6.957497 0.000000 16809 +fellowof 0 1 6.957497 0.000000 16810 +associationfor 0 1 6.957497 0.000000 16811 +andelectron 0 1 6.957497 0.000000 16812 +seventeenph 0 1 6.957497 0.000000 16813 +studentshav 0 1 6.957497 0.000000 16814 +integratedoverview 0 1 6.957497 0.000000 16815 +apersuas 0 1 6.957497 0.000000 16816 +forloc 0 1 6.957497 0.000000 16817 +consumpt 0 1 6.957497 0.000000 16818 +persuas 0 1 6.957497 0.000000 16819 +playertopten 0 1 6.957497 0.000000 16820 +csebuild 0 1 6.957497 0.000000 16821 +abbrevi 0 1 6.957497 0.000000 16822 +cvcomputingresearch 0 1 6.957497 0.000000 16823 +forwardmassi 0 1 6.957497 0.000000 16824 +goldmanreport 0 1 6.957497 0.000000 16825 +alleg 0 1 6.957497 0.000000 16826 +cseph 0 1 6.957497 0.000000 16827 +flaw 0 1 6.957497 0.000000 16828 +medianyear 0 1 6.957497 0.000000 16829 +boardstudi 0 1 6.957497 0.000000 16830 +saturdayseminar 0 1 6.957497 0.000000 16831 +houseappropri 0 1 6.957497 0.000000 16832 +interestinghom 0 1 6.957497 0.000000 16833 +odeto 0 1 6.957497 0.000000 16834 +tallman 0 1 6.957497 0.000000 16835 +trask 0 1 6.957497 0.000000 16836 +departsfor 0 1 6.957497 0.000000 16837 +lanelazowska 0 1 6.957497 0.000000 16838 +pagedirect 0 1 6.957497 0.000000 16839 +houseshilshol 0 1 6.957497 0.000000 16840 +aquat 0 1 6.957497 0.000000 16841 +discoveredreview 0 1 6.957497 0.000000 16842 +poetryfing 0 1 6.957497 0.000000 16843 +scheduleinform 0 1 6.957497 0.000000 16844 +reflector 0 1 6.957497 0.000000 16845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..0f04bab0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +washington 0 236 1.386294 0.000000 32 +cornel 0 215 1.386294 0.000000 23 +includ 1 208 1.609438 1.609438 42 +oper 0 180 1.609438 0.000000 34 +professor 1 137 1.945910 1.945910 76 +document 0 121 2.079442 0.000000 89 +mathemat 0 108 2.197225 0.000000 123 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +book 1 99 2.302585 2.302585 131 +text 0 98 2.302585 0.000000 133 +associ 1 93 2.397895 2.397895 151 +real 1 93 2.397895 2.397895 144 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +journal 0 83 2.484907 0.000000 183 +ieee 0 86 2.484907 0.000000 190 +thing 0 84 2.484907 0.000000 189 +academ 0 82 2.484907 0.000000 178 +method 0 80 2.564949 0.000000 213 +good 0 77 2.564949 0.000000 200 +addit 0 74 2.639057 0.000000 228 +visit 0 63 2.772589 0.000000 288 +physic 0 47 3.091042 0.000000 377 +textbook 0 44 3.135494 0.000000 397 +linear 0 41 3.218876 0.000000 431 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +transact 0 39 3.258097 0.000000 438 +live 0 40 3.258097 0.000000 451 +committe 1 34 3.401197 3.401197 522 +award 0 34 3.401197 0.000000 523 +dissert 1 32 3.465736 3.465736 549 +titl 0 31 3.496508 0.000000 556 +posit 0 31 3.496508 0.000000 552 +serv 0 22 3.850148 0.000000 758 +half 1 21 3.912023 3.912023 776 +corpor 0 21 3.912023 0.000000 802 +among 0 21 3.912023 0.000000 781 +prepar 0 20 3.951244 0.000000 824 +supervis 0 20 3.951244 0.000000 840 +tenni 0 20 3.951244 0.000000 838 +stanford 1 17 4.110874 4.110874 955 +bachelor 0 17 4.110874 0.000000 957 +former 0 17 4.110874 0.000000 956 +hobbi 1 16 4.174387 4.174387 1009 +atth 0 15 4.248495 0.000000 1019 +incomput 0 14 4.317488 0.000000 1096 +alan 0 13 4.382027 0.000000 1146 +guest 0 12 4.465908 0.000000 1220 +food 0 12 4.465908 0.000000 1285 +distinguish 0 11 4.553877 0.000000 1357 +bike 0 10 4.653960 0.000000 1468 +introductori 0 9 4.753590 0.000000 1479 +editori 0 9 4.753590 0.000000 1611 +toronto 0 6 5.164786 0.000000 2156 +scholar 0 6 5.164786 0.000000 2180 +pari 0 6 5.164786 0.000000 2158 +softwareengin 0 6 5.164786 0.000000 2162 +hike 0 6 5.164786 0.000000 2234 +these 0 5 5.347108 0.000000 2482 +fulbright 1 4 5.568345 5.568345 2963 +hasbeen 0 4 5.568345 0.000000 2661 +amast 0 3 5.857933 0.000000 3955 +informat 0 3 5.857933 0.000000 3839 +zurich 0 3 5.857933 0.000000 3550 +memberof 0 3 5.857933 0.000000 3169 +trumpet 0 3 5.857933 0.000000 3946 +sdegre 0 2 6.263398 0.000000 6040 +acceler 0 2 6.263398 0.000000 5411 +fifteen 0 2 6.263398 0.000000 5399 +shaw 1 1 6.957497 6.957497 16846 +facultyappoint 0 1 6.957497 0.000000 16847 +theibm 0 1 6.957497 0.000000 16848 +publicationsinclud 0 1 6.957497 0.000000 16849 +andan 0 1 6.957497 0.000000 16850 +sciencescreen 0 1 6.957497 0.000000 16851 +associateeditor 0 1 6.957497 0.000000 16852 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..e6c9a798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +washington 0 236 1.386294 0.000000 32 +design 0 213 1.386294 0.000000 25 +parallel 1 169 1.791759 1.791759 60 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +area 0 144 1.945910 0.000000 80 +first 0 140 1.945910 0.000000 71 +mathemat 0 108 2.197225 0.000000 123 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +journal 1 83 2.484907 2.484907 183 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +complet 1 77 2.564949 2.564949 208 +master 0 76 2.564949 0.000000 216 +nation 1 74 2.639057 2.639057 240 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +receiv 1 66 2.708050 2.708050 244 +visit 1 63 2.772589 2.772589 288 +polici 0 64 2.772589 0.000000 279 +guid 0 63 2.772589 0.000000 267 +faculti 1 56 2.890372 2.890372 325 +direct 1 57 2.890372 2.890372 316 +investig 0 51 2.995732 0.000000 353 +numer 1 49 3.044522 3.044522 369 +quarter 0 47 3.091042 0.000000 389 +editor 1 41 3.218876 3.218876 433 +futur 0 41 3.218876 0.000000 427 +join 0 39 3.258097 0.000000 457 +transact 0 39 3.258097 0.000000 438 +ofth 0 36 3.367296 0.000000 491 +committe 1 34 3.401197 3.401197 522 +singl 0 34 3.401197 0.000000 510 +award 0 34 3.401197 0.000000 523 +titl 0 31 3.496508 0.000000 556 +computersci 0 30 3.555348 0.000000 562 +rang 0 30 3.555348 0.000000 565 +chair 1 29 3.583519 3.583519 596 +particip 0 29 3.583519 0.000000 589 +doctor 0 24 3.761200 0.000000 709 +proof 0 23 3.806662 0.000000 720 +highli 0 23 3.806662 0.000000 725 +serv 1 22 3.850148 3.850148 758 +properti 0 22 3.850148 0.000000 749 +chip 1 21 3.912023 3.912023 770 +divis 0 21 3.912023 0.000000 803 +bachelor 0 17 4.110874 0.000000 957 +configur 0 15 4.248495 0.000000 1012 +econom 0 13 4.382027 0.000000 1184 +mellon 0 13 4.382027 0.000000 1179 +carnegi 0 12 4.465908 0.000000 1260 +onth 0 12 4.465908 0.000000 1218 +perman 0 11 4.553877 0.000000 1372 +distinguish 0 11 4.553877 0.000000 1357 +purdu 0 10 4.653960 0.000000 1466 +andcomput 0 8 4.875197 0.000000 1623 +lawrenc 0 7 5.010635 0.000000 1908 +iowa 0 7 5.010635 0.000000 1971 +harvard 0 7 5.010635 0.000000 1926 +microprocessor 0 7 5.010635 0.000000 1808 +scholar 0 6 5.164786 0.000000 2180 +yale 0 6 5.164786 0.000000 2003 +blue 0 6 5.164786 0.000000 2227 +snyder 1 5 5.347108 5.347108 2359 +chaoticrout 0 4 5.568345 0.000000 3063 +anddistribut 0 4 5.568345 0.000000 3031 +algorithmsand 0 4 5.568345 0.000000 2680 +cmo 0 3 5.857933 0.000000 3992 +inventor 0 3 5.857933 0.000000 3695 +orca 0 3 5.857933 0.000000 3578 +dozen 0 3 5.857933 0.000000 3905 +sdegre 0 2 6.263398 0.000000 6040 +developmentof 0 2 6.263398 0.000000 6041 +hors 0 2 6.263398 0.000000 5348 +advisorycommitte 0 2 6.263398 0.000000 6037 +andin 0 1 6.957497 0.000000 16853 +scholarat 0 1 6.957497 0.000000 16854 +theundecid 0 1 6.957497 0.000000 16855 +hecreat 0 1 6.957497 0.000000 16856 +thepok 0 1 6.957497 0.000000 16857 +nowprincip 0 1 6.957497 0.000000 16858 +nwli 0 1 6.957497 0.000000 16859 +computerand 0 1 6.957497 0.000000 16860 +foundationadvisori 0 1 6.957497 0.000000 16861 +doctoraldissert 0 1 6.957497 0.000000 16862 +degreesund 0 1 6.957497 0.000000 16863 +seniorproject 0 1 6.957497 0.000000 16864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..b2d2678b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +engin 1 297 1.098612 1.098612 20 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +design 1 213 1.386294 1.386294 25 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +base 0 165 1.791759 0.000000 50 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +object 0 138 1.945910 0.000000 79 +like 0 132 1.945910 0.000000 81 +high 0 130 2.079442 0.000000 101 +tool 0 117 2.079442 0.000000 93 +memori 0 101 2.302585 0.000000 139 +control 0 82 2.484907 0.000000 164 +issu 0 78 2.564949 0.000000 211 +involv 0 71 2.639057 0.000000 227 +degre 0 69 2.708050 0.000000 259 +integr 0 67 2.708050 0.000000 245 +evalu 0 64 2.772589 0.000000 266 +plai 0 60 2.833213 0.000000 307 +major 0 56 2.890372 0.000000 315 +explor 0 58 2.890372 0.000000 324 +scientif 0 53 2.944439 0.000000 341 +three 0 54 2.944439 0.000000 330 +tabl 0 51 2.995732 0.000000 346 +autom 0 41 3.218876 0.000000 434 +electr 1 38 3.295837 3.295837 461 +toler 1 33 3.433987 3.433987 533 +india 0 32 3.465736 0.000000 550 +fault 0 32 3.465736 0.000000 547 +multiprocessor 0 28 3.610918 0.000000 605 +indian 1 22 3.850148 3.850148 769 +period 0 22 3.850148 0.000000 743 +tenni 1 20 3.951244 3.951244 838 +interconnect 1 17 4.110874 4.110874 937 +canada 0 13 4.382027 0.000000 1158 +food 0 12 4.465908 0.000000 1285 +prior 0 10 4.653960 0.000000 1438 +grain 0 10 4.653960 0.000000 1448 +cook 0 10 4.653960 0.000000 1464 +respect 0 9 4.753590 0.000000 1545 +classif 0 9 4.753590 0.000000 1586 +bridg 0 8 4.875197 0.000000 1764 +earn 0 7 5.010635 0.000000 1788 +montreal 0 7 5.010635 0.000000 1961 +hike 0 6 5.164786 0.000000 2234 +delhi 0 5 5.347108 0.000000 2530 +anti 0 5 5.347108 0.000000 2434 +arun 1 4 5.568345 5.568345 2736 +redund 0 4 5.568345 0.000000 2839 +congest 0 3 5.857933 0.000000 3993 +reconfigur 0 3 5.857933 0.000000 3556 +somani 1 2 6.263398 6.263398 4432 +submarin 0 2 6.263398 0.000000 6018 +warfar 0 2 6.263398 0.000000 4910 +navi 0 2 6.263398 0.000000 5155 +proteu 1 1 6.957497 6.957497 16865 +msee 0 1 6.957497 0.000000 16866 +mcgill 0 1 6.957497 0.000000 16867 +govt 0 1 6.957497 0.000000 16868 +offault 0 1 6.957497 0.000000 16869 +tocach 0 1 6.957497 0.000000 16870 +broadband 0 1 6.957497 0.000000 16871 +generalizedenhanc 0 1 6.957497 0.000000 16872 +hypercub 0 1 6.957497 0.000000 16873 +coars 0 1 6.957497 0.000000 16874 +dpcnl 0 1 6.957497 0.000000 16875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..88d7a60b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +parallel 1 169 1.791759 1.791759 60 +develop 0 174 1.791759 0.000000 53 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +year 0 148 1.945910 0.000000 84 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +seattl 0 120 2.079442 0.000000 103 +mathemat 1 108 2.197225 2.197225 123 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +person 0 111 2.197225 0.000000 117 +structur 0 106 2.197225 0.000000 105 +intern 0 108 2.197225 0.000000 128 +take 0 97 2.302585 0.000000 134 +book 0 99 2.302585 0.000000 131 +imag 1 91 2.397895 2.397895 161 +sinc 0 90 2.397895 0.000000 159 +commun 0 95 2.397895 0.000000 157 +grade 0 90 2.397895 0.000000 142 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +institut 0 84 2.484907 0.000000 187 +member 0 84 2.484907 0.000000 165 +educ 0 86 2.484907 0.000000 191 +activ 0 84 2.484907 0.000000 182 +june 1 79 2.564949 2.564949 214 +intellig 1 72 2.639057 2.639057 225 +addit 1 74 2.639057 2.639057 228 +meet 0 72 2.639057 0.000000 229 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +visit 1 63 2.772589 2.772589 288 +artifici 1 63 2.772589 2.772589 280 +experi 0 64 2.772589 0.000000 283 +written 0 63 2.772589 0.000000 278 +organ 0 65 2.772589 0.000000 265 +polici 0 64 2.772589 0.000000 279 +colleg 1 61 2.833213 2.833213 300 +plai 0 60 2.833213 0.000000 307 +faculti 0 56 2.890372 0.000000 325 +think 0 57 2.890372 0.000000 314 +variou 0 56 2.890372 0.000000 317 +publish 0 57 2.890372 0.000000 326 +processor 0 54 2.944439 0.000000 335 +visual 1 48 3.044522 3.044522 372 +numer 0 49 3.044522 0.000000 369 +understand 0 47 3.091042 0.000000 384 +textbook 0 44 3.135494 0.000000 397 +edit 1 42 3.218876 3.218876 418 +vision 1 41 3.218876 3.218876 430 +editor 0 41 3.218876 0.000000 433 +music 0 42 3.218876 0.000000 436 +societi 1 40 3.258097 3.258097 456 +join 0 39 3.258097 0.000000 457 +programm 0 39 3.258097 0.000000 445 +author 0 39 3.258097 0.000000 450 +committe 1 34 3.401197 3.401197 522 +scientist 1 31 3.496508 3.496508 560 +common 0 30 3.555348 0.000000 574 +chair 1 29 3.583519 3.583519 596 +steve 0 29 3.583519 0.000000 594 +held 0 28 3.610918 0.000000 600 +subject 0 26 3.688879 0.000000 647 +enjoi 0 26 3.688879 0.000000 660 +pattern 1 24 3.761200 3.761200 689 +fellow 0 24 3.761200 0.000000 701 +recognit 1 23 3.806662 3.806662 723 +serv 2 22 3.850148 7.700296 758 +theunivers 0 21 3.912023 0.000000 797 +corpor 0 21 3.912023 0.000000 802 +particularli 0 19 4.007333 0.000000 867 +lisp 1 18 4.060443 4.060443 897 +element 0 18 4.060443 0.000000 895 +steven 1 17 4.110874 4.110874 953 +cambridg 0 16 4.174387 0.000000 1008 +princeton 0 15 4.248495 0.000000 1042 +atth 0 15 4.248495 0.000000 1019 +massachusett 0 14 4.317488 0.000000 1118 +whose 0 13 4.382027 0.000000 1166 +franc 1 12 4.465908 4.465908 1276 +outsid 0 12 4.465908 0.000000 1219 +motiv 0 11 4.553877 0.000000 1346 +council 0 11 4.553877 0.000000 1364 +tanimoto 1 10 4.653960 4.653960 1429 +conferenceon 1 9 4.753590 4.753590 1595 +entitl 0 9 4.753590 0.000000 1490 +vice 0 9 4.753590 0.000000 1604 +japan 0 8 4.875197 0.000000 1762 +elect 0 8 4.875197 0.000000 1771 +sweden 1 7 5.010635 5.010635 1885 +chief 0 7 5.010635 0.000000 1829 +pari 0 6 5.164786 0.000000 2158 +scholar 0 6 5.164786 0.000000 2180 +sponsor 0 6 5.164786 0.000000 2133 +piano 0 6 5.164786 0.000000 2201 +anda 1 5 5.347108 5.347108 2416 +ofparallel 0 5 5.347108 0.000000 2380 +steer 0 5 5.347108 0.000000 2328 +jazz 0 5 5.347108 0.000000 2527 +devot 0 4 5.568345 0.000000 2711 +coauthor 0 4 5.568345 0.000000 3064 +electricalengin 1 3 5.857933 5.857933 3987 +chairman 0 3 5.857933 0.000000 3991 +adjunct 1 2 6.263398 6.263398 6033 +theinstitut 0 2 6.263398 0.000000 6039 +internationalworkshop 0 2 6.263398 0.000000 5012 +bergen 0 2 6.263398 0.000000 5991 +norwai 0 2 6.263398 0.000000 4908 +programcommitte 0 2 6.263398 0.000000 6042 +theieee 0 2 6.263398 0.000000 6043 +ieeetransact 0 2 6.263398 0.000000 4315 +andclass 0 2 6.263398 0.000000 4330 +linkp 1 1 6.957497 6.957497 16876 +fromharvard 0 1 6.957497 0.000000 16877 +connecticut 0 1 6.957497 0.000000 16878 +professorat 0 1 6.957497 0.000000 16879 +hasalso 0 1 6.957497 0.000000 16880 +atkob 0 1 6.957497 0.000000 16881 +enseign 0 1 6.957497 0.000000 16882 +superieur 0 1 6.957497 0.000000 16883 +techniquesd 0 1 6.957497 0.000000 16884 +electroniqu 0 1 6.957497 0.000000 16885 +irest 0 1 6.957497 0.000000 16886 +nant 0 1 6.957497 0.000000 16887 +hasrec 0 1 6.957497 0.000000 16888 +forimag 0 1 6.957497 0.000000 16889 +processingand 0 1 6.957497 0.000000 16890 +bordeaux 0 1 6.957497 0.000000 16891 +ofimag 0 1 6.957497 0.000000 16892 +currentlydirect 0 1 6.957497 0.000000 16893 +throughimag 0 1 6.957497 0.000000 16894 +softwarethat 0 1 6.957497 0.000000 16895 +thebook 0 1 6.957497 0.000000 16896 +introductionus 0 1 6.957497 0.000000 16897 +accompanyingsoftwar 0 1 6.957497 0.000000 16898 +serveda 0 1 6.957497 0.000000 16899 +subconfer 0 1 6.957497 0.000000 16900 +patternrecognit 0 1 6.957497 0.000000 16901 +societyworkshop 0 1 6.957497 0.000000 16902 +machineintellig 0 1 6.957497 0.000000 16903 +symposiaon 0 1 6.957497 0.000000 16904 +editorialboard 0 1 6.957497 0.000000 16905 +cvgip 0 1 6.957497 0.000000 16906 +engineeringeduc 0 1 6.957497 0.000000 16907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..f7e687ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +current 1 284 1.098612 1.098612 21 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +also 1 259 1.386294 1.386294 28 +washington 0 236 1.386294 0.000000 32 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +cornel 0 215 1.386294 0.000000 23 +paper 0 205 1.609438 0.000000 38 +algorithm 1 162 1.791759 1.791759 57 +read 0 154 1.791759 0.000000 47 +professor 1 137 1.945910 1.945910 76 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +confer 0 126 2.079442 0.000000 100 +theori 1 111 2.197225 2.197225 127 +mathemat 0 108 2.197225 0.000000 123 +structur 0 106 2.197225 0.000000 105 +associ 1 93 2.397895 2.397895 151 +question 0 91 2.397895 0.000000 141 +journal 1 83 2.484907 2.484907 183 +member 0 84 2.484907 0.000000 165 +ieee 0 86 2.484907 0.000000 190 +control 0 82 2.484907 0.000000 164 +state 0 76 2.564949 0.000000 207 +issu 0 78 2.564949 0.000000 211 +complet 0 77 2.564949 0.000000 208 +nation 1 74 2.639057 2.639057 240 +receiv 0 66 2.708050 0.000000 244 +foundat 1 62 2.772589 2.772589 286 +complex 1 64 2.772589 2.772589 269 +visit 0 63 2.772589 0.000000 288 +colleg 1 61 2.833213 2.833213 300 +faculti 1 56 2.890372 2.890372 325 +special 1 56 2.890372 2.890372 320 +direct 0 57 2.890372 0.000000 316 +sever 0 56 2.890372 0.000000 322 +three 0 54 2.944439 0.000000 330 +california 0 46 3.091042 0.000000 388 +textbook 0 44 3.135494 0.000000 397 +theoret 1 39 3.258097 3.258097 446 +societi 1 40 3.258097 3.258097 456 +author 0 39 3.258097 0.000000 450 +paul 0 38 3.295837 0.000000 471 +industri 0 38 3.295837 0.000000 464 +connect 0 37 3.332205 0.000000 485 +ofth 0 36 3.367296 0.000000 491 +committe 1 34 3.401197 3.401197 522 +board 1 33 3.433987 3.433987 528 +taught 0 33 3.433987 0.000000 526 +dissert 0 32 3.465736 0.000000 549 +titl 0 31 3.496508 0.000000 556 +posit 0 31 3.496508 0.000000 552 +chair 0 29 3.583519 0.000000 596 +berkelei 1 26 3.688879 3.688879 657 +fellow 0 24 3.761200 0.000000 701 +doctor 0 24 3.761200 0.000000 709 +serv 2 22 3.850148 7.700296 758 +emphasi 0 22 3.850148 0.000000 755 +unit 0 21 3.912023 0.000000 779 +theunivers 0 21 3.912023 0.000000 797 +divis 0 21 3.912023 0.000000 803 +facil 0 20 3.951244 0.000000 814 +histori 0 19 4.007333 0.000000 853 +young 1 16 4.174387 4.174387 991 +earli 0 16 4.174387 0.000000 968 +becam 0 14 4.317488 0.000000 1117 +dean 0 14 4.317488 0.000000 1104 +employ 0 12 4.465908 0.000000 1291 +eight 0 11 4.553877 0.000000 1331 +ofcomput 0 10 4.653960 0.000000 1442 +editori 1 9 4.753590 4.753590 1611 +vice 0 9 4.753590 0.000000 1604 +hold 0 8 4.875197 0.000000 1645 +foc 0 7 5.010635 0.000000 1880 +reed 0 6 5.164786 0.000000 2086 +symposiumon 0 6 5.164786 0.000000 2054 +sigact 0 6 5.164786 0.000000 2212 +chosen 0 6 5.164786 0.000000 1984 +twice 0 4 5.568345 0.000000 2614 +coauthor 0 4 5.568345 0.000000 3064 +notr 0 4 5.568345 0.000000 2880 +dame 0 4 5.568345 0.000000 2881 +gone 0 4 5.568345 0.000000 3072 +chairman 1 3 5.857933 5.857933 3991 +atstanford 0 3 5.857933 0.000000 3935 +briefli 0 3 5.857933 0.000000 3459 +thegener 0 3 5.857933 0.000000 3648 +mathematicallog 0 3 5.857933 0.000000 3796 +eleven 0 3 5.857933 0.000000 3824 +postdoctor 0 2 6.263398 0.000000 5059 +mexico 0 2 6.263398 0.000000 6044 +nomin 0 2 6.263398 0.000000 5758 +programcommitte 0 2 6.263398 0.000000 6042 +annal 0 2 6.263398 0.000000 4912 +underprofessor 0 2 6.263398 0.000000 6045 +ratherthan 0 2 6.263398 0.000000 6046 +graduateof 0 1 6.957497 0.000000 16908 +antioch 0 1 6.957497 0.000000 16909 +hejoin 0 1 6.957497 0.000000 16910 +seventeen 0 1 6.957497 0.000000 16911 +atpurdu 0 1 6.957497 0.000000 16912 +inperhap 0 1 6.957497 0.000000 16913 +aschairman 0 1 6.957497 0.000000 16914 +professorin 0 1 6.957497 0.000000 16915 +iscoauthor 0 1 6.957497 0.000000 16916 +executivecommitte 0 1 6.957497 0.000000 16917 +interestgroup 0 1 6.957497 0.000000 16918 +chairmanof 0 1 6.957497 0.000000 16919 +annualsymposium 0 1 6.957497 0.000000 16920 +hasserv 0 1 6.957497 0.000000 16921 +stechnic 0 1 6.957497 0.000000 16922 +advisorysubcommitte 0 1 6.957497 0.000000 16923 +thiscommitte 0 1 6.957497 0.000000 16924 +formallog 0 1 6.957497 0.000000 16925 +dopostdoctor 0 1 6.957497 0.000000 16926 +ofcalifornia 0 1 6.957497 0.000000 16927 +avarieti 0 1 6.957497 0.000000 16928 +leather 0 1 6.957497 0.000000 16929 +motorcycl 0 1 6.957497 0.000000 16930 +jacket 0 1 6.957497 0.000000 16931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..a5cf0a09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +engin 0 297 1.098612 0.000000 20 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +includ 0 208 1.609438 0.000000 42 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +construct 0 139 1.945910 0.000000 82 +schedul 1 119 2.079442 2.079442 85 +provid 0 121 2.079442 0.000000 94 +topic 0 114 2.197225 0.000000 110 +code 0 108 2.197225 0.000000 116 +techniqu 0 99 2.302585 0.000000 138 +real 0 93 2.397895 0.000000 144 +chang 0 82 2.484907 0.000000 163 +activ 0 84 2.484907 0.000000 182 +ieee 0 86 2.484907 0.000000 190 +interfac 0 79 2.564949 0.000000 209 +involv 0 71 2.639057 0.000000 227 +receiv 1 66 2.708050 2.708050 244 +polici 1 64 2.772589 2.772589 279 +written 0 63 2.772589 0.000000 278 +allow 0 53 2.944439 0.000000 333 +video 0 44 3.135494 0.000000 405 +continu 0 39 3.258097 0.000000 448 +survei 0 35 3.401197 0.000000 513 +john 0 33 3.433987 0.000000 532 +board 0 33 3.433987 0.000000 528 +titl 0 31 3.496508 0.000000 556 +focu 0 30 3.555348 0.000000 571 +platform 0 29 3.583519 0.000000 591 +load 0 28 3.610918 0.000000 601 +intend 0 28 3.610918 0.000000 599 +primari 0 25 3.737670 0.000000 669 +mobil 0 23 3.806662 0.000000 730 +sequenti 0 22 3.850148 0.000000 745 +runtim 1 19 4.007333 4.007333 858 +young 0 16 4.174387 0.000000 991 +audio 0 14 4.317488 0.000000 1094 +easili 0 14 4.317488 0.000000 1077 +editori 0 9 4.753590 0.000000 1611 +presidenti 0 8 4.875197 0.000000 1737 +supportfor 0 7 5.010635 0.000000 1854 +thegoal 0 6 5.164786 0.000000 2033 +zahorjan 1 3 5.857933 5.857933 3383 +formobil 0 3 5.857933 0.000000 3261 +parallelsystem 0 2 6.263398 0.000000 5746 +exhibit 0 2 6.263398 0.000000 5529 +frombrown 0 1 6.957497 0.000000 16932 +oftoronto 0 1 6.957497 0.000000 16933 +investigatoraward 0 1 6.957497 0.000000 16934 +mediaappl 0 1 6.957497 0.000000 16935 +torespond 0 1 6.957497 0.000000 16936 +parallelizationof 0 1 6.957497 0.000000 16937 +bothcontrol 0 1 6.957497 0.000000 16938 +transactionson 0 1 6.957497 0.000000 16939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..7940ef91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +project 2 340 1.098612 2.197224 18 +student 2 343 1.098612 2.197224 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +list 1 201 1.609438 1.609438 39 +group 0 183 1.609438 0.000000 36 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +base 0 165 1.791759 0.000000 50 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +like 0 132 1.945910 0.000000 81 +support 0 132 1.945910 0.000000 83 +number 1 130 2.079442 2.079442 97 +studi 0 120 2.079442 0.000000 91 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +mathemat 1 108 2.197225 2.197225 123 +pleas 1 113 2.197225 2.197225 114 +person 0 111 2.197225 0.000000 117 +version 0 113 2.197225 0.000000 122 +teach 0 108 2.197225 0.000000 112 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +user 0 104 2.302585 0.000000 137 +technic 0 100 2.302585 0.000000 140 +imag 2 91 2.397895 4.795790 161 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +educ 1 86 2.484907 2.484907 191 +help 0 83 2.484907 0.000000 175 +environ 0 84 2.484907 0.000000 177 +thing 0 84 2.484907 0.000000 189 +exampl 0 77 2.564949 0.000000 195 +know 0 80 2.564949 0.000000 198 +free 1 73 2.639057 2.639057 224 +materi 1 75 2.639057 2.639057 221 +meet 0 72 2.639057 0.000000 229 +involv 0 71 2.639057 0.000000 227 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +tuesdai 0 73 2.639057 0.000000 219 +order 1 69 2.708050 2.708050 249 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +window 0 68 2.708050 0.000000 242 +test 0 66 2.708050 0.000000 252 +integr 0 67 2.708050 0.000000 245 +would 0 67 2.708050 0.000000 251 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +collect 0 65 2.772589 0.000000 268 +creat 0 63 2.772589 0.000000 277 +foundat 0 62 2.772589 0.000000 286 +plai 0 60 2.833213 0.000000 307 +variou 0 56 2.890372 0.000000 317 +explor 0 58 2.890372 0.000000 324 +direct 0 57 2.890372 0.000000 316 +allow 1 53 2.944439 2.944439 333 +februari 0 54 2.944439 0.000000 328 +digit 1 52 2.995732 2.995732 348 +particular 0 51 2.995732 0.000000 352 +approach 0 48 3.044522 0.000000 366 +visual 0 48 3.044522 0.000000 372 +done 0 47 3.091042 0.000000 381 +effect 0 46 3.091042 0.000000 385 +discuss 1 45 3.135494 3.135494 399 +keep 0 44 3.135494 0.000000 409 +describ 0 45 3.135494 0.000000 400 +offer 0 43 3.178054 0.000000 414 +programm 0 39 3.258097 0.000000 445 +littl 0 39 3.258097 0.000000 454 +open 0 38 3.295837 0.000000 469 +microsoft 0 38 3.295837 0.000000 468 +close 0 38 3.295837 0.000000 465 +seminar 0 38 3.295837 0.000000 470 +winter 0 36 3.367296 0.000000 500 +curriculum 0 33 3.433987 0.000000 535 +transform 1 32 3.465736 3.465736 542 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +hard 0 30 3.555348 0.000000 563 +common 0 30 3.555348 0.000000 574 +particip 0 29 3.583519 0.000000 589 +intend 0 28 3.610918 0.000000 599 +framework 0 28 3.610918 0.000000 606 +manipul 0 27 3.637586 0.000000 624 +rather 0 26 3.688879 0.000000 642 +experiment 0 26 3.688879 0.000000 645 +todai 1 25 3.737670 3.737670 672 +seri 0 24 3.761200 0.000000 708 +demonstr 0 24 3.761200 0.000000 694 +togeth 0 23 3.806662 0.000000 714 +director 0 22 3.850148 0.000000 767 +disk 0 22 3.850148 0.000000 747 +instal 0 22 3.850148 0.000000 754 +encourag 1 18 4.060443 4.060443 880 +record 0 18 4.060443 0.000000 890 +lisp 0 18 4.060443 0.000000 897 +steven 0 17 4.110874 0.000000 953 +macintosh 0 17 4.110874 0.000000 920 +choic 0 16 4.174387 0.000000 979 +took 0 16 4.174387 0.000000 1010 +role 0 14 4.317488 0.000000 1101 +primarili 0 13 4.382027 0.000000 1185 +forth 0 13 4.382027 0.000000 1186 +essenti 0 13 4.382027 0.000000 1137 +calcul 1 12 4.465908 4.465908 1268 +neat 0 12 4.465908 0.000000 1263 +appl 0 11 4.553877 0.000000 1303 +tanimoto 1 10 4.653960 4.653960 1429 +subset 0 10 4.653960 0.000000 1425 +thecomput 0 10 4.653960 0.000000 1408 +end 0 9 4.753590 0.000000 1567 +successfulli 0 7 5.010635 0.000000 1869 +classroom 0 6 5.164786 0.000000 2006 +pentium 0 6 5.164786 0.000000 2077 +put 0 6 5.164786 0.000000 2017 +volunt 0 5 5.347108 0.000000 2307 +own 0 5 5.347108 0.000000 2531 +pixel 1 4 5.568345 5.568345 2831 +emphas 0 4 5.568345 0.000000 2672 +exploratori 0 4 5.568345 0.000000 3073 +prospect 0 4 5.568345 0.000000 3013 +witha 0 4 5.568345 0.000000 2617 +bricker 0 4 5.568345 0.000000 3050 +metip 1 3 5.857933 5.857933 3937 +teacher 0 3 5.857933 0.000000 3892 +alsoavail 0 3 5.857933 0.000000 3887 +newapproach 0 2 6.263398 0.000000 6047 +pursuit 0 2 6.263398 0.000000 6048 +portrai 0 2 6.263398 0.000000 5386 +xform 1 1 6.957497 6.957497 16940 +theseobject 0 1 6.957497 0.000000 16941 +applicationsdesign 0 1 6.957497 0.000000 16942 +enrich 0 1 6.957497 0.000000 16943 +astandard 0 1 6.957497 0.000000 16944 +withthes 0 1 6.957497 0.000000 16945 +catalyz 0 1 6.957497 0.000000 16946 +bylead 0 1 6.957497 0.000000 16947 +theconcept 0 1 6.957497 0.000000 16948 +toexplor 0 1 6.957497 0.000000 16949 +warper 0 1 6.957497 0.000000 16950 +srun 0 1 6.957497 0.000000 16951 +mathematicsteach 0 1 6.957497 0.000000 16952 +transcriptproject 0 1 6.957497 0.000000 16953 +willfacilit 0 1 6.957497 0.000000 16954 +academicinform 0 1 6.957497 0.000000 16955 +floppi 0 1 6.957497 0.000000 16956 +themetip 0 1 6.957497 0.000000 16957 +ofmultiplay 0 1 6.957497 0.000000 16958 +tointegr 0 1 6.957497 0.000000 16959 +itsxform 0 1 6.957497 0.000000 16960 +somethingfun 0 1 6.957497 0.000000 16961 +beenput 0 1 6.957497 0.000000 16962 +fundamentalattract 0 1 6.957497 0.000000 16963 +digitalimag 0 1 6.957497 0.000000 16964 +discussteach 0 1 6.957497 0.000000 16965 +undergr 0 1 6.957497 0.000000 16966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..2df41771 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +updat 0 191 1.609438 0.000000 41 +applic 0 170 1.791759 0.000000 56 +manag 0 114 2.197225 0.000000 125 +environ 0 84 2.484907 0.000000 177 +resourc 0 81 2.484907 0.000000 172 +prof 0 64 2.772589 0.000000 273 +overview 0 56 2.890372 0.000000 323 +describ 0 45 3.135494 0.000000 400 +transact 0 39 3.258097 0.000000 438 +brian 0 38 3.295837 0.000000 466 +field 0 37 3.332205 0.000000 482 +survei 0 35 3.401197 0.000000 513 +john 0 33 3.433987 0.000000 532 +graph 0 30 3.555348 0.000000 576 +challeng 0 26 3.688879 0.000000 653 +task 0 25 3.737670 0.000000 678 +fundament 0 25 3.737670 0.000000 661 +mobil 1 23 3.806662 3.806662 730 +variabl 0 23 3.806662 0.000000 715 +methodolog 0 23 3.806662 0.000000 733 +infrastructur 1 12 4.465908 4.465908 1234 +hank 0 12 4.465908 0.000000 1253 +gaetano 0 6 5.164786 0.000000 2068 +wireless 0 4 5.568345 0.000000 2693 +disconnect 0 4 5.568345 0.000000 2664 +mobisa 0 3 5.857933 0.000000 3927 +mobilecomput 0 3 5.857933 0.000000 3629 +zahorjan 0 3 5.857933 0.000000 3383 +ubiquit 0 2 6.263398 0.000000 6049 +computingresearch 0 2 6.263398 0.000000 5957 +cope 0 2 6.263398 0.000000 6050 +forman 0 2 6.263398 0.000000 5904 +washingtonher 0 1 6.957497 0.000000 16967 +handheld 0 1 6.957497 0.000000 16968 +operationdistribut 0 1 6.957497 0.000000 16969 +systemcontact 0 1 6.957497 0.000000 16970 +bershadprof 0 1 6.957497 0.000000 16971 +borriellomarc 0 1 6.957497 0.000000 16972 +fiuczynskigeorg 0 1 6.957497 0.000000 16973 +formanprof 0 1 6.957497 0.000000 16974 +levygeoff 0 1 6.957497 0.000000 16975 +voelkerterri 0 1 6.957497 0.000000 16976 +watsonprof 0 1 6.957497 0.000000 16977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..3ca2267b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +model 0 145 1.945910 0.000000 69 +assign 0 135 1.945910 0.000000 66 +relat 0 139 1.945910 0.000000 68 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +compil 0 122 2.079442 0.000000 96 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +postscript 0 131 2.079442 0.000000 90 +intern 1 108 2.197225 2.197225 128 +structur 0 106 2.197225 0.000000 105 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +version 0 113 2.197225 0.000000 122 +peopl 1 96 2.302585 2.302585 132 +technic 0 100 2.302585 0.000000 140 +level 1 87 2.484907 2.484907 180 +member 1 84 2.484907 2.484907 165 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +messag 1 76 2.564949 2.564949 212 +issu 0 78 2.564949 0.000000 211 +method 0 80 2.564949 0.000000 213 +dynam 0 76 2.564949 0.000000 194 +sourc 0 77 2.564949 0.000000 201 +java 0 70 2.708050 0.000000 248 +receiv 0 66 2.708050 0.000000 244 +goal 0 66 2.708050 0.000000 250 +august 0 66 2.708050 0.000000 257 +written 1 63 2.772589 2.772589 278 +guid 0 63 2.772589 0.000000 267 +type 1 61 2.833213 2.833213 296 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +overview 1 56 2.890372 2.890372 323 +special 0 56 2.890372 0.000000 320 +detail 0 57 2.890372 0.000000 321 +direct 0 57 2.890372 0.000000 316 +sampl 1 53 2.944439 2.944439 339 +extens 0 53 2.944439 0.000000 340 +run 0 51 2.995732 0.000000 347 +much 0 52 2.995732 0.000000 349 +standard 0 48 3.044522 0.000000 365 +pointer 0 48 3.044522 0.000000 368 +describ 0 45 3.135494 0.000000 400 +mechan 0 43 3.178054 0.000000 416 +past 0 42 3.218876 0.000000 428 +form 0 39 3.258097 0.000000 443 +prototyp 0 38 3.295837 0.000000 463 +procedur 0 36 3.367296 0.000000 488 +either 0 35 3.401197 0.000000 506 +obtain 0 33 3.433987 0.000000 534 +profil 1 30 3.555348 3.555348 581 +releas 1 28 3.610918 3.610918 616 +intend 0 28 3.610918 0.000000 599 +static 1 27 3.637586 3.637586 619 +request 0 26 3.688879 0.000000 635 +initi 0 23 3.806662 0.000000 717 +emphasi 0 22 3.850148 0.000000 755 +finish 0 22 3.850148 0.000000 748 +flexibl 0 21 3.912023 0.000000 792 +qualiti 0 20 3.951244 0.000000 832 +entir 0 20 3.951244 0.000000 811 +predict 0 19 4.007333 0.000000 855 +hybrid 0 15 4.248495 0.000000 1057 +conduct 0 14 4.317488 0.000000 1065 +split 0 14 4.317488 0.000000 1078 +bodi 0 13 4.382027 0.000000 1178 +infrastructur 0 12 4.465908 0.000000 1234 +target 0 12 4.465908 0.000000 1282 +solari 0 12 4.465908 0.000000 1238 +modul 0 10 4.653960 0.000000 1434 +cecil 2 9 4.753590 9.507180 1547 +elimin 1 9 4.753590 4.753590 1558 +subscrib 1 9 4.753590 4.753590 1541 +modula 0 9 4.753590 0.000000 1613 +analys 1 8 4.875197 4.875197 1666 +pure 0 8 4.875197 0.000000 1776 +closur 0 8 4.875197 0.000000 1643 +parti 0 8 4.875197 0.000000 1676 +dead 0 7 5.010635 0.000000 1840 +sparc 0 7 5.010635 0.000000 1860 +freeli 0 6 5.164786 0.000000 2014 +beta 0 6 5.164786 0.000000 1993 +vortex 1 5 5.347108 5.347108 2362 +ofinterest 0 5 5.347108 0.000000 2323 +suno 1 4 5.568345 5.568345 2790 +inlin 0 4 5.568345 0.000000 2964 +tosupport 0 3 5.857933 0.000000 3613 +inherit 0 3 5.857933 0.000000 3122 +forobject 0 3 5.857933 0.000000 3965 +uwcs 0 3 5.857933 0.000000 3977 +intraprocedur 1 2 6.263398 6.263398 5934 +orientedlanguag 0 2 6.263398 0.000000 4079 +acollect 0 2 6.263398 0.000000 5039 +vortexcompil 0 2 6.263398 0.000000 5932 +projectuw 0 1 6.957497 0.000000 16978 +projectwelcom 0 1 6.957497 0.000000 16979 +rapidconstruct 0 1 6.957497 0.000000 16980 +incorporatesmulti 0 1 6.957497 0.000000 16981 +basedencapsul 0 1 6.957497 0.000000 16982 +allowsstat 0 1 6.957497 0.000000 16983 +pureobject 0 1 6.957497 0.000000 16984 +incorporateshigh 0 1 6.957497 0.000000 16985 +hierachyanalysi 0 1 6.957497 0.000000 16986 +guidedselect 0 1 6.957497 0.000000 16987 +commonsubexpress 0 1 6.957497 0.000000 16988 +currentlyavail 0 1 6.957497 0.000000 16989 +thebeta 0 1 6.957497 0.000000 16990 +projectslast 0 1 6.957497 0.000000 16991 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..0894f6a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +washington 1 236 1.386294 1.386294 32 +softwar 0 220 1.386294 0.000000 30 +public 0 202 1.609438 0.000000 43 +data 0 170 1.791759 0.000000 49 +seattl 0 120 2.079442 0.000000 103 +peopl 0 96 2.302585 0.000000 132 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +comment 0 93 2.397895 0.000000 146 +laboratori 0 63 2.772589 0.000000 292 +polici 0 64 2.772589 0.000000 279 +juli 0 60 2.833213 0.000000 305 +local 0 55 2.944439 0.000000 334 +cool 0 49 3.044522 0.000000 374 +disk 0 22 3.850148 0.000000 747 +theunivers 0 21 3.912023 0.000000 797 +usag 0 6 5.164786 0.000000 2209 +these 0 5 5.347108 0.000000 2482 +grail 1 3 5.857933 5.857933 3356 +neighborhood 0 3 5.857933 0.000000 3242 +laboratorywelcom 0 2 6.263398 0.000000 5439 +mtwong 0 1 6.957497 0.000000 16992 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..1fab5569 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +implement 0 152 1.791759 0.000000 52 +parallel 0 169 1.791759 0.000000 60 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +seattl 1 120 2.079442 2.079442 103 +report 1 131 2.079442 2.079442 92 +technic 1 100 2.302585 2.302585 140 +peopl 0 96 2.302585 0.000000 132 +present 1 91 2.397895 2.397895 145 +commun 0 95 2.397895 0.000000 157 +proceed 0 93 2.397895 0.000000 152 +build 0 85 2.484907 0.000000 184 +come 0 78 2.564949 0.000000 202 +workshop 0 71 2.639057 0.000000 239 +simul 1 66 2.708050 2.708050 255 +test 0 66 2.708050 0.000000 252 +result 1 65 2.772589 2.772589 281 +hardwar 0 51 2.995732 0.000000 350 +friend 0 48 3.044522 0.000000 376 +standard 0 48 3.044522 0.000000 365 +better 0 45 3.135494 0.000000 401 +discuss 0 45 3.135494 0.000000 399 +describ 0 45 3.135494 0.000000 400 +power 0 30 3.555348 0.000000 573 +abl 0 30 3.555348 0.000000 566 +built 0 29 3.583519 0.000000 592 +held 0 28 3.610918 0.000000 600 +mine 0 26 3.688879 0.000000 654 +sort 0 22 3.850148 0.000000 738 +rout 2 21 3.912023 7.824046 793 +chip 1 21 3.912023 3.912023 770 +nice 0 20 3.951244 0.000000 809 +repositori 0 17 4.110874 0.000000 932 +interconnect 0 17 4.110874 0.000000 937 +web 0 12 4.465908 0.000000 1249 +chao 1 8 4.875197 4.875197 1753 +router 1 8 4.875197 4.875197 1772 +dylan 0 8 4.875197 0.000000 1625 +univeristi 0 8 4.875197 0.000000 1754 +guidelin 0 7 5.010635 0.000000 1832 +chaotic 1 5 5.347108 5.347108 2566 +chaoticrout 0 4 5.568345 0.000000 3063 +micron 1 3 5.857933 5.857933 3341 +cmo 0 3 5.857933 0.000000 3992 +redesign 0 3 5.857933 0.000000 3540 +thathav 0 3 5.857933 0.000000 3735 +papersand 0 2 6.263398 0.000000 4867 +pcrcw 1 1 6.957497 6.957497 16993 +peopleal 0 1 6.957497 0.000000 16994 +allsort 0 1 6.957497 0.000000 16995 +graphicalfront 0 1 6.957497 0.000000 16996 +presentationof 0 1 6.957497 0.000000 16997 +upwith 0 1 6.957497 0.000000 16998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..4b80274c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +washington 0 236 1.386294 0.000000 32 +includ 1 208 1.609438 1.609438 42 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +develop 0 174 1.791759 0.000000 53 +avail 0 169 1.791759 0.000000 48 +recent 0 167 1.791759 0.000000 58 +architectur 1 139 1.945910 1.945910 77 +perform 0 143 1.945910 0.000000 74 +first 0 140 1.945910 0.000000 71 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +schedul 0 119 2.079442 0.000000 85 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +code 1 108 2.197225 2.197225 116 +topic 0 114 2.197225 0.000000 110 +user 0 104 2.302585 0.000000 137 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +need 0 98 2.302585 0.000000 135 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +real 1 93 2.397895 2.397895 144 +follow 0 92 2.397895 0.000000 143 +level 1 87 2.484907 2.484907 180 +activ 1 84 2.484907 2.484907 182 +larg 0 82 2.484907 0.000000 168 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +build 0 85 2.484907 0.000000 184 +school 0 84 2.484907 0.000000 188 +june 1 79 2.564949 2.564949 214 +complet 0 77 2.564949 0.000000 208 +issu 0 78 2.564949 0.000000 211 +want 0 79 2.564949 0.000000 199 +interfac 0 79 2.564949 0.000000 209 +nation 1 74 2.639057 2.639057 240 +name 0 72 2.639057 0.000000 220 +effici 0 73 2.639057 0.000000 233 +meet 0 72 2.639057 0.000000 229 +simul 1 66 2.708050 2.708050 255 +main 0 67 2.708050 0.000000 256 +evalu 0 64 2.772589 0.000000 266 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +improv 0 62 2.772589 0.000000 289 +foundat 0 62 2.772589 0.000000 286 +automat 0 61 2.833213 0.000000 306 +detail 1 57 2.890372 2.890372 321 +summer 0 56 2.890372 0.000000 311 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +maintain 0 51 2.995732 0.000000 342 +even 0 45 3.135494 0.000000 393 +autom 0 41 3.218876 0.000000 434 +map 1 39 3.258097 3.258097 452 +connect 1 37 3.332205 3.332205 485 +singl 0 34 3.401197 0.000000 510 +concurr 0 34 3.401197 0.000000 501 +compon 1 30 3.555348 3.555348 570 +robert 0 30 3.555348 0.000000 567 +becom 0 28 3.610918 0.000000 603 +american 0 27 3.637586 0.000000 634 +rather 1 26 3.688879 3.688879 642 +constraint 1 26 3.688879 3.688879 636 +enabl 1 26 3.688879 3.688879 655 +accur 0 25 3.737670 0.000000 680 +sometim 0 24 3.761200 0.000000 696 +demonstr 0 24 3.761200 0.000000 694 +input 0 23 3.806662 0.000000 727 +togeth 0 23 3.806662 0.000000 714 +instead 0 22 3.850148 0.000000 756 +try 0 22 3.850148 0.000000 764 +sequenti 0 22 3.850148 0.000000 745 +util 0 21 3.912023 0.000000 774 +output 0 21 3.912023 0.000000 788 +synthesi 1 20 3.951244 3.951244 834 +kernel 0 20 3.951244 0.000000 825 +wind 1 18 4.060443 4.060443 908 +behavior 0 18 4.060443 0.000000 881 +partit 1 16 4.174387 4.174387 984 +earli 0 16 4.174387 0.000000 968 +advantag 0 16 4.174387 0.000000 987 +diego 0 16 4.174387 0.000000 992 +devic 0 16 4.174387 0.000000 1002 +universityof 0 15 4.248495 0.000000 1061 +embed 1 14 4.317488 4.317488 1102 +believ 0 13 4.382027 0.000000 1187 +incorpor 0 13 4.382027 0.000000 1163 +target 0 12 4.465908 0.000000 1282 +grant 0 12 4.465908 0.000000 1216 +fill 0 11 4.553877 0.000000 1349 +cycl 0 11 4.553877 0.000000 1335 +fix 0 11 4.553877 0.000000 1327 +itali 0 11 4.553877 0.000000 1378 +fellowship 1 10 4.653960 4.653960 1460 +mountain 1 10 4.653960 4.653960 1456 +forc 0 10 4.653960 0.000000 1384 +reli 0 10 4.653960 0.000000 1411 +pacif 0 8 4.875197 0.000000 1674 +character 0 8 4.875197 0.000000 1767 +driver 0 8 4.875197 0.000000 1657 +maxim 0 7 5.010635 0.000000 1944 +chinook 2 6 5.164786 10.329572 2229 +averag 0 6 5.164786 0.000000 2098 +contract 0 6 5.164786 0.000000 1985 +blow 0 5 5.347108 0.000000 2407 +east 0 5 5.347108 0.000000 2472 +synthes 0 5 5.347108 0.000000 2451 +ross 0 5 5.347108 0.000000 2243 +ortega 0 5 5.347108 0.000000 2559 +rocki 1 4 5.568345 5.568345 3048 +chou 1 4 5.568345 5.568345 3033 +ti 0 4 5.568345 0.000000 3005 +shelf 0 4 5.568345 0.000000 2621 +harri 0 4 5.568345 0.000000 3034 +warm 1 3 5.857933 5.857933 3904 +retarget 1 3 5.857933 5.857933 3994 +domin 0 3 5.857933 0.000000 3995 +moredetail 0 3 5.857933 0.000000 3854 +shortli 0 3 5.857933 0.000000 3375 +nato 0 3 5.857933 0.000000 3587 +salmon 1 2 6.263398 6.263398 4802 +rare 0 2 6.263398 0.000000 4184 +toolfor 0 2 6.263398 0.000000 6031 +neededto 0 2 6.263398 0.000000 5379 +ratherthan 0 2 6.263398 0.000000 6046 +differentarchitectur 0 2 6.263398 0.000000 6051 +verilog 0 2 6.263398 0.000000 4441 +softwareprogram 0 2 6.263398 0.000000 4889 +moreeffici 0 2 6.263398 0.000000 4209 +macduff 0 2 6.263398 0.000000 5923 +hauck 0 2 6.263398 0.000000 5920 +shinook 0 1 6.957497 0.000000 16999 +oncorhynchu 0 1 6.957497 0.000000 17000 +tshawytscha 0 1 6.957497 0.000000 17001 +amer 0 1 6.957497 0.000000 17002 +tribe 0 1 6.957497 0.000000 17003 +southerli 0 1 6.957497 0.000000 17004 +sled 0 1 6.957497 0.000000 17005 +doga 0 1 6.957497 0.000000 17006 +cadtool 0 1 6.957497 0.000000 17007 +reactivesystem 0 1 6.957497 0.000000 17008 +descriptionto 0 1 6.957497 0.000000 17009 +designdecis 0 1 6.957497 0.000000 17010 +reiterateaft 0 1 6.957497 0.000000 17011 +willnot 0 1 6.957497 0.000000 17012 +designerto 0 1 6.957497 0.000000 17013 +legacycod 0 1 6.957497 0.000000 17014 +currentlyw 0 1 6.957497 0.000000 17015 +interprocessorcommun 0 1 6.957497 0.000000 17016 +assumesmanu 0 1 6.957497 0.000000 17017 +intricateand 0 1 6.957497 0.000000 17018 +asicarchitectur 0 1 6.957497 0.000000 17019 +onoff 0 1 6.957497 0.000000 17020 +discourag 0 1 6.957497 0.000000 17021 +innovemb 0 1 6.957497 0.000000 17022 +shownat 0 1 6.957497 0.000000 17023 +mainfeatur 0 1 6.957497 0.000000 17024 +peripheraldevic 0 1 6.957497 0.000000 17025 +andsynthes 0 1 6.957497 0.000000 17026 +hardwarenetlist 0 1 6.957497 0.000000 17027 +interfacingproblem 0 1 6.957497 0.000000 17028 +timingconstraint 0 1 6.957497 0.000000 17029 +swcodedesign 0 1 6.957497 0.000000 17030 +tremezzo 0 1 6.957497 0.000000 17031 +severalmor 0 1 6.957497 0.000000 17032 +chinookersfacultygaetano 0 1 6.957497 0.000000 17033 +borriellogradu 0 1 6.957497 0.000000 17034 +ortegaken 0 1 6.957497 0.000000 17035 +hinesian 0 1 6.957497 0.000000 17036 +selizabeth 0 1 6.957497 0.000000 17037 +walkupscott 0 1 6.957497 0.000000 17038 +henrik 0 1 6.957497 0.000000 17039 +hulgaardstafflarri 0 1 6.957497 0.000000 17040 +mcmurchielist 0 1 6.957497 0.000000 17041 +paperschinook 0 1 6.957497 0.000000 17042 +sponsorsarpa 0 1 6.957497 0.000000 17043 +walkup 0 1 6.957497 0.000000 17044 +patricia 0 1 6.957497 0.000000 17045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..b503058e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +develop 1 174 1.791759 1.791759 53 +architectur 1 139 1.945910 1.945910 77 +construct 0 139 1.945910 0.000000 82 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +document 0 121 2.079442 0.000000 89 +compil 0 122 2.079442 0.000000 96 +intern 1 108 2.197225 2.197225 128 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +person 0 111 2.197225 0.000000 117 +need 1 98 2.302585 2.302585 135 +technic 0 100 2.302585 0.000000 140 +proceed 1 93 2.397895 2.397895 152 +mani 0 92 2.397895 0.000000 150 +environ 0 84 2.484907 0.000000 177 +contain 0 81 2.484907 0.000000 174 +appear 0 78 2.564949 0.000000 210 +logic 1 71 2.639057 2.639057 230 +symposium 1 72 2.639057 2.639057 238 +would 0 67 2.708050 0.000000 251 +evalu 0 64 2.772589 0.000000 266 +copi 0 63 2.772589 0.000000 284 +februari 1 54 2.944439 2.944439 328 +allow 0 53 2.944439 0.000000 333 +maintain 0 51 2.995732 0.000000 342 +right 0 48 3.044522 0.000000 363 +without 0 50 3.044522 0.000000 370 +featur 0 46 3.091042 0.000000 386 +offer 0 43 3.178054 0.000000 414 +term 0 43 3.178054 0.000000 411 +third 0 43 3.178054 0.000000 412 +fast 0 42 3.218876 0.000000 429 +map 1 39 3.258097 3.258097 452 +author 1 39 3.258097 3.258097 450 +programm 1 39 3.258097 3.258097 445 +prototyp 0 38 3.295837 0.000000 463 +field 1 37 3.332205 3.332205 482 +mean 0 37 3.332205 0.000000 477 +copyright 1 36 3.367296 3.367296 495 +global 0 34 3.401197 0.000000 520 +given 0 32 3.465736 0.000000 538 +produc 0 30 3.555348 0.000000 572 +power 0 30 3.555348 0.000000 573 +specifi 0 30 3.555348 0.000000 568 +propos 0 28 3.610918 0.000000 602 +arrai 0 27 3.637586 0.000000 627 +constraint 0 26 3.688879 0.000000 636 +reliabl 0 25 3.737670 0.000000 674 +accur 0 25 3.737670 0.000000 680 +frame 0 24 3.761200 0.000000 684 +rout 1 21 3.912023 3.912023 793 +basi 0 20 3.951244 0.000000 828 +definit 0 19 4.007333 0.000000 864 +partit 0 16 4.174387 0.000000 984 +commerci 0 16 4.174387 0.000000 1005 +fourth 0 16 4.174387 0.000000 999 +driven 1 15 4.248495 4.248495 1048 +carl 1 15 4.248495 4.248495 1024 +contribut 0 15 4.248495 0.000000 1021 +larri 1 13 4.382027 4.382027 1142 +unfortun 0 13 4.382027 0.000000 1170 +block 0 13 4.382027 0.000000 1183 +fpga 1 10 4.653960 4.653960 1433 +placement 1 10 4.653960 4.653960 1420 +face 0 9 4.753590 0.000000 1501 +router 1 8 4.875197 4.875197 1772 +satisfi 0 8 4.875197 0.000000 1694 +heart 0 8 4.875197 0.000000 1729 +metric 0 7 5.010635 0.000000 1831 +gate 1 6 5.164786 5.164786 2182 +phase 0 6 5.164786 0.000000 1977 +quickli 0 6 5.164786 0.000000 2000 +ensur 0 6 5.164786 0.000000 2012 +invok 0 6 5.164786 0.000000 2079 +darren 1 5 5.347108 5.347108 2565 +variat 0 5 5.347108 0.000000 2248 +understood 0 5 5.347108 0.000000 2364 +mcmurchi 1 4 5.568345 5.568345 2757 +ebel 1 4 5.568345 5.568345 2756 +permiss 0 4 5.568345 0.000000 2642 +emerald 2 3 5.857933 11.715866 3979 +cronquist 1 3 5.857933 5.857933 3942 +moreov 0 3 5.857933 0.000000 3200 +byth 0 3 5.857933 0.000000 3874 +performanceevalu 0 2 6.263398 0.000000 6052 +thoroughli 0 2 6.263398 0.000000 4801 +parameter 0 2 6.263398 0.000000 5540 +dissemin 0 2 6.263398 0.000000 5080 +adher 0 2 6.263398 0.000000 6025 +sigda 0 2 6.263398 0.000000 5493 +pathfind 0 2 6.263398 0.000000 6053 +negoti 0 2 6.263398 0.000000 6054 +basedperform 0 2 6.263398 0.000000 6055 +projectid 0 1 6.957497 0.000000 17046 +makeus 0 1 6.957497 0.000000 17047 +quickproduct 0 1 6.957497 0.000000 17048 +isoften 0 1 6.957497 0.000000 17049 +postpon 0 1 6.957497 0.000000 17050 +beenfrozen 0 1 6.957497 0.000000 17051 +havedesign 0 1 6.957497 0.000000 17052 +quickdevelop 0 1 6.957497 0.000000 17053 +basicfeatur 0 1 6.957497 0.000000 17054 +synthesisand 0 1 6.957497 0.000000 17055 +anddetail 0 1 6.957497 0.000000 17056 +aneffici 0 1 6.957497 0.000000 17057 +blockarchitectur 0 1 6.957497 0.000000 17058 +tailorplac 0 1 6.957497 0.000000 17059 +schematicspecif 0 1 6.957497 0.000000 17060 +capturedand 0 1 6.957497 0.000000 17061 +ofscholarli 0 1 6.957497 0.000000 17062 +andal 0 1 6.957497 0.000000 17063 +therein 0 1 6.957497 0.000000 17064 +copyrighthold 0 1 6.957497 0.000000 17065 +notwithstand 0 1 6.957497 0.000000 17066 +hereelectron 0 1 6.957497 0.000000 17067 +thisinform 0 1 6.957497 0.000000 17068 +eachauthor 0 1 6.957497 0.000000 17069 +repost 0 1 6.957497 0.000000 17070 +theexplicit 0 1 6.957497 0.000000 17071 +holder 0 1 6.957497 0.000000 17072 +emeraldlarri 0 1 6.957497 0.000000 17073 +arraysaid 0 1 6.957497 0.000000 17074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..ed36f41f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +develop 1 174 1.791759 1.791759 53 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +architectur 2 139 1.945910 3.891820 77 +perform 1 143 1.945910 1.945910 74 +first 0 140 1.945910 0.000000 71 +assign 0 135 1.945910 0.000000 66 +seattl 0 120 2.079442 0.000000 103 +tool 0 117 2.079442 0.000000 93 +high 0 130 2.079442 0.000000 101 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +sinc 0 90 2.397895 0.000000 159 +level 1 87 2.484907 2.484907 180 +larg 0 82 2.484907 0.000000 168 +educ 0 86 2.484907 0.000000 191 +wide 0 84 2.484907 0.000000 185 +complet 0 77 2.564949 0.000000 208 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +effici 0 73 2.639057 0.000000 233 +integr 1 67 2.708050 2.708050 245 +simul 0 66 2.708050 0.000000 255 +improv 1 62 2.772589 2.772589 289 +laboratori 1 63 2.772589 2.772589 292 +descript 0 64 2.772589 0.000000 271 +result 0 65 2.772589 0.000000 281 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +variou 0 56 2.890372 0.000000 317 +direct 0 57 2.890372 0.000000 316 +overview 0 56 2.890372 0.000000 323 +hardwar 0 51 2.995732 0.000000 350 +digit 0 52 2.995732 0.000000 348 +adapt 0 46 3.091042 0.000000 387 +late 0 40 3.258097 0.000000 439 +map 0 39 3.258097 0.000000 452 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +multi 1 36 3.367296 3.367296 493 +survei 0 35 3.401197 0.000000 513 +board 0 33 3.433987 0.000000 528 +toler 0 33 3.433987 0.000000 533 +focu 0 30 3.555348 0.000000 571 +synchron 0 29 3.583519 0.000000 588 +scale 0 28 3.610918 0.000000 613 +compar 0 26 3.688879 0.000000 648 +todai 0 25 3.737670 0.000000 672 +methodolog 0 23 3.806662 0.000000 733 +varieti 0 22 3.850148 0.000000 740 +reduc 0 22 3.850148 0.000000 759 +self 0 22 3.850148 0.000000 761 +vlsi 1 21 3.912023 3.912023 795 +rout 1 21 3.912023 3.912023 793 +chip 0 21 3.912023 0.000000 770 +synthesi 1 20 3.951244 3.951244 834 +verif 0 20 3.951244 0.000000 826 +separ 0 19 4.007333 0.000000 844 +feedback 0 19 4.007333 0.000000 854 +aid 0 18 4.060443 0.000000 904 +event 0 18 4.060443 0.000000 896 +commerci 0 16 4.174387 0.000000 1005 +partit 0 16 4.174387 0.000000 984 +latenc 0 16 4.174387 0.000000 993 +embed 1 14 4.317488 4.317488 1102 +topolog 0 14 4.317488 0.000000 1089 +circuit 2 13 4.382027 8.764054 1131 +asynchron 1 12 4.465908 4.465908 1229 +tune 1 12 4.465908 4.465908 1227 +clock 1 11 4.553877 4.553877 1320 +valid 0 11 4.553877 0.000000 1299 +arpa 0 11 4.553877 0.000000 1369 +fpga 2 10 4.653960 9.307920 1433 +rapid 1 10 4.653960 4.653960 1453 +placement 0 10 4.653960 0.000000 1420 +paragraph 0 10 4.653960 0.000000 1449 +sensit 0 8 4.875197 0.000000 1726 +accomplish 0 8 4.875197 0.000000 1755 +northwest 1 7 5.010635 5.010635 1973 +densiti 0 7 5.010635 0.000000 1927 +metric 0 7 5.010635 0.000000 1831 +chinook 0 6 5.164786 0.000000 2229 +layout 0 6 5.164786 0.000000 2183 +sytem 0 4 5.568345 0.000000 3015 +triptych 0 4 5.568345 0.000000 3061 +toolset 0 4 5.568345 0.000000 3014 +chaoticrout 0 4 5.568345 0.000000 3063 +tester 0 4 5.568345 0.000000 2754 +emerald 0 3 5.857933 0.000000 3979 +systemsth 0 3 5.857933 0.000000 3835 +mactest 0 3 5.857933 0.000000 3972 +cmo 0 3 5.857933 0.000000 3992 +montag 1 2 6.263398 6.263398 5921 +retim 1 2 6.263398 6.263398 6008 +usath 0 2 6.263398 0.000000 6056 +engag 0 2 6.263398 0.000000 4937 +springbok 0 2 6.263398 0.000000 5922 +latch 0 2 6.263398 0.000000 6034 +skew 0 2 6.263398 0.000000 6057 +gemini 0 2 6.263398 0.000000 5975 +voltag 0 2 6.263398 0.000000 5475 +verificationtim 0 1 6.957497 0.000000 17075 +prototypingtriptych 0 1 6.957497 0.000000 17076 +toolscan 0 1 6.957497 0.000000 17077 +fpgaarchitectur 0 1 6.957497 0.000000 17078 +incorporatedinto 0 1 6.957497 0.000000 17079 +circuitsretim 0 1 6.957497 0.000000 17080 +uselevel 0 1 6.957497 0.000000 17081 +andincreas 0 1 6.957497 0.000000 17082 +synchronouscircuit 0 1 6.957497 0.000000 17083 +contraint 0 1 6.957497 0.000000 17084 +routerth 0 1 6.957497 0.000000 17085 +systemsself 0 1 6.957497 0.000000 17086 +kehlprevi 0 1 6.957497 0.000000 17087 +reportsarpa 0 1 6.957497 0.000000 17088 +bluebook 0 1 6.957497 0.000000 17089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..47ae3032 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +project 0 340 1.098612 0.000000 18 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +link 0 247 1.386294 0.000000 24 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +analysi 1 124 2.079442 2.079442 98 +tool 0 117 2.079442 0.000000 93 +specif 0 106 2.197225 0.000000 106 +techniqu 1 99 2.302585 2.302585 138 +contain 0 81 2.484907 0.000000 174 +build 0 85 2.484907 0.000000 184 +issu 0 78 2.564949 0.000000 211 +appli 0 71 2.639057 0.000000 226 +involv 0 71 2.639057 0.000000 227 +test 0 66 2.708050 0.000000 252 +goal 0 66 2.708050 0.000000 250 +foundat 1 62 2.772589 2.772589 286 +import 0 65 2.772589 0.000000 282 +plai 0 60 2.833213 0.000000 307 +space 0 57 2.890372 0.000000 310 +digit 0 52 2.995732 0.000000 348 +review 0 42 3.218876 0.000000 425 +must 0 40 3.258097 0.000000 442 +theoret 0 39 3.258097 0.000000 446 +prototyp 0 38 3.295837 0.000000 463 +exist 0 30 3.555348 0.000000 569 +built 0 29 3.583519 0.000000 592 +pass 0 28 3.610918 0.000000 611 +although 0 25 3.737670 0.000000 667 +methodolog 1 23 3.806662 3.806662 733 +safeti 2 20 3.951244 7.902488 817 +medic 0 17 4.110874 0.000000 958 +critic 1 16 4.174387 4.174387 982 +upon 0 16 4.174387 0.000000 978 +role 0 14 4.317488 0.000000 1101 +nanci 0 12 4.465908 0.000000 1256 +summar 0 11 4.553877 0.000000 1295 +valid 0 11 4.553877 0.000000 1299 +equip 0 10 4.653960 0.000000 1459 +leveson 0 9 4.753590 0.000000 1540 +consequ 0 6 5.164786 0.000000 1989 +nuclear 0 5 5.347108 0.000000 2576 +chemic 0 5 5.347108 0.000000 2552 +plant 0 5 5.347108 0.000000 2497 +decad 0 5 5.347108 0.000000 2455 +increasingli 0 4 5.568345 0.000000 2766 +aircraft 0 4 5.568345 0.000000 2872 +rigor 0 4 5.568345 0.000000 3030 +lai 0 3 5.857933 0.000000 3694 +safewar 0 2 6.263398 0.000000 5959 +reactor 0 1 6.957497 0.000000 17090 +defenc 0 1 6.957497 0.000000 17091 +malfunct 0 1 6.957497 0.000000 17092 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..24a8678b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +implement 0 152 1.791759 0.000000 52 +support 0 132 1.945910 0.000000 83 +high 1 130 2.079442 2.079442 101 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +world 1 115 2.197225 2.197225 126 +assist 0 112 2.197225 0.000000 113 +check 0 115 2.197225 0.000000 118 +specif 0 106 2.197225 0.000000 106 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +take 0 97 2.302585 0.000000 134 +technic 0 100 2.302585 0.000000 140 +advanc 0 99 2.302585 0.000000 130 +graphic 1 90 2.397895 2.397895 147 +search 1 95 2.397895 2.397895 155 +real 0 93 2.397895 0.000000 144 +internet 1 83 2.484907 2.484907 186 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +learn 0 86 2.484907 0.000000 170 +info 0 85 2.484907 0.000000 176 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +state 0 76 2.564949 0.000000 207 +want 0 79 2.564949 0.000000 199 +dynam 0 76 2.564949 0.000000 194 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +optim 0 79 2.564949 0.000000 197 +intellig 1 72 2.639057 2.639057 225 +servic 0 72 2.639057 0.000000 236 +goal 1 66 2.708050 2.708050 250 +plan 1 65 2.772589 2.772589 272 +interact 0 62 2.772589 0.000000 270 +experi 0 64 2.772589 0.000000 283 +back 1 60 2.833213 2.833213 297 +juli 0 60 2.833213 0.000000 305 +unix 0 58 2.890372 0.000000 308 +space 0 57 2.890372 0.000000 310 +browser 0 56 2.890372 0.000000 313 +found 1 53 2.944439 2.944439 337 +allow 0 53 2.944439 0.000000 333 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +maintain 0 51 2.995732 0.000000 342 +without 0 50 3.044522 0.000000 370 +execut 0 45 3.135494 0.000000 404 +protocol 0 45 3.135494 0.000000 407 +multipl 0 39 3.258097 0.000000 453 +field 0 37 3.332205 0.000000 482 +robot 1 36 3.367296 3.367296 497 +procedur 0 36 3.367296 0.000000 488 +multi 0 36 3.367296 0.000000 493 +articl 0 33 3.433987 0.000000 530 +human 0 32 3.465736 0.000000 546 +extend 0 32 3.465736 0.000000 539 +collabor 0 32 3.465736 0.000000 543 +taken 0 31 3.496508 0.000000 555 +rang 0 30 3.555348 0.000000 565 +option 0 30 3.555348 0.000000 575 +specifi 0 30 3.555348 0.000000 568 +domain 0 30 3.555348 0.000000 564 +determin 0 27 3.637586 0.000000 630 +enabl 1 26 3.688879 3.688879 655 +challeng 0 26 3.688879 0.000000 653 +compar 0 26 3.688879 0.000000 648 +rule 0 26 3.688879 0.000000 638 +magazin 1 24 3.761200 3.761200 704 +mike 0 24 3.761200 0.000000 703 +methodolog 0 23 3.806662 0.000000 733 +util 0 21 3.912023 0.000000 774 +alumni 0 21 3.912023 0.000000 807 +agent 1 18 4.060443 4.060443 910 +accept 0 18 4.060443 0.000000 879 +debug 0 17 4.110874 0.000000 944 +indic 0 15 4.248495 0.000000 1013 +achiev 0 14 4.317488 0.000000 1088 +easili 0 14 4.317488 0.000000 1077 +dave 0 14 4.317488 0.000000 1098 +daniel 0 12 4.465908 0.000000 1233 +tour 0 11 4.553877 0.000000 1307 +motiv 0 11 4.553877 0.000000 1346 +princip 0 10 4.653960 0.000000 1397 +metacrawl 0 10 4.653960 0.000000 1455 +weld 0 9 4.753590 0.000000 1538 +autonom 0 8 4.875197 0.000000 1749 +claim 0 8 4.875197 0.000000 1664 +accomplish 0 8 4.875197 0.000000 1755 +gather 0 8 4.875197 0.000000 1719 +softbot 2 7 5.010635 10.021270 1974 +golden 1 7 5.010635 5.010635 1962 +intellectu 0 7 5.010635 0.000000 1847 +planner 0 7 5.010635 0.000000 1797 +etzioni 1 6 5.164786 5.164786 2135 +oren 1 6 5.164786 5.164786 2134 +moder 0 6 5.164786 0.000000 2112 +brook 0 6 5.164786 0.000000 2152 +versu 0 6 5.164786 0.000000 2052 +keith 1 5 5.347108 5.347108 2528 +shell 0 5 5.347108 0.000000 2353 +cacm 0 5 5.347108 0.000000 2388 +innov 0 4 5.568345 0.000000 2933 +substrat 0 4 5.568345 0.000000 2857 +disambigu 0 4 5.568345 0.000000 2899 +repli 0 4 5.568345 0.000000 2689 +toth 0 4 5.568345 0.000000 2595 +reactiv 1 3 5.857933 5.857933 3575 +kwok 1 3 5.857933 5.857933 3941 +sujai 0 3 5.857933 0.000000 3960 +parekh 0 3 5.857933 0.000000 3961 +hacker 0 3 5.857933 0.000000 3996 +finalist 0 2 6.263398 0.000000 5890 +discoveraward 0 2 6.263398 0.000000 5891 +learningtechniqu 0 2 6.263398 0.000000 5028 +christianson 0 2 6.263398 0.000000 5849 +negoti 0 2 6.263398 0.000000 6054 +goan 0 2 6.263398 0.000000 5896 +ingram 0 2 6.263398 0.000000 5847 +perkowitz 0 2 6.263398 0.000000 5970 +softbotinternet 0 1 6.957497 0.000000 17093 +softbotth 0 1 6.957497 0.000000 17094 +softwareenviron 0 1 6.957497 0.000000 17095 +pragmaticallyconveni 0 1 6.957497 0.000000 17096 +acustomiz 0 1 6.957497 0.000000 17097 +internetaccess 0 1 6.957497 0.000000 17098 +generatesand 0 1 6.957497 0.000000 17099 +itsexperi 0 1 6.957497 0.000000 17100 +requestand 0 1 6.957497 0.000000 17101 +satisfyit 0 1 6.957497 0.000000 17102 +interactwith 0 1 6.957497 0.000000 17103 +sgraphic 0 1 6.957497 0.000000 17104 +tosearch 0 1 6.957497 0.000000 17105 +sophisticatedprun 0 1 6.957497 0.000000 17106 +cartoonrepresent 0 1 6.957497 0.000000 17107 +blanchard 0 1 6.957497 0.000000 17108 +ofcolumn 0 1 6.957497 0.000000 17109 +xiiplann 0 1 6.957497 0.000000 17110 +ilalearn 0 1 6.957497 0.000000 17111 +ying 0 1 6.957497 0.000000 17112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..4527390f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,214 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +link 0 247 1.386294 0.000000 24 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +applic 2 170 1.791759 3.583518 56 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +architectur 0 139 1.945910 0.000000 77 +relat 0 139 1.945910 0.000000 68 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +machin 0 129 2.079442 0.000000 95 +provid 0 121 2.079442 0.000000 94 +confer 0 126 2.079442 0.000000 100 +high 0 130 2.079442 0.000000 101 +document 0 121 2.079442 0.000000 89 +code 2 108 2.197225 4.394450 116 +intern 1 108 2.197225 2.197225 128 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +manag 0 114 2.197225 0.000000 125 +assist 0 112 2.197225 0.000000 113 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +control 1 82 2.484907 2.484907 164 +member 1 84 2.484907 2.484907 165 +resourc 0 81 2.484907 0.000000 172 +build 0 85 2.484907 0.000000 184 +dynam 2 76 2.564949 5.129898 194 +interfac 1 79 2.564949 2.564949 209 +appear 1 78 2.564949 2.564949 210 +server 0 76 2.564949 0.000000 204 +exampl 0 77 2.564949 0.000000 195 +master 0 76 2.564949 0.000000 216 +servic 1 72 2.639057 2.639057 236 +effici 0 73 2.639057 0.000000 233 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +integr 1 67 2.708050 2.708050 245 +order 0 69 2.708050 0.000000 249 +degre 0 69 2.708050 0.000000 259 +collect 1 65 2.772589 2.772589 268 +result 1 65 2.772589 2.772589 281 +creat 1 63 2.772589 2.772589 277 +copi 0 63 2.772589 0.000000 284 +written 0 63 2.772589 0.000000 278 +function 0 62 2.772589 0.000000 275 +experi 0 64 2.772589 0.000000 283 +improv 0 62 2.772589 0.000000 289 +unix 1 58 2.890372 2.890372 308 +direct 0 57 2.890372 0.000000 316 +space 0 57 2.890372 0.000000 310 +special 0 56 2.890372 0.000000 320 +point 0 58 2.890372 0.000000 319 +overview 0 56 2.890372 0.000000 323 +extens 2 53 2.944439 5.888878 340 +allow 1 53 2.944439 2.944439 333 +talk 0 53 2.944439 0.000000 336 +undergradu 0 54 2.944439 0.000000 338 +run 1 51 2.995732 2.995732 347 +maintain 0 51 2.995732 0.000000 342 +basic 1 50 3.044522 3.044522 360 +friend 0 48 3.044522 0.000000 376 +pointer 0 48 3.044522 0.000000 368 +adapt 0 46 3.091042 0.000000 387 +could 0 46 3.091042 0.000000 383 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +video 0 44 3.135494 0.000000 405 +anoth 0 45 3.135494 0.000000 408 +protocol 0 45 3.135494 0.000000 407 +mechan 0 43 3.178054 0.000000 416 +show 0 43 3.178054 0.000000 417 +join 0 39 3.258097 0.000000 457 +realli 0 40 3.258097 0.000000 444 +credit 1 38 3.295837 3.295837 460 +industri 0 38 3.295837 0.000000 464 +brian 0 38 3.295837 0.000000 466 +workstat 0 37 3.332205 0.000000 479 +purpos 0 37 3.332205 0.000000 481 +connect 0 37 3.332205 0.000000 485 +procedur 0 36 3.367296 0.000000 488 +winter 0 36 3.367296 0.000000 500 +extend 0 32 3.465736 0.000000 539 +fault 0 32 3.465736 0.000000 547 +posit 0 31 3.496508 0.000000 552 +synchron 0 29 3.583519 0.000000 588 +load 1 28 3.610918 3.610918 601 +pass 0 28 3.610918 0.000000 611 +manipul 0 27 3.637586 0.000000 624 +rather 0 26 3.688879 0.000000 642 +effort 0 26 3.688879 0.000000 652 +handl 0 24 3.761200 0.000000 685 +thread 1 23 3.806662 3.806662 722 +almost 0 22 3.850148 0.000000 742 +properti 0 22 3.850148 0.000000 749 +deal 0 22 3.850148 0.000000 736 +flexibl 0 21 3.912023 0.000000 792 +latest 0 21 3.912023 0.000000 785 +fund 0 21 3.912023 0.000000 805 +kernel 2 20 3.951244 7.902488 825 +longer 0 20 3.951244 0.000000 816 +safeti 0 20 3.951244 0.000000 817 +facil 0 20 3.951244 0.000000 814 +benchmark 1 19 4.007333 4.007333 859 +runtim 0 19 4.007333 0.000000 858 +bershad 1 18 4.060443 4.060443 902 +less 0 18 4.060443 0.000000 892 +statu 0 18 4.060443 0.000000 885 +encourag 0 18 4.060443 0.000000 880 +regular 0 17 4.110874 0.000000 929 +critic 0 16 4.174387 0.000000 982 +capabl 0 15 4.248495 0.000000 1016 +piec 0 15 4.248495 0.000000 1020 +overhead 0 15 4.248495 0.000000 1035 +spin 2 14 4.317488 8.634976 1121 +happi 0 14 4.317488 0.000000 1079 +decid 0 14 4.317488 0.000000 1075 +sai 1 13 4.382027 4.382027 1175 +pretti 0 13 4.382027 0.000000 1191 +wait 0 13 4.382027 0.000000 1168 +safe 1 12 4.465908 4.465908 1274 +usenix 0 12 4.465908 0.000000 1240 +alpha 1 11 4.553877 4.553877 1348 +arbitrari 0 11 4.553877 0.000000 1359 +abil 0 11 4.553877 0.000000 1341 +arpa 0 11 4.553877 0.000000 1369 +sosp 0 10 4.653960 0.000000 1416 +modula 1 9 4.753590 4.753590 1613 +inter 0 9 4.753590 0.000000 1530 +osdi 0 9 4.753590 0.000000 1534 +clear 0 9 4.753590 0.000000 1488 +isol 0 8 4.875197 0.000000 1663 +crash 0 8 4.875197 0.000000 1616 +cross 0 8 4.875197 0.000000 1703 +mach 0 8 4.875197 0.000000 1669 +core 0 7 5.010635 0.000000 1809 +prevent 0 7 5.010635 0.000000 1827 +bottom 0 7 5.010635 0.000000 1906 +quick 0 6 5.164786 0.000000 2184 +recov 0 6 5.164786 0.000000 2235 +trail 0 6 5.164786 0.000000 2071 +academia 0 6 5.164786 0.000000 2036 +bind 0 5 5.347108 0.000000 2250 +distinct 0 5 5.347108 0.000000 2319 +adopt 0 5 5.347108 0.000000 2467 +termin 1 4 5.568345 5.568345 2852 +andimplement 0 4 5.568345 0.000000 3029 +fork 0 4 5.568345 0.000000 2801 +gotten 0 4 5.568345 0.000000 2628 +stillmaintain 0 3 5.857933 0.000000 3964 +providesa 0 3 5.857933 0.000000 3884 +thesear 0 3 5.857933 0.000000 3456 +forappl 0 3 5.857933 0.000000 3929 +linker 0 3 5.857933 0.000000 3157 +namespac 0 3 5.857933 0.000000 3957 +arrow 0 3 5.857933 0.000000 3520 +microsecond 1 2 6.263398 6.263398 5435 +shortcom 1 2 6.263398 6.263398 5978 +wella 0 2 6.263398 0.000000 4289 +linkabl 0 2 6.263398 0.000000 5979 +barb 0 2 6.263398 0.000000 6058 +qualif 0 2 6.263398 0.000000 6059 +mascot 0 2 6.263398 0.000000 6060 +systemspin 0 1 6.957497 0.000000 17113 +thatsupport 0 1 6.957497 0.000000 17114 +atruntim 0 1 6.957497 0.000000 17115 +accesshardwar 0 1 6.957497 0.000000 17116 +nooverhead 0 1 6.957497 0.000000 17117 +byrefer 0 1 6.957497 0.000000 17118 +systemservic 0 1 6.957497 0.000000 17119 +allextens 0 1 6.957497 0.000000 17120 +typesaf 0 1 6.957497 0.000000 17121 +oftypesafeti 0 1 6.957497 0.000000 17122 +attemptingto 0 1 6.957497 0.000000 17123 +writeboth 0 1 6.957497 0.000000 17124 +machinerun 0 1 6.957497 0.000000 17125 +withlow 0 1 6.957497 0.000000 17126 +executeit 0 1 6.957497 0.000000 17127 +protectedprocedur 0 1 6.957497 0.000000 17128 +overethernet 0 1 6.957497 0.000000 17129 +oldadapt 0 1 6.957497 0.000000 17130 +operationsund 0 1 6.957497 0.000000 17131 +samehardwar 0 1 6.957497 0.000000 17132 +saveyourself 0 1 6.957497 0.000000 17133 +invoc 0 1 6.957497 0.000000 17134 +andsimpl 0 1 6.957497 0.000000 17135 +interposit 0 1 6.957497 0.000000 17136 +raship 0 1 6.957497 0.000000 17137 +ourmascot 0 1 6.957497 0.000000 17138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..bd1cd674 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +washington 0 236 1.386294 0.000000 32 +paper 1 205 1.609438 1.609438 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +relat 0 139 1.945910 0.000000 68 +perform 0 143 1.945910 0.000000 74 +compil 2 122 2.079442 4.158884 96 +machin 1 129 2.079442 2.079442 95 +high 0 130 2.079442 0.000000 101 +code 1 108 2.197225 2.197225 116 +part 0 98 2.302585 0.000000 129 +section 0 94 2.397895 0.000000 149 +member 0 84 2.484907 0.000000 165 +activ 0 84 2.484907 0.000000 182 +start 0 83 2.484907 0.000000 173 +build 0 85 2.484907 0.000000 184 +second 0 81 2.484907 0.000000 166 +dynam 2 76 2.564949 5.129898 194 +optim 1 79 2.564949 2.564949 197 +exampl 0 77 2.564949 0.000000 195 +effici 0 73 2.639057 0.000000 233 +appli 0 71 2.639057 0.000000 226 +august 0 66 2.708050 0.000000 257 +copi 0 63 2.772589 0.000000 284 +automat 0 61 2.833213 0.000000 306 +simpl 0 60 2.833213 0.000000 298 +explor 0 58 2.890372 0.000000 324 +detail 0 57 2.890372 0.000000 321 +approach 0 48 3.044522 0.000000 366 +execut 0 45 3.135494 0.000000 404 +describ 0 45 3.135494 0.000000 400 +howev 1 41 3.218876 3.218876 422 +fast 0 42 3.218876 0.000000 429 +programm 0 39 3.258097 0.000000 445 +prototyp 0 38 3.295837 0.000000 463 +cost 0 37 3.332205 0.000000 480 +purpos 0 37 3.332205 0.000000 481 +soon 0 36 3.367296 0.000000 494 +produc 1 30 3.555348 3.555348 572 +rang 0 30 3.555348 0.000000 565 +releas 0 28 3.610918 0.000000 616 +static 0 27 3.637586 0.000000 619 +enabl 0 26 3.688879 0.000000 655 +bound 0 26 3.688879 0.000000 659 +valu 1 25 3.737670 3.737670 665 +interpret 0 24 3.761200 0.000000 686 +variabl 0 23 3.806662 0.000000 715 +initi 0 23 3.806662 0.000000 717 +identifi 0 22 3.850148 0.000000 760 +annot 0 21 3.912023 0.000000 775 +kernel 0 20 3.951244 0.000000 825 +region 0 19 4.007333 0.000000 875 +spin 0 14 4.317488 0.000000 1121 +remov 0 12 4.465908 0.000000 1225 +target 0 12 4.465908 0.000000 1282 +grant 0 12 4.465908 0.000000 1216 +branch 0 11 4.553877 0.000000 1318 +loop 0 11 4.553877 0.000000 1310 +elimin 0 9 4.753590 0.000000 1558 +pair 0 9 4.753590 0.000000 1503 +analys 0 8 4.875197 0.000000 1666 +pldi 0 8 4.875197 0.000000 1704 +dispatch 0 7 5.010635 0.000000 1791 +constant 1 5 5.347108 5.347108 2251 +templat 0 5 5.347108 0.000000 2311 +dataflow 0 5 5.347108 0.000000 2390 +willb 0 5 5.347108 0.000000 2277 +spinproject 0 5 5.347108 0.000000 2570 +fold 0 4 5.568345 0.000000 2615 +fulli 0 4 5.568345 0.000000 2986 +theprogram 0 4 5.568345 0.000000 2686 +patch 0 4 5.568345 0.000000 2710 +imper 0 4 5.568345 0.000000 3067 +eventu 0 4 5.568345 0.000000 3074 +wewil 0 4 5.568345 0.000000 2688 +projectth 0 3 5.857933 0.000000 3344 +propag 0 3 5.857933 0.000000 3997 +dynamiccompil 0 3 5.857933 0.000000 3926 +optimizingcompil 0 2 6.263398 0.000000 4456 +projectmor 0 1 6.957497 0.000000 17139 +projectsuw 0 1 6.957497 0.000000 17140 +webdynam 0 1 6.957497 0.000000 17141 +ofinvari 0 1 6.957497 0.000000 17142 +theserun 0 1 6.957497 0.000000 17143 +memoryload 0 1 6.957497 0.000000 17144 +theydetermin 0 1 6.957497 0.000000 17145 +unrol 0 1 6.957497 0.000000 17146 +performancebenefit 0 1 6.957497 0.000000 17147 +offsetbi 0 1 6.957497 0.000000 17148 +strive 0 1 6.957497 0.000000 17149 +qualitydynam 0 1 6.957497 0.000000 17150 +thetempl 0 1 6.957497 0.000000 17151 +initialexperi 0 1 6.957497 0.000000 17152 +producedspeedup 0 1 6.957497 0.000000 17153 +dynamicallycompil 0 1 6.957497 0.000000 17154 +spinev 0 1 6.957497 0.000000 17155 +otherposs 0 1 6.957497 0.000000 17156 +invirtu 0 1 6.957497 0.000000 17157 +systemi 0 1 6.957497 0.000000 17158 +arenow 0 1 6.957497 0.000000 17159 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..f4484093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 0 121 2.079442 0.000000 89 +move 1 47 3.091042 3.091042 382 +permanentlymov 0 12 4.465908 0.000000 1250 +permanentlyth 0 12 4.465908 0.000000 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..585de590 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +paper 1 205 1.609438 1.609438 38 +modifi 0 178 1.609438 0.000000 35 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +base 1 165 1.791759 1.791759 50 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +seattl 0 120 2.079442 0.000000 103 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +info 1 85 2.484907 2.484907 176 +level 1 87 2.484907 2.484907 180 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +member 0 84 2.484907 0.000000 165 +help 0 83 2.484907 0.000000 175 +write 0 72 2.639057 0.000000 222 +line 0 75 2.639057 0.000000 231 +would 0 67 2.708050 0.000000 251 +written 1 63 2.772589 2.772589 278 +descript 0 64 2.772589 0.000000 271 +direct 1 57 2.890372 2.890372 316 +overview 1 56 2.890372 2.890372 323 +special 0 56 2.890372 0.000000 320 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +detail 0 57 2.890372 0.000000 321 +scientif 1 53 2.944439 2.944439 341 +sampl 0 53 2.944439 0.000000 339 +without 1 50 3.044522 3.044522 370 +right 0 48 3.044522 0.000000 363 +fast 0 42 3.218876 0.000000 429 +futur 0 41 3.218876 0.000000 427 +error 0 40 3.258097 0.000000 449 +small 0 39 3.258097 0.000000 447 +programm 0 39 3.258097 0.000000 445 +manual 0 35 3.401197 0.000000 504 +concept 1 32 3.465736 3.465736 537 +independ 0 32 3.465736 0.000000 548 +scientist 0 31 3.496508 0.000000 560 +autumn 0 31 3.496508 0.000000 558 +arrai 1 27 3.637586 3.637586 627 +higher 0 24 3.761200 0.000000 690 +flow 0 24 3.761200 0.000000 700 +sequenti 0 22 3.850148 0.000000 745 +minut 0 20 3.951244 0.000000 810 +region 0 19 4.007333 0.000000 875 +previous 0 17 4.110874 0.000000 923 +modif 0 17 4.110874 0.000000 913 +fortran 0 15 4.248495 0.000000 1027 +easili 1 14 4.317488 4.317488 1077 +necessari 0 13 4.382027 0.000000 1147 +walk 1 12 4.465908 4.465908 1281 +loop 0 11 4.553877 0.000000 1310 +typic 0 11 4.553877 0.000000 1360 +suitabl 0 9 4.753590 0.000000 1486 +elimin 0 9 4.753590 0.000000 1558 +ideal 0 8 4.875197 0.000000 1630 +understood 0 5 5.347108 0.000000 2364 +enrol 0 4 5.568345 0.000000 2613 +tediou 0 3 5.857933 0.000000 3731 +shorter 0 3 5.857933 0.000000 3998 +conclus 0 3 5.857933 0.000000 3367 +horizon 0 3 5.857933 0.000000 3746 +border 1 2 6.263398 6.263398 4980 +prone 0 2 6.263398 0.000000 5178 +shouldconsid 0 2 6.263398 0.000000 6061 +acknowledg 0 2 6.263398 0.000000 6062 +eduzpl 0 1 6.957497 0.000000 17160 +recompil 0 1 6.957497 0.000000 17161 +shatter 0 1 6.957497 0.000000 17162 +yourmachin 0 1 6.957497 0.000000 17163 +zpthi 0 1 6.957497 0.000000 17164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..a6f2a112 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +public 1 202 1.609438 1.609438 43 +includ 0 208 1.609438 0.000000 42 +list 0 201 1.609438 0.000000 39 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +implement 0 152 1.791759 0.000000 52 +architectur 1 139 1.945910 1.945910 77 +problem 1 147 1.945910 1.945910 75 +perform 1 143 1.945910 1.945910 74 +first 0 140 1.945910 0.000000 71 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +studi 0 120 2.079442 0.000000 91 +intern 1 108 2.197225 2.197225 128 +check 0 115 2.197225 0.000000 118 +techniqu 1 99 2.302585 2.302585 138 +memori 0 101 2.302585 0.000000 139 +proceed 1 93 2.397895 2.397895 152 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +wide 0 84 2.484907 0.000000 185 +activ 0 84 2.484907 0.000000 182 +issu 1 78 2.564949 2.564949 211 +dynam 0 76 2.564949 0.000000 194 +june 0 79 2.564949 0.000000 214 +symposium 1 72 2.639057 2.639057 238 +workshop 0 71 2.639057 0.000000 239 +differ 0 66 2.708050 0.000000 253 +abstract 1 62 2.772589 2.772589 276 +function 0 62 2.772589 0.000000 275 +januari 0 62 2.772589 0.000000 264 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +processor 2 54 2.944439 5.888878 335 +instruct 2 53 2.944439 5.888878 332 +extens 0 53 2.944439 0.000000 340 +maintain 1 51 2.995732 2.995732 342 +hardwar 0 51 2.995732 0.000000 350 +investig 0 51 2.995732 0.000000 353 +digit 0 52 2.995732 0.000000 348 +still 0 50 3.044522 0.000000 362 +effect 0 46 3.091042 0.000000 385 +execut 0 45 3.135494 0.000000 404 +long 0 43 3.178054 0.000000 413 +combin 0 42 3.218876 0.000000 421 +futur 0 41 3.218876 0.000000 427 +fast 0 42 3.218876 0.000000 429 +multipl 1 39 3.258097 3.258097 453 +annual 1 40 3.258097 3.258097 458 +submit 0 39 3.258097 0.000000 440 +singl 1 34 3.401197 3.401197 510 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +collabor 0 32 3.465736 0.000000 543 +limit 0 29 3.583519 0.000000 585 +though 0 27 3.637586 0.000000 622 +enabl 1 26 3.688879 3.688879 655 +todai 0 25 3.737670 0.000000 672 +thread 1 23 3.806662 3.806662 722 +util 1 21 3.912023 3.912023 774 +unit 0 21 3.912023 0.000000 779 +corpor 0 21 3.912023 0.000000 802 +chip 0 21 3.912023 0.000000 770 +increas 0 20 3.951244 0.000000 829 +exploit 0 20 3.951244 0.000000 836 +speed 0 18 4.060443 0.000000 911 +minim 0 18 4.060443 0.000000 887 +ultim 0 17 4.110874 0.000000 943 +stanford 0 17 4.110874 0.000000 955 +latenc 1 16 4.174387 4.174387 993 +modern 1 16 4.174387 4.174387 966 +permit 0 16 4.174387 0.000000 962 +choic 0 16 4.174387 0.000000 979 +susan 0 15 4.248495 0.000000 1050 +levi 1 14 4.317488 4.317488 1093 +shown 0 14 4.317488 0.000000 1080 +conduct 0 14 4.317488 0.000000 1065 +dean 0 14 4.317488 0.000000 1104 +convert 0 13 4.382027 0.000000 1122 +amount 0 12 4.465908 0.000000 1208 +hank 0 12 4.465908 0.000000 1253 +philadelphia 0 12 4.465908 0.000000 1244 +multithread 2 11 4.553877 9.107754 1315 +cycl 1 11 4.553877 4.553877 1335 +itali 0 11 4.553877 0.000000 1378 +equip 0 10 4.653960 0.000000 1459 +santa 0 10 4.653960 0.000000 1441 +face 1 9 4.753590 4.753590 1501 +significantli 0 9 4.753590 0.000000 1508 +egger 1 8 4.875197 4.875197 1695 +jack 1 8 4.875197 4.875197 1780 +gain 0 8 4.875197 0.000000 1730 +joel 0 8 4.875197 0.000000 1698 +microprocessor 0 7 5.010635 0.000000 1808 +maxim 0 7 5.010635 0.000000 1944 +simultan 2 6 5.164786 10.329572 2155 +tullsen 1 6 5.164786 5.164786 2081 +superscalar 1 6 5.164786 5.164786 2082 +multiprogram 0 6 5.164786 0.000000 2010 +rebecca 0 6 5.164786 0.000000 2174 +crucial 0 5 5.347108 0.000000 2384 +compet 0 5 5.347108 0.000000 2462 +fetch 0 5 5.347108 0.000000 2567 +hide 0 4 5.568345 0.000000 2996 +throughput 0 4 5.568345 0.000000 2993 +emer 1 3 5.857933 5.857933 3969 +stamm 1 3 5.857933 5.857933 3970 +allevi 0 3 5.857933 0.000000 3643 +interchang 0 3 5.857933 0.000000 3893 +peoplefaculti 0 3 5.857933 0.000000 3981 +affair 0 3 5.857933 0.000000 3916 +andd 1 2 6.263398 6.263398 4346 +suif 0 2 6.263398 0.000000 5944 +lojlo 0 2 6.263398 0.000000 5943 +pagesimultan 0 1 6.957497 0.000000 17165 +projectoverviewpeoplepubl 0 1 6.957497 0.000000 17166 +overviewth 0 1 6.957497 0.000000 17167 +interleav 0 1 6.957497 0.000000 17168 +differentthread 0 1 6.957497 0.000000 17169 +issuefeatur 0 1 6.957497 0.000000 17170 +abilityof 0 1 6.957497 0.000000 17171 +contextsar 0 1 6.957497 0.000000 17172 +exploitthread 0 1 6.957497 0.000000 17173 +formsof 0 1 6.957497 0.000000 17174 +havedemonstr 0 1 6.957497 0.000000 17175 +improvesprocessor 0 1 6.957497 0.000000 17176 +parallelworkload 0 1 6.957497 0.000000 17177 +achievedin 0 1 6.957497 0.000000 17178 +ordersuperscalar 0 1 6.957497 0.000000 17179 +synchronizationtechniqu 0 1 6.957497 0.000000 17180 +otherarchitectur 0 1 6.957497 0.000000 17181 +levygradu 0 1 6.957497 0.000000 17182 +tullsenindustri 0 1 6.957497 0.000000 17183 +andh 0 1 6.957497 0.000000 17184 +margherita 0 1 6.957497 0.000000 17185 +ligur 0 1 6.957497 0.000000 17186 +doon 0 1 6.957497 0.000000 17187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..aa1f98d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +fall 0 181 1.609438 0.000000 40 +class 0 199 1.609438 0.000000 37 +develop 0 174 1.791759 0.000000 53 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +report 1 131 2.079442 2.079442 92 +dayton 0 119 2.079442 0.000000 104 +peopl 0 96 2.302585 0.000000 132 +technic 0 100 2.302585 0.000000 140 +question 0 91 2.397895 0.000000 141 +member 0 84 2.484907 0.000000 165 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +receiv 0 66 2.708050 0.000000 244 +organ 0 65 2.772589 0.000000 265 +faculti 1 56 2.890372 2.890372 325 +three 1 54 2.944439 2.944439 330 +undergradu 0 54 2.944439 0.000000 338 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +frequent 0 49 3.044522 0.000000 367 +answer 0 45 3.135494 0.000000 391 +offer 0 43 3.178054 0.000000 414 +futur 0 41 3.218876 0.000000 427 +form 0 39 3.258097 0.000000 443 +annual 0 40 3.258097 0.000000 458 +streetmadison 0 38 3.295837 0.000000 474 +award 1 34 3.401197 3.401197 523 +statist 1 35 3.401197 3.401197 521 +dissert 0 32 3.465736 0.000000 549 +scientist 0 31 3.496508 0.000000 560 +ask 0 28 3.610918 0.000000 597 +consist 0 26 3.688879 0.000000 651 +doctor 0 24 3.761200 0.000000 709 +departmentunivers 0 24 3.761200 0.000000 711 +alumni 0 21 3.912023 0.000000 807 +util 0 21 3.912023 0.000000 774 +voic 0 21 3.912023 0.000000 806 +excel 0 19 4.007333 0.000000 868 +young 0 16 4.174387 0.000000 991 +women 0 16 4.174387 0.000000 1004 +countri 0 15 4.248495 0.000000 1059 +rank 0 14 4.317488 0.000000 1063 +packard 0 10 4.653960 0.000000 1444 +fellowship 0 10 4.653960 0.000000 1460 +presidenti 0 8 4.875197 0.000000 1737 +pagecomput 0 7 5.010635 0.000000 1900 +timet 0 3 5.857933 0.000000 3471 +guidebook 1 2 6.263398 6.263398 4643 +departmentabout 0 1 6.957497 0.000000 17188 +departmentour 0 1 6.957497 0.000000 17189 +fourteen 0 1 6.957497 0.000000 17190 +incent 0 1 6.957497 0.000000 17191 +colophon 0 1 6.957497 0.000000 17192 +infocomput 0 1 6.957497 0.000000 17193 +madisona 0 1 6.957497 0.000000 17194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..b3effa9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +base 1 165 1.791759 1.791759 50 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +object 1 138 1.945910 1.945910 79 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +first 0 140 1.945910 0.000000 71 +provid 0 121 2.079442 0.000000 94 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +imag 0 91 2.397895 0.000000 161 +select 0 91 2.397895 0.000000 154 +control 1 82 2.484907 2.484907 164 +activ 0 84 2.484907 0.000000 182 +requir 0 81 2.484907 0.000000 167 +second 0 81 2.484907 0.000000 166 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +order 0 69 2.708050 0.000000 249 +view 0 70 2.708050 0.000000 254 +complex 0 64 2.772589 0.000000 269 +simpl 1 60 2.833213 2.833213 298 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +special 0 56 2.890372 0.000000 320 +direct 0 57 2.890372 0.000000 316 +local 1 55 2.944439 2.944439 334 +suggest 0 53 2.944439 0.000000 331 +maintain 0 51 2.995732 0.000000 342 +approach 1 48 3.044522 3.044522 366 +show 1 43 3.178054 3.178054 417 +combin 0 42 3.218876 0.000000 421 +correct 1 38 3.295837 3.295837 462 +slide 0 38 3.295837 0.000000 467 +purpos 1 37 3.332205 3.332205 481 +connect 0 37 3.332205 0.000000 485 +global 1 34 3.401197 3.401197 520 +either 0 35 3.401197 0.000000 506 +posit 0 31 3.496508 0.000000 552 +exist 0 30 3.555348 0.000000 569 +consid 1 29 3.583519 3.583519 590 +focus 0 29 3.583519 0.000000 584 +task 2 25 3.737670 7.475340 678 +strategi 0 25 3.737670 0.000000 682 +motion 0 24 3.761200 0.000000 699 +other 0 24 3.761200 0.000000 697 +reach 0 24 3.761200 0.000000 688 +frame 0 24 3.761200 0.000000 684 +decis 0 23 3.806662 0.000000 728 +mobil 0 23 3.806662 0.000000 730 +lead 0 23 3.806662 0.000000 718 +defin 0 22 3.850148 0.000000 746 +navig 1 21 3.912023 3.912023 796 +avoid 0 21 3.912023 0.000000 799 +region 0 19 4.007333 0.000000 875 +geometr 0 19 4.007333 0.000000 852 +behavior 1 18 4.060443 4.060443 881 +minim 0 18 4.060443 0.000000 887 +attempt 0 17 4.110874 0.000000 917 +scene 1 14 4.317488 4.317488 1114 +achiev 1 14 4.317488 4.317488 1088 +chuck 0 14 4.317488 0.000000 1108 +consider 0 14 4.317488 0.000000 1076 +deriv 0 13 4.382027 0.000000 1145 +emploi 0 12 4.465908 0.000000 1284 +shape 0 12 4.465908 0.000000 1245 +abil 1 11 4.553877 4.553877 1341 +arbitrari 0 11 4.553877 0.000000 1359 +princip 0 10 4.653960 0.000000 1397 +relationship 0 10 4.653960 0.000000 1383 +observ 1 9 4.753590 4.753590 1578 +surfac 1 9 4.753590 4.753590 1574 +recoveri 1 9 4.753590 4.753590 1474 +dyer 0 9 4.753590 0.000000 1573 +formul 0 8 4.875197 0.000000 1733 +maxim 0 7 5.010635 0.000000 1944 +smooth 0 7 5.010635 0.000000 1855 +viewpoint 1 6 5.164786 5.164786 2116 +reconstruct 1 6 5.164786 5.164786 2170 +recov 0 6 5.164786 0.000000 2235 +provabl 1 5 5.347108 5.347108 2558 +align 0 4 5.568345 0.000000 2863 +visibl 0 4 5.568345 0.000000 2994 +simplifi 0 4 5.568345 0.000000 3066 +kyro 0 2 6.263398 0.000000 6063 +kutulako 0 2 6.263398 0.000000 6064 +descriptionof 0 2 6.263398 0.000000 5513 +thequalit 0 2 6.263398 0.000000 5622 +smoothli 1 1 6.957497 6.957497 17195 +simpleobserv 0 1 6.957497 0.000000 17196 +propertieseasi 0 1 6.957497 0.000000 17197 +fixat 0 1 6.957497 0.000000 17198 +toperform 0 1 6.957497 0.000000 17199 +obstacl 0 1 6.957497 0.000000 17200 +ourwork 0 1 6.957497 0.000000 17201 +pointof 0 1 6.957497 0.000000 17202 +makesimpl 0 1 6.957497 0.000000 17203 +geometryof 0 1 6.957497 0.000000 17204 +thesurfac 0 1 6.957497 0.000000 17205 +generalobserv 0 1 6.957497 0.000000 17206 +objectthan 0 1 6.957497 0.000000 17207 +beexploit 0 1 6.957497 0.000000 17208 +anddeterminist 0 1 6.957497 0.000000 17209 +localshap 0 1 6.957497 0.000000 17210 +qualitativestrategi 0 1 6.957497 0.000000 17211 +viewingdirect 0 1 6.957497 0.000000 17212 +selectedpoint 0 1 6.957497 0.000000 17213 +observationso 0 1 6.957497 0.000000 17214 +observationand 0 1 6.957497 0.000000 17215 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..8e5997dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +us 0 329 1.098612 0.000000 16 +gener 0 220 1.386294 0.000000 27 +softwar 0 220 1.386294 0.000000 30 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +distribut 0 162 1.791759 0.000000 51 +avail 0 169 1.791759 0.000000 48 +model 2 145 1.945910 3.891820 69 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +studi 0 120 2.079442 0.000000 91 +final 0 116 2.197225 0.000000 108 +need 0 98 2.302585 0.000000 135 +imag 1 91 2.397895 2.397895 161 +contain 0 81 2.484907 0.000000 174 +activ 0 84 2.484907 0.000000 182 +level 0 87 2.484907 0.000000 180 +optim 0 79 2.564949 0.000000 197 +integr 1 67 2.708050 2.708050 245 +practic 0 70 2.708050 0.000000 246 +function 0 62 2.772589 0.000000 275 +automat 0 61 2.833213 0.000000 306 +special 0 56 2.890372 0.000000 320 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +case 1 51 2.995732 2.995732 351 +approach 0 48 3.044522 0.000000 366 +principl 0 48 3.044522 0.000000 357 +visual 0 48 3.044522 0.000000 372 +combin 0 42 3.218876 0.000000 421 +small 0 39 3.258097 0.000000 447 +field 0 37 3.332205 0.000000 482 +random 0 34 3.401197 0.000000 511 +global 0 34 3.401197 0.000000 520 +transform 0 32 3.465736 0.000000 542 +consid 1 29 3.583519 3.583519 590 +turn 1 29 3.583519 3.583519 586 +framework 0 28 3.610918 0.000000 606 +determin 0 27 3.637586 0.000000 630 +detect 1 26 3.688879 3.688879 646 +experiment 0 26 3.688879 0.000000 645 +valu 0 25 3.737670 0.000000 665 +task 0 25 3.737670 0.000000 678 +initi 0 23 3.806662 0.000000 717 +recognit 0 23 3.806662 0.000000 723 +region 0 19 4.007333 0.000000 875 +along 0 18 4.060443 0.000000 878 +minim 0 18 4.060443 0.000000 887 +lower 0 18 4.060443 0.000000 886 +regular 1 17 4.110874 4.110874 929 +estim 0 17 4.110874 0.000000 930 +conduct 0 14 4.317488 0.000000 1065 +deriv 1 13 4.382027 4.382027 1145 +directli 0 13 4.382027 0.000000 1141 +arbitrari 1 11 4.553877 4.553877 1359 +valid 0 11 4.553877 0.000000 1299 +classif 1 9 4.753590 4.753590 1586 +classifi 0 9 4.753590 0.000000 1537 +equival 0 9 4.753590 0.000000 1496 +extract 1 8 4.875197 4.875197 1728 +formul 1 8 4.875197 4.875197 1733 +invari 0 8 4.875197 0.000000 1748 +furthermor 0 6 5.164786 0.000000 2141 +snake 1 5 5.347108 5.347108 2281 +yield 1 5 5.347108 5.347108 2458 +chin 0 5 5.347108 0.000000 2408 +stabl 0 5 5.347108 0.000000 2309 +markov 0 5 5.347108 0.000000 2280 +contour 2 4 5.568345 11.136690 2812 +subsequ 0 4 5.568345 0.000000 2665 +bayesian 0 4 5.568345 0.000000 2671 +rigor 0 4 5.568345 0.000000 3030 +energi 1 3 5.857933 5.857933 3950 +implicitli 0 3 5.857933 0.000000 3620 +hough 0 3 5.857933 0.000000 3527 +influenc 0 3 5.857933 0.000000 3349 +deform 1 2 6.263398 6.263398 6065 +criterion 0 2 6.263398 0.000000 5885 +pearson 0 2 6.263398 0.000000 5245 +summat 0 2 6.263398 0.000000 5325 +peak 0 2 6.263398 0.000000 5553 +confirm 0 2 6.263398 0.000000 4101 +noisi 1 1 6.957497 6.957497 17216 +fung 0 1 6.957497 0.000000 17217 +roland 0 1 6.957497 0.000000 17218 +ofact 0 1 6.957497 0.000000 17219 +minimax 0 1 6.957497 0.000000 17220 +wherebi 0 1 6.957497 0.000000 17221 +anduniqu 0 1 6.957497 0.000000 17222 +priordistribut 0 1 6.957497 0.000000 17223 +exert 0 1 6.957497 0.000000 17224 +posterior 0 1 6.957497 0.000000 17225 +withpattern 0 1 6.957497 0.000000 17226 +nearman 0 1 6.957497 0.000000 17227 +lemma 0 1 6.957497 0.000000 17228 +classificationtest 0 1 6.957497 0.000000 17229 +margin 0 1 6.957497 0.000000 17230 +gsnake 0 1 6.957497 0.000000 17231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..54e8ee77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +link 0 247 1.386294 0.000000 24 +class 1 199 1.609438 1.609438 37 +data 3 170 1.791759 5.375277 49 +base 2 165 1.791759 3.583518 50 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +algorithm 0 162 1.791759 0.000000 57 +object 3 138 1.945910 5.837730 79 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +construct 0 139 1.945910 0.000000 82 +number 1 130 2.079442 2.079442 97 +provid 0 121 2.079442 0.000000 94 +mathemat 1 108 2.197225 2.197225 123 +place 0 106 2.197225 0.000000 124 +specif 0 106 2.197225 0.000000 106 +take 0 97 2.302585 0.000000 134 +user 0 104 2.302585 0.000000 137 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +follow 0 92 2.397895 0.000000 143 +real 0 93 2.397895 0.000000 144 +commun 0 95 2.397895 0.000000 157 +contain 1 81 2.484907 2.484907 174 +control 1 82 2.484907 2.484907 164 +thing 0 84 2.484907 0.000000 189 +help 0 83 2.484907 0.000000 175 +complet 0 77 2.564949 0.000000 208 +exampl 0 77 2.564949 0.000000 195 +interfac 0 79 2.564949 0.000000 209 +orient 0 80 2.564949 0.000000 205 +order 1 69 2.708050 2.708050 249 +function 1 62 2.772589 2.772589 275 +foundat 1 62 2.772589 2.772589 286 +abstract 1 62 2.772589 2.772589 276 +guid 0 63 2.772589 0.000000 267 +experi 0 64 2.772589 0.000000 283 +creat 0 63 2.772589 0.000000 277 +complex 0 64 2.772589 0.000000 269 +type 1 61 2.833213 2.833213 296 +special 0 56 2.890372 0.000000 320 +scientif 1 53 2.944439 2.944439 341 +allow 0 53 2.944439 0.000000 333 +particular 1 51 2.995732 2.995732 352 +visual 2 48 3.044522 6.089044 372 +principl 0 48 3.044522 0.000000 357 +possibl 0 47 3.091042 0.000000 378 +natur 0 44 3.135494 0.000000 406 +anoth 0 45 3.135494 0.000000 408 +show 1 43 3.178054 3.178054 417 +howev 0 41 3.218876 0.000000 422 +map 1 39 3.258097 3.258097 452 +brian 0 38 3.295837 0.000000 466 +paul 0 38 3.295837 0.000000 471 +prototyp 0 38 3.295837 0.000000 463 +close 0 38 3.295837 0.000000 465 +purpos 1 37 3.332205 3.332205 481 +tree 0 36 3.367296 0.000000 492 +approxim 1 35 3.401197 3.401197 509 +idea 0 32 3.465736 0.000000 545 +given 0 32 3.465736 0.000000 538 +express 0 32 3.465736 0.000000 540 +scientist 1 31 3.496508 3.496508 560 +anim 0 31 3.496508 0.000000 557 +specifi 1 30 3.555348 3.555348 568 +domain 0 30 3.555348 0.000000 564 +graph 0 30 3.555348 0.000000 576 +built 0 29 3.583519 0.000000 592 +arrai 1 27 3.637586 3.637586 627 +quit 0 27 3.637586 0.000000 633 +repres 0 26 3.688879 0.000000 656 +altern 0 26 3.688879 0.000000 641 +fundament 0 25 3.737670 0.000000 661 +frame 0 24 3.761200 0.000000 684 +interpret 0 24 3.761200 0.000000 686 +seri 0 24 3.761200 0.000000 708 +flow 0 24 3.761200 0.000000 700 +displai 3 23 3.806662 11.419986 712 +variabl 1 23 3.806662 3.806662 715 +sequenc 0 23 3.806662 0.000000 734 +size 0 23 3.806662 0.000000 713 +defin 2 22 3.850148 7.700296 746 +color 1 22 3.850148 3.850148 762 +thu 1 21 3.912023 3.912023 773 +fact 1 21 3.912023 3.912023 780 +assum 0 19 4.007333 0.000000 845 +appropri 1 18 4.060443 4.060443 883 +along 1 18 4.060443 4.060443 878 +render 0 17 4.110874 0.000000 947 +condit 1 16 4.174387 4.174387 975 +upon 0 16 4.174387 0.000000 978 +alreadi 0 16 4.174387 0.000000 963 +precis 1 15 4.248495 4.248495 1023 +finit 1 14 4.317488 4.317488 1106 +chuck 0 14 4.317488 0.000000 1108 +context 0 13 4.382027 0.000000 1153 +recurs 0 13 4.382027 0.000000 1127 +amount 1 12 4.465908 4.465908 1208 +primit 1 11 4.553877 4.553877 1317 +bill 0 11 4.553877 0.000000 1297 +sens 0 11 4.553877 0.000000 1305 +volum 0 11 4.553877 0.000000 1347 +relationship 0 10 4.653960 0.000000 1383 +dyer 0 9 4.753590 0.000000 1573 +assumpt 0 9 4.753590 0.000000 1514 +ideal 1 8 4.875197 4.875197 1630 +satisfi 1 8 4.875197 4.875197 1694 +therefor 0 7 5.010635 0.000000 1822 +fromth 0 7 5.010635 0.000000 1802 +pipelin 0 7 5.010635 0.000000 1830 +analyt 0 7 5.010635 0.000000 1913 +consequ 0 6 5.164786 0.000000 1989 +tupl 0 5 5.347108 0.000000 2244 +steer 0 5 5.347108 0.000000 2328 +infinit 1 4 5.568345 5.568345 2596 +pixel 1 4 5.568345 5.568345 2831 +wherea 0 4 5.568345 0.000000 2597 +encod 0 4 5.568345 0.000000 2929 +rigor 0 4 5.568345 0.000000 3030 +fora 0 4 5.568345 0.000000 2697 +lattic 2 3 5.857933 11.715866 3721 +interfacefor 0 3 5.857933 0.000000 3534 +scalar 1 2 6.263398 6.263398 4815 +temperatur 1 2 6.263398 6.263398 5985 +ofdata 1 2 6.263398 6.263398 6038 +hibbard 0 2 6.263398 0.000000 6066 +theidea 0 2 6.263398 0.000000 5428 +themathemat 0 2 6.263398 0.000000 4421 +isomorph 0 2 6.263398 0.000000 5976 +scientificdata 0 2 6.263398 0.000000 6067 +radianc 0 2 6.263398 0.000000 6068 +ofcours 0 2 6.263398 0.000000 4064 +axi 0 2 6.263398 0.000000 6069 +remark 0 2 6.263398 0.000000 4124 +wedo 0 2 6.263398 0.000000 5772 +datatyp 0 2 6.263398 0.000000 4129 +expressivenesscondit 1 1 6.957497 6.957497 17232 +voxel 1 1 6.957497 6.957497 17233 +calleda 0 1 6.957497 0.000000 17234 +adha 0 1 6.957497 0.000000 17235 +objectsrepres 0 1 6.957497 0.000000 17236 +objectsfrequ 0 1 6.957497 0.000000 17237 +functionswith 0 1 6.957497 0.000000 17238 +containfinit 0 1 6.957497 0.000000 17239 +chosenfrom 0 1 6.957497 0.000000 17240 +palett 0 1 6.957497 0.000000 17241 +numbersof 0 1 6.957497 0.000000 17242 +computationalmodel 0 1 6.957497 0.000000 17243 +informationcont 0 1 6.957497 0.000000 17244 +thatdisplai 0 1 6.957497 0.000000 17245 +onlythos 0 1 6.957497 0.000000 17246 +itimpl 0 1 6.957497 0.000000 17247 +satisfyingth 0 1 6.957497 0.000000 17248 +expressivenss 0 1 6.957497 0.000000 17249 +onhow 0 1 6.957497 0.000000 17250 +wecan 0 1 6.957497 0.000000 17251 +howprecis 0 1 6.957497 0.000000 17252 +voxelresolut 0 1 6.957497 0.000000 17253 +visualizationprocess 0 1 6.957497 0.000000 17254 +objectsto 0 1 6.957497 0.000000 17255 +theexpress 0 1 6.957497 0.000000 17256 +primitivevari 0 1 6.957497 0.000000 17257 +latitud 0 1 6.957497 0.000000 17258 +constructor 0 1 6.957497 0.000000 17259 +appropriatefor 0 1 6.957497 0.000000 17260 +containsth 0 1 6.957497 0.000000 17261 +canalso 0 1 6.957497 0.000000 17262 +displayi 0 1 6.957497 0.000000 17263 +graphicsprimit 0 1 6.957497 0.000000 17264 +locationand 0 1 6.957497 0.000000 17265 +animationsequ 0 1 6.957497 0.000000 17266 +thedisplai 0 1 6.957497 0.000000 17267 +isnatur 0 1 6.957497 0.000000 17268 +andtemperatur 0 1 6.957497 0.000000 17269 +calledvi 0 1 6.957497 0.000000 17270 +adthat 0 1 6.957497 0.000000 17271 +theircomput 0 1 6.957497 0.000000 17272 +theirprogram 0 1 6.957497 0.000000 17273 +thevi 0 1 6.957497 0.000000 17274 +vvof 0 1 6.957497 0.000000 17275 +thatsatisfi 0 1 6.957497 0.000000 17276 +implementationi 0 1 6.957497 0.000000 17277 +auser 0 1 6.957497 0.000000 17278 +abstractionof 0 1 6.957497 0.000000 17279 +ofmap 0 1 6.957497 0.000000 17280 +defineddata 0 1 6.957497 0.000000 17281 +ingener 0 1 6.957497 0.000000 17282 +usualapproach 0 1 6.957497 0.000000 17283 +bywrit 0 1 6.957497 0.000000 17284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..a9abb8d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +engin 0 297 1.098612 0.000000 20 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +object 0 138 1.945910 0.000000 79 +support 0 132 1.945910 0.000000 83 +high 1 130 2.079442 2.079442 101 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +world 0 115 2.197225 0.000000 126 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +larg 0 82 2.484907 0.000000 168 +resourc 0 81 2.484907 0.000000 172 +environ 0 84 2.484907 0.000000 177 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +help 0 83 2.484907 0.000000 175 +goal 0 66 2.708050 0.000000 250 +polici 0 64 2.772589 0.000000 279 +collect 0 65 2.772589 0.000000 268 +guid 0 63 2.772589 0.000000 267 +scientist 0 31 3.496508 0.000000 560 +team 0 27 3.637586 0.000000 625 +challeng 0 26 3.688879 0.000000 653 +enabl 0 26 3.688879 0.000000 655 +increas 0 20 3.951244 0.000000 829 +edulast 0 17 4.110874 0.000000 927 +admin 0 9 4.753590 0.000000 1476 +pool 0 6 5.164786 0.000000 2225 +condor 2 5 5.347108 10.694216 2577 +own 0 5 5.347108 0.000000 2531 +throughput 1 4 5.568345 5.568345 2993 +deploi 0 3 5.857933 0.000000 3750 +evaluatemechan 0 1 6.957497 0.000000 17285 +technologicaland 0 1 6.957497 0.000000 17286 +sociolog 0 1 6.957497 0.000000 17287 +suggestionscondor 0 1 6.957497 0.000000 17288 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..2024b75e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +peopl 0 96 2.302585 0.000000 132 +homepag 0 93 2.397895 0.000000 148 +septemb 0 65 2.772589 0.000000 274 +next 1 34 3.401197 3.401197 517 +miron 0 14 4.317488 0.000000 1110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..e67f8e70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 0 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +includ 2 208 1.609438 3.218876 42 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +file 1 132 1.945910 1.945910 70 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +provid 1 121 2.079442 2.079442 94 +databas 1 122 2.079442 2.079442 86 +high 0 130 2.079442 0.000000 101 +compil 0 122 2.079442 0.000000 96 +machin 0 129 2.079442 0.000000 95 +report 0 131 2.079442 0.000000 92 +version 1 113 2.197225 2.197225 122 +structur 0 106 2.197225 0.000000 105 +manag 0 114 2.197225 0.000000 125 +code 0 108 2.197225 0.000000 116 +send 0 114 2.197225 0.000000 109 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +octob 1 89 2.397895 2.397895 156 +sinc 0 90 2.397895 0.000000 159 +select 0 91 2.397895 0.000000 154 +question 0 91 2.397895 0.000000 141 +comment 0 93 2.397895 0.000000 146 +contain 1 81 2.484907 2.484907 174 +wide 1 84 2.484907 2.484907 185 +member 0 84 2.484907 0.000000 165 +control 0 82 2.484907 0.000000 164 +level 0 87 2.484907 0.000000 180 +environ 0 84 2.484907 0.000000 177 +interfac 1 79 2.564949 2.564949 209 +optim 0 79 2.564949 0.000000 197 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +sourc 0 77 2.564949 0.000000 201 +effici 0 73 2.639057 0.000000 233 +addit 0 74 2.639057 0.000000 228 +degre 0 69 2.708050 0.000000 259 +main 0 67 2.708050 0.000000 256 +evalu 1 64 2.772589 2.772589 266 +complex 0 64 2.772589 0.000000 269 +organ 0 65 2.772589 0.000000 265 +collect 0 65 2.772589 0.000000 268 +interact 0 62 2.772589 0.000000 270 +guid 0 63 2.772589 0.000000 267 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +variou 0 56 2.890372 0.000000 317 +sever 0 56 2.890372 0.000000 322 +overview 0 56 2.890372 0.000000 323 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +instruct 0 53 2.944439 0.000000 332 +investig 0 51 2.995732 0.000000 353 +made 0 44 3.135494 0.000000 398 +term 0 43 3.178054 0.000000 411 +combin 1 42 3.218876 3.218876 421 +announc 1 40 3.258097 3.258097 441 +programm 0 39 3.258097 0.000000 445 +transact 0 39 3.258097 0.000000 438 +submit 0 39 3.258097 0.000000 440 +manual 0 35 3.401197 0.000000 504 +queri 1 33 3.433987 3.433987 524 +extend 0 32 3.465736 0.000000 539 +rang 1 30 3.555348 3.555348 565 +domain 0 30 3.555348 0.000000 564 +releas 1 28 3.610918 3.610918 616 +linux 1 27 3.637586 3.637586 631 +rule 0 26 3.688879 0.000000 638 +enhanc 0 26 3.688879 0.000000 644 +relev 0 26 3.688879 0.000000 637 +comp 0 26 3.688879 0.000000 650 +strategi 1 25 3.737670 3.737670 682 +seri 0 24 3.761200 0.000000 708 +variabl 0 23 3.806662 0.000000 715 +instal 1 22 3.850148 3.850148 754 +disk 1 22 3.850148 3.850148 747 +among 0 21 3.912023 0.000000 781 +newsgroup 0 21 3.912023 0.000000 783 +binari 2 20 3.951244 7.902488 823 +edulast 0 17 4.110874 0.000000 927 +choos 0 16 4.174387 0.000000 964 +permit 0 16 4.174387 0.000000 962 +choic 0 16 4.174387 0.000000 979 +atth 0 15 4.248495 0.000000 1019 +indic 0 15 4.248495 0.000000 1013 +forth 0 13 4.382027 0.000000 1186 +misc 0 13 4.382027 0.000000 1124 +solari 1 12 4.465908 4.465908 1238 +robust 0 12 4.465908 0.000000 1271 +deduct 0 12 4.465908 0.000000 1236 +stai 0 12 4.465908 0.000000 1215 +primit 0 11 4.553877 0.000000 1317 +modul 1 10 4.653960 4.653960 1434 +resid 1 10 4.653960 4.653960 1461 +underli 0 10 4.653960 0.000000 1410 +rich 0 10 4.653960 0.000000 1396 +declar 1 9 4.753590 4.753590 1526 +desir 0 9 4.753590 0.000000 1542 +readm 0 8 4.875197 0.000000 1699 +canb 0 7 5.010635 0.000000 1846 +aggreg 0 6 5.164786 0.000000 2219 +coral 3 5 5.347108 16.041324 2538 +augment 0 5 5.347108 0.000000 2350 +tupl 0 5 5.347108 0.000000 2244 +quantifi 0 5 5.347108 0.000000 2525 +lang 0 5 5.347108 0.000000 2294 +imper 1 4 5.568345 5.568345 3067 +delet 0 4 5.568345 0.000000 2691 +suno 0 4 5.568345 0.000000 2790 +claus 0 3 5.857933 0.000000 3733 +hpux 0 3 5.857933 0.000000 3780 +grab 1 2 6.263398 6.263398 5723 +objectiveoverviewreleas 0 2 6.263398 0.000000 6070 +informationse 0 2 6.263398 0.000000 6071 +horn 0 2 6.263398 0.000000 6072 +negat 0 2 6.263398 0.000000 6073 +andautomat 0 2 6.263398 0.000000 5413 +reciev 0 2 6.263398 0.000000 5600 +nobin 1 1 6.957497 6.957497 17289 +projectcor 0 1 6.957497 0.000000 17290 +projectdocu 0 1 6.957497 0.000000 17291 +coralpeopl 0 1 6.957497 0.000000 17292 +coraloth 0 1 6.957497 0.000000 17293 +madisonobject 0 1 6.957497 0.000000 17294 +efficientdeduct 0 1 6.957497 0.000000 17295 +coralsystem 0 1 6.957497 0.000000 17296 +durationof 0 1 6.957497 0.000000 17297 +declaritiveand 0 1 6.957497 0.000000 17298 +supportsgener 0 1 6.957497 0.000000 17299 +coralimplement 0 1 6.957497 0.000000 17300 +modulein 0 1 6.957497 0.000000 17301 +insertand 0 1 6.957497 0.000000 17302 +canprogram 0 1 6.957497 0.000000 17303 +withcor 0 1 6.957497 0.000000 17304 +allowingc 0 1 6.957497 0.000000 17305 +coralimplemen 0 1 6.957497 0.000000 17306 +theexodusstorag 0 1 6.957497 0.000000 17307 +manang 0 1 6.957497 0.000000 17308 +aclient 0 1 6.957497 0.000000 17309 +requiringy 0 1 6.957497 0.000000 17310 +announcemnt 0 1 6.957497 0.000000 17311 +listwhich 0 1 6.957497 0.000000 17312 +shawn 0 1 6.957497 0.000000 17313 +flisakowski 0 1 6.957497 0.000000 17314 +flisakow 0 1 6.957497 0.000000 17315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..fb954150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +applic 2 170 1.791759 3.583518 56 +algorithm 2 162 1.791759 3.583518 57 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +contact 0 153 1.791759 0.000000 59 +data 0 170 1.791759 0.000000 49 +problem 2 147 1.945910 3.891820 75 +file 1 132 1.945910 1.945910 70 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +document 1 121 2.079442 2.079442 89 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +mathemat 0 108 2.197225 0.000000 123 +access 1 102 2.302585 2.302585 136 +techniqu 0 99 2.302585 0.000000 138 +search 1 95 2.397895 2.397895 155 +center 0 88 2.397895 0.000000 158 +commun 0 95 2.397895 0.000000 157 +mani 0 92 2.397895 0.000000 150 +octob 0 89 2.397895 0.000000 156 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +librari 0 87 2.484907 0.000000 181 +requir 0 81 2.484907 0.000000 167 +method 1 80 2.564949 2.564949 213 +optim 0 79 2.564949 0.000000 197 +sourc 0 77 2.564949 0.000000 201 +interfac 0 79 2.564949 0.000000 209 +appli 1 71 2.639057 2.639057 226 +nation 1 74 2.639057 2.639057 240 +solv 0 73 2.639057 0.000000 234 +function 1 62 2.772589 2.772589 275 +result 0 65 2.772589 0.000000 281 +collect 0 65 2.772589 0.000000 268 +evalu 0 64 2.772589 0.000000 266 +laboratori 0 63 2.772589 0.000000 292 +content 0 59 2.833213 0.000000 302 +point 1 58 2.890372 2.890372 319 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +major 0 56 2.890372 0.000000 315 +overview 0 56 2.890372 0.000000 323 +three 0 54 2.944439 0.000000 330 +allow 0 53 2.944439 0.000000 333 +extens 0 53 2.944439 0.000000 340 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +give 1 50 3.044522 3.044522 359 +basic 0 50 3.044522 0.000000 360 +pointer 0 48 3.044522 0.000000 368 +without 0 50 3.044522 0.000000 370 +archiv 0 49 3.044522 0.000000 364 +possibl 0 47 3.091042 0.000000 378 +directori 1 45 3.135494 3.135494 396 +describ 0 45 3.135494 0.000000 400 +keep 0 44 3.135494 0.000000 409 +linear 1 41 3.218876 3.218876 431 +small 0 39 3.258097 0.000000 447 +form 0 39 3.258097 0.000000 443 +origin 0 38 3.295837 0.000000 472 +download 0 36 3.367296 0.000000 489 +approxim 1 35 3.401197 3.401197 509 +michael 1 35 3.401197 3.401197 514 +survei 0 35 3.401197 0.000000 513 +everi 0 34 3.401197 0.000000 519 +within 0 33 3.433987 0.000000 525 +given 1 32 3.465736 3.465736 538 +taken 0 31 3.496508 0.000000 555 +exist 0 30 3.555348 0.000000 569 +option 0 30 3.555348 0.000000 575 +steve 0 29 3.583519 0.000000 594 +becom 0 28 3.610918 0.000000 603 +measur 0 28 3.610918 0.000000 609 +determin 0 27 3.637586 0.000000 630 +relev 1 26 3.688879 3.688879 637 +consist 1 26 3.688879 3.688879 651 +subject 0 26 3.688879 0.000000 647 +compar 0 26 3.688879 0.000000 648 +strategi 1 25 3.737670 3.737670 682 +known 0 24 3.761200 0.000000 702 +equat 1 23 3.806662 3.806662 724 +sequenc 1 23 3.806662 3.806662 734 +serv 0 22 3.850148 0.000000 758 +almost 0 22 3.850148 0.000000 742 +path 1 21 3.912023 3.912023 778 +similar 0 21 3.912023 0.000000 771 +avoid 0 21 3.912023 0.000000 799 +entir 0 20 3.951244 0.000000 811 +along 0 18 4.060443 0.000000 878 +spars 0 16 4.174387 0.000000 989 +matlab 1 14 4.317488 4.317488 1081 +role 0 14 4.317488 0.000000 1101 +nonlinear 0 14 4.317488 0.000000 1107 +easili 0 14 4.317488 0.000000 1077 +econom 1 13 4.382027 4.382027 1184 +cannot 1 13 4.382027 4.382027 1144 +step 1 13 4.382027 4.382027 1138 +directli 0 13 4.382027 0.000000 1141 +forth 0 13 4.382027 0.000000 1186 +deriv 0 13 4.382027 0.000000 1145 +emploi 1 12 4.465908 4.465908 1284 +iter 1 12 4.465908 4.465908 1206 +evolv 0 12 4.465908 0.000000 1223 +regard 0 11 4.553877 0.000000 1309 +underli 0 10 4.653960 0.000000 1410 +establish 0 9 4.753590 0.000000 1532 +routin 0 9 4.753590 0.000000 1549 +mile 1 8 4.875197 4.875197 1743 +ferri 1 8 4.875197 4.875197 1715 +formul 0 8 4.875197 0.000000 1733 +solver 2 7 5.010635 10.021270 1911 +newton 2 7 5.010635 10.021270 1824 +smooth 1 7 5.010635 5.010635 1855 +secondari 0 7 5.010635 0.000000 1884 +converg 0 7 5.010635 0.000000 1844 +zero 0 7 5.010635 0.000000 1896 +divers 0 6 5.164786 0.000000 2232 +mix 0 6 5.164786 0.000000 2200 +freeli 0 6 5.164786 0.000000 2014 +subsystem 0 6 5.164786 0.000000 2015 +interior 1 5 5.347108 5.347108 2439 +decad 0 5 5.347108 0.000000 2455 +complementari 0 5 5.347108 0.000000 2523 +pivot 0 5 5.347108 0.000000 2426 +merit 0 5 5.347108 0.000000 2466 +argonn 0 5 5.347108 0.000000 2461 +monograph 0 4 5.568345 0.000000 2860 +areavail 0 4 5.568345 0.000000 2810 +colorado 0 4 5.568345 0.000000 2938 +algorithmsand 0 4 5.568345 0.000000 2680 +trick 0 4 5.568345 0.000000 2967 +complementar 2 3 5.857933 11.715866 3999 +toolbox 1 3 5.857933 5.857933 3112 +neta 0 3 5.857933 0.000000 3789 +forthes 0 3 5.857933 0.000000 3199 +andm 0 3 5.857933 0.000000 3901 +violat 0 3 5.857933 0.000000 3211 +engineeringand 0 3 5.857933 0.000000 3779 +preprocessor 0 3 5.857933 0.000000 3844 +energi 0 3 5.857933 0.000000 3950 +gam 2 2 6.263398 12.526796 4758 +edufor 0 2 6.263398 0.000000 5831 +lemk 0 2 6.263398 0.000000 5693 +similarto 0 2 6.263398 0.000000 6074 +anapproxim 0 2 6.263398 0.000000 5045 +norm 0 2 6.263398 0.000000 5643 +perturb 0 2 6.263398 0.000000 6075 +leadto 0 2 6.263398 0.000000 5350 +residu 0 2 6.263398 0.000000 4741 +thecurr 0 2 6.263398 0.000000 5862 +equilibrium 0 2 6.263398 0.000000 4259 +thegam 0 2 6.263398 0.000000 5430 +pointmethod 0 2 6.263398 0.000000 4835 +nonsmooth 1 1 6.957497 6.957497 17316 +mcplib 1 1 6.957497 6.957497 17317 +functionevalu 1 1 6.957497 6.957497 17318 +subproblem 1 1 6.957497 6.957497 17319 +uponreformul 1 1 6.957497 6.957497 17320 +fruitfuldisciplin 0 1 6.957497 0.000000 17321 +incomplementar 0 1 6.957497 0.000000 17322 +meetingsof 0 1 6.957497 0.000000 17323 +forcomplementar 0 1 6.957497 0.000000 17324 +researcherssoftwar 0 1 6.957497 0.000000 17325 +problemdescript 0 1 6.957497 0.000000 17326 +frommatlab 0 1 6.957497 0.000000 17327 +jacobian 0 1 6.957497 0.000000 17328 +specificvers 0 1 6.957497 0.000000 17329 +hook 0 1 6.957497 0.000000 17330 +rutherford 0 1 6.957497 0.000000 17331 +classicaljosephi 0 1 6.957497 0.000000 17332 +linearizedsubproblem 0 1 6.957497 0.000000 17333 +defineth 0 1 6.957497 0.000000 17334 +dampedlinesearch 0 1 6.957497 0.000000 17335 +infeas 0 1 6.957497 0.000000 17336 +restartprocedur 0 1 6.957497 0.000000 17337 +totermin 0 1 6.957497 0.000000 17338 +rescal 0 1 6.957497 0.000000 17339 +equilibr 0 1 6.957497 0.000000 17340 +elementsappear 0 1 6.957497 0.000000 17341 +mcpor 0 1 6.957497 0.000000 17342 +anonsmooth 0 1 6.957497 0.000000 17343 +reformul 0 1 6.957497 0.000000 17344 +algorithmconsist 0 1 6.957497 0.000000 17345 +pathto 0 1 6.957497 0.000000 17346 +aposs 0 1 6.957497 0.000000 17347 +thepath 0 1 6.957497 0.000000 17348 +partiallycomput 0 1 6.957497 0.000000 17349 +relinear 0 1 6.957497 0.000000 17350 +anonmonoton 0 1 6.957497 0.000000 17351 +watchdog 0 1 6.957497 0.000000 17352 +minima 0 1 6.957497 0.000000 17353 +robustnessimprov 0 1 6.957497 0.000000 17354 +proxim 0 1 6.957497 0.000000 17355 +qpcomp 0 1 6.957497 0.000000 17356 +ishandl 0 1 6.957497 0.000000 17357 +thenapproxim 0 1 6.957497 0.000000 17358 +theaccuraci 0 1 6.957497 0.000000 17359 +mpsge 0 1 6.957497 0.000000 17360 +thatallow 0 1 6.957497 0.000000 17361 +nemsth 0 1 6.957497 0.000000 17362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..12ffa2c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +avail 0 169 1.791759 0.000000 48 +contact 0 153 1.791759 0.000000 59 +object 0 138 1.945910 0.000000 79 +construct 0 139 1.945910 0.000000 82 +support 0 132 1.945910 0.000000 83 +relat 0 139 1.945910 0.000000 68 +databas 0 122 2.079442 0.000000 86 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +manag 1 114 2.197225 2.197225 125 +user 1 104 2.302585 2.302585 137 +need 0 98 2.302585 0.000000 135 +requir 0 81 2.484907 0.000000 167 +orient 0 80 2.564949 0.000000 205 +april 0 77 2.564949 0.000000 196 +david 0 71 2.639057 0.000000 232 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +still 0 50 3.044522 0.000000 362 +michael 0 35 3.401197 0.000000 514 +storag 1 31 3.496508 3.496508 553 +mike 0 24 3.761200 0.000000 703 +prepar 0 20 3.951244 0.000000 824 +benchmark 1 19 4.007333 4.007333 859 +minim 0 18 4.060443 0.000000 887 +carei 0 8 4.875197 0.000000 1781 +licens 0 5 5.347108 0.000000 2520 +exodu 1 4 5.568345 5.568345 3075 +zwill 1 4 5.568345 5.568345 3076 +successor 0 3 5.857933 0.000000 3576 +theexodu 0 2 6.263398 0.000000 6076 +persistentprogram 0 2 6.263398 0.000000 5997 +pageexodu 0 1 6.957497 0.000000 17363 +toolkitnot 0 1 6.957497 0.000000 17364 +succed 0 1 6.957497 0.000000 17365 +theshor 0 1 6.957497 0.000000 17366 +eduprincip 0 1 6.957497 0.000000 17367 +dewittse 0 1 6.957497 0.000000 17368 +exodusshor 0 1 6.957497 0.000000 17369 +exoduslatest 0 1 6.957497 0.000000 17370 +compilercontribut 0 1 6.957497 0.000000 17371 +managera 0 1 6.957497 0.000000 17372 +exodus_al 0 1 6.957497 0.000000 17373 +oodbsdat 0 1 6.957497 0.000000 17374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..7df2f4da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 0 284 1.098612 0.000000 21 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +data 2 170 1.791759 3.583518 49 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +base 0 165 1.791759 0.000000 50 +wisconsin 0 169 1.791759 0.000000 54 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +area 1 144 1.945910 1.945910 80 +problem 0 147 1.945910 0.000000 75 +click 0 142 1.945910 0.000000 78 +architectur 0 139 1.945910 0.000000 77 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +document 0 121 2.079442 0.000000 89 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +manag 1 114 2.197225 2.197225 125 +assist 0 112 2.197225 0.000000 113 +version 0 113 2.197225 0.000000 122 +peopl 0 96 2.302585 0.000000 132 +advanc 0 99 2.302585 0.000000 130 +user 0 104 2.302585 0.000000 137 +access 0 102 2.302585 0.000000 136 +graphic 1 90 2.397895 2.397895 147 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +help 0 83 2.484907 0.000000 175 +west 0 83 2.484907 0.000000 192 +server 1 76 2.564949 2.564949 204 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +optim 0 79 2.564949 0.000000 197 +orient 0 80 2.564949 0.000000 205 +exampl 0 77 2.564949 0.000000 195 +come 0 78 2.564949 0.000000 202 +addit 0 74 2.639057 0.000000 228 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +david 0 71 2.639057 0.000000 232 +order 0 69 2.708050 0.000000 249 +creat 1 63 2.772589 2.772589 277 +result 1 65 2.772589 2.772589 281 +complex 0 64 2.772589 0.000000 269 +prof 0 64 2.772589 0.000000 273 +type 1 61 2.833213 2.833213 296 +content 0 59 2.833213 0.000000 302 +back 0 60 2.833213 0.000000 297 +point 0 58 2.890372 0.000000 319 +sever 0 56 2.890372 0.000000 322 +sampl 1 53 2.944439 2.944439 339 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +tabl 0 51 2.995732 0.000000 346 +set 1 50 3.044522 3.044522 361 +could 0 46 3.091042 0.000000 383 +execut 1 45 3.135494 3.135494 404 +video 0 44 3.135494 0.000000 405 +examin 0 42 3.218876 0.000000 424 +multipl 0 39 3.258097 0.000000 453 +streetmadison 0 38 3.295837 0.000000 474 +connect 0 37 3.332205 0.000000 485 +especi 0 36 3.367296 0.000000 496 +either 1 35 3.401197 3.401197 506 +queri 2 33 3.433987 6.867974 524 +extend 0 32 3.465736 0.000000 539 +ad 0 32 3.465736 0.000000 544 +built 0 29 3.583519 0.000000 592 +hope 0 28 3.610918 0.000000 610 +manipul 1 27 3.637586 3.637586 624 +client 1 25 3.737670 3.737670 679 +store 1 24 3.761200 3.761200 693 +scalabl 0 24 3.761200 0.000000 705 +handl 0 24 3.761200 0.000000 685 +displai 1 23 3.806662 3.806662 712 +brows 1 23 3.806662 3.806662 726 +size 0 23 3.806662 0.000000 713 +thread 0 23 3.806662 0.000000 722 +defin 0 22 3.850148 0.000000 746 +mpeg 0 20 3.951244 0.000000 831 +benchmark 0 19 4.007333 0.000000 859 +layer 1 17 4.110874 4.110874 926 +spatial 1 16 4.174387 4.174387 988 +massiv 0 15 4.248495 0.000000 1026 +indic 0 15 4.248495 0.000000 1013 +attribut 1 14 4.317488 4.317488 1092 +front 1 13 4.382027 4.382027 1154 +script 0 13 4.382027 0.000000 1171 +menu 0 13 4.382027 0.000000 1156 +composit 0 13 4.382027 0.000000 1150 +context 0 13 4.382027 0.000000 1153 +calcul 0 12 4.465908 0.000000 1268 +emploi 0 12 4.465908 0.000000 1284 +shore 1 11 4.553877 4.553877 1377 +string 0 11 4.553877 0.000000 1340 +persist 0 11 4.553877 0.000000 1367 +abil 0 11 4.553877 0.000000 1341 +subset 1 10 4.653960 4.653960 1425 +vldb 0 10 4.653960 0.000000 1470 +underli 0 10 4.653960 0.000000 1410 +correspond 0 10 4.653960 0.000000 1382 +custom 0 10 4.653960 0.000000 1414 +label 0 10 4.653960 0.000000 1423 +compos 0 9 4.753590 0.000000 1527 +paradis 3 8 4.875197 14.625591 1782 +polygon 1 8 4.875197 4.875197 1723 +databasesystem 0 8 4.875197 0.000000 1617 +sensit 0 8 4.875197 0.000000 1726 +insert 0 8 4.875197 0.000000 1687 +successfulli 0 7 5.010635 0.000000 1869 +geograph 1 6 5.164786 5.164786 2236 +drop 1 6 5.164786 5.164786 2008 +band 0 6 5.164786 0.000000 2198 +invok 0 6 5.164786 0.000000 2079 +syntax 0 6 5.164786 0.000000 2030 +ship 1 5 5.347108 5.347108 2534 +aim 0 5 5.347108 0.000000 2477 +tupl 0 5 5.347108 0.000000 2244 +madisoncomput 0 5 5.347108 0.000000 2391 +andevalu 0 4 5.568345 0.000000 2706 +zoom 0 4 5.568345 0.000000 2961 +insur 0 4 5.568345 0.000000 2939 +providesa 0 3 5.857933 0.000000 3884 +informationse 0 2 6.263398 0.000000 6071 +serverobject 0 2 6.263398 0.000000 6077 +raster 0 2 6.263398 0.000000 6078 +polylin 0 2 6.263398 0.000000 6079 +sketch 0 2 6.263398 0.000000 5946 +extent 0 2 6.263398 0.000000 6080 +paid 0 2 6.263398 0.000000 6081 +biswadeep 0 2 6.263398 0.000000 4805 +projectparadis 0 1 6.957497 0.000000 17375 +frontend 0 1 6.957497 0.000000 17376 +sequoia 0 1 6.957497 0.000000 17377 +iscap 0 1 6.957497 0.000000 17378 +applyingobject 0 1 6.957497 0.000000 17379 +ofstor 0 1 6.957497 0.000000 17380 +tosignificantli 0 1 6.957497 0.000000 17381 +thatcan 0 1 6.957497 0.000000 17382 +andsupport 0 1 6.957497 0.000000 17383 +paradiseprovid 0 1 6.957497 0.000000 17384 +gisappl 0 1 6.957497 0.000000 17385 +asinteg 0 1 6.957497 0.000000 17386 +circl 0 1 6.957497 0.000000 17387 +spatialattribut 0 1 6.957497 0.000000 17388 +foroverlap 0 1 6.957497 0.000000 17389 +selectingcolor 0 1 6.957497 0.000000 17390 +withad 0 1 6.957497 0.000000 17391 +issueimplicit 0 1 6.957497 0.000000 17392 +arubb 0 1 6.957497 0.000000 17393 +querycompos 0 1 6.957497 0.000000 17394 +databaseschema 0 1 6.957497 0.000000 17395 +beview 0 1 6.957497 0.000000 17396 +bedisplai 0 1 6.957497 0.000000 17397 +sqlwe 0 1 6.957497 0.000000 17398 +extendedset 0 1 6.957497 0.000000 17399 +byus 0 1 6.957497 0.000000 17400 +standarddatabas 0 1 6.957497 0.000000 17401 +anddrop 0 1 6.957497 0.000000 17402 +paradiseserv 0 1 6.957497 0.000000 17403 +theresult 0 1 6.957497 0.000000 17404 +ismulti 0 1 6.957497 0.000000 17405 +sameserv 0 1 6.957497 0.000000 17406 +carefulattent 0 1 6.957497 0.000000 17407 +processqueri 0 1 6.957497 0.000000 17408 +largevolum 0 1 6.957497 0.000000 17409 +frontendeurop 0 1 6.957497 0.000000 17410 +pressher 0 1 6.957497 0.000000 17411 +projectattn 0 1 6.957497 0.000000 17412 +dewittunivers 0 1 6.957497 0.000000 17413 +edumor 0 1 6.957497 0.000000 17414 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..e3b3d54a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +network 0 168 1.791759 0.000000 61 +support 0 132 1.945910 0.000000 83 +report 1 131 2.079442 2.079442 92 +provid 1 121 2.079442 2.079442 94 +tool 0 117 2.079442 0.000000 93 +welcom 0 122 2.079442 0.000000 99 +intern 1 108 2.197225 2.197225 128 +look 0 107 2.197225 0.000000 115 +site 0 106 2.197225 0.000000 119 +text 0 98 2.302585 0.000000 133 +commun 1 95 2.397895 2.397895 157 +comment 0 93 2.397895 0.000000 146 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +educ 0 86 2.484907 0.000000 191 +know 0 80 2.564949 0.000000 198 +servic 1 72 2.639057 2.639057 236 +onlin 0 75 2.639057 0.000000 223 +goal 0 66 2.708050 0.000000 250 +organ 0 65 2.772589 0.000000 265 +best 1 59 2.833213 2.833213 299 +locat 0 59 2.833213 0.000000 303 +sever 0 56 2.890372 0.000000 322 +suggest 1 53 2.944439 2.944439 331 +three 0 54 2.944439 0.000000 330 +week 0 52 2.995732 0.000000 343 +format 0 48 3.044522 0.000000 356 +effect 0 46 3.091042 0.000000 385 +show 0 43 3.178054 0.000000 417 +offer 0 43 3.178054 0.000000 414 +howev 0 41 3.218876 0.000000 422 +announc 0 40 3.258097 0.000000 441 +primari 0 25 3.737670 0.000000 669 +daili 0 24 3.761200 0.000000 706 +annot 0 21 3.912023 0.000000 775 +theunivers 0 21 3.912023 0.000000 797 +longer 0 20 3.951244 0.000000 816 +entir 0 20 3.951244 0.000000 811 +toolkit 0 20 3.951244 0.000000 835 +universityof 0 15 4.248495 0.000000 1061 +everyon 0 13 4.382027 0.000000 1148 +summar 0 11 4.553877 0.000000 1295 +discov 0 9 4.753590 0.000000 1562 +hundr 0 9 4.753590 0.000000 1528 +filter 0 8 4.875197 0.000000 1641 +scout 2 7 5.010635 10.021270 1903 +happen 0 7 5.010635 0.000000 1790 +valuabl 0 5 5.347108 0.000000 2256 +newli 0 3 5.857933 0.000000 3786 +useth 0 3 5.857933 0.000000 3110 +thescout 0 2 6.263398 0.000000 6082 +homepagego 0 1 6.957497 0.000000 17415 +versionnewslett 0 1 6.957497 0.000000 17416 +newand 0 1 6.957497 0.000000 17417 +toolsinternet 0 1 6.957497 0.000000 17418 +effectiveinternet 0 1 6.957497 0.000000 17419 +availablea 0 1 6.957497 0.000000 17420 +studentssurf 0 1 6.957497 0.000000 17421 +smarter 0 1 6.957497 0.000000 17422 +canchoos 0 1 6.957497 0.000000 17423 +annoucementseach 0 1 6.957497 0.000000 17424 +networktool 0 1 6.957497 0.000000 17425 +vefound 0 1 6.957497 0.000000 17426 +byeduc 0 1 6.957497 0.000000 17427 +encouragefeedback 0 1 6.957497 0.000000 17428 +ournewest 0 1 6.957497 0.000000 17429 +feedbackscout 0 1 6.957497 0.000000 17430 +servicesfor 0 1 6.957497 0.000000 17431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..725ff2ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,482 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +system 3 443 0.693147 2.079441 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +mail 2 238 1.386294 2.772588 22 +languag 2 227 1.386294 2.772588 26 +wisc 2 242 1.386294 2.772588 33 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +list 2 201 1.609438 3.218876 39 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +data 2 170 1.791759 3.583518 49 +applic 2 170 1.791759 3.583518 56 +develop 1 174 1.791759 1.791759 53 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +network 0 168 1.791759 0.000000 61 +parallel 0 169 1.791759 0.000000 60 +contact 0 153 1.791759 0.000000 59 +object 3 138 1.945910 5.837730 79 +file 2 132 1.945910 3.891820 70 +support 2 132 1.945910 3.891820 83 +model 1 145 1.945910 1.945910 69 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +hall 0 146 1.945910 0.000000 65 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +high 0 130 2.079442 0.000000 101 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +compil 0 122 2.079442 0.000000 96 +report 0 131 2.079442 0.000000 92 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +world 0 115 2.197225 0.000000 126 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +access 2 102 2.302585 4.605170 136 +text 1 98 2.302585 2.302585 133 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +question 1 91 2.397895 2.397895 141 +mani 1 92 2.397895 2.397895 150 +section 0 94 2.397895 0.000000 149 +commun 0 95 2.397895 0.000000 157 +sinc 0 90 2.397895 0.000000 159 +pictur 0 89 2.397895 0.000000 160 +comment 0 93 2.397895 0.000000 146 +environ 1 84 2.484907 2.484907 177 +larg 1 82 2.484907 2.484907 168 +chang 1 82 2.484907 2.484907 163 +second 1 81 2.484907 2.484907 166 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +requir 0 81 2.484907 0.000000 167 +start 0 83 2.484907 0.000000 173 +server 1 76 2.564949 2.564949 204 +messag 1 76 2.564949 2.564949 212 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +sourc 1 77 2.564949 2.564949 201 +exampl 1 77 2.564949 2.564949 195 +refer 0 78 2.564949 0.000000 203 +want 0 79 2.564949 0.000000 199 +name 1 72 2.639057 2.639057 220 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +write 0 72 2.639057 0.000000 222 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +degre 0 69 2.708050 0.000000 259 +order 0 69 2.708050 0.000000 249 +august 0 66 2.708050 0.000000 257 +receiv 0 66 2.708050 0.000000 244 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +improv 0 62 2.772589 0.000000 289 +copi 0 63 2.772589 0.000000 284 +type 2 61 2.833213 5.666426 296 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +simpl 0 60 2.833213 0.000000 298 +plai 0 60 2.833213 0.000000 307 +unix 2 58 2.890372 5.780744 308 +space 1 57 2.890372 2.890372 310 +major 1 56 2.890372 2.890372 315 +overview 0 56 2.890372 0.000000 323 +detail 0 57 2.890372 0.000000 321 +sever 0 56 2.890372 0.000000 322 +index 0 56 2.890372 0.000000 309 +point 0 58 2.890372 0.000000 319 +processor 1 54 2.944439 2.944439 335 +three 0 54 2.944439 0.000000 330 +extens 0 53 2.944439 0.000000 340 +found 0 53 2.944439 0.000000 337 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +digit 1 52 2.995732 2.995732 348 +run 0 51 2.995732 0.000000 347 +tabl 0 51 2.995732 0.000000 346 +date 0 51 2.995732 0.000000 344 +basic 1 50 3.044522 3.044522 360 +set 1 50 3.044522 3.044522 361 +standard 1 48 3.044522 3.044522 365 +give 0 50 3.044522 0.000000 359 +without 0 50 3.044522 0.000000 370 +archiv 0 49 3.044522 0.000000 364 +featur 1 46 3.091042 3.091042 386 +could 0 46 3.091042 0.000000 383 +possibl 0 47 3.091042 0.000000 378 +get 0 46 3.091042 0.000000 380 +describ 1 45 3.135494 3.135494 400 +natur 0 44 3.135494 0.000000 406 +video 0 44 3.135494 0.000000 405 +anoth 0 45 3.135494 0.000000 408 +term 1 43 3.178054 3.178054 411 +mechan 1 43 3.178054 3.178054 416 +futur 1 41 3.218876 3.218876 427 +howev 0 41 3.218876 0.000000 422 +editor 0 41 3.218876 0.000000 433 +multipl 1 39 3.258097 3.258097 453 +submit 0 39 3.258097 0.000000 440 +must 0 40 3.258097 0.000000 442 +close 1 38 3.295837 3.295837 465 +open 0 38 3.295837 0.000000 469 +field 1 37 3.332205 3.332205 482 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +feel 0 37 3.332205 0.000000 483 +purpos 0 37 3.332205 0.000000 481 +multi 0 36 3.367296 0.000000 493 +tree 0 36 3.367296 0.000000 492 +singl 1 34 3.401197 3.401197 510 +either 1 35 3.401197 3.401197 506 +everi 0 34 3.401197 0.000000 519 +approxim 0 35 3.401197 0.000000 509 +post 0 35 3.401197 0.000000 505 +return 0 34 3.401197 0.000000 502 +concurr 0 34 3.401197 0.000000 501 +go 0 33 3.433987 0.000000 529 +product 0 33 3.433987 0.000000 527 +queri 0 33 3.433987 0.000000 524 +obtain 0 33 3.433987 0.000000 534 +ad 0 32 3.465736 0.000000 544 +kind 0 32 3.465736 0.000000 541 +photo 0 31 3.496508 0.000000 561 +someth 0 31 3.496508 0.000000 554 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +option 0 30 3.555348 0.000000 575 +built 0 29 3.583519 0.000000 592 +turn 0 29 3.583519 0.000000 586 +depend 0 29 3.583519 0.000000 583 +particip 0 29 3.583519 0.000000 589 +releas 2 28 3.610918 7.221836 616 +framework 1 28 3.610918 3.610918 606 +intend 0 28 3.610918 0.000000 599 +becom 0 28 3.610918 0.000000 603 +propos 0 28 3.610918 0.000000 602 +hope 0 28 3.610918 0.000000 610 +retriev 1 27 3.637586 3.637586 621 +mind 0 27 3.637586 0.000000 632 +manipul 0 27 3.637586 0.000000 624 +quit 0 27 3.637586 0.000000 633 +symbol 0 27 3.637586 0.000000 620 +linux 0 27 3.637586 0.000000 631 +team 0 27 3.637586 0.000000 625 +effort 1 26 3.688879 3.688879 652 +request 1 26 3.688879 3.688879 635 +rather 1 26 3.688879 3.688879 642 +enhanc 0 26 3.688879 0.000000 644 +enabl 0 26 3.688879 0.000000 655 +subject 0 26 3.688879 0.000000 647 +client 1 25 3.737670 3.737670 679 +wai 0 25 3.737670 0.000000 662 +valu 0 25 3.737670 0.000000 665 +task 0 25 3.737670 0.000000 678 +scalabl 1 24 3.761200 3.761200 705 +store 1 24 3.761200 3.761200 693 +reach 0 24 3.761200 0.000000 688 +defin 1 22 3.850148 3.850148 746 +serv 0 22 3.850148 0.000000 758 +varieti 0 22 3.850148 0.000000 740 +almost 0 22 3.850148 0.000000 742 +deal 0 22 3.850148 0.000000 736 +emphasi 0 22 3.850148 0.000000 755 +sent 0 22 3.850148 0.000000 763 +programminglanguag 1 21 3.912023 3.912023 782 +fund 0 21 3.912023 0.000000 805 +flexibl 0 21 3.912023 0.000000 792 +latest 0 21 3.912023 0.000000 785 +thu 0 21 3.912023 0.000000 773 +similar 0 21 3.912023 0.000000 771 +binari 1 20 3.951244 3.951244 823 +entir 1 20 3.951244 3.951244 811 +fine 0 20 3.951244 0.000000 822 +benchmark 1 19 4.007333 4.007333 859 +media 0 19 4.007333 0.000000 861 +definit 0 19 4.007333 0.000000 864 +separ 0 19 4.007333 0.000000 844 +concentr 1 18 4.060443 4.060443 906 +along 0 18 4.060443 0.000000 878 +regist 1 17 4.110874 4.110874 938 +weekli 1 17 4.110874 4.110874 919 +repositori 0 17 4.110874 0.000000 932 +expand 0 17 4.110874 0.000000 928 +whether 0 17 4.110874 0.000000 918 +stop 0 17 4.110874 0.000000 942 +ultim 0 17 4.110874 0.000000 943 +attempt 0 17 4.110874 0.000000 917 +sept 0 17 4.110874 0.000000 952 +anyon 0 17 4.110874 0.000000 916 +commerci 1 16 4.174387 4.174387 1005 +intel 0 16 4.174387 0.000000 1000 +alreadi 0 16 4.174387 0.000000 963 +portion 0 16 4.174387 0.000000 971 +sign 0 16 4.174387 0.000000 970 +transit 1 15 4.248495 4.248495 1046 +capabl 0 15 4.248495 0.000000 1016 +hierarch 0 15 4.248495 0.000000 1018 +hybrid 0 15 4.248495 0.000000 1057 +piec 0 15 4.248495 0.000000 1020 +stream 0 15 4.248495 0.000000 1015 +charact 0 15 4.248495 0.000000 1028 +heterogen 1 14 4.317488 4.317488 1090 +anonym 1 14 4.317488 4.317488 1100 +attribut 1 14 4.317488 4.317488 1092 +role 0 14 4.317488 0.000000 1101 +shown 0 14 4.317488 0.000000 1080 +decid 0 14 4.317488 0.000000 1075 +directli 1 13 4.382027 4.382027 1141 +nasa 0 13 4.382027 0.000000 1188 +earlier 0 13 4.382027 0.000000 1140 +cannot 0 13 4.382027 0.000000 1144 +individu 0 13 4.382027 0.000000 1126 +convert 0 13 4.382027 0.000000 1122 +uniqu 1 12 4.465908 4.465908 1228 +target 0 12 4.465908 0.000000 1282 +safe 0 12 4.465908 0.000000 1274 +solari 0 12 4.465908 0.000000 1238 +nanci 0 12 4.465908 0.000000 1256 +shore 3 11 4.553877 13.661631 1377 +persist 2 11 4.553877 9.107754 1367 +string 1 11 4.553877 4.553877 1340 +arpa 0 11 4.553877 0.000000 1369 +distinguish 0 11 4.553877 0.000000 1357 +fix 0 11 4.553877 0.000000 1327 +facilit 0 10 4.653960 0.000000 1412 +equal 0 10 4.653960 0.000000 1424 +consortium 0 10 4.653960 0.000000 1467 +length 0 10 4.653960 0.000000 1400 +subscrib 2 9 4.753590 9.507180 1541 +inter 1 9 4.753590 4.753590 1530 +rel 0 9 4.753590 0.000000 1487 +mention 0 9 4.753590 0.000000 1569 +familiar 0 9 4.753590 0.000000 1485 +contrast 0 8 4.875197 0.000000 1637 +root 0 8 4.875197 0.000000 1650 +realiz 0 8 4.875197 0.000000 1739 +cross 0 8 4.875197 0.000000 1703 +port 0 8 4.875197 0.000000 1766 +parti 0 8 4.875197 0.000000 1676 +digest 1 7 5.010635 5.010635 1864 +supportfor 0 7 5.010635 0.000000 1854 +sparc 0 7 5.010635 0.000000 1860 +henc 0 7 5.010635 0.000000 1805 +beta 1 6 5.164786 5.164786 1993 +byte 1 6 5.164786 5.164786 2108 +geograph 1 6 5.164786 5.164786 2236 +furthermor 0 6 5.164786 0.000000 2141 +conveni 0 6 5.164786 0.000000 2088 +pool 0 6 5.164786 0.000000 2225 +feasibl 0 6 5.164786 0.000000 2157 +gzip 0 6 5.164786 0.000000 2117 +moder 0 6 5.164786 0.000000 2112 +notifi 0 6 5.164786 0.000000 2106 +compat 1 5 5.347108 5.347108 2485 +eas 1 5 5.347108 5.347108 2267 +default 1 5 5.347108 5.347108 2335 +anda 0 5 5.347108 0.000000 2416 +greater 0 5 5.347108 0.000000 2258 +began 0 5 5.347108 0.000000 2498 +remain 0 5 5.347108 0.000000 2278 +peer 1 4 5.568345 5.568345 2742 +repli 1 4 5.568345 5.568345 2689 +andevalu 0 4 5.568345 0.000000 2706 +satellit 0 4 5.568345 0.000000 3077 +eventu 0 4 5.568345 0.000000 3074 +symmetr 0 4 5.568345 0.000000 2908 +exodu 0 4 5.568345 0.000000 3075 +customiz 0 4 5.568345 0.000000 2966 +simplifi 0 4 5.568345 0.000000 3066 +bulk 1 3 5.857933 5.857933 4000 +oodb 1 3 5.857933 5.857933 3954 +subscript 1 3 5.857933 5.857933 3469 +predecessor 0 3 5.857933 0.000000 3585 +briefli 0 3 5.857933 0.000000 3459 +sector 0 3 5.857933 0.000000 3766 +paragon 0 3 5.857933 0.000000 3359 +serverarchitectur 0 3 5.857933 0.000000 3736 +gigabyt 0 3 5.857933 0.000000 3548 +embodi 0 3 5.857933 0.000000 3236 +intra 0 3 5.857933 0.000000 3243 +reachabl 0 3 5.857933 0.000000 4001 +eduand 0 3 5.857933 0.000000 3452 +membership 0 3 5.857933 0.000000 3751 +oodbm 1 2 6.263398 6.263398 6083 +objectiveoverviewreleas 0 2 6.263398 0.000000 6070 +serverobject 0 2 6.263398 0.000000 6077 +provis 0 2 6.263398 0.000000 4683 +neutral 0 2 6.263398 0.000000 5760 +mount 0 2 6.263398 0.000000 5995 +eduthi 0 2 6.263398 0.000000 5382 +junk 0 2 6.263398 0.000000 5701 +mailbox 0 2 6.263398 0.000000 6084 +sender 0 2 6.263398 0.000000 5064 +shore_al 2 1 6.957497 13.914994 17432 +odmg 1 1 6.957497 6.957497 17433 +listproc 1 1 6.957497 6.957497 17434 +shore_support 1 1 6.957497 6.957497 17435 +informationsystem 1 1 6.957497 6.957497 17436 +ashor 1 1 6.957497 6.957497 17437 +vendor 1 1 6.957497 6.957497 17438 +flatten 1 1 6.957497 6.957497 17439 +legaci 1 1 6.957497 6.957497 17440 +clutter 1 1 6.957497 6.957497 17441 +pageshor 0 1 6.957497 0.000000 17442 +repositorydocu 0 1 6.957497 0.000000 17443 +informationmail 0 1 6.957497 0.000000 17444 +listsse 0 1 6.957497 0.000000 17445 +shorepeopl 0 1 6.957497 0.000000 17446 +shorelatest 0 1 6.957497 0.000000 17447 +arpaparadis 0 1 6.957497 0.000000 17448 +shoreexodu 0 1 6.957497 0.000000 17449 +shoreoo 0 1 6.957497 0.000000 17450 +oodbsshor 0 1 6.957497 0.000000 17451 +albumuw 0 1 6.957497 0.000000 17452 +widevarieti 0 1 6.957497 0.000000 17453 +cadsystem 0 1 6.957497 0.000000 17454 +usedexodusstorag 0 1 6.957497 0.000000 17455 +ofwai 0 1 6.957497 0.000000 17456 +thisinterfac 0 1 6.957497 0.000000 17457 +theunix 0 1 6.957497 0.000000 17458 +viand 0 1 6.957497 0.000000 17459 +withoutmodif 0 1 6.957497 0.000000 17460 +shoreobject 0 1 6.957497 0.000000 17461 +inheritingcharacterist 0 1 6.957497 0.000000 17462 +fromfil 0 1 6.957497 0.000000 17463 +ofshor 0 1 6.957497 0.000000 17464 +scalabilitysupport 0 1 6.957497 0.000000 17465 +heterogeneitysupport 0 1 6.957497 0.000000 17466 +applicationswhen 0 1 6.957497 0.000000 17467 +uniqueamong 0 1 6.957497 0.000000 17468 +languageheterogen 0 1 6.957497 0.000000 17469 +persistentstorag 0 1 6.957497 0.000000 17470 +basicallycompat 0 1 6.957497 0.000000 17471 +betransf 0 1 6.957497 0.000000 17472 +architectureshor 0 1 6.957497 0.000000 17473 +distributedarchitectur 0 1 6.957497 0.000000 17474 +disksattach 0 1 6.957497 0.000000 17475 +architectureus 0 1 6.957497 0.000000 17476 +typicallyus 0 1 6.957497 0.000000 17477 +notionof 0 1 6.957497 0.000000 17478 +runsin 0 1 6.957497 0.000000 17479 +forus 0 1 6.957497 0.000000 17480 +theparadis 0 1 6.957497 0.000000 17481 +seosdi 0 1 6.957497 0.000000 17482 +aimport 0 1 6.957497 0.000000 17483 +endeavor 0 1 6.957497 0.000000 17484 +certainlydepend 0 1 6.957497 0.000000 17485 +transmitobject 0 1 6.957497 0.000000 17486 +whilecurr 0 1 6.957497 0.000000 17487 +orientedtoward 0 1 6.957497 0.000000 17488 +terabyt 0 1 6.957497 0.000000 17489 +libraryar 0 1 6.957497 0.000000 17490 +heterogeneityobject 0 1 6.957497 0.000000 17491 +neutraltyp 0 1 6.957497 0.000000 17492 +databasefeatur 0 1 6.957497 0.000000 17493 +ofsupport 0 1 6.957497 0.000000 17494 +feasibleto 0 1 6.957497 0.000000 17495 +wasrec 0 1 6.957497 0.000000 17496 +onprovid 0 1 6.957497 0.000000 17497 +withina 0 1 6.957497 0.000000 17498 +applicationsa 0 1 6.957497 0.000000 17499 +currentlyus 0 1 6.957497 0.000000 17500 +untyp 0 1 6.957497 0.000000 17501 +structuredobject 0 1 6.957497 0.000000 17502 +displac 0 1 6.957497 0.000000 17503 +orientedfil 0 1 6.957497 0.000000 17504 +standpoint 0 1 6.957497 0.000000 17505 +manypersist 0 1 6.957497 0.000000 17506 +indirectli 0 1 6.957497 0.000000 17507 +usersa 0 1 6.957497 0.000000 17508 +individualpersist 0 1 6.957497 0.000000 17509 +oflarg 0 1 6.957497 0.000000 17510 +unnam 0 1 6.957497 0.000000 17511 +involvessever 0 1 6.957497 0.000000 17512 +includingdirectori 0 1 6.957497 0.000000 17513 +unixappl 0 1 6.957497 0.000000 17514 +fromtradit 0 1 6.957497 0.000000 17515 +standardunix 0 1 6.957497 0.000000 17516 +mkdir 0 1 6.957497 0.000000 17517 +chdir 0 1 6.957497 0.000000 17518 +callsposs 0 1 6.957497 0.000000 17519 +onevari 0 1 6.957497 0.000000 17520 +asb 0 1 6.957497 0.000000 17521 +objectthrough 0 1 6.957497 0.000000 17522 +counterpart 0 1 6.957497 0.000000 17523 +callswil 0 1 6.957497 0.000000 17524 +thatwish 0 1 6.957497 0.000000 17525 +datacontain 0 1 6.957497 0.000000 17526 +bothnew 0 1 6.957497 0.000000 17527 +componentof 0 1 6.957497 0.000000 17528 +morestructur 0 1 6.957497 0.000000 17529 +rleas 0 1 6.957497 0.000000 17530 +completeimplement 0 1 6.957497 0.000000 17531 +tosolari 0 1 6.957497 0.000000 17532 +andpentium 0 1 6.957497 0.000000 17533 +atftp 0 1 6.957497 0.000000 17534 +liststher 0 1 6.957497 0.000000 17535 +usebi 0 1 6.957497 0.000000 17536 +madisonc 0 1 6.957497 0.000000 17537 +unmoder 0 1 6.957497 0.000000 17538 +unlikelyev 0 1 6.957497 0.000000 17539 +isalreadi 0 1 6.957497 0.000000 17540 +belowfor 0 1 6.957497 0.000000 17541 +sentwhen 0 1 6.957497 0.000000 17542 +beingpost 0 1 6.957497 0.000000 17543 +yourrepli 0 1 6.957497 0.000000 17544 +maysubscrib 0 1 6.957497 0.000000 17545 +existenceof 0 1 6.957497 0.000000 17546 +whenit 0 1 6.957497 0.000000 17547 +yoursubscript 0 1 6.957497 0.000000 17548 +conceal 0 1 6.957497 0.000000 17549 +subscriberscannot 0 1 6.957497 0.000000 17550 +specialmessag 0 1 6.957497 0.000000 17551 +sendthi 0 1 6.957497 0.000000 17552 +unsubscrib 0 1 6.957497 0.000000 17553 +messageshould 0 1 6.957497 0.000000 17554 +helplast 0 1 6.957497 0.000000 17555 +nhall 0 1 6.957497 0.000000 17556 +footnot 0 1 6.957497 0.000000 17557 +odlshor 0 1 6.957497 0.000000 17558 +modelidl 0 1 6.957497 0.000000 17559 +odlar 0 1 6.957497 0.000000 17560 +stabilizesw 0 1 6.957497 0.000000 17561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..af6d383f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +find 0 111 2.197225 0.000000 111 +stuff 0 87 2.484907 0.000000 171 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +sport 0 25 3.737670 0.000000 683 +indian 0 22 3.850148 0.000000 769 +gupta 1 12 4.465908 4.465908 1241 +avenu 0 12 4.465908 0.000000 1277 +newspap 0 12 4.465908 0.000000 1280 +whereabout 0 4 5.568345 0.000000 3078 +abhinav 1 3 5.857933 5.857933 3428 +agupta 0 3 5.857933 0.000000 3429 +kendal 0 2 6.263398 0.000000 6085 +residenceoffic 0 1 6.957497 0.000000 17562 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..b643fefd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +click 0 142 1.945910 0.000000 78 +pictur 0 89 2.397895 0.000000 160 +live 0 40 3.258097 0.000000 451 +ever 0 19 4.007333 0.000000 872 +larger 0 7 5.010635 0.000000 1875 +largest 0 7 5.010635 0.000000 1858 +alain 1 2 6.263398 6.263398 6086 +pagealain 0 1 6.957497 0.000000 17563 +carnivor 0 1 6.957497 0.000000 17564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..234ac244 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +wisconsin 2 169 1.791759 3.583518 54 +madison 0 165 1.791759 0.000000 55 +professor 0 137 1.945910 0.000000 76 +relat 0 139 1.945910 0.000000 68 +machin 1 129 2.079442 2.079442 95 +dayton 0 119 2.079442 0.000000 104 +learn 1 86 2.484907 2.484907 170 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +state 0 76 2.564949 0.000000 207 +intellig 1 72 2.639057 2.639057 225 +artifici 0 63 2.772589 0.000000 280 +advisor 0 51 2.995732 0.000000 355 +streetmadison 0 38 3.295837 0.000000 474 +neural 0 30 3.555348 0.000000 578 +departmentunivers 0 24 3.761200 0.000000 711 +sequenc 0 23 3.806662 0.000000 734 +biologi 1 15 4.248495 4.248495 1049 +train 0 14 4.317488 0.000000 1066 +edutelephon 0 10 4.653960 0.000000 1473 +purdu 0 10 4.653960 0.000000 1466 +molecular 1 7 5.010635 5.010635 1887 +jude 0 6 5.164786 0.000000 2123 +fold 0 4 5.568345 0.000000 2615 +allex 1 2 6.263398 6.263398 6087 +ismb 1 2 6.263398 6.263398 5834 +carolyn 1 2 6.263398 6.263398 6088 +studentbiotechnolog 0 1 6.957497 0.000000 17565 +traineecomput 0 1 6.957497 0.000000 17566 +shavlikinterest 0 1 6.957497 0.000000 17567 +protein 0 1 6.957497 0.000000 17568 +networkseduc 0 1 6.957497 0.000000 17569 +madisonb 0 1 6.957497 0.000000 17570 +universityb 0 1 6.957497 0.000000 17571 +mankato 0 1 6.957497 0.000000 17572 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..e8b5634f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +cornel 0 215 1.386294 0.000000 23 +group 1 183 1.609438 1.609438 36 +updat 0 191 1.609438 0.000000 41 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +implement 0 152 1.791759 0.000000 52 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +parallel 0 169 1.791759 0.000000 60 +algorithm 0 162 1.791759 0.000000 57 +read 0 154 1.791759 0.000000 47 +like 1 132 1.945910 1.945910 81 +file 0 132 1.945910 0.000000 70 +problem 0 147 1.945910 0.000000 75 +area 0 144 1.945910 0.000000 80 +model 0 145 1.945910 0.000000 69 +perform 0 143 1.945910 0.000000 74 +architectur 0 139 1.945910 0.000000 77 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +topic 0 114 2.197225 0.000000 110 +look 0 107 2.197225 0.000000 115 +find 0 111 2.197225 0.000000 111 +theori 0 111 2.197225 0.000000 127 +advanc 0 99 2.302585 0.000000 130 +peopl 0 96 2.302585 0.000000 132 +associ 0 93 2.397895 0.000000 151 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +novemb 0 81 2.484907 0.000000 179 +thing 0 84 2.484907 0.000000 189 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +resum 0 79 2.564949 0.000000 217 +method 0 80 2.564949 0.000000 213 +master 0 76 2.564949 0.000000 216 +optim 0 79 2.564949 0.000000 197 +good 0 77 2.564949 0.000000 200 +know 0 80 2.564949 0.000000 198 +want 0 79 2.564949 0.000000 199 +solv 0 73 2.639057 0.000000 234 +write 0 72 2.639057 0.000000 222 +david 0 71 2.639057 0.000000 232 +degre 1 69 2.708050 2.708050 259 +practic 0 70 2.708050 0.000000 246 +copi 0 63 2.772589 0.000000 284 +polici 0 64 2.772589 0.000000 279 +evalu 0 64 2.772589 0.000000 266 +new 0 64 2.772589 0.000000 262 +think 1 57 2.890372 2.890372 314 +point 0 58 2.890372 0.000000 319 +index 0 56 2.890372 0.000000 309 +three 0 54 2.944439 0.000000 330 +talk 0 53 2.944439 0.000000 336 +week 0 52 2.995732 0.000000 343 +advisor 0 51 2.995732 0.000000 355 +much 0 52 2.995732 0.000000 349 +friend 1 48 3.044522 3.044522 376 +set 0 50 3.044522 0.000000 361 +physic 1 47 3.091042 3.091042 377 +get 0 46 3.091042 0.000000 380 +featur 0 46 3.091042 0.000000 386 +better 0 45 3.135494 0.000000 401 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +linear 0 41 3.218876 0.000000 431 +live 0 40 3.258097 0.000000 451 +author 0 39 3.258097 0.000000 450 +seminar 1 38 3.295837 3.295837 470 +return 0 34 3.401197 0.000000 502 +go 1 33 3.433987 3.433987 529 +articl 0 33 3.433987 0.000000 530 +depend 0 29 3.583519 0.000000 583 +enhanc 0 26 3.688879 0.000000 644 +never 0 25 3.737670 0.000000 671 +magazin 0 24 3.761200 0.000000 704 +watch 0 21 3.912023 0.000000 789 +love 0 21 3.912023 0.000000 804 +leav 0 21 3.912023 0.000000 772 +minut 0 20 3.951244 0.000000 810 +five 0 19 4.007333 0.000000 841 +beauti 1 18 4.060443 4.060443 912 +regist 0 17 4.110874 0.000000 938 +side 0 15 4.248495 0.000000 1022 +anywai 0 15 4.248495 0.000000 1047 +score 0 15 4.248495 0.000000 1017 +went 0 12 4.465908 0.000000 1279 +multiscalar 1 8 4.875197 4.875197 1783 +partner 0 8 4.875197 0.000000 1648 +parti 0 8 4.875197 0.000000 1676 +vallei 0 7 5.010635 0.000000 1959 +shot 0 7 5.010635 0.000000 1898 +yale 1 6 5.164786 5.164786 2003 +truth 0 6 5.164786 0.000000 2179 +sohi 0 6 5.164786 0.000000 2237 +cat 0 6 5.164786 0.000000 2194 +presid 0 6 5.164786 0.000000 2196 +promis 0 6 5.164786 0.000000 2037 +guri 0 5 5.347108 0.000000 2578 +girlfriend 0 5 5.347108 0.000000 2579 +everybodi 0 5 5.347108 0.000000 2517 +gui 0 5 5.347108 0.000000 2573 +kid 0 5 5.347108 0.000000 2516 +arch 1 4 5.568345 5.568345 2995 +metal 0 4 5.568345 0.000000 3079 +soul 0 4 5.568345 0.000000 2907 +drew 0 4 5.568345 0.000000 2980 +amir 1 3 5.857933 5.857933 3850 +super 1 3 5.857933 5.857933 3918 +preprocessor 0 3 5.857933 0.000000 3844 +detector 0 3 5.857933 0.000000 3745 +allevi 0 3 5.857933 0.000000 3643 +recip 0 3 5.857933 0.000000 3668 +terri 0 3 5.857933 0.000000 3264 +carbon 0 3 5.857933 0.000000 3804 +vagu 0 3 5.857933 0.000000 3393 +roth 1 2 6.263398 6.263398 6089 +out 1 2 6.263398 6.263398 6090 +barb 1 2 6.263398 6.263398 6058 +delphi 0 2 6.263398 0.000000 4192 +airport 0 2 6.263398 0.000000 5962 +curli 0 2 6.263398 0.000000 5691 +fri 0 2 6.263398 0.000000 5844 +charli 0 2 6.263398 0.000000 5905 +regress 0 2 6.263398 0.000000 4501 +weird 0 2 6.263398 0.000000 5503 +subba 0 2 6.263398 0.000000 6091 +officem 0 2 6.263398 0.000000 6092 +wierd 0 2 6.263398 0.000000 6093 +marci 1 1 6.957497 6.957497 17573 +maven 0 1 6.957497 0.000000 17574 +erin 0 1 6.957497 0.000000 17575 +occasionali 0 1 6.957497 0.000000 17576 +cvte 0 1 6.957497 0.000000 17577 +deleg 0 1 6.957497 0.000000 17578 +existencei 0 1 6.957497 0.000000 17579 +nail 0 1 6.957497 0.000000 17580 +lafollett 0 1 6.957497 0.000000 17581 +meantim 0 1 6.957497 0.000000 17582 +wacki 0 1 6.957497 0.000000 17583 +eggplant 0 1 6.957497 0.000000 17584 +daddi 0 1 6.957497 0.000000 17585 +titanium 0 1 6.957497 0.000000 17586 +screw 0 1 6.957497 0.000000 17587 +desi 0 1 6.957497 0.000000 17588 +relaford 0 1 6.957497 0.000000 17589 +mulholland 0 1 6.957497 0.000000 17590 +oxygen 0 1 6.957497 0.000000 17591 +dioxid 0 1 6.957497 0.000000 17592 +whack 0 1 6.957497 0.000000 17593 +scaryarea 0 1 6.957497 0.000000 17594 +rabid 0 1 6.957497 0.000000 17595 +interestth 0 1 6.957497 0.000000 17596 +hmmm 0 1 6.957497 0.000000 17597 +handyinformatik 0 1 6.957497 0.000000 17598 +madcat 0 1 6.957497 0.000000 17599 +sportslin 0 1 6.957497 0.000000 17600 +philli 0 1 6.957497 0.000000 17601 +ickyth 0 1 6.957497 0.000000 17602 +kemin 0 1 6.957497 0.000000 17603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..0829cedb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +construct 0 139 1.945910 0.000000 82 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..b9947c03 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 0 170 1.791759 0.000000 49 +read 0 154 1.791759 0.000000 47 +avail 0 169 1.791759 0.000000 48 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +area 0 144 1.945910 0.000000 80 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +provid 0 121 2.079442 0.000000 94 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +pleas 0 113 2.197225 0.000000 114 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +present 1 91 2.397895 2.397895 145 +homepag 1 93 2.397895 2.397895 148 +search 0 95 2.397895 0.000000 155 +associ 0 93 2.397895 0.000000 151 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +comment 0 93 2.397895 0.000000 146 +activ 1 84 2.484907 2.484907 182 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +want 0 79 2.564949 0.000000 199 +line 1 75 2.639057 2.639057 231 +summari 0 73 2.639057 0.000000 237 +view 1 70 2.708050 2.708050 254 +order 0 69 2.708050 0.000000 249 +main 0 67 2.708050 0.000000 256 +goal 0 66 2.708050 0.000000 250 +function 0 62 2.772589 0.000000 275 +copi 0 63 2.772589 0.000000 284 +handout 0 64 2.772589 0.000000 263 +abstract 0 62 2.772589 0.000000 276 +variou 1 56 2.890372 2.890372 317 +space 0 57 2.890372 0.000000 310 +unix 0 58 2.890372 0.000000 308 +found 1 53 2.944439 2.944439 337 +tabl 0 51 2.995732 0.000000 346 +maintain 0 51 2.995732 0.000000 342 +telephon 0 50 3.044522 0.000000 373 +numer 0 49 3.044522 0.000000 369 +netscap 0 44 3.135494 0.000000 395 +futur 0 41 3.218876 0.000000 427 +vita 1 38 3.295837 3.295837 473 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +download 1 36 3.367296 3.367296 489 +short 0 36 3.367296 0.000000 499 +approxim 1 35 3.401197 3.401197 509 +word 0 34 3.401197 0.000000 508 +articl 0 33 3.433987 0.000000 530 +enhanc 0 26 3.688879 0.000000 644 +mine 0 26 3.688879 0.000000 654 +wish 1 24 3.761200 3.761200 692 +compress 1 23 3.806662 3.806662 719 +togeth 0 23 3.806662 0.000000 714 +miscellan 0 23 3.806662 0.000000 731 +recommend 0 22 3.850148 0.000000 737 +basi 0 20 3.951244 0.000000 828 +item 0 19 4.007333 0.000000 856 +offici 0 18 4.060443 0.000000 894 +otherwis 0 17 4.110874 0.000000 922 +choos 0 16 4.174387 0.000000 964 +carl 0 15 4.248495 0.000000 1024 +anonym 0 14 4.317488 0.000000 1100 +invari 0 8 4.875197 0.000000 1748 +univeristi 0 8 4.875197 0.000000 1754 +none 0 7 5.010635 0.000000 1811 +spline 0 6 5.164786 0.000000 2007 +clickher 1 5 5.347108 5.347108 2428 +shift 0 5 5.347108 0.000000 2357 +wavelet 0 4 5.568345 0.000000 2874 +usa 0 4 5.568345 0.000000 3080 +thin 0 3 5.857933 0.000000 3488 +shen 0 3 5.857933 0.000000 3370 +uncompress 0 3 5.857933 0.000000 3177 +boor 0 3 5.857933 0.000000 3482 +ofwisconsin 0 3 5.857933 0.000000 4002 +amo 1 2 6.263398 6.263398 6094 +professordepart 0 2 6.263398 0.000000 5624 +deposit 0 2 6.263398 0.000000 6095 +mailbox 0 2 6.263398 0.000000 6084 +boxsplin 0 1 6.957497 0.000000 17604 +radial 0 1 6.957497 0.000000 17605 +toscatt 0 1 6.957497 0.000000 17606 +multiquadr 0 1 6.957497 0.000000 17607 +plate 0 1 6.957497 0.000000 17608 +splinesthi 0 1 6.957497 0.000000 17609 +linksat 0 1 6.957497 0.000000 17610 +paperaffin 0 1 6.957497 0.000000 17611 +operatorof 0 1 6.957497 0.000000 17612 +zuowei 0 1 6.957497 0.000000 17613 +fromher 0 1 6.957497 0.000000 17614 +directlyfrom 0 1 6.957497 0.000000 17615 +accounther 0 1 6.957497 0.000000 17616 +articlesof 0 1 6.957497 0.000000 17617 +containspostscript 0 1 6.957497 0.000000 17618 +theapproxim 0 1 6.957497 0.000000 17619 +filesconcern 0 1 6.957497 0.000000 17620 +andpubl 0 1 6.957497 0.000000 17621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..3b93c151 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +andi 0 4 5.568345 0.000000 3081 +pageandi 0 2 6.263398 0.000000 6096 +therber 0 1 6.957497 0.000000 17622 +therberoffic 0 1 6.957497 0.000000 17623 +sphone 0 1 6.957497 0.000000 17624 +andyt 0 1 6.957497 0.000000 17625 +eduzooresumebookmarksapplet 0 1 6.957497 0.000000 17626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..66c4eb7e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +world 0 115 2.197225 0.000000 126 +present 0 91 2.397895 0.000000 145 +collect 0 65 2.772589 0.000000 268 +finger 0 52 2.995732 0.000000 354 +log 0 19 4.007333 0.000000 857 +classic 0 14 4.317488 0.000000 1084 +fascin 0 3 5.857933 0.000000 3948 +arvind 1 1 6.957497 6.957497 17627 +ranganathan 1 1 6.957497 6.957497 17628 +workplac 1 1 6.957497 6.957497 17629 +ranga 0 1 6.957497 0.000000 17630 +erstwhil 0 1 6.957497 0.000000 17631 +indiaworld 0 1 6.957497 0.000000 17632 +escher 0 1 6.957497 0.000000 17633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..8b553f7d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +contact 0 153 1.791759 0.000000 59 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +educ 0 86 2.484907 0.000000 191 +come 0 78 2.564949 0.000000 202 +visit 0 63 2.772589 0.000000 288 +undergradu 0 54 2.944439 0.000000 338 +india 0 32 3.465736 0.000000 550 +altern 0 26 3.688879 0.000000 641 +worth 0 11 4.553877 0.000000 1294 +ashish 1 5 5.347108 5.347108 2473 +delhi 0 5 5.347108 0.000000 2530 +whereabout 0 4 5.568345 0.000000 3078 +indianinstitut 0 3 5.857933 0.000000 4003 +fantast 0 3 5.857933 0.000000 3966 +hadmi 0 2 6.263398 0.000000 6097 +canfing 0 2 6.263398 0.000000 6098 +thusoo 0 1 6.957497 0.000000 17634 +iitd 0 1 6.957497 0.000000 17635 +ashisht 0 1 6.957497 0.000000 17636 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..c56818f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +dayton 0 119 2.079442 0.000000 104 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +west 0 83 2.484907 0.000000 192 +info 0 85 2.484907 0.000000 176 +june 0 79 2.564949 0.000000 214 +view 1 70 2.708050 2.708050 254 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +juli 0 60 2.833213 0.000000 305 +finger 0 52 2.995732 0.000000 354 +usaphon 0 9 4.753590 0.000000 1600 +ashraf 1 3 5.857933 5.857933 3421 +aboulnaga 1 3 5.857933 5.857933 3426 +edueduc 0 3 5.857933 0.000000 4004 +egypt 1 2 6.263398 6.263398 4856 +desautel 0 2 6.263398 0.000000 4791 +alexandria 1 1 6.957497 6.957497 17637 +pageashraf 0 1 6.957497 0.000000 17638 +aboulnagacomput 0 1 6.957497 0.000000 17639 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..0246cae4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +gener 0 220 1.386294 0.000000 27 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +seattl 0 120 2.079442 0.000000 103 +back 0 60 2.833213 0.000000 297 +appoint 0 49 3.044522 0.000000 358 +basketbal 0 12 4.465908 0.000000 1289 +tuth 0 9 4.753590 0.000000 1519 +phil 1 5 5.347108 5.347108 2419 +educurr 0 5 5.347108 0.000000 2504 +win 0 3 5.857933 0.000000 3593 +atkinson 1 2 6.263398 6.263398 4722 +ncaa 0 2 6.263398 0.000000 5908 +infooffic 1 1 6.957497 6.957497 17640 +pageucla 0 1 6.957497 0.000000 17641 +bannon 0 1 6.957497 0.000000 17642 +championship 0 1 6.957497 0.000000 17643 +researchsailinghors 0 1 6.957497 0.000000 17644 +ridingscuba 0 1 6.957497 0.000000 17645 +divingc 0 1 6.957497 0.000000 17646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..c29a4c4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +click 0 142 1.945910 0.000000 78 +number 2 130 2.079442 4.158884 97 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +theori 1 111 2.197225 2.197225 127 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +code 0 108 2.197225 0.000000 116 +find 0 111 2.197225 0.000000 111 +question 1 91 2.397895 2.397895 141 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +proceed 0 93 2.397895 0.000000 152 +larg 1 82 2.484907 2.484907 168 +info 0 85 2.484907 0.000000 176 +exampl 1 77 2.564949 2.564949 195 +complet 0 77 2.564949 0.000000 208 +effici 1 73 2.639057 2.639057 233 +summari 0 73 2.639057 0.000000 237 +appli 0 71 2.639057 0.000000 226 +test 1 66 2.708050 2.708050 252 +practic 0 70 2.708050 0.000000 246 +complex 1 64 2.772589 2.772589 269 +improv 0 62 2.772589 0.000000 289 +creat 0 63 2.772589 0.000000 277 +simpl 0 60 2.833213 0.000000 298 +juli 0 60 2.833213 0.000000 305 +direct 0 57 2.890372 0.000000 316 +digit 0 52 2.995732 0.000000 348 +telephon 0 50 3.044522 0.000000 373 +without 0 50 3.044522 0.000000 370 +california 0 46 3.091042 0.000000 388 +algebra 0 45 3.135494 0.000000 394 +answer 0 45 3.135494 0.000000 391 +math 0 44 3.135494 0.000000 402 +examin 0 42 3.218876 0.000000 424 +press 0 42 3.218876 0.000000 419 +theoret 1 39 3.258097 3.258097 446 +probabl 0 40 3.258097 0.000000 455 +small 0 39 3.258097 0.000000 447 +annual 0 40 3.258097 0.000000 458 +error 0 40 3.258097 0.000000 449 +vita 0 38 3.295837 0.000000 473 +random 0 34 3.401197 0.000000 511 +least 0 35 3.401197 0.000000 516 +approxim 0 35 3.401197 0.000000 509 +product 0 33 3.433987 0.000000 527 +curriculum 0 33 3.433987 0.000000 535 +given 0 32 3.465736 0.000000 538 +secur 0 30 3.555348 0.000000 577 +usual 0 28 3.610918 0.000000 608 +proc 1 26 3.688879 3.688879 649 +berkelei 0 26 3.688879 0.000000 657 +lead 0 23 3.806662 0.000000 718 +among 0 21 3.912023 0.000000 781 +similar 0 21 3.912023 0.000000 771 +eric 1 19 4.007333 4.007333 870 +prove 0 19 4.007333 0.000000 848 +automata 0 13 4.382027 0.000000 1135 +conf 0 13 4.382027 0.000000 1181 +string 0 11 4.553877 0.000000 1340 +probabilist 0 11 4.553877 0.000000 1343 +volum 0 11 4.553877 0.000000 1347 +cryptographi 0 9 4.753590 0.000000 1512 +transmiss 0 9 4.753590 0.000000 1588 +assumpt 0 9 4.753590 0.000000 1514 +eduto 0 7 5.010635 0.000000 1956 +bach 1 4 5.568345 5.568345 2708 +wit 1 3 5.857933 5.857933 4005 +euler 0 3 5.857933 0.000000 3174 +canadian 0 3 5.857933 0.000000 3508 +condon 0 3 5.857933 0.000000 3309 +prime 1 2 6.263398 6.263398 6099 +designand 0 2 6.263398 0.000000 6100 +functionof 0 2 6.263398 0.000000 5415 +algebraicalgorithm 0 1 6.957497 0.000000 17647 +solvealgebra 0 1 6.957497 0.000000 17648 +onetel 0 1 6.957497 0.000000 17649 +possiblefactor 0 1 6.957497 0.000000 17650 +intrins 0 1 6.957497 0.000000 17651 +forreli 0 1 6.957497 0.000000 17652 +iscomposit 0 1 6.957497 0.000000 17653 +auxiliarynumb 0 1 6.957497 0.000000 17654 +witnessbi 0 1 6.957497 0.000000 17655 +followingnatur 0 1 6.957497 0.000000 17656 +accurateheurist 0 1 6.957497 0.000000 17657 +allowsthi 0 1 6.957497 0.000000 17658 +cnta 0 1 6.957497 0.000000 17659 +glaser 0 1 6.957497 0.000000 17660 +tanguai 0 1 6.957497 0.000000 17661 +shallit 0 1 6.957497 0.000000 17662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..8c4ca6fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +softwar 0 220 1.386294 0.000000 30 +oper 1 180 1.609438 1.609438 34 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +perform 0 143 1.945910 0.000000 74 +tool 1 117 2.079442 2.079442 93 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +spring 0 131 2.079442 0.000000 88 +advanc 0 99 2.302585 0.000000 130 +technic 0 100 2.302585 0.000000 140 +follow 0 92 2.397895 0.000000 143 +center 0 88 2.397895 0.000000 158 +west 0 83 2.484907 0.000000 192 +thing 0 84 2.484907 0.000000 189 +internet 0 83 2.484907 0.000000 186 +symposium 0 72 2.639057 0.000000 238 +undergradu 0 54 2.944439 0.000000 338 +streetmadison 0 38 3.295837 0.000000 474 +seminar 0 38 3.295837 0.000000 470 +random 0 34 3.401197 0.000000 511 +departmentunivers 0 24 3.761200 0.000000 711 +honor 0 23 3.806662 0.000000 729 +famili 0 23 3.806662 0.000000 735 +director 0 22 3.850148 0.000000 767 +offici 0 18 4.060443 0.000000 894 +miller 1 17 4.110874 4.110874 949 +convent 0 14 4.317488 0.000000 1072 +bart 1 9 4.753590 4.753590 1559 +paradyn 0 9 4.753590 0.000000 1614 +frank 0 9 4.753590 0.000000 1568 +lloyd 0 6 5.164786 0.000000 2103 +advisori 0 6 5.164786 0.000000 2148 +barton 0 5 5.347108 0.000000 2371 +professorcomput 0 3 5.857933 0.000000 3714 +usath 0 2 6.263398 0.000000 6056 +wright 0 2 6.263398 0.000000 5177 +fuzz 0 1 6.957497 0.000000 17663 +testingteach 0 1 6.957497 0.000000 17664 +graduatesprofession 0 1 6.957497 0.000000 17665 +monona 0 1 6.957497 0.000000 17666 +terrac 0 1 6.957497 0.000000 17667 +groupperson 0 1 6.957497 0.000000 17668 +photosbart 0 1 6.957497 0.000000 17669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..5bd2102c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +hour 1 165 1.791759 1.791759 46 +wisconsin 0 169 1.791759 0.000000 54 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +look 0 107 2.197225 0.000000 115 +west 0 83 2.484907 0.000000 192 +internet 0 83 2.484907 0.000000 186 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +street 0 63 2.772589 0.000000 293 +wednesdai 0 64 2.772589 0.000000 261 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +fridai 0 44 3.135494 0.000000 390 +word 0 34 3.401197 0.000000 508 +someth 0 31 3.496508 0.000000 554 +ultim 0 17 4.110874 0.000000 943 +miron 0 14 4.317488 0.000000 1110 +readabl 0 12 4.465908 0.000000 1258 +benjamin 0 11 4.553877 0.000000 1296 +teitelbaum 0 6 5.164786 0.000000 2102 +garbag 0 6 5.164786 0.000000 1986 +hyper 0 5 5.347108 0.000000 2435 +usaben 0 1 6.957497 0.000000 17670 +edursumquinc 0 1 6.957497 0.000000 17671 +gamezillion 0 1 6.957497 0.000000 17672 +bookmarksspr 0 1 6.957497 0.000000 17673 +dbseminar 0 1 6.957497 0.000000 17674 +osseminar 0 1 6.957497 0.000000 17675 +condormeet 0 1 6.957497 0.000000 17676 +plseminar 0 1 6.957497 0.000000 17677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..9a5683e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +softwar 0 220 1.386294 0.000000 30 +group 1 183 1.609438 1.609438 36 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +data 0 170 1.791759 0.000000 49 +applic 0 170 1.791759 0.000000 56 +problem 2 147 1.945910 3.891820 75 +model 1 145 1.945910 1.945910 69 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +postscript 1 131 2.079442 2.079442 90 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +spring 0 131 2.079442 0.000000 88 +introduct 0 126 2.079442 0.000000 87 +structur 1 106 2.197225 2.197225 105 +world 1 115 2.197225 2.197225 126 +teach 1 108 2.197225 2.197225 112 +mathemat 1 108 2.197225 2.197225 123 +assist 0 112 2.197225 0.000000 113 +make 0 111 2.197225 0.000000 120 +instructor 0 108 2.197225 0.000000 107 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +need 0 98 2.302585 0.000000 135 +section 1 94 2.397895 2.397895 149 +imag 1 91 2.397895 2.397895 161 +call 0 91 2.397895 0.000000 153 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +search 0 95 2.397895 0.000000 155 +wide 1 84 2.484907 2.484907 185 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +school 1 84 2.484907 2.484907 188 +west 0 83 2.484907 0.000000 192 +librari 0 87 2.484907 0.000000 181 +learn 0 86 2.484907 0.000000 170 +start 0 83 2.484907 0.000000 173 +internet 0 83 2.484907 0.000000 186 +resum 0 79 2.564949 0.000000 217 +orient 0 80 2.564949 0.000000 205 +solv 1 73 2.639057 2.639057 234 +servic 0 72 2.639057 0.000000 236 +addit 0 74 2.639057 0.000000 228 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +materi 0 75 2.639057 0.000000 221 +write 0 72 2.639057 0.000000 222 +logic 0 71 2.639057 0.000000 230 +practic 0 70 2.708050 0.000000 246 +knowledg 0 67 2.708050 0.000000 243 +receiv 0 66 2.708050 0.000000 244 +experi 1 64 2.772589 2.772589 283 +street 0 63 2.772589 0.000000 293 +abstract 0 62 2.772589 0.000000 276 +import 0 65 2.772589 0.000000 282 +result 0 65 2.772589 0.000000 281 +prof 0 64 2.772589 0.000000 273 +virtual 0 62 2.772589 0.000000 285 +artifici 0 63 2.772589 0.000000 280 +copi 0 63 2.772589 0.000000 284 +januari 0 62 2.772589 0.000000 264 +type 0 61 2.833213 0.000000 296 +simpl 0 60 2.833213 0.000000 298 +colleg 0 61 2.833213 0.000000 300 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +major 1 56 2.890372 2.890372 315 +cover 1 55 2.944439 2.944439 329 +instruct 1 53 2.944439 2.944439 332 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +finger 0 52 2.995732 0.000000 354 +advisor 0 51 2.995732 0.000000 355 +telephon 1 50 3.044522 3.044522 373 +basic 1 50 3.044522 3.044522 360 +pointer 0 48 3.044522 0.000000 368 +done 0 47 3.091042 0.000000 381 +algebra 0 45 3.135494 0.000000 394 +vision 1 41 3.218876 3.218876 430 +examin 1 42 3.218876 3.218876 424 +http 0 41 3.218876 0.000000 420 +howev 0 41 3.218876 0.000000 422 +multipl 0 39 3.258097 0.000000 453 +error 0 40 3.258097 0.000000 449 +littl 0 39 3.258097 0.000000 454 +vita 0 38 3.295837 0.000000 473 +credit 0 38 3.295837 0.000000 460 +open 0 38 3.295837 0.000000 469 +robot 1 36 3.367296 3.367296 497 +procedur 0 36 3.367296 0.000000 488 +copyright 0 36 3.367296 0.000000 495 +concurr 1 34 3.401197 3.401197 501 +approxim 0 35 3.401197 0.000000 509 +least 0 35 3.401197 0.000000 516 +survei 0 35 3.401197 0.000000 513 +statist 0 35 3.401197 0.000000 521 +next 0 34 3.401197 0.000000 517 +taught 1 33 3.433987 3.433987 526 +curriculum 0 33 3.433987 0.000000 535 +within 0 33 3.433987 0.000000 525 +dissert 1 32 3.465736 3.465736 549 +transform 1 32 3.465736 3.465736 542 +given 0 32 3.465736 0.000000 538 +posit 1 31 3.496508 3.496508 552 +titl 0 31 3.496508 0.000000 556 +exist 0 30 3.555348 0.000000 569 +specifi 0 30 3.555348 0.000000 568 +intend 1 28 3.610918 3.610918 599 +administr 0 27 3.637586 0.000000 628 +determin 0 27 3.637586 0.000000 630 +enabl 0 26 3.688879 0.000000 655 +accur 0 25 3.737670 0.000000 680 +motion 1 24 3.761200 3.761200 699 +honor 0 23 3.806662 0.000000 729 +instead 0 22 3.850148 0.000000 756 +defin 0 22 3.850148 0.000000 746 +identifi 0 22 3.850148 0.000000 760 +navig 1 21 3.912023 3.912023 796 +prepar 1 20 3.951244 3.951244 824 +entir 1 20 3.951244 3.951244 811 +geometr 0 19 4.007333 0.000000 852 +assum 0 19 4.007333 0.000000 845 +lyco 0 19 4.007333 0.000000 871 +minim 0 18 4.060443 0.000000 887 +fortran 1 15 4.248495 4.248495 1027 +drive 0 15 4.248495 0.000000 1052 +club 0 15 4.248495 0.000000 1058 +scene 1 14 4.317488 4.317488 1114 +camera 0 14 4.317488 0.000000 1115 +primarili 1 13 4.382027 4.382027 1185 +essenti 0 13 4.382027 0.000000 1137 +charl 0 13 4.382027 0.000000 1149 +optic 0 12 4.465908 0.000000 1221 +realiti 0 12 4.465908 0.000000 1272 +pascal 0 12 4.465908 0.000000 1213 +pagewelcom 0 11 4.553877 0.000000 1344 +keyword 0 11 4.553877 0.000000 1356 +perspect 0 10 4.653960 0.000000 1437 +prior 0 10 4.653960 0.000000 1438 +observ 1 9 4.753590 4.753590 1578 +assumpt 1 9 4.753590 4.753590 1514 +minimum 0 9 4.753590 0.000000 1555 +occur 0 9 4.753590 0.000000 1572 +dyer 0 9 4.753590 0.000000 1573 +sensit 0 8 4.875197 0.000000 1726 +dimens 1 7 5.010635 5.010635 1930 +elementari 1 7 5.010635 5.010635 1825 +bestor 2 6 5.164786 10.329572 2099 +recov 0 6 5.164786 0.000000 2235 +restrict 0 6 5.164786 0.000000 2129 +constrain 0 6 5.164786 0.000000 2042 +gareth 1 5 5.347108 5.347108 2392 +rigid 1 5 5.347108 5.347108 2432 +unknown 0 5 5.347108 0.000000 2318 +consent 0 5 5.347108 0.000000 2389 +invers 1 4 5.568345 5.568345 2764 +observatori 0 4 5.568345 0.000000 3070 +projector 1 3 5.857933 5.857933 3409 +edueduc 0 3 5.857933 0.000000 4004 +coursework 0 3 5.857933 0.000000 3588 +duti 0 3 5.857933 0.000000 3317 +prereq 0 3 5.857933 0.000000 3178 +wiscinfo 0 3 5.857933 0.000000 3106 +hoofer 0 2 6.263398 0.000000 6101 +out 0 2 6.263398 0.000000 6090 +nextstep 0 2 6.263398 0.000000 6102 +zealand 1 1 6.957497 6.957497 17678 +massei 1 1 6.957497 6.957497 17679 +pagegareth 0 1 6.957497 0.000000 17680 +dpl 0 1 6.957497 0.000000 17681 +dacc 0 1 6.957497 0.000000 17682 +nois 0 1 6.957497 0.000000 17683 +tradition 0 1 6.957497 0.000000 17684 +intersect 0 1 6.957497 0.000000 17685 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..af9a2341 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +area 0 144 1.945910 0.000000 80 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +west 0 83 2.484907 0.000000 192 +local 0 55 2.944439 0.000000 334 +undergradu 0 54 2.944439 0.000000 338 +advisor 0 51 2.995732 0.000000 355 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +ramakrishnan 0 16 4.174387 0.000000 972 +raghu 0 12 4.465908 0.000000 1212 +kevin 0 9 4.753590 0.000000 1482 +assistantdepart 0 8 4.875197 0.000000 1784 +coral 0 5 5.347108 0.000000 2538 +beyer 1 2 6.263398 6.263398 6103 +caution 0 2 6.263398 0.000000 4754 +pagekevin 0 1 6.957497 0.000000 17686 +beyerbey 0 1 6.957497 0.000000 17687 +researchresearch 0 1 6.957497 0.000000 17688 +coursesinstruct 0 1 6.957497 0.000000 17689 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..dc3eb026 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +offic 0 299 1.098612 0.000000 13 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +oper 0 180 1.609438 0.000000 34 +phone 1 175 1.791759 1.791759 45 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +advanc 0 99 2.302585 0.000000 130 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +associ 0 93 2.397895 0.000000 151 +west 0 83 2.484907 0.000000 192 +html 0 75 2.639057 0.000000 235 +window 1 68 2.708050 2.708050 242 +street 0 63 2.772589 0.000000 293 +back 0 60 2.833213 0.000000 297 +point 0 58 2.890372 0.000000 319 +past 0 42 3.218876 0.000000 428 +press 0 42 3.218876 0.000000 419 +cach 0 41 3.218876 0.000000 432 +microsoft 0 38 3.295837 0.000000 468 +express 0 32 3.465736 0.000000 540 +actual 0 28 3.610918 0.000000 604 +packag 0 28 3.610918 0.000000 614 +consist 0 26 3.688879 0.000000 651 +size 0 23 3.806662 0.000000 713 +miller 0 17 4.110874 0.000000 949 +todd 0 15 4.248495 0.000000 1051 +introduc 0 13 4.382027 0.000000 1139 +faster 0 11 4.553877 0.000000 1323 +bart 0 9 4.753590 0.000000 1559 +microprocessor 0 7 5.010635 0.000000 1808 +yeah 0 6 5.164786 0.000000 2195 +locomot 0 2 6.263398 0.000000 5807 +skew 0 2 6.263398 0.000000 6057 +bezenek 1 1 6.957497 6.957497 17690 +pith 1 1 6.957497 6.957497 17691 +toddm 0 1 6.957497 0.000000 17692 +cpu 0 1 6.957497 0.000000 17693 +_great 0 1 6.957497 0.000000 17694 +present_ 0 1 6.957497 0.000000 17695 +uregina 0 1 6.957497 0.000000 17696 +bayko 0 1 6.957497 0.000000 17697 +squeez 0 1 6.957497 0.000000 17698 +skateboard 0 1 6.957497 0.000000 17699 +helen 0 1 6.957497 0.000000 17700 +custer 0 1 6.957497 0.000000 17701 +_insid 0 1 6.957497 0.000000 17702 +pithi 0 1 6.957497 0.000000 17703 +abound 0 1 6.957497 0.000000 17704 +edubezenek 0 1 6.957497 0.000000 17705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..c2d1af5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 0 443 0.693147 0.000000 6 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +hour 0 165 1.791759 0.000000 46 +distribut 0 162 1.791759 0.000000 51 +click 0 142 1.945910 0.000000 78 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +schedul 0 119 2.079442 0.000000 85 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +make 0 111 2.197225 0.000000 120 +site 0 106 2.197225 0.000000 119 +section 1 94 2.397895 2.397895 149 +pictur 0 89 2.397895 0.000000 160 +grade 0 90 2.397895 0.000000 142 +comment 0 93 2.397895 0.000000 146 +info 1 85 2.484907 2.484907 176 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +mondai 0 77 2.564949 0.000000 206 +free 0 73 2.639057 0.000000 224 +onlin 0 75 2.639057 0.000000 223 +simul 0 66 2.708050 0.000000 255 +wednesdai 0 64 2.772589 0.000000 261 +back 0 60 2.833213 0.000000 297 +anoth 0 45 3.135494 0.000000 408 +review 0 42 3.218876 0.000000 425 +word 0 34 3.401197 0.000000 508 +held 0 28 3.610918 0.000000 600 +dai 0 22 3.850148 0.000000 753 +quiz 0 16 4.174387 0.000000 990 +anywai 0 15 4.248495 0.000000 1047 +speech 0 12 4.465908 0.000000 1222 +neat 0 12 4.465908 0.000000 1263 +averag 0 6 5.164786 0.000000 2098 +jpeg 0 6 5.164786 0.000000 2053 +condor 0 5 5.347108 0.000000 2577 +nathan 1 4 5.568345 5.568345 2794 +bockrath 1 3 5.857933 5.857933 3420 +viru 1 2 6.263398 6.263398 4782 +nate 0 2 6.263398 0.000000 5720 +macro 0 2 6.263398 0.000000 5686 +pageoth 0 2 6.263398 0.000000 6104 +pageback 0 1 6.957497 0.000000 17706 +oraclesend 0 1 6.957497 0.000000 17707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..bd776d1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,289 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +inform 0 412 0.693147 0.000000 8 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +last 0 314 1.098612 0.000000 14 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +wisc 0 242 1.386294 0.000000 33 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +object 1 138 1.945910 1.945910 79 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +number 0 130 2.079442 0.000000 97 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +site 0 106 2.197225 0.000000 119 +part 1 98 2.302585 2.302585 129 +take 1 97 2.302585 2.302585 134 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +associ 1 93 2.397895 2.397895 151 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +follow 1 92 2.397895 2.397895 143 +question 0 91 2.397895 0.000000 141 +real 0 93 2.397895 0.000000 144 +imag 0 91 2.397895 0.000000 161 +commun 0 95 2.397895 0.000000 157 +select 0 91 2.397895 0.000000 154 +thing 1 84 2.484907 2.484907 189 +member 1 84 2.484907 2.484907 165 +internet 1 83 2.484907 2.484907 186 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +solut 0 82 2.484907 0.000000 162 +help 0 83 2.484907 0.000000 175 +school 0 84 2.484907 0.000000 188 +complet 0 77 2.564949 0.000000 208 +appear 0 78 2.564949 0.000000 210 +orient 0 80 2.564949 0.000000 205 +master 0 76 2.564949 0.000000 216 +good 0 77 2.564949 0.000000 200 +line 1 75 2.639057 2.639057 231 +appli 0 71 2.639057 0.000000 226 +david 0 71 2.639057 0.000000 232 +addit 0 74 2.639057 0.000000 228 +free 0 73 2.639057 0.000000 224 +meet 0 72 2.639057 0.000000 229 +degre 0 69 2.708050 0.000000 259 +thursdai 0 70 2.708050 0.000000 241 +new 1 64 2.772589 2.772589 262 +creat 0 63 2.772589 0.000000 277 +organ 0 65 2.772589 0.000000 265 +visit 0 63 2.772589 0.000000 288 +import 0 65 2.772589 0.000000 282 +type 0 61 2.833213 0.000000 296 +plai 0 60 2.833213 0.000000 307 +locat 0 59 2.833213 0.000000 303 +unix 0 58 2.890372 0.000000 308 +local 0 55 2.944439 0.000000 334 +much 1 52 2.995732 2.995732 349 +case 0 51 2.995732 0.000000 351 +run 0 51 2.995732 0.000000 347 +right 0 48 3.044522 0.000000 363 +friend 0 48 3.044522 0.000000 376 +done 0 47 3.091042 0.000000 381 +howev 0 41 3.218876 0.000000 422 +realli 0 40 3.258097 0.000000 444 +societi 0 40 3.258097 0.000000 456 +prototyp 0 38 3.295837 0.000000 463 +hand 1 37 3.332205 3.332205 475 +game 0 36 3.367296 0.000000 498 +short 0 36 3.367296 0.000000 499 +everi 1 34 3.401197 3.401197 519 +word 0 34 3.401197 0.000000 508 +either 0 35 3.401197 0.000000 506 +print 0 34 3.401197 0.000000 503 +queri 0 33 3.433987 0.000000 524 +kind 0 32 3.465736 0.000000 541 +scientist 1 31 3.496508 3.496508 560 +often 0 31 3.496508 0.000000 551 +taken 0 31 3.496508 0.000000 555 +storag 0 31 3.496508 0.000000 553 +someth 0 31 3.496508 0.000000 554 +except 0 28 3.610918 0.000000 607 +quit 0 27 3.637586 0.000000 633 +administr 0 27 3.637586 0.000000 628 +though 0 27 3.637586 0.000000 622 +campu 0 27 3.637586 0.000000 623 +rather 1 26 3.688879 3.688879 642 +enjoi 0 26 3.688879 0.000000 660 +experiment 0 26 3.688879 0.000000 645 +although 1 25 3.737670 3.737670 667 +wai 1 25 3.737670 3.737670 662 +notic 0 25 3.737670 0.000000 675 +task 0 25 3.737670 0.000000 678 +store 1 24 3.761200 3.761200 693 +interpret 0 24 3.761200 0.000000 686 +consult 0 24 3.761200 0.000000 687 +alwai 0 24 3.761200 0.000000 691 +methodolog 0 23 3.806662 0.000000 733 +try 1 22 3.850148 3.850148 764 +defin 0 22 3.850148 0.000000 746 +william 0 22 3.850148 0.000000 765 +util 0 21 3.912023 0.000000 774 +voic 0 21 3.912023 0.000000 806 +tell 0 21 3.912023 0.000000 777 +wonder 1 20 3.951244 3.951244 815 +sure 0 20 3.951244 0.000000 813 +kernel 0 20 3.951244 0.000000 825 +longer 0 20 3.951244 0.000000 816 +entir 0 20 3.951244 0.000000 811 +els 1 19 4.007333 4.007333 843 +five 0 19 4.007333 0.000000 841 +histori 0 19 4.007333 0.000000 853 +along 1 18 4.060443 4.060443 878 +thoma 0 18 4.060443 0.000000 901 +seem 0 18 4.060443 0.000000 899 +whole 0 17 4.110874 0.000000 940 +anyth 0 16 4.174387 0.000000 998 +across 0 16 4.174387 0.000000 974 +enough 0 15 4.248495 0.000000 1040 +purchas 0 15 4.248495 0.000000 1030 +drive 0 15 4.248495 0.000000 1052 +goe 0 15 4.248495 0.000000 1044 +draw 0 14 4.317488 0.000000 1086 +comic 0 14 4.317488 0.000000 1103 +role 0 14 4.317488 0.000000 1101 +becam 0 14 4.317488 0.000000 1117 +care 1 13 4.382027 4.382027 1177 +everyon 0 13 4.382027 0.000000 1148 +forth 0 13 4.382027 0.000000 1186 +everyth 0 13 4.382027 0.000000 1169 +dewitt 1 12 4.465908 4.465908 1270 +reader 0 12 4.465908 0.000000 1246 +usenix 0 12 4.465908 0.000000 1240 +shore 1 11 4.553877 4.553877 1377 +road 0 11 4.553877 0.000000 1374 +lake 0 11 4.553877 0.000000 1373 +night 0 11 4.553877 0.000000 1319 +thecomput 0 10 4.653960 0.000000 1408 +drink 1 9 4.753590 4.753590 1607 +occur 0 9 4.753590 0.000000 1572 +departmentof 0 9 4.753590 0.000000 1539 +architect 0 8 4.875197 0.000000 1624 +job 0 8 4.875197 0.000000 1702 +paradis 0 8 4.875197 0.000000 1782 +port 0 8 4.875197 0.000000 1766 +burger 1 7 5.010635 5.010635 1889 +bore 1 7 5.010635 5.010635 1948 +usenet 1 7 5.010635 5.010635 1839 +throughout 0 7 5.010635 0.000000 1871 +parent 1 6 5.164786 5.164786 2204 +beer 1 6 5.164786 5.164786 2216 +sleep 1 6 5.164786 5.164786 2211 +relax 0 6 5.164786 0.000000 2120 +fiction 0 6 5.164786 0.000000 2217 +railroad 0 6 5.164786 0.000000 2161 +put 0 6 5.164786 0.000000 2017 +gate 0 6 5.164786 0.000000 2182 +famou 0 6 5.164786 0.000000 2185 +geograph 0 6 5.164786 0.000000 2236 +whatev 0 6 5.164786 0.000000 2097 +benefit 0 6 5.164786 0.000000 2213 +divers 0 6 5.164786 0.000000 2232 +semi 0 5 5.347108 0.000000 2510 +east 0 5 5.347108 0.000000 2472 +matur 0 5 5.347108 0.000000 2269 +advic 0 5 5.347108 0.000000 2509 +aircraft 1 4 5.568345 5.568345 2872 +moon 0 4 5.568345 0.000000 2991 +haven 0 4 5.568345 0.000000 3037 +hacker 1 3 5.857933 5.857933 3996 +tiger 1 3 5.857933 5.857933 3897 +roll 0 3 5.857933 0.000000 3723 +pai 0 3 5.857933 0.000000 3672 +tremend 0 3 5.857933 0.000000 3453 +insan 0 3 5.857933 0.000000 4006 +beat 0 3 5.857933 0.000000 3840 +gamma 0 3 5.857933 0.000000 3219 +workin 0 3 5.857933 0.000000 3938 +agre 0 3 5.857933 0.000000 4007 +owner 0 3 5.857933 0.000000 3531 +pilot 0 3 5.857933 0.000000 4008 +acquaint 0 3 5.857933 0.000000 3468 +weekend 0 3 5.857933 0.000000 3357 +timeoper 0 2 6.263398 0.000000 4363 +woodwork 0 2 6.263398 0.000000 5463 +brew 0 2 6.263398 0.000000 5988 +disagre 0 2 6.263398 0.000000 6105 +pursuit 0 2 6.263398 0.000000 6048 +fly 0 2 6.263398 0.000000 5937 +stripe 0 2 6.263398 0.000000 6106 +creatur 0 2 6.263398 0.000000 6107 +leap 0 2 6.263398 0.000000 5654 +tovisit 0 2 6.263398 0.000000 4686 +that 0 2 6.263398 0.000000 5111 +differentarchitectur 0 2 6.263398 0.000000 6051 +hord 0 2 6.263398 0.000000 5917 +slowli 0 2 6.263398 0.000000 5363 +pagelast 0 2 6.263398 0.000000 5793 +bolo 1 1 6.957497 6.957497 17708 +uwvax 1 1 6.957497 6.957497 17709 +josef 1 1 6.957497 6.957497 17710 +uucp 1 1 6.957497 6.957497 17711 +essen 1 1 6.957497 6.957497 17712 +hau 1 1 6.957497 6.957497 17713 +bolobologreet 0 1 6.957497 0.000000 17714 +christen 0 1 6.957497 0.000000 17715 +mebolo 0 1 6.957497 0.000000 17716 +bestexplan 0 1 6.957497 0.000000 17717 +bywhat 0 1 6.957497 0.000000 17718 +acomput 0 1 6.957497 0.000000 17719 +shudder 0 1 6.957497 0.000000 17720 +newoper 0 1 6.957497 0.000000 17721 +sameto 0 1 6.957497 0.000000 17722 +myroomm 0 1 6.957497 0.000000 17723 +sublim 0 1 6.957497 0.000000 17724 +thetig 0 1 6.957497 0.000000 17725 +blake 0 1 6.957497 0.000000 17726 +poemtyg 0 1 6.957497 0.000000 17727 +tyger 0 1 6.957497 0.000000 17728 +againin 0 1 6.957497 0.000000 17729 +ahous 0 1 6.957497 0.000000 17730 +isjosef 0 1 6.957497 0.000000 17731 +roadmonona 0 1 6.957497 0.000000 17732 +workwork 0 1 6.957497 0.000000 17733 +banana 0 1 6.957497 0.000000 17734 +grung 0 1 6.957497 0.000000 17735 +perhapssom 0 1 6.957497 0.000000 17736 +othermonth 0 1 6.957497 0.000000 17737 +intosubmiss 0 1 6.957497 0.000000 17738 +andstar 0 1 6.957497 0.000000 17739 +fordav 0 1 6.957497 0.000000 17740 +wiss 0 1 6.957497 0.000000 17741 +themadison 0 1 6.957497 0.000000 17742 +campusof 0 1 6.957497 0.000000 17743 +peninsula 0 1 6.957497 0.000000 17744 +technicalexpertis 0 1 6.957497 0.000000 17745 +newsystem 0 1 6.957497 0.000000 17746 +reviv 0 1 6.957497 0.000000 17747 +oddbal 0 1 6.957497 0.000000 17748 +tasksar 0 1 6.957497 0.000000 17749 +serverbut 0 1 6.957497 0.000000 17750 +mostlyempti 0 1 6.957497 0.000000 17751 +activitiesuwvaxi 0 1 6.957497 0.000000 17752 +svolunt 0 1 6.957497 0.000000 17753 +organizationsi 0 1 6.957497 0.000000 17754 +oftenhav 0 1 6.957497 0.000000 17755 +usersof 0 1 6.957497 0.000000 17756 +aopa 0 1 6.957497 0.000000 17757 +blitz 0 1 6.957497 0.000000 17758 +drinkingwhen 0 1 6.957497 0.000000 17759 +friendsand 0 1 6.957497 0.000000 17760 +loftili 0 1 6.957497 0.000000 17761 +labelledblitz 0 1 6.957497 0.000000 17762 +ofoctoberfest 0 1 6.957497 0.000000 17763 +chud 0 1 6.957497 0.000000 17764 +accumulateda 0 1 6.957497 0.000000 17765 +whatnotof 0 1 6.957497 0.000000 17766 +charad 0 1 6.957497 0.000000 17767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..35ce1068 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +us 0 329 1.098612 0.000000 16 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +oper 1 180 1.609438 1.609438 34 +network 0 168 1.791759 0.000000 61 +model 0 145 1.945910 0.000000 69 +welcom 0 122 2.079442 0.000000 99 +check 0 115 2.197225 0.000000 118 +world 0 115 2.197225 0.000000 126 +advanc 0 99 2.302585 0.000000 130 +search 1 95 2.397895 2.397895 155 +homepag 0 93 2.397895 0.000000 148 +would 0 67 2.708050 0.000000 251 +much 0 52 2.995732 0.000000 349 +possibl 0 47 3.091042 0.000000 378 +probabl 0 40 3.258097 0.000000 455 +intro 0 17 4.110874 0.000000 915 +brad 1 12 4.465908 4.465908 1264 +bore 0 7 5.010635 0.000000 1948 +athlet 0 7 5.010635 0.000000 1933 +altavista 0 6 5.164786 0.000000 2222 +aim 0 5 5.347108 0.000000 2477 +jazz 0 5 5.347108 0.000000 2527 +thayer 0 3 5.857933 0.000000 3441 +thec 0 3 5.857933 0.000000 3132 +badger 0 3 5.857933 0.000000 3502 +packer 0 3 5.857933 0.000000 3728 +foolish 0 2 6.263398 0.000000 6108 +pepper 0 2 6.263398 0.000000 6013 +pagesom 0 2 6.263398 0.000000 6109 +adress 0 2 6.263398 0.000000 5168 +pageuw 0 2 6.263398 0.000000 6021 +neglect 0 1 6.957497 0.000000 17768 +seminaranywai 0 1 6.957497 0.000000 17769 +beaucoup 0 1 6.957497 0.000000 17770 +boir 0 1 6.957497 0.000000 17771 +enginefind 0 1 6.957497 0.000000 17772 +wideth 0 1 6.957497 0.000000 17773 +duan 0 1 6.957497 0.000000 17774 +mclaughlin 0 1 6.957497 0.000000 17775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..eb0ddd4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +austin 0 168 1.791759 0.000000 63 +implement 0 152 1.791759 0.000000 52 +architectur 0 139 1.945910 0.000000 77 +file 0 132 1.945910 0.000000 70 +dayton 0 119 2.079442 0.000000 104 +intern 1 108 2.197225 2.197225 128 +access 0 102 2.302585 0.000000 136 +west 0 83 2.484907 0.000000 192 +symposium 1 72 2.639057 2.639057 238 +effici 0 73 2.639057 0.000000 233 +septemb 0 65 2.772589 0.000000 274 +pointer 0 48 3.044522 0.000000 368 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +arrai 0 27 3.637586 0.000000 627 +detect 0 26 3.688879 0.000000 646 +scott 1 18 4.060443 4.060443 884 +regist 0 17 4.110874 0.000000 938 +mellon 0 13 4.382027 0.000000 1179 +carnegi 0 12 4.465908 0.000000 1260 +multiscalar 0 8 4.875197 0.000000 1783 +sohi 0 6 5.164786 0.000000 2237 +microarchitectur 0 6 5.164786 0.000000 2238 +pagescott 0 4 5.568345 0.000000 2978 +breach 2 3 5.857933 11.715866 4009 +recreat 0 3 5.857933 0.000000 3990 +anatomi 0 3 5.857933 0.000000 4010 +vijaykumar 0 3 5.857933 0.000000 4011 +gurindar 1 2 6.263398 6.263398 6110 +usatel 0 2 6.263398 0.000000 6111 +educationph 0 2 6.263398 0.000000 6112 +interestscomput 0 2 6.263398 0.000000 6113 +addresseseducationresearch 0 1 6.957497 0.000000 17776 +associatesaddressesscott 0 1 6.957497 0.000000 17777 +breachdepart 0 1 6.957497 0.000000 17778 +advisorguri 0 1 6.957497 0.000000 17779 +sohiresearch 0 1 6.957497 0.000000 17780 +architecturemultiscalarpublicationsmultiscalar 0 1 6.957497 0.000000 17781 +processorsgurindar 0 1 6.957497 0.000000 17782 +vijaykumarnd 0 1 6.957497 0.000000 17783 +processorscott 0 1 6.957497 0.000000 17784 +sohith 0 1 6.957497 0.000000 17785 +errorstodd 0 1 6.957497 0.000000 17786 +sohiconfer 0 1 6.957497 0.000000 17787 +recreationwingsbeersquidtvassociatestodd 0 1 6.957497 0.000000 17788 +austindoug 0 1 6.957497 0.000000 17789 +burgerbabak 0 1 6.957497 0.000000 17790 +falsafialain 0 1 6.957497 0.000000 17791 +kagit 0 1 6.957497 0.000000 17792 +vijaykumarlast 0 1 6.957497 0.000000 17793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..9090d848 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +fall 1 181 1.609438 1.609438 40 +provid 0 121 2.079442 0.000000 94 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +make 0 111 2.197225 0.000000 120 +sinc 1 90 2.397895 2.397895 159 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +name 1 72 2.639057 2.639057 220 +differ 0 66 2.708050 0.000000 253 +get 0 46 3.091042 0.000000 380 +societi 0 40 3.258097 0.000000 456 +hand 0 37 3.332205 0.000000 475 +bookmark 0 26 3.688879 0.000000 639 +notic 0 25 3.737670 0.000000 675 +brief 0 16 4.174387 0.000000 1001 +hobbi 0 16 4.174387 0.000000 1009 +zhang 0 16 4.174387 0.000000 980 +becam 0 14 4.317488 0.000000 1117 +unfortun 0 13 4.382027 0.000000 1170 +stai 0 12 4.465908 0.000000 1215 +said 0 9 4.753590 0.000000 1571 +poor 0 8 4.875197 0.000000 1736 +perhap 0 8 4.875197 0.000000 1693 +smooth 0 7 5.010635 0.000000 1855 +whenev 0 7 5.010635 0.000000 1883 +wouldn 0 7 5.010635 0.000000 1970 +smile 0 7 5.010635 0.000000 1807 +pool 0 6 5.164786 0.000000 2225 +suni 1 5 5.347108 5.347108 2452 +skin 0 4 5.568345 0.000000 2840 +temporarili 0 3 5.857933 0.000000 3692 +crack 0 3 5.857933 0.000000 3435 +roll 0 3 5.857933 0.000000 3723 +isaac 0 3 5.857933 0.000000 3855 +albani 1 2 6.263398 6.263398 4892 +tragic 0 2 6.263398 0.000000 6114 +theblack 0 2 6.263398 0.000000 5869 +eggleston 0 2 6.263398 0.000000 4581 +bleed 0 1 6.957497 0.000000 17794 +nontrivi 0 1 6.957497 0.000000 17795 +waysher 0 1 6.957497 0.000000 17796 +underst 0 1 6.957497 0.000000 17797 +unadorn 0 1 6.957497 0.000000 17798 +pizza 0 1 6.957497 0.000000 17799 +stinkin 0 1 6.957497 0.000000 17800 +myclass 0 1 6.957497 0.000000 17801 +hypersensit 0 1 6.957497 0.000000 17802 +rockjock 0 1 6.957497 0.000000 17803 +cretin 0 1 6.957497 0.000000 17804 +brood 0 1 6.957497 0.000000 17805 +glare 0 1 6.957497 0.000000 17806 +clenchesfist 0 1 6.957497 0.000000 17807 +knuckl 0 1 6.957497 0.000000 17808 +flightyfemm 0 1 6.957497 0.000000 17809 +razz 0 1 6.957497 0.000000 17810 +asskick 0 1 6.957497 0.000000 17811 +thirdgrad 0 1 6.957497 0.000000 17812 +hardbodi 0 1 6.957497 0.000000 17813 +leatherboi 0 1 6.957497 0.000000 17814 +leer 0 1 6.957497 0.000000 17815 +atm 0 1 6.957497 0.000000 17816 +todayi 0 1 6.957497 0.000000 17817 +giggl 0 1 6.957497 0.000000 17818 +aprostitut 0 1 6.957497 0.000000 17819 +bigotri 0 1 6.957497 0.000000 17820 +pedagodi 0 1 6.957497 0.000000 17821 +goat 0 1 6.957497 0.000000 17822 +refus 0 1 6.957497 0.000000 17823 +claw 0 1 6.957497 0.000000 17824 +sssuuuhhh 0 1 6.957497 0.000000 17825 +mmuuuhhhh 0 1 6.957497 0.000000 17826 +dddduuuuuhhhhh 0 1 6.957497 0.000000 17827 +mmmmuuuhhhh 0 1 6.957497 0.000000 17828 +maaaahhhjaaaaaahhhhh 0 1 6.957497 0.000000 17829 +fffuuuhhhhh 0 1 6.957497 0.000000 17830 +yyyyyyyuuuuuhhhhh 0 1 6.957497 0.000000 17831 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 0 1 6.957497 0.000000 17832 +uuuhhh 0 1 6.957497 0.000000 17833 +uuummmm 0 1 6.957497 0.000000 17834 +uuuhhhh 0 1 6.957497 0.000000 17835 +wwwwwhhhhuuuuuhhhhh 0 1 6.957497 0.000000 17836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..d38e9c66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,155 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +paper 0 205 1.609438 0.000000 38 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +applic 2 170 1.791759 3.583518 56 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +algorithm 0 162 1.791759 0.000000 57 +file 2 132 1.945910 3.891820 70 +perform 1 143 1.945910 1.945910 74 +professor 0 137 1.945910 0.000000 76 +first 0 140 1.945910 0.000000 71 +architectur 0 139 1.945910 0.000000 77 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +schedul 0 119 2.079442 0.000000 85 +assist 0 112 2.197225 0.000000 113 +topic 0 114 2.197225 0.000000 110 +version 0 113 2.197225 0.000000 122 +manag 0 114 2.197225 0.000000 125 +specif 0 106 2.197225 0.000000 106 +techniqu 1 99 2.302585 2.302585 138 +memori 0 101 2.302585 0.000000 139 +advanc 0 99 2.302585 0.000000 130 +access 0 102 2.302585 0.000000 136 +technic 0 100 2.302585 0.000000 140 +proceed 1 93 2.397895 2.397895 152 +present 0 91 2.397895 0.000000 145 +mani 0 92 2.397895 0.000000 150 +control 1 82 2.484907 2.484907 164 +educ 1 86 2.484907 2.484907 191 +resourc 1 81 2.484907 2.484907 172 +west 0 83 2.484907 0.000000 192 +novemb 0 81 2.484907 0.000000 179 +optim 0 79 2.564949 0.000000 197 +appear 0 78 2.564949 0.000000 210 +good 0 77 2.564949 0.000000 200 +summari 0 73 2.639057 0.000000 237 +symposium 0 72 2.639057 0.000000 238 +addit 0 74 2.639057 0.000000 228 +integr 1 67 2.708050 2.708050 245 +simul 0 66 2.708050 0.000000 255 +polici 1 64 2.772589 2.772589 279 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +thesi 0 57 2.890372 0.000000 327 +summer 0 56 2.890372 0.000000 311 +talk 0 53 2.944439 0.000000 336 +investig 1 51 2.995732 2.995732 353 +particular 0 51 2.995732 0.000000 352 +physic 1 47 3.091042 3.091042 377 +cach 2 41 3.218876 6.437752 432 +streetmadison 0 38 3.295837 0.000000 474 +slide 0 38 3.295837 0.000000 467 +prototyp 0 38 3.295837 0.000000 463 +sciencesunivers 0 37 3.332205 0.000000 486 +china 0 37 3.332205 0.000000 487 +respons 0 37 3.332205 0.000000 476 +tech 1 35 3.401197 3.401197 515 +global 0 34 3.401197 0.000000 520 +john 0 33 3.433987 0.000000 532 +extend 0 32 3.465736 0.000000 539 +storag 0 31 3.496508 0.000000 553 +focus 0 29 3.583519 0.000000 584 +trace 0 25 3.737670 0.000000 677 +strategi 0 25 3.737670 0.000000 682 +disk 1 22 3.850148 3.850148 747 +alloc 1 20 3.951244 3.951244 821 +kernel 1 20 3.951244 3.951244 825 +department 0 20 3.951244 0.000000 839 +beij 0 19 4.007333 0.000000 876 +princeton 1 15 4.248495 4.248495 1042 +eduphon 0 15 4.248495 0.000000 1060 +decid 0 14 4.317488 0.000000 1075 +karlin 1 13 4.382027 4.382027 1176 +sigmetr 1 13 4.382027 4.382027 1173 +tsinghua 0 13 4.382027 0.000000 1195 +anna 1 12 4.465908 4.465908 1292 +usenix 0 12 4.465908 0.000000 1240 +isca 0 11 4.553877 0.000000 1354 +osdi 1 9 4.753590 4.753590 1534 +significantli 0 9 4.753590 0.000000 1508 +uniprocessor 1 8 4.875197 4.875197 1696 +replac 1 8 4.875197 4.875197 1668 +prefetch 2 6 5.164786 10.329572 2039 +edward 1 6 5.164786 5.164786 2050 +sciencedepart 0 6 5.164786 0.000000 2172 +carefulli 0 6 5.164786 0.000000 2045 +chosen 0 6 5.164786 0.000000 1984 +toc 0 5 5.347108 0.000000 2562 +summarymi 0 5 5.347108 0.000000 2580 +havedevelop 0 4 5.568345 0.000000 2681 +felten 1 3 5.857933 5.857933 3925 +cachingtraci 0 3 5.857933 0.000000 3923 +kimbrel 0 3 5.857933 0.000000 3924 +shorter 0 3 5.857933 0.000000 3998 +raid 0 3 5.857933 0.000000 4012 +aggress 0 3 5.857933 0.000000 3240 +wilk 0 2 6.263398 0.000000 4548 +andpostscript 0 2 6.263398 0.000000 5696 +andparallel 0 2 6.263398 0.000000 6014 +princetonunivers 1 1 6.957497 6.957497 17837 +usacao 0 1 6.957497 0.000000 17838 +cachingacf 0 1 6.957497 0.000000 17839 +tracesrec 0 1 6.957497 0.000000 17840 +papersintegr 0 1 6.957497 0.000000 17841 +schedulingpei 0 1 6.957497 0.000000 17842 +strategiespei 0 1 6.957497 0.000000 17843 +peform 0 1 6.957497 0.000000 17844 +tickertaip 0 1 6.957497 0.000000 17845 +swee 0 1 6.957497 0.000000 17846 +boon 0 1 6.957497 0.000000 17847 +shivakumar 0 1 6.957497 0.000000 17848 +venkataraman 0 1 6.957497 0.000000 17849 +talksslid 0 1 6.957497 0.000000 17850 +improvefil 0 1 6.957497 0.000000 17851 +filecach 0 1 6.957497 0.000000 17852 +individualappl 0 1 6.957497 0.000000 17853 +useit 0 1 6.957497 0.000000 17854 +fairglob 0 1 6.957497 0.000000 17855 +cachereplac 0 1 6.957497 0.000000 17856 +implementationon 0 1 6.957497 0.000000 17857 +demonstratedthat 0 1 6.957497 0.000000 17858 +informationcan 0 1 6.957497 0.000000 17859 +amdevelop 0 1 6.957497 0.000000 17860 +diskarrai 0 1 6.957497 0.000000 17861 +managementproblem 0 1 6.957497 0.000000 17862 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..f7be9154 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +includ 0 208 1.609438 0.000000 42 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +data 2 170 1.791759 3.583518 49 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +austin 0 168 1.791759 0.000000 63 +algorithm 0 162 1.791759 0.000000 57 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +hall 0 146 1.945910 0.000000 65 +model 0 145 1.945910 0.000000 69 +databas 2 122 2.079442 4.158884 86 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +manag 1 114 2.197225 2.197225 125 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +techniqu 0 99 2.302585 0.000000 138 +user 0 104 2.302585 0.000000 137 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +memori 0 101 2.302585 0.000000 139 +center 1 88 2.397895 2.397895 158 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +build 1 85 2.484907 2.484907 184 +west 0 83 2.484907 0.000000 192 +member 0 84 2.484907 0.000000 165 +environ 0 84 2.484907 0.000000 177 +academ 0 82 2.484907 0.000000 178 +ieee 0 86 2.484907 0.000000 190 +larg 0 82 2.484907 0.000000 168 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +come 0 78 2.564949 0.000000 202 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +involv 1 71 2.639057 2.639057 227 +workshop 1 71 2.639057 2.639057 239 +appli 0 71 2.639057 0.000000 226 +meet 0 72 2.639057 0.000000 229 +goal 1 66 2.708050 2.708050 250 +multimedia 1 68 2.708050 2.708050 258 +main 0 67 2.708050 0.000000 256 +experi 1 64 2.772589 2.772589 283 +evalu 1 64 2.772589 2.772589 266 +complex 1 64 2.772589 2.772589 269 +septemb 1 65 2.772589 2.772589 274 +street 0 63 2.772589 0.000000 293 +march 1 61 2.833213 2.833213 295 +share 1 59 2.833213 2.833213 304 +best 0 59 2.833213 0.000000 299 +locat 0 59 2.833213 0.000000 303 +content 0 59 2.833213 0.000000 302 +unix 0 58 2.890372 0.000000 308 +browser 0 56 2.890372 0.000000 313 +index 0 56 2.890372 0.000000 309 +publish 0 57 2.890372 0.000000 326 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +case 0 51 2.995732 0.000000 351 +move 0 47 3.091042 0.000000 382 +past 0 42 3.218876 0.000000 428 +autom 0 41 3.218876 0.000000 434 +cach 0 41 3.218876 0.000000 432 +transact 0 39 3.258097 0.000000 438 +live 0 40 3.258097 0.000000 451 +multipl 0 39 3.258097 0.000000 453 +join 0 39 3.258097 0.000000 457 +industri 0 38 3.295837 0.000000 464 +staff 0 36 3.367296 0.000000 490 +multi 0 36 3.367296 0.000000 493 +next 1 34 3.401197 3.401197 517 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +storag 0 31 3.496508 0.000000 553 +specifi 0 30 3.555348 0.000000 568 +focus 0 29 3.583519 0.000000 584 +becom 0 28 3.610918 0.000000 603 +measur 0 28 3.610918 0.000000 609 +manipul 0 27 3.637586 0.000000 624 +though 0 27 3.637586 0.000000 622 +proc 2 26 3.688879 7.377758 649 +effort 1 26 3.688879 3.688879 652 +altern 1 26 3.688879 3.688879 641 +challeng 0 26 3.688879 0.000000 653 +spent 1 25 3.737670 3.737670 676 +toward 1 25 3.737670 3.737670 668 +primari 0 25 3.737670 0.000000 669 +accur 0 25 3.737670 0.000000 680 +client 0 25 3.737670 0.000000 679 +mike 0 24 3.761200 0.000000 703 +scalabl 0 24 3.761200 0.000000 705 +known 0 24 3.761200 0.000000 702 +initi 0 23 3.806662 0.000000 717 +william 1 22 3.850148 3.850148 765 +varieti 0 22 3.850148 0.000000 740 +leav 0 21 3.912023 0.000000 772 +fine 0 20 3.951244 0.000000 822 +sigmod 1 19 4.007333 4.007333 877 +benchmark 0 19 4.007333 0.000000 859 +thoma 1 18 4.060443 4.060443 901 +statu 0 18 4.060443 0.000000 885 +repositori 1 17 4.110874 4.110874 932 +white 0 17 4.110874 0.000000 951 +jose 0 16 4.174387 0.000000 976 +upon 0 16 4.174387 0.000000 978 +taiwan 0 16 4.174387 0.000000 1006 +brown 0 16 4.174387 0.000000 977 +cambridg 0 16 4.174387 0.000000 1008 +livni 1 15 4.248495 4.248495 1053 +hybrid 0 15 4.248495 0.000000 1057 +heterogen 1 14 4.317488 4.317488 1090 +conf 2 13 4.382027 8.764054 1181 +dbm 0 13 4.382027 0.000000 1136 +forth 0 13 4.382027 0.000000 1186 +signific 0 13 4.382027 0.000000 1125 +front 0 13 4.382027 0.000000 1154 +sigmetr 0 13 4.382027 0.000000 1173 +workload 1 12 4.465908 4.465908 1210 +dewitt 1 12 4.465908 4.465908 1270 +tune 0 12 4.465908 0.000000 1227 +franc 0 12 4.465908 0.000000 1276 +shore 1 11 4.553877 4.553877 1377 +persist 1 11 4.553877 4.553877 1367 +road 0 11 4.553877 0.000000 1374 +franklin 1 10 4.653960 4.653960 1436 +naughton 1 10 4.653960 4.653960 1450 +resid 0 10 4.653960 0.000000 1461 +grain 0 10 4.653960 0.000000 1448 +rel 0 9 4.753590 0.000000 1487 +vernon 0 9 4.753590 0.000000 1556 +lock 0 9 4.753590 0.000000 1551 +morgan 0 9 4.753590 0.000000 1484 +carei 1 8 4.875197 4.875197 1781 +replac 0 8 4.875197 0.000000 1668 +ride 0 8 4.875197 0.000000 1741 +solomon 0 8 4.875197 0.000000 1716 +hash 0 8 4.875197 0.000000 1618 +portland 0 7 5.010635 0.000000 1878 +eduresearch 0 6 5.164786 0.000000 2205 +whichi 0 6 5.164786 0.000000 2056 +academia 0 6 5.164786 0.000000 2036 +tobe 0 6 5.164786 0.000000 1995 +oopsla 0 6 5.164786 0.000000 2221 +srinivasan 0 6 5.164786 0.000000 2175 +patel 0 6 5.164786 0.000000 2154 +almaden 1 5 5.347108 5.347108 2511 +minneapoli 1 5 5.347108 5.347108 2480 +fraction 0 5 5.347108 0.000000 2259 +ifip 0 5 5.347108 0.000000 2459 +tsatalo 0 5 5.347108 0.000000 2581 +england 0 5 5.347108 0.000000 2557 +kaufmann 0 5 5.347108 0.000000 2254 +harri 0 4 5.568345 0.000000 3034 +exodu 0 4 5.568345 0.000000 3075 +sabbat 0 4 5.568345 0.000000 2824 +taipei 0 4 5.568345 0.000000 2926 +chile 0 4 5.568345 0.000000 3082 +mcauliff 0 4 5.568345 0.000000 3083 +zwill 0 4 5.568345 0.000000 3076 +nashvil 0 4 5.568345 0.000000 2867 +andp 0 4 5.568345 0.000000 2811 +twelv 1 3 5.857933 5.857933 3899 +codi 1 3 5.857933 5.857933 3940 +schwarz 1 3 5.857933 5.857933 3986 +andm 1 3 5.857933 5.857933 3901 +tradeoff 0 3 5.857933 0.000000 3387 +forobject 0 3 5.857933 0.000000 3965 +oodb 0 3 5.857933 0.000000 3954 +switzerland 0 3 5.857933 0.000000 3551 +santiago 0 3 5.857933 0.000000 4013 +schuh 0 3 5.857933 0.000000 4014 +pang 0 3 5.857933 0.000000 3509 +revisit 0 3 5.857933 0.000000 3915 +haa 1 2 6.263398 6.263398 6115 +niblack 1 2 6.263398 6.263398 4364 +oodbm 1 2 6.263398 6.263398 6083 +interestsdatabas 0 2 6.263398 0.000000 6116 +theexodu 0 2 6.263398 0.000000 6076 +aimedat 0 2 6.263398 0.000000 6117 +researchgroup 0 2 6.263398 0.000000 5588 +homogen 0 2 6.263398 0.000000 4821 +roth 0 2 6.263398 0.000000 6089 +lausann 0 2 6.263398 0.000000 4955 +and 0 2 6.263398 0.000000 5241 +dataengin 0 2 6.263398 0.000000 6118 +zaharioudaki 0 2 6.263398 0.000000 6119 +modelingof 0 2 6.263398 0.000000 5734 +indistribut 0 2 6.263398 0.000000 4257 +garlic 1 1 6.957497 6.957497 17863 +arya 1 1 6.957497 6.957497 17864 +fagin 1 1 6.957497 6.957497 17865 +flickner 1 1 6.957497 6.957497 17866 +petkov 1 1 6.957497 6.957497 17867 +wimmer 1 1 6.957497 6.957497 17868 +careymichael 0 1 6.957497 0.000000 17869 +careyprofessor 0 1 6.957497 0.000000 17870 +performanceand 0 1 6.957497 0.000000 17871 +topicsof 0 1 6.957497 0.000000 17872 +algorithmsrel 0 1 6.957497 0.000000 17873 +userdatabas 0 1 6.957497 0.000000 17874 +persistentobject 0 1 6.957497 0.000000 17875 +objectmanag 0 1 6.957497 0.000000 17876 +applicationssuch 0 1 6.957497 0.000000 17877 +greatyear 0 1 6.957497 0.000000 17878 +tackl 0 1 6.957497 0.000000 17879 +anddiffer 0 1 6.957497 0.000000 17880 +thesourc 0 1 6.957497 0.000000 17881 +projectther 0 1 6.957497 0.000000 17882 +multimediainform 0 1 6.957497 0.000000 17883 +objectdatabas 0 1 6.957497 0.000000 17884 +continuedto 0 1 6.957497 0.000000 17885 +aqueri 0 1 6.957497 0.000000 17886 +pesto 0 1 6.957497 0.000000 17887 +thegarl 0 1 6.957497 0.000000 17888 +kiernan 0 1 6.957497 0.000000 17889 +orientedprogram 0 1 6.957497 0.000000 17890 +tork 0 1 6.957497 0.000000 17891 +visualdatabas 0 1 6.957497 0.000000 17892 +garlicapproach 0 1 6.957497 0.000000 17893 +luniewski 0 1 6.957497 0.000000 17894 +withd 0 1 6.957497 0.000000 17895 +kant 0 1 6.957497 0.000000 17896 +onobject 0 1 6.957497 0.000000 17897 +mehta 0 1 6.957497 0.000000 17898 +thint 0 1 6.957497 0.000000 17899 +smrc 0 1 6.957497 0.000000 17900 +withb 0 1 6.957497 0.000000 17901 +reinwald 0 1 6.957497 0.000000 17902 +desslock 0 1 6.957497 0.000000 17903 +lehman 0 1 6.957497 0.000000 17904 +pirahesh 0 1 6.957497 0.000000 17905 +tarascon 0 1 6.957497 0.000000 17906 +provenc 0 1 6.957497 0.000000 17907 +sigmodint 0 1 6.957497 0.000000 17908 +managementof 0 1 6.957497 0.000000 17909 +multivers 0 1 6.957497 0.000000 17910 +bober 0 1 6.957497 0.000000 17911 +oszu 0 1 6.957497 0.000000 17912 +dayal 0 1 6.957497 0.000000 17913 +valduriez 0 1 6.957497 0.000000 17914 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..5dcc3b59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 0 170 1.791759 0.000000 49 +hour 0 165 1.791759 0.000000 46 +assign 0 135 1.945910 0.000000 66 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +structur 0 106 2.197225 0.000000 105 +west 0 83 2.484907 0.000000 192 +mondai 0 77 2.564949 0.000000 206 +tuesdai 0 73 2.639057 0.000000 219 +fridai 0 44 3.135494 0.000000 390 +streetmadison 0 38 3.295837 0.000000 474 +edutelephon 0 10 4.653960 0.000000 1473 +chin 1 5 5.347108 5.347108 2408 +tang 0 5 5.347108 0.000000 2409 +bldg 0 4 5.568345 0.000000 2983 +biochemistri 1 3 5.857933 5.857933 3513 +cchin 1 2 6.263398 6.263398 4691 +pagechin 0 1 6.957497 0.000000 17915 +tanggradu 0 1 6.957497 0.000000 17916 +ameduc 0 1 6.957497 0.000000 17917 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..fdddf34f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +book 0 99 2.302585 0.000000 131 +real 0 93 2.397895 0.000000 144 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +internet 0 83 2.484907 0.000000 186 +come 0 78 2.564949 0.000000 202 +summari 0 73 2.639057 0.000000 237 +york 0 41 3.218876 0.000000 435 +movi 0 40 3.258097 0.000000 459 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +soon 1 36 3.367296 3.367296 494 +miscellan 0 23 3.806662 0.000000 731 +log 0 19 4.007333 0.000000 857 +usaphon 0 9 4.753590 0.000000 1600 +assistantdepart 0 8 4.875197 0.000000 1784 +cultur 0 7 5.010635 0.000000 1951 +chandra 1 6 5.164786 5.164786 2091 +altavista 0 6 5.164786 0.000000 2222 +satish 1 4 5.568345 5.568345 2833 +wodehous 0 2 6.263398 0.000000 4990 +italian 0 2 6.263398 0.000000 5993 +nostalgia 0 1 6.957497 0.000000 17918 +linksclick 0 1 6.957497 0.000000 17919 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..61466b09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +mail 0 238 1.386294 0.000000 22 +updat 0 191 1.609438 0.000000 41 +parallel 1 169 1.791759 1.791759 60 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +process 0 142 1.945910 0.000000 72 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +mathemat 0 108 2.197225 0.000000 123 +memori 1 101 2.302585 2.302585 139 +real 0 93 2.397895 0.000000 144 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +state 1 76 2.564949 2.564949 207 +messag 0 76 2.564949 0.000000 212 +appear 0 78 2.564949 0.000000 210 +decemb 0 80 2.564949 0.000000 215 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +integr 0 67 2.708050 0.000000 245 +august 0 66 2.708050 0.000000 257 +share 1 59 2.833213 2.833213 304 +automat 0 61 2.833213 0.000000 306 +suggest 0 53 2.944439 0.000000 331 +visual 1 48 3.044522 3.044522 372 +physic 0 47 3.091042 0.000000 377 +electron 0 47 3.091042 0.000000 379 +examin 1 42 3.218876 3.218876 424 +movi 0 40 3.258097 0.000000 459 +streetmadison 0 38 3.295837 0.000000 474 +vita 0 38 3.295837 0.000000 473 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 1 35 3.401197 3.401197 507 +tech 0 35 3.401197 0.000000 515 +award 0 34 3.401197 0.000000 523 +curriculum 0 33 3.433987 0.000000 535 +pass 0 28 3.610918 0.000000 611 +enhanc 0 26 3.688879 0.000000 644 +supercomput 0 25 3.737670 0.000000 681 +honor 0 23 3.806662 0.000000 729 +miscellan 0 23 3.806662 0.000000 731 +indian 1 22 3.850148 3.850148 769 +annot 0 21 3.912023 0.000000 775 +wind 0 18 4.060443 0.000000 908 +thoma 0 18 4.060443 0.000000 901 +stephen 0 11 4.553877 0.000000 1342 +laru 1 9 4.753590 4.753590 1560 +ball 0 9 4.753590 0.000000 1608 +assistantdepart 0 8 4.875197 0.000000 1784 +insert 0 8 4.875197 0.000000 1687 +gold 0 8 4.875197 0.000000 1745 +bombai 0 7 5.010635 0.000000 1972 +dream 0 6 5.164786 0.000000 2165 +merit 1 5 5.347108 5.347108 2466 +icpp 0 5 5.347108 0.000000 2382 +chilimbi 1 3 5.857933 5.857933 4015 +trishul 1 3 5.857933 5.857933 4016 +usaadvisor 0 3 5.857933 0.000000 4017 +certif 0 3 5.857933 0.000000 3859 +medal 0 3 5.857933 0.000000 3912 +cico 0 2 6.263398 0.000000 6120 +eick 0 2 6.263398 0.000000 5971 +megradu 0 1 6.957497 0.000000 17920 +designresearch 0 1 6.957497 0.000000 17921 +tunneleduc 0 1 6.957497 0.000000 17922 +publicationscachi 0 1 6.957497 0.000000 17923 +stormwatch 0 1 6.957497 0.000000 17924 +protocolstrishul 0 1 6.957497 0.000000 17925 +olympiadpresid 0 1 6.957497 0.000000 17926 +examinationcertif 0 1 6.957497 0.000000 17927 +chemistrycertif 0 1 6.957497 0.000000 17928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..0d783417 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +june 0 79 2.564949 0.000000 214 +septemb 0 65 2.772589 0.000000 274 +mark 0 44 3.135494 0.000000 403 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +hill 0 25 3.737670 0.000000 670 +indian 0 22 3.850148 0.000000 769 +madra 0 8 4.875197 0.000000 1770 +univeristi 0 8 4.875197 0.000000 1754 +studentdepart 0 5 5.347108 0.000000 2505 +usaadvisor 0 3 5.857933 0.000000 4017 +chandrasekaran 1 2 6.263398 6.263398 6121 +sashikanth 1 2 6.263398 6.263398 6122 +btech 0 2 6.263398 0.000000 6123 +csashi 0 1 6.957497 0.000000 17929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..1682d485 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +wisc 1 242 1.386294 1.386294 33 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +develop 0 174 1.791759 0.000000 53 +network 0 168 1.791759 0.000000 61 +applic 0 170 1.791759 0.000000 56 +implement 0 152 1.791759 0.000000 52 +relat 1 139 1.945910 1.945910 68 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +perform 0 143 1.945910 0.000000 74 +databas 1 122 2.079442 2.079442 86 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +manag 0 114 2.197225 0.000000 125 +world 0 115 2.197225 0.000000 126 +text 0 98 2.302585 0.000000 133 +search 0 95 2.397895 0.000000 155 +center 0 88 2.397895 0.000000 158 +librari 0 87 2.484907 0.000000 181 +internet 0 83 2.484907 0.000000 186 +free 0 73 2.639057 0.000000 224 +onlin 0 75 2.639057 0.000000 223 +nation 0 74 2.639057 0.000000 240 +java 0 70 2.708050 0.000000 248 +previou 0 62 2.772589 0.000000 290 +organ 0 65 2.772589 0.000000 265 +locat 0 59 2.833213 0.000000 303 +index 0 56 2.890372 0.000000 309 +standard 1 48 3.044522 3.044522 365 +life 0 50 3.044522 0.000000 375 +approach 0 48 3.044522 0.000000 366 +transact 0 39 3.258097 0.000000 438 +map 0 39 3.258097 0.000000 452 +prototyp 0 38 3.295837 0.000000 463 +microsoft 0 38 3.295837 0.000000 468 +open 0 38 3.295837 0.000000 469 +sciencesunivers 0 37 3.332205 0.000000 486 +global 0 34 3.401197 0.000000 520 +survei 0 35 3.401197 0.000000 513 +posit 0 31 3.496508 0.000000 552 +focu 0 30 3.555348 0.000000 571 +weather 0 28 3.610918 0.000000 618 +item 1 19 4.007333 4.007333 856 +white 0 17 4.110874 0.000000 951 +dilbert 0 16 4.174387 0.000000 996 +stock 0 16 4.174387 0.000000 1007 +track 0 15 4.248495 0.000000 1029 +doit 0 14 4.317488 0.000000 1111 +draft 0 14 4.317488 0.000000 1085 +land 0 12 4.465908 0.000000 1273 +shore 0 11 4.553877 0.000000 1377 +council 0 11 4.553877 0.000000 1364 +appl 0 11 4.553877 0.000000 1303 +market 0 11 4.553877 0.000000 1361 +consortium 0 10 4.653960 0.000000 1467 +govern 0 9 4.753590 0.000000 1581 +paradis 1 8 4.875197 4.875197 1782 +gopher 0 6 5.164786 0.000000 1982 +feder 0 5 5.347108 0.000000 2266 +wiscinfo 1 3 5.857933 5.857933 3106 +dienst 1 3 5.857933 5.857933 3640 +commerc 0 3 5.857933 0.000000 3209 +eosdi 1 2 6.263398 6.263398 6124 +nebraska 0 2 6.263398 0.000000 5574 +lincoln 0 2 6.263398 0.000000 5575 +metrowerk 0 2 6.263398 0.000000 4131 +worm 0 2 6.263398 0.000000 5775 +projectdepart 0 2 6.263398 0.000000 6125 +curt 1 1 6.957497 6.957497 17930 +ellmann 1 1 6.957497 6.957497 17931 +webgnat 0 1 6.957497 0.000000 17932 +defect 0 1 6.957497 0.000000 17933 +opengi 0 1 6.957497 0.000000 17934 +calmit 0 1 6.957497 0.000000 17935 +illustra 0 1 6.957497 0.000000 17936 +papersmiscellan 0 1 6.957497 0.000000 17937 +sitescampu 0 1 6.957497 0.000000 17938 +wyrm 0 1 6.957497 0.000000 17939 +hoard 0 1 6.957497 0.000000 17940 +wiscnet 0 1 6.957497 0.000000 17941 +netcorpor 0 1 6.957497 0.000000 17942 +paww 0 1 6.957497 0.000000 17943 +taligentsearch 0 1 6.957497 0.000000 17944 +savvi 0 1 6.957497 0.000000 17945 +webcrawl 0 1 6.957497 0.000000 17946 +winsock 0 1 6.957497 0.000000 17947 +geolog 0 1 6.957497 0.000000 17948 +gil 0 1 6.957497 0.000000 17949 +oakridg 0 1 6.957497 0.000000 17950 +datacurt 0 1 6.957497 0.000000 17951 +ellmanncurt 0 1 6.957497 0.000000 17952 +eduparadis 0 1 6.957497 0.000000 17953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..0978c0e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +yong 1 4 5.568345 5.568345 2809 +chee 1 3 5.857933 5.857933 3480 +cychan 0 2 6.263398 0.000000 4737 +pagechan 0 1 6.957497 0.000000 17954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..cb494f96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +engin 1 297 1.098612 1.098612 20 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +public 0 202 1.609438 0.000000 43 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +address 0 170 1.791759 0.000000 62 +schedul 0 119 2.079442 0.000000 85 +dayton 0 119 2.079442 0.000000 104 +mathemat 0 108 2.197225 0.000000 123 +educ 0 86 2.484907 0.000000 191 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +colleg 0 61 2.833213 0.000000 300 +friend 0 48 3.044522 0.000000 376 +physic 1 47 3.091042 3.091042 377 +math 0 44 3.135494 0.000000 402 +mechan 0 43 3.178054 0.000000 416 +statist 0 35 3.401197 0.000000 521 +lewi 0 8 4.875197 0.000000 1700 +nuclear 0 5 5.347108 0.000000 2576 +clark 0 4 5.568345 0.000000 2705 +mace 0 2 6.263398 0.000000 4849 +astronaut 0 2 6.263398 0.000000 5748 +sara 1 1 6.957497 6.957497 17955 +bauman 0 1 6.957497 0.000000 17956 +dailei 0 1 6.957497 0.000000 17957 +baumandailei 0 1 6.957497 0.000000 17958 +edugradu 0 1 6.957497 0.000000 17959 +pagessend 0 1 6.957497 0.000000 17960 +daileytu 0 1 6.957497 0.000000 17961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..fea18c10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,314 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 0 384 0.693147 0.000000 11 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +gener 0 220 1.386294 0.000000 27 +includ 1 208 1.609438 1.609438 42 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +parallel 1 169 1.791759 1.791759 60 +wisconsin 1 169 1.791759 1.791759 54 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +architectur 2 139 1.945910 3.891820 77 +perform 2 143 1.945910 3.891820 74 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +area 0 144 1.945910 0.000000 80 +first 0 140 1.945910 0.000000 71 +tool 1 117 2.079442 2.079442 93 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +confer 0 126 2.079442 0.000000 100 +studi 0 120 2.079442 0.000000 91 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +compil 0 122 2.079442 0.000000 96 +intern 1 108 2.197225 2.197225 128 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +version 0 113 2.197225 0.000000 122 +manag 0 114 2.197225 0.000000 125 +memori 2 101 2.302585 4.605170 139 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +take 0 97 2.302585 0.000000 134 +proceed 1 93 2.397895 2.397895 152 +associ 0 93 2.397895 0.000000 151 +select 0 91 2.397895 0.000000 154 +octob 0 89 2.397895 0.000000 156 +follow 0 92 2.397895 0.000000 143 +call 0 91 2.397895 0.000000 153 +ieee 1 86 2.484907 2.484907 190 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +activ 0 84 2.484907 0.000000 182 +novemb 0 81 2.484907 0.000000 179 +chang 0 82 2.484907 0.000000 163 +librari 0 87 2.484907 0.000000 181 +interfac 1 79 2.564949 2.564949 209 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +state 1 76 2.564949 2.564949 207 +dynam 0 76 2.564949 0.000000 194 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +decemb 0 80 2.564949 0.000000 215 +method 0 80 2.564949 0.000000 213 +david 2 71 2.639057 5.278114 232 +symposium 1 72 2.639057 2.639057 238 +line 0 75 2.639057 0.000000 231 +summari 0 73 2.639057 0.000000 237 +effici 0 73 2.639057 0.000000 233 +write 0 72 2.639057 0.000000 222 +simul 2 66 2.708050 5.416100 255 +integr 1 67 2.708050 2.708050 245 +august 0 66 2.708050 0.000000 257 +main 0 67 2.708050 0.000000 256 +goal 0 66 2.708050 0.000000 250 +evalu 1 64 2.772589 2.772589 266 +abstract 1 62 2.772589 2.772589 276 +new 1 64 2.772589 2.772589 262 +organ 0 65 2.772589 0.000000 265 +virtual 0 62 2.772589 0.000000 285 +result 0 65 2.772589 0.000000 281 +function 0 62 2.772589 0.000000 275 +share 2 59 2.833213 5.666426 304 +juli 1 60 2.833213 2.833213 305 +type 0 61 2.833213 0.000000 296 +major 0 56 2.890372 0.000000 315 +explor 0 58 2.890372 0.000000 324 +special 0 56 2.890372 0.000000 320 +allow 1 53 2.944439 2.944439 333 +processor 1 54 2.944439 2.944439 335 +three 0 54 2.944439 0.000000 330 +found 0 53 2.944439 0.000000 337 +hardwar 1 51 2.995732 2.995732 350 +case 1 51 2.995732 2.995732 351 +run 0 51 2.995732 0.000000 347 +tabl 0 51 2.995732 0.000000 346 +investig 0 51 2.995732 0.000000 353 +frequent 0 49 3.044522 0.000000 367 +california 1 46 3.091042 3.091042 388 +effect 0 46 3.091042 0.000000 385 +understand 0 47 3.091042 0.000000 384 +mark 1 44 3.135494 3.135494 403 +protocol 1 45 3.135494 3.135494 407 +mechan 1 43 3.178054 3.178054 416 +cach 2 41 3.218876 6.437752 432 +fast 1 42 3.218876 3.218876 429 +combin 0 42 3.218876 0.000000 421 +programm 1 39 3.258097 3.258097 445 +transact 0 39 3.258097 0.000000 438 +electr 0 38 3.295837 0.000000 461 +streetmadison 0 38 3.295837 0.000000 474 +brian 0 38 3.295837 0.000000 466 +correct 0 38 3.295837 0.000000 462 +prototyp 0 38 3.295837 0.000000 463 +origin 0 38 3.295837 0.000000 472 +sciencesunivers 0 37 3.332205 0.000000 486 +workstat 0 37 3.332205 0.000000 479 +cost 0 37 3.332205 0.000000 480 +especi 1 36 3.367296 3.367296 496 +multi 0 36 3.367296 0.000000 493 +procedur 0 36 3.367296 0.000000 488 +jame 1 35 3.401197 3.401197 507 +bibliographi 0 34 3.401197 0.000000 518 +fault 0 32 3.465736 0.000000 547 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +power 0 30 3.555348 0.000000 573 +robert 0 30 3.555348 0.000000 567 +profil 0 30 3.555348 0.000000 581 +common 0 30 3.555348 0.000000 574 +steve 0 29 3.583519 0.000000 594 +synchron 0 29 3.583519 0.000000 588 +depend 0 29 3.583519 0.000000 583 +platform 0 29 3.583519 0.000000 591 +multiprocessor 1 28 3.610918 3.610918 605 +pass 1 28 3.610918 3.610918 611 +propos 0 28 3.610918 0.000000 602 +manipul 0 27 3.637586 0.000000 624 +berkelei 1 26 3.688879 3.688879 657 +revis 0 26 3.688879 0.000000 640 +altern 0 26 3.688879 0.000000 641 +detect 0 26 3.688879 0.000000 646 +hill 1 25 3.737670 3.737670 670 +supercomput 1 25 3.737670 3.737670 681 +wai 0 25 3.737670 0.000000 662 +trace 0 25 3.737670 0.000000 677 +scalabl 0 24 3.761200 0.000000 705 +self 0 22 3.850148 0.000000 761 +reduc 0 22 3.850148 0.000000 759 +cooper 0 22 3.850148 0.000000 757 +vlsi 0 21 3.912023 0.000000 795 +annot 0 21 3.912023 0.000000 775 +fine 1 20 3.951244 3.951244 822 +department 0 20 3.951244 0.000000 839 +portabl 0 20 3.951244 0.000000 819 +exploit 0 20 3.951244 0.000000 836 +binari 0 20 3.951244 0.000000 823 +benchmark 0 19 4.007333 0.000000 859 +five 0 19 4.007333 0.000000 841 +wind 1 18 4.060443 4.060443 908 +steven 1 17 4.110874 4.110874 953 +interconnect 0 17 4.110874 0.000000 937 +asplo 0 17 4.110874 0.000000 948 +intel 0 16 4.174387 0.000000 1000 +advantag 0 16 4.174387 0.000000 987 +transfer 0 16 4.174387 0.000000 967 +upon 0 16 4.174387 0.000000 978 +overhead 1 15 4.248495 4.248495 1035 +eduphon 0 15 4.248495 0.000000 1060 +hybrid 0 15 4.248495 0.000000 1057 +remot 0 15 4.248495 0.000000 1041 +action 0 15 4.248495 0.000000 1038 +driven 0 15 4.248495 0.000000 1048 +coher 1 14 4.317488 4.317488 1109 +convent 0 14 4.317488 0.000000 1072 +block 1 13 4.382027 4.382027 1183 +tune 0 12 4.465908 0.000000 1227 +wood 2 11 4.553877 9.107754 1355 +isca 1 11 4.553877 4.553877 1354 +transpar 1 11 4.553877 4.553877 1325 +faster 0 11 4.553877 0.000000 1323 +grain 1 10 4.653960 4.653960 1448 +facilit 0 10 4.653960 0.000000 1412 +laru 1 9 4.753590 4.753590 1560 +tunnel 1 9 4.753590 4.753590 1615 +significantli 0 9 4.753590 0.000000 1508 +paradigm 1 8 4.875197 4.875197 1662 +secretari 0 8 4.875197 0.000000 1775 +uniprocessor 0 8 4.875197 0.000000 1696 +spec 0 8 4.875197 0.000000 1640 +burger 0 7 5.010635 0.000000 1889 +roger 0 7 5.010635 0.000000 1892 +hit 0 7 5.010635 0.000000 1965 +tag 0 7 5.010635 0.000000 1821 +instrument 0 7 5.010635 0.000000 1954 +duke 0 6 5.164786 0.000000 2231 +microsystem 0 6 5.164786 0.000000 2160 +ann 0 6 5.164786 0.000000 2065 +feasibl 0 6 5.164786 0.000000 2157 +byte 0 6 5.164786 0.000000 2108 +invok 0 6 5.164786 0.000000 2079 +lebeck 2 5 5.347108 10.694216 2582 +reinhardt 1 5 5.347108 5.347108 2583 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +tempest 1 5 5.347108 5.347108 2548 +ioanni 1 5 5.347108 5.347108 2553 +rewrit 1 5 5.347108 5.347108 2367 +scienceand 0 5 5.347108 0.000000 2348 +mukherje 0 5 5.347108 0.000000 2586 +accuraci 0 5 5.347108 0.000000 2450 +dougla 0 5 5.347108 0.000000 2471 +toc 0 5 5.347108 0.000000 2562 +computerarchitectur 0 5 5.347108 0.000000 2290 +focuss 0 5 5.347108 0.000000 2271 +hypothet 0 5 5.347108 0.000000 2474 +lookup 0 5 5.347108 0.000000 2399 +sparcstat 0 5 5.347108 0.000000 2406 +alvin 1 4 5.568345 5.568345 3084 +wart 1 4 5.568345 5.568345 2987 +hyder 1 4 5.568345 5.568345 2772 +schoina 1 4 5.568345 5.568345 3085 +engineeringdepart 0 4 5.568345 0.000000 2917 +decoupl 0 4 5.568345 0.000000 2898 +talluri 0 4 5.568345 0.000000 2820 +rapidli 0 4 5.568345 0.000000 2850 +myresearch 0 4 5.568345 0.000000 2842 +fulli 0 4 5.568345 0.000000 2986 +pfile 1 3 5.857933 5.857933 3100 +fingerson 0 3 5.857933 0.000000 4018 +thea 0 3 5.857933 0.000000 4019 +sklenar 0 3 5.857933 0.000000 4020 +programmingc 0 3 5.857933 0.000000 3232 +madhusudhan 0 3 5.857933 0.000000 4021 +bulk 0 3 5.857933 0.000000 4000 +anddavid 1 2 6.263398 6.263398 6126 +architecturec 1 2 6.263398 6.263398 6127 +invalid 1 2 6.263398 6.263398 5476 +callaghan 0 2 6.263398 0.000000 6128 +virtualmemori 0 2 6.263398 0.000000 4305 +anal 0 2 6.263398 0.000000 4834 +null 0 2 6.263398 0.000000 4714 +typhoon 1 1 6.957497 6.957497 17962 +usadavid 0 1 6.957497 0.000000 17963 +toonenrec 0 1 6.957497 0.000000 17964 +rahmat 0 1 6.957497 0.000000 17965 +alvi 0 1 6.957497 0.000000 17966 +informix 0 1 6.957497 0.000000 17967 +memorysteven 0 1 6.957497 0.000000 17968 +communicationshubhendu 0 1 6.957497 0.000000 17969 +costrahmat 0 1 6.957497 0.000000 17970 +multiprocessorsalvin 0 1 6.957497 0.000000 17971 +simulationalvin 0 1 6.957497 0.000000 17972 +sigmetricsmai 0 1 6.957497 0.000000 17973 +thrust 0 1 6.957497 0.000000 17974 +hybridprogram 0 1 6.957497 0.000000 17975 +similaritesof 0 1 6.957497 0.000000 17976 +calledtempest 0 1 6.957497 0.000000 17977 +handler 0 1 6.957497 0.000000 17978 +suppliedmechan 0 1 6.957497 0.000000 17979 +tempestmechan 0 1 6.957497 0.000000 17980 +novelmechan 0 1 6.957497 0.000000 17981 +tagblock 0 1 6.957497 0.000000 17982 +theloc 0 1 6.957497 0.000000 17983 +hardwareplatform 0 1 6.957497 0.000000 17984 +revers 0 1 6.957497 0.000000 17985 +translationt 0 1 6.957497 0.000000 17986 +rtlb 0 1 6.957497 0.000000 17987 +grainaccess 0 1 6.957497 0.000000 17988 +thata 0 1 6.957497 0.000000 17989 +performscompar 0 1 6.957497 0.000000 17990 +memoryprogram 0 1 6.957497 0.000000 17991 +thatoptim 0 1 6.957497 0.000000 17992 +reducingsimul 0 1 6.957497 0.000000 17993 +tightli 0 1 6.957497 0.000000 17994 +byprovid 0 1 6.957497 0.000000 17995 +referenceinvok 0 1 6.957497 0.000000 17996 +andmemori 0 1 6.957497 0.000000 17997 +processedbi 0 1 6.957497 0.000000 17998 +functionfor 0 1 6.957497 0.000000 17999 +usingbinari 0 1 6.957497 0.000000 18000 +memoryrefer 0 1 6.957497 0.000000 18001 +tothre 0 1 6.957497 0.000000 18002 +thatcal 0 1 6.957497 0.000000 18003 +onlythre 0 1 6.957497 0.000000 18004 +slower 0 1 6.957497 0.000000 18005 +techniquesto 0 1 6.957497 0.000000 18006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..29da9248 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +tool 0 117 2.079442 0.000000 93 +stuff 0 87 2.484907 0.000000 171 +school 0 84 2.484907 0.000000 188 +maintain 0 51 2.995732 0.000000 342 +grad 0 20 3.951244 0.000000 837 +wind 0 18 4.060443 0.000000 908 +doug 1 9 4.753590 4.753590 1517 +burger 1 7 5.010635 5.010635 1889 +galileo 0 4 5.568345 0.000000 3086 +damn 0 2 6.263398 0.000000 6129 +pageprofession 0 1 6.957497 0.000000 18007 +summaryresum 0 1 6.957497 0.000000 18008 +cvtranscriptcours 0 1 6.957497 0.000000 18009 +projectsadvisoraffili 0 1 6.957497 0.000000 18010 +sciwisconsin 0 1 6.957497 0.000000 18011 +tunnelpag 0 1 6.957497 0.000000 18012 +architectureuw 0 1 6.957497 0.000000 18013 +architecturesimplescalar 0 1 6.957497 0.000000 18014 +setgenericasacmperson 0 1 6.957497 0.000000 18015 +meus 0 1 6.957497 0.000000 18016 +linksphoto 0 1 6.957497 0.000000 18017 +galleryrid 0 1 6.957497 0.000000 18018 +demonhunt 0 1 6.957497 0.000000 18019 +catsbewar 0 1 6.957497 0.000000 18020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..a9257dc0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +us 1 329 1.098612 1.098612 16 +last 0 314 1.098612 0.000000 14 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +link 0 247 1.386294 0.000000 24 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +includ 0 208 1.609438 0.000000 42 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +hour 0 165 1.791759 0.000000 46 +algorithm 0 162 1.791759 0.000000 57 +address 0 170 1.791759 0.000000 62 +avail 0 169 1.791759 0.000000 48 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +professor 0 137 1.945910 0.000000 76 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +provid 0 121 2.079442 0.000000 94 +tool 0 117 2.079442 0.000000 93 +introduct 0 126 2.079442 0.000000 87 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +teach 0 108 2.197225 0.000000 112 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +mathemat 0 108 2.197225 0.000000 123 +access 0 102 2.302585 0.000000 136 +book 0 99 2.302585 0.000000 131 +peopl 0 96 2.302585 0.000000 132 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +mani 0 92 2.397895 0.000000 150 +search 0 95 2.397895 0.000000 155 +homepag 0 93 2.397895 0.000000 148 +center 0 88 2.397895 0.000000 158 +pictur 0 89 2.397895 0.000000 160 +journal 1 83 2.484907 2.484907 183 +chang 0 82 2.484907 0.000000 163 +west 0 83 2.484907 0.000000 192 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +contain 0 81 2.484907 0.000000 174 +html 0 75 2.639057 0.000000 235 +david 0 71 2.639057 0.000000 232 +practic 0 70 2.708050 0.000000 246 +written 0 63 2.772589 0.000000 278 +guid 0 63 2.772589 0.000000 267 +organ 0 65 2.772589 0.000000 265 +content 0 59 2.833213 0.000000 302 +variou 1 56 2.890372 2.890372 317 +publish 1 57 2.890372 2.890372 326 +much 0 52 2.995732 0.000000 349 +numer 1 49 3.044522 3.044522 369 +telephon 0 50 3.044522 0.000000 373 +approach 0 48 3.044522 0.000000 366 +even 0 45 3.135494 0.000000 393 +better 0 45 3.135494 0.000000 401 +math 0 44 3.135494 0.000000 402 +third 0 43 3.178054 0.000000 412 +edit 0 42 3.218876 0.000000 418 +press 0 42 3.218876 0.000000 419 +combin 0 42 3.218876 0.000000 421 +small 0 39 3.258097 0.000000 447 +author 0 39 3.258097 0.000000 450 +tutori 0 39 3.258097 0.000000 437 +paul 1 38 3.295837 3.295837 471 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +approxim 1 35 3.401197 3.401197 509 +print 0 34 3.401197 0.000000 503 +bibliographi 0 34 3.401197 0.000000 518 +next 0 34 3.401197 0.000000 517 +articl 0 33 3.433987 0.000000 530 +postal 0 30 3.555348 0.000000 580 +great 0 27 3.637586 0.000000 626 +thank 1 23 3.806662 3.806662 721 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +latest 0 21 3.912023 0.000000 785 +hous 0 21 3.912023 0.000000 801 +accept 0 18 4.060443 0.000000 879 +stand 0 18 4.060443 0.000000 891 +former 0 17 4.110874 0.000000 956 +seek 0 17 4.110874 0.000000 954 +carl 1 15 4.248495 4.248495 1024 +anonym 0 14 4.317488 0.000000 1100 +individu 0 13 4.382027 0.000000 1126 +errata 1 10 4.653960 4.653960 1403 +town 0 10 4.653960 0.000000 1458 +death 0 10 4.653960 0.000000 1457 +subset 0 10 4.653960 0.000000 1425 +latter 0 9 4.753590 0.000000 1522 +screen 0 9 4.753590 0.000000 1577 +unusu 0 9 4.753590 0.000000 1566 +end 0 9 4.753590 0.000000 1567 +driver 0 8 4.875197 0.000000 1657 +elementari 0 7 5.010635 0.000000 1825 +spline 1 6 5.164786 5.164786 2007 +usaoffic 0 6 5.164786 0.000000 2159 +button 0 5 5.347108 0.000000 2337 +door 0 5 5.347108 0.000000 2291 +areavail 0 4 5.568345 0.000000 2810 +allan 0 4 5.568345 0.000000 2849 +technion 0 4 5.568345 0.000000 2856 +boor 1 3 5.857933 5.857933 3482 +cont 0 3 5.857933 0.000000 3171 +shall 0 3 5.857933 0.000000 3891 +clickabl 1 2 6.263398 6.263398 4788 +deboor 0 2 6.263398 0.000000 4744 +thevari 0 2 6.263398 0.000000 6130 +forconstruct 0 2 6.263398 0.000000 5649 +amo 0 2 6.263398 0.000000 6094 +joi 0 2 6.263398 0.000000 5208 +hermit 0 2 6.263398 0.000000 4150 +soup 0 2 6.263398 0.000000 6131 +kitchen 0 2 6.263398 0.000000 6132 +occupi 0 2 6.263398 0.000000 5857 +ditto 1 1 6.957497 6.957497 18021 +nevai 1 1 6.957497 6.957497 18022 +pinku 1 1 6.957497 6.957497 18023 +mathematicsdepart 0 1 6.957497 0.000000 18024 +schoenberg 0 1 6.957497 0.000000 18025 +approx 0 1 6.957497 0.000000 18026 +theclick 0 1 6.957497 0.000000 18027 +ofapproxim 0 1 6.957497 0.000000 18028 +publishedpap 0 1 6.957497 0.000000 18029 +andmuch 0 1 6.957497 0.000000 18030 +foreast 0 1 6.957497 0.000000 18031 +theirtabl 0 1 6.957497 0.000000 18032 +singli 0 1 6.957497 0.000000 18033 +thishandi 0 1 6.957497 0.000000 18034 +alsoapproxim 0 1 6.957497 0.000000 18035 +slist 0 1 6.957497 0.000000 18036 +ila 0 1 6.957497 0.000000 18037 +seeviva_vi 0 1 6.957497 0.000000 18038 +alsoon 0 1 6.957497 0.000000 18039 +thehtml 0 1 6.957497 0.000000 18040 +primermight 0 1 6.957497 0.000000 18041 +ever_chang 0 1 6.957497 0.000000 18042 +griffeath 0 1 6.957497 0.000000 18043 +sprimordi 0 1 6.957497 0.000000 18044 +seeodd 0 1 6.957497 0.000000 18045 +techunix 0 1 6.957497 0.000000 18046 +nevaiif 0 1 6.957497 0.000000 18047 +makehi 0 1 6.957497 0.000000 18048 +outputavail 0 1 6.957497 0.000000 18049 +taki 0 1 6.957497 0.000000 18050 +souganid 0 1 6.957497 0.000000 18051 +andthaleia 0 1 6.957497 0.000000 18052 +zariphopoul 0 1 6.957497 0.000000 18053 +szego 0 1 6.957497 0.000000 18054 +bust 0 1 6.957497 0.000000 18055 +inscript 0 1 6.957497 0.000000 18056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..4e70902e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +time 1 293 1.098612 1.098612 17 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +link 1 247 1.386294 1.386294 24 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +group 0 183 1.609438 0.000000 36 +data 2 170 1.791759 3.583518 49 +base 1 165 1.791759 1.791759 50 +support 1 132 1.945910 1.945910 83 +construct 0 139 1.945910 0.000000 82 +file 0 132 1.945910 0.000000 70 +model 0 145 1.945910 0.000000 69 +relat 0 139 1.945910 0.000000 68 +architectur 0 139 1.945910 0.000000 77 +click 0 142 1.945910 0.000000 78 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +introduct 0 126 2.079442 0.000000 87 +intern 0 108 2.197225 0.000000 128 +version 0 113 2.197225 0.000000 122 +send 0 114 2.197225 0.000000 109 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +graphic 1 90 2.397895 2.397895 147 +proceed 1 93 2.397895 2.397895 152 +follow 0 92 2.397895 0.000000 143 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +mani 0 92 2.397895 0.000000 150 +sinc 0 90 2.397895 0.000000 159 +octob 0 89 2.397895 0.000000 156 +environ 1 84 2.484907 2.484907 177 +help 1 83 2.484907 2.484907 175 +level 0 87 2.484907 0.000000 180 +larg 0 82 2.484907 0.000000 168 +requir 0 81 2.484907 0.000000 167 +librari 0 87 2.484907 0.000000 181 +interfac 0 79 2.564949 0.000000 209 +exampl 0 77 2.564949 0.000000 195 +decemb 0 80 2.564949 0.000000 215 +dynam 0 76 2.564949 0.000000 194 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +window 0 68 2.708050 0.000000 242 +differ 0 66 2.708050 0.000000 253 +descript 0 64 2.772589 0.000000 271 +januari 0 62 2.772589 0.000000 264 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +explor 1 58 2.890372 2.890372 324 +direct 0 57 2.890372 0.000000 316 +detail 0 57 2.890372 0.000000 321 +februari 0 54 2.944439 0.000000 328 +date 0 51 2.995732 0.000000 344 +visual 2 48 3.044522 6.089044 372 +cool 0 49 3.044522 0.000000 374 +featur 0 46 3.091042 0.000000 386 +execut 0 45 3.135494 0.000000 404 +mechan 0 43 3.178054 0.000000 416 +map 0 39 3.258097 0.000000 452 +tree 0 36 3.367296 0.000000 492 +download 0 36 3.367296 0.000000 489 +michael 1 35 3.401197 3.401197 514 +next 0 34 3.401197 0.000000 517 +queri 1 33 3.433987 3.433987 524 +within 0 33 3.433987 0.000000 525 +platform 0 29 3.583519 0.000000 591 +releas 1 28 3.610918 3.610918 616 +repres 0 26 3.688879 0.000000 656 +compar 0 26 3.688879 0.000000 648 +handl 0 24 3.761200 0.000000 685 +input 1 23 3.806662 3.806662 727 +togeth 0 23 3.806662 0.000000 714 +famili 0 23 3.806662 0.000000 735 +sequenc 0 23 3.806662 0.000000 734 +variabl 0 23 3.806662 0.000000 715 +color 0 22 3.850148 0.000000 762 +flexibl 0 21 3.912023 0.000000 792 +viewer 0 21 3.912023 0.000000 787 +output 0 21 3.912023 0.000000 788 +chen 0 21 3.912023 0.000000 791 +comparison 0 19 4.007333 0.000000 863 +record 1 18 4.060443 4.060443 890 +appropri 0 18 4.060443 0.000000 883 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 1 15 4.248495 4.248495 1053 +ascii 0 15 4.248495 0.000000 1032 +biologi 0 15 4.248495 0.000000 1049 +stream 0 15 4.248495 0.000000 1015 +miron 1 14 4.317488 4.317488 1110 +save 0 14 4.317488 0.000000 1099 +individu 0 13 4.382027 0.000000 1126 +raghu 1 12 4.465908 4.465908 1212 +shape 0 12 4.465908 0.000000 1245 +solari 0 12 4.465908 0.000000 1238 +distinguish 0 11 4.553877 0.000000 1357 +abil 0 11 4.553877 0.000000 1341 +string 0 11 4.553877 0.000000 1340 +devis 2 10 4.653960 9.307920 1451 +cheng 1 10 4.653960 4.653960 1381 +relationship 0 10 4.653960 0.000000 1383 +float 0 9 4.753590 0.000000 1504 +integ 0 8 4.875197 0.000000 1688 +inproceed 0 8 4.875197 0.000000 1670 +larger 0 7 5.010635 0.000000 1875 +dataset 0 7 5.010635 0.000000 1914 +spie 1 6 5.164786 5.164786 2119 +layout 0 6 5.164786 0.000000 2183 +quick 0 6 5.164786 0.000000 2184 +medicin 1 5 5.347108 5.347108 2448 +cell 0 5 5.347108 0.000000 2274 +complementari 0 5 5.347108 0.000000 2523 +kent 0 4 5.568345 0.000000 2744 +myllymaki 1 3 5.857933 5.857933 4022 +asid 0 3 5.857933 0.000000 3770 +comad 0 3 5.857933 0.000000 3737 +wenger 0 3 5.857933 0.000000 4023 +jussi 1 2 6.263398 6.263398 6133 +hotlin 1 2 6.263398 6.263398 5967 +oneset 0 2 6.263398 0.000000 6134 +viewsof 0 2 6.263398 0.000000 6135 +birch 0 2 6.263398 0.000000 6136 +andanalysi 0 2 6.263398 0.000000 4271 +workth 0 2 6.263398 0.000000 6137 +guangshun 0 2 6.263398 0.000000 6138 +pagedevis 0 1 6.957497 0.000000 18057 +visualizationt 0 1 6.957497 0.000000 18058 +featuresexamplesin 0 1 6.957497 0.000000 18059 +depthpublicationsrel 0 1 6.957497 0.000000 18060 +workreleasecontactsfeaturesthes 0 1 6.957497 0.000000 18061 +cancontrol 0 1 6.957497 0.000000 18062 +ax 0 1 6.957497 0.000000 18063 +cursor 0 1 6.957497 0.000000 18064 +examplescheck 0 1 6.957497 0.000000 18065 +validationmolecular 0 1 6.957497 0.000000 18066 +soil 0 1 6.957497 0.000000 18067 +clusteringfinanci 0 1 6.957497 0.000000 18068 +explorationfamili 0 1 6.957497 0.000000 18069 +climatedata 0 1 6.957497 0.000000 18070 +centergeograph 0 1 6.957497 0.000000 18071 +systemsoil 0 1 6.957497 0.000000 18072 +sciencefil 0 1 6.957497 0.000000 18073 +serverprogram 0 1 6.957497 0.000000 18074 +tracesclin 0 1 6.957497 0.000000 18075 +moreexampl 0 1 6.957497 0.000000 18076 +depthfor 0 1 6.957497 0.000000 18077 +visualizationvisu 0 1 6.957497 0.000000 18078 +interfaceperform 0 1 6.957497 0.000000 18079 +issuespublicationsmiron 0 1 6.957497 0.000000 18080 +dataexplor 0 1 6.957497 0.000000 18081 +praveenseshadri 0 1 6.957497 0.000000 18082 +sequencequeri 0 1 6.957497 0.000000 18083 +themanag 0 1 6.957497 0.000000 18084 +seqproject 0 1 6.957497 0.000000 18085 +queryrecord 0 1 6.957497 0.000000 18086 +bevisu 0 1 6.957497 0.000000 18087 +informationw 0 1 6.957497 0.000000 18088 +executablesfor 0 1 6.957497 0.000000 18089 +ld_library_path 0 1 6.957497 0.000000 18090 +rundevis 0 1 6.957497 0.000000 18091 +arestat 0 1 6.957497 0.000000 18092 +shareabl 0 1 6.957497 0.000000 18093 +contactsfor 0 1 6.957497 0.000000 18094 +contactmiron 0 1 6.957497 0.000000 18095 +usersupport 0 1 6.957497 0.000000 18096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..9afab59f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +languag 0 227 1.386294 0.000000 26 +washington 0 236 1.386294 0.000000 32 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +data 2 170 1.791759 3.583518 49 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +object 2 138 1.945910 3.891820 79 +file 1 132 1.945910 1.945910 70 +relat 1 139 1.945910 1.945910 68 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +like 0 132 1.945910 0.000000 81 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +hall 0 146 1.945910 0.000000 65 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +report 0 131 2.079442 0.000000 92 +manag 1 114 2.197225 2.197225 125 +intern 0 108 2.197225 0.000000 128 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +part 0 98 2.302585 0.000000 129 +proceed 1 93 2.397895 2.397895 152 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +present 0 91 2.397895 0.000000 145 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +environ 0 84 2.484907 0.000000 177 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +interfac 0 79 2.564949 0.000000 209 +david 1 71 2.639057 2.639057 232 +summari 0 73 2.639057 0.000000 237 +name 0 72 2.639057 0.000000 220 +appli 0 71 2.639057 0.000000 226 +main 0 67 2.708050 0.000000 256 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +complex 1 64 2.772589 2.772589 269 +evalu 0 64 2.772589 0.000000 266 +prof 0 64 2.772589 0.000000 273 +creat 0 63 2.772589 0.000000 277 +januari 0 62 2.772589 0.000000 264 +type 0 61 2.833213 0.000000 296 +automat 0 61 2.833213 0.000000 306 +unix 1 58 2.890372 2.890372 308 +space 0 57 2.890372 0.000000 310 +talk 1 53 2.944439 2.944439 336 +sampl 0 53 2.944439 0.000000 339 +hardwar 1 51 2.995732 2.995732 350 +basic 1 50 3.044522 3.044522 360 +set 1 50 3.044522 3.044522 361 +telephon 0 50 3.044522 0.000000 373 +without 0 50 3.044522 0.000000 370 +anoth 0 45 3.135494 0.000000 408 +execut 0 45 3.135494 0.000000 404 +multipl 0 39 3.258097 0.000000 453 +must 0 40 3.258097 0.000000 442 +error 0 40 3.258097 0.000000 449 +field 1 37 3.332205 3.332205 482 +workstat 0 37 3.332205 0.000000 479 +connect 0 37 3.332205 0.000000 485 +either 0 35 3.401197 0.000000 506 +singl 0 34 3.401197 0.000000 510 +storag 0 31 3.496508 0.000000 553 +exist 0 30 3.555348 0.000000 569 +abl 0 30 3.555348 0.000000 566 +rang 0 30 3.555348 0.000000 565 +platform 0 29 3.583519 0.000000 591 +intend 0 28 3.610918 0.000000 599 +becom 0 28 3.610918 0.000000 603 +scale 0 28 3.610918 0.000000 613 +multiprocessor 0 28 3.610918 0.000000 605 +cluster 0 28 3.610918 0.000000 612 +manipul 0 27 3.637586 0.000000 624 +client 1 25 3.737670 3.737670 679 +wai 0 25 3.737670 0.000000 662 +store 1 24 3.761200 3.761200 693 +fellow 0 24 3.761200 0.000000 701 +serv 0 22 3.850148 0.000000 758 +varieti 0 22 3.850148 0.000000 740 +instead 0 22 3.850148 0.000000 756 +programminglanguag 0 21 3.912023 0.000000 782 +fund 0 21 3.912023 0.000000 805 +busi 0 21 3.912023 0.000000 784 +sigmod 1 19 4.007333 4.007333 877 +excel 0 19 4.007333 0.000000 868 +benchmark 0 19 4.007333 0.000000 859 +repositori 0 17 4.110874 0.000000 932 +expand 0 17 4.110874 0.000000 928 +modif 0 17 4.110874 0.000000 913 +attempt 0 17 4.110874 0.000000 917 +white 0 17 4.110874 0.000000 951 +match 1 16 4.174387 4.174387 965 +intel 0 16 4.174387 0.000000 1000 +transit 0 15 4.248495 0.000000 1046 +capabl 0 15 4.248495 0.000000 1016 +heterogen 0 14 4.317488 0.000000 1090 +signific 0 13 4.382027 0.000000 1125 +dewitt 1 12 4.465908 4.465908 1270 +target 1 12 4.465908 4.465908 1282 +emploi 1 12 4.465908 4.465908 1284 +fromindividu 0 12 4.465908 0.000000 1290 +shore 2 11 4.553877 9.107754 1377 +persist 1 11 4.553877 4.553877 1367 +michigan 0 11 4.553877 0.000000 1368 +arpa 0 11 4.553877 0.000000 1369 +naughton 1 10 4.653960 4.653960 1450 +facilit 0 10 4.653960 0.000000 1412 +franklin 0 10 4.653960 0.000000 1436 +vldb 0 10 4.653960 0.000000 1470 +invit 0 10 4.653960 0.000000 1428 +conferenceon 0 9 4.753590 0.000000 1595 +paradis 1 8 4.875197 4.875197 1782 +carei 1 8 4.875197 4.875197 1781 +solomon 0 8 4.875197 0.000000 1716 +databasesystem 0 8 4.875197 0.000000 1617 +hold 0 8 4.875197 0.000000 1645 +poor 0 8 4.875197 0.000000 1736 +polygon 0 8 4.875197 0.000000 1723 +sparc 0 7 5.010635 0.000000 1860 +geograph 1 6 5.164786 5.164786 2236 +patel 0 6 5.164786 0.000000 2154 +pub 0 6 5.164786 0.000000 2239 +compat 0 5 5.347108 0.000000 2485 +tsatalo 0 5 5.347108 0.000000 2581 +minneapoli 0 5 5.347108 0.000000 2480 +proceedingsof 0 5 5.347108 0.000000 2331 +satellit 0 4 5.568345 0.000000 3077 +exodu 0 4 5.568345 0.000000 3075 +mcauliff 0 4 5.568345 0.000000 3083 +zwill 0 4 5.568345 0.000000 3076 +chile 0 4 5.568345 0.000000 3082 +gamma 1 3 5.857933 5.857933 3219 +orth 0 3 5.857933 0.000000 3685 +paragon 0 3 5.857933 0.000000 3359 +summit 0 3 5.857933 0.000000 3684 +developeda 0 2 6.263398 0.000000 5205 +polylin 0 2 6.263398 0.000000 6079 +projecti 0 2 6.263398 0.000000 5963 +kabra 0 2 6.263398 0.000000 6139 +romn 0 1 6.957497 0.000000 18097 +databasebenchmark 0 1 6.957497 0.000000 18098 +objectiveof 0 1 6.957497 0.000000 18099 +objectsystem 0 1 6.957497 0.000000 18100 +applicationsinclud 0 1 6.957497 0.000000 18101 +capabilitiesof 0 1 6.957497 0.000000 18102 +typedobject 0 1 6.957497 0.000000 18103 +hierarchicalnam 0 1 6.957497 0.000000 18104 +interfaceto 0 1 6.957497 0.000000 18105 +toeas 0 1 6.957497 0.000000 18106 +systemenviron 0 1 6.957497 0.000000 18107 +ccwill 0 1 6.957497 0.000000 18108 +networksto 0 1 6.957497 0.000000 18109 +ajoint 0 1 6.957497 0.000000 18110 +relationaldatabas 0 1 6.957497 0.000000 18111 +thetask 0 1 6.957497 0.000000 18112 +formanag 0 1 6.957497 0.000000 18113 +modelingne 0 1 6.957497 0.000000 18114 +manipulatingmuch 0 1 6.957497 0.000000 18115 +muchbett 0 1 6.957497 0.000000 18116 +differencefrom 0 1 6.957497 0.000000 18117 +parallelismto 0 1 6.957497 0.000000 18118 +assatellit 0 1 6.957497 0.000000 18119 +withm 0 1 6.957497 0.000000 18120 +persistentappl 0 1 6.957497 0.000000 18121 +chuh 0 1 6.957497 0.000000 18122 +santiego 0 1 6.957497 0.000000 18123 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..76dc1506 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +mail 0 238 1.386294 0.000000 22 +graduat 0 215 1.386294 0.000000 31 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +includ 0 208 1.609438 0.000000 42 +algorithm 2 162 1.791759 3.583518 57 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +wisconsin 1 169 1.791759 1.791759 54 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +object 2 138 1.945910 3.891820 79 +model 2 145 1.945910 3.891820 69 +area 1 144 1.945910 1.945910 80 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +analysi 1 124 2.079442 2.079442 98 +spring 1 131 2.079442 2.079442 88 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +provid 0 121 2.079442 0.000000 94 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +make 0 111 2.197225 0.000000 120 +final 0 116 2.197225 0.000000 108 +structur 0 106 2.197225 0.000000 105 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +take 0 97 2.302585 0.000000 134 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +imag 2 91 2.397895 4.795790 161 +real 1 93 2.397895 2.397895 144 +center 1 88 2.397895 2.397895 158 +call 0 91 2.397895 0.000000 153 +graphic 0 90 2.397895 0.000000 147 +control 2 82 2.484907 4.969814 164 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +ieee 1 86 2.484907 2.484907 190 +west 0 83 2.484907 0.000000 192 +start 0 83 2.484907 0.000000 173 +chang 0 82 2.484907 0.000000 163 +appear 1 78 2.564949 2.564949 210 +orient 1 80 2.564949 2.564949 205 +dynam 0 76 2.564949 0.000000 194 +refer 0 78 2.564949 0.000000 203 +complet 0 77 2.564949 0.000000 208 +workshop 1 71 2.639057 2.639057 239 +intellig 1 72 2.639057 2.639057 225 +view 2 70 2.708050 5.416100 254 +order 1 69 2.708050 2.708050 249 +goal 0 66 2.708050 0.000000 250 +main 0 67 2.708050 0.000000 256 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +import 1 65 2.772589 2.772589 282 +artifici 1 63 2.772589 2.772589 280 +experi 0 64 2.772589 0.000000 283 +result 0 65 2.772589 0.000000 281 +guid 0 63 2.772589 0.000000 267 +abstract 0 62 2.772589 0.000000 276 +descript 0 64 2.772589 0.000000 271 +simpl 0 60 2.833213 0.000000 298 +type 0 61 2.833213 0.000000 296 +juli 0 60 2.833213 0.000000 305 +explor 1 58 2.890372 2.890372 324 +point 1 58 2.890372 2.890372 319 +space 0 57 2.890372 0.000000 310 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +finger 0 52 2.995732 0.000000 354 +investig 0 51 2.995732 0.000000 353 +visual 2 48 3.044522 6.089044 372 +without 1 50 3.044522 3.044522 370 +basic 0 50 3.044522 0.000000 360 +approach 0 48 3.044522 0.000000 366 +move 1 47 3.091042 3.091042 382 +understand 1 47 3.091042 3.091042 384 +could 0 46 3.091042 0.000000 383 +adapt 0 46 3.091042 0.000000 387 +possibl 0 47 3.091042 0.000000 378 +physic 0 47 3.091042 0.000000 377 +mark 1 44 3.135494 3.135494 403 +around 0 43 3.178054 0.000000 415 +vision 2 41 3.218876 6.437752 430 +combin 1 42 3.218876 3.218876 421 +linear 0 41 3.218876 0.000000 431 +press 0 42 3.218876 0.000000 419 +autom 0 41 3.218876 0.000000 434 +continu 1 39 3.258097 3.258097 448 +map 0 39 3.258097 0.000000 452 +societi 0 40 3.258097 0.000000 456 +paul 1 38 3.295837 3.295837 471 +streetmadison 0 38 3.295837 0.000000 474 +vita 0 38 3.295837 0.000000 473 +close 0 38 3.295837 0.000000 465 +correct 0 38 3.295837 0.000000 462 +brian 0 38 3.295837 0.000000 466 +purpos 1 37 3.332205 3.332205 481 +sciencesunivers 0 37 3.332205 0.000000 486 +connect 0 37 3.332205 0.000000 485 +procedur 0 36 3.367296 0.000000 488 +especi 0 36 3.367296 0.000000 496 +robot 0 36 3.367296 0.000000 497 +global 1 34 3.401197 3.401197 520 +represent 1 35 3.401197 3.401197 512 +either 0 35 3.401197 0.000000 506 +curriculum 0 33 3.433987 0.000000 535 +taught 0 33 3.433987 0.000000 526 +rang 0 30 3.555348 0.000000 565 +power 0 30 3.555348 0.000000 573 +chair 0 29 3.583519 0.000000 596 +steve 0 29 3.583519 0.000000 594 +progress 0 28 3.610918 0.000000 598 +measur 0 28 3.610918 0.000000 609 +proc 2 26 3.688879 7.377758 649 +detect 1 26 3.688879 3.688879 646 +bookmark 0 26 3.688879 0.000000 639 +toward 1 25 3.737670 3.737670 668 +task 0 25 3.737670 0.000000 678 +accur 0 25 3.737670 0.000000 680 +trace 0 25 3.737670 0.000000 677 +strategi 0 25 3.737670 0.000000 682 +motion 2 24 3.761200 7.522400 699 +pattern 1 24 3.761200 3.761200 689 +flow 0 24 3.761200 0.000000 700 +recognit 1 23 3.806662 3.806662 723 +displai 1 23 3.806662 3.806662 712 +input 1 23 3.806662 3.806662 727 +mobil 0 23 3.806662 0.000000 730 +sequenc 0 23 3.806662 0.000000 734 +defin 1 22 3.850148 3.850148 746 +period 1 22 3.850148 3.850148 743 +geometri 0 22 3.850148 0.000000 752 +path 1 21 3.912023 3.912023 778 +output 0 21 3.912023 0.000000 788 +navig 0 21 3.912023 0.000000 796 +viewer 0 21 3.912023 0.000000 787 +synthesi 1 20 3.951244 3.951244 834 +basi 0 20 3.951244 0.000000 828 +boston 0 19 4.007333 0.000000 862 +separ 0 19 4.007333 0.000000 844 +dimension 1 18 4.060443 4.060443 909 +behavior 1 18 4.060443 4.060443 881 +four 0 18 4.060443 0.000000 905 +differenti 0 17 4.110874 0.000000 921 +coupl 0 17 4.110874 0.000000 939 +precis 0 15 4.248495 0.000000 1023 +capabl 0 15 4.248495 0.000000 1016 +track 0 15 4.248495 0.000000 1029 +scene 2 14 4.317488 8.634976 1114 +camera 1 14 4.317488 4.317488 1115 +conf 1 13 4.382027 4.382027 1181 +charl 1 13 4.382027 4.382027 1149 +step 0 13 4.382027 0.000000 1138 +coordin 0 13 4.382027 0.000000 1182 +believ 0 13 4.382027 0.000000 1187 +shape 1 12 4.465908 4.465908 1245 +fix 0 11 4.553877 0.000000 1327 +valid 0 11 4.553877 0.000000 1299 +bill 0 11 4.553877 0.000000 1297 +correspond 1 10 4.653960 4.653960 1382 +edutelephon 0 10 4.653960 0.000000 1473 +custom 0 10 4.653960 0.000000 1414 +acquisit 0 10 4.653960 0.000000 1465 +earth 0 10 4.653960 0.000000 1463 +dyer 2 9 4.753590 9.507180 1573 +surfac 1 9 4.753590 4.753590 1574 +observ 1 9 4.753590 4.753590 1578 +leader 0 9 4.753590 0.000000 1576 +intermedi 0 9 4.753590 0.000000 1497 +recoveri 0 9 4.753590 0.000000 1474 +morgan 0 9 4.753590 0.000000 1484 +invari 1 8 4.875197 4.875197 1748 +autonom 0 8 4.875197 0.000000 1749 +siggraph 0 8 4.875197 0.000000 1773 +irregular 0 8 4.875197 0.000000 1768 +curv 0 8 4.875197 0.000000 1656 +edg 0 8 4.875197 0.000000 1647 +textur 0 8 4.875197 0.000000 1677 +seitz 2 7 5.010635 10.021270 1976 +morph 1 7 5.010635 5.010635 1937 +interpol 1 7 5.010635 5.010635 1823 +paramet 0 7 5.010635 0.000000 1796 +smooth 0 7 5.010635 0.000000 1855 +dimens 0 7 5.010635 0.000000 1930 +stereo 0 7 5.010635 0.000000 1818 +viewpoint 1 6 5.164786 5.164786 2116 +reconstruct 1 6 5.164786 5.164786 2170 +recov 1 6 5.164786 5.164786 2235 +maryland 0 6 5.164786 0.000000 2140 +kluwer 0 6 5.164786 0.000000 2143 +bestor 0 6 5.164786 0.000000 2099 +cyclic 1 5 5.347108 5.347108 2383 +unknown 1 5 5.347108 5.347108 2318 +provabl 1 5 5.347108 5.347108 2558 +affin 1 5 5.347108 5.347108 2378 +recogn 0 5 5.347108 0.000000 2302 +revolut 0 5 5.347108 0.000000 2315 +jain 0 5 5.347108 0.000000 2332 +adjust 0 5 5.347108 0.000000 2422 +rigid 0 5 5.347108 0.000000 2432 +gareth 0 5 5.347108 0.000000 2392 +connectionist 0 5 5.347108 0.000000 2430 +bradlei 0 5 5.347108 0.000000 2554 +contour 1 4 5.568345 5.568345 2812 +usa 0 4 5.568345 0.000000 3080 +cvpr 0 4 5.568345 0.000000 2761 +theus 0 4 5.568345 0.000000 2992 +simplifi 0 4 5.568345 0.000000 3066 +satellit 0 4 5.568345 0.000000 3077 +shah 0 4 5.568345 0.000000 2814 +lumelski 0 4 5.568345 0.000000 2837 +harri 0 4 5.568345 0.000000 3034 +asid 0 3 5.857933 0.000000 3770 +stationari 0 3 5.857933 0.000000 3861 +alamito 0 3 5.857933 0.000000 3558 +lattic 0 3 5.857933 0.000000 3721 +groupcours 0 3 5.857933 0.000000 3092 +macc 0 3 5.857933 0.000000 3414 +kutulako 1 2 6.263398 6.263398 6064 +hibbard 1 2 6.263398 6.263398 6066 +stewart 1 2 6.263398 6.263398 5739 +acquir 0 2 6.263398 0.000000 5557 +panoram 0 2 6.263398 0.000000 4755 +howto 0 2 6.263398 0.000000 5761 +arbitrarili 0 2 6.263398 0.000000 5791 +discrimin 0 2 6.263398 0.000000 6140 +festschrift 0 2 6.263398 0.000000 6141 +rosenfeld 0 2 6.263398 0.000000 4495 +articul 0 2 6.263398 0.000000 5799 +kyro 0 2 6.263398 0.000000 6063 +rochest 0 2 6.263398 0.000000 6142 +seal 1 1 6.957497 6.957497 18124 +allmen 1 1 6.957497 6.957497 18125 +kjell 1 1 6.957497 6.957497 18126 +pagecharl 0 1 6.957497 0.000000 18127 +dyerprofessordepart 0 1 6.957497 0.000000 18128 +infoph 0 1 6.957497 0.000000 18129 +visualizationgroup 0 1 6.957497 0.000000 18130 +groupprogram 0 1 6.957497 0.000000 18131 +synthesisth 0 1 6.957497 0.000000 18132 +controllingin 0 1 6.957497 0.000000 18133 +cameraof 0 1 6.957497 0.000000 18134 +videostream 0 1 6.957497 0.000000 18135 +whicha 0 1 6.957497 0.000000 18136 +througha 0 1 6.957497 0.000000 18137 +thesit 0 1 6.957497 0.000000 18138 +predetermin 0 1 6.957497 0.000000 18139 +researchquest 0 1 6.957497 0.000000 18140 +synthesizenew 0 1 6.957497 0.000000 18141 +reconstructiona 0 1 6.957497 0.000000 18142 +innovativetechniqu 0 1 6.957497 0.000000 18143 +callview 0 1 6.957497 0.000000 18144 +basisimag 0 1 6.957497 0.000000 18145 +explorationcomput 0 1 6.957497 0.000000 18146 +controllingcamera 0 1 6.957497 0.000000 18147 +purposefulli 0 1 6.957497 0.000000 18148 +theposit 0 1 6.957497 0.000000 18149 +adjustviewpoint 0 1 6.957497 0.000000 18150 +forsolv 0 1 6.957497 0.000000 18151 +findspecif 0 1 6.957497 0.000000 18152 +unknownshap 0 1 6.957497 0.000000 18153 +appearanceof 0 1 6.957497 0.000000 18154 +computationsrequir 0 1 6.957497 0.000000 18155 +andelimin 0 1 6.957497 0.000000 18156 +thecamera 0 1 6.957497 0.000000 18157 +towardsviewpoint 0 1 6.957497 0.000000 18158 +viewedobject 0 1 6.957497 0.000000 18159 +thisapproach 0 1 6.957497 0.000000 18160 +visualizationin 0 1 6.957497 0.000000 18161 +techniquescap 0 1 6.957497 0.000000 18162 +specificgraph 0 1 6.957497 0.000000 18163 +displayingarbitrari 0 1 6.957497 0.000000 18164 +commonfram 0 1 6.957497 0.000000 18165 +algorithmexecut 0 1 6.957497 0.000000 18166 +dataanalysi 0 1 6.957497 0.000000 18167 +forexperi 0 1 6.957497 0.000000 18168 +visualizingintermedi 0 1 6.957497 0.000000 18169 +forproblem 0 1 6.957497 0.000000 18170 +cloud 0 1 6.957497 0.000000 18171 +azriel 0 1 6.957497 0.000000 18172 +occlud 0 1 6.957497 0.000000 18173 +battaiola 0 1 6.957497 0.000000 18174 +santek 0 1 6.957497 0.000000 18175 +voidrot 0 1 6.957497 0.000000 18176 +martinez 0 1 6.957497 0.000000 18177 +liangyin 0 1 6.957497 0.000000 18178 +yuph 0 1 6.957497 0.000000 18179 +whibbard 0 1 6.957497 0.000000 18180 +onlattic 0 1 6.957497 0.000000 18181 +kiriako 0 1 6.957497 0.000000 18182 +ofobserv 0 1 6.957497 0.000000 18183 +iutech 0 1 6.957497 0.000000 18184 +spatiotempor 0 1 6.957497 0.000000 18185 +brent 0 1 6.957497 0.000000 18186 +dimensionalshap 0 1 6.957497 0.000000 18187 +plantinga 0 1 6.957497 0.000000 18188 +wheaton 0 1 6.957497 0.000000 18189 +representationfor 0 1 6.957497 0.000000 18190 +ccsua 0 1 6.957497 0.000000 18191 +ctstateu 0 1 6.957497 0.000000 18192 +measureslink 0 1 6.957497 0.000000 18193 +interestmi 0 1 6.957497 0.000000 18194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..43c9fe9a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +class 0 199 1.609438 0.000000 37 +welcom 1 122 2.079442 2.079442 99 +check 0 115 2.197225 0.000000 118 +teach 0 108 2.197225 0.000000 112 +section 0 94 2.397895 0.000000 149 +educ 0 86 2.484907 0.000000 191 +friend 0 48 3.044522 0.000000 376 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +curriculum 0 33 3.433987 0.000000 535 +task 0 25 3.737670 0.000000 678 +bring 0 10 4.653960 0.000000 1430 +appreci 0 5 5.347108 0.000000 2374 +patienc 0 2 6.263398 0.000000 5466 +machinew 0 1 6.957497 0.000000 18195 +arduou 0 1 6.957497 0.000000 18196 +vitaecheck 0 1 6.957497 0.000000 18197 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..547dbb00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,166 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +time 1 293 1.098612 1.098612 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +support 1 132 1.945910 1.945910 83 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +dayton 0 119 2.079442 0.000000 104 +report 0 131 2.079442 0.000000 92 +machin 0 129 2.079442 0.000000 95 +check 1 115 2.197225 2.197225 118 +make 1 111 2.197225 2.197225 120 +intern 1 108 2.197225 2.197225 128 +assist 1 112 2.197225 2.197225 113 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +part 0 98 2.302585 0.000000 129 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +call 0 91 2.397895 0.000000 153 +question 0 91 2.397895 0.000000 141 +real 0 93 2.397895 0.000000 144 +sinc 0 90 2.397895 0.000000 159 +homepag 0 93 2.397895 0.000000 148 +thing 1 84 2.484907 2.484907 189 +west 0 83 2.484907 0.000000 192 +help 0 83 2.484907 0.000000 175 +requir 0 81 2.484907 0.000000 167 +complet 1 77 2.564949 2.564949 208 +want 0 79 2.564949 0.000000 199 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +servic 1 72 2.639057 2.639057 236 +name 0 72 2.639057 0.000000 220 +nation 0 74 2.639057 0.000000 240 +multimedia 1 68 2.708050 2.708050 258 +practic 0 70 2.708050 0.000000 246 +laboratori 0 63 2.772589 0.000000 292 +creat 0 63 2.772589 0.000000 277 +locat 0 59 2.833213 0.000000 303 +room 0 59 2.833213 0.000000 301 +unix 0 58 2.890372 0.000000 308 +instruct 0 53 2.944439 0.000000 332 +talk 0 53 2.944439 0.000000 336 +digit 1 52 2.995732 2.995732 348 +date 0 51 2.995732 0.000000 344 +cool 0 49 3.044522 0.000000 374 +standard 0 48 3.044522 0.000000 365 +archiv 0 49 3.044522 0.000000 364 +could 0 46 3.091042 0.000000 383 +video 1 44 3.135494 3.135494 405 +even 0 45 3.135494 0.000000 393 +answer 0 45 3.135494 0.000000 391 +discuss 0 45 3.135494 0.000000 399 +made 0 44 3.135494 0.000000 398 +around 0 43 3.178054 0.000000 415 +societi 0 40 3.258097 0.000000 456 +purpos 1 37 3.332205 3.332205 481 +curriculum 0 33 3.433987 0.000000 535 +anim 0 31 3.496508 0.000000 557 +domain 0 30 3.555348 0.000000 564 +secur 0 30 3.555348 0.000000 577 +full 0 28 3.610918 0.000000 615 +never 0 25 3.737670 0.000000 671 +spent 0 25 3.737670 0.000000 676 +magazin 0 24 3.761200 0.000000 704 +serv 0 22 3.850148 0.000000 758 +half 0 21 3.912023 0.000000 776 +among 0 21 3.912023 0.000000 781 +wonder 0 20 3.951244 0.000000 815 +eric 0 19 4.007333 0.000000 870 +anderson 0 19 4.007333 0.000000 860 +offici 0 18 4.060443 0.000000 894 +edulast 0 17 4.110874 0.000000 927 +explan 0 16 4.174387 0.000000 985 +biologi 1 15 4.248495 4.248495 1049 +susan 0 15 4.248495 0.000000 1050 +shown 0 14 4.317488 0.000000 1080 +wait 1 13 4.382027 4.382027 1168 +philosophi 0 13 4.382027 0.000000 1167 +neat 0 12 4.465908 0.000000 1263 +entertain 0 12 4.465908 0.000000 1286 +holidai 0 12 4.465908 0.000000 1224 +see 0 11 4.553877 0.000000 1337 +fix 0 11 4.553877 0.000000 1327 +leader 0 9 4.753590 0.000000 1576 +mainten 0 9 4.753590 0.000000 1543 +told 0 8 4.875197 0.000000 1658 +scout 1 7 5.010635 5.010635 1903 +molecular 1 7 5.010635 5.010635 1887 +explain 1 7 5.010635 5.010635 1816 +monei 0 7 5.010635 0.000000 1934 +philosoph 0 7 5.010635 0.000000 1904 +meant 0 6 5.164786 0.000000 2055 +lucki 0 6 5.164786 0.000000 2163 +mac 1 5 5.347108 5.347108 2292 +registr 0 5 5.347108 0.000000 2249 +commod 0 5 5.347108 0.000000 2415 +girlfriend 0 5 5.347108 0.000000 2579 +billi 0 5 5.347108 0.000000 2404 +couldn 0 4 5.568345 0.000000 2977 +green 0 4 5.568345 0.000000 2848 +pageer 0 3 5.857933 0.000000 3776 +ofwisconsin 0 3 5.857933 0.000000 4002 +pete 0 3 5.857933 0.000000 3865 +specialist 0 3 5.857933 0.000000 3319 +lauri 0 3 5.857933 0.000000 3867 +wit 0 3 5.857933 0.000000 4005 +popul 0 3 5.857933 0.000000 3235 +facstaff 0 3 5.857933 0.000000 3433 +hazen 0 2 6.263398 0.000000 6143 +calcari 0 2 6.263398 0.000000 6144 +devri 0 2 6.263398 0.000000 6145 +broken 0 2 6.263398 0.000000 5074 +mice 0 2 6.263398 0.000000 5069 +shameless 0 2 6.263398 0.000000 6146 +salon 0 2 6.263398 0.000000 5827 +nixon 0 2 6.263398 0.000000 5868 +hazennon 0 1 6.957497 0.000000 18198 +professorroom 0 1 6.957497 0.000000 18199 +fornet 0 1 6.957497 0.000000 18200 +elegantli 0 1 6.957497 0.000000 18201 +fearless 0 1 6.957497 0.000000 18202 +withtech 0 1 6.957497 0.000000 18203 +capitalist 0 1 6.957497 0.000000 18204 +pragmatist 0 1 6.957497 0.000000 18205 +metaphys 0 1 6.957497 0.000000 18206 +makethi 0 1 6.957497 0.000000 18207 +drosophila 0 1 6.957497 0.000000 18208 +geneticist 0 1 6.957497 0.000000 18209 +ezin 0 1 6.957497 0.000000 18210 +shockwav 0 1 6.957497 0.000000 18211 +kudon 0 1 6.957497 0.000000 18212 +quicktimevr 0 1 6.957497 0.000000 18213 +documentari 0 1 6.957497 0.000000 18214 +plight 0 1 6.957497 0.000000 18215 +bosnia 0 1 6.957497 0.000000 18216 +uproot 0 1 6.957497 0.000000 18217 +preslei 0 1 6.957497 0.000000 18218 +meetingsejhazen 0 1 6.957497 0.000000 18219 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..fb82ad09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +offic 0 299 1.098612 0.000000 13 +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +illinoi 0 7 5.010635 0.000000 1941 +tina 0 3 5.857933 0.000000 3744 +urbana 0 3 5.857933 0.000000 3879 +eliassi 1 2 6.263398 6.263398 6147 +champaign 0 2 6.263398 0.000000 5671 +pagetina 0 1 6.957497 0.000000 18220 +bldgphone 0 1 6.957497 0.000000 18221 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..0348a464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html @@ -0,0 +1 @@ +term, tf, in documents count, idf, tfidf, wordid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..5139ac74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +parallel 0 169 1.791759 0.000000 60 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +professor 0 137 1.945910 0.000000 76 +assign 0 135 1.945910 0.000000 66 +high 1 130 2.079442 2.079442 101 +dayton 0 119 2.079442 0.000000 104 +report 0 131 2.079442 0.000000 92 +intern 1 108 2.197225 2.197225 128 +topic 0 114 2.197225 0.000000 110 +technic 0 100 2.302585 0.000000 140 +proceed 1 93 2.397895 2.397895 152 +west 0 83 2.484907 0.000000 192 +level 0 87 2.484907 0.000000 180 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +april 0 77 2.564949 0.000000 196 +resum 0 79 2.564949 0.000000 217 +symposium 1 72 2.639057 2.639057 238 +street 0 63 2.772589 0.000000 293 +instruct 1 53 2.944439 2.944439 332 +advisor 0 51 2.995732 0.000000 355 +approach 1 48 3.044522 3.044522 366 +anoth 0 45 3.135494 0.000000 408 +mechan 0 43 3.178054 0.000000 416 +cach 1 41 3.218876 3.218876 432 +annual 1 40 3.258097 3.258097 458 +electr 0 38 3.295837 0.000000 461 +jame 1 35 3.401197 3.401197 507 +steve 1 29 3.583519 3.583519 594 +trace 1 25 3.737670 3.737670 677 +smith 1 20 3.951244 3.951244 820 +eric 1 19 4.007333 4.007333 870 +predict 1 19 4.007333 4.007333 855 +latenc 1 16 4.174387 4.174387 993 +condit 0 16 4.174387 0.000000 975 +drive 0 15 4.248495 0.000000 1052 +johnson 0 13 4.382027 0.000000 1162 +bandwidth 1 11 4.553877 4.553877 1365 +branch 1 11 4.553877 4.553877 1318 +multiscalar 0 8 4.875197 0.000000 1783 +erik 0 8 4.875197 0.000000 1701 +microarchitectur 1 6 5.164786 5.164786 2238 +fetch 1 5 5.347108 5.347108 2567 +kestrel 0 4 5.568345 0.000000 2990 +confid 1 3 5.857933 5.857933 3691 +bennett 1 3 5.857933 5.857933 4024 +cold 0 3 5.857933 0.000000 3637 +rotenberg 1 1 6.957497 6.957497 18222 +passsth 0 1 6.957497 0.000000 18223 +budweisth 0 1 6.957497 0.000000 18224 +ericro 0 1 6.957497 0.000000 18225 +smithresearch 0 1 6.957497 0.000000 18226 +mispredict 0 1 6.957497 0.000000 18227 +tolerancepubl 0 1 6.957497 0.000000 18228 +jacobsen 0 1 6.957497 0.000000 18229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..7187c026 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +read 0 154 1.791759 0.000000 47 +phone 0 175 1.791759 0.000000 45 +like 1 132 1.945910 1.945910 81 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +peopl 0 96 2.302585 0.000000 132 +west 0 83 2.484907 0.000000 192 +school 0 84 2.484907 0.000000 188 +june 1 79 2.564949 2.564949 214 +decemb 0 80 2.564949 0.000000 215 +would 1 67 2.708050 2.708050 251 +evalu 0 64 2.772589 0.000000 266 +streetmadison 0 38 3.295837 0.000000 474 +electr 0 38 3.295837 0.000000 461 +sciencesunivers 0 37 3.332205 0.000000 486 +next 0 34 3.401197 0.000000 517 +idea 0 32 3.465736 0.000000 545 +measur 0 28 3.610918 0.000000 609 +american 0 27 3.637586 0.000000 634 +rather 1 26 3.688879 3.688879 642 +miscellan 0 23 3.806662 0.000000 731 +listen 0 18 4.060443 0.000000 907 +drink 0 9 4.753590 0.000000 1607 +french 0 9 4.753590 0.000000 1511 +assistantdepart 0 8 4.875197 0.000000 1784 +fail 0 8 4.875197 0.000000 1655 +convers 0 8 4.875197 0.000000 1673 +architect 0 8 4.875197 0.000000 1624 +partner 0 8 4.875197 0.000000 1648 +hack 0 7 5.010635 0.000000 1950 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +suni 1 5 5.347108 5.347108 2452 +queen 0 4 5.568345 0.000000 2919 +buffalo 1 2 6.263398 6.263398 4947 +usatel 0 2 6.263398 0.000000 6111 +shubu 0 2 6.263398 0.000000 6148 +crime 0 2 6.263398 0.000000 5972 +mentorcultresearch 0 1 6.957497 0.000000 18230 +modelseduc 0 1 6.957497 0.000000 18231 +morf 0 1 6.957497 0.000000 18232 +dionosi 0 1 6.957497 0.000000 18233 +hillari 0 1 6.957497 0.000000 18234 +profan 0 1 6.957497 0.000000 18235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..5dd5670e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +engin 0 297 1.098612 0.000000 20 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +link 0 247 1.386294 0.000000 24 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +base 0 165 1.791759 0.000000 50 +contact 0 153 1.791759 0.000000 59 +avail 0 169 1.791759 0.000000 48 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +process 0 142 1.945910 0.000000 72 +architectur 0 139 1.945910 0.000000 77 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +machin 0 129 2.079442 0.000000 95 +mathemat 1 108 2.197225 2.197225 123 +structur 1 106 2.197225 2.197225 105 +theori 0 111 2.197225 0.000000 127 +look 0 107 2.197225 0.000000 115 +techniqu 0 99 2.302585 0.000000 138 +associ 0 93 2.397895 0.000000 151 +center 0 88 2.397895 0.000000 158 +larg 1 82 2.484907 2.484907 168 +member 0 84 2.484907 0.000000 165 +west 0 83 2.484907 0.000000 192 +optim 1 79 2.564949 2.564949 197 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +street 0 63 2.772589 0.000000 293 +januari 0 62 2.772589 0.000000 264 +extens 0 53 2.944439 0.000000 340 +investig 1 51 2.995732 2.995732 353 +particular 0 51 2.995732 0.000000 352 +telephon 0 50 3.044522 0.000000 373 +numer 0 49 3.044522 0.000000 369 +effect 1 46 3.091042 3.091042 385 +electron 0 47 3.091042 0.000000 379 +linear 0 41 3.218876 0.000000 431 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +purpos 0 37 3.332205 0.000000 481 +michael 1 35 3.401197 3.401197 514 +within 0 33 3.433987 0.000000 525 +graph 0 30 3.555348 0.000000 576 +consid 0 29 3.583519 0.000000 590 +scale 1 28 3.610918 3.610918 613 +framework 0 28 3.610918 0.000000 606 +determin 0 27 3.637586 0.000000 630 +relev 0 26 3.688879 0.000000 637 +emphasi 1 22 3.850148 3.850148 755 +properti 0 22 3.850148 0.000000 749 +identifi 0 22 3.850148 0.000000 760 +path 0 21 3.912023 0.000000 778 +exploit 0 20 3.951244 0.000000 836 +mostli 0 19 4.007333 0.000000 869 +cambridg 0 16 4.174387 0.000000 1008 +pagec 0 15 4.248495 0.000000 1011 +nonlinear 1 14 4.317488 4.317488 1107 +consider 0 14 4.317488 0.000000 1076 +econom 1 13 4.382027 4.382027 1184 +directli 0 13 4.382027 0.000000 1141 +robust 0 12 4.465908 0.000000 1271 +success 0 10 4.653960 0.000000 1390 +traffic 0 10 4.653960 0.000000 1421 +underli 0 10 4.653960 0.000000 1410 +ferri 1 8 4.875197 4.875197 1715 +pivot 0 5 5.347108 0.000000 2426 +chemic 0 5 5.347108 0.000000 2552 +condor 0 5 5.347108 0.000000 2577 +complementar 1 3 5.857933 5.857933 3999 +engineeringand 0 3 5.857933 0.000000 3779 +congest 0 3 5.857933 0.000000 3993 +followingtechniqu 0 2 6.263398 0.000000 5514 +equilibria 0 2 6.263398 0.000000 4760 +taxat 0 2 6.263398 0.000000 4524 +toll 0 2 6.263398 0.000000 6149 +arealso 0 2 6.263398 0.000000 5650 +beinginvestig 0 2 6.263398 0.000000 5745 +variationalinequ 0 1 6.957497 0.000000 18236 +toproblem 0 1 6.957497 0.000000 18237 +andinterfac 0 1 6.957497 0.000000 18238 +beingconsid 0 1 6.957497 0.000000 18239 +oncarbon 0 1 6.957497 0.000000 18240 +emiss 0 1 6.957497 0.000000 18241 +solvingproblem 0 1 6.957497 0.000000 18242 +partitioningtechniqu 0 1 6.957497 0.000000 18243 +forexploit 0 1 6.957497 0.000000 18244 +underlyingmodel 0 1 6.957497 0.000000 18245 +cpnet 0 1 6.957497 0.000000 18246 +prgram 0 1 6.957497 0.000000 18247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..7b871c29 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,255 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +current 1 284 1.098612 1.098612 21 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +mail 0 238 1.386294 0.000000 22 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +year 0 148 1.945910 0.000000 84 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +note 0 142 1.945910 0.000000 67 +perform 0 143 1.945910 0.000000 74 +file 0 132 1.945910 0.000000 70 +dayton 0 119 2.079442 0.000000 104 +welcom 0 122 2.079442 0.000000 99 +introduct 0 126 2.079442 0.000000 87 +machin 0 129 2.079442 0.000000 95 +number 0 130 2.079442 0.000000 97 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +world 1 115 2.197225 2.197225 126 +make 1 111 2.197225 2.197225 120 +send 0 114 2.197225 0.000000 109 +site 0 106 2.197225 0.000000 119 +check 0 115 2.197225 0.000000 118 +place 0 106 2.197225 0.000000 124 +need 1 98 2.302585 2.302585 135 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +comment 1 93 2.397895 2.397895 146 +present 0 91 2.397895 0.000000 145 +pictur 0 89 2.397895 0.000000 160 +homepag 0 93 2.397895 0.000000 148 +octob 0 89 2.397895 0.000000 156 +learn 2 86 2.484907 4.969814 170 +environ 1 84 2.484907 2.484907 177 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +librari 0 87 2.484907 0.000000 181 +control 0 82 2.484907 0.000000 164 +chang 0 82 2.484907 0.000000 163 +build 0 85 2.484907 0.000000 184 +start 0 83 2.484907 0.000000 173 +activ 0 84 2.484907 0.000000 182 +wide 0 84 2.484907 0.000000 185 +state 0 76 2.564949 0.000000 207 +master 0 76 2.564949 0.000000 216 +good 0 77 2.564949 0.000000 200 +optim 0 79 2.564949 0.000000 197 +intellig 1 72 2.639057 2.639057 225 +david 1 71 2.639057 2.639057 232 +free 0 73 2.639057 0.000000 224 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +goal 1 66 2.708050 2.708050 250 +degre 0 69 2.708050 0.000000 259 +order 0 69 2.708050 0.000000 249 +receiv 0 66 2.708050 0.000000 244 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +import 1 65 2.772589 2.772589 282 +street 0 63 2.772589 0.000000 293 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +copi 0 63 2.772589 0.000000 284 +function 0 62 2.772589 0.000000 275 +visit 0 63 2.772589 0.000000 288 +virtual 0 62 2.772589 0.000000 285 +plai 0 60 2.833213 0.000000 307 +browser 1 56 2.890372 2.890372 313 +explor 1 58 2.890372 2.890372 324 +thesi 0 57 2.890372 0.000000 327 +direct 0 57 2.890372 0.000000 316 +point 0 58 2.890372 0.000000 319 +think 0 57 2.890372 0.000000 314 +allow 0 53 2.944439 0.000000 333 +advisor 0 51 2.995732 0.000000 355 +finger 0 52 2.995732 0.000000 354 +case 0 51 2.995732 0.000000 351 +investig 0 51 2.995732 0.000000 353 +date 0 51 2.995732 0.000000 344 +understand 1 47 3.091042 3.091042 384 +adapt 0 46 3.091042 0.000000 387 +featur 0 46 3.091042 0.000000 386 +math 0 44 3.135494 0.000000 402 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +netscap 0 44 3.135494 0.000000 395 +favorit 0 44 3.135494 0.000000 410 +show 1 43 3.178054 3.178054 417 +term 0 43 3.178054 0.000000 411 +music 0 42 3.218876 0.000000 436 +combin 0 42 3.218876 0.000000 421 +late 0 40 3.258097 0.000000 439 +littl 0 39 3.258097 0.000000 454 +form 0 39 3.258097 0.000000 443 +realli 0 40 3.258097 0.000000 444 +feel 0 37 3.332205 0.000000 483 +represent 0 35 3.401197 0.000000 512 +next 0 34 3.401197 0.000000 517 +word 0 34 3.401197 0.000000 508 +dissert 0 32 3.465736 0.000000 549 +kind 0 32 3.465736 0.000000 541 +express 0 32 3.465736 0.000000 540 +independ 0 32 3.465736 0.000000 548 +posit 0 31 3.496508 0.000000 552 +specifi 0 30 3.555348 0.000000 568 +platform 0 29 3.583519 0.000000 591 +measur 0 28 3.610918 0.000000 609 +hope 0 28 3.610918 0.000000 610 +actual 0 28 3.610918 0.000000 604 +releas 0 28 3.610918 0.000000 616 +relev 0 26 3.688879 0.000000 637 +bookmark 0 26 3.688879 0.000000 639 +wai 0 25 3.737670 0.000000 662 +task 0 25 3.737670 0.000000 678 +daili 1 24 3.761200 3.761200 706 +reach 0 24 3.761200 0.000000 688 +input 1 23 3.806662 3.806662 727 +head 1 23 3.806662 3.806662 732 +instead 1 22 3.850148 3.850148 756 +output 1 21 3.912023 3.912023 788 +tell 0 21 3.912023 0.000000 777 +grad 0 20 3.951244 0.000000 837 +exploit 0 20 3.951244 0.000000 836 +basi 0 20 3.951244 0.000000 828 +left 0 19 4.007333 0.000000 851 +feedback 0 19 4.007333 0.000000 854 +citi 0 19 4.007333 0.000000 874 +listen 0 18 4.060443 0.000000 907 +whether 0 17 4.110874 0.000000 918 +dilbert 0 16 4.174387 0.000000 996 +action 1 15 4.248495 4.248495 1038 +contribut 0 15 4.248495 0.000000 1021 +balanc 0 14 4.317488 0.000000 1112 +weak 0 13 4.382027 0.000000 1159 +hotlist 0 13 4.382027 0.000000 1199 +nasa 0 13 4.382027 0.000000 1188 +employ 0 12 4.465908 0.000000 1291 +minor 0 12 4.465908 0.000000 1237 +michigan 1 11 4.553877 4.553877 1368 +smart 0 11 4.553877 0.000000 1352 +abil 0 11 4.553877 0.000000 1341 +sens 0 11 4.553877 0.000000 1305 +bill 0 11 4.553877 0.000000 1297 +rapid 0 10 4.653960 0.000000 1453 +traffic 0 10 4.653960 0.000000 1421 +fellowship 0 10 4.653960 0.000000 1460 +true 0 10 4.653960 0.000000 1422 +volleybal 0 9 4.753590 0.000000 1598 +pair 0 9 4.753590 0.000000 1503 +editori 0 9 4.753590 0.000000 1611 +star 1 8 4.875197 4.875197 1717 +grew 0 8 4.875197 0.000000 1742 +gain 0 8 4.875197 0.000000 1730 +irregular 0 8 4.875197 0.000000 1768 +on 0 8 4.875197 0.000000 1628 +extract 0 8 4.875197 0.000000 1728 +opinion 0 8 4.875197 0.000000 1708 +tourist 0 8 4.875197 0.000000 1710 +earn 0 7 5.010635 0.000000 1788 +notion 0 7 5.010635 0.000000 1947 +piano 0 6 5.164786 0.000000 2201 +benefit 0 6 5.164786 0.000000 2213 +variant 0 6 5.164786 0.000000 2043 +gate 0 6 5.164786 0.000000 2182 +sponsor 0 6 5.164786 0.000000 2133 +grand 0 5 5.347108 0.000000 2425 +race 0 5 5.347108 0.000000 2417 +treat 0 5 5.347108 0.000000 2521 +focuss 0 5 5.347108 0.000000 2271 +blow 0 5 5.347108 0.000000 2407 +reinforc 1 4 5.568345 5.568345 2674 +thumb 1 4 5.568345 5.568345 2816 +sorri 0 4 5.568345 0.000000 3059 +trek 1 3 5.857933 5.857933 4025 +trumpet 0 3 5.857933 0.000000 3946 +arm 0 3 5.857933 0.000000 3697 +neg 0 3 5.857933 0.000000 3451 +teacher 0 3 5.857933 0.000000 3892 +thesystem 0 3 5.857933 0.000000 3881 +interv 0 3 5.857933 0.000000 3253 +wit 0 3 5.857933 0.000000 4005 +bibl 0 3 5.857933 0.000000 3143 +glenn 0 3 5.857933 0.000000 3869 +gould 0 3 5.857933 0.000000 3559 +scienceher 0 2 6.263398 0.000000 5912 +essenc 0 2 6.263398 0.000000 6150 +agood 0 2 6.263398 0.000000 5380 +pagefor 0 2 6.263398 0.000000 6151 +nextstep 0 2 6.263398 0.000000 6102 +foral 0 2 6.263398 0.000000 4290 +isthmu 0 2 6.263398 0.000000 6152 +pagesom 0 2 6.263398 0.000000 6109 +finton 1 1 6.957497 6.957497 18248 +openstep 1 1 6.957497 6.957497 18249 +nerdin 0 1 6.957497 0.000000 18250 +intelligenceher 0 1 6.957497 0.000000 18251 +softwarefor 0 1 6.957497 0.000000 18252 +trusti 0 1 6.957497 0.000000 18253 +nextstationor 0 1 6.957497 0.000000 18254 +enjoyplai 0 1 6.957497 0.000000 18255 +longhair 0 1 6.957497 0.000000 18256 +intervarsityfolk 0 1 6.957497 0.000000 18257 +supersoak 0 1 6.957497 0.000000 18258 +accountto 0 1 6.957497 0.000000 18259 +intelligenti 0 1 6.957497 0.000000 18260 +intelligencei 0 1 6.957497 0.000000 18261 +actappropri 0 1 6.957497 0.000000 18262 +todistinguish 0 1 6.957497 0.000000 18263 +orimport 0 1 6.957497 0.000000 18264 +basedfeatur 0 1 6.957497 0.000000 18265 +learningprocess 0 1 6.957497 0.000000 18266 +intelligentadapt 0 1 6.957497 0.000000 18267 +whichwil 0 1 6.957497 0.000000 18268 +hotlistthi 0 1 6.957497 0.000000 18269 +omniweb 0 1 6.957497 0.000000 18270 +eleg 0 1 6.957497 0.000000 18271 +omniwebi 0 1 6.957497 0.000000 18272 +responseto 0 1 6.957497 0.000000 18273 +jehovah 0 1 6.957497 0.000000 18274 +deiti 0 1 6.957497 0.000000 18275 +christwisconsin 0 1 6.957497 0.000000 18276 +intervars 0 1 6.957497 0.000000 18277 +weatherin 0 1 6.957497 0.000000 18278 +nebula 0 1 6.957497 0.000000 18279 +crosssearch 0 1 6.957497 0.000000 18280 +farsid 0 1 6.957497 0.000000 18281 +voyagerent 0 1 6.957497 0.000000 18282 +zoneroam 0 1 6.957497 0.000000 18283 +stereogram 0 1 6.957497 0.000000 18284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..638f6edd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +student 0 343 1.098612 0.000000 19 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +cornel 0 215 1.386294 0.000000 23 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +implement 1 152 1.791759 1.791759 52 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +provid 0 121 2.079442 0.000000 94 +studi 0 120 2.079442 0.000000 91 +confer 0 126 2.079442 0.000000 100 +code 1 108 2.197225 2.197225 116 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +techniqu 1 99 2.302585 2.302585 138 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +commun 0 95 2.397895 0.000000 157 +environ 1 84 2.484907 2.484907 177 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +novemb 0 81 2.484907 0.000000 179 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +complet 1 77 2.564949 2.564949 208 +decemb 1 80 2.564949 2.564949 215 +messag 0 76 2.564949 0.000000 212 +know 0 80 2.564949 0.000000 198 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +involv 0 71 2.639057 0.000000 227 +symposium 0 72 2.639057 0.000000 238 +logic 0 71 2.639057 0.000000 230 +august 1 66 2.708050 2.708050 257 +practic 1 70 2.708050 2.708050 246 +integr 0 67 2.708050 0.000000 245 +januari 1 62 2.772589 2.772589 264 +evalu 1 64 2.772589 2.772589 266 +import 0 65 2.772589 0.000000 282 +experi 0 64 2.772589 0.000000 283 +septemb 0 65 2.772589 0.000000 274 +best 1 59 2.833213 2.833213 299 +juli 1 60 2.833213 2.833213 305 +share 0 59 2.833213 0.000000 304 +automat 0 61 2.833213 0.000000 306 +semest 0 58 2.890372 0.000000 312 +publish 0 57 2.890372 0.000000 326 +undergradu 0 54 2.944439 0.000000 338 +processor 0 54 2.944439 0.000000 335 +instruct 0 53 2.944439 0.000000 332 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +approach 1 48 3.044522 3.044522 366 +pointer 1 48 3.044522 3.044522 368 +telephon 0 50 3.044522 0.000000 373 +principl 0 48 3.044522 0.000000 357 +right 0 48 3.044522 0.000000 363 +effect 0 46 3.091042 0.000000 385 +possibl 0 47 3.091042 0.000000 378 +execut 1 45 3.135494 3.135494 404 +anoth 0 45 3.135494 0.000000 408 +better 0 45 3.135494 0.000000 401 +edit 0 42 3.218876 0.000000 418 +editor 0 41 3.218876 0.000000 433 +error 1 40 3.258097 3.258097 449 +transact 1 39 3.258097 3.258097 438 +must 0 40 3.258097 0.000000 442 +littl 0 39 3.258097 0.000000 454 +author 0 39 3.258097 0.000000 450 +correct 1 38 3.295837 3.295837 462 +cost 1 37 3.332205 3.332205 480 +workstat 0 37 3.332205 0.000000 479 +procedur 1 36 3.367296 3.367296 488 +especi 0 36 3.367296 0.000000 496 +soon 0 36 3.367296 0.000000 494 +short 0 36 3.367296 0.000000 499 +least 1 35 3.401197 3.401197 516 +global 0 34 3.401197 0.000000 520 +concurr 0 34 3.401197 0.000000 501 +john 0 33 3.433987 0.000000 532 +express 0 32 3.465736 0.000000 540 +extend 0 32 3.465736 0.000000 539 +richard 0 31 3.496508 0.000000 559 +focu 0 30 3.555348 0.000000 571 +graph 0 30 3.555348 0.000000 576 +common 0 30 3.555348 0.000000 574 +rang 0 30 3.555348 0.000000 565 +semant 0 29 3.583519 0.000000 587 +steve 0 29 3.583519 0.000000 594 +multiprocessor 0 28 3.610918 0.000000 605 +load 0 28 3.610918 0.000000 601 +framework 0 28 3.610918 0.000000 606 +arrai 1 27 3.637586 3.637586 627 +revis 0 26 3.688879 0.000000 640 +wai 0 25 3.737670 0.000000 662 +store 0 24 3.761200 0.000000 693 +flow 0 24 3.761200 0.000000 700 +william 1 22 3.850148 3.850148 765 +color 0 22 3.850148 0.000000 762 +almost 0 22 3.850148 0.000000 742 +avoid 1 21 3.912023 3.912023 799 +among 0 21 3.912023 0.000000 781 +programminglanguag 0 21 3.912023 0.000000 782 +alloc 2 20 3.951244 7.902488 821 +exploit 0 20 3.951244 0.000000 836 +supervis 0 20 3.951244 0.000000 840 +facil 0 20 3.951244 0.000000 814 +definit 0 19 4.007333 0.000000 864 +seem 0 18 4.060443 0.000000 899 +regist 2 17 4.110874 8.221748 938 +steven 1 17 4.110874 4.110874 953 +monitor 1 17 4.110874 4.110874 941 +ultim 0 17 4.110874 0.000000 943 +analyz 0 17 4.110874 0.000000 925 +anyon 0 17 4.110874 0.000000 916 +modern 0 16 4.174387 0.000000 966 +easi 0 16 4.174387 0.000000 969 +todd 1 15 4.248495 4.248495 1051 +mayb 0 15 4.248495 0.000000 1014 +indic 0 15 4.248495 0.000000 1013 +driven 0 15 4.248495 0.000000 1048 +attribut 2 14 4.317488 8.634976 1092 +polynomi 0 14 4.317488 0.000000 1069 +demand 0 14 4.317488 0.000000 1073 +split 0 14 4.317488 0.000000 1078 +charl 1 13 4.382027 4.382027 1149 +context 1 13 4.382027 4.382027 1153 +sigplan 1 13 4.382027 4.382027 1190 +care 0 13 4.382027 0.000000 1177 +johnson 0 13 4.382027 0.000000 1162 +bruce 0 12 4.465908 0.000000 1226 +benjamin 0 11 4.553877 0.000000 1296 +stephen 0 11 4.553877 0.000000 1342 +transpar 0 11 4.553877 0.000000 1325 +arithmet 0 10 4.653960 0.000000 1388 +routin 0 9 4.753590 0.000000 1549 +minimum 0 9 4.753590 0.000000 1555 +cum 0 8 4.875197 0.000000 1619 +sensit 0 8 4.875197 0.000000 1726 +fischer 2 7 5.010635 10.021270 1893 +delai 1 7 5.010635 5.010635 1848 +pipelin 0 7 5.010635 0.000000 1830 +appar 0 7 5.010635 0.000000 1958 +zero 0 7 5.010635 0.000000 1896 +bookstor 0 7 5.010635 0.000000 1837 +grammar 1 6 5.164786 5.164786 2058 +benefit 0 6 5.164786 0.000000 2213 +mistak 0 6 5.164786 0.000000 2110 +sigact 0 6 5.164786 0.000000 2212 +unnecessari 1 5 5.347108 5.347108 2506 +explicitli 0 5 5.347108 0.000000 2308 +quantifi 0 5 5.347108 0.000000 2525 +attract 0 5 5.347108 0.000000 2356 +craft 0 5 5.347108 0.000000 2412 +leblanc 0 5 5.347108 0.000000 2377 +pars 0 5 5.347108 0.000000 2321 +interprocedur 1 4 5.568345 5.568345 2771 +vital 0 4 5.568345 0.000000 2733 +popl 0 4 5.568345 0.000000 3068 +gregori 0 4 5.568345 0.000000 2928 +teachingc 1 3 5.857933 5.857933 3614 +domin 0 3 5.857933 0.000000 3995 +likelihood 0 3 5.857933 0.000000 3172 +topla 0 3 5.857933 0.000000 3563 +retarget 0 3 5.857933 0.000000 3994 +syntact 1 2 6.263398 6.263398 5552 +insoftwar 0 2 6.263398 0.000000 4932 +everywher 0 2 6.263398 0.000000 5690 +educationph 0 2 6.263398 0.000000 6112 +milton 0 2 6.263398 0.000000 6153 +bernard 0 2 6.263398 0.000000 5894 +dion 0 2 6.263398 0.000000 5856 +venkatesh 0 2 6.263398 0.000000 6154 +nbsp 2 1 6.957497 13.914994 18285 +kurland 1 1 6.957497 6.957497 18286 +proebst 1 1 6.957497 6.957497 18287 +harish 1 1 6.957497 6.957497 18288 +patil 1 1 6.957497 6.957497 18289 +nbspcharl 0 1 6.957497 0.000000 18290 +nbspprofessor 0 1 6.957497 0.000000 18291 +nbspunivers 0 1 6.957497 0.000000 18292 +enormouscap 0 1 6.957497 0.000000 18293 +haveinvestig 0 1 6.957497 0.000000 18294 +registerresid 0 1 6.957497 0.000000 18295 +loadsand 0 1 6.957497 0.000000 18296 +theprocedur 0 1 6.957497 0.000000 18297 +studiedinterprocedur 0 1 6.957497 0.000000 18298 +modelsthat 0 1 6.957497 0.000000 18299 +optimallyalloc 0 1 6.957497 0.000000 18300 +toautomat 0 1 6.957497 0.000000 18301 +orno 0 1 6.957497 0.000000 18302 +slowdown 0 1 6.957497 0.000000 18303 +inacm 0 1 6.957497 0.000000 18304 +activitiesa 0 1 6.957497 0.000000 18305 +cytronand 0 1 6.957497 0.000000 18306 +studentsdonn 0 1 6.957497 0.000000 18307 +rowland 0 1 6.957497 0.000000 18308 +skedzielewski 0 1 6.957497 0.000000 18309 +reevalu 0 1 6.957497 0.000000 18310 +corrector 0 1 6.957497 0.000000 18311 +sensitivepars 0 1 6.957497 0.000000 18312 +mahadevan 0 1 6.957497 0.000000 18313 +ganapathi 0 1 6.957497 0.000000 18314 +vimal 0 1 6.957497 0.000000 18315 +begwami 0 1 6.957497 0.000000 18316 +maunei 0 1 6.957497 0.000000 18317 +anil 0 1 6.957497 0.000000 18318 +winsborough 0 1 6.957497 0.000000 18319 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..d6c0da34 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,260 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +student 0 343 1.098612 0.000000 19 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +group 0 183 1.609438 0.000000 36 +paper 0 205 1.609438 0.000000 38 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +base 1 165 1.791759 1.791759 50 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +applic 0 170 1.791759 0.000000 56 +architectur 2 139 1.945910 3.891820 77 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +area 0 144 1.945910 0.000000 80 +report 2 131 2.079442 4.158884 92 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +place 0 106 2.197225 0.000000 124 +topic 0 114 2.197225 0.000000 110 +memori 2 101 2.302585 4.605170 139 +technic 2 100 2.302585 4.605170 140 +proceed 1 93 2.397895 2.397895 152 +follow 1 92 2.397895 2.397895 143 +octob 0 89 2.397895 0.000000 156 +larg 1 82 2.484907 2.484907 168 +novemb 1 81 2.484907 2.484907 179 +ieee 0 86 2.484907 0.000000 190 +second 0 81 2.484907 0.000000 166 +appear 1 78 2.564949 2.564949 210 +interfac 1 79 2.564949 2.564949 209 +complet 0 77 2.564949 0.000000 208 +dynam 0 76 2.564949 0.000000 194 +optim 0 79 2.564949 0.000000 197 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +effici 1 73 2.639057 2.639057 233 +symposium 0 72 2.639057 0.000000 238 +involv 0 71 2.639057 0.000000 227 +workshop 0 71 2.639057 0.000000 239 +integr 1 67 2.708050 2.708050 245 +main 1 67 2.708050 2.708050 256 +differ 1 66 2.708050 2.708050 253 +simul 0 66 2.708050 0.000000 255 +januari 0 62 2.772589 0.000000 264 +share 2 59 2.833213 5.666426 304 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +best 0 59 2.833213 0.000000 299 +point 1 58 2.890372 2.890372 319 +variou 0 56 2.890372 0.000000 317 +explor 0 58 2.890372 0.000000 324 +faculti 0 56 2.890372 0.000000 325 +processor 1 54 2.944439 2.944439 335 +februari 1 54 2.944439 2.944439 328 +instruct 0 53 2.944439 0.000000 332 +extens 0 53 2.944439 0.000000 340 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +possibl 0 47 3.091042 0.000000 378 +effect 0 46 3.091042 0.000000 385 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +mechan 1 43 3.178054 3.178054 416 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +third 0 43 3.178054 0.000000 412 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +examin 0 42 3.218876 0.000000 424 +multipl 0 39 3.258097 0.000000 453 +close 0 38 3.295837 0.000000 465 +purpos 0 37 3.332205 0.000000 481 +cost 0 37 3.332205 0.000000 480 +jame 2 35 3.401197 6.802394 507 +least 0 35 3.401197 0.000000 516 +extend 1 32 3.465736 3.465736 539 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +specifi 0 30 3.555348 0.000000 568 +synchron 1 29 3.583519 3.583519 588 +focus 1 29 3.583519 3.583519 584 +limit 0 29 3.583519 0.000000 585 +platform 0 29 3.583519 0.000000 591 +particip 0 29 3.583519 0.000000 589 +scale 1 28 3.610918 3.610918 613 +multiprocessor 0 28 3.610918 0.000000 605 +cluster 0 28 3.610918 0.000000 612 +repres 0 26 3.688879 0.000000 656 +consist 0 26 3.688879 0.000000 651 +bound 0 26 3.688879 0.000000 659 +todai 0 25 3.737670 0.000000 672 +wai 0 25 3.737670 0.000000 662 +supercomput 0 25 3.737670 0.000000 681 +scalabl 1 24 3.761200 3.761200 705 +pattern 0 24 3.761200 0.000000 689 +hierarchi 0 22 3.850148 0.000000 744 +chip 1 21 3.912023 3.912023 770 +alumni 0 21 3.912023 0.000000 807 +increas 0 20 3.951244 0.000000 829 +exploit 0 20 3.951244 0.000000 836 +separ 0 19 4.007333 0.000000 844 +along 0 18 4.060443 0.000000 878 +element 0 18 4.060443 0.000000 895 +minim 0 18 4.060443 0.000000 887 +wind 0 18 4.060443 0.000000 908 +scott 0 18 4.060443 0.000000 884 +lower 0 18 4.060443 0.000000 886 +interconnect 1 17 4.110874 4.110874 937 +layer 1 17 4.110874 4.110874 926 +latenc 1 16 4.174387 4.174387 993 +capabl 1 15 4.248495 4.248495 1016 +massiv 0 15 4.248495 0.000000 1026 +overhead 0 15 4.248495 0.000000 1035 +hierarch 0 15 4.248495 0.000000 1018 +coher 1 14 4.317488 4.317488 1109 +conduct 0 14 4.317488 0.000000 1065 +topolog 0 14 4.317488 0.000000 1089 +johnson 1 13 4.382027 4.382027 1162 +central 0 13 4.382027 0.000000 1160 +individu 0 13 4.382027 0.000000 1126 +optic 1 12 4.465908 4.465908 1221 +mari 1 12 4.465908 4.465908 1266 +grow 0 12 4.465908 0.000000 1209 +bandwidth 1 11 4.553877 4.553877 1365 +primit 1 11 4.553877 4.553877 1317 +evolut 0 11 4.553877 0.000000 1314 +impact 0 11 4.553877 0.000000 1334 +extrem 0 11 4.553877 0.000000 1330 +penalti 0 10 4.653960 0.000000 1405 +modul 0 10 4.653960 0.000000 1434 +resid 0 10 4.653960 0.000000 1461 +label 0 10 4.653960 0.000000 1423 +queue 0 10 4.653960 0.000000 1386 +vernon 1 9 4.753590 4.753590 1556 +doug 1 9 4.753590 4.753590 1517 +elimin 0 9 4.753590 0.000000 1558 +lock 0 9 4.753590 0.000000 1551 +transport 1 8 4.875197 4.875197 1672 +univeristi 0 8 4.875197 0.000000 1754 +evan 0 8 4.875197 0.000000 1633 +goodman 2 7 5.010635 10.021270 1891 +burger 2 7 5.010635 10.021270 1889 +merg 0 7 5.010635 0.000000 1862 +migrat 0 7 5.010635 0.000000 1851 +microprocessor 0 7 5.010635 0.000000 1808 +serial 0 7 5.010635 0.000000 1975 +philip 1 6 5.164786 5.164786 2005 +onto 0 6 5.164786 0.000000 2089 +diagram 0 5 5.347108 0.000000 2346 +quantifi 0 5 5.347108 0.000000 2525 +galileo 1 4 5.568345 5.568345 3086 +medium 0 4 5.568345 0.000000 2834 +eventu 0 4 5.568345 0.000000 3074 +stefano 1 3 5.857933 5.857933 3372 +kaxira 1 3 5.857933 5.857933 3373 +arrow 1 3 5.857933 5.857933 3520 +bank 0 3 5.857933 0.000000 3920 +aswel 0 3 5.857933 0.000000 3286 +fresh 0 3 5.857933 0.000000 3706 +stein 0 3 5.857933 0.000000 3646 +alain 1 2 6.263398 6.263398 6086 +iram 1 2 6.263398 6.263398 4520 +datascalar 1 2 6.263398 6.263398 4518 +wisconsint 0 2 6.263398 0.000000 6155 +groupat 0 2 6.263398 0.000000 5677 +emphasison 0 2 6.263398 0.000000 4157 +extent 0 2 6.263398 0.000000 6080 +dram 0 2 6.263398 0.000000 4173 +spsd 0 2 6.263398 0.000000 4519 +declin 0 2 6.263398 0.000000 5385 +logarithm 0 2 6.263398 0.000000 5322 +multiprocessorsa 0 2 6.263398 0.000000 5455 +gjess 0 2 6.263398 0.000000 6156 +woest 1 1 6.957497 6.957497 18320 +nagi 1 1 6.957497 6.957497 18321 +contentsgalileoproject 0 1 6.957497 0.000000 18322 +descriptionpublicationsrel 0 1 6.957497 0.000000 18323 +projectssci 0 1 6.957497 0.000000 18324 +wisconsinproject 0 1 6.957497 0.000000 18325 +descriptionpublicationsproject 0 1 6.957497 0.000000 18326 +membersgalileo 0 1 6.957497 0.000000 18327 +wisconsingalileo 0 1 6.957497 0.000000 18328 +therelationship 0 1 6.957497 0.000000 18329 +futuresystem 0 1 6.957497 0.000000 18330 +issuabl 0 1 6.957497 0.000000 18331 +orlimit 0 1 6.957497 0.000000 18332 +capacityon 0 1 6.957497 0.000000 18333 +sizabl 0 1 6.957497 0.000000 18334 +fractionof 0 1 6.957497 0.000000 18335 +mopin 0 1 6.957497 0.000000 18336 +ofprocessor 0 1 6.957497 0.000000 18337 +eventuallyobvi 0 1 6.957497 0.000000 18338 +andlimit 0 1 6.957497 0.000000 18339 +systemsperform 0 1 6.957497 0.000000 18340 +theprocessor 0 1 6.957497 0.000000 18341 +spectrumcach 0 1 6.957497 0.000000 18342 +systemsdesign 0 1 6.957497 0.000000 18343 +systemprogram 0 1 6.957497 0.000000 18344 +bottlenecksdoug 0 1 6.957497 0.000000 18345 +modeldoug 0 1 6.957497 0.000000 18346 +microprocessorsdoug 0 1 6.957497 0.000000 18347 +microprocessorsdougla 0 1 6.957497 0.000000 18348 +berkeleyppram 0 1 6.957497 0.000000 18349 +kyushu 0 1 6.957497 0.000000 18350 +japansci 0 1 6.957497 0.000000 18351 +wisconsinour 0 1 6.957497 0.000000 18352 +coherentshar 0 1 6.957497 0.000000 18353 +coherentinterfac 0 1 6.957497 0.000000 18354 +qolb 0 1 6.957497 0.000000 18355 +pairwis 0 1 6.957497 0.000000 18356 +definitionfor 0 1 6.957497 0.000000 18357 +betweenprocess 0 1 6.957497 0.000000 18358 +structureseffici 0 1 6.957497 0.000000 18359 +extensionsaggress 0 1 6.957497 0.000000 18360 +multiprocessorswisconsin 0 1 6.957497 0.000000 18361 +tunneldougla 0 1 6.957497 0.000000 18362 +scijam 0 1 6.957497 0.000000 18363 +memoryross 0 1 6.957497 0.000000 18364 +aboulenein 0 1 6.957497 0.000000 18365 +ringsross 0 1 6.957497 0.000000 18366 +ringsteven 0 1 6.957497 0.000000 18367 +coherenceross 0 1 6.957497 0.000000 18368 +multiprocessorsphilip 0 1 6.957497 0.000000 18369 +multiprocessorjam 0 1 6.957497 0.000000 18370 +abouleneinross 0 1 6.957497 0.000000 18371 +johnsonstev 0 1 6.957497 0.000000 18372 +scottlast 0 1 6.957497 0.000000 18373 +dburger 0 1 6.957497 0.000000 18374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..b9555842 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +structur 0 106 2.197225 0.000000 105 +peopl 0 96 2.302585 0.000000 132 +grade 0 90 2.397895 0.000000 142 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +paul 0 38 3.295837 0.000000 471 +post 0 35 3.401197 0.000000 505 +martin 0 21 3.912023 0.000000 794 +andrew 1 19 4.007333 4.007333 849 +jean 0 10 4.653960 0.000000 1440 +regent 0 5 5.347108 0.000000 2551 +geeri 1 3 5.857933 5.857933 3422 +albert 0 2 6.263398 0.000000 5987 +friedrich 0 2 6.263398 0.000000 5175 +madisonin 0 1 6.957497 0.000000 18375 +compsci 0 1 6.957497 0.000000 18376 +pontif 0 1 6.957497 0.000000 18377 +jacqu 0 1 6.957497 0.000000 18378 +derrida 0 1 6.957497 0.000000 18379 +heidegg 0 1 6.957497 0.000000 18380 +camu 0 1 6.957497 0.000000 18381 +sartr 0 1 6.957497 0.000000 18382 +nietzsch 0 1 6.957497 0.000000 18383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..0e3e41b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +also 0 259 1.386294 0.000000 28 +paper 0 205 1.609438 0.000000 38 +class 0 199 1.609438 0.000000 37 +fall 0 181 1.609438 0.000000 40 +read 1 154 1.791759 1.791759 47 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +madison 0 165 1.791759 0.000000 55 +avail 0 169 1.791759 0.000000 48 +note 0 142 1.945910 0.000000 67 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +machin 1 129 2.079442 2.079442 95 +report 0 131 2.079442 0.000000 92 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +look 0 107 2.197225 0.000000 115 +well 0 109 2.197225 0.000000 121 +book 0 99 2.302585 0.000000 131 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +imag 0 91 2.397895 0.000000 161 +pictur 0 89 2.397895 0.000000 160 +help 0 83 2.484907 0.000000 175 +thing 0 84 2.484907 0.000000 189 +orient 0 80 2.564949 0.000000 205 +main 0 67 2.708050 0.000000 256 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +share 0 59 2.833213 0.000000 304 +back 0 60 2.833213 0.000000 297 +index 1 56 2.890372 2.890372 309 +unix 1 58 2.890372 2.890372 308 +sever 0 56 2.890372 0.000000 322 +think 0 57 2.890372 0.000000 314 +sampl 1 53 2.944439 2.944439 339 +case 0 51 2.995732 0.000000 351 +right 0 48 3.044522 0.000000 363 +move 0 47 3.091042 0.000000 382 +netscap 1 44 3.135494 3.135494 395 +directori 0 45 3.135494 0.000000 396 +might 0 41 3.218876 0.000000 426 +fast 0 42 3.218876 0.000000 429 +continu 0 39 3.258097 0.000000 448 +movi 0 40 3.258097 0.000000 459 +small 0 39 3.258097 0.000000 447 +cost 0 37 3.332205 0.000000 480 +either 0 35 3.401197 0.000000 506 +given 0 32 3.465736 0.000000 538 +someth 0 31 3.496508 0.000000 554 +consid 0 29 3.583519 0.000000 590 +usual 0 28 3.610918 0.000000 608 +load 0 28 3.610918 0.000000 601 +actual 0 28 3.610918 0.000000 604 +great 0 27 3.637586 0.000000 626 +bookmark 0 26 3.688879 0.000000 639 +sometim 0 24 3.761200 0.000000 696 +higher 0 24 3.761200 0.000000 690 +thank 0 23 3.806662 0.000000 721 +sent 0 22 3.850148 0.000000 763 +grad 0 20 3.951244 0.000000 837 +wrote 0 20 3.951244 0.000000 830 +mostli 0 19 4.007333 0.000000 869 +exercis 0 19 4.007333 0.000000 842 +stop 0 17 4.110874 0.000000 942 +mayb 1 15 4.248495 4.248495 1014 +purchas 0 15 4.248495 0.000000 1030 +floor 0 14 4.317488 0.000000 1070 +wait 0 13 4.382027 0.000000 1168 +walk 0 12 4.465908 0.000000 1281 +outsid 0 12 4.465908 0.000000 1219 +grow 0 12 4.465908 0.000000 1209 +noth 0 11 4.553877 0.000000 1328 +denni 0 11 4.553877 0.000000 1321 +calvin 0 9 4.753590 0.000000 1518 +claim 0 8 4.875197 0.000000 1664 +unifi 0 8 4.875197 0.000000 1774 +reload 0 8 4.875197 0.000000 1682 +told 0 8 4.875197 0.000000 1658 +accord 0 7 5.010635 0.000000 1826 +none 0 7 5.010635 0.000000 1811 +monei 0 7 5.010635 0.000000 1934 +christian 0 7 5.010635 0.000000 1949 +huge 0 6 5.164786 0.000000 1991 +handbook 0 6 5.164786 0.000000 2061 +gui 0 5 5.347108 0.000000 2573 +feet 0 5 5.347108 0.000000 2492 +anti 0 5 5.347108 0.000000 2434 +cheap 0 4 5.568345 0.000000 2751 +prospect 0 4 5.568345 0.000000 3013 +shelf 0 4 5.568345 0.000000 2621 +fork 0 4 5.568345 0.000000 2801 +kill 0 4 5.568345 0.000000 3000 +suppos 0 4 5.568345 0.000000 3002 +suffic 0 4 5.568345 0.000000 2869 +glass 1 3 5.857933 5.857933 3759 +dutch 0 3 5.857933 0.000000 3592 +influenc 0 3 5.857933 0.000000 3349 +cash 0 3 5.857933 0.000000 3355 +dabbl 0 3 5.857933 0.000000 3971 +forward 0 3 5.857933 0.000000 3784 +deposit 0 2 6.263398 0.000000 6095 +cooler 0 2 6.263398 0.000000 6023 +suspect 0 2 6.263398 0.000000 5187 +nearest 0 2 6.263398 0.000000 4922 +roommat 0 2 6.263398 0.000000 6157 +withno 0 2 6.263398 0.000000 5370 +eventhough 0 2 6.263398 0.000000 6158 +anyhow 0 2 6.263398 0.000000 5188 +killer 0 2 6.263398 0.000000 6159 +programmingin 0 2 6.263398 0.000000 4135 +ritchi 0 2 6.263398 0.000000 4306 +creator 0 2 6.263398 0.000000 5998 +gideon 1 1 6.957497 6.957497 18384 +tweak 1 1 6.957497 6.957497 18385 +toonion 0 1 6.957497 0.000000 18386 +seethi 0 1 6.957497 0.000000 18387 +blockbust 0 1 6.957497 0.000000 18388 +predica 0 1 6.957497 0.000000 18389 +dismal 0 1 6.957497 0.000000 18390 +donationto 0 1 6.957497 0.000000 18391 +defrai 0 1 6.957497 0.000000 18392 +orderscan 0 1 6.957497 0.000000 18393 +monro 0 1 6.957497 0.000000 18394 +usathank 0 1 6.957497 0.000000 18395 +unread 0 1 6.957497 0.000000 18396 +achil 0 1 6.957497 0.000000 18397 +cstechreport 0 1 6.957497 0.000000 18398 +otherstuff 0 1 6.957497 0.000000 18399 +averagewil 0 1 6.957497 0.000000 18400 +doofu 0 1 6.957497 0.000000 18401 +zippi 0 1 6.957497 0.000000 18402 +pinheadha 0 1 6.957497 0.000000 18403 +justtri 0 1 6.957497 0.000000 18404 +mozilla 0 1 6.957497 0.000000 18405 +buttonher 0 1 6.957497 0.000000 18406 +somethingin 0 1 6.957497 0.000000 18407 +hater 0 1 6.957497 0.000000 18408 +mailand 0 1 6.957497 0.000000 18409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..9b94e9a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,407 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 0 314 1.098612 0.000000 14 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +includ 0 208 1.609438 0.000000 42 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +phone 0 175 1.791759 0.000000 45 +algorithm 0 162 1.791759 0.000000 57 +first 2 140 1.945910 3.891820 71 +architectur 1 139 1.945910 1.945910 77 +like 1 132 1.945910 1.945910 81 +file 0 132 1.945910 0.000000 70 +assign 0 135 1.945910 0.000000 66 +schedul 1 119 2.079442 2.079442 85 +tool 1 117 2.079442 2.079442 93 +high 0 130 2.079442 0.000000 101 +databas 0 122 2.079442 0.000000 86 +number 0 130 2.079442 0.000000 97 +manag 1 114 2.197225 2.197225 125 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +person 1 111 2.197225 2.197225 117 +specif 0 106 2.197225 0.000000 106 +well 0 109 2.197225 0.000000 121 +place 0 106 2.197225 0.000000 124 +make 0 111 2.197225 0.000000 120 +check 0 115 2.197225 0.000000 118 +send 0 114 2.197225 0.000000 109 +topic 0 114 2.197225 0.000000 110 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +book 0 99 2.302585 0.000000 131 +techniqu 0 99 2.302585 0.000000 138 +advanc 0 99 2.302585 0.000000 130 +imag 1 91 2.397895 2.397895 161 +sinc 1 90 2.397895 2.397895 159 +real 1 93 2.397895 2.397895 144 +mani 0 92 2.397895 0.000000 150 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +thing 1 84 2.484907 2.484907 189 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +control 1 82 2.484907 2.484907 164 +requir 1 81 2.484907 2.484907 167 +level 0 87 2.484907 0.000000 180 +educ 0 86 2.484907 0.000000 191 +internet 0 83 2.484907 0.000000 186 +build 0 85 2.484907 0.000000 184 +chang 0 82 2.484907 0.000000 163 +resum 1 79 2.564949 2.564949 217 +dynam 0 76 2.564949 0.000000 194 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +meet 2 72 2.639057 5.278114 229 +html 1 75 2.639057 2.639057 235 +involv 0 71 2.639057 0.000000 227 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +workshop 0 71 2.639057 0.000000 239 +appli 0 71 2.639057 0.000000 226 +effici 0 73 2.639057 0.000000 233 +test 1 66 2.708050 2.708050 252 +integr 0 67 2.708050 0.000000 245 +view 0 70 2.708050 0.000000 254 +creat 1 63 2.772589 2.772589 277 +organ 1 65 2.772589 2.772589 265 +previou 0 62 2.772589 0.000000 290 +plan 0 65 2.772589 0.000000 272 +best 0 59 2.833213 0.000000 299 +unix 1 58 2.890372 2.890372 308 +summer 0 56 2.890372 0.000000 311 +think 0 57 2.890372 0.000000 314 +publish 0 57 2.890372 0.000000 326 +space 0 57 2.890372 0.000000 310 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +major 0 56 2.890372 0.000000 315 +suggest 1 53 2.944439 2.944439 331 +instruct 0 53 2.944439 0.000000 332 +allow 0 53 2.944439 0.000000 333 +case 0 51 2.995732 0.000000 351 +week 0 52 2.995732 0.000000 343 +approach 1 48 3.044522 3.044522 366 +frequent 0 49 3.044522 0.000000 367 +without 0 50 3.044522 0.000000 370 +right 0 48 3.044522 0.000000 363 +possibl 1 47 3.091042 3.091042 378 +get 1 46 3.091042 3.091042 380 +adapt 0 46 3.091042 0.000000 387 +effect 0 46 3.091042 0.000000 385 +could 0 46 3.091042 0.000000 383 +featur 0 46 3.091042 0.000000 386 +keep 1 44 3.135494 3.135494 409 +even 1 45 3.135494 3.135494 393 +favorit 0 44 3.135494 0.000000 410 +textbook 0 44 3.135494 0.000000 397 +describ 0 45 3.135494 0.000000 400 +mark 0 44 3.135494 0.000000 403 +futur 1 41 3.218876 3.218876 427 +http 0 41 3.218876 0.000000 420 +edit 0 42 3.218876 0.000000 418 +compani 0 41 3.218876 0.000000 423 +linear 0 41 3.218876 0.000000 431 +past 0 42 3.218876 0.000000 428 +author 1 39 3.258097 3.258097 450 +form 0 39 3.258097 0.000000 443 +multipl 0 39 3.258097 0.000000 453 +programm 0 39 3.258097 0.000000 445 +must 0 40 3.258097 0.000000 442 +open 0 38 3.295837 0.000000 469 +brian 0 38 3.295837 0.000000 466 +credit 0 38 3.295837 0.000000 460 +microsoft 0 38 3.295837 0.000000 468 +connect 0 37 3.332205 0.000000 485 +workstat 0 37 3.332205 0.000000 479 +mean 0 37 3.332205 0.000000 477 +download 1 36 3.367296 3.367296 489 +tree 0 36 3.367296 0.000000 492 +least 1 35 3.401197 3.401197 516 +concurr 1 34 3.401197 3.401197 501 +print 0 34 3.401197 0.000000 503 +singl 0 34 3.401197 0.000000 510 +manual 0 35 3.401197 0.000000 504 +someth 1 31 3.496508 3.496508 554 +often 0 31 3.496508 0.000000 551 +domain 0 30 3.555348 0.000000 564 +synchron 1 29 3.583519 3.583519 588 +quot 0 29 3.583519 0.000000 582 +actual 0 28 3.610918 0.000000 604 +usual 0 28 3.610918 0.000000 608 +static 0 27 3.637586 0.000000 619 +quit 0 27 3.637586 0.000000 633 +team 0 27 3.637586 0.000000 625 +rather 0 26 3.688879 0.000000 642 +enhanc 0 26 3.688879 0.000000 644 +although 1 25 3.737670 3.737670 667 +wai 0 25 3.737670 0.000000 662 +never 0 25 3.737670 0.000000 671 +alwai 1 24 3.761200 3.761200 691 +higher 0 24 3.761200 0.000000 690 +known 0 24 3.761200 0.000000 702 +mike 0 24 3.761200 0.000000 703 +miscellan 1 23 3.806662 3.806662 731 +self 1 22 3.850148 3.850148 761 +william 0 22 3.850148 0.000000 765 +sort 0 22 3.850148 0.000000 738 +identifi 0 22 3.850148 0.000000 760 +sent 0 22 3.850148 0.000000 763 +busi 1 21 3.912023 3.912023 784 +chip 0 21 3.912023 0.000000 770 +love 0 21 3.912023 0.000000 804 +divis 0 21 3.912023 0.000000 803 +similar 0 21 3.912023 0.000000 771 +voic 0 21 3.912023 0.000000 806 +watch 0 21 3.912023 0.000000 789 +tell 0 21 3.912023 0.000000 777 +avoid 0 21 3.912023 0.000000 799 +minut 0 20 3.951244 0.000000 810 +kernel 0 20 3.951244 0.000000 825 +portabl 0 20 3.951244 0.000000 819 +ever 0 19 4.007333 0.000000 872 +miss 0 19 4.007333 0.000000 866 +less 0 18 4.060443 0.000000 892 +element 0 18 4.060443 0.000000 895 +seem 0 18 4.060443 0.000000 899 +record 0 18 4.060443 0.000000 890 +aid 0 18 4.060443 0.000000 904 +thought 1 17 4.110874 4.110874 945 +anyon 0 17 4.110874 0.000000 916 +weekli 0 17 4.110874 0.000000 919 +intel 1 16 4.174387 4.174387 1000 +alreadi 0 16 4.174387 0.000000 963 +critic 0 16 4.174387 0.000000 982 +advantag 0 16 4.174387 0.000000 987 +configur 1 15 4.248495 4.248495 1012 +enough 1 15 4.248495 4.248495 1040 +piec 0 15 4.248495 0.000000 1020 +save 0 14 4.317488 0.000000 1099 +manner 0 14 4.317488 0.000000 1074 +becam 0 14 4.317488 0.000000 1117 +cannot 1 13 4.382027 4.382027 1144 +sai 0 13 4.382027 0.000000 1175 +care 0 13 4.382027 0.000000 1177 +central 0 13 4.382027 0.000000 1160 +everyth 0 13 4.382027 0.000000 1169 +weak 0 13 4.382027 0.000000 1159 +touch 1 12 4.465908 4.465908 1288 +assembl 0 12 4.465908 0.000000 1207 +usenix 0 12 4.465908 0.000000 1240 +went 0 12 4.465908 0.000000 1279 +overal 0 12 4.465908 0.000000 1254 +see 0 11 4.553877 0.000000 1337 +mapl 0 11 4.553877 0.000000 1376 +wood 0 11 4.553877 0.000000 1355 +fix 0 11 4.553877 0.000000 1327 +card 0 10 4.653960 0.000000 1435 +relationship 0 10 4.653960 0.000000 1383 +mainli 0 10 4.653960 0.000000 1432 +invit 0 10 4.653960 0.000000 1428 +bring 0 10 4.653960 0.000000 1430 +lock 1 9 4.753590 4.753590 1551 +trust 1 9 4.753590 4.753590 1583 +motorola 0 9 4.753590 0.000000 1546 +entitl 0 9 4.753590 0.000000 1490 +patterson 0 9 4.753590 0.000000 1554 +charg 0 9 4.753590 0.000000 1582 +login 0 9 4.753590 0.000000 1550 +prefer 0 9 4.753590 0.000000 1491 +admin 0 9 4.753590 0.000000 1476 +calendar 2 8 4.875197 9.750394 1649 +architect 1 8 4.875197 4.875197 1624 +claim 0 8 4.875197 0.000000 1664 +poor 0 8 4.875197 0.000000 1736 +perhap 0 8 4.875197 0.000000 1693 +driver 0 8 4.875197 0.000000 1657 +accomplish 0 8 4.875197 0.000000 1755 +mass 0 8 4.875197 0.000000 1732 +isol 0 8 4.875197 0.000000 1663 +realiz 0 8 4.875197 0.000000 1739 +montreal 1 7 5.010635 5.010635 1961 +serial 0 7 5.010635 0.000000 1975 +suffici 0 7 5.010635 0.000000 1897 +therefor 0 7 5.010635 0.000000 1822 +header 0 7 5.010635 0.000000 1787 +arrang 1 6 5.164786 5.164786 2023 +commit 1 6 5.164786 5.164786 2233 +sohi 0 6 5.164786 0.000000 2237 +versu 0 6 5.164786 0.000000 2052 +snow 0 6 5.164786 0.000000 2031 +sleep 0 6 5.164786 0.000000 2211 +microarchitectur 0 6 5.164786 0.000000 2238 +pentium 0 6 5.164786 0.000000 2077 +quickli 0 6 5.164786 0.000000 2000 +proce 0 6 5.164786 0.000000 2114 +relax 0 6 5.164786 0.000000 2120 +strip 0 6 5.164786 0.000000 2203 +apolog 0 6 5.164786 0.000000 2046 +optimist 1 5 5.347108 5.347108 2501 +pagethi 0 5 5.347108 0.000000 2336 +ship 0 5 5.347108 0.000000 2534 +default 0 5 5.347108 0.000000 2335 +patent 0 5 5.347108 0.000000 2574 +chapel 0 5 5.347108 0.000000 2457 +keeper 0 5 5.347108 0.000000 2569 +knew 0 5 5.347108 0.000000 2445 +exchang 0 5 5.347108 0.000000 2310 +adopt 0 5 5.347108 0.000000 2467 +hennessi 0 5 5.347108 0.000000 2289 +appreci 0 5 5.347108 0.000000 2374 +hate 0 5 5.347108 0.000000 2529 +recogn 0 5 5.347108 0.000000 2302 +truli 0 5 5.347108 0.000000 2476 +notabl 0 5 5.347108 0.000000 2276 +camp 0 5 5.347108 0.000000 2545 +andi 2 4 5.568345 11.136690 3081 +filesystem 1 4 5.568345 5.568345 2587 +unless 0 4 5.568345 0.000000 2607 +scribe 0 4 5.568345 0.000000 2631 +church 0 4 5.568345 0.000000 3011 +aspir 0 4 5.568345 0.000000 3019 +wear 0 4 5.568345 0.000000 2785 +trick 0 4 5.568345 0.000000 2967 +wander 0 4 5.568345 0.000000 2896 +wherea 0 4 5.568345 0.000000 2597 +cshrc 0 4 5.568345 0.000000 2759 +disconnect 0 4 5.568345 0.000000 2664 +pilot 1 3 5.857933 5.857933 4008 +warm 1 3 5.857933 5.857933 3904 +hacker 1 3 5.857933 5.857933 3996 +gould 1 3 5.857933 5.857933 3559 +urgent 1 3 5.857933 5.857933 3316 +fame 0 3 5.857933 0.000000 3793 +coin 0 3 5.857933 0.000000 3799 +harm 0 3 5.857933 0.000000 3515 +advertis 0 3 5.857933 0.000000 3788 +redesign 0 3 5.857933 0.000000 3540 +wilkinson 0 3 5.857933 0.000000 3579 +berlin 0 3 5.857933 0.000000 3263 +fascin 0 3 5.857933 0.000000 3948 +glew 2 2 6.263398 12.526796 4162 +pageandi 0 2 6.263398 0.000000 6096 +pope 0 2 6.263398 0.000000 5506 +parson 0 2 6.263398 0.000000 4528 +king 0 2 6.263398 0.000000 5737 +strand 0 2 6.263398 0.000000 5880 +chop 0 2 6.263398 0.000000 6160 +beard 0 2 6.263398 0.000000 6161 +constantli 0 2 6.263398 0.000000 4181 +verg 0 2 6.263398 0.000000 5488 +disagre 0 2 6.263398 0.000000 6105 +defunct 0 2 6.263398 0.000000 6162 +startup 0 2 6.263398 0.000000 4676 +clone 0 2 6.263398 0.000000 5833 +aitken 0 2 6.263398 0.000000 4941 +ubiquit 0 2 6.263398 0.000000 6049 +intervent 0 2 6.263398 0.000000 6163 +bother 0 2 6.263398 0.000000 6164 +advoc 1 1 6.957497 6.957497 18410 +beef 1 1 6.957497 6.957497 18411 +krazi 1 1 6.957497 6.957497 18412 +wannab 1 1 6.957497 6.957497 18413 +softwareto 1 1 6.957497 6.957497 18414 +teresa 1 1 6.957497 6.957497 18415 +largelyform 0 1 6.957497 0.000000 18416 +snippet 0 1 6.957497 0.000000 18417 +stylishor 0 1 6.957497 0.000000 18418 +summarycontact 0 1 6.957497 0.000000 18419 +addressescalendar 0 1 6.957497 0.000000 18420 +taker 0 1 6.957497 0.000000 18421 +priest 0 1 6.957497 0.000000 18422 +boyn 0 1 6.957497 0.000000 18423 +frost 0 1 6.957497 0.000000 18424 +almighti 0 1 6.957497 0.000000 18425 +dollar 0 1 6.957497 0.000000 18426 +bellow 0 1 6.957497 0.000000 18427 +ranter 0 1 6.957497 0.000000 18428 +preacher 0 1 6.957497 0.000000 18429 +beecher 0 1 6.957497 0.000000 18430 +harbour 0 1 6.957497 0.000000 18431 +deplor 0 1 6.957497 0.000000 18432 +churchmen 0 1 6.957497 0.000000 18433 +notori 0 1 6.957497 0.000000 18434 +atheist 0 1 6.957497 0.000000 18435 +chariti 0 1 6.957497 0.000000 18436 +sailor 0 1 6.957497 0.000000 18437 +chord 0 1 6.957497 0.000000 18438 +firewood 0 1 6.957497 0.000000 18439 +meal 0 1 6.957497 0.000000 18440 +manifesto 0 1 6.957497 0.000000 18441 +handbil 0 1 6.957497 0.000000 18442 +hackeralthough 0 1 6.957497 0.000000 18443 +formerlyhad 0 1 6.957497 0.000000 18444 +fake 0 1 6.957497 0.000000 18445 +andstil 0 1 6.957497 0.000000 18446 +wistfulli 0 1 6.957497 0.000000 18447 +suspend 0 1 6.957497 0.000000 18448 +bald 0 1 6.957497 0.000000 18449 +architectureonc 0 1 6.957497 0.000000 18450 +architecturei 0 1 6.957497 0.000000 18451 +grabbag 0 1 6.957497 0.000000 18452 +antidot 0 1 6.957497 0.000000 18453 +afford 0 1 6.957497 0.000000 18454 +diskspac 0 1 6.957497 0.000000 18455 +provideror 0 1 6.957497 0.000000 18456 +architectureon 0 1 6.957497 0.000000 18457 +datasheet 0 1 6.957497 0.000000 18458 +netscapebookmarksstockscod 0 1 6.957497 0.000000 18459 +standardsroi 0 1 6.957497 0.000000 18460 +standardsi 0 1 6.957497 0.000000 18461 +enfopris 0 1 6.957497 0.000000 18462 +writingto 0 1 6.957497 0.000000 18463 +longstand 0 1 6.957497 0.000000 18464 +configurationmanag 0 1 6.957497 0.000000 18465 +scc 0 1 6.957497 0.000000 18466 +box 0 1 6.957497 0.000000 18467 +hardlink 0 1 6.957497 0.000000 18468 +deprec 0 1 6.957497 0.000000 18469 +livelock 0 1 6.957497 0.000000 18470 +insist 0 1 6.957497 0.000000 18471 +checkinsso 0 1 6.957497 0.000000 18472 +approachin 0 1 6.957497 0.000000 18473 +fetterman 0 1 6.957497 0.000000 18474 +deserv 0 1 6.957497 0.000000 18475 +wisconsinhow 0 1 6.957497 0.000000 18476 +programat 0 1 6.957497 0.000000 18477 +cmtool 0 1 6.957497 0.000000 18478 +ical 0 1 6.957497 0.000000 18479 +anyof 0 1 6.957497 0.000000 18480 +manuallyadd 0 1 6.957497 0.000000 18481 +intelat 0 1 6.957497 0.000000 18482 +devout 0 1 6.957497 0.000000 18483 +ontim 0 1 6.957497 0.000000 18484 +meetingswith 0 1 6.957497 0.000000 18485 +reserveth 0 1 6.957497 0.000000 18486 +blindli 0 1 6.957497 0.000000 18487 +proposeif 0 1 6.957497 0.000000 18488 +overallschedul 0 1 6.957497 0.000000 18489 +secretariesand 0 1 6.957497 0.000000 18490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..1a37f0c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +professor 0 137 1.945910 0.000000 76 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +good 0 77 2.564949 0.000000 200 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 0 35 3.401197 0.000000 507 +lot 0 18 4.060443 0.000000 889 +goodman 1 7 5.010635 5.010635 1891 +sciencesdepart 0 6 5.164786 0.000000 2020 +galileo 0 4 5.568345 0.000000 3086 +usaresearch 0 1 6.957497 0.000000 18491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..99750744 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,339 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 0 374 0.693147 0.000000 7 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +link 2 247 1.386294 2.772588 24 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +class 0 199 1.609438 0.000000 37 +paper 0 205 1.609438 0.000000 38 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +architectur 2 139 1.945910 3.891820 77 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +object 0 138 1.945910 0.000000 79 +databas 2 122 2.079442 4.158884 86 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +site 1 106 2.197225 2.197225 119 +manag 1 114 2.197225 2.197225 125 +instructor 0 108 2.197225 0.000000 107 +topic 0 114 2.197225 0.000000 110 +find 0 111 2.197225 0.000000 111 +version 0 113 2.197225 0.000000 122 +world 0 115 2.197225 0.000000 126 +check 0 115 2.197225 0.000000 118 +text 1 98 2.302585 2.302585 133 +imag 3 91 2.397895 7.193685 161 +graphic 1 90 2.397895 2.397895 147 +section 1 94 2.397895 2.397895 149 +select 0 91 2.397895 0.000000 154 +pictur 0 89 2.397895 0.000000 160 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +learn 1 86 2.484907 2.484907 170 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +requir 0 81 2.484907 0.000000 167 +resourc 0 81 2.484907 0.000000 172 +wide 0 84 2.484907 0.000000 185 +academ 0 82 2.484907 0.000000 178 +refer 1 78 2.564949 2.564949 203 +state 1 76 2.564949 2.564949 207 +april 0 77 2.564949 0.000000 196 +homework 0 79 2.564949 0.000000 193 +html 1 75 2.639057 2.639057 235 +line 1 75 2.639057 2.639057 231 +onlin 0 75 2.639057 0.000000 223 +servic 0 72 2.639057 0.000000 236 +simul 1 66 2.708050 2.708050 255 +view 0 70 2.708050 0.000000 254 +window 0 68 2.708050 0.000000 242 +multimedia 0 68 2.708050 0.000000 258 +dept 0 64 2.772589 0.000000 291 +guid 0 63 2.772589 0.000000 267 +written 0 63 2.772589 0.000000 278 +experi 0 64 2.772589 0.000000 283 +new 0 64 2.772589 0.000000 262 +back 0 60 2.833213 0.000000 297 +automat 0 61 2.833213 0.000000 306 +index 1 56 2.890372 2.890372 309 +unix 0 58 2.890372 0.000000 308 +browser 0 56 2.890372 0.000000 313 +hardwar 0 51 2.995732 0.000000 350 +cool 1 49 3.044522 3.044522 374 +archiv 1 49 3.044522 3.044522 364 +format 0 48 3.044522 0.000000 356 +standard 0 48 3.044522 0.000000 365 +numer 0 49 3.044522 0.000000 369 +give 0 50 3.044522 0.000000 359 +fridai 0 44 3.135494 0.000000 390 +algebra 0 45 3.135494 0.000000 394 +directori 0 45 3.135494 0.000000 396 +video 0 44 3.135494 0.000000 405 +math 0 44 3.135494 0.000000 402 +natur 0 44 3.135494 0.000000 406 +mark 0 44 3.135494 0.000000 403 +vision 1 41 3.218876 3.218876 430 +linear 0 41 3.218876 0.000000 431 +might 0 41 3.218876 0.000000 426 +compani 0 41 3.218876 0.000000 423 +http 0 41 3.218876 0.000000 420 +theoret 0 39 3.258097 0.000000 446 +electr 0 38 3.295837 0.000000 461 +robot 1 36 3.367296 3.367296 497 +everi 0 34 3.401197 0.000000 519 +tech 0 35 3.401197 0.000000 515 +survei 0 35 3.401197 0.000000 513 +eduoffic 0 33 3.433987 0.000000 531 +board 0 33 3.433987 0.000000 528 +idea 1 32 3.465736 3.465736 545 +independ 0 32 3.465736 0.000000 548 +storag 0 31 3.496508 0.000000 553 +rang 1 30 3.555348 3.555348 565 +specifi 0 30 3.555348 0.000000 568 +univ 1 28 3.610918 3.610918 617 +packag 0 28 3.610918 0.000000 614 +retriev 2 27 3.637586 7.275172 621 +altern 0 26 3.688879 0.000000 641 +challeng 0 26 3.688879 0.000000 653 +trace 1 25 3.737670 3.737670 677 +todai 0 25 3.737670 0.000000 672 +client 0 25 3.737670 0.000000 679 +greg 1 24 3.761200 3.761200 695 +yahoo 0 24 3.761200 0.000000 707 +recognit 1 23 3.806662 3.806662 723 +thank 0 23 3.806662 0.000000 721 +geometri 1 22 3.850148 3.850148 752 +fund 1 21 3.912023 3.912023 805 +util 0 21 3.912023 0.000000 774 +hous 0 21 3.912023 0.000000 801 +nice 1 20 3.951244 3.951244 809 +portabl 0 20 3.951244 0.000000 819 +toolkit 0 20 3.951244 0.000000 835 +histori 1 19 4.007333 4.007333 853 +lyco 0 19 4.007333 0.000000 871 +feedback 0 19 4.007333 0.000000 854 +thoma 1 18 4.060443 4.060443 901 +lot 0 18 4.060443 0.000000 889 +stanford 0 17 4.110874 0.000000 955 +medic 0 17 4.110874 0.000000 958 +germani 0 17 4.110874 0.000000 946 +repositori 0 17 4.110874 0.000000 932 +white 0 17 4.110874 0.000000 951 +english 1 15 4.248495 4.248495 1033 +overhead 0 15 4.248495 0.000000 1035 +todd 0 15 4.248495 0.000000 1051 +charact 0 15 4.248495 0.000000 1028 +massiv 0 15 4.248495 0.000000 1026 +draft 0 14 4.317488 0.000000 1085 +pretti 1 13 4.382027 4.382027 1191 +suit 0 13 4.382027 0.000000 1129 +resolut 0 13 4.382027 0.000000 1172 +misc 0 13 4.382027 0.000000 1124 +charl 0 13 4.382027 0.000000 1149 +shape 1 12 4.465908 4.465908 1245 +optic 0 12 4.465908 0.000000 1221 +string 1 11 4.553877 4.553877 1340 +excit 0 11 4.553877 0.000000 1329 +night 0 11 4.553877 0.000000 1319 +worth 0 11 4.553877 0.000000 1294 +perl 0 11 4.553877 0.000000 1332 +literatur 0 11 4.553877 0.000000 1300 +metacrawl 0 10 4.653960 0.000000 1455 +card 0 10 4.653960 0.000000 1435 +enter 0 10 4.653960 0.000000 1454 +custom 0 10 4.653960 0.000000 1414 +utah 0 9 4.753590 0.000000 1585 +patterson 0 9 4.753590 0.000000 1554 +linguist 0 9 4.753590 0.000000 1593 +editori 0 9 4.753590 0.000000 1611 +japan 1 8 4.875197 4.875197 1762 +textur 0 8 4.875197 0.000000 1677 +dictionari 0 8 4.875197 0.000000 1642 +jack 0 8 4.875197 0.000000 1780 +attach 1 7 5.010635 5.010635 1785 +shade 1 7 5.010635 5.010635 1881 +usenet 0 7 5.010635 0.000000 1839 +shot 0 7 5.010635 0.000000 1898 +mirror 1 6 5.164786 5.164786 2028 +sharp 1 6 5.164786 5.164786 2100 +invest 1 6 5.164786 5.164786 2153 +altavista 0 6 5.164786 0.000000 2222 +infoseek 0 6 5.164786 0.000000 2188 +postcard 0 6 5.164786 0.000000 2181 +textual 0 6 5.164786 0.000000 1979 +apolog 0 6 5.164786 0.000000 2046 +garbag 0 6 5.164786 0.000000 1986 +price 0 6 5.164786 0.000000 1999 +hyper 1 5 5.347108 5.347108 2435 +appt 0 5 5.347108 0.000000 2312 +multiresolut 0 5 5.347108 0.000000 2423 +medicin 0 5 5.347108 0.000000 2448 +hennessi 0 5 5.347108 0.000000 2289 +travers 0 5 5.347108 0.000000 2363 +row 0 5 5.347108 0.000000 2330 +japanes 1 4 5.568345 5.568345 2934 +planet 1 4 5.568345 5.568345 2912 +zoom 0 4 5.568345 0.000000 2961 +moon 0 4 5.568345 0.000000 2991 +diagnosi 0 4 5.568345 0.000000 3027 +nist 0 4 5.568345 0.000000 2973 +wander 0 4 5.568345 0.000000 2896 +freewar 1 3 5.857933 5.857933 3504 +motif 0 3 5.857933 0.000000 3752 +sharewar 0 3 5.857933 0.000000 3503 +atmospher 0 3 5.857933 0.000000 3388 +rack 0 3 5.857933 0.000000 3176 +lockhe 0 3 5.857933 0.000000 3863 +georgia 0 3 5.857933 0.000000 3834 +trec 0 3 5.857933 0.000000 3547 +gigabyt 0 3 5.857933 0.000000 3548 +spider 0 3 5.857933 0.000000 3605 +latin 0 3 5.857933 0.000000 3741 +citizen 0 3 5.857933 0.000000 3238 +belong 0 3 5.857933 0.000000 3797 +tracer 1 2 6.263398 6.263398 5913 +pagegreg 0 2 6.263398 0.000000 5906 +pagenam 0 2 6.263398 0.000000 6165 +sharpemail 0 2 6.263398 0.000000 4766 +dejanew 0 2 6.263398 0.000000 5602 +harmoni 0 2 6.263398 0.000000 5235 +solar 0 2 6.263398 0.000000 5003 +comet 0 2 6.263398 0.000000 5785 +catalogu 0 2 6.263398 0.000000 6166 +gothic 0 2 6.263398 0.000000 5787 +soup 0 2 6.263398 0.000000 6131 +kitchen 0 2 6.263398 0.000000 6132 +awesom 0 2 6.263398 0.000000 6167 +diagon 0 2 6.263398 0.000000 4974 +atla 0 2 6.263398 0.000000 5996 +raster 0 2 6.263398 0.000000 6078 +radianc 0 2 6.263398 0.000000 6068 +radios 0 2 6.263398 0.000000 4504 +mexico 0 2 6.263398 0.000000 6044 +monash 0 2 6.263398 0.000000 4460 +strictli 0 2 6.263398 0.000000 5726 +pointcast 0 2 6.263398 0.000000 5377 +portfolio 0 2 6.263398 0.000000 4408 +offens 0 2 6.263398 0.000000 6168 +brill 0 2 6.263398 0.000000 4137 +sharpgreg 0 2 6.263398 0.000000 4767 +pic 2 1 6.957497 13.914994 18492 +wyom 1 1 6.957497 6.957497 18493 +satelit 1 1 6.957497 6.957497 18494 +handwrit 1 1 6.957497 6.957497 18495 +schwab 1 1 6.957497 6.957497 18496 +tgif 0 1 6.957497 0.000000 18497 +notesclass 0 1 6.957497 0.000000 18498 +aboutsearch 0 1 6.957497 0.000000 18499 +ohioc 0 1 6.957497 0.000000 18500 +cygnu 0 1 6.957497 0.000000 18501 +mumit 0 1 6.957497 0.000000 18502 +newbi 0 1 6.957497 0.000000 18503 +guideplatform 0 1 6.957497 0.000000 18504 +kit 0 1 6.957497 0.000000 18505 +amulet 0 1 6.957497 0.000000 18506 +dclap 0 1 6.957497 0.000000 18507 +wxwindow 0 1 6.957497 0.000000 18508 +yacl 0 1 6.957497 0.000000 18509 +projectclass 0 1 6.957497 0.000000 18510 +projectmisc 0 1 6.957497 0.000000 18511 +cygwin 0 1 6.957497 0.000000 18512 +gimp 0 1 6.957497 0.000000 18513 +harmonai 0 1 6.957497 0.000000 18514 +vasc 0 1 6.957497 0.000000 18515 +jaida 0 1 6.957497 0.000000 18516 +seamless 0 1 6.957497 0.000000 18517 +meteor 0 1 6.957497 0.000000 18518 +antarctica 0 1 6.957497 0.000000 18519 +niae 0 1 6.957497 0.000000 18520 +vistex 0 1 6.957497 0.000000 18521 +databaseartifici 0 1 6.957497 0.000000 18522 +primoridi 0 1 6.957497 0.000000 18523 +dermatolog 0 1 6.957497 0.000000 18524 +erlang 0 1 6.957497 0.000000 18525 +orthopaed 0 1 6.957497 0.000000 18526 +ecvnet 0 1 6.957497 0.000000 18527 +nici 0 1 6.957497 0.000000 18528 +groupimag 0 1 6.957497 0.000000 18529 +raytrac 0 1 6.957497 0.000000 18530 +rayshad 0 1 6.957497 0.000000 18531 +avalon 0 1 6.957497 0.000000 18532 +grimstead 0 1 6.957497 0.000000 18533 +dsite 0 1 6.957497 0.000000 18534 +intergraph 0 1 6.957497 0.000000 18535 +glint 0 1 6.957497 0.000000 18536 +chipset 0 1 6.957497 0.000000 18537 +nvidia 0 1 6.957497 0.000000 18538 +chipsetcomput 0 1 6.957497 0.000000 18539 +geometeri 0 1 6.957497 0.000000 18540 +geometrylispuseless 0 1 6.957497 0.000000 18541 +pagescomput 0 1 6.957497 0.000000 18542 +superdlx 0 1 6.957497 0.000000 18543 +parl 0 1 6.957497 0.000000 18544 +washingt 0 1 6.957497 0.000000 18545 +groupjapanes 0 1 6.957497 0.000000 18546 +unvers 0 1 6.957497 0.000000 18547 +infowav 0 1 6.957497 0.000000 18548 +edict 0 1 6.957497 0.000000 18549 +shodouka 0 1 6.957497 0.000000 18550 +asiasoftinform 0 1 6.957497 0.000000 18551 +retrev 0 1 6.957497 0.000000 18552 +peregrin 0 1 6.957497 0.000000 18553 +infomin 0 1 6.957497 0.000000 18554 +other_sw 0 1 6.957497 0.000000 18555 +info_retriev 0 1 6.957497 0.000000 18556 +jedi 0 1 6.957497 0.000000 18557 +hartlib 0 1 6.957497 0.000000 18558 +stemmer 0 1 6.957497 0.000000 18559 +twainhumor 0 1 6.957497 0.000000 18560 +threw 0 1 6.957497 0.000000 18561 +investorweb 0 1 6.957497 0.000000 18562 +networth 0 1 6.957497 0.000000 18563 +fundscap 0 1 6.957497 0.000000 18564 +stockmastermutu 0 1 6.957497 0.000000 18565 +brokerag 0 1 6.957497 0.000000 18566 +fidel 0 1 6.957497 0.000000 18567 +vanguard 0 1 6.957497 0.000000 18568 +gabelli 0 1 6.957497 0.000000 18569 +mutualsmisc 0 1 6.957497 0.000000 18570 +psnuplast 0 1 6.957497 0.000000 18571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..fdc8d1a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +sridhar 0 2 6.263398 0.000000 4807 +gopalsridhar 0 1 6.957497 0.000000 18572 +gopalgsri 0 1 6.957497 0.000000 18573 +edubon 0 1 6.957497 0.000000 18574 +marrow 0 1 6.957497 0.000000 18575 +pageresumest 0 1 6.957497 0.000000 18576 +pagecalvin 0 1 6.957497 0.000000 18577 +hobbesbookmark 0 1 6.957497 0.000000 18578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..c4c455d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +email 1 220 1.386294 1.386294 29 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +class 0 199 1.609438 0.000000 37 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +phone 0 175 1.791759 0.000000 45 +relat 1 139 1.945910 1.945910 68 +number 1 130 2.079442 2.079442 97 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +analysi 0 124 2.079442 0.000000 98 +send 1 114 2.197225 2.197225 109 +manag 0 114 2.197225 0.000000 125 +sinc 1 90 2.397895 2.397895 159 +grade 0 90 2.397895 0.000000 142 +educ 0 86 2.484907 0.000000 191 +environ 0 84 2.484907 0.000000 177 +stuff 0 87 2.484907 0.000000 171 +state 0 76 2.564949 0.000000 207 +june 0 79 2.564949 0.000000 214 +plan 0 65 2.772589 0.000000 272 +explor 0 58 2.890372 0.000000 324 +visitor 1 49 3.044522 3.044522 371 +visual 0 48 3.044522 0.000000 372 +california 0 46 3.091042 0.000000 388 +physic 0 47 3.091042 0.000000 377 +around 0 43 3.178054 0.000000 415 +chines 0 29 3.583519 0.000000 595 +weather 0 28 3.610918 0.000000 618 +famili 0 23 3.806662 0.000000 735 +ramakrishnan 0 16 4.174387 0.000000 972 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +raghu 0 12 4.465908 0.000000 1212 +career 0 12 4.465908 0.000000 1287 +devis 0 10 4.653960 0.000000 1451 +angel 0 8 4.875197 0.000000 1779 +advis 0 6 5.164786 0.000000 2173 +forecast 0 6 5.164786 0.000000 2171 +peke 0 5 5.347108 0.000000 2539 +medicin 0 5 5.347108 0.000000 2448 +miscellani 0 3 5.857933 0.000000 3976 +guangshun 1 2 6.263398 6.263398 6138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..405ca2cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +us 0 329 1.098612 0.000000 16 +design 1 213 1.386294 1.386294 25 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +list 0 201 1.609438 0.000000 39 +public 0 202 1.609438 0.000000 43 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +dayton 0 119 2.079442 0.000000 104 +compil 0 122 2.079442 0.000000 96 +code 0 108 2.197225 0.000000 116 +west 0 83 2.484907 0.000000 192 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +summari 0 73 2.639057 0.000000 237 +thesi 1 57 2.890372 2.890372 327 +detail 0 57 2.890372 0.000000 321 +local 0 55 2.944439 0.000000 334 +investig 0 51 2.995732 0.000000 353 +execut 0 45 3.135494 0.000000 404 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +hand 0 37 3.332205 0.000000 475 +focus 0 29 3.583519 0.000000 584 +target 0 12 4.465908 0.000000 1282 +laru 0 9 4.753590 0.000000 1560 +studentdepart 0 5 5.347108 0.000000 2505 +parallelprogram 0 5 5.347108 0.000000 2379 +parallellanguag 1 3 5.857933 5.857933 4026 +usaadvisor 0 3 5.857933 0.000000 4017 +guhan 1 2 6.263398 6.263398 6169 +viswanathan 1 2 6.263398 6.263398 6170 +amor 0 2 6.263398 0.000000 5486 +gviswana 0 1 6.957497 0.000000 18579 +parallelappl 0 1 6.957497 0.000000 18580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..4d8517d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +address 0 170 1.791759 0.000000 62 +architectur 1 139 1.945910 1.945910 77 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +year 0 148 1.945910 0.000000 84 +databas 0 122 2.079442 0.000000 86 +spring 0 131 2.079442 0.000000 88 +manag 0 114 2.197225 0.000000 125 +world 0 115 2.197225 0.000000 126 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +thing 0 84 2.484907 0.000000 189 +stuff 0 87 2.484907 0.000000 171 +meet 0 72 2.639057 0.000000 229 +line 0 75 2.639057 0.000000 231 +would 0 67 2.708050 0.000000 251 +prof 1 64 2.772589 2.772589 273 +colleg 0 61 2.833213 0.000000 300 +undergradu 0 54 2.944439 0.000000 338 +electron 0 47 3.091042 0.000000 379 +mark 0 44 3.135494 0.000000 403 +jame 0 35 3.401197 0.000000 507 +india 0 32 3.465736 0.000000 550 +photo 0 31 3.496508 0.000000 561 +hill 0 25 3.737670 0.000000 670 +sport 0 25 3.737670 0.000000 683 +indian 0 22 3.850148 0.000000 769 +sept 1 17 4.110874 4.110874 952 +ramakrishnan 0 16 4.174387 0.000000 972 +photograph 0 15 4.248495 0.000000 1056 +draw 0 14 4.317488 0.000000 1086 +warn 0 14 4.317488 0.000000 1068 +raghu 0 12 4.465908 0.000000 1212 +newspap 0 12 4.465908 0.000000 1280 +classmat 0 9 4.753590 0.000000 1516 +folk 0 9 4.753590 0.000000 1597 +counter 1 8 4.875197 4.875197 1765 +goodman 0 7 5.010635 0.000000 1891 +courtesi 0 7 5.010635 0.000000 1953 +famou 0 6 5.164786 0.000000 2185 +mirza 0 3 5.857933 0.000000 3989 +osmania 0 2 6.263398 0.000000 5573 +hyderabad 0 2 6.263398 0.000000 5570 +sastri 0 2 6.263398 0.000000 6171 +roommat 0 2 6.263398 0.000000 6157 +saeed 0 2 6.263398 0.000000 6172 +dust 0 2 6.263398 0.000000 5551 +harit 1 1 6.957497 6.957497 18581 +mvsr 0 1 6.957497 0.000000 18582 +murthi 0 1 6.957497 0.000000 18583 +zubber 0 1 6.957497 0.000000 18584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..4ad5cf40 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +first 0 140 1.945910 0.000000 71 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +mathemat 1 108 2.197225 2.197225 123 +java 1 70 2.708050 2.708050 248 +dept 0 64 2.772589 0.000000 291 +septemb 0 65 2.772589 0.000000 274 +colleg 0 61 2.833213 0.000000 300 +telephon 0 50 3.044522 0.000000 373 +seminar 0 38 3.295837 0.000000 470 +departmentunivers 0 24 3.761200 0.000000 711 +applet 0 20 3.951244 0.000000 827 +basketbal 0 12 4.465908 0.000000 1289 +edutelephon 0 10 4.653960 0.000000 1473 +engr 0 10 4.653960 0.000000 1427 +volleybal 0 9 4.753590 0.000000 1598 +softbal 0 9 4.753590 0.000000 1594 +rebecca 1 6 5.164786 5.164786 2174 +noland 0 5 5.347108 0.000000 2420 +assistantcomput 0 3 5.857933 0.000000 4027 +hasti 1 2 6.263398 6.263398 6173 +carleton 0 2 6.263398 0.000000 5381 +linkag 0 2 6.263398 0.000000 5139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..05eaddb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +offic 1 299 1.098612 1.098612 13 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +address 0 170 1.791759 0.000000 62 +hour 0 165 1.791759 0.000000 46 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +databas 0 122 2.079442 0.000000 86 +person 1 111 2.197225 2.197225 117 +final 0 116 2.197225 0.000000 108 +manag 0 114 2.197225 0.000000 125 +mathemat 0 108 2.197225 0.000000 123 +advanc 0 99 2.302585 0.000000 130 +section 0 94 2.397895 0.000000 149 +imag 0 91 2.397895 0.000000 161 +stuff 1 87 2.484907 2.484907 171 +academ 1 82 2.484907 2.484907 178 +info 0 85 2.484907 0.000000 176 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +internet 0 83 2.484907 0.000000 186 +state 1 76 2.564949 2.564949 207 +want 0 79 2.564949 0.000000 199 +good 0 77 2.564949 0.000000 200 +differ 0 66 2.708050 0.000000 253 +dept 1 64 2.772589 2.772589 291 +new 0 64 2.772589 0.000000 262 +talk 0 53 2.944439 0.000000 336 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +get 0 46 3.091042 0.000000 380 +expect 0 37 3.332205 0.000000 484 +download 0 36 3.367296 0.000000 489 +semant 0 29 3.583519 0.000000 587 +hope 0 28 3.610918 0.000000 610 +retriev 0 27 3.637586 0.000000 621 +enjoi 0 26 3.688879 0.000000 660 +tell 0 21 3.912023 0.000000 777 +stand 0 18 4.060443 0.000000 891 +stop 0 17 4.110874 0.000000 942 +seek 0 17 4.110874 0.000000 954 +thought 0 17 4.110874 0.000000 945 +alan 0 13 4.382027 0.000000 1146 +philosophi 0 13 4.382027 0.000000 1167 +brother 0 13 4.382027 0.000000 1189 +neat 1 12 4.465908 4.465908 1263 +count 0 12 4.465908 0.000000 1239 +minor 0 12 4.465908 0.000000 1237 +linguist 0 9 4.753590 0.000000 1593 +bart 0 9 4.753590 0.000000 1559 +lane 1 8 4.875197 4.875197 1720 +rais 0 8 4.875197 0.000000 1711 +jack 0 8 4.875197 0.000000 1780 +reload 0 8 4.875197 0.000000 1682 +on 0 8 4.875197 0.000000 1628 +marri 0 7 5.010635 0.000000 1946 +accord 0 7 5.010635 0.000000 1826 +creation 0 6 5.164786 0.000000 2069 +handi 0 6 5.164786 0.000000 2111 +advic 0 5 5.347108 0.000000 2509 +formerli 0 5 5.347108 0.000000 2397 +truli 0 5 5.347108 0.000000 2476 +plant 0 5 5.347108 0.000000 2497 +ling 0 4 5.568345 0.000000 3045 +cyber 0 4 5.568345 0.000000 2909 +luck 0 3 5.857933 0.000000 3201 +audit 0 3 5.857933 0.000000 3391 +northeast 0 3 5.857933 0.000000 3922 +chad 1 2 6.263398 6.263398 4768 +biggest 0 2 6.263398 0.000000 4790 +arthur 0 2 6.263398 0.000000 5767 +deep 0 2 6.263398 0.000000 5528 +claud 1 1 6.957497 6.957497 18585 +welcomethank 0 1 6.957497 0.000000 18586 +bestbet 0 1 6.957497 0.000000 18587 +onmai 0 1 6.957497 0.000000 18588 +nichol 0 1 6.957497 0.000000 18589 +discours 0 1 6.957497 0.000000 18590 +barwis 0 1 6.957497 0.000000 18591 +epigram 0 1 6.957497 0.000000 18592 +perli 0 1 6.957497 0.000000 18593 +laud 0 1 6.957497 0.000000 18594 +truman 0 1 6.957497 0.000000 18595 +missouri 0 1 6.957497 0.000000 18596 +poop 0 1 6.957497 0.000000 18597 +unabash 0 1 6.957497 0.000000 18598 +psychot 0 1 6.957497 0.000000 18599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..04443634 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 0 284 1.098612 0.000000 21 +link 1 247 1.386294 1.386294 24 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +design 0 213 1.386294 0.000000 25 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +develop 0 174 1.791759 0.000000 53 +applic 0 170 1.791759 0.000000 56 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +assist 0 112 2.197225 0.000000 113 +book 1 99 2.302585 2.302585 131 +technic 0 100 2.302585 0.000000 140 +graphic 0 90 2.397895 0.000000 147 +select 0 91 2.397895 0.000000 154 +proceed 0 93 2.397895 0.000000 152 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +environ 0 84 2.484907 0.000000 177 +ieee 0 86 2.484907 0.000000 190 +educ 0 86 2.484907 0.000000 191 +appear 1 78 2.564949 2.564949 210 +issu 0 78 2.564949 0.000000 211 +refer 0 78 2.564949 0.000000 203 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +line 0 75 2.639057 0.000000 231 +august 1 66 2.708050 2.708050 257 +simul 0 66 2.708050 0.000000 255 +plan 1 65 2.772589 2.772589 272 +abstract 0 62 2.772589 0.000000 276 +laboratori 0 63 2.772589 0.000000 292 +juli 0 60 2.833213 0.000000 305 +publish 0 57 2.890372 0.000000 326 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +move 0 47 3.091042 0.000000 382 +autom 0 41 3.218876 0.000000 434 +multipl 1 39 3.258097 3.258097 453 +vita 0 38 3.295837 0.000000 473 +robot 2 36 3.367296 6.734592 497 +curriculum 0 33 3.433987 0.000000 535 +extend 0 32 3.465736 0.000000 539 +anim 0 31 3.496508 0.000000 557 +common 0 30 3.555348 0.000000 574 +travel 0 30 3.555348 0.000000 579 +proc 1 26 3.688879 3.688879 649 +experiment 0 26 3.688879 0.000000 645 +motion 1 24 3.761200 3.761200 699 +geometri 1 22 3.850148 3.850148 752 +rout 0 21 3.912023 0.000000 793 +unit 0 21 3.912023 0.000000 779 +basi 0 20 3.951244 0.000000 828 +geometr 0 19 4.007333 0.000000 852 +susan 2 15 4.248495 8.496990 1050 +configur 0 15 4.248495 0.000000 1012 +vladimir 1 11 4.553877 4.553877 1324 +arbitrari 0 11 4.553877 0.000000 1359 +cook 0 10 4.653960 0.000000 1464 +congress 0 9 4.753590 0.000000 1592 +autonom 1 8 4.875197 4.875197 1749 +curv 1 8 4.875197 4.875197 1656 +plane 0 6 5.164786 0.000000 2187 +bind 0 5 5.347108 0.000000 2250 +tiwari 0 5 5.347108 0.000000 2385 +lumelski 1 4 5.568345 5.568345 2837 +ti 0 4 5.568345 0.000000 3005 +underwat 0 4 5.568345 0.000000 2838 +shelf 0 4 5.568345 0.000000 2621 +canadian 0 3 5.857933 0.000000 3508 +planar 0 3 5.857933 0.000000 3647 +hert 2 2 6.263398 12.526796 4848 +tether 1 2 6.263398 6.263398 4844 +deform 0 2 6.263398 0.000000 6065 +terrain 0 2 6.263398 0.000000 6174 +epicuri 0 2 6.263398 0.000000 5105 +veggi 0 2 6.263398 0.000000 5426 +alogirthm 0 1 6.957497 0.000000 18600 +sanjai 0 1 6.957497 0.000000 18601 +reznik 0 1 6.957497 0.000000 18602 +samantha 0 1 6.957497 0.000000 18603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..65fdc291 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +dayton 0 119 2.079442 0.000000 104 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +section 0 94 2.397895 0.000000 149 +sinc 0 90 2.397895 0.000000 159 +visit 0 63 2.772589 0.000000 288 +experi 0 64 2.772589 0.000000 283 +finger 0 52 2.995732 0.000000 354 +maintain 0 51 2.995732 0.000000 342 +appoint 0 49 3.044522 0.000000 358 +departmentunivers 0 24 3.761200 0.000000 711 +serv 0 22 3.850148 0.000000 758 +countri 0 15 4.248495 0.000000 1059 +wife 0 13 4.382027 0.000000 1196 +sai 0 13 4.382027 0.000000 1175 +tue 0 11 4.553877 0.000000 1308 +edutelephon 0 10 4.653960 0.000000 1473 +counter 0 8 4.875197 0.000000 1765 +studentcomput 0 7 5.010635 0.000000 1963 +none 0 7 5.010635 0.000000 1811 +corp 0 6 5.164786 0.000000 2139 +peac 0 3 5.857933 0.000000 3769 +kirk 1 2 6.263398 6.263398 6175 +hogenson 1 1 6.957497 6.957497 18604 +myschedul 0 1 6.957497 0.000000 18605 +workout 0 1 6.957497 0.000000 18606 +tryto 0 1 6.957497 0.000000 18607 +ghana 0 1 6.957497 0.000000 18608 +usernam 0 1 6.957497 0.000000 18609 +pnhp 0 1 6.957497 0.000000 18610 +eilun 0 1 6.957497 0.000000 18611 +accessedtim 0 1 6.957497 0.000000 18612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..e8a1ad8d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +madison 0 165 1.791759 0.000000 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +linear 0 41 3.218876 0.000000 431 +road 0 11 4.553877 0.000000 1374 +jeffrei 1 9 4.753590 4.753590 1612 +wise 0 3 5.857933 0.000000 3631 +horn 1 2 6.263398 6.263398 6072 +swanton 0 1 6.957497 0.000000 18613 +familyemploymenteducationresearchgenealog 0 1 6.957497 0.000000 18614 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..9a7f481a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +cornel 0 215 1.386294 0.000000 23 +design 0 213 1.386294 0.000000 25 +also 0 259 1.386294 0.000000 28 +gener 0 220 1.386294 0.000000 27 +washington 0 236 1.386294 0.000000 32 +class 1 199 1.609438 1.609438 37 +oper 0 180 1.609438 0.000000 34 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +problem 1 147 1.945910 1.945910 75 +like 0 132 1.945910 0.000000 81 +analysi 2 124 2.079442 4.158884 98 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +structur 0 106 2.197225 0.000000 105 +specif 0 106 2.197225 0.000000 106 +teach 0 108 2.197225 0.000000 112 +proceed 1 93 2.397895 2.397895 152 +call 1 91 2.397895 2.397895 153 +octob 0 89 2.397895 0.000000 156 +west 0 83 2.484907 0.000000 192 +help 0 83 2.484907 0.000000 175 +chang 0 82 2.484907 0.000000 163 +larg 0 82 2.484907 0.000000 168 +second 0 81 2.484907 0.000000 166 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +symposium 1 72 2.639057 2.639057 238 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +appli 0 71 2.639057 0.000000 226 +differ 1 66 2.708050 2.708050 253 +would 0 67 2.708050 0.000000 251 +practic 0 70 2.708050 0.000000 246 +test 0 66 2.708050 0.000000 252 +januari 1 62 2.772589 2.772589 264 +previou 0 62 2.772589 0.000000 290 +foundat 0 62 2.772589 0.000000 286 +point 0 58 2.890372 0.000000 319 +telephon 0 50 3.044522 0.000000 373 +understand 1 47 3.091042 3.091042 384 +combin 0 42 3.218876 0.000000 421 +fast 0 42 3.218876 0.000000 429 +programm 0 39 3.258097 0.000000 445 +streetmadison 0 38 3.295837 0.000000 474 +represent 0 35 3.401197 0.000000 512 +graph 1 30 3.555348 3.555348 576 +exist 0 30 3.555348 0.000000 569 +produc 0 30 3.555348 0.000000 572 +depend 1 29 3.583519 3.583519 583 +semant 1 29 3.583519 3.583519 587 +propos 0 28 3.610918 0.000000 602 +accur 0 25 3.737670 0.000000 680 +departmentunivers 0 24 3.761200 0.000000 711 +flow 0 24 3.761200 0.000000 700 +identifi 0 22 3.850148 0.000000 760 +record 1 18 4.060443 4.060443 890 +thoma 0 18 4.060443 0.000000 901 +modif 0 17 4.110874 0.000000 913 +white 0 17 4.110874 0.000000 951 +fourth 0 16 4.174387 0.000000 999 +precis 1 15 4.248495 4.248495 1023 +susan 0 15 4.248495 0.000000 1050 +piec 0 15 4.248495 0.000000 1020 +demand 0 14 4.317488 0.000000 1073 +francisco 0 14 4.317488 0.000000 1095 +individu 0 13 4.382027 0.000000 1126 +joint 0 13 4.382027 0.000000 1130 +sigplan 0 13 4.382027 0.000000 1190 +franc 0 12 4.465908 0.000000 1276 +mainli 1 10 4.653960 4.653960 1432 +guarante 0 10 4.653960 0.000000 1391 +conferenceon 0 9 4.753590 0.000000 1595 +plain 0 9 4.753590 0.000000 1495 +secretari 0 8 4.875197 0.000000 1775 +shapiro 0 8 4.875197 0.000000 1686 +implementationof 0 7 5.010635 0.000000 1813 +necessarili 0 7 5.010635 0.000000 1899 +sixth 0 7 5.010635 0.000000 1917 +textual 1 6 5.164786 5.164786 1979 +pari 0 6 5.164786 0.000000 2158 +softwareengin 0 6 5.164786 0.000000 2162 +increment 0 6 5.164786 0.000000 2206 +horwitz 2 5 5.347108 10.694216 2411 +dataflow 1 5 5.347108 5.347108 2390 +twenti 1 5 5.347108 5.347108 2540 +summarymi 0 5 5.347108 0.000000 2580 +australia 0 5 5.347108 0.000000 2478 +rep 1 4 5.568345 5.568345 3087 +interprocedur 1 4 5.568345 5.568345 2771 +slice 1 4 5.568345 5.568345 2622 +usa 0 4 5.568345 0.000000 3080 +theprogram 0 4 5.568345 0.000000 2686 +insensit 0 4 5.568345 0.000000 2716 +sigsoft 0 4 5.568345 0.000000 3036 +melbourn 0 4 5.568345 0.000000 3035 +principlesof 1 3 5.857933 5.857933 3145 +onprincipl 0 3 5.857933 0.000000 3701 +theoryand 0 3 5.857933 0.000000 3350 +denmark 0 3 5.857933 0.000000 3676 +reachabl 0 3 5.857933 0.000000 4001 +twentieth 0 3 5.857933 0.000000 3760 +fourteenth 0 3 5.857933 0.000000 3615 +sagiv 1 2 6.263398 6.263398 6176 +differenc 0 2 6.263398 0.000000 6177 +interproceduraldataflow 0 2 6.263398 0.000000 6178 +mooli 0 2 6.263398 0.000000 6179 +aarhu 0 2 6.263398 0.000000 6180 +charleston 0 2 6.263398 0.000000 6181 +aprogram 0 2 6.263398 0.000000 4943 +languagedesign 0 2 6.263398 0.000000 6182 +horwitzsusan 0 1 6.957497 0.000000 18615 +horwitzprofessorcomput 0 1 6.957497 0.000000 18616 +environmentsprogram 0 1 6.957497 0.000000 18617 +mergingstat 0 1 6.957497 0.000000 18618 +programsinterprocedur 0 1 6.957497 0.000000 18619 +analysisresearch 0 1 6.957497 0.000000 18620 +affectedbi 0 1 6.957497 0.000000 18621 +betweentwo 0 1 6.957497 0.000000 18622 +retest 0 1 6.957497 0.000000 18623 +certainsemant 0 1 6.957497 0.000000 18624 +concentratedeith 0 1 6.957497 0.000000 18625 +newalgorithm 0 1 6.957497 0.000000 18626 +publicationsm 0 1 6.957497 0.000000 18627 +constantpropag 0 1 6.957497 0.000000 18628 +bate 0 1 6.957497 0.000000 18629 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..a2141977 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +offic 0 299 1.098612 0.000000 13 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +click 0 142 1.945910 0.000000 78 +postscript 0 131 2.079442 0.000000 90 +version 0 113 2.197225 0.000000 122 +pictur 0 89 2.397895 0.000000 160 +resum 0 79 2.564949 0.000000 217 +hummert 0 3 5.857933 0.000000 3416 +pagenam 0 2 6.263398 0.000000 6165 +sidnei 0 2 6.263398 0.000000 4587 +edua 0 2 6.263398 0.000000 5764 +pagesid 0 1 6.957497 0.000000 18630 +hummertoffic 0 1 6.957497 0.000000 18631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..c5e9642a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +offic 0 299 1.098612 0.000000 13 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 0 247 1.386294 0.000000 24 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +like 0 132 1.945910 0.000000 81 +dayton 0 119 2.079442 0.000000 104 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +particular 0 51 2.995732 0.000000 352 +math 0 44 3.135494 0.000000 402 +vision 0 41 3.218876 0.000000 430 +streetmadison 0 38 3.295837 0.000000 474 +feel 0 37 3.332205 0.000000 483 +robot 0 36 3.367296 0.000000 497 +actual 0 28 3.610918 0.000000 604 +departmentunivers 0 24 3.761200 0.000000 711 +ever 0 19 4.007333 0.000000 872 +alreadi 0 16 4.174387 0.000000 963 +speak 0 12 4.465908 0.000000 1283 +undergrad 0 9 4.753590 0.000000 1589 +disclaim 0 4 5.568345 0.000000 2847 +alien 1 3 5.857933 5.857933 3930 +slave 0 3 5.857933 0.000000 3959 +igor 0 2 6.263398 0.000000 6183 +ivanisev 0 2 6.263398 0.000000 6184 +newest 0 2 6.263398 0.000000 5518 +needless 0 2 6.263398 0.000000 4694 +drake 0 2 6.263398 0.000000 5668 +pageigorivanisev 0 1 6.957497 0.000000 18632 +generalgradu 0 1 6.957497 0.000000 18633 +departmentwa 0 1 6.957497 0.000000 18634 +departmentaddress 0 1 6.957497 0.000000 18635 +iigor 0 1 6.957497 0.000000 18636 +eduiigor 0 1 6.957497 0.000000 18637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..5f228ce1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 0 242 1.386294 0.000000 33 +pageireland 0 1 6.957497 0.000000 18638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..fa21e625 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +email 0 220 1.386294 0.000000 29 +class 0 199 1.609438 0.000000 37 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +section 1 94 2.397895 2.397895 149 +comment 0 93 2.397895 0.000000 146 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 0 70 2.708050 0.000000 241 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +profil 0 30 3.555348 0.000000 581 +peterson 0 7 5.010635 0.000000 1850 +sharenow 1 3 5.857933 5.857933 3439 +jone 0 3 5.857933 0.000000 3703 +recreat 0 3 5.857933 0.000000 3990 +madisonmadison 0 1 6.957497 0.000000 18639 +sciencestelephon 0 1 6.957497 0.000000 18640 +pmsection 0 1 6.957497 0.000000 18641 +pmboth 0 1 6.957497 0.000000 18642 +sciencesc 0 1 6.957497 0.000000 18643 +announcementshandoutsmoth 0 1 6.957497 0.000000 18644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..e614509c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +avail 0 169 1.791759 0.000000 48 +area 0 144 1.945910 0.000000 80 +dayton 0 119 2.079442 0.000000 104 +resum 0 79 2.564949 0.000000 217 +onlin 0 75 2.639057 0.000000 223 +receiv 0 66 2.708050 0.000000 244 +prof 0 64 2.772589 0.000000 273 +previou 0 62 2.772589 0.000000 290 +colleg 0 61 2.833213 0.000000 300 +eduoffic 0 33 3.433987 0.000000 531 +departmentunivers 0 24 3.761200 0.000000 711 +livni 0 15 4.248495 0.000000 1053 +english 0 15 4.248495 0.000000 1033 +miron 0 14 4.317488 0.000000 1110 +condor 0 5 5.347108 0.000000 2577 +webpag 0 4 5.568345 0.000000 2660 +assistantcomput 0 3 5.857933 0.000000 4027 +statisticsoffic 0 2 6.263398 0.000000 4810 +andnetwork 0 2 6.263398 0.000000 5751 +basnei 0 2 6.263398 0.000000 4804 +basneyjim 0 1 6.957497 0.000000 18645 +basneygradu 0 1 6.957497 0.000000 18646 +jbasnei 0 1 6.957497 0.000000 18647 +directionof 0 1 6.957497 0.000000 18648 +fromoberlin 0 1 6.957497 0.000000 18649 +oberlin 0 1 6.957497 0.000000 18650 +codefrom 0 1 6.957497 0.000000 18651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..3da13a00 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +also 1 259 1.386294 1.386294 28 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +support 0 132 1.945910 0.000000 83 +click 0 142 1.945910 0.000000 78 +first 0 140 1.945910 0.000000 71 +like 0 132 1.945910 0.000000 81 +databas 1 122 2.079442 2.079442 86 +assist 0 112 2.197225 0.000000 113 +stuff 0 87 2.484907 0.000000 171 +know 0 80 2.564949 0.000000 198 +plai 1 60 2.833213 2.833213 307 +special 0 56 2.890372 0.000000 320 +cover 0 55 2.944439 0.000000 329 +sampl 0 53 2.944439 0.000000 339 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +favorit 0 44 3.135494 0.000000 410 +show 0 43 3.178054 0.000000 417 +around 0 43 3.178054 0.000000 415 +error 0 40 3.258097 0.000000 449 +electr 0 38 3.295837 0.000000 461 +origin 0 38 3.295837 0.000000 472 +respons 0 37 3.332205 0.000000 476 +soon 0 36 3.367296 0.000000 494 +abl 0 30 3.555348 0.000000 566 +wai 0 25 3.737670 0.000000 662 +instal 0 22 3.850148 0.000000 754 +watch 0 21 3.912023 0.000000 789 +mostli 0 19 4.007333 0.000000 869 +record 0 18 4.060443 0.000000 890 +stand 0 18 4.060443 0.000000 891 +train 0 14 4.317488 0.000000 1066 +classic 0 14 4.317488 0.000000 1084 +believ 0 13 4.382027 0.000000 1187 +mainli 0 10 4.653960 0.000000 1432 +didn 0 9 4.753590 0.000000 1563 +guitar 0 8 4.875197 0.000000 1758 +hit 0 7 5.010635 0.000000 1965 +seen 0 6 5.164786 0.000000 2202 +golf 0 6 5.164786 0.000000 2178 +pool 0 6 5.164786 0.000000 2225 +yeah 0 6 5.164786 0.000000 2195 +backup 0 4 5.568345 0.000000 2645 +metal 0 4 5.568345 0.000000 3079 +hourli 0 3 5.857933 0.000000 3734 +thrash 0 3 5.857933 0.000000 3400 +evil 0 3 5.857933 0.000000 3943 +mackai 1 2 6.263398 6.263398 5762 +fulltim 0 2 6.263398 0.000000 5170 +sybas 0 2 6.263398 0.000000 4723 +ingr 0 2 6.263398 0.000000 4097 +ey 0 2 6.263398 0.000000 5068 +metallica 0 2 6.263398 0.000000 4991 +raquetbal 0 2 6.263398 0.000000 6012 +towel 0 2 6.263398 0.000000 4793 +jerel 1 1 6.957497 6.957497 18652 +pagejerel 0 1 6.957497 0.000000 18653 +specialti 0 1 6.957497 0.000000 18654 +violin 0 1 6.957497 0.000000 18655 +baroqu 0 1 6.957497 0.000000 18656 +shock 0 1 6.957497 0.000000 18657 +funni 0 1 6.957497 0.000000 18658 +abba 0 1 6.957497 0.000000 18659 +shoot 0 1 6.957497 0.000000 18660 +jerellast 0 1 6.957497 0.000000 18661 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..4c6b17e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +construct 0 139 1.945910 0.000000 82 +homepag 0 93 2.397895 0.000000 148 +johan 0 2 6.263398 0.000000 4900 +larson 0 1 6.957497 0.000000 18662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..7f28ed9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +link 1 247 1.386294 1.386294 24 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +class 0 199 1.609438 0.000000 37 +year 1 148 1.945910 1.945910 84 +note 0 142 1.945910 0.000000 67 +like 0 132 1.945910 0.000000 81 +click 0 142 1.945910 0.000000 78 +number 1 130 2.079442 2.079442 97 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +take 0 97 2.302585 0.000000 134 +memori 0 101 2.302585 0.000000 139 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +homepag 1 93 2.397895 2.397895 148 +search 1 95 2.397895 2.397895 155 +mani 0 92 2.397895 0.000000 150 +follow 0 92 2.397895 0.000000 143 +select 0 91 2.397895 0.000000 154 +stuff 1 87 2.484907 2.484907 171 +thing 0 84 2.484907 0.000000 189 +start 0 83 2.484907 0.000000 173 +contain 0 81 2.484907 0.000000 174 +good 0 77 2.564949 0.000000 200 +want 0 79 2.564949 0.000000 199 +name 0 72 2.639057 0.000000 220 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +would 1 67 2.708050 2.708050 251 +java 0 70 2.708050 0.000000 248 +order 0 69 2.708050 0.000000 249 +date 0 51 2.995732 0.000000 344 +cool 1 49 3.044522 3.044522 374 +friend 1 48 3.044522 3.044522 376 +format 0 48 3.044522 0.000000 356 +archiv 0 49 3.044522 0.000000 364 +made 0 44 3.135494 0.000000 398 +directori 0 45 3.135494 0.000000 396 +realli 1 40 3.258097 3.258097 444 +societi 0 40 3.258097 0.000000 456 +join 0 39 3.258097 0.000000 457 +movi 0 40 3.258097 0.000000 459 +go 0 33 3.433987 0.000000 529 +anim 0 31 3.496508 0.000000 557 +someth 0 31 3.496508 0.000000 554 +exist 0 30 3.555348 0.000000 569 +though 0 27 3.637586 0.000000 622 +great 0 27 3.637586 0.000000 626 +mine 0 26 3.688879 0.000000 654 +enjoi 0 26 3.688879 0.000000 660 +rule 0 26 3.688879 0.000000 638 +todai 0 25 3.737670 0.000000 672 +frame 1 24 3.761200 3.761200 684 +seri 0 24 3.761200 0.000000 708 +head 0 23 3.806662 0.000000 732 +sequenc 0 23 3.806662 0.000000 734 +almost 0 22 3.850148 0.000000 742 +hierarchi 0 22 3.850148 0.000000 744 +half 0 21 3.912023 0.000000 776 +grad 0 20 3.951244 0.000000 837 +mpeg 0 20 3.951244 0.000000 831 +els 0 19 4.007333 0.000000 843 +club 0 15 4.248495 0.000000 1058 +happi 0 14 4.317488 0.000000 1079 +becam 0 14 4.317488 0.000000 1117 +trip 0 14 4.317488 0.000000 1113 +pretti 0 13 4.382027 0.000000 1191 +neat 0 12 4.465908 0.000000 1263 +realiti 0 12 4.465908 0.000000 1272 +death 0 10 4.653960 0.000000 1457 +float 0 9 4.753590 0.000000 1504 +explicit 0 9 4.753590 0.000000 1525 +said 0 9 4.753590 0.000000 1571 +sound 0 9 4.753590 0.000000 1605 +jack 0 8 4.875197 0.000000 1780 +relax 0 6 5.164786 0.000000 2120 +jpeg 0 6 5.164786 0.000000 2053 +apolog 0 6 5.164786 0.000000 2046 +benefit 0 6 5.164786 0.000000 2213 +default 1 5 5.347108 5.347108 2335 +girlfriend 0 5 5.347108 0.000000 2579 +frog 0 5 5.347108 0.000000 2479 +chaotic 0 5 5.347108 0.000000 2566 +semi 0 5 5.347108 0.000000 2510 +paus 1 4 5.568345 5.568345 2965 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +relief 0 4 5.568345 0.000000 2784 +afraid 0 4 5.568345 0.000000 3053 +repeat 0 4 5.568345 0.000000 2798 +vital 0 4 5.568345 0.000000 2733 +bear 0 4 5.568345 0.000000 2651 +exploratori 0 4 5.568345 0.000000 3073 +exclus 0 4 5.568345 0.000000 2947 +jherro 0 3 5.857933 0.000000 3427 +let 0 3 5.857933 0.000000 3790 +tortur 0 3 5.857933 0.000000 3634 +nicknam 0 3 5.857933 0.000000 3716 +lame 0 3 5.857933 0.000000 3717 +haiku 0 3 5.857933 0.000000 3811 +bout 0 3 5.857933 0.000000 3670 +cano 1 2 6.263398 6.263398 5207 +apictur 0 2 6.263398 0.000000 5024 +kermit 0 2 6.263398 0.000000 4742 +intervent 0 2 6.263398 0.000000 6163 +shack 0 2 6.263398 0.000000 5369 +roomat 1 1 6.957497 6.957497 18663 +censorship 0 1 6.957497 0.000000 18664 +disembody 0 1 6.957497 0.000000 18665 +millisecond 0 1 6.957497 0.000000 18666 +overriden 0 1 6.957497 0.000000 18667 +aquir 0 1 6.957497 0.000000 18668 +skellington 0 1 6.957497 0.000000 18669 +thath 0 1 6.957497 0.000000 18670 +forgotten 0 1 6.957497 0.000000 18671 +cult 0 1 6.957497 0.000000 18672 +hippothi 0 1 6.957497 0.000000 18673 +matriarch 0 1 6.957497 0.000000 18674 +yahooooooooooooo 0 1 6.957497 0.000000 18675 +bazillion 0 1 6.957497 0.000000 18676 +muppet 0 1 6.957497 0.000000 18677 +rachel 0 1 6.957497 0.000000 18678 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..5fc895a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +wisconsin 1 169 1.791759 1.791759 54 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +perform 0 143 1.945910 0.000000 74 +click 0 142 1.945910 0.000000 78 +databas 1 122 2.079442 2.079442 86 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +manag 1 114 2.197225 2.197225 125 +version 0 113 2.197225 0.000000 122 +find 0 111 2.197225 0.000000 111 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +imag 0 91 2.397895 0.000000 161 +educ 0 86 2.484907 0.000000 191 +west 0 83 2.484907 0.000000 192 +larg 0 82 2.484907 0.000000 168 +ieee 0 86 2.484907 0.000000 190 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +orient 0 80 2.564949 0.000000 205 +david 0 71 2.639057 0.000000 232 +effici 0 73 2.639057 0.000000 233 +workshop 0 71 2.639057 0.000000 239 +septemb 1 65 2.772589 2.772589 274 +prof 0 64 2.772589 0.000000 273 +evalu 0 64 2.772589 0.000000 266 +juli 0 60 2.833213 0.000000 305 +index 0 56 2.890372 0.000000 309 +februari 1 54 2.944439 2.944439 328 +advisor 1 51 2.995732 2.995732 355 +pointer 1 48 3.044522 3.044522 368 +approach 0 48 3.044522 0.000000 366 +set 0 50 3.044522 0.000000 361 +execut 0 45 3.135494 0.000000 404 +submit 1 39 3.258097 3.258097 440 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +tree 0 36 3.367296 0.000000 492 +queri 1 33 3.433987 3.433987 524 +storag 1 31 3.496508 3.496508 553 +full 0 28 3.610918 0.000000 615 +constraint 1 26 3.688879 3.688879 636 +client 1 25 3.737670 3.737670 679 +store 0 24 3.761200 0.000000 693 +size 1 23 3.806662 3.806662 713 +tenni 1 20 3.951244 3.951244 838 +sigmod 0 19 4.007333 0.000000 877 +white 0 17 4.110874 0.000000 951 +hobbi 0 16 4.174387 0.000000 1009 +spatial 0 16 4.174387 0.000000 988 +ramakrishnan 0 16 4.174387 0.000000 972 +dbm 1 13 4.382027 4.382027 1136 +nasa 0 13 4.382027 0.000000 1188 +dewitt 1 12 4.465908 4.465908 1270 +impact 0 11 4.553877 0.000000 1334 +naughton 1 10 4.653960 4.653960 1450 +resid 0 10 4.653960 0.000000 1461 +conferenceon 0 9 4.753590 0.000000 1595 +volleybal 0 9 4.753590 0.000000 1598 +paradis 1 8 4.875197 4.875197 1782 +assistantdepart 0 8 4.875197 0.000000 1784 +mass 0 8 4.875197 0.000000 1732 +burger 0 7 5.010635 0.000000 1889 +patel 1 6 5.164786 5.164786 2154 +geograph 0 6 5.164786 0.000000 2236 +goldstein 0 6 5.164786 0.000000 2168 +tuft 0 5 5.347108 0.000000 2575 +houston 0 5 5.347108 0.000000 2460 +water 0 5 5.347108 0.000000 2535 +ramasami 0 4 5.568345 0.000000 3088 +batch 0 4 5.568345 0.000000 2700 +tape 0 4 5.568345 0.000000 2959 +satellit 0 4 5.568345 0.000000 3077 +chile 0 4 5.568345 0.000000 3082 +yong 0 4 5.568345 0.000000 2809 +raft 0 4 5.568345 0.000000 3060 +tertiari 1 3 5.857933 5.857933 3193 +informationresearch 0 3 5.857933 0.000000 3675 +edueduc 0 3 5.857933 0.000000 4004 +shorter 0 3 5.857933 0.000000 3998 +santiago 0 3 5.857933 0.000000 4013 +kabra 1 2 6.263398 6.263398 6139 +dewittresearch 0 2 6.263398 0.000000 6185 +shaft 0 2 6.263398 0.000000 6186 +eosdi 0 2 6.263398 0.000000 6124 +bing 1 1 6.957497 6.957497 18679 +jieb 1 1 6.957497 6.957497 18680 +systemsresearch 0 1 6.957497 0.000000 18681 +shorepublicationsbuild 0 1 6.957497 0.000000 18682 +scaleabl 0 1 6.957497 0.000000 18683 +implment 0 1 6.957497 0.000000 18684 +lueder 0 1 6.957497 0.000000 18685 +ellman 0 1 6.957497 0.000000 18686 +kupsch 0 1 6.957497 0.000000 18687 +prong 0 1 6.957497 0.000000 18688 +tile 0 1 6.957497 0.000000 18689 +goddard 0 1 6.957497 0.000000 18690 +reclam 0 1 6.957497 0.000000 18691 +reorgan 0 1 6.957497 0.000000 18692 +serverpersist 0 1 6.957497 0.000000 18693 +grouphobbi 0 1 6.957497 0.000000 18694 +volleyballweb 0 1 6.957497 0.000000 18695 +whitewat 0 1 6.957497 0.000000 18696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..247348f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +base 0 165 1.791759 0.000000 50 +algorithm 0 162 1.791759 0.000000 57 +relat 1 139 1.945910 1.945910 68 +object 0 138 1.945910 0.000000 79 +model 0 145 1.945910 0.000000 69 +databas 1 122 2.079442 2.079442 86 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +street 0 63 2.772589 0.000000 293 +virtual 0 62 2.772589 0.000000 285 +publish 1 57 2.890372 2.890372 326 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +join 1 39 3.258097 3.258097 457 +bookmark 0 26 3.688879 0.000000 639 +client 0 25 3.737670 0.000000 679 +accur 0 25 3.737670 0.000000 680 +miscellan 0 23 3.806662 0.000000 731 +sigmod 0 19 4.007333 0.000000 877 +partit 0 16 4.174387 0.000000 984 +spatial 0 16 4.174387 0.000000 988 +hybrid 0 15 4.248495 0.000000 1057 +sigmetr 0 13 4.382027 0.000000 1173 +dewitt 0 12 4.465908 0.000000 1270 +vldb 0 10 4.653960 0.000000 1470 +paradis 1 8 4.875197 4.875197 1782 +hash 0 8 4.875197 0.000000 1618 +tourist 0 8 4.875197 0.000000 1710 +merg 0 7 5.010635 0.000000 1862 +patel 0 6 5.164786 0.000000 2154 +inlin 0 4 5.568345 0.000000 2964 +skate 0 4 5.568345 0.000000 3046 +jignesh 1 1 6.957497 6.957497 18697 +madhuri 0 1 6.957497 0.000000 18698 +kashmir 0 1 6.957497 0.000000 18699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..46dd1b88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +click 0 142 1.945910 0.000000 78 +peopl 0 96 2.302585 0.000000 132 +netscap 0 44 3.135494 0.000000 395 +download 0 36 3.367296 0.000000 489 +georg 0 16 4.174387 0.000000 994 +warn 0 14 4.317488 0.000000 1068 +pretti 0 13 4.382027 0.000000 1191 +varghes 0 3 5.857933 0.000000 3442 +lame 0 3 5.857933 0.000000 3717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..55b4ae66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +washington 0 236 1.386294 0.000000 32 +public 1 202 1.609438 1.609438 43 +utexa 0 189 1.609438 0.000000 44 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +data 0 170 1.791759 0.000000 49 +contact 0 153 1.791759 0.000000 59 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +area 1 144 1.945910 1.945910 80 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +real 1 93 2.397895 2.397895 144 +proceed 0 93 2.397895 0.000000 152 +resourc 1 81 2.484907 2.484907 172 +ieee 1 86 2.484907 2.484907 190 +west 0 83 2.484907 0.000000 192 +decemb 0 80 2.564949 0.000000 215 +appear 0 78 2.564949 0.000000 210 +april 0 77 2.564949 0.000000 196 +symposium 0 72 2.639057 0.000000 238 +street 0 63 2.772589 0.000000 293 +share 1 59 2.833213 2.833213 304 +room 0 59 2.833213 0.000000 301 +februari 0 54 2.944439 0.000000 328 +multipl 0 39 3.258097 0.000000 453 +mine 0 26 3.688879 0.000000 654 +task 0 25 3.737670 0.000000 678 +period 0 22 3.850148 0.000000 743 +alloc 0 20 3.951244 0.000000 821 +expand 0 17 4.110874 0.000000 928 +universityof 0 15 4.248495 0.000000 1061 +inproceed 0 8 4.875197 0.000000 1670 +sciencesdepart 0 6 5.164786 0.000000 2020 +height 0 4 5.568345 0.000000 2890 +johann 1 3 5.857933 5.857933 3758 +plaxton 1 3 5.857933 5.857933 3886 +ofwisconsin 0 3 5.857933 0.000000 4002 +proport 0 3 5.857933 0.000000 3293 +baruah 1 2 6.263398 6.263398 5753 +studentat 0 2 6.263398 0.000000 5877 +databasemanag 0 2 6.263398 0.000000 4089 +underprofessor 0 2 6.263398 0.000000 6045 +linkscontact 0 2 6.263398 0.000000 5708 +eagl 0 2 6.263398 0.000000 5731 +jeffai 0 2 6.263398 0.000000 4357 +technicalreport 0 2 6.263398 0.000000 5615 +gehrk 1 1 6.957497 6.957497 18700 +homepagejohann 0 1 6.957497 0.000000 18701 +gehrkewelcom 0 1 6.957497 0.000000 18702 +raghuramakrishnan 0 1 6.957497 0.000000 18703 +stoica 0 1 6.957497 0.000000 18704 +abdel 0 1 6.957497 0.000000 18705 +wahab 0 1 6.957497 0.000000 18706 +algorithmfor 0 1 6.957497 0.000000 18707 +anexpand 0 1 6.957497 0.000000 18708 +fastschedul 0 1 6.957497 0.000000 18709 +processingsymposium 0 1 6.957497 0.000000 18710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..4dbfa9f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +student 1 343 1.098612 1.098612 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +list 1 201 1.609438 1.609438 39 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +model 0 145 1.945910 0.000000 69 +click 0 142 1.945910 0.000000 78 +construct 0 139 1.945910 0.000000 82 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +databas 0 122 2.079442 0.000000 86 +compil 0 122 2.079442 0.000000 96 +site 1 106 2.197225 2.197225 119 +look 0 107 2.197225 0.000000 115 +check 0 115 2.197225 0.000000 118 +manag 0 114 2.197225 0.000000 125 +question 0 91 2.397895 0.000000 141 +section 0 94 2.397895 0.000000 149 +thing 0 84 2.484907 0.000000 189 +good 0 77 2.564949 0.000000 200 +visit 0 63 2.772589 0.000000 288 +new 0 64 2.772589 0.000000 262 +septemb 0 65 2.772589 0.000000 274 +maintain 0 51 2.995732 0.000000 342 +frequent 0 49 3.044522 0.000000 367 +keep 0 44 3.135494 0.000000 409 +industri 0 38 3.295837 0.000000 464 +often 0 31 3.496508 0.000000 551 +ask 0 28 3.610918 0.000000 597 +releas 0 28 3.610918 0.000000 616 +great 0 27 3.637586 0.000000 626 +mind 0 27 3.637586 0.000000 632 +latest 0 21 3.912023 0.000000 785 +dilbert 0 16 4.174387 0.000000 996 +appl 1 11 4.553877 4.553877 1303 +pagewelcom 0 11 4.553877 0.000000 1344 +spot 0 7 5.010635 0.000000 1894 +bodner 0 5 5.347108 0.000000 2401 +alma 0 3 5.857933 0.000000 3963 +herei 0 2 6.263398 0.000000 6187 +mater 0 2 6.263398 0.000000 5930 +nando 0 2 6.263398 0.000000 5458 +numb 0 2 6.263398 0.000000 5505 +soap 0 2 6.263398 0.000000 4511 +jonb 0 2 6.263398 0.000000 4771 +mound 0 2 6.263398 0.000000 4773 +powerbook 0 1 6.957497 0.000000 18711 +amass 0 1 6.957497 0.000000 18712 +catagori 0 1 6.957497 0.000000 18713 +needsth 0 1 6.957497 0.000000 18714 +coverageth 0 1 6.957497 0.000000 18715 +operaish 0 1 6.957497 0.000000 18716 +drivelziffnet 0 1 6.957497 0.000000 18717 +newsc 0 1 6.957497 0.000000 18718 +classworktodai 0 1 6.957497 0.000000 18719 +chucklejon 0 1 6.957497 0.000000 18720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..6aa54a26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +student 1 343 1.098612 1.098612 19 +engin 0 297 1.098612 0.000000 20 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +wisconsin 0 169 1.791759 0.000000 54 +data 0 170 1.791759 0.000000 49 +address 0 170 1.791759 0.000000 62 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +process 0 142 1.945910 0.000000 72 +spring 1 131 2.079442 2.079442 88 +welcom 0 122 2.079442 0.000000 99 +databas 0 122 2.079442 0.000000 86 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +structur 0 106 2.197225 0.000000 105 +place 0 106 2.197225 0.000000 124 +site 0 106 2.197225 0.000000 119 +advanc 1 99 2.302585 2.302585 130 +technic 0 100 2.302585 0.000000 140 +imag 0 91 2.397895 0.000000 161 +commun 0 95 2.397895 0.000000 157 +sinc 0 90 2.397895 0.000000 159 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +refer 0 78 2.564949 0.000000 203 +degre 0 69 2.708050 0.000000 259 +street 0 63 2.772589 0.000000 293 +finger 0 52 2.995732 0.000000 354 +visitor 0 49 3.044522 0.000000 371 +could 0 46 3.091042 0.000000 383 +autom 0 41 3.218876 0.000000 434 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +pattern 0 24 3.761200 0.000000 689 +recognit 0 23 3.806662 0.000000 723 +beij 0 19 4.007333 0.000000 876 +stanford 0 17 4.110874 0.000000 955 +medic 0 17 4.110874 0.000000 958 +came 0 13 4.382027 0.000000 1197 +academi 1 8 4.875197 4.875197 1735 +signal 0 7 5.010635 0.000000 1910 +provinc 0 4 5.568345 0.000000 3009 +biomed 0 4 5.568345 0.000000 2905 +hometown 0 3 5.857933 0.000000 3817 +nanj 1 2 6.263398 6.263398 5728 +jiangsu 0 2 6.263398 0.000000 5586 +southeast 0 2 6.263398 0.000000 6188 +frombeij 0 1 6.957497 0.000000 18721 +capitol 0 1 6.957497 0.000000 18722 +specil 0 1 6.957497 0.000000 18723 +chinacurr 0 1 6.957497 0.000000 18724 +tele 0 1 6.957497 0.000000 18725 +stuffjava 0 1 6.957497 0.000000 18726 +placeshor 0 1 6.957497 0.000000 18727 +tutorialchina 0 1 6.957497 0.000000 18728 +affairchina 0 1 6.957497 0.000000 18729 +democracybeij 0 1 6.957497 0.000000 18730 +groupstanford 0 1 6.957497 0.000000 18731 +informaticsmit 0 1 6.957497 0.000000 18732 +processingjob 0 1 6.957497 0.000000 18733 +newsyou 0 1 6.957497 0.000000 18734 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..0b9a13d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +student 0 343 1.098612 0.000000 19 +graduat 1 215 1.386294 1.386294 31 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +hour 0 165 1.791759 0.000000 46 +architectur 0 139 1.945910 0.000000 77 +year 0 148 1.945910 0.000000 84 +high 1 130 2.079442 2.079442 101 +welcom 0 122 2.079442 0.000000 99 +schedul 0 119 2.079442 0.000000 85 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +school 1 84 2.484907 2.484907 188 +second 0 81 2.484907 0.000000 166 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +state 0 76 2.564949 0.000000 207 +view 1 70 2.708050 2.708050 254 +receiv 0 66 2.708050 0.000000 244 +window 0 68 2.708050 0.000000 242 +new 1 64 2.772589 2.772589 262 +plan 0 65 2.772589 0.000000 272 +appoint 0 49 3.044522 0.000000 358 +favorit 1 44 3.135494 3.135494 410 +music 0 42 3.218876 0.000000 436 +join 1 39 3.258097 3.258097 457 +electr 0 38 3.295837 0.000000 461 +print 0 34 3.401197 0.000000 503 +team 0 27 3.637586 0.000000 625 +jeff 1 25 3.737670 3.737670 673 +background 0 25 3.737670 0.000000 664 +sport 0 25 3.737670 0.000000 683 +miscellan 0 23 3.806662 0.000000 731 +emphasi 0 22 3.850148 0.000000 755 +deal 0 22 3.850148 0.000000 736 +hous 1 21 3.912023 3.912023 801 +thur 0 19 4.007333 0.000000 847 +statu 0 18 4.060443 0.000000 885 +jose 1 16 4.174387 4.174387 976 +diego 0 16 4.174387 0.000000 992 +club 0 15 4.248495 0.000000 1058 +francisco 0 14 4.317488 0.000000 1095 +basketbal 0 12 4.465908 0.000000 1289 +song 0 11 4.553877 0.000000 1380 +tue 0 11 4.553877 0.000000 1308 +town 0 10 4.653960 0.000000 1458 +vista 0 10 4.653960 0.000000 1452 +theme 0 8 4.875197 0.000000 1707 +hockei 0 8 4.875197 0.000000 1760 +golden 0 7 5.010635 0.000000 1962 +footbal 0 7 5.010635 0.000000 1912 +tip 0 7 5.010635 0.000000 1863 +cupertino 1 2 6.263398 6.263398 4956 +columbia 1 2 6.263398 6.263398 5900 +mercuri 1 1 6.957497 6.957497 18735 +andnando 1 1 6.957497 6.957497 18736 +shabel 1 1 6.957497 6.957497 18737 +pagech 0 1 6.957497 0.000000 18738 +wisconsinch 0 1 6.957497 0.000000 18739 +informationmajor 0 1 6.957497 0.000000 18740 +monta 0 1 6.957497 0.000000 18741 +warrior 0 1 6.957497 0.000000 18742 +shark 0 1 6.957497 0.000000 18743 +oakland 0 1 6.957497 0.000000 18744 +newsmus 0 1 6.957497 0.000000 18745 +jshabel 0 1 6.957497 0.000000 18746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..d82724b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +email 0 220 1.386294 0.000000 29 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +data 2 170 1.791759 3.583518 49 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +problem 0 147 1.945910 0.000000 75 +model 0 145 1.945910 0.000000 69 +report 2 131 2.079442 4.158884 92 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +manag 0 114 2.197225 0.000000 125 +technic 1 100 2.302585 2.302585 140 +memori 0 101 2.302585 0.000000 139 +access 0 102 2.302585 0.000000 136 +need 0 98 2.302585 0.000000 135 +proceed 1 93 2.397895 2.397895 152 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +search 0 95 2.397895 0.000000 155 +larg 1 82 2.484907 2.484907 168 +west 0 83 2.484907 0.000000 192 +environ 0 84 2.484907 0.000000 177 +master 1 76 2.564949 2.564949 216 +server 0 76 2.564949 0.000000 204 +appli 1 71 2.639057 2.639057 226 +solv 0 73 2.639057 0.000000 234 +involv 0 71 2.639057 0.000000 227 +workshop 0 71 2.639057 0.000000 239 +integr 1 67 2.708050 2.708050 245 +august 0 66 2.708050 0.000000 257 +organ 1 65 2.772589 2.772589 265 +street 0 63 2.772589 0.000000 293 +prof 0 64 2.772589 0.000000 273 +improv 0 62 2.772589 0.000000 289 +complex 0 64 2.772589 0.000000 269 +evalu 0 64 2.772589 0.000000 266 +januari 0 62 2.772589 0.000000 264 +experi 0 64 2.772589 0.000000 283 +explor 1 58 2.890372 2.890372 324 +thesi 0 57 2.890372 0.000000 327 +overview 0 56 2.890372 0.000000 323 +scientif 0 53 2.944439 0.000000 341 +februari 0 54 2.944439 0.000000 328 +found 0 53 2.944439 0.000000 337 +digit 1 52 2.995732 2.995732 348 +advisor 0 51 2.995732 0.000000 355 +visual 1 48 3.044522 3.044522 372 +set 1 50 3.044522 3.044522 361 +telephon 0 50 3.044522 0.000000 373 +frequent 0 49 3.044522 0.000000 367 +adapt 0 46 3.091042 0.000000 387 +discuss 0 45 3.135494 0.000000 399 +third 0 43 3.178054 0.000000 412 +linear 0 41 3.218876 0.000000 431 +join 1 39 3.258097 3.258097 457 +submit 1 39 3.258097 3.258097 440 +societi 0 40 3.258097 0.000000 456 +industri 0 38 3.295837 0.000000 464 +concurr 0 34 3.401197 0.000000 501 +queri 0 33 3.433987 0.000000 524 +storag 0 31 3.496508 0.000000 553 +synchron 1 29 3.583519 3.583519 588 +limit 0 29 3.583519 0.000000 585 +measur 0 28 3.610918 0.000000 609 +arrai 0 27 3.637586 0.000000 627 +client 0 25 3.737670 0.000000 679 +jeff 0 25 3.737670 0.000000 673 +disk 2 22 3.850148 7.700296 747 +chen 0 21 3.912023 0.000000 791 +media 0 19 4.007333 0.000000 861 +sigmod 0 19 4.007333 0.000000 877 +miller 0 17 4.110874 0.000000 949 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 2 15 4.248495 8.496990 1053 +miron 1 14 4.317488 4.317488 1110 +dbm 0 13 4.382027 0.000000 1136 +buffer 1 12 4.465908 4.465908 1211 +characterist 0 12 4.465908 0.000000 1257 +daniel 0 12 4.465908 0.000000 1233 +raghu 0 12 4.465908 0.000000 1212 +optic 0 12 4.465908 0.000000 1221 +volum 0 11 4.553877 0.000000 1347 +alpha 0 11 4.553877 0.000000 1348 +resid 0 10 4.653960 0.000000 1461 +unifi 0 8 4.875197 0.000000 1774 +dataset 1 7 5.010635 5.010635 1914 +refere 0 7 5.010635 0.000000 1895 +eduresearch 0 6 5.164786 0.000000 2205 +divers 0 6 5.164786 0.000000 2232 +spie 0 6 5.164786 0.000000 2119 +quantum 0 6 5.164786 0.000000 2214 +almaden 0 5 5.347108 0.000000 2511 +tape 2 4 5.568345 11.136690 2959 +theintern 1 4 5.568345 5.568345 2981 +metadata 0 4 5.568345 0.000000 2945 +karen 0 4 5.568345 0.000000 2796 +ford 0 4 5.568345 0.000000 2636 +filesystem 0 4 5.568345 0.000000 2587 +tertiari 1 3 5.857933 5.857933 3193 +myllymaki 1 3 5.857933 5.857933 4022 +alsoavail 0 3 5.857933 0.000000 3887 +wenger 0 3 5.857933 0.000000 4023 +schwarz 0 3 5.857933 0.000000 3986 +trishul 0 3 5.857933 0.000000 4016 +chilimbi 0 3 5.857933 0.000000 4015 +raid 0 3 5.857933 0.000000 4012 +jussi 1 2 6.263398 6.263398 6133 +andvisu 0 2 6.263398 0.000000 6189 +karavan 0 2 6.263398 0.000000 6190 +andtool 0 2 6.263398 0.000000 5126 +beyer 0 2 6.263398 0.000000 6103 +lawand 0 2 6.263398 0.000000 6191 +dataengin 0 2 6.263398 0.000000 6118 +helsinki 0 2 6.263398 0.000000 5702 +storageto 1 1 6.957497 6.957497 18747 +andtap 1 1 6.957497 6.957497 18748 +yoav 1 1 6.957497 6.957497 18749 +weiss 1 1 6.957497 6.957497 18750 +scsi 1 1 6.957497 6.957497 18751 +myllymakijussi 0 1 6.957497 0.000000 18752 +summaryi 0 1 6.957497 0.000000 18753 +onadvanc 0 1 6.957497 0.000000 18754 +mcurrent 0 1 6.957497 0.000000 18755 +deviseproject 0 1 6.957497 0.000000 18756 +mironlivni 0 1 6.957497 0.000000 18757 +joinsof 0 1 6.957497 0.000000 18758 +listbelow 0 1 6.957497 0.000000 18759 +andfunct 0 1 6.957497 0.000000 18760 +datavisu 0 1 6.957497 0.000000 18761 +managementissu 0 1 6.957497 0.000000 18762 +publicationseffici 0 1 6.957497 0.000000 18763 +programperform 0 1 6.957497 0.000000 18764 +bartonp 0 1 6.957497 0.000000 18765 +tertiarystorag 0 1 6.957497 0.000000 18766 +withmiron 0 1 6.957497 0.000000 18767 +acmsigmetr 0 1 6.957497 0.000000 18768 +publicationdevis 0 1 6.957497 0.000000 18769 +donjerkov 0 1 6.957497 0.000000 18770 +andmiron 0 1 6.957497 0.000000 18771 +publicationsdisk 0 1 6.957497 0.000000 18772 +tapeaccess 0 1 6.957497 0.000000 18773 +degreeproject 0 1 6.957497 0.000000 18774 +networkarchitectur 0 1 6.957497 0.000000 18775 +finnish 0 1 6.957497 0.000000 18776 +documentsimplement 0 1 6.957497 0.000000 18777 +treealgorithm 0 1 6.957497 0.000000 18778 +productsoverview 0 1 6.957497 0.000000 18779 +supplier 0 1 6.957497 0.000000 18780 +productssom 0 1 6.957497 0.000000 18781 +adaptec 0 1 6.957497 0.000000 18782 +workstationsandpcsandtechn 0 1 6.957497 0.000000 18783 +journaland 0 1 6.957497 0.000000 18784 +whitepap 0 1 6.957497 0.000000 18785 +researchandcyberjourn 0 1 6.957497 0.000000 18786 +tapeanddlt 0 1 6.957497 0.000000 18787 +faqandwhitepap 0 1 6.957497 0.000000 18788 +solarisandsparcstationsandtechn 0 1 6.957497 0.000000 18789 +faqandstorag 0 1 6.957497 0.000000 18790 +faqand 0 1 6.957497 0.000000 18791 +otherusenet 0 1 6.957497 0.000000 18792 +faqsmani 0 1 6.957497 0.000000 18793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..1cc1a551 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +construct 0 139 1.945910 0.000000 82 +homepag 0 93 2.397895 0.000000 148 +grade 0 90 2.397895 0.000000 142 +info 0 85 2.484907 0.000000 176 +other 0 24 3.761200 0.000000 697 +sorri 0 4 5.568345 0.000000 3059 +jyothi 0 3 5.857933 0.000000 3423 +jyothithi 0 1 6.957497 0.000000 18794 +dissappoint 0 1 6.957497 0.000000 18795 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..8f2f6b18 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +class 0 199 1.609438 0.000000 37 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +wisconsin 0 169 1.791759 0.000000 54 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +process 0 142 1.945910 0.000000 72 +high 1 130 2.079442 2.079442 101 +tool 0 117 2.079442 0.000000 93 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +site 0 106 2.197225 0.000000 119 +need 0 98 2.302585 0.000000 135 +school 1 84 2.484907 2.484907 188 +learn 0 86 2.484907 0.000000 170 +west 0 83 2.484907 0.000000 192 +environ 0 84 2.484907 0.000000 177 +thing 0 84 2.484907 0.000000 189 +know 0 80 2.564949 0.000000 198 +servic 0 72 2.639057 0.000000 236 +free 0 73 2.639057 0.000000 224 +street 0 63 2.772589 0.000000 293 +undergradu 0 54 2.944439 0.000000 338 +cool 0 49 3.044522 0.000000 374 +life 0 50 3.044522 0.000000 375 +could 0 46 3.091042 0.000000 383 +autom 0 41 3.218876 0.000000 434 +alumni 0 21 3.912023 0.000000 807 +miss 0 19 4.007333 0.000000 866 +thoma 0 18 4.060443 0.000000 901 +women 0 16 4.174387 0.000000 1004 +anyth 0 16 4.174387 0.000000 998 +save 0 14 4.317488 0.000000 1099 +tune 0 12 4.465908 0.000000 1227 +safe 0 12 4.465908 0.000000 1274 +paradyn 0 9 4.753590 0.000000 1614 +tutor 0 9 4.753590 0.000000 1552 +port 0 8 4.875197 0.000000 1766 +pursu 0 7 5.010635 0.000000 1902 +constitut 0 6 5.164786 0.000000 2026 +ship 1 5 5.347108 5.347108 2534 +salt 0 5 5.347108 0.000000 2413 +water 0 5 5.347108 0.000000 2535 +sail 0 5 5.347108 0.000000 2571 +karen 1 4 5.568345 5.568345 2796 +frontier 0 3 5.857933 0.000000 3771 +counti 0 3 5.857933 0.000000 3682 +karavan 0 2 6.263398 0.000000 6190 +wic 0 2 6.263398 0.000000 4673 +dane 0 2 6.263398 0.000000 5534 +pagefor 0 2 6.263398 0.000000 6151 +lover 0 2 6.263398 0.000000 6192 +tear 0 2 6.263398 0.000000 5076 +karavaniceveryth 0 1 6.957497 0.000000 18796 +karavanicresearch 0 1 6.957497 0.000000 18797 +databasesask 0 1 6.957497 0.000000 18798 +studentstrio 0 1 6.957497 0.000000 18799 +safer 0 1 6.957497 0.000000 18800 +chocol 0 1 6.957497 0.000000 18801 +onlystuyves 0 1 6.957497 0.000000 18802 +associationstuyves 0 1 6.957497 0.000000 18803 +legisl 0 1 6.957497 0.000000 18804 +internetth 0 1 6.957497 0.000000 18805 +cure 0 1 6.957497 0.000000 18806 +sweat 0 1 6.957497 0.000000 18807 +isak 0 1 6.957497 0.000000 18808 +dinesen 0 1 6.957497 0.000000 18809 +admir 0 1 6.957497 0.000000 18810 +grace 0 1 6.957497 0.000000 18811 +hopper 0 1 6.957497 0.000000 18812 +pioneer 0 1 6.957497 0.000000 18813 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..c52b4913 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 0 380 0.693147 0.000000 9 +system 0 443 0.693147 0.000000 6 +us 1 329 1.098612 1.098612 16 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +implement 0 152 1.791759 0.000000 52 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +process 0 142 1.945910 0.000000 72 +high 1 130 2.079442 2.079442 101 +confer 0 126 2.079442 0.000000 100 +report 0 131 2.079442 0.000000 92 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +intern 1 108 2.197225 2.197225 128 +assist 0 112 2.197225 0.000000 113 +specif 0 106 2.197225 0.000000 106 +memori 1 101 2.302585 2.302585 139 +technic 0 100 2.302585 0.000000 140 +proceed 1 93 2.397895 2.397895 152 +ieee 0 86 2.484907 0.000000 190 +wide 0 84 2.484907 0.000000 185 +appear 1 78 2.564949 2.564949 210 +interfac 0 79 2.564949 0.000000 209 +april 0 77 2.564949 0.000000 196 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +david 0 71 2.639057 0.000000 232 +symposium 0 72 2.639057 0.000000 238 +august 0 66 2.708050 0.000000 257 +simul 0 66 2.708050 0.000000 255 +dept 0 64 2.772589 0.000000 291 +share 1 59 2.833213 2.833213 304 +march 0 61 2.833213 0.000000 295 +juli 0 60 2.833213 0.000000 305 +extens 2 53 2.944439 5.888878 340 +processor 1 54 2.944439 2.944439 335 +hardwar 0 51 2.995732 0.000000 350 +standard 0 48 3.044522 0.000000 365 +protocol 0 45 3.135494 0.000000 407 +cach 1 41 3.218876 3.218876 432 +editor 0 41 3.218876 0.000000 433 +examin 0 42 3.218876 0.000000 424 +autom 0 41 3.218876 0.000000 434 +prototyp 0 38 3.295837 0.000000 463 +cost 1 37 3.332205 3.332205 480 +jame 1 35 3.401197 3.401197 507 +option 0 30 3.555348 0.000000 575 +multiprocessor 0 28 3.610918 0.000000 605 +aspect 0 25 3.737670 0.000000 663 +supercomput 0 25 3.737670 0.000000 681 +scalabl 0 24 3.761200 0.000000 705 +methodolog 0 23 3.806662 0.000000 733 +synthesi 1 20 3.951244 3.951244 834 +north 0 19 4.007333 0.000000 873 +wind 0 18 4.060443 0.000000 908 +monitor 0 17 4.110874 0.000000 941 +hierarch 1 15 4.248495 4.248495 1018 +coher 1 14 4.317488 4.317488 1109 +prolog 1 13 4.382027 4.382027 1155 +introduc 0 13 4.382027 0.000000 1139 +tunnel 0 9 4.753590 0.000000 1615 +depth 0 8 4.875197 0.000000 1636 +upcom 0 8 4.875197 0.000000 1685 +goodman 1 7 5.010635 5.010635 1891 +dedic 0 7 5.010635 0.000000 1843 +greec 0 6 5.164786 0.000000 2208 +holland 0 5 5.347108 0.000000 2490 +publicationsresearch 0 4 5.568345 0.000000 2876 +galileo 0 4 5.568345 0.000000 3086 +microprogram 0 4 5.568345 0.000000 2604 +stefano 2 3 5.857933 11.715866 3372 +kaxira 1 3 5.857933 5.857933 3373 +stein 0 3 5.857933 0.000000 3646 +multiprocess 0 2 6.263398 0.000000 5142 +gjess 0 2 6.263398 0.000000 6156 +kiloprocessor 1 1 6.957497 6.957497 18814 +glow 1 1 6.957497 6.957497 18815 +papakonstantin 1 1 6.957497 6.957497 18816 +tsanaka 1 1 6.957497 6.957497 18817 +sciresearch 0 1 6.957497 0.000000 18818 +collaborationwith 0 1 6.957497 0.000000 18819 +incolabor 0 1 6.957497 0.000000 18820 +goodmanto 0 1 6.957497 0.000000 18821 +kaxirasto 0 1 6.957497 0.000000 18822 +goodmannd 0 1 6.957497 0.000000 18823 +goodmanst 0 1 6.957497 0.000000 18824 +kaxirasunivers 0 1 6.957497 0.000000 18825 +stafylopati 0 1 6.957497 0.000000 18826 +kaxirasinform 0 1 6.957497 0.000000 18827 +pekmestzi 0 1 6.957497 0.000000 18828 +kaxirasp 0 1 6.957497 0.000000 18829 +kaxirasmicroprocess 0 1 6.957497 0.000000 18830 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..071f55d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,252 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +list 1 201 1.609438 1.609438 39 +includ 0 208 1.609438 0.000000 42 +updat 0 191 1.609438 0.000000 41 +contact 1 153 1.791759 1.791759 59 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +distribut 0 162 1.791759 0.000000 51 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +support 0 132 1.945910 0.000000 83 +object 0 138 1.945910 0.000000 79 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +machin 0 129 2.079442 0.000000 95 +postscript 0 131 2.079442 0.000000 90 +document 0 121 2.079442 0.000000 89 +provid 0 121 2.079442 0.000000 94 +world 1 115 2.197225 2.197225 126 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +structur 0 106 2.197225 0.000000 105 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +person 0 111 2.197225 0.000000 117 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +sinc 0 90 2.397895 0.000000 159 +associ 0 93 2.397895 0.000000 151 +section 0 94 2.397895 0.000000 149 +call 0 91 2.397895 0.000000 153 +start 1 83 2.484907 2.484907 173 +contain 0 81 2.484907 0.000000 174 +level 0 87 2.484907 0.000000 180 +institut 0 84 2.484907 0.000000 187 +thing 0 84 2.484907 0.000000 189 +want 1 79 2.564949 2.564949 199 +know 0 80 2.564949 0.000000 198 +interfac 0 79 2.564949 0.000000 209 +come 0 78 2.564949 0.000000 202 +refer 0 78 2.564949 0.000000 203 +sourc 0 77 2.564949 0.000000 201 +write 1 72 2.639057 2.639057 222 +free 0 73 2.639057 0.000000 224 +html 0 75 2.639057 0.000000 235 +would 1 67 2.708050 2.708050 251 +degre 0 69 2.708050 0.000000 259 +java 0 70 2.708050 0.000000 248 +result 0 65 2.772589 0.000000 281 +locat 0 59 2.833213 0.000000 303 +index 1 56 2.890372 2.890372 309 +publish 0 57 2.890372 0.000000 326 +variou 0 56 2.890372 0.000000 317 +think 0 57 2.890372 0.000000 314 +talk 1 53 2.944439 2.944439 336 +allow 1 53 2.944439 2.944439 333 +week 0 52 2.995732 0.000000 343 +date 0 51 2.995732 0.000000 344 +much 0 52 2.995732 0.000000 349 +standard 0 48 3.044522 0.000000 365 +visual 0 48 3.044522 0.000000 372 +give 0 50 3.044522 0.000000 359 +archiv 0 49 3.044522 0.000000 364 +pointer 0 48 3.044522 0.000000 368 +possibl 0 47 3.091042 0.000000 378 +done 0 47 3.091042 0.000000 381 +electron 0 47 3.091042 0.000000 379 +made 1 44 3.135494 3.135494 398 +directori 0 45 3.135494 0.000000 396 +better 0 45 3.135494 0.000000 401 +third 0 43 3.178054 0.000000 412 +long 0 43 3.178054 0.000000 413 +might 0 41 3.218876 0.000000 426 +probabl 1 40 3.258097 3.258097 455 +littl 0 39 3.258097 0.000000 454 +realli 0 40 3.258097 0.000000 444 +programm 0 39 3.258097 0.000000 445 +form 0 39 3.258097 0.000000 443 +small 0 39 3.258097 0.000000 447 +slide 1 38 3.295837 3.295837 467 +especi 0 36 3.367296 0.000000 496 +short 0 36 3.367296 0.000000 499 +soon 0 36 3.367296 0.000000 494 +print 0 34 3.401197 0.000000 503 +taught 0 33 3.433987 0.000000 526 +idea 0 32 3.465736 0.000000 545 +often 0 31 3.496508 0.000000 551 +someth 0 31 3.496508 0.000000 554 +quot 1 29 3.583519 3.583519 582 +pass 0 28 3.610918 0.000000 611 +becom 0 28 3.610918 0.000000 603 +rather 0 26 3.688879 0.000000 642 +relev 0 26 3.688879 0.000000 637 +never 0 25 3.737670 0.000000 671 +hill 0 25 3.737670 0.000000 670 +reliabl 0 25 3.737670 0.000000 674 +notic 0 25 3.737670 0.000000 675 +wish 0 24 3.761200 0.000000 692 +consult 0 24 3.761200 0.000000 687 +displai 0 23 3.806662 0.000000 712 +proof 0 23 3.806662 0.000000 720 +varieti 0 22 3.850148 0.000000 740 +hierarchi 0 22 3.850148 0.000000 744 +properti 0 22 3.850148 0.000000 749 +leav 1 21 3.912023 3.912023 772 +love 0 21 3.912023 0.000000 804 +break 0 20 3.951244 0.000000 812 +ever 1 19 4.007333 4.007333 872 +anderson 0 19 4.007333 0.000000 860 +steven 1 17 4.110874 4.110874 953 +thought 0 17 4.110874 0.000000 945 +anyon 0 17 4.110874 0.000000 916 +anyth 0 16 4.174387 0.000000 998 +doesn 0 15 4.248495 0.000000 1055 +indic 0 15 4.248495 0.000000 1013 +piec 0 15 4.248495 0.000000 1020 +side 0 15 4.248495 0.000000 1022 +anywai 0 15 4.248495 0.000000 1047 +rate 0 15 4.248495 0.000000 1037 +atth 0 15 4.248495 0.000000 1019 +psycholog 0 15 4.248495 0.000000 1054 +convent 0 14 4.317488 0.000000 1072 +attribut 0 14 4.317488 0.000000 1092 +everyth 1 13 4.382027 4.382027 1169 +care 0 13 4.382027 0.000000 1177 +front 0 13 4.382027 0.000000 1154 +unfortun 0 13 4.382027 0.000000 1170 +translat 0 13 4.382027 0.000000 1164 +insid 1 12 4.465908 4.465908 1262 +prelim 0 12 4.465908 0.000000 1201 +skill 0 12 4.465908 0.000000 1205 +uniqu 0 12 4.465908 0.000000 1228 +primit 0 11 4.553877 0.000000 1317 +perman 0 11 4.553877 0.000000 1372 +success 0 10 4.653960 0.000000 1390 +enter 0 10 4.653960 0.000000 1454 +subset 0 10 4.653960 0.000000 1425 +light 0 9 4.753590 0.000000 1533 +end 0 9 4.753590 0.000000 1567 +discov 0 9 4.753590 0.000000 1562 +clear 0 9 4.753590 0.000000 1488 +angel 0 8 4.875197 0.000000 1779 +unifi 0 8 4.875197 0.000000 1774 +gave 1 7 5.010635 5.010635 1922 +surpris 1 7 5.010635 5.010635 1828 +perfect 0 7 5.010635 0.000000 1921 +tag 0 7 5.010635 0.000000 1821 +intellectu 0 7 5.010635 0.000000 1847 +serial 0 7 5.010635 0.000000 1975 +adob 0 7 5.010635 0.000000 1873 +chanc 0 7 5.010635 0.000000 1960 +shot 0 7 5.010635 0.000000 1898 +markup 1 6 5.164786 5.164786 2059 +strang 0 6 5.164786 0.000000 2064 +meant 0 6 5.164786 0.000000 2055 +vari 0 6 5.164786 0.000000 2001 +impress 0 6 5.164786 0.000000 2096 +ifyou 0 6 5.164786 0.000000 1992 +creation 0 6 5.164786 0.000000 2069 +somewher 0 6 5.164786 0.000000 2176 +keeper 1 5 5.347108 5.347108 2569 +stupid 0 5 5.347108 0.000000 2489 +junior 0 5 5.347108 0.000000 2519 +explicitli 0 5 5.347108 0.000000 2308 +hate 0 5 5.347108 0.000000 2529 +bean 0 4 5.568345 0.000000 2968 +hell 0 4 5.568345 0.000000 2885 +suppos 0 4 5.568345 0.000000 3002 +cheap 0 4 5.568345 0.000000 2751 +aliv 1 3 5.857933 5.857933 3864 +heaven 0 3 5.857933 0.000000 3589 +lauri 0 3 5.857933 0.000000 3867 +wasn 0 3 5.857933 0.000000 3800 +argu 0 3 5.857933 0.000000 3698 +rsum 0 3 5.857933 0.000000 3939 +outof 0 3 5.857933 0.000000 3296 +aren 0 3 5.857933 0.000000 3512 +easier 0 3 5.857933 0.000000 3470 +caltech 1 2 6.263398 6.263398 5223 +ironi 0 2 6.263398 0.000000 5986 +sarcasm 0 2 6.263398 0.000000 5871 +offens 0 2 6.263398 0.000000 6168 +miracl 0 2 6.263398 0.000000 5710 +convinc 0 2 6.263398 0.000000 6019 +defunct 0 2 6.263398 0.000000 6162 +personnel 0 2 6.263398 0.000000 4381 +danger 0 2 6.263398 0.000000 5725 +informationag 0 2 6.263398 0.000000 5446 +bui 0 2 6.263398 0.000000 4486 +ofread 0 2 6.263398 0.000000 4417 +possibleto 0 2 6.263398 0.000000 4942 +hedgehog 1 1 6.957497 6.957497 18831 +pager 1 1 6.957497 6.957497 18832 +foughtthei 0 1 6.957497 0.000000 18833 +bitmap 0 1 6.957497 0.000000 18834 +theblind 0 1 6.957497 0.000000 18835 +whateverbrows 0 1 6.957497 0.000000 18836 +literari 0 1 6.957497 0.000000 18837 +satir 0 1 6.957497 0.000000 18838 +butnoth 0 1 6.957497 0.000000 18839 +herein 0 1 6.957497 0.000000 18840 +areoffend 0 1 6.957497 0.000000 18841 +firsttwo 0 1 6.957497 0.000000 18842 +addup 0 1 6.957497 0.000000 18843 +fizzl 0 1 6.957497 0.000000 18844 +areobtain 0 1 6.957497 0.000000 18845 +creatingkiosk 0 1 6.957497 0.000000 18846 +thosewho 0 1 6.957497 0.000000 18847 +mybe 0 1 6.957497 0.000000 18848 +thoughtson 0 1 6.957497 0.000000 18849 +wantto 0 1 6.957497 0.000000 18850 +todo 0 1 6.957497 0.000000 18851 +sporad 0 1 6.957497 0.000000 18852 +danenet 0 1 6.957497 0.000000 18853 +dilhr 0 1 6.957497 0.000000 18854 +jobnet 0 1 6.957497 0.000000 18855 +photonet 0 1 6.957497 0.000000 18856 +databaseus 0 1 6.957497 0.000000 18857 +freez 0 1 6.957497 0.000000 18858 +fought 0 1 6.957497 0.000000 18859 +sfuai 0 1 6.957497 0.000000 18860 +assigna 0 1 6.957497 0.000000 18861 +contextu 0 1 6.957497 0.000000 18862 +distil 0 1 6.957497 0.000000 18863 +rsuminto 0 1 6.957497 0.000000 18864 +pinch 0 1 6.957497 0.000000 18865 +certaintruth 0 1 6.957497 0.000000 18866 +eventuallypick 0 1 6.957497 0.000000 18867 +mull 0 1 6.957497 0.000000 18868 +accessibleto 0 1 6.957497 0.000000 18869 +tough 0 1 6.957497 0.000000 18870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..41d6e73b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +last 0 314 1.098612 0.000000 14 +languag 0 227 1.386294 0.000000 26 +modifi 0 178 1.609438 0.000000 35 +read 0 154 1.791759 0.000000 47 +data 0 170 1.791759 0.000000 49 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +perform 1 143 1.945910 1.945910 74 +welcom 0 122 2.079442 0.000000 99 +postscript 0 131 2.079442 0.000000 90 +search 0 95 2.397895 0.000000 155 +music 0 42 3.218876 0.000000 436 +indian 0 22 3.850148 0.000000 769 +classic 0 14 4.317488 0.000000 1084 +gzip 0 6 5.164786 0.000000 2117 +steer 0 5 5.347108 0.000000 2328 +krishna 0 3 5.857933 0.000000 3495 +kunchithapadamkrishna 0 1 6.957497 0.000000 18871 +kunchithapadamgreet 0 1 6.957497 0.000000 18872 +miscellaneouspubl 0 1 6.957497 0.000000 18873 +toolsresum 0 1 6.957497 0.000000 18874 +bykk 0 1 6.957497 0.000000 18875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..508881ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +wisc 1 242 1.386294 1.386294 33 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +west 0 83 2.484907 0.000000 192 +server 0 76 2.564949 0.000000 204 +david 0 71 2.639057 0.000000 232 +street 0 63 2.772589 0.000000 293 +sigmod 0 19 4.007333 0.000000 877 +dbm 0 13 4.382027 0.000000 1136 +dewitt 0 12 4.465908 0.000000 1270 +tuft 1 5 5.347108 5.347108 2575 +kristin 1 4 5.568345 5.568345 3089 +miscellani 0 3 5.857933 0.000000 3976 +pagekristin 0 1 6.957497 0.000000 18876 +eduadvisor 0 1 6.957497 0.000000 18877 +serveruw 0 1 6.957497 0.000000 18878 +groupacm 0 1 6.957497 0.000000 18879 +pageeo 0 1 6.957497 0.000000 18880 +officelast 0 1 6.957497 0.000000 18881 +tuftekristin 0 1 6.957497 0.000000 18882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..23ae361c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +cours 1 273 1.098612 1.098612 15 +project 0 340 1.098612 0.000000 18 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +person 1 111 2.197225 2.197225 117 +topic 0 114 2.197225 0.000000 110 +mathemat 0 108 2.197225 0.000000 123 +follow 1 92 2.397895 2.397895 143 +homepag 0 93 2.397895 0.000000 148 +novemb 0 81 2.484907 0.000000 179 +librari 0 87 2.484907 0.000000 181 +good 0 77 2.564949 0.000000 200 +import 0 65 2.772589 0.000000 282 +life 0 50 3.044522 0.000000 375 +electron 0 47 3.091042 0.000000 379 +keep 0 44 3.135494 0.000000 409 +favorit 0 44 3.135494 0.000000 410 +compani 0 41 3.218876 0.000000 423 +short 0 36 3.367296 0.000000 499 +whole 0 17 4.110874 0.000000 940 +hobbi 0 16 4.174387 0.000000 1009 +uniqu 0 12 4.465908 0.000000 1228 +opinion 0 8 4.875197 0.000000 1708 +pursu 0 7 5.010635 0.000000 1902 +entiti 0 3 5.857933 0.000000 3096 +krung 1 1 6.957497 6.957497 18883 +homepageupd 0 1 6.957497 0.000000 18884 +underconstructioni 0 1 6.957497 0.000000 18885 +serf 0 1 6.957497 0.000000 18886 +cometh 0 1 6.957497 0.000000 18887 +linkedth 0 1 6.957497 0.000000 18888 +sinapiromsaran 0 1 6.957497 0.000000 18889 +emailkrung 0 1 6.957497 0.000000 18890 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..baf77fab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +fall 0 181 1.609438 0.000000 40 +wisconsin 1 169 1.791759 1.791759 54 +applic 1 170 1.791759 1.791759 56 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +like 0 132 1.945910 0.000000 81 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +file 0 132 1.945910 0.000000 70 +construct 0 139 1.945910 0.000000 82 +note 0 142 1.945910 0.000000 67 +report 1 131 2.079442 2.079442 92 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +studi 0 120 2.079442 0.000000 91 +postscript 0 131 2.079442 0.000000 90 +spring 0 131 2.079442 0.000000 88 +mathemat 2 108 2.197225 4.394450 123 +theori 1 111 2.197225 2.197225 127 +specif 0 106 2.197225 0.000000 106 +topic 0 114 2.197225 0.000000 110 +structur 0 106 2.197225 0.000000 105 +technic 1 100 2.302585 2.302585 140 +book 0 99 2.302585 0.000000 131 +mani 0 92 2.397895 0.000000 150 +question 0 91 2.397895 0.000000 141 +select 0 91 2.397895 0.000000 154 +follow 0 92 2.397895 0.000000 143 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +chang 0 82 2.484907 0.000000 163 +appear 1 78 2.564949 2.564949 210 +complet 0 77 2.564949 0.000000 208 +logic 1 71 2.639057 2.639057 230 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +abstract 0 62 2.772589 0.000000 276 +function 0 62 2.772589 0.000000 275 +result 0 65 2.772589 0.000000 281 +foundat 0 62 2.772589 0.000000 286 +artifici 0 63 2.772589 0.000000 280 +reason 1 57 2.890372 2.890372 318 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +space 0 57 2.890372 0.000000 310 +local 0 55 2.944439 0.000000 334 +extens 0 53 2.944439 0.000000 340 +right 0 48 3.044522 0.000000 363 +basic 0 50 3.044522 0.000000 360 +algebra 1 45 3.135494 3.135494 394 +math 1 44 3.135494 3.135494 402 +answer 0 45 3.135494 0.000000 391 +autom 2 41 3.218876 6.437752 434 +review 1 42 3.218876 3.218876 425 +press 0 42 3.218876 0.000000 419 +theoret 0 39 3.258097 0.000000 446 +close 0 38 3.295837 0.000000 465 +singl 1 34 3.401197 3.401197 510 +taught 0 33 3.433987 0.000000 526 +independ 0 32 3.465736 0.000000 548 +semant 1 29 3.583519 3.583519 587 +consid 0 29 3.583519 0.000000 590 +turn 0 29 3.583519 0.000000 586 +measur 1 28 3.610918 3.610918 609 +american 0 27 3.637586 0.000000 634 +comp 0 26 3.688879 0.000000 650 +theorem 1 21 3.912023 3.912023 786 +prove 0 19 4.007333 0.000000 848 +geometr 0 19 4.007333 0.000000 852 +failur 0 18 4.060443 0.000000 898 +stanford 0 17 4.110874 0.000000 955 +moor 0 17 4.110874 0.000000 936 +style 0 15 4.248495 0.000000 1036 +topolog 1 14 4.317488 4.317488 1089 +draft 0 14 4.317488 0.000000 1085 +prolog 0 13 4.382027 0.000000 1155 +resolut 0 13 4.382027 0.000000 1172 +weak 0 13 4.382027 0.000000 1159 +deduct 1 12 4.465908 4.465908 1236 +kenneth 0 12 4.465908 0.000000 1265 +loop 1 11 4.553877 4.553877 1310 +typic 0 11 4.553877 0.000000 1360 +fix 0 11 4.553877 0.000000 1327 +edutelephon 0 10 4.653960 0.000000 1473 +besid 0 8 4.875197 0.000000 1681 +compact 0 7 5.010635 0.000000 1907 +boyer 0 6 5.164786 0.000000 2013 +rough 0 6 5.164786 0.000000 2107 +infer 0 6 5.164786 0.000000 2040 +shortest 0 5 5.347108 0.000000 2424 +constant 0 5 5.347108 0.000000 2251 +kunen 2 3 5.857933 11.715866 3500 +mathematica 1 3 5.857933 5.857933 3303 +preprint 1 3 5.857933 5.857933 3481 +axiomat 0 3 5.857933 0.000000 3288 +monthli 0 3 5.857933 0.000000 3910 +expon 1 2 6.263398 6.263398 5323 +negat 0 2 6.263398 0.000000 6073 +mill 0 2 6.263398 0.000000 6193 +liter 0 2 6.263398 0.000000 4689 +law 0 2 6.263398 0.000000 4896 +hart 1 1 6.957497 6.957497 18891 +axiom 1 1 6.957497 6.957497 18892 +fundamenta 1 1 6.957497 6.957497 18893 +quasigroup 1 1 6.957497 6.957497 18894 +professormath 0 1 6.957497 0.000000 18895 +resolutionto 0 1 6.957497 0.000000 18896 +likeprolog 0 1 6.957497 0.000000 18897 +prologus 0 1 6.957497 0.000000 18898 +incompat 0 1 6.957497 0.000000 18899 +betweenleast 0 1 6.957497 0.000000 18900 +backtrack 0 1 6.957497 0.000000 18901 +thissubject 0 1 6.957497 0.000000 18902 +usualaxiom 0 1 6.957497 0.000000 18903 +ramsei 0 1 6.957497 0.000000 18904 +corson 0 1 6.957497 0.000000 18905 +moufang 0 1 6.957497 0.000000 18906 +conjugaci 0 1 6.957497 0.000000 18907 +moschovaki 0 1 6.957497 0.000000 18908 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..e389c602 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,246 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +offic 0 299 1.098612 0.000000 13 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +languag 2 227 1.386294 2.772588 26 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +parallel 2 169 1.791759 3.583518 60 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +support 2 132 1.945910 3.891820 83 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +compil 2 122 2.079442 4.158884 96 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +machin 1 129 2.079442 2.079442 95 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +spring 0 131 2.079442 0.000000 88 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +intern 1 108 2.197225 2.197225 128 +specif 0 106 2.197225 0.000000 106 +memori 2 101 2.302585 4.605170 139 +user 1 104 2.302585 2.302585 137 +access 0 102 2.302585 0.000000 136 +techniqu 0 99 2.302585 0.000000 138 +part 0 98 2.302585 0.000000 129 +octob 1 89 2.397895 2.397895 156 +associ 0 93 2.397895 0.000000 151 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +educ 1 86 2.484907 2.484907 191 +larg 1 82 2.484907 2.484907 168 +level 1 87 2.484907 2.484907 180 +west 0 83 2.484907 0.000000 192 +librari 0 87 2.484907 0.000000 181 +ieee 0 86 2.484907 0.000000 190 +help 0 83 2.484907 0.000000 175 +appear 1 78 2.564949 2.564949 210 +decemb 0 80 2.564949 0.000000 215 +june 0 79 2.564949 0.000000 214 +messag 0 76 2.564949 0.000000 212 +dynam 0 76 2.564949 0.000000 194 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +summari 0 73 2.639057 0.000000 237 +write 0 72 2.639057 0.000000 222 +workshop 0 71 2.639057 0.000000 239 +august 1 66 2.708050 2.708050 257 +java 0 70 2.708050 0.000000 248 +new 0 64 2.772589 0.000000 262 +septemb 0 65 2.772589 0.000000 274 +evalu 0 64 2.772589 0.000000 266 +improv 0 62 2.772589 0.000000 289 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +colleg 1 61 2.833213 2.833213 300 +march 0 61 2.833213 0.000000 295 +automat 0 61 2.833213 0.000000 306 +instruct 0 53 2.944439 0.000000 332 +februari 0 54 2.944439 0.000000 328 +hardwar 1 51 2.995732 2.995732 350 +particular 0 51 2.995732 0.000000 352 +principl 0 48 3.044522 0.000000 357 +frequent 0 49 3.044522 0.000000 367 +california 1 46 3.091042 3.091042 388 +understand 0 47 3.091042 0.000000 384 +possibl 0 47 3.091042 0.000000 378 +mark 1 44 3.135494 3.135494 403 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +better 0 45 3.135494 0.000000 401 +edit 1 42 3.218876 3.218876 418 +press 0 42 3.218876 0.000000 419 +annual 0 40 3.258097 0.000000 458 +transact 0 39 3.258097 0.000000 438 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 2 35 3.401197 6.802394 507 +bibliographi 0 34 3.401197 0.000000 518 +within 0 33 3.433987 0.000000 525 +independ 0 32 3.465736 0.000000 548 +richard 1 31 3.496508 3.496508 559 +profil 1 30 3.555348 3.555348 581 +power 0 30 3.555348 0.000000 573 +depend 0 29 3.583519 0.000000 583 +focus 0 29 3.583519 0.000000 584 +pass 0 28 3.610918 0.000000 611 +multiprocessor 0 28 3.610918 0.000000 605 +symbol 0 27 3.637586 0.000000 620 +static 0 27 3.637586 0.000000 619 +berkelei 1 26 3.688879 3.688879 657 +revis 0 26 3.688879 0.000000 640 +hill 1 25 3.737670 3.737670 670 +trace 0 25 3.737670 0.000000 677 +supercomput 0 25 3.737670 0.000000 681 +spent 0 25 3.737670 0.000000 676 +lab 1 24 3.761200 3.761200 698 +flow 1 24 3.761200 3.761200 700 +scalabl 0 24 3.761200 0.000000 705 +demonstr 0 24 3.761200 0.000000 694 +cooper 0 22 3.850148 0.000000 757 +path 1 21 3.912023 3.912023 778 +annot 1 21 3.912023 3.912023 775 +programminglanguag 0 21 3.912023 0.000000 782 +department 0 20 3.951244 0.000000 839 +portabl 0 20 3.951244 0.000000 819 +fine 0 20 3.951244 0.000000 822 +exploit 0 20 3.951244 0.000000 836 +eric 1 19 4.007333 4.007333 870 +wind 1 18 4.060443 4.060443 908 +thoma 1 18 4.060443 4.060443 901 +steven 1 17 4.110874 4.110874 953 +asplo 1 17 4.110874 4.110874 948 +micro 1 15 4.248495 4.248495 1031 +eduphon 0 15 4.248495 0.000000 1060 +hybrid 0 15 4.248495 0.000000 1057 +coher 0 14 4.317488 0.000000 1109 +sigplan 1 13 4.382027 4.382027 1190 +employ 1 12 4.465908 4.465908 1291 +brad 1 12 4.465908 4.465908 1264 +wood 1 11 4.553877 4.553877 1355 +branch 0 11 4.553877 0.000000 1318 +grain 1 10 4.653960 4.653960 1448 +facilit 0 10 4.653960 0.000000 1412 +laru 3 9 4.753590 14.260770 1560 +tunnel 1 9 4.753590 4.753590 1615 +ball 1 9 4.753590 4.753590 1608 +wilson 0 9 4.753590 0.000000 1536 +routin 0 9 4.753590 0.000000 1549 +pldi 1 8 4.875197 4.875197 1704 +secretari 0 8 4.875197 0.000000 1775 +upcom 0 8 4.875197 0.000000 1685 +irregular 0 8 4.875197 0.000000 1768 +joel 0 8 4.875197 0.000000 1698 +insert 0 8 4.875197 0.000000 1687 +sixth 1 7 5.010635 5.010635 1917 +roger 1 7 5.010635 5.010635 1892 +harvard 0 7 5.010635 0.000000 1926 +fifth 0 7 5.010635 0.000000 1931 +chandra 1 6 5.164786 5.164786 2091 +ann 1 6 5.164786 5.164786 2065 +bell 1 6 5.164786 5.164786 2224 +sciencedepart 0 6 5.164786 0.000000 2172 +microarchitectur 0 6 5.164786 0.000000 2238 +unpublish 0 6 5.164786 0.000000 2226 +lebeck 1 5 5.347108 5.347108 2582 +reinhardt 1 5 5.347108 5.347108 2583 +forprogram 1 5 5.347108 5.347108 2361 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +ioanni 1 5 5.347108 5.347108 2553 +mukherje 0 5 5.347108 0.000000 2586 +tempest 0 5 5.347108 0.000000 2548 +icpp 0 5 5.347108 0.000000 2382 +toc 0 5 5.347108 0.000000 2562 +summarymi 0 5 5.347108 0.000000 2580 +computerarchitectur 0 5 5.347108 0.000000 2290 +alvin 1 4 5.568345 5.568345 3084 +satish 1 4 5.568345 5.568345 2833 +schoina 1 4 5.568345 5.568345 3085 +languagesand 0 4 5.568345 0.000000 3071 +gregori 0 4 5.568345 0.000000 2928 +sharma 0 4 5.568345 0.000000 2752 +ppopp 0 4 5.568345 0.000000 2774 +substrat 0 4 5.568345 0.000000 2857 +compcon 0 4 5.568345 0.000000 2958 +markhil 0 4 5.568345 0.000000 2819 +manuscript 0 4 5.568345 0.000000 2750 +wart 0 4 5.568345 0.000000 2987 +talluri 0 4 5.568345 0.000000 2820 +oracl 0 4 5.568345 0.000000 2823 +andi 0 4 5.568345 0.000000 3081 +fingerson 0 3 5.857933 0.000000 4018 +thea 0 3 5.857933 0.000000 4019 +sklenar 0 3 5.857933 0.000000 4020 +wcsss 0 3 5.857933 0.000000 3956 +shubhendu 0 3 5.857933 0.000000 4028 +saltz 0 3 5.857933 0.000000 3385 +frequenc 0 3 5.857933 0.000000 3206 +trishul 0 3 5.857933 0.000000 4016 +chilimbi 0 3 5.857933 0.000000 4015 +madhusudhan 0 3 5.857933 0.000000 4021 +parallelmachin 0 3 5.857933 0.000000 3693 +moredetail 0 3 5.857933 0.000000 3854 +guhan 1 2 6.263398 6.263398 6169 +viswanathan 1 2 6.263398 6.263398 6170 +schnarr 1 2 6.263398 6.263398 6194 +lorenz 1 2 6.263398 6.263398 4846 +shamik 0 2 6.263398 0.000000 6195 +cico 0 2 6.263398 0.000000 6120 +thewisconsin 0 2 6.263398 0.000000 6196 +usalaru 0 1 6.957497 0.000000 18909 +structuresc 0 1 6.957497 0.000000 18910 +spim 0 1 6.957497 0.000000 18911 +wartsrec 0 1 6.957497 0.000000 18912 +paperseffici 0 1 6.957497 0.000000 18913 +teapot 0 1 6.957497 0.000000 18914 +andjam 0 1 6.957497 0.000000 18915 +annerog 0 1 6.957497 0.000000 18916 +practiceof 0 1 6.957497 0.000000 18917 +languagesdesign 0 1 6.957497 0.000000 18918 +youfeng 0 1 6.957497 0.000000 18919 +jameslaru 0 1 6.957497 0.000000 18920 +cachier 0 1 6.957497 0.000000 18921 +graduatesbrad 0 1 6.957497 0.000000 18922 +vassar 0 1 6.957497 0.000000 18923 +languagesfirst 0 1 6.957497 0.000000 18924 +huelsbergen 0 1 6.957497 0.000000 18925 +tball 0 1 6.957497 0.000000 18926 +havehelp 0 1 6.957497 0.000000 18927 +coherencepolici 0 1 6.957497 0.000000 18928 +programmersunderstand 0 1 6.957497 0.000000 18929 +hasidentifi 0 1 6.957497 0.000000 18930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..5bc08c7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +tuesdai 0 73 2.639057 0.000000 219 +eduoffic 0 33 3.433987 0.000000 531 +nick 0 13 4.382027 0.000000 1180 +leavi 0 3 5.857933 0.000000 3438 +pagenick 0 1 6.957497 0.000000 18931 +pageoffic 0 1 6.957497 0.000000 18932 +wednessdai 0 1 6.957497 0.000000 18933 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..3427ea37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +project 0 340 1.098612 0.000000 18 +current 0 284 1.098612 0.000000 21 +offic 0 299 1.098612 0.000000 13 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +includ 0 208 1.609438 0.000000 42 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +parallel 0 169 1.791759 0.000000 60 +recent 0 167 1.791759 0.000000 58 +avail 0 169 1.791759 0.000000 48 +phone 0 175 1.791759 0.000000 45 +file 1 132 1.945910 1.945910 70 +relat 0 139 1.945910 0.000000 68 +area 0 144 1.945910 0.000000 80 +document 0 121 2.079442 0.000000 89 +postscript 0 131 2.079442 0.000000 90 +dayton 0 119 2.079442 0.000000 104 +look 0 107 2.197225 0.000000 115 +pleas 0 113 2.197225 0.000000 114 +book 1 99 2.302585 2.302585 131 +member 0 84 2.484907 0.000000 165 +build 0 85 2.484907 0.000000 184 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +html 0 75 2.639057 0.000000 235 +order 0 69 2.708050 0.000000 249 +would 0 67 2.708050 0.000000 251 +dept 0 64 2.772589 0.000000 291 +sever 0 56 2.890372 0.000000 322 +publish 0 57 2.890372 0.000000 326 +finger 0 52 2.995732 0.000000 354 +algebra 0 45 3.135494 0.000000 394 +keep 0 44 3.135494 0.000000 409 +linear 0 41 3.218876 0.000000 431 +press 0 42 3.218876 0.000000 419 +editor 0 41 3.218876 0.000000 433 +http 0 41 3.218876 0.000000 420 +origin 0 38 3.295837 0.000000 472 +committe 0 34 3.401197 0.000000 522 +statist 0 35 3.401197 0.000000 521 +univ 1 28 3.610918 3.610918 617 +intend 0 28 3.610918 0.000000 599 +mind 0 27 3.637586 0.000000 632 +other 0 24 3.761200 0.000000 697 +compress 0 23 3.806662 0.000000 719 +wind 0 18 4.060443 0.000000 908 +steven 1 17 4.110874 4.110874 953 +draft 0 14 4.317488 0.000000 1085 +individu 0 13 4.382027 0.000000 1126 +tunnel 0 9 4.753590 0.000000 1615 +isbn 0 7 5.010635 0.000000 1901 +forum 1 6 5.164786 5.164786 2027 +ongo 0 6 5.164786 0.000000 2215 +heavili 0 3 5.857933 0.000000 3572 +andit 0 3 5.857933 0.000000 3328 +thewisconsin 0 2 6.263398 0.000000 6196 +lederman 1 1 6.957497 6.957497 18934 +huss 1 1 6.957497 6.957497 18935 +mpistandard 1 1 6.957497 6.957497 18936 +iscov 0 1 6.957497 0.000000 18937 +prismproject 0 1 6.957497 0.000000 18938 +invol 0 1 6.957497 0.000000 18939 +ongoingwork 0 1 6.957497 0.000000 18940 +compressedtar 0 1 6.957497 0.000000 18941 +desper 0 1 6.957497 0.000000 18942 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..f8edffe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +design 1 213 1.386294 1.386294 25 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +network 1 168 1.791759 1.791759 61 +wisconsin 1 169 1.791759 1.791759 54 +implement 1 152 1.791759 1.791759 52 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +proceed 1 93 2.397895 2.397895 152 +control 1 82 2.484907 2.484907 164 +second 0 81 2.484907 0.000000 166 +ieee 0 86 2.484907 0.000000 190 +dynam 1 76 2.564949 2.564949 194 +april 0 77 2.564949 0.000000 196 +summari 0 73 2.639057 0.000000 237 +nation 0 74 2.639057 0.000000 240 +window 1 68 2.708050 2.708050 242 +august 0 66 2.708050 0.000000 257 +virtual 1 62 2.772589 2.772589 285 +sampl 0 53 2.944439 0.000000 339 +tabl 0 51 2.995732 0.000000 346 +telephon 0 50 3.044522 0.000000 373 +protocol 1 45 3.135494 3.135494 407 +fast 0 42 3.218876 0.000000 429 +combin 0 42 3.218876 0.000000 421 +close 0 38 3.295837 0.000000 465 +open 0 38 3.295837 0.000000 469 +focus 0 29 3.583519 0.000000 584 +feedback 0 19 4.007333 0.000000 854 +speed 1 18 4.060443 4.060443 911 +rate 0 15 4.248495 0.000000 1037 +francisco 0 14 4.317488 0.000000 1095 +circuit 0 13 4.382027 0.000000 1131 +loop 1 11 4.553877 4.553877 1310 +clock 0 11 4.553877 0.000000 1320 +purdu 0 10 4.653960 0.000000 1466 +packet 0 10 4.653960 0.000000 1415 +lawrenc 1 7 5.010635 5.010635 1908 +conferenc 0 7 5.010635 0.000000 1857 +mukherje 1 5 5.347108 5.347108 2586 +testb 0 5 5.347108 0.000000 2456 +admiss 1 4 5.568345 5.568345 2704 +darpa 0 4 5.568345 0.000000 2944 +phenomena 0 4 5.568345 0.000000 2962 +landweb 1 3 5.857933 5.857933 3402 +congest 1 3 5.857933 5.857933 3993 +infocom 1 3 5.857933 5.857933 3283 +atmospher 0 3 5.857933 0.000000 3388 +baltimor 0 3 5.857933 0.000000 3809 +theieee 0 2 6.263398 0.000000 6043 +florenc 0 2 6.263398 0.000000 4950 +faber 1 1 6.957497 6.957497 18943 +electronicmail 0 1 6.957497 0.000000 18944 +participatingin 0 1 6.957497 0.000000 18945 +gigabit 0 1 6.957497 0.000000 18946 +involvesth 0 1 6.957497 0.000000 18947 +atgigabit 0 1 6.957497 0.000000 18948 +onissu 0 1 6.957497 0.000000 18949 +visualizationof 0 1 6.957497 0.000000 18950 +establishmentmethod 0 1 6.957497 0.000000 18951 +olsen 0 1 6.957497 0.000000 18952 +witht 0 1 6.957497 0.000000 18953 +sigcommconfer 0 1 6.957497 0.000000 18954 +coursesconnect 0 1 6.957497 0.000000 18955 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..f930ff53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +us 0 329 1.098612 0.000000 16 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +languag 1 227 1.386294 1.386294 26 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +madison 1 165 1.791759 1.791759 55 +address 0 170 1.791759 0.000000 62 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +lectur 0 135 1.945910 0.000000 73 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +dayton 0 119 2.079442 0.000000 104 +compil 0 122 2.079442 0.000000 96 +person 0 111 2.197225 0.000000 117 +section 0 94 2.397895 0.000000 149 +west 0 83 2.484907 0.000000 192 +servic 0 72 2.639057 0.000000 236 +intellig 0 72 2.639057 0.000000 225 +thursdai 0 70 2.708050 0.000000 241 +street 0 63 2.772589 0.000000 293 +wednesdai 0 64 2.772589 0.000000 261 +artifici 0 63 2.772589 0.000000 280 +variou 0 56 2.890372 0.000000 317 +appoint 0 49 3.044522 0.000000 358 +archiv 0 49 3.044522 0.000000 364 +natur 0 44 3.135494 0.000000 406 +respons 0 37 3.332205 0.000000 476 +comp 1 26 3.688879 3.688879 650 +women 0 16 4.174387 0.000000 1004 +cognit 0 16 4.174387 0.000000 986 +career 0 12 4.465908 0.000000 1287 +linguist 1 9 4.753590 4.753590 1593 +utah 1 9 4.753590 4.753590 1585 +lloyd 1 6 5.164786 5.164786 2103 +chemistri 0 5 5.347108 0.000000 2405 +shannon 1 1 6.957497 6.957497 18956 +xsoft 0 1 6.957497 0.000000 18957 +lexdemo 0 1 6.957497 0.000000 18958 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..92d80dbe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +cours 0 273 1.098612 0.000000 15 +current 0 284 1.098612 0.000000 21 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +languag 0 227 1.386294 0.000000 26 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +phone 0 175 1.791759 0.000000 45 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +number 0 130 2.079442 0.000000 97 +technolog 0 131 2.079442 0.000000 102 +check 1 115 2.197225 2.197225 118 +site 0 106 2.197225 0.000000 119 +well 0 109 2.197225 0.000000 121 +pleas 0 113 2.197225 0.000000 114 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +search 1 95 2.397895 2.397895 155 +real 0 93 2.397895 0.000000 144 +pictur 0 89 2.397895 0.000000 160 +thing 1 84 2.484907 2.484907 189 +know 1 80 2.564949 2.564949 198 +good 0 77 2.564949 0.000000 200 +meet 1 72 2.639057 2.639057 229 +free 0 73 2.639057 0.000000 224 +name 0 72 2.639057 0.000000 220 +appli 0 71 2.639057 0.000000 226 +html 0 75 2.639057 0.000000 235 +java 0 70 2.708050 0.000000 248 +interact 1 62 2.772589 2.772589 270 +virtual 0 62 2.772589 0.000000 285 +organ 0 65 2.772589 0.000000 265 +automat 0 61 2.833213 0.000000 306 +semest 0 58 2.890372 0.000000 312 +space 0 57 2.890372 0.000000 310 +advisor 0 51 2.995732 0.000000 355 +maintain 0 51 2.995732 0.000000 342 +case 0 51 2.995732 0.000000 351 +much 0 52 2.995732 0.000000 349 +cool 1 49 3.044522 3.044522 374 +friend 0 48 3.044522 0.000000 376 +electron 0 47 3.091042 0.000000 379 +favorit 0 44 3.135494 0.000000 410 +realli 1 40 3.258097 3.258097 444 +submit 0 39 3.258097 0.000000 440 +seminar 0 38 3.295837 0.000000 470 +feel 1 37 3.332205 3.332205 483 +go 0 33 3.433987 0.000000 529 +taught 0 33 3.433987 0.000000 526 +someth 0 31 3.496508 0.000000 554 +option 1 30 3.555348 3.555348 575 +becom 0 28 3.610918 0.000000 603 +concern 0 25 3.737670 0.000000 666 +higher 0 24 3.761200 0.000000 690 +longer 0 20 3.951244 0.000000 816 +figur 0 18 4.060443 0.000000 903 +women 1 16 4.174387 4.174387 1004 +stock 0 16 4.174387 0.000000 1007 +todd 0 15 4.248495 0.000000 1051 +anywai 0 15 4.248495 0.000000 1047 +mayb 0 15 4.248495 0.000000 1014 +save 0 14 4.317488 0.000000 1099 +edui 0 13 4.382027 0.000000 1193 +coordin 0 13 4.382027 0.000000 1182 +wait 0 13 4.382027 0.000000 1168 +wife 0 13 4.382027 0.000000 1196 +entertain 1 12 4.465908 4.465908 1286 +basketbal 0 12 4.465908 0.000000 1289 +readi 0 12 4.465908 0.000000 1242 +food 0 12 4.465908 0.000000 1285 +fill 1 11 4.553877 4.553877 1349 +market 0 11 4.553877 0.000000 1361 +keyword 0 11 4.553877 0.000000 1356 +candid 0 9 4.753590 0.000000 1606 +jump 0 9 4.753590 0.000000 1603 +simpli 0 8 4.875197 0.000000 1626 +reload 0 8 4.875197 0.000000 1682 +appar 0 7 5.010635 0.000000 1958 +iowa 0 7 5.010635 0.000000 1971 +polit 0 6 5.164786 0.000000 2115 +legal 0 6 5.164786 0.000000 2094 +troubl 0 6 5.164786 0.000000 2002 +christoph 0 5 5.347108 0.000000 2512 +czar 0 5 5.347108 0.000000 2503 +tuft 0 5 5.347108 0.000000 2575 +amus 0 5 5.347108 0.000000 2366 +sing 0 5 5.347108 0.000000 2499 +gui 0 5 5.347108 0.000000 2573 +girlfriend 0 5 5.347108 0.000000 2579 +festiv 0 4 5.568345 0.000000 2952 +turnidg 0 4 5.568345 0.000000 2829 +superhighwai 0 4 5.568345 0.000000 2943 +chees 0 4 5.568345 0.000000 3090 +rival 0 3 5.857933 0.000000 3583 +tiger 0 3 5.857933 0.000000 3897 +wealth 0 3 5.857933 0.000000 3353 +traci 0 3 5.857933 0.000000 3984 +child 0 3 5.857933 0.000000 3542 +tast 0 3 5.857933 0.000000 3666 +kick 0 3 5.857933 0.000000 3962 +alien 0 3 5.857933 0.000000 3930 +laugh 0 3 5.857933 0.000000 3659 +defeat 0 2 6.263398 0.000000 5401 +kirk 0 2 6.263398 0.000000 6175 +killer 0 2 6.263398 0.000000 6159 +tragic 0 2 6.263398 0.000000 6114 +junki 0 2 6.263398 0.000000 5457 +outer 0 2 6.263398 0.000000 4464 +gross 0 2 6.263398 0.000000 5989 +nake 0 2 6.263398 0.000000 6197 +offspr 0 2 6.263398 0.000000 5699 +bogu 0 2 6.263398 0.000000 5471 +wierd 0 2 6.263398 0.000000 6093 +luka 1 1 6.957497 6.957497 18959 +lone 1 1 6.957497 6.957497 18960 +checkbox 1 1 6.957497 6.957497 18961 +pagechristoph 0 1 6.957497 0.000000 18962 +lukasrelev 0 1 6.957497 0.000000 18963 +mspl 0 1 6.957497 0.000000 18964 +workshipi 0 1 6.957497 0.000000 18965 +quest 0 1 6.957497 0.000000 18966 +sunivers 0 1 6.957497 0.000000 18967 +prisonerthi 0 1 6.957497 0.000000 18968 +quoteserv 0 1 6.957497 0.000000 18969 +fabul 0 1 6.957497 0.000000 18970 +pagebet 0 1 6.957497 0.000000 18971 +identitycaptain 0 1 6.957497 0.000000 18972 +throughamaz 0 1 6.957497 0.000000 18973 +withtri 0 1 6.957497 0.000000 18974 +teri 0 1 6.957497 0.000000 18975 +incred 0 1 6.957497 0.000000 18976 +catthi 0 1 6.957497 0.000000 18977 +buttmunchextrem 0 1 6.957497 0.000000 18978 +dudemichael 0 1 6.957497 0.000000 18979 +nesmith 0 1 6.957497 0.000000 18980 +fanfoolmyth 0 1 6.957497 0.000000 18981 +figurewick 0 1 6.957497 0.000000 18982 +playervalu 0 1 6.957497 0.000000 18983 +studentment 0 1 6.957497 0.000000 18984 +defectivea 0 1 6.957497 0.000000 18985 +wkrp 0 1 6.957497 0.000000 18986 +cincinatti 0 1 6.957497 0.000000 18987 +figuregeek 0 1 6.957497 0.000000 18988 +tradesgonzo 0 1 6.957497 0.000000 18989 +admirernetscap 0 1 6.957497 0.000000 18990 +pornpersonifi 0 1 6.957497 0.000000 18991 +condom 0 1 6.957497 0.000000 18992 +stretch 0 1 6.957497 0.000000 18993 +blowflam 0 1 6.957497 0.000000 18994 +testicl 0 1 6.957497 0.000000 18995 +goodpoetri 0 1 6.957497 0.000000 18996 +guruhogwildthi 0 1 6.957497 0.000000 18997 +assman 0 1 6.957497 0.000000 18998 +manbig 0 1 6.957497 0.000000 18999 +dudeuh 0 1 6.957497 0.000000 19000 +ohprofession 0 1 6.957497 0.000000 19001 +muff 0 1 6.957497 0.000000 19002 +diverregress 0 1 6.957497 0.000000 19003 +lifeformherald 0 1 6.957497 0.000000 19004 +invas 0 1 6.957497 0.000000 19005 +forcechri 0 1 6.957497 0.000000 19006 +formsalienherpetophiletodd 0 1 6.957497 0.000000 19007 +hatth 0 1 6.957497 0.000000 19008 +mancreepi 0 1 6.957497 0.000000 19009 +headsmal 0 1 6.957497 0.000000 19010 +planetdr 0 1 6.957497 0.000000 19011 +companioneast 0 1 6.957497 0.000000 19012 +bunnycyberweenietcl 0 1 6.957497 0.000000 19013 +hellbeast 0 1 6.957497 0.000000 19014 +drug 0 1 6.957497 0.000000 19015 +cosmo 0 1 6.957497 0.000000 19016 +irrit 0 1 6.957497 0.000000 19017 +scatolog 0 1 6.957497 0.000000 19018 +pervert 0 1 6.957497 0.000000 19019 +etymolog 0 1 6.957497 0.000000 19020 +phat 0 1 6.957497 0.000000 19021 +gnarli 0 1 6.957497 0.000000 19022 +cybermuffin 0 1 6.957497 0.000000 19023 +erotica 0 1 6.957497 0.000000 19024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..30a362fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +phone 0 175 1.791759 0.000000 45 +click 1 142 1.945910 1.945910 78 +architectur 0 139 1.945910 0.000000 77 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +look 1 107 2.197225 2.197225 115 +version 0 113 2.197225 0.000000 122 +assist 0 112 2.197225 0.000000 113 +place 0 106 2.197225 0.000000 124 +take 1 97 2.302585 2.302585 134 +text 0 98 2.302585 0.000000 133 +pictur 0 89 2.397895 0.000000 160 +homepag 0 93 2.397895 0.000000 148 +sinc 0 90 2.397895 0.000000 159 +educ 0 86 2.484907 0.000000 191 +school 0 84 2.484907 0.000000 188 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +resum 0 79 2.564949 0.000000 217 +dept 1 64 2.772589 2.772589 291 +januari 0 62 2.772589 0.000000 264 +best 1 59 2.833213 2.833213 299 +march 0 61 2.833213 0.000000 295 +suggest 0 53 2.944439 0.000000 331 +visitor 0 49 3.044522 0.000000 371 +could 0 46 3.091042 0.000000 383 +compani 0 41 3.218876 0.000000 423 +china 0 37 3.332205 0.000000 487 +thank 0 23 3.806662 0.000000 721 +self 0 22 3.850148 0.000000 761 +alumni 1 21 3.912023 3.912023 807 +miller 0 17 4.110874 0.000000 949 +side 0 15 4.248495 0.000000 1022 +promot 0 12 4.465908 0.000000 1235 +surf 0 11 4.553877 0.000000 1301 +america 0 11 4.553877 0.000000 1370 +paradyn 1 9 4.753590 4.753590 1614 +charg 0 9 4.753590 0.000000 1582 +port 0 8 4.875197 0.000000 1766 +hack 0 7 5.010635 0.000000 1950 +iowa 0 7 5.010635 0.000000 1971 +onto 0 6 5.164786 0.000000 2089 +barton 0 5 5.347108 0.000000 2371 +girlfriend 0 5 5.347108 0.000000 2579 +ignor 0 5 5.347108 0.000000 2288 +ling 1 4 5.568345 5.568345 3045 +hpux 0 3 5.857933 0.000000 3780 +temporarili 0 3 5.857933 0.000000 3692 +wuhan 1 2 6.263398 6.263398 5589 +sheboygan 0 2 6.263398 0.000000 6198 +shameless 0 2 6.263398 0.000000 6146 +chinaand 0 2 6.263398 0.000000 5151 +officem 0 2 6.263398 0.000000 6092 +marcelo 0 2 6.263398 0.000000 6199 +infom 0 2 6.263398 0.000000 5425 +bother 0 2 6.263398 0.000000 6164 +zheng 1 1 6.957497 6.957497 19025 +lzheng 0 1 6.957497 0.000000 19026 +boss 0 1 6.957497 0.000000 19027 +prese 0 1 6.957497 0.000000 19028 +winsconsin 0 1 6.957497 0.000000 19029 +goncalv 0 1 6.957497 0.000000 19030 +hereif 0 1 6.957497 0.000000 19031 +sthe 0 1 6.957497 0.000000 19032 +schoolssend 0 1 6.957497 0.000000 19033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..b90e30f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +cours 0 273 1.098612 0.000000 15 +email 0 220 1.386294 0.000000 29 +gener 0 220 1.386294 0.000000 27 +wisc 0 242 1.386294 0.000000 33 +like 1 132 1.945910 1.945910 81 +look 1 107 2.197225 2.197225 115 +final 0 116 2.197225 0.000000 108 +need 0 98 2.302585 0.000000 135 +start 0 83 2.484907 0.000000 173 +know 1 80 2.564949 2.564949 198 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +plai 0 60 2.833213 0.000000 307 +advisor 0 51 2.995732 0.000000 355 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +around 0 43 3.178054 0.000000 415 +photo 0 31 3.496508 0.000000 561 +turn 0 29 3.583519 0.000000 586 +team 0 27 3.637586 0.000000 625 +dai 0 22 3.850148 0.000000 753 +later 0 15 4.248495 0.000000 1043 +america 0 11 4.553877 0.000000 1370 +hello 0 10 4.653960 0.000000 1407 +sundai 0 10 4.653960 0.000000 1387 +said 0 9 4.753590 0.000000 1571 +round 0 8 4.875197 0.000000 1769 +monei 0 7 5.010635 0.000000 1934 +golf 1 6 5.164786 5.164786 2178 +leagu 0 4 5.568345 0.000000 3040 +passion 0 3 5.857933 0.000000 3633 +manuvir 1 1 6.957497 6.957497 19034 +pagemanuvir 0 1 6.957497 0.000000 19035 +dasnow 0 1 6.957497 0.000000 19036 +andwhat 0 1 6.957497 0.000000 19037 +feelfre 0 1 6.957497 0.000000 19038 +somethingsend 0 1 6.957497 0.000000 19039 +anact 0 1 6.957497 0.000000 19040 +manuvirwhat 0 1 6.957497 0.000000 19041 +thisto 0 1 6.957497 0.000000 19042 +theorigin 0 1 6.957497 0.000000 19043 +consin 0 1 6.957497 0.000000 19044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..16318d16 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,395 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +fall 0 181 1.609438 0.000000 40 +wisconsin 2 169 1.791759 3.583518 54 +parallel 2 169 1.791759 3.583518 60 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +algorithm 0 162 1.791759 0.000000 57 +architectur 2 139 1.945910 3.891820 77 +perform 2 143 1.945910 3.891820 74 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +like 1 132 1.945910 1.945910 81 +first 1 140 1.945910 1.945910 71 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +click 0 142 1.945910 0.000000 78 +confer 1 126 2.079442 2.079442 100 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +technolog 0 131 2.079442 0.000000 102 +report 0 131 2.079442 0.000000 92 +intern 2 108 2.197225 4.394450 128 +teach 0 108 2.197225 0.000000 112 +world 0 115 2.197225 0.000000 126 +topic 0 114 2.197225 0.000000 110 +final 0 116 2.197225 0.000000 108 +specif 0 106 2.197225 0.000000 106 +assist 0 112 2.197225 0.000000 113 +memori 2 101 2.302585 4.605170 139 +advanc 1 99 2.302585 2.302585 130 +part 0 98 2.302585 0.000000 129 +user 0 104 2.302585 0.000000 137 +techniqu 0 99 2.302585 0.000000 138 +associ 1 93 2.397895 2.397895 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +search 0 95 2.397895 0.000000 155 +ieee 1 86 2.484907 2.484907 190 +level 1 87 2.484907 2.484907 180 +novemb 1 81 2.484907 2.484907 179 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +educ 0 86 2.484907 0.000000 191 +wide 0 84 2.484907 0.000000 185 +stuff 0 87 2.484907 0.000000 171 +west 0 83 2.484907 0.000000 192 +librari 0 87 2.484907 0.000000 181 +chang 0 82 2.484907 0.000000 163 +journal 0 83 2.484907 0.000000 183 +help 0 83 2.484907 0.000000 175 +june 2 79 2.564949 5.129898 214 +interfac 1 79 2.564949 2.564949 209 +mondai 0 77 2.564949 0.000000 206 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +complet 0 77 2.564949 0.000000 208 +decemb 0 80 2.564949 0.000000 215 +david 2 71 2.639057 5.278114 232 +symposium 2 72 2.639057 5.278114 238 +onlin 0 75 2.639057 0.000000 223 +name 0 72 2.639057 0.000000 220 +workshop 0 71 2.639057 0.000000 239 +effici 0 73 2.639057 0.000000 233 +solv 0 73 2.639057 0.000000 234 +august 1 66 2.708050 2.708050 257 +simul 1 66 2.708050 2.708050 255 +java 0 70 2.708050 0.000000 248 +differ 0 66 2.708050 0.000000 253 +order 0 69 2.708050 0.000000 249 +wednesdai 0 64 2.772589 0.000000 261 +organ 0 65 2.772589 0.000000 265 +import 0 65 2.772589 0.000000 282 +prof 0 64 2.772589 0.000000 273 +improv 0 62 2.772589 0.000000 289 +evalu 0 64 2.772589 0.000000 266 +new 0 64 2.772589 0.000000 262 +virtual 0 62 2.772589 0.000000 285 +abstract 0 62 2.772589 0.000000 276 +share 2 59 2.833213 5.666426 304 +juli 1 60 2.833213 2.833213 305 +content 1 59 2.833213 2.833213 302 +march 0 61 2.833213 0.000000 295 +space 1 57 2.890372 2.890372 310 +think 0 57 2.890372 0.000000 314 +index 0 56 2.890372 0.000000 309 +sampl 1 53 2.944439 2.944439 339 +talk 0 53 2.944439 0.000000 336 +februari 0 54 2.944439 0.000000 328 +hardwar 2 51 2.995732 5.991464 350 +tabl 1 51 2.995732 2.995732 346 +much 0 52 2.995732 0.000000 349 +investig 0 51 2.995732 0.000000 353 +give 0 50 3.044522 0.000000 359 +appoint 0 49 3.044522 0.000000 358 +frequent 0 49 3.044522 0.000000 367 +california 1 46 3.091042 3.091042 388 +effect 0 46 3.091042 0.000000 385 +mark 3 44 3.135494 9.406482 403 +protocol 1 45 3.135494 3.135494 407 +execut 0 45 3.135494 0.000000 404 +directori 0 45 3.135494 0.000000 396 +show 0 43 3.178054 0.000000 417 +mechan 0 43 3.178054 0.000000 416 +cach 2 41 3.218876 6.437752 432 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +transact 1 39 3.258097 3.258097 438 +programm 0 39 3.258097 0.000000 445 +streetmadison 0 38 3.295837 0.000000 474 +industri 0 38 3.295837 0.000000 464 +prototyp 0 38 3.295837 0.000000 463 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +sciencesunivers 0 37 3.332205 0.000000 486 +cost 0 37 3.332205 0.000000 480 +formal 0 37 3.332205 0.000000 478 +multi 1 36 3.367296 3.367296 493 +jame 2 35 3.401197 6.802394 507 +bibliographi 0 34 3.401197 0.000000 518 +award 0 34 3.401197 0.000000 523 +toler 0 33 3.433987 0.000000 533 +john 0 33 3.433987 0.000000 532 +fault 0 32 3.465736 0.000000 547 +extend 0 32 3.465736 0.000000 539 +richard 1 31 3.496508 3.496508 559 +often 0 31 3.496508 0.000000 551 +option 0 30 3.555348 0.000000 575 +robert 0 30 3.555348 0.000000 567 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +pass 0 28 3.610918 0.000000 611 +propos 0 28 3.610918 0.000000 602 +scale 0 28 3.610918 0.000000 613 +determin 0 27 3.637586 0.000000 630 +consist 1 26 3.688879 3.688879 651 +berkelei 1 26 3.688879 3.688879 657 +experiment 1 26 3.688879 3.688879 645 +enabl 0 26 3.688879 0.000000 655 +revis 0 26 3.688879 0.000000 640 +detect 0 26 3.688879 0.000000 646 +hill 3 25 3.737670 11.213010 670 +supercomput 1 25 3.737670 3.737670 681 +trace 1 25 3.737670 3.737670 677 +scalabl 0 24 3.761200 0.000000 705 +higher 0 24 3.761200 0.000000 690 +proof 0 23 3.806662 0.000000 720 +size 0 23 3.806662 0.000000 713 +highli 0 23 3.806662 0.000000 725 +cooper 1 22 3.850148 3.850148 757 +sequenti 0 22 3.850148 0.000000 745 +hierarchi 0 22 3.850148 0.000000 744 +annot 0 21 3.912023 0.000000 775 +department 0 20 3.951244 0.000000 839 +fine 0 20 3.951244 0.000000 822 +smith 0 20 3.951244 0.000000 820 +scheme 0 20 3.951244 0.000000 818 +benchmark 1 19 4.007333 4.007333 859 +comparison 1 19 4.007333 4.007333 863 +miss 0 19 4.007333 0.000000 866 +definit 0 19 4.007333 0.000000 864 +wind 1 18 4.060443 4.060443 908 +partial 0 18 4.060443 0.000000 900 +less 0 18 4.060443 0.000000 892 +four 0 18 4.060443 0.000000 905 +steven 1 17 4.110874 4.110874 953 +seek 0 17 4.110874 0.000000 954 +asplo 0 17 4.110874 0.000000 948 +miller 0 17 4.110874 0.000000 949 +estim 0 17 4.110874 0.000000 930 +transfer 0 16 4.174387 0.000000 967 +young 0 16 4.174387 0.000000 991 +eduphon 0 15 4.248495 0.000000 1060 +massiv 0 15 4.248495 0.000000 1026 +hybrid 0 15 4.248495 0.000000 1057 +micro 0 15 4.248495 0.000000 1031 +coher 1 14 4.317488 4.317488 1109 +manner 0 14 4.317488 0.000000 1074 +convent 0 14 4.317488 0.000000 1072 +rank 0 14 4.317488 0.000000 1063 +sigmetr 1 13 4.382027 4.382027 1173 +suit 1 13 4.382027 4.382027 1129 +translat 1 13 4.382027 4.382027 1164 +weak 1 13 4.382027 4.382027 1159 +alan 0 13 4.382027 0.000000 1146 +employ 1 12 4.465908 4.465908 1291 +target 0 12 4.465908 0.000000 1282 +buffer 0 12 4.465908 0.000000 1211 +gupta 0 12 4.465908 0.000000 1241 +mari 0 12 4.465908 0.000000 1266 +wood 2 11 4.553877 9.107754 1355 +isca 1 11 4.553877 4.553877 1354 +rice 1 11 4.553877 4.553877 1336 +michigan 0 11 4.553877 0.000000 1368 +node 0 11 4.553877 0.000000 1326 +transpar 0 11 4.553877 0.000000 1325 +keyword 0 11 4.553877 0.000000 1356 +catalog 0 10 4.653960 0.000000 1431 +grain 0 10 4.653960 0.000000 1448 +princip 0 10 4.653960 0.000000 1397 +sosp 0 10 4.653960 0.000000 1416 +placement 0 10 4.653960 0.000000 1420 +stack 0 10 4.653960 0.000000 1389 +laru 2 9 4.753590 9.507180 1560 +tunnel 1 9 4.753590 4.753590 1615 +patterson 1 9 4.753590 4.753590 1554 +sound 0 9 4.753590 0.000000 1605 +frank 0 9 4.753590 0.000000 1568 +jeffrei 0 9 4.753590 0.000000 1612 +kong 0 9 4.753590 0.000000 1602 +vernon 0 9 4.753590 0.000000 1556 +spec 1 8 4.875197 4.875197 1640 +lewi 1 8 4.875197 4.875197 1700 +secretari 0 8 4.875197 0.000000 1775 +uniprocessor 0 8 4.875197 0.000000 1696 +quantit 0 8 4.875197 0.000000 1654 +presidenti 0 8 4.875197 0.000000 1737 +irregular 0 8 4.875197 0.000000 1768 +joel 0 8 4.875197 0.000000 1698 +unifi 0 8 4.875197 0.000000 1774 +roger 1 7 5.010635 5.010635 1892 +secondari 0 7 5.010635 0.000000 1884 +ann 1 6 5.164786 5.164786 2065 +consensu 0 6 5.164786 0.000000 2080 +unpublish 0 6 5.164786 0.000000 2226 +chandra 0 6 5.164786 0.000000 2091 +microsystem 0 6 5.164786 0.000000 2160 +mukherje 1 5 5.347108 5.347108 2586 +lebeck 1 5 5.347108 5.347108 2582 +reinhardt 1 5 5.347108 5.347108 2583 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +tempest 1 5 5.347108 5.347108 2548 +engineeringat 0 5 5.347108 0.000000 2561 +advic 0 5 5.347108 0.000000 2509 +educurr 0 5 5.347108 0.000000 2504 +summarymi 0 5 5.347108 0.000000 2580 +middl 0 5 5.347108 0.000000 2372 +hypothet 0 5 5.347108 0.000000 2474 +optimist 0 5 5.347108 0.000000 2501 +chemic 0 5 5.347108 0.000000 2552 +ioanni 0 5 5.347108 0.000000 2553 +forprogram 0 5 5.347108 0.000000 2361 +toc 0 5 5.347108 0.000000 2562 +andrea 0 5 5.347108 0.000000 2375 +hennessi 0 5 5.347108 0.000000 2289 +race 0 5 5.347108 0.000000 2417 +barton 0 5 5.347108 0.000000 2371 +talluri 1 4 5.568345 5.568345 2820 +alvin 1 4 5.568345 5.568345 3084 +markhil 1 4 5.568345 5.568345 2819 +wart 1 4 5.568345 5.568345 2987 +crai 1 4 5.568345 5.568345 3012 +emphas 0 4 5.568345 0.000000 2672 +languagesand 0 4 5.568345 0.000000 3071 +align 0 4 5.568345 0.000000 2863 +manuscript 0 4 5.568345 0.000000 2750 +sabbat 0 4 5.568345 0.000000 2824 +sharma 0 4 5.568345 0.000000 2752 +ppopp 0 4 5.568345 0.000000 2774 +schoina 0 4 5.568345 0.000000 3085 +medium 0 4 5.568345 0.000000 2834 +implic 0 4 5.568345 0.000000 2696 +satish 0 4 5.568345 0.000000 2833 +anoop 0 4 5.568345 0.000000 2770 +ratio 0 4 5.568345 0.000000 2942 +shubhendu 1 3 5.857933 5.857933 4028 +madhusudhan 1 3 5.857933 5.857933 4021 +superpag 1 3 5.857933 5.857933 3978 +megabyt 1 3 5.857933 5.857933 3732 +fingerson 0 3 5.857933 0.000000 4018 +thea 0 3 5.857933 0.000000 4019 +sklenar 0 3 5.857933 0.000000 4020 +programmingc 0 3 5.857933 0.000000 3232 +saltz 0 3 5.857933 0.000000 3385 +surpass 0 3 5.857933 0.000000 3247 +tradeoff 0 3 5.857933 0.000000 3387 +adv 2 2 6.263398 12.526796 4540 +andelectr 0 2 6.263398 0.000000 6200 +wisconsint 0 2 6.263398 0.000000 6155 +teachingfal 0 2 6.263398 0.000000 5532 +ifal 0 2 6.263398 0.000000 4776 +architecturec 0 2 6.263398 0.000000 6127 +sustain 0 2 6.263398 0.000000 6201 +mywork 0 2 6.263398 0.000000 5800 +projectwith 0 2 6.263398 0.000000 4986 +uniformli 0 2 6.263398 0.000000 6202 +todevelop 0 2 6.263398 0.000000 5448 +aredevelop 0 2 6.263398 0.000000 4930 +similarto 0 2 6.263398 0.000000 6074 +aeronaut 0 2 6.263398 0.000000 5958 +anddavid 0 2 6.263398 0.000000 6126 +sashikanth 0 2 6.263398 0.000000 6122 +chandrasekaran 0 2 6.263398 0.000000 6121 +shamik 0 2 6.263398 0.000000 6195 +memorymultiprocessor 0 2 6.263398 0.000000 4529 +dionisio 0 2 6.263398 0.000000 6203 +pnevmatikato 0 2 6.263398 0.000000 6204 +subbarao 0 2 6.263398 0.000000 6205 +shing 0 2 6.263398 0.000000 5146 +sarita 2 1 6.957497 13.914994 19045 +kessler 1 1 6.957497 6.957497 19046 +subblock 1 1 6.957497 6.957497 19047 +sampler 1 1 6.957497 6.957497 19048 +madhu 1 1 6.957497 6.957497 19049 +tlb 1 1 6.957497 6.957497 19050 +pagemark 0 1 6.957497 0.000000 19051 +andsummari 0 1 6.957497 0.000000 19052 +graduateslink 0 1 6.957497 0.000000 19053 +oralpresent 0 1 6.957497 0.000000 19054 +forcach 0 1 6.957497 0.000000 19055 +usamarkhil 0 1 6.957497 0.000000 19056 +icatalog 0 1 6.957497 0.000000 19057 +teachc 0 1 6.957497 0.000000 19058 +iieduc 0 1 6.957497 0.000000 19059 +evaluationresearch 0 1 6.957497 0.000000 19060 +multiprocessorsand 0 1 6.957497 0.000000 19061 +evaluationtechniqu 0 1 6.957497 0.000000 19062 +windtunnel 0 1 6.957497 0.000000 19063 +manystud 0 1 6.957497 0.000000 19064 +computerswil 0 1 6.957497 0.000000 19065 +levelparallel 0 1 6.957497 0.000000 19066 +inwhich 0 1 6.957497 0.000000 19067 +recentlypropos 0 1 6.957497 0.000000 19068 +aclust 0 1 6.957497 0.000000 19069 +toolsto 0 1 6.957497 0.000000 19070 +cull 0 1 6.957497 0.000000 19071 +designairplan 0 1 6.957497 0.000000 19072 +talluritarget 0 1 6.957497 0.000000 19073 +lookasid 0 1 6.957497 0.000000 19074 +superpagesand 0 1 6.957497 0.000000 19075 +asplosandsosppap 0 1 6.957497 0.000000 19076 +papersth 0 1 6.957497 0.000000 19077 +bidirect 0 1 6.957497 0.000000 19078 +pad 0 1 6.957497 0.000000 19079 +yousef 0 1 6.957497 0.000000 19080 +khalidi 0 1 6.957497 0.000000 19081 +microstructur 0 1 6.957497 0.000000 19082 +electrostat 0 1 6.957497 0.000000 19083 +traenkl 0 1 6.957497 0.000000 19084 +sangta 0 1 6.957497 0.000000 19085 +tpd 0 1 6.957497 0.000000 19086 +farid 0 1 6.957497 0.000000 19087 +pour 0 1 6.957497 0.000000 19088 +palacharla 0 1 6.957497 0.000000 19089 +kourosh 0 1 6.957497 0.000000 19090 +gharachorloo 0 1 6.957497 0.000000 19091 +netzer 0 1 6.957497 0.000000 19092 +vikram 0 1 6.957497 0.000000 19093 +kessleracm 0 1 6.957497 0.000000 19094 +graduatesmadhusudhan 0 1 6.957497 0.000000 19095 +updatedw 0 1 6.957497 0.000000 19096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..25031e1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +usaphon 0 9 4.753590 0.000000 1600 +assistantdepart 0 8 4.875197 0.000000 1784 +zaharioudaki 0 2 6.263398 0.000000 6119 +marko 1 1 6.957497 6.957497 19097 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..e0d2494d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +list 0 201 1.609438 0.000000 39 +section 1 94 2.397895 2.397895 149 +michael 1 35 3.401197 3.401197 514 +birk 1 4 5.568345 5.568345 2791 +mbirk 0 3 5.857933 0.000000 3501 +alltraxx 0 1 6.957497 0.000000 19098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..90b5789a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +implement 0 152 1.791759 0.000000 52 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +data 0 170 1.791759 0.000000 49 +object 0 138 1.945910 0.000000 79 +hall 0 146 1.945910 0.000000 65 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +manag 0 114 2.197225 0.000000 125 +techniqu 0 99 2.302585 0.000000 138 +proceed 1 93 2.397895 2.397895 152 +ieee 0 86 2.484907 0.000000 190 +orient 0 80 2.564949 0.000000 205 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +effici 0 73 2.639057 0.000000 233 +free 0 73 2.639057 0.000000 224 +simul 0 66 2.708050 0.000000 255 +march 0 61 2.833213 0.000000 295 +space 0 57 2.890372 0.000000 310 +pointer 0 48 3.044522 0.000000 368 +mark 1 44 3.135494 3.135494 403 +michael 0 35 3.401197 0.000000 514 +proc 0 26 3.688879 0.000000 649 +sigmod 1 19 4.007333 4.007333 877 +white 0 17 4.110874 0.000000 951 +dewitt 0 12 4.465908 0.000000 1270 +persist 0 11 4.553877 0.000000 1367 +franklin 0 10 4.653960 0.000000 1436 +naughton 0 10 4.653960 0.000000 1450 +solomon 1 8 4.875197 4.875197 1716 +carei 1 8 4.875197 4.875197 1781 +tsatalo 0 5 5.347108 0.000000 2581 +mcauliff 1 4 5.568345 5.568345 3083 +marvin 1 4 5.568345 5.568345 2806 +zwill 0 4 5.568345 0.000000 3076 +schuh 0 3 5.857933 0.000000 4014 +swizzl 0 3 5.857933 0.000000 3883 +shoringup 0 1 6.957497 0.000000 19099 +atrac 0 1 6.957497 0.000000 19100 +towardseffect 0 1 6.957497 0.000000 19101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..b99139b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 0 380 0.693147 0.000000 9 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +paper 0 205 1.609438 0.000000 38 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +read 0 154 1.791759 0.000000 47 +address 0 170 1.791759 0.000000 62 +file 0 132 1.945910 0.000000 70 +introduct 0 126 2.079442 0.000000 87 +high 0 130 2.079442 0.000000 101 +schedul 0 119 2.079442 0.000000 85 +analysi 0 124 2.079442 0.000000 98 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +peopl 0 96 2.302585 0.000000 132 +school 0 84 2.484907 0.000000 188 +level 0 87 2.484907 0.000000 180 +appear 0 78 2.564949 0.000000 210 +know 0 80 2.564949 0.000000 198 +html 1 75 2.639057 2.639057 235 +symposium 0 72 2.639057 0.000000 238 +meet 0 72 2.639057 0.000000 229 +dept 0 64 2.772589 0.000000 291 +automat 0 61 2.833213 0.000000 306 +think 1 57 2.890372 2.890372 314 +point 0 58 2.890372 0.000000 319 +variou 0 56 2.890372 0.000000 317 +talk 0 53 2.944439 0.000000 336 +much 0 52 2.995732 0.000000 349 +finger 0 52 2.995732 0.000000 354 +pointer 1 48 3.044522 3.044522 368 +cool 0 49 3.044522 0.000000 374 +principl 0 48 3.044522 0.000000 357 +term 0 43 3.178054 0.000000 411 +fast 1 42 3.218876 3.218876 429 +movi 0 40 3.258097 0.000000 459 +littl 0 39 3.258097 0.000000 454 +submit 0 39 3.258097 0.000000 440 +realli 0 40 3.258097 0.000000 444 +paul 0 38 3.295837 0.000000 471 +abl 0 30 3.555348 0.000000 566 +hope 0 28 3.610918 0.000000 610 +never 0 25 3.737670 0.000000 671 +accur 0 25 3.737670 0.000000 680 +flow 0 24 3.761200 0.000000 700 +togeth 0 23 3.806662 0.000000 714 +try 0 22 3.850148 0.000000 764 +hous 1 21 3.912023 3.912023 801 +watch 0 21 3.912023 0.000000 789 +wrote 0 20 3.951244 0.000000 830 +mostli 0 19 4.007333 0.000000 869 +miss 0 19 4.007333 0.000000 866 +lot 0 18 4.060443 0.000000 889 +previous 0 17 4.110874 0.000000 923 +brown 0 16 4.174387 0.000000 977 +todd 0 15 4.248495 0.000000 1051 +doesn 0 15 4.248495 0.000000 1055 +susan 0 15 4.248495 0.000000 1050 +believ 0 13 4.382027 0.000000 1187 +recurs 0 13 4.382027 0.000000 1127 +step 0 13 4.382027 0.000000 1138 +emac 0 13 4.382027 0.000000 1143 +menu 0 13 4.382027 0.000000 1156 +jonathan 0 13 4.382027 0.000000 1174 +submiss 0 11 4.553877 0.000000 1298 +mode 0 9 4.753590 0.000000 1492 +marc 1 8 4.875197 4.875197 1680 +shapiro 1 8 4.875197 4.875197 1686 +analys 0 8 4.875197 0.000000 1666 +pldi 0 8 4.875197 0.000000 1704 +chan 0 7 5.010635 0.000000 1876 +elementari 0 7 5.010635 0.000000 1825 +interrupt 0 7 5.010635 0.000000 1793 +tag 0 7 5.010635 0.000000 1821 +lawrenc 0 7 5.010635 0.000000 1908 +recov 0 6 5.164786 0.000000 2235 +goldstein 0 6 5.164786 0.000000 2168 +elain 0 5 5.347108 0.000000 2496 +hyper 0 5 5.347108 0.000000 2435 +horwitz 0 5 5.347108 0.000000 2411 +fear 0 4 5.568345 0.000000 2911 +backward 0 4 5.568345 0.000000 2638 +popl 0 4 5.568345 0.000000 3068 +insensit 0 4 5.568345 0.000000 2716 +hoar 0 3 5.857933 0.000000 3875 +obsess 0 2 6.263398 0.000000 5924 +disappear 0 2 6.263398 0.000000 4748 +accid 0 2 6.263398 0.000000 5961 +softwarei 0 2 6.263398 0.000000 4960 +tautolog 1 1 6.957497 6.957497 19102 +fond 0 1 6.957497 0.000000 19103 +repuls 0 1 6.957497 0.000000 19104 +ponder 0 1 6.957497 0.000000 19105 +jacki 0 1 6.957497 0.000000 19106 +dimasi 0 1 6.957497 0.000000 19107 +twisti 0 1 6.957497 0.000000 19108 +amanda 0 1 6.957497 0.000000 19109 +peet 0 1 6.957497 0.000000 19110 +retreather 0 1 6.957497 0.000000 19111 +thepul 0 1 6.957497 0.000000 19112 +cobbl 0 1 6.957497 0.000000 19113 +nowinclud 0 1 6.957497 0.000000 19114 +shapiroand 0 1 6.957497 0.000000 19115 +marion 0 1 6.957497 0.000000 19116 +ferguson 0 1 6.957497 0.000000 19117 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..1bbe09b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +modifi 0 178 1.609438 0.000000 35 +oper 0 180 1.609438 0.000000 34 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +introduct 0 126 2.079442 0.000000 87 +build 0 85 2.484907 0.000000 184 +august 0 66 2.708050 0.000000 257 +appoint 0 49 3.044522 0.000000 358 +mellen 1 2 6.263398 6.263398 4708 +mellencamp 0 2 6.263398 0.000000 4707 +pagerob 0 1 6.957497 0.000000 19118 +minimalist 0 1 6.957497 0.000000 19119 +taship 0 1 6.957497 0.000000 19120 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..a8069d88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +recent 0 167 1.791759 0.000000 58 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +person 1 111 2.197225 2.197225 117 +teach 0 108 2.197225 0.000000 112 +make 0 111 2.197225 0.000000 120 +need 0 98 2.302585 0.000000 135 +section 0 94 2.397895 0.000000 149 +info 0 85 2.484907 0.000000 176 +west 0 83 2.484907 0.000000 192 +second 0 81 2.484907 0.000000 166 +want 0 79 2.564949 0.000000 199 +david 1 71 2.639057 2.639057 232 +plan 0 65 2.772589 0.000000 272 +back 0 60 2.833213 0.000000 297 +semest 0 58 2.890372 0.000000 312 +major 0 56 2.890372 0.000000 315 +direct 0 57 2.890372 0.000000 316 +still 0 50 3.044522 0.000000 362 +give 0 50 3.044522 0.000000 359 +even 0 45 3.135494 0.000000 393 +map 0 39 3.258097 0.000000 452 +often 0 31 3.496508 0.000000 551 +great 0 27 3.637586 0.000000 626 +determin 0 27 3.637586 0.000000 630 +spent 0 25 3.737670 0.000000 676 +eric 0 19 4.007333 0.000000 870 +miss 0 19 4.007333 0.000000 866 +coupl 0 17 4.110874 0.000000 939 +brother 0 13 4.382027 0.000000 1189 +sister 0 9 4.753590 0.000000 1524 +undergrad 0 9 4.753590 0.000000 1589 +soccer 0 8 4.875197 0.000000 1752 +chanc 0 7 5.010635 0.000000 1960 +chess 0 5 5.347108 0.000000 2486 +rewrit 0 5 5.347108 0.000000 2367 +rep 0 4 5.568345 0.000000 3087 +somedai 1 3 5.857933 5.857933 3919 +michel 0 3 5.857933 0.000000 3791 +distract 0 3 5.857933 0.000000 3945 +melski 1 2 6.263398 6.263398 4780 +pagedavid 0 2 6.263398 0.000000 5114 +mill 0 2 6.263398 0.000000 6193 +awesom 0 2 6.263398 0.000000 6167 +russia 0 2 6.263398 0.000000 5756 +hasti 0 2 6.263398 0.000000 6173 +steal 0 2 6.263398 0.000000 5485 +russian 1 1 6.957497 6.957497 19121 +melskicurr 0 1 6.957497 0.000000 19122 +statisticsmadison 0 1 6.957497 0.000000 19123 +permen 0 1 6.957497 0.000000 19124 +ivesmarshfield 0 1 6.957497 0.000000 19125 +kasei 0 1 6.957497 0.000000 19126 +myexact 0 1 6.957497 0.000000 19127 +studiesher 0 1 6.957497 0.000000 19128 +semesterof 0 1 6.957497 0.000000 19129 +beenbik 0 1 6.957497 0.000000 19130 +numerousbook 0 1 6.957497 0.000000 19131 +tomapquest 0 1 6.957497 0.000000 19132 +alot 0 1 6.957497 0.000000 19133 +marshfield 0 1 6.957497 0.000000 19134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..0800ab59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 0 329 1.098612 0.000000 16 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +design 0 213 1.386294 0.000000 25 +mail 0 238 1.386294 0.000000 22 +email 0 220 1.386294 0.000000 29 +oper 1 180 1.609438 1.609438 34 +class 0 199 1.609438 0.000000 37 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +develop 0 174 1.791759 0.000000 53 +parallel 0 169 1.791759 0.000000 60 +applic 0 170 1.791759 0.000000 56 +architectur 1 139 1.945910 1.945910 77 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +construct 0 139 1.945910 0.000000 82 +first 0 140 1.945910 0.000000 71 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +dayton 0 119 2.079442 0.000000 104 +confer 0 126 2.079442 0.000000 100 +postscript 0 131 2.079442 0.000000 90 +teach 0 108 2.197225 0.000000 112 +specif 0 106 2.197225 0.000000 106 +mathemat 0 108 2.197225 0.000000 123 +person 0 111 2.197225 0.000000 117 +world 0 115 2.197225 0.000000 126 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +mani 1 92 2.397895 2.397895 150 +section 0 94 2.397895 0.000000 149 +present 0 91 2.397895 0.000000 145 +imag 0 91 2.397895 0.000000 161 +sinc 0 90 2.397895 0.000000 159 +associ 0 93 2.397895 0.000000 151 +west 0 83 2.484907 0.000000 192 +thing 0 84 2.484907 0.000000 189 +ieee 0 86 2.484907 0.000000 190 +school 0 84 2.484907 0.000000 188 +resourc 0 81 2.484907 0.000000 172 +educ 0 86 2.484907 0.000000 191 +build 0 85 2.484907 0.000000 184 +optim 0 79 2.564949 0.000000 197 +know 0 80 2.564949 0.000000 198 +dynam 0 76 2.564949 0.000000 194 +tuesdai 0 73 2.639057 0.000000 219 +addit 0 74 2.639057 0.000000 228 +nation 0 74 2.639057 0.000000 240 +symposium 0 72 2.639057 0.000000 238 +java 1 70 2.708050 2.708050 248 +thursdai 0 70 2.708050 0.000000 241 +view 0 70 2.708050 0.000000 254 +receiv 0 66 2.708050 0.000000 244 +evalu 1 64 2.772589 2.772589 266 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +best 0 59 2.833213 0.000000 299 +simpl 0 60 2.833213 0.000000 298 +direct 1 57 2.890372 2.890372 316 +explor 1 58 2.890372 2.890372 324 +summer 0 56 2.890372 0.000000 311 +thesi 0 57 2.890372 0.000000 327 +space 0 57 2.890372 0.000000 310 +found 0 53 2.944439 0.000000 337 +scientif 0 53 2.944439 0.000000 341 +hardwar 0 51 2.995732 0.000000 350 +profession 0 51 2.995732 0.000000 345 +standard 0 48 3.044522 0.000000 365 +right 0 48 3.044522 0.000000 363 +mark 1 44 3.135494 3.135494 403 +algebra 0 45 3.135494 0.000000 394 +even 0 45 3.135494 0.000000 393 +show 0 43 3.178054 0.000000 417 +fast 1 42 3.218876 3.218876 429 +compani 0 41 3.218876 0.000000 423 +combin 0 42 3.218876 0.000000 421 +live 1 40 3.258097 3.258097 451 +streetmadison 0 38 3.295837 0.000000 474 +open 0 38 3.295837 0.000000 469 +game 1 36 3.367296 3.367296 498 +multi 0 36 3.367296 0.000000 493 +next 1 34 3.401197 3.401197 517 +jame 0 35 3.401197 0.000000 507 +eduoffic 0 33 3.433987 0.000000 531 +quot 1 29 3.583519 3.583519 582 +team 0 27 3.637586 0.000000 625 +detect 1 26 3.688879 3.688879 646 +rule 1 26 3.688879 3.688879 638 +challeng 0 26 3.688879 0.000000 653 +hill 1 25 3.737670 3.737670 670 +departmentunivers 0 24 3.761200 0.000000 711 +mobil 0 23 3.806662 0.000000 730 +honor 0 23 3.806662 0.000000 729 +head 0 23 3.806662 0.000000 732 +serv 0 22 3.850148 0.000000 758 +martin 1 21 3.912023 3.912023 794 +programminglanguag 0 21 3.912023 0.000000 782 +divis 0 21 3.912023 0.000000 803 +wrote 0 20 3.951244 0.000000 830 +wonder 0 20 3.951244 0.000000 815 +element 0 18 4.060443 0.000000 895 +ultim 1 17 4.110874 4.110874 943 +medic 0 17 4.110874 0.000000 958 +senior 0 14 4.317488 0.000000 1120 +train 0 14 4.317488 0.000000 1066 +charl 1 13 4.382027 4.382027 1149 +everyon 1 13 4.382027 4.382027 1148 +land 1 12 4.465908 4.465908 1273 +promot 0 12 4.465908 0.000000 1235 +basketbal 0 12 4.465908 0.000000 1289 +player 1 11 4.553877 4.553877 1371 +transmiss 0 9 4.753590 0.000000 1588 +discov 0 9 4.753590 0.000000 1562 +babylon 0 8 4.875197 0.000000 1731 +footbal 1 7 5.010635 5.010635 1912 +fischer 0 7 5.010635 0.000000 1893 +interestsi 0 7 5.010635 0.000000 1969 +paramet 0 7 5.010635 0.000000 1796 +dedic 0 7 5.010635 0.000000 1843 +ethic 0 7 5.010635 0.000000 1786 +trade 0 7 5.010635 0.000000 1815 +advis 1 6 5.164786 5.164786 2173 +reconstruct 0 6 5.164786 0.000000 2170 +determinist 0 6 5.164786 0.000000 2034 +pace 0 6 5.164786 0.000000 2011 +minnesota 1 5 5.347108 5.347108 2469 +argonn 0 5 5.347108 0.000000 2461 +nuclear 0 5 5.347108 0.000000 2576 +frisbe 0 5 5.347108 0.000000 2560 +publicationsresearch 1 4 5.568345 5.568345 2876 +chees 0 4 5.568345 0.000000 3090 +fink 1 3 5.857933 5.857933 3425 +assistantcomput 0 3 5.857933 0.000000 4027 +usaemail 0 3 5.857933 0.000000 3722 +sit 0 3 5.857933 0.000000 3953 +informationtechnolog 0 3 5.857933 0.000000 3836 +interchang 0 3 5.857933 0.000000 3893 +myfavorit 0 3 5.857933 0.000000 3852 +armi 0 3 5.857933 0.000000 3562 +milo 1 2 6.263398 6.263398 4781 +spectroscopi 0 2 6.263398 0.000000 6206 +meth 0 2 6.263398 0.000000 5872 +eventhough 0 2 6.263398 0.000000 6158 +conquer 0 2 6.263398 0.000000 5112 +combat 0 2 6.263398 0.000000 5473 +fight 0 2 6.263398 0.000000 5209 +monster 0 2 6.263398 0.000000 6207 +gustavu 1 1 6.957497 6.957497 19135 +adolphu 1 1 6.957497 6.957497 19136 +atlanti 1 1 6.957497 6.957497 19137 +humm 1 1 6.957497 6.957497 19138 +micklich 1 1 6.957497 6.957497 19139 +illicitsubst 1 1 6.957497 6.957497 19140 +neutron 1 1 6.957497 6.957497 19141 +hailperin 1 1 6.957497 6.957497 19142 +pagemilo 0 1 6.957497 0.000000 19143 +byappointmentba 0 1 6.957497 0.000000 19144 +larusteach 0 1 6.957497 0.000000 19145 +beinfluenc 0 1 6.957497 0.000000 19146 +yule 0 1 6.957497 0.000000 19147 +sagalovski 0 1 6.957497 0.000000 19148 +nucl 0 1 6.957497 0.000000 19149 +inst 0 1 6.957497 0.000000 19150 +languageflex 0 1 6.957497 0.000000 19151 +anintern 0 1 6.957497 0.000000 19152 +toadvanc 0 1 6.957497 0.000000 19153 +fosteringth 0 1 6.957497 0.000000 19154 +highestprofession 0 1 6.957497 0.000000 19155 +bignfl 0 1 6.957497 0.000000 19156 +vike 0 1 6.957497 0.000000 19157 +colon 0 1 6.957497 0.000000 19158 +imho 0 1 6.957497 0.000000 19159 +mythic 0 1 6.957497 0.000000 19160 +engaug 0 1 6.957497 0.000000 19161 +wizard 0 1 6.957497 0.000000 19162 +underworld 0 1 6.957497 0.000000 19163 +ofsocc 0 1 6.957497 0.000000 19164 +afrisbe 0 1 6.957497 0.000000 19165 +quarterback 0 1 6.957497 0.000000 19166 +ultimatein 0 1 6.957497 0.000000 19167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..6d665431 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +engin 0 297 1.098612 0.000000 20 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +wisconsin 1 169 1.791759 1.791759 54 +phone 0 175 1.791759 0.000000 45 +algorithm 0 162 1.791759 0.000000 57 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +construct 0 139 1.945910 0.000000 82 +databas 1 122 2.079442 2.079442 86 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +report 1 131 2.079442 2.079442 92 +dayton 0 119 2.079442 0.000000 104 +confer 0 126 2.079442 0.000000 100 +technolog 0 131 2.079442 0.000000 102 +assist 0 112 2.197225 0.000000 113 +manag 0 114 2.197225 0.000000 125 +check 0 115 2.197225 0.000000 118 +technic 1 100 2.302585 2.302585 140 +proceed 0 93 2.397895 0.000000 152 +octob 0 89 2.397895 0.000000 156 +real 0 93 2.397895 0.000000 144 +center 0 88 2.397895 0.000000 158 +resourc 1 81 2.484907 2.484907 172 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +optim 1 79 2.564949 2.564949 197 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +issu 0 78 2.564949 0.000000 211 +resum 0 79 2.564949 0.000000 217 +server 0 76 2.564949 0.000000 204 +free 0 73 2.639057 0.000000 224 +logic 0 71 2.639057 0.000000 230 +multimedia 1 68 2.708050 2.708050 258 +view 0 70 2.708050 0.000000 254 +dept 1 64 2.772589 2.772589 291 +complex 0 64 2.772589 0.000000 269 +abstract 0 62 2.772589 0.000000 276 +juli 0 60 2.833213 0.000000 305 +februari 0 54 2.944439 0.000000 328 +advisor 0 51 2.995732 0.000000 355 +pointer 0 48 3.044522 0.000000 368 +effect 0 46 3.091042 0.000000 385 +continu 0 39 3.258097 0.000000 448 +submit 0 39 3.258097 0.000000 440 +societi 0 40 3.258097 0.000000 456 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +feel 0 37 3.332205 0.000000 483 +multi 0 36 3.367296 0.000000 493 +survei 0 35 3.401197 0.000000 513 +michael 0 35 3.401197 0.000000 514 +queri 1 33 3.433987 3.433987 524 +enhanc 0 26 3.688879 0.000000 644 +sequenti 0 22 3.850148 0.000000 745 +sigmod 1 19 4.007333 4.007333 877 +media 0 19 4.007333 0.000000 861 +dimension 0 18 4.060443 0.000000 909 +stat 0 17 4.110874 0.000000 924 +canada 0 13 4.382027 0.000000 1158 +dbm 0 13 4.382027 0.000000 1136 +probabilist 0 11 4.553877 0.000000 1343 +vldb 0 10 4.653960 0.000000 1470 +candid 0 9 4.753590 0.000000 1606 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +watson 0 8 4.875197 0.000000 1691 +refere 0 7 5.010635 0.000000 1895 +montreal 0 7 5.010635 0.000000 1961 +usaoffic 0 6 5.164786 0.000000 2159 +silberschatz 0 6 5.164786 0.000000 1978 +peek 0 6 5.164786 0.000000 2169 +almaden 0 5 5.347108 0.000000 2511 +informat 0 3 5.857933 0.000000 3839 +mino 1 2 6.263398 6.263398 6208 +garofalaki 1 2 6.263398 6.263398 6209 +patra 1 2 6.263398 6.263398 5537 +ozden 0 2 6.263398 0.000000 5749 +reasearch 0 2 6.263398 0.000000 5538 +hellen 0 2 6.263398 0.000000 6210 +garofalakismino 0 1 6.957497 0.000000 19168 +eduphd 0 1 6.957497 0.000000 19169 +workresearch 0 1 6.957497 0.000000 19170 +theoryeduc 0 1 6.957497 0.000000 19171 +banu 0 1 6.957497 0.000000 19172 +ioannidismor 0 1 6.957497 0.000000 19173 +centerdr 0 1 6.957497 0.000000 19174 +bibliograpi 0 1 6.957497 0.000000 19175 +perpetu 0 1 6.957497 0.000000 19176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..3dd01822 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +wisconsin 0 169 1.791759 0.000000 54 +dayton 0 119 2.079442 0.000000 104 +associ 0 93 2.397895 0.000000 151 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +sciencesunivers 0 37 3.332205 0.000000 486 +paradyn 0 9 4.753590 0.000000 1614 +marcelo 1 2 6.263398 6.263398 6199 +sheboygan 0 2 6.263398 0.000000 6198 +gonalv 1 1 6.957497 6.957497 19177 +mjrg 0 1 6.957497 0.000000 19178 +addresswork 0 1 6.957497 0.000000 19179 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..8ddf78a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +interest 0 384 0.693147 0.000000 11 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +group 0 183 1.609438 0.000000 36 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +network 0 168 1.791759 0.000000 61 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +imag 0 91 2.397895 0.000000 161 +west 0 83 2.484907 0.000000 192 +advisor 0 51 2.995732 0.000000 355 +telephon 0 50 3.044522 0.000000 373 +video 0 44 3.135494 0.000000 405 +vision 0 41 3.218876 0.000000 430 +brian 1 38 3.295837 3.295837 466 +streetmadison 0 38 3.295837 0.000000 474 +compress 0 23 3.806662 0.000000 719 +chuck 0 14 4.317488 0.000000 1108 +bandwidth 0 11 4.553877 0.000000 1365 +morgan 1 9 4.753590 4.753590 1484 +conferenc 1 7 5.010635 5.010635 1857 +studentcomput 0 7 5.010635 0.000000 1963 +morgangradu 0 1 6.957497 0.000000 19180 +dyerresearch 0 1 6.957497 0.000000 19181 +interestsvirtu 0 1 6.957497 0.000000 19182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..24066142 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +gener 0 220 1.386294 0.000000 27 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +address 0 170 1.791759 0.000000 62 +parallel 0 169 1.791759 0.000000 60 +implement 0 152 1.791759 0.000000 52 +algorithm 0 162 1.791759 0.000000 57 +network 0 168 1.791759 0.000000 61 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +click 0 142 1.945910 0.000000 78 +like 0 132 1.945910 0.000000 81 +postscript 1 131 2.079442 2.079442 90 +report 0 131 2.079442 0.000000 92 +compil 0 122 2.079442 0.000000 96 +spring 0 131 2.079442 0.000000 88 +assist 0 112 2.197225 0.000000 113 +send 0 114 2.197225 0.000000 109 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +mani 1 92 2.397895 2.397895 150 +sinc 0 90 2.397895 0.000000 159 +level 0 87 2.484907 0.000000 180 +chang 0 82 2.484907 0.000000 163 +want 0 79 2.564949 0.000000 199 +write 0 72 2.639057 0.000000 222 +meet 0 72 2.639057 0.000000 229 +nation 0 74 2.639057 0.000000 240 +free 0 73 2.639057 0.000000 224 +degre 0 69 2.708050 0.000000 259 +descript 0 64 2.772589 0.000000 271 +visit 0 63 2.772589 0.000000 288 +copi 0 63 2.772589 0.000000 284 +processor 1 54 2.944439 2.944439 335 +sampl 0 53 2.944439 0.000000 339 +talk 0 53 2.944439 0.000000 336 +instruct 0 53 2.944439 0.000000 332 +found 0 53 2.944439 0.000000 337 +local 0 55 2.944439 0.000000 334 +advisor 0 51 2.995732 0.000000 355 +numer 0 49 3.044522 0.000000 369 +futur 0 41 3.218876 0.000000 427 +york 0 41 3.218876 0.000000 435 +howev 0 41 3.218876 0.000000 422 +editor 0 41 3.218876 0.000000 433 +edit 0 42 3.218876 0.000000 418 +slide 0 38 3.295837 0.000000 467 +sciencesunivers 0 37 3.332205 0.000000 486 +download 1 36 3.367296 3.367296 489 +short 0 36 3.367296 0.000000 499 +obtain 0 33 3.433987 0.000000 534 +depend 0 29 3.583519 0.000000 583 +load 0 28 3.610918 0.000000 601 +bookmark 0 26 3.688879 0.000000 639 +compress 0 23 3.806662 0.000000 719 +instal 0 22 3.850148 0.000000 754 +leav 0 21 3.912023 0.000000 772 +vlsi 0 21 3.912023 0.000000 795 +sure 0 20 3.951244 0.000000 813 +excel 0 19 4.007333 0.000000 868 +transfer 0 16 4.174387 0.000000 967 +balanc 0 14 4.317488 0.000000 1112 +brother 0 13 4.382027 0.000000 1189 +wife 0 13 4.382027 0.000000 1196 +resid 0 10 4.653960 0.000000 1461 +poetri 0 9 4.753590 0.000000 1596 +herefor 0 9 4.753590 0.000000 1483 +multiscalar 1 8 4.875197 4.875197 1783 +dictionari 0 8 4.875197 0.000000 1642 +earn 0 7 5.010635 0.000000 1788 +pipelin 0 7 5.010635 0.000000 1830 +greec 1 6 5.164786 5.164786 2208 +peek 0 6 5.164786 0.000000 2169 +andrea 1 5 5.347108 5.347108 2375 +guri 0 5 5.347108 0.000000 2578 +hyper 0 5 5.347108 0.000000 2435 +kestrel 0 4 5.568345 0.000000 2990 +decoupl 0 4 5.568345 0.000000 2898 +mess 0 4 5.568345 0.000000 2886 +specul 1 3 5.857933 5.857933 3951 +crete 1 3 5.857933 5.857933 3773 +greek 1 3 5.857933 5.857933 3595 +uncompress 0 3 5.857933 0.000000 3177 +moshovo 0 2 6.263398 0.000000 6211 +madisonadvisor 0 2 6.263398 0.000000 6212 +instituteof 0 2 6.263398 0.000000 5507 +architecturethat 0 2 6.263398 0.000000 5876 +hellen 0 2 6.263398 0.000000 6210 +font 0 2 6.263398 0.000000 5845 +moshovosresearch 0 1 6.957497 0.000000 19183 +sohigroup 0 1 6.957497 0.000000 19184 +notese 0 1 6.957497 0.000000 19185 +aroundw 0 1 6.957497 0.000000 19186 +clickheremi 0 1 6.957497 0.000000 19187 +explot 0 1 6.957497 0.000000 19188 +thecour 0 1 6.957497 0.000000 19189 +theopportun 0 1 6.957497 0.000000 19190 +kateveni 0 1 6.957497 0.000000 19191 +viha 0 1 6.957497 0.000000 19192 +resouc 0 1 6.957497 0.000000 19193 +atwww 0 1 6.957497 0.000000 19194 +devil 0 1 6.957497 0.000000 19195 +fraud 0 1 6.957497 0.000000 19196 +centerusenet 0 1 6.957497 0.000000 19197 +afax 0 1 6.957497 0.000000 19198 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..eb7db0c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +updat 1 191 1.609438 1.609438 41 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +contact 1 153 1.791759 1.791759 59 +octob 0 89 2.397895 0.000000 156 +chang 1 82 2.484907 2.484907 163 +second 0 81 2.484907 0.000000 166 +level 0 87 2.484907 0.000000 180 +main 0 67 2.708050 0.000000 256 +back 0 60 2.833213 0.000000 297 +sever 0 56 2.890372 0.000000 322 +variou 0 56 2.890372 0.000000 317 +index 0 56 2.890372 0.000000 309 +friend 0 48 3.044522 0.000000 376 +better 0 45 3.135494 0.000000 401 +keep 0 44 3.135494 0.000000 409 +favorit 0 44 3.135494 0.000000 410 +background 0 25 3.737670 0.000000 664 +navig 0 21 3.912023 0.000000 796 +brief 0 16 4.174387 0.000000 1001 +minor 0 12 4.465908 0.000000 1237 +black 0 10 4.653960 0.000000 1418 +prefer 0 9 4.753590 0.000000 1491 +contrast 0 8 4.875197 0.000000 1637 +older 0 5 5.347108 0.000000 2387 +toni 1 3 5.857933 5.857933 3415 +herear 0 2 6.263398 0.000000 5947 +pagewhat 0 1 6.957497 0.000000 19199 +newoctob 0 1 6.957497 0.000000 19200 +inmadison 0 1 6.957497 0.000000 19201 +informationlast 0 1 6.957497 0.000000 19202 +educopyright 0 1 6.957497 0.000000 19203 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..5928c863 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +student 1 343 1.098612 1.098612 19 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +gener 0 220 1.386294 0.000000 27 +updat 0 191 1.609438 0.000000 41 +modifi 0 178 1.609438 0.000000 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +year 1 148 1.945910 1.945910 84 +model 0 145 1.945910 0.000000 69 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +machin 0 129 2.079442 0.000000 95 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +studi 0 120 2.079442 0.000000 91 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +mathemat 0 108 2.197225 0.000000 123 +well 0 109 2.197225 0.000000 121 +section 0 94 2.397895 0.000000 149 +exam 1 86 2.484907 2.484907 169 +thing 1 84 2.484907 2.484907 189 +educ 0 86 2.484907 0.000000 191 +learn 0 86 2.484907 0.000000 170 +stuff 0 87 2.484907 0.000000 171 +want 1 79 2.564949 2.564949 199 +resum 0 79 2.564949 0.000000 217 +know 0 80 2.564949 0.000000 198 +logic 0 71 2.639057 0.000000 230 +html 0 75 2.639057 0.000000 235 +free 0 73 2.639057 0.000000 224 +line 0 75 2.639057 0.000000 231 +dept 1 64 2.772589 2.772589 291 +previou 0 62 2.772589 0.000000 290 +plai 0 60 2.833213 0.000000 307 +reason 0 57 2.890372 0.000000 318 +summer 0 56 2.890372 0.000000 311 +talk 0 53 2.944439 0.000000 336 +februari 0 54 2.944439 0.000000 328 +finger 0 52 2.995732 0.000000 354 +particular 0 51 2.995732 0.000000 352 +digit 0 52 2.995732 0.000000 348 +telephon 1 50 3.044522 3.044522 373 +life 0 50 3.044522 0.000000 375 +even 1 45 3.135494 3.135494 393 +better 0 45 3.135494 0.000000 401 +third 0 43 3.178054 0.000000 412 +around 0 43 3.178054 0.000000 415 +might 1 41 3.218876 3.218876 426 +examin 0 42 3.218876 0.000000 424 +realli 0 40 3.258097 0.000000 444 +probabl 0 40 3.258097 0.000000 455 +feel 0 37 3.332205 0.000000 483 +product 0 33 3.433987 0.000000 527 +often 0 31 3.496508 0.000000 551 +usual 0 28 3.610918 0.000000 608 +weather 0 28 3.610918 0.000000 618 +campu 0 27 3.637586 0.000000 623 +enjoi 0 26 3.688879 0.000000 660 +notic 0 25 3.737670 0.000000 675 +departmentunivers 0 24 3.761200 0.000000 711 +sometim 0 24 3.761200 0.000000 696 +alwai 0 24 3.761200 0.000000 691 +wish 0 24 3.761200 0.000000 692 +head 0 23 3.806662 0.000000 732 +hierarchi 0 22 3.850148 0.000000 744 +martin 1 21 3.912023 3.912023 794 +exploit 0 20 3.951244 0.000000 836 +exercis 0 19 4.007333 0.000000 842 +concentr 0 18 4.060443 0.000000 906 +ultim 0 17 4.110874 0.000000 943 +doesn 0 15 4.248495 0.000000 1055 +qual 0 15 4.248495 0.000000 1062 +senior 0 14 4.317488 0.000000 1120 +role 0 14 4.317488 0.000000 1101 +affili 0 13 4.382027 0.000000 1194 +basketbal 0 12 4.465908 0.000000 1289 +mountain 0 10 4.653960 0.000000 1456 +bike 0 10 4.653960 0.000000 1468 +mention 0 9 4.753590 0.000000 1569 +qualifi 0 8 4.875197 0.000000 1721 +heart 0 8 4.875197 0.000000 1729 +besid 0 8 4.875197 0.000000 1681 +round 0 8 4.875197 0.000000 1769 +relax 0 6 5.164786 0.000000 2120 +squash 0 6 5.164786 0.000000 2223 +adjust 0 5 5.347108 0.000000 2422 +crucial 0 5 5.347108 0.000000 2384 +frisbe 0 5 5.347108 0.000000 2560 +gone 1 4 5.568345 5.568345 3072 +afraid 0 4 5.568345 0.000000 3053 +poorli 0 4 5.568345 0.000000 2781 +wesleyan 0 3 5.857933 0.000000 3988 +coke 1 2 6.263398 6.263398 5935 +ream 1 2 6.263398 6.263398 4783 +mream 1 2 6.263398 6.263398 4784 +terrain 0 2 6.263398 0.000000 6174 +logicprogram 0 2 6.263398 0.000000 4262 +interestsin 0 2 6.263398 0.000000 6213 +unif 0 2 6.263398 0.000000 5910 +mighti 0 2 6.263398 0.000000 4863 +tomi 0 2 6.263398 0.000000 5846 +mental 0 2 6.263398 0.000000 5802 +yeargradu 0 2 6.263398 0.000000 6015 +poobah 1 1 6.957497 6.957497 19204 +edufal 0 1 6.957497 0.000000 19205 +scheduleresearch 0 1 6.957497 0.000000 19206 +tin 0 1 6.957497 0.000000 19207 +orientedenviron 0 1 6.957497 0.000000 19208 +postscriptand 0 1 6.957497 0.000000 19209 +faint 0 1 6.957497 0.000000 19210 +alink 0 1 6.957497 0.000000 19211 +honorsthesi 0 1 6.957497 0.000000 19212 +poobahlook 0 1 6.957497 0.000000 19213 +dear 0 1 6.957497 0.000000 19214 +tosomeon 0 1 6.957497 0.000000 19215 +youshould 0 1 6.957497 0.000000 19216 +elton 0 1 6.957497 0.000000 19217 +imaginethat 0 1 6.957497 0.000000 19218 +aforement 0 1 6.957497 0.000000 19219 +poobahship 0 1 6.957497 0.000000 19220 +ill 0 1 6.957497 0.000000 19221 +afew 0 1 6.957497 0.000000 19222 +indatabas 0 1 6.957497 0.000000 19223 +inearli 0 1 6.957497 0.000000 19224 +andinfrequ 0 1 6.957497 0.000000 19225 +rapidlyrid 0 1 6.957497 0.000000 19226 +chilliest 0 1 6.957497 0.000000 19227 +helmet 0 1 6.957497 0.000000 19228 +mynot 0 1 6.957497 0.000000 19229 +ilik 0 1 6.957497 0.000000 19230 +librarylast 0 1 6.957497 0.000000 19231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..ee62b87a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +avail 0 169 1.791759 0.000000 48 +architectur 1 139 1.945910 1.945910 77 +relat 0 139 1.945910 0.000000 68 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +peopl 0 96 2.302585 0.000000 132 +user 0 104 2.302585 0.000000 137 +wide 0 84 2.484907 0.000000 185 +sourc 0 77 2.564949 0.000000 201 +talk 0 53 2.944439 0.000000 336 +local 0 55 2.944439 0.000000 334 +februari 0 54 2.944439 0.000000 328 +given 0 32 3.465736 0.000000 538 +fund 0 21 3.912023 0.000000 805 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 1 6 5.164786 5.164786 2237 +departmentat 0 5 5.347108 0.000000 2513 +guri 0 5 5.347108 0.000000 2578 +contributor 0 2 6.263398 0.000000 6214 +pagewisconsin 0 1 6.957497 0.000000 19232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..ac86c4c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +also 0 259 1.386294 0.000000 28 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +note 0 142 1.945910 0.000000 67 +year 0 148 1.945910 0.000000 84 +perform 0 143 1.945910 0.000000 74 +introduct 1 126 2.079442 2.079442 87 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +world 0 115 2.197225 0.000000 126 +technic 0 100 2.302585 0.000000 140 +question 1 91 2.397895 2.397895 141 +center 0 88 2.397895 0.000000 158 +section 0 94 2.397895 0.000000 149 +pictur 0 89 2.397895 0.000000 160 +build 0 85 2.484907 0.000000 184 +school 0 84 2.484907 0.000000 188 +start 0 83 2.484907 0.000000 173 +member 0 84 2.484907 0.000000 165 +wide 0 84 2.484907 0.000000 185 +come 1 78 2.564949 2.564949 202 +april 0 77 2.564949 0.000000 196 +know 0 80 2.564949 0.000000 198 +intellig 0 72 2.639057 0.000000 225 +receiv 0 66 2.708050 0.000000 244 +degre 0 69 2.708050 0.000000 259 +test 0 66 2.708050 0.000000 252 +artifici 0 63 2.772589 0.000000 280 +back 0 60 2.833213 0.000000 297 +semest 0 58 2.890372 0.000000 312 +undergradu 0 54 2.944439 0.000000 338 +friend 1 48 3.044522 3.044522 376 +frequent 1 49 3.044522 3.044522 367 +right 0 48 3.044522 0.000000 363 +still 0 50 3.044522 0.000000 362 +favorit 1 44 3.135494 3.135494 410 +algebra 0 45 3.135494 0.000000 394 +around 1 43 3.178054 3.178054 415 +vision 0 41 3.218876 0.000000 430 +hand 0 37 3.332205 0.000000 475 +michael 0 35 3.401197 0.000000 514 +eduoffic 0 33 3.433987 0.000000 531 +photo 0 31 3.496508 0.000000 561 +ask 1 28 3.610918 3.610918 597 +univ 0 28 3.610918 0.000000 617 +hope 0 28 3.610918 0.000000 610 +administr 0 27 3.637586 0.000000 628 +comp 0 26 3.688879 0.000000 650 +sport 0 25 3.737670 0.000000 683 +mike 0 24 3.761200 0.000000 703 +sometim 0 24 3.761200 0.000000 696 +theunivers 0 21 3.912023 0.000000 797 +lower 0 18 4.060443 0.000000 886 +stat 0 17 4.110874 0.000000 924 +bachelor 0 17 4.110874 0.000000 957 +chuck 0 14 4.317488 0.000000 1108 +dave 0 14 4.317488 0.000000 1098 +sai 0 13 4.382027 0.000000 1175 +suit 0 13 4.382027 0.000000 1129 +land 0 12 4.465908 0.000000 1273 +mari 0 12 4.465908 0.000000 1266 +touch 0 12 4.465908 0.000000 1288 +host 0 11 4.553877 0.000000 1306 +folk 0 9 4.753590 0.000000 1597 +joel 0 8 4.875197 0.000000 1698 +usenet 1 7 5.010635 5.010635 1839 +corner 0 7 5.010635 0.000000 1909 +maryland 1 6 5.164786 5.164786 2140 +gzip 0 6 5.164786 0.000000 2117 +billi 0 5 5.347108 0.000000 2404 +madisoncomput 0 5 5.347108 0.000000 2391 +steel 1 4 5.568345 5.568345 2818 +chees 0 4 5.568345 0.000000 3090 +kill 0 4 5.568345 0.000000 3000 +sit 0 3 5.857933 0.000000 3953 +stamp 0 3 5.857933 0.000000 3678 +artificialintellig 0 3 5.857933 0.000000 3608 +narrow 0 3 5.857933 0.000000 3807 +predat 0 3 5.857933 0.000000 3135 +forgot 0 2 6.263398 0.000000 4769 +linksmi 0 2 6.263398 0.000000 6215 +barri 0 2 6.263398 0.000000 5149 +eduunivers 0 2 6.263398 0.000000 6216 +homepagemik 0 1 6.957497 0.000000 19233 +homepagemsteel 0 1 6.957497 0.000000 19234 +struggl 0 1 6.957497 0.000000 19235 +sometimearound 0 1 6.957497 0.000000 19236 +motto 0 1 6.957497 0.000000 19237 +freezein 0 1 6.957497 0.000000 19238 +graduateinstructor 0 1 6.957497 0.000000 19239 +scomput 0 1 6.957497 0.000000 19240 +publicationsgrindston 0 1 6.957497 0.000000 19241 +jefferyk 0 1 6.957497 0.000000 19242 +hollingsworth 0 1 6.957497 0.000000 19243 +reportc 0 1 6.957497 0.000000 19244 +postscriptfil 0 1 6.957497 0.000000 19245 +semesterc 0 1 6.957497 0.000000 19246 +vernonc 0 1 6.957497 0.000000 19247 +dyermi 0 1 6.957497 0.000000 19248 +pagesinform 0 1 6.957497 0.000000 19249 +gettingin 0 1 6.957497 0.000000 19250 +marylandwhom 0 1 6.957497 0.000000 19251 +teamssom 0 1 6.957497 0.000000 19252 +listth 0 1 6.957497 0.000000 19253 +listi 0 1 6.957497 0.000000 19254 +thefruit 0 1 6.957497 0.000000 19255 +ofmaryland 0 1 6.957497 0.000000 19256 +insidejok 0 1 6.957497 0.000000 19257 +andnow 0 1 6.957497 0.000000 19258 +someinfrar 0 1 6.957497 0.000000 19259 +looklik 0 1 6.957497 0.000000 19260 +infrar 0 1 6.957497 0.000000 19261 +memik 0 1 6.957497 0.000000 19262 +steelemsteel 0 1 6.957497 0.000000 19263 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..2fb55bb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +section 0 94 2.397895 0.000000 149 +want 0 79 2.564949 0.000000 199 +visit 1 63 2.772589 2.772589 288 +colleg 0 61 2.833213 0.000000 300 +might 0 41 3.218876 0.000000 426 +mayb 0 15 4.248495 0.000000 1014 +maryland 0 6 5.164786 0.000000 2140 +park 0 6 5.164786 0.000000 2218 +maria 0 4 5.568345 0.000000 2954 +pagemaria 0 1 6.957497 0.000000 19264 +pagehow 0 1 6.957497 0.000000 19265 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..f53f67bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 0 374 0.693147 0.000000 7 +project 0 340 1.098612 0.000000 18 +time 0 293 1.098612 0.000000 17 +also 0 259 1.386294 0.000000 28 +wisc 0 242 1.386294 0.000000 33 +parallel 0 169 1.791759 0.000000 60 +read 0 154 1.791759 0.000000 47 +like 1 132 1.945910 1.945910 81 +area 0 144 1.945910 0.000000 80 +perform 0 143 1.945910 0.000000 74 +year 0 148 1.945910 0.000000 84 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +check 0 115 2.197225 0.000000 118 +part 0 98 2.302585 0.000000 129 +peopl 0 96 2.302585 0.000000 132 +book 0 99 2.302585 0.000000 131 +pictur 0 89 2.397895 0.000000 160 +member 0 84 2.484907 0.000000 165 +good 0 77 2.564949 0.000000 200 +main 0 67 2.708050 0.000000 256 +locat 0 59 2.833213 0.000000 303 +plai 0 60 2.833213 0.000000 307 +finger 0 52 2.995732 0.000000 354 +visual 0 48 3.044522 0.000000 372 +around 0 43 3.178054 0.000000 415 +music 0 42 3.218876 0.000000 436 +howev 0 41 3.218876 0.000000 422 +staff 0 36 3.367296 0.000000 490 +known 0 24 3.761200 0.000000 702 +finish 0 22 3.850148 0.000000 748 +born 0 21 3.912023 0.000000 798 +fact 0 21 3.912023 0.000000 780 +watch 0 21 3.912023 0.000000 789 +citi 0 19 4.007333 0.000000 874 +spend 0 19 4.007333 0.000000 850 +beauti 0 18 4.060443 0.000000 912 +stori 0 14 4.317488 0.000000 1087 +central 0 13 4.382027 0.000000 1160 +cook 0 10 4.653960 0.000000 1464 +paradyn 0 9 4.753590 0.000000 1614 +undergrad 0 9 4.753590 0.000000 1589 +guitar 1 8 4.875197 4.875197 1758 +simon 0 8 4.875197 0.000000 1697 +capit 0 7 5.010635 0.000000 1957 +apart 0 7 5.010635 0.000000 1936 +antonio 0 6 5.164786 0.000000 2186 +england 0 5 5.347108 0.000000 2557 +million 0 5 5.347108 0.000000 2495 +western 0 4 5.568345 0.000000 3062 +basebal 0 4 5.568345 0.000000 2969 +myph 0 3 5.857933 0.000000 3880 +popul 0 3 5.857933 0.000000 3235 +pleaseclick 0 2 6.263398 0.000000 5432 +venezuela 1 1 6.957497 6.957497 19266 +barquisimeto 1 1 6.957497 6.957497 19267 +naim 1 1 6.957497 6.957497 19268 +oscar 0 1 6.957497 0.000000 19269 +bienvenido 0 1 6.957497 0.000000 19270 +southampton 0 1 6.957497 0.000000 19271 +universidad 0 1 6.957497 0.000000 19272 +bolivar 0 1 6.957497 0.000000 19273 +caraca 0 1 6.957497 0.000000 19274 +barquisimetoi 0 1 6.957497 0.000000 19275 +ofabout 0 1 6.957497 0.000000 19276 +playclass 0 1 6.957497 0.000000 19277 +excellentmaestro 0 1 6.957497 0.000000 19278 +rodrigo 0 1 6.957497 0.000000 19279 +riera 0 1 6.957497 0.000000 19280 +lauro 0 1 6.957497 0.000000 19281 +sherlock 0 1 6.957497 0.000000 19282 +holm 0 1 6.957497 0.000000 19283 +beati 0 1 6.957497 0.000000 19284 +mundo 0 1 6.957497 0.000000 19285 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..0f84f900 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +find 0 111 2.197225 0.000000 111 +make 0 111 2.197225 0.000000 120 +send 0 114 2.197225 0.000000 109 +pictur 0 89 2.397895 0.000000 160 +want 0 79 2.564949 0.000000 199 +street 0 63 2.772589 0.000000 293 +import 0 65 2.772589 0.000000 282 +guid 0 63 2.772589 0.000000 267 +visit 0 63 2.772589 0.000000 288 +realli 1 40 3.258097 3.258097 444 +notic 0 25 3.737670 0.000000 675 +nice 0 20 3.951244 0.000000 809 +georg 0 16 4.174387 0.000000 994 +worth 0 11 4.553877 0.000000 1294 +alex 0 6 5.164786 0.000000 2130 +greek 0 3 5.857933 0.000000 3595 +decent 0 2 6.263398 0.000000 5542 +rochest 0 2 6.263398 0.000000 6142 +anastassia 1 1 6.957497 6.957497 19286 +ailamaki 1 1 6.957497 6.957497 19287 +islandsar 0 1 6.957497 0.000000 19288 +natassa 0 1 6.957497 0.000000 19289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..73b40a76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +current 1 284 1.098612 1.098612 21 +wisc 0 242 1.386294 0.000000 33 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +develop 0 174 1.791759 0.000000 53 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +perform 1 143 1.945910 1.945910 74 +area 0 144 1.945910 0.000000 80 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +techniqu 1 99 2.302585 2.302585 138 +proceed 0 93 2.397895 0.000000 152 +larg 1 82 2.484907 2.484907 168 +david 0 71 2.639057 0.000000 232 +goal 0 66 2.708050 0.000000 250 +main 0 67 2.708050 0.000000 256 +improv 0 62 2.772589 0.000000 289 +three 0 54 2.944439 0.000000 330 +submit 0 39 3.258097 0.000000 440 +multi 0 36 3.367296 0.000000 493 +michael 0 35 3.401197 0.000000 514 +storag 1 31 3.496508 3.496508 553 +arrai 0 27 3.637586 0.000000 627 +toward 0 25 3.737670 0.000000 668 +hierarchi 0 22 3.850148 0.000000 744 +prepar 0 20 3.951244 0.000000 824 +benchmark 1 19 4.007333 4.007333 859 +dimension 1 18 4.060443 4.060443 909 +estim 0 17 4.110874 0.000000 930 +spatial 0 16 4.174387 0.000000 988 +ramakrishnan 0 16 4.174387 0.000000 972 +indic 0 15 4.248495 0.000000 1013 +dbm 1 13 4.382027 4.382027 1136 +overal 0 12 4.465908 0.000000 1254 +workload 0 12 4.465908 0.000000 1210 +gupta 0 12 4.465908 0.000000 1241 +raghu 0 12 4.465908 0.000000 1212 +dewitt 0 12 4.465908 0.000000 1270 +jeffrei 0 9 4.753590 0.000000 1612 +presenc 0 8 4.875197 0.000000 1671 +carei 0 8 4.875197 0.000000 1781 +bombai 1 7 5.010635 5.010635 1972 +prasad 1 6 5.164786 5.164786 2126 +eduresearch 0 6 5.164786 0.000000 2205 +aggreg 0 6 5.164786 0.000000 2219 +deshpand 1 5 5.347108 5.347108 2431 +eas 0 5 5.347108 0.000000 2267 +ofinterest 0 5 5.347108 0.000000 2323 +ashish 0 5 5.347108 0.000000 2473 +tuft 0 5 5.347108 0.000000 2575 +multidimension 1 4 5.568345 5.568345 3091 +cube 0 4 5.568345 0.000000 2940 +amit 0 4 5.568345 0.000000 2972 +ramasami 0 4 5.568345 0.000000 3088 +shah 0 4 5.568345 0.000000 2814 +zhao 0 4 5.568345 0.000000 2699 +kristin 0 4 5.568345 0.000000 3089 +mumbai 1 3 5.857933 5.857933 4029 +surpass 0 3 5.857933 0.000000 3247 +shukla 0 3 5.857933 0.000000 4030 +karthikeyan 0 3 5.857933 0.000000 4031 +johann 0 3 5.857933 0.000000 3758 +moham 0 3 5.857933 0.000000 3848 +asgarian 0 3 5.857933 0.000000 3447 +andprocess 0 2 6.263398 0.000000 4925 +agarw 0 2 6.263398 0.000000 5352 +rakesh 0 2 6.263398 0.000000 6017 +agraw 0 2 6.263398 0.000000 4536 +molap 0 2 6.263398 0.000000 6217 +naughtonjeffrei 0 1 6.957497 0.000000 19290 +naughtonnaughton 0 1 6.957497 0.000000 19291 +interestsolap 0 1 6.957497 0.000000 19292 +relationaldbm 0 1 6.957497 0.000000 19293 +ofdatabas 0 1 6.957497 0.000000 19294 +inperform 0 1 6.957497 0.000000 19295 +ofmulti 0 1 6.957497 0.000000 19296 +computingth 0 1 6.957497 0.000000 19297 +valuedattribut 0 1 6.957497 0.000000 19298 +withsameet 0 1 6.957497 0.000000 19299 +sunita 0 1 6.957497 0.000000 19300 +sarawagi 0 1 6.957497 0.000000 19301 +thend 0 1 6.957497 0.000000 19302 +aggregatesin 0 1 6.957497 0.000000 19303 +bucki 0 1 6.957497 0.000000 19304 +gerhk 0 1 6.957497 0.000000 19305 +dhaval 0 1 6.957497 0.000000 19306 +withyihong 0 1 6.957497 0.000000 19307 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..ccc61c52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +address 0 170 1.791759 0.000000 62 +area 0 144 1.945910 0.000000 80 +databas 0 122 2.079442 0.000000 86 +look 0 107 2.197225 0.000000 115 +stuff 0 87 2.484907 0.000000 171 +thing 0 84 2.484907 0.000000 189 +optim 0 79 2.564949 0.000000 197 +david 0 71 2.639057 0.000000 232 +plan 0 65 2.772589 0.000000 272 +explor 0 58 2.890372 0.000000 324 +archiv 0 49 3.044522 0.000000 364 +could 0 46 3.091042 0.000000 383 +better 0 45 3.135494 0.000000 401 +queri 0 33 3.433987 0.000000 524 +bookmark 0 26 3.688879 0.000000 639 +indian 0 22 3.850148 0.000000 769 +among 0 21 3.912023 0.000000 781 +noth 0 11 4.553877 0.000000 1328 +song 0 11 4.553877 0.000000 1380 +paradis 0 8 4.875197 0.000000 1782 +customiz 0 4 5.568345 0.000000 2966 +hindi 0 3 5.857933 0.000000 3753 +navin 1 2 6.263398 6.263398 5351 +madisonadvisor 0 2 6.263398 0.000000 6212 +dewittresearch 0 2 6.263398 0.000000 6185 +kabranavin 0 1 6.957497 0.000000 19308 +kabragradu 0 1 6.957497 0.000000 19309 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..b873b2ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +group 0 183 1.609438 0.000000 36 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +perform 1 143 1.945910 1.945910 74 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +analysi 0 124 2.079442 0.000000 98 +pictur 0 89 2.397895 0.000000 160 +chang 0 82 2.484907 0.000000 163 +java 0 70 2.708050 0.000000 248 +august 0 66 2.708050 0.000000 257 +telephon 0 50 3.044522 0.000000 373 +scalabl 0 24 3.761200 0.000000 705 +predict 0 19 4.007333 0.000000 855 +bart 0 9 4.753590 0.000000 1559 +newhal 1 1 6.957497 6.957497 19310 +newhalltia 0 1 6.957497 0.000000 19311 +paradynadvisor 0 1 6.957497 0.000000 19312 +millermummi 0 1 6.957497 0.000000 19313 +guanajuato 0 1 6.957497 0.000000 19314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..67e75bd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +project 0 340 1.098612 0.000000 18 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +object 0 138 1.945910 0.000000 79 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +scalabl 0 24 3.761200 0.000000 705 +repositori 0 17 4.110874 0.000000 932 +heterogen 0 14 4.317488 0.000000 1090 +nanci 0 12 4.465908 0.000000 1256 +shore 0 11 4.553877 0.000000 1377 +hallcomput 0 1 6.957497 0.000000 19315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..4a41085f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +current 1 284 1.098612 1.098612 21 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +email 0 220 1.386294 0.000000 29 +softwar 0 220 1.386294 0.000000 30 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +class 0 199 1.609438 0.000000 37 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +algorithm 0 162 1.791759 0.000000 57 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +problem 2 147 1.945910 3.891820 75 +process 1 142 1.945910 1.945910 72 +professor 0 137 1.945910 0.000000 76 +report 3 131 2.079442 6.238326 92 +machin 1 129 2.079442 2.079442 95 +dayton 0 119 2.079442 0.000000 104 +mathemat 3 108 2.197225 6.591675 123 +theori 0 111 2.197225 0.000000 127 +topic 0 114 2.197225 0.000000 110 +well 0 109 2.197225 0.000000 121 +specif 0 106 2.197225 0.000000 106 +technic 3 100 2.302585 6.907755 140 +advanc 1 99 2.302585 2.302585 130 +techniqu 0 99 2.302585 0.000000 138 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +center 0 88 2.397895 0.000000 158 +mani 0 92 2.397895 0.000000 150 +real 0 93 2.397895 0.000000 144 +select 0 91 2.397895 0.000000 154 +learn 1 86 2.484907 2.484907 170 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +member 0 84 2.484907 0.000000 165 +optim 2 79 2.564949 5.129898 197 +decemb 1 80 2.564949 2.564949 215 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +solv 1 73 2.639057 2.639057 234 +summari 0 73 2.639057 0.000000 237 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +august 2 66 2.708050 5.416100 257 +view 1 70 2.708050 2.708050 254 +street 1 63 2.772589 2.772589 293 +function 1 62 2.772589 2.772589 275 +januari 1 62 2.772589 2.772589 264 +septemb 1 65 2.772589 2.772589 274 +result 0 65 2.772589 0.000000 281 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +new 0 64 2.772589 0.000000 262 +juli 1 60 2.833213 2.833213 305 +publish 0 57 2.890372 0.000000 326 +februari 1 54 2.944439 2.944439 328 +telephon 0 50 3.044522 0.000000 373 +life 0 50 3.044522 0.000000 375 +featur 0 46 3.091042 0.000000 386 +california 0 46 3.091042 0.000000 388 +linear 1 41 3.218876 3.218876 431 +editor 1 41 3.218876 3.218876 433 +press 0 42 3.218876 0.000000 419 +submit 1 39 3.258097 3.258097 440 +error 0 40 3.258097 0.000000 449 +paul 0 38 3.295837 0.000000 471 +download 0 36 3.367296 0.000000 489 +global 1 34 3.401197 3.401197 520 +bibliographi 0 34 3.401197 0.000000 518 +john 0 33 3.433987 0.000000 532 +toler 0 33 3.433987 0.000000 533 +neural 1 30 3.555348 3.555348 578 +rang 0 30 3.555348 0.000000 565 +cluster 0 28 3.610918 0.000000 612 +revis 2 26 3.688879 7.377758 640 +bound 1 26 3.688879 3.688879 659 +constraint 1 26 3.688879 3.688879 636 +aspect 1 25 3.737670 3.737670 663 +accur 0 25 3.737670 0.000000 680 +proof 0 23 3.806662 0.000000 720 +variabl 0 23 3.806662 0.000000 715 +equat 0 23 3.806662 0.000000 724 +verlag 0 22 3.850148 0.000000 751 +period 0 22 3.850148 0.000000 743 +chen 1 21 3.912023 3.912023 791 +siam 0 21 3.912023 0.000000 800 +predict 0 19 4.007333 0.000000 855 +eric 0 19 4.007333 0.000000 870 +separ 0 19 4.007333 0.000000 844 +minim 1 18 4.060443 4.060443 887 +differenti 0 17 4.110874 0.000000 921 +germani 0 17 4.110874 0.000000 946 +hybrid 0 15 4.248495 0.000000 1057 +nonlinear 1 14 4.317488 4.317488 1107 +train 0 14 4.317488 0.000000 1066 +francisco 0 14 4.317488 0.000000 1095 +nick 1 13 4.382027 4.382027 1180 +context 0 13 4.382027 0.000000 1153 +individu 0 13 4.382027 0.000000 1126 +broad 0 11 4.553877 0.000000 1302 +rich 0 10 4.653960 0.000000 1396 +strongli 0 10 4.653960 0.000000 1406 +penalti 0 10 4.653960 0.000000 1405 +mangasarian 3 9 4.753590 14.260770 1570 +pose 0 9 4.753590 0.000000 1535 +morgan 0 9 4.753590 0.000000 1484 +converg 1 7 5.010635 5.010635 1844 +smooth 1 7 5.010635 5.010635 1855 +harvard 0 7 5.010635 0.000000 1926 +fischer 0 7 5.010635 0.000000 1893 +serial 0 7 5.010635 0.000000 1975 +olvi 1 6 5.164786 5.164786 2109 +inequ 1 6 5.164786 5.164786 2113 +constrain 0 6 5.164786 0.000000 2042 +strong 0 6 5.164786 0.000000 2029 +mix 0 6 5.164786 0.000000 2200 +determinist 0 6 5.164786 0.000000 2034 +bradlei 1 5 5.347108 5.347108 2554 +variat 0 5 5.347108 0.000000 2248 +kaufmann 0 5 5.347108 0.000000 2254 +convex 1 4 5.568345 5.568345 2807 +concav 1 4 5.568345 5.568345 2808 +diagnosi 0 4 5.568345 0.000000 3027 +nonmonoton 0 4 5.568345 0.000000 3023 +net 0 4 5.568345 0.000000 2741 +complementar 1 3 5.857933 5.857933 3999 +cancer 1 3 5.857933 5.857933 4032 +breast 1 3 5.857933 5.857933 4033 +backpropag 1 3 5.857933 5.857933 3507 +neumann 0 3 5.857933 0.000000 3720 +programsand 0 3 5.857933 0.000000 3111 +programmingtechniqu 0 3 5.857933 0.000000 3113 +diagnos 0 3 5.857933 0.000000 3968 +diagnost 0 3 5.857933 0.000000 3833 +baltimor 0 3 5.857933 0.000000 3809 +chronolog 0 3 5.857933 0.000000 4034 +wolberg 1 2 6.263398 6.263398 6218 +perturb 1 2 6.263398 6.263398 6075 +interestsin 0 2 6.263398 0.000000 6213 +spectrum 0 2 6.263398 0.000000 5405 +applicationsto 0 2 6.263398 0.000000 4254 +computer 0 2 6.263398 0.000000 6219 +linearli 0 2 6.263398 0.000000 6220 +qualif 0 2 6.263398 0.000000 6059 +prognost 0 2 6.263398 0.000000 6221 +polyhedr 0 2 6.263398 0.000000 5412 +festschrift 0 2 6.263398 0.000000 6141 +klau 0 2 6.263398 0.000000 4999 +internationalsymposium 0 2 6.263398 0.000000 6032 +plenum 0 2 6.263398 0.000000 6036 +prognosi 0 2 6.263398 0.000000 6222 +chunhui 1 1 6.957497 6.957497 19316 +misclassif 1 1 6.957497 6.957497 19317 +solodov 1 1 6.957497 6.957497 19318 +effectivecomputation 0 1 6.957497 0.000000 19319 +encompassestheoret 0 1 6.957497 0.000000 19320 +parallelgradi 0 1 6.957497 0.000000 19321 +problemsa 0 1 6.957497 0.000000 19322 +animport 0 1 6.957497 0.000000 19323 +ahighli 0 1 6.957497 0.000000 19324 +useat 0 1 6.957497 0.000000 19325 +hospit 0 1 6.957497 0.000000 19326 +solodova 0 1 6.957497 0.000000 19327 +descent 0 1 6.957497 0.000000 19328 +monotonecomplementar 0 1 6.957497 0.000000 19329 +jong 0 1 6.957497 0.000000 19330 +pangexact 0 1 6.957497 0.000000 19331 +programswith 0 1 6.957497 0.000000 19332 +mangasarianmathemat 0 1 6.957497 0.000000 19333 +miningmathemat 0 1 6.957497 0.000000 19334 +mangasarianerror 0 1 6.957497 0.000000 19335 +nondifferenti 0 1 6.957497 0.000000 19336 +slater 0 1 6.957497 0.000000 19337 +ritter 0 1 6.957497 0.000000 19338 +riedmuel 0 1 6.957497 0.000000 19339 +schaeffler 0 1 6.957497 0.000000 19340 +physica 0 1 6.957497 0.000000 19341 +siag 0 1 6.957497 0.000000 19342 +bilinear 0 1 6.957497 0.000000 19343 +cowan 0 1 6.957497 0.000000 19344 +tesauro 0 1 6.957497 0.000000 19345 +alspector 0 1 6.957497 0.000000 19346 +inequalitiesand 0 1 6.957497 0.000000 19347 +vianonmonoton 0 1 6.957497 0.000000 19348 +minimn 0 1 6.957497 0.000000 19349 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..764a85bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,422 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +us 2 329 1.098612 2.197224 16 +time 2 293 1.098612 2.197224 17 +student 0 343 1.098612 0.000000 19 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +softwar 0 220 1.386294 0.000000 30 +email 0 220 1.386294 0.000000 29 +also 0 259 1.386294 0.000000 28 +washington 0 236 1.386294 0.000000 32 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +includ 0 208 1.609438 0.000000 42 +oper 0 180 1.609438 0.000000 34 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +perform 0 143 1.945910 0.000000 74 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +first 0 140 1.945910 0.000000 71 +model 0 145 1.945910 0.000000 69 +machin 1 129 2.079442 2.079442 95 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +studi 0 120 2.079442 0.000000 91 +high 0 130 2.079442 0.000000 101 +pleas 1 113 2.197225 2.197225 114 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +theori 0 111 2.197225 0.000000 127 +user 1 104 2.302585 2.302585 137 +technic 1 100 2.302585 2.302585 140 +take 0 97 2.302585 0.000000 134 +need 0 98 2.302585 0.000000 135 +text 0 98 2.302585 0.000000 133 +imag 2 91 2.397895 4.795790 161 +proceed 1 93 2.397895 2.397895 152 +section 0 94 2.397895 0.000000 149 +follow 0 92 2.397895 0.000000 143 +grade 0 90 2.397895 0.000000 142 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +center 0 88 2.397895 0.000000 158 +learn 2 86 2.484907 4.969814 170 +journal 1 83 2.484907 2.484907 183 +second 0 81 2.484907 0.000000 166 +solut 0 82 2.484907 0.000000 162 +larg 0 82 2.484907 0.000000 168 +librari 0 87 2.484907 0.000000 181 +institut 0 84 2.484907 0.000000 187 +april 1 77 2.564949 2.564949 196 +method 1 80 2.564949 2.564949 213 +good 0 77 2.564949 0.000000 200 +sourc 0 77 2.564949 0.000000 201 +exampl 0 77 2.564949 0.000000 195 +optim 0 79 2.564949 0.000000 197 +june 0 79 2.564949 0.000000 214 +free 1 73 2.639057 2.639057 224 +appli 1 71 2.639057 2.639057 226 +addit 0 74 2.639057 0.000000 228 +materi 0 75 2.639057 0.000000 221 +workshop 0 71 2.639057 0.000000 239 +nation 0 74 2.639057 0.000000 240 +august 1 66 2.708050 2.708050 257 +goal 0 66 2.708050 0.000000 250 +abstract 2 62 2.772589 5.545178 276 +street 2 63 2.772589 5.545178 293 +result 1 65 2.772589 2.772589 281 +new 1 64 2.772589 2.772589 262 +januari 1 62 2.772589 2.772589 264 +copi 0 63 2.772589 0.000000 284 +prof 0 64 2.772589 0.000000 273 +interact 0 62 2.772589 0.000000 270 +previou 0 62 2.772589 0.000000 290 +function 0 62 2.772589 0.000000 275 +collect 0 65 2.772589 0.000000 268 +improv 0 62 2.772589 0.000000 289 +septemb 0 65 2.772589 0.000000 274 +march 1 61 2.833213 2.833213 295 +content 0 59 2.833213 0.000000 302 +simpl 0 60 2.833213 0.000000 298 +automat 0 61 2.833213 0.000000 306 +type 0 61 2.833213 0.000000 296 +juli 0 60 2.833213 0.000000 305 +variou 1 56 2.890372 2.890372 317 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +point 0 58 2.890372 0.000000 319 +detail 0 57 2.890372 0.000000 321 +sampl 1 53 2.944439 2.944439 339 +local 1 55 2.944439 2.944439 334 +allow 1 53 2.944439 2.944439 333 +found 0 53 2.944439 0.000000 337 +suggest 0 53 2.944439 0.000000 331 +case 2 51 2.995732 5.991464 351 +tabl 0 51 2.995732 0.000000 346 +digit 0 52 2.995732 0.000000 348 +date 0 51 2.995732 0.000000 344 +particular 0 51 2.995732 0.000000 352 +approach 1 48 3.044522 3.044522 366 +visual 0 48 3.044522 0.000000 372 +pointer 0 48 3.044522 0.000000 368 +right 0 48 3.044522 0.000000 363 +format 0 48 3.044522 0.000000 356 +numer 0 49 3.044522 0.000000 369 +without 0 50 3.044522 0.000000 370 +friend 0 48 3.044522 0.000000 376 +featur 1 46 3.091042 3.091042 386 +could 1 46 3.091042 3.091042 383 +describ 0 45 3.135494 0.000000 400 +better 0 45 3.135494 0.000000 401 +netscap 0 44 3.135494 0.000000 395 +term 1 43 3.178054 3.178054 411 +long 0 43 3.178054 0.000000 413 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +vision 0 41 3.218876 0.000000 430 +examin 0 42 3.218876 0.000000 424 +probabl 1 40 3.258097 3.258097 455 +small 1 39 3.258097 3.258097 447 +submit 1 39 3.258097 3.258097 440 +societi 0 40 3.258097 0.000000 456 +author 0 39 3.258097 0.000000 450 +slide 1 38 3.295837 3.295837 467 +seminar 0 38 3.295837 0.000000 470 +mean 1 37 3.332205 3.332205 477 +ofth 0 36 3.367296 0.000000 491 +procedur 0 36 3.367296 0.000000 488 +download 0 36 3.367296 0.000000 489 +approxim 1 35 3.401197 3.401197 509 +bibliographi 0 34 3.401197 0.000000 518 +singl 0 34 3.401197 0.000000 510 +return 0 34 3.401197 0.000000 502 +print 0 34 3.401197 0.000000 503 +tech 0 35 3.401197 0.000000 515 +board 0 33 3.433987 0.000000 528 +obtain 0 33 3.433987 0.000000 534 +collabor 1 32 3.465736 3.465736 543 +human 1 32 3.465736 3.465736 546 +taken 0 31 3.496508 0.000000 555 +often 0 31 3.496508 0.000000 551 +power 0 30 3.555348 0.000000 573 +consid 0 29 3.583519 0.000000 590 +releas 0 28 3.610918 0.000000 616 +measur 0 28 3.610918 0.000000 609 +actual 0 28 3.610918 0.000000 604 +scale 0 28 3.610918 0.000000 613 +progress 0 28 3.610918 0.000000 598 +american 1 27 3.637586 3.637586 634 +team 0 27 3.637586 0.000000 625 +repres 1 26 3.688879 3.688879 656 +consist 0 26 3.688879 0.000000 651 +compar 0 26 3.688879 0.000000 648 +detect 0 26 3.688879 0.000000 646 +valu 1 25 3.737670 3.737670 665 +accur 0 25 3.737670 0.000000 680 +todai 0 25 3.737670 0.000000 672 +known 1 24 3.761200 3.761200 702 +pattern 1 24 3.761200 3.761200 689 +interpret 1 24 3.761200 3.761200 686 +recognit 1 23 3.806662 3.806662 723 +size 0 23 3.806662 0.000000 713 +togeth 0 23 3.806662 0.000000 714 +william 0 22 3.850148 0.000000 765 +identifi 0 22 3.850148 0.000000 760 +siam 1 21 3.912023 3.912023 800 +theunivers 0 21 3.912023 0.000000 797 +util 0 21 3.912023 0.000000 774 +similar 0 21 3.912023 0.000000 771 +viewer 0 21 3.912023 0.000000 787 +fine 1 20 3.951244 3.951244 822 +minut 0 20 3.951244 0.000000 810 +predict 2 19 4.007333 8.014666 855 +separ 1 19 4.007333 4.007333 844 +five 0 19 4.007333 0.000000 841 +comparison 0 19 4.007333 0.000000 863 +aid 1 18 4.060443 4.060443 904 +behavior 0 18 4.060443 0.000000 881 +statu 0 18 4.060443 0.000000 885 +medic 1 17 4.110874 4.110874 958 +differenti 1 17 4.110874 4.110874 921 +segment 1 17 4.110874 4.110874 931 +analyz 0 17 4.110874 0.000000 925 +seek 0 17 4.110874 0.000000 954 +portion 0 16 4.174387 0.000000 971 +capabl 0 15 4.248495 0.000000 1016 +indic 0 15 4.248495 0.000000 1013 +precis 0 15 4.248495 0.000000 1023 +ascii 0 15 4.248495 0.000000 1032 +train 1 14 4.317488 4.317488 1066 +shown 1 14 4.317488 4.317488 1080 +camera 0 14 4.317488 0.000000 1115 +draw 0 14 4.317488 0.000000 1086 +individu 1 13 4.382027 4.382027 1126 +deriv 1 13 4.382027 4.382027 1145 +nick 0 13 4.382027 0.000000 1180 +incorpor 0 13 4.382027 0.000000 1163 +characterist 1 12 4.465908 4.465908 1257 +scan 0 12 4.465908 0.000000 1243 +shape 0 12 4.465908 0.000000 1245 +remov 0 12 4.465908 0.000000 1225 +philadelphia 0 12 4.465908 0.000000 1244 +extrem 1 11 4.553877 4.553877 1330 +node 1 11 4.553877 4.553877 1326 +eight 0 11 4.553877 0.000000 1331 +distinguish 0 11 4.553877 0.000000 1357 +induct 0 11 4.553877 0.000000 1304 +total 0 10 4.653960 0.000000 1398 +subset 0 10 4.653960 0.000000 1425 +black 0 10 4.653960 0.000000 1418 +equal 0 10 4.653960 0.000000 1424 +tradit 0 10 4.653960 0.000000 1404 +perspect 0 10 4.653960 0.000000 1437 +mangasarian 2 9 4.753590 9.507180 1570 +surfac 1 9 4.753590 4.753590 1574 +factor 1 9 4.753590 4.753590 1544 +desir 0 9 4.753590 0.000000 1542 +exact 0 9 4.753590 0.000000 1509 +classifi 0 9 4.753590 0.000000 1537 +hundr 0 9 4.753590 0.000000 1528 +correctli 0 9 4.753590 0.000000 1478 +russel 0 9 4.753590 0.000000 1507 +morgan 0 9 4.753590 0.000000 1484 +curv 1 8 4.875197 4.875197 1656 +mass 1 8 4.875197 4.875197 1732 +isol 1 8 4.875197 4.875197 1663 +textur 1 8 4.875197 4.875197 1677 +quantit 1 8 4.875197 4.875197 1654 +grew 0 8 4.875197 0.000000 1742 +judg 0 8 4.875197 0.000000 1644 +aaai 0 8 4.875197 0.000000 1750 +replac 0 8 4.875197 0.000000 1668 +angel 0 8 4.875197 0.000000 1779 +boundari 1 7 5.010635 5.010635 1929 +analyt 1 7 5.010635 5.010635 1913 +hunt 1 7 5.010635 5.010635 1798 +converg 0 7 5.010635 0.000000 1844 +smooth 0 7 5.010635 0.000000 1855 +densiti 0 7 5.010635 0.000000 1927 +ruth 0 7 5.010635 0.000000 1870 +chronicl 0 7 5.010635 0.000000 1952 +capit 0 7 5.010635 0.000000 1957 +nine 1 6 5.164786 5.164786 2047 +plane 1 6 5.164786 5.164786 2187 +olvi 0 6 5.164786 0.000000 2109 +onto 0 6 5.164786 0.000000 2089 +versu 0 6 5.164786 0.000000 2052 +averag 0 6 5.164786 0.000000 2098 +nuclear 1 5 5.347108 5.347108 2576 +medicin 1 5 5.347108 5.347108 2448 +began 0 5 5.347108 0.000000 2498 +highlight 0 5 5.347108 0.000000 2340 +cell 0 5 5.347108 0.000000 2274 +snake 0 5 5.347108 0.000000 2281 +accuraci 0 5 5.347108 0.000000 2450 +shift 0 5 5.347108 0.000000 2357 +kaufmann 0 5 5.347108 0.000000 2254 +houston 0 5 5.347108 0.000000 2460 +diagnosi 2 4 5.568345 11.136690 3027 +surviv 1 4 5.568345 5.568345 2734 +aspir 1 4 5.568345 5.568345 3019 +popular 1 4 5.568345 5.568345 2802 +biomed 1 4 5.568345 5.568345 2905 +writer 0 4 5.568345 0.000000 2783 +sole 0 4 5.568345 0.000000 2592 +assess 0 4 5.568345 0.000000 2724 +kristin 0 4 5.568345 0.000000 3089 +prospect 0 4 5.568345 0.000000 3013 +cancer 2 3 5.857933 11.715866 4032 +breast 2 3 5.857933 11.715866 4033 +recurr 1 3 5.857933 5.857933 3740 +diagnos 1 3 5.857933 5.857933 3968 +diseas 1 3 5.857933 5.857933 3635 +surgeri 1 3 5.857933 5.857933 3975 +citat 1 3 5.857933 5.857933 3617 +bennett 0 3 5.857933 0.000000 4024 +microscop 0 3 5.857933 0.000000 4035 +confid 0 3 5.857933 0.000000 3691 +pain 0 3 5.857933 0.000000 3460 +chronolog 0 3 5.857933 0.000000 4034 +man 0 3 5.857933 0.000000 3417 +detroit 0 3 5.857933 0.000000 3565 +paulb 0 3 5.857933 0.000000 4036 +wolberg 2 2 6.263398 12.526796 6218 +prognosi 2 2 6.263398 12.526796 6222 +patient 1 2 6.263398 6.263398 6223 +benign 1 2 6.263398 6.263398 4893 +prognost 1 2 6.263398 6.263398 6221 +plot 1 2 6.263398 6.263398 4236 +milwauke 1 2 6.263398 6.263398 5797 +rudi 0 2 6.263398 0.000000 5487 +multisurfac 0 2 6.263398 0.000000 6224 +ofthi 0 2 6.263398 0.000000 5836 +grabber 0 2 6.263398 0.000000 5521 +nucleu 0 2 6.263398 0.000000 4302 +thenorm 0 2 6.263398 0.000000 4412 +ofvari 0 2 6.263398 0.000000 4582 +twelfth 0 2 6.263398 0.000000 5035 +icml 0 2 6.263398 0.000000 5669 +prime 0 2 6.263398 0.000000 6099 +computer 0 2 6.263398 0.000000 6219 +cope 0 2 6.263398 0.000000 6050 +nevada 0 2 6.263398 0.000000 4875 +malign 2 1 6.957497 13.914994 19350 +nuclei 1 1 6.957497 6.957497 19351 +cytolog 1 1 6.957497 6.957497 19352 +biopsi 1 1 6.957497 6.957497 19353 +oncolog 1 1 6.957497 6.957497 19354 +needl 1 1 6.957497 6.957497 19355 +xcyt 1 1 6.957497 6.957497 19356 +setiono 1 1 6.957497 6.957497 19357 +ofeach 1 1 6.957497 6.957497 19358 +ofdiseas 1 1 6.957497 6.957497 19359 +lymph 1 1 6.957497 6.957497 19360 +histolog 1 1 6.957497 6.957497 19361 +heisei 1 1 6.957497 6.957497 19362 +prognosismachin 0 1 6.957497 0.000000 19363 +prognosisthi 0 1 6.957497 0.000000 19364 +learningapproach 0 1 6.957497 0.000000 19365 +ofbreast 0 1 6.957497 0.000000 19366 +betweenprof 0 1 6.957497 0.000000 19367 +anddr 0 1 6.957497 0.000000 19368 +wolbergof 0 1 6.957497 0.000000 19369 +thepress 0 1 6.957497 0.000000 19370 +inmarch 0 1 6.957497 0.000000 19371 +linksdiagnosisthi 0 1 6.957497 0.000000 19372 +diagnosebreast 0 1 6.957497 0.000000 19373 +heidentifi 0 1 6.957497 0.000000 19374 +consideredrelev 0 1 6.957497 0.000000 19375 +andtwo 0 1 6.957497 0.000000 19376 +aclassifi 0 1 6.957497 0.000000 19377 +thatsuccessfulli 0 1 6.957497 0.000000 19378 +iswel 0 1 6.957497 0.000000 19379 +streetto 0 1 6.957497 0.000000 19380 +adigit 0 1 6.957497 0.000000 19381 +consolid 0 1 6.957497 0.000000 19382 +clinicalpractic 0 1 6.957497 0.000000 19383 +thenmount 0 1 6.957497 0.000000 19384 +stain 0 1 6.957497 0.000000 19385 +cellularnuclei 0 1 6.957497 0.000000 19386 +arewel 0 1 6.957497 0.000000 19387 +afram 0 1 6.957497 0.000000 19388 +mous 0 1 6.957497 0.000000 19389 +showingxcyt 0 1 6.957497 0.000000 19390 +thisfas 0 1 6.957497 0.000000 19391 +standarderror 0 1 6.957497 0.000000 19392 +wasconstruct 0 1 6.957497 0.000000 19393 +thisclassifi 0 1 6.957497 0.000000 19394 +threeof 0 1 6.957497 0.000000 19395 +bayesiancomput 0 1 6.957497 0.000000 19396 +thesedens 0 1 6.957497 0.000000 19397 +consecut 0 1 6.957497 0.000000 19398 +newpati 0 1 6.957497 0.000000 19399 +didxcyt 0 1 6.957497 0.000000 19400 +suspici 0 1 6.957497 0.000000 19401 +estimatedprob 0 1 6.957497 0.000000 19402 +goodtest 0 1 6.957497 0.000000 19403 +petsegment 0 1 6.957497 0.000000 19404 +inthes 0 1 6.957497 0.000000 19405 +prognosisth 0 1 6.957497 0.000000 19406 +haveapproach 0 1 6.957497 0.000000 19407 +inputfeatur 0 1 6.957497 0.000000 19408 +atim 0 1 6.957497 0.000000 19409 +censor 0 1 6.957497 0.000000 19410 +linearprogram 0 1 6.957497 0.000000 19411 +fornew 0 1 6.957497 0.000000 19412 +caseswith 0 1 6.957497 0.000000 19413 +anindividu 0 1 6.957497 0.000000 19414 +intoxcyt 0 1 6.957497 0.000000 19415 +ourorigin 0 1 6.957497 0.000000 19416 +thereforeha 0 1 6.957497 0.000000 19417 +freeafter 0 1 6.957497 0.000000 19418 +xcytgiv 0 1 6.957497 0.000000 19419 +tumors 0 1 6.957497 0.000000 19420 +corrobor 0 1 6.957497 0.000000 19421 +axillari 0 1 6.957497 0.000000 19422 +bibliographylink 0 1 6.957497 0.000000 19423 +notlink 0 1 6.957497 0.000000 19424 +patholog 0 1 6.957497 0.000000 19425 +priediti 0 1 6.957497 0.000000 19426 +teagu 0 1 6.957497 0.000000 19427 +indetermin 0 1 6.957497 0.000000 19428 +imit 0 1 6.957497 0.000000 19429 +sentinel 0 1 6.957497 0.000000 19430 +marilynn 0 1 6.957497 0.000000 19431 +marchion 0 1 6.957497 0.000000 19432 +sorel 0 1 6.957497 0.000000 19433 +surgic 0 1 6.957497 0.000000 19434 +column 0 1 6.957497 0.000000 19435 +schooloth 0 1 6.957497 0.000000 19436 +oncolink 0 1 6.957497 0.000000 19437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..8381e192 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +work 0 380 0.693147 0.000000 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +inform 0 412 0.693147 0.000000 8 +us 1 329 1.098612 1.098612 16 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +softwar 0 220 1.386294 0.000000 30 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +assign 1 135 1.945910 1.945910 66 +construct 1 139 1.945910 1.945910 82 +problem 0 147 1.945910 0.000000 75 +process 0 142 1.945910 0.000000 72 +file 0 132 1.945910 0.000000 70 +confer 0 126 2.079442 0.000000 100 +mathemat 1 108 2.197225 2.197225 123 +well 0 109 2.197225 0.000000 121 +theori 0 111 2.197225 0.000000 127 +follow 1 92 2.397895 2.397895 143 +section 0 94 2.397895 0.000000 149 +proceed 0 93 2.397895 0.000000 152 +contain 1 81 2.484907 2.484907 174 +learn 0 86 2.484907 0.000000 170 +requir 0 81 2.484907 0.000000 167 +ieee 0 86 2.484907 0.000000 190 +novemb 0 81 2.484907 0.000000 179 +journal 0 83 2.484907 0.000000 183 +method 2 80 2.564949 5.129898 213 +optim 1 79 2.564949 2.564949 197 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +solv 0 73 2.639057 0.000000 234 +appli 0 71 2.639057 0.000000 226 +intellig 0 72 2.639057 0.000000 225 +view 1 70 2.708050 2.708050 254 +goal 0 66 2.708050 0.000000 250 +artifici 1 63 2.772589 2.772589 280 +street 0 63 2.772589 0.000000 293 +descript 0 64 2.772589 0.000000 271 +best 0 59 2.833213 0.000000 299 +point 2 58 2.890372 5.780744 319 +space 1 57 2.890372 2.890372 310 +found 0 53 2.944439 0.000000 337 +particular 0 51 2.995732 0.000000 352 +much 0 52 2.995732 0.000000 349 +set 2 50 3.044522 6.089044 361 +approach 1 48 3.044522 3.044522 366 +numer 0 49 3.044522 0.000000 369 +could 0 46 3.091042 0.000000 383 +describ 1 45 3.135494 3.135494 400 +anoth 0 45 3.135494 0.000000 408 +better 0 45 3.135494 0.000000 401 +long 0 43 3.178054 0.000000 413 +linear 2 41 3.218876 6.437752 431 +howev 0 41 3.218876 0.000000 422 +error 0 40 3.258097 0.000000 449 +transact 0 39 3.258097 0.000000 438 +societi 0 40 3.258097 0.000000 456 +paul 1 38 3.295837 3.295837 471 +close 0 38 3.295837 0.000000 465 +tree 1 36 3.367296 3.367296 492 +procedur 0 36 3.367296 0.000000 488 +bibliographi 0 34 3.401197 0.000000 518 +concept 0 32 3.465736 0.000000 537 +neural 1 30 3.555348 3.555348 578 +packag 1 28 3.610918 3.610918 614 +determin 0 27 3.637586 0.000000 630 +pattern 1 24 3.761200 3.761200 689 +reach 0 24 3.761200 0.000000 688 +togeth 1 23 3.806662 3.806662 714 +sequenc 1 23 3.806662 3.806662 734 +decis 1 23 3.806662 3.806662 728 +avoid 0 21 3.912023 0.000000 799 +separ 2 19 4.007333 8.014666 844 +region 1 19 4.007333 4.007333 875 +mostli 1 19 4.007333 4.007333 869 +histori 0 19 4.007333 0.000000 853 +dimension 1 18 4.060443 4.060443 909 +minim 1 18 4.060443 4.060443 887 +stop 0 17 4.110874 0.000000 942 +otherwis 0 17 4.110874 0.000000 922 +layer 0 17 4.110874 0.000000 926 +brief 0 16 4.174387 0.000000 1001 +choos 0 16 4.174387 0.000000 964 +advantag 0 16 4.174387 0.000000 987 +cognit 0 16 4.174387 0.000000 986 +side 1 15 4.248495 4.248495 1022 +nonlinear 1 14 4.317488 4.317488 1107 +finit 1 14 4.317488 4.317488 1106 +split 1 14 4.317488 4.317488 1078 +matlab 1 14 4.317488 4.317488 1081 +polynomi 0 14 4.317488 0.000000 1069 +shown 0 14 4.317488 0.000000 1080 +train 0 14 4.317488 0.000000 1066 +difficulti 0 13 4.382027 0.000000 1132 +nick 0 13 4.382027 0.000000 1180 +robust 0 12 4.465908 0.000000 1271 +node 1 11 4.553877 4.553877 1326 +branch 0 11 4.553877 0.000000 1318 +faster 0 11 4.553877 0.000000 1323 +surfac 1 9 4.753590 4.753590 1574 +mangasarian 1 9 4.753590 4.753590 1570 +distanc 0 9 4.753590 0.000000 1500 +formul 0 8 4.875197 0.000000 1733 +paramet 1 7 5.010635 5.010635 1796 +whenev 0 7 5.010635 0.000000 1883 +plane 2 6 5.164786 10.329572 2187 +variant 0 6 5.164786 0.000000 2043 +averag 0 6 5.164786 0.000000 2098 +hidden 0 6 5.164786 0.000000 1987 +proce 0 6 5.164786 0.000000 2114 +li 1 5 5.347108 5.347108 2500 +bradlei 1 5 5.347108 5.347108 2554 +disjoint 1 4 5.568345 5.568345 2709 +repeat 0 4 5.568345 0.000000 2798 +kristin 0 4 5.568345 0.000000 3089 +bennett 1 3 5.857933 5.857933 4024 +todetermin 0 3 5.857933 0.000000 3182 +similarli 0 3 5.857933 0.000000 3241 +backpropag 0 3 5.857933 0.000000 3507 +chronolog 0 3 5.857933 0.000000 4034 +paulb 0 3 5.857933 0.000000 4036 +multisurfac 1 2 6.263398 6.263398 6224 +linearli 1 2 6.263398 6.263398 6220 +euclidean 1 2 6.263398 6.263398 5198 +quadrat 0 2 6.263398 0.000000 4497 +oneset 0 2 6.263398 0.000000 6134 +cart 0 2 6.263398 0.000000 5874 +mino 0 2 6.263398 0.000000 6208 +midwest 0 2 6.263398 0.000000 6225 +discrimin 0 2 6.263398 0.000000 6140 +misclassifi 1 1 6.957497 6.957497 19438 +euclideanspac 1 1 6.957497 6.957497 19439 +programmingpattern 0 1 6.957497 0.000000 19440 +programmingthi 0 1 6.957497 0.000000 19441 +outlinemathemat 0 1 6.957497 0.000000 19442 +failon 0 1 6.957497 0.000000 19443 +discard 0 1 6.957497 0.000000 19444 +eachnod 0 1 6.957497 0.000000 19445 +thesam 0 1 6.957497 0.000000 19446 +astrain 0 1 6.957497 0.000000 19447 +traditionallearn 0 1 6.957497 0.000000 19448 +inthat 0 1 6.957497 0.000000 19449 +insepar 0 1 6.957497 0.000000 19450 +orsa 0 1 6.957497 0.000000 19451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..7abfd9e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +inform 0 412 0.693147 0.000000 8 +project 2 340 1.098612 2.197224 18 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +email 0 220 1.386294 0.000000 29 +paper 0 205 1.609438 0.000000 38 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +parallel 1 169 1.791759 1.791759 60 +recent 0 167 1.791759 0.000000 58 +develop 0 174 1.791759 0.000000 53 +distribut 0 162 1.791759 0.000000 51 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +perform 1 143 1.945910 1.945910 74 +relat 0 139 1.945910 0.000000 68 +tool 1 117 2.079442 2.079442 93 +report 0 131 2.079442 0.000000 92 +high 0 130 2.079442 0.000000 101 +dayton 0 119 2.079442 0.000000 104 +version 0 113 2.197225 0.000000 122 +technic 0 100 2.302585 0.000000 140 +access 0 102 2.302585 0.000000 136 +present 1 91 2.397895 2.397895 145 +contain 0 81 2.484907 0.000000 174 +build 0 85 2.484907 0.000000 184 +level 0 87 2.484907 0.000000 180 +west 0 83 2.484907 0.000000 192 +meet 1 72 2.639057 2.639057 229 +symposium 0 72 2.639057 0.000000 238 +copi 0 63 2.772589 0.000000 284 +explor 0 58 2.890372 0.000000 324 +tabl 0 51 2.995732 0.000000 346 +made 1 44 3.135494 3.135494 398 +describ 0 45 3.135494 0.000000 400 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +staff 0 36 3.367296 0.000000 490 +common 0 30 3.555348 0.000000 574 +releas 0 28 3.610918 0.000000 616 +symbol 0 27 3.637586 0.000000 620 +effort 0 26 3.688879 0.000000 652 +scalabl 0 24 3.761200 0.000000 705 +hypertext 0 19 4.007333 0.000000 865 +statu 0 18 4.060443 0.000000 885 +sigmetr 0 13 4.382027 0.000000 1173 +arpa 1 11 4.553877 4.553877 1369 +paradyn 2 9 4.753590 9.507180 1614 +routin 0 9 4.753590 0.000000 1549 +bart 0 9 4.753590 0.000000 1559 +poster 0 7 5.010635 0.000000 1814 +antonio 0 6 5.164786 0.000000 2186 +restaur 0 6 5.164786 0.000000 2230 +temporari 0 6 5.164786 0.000000 2090 +panel 0 5 5.347108 0.000000 2463 +elsewher 0 5 5.347108 0.000000 2444 +super 1 3 5.857933 5.857933 3918 +insan 0 3 5.857933 0.000000 4006 +parallellanguag 0 3 5.857933 0.000000 4026 +informationthi 0 2 6.263398 0.000000 5477 +ofreleas 0 2 6.263398 0.000000 4860 +newapproach 0 2 6.263398 0.000000 6047 +blizzard 0 2 6.263398 0.000000 6226 +projectdepart 0 2 6.263398 0.000000 6125 +edufax 0 2 6.263398 0.000000 5479 +csto 1 1 6.957497 6.957497 19452 +presentationthi 1 1 6.957497 6.957497 19453 +goalsth 0 1 6.957497 0.000000 19454 +manualsstatu 0 1 6.957497 0.000000 19455 +reporta 0 1 6.957497 0.000000 19456 +inflorida 0 1 6.957497 0.000000 19457 +tocompil 0 1 6.957497 0.000000 19458 +postera 0 1 6.957497 0.000000 19459 +spdt 0 1 6.957497 0.000000 19460 +toolsyou 0 1 6.957497 0.000000 19461 +placehold 0 1 6.957497 0.000000 19462 +informationparadyn 0 1 6.957497 0.000000 19463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..372506bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +mail 0 238 1.386294 0.000000 22 +modifi 0 178 1.609438 0.000000 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +area 0 144 1.945910 0.000000 80 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +send 0 114 2.197225 0.000000 109 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +numer 0 49 3.044522 0.000000 369 +math 0 44 3.135494 0.000000 402 +steven 1 17 4.110874 4.110874 953 +employ 0 12 4.465908 0.000000 1291 +depth 0 8 4.875197 0.000000 1636 +parker 1 1 6.957497 6.957497 19464 +prism 1 1 6.957497 6.957497 19465 +projectfal 0 1 6.957497 0.000000 19466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..06b7c5c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +us 1 329 1.098612 1.098612 16 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +paper 0 205 1.609438 0.000000 38 +modifi 0 178 1.609438 0.000000 35 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +area 0 144 1.945910 0.000000 80 +professor 0 137 1.945910 0.000000 76 +file 0 132 1.945910 0.000000 70 +click 0 142 1.945910 0.000000 78 +process 0 142 1.945910 0.000000 72 +postscript 1 131 2.079442 2.079442 90 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +specif 0 106 2.197225 0.000000 106 +pleas 0 113 2.197225 0.000000 114 +site 0 106 2.197225 0.000000 119 +technic 1 100 2.302585 2.302585 140 +techniqu 0 99 2.302585 0.000000 138 +text 0 98 2.302585 0.000000 133 +select 0 91 2.397895 0.000000 154 +present 0 91 2.397895 0.000000 145 +learn 0 86 2.484907 0.000000 170 +journal 0 83 2.484907 0.000000 183 +decemb 0 80 2.564949 0.000000 215 +summari 0 73 2.639057 0.000000 237 +street 1 63 2.772589 2.772589 293 +abstract 1 62 2.772589 2.772589 276 +guid 0 63 2.772589 0.000000 267 +march 0 61 2.833213 0.000000 295 +advisor 0 51 2.995732 0.000000 355 +format 0 48 3.044522 0.000000 356 +done 0 47 3.091042 0.000000 381 +featur 0 46 3.091042 0.000000 386 +netscap 0 44 3.135494 0.000000 395 +linear 0 41 3.218876 0.000000 431 +submit 0 39 3.258097 0.000000 440 +paul 1 38 3.295837 3.295837 471 +download 0 36 3.367296 0.000000 489 +print 0 34 3.401197 0.000000 503 +eduoffic 0 33 3.433987 0.000000 531 +neural 0 30 3.555348 0.000000 578 +cluster 0 28 3.610918 0.000000 612 +revis 0 26 3.688879 0.000000 640 +store 0 24 3.761200 0.000000 693 +viewer 0 21 3.912023 0.000000 787 +minim 0 18 4.060443 0.000000 887 +accept 0 18 4.060443 0.000000 879 +ascii 0 15 4.248495 0.000000 1032 +nonlinear 0 14 4.317488 0.000000 1107 +nick 1 13 4.382027 4.382027 1180 +nasa 0 13 4.382027 0.000000 1188 +induct 0 11 4.553877 0.000000 1304 +mangasarian 1 9 4.753590 4.753590 1570 +dead 0 7 5.010635 0.000000 1840 +fish 1 6 5.164786 5.164786 2207 +bradlei 1 5 5.347108 5.347108 2554 +shift 0 5 5.347108 0.000000 2357 +frog 0 5 5.347108 0.000000 2479 +concav 0 4 5.568345 0.000000 2808 +paulb 1 3 5.857933 5.857933 4036 +csphone 0 3 5.857933 0.000000 3394 +backcountri 0 3 5.857933 0.000000 3686 +espnet 0 2 6.263398 0.000000 5634 +bradleygradu 0 1 6.957497 0.000000 19467 +mangasarianinterestsmathemat 0 1 6.957497 0.000000 19468 +programmingmachin 0 1 6.957497 0.000000 19469 +learningfli 0 1 6.957497 0.000000 19470 +currentlyb 0 1 6.957497 0.000000 19471 +madisonmathemat 0 1 6.957497 0.000000 19472 +thiswork 0 1 6.957497 0.000000 19473 +olvimangasarian 0 1 6.957497 0.000000 19474 +publicationsal 0 1 6.957497 0.000000 19475 +picksthes 0 1 6.957497 0.000000 19476 +grate 0 1 6.957497 0.000000 19477 +timesfax 0 1 6.957497 0.000000 19478 +uroullett 0 1 6.957497 0.000000 19479 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..c31c49de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +also 0 259 1.386294 0.000000 28 +group 0 183 1.609438 0.000000 36 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +develop 1 174 1.791759 1.791759 53 +madison 0 165 1.791759 0.000000 55 +read 0 154 1.791759 0.000000 47 +recent 0 167 1.791759 0.000000 58 +year 0 148 1.945910 0.000000 84 +relat 0 139 1.945910 0.000000 68 +tool 1 117 2.079442 2.079442 93 +technolog 0 131 2.079442 0.000000 102 +studi 0 120 2.079442 0.000000 91 +intern 1 108 2.197225 2.197225 128 +site 1 106 2.197225 2.197225 119 +make 0 111 2.197225 0.000000 120 +look 0 107 2.197225 0.000000 115 +technic 0 100 2.302585 0.000000 140 +techniqu 0 99 2.302585 0.000000 138 +book 0 99 2.302585 0.000000 131 +imag 1 91 2.397895 2.397895 161 +present 0 91 2.397895 0.000000 145 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +journal 1 83 2.484907 2.484907 183 +thing 0 84 2.484907 0.000000 189 +help 0 83 2.484907 0.000000 175 +info 0 85 2.484907 0.000000 176 +appear 0 78 2.564949 0.000000 210 +issu 0 78 2.564949 0.000000 211 +resum 0 79 2.564949 0.000000 217 +decemb 0 80 2.564949 0.000000 215 +write 0 72 2.639057 0.000000 222 +servic 0 72 2.639057 0.000000 236 +involv 0 71 2.639057 0.000000 227 +tuesdai 0 73 2.639057 0.000000 219 +integr 0 67 2.708050 0.000000 245 +receiv 0 66 2.708050 0.000000 244 +prof 1 64 2.772589 2.772589 273 +laboratori 0 63 2.772589 0.000000 292 +creat 0 63 2.772589 0.000000 277 +guid 0 63 2.772589 0.000000 267 +experi 0 64 2.772589 0.000000 283 +room 0 59 2.833213 0.000000 301 +overview 0 56 2.890372 0.000000 323 +think 0 57 2.890372 0.000000 314 +major 0 56 2.890372 0.000000 315 +cover 0 55 2.944439 0.000000 329 +basic 0 50 3.044522 0.000000 360 +cool 0 49 3.044522 0.000000 374 +standard 0 48 3.044522 0.000000 365 +join 0 39 3.258097 0.000000 457 +multipl 0 39 3.258097 0.000000 453 +mean 0 37 3.332205 0.000000 477 +articl 1 33 3.433987 3.433987 530 +photo 0 31 3.496508 0.000000 561 +great 0 27 3.637586 0.000000 626 +team 0 27 3.637586 0.000000 625 +rather 0 26 3.688879 0.000000 642 +although 0 25 3.737670 0.000000 667 +lab 0 24 3.761200 0.000000 698 +inth 0 22 3.850148 0.000000 741 +tell 0 21 3.912023 0.000000 777 +toolkit 0 20 3.951244 0.000000 835 +eric 0 19 4.007333 0.000000 870 +excel 0 19 4.007333 0.000000 868 +dimension 0 18 4.060443 0.000000 909 +anywai 0 15 4.248495 0.000000 1047 +webmast 0 15 4.248495 0.000000 1045 +biologi 0 15 4.248495 0.000000 1049 +everyth 1 13 4.382027 4.382027 1169 +rest 0 12 4.465908 0.000000 1259 +guest 0 12 4.465908 0.000000 1220 +peter 0 11 4.553877 0.000000 1316 +sens 0 11 4.553877 0.000000 1305 +eight 0 11 4.553877 0.000000 1331 +label 0 10 4.653960 0.000000 1423 +star 0 8 4.875197 0.000000 1717 +scout 1 7 5.010635 5.010635 1903 +fortun 1 7 5.010635 5.010635 1872 +molecular 0 7 5.010635 0.000000 1887 +isthat 0 4 5.568345 0.000000 2723 +biomed 0 4 5.568345 0.000000 2905 +specialist 1 3 5.857933 5.857933 3319 +microscop 1 3 5.857933 5.857933 4035 +pete 0 3 5.857933 0.000000 3865 +devri 1 2 6.263398 6.263398 6145 +foolish 0 2 6.263398 0.000000 6108 +hazen 0 2 6.263398 0.000000 6143 +nearli 0 2 6.263398 0.000000 5608 +magellan 0 2 6.263398 0.000000 5825 +isdescrib 0 2 6.263398 0.000000 5444 +molecularbiolog 1 1 6.957497 6.957497 19480 +embryo 1 1 6.957497 6.957497 19481 +westdayton 0 1 6.957497 0.000000 19482 +pdevri 0 1 6.957497 0.000000 19483 +andthen 0 1 6.957497 0.000000 19484 +iread 0 1 6.957497 0.000000 19485 +topai 0 1 6.957497 0.000000 19486 +alsoprovid 0 1 6.957497 0.000000 19487 +folksat 0 1 6.957497 0.000000 19488 +microscopi 0 1 6.957497 0.000000 19489 +seancarrol 0 1 6.957497 0.000000 19490 +confoc 0 1 6.957497 0.000000 19491 +lotof 0 1 6.957497 0.000000 19492 +johnwhit 0 1 6.957497 0.000000 19493 +imrstaff 0 1 6.957497 0.000000 19494 +augustnd 0 1 6.957497 0.000000 19495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..77ebc028 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +program 0 374 0.693147 0.000000 7 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +class 1 199 1.609438 1.609438 37 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +contact 0 153 1.791759 0.000000 59 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +support 0 132 1.945910 0.000000 83 +architectur 0 139 1.945910 0.000000 77 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +check 0 115 2.197225 0.000000 118 +book 0 99 2.302585 0.000000 131 +need 0 98 2.302585 0.000000 135 +access 0 102 2.302585 0.000000 136 +stuff 1 87 2.484907 2.484907 171 +school 0 84 2.484907 0.000000 188 +state 0 76 2.564949 0.000000 207 +dept 0 64 2.772589 0.000000 291 +visit 0 63 2.772589 0.000000 288 +street 0 63 2.772589 0.000000 293 +major 0 56 2.890372 0.000000 315 +variou 0 56 2.890372 0.000000 317 +tabl 0 51 2.995732 0.000000 346 +friend 1 48 3.044522 3.044522 376 +life 0 50 3.044522 0.000000 375 +featur 0 46 3.091042 0.000000 386 +music 0 42 3.218876 0.000000 436 +movi 0 40 3.258097 0.000000 459 +origin 0 38 3.295837 0.000000 472 +industri 0 38 3.295837 0.000000 464 +either 0 35 3.401197 0.000000 506 +everi 0 34 3.401197 0.000000 519 +india 0 32 3.465736 0.000000 550 +though 0 27 3.637586 0.000000 622 +altern 0 26 3.688879 0.000000 641 +bookmark 0 26 3.688879 0.000000 639 +log 0 19 4.007333 0.000000 857 +north 0 19 4.007333 0.000000 873 +countri 0 15 4.248495 0.000000 1059 +galleri 0 13 4.382027 0.000000 1192 +stai 0 12 4.465908 0.000000 1215 +avenu 0 12 4.465908 0.000000 1277 +undergrad 0 9 4.753590 0.000000 1589 +kanpur 1 8 4.875197 4.875197 1744 +hack 0 7 5.010635 0.000000 1950 +gatewai 0 7 5.010635 0.000000 1942 +corner 0 7 5.010635 0.000000 1909 +rock 0 6 5.164786 0.000000 2164 +blue 0 6 5.164786 0.000000 2227 +chat 0 6 5.164786 0.000000 2128 +metal 0 4 5.568345 0.000000 3079 +randal 0 4 5.568345 0.000000 2776 +venkat 0 4 5.568345 0.000000 2702 +slave 1 3 5.857933 5.857933 3959 +kerala 0 3 5.857933 0.000000 3749 +assistantship 0 3 5.857933 0.000000 3660 +acad 0 3 5.857933 0.000000 3847 +icon 0 3 5.857933 0.000000 3362 +plakal 1 2 6.263398 6.263398 5568 +blah 0 2 6.263398 0.000000 5695 +geeki 0 2 6.263398 0.000000 5823 +iitk 0 2 6.263398 0.000000 6227 +snap 0 2 6.263398 0.000000 4962 +nerd 0 2 6.263398 0.000000 5231 +acknowledg 0 2 6.263398 0.000000 6062 +nifti 0 2 6.263398 0.000000 5504 +igor 0 2 6.263398 0.000000 6183 +ivanisev 0 2 6.263398 0.000000 6184 +calcutta 1 1 6.957497 6.957497 19496 +bosco 1 1 6.957497 6.957497 19497 +yumpe 0 1 6.957497 0.000000 19498 +manoj 0 1 6.957497 0.000000 19499 +universityofwisconsin 0 1 6.957497 0.000000 19500 +salesian 0 1 6.957497 0.000000 19501 +stare 0 1 6.957497 0.000000 19502 +barrel 0 1 6.957497 0.000000 19503 +nerdi 0 1 6.957497 0.000000 19504 +seealso 0 1 6.957497 0.000000 19505 +pinup 0 1 6.957497 0.000000 19506 +suresh 0 1 6.957497 0.000000 19507 +wisecrack 0 1 6.957497 0.000000 19508 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..ed253715 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 0 443 0.693147 0.000000 6 +research 0 431 0.693147 0.000000 10 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +address 1 170 1.791759 1.791759 62 +area 0 144 1.945910 0.000000 80 +databas 1 122 2.079442 2.079442 86 +dayton 0 119 2.079442 0.000000 104 +schedul 0 119 2.079442 0.000000 85 +introduct 0 126 2.079442 0.000000 87 +theori 1 111 2.197225 2.197225 127 +world 0 115 2.197225 0.000000 126 +find 0 111 2.197225 0.000000 111 +sinc 0 90 2.397895 0.000000 159 +build 0 85 2.484907 0.000000 184 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +want 0 79 2.564949 0.000000 199 +meet 0 72 2.639057 0.000000 229 +java 0 70 2.708050 0.000000 248 +prof 0 64 2.772589 0.000000 273 +creat 0 63 2.772589 0.000000 277 +explor 0 58 2.890372 0.000000 324 +finger 0 52 2.995732 0.000000 354 +music 1 42 3.218876 3.218876 436 +download 0 36 3.367296 0.000000 489 +random 0 34 3.401197 0.000000 511 +india 0 32 3.465736 0.000000 550 +storag 0 31 3.496508 0.000000 553 +packag 0 28 3.610918 0.000000 614 +bookmark 0 26 3.688879 0.000000 639 +jeff 0 25 3.737670 0.000000 673 +hierarchi 0 22 3.850148 0.000000 744 +spend 0 19 4.007333 0.000000 850 +estim 0 17 4.110874 0.000000 930 +dilbert 0 16 4.174387 0.000000 996 +princeton 0 15 4.248495 0.000000 1042 +econom 0 13 4.382027 0.000000 1184 +vldb 1 10 4.653960 4.653960 1470 +naughton 0 10 4.653960 0.000000 1450 +presenc 0 8 4.875197 0.000000 1671 +prasad 1 6 5.164786 5.164786 2126 +invest 0 6 5.164786 0.000000 2153 +aggreg 0 6 5.164786 0.000000 2219 +deshpand 0 5 5.347108 0.000000 2431 +multidimension 0 4 5.568345 0.000000 3091 +meanwhil 0 3 5.857933 0.000000 3129 +manageri 0 2 6.263398 0.000000 5135 +constuct 0 1 6.957497 0.000000 19509 +depar 0 1 6.957497 0.000000 19510 +multidimensionalaggreg 0 1 6.957497 0.000000 19511 +timex 0 1 6.957497 0.000000 19512 +comix 0 1 6.957497 0.000000 19513 +hakuna 0 1 6.957497 0.000000 19514 +matata 0 1 6.957497 0.000000 19515 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..98a3a8f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +wisc 0 242 1.386294 0.000000 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +databas 0 122 2.079442 0.000000 86 +assist 0 112 2.197225 0.000000 113 +west 0 83 2.484907 0.000000 192 +help 0 83 2.484907 0.000000 175 +educ 0 86 2.484907 0.000000 191 +resum 0 79 2.564949 0.000000 217 +summari 0 73 2.639057 0.000000 237 +html 0 75 2.639057 0.000000 235 +street 0 63 2.772589 0.000000 293 +prof 0 64 2.772589 0.000000 273 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +advisor 0 51 2.995732 0.000000 355 +basic 0 50 3.044522 0.000000 360 +india 0 32 3.465736 0.000000 550 +yanni 0 8 4.875197 0.000000 1713 +ioannidi 0 8 4.875197 0.000000 1714 +asha 0 3 5.857933 0.000000 4037 +poosala 1 2 6.263398 6.263398 6228 +vishi 0 1 6.957497 0.000000 19516 +viswanath 0 1 6.957497 0.000000 19517 +reseach 0 1 6.957497 0.000000 19518 +voluntari 0 1 6.957497 0.000000 19519 +interestsuw 0 1 6.957497 0.000000 19520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..4b108036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +project 2 340 1.098612 2.197224 18 +us 2 329 1.098612 2.197224 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +current 0 284 1.098612 0.000000 21 +engin 0 297 1.098612 0.000000 20 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +design 0 213 1.386294 0.000000 25 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +modifi 0 178 1.609438 0.000000 35 +data 3 170 1.791759 5.375277 49 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +applic 0 170 1.791759 0.000000 56 +avail 0 169 1.791759 0.000000 48 +algorithm 0 162 1.791759 0.000000 57 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +relat 2 139 1.945910 3.891820 68 +model 2 145 1.945910 3.891820 69 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +databas 2 122 2.079442 4.158884 86 +confer 1 126 2.079442 2.079442 100 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +number 0 130 2.079442 0.000000 97 +analysi 0 124 2.079442 0.000000 98 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +theori 0 111 2.197225 0.000000 127 +find 0 111 2.197225 0.000000 111 +check 0 115 2.197225 0.000000 118 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +intern 0 108 2.197225 0.000000 128 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +memori 0 101 2.302585 0.000000 139 +part 0 98 2.302585 0.000000 129 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +peopl 0 96 2.302585 0.000000 132 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +call 1 91 2.397895 2.397895 153 +present 0 91 2.397895 0.000000 145 +commun 0 95 2.397895 0.000000 157 +associ 0 93 2.397895 0.000000 151 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +requir 1 81 2.484907 2.484907 167 +larg 0 82 2.484907 0.000000 168 +build 0 85 2.484907 0.000000 184 +contain 0 81 2.484907 0.000000 174 +ieee 0 86 2.484907 0.000000 190 +optim 2 79 2.564949 5.129898 197 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +exampl 1 77 2.564949 2.564949 195 +issu 0 78 2.564949 0.000000 211 +decemb 0 80 2.564949 0.000000 215 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +order 2 69 2.708050 5.416100 249 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +goal 0 66 2.708050 0.000000 250 +integr 0 67 2.708050 0.000000 245 +knowledg 0 67 2.708050 0.000000 243 +practic 0 70 2.708050 0.000000 246 +evalu 1 64 2.772589 2.772589 266 +import 1 65 2.772589 2.772589 282 +complex 1 64 2.772589 2.772589 269 +plan 1 65 2.772589 2.772589 272 +collect 1 65 2.772589 2.772589 268 +previou 1 62 2.772589 2.772589 290 +result 1 65 2.772589 2.772589 281 +abstract 0 62 2.772589 0.000000 276 +descript 0 64 2.772589 0.000000 271 +function 0 62 2.772589 0.000000 275 +street 0 63 2.772589 0.000000 293 +content 0 59 2.833213 0.000000 302 +type 0 61 2.833213 0.000000 296 +march 0 61 2.833213 0.000000 295 +variou 1 56 2.890372 2.890372 317 +detail 1 57 2.890372 2.890372 321 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +sever 0 56 2.890372 0.000000 322 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +case 0 51 2.995732 0.000000 351 +set 1 50 3.044522 3.044522 361 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +could 1 46 3.091042 3.091042 383 +featur 0 46 3.091042 0.000000 386 +possibl 0 47 3.091042 0.000000 378 +answer 1 45 3.135494 3.135494 391 +natur 0 44 3.135494 0.000000 406 +algebra 0 45 3.135494 0.000000 394 +describ 0 45 3.135494 0.000000 400 +execut 0 45 3.135494 0.000000 404 +even 0 45 3.135494 0.000000 393 +combin 0 42 3.218876 0.000000 421 +howev 0 41 3.218876 0.000000 422 +cach 0 41 3.218876 0.000000 432 +examin 0 42 3.218876 0.000000 424 +form 1 39 3.258097 3.258097 443 +join 1 39 3.258097 3.258097 457 +map 1 39 3.258097 3.258097 452 +probabl 1 40 3.258097 3.258097 455 +theoret 0 39 3.258097 0.000000 446 +littl 0 39 3.258097 0.000000 454 +multipl 0 39 3.258097 0.000000 453 +submit 0 39 3.258097 0.000000 440 +mean 0 37 3.332205 0.000000 477 +cost 0 37 3.332205 0.000000 480 +multi 0 36 3.367296 0.000000 493 +next 1 34 3.401197 3.401197 517 +singl 1 34 3.401197 3.401197 510 +either 0 35 3.401197 0.000000 506 +michael 0 35 3.401197 0.000000 514 +queri 3 33 3.433987 10.301961 524 +express 1 32 3.465736 3.465736 540 +kind 1 32 3.465736 3.465736 541 +idea 0 32 3.465736 0.000000 545 +given 0 32 3.465736 0.000000 538 +extend 0 32 3.465736 0.000000 539 +transform 0 32 3.465736 0.000000 542 +posit 1 31 3.496508 3.496508 552 +storag 1 31 3.496508 3.496508 553 +scientist 0 31 3.496508 0.000000 560 +domain 1 30 3.555348 3.555348 564 +exist 1 30 3.555348 3.555348 569 +specifi 0 30 3.555348 0.000000 568 +built 1 29 3.583519 3.583519 592 +consid 0 29 3.583519 0.000000 590 +propos 1 28 3.610918 3.610918 602 +weather 0 28 3.610918 0.000000 618 +ask 0 28 3.610918 0.000000 597 +scale 0 28 3.610918 0.000000 613 +except 0 28 3.610918 0.000000 607 +framework 0 28 3.610918 0.000000 606 +client 1 25 3.737670 3.737670 679 +valu 0 25 3.737670 0.000000 665 +wai 0 25 3.737670 0.000000 662 +strategi 0 25 3.737670 0.000000 682 +demonstr 1 24 3.761200 3.761200 694 +store 1 24 3.761200 3.761200 693 +daili 0 24 3.761200 0.000000 706 +sequenc 3 23 3.806662 11.419986 734 +input 0 23 3.806662 0.000000 727 +thread 0 23 3.806662 0.000000 722 +defin 1 22 3.850148 3.850148 746 +sequenti 1 22 3.850148 3.850148 745 +sort 1 22 3.850148 3.850148 738 +serv 0 22 3.850148 0.000000 758 +identifi 0 22 3.850148 0.000000 760 +disk 0 22 3.850148 0.000000 747 +deal 0 22 3.850148 0.000000 736 +instead 0 22 3.850148 0.000000 756 +similar 1 21 3.912023 3.912023 771 +util 0 21 3.912023 0.000000 774 +sigmod 0 19 4.007333 0.000000 877 +record 2 18 4.060443 8.120886 890 +statu 0 18 4.060443 0.000000 885 +event 0 18 4.060443 0.000000 896 +account 0 18 4.060443 0.000000 882 +expand 1 17 4.110874 4.110874 928 +medic 0 17 4.110874 0.000000 958 +monitor 0 17 4.110874 0.000000 941 +weekli 0 17 4.110874 0.000000 919 +estim 0 17 4.110874 0.000000 930 +ramakrishnan 1 16 4.174387 4.174387 972 +advantag 1 16 4.174387 4.174387 987 +easi 0 16 4.174387 0.000000 969 +livni 1 15 4.248495 4.248495 1053 +indic 0 15 4.248495 0.000000 1013 +stream 0 15 4.248495 0.000000 1015 +miron 1 14 4.317488 4.317488 1110 +manner 1 14 4.317488 4.317488 1074 +embed 1 14 4.317488 4.317488 1102 +convent 0 14 4.317488 0.000000 1072 +econom 0 13 4.382027 0.000000 1184 +social 0 13 4.382027 0.000000 1123 +opportun 0 13 4.382027 0.000000 1161 +composit 0 13 4.382027 0.000000 1150 +step 0 13 4.382027 0.000000 1138 +front 0 13 4.382027 0.000000 1154 +raghu 1 12 4.465908 4.465908 1212 +scan 1 12 4.465908 4.465908 1243 +buffer 1 12 4.465908 4.465908 1211 +insid 1 12 4.465908 4.465908 1262 +amount 0 12 4.465908 0.000000 1208 +uniqu 0 12 4.465908 0.000000 1228 +shore 1 11 4.553877 4.553877 1377 +regard 0 11 4.553877 0.000000 1309 +motiv 0 11 4.553877 0.000000 1346 +instanc 0 11 4.553877 0.000000 1322 +devis 1 10 4.653960 4.653960 1451 +relationship 0 10 4.653960 0.000000 1383 +reli 0 10 4.653960 0.000000 1411 +subset 0 10 4.653960 0.000000 1425 +vldb 0 10 4.653960 0.000000 1470 +cheng 0 10 4.653960 0.000000 1381 +declar 1 9 4.753590 4.753590 1526 +tempor 1 9 4.753590 4.753590 1584 +strength 1 9 4.753590 4.753590 1494 +compos 0 9 4.753590 0.000000 1527 +vice 0 9 4.753590 0.000000 1604 +lock 0 9 4.753590 0.000000 1551 +respect 0 9 4.753590 0.000000 1545 +meta 0 9 4.753590 0.000000 1505 +intermedi 0 9 4.753590 0.000000 1497 +observ 0 9 4.753590 0.000000 1578 +mode 0 9 4.753590 0.000000 1492 +pose 0 9 4.753590 0.000000 1535 +seshadri 1 7 5.010635 5.010635 1803 +pageth 0 7 5.010635 0.000000 1939 +notion 0 7 5.010635 0.000000 1947 +merg 0 7 5.010635 0.000000 1862 +whenev 0 7 5.010635 0.000000 1883 +therefor 0 7 5.010635 0.000000 1822 +supportfor 0 7 5.010635 0.000000 1854 +praveen 1 6 5.164786 5.164786 1996 +nest 1 6 5.164786 5.164786 2151 +feasibl 1 6 5.164786 5.164786 2157 +financi 0 6 5.164786 0.000000 2197 +histor 0 6 5.164786 0.000000 2085 +consequ 0 6 5.164786 0.000000 1989 +temporari 0 6 5.164786 0.000000 2090 +greater 1 5 5.347108 5.347108 2258 +treat 0 5 5.347108 0.000000 2521 +correl 0 5 5.347108 0.000000 2279 +dual 0 5 5.347108 0.000000 2522 +distinct 0 5 5.347108 0.000000 2319 +overlap 0 5 5.347108 0.000000 2368 +complementari 0 5 5.347108 0.000000 2523 +educomput 0 5 5.347108 0.000000 2524 +zoom 1 4 5.568345 5.568345 2961 +phenomena 1 4 5.568345 5.568345 2962 +flavor 1 4 5.568345 5.568345 2625 +richter 0 4 5.568345 0.000000 2957 +collaps 1 3 5.857933 5.857933 3729 +inadequ 0 3 5.857933 0.000000 3730 +tediou 0 3 5.857933 0.000000 3731 +ineffici 0 3 5.857933 0.000000 3457 +megabyt 0 3 5.857933 0.000000 3732 +claus 0 3 5.857933 0.000000 3733 +offset 0 3 5.857933 0.000000 3467 +hourli 0 3 5.857933 0.000000 3734 +thathav 0 3 5.857933 0.000000 3735 +serverarchitectur 0 3 5.857933 0.000000 3736 +comad 0 3 5.857933 0.000000 3737 +informationfor 0 3 5.857933 0.000000 3738 +sequin 2 2 6.263398 12.526796 5250 +earthquak 1 2 6.263398 6.263398 5251 +volcano 1 2 6.263398 6.263398 5252 +meteorolog 1 2 6.263398 6.263398 5253 +aredescrib 1 2 6.263398 6.263398 5254 +objectivescurr 0 2 6.263398 0.000000 5255 +statusmotiv 0 2 6.263398 0.000000 5256 +exampleseq 0 2 6.263398 0.000000 5257 +languageoptim 0 2 6.263398 0.000000 5258 +techniquesseq 0 2 6.263398 0.000000 5259 +developmentpublicationsrel 0 2 6.263398 0.000000 5260 +workcontact 0 2 6.263398 0.000000 5261 +informationproject 0 2 6.263398 0.000000 5262 +processingof 0 2 6.263398 0.000000 5263 +theseappl 0 2 6.263398 0.000000 5264 +metereolog 0 2 6.263398 0.000000 5265 +andbiolog 0 2 6.263398 0.000000 5266 +semanticstak 0 2 6.263398 0.000000 5267 +evaluationintegr 0 2 6.263398 0.000000 5268 +canstor 0 2 6.263398 0.000000 5269 +sequencesthes 0 2 6.263398 0.000000 5270 +themost 0 2 6.263398 0.000000 5271 +statusth 0 2 6.263398 0.000000 5272 +algebraicqueri 0 2 6.263398 0.000000 5273 +analogousto 0 2 6.263398 0.000000 5274 +candeclar 0 2 6.263398 0.000000 5275 +likesql 0 2 6.263398 0.000000 5276 +versa 0 2 6.263398 0.000000 5277 +querya 0 2 6.263398 0.000000 5278 +occurr 0 2 6.263398 0.000000 5279 +erupt 0 2 6.263398 0.000000 5280 +didth 0 2 6.263398 0.000000 5281 +groupbi 0 2 6.263398 0.000000 5282 +subqueri 0 2 6.263398 0.000000 5283 +aggregatefunct 0 2 6.263398 0.000000 5284 +sequencesord 0 2 6.263398 0.000000 5285 +modelth 0 2 6.263398 0.000000 5286 +gist 0 2 6.263398 0.000000 5287 +ordereddomain 0 2 6.263398 0.000000 5288 +andposit 0 2 6.263398 0.000000 5289 +recordsmap 0 2 6.263398 0.000000 5290 +rise 0 2 6.263398 0.000000 5291 +relationaloper 0 2 6.263398 0.000000 5292 +andaggreg 0 2 6.263398 0.000000 5293 +researchersin 0 2 6.263398 0.000000 5294 +movingaggreg 0 2 6.263398 0.000000 5295 +worldsitu 0 2 6.263398 0.000000 5296 +extensionof 0 2 6.263398 0.000000 5297 +ofseq 0 2 6.263398 0.000000 5298 +languagew 0 2 6.263398 0.000000 5299 +usingwhich 0 2 6.263398 0.000000 5300 +languagei 0 2 6.263398 0.000000 5301 +queriesa 0 2 6.263398 0.000000 5302 +techniquesw 0 2 6.263398 0.000000 5303 +developmentth 0 2 6.263398 0.000000 5304 +viaa 0 2 6.263398 0.000000 5305 +ontop 0 2 6.263398 0.000000 5306 +languageswhich 0 2 6.263398 0.000000 5307 +arbitrarylevel 0 2 6.263398 0.000000 5308 +viceversa 0 2 6.263398 0.000000 5309 +detailson 0 2 6.263398 0.000000 5310 +publicationssequ 0 2 6.263398 0.000000 5311 +datapraveen 0 2 6.263398 0.000000 5312 +systempraveen 0 2 6.263398 0.000000 5313 +queriesraghu 0 2 6.263398 0.000000 5314 +workthedevis 0 2 6.263398 0.000000 5315 +visualizationenviron 0 2 6.263398 0.000000 5316 +servercontact 0 2 6.263398 0.000000 5317 +eduraghu 0 2 6.263398 0.000000 5318 +edumiron 0 2 6.263398 0.000000 5319 +seshadripraveen 0 2 6.263398 0.000000 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..9086464a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +wisc 0 242 1.386294 0.000000 33 +class 1 199 1.609438 1.609438 37 +hour 0 165 1.791759 0.000000 46 +madison 0 165 1.791759 0.000000 55 +person 0 111 2.197225 0.000000 117 +school 1 84 2.484907 2.484907 188 +resum 0 79 2.564949 0.000000 217 +bookmark 0 26 3.688879 0.000000 639 +andrew 1 19 4.007333 4.007333 849 +histori 0 19 4.007333 0.000000 853 +vista 1 10 4.653960 4.653960 1452 +alta 1 4 5.568345 5.568345 3039 +prock 1 2 6.263398 6.263398 4786 +clemen 0 1 6.957497 0.000000 19521 +hockert 0 1 6.957497 0.000000 19522 +prockoffic 0 1 6.957497 0.000000 19523 +doonesburi 0 1 6.957497 0.000000 19524 +trot 0 1 6.957497 0.000000 19525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..1d876527 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +us 1 329 1.098612 1.098612 16 +current 0 284 1.098612 0.000000 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +washington 0 236 1.386294 0.000000 32 +also 0 259 1.386294 0.000000 28 +design 0 213 1.386294 0.000000 25 +public 0 202 1.609438 0.000000 43 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +theori 1 111 2.197225 2.197225 127 +well 0 109 2.197225 0.000000 121 +associ 0 93 2.397895 0.000000 151 +commun 0 95 2.397895 0.000000 157 +octob 0 89 2.397895 0.000000 156 +journal 1 83 2.484907 2.484907 183 +solut 0 82 2.484907 0.000000 162 +exampl 0 77 2.564949 0.000000 195 +complet 0 77 2.564949 0.000000 208 +state 0 76 2.564949 0.000000 207 +summari 0 73 2.639057 0.000000 237 +solv 0 73 2.639057 0.000000 234 +symposium 0 72 2.639057 0.000000 238 +goal 0 66 2.708050 0.000000 250 +practic 0 70 2.708050 0.000000 246 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +sever 0 56 2.890372 0.000000 322 +sampl 0 53 2.944439 0.000000 339 +februari 0 54 2.944439 0.000000 328 +much 0 52 2.995732 0.000000 349 +telephon 0 50 3.044522 0.000000 373 +understand 0 47 3.091042 0.000000 384 +long 0 43 3.178054 0.000000 413 +combin 0 42 3.218876 0.000000 421 +annual 0 40 3.258097 0.000000 458 +error 0 40 3.258097 0.000000 449 +game 0 36 3.367296 0.000000 498 +tree 0 36 3.367296 0.000000 492 +random 1 34 3.401197 3.401197 511 +approxim 1 35 3.401197 3.401197 509 +posit 0 31 3.496508 0.000000 552 +hard 1 30 3.555348 3.555348 563 +graph 1 30 3.555348 3.555348 576 +turn 0 29 3.583519 0.000000 586 +synchron 0 29 3.583519 0.000000 588 +progress 0 28 3.610918 0.000000 598 +bound 0 26 3.688879 0.000000 659 +although 0 25 3.737670 0.000000 667 +strategi 0 25 3.737670 0.000000 682 +proof 1 23 3.806662 3.806662 720 +identifi 0 22 3.850148 0.000000 760 +prove 0 19 4.007333 0.000000 848 +easi 0 16 4.174387 0.000000 969 +novel 0 15 4.248495 0.000000 1039 +polynomi 0 14 4.317488 0.000000 1069 +finit 0 14 4.317488 0.000000 1106 +automata 1 13 4.382027 4.382027 1135 +minimum 0 9 4.753590 0.000000 1555 +span 0 8 4.875197 0.000000 1751 +prover 0 8 4.875197 0.000000 1653 +round 0 8 4.875197 0.000000 1769 +aris 0 7 5.010635 0.000000 1924 +eduto 0 7 5.010635 0.000000 1956 +ann 1 6 5.164786 5.164786 2065 +ladner 0 6 5.164786 0.000000 2062 +pub 0 6 5.164786 0.000000 2239 +proceedingsof 0 5 5.347108 0.000000 2331 +provabl 0 5 5.347108 0.000000 2558 +surprisingli 0 4 5.568345 0.000000 2609 +expens 0 4 5.568345 0.000000 2678 +condon 1 3 5.857933 5.857933 3309 +neg 0 3 5.857933 0.000000 3451 +theoryand 0 3 5.857933 0.000000 3350 +nondeterminist 0 3 5.857933 0.000000 3560 +wigderson 0 2 6.263398 0.000000 6035 +complexityclass 0 1 6.957497 0.000000 19526 +interactiveproof 0 1 6.957497 0.000000 19527 +nondetermin 0 1 6.957497 0.000000 19528 +suchmodel 0 1 6.957497 0.000000 19529 +proven 0 1 6.957497 0.000000 19530 +classicproblem 0 1 6.957497 0.000000 19531 +theoryof 0 1 6.957497 0.000000 19532 +computationalproblem 0 1 6.957497 0.000000 19533 +whichhard 0 1 6.957497 0.000000 19534 +recentresult 0 1 6.957497 0.000000 19535 +modelsof 0 1 6.957497 0.000000 19536 +approximabilityresult 0 1 6.957497 0.000000 19537 +developingboth 0 1 6.957497 0.000000 19538 +hardcombinatori 0 1 6.957497 0.000000 19539 +forsort 0 1 6.957497 0.000000 19540 +costscan 0 1 6.957497 0.000000 19541 +probabilisticst 0 1 6.957497 0.000000 19542 +hellerstein 0 1 6.957497 0.000000 19543 +pottl 0 1 6.957497 0.000000 19544 +pspace 0 1 6.957497 0.000000 19545 +caiand 0 1 6.957497 0.000000 19546 +lipton 0 1 6.957497 0.000000 19547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..ff1c5b47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +time 1 293 1.098612 1.098612 17 +last 0 314 1.098612 0.000000 14 +project 0 340 1.098612 0.000000 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 0 213 1.386294 0.000000 25 +class 1 199 1.609438 1.609438 37 +public 0 202 1.609438 0.000000 43 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +area 1 144 1.945910 1.945910 80 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +year 0 148 1.945910 0.000000 84 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +analysi 0 124 2.079442 0.000000 98 +report 0 131 2.079442 0.000000 92 +structur 1 106 2.197225 2.197225 105 +mathemat 1 108 2.197225 2.197225 123 +theori 0 111 2.197225 0.000000 127 +specif 0 106 2.197225 0.000000 106 +techniqu 1 99 2.302585 2.302585 138 +question 1 91 2.397895 2.397895 141 +proceed 1 93 2.397895 2.397895 152 +associ 0 93 2.397895 0.000000 151 +octob 0 89 2.397895 0.000000 156 +larg 0 82 2.484907 0.000000 168 +method 1 80 2.564949 2.564949 213 +dynam 0 76 2.564949 0.000000 194 +appli 0 71 2.639057 0.000000 226 +logic 0 71 2.639057 0.000000 230 +summari 0 73 2.639057 0.000000 237 +addit 0 74 2.639057 0.000000 228 +degre 0 69 2.708050 0.000000 259 +complex 1 64 2.772589 2.772589 269 +creat 0 63 2.772589 0.000000 277 +type 0 61 2.833213 0.000000 296 +automat 0 61 2.833213 0.000000 306 +sampl 0 53 2.944439 0.000000 339 +local 0 55 2.944439 0.000000 334 +set 1 50 3.044522 3.044522 361 +telephon 0 50 3.044522 0.000000 373 +theoret 0 39 3.258097 0.000000 446 +littl 0 39 3.258097 0.000000 454 +error 0 40 3.258097 0.000000 449 +formal 0 37 3.332205 0.000000 478 +global 0 34 3.401197 0.000000 520 +obtain 0 33 3.433987 0.000000 534 +graph 0 30 3.555348 0.000000 576 +scale 0 28 3.610918 0.000000 613 +great 0 27 3.637586 0.000000 626 +effort 0 26 3.688879 0.000000 652 +detect 0 26 3.688879 0.000000 646 +concern 1 25 3.737670 3.737670 666 +known 0 24 3.761200 0.000000 702 +sequenc 1 23 3.806662 3.806662 734 +proof 1 23 3.806662 3.806662 720 +properti 1 22 3.850148 3.850148 749 +geometri 1 22 3.850148 3.850148 752 +deal 0 22 3.850148 0.000000 736 +inth 0 22 3.850148 0.000000 741 +springer 0 22 3.850148 0.000000 750 +verlag 0 22 3.850148 0.000000 751 +similar 1 21 3.912023 3.912023 771 +fact 0 21 3.912023 0.000000 780 +util 0 21 3.912023 0.000000 774 +whether 0 17 4.110874 0.000000 918 +young 0 16 4.174387 0.000000 991 +spars 0 16 4.174387 0.000000 989 +fourth 0 16 4.174387 0.000000 999 +biologi 1 15 4.248495 4.248495 1049 +decid 0 14 4.317488 0.000000 1075 +polynomi 0 14 4.317488 0.000000 1069 +anonym 0 14 4.317488 0.000000 1100 +incomput 0 14 4.317488 0.000000 1096 +necessari 0 13 4.382027 0.000000 1147 +primarili 0 13 4.382027 0.000000 1185 +discret 0 13 4.382027 0.000000 1165 +assembl 0 12 4.465908 0.000000 1207 +weight 0 12 4.465908 0.000000 1204 +purdu 0 10 4.653960 0.000000 1466 +rapid 0 10 4.653960 0.000000 1453 +eduto 0 7 5.010635 0.000000 1956 +biolog 0 6 5.164786 0.000000 2147 +determinist 0 6 5.164786 0.000000 2034 +pub 0 6 5.164786 0.000000 2239 +joseph 1 5 5.347108 5.347108 2327 +twenti 0 5 5.347108 0.000000 2540 +despit 0 5 5.347108 0.000000 2317 +tiwari 0 5 5.347108 0.000000 2385 +gone 0 4 5.568345 0.000000 3072 +resolv 0 4 5.568345 0.000000 2675 +algorithmsfor 0 4 5.568345 0.000000 2748 +genom 1 3 5.857933 5.857933 3546 +collaps 0 3 5.857933 0.000000 3729 +ninth 0 3 5.857933 0.000000 3616 +soar 0 3 5.857933 0.000000 3506 +adequaci 0 2 6.263398 0.000000 6229 +fragment 0 2 6.263398 0.000000 6000 +homolog 0 2 6.263398 0.000000 5441 +analysisof 0 2 6.263398 0.000000 4277 +deborah 1 1 6.957497 6.957497 19548 +studyingth 0 1 6.957497 0.000000 19549 +andnondeterminist 0 1 6.957497 0.000000 19550 +stillknow 0 1 6.957497 0.000000 19551 +computerscientist 0 1 6.957497 0.000000 19552 +techniquesfor 0 1 6.957497 0.000000 19553 +investigatesth 0 1 6.957497 0.000000 19554 +exploresin 0 1 6.957497 0.000000 19555 +resolveproblem 0 1 6.957497 0.000000 19556 +theseinclud 0 1 6.957497 0.000000 19557 +handlingrepetit 0 1 6.957497 0.000000 19558 +graphtheoret 0 1 6.957497 0.000000 19559 +subexponenti 0 1 6.957497 0.000000 19560 +pruim 0 1 6.957497 0.000000 19561 +theoryconfer 0 1 6.957497 0.000000 19562 +spanner 0 1 6.957497 0.000000 19563 +althof 0 1 6.957497 0.000000 19564 +dobkin 0 1 6.957497 0.000000 19565 +meidanisand 0 1 6.957497 0.000000 19566 +scandinavianworkshop 0 1 6.957497 0.000000 19567 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..01745594 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +design 0 213 1.386294 0.000000 25 +gener 0 220 1.386294 0.000000 27 +languag 0 227 1.386294 0.000000 26 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +analysi 0 124 2.079442 0.000000 98 +schedul 0 119 2.079442 0.000000 85 +studi 0 120 2.079442 0.000000 91 +report 0 131 2.079442 0.000000 92 +manag 1 114 2.197225 2.197225 125 +well 0 109 2.197225 0.000000 121 +techniqu 0 99 2.302585 0.000000 138 +access 0 102 2.302585 0.000000 136 +proceed 1 93 2.397895 2.397895 152 +sinc 0 90 2.397895 0.000000 159 +graphic 0 90 2.397895 0.000000 147 +octob 0 89 2.397895 0.000000 156 +resourc 1 81 2.484907 2.484907 172 +institut 0 84 2.484907 0.000000 187 +journal 0 83 2.484907 0.000000 183 +interfac 0 79 2.564949 0.000000 209 +summari 0 73 2.639057 0.000000 237 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +simul 1 66 2.708050 2.708050 255 +differ 0 66 2.708050 0.000000 253 +polici 1 64 2.772589 2.772589 279 +laboratori 1 63 2.772589 2.772589 292 +foundat 0 62 2.772589 0.000000 286 +creat 0 63 2.772589 0.000000 277 +type 0 61 2.833213 0.000000 296 +juli 0 60 2.833213 0.000000 305 +automat 0 61 2.833213 0.000000 306 +major 0 56 2.890372 0.000000 315 +sampl 0 53 2.944439 0.000000 339 +telephon 0 50 3.044522 0.000000 373 +visual 0 48 3.044522 0.000000 372 +join 0 39 3.258097 0.000000 457 +error 0 40 3.258097 0.000000 449 +purpos 0 37 3.332205 0.000000 481 +queri 0 33 3.433987 0.000000 524 +consid 0 29 3.583519 0.000000 590 +synchron 0 29 3.583519 0.000000 588 +sequenc 0 23 3.806662 0.000000 734 +displai 0 23 3.806662 0.000000 712 +disk 1 22 3.850148 3.850148 747 +emphasi 0 22 3.850148 0.000000 755 +properti 0 22 3.850148 0.000000 749 +util 0 21 3.912023 0.000000 774 +sigmod 0 19 4.007333 0.000000 877 +event 1 18 4.060443 4.060443 896 +ramakrishnan 0 16 4.174387 0.000000 972 +livni 1 15 4.248495 4.248495 1053 +novel 0 15 4.248495 0.000000 1039 +miron 1 14 4.317488 4.317488 1110 +discret 1 13 4.382027 4.382027 1165 +sigmetr 0 13 4.382027 0.000000 1173 +emploi 0 12 4.465908 0.000000 1284 +israel 0 11 4.553877 0.000000 1366 +ioannidi 0 8 4.875197 0.000000 1714 +eduto 0 7 5.010635 0.000000 1956 +schema 0 6 5.164786 0.000000 1988 +pub 0 6 5.164786 0.000000 2239 +tape 1 4 5.568345 5.568345 2959 +weizmann 0 4 5.568345 0.000000 2858 +evaluationof 0 3 5.857933 0.000000 3192 +interplai 0 3 5.857933 0.000000 3726 +myllymaki 0 3 5.857933 0.000000 4022 +metaphor 0 3 5.857933 0.000000 4038 +rehovot 0 2 6.263398 0.000000 4891 +developmentof 0 2 6.263398 0.000000 6041 +managementsystem 0 2 6.263398 0.000000 4365 +researchinvolv 0 2 6.263398 0.000000 5556 +asreal 0 1 6.957497 0.000000 19568 +specialemphasi 0 1 6.957497 0.000000 19569 +systemand 0 1 6.957497 0.000000 19570 +performancestudi 0 1 6.957497 0.000000 19571 +modelingand 0 1 6.957497 0.000000 19572 +implementinga 0 1 6.957497 0.000000 19573 +visualizationtool 0 1 6.957497 0.000000 19574 +sashadri 0 1 6.957497 0.000000 19575 +haberand 0 1 6.957497 0.000000 19576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..369fec63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 0 259 1.386294 0.000000 28 +class 0 199 1.609438 0.000000 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +distribut 1 162 1.791759 1.791759 51 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +recent 0 167 1.791759 0.000000 58 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +studi 0 120 2.079442 0.000000 91 +number 0 130 2.079442 0.000000 97 +report 0 131 2.079442 0.000000 92 +mathemat 0 108 2.197225 0.000000 123 +part 0 98 2.302585 0.000000 129 +real 0 93 2.397895 0.000000 144 +octob 0 89 2.397895 0.000000 156 +journal 1 83 2.484907 2.484907 183 +second 0 81 2.484907 0.000000 166 +method 1 80 2.564949 2.564949 213 +appear 0 78 2.564949 0.000000 210 +summari 0 73 2.639057 0.000000 237 +order 0 69 2.708050 0.000000 249 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +major 0 56 2.890372 0.000000 315 +sever 0 56 2.890372 0.000000 322 +sampl 0 53 2.944439 0.000000 339 +numer 1 49 3.044522 3.044522 369 +telephon 0 50 3.044522 0.000000 373 +without 0 50 3.044522 0.000000 370 +effect 1 46 3.091042 3.091042 385 +york 0 41 3.218876 0.000000 435 +small 0 39 3.258097 0.000000 447 +error 0 40 3.258097 0.000000 449 +hand 0 37 3.332205 0.000000 475 +procedur 0 36 3.367296 0.000000 488 +concept 0 32 3.465736 0.000000 537 +posit 1 31 3.496508 3.496508 552 +valu 1 25 3.737670 3.737670 665 +equat 1 23 3.806662 3.806662 724 +emphasi 0 22 3.850148 0.000000 755 +siam 1 21 3.912023 3.912023 800 +thu 0 21 3.912023 0.000000 773 +definit 1 19 4.007333 4.007333 864 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +attempt 0 17 4.110874 0.000000 917 +estim 0 17 4.110874 0.000000 930 +condit 1 16 4.174387 4.174387 975 +spars 0 16 4.174387 0.000000 989 +discret 1 13 4.382027 4.382027 1165 +boundari 0 7 5.010635 0.000000 1929 +eduto 0 7 5.010635 0.000000 1956 +mix 0 6 5.164786 0.000000 2200 +pub 0 6 5.164786 0.000000 2239 +pivot 0 5 5.347108 0.000000 2426 +symmetr 0 4 5.568345 0.000000 2908 +ellipt 1 3 5.857933 5.857933 3774 +singular 1 3 5.857933 5.857933 3366 +preserv 0 3 5.857933 0.000000 3628 +thesystem 0 3 5.857933 0.000000 3881 +encount 0 3 5.857933 0.000000 3128 +attack 0 3 5.857933 0.000000 3168 +parter 1 2 6.263398 6.263398 4075 +solutionof 0 2 6.263398 0.000000 5056 +linearalgebra 0 2 6.263398 0.000000 4833 +scientificcomput 0 2 6.263398 0.000000 4145 +precondit 1 1 6.957497 6.957497 19577 +seymour 1 1 6.957497 6.957497 19578 +indefinit 1 1 6.957497 6.957497 19579 +classicalit 0 1 6.957497 0.000000 19580 +multigrid 0 1 6.957497 0.000000 19581 +effectivelywhen 0 1 6.957497 0.000000 19582 +bemad 0 1 6.957497 0.000000 19583 +operatori 0 1 6.957497 0.000000 19584 +casedirect 0 1 6.957497 0.000000 19585 +challengingproblem 0 1 6.957497 0.000000 19586 +nowinvolv 0 1 6.957497 0.000000 19587 +specialmultigrid 0 1 6.957497 0.000000 19588 +chebyshev 0 1 6.957497 0.000000 19589 +collact 0 1 6.957497 0.000000 19590 +ellipticparti 0 1 6.957497 0.000000 19591 +journalon 0 1 6.957497 0.000000 19592 +numbersand 0 1 6.957497 0.000000 19593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..8133f706 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +current 0 284 1.098612 0.000000 21 +project 0 340 1.098612 0.000000 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +design 0 213 1.386294 0.000000 25 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +parallel 2 169 1.791759 3.583518 60 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +distribut 0 162 1.791759 0.000000 51 +develop 0 174 1.791759 0.000000 53 +algorithm 0 162 1.791759 0.000000 57 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +report 0 131 2.079442 0.000000 92 +techniqu 1 99 2.302585 2.302585 138 +proceed 1 93 2.397895 2.397895 152 +call 0 91 2.397895 0.000000 153 +octob 0 89 2.397895 0.000000 156 +ieee 0 86 2.484907 0.000000 190 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +server 0 76 2.564949 0.000000 204 +complet 0 77 2.564949 0.000000 208 +summari 0 73 2.639057 0.000000 237 +servic 0 72 2.639057 0.000000 236 +solv 0 73 2.639057 0.000000 234 +effici 0 73 2.639057 0.000000 233 +multimedia 0 68 2.708050 0.000000 258 +august 0 66 2.708050 0.000000 257 +polici 1 64 2.772589 2.772589 279 +creat 0 63 2.772589 0.000000 277 +automat 0 61 2.833213 0.000000 306 +processor 1 54 2.944439 2.944439 335 +sampl 0 53 2.944439 0.000000 339 +telephon 0 50 3.044522 0.000000 373 +california 0 46 3.091042 0.000000 388 +featur 0 46 3.091042 0.000000 386 +join 0 39 3.258097 0.000000 457 +error 0 40 3.258097 0.000000 449 +industri 0 38 3.295837 0.000000 464 +workstat 0 37 3.332205 0.000000 479 +approxim 0 35 3.401197 0.000000 509 +synchron 0 29 3.583519 0.000000 588 +limit 0 29 3.583519 0.000000 585 +repres 1 26 3.688879 3.688879 656 +valu 0 25 3.737670 0.000000 665 +accur 0 25 3.737670 0.000000 680 +togeth 0 23 3.806662 0.000000 714 +emphasi 0 22 3.850148 0.000000 755 +alloc 0 20 3.951244 0.000000 821 +precis 0 15 4.248495 0.000000 1023 +hybrid 0 15 4.248495 0.000000 1057 +sigmetr 0 13 4.382027 0.000000 1173 +mari 1 12 4.465908 4.465908 1266 +workload 0 12 4.465908 0.000000 1210 +characterist 0 12 4.465908 0.000000 1257 +operatingsystem 0 10 4.653960 0.000000 1401 +custom 0 10 4.653960 0.000000 1414 +vernon 1 9 4.753590 4.753590 1556 +angel 0 8 4.875197 0.000000 1779 +character 0 8 4.875197 0.000000 1767 +reus 0 8 4.875197 0.000000 1661 +hash 0 8 4.875197 0.000000 1618 +carei 0 8 4.875197 0.000000 1781 +analyt 0 7 5.010635 0.000000 1913 +prioriti 0 7 5.010635 0.000000 1792 +interpol 0 7 5.010635 0.000000 1823 +chiang 0 7 5.010635 0.000000 1853 +eduto 0 7 5.010635 0.000000 1956 +pub 0 6 5.164786 0.000000 2239 +yield 0 5 5.347108 0.000000 2458 +fair 0 5 5.347108 0.000000 2333 +infocom 0 3 5.857933 0.000000 3283 +paralleland 0 2 6.263398 0.000000 5805 +petri 0 2 6.263398 0.000000 4414 +intuit 0 2 6.263398 0.000000 4921 +performanceanalysi 0 2 6.263398 0.000000 5629 +schedulingpolici 0 2 6.263398 0.000000 5879 +memorymanag 0 2 6.263398 0.000000 4158 +preemption 0 2 6.263398 0.000000 6230 +mansharamani 0 2 6.263398 0.000000 6231 +applicationto 0 1 6.957497 0.000000 19594 +techniquesi 0 1 6.957497 0.000000 19595 +colleaguesinclud 0 1 6.957497 0.000000 19596 +customizedmean 0 1 6.957497 0.000000 19597 +gtpn 0 1 6.957497 0.000000 19598 +systemfeatur 0 1 6.957497 0.000000 19599 +equationsthat 0 1 6.957497 0.000000 19600 +butcan 0 1 6.957497 0.000000 19601 +proposedth 0 1 6.957497 0.000000 19602 +approximationsfor 0 1 6.957497 0.000000 19603 +techniquemai 0 1 6.957497 0.000000 19604 +broader 0 1 6.957497 0.000000 19605 +performanceparallel 0 1 6.957497 0.000000 19606 +dqdb 0 1 6.957497 0.000000 19607 +slot 0 1 6.957497 0.000000 19608 +brewster 0 1 6.957497 0.000000 19609 +pateland 0 1 6.957497 0.000000 19610 +forrun 0 1 6.957497 0.000000 19611 +with 0 1 6.957497 0.000000 19612 +sigmetricsconfer 0 1 6.957497 0.000000 19613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..df0ebc8b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +welcom 0 122 2.079442 0.000000 99 +wang 1 21 3.912023 3.912023 790 +edulast 0 17 4.110874 0.000000 927 +qinqin 1 1 6.957497 6.957497 19614 +pageqw 0 1 6.957497 0.000000 19615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..2cf44852 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +time 0 293 1.098612 0.000000 17 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +data 2 170 1.791759 3.583518 49 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +wisconsin 0 169 1.791759 0.000000 54 +texa 0 160 1.791759 0.000000 64 +austin 0 168 1.791759 0.000000 63 +recent 0 167 1.791759 0.000000 58 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +professor 0 137 1.945910 0.000000 76 +area 0 144 1.945910 0.000000 80 +object 0 138 1.945910 0.000000 79 +databas 2 122 2.079442 4.158884 86 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +teach 1 108 2.197225 2.197225 112 +manag 1 114 2.197225 2.197225 125 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +take 0 97 2.302585 0.000000 134 +imag 1 91 2.397895 2.397895 161 +call 1 91 2.397895 2.397895 153 +associ 0 93 2.397895 0.000000 151 +educ 1 86 2.484907 2.484907 191 +activ 1 84 2.484907 2.484907 182 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +school 0 84 2.484907 0.000000 188 +sourc 1 77 2.564949 2.564949 201 +optim 1 79 2.564949 2.564949 197 +orient 0 80 2.564949 0.000000 205 +logic 1 71 2.639057 2.639057 230 +appli 1 71 2.639057 2.639057 226 +effici 0 73 2.639057 0.000000 233 +involv 0 71 2.639057 0.000000 227 +august 1 66 2.708050 2.708050 257 +integr 1 67 2.708050 2.708050 245 +practic 0 70 2.708050 0.000000 246 +goal 0 66 2.708050 0.000000 250 +prof 1 64 2.772589 2.772589 273 +evalu 1 64 2.772589 2.772589 266 +street 0 63 2.772589 0.000000 293 +result 0 65 2.772589 0.000000 281 +complex 0 64 2.772589 0.000000 269 +import 0 65 2.772589 0.000000 282 +collect 0 65 2.772589 0.000000 268 +content 1 59 2.833213 2.833213 302 +sever 1 56 2.890372 2.890372 322 +explor 1 58 2.890372 2.890372 324 +publish 0 57 2.890372 0.000000 326 +index 0 56 2.890372 0.000000 309 +cover 0 55 2.944439 0.000000 329 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +set 1 50 3.044522 3.044522 361 +visual 1 48 3.044522 3.044522 372 +long 0 43 3.178054 0.000000 413 +term 0 43 3.178054 0.000000 411 +multipl 0 39 3.258097 0.000000 453 +continu 0 39 3.258097 0.000000 448 +sciencesunivers 0 37 3.332205 0.000000 486 +formal 0 37 3.332205 0.000000 478 +next 1 34 3.401197 3.401197 517 +tech 0 35 3.401197 0.000000 515 +queri 2 33 3.433987 6.867974 524 +extend 0 32 3.465736 0.000000 539 +independ 0 32 3.465736 0.000000 548 +express 0 32 3.465736 0.000000 540 +given 0 32 3.465736 0.000000 538 +abl 0 30 3.555348 0.000000 566 +focu 0 30 3.555348 0.000000 571 +power 0 30 3.555348 0.000000 573 +specifi 0 30 3.555348 0.000000 568 +rang 0 30 3.555348 0.000000 565 +focus 0 29 3.583519 0.000000 584 +cluster 0 28 3.610918 0.000000 612 +retriev 0 27 3.637586 0.000000 621 +constraint 1 26 3.688879 3.688879 636 +mine 0 26 3.688879 0.000000 654 +hill 1 25 3.737670 3.737670 670 +aspect 0 25 3.737670 0.000000 663 +lab 1 24 3.761200 3.761200 698 +pattern 0 24 3.761200 0.000000 689 +sequenc 1 23 3.806662 3.806662 734 +deal 1 22 3.850148 3.850148 736 +indian 0 22 3.850148 0.000000 769 +identifi 0 22 3.850148 0.000000 760 +toolkit 0 20 3.951244 0.000000 835 +definit 0 19 4.007333 0.000000 864 +stand 0 18 4.060443 0.000000 891 +ramakrishnan 1 16 4.174387 4.174387 972 +advantag 0 16 4.174387 0.000000 987 +upon 0 16 4.174387 0.000000 978 +livni 1 15 4.248495 4.248495 1053 +transit 0 15 4.248495 0.000000 1046 +heterogen 1 14 4.317488 4.317488 1090 +easili 0 14 4.317488 0.000000 1077 +joint 1 13 4.382027 4.382027 1130 +dbm 1 13 4.382027 4.382027 1136 +recurs 0 13 4.382027 0.000000 1127 +employ 1 12 4.465908 4.465908 1291 +raghu 1 12 4.465908 4.465908 1212 +grow 0 12 4.465908 0.000000 1209 +deduct 0 12 4.465908 0.000000 1236 +broad 0 11 4.553877 0.000000 1302 +usaphon 0 9 4.753590 0.000000 1600 +madra 0 8 4.875197 0.000000 1770 +ioannidi 0 8 4.875197 0.000000 1714 +closur 0 8 4.875197 0.000000 1643 +bottom 1 7 5.010635 5.010635 1906 +dataset 0 7 5.010635 0.000000 1914 +seshadri 0 7 5.010635 0.000000 1803 +bell 1 6 5.164786 5.164786 2224 +ongo 0 6 5.164786 0.000000 2215 +praveen 0 6 5.164786 0.000000 1996 +coral 1 5 5.347108 5.347108 2538 +mcgraw 0 5 5.347108 0.000000 2262 +minibas 0 4 5.568345 0.000000 2608 +exploratori 0 4 5.568345 0.000000 3073 +ofinform 0 4 5.568345 0.000000 2707 +successor 0 3 5.857933 0.000000 3576 +sudarshan 0 3 5.857933 0.000000 3885 +murrai 1 2 6.263398 6.263398 5647 +aimedat 0 2 6.263398 0.000000 6117 +srivastava 0 2 6.263398 0.000000 5395 +minibaseand 0 1 6.957497 0.000000 19616 +coralth 0 1 6.957497 0.000000 19617 +undergraduateand 0 1 6.957497 0.000000 19618 +inconjunct 0 1 6.957497 0.000000 19619 +coursesthat 0 1 6.957497 0.000000 19620 +deductiona 0 1 6.957497 0.000000 19621 +diversifi 0 1 6.957497 0.000000 19622 +increasinglyimport 0 1 6.957497 0.000000 19623 +dispers 0 1 6.957497 0.000000 19624 +rodin 0 1 6.957497 0.000000 19625 +severalissu 0 1 6.957497 0.000000 19626 +forsemant 0 1 6.957497 0.000000 19627 +serviceand 0 1 6.957497 0.000000 19628 +networkedclust 0 1 6.957497 0.000000 19629 +explorationfrom 0 1 6.957497 0.000000 19630 +assequ 0 1 6.957497 0.000000 19631 +seqsystem 0 1 6.957497 0.000000 19632 +optimizationissu 0 1 6.957497 0.000000 19633 +identifyingtrend 0 1 6.957497 0.000000 19634 +fromlarg 0 1 6.957497 0.000000 19635 +implementingan 0 1 6.957497 0.000000 19636 +customizea 0 1 6.957497 0.000000 19637 +specializedinform 0 1 6.957497 0.000000 19638 +indexedand 0 1 6.957497 0.000000 19639 +andmin 0 1 6.957497 0.000000 19640 +birchfor 0 1 6.957497 0.000000 19641 +devisea 0 1 6.957497 0.000000 19642 +databasequeri 0 1 6.957497 0.000000 19643 +featuressuch 0 1 6.957497 0.000000 19644 +ofarithmet 0 1 6.957497 0.000000 19645 +morecompactli 0 1 6.957497 0.000000 19646 +coraldeduct 0 1 6.957497 0.000000 19647 +fixpointevalu 0 1 6.957497 0.000000 19648 +efficientacross 0 1 6.957497 0.000000 19649 +sudarsha 0 1 6.957497 0.000000 19650 +divesh 0 1 6.957497 0.000000 19651 +managementfirst 0 1 6.957497 0.000000 19652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..9986a91a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +wisc 0 242 1.386294 0.000000 33 +fall 1 181 1.609438 1.609438 40 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +wisconsin 0 169 1.791759 0.000000 54 +read 0 154 1.791759 0.000000 47 +year 0 148 1.945910 0.000000 84 +area 0 144 1.945910 0.000000 80 +like 0 132 1.945910 0.000000 81 +file 0 132 1.945910 0.000000 70 +welcom 0 122 2.079442 0.000000 99 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +pleas 0 113 2.197225 0.000000 114 +check 0 115 2.197225 0.000000 118 +person 0 111 2.197225 0.000000 117 +well 0 109 2.197225 0.000000 121 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +take 0 97 2.302585 0.000000 134 +call 0 91 2.397895 0.000000 153 +internet 0 83 2.484907 0.000000 186 +stuff 0 87 2.484907 0.000000 171 +learn 0 86 2.484907 0.000000 170 +know 1 80 2.564949 2.564949 198 +master 0 76 2.564949 0.000000 216 +resum 0 79 2.564949 0.000000 217 +want 0 79 2.564949 0.000000 199 +state 0 76 2.564949 0.000000 207 +good 0 77 2.564949 0.000000 200 +come 0 78 2.564949 0.000000 202 +degre 0 69 2.708050 0.000000 259 +collect 0 65 2.772589 0.000000 268 +plai 0 60 2.833213 0.000000 307 +semest 1 58 2.890372 2.890372 312 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +much 0 52 2.995732 0.000000 349 +california 1 46 3.091042 3.091042 388 +get 0 46 3.091042 0.000000 380 +could 0 46 3.091042 0.000000 383 +natur 0 44 3.135494 0.000000 406 +long 1 43 3.178054 3.178054 413 +show 1 43 3.178054 3.178054 417 +third 0 43 3.178054 0.000000 412 +music 0 42 3.218876 0.000000 436 +littl 1 39 3.258097 3.258097 454 +small 0 39 3.258097 0.000000 447 +live 0 40 3.258097 0.000000 451 +movi 0 40 3.258097 0.000000 459 +form 0 39 3.258097 0.000000 443 +winter 0 36 3.367296 0.000000 500 +go 0 33 3.433987 0.000000 529 +india 1 32 3.465736 3.465736 550 +kind 1 32 3.465736 3.465736 541 +travel 0 30 3.555348 0.000000 579 +hope 0 28 3.610918 0.000000 610 +great 1 27 3.637586 3.637586 626 +though 0 27 3.637586 0.000000 622 +enjoi 0 26 3.688879 0.000000 660 +sport 0 25 3.737670 0.000000 683 +concern 0 25 3.737670 0.000000 666 +magazin 0 24 3.761200 0.000000 704 +wish 0 24 3.761200 0.000000 692 +famili 0 23 3.806662 0.000000 735 +almost 0 22 3.850148 0.000000 742 +try 0 22 3.850148 0.000000 764 +love 1 21 3.912023 3.912023 804 +born 0 21 3.912023 0.000000 798 +watch 0 21 3.912023 0.000000 789 +nice 0 20 3.951244 0.000000 809 +tenni 0 20 3.951244 0.000000 838 +citi 0 19 4.007333 0.000000 874 +offici 1 18 4.060443 4.060443 894 +figur 0 18 4.060443 0.000000 903 +bachelor 0 17 4.110874 0.000000 957 +normal 0 16 4.174387 0.000000 995 +jose 0 16 4.174387 0.000000 976 +enough 0 15 4.248495 0.000000 1040 +rate 0 15 4.248495 0.000000 1037 +anywai 0 15 4.248495 0.000000 1047 +novel 0 15 4.248495 0.000000 1039 +came 1 13 4.382027 4.382027 1197 +cannot 0 13 4.382027 0.000000 1144 +sai 0 13 4.382027 0.000000 1175 +philosophi 0 13 4.382027 0.000000 1167 +stai 0 12 4.465908 0.000000 1215 +employ 0 12 4.465908 0.000000 1291 +walk 0 12 4.465908 0.000000 1281 +rest 0 12 4.465908 0.000000 1259 +surf 0 11 4.553877 0.000000 1301 +town 0 10 4.653960 0.000000 1458 +guess 0 10 4.653960 0.000000 1443 +sister 0 9 4.753590 0.000000 1524 +prefer 0 9 4.753590 0.000000 1491 +swim 0 9 4.753590 0.000000 1599 +kanpur 1 8 4.875197 4.875197 1744 +star 0 8 4.875197 0.000000 1717 +bridg 0 8 4.875197 0.000000 1764 +job 0 8 4.875197 0.000000 1702 +fortun 0 7 5.010635 0.000000 1872 +monei 0 7 5.010635 0.000000 1934 +cricket 0 7 5.010635 0.000000 1945 +slightli 0 7 5.010635 0.000000 1795 +parent 0 6 5.164786 0.000000 2204 +whatev 0 6 5.164786 0.000000 2097 +hike 0 6 5.164786 0.000000 2234 +televis 0 6 5.164786 0.000000 2118 +almaden 0 5 5.347108 0.000000 2511 +cyber 0 4 5.568345 0.000000 2909 +gone 0 4 5.568345 0.000000 3072 +compris 0 4 5.568345 0.000000 2862 +shouldn 0 4 5.568345 0.000000 2606 +suppos 0 4 5.568345 0.000000 3002 +skate 0 4 5.568345 0.000000 3046 +rahul 1 3 5.857933 5.857933 3464 +indianinstitut 0 3 5.857933 0.000000 4003 +romanc 0 3 5.857933 0.000000 3632 +trek 0 3 5.857933 0.000000 4025 +win 0 3 5.857933 0.000000 3593 +comedi 1 2 6.263398 6.263398 5822 +surfer 0 2 6.263398 0.000000 4982 +centr 0 2 6.263398 0.000000 4222 +northern 0 2 6.263398 0.000000 5861 +lover 0 2 6.263398 0.000000 6192 +paid 0 2 6.263398 0.000000 6081 +livabl 1 1 6.957497 6.957497 19653 +kapoorhello 0 1 6.957497 0.000000 19654 +schedulemydepartmentmyuniversityiitkanpuriitkclass 0 1 6.957497 0.000000 19655 +relatedlink 0 1 6.957497 0.000000 19656 +menow 0 1 6.957497 0.000000 19657 +andrais 0 1 6.957497 0.000000 19658 +elder 0 1 6.957497 0.000000 19659 +moneymagazin 0 1 6.957497 0.000000 19660 +editormust 0 1 6.957497 0.000000 19661 +greenland 0 1 6.957497 0.000000 19662 +complain 0 1 6.957497 0.000000 19663 +isawesom 0 1 6.957497 0.000000 19664 +regret 0 1 6.957497 0.000000 19665 +genr 0 1 6.957497 0.000000 19666 +gymnast 0 1 6.957497 0.000000 19667 +cloudi 0 1 6.957497 0.000000 19668 +breezi 0 1 6.957497 0.000000 19669 +youget 0 1 6.957497 0.000000 19670 +musicstuffmovi 0 1 6.957497 0.000000 19671 +televisioninternettravelotherbookmark 0 1 6.957497 0.000000 19672 +meget 0 1 6.957497 0.000000 19673 +guestbookrahul 0 1 6.957497 0.000000 19674 +eduh 0 1 6.957497 0.000000 19675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..4eb8967f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +work 0 380 0.693147 0.000000 9 +current 1 284 1.098612 1.098612 21 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +graduat 0 215 1.386294 0.000000 31 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +distribut 0 162 1.791759 0.000000 51 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +architectur 0 139 1.945910 0.000000 77 +perform 0 143 1.945910 0.000000 74 +model 0 145 1.945910 0.000000 69 +dayton 0 119 2.079442 0.000000 104 +postscript 0 131 2.079442 0.000000 90 +mathemat 0 108 2.197225 0.000000 123 +person 0 111 2.197225 0.000000 117 +specif 0 106 2.197225 0.000000 106 +part 0 98 2.302585 0.000000 129 +west 0 83 2.484907 0.000000 192 +member 0 84 2.484907 0.000000 165 +thing 0 84 2.484907 0.000000 189 +integr 0 67 2.708050 0.000000 245 +street 0 63 2.772589 0.000000 293 +evalu 0 64 2.772589 0.000000 266 +major 0 56 2.890372 0.000000 315 +telephon 0 50 3.044522 0.000000 373 +music 0 42 3.218876 0.000000 436 +error 0 40 3.258097 0.000000 449 +vita 0 38 3.295837 0.000000 473 +committe 0 34 3.401197 0.000000 522 +curriculum 0 33 3.433987 0.000000 535 +team 0 27 3.637586 0.000000 625 +bookmark 0 26 3.688879 0.000000 639 +subject 0 26 3.688879 0.000000 647 +livni 0 15 4.248495 0.000000 1053 +minor 0 12 4.465908 0.000000 1237 +pascal 0 12 4.465908 0.000000 1213 +earth 0 10 4.653960 0.000000 1463 +chao 0 8 4.875197 0.000000 1753 +judg 0 8 4.875197 0.000000 1644 +uncertainti 0 7 5.010635 0.000000 1882 +truth 0 6 5.164786 0.000000 2179 +ohio 0 5 5.347108 0.000000 2447 +condor 0 5 5.347108 0.000000 2577 +raman 1 4 5.568345 5.568345 2827 +rajesh 1 3 5.857933 5.857933 3511 +off 0 3 5.857933 0.000000 3170 +wesleyan 0 3 5.857933 0.000000 3988 +saluja 0 3 5.857933 0.000000 3104 +novelti 0 2 6.263398 0.000000 5765 +monster 0 2 6.263398 0.000000 6207 +prodigi 0 2 6.263398 0.000000 5670 +old 0 1 6.957497 0.000000 19676 +homm 0 1 6.957497 0.000000 19677 +winsonsin 0 1 6.957497 0.000000 19678 +chimera 0 1 6.957497 0.000000 19679 +contradict 0 1 6.957497 0.000000 19680 +feebleworm 0 1 6.957497 0.000000 19681 +depositari 0 1 6.957497 0.000000 19682 +cloaca 0 1 6.957497 0.000000 19683 +theglori 0 1 6.957497 0.000000 19684 +shame 0 1 6.957497 0.000000 19685 +blais 0 1 6.957497 0.000000 19686 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..04f16196 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +wisconsin 1 169 1.791759 1.791759 54 +parallel 0 169 1.791759 0.000000 60 +avail 0 169 1.791759 0.000000 48 +address 0 170 1.791759 0.000000 62 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +databas 1 122 2.079442 2.079442 86 +confer 0 126 2.079442 0.000000 100 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +intern 0 108 2.197225 0.000000 128 +person 0 111 2.197225 0.000000 117 +comment 0 93 2.397895 0.000000 146 +larg 0 82 2.484907 0.000000 168 +journal 0 83 2.484907 0.000000 183 +david 0 71 2.639057 0.000000 232 +street 0 63 2.772589 0.000000 293 +improv 0 62 2.772589 0.000000 289 +think 0 57 2.890372 0.000000 314 +electron 0 47 3.091042 0.000000 379 +might 0 41 3.218876 0.000000 426 +eduoffic 0 33 3.433987 0.000000 531 +altern 0 26 3.688879 0.000000 641 +scalabl 0 24 3.761200 0.000000 705 +hierarchi 0 22 3.850148 0.000000 744 +tell 0 21 3.912023 0.000000 777 +mostli 0 19 4.007333 0.000000 869 +estim 0 17 4.110874 0.000000 930 +dewitt 0 12 4.465908 0.000000 1270 +naughton 1 10 4.653960 4.653960 1450 +jeffrei 1 9 4.753590 4.753590 1612 +wall 0 9 4.753590 0.000000 1553 +paradis 0 8 4.875197 0.000000 1782 +presenc 0 8 4.875197 0.000000 1671 +bombai 0 7 5.010635 0.000000 1972 +aggreg 0 6 5.164786 0.000000 2219 +prasad 0 6 5.164786 0.000000 2126 +deshpand 0 5 5.347108 0.000000 2431 +multidimension 0 4 5.568345 0.000000 3091 +amit 0 4 5.568345 0.000000 2972 +ramasami 0 4 5.568345 0.000000 3088 +shukla 0 3 5.857933 0.000000 4030 +karthikeyan 0 3 5.857933 0.000000 4031 +mumbai 0 3 5.857933 0.000000 4029 +karthik 0 1 6.957497 0.000000 19687 +pagekarthikeyan 0 1 6.957497 0.000000 19688 +ramasamyabouti 0 1 6.957497 0.000000 19689 +projectshack 0 1 6.957497 0.000000 19690 +connectivityparadis 0 1 6.957497 0.000000 19691 +pthread 0 1 6.957497 0.000000 19692 +wrapperspublicationsstorag 0 1 6.957497 0.000000 19693 +presentationsweb 0 1 6.957497 0.000000 19694 +picturearchitectur 0 1 6.957497 0.000000 19695 +serversphoto 0 1 6.957497 0.000000 19696 +albumencount 0 1 6.957497 0.000000 19697 +leafperson 0 1 6.957497 0.000000 19698 +inforesum 0 1 6.957497 0.000000 19699 +financemonei 0 1 6.957497 0.000000 19700 +interestshack 0 1 6.957497 0.000000 19701 +photographycontact 0 1 6.957497 0.000000 19702 +informationstreet 0 1 6.957497 0.000000 19703 +addresskarthik 0 1 6.957497 0.000000 19704 +suggestionspleas 0 1 6.957497 0.000000 19705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..12194197 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +student 0 343 1.098612 0.000000 19 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +phone 0 175 1.791759 0.000000 45 +hour 0 165 1.791759 0.000000 46 +construct 0 139 1.945910 0.000000 82 +place 0 106 2.197225 0.000000 124 +visit 0 63 2.772589 0.000000 288 +space 0 57 2.890372 0.000000 310 +edulast 0 17 4.110874 0.000000 927 +stai 0 12 4.465908 0.000000 1215 +tune 0 12 4.465908 0.000000 1227 +login 0 9 4.753590 0.000000 1550 +kelli 1 4 5.568345 5.568345 2793 +ratliffoffic 0 1 6.957497 0.000000 19706 +genealog 0 1 6.957497 0.000000 19707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..34f63c89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 0 384 0.693147 0.000000 11 +wisc 0 242 1.386294 0.000000 33 +public 0 202 1.609438 0.000000 43 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +avail 1 169 1.791759 1.791759 48 +base 0 165 1.791759 0.000000 50 +area 0 144 1.945910 0.000000 80 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +assist 0 112 2.197225 0.000000 113 +imag 1 91 2.397895 2.397895 161 +control 0 82 2.484907 0.000000 164 +thing 0 84 2.484907 0.000000 189 +west 0 83 2.484907 0.000000 192 +main 0 67 2.708050 0.000000 256 +street 0 63 2.772589 0.000000 293 +digit 0 52 2.995732 0.000000 348 +advisor 0 51 2.995732 0.000000 355 +video 0 44 3.135494 0.000000 405 +product 0 33 3.433987 0.000000 527 +compress 1 23 3.806662 3.806662 719 +qualiti 0 20 3.951244 0.000000 832 +vector 0 16 4.174387 0.000000 961 +livni 0 15 4.248495 0.000000 1053 +miron 0 14 4.317488 0.000000 1110 +mode 0 9 4.753590 0.000000 1492 +invok 1 6 5.164786 5.164786 2079 +reveal 0 4 5.568345 0.000000 2647 +fractal 0 3 5.857933 0.000000 3475 +quantiz 0 2 6.263398 0.000000 5692 +ratnakar 1 1 6.957497 6.957497 19708 +viresh 1 1 6.957497 6.957497 19709 +lossi 0 1 6.957497 0.000000 19710 +qclicauthor 0 1 6.957497 0.000000 19711 +qclic 0 1 6.957497 0.000000 19712 +qclicbrows 0 1 6.957497 0.000000 19713 +rever 0 1 6.957497 0.000000 19714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..10539397 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +offic 1 299 1.098612 1.098612 13 +current 0 284 1.098612 0.000000 21 +cours 0 273 1.098612 0.000000 15 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +distribut 0 162 1.791759 0.000000 51 +architectur 0 139 1.945910 0.000000 77 +number 1 130 2.079442 2.079442 97 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +introduct 0 126 2.079442 0.000000 87 +person 0 111 2.197225 0.000000 117 +check 0 115 2.197225 0.000000 118 +advanc 0 99 2.302585 0.000000 130 +need 0 98 2.302585 0.000000 135 +mani 0 92 2.397895 0.000000 150 +graphic 0 90 2.397895 0.000000 147 +west 0 83 2.484907 0.000000 192 +mondai 1 77 2.564949 2.564949 206 +tuesdai 0 73 2.639057 0.000000 219 +thursdai 0 70 2.708050 0.000000 241 +order 0 69 2.708050 0.000000 249 +wednesdai 1 64 2.772589 2.772589 261 +street 0 63 2.772589 0.000000 293 +best 0 59 2.833213 0.000000 299 +publish 0 57 2.890372 0.000000 326 +profession 0 51 2.995732 0.000000 345 +without 0 50 3.044522 0.000000 370 +math 0 44 3.135494 0.000000 402 +fridai 0 44 3.135494 0.000000 390 +richard 1 31 3.496508 3.496508 559 +load 0 28 3.610918 0.000000 601 +fellow 0 24 3.761200 0.000000 701 +emphasi 0 22 3.850148 0.000000 755 +thur 0 19 4.007333 0.000000 847 +ever 0 19 4.007333 0.000000 872 +whole 0 17 4.110874 0.000000 940 +brother 1 13 4.382027 4.382027 1189 +sundai 0 10 4.653960 0.000000 1387 +desktop 0 10 4.653960 0.000000 1445 +prevent 0 7 5.010635 0.000000 1827 +saturdai 0 7 5.010635 0.000000 1794 +shade 0 7 5.010635 0.000000 1881 +polit 0 6 5.164786 0.000000 2115 +artist 0 6 5.164786 0.000000 2127 +seriou 0 5 5.347108 0.000000 2252 +haven 1 4 5.568345 5.568345 3037 +underwat 0 4 5.568345 0.000000 2838 +fire 0 4 5.568345 0.000000 3001 +beard 0 2 6.263398 0.000000 6161 +grave 0 2 6.263398 0.000000 5968 +doom 0 2 6.263398 0.000000 5848 +goofi 0 2 6.263398 0.000000 4074 +omin 1 1 6.957497 6.957497 19715 +monasteriu 1 1 6.957497 6.957497 19716 +doominu 1 1 6.957497 6.957497 19717 +rcarl 0 1 6.957497 0.000000 19718 +subsurfac 0 1 6.957497 0.000000 19719 +depositori 0 1 6.957497 0.000000 19720 +dig 0 1 6.957497 0.000000 19721 +solitari 0 1 6.957497 0.000000 19722 +innebri 0 1 6.957497 0.000000 19723 +vampir 0 1 6.957497 0.000000 19724 +nostalg 0 1 6.957497 0.000000 19725 +funki 0 1 6.957497 0.000000 19726 +monk 0 1 6.957497 0.000000 19727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..3b476994 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,559 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 4 374 0.693147 2.772588 7 +depart 2 457 0.693147 1.386294 12 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 2 297 1.098612 2.197224 20 +us 2 329 1.098612 2.197224 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +student 0 343 1.098612 0.000000 19 +languag 3 227 1.386294 4.158882 26 +softwar 3 220 1.386294 4.158882 30 +also 2 259 1.386294 2.772588 28 +gener 2 220 1.386294 2.772588 27 +cornel 1 215 1.386294 1.386294 23 +washington 1 236 1.386294 1.386294 32 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +paper 2 205 1.609438 3.218876 38 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +includ 0 208 1.609438 0.000000 42 +class 0 199 1.609438 0.000000 37 +wisconsin 3 169 1.791759 5.375277 54 +madison 2 165 1.791759 3.583518 55 +algorithm 2 162 1.791759 3.583518 57 +base 2 165 1.791759 3.583518 50 +develop 2 174 1.791759 3.583518 53 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +recent 0 167 1.791759 0.000000 58 +data 0 170 1.791759 0.000000 49 +note 2 142 1.945910 3.891820 67 +problem 2 147 1.945910 3.891820 75 +click 2 142 1.945910 3.891820 78 +lectur 2 135 1.945910 3.891820 73 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +professor 1 137 1.945910 1.945910 76 +object 0 138 1.945910 0.000000 79 +architectur 0 139 1.945910 0.000000 77 +first 0 140 1.945910 0.000000 71 +analysi 3 124 2.079442 6.238326 98 +confer 2 126 2.079442 4.158884 100 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +report 1 131 2.079442 2.079442 92 +dayton 0 119 2.079442 0.000000 104 +provid 0 121 2.079442 0.000000 94 +spring 0 131 2.079442 0.000000 88 +number 0 130 2.079442 0.000000 97 +version 2 113 2.197225 4.394450 122 +intern 2 108 2.197225 4.394450 128 +world 1 115 2.197225 2.197225 126 +theori 1 111 2.197225 2.197225 127 +specif 0 106 2.197225 0.000000 106 +make 0 111 2.197225 0.000000 120 +find 0 111 2.197225 0.000000 111 +manag 0 114 2.197225 0.000000 125 +check 0 115 2.197225 0.000000 118 +site 0 106 2.197225 0.000000 119 +access 2 102 2.302585 4.605170 136 +book 1 99 2.302585 2.302585 131 +text 1 98 2.302585 2.302585 133 +proceed 3 93 2.397895 7.193685 152 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +call 0 91 2.397895 0.000000 153 +present 0 91 2.397895 0.000000 145 +environ 2 84 2.484907 4.969814 177 +ieee 2 86 2.484907 4.969814 190 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +second 1 81 2.484907 2.484907 166 +larg 1 82 2.484907 2.484907 168 +solut 1 82 2.484907 2.484907 162 +west 0 83 2.484907 0.000000 192 +member 0 84 2.484907 0.000000 165 +academ 0 82 2.484907 0.000000 178 +chang 0 82 2.484907 0.000000 163 +control 0 82 2.484907 0.000000 164 +institut 0 84 2.484907 0.000000 187 +june 2 79 2.564949 5.129898 214 +april 2 77 2.564949 5.129898 196 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +refer 1 78 2.564949 2.564949 203 +dynam 1 76 2.564949 2.564949 194 +method 0 80 2.564949 0.000000 213 +complet 0 77 2.564949 0.000000 208 +optim 0 79 2.564949 0.000000 197 +symposium 2 72 2.639057 5.278114 238 +solv 1 73 2.639057 2.639057 234 +logic 1 71 2.639057 2.639057 230 +david 1 71 2.639057 2.639057 232 +workshop 1 71 2.639057 2.639057 239 +summari 0 73 2.639057 0.000000 237 +line 0 75 2.639057 0.000000 231 +effici 0 73 2.639057 0.000000 233 +nation 0 74 2.639057 0.000000 240 +integr 2 67 2.708050 5.416100 245 +august 1 66 2.708050 2.708050 257 +test 0 66 2.708050 0.000000 252 +order 0 69 2.708050 0.000000 249 +practic 0 70 2.708050 0.000000 246 +differ 0 66 2.708050 0.000000 253 +januari 2 62 2.772589 5.545178 264 +evalu 2 64 2.772589 5.545178 266 +foundat 1 62 2.772589 2.772589 286 +septemb 1 65 2.772589 2.772589 274 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +creat 0 63 2.772589 0.000000 277 +previou 0 62 2.772589 0.000000 290 +abstract 0 62 2.772589 0.000000 276 +juli 2 60 2.833213 5.666426 305 +march 1 61 2.833213 2.833213 295 +content 0 59 2.833213 0.000000 302 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +direct 1 57 2.890372 2.890372 316 +special 1 56 2.890372 2.890372 320 +index 1 56 2.890372 2.890372 309 +explor 0 58 2.890372 0.000000 324 +overview 0 56 2.890372 0.000000 323 +space 0 57 2.890372 0.000000 310 +scientif 1 53 2.944439 2.944439 341 +allow 0 53 2.944439 0.000000 333 +particular 1 51 2.995732 2.995732 352 +maintain 0 51 2.995732 0.000000 342 +principl 2 48 3.044522 6.089044 357 +pointer 1 48 3.044522 3.044522 368 +visitor 1 49 3.044522 3.044522 371 +telephon 0 50 3.044522 0.000000 373 +set 0 50 3.044522 0.000000 361 +california 1 46 3.091042 3.091042 388 +understand 0 47 3.091042 0.000000 384 +algebra 1 45 3.135494 3.135494 394 +describ 0 45 3.135494 0.000000 400 +natur 0 44 3.135494 0.000000 406 +third 1 43 3.178054 3.178054 412 +show 0 43 3.178054 0.000000 417 +york 3 41 3.218876 9.656628 435 +editor 2 41 3.218876 6.437752 433 +combin 1 42 3.218876 3.218876 421 +edit 0 42 3.218876 0.000000 418 +press 0 42 3.218876 0.000000 419 +compani 0 41 3.218876 0.000000 423 +transact 2 39 3.258097 6.516194 438 +societi 1 40 3.258097 3.258097 456 +submit 1 39 3.258097 3.258097 440 +theoret 1 39 3.258097 3.258097 446 +tutori 1 39 3.258097 3.258097 437 +small 0 39 3.258097 0.000000 447 +probabl 0 40 3.258097 0.000000 455 +seminar 1 38 3.295837 3.295837 470 +streetmadison 0 38 3.295837 0.000000 474 +vita 0 38 3.295837 0.000000 473 +origin 0 38 3.295837 0.000000 472 +correct 0 38 3.295837 0.000000 462 +prototyp 0 38 3.295837 0.000000 463 +connect 0 37 3.332205 0.000000 485 +mean 0 37 3.332205 0.000000 477 +china 0 37 3.332205 0.000000 487 +procedur 1 36 3.367296 3.367296 488 +tree 1 36 3.367296 3.367296 492 +multi 1 36 3.367296 3.367296 493 +manual 1 35 3.401197 3.401197 504 +post 1 35 3.401197 3.401197 505 +tech 1 35 3.401197 3.401197 515 +represent 1 35 3.401197 3.401197 512 +either 0 35 3.401197 0.000000 506 +bibliographi 0 34 3.401197 0.000000 518 +curriculum 0 33 3.433987 0.000000 535 +obtain 0 33 3.433987 0.000000 534 +dissert 1 32 3.465736 3.465736 549 +transform 1 32 3.465736 3.465736 542 +kind 0 32 3.465736 0.000000 541 +chapter 0 32 3.465736 0.000000 536 +graph 2 30 3.555348 7.110696 576 +power 0 30 3.555348 0.000000 573 +robert 0 30 3.555348 0.000000 567 +semant 2 29 3.583519 7.167038 587 +depend 2 29 3.583519 7.167038 583 +chines 0 29 3.583519 0.000000 595 +releas 1 28 3.610918 3.610918 616 +univ 1 28 3.610918 3.610918 617 +manipul 1 27 3.637586 3.637586 624 +static 0 27 3.637586 0.000000 619 +consist 0 26 3.688879 0.000000 651 +subject 0 26 3.688879 0.000000 647 +repres 0 26 3.688879 0.000000 656 +bound 0 26 3.688879 0.000000 659 +notic 1 25 3.737670 3.737670 675 +hill 1 25 3.737670 3.737670 670 +valu 0 25 3.737670 0.000000 665 +fundament 0 25 3.737670 0.000000 661 +doctor 1 24 3.761200 3.761200 709 +handl 1 24 3.761200 3.761200 685 +departmentunivers 0 24 3.761200 0.000000 711 +demonstr 0 24 3.761200 0.000000 694 +methodolog 1 23 3.806662 3.806662 733 +variabl 1 23 3.806662 3.806662 715 +miscellan 0 23 3.806662 0.000000 731 +proof 0 23 3.806662 0.000000 720 +springer 2 22 3.850148 7.700296 750 +verlag 2 22 3.850148 7.700296 751 +properti 1 22 3.850148 3.850148 749 +serv 0 22 3.850148 0.000000 758 +sequenti 0 22 3.850148 0.000000 745 +identifi 0 22 3.850148 0.000000 760 +path 1 21 3.912023 3.912023 778 +latest 1 21 3.912023 3.912023 785 +corpor 0 21 3.912023 0.000000 802 +programminglanguag 0 21 3.912023 0.000000 782 +theorem 0 21 3.912023 0.000000 786 +basi 0 20 3.951244 0.000000 828 +citi 1 19 4.007333 4.007333 874 +beij 0 19 4.007333 0.000000 876 +comparison 0 19 4.007333 0.000000 863 +boston 0 19 4.007333 0.000000 862 +north 0 19 4.007333 0.000000 873 +record 2 18 4.060443 8.120886 890 +thoma 1 18 4.060443 4.060443 901 +element 1 18 4.060443 4.060443 895 +partial 0 18 4.060443 0.000000 900 +speed 0 18 4.060443 0.000000 911 +germani 1 17 4.110874 4.110874 946 +modif 1 17 4.110874 4.110874 913 +debug 0 17 4.110874 0.000000 944 +fourth 1 16 4.174387 4.174387 999 +diego 1 16 4.174387 4.174387 992 +cambridg 0 16 4.174387 0.000000 1008 +letter 0 16 4.174387 0.000000 981 +ramakrishnan 0 16 4.174387 0.000000 972 +taiwan 0 16 4.174387 0.000000 1006 +precis 1 15 4.248495 4.248495 1023 +transit 0 15 4.248495 0.000000 1046 +princeton 0 15 4.248495 0.000000 1042 +configur 0 15 4.248495 0.000000 1012 +remot 0 15 4.248495 0.000000 1041 +reprint 2 14 4.317488 8.634976 1097 +attribut 2 14 4.317488 8.634976 1092 +demand 1 14 4.317488 4.317488 1073 +francisco 1 14 4.317488 4.317488 1095 +polynomi 0 14 4.317488 0.000000 1069 +sigplan 2 13 4.382027 8.764054 1190 +directli 0 13 4.382027 0.000000 1141 +carri 0 13 4.382027 0.000000 1152 +context 0 13 4.382027 0.000000 1153 +shape 2 12 4.465908 8.931816 1245 +pascal 0 12 4.465908 0.000000 1213 +nanci 0 12 4.465908 0.000000 1256 +franc 0 12 4.465908 0.000000 1276 +onth 0 12 4.465908 0.000000 1218 +scan 0 12 4.465908 0.000000 1243 +instanc 1 11 4.553877 4.553877 1322 +magic 0 11 4.553877 0.000000 1358 +impact 0 11 4.553877 0.000000 1334 +lake 0 11 4.553877 0.000000 1373 +israel 0 11 4.553877 0.000000 1366 +decomposit 0 10 4.653960 0.000000 1439 +underli 0 10 4.653960 0.000000 1410 +invit 0 10 4.653960 0.000000 1428 +respect 0 9 4.753590 0.000000 1545 +mainten 0 9 4.753590 0.000000 1543 +establish 0 9 4.753590 0.000000 1532 +conferenceon 0 9 4.753590 0.000000 1595 +utah 0 9 4.753590 0.000000 1585 +herefor 0 9 4.753590 0.000000 1483 +ball 0 9 4.753590 0.000000 1608 +equival 0 9 4.753590 0.000000 1496 +yang 2 8 4.875197 9.750394 1652 +colloquium 1 8 4.875197 4.875197 1734 +reus 1 8 4.875197 4.875197 1661 +european 1 8 4.875197 4.875197 1763 +secretari 0 8 4.875197 0.000000 1775 +pldi 0 8 4.875197 0.000000 1704 +competit 0 8 4.875197 0.000000 1635 +irvin 0 8 4.875197 0.000000 1660 +illustr 0 8 4.875197 0.000000 1679 +analys 0 8 4.875197 0.000000 1666 +merg 2 7 5.010635 10.021270 1862 +portland 1 7 5.010635 5.010635 1878 +fifth 1 7 5.010635 5.010635 1931 +iowa 1 7 5.010635 5.010635 1971 +bottom 0 7 5.010635 0.000000 1906 +prioriti 0 7 5.010635 0.000000 1792 +pittsburgh 0 7 5.010635 0.000000 1938 +digest 0 7 5.010635 0.000000 1864 +increment 2 6 5.164786 10.329572 2206 +teitelbaum 2 6 5.164786 10.329572 2102 +symposiumon 1 6 5.164786 5.164786 2054 +grammar 1 6 5.164786 5.164786 2058 +syntax 1 6 5.164786 5.164786 2030 +affect 0 6 5.164786 0.000000 2044 +textual 0 6 5.164786 0.000000 1979 +kluwer 0 6 5.164786 0.000000 2143 +variant 0 6 5.164786 0.000000 2043 +german 0 6 5.164786 0.000000 2190 +unpublish 0 6 5.164786 0.000000 2226 +carolina 0 6 5.164786 0.000000 2142 +horwitz 3 5 5.347108 16.041324 2411 +synthes 2 5 5.347108 10.694216 2451 +dataflow 2 5 5.347108 10.694216 2390 +licens 1 5 5.347108 5.347108 2520 +twenti 1 5 5.347108 5.347108 2540 +aim 1 5 5.347108 5.347108 2477 +summarymi 0 5 5.347108 0.000000 2580 +cacm 0 5 5.347108 0.000000 2388 +shortest 0 5 5.347108 0.000000 2424 +australia 0 5 5.347108 0.000000 2478 +singapor 0 5 5.347108 0.000000 2487 +mcgraw 0 5 5.347108 0.000000 2262 +bind 0 5 5.347108 0.000000 2250 +orlean 0 5 5.347108 0.000000 2550 +interfer 0 5 5.347108 0.000000 2494 +forprogram 0 5 5.347108 0.000000 2361 +salt 0 5 5.347108 0.000000 2413 +patent 0 5 5.347108 0.000000 2574 +chapel 0 5 5.347108 0.000000 2457 +rep 4 4 5.568345 22.273380 3087 +slice 3 4 5.568345 16.705035 2622 +interprocedur 2 4 5.568345 11.136690 2771 +popl 2 4 5.568345 11.136690 3068 +sigsoft 2 4 5.568345 11.136690 3036 +dagstuhl 2 4 5.568345 11.136690 2871 +compcon 1 4 5.568345 5.568345 2958 +ofprogram 1 4 5.568345 5.568345 2624 +ics 1 4 5.568345 5.568345 2779 +petersburg 1 4 5.568345 5.568345 2989 +jolla 1 4 5.568345 5.568345 2988 +bricker 1 4 5.568345 5.568345 3050 +usa 0 4 5.568345 0.000000 3080 +exhaust 0 4 5.568345 0.000000 2825 +melbourn 0 4 5.568345 0.000000 3035 +turnidg 0 4 5.568345 0.000000 2829 +imper 0 4 5.568345 0.000000 3067 +scotland 0 4 5.568345 0.000000 3049 +topla 2 3 5.857933 11.715866 3563 +reachabl 1 3 5.857933 5.857933 4001 +alamito 1 3 5.857933 5.857933 3558 +categor 1 3 5.857933 5.857933 3765 +schloss 1 3 5.857933 5.857933 3727 +denmark 1 3 5.857933 5.857933 3676 +amast 1 3 5.857933 5.857933 3955 +spaa 1 3 5.857933 5.857933 3906 +propag 1 3 5.857933 5.857933 3997 +accommod 1 3 5.857933 5.857933 3337 +fifteenth 1 3 5.857933 5.857933 3868 +principlesof 1 3 5.857933 5.857933 3145 +twentieth 1 3 5.857933 5.857933 3760 +thedevelop 0 3 5.857933 0.000000 3903 +meaning 0 3 5.857933 0.000000 3458 +nearbi 0 3 5.857933 0.000000 3291 +retarget 0 3 5.857933 0.000000 3994 +fourteenth 0 3 5.857933 0.000000 3615 +domin 0 3 5.857933 0.000000 3995 +preserv 0 3 5.857933 0.000000 3628 +jone 0 3 5.857933 0.000000 3703 +atlanta 0 3 5.857933 0.000000 3778 +onprincipl 0 3 5.857933 0.000000 3701 +ninth 0 3 5.857933 0.000000 3616 +sagiv 2 2 6.263398 12.526796 6176 +acta 2 2 6.263398 12.526796 5124 +differenc 1 2 6.263398 6.263398 6177 +chop 1 2 6.263398 6.263398 6160 +informatica 1 2 6.263398 6.263398 5125 +destruct 1 2 6.263398 6.263398 6232 +copenhagen 1 2 6.263398 6.263398 5145 +alia 1 2 6.263398 6.263398 5383 +charleston 1 2 6.263398 6.263398 6181 +thevari 0 2 6.263398 0.000000 6130 +contigu 0 2 6.263398 0.000000 6001 +worker 0 2 6.263398 0.000000 4841 +andbuild 0 2 6.263398 0.000000 6028 +clickherefor 0 2 6.263398 0.000000 5344 +interproceduraldataflow 0 2 6.263398 0.000000 6178 +unrestrict 0 2 6.263398 0.000000 4879 +arnold 0 2 6.263398 0.000000 4705 +wasserman 0 2 6.263398 0.000000 5331 +aarhu 0 2 6.263398 0.000000 6180 +moss 0 2 6.263398 0.000000 5820 +fritzson 0 2 6.263398 0.000000 4546 +andarchitectur 0 2 6.263398 0.000000 5755 +languagedesign 0 2 6.263398 0.000000 6182 +spain 0 2 6.263398 0.000000 5522 +adequaci 0 2 6.263398 0.000000 6229 +thirteenth 0 2 6.263398 0.000000 5733 +eleventh 0 2 6.263398 0.000000 5031 +eighth 0 2 6.263398 0.000000 5750 +leeuwen 0 2 6.263398 0.000000 5543 +doc 0 2 6.263398 0.000000 5022 +mooli 0 2 6.263398 0.000000 6179 +tung 0 2 6.263398 0.000000 4709 +binklei 2 1 6.957497 13.914994 19728 +ramalingam 2 1 6.957497 13.914994 19729 +prin 1 1 6.957497 6.957497 19730 +idfa 1 1 6.957497 6.957497 19731 +interf 1 1 6.957497 6.957497 19732 +wilhelm 1 1 6.957497 6.957497 19733 +tosem 1 1 6.957497 6.957497 19734 +pfeiffer 1 1 6.957497 6.957497 19735 +demer 1 1 6.957497 6.957497 19736 +fromacm 1 1 6.957497 6.957497 19737 +berzin 1 1 6.957497 6.957497 19738 +sigsoftsymposium 1 1 6.957497 6.957497 19739 +wadern 1 1 6.957497 6.957497 19740 +rosai 1 1 6.957497 6.957497 19741 +fseb 1 1 6.957497 6.957497 19742 +thesiswuu 1 1 6.957497 6.957497 19743 +esop 1 1 6.957497 6.957497 19744 +poplb 1 1 6.957497 6.957497 19745 +pepma 1 1 6.957497 6.957497 19746 +fsea 1 1 6.957497 6.957497 19747 +diku 1 1 6.957497 6.957497 19748 +fase 1 1 6.957497 6.957497 19749 +pepmb 1 1 6.957497 6.957497 19750 +lape 1 1 6.957497 6.957497 19751 +psde 1 1 6.957497 6.957497 19752 +toconst 1 1 6.957497 6.957497 19753 +paradigmsfor 1 1 6.957497 6.957497 19754 +brighton 1 1 6.957497 6.957497 19755 +abramski 1 1 6.957497 6.957497 19756 +maibaum 1 1 6.957497 6.957497 19757 +wherefor 1 1 6.957497 6.957497 19758 +sigoa 1 1 6.957497 6.957497 19759 +pepm 1 1 6.957497 6.957497 19760 +onparti 1 1 6.957497 6.957497 19761 +ibfi 1 1 6.957497 6.957497 19762 +repsprofessorcomput 0 1 6.957497 0.000000 19763 +thehom 0 1 6.957497 0.000000 19764 +createtool 0 1 6.957497 0.000000 19765 +manipulationoper 0 1 6.957497 0.000000 19766 +slicingcan 0 1 6.957497 0.000000 19767 +elementss 0 1 6.957497 0.000000 19768 +thatmight 0 1 6.957497 0.000000 19769 +findsemant 0 1 6.957497 0.000000 19770 +thedecomposit 0 1 6.957497 0.000000 19771 +solvingmani 0 1 6.957497 0.000000 19772 +applicationsin 0 1 6.957497 0.000000 19773 +atimprov 0 1 6.957497 0.000000 19774 +relatedoper 0 1 6.957497 0.000000 19775 +slicer 0 1 6.957497 0.000000 19776 +unexpect 0 1 6.957497 0.000000 19777 +betweeninterprocedur 0 1 6.957497 0.000000 19778 +oninterprocedur 0 1 6.957497 0.000000 19779 +transformingthem 0 1 6.957497 0.000000 19780 +timebi 0 1 6.957497 0.000000 19781 +probleminst 0 1 6.957497 0.000000 19782 +publicationsprogram 0 1 6.957497 0.000000 19783 +slicing_pat 0 1 6.957497 0.000000 19784 +thesismerg 0 1 6.957497 0.000000 19785 +iwscm 0 1 6.957497 0.000000 19786 +popla 0 1 6.957497 0.000000 19787 +iwsvcc 0 1 6.957497 0.000000 19788 +ccpsd 0 1 6.957497 0.000000 19789 +npfo_submiss 0 1 6.957497 0.000000 19790 +ccipl 0 1 6.957497 0.000000 19791 +prog_integration_system 0 1 6.957497 0.000000 19792 +prog_integration_manu 0 1 6.957497 0.000000 19793 +subsetof 0 1 6.957497 0.000000 19794 +clickingher 0 1 6.957497 0.000000 19795 +andexpect 0 1 6.957497 0.000000 19796 +anddifferenc 0 1 6.957497 0.000000 19797 +thesesdavid 0 1 6.957497 0.000000 19798 +thesisphil 0 1 6.957497 0.000000 19799 +thesisinterprocedur 0 1 6.957497 0.000000 19800 +analysisdemand 0 1 6.957497 0.000000 19801 +tcs_ide_pap 0 1 6.957497 0.000000 19802 +ptime 0 1 6.957497 0.000000 19803 +acta_pap 0 1 6.957497 0.000000 19804 +pfeiffer_thesi 0 1 6.957497 0.000000 19805 +jalg_pap 0 1 6.957497 0.000000 19806 +popl_not 0 1 6.957497 0.000000 19807 +publicationsbooksrep 0 1 6.957497 0.000000 19808 +constructinglanguag 0 1 6.957497 0.000000 19809 +publicationssagiv 0 1 6.957497 0.000000 19810 +j_alg 0 1 6.957497 0.000000 19811 +preservingtransform 0 1 6.957497 0.000000 19812 +grammarswith 0 1 6.957497 0.000000 19813 +movement 0 1 6.957497 0.000000 19814 +sublinear 0 1 6.957497 0.000000 19815 +papershorwitz 0 1 6.957497 0.000000 19816 +ganzing 0 1 6.957497 0.000000 19817 +chaptersrep 0 1 6.957497 0.000000 19818 +bohner 0 1 6.957497 0.000000 19819 +fromproceed 0 1 6.957497 0.000000 19820 +ichikawa 0 1 6.957497 0.000000 19821 +tsubotani 0 1 6.957497 0.000000 19822 +barstow 0 1 6.957497 0.000000 19823 +sandewal 0 1 6.957497 0.000000 19824 +shrobe 0 1 6.957497 0.000000 19825 +publicationssiff 0 1 6.957497 0.000000 19826 +danvi 0 1 6.957497 0.000000 19827 +glueck 0 1 6.957497 0.000000 19828 +thiemann 0 1 6.957497 0.000000 19829 +hentenryck 0 1 6.957497 0.000000 19830 +formalapproach 0 1 6.957497 0.000000 19831 +nielsen 0 1 6.957497 0.000000 19832 +schwartzbach 0 1 6.957497 0.000000 19833 +tapsoft 0 1 6.957497 0.000000 19834 +compilerconstruct 0 1 6.957497 0.000000 19835 +edinburgh 0 1 6.957497 0.000000 19836 +reducibleflowgraph 0 1 6.957497 0.000000 19837 +velen 0 1 6.957497 0.000000 19838 +onalgebra 0 1 6.957497 0.000000 19839 +softwareconfigur 0 1 6.957497 0.000000 19840 +issuesin 0 1 6.957497 0.000000 19841 +barcelona 0 1 6.957497 0.000000 19842 +diaz 0 1 6.957497 0.000000 19843 +oreja 0 1 6.957497 0.000000 19844 +versionand 0 1 6.957497 0.000000 19845 +grassau 0 1 6.957497 0.000000 19846 +bericht 0 1 6.957497 0.000000 19847 +winkler 0 1 6.957497 0.000000 19848 +teubner 0 1 6.957497 0.000000 19849 +stuttgart 0 1 6.957497 0.000000 19850 +marceau 0 1 6.957497 0.000000 19851 +engineeringsymposium 0 1 6.957497 0.000000 19852 +alpern 0 1 6.957497 0.000000 19853 +albuquerqu 0 1 6.957497 0.000000 19854 +tosyntax 0 1 6.957497 0.000000 19855 +williamsburg 0 1 6.957497 0.000000 19856 +softwarerep 0 1 6.957497 0.000000 19857 +patentsrep 0 1 6.957497 0.000000 19858 +pend 0 1 6.957497 0.000000 19859 +submissionsrep 0 1 6.957497 0.000000 19860 +reportsrep 0 1 6.957497 0.000000 19861 +mehlhorn 0 1 6.957497 0.000000 19862 +datalogisk 0 1 6.957497 0.000000 19863 +psramalingam 0 1 6.957497 0.000000 19864 +klint 0 1 6.957497 0.000000 19865 +snelt 0 1 6.957497 0.000000 19866 +extendedabstract 0 1 6.957497 0.000000 19867 +reconstitut 0 1 6.957497 0.000000 19868 +studentsvisitor 0 1 6.957497 0.000000 19869 +jiazhen 0 1 6.957497 0.000000 19870 +paig 0 1 6.957497 0.000000 19871 +chiao 0 1 6.957497 0.000000 19872 +studentsramalingam 0 1 6.957497 0.000000 19873 +programintegr 0 1 6.957497 0.000000 19874 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..923a66a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +read 0 154 1.791759 0.000000 47 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +send 0 114 2.197225 0.000000 109 +check 0 115 2.197225 0.000000 118 +peopl 0 96 2.302585 0.000000 132 +access 0 102 2.302585 0.000000 136 +pictur 0 89 2.397895 0.000000 160 +sinc 0 90 2.397895 0.000000 159 +institut 0 84 2.484907 0.000000 187 +window 1 68 2.708050 2.708050 242 +function 1 62 2.772589 2.772589 275 +street 0 63 2.772589 0.000000 293 +copi 0 63 2.772589 0.000000 284 +best 0 59 2.833213 0.000000 299 +suggest 0 53 2.944439 0.000000 331 +date 0 51 2.995732 0.000000 344 +right 1 48 3.044522 3.044522 363 +friend 0 48 3.044522 0.000000 376 +tech 0 35 3.401197 0.000000 515 +india 0 32 3.465736 0.000000 550 +univ 1 28 3.610918 3.610918 617 +comp 0 26 3.688879 0.000000 650 +indian 1 22 3.850148 3.850148 769 +love 1 21 3.912023 3.912023 804 +reserv 0 20 3.951244 0.000000 808 +spend 0 19 4.007333 0.000000 850 +beauti 1 18 4.060443 4.060443 912 +statu 0 18 4.060443 0.000000 885 +speed 0 18 4.060443 0.000000 911 +seem 0 18 4.060443 0.000000 899 +comic 0 14 4.317488 0.000000 1103 +song 0 11 4.553877 0.000000 1380 +calvin 0 9 4.753590 0.000000 1518 +kanpur 0 8 4.875197 0.000000 1744 +film 0 8 4.875197 0.000000 1761 +apart 0 7 5.010635 0.000000 1936 +settimeout 0 5 5.347108 0.000000 2536 +guestbook 0 5 5.347108 0.000000 2475 +randal 0 4 5.568345 0.000000 2776 +mirza 0 3 5.857933 0.000000 3989 +hero 0 3 5.857933 0.000000 3711 +saeed 1 2 6.263398 6.263398 6172 +statusclock 1 1 6.957497 6.957497 19875 +pagespe 0 1 6.957497 0.000000 19876 +clearid 0 1 6.957497 0.000000 19877 +cleartimeout 0 1 6.957497 0.000000 19878 +lucknow 0 1 6.957497 0.000000 19879 +listn 0 1 6.957497 0.000000 19880 +netsurf 0 1 6.957497 0.000000 19881 +wismad 0 1 6.957497 0.000000 19882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..94598e85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +languag 0 227 1.386294 0.000000 26 +oper 0 180 1.609438 0.000000 34 +fall 0 181 1.609438 0.000000 40 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +lectur 0 135 1.945910 0.000000 73 +dayton 0 119 2.079442 0.000000 104 +real 0 93 2.397895 0.000000 144 +chang 0 82 2.484907 0.000000 163 +septemb 0 65 2.772589 0.000000 274 +taught 0 33 3.433987 0.000000 526 +comp 0 26 3.688879 0.000000 650 +edutelephon 0 10 4.653960 0.000000 1473 +desktop 0 10 4.653960 0.000000 1445 +peterson 1 7 5.010635 5.010635 1850 +salli 1 3 5.857933 5.857933 3432 +goodwin 0 1 6.957497 0.000000 19883 +lecturercomput 0 1 6.957497 0.000000 19884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..3f586580 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,130 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +system 0 443 0.693147 0.000000 6 +offic 0 299 1.098612 0.000000 13 +engin 0 297 1.098612 0.000000 20 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +softwar 0 220 1.386294 0.000000 30 +also 0 259 1.386294 0.000000 28 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +base 0 165 1.791759 0.000000 50 +applic 0 170 1.791759 0.000000 56 +process 1 142 1.945910 1.945910 72 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +support 0 132 1.945910 0.000000 83 +databas 2 122 2.079442 4.158884 86 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +postscript 0 131 2.079442 0.000000 90 +seattl 0 120 2.079442 0.000000 103 +introduct 0 126 2.079442 0.000000 87 +intern 1 108 2.197225 2.197225 128 +manag 0 114 2.197225 0.000000 125 +look 0 107 2.197225 0.000000 115 +person 0 111 2.197225 0.000000 117 +present 0 91 2.397895 0.000000 145 +educ 1 86 2.484907 2.484907 191 +larg 1 82 2.484907 2.484907 168 +institut 0 84 2.484907 0.000000 187 +activ 0 84 2.484907 0.000000 182 +help 0 83 2.484907 0.000000 175 +server 1 76 2.564949 2.564949 204 +master 0 76 2.564949 0.000000 216 +sourc 0 77 2.564949 0.000000 201 +onlin 1 75 2.639057 2.639057 223 +logic 0 71 2.639057 0.000000 230 +name 0 72 2.639057 0.000000 220 +nation 0 74 2.639057 0.000000 240 +multimedia 0 68 2.708050 0.000000 258 +dept 0 64 2.772589 0.000000 291 +evalu 0 64 2.772589 0.000000 266 +run 0 51 2.995732 0.000000 347 +date 0 51 2.995732 0.000000 344 +digit 0 52 2.995732 0.000000 348 +pointer 0 48 3.044522 0.000000 368 +archiv 0 49 3.044522 0.000000 364 +protocol 1 45 3.135494 3.135494 407 +york 0 41 3.218876 0.000000 435 +transact 1 39 3.258097 3.258097 438 +slide 0 38 3.295837 0.000000 467 +industri 0 38 3.295837 0.000000 464 +bibliographi 0 34 3.401197 0.000000 518 +queri 0 33 3.433987 0.000000 524 +articl 0 33 3.433987 0.000000 530 +idea 0 32 3.465736 0.000000 545 +storag 0 31 3.496508 0.000000 553 +profil 0 30 3.555348 0.000000 581 +toward 0 25 3.737670 0.000000 668 +jeff 0 25 3.737670 0.000000 673 +reach 0 24 3.761200 0.000000 688 +initi 0 23 3.806662 0.000000 717 +sort 1 22 3.850148 3.850148 738 +indian 0 22 3.850148 0.000000 769 +hierarchi 0 22 3.850148 0.000000 744 +cooper 0 22 3.850148 0.000000 757 +boston 0 19 4.007333 0.000000 862 +sigmod 0 19 4.007333 0.000000 877 +bachelor 0 17 4.110874 0.000000 957 +estim 0 17 4.110874 0.000000 930 +georg 0 16 4.174387 0.000000 994 +princeton 0 15 4.248495 0.000000 1042 +massiv 0 15 4.248495 0.000000 1026 +warn 0 14 4.317488 0.000000 1068 +infrastructur 0 12 4.465908 0.000000 1234 +council 0 11 4.553877 0.000000 1364 +naughton 1 10 4.653960 4.653960 1450 +vldb 1 10 4.653960 4.653960 1470 +consortium 0 10 4.653960 0.000000 1467 +jeffrei 0 9 4.753590 0.000000 1612 +utah 0 9 4.753590 0.000000 1585 +madra 0 8 4.875197 0.000000 1770 +presenc 0 8 4.875197 0.000000 1671 +competit 0 8 4.875197 0.000000 1635 +spec 0 8 4.875197 0.000000 1640 +analyt 0 7 5.010635 0.000000 1913 +bombai 0 7 5.010635 0.000000 1972 +aggreg 0 6 5.164786 0.000000 2219 +prasad 0 6 5.164786 0.000000 2126 +chicago 0 6 5.164786 0.000000 2149 +deshpand 0 5 5.347108 0.000000 2431 +amit 1 4 5.568345 5.568345 2972 +snail 0 4 5.568345 0.000000 2916 +multidimension 0 4 5.568345 0.000000 3091 +ramasami 0 4 5.568345 0.000000 3088 +shukla 0 3 5.857933 0.000000 4030 +karthikeyan 0 3 5.857933 0.000000 4031 +mumbai 0 3 5.857933 0.000000 4029 +pilot 0 3 5.857933 0.000000 4008 +children 0 3 5.857933 0.000000 3767 +asha 0 3 5.857933 0.000000 4037 +marathon 1 2 6.263398 6.263398 5592 +olap 1 2 6.263398 6.263398 6233 +endow 0 2 6.263398 0.000000 6234 +guidanc 0 1 6.957497 0.000000 19885 +trier 0 1 6.957497 0.000000 19886 +mdd 0 1 6.957497 0.000000 19887 +niiip 0 1 6.957497 0.000000 19888 +transcoop 0 1 6.957497 0.000000 19889 +needi 0 1 6.957497 0.000000 19890 +pageand 0 1 6.957497 0.000000 19891 +bookmarksar 0 1 6.957497 0.000000 19892 +garfield 0 1 6.957497 0.000000 19893 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..e3507e19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 0 343 1.098612 0.000000 19 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +contact 0 153 1.791759 0.000000 59 +construct 0 139 1.945910 0.000000 82 +like 0 132 1.945910 0.000000 81 +technolog 0 131 2.079442 0.000000 102 +place 0 106 2.197225 0.000000 124 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +educ 0 86 2.484907 0.000000 191 +come 0 78 2.564949 0.000000 202 +name 0 72 2.639057 0.000000 220 +visit 0 63 2.772589 0.000000 288 +undergradu 0 54 2.944439 0.000000 338 +india 0 32 3.465736 0.000000 550 +altern 0 26 3.688879 0.000000 641 +bookmark 0 26 3.688879 0.000000 639 +worth 0 11 4.553877 0.000000 1294 +bombai 0 7 5.010635 0.000000 1972 +whereabout 0 4 5.568345 0.000000 3078 +indianinstitut 0 3 5.857933 0.000000 4003 +fantast 0 3 5.857933 0.000000 3966 +hadmi 0 2 6.263398 0.000000 6097 +canfing 0 2 6.263398 0.000000 6098 +ashwin 1 1 6.957497 6.957497 19894 +iitb 0 1 6.957497 0.000000 19895 +meto 0 1 6.957497 0.000000 19896 +sashwin 0 1 6.957497 0.000000 19897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..102d1a88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +time 1 293 1.098612 1.098612 17 +engin 0 297 1.098612 0.000000 20 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +updat 0 191 1.609438 0.000000 41 +read 1 154 1.791759 1.791759 47 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +like 2 132 1.945910 3.891820 81 +year 1 148 1.945910 1.945910 84 +architectur 0 139 1.945910 0.000000 77 +technolog 0 131 2.079442 0.000000 102 +spring 0 131 2.079442 0.000000 88 +compil 0 122 2.079442 0.000000 96 +provid 0 121 2.079442 0.000000 94 +place 1 106 2.197225 2.197225 124 +version 0 113 2.197225 0.000000 122 +site 0 106 2.197225 0.000000 119 +make 0 111 2.197225 0.000000 120 +send 0 114 2.197225 0.000000 109 +mani 0 92 2.397895 0.000000 150 +homepag 0 93 2.397895 0.000000 148 +comment 0 93 2.397895 0.000000 146 +school 0 84 2.484907 0.000000 188 +institut 0 84 2.484907 0.000000 187 +contain 0 81 2.484907 0.000000 174 +academ 0 82 2.484907 0.000000 178 +come 1 78 2.564949 2.564949 202 +know 0 80 2.564949 0.000000 198 +solv 0 73 2.639057 0.000000 234 +would 0 67 2.708050 0.000000 251 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +back 1 60 2.833213 2.833213 297 +plai 0 60 2.833213 0.000000 307 +undergradu 0 54 2.944439 0.000000 338 +talk 0 53 2.944439 0.000000 336 +suggest 0 53 2.944439 0.000000 331 +much 1 52 2.995732 2.995732 349 +maintain 0 51 2.995732 0.000000 342 +date 0 51 2.995732 0.000000 344 +friend 1 48 3.044522 3.044522 376 +better 0 45 3.135494 0.000000 401 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +music 0 42 3.218876 0.000000 436 +past 0 42 3.218876 0.000000 428 +must 0 40 3.258097 0.000000 442 +realli 0 40 3.258097 0.000000 444 +author 0 39 3.258097 0.000000 450 +field 1 37 3.332205 3.332205 482 +mean 0 37 3.332205 0.000000 477 +india 0 32 3.465736 0.000000 550 +kind 0 32 3.465736 0.000000 541 +photo 1 31 3.496508 3.496508 561 +someth 1 31 3.496508 3.496508 554 +hard 0 30 3.555348 0.000000 563 +built 0 29 3.583519 0.000000 592 +consid 0 29 3.583519 0.000000 590 +hope 0 28 3.610918 0.000000 610 +mine 1 26 3.688879 3.688879 654 +bookmark 0 26 3.688879 0.000000 639 +enjoi 0 26 3.688879 0.000000 660 +rather 0 26 3.688879 0.000000 642 +sport 0 25 3.737670 0.000000 683 +indian 0 22 3.850148 0.000000 769 +inth 0 22 3.850148 0.000000 741 +watch 1 21 3.912023 3.912023 789 +wonder 0 20 3.951244 0.000000 815 +tenni 0 20 3.951244 0.000000 838 +beauti 0 18 4.060443 0.000000 912 +listen 0 18 4.060443 0.000000 907 +anyth 1 16 4.174387 4.174387 998 +across 0 16 4.174387 0.000000 974 +hobbi 0 16 4.174387 0.000000 1009 +photograph 0 15 4.248495 0.000000 1056 +goe 0 15 4.248495 0.000000 1044 +near 0 14 4.317488 0.000000 1091 +unfortun 0 13 4.382027 0.000000 1170 +scan 0 12 4.465908 0.000000 1243 +reader 0 12 4.465908 0.000000 1246 +awai 1 10 4.653960 4.653960 1447 +town 0 10 4.653960 0.000000 1458 +interestsmi 0 10 4.653960 0.000000 1462 +earth 0 10 4.653960 0.000000 1463 +hint 0 10 4.653960 0.000000 1419 +ball 0 9 4.753590 0.000000 1608 +jeffrei 0 9 4.753590 0.000000 1612 +pick 0 9 4.753590 0.000000 1498 +kanpur 1 8 4.875197 4.875197 1744 +pagei 0 8 4.875197 0.000000 1683 +empir 0 8 4.875197 0.000000 1722 +bridg 0 8 4.875197 0.000000 1764 +cricket 1 7 5.010635 5.010635 1945 +river 0 6 5.164786 0.000000 2220 +rock 0 6 5.164786 0.000000 2164 +whatev 0 6 5.164786 0.000000 2097 +neither 0 6 5.164786 0.000000 1990 +fiction 0 6 5.164786 0.000000 2217 +tri 0 6 5.164786 0.000000 2166 +album 1 4 5.568345 5.568345 2888 +gokul 0 4 5.568345 0.000000 2668 +thati 0 4 5.568345 0.000000 2616 +metal 0 4 5.568345 0.000000 3079 +fantasi 0 4 5.568345 0.000000 3055 +devot 0 4 5.568345 0.000000 2711 +dont 1 3 5.857933 5.857933 3473 +pleasant 1 3 5.857933 5.857933 3825 +seinfeld 0 3 5.857933 0.000000 3958 +romanc 0 3 5.857933 0.000000 3632 +iitk 1 2 6.263398 6.263398 6227 +sastri 1 2 6.263398 6.263398 6171 +mugshot 0 2 6.263398 0.000000 4984 +karnataka 0 2 6.263398 0.000000 5106 +whati 0 2 6.263398 0.000000 6027 +horror 0 2 6.263398 0.000000 5075 +eduunivers 0 2 6.263398 0.000000 6216 +subramanya 1 1 6.957497 6.957497 19898 +hospet 1 1 6.957497 6.957497 19899 +tungabhadra 1 1 6.957497 6.957497 19900 +favourit 1 1 6.957497 6.957497 19901 +hampi 0 1 6.957497 0.000000 19902 +ruin 0 1 6.957497 0.000000 19903 +vijayanagara 0 1 6.957497 0.000000 19904 +fewphotograph 0 1 6.957497 0.000000 19905 +classmatesat 0 1 6.957497 0.000000 19906 +presentcurr 0 1 6.957497 0.000000 19907 +registeredfor 0 1 6.957497 0.000000 19908 +playphatta 0 1 6.957497 0.000000 19909 +champ 0 1 6.957497 0.000000 19910 +entertainmentin 0 1 6.957497 0.000000 19911 +donot 0 1 6.957497 0.000000 19912 +sshow 0 1 6.957497 0.000000 19913 +voraci 0 1 6.957497 0.000000 19914 +unsuccesfulli 0 1 6.957497 0.000000 19915 +grip 0 1 6.957497 0.000000 19916 +ifposs 0 1 6.957497 0.000000 19917 +archer 0 1 6.957497 0.000000 19918 +jane 0 1 6.957497 0.000000 19919 +austen 0 1 6.957497 0.000000 19920 +pride 0 1 6.957497 0.000000 19921 +prejudic 0 1 6.957497 0.000000 19922 +ramesh 0 1 6.957497 0.000000 19923 +mahadeven 0 1 6.957497 0.000000 19924 +sarticl 0 1 6.957497 0.000000 19925 +wonderfulgam 0 1 6.957497 0.000000 19926 +itagain 0 1 6.957497 0.000000 19927 +crossword 0 1 6.957497 0.000000 19928 +cryptic 0 1 6.957497 0.000000 19929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..892b641c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,220 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 0 384 0.693147 0.000000 11 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +group 0 183 1.609438 0.000000 36 +list 0 201 1.609438 0.000000 39 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +develop 0 174 1.791759 0.000000 53 +address 0 170 1.791759 0.000000 62 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +first 0 140 1.945910 0.000000 71 +area 0 144 1.945910 0.000000 80 +report 1 131 2.079442 2.079442 92 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +studi 0 120 2.079442 0.000000 91 +dayton 0 119 2.079442 0.000000 104 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +assist 0 112 2.197225 0.000000 113 +topic 0 114 2.197225 0.000000 110 +site 0 106 2.197225 0.000000 119 +find 0 111 2.197225 0.000000 111 +send 0 114 2.197225 0.000000 109 +user 1 104 2.302585 2.302585 137 +peopl 1 96 2.302585 2.302585 132 +need 0 98 2.302585 0.000000 135 +part 0 98 2.302585 0.000000 129 +commun 1 95 2.397895 2.397895 157 +present 0 91 2.397895 0.000000 145 +select 0 91 2.397895 0.000000 154 +sinc 0 90 2.397895 0.000000 159 +internet 2 83 2.484907 4.969814 186 +educ 1 86 2.484907 2.484907 191 +resourc 1 81 2.484907 2.484907 172 +help 0 83 2.484907 0.000000 175 +wide 0 84 2.484907 0.000000 185 +level 0 87 2.484907 0.000000 180 +come 0 78 2.564949 0.000000 202 +issu 0 78 2.564949 0.000000 211 +june 0 79 2.564949 0.000000 214 +orient 0 80 2.564949 0.000000 205 +servic 2 72 2.639057 5.278114 236 +nation 1 74 2.639057 2.639057 240 +addit 1 74 2.639057 2.639057 228 +involv 0 71 2.639057 0.000000 227 +appli 0 71 2.639057 0.000000 226 +write 0 72 2.639057 0.000000 222 +free 0 73 2.639057 0.000000 224 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +import 0 65 2.772589 0.000000 282 +written 0 63 2.772589 0.000000 278 +result 0 65 2.772589 0.000000 281 +plan 0 65 2.772589 0.000000 272 +visit 0 63 2.772589 0.000000 288 +descript 0 64 2.772589 0.000000 271 +street 0 63 2.772589 0.000000 293 +best 0 59 2.833213 0.000000 299 +special 0 56 2.890372 0.000000 320 +three 0 54 2.944439 0.000000 330 +undergradu 0 54 2.944439 0.000000 338 +sampl 0 53 2.944439 0.000000 339 +week 1 52 2.995732 2.995732 343 +profession 0 51 2.995732 0.000000 345 +format 0 48 3.044522 0.000000 356 +telephon 0 50 3.044522 0.000000 373 +effect 0 46 3.091042 0.000000 385 +natur 0 44 3.135494 0.000000 406 +third 0 43 3.178054 0.000000 412 +edit 0 42 3.218876 0.000000 418 +futur 0 41 3.218876 0.000000 427 +join 1 39 3.258097 3.258097 457 +multipl 0 39 3.258097 0.000000 453 +continu 0 39 3.258097 0.000000 448 +open 1 38 3.295837 3.295837 469 +seminar 0 38 3.295837 0.000000 470 +feel 0 37 3.332205 0.000000 483 +staff 1 36 3.367296 3.367296 490 +everi 0 34 3.401197 0.000000 519 +award 0 34 3.401197 0.000000 523 +kind 0 32 3.465736 0.000000 541 +collabor 0 32 3.465736 0.000000 543 +posit 1 31 3.496508 3.496508 552 +titl 0 31 3.496508 0.000000 556 +produc 0 30 3.555348 0.000000 572 +focus 0 29 3.583519 0.000000 584 +depend 0 29 3.583519 0.000000 583 +propos 0 28 3.610918 0.000000 602 +retriev 0 27 3.637586 0.000000 621 +background 0 25 3.737670 0.000000 664 +spent 0 25 3.737670 0.000000 676 +departmentunivers 1 24 3.761200 3.761200 711 +higher 0 24 3.761200 0.000000 690 +seri 0 24 3.761200 0.000000 708 +director 0 22 3.850148 0.000000 767 +cooper 0 22 3.850148 0.000000 757 +newsgroup 0 21 3.912023 0.000000 783 +divis 0 21 3.912023 0.000000 803 +toolkit 1 20 3.951244 3.951244 835 +wrote 0 20 3.951244 0.000000 830 +item 0 19 4.007333 0.000000 856 +expand 0 17 4.110874 0.000000 928 +diego 1 16 4.174387 4.174387 992 +susan 1 15 4.248495 4.248495 1050 +later 0 15 4.248495 0.000000 1043 +becam 0 14 4.317488 0.000000 1117 +speak 0 12 4.465908 0.000000 1283 +onth 0 12 4.465908 0.000000 1218 +branch 0 11 4.553877 0.000000 1318 +thecomput 0 10 4.653960 0.000000 1408 +hundr 0 9 4.753590 0.000000 1528 +discov 0 9 4.753590 0.000000 1562 +respect 0 9 4.753590 0.000000 1545 +filter 0 8 4.875197 0.000000 1641 +elect 0 8 4.875197 0.000000 1771 +jack 0 8 4.875197 0.000000 1780 +potenti 0 8 4.875197 0.000000 1690 +gather 0 8 4.875197 0.000000 1719 +scout 2 7 5.010635 10.021270 1903 +usabl 0 7 5.010635 0.000000 1810 +happen 0 7 5.010635 0.000000 1790 +discoveri 0 7 5.010635 0.000000 1915 +edumi 0 6 5.164786 0.000000 2132 +approv 0 6 5.164786 0.000000 2078 +matthew 0 6 5.164786 0.000000 2193 +ifyou 0 6 5.164786 0.000000 1992 +kid 1 5 5.347108 5.347108 2516 +merit 1 5 5.347108 5.347108 2466 +devot 1 4 5.568345 5.568345 2711 +newslett 0 4 5.568345 0.000000 2873 +termin 0 4 5.568345 0.000000 2852 +chose 0 4 5.568345 0.000000 2629 +hire 0 4 5.568345 0.000000 2976 +agreement 1 3 5.857933 5.857933 3207 +newli 0 3 5.857933 0.000000 3786 +orth 0 3 5.857933 0.000000 3685 +moreinform 0 3 5.857933 0.000000 3307 +audienc 0 3 5.857933 0.000000 3180 +aproject 0 3 5.857933 0.000000 3142 +expans 0 3 5.857933 0.000000 3755 +disciplin 0 3 5.857933 0.000000 3392 +sciencefound 1 2 6.263398 6.263398 5150 +calcari 0 2 6.263398 0.000000 6144 +thehigh 0 2 6.263398 0.000000 4095 +thousand 0 2 6.263398 0.000000 5949 +arbor 0 2 6.263398 0.000000 6235 +backbon 0 2 6.263398 0.000000 5623 +thescout 0 2 6.263398 0.000000 6082 +andeduc 1 1 6.957497 6.957497 19930 +reloc 1 1 6.957497 6.957497 19931 +speciallibrarian 1 1 6.957497 6.957497 19932 +systemadministr 1 1 6.957497 6.957497 19933 +calcarimanag 0 1 6.957497 0.000000 19934 +servicescomput 0 1 6.957497 0.000000 19935 +madisonsc 0 1 6.957497 0.000000 19936 +scoutservic 0 1 6.957497 0.000000 19937 +internicand 0 1 6.957497 0.000000 19938 +bestresourc 0 1 6.957497 0.000000 19939 +soonth 0 1 6.957497 0.000000 19940 +sprout 0 1 6.957497 0.000000 19941 +andthousand 0 1 6.957497 0.000000 19942 +annotatedlist 0 1 6.957497 0.000000 19943 +itemsinclud 0 1 6.957497 0.000000 19944 +happeningspost 0 1 6.957497 0.000000 19945 +weekdai 0 1 6.957497 0.000000 19946 +wheni 0 1 6.957497 0.000000 19947 +thensfnet 0 1 6.957497 0.000000 19948 +informationservic 0 1 6.957497 0.000000 19949 +tonat 0 1 6.957497 0.000000 19950 +internetand 0 1 6.957497 0.000000 19951 +seminarseri 0 1 6.957497 0.000000 19952 +internetend 0 1 6.957497 0.000000 19953 +forcerfnet 0 1 6.957497 0.000000 19954 +internicproject 0 1 6.957497 0.000000 19955 +theport 0 1 6.957497 0.000000 19956 +workof 0 1 6.957497 0.000000 19957 +andrequest 0 1 6.957497 0.000000 19958 +heartilyagre 0 1 6.957497 0.000000 19959 +servicesat 0 1 6.957497 0.000000 19960 +solock 0 1 6.957497 0.000000 19961 +theaddit 0 1 6.957497 0.000000 19962 +livesei 0 1 6.957497 0.000000 19963 +asscout 0 1 6.957497 0.000000 19964 +researcharea 0 1 6.957497 0.000000 19965 +campus 0 1 6.957497 0.000000 19966 +includenetwork 0 1 6.957497 0.000000 19967 +nidr 0 1 6.957497 0.000000 19968 +anddisciplin 0 1 6.957497 0.000000 19969 +willincludecomput 0 1 6.957497 0.000000 19970 +ofour 0 1 6.957497 0.000000 19971 +theonlin 0 1 6.957497 0.000000 19972 +librarian 0 1 6.957497 0.000000 19973 +aresum 0 1 6.957497 0.000000 19974 +contactm 0 1 6.957497 0.000000 19975 +calcariinternet 0 1 6.957497 0.000000 19976 +scal 0 1 6.957497 0.000000 19977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..ce339cc6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +depart 0 457 0.693147 0.000000 12 +student 0 343 1.098612 0.000000 19 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +time 0 293 1.098612 0.000000 17 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +welcom 0 122 2.079442 0.000000 99 +high 0 130 2.079442 0.000000 101 +technolog 0 131 2.079442 0.000000 102 +dayton 0 119 2.079442 0.000000 104 +person 0 111 2.197225 0.000000 117 +find 0 111 2.197225 0.000000 111 +access 0 102 2.302585 0.000000 136 +homepag 0 93 2.397895 0.000000 148 +present 0 91 2.397895 0.000000 145 +sinc 0 90 2.397895 0.000000 159 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +institut 0 84 2.484907 0.000000 187 +stuff 0 87 2.484907 0.000000 171 +june 0 79 2.564949 0.000000 214 +state 0 76 2.564949 0.000000 207 +dept 0 64 2.772589 0.000000 291 +major 0 56 2.890372 0.000000 315 +undergradu 0 54 2.944439 0.000000 338 +finger 0 52 2.995732 0.000000 354 +past 0 42 3.218876 0.000000 428 +higher 0 24 3.761200 0.000000 690 +born 0 21 3.912023 0.000000 798 +happi 0 14 4.317488 0.000000 1079 +avenu 0 12 4.465908 0.000000 1277 +resid 0 10 4.653960 0.000000 1461 +secondari 0 7 5.010635 0.000000 1884 +southern 0 6 5.164786 0.000000 2191 +whereabout 0 4 5.568345 0.000000 3078 +worri 0 3 5.857933 0.000000 3130 +coimbator 1 2 6.263398 6.263398 5130 +theindian 0 2 6.263398 0.000000 5795 +kharagpur 0 2 6.263398 0.000000 6236 +kendal 0 2 6.263398 0.000000 6085 +chandrasekar 1 1 6.957497 6.957497 19978 +tamilnadu 0 1 6.957497 0.000000 19979 +inindia 0 1 6.957497 0.000000 19980 +officedept 0 1 6.957497 0.000000 19981 +sivasankaran 0 1 6.957497 0.000000 19982 +schandra 0 1 6.957497 0.000000 19983 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..29fcf4c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +wisc 1 242 1.386294 1.386294 33 +languag 0 227 1.386294 0.000000 26 +link 0 247 1.386294 0.000000 24 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +architectur 0 139 1.945910 0.000000 77 +dayton 0 119 2.079442 0.000000 104 +west 0 83 2.484907 0.000000 192 +descript 0 64 2.772589 0.000000 271 +advisor 0 51 2.995732 0.000000 355 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +eric 0 19 4.007333 0.000000 870 +wind 0 18 4.060443 0.000000 908 +club 0 15 4.248495 0.000000 1058 +usaphon 0 9 4.753590 0.000000 1600 +tunnel 0 9 4.753590 0.000000 1615 +assistantdepart 0 8 4.875197 0.000000 1784 +hockei 0 8 4.875197 0.000000 1760 +byte 0 6 5.164786 0.000000 2108 +pageer 0 3 5.857933 0.000000 3776 +schnarr 1 2 6.263398 6.263398 6194 +dragon 0 2 6.263398 0.000000 4176 +larusresearch 0 1 6.957497 0.000000 19984 +languagesfunct 0 1 6.957497 0.000000 19985 +designinterest 0 1 6.957497 0.000000 19986 +sacm 0 1 6.957497 0.000000 19987 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..3eb502b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +system 0 443 0.693147 0.000000 6 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +parallel 0 169 1.791759 0.000000 60 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +architectur 0 139 1.945910 0.000000 77 +support 0 132 1.945910 0.000000 83 +dayton 0 119 2.079442 0.000000 104 +confer 0 126 2.079442 0.000000 100 +intern 0 108 2.197225 0.000000 128 +specif 0 106 2.197225 0.000000 106 +memori 1 101 2.302585 2.302585 139 +access 0 102 2.302585 0.000000 136 +user 0 104 2.302585 0.000000 137 +west 0 83 2.484907 0.000000 192 +control 0 82 2.484907 0.000000 164 +level 0 87 2.484907 0.000000 180 +educ 0 86 2.484907 0.000000 191 +david 1 71 2.639057 2.639057 232 +share 1 59 2.833213 2.833213 304 +juli 0 60 2.833213 0.000000 305 +advisor 0 51 2.995732 0.000000 355 +mark 1 44 3.135494 3.135494 403 +protocol 0 45 3.135494 0.000000 407 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +jame 1 35 3.401197 3.401197 507 +hill 1 25 3.737670 3.737670 670 +supercomput 0 25 3.737670 0.000000 681 +programminglanguag 0 21 3.912023 0.000000 782 +fine 0 20 3.951244 0.000000 822 +steven 1 17 4.110874 4.110874 953 +asplo 0 17 4.110874 0.000000 948 +wood 1 11 4.553877 4.553877 1355 +grain 0 10 4.653960 0.000000 1448 +cook 0 10 4.653960 0.000000 1464 +laru 1 9 4.753590 4.753590 1560 +yanni 1 8 4.875197 4.875197 1713 +assistantdepart 0 8 4.875197 0.000000 1784 +sixth 0 7 5.010635 0.000000 1917 +roger 0 7 5.010635 0.000000 1892 +ann 0 6 5.164786 0.000000 2065 +ioanni 1 5 5.347108 5.347108 2553 +babak 1 5 5.347108 5.347108 2584 +falsafi 1 5 5.347108 5.347108 2585 +lebeck 1 5 5.347108 5.347108 2582 +reinhardt 1 5 5.347108 5.347108 2583 +schoina 1 4 5.568345 5.568345 3085 +alvin 1 4 5.568345 5.568345 3084 +crete 1 3 5.857933 5.857933 3773 +iraklio 1 1 6.957497 6.957497 19988 +systemspubl 0 1 6.957497 0.000000 19989 +cretan 0 1 6.957497 0.000000 19990 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..f4aac9e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +interest 0 384 0.693147 0.000000 11 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +includ 0 208 1.609438 0.000000 42 +version 0 113 2.197225 0.000000 122 +degre 0 69 2.708050 0.000000 259 +differ 0 66 2.708050 0.000000 253 +special 0 56 2.890372 0.000000 320 +finger 1 52 2.995732 2.995732 354 +give 0 50 3.044522 0.000000 359 +could 0 46 3.091042 0.000000 383 +india 1 32 3.465736 3.465736 550 +mine 0 26 3.688879 0.000000 654 +wish 0 24 3.761200 0.000000 692 +instead 0 22 3.850148 0.000000 756 +grad 0 20 3.951244 0.000000 837 +account 0 18 4.060443 0.000000 882 +regist 0 17 4.110874 0.000000 938 +biologi 0 15 4.248495 0.000000 1049 +classic 0 14 4.317488 0.000000 1084 +danc 1 12 4.465908 4.465908 1278 +switch 0 8 4.875197 0.000000 1718 +keeper 0 5 5.347108 0.000000 2569 +keyboard 0 4 5.568345 0.000000 2970 +asian 0 3 5.857933 0.000000 3598 +southeast 1 2 6.263398 6.263398 6188 +asia 1 2 6.263398 6.263398 5952 +hairbal 0 2 6.263398 0.000000 6237 +beverli 1 1 6.957497 6.957497 19991 +seavei 1 1 6.957497 6.957497 19992 +ramayana 1 1 6.957497 6.957497 19993 +drama 0 1 6.957497 0.000000 19994 +ramakien 0 1 6.957497 0.000000 19995 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..ccb6b034 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +link 0 247 1.386294 0.000000 24 +also 0 259 1.386294 0.000000 28 +list 0 201 1.609438 0.000000 39 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +address 0 170 1.791759 0.000000 62 +wisconsin 0 169 1.791759 0.000000 54 +welcom 1 122 2.079442 2.079442 99 +dayton 0 119 2.079442 0.000000 104 +databas 0 122 2.079442 0.000000 86 +world 1 115 2.197225 2.197225 126 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +book 0 99 2.302585 0.000000 131 +school 1 84 2.484907 2.484907 188 +educ 0 86 2.484907 0.000000 191 +want 0 79 2.564949 0.000000 199 +addit 0 74 2.639057 0.000000 228 +guid 0 63 2.772589 0.000000 267 +septemb 0 65 2.772589 0.000000 274 +back 0 60 2.833213 0.000000 297 +game 0 36 3.367296 0.000000 498 +eduoffic 0 33 3.433987 0.000000 531 +enjoi 0 26 3.688879 0.000000 660 +fact 0 21 3.912023 0.000000 780 +scott 0 18 4.060443 0.000000 884 +rate 0 15 4.248495 0.000000 1037 +english 0 15 4.248495 0.000000 1033 +hopefulli 0 14 4.317488 0.000000 1071 +franc 0 12 4.465908 0.000000 1276 +ball 0 9 4.753590 0.000000 1608 +drink 0 9 4.753590 0.000000 1607 +lock 0 9 4.753590 0.000000 1551 +poetri 0 9 4.753590 0.000000 1596 +absolut 0 8 4.875197 0.000000 1646 +dictionari 0 8 4.875197 0.000000 1642 +largest 0 7 5.010635 0.000000 1858 +seen 0 6 5.164786 0.000000 2202 +beer 0 6 5.164786 0.000000 2216 +soda 0 6 5.164786 0.000000 2189 +constitut 0 6 5.164786 0.000000 2026 +pagescott 0 4 5.568345 0.000000 2978 +chees 0 4 5.568345 0.000000 3090 +add 0 3 5.857933 0.000000 3131 +uwisc 1 2 6.263398 6.263398 4738 +caffein 0 2 6.263398 0.000000 5936 +thesauru 0 2 6.263398 0.000000 6238 +colvil 1 1 6.957497 6.957497 19996 +pagein 0 1 6.957497 0.000000 19997 +pickingand 0 1 6.957497 0.000000 19998 +artsi 0 1 6.957497 0.000000 19999 +roget 0 1 6.957497 0.000000 20000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..d0d9a9b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +project 0 340 1.098612 0.000000 18 +includ 0 208 1.609438 0.000000 42 +applic 0 170 1.791759 0.000000 56 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +perform 0 143 1.945910 0.000000 74 +object 0 138 1.945910 0.000000 79 +analysi 1 124 2.079442 2.079442 98 +high 0 130 2.079442 0.000000 101 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +control 0 82 2.484907 0.000000 164 +learn 0 86 2.484907 0.000000 170 +level 0 87 2.484907 0.000000 180 +knowledg 0 67 2.708050 0.000000 243 +goal 0 66 2.708050 0.000000 250 +virtual 1 62 2.772589 2.772589 285 +robot 0 36 3.367296 0.000000 497 +anim 1 31 3.496508 3.496508 557 +steve 1 29 3.583519 3.583519 594 +task 0 25 3.737670 0.000000 678 +motion 1 24 3.761200 3.761200 699 +store 0 24 3.761200 0.000000 693 +input 0 23 3.806662 0.000000 727 +sequenc 0 23 3.806662 0.000000 734 +period 0 22 3.850148 0.000000 743 +modern 0 16 4.174387 0.000000 966 +devic 0 16 4.174387 0.000000 1002 +charact 0 15 4.248495 0.000000 1028 +track 0 15 4.248495 0.000000 1029 +chuck 0 14 4.317488 0.000000 1108 +directli 0 13 4.382027 0.000000 1141 +realiti 0 12 4.465908 0.000000 1272 +walk 0 12 4.465908 0.000000 1281 +motiv 0 11 4.553877 0.000000 1346 +realist 0 8 4.875197 0.000000 1665 +root 0 8 4.875197 0.000000 1650 +seitz 1 7 5.010635 5.010635 1976 +smile 0 7 5.010635 0.000000 1807 +infer 0 6 5.164786 0.000000 2040 +writeup 0 5 5.347108 0.000000 2352 +rigid 0 5 5.347108 0.000000 2432 +tocomput 0 3 5.857933 0.000000 3162 +endow 0 2 6.263398 0.000000 6234 +cue 0 2 6.263398 0.000000 5391 +anabstract 0 2 6.263398 0.000000 5491 +dyerour 0 1 6.957497 0.000000 20001 +teachinga 0 1 6.957497 0.000000 20002 +hasit 0 1 6.957497 0.000000 20003 +cartoon 0 1 6.957497 0.000000 20004 +teleconferenc 0 1 6.957497 0.000000 20005 +performa 0 1 6.957497 0.000000 20006 +repertoir 0 1 6.957497 0.000000 20007 +beinvok 0 1 6.957497 0.000000 20008 +cu 0 1 6.957497 0.000000 20009 +levelev 0 1 6.957497 0.000000 20010 +nonrigid 0 1 6.957497 0.000000 20011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..eea44228 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +last 0 314 1.098612 0.000000 14 +recent 0 167 1.791759 0.000000 58 +first 0 140 1.945910 0.000000 71 +process 0 142 1.945910 0.000000 72 +click 0 142 1.945910 0.000000 78 +provid 0 121 2.079442 0.000000 94 +techniqu 1 99 2.302585 2.302585 138 +user 0 104 2.302585 0.000000 137 +imag 2 91 2.397895 4.795790 161 +center 0 88 2.397895 0.000000 158 +chang 0 82 2.484907 0.000000 163 +june 0 79 2.564949 0.000000 214 +workshop 0 71 2.639057 0.000000 239 +view 2 70 2.708050 5.416100 254 +differ 0 66 2.708050 0.000000 253 +creat 0 63 2.772589 0.000000 277 +interact 0 62 2.772589 0.000000 270 +guid 0 63 2.772589 0.000000 267 +three 0 54 2.944439 0.000000 330 +investig 0 51 2.995732 0.000000 353 +particular 0 51 2.995732 0.000000 352 +right 0 48 3.044522 0.000000 363 +visual 0 48 3.044522 0.000000 372 +physic 1 47 3.091042 3.091042 377 +describ 1 45 3.135494 3.135494 400 +answer 0 45 3.135494 0.000000 391 +show 0 43 3.178054 0.000000 417 +autom 1 41 3.218876 3.218876 434 +theoret 0 39 3.258097 0.000000 446 +movi 0 40 3.258097 0.000000 459 +origin 1 38 3.295837 3.295837 472 +correct 0 38 3.295837 0.000000 462 +procedur 0 36 3.367296 0.000000 488 +represent 0 35 3.401197 0.000000 512 +produc 1 30 3.555348 3.555348 572 +steve 1 29 3.583519 3.583519 594 +consid 0 29 3.583519 0.000000 590 +enjoi 0 26 3.688879 0.000000 660 +proc 0 26 3.688879 0.000000 649 +although 0 25 3.737670 0.000000 667 +known 0 24 3.761200 0.000000 702 +sequenc 0 23 3.806662 0.000000 734 +synthesi 1 20 3.951244 3.951244 834 +basi 0 20 3.951244 0.000000 828 +mpeg 0 20 3.951244 0.000000 831 +geometr 0 19 4.007333 0.000000 852 +left 0 19 4.007333 0.000000 851 +scene 1 14 4.317488 4.317488 1114 +chuck 0 14 4.317488 0.000000 1108 +shown 0 14 4.317488 0.000000 1080 +valid 1 11 4.553877 4.553877 1299 +devis 0 10 4.653960 0.000000 1451 +reli 0 10 4.653960 0.000000 1411 +certain 0 10 4.653960 0.000000 1393 +correspond 0 10 4.653960 0.000000 1382 +intermedi 0 9 4.753590 0.000000 1497 +establish 0 9 4.753590 0.000000 1532 +assumpt 0 9 4.753590 0.000000 1514 +pair 0 9 4.753590 0.000000 1503 +dyer 0 9 4.753590 0.000000 1573 +satisfi 0 8 4.875197 0.000000 1694 +interpol 2 7 5.010635 10.021270 1823 +seitz 1 7 5.010635 5.010635 1976 +morph 1 7 5.010635 5.010635 1937 +stereo 0 7 5.010635 0.000000 1818 +theproject 0 6 5.164786 0.000000 1981 +provabl 0 5 5.347108 0.000000 2558 +surprisingli 0 4 5.568345 0.000000 2609 +visibl 0 4 5.568345 0.000000 2994 +todetermin 0 3 5.857933 0.000000 3182 +widespread 0 2 6.263398 0.000000 4911 +viewsof 0 2 6.263398 0.000000 6135 +undergo 0 2 6.263398 0.000000 4253 +dyerw 0 1 6.957497 0.000000 20012 +graphicscommun 0 1 6.957497 0.000000 20013 +techniquescurr 0 1 6.957497 0.000000 20014 +validityha 0 1 6.957497 0.000000 20015 +ofthat 0 1 6.957497 0.000000 20016 +simplerectif 0 1 6.957497 0.000000 20017 +therectifi 0 1 6.957497 0.000000 20018 +theinterpol 0 1 6.957497 0.000000 20019 +computedinterpol 0 1 6.957497 0.000000 20020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..906a2b5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 0 775 0.000000 0.000000 2 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +us 1 329 1.098612 1.098612 16 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +paper 0 205 1.609438 0.000000 38 +base 0 165 1.791759 0.000000 50 +click 1 142 1.945910 1.945910 78 +object 1 138 1.945910 1.945910 79 +relat 0 139 1.945910 0.000000 68 +high 0 130 2.079442 0.000000 101 +well 0 109 2.197225 0.000000 121 +techniqu 1 99 2.302585 2.302585 138 +imag 2 91 2.397895 4.795790 161 +call 0 91 2.397895 0.000000 153 +chang 1 82 2.484907 2.484907 163 +requir 0 81 2.484907 0.000000 167 +wide 0 84 2.484907 0.000000 185 +appear 1 78 2.564949 2.564949 210 +view 2 70 2.708050 5.416100 254 +differ 1 66 2.708050 2.708050 253 +knowledg 0 67 2.708050 0.000000 243 +virtual 0 62 2.772589 0.000000 285 +septemb 0 65 2.772589 0.000000 274 +simpl 1 60 2.833213 2.833213 298 +extens 0 53 2.944439 0.000000 340 +investig 0 51 2.995732 0.000000 353 +basic 0 50 3.044522 0.000000 360 +principl 0 48 3.044522 0.000000 357 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +move 0 47 3.091042 0.000000 382 +howev 0 41 3.218876 0.000000 422 +movi 1 40 3.258097 3.258097 459 +correct 0 38 3.295837 0.000000 462 +represent 0 35 3.401197 0.000000 512 +manual 0 35 3.401197 0.000000 504 +transform 0 32 3.465736 0.000000 542 +often 0 31 3.496508 0.000000 551 +taken 0 31 3.496508 0.000000 555 +produc 1 30 3.555348 3.555348 572 +steve 1 29 3.583519 3.583519 594 +toward 0 25 3.737670 0.000000 668 +frame 1 24 3.761200 3.761200 684 +handl 0 24 3.761200 0.000000 685 +varieti 0 22 3.850148 0.000000 740 +color 0 22 3.850148 0.000000 762 +mpeg 1 20 3.951244 3.951244 831 +render 0 17 4.110874 0.000000 947 +transit 0 15 4.248495 0.000000 1046 +photograph 0 15 4.248495 0.000000 1056 +reflect 0 15 4.248495 0.000000 1034 +scene 1 14 4.317488 4.317488 1114 +chuck 1 14 4.317488 4.317488 1108 +camera 1 14 4.317488 4.317488 1115 +draw 0 14 4.317488 0.000000 1086 +resolut 1 13 4.382027 4.382027 1172 +introduc 0 13 4.382027 0.000000 1139 +shape 1 12 4.465908 4.465908 1245 +abil 0 11 4.553877 0.000000 1341 +dyer 1 9 4.753590 4.753590 1573 +pose 1 9 4.753590 4.753590 1535 +correctli 0 9 4.753590 0.000000 1478 +face 0 9 4.753590 0.000000 1501 +siggraph 0 8 4.875197 0.000000 1773 +morph 2 7 5.010635 10.021270 1937 +interpol 1 7 5.010635 5.010635 1823 +seitz 1 7 5.010635 5.010635 1976 +viewpoint 1 6 5.164786 5.164786 2116 +difficult 0 6 5.164786 0.000000 2035 +simultan 0 6 5.164786 0.000000 2155 +jude 0 6 5.164786 0.000000 2123 +synthes 0 5 5.347108 0.000000 2451 +facial 0 5 5.347108 0.000000 2438 +shavlik 0 5 5.347108 0.000000 2429 +illus 0 4 5.568345 0.000000 2603 +mona 1 2 6.263398 6.263398 5786 +lisa 1 2 6.263398 6.263398 5427 +icpr 0 1 6.957497 0.000000 20021 +compel 0 1 6.957497 0.000000 20022 +betweenimag 0 1 6.957497 0.000000 20023 +causeunnatur 0 1 6.957497 0.000000 20024 +distort 0 1 6.957497 0.000000 20025 +projectivegeometri 0 1 6.957497 0.000000 20026 +morphingthat 0 1 6.957497 0.000000 20027 +prewarp 0 1 6.957497 0.000000 20028 +imagesprior 0 1 6.957497 0.000000 20029 +postwarp 0 1 6.957497 0.000000 20030 +appliedto 0 1 6.957497 0.000000 20031 +structureafford 0 1 6.957497 0.000000 20032 +imagetransform 0 1 6.957497 0.000000 20033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..d9ba38e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +gener 0 220 1.386294 0.000000 27 +also 0 259 1.386294 0.000000 28 +paper 1 205 1.609438 1.609438 38 +object 1 138 1.945910 1.945910 79 +click 0 142 1.945910 0.000000 78 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +analysi 0 124 2.079442 0.000000 98 +provid 0 121 2.079442 0.000000 94 +person 0 111 2.197225 0.000000 117 +make 0 111 2.197225 0.000000 120 +theori 0 111 2.197225 0.000000 127 +imag 2 91 2.397895 4.795790 161 +real 1 93 2.397895 2.397895 144 +call 0 91 2.397895 0.000000 153 +refer 1 78 2.564949 2.564949 203 +line 0 75 2.639057 0.000000 231 +addit 0 74 2.639057 0.000000 228 +view 0 70 2.708050 0.000000 254 +previou 0 62 2.772589 0.000000 290 +import 0 65 2.772589 0.000000 282 +locat 0 59 2.833213 0.000000 303 +sever 0 56 2.890372 0.000000 322 +allow 0 53 2.944439 0.000000 333 +run 0 51 2.995732 0.000000 347 +life 0 50 3.044522 0.000000 375 +approach 0 48 3.044522 0.000000 366 +without 0 50 3.044522 0.000000 370 +move 1 47 3.091042 3.091042 382 +could 0 46 3.091042 0.000000 383 +physic 0 47 3.091042 0.000000 377 +featur 0 46 3.091042 0.000000 386 +even 0 45 3.135494 0.000000 393 +describ 0 45 3.135494 0.000000 400 +natur 0 44 3.135494 0.000000 406 +show 0 43 3.178054 0.000000 417 +error 0 40 3.258097 0.000000 449 +mean 0 37 3.332205 0.000000 477 +next 0 34 3.401197 0.000000 517 +represent 0 35 3.401197 0.000000 512 +singl 0 34 3.401197 0.000000 510 +human 0 32 3.465736 0.000000 546 +steve 0 29 3.583519 0.000000 594 +determin 1 27 3.637586 3.637586 630 +enhanc 1 26 3.688879 3.688879 644 +trace 1 25 3.737670 3.737670 677 +reliabl 0 25 3.737670 0.000000 674 +motion 2 24 3.761200 7.522400 699 +frame 1 24 3.761200 3.761200 684 +sequenc 1 23 3.806662 3.806662 734 +period 2 22 3.850148 7.700296 743 +defin 0 22 3.850148 0.000000 746 +identifi 0 22 3.850148 0.000000 760 +attempt 0 17 4.110874 0.000000 917 +medic 0 17 4.110874 0.000000 958 +spatial 0 16 4.174387 0.000000 988 +scene 1 14 4.317488 4.317488 1114 +chuck 0 14 4.317488 0.000000 1108 +camera 0 14 4.317488 0.000000 1115 +composit 1 13 4.382027 4.382027 1150 +whose 0 13 4.382027 0.000000 1166 +deriv 0 13 4.382027 0.000000 1145 +walk 0 12 4.465908 0.000000 1281 +cycl 1 11 4.553877 4.553877 1335 +instanc 0 11 4.553877 0.000000 1322 +moment 0 11 4.553877 0.000000 1379 +correspond 1 10 4.653960 4.653960 1382 +tempor 1 9 4.753590 4.753590 1584 +surfac 0 9 4.753590 0.000000 1574 +explicit 0 9 4.753590 0.000000 1525 +pure 1 8 4.875197 4.875197 1776 +invari 1 8 4.875197 4.875197 1748 +film 0 8 4.875197 0.000000 1761 +irregular 0 8 4.875197 0.000000 1768 +heart 0 8 4.875197 0.000000 1729 +seitz 0 7 5.010635 0.000000 1976 +compact 0 7 5.010635 0.000000 1907 +canb 0 7 5.010635 0.000000 1846 +bottom 0 7 5.010635 0.000000 1906 +appar 0 7 5.010635 0.000000 1958 +recov 1 6 5.164786 5.164786 2235 +furthermor 0 6 5.164786 0.000000 2141 +cyclic 1 5 5.347108 5.347108 2383 +skip 0 5 5.347108 0.000000 2402 +variat 0 5 5.347108 0.000000 2248 +affin 0 5 5.347108 0.000000 2378 +clickher 0 5 5.347108 0.000000 2428 +havedevelop 0 4 5.568345 0.000000 2681 +repeat 0 4 5.568345 0.000000 2798 +tend 0 4 5.568345 0.000000 3041 +visibl 0 4 5.568345 0.000000 2994 +fashion 0 3 5.857933 0.000000 3699 +unlik 0 2 6.263398 0.000000 5063 +slow 0 2 6.263398 0.000000 5341 +perfectli 0 2 6.263398 0.000000 5569 +poscript 1 1 6.957497 6.957497 20034 +turntabl 1 1 6.957497 6.957497 20035 +dyermani 0 1 6.957497 0.000000 20036 +locomotori 0 1 6.957497 0.000000 20037 +shuffl 0 1 6.957497 0.000000 20038 +areperiod 0 1 6.957497 0.000000 20039 +beenproduc 0 1 6.957497 0.000000 20040 +ourapproach 0 1 6.957497 0.000000 20041 +tracethi 0 1 6.957497 0.000000 20042 +imagesequ 0 1 6.957497 0.000000 20043 +phonograph 0 1 6.957497 0.000000 20044 +ramp 0 1 6.957497 0.000000 20045 +timewher 0 1 6.957497 0.000000 20046 +momentarili 0 1 6.957497 0.000000 20047 +shownsuperimpos 0 1 6.957497 0.000000 20048 +variesslightli 0 1 6.957497 0.000000 20049 +changesin 0 1 6.957497 0.000000 20050 +motionsthat 0 1 6.957497 0.000000 20051 +evolutionof 0 1 6.957497 0.000000 20052 +quantiti 0 1 6.957497 0.000000 20053 +asposit 0 1 6.957497 0.000000 20054 +veloc 0 1 6.957497 0.000000 20055 +delimit 0 1 6.957497 0.000000 20056 +correspondencesacross 0 1 6.957497 0.000000 20057 +parsinga 0 1 6.957497 0.000000 20058 +tracecan 0 1 6.957497 0.000000 20059 +fromdiffer 0 1 6.957497 0.000000 20060 +recoveredfrom 0 1 6.957497 0.000000 20061 +angiograph 0 1 6.957497 0.000000 20062 +additionalstructur 0 1 6.957497 0.000000 20063 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..e27eae94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 0 384 0.693147 0.000000 11 +research 0 431 0.693147 0.000000 10 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +base 0 165 1.791759 0.000000 50 +recent 0 167 1.791759 0.000000 58 +wisconsin 0 169 1.791759 0.000000 54 +click 1 142 1.945910 1.945910 78 +area 0 144 1.945910 0.000000 80 +analysi 1 124 2.079442 2.079442 98 +machin 0 129 2.079442 0.000000 95 +look 0 107 2.197225 0.000000 115 +imag 1 91 2.397895 2.397895 161 +graphic 0 90 2.397895 0.000000 147 +stuff 0 87 2.484907 0.000000 171 +chang 0 82 2.484907 0.000000 163 +exampl 0 77 2.564949 0.000000 195 +view 1 70 2.708050 2.708050 254 +juli 0 60 2.833213 0.000000 305 +frequent 0 49 3.044522 0.000000 367 +math 0 44 3.135494 0.000000 402 +show 0 43 3.178054 0.000000 417 +vision 1 41 3.218876 3.218876 430 +cach 0 41 3.218876 0.000000 432 +movi 1 40 3.258097 3.258097 459 +steve 1 29 3.583519 3.583519 594 +berkelei 0 26 3.688879 0.000000 657 +motion 1 24 3.761200 3.761200 699 +mpeg 1 20 3.951244 3.951244 831 +synthesi 0 20 3.951244 0.000000 834 +left 0 19 4.007333 0.000000 851 +render 0 17 4.110874 0.000000 947 +seitz 1 7 5.010635 5.010635 1976 +morph 0 7 5.010635 0.000000 1937 +interpol 0 7 5.010635 0.000000 1823 +cyclic 0 5 5.347108 0.000000 2383 +closer 0 2 6.263398 0.000000 6024 +surreal 0 1 6.957497 0.000000 20064 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..9e96ff27 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +inform 0 412 0.693147 0.000000 8 +program 0 374 0.693147 0.000000 7 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +group 2 183 1.609438 3.218876 36 +paper 1 205 1.609438 1.609438 38 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +modifi 0 178 1.609438 0.000000 35 +madison 2 165 1.791759 3.583518 55 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +wisconsin 1 169 1.791759 1.791759 54 +avail 0 169 1.791759 0.000000 48 +file 0 132 1.945910 0.000000 70 +machin 1 129 2.079442 2.079442 95 +schedul 1 119 2.079442 2.079442 85 +databas 0 122 2.079442 0.000000 86 +confer 0 126 2.079442 0.000000 100 +theori 0 111 2.197225 0.000000 127 +mathemat 0 108 2.197225 0.000000 123 +intern 0 108 2.197225 0.000000 128 +access 1 102 2.302585 2.302585 136 +center 0 88 2.397895 0.000000 158 +select 0 91 2.397895 0.000000 154 +learn 2 86 2.484907 4.969814 170 +contain 1 81 2.484907 2.484907 174 +member 1 84 2.484907 2.484907 165 +librari 1 87 2.484907 2.484907 181 +school 0 84 2.484907 0.000000 188 +journal 0 83 2.484907 0.000000 183 +david 0 71 2.639057 0.000000 232 +line 0 75 2.639057 0.000000 231 +workshop 0 71 2.639057 0.000000 239 +august 1 66 2.708050 2.708050 257 +dept 1 64 2.772589 2.772589 291 +prof 0 64 2.772589 0.000000 273 +abstract 0 62 2.772589 0.000000 276 +content 1 59 2.833213 2.833213 302 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +archiv 1 49 3.044522 3.044522 364 +mark 0 44 3.135494 0.000000 403 +describ 0 45 3.135494 0.000000 400 +directori 0 45 3.135494 0.000000 396 +math 0 44 3.135494 0.000000 402 +vision 0 41 3.218876 0.000000 430 +seminar 1 38 3.295837 3.295837 470 +robot 0 36 3.367296 0.000000 497 +richard 0 31 3.496508 0.000000 559 +domain 1 30 3.555348 3.555348 564 +held 0 28 3.610918 0.000000 600 +relev 1 26 3.688879 3.688879 637 +proc 0 26 3.688879 0.000000 649 +mostli 0 19 4.007333 0.000000 869 +agent 1 18 4.060443 4.060443 910 +ascii 0 15 4.248495 0.000000 1032 +biologi 0 15 4.248495 0.000000 1049 +doit 0 14 4.317488 0.000000 1111 +readabl 0 12 4.465908 0.000000 1258 +wendt 0 10 4.653960 0.000000 1446 +kevin 0 9 4.753590 0.000000 1482 +mangasarian 0 9 4.753590 0.000000 1570 +dataset 1 7 5.010635 5.010635 1914 +jude 1 6 5.164786 5.164786 2123 +extern 0 6 5.164786 0.000000 2105 +olvi 0 6 5.164786 0.000000 2109 +gopher 0 6 5.164786 0.000000 1982 +shavlik 1 5 5.347108 5.347108 2429 +bodner 0 5 5.347108 0.000000 2401 +testb 0 5 5.347108 0.000000 2456 +tina 0 3 5.857933 0.000000 3744 +breast 0 3 5.857933 0.000000 4033 +cancer 0 3 5.857933 0.000000 4032 +carolyn 0 2 6.263398 0.000000 6088 +allex 0 2 6.263398 0.000000 6087 +eliassi 0 2 6.263398 0.000000 6147 +mlrg 2 1 6.957497 13.914994 20065 +thememb 0 1 6.957497 0.000000 20066 +jonathon 0 1 6.957497 0.000000 20067 +cherkauer 0 1 6.957497 0.000000 20068 +craven 0 1 6.957497 0.000000 20069 +maclin 0 1 6.957497 0.000000 20070 +opitz 0 1 6.957497 0.000000 20071 +papersvisit 0 1 6.957497 0.000000 20072 +recentabstractsi 0 1 6.957497 0.000000 20073 +theoriesy 0 1 6.957497 0.000000 20074 +severalml 0 1 6.957497 0.000000 20075 +sgroup 0 1 6.957497 0.000000 20076 +neurosci 0 1 6.957497 0.000000 20077 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..ca0343c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 0 340 1.098612 0.000000 18 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +applic 0 170 1.791759 0.000000 56 +parallel 0 169 1.791759 0.000000 60 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +spring 0 131 2.079442 0.000000 88 +technolog 0 131 2.079442 0.000000 102 +assist 0 112 2.197225 0.000000 113 +world 0 115 2.197225 0.000000 126 +person 0 111 2.197225 0.000000 117 +memori 1 101 2.302585 2.302585 139 +west 0 83 2.484907 0.000000 192 +wide 0 84 2.484907 0.000000 185 +institut 0 84 2.484907 0.000000 187 +interfac 0 79 2.564949 0.000000 209 +summari 0 73 2.639057 0.000000 237 +simul 1 66 2.708050 2.708050 255 +street 0 63 2.772589 0.000000 293 +copi 0 63 2.772589 0.000000 284 +share 1 59 2.833213 2.833213 304 +space 0 57 2.890372 0.000000 310 +advisor 0 51 2.995732 0.000000 355 +right 0 48 3.044522 0.000000 363 +protocol 1 45 3.135494 3.135494 407 +mark 0 44 3.135494 0.000000 403 +mechan 0 43 3.178054 0.000000 416 +cach 1 41 3.218876 3.218876 432 +submit 0 39 3.258097 0.000000 440 +tutori 0 39 3.258097 0.000000 437 +expect 0 37 3.332205 0.000000 484 +workstat 0 37 3.332205 0.000000 479 +copyright 0 36 3.367296 0.000000 495 +random 0 34 3.401197 0.000000 511 +tech 0 35 3.401197 0.000000 515 +articl 0 33 3.433987 0.000000 530 +india 0 32 3.465736 0.000000 550 +dissert 0 32 3.465736 0.000000 549 +steve 0 29 3.583519 0.000000 594 +progress 0 28 3.610918 0.000000 598 +team 0 27 3.637586 0.000000 625 +hill 0 25 3.737670 0.000000 670 +departmentunivers 0 24 3.761200 0.000000 711 +indian 0 22 3.850148 0.000000 769 +cooper 0 22 3.850148 0.000000 757 +reserv 0 20 3.951244 0.000000 808 +wind 1 18 4.060443 4.060443 908 +hobbi 0 16 4.174387 0.000000 1009 +month 0 15 4.248495 0.000000 1025 +coher 1 14 4.317488 4.317488 1109 +danc 0 12 4.465908 0.000000 1278 +isca 1 11 4.553877 4.553877 1354 +correspond 0 10 4.653960 0.000000 1382 +queue 0 10 4.653960 0.000000 1386 +custom 0 10 4.653960 0.000000 1414 +tunnel 1 9 4.753590 4.753590 1615 +usaphon 0 9 4.753590 0.000000 1600 +jump 0 9 4.753590 0.000000 1603 +architect 1 8 4.875197 4.875197 1624 +kanpur 0 8 4.875197 0.000000 1744 +irregular 0 8 4.875197 0.000000 1768 +morph 0 7 5.010635 0.000000 1937 +courtesi 0 7 5.010635 0.000000 1953 +seitz 0 7 5.010635 0.000000 1976 +mukherje 1 5 5.347108 5.347108 2586 +button 0 5 5.347108 0.000000 2337 +commod 0 5 5.347108 0.000000 2415 +ppopp 1 4 5.568345 5.568345 2774 +shubhendu 0 3 5.857933 0.000000 4028 +badger 0 3 5.857933 0.000000 3502 +ballroom 0 3 5.857933 0.000000 3983 +shubu 1 2 6.263398 6.263398 6148 +fiance 0 2 6.263398 0.000000 5497 +nephew 0 2 6.263398 0.000000 5332 +dionisio 0 2 6.263398 0.000000 6203 +grai 0 2 6.263398 0.000000 4098 +mimi 0 1 6.957497 0.000000 20078 +avirup 0 1 6.957497 0.000000 20079 +linkseducationph 0 1 6.957497 0.000000 20080 +cachabl 0 1 6.957497 0.000000 20081 +dirsw 0 1 6.957497 0.000000 20082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..ab8f3f9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +time 0 293 1.098612 0.000000 17 +fall 0 181 1.609438 0.000000 40 +book 0 99 2.302585 0.000000 131 +academ 0 82 2.484907 0.000000 178 +resourc 0 81 2.484907 0.000000 172 +new 0 64 2.772589 0.000000 262 +run 0 51 2.995732 0.000000 347 +movi 0 40 3.258097 0.000000 459 +seminar 0 38 3.295837 0.000000 470 +michael 1 35 3.401197 3.401197 514 +wai 0 25 3.737670 0.000000 662 +sport 0 25 3.737670 0.000000 683 +wonder 0 20 3.951244 0.000000 815 +club 0 15 4.248495 0.000000 1058 +philosophi 0 13 4.382027 0.000000 1167 +televis 0 6 5.164786 0.000000 2118 +wast 0 5 5.347108 0.000000 2537 +humor 0 5 5.347108 0.000000 2533 +midwest 0 2 6.263398 0.000000 6225 +siff 1 1 6.957497 6.957497 20083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..92152723 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +link 0 247 1.386294 0.000000 24 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +relat 0 139 1.945910 0.000000 68 +dayton 0 119 2.079442 0.000000 104 +februari 0 54 2.944439 0.000000 328 +departmentunivers 0 24 3.761200 0.000000 711 +edutelephon 0 10 4.653960 0.000000 1473 +studentcomput 0 7 5.010635 0.000000 1963 +skrentni 1 6 5.164786 5.164786 2104 +lecturerc 0 1 6.957497 0.000000 20084 +coordinatorgradu 0 1 6.957497 0.000000 20085 +sciencesemail 0 1 6.957497 0.000000 20086 +groupskrentni 0 1 6.957497 0.000000 20087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..59fd33b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +oper 0 180 1.609438 0.000000 34 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 0 167 1.791759 0.000000 58 +relat 0 139 1.945910 0.000000 68 +professor 0 137 1.945910 0.000000 76 +dayton 0 119 2.079442 0.000000 104 +report 0 131 2.079442 0.000000 92 +studi 0 120 2.079442 0.000000 91 +technic 0 100 2.302585 0.000000 140 +select 0 91 2.397895 0.000000 154 +commun 0 95 2.397895 0.000000 157 +help 1 83 2.484907 2.484907 175 +activ 0 84 2.484907 0.000000 182 +april 0 77 2.564949 0.000000 196 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +interact 0 62 2.772589 0.000000 270 +unix 1 58 2.890372 2.890372 308 +advisor 0 51 2.995732 0.000000 355 +represent 1 35 3.401197 3.401197 512 +human 0 32 3.465736 0.000000 546 +toward 0 25 3.737670 0.000000 668 +reliabl 0 25 3.737670 0.000000 674 +departmentunivers 0 24 3.761200 0.000000 711 +util 1 21 3.912023 3.912023 774 +miller 0 17 4.110874 0.000000 949 +step 0 13 4.382027 0.000000 1138 +larri 0 13 4.382027 0.000000 1142 +edutelephon 0 10 4.653960 0.000000 1473 +purdu 0 10 4.653960 0.000000 1466 +empir 0 8 4.875197 0.000000 1722 +studentcomput 0 7 5.010635 0.000000 1963 +bryan 1 5 5.347108 5.347108 2421 +travi 1 3 5.857933 5.857933 3985 +fredriksen 0 1 6.957497 0.000000 20139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..188138ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +current 0 284 1.098612 0.000000 21 +student 0 343 1.098612 0.000000 19 +last 0 314 1.098612 0.000000 14 +wisc 1 242 1.386294 1.386294 33 +mail 0 238 1.386294 0.000000 22 +design 0 213 1.386294 0.000000 25 +softwar 0 220 1.386294 0.000000 30 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +group 0 183 1.609438 0.000000 36 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +implement 0 152 1.791759 0.000000 52 +support 1 132 1.945910 1.945910 83 +model 0 145 1.945910 0.000000 69 +databas 1 122 2.079442 2.079442 86 +dayton 0 119 2.079442 0.000000 104 +high 0 130 2.079442 0.000000 101 +introduct 0 126 2.079442 0.000000 87 +technolog 0 131 2.079442 0.000000 102 +analysi 0 124 2.079442 0.000000 98 +manag 0 114 2.197225 0.000000 125 +center 0 88 2.397895 0.000000 158 +associ 0 93 2.397895 0.000000 151 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +west 1 83 2.484907 2.484907 192 +level 0 87 2.484907 0.000000 180 +activ 0 84 2.484907 0.000000 182 +journal 0 83 2.484907 0.000000 183 +chang 0 82 2.484907 0.000000 163 +issu 0 78 2.564949 0.000000 211 +appear 0 78 2.564949 0.000000 210 +june 0 79 2.564949 0.000000 214 +intellig 2 72 2.639057 5.278114 225 +summari 0 73 2.639057 0.000000 237 +logic 0 71 2.639057 0.000000 230 +involv 0 71 2.639057 0.000000 227 +integr 1 67 2.708050 2.708050 245 +knowledg 0 67 2.708050 0.000000 243 +test 0 66 2.708050 0.000000 252 +artifici 1 63 2.772589 2.772589 280 +complex 0 64 2.772589 0.000000 269 +foundat 0 62 2.772589 0.000000 286 +abstract 0 62 2.772589 0.000000 276 +dept 0 64 2.772589 0.000000 291 +automat 0 61 2.833213 0.000000 306 +special 0 56 2.890372 0.000000 320 +sever 0 56 2.890372 0.000000 322 +visual 0 48 3.044522 0.000000 372 +approach 0 48 3.044522 0.000000 366 +understand 0 47 3.091042 0.000000 384 +could 0 46 3.091042 0.000000 383 +around 0 43 3.178054 0.000000 415 +examin 0 42 3.218876 0.000000 424 +form 0 39 3.258097 0.000000 443 +map 0 39 3.258097 0.000000 452 +societi 0 40 3.258097 0.000000 456 +formal 0 37 3.332205 0.000000 478 +procedur 1 36 3.367296 3.367296 488 +represent 0 35 3.401197 0.000000 512 +singl 0 34 3.401197 0.000000 510 +focus 0 29 3.583519 0.000000 584 +altern 1 26 3.688879 3.688879 641 +enhanc 0 26 3.688879 0.000000 644 +magazin 1 24 3.761200 3.761200 704 +departmentunivers 0 24 3.761200 0.000000 711 +pattern 0 24 3.761200 0.000000 689 +displai 0 23 3.806662 0.000000 712 +expert 1 20 3.951244 3.951244 833 +basi 0 20 3.951244 0.000000 828 +aid 0 18 4.060443 0.000000 904 +scott 0 18 4.060443 0.000000 884 +failur 0 18 4.060443 0.000000 898 +heterogen 0 14 4.317488 0.000000 1090 +chuck 0 14 4.317488 0.000000 1108 +larri 1 13 4.382027 4.382027 1142 +social 1 13 4.382027 4.382027 1123 +incorpor 0 13 4.382027 0.000000 1163 +deduct 1 12 4.465908 4.465908 1236 +edutelephon 0 10 4.653960 0.000000 1473 +angel 0 8 4.875197 0.000000 1779 +databasesystem 0 8 4.875197 0.000000 1617 +attent 0 8 4.875197 0.000000 1651 +philosoph 0 7 5.010635 0.000000 1904 +geograph 0 6 5.164786 0.000000 2236 +augment 0 5 5.347108 0.000000 2350 +bryan 0 5 5.347108 0.000000 2421 +implic 0 4 5.568345 0.000000 2696 +andi 0 4 5.568345 0.000000 3081 +travi 1 3 5.857933 5.857933 3985 +metaphor 1 3 5.857933 5.857933 4038 +landscap 0 3 5.857933 0.000000 3525 +waysthat 0 2 6.263398 0.000000 5445 +andwith 0 2 6.263398 0.000000 5051 +derek 0 2 6.263398 0.000000 4537 +travisprofessorcomput 0 1 6.957497 0.000000 20140 +californa 0 1 6.957497 0.000000 20141 +ofartifici 0 1 6.957497 0.000000 20142 +automaticdeduct 0 1 6.957497 0.000000 20143 +contruct 0 1 6.957497 0.000000 20144 +informationcontain 0 1 6.957497 0.000000 20145 +beingdevot 0 1 6.957497 0.000000 20146 +visualiz 0 1 6.957497 0.000000 20147 +organiz 0 1 6.957497 0.000000 20148 +suppositionsunderli 0 1 6.957497 0.000000 20149 +ohar 0 1 6.957497 0.000000 20150 +swanson 0 1 6.957497 0.000000 20151 +whitsitt 0 1 6.957497 0.000000 20152 +zahn 0 1 6.957497 0.000000 20153 +oravec 0 1 6.957497 0.000000 20154 +reflex 0 1 6.957497 0.000000 20155 +falsework 0 1 6.957497 0.000000 20156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..e0c6de4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 0 374 0.693147 0.000000 7 +project 1 340 1.098612 1.098612 18 +student 0 343 1.098612 0.000000 19 +cours 0 273 1.098612 0.000000 15 +time 0 293 1.098612 0.000000 17 +link 1 247 1.386294 1.386294 24 +wisc 0 242 1.386294 0.000000 33 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +relat 1 139 1.945910 1.945910 68 +architectur 0 139 1.945910 0.000000 77 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +compil 0 122 2.079442 0.000000 96 +look 0 107 2.197225 0.000000 115 +world 0 115 2.197225 0.000000 126 +access 0 102 2.302585 0.000000 136 +center 0 88 2.397895 0.000000 158 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +west 0 83 2.484907 0.000000 192 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +academ 0 82 2.484907 0.000000 178 +info 0 85 2.484907 0.000000 176 +meet 0 72 2.639057 0.000000 229 +onlin 0 75 2.639057 0.000000 223 +free 0 73 2.639057 0.000000 224 +java 0 70 2.708050 0.000000 248 +street 0 63 2.772589 0.000000 293 +new 0 64 2.772589 0.000000 262 +juli 0 60 2.833213 0.000000 305 +directori 0 45 3.135494 0.000000 396 +edit 0 42 3.218876 0.000000 418 +download 0 36 3.367296 0.000000 489 +tech 0 35 3.401197 0.000000 515 +random 0 34 3.401197 0.000000 511 +india 1 32 3.465736 3.465736 550 +packag 1 28 3.610918 3.610918 614 +indian 0 22 3.850148 0.000000 769 +rank 0 14 4.317488 0.000000 1063 +multiscalar 0 8 4.875197 0.000000 1783 +cricket 0 7 5.010635 0.000000 1945 +kestrel 0 4 5.568345 0.000000 2990 +batch 0 4 5.568345 0.000000 2700 +will 0 4 5.568345 0.000000 2782 +avinash 1 3 5.857933 5.857933 3510 +mate 0 3 5.857933 0.000000 3127 +hindu 0 3 5.857933 0.000000 3590 +sodani 1 2 6.263398 6.263398 4803 +kharagpur 0 2 6.263398 0.000000 6236 +toll 0 2 6.263398 0.000000 6149 +hon 0 1 6.957497 0.000000 20088 +kgpite 0 1 6.957497 0.000000 20089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..d46fb7ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,253 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +offic 0 299 1.098612 0.000000 13 +last 0 314 1.098612 0.000000 14 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +also 0 259 1.386294 0.000000 28 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +group 1 183 1.609438 1.609438 36 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +austin 2 168 1.791759 3.583518 63 +parallel 2 169 1.791759 3.583518 60 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +applic 0 170 1.791759 0.000000 56 +develop 0 174 1.791759 0.000000 53 +data 0 170 1.791759 0.000000 49 +network 0 168 1.791759 0.000000 61 +distribut 0 162 1.791759 0.000000 51 +implement 0 152 1.791759 0.000000 52 +base 0 165 1.791759 0.000000 50 +architectur 2 139 1.945910 3.891820 77 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +professor 0 137 1.945910 0.000000 76 +model 0 145 1.945910 0.000000 69 +support 0 132 1.945910 0.000000 83 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +confer 0 126 2.079442 0.000000 100 +intern 2 108 2.197225 4.394450 128 +place 0 106 2.197225 0.000000 124 +memori 1 101 2.302585 2.302585 139 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +associ 0 93 2.397895 0.000000 151 +proceed 0 93 2.397895 0.000000 152 +level 1 87 2.484907 2.484907 180 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +educ 1 86 2.484907 2.484907 191 +west 0 83 2.484907 0.000000 192 +institut 0 84 2.484907 0.000000 187 +resourc 0 81 2.484907 0.000000 172 +control 0 82 2.484907 0.000000 164 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +symposium 2 72 2.639057 5.278114 238 +summari 1 73 2.639057 2.639057 237 +effici 1 73 2.639057 2.639057 233 +goal 0 66 2.708050 0.000000 250 +order 0 69 2.708050 0.000000 249 +simul 0 66 2.708050 0.000000 255 +window 0 68 2.708050 0.000000 242 +import 0 65 2.772589 0.000000 282 +evalu 0 64 2.772589 0.000000 266 +januari 0 62 2.772589 0.000000 264 +septemb 0 65 2.772589 0.000000 274 +share 0 59 2.833213 0.000000 304 +juli 0 60 2.833213 0.000000 305 +sever 1 56 2.890372 2.890372 322 +detail 0 57 2.890372 0.000000 321 +processor 2 54 2.944439 5.888878 335 +instruct 1 53 2.944439 2.944439 332 +talk 1 53 2.944439 2.944439 336 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +pointer 1 48 3.044522 3.044522 368 +numer 0 49 3.044522 0.000000 369 +set 0 50 3.044522 0.000000 361 +electron 0 47 3.091042 0.000000 379 +possibl 0 47 3.091042 0.000000 378 +understand 0 47 3.091042 0.000000 384 +could 0 46 3.091042 0.000000 383 +execut 1 45 3.135494 3.135494 404 +mechan 1 43 3.178054 3.178054 416 +futur 0 41 3.218876 0.000000 427 +cach 0 41 3.218876 0.000000 432 +fast 0 42 3.218876 0.000000 429 +combin 0 42 3.218876 0.000000 421 +press 0 42 3.218876 0.000000 419 +annual 1 40 3.258097 3.258097 458 +transact 1 39 3.258097 3.258097 438 +continu 0 39 3.258097 0.000000 448 +multipl 0 39 3.258097 0.000000 453 +error 0 40 3.258097 0.000000 449 +electr 1 38 3.295837 3.295837 461 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +given 1 32 3.465736 3.465736 538 +india 0 32 3.465736 0.000000 550 +concept 0 32 3.465736 0.000000 537 +exist 0 30 3.555348 0.000000 569 +focus 0 29 3.583519 0.000000 584 +depend 0 29 3.583519 0.000000 583 +multiprocessor 1 28 3.610918 3.610918 605 +load 1 28 3.610918 3.610918 601 +held 0 28 3.610918 0.000000 600 +arrai 1 27 3.637586 3.637586 627 +detect 1 26 3.688879 3.688879 646 +challeng 0 26 3.688879 0.000000 653 +effort 0 26 3.688879 0.000000 652 +request 0 26 3.688879 0.000000 635 +flow 0 24 3.761200 0.000000 700 +compress 1 23 3.806662 3.806662 719 +reduc 1 22 3.850148 3.850148 759 +sequenti 0 22 3.850148 0.000000 745 +hierarchi 0 22 3.850148 0.000000 744 +chip 0 21 3.912023 0.000000 770 +exploit 1 20 3.951244 3.951244 836 +fine 1 20 3.951244 3.951244 822 +department 0 20 3.951244 0.000000 839 +smith 0 20 3.951244 0.000000 820 +predict 1 19 4.007333 4.007333 855 +scott 0 18 4.060443 0.000000 884 +regist 1 17 4.110874 4.110874 938 +interconnect 0 17 4.110874 0.000000 937 +expand 0 17 4.110874 0.000000 928 +micro 1 15 4.248495 4.248495 1031 +eduphon 0 15 4.248495 0.000000 1060 +novel 0 15 4.248495 0.000000 1039 +todd 0 15 4.248495 0.000000 1051 +achiev 0 14 4.317488 0.000000 1088 +split 0 14 4.317488 0.000000 1078 +translat 0 13 4.382027 0.000000 1164 +sigplan 0 13 4.382027 0.000000 1190 +incorpor 0 13 4.382027 0.000000 1163 +prolog 0 13 4.382027 0.000000 1155 +target 0 12 4.465908 0.000000 1282 +calcul 0 12 4.465908 0.000000 1268 +cycl 1 11 4.553877 4.553877 1335 +branch 1 11 4.553877 4.553877 1318 +bandwidth 0 11 4.553877 0.000000 1365 +arbitrari 0 11 4.553877 0.000000 1359 +franklin 1 10 4.653960 4.653960 1436 +grain 1 10 4.653960 4.653960 1448 +traffic 0 10 4.653960 0.000000 1421 +inter 0 9 4.753590 0.000000 1530 +multiscalar 2 8 4.875197 9.750394 1783 +character 1 8 4.875197 4.875197 1767 +paradigm 1 8 4.875197 4.875197 1662 +elect 0 8 4.875197 0.000000 1771 +uniprocessor 0 8 4.875197 0.000000 1696 +potenti 0 8 4.875197 0.000000 1690 +watson 0 8 4.875197 0.000000 1691 +illinoi 1 7 5.010635 5.010635 1941 +zero 1 7 5.010635 5.010635 1896 +goodman 0 7 5.010635 0.000000 1891 +serial 0 7 5.010635 0.000000 1975 +friedman 0 7 5.010635 0.000000 1886 +chiang 0 7 5.010635 0.000000 1853 +sohi 3 6 5.164786 15.494358 2237 +microarchitectur 1 6 5.164786 5.164786 2238 +risc 0 6 5.164786 0.000000 2016 +superscalar 0 6 5.164786 0.000000 2082 +handbook 0 6 5.164786 0.000000 2061 +guri 0 5 5.347108 0.000000 2578 +andrea 0 5 5.347108 0.000000 2375 +lebeck 0 5 5.347108 0.000000 2582 +highest 0 4 5.568345 0.000000 2950 +resolv 0 4 5.568345 0.000000 2675 +height 0 4 5.568345 0.000000 2890 +appendix 0 4 5.568345 0.000000 2739 +crai 0 4 5.568345 0.000000 3012 +breach 1 3 5.857933 5.857933 4009 +vijaykumar 1 3 5.857933 5.857933 4011 +urbana 1 3 5.857933 5.857933 3879 +ordinari 1 3 5.857933 5.857933 3233 +streamlin 1 3 5.857933 5.857933 3573 +bulk 0 3 5.857933 0.000000 4000 +thedevelop 0 3 5.857933 0.000000 3903 +reorder 0 3 5.857933 0.000000 3952 +anatomi 0 3 5.857933 0.000000 4010 +chow 0 3 5.857933 0.000000 3281 +pnevmatikato 1 2 6.263398 6.263398 6204 +guard 1 2 6.263398 6.263398 5738 +gurindar 0 2 6.263398 0.000000 6110 +andelectr 0 2 6.263398 0.000000 6200 +birla 0 2 6.263398 0.000000 6239 +pilani 0 2 6.263398 0.000000 6240 +plenti 0 2 6.263398 0.000000 5465 +sustain 0 2 6.263398 0.000000 6201 +needto 0 2 6.263398 0.000000 4927 +andhow 0 2 6.263398 0.000000 5933 +expend 0 2 6.263398 0.000000 5451 +moshovo 0 2 6.263398 0.000000 6211 +inrd 0 2 6.263398 0.000000 4531 +tetra 0 2 6.263398 0.000000 5196 +framemak 1 1 6.957497 6.957497 20090 +graduatesaddress 0 1 6.957497 0.000000 20091 +usasohi 0 1 6.957497 0.000000 20092 +thehighest 0 1 6.957497 0.000000 20093 +circa 0 1 6.957497 0.000000 20094 +transistor 0 1 6.957497 0.000000 20095 +availableon 0 1 6.957497 0.000000 20096 +getth 0 1 6.957497 0.000000 20097 +ofov 0 1 6.957497 0.000000 20098 +thenatur 0 1 6.957497 0.000000 20099 +numericappl 0 1 6.957497 0.000000 20100 +andcarri 0 1 6.957497 0.000000 20101 +assessth 0 1 6.957497 0.000000 20102 +vijaykumarrec 0 1 6.957497 0.000000 20103 +talkswil 0 1 6.957497 0.000000 20104 +researchcent 0 1 6.957497 0.000000 20105 +yorktown 0 1 6.957497 0.000000 20106 +publicationshigh 0 1 6.957497 0.000000 20107 +ofdetail 0 1 6.957497 0.000000 20108 +resultsi 0 1 6.957497 0.000000 20109 +latencyt 0 1 6.957497 0.000000 20110 +processorsj 0 1 6.957497 0.000000 20111 +referencesm 0 1 6.957497 0.000000 20112 +communicationin 0 1 6.957497 0.000000 20113 +errorst 0 1 6.957497 0.000000 20114 +processorsd 0 1 6.957497 0.000000 20115 +knapsack 0 1 6.957497 0.000000 20116 +componentt 0 1 6.957497 0.000000 20117 +processorst 0 1 6.957497 0.000000 20118 +gradstodd 0 1 6.957497 0.000000 20119 +latencydionisio 0 1 6.957497 0.000000 20120 +setsmanoj 0 1 6.957497 0.000000 20121 +architecturemark 0 1 6.957497 0.000000 20122 +executionsriram 0 1 6.957497 0.000000 20123 +vajapeyam 0 1 6.957497 0.000000 20124 +processormen 0 1 6.957497 0.000000 20125 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..6a3000c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +us 0 329 1.098612 0.000000 16 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +oper 0 180 1.609438 0.000000 34 +updat 0 191 1.609438 0.000000 41 +data 1 170 1.791759 1.791759 49 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +network 0 168 1.791759 0.000000 61 +implement 0 152 1.791759 0.000000 52 +recent 0 167 1.791759 0.000000 58 +applic 0 170 1.791759 0.000000 56 +object 1 138 1.945910 1.945910 79 +professor 0 137 1.945910 0.000000 76 +support 0 132 1.945910 0.000000 83 +hall 0 146 1.945910 0.000000 65 +lectur 0 135 1.945910 0.000000 73 +note 0 142 1.945910 0.000000 67 +report 1 131 2.079442 2.079442 92 +databas 1 122 2.079442 2.079442 86 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +spring 0 131 2.079442 0.000000 88 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +theori 0 111 2.197225 0.000000 127 +intern 0 108 2.197225 0.000000 128 +graphic 0 90 2.397895 0.000000 147 +environ 1 84 2.484907 2.484907 177 +west 0 83 2.484907 0.000000 192 +larg 0 82 2.484907 0.000000 168 +june 1 79 2.564949 2.564949 214 +appear 1 78 2.564949 2.564949 210 +orient 0 80 2.564949 0.000000 205 +april 0 77 2.564949 0.000000 196 +interfac 0 79 2.564949 0.000000 209 +effici 0 73 2.639057 0.000000 233 +free 0 73 2.639057 0.000000 224 +david 0 71 2.639057 0.000000 232 +workshop 0 71 2.639057 0.000000 239 +logic 0 71 2.639057 0.000000 230 +java 0 70 2.708050 0.000000 248 +street 0 63 2.772589 0.000000 293 +septemb 0 65 2.772589 0.000000 274 +room 0 59 2.833213 0.000000 301 +space 0 57 2.890372 0.000000 310 +overview 0 56 2.890372 0.000000 323 +point 0 58 2.890372 0.000000 319 +effect 0 46 3.091042 0.000000 385 +physic 0 47 3.091042 0.000000 377 +mark 1 44 3.135494 3.135494 403 +paul 1 38 3.295837 3.295837 471 +michael 1 35 3.401197 3.401197 514 +independ 0 32 3.465736 0.000000 548 +chair 1 29 3.583519 3.583519 596 +built 0 29 3.583519 0.000000 592 +univ 0 28 3.610918 0.000000 617 +proc 1 26 3.688879 3.688879 649 +todai 0 25 3.737670 0.000000 672 +inth 0 22 3.850148 0.000000 741 +sigmod 1 19 4.007333 4.007333 877 +adam 1 17 4.110874 4.110874 934 +former 0 17 4.110874 0.000000 956 +white 0 17 4.110874 0.000000 951 +fourth 0 16 4.174387 0.000000 999 +dilbert 0 16 4.174387 0.000000 996 +configur 0 15 4.248495 0.000000 1012 +conf 1 13 4.382027 4.382027 1181 +dewitt 0 12 4.465908 0.000000 1270 +nanci 0 12 4.465908 0.000000 1256 +daniel 0 12 4.465908 0.000000 1233 +persist 1 11 4.553877 4.553877 1367 +shore 1 11 4.553877 4.553877 1377 +vldb 0 10 4.653960 0.000000 1470 +franklin 0 10 4.653960 0.000000 1436 +naughton 0 10 4.653960 0.000000 1450 +jeffrei 0 9 4.753590 0.000000 1612 +solomon 2 8 4.875197 9.750394 1716 +carei 1 8 4.875197 4.875197 1781 +ioannidi 0 8 4.875197 0.000000 1714 +goodman 0 7 5.010635 0.000000 1891 +tsatalo 1 5 5.347108 5.347108 2581 +marvin 1 4 5.568345 5.568345 2806 +mcauliff 1 4 5.568345 5.568345 3083 +schuh 0 3 5.857933 0.000000 4014 +gmap 0 2 6.263398 0.000000 6241 +versatil 0 2 6.263398 0.000000 6242 +seth 0 2 6.263398 0.000000 4998 +andmarvin 1 1 6.957497 6.957497 20126 +astech 1 1 6.957497 6.957497 20127 +odyssea 1 1 6.957497 6.957497 20128 +publicationstoward 0 1 6.957497 0.000000 20129 +abstractpostscriptth 0 1 6.957497 0.000000 20130 +andyanni 0 1 6.957497 0.000000 20131 +abstractpostscriptexpand 0 1 6.957497 0.000000 20132 +journalv 0 1 6.957497 0.000000 20133 +abstractpostscriptshor 0 1 6.957497 0.000000 20134 +andmichael 0 1 6.957497 0.000000 20135 +zwillingavail 0 1 6.957497 0.000000 20136 +capitl 0 1 6.957497 0.000000 20137 +photoalbum 0 1 6.957497 0.000000 20138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..ef89bf54 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +wisc 0 242 1.386294 0.000000 33 +welcom 0 122 2.079442 0.000000 99 +sowmya 1 4 5.568345 5.568345 2670 +subramanian 0 2 6.263398 0.000000 5666 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..d4c8d5e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +inform 0 412 0.693147 0.000000 8 +student 0 343 1.098612 0.000000 19 +us 0 329 1.098612 0.000000 16 +time 0 293 1.098612 0.000000 17 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +modifi 0 178 1.609438 0.000000 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +read 0 154 1.791759 0.000000 47 +relat 0 139 1.945910 0.000000 68 +first 0 140 1.945910 0.000000 71 +welcom 0 122 2.079442 0.000000 99 +person 0 111 2.197225 0.000000 117 +place 0 106 2.197225 0.000000 124 +send 0 114 2.197225 0.000000 109 +book 0 99 2.302585 0.000000 131 +access 0 102 2.302585 0.000000 136 +homepag 0 93 2.397895 0.000000 148 +follow 0 92 2.397895 0.000000 143 +sinc 0 90 2.397895 0.000000 159 +comment 0 93 2.397895 0.000000 146 +stuff 1 87 2.484907 2.484907 171 +info 0 85 2.484907 0.000000 176 +second 0 81 2.484907 0.000000 166 +resum 0 79 2.564949 0.000000 217 +want 0 79 2.564949 0.000000 199 +june 0 79 2.564949 0.000000 214 +html 0 75 2.639057 0.000000 235 +meet 0 72 2.639057 0.000000 229 +java 0 70 2.708050 0.000000 248 +juli 0 60 2.833213 0.000000 305 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +finger 0 52 2.995732 0.000000 354 +cool 0 49 3.044522 0.000000 374 +music 0 42 3.218876 0.000000 436 +movi 0 40 3.258097 0.000000 459 +decis 0 23 3.806662 0.000000 728 +love 0 21 3.912023 0.000000 804 +women 0 16 4.174387 0.000000 1004 +ascii 0 15 4.248495 0.000000 1032 +philadelphia 0 12 4.465908 0.000000 1244 +guest 0 12 4.465908 0.000000 1220 +pagei 0 8 4.875197 0.000000 1683 +judg 0 8 4.875197 0.000000 1644 +constitut 0 6 5.164786 0.000000 2026 +sail 0 5 5.347108 0.000000 2571 +panel 0 5 5.347108 0.000000 2463 +vote 0 4 5.568345 0.000000 2953 +lawand 1 2 6.263398 6.263398 6191 +stuffa 0 2 6.263398 0.000000 5999 +resours 0 2 6.263398 0.000000 5211 +serverth 0 2 6.263398 0.000000 4448 +hoofer 0 2 6.263398 0.000000 6101 +shilpa 1 1 6.957497 6.957497 20157 +pastfor 0 1 6.957497 0.000000 20158 +schoolher 0 1 6.957497 0.000000 20159 +syster 0 1 6.957497 0.000000 20160 +madisonsurf 0 1 6.957497 0.000000 20161 +madisonst 0 1 6.957497 0.000000 20162 +clubowl 0 1 6.957497 0.000000 20163 +signatur 0 1 6.957497 0.000000 20164 +lovesnowi 0 1 6.957497 0.000000 20165 +linksher 0 1 6.957497 0.000000 20166 +iswher 0 1 6.957497 0.000000 20167 +tossl 0 1 6.957497 0.000000 20168 +shilpal 0 1 6.957497 0.000000 20169 +thru 0 1 6.957497 0.000000 20170 +formlast 0 1 6.957497 0.000000 20171 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..68cc620c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 0 343 1.098612 0.000000 19 +offic 0 299 1.098612 0.000000 13 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +phone 0 175 1.791759 0.000000 45 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +compil 0 122 2.079442 0.000000 96 +teach 0 108 2.197225 0.000000 112 +take 0 97 2.302585 0.000000 134 +section 1 94 2.397895 2.397895 149 +west 0 83 2.484907 0.000000 192 +street 0 63 2.772589 0.000000 293 +sport 0 25 3.737670 0.000000 683 +pageth 0 7 5.010635 0.000000 1939 +jeremi 1 5 5.347108 5.347108 2360 +simpson 0 2 6.263398 0.000000 5994 +stenglein 1 1 6.957497 6.957497 20172 +stenglei 0 1 6.957497 0.000000 20173 +pageespn 0 1 6.957497 0.000000 20174 +hotwir 0 1 6.957497 0.000000 20175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..a7e6fcf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +work 0 380 0.693147 0.000000 9 +interest 0 384 0.693147 0.000000 11 +student 0 343 1.098612 0.000000 19 +project 0 340 1.098612 0.000000 18 +graduat 0 215 1.386294 0.000000 31 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +updat 0 191 1.609438 0.000000 41 +wisconsin 1 169 1.791759 1.791759 54 +phone 0 175 1.791759 0.000000 45 +madison 0 165 1.791759 0.000000 55 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +well 0 109 2.197225 0.000000 121 +find 0 111 2.197225 0.000000 111 +west 0 83 2.484907 0.000000 192 +june 0 79 2.564949 0.000000 214 +david 0 71 2.639057 0.000000 232 +free 0 73 2.639057 0.000000 224 +summari 0 73 2.639057 0.000000 237 +plan 0 65 2.772589 0.000000 272 +januari 0 62 2.772589 0.000000 264 +faculti 0 56 2.890372 0.000000 325 +advisor 0 51 2.995732 0.000000 355 +finger 0 52 2.995732 0.000000 354 +mark 0 44 3.135494 0.000000 403 +join 0 39 3.258097 0.000000 457 +streetmadison 0 38 3.295837 0.000000 474 +feel 0 37 3.332205 0.000000 483 +sciencesunivers 0 37 3.332205 0.000000 486 +ofth 0 36 3.367296 0.000000 491 +often 0 31 3.496508 0.000000 551 +steve 0 29 3.583519 0.000000 594 +although 0 25 3.737670 0.000000 667 +hill 0 25 3.737670 0.000000 670 +finish 0 22 3.850148 0.000000 748 +tell 0 21 3.912023 0.000000 777 +wind 0 18 4.060443 0.000000 908 +steven 0 17 4.110874 0.000000 953 +wood 0 11 4.553877 0.000000 1355 +michigan 0 11 4.553877 0.000000 1368 +laru 0 9 4.753590 0.000000 1560 +reinhardt 1 5 5.347108 5.347108 2583 +computerarchitectur 0 5 5.347108 0.000000 2290 +publicationsresearch 0 4 5.568345 0.000000 2876 +eec 0 2 6.263398 0.000000 5981 +tunnelgroup 0 1 6.957497 0.000000 20176 +andjim 0 1 6.957497 0.000000 20177 +mewhat 0 1 6.957497 0.000000 20178 +stever 0 1 6.957497 0.000000 20179 +usalast 0 1 6.957497 0.000000 20180 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..3b6f5790 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +depart 0 457 0.693147 0.000000 12 +us 0 329 1.098612 0.000000 16 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 0 165 1.791759 0.000000 55 +professor 0 137 1.945910 0.000000 76 +problem 0 147 1.945910 0.000000 75 +assign 0 135 1.945910 0.000000 66 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +analysi 1 124 2.079442 2.079442 98 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +search 0 95 2.397895 0.000000 155 +west 0 83 2.484907 0.000000 192 +exam 0 86 2.484907 0.000000 169 +stuff 0 87 2.484907 0.000000 171 +solv 0 73 2.639057 0.000000 234 +nation 0 74 2.639057 0.000000 240 +januari 0 62 2.772589 0.000000 264 +foundat 0 62 2.772589 0.000000 286 +best 0 59 2.833213 0.000000 299 +point 0 58 2.890372 0.000000 319 +talk 0 53 2.944439 0.000000 336 +numer 1 49 3.044522 3.044522 369 +telephon 0 50 3.044522 0.000000 373 +show 0 43 3.178054 0.000000 417 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +field 0 37 3.332205 0.000000 482 +john 1 33 3.433987 3.433987 532 +begin 0 23 3.806662 0.000000 716 +rate 0 15 4.248495 0.000000 1037 +qualifi 0 8 4.875197 0.000000 1721 +chicago 1 6 5.164786 5.164786 2149 +fluid 0 5 5.347108 0.000000 2440 +kid 0 5 5.347108 0.000000 2516 +nathan 1 4 5.568345 5.568345 2794 +radio 0 4 5.568345 0.000000 3025 +car 0 4 5.568345 0.000000 2931 +drew 0 4 5.568345 0.000000 2980 +museum 0 3 5.857933 0.000000 3933 +pageoth 0 2 6.263398 0.000000 6104 +strikwerda 1 1 6.957497 6.957497 20181 +strikwerdadepart 0 1 6.957497 0.000000 20182 +strik 0 1 6.957497 0.000000 20183 +dynamicsmyoffici 0 1 6.957497 0.000000 20184 +tribun 0 1 6.957497 0.000000 20185 +footballmi 0 1 6.957497 0.000000 20186 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..45aad437 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +thing 0 84 2.484907 0.000000 189 +david 0 71 2.639057 0.000000 232 +evalu 0 64 2.772589 0.000000 266 +processor 0 54 2.944439 0.000000 335 +execut 0 45 3.135494 0.000000 404 +show 0 43 3.178054 0.000000 417 +cach 0 41 3.218876 0.000000 432 +late 0 40 3.258097 0.000000 439 +enjoi 0 26 3.688879 0.000000 660 +cambridg 0 16 4.174387 0.000000 1008 +stream 0 15 4.248495 0.000000 1015 +buffer 0 12 4.465908 0.000000 1211 +calvin 0 9 4.753590 0.000000 1518 +replac 0 8 4.875197 0.000000 1668 +integ 0 8 4.875197 0.000000 1688 +secondari 0 7 5.010635 0.000000 1884 +histor 0 6 5.164786 0.000000 2085 +superscalar 0 6 5.164786 0.000000 2082 +hobb 0 4 5.568345 0.000000 2893 +decoupl 0 4 5.568345 0.000000 2898 +letterman 0 3 5.857933 0.000000 3931 +seinfeld 0 3 5.857933 0.000000 3958 +subba 0 2 6.263398 0.000000 6091 +subbarao 0 2 6.263398 0.000000 6205 +prooocessor 0 1 6.957497 0.000000 20187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..88a3c29e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +engin 0 297 1.098612 0.000000 20 +last 0 314 1.098612 0.000000 14 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +fall 0 181 1.609438 0.000000 40 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +parallel 1 169 1.791759 1.791759 60 +wisconsin 0 169 1.791759 0.000000 54 +hour 0 165 1.791759 0.000000 46 +applic 0 170 1.791759 0.000000 56 +base 0 165 1.791759 0.000000 50 +network 0 168 1.791759 0.000000 61 +construct 0 139 1.945910 0.000000 82 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +relat 0 139 1.945910 0.000000 68 +schedul 1 119 2.079442 2.079442 85 +send 0 114 2.197225 0.000000 109 +world 0 115 2.197225 0.000000 126 +search 0 95 2.397895 0.000000 155 +resourc 0 81 2.484907 0.000000 172 +wide 0 84 2.484907 0.000000 185 +librari 0 87 2.484907 0.000000 181 +complet 0 77 2.564949 0.000000 208 +dynam 0 76 2.564949 0.000000 194 +april 0 77 2.564949 0.000000 196 +sourc 0 77 2.564949 0.000000 201 +workshop 0 71 2.639057 0.000000 239 +servic 0 72 2.639057 0.000000 236 +polici 0 64 2.772589 0.000000 279 +virtual 0 62 2.772589 0.000000 285 +new 0 64 2.772589 0.000000 262 +guid 0 63 2.772589 0.000000 267 +processor 1 54 2.944439 2.944439 335 +still 0 50 3.044522 0.000000 362 +bibliographi 0 34 3.401197 0.000000 518 +limit 0 29 3.583519 0.000000 585 +measur 0 28 3.610918 0.000000 609 +static 0 27 3.637586 0.000000 619 +proc 0 26 3.688879 0.000000 649 +subject 0 26 3.688879 0.000000 647 +strategi 0 25 3.737670 0.000000 682 +yahoo 0 24 3.761200 0.000000 707 +alloc 0 20 3.951244 0.000000 821 +thur 0 19 4.007333 0.000000 847 +taiwan 1 16 4.174387 4.174387 1006 +sigmetr 0 13 4.382027 0.000000 1173 +conf 0 13 4.382027 0.000000 1181 +mari 1 12 4.465908 4.465908 1266 +characterist 0 12 4.465908 0.000000 1257 +shop 0 10 4.653960 0.000000 1469 +vernon 1 9 4.753590 4.753590 1556 +job 0 8 4.875197 0.000000 1702 +calendar 0 8 4.875197 0.000000 1649 +chiang 1 7 5.010635 5.010635 1853 +quantum 0 6 5.164786 0.000000 2214 +academia 0 6 5.164786 0.000000 2036 +ta 0 4 5.568345 0.000000 3058 +nashvil 0 4 5.568345 0.000000 2867 +sinanet 0 4 5.568345 0.000000 2883 +suhui 0 3 5.857933 0.000000 3430 +educlick 0 3 5.857933 0.000000 3612 +rajesh 0 3 5.857933 0.000000 3511 +conjunct 0 3 5.857933 0.000000 3743 +ipp 0 3 5.857933 0.000000 3381 +sinica 0 3 5.857933 0.000000 3819 +preemption 0 2 6.263398 0.000000 6230 +mansharamani 0 2 6.263398 0.000000 6231 +catalogu 0 2 6.263398 0.000000 6166 +magzin 0 2 6.263398 0.000000 5614 +madisonoffic 0 1 6.957497 0.000000 20188 +stelephon 0 1 6.957497 0.000000 20189 +emailoffic 0 1 6.957497 0.000000 20190 +seednet 0 1 6.957497 0.000000 20191 +vistor 0 1 6.957497 0.000000 20192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..7352bf73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,193 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +current 1 284 1.098612 1.098612 21 +project 0 340 1.098612 0.000000 18 +link 2 247 1.386294 2.772588 24 +also 0 259 1.386294 0.000000 28 +mail 0 238 1.386294 0.000000 22 +class 1 199 1.609438 1.609438 37 +includ 0 208 1.609438 0.000000 42 +public 0 202 1.609438 0.000000 43 +list 0 201 1.609438 0.000000 39 +madison 1 165 1.791759 1.791759 55 +recent 0 167 1.791759 0.000000 58 +hall 0 146 1.945910 0.000000 65 +construct 0 139 1.945910 0.000000 82 +relat 0 139 1.945910 0.000000 68 +note 0 142 1.945910 0.000000 67 +provid 0 121 2.079442 0.000000 94 +site 2 106 2.197225 4.394450 119 +specif 1 106 2.197225 2.197225 106 +manag 0 114 2.197225 0.000000 125 +assist 0 112 2.197225 0.000000 113 +theori 0 111 2.197225 0.000000 127 +take 1 97 2.302585 2.302585 134 +technic 0 100 2.302585 0.000000 140 +homepag 0 93 2.397895 0.000000 148 +call 0 91 2.397895 0.000000 153 +pictur 0 89 2.397895 0.000000 160 +select 0 91 2.397895 0.000000 154 +follow 0 92 2.397895 0.000000 143 +info 1 85 2.484907 2.484907 176 +institut 0 84 2.484907 0.000000 187 +contain 0 81 2.484907 0.000000 174 +sourc 0 77 2.564949 0.000000 201 +david 1 71 2.639057 2.639057 232 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +addit 0 74 2.639057 0.000000 228 +servic 0 72 2.639057 0.000000 236 +view 1 70 2.708050 2.708050 254 +new 1 64 2.772589 2.772589 262 +artifici 0 63 2.772589 0.000000 280 +laboratori 0 63 2.772589 0.000000 292 +organ 0 65 2.772589 0.000000 265 +foundat 0 62 2.772589 0.000000 286 +written 0 63 2.772589 0.000000 278 +back 0 60 2.833213 0.000000 297 +variou 1 56 2.890372 2.890372 317 +sever 0 56 2.890372 0.000000 322 +index 0 56 2.890372 0.000000 309 +publish 0 57 2.890372 0.000000 326 +scientif 1 53 2.944439 2.944439 341 +local 0 55 2.944439 0.000000 334 +tabl 0 51 2.995732 0.000000 346 +life 0 50 3.044522 0.000000 375 +friend 0 48 3.044522 0.000000 376 +numer 0 49 3.044522 0.000000 369 +physic 0 47 3.091042 0.000000 377 +math 0 44 3.135494 0.000000 402 +natur 0 44 3.135494 0.000000 406 +show 0 43 3.178054 0.000000 417 +edit 0 42 3.218876 0.000000 418 +review 0 42 3.218876 0.000000 425 +realli 0 40 3.258097 0.000000 444 +late 0 40 3.258097 0.000000 439 +movi 0 40 3.258097 0.000000 459 +hand 0 37 3.332205 0.000000 475 +print 0 34 3.401197 0.000000 503 +articl 1 33 3.433987 3.433987 530 +obtain 0 33 3.433987 0.000000 534 +within 0 33 3.433987 0.000000 525 +india 0 32 3.465736 0.000000 550 +scientist 0 31 3.496508 0.000000 560 +steve 0 29 3.583519 0.000000 594 +weather 1 28 3.610918 3.610918 618 +american 0 27 3.637586 0.000000 634 +great 0 27 3.637586 0.000000 626 +william 0 22 3.850148 0.000000 765 +tell 0 21 3.912023 0.000000 777 +ever 0 19 4.007333 0.000000 872 +hypertext 0 19 4.007333 0.000000 865 +histori 0 19 4.007333 0.000000 853 +thoma 0 18 4.060443 0.000000 901 +regist 0 17 4.110874 0.000000 938 +weekli 0 17 4.110874 0.000000 919 +upon 0 16 4.174387 0.000000 978 +choos 0 16 4.174387 0.000000 964 +brief 0 16 4.174387 0.000000 1001 +dilbert 0 16 4.174387 0.000000 996 +advantag 0 16 4.174387 0.000000 987 +piec 0 15 4.248495 0.000000 1020 +floor 0 14 4.317488 0.000000 1070 +camera 0 14 4.317488 0.000000 1115 +care 0 13 4.382027 0.000000 1177 +wait 0 13 4.382027 0.000000 1168 +reader 0 12 4.465908 0.000000 1246 +outsid 0 12 4.465908 0.000000 1219 +newspap 0 12 4.465908 0.000000 1280 +smart 0 11 4.553877 0.000000 1352 +see 0 11 4.553877 0.000000 1337 +fill 0 11 4.553877 0.000000 1349 +santa 0 10 4.653960 0.000000 1441 +jump 0 9 4.753590 0.000000 1603 +joke 0 8 4.875197 0.000000 1620 +philosoph 1 7 5.010635 5.010635 1904 +channel 1 7 5.010635 5.010635 1836 +dedic 0 7 5.010635 0.000000 1843 +centuri 0 7 5.010635 0.000000 1935 +smaller 0 7 5.010635 0.000000 1874 +tri 0 6 5.164786 0.000000 2166 +strip 0 6 5.164786 0.000000 2203 +somewher 0 6 5.164786 0.000000 2176 +artist 0 6 5.164786 0.000000 2127 +feet 1 5 5.347108 5.347108 2492 +optimist 0 5 5.347108 0.000000 2501 +push 0 4 5.568345 0.000000 2635 +climb 0 4 5.568345 0.000000 2936 +surviv 0 4 5.568345 0.000000 2734 +writer 0 4 5.568345 0.000000 2783 +observatori 0 4 5.568345 0.000000 3070 +ultra 0 4 5.568345 0.000000 2889 +sundaram 1 3 5.857933 5.857933 3463 +astronomi 1 3 5.857933 5.857933 3974 +blind 0 3 5.857933 0.000000 3662 +scream 0 3 5.857933 0.000000 3609 +knee 0 3 5.857933 0.000000 3980 +woman 0 3 5.857933 0.000000 3539 +lunch 0 3 5.857933 0.000000 3369 +beat 0 3 5.857933 0.000000 3840 +packer 0 3 5.857933 0.000000 3728 +dozen 0 3 5.857933 0.000000 3905 +hindu 0 3 5.857933 0.000000 3590 +stukel 1 2 6.263398 6.263398 4698 +patient 0 2 6.263398 0.000000 6223 +spurt 0 2 6.263398 0.000000 5464 +crawl 0 2 6.263398 0.000000 5561 +damn 0 2 6.263398 0.000000 6129 +nake 0 2 6.263398 0.000000 6197 +cloth 0 2 6.263398 0.000000 5884 +conscious 0 2 6.263398 0.000000 5954 +destruct 0 2 6.263398 0.000000 6232 +phoenix 0 2 6.263398 0.000000 4552 +reward 0 2 6.263398 0.000000 5402 +disinform 0 2 6.263398 0.000000 5494 +trendi 0 2 6.263398 0.000000 4717 +browbeck 1 1 6.957497 6.957497 20193 +effronteri 0 1 6.957497 0.000000 20194 +femor 0 1 6.957497 0.000000 20195 +arteri 0 1 6.957497 0.000000 20196 +blood 0 1 6.957497 0.000000 20197 +anesthetist 0 1 6.957497 0.000000 20198 +groin 0 1 6.957497 0.000000 20199 +hamstr 0 1 6.957497 0.000000 20200 +scalpel 0 1 6.957497 0.000000 20201 +stab 0 1 6.957497 0.000000 20202 +leg 0 1 6.957497 0.000000 20203 +voilet 0 1 6.957497 0.000000 20204 +baboon 0 1 6.957497 0.000000 20205 +wig 0 1 6.957497 0.000000 20206 +pois 0 1 6.957497 0.000000 20207 +stomp 0 1 6.957497 0.000000 20208 +cop 0 1 6.957497 0.000000 20209 +rush 0 1 6.957497 0.000000 20210 +burrough 0 1 6.957497 0.000000 20211 +catapult 0 1 6.957497 0.000000 20212 +mann 0 1 6.957497 0.000000 20213 +wearabl 0 1 6.957497 0.000000 20214 +tierra 0 1 6.957497 0.000000 20215 +arcosanti 0 1 6.957497 0.000000 20216 +arcolog 0 1 6.957497 0.000000 20217 +krishnamurti 0 1 6.957497 0.000000 20218 +harass 0 1 6.957497 0.000000 20219 +factoid 0 1 6.957497 0.000000 20220 +astound 0 1 6.957497 0.000000 20221 +onion 0 1 6.957497 0.000000 20222 +washburn 0 1 6.957497 0.000000 20223 +len 0 1 6.957497 0.000000 20224 +insignific 0 1 6.957497 0.000000 20225 +webweath 0 1 6.957497 0.000000 20226 +timothi 0 1 6.957497 0.000000 20227 +leari 0 1 6.957497 0.000000 20228 +noam 0 1 6.957497 0.000000 20229 +chomski 0 1 6.957497 0.000000 20230 +conspiraci 0 1 6.957497 0.000000 20231 +buri 0 1 6.957497 0.000000 20232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..6049dfbf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +hour 0 165 1.791759 0.000000 46 +think 0 57 2.890372 0.000000 314 +mark 0 44 3.135494 0.000000 403 +brian 0 38 3.295837 0.000000 466 +bookmark 0 26 3.688879 0.000000 639 +pagebrian 0 4 5.568345 0.000000 3054 +swander 1 3 5.857933 5.857933 3440 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..53a47735 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 0 431 0.693147 0.000000 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +offic 0 299 1.098612 0.000000 13 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +link 0 247 1.386294 0.000000 24 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +area 0 144 1.945910 0.000000 80 +architectur 0 139 1.945910 0.000000 77 +object 0 138 1.945910 0.000000 79 +spring 1 131 2.079442 2.079442 88 +dayton 0 119 2.079442 0.000000 104 +tool 0 117 2.079442 0.000000 93 +postscript 0 131 2.079442 0.000000 90 +analysi 0 124 2.079442 0.000000 98 +version 1 113 2.197225 2.197225 122 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +structur 0 106 2.197225 0.000000 105 +memori 1 101 2.302585 2.302585 139 +peopl 0 96 2.302585 0.000000 132 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +search 0 95 2.397895 0.000000 155 +octob 0 89 2.397895 0.000000 156 +west 0 83 2.484907 0.000000 192 +stuff 0 87 2.484907 0.000000 171 +level 0 87 2.484907 0.000000 180 +resourc 0 81 2.484907 0.000000 172 +april 0 77 2.564949 0.000000 196 +free 0 73 2.639057 0.000000 224 +differ 1 66 2.708050 2.708050 253 +street 0 63 2.772589 0.000000 293 +organ 0 65 2.772589 0.000000 265 +improv 0 62 2.772589 0.000000 289 +share 1 59 2.833213 2.833213 304 +colleg 0 61 2.833213 0.000000 300 +thesi 0 57 2.890372 0.000000 327 +talk 1 53 2.944439 2.944439 336 +instruct 0 53 2.944439 0.000000 332 +cool 0 49 3.044522 0.000000 374 +favorit 0 44 3.135494 0.000000 410 +music 0 42 3.218876 0.000000 436 +concurr 0 34 3.401197 0.000000 501 +multiprocessor 0 28 3.610918 0.000000 605 +enhanc 0 26 3.688879 0.000000 644 +supercomput 0 25 3.737670 0.000000 681 +highli 0 23 3.806662 0.000000 725 +els 0 19 4.007333 0.000000 843 +interconnect 0 17 4.110874 0.000000 937 +countri 0 15 4.248495 0.000000 1059 +fortran 0 15 4.248495 0.000000 1027 +block 0 13 4.382027 0.000000 1183 +wait 0 13 4.382027 0.000000 1168 +asynchron 0 12 4.465908 0.000000 1229 +typic 0 11 4.553877 0.000000 1360 +pose 0 9 4.753590 0.000000 1535 +paradyn 0 9 4.753590 0.000000 1614 +joke 0 8 4.875197 0.000000 1620 +attach 0 7 5.010635 0.000000 1785 +park 0 6 5.164786 0.000000 2218 +risc 0 6 5.164786 0.000000 2016 +matur 0 5 5.347108 0.000000 2269 +hate 0 5 5.347108 0.000000 2529 +sorri 0 4 5.568345 0.000000 3059 +eventu 0 4 5.568345 0.000000 3074 +cleaner 0 3 5.857933 0.000000 3775 +raid 0 3 5.857933 0.000000 4012 +snowboard 1 2 6.263398 6.263398 5084 +angri 0 2 6.263398 0.000000 5088 +greet 0 2 6.263398 0.000000 5903 +dude 0 2 6.263398 0.000000 4977 +callaghan 0 2 6.263398 0.000000 6128 +stripe 0 2 6.263398 0.000000 6106 +tamch 1 1 6.957497 6.957497 20233 +ariel 1 1 6.957497 6.957497 20234 +municip 1 1 6.957497 6.957497 20235 +bond 1 1 6.957497 6.957497 20236 +tamchesari 0 1 6.957497 0.000000 20237 +assistantemail 0 1 6.957497 0.000000 20238 +posei 0 1 6.957497 0.000000 20239 +sresearch 0 1 6.957497 0.000000 20240 +toolsstatu 0 1 6.957497 0.000000 20241 +toolsparallel 0 1 6.957497 0.000000 20242 +systemsbluesth 0 1 6.957497 0.000000 20243 +simpsonsseinfeldskiingskinetkeyston 0 1 6.957497 0.000000 20244 +vacum 0 1 6.957497 0.000000 20245 +dirt 0 1 6.957497 0.000000 20246 +whoa 0 1 6.957497 0.000000 20247 +incom 0 1 6.957497 0.000000 20248 +yahooespncpu 0 1 6.957497 0.000000 20249 +infoskinetoth 0 1 6.957497 0.000000 20250 +exokernel 0 1 6.957497 0.000000 20251 +zebra 0 1 6.957497 0.000000 20252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..bd989047 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +program 0 374 0.693147 0.000000 7 +last 0 314 1.098612 0.000000 14 +link 0 247 1.386294 0.000000 24 +wisc 0 242 1.386294 0.000000 33 +class 0 199 1.609438 0.000000 37 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +welcom 0 122 2.079442 0.000000 99 +high 0 130 2.079442 0.000000 101 +look 1 107 2.197225 2.197225 115 +make 0 111 2.197225 0.000000 120 +well 0 109 2.197225 0.000000 121 +person 0 111 2.197225 0.000000 117 +take 1 97 2.302585 2.302585 134 +pictur 1 89 2.397895 2.397895 160 +school 0 84 2.484907 0.000000 188 +academ 0 82 2.484907 0.000000 178 +good 1 77 2.564949 2.564949 200 +know 0 80 2.564949 0.000000 198 +resum 0 79 2.564949 0.000000 217 +name 0 72 2.639057 0.000000 220 +would 0 67 2.708050 0.000000 251 +copi 0 63 2.772589 0.000000 284 +dept 0 64 2.772589 0.000000 291 +import 0 65 2.772589 0.000000 282 +juli 0 60 2.833213 0.000000 305 +think 0 57 2.890372 0.000000 314 +found 0 53 2.944439 0.000000 337 +still 0 50 3.044522 0.000000 362 +basic 0 50 3.044522 0.000000 360 +favorit 0 44 3.135494 0.000000 410 +show 1 43 3.178054 3.178054 417 +music 1 42 3.218876 3.218876 436 +movi 0 40 3.258097 0.000000 459 +least 0 35 3.401197 0.000000 516 +human 0 32 3.465736 0.000000 546 +turn 1 29 3.583519 3.583519 586 +jeff 0 25 3.737670 0.000000 673 +never 0 25 3.737670 0.000000 671 +wish 0 24 3.761200 0.000000 692 +miscellan 0 23 3.806662 0.000000 731 +voic 0 21 3.912023 0.000000 806 +newsgroup 0 21 3.912023 0.000000 783 +separ 0 19 4.007333 0.000000 844 +andrew 0 19 4.007333 0.000000 849 +coupl 0 17 4.110874 0.000000 939 +monitor 0 17 4.110874 0.000000 941 +seek 0 17 4.110874 0.000000 954 +took 0 16 4.174387 0.000000 1010 +sign 0 16 4.174387 0.000000 970 +choos 0 16 4.174387 0.000000 964 +hobbi 0 16 4.174387 0.000000 1009 +doesn 0 15 4.248495 0.000000 1055 +club 0 15 4.248495 0.000000 1058 +someon 1 13 4.382027 4.382027 1128 +song 1 11 4.553877 4.553877 1380 +night 0 11 4.553877 0.000000 1319 +sound 1 9 4.753590 4.753590 1605 +theme 0 8 4.875197 0.000000 1707 +henc 0 7 5.010635 0.000000 1805 +pace 0 6 5.164786 0.000000 2011 +plu 0 6 5.164786 0.000000 2004 +lampert 1 5 5.347108 5.347108 2398 +babi 1 5 5.347108 5.347108 2493 +tick 1 4 5.568345 5.568345 2975 +heard 0 4 5.568345 0.000000 2895 +fire 0 4 5.568345 0.000000 3001 +nota 0 3 5.857933 0.000000 3785 +crow 0 3 5.857933 0.000000 3845 +straight 0 3 5.857933 0.000000 3655 +forward 0 3 5.857933 0.000000 3784 +yearbook 0 2 6.263398 0.000000 6243 +cute 0 2 6.263398 0.000000 5108 +befound 0 2 6.263398 0.000000 5964 +sick 0 2 6.263398 0.000000 5773 +mstk 0 2 6.263398 0.000000 5501 +pagejeff 0 1 6.957497 0.000000 20253 +ricardo 0 1 6.957497 0.000000 20254 +montalban 0 1 6.957497 0.000000 20255 +foron 0 1 6.957497 0.000000 20256 +incrimin 0 1 6.957497 0.000000 20257 +aconvict 0 1 6.957497 0.000000 20258 +lasttim 0 1 6.957497 0.000000 20259 +threaten 0 1 6.957497 0.000000 20260 +intoa 0 1 6.957497 0.000000 20261 +dispens 0 1 6.957497 0.000000 20262 +anautograph 0 1 6.957497 0.000000 20263 +pictureappear 0 1 6.957497 0.000000 20264 +weasel 0 1 6.957497 0.000000 20265 +factswho 0 1 6.957497 0.000000 20266 +relatedwhat 0 1 6.957497 0.000000 20267 +entertainmentbook 0 1 6.957497 0.000000 20268 +subjectsfriendsno 0 1 6.957497 0.000000 20269 +organizationsgroup 0 1 6.957497 0.000000 20270 +inmi 0 1 6.957497 0.000000 20271 +linksugh 0 1 6.957497 0.000000 20272 +servo 0 1 6.957497 0.000000 20273 +eclect 0 1 6.957497 0.000000 20274 +paraphenaliai 0 1 6.957497 0.000000 20275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..cf50394d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +inform 0 412 0.693147 0.000000 8 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +area 0 144 1.945910 0.000000 80 +teach 0 108 2.197225 0.000000 112 +mathemat 0 108 2.197225 0.000000 123 +section 0 94 2.397895 0.000000 149 +sinc 0 90 2.397895 0.000000 159 +contain 0 81 2.484907 0.000000 174 +todd 0 15 4.248495 0.000000 1051 +wealth 0 3 5.857933 0.000000 3353 +plug 0 2 6.263398 0.000000 5167 +tmunson 0 2 6.263398 0.000000 4809 +mathematicalprogram 1 1 6.957497 6.957497 20276 +homepagetodd 0 1 6.957497 0.000000 20277 +homepagein 0 1 6.957497 0.000000 20278 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..f991e1e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +offic 0 299 1.098612 0.000000 13 +modifi 0 178 1.609438 0.000000 35 +wisconsin 0 169 1.791759 0.000000 54 +dayton 0 119 2.079442 0.000000 104 +thing 0 84 2.484907 0.000000 189 +life 0 50 3.044522 0.000000 375 +brian 0 38 3.295837 0.000000 466 +streetmadison 0 38 3.295837 0.000000 474 +departmentunivers 0 24 3.761200 0.000000 711 +accept 0 18 4.060443 0.000000 879 +stand 0 18 4.060443 0.000000 891 +chief 0 7 5.010635 0.000000 1829 +ground 0 7 5.010635 0.000000 1955 +pagebrian 0 4 5.568345 0.000000 3054 +bear 0 4 5.568345 0.000000 2651 +civil 0 3 5.857933 0.000000 3908 +medit 0 2 6.263398 0.000000 5777 +creatur 0 2 6.263398 0.000000 6107 +essenc 0 2 6.263398 0.000000 6150 +toonen 0 1 6.957497 0.000000 20279 +cswhatev 0 1 6.957497 0.000000 20280 +seattleth 0 1 6.957497 0.000000 20281 +tipi 0 1 6.957497 0.000000 20282 +itsmean 0 1 6.957497 0.000000 20283 +kinship 0 1 6.957497 0.000000 20284 +acknowledgingun 0 1 6.957497 0.000000 20285 +infus 0 1 6.957497 0.000000 20286 +thetru 0 1 6.957497 0.000000 20287 +luther 0 1 6.957497 0.000000 20288 +oglala 0 1 6.957497 0.000000 20289 +siouxlast 0 1 6.957497 0.000000 20290 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..6e22424b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +read 0 154 1.791759 0.000000 47 +site 0 106 2.197225 0.000000 119 +version 0 113 2.197225 0.000000 122 +browser 0 56 2.890372 0.000000 313 +netscap 1 44 3.135494 3.135494 395 +option 0 30 3.555348 0.000000 575 +enhanc 0 26 3.688879 0.000000 644 +latest 0 21 3.912023 0.000000 785 +thano 0 3 5.857933 0.000000 3424 +tsioli 0 3 5.857933 0.000000 3418 +shouldconsid 0 2 6.263398 0.000000 6061 +upgrad 0 1 6.957497 0.000000 20291 +ifthat 0 1 6.957497 0.000000 20292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..4066ff12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 0 640 0.000000 0.000000 4 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +year 0 148 1.945910 0.000000 84 +click 0 142 1.945910 0.000000 78 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +section 0 94 2.397895 0.000000 149 +locat 0 59 2.833213 0.000000 303 +case 0 51 2.995732 0.000000 351 +keep 0 44 3.135494 0.000000 409 +long 0 43 3.178054 0.000000 413 +compani 0 41 3.218876 0.000000 423 +live 0 40 3.258097 0.000000 451 +origin 0 38 3.295837 0.000000 472 +computersci 0 30 3.555348 0.000000 562 +departmentunivers 0 24 3.761200 0.000000 711 +left 0 19 4.007333 0.000000 851 +todd 1 15 4.248495 4.248495 1051 +enough 0 15 4.248495 0.000000 1040 +awai 0 10 4.653960 0.000000 1447 +babylon 0 8 4.875197 0.000000 1731 +hold 0 8 4.875197 0.000000 1645 +judg 0 8 4.875197 0.000000 1644 +sciencesat 0 7 5.010635 0.000000 1968 +ohio 1 5 5.347108 5.347108 2447 +dougla 0 5 5.347108 0.000000 2471 +amus 0 5 5.347108 0.000000 2366 +turnidg 1 4 5.568345 5.568345 2829 +rep 0 4 5.568345 0.000000 3087 +western 0 4 5.568345 0.000000 3062 +kent 0 4 5.568345 0.000000 2744 +evid 0 4 5.568345 0.000000 2768 +shortcut 0 3 5.857933 0.000000 3932 +axi 1 2 6.263398 6.263398 6069 +milton 0 2 6.263398 0.000000 6153 +professorthoma 0 2 6.263398 0.000000 5053 +turnidgeschoolcomput 0 1 6.957497 0.000000 20293 +homemuppet 0 1 6.957497 0.000000 20294 +eyesightright 0 1 6.957497 0.000000 20295 +studyingprogram 0 1 6.957497 0.000000 20296 +mathematicsand 0 1 6.957497 0.000000 20297 +reserveunivers 0 1 6.957497 0.000000 20298 +cleveland 0 1 6.957497 0.000000 20299 +myfamili 0 1 6.957497 0.000000 20300 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..c0194548 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +last 0 314 1.098612 0.000000 14 +graduat 0 215 1.386294 0.000000 31 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +contact 0 153 1.791759 0.000000 59 +wisconsin 0 169 1.791759 0.000000 54 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +number 1 130 2.079442 2.079442 97 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +teach 0 108 2.197225 0.000000 112 +assist 0 112 2.197225 0.000000 113 +sinc 1 90 2.397895 2.397895 159 +june 0 79 2.564949 0.000000 214 +street 0 63 2.772589 0.000000 293 +visit 0 63 2.772589 0.000000 288 +finger 0 52 2.995732 0.000000 354 +visitor 1 49 3.044522 3.044522 371 +wang 1 21 3.912023 3.912023 790 +heavi 0 7 5.010635 0.000000 1841 +bldg 0 4 5.568345 0.000000 2983 +taxiao 1 2 6.263398 6.263398 4806 +twang 0 2 6.263398 0.000000 5730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..23824786 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +wisc 0 242 1.386294 0.000000 33 +start 0 83 2.484907 0.000000 173 +meet 1 72 2.639057 2.639057 229 +window 0 68 2.708050 0.000000 242 +microsoft 0 38 3.295837 0.000000 468 +trek 1 3 5.857933 5.857933 4025 +shaft 1 2 6.263398 6.263398 6186 +pageuri 0 1 6.957497 0.000000 20301 +pageemail 0 1 6.957497 0.000000 20302 +eduinterest 0 1 6.957497 0.000000 20303 +diversionsstart 0 1 6.957497 0.000000 20304 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..87d7edc1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +research 0 431 0.693147 0.000000 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +graduat 1 215 1.386294 1.386294 31 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +click 0 142 1.945910 0.000000 78 +databas 1 122 2.079442 2.079442 86 +dayton 0 119 2.079442 0.000000 104 +look 0 107 2.197225 0.000000 115 +homepag 1 93 2.397895 2.397895 148 +real 1 93 2.397895 2.397895 144 +present 0 91 2.397895 0.000000 145 +info 0 85 2.484907 0.000000 176 +educ 0 86 2.484907 0.000000 191 +know 0 80 2.564949 0.000000 198 +want 0 79 2.564949 0.000000 199 +onlin 0 75 2.639057 0.000000 223 +januari 0 62 2.772589 0.000000 264 +undergradu 0 54 2.944439 0.000000 338 +basic 0 50 3.044522 0.000000 360 +past 0 42 3.218876 0.000000 428 +india 1 32 3.465736 3.465736 550 +hope 0 28 3.610918 0.000000 610 +sometim 0 24 3.761200 0.000000 696 +earlier 0 13 4.382027 0.000000 1140 +usaphon 0 9 4.753590 0.000000 1600 +madra 1 8 4.875197 4.875197 1770 +nativ 0 6 5.164786 0.000000 2192 +asha 1 3 5.857933 5.857933 4037 +venkatesh 0 2 6.263398 0.000000 6154 +andhra 0 2 6.263398 0.000000 5571 +pradesh 0 2 6.263398 0.000000 5572 +yearbook 0 2 6.263398 0.000000 6243 +till 0 2 6.263398 0.000000 5850 +btech 0 2 6.263398 0.000000 6123 +genesi 0 2 6.263398 0.000000 6011 +ganti 1 1 6.957497 6.957497 20305 +godav 1 1 6.957497 6.957497 20306 +pagevenkatesh 0 1 6.957497 0.000000 20307 +vganti 0 1 6.957497 0.000000 20308 +studentoffic 0 1 6.957497 0.000000 20309 +kakinada 0 1 6.957497 0.000000 20310 +hostel 0 1 6.957497 0.000000 20311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..c04d32d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 0 431 0.693147 0.000000 10 +work 0 380 0.693147 0.000000 9 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +design 0 213 1.386294 0.000000 25 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +contact 0 153 1.791759 0.000000 59 +address 0 170 1.791759 0.000000 62 +phone 0 175 1.791759 0.000000 45 +distribut 0 162 1.791759 0.000000 51 +data 0 170 1.791759 0.000000 49 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +compil 1 122 2.079442 2.079442 96 +dayton 0 119 2.079442 0.000000 104 +technolog 0 131 2.079442 0.000000 102 +schedul 0 119 2.079442 0.000000 85 +intern 1 108 2.197225 2.197225 128 +person 0 111 2.197225 0.000000 117 +memori 0 101 2.302585 0.000000 139 +commun 1 95 2.397895 2.397895 157 +educ 0 86 2.484907 0.000000 191 +institut 0 84 2.484907 0.000000 187 +symposium 1 72 2.639057 2.639057 238 +august 0 66 2.708050 0.000000 257 +street 0 63 2.772589 0.000000 293 +processor 1 54 2.944439 2.944439 335 +undergradu 0 54 2.944439 0.000000 338 +profession 0 51 2.995732 0.000000 345 +advisor 0 51 2.995732 0.000000 355 +annual 1 40 3.258097 3.258097 458 +submit 0 39 3.258097 0.000000 440 +go 0 33 3.433987 0.000000 529 +india 0 32 3.465736 0.000000 550 +dissert 0 32 3.465736 0.000000 549 +depend 0 29 3.583519 0.000000 583 +strategi 0 25 3.737670 0.000000 682 +doctor 0 24 3.761200 0.000000 709 +predict 0 19 4.007333 0.000000 855 +regist 1 17 4.110874 4.110874 938 +micro 1 15 4.248495 4.248495 1031 +side 0 15 4.248495 0.000000 1022 +affili 0 13 4.382027 0.000000 1194 +multiscalar 2 8 4.875197 9.750394 1783 +sohi 1 6 5.164786 5.164786 2237 +microarchitectur 1 6 5.164786 5.164786 2238 +guri 0 5 5.347108 0.000000 2578 +vijai 1 4 5.568345 5.568345 2960 +vijaykumar 1 3 5.857933 5.857933 4011 +breach 1 3 5.857933 5.857933 4009 +anatomi 0 3 5.857933 0.000000 4010 +birla 0 2 6.263398 0.000000 6239 +pilani 0 2 6.263398 0.000000 6240 +architecturet 0 1 6.957497 0.000000 20312 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..a7d94131 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +link 1 247 1.386294 1.386294 24 +wisc 0 242 1.386294 0.000000 33 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +algorithm 0 162 1.791759 0.000000 57 +dayton 0 119 2.079442 0.000000 104 +number 0 130 2.079442 0.000000 97 +confer 0 126 2.079442 0.000000 100 +theori 1 111 2.197225 2.197225 127 +site 0 106 2.197225 0.000000 119 +associ 0 93 2.397895 0.000000 151 +refer 0 78 2.564949 0.000000 203 +laboratori 1 63 2.772589 2.772589 292 +foundat 0 62 2.772589 0.000000 286 +telephon 0 50 3.044522 0.000000 373 +archiv 0 49 3.044522 0.000000 364 +physic 0 47 3.091042 0.000000 377 +theoret 0 39 3.258097 0.000000 446 +streetmadison 0 38 3.295837 0.000000 474 +bibliographi 1 34 3.401197 3.401197 518 +approxim 0 35 3.401197 0.000000 509 +random 0 34 3.401197 0.000000 511 +john 1 33 3.433987 3.433987 532 +proc 0 26 3.688879 0.000000 649 +departmentunivers 0 24 3.761200 0.000000 711 +daili 0 24 3.761200 0.000000 706 +theorem 0 21 3.912023 0.000000 786 +hypertext 0 19 4.007333 0.000000 865 +dimension 0 18 4.060443 0.000000 909 +element 0 18 4.060443 0.000000 895 +stanford 0 17 4.110874 0.000000 955 +fourth 0 16 4.174387 0.000000 999 +polynomi 0 14 4.317488 0.000000 1069 +automata 0 13 4.382027 0.000000 1135 +montreal 0 7 5.010635 0.000000 1961 +quantum 1 6 5.164786 5.164786 2214 +oxford 0 6 5.164786 0.000000 2121 +forecast 0 6 5.164786 0.000000 2171 +cellular 0 5 5.347108 0.000000 2433 +symp 0 5 5.347108 0.000000 2376 +particl 0 5 5.347108 0.000000 2436 +beam 0 5 5.347108 0.000000 2344 +ucla 0 5 5.347108 0.000000 2502 +webster 0 5 5.347108 0.000000 2468 +preprint 0 3 5.857933 0.000000 3481 +quotat 0 3 5.857933 0.000000 3121 +pagejohn 0 2 6.263398 0.000000 5603 +thesauru 0 2 6.263398 0.000000 6238 +isthmu 0 2 6.263398 0.000000 6152 +watrou 1 1 6.957497 6.957497 20313 +artin 0 1 6.957497 0.000000 20314 +whapl 0 1 6.957497 0.000000 20315 +canadiannumb 0 1 6.957497 0.000000 20316 +assort 0 1 6.957497 0.000000 20317 +lanl 0 1 6.957497 0.000000 20318 +hypatia 0 1 6.957497 0.000000 20319 +stylehypertext 0 1 6.957497 0.000000 20320 +interfaceroget 0 1 6.957497 0.000000 20321 +parasol 0 1 6.957497 0.000000 20322 +recordsplayst 0 1 6.957497 0.000000 20323 +linksweath 0 1 6.957497 0.000000 20324 +madisonth 0 1 6.957497 0.000000 20325 +pagemathemat 0 1 6.957497 0.000000 20326 +servermathematician 0 1 6.957497 0.000000 20327 +biographiesgeek 0 1 6.957497 0.000000 20328 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..7f8dcea0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 0 457 0.693147 0.000000 12 +last 0 314 1.098612 0.000000 14 +email 0 220 1.386294 0.000000 29 +languag 0 227 1.386294 0.000000 26 +softwar 0 220 1.386294 0.000000 30 +updat 0 191 1.609438 0.000000 41 +madison 0 165 1.791759 0.000000 55 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +find 0 111 2.197225 0.000000 111 +peopl 0 96 2.302585 0.000000 132 +part 0 98 2.302585 0.000000 129 +call 0 91 2.397895 0.000000 153 +associ 0 93 2.397895 0.000000 151 +pictur 0 89 2.397895 0.000000 160 +david 0 71 2.639057 0.000000 232 +test 0 66 2.708050 0.000000 252 +guid 0 63 2.772589 0.000000 267 +plai 1 60 2.833213 2.833213 307 +think 0 57 2.890372 0.000000 314 +three 0 54 2.944439 0.000000 330 +suggest 0 53 2.944439 0.000000 331 +date 1 51 2.995732 2.995732 344 +friend 0 48 3.044522 0.000000 376 +possibl 0 47 3.091042 0.000000 378 +physic 0 47 3.091042 0.000000 377 +favorit 0 44 3.135494 0.000000 410 +show 1 43 3.178054 3.178054 417 +around 0 43 3.178054 0.000000 415 +review 0 42 3.218876 0.000000 425 +movi 1 40 3.258097 3.258097 459 +must 0 40 3.258097 0.000000 442 +littl 0 39 3.258097 0.000000 454 +late 0 40 3.258097 0.000000 439 +correct 0 38 3.295837 0.000000 462 +game 0 36 3.367296 0.000000 498 +least 0 35 3.401197 0.000000 516 +richard 0 31 3.496508 0.000000 559 +steve 0 29 3.583519 0.000000 594 +weather 0 28 3.610918 0.000000 618 +great 0 27 3.637586 0.000000 626 +sometim 0 24 3.761200 0.000000 696 +seri 0 24 3.761200 0.000000 708 +wish 0 24 3.761200 0.000000 692 +deal 0 22 3.850148 0.000000 736 +reduc 0 22 3.850148 0.000000 759 +alumni 0 21 3.912023 0.000000 807 +increas 0 20 3.951244 0.000000 829 +longer 0 20 3.951244 0.000000 816 +miss 0 19 4.007333 0.000000 866 +english 0 15 4.248495 0.000000 1033 +manner 0 14 4.317488 0.000000 1074 +someon 0 13 4.382027 0.000000 1128 +entertain 1 12 4.465908 4.465908 1286 +food 1 12 4.465908 4.465908 1285 +amount 0 12 4.465908 0.000000 1208 +player 0 11 4.553877 0.000000 1371 +recit 1 9 4.753590 4.753590 1475 +french 0 9 4.753590 0.000000 1511 +hockei 0 8 4.875197 0.000000 1760 +affect 1 6 5.164786 5.164786 2044 +commit 0 6 5.164786 0.000000 2233 +moder 0 6 5.164786 0.000000 2112 +postcard 0 6 5.164786 0.000000 2181 +forecast 0 6 5.164786 0.000000 2171 +ship 1 5 5.347108 5.347108 2534 +speaker 0 5 5.347108 0.000000 2370 +feet 0 5 5.347108 0.000000 2492 +circumst 0 5 5.347108 0.000000 2283 +peke 0 5 5.347108 0.000000 2539 +cell 0 5 5.347108 0.000000 2274 +lesson 0 5 5.347108 0.000000 2568 +insan 0 3 5.857933 0.000000 4006 +omit 0 3 5.857933 0.000000 3466 +letterman 0 3 5.857933 0.000000 3931 +truck 0 2 6.263398 0.000000 5713 +proportion 0 2 6.263398 0.000000 4091 +behaviour 0 2 6.263398 0.000000 4724 +studio 0 2 6.263398 0.000000 5838 +francais 0 2 6.263398 0.000000 6020 +weiru 0 1 6.957497 0.000000 20329 +eiru 0 1 6.957497 0.000000 20330 +ppppleas 0 1 6.957497 0.000000 20331 +asylum 0 1 6.957497 0.000000 20332 +verbal 0 1 6.957497 0.000000 20333 +cargo 0 1 6.957497 0.000000 20334 +havenos 0 1 6.957497 0.000000 20335 +smell 0 1 6.957497 0.000000 20336 +leder 0 1 6.957497 0.000000 20337 +beoffer 0 1 6.957497 0.000000 20338 +customari 0 1 6.957497 0.000000 20339 +begina 0 1 6.957497 0.000000 20340 +amountof 0 1 6.957497 0.000000 20341 +merest 0 1 6.957497 0.000000 20342 +ofaffect 0 1 6.957497 0.000000 20343 +excruciatingli 0 1 6.957497 0.000000 20344 +atmadison 0 1 6.957497 0.000000 20345 +grei 0 1 6.957497 0.000000 20346 +francai 0 1 6.957497 0.000000 20347 +dictionnairefrancai 0 1 6.957497 0.000000 20348 +anglai 0 1 6.957497 0.000000 20349 +dictionnair 0 1 6.957497 0.000000 20350 +relatif 0 1 6.957497 0.000000 20351 +lafrancophoni 0 1 6.957497 0.000000 20352 +degrammair 0 1 6.957497 0.000000 20353 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..3620d0ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 0 297 1.098612 0.000000 20 +softwar 1 220 1.386294 1.386294 30 +graduat 0 215 1.386294 0.000000 31 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +includ 1 208 1.609438 1.609438 42 +oper 0 180 1.609438 0.000000 34 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +develop 0 174 1.791759 0.000000 53 +first 0 140 1.945910 0.000000 71 +year 0 148 1.945910 0.000000 84 +technolog 1 131 2.079442 2.079442 102 +welcom 0 122 2.079442 0.000000 99 +provid 0 121 2.079442 0.000000 94 +manag 1 114 2.197225 2.197225 125 +peopl 1 96 2.302585 2.302585 132 +memori 1 101 2.302585 2.302585 139 +center 0 88 2.397895 0.000000 158 +educ 0 86 2.484907 0.000000 191 +level 0 87 2.484907 0.000000 180 +second 0 81 2.484907 0.000000 166 +state 0 76 2.564949 0.000000 207 +differ 0 66 2.708050 0.000000 253 +written 0 63 2.772589 0.000000 278 +best 0 59 2.833213 0.000000 299 +variou 0 56 2.890372 0.000000 317 +tabl 0 51 2.995732 0.000000 346 +without 0 50 3.044522 0.000000 370 +california 0 46 3.091042 0.000000 388 +china 0 37 3.332205 0.000000 487 +game 0 36 3.367296 0.000000 498 +within 0 33 3.433987 0.000000 525 +platform 1 29 3.583519 3.583519 591 +quot 0 29 3.583519 0.000000 582 +framework 0 28 3.610918 0.000000 606 +mine 0 26 3.688879 0.000000 654 +corpor 1 21 3.912023 3.912023 802 +tenni 0 20 3.951244 0.000000 838 +expert 0 20 3.951244 0.000000 833 +beij 0 19 4.007333 0.000000 876 +ultim 0 17 4.110874 0.000000 943 +jose 1 16 4.174387 4.174387 976 +zhang 0 16 4.174387 0.000000 980 +tsinghua 0 13 4.382027 0.000000 1195 +nasa 0 13 4.382027 0.000000 1188 +solari 0 12 4.465908 0.000000 1238 +card 0 10 4.653960 0.000000 1435 +cheat 0 10 4.653960 0.000000 1395 +puzzl 0 5 5.347108 0.000000 2507 +republ 1 4 5.568345 5.568345 3032 +shanghai 0 4 5.568345 0.000000 2925 +hometown 0 3 5.857933 0.000000 3817 +am 0 3 5.857933 0.000000 3386 +weiz 0 2 6.263398 0.000000 4693 +eduwork 0 2 6.263398 0.000000 5813 +windowsnt 0 2 6.263398 0.000000 5440 +tandem 0 2 6.263398 0.000000 5027 +chinaemail 0 1 6.957497 0.000000 20354 +experiencecontractor 0 1 6.957497 0.000000 20355 +tuxedo 0 1 6.957497 0.000000 20356 +pathwai 0 1 6.957497 0.000000 20357 +sherpa 0 1 6.957497 0.000000 20358 +hobbiesma 0 1 6.957497 0.000000 20359 +jiangbridg 0 1 6.957497 0.000000 20360 +pingpong 0 1 6.957497 0.000000 20361 +joggingth 0 1 6.957497 0.000000 20362 +challengesolv 0 1 6.957497 0.000000 20363 +sweeper 0 1 6.957497 0.000000 20364 +dayth 0 1 6.957497 0.000000 20365 +ackowledgementthi 0 1 6.957497 0.000000 20366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..3a1eb8aa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +comput 0 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +inform 0 412 0.693147 0.000000 8 +research 0 431 0.693147 0.000000 10 +project 1 340 1.098612 1.098612 18 +email 0 220 1.386294 0.000000 29 +wisc 0 242 1.386294 0.000000 33 +softwar 0 220 1.386294 0.000000 30 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +note 0 142 1.945910 0.000000 67 +construct 0 139 1.945910 0.000000 82 +welcom 0 122 2.079442 0.000000 99 +dayton 0 119 2.079442 0.000000 104 +provid 0 121 2.079442 0.000000 94 +need 0 98 2.302585 0.000000 135 +peopl 0 96 2.302585 0.000000 132 +pictur 0 89 2.397895 0.000000 160 +west 0 83 2.484907 0.000000 192 +come 0 78 2.564949 0.000000 202 +good 0 77 2.564949 0.000000 200 +main 0 67 2.708050 0.000000 256 +explor 0 58 2.890372 0.000000 324 +still 0 50 3.044522 0.000000 362 +telephon 0 50 3.044522 0.000000 373 +streetmadison 0 38 3.295837 0.000000 474 +cluster 0 28 3.610918 0.000000 612 +departmentunivers 0 24 3.761200 0.000000 711 +definit 0 19 4.007333 0.000000 864 +miron 0 14 4.317488 0.000000 1110 +dbm 0 13 4.382027 0.000000 1136 +scan 0 12 4.465908 0.000000 1243 +devis 0 10 4.653960 0.000000 1451 +yanni 0 8 4.875197 0.000000 1713 +ioannidi 0 8 4.875197 0.000000 1714 +wouldn 0 7 5.010635 0.000000 1970 +kent 1 4 5.568345 5.568345 2744 +edufing 0 4 5.568345 0.000000 2713 +wenger 1 3 5.857933 5.857933 4023 +agre 0 3 5.857933 0.000000 4007 +groupuw 0 3 5.857933 0.000000 3934 +preparedfor 0 2 6.263398 0.000000 5886 +workth 0 2 6.263398 0.000000 6137 +andvisu 0 2 6.263398 0.000000 6189 +pothol 0 1 6.957497 0.000000 20367 +wengerassoci 0 1 6.957497 0.000000 20368 +researchercomput 0 1 6.957497 0.000000 20369 +arecod 0 1 6.957497 0.000000 20370 +anddevis 0 1 6.957497 0.000000 20371 +acronym 0 1 6.957497 0.000000 20372 +importantpart 0 1 6.957497 0.000000 20373 +visualizationproduc 0 1 6.957497 0.000000 20374 +livnyraghu 0 1 6.957497 0.000000 20375 +ramakrishnanmor 0 1 6.957497 0.000000 20376 +pagewiscinfo 0 1 6.957497 0.000000 20377 +personallinksimageslast 0 1 6.957497 0.000000 20378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..00acd7bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 0 672 0.000000 0.000000 1 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +program 0 374 0.693147 0.000000 7 +work 0 380 0.693147 0.000000 9 +inform 0 412 0.693147 0.000000 8 +project 1 340 1.098612 1.098612 18 +last 0 314 1.098612 0.000000 14 +softwar 1 220 1.386294 1.386294 30 +languag 0 227 1.386294 0.000000 26 +wisc 0 242 1.386294 0.000000 33 +paper 0 205 1.609438 0.000000 38 +group 0 183 1.609438 0.000000 36 +updat 0 191 1.609438 0.000000 41 +wisconsin 2 169 1.791759 3.583518 54 +implement 1 152 1.791759 1.791759 52 +parallel 0 169 1.791759 0.000000 60 +address 0 170 1.791759 0.000000 62 +data 0 170 1.791759 0.000000 49 +develop 0 174 1.791759 0.000000 53 +recent 0 167 1.791759 0.000000 58 +like 1 132 1.945910 1.945910 81 +architectur 1 139 1.945910 1.945910 77 +support 0 132 1.945910 0.000000 83 +first 0 140 1.945910 0.000000 71 +perform 0 143 1.945910 0.000000 74 +relat 0 139 1.945910 0.000000 68 +compil 1 122 2.079442 2.079442 96 +high 0 130 2.079442 0.000000 101 +provid 0 121 2.079442 0.000000 94 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +world 0 115 2.197225 0.000000 126 +memori 1 101 2.302585 2.302585 139 +technic 0 100 2.302585 0.000000 140 +call 0 91 2.397895 0.000000 153 +level 1 87 2.484907 2.484907 180 +librari 0 87 2.484907 0.000000 181 +novemb 0 81 2.484907 0.000000 179 +wide 0 84 2.484907 0.000000 185 +interfac 1 79 2.564949 2.564949 209 +refer 0 78 2.564949 0.000000 203 +messag 0 76 2.564949 0.000000 212 +complet 0 77 2.564949 0.000000 208 +sourc 0 77 2.564949 0.000000 201 +logic 0 71 2.639057 0.000000 230 +name 0 72 2.639057 0.000000 220 +share 1 59 2.833213 2.833213 304 +juli 0 60 2.833213 0.000000 305 +space 0 57 2.890372 0.000000 310 +think 0 57 2.890372 0.000000 314 +overview 0 56 2.890372 0.000000 323 +extens 0 53 2.944439 0.000000 340 +allow 0 53 2.944439 0.000000 333 +talk 0 53 2.944439 0.000000 336 +hardwar 1 51 2.995732 2.995732 350 +week 0 52 2.995732 0.000000 343 +approach 0 48 3.044522 0.000000 366 +adapt 0 46 3.091042 0.000000 387 +mark 0 44 3.135494 0.000000 403 +mechan 0 43 3.178054 0.000000 416 +futur 0 41 3.218876 0.000000 427 +combin 0 42 3.218876 0.000000 421 +programm 0 39 3.258097 0.000000 445 +slide 1 38 3.295837 3.295837 467 +origin 0 38 3.295837 0.000000 472 +bibliographi 0 34 3.401197 0.000000 518 +articl 0 33 3.433987 0.000000 530 +built 0 29 3.583519 0.000000 592 +platform 0 29 3.583519 0.000000 591 +propos 0 28 3.610918 0.000000 602 +pass 0 28 3.610918 0.000000 611 +cluster 0 28 3.610918 0.000000 612 +hill 0 25 3.737670 0.000000 670 +annot 0 21 3.912023 0.000000 775 +fund 0 21 3.912023 0.000000 805 +wind 1 18 4.060443 4.060443 908 +four 0 18 4.060443 0.000000 905 +seek 0 17 4.110874 0.000000 954 +massiv 0 15 4.248495 0.000000 1026 +hybrid 0 15 4.248495 0.000000 1057 +node 0 11 4.553877 0.000000 1326 +fpga 0 10 4.653960 0.000000 1433 +tunnel 1 9 4.753590 4.753590 1615 +paradyn 1 9 4.753590 4.753590 1614 +consensu 0 6 5.164786 0.000000 2080 +tempest 1 5 5.347108 5.347108 2548 +middl 0 5 5.347108 0.000000 2372 +hypothet 0 5 5.347108 0.000000 2474 +departmentat 0 5 5.347108 0.000000 2513 +aboutth 0 4 5.568345 0.000000 2720 +ofworkst 0 4 5.568345 0.000000 2679 +markhil 0 4 5.568345 0.000000 2819 +parallellanguag 0 3 5.857933 0.000000 4026 +evolutionari 0 3 5.857933 0.000000 3898 +uniformli 0 2 6.263398 0.000000 6202 +havebeen 0 2 6.263398 0.000000 5830 +snoop 0 2 6.263398 0.000000 5364 +contributor 0 2 6.263398 0.000000 6214 +projectmost 0 1 6.957497 0.000000 20379 +fromworkst 0 1 6.957497 0.000000 20380 +whichprocess 0 1 6.957497 0.000000 20381 +abovesystem 0 1 6.957497 0.000000 20382 +wascoop 0 1 6.957497 0.000000 20383 +toconvent 0 1 6.957497 0.000000 20384 +revolutionari 0 1 6.957497 0.000000 20385 +andprogram 0 1 6.957497 0.000000 20386 +transparentshar 0 1 6.957497 0.000000 20387 +developingimplement 0 1 6.957497 0.000000 20388 +wisconsincow 0 1 6.957497 0.000000 20389 +cowus 0 1 6.957497 0.000000 20390 +sram 0 1 6.957497 0.000000 20391 +collaboratingwith 0 1 6.957497 0.000000 20392 +overviewand 0 1 6.957497 0.000000 20393 +pageor 0 1 6.957497 0.000000 20394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..b0981385 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +program 0 374 0.693147 0.000000 7 +time 0 293 1.098612 0.000000 17 +base 0 165 1.791759 0.000000 50 +develop 0 174 1.791759 0.000000 53 +first 1 140 1.945910 1.945910 71 +like 0 132 1.945910 0.000000 81 +make 1 111 2.197225 2.197225 120 +person 0 111 2.197225 0.000000 117 +pictur 0 89 2.397895 0.000000 160 +imag 0 91 2.397895 0.000000 161 +thing 0 84 2.484907 0.000000 189 +appear 0 78 2.564949 0.000000 210 +creat 0 63 2.772589 0.000000 277 +back 0 60 2.833213 0.000000 297 +finger 1 52 2.995732 2.995732 354 +give 0 50 3.044522 0.000000 359 +featur 0 46 3.091042 0.000000 386 +could 0 46 3.091042 0.000000 383 +obtain 0 33 3.433987 0.000000 534 +product 0 33 3.433987 0.000000 527 +human 0 32 3.465736 0.000000 546 +anim 0 31 3.496508 0.000000 557 +pass 0 28 3.610918 0.000000 611 +team 0 27 3.637586 0.000000 625 +seri 0 24 3.761200 0.000000 708 +wish 0 24 3.761200 0.000000 692 +instead 0 22 3.850148 0.000000 756 +spend 0 19 4.007333 0.000000 850 +statu 0 18 4.060443 0.000000 885 +seem 0 18 4.060443 0.000000 899 +account 0 18 4.060443 0.000000 882 +whether 0 17 4.110874 0.000000 918 +across 0 16 4.174387 0.000000 974 +charact 0 15 4.248495 0.000000 1028 +save 0 14 4.317488 0.000000 1099 +shown 0 14 4.317488 0.000000 1080 +comic 0 14 4.317488 0.000000 1103 +charl 0 13 4.382027 0.000000 1149 +newspap 0 12 4.465908 0.000000 1280 +remov 0 12 4.465908 0.000000 1225 +magic 0 11 4.553877 0.000000 1358 +pick 0 9 4.753590 0.000000 1498 +didn 0 9 4.753590 0.000000 1563 +hundr 0 9 4.753590 0.000000 1528 +star 1 8 4.875197 4.875197 1717 +film 0 8 4.875197 0.000000 1761 +successfulli 0 7 5.010635 0.000000 1869 +televis 1 6 5.164786 5.164786 2118 +famou 0 6 5.164786 0.000000 2185 +strip 0 6 5.164786 0.000000 2203 +put 0 6 5.164786 0.000000 2017 +chat 0 6 5.164786 0.000000 2128 +keeper 0 5 5.347108 0.000000 2569 +trick 1 4 5.568345 5.568345 2967 +accompani 0 4 5.568345 0.000000 2666 +transmit 0 4 5.568345 0.000000 2835 +somehow 0 4 5.568345 0.000000 2974 +keyboard 0 4 5.568345 0.000000 2970 +rival 0 3 5.857933 0.000000 3583 +agre 0 3 5.857933 0.000000 4007 +advertis 0 3 5.857933 0.000000 3788 +felix 1 2 6.263398 6.263398 5103 +princ 0 2 6.263398 0.000000 4813 +wale 0 2 6.263398 0.000000 4827 +mascot 0 2 6.263398 0.000000 6060 +ear 0 2 6.263398 0.000000 5071 +hairbal 0 2 6.263398 0.000000 6237 +xuelin 0 1 6.957497 0.000000 20395 +otto 0 1 6.957497 0.000000 20396 +messmer 0 1 6.957497 0.000000 20397 +whichwa 0 1 6.957497 0.000000 20398 +chaplin 0 1 6.957497 0.000000 20399 +keaton 0 1 6.957497 0.000000 20400 +polo 0 1 6.957497 0.000000 20401 +lindbergh 0 1 6.957497 0.000000 20402 +theatlant 0 1 6.957497 0.000000 20403 +oneev 0 1 6.957497 0.000000 20404 +teeth 0 1 6.957497 0.000000 20405 +whisker 0 1 6.957497 0.000000 20406 +tail 0 1 6.957497 0.000000 20407 +sui 0 1 6.957497 0.000000 20408 +vritabl 0 1 6.957497 0.000000 20409 +partout 0 1 6.957497 0.000000 20410 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..6561fab7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +us 0 329 1.098612 0.000000 16 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +wisc 0 242 1.386294 0.000000 33 +design 0 213 1.386294 0.000000 25 +languag 0 227 1.386294 0.000000 26 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +applic 0 170 1.791759 0.000000 56 +address 0 170 1.791759 0.000000 62 +recent 0 167 1.791759 0.000000 58 +implement 0 152 1.791759 0.000000 52 +problem 1 147 1.945910 1.945910 75 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +area 0 144 1.945910 0.000000 80 +process 0 142 1.945910 0.000000 72 +model 0 145 1.945910 0.000000 69 +file 0 132 1.945910 0.000000 70 +databas 2 122 2.079442 4.158884 86 +confer 2 126 2.079442 4.158884 100 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +number 1 130 2.079442 2.079442 97 +technolog 0 131 2.079442 0.000000 102 +provid 0 121 2.079442 0.000000 94 +intern 2 108 2.197225 4.394450 128 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +find 0 111 2.197225 0.000000 111 +look 0 107 2.197225 0.000000 115 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +access 0 102 2.302585 0.000000 136 +take 0 97 2.302585 0.000000 134 +part 0 98 2.302585 0.000000 129 +mani 0 92 2.397895 0.000000 150 +associ 0 93 2.397895 0.000000 151 +imag 0 91 2.397895 0.000000 161 +solut 1 82 2.484907 2.484907 162 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +larg 0 82 2.484907 0.000000 168 +help 0 83 2.484907 0.000000 175 +contain 0 81 2.484907 0.000000 174 +journal 0 83 2.484907 0.000000 183 +resourc 0 81 2.484907 0.000000 172 +optim 2 79 2.564949 5.129898 197 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +orient 1 80 2.564949 2.564949 205 +june 0 79 2.564949 0.000000 214 +april 0 77 2.564949 0.000000 196 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +involv 0 71 2.639057 0.000000 227 +intellig 0 72 2.639057 0.000000 225 +integr 1 67 2.708050 2.708050 245 +simul 1 66 2.708050 2.708050 255 +august 1 66 2.708050 2.708050 257 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +differ 0 66 2.708050 0.000000 253 +knowledg 0 67 2.708050 0.000000 243 +septemb 2 65 2.772589 5.545178 274 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +evalu 0 64 2.772589 0.000000 266 +plan 0 65 2.772589 0.000000 272 +experi 0 64 2.772589 0.000000 283 +guid 0 63 2.772589 0.000000 267 +foundat 0 62 2.772589 0.000000 286 +januari 0 62 2.772589 0.000000 264 +improv 0 62 2.772589 0.000000 289 +march 1 61 2.833213 2.833213 295 +juli 0 60 2.833213 0.000000 305 +special 1 56 2.890372 2.890372 320 +sever 1 56 2.890372 2.890372 322 +variou 0 56 2.890372 0.000000 317 +major 0 56 2.890372 0.000000 315 +index 0 56 2.890372 0.000000 309 +scientif 1 53 2.944439 2.944439 341 +februari 0 54 2.944439 0.000000 328 +investig 1 51 2.995732 2.995732 353 +much 0 52 2.995732 0.000000 349 +maintain 0 51 2.995732 0.000000 342 +particular 0 51 2.995732 0.000000 352 +case 0 51 2.995732 0.000000 351 +visual 2 48 3.044522 6.089044 372 +life 0 50 3.044522 0.000000 375 +right 0 48 3.044522 0.000000 363 +format 0 48 3.044522 0.000000 356 +set 0 50 3.044522 0.000000 361 +natur 0 44 3.135494 0.000000 406 +futur 1 41 3.218876 3.218876 427 +edit 0 42 3.218876 0.000000 418 +york 0 41 3.218876 0.000000 435 +transact 1 39 3.258097 3.258097 438 +error 1 40 3.258097 3.258097 449 +join 1 39 3.258097 3.258097 457 +must 0 40 3.258097 0.000000 442 +multipl 0 39 3.258097 0.000000 453 +cost 0 37 3.332205 0.000000 480 +expect 0 37 3.332205 0.000000 484 +especi 0 36 3.367296 0.000000 496 +multi 0 36 3.367296 0.000000 493 +survei 1 35 3.401197 3.401197 513 +random 0 34 3.401197 0.000000 511 +approxim 0 35 3.401197 0.000000 509 +statist 0 35 3.401197 0.000000 521 +queri 2 33 3.433987 6.867974 524 +india 1 32 3.465736 3.465736 550 +independ 0 32 3.465736 0.000000 548 +express 0 32 3.465736 0.000000 540 +scientist 1 31 3.496508 3.496508 560 +compon 0 30 3.555348 0.000000 570 +power 0 30 3.555348 0.000000 573 +graph 0 30 3.555348 0.000000 576 +rang 0 30 3.555348 0.000000 565 +focus 1 29 3.583519 3.583519 584 +limit 0 29 3.583519 0.000000 585 +ask 0 28 3.610918 0.000000 597 +load 0 28 3.610918 0.000000 601 +framework 0 28 3.610918 0.000000 606 +proc 2 26 3.688879 7.377758 649 +altern 1 26 3.688879 3.688879 641 +challeng 0 26 3.688879 0.000000 653 +repres 0 26 3.688879 0.000000 656 +effort 0 26 3.688879 0.000000 652 +valu 1 25 3.737670 3.737670 665 +primari 0 25 3.737670 0.000000 669 +although 0 25 3.737670 0.000000 667 +higher 0 24 3.761200 0.000000 690 +size 1 23 3.806662 3.806662 713 +sequenc 0 23 3.806662 0.000000 734 +displai 0 23 3.806662 0.000000 712 +identifi 1 22 3.850148 3.850148 760 +try 0 22 3.850148 0.000000 764 +properti 0 22 3.850148 0.000000 749 +thu 0 21 3.912023 0.000000 773 +among 0 21 3.912023 0.000000 781 +path 0 21 3.912023 0.000000 778 +flexibl 0 21 3.912023 0.000000 792 +sigmod 1 19 4.007333 4.007333 877 +boston 0 19 4.007333 0.000000 862 +concentr 1 18 4.060443 4.060443 906 +record 0 18 4.060443 0.000000 890 +dimension 0 18 4.060443 0.000000 909 +estim 1 17 4.110874 4.110874 930 +miller 1 17 4.110874 4.110874 949 +attempt 0 17 4.110874 0.000000 917 +former 0 17 4.110874 0.000000 956 +ramakrishnan 1 16 4.174387 4.174387 972 +advantag 0 16 4.174387 0.000000 987 +jose 0 16 4.174387 0.000000 976 +cambridg 0 16 4.174387 0.000000 1008 +livni 1 15 4.248495 4.248495 1053 +transit 0 15 4.248495 0.000000 1046 +heterogen 1 14 4.317488 4.317488 1090 +balanc 1 14 4.317488 4.317488 1112 +attribut 0 14 4.317488 0.000000 1092 +primarili 1 13 4.382027 4.382027 1185 +translat 1 13 4.382027 4.382027 1164 +canada 1 13 4.382027 4.382027 1158 +readi 0 12 4.465908 0.000000 1242 +gupta 0 12 4.465908 0.000000 1241 +extrem 0 11 4.553877 0.000000 1330 +cycl 0 11 4.553877 0.000000 1335 +itali 0 11 4.553877 0.000000 1378 +desktop 1 10 4.653960 4.653960 1445 +tradit 0 10 4.653960 0.000000 1404 +genet 0 10 4.653960 0.000000 1409 +vldb 0 10 4.653960 0.000000 1470 +pose 0 9 4.753590 0.000000 1535 +significantli 0 9 4.753590 0.000000 1508 +mode 0 9 4.753590 0.000000 1492 +latter 0 9 4.753590 0.000000 1522 +conferenceon 0 9 4.753590 0.000000 1595 +incomplet 0 9 4.753590 0.000000 1575 +ioannidi 3 8 4.875197 14.625591 1714 +yanni 1 8 4.875197 4.875197 1713 +bridg 1 8 4.875197 4.875197 1764 +closur 0 8 4.875197 0.000000 1643 +solomon 0 8 4.875197 0.000000 1716 +unifi 0 8 4.875197 0.000000 1774 +databasesystem 0 8 4.875197 0.000000 1617 +aris 1 7 5.010635 5.010635 1924 +bombai 1 7 5.010635 5.010635 1972 +montreal 1 7 5.010635 5.010635 1961 +paramet 0 7 5.010635 0.000000 1796 +parametr 0 7 5.010635 0.000000 1819 +throughout 0 7 5.010635 0.000000 1871 +sweden 0 7 5.010635 0.000000 1885 +predic 0 7 5.010635 0.000000 1806 +serial 0 7 5.010635 0.000000 1975 +schema 1 6 5.164786 5.164786 1988 +eduresearch 0 6 5.164786 0.000000 2205 +divers 0 6 5.164786 0.000000 2232 +greec 0 6 5.164786 0.000000 2208 +tsatalo 1 5 5.347108 5.347108 2581 +travers 0 5 5.347108 0.000000 2363 +frog 0 5 5.347108 0.000000 2479 +desk 0 5 5.347108 0.000000 2297 +minneapoli 0 5 5.347108 0.000000 2480 +england 0 5 5.347108 0.000000 2557 +ireland 1 4 5.568345 5.568345 2853 +algorithmsfor 0 4 5.568345 0.000000 2748 +multimediasystem 0 4 5.568345 0.000000 2701 +forparallel 0 4 5.568345 0.000000 2703 +customiz 0 4 5.568345 0.000000 2966 +chile 0 4 5.568345 0.000000 3082 +histogram 1 3 5.857933 5.857933 3490 +propag 1 3 5.857933 5.857933 3997 +disciplin 1 3 5.857933 5.857933 3392 +metaphor 1 3 5.857933 5.857933 4038 +inadequ 0 3 5.857933 0.000000 3730 +andsemant 0 3 5.857933 0.000000 3246 +microscop 0 3 5.857933 0.000000 4035 +publicationsi 0 3 5.857933 0.000000 3827 +conjunct 0 3 5.857933 0.000000 3743 +stockholm 0 3 5.857933 0.000000 3715 +zurich 0 3 5.857933 0.000000 3550 +switzerland 0 3 5.857933 0.000000 3551 +santiago 0 3 5.857933 0.000000 4013 +poosala 1 2 6.263398 6.263398 6228 +queryoptim 1 2 6.263398 6.263398 4057 +garofalaki 1 2 6.263398 6.263398 6209 +dublin 1 2 6.263398 6.263398 4883 +interestsdatabas 0 2 6.263398 0.000000 6116 +andinform 0 2 6.263398 0.000000 5550 +scientificdata 0 2 6.263398 0.000000 6067 +algorithmsa 0 2 6.263398 0.000000 4487 +anneal 0 2 6.263398 0.000000 4136 +basedperform 0 2 6.263398 0.000000 6055 +spectroscopi 0 2 6.263398 0.000000 6206 +anniversari 0 2 6.263398 0.000000 4945 +performanceevalu 0 2 6.263398 0.000000 6052 +bermuda 0 2 6.263398 0.000000 5907 +turtl 0 2 6.263398 0.000000 4235 +haa 0 2 6.263398 0.000000 6115 +gmap 0 2 6.263398 0.000000 6241 +versatil 0 2 6.263398 0.000000 6242 +haber 1 1 6.957497 6.957497 20411 +vldbconfer 1 1 6.957497 6.957497 20412 +tod 1 1 6.957497 6.957497 20413 +ofheterogen 1 1 6.957497 6.957497 20414 +ondatabas 1 1 6.957497 6.957497 20415 +opossum 1 1 6.957497 6.957497 20416 +ioannidisyanni 0 1 6.957497 0.000000 20417 +toqueri 0 1 6.957497 0.000000 20418 +thanin 0 1 6.957497 0.000000 20419 +highera 0 1 6.957497 0.000000 20420 +tooptim 0 1 6.957497 0.000000 20421 +querywil 0 1 6.957497 0.000000 20422 +optimum 0 1 6.957497 0.000000 20423 +viabl 0 1 6.957497 0.000000 20424 +propertiesof 0 1 6.957497 0.000000 20425 +especiallythos 0 1 6.957497 0.000000 20426 +alsopart 0 1 6.957497 0.000000 20427 +appropriateinform 0 1 6.957497 0.000000 20428 +thepropag 0 1 6.957497 0.000000 20429 +ofoptim 0 1 6.957497 0.000000 20430 +inrel 0 1 6.957497 0.000000 20431 +manyexperi 0 1 6.957497 0.000000 20432 +aspectsthat 0 1 6.957497 0.000000 20433 +managementenviron 0 1 6.957497 0.000000 20434 +theirexperiment 0 1 6.957497 0.000000 20435 +arefor 0 1 6.957497 0.000000 20436 +scientistsso 0 1 6.957497 0.000000 20437 +facilitatetransl 0 1 6.957497 0.000000 20438 +experimentalscientif 0 1 6.957497 0.000000 20439 +specificproject 0 1 6.957497 0.000000 20440 +plantgrowth 0 1 6.957497 0.000000 20441 +issueon 0 1 6.957497 0.000000 20442 +beyondrel 0 1 6.957497 0.000000 20443 +forschema 0 1 6.957497 0.000000 20444 +tsangari 0 1 6.957497 0.000000 20445 +tkde 0 1 6.957497 0.000000 20446 +christodoulaki 0 1 6.957497 0.000000 20447 +limitingworst 0 1 6.957497 0.000000 20448 +winger 0 1 6.957497 0.000000 20449 +algorithmsbas 0 1 6.957497 0.000000 20450 +databaseestim 0 1 6.957497 0.000000 20451 +ponnekanti 0 1 6.957497 0.000000 20452 +experimentmanag 0 1 6.957497 0.000000 20453 +itsappl 0 1 6.957497 0.000000 20454 +anjur 0 1 6.957497 0.000000 20455 +bridgesbetween 0 1 6.957497 0.000000 20456 +shekita 0 1 6.957497 0.000000 20457 +forselect 0 1 6.957497 0.000000 20458 +internationalacm 0 1 6.957497 0.000000 20459 +layoutat 0 1 6.957497 0.000000 20460 +granular 0 1 6.957497 0.000000 20461 +advancedvisu 0 1 6.957497 0.000000 20462 +gubbio 0 1 6.957497 0.000000 20463 +managementthrough 0 1 6.957497 0.000000 20464 +practicalityfor 0 1 6.957497 0.000000 20465 +sigmodconfer 0 1 6.957497 0.000000 20466 +forphys 0 1 6.957497 0.000000 20467 +dexa 0 1 6.957497 0.000000 20468 +athen 0 1 6.957497 0.000000 20469 +lashkari 0 1 6.957497 0.000000 20470 +theirdisambigu 0 1 6.957497 0.000000 20471 +schemavisu 0 1 6.957497 0.000000 20472 +edbt 0 1 6.957497 0.000000 20473 +internationalvldb 0 1 6.957497 0.000000 20474 +capacityin 0 1 6.957497 0.000000 20475 +wiener 0 1 6.957497 0.000000 20476 +moos 0 1 6.957497 0.000000 20477 +withdata 0 1 6.957497 0.000000 20478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..1159e78c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +homepag 0 93 2.397895 0.000000 148 +kevin 0 9 4.753590 0.000000 1482 +zhongbin 0 1 6.957497 0.000000 20496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..359d564e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +work 0 380 0.693147 0.000000 9 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +offic 0 299 1.098612 0.000000 13 +modifi 0 178 1.609438 0.000000 35 +madison 1 165 1.791759 1.791759 55 +address 0 170 1.791759 0.000000 62 +read 0 154 1.791759 0.000000 47 +support 0 132 1.945910 0.000000 83 +welcom 0 122 2.079442 0.000000 99 +seattl 0 120 2.079442 0.000000 103 +number 0 130 2.079442 0.000000 97 +person 1 111 2.197225 2.197225 117 +pleas 0 113 2.197225 0.000000 114 +look 0 107 2.197225 0.000000 115 +send 0 114 2.197225 0.000000 109 +take 0 97 2.302585 0.000000 134 +access 0 102 2.302585 0.000000 136 +homepag 0 93 2.397895 0.000000 148 +pictur 0 89 2.397895 0.000000 160 +mani 0 92 2.397895 0.000000 150 +comment 0 93 2.397895 0.000000 146 +info 0 85 2.484907 0.000000 176 +good 1 77 2.564949 2.564949 200 +collect 0 65 2.772589 0.000000 268 +organ 0 65 2.772589 0.000000 265 +give 0 50 3.044522 0.000000 359 +telephon 0 50 3.044522 0.000000 373 +visitor 0 49 3.044522 0.000000 371 +press 0 42 3.218876 0.000000 419 +littl 0 39 3.258097 0.000000 454 +especi 0 36 3.367296 0.000000 496 +everi 0 34 3.401197 0.000000 519 +articl 0 33 3.433987 0.000000 530 +travel 0 30 3.555348 0.000000 579 +chines 0 29 3.583519 0.000000 595 +univ 0 28 3.610918 0.000000 617 +comp 0 26 3.688879 0.000000 650 +client 0 25 3.737670 0.000000 679 +sometim 0 24 3.761200 0.000000 696 +alumni 0 21 3.912023 0.000000 807 +wrote 0 20 3.951244 0.000000 830 +stat 0 17 4.110874 0.000000 924 +took 0 16 4.174387 0.000000 1010 +month 0 15 4.248495 0.000000 1025 +trip 0 14 4.317488 0.000000 1113 +employ 0 12 4.465908 0.000000 1291 +classmat 0 9 4.753590 0.000000 1516 +chicago 0 6 5.164786 0.000000 2149 +bldg 0 4 5.568345 0.000000 2983 +amaz 0 4 5.568345 0.000000 2600 +usathi 0 2 6.263398 0.000000 5951 +diari 0 2 6.263398 0.000000 4740 +linksmi 0 2 6.263398 0.000000 6215 +oversea 0 2 6.263398 0.000000 5781 +yinng 0 1 6.957497 0.000000 20479 +pageindexofyinongwei 0 1 6.957497 0.000000 20480 +spagehi 0 1 6.957497 0.000000 20481 +alsolink 0 1 6.957497 0.000000 20482 +inforesumehobbiestravel 0 1 6.957497 0.000000 20483 +pointersr 0 1 6.957497 0.000000 20484 +computingmacin 0 1 6.957497 0.000000 20485 +learningpattern 0 1 6.957497 0.000000 20486 +recognitioncomputatin 0 1 6.957497 0.000000 20487 +geometrydatabasevisionacadem 0 1 6.957497 0.000000 20488 +diarythi 0 1 6.957497 0.000000 20489 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 0 1 6.957497 0.000000 20490 +beida 0 1 6.957497 0.000000 20491 +classmatespek 0 1 6.957497 0.000000 20492 +ciumi 0 1 6.957497 0.000000 20493 +bookmarkcom 0 1 6.957497 0.000000 20494 +yinong 0 1 6.957497 0.000000 20495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..069a0755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 0 412 0.693147 0.000000 8 +offic 0 299 1.098612 0.000000 13 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +updat 0 191 1.609438 0.000000 41 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +call 1 91 2.397895 2.397895 153 +room 0 59 2.833213 0.000000 301 +basic 0 50 3.044522 0.000000 360 +give 0 50 3.044522 0.000000 359 +california 0 46 3.091042 0.000000 388 +around 0 43 3.178054 0.000000 415 +strategi 0 25 3.737670 0.000000 682 +voic 0 21 3.912023 0.000000 806 +hous 0 21 3.912023 0.000000 801 +beauti 0 18 4.060443 0.000000 912 +drive 0 15 4.248495 0.000000 1052 +food 1 12 4.465908 4.465908 1285 +poor 1 8 4.875197 4.875197 1736 +matthew 0 6 5.164786 0.000000 2193 +parent 0 6 5.164786 0.000000 2204 +observatori 0 4 5.568345 0.000000 3070 +matt 0 3 5.857933 0.000000 3792 +beach 0 3 5.857933 0.000000 3782 +convuls 1 1 6.957497 6.957497 20497 +sera 1 1 6.957497 6.957497 20498 +pageuntil 0 1 6.957497 0.000000 20499 +zeidenbergcent 0 1 6.957497 0.000000 20500 +gilson 0 1 6.957497 0.000000 20501 +zeiden 0 1 6.957497 0.000000 20502 +eduzeidenb 0 1 6.957497 0.000000 20503 +eduwhen 0 1 6.957497 0.000000 20504 +coho 0 1 6.957497 0.000000 20505 +huntington 0 1 6.957497 0.000000 20506 +breton 0 1 6.957497 0.000000 20507 +nadja 0 1 6.957497 0.000000 20508 +beaut 0 1 6.957497 0.000000 20509 +saint 0 1 6.957497 0.000000 20510 +whyth 0 1 6.957497 0.000000 20511 +communist 0 1 6.957497 0.000000 20512 +helder 0 1 6.957497 0.000000 20513 +camara 0 1 6.957497 0.000000 20514 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..f79f69db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 0 412 0.693147 0.000000 8 +depart 0 457 0.693147 0.000000 12 +us 1 329 1.098612 1.098612 16 +student 0 343 1.098612 0.000000 19 +time 0 293 1.098612 0.000000 17 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +design 1 213 1.386294 1.386294 25 +gener 0 220 1.386294 0.000000 27 +mail 0 238 1.386294 0.000000 22 +wisc 0 242 1.386294 0.000000 33 +link 0 247 1.386294 0.000000 24 +public 0 202 1.609438 0.000000 43 +updat 0 191 1.609438 0.000000 41 +data 2 170 1.791759 3.583518 49 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +develop 0 174 1.791759 0.000000 53 +implement 0 152 1.791759 0.000000 52 +distribut 0 162 1.791759 0.000000 51 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +report 0 131 2.079442 0.000000 92 +technolog 0 131 2.079442 0.000000 102 +document 0 121 2.079442 0.000000 89 +topic 0 114 2.197225 0.000000 110 +manag 0 114 2.197225 0.000000 125 +find 0 111 2.197225 0.000000 111 +technic 1 100 2.302585 2.302585 140 +need 0 98 2.302585 0.000000 135 +techniqu 0 99 2.302585 0.000000 138 +memori 0 101 2.302585 0.000000 139 +mani 0 92 2.397895 0.000000 150 +imag 0 91 2.397895 0.000000 161 +select 0 91 2.397895 0.000000 154 +larg 2 82 2.484907 4.969814 168 +journal 1 83 2.484907 2.484907 183 +educ 1 86 2.484907 2.484907 191 +resourc 0 81 2.484907 0.000000 172 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +issu 0 78 2.564949 0.000000 211 +effici 1 73 2.639057 2.639057 233 +intellig 1 72 2.639057 2.639057 225 +symposium 1 72 2.639057 2.639057 238 +appli 0 71 2.639057 0.000000 226 +workshop 0 71 2.639057 0.000000 239 +free 0 73 2.639057 0.000000 224 +integr 0 67 2.708050 0.000000 245 +practic 0 70 2.708050 0.000000 246 +knowledg 0 67 2.708050 0.000000 243 +artifici 1 63 2.772589 2.772589 280 +prof 1 64 2.772589 2.772589 273 +plan 1 65 2.772589 2.772589 272 +dept 0 64 2.772589 0.000000 291 +function 0 62 2.772589 0.000000 275 +import 0 65 2.772589 0.000000 282 +interact 0 62 2.772589 0.000000 270 +organ 0 65 2.772589 0.000000 265 +room 0 59 2.833213 0.000000 301 +juli 0 60 2.833213 0.000000 305 +major 0 56 2.890372 0.000000 315 +thesi 0 57 2.890372 0.000000 327 +space 0 57 2.890372 0.000000 310 +run 0 51 2.995732 0.000000 347 +profession 0 51 2.995732 0.000000 345 +telephon 1 50 3.044522 3.044522 373 +fast 0 42 3.218876 0.000000 429 +submit 0 39 3.258097 0.000000 440 +probabl 0 40 3.258097 0.000000 455 +purpos 0 37 3.332205 0.000000 481 +china 0 37 3.332205 0.000000 487 +robot 1 36 3.367296 3.367296 497 +multi 1 36 3.367296 3.367296 493 +statist 0 35 3.401197 0.000000 521 +eduoffic 0 33 3.433987 0.000000 531 +given 0 32 3.465736 0.000000 538 +domain 0 30 3.555348 0.000000 564 +limit 0 29 3.583519 0.000000 585 +cluster 1 28 3.610918 3.610918 612 +univ 0 28 3.610918 0.000000 617 +manipul 0 27 3.637586 0.000000 624 +mine 1 26 3.688879 3.688879 654 +proc 1 26 3.688879 3.688879 649 +relev 0 26 3.688879 0.000000 637 +accur 0 25 3.737670 0.000000 680 +pattern 1 24 3.761200 3.761200 689 +motion 1 24 3.761200 3.761200 699 +compress 0 23 3.806662 0.000000 719 +recognit 0 23 3.806662 0.000000 723 +mobil 0 23 3.806662 0.000000 730 +identifi 0 22 3.850148 0.000000 760 +cooper 0 22 3.850148 0.000000 757 +divis 0 21 3.912023 0.000000 803 +path 0 21 3.912023 0.000000 778 +kernel 0 20 3.951244 0.000000 825 +sigmod 1 19 4.007333 4.007333 877 +beij 1 19 4.007333 4.007333 876 +region 0 19 4.007333 0.000000 875 +concentr 0 18 4.060443 0.000000 906 +dimension 0 18 4.060443 0.000000 909 +estim 1 17 4.110874 4.110874 930 +zhang 3 16 4.174387 12.523161 980 +ramakrishnan 1 16 4.174387 4.174387 972 +spars 0 16 4.174387 0.000000 989 +young 0 16 4.174387 0.000000 991 +livni 1 15 4.248495 4.248495 1053 +configur 0 15 4.248495 0.000000 1012 +miron 1 14 4.317488 4.317488 1110 +topolog 1 14 4.317488 4.317488 1089 +finit 0 14 4.317488 0.000000 1106 +joint 1 13 4.382027 4.382027 1130 +canada 1 13 4.382027 4.382027 1158 +conf 0 13 4.382027 0.000000 1181 +raghu 1 12 4.465908 4.465908 1212 +grow 0 12 4.465908 0.000000 1209 +amount 0 12 4.465908 0.000000 1208 +overal 0 12 4.465908 0.000000 1254 +branch 0 11 4.553877 0.000000 1318 +discov 0 9 4.753590 0.000000 1562 +classif 0 9 4.753590 0.000000 1586 +manufactur 0 8 4.875197 0.000000 1634 +dataset 1 7 5.010635 5.010635 1914 +densiti 1 7 5.010635 5.010635 1927 +discoveri 0 7 5.010635 0.000000 1915 +trend 0 7 5.010635 0.000000 1842 +dimens 0 7 5.010635 0.000000 1930 +reduct 0 7 5.010635 0.000000 1877 +financi 0 6 5.164786 0.000000 2197 +invest 0 6 5.164786 0.000000 2153 +ling 1 4 5.568345 5.568345 3045 +exploratori 0 4 5.568345 0.000000 3073 +ijcai 0 4 5.568345 0.000000 2901 +tian 2 3 5.857933 11.715866 3680 +birch 1 2 6.263398 6.263398 6136 +ortool 0 2 6.263398 0.000000 4169 +andmanufactur 0 2 6.263398 0.000000 6244 +collis 0 2 6.263398 0.000000 5956 +jianwei 1 1 6.957497 6.957497 20515 +assistantadvisor 0 1 6.957497 0.000000 20516 +compilerminor 0 1 6.957497 0.000000 20517 +bankingoffic 0 1 6.957497 0.000000 20518 +intereststher 0 1 6.957497 0.000000 20519 +territori 0 1 6.957497 0.000000 20520 +densityanalysi 0 1 6.957497 0.000000 20521 +crowd 0 1 6.957497 0.000000 20522 +dataclassif 0 1 6.957497 0.000000 20523 +knowledgediscoveri 0 1 6.957497 0.000000 20524 +dimensionreduct 0 1 6.957497 0.000000 20525 +findpath 0 1 6.957497 0.000000 20526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..27742649 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +univers 0 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 0 457 0.693147 0.000000 12 +interest 0 384 0.693147 0.000000 11 +engin 0 297 1.098612 0.000000 20 +wisc 0 242 1.386294 0.000000 33 +fall 0 181 1.609438 0.000000 40 +group 0 183 1.609438 0.000000 36 +data 1 170 1.791759 1.791759 49 +madison 1 165 1.791759 1.791759 55 +wisconsin 0 169 1.791759 0.000000 54 +parallel 0 169 1.791759 0.000000 60 +relat 1 139 1.945910 1.945910 68 +object 0 138 1.945910 0.000000 79 +process 0 142 1.945910 0.000000 72 +dayton 0 119 2.079442 0.000000 104 +site 1 106 2.197225 2.197225 119 +assist 0 112 2.197225 0.000000 113 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +west 0 83 2.484907 0.000000 192 +server 1 76 2.564949 2.564949 204 +line 0 75 2.639057 0.000000 231 +new 1 64 2.772589 2.772589 262 +prof 0 64 2.772589 0.000000 273 +detail 0 57 2.890372 0.000000 321 +streetmadison 0 38 3.295837 0.000000 474 +sciencesunivers 0 37 3.332205 0.000000 486 +graph 0 30 3.555348 0.000000 576 +chines 0 29 3.583519 0.000000 595 +mine 1 26 3.688879 3.688879 654 +jeff 0 25 3.737670 0.000000 673 +todai 0 25 3.737670 0.000000 672 +daili 1 24 3.761200 3.761200 706 +yahoo 0 24 3.761200 0.000000 707 +benchmark 0 19 4.007333 0.000000 859 +north 0 19 4.007333 0.000000 873 +sigmod 0 19 4.007333 0.000000 877 +lyco 0 19 4.007333 0.000000 871 +stock 0 16 4.174387 0.000000 1007 +taiwan 0 16 4.174387 0.000000 1006 +club 0 15 4.248495 0.000000 1058 +dbm 1 13 4.382027 4.382027 1136 +excit 0 11 4.553877 0.000000 1329 +surf 0 11 4.553877 0.000000 1301 +naughton 0 10 4.653960 0.000000 1450 +analyt 0 7 5.010635 0.000000 1913 +monei 0 7 5.010635 0.000000 1934 +financi 1 6 5.164786 5.164786 2197 +advis 0 6 5.164786 0.000000 2173 +carolina 0 6 5.164786 0.000000 2142 +maryland 0 6 5.164786 0.000000 2140 +chapel 0 5 5.347108 0.000000 2457 +zhao 1 4 5.568345 5.568345 2699 +ters 0 3 5.857933 0.000000 3297 +pathfind 1 2 6.263398 6.263398 6053 +olap 0 2 6.263398 0.000000 6233 +arbor 0 2 6.263398 0.000000 6235 +molap 0 2 6.263398 0.000000 6217 +yihong 1 1 6.957497 6.957497 20527 +educationb 0 1 6.957497 0.000000 20528 +hillm 0 1 6.957497 0.000000 20529 +wiscosin 0 1 6.957497 0.000000 20530 +datamin 0 1 6.957497 0.000000 20531 +microstrategi 0 1 6.957497 0.000000 20532 +rolap 0 1 6.957497 0.000000 20533 +lombard 0 1 6.957497 0.000000 20534 +kiwi 0 1 6.957497 0.000000 20535 +pgmo 0 1 6.957497 0.000000 20536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..3fb63cf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +inform 0 412 0.693147 0.000000 8 +offic 1 299 1.098612 1.098612 13 +student 0 343 1.098612 0.000000 19 +wisc 1 242 1.386294 1.386294 33 +email 0 220 1.386294 0.000000 29 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +construct 0 139 1.945910 0.000000 82 +dayton 0 119 2.079442 0.000000 104 +homepag 0 93 2.397895 0.000000 148 +street 0 63 2.772589 0.000000 293 +still 0 50 3.044522 0.000000 362 +offer 0 43 3.178054 0.000000 414 +wang 0 21 3.912023 0.000000 790 +johnson 0 13 4.382027 0.000000 1162 +zhewang 0 1 6.957497 0.000000 20537 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..c1cc8851 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 0 457 0.693147 0.000000 12 +work 0 380 0.693147 0.000000 9 +offic 0 299 1.098612 0.000000 13 +project 0 340 1.098612 0.000000 18 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +link 0 247 1.386294 0.000000 24 +paper 0 205 1.609438 0.000000 38 +public 0 202 1.609438 0.000000 43 +oper 0 180 1.609438 0.000000 34 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +madison 0 165 1.791759 0.000000 55 +phone 0 175 1.791759 0.000000 45 +network 0 168 1.791759 0.000000 61 +texa 0 160 1.791759 0.000000 64 +develop 0 174 1.791759 0.000000 53 +perform 1 143 1.945910 1.945910 74 +area 1 144 1.945910 1.945910 80 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +object 0 138 1.945910 0.000000 79 +click 0 142 1.945910 0.000000 78 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +dayton 0 119 2.079442 0.000000 104 +machin 0 129 2.079442 0.000000 95 +tool 0 117 2.079442 0.000000 93 +technolog 0 131 2.079442 0.000000 102 +postscript 0 131 2.079442 0.000000 90 +version 1 113 2.197225 2.197225 122 +assist 0 112 2.197225 0.000000 113 +intern 0 108 2.197225 0.000000 128 +place 0 106 2.197225 0.000000 124 +techniqu 1 99 2.302585 2.302585 138 +memori 0 101 2.302585 0.000000 139 +environ 1 84 2.484907 2.484907 177 +issu 0 78 2.564949 0.000000 211 +orient 0 80 2.564949 0.000000 205 +nation 0 74 2.639057 0.000000 240 +html 0 75 2.639057 0.000000 235 +simul 0 66 2.708050 0.000000 255 +java 0 70 2.708050 0.000000 248 +evalu 0 64 2.772589 0.000000 266 +laboratori 0 63 2.772589 0.000000 292 +juli 0 60 2.833213 0.000000 305 +share 0 59 2.833213 0.000000 304 +think 0 57 2.890372 0.000000 314 +publish 0 57 2.890372 0.000000 326 +sever 0 56 2.890372 0.000000 322 +advisor 0 51 2.995732 0.000000 355 +friend 0 48 3.044522 0.000000 376 +press 0 42 3.218876 0.000000 419 +combin 0 42 3.218876 0.000000 421 +workstat 0 37 3.332205 0.000000 479 +field 0 37 3.332205 0.000000 482 +china 0 37 3.332205 0.000000 487 +jame 0 35 3.401197 0.000000 507 +award 0 34 3.401197 0.000000 523 +particip 0 29 3.583519 0.000000 589 +cluster 0 28 3.610918 0.000000 612 +detect 0 26 3.688879 0.000000 646 +supercomput 0 25 3.737670 0.000000 681 +benchmark 0 19 4.007333 0.000000 859 +predict 0 19 4.007333 0.000000 855 +asplo 0 17 4.110874 0.000000 948 +novel 0 15 4.248495 0.000000 1039 +paradyn 0 9 4.753590 0.000000 1614 +tunnel 0 9 4.753590 0.000000 1615 +andth 0 9 4.753590 0.000000 1481 +antonio 0 6 5.164786 0.000000 2186 +barton 0 5 5.347108 0.000000 2371 +ofparallel 0 5 5.347108 0.000000 2380 +departmentat 0 5 5.347108 0.000000 2513 +anddistribut 0 4 5.568345 0.000000 3031 +bottleneck 0 4 5.568345 0.000000 2769 +fudan 1 3 5.857933 5.857933 3707 +blizzard 0 2 6.263398 0.000000 6226 +levelprogram 0 2 6.263398 0.000000 5452 +zhichen 1 1 6.957497 6.957497 20538 +larusprofessor 0 1 6.957497 0.000000 20539 +millerawardbest 0 1 6.957497 0.000000 20540 +eliminateperform 0 1 6.957497 0.000000 20541 +toolwith 0 1 6.957497 0.000000 20542 +wisconsinwind 0 1 6.957497 0.000000 20543 +interestprogram 0 1 6.957497 0.000000 20544 +andimcrement 0 1 6.957497 0.000000 20545 +programjourn 0 1 6.957497 0.000000 20546 +researchchines 0 1 6.957497 0.000000 20547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..f1445fe0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +home 0 672 0.000000 0.000000 1 +page 0 705 0.000000 0.000000 3 +comput 0 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +depart 0 457 0.693147 0.000000 12 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 0 119 2.079442 0.000000 104 +pictur 0 89 2.397895 0.000000 160 +west 0 83 2.484907 0.000000 192 +name 0 72 2.639057 0.000000 220 +street 0 63 2.772589 0.000000 293 +taken 0 31 3.496508 0.000000 555 +chen 0 21 3.912023 0.000000 791 +wang 0 21 3.912023 0.000000 790 +zhang 1 16 4.174387 4.174387 980 +tsinghua 0 13 4.382027 0.000000 1195 +hello 0 10 4.653960 0.000000 1407 +invit 0 10 4.653960 0.000000 1428 +restaur 0 6 5.164786 0.000000 2230 +theth 0 5 5.347108 0.000000 2325 +tong 0 3 5.857933 0.000000 3258 +supper 0 1 6.957497 0.000000 20548 +weihai 0 1 6.957497 0.000000 20549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..61b49ea9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +student 0 343 1.098612 0.000000 19 +number 0 130 2.079442 0.000000 97 +pictur 0 89 2.397895 0.000000 160 +visitor 0 49 3.044522 0.000000 371 +thank 0 23 3.806662 0.000000 721 +stop 0 17 4.110874 0.000000 942 +poland 0 3 5.857933 0.000000 3665 +inc 0 2 6.263398 0.000000 5914 +krzysztof 0 1 6.957497 0.000000 20550 +zmudzinskikrzysztof 0 1 6.957497 0.000000 20551 +zmudzinskispin 0 1 6.957497 0.000000 20552 +pole 0 1 6.957497 0.000000 20553 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..0d3c7ab7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 0 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 0 384 0.693147 0.000000 11 +work 0 380 0.693147 0.000000 9 +program 0 374 0.693147 0.000000 7 +engin 1 297 1.098612 1.098612 20 +us 0 329 1.098612 0.000000 16 +last 0 314 1.098612 0.000000 14 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +modifi 0 178 1.609438 0.000000 35 +data 1 170 1.791759 1.791759 49 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +algorithm 0 162 1.791759 0.000000 57 +implement 0 152 1.791759 0.000000 52 +address 0 170 1.791759 0.000000 62 +perform 1 143 1.945910 1.945910 74 +area 1 144 1.945910 1.945910 80 +hall 0 146 1.945910 0.000000 65 +architectur 0 139 1.945910 0.000000 77 +model 0 145 1.945910 0.000000 69 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +analysi 0 124 2.079442 0.000000 98 +tool 0 117 2.079442 0.000000 93 +number 0 130 2.079442 0.000000 97 +make 1 111 2.197225 2.197225 120 +techniqu 0 99 2.302585 0.000000 138 +center 0 88 2.397895 0.000000 158 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +educ 0 86 2.484907 0.000000 191 +help 0 83 2.484907 0.000000 175 +involv 0 71 2.639057 0.000000 227 +effici 0 73 2.639057 0.000000 233 +logic 0 71 2.639057 0.000000 230 +servic 0 72 2.639057 0.000000 236 +test 2 66 2.708050 5.416100 252 +integr 0 67 2.708050 0.000000 245 +simul 0 66 2.708050 0.000000 255 +goal 0 66 2.708050 0.000000 250 +view 0 70 2.708050 0.000000 254 +laboratori 1 63 2.772589 2.772589 292 +dept 0 64 2.772589 0.000000 291 +colleg 1 61 2.833213 2.833213 300 +best 0 59 2.833213 0.000000 299 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +extens 0 53 2.944439 0.000000 340 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +hardwar 0 51 2.995732 0.000000 350 +much 0 52 2.995732 0.000000 349 +tabl 0 51 2.995732 0.000000 346 +anoth 0 45 3.135494 0.000000 408 +fridai 0 44 3.135494 0.000000 390 +combin 0 42 3.218876 0.000000 421 +littl 1 39 3.258097 3.258097 454 +programm 0 39 3.258097 0.000000 445 +continu 0 39 3.258097 0.000000 448 +credit 0 38 3.295837 0.000000 460 +toler 0 33 3.433987 0.000000 533 +fault 1 32 3.465736 3.465736 547 +profil 0 30 3.555348 0.000000 581 +built 1 29 3.583519 3.583519 592 +arrai 0 27 3.637586 0.000000 627 +enhanc 0 26 3.688879 0.000000 644 +reliabl 0 25 3.737670 0.000000 674 +wai 0 25 3.737670 0.000000 662 +compress 1 23 3.806662 3.806662 719 +self 1 22 3.850148 3.850148 761 +vlsi 1 21 3.912023 3.912023 795 +hous 0 21 3.912023 0.000000 801 +facil 0 20 3.951244 0.000000 814 +concentr 0 18 4.060443 0.000000 906 +engineeringunivers 0 17 4.110874 0.000000 959 +modif 0 17 4.110874 0.000000 913 +monitor 0 17 4.110874 0.000000 941 +normal 0 16 4.174387 0.000000 995 +photograph 0 15 4.248495 0.000000 1056 +webmast 0 15 4.248495 0.000000 1045 +circuit 1 13 4.382027 4.382027 1131 +carri 0 13 4.382027 0.000000 1152 +station 0 13 4.382027 0.000000 1157 +engr 1 10 4.653960 4.653960 1427 +penalti 0 10 4.653960 0.000000 1405 +iowa 0 7 5.010635 0.000000 1971 +compact 0 7 5.010635 0.000000 1907 +asystem 0 4 5.568345 0.000000 2612 +termin 0 4 5.568345 0.000000 2852 +fountain 0 4 5.568345 0.000000 3069 +eduupd 0 4 5.568345 0.000000 3056 +saluja 1 3 5.857933 5.857933 3104 +eduportrait 0 3 5.857933 0.000000 4039 +fabric 0 3 5.857933 0.000000 3607 +consortia 0 3 5.857933 0.000000 4040 +cdtthi 0 3 5.857933 0.000000 4041 +testabl 1 2 6.263398 6.263398 5606 +kewal 1 2 6.263398 6.263398 4072 +drivemadison 0 2 6.263398 0.000000 6245 +andsequenti 0 2 6.263398 0.000000 4532 +salujaprofessor 0 1 6.957497 0.000000 20554 +jpgdepartmentselectr 0 1 6.957497 0.000000 20555 +engineeringcomput 0 1 6.957497 0.000000 20556 +interestsdesign 0 1 6.957497 0.000000 20557 +testableand 0 1 6.957497 0.000000 20558 +thisarea 0 1 6.957497 0.000000 20559 +theresearch 0 1 6.957497 0.000000 20560 +testgener 0 1 6.957497 0.000000 20561 +inself 0 1 6.957497 0.000000 20562 +andfault 0 1 6.957497 0.000000 20563 +methodsapplic 0 1 6.957497 0.000000 20564 +testenviron 0 1 6.957497 0.000000 20565 +regularstructur 0 1 6.957497 0.000000 20566 +ram 0 1 6.957497 0.000000 20567 +areinvestig 0 1 6.957497 0.000000 20568 +inhardwar 0 1 6.957497 0.000000 20569 +projectw 0 1 6.957497 0.000000 20570 +thatth 0 1 6.957497 0.000000 20571 +noimpact 0 1 6.957497 0.000000 20572 +digitalsystem 0 1 6.957497 0.000000 20573 +withcolor 0 1 6.957497 0.000000 20574 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..a37ba02a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 0 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +time 0 293 1.098612 0.000000 17 +cours 0 273 1.098612 0.000000 15 +last 0 314 1.098612 0.000000 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +oper 0 180 1.609438 0.000000 34 +modifi 0 178 1.609438 0.000000 35 +wisconsin 1 169 1.791759 1.791759 54 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +madison 0 165 1.791759 0.000000 55 +data 0 170 1.791759 0.000000 49 +base 0 165 1.791759 0.000000 50 +address 0 170 1.791759 0.000000 62 +perform 1 143 1.945910 1.945910 74 +professor 0 137 1.945910 0.000000 76 +architectur 0 139 1.945910 0.000000 77 +construct 0 139 1.945910 0.000000 82 +process 0 142 1.945910 0.000000 72 +support 0 132 1.945910 0.000000 83 +machin 1 129 2.079442 2.079442 95 +high 0 130 2.079442 0.000000 101 +studi 0 120 2.079442 0.000000 91 +schedul 0 119 2.079442 0.000000 85 +theori 0 111 2.197225 0.000000 127 +well 0 109 2.197225 0.000000 121 +teach 0 108 2.197225 0.000000 112 +advanc 0 99 2.302585 0.000000 130 +center 1 88 2.397895 2.397895 158 +real 0 93 2.397895 0.000000 144 +associ 0 93 2.397895 0.000000 151 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +control 2 82 2.484907 4.969814 164 +build 0 85 2.484907 0.000000 184 +larg 0 82 2.484907 0.000000 168 +help 0 83 2.484907 0.000000 175 +optim 0 79 2.564949 0.000000 197 +method 0 80 2.564949 0.000000 213 +involv 0 71 2.639057 0.000000 227 +materi 0 75 2.639057 0.000000 221 +servic 0 72 2.639057 0.000000 236 +tuesdai 0 73 2.639057 0.000000 219 +integr 1 67 2.708050 2.708050 245 +test 0 66 2.708050 0.000000 252 +view 0 70 2.708050 0.000000 254 +guid 0 63 2.772589 0.000000 267 +complex 0 64 2.772589 0.000000 269 +evalu 0 64 2.772589 0.000000 266 +dept 0 64 2.772589 0.000000 291 +colleg 1 61 2.833213 2.833213 300 +automat 0 61 2.833213 0.000000 306 +best 0 59 2.833213 0.000000 299 +space 1 57 2.890372 2.890372 310 +sever 0 56 2.890372 0.000000 322 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +tabl 0 51 2.995732 0.000000 346 +autom 1 41 3.218876 3.218876 434 +author 0 39 3.258097 0.000000 450 +close 0 38 3.295837 0.000000 465 +industri 0 38 3.295837 0.000000 464 +credit 0 38 3.295837 0.000000 460 +cost 0 37 3.332205 0.000000 480 +robot 1 36 3.367296 3.367296 497 +product 1 33 3.433987 3.433987 527 +toler 0 33 3.433987 0.000000 533 +human 1 32 3.465736 3.465736 546 +fault 0 32 3.465736 0.000000 547 +profil 0 30 3.555348 0.000000 581 +hope 0 28 3.610918 0.000000 610 +scale 0 28 3.610918 0.000000 613 +experiment 1 26 3.688879 3.688879 645 +handl 0 24 3.761200 0.000000 685 +highli 0 23 3.806662 0.000000 725 +self 0 22 3.850148 0.000000 761 +finish 0 22 3.850148 0.000000 748 +reduc 0 22 3.850148 0.000000 759 +properti 0 22 3.850148 0.000000 749 +director 0 22 3.850148 0.000000 767 +flexibl 0 21 3.912023 0.000000 792 +fund 0 21 3.912023 0.000000 805 +increas 0 20 3.951244 0.000000 829 +feedback 1 19 4.007333 4.007333 854 +engineeringunivers 0 17 4.110874 0.000000 959 +precis 0 15 4.248495 0.000000 1023 +hierarch 0 15 4.248495 0.000000 1018 +photograph 0 15 4.248495 0.000000 1056 +webmast 0 15 4.248495 0.000000 1045 +incorpor 0 13 4.382027 0.000000 1163 +nasa 0 13 4.382027 0.000000 1188 +carri 0 13 4.382027 0.000000 1152 +engr 1 10 4.653960 4.653960 1427 +weld 0 9 4.753590 0.000000 1538 +factor 0 9 4.753590 0.000000 1544 +manufactur 2 8 4.875197 9.750394 1634 +sensor 1 7 5.010635 5.010635 1920 +explain 0 7 5.010635 0.000000 1816 +actuat 0 5 5.347108 0.000000 2442 +neil 1 4 5.568345 5.568345 2841 +fulli 0 4 5.568345 0.000000 2986 +emphas 0 4 5.568345 0.000000 2672 +fountain 0 4 5.568345 0.000000 3069 +eduupd 0 4 5.568345 0.000000 3056 +eduportrait 0 3 5.857933 0.000000 4039 +aerospac 0 3 5.857933 0.000000 3555 +consortia 0 3 5.857933 0.000000 4040 +cdtthi 0 3 5.857933 0.000000 4041 +duffi 2 2 6.263398 12.526796 4845 +telerobot 1 2 6.263398 6.263398 4847 +drivemadison 0 2 6.263398 0.000000 6245 +departmentsmechan 0 1 6.957497 0.000000 20575 +engineeringeducationb 0 1 6.957497 0.000000 20576 +madisonm 0 1 6.957497 0.000000 20577 +madisonphd 0 1 6.957497 0.000000 20578 +madisonresearch 0 1 6.957497 0.000000 20579 +interestsrobot 0 1 6.957497 0.000000 20580 +micromechanismscent 0 1 6.957497 0.000000 20581 +consortiamanufactur 0 1 6.957497 0.000000 20582 +programwisconsin 0 1 6.957497 0.000000 20583 +roboticsprofessor 0 1 6.957497 0.000000 20584 +inspect 0 1 6.957497 0.000000 20585 +mold 0 1 6.957497 0.000000 20586 +rework 0 1 6.957497 0.000000 20587 +agricultur 0 1 6.957497 0.000000 20588 +tactil 0 1 6.957497 0.000000 20589 +sensori 0 1 6.957497 0.000000 20590 +fatigu 0 1 6.957497 0.000000 20591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..8fb4cbba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 0 640 0.000000 0.000000 4 +page 0 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 0 384 0.693147 0.000000 11 +inform 0 412 0.693147 0.000000 8 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 0 340 1.098612 0.000000 18 +last 0 314 1.098612 0.000000 14 +design 2 213 1.386294 2.772588 25 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +graduat 0 215 1.386294 0.000000 31 +gener 0 220 1.386294 0.000000 27 +public 0 202 1.609438 0.000000 43 +includ 0 208 1.609438 0.000000 42 +modifi 0 178 1.609438 0.000000 35 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +wisconsin 0 169 1.791759 0.000000 54 +madison 0 165 1.791759 0.000000 55 +applic 0 170 1.791759 0.000000 56 +recent 0 167 1.791759 0.000000 58 +address 0 170 1.791759 0.000000 62 +model 2 145 1.945910 3.891820 69 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +analysi 1 124 2.079442 2.079442 98 +studi 0 120 2.079442 0.000000 91 +tool 0 117 2.079442 0.000000 93 +specif 1 106 2.197225 2.197225 106 +assist 0 112 2.197225 0.000000 113 +intern 0 108 2.197225 0.000000 128 +structur 0 106 2.197225 0.000000 105 +make 0 111 2.197225 0.000000 120 +part 1 98 2.302585 2.302585 129 +techniqu 0 99 2.302585 0.000000 138 +need 0 98 2.302585 0.000000 135 +center 1 88 2.397895 2.397895 158 +select 0 91 2.397895 0.000000 154 +real 0 93 2.397895 0.000000 144 +graphic 0 90 2.397895 0.000000 147 +commun 0 95 2.397895 0.000000 157 +present 0 91 2.397895 0.000000 145 +call 0 91 2.397895 0.000000 153 +search 0 95 2.397895 0.000000 155 +comment 0 93 2.397895 0.000000 146 +journal 0 83 2.484907 0.000000 183 +activ 0 84 2.484907 0.000000 182 +contain 0 81 2.484907 0.000000 174 +help 0 83 2.484907 0.000000 175 +april 1 77 2.564949 2.564949 196 +appli 0 71 2.639057 0.000000 226 +servic 0 72 2.639057 0.000000 236 +simul 1 66 2.708050 2.708050 255 +practic 0 70 2.708050 0.000000 246 +thursdai 0 70 2.708050 0.000000 241 +view 0 70 2.708050 0.000000 254 +function 1 62 2.772589 2.772589 275 +foundat 0 62 2.772589 0.000000 286 +januari 0 62 2.772589 0.000000 264 +creat 0 63 2.772589 0.000000 277 +dept 0 64 2.772589 0.000000 291 +colleg 1 61 2.833213 2.833213 300 +simpl 0 60 2.833213 0.000000 298 +best 0 59 2.833213 0.000000 299 +space 0 57 2.890372 0.000000 310 +major 0 56 2.890372 0.000000 315 +index 0 56 2.890372 0.000000 309 +browser 0 56 2.890372 0.000000 313 +suggest 0 53 2.944439 0.000000 331 +investig 1 51 2.995732 2.995732 353 +maintain 0 51 2.995732 0.000000 342 +tabl 0 51 2.995732 0.000000 346 +physic 2 47 3.091042 6.182084 377 +california 0 46 3.091042 0.000000 388 +possibl 0 47 3.091042 0.000000 378 +algebra 0 45 3.135494 0.000000 394 +mechan 2 43 3.178054 6.356108 416 +term 0 43 3.178054 0.000000 411 +http 0 41 3.218876 0.000000 420 +york 0 41 3.218876 0.000000 435 +autom 0 41 3.218876 0.000000 434 +form 1 39 3.258097 3.258097 443 +transact 0 39 3.258097 0.000000 438 +industri 0 38 3.295837 0.000000 464 +credit 0 38 3.295837 0.000000 460 +formal 1 37 3.332205 3.332205 478 +represent 1 35 3.401197 3.401197 512 +award 1 34 3.401197 3.401197 523 +product 0 33 3.433987 0.000000 527 +collabor 0 32 3.465736 0.000000 543 +focu 0 30 3.555348 0.000000 571 +profil 0 30 3.555348 0.000000 581 +manipul 0 27 3.637586 0.000000 624 +repres 0 26 3.688879 0.000000 656 +consist 0 26 3.688879 0.000000 651 +effort 0 26 3.688879 0.000000 652 +reliabl 0 25 3.737670 0.000000 674 +todai 0 25 3.737670 0.000000 672 +aspect 0 25 3.737670 0.000000 663 +fellow 0 24 3.761200 0.000000 701 +famili 0 23 3.806662 0.000000 735 +geometri 1 22 3.850148 3.850148 752 +deal 0 22 3.850148 0.000000 736 +thu 0 21 3.912023 0.000000 773 +basi 0 20 3.951244 0.000000 828 +geometr 1 19 4.007333 4.007333 852 +separ 0 19 4.007333 0.000000 844 +behavior 1 18 4.060443 4.060443 881 +aid 1 18 4.060443 4.060443 904 +engineeringunivers 0 17 4.110874 0.000000 959 +analyz 0 17 4.110874 0.000000 925 +seek 0 17 4.110874 0.000000 954 +novel 0 15 4.248495 0.000000 1039 +photograph 0 15 4.248495 0.000000 1056 +webmast 0 15 4.248495 0.000000 1045 +topolog 0 14 4.317488 0.000000 1089 +convert 0 13 4.382027 0.000000 1122 +cannot 0 13 4.382027 0.000000 1144 +discret 0 13 4.382027 0.000000 1165 +career 0 12 4.465908 0.000000 1287 +captur 0 12 4.465908 0.000000 1232 +abil 0 11 4.553877 0.000000 1341 +engr 1 10 4.653960 4.653960 1427 +decomposit 0 10 4.653960 0.000000 1439 +relationship 0 10 4.653960 0.000000 1383 +facilit 0 10 4.653960 0.000000 1412 +mainten 0 9 4.753590 0.000000 1543 +establish 0 9 4.753590 0.000000 1532 +shapiro 1 8 4.875197 4.875197 1686 +manufactur 1 8 4.875197 4.875197 1634 +combinatori 1 8 4.875197 4.875197 1629 +competit 1 8 4.875197 4.875197 1635 +convers 0 8 4.875197 0.000000 1673 +boundari 0 7 5.010635 0.000000 1929 +appar 0 7 5.010635 0.000000 1958 +ongo 0 6 5.164786 0.000000 2215 +lack 0 6 5.164786 0.000000 1994 +solid 1 5 5.347108 5.347108 2255 +rigid 0 5 5.347108 0.000000 2432 +chain 1 4 5.568345 5.568345 2712 +phenomena 0 4 5.568345 0.000000 2962 +languagesand 0 4 5.568345 0.000000 3071 +fountain 0 4 5.568345 0.000000 3069 +eduupd 0 4 5.568345 0.000000 3056 +systemat 1 3 5.857933 5.857933 3781 +eduportrait 0 3 5.857933 0.000000 4039 +motor 0 3 5.857933 0.000000 3909 +fabric 0 3 5.857933 0.000000 3607 +consortia 0 3 5.857933 0.000000 4040 +cdtthi 0 3 5.857933 0.000000 4041 +artifact 1 2 6.263398 6.263398 5346 +avenuemadison 0 2 6.263398 0.000000 4842 +interestscomput 0 2 6.263398 0.000000 6113 +palmer 0 2 6.263398 0.000000 5453 +methodsand 0 2 6.263398 0.000000 5779 +amajor 0 2 6.263398 0.000000 5343 +designand 0 2 6.263398 0.000000 6100 +andmanufactur 0 2 6.263398 0.000000 6244 +tomanufactur 0 2 6.263398 0.000000 6016 +ofnew 0 2 6.263398 0.000000 5881 +vadim 1 1 6.957497 6.957497 20592 +vshapiro 0 1 6.957497 0.000000 20593 +jpgurl 0 1 6.957497 0.000000 20594 +departmentscomput 0 1 6.957497 0.000000 20595 +sciencemechan 0 1 6.957497 0.000000 20596 +engineeringeducationba 0 1 6.957497 0.000000 20597 +universitym 0 1 6.957497 0.000000 20598 +angelesm 0 1 6.957497 0.000000 20599 +universityphd 0 1 6.957497 0.000000 20600 +univeristyresearch 0 1 6.957497 0.000000 20601 +automationcent 0 1 6.957497 0.000000 20602 +consortiamathemat 0 1 6.957497 0.000000 20603 +programmanufactur 0 1 6.957497 0.000000 20604 +programspati 0 1 6.957497 0.000000 20605 +laboratoryselect 0 1 6.957497 0.000000 20606 +honorsn 0 1 6.957497 0.000000 20607 +vossler 0 1 6.957497 0.000000 20608 +betweengeometri 0 1 6.957497 0.000000 20609 +bemodel 0 1 6.957497 0.000000 20610 +manufacturedbas 0 1 6.957497 0.000000 20611 +ofdistinct 0 1 6.957497 0.000000 20612 +technologicalbarri 0 1 6.957497 0.000000 20613 +undermin 0 1 6.957497 0.000000 20614 +commercialgeometr 0 1 6.957497 0.000000 20615 +eliminatingambigu 0 1 6.957497 0.000000 20616 +ofparametr 0 1 6.957497 0.000000 20617 +bedescrib 0 1 6.957497 0.000000 20618 +interactingprimit 0 1 6.957497 0.000000 20619 +roadblock 0 1 6.957497 0.000000 20620 +withtheoret 0 1 6.957497 0.000000 20621 +smoothintegr 0 1 6.957497 0.000000 20622 +thedesir 0 1 6.957497 0.000000 20623 +tounifi 0 1 6.957497 0.000000 20624 +theseand 0 1 6.957497 0.000000 20625 +physicalobject 0 1 6.957497 0.000000 20626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..214fde4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 0 571 0.000000 0.000000 5 +depart 0 457 0.693147 0.000000 12 +system 0 443 0.693147 0.000000 6 +inform 0 412 0.693147 0.000000 8 +student 1 343 1.098612 1.098612 19 +current 0 284 1.098612 0.000000 21 +us 0 329 1.098612 0.000000 16 +also 0 259 1.386294 0.000000 28 +list 1 201 1.609438 1.609438 39 +group 0 183 1.609438 0.000000 36 +texa 0 160 1.791759 0.000000 64 +avail 0 169 1.791759 0.000000 48 +develop 0 174 1.791759 0.000000 53 +contact 0 153 1.791759 0.000000 59 +first 1 140 1.945910 1.945910 71 +problem 0 147 1.945910 0.000000 75 +relat 0 139 1.945910 0.000000 68 +report 1 131 2.079442 2.079442 92 +technolog 0 131 2.079442 0.000000 102 +mathemat 1 108 2.197225 2.197225 123 +site 0 106 2.197225 0.000000 119 +theori 0 111 2.197225 0.000000 127 +part 0 98 2.302585 0.000000 129 +present 0 91 2.397895 0.000000 145 +method 1 80 2.564949 2.564949 213 +want 0 79 2.564949 0.000000 199 +logic 1 71 2.639057 2.639057 230 +appli 0 71 2.639057 0.000000 226 +order 1 69 2.708050 2.708050 249 +knowledg 0 67 2.708050 0.000000 243 +previou 0 62 2.772589 0.000000 290 +improv 0 62 2.772589 0.000000 289 +descript 0 64 2.772589 0.000000 271 +index 0 56 2.890372 0.000000 309 +faculti 0 56 2.890372 0.000000 325 +variou 0 56 2.890372 0.000000 317 +visitor 0 49 3.044522 0.000000 371 +electron 0 47 3.091042 0.000000 379 +done 0 47 3.091042 0.000000 381 +natur 0 44 3.135494 0.000000 406 +autom 1 41 3.218876 3.218876 434 +past 0 42 3.218876 0.000000 428 +continu 0 39 3.258097 0.000000 448 +late 0 40 3.258097 0.000000 439 +tech 1 35 3.401197 3.401197 515 +ad 0 32 3.465736 0.000000 544 +produc 0 30 3.555348 0.000000 572 +computersci 0 30 3.555348 0.000000 562 +robert 0 30 3.555348 0.000000 567 +profil 0 30 3.555348 0.000000 581 +intend 0 28 3.610918 0.000000 599 +higher 0 24 3.761200 0.000000 690 +seri 0 24 3.761200 0.000000 708 +other 0 24 3.761200 0.000000 697 +proof 1 23 3.806662 3.806662 720 +geometri 0 22 3.850148 0.000000 752 +theorem 1 21 3.912023 3.912023 786 +prove 1 19 4.007333 4.007333 848 +feedback 0 19 4.007333 0.000000 854 +primarili 0 13 4.382027 0.000000 1185 +deduct 0 12 4.465908 0.000000 1236 +benjamin 0 11 4.553877 0.000000 1296 +incomplet 1 9 4.753590 4.753590 1575 +ataustin 0 9 4.753590 0.000000 1610 +prover 1 8 4.875197 4.875197 1653 +boyer 0 6 5.164786 0.000000 2013 +inequ 0 6 5.164786 0.000000 2113 +groupth 1 5 5.347108 5.347108 2549 +bledso 1 4 5.568345 5.568345 2999 +systemsand 0 4 5.568345 0.000000 2804 +chou 0 4 5.568345 0.000000 3033 +analog 0 4 5.568345 0.000000 2875 +feng 0 3 5.857933 0.000000 3300 +woodi 1 2 6.263398 6.263398 5459 +hine 1 2 6.263398 6.263398 4475 +intent 0 2 6.263398 0.000000 5768 +herei 0 2 6.263398 0.000000 6187 +hein 1 1 6.957497 6.957497 20627 +borel 1 1 6.957497 6.957497 20628 +groupautom 0 1 6.957497 0.000000 20629 +techreport 0 1 6.957497 0.000000 20630 +reportseri 0 1 6.957497 0.000000 20631 +grouplarri 0 1 6.957497 0.000000 20632 +hinesmarti 0 1 6.957497 0.000000 20633 +mayberrybenjamin 0 1 6.957497 0.000000 20634 +shultsalumniprevi 0 1 6.957497 0.000000 20635 +robertboyerj 0 1 6.957497 0.000000 20636 +strother 0 1 6.957497 0.000000 20637 +moorethi 0 1 6.957497 0.000000 20638 +collaboratorswhat 0 1 6.957497 0.000000 20639 +implyth 0 1 6.957497 0.000000 20640 +proverstrivelarri 0 1 6.957497 0.000000 20641 +struvelarri 0 1 6.957497 0.000000 20642 +proverand 0 1 6.957497 0.000000 20643 +theretoinclud 0 1 6.957497 0.000000 20644 +mcphee 0 1 6.957497 0.000000 20645 +theoryimplement 0 1 6.957497 0.000000 20646 +theoremprecondit 0 1 6.957497 0.000000 20647 +proverbledso 0 1 6.957497 0.000000 20648 +theoremnqthmboy 0 1 6.957497 0.000000 20649 +andmoor 0 1 6.957497 0.000000 20650 +clinc 0 1 6.957497 0.000000 20651 +iprshult 0 1 6.957497 0.000000 20652 +relatedlinksdo 0 1 6.957497 0.000000 20653 +shult 0 1 6.957497 0.000000 20654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..b81f8eea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 0 47 3.091042 0.000000 382 +perman 0 11 4.553877 0.000000 1372 +moveddocu 0 2 6.263398 0.000000 6246 +movedthi 0 2 6.263398 0.000000 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..b81f8eea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_log/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 0 47 3.091042 0.000000 382 +perman 0 11 4.553877 0.000000 1372 +moveddocu 0 2 6.263398 0.000000 6246 +movedthi 0 2 6.263398 0.000000 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..d3ea31d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 3 299 1.098612 3.295836 13 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +cornel 3 215 1.386294 4.158882 23 +link 1 247 1.386294 1.386294 24 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +phone 3 175 1.791759 5.375277 45 +hour 3 165 1.791759 5.375277 46 +hall 3 146 1.945910 5.837730 65 +assign 2 135 1.945910 3.891820 66 +note 1 142 1.945910 1.945910 67 +structur 1 106 2.197225 2.197225 105 +solut 7 82 2.484907 17.394349 162 +homework 5 79 2.564949 12.824745 193 +dynam 1 76 2.564949 2.564949 194 +upson 3 71 2.639057 7.917171 218 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 2 70 2.708050 5.416100 241 +wednesdai 2 64 2.772589 5.545178 261 +new 1 64 2.772589 2.772589 262 +unix 1 58 2.890372 2.890372 308 +fridai 1 44 3.135494 3.135494 390 +static 1 27 3.637586 3.637586 619 +practicum 1 16 4.174387 4.174387 960 +pagec 1 15 4.248495 4.248495 1011 +prelim 2 12 4.465908 8.931816 1201 +huang 1 12 4.465908 4.465908 1202 +systemsc 1 11 4.553877 4.553877 1293 +cheng 1 10 4.653960 4.653960 1381 +lili 1 5 5.347108 5.347108 2240 +filesystem 1 4 5.568345 5.568345 2587 +groupcours 1 3 5.857933 5.857933 3092 +ychuang 1 3 5.857933 5.857933 3093 +budiu 2 2 6.263398 12.526796 4042 +systemkenneth 1 2 6.263398 6.263398 4043 +birmanc 1 2 6.263398 6.263398 4044 +syllabuslectur 1 2 6.263398 6.263398 4045 +taslili 1 2 6.263398 6.263398 4046 +mihai 1 2 6.263398 6.263398 4047 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html new file mode 100644 index 00000000..f9ce9742 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Fall-95^CS415^CS415.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +last 2 314 1.098612 2.197224 14 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +oper 5 180 1.609438 8.047190 34 +group 1 183 1.609438 1.609438 36 +schedul 1 119 2.079442 2.079442 85 +specif 1 106 2.197225 2.197225 106 +question 1 91 2.397895 2.397895 141 +chang 2 82 2.484907 4.969814 163 +exampl 1 77 2.564949 2.564949 195 +window 1 68 2.708050 2.708050 242 +handout 2 64 2.772589 5.545178 263 +maintain 1 51 2.995732 2.995732 342 +format 2 48 3.044522 6.089044 356 +principl 1 48 3.044522 3.044522 357 +answer 1 45 3.135494 3.135494 391 +tutori 1 39 3.258097 3.258097 437 +hand 1 37 3.332205 3.332205 475 +symbol 1 27 3.637586 3.637586 620 +displai 1 23 3.806662 3.806662 712 +chip 3 21 3.912023 11.736069 770 +practicum 1 16 4.174387 4.174387 960 +pagec 1 15 4.248495 4.248495 1011 +configur 1 15 4.248495 4.248495 1012 +systemsc 1 11 4.553877 4.553877 1293 +correspond 1 10 4.653960 4.653960 1382 +phase 1 6 5.164786 5.164786 1977 +hoca 2 5 5.347108 10.694216 2241 +lorenzo 1 4 5.568345 5.568345 2588 +penn 1 3 5.857933 5.857933 3094 +alvisi 1 3 5.857933 5.857933 3095 +consol 2 2 6.263398 12.526796 4048 +systemsselect 1 2 6.263398 6.263398 4049 +postcript 1 2 6.263398 6.263398 4050 +postcriptdocu 1 1 6.957497 6.957497 6248 +hocacours 1 1 6.957497 6.957497 6249 +broccoli 1 1 6.957497 6.957497 6250 +fileth 1 1 6.957497 6.957497 6251 +systemth 1 1 6.957497 6.957497 6252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html new file mode 100644 index 00000000..85da584c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^cs.cornell.edu^Info^Courses^Spring-96^CS432^cs432.html @@ -0,0 +1,241 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 8 412 0.693147 5.545176 8 +system 5 443 0.693147 3.465735 6 +work 2 380 0.693147 1.386294 9 +research 1 431 0.693147 0.693147 10 +cours 8 273 1.098612 8.788896 15 +time 2 293 1.098612 2.197224 17 +offic 2 299 1.098612 2.197224 13 +last 2 314 1.098612 2.197224 14 +cornel 5 215 1.386294 6.931470 23 +design 4 213 1.386294 5.545176 25 +mail 2 238 1.386294 2.772588 22 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +class 4 199 1.609438 6.437752 37 +group 4 183 1.609438 6.437752 36 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +read 17 154 1.791759 30.459903 47 +hour 2 165 1.791759 3.583518 46 +avail 2 169 1.791759 3.583518 48 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +relat 9 139 1.945910 17.513190 68 +model 7 145 1.945910 13.621370 69 +file 2 132 1.945910 3.891820 70 +first 2 140 1.945910 3.891820 71 +process 2 142 1.945910 3.891820 72 +note 1 142 1.945910 1.945910 67 +databas 11 122 2.079442 22.873862 86 +introduct 2 126 2.079442 4.158884 87 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +instructor 3 108 2.197225 6.591675 107 +final 3 116 2.197225 6.591675 108 +structur 2 106 2.197225 4.394450 105 +send 2 114 2.197225 4.394450 109 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +pleas 1 113 2.197225 2.197225 114 +part 3 98 2.302585 6.907755 129 +advanc 2 99 2.302585 4.605170 130 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +grade 8 90 2.397895 19.183160 142 +follow 2 92 2.397895 4.795790 143 +control 3 82 2.484907 7.454721 164 +member 3 84 2.484907 7.454721 165 +solut 2 82 2.484907 4.969814 162 +second 2 81 2.484907 4.969814 166 +requir 2 81 2.484907 4.969814 167 +larg 1 82 2.484907 2.484907 168 +exam 1 86 2.484907 2.484907 169 +learn 1 86 2.484907 2.484907 170 +homework 24 79 2.564949 61.558776 193 +april 9 77 2.564949 23.084541 196 +optim 2 79 2.564949 5.129898 197 +know 1 80 2.564949 2.564949 198 +exampl 1 77 2.564949 2.564949 195 +want 1 79 2.564949 2.564949 199 +tuesdai 10 73 2.639057 26.390570 219 +name 4 72 2.639057 10.556228 220 +upson 2 71 2.639057 5.278114 218 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +thursdai 8 70 2.708050 21.664400 241 +knowledg 1 67 2.708050 2.708050 243 +receiv 1 66 2.708050 2.708050 244 +integr 1 67 2.708050 2.708050 245 +januari 3 62 2.772589 8.317767 264 +organ 1 65 2.772589 2.772589 265 +evalu 1 64 2.772589 2.772589 266 +wednesdai 1 64 2.772589 2.772589 261 +guid 1 63 2.772589 2.772589 267 +march 6 61 2.833213 16.999278 295 +type 1 61 2.833213 2.833213 296 +back 1 60 2.833213 2.833213 297 +index 3 56 2.890372 8.671116 309 +space 2 57 2.890372 5.780744 310 +summer 1 56 2.890372 2.890372 311 +semest 1 58 2.890372 2.890372 312 +februari 9 54 2.944439 26.499951 328 +cover 6 55 2.944439 17.666634 329 +three 1 54 2.944439 2.944439 330 +week 2 52 2.995732 5.991464 343 +date 1 51 2.995732 2.995732 344 +appoint 2 49 3.044522 6.089044 358 +principl 1 48 3.044522 3.044522 357 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +midterm 1 45 3.135494 3.135494 392 +even 1 45 3.135494 3.135494 393 +algebra 1 45 3.135494 3.135494 394 +term 2 43 3.178054 6.356108 411 +third 1 43 3.178054 3.178054 412 +edit 2 42 3.218876 6.437752 418 +press 1 42 3.218876 3.218876 419 +transact 2 39 3.258097 6.516194 438 +late 1 40 3.258097 3.258097 439 +submit 1 39 3.258097 3.258097 440 +credit 1 38 3.295837 3.295837 460 +concurr 3 34 3.401197 10.203591 501 +return 2 34 3.401197 6.802394 502 +queri 3 33 3.433987 10.301961 524 +within 1 33 3.433987 3.433987 525 +chapter 18 32 3.465736 62.383248 536 +concept 1 32 3.465736 3.465736 537 +computersci 1 30 3.555348 3.555348 562 +hard 1 30 3.555348 3.555348 563 +domain 1 30 3.555348 3.555348 564 +retriev 6 27 3.637586 21.825516 621 +though 1 27 3.637586 3.637586 622 +request 1 26 3.688879 3.688879 635 +constraint 1 26 3.688879 3.688879 636 +relev 1 26 3.688879 3.688879 637 +fundament 2 25 3.737670 7.475340 661 +deal 1 22 3.850148 3.850148 736 +recommend 1 22 3.850148 3.850148 737 +sort 1 22 3.850148 3.850148 738 +tent 1 22 3.850148 3.850148 739 +similar 1 21 3.912023 3.912023 771 +reserv 2 20 3.951244 7.902488 808 +nice 1 20 3.951244 3.951244 809 +minut 1 20 3.951244 3.951244 810 +entir 1 20 3.951244 3.951244 811 +break 1 20 3.951244 3.951244 812 +five 1 19 4.007333 4.007333 841 +along 1 18 4.060443 4.060443 878 +accept 1 18 4.060443 4.060443 879 +encourag 1 18 4.060443 4.060443 880 +vector 3 16 4.174387 12.523161 961 +permit 1 16 4.174387 4.174387 962 +alreadi 1 16 4.174387 4.174387 963 +pagec 1 15 4.248495 4.248495 1011 +indic 1 15 4.248495 4.248495 1013 +rank 1 14 4.317488 4.317488 1063 +latex 1 14 4.317488 4.317488 1064 +calculu 2 12 4.465908 8.931816 1203 +prelim 2 12 4.465908 8.931816 1201 +weight 1 12 4.465908 4.465908 1204 +worth 3 11 4.553877 13.661631 1294 +summar 1 11 4.553877 4.553877 1295 +benjamin 1 11 4.553877 4.553877 1296 +bill 1 11 4.553877 4.553877 1297 +relationship 4 10 4.653960 18.615840 1383 +forc 1 10 4.653960 4.653960 1384 +recoveri 3 9 4.753590 14.260770 1474 +crash 3 8 4.875197 14.625591 1616 +databasesystem 1 8 4.875197 4.875197 1617 +hash 1 8 4.875197 4.875197 1618 +cum 1 8 4.875197 4.875197 1619 +attach 1 7 5.010635 5.010635 1785 +silberschatz 2 6 5.164786 10.329572 1978 +textual 1 6 5.164786 5.164786 1979 +alphabet 1 6 5.164786 5.164786 1980 +phrase 1 5 5.347108 5.347108 2242 +ross 1 5 5.347108 5.347108 2243 +tupl 1 5 5.347108 5.347108 2244 +marco 1 4 5.568345 5.568345 2589 +clearli 1 4 5.568345 5.568345 2590 +entiti 3 3 5.857933 17.573799 3096 +roughli 1 3 5.857933 5.857933 3097 +singhal 1 3 5.857933 5.857933 3098 +sendmail 1 3 5.857933 5.857933 3099 +korth 2 2 6.263398 12.526796 4051 +aguilera 2 2 6.263398 12.526796 4052 +amith 2 2 6.263398 12.526796 4053 +thegroup 2 2 6.263398 12.526796 4054 +universityspr 1 2 6.263398 6.263398 4055 +introductionthi 1 2 6.263398 6.263398 4056 +queryoptim 1 2 6.263398 6.263398 4057 +prerequisitesc 1 2 6.263398 6.263398 4058 +elmasri 1 2 6.263398 6.263398 4059 +salton 1 2 6.263398 6.263398 4060 +amitsingh 1 2 6.263398 6.263398 4061 +yamasani 1 2 6.263398 6.263398 4062 +ofyour 1 2 6.263398 6.263398 4063 +ofcours 1 2 6.263398 6.263398 4064 +throughth 1 2 6.263398 6.263398 4065 +iti 1 2 6.263398 6.263398 4066 +dole 1 2 6.263398 6.263398 4067 +schedulethi 1 2 6.263398 6.263398 4068 +availablethursdai 5 1 6.957497 34.787485 6253 +duetuesdai 4 1 6.957497 27.829988 6254 +regrad 2 1 6.957497 13.914994 6255 +retrievalthursdai 2 1 6.957497 13.914994 6256 +retrievaldepart 1 1 6.957497 6.957497 6257 +gradeshav 1 1 6.957497 6.957497 6258 +twothird 1 1 6.957497 6.957497 6259 +systemsinclud 1 1 6.957497 6.957497 6260 +transactionprocess 1 1 6.957497 6.957497 6261 +usefulinform 1 1 6.957497 6.957497 6262 +willcov 1 1 6.957497 6.957497 6263 +invert 1 1 6.957497 6.957497 6264 +smartsystem 1 1 6.957497 6.957497 6265 +relevancefeedback 1 1 6.957497 6.957497 6266 +thesaurusconstruct 1 1 6.957497 6.957497 6267 +automatictext 1 1 6.957497 6.957497 6268 +placetuesdai 1 1 6.957497 6.957497 6269 +thurston 1 1 6.957497 6.957497 6270 +booksdatabas 1 1 6.957497 6.957497 6271 +mcgrawhil 1 1 6.957497 6.957497 6272 +andnavath 1 1 6.957497 6.957497 6273 +byullman 1 1 6.957497 6.957497 6274 +photocopiedmateri 1 1 6.957497 6.957497 6275 +sophia 1 1 6.957497 6.957497 6276 +georgiakaki 1 1 6.957497 6.957497 6277 +officehour 1 1 6.957497 6.957497 6278 +gradingexam 1 1 6.957497 6.957497 6279 +yourfin 1 1 6.957497 6.957497 6280 +policiesy 1 1 6.957497 6.957497 6281 +samegrad 1 1 6.957497 6.957497 6282 +tuesdayand 1 1 6.957497 6.957497 6283 +illeg 1 1 6.957497 6.957497 6284 +latexif 1 1 6.957497 6.957497 6285 +goodopportun 1 1 6.957497 6.957497 6286 +submissionpleas 1 1 6.957497 6.957497 6287 +clinton 1 1 6.957497 6.957497 6288 +perot 1 1 6.957497 6.957497 6289 +homeworksgrad 1 1 6.957497 6.957497 6290 +sortedalphabet 1 1 6.957497 6.957497 6291 +thecov 1 1 6.957497 6.957497 6292 +pagefollow 1 1 6.957497 6.957497 6293 +policyal 1 1 6.957497 6.957497 6294 +inwrit 1 1 6.957497 6.957497 6295 +referto 1 1 6.957497 6.957497 6296 +modelhomework 1 1 6.957497 6.957497 6297 +weightingthursdai 1 1 6.957497 6.957497 6298 +indexinghomework 1 1 6.957497 6.957497 6299 +evaluationtuesdai 1 1 6.957497 6.957497 6300 +feedbackthursdai 1 1 6.957497 6.957497 6301 +clusteringhomework 1 1 6.957497 6.957497 6302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ new file mode 100644 index 00000000..662da152 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Current^CS401^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +hour 2 165 1.791759 3.583518 46 +file 6 132 1.945910 11.675460 70 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +postscript 3 131 2.079442 6.238326 90 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +text 2 98 2.302585 4.605170 133 +peopl 1 96 2.302585 2.302585 132 +stuff 2 87 2.484907 4.969814 171 +resourc 1 81 2.484907 2.484907 172 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +come 1 78 2.564949 2.564949 202 +handout 1 64 2.772589 2.772589 263 +evalu 1 64 2.772589 2.772589 266 +collect 1 65 2.772589 2.772589 268 +new 1 64 2.772589 2.772589 262 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +suggest 1 53 2.944439 2.944439 331 +profession 1 51 2.995732 2.995732 345 +give 1 50 3.044522 3.044522 359 +electron 1 47 3.091042 3.091042 379 +submit 1 39 3.258097 3.258097 440 +respons 1 37 3.332205 3.332205 476 +procedur 1 36 3.367296 3.367296 488 +print 2 34 3.401197 6.802394 503 +quot 1 29 3.583519 3.583519 582 +ask 1 28 3.610918 3.610918 597 +wai 1 25 3.737670 3.737670 662 +leav 1 21 3.912023 3.912023 772 +break 1 20 3.951244 3.951244 812 +nice 1 20 3.951244 3.951244 809 +modif 1 17 4.110874 4.110874 913 +mayb 1 15 4.248495 4.248495 1014 +convert 2 13 4.382027 8.764054 1122 +social 1 13 4.382027 4.382027 1123 +misc 1 13 4.382027 4.382027 1124 +submiss 1 11 4.553877 4.553877 1298 +recit 2 9 4.753590 9.507180 1475 +admin 1 9 4.753590 4.753590 1476 +joke 1 8 4.875197 4.875197 1620 +printer 1 8 4.875197 4.875197 1621 +ethic 1 7 5.010635 5.010635 1786 +header 1 7 5.010635 5.010635 1787 +pfile 3 3 5.857933 17.573799 3100 +sumedh 1 3 5.857933 5.857933 3101 +enscript 3 2 6.263398 18.790194 4069 +incl 1 2 6.263398 6.263398 4070 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ new file mode 100644 index 00000000..53dfea5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^simon.cs.cornell.edu^Info^Courses^Spring-96^CS515^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 4 443 0.693147 2.772588 6 +work 3 380 0.693147 2.079441 9 +inform 3 412 0.693147 2.079441 8 +interest 1 384 0.693147 0.693147 11 +project 6 340 1.098612 6.591672 18 +cours 4 273 1.098612 4.394448 15 +student 2 343 1.098612 2.197224 19 +us 1 329 1.098612 1.098612 16 +design 2 213 1.386294 2.772588 25 +also 1 259 1.386294 1.386294 28 +distribut 4 162 1.791759 7.167036 51 +implement 1 152 1.791759 1.791759 52 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +take 1 97 2.302585 2.302585 134 +real 1 93 2.397895 2.397895 144 +present 1 91 2.397895 2.397895 145 +comment 1 93 2.397895 2.397895 146 +practic 2 70 2.708050 5.416100 246 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +descript 1 64 2.772589 2.772589 271 +plan 1 65 2.772589 2.772589 272 +simpl 1 60 2.833213 2.833213 298 +semest 1 58 2.890372 2.890372 312 +instruct 1 53 2.944439 2.944439 332 +tabl 1 51 2.995732 2.995732 346 +basic 1 50 3.044522 3.044522 360 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +rang 2 30 3.555348 7.110696 565 +depend 1 29 3.583519 3.583519 583 +progress 1 28 3.610918 3.610918 598 +aspect 1 25 3.737670 3.737670 663 +size 1 23 3.806662 3.806662 713 +varieti 1 22 3.850148 3.850148 740 +practicum 2 16 4.174387 8.348774 960 +choos 1 16 4.174387 4.174387 964 +signific 1 13 4.382027 4.382027 1125 +werner 1 10 4.653960 4.653960 1385 +vogel 1 8 4.875197 4.875197 1622 +earn 1 7 5.010635 5.010635 1788 +theywil 1 3 5.857933 5.857933 3102 +contentspag 1 3 5.857933 5.857933 3103 +offersa 1 2 6.263398 6.263398 4071 +systemsor 1 1 6.957497 6.957497 6303 +dirti 1 1 6.957497 6.957497 6304 +internetworkingto 1 1 6.957497 6.957497 6305 +teamsof 1 1 6.957497 6.957497 6306 +trough 1 1 6.957497 6.957497 6307 +complexityof 1 1 6.957497 6.957497 6308 +offcial 1 1 6.957497 6.957497 6309 +pageslink 1 1 6.957497 6.957497 6310 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cae.wisc.edu^~ece552^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cae.wisc.edu^~ece552^ new file mode 100644 index 00000000..fa2713ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cae.wisc.edu^~ece552^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 5 273 1.098612 5.493060 15 +engin 2 297 1.098612 2.197224 20 +project 2 340 1.098612 2.197224 18 +last 1 314 1.098612 1.098612 14 +link 2 247 1.386294 2.772588 24 +email 2 220 1.386294 2.772588 29 +design 1 213 1.386294 1.386294 25 +fall 3 181 1.609438 4.828314 40 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +avail 6 169 1.791759 10.750554 48 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +problem 6 147 1.945910 11.675460 75 +professor 3 137 1.945910 5.837730 76 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +tool 2 117 2.079442 4.158884 93 +document 1 121 2.079442 2.079442 89 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +part 3 98 2.302585 6.907755 129 +need 1 98 2.302585 2.302585 135 +graphic 1 90 2.397895 2.397895 147 +follow 1 92 2.397895 2.397895 143 +homepag 1 93 2.397895 2.397895 148 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +solut 6 82 2.484907 14.909442 162 +contain 2 81 2.484907 4.969814 174 +help 2 83 2.484907 4.969814 175 +start 1 83 2.484907 2.484907 173 +refer 2 78 2.564949 5.129898 203 +homework 1 79 2.564949 2.564949 193 +server 1 76 2.564949 2.564949 204 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +semest 1 58 2.890372 2.890372 312 +get 1 46 3.091042 3.091042 380 +announc 1 40 3.258097 3.258097 441 +electr 1 38 3.295837 3.295837 461 +manual 1 35 3.401197 3.401197 504 +intend 1 28 3.610918 3.610918 599 +outlin 2 17 4.110874 8.221748 914 +intro 1 17 4.110874 4.110874 915 +anyon 1 17 4.110874 4.110874 916 +conduct 1 14 4.317488 4.317488 1065 +train 1 14 4.317488 4.317488 1066 +valid 1 11 4.553877 4.553877 1299 +literatur 1 11 4.553877 4.553877 1300 +surf 1 11 4.553877 4.553877 1301 +andcomput 1 8 4.875197 4.875197 1623 +architect 1 8 4.875197 4.875197 1624 +theproject 1 6 5.164786 5.164786 1981 +gopher 1 6 5.164786 5.164786 1982 +mentor 3 4 5.568345 16.705035 2591 +sole 1 4 5.568345 5.568345 2592 +addition 1 4 5.568345 5.568345 2593 +saluja 3 3 5.857933 17.573799 3104 +duedat 1 3 5.857933 5.857933 3105 +wiscinfo 1 3 5.857933 5.857933 3106 +kewal 2 2 6.263398 12.526796 4072 +studentsenrol 1 2 6.263398 6.263398 4073 +sorin 3 1 6.957497 20.872491 6311 +generalinform 1 1 6.957497 6.957497 6312 +midtermsyllabu 1 1 6.957497 6.957497 6313 +midtermi 1 1 6.957497 6.957497 6314 +caeworkst 1 1 6.957497 6.957497 6315 +whomai 1 1 6.957497 6.957497 6316 +throughbold_brows 1 1 6.957497 6.957497 6317 +gettingstart 1 1 6.957497 6.957497 6318 +workbook 1 1 6.957497 6.957497 6319 +quicksim 1 1 6.957497 6.957497 6320 +trainingworkbook 1 1 6.957497 6.957497 6321 +exersis 1 1 6.957497 6.957497 6322 +thesedocu 1 1 6.957497 6.957497 6323 +uwengin 1 1 6.957497 6.957497 6324 +pmcst 1 1 6.957497 6.957497 6325 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html new file mode 100644 index 00000000..c7e4bf7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS100A^home.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +click 1 142 1.945910 1.945910 78 +schedul 1 119 2.079442 2.079442 85 +pleas 1 113 2.197225 2.197225 114 +grade 2 90 2.397895 4.795790 142 +homepag 1 93 2.397895 2.397895 148 +section 1 94 2.397895 2.397895 149 +info 4 85 2.484907 9.939628 176 +exam 2 86 2.484907 4.969814 169 +stuff 1 87 2.484907 2.484907 171 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +homework 3 79 2.564949 7.694847 193 +refer 1 78 2.564949 2.564949 203 +syllabu 2 67 2.708050 5.416100 247 +handout 2 64 2.772589 5.545178 263 +browser 1 56 2.890372 2.890372 313 +tabl 2 51 2.995732 5.991464 346 +run 1 51 2.995732 2.995732 347 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +abl 1 30 3.555348 3.555348 566 +frame 1 24 3.761200 3.761200 684 +handl 1 24 3.761200 3.761200 685 +individu 1 13 4.382027 4.382027 1126 +criteria 1 9 4.753590 4.753590 1477 +preced 1 3 5.857933 5.857933 3107 +goofi 1 2 6.263398 6.263398 4074 +herelink 1 1 6.957497 6.957497 6326 +motw 1 1 6.957497 6.957497 6327 +stuffnot 1 1 6.957497 6.957497 6328 +edupag 1 1 6.957497 6.957497 6329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html new file mode 100644 index 00000000..bfcfdf99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^CS212.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +cours 3 273 1.098612 3.295836 15 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +access 2 102 2.302585 4.605170 136 +user 1 104 2.302585 2.302585 137 +chang 2 82 2.484907 4.969814 163 +requir 1 81 2.484907 2.484907 167 +info 1 85 2.484907 2.484907 176 +materi 3 75 2.639057 7.917171 221 +window 2 68 2.708050 5.416100 242 +java 1 70 2.708050 2.708050 248 +browser 1 56 2.890372 2.890372 313 +netscap 1 44 3.135494 3.135494 395 +directori 1 45 3.135494 3.135494 396 +announc 1 40 3.258097 3.258097 441 +request 1 26 3.688879 3.688879 635 +interpret 3 24 3.761200 11.283600 686 +thu 1 21 3.912023 3.912023 773 +util 1 21 3.912023 3.912023 774 +behavior 1 18 4.060443 4.060443 881 +attempt 1 17 4.110874 4.110874 917 +pagec 1 15 4.248495 4.248495 1011 +correctli 1 9 4.753590 4.753590 1478 +dylan 2 8 4.875197 9.750394 1625 +password 1 4 5.568345 5.568345 2594 +parter 1 2 6.263398 6.263398 4075 +noodll 1 1 6.957497 6.957497 6330 +inconsist 1 1 6.957497 6.957497 6331 +partnerjoin 1 1 6.957497 6.957497 6332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html new file mode 100644 index 00000000..5cd9fbb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS212^outline.html @@ -0,0 +1,360 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 11 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 11 374 0.693147 7.624617 7 +work 5 380 0.693147 3.465735 9 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +cours 18 273 1.098612 19.775016 15 +time 6 293 1.098612 6.591672 17 +us 4 329 1.098612 4.394448 16 +student 3 343 1.098612 3.295836 19 +current 2 284 1.098612 2.197224 21 +offic 1 299 1.098612 1.098612 13 +languag 5 227 1.386294 6.931470 26 +gener 4 220 1.386294 5.545176 27 +cornel 2 215 1.386294 2.772588 23 +also 2 259 1.386294 2.772588 28 +email 1 220 1.386294 1.386294 29 +public 4 202 1.609438 6.437752 43 +list 3 201 1.609438 4.828314 39 +includ 2 208 1.609438 3.218876 42 +oper 2 180 1.609438 3.218876 34 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +data 5 170 1.791759 8.958795 49 +hour 4 165 1.791759 7.167036 46 +avail 4 169 1.791759 7.167036 48 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +assign 16 135 1.945910 31.134560 66 +problem 13 147 1.945910 25.296830 75 +object 8 138 1.945910 15.567280 79 +lectur 4 135 1.945910 7.783640 73 +model 2 145 1.945910 3.891820 69 +process 2 142 1.945910 3.891820 72 +first 1 140 1.945910 1.945910 71 +professor 1 137 1.945910 1.945910 76 +note 1 142 1.945910 1.945910 67 +provid 5 121 2.079442 10.397210 94 +schedul 3 119 2.079442 6.238326 85 +machin 3 129 2.079442 6.238326 95 +compil 2 122 2.079442 4.158884 96 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +site 7 106 2.197225 15.380575 119 +topic 3 114 2.197225 6.591675 110 +make 3 111 2.197225 6.591675 120 +final 3 116 2.197225 6.591675 108 +well 2 109 2.197225 4.394450 121 +version 2 113 2.197225 4.394450 122 +person 2 111 2.197225 4.394450 117 +teach 1 108 2.197225 2.197225 112 +mathemat 1 108 2.197225 2.197225 123 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +structur 1 106 2.197225 2.197225 105 +techniqu 2 99 2.302585 4.605170 138 +take 2 97 2.302585 4.605170 134 +need 2 98 2.302585 4.605170 135 +user 2 104 2.302585 4.605170 137 +access 1 102 2.302585 2.302585 136 +peopl 1 96 2.302585 2.302585 132 +memori 1 101 2.302585 2.302585 139 +question 4 91 2.397895 9.591580 141 +grade 2 90 2.397895 4.795790 142 +comment 1 93 2.397895 2.397895 146 +follow 1 92 2.397895 2.397895 143 +mani 1 92 2.397895 2.397895 150 +exam 6 86 2.484907 14.909442 169 +help 2 83 2.484907 4.969814 175 +info 1 85 2.484907 2.484907 176 +contain 1 81 2.484907 2.484907 174 +chang 1 82 2.484907 2.484907 163 +learn 1 86 2.484907 2.484907 170 +solut 1 82 2.484907 2.484907 162 +environ 1 84 2.484907 2.484907 177 +orient 6 80 2.564949 15.389694 205 +mondai 2 77 2.564949 5.129898 206 +exampl 2 77 2.564949 5.129898 195 +state 2 76 2.564949 5.129898 207 +dynam 1 76 2.564949 2.564949 194 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +complet 1 77 2.564949 2.564949 208 +come 1 78 2.564949 2.564949 202 +server 1 76 2.564949 2.564949 204 +optim 1 79 2.564949 2.564949 197 +tuesdai 5 73 2.639057 13.195285 219 +materi 3 75 2.639057 7.917171 221 +upson 2 71 2.639057 5.278114 218 +name 2 72 2.639057 5.278114 220 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +java 4 70 2.708050 10.832200 248 +order 3 69 2.708050 8.124150 249 +thursdai 2 70 2.708050 5.416100 241 +goal 1 66 2.708050 2.708050 250 +window 1 68 2.708050 2.708050 242 +would 1 67 2.708050 2.708050 251 +function 5 62 2.772589 13.862945 275 +evalu 5 64 2.772589 13.862945 266 +abstract 3 62 2.772589 8.317767 276 +handout 2 64 2.772589 5.545178 263 +wednesdai 2 64 2.772589 5.545178 261 +creat 1 63 2.772589 2.772589 277 +written 1 63 2.772589 2.772589 278 +polici 1 64 2.772589 2.772589 279 +collect 1 65 2.772589 2.772589 268 +best 3 59 2.833213 8.499639 299 +colleg 1 61 2.833213 2.833213 300 +type 1 61 2.833213 2.833213 296 +think 3 57 2.890372 8.671116 314 +browser 3 56 2.890372 8.671116 313 +major 1 56 2.890372 2.890372 315 +direct 1 57 2.890372 2.890372 316 +variou 1 56 2.890372 2.890372 317 +reason 1 57 2.890372 2.890372 318 +cover 3 55 2.944439 8.833317 329 +allow 1 53 2.944439 2.944439 333 +instruct 1 53 2.944439 2.944439 332 +local 1 55 2.944439 2.944439 334 +date 3 51 2.995732 8.987196 344 +week 1 52 2.995732 2.995732 343 +run 1 51 2.995732 2.995732 347 +set 8 50 3.044522 24.356176 361 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +even 2 45 3.135494 6.270988 393 +textbook 1 44 3.135494 3.135494 397 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +http 1 41 3.218876 3.218876 420 +combin 1 42 3.218876 3.218876 421 +howev 1 41 3.218876 3.218876 422 +late 4 40 3.258097 13.032388 439 +must 2 40 3.258097 6.516194 442 +submit 2 39 3.258097 6.516194 440 +correct 2 38 3.295837 6.591674 462 +credit 1 38 3.295837 3.295837 460 +respons 1 37 3.332205 3.332205 476 +hand 1 37 3.332205 3.332205 475 +staff 3 36 3.367296 10.101888 490 +procedur 2 36 3.367296 6.734592 488 +ofth 1 36 3.367296 3.367296 491 +download 1 36 3.367296 3.367296 489 +tree 1 36 3.367296 3.367296 492 +post 2 35 3.401197 6.802394 505 +either 1 35 3.401197 3.401197 506 +jame 1 35 3.401197 3.401197 507 +word 1 34 3.401197 3.401197 508 +approxim 1 35 3.401197 3.401197 509 +return 1 34 3.401197 3.401197 502 +singl 1 34 3.401197 3.401197 510 +random 1 34 3.401197 3.401197 511 +concept 1 32 3.465736 3.465736 537 +given 1 32 3.465736 3.465736 538 +extend 1 32 3.465736 3.465736 539 +express 1 32 3.465736 3.465736 540 +often 1 31 3.496508 3.496508 551 +rang 1 30 3.555348 3.555348 565 +computersci 1 30 3.555348 3.555348 562 +robert 1 30 3.555348 3.555348 567 +focus 1 29 3.583519 3.583519 584 +ask 1 28 3.610918 3.610918 597 +held 1 28 3.610918 3.610918 600 +campu 1 27 3.637586 3.637586 623 +symbol 1 27 3.637586 3.637586 620 +request 2 26 3.688879 7.377758 635 +rule 1 26 3.688879 3.688879 638 +background 1 25 3.737670 3.737670 664 +valu 1 25 3.737670 3.737670 665 +consult 5 24 3.761200 18.806000 687 +reach 3 24 3.761200 11.283600 688 +pattern 1 24 3.761200 3.761200 689 +interpret 1 24 3.761200 3.761200 686 +higher 1 24 3.761200 3.761200 690 +togeth 3 23 3.806662 11.419986 714 +variabl 1 23 3.806662 3.806662 715 +inth 1 22 3.850148 3.850148 741 +almost 1 22 3.850148 3.850148 742 +period 1 22 3.850148 3.850148 743 +thu 2 21 3.912023 7.824046 773 +annot 1 21 3.912023 3.912023 775 +half 1 21 3.912023 3.912023 776 +tell 1 21 3.912023 3.912023 777 +sure 2 20 3.951244 7.902488 813 +facil 2 20 3.951244 7.902488 814 +wonder 1 20 3.951244 3.951244 815 +break 1 20 3.951244 3.951244 812 +exercis 1 19 4.007333 4.007333 842 +els 1 19 4.007333 4.007333 843 +accept 3 18 4.060443 12.181329 879 +encourag 1 18 4.060443 4.060443 880 +account 1 18 4.060443 4.060443 882 +whether 1 17 4.110874 4.110874 918 +weekli 1 17 4.110874 4.110874 919 +macintosh 1 17 4.110874 4.110874 920 +outlin 1 17 4.110874 4.110874 914 +differenti 1 17 4.110874 4.110874 921 +match 1 16 4.174387 4.174387 965 +modern 1 16 4.174387 4.174387 966 +transfer 1 16 4.174387 4.174387 967 +earli 1 16 4.174387 4.174387 968 +easi 1 16 4.174387 4.174387 969 +stream 2 15 4.248495 8.496990 1015 +capabl 1 15 4.248495 4.248495 1016 +score 1 15 4.248495 4.248495 1017 +hierarch 1 15 4.248495 4.248495 1018 +borland 1 14 4.317488 4.317488 1067 +warn 1 14 4.317488 4.317488 1068 +polynomi 1 14 4.317488 4.317488 1069 +recurs 3 13 4.382027 13.146081 1127 +someon 2 13 4.382027 8.764054 1128 +suit 1 13 4.382027 4.382027 1129 +joint 1 13 4.382027 4.382027 1130 +skill 2 12 4.465908 8.931816 1205 +prelim 2 12 4.465908 8.931816 1201 +iter 1 12 4.465908 4.465908 1206 +broad 3 11 4.553877 13.661631 1302 +appl 1 11 4.553877 4.553877 1303 +induct 1 11 4.553877 4.553877 1304 +queue 2 10 4.653960 9.307920 1386 +sundai 1 10 4.653960 4.653960 1387 +arithmet 1 10 4.653960 4.653960 1388 +stack 1 10 4.653960 4.653960 1389 +introductori 2 9 4.753590 9.507180 1479 +recit 1 9 4.753590 4.753590 1475 +preliminari 1 9 4.753590 4.753590 1480 +andth 1 9 4.753590 4.753590 1481 +dylan 7 8 4.875197 34.126379 1625 +simpli 1 8 4.875197 4.875197 1626 +matter 1 8 4.875197 4.875197 1627 +on 1 8 4.875197 4.875197 1628 +justin 2 7 5.010635 10.021270 1789 +happen 1 7 5.010635 5.010635 1790 +dispatch 1 7 5.010635 5.010635 1791 +prioriti 1 7 5.010635 5.010635 1792 +huttenloch 2 6 5.164786 10.329572 1983 +chosen 1 6 5.164786 5.164786 1984 +contract 1 6 5.164786 5.164786 1985 +garbag 1 6 5.164786 5.164786 1986 +conot 2 5 5.347108 10.694216 2245 +hardcopi 1 5 5.347108 5.347108 2246 +substitut 1 5 5.347108 5.347108 2247 +variat 1 5 5.347108 5.347108 2248 +password 2 4 5.568345 11.136690 2594 +toth 2 4 5.568345 11.136690 2595 +infinit 2 4 5.568345 11.136690 2596 +clearli 1 4 5.568345 5.568345 2590 +wherea 1 4 5.568345 5.568345 2597 +exposur 1 4 5.568345 5.568345 2598 +midnight 1 4 5.568345 5.568345 2599 +amaz 1 4 5.568345 5.568345 2600 +thiscours 1 4 5.568345 5.568345 2601 +catch 1 4 5.568345 5.568345 2602 +illus 1 4 5.568345 5.568345 2603 +szewczyk 2 3 5.857933 11.715866 3108 +voskuhl 2 3 5.857933 11.715866 3109 +useth 1 3 5.857933 5.857933 3110 +programsand 1 3 5.857933 5.857933 3111 +toolbox 1 3 5.857933 5.857933 3112 +programmingtechniqu 1 3 5.857933 5.857933 3113 +kimbal 1 3 5.857933 5.857933 3114 +andon 1 3 5.857933 5.857933 3115 +requirementsstud 1 3 5.857933 5.857933 3116 +immedi 1 3 5.857933 5.857933 3117 +jointli 1 3 5.857933 5.857933 3118 +doubt 1 3 5.857933 5.857933 3119 +argument 1 3 5.857933 5.857933 3120 +quotat 1 3 5.857933 5.857933 3121 +inherit 1 3 5.857933 5.857933 3122 +heap 1 3 5.857933 5.857933 3123 +exit 1 3 5.857933 5.857933 3124 +rangeof 2 2 6.263398 12.526796 4076 +standalon 2 2 6.263398 12.526796 4077 +developedat 1 2 6.263398 6.263398 4078 +orientedlanguag 1 2 6.263398 6.263398 4079 +therewil 1 2 6.263398 6.263398 4080 +combinationof 1 2 6.263398 6.263398 4081 +programmingproblem 1 2 6.263398 6.263398 4082 +youwork 1 2 6.263398 6.263398 4083 +growth 1 2 6.263398 6.263398 4084 +ugrad 3 1 6.957497 20.872491 6333 +idand 2 1 6.957497 13.914994 6334 +tobia 2 1 6.957497 13.914994 6335 +mayr 2 1 6.957497 13.914994 6336 +hamblin 2 1 6.957497 13.914994 6337 +mutabl 2 1 6.957497 13.914994 6338 +informationaugust 1 1 6.957497 6.957497 6339 +courseabout 1 1 6.957497 6.957497 6340 +notationthat 1 1 6.957497 6.957497 6341 +takec 1 1 6.957497 6.957497 6342 +programmingparadigm 1 1 6.957497 6.957497 6343 +imperativeprogram 1 1 6.957497 6.957497 6344 +goodform 1 1 6.957497 6.957497 6345 +probablytak 1 1 6.957497 6.957497 6346 +questionsor 1 1 6.957497 6.957497 6347 +serverwhich 1 1 6.957497 6.957497 6348 +answersa 1 1 6.957497 6.957497 6349 +thisweek 1 1 6.957497 6.957497 6350 +edubut 1 1 6.957497 6.957497 6351 +aboutproblem 1 1 6.957497 6.957497 6352 +upsonjam 1 1 6.957497 6.957497 6353 +tarobert 1 1 6.957497 6.957497 6354 +tajustin 1 1 6.957497 6.957497 6355 +taandra 1 1 6.957497 6.957497 6356 +ferencz 1 1 6.957497 6.957497 6357 +melissa 1 1 6.957497 6.957497 6358 +consultantwhen 1 1 6.957497 6.957497 6359 +meetlectur 1 1 6.957497 6.957497 6360 +andrecit 1 1 6.957497 6.957497 6361 +recitationsexpand 1 1 6.957497 6.957497 6362 +opportunityto 1 1 6.957497 6.957497 6363 +eachproblem 1 1 6.957497 6.957497 6364 +setsdu 1 1 6.957497 6.957497 6365 +mondayeven 1 1 6.957497 6.957497 6366 +consultinghour 1 1 6.957497 6.957497 6367 +voskuhltba 1 1 6.957497 6.957497 6368 +materialsther 1 1 6.957497 6.957497 6369 +handoutsand 1 1 6.957497 6.957497 6370 +implementedin 1 1 6.957497 6.957497 6371 +downloadonto 1 1 6.957497 6.957497 6372 +ontoyour 1 1 6.957497 6.957497 6373 +recentvers 1 1 6.957497 6.957497 6374 +gradeswil 1 1 6.957497 6.957497 6375 +thetot 1 1 6.957497 6.957497 6376 +willgener 1 1 6.957497 6.957497 6377 +followingclass 1 1 6.957497 6.957497 6378 +sittingdown 1 1 6.957497 6.957497 6379 +sink 1 1 6.957497 6.957497 6380 +beforesit 1 1 6.957497 6.957497 6381 +workmuch 1 1 6.957497 6.957497 6382 +jointassign 1 1 6.957497 6.957497 6383 +circumstancesmai 1 1 6.957497 6.957497 6384 +yourown 1 1 6.957497 6.957497 6385 +yougot 1 1 6.957497 6.957497 6386 +whenpeopl 1 1 6.957497 6.957497 6387 +lifeunpleas 1 1 6.957497 6.957497 6388 +facilitiescit 1 1 6.957497 6.957497 6389 +andpc 1 1 6.957497 6.957497 6390 +upsonmac 1 1 6.957497 6.957497 6391 +datesal 1 1 6.957497 6.957497 6392 +mondaynight 1 1 6.957497 6.957497 6393 +submityour 1 1 6.957497 6.957497 6394 +multimethod 1 1 6.957497 6.957497 6395 +heapsort 1 1 6.957497 6.957497 6396 +metacircular 1 1 6.957497 6.957497 6397 +nonloc 1 1 6.957497 6.957497 6398 +throw 1 1 6.957497 6.957497 6399 +quicksort 1 1 6.957497 6.957497 6400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ new file mode 100644 index 00000000..c19c4126 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS314^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +inform 4 412 0.693147 2.772588 8 +system 1 443 0.693147 0.693147 6 +cours 9 273 1.098612 9.887508 15 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +email 2 220 1.386294 2.772588 29 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +includ 2 208 1.609438 3.218876 42 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +process 2 142 1.945910 3.891820 72 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +document 1 121 2.079442 2.079442 89 +send 2 114 2.197225 4.394450 109 +pleas 2 113 2.197225 4.394450 114 +check 1 115 2.197225 2.197225 118 +memori 1 101 2.302585 2.302585 139 +section 2 94 2.397895 4.795790 149 +follow 1 92 2.397895 2.397895 143 +help 2 83 2.484907 4.969814 175 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +homework 3 79 2.564949 7.694847 193 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +mondai 1 77 2.564949 2.564949 206 +materi 5 75 2.639057 13.195285 221 +organ 1 65 2.772589 2.772589 265 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +allow 2 53 2.944439 5.888878 333 +processor 1 54 2.944439 2.944439 335 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +date 2 51 2.995732 5.991464 344 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +made 1 44 3.135494 3.135494 398 +staff 1 36 3.367296 3.367296 490 +represent 1 35 3.401197 3.401197 512 +post 1 35 3.401197 3.401197 505 +bookmark 1 26 3.688879 3.688879 639 +request 1 26 3.688879 3.688879 635 +consult 1 24 3.761200 3.761200 687 +hierarchi 1 22 3.850148 3.850148 744 +sequenti 1 22 3.850148 3.850148 745 +annot 2 21 3.912023 7.824046 775 +path 1 21 3.912023 3.912023 778 +unit 1 21 3.912023 3.912023 779 +separ 1 19 4.007333 4.007333 844 +appropri 1 18 4.060443 4.060443 883 +account 1 18 4.060443 4.060443 882 +otherwis 1 17 4.110874 4.110874 922 +sign 1 16 4.174387 4.174387 970 +circuit 1 13 4.382027 4.382027 1131 +difficulti 1 13 4.382027 4.382027 1132 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +assembl 1 12 4.465908 4.465908 1207 +combinatori 1 8 4.875197 4.875197 1629 +interrupt 1 7 5.010635 5.010635 1793 +saturdai 1 7 5.010635 5.010635 1794 +hidden 1 6 5.164786 5.164786 1987 +conot 3 5 5.347108 16.041324 2245 +registr 2 5 5.347108 10.694216 2249 +microprogram 1 4 5.568345 5.568345 2604 +eickenfal 1 3 5.857933 5.857933 3125 +kimbal 1 3 5.857933 5.857933 3114 +helpif 1 3 5.857933 5.857933 3126 +mate 1 3 5.857933 5.857933 3127 +encount 1 3 5.857933 5.857933 3128 +btopic 1 2 6.263398 6.263398 4085 +organizationthorsten 1 1 6.957497 6.957497 6401 +materialsal 1 1 6.957497 6.957497 6402 +listlist 1 1 6.957497 6.957497 6403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html new file mode 100644 index 00000000..5053d570 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS401^home.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +hour 2 165 1.791759 3.583518 46 +file 6 132 1.945910 11.675460 70 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +postscript 3 131 2.079442 6.238326 90 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +text 2 98 2.302585 4.605170 133 +peopl 1 96 2.302585 2.302585 132 +stuff 2 87 2.484907 4.969814 171 +resourc 1 81 2.484907 2.484907 172 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +mondai 1 77 2.564949 2.564949 206 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +handout 1 64 2.772589 2.772589 263 +evalu 1 64 2.772589 2.772589 266 +collect 1 65 2.772589 2.772589 268 +new 1 64 2.772589 2.772589 262 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +suggest 1 53 2.944439 2.944439 331 +profession 1 51 2.995732 2.995732 345 +give 1 50 3.044522 3.044522 359 +electron 1 47 3.091042 3.091042 379 +submit 1 39 3.258097 3.258097 440 +respons 1 37 3.332205 3.332205 476 +procedur 1 36 3.367296 3.367296 488 +print 2 34 3.401197 6.802394 503 +quot 1 29 3.583519 3.583519 582 +held 1 28 3.610918 3.610918 600 +ask 1 28 3.610918 3.610918 597 +wai 1 25 3.737670 3.737670 662 +leav 1 21 3.912023 3.912023 772 +break 1 20 3.951244 3.951244 812 +nice 1 20 3.951244 3.951244 809 +modif 1 17 4.110874 4.110874 913 +mayb 1 15 4.248495 4.248495 1014 +floor 1 14 4.317488 4.317488 1070 +convert 2 13 4.382027 8.764054 1122 +social 1 13 4.382027 4.382027 1123 +misc 1 13 4.382027 4.382027 1124 +submiss 1 11 4.553877 4.553877 1298 +recit 2 9 4.753590 9.507180 1475 +admin 1 9 4.753590 4.753590 1476 +joke 1 8 4.875197 4.875197 1620 +printer 1 8 4.875197 4.875197 1621 +ethic 1 7 5.010635 5.010635 1786 +header 1 7 5.010635 5.010635 1787 +pfile 3 3 5.857933 17.573799 3100 +sumedh 1 3 5.857933 5.857933 3101 +enscript 3 2 6.263398 18.790194 4069 +incl 1 2 6.263398 6.263398 4070 +csuglab 1 1 6.957497 6.957497 6404 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html new file mode 100644 index 00000000..b519c119 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS415^CS414.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +offic 3 299 1.098612 3.295836 13 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +cornel 3 215 1.386294 4.158882 23 +link 1 247 1.386294 1.386294 24 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +phone 3 175 1.791759 5.375277 45 +hour 3 165 1.791759 5.375277 46 +assign 6 135 1.945910 11.675460 66 +hall 3 146 1.945910 5.837730 65 +note 1 142 1.945910 1.945910 67 +structur 1 106 2.197225 2.197225 105 +solut 7 82 2.484907 17.394349 162 +dynam 1 76 2.564949 2.564949 194 +upson 3 71 2.639057 7.917171 218 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 2 70 2.708050 5.416100 241 +wednesdai 2 64 2.772589 5.545178 261 +new 1 64 2.772589 2.772589 262 +unix 1 58 2.890372 2.890372 308 +fridai 1 44 3.135494 3.135494 390 +static 1 27 3.637586 3.637586 619 +practicum 1 16 4.174387 4.174387 960 +pagec 1 15 4.248495 4.248495 1011 +prelim 2 12 4.465908 8.931816 1201 +huang 1 12 4.465908 4.465908 1202 +systemsc 1 11 4.553877 4.553877 1293 +cheng 1 10 4.653960 4.653960 1381 +lili 1 5 5.347108 5.347108 2240 +filesystem 1 4 5.568345 5.568345 2587 +groupcours 1 3 5.857933 5.857933 3092 +ychuang 1 3 5.857933 5.857933 3093 +budiu 2 2 6.263398 12.526796 4042 +systemkenneth 1 2 6.263398 6.263398 4043 +birmanc 1 2 6.263398 6.263398 4044 +syllabuslectur 1 2 6.263398 6.263398 4045 +taslili 1 2 6.263398 6.263398 4046 +mihai 1 2 6.263398 6.263398 4047 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html new file mode 100644 index 00000000..fd029155 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS472^cs472.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +page 5 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 5 374 0.693147 3.465735 7 +depart 2 457 0.693147 1.386294 12 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +cours 4 273 1.098612 4.394448 15 +us 2 329 1.098612 2.197224 16 +time 2 293 1.098612 2.197224 17 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +class 2 199 1.609438 3.218876 37 +oper 2 180 1.609438 3.218876 34 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +includ 1 208 1.609438 1.609438 42 +avail 4 169 1.791759 7.167036 48 +read 1 154 1.791759 1.791759 47 +assign 5 135 1.945910 9.729550 66 +file 3 132 1.945910 5.837730 70 +problem 2 147 1.945910 3.891820 75 +postscript 2 131 2.079442 4.158884 90 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +code 3 108 2.197225 6.591675 116 +pleas 2 113 2.197225 4.394450 114 +find 2 111 2.197225 4.394450 111 +look 1 107 2.197225 2.197225 115 +need 1 98 2.302585 2.302585 135 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +solut 5 82 2.484907 12.424535 162 +academ 1 82 2.484907 2.484907 178 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +novemb 1 81 2.484907 2.484907 179 +homework 5 79 2.564949 12.824745 193 +mondai 2 77 2.564949 5.129898 206 +complet 1 77 2.564949 2.564949 208 +materi 2 75 2.639057 5.278114 221 +intellig 1 72 2.639057 2.639057 225 +appli 1 71 2.639057 2.639057 226 +tuesdai 1 73 2.639057 2.639057 219 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +result 1 65 2.772589 2.772589 281 +special 1 56 2.890372 2.890372 320 +found 1 53 2.944439 2.944439 337 +date 1 51 2.995732 2.995732 344 +still 2 50 3.044522 6.089044 362 +right 1 48 3.044522 3.044522 363 +move 1 47 3.091042 3.091042 382 +get 1 46 3.091042 3.091042 380 +netscap 1 44 3.135494 3.135494 395 +midterm 1 45 3.135494 3.135494 392 +offer 2 43 3.178054 6.356108 414 +third 1 43 3.178054 3.178054 412 +announc 1 40 3.258097 3.258097 441 +either 1 35 3.401197 3.401197 506 +queri 1 33 3.433987 3.433987 524 +posit 1 31 3.496508 3.496508 552 +abl 1 30 3.555348 3.555348 566 +specifi 1 30 3.555348 3.555348 568 +exist 1 30 3.555348 3.555348 569 +limit 1 29 3.583519 3.583519 585 +load 1 28 3.610918 3.610918 601 +concern 1 25 3.737670 3.737670 666 +begin 1 23 3.806662 3.806662 716 +variabl 1 23 3.806662 3.806662 715 +defin 1 22 3.850148 3.850148 746 +fact 2 21 3.912023 7.824046 780 +longer 1 20 3.951244 3.951244 816 +assum 1 19 4.007333 4.007333 845 +scott 3 18 4.060443 12.181329 884 +account 1 18 4.060443 4.060443 882 +statu 1 18 4.060443 4.060443 885 +previous 1 17 4.110874 4.110874 923 +otherwis 1 17 4.110874 4.110874 922 +alreadi 1 16 4.174387 4.174387 963 +portion 1 16 4.174387 4.174387 971 +atth 1 15 4.248495 4.248495 1019 +success 1 10 4.653960 4.653960 1390 +guarante 1 10 4.653960 4.653960 1391 +kevin 5 9 4.753590 23.767950 1482 +slightli 1 7 5.010635 5.010635 1795 +paramet 1 7 5.010635 5.010635 1796 +planner 1 7 5.010635 5.010635 1797 +hunt 1 7 5.010635 5.010635 1798 +remind 1 7 5.010635 5.010635 1799 +schema 1 6 5.164786 5.164786 1988 +bind 2 5 5.347108 10.694216 2250 +constant 1 5 5.347108 5.347108 2251 +seriou 1 5 5.347108 5.347108 2252 +clarif 1 5 5.347108 5.347108 2253 +clair 5 4 5.568345 27.841725 2605 +shouldn 1 4 5.568345 5.568345 2606 +unless 1 4 5.568345 5.568345 2607 +meanwhil 1 3 5.857933 5.857933 3129 +worri 1 3 5.857933 5.857933 3130 +add 1 3 5.857933 5.857933 3131 +thec 1 3 5.857933 5.857933 3132 +pagesc 1 3 5.857933 5.857933 3133 +pagecsfound 1 2 6.263398 6.263398 4086 +dodg 2 1 6.957497 13.914994 6405 +notethat 1 1 6.957497 6.957497 6406 +rubix 1 1 6.957497 6.957497 6407 +thefunct 1 1 6.957497 6.957497 6408 +rearrang 1 1 6.957497 6.957497 6409 +appeas 1 1 6.957497 6.957497 6410 +var 1 1 6.957497 6.957497 6411 +bracket 1 1 6.957497 6.957497 6412 +youus 1 1 6.957497 6.957497 6413 +machinesshould 1 1 6.957497 6.957497 6414 +sbin 1 1 6.957497 6.957497 6415 +ksaunder 1 1 6.957497 6.957497 6416 +sbinfor 1 1 6.957497 6.957497 6417 +gremlin 1 1 6.957497 6.957497 6418 +codefor 1 1 6.957497 6.957497 6419 +andget 1 1 6.957497 6.957497 6420 +uponcomplet 1 1 6.957497 6.957497 6421 +thoseus 1 1 6.957497 6.957497 6422 +zeroon 1 1 6.957497 6.957497 6423 +asspecifi 1 1 6.957497 6.957497 6424 +oneassign 1 1 6.957497 6.957497 6425 +vanto 1 1 6.957497 6.957497 6426 +thisclarif 1 1 6.957497 6.957497 6427 +newhomework 1 1 6.957497 6.957497 6428 +coursemateri 1 1 6.957497 6.957497 6429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html new file mode 100644 index 00000000..98f92d38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS481^CS481.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +offic 2 299 1.098612 2.197224 13 +fall 2 181 1.609438 3.218876 40 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +note 3 142 1.945910 5.837730 67 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +studi 1 120 2.079442 2.079442 91 +pleas 1 113 2.197225 2.197225 114 +exam 1 86 2.484907 2.484907 169 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +homework 4 79 2.564949 10.259796 193 +integr 1 67 2.708050 2.708050 245 +room 1 59 2.833213 2.833213 301 +date 1 51 2.995732 2.995732 344 +set 1 50 3.044522 3.044522 361 +revis 1 26 3.688879 3.688879 640 +pagec 1 15 4.248495 4.248495 1011 +automata 1 13 4.382027 4.382027 1135 +prelim 2 12 4.465908 8.931816 1201 +hardcopi 1 5 5.347108 5.347108 2246 +incorrect 1 3 5.857933 5.857933 3134 +nikolai 1 2 6.263398 6.263398 4087 +theorywelcom 1 1 6.957497 6.957497 6430 +guideannounc 1 1 6.957497 6.957497 6431 +erratum 1 1 6.957497 6.957497 6432 +hourscod 1 1 6.957497 6.957497 6433 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html new file mode 100644 index 00000000..914cd480 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS537^course.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +system 19 443 0.693147 13.169793 6 +work 6 380 0.693147 4.158882 9 +research 5 431 0.693147 3.465735 10 +program 4 374 0.693147 2.772588 7 +inform 3 412 0.693147 2.079441 8 +project 26 340 1.098612 28.563912 18 +cours 7 273 1.098612 7.690284 15 +us 6 329 1.098612 6.591672 16 +student 4 343 1.098612 4.394448 19 +offic 4 299 1.098612 4.394448 13 +time 2 293 1.098612 2.197224 17 +engin 2 297 1.098612 2.197224 20 +softwar 5 220 1.386294 6.931470 30 +also 5 259 1.386294 6.931470 28 +mail 3 238 1.386294 4.158882 22 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +paper 7 205 1.609438 11.266066 38 +list 6 201 1.609438 9.656628 39 +class 3 199 1.609438 4.828314 37 +fall 2 181 1.609438 3.218876 40 +includ 1 208 1.609438 1.609438 42 +develop 7 174 1.791759 12.542313 53 +data 4 170 1.791759 7.167036 49 +base 3 165 1.791759 5.375277 50 +hour 3 165 1.791759 5.375277 46 +avail 2 169 1.791759 3.583518 48 +phone 2 175 1.791759 3.583518 45 +applic 1 170 1.791759 1.791759 56 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +note 4 142 1.945910 7.783640 67 +process 4 142 1.945910 7.783640 72 +lectur 3 135 1.945910 5.837730 73 +click 3 142 1.945910 5.837730 78 +assign 3 135 1.945910 5.837730 66 +area 3 144 1.945910 5.837730 80 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +databas 19 122 2.079442 39.509398 86 +provid 3 121 2.079442 6.238326 94 +number 2 130 2.079442 4.158884 97 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +code 6 108 2.197225 13.183350 116 +final 5 116 2.197225 10.986125 108 +topic 4 114 2.197225 8.788900 110 +specif 3 106 2.197225 6.591675 106 +place 2 106 2.197225 4.394450 124 +person 2 111 2.197225 4.394450 117 +manag 2 114 2.197225 4.394450 125 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +part 4 98 2.302585 9.210340 129 +advanc 3 99 2.302585 6.907755 130 +need 3 98 2.302585 6.907755 135 +text 3 98 2.302585 6.907755 133 +access 2 102 2.302585 4.605170 136 +take 2 97 2.302585 4.605170 134 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +follow 4 92 2.397895 9.591580 143 +grade 3 90 2.397895 7.193685 142 +mani 3 92 2.397895 7.193685 150 +associ 2 93 2.397895 4.795790 151 +proceed 1 93 2.397895 2.397895 152 +homepag 1 93 2.397895 2.397895 148 +question 1 91 2.397895 2.397895 141 +exam 8 86 2.484907 19.879256 169 +level 4 87 2.484907 9.939628 180 +second 3 81 2.484907 7.454721 166 +larg 2 82 2.484907 4.969814 168 +librari 2 87 2.484907 4.969814 181 +info 1 85 2.484907 2.484907 176 +activ 1 84 2.484907 2.484907 182 +journal 1 83 2.484907 2.484907 183 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +control 1 82 2.484907 2.484907 164 +resourc 1 81 2.484907 2.484907 172 +refer 8 78 2.564949 20.519592 203 +complet 3 77 2.564949 7.694847 208 +want 2 79 2.564949 5.129898 199 +interfac 2 79 2.564949 5.129898 209 +homework 1 79 2.564949 2.564949 193 +appear 1 78 2.564949 2.564949 210 +come 1 78 2.564949 2.564949 202 +optim 1 79 2.564949 2.564949 197 +know 1 80 2.564949 2.564949 198 +materi 6 75 2.639057 15.834342 221 +upson 4 71 2.639057 10.556228 218 +involv 4 71 2.639057 10.556228 227 +write 4 72 2.639057 10.556228 222 +tuesdai 2 73 2.639057 5.278114 219 +addit 1 74 2.639057 2.639057 228 +meet 1 72 2.639057 2.639057 229 +free 1 73 2.639057 2.639057 224 +thursdai 2 70 2.708050 5.416100 241 +test 2 66 2.708050 5.416100 252 +goal 2 66 2.708050 5.416100 250 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +import 3 65 2.772589 8.317767 282 +result 2 65 2.772589 5.545178 281 +descript 2 64 2.772589 5.545178 271 +function 2 62 2.772589 5.545178 275 +evalu 1 64 2.772589 2.772589 266 +handout 1 64 2.772589 2.772589 263 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +interact 1 62 2.772589 2.772589 270 +collect 1 65 2.772589 2.772589 268 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +type 1 61 2.833213 2.833213 296 +detail 5 57 2.890372 14.451860 321 +variou 1 56 2.890372 2.890372 317 +semest 1 58 2.890372 2.890372 312 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +cover 2 55 2.944439 5.888878 329 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +much 1 52 2.995732 2.995732 349 +date 1 51 2.995732 2.995732 344 +basic 2 50 3.044522 6.089044 360 +archiv 1 49 3.044522 3.044522 364 +give 1 50 3.044522 3.044522 359 +principl 1 48 3.044522 3.044522 357 +standard 1 48 3.044522 3.044522 365 +possibl 3 47 3.091042 9.273126 378 +could 2 46 3.091042 6.182084 383 +understand 1 47 3.091042 3.091042 384 +textbook 4 44 3.135494 12.541976 397 +discuss 2 45 3.135494 6.270988 399 +midterm 2 45 3.135494 6.270988 392 +answer 1 45 3.135494 3.135494 391 +made 1 44 3.135494 3.135494 398 +even 1 45 3.135494 3.135494 393 +term 5 43 3.178054 15.890270 411 +offer 1 43 3.178054 3.178054 414 +around 1 43 3.178054 3.178054 415 +long 1 43 3.178054 3.178054 413 +edit 3 42 3.218876 9.656628 418 +compani 1 41 3.218876 3.218876 423 +examin 1 42 3.218876 3.218876 424 +howev 1 41 3.218876 3.218876 422 +review 1 42 3.218876 3.218876 425 +might 1 41 3.218876 3.218876 426 +transact 4 39 3.258097 13.032388 438 +form 2 39 3.258097 6.516194 443 +realli 1 40 3.258097 3.258097 444 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +prototyp 2 38 3.295837 6.591674 463 +industri 1 38 3.295837 3.295837 464 +close 1 38 3.295837 3.295837 465 +mean 1 37 3.332205 3.332205 477 +hand 1 37 3.332205 3.332205 475 +survei 6 35 3.401197 20.407182 513 +singl 1 34 3.401197 3.401197 510 +michael 1 35 3.401197 3.401197 514 +concurr 1 34 3.401197 3.401197 501 +queri 3 33 3.433987 10.301961 524 +taught 2 33 3.433987 6.867974 526 +concept 4 32 3.465736 13.862944 537 +storag 1 31 3.496508 3.496508 553 +someth 1 31 3.496508 3.496508 554 +compon 5 30 3.555348 17.776740 570 +domain 1 30 3.555348 3.555348 564 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +produc 1 30 3.555348 3.555348 572 +turn 2 29 3.583519 7.167038 586 +depend 2 29 3.583519 7.167038 583 +semant 1 29 3.583519 3.583519 587 +propos 4 28 3.610918 14.443672 602 +becom 3 28 3.610918 10.832754 603 +intend 1 28 3.610918 3.610918 599 +actual 1 28 3.610918 3.610918 604 +progress 1 28 3.610918 3.610918 598 +manipul 1 27 3.637586 3.637586 624 +team 1 27 3.637586 3.637586 625 +altern 1 26 3.688879 3.688879 641 +background 4 25 3.737670 14.950680 664 +fundament 3 25 3.737670 11.213010 661 +although 1 25 3.737670 3.737670 667 +toward 1 25 3.737670 3.737670 668 +primari 1 25 3.737670 3.737670 669 +hill 1 25 3.737670 3.737670 670 +aspect 1 25 3.737670 3.737670 663 +alwai 1 24 3.761200 3.761200 691 +wish 1 24 3.761200 3.761200 692 +higher 1 24 3.761200 3.761200 690 +store 1 24 3.761200 3.761200 693 +begin 1 23 3.806662 3.806662 716 +initi 1 23 3.806662 3.806662 717 +lead 1 23 3.806662 3.806662 718 +tent 3 22 3.850148 11.550444 739 +disk 1 22 3.850148 3.850148 747 +recommend 1 22 3.850148 3.850148 737 +finish 1 22 3.850148 3.850148 748 +period 1 22 3.850148 3.850148 743 +among 1 21 3.912023 3.912023 781 +half 1 21 3.912023 3.912023 776 +prerequisit 1 19 4.007333 4.007333 846 +thur 1 19 4.007333 4.007333 847 +lower 2 18 4.060443 8.120886 886 +minim 1 18 4.060443 4.060443 887 +demo 1 18 4.060443 4.060443 888 +stat 2 17 4.110874 8.221748 924 +whether 2 17 4.110874 8.221748 918 +anyon 1 17 4.110874 4.110874 916 +choos 2 16 4.174387 8.348774 964 +ramakrishnan 1 16 4.174387 4.174387 972 +piec 2 15 4.248495 8.496990 1020 +contribut 1 15 4.248495 4.248495 1021 +hopefulli 2 14 4.317488 8.634976 1071 +convent 2 14 4.317488 8.634976 1072 +demand 1 14 4.317488 4.317488 1073 +manner 1 14 4.317488 4.317488 1074 +decid 1 14 4.317488 4.317488 1075 +dbm 6 13 4.382027 26.292162 1136 +essenti 1 13 4.382027 4.382027 1137 +signific 1 13 4.382027 4.382027 1125 +step 1 13 4.382027 4.382027 1138 +introduc 1 13 4.382027 4.382027 1139 +earlier 1 13 4.382027 4.382027 1140 +prelim 6 12 4.465908 26.795448 1201 +amount 5 12 4.465908 22.329540 1208 +grow 1 12 4.465908 4.465908 1209 +workload 1 12 4.465908 4.465908 1210 +buffer 1 12 4.465908 4.465908 1211 +raghu 1 12 4.465908 4.465908 1212 +sens 1 11 4.553877 4.553877 1305 +submiss 1 11 4.553877 4.553877 1298 +benjamin 1 11 4.553877 4.553877 1296 +modular 2 10 4.653960 9.307920 1392 +forc 1 10 4.653960 4.653960 1384 +certain 1 10 4.653960 4.653960 1393 +introductori 4 9 4.753590 19.014360 1479 +herefor 3 9 4.753590 14.260770 1483 +morgan 2 9 4.753590 9.507180 1484 +familiar 1 9 4.753590 4.753590 1485 +suitabl 1 9 4.753590 4.753590 1486 +rel 1 9 4.753590 4.753590 1487 +clear 1 9 4.753590 4.753590 1488 +recoveri 1 9 4.753590 4.753590 1474 +databasesystem 1 8 4.875197 4.875197 1617 +cum 1 8 4.875197 4.875197 1619 +awar 1 7 5.010635 5.010635 1800 +bug 1 7 5.010635 5.010635 1801 +fromth 1 7 5.010635 5.010635 1802 +seshadri 1 7 5.010635 5.010635 1803 +noon 1 7 5.010635 5.010635 1804 +consequ 2 6 5.164786 10.329572 1989 +neither 2 6 5.164786 10.329572 1990 +huge 1 6 5.164786 5.164786 1991 +ifyou 1 6 5.164786 5.164786 1992 +beta 1 6 5.164786 5.164786 1993 +silberschatz 1 6 5.164786 5.164786 1978 +lack 1 6 5.164786 5.164786 1994 +tobe 1 6 5.164786 5.164786 1995 +praveen 1 6 5.164786 5.164786 1996 +kaufmann 2 5 5.347108 10.694216 2254 +solid 1 5 5.347108 5.347108 2255 +valuabl 1 5 5.347108 5.347108 2256 +thrive 1 5 5.347108 5.347108 2257 +greater 1 5 5.347108 5.347108 2258 +fraction 1 5 5.347108 5.347108 2259 +interestedin 1 5 5.347108 5.347108 2260 +categori 1 5 5.347108 5.347108 2261 +mcgraw 1 5 5.347108 5.347108 2262 +minibas 6 4 5.568345 33.410070 2608 +surprisingli 1 4 5.568345 5.568345 2609 +behind 1 4 5.568345 5.568345 2610 +suppli 1 4 5.568345 5.568345 2611 +thiscours 1 4 5.568345 5.568345 2601 +asystem 1 4 5.568345 5.568345 2612 +enrol 1 4 5.568345 5.568345 2613 +twice 1 4 5.568345 5.568345 2614 +fold 1 4 5.568345 5.568345 2615 +thati 1 4 5.568345 5.568345 2616 +predat 5 3 5.857933 29.289665 3135 +comfort 2 3 5.857933 11.715866 3136 +giant 1 3 5.857933 5.857933 3137 +explos 1 3 5.857933 5.857933 3138 +alon 1 3 5.857933 5.857933 3139 +scratch 1 3 5.857933 5.857933 3140 +parser 1 3 5.857933 5.857933 3141 +aproject 1 3 5.857933 5.857933 3142 +bibl 1 3 5.857933 5.857933 3143 +confus 1 3 5.857933 5.857933 3144 +weitsang 2 2 6.263398 12.526796 4088 +databasemanag 1 2 6.263398 6.263398 4089 +certainli 1 2 6.263398 6.263398 4090 +proportion 1 2 6.263398 6.263398 4091 +thefirst 1 2 6.263398 6.263398 4092 +youto 1 2 6.263398 6.263398 4093 +builton 1 2 6.263398 6.263398 4094 +thehigh 1 2 6.263398 6.263398 4095 +korth 1 2 6.263398 6.263398 4051 +secondedit 1 2 6.263398 6.263398 4096 +ingr 1 2 6.263398 6.263398 4097 +elmasri 1 2 6.263398 6.263398 4059 +grai 1 2 6.263398 6.263398 4098 +reuter 1 2 6.263398 6.263398 4099 +likewis 1 2 6.263398 6.263398 4100 +confirm 1 2 6.263398 6.263398 4101 +outsidefirewal 2 1 6.957497 13.914994 6434 +stonebrak 2 1 6.957497 13.914994 6435 +samplequest 1 1 6.957497 6.957497 6436 +predatordbm 1 1 6.957497 6.957497 6437 +currentproject 1 1 6.957497 6.957497 6438 +coursedescript 1 1 6.957497 6.957497 6439 +intendedto 1 1 6.957497 6.957497 6440 +slargest 1 1 6.957497 6.957497 6441 +piecesof 1 1 6.957497 6.957497 6442 +knowledgeabledatabas 1 1 6.957497 6.957497 6443 +researchcommun 1 1 6.957497 6.957497 6444 +addressedbecaus 1 1 6.957497 6.957497 6445 +informedus 1 1 6.957497 6.957497 6446 +teller 1 1 6.957497 6.957497 6447 +newcours 1 1 6.957497 6.957497 6448 +quickreview 1 1 6.957497 6.957497 6449 +abreadth 1 1 6.957497 6.957497 6450 +advancedtop 1 1 6.957497 6.957497 6451 +thepurpos 1 1 6.957497 6.957497 6452 +coursei 1 1 6.957497 6.957497 6453 +weeksaft 1 1 6.957497 6.957497 6454 +requireread 1 1 6.957497 6.957497 6455 +engineeringlibrari 1 1 6.957497 6.957497 6456 +pursueaddit 1 1 6.957497 6.957497 6457 +forinform 1 1 6.957497 6.957497 6458 +examtim 1 1 6.957497 6.957497 6459 +developmentproject 1 1 6.957497 6.957497 6460 +involvea 1 1 6.957497 6.957497 6461 +wishto 1 1 6.957497 6.957497 6462 +willinvolv 1 1 6.957497 6.957497 6463 +andmodifi 1 1 6.957497 6.957497 6464 +andrar 1 1 6.957497 6.957497 6465 +luxuri 1 1 6.957497 6.957497 6466 +thediffer 1 1 6.957497 6.957497 6467 +inevit 1 1 6.957497 6.957497 6468 +varioussystem 1 1 6.957497 6.957497 6469 +buffermanag 1 1 6.957497 6.957497 6470 +enginethat 1 1 6.957497 6.957497 6471 +possibleproject 1 1 6.957497 6.957497 6472 +likecomplex 1 1 6.957497 6.957497 6473 +becauseth 1 1 6.957497 6.957497 6474 +betweenminibas 1 1 6.957497 6.957497 6475 +somegener 1 1 6.957497 6.957497 6476 +ideaon 1 1 6.957497 6.957497 6477 +advanceof 1 1 6.957497 6.957497 6478 +submitan 1 1 6.957497 6.957497 6479 +discussth 1 1 6.957497 6.957497 6480 +particularsystem 1 1 6.957497 6.957497 6481 +documentwil 1 1 6.957497 6.957497 6482 +picki 1 1 6.957497 6.957497 6483 +geton 1 1 6.957497 6.957497 6484 +oftest 1 1 6.957497 6.957497 6485 +coursetextbook 1 1 6.957497 6.957497 6486 +bookcontain 1 1 6.957497 6.957497 6487 +databasebook 1 1 6.957497 6.957497 6488 +thecampu 1 1 6.957497 6.957497 6489 +collectedand 1 1 6.957497 6.957497 6490 +postgr 1 1 6.957497 6.957497 6491 +andillustra 1 1 6.957497 6.957497 6492 +corearea 1 1 6.957497 6.957497 6493 +navath 1 1 6.957497 6.957497 6494 +tellsyou 1 1 6.957497 6.957497 6495 +wonderfulrefer 1 1 6.957497 6.957497 6496 +debuggingwith 1 1 6.957497 6.957497 6497 +gradingpolici 1 1 6.957497 6.957497 6498 +percentag 1 1 6.957497 6.957497 6499 +anextra 1 1 6.957497 6.957497 6500 +thefin 1 1 6.957497 6.957497 6501 +willfocu 1 1 6.957497 6.957497 6502 +coveredin 1 1 6.957497 6.957497 6503 +professorpraveen 1 1 6.957497 6.957497 6504 +teachingassist 1 1 6.957497 6.957497 6505 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html new file mode 100644 index 00000000..54323038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS611^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 11 374 0.693147 7.624617 7 +inform 4 412 0.693147 2.772588 8 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +cours 6 273 1.098612 6.591672 15 +student 5 343 1.098612 5.493060 19 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +languag 11 227 1.386294 15.249234 26 +cornel 3 215 1.386294 4.158882 23 +link 2 247 1.386294 2.772588 24 +design 1 213 1.386294 1.386294 25 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +implement 2 152 1.791759 3.583518 52 +hour 2 165 1.791759 3.583518 46 +avail 2 169 1.791759 3.583518 48 +contact 1 153 1.791759 1.791759 59 +base 1 165 1.791759 1.791759 50 +lectur 2 135 1.945910 3.891820 73 +like 2 132 1.945910 3.891820 81 +relat 1 139 1.945910 1.945910 68 +studi 4 120 2.079442 8.317768 91 +tool 2 117 2.079442 4.158884 93 +document 2 121 2.079442 4.158884 89 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +specif 2 106 2.197225 4.394450 106 +well 2 109 2.197225 4.394450 121 +instructor 2 108 2.197225 4.394450 107 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +assist 1 112 2.197225 2.197225 113 +text 1 98 2.302585 2.302585 133 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +call 1 91 2.397895 2.397895 153 +environ 2 84 2.484907 4.969814 177 +learn 1 86 2.484907 2.484907 170 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +refer 2 78 2.564949 5.129898 203 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +upson 4 71 2.639057 10.556228 218 +logic 3 71 2.639057 7.917171 230 +line 1 75 2.639057 2.639057 231 +knowledg 2 67 2.708050 5.416100 243 +java 1 70 2.708050 2.708050 248 +goal 1 66 2.708050 2.708050 250 +descript 4 64 2.772589 11.090356 271 +experi 2 64 2.772589 5.545178 283 +function 2 62 2.772589 5.545178 275 +abstract 1 62 2.772589 2.772589 276 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +undergradu 2 54 2.944439 5.888878 338 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +tabl 1 51 2.995732 2.995732 346 +basic 2 50 3.044522 6.089044 360 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +understand 1 47 3.091042 3.091042 384 +describ 2 45 3.135494 6.270988 400 +better 1 45 3.135494 3.135494 401 +textbook 1 44 3.135494 3.135494 397 +math 1 44 3.135494 3.135494 402 +mark 1 44 3.135494 3.135494 403 +mechan 2 43 3.178054 6.356108 416 +edit 1 42 3.218876 3.218876 418 +programm 1 39 3.258097 3.258097 445 +theoret 1 39 3.258097 3.258097 446 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +formal 1 37 3.332205 3.332205 478 +survei 1 35 3.401197 3.401197 513 +tech 1 35 3.401197 3.401197 515 +least 1 35 3.401197 3.401197 516 +concept 1 32 3.465736 3.465736 537 +specifi 1 30 3.555348 3.555348 568 +semant 6 29 3.583519 21.501114 587 +turn 1 29 3.583519 3.583519 586 +though 1 27 3.637586 3.637586 622 +rather 1 26 3.688879 3.688879 642 +relev 1 26 3.688879 3.688879 637 +concern 1 25 3.737670 3.737670 666 +demonstr 1 24 3.761200 3.761200 694 +greg 1 24 3.761200 3.761200 695 +compress 1 23 3.806662 3.806662 719 +lead 1 23 3.806662 3.806662 718 +proof 1 23 3.806662 3.806662 720 +properti 3 22 3.850148 11.550444 749 +programminglanguag 1 21 3.912023 3.912023 782 +newsgroup 1 21 3.912023 3.912023 783 +safeti 1 20 3.951244 3.951244 817 +scheme 1 20 3.951244 3.951244 818 +prove 2 19 4.007333 8.014666 848 +assum 2 19 4.007333 8.014666 845 +prerequisit 1 19 4.007333 4.007333 846 +andrew 1 19 4.007333 4.007333 849 +analyz 1 17 4.110874 4.110874 925 +side 2 15 4.248495 8.496990 1022 +precis 1 15 4.248495 4.248495 1023 +carl 1 15 4.248495 4.248495 1024 +conduct 1 14 4.317488 4.317488 1065 +directli 1 13 4.382027 4.382027 1141 +larri 1 13 4.382027 4.382027 1142 +recurs 1 13 4.382027 4.382027 1127 +emac 1 13 4.382027 4.382027 1143 +pascal 1 12 4.465908 4.465908 1213 +calculu 1 12 4.465908 4.465908 1203 +meng 1 12 4.465908 4.465908 1214 +broad 1 11 4.553877 4.553877 1302 +induct 1 11 4.553877 4.553877 1304 +linda 1 10 4.653960 4.653960 1394 +notat 5 9 4.753590 23.767950 1489 +entitl 1 9 4.753590 4.753590 1490 +prefer 1 9 4.753590 4.753590 1491 +suitabl 1 9 4.753590 4.753590 1486 +admin 1 9 4.753590 4.753590 1476 +mode 1 9 4.753590 4.753590 1492 +ideal 1 8 4.875197 4.875197 1630 +leon 1 8 4.875197 4.875197 1631 +cum 1 8 4.875197 4.875197 1619 +dispatch 1 7 5.010635 5.010635 1791 +henc 1 7 5.010635 5.010635 1805 +predic 1 7 5.010635 5.010635 1806 +ture 1 6 5.164786 5.164786 1997 +morrisett 1 5 5.347108 5.347108 2263 +gentl 1 5 5.347108 5.347108 2264 +witha 1 4 5.568345 5.568345 2617 +haskel 1 4 5.568345 5.568345 2618 +principlesof 1 3 5.857933 5.857933 3145 +deeper 1 3 5.857933 5.857933 3146 +denot 1 3 5.857933 5.857933 3147 +noteshomework 1 2 6.263398 6.263398 4102 +profici 1 2 6.263398 6.263398 4103 +andlog 1 2 6.263398 6.263398 4104 +competillo 1 2 6.263398 6.263398 4105 +lfar 1 2 6.263398 6.263398 4106 +erlingsson 1 2 6.263398 6.263398 4107 +indexdocument 1 2 6.263398 6.263398 4108 +toolsa 1 2 6.263398 6.263398 4109 +prerequisiteshandoutsscrib 1 1 6.957497 6.957497 6506 +assignmentscontact 1 1 6.957497 6.957497 6507 +informationrelev 1 1 6.957497 6.957497 6508 +goalof 1 1 6.957497 6.957497 6509 +multipleinherit 1 1 6.957497 6.957497 6510 +subsum 1 1 6.957497 6.957497 6511 +thestudi 1 1 6.957497 6.957497 6512 +abstractli 1 1 6.957497 6.957497 6513 +howprogram 1 1 6.957497 6.957497 6514 +asnot 1 1 6.957497 6.957497 6515 +preciser 1 1 6.957497 6.957497 6516 +forform 1 1 6.957497 6.957497 6517 +somethingabout 1 1 6.957497 6.957497 6518 +tomanipul 1 1 6.957497 6.957497 6519 +gunter 1 1 6.957497 6.957497 6520 +paulson 1 1 6.957497 6.957497 6521 +undergraduatemathemat 1 1 6.957497 6.957497 6522 +mathematicalmatur 1 1 6.957497 6.957497 6523 +anmeng 1 1 6.957497 6.957497 6524 +ifth 1 1 6.957497 6.957497 6525 +ulfar 1 1 6.957497 6.957497 6526 +pmrelev 1 1 6.957497 6.957497 6527 +comint 1 1 6.957497 6.957497 6528 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html new file mode 100644 index 00000000..be3ec3ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^Welcome.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cornel 2 215 1.386294 2.772588 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +schedul 1 119 2.079442 2.079442 85 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +info 1 85 2.484907 2.484907 176 +materi 1 75 2.639057 2.639057 221 +staff 1 36 3.367296 3.367296 490 +newsgroup 2 21 3.912023 7.824046 783 +rivl 1 8 4.875197 4.875197 1632 +systemscomput 1 3 5.857933 5.857933 3148 +janosi 1 3 5.857933 5.857933 3149 +pagecsmultimedia 1 2 6.263398 6.263398 4110 +anounc 1 2 6.263398 6.263398 4111 +bugcom 1 2 6.263398 6.263398 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html new file mode 100644 index 00000000..bd021f80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS631^home.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 2 215 1.386294 2.772588 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +send 1 114 2.197225 2.197225 109 +access 1 102 2.302585 2.302585 136 +question 1 91 2.397895 2.397895 141 +info 1 85 2.484907 2.484907 176 +materi 1 75 2.639057 2.639057 221 +staff 1 36 3.367296 3.367296 490 +newsgroup 2 21 3.912023 7.824046 783 +rivl 1 8 4.875197 4.875197 1632 +systemscomput 1 3 5.857933 5.857933 3148 +janosi 1 3 5.857933 5.857933 3149 +pagecsmultimedia 1 2 6.263398 6.263398 4110 +anounc 1 2 6.263398 6.263398 4111 +bugcom 1 2 6.263398 6.263398 4112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html new file mode 100644 index 00000000..ca5feefe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Current^CS681^CS681.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +last 9 314 1.098612 9.887508 14 +time 2 293 1.098612 2.197224 17 +cours 1 273 1.098612 1.098612 15 +design 2 213 1.386294 2.772588 25 +modifi 9 178 1.609438 14.484942 35 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +algorithm 2 162 1.791759 3.583518 57 +note 2 142 1.945910 3.891820 67 +lectur 1 135 1.945910 1.945910 73 +analysi 2 124 2.079442 4.158884 98 +instructor 1 108 2.197225 2.197225 107 +make 1 111 2.197225 2.197225 120 +text 2 98 2.302585 4.605170 133 +homepag 1 93 2.397895 2.397895 148 +solut 8 82 2.484907 19.879256 162 +exam 1 86 2.484907 2.484907 169 +homework 10 79 2.564949 25.649490 193 +refer 1 78 2.564949 2.564949 203 +upson 1 71 2.639057 2.639057 218 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +copi 1 63 2.772589 2.772589 284 +locat 1 59 2.833213 2.833213 303 +talk 1 53 2.944439 2.944439 336 +announc 2 40 3.258097 6.516194 441 +approxim 1 35 3.401197 3.401197 509 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +sheet 1 16 4.174387 4.174387 973 +cannot 1 13 4.382027 4.382027 1144 +cheat 1 10 4.653960 4.653960 1395 +evan 2 8 4.875197 9.750394 1633 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +kozen 2 4 5.568345 11.136690 2619 +addendum 3 3 5.857933 17.573799 3150 +moran 1 3 5.857933 5.857933 3151 +rajeev 1 3 5.857933 5.857933 3152 +inupson 1 1 6.957497 6.957497 6529 +tome 1 1 6.957497 6.957497 6530 +reschedul 1 1 6.957497 6.957497 6531 +motwani 1 1 6.957497 6.957497 6532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ new file mode 100644 index 00000000..58b5bffd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-94^CS617^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 9 443 0.693147 6.238323 6 +program 4 374 0.693147 2.772588 7 +cours 6 273 1.098612 6.591672 15 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +languag 3 227 1.386294 4.158882 26 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +oper 2 180 1.609438 3.218876 34 +parallel 9 169 1.791759 16.125831 60 +algorithm 2 162 1.791759 3.583518 57 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +architectur 3 139 1.945910 5.837730 77 +support 3 132 1.945910 5.837730 83 +perform 2 143 1.945910 3.891820 74 +first 2 140 1.945910 3.891820 71 +model 2 145 1.945910 3.891820 69 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +machin 7 129 2.079442 14.556094 95 +number 2 130 2.079442 4.158884 97 +studi 2 120 2.079442 4.158884 91 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +topic 2 114 2.197225 4.394450 110 +look 1 107 2.197225 2.197225 115 +specif 1 106 2.197225 2.197225 106 +memori 2 101 2.302585 4.605170 139 +part 2 98 2.302585 4.605170 129 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +level 2 87 2.484907 4.969814 180 +larg 2 82 2.484907 4.969814 168 +requir 2 81 2.484907 4.969814 167 +second 1 81 2.484907 2.484907 166 +issu 2 78 2.564949 5.129898 211 +complet 1 77 2.564949 2.564949 208 +upson 1 71 2.639057 2.639057 218 +line 1 75 2.639057 2.639057 231 +integr 1 67 2.708050 2.708050 245 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +locat 1 59 2.833213 2.833213 303 +share 1 59 2.833213 2.833213 304 +point 1 58 2.890372 2.890372 319 +processor 2 54 2.944439 5.888878 335 +allow 1 53 2.944439 2.944439 333 +much 1 52 2.995732 2.995732 349 +week 1 52 2.995732 2.995732 343 +hardwar 1 51 2.995732 2.995732 350 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +execut 1 45 3.135494 3.135494 404 +offer 2 43 3.178054 6.356108 414 +term 1 43 3.178054 3.178054 411 +howev 1 41 3.218876 3.218876 422 +futur 1 41 3.218876 3.218876 427 +past 1 42 3.218876 3.218876 428 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +workstat 2 37 3.332205 6.664410 479 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +product 1 33 3.433987 3.433987 527 +taken 1 31 3.496508 3.496508 555 +focu 2 30 3.555348 7.110696 571 +power 1 30 3.555348 3.555348 573 +multiprocessor 1 28 3.610918 3.610918 605 +altern 1 26 3.688879 3.688879 641 +although 1 25 3.737670 3.737670 667 +aspect 1 25 3.737670 3.737670 663 +almost 1 22 3.850148 3.850148 742 +sequenti 1 22 3.850148 3.850148 745 +fact 1 21 3.912023 3.912023 780 +busi 1 21 3.912023 3.912023 784 +latest 1 21 3.912023 3.912023 785 +portabl 1 20 3.951244 3.951244 819 +spend 1 19 4.007333 4.007333 850 +layer 1 17 4.110874 4.110874 926 +across 1 16 4.174387 4.174387 974 +month 1 15 4.248495 4.248495 1025 +massiv 1 15 4.248495 4.248495 1026 +consider 2 14 4.317488 8.634976 1076 +easili 1 14 4.317488 4.317488 1077 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +cannot 1 13 4.382027 4.382027 1144 +stai 1 12 4.465908 4.465908 1215 +workload 1 12 4.465908 4.465908 1210 +grant 1 12 4.465908 4.465908 1216 +host 1 11 4.553877 4.553877 1306 +debugg 1 9 4.753590 4.753590 1493 +manufactur 2 8 4.875197 9.750394 1634 +competit 1 8 4.875197 4.875197 1635 +depth 1 8 4.875197 4.875197 1636 +contrast 1 8 4.875197 4.875197 1637 +smile 1 7 5.010635 5.010635 1807 +microprocessor 1 7 5.010635 5.010635 1808 +core 1 7 5.010635 5.010635 1809 +usabl 1 7 5.010635 5.010635 1810 +price 2 6 5.164786 10.329572 1999 +quickli 1 6 5.164786 5.164786 2000 +vari 1 6 5.164786 5.164786 2001 +feder 1 5 5.347108 5.347108 2266 +eas 1 5 5.347108 5.347108 2267 +suffer 1 5 5.347108 5.347108 2268 +matur 1 5 5.347108 5.347108 2269 +vertic 1 5 5.347108 5.347108 2270 +focuss 1 5 5.347108 5.347108 2271 +cut 1 4 5.568345 5.568345 2620 +shelf 1 4 5.568345 5.568345 2621 +slice 1 4 5.568345 5.568345 2622 +eickenfal 1 3 5.857933 5.857933 3125 +leverag 1 3 5.857933 5.857933 3153 +heat 1 2 6.263398 6.263398 4113 +glorifi 1 2 6.263398 6.263398 4114 +farm 1 2 6.263398 6.263398 4115 +adequ 1 2 6.263398 6.263398 4116 +horizont 1 2 6.263398 6.263398 4117 +debat 2 1 6.957497 13.914994 6533 +pagefronti 1 1 6.957497 6.957497 6534 +pmoffic 1 1 6.957497 6.957497 6535 +pmcours 1 1 6.957497 6.957497 6536 +descriptionparallel 1 1 6.957497 6.957497 6537 +underscor 1 1 6.957497 6.957497 6538 +erad 1 1 6.957497 6.957497 6539 +competitor 1 1 6.957497 6.957497 6540 +dash 1 1 6.957497 6.957497 6541 +materialscours 1 1 6.957497 6.957497 6542 +formatlectur 1 1 6.957497 6.957497 6543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ new file mode 100644 index 00000000..4901ae44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS314^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +cours 4 273 1.098612 4.394448 15 +us 4 329 1.098612 4.394448 16 +languag 2 227 1.386294 2.772588 26 +also 2 259 1.386294 2.772588 28 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +lectur 3 135 1.945910 5.837730 73 +note 2 142 1.945910 3.891820 67 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +assign 1 135 1.945910 1.945910 66 +like 1 132 1.945910 1.945910 81 +document 2 121 2.079442 4.158884 89 +introduct 2 126 2.079442 4.158884 87 +machin 1 129 2.079442 2.079442 95 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +question 2 91 2.397895 4.795790 141 +search 1 95 2.397895 2.397895 155 +start 2 83 2.484907 4.969814 173 +control 1 82 2.484907 2.484907 164 +help 1 83 2.484907 2.484907 175 +learn 1 86 2.484907 2.484907 170 +contain 1 81 2.484907 2.484907 174 +exampl 2 77 2.564949 5.129898 195 +homework 1 79 2.564949 2.564949 193 +know 1 80 2.564949 2.564949 198 +refer 1 78 2.564949 2.564949 203 +come 1 78 2.564949 2.564949 202 +materi 2 75 2.639057 5.278114 221 +onlin 2 75 2.639057 5.278114 223 +david 1 71 2.639057 2.639057 232 +organ 2 65 2.772589 5.545178 265 +guid 1 63 2.772589 2.772589 267 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +sever 1 56 2.890372 2.890372 322 +allow 2 53 2.944439 5.888878 333 +processor 1 54 2.944439 2.944439 335 +talk 1 53 2.944439 2.944439 336 +digit 1 52 2.995732 2.995732 348 +case 1 51 2.995732 2.995732 351 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +get 1 46 3.091042 3.091042 380 +video 1 44 3.135494 3.135494 405 +tutori 3 39 3.258097 9.774291 437 +small 1 39 3.258097 3.258097 447 +staff 1 36 3.367296 3.367296 490 +procedur 1 36 3.367296 3.367296 488 +common 1 30 3.555348 3.555348 574 +ask 1 28 3.610918 3.610918 597 +great 1 27 3.637586 3.637586 626 +never 1 25 3.737670 3.737670 671 +todai 1 25 3.737670 3.737670 672 +consult 1 24 3.761200 3.761200 687 +wish 1 24 3.761200 3.761200 692 +hierarchi 1 22 3.850148 3.850148 744 +annot 3 21 3.912023 11.736069 775 +path 1 21 3.912023 3.912023 778 +unit 1 21 3.912023 3.912023 779 +newsgroup 1 21 3.912023 3.912023 783 +lot 1 18 4.060443 4.060443 889 +otherwis 1 17 4.110874 4.110874 922 +fortran 1 15 4.248495 4.248495 1027 +thorsten 2 13 4.382027 8.764054 1133 +assembl 1 12 4.465908 4.465908 1207 +pascal 1 12 4.465908 4.465908 1213 +surf 1 11 4.553877 4.553877 1301 +combinatori 1 8 4.875197 4.875197 1629 +brain 1 8 4.875197 4.875197 1638 +interrupt 1 7 5.010635 5.010635 1793 +conot 2 5 5.347108 10.694216 2245 +eickenfal 1 3 5.857933 5.857933 3125 +kimbal 1 3 5.857933 5.857933 3114 +helpif 1 3 5.857933 5.857933 3126 +aproject 1 3 5.857933 5.857933 3142 +mate 1 3 5.857933 5.857933 3127 +cardiff 1 3 5.857933 5.857933 3154 +programsand 1 3 5.857933 5.857933 3111 +marshal 2 2 6.263398 12.526796 4118 +btopic 1 2 6.263398 6.263398 4085 +representationof 1 2 6.263398 6.263398 4119 +toon 1 2 6.263398 6.263398 4120 +pageintroduct 1 1 6.957497 6.957497 6544 +sequentialcircuit 1 1 6.957497 6.957497 6545 +andmicroprogram 1 1 6.957497 6.957497 6546 +theappropri 1 1 6.957497 6.957497 6547 +gethelp 1 1 6.957497 6.957497 6548 +informationcoursemateri 1 1 6.957497 6.957497 6549 +announcementsannounc 1 1 6.957497 6.957497 6550 +onlinean 1 1 6.957497 6.957497 6551 +forpeopl 1 1 6.957497 6.957497 6552 +cclass 1 1 6.957497 6.957497 6553 +learnc 1 1 6.957497 6.957497 6554 +theyahoo 1 1 6.957497 6.957497 6555 +ofmor 1 1 6.957497 6.957497 6556 +inansw 1 1 6.957497 6.957497 6557 +voneicken 1 1 6.957497 6.957497 6558 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html new file mode 100644 index 00000000..6576ccd1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS472^cs472.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +cours 3 273 1.098612 3.295836 15 +mail 2 238 1.386294 2.772588 22 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +welcom 1 122 2.079442 2.079442 99 +final 2 116 2.197225 4.394450 108 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +grade 1 90 2.397895 2.397895 142 +academ 1 82 2.484907 2.484907 178 +exam 1 86 2.484907 2.484907 169 +upson 2 71 2.639057 5.278114 218 +intellig 1 72 2.639057 2.639057 225 +materi 1 75 2.639057 2.639057 221 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +date 1 51 2.995732 2.995732 344 +announc 1 40 3.258097 3.258097 441 +request 1 26 3.688879 3.688879 635 +altern 1 26 3.688879 3.688879 641 +sometim 1 24 3.761200 3.761200 696 +saturdai 1 7 5.010635 5.010635 1794 +clair 2 4 5.568345 11.136690 2605 +pagesc 1 3 5.857933 5.857933 3133 +pagecsfound 1 2 6.263398 6.263398 4086 +yourgrad 1 2 6.263398 6.263398 4121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html new file mode 100644 index 00000000..b68b6556 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Fall-95^CS501^CS401-501.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +cours 4 273 1.098612 4.394448 15 +offic 4 299 1.098612 4.394448 13 +engin 1 297 1.098612 1.098612 20 +cornel 5 215 1.386294 6.931470 23 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +hour 4 165 1.791759 7.167036 46 +note 2 142 1.945910 3.891820 67 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +technolog 1 131 2.079442 2.079442 102 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +upson 3 71 2.639057 7.917171 218 +materi 1 75 2.639057 2.639057 221 +line 1 75 2.639057 2.639057 231 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +overview 2 56 2.890372 5.780744 323 +frequent 1 49 3.044522 3.044522 367 +staff 1 36 3.367296 3.367296 490 +ask 1 28 3.610918 3.610918 597 +edulast 1 17 4.110874 4.110874 927 +pagec 1 15 4.248495 4.248495 1011 +borland 1 14 4.317488 4.317488 1067 +recit 1 9 4.753590 4.753590 1475 +vineet 1 8 4.875197 4.875197 1639 +none 1 7 5.010635 5.010635 1811 +buch 2 5 5.347108 10.694216 2272 +samuel 2 3 5.857933 11.715866 3155 +weber 2 3 5.857933 11.715866 3156 +yaron 1 2 6.263398 6.263398 4122 +minski 1 2 6.263398 6.263398 4123 +remark 1 2 6.263398 6.263398 4124 +techniquescomput 1 1 6.957497 6.957497 6559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html new file mode 100644 index 00000000..292f5bbd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-95^CS314^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +project 3 340 1.098612 3.295836 18 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +assign 5 135 1.945910 9.729550 66 +lectur 1 135 1.945910 1.945910 73 +introduct 2 126 2.079442 4.158884 87 +postscript 1 131 2.079442 2.079442 90 +homework 1 79 2.564949 2.564949 193 +logic 1 71 2.639057 2.639057 230 +guid 1 63 2.772589 2.772589 267 +organ 1 65 2.772589 2.772589 265 +content 1 59 2.833213 2.833213 302 +tabl 1 51 2.995732 2.995732 346 +brian 1 38 3.295837 3.295837 466 +slide 1 38 3.295837 3.295837 467 +procedur 1 36 3.367296 3.367296 488 +smith 1 20 3.951244 3.951244 820 +recurs 1 13 4.382027 4.382027 1127 +assembl 1 12 4.465908 4.465908 1207 +tour 1 11 4.553877 4.553877 1307 +stack 1 10 4.653960 4.653960 1389 +spec 1 8 4.875197 4.875197 1640 +interrupt 1 7 5.010635 5.010635 1793 +linker 1 3 5.857933 5.857933 3157 +loader 1 1 6.957497 6.957497 6560 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html new file mode 100644 index 00000000..483a8819 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS100^CS100.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +program 8 374 0.693147 5.545176 7 +inform 2 412 0.693147 1.386294 8 +cours 3 273 1.098612 3.295836 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +spring 3 131 2.079442 6.238326 88 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +exam 2 86 2.484907 4.969814 169 +wide 1 84 2.484907 2.484907 185 +april 4 77 2.564949 10.259796 196 +messag 1 76 2.564949 2.564949 212 +tuesdai 1 73 2.639057 2.639057 219 +materi 1 75 2.639057 2.639057 221 +import 1 65 2.772589 2.772589 282 +march 1 61 2.833213 2.833213 295 +februari 1 54 2.944439 2.944439 328 +get 1 46 3.091042 3.091042 380 +review 1 42 3.218876 3.218876 425 +held 1 28 3.610918 3.610918 600 +session 1 26 3.688879 3.688879 643 +thur 1 19 4.007333 4.007333 847 +prelim 4 12 4.465908 17.863632 1201 +rememb 1 12 4.465908 4.465908 1217 +tue 2 11 4.553877 9.107754 1308 +regard 1 11 4.553877 4.553877 1309 +baker 1 7 5.010635 5.010635 1812 +pierc 1 4 5.568345 5.568345 2623 +theworld 1 3 5.857933 5.857933 3158 +codewarrior 1 2 6.263398 6.263398 4125 +frequentlyfor 1 1 6.957497 6.957497 6561 +onsundai 1 1 6.957497 6.957497 6562 +personalmac 1 1 6.957497 6.957497 6563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html new file mode 100644 index 00000000..f0e85a43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS211^CS211.html @@ -0,0 +1,261 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +program 13 374 0.693147 9.010911 7 +depart 3 457 0.693147 2.079441 12 +system 2 443 0.693147 1.386294 6 +interest 1 384 0.693147 0.693147 11 +offic 7 299 1.098612 7.690284 13 +cours 5 273 1.098612 5.493060 15 +last 2 314 1.098612 2.197224 14 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +link 9 247 1.386294 12.476646 24 +cornel 8 215 1.386294 11.090352 23 +email 5 220 1.386294 6.931470 29 +languag 2 227 1.386294 2.772588 26 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +class 22 199 1.609438 35.407636 37 +list 8 201 1.609438 12.875504 39 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +hour 7 165 1.791759 12.542313 46 +algorithm 6 162 1.791759 10.750554 57 +avail 5 169 1.791759 8.958795 48 +data 4 170 1.791759 7.167036 49 +applic 3 170 1.791759 5.375277 56 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +lectur 87 135 1.945910 169.294170 73 +assign 10 135 1.945910 19.459100 66 +file 4 132 1.945910 7.783640 70 +note 3 142 1.945910 5.837730 67 +architectur 3 139 1.945910 5.837730 77 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +spring 5 131 2.079442 10.397210 88 +introduct 4 126 2.079442 8.317768 87 +analysi 4 124 2.079442 8.317768 98 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +structur 5 106 2.197225 10.986125 105 +final 4 116 2.197225 8.788900 108 +topic 3 114 2.197225 6.591675 110 +code 2 108 2.197225 4.394450 116 +find 2 111 2.197225 4.394450 111 +site 2 106 2.197225 4.394450 119 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +text 4 98 2.302585 9.210340 133 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +section 2 94 2.397895 4.795790 149 +question 1 91 2.397895 2.397895 141 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +exam 2 86 2.484907 4.969814 169 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +server 5 76 2.564949 12.824745 204 +dynam 3 76 2.564949 7.694847 194 +mondai 2 77 2.564949 5.129898 206 +april 1 77 2.564949 2.564949 196 +orient 1 80 2.564949 2.564949 205 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +sourc 1 77 2.564949 2.564949 201 +upson 15 71 2.639057 39.585855 218 +david 4 71 2.639057 10.556228 232 +tuesdai 2 73 2.639057 5.278114 219 +name 1 72 2.639057 2.639057 220 +addit 1 74 2.639057 2.639057 228 +line 1 75 2.639057 2.639057 231 +thursdai 3 70 2.708050 8.124150 241 +java 3 70 2.708050 8.124150 248 +view 2 70 2.708050 5.416100 254 +window 1 68 2.708050 2.708050 242 +handout 9 64 2.772589 24.953301 263 +function 3 62 2.772589 8.317767 275 +descript 2 64 2.772589 5.545178 271 +abstract 1 62 2.772589 2.772589 276 +complex 1 64 2.772589 2.772589 269 +type 3 61 2.833213 8.499639 296 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +simpl 1 60 2.833213 2.833213 298 +point 3 58 2.890372 8.671116 319 +summer 1 56 2.890372 2.890372 311 +unix 1 58 2.890372 2.890372 308 +think 1 57 2.890372 2.890372 314 +cover 2 55 2.944439 5.888878 329 +sampl 2 53 2.944439 5.888878 339 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +date 3 51 2.995732 8.987196 344 +tabl 1 51 2.995732 2.995732 346 +format 3 48 3.044522 9.133566 356 +basic 3 50 3.044522 9.133566 360 +pointer 3 48 3.044522 9.133566 368 +appoint 1 49 3.044522 3.044522 358 +set 1 50 3.044522 3.044522 361 +effect 1 46 3.091042 3.091042 385 +netscap 1 44 3.135494 3.135494 395 +fridai 1 44 3.135494 3.135494 390 +http 1 41 3.218876 3.218876 420 +correct 8 38 3.295837 26.366696 462 +industri 3 38 3.295837 9.887511 464 +credit 2 38 3.295837 6.591674 460 +microsoft 2 38 3.295837 6.591674 468 +tree 3 36 3.367296 10.101888 492 +staff 2 36 3.367296 6.734592 490 +soon 1 36 3.367296 3.367296 494 +word 2 34 3.401197 6.802394 508 +next 1 34 3.401197 3.401197 517 +print 1 34 3.401197 3.401197 503 +manual 1 35 3.401197 3.401197 504 +given 1 32 3.465736 3.465736 538 +storag 3 31 3.496508 10.489524 553 +framework 3 28 3.610918 10.832754 606 +held 1 28 3.610918 3.610918 600 +arrai 3 27 3.637586 10.912758 627 +session 1 26 3.688879 3.688879 643 +enhanc 1 26 3.688879 3.688879 644 +jeff 5 25 3.737670 18.688350 673 +consult 4 24 3.761200 15.044800 687 +other 1 24 3.761200 3.761200 697 +lab 1 24 3.761200 3.761200 698 +thank 2 23 3.806662 7.613324 721 +proof 1 23 3.806662 3.806662 720 +almost 1 22 3.850148 3.850148 742 +inth 1 22 3.850148 3.850148 741 +alloc 3 20 3.951244 11.853732 821 +fine 3 20 3.951244 11.853732 822 +binari 3 20 3.951244 11.853732 823 +prove 3 19 4.007333 12.021999 848 +prerequisit 1 19 4.007333 4.007333 846 +expand 2 17 4.110874 8.221748 928 +regular 1 17 4.110874 4.110874 929 +macintosh 1 17 4.110874 4.110874 920 +intro 1 17 4.110874 4.110874 915 +condit 3 16 4.174387 12.523161 975 +jose 2 16 4.174387 8.348774 976 +charact 1 15 4.248495 4.248495 1028 +piec 1 15 4.248495 4.248495 1020 +recurs 4 13 4.382027 17.528108 1127 +deriv 3 13 4.382027 13.146081 1145 +alan 1 13 4.382027 4.382027 1146 +prelim 3 12 4.465908 13.397724 1201 +iter 3 12 4.465908 13.397724 1206 +grant 1 12 4.465908 4.465908 1216 +onth 1 12 4.465908 4.465908 1218 +loop 6 11 4.553877 27.323262 1310 +chri 1 11 4.553877 4.553877 1311 +rich 2 10 4.653960 9.307920 1396 +princip 1 10 4.653960 4.653960 1397 +sundai 1 10 4.653960 4.653960 1387 +preliminari 4 9 4.753590 19.014360 1480 +notat 3 9 4.753590 14.260770 1489 +strength 3 9 4.753590 14.260770 1494 +plain 2 9 4.753590 9.507180 1495 +equival 1 9 4.753590 4.753590 1496 +intermedi 1 9 4.753590 4.753590 1497 +filter 3 8 4.875197 14.625591 1641 +printer 1 8 4.875197 4.875197 1621 +dictionari 1 8 4.875197 4.875197 1642 +none 5 7 5.010635 25.053175 1811 +noon 1 7 5.010635 5.010635 1804 +troubl 1 6 5.164786 5.164786 2002 +yale 1 6 5.164786 5.164786 2003 +contest 1 5 5.347108 5.347108 2273 +cell 1 5 5.347108 5.347108 2274 +ofprogram 1 4 5.568345 5.568345 2624 +flavor 1 4 5.568345 5.568345 2625 +haskel 1 4 5.568345 5.568345 2618 +cuinfo 1 4 5.568345 5.568345 2626 +foster 4 3 5.857933 23.431732 3159 +tripl 3 3 5.857933 17.573799 3160 +walker 2 3 5.857933 11.715866 3161 +tocomput 1 3 5.857933 5.857933 3162 +ghostview 1 3 5.857933 5.857933 3163 +maker 1 3 5.857933 5.857933 3164 +kwan 2 2 6.263398 12.526796 4126 +stuffit 2 2 6.263398 12.526796 4127 +codewarrior 2 2 6.263398 12.526796 4125 +thesecond 1 2 6.263398 6.263398 4128 +datatyp 1 2 6.263398 6.263398 4129 +csdepart 1 2 6.263398 6.263398 4130 +metrowerk 1 2 6.263398 6.263398 4131 +gofer 10 1 6.957497 69.574970 6564 +macgof 4 1 6.957497 27.829988 6565 +jfoster 3 1 6.957497 20.872491 6566 +hollist 3 1 6.957497 20.872491 6567 +binhqx 3 1 6.957497 20.872491 6568 +dynamicdata 3 1 6.957497 20.872491 6569 +curri 3 1 6.957497 20.872491 6570 +olin 2 1 6.957497 13.914994 6571 +ahal 2 1 6.957497 13.914994 6572 +walkerwednesdai 2 1 6.957497 13.914994 6573 +kaykylesteveericvasantha 2 1 6.957497 13.914994 6574 +danerickaychrisdan 2 1 6.957497 13.914994 6575 +earlyvers 1 1 6.957497 6.957497 6576 +announcetim 1 1 6.957497 6.957497 6577 +theprelim 1 1 6.957497 6.957497 6578 +wereannounc 1 1 6.957497 6.957497 6579 +lastnam 1 1 6.957497 6.957497 6580 +covereveryth 1 1 6.957497 6.957497 6581 +topicsconv 1 1 6.957497 6.957497 6582 +daywhenwherewhomondai 1 1 6.957497 6.957497 6583 +davetuesdai 1 1 6.957497 6.957497 6584 +jeffwednesdai 1 1 6.957497 6.957497 6585 +davethursdai 1 1 6.957497 6.957497 6586 +halfridai 1 1 6.957497 6.957497 6587 +halsaturdai 1 1 6.957497 6.957497 6588 +breview 1 1 6.957497 6.957497 6589 +chrisand 1 1 6.957497 6.957497 6590 +engrd 1 1 6.957497 6.957497 6591 +bothcom 1 1 6.957497 6.957497 6592 +programmingexperi 1 1 6.957497 6.957497 6593 +ofalgorithm 1 1 6.957497 6.957497 6594 +perkin 1 1 6.957497 6.957497 6595 +sectionsdaytimeroominstructortuesdai 1 1 6.957497 6.957497 6596 +perkinstuesdai 1 1 6.957497 6.957497 6597 +perkinswednesdai 1 1 6.957497 6.957497 6598 +walkerthursdai 1 1 6.957497 6.957497 6599 +fosterfridai 1 1 6.957497 6.957497 6600 +ofclass 1 1 6.957497 6.957497 6601 +consultingsundaymondaytuesdaywednesdaythursdayfridai 1 1 6.957497 6.957497 6602 +steveerickylechrisjpkyl 1 1 6.957497 6.957497 6603 +steveerickylechrisjpvasantha 1 1 6.957497 6.957497 6604 +josejosekayjosejpvasantha 1 1 6.957497 6.957497 6605 +josejosekayjosejp 1 1 6.957497 6.957497 6606 +macbinari 1 1 6.957497 6.957497 6607 +parseabl 1 1 6.957497 6.957497 6608 +waspost 1 1 6.957497 6.957497 6609 +foraladdin 1 1 6.957497 6.957497 6610 +armandonunez 1 1 6.957497 6.957497 6611 +anylas 1 1 6.957497 6.957497 6612 +applicationlik 1 1 6.957497 6.957497 6613 +ishaskel 1 1 6.957497 6.957497 6614 +systemsz 1 1 6.957497 6.957497 6615 +ofgof 1 1 6.957497 6.957497 6616 +itavail 1 1 6.957497 6.957497 6617 +enhance_assign 1 1 6.957497 6.957497 6618 +aladdin 1 1 6.957497 6.957497 6619 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html new file mode 100644 index 00000000..33672368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS212^CS212.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +cours 3 273 1.098612 3.295836 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +modifi 1 178 1.609438 1.609438 35 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +make 1 111 2.197225 2.197225 120 +section 1 94 2.397895 2.397895 149 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +materi 1 75 2.639057 2.639057 221 +room 1 59 2.833213 2.833213 301 +extens 1 53 2.944439 2.944439 340 +date 1 51 2.995732 2.995732 344 +announc 2 40 3.258097 6.516194 441 +staff 1 36 3.367296 3.367296 490 +demo 1 18 4.060443 4.060443 888 +pagec 1 15 4.248495 4.248495 1011 +happi 1 14 4.317488 4.317488 1079 +emac 1 13 4.382027 4.382027 1143 +prelim 1 12 4.465908 4.465908 1201 +departmentcornel 1 5 5.347108 5.347108 2275 +grader 1 3 5.857933 5.857933 3165 +universityspr 1 2 6.263398 6.263398 4055 +interpretationof 1 1 6.957497 6.957497 6620 +programscomput 1 1 6.957497 6.957497 6621 +macmarlai 1 1 6.957497 6.957497 6622 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html new file mode 100644 index 00000000..bea5bf94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS512^home.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +offic 4 299 1.098612 4.394448 13 +project 1 340 1.098612 1.098612 18 +languag 6 227 1.386294 8.317764 26 +email 3 220 1.386294 4.158882 29 +link 2 247 1.386294 2.772588 24 +cornel 2 215 1.386294 2.772588 23 +class 2 199 1.609438 3.218876 37 +implement 3 152 1.791759 5.375277 52 +phone 2 175 1.791759 3.583518 45 +hour 2 165 1.791759 3.583518 46 +avail 2 169 1.791759 3.583518 48 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +object 2 138 1.945910 3.891820 79 +construct 1 139 1.945910 1.945910 82 +document 2 121 2.079442 4.158884 89 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +make 1 111 2.197225 2.197225 120 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +advanc 2 99 2.302585 4.605170 130 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +environ 2 84 2.484907 4.969814 177 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +refer 2 78 2.564949 5.129898 203 +orient 1 80 2.564949 2.564949 205 +upson 3 71 2.639057 7.917171 218 +effici 1 73 2.639057 2.639057 233 +tuesdai 1 73 2.639057 2.639057 219 +line 1 75 2.639057 2.639057 231 +java 1 70 2.708050 2.708050 248 +thursdai 1 70 2.708050 2.708050 241 +descript 3 64 2.772589 8.317767 271 +handout 2 64 2.772589 5.545178 263 +abstract 1 62 2.772589 2.772589 276 +collect 1 65 2.772589 2.772589 268 +function 1 62 2.772589 2.772589 275 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +featur 2 46 3.091042 6.182084 386 +mark 1 44 3.135494 3.135494 403 +continu 1 39 3.258097 3.258097 448 +survei 1 35 3.401197 3.401197 513 +kind 1 32 3.465736 3.465736 541 +focu 1 30 3.555348 3.555348 571 +synchron 1 29 3.583519 3.583519 588 +except 1 28 3.610918 3.610918 607 +greg 1 24 3.761200 3.761200 695 +thread 1 23 3.806662 3.806662 722 +separ 1 19 4.007333 4.007333 844 +andrew 1 19 4.007333 4.007333 849 +modern 2 16 4.174387 8.348774 966 +linda 1 10 4.653960 4.653960 1394 +admin 1 9 4.753590 4.753590 1476 +evan 2 8 4.875197 9.750394 1633 +dylan 1 8 4.875197 4.875197 1625 +closur 1 8 4.875197 4.875197 1643 +leon 1 8 4.875197 4.875197 1631 +cum 1 8 4.875197 4.875197 1619 +implementationof 1 7 5.010635 5.010635 1813 +garbag 1 6 5.164786 5.164786 1986 +notabl 1 5 5.347108 5.347108 2276 +morrisett 1 5 5.347108 5.347108 2263 +gentl 1 5 5.347108 5.347108 2264 +haskel 1 4 5.568345 5.568345 2618 +polymorph 1 4 5.568345 5.568345 2627 +administrivia 1 3 5.857933 5.857933 3166 +moran 1 3 5.857933 5.857933 3151 +competillo 1 2 6.263398 6.263398 4105 +indexdocument 1 2 6.263398 6.263398 4108 +toolsa 1 2 6.263398 6.263398 4109 +descriptionhandoutsadministriviaweb 1 1 6.957497 6.957497 6623 +ofmodern 1 1 6.957497 6.957497 6624 +connectionsto 1 1 6.957497 6.957497 6625 +pmweb 1 1 6.957497 6.957497 6626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html new file mode 100644 index 00000000..7444d40f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS514^index.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +distribut 2 162 1.791759 3.583518 51 +read 1 154 1.791759 1.791759 47 +postscript 3 131 2.079442 6.238326 90 +final 1 116 2.197225 2.197225 108 +homework 2 79 2.564949 5.129898 193 +practic 1 70 2.708050 2.708050 246 +overview 1 56 2.890372 2.890372 323 +examin 2 42 3.218876 6.437752 424 +bibliographi 1 34 3.401197 3.401197 518 +annot 2 21 3.912023 7.824046 775 +prepar 1 20 3.951244 3.951244 824 +necessari 1 13 4.382027 4.382027 1147 +registr 1 5 5.347108 5.347108 2249 +informationcours 1 3 5.857933 5.857933 3167 +systemspract 1 1 6.957497 6.957497 6627 +takingc 1 1 6.957497 6.957497 6628 +logist 1 1 6.957497 6.957497 6629 +homeworkshomework 1 1 6.957497 6.957497 6630 +amexaminationsmidterm 1 1 6.957497 6.957497 6631 +bibliographiesselect 1 1 6.957497 6.957497 6632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ new file mode 100644 index 00000000..163f63e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS516^ @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +work 2 380 0.693147 1.386294 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 3 340 1.098612 3.295836 18 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +problem 2 147 1.945910 3.891820 75 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +report 2 131 2.079442 4.158884 92 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +pleas 3 113 2.197225 6.591675 114 +well 3 109 2.197225 6.591675 121 +final 3 116 2.197225 6.591675 108 +find 2 111 2.197225 4.394450 111 +check 2 115 2.197225 4.394450 118 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +present 2 91 2.397895 4.795790 145 +question 1 91 2.397895 2.397895 141 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +follow 1 92 2.397895 2.397895 143 +start 2 83 2.484907 4.969814 173 +solut 1 82 2.484907 2.484907 162 +learn 1 86 2.484907 2.484907 170 +info 1 85 2.484907 2.484907 176 +homework 3 79 2.564949 7.694847 193 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +know 1 80 2.564949 2.564949 198 +upson 3 71 2.639057 7.917171 218 +solv 1 73 2.639057 2.639057 234 +order 1 69 2.708050 2.708050 249 +import 2 65 2.772589 5.545178 282 +result 1 65 2.772589 2.772589 281 +think 1 57 2.890372 2.890372 314 +instruct 3 53 2.944439 8.833317 332 +sampl 1 53 2.944439 2.944439 339 +give 1 50 3.044522 3.044522 359 +discuss 1 45 3.135494 3.135494 399 +futur 2 41 3.218876 6.437752 427 +might 1 41 3.218876 3.218876 426 +late 1 40 3.258097 3.258097 439 +must 1 40 3.258097 3.258097 442 +open 1 38 3.295837 3.295837 469 +everi 1 34 3.401197 3.401197 519 +board 3 33 3.433987 10.301961 528 +taken 1 31 3.496508 3.496508 555 +option 1 30 3.555348 3.555348 575 +limit 1 29 3.583519 3.583519 585 +particip 1 29 3.583519 3.583519 589 +consid 1 29 3.583519 3.583519 590 +usual 1 28 3.610918 3.610918 608 +measur 1 28 3.610918 3.610918 609 +session 2 26 3.688879 7.377758 643 +experiment 1 26 3.688879 3.688879 645 +begin 1 23 3.806662 3.806662 716 +minut 2 20 3.951244 7.902488 810 +left 1 19 4.007333 4.007333 851 +sign 1 16 4.174387 4.174387 970 +across 1 16 4.174387 4.174387 974 +contribut 1 15 4.248495 4.248495 1021 +split 4 14 4.317488 17.269952 1078 +consider 1 14 4.317488 4.317488 1076 +shown 1 14 4.317488 4.317488 1080 +thorsten 2 13 4.382027 8.764054 1133 +everyon 1 13 4.382027 4.382027 1148 +difficulti 1 13 4.382027 4.382027 1132 +eicken 1 13 4.382027 4.382027 1134 +outsid 1 12 4.465908 4.465908 1219 +pick 1 9 4.753590 4.753590 1498 +hang 1 9 4.753590 4.753590 1499 +andth 1 9 4.753590 4.753590 1481 +judg 2 8 4.875197 9.750394 1644 +hold 1 8 4.875197 4.875197 1645 +absolut 1 8 4.875197 4.875197 1646 +poster 5 7 5.010635 25.053175 1814 +noon 1 7 5.010635 5.010635 1804 +trade 1 7 5.010635 5.010635 1815 +explain 1 7 5.010635 5.010635 1816 +exactli 1 7 5.010635 5.010635 1817 +plu 1 6 5.164786 5.164786 2004 +willb 1 5 5.347108 5.347108 2277 +remain 1 5 5.347108 5.347108 2278 +gotten 1 4 5.568345 5.568345 2628 +chose 1 4 5.568345 5.568345 2629 +cuc 1 4 5.568345 5.568345 2630 +attack 1 3 5.857933 5.857933 3168 +memberof 1 3 5.857933 5.857933 3169 +off 1 3 5.857933 5.857933 3170 +arriv 2 2 6.263398 12.526796 4132 +subdirectori 1 2 6.263398 6.263398 4133 +thorough 1 2 6.263398 6.263398 4134 +programmingin 1 2 6.263398 6.263398 4135 +pagehigh 1 1 6.957497 6.957497 6633 +eickenspr 1 1 6.957497 6.957497 6634 +sessionthu 1 1 6.957497 6.957497 6635 +tbdpleas 1 1 6.957497 6.957497 6636 +willdetermin 1 1 6.957497 6.957497 6637 +postersess 1 1 6.957497 6.957497 6638 +cindywilliam 1 1 6.957497 6.957497 6639 +ithorizont 1 1 6.957497 6.957497 6640 +corridor 1 1 6.957497 6.957497 6641 +presentyour 1 1 6.957497 6.957497 6642 +asens 1 1 6.957497 6.957497 6643 +contempl 1 1 6.957497 6.957497 6644 +presentationswil 1 1 6.957497 6.957497 6645 +nativespeak 1 1 6.957497 6.957497 6646 +thelongest 1 1 6.957497 6.957497 6647 +tocom 1 1 6.957497 6.957497 6648 +finalreport 1 1 6.957497 6.957497 6649 +aretri 1 1 6.957497 6.957497 6650 +thesolut 1 1 6.957497 6.957497 6651 +youreject 1 1 6.957497 6.957497 6652 +webread 1 1 6.957497 6.957497 6653 +convic 1 1 6.957497 6.957497 6654 +bestsolut 1 1 6.957497 6.957497 6655 +showcas 1 1 6.957497 6.957497 6656 +ampl 1 1 6.957497 6.957497 6657 +goodexplan 1 1 6.957497 6.957497 6658 +whatyou 1 1 6.957497 6.957497 6659 +projectsproject 1 1 6.957497 6.957497 6660 +reportsproject 1 1 6.957497 6.957497 6661 +proposalsiniti 1 1 6.957497 6.957497 6662 +ideascours 1 1 6.957497 6.957497 6663 +materialshomework 1 1 6.957497 6.957497 6664 +pagebefor 1 1 6.957497 6.957497 6665 +introc 1 1 6.957497 6.957497 6666 +casec 1 1 6.957497 6.957497 6667 +technologyc 1 1 6.957497 6.957497 6668 +cachesc 1 1 6.957497 6.957497 6669 +netsc 1 1 6.957497 6.957497 6670 +spc 1 1 6.957497 6.957497 6671 +cyou 1 1 6.957497 6.957497 6672 +emdc 1 1 6.957497 6.957497 6673 +sortingc 1 1 6.957497 6.957497 6674 +spamc 1 1 6.957497 6.957497 6675 +msgpassc 1 1 6.957497 6.957497 6676 +mpic 1 1 6.957497 6.957497 6677 +cachecohc 1 1 6.957497 6.957497 6678 +locksc 1 1 6.957497 6.957497 6679 +threadsc 1 1 6.957497 6.957497 6680 +atmc 1 1 6.957497 6.957497 6681 +netc 1 1 6.957497 6.957497 6682 +scoreboardc 1 1 6.957497 6.957497 6683 +tomasuloc 1 1 6.957497 6.957497 6684 +predc 1 1 6.957497 6.957497 6685 +superscalarc 1 1 6.957497 6.957497 6686 +busesc 1 1 6.957497 6.957497 6687 +pentiummaintain 1 1 6.957497 6.957497 6688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html new file mode 100644 index 00000000..c6e13e37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS664^CS664.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +class 1 199 1.609438 1.609438 37 +base 1 165 1.791759 1.791759 50 +lectur 3 135 1.945910 5.837730 73 +problem 2 147 1.945910 3.891820 75 +note 2 142 1.945910 3.891820 67 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +machin 2 129 2.079442 4.158884 95 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +april 9 77 2.564949 23.084541 196 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +simul 1 66 2.708050 2.708050 255 +januari 2 62 2.772589 5.545178 264 +march 6 61 2.833213 16.999278 295 +februari 8 54 2.944439 23.555512 328 +week 1 52 2.995732 2.995732 343 +set 1 50 3.044522 3.044522 361 +vision 4 41 3.218876 12.875504 430 +continu 1 39 3.258097 3.258097 448 +field 1 37 3.332205 3.332205 482 +staff 1 36 3.367296 3.367296 490 +random 1 34 3.401197 3.401197 511 +transform 3 32 3.465736 10.397208 542 +detect 1 26 3.688879 3.688879 646 +constraint 1 26 3.688879 3.688879 636 +motion 4 24 3.761200 15.044800 699 +flow 1 24 3.761200 3.761200 700 +recognit 1 23 3.806662 3.806662 723 +geometri 1 22 3.850148 3.850148 752 +geometr 1 19 4.007333 4.007333 852 +histori 1 19 4.007333 4.007333 853 +regular 2 17 4.110874 8.221748 929 +estim 1 17 4.110874 4.110874 930 +segment 1 17 4.110874 4.110874 931 +track 2 15 4.248495 8.496990 1029 +guest 2 12 4.465908 8.931816 1220 +calculu 1 12 4.465908 4.465908 1203 +optic 1 12 4.465908 4.465908 1221 +distanc 1 9 4.753590 4.753590 1500 +face 1 9 4.753590 4.753590 1501 +edg 1 8 4.875197 4.875197 1647 +stereo 2 7 5.010635 10.021270 1818 +parametr 2 7 5.010635 10.021270 1819 +ramin 1 7 5.010635 5.010635 1820 +justin 1 7 5.010635 5.010635 1789 +correl 2 5 5.347108 10.694216 2279 +variat 1 5 5.347108 5.347108 2248 +markov 1 5 5.347108 5.347108 2280 +snake 1 5 5.347108 5.347108 2281 +scribe 1 4 5.568345 5.568345 2631 +maximum 1 4 5.568345 5.568345 2632 +hausdorff 1 4 5.568345 5.568345 2633 +cont 4 3 5.857933 23.431732 3171 +likelihood 1 3 5.857933 5.857933 3172 +anneal 1 2 6.263398 6.263398 4136 +zabihteach 1 1 6.957497 6.957497 6689 +millerclass 1 1 6.957497 6.957497 6690 +phillip 1 1 6.957497 6.957497 6691 +suggestionsproblem 1 1 6.957497 6.957497 6692 +mestim 1 1 6.957497 6.957497 6693 +censu 1 1 6.957497 6.957497 6694 +eigenhausdorff 1 1 6.957497 6.957497 6695 +recognitionsect 1 1 6.957497 6.957497 6696 +equationoth 1 1 6.957497 6.957497 6697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html new file mode 100644 index 00000000..4feb4873 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Spring-96^CS674^CS674.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 3 412 0.693147 2.079441 8 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +project 4 340 1.098612 4.394448 18 +cours 3 273 1.098612 3.295836 15 +us 1 329 1.098612 1.098612 16 +cornel 2 215 1.386294 2.772588 23 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +code 2 108 2.197225 4.394450 116 +pleas 1 113 2.197225 2.197225 114 +final 1 116 2.197225 2.197225 108 +site 1 106 2.197225 2.197225 119 +part 3 98 2.302585 6.907755 129 +text 2 98 2.302585 4.605170 133 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +contain 2 81 2.484907 4.969814 174 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +environ 1 84 2.484907 2.484907 177 +materi 1 75 2.639057 2.639057 221 +integr 1 67 2.708050 2.708050 245 +descript 1 64 2.772589 2.772589 271 +content 1 59 2.833213 2.833213 302 +variou 1 56 2.890372 2.890372 317 +talk 1 53 2.944439 2.944439 336 +archiv 1 49 3.044522 3.044522 364 +pointer 1 48 3.044522 3.044522 368 +directori 3 45 3.135494 9.406482 396 +natur 1 44 3.135494 3.135494 406 +execut 1 45 3.135494 3.135494 404 +small 1 39 3.258097 3.258097 447 +ofth 1 36 3.367296 3.367296 491 +compon 1 30 3.555348 3.555348 570 +turn 1 29 3.583519 3.583519 586 +variabl 1 23 3.806662 3.806662 715 +annot 2 21 3.912023 7.824046 775 +sure 1 20 3.951244 3.951244 813 +repositori 1 17 4.110874 4.110874 932 +brown 1 16 4.174387 4.174387 977 +speech 3 12 4.465908 13.397724 1222 +tag 1 7 5.010635 5.010635 1821 +corpu 3 5 5.347108 16.041324 2282 +penn 1 3 5.857933 5.857933 3094 +pagesc 1 3 5.857933 5.857933 3133 +brill 2 2 6.263398 12.526796 4137 +treebank 1 2 6.263398 6.263398 4138 +schedulewhat 1 2 6.263398 6.263398 4139 +wordnet 2 1 6.957497 13.914994 6698 +pagecsintroduct 1 1 6.957497 6.957497 6699 +understandingcomput 1 1 6.957497 6.957497 6700 +announcementsher 1 1 6.957497 6.957497 6701 +taggerbrown 1 1 6.957497 6.957497 6702 +withpart 1 1 6.957497 6.957497 6703 +wnsearchdir 1 1 6.957497 6.957497 6704 +dict 1 1 6.957497 6.957497 6705 +iicollect 1 1 6.957497 6.957497 6706 +canus 1 1 6.957497 6.957497 6707 +francisabout 1 1 6.957497 6.957497 6708 +computationallinguist 1 1 6.957497 6.957497 6709 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html new file mode 100644 index 00000000..43023992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Courses^Summer-96^CS99^CS99.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +cours 2 273 1.098612 2.197224 15 +link 1 247 1.386294 1.386294 24 +class 4 199 1.609438 6.437752 37 +updat 1 191 1.609438 1.609438 41 +assign 2 135 1.945910 3.891820 66 +first 2 140 1.945910 3.891820 71 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +section 2 94 2.397895 4.795790 149 +follow 1 92 2.397895 2.397895 143 +info 1 85 2.484907 2.484907 176 +exam 1 86 2.484907 2.484907 169 +upson 2 71 2.639057 5.278114 218 +tuesdai 2 73 2.639057 5.278114 219 +meet 1 72 2.639057 2.639057 229 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 2 64 2.772589 5.545178 261 +room 1 59 2.833213 2.833213 301 +cover 1 55 2.944439 2.944439 329 +still 1 50 3.044522 3.044522 362 +close 1 38 3.295837 3.295837 465 +usual 1 28 3.610918 3.610918 608 +lab 2 24 3.761200 7.522400 698 +prelim 2 12 4.465908 8.931816 1201 +therefor 1 7 5.010635 5.010635 1822 +philip 1 6 5.164786 5.164786 2005 +circumst 1 5 5.347108 5.347108 2283 +materialcov 1 2 6.263398 6.263398 4140 +announcementsroom 1 1 6.957497 6.957497 6710 +unforseen 1 1 6.957497 6.957497 6711 +unableto 1 1 6.957497 6.957497 6712 +maclab 1 1 6.957497 6.957497 6713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html new file mode 100644 index 00000000..c35f21b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^mhr^681^681.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +fall 2 181 1.609438 3.218876 40 +algorithm 5 162 1.791759 8.958795 57 +implement 1 152 1.791759 1.791759 52 +lectur 23 135 1.945910 44.755930 73 +professor 1 137 1.945910 1.945910 76 +find 3 111 2.197225 6.591675 111 +solut 1 82 2.484907 2.484907 162 +dynam 1 76 2.564949 2.564949 194 +explor 1 58 2.890372 2.890372 324 +tree 2 36 3.367296 6.734592 492 +graph 1 30 3.555348 3.555348 576 +theorem 1 21 3.912023 3.912023 786 +matrix 1 17 4.110874 4.110874 933 +closur 1 8 4.875197 4.875197 1643 +karp 1 5 5.347108 5.347108 2284 +union 3 4 5.568345 16.705035 2634 +push 2 4 5.568345 11.136690 2635 +ford 1 4 5.568345 5.568345 2636 +heap 3 3 5.857933 17.573799 3123 +dijkstra 1 3 5.857933 5.857933 3173 +monika 1 2 6.263398 6.263398 4141 +rauch 1 2 6.263398 6.263398 4142 +greedi 1 2 6.263398 6.263398 4143 +edmond 1 2 6.263398 6.263398 4144 +maxflow 7 1 6.957497 48.702479 6714 +matroid 2 1 6.957497 13.914994 6715 +binomi 2 1 6.957497 13.914994 6716 +preflow 2 1 6.957497 13.914994 6717 +henzingeremail 1 1 6.957497 6.957497 6718 +informationhomework 1 1 6.957497 6.957497 6719 +bellman 1 1 6.957497 6.957497 6720 +fibonacci 1 1 6.957497 6.957497 6721 +treap 1 1 6.957497 6.957497 6722 +randomizedsearch 1 1 6.957497 6.957497 6723 +mincut 1 1 6.957497 6.957497 6724 +dinitz 1 1 6.957497 6.957497 6725 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html new file mode 100644 index 00000000..6073f139 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^nikos^CS222^cs222.html @@ -0,0 +1,213 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +work 3 380 0.693147 2.079441 9 +system 3 443 0.693147 2.079441 6 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 6 273 1.098612 6.591672 15 +offic 4 299 1.098612 4.394448 13 +time 3 293 1.098612 3.295836 17 +student 2 343 1.098612 2.197224 19 +us 1 329 1.098612 1.098612 16 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +class 9 199 1.609438 14.484942 37 +includ 2 208 1.609438 3.218876 42 +list 1 201 1.609438 1.609438 39 +oper 1 180 1.609438 1.609438 34 +hour 2 165 1.791759 3.583518 46 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +assign 13 135 1.945910 25.296830 66 +problem 4 147 1.945910 7.783640 75 +hall 3 146 1.945910 5.837730 65 +file 3 132 1.945910 5.837730 70 +lectur 1 135 1.945910 1.945910 73 +introduct 6 126 2.079442 12.476652 87 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +final 7 116 2.197225 15.380575 108 +code 4 108 2.197225 8.788900 116 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +need 1 98 2.302585 2.302585 135 +grade 7 90 2.397895 16.785265 142 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +exam 3 86 2.484907 7.454721 169 +chang 2 82 2.484907 4.969814 163 +environ 1 84 2.484907 2.484907 177 +june 5 79 2.564949 12.824745 214 +sourc 4 77 2.564949 10.259796 201 +method 2 80 2.564949 5.129898 213 +exampl 2 77 2.564949 5.129898 195 +upson 7 71 2.639057 18.473399 218 +solv 2 73 2.639057 5.278114 234 +name 2 72 2.639057 5.278114 220 +effici 1 73 2.639057 2.639057 233 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +syllabu 2 67 2.708050 5.416100 247 +integr 1 67 2.708050 2.708050 245 +order 1 69 2.708050 2.708050 249 +handout 2 64 2.772589 5.545178 263 +function 2 62 2.772589 5.545178 275 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +plan 1 65 2.772589 2.772589 272 +juli 23 60 2.833213 65.163899 305 +best 1 59 2.833213 2.833213 299 +locat 1 59 2.833213 2.833213 303 +unix 2 58 2.890372 5.780744 308 +point 1 58 2.890372 2.890372 319 +scientif 4 53 2.944439 11.777756 341 +three 1 54 2.944439 2.944439 330 +numer 2 49 3.044522 6.089044 369 +appoint 2 49 3.044522 6.089044 358 +set 1 50 3.044522 3.044522 361 +done 2 47 3.091042 6.182084 381 +adapt 1 46 3.091042 3.091042 387 +midterm 3 45 3.135494 9.406482 392 +math 2 44 3.135494 6.270988 402 +linear 4 41 3.218876 12.875504 431 +review 2 42 3.218876 6.437752 425 +late 1 40 3.258097 3.258097 439 +error 1 40 3.258097 3.258097 449 +credit 2 38 3.295837 6.591674 460 +hand 2 37 3.332205 6.664410 475 +least 2 35 3.401197 6.802394 516 +everi 1 34 3.401197 3.401197 519 +return 1 34 3.401197 3.401197 502 +given 1 32 3.465736 3.465736 538 +chapter 1 32 3.465736 3.465736 536 +administr 1 27 3.637586 3.637586 628 +though 1 27 3.637586 3.637586 622 +rule 1 26 3.688879 3.688879 638 +session 1 26 3.688879 3.688879 643 +reliabl 1 25 3.737670 3.737670 674 +concern 1 25 3.737670 3.737670 666 +valu 1 25 3.737670 3.737670 665 +lab 2 24 3.761200 7.522400 698 +equat 2 23 3.806662 7.613324 724 +begin 1 23 3.806662 3.806662 716 +variabl 1 23 3.806662 3.806662 715 +initi 1 23 3.806662 3.806662 717 +highli 1 23 3.806662 3.806662 725 +brows 1 23 3.806662 3.806662 726 +dai 1 22 3.850148 3.850148 753 +recommend 1 22 3.850148 3.850148 737 +instal 1 22 3.850148 3.850148 754 +viewer 1 21 3.912023 3.912023 787 +minim 2 18 4.060443 8.120886 887 +record 1 18 4.060443 4.060443 890 +account 1 18 4.060443 4.060443 882 +accept 1 18 4.060443 4.060443 879 +stand 1 18 4.060443 4.060443 891 +matrix 1 17 4.110874 4.110874 933 +macintosh 1 17 4.110874 4.110874 920 +adam 1 17 4.110874 4.110874 934 +vector 2 16 4.174387 8.348774 961 +score 2 15 4.248495 8.496990 1017 +purchas 1 15 4.248495 4.248495 1030 +matlab 7 14 4.317488 30.222416 1081 +squar 2 14 4.317488 8.634976 1082 +rank 1 14 4.317488 4.317488 1063 +polynomi 1 14 4.317488 4.317488 1069 +command 1 14 4.317488 4.317488 1083 +charl 1 13 4.382027 4.382027 1149 +composit 1 13 4.382027 4.382027 1150 +outsid 1 12 4.465908 4.465908 1219 +extra 1 11 4.553877 4.553877 1312 +total 2 10 4.653960 9.307920 1398 +matric 1 10 4.653960 4.653960 1399 +length 1 10 4.653960 4.653960 1400 +deadlin 4 9 4.753590 19.014360 1502 +pair 1 9 4.753590 4.753590 1503 +rel 1 9 4.753590 4.753590 1487 +float 1 9 4.753590 4.753590 1504 +partner 1 8 4.875197 4.875197 1648 +calendar 1 8 4.875197 4.875197 1649 +root 1 8 4.875197 4.875197 1650 +on 1 8 4.875197 4.875197 1628 +interpol 3 7 5.010635 15.031905 1823 +newton 2 7 5.010635 10.021270 1824 +elementari 1 7 5.010635 5.010635 1825 +accord 1 7 5.010635 5.010635 1826 +classroom 2 6 5.164786 10.329572 2006 +spline 1 6 5.164786 5.164786 2007 +drop 1 6 5.164786 5.164786 2008 +otherthan 1 6 5.164786 5.164786 2009 +fit 1 5 5.347108 5.347108 2285 +stabil 1 5 5.347108 5.347108 2286 +worst 1 5 5.347108 5.347108 2287 +ignor 1 5 5.347108 5.347108 2288 +registr 1 5 5.347108 5.347108 2249 +niko 2 4 5.568345 11.136690 2637 +backward 1 4 5.568345 5.568345 2638 +alon 2 3 5.857933 11.715866 3139 +euler 2 3 5.857933 11.715866 3174 +pitsiani 1 3 5.857933 5.857933 3175 +rack 1 3 5.857933 5.857933 3176 +duedat 1 3 5.857933 5.857933 3105 +uncompress 1 3 5.857933 5.857933 3177 +scientificcomput 1 2 6.263398 6.263398 4145 +stress 1 2 6.263398 6.263398 4146 +prerequisitesc 1 2 6.263398 6.263398 4058 +loan 1 2 6.263398 6.263398 4147 +renssela 1 2 6.263398 6.263398 4148 +examsther 1 2 6.263398 6.263398 4149 +hermit 1 2 6.263398 6.263398 4150 +multivari 1 2 6.263398 6.263398 4151 +folder 1 2 6.263398 6.263398 4152 +scmv 3 1 6.957497 20.872491 6726 +quadratur 2 1 6.957497 13.914994 6727 +ozan 2 1 6.957497 13.914994 6728 +siblei 2 1 6.957497 13.914994 6729 +martha 2 1 6.957497 13.914994 6730 +cubic 2 1 6.957497 13.914994 6731 +zcat 2 1 6.957497 13.914994 6732 +computationsumm 1 1 6.957497 6.957497 6733 +setsan 1 1 6.957497 6.957497 6734 +andnonlinear 1 1 6.957497 6.957497 6735 +ordinarydifferenti 1 1 6.957497 6.957497 6736 +informationstaff 1 1 6.957497 6.957497 6737 +hafizogullari 1 1 6.957497 6.957497 6738 +lecturesclass 1 1 6.957497 6.957497 6739 +administrationlauri 1 1 6.957497 6.957497 6740 +buck 1 1 6.957497 6.957497 6741 +addressedto 1 1 6.957497 6.957497 6742 +corequisit 1 1 6.957497 6.957497 6743 +materialstext 1 1 6.957497 6.957497 6744 +approachus 1 1 6.957497 6.957497 6745 +eitherth 1 1 6.957497 6.957497 6746 +labsthi 1 1 6.957497 6.957497 6747 +setsther 1 1 6.957497 6.957497 6748 +orfrom 1 1 6.957497 6.957497 6749 +computingproblem 1 1 6.957497 6.957497 6750 +behandl 1 1 6.957497 6.957497 6751 +gradefrom 1 1 6.957497 6.957497 6752 +printyour 1 1 6.957497 6.957497 6753 +firstpag 1 1 6.957497 6.957497 6754 +partnernam 1 1 6.957497 6.957497 6755 +gradingyour 1 1 6.957497 6.957497 6756 +beassign 1 1 6.957497 6.957497 6757 +onyour 1 1 6.957497 6.957497 6758 +vandermond 1 1 6.957497 6.957497 6759 +piecewis 1 1 6.957497 6.957497 6760 +cote 1 1 6.957497 6.957497 6761 +choleski 1 1 6.957497 6.957497 6762 +rung 1 1 6.957497 6.957497 6763 +kutta 1 1 6.957497 6.957497 6764 +computingat 1 1 6.957497 6.957497 6765 +rennselaerhal 1 1 6.957497 6.957497 6766 +untar 1 1 6.957497 6.957497 6767 +randperm 1 1 6.957497 6.957497 6768 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html new file mode 100644 index 00000000..961abc0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^prakas^cs414^cs414.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +system 11 443 0.693147 7.624617 6 +program 4 374 0.693147 2.772588 7 +inform 1 412 0.693147 0.693147 8 +cours 8 273 1.098612 8.788896 15 +offic 2 299 1.098612 2.197224 13 +also 4 259 1.386294 5.545176 28 +cornel 2 215 1.386294 2.772588 23 +design 1 213 1.386294 1.386294 25 +class 10 199 1.609438 16.094380 37 +oper 5 180 1.609438 8.047190 34 +group 2 183 1.609438 3.218876 36 +includ 1 208 1.609438 1.609438 42 +hour 2 165 1.791759 3.583518 46 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +file 3 132 1.945910 5.837730 70 +first 2 140 1.945910 3.891820 71 +architectur 1 139 1.945910 1.945910 77 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +schedul 2 119 2.079442 4.158884 85 +final 4 116 2.197225 8.788900 108 +instructor 2 108 2.197225 4.394450 107 +send 2 114 2.197225 4.394450 109 +topic 2 114 2.197225 4.394450 110 +look 2 107 2.197225 4.394450 115 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +memori 3 101 2.302585 6.907755 139 +book 2 99 2.302585 4.605170 131 +advanc 1 99 2.302585 2.302585 130 +text 1 98 2.302585 2.302585 133 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +comment 2 93 2.397895 4.795790 146 +follow 2 92 2.397895 4.795790 143 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +requir 2 81 2.484907 4.969814 167 +start 2 83 2.484907 4.969814 173 +environ 1 84 2.484907 2.484907 177 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +second 1 81 2.484907 2.484907 166 +issu 3 78 2.564949 7.694847 211 +method 2 80 2.564949 5.129898 213 +complet 1 77 2.564949 2.564949 208 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +materi 7 75 2.639057 18.473399 221 +logic 1 71 2.639057 2.639057 230 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +tuesdai 1 73 2.639057 2.639057 219 +upson 1 71 2.639057 2.639057 218 +thursdai 3 70 2.708050 8.124150 241 +knowledg 1 67 2.708050 2.708050 243 +descript 2 64 2.772589 5.545178 271 +polici 2 64 2.772589 5.545178 279 +virtual 2 62 2.772589 5.545178 285 +organ 1 65 2.772589 2.772589 265 +copi 1 63 2.772589 2.772589 284 +wednesdai 1 64 2.772589 2.772589 261 +share 1 59 2.833213 2.833213 304 +content 1 59 2.833213 2.833213 302 +summer 2 56 2.890372 5.780744 311 +overview 1 56 2.890372 2.890372 323 +variou 1 56 2.890372 2.890372 317 +cover 3 55 2.944439 8.833317 329 +particular 2 51 2.995732 5.991464 352 +week 2 52 2.995732 5.991464 343 +maintain 1 51 2.995732 2.995732 342 +archiv 1 49 3.044522 3.044522 364 +understand 1 47 3.091042 3.091042 384 +discuss 2 45 3.135494 6.270988 399 +textbook 1 44 3.135494 3.135494 397 +fast 1 42 3.218876 3.218876 429 +form 1 39 3.258097 3.258097 443 +submit 1 39 3.258097 3.258097 440 +close 1 38 3.295837 3.295837 465 +purpos 1 37 3.332205 3.332205 481 +hand 1 37 3.332205 3.332205 475 +next 2 34 3.401197 6.802394 517 +concurr 1 34 3.401197 3.401197 501 +collabor 2 32 3.465736 6.931472 543 +secur 1 30 3.555348 3.555348 577 +depend 1 29 3.583519 3.583519 583 +synchron 1 29 3.583519 3.583519 588 +multiprocessor 1 28 3.610918 3.610918 605 +subject 3 26 3.688879 11.066637 647 +detect 1 26 3.688879 3.688879 646 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +programminglanguag 1 21 3.912023 3.912023 782 +kernel 1 20 3.951244 3.951244 825 +assum 1 19 4.007333 4.007333 845 +feedback 1 19 4.007333 4.007333 854 +outlin 1 17 4.110874 4.110874 914 +protect 1 17 4.110874 4.110874 935 +segment 1 17 4.110874 4.110874 931 +weekli 1 17 4.110874 4.110874 919 +micro 1 15 4.248495 4.248495 1031 +quizz 5 13 4.382027 21.910135 1151 +carri 1 13 4.382027 4.382027 1152 +assembl 1 12 4.465908 4.465908 1207 +statement 2 11 4.553877 9.107754 1313 +worth 2 11 4.553877 9.107754 1294 +evolut 1 11 4.553877 4.553877 1314 +multithread 1 11 4.553877 4.553877 1315 +peter 1 11 4.553877 4.553877 1316 +operatingsystem 1 10 4.653960 4.653960 1401 +princip 1 10 4.653960 4.653960 1397 +familiar 1 9 4.753590 4.753590 1485 +attent 1 8 4.875197 4.875197 1651 +remind 1 7 5.010635 5.010635 1799 +prevent 1 7 5.010635 5.010635 1827 +surpris 1 7 5.010635 5.010635 1828 +multiprogram 1 6 5.164786 5.164786 2010 +pace 1 6 5.164786 5.164786 2011 +ensur 1 6 5.164786 5.164786 2012 +silberschatz 1 6 5.164786 5.164786 1978 +indupraka 2 4 5.568345 11.136690 2639 +kodukula 2 4 5.568345 11.136690 2640 +deadlock 2 4 5.568345 11.136690 2641 +permiss 1 4 5.568345 5.568345 2642 +usedto 1 4 5.568345 5.568345 2643 +abraham 1 4 5.568345 5.568345 2644 +roughli 2 3 5.857933 11.715866 3097 +prereq 1 3 5.857933 5.857933 3178 +theimpact 1 3 5.857933 5.857933 3179 +audienc 1 3 5.857933 5.857933 3180 +serverless 1 3 5.857933 5.857933 3181 +todetermin 1 3 5.857933 5.857933 3182 +nawaaz 3 2 6.263398 18.790194 4153 +ahm 2 2 6.263398 12.526796 4154 +praka 1 2 6.263398 6.263398 4155 +anintroduct 1 2 6.263398 6.263398 4156 +emphasison 1 2 6.263398 6.263398 4157 +memorymanag 1 2 6.263398 6.263398 4158 +thetradit 1 2 6.263398 6.263398 4159 +galvin 1 2 6.263398 6.263398 4160 +motd 2 1 6.957497 13.914994 6769 +lldiscuss 2 1 6.957497 13.914994 6770 +prerequsit 1 1 6.957497 6.957497 6771 +processsynchron 1 1 6.957497 6.957497 6772 +requiringconst 1 1 6.957497 6.957497 6773 +prerequsitescomplet 1 1 6.957497 6.957497 6774 +inparticular 1 1 6.957497 6.957497 6775 +theintroductori 1 1 6.957497 6.957497 6776 +thatwil 1 1 6.957497 6.957497 6777 +outlineth 1 1 6.957497 6.957497 6778 +theorder 1 1 6.957497 6.957497 6779 +mutualexclus 1 1 6.957497 6.957497 6780 +timepermit 1 1 6.957497 6.957497 6781 +textbooksth 1 1 6.957497 6.957497 6782 +conceptsbook 1 1 6.957497 6.957497 6783 +distributeclass 1 1 6.957497 6.957497 6784 +noteswil 1 1 6.957497 6.957497 6785 +pageat 1 1 6.957497 6.957497 6786 +mondaythru 1 1 6.957497 6.957497 6787 +thesewil 1 1 6.957497 6.957497 6788 +thursdayat 1 1 6.957497 6.957497 6789 +gradingeach 1 1 6.957497 6.957497 6790 +weightag 1 1 6.957497 6.957497 6791 +combinedweightag 1 1 6.957497 6.957497 6792 +twomidterm 1 1 6.957497 6.957497 6793 +collaborationat 1 1 6.957497 6.957497 6794 +eachhomework 1 1 6.957497 6.957497 6795 +thehomework 1 1 6.957497 6.957497 6796 +closednot 1 1 6.957497 6.957497 6797 +induprakaskodukula 1 1 6.957497 6.957497 6798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html new file mode 100644 index 00000000..f06b7f74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^People^tah^cs611.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +cornel 2 215 1.386294 2.772588 23 +class 1 199 1.609438 1.609438 37 +hour 2 165 1.791759 3.583518 46 +lectur 32 135 1.945910 62.269120 73 +note 22 142 1.945910 42.810020 67 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +advanc 1 99 2.302585 2.302585 130 +octob 19 89 2.397895 45.560005 156 +novemb 16 81 2.484907 39.758512 179 +solut 9 82 2.484907 22.364163 162 +start 1 83 2.484907 2.484907 173 +decemb 8 80 2.564949 20.519592 215 +homework 7 79 2.564949 17.954643 193 +upson 1 71 2.639057 2.639057 218 +septemb 24 65 2.772589 66.542136 274 +appoint 2 49 3.044522 6.089044 358 +get 1 46 3.091042 3.091042 380 +midterm 1 45 3.135494 3.135494 392 +meta 1 9 4.753590 4.753590 1505 +scribe 1 4 5.568345 5.568345 2631 +csc 1 3 5.857933 5.857933 3183 +neal 1 3 5.857933 5.857933 3184 +languagesfal 1 2 6.263398 6.263398 4161 +glew 1 2 6.263398 6.263398 4162 +informationhandout 1 2 6.263398 6.263398 4163 +henzingerupson 1 1 6.957497 6.957497 6799 +glewupson 1 1 6.957497 6.957497 6800 +handoutshandout 1 1 6.957497 6.957497 6801 +mlhandout 1 1 6.957497 6.957497 6802 +lambdahomeworkshomework 1 1 6.957497 6.957497 6803 +grieshomework 1 1 6.957497 6.957497 6804 +notesraw 1 1 6.957497 6.957497 6805 +noteslectur 1 1 6.957497 6.957497 6806 +mllectur 1 1 6.957497 6.957497 6807 +grieslectur 1 1 6.957497 6.957497 6808 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html new file mode 100644 index 00000000..b7c58845 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^cs611^CS611.html @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +fall 7 181 1.609438 11.266066 40 +hour 2 165 1.791759 3.583518 46 +note 4 142 1.945910 7.783640 67 +assign 1 135 1.945910 1.945910 66 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +pleas 1 113 2.197225 2.197225 114 +advanc 1 99 2.302585 2.302585 130 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +mondai 1 77 2.564949 2.564949 206 +upson 2 71 2.639057 5.278114 218 +prof 1 64 2.772589 2.772589 273 +faculti 1 56 2.890372 2.890372 325 +suggest 1 53 2.944439 2.944439 331 +robert 1 30 3.555348 3.555348 567 +thur 1 19 4.007333 4.007333 847 +classic 1 14 4.317488 4.317488 1084 +nuprl 1 10 4.653960 4.653960 1402 +kumar 1 9 4.753590 4.753590 1506 +ravi 2 3 5.857933 11.715866 3185 +constabl 1 3 5.857933 5.857933 3186 +pavel 1 2 6.263398 6.263398 4164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ new file mode 100644 index 00000000..64a25d1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^UTCS^courses^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +cours 2 273 1.098612 2.197224 15 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +spring 1 131 2.079442 2.079442 88 +member 1 84 2.484907 2.484907 165 +addit 2 74 2.639057 5.278114 228 +maintain 1 51 2.995732 2.995732 342 +consult 1 24 3.761200 3.761200 687 +coursesc 1 4 5.568345 5.568345 2692 +individualfaculti 1 1 6.957497 6.957497 7418 +contactgloria 1 1 6.957497 6.957497 7419 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ new file mode 100644 index 00000000..8b573fb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^almstrum^classes^cs336^fall96^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +utexa 3 189 1.609438 4.828314 44 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +austin 1 168 1.791759 1.791759 63 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +analysi 2 124 2.079442 4.158884 98 +welcom 1 122 2.079442 2.079442 99 +instructor 2 108 2.197225 4.394450 107 +send 1 114 2.197225 2.197225 109 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +homework 1 79 2.564949 2.564949 193 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +new 1 64 2.772589 2.772589 262 +suggest 1 53 2.944439 2.944439 331 +announc 1 40 3.258097 3.258097 441 +tutori 1 39 3.258097 3.258097 437 +prepar 1 20 3.951244 3.951244 824 +yang 1 8 4.875197 4.875197 1652 +vicki 2 3 5.857933 11.715866 3187 +almstrum 4 2 6.263398 25.053592 4165 +linyuan 2 1 6.957497 13.914994 6809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html new file mode 100644 index 00000000..76704c7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^boyer^courses^cs395t-spring96.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 5 443 0.693147 3.465735 6 +program 2 374 0.693147 1.386294 7 +cours 1 273 1.098612 1.098612 15 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +tool 4 117 2.079442 8.317768 93 +number 2 130 2.079442 4.158884 97 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +schedul 1 119 2.079442 2.079442 85 +theori 2 111 2.197225 4.394450 127 +version 2 113 2.197225 4.394450 122 +final 2 116 2.197225 4.394450 108 +world 1 115 2.197225 2.197225 126 +instructor 1 108 2.197225 2.197225 107 +make 1 111 2.197225 2.197225 120 +text 1 98 2.302585 2.302585 133 +present 4 91 2.397895 9.591580 145 +mani 2 92 2.397895 4.795790 150 +grade 1 90 2.397895 2.397895 142 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +help 1 83 2.484907 2.484907 175 +april 5 77 2.564949 12.824745 196 +method 4 80 2.564949 10.259796 213 +exampl 1 77 2.564949 2.564949 195 +good 1 77 2.564949 2.564949 200 +logic 3 71 2.639057 7.917171 230 +order 2 69 2.708050 5.416100 249 +differ 1 66 2.708050 2.708050 253 +test 1 66 2.708050 2.708050 252 +foundat 2 62 2.772589 5.545178 286 +taylor 1 63 2.772589 2.772589 287 +interact 1 62 2.772589 2.772589 270 +copi 1 63 2.772589 2.772589 284 +type 1 61 2.833213 2.833213 296 +reason 4 57 2.890372 11.561488 318 +approach 1 48 3.044522 3.044522 366 +numer 1 49 3.044522 3.044522 369 +algebra 1 45 3.135494 3.135494 394 +mechan 1 43 3.178054 3.178054 416 +examin 3 42 3.218876 9.656628 424 +continu 1 39 3.258097 3.258097 448 +formal 6 37 3.332205 19.993230 478 +either 1 35 3.401197 3.401197 506 +specifi 1 30 3.555348 3.555348 568 +hope 1 28 3.610918 3.610918 610 +higher 1 24 3.761200 3.761200 690 +consult 1 24 3.761200 3.761200 687 +equat 1 23 3.806662 3.806662 724 +tent 1 22 3.850148 3.850148 739 +moor 1 17 4.110874 4.110874 936 +upon 2 16 4.174387 8.348774 978 +choos 1 16 4.174387 4.174387 964 +choic 1 16 4.174387 4.174387 979 +squar 1 14 4.317488 4.317488 1082 +recurs 1 13 4.382027 4.382027 1127 +guest 1 12 4.465908 4.465908 1220 +primit 1 11 4.553877 4.553877 1317 +arithmet 1 10 4.653960 4.653960 1388 +nuprl 1 10 4.653960 4.653960 1402 +pair 1 9 4.753590 4.753590 1503 +russel 1 9 4.753590 4.753590 1507 +prover 1 8 4.875197 4.875197 1653 +root 1 8 4.875197 4.875197 1650 +chief 1 7 5.010635 5.010635 1829 +boyer 1 6 5.164786 5.164786 2013 +freeli 1 6 5.164786 5.164786 2014 +backup 1 4 5.568345 5.568345 2645 +rick 1 4 5.568345 5.568345 2646 +lego 1 3 5.857933 5.857933 3188 +oral 1 3 5.857933 5.857933 3189 +samuel 1 3 5.857933 5.857933 3155 +sawada 1 3 5.857933 5.857933 3190 +otter 3 2 6.263398 18.790194 4166 +ofmathemat 1 2 6.263398 6.263398 4167 +nelson 1 2 6.263398 6.263398 4168 +ortool 1 2 6.263398 6.263398 4169 +bowen 1 2 6.263398 6.263398 4170 +guyer 1 2 6.263398 6.263398 4171 +blurb 1 1 6.957497 6.957497 6810 +theobject 1 1 6.957497 6.957497 6811 +formalizationof 1 1 6.957497 6.957497 6812 +creationof 1 1 6.957497 6.957497 6813 +systemsfor 1 1 6.957497 6.957497 6814 +formalmethod 1 1 6.957497 6.957497 6815 +suchsystem 1 1 6.957497 6.957497 6816 +imp 1 1 6.957497 6.957497 6817 +mizar 1 1 6.957497 6.957497 6818 +quaif 1 1 6.957497 6.957497 6819 +coqstud 1 1 6.957497 6.957497 6820 +aboutthes 1 1 6.957497 6.957497 6821 +projecthtml 1 1 6.957497 6.957497 6822 +theqe 1 1 6.957497 6.957497 6823 +manifestoplain 1 1 6.957497 6.957497 6824 +qedmanifestobowen 1 1 6.957497 6.957497 6825 +localform 1 1 6.957497 6.957497 6826 +tannei 1 1 6.957497 6.957497 6827 +trevor 1 1 6.957497 6.957497 6828 +hick 1 1 6.957497 6.957497 6829 +ruben 1 1 6.957497 6.957497 6830 +gamboa 1 1 6.957497 6.957497 6831 +circal 1 1 6.957497 6.957497 6832 +turpin 1 1 6.957497 6.957497 6833 +galoi 1 1 6.957497 6.957497 6834 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html new file mode 100644 index 00000000..cd80c75f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^GradArch^index.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +project 8 340 1.098612 8.788896 18 +cours 3 273 1.098612 3.295836 15 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +design 3 213 1.386294 4.158882 25 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +parallel 2 169 1.791759 3.583518 60 +network 2 168 1.791759 3.583518 61 +hour 2 165 1.791759 3.583518 46 +read 1 154 1.791759 1.791759 47 +architectur 2 139 1.945910 3.891820 77 +perform 1 143 1.945910 1.945910 74 +note 1 142 1.945910 1.945910 67 +analysi 2 124 2.079442 4.158884 98 +spring 2 131 2.079442 4.158884 88 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +memori 4 101 2.302585 9.210340 139 +advanc 2 99 2.302585 4.605170 130 +techniqu 1 99 2.302585 2.302585 138 +select 1 91 2.397895 2.397895 154 +grade 1 90 2.397895 2.397895 142 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +exam 1 86 2.484907 2.484907 169 +orient 1 80 2.564949 2.564949 205 +homework 1 79 2.564949 2.564949 193 +dynam 1 76 2.564949 2.564949 194 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +differ 1 66 2.708050 2.708050 253 +written 1 63 2.772589 2.772589 278 +major 2 56 2.890372 5.780744 315 +variou 1 56 2.890372 2.890372 317 +space 1 57 2.890372 2.890372 310 +instruct 1 53 2.944439 2.944439 332 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +pointer 1 48 3.044522 3.044522 368 +midterm 3 45 3.135494 9.406482 392 +textbook 1 44 3.135494 3.135494 397 +edit 2 42 3.218876 6.437752 418 +review 1 42 3.218876 3.218876 425 +cach 1 41 3.218876 3.218876 432 +compani 1 41 3.218876 3.218876 423 +cost 1 37 3.332205 3.332205 480 +tech 2 35 3.401197 6.802394 515 +bibliographi 1 34 3.401197 3.401197 518 +product 1 33 3.433987 3.433987 527 +compon 1 30 3.555348 3.555348 570 +focus 1 29 3.583519 3.583519 584 +particip 1 29 3.583519 3.583519 589 +limit 1 29 3.583519 3.583519 585 +administr 1 27 3.637586 3.637586 628 +static 1 27 3.637586 3.637586 619 +compar 1 26 3.688879 3.688879 648 +altern 1 26 3.688879 3.688879 641 +aspect 1 25 3.737670 3.737670 663 +input 1 23 3.806662 3.806662 727 +emphasi 1 22 3.850148 3.850148 755 +recommend 1 22 3.850148 3.850148 737 +disk 1 22 3.850148 3.850148 747 +output 1 21 3.912023 3.912023 788 +watch 1 21 3.912023 3.912023 789 +predict 1 19 4.007333 4.007333 855 +appropri 1 18 4.060443 4.060443 883 +interconnect 1 17 4.110874 4.110874 937 +attempt 1 17 4.110874 4.110874 917 +intro 1 17 4.110874 4.110874 915 +modern 1 16 4.174387 4.174387 966 +choic 1 16 4.174387 4.174387 979 +choos 1 16 4.174387 4.174387 964 +vector 1 16 4.174387 4.174387 961 +branch 2 11 4.553877 9.107754 1318 +errata 1 10 4.653960 4.653960 1403 +pair 2 9 4.753590 9.507180 1503 +significantli 1 9 4.753590 4.753590 1508 +admin 1 9 4.753590 4.753590 1476 +quantit 2 8 4.875197 9.750394 1654 +pipelin 3 7 5.010635 15.031905 1830 +metric 1 7 5.010635 5.010635 1831 +subsystem 1 6 5.164786 5.164786 2015 +hennessi 2 5 5.347108 10.694216 2289 +computerarchitectur 1 5 5.347108 5.347108 2290 +reveal 1 4 5.568345 5.568345 2647 +queu 1 4 5.568345 5.568345 2648 +buss 1 4 5.568345 5.568345 2649 +hazard 2 3 5.857933 11.715866 3191 +evaluationof 1 3 5.857933 5.857933 3192 +tertiari 1 3 5.857933 5.857933 3193 +mpp 1 3 5.857933 5.857933 3194 +insystem 1 2 6.263398 6.263398 4172 +dram 1 2 6.263398 6.263398 4173 +stedit 2 1 6.957497 13.914994 6835 +predictionch 2 1 6.957497 13.914994 6836 +revieww 2 1 6.957497 13.914994 6837 +architecturethi 1 1 6.957497 6.957497 6838 +benchmarksto 1 1 6.957497 6.957497 6839 +highperform 1 1 6.957497 6.957497 6840 +memoryhierarchi 1 1 6.957497 6.957497 6841 +studentswil 1 1 6.957497 6.957497 6842 +undertak 1 1 6.957497 6.957497 6843 +oftheir 1 1 6.957497 6.957497 6844 +informationuniqu 1 1 6.957497 6.957497 6845 +mikedahlinoffic 1 1 6.957497 6.957497 6846 +tbdtaoffic 1 1 6.957497 6.957497 6847 +tbdreadingstextbook 1 1 6.957497 6.957497 6848 +patteson 1 1 6.957497 6.957497 6849 +sheetfor 1 1 6.957497 6.957497 6850 +pattersonin 1 1 6.957497 6.957497 6851 +currentcomput 1 1 6.957497 6.957497 6852 +readinglist 1 1 6.957497 6.957497 6853 +scheduleweekdatetopicreadingduejan 1 1 6.957497 6.957497 6854 +perf 1 1 6.957497 6.957497 6855 +amdahl 1 1 6.957497 6.957497 6856 +trendsch 1 1 6.957497 6.957497 6857 +isa 1 1 6.957497 6.957497 6858 +mlkholidayf 1 1 6.957497 6.957497 6859 +proposalfeb 1 1 6.957497 6.957497 6860 +scoreboard 1 1 6.957497 6.957497 6861 +tomasulu 1 1 6.957497 6.957497 6862 +speculationch 1 1 6.957497 6.957497 6863 +processorsch 1 1 6.957497 6.957497 6864 +dfeb 1 1 6.957497 6.957497 6865 +hierarchych 1 1 6.957497 6.957497 6866 +surveyfeb 1 1 6.957497 6.957497 6867 +banksf 1 1 6.957497 6.957497 6868 +breakm 1 1 6.957497 6.957497 6869 +breakmar 1 1 6.957497 6.957497 6870 +raidch 1 1 6.957497 6.957497 6871 +networksf 1 1 6.957497 6.957497 6872 +networksch 1 1 6.957497 6.957497 6873 +checkpointapr 1 1 6.957497 6.957497 6874 +architecturesf 1 1 6.957497 6.957497 6875 +mppsch 1 1 6.957497 6.957497 6876 +preseantationsm 1 1 6.957497 6.957497 6877 +presentationsfri 1 1 6.957497 6.957497 6878 +classesm 1 1 6.957497 6.957497 6879 +reportaddit 1 1 6.957497 6.957497 6880 +resourcescours 1 1 6.957497 6.957497 6881 +reportsyahoo 1 1 6.957497 6.957497 6882 +businessand 1 1 6.957497 6.957497 6883 +economi 1 1 6.957497 6.957497 6884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ new file mode 100644 index 00000000..21ea72bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dahlin^Classes^WebOS^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +system 4 443 0.693147 2.772588 6 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +gener 2 220 1.386294 2.772588 27 +also 2 259 1.386294 2.772588 28 +design 1 213 1.386294 1.386294 25 +oper 5 180 1.609438 8.047190 34 +class 5 199 1.609438 8.047190 37 +paper 2 205 1.609438 3.218876 38 +list 2 201 1.609438 3.218876 39 +address 2 170 1.791759 3.583518 62 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +provid 2 121 2.079442 4.158884 94 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +world 1 115 2.197225 2.197225 126 +final 1 116 2.197225 2.197225 108 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +internet 2 83 2.484907 4.969814 186 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +state 1 76 2.564949 2.564949 207 +refer 1 78 2.564949 2.564949 203 +solv 1 73 2.639057 2.639057 234 +goal 1 66 2.708050 2.708050 250 +syllabu 1 67 2.708050 2.708050 247 +organ 1 65 2.772589 2.772589 265 +talk 1 53 2.944439 2.944439 336 +protocol 2 45 3.135494 6.270988 407 +must 1 40 3.258097 3.258097 442 +purpos 1 37 3.332205 3.332205 481 +secur 1 30 3.555348 3.555348 577 +concern 1 25 3.737670 3.737670 666 +alloc 1 20 3.951244 3.951244 821 +longer 1 20 3.951244 3.951244 816 +verif 1 20 3.951244 3.951244 826 +less 1 18 4.060443 4.060443 892 +context 1 13 4.382027 4.382027 1153 +evolv 1 12 4.465908 4.465908 1223 +operatingsystem 1 10 4.653960 4.653960 1401 +tradit 1 10 4.653960 4.653960 1404 +guidelin 1 7 5.010635 5.010635 1832 +matur 1 5 5.347108 5.347108 2269 +hypothesi 1 4 5.568345 5.568345 2650 +behind 1 4 5.568345 5.568345 2610 +bear 1 4 5.568345 5.568345 2651 +explos 1 3 5.857933 5.857933 3138 +thetradit 1 2 6.263398 6.263398 4159 +interprocess 1 2 6.263398 6.263398 4174 +systemsuniqu 1 1 6.957497 6.957497 6885 +resultedin 1 1 6.957497 6.957497 6886 +contextof 1 1 6.957497 6.957497 6887 +understandingof 1 1 6.957497 6.957497 6888 +addressproblem 1 1 6.957497 6.957497 6889 +theissu 1 1 6.957497 6.957497 6890 +addressedin 1 1 6.957497 6.957497 6891 +occasionallyread 1 1 6.957497 6.957497 6892 +understandingcurr 1 1 6.957497 6.957497 6893 +reportspoint 1 1 6.957497 6.957497 6894 +rosterhandout 1 1 6.957497 6.957497 6895 +sslprotocol 1 1 6.957497 6.957497 6896 +proofsketch 1 1 6.957497 6.957497 6897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ new file mode 100644 index 00000000..572f8ccf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dragon^cs310^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 36 374 0.693147 24.953292 7 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +last 11 314 1.098612 12.084732 14 +offic 8 299 1.098612 8.788896 13 +cours 7 273 1.098612 7.690284 15 +time 3 293 1.098612 3.295836 17 +us 3 329 1.098612 3.295836 16 +student 1 343 1.098612 1.098612 19 +also 4 259 1.386294 5.545176 28 +email 2 220 1.386294 2.772588 29 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +class 14 199 1.609438 22.532132 37 +utexa 9 189 1.609438 14.484942 44 +fall 2 181 1.609438 3.218876 40 +updat 2 191 1.609438 3.218876 41 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +avail 14 169 1.791759 25.084626 48 +hour 7 165 1.791759 12.542313 46 +data 1 170 1.791759 1.791759 49 +austin 1 168 1.791759 1.791759 63 +object 11 138 1.945910 21.405010 79 +lectur 10 135 1.945910 19.459100 73 +problem 8 147 1.945910 15.567280 75 +note 6 142 1.945910 11.675460 67 +professor 2 137 1.945910 3.891820 76 +assign 2 135 1.945910 3.891820 66 +model 2 145 1.945910 3.891820 69 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +document 2 121 2.079442 4.158884 89 +compil 2 122 2.079442 4.158884 96 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +final 5 116 2.197225 10.986125 108 +check 4 115 2.197225 8.788900 118 +place 4 106 2.197225 8.788900 124 +version 2 113 2.197225 4.394450 122 +pleas 2 113 2.197225 4.394450 114 +make 2 111 2.197225 4.394450 120 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +part 10 98 2.302585 23.025850 129 +user 3 104 2.302585 6.907755 137 +memori 1 101 2.302585 2.302585 139 +section 4 94 2.397895 9.591580 149 +call 2 91 2.397895 4.795790 153 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +real 1 93 2.397895 2.397895 144 +exam 28 86 2.484907 69.577396 169 +solut 22 82 2.484907 54.667954 162 +start 8 83 2.484907 19.879256 173 +info 5 85 2.484907 12.424535 176 +academ 3 82 2.484907 7.454721 178 +chang 3 82 2.484907 7.454721 163 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +homework 14 79 2.564949 35.909286 193 +exampl 9 77 2.564949 23.084541 195 +mondai 6 77 2.564949 15.389694 206 +sourc 3 77 2.564949 7.694847 201 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +meet 3 72 2.639057 7.917171 229 +appli 1 71 2.639057 2.639057 226 +free 1 73 2.639057 2.639057 224 +test 8 66 2.708050 21.664400 252 +practic 6 70 2.708050 16.248300 246 +thursdai 3 70 2.708050 8.124150 241 +syllabu 1 67 2.708050 2.708050 247 +handout 57 64 2.772589 158.037573 263 +wednesdai 6 64 2.772589 16.635534 261 +import 3 65 2.772589 8.317767 282 +organ 2 65 2.772589 5.545178 265 +new 1 64 2.772589 2.772589 262 +visit 1 63 2.772589 2.772589 288 +prof 1 64 2.772589 2.772589 273 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +polici 1 64 2.772589 2.772589 279 +guid 1 63 2.772589 2.772589 267 +content 13 59 2.833213 36.831769 302 +back 12 60 2.833213 33.998556 297 +locat 1 59 2.833213 2.833213 303 +automat 1 61 2.833213 2.833213 306 +reason 1 57 2.890372 2.890372 318 +overview 1 56 2.890372 2.890372 323 +instruct 2 53 2.944439 5.888878 332 +date 16 51 2.995732 47.931712 344 +maintain 1 51 2.995732 2.995732 342 +electron 5 47 3.091042 15.455210 379 +discuss 9 45 3.135494 28.219446 399 +fridai 2 44 3.135494 6.270988 390 +might 1 41 3.218876 3.218876 426 +late 8 40 3.258097 26.064776 439 +announc 2 40 3.258097 6.516194 441 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +slide 3 38 3.295837 9.887511 467 +correct 1 38 3.295837 3.295837 462 +procedur 1 36 3.367296 3.367296 488 +manual 3 35 3.401197 10.203591 504 +least 1 35 3.401197 3.401197 516 +post 1 35 3.401197 3.401197 505 +singl 1 34 3.401197 3.401197 510 +next 1 34 3.401197 3.401197 517 +jame 1 35 3.401197 3.401197 507 +go 1 33 3.433987 3.433987 529 +titl 1 31 3.496508 3.496508 556 +turn 3 29 3.583519 10.750557 586 +pass 4 28 3.610918 14.443672 611 +except 1 28 3.610918 3.610918 607 +held 1 28 3.610918 3.610918 600 +utc 1 27 3.637586 3.637586 629 +session 6 26 3.688879 22.133274 643 +proc 1 26 3.688879 3.688879 649 +valu 1 25 3.737670 3.737670 665 +toward 1 25 3.737670 3.737670 668 +begin 1 23 3.806662 3.806662 716 +thank 1 23 3.806662 3.806662 721 +disk 3 22 3.850148 11.550444 747 +period 2 22 3.850148 7.700296 743 +hierarchi 1 22 3.850148 3.850148 744 +output 3 21 3.912023 11.736069 788 +newsgroup 2 21 3.912023 7.824046 783 +thur 2 19 4.007333 8.014666 847 +prerequisit 1 19 4.007333 4.007333 846 +attend 1 18 4.060443 4.060443 893 +offici 1 18 4.060443 4.060443 894 +regist 5 17 4.110874 20.554370 938 +earli 8 16 4.174387 33.395096 968 +zhang 1 16 4.174387 4.174387 980 +letter 1 16 4.174387 4.174387 981 +ascii 1 15 4.248495 4.248495 1032 +charact 1 15 4.248495 4.248495 1028 +conduct 1 14 4.317488 4.317488 1065 +front 2 13 4.382027 8.764054 1154 +holidai 2 12 4.465908 8.931816 1224 +remov 2 12 4.465908 8.931816 1225 +pascal 2 12 4.465908 8.931816 1213 +tue 3 11 4.553877 13.661631 1308 +chri 1 11 4.553877 4.553877 1311 +extra 1 11 4.553877 4.553877 1312 +night 1 11 4.553877 4.553877 1319 +penalti 1 10 4.653960 4.653960 1405 +stack 1 10 4.653960 4.653960 1389 +cheat 1 10 4.653960 4.653960 1395 +pick 11 9 4.753590 52.289490 1498 +deadlin 1 9 4.753590 4.753590 1502 +calendar 1 8 4.875197 4.875197 1649 +fail 1 8 4.875197 4.875197 1655 +spec 1 8 4.875197 4.875197 1640 +noon 6 7 5.010635 30.063810 1804 +bit 4 7 5.010635 20.042540 1833 +paramet 3 7 5.010635 15.031905 1796 +saturdai 2 7 5.010635 10.021270 1794 +drop 6 6 5.164786 30.988716 2008 +risc 1 6 5.164786 5.164786 2016 +door 2 5 5.347108 10.694216 2291 +mac 2 5 5.347108 10.694216 2292 +circumst 1 5 5.347108 5.347108 2283 +registr 1 5 5.347108 5.347108 2249 +glanc 1 4 5.568345 5.568345 2652 +chart 1 4 5.568345 5.568345 2653 +turnin 1 4 5.568345 5.568345 2654 +labor 1 3 5.857933 5.857933 3195 +obsolet 1 3 5.857933 5.857933 3196 +yurkanan 2 2 6.263398 12.526796 4175 +dragon 2 2 6.263398 12.526796 4176 +yoonsuck 2 2 6.263398 12.526796 4177 +choe 2 2 6.263398 12.526796 4178 +yschoe 2 2 6.263398 12.526796 4179 +typo 2 2 6.263398 12.526796 4180 +folder 2 2 6.263398 12.526796 4152 +constantli 1 2 6.263398 6.263398 4181 +edmondson 1 2 6.263398 6.263398 4182 +gzhang 1 2 6.263398 6.263398 4183 +rare 1 2 6.263398 6.263398 4184 +thanksgiv 1 2 6.263398 6.263398 4185 +appeal 1 2 6.263398 6.263398 4186 +fantasm 9 1 6.957497 62.617473 6898 +p_global 7 1 6.957497 48.702479 6899 +bonu 3 1 6.957497 20.872491 6900 +electronc 3 1 6.957497 20.872491 6901 +macsbug 3 1 6.957497 20.872491 6902 +electoron 3 1 6.957497 20.872491 6903 +edum 2 1 6.957497 13.914994 6904 +withdraw 2 1 6.957497 13.914994 6905 +rightmost 2 1 6.957497 13.914994 6906 +procudur 2 1 6.957497 13.914994 6907 +powermac 2 1 6.957497 13.914994 6908 +quadra 2 1 6.957497 13.914994 6909 +onmon 1 1 6.957497 6.957497 6910 +cynthia 1 1 6.957497 6.957497 6911 +deepa 1 1 6.957497 6.957497 6912 +ramani 1 1 6.957497 6.957497 6913 +dparam 1 1 6.957497 6.957497 6914 +eduw 1 1 6.957497 6.957497 6915 +eduf 1 1 6.957497 6.957497 6916 +refund 1 1 6.957497 6.957497 6917 +extenu 1 1 6.957497 6.957497 6918 +boxin 1 1 6.957497 6.957497 6919 +endia 1 1 6.957497 6.957497 6920 +func 1 1 6.957497 6.957497 6921 +practiv 1 1 6.957497 6.957497 6922 +questionair 1 1 6.957497 6.957497 6923 +brett 1 1 6.957497 6.957497 6924 +subroutine_fil 1 1 6.957497 6.957497 6925 +exception_fil 1 1 6.957497 6.957497 6926 +avali 1 1 6.957497 6.957497 6927 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html new file mode 100644 index 00000000..e4bbfc1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^dwip^cs304p^cs304p.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 6 374 0.693147 4.158882 7 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +offic 5 299 1.098612 5.493060 13 +us 3 329 1.098612 3.295836 16 +last 2 314 1.098612 2.197224 14 +time 2 293 1.098612 2.197224 17 +cours 1 273 1.098612 1.098612 15 +link 6 247 1.386294 8.317764 24 +also 2 259 1.386294 2.772588 28 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +class 6 199 1.609438 9.656628 37 +utexa 4 189 1.609438 6.437752 44 +includ 1 208 1.609438 1.609438 42 +avail 4 169 1.791759 7.167036 48 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +assign 4 135 1.945910 7.783640 66 +note 3 142 1.945910 5.837730 67 +lectur 3 135 1.945910 5.837730 73 +relat 2 139 1.945910 3.891820 68 +click 2 142 1.945910 3.891820 78 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +postscript 3 131 2.079442 6.238326 90 +studi 2 120 2.079442 4.158884 91 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +final 8 116 2.197225 17.577800 108 +version 6 113 2.197225 13.183350 122 +topic 3 114 2.197225 6.591675 110 +theori 1 111 2.197225 2.197225 127 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +structur 1 106 2.197225 2.197225 105 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +take 2 97 2.302585 4.605170 134 +need 2 98 2.302585 4.605170 135 +technic 1 100 2.302585 2.302585 140 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +question 7 91 2.397895 16.785265 141 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +exam 5 86 2.484907 12.424535 169 +solut 3 82 2.484907 7.454721 162 +help 2 83 2.484907 4.969814 175 +activ 1 84 2.484907 2.484907 182 +good 2 77 2.564949 5.129898 200 +decemb 2 80 2.564949 5.129898 215 +come 1 78 2.564949 2.564949 202 +mondai 1 77 2.564949 2.564949 206 +html 3 75 2.639057 7.917171 235 +free 1 73 2.639057 2.639057 224 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +test 3 66 2.708050 8.124150 252 +thursdai 1 70 2.708050 2.708050 241 +new 6 64 2.772589 16.635534 262 +import 4 65 2.772589 11.090356 282 +guid 2 63 2.772589 5.545178 267 +complex 1 64 2.772589 2.772589 269 +descript 1 64 2.772589 2.772589 271 +room 4 59 2.833213 11.332852 301 +locat 1 59 2.833213 2.833213 303 +semest 2 58 2.890372 5.780744 312 +special 2 56 2.890372 5.780744 320 +cover 2 55 2.944439 5.888878 329 +sampl 1 53 2.944439 2.944439 339 +suggest 1 53 2.944439 2.944439 331 +week 5 52 2.995732 14.978660 343 +maintain 1 51 2.995732 2.995732 342 +right 2 48 3.044522 6.089044 363 +format 1 48 3.044522 3.044522 356 +frequent 1 49 3.044522 3.044522 367 +discuss 4 45 3.135494 12.541976 399 +midterm 4 45 3.135494 12.541976 392 +review 7 42 3.218876 22.532132 425 +howev 2 41 3.218876 6.437752 422 +might 1 41 3.218876 3.218876 426 +announc 2 40 3.258097 6.516194 441 +tutori 2 39 3.258097 6.516194 437 +programm 1 39 3.258097 3.258097 445 +slide 1 38 3.295837 3.295837 467 +feel 1 37 3.332205 3.332205 483 +soon 1 36 3.367296 3.367296 494 +download 1 36 3.367296 3.367296 489 +post 4 35 3.401197 13.604788 505 +next 3 34 3.401197 10.203591 517 +articl 2 33 3.433987 6.867974 530 +ad 1 32 3.465736 3.465736 544 +concept 1 32 3.465736 3.465736 537 +option 1 30 3.555348 3.555348 575 +becom 1 28 3.610918 3.610918 603 +progress 1 28 3.610918 3.610918 598 +held 1 28 3.610918 3.610918 600 +hope 1 28 3.610918 3.610918 610 +ask 1 28 3.610918 3.610918 597 +usual 1 28 3.610918 3.610918 608 +session 2 26 3.688879 7.377758 643 +relev 1 26 3.688879 3.688879 637 +comp 1 26 3.688879 3.688879 650 +todai 1 25 3.737670 3.737670 672 +lead 1 23 3.806662 3.806662 718 +dai 1 22 3.850148 3.850148 753 +almost 1 22 3.850148 3.850148 742 +newsgroup 3 21 3.912023 11.736069 783 +reserv 1 20 3.951244 3.951244 808 +item 1 19 4.007333 4.007333 856 +coupl 1 17 4.110874 4.110874 939 +whole 1 17 4.110874 4.110874 940 +sheet 1 16 4.174387 4.174387 973 +critic 1 16 4.174387 4.174387 982 +warn 1 14 4.317488 4.317488 1068 +someon 1 13 4.382027 4.382027 1128 +circuit 1 13 4.382027 4.382027 1131 +difficulti 1 13 4.382027 4.382027 1132 +prolog 1 13 4.382027 4.382027 1155 +menu 1 13 4.382027 4.382027 1156 +pascal 15 12 4.465908 66.988620 1213 +bruce 2 12 4.465908 8.931816 1226 +tune 1 12 4.465908 4.465908 1227 +uniqu 1 12 4.465908 4.465908 1228 +rememb 1 12 4.465908 4.465908 1217 +regard 1 11 4.553877 4.553877 1309 +summar 1 11 4.553877 4.553877 1295 +total 1 10 4.653960 4.653960 1398 +length 1 10 4.653960 4.653960 1400 +exact 1 9 4.753590 4.753590 1509 +prefer 1 9 4.753590 4.753590 1491 +bit 1 7 5.010635 5.010635 1833 +beyond 1 7 5.010635 5.010635 1834 +put 1 6 5.164786 5.164786 2017 +banerje 1 6 5.164786 5.164786 2018 +assignmentsprogram 1 6 5.164786 5.164786 2019 +porter 11 5 5.347108 58.818188 2293 +lang 6 5 5.347108 32.082648 2294 +door 1 5 5.347108 5.347108 2291 +rotat 1 5 5.347108 5.347108 2295 +scope 1 5 5.347108 5.347108 2296 +desk 1 5 5.347108 5.347108 2297 +caus 1 5 5.347108 5.347108 2298 +welch 3 4 5.568345 16.705035 2655 +coverag 1 4 5.568345 5.568345 2656 +glad 1 4 5.568345 5.568345 2657 +arora 1 4 5.568345 5.568345 2658 +somewhat 1 4 5.568345 5.568345 2659 +webpag 1 4 5.568345 5.568345 2660 +dwip 3 3 5.857933 17.573799 3197 +addendum 2 3 5.857933 11.715866 3150 +ansi 2 3 5.857933 11.715866 3198 +forthes 1 3 5.857933 5.857933 3199 +moreov 1 3 5.857933 5.857933 3200 +luck 1 3 5.857933 5.857933 3201 +boolean 1 3 5.857933 5.857933 3202 +experienc 1 3 5.857933 5.857933 3203 +painter 1 2 6.263398 6.263398 4187 +therewil 1 2 6.263398 6.263398 4080 +nimar 1 2 6.263398 6.263398 4188 +disregard 1 2 6.263398 6.263398 4189 +schedulec 1 2 6.263398 6.263398 4190 +newgroup 1 2 6.263398 6.263398 4191 +delphi 1 2 6.263398 6.263398 4192 +dell 1 2 6.263398 6.263398 4193 +turbo 4 1 6.957497 27.829988 6928 +andther 1 1 6.957497 6.957497 6929 +unabl 1 1 6.957497 6.957497 6930 +luckfor 1 1 6.957497 6.957497 6931 +dependon 1 1 6.957497 6.957497 6932 +availib 1 1 6.957497 6.957497 6933 +uptoth 1 1 6.957497 6.957497 6934 +resolutio 1 1 6.957497 6.957497 6935 +porterquest 1 1 6.957497 6.957497 6936 +thecont 1 1 6.957497 6.957497 6937 +atugl 1 1 6.957497 6.957497 6938 +sostai 1 1 6.957497 6.957497 6939 +iinstructorbruc 1 1 6.957497 6.957497 6940 +tasoffic 1 1 6.957497 6.957497 6941 +hourslab 1 1 6.957497 6.957497 6942 +descriptionclass 1 1 6.957497 6.957497 6943 +scheduleclass 1 1 6.957497 6.957497 6944 +articlesclass 1 1 6.957497 6.957497 6945 +newsgroupprogram 1 1 6.957497 6.957497 6946 +pascaltutori 1 1 6.957497 6.957497 6947 +faqyou 1 1 6.957497 6.957497 6948 +zipe 1 1 6.957497 6.957497 6949 +isocomp 1 1 6.957497 6.957497 6950 +maccomp 1 1 6.957497 6.957497 6951 +borlandcomp 1 1 6.957497 6.957497 6952 +misccomp 1 1 6.957497 6.957497 6953 +miscfj 1 1 6.957497 6.957497 6954 +serverto 1 1 6.957497 6.957497 6955 +importantstuff 1 1 6.957497 6.957497 6956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html new file mode 100644 index 00000000..d9d4095f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs354.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +cours 4 273 1.098612 4.394448 15 +us 2 329 1.098612 2.197224 16 +student 2 343 1.098612 2.197224 19 +gener 2 220 1.386294 2.772588 27 +updat 3 191 1.609438 4.828314 41 +public 1 202 1.609438 1.609438 43 +assign 7 135 1.945910 13.621370 66 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +code 2 108 2.197225 4.394450 116 +instructor 1 108 2.197225 2.197225 107 +specif 1 106 2.197225 2.197225 106 +book 2 99 2.302585 4.605170 131 +graphic 3 90 2.397895 7.193685 147 +center 1 88 2.397895 2.397895 158 +librari 4 87 2.484907 9.939628 181 +second 2 81 2.484907 4.969814 166 +requir 2 81 2.484907 4.969814 167 +exam 1 86 2.484907 2.484907 169 +contain 1 81 2.484907 2.484907 174 +exampl 4 77 2.564949 10.259796 195 +refer 1 78 2.564949 2.564949 203 +syllabu 1 67 2.708050 2.708050 247 +descript 1 64 2.772589 2.772589 271 +copi 1 63 2.772589 2.772589 284 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +directori 1 45 3.135494 3.135494 396 +show 1 43 3.178054 3.178054 417 +submit 1 39 3.258097 3.258097 440 +workstat 1 37 3.332205 3.332205 479 +manual 1 35 3.401197 3.401197 504 +option 1 30 3.555348 3.555348 575 +platform 1 29 3.583519 3.583519 591 +turn 1 29 3.583519 3.583519 586 +utc 2 27 3.637586 7.275172 629 +wish 1 24 3.761200 3.761200 692 +higher 1 24 3.761200 3.761200 690 +instal 1 22 3.850148 3.850148 754 +score 1 15 4.248495 4.248495 1017 +donald 1 9 4.753590 4.753590 1510 +curv 1 8 4.875197 4.875197 1656 +driver 1 8 4.875197 4.875197 1657 +sciencesdepart 1 6 5.164786 5.164786 2020 +slate 1 6 5.164786 5.164786 2021 +opengl 3 5 5.347108 16.041324 2299 +fussel 1 5 5.347108 5.347108 2300 +ousterhout 1 5 5.347108 5.347108 2301 +hasbeen 1 4 5.568345 5.568345 2661 +makefil 1 4 5.568345 5.568345 2662 +welch 1 4 5.568345 5.568345 2655 +xlib 1 3 5.857933 5.857933 3204 +mesa 5 2 6.263398 31.316990 4194 +cscomput 1 2 6.263398 6.263398 4195 +anopengl 1 2 6.263398 6.263398 4196 +billthecat 1 2 6.263398 6.263398 4197 +graphicsspr 1 1 6.957497 6.957497 6957 +oneor 1 1 6.957497 6.957497 6958 +examwil 1 1 6.957497 6.957497 6959 +bothmai 1 1 6.957497 6.957497 6960 +willcount 1 1 6.957497 6.957497 6961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs384g b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs384g new file mode 100644 index 00000000..e56592cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^fussell^cs384g @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 4 273 1.098612 4.394448 15 +us 2 329 1.098612 2.197224 16 +gener 2 220 1.386294 2.772588 27 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +assign 5 135 1.945910 9.729550 66 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +machin 3 129 2.079442 6.238326 95 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +code 2 108 2.197225 4.394450 116 +instructor 1 108 2.197225 2.197225 107 +specif 1 106 2.197225 2.197225 106 +book 3 99 2.302585 6.907755 131 +graphic 5 90 2.397895 11.989475 147 +center 1 88 2.397895 2.397895 158 +librari 4 87 2.484907 9.939628 181 +contain 1 81 2.484907 2.484907 174 +exampl 5 77 2.564949 12.824745 195 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +syllabu 1 67 2.708050 2.708050 247 +descript 1 64 2.772589 2.772589 271 +copi 1 63 2.772589 2.772589 284 +new 1 64 2.772589 2.772589 262 +instruct 1 53 2.944439 2.944439 332 +sampl 1 53 2.944439 2.944439 339 +directori 1 45 3.135494 3.135494 396 +workstat 1 37 3.332205 3.332205 479 +manual 1 35 3.401197 3.401197 504 +platform 1 29 3.583519 3.583519 591 +turn 1 29 3.583519 3.583519 586 +utc 2 27 3.637586 7.275172 629 +wish 1 24 3.761200 3.761200 692 +instal 1 22 3.850148 3.850148 754 +demo 1 18 4.060443 4.060443 888 +donald 1 9 4.753590 4.753590 1510 +driver 1 8 4.875197 4.875197 1657 +sciencesdepart 1 6 5.164786 5.164786 2020 +slate 1 6 5.164786 5.164786 2021 +opengl 3 5 5.347108 16.041324 2299 +fussel 1 5 5.347108 5.347108 2300 +ousterhout 1 5 5.347108 5.347108 2301 +welch 2 4 5.568345 11.136690 2655 +hasbeen 1 4 5.568345 5.568345 2661 +makefil 1 4 5.568345 5.568345 2662 +turnin 1 4 5.568345 5.568345 2654 +walker 2 3 5.857933 11.715866 3161 +xlib 1 3 5.857933 5.857933 3204 +mesa 5 2 6.263398 31.316990 4194 +anopengl 1 2 6.263398 6.263398 4196 +billthecat 1 2 6.263398 6.263398 4197 +repair 1 2 6.263398 6.263398 4198 +reinstal 2 1 6.957497 13.914994 6962 +gcomput 1 1 6.957497 6.957497 6963 +graphicsfal 1 1 6.957497 6.957497 6964 +libtcl 1 1 6.957497 6.957497 6965 +libtk 1 1 6.957497 6.957497 6966 +tclsh 1 1 6.957497 6.957497 6967 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html new file mode 100644 index 00000000..2b900326 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^hqliu^cs378.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +offic 2 299 1.098612 2.197224 13 +last 2 314 1.098612 2.197224 14 +email 3 220 1.386294 4.158882 29 +utexa 3 189 1.609438 4.828314 44 +modifi 1 178 1.609438 1.609438 35 +hour 2 165 1.791759 3.583518 46 +problem 2 147 1.945910 3.891820 75 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +mathemat 1 108 2.197225 2.197225 123 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +chang 1 82 2.484907 2.484907 163 +homework 2 79 2.564949 5.129898 193 +appear 2 78 2.564949 5.129898 210 +david 1 71 2.639057 2.639057 232 +syllabu 1 67 2.708050 2.708050 247 +taylor 2 63 2.772589 5.545178 287 +septemb 1 65 2.772589 2.772589 274 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +howev 1 41 3.218876 3.218876 422 +word 1 34 3.401197 3.401197 508 +abl 1 30 3.555348 3.555348 566 +common 1 30 3.555348 3.555348 574 +background 1 25 3.737670 3.737670 664 +notic 1 25 3.737670 3.737670 675 +letter 1 16 4.174387 4.174387 981 +english 1 15 4.248495 4.248495 1033 +station 1 13 4.382027 4.382027 1157 +canada 1 13 4.382027 4.382027 1158 +french 3 9 4.753590 14.260770 1511 +cryptographi 1 9 4.753590 4.753590 1512 +recogn 1 5 5.347108 5.347108 2302 +basement 1 4 5.568345 5.568345 2663 +zuckerman 1 3 5.857933 5.857933 3205 +frequenc 1 3 5.857933 5.857933 3206 +hqliu 2 2 6.263398 12.526796 4199 +huiqun 1 2 6.263398 6.263398 4200 +drastic 1 2 6.263398 6.263398 4201 +ciphertext 1 1 6.957497 6.957497 6968 +digram 1 1 6.957497 6.957497 6969 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html new file mode 100644 index 00000000..745b98ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^joshi^cs380d-main.html @@ -0,0 +1,315 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 6 443 0.693147 4.158882 6 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +offic 3 299 1.098612 3.295836 13 +time 3 293 1.098612 3.295836 17 +us 3 329 1.098612 3.295836 16 +cours 2 273 1.098612 2.197224 15 +student 2 343 1.098612 2.197224 19 +last 2 314 1.098612 2.197224 14 +link 3 247 1.386294 4.158882 24 +design 2 213 1.386294 2.772588 25 +gener 2 220 1.386294 2.772588 27 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +class 10 199 1.609438 16.094380 37 +paper 6 205 1.609438 9.656628 38 +utexa 2 189 1.609438 3.218876 44 +includ 2 208 1.609438 3.218876 42 +group 2 183 1.609438 3.218876 36 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +distribut 9 162 1.791759 16.125831 51 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +algorithm 2 162 1.791759 3.583518 57 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +problem 10 147 1.945910 19.459100 75 +assign 5 135 1.945910 9.729550 66 +file 3 132 1.945910 5.837730 70 +hall 2 146 1.945910 3.891820 65 +lectur 1 135 1.945910 1.945910 73 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +number 2 130 2.079442 4.158884 97 +postscript 2 131 2.079442 4.158884 90 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +final 5 116 2.197225 10.986125 108 +topic 4 114 2.197225 8.788900 110 +manag 2 114 2.197225 4.394450 125 +send 2 114 2.197225 4.394450 109 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +techniqu 2 99 2.302585 4.605170 138 +take 2 97 2.302585 4.605170 134 +memori 1 101 2.302585 2.302585 139 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +present 2 91 2.397895 4.795790 145 +commun 2 95 2.397895 4.795790 157 +grade 1 90 2.397895 2.397895 142 +real 1 93 2.397895 2.397895 144 +follow 1 92 2.397895 2.397895 143 +question 1 91 2.397895 2.397895 141 +solut 8 82 2.484907 19.879256 162 +requir 6 81 2.484907 14.909442 167 +exam 4 86 2.484907 9.939628 169 +second 2 81 2.484907 4.969814 166 +build 1 85 2.484907 2.484907 184 +wide 1 84 2.484907 2.484907 185 +academ 1 82 2.484907 2.484907 178 +start 1 83 2.484907 2.484907 173 +contain 1 81 2.484907 2.484907 174 +librari 1 87 2.484907 2.484907 181 +mondai 3 77 2.564949 7.694847 206 +state 3 76 2.564949 7.694847 207 +messag 3 76 2.564949 7.694847 212 +homework 3 79 2.564949 7.694847 193 +sourc 2 77 2.564949 5.129898 201 +refer 2 78 2.564949 5.129898 203 +come 1 78 2.564949 2.564949 202 +solv 2 73 2.639057 5.278114 234 +tuesdai 1 73 2.639057 2.639057 219 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +logic 1 71 2.639057 2.639057 230 +servic 1 72 2.639057 2.639057 236 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +effici 1 73 2.639057 2.639057 233 +integr 2 67 2.708050 5.416100 245 +would 2 67 2.708050 5.416100 251 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +wednesdai 2 64 2.772589 5.545178 261 +written 2 63 2.772589 5.545178 278 +improv 2 62 2.772589 5.545178 289 +taylor 1 63 2.772589 2.772589 287 +abstract 1 62 2.772589 2.772589 276 +visit 1 63 2.772589 2.772589 288 +locat 1 59 2.833213 2.833213 303 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +point 4 58 2.890372 11.561488 319 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +suggest 3 53 2.944439 8.833317 331 +cover 3 55 2.944439 8.833317 329 +allow 2 53 2.944439 5.888878 333 +instruct 1 53 2.944439 2.944439 332 +three 1 54 2.944439 2.944439 330 +case 1 51 2.995732 2.995732 351 +week 1 52 2.995732 2.995732 343 +particular 1 51 2.995732 2.995732 352 +give 3 50 3.044522 9.133566 359 +set 2 50 3.044522 6.089044 361 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +right 1 48 3.044522 3.044522 363 +electron 1 47 3.091042 3.091042 379 +protocol 9 45 3.135494 28.219446 407 +textbook 3 44 3.135494 9.406482 397 +discuss 3 45 3.135494 9.406482 399 +midterm 2 45 3.135494 6.270988 392 +better 1 45 3.135494 3.135494 401 +describ 1 45 3.135494 3.135494 400 +show 4 43 3.178054 12.712216 417 +mechan 1 43 3.178054 3.178054 416 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +howev 1 41 3.218876 3.218876 422 +author 1 39 3.258097 3.258097 450 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +expect 2 37 3.332205 6.664410 484 +cost 2 37 3.332205 6.664410 480 +feel 1 37 3.332205 3.332205 483 +ofth 2 36 3.367296 6.734592 491 +global 4 34 3.401197 13.604788 520 +survei 2 35 3.401197 6.802394 513 +singl 1 34 3.401197 3.401197 510 +collabor 2 32 3.465736 6.931472 543 +given 1 32 3.465736 3.465736 538 +idea 1 32 3.465736 3.465736 545 +someth 2 31 3.496508 6.993016 554 +often 1 31 3.496508 3.496508 551 +secur 2 30 3.555348 7.110696 577 +robert 1 30 3.555348 3.555348 567 +option 1 30 3.555348 3.555348 575 +produc 1 30 3.555348 3.555348 572 +depend 2 29 3.583519 7.167038 583 +synchron 1 29 3.583519 3.583519 588 +consid 1 29 3.583519 3.583519 590 +ask 1 28 3.610918 3.610918 597 +team 1 27 3.637586 3.637586 625 +detect 3 26 3.688879 11.066637 646 +consist 2 26 3.688879 7.377758 651 +effort 1 26 3.688879 3.688879 652 +primari 1 25 3.737670 3.737670 669 +demonstr 1 24 3.761200 3.761200 694 +proof 3 23 3.806662 11.419986 720 +size 1 23 3.806662 3.806662 713 +properti 1 22 3.850148 3.850148 749 +newsgroup 2 21 3.912023 7.824046 783 +kernel 1 20 3.951244 3.951244 825 +prepar 1 20 3.951244 3.951244 824 +prove 5 19 4.007333 20.036665 848 +assum 2 19 4.007333 8.014666 845 +log 1 19 4.007333 4.007333 857 +encourag 2 18 4.060443 8.120886 880 +appropri 1 18 4.060443 4.060443 883 +moor 1 17 4.110874 4.110874 936 +whether 1 17 4.110874 4.110874 918 +previous 1 17 4.110874 4.110874 923 +otherwis 1 17 4.110874 4.110874 922 +monitor 1 17 4.110874 4.110874 941 +weslei 1 16 4.174387 4.174387 983 +vector 1 16 4.174387 4.174387 961 +precis 1 15 4.248495 4.248495 1023 +deriv 3 13 4.382027 13.146081 1145 +weak 1 13 4.382027 4.382027 1159 +cannot 1 13 4.382027 4.382027 1144 +central 1 13 4.382027 4.382027 1160 +asynchron 2 12 4.465908 8.931816 1229 +addison 1 12 4.465908 4.465908 1230 +replic 1 12 4.465908 4.465908 1231 +skill 1 12 4.465908 4.465908 1205 +onth 1 12 4.465908 4.465908 1218 +clock 2 11 4.553877 9.107754 1320 +imposs 1 9 4.753590 4.753590 1513 +clear 1 9 4.753590 4.753590 1488 +assumpt 1 9 4.753590 4.753590 1514 +matter 1 8 4.875197 4.875197 1627 +told 1 8 4.875197 4.875197 1658 +ideal 1 8 4.875197 4.875197 1630 +predic 2 7 5.010635 10.021270 1806 +encrypt 1 7 5.010635 5.010635 1835 +henc 1 7 5.010635 5.010635 1805 +guidelin 1 7 5.010635 5.010635 1832 +channel 1 7 5.010635 5.010635 1836 +distributedsystem 2 6 5.164786 10.329572 2022 +arrang 1 6 5.164786 5.164786 2023 +tobe 1 6 5.164786 5.164786 1995 +causal 1 6 5.164786 5.164786 2024 +wrong 1 6 5.164786 5.164786 2025 +constitut 1 6 5.164786 5.164786 2026 +snapshot 4 5 5.347108 21.388432 2303 +colleagu 2 5 5.347108 10.694216 2304 +multicast 1 5 5.347108 5.347108 2305 +authent 1 5 5.347108 5.347108 2306 +volunt 1 5 5.347108 5.347108 2307 +explicitli 1 5 5.347108 5.347108 2308 +stabl 1 5 5.347108 5.347108 2309 +exchang 1 5 5.347108 5.347108 2310 +lorenzo 3 4 5.568345 16.705035 2588 +deadlock 2 4 5.568345 11.136690 2641 +cut 1 4 5.568345 5.568345 2620 +disconnect 1 4 5.568345 5.568345 2664 +subsequ 1 4 5.568345 5.568345 2665 +accompani 1 4 5.568345 5.568345 2666 +unless 1 4 5.568345 5.568345 2607 +rajeev 4 3 5.857933 23.431732 3152 +agreement 2 3 5.857933 11.715866 3207 +pertain 1 3 5.857933 5.857933 3208 +alvisi 1 3 5.857933 5.857933 3095 +commerc 1 3 5.857933 5.857933 3209 +credibl 1 3 5.857933 5.857933 3210 +violat 1 3 5.857933 5.857933 3211 +urg 1 3 5.857933 5.857933 3212 +agener 1 3 5.857933 5.857933 3213 +conceptu 1 3 5.857933 5.857933 3214 +joshi 3 2 6.263398 18.790194 4202 +byzantin 2 2 6.263398 12.526796 4203 +requiredtextbook 1 2 6.263398 6.263398 4204 +checkpoint 1 2 6.263398 6.263398 4205 +replica 1 2 6.263398 6.263398 4206 +towrit 1 2 6.263398 6.263398 4207 +algorithmi 1 2 6.263398 6.263398 4208 +moreeffici 1 2 6.263398 6.263398 4209 +simpler 1 2 6.263398 6.263398 4210 +fifo 2 1 6.957497 13.914994 6970 +ispr 1 1 6.957497 6.957497 6971 +alvisiteach 1 1 6.957497 6.957497 6972 +joshicont 1 1 6.957497 6.957497 6973 +stafflorenzo 1 1 6.957497 6.957497 6974 +mechanicsi 1 1 6.957497 6.957497 6975 +remaind 1 1 6.957497 6.957497 6976 +classat 1 1 6.957497 6.957497 6977 +isutexa 1 1 6.957497 6.957497 6978 +mullend 1 1 6.957497 6.957497 6979 +acmpress 1 1 6.957497 6.957497 6980 +contentc 1 1 6.957497 6.957497 6981 +tomorrow 1 1 6.957497 6.957497 6982 +messagedeliveri 1 1 6.957497 6.957497 6983 +backupapproach 1 1 6.957497 6.957497 6984 +thepresent 1 1 6.957497 6.957497 6985 +exemplifi 1 1 6.957497 6.957497 6986 +principleshav 1 1 6.957497 6.957497 6987 +meor 1 1 6.957497 6.957497 6988 +apresent 1 1 6.957497 6.957497 6989 +networksgradingther 1 1 6.957497 6.957497 6990 +begrad 1 1 6.957497 6.957497 6991 +onbehalf 1 1 6.957497 6.957497 6992 +willrec 1 1 6.957497 6.957497 6993 +ispermit 1 1 6.957497 6.957497 6994 +acollabor 1 1 6.957497 6.957497 6995 +forgrad 1 1 6.957497 6.957497 6996 +collaborationswil 1 1 6.957497 6.957497 6997 +nocollabor 1 1 6.957497 6.957497 6998 +issuesthat 1 1 6.957497 6.957497 6999 +bedistribut 1 1 6.957497 6.957497 7000 +tocomplet 1 1 6.957497 6.957497 7001 +twolectur 1 1 6.957497 6.957497 7002 +choosethi 1 1 6.957497 6.957497 7003 +asingl 1 1 6.957497 6.957497 7004 +warmli 1 1 6.957497 6.957497 7005 +toconsid 1 1 6.957497 6.957497 7006 +excellentopportun 1 1 6.957497 6.957497 7007 +setsin 1 1 6.957497 6.957497 7008 +shouldconform 1 1 6.957497 6.957497 7009 +synonym 1 1 6.957497 6.957497 7010 +isrequir 1 1 6.957497 6.957497 7011 +thatmak 1 1 6.957497 6.957497 7012 +insuffici 1 1 6.957497 6.957497 7013 +ofcorrect 1 1 6.957497 6.957497 7014 +thetextbook 1 1 6.957497 6.957497 7015 +asnapshot 1 1 6.957497 6.957497 7016 +theprotocol 1 1 6.957497 6.957497 7017 +atmost 1 1 6.957497 6.957497 7018 +mattern 1 1 6.957497 6.957497 7019 +thatcontain 1 1 6.957497 6.957497 7020 +resist 1 1 6.957497 6.957497 7021 +thetempt 1 1 6.957497 6.957497 7022 +monitorprocess 1 1 6.957497 6.957497 7023 +basedsnapshot 1 1 6.957497 6.957497 7024 +nowonlin 1 1 6.957497 6.957497 7025 +filedescrib 1 1 6.957497 6.957497 7026 +examth 1 1 6.957497 6.957497 7027 +fridaymai 1 1 6.957497 6.957497 7028 +thepostscript 1 1 6.957497 6.957497 7029 +freeto 1 1 6.957497 6.957497 7030 +yoursuggest 1 1 6.957497 6.957497 7031 +edurajeev 1 1 6.957497 6.957497 7032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ new file mode 100644 index 00000000..4c8727ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^kornerup^cs105^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +link 1 247 1.386294 1.386294 24 +class 5 199 1.609438 8.047190 37 +fall 3 181 1.609438 4.828314 40 +avail 2 169 1.791759 3.583518 48 +read 2 154 1.791759 3.583518 47 +austin 1 168 1.791759 1.791759 63 +note 2 142 1.945910 3.891820 67 +welcom 1 122 2.079442 2.079442 99 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +version 1 113 2.197225 2.197225 122 +look 1 107 2.197225 2.197225 115 +find 1 111 2.197225 2.197225 111 +technic 1 100 2.302585 2.302585 140 +homepag 1 93 2.397895 2.397895 148 +solut 1 82 2.484907 2.484907 162 +requir 1 81 2.484907 2.484907 167 +homework 1 79 2.564949 2.564949 193 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +differ 1 66 2.708050 2.708050 253 +practic 1 70 2.708050 2.708050 246 +syllabu 1 67 2.708050 2.708050 247 +organ 1 65 2.772589 2.772589 265 +content 1 59 2.833213 2.833213 302 +electron 1 47 3.091042 3.091042 379 +midterm 2 45 3.135494 6.270988 392 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +taught 2 33 3.433987 6.867974 526 +chapter 1 32 3.465736 3.465736 536 +turn 1 29 3.583519 3.583519 586 +adam 1 17 4.110874 4.110874 934 +reflect 1 15 4.248495 4.248495 1034 +overhead 1 15 4.248495 4.248495 1035 +correspond 1 10 4.653960 4.653960 1382 +informationabout 1 9 4.753590 4.753590 1515 +scope 1 5 5.347108 5.347108 2296 +jacob 3 4 5.568345 16.705035 2667 +kornerup 3 3 5.857933 17.573799 3215 +bywil 1 1 6.957497 6.957497 7033 +linea 1 1 6.957497 6.957497 7034 +inhomework 1 1 6.957497 6.957497 7035 +crude 1 1 6.957497 6.957497 7036 +newsgrouputexa 1 1 6.957497 6.957497 7037 +takesplac 1 1 6.957497 6.957497 7038 +pascalprogramm 1 1 6.957497 6.957497 7039 +viewinginform 1 1 6.957497 6.957497 7040 +projecthow 1 1 6.957497 6.957497 7041 +examand 1 1 6.957497 6.957497 7042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ new file mode 100644 index 00000000..27edc6b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lavender^CS378^ @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 7 374 0.693147 4.852029 7 +inform 4 412 0.693147 2.772588 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +cours 13 273 1.098612 14.281956 15 +student 3 343 1.098612 3.295836 19 +us 2 329 1.098612 2.197224 16 +current 1 284 1.098612 1.098612 21 +design 5 213 1.386294 6.931470 25 +link 2 247 1.386294 2.772588 24 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +utexa 4 189 1.609438 6.437752 44 +class 3 199 1.609438 4.828314 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +hour 2 165 1.791759 3.583518 46 +avail 2 169 1.791759 3.583518 48 +object 6 138 1.945910 11.675460 79 +relat 3 139 1.945910 5.837730 68 +problem 2 147 1.945910 3.891820 75 +note 2 142 1.945910 3.891820 67 +professor 1 137 1.945910 1.945910 76 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +hall 1 146 1.945910 1.945910 65 +postscript 2 131 2.079442 4.158884 90 +compil 2 122 2.079442 4.158884 96 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +code 3 108 2.197225 6.591675 116 +site 2 106 2.197225 4.394450 119 +pleas 1 113 2.197225 2.197225 114 +text 2 98 2.302585 4.605170 133 +advanc 1 99 2.302585 2.302585 130 +follow 3 92 2.397895 7.193685 143 +associ 1 93 2.397895 2.397895 151 +librari 6 87 2.484907 14.909442 181 +solut 2 82 2.484907 4.969814 162 +internet 1 83 2.484907 2.484907 186 +institut 1 84 2.484907 2.484907 187 +info 1 85 2.484907 2.484907 176 +orient 6 80 2.564949 15.389694 205 +sourc 5 77 2.564949 12.824745 201 +server 2 76 2.564949 5.129898 204 +mondai 1 77 2.564949 2.564949 206 +master 1 76 2.564949 2.564949 216 +refer 1 78 2.564949 2.564949 203 +know 1 80 2.564949 2.564949 198 +good 1 77 2.564949 2.564949 200 +write 1 72 2.639057 2.639057 222 +materi 1 75 2.639057 2.639057 221 +summari 1 73 2.639057 2.639057 237 +java 20 70 2.708050 54.161000 248 +wednesdai 1 64 2.772589 2.772589 261 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +think 1 57 2.890372 2.890372 314 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +standard 3 48 3.044522 9.133566 365 +archiv 2 49 3.044522 6.089044 364 +give 1 50 3.044522 3.044522 359 +discuss 1 45 3.135494 3.135494 399 +offer 1 43 3.178054 3.178054 414 +edit 1 42 3.218876 3.218876 418 +programm 1 39 3.258097 3.258097 445 +open 1 38 3.295837 3.295837 469 +manual 3 35 3.401197 10.203591 504 +tech 1 35 3.401197 3.401197 515 +eduoffic 2 33 3.433987 6.867974 531 +john 1 33 3.433987 3.433987 532 +product 1 33 3.433987 3.433987 527 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +comp 11 26 3.688879 40.577669 650 +challeng 1 26 3.688879 3.688879 653 +greg 1 24 3.761200 3.761200 695 +pattern 1 24 3.761200 3.761200 689 +fellow 1 24 3.761200 3.761200 701 +lab 1 24 3.761200 3.761200 698 +newsgroup 6 21 3.912023 23.472138 783 +annot 1 21 3.912023 3.912023 775 +applet 3 20 3.951244 11.853732 827 +element 1 18 4.060443 4.060443 895 +encourag 1 18 4.060443 4.060443 880 +event 1 18 4.060443 4.060443 896 +weslei 7 16 4.174387 29.220709 983 +alreadi 1 16 4.174387 4.174387 963 +style 2 15 4.248495 8.496990 1036 +pagec 1 15 4.248495 4.248495 1011 +rate 1 15 4.248495 4.248495 1037 +draft 2 14 4.317488 8.634976 1085 +manner 1 14 4.317488 4.317488 1074 +opportun 2 13 4.382027 8.764054 1161 +station 1 13 4.382027 4.382027 1157 +johnson 1 13 4.382027 4.382027 1162 +misc 1 13 4.382027 4.382027 1124 +addison 7 12 4.465908 31.261356 1230 +captur 1 12 4.465908 4.465908 1232 +evolut 1 11 4.553877 4.553877 1314 +denni 1 11 4.553877 4.553877 1321 +strongli 1 10 4.653960 4.653960 1406 +classmat 1 9 4.753590 4.753590 1516 +doug 1 9 4.753590 4.753590 1517 +virginia 1 8 4.875197 4.875197 1659 +irvin 1 8 4.875197 4.875197 1660 +bookstor 1 7 5.010635 5.010635 1837 +prentic 1 7 5.010635 5.010635 1838 +usenet 1 7 5.010635 5.010635 1839 +dead 1 7 5.010635 5.010635 1840 +forum 1 6 5.164786 5.164786 2027 +mirror 1 6 5.164786 5.164786 2028 +huge 1 6 5.164786 5.164786 1991 +lang 11 5 5.347108 58.818188 2294 +templat 2 5 5.347108 10.694216 2311 +appt 1 5 5.347108 5.347108 2312 +gokul 2 4 5.568345 11.136690 2668 +polymorph 1 4 5.568345 5.568345 2627 +wilei 1 4 5.568345 5.568345 2669 +faq 3 3 5.857933 17.573799 3216 +lavend 2 3 5.857933 11.715866 3217 +cline 1 3 5.857933 5.857933 3218 +gamma 1 3 5.857933 5.857933 3219 +hotjava 1 3 5.857933 5.857933 3220 +javascript 1 3 5.857933 5.857933 3221 +ansi 1 3 5.857933 5.857933 3198 +polytechn 1 3 5.857933 5.857933 3222 +jar 1 3 5.857933 5.857933 3223 +setup 2 2 6.263398 12.526796 4211 +infocours 1 2 6.263398 6.263398 4212 +noteshomework 1 2 6.263398 6.263398 4102 +profici 1 2 6.263398 6.263398 4103 +coop 1 2 6.263398 6.263398 4213 +materiali 1 2 6.263398 6.263398 4214 +drawn 1 2 6.263398 6.263398 4215 +elli 1 2 6.263398 6.263398 4216 +helm 1 2 6.263398 6.263398 4217 +reusabl 1 2 6.263398 6.263398 4218 +renssela 1 2 6.263398 6.263398 4148 +sourcesth 1 2 6.263398 6.263398 4219 +javasoft 1 2 6.263398 6.263398 4220 +gamelan 1 2 6.263398 6.263398 4221 +centr 1 2 6.263398 6.263398 4222 +stroustrup 3 1 6.957497 20.872491 7043 +libg 2 1 6.957497 13.914994 7044 +descriptionc 1 1 6.957497 6.957497 7045 +programminglast 1 1 6.957497 6.957497 7046 +rajaram 1 1 6.957497 6.957497 7047 +lavendercours 1 1 6.957497 6.957497 7048 +syllabusannouncementslectur 1 1 6.957497 6.957497 7049 +solutionsprogram 1 1 6.957497 6.957497 7050 +assignmentsgnu 1 1 6.957497 6.957497 7051 +manualsstandard 1 1 6.957497 6.957497 7052 +codesocket 1 1 6.957497 6.957497 7053 +manualdescript 1 1 6.957497 6.957497 7054 +anintroductori 1 1 6.957497 6.957497 7055 +reusablepattern 1 1 6.957497 6.957497 7056 +typehierarchi 1 1 6.957497 6.957497 7057 +professionallyus 1 1 6.957497 6.957497 7058 +horstmann 1 1 6.957497 6.957497 7059 +cargil 1 1 6.957497 6.957497 7060 +lomow 1 1 6.957497 6.957497 7061 +coplien 1 1 6.957497 6.957497 7062 +idiom 1 1 6.957497 6.957497 7063 +plauger 1 1 6.957497 6.957497 7064 +vlissid 1 1 6.957497 6.957497 7065 +announcementsabout 1 1 6.957497 6.957497 7066 +linediscuss 1 1 6.957497 6.957497 7067 +lavendery 1 1 6.957497 6.957497 7068 +helpjava 1 1 6.957497 6.957497 7069 +advocaci 1 1 6.957497 6.957497 7070 +oopth 1 1 6.957497 6.957497 7071 +objectspac 1 1 6.957497 6.957497 7072 +libstdc 1 1 6.957497 6.957497 7073 +mitgnu 1 1 6.957497 6.957497 7074 +cygnusgnu 1 1 6.957497 6.957497 7075 +ftpobject 1 1 6.957497 6.957497 7076 +developmentindex 1 1 6.957497 6.957497 7077 +librariesth 1 1 6.957497 6.957497 7078 +libraryindex 1 1 6.957497 6.957497 7079 +talig 1 1 6.957497 6.957497 7080 +frameworkjava 1 1 6.957497 6.957497 7081 +registri 1 1 6.957497 6.957497 7082 +espresso 1 1 6.957497 6.957497 7083 +kafura 1 1 6.957497 6.957497 7084 +techdoug 1 1 6.957497 6.957497 7085 +schmidt 1 1 6.957497 6.957497 7086 +universitydoug 1 1 6.957497 6.957497 7087 +sunyintroductori 1 1 6.957497 6.957497 7088 +groningen 1 1 6.957497 6.957497 7089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ new file mode 100644 index 00000000..33c15449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lin^cs395t^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 5 374 0.693147 3.465735 7 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +languag 3 227 1.386294 4.158882 26 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +utexa 2 189 1.609438 3.218876 44 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +parallel 3 169 1.791759 5.375277 60 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +assign 3 135 1.945910 5.837730 66 +lectur 1 135 1.945910 1.945910 73 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +world 1 115 2.197225 2.197225 126 +memori 1 101 2.302585 2.302585 139 +commun 2 95 2.397895 4.795790 157 +exampl 2 77 2.564949 5.129898 195 +interfac 1 79 2.564949 2.564949 209 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +decemb 1 80 2.564949 2.564949 215 +tuesdai 2 73 2.639057 5.278114 219 +onlin 1 75 2.639057 2.639057 223 +thursdai 2 70 2.708050 5.416100 241 +practic 1 70 2.708050 2.708050 246 +taylor 1 63 2.772589 2.772589 287 +handout 1 64 2.772589 2.772589 263 +foundat 1 62 2.772589 2.772589 286 +share 1 59 2.833213 2.833213 304 +case 1 51 2.995732 2.995732 351 +adapt 1 46 3.091042 3.091042 387 +tutori 1 39 3.258097 3.258097 437 +copyright 1 36 3.367296 3.367296 495 +manual 2 35 3.401197 6.802394 504 +pass 1 28 3.610918 3.610918 611 +spent 1 25 3.737670 3.737670 676 +thread 1 23 3.806662 3.806662 722 +partit 1 16 4.174387 4.174387 984 +hello 1 10 4.653960 4.653960 1407 +calvin 2 9 4.753590 9.507180 1518 +compilersfal 1 2 6.263398 6.263398 4223 +tera 1 2 6.263398 6.263398 4224 +skeleton 1 2 6.263398 6.263398 4225 +ironman 1 2 6.263398 6.263398 4226 +logp 1 2 6.263398 6.263398 4227 +grid 1 2 6.263398 6.263398 4228 +compilerscst 1 1 6.957497 6.957497 7090 +posix 1 1 6.957497 6.957497 7091 +hierarchieslast 1 1 6.957497 6.957497 7092 +linlin 1 1 6.957497 6.957497 7093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ new file mode 100644 index 00000000..351bb005 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs372^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +mail 2 238 1.386294 2.772588 22 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +read 1 154 1.791759 1.791759 47 +assign 1 135 1.945910 1.945910 66 +send 2 114 2.197225 4.394450 109 +handout 1 64 2.772589 2.772589 263 +prof 1 64 2.772589 2.772589 273 +newsgroup 1 21 3.912023 3.912023 783 +csintroduct 1 1 6.957497 6.957497 7094 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ new file mode 100644 index 00000000..f1171f4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^lorenzo^corsi^cs395t^96F^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +distribut 2 162 1.791759 3.583518 51 +read 1 154 1.791759 1.791759 47 +topic 1 114 2.197225 2.197225 110 +thot 1 1 6.957497 6.957497 7095 +systemsfil 1 1 6.957497 6.957497 7096 +systemstopolog 1 1 6.957497 6.957497 7097 +systemselectron 1 1 6.957497 6.957497 7098 +commenrcefailur 1 1 6.957497 6.957497 7099 +detectorsdistribut 1 1 6.957497 6.957497 7100 +objectsconsistencysecuregroup 1 1 6.957497 6.957497 7101 +communicationlanguag 1 1 6.957497 6.957497 7102 +dsmmobil 1 1 6.957497 6.957497 7103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html new file mode 100644 index 00000000..e6de2d47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^miranker^395t^index.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +project 1 340 1.098612 1.098612 18 +list 1 201 1.609438 1.609438 39 +databas 1 122 2.079442 2.079442 86 +materi 1 75 2.639057 2.639057 221 +term 1 43 3.178054 3.178054 411 +mine 2 26 3.688879 7.377758 654 +monitor 2 17 4.110874 8.221748 941 +daniel 1 12 4.465908 4.465908 1233 +databasesprof 1 1 6.957497 6.957497 7104 +mirankernew 1 1 6.957497 6.957497 7105 +seminarschedul 1 1 6.957497 6.957497 7106 +overviewtentativeread 1 1 6.957497 6.957497 7107 +homeworkproject 1 1 6.957497 6.957497 7108 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ new file mode 100644 index 00000000..e05be453 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ai2^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +last 1 314 1.098612 1.098612 14 +updat 1 191 1.609438 1.609438 41 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +introduct 1 126 2.079442 2.079442 87 +code 2 108 2.197225 4.394450 116 +instructor 1 108 2.197225 2.197225 107 +intellig 1 72 2.639057 2.639057 225 +syllabu 1 67 2.708050 2.708050 247 +artifici 1 63 2.772589 2.772589 280 +taylor 1 63 2.772589 2.772589 287 +trace 1 25 3.737670 3.737670 677 +tuth 1 9 4.753590 4.753590 1519 +moonei 1 9 4.753590 4.753590 1520 +raymond 1 5 5.347108 5.347108 2313 +informationclick 1 3 5.857933 5.857933 3224 +networkfor 1 3 5.857933 5.857933 3225 +mooneytim 1 2 6.263398 6.263398 4229 +sheetand 1 2 6.263398 6.263398 4230 +placespr 1 1 6.957497 6.957497 7109 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html new file mode 100644 index 00000000..69f8ee68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^cs351^welcome.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +file 1 132 1.945910 1.945910 70 +studi 1 120 2.079442 2.079442 91 +code 2 108 2.197225 4.394450 116 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +info 1 85 2.484907 2.484907 176 +homework 4 79 2.564949 10.259796 193 +intellig 1 72 2.639057 2.639057 225 +test 3 66 2.708050 8.124150 252 +syllabu 1 67 2.708050 2.708050 247 +artifici 1 63 2.772589 2.772589 280 +case 1 51 2.995732 2.995732 351 +common 1 30 3.555348 3.555348 574 +symbol 1 27 3.637586 3.637586 620 +trace 1 25 3.737670 3.737670 677 +lisp 2 18 4.060443 8.120886 897 +sheet 1 16 4.174387 4.174387 973 +moonei 1 9 4.753590 4.753590 1520 +raymond 1 5 5.347108 5.347108 2313 +allegro 1 5 5.347108 5.347108 2314 +sowmya 1 4 5.568345 5.568345 2670 +informationclick 1 3 5.857933 5.857933 3224 +networkfor 1 3 5.857933 5.857933 3225 +placetu 1 2 6.263398 6.263398 4231 +informationon 1 2 6.263398 6.263398 4232 +mooneyteach 1 1 6.957497 6.957497 7110 +ramachandrantim 1 1 6.957497 6.957497 7111 +alsout 1 1 6.957497 6.957497 7112 +textparadigm 1 1 6.957497 6.957497 7113 +lispassignmentsse 1 1 6.957497 6.957497 7114 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html new file mode 100644 index 00000000..e44379d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^mooney^ml-course^welcome.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +project 2 340 1.098612 2.197224 18 +gener 1 220 1.386294 1.386294 27 +paper 1 205 1.609438 1.609438 38 +base 2 165 1.791759 3.583518 50 +network 1 168 1.791759 1.791759 61 +file 1 132 1.945910 1.945910 70 +machin 2 129 2.079442 4.158884 95 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +code 2 108 2.197225 4.394450 116 +instructor 1 108 2.197225 2.197225 107 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +learn 10 86 2.484907 24.849070 170 +homework 5 79 2.564949 12.824745 193 +logic 1 71 2.639057 2.639057 230 +syllabu 1 67 2.708050 2.708050 247 +order 1 69 2.708050 2.708050 249 +evalu 1 64 2.772589 2.772589 266 +suggest 1 53 2.944439 2.944439 331 +talk 1 53 2.944439 2.944439 336 +format 1 48 3.044522 3.044522 356 +slide 1 38 3.295837 3.295837 467 +tree 1 36 3.367296 3.367296 492 +concept 1 32 3.465736 3.465736 537 +neural 1 30 3.555348 3.555348 578 +cluster 1 28 3.610918 3.610918 612 +experiment 1 26 3.688879 3.688879 645 +rule 1 26 3.688879 3.688879 638 +trace 1 25 3.737670 3.737670 677 +decis 1 23 3.806662 3.806662 728 +outlin 1 17 4.110874 4.110874 914 +explan 1 16 4.174387 4.174387 985 +induct 1 11 4.553877 4.553877 1304 +instanc 1 11 4.553877 4.553877 1322 +moonei 1 9 4.753590 4.753590 1520 +raymond 1 5 5.347108 5.347108 2313 +bayesian 1 4 5.568345 5.568345 2671 +informationclick 1 3 5.857933 5.857933 3224 +networkfor 1 3 5.857933 5.857933 3225 +mooneytim 1 2 6.263398 6.263398 4229 +placetu 1 2 6.263398 6.263398 4231 +sheetand 1 2 6.263398 6.263398 4230 +unsupervis 1 2 6.263398 6.263398 4233 +textmachinelearninglectur 1 1 6.957497 6.957497 7115 +learningassignmentsse 1 1 6.957497 6.957497 7116 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html new file mode 100644 index 00000000..6b8e0cd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs304p.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 6 374 0.693147 4.158882 7 +work 1 380 0.693147 0.693147 9 +cours 2 273 1.098612 2.197224 15 +languag 4 227 1.386294 5.545176 26 +softwar 1 220 1.386294 1.386294 30 +list 1 201 1.609438 1.609438 39 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +assign 2 135 1.945910 3.891820 66 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +studi 2 120 2.079442 4.158884 91 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +final 1 116 2.197225 2.197225 108 +take 1 97 2.302585 2.302585 134 +section 2 94 2.397895 4.795790 149 +call 1 91 2.397895 2.397895 153 +exam 5 86 2.484907 12.424535 169 +school 1 84 2.484907 2.484907 188 +chang 1 82 2.484907 2.484907 163 +requir 1 81 2.484907 2.484907 167 +syllabu 1 67 2.708050 2.708050 247 +guid 4 63 2.772589 11.090356 267 +foundat 2 62 2.772589 5.545178 286 +previou 1 62 2.772589 2.772589 290 +abstract 1 62 2.772589 2.772589 276 +plai 1 60 2.833213 2.833213 307 +major 1 56 2.890372 2.890372 315 +semest 1 58 2.890372 2.890372 312 +run 1 51 2.995732 2.995732 347 +basic 1 50 3.044522 3.044522 360 +move 1 47 3.091042 3.091042 382 +math 1 44 3.135494 3.135494 402 +directori 1 45 3.135494 3.135494 396 +next 1 34 3.401197 3.401197 517 +least 1 35 3.401197 3.401197 516 +concept 1 32 3.465736 3.465736 537 +express 1 32 3.465736 3.465736 540 +taken 1 31 3.496508 3.496508 555 +hard 1 30 3.555348 3.555348 563 +intend 1 28 3.610918 3.610918 599 +symbol 1 27 3.637586 3.637586 620 +never 1 25 3.737670 3.737670 671 +background 1 25 3.737670 3.737670 664 +instead 1 22 3.850148 3.850148 756 +scheme 3 20 3.951244 11.853732 818 +thur 1 19 4.007333 4.007333 847 +lisp 1 18 4.060443 4.060443 897 +hopefulli 1 14 4.317488 4.317488 1071 +draw 1 14 4.317488 4.317488 1086 +faster 1 11 4.553877 4.553877 1323 +surf 1 11 4.553877 4.553877 1301 +submiss 1 11 4.553877 4.553877 1298 +novak 1 9 4.753590 4.753590 1521 +strong 1 6 5.164786 5.164786 2029 +syntax 1 6 5.164786 5.164786 2030 +snow 1 6 5.164786 5.164786 2031 +gordon 1 6 5.164786 5.164786 2032 +assignmentsprogram 1 6 5.164786 5.164786 2019 +willb 1 5 5.347108 5.347108 2277 +porter 1 5 5.347108 5.347108 2293 +emphas 1 4 5.568345 5.568345 2672 +dialect 1 3 5.857933 5.857933 3226 +gambit 1 3 5.857933 5.857933 3227 +macintoshcomput 1 3 5.857933 5.857933 3228 +treasur 1 3 5.857933 5.857933 3229 +gradingmidterm 1 3 5.857933 5.857933 3230 +guidefin 1 3 5.857933 5.857933 3231 +peano 1 2 6.263398 6.263398 4234 +turtl 1 2 6.263398 6.263398 4235 +plot 1 2 6.263398 6.263398 4236 +sciencec 2 1 6.957497 13.914994 7117 +treesassign 2 1 6.957497 13.914994 7118 +atleast 1 1 6.957497 6.957497 7119 +programmingcours 1 1 6.957497 6.957497 7120 +precalculu 1 1 6.957497 6.957497 7121 +theschem 1 1 6.957497 6.957497 7122 +coursesand 1 1 6.957497 6.957497 7123 +learninga 1 1 6.957497 6.957497 7124 +tutorcopi 1 1 6.957497 6.957497 7125 +pcassign 1 1 6.957497 6.957497 7126 +simulationassign 1 1 6.957497 6.957497 7127 +webassign 1 1 6.957497 6.957497 7128 +schemeassign 1 1 6.957497 6.957497 7129 +gamblingassign 1 1 6.957497 6.957497 7130 +graphicsassign 1 1 6.957497 6.957497 7131 +manipulationstudi 1 1 6.957497 6.957497 7132 +vocabulari 1 1 6.957497 6.957497 7133 +thickensassign 1 1 6.957497 6.957497 7134 +huntassign 1 1 6.957497 6.957497 7135 +algebraassign 1 1 6.957497 6.957497 7136 +matricesstudi 1 1 6.957497 6.957497 7137 +unparsingassign 1 1 6.957497 6.957497 7138 +translationstudi 1 1 6.957497 6.957497 7139 +descriptionsprogram 1 1 6.957497 6.957497 7140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs375.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs375.html new file mode 100644 index 00000000..5b3f4089 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs375.html @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +program 3 374 0.693147 2.079441 7 +cours 3 273 1.098612 3.295836 15 +student 2 343 1.098612 2.197224 19 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +file 2 132 1.945910 3.891820 70 +construct 1 139 1.945910 1.945910 82 +compil 2 122 2.079442 4.158884 96 +studi 2 120 2.079442 4.158884 91 +take 1 97 2.302585 2.302585 134 +exam 1 86 2.484907 2.484907 169 +server 1 76 2.564949 2.564949 204 +write 1 72 2.639057 2.639057 222 +plan 1 65 2.772589 2.772589 272 +descript 1 64 2.772589 2.772589 271 +summer 2 56 2.890372 5.780744 311 +cover 1 55 2.944439 2.944439 329 +processor 1 54 2.944439 2.944439 335 +week 1 52 2.995732 2.995732 343 +directori 1 45 3.135494 3.135494 396 +live 1 40 3.258097 3.258097 451 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +programminglanguag 1 21 3.912023 3.912023 782 +chip 1 21 3.912023 3.912023 770 +five 1 19 4.007333 4.007333 841 +incorpor 1 13 4.382027 4.382027 1163 +pascal 1 12 4.465908 4.465908 1213 +workload 1 12 4.465908 4.465908 1210 +submiss 1 11 4.553877 4.553877 1298 +novak 1 9 4.753590 4.753590 1521 +heavi 1 7 5.010635 5.010635 1841 +assignmentsprogram 1 6 5.164786 5.164786 2019 +gradingmidterm 1 3 5.857933 5.857933 3230 +guidefin 1 3 5.857933 5.857933 3231 +compilersc 2 2 6.263398 12.526796 4237 +powerpc 2 2 6.263398 12.526796 4238 +syllabusprogram 1 2 6.263398 6.263398 4239 +codei 1 1 6.957497 6.957497 7141 +dedicatetheir 1 1 6.957497 6.957497 7142 +guidegordon 1 1 6.957497 6.957497 7143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html new file mode 100644 index 00000000..4fb14cb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs381k.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +studi 3 120 2.079442 6.238326 91 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +requir 1 81 2.484907 2.484907 167 +environ 1 84 2.484907 2.484907 177 +exam 1 86 2.484907 2.484907 169 +intellig 3 72 2.639057 7.917171 225 +logic 1 71 2.639057 2.639057 230 +artifici 2 63 2.772589 5.545178 280 +major 1 56 2.890372 2.890372 315 +natur 1 44 3.135494 3.135494 406 +vision 1 41 3.218876 3.218876 430 +survei 1 35 3.401197 3.401197 513 +represent 1 35 3.401197 3.401197 512 +bibliographi 1 34 3.401197 3.401197 518 +human 1 32 3.465736 3.465736 546 +defin 1 22 3.850148 3.850148 746 +behavior 1 18 4.060443 4.060443 881 +appropri 1 18 4.060443 4.060443 883 +attempt 1 17 4.110874 4.110874 917 +action 1 15 4.248495 4.248495 1038 +stori 2 14 4.317488 8.634976 1087 +achiev 1 14 4.317488 4.317488 1088 +calculu 1 12 4.465908 4.465908 1203 +thecomput 1 10 4.653960 4.653960 1408 +novak 1 9 4.753590 4.753590 1521 +brain 1 8 4.875197 4.875197 1638 +thegoal 1 6 5.164786 5.164786 2033 +assignmentsprogram 1 6 5.164786 5.164786 2019 +intelligencec 1 4 5.568345 5.568345 2673 +coverag 1 4 5.568345 5.568345 2656 +guidefin 1 3 5.857933 5.857933 3231 +actor 1 2 6.263398 6.263398 4240 +syllabusprogram 1 2 6.263398 6.263398 4239 +problemssolut 1 2 6.263398 6.263398 4241 +intelligenceartifici 1 1 6.957497 6.957497 7144 +todupl 1 1 6.957497 6.957497 7145 +connectspercept 1 1 6.957497 6.957497 7146 +andknowledg 1 1 6.957497 6.957497 7147 +withbrief 1 1 6.957497 6.957497 7148 +descriptionsmidterm 1 1 6.957497 6.957497 7149 +guidepred 1 1 6.957497 6.957497 7150 +problemsnot 1 1 6.957497 6.957497 7151 +braingordon 1 1 6.957497 6.957497 7152 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html new file mode 100644 index 00000000..db65021b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^novak^cs395t.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +program 5 374 0.693147 3.465735 7 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +read 1 154 1.791759 1.791759 47 +lectur 2 135 1.945910 3.891820 73 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +compil 1 122 2.079442 2.079442 96 +specif 1 106 2.197225 2.197225 106 +present 1 91 2.397895 2.397895 145 +graphic 1 90 2.397895 2.397895 147 +level 1 87 2.484907 2.484907 180 +homework 1 79 2.564949 2.564949 193 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +materi 1 75 2.639057 2.639057 221 +automat 2 61 2.833213 5.666426 306 +semest 2 58 2.890372 5.780744 312 +sever 1 56 2.890372 2.890372 322 +cover 1 55 2.944439 2.944439 329 +done 1 47 3.091042 3.091042 381 +execut 1 45 3.135494 3.135494 404 +third 1 43 3.178054 3.178054 412 +long 1 43 3.178054 3.178054 413 +expect 1 37 3.332205 3.332205 484 +given 1 32 3.465736 3.465736 538 +kind 1 32 3.465736 3.465736 541 +consist 1 26 3.688879 3.688879 651 +higher 1 24 3.761200 3.761200 690 +literatur 1 11 4.553877 4.553877 1300 +latter 1 9 4.753590 4.753590 1522 +novak 1 9 4.753590 4.753590 1521 +programmingc 1 3 5.857933 5.857933 3232 +ordinari 1 3 5.857933 5.857933 3233 +programmingautomat 1 1 6.957497 6.957497 7153 +programsfrom 1 1 6.957497 6.957497 7154 +illustrateth 1 1 6.957497 6.957497 7155 +requirelearn 1 1 6.957497 6.957497 7156 +partof 1 1 6.957497 6.957497 7157 +syllabusbibliographyassign 1 1 6.957497 6.957497 7158 +handpattern 1 1 6.957497 6.957497 7159 +matchingobject 1 1 6.957497 6.957497 7160 +programmingintroduct 1 1 6.957497 6.957497 7161 +glispview 1 1 6.957497 6.957497 7162 +programminggordon 1 1 6.957497 6.957497 7163 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ new file mode 100644 index 00000000..d51aada4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs195T^ @@ -0,0 +1,676 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 12 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +system 13 443 0.693147 9.010911 6 +program 13 374 0.693147 9.010911 7 +inform 3 412 0.693147 2.079441 8 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +us 7 329 1.098612 7.690284 16 +cours 5 273 1.098612 5.493060 15 +current 3 284 1.098612 3.295836 21 +engin 3 297 1.098612 3.295836 20 +offic 2 299 1.098612 2.197224 13 +time 2 293 1.098612 2.197224 17 +student 2 343 1.098612 2.197224 19 +last 1 314 1.098612 1.098612 14 +languag 13 227 1.386294 18.021822 26 +softwar 7 220 1.386294 9.704058 30 +design 6 213 1.386294 8.317764 25 +graduat 4 215 1.386294 5.545176 31 +gener 3 220 1.386294 4.158882 27 +also 3 259 1.386294 4.158882 28 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +includ 2 208 1.609438 3.218876 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +data 11 170 1.791759 19.709349 49 +parallel 11 169 1.791759 19.709349 60 +algorithm 10 162 1.791759 17.917590 57 +network 9 168 1.791759 16.125831 61 +implement 6 152 1.791759 10.750554 52 +applic 5 170 1.791759 8.958795 56 +avail 4 169 1.791759 7.167036 48 +distribut 3 162 1.791759 5.375277 51 +develop 3 174 1.791759 5.375277 53 +address 2 170 1.791759 3.583518 62 +base 2 165 1.791759 3.583518 50 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +recent 1 167 1.791759 1.791759 58 +problem 7 147 1.945910 13.621370 75 +file 5 132 1.945910 9.729550 70 +model 5 145 1.945910 9.729550 69 +process 4 142 1.945910 7.783640 72 +area 4 144 1.945910 7.783640 80 +perform 3 143 1.945910 5.837730 74 +lectur 2 135 1.945910 3.891820 73 +support 2 132 1.945910 3.891820 83 +architectur 2 139 1.945910 3.891820 77 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +compil 7 122 2.079442 14.556094 96 +machin 5 129 2.079442 10.397210 95 +provid 5 121 2.079442 10.397210 94 +high 4 130 2.079442 8.317768 101 +analysi 4 124 2.079442 8.317768 98 +introduct 3 126 2.079442 6.238326 87 +databas 3 122 2.079442 6.238326 86 +technolog 2 131 2.079442 4.158884 102 +number 2 130 2.079442 4.158884 97 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +well 4 109 2.197225 8.788900 121 +mathemat 4 108 2.197225 8.788900 123 +make 3 111 2.197225 6.591675 120 +structur 2 106 2.197225 4.394450 105 +place 2 106 2.197225 4.394450 124 +code 2 108 2.197225 4.394450 116 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +site 1 106 2.197225 2.197225 119 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +user 5 104 2.302585 11.512925 137 +access 2 102 2.302585 4.605170 136 +need 2 98 2.302585 4.605170 135 +advanc 2 99 2.302585 4.605170 130 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +present 6 91 2.397895 14.387370 145 +call 5 91 2.397895 11.989475 153 +search 3 95 2.397895 7.193685 155 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +graphic 1 90 2.397895 2.397895 147 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +level 10 87 2.484907 24.849070 180 +librari 6 87 2.484907 14.909442 181 +learn 6 86 2.484907 14.909442 170 +wide 4 84 2.484907 9.939628 185 +environ 3 84 2.484907 7.454721 177 +build 3 85 2.484907 7.454721 184 +requir 2 81 2.484907 4.969814 167 +solut 2 82 2.484907 4.969814 162 +chang 1 82 2.484907 2.484907 163 +resourc 1 81 2.484907 2.484907 172 +contain 1 81 2.484907 2.484907 174 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +thing 1 84 2.484907 2.484907 189 +novemb 1 81 2.484907 2.484907 179 +interfac 5 79 2.564949 12.824745 209 +state 4 76 2.564949 10.259796 207 +sourc 4 77 2.564949 10.259796 201 +issu 3 78 2.564949 7.694847 211 +method 3 80 2.564949 7.694847 213 +optim 3 79 2.564949 7.694847 197 +server 2 76 2.564949 5.129898 204 +exampl 2 77 2.564949 5.129898 195 +mondai 1 77 2.564949 2.564949 206 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +orient 1 80 2.564949 2.564949 205 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +logic 7 71 2.639057 18.473399 230 +effici 5 73 2.639057 13.195285 233 +free 2 73 2.639057 5.278114 224 +appli 2 71 2.639057 5.278114 226 +addit 1 74 2.639057 2.639057 228 +solv 1 73 2.639057 2.639057 234 +write 1 72 2.639057 2.639057 222 +differ 3 66 2.708050 8.124150 253 +goal 2 66 2.708050 5.416100 250 +integr 2 67 2.708050 5.416100 245 +view 2 70 2.708050 5.416100 254 +main 2 67 2.708050 5.416100 256 +thursdai 1 70 2.708050 2.708050 241 +receiv 1 66 2.708050 2.708050 244 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +practic 1 70 2.708050 2.708050 246 +abstract 4 62 2.772589 11.090356 276 +result 4 65 2.772589 11.090356 281 +evalu 3 64 2.772589 8.317767 266 +experi 2 64 2.772589 5.545178 283 +taylor 1 63 2.772589 2.772589 287 +organ 1 65 2.772589 2.772589 265 +foundat 1 62 2.772589 2.772589 286 +plan 1 65 2.772589 2.772589 272 +virtual 1 62 2.772589 2.772589 285 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +artifici 1 63 2.772589 2.772589 280 +previou 1 62 2.772589 2.772589 290 +written 1 63 2.772589 2.772589 278 +simpl 2 60 2.833213 5.666426 298 +automat 2 61 2.833213 5.666426 306 +best 1 59 2.833213 2.833213 299 +plai 1 60 2.833213 2.833213 307 +type 1 61 2.833213 2.833213 296 +special 6 56 2.890372 17.342232 320 +explor 3 58 2.890372 8.671116 324 +sever 2 56 2.890372 5.780744 322 +overview 2 56 2.890372 5.780744 323 +space 2 57 2.890372 5.780744 310 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +reason 1 57 2.890372 2.890372 318 +direct 1 57 2.890372 2.890372 316 +major 1 56 2.890372 2.890372 315 +allow 5 53 2.944439 14.722195 333 +talk 4 53 2.944439 11.777756 336 +undergradu 2 54 2.944439 5.888878 338 +extens 2 53 2.944439 5.888878 340 +local 1 55 2.944439 2.944439 334 +instruct 1 53 2.944439 2.944439 332 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +run 1 51 2.995732 2.995732 347 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +case 1 51 2.995732 2.995732 351 +approach 6 48 3.044522 18.267132 366 +principl 2 48 3.044522 6.089044 357 +give 1 50 3.044522 3.044522 359 +without 1 50 3.044522 3.044522 370 +standard 1 48 3.044522 3.044522 365 +adapt 6 46 3.091042 18.546252 387 +featur 4 46 3.091042 12.364168 386 +effect 2 46 3.091042 6.182084 385 +move 1 47 3.091042 3.091042 382 +describ 6 45 3.135494 18.812964 400 +natur 5 44 3.135494 15.677470 406 +execut 3 45 3.135494 9.406482 404 +even 2 45 3.135494 6.270988 393 +discuss 2 45 3.135494 6.270988 399 +better 1 45 3.135494 3.135494 401 +offer 2 43 3.178054 6.356108 414 +fast 2 42 3.218876 6.437752 429 +autom 1 41 3.218876 3.218876 434 +past 1 42 3.218876 3.218876 428 +linear 1 41 3.218876 3.218876 431 +examin 1 42 3.218876 3.218876 424 +continu 2 39 3.258097 6.516194 448 +form 2 39 3.258097 6.516194 443 +must 1 40 3.258097 3.258097 442 +programm 1 39 3.258097 3.258097 445 +map 1 39 3.258097 3.258097 452 +theoret 1 39 3.258097 3.258097 446 +open 4 38 3.295837 13.183348 469 +close 2 38 3.295837 6.591674 465 +seminar 1 38 3.295837 3.295837 470 +credit 1 38 3.295837 3.295837 460 +paul 1 38 3.295837 3.295837 471 +formal 3 37 3.332205 9.996615 478 +hand 2 37 3.332205 6.664410 475 +workstat 2 37 3.332205 6.664410 479 +feel 1 37 3.332205 3.332205 483 +cost 1 37 3.332205 3.332205 480 +mean 1 37 3.332205 3.332205 477 +robot 5 36 3.367296 16.836480 497 +procedur 3 36 3.367296 10.101888 488 +game 1 36 3.367296 3.367296 498 +represent 5 35 3.401197 17.005985 512 +singl 2 34 3.401197 6.802394 510 +least 1 35 3.401197 3.401197 516 +random 1 34 3.401197 3.401197 511 +toler 8 33 3.433987 27.471896 533 +queri 5 33 3.433987 17.169935 524 +within 2 33 3.433987 6.867974 525 +obtain 1 33 3.433987 3.433987 534 +fault 9 32 3.465736 31.191624 547 +idea 4 32 3.465736 13.862944 545 +human 1 32 3.465736 3.465736 546 +given 1 32 3.465736 3.465736 538 +ad 1 32 3.465736 3.465736 544 +taken 1 31 3.496508 3.496508 555 +often 1 31 3.496508 3.496508 551 +neural 4 30 3.555348 14.221392 578 +robert 3 30 3.555348 10.666044 567 +produc 3 30 3.555348 10.666044 572 +power 2 30 3.555348 7.110696 573 +abl 2 30 3.555348 7.110696 566 +graph 1 30 3.555348 3.555348 576 +domain 1 30 3.555348 3.555348 564 +rang 1 30 3.555348 3.555348 565 +common 1 30 3.555348 3.555348 574 +compon 1 30 3.555348 3.555348 570 +focu 1 30 3.555348 3.555348 571 +semant 3 29 3.583519 10.750557 587 +depend 2 29 3.583519 7.167038 583 +built 2 29 3.583519 7.167038 592 +limit 1 29 3.583519 3.583519 585 +turn 1 29 3.583519 3.583519 586 +platform 1 29 3.583519 3.583519 591 +consid 1 29 3.583519 3.583519 590 +cluster 3 28 3.610918 10.832754 612 +scale 2 28 3.610918 7.221836 613 +propos 1 28 3.610918 3.610918 602 +usual 1 28 3.610918 3.610918 608 +framework 1 28 3.610918 3.610918 606 +enabl 3 26 3.688879 11.066637 655 +consist 2 26 3.688879 7.377758 651 +detect 1 26 3.688879 3.688879 646 +request 1 26 3.688879 3.688879 635 +challeng 1 26 3.688879 3.688879 653 +effort 1 26 3.688879 3.688879 652 +mine 1 26 3.688879 3.688879 654 +repres 1 26 3.688879 3.688879 656 +task 4 25 3.737670 14.950680 678 +reliabl 2 25 3.737670 7.475340 674 +concern 2 25 3.737670 7.475340 666 +client 1 25 3.737670 3.737670 679 +known 2 24 3.761200 7.522400 702 +higher 2 24 3.761200 7.522400 690 +mike 1 24 3.761200 3.761200 703 +greg 1 24 3.761200 3.761200 695 +lab 1 24 3.761200 3.761200 698 +decis 3 23 3.806662 11.419986 728 +highli 2 23 3.806662 7.613324 725 +honor 1 23 3.806662 3.806662 729 +cooper 4 22 3.850148 15.400592 757 +hierarchi 3 22 3.850148 11.550444 744 +sequenti 3 22 3.850148 11.550444 745 +serv 2 22 3.850148 7.700296 758 +emphasi 1 22 3.850148 3.850148 755 +instead 1 22 3.850148 3.850148 756 +inth 1 22 3.850148 3.850148 741 +deal 1 22 3.850148 3.850148 736 +reduc 1 22 3.850148 3.850148 759 +among 2 21 3.912023 7.824046 781 +path 2 21 3.912023 7.824046 778 +busi 1 21 3.912023 3.912023 784 +fact 1 21 3.912023 3.912023 780 +output 1 21 3.912023 3.912023 788 +facil 2 20 3.951244 7.902488 814 +portabl 2 20 3.951244 7.902488 819 +basi 1 20 3.951244 3.951244 828 +increas 1 20 3.951244 3.951244 829 +runtim 2 19 4.007333 8.014666 858 +separ 1 19 4.007333 4.007333 844 +failur 4 18 4.060443 16.241772 898 +attend 1 18 4.060443 4.060443 893 +encourag 1 18 4.060443 4.060443 880 +seem 1 18 4.060443 4.060443 899 +appropri 1 18 4.060443 4.060443 883 +stop 1 17 4.110874 4.110874 942 +ultim 1 17 4.110874 4.110874 943 +previous 1 17 4.110874 4.110874 923 +modif 1 17 4.110874 4.110874 913 +cognit 3 16 4.174387 12.523161 986 +advantag 3 16 4.174387 12.523161 987 +spatial 2 16 4.174387 8.348774 988 +easi 2 16 4.174387 8.348774 969 +spars 1 16 4.174387 4.174387 989 +vector 1 16 4.174387 4.174387 961 +across 1 16 4.174387 4.174387 974 +action 2 15 4.248495 8.496990 1038 +novel 1 15 4.248495 4.248495 1039 +english 1 15 4.248495 4.248495 1033 +side 1 15 4.248495 4.248495 1022 +massiv 1 15 4.248495 4.248495 1026 +reflect 1 15 4.248495 4.248495 1034 +topolog 1 14 4.317488 4.317488 1089 +heterogen 1 14 4.317488 4.317488 1090 +convent 1 14 4.317488 4.317488 1072 +translat 2 13 4.382027 8.764054 1164 +discret 2 13 4.382027 8.764054 1165 +prolog 2 13 4.382027 8.764054 1155 +whose 1 13 4.382027 4.382027 1166 +central 1 13 4.382027 4.382027 1160 +signific 1 13 4.382027 4.382027 1125 +difficulti 1 13 4.382027 4.382027 1132 +infrastructur 1 12 4.465908 4.465908 1234 +grow 1 12 4.465908 4.465908 1209 +onth 1 12 4.465908 4.465908 1218 +amount 1 12 4.465908 4.465908 1208 +evolv 1 12 4.465908 4.465908 1223 +promot 1 12 4.465908 4.465908 1235 +buffer 1 12 4.465908 4.465908 1211 +deduct 1 12 4.465908 4.465908 1236 +minor 1 12 4.465908 4.465908 1237 +evolut 4 11 4.553877 18.215508 1314 +broad 2 11 4.553877 9.107754 1302 +vladimir 1 11 4.553877 4.553877 1324 +transpar 1 11 4.553877 4.553877 1325 +multithread 1 11 4.553877 4.553877 1315 +node 1 11 4.553877 4.553877 1326 +induct 1 11 4.553877 4.553877 1304 +fix 1 11 4.553877 4.553877 1327 +noth 1 11 4.553877 4.553877 1328 +genet 2 10 4.653960 9.307920 1409 +underli 2 10 4.653960 9.307920 1410 +guarante 1 10 4.653960 4.653960 1391 +reli 1 10 4.653960 4.653960 1411 +facilit 1 10 4.653960 4.653960 1412 +length 1 10 4.653960 4.653960 1400 +sentenc 1 10 4.653960 4.653960 1413 +custom 1 10 4.653960 4.653960 1414 +certain 1 10 4.653960 4.653960 1393 +tradit 1 10 4.653960 4.653960 1404 +modular 1 10 4.653960 4.653960 1392 +notat 3 9 4.753590 14.260770 1489 +risto 1 9 4.753590 4.753590 1523 +calvin 1 9 4.753590 4.753590 1518 +sister 1 9 4.753590 4.753590 1524 +explicit 1 9 4.753590 4.753590 1525 +declar 1 9 4.753590 4.753590 1526 +compos 1 9 4.753590 4.753590 1527 +meta 1 9 4.753590 4.753590 1505 +novak 1 9 4.753590 4.753590 1521 +assumpt 1 9 4.753590 4.753590 1514 +significantli 1 9 4.753590 4.753590 1508 +reus 4 8 4.875197 19.500788 1661 +simpli 2 8 4.875197 9.750394 1626 +fail 1 8 4.875197 4.875197 1655 +paradigm 1 8 4.875197 4.875197 1662 +isol 1 8 4.875197 4.875197 1663 +claim 1 8 4.875197 4.875197 1664 +realist 1 8 4.875197 4.875197 1665 +analys 1 8 4.875197 4.875197 1666 +beyond 2 7 5.010635 10.021270 1834 +trend 2 7 5.010635 10.021270 1842 +dedic 1 7 5.010635 5.010635 1843 +metric 1 7 5.010635 5.010635 1831 +usabl 1 7 5.010635 5.010635 1810 +converg 1 7 5.010635 5.010635 1844 +uniform 1 7 5.010635 5.010635 1845 +determinist 2 6 5.164786 10.329572 2034 +difficult 2 6 5.164786 10.329572 2035 +gordon 1 6 5.164786 5.164786 2032 +academia 1 6 5.164786 5.164786 2036 +promis 1 6 5.164786 5.164786 2037 +pace 1 6 5.164786 5.164786 2011 +emerg 1 6 5.164786 5.164786 2038 +causal 1 6 5.164786 5.164786 2024 +prefetch 1 6 5.164786 5.164786 2039 +infer 1 6 5.164786 5.164786 2040 +conflict 1 6 5.164786 5.164786 2041 +constrain 1 6 5.164786 5.164786 2042 +variant 1 6 5.164786 5.164786 2043 +affect 1 6 5.164786 5.164786 2044 +corpu 2 5 5.347108 10.694216 2282 +revolut 1 5 5.347108 5.347108 2315 +ofdistribut 1 5 5.347108 5.347108 2316 +scope 1 5 5.347108 5.347108 2296 +despit 1 5 5.347108 5.347108 2317 +unknown 1 5 5.347108 5.347108 2318 +stabl 1 5 5.347108 5.347108 2309 +distinct 1 5 5.347108 5.347108 2319 +corba 1 5 5.347108 5.347108 2320 +pars 1 5 5.347108 5.347108 2321 +fairli 1 5 5.347108 5.347108 2322 +ofinterest 1 5 5.347108 5.347108 2323 +blumoferdb 1 5 5.347108 5.347108 2324 +reinforc 2 4 5.568345 11.136690 2674 +resolv 2 4 5.568345 11.136690 2675 +asymptot 2 4 5.568345 11.136690 2676 +vijaya 1 4 5.568345 5.568345 2677 +lorenzo 1 4 5.568345 5.568345 2588 +expens 1 4 5.568345 5.568345 2678 +ofworkst 1 4 5.568345 5.568345 2679 +illus 1 4 5.568345 5.568345 2603 +algorithmsand 1 4 5.568345 5.568345 2680 +havedevelop 1 4 5.568345 5.568345 2681 +clearli 1 4 5.568345 5.568345 2590 +surprisingli 1 4 5.568345 5.568345 2609 +floyd 1 4 5.568345 5.568345 2682 +queu 1 4 5.568345 5.568345 2648 +conceptu 3 3 5.857933 17.573799 3214 +lightweight 2 3 5.857933 11.715866 3234 +popul 2 3 5.857933 11.715866 3235 +embodi 2 3 5.857933 11.715866 3236 +blumof 1 3 5.857933 5.857933 3237 +citizen 1 3 5.857933 5.857933 3238 +dramat 1 3 5.857933 5.857933 3239 +aggress 1 3 5.857933 5.857933 3240 +similarli 1 3 5.857933 5.857933 3241 +neighborhood 1 3 5.857933 5.857933 3242 +intra 1 3 5.857933 5.857933 3243 +enumer 1 3 5.857933 5.857933 3244 +compliant 1 3 5.857933 5.857933 3245 +andsemant 1 3 5.857933 5.857933 3246 +parser 1 3 5.857933 5.857933 3141 +surpass 1 3 5.857933 5.857933 3247 +agener 1 3 5.857933 5.857933 3213 +theworld 1 3 5.857933 5.857933 3158 +rivest 1 3 5.857933 5.857933 3248 +parallelalgorithm 1 3 5.857933 5.857933 3249 +rscheme 1 3 5.857933 5.857933 3250 +cilk 8 2 6.263398 50.107184 4242 +alamo 4 2 6.263398 25.053592 4243 +chill 3 2 6.263398 18.790194 4244 +theform 2 2 6.263398 12.526796 4245 +sciencefal 1 2 6.263398 6.263398 4246 +andresearch 1 2 6.263398 6.263398 4247 +government 1 2 6.263398 6.263398 4248 +andcollect 1 2 6.263398 6.263398 4249 +todramat 1 2 6.263398 6.263398 4250 +thedesign 1 2 6.263398 6.263398 4251 +har 1 2 6.263398 6.263398 4252 +undergo 1 2 6.263398 6.263398 4253 +applicationsto 1 2 6.263398 6.263398 4254 +offailur 1 2 6.263398 6.263398 4255 +idl 1 2 6.263398 6.263398 4256 +indistribut 1 2 6.263398 6.263398 4257 +andmap 1 2 6.263398 6.263398 4258 +equilibrium 1 2 6.263398 6.263398 4259 +trajectori 1 2 6.263398 6.263398 4260 +sufficientto 1 2 6.263398 6.263398 4261 +logicprogram 1 2 6.263398 6.263398 4262 +thesetechniqu 1 2 6.263398 6.263398 4263 +ofneur 1 2 6.263398 6.263398 4264 +neuro 1 2 6.263398 6.263398 4265 +resourcemanag 1 2 6.263398 6.263398 4266 +anobject 1 2 6.263398 6.263398 4267 +anticip 1 2 6.263398 6.263398 4268 +corpora 1 2 6.263398 6.263398 4269 +foidl 1 2 6.263398 6.263398 4270 +andanalysi 1 2 6.263398 6.263398 4271 +straightforward 1 2 6.263398 6.263398 4272 +lengthi 1 2 6.263398 6.263398 4273 +andform 1 2 6.263398 6.263398 4274 +succinctli 1 2 6.263398 6.263398 4275 +concret 1 2 6.263398 6.263398 4276 +analysisof 1 2 6.263398 6.263398 4277 +tarjan 1 2 6.263398 6.263398 4278 +maspar 1 2 6.263398 6.263398 4279 +workon 1 2 6.263398 6.263398 4280 +symbiot 3 1 6.957497 20.872491 7164 +pram 3 1 6.957497 20.872491 7165 +programmingoctob 2 1 6.957497 13.914994 7166 +wilsonextens 2 1 6.957497 13.914994 7167 +ramachandranth 2 1 6.957497 13.914994 7168 +theapplic 2 1 6.957497 13.914994 7169 +sane 2 1 6.957497 13.914994 7170 +datasourc 2 1 6.957497 13.914994 7171 +theabstract 2 1 6.957497 13.914994 7172 +sciencecst 1 1 6.957497 6.957497 7173 +apass 1 1 6.957497 6.957497 7174 +beregist 1 1 6.957497 6.957497 7175 +schedulespeakertitleseptemb 1 1 6.957497 6.957497 7176 +mirankeralamo 1 1 6.957497 6.957497 7177 +warehouseseptemb 1 1 6.957497 6.957497 7178 +kuipersth 1 1 6.957497 6.957497 7179 +humanand 1 1 6.957497 6.957497 7180 +mapsseptemb 1 1 6.957497 6.957497 7181 +blumofecilk 1 1 6.957497 6.957497 7182 +reliableparallel 1 1 6.957497 6.957497 7183 +workstationsseptemb 1 1 6.957497 6.957497 7184 +miikkulainenlearn 1 1 6.957497 6.957497 7185 +throughsymbiot 1 1 6.957497 6.957497 7186 +networksoctob 1 1 6.957497 6.957497 7187 +lifschitzmathemat 1 1 6.957497 6.957497 7188 +reflectionoctob 1 1 6.957497 6.957497 7189 +mooneylearn 1 1 6.957497 6.957497 7190 +usinginduct 1 1 6.957497 6.957497 7191 +dahlindistribut 1 1 6.957497 6.957497 7192 +internetsnovemb 1 1 6.957497 6.957497 7193 +novaksoftwar 1 1 6.957497 6.957497 7194 +genericprocedur 1 1 6.957497 6.957497 7195 +viewsnovemb 1 1 6.957497 6.957497 7196 +parallelalgorithmsnovemb 1 1 6.957497 6.957497 7197 +alvisilighweight 1 1 6.957497 6.957497 7198 +tolerancenovemb 1 1 6.957497 6.957497 7199 +linadapt 1 1 6.957497 6.957497 7200 +optimizationdecemb 1 1 6.957497 6.957497 7201 +plaxtonanalysi 1 1 6.957497 6.957497 7202 +algorithmslighweight 1 1 6.957497 6.957497 7203 +tolerancelorenzo 1 1 6.957497 6.957497 7204 +alvisidistribut 1 1 6.957497 6.957497 7205 +confin 1 1 6.957497 6.957497 7206 +revolution 1 1 6.957497 6.957497 7207 +beyondth 1 1 6.957497 6.957497 7208 +toleranttechniqu 1 1 6.957497 6.957497 7209 +willceas 1 1 6.957497 6.957497 7210 +exot 1 1 6.957497 6.957497 7211 +distributedinform 1 1 6.957497 6.957497 7212 +acompetit 1 1 6.957497 6.957497 7213 +criticalinform 1 1 6.957497 6.957497 7214 +engineerfault 1 1 6.957497 6.957497 7215 +negligibleimpact 1 1 6.957497 6.957497 7216 +communicatethrough 1 1 6.957497 6.957497 7217 +onnetwork 1 1 6.957497 6.957497 7218 +workstationsrobert 1 1 6.957497 6.957497 7219 +blumofethi 1 1 6.957497 6.957497 7220 +pronouncedsilk 1 1 6.957497 6.957497 7221 +andcilk 1 1 6.957497 6.957497 7222 +functionalsubset 1 1 6.957497 6.957497 7223 +providesadapt 1 1 6.957497 6.957497 7224 +tranpar 1 1 6.957497 6.957497 7225 +touser 1 1 6.957497 6.957497 7226 +shrinkdynam 1 1 6.957497 6.957497 7227 +cilkprogram 1 1 6.957497 6.957497 7228 +workstationscrash 1 1 6.957497 6.957497 7229 +andrecov 1 1 6.957497 6.957497 7230 +livedemonstr 1 1 6.957497 6.957497 7231 +internetsmik 1 1 6.957497 6.957497 7232 +dahlinthi 1 1 6.957497 6.957497 7233 +applicationsmotiv 1 1 6.957497 6.957497 7234 +inclust 1 1 6.957497 6.957497 7235 +servicei 1 1 6.957497 6.957497 7236 +nodesto 1 1 6.957497 6.957497 7237 +centralserv 1 1 6.957497 6.957497 7238 +goodperform 1 1 6.957497 6.957497 7239 +networkperform 1 1 6.957497 6.957497 7240 +projectwil 1 1 6.957497 6.957497 7241 +mapsbenjamin 1 1 6.957497 6.957497 7242 +kuipershuman 1 1 6.957497 6.957497 7243 +forlarg 1 1 6.957497 6.957497 7244 +ontolog 1 1 6.957497 6.957497 7245 +varietyof 1 1 6.957497 6.957497 7246 +cast 1 1 6.957497 6.957497 7247 +diverserepresent 1 1 6.957497 6.957497 7248 +spatialsemant 1 1 6.957497 6.957497 7249 +andassumpt 1 1 6.957497 6.957497 7250 +thecontrol 1 1 6.957497 6.957497 7251 +beabstract 1 1 6.957497 6.957497 7252 +givinga 1 1 6.957497 6.957497 7253 +causalgraph 1 1 6.957497 6.957497 7254 +topologicalnetwork 1 1 6.957497 6.957497 7255 +occupancygrid 1 1 6.957497 6.957497 7256 +theframework 1 1 6.957497 6.957497 7257 +ofglob 1 1 6.957497 6.957497 7258 +programmingvladimir 1 1 6.957497 6.957497 7259 +lifschitzlog 1 1 6.957497 6.957497 7260 +functionalprogram 1 1 6.957497 6.957497 7261 +notne 1 1 6.957497 6.957497 7262 +itcan 1 1 6.957497 6.957497 7263 +executedus 1 1 6.957497 6.957497 7264 +withdefin 1 1 6.957497 6.957497 7265 +thereason 1 1 6.957497 6.957497 7266 +thesound 1 1 6.957497 6.957497 7267 +optimizationcalvin 1 1 6.957497 6.957497 7268 +linthi 1 1 6.957497 6.957497 7269 +andtheir 1 1 6.957497 6.957497 7270 +differenthardwar 1 1 6.957497 6.957497 7271 +efficientand 1 1 6.957497 6.957497 7272 +suchlibrari 1 1 6.957497 6.957497 7273 +weexplain 1 1 6.957497 6.957497 7274 +networksristo 1 1 6.957497 6.957497 7275 +miikkulainena 1 1 6.957497 6.957497 7276 +neuronsthrough 1 1 6.957497 6.957497 7277 +anddiscourag 1 1 6.957497 6.957497 7278 +suboptim 1 1 6.957497 6.957497 7279 +toextract 1 1 6.957497 6.957497 7280 +sequentialdecis 1 1 6.957497 6.957497 7281 +warehousedan 1 1 6.957497 6.957497 7282 +mirankerth 1 1 6.957497 6.957497 7283 +andint 1 1 6.957497 6.957497 7284 +theuser 1 1 6.957497 6.957497 7285 +byqueri 1 1 6.957497 6.957497 7286 +interfacethat 1 1 6.957497 6.957497 7287 +ofabstract 1 1 6.957497 6.957497 7288 +clever 1 1 6.957497 6.957497 7289 +anddata 1 1 6.957497 6.957497 7290 +activedatabas 1 1 6.957497 6.957497 7291 +constructedus 1 1 6.957497 6.957497 7292 +databasefacil 1 1 6.957497 6.957497 7293 +thealamo 1 1 6.957497 6.957497 7294 +dataintegr 1 1 6.957497 6.957497 7295 +elementsof 1 1 6.957497 6.957497 7296 +furthercomposit 1 1 6.957497 6.957497 7297 +answerhigh 1 1 6.957497 6.957497 7298 +logicprogrammingraymond 1 1 6.957497 6.957497 7299 +mooneyinduct 1 1 6.957497 6.957497 7300 +learningprolog 1 1 6.957497 6.957497 7301 +offirst 1 1 6.957497 6.957497 7302 +learningmethod 1 1 6.957497 6.957497 7303 +areappli 1 1 6.957497 6.957497 7304 +believethi 1 1 6.957497 6.957497 7305 +richer 1 1 6.957497 6.957497 7306 +parsersfrom 1 1 6.957497 6.957497 7307 +superior 1 1 6.957497 6.957497 7308 +onsever 1 1 6.957497 6.957497 7309 +networkmethod 1 1 6.957497 6.957497 7310 +ati 1 1 6.957497 6.957497 7311 +ofairlin 1 1 6.957497 6.957497 7312 +automaticallydevelop 1 1 6.957497 6.957497 7313 +englishdatabas 1 1 6.957497 6.957497 7314 +moreaccur 1 1 6.957497 6.957497 7315 +smallgeograph 1 1 6.957497 6.957497 7316 +tens 1 1 6.957497 6.957497 7317 +treemethod 1 1 6.957497 6.957497 7318 +throughviewsgordon 1 1 6.957497 6.957497 7319 +toachiev 1 1 6.957497 6.957497 7320 +thesoftwar 1 1 6.957497 6.957497 7321 +typesus 1 1 6.957497 6.957497 7322 +specifyview 1 1 6.957497 6.957497 7323 +adesir 1 1 6.957497 6.957497 7324 +algorithmsgreg 1 1 6.957497 6.957497 7325 +plaxtona 1 1 6.957497 6.957497 7326 +forspecif 1 1 6.957497 6.957497 7327 +notuncommon 1 1 6.957497 6.957497 7328 +havelittl 1 1 6.957497 6.957497 7329 +suchpap 1 1 6.957497 6.957497 7330 +gapsinher 1 1 6.957497 6.957497 7331 +inadequatefor 1 1 6.957497 6.957497 7332 +straightforwardalgorithm 1 1 6.957497 6.957497 7333 +theconceptu 1 1 6.957497 6.957497 7334 +trivialclass 1 1 6.957497 6.957497 7335 +blum 1 1 6.957497 6.957497 7336 +pratt 1 1 6.957497 6.957497 7337 +algorithmsvijaya 1 1 6.957497 6.957497 7338 +forcombinatori 1 1 6.957497 6.957497 7339 +recentyear 1 1 6.957497 6.957497 7340 +willdescrib 1 1 6.957497 6.957497 7341 +thesealgorithm 1 1 6.957497 6.957497 7342 +thendescrib 1 1 6.957497 6.957497 7343 +wepropos 1 1 6.957497 6.957497 7344 +parallelshar 1 1 6.957497 6.957497 7345 +reflectionpaul 1 1 6.957497 6.957497 7346 +addnew 1 1 6.957497 6.957497 7347 +structureaccordingli 1 1 6.957497 6.957497 7348 +adapat 1 1 6.957497 6.957497 7349 +extensiblelanguag 1 1 6.957497 6.957497 7350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ new file mode 100644 index 00000000..80c8054d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^rdb^cs372^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 4 374 0.693147 2.772588 7 +inform 1 412 0.693147 0.693147 8 +offic 4 299 1.098612 4.394448 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +email 2 220 1.386294 2.772588 29 +gener 1 220 1.386294 1.386294 27 +utexa 3 189 1.609438 4.828314 44 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +phone 2 175 1.791759 3.583518 45 +hour 2 165 1.791759 3.583518 46 +implement 1 152 1.791759 1.791759 52 +problem 12 147 1.945910 23.350920 75 +assign 4 135 1.945910 7.783640 66 +lectur 2 135 1.945910 3.891820 73 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +introduct 2 126 2.079442 4.158884 87 +topic 2 114 2.197225 4.394450 110 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +book 1 99 2.302585 2.302585 131 +octob 4 89 2.397895 9.591580 156 +solut 5 82 2.484907 12.424535 162 +exam 3 86 2.484907 7.454721 169 +novemb 3 81 2.484907 7.454721 179 +exampl 2 77 2.564949 5.129898 195 +decemb 2 80 2.564949 5.129898 215 +mondai 1 77 2.564949 2.564949 206 +free 1 73 2.639057 2.639057 224 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 2 70 2.708050 5.416100 241 +test 1 66 2.708050 2.708050 252 +septemb 4 65 2.772589 11.090356 274 +wednesdai 1 64 2.772589 2.772589 261 +taylor 1 63 2.772589 2.772589 287 +handout 1 64 2.772589 2.772589 263 +found 2 53 2.944439 5.888878 337 +cover 2 55 2.944439 5.888878 329 +date 1 51 2.995732 2.995732 344 +midterm 2 45 3.135494 6.270988 392 +long 1 43 3.178054 3.178054 413 +map 1 39 3.258097 3.258097 452 +feel 1 37 3.332205 3.332205 483 +least 1 35 3.401197 3.401197 516 +chapter 11 32 3.465736 38.123096 536 +robert 2 30 3.555348 7.110696 567 +except 6 28 3.610918 21.665508 607 +assum 1 19 4.007333 4.007333 845 +stop 1 17 4.110874 4.110874 942 +station 1 13 4.382027 4.382027 1157 +solari 1 12 4.465908 4.465908 1238 +systemsc 1 11 4.553877 4.553877 1293 +canb 1 7 5.010635 5.010635 1846 +encrypt 1 7 5.010635 5.010635 1835 +blumoferdb 1 5 5.347108 5.347108 2324 +systemsfal 1 4 5.568345 5.568345 2683 +blumof 1 3 5.857933 5.857933 3237 +gooti 2 2 6.263398 12.526796 4281 +subramanyam 1 2 6.263398 6.263398 4282 +crypt 2 1 6.957497 13.914994 7351 +multiplemap 1 1 6.957497 6.957497 7352 +themap 1 1 6.957497 6.957497 7353 +decrypt 1 1 6.957497 6.957497 7354 +solutionsread 1 1 6.957497 6.957497 7355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ new file mode 100644 index 00000000..36e6338f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs378-nn^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +utexa 3 189 1.609438 4.828314 44 +class 2 199 1.609438 3.218876 37 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +network 3 168 1.791759 5.375277 61 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +select 1 91 2.397895 2.397895 154 +grade 1 90 2.397895 2.397895 142 +homework 1 79 2.564949 2.564949 193 +copi 1 63 2.772589 2.772589 284 +detail 1 57 2.890372 2.890372 321 +midterm 1 45 3.135494 3.135494 392 +slide 1 38 3.295837 3.295837 467 +neural 3 30 3.555348 10.666044 578 +fundament 1 25 3.737670 3.737670 661 +station 1 13 4.382027 4.382027 1157 +uniqu 1 12 4.465908 4.465908 1228 +risto 2 9 4.753590 9.507180 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +lauren 1 3 5.857933 5.857933 3251 +bednar 1 2 6.263398 6.263398 4283 +jbednar 1 2 6.263398 6.263398 4284 +ofneur 1 2 6.263398 6.263398 4264 +cliff 1 2 6.263398 6.263398 4285 +edusun 1 2 6.263398 6.263398 4286 +networksfal 1 1 6.957497 6.957497 7356 +fausett 1 1 6.957497 6.957497 7357 +englewood 1 1 6.957497 6.957497 7358 +prenticehal 1 1 6.957497 6.957497 7359 +schedulehomework 1 1 6.957497 6.957497 7360 +assignmentsexamsclass 1 1 6.957497 6.957497 7361 +resourcesa 1 1 6.957497 6.957497 7362 +versionof 1 1 6.957497 6.957497 7363 +syllabusristo 1 1 6.957497 6.957497 7364 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ new file mode 100644 index 00000000..fb6aec44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^risto^cs395t-cs^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +offic 2 299 1.098612 2.197224 13 +student 2 343 1.098612 2.197224 19 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +utexa 3 189 1.609438 4.828314 44 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +read 3 154 1.791759 5.375277 47 +hour 2 165 1.791759 3.583518 46 +hall 2 146 1.945910 3.891820 65 +note 2 142 1.945910 3.891820 67 +introduct 2 126 2.079442 4.158884 87 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +center 1 88 2.397895 2.397895 158 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +write 1 72 2.639057 2.639057 222 +dept 2 64 2.772589 5.545178 291 +taylor 1 63 2.772589 2.772589 287 +foundat 1 62 2.772589 2.772589 286 +detail 1 57 2.890372 2.890372 321 +pointer 1 48 3.044522 3.044522 368 +discuss 3 45 3.135494 9.406482 399 +submit 1 39 3.258097 3.258097 440 +short 2 36 3.367296 6.734592 499 +approxim 1 35 3.401197 3.401197 509 +collabor 1 32 3.465736 3.465736 543 +particip 1 29 3.583519 3.583519 589 +toward 1 25 3.737670 3.737670 668 +attend 1 18 4.060443 4.060443 893 +regular 1 17 4.110874 4.110874 929 +cognit 6 16 4.174387 25.046322 986 +critic 1 16 4.174387 4.174387 982 +philosophi 1 13 4.382027 4.382027 1167 +signific 1 13 4.382027 4.382027 1125 +count 1 12 4.465908 4.465908 1239 +packet 1 10 4.653960 4.653960 1415 +risto 3 9 4.753590 14.260770 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +appt 2 5 5.347108 10.694216 2312 +ofinterest 1 5 5.347108 5.347108 2323 +nichola 1 3 5.857933 5.857933 3252 +interv 1 3 5.857933 5.857933 3253 +thepap 1 3 5.857933 5.857933 3254 +sciencefal 1 2 6.263398 6.263398 4246 +commentari 1 2 6.263398 6.263398 4287 +edusun 1 2 6.263398 6.263398 4286 +asher 1 1 6.957497 6.957497 7365 +waggen 1 1 6.957497 6.957497 7366 +nasher 1 1 6.957497 6.957497 7367 +berti 1 1 6.957497 6.957497 7368 +posner 1 1 6.957497 6.957497 7369 +mitpress 1 1 6.957497 6.957497 7370 +withanoth 1 1 6.957497 6.957497 7371 +alsorequir 1 1 6.957497 6.957497 7372 +descriptioncours 1 1 6.957497 6.957497 7373 +schedulediscuss 1 1 6.957497 6.957497 7374 +notesperson 1 1 6.957497 6.957497 7375 +adscollabor 1 1 6.957497 6.957497 7376 +paperclass 1 1 6.957497 6.957497 7377 +resourcesstud 1 1 6.957497 6.957497 7378 +questionnaireus 1 1 6.957497 6.957497 7379 +sciencefaculti 1 1 6.957497 6.957497 7380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html new file mode 100644 index 00000000..7cec8e8f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^suzy^cs304p^index.html @@ -0,0 +1,191 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +program 6 374 0.693147 4.158882 7 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +cours 4 273 1.098612 4.394448 15 +student 4 343 1.098612 4.394448 19 +time 1 293 1.098612 1.098612 17 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +class 3 199 1.609438 4.828314 37 +utexa 2 189 1.609438 3.218876 44 +updat 2 191 1.609438 3.218876 41 +group 2 183 1.609438 3.218876 36 +hour 4 165 1.791759 7.167036 46 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +assign 4 135 1.945910 7.783640 66 +note 2 142 1.945910 3.891820 67 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +schedul 2 119 2.079442 4.158884 85 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +assist 1 112 2.197225 2.197225 113 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +take 4 97 2.302585 9.210340 134 +need 2 98 2.302585 4.605170 135 +access 1 102 2.302585 2.302585 136 +grade 4 90 2.397895 9.591580 142 +mani 2 92 2.397895 4.795790 150 +section 1 94 2.397895 2.397895 149 +commun 1 95 2.397895 2.397895 157 +exam 3 86 2.484907 7.454721 169 +requir 2 81 2.484907 4.969814 167 +contain 1 81 2.484907 2.484907 174 +mondai 1 77 2.564949 2.564949 206 +exampl 1 77 2.564949 2.564949 195 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +materi 1 75 2.639057 2.639057 221 +syllabu 3 67 2.708050 8.124150 247 +foundat 1 62 2.772589 2.772589 286 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +copi 1 63 2.772589 2.772589 284 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +laboratori 1 63 2.772589 2.772589 292 +room 3 59 2.833213 8.499639 301 +detail 3 57 2.890372 8.671116 321 +semest 3 58 2.890372 8.671116 312 +sever 1 56 2.890372 2.890372 322 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +date 1 51 2.995732 2.995732 344 +give 1 50 3.044522 3.044522 359 +without 1 50 3.044522 3.044522 370 +frequent 1 49 3.044522 3.044522 367 +still 1 50 3.044522 3.044522 362 +get 1 46 3.091042 3.091042 380 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +discuss 1 45 3.135494 3.135494 399 +even 1 45 3.135494 3.135494 393 +long 2 43 3.178054 6.356108 413 +howev 1 41 3.218876 3.218876 422 +late 1 40 3.258097 3.258097 439 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +form 1 39 3.258097 3.258097 443 +credit 1 38 3.295837 3.295837 460 +respons 5 37 3.332205 16.661025 476 +expect 1 37 3.332205 3.332205 484 +feel 1 37 3.332205 3.332205 483 +procedur 1 36 3.367296 3.367296 488 +ofth 1 36 3.367296 3.367296 491 +soon 1 36 3.367296 3.367296 494 +staff 1 36 3.367296 3.367296 490 +everi 1 34 3.401197 3.401197 519 +within 1 33 3.433987 3.433987 525 +articl 1 33 3.433987 3.433987 530 +chapter 1 32 3.465736 3.465736 536 +often 1 31 3.496508 3.496508 551 +limit 2 29 3.583519 7.167038 585 +depend 1 29 3.583519 3.583519 583 +turn 1 29 3.583519 3.583519 586 +becom 2 28 3.610918 7.221836 603 +challeng 1 26 3.688879 3.688879 653 +effort 1 26 3.688879 3.688879 652 +background 1 25 3.737670 3.737670 664 +highli 1 23 3.806662 3.806662 725 +begin 1 23 3.806662 3.806662 716 +identifi 1 22 3.850148 3.850148 760 +self 1 22 3.850148 3.850148 761 +half 1 21 3.912023 3.912023 776 +prepar 2 20 3.951244 7.902488 824 +break 1 20 3.951244 3.951244 812 +event 1 18 4.060443 4.060443 896 +attend 1 18 4.060443 4.060443 893 +less 1 18 4.060443 4.060443 892 +partial 1 18 4.060443 4.060443 900 +monitor 1 17 4.110874 4.110874 941 +debug 1 17 4.110874 4.110874 944 +thought 1 17 4.110874 4.110874 945 +quiz 2 16 4.174387 8.348774 990 +earli 1 16 4.174387 4.174387 968 +enough 1 15 4.248495 4.248495 1040 +warn 2 14 4.317488 8.634976 1068 +near 1 14 4.317488 4.317488 1091 +consider 1 14 4.317488 4.317488 1076 +wait 3 13 4.382027 13.146081 1168 +quizz 3 13 4.382027 13.146081 1151 +everyth 1 13 4.382027 4.382027 1169 +unfortun 1 13 4.382027 4.382027 1170 +everyon 1 13 4.382027 4.382027 1148 +individu 1 13 4.382027 4.382027 1126 +pascal 4 12 4.465908 17.863632 1213 +uniqu 1 12 4.465908 4.465908 1228 +excit 1 11 4.553877 4.553877 1329 +extrem 1 11 4.553877 4.553877 1330 +eight 1 11 4.553877 4.553877 1331 +deadlin 3 9 4.753590 14.260770 1502 +hundr 1 9 4.753590 4.753590 1528 +intellectu 1 7 5.010635 5.010635 1847 +delai 1 7 5.010635 5.010635 1848 +carefulli 1 6 5.164786 5.164786 2045 +difficult 1 6 5.164786 5.164786 2035 +apolog 1 6 5.164786 5.164786 2046 +nine 1 6 5.164786 5.164786 2047 +vari 1 6 5.164786 5.164786 2001 +pace 1 6 5.164786 5.164786 2011 +firm 1 4 5.568345 5.568345 2684 +thecours 1 4 5.568345 5.568345 2685 +behind 1 4 5.568345 5.568345 2610 +welch 1 4 5.568345 5.568345 2655 +theprogram 1 4 5.568345 5.568345 2686 +dale 1 4 5.568345 5.568345 2687 +wewil 1 4 5.568345 5.568345 2688 +thiscours 1 4 5.568345 5.568345 2601 +repli 1 4 5.568345 5.568345 2689 +guadalup 1 3 5.857933 5.857933 3255 +andyou 1 3 5.857933 5.857933 3256 +gripe 1 3 5.857933 5.857933 3257 +suzi 2 2 6.263398 12.526796 4288 +wella 1 2 6.263398 6.263398 4289 +foral 1 2 6.263398 6.263398 4290 +riski 1 2 6.263398 6.263398 4291 +nowher 1 2 6.263398 6.263398 4292 +gallagh 1 2 6.263398 6.263398 4293 +requiredtextbook 1 2 6.263398 6.263398 4204 +elicit 1 2 6.263398 6.263398 4294 +thenewsgroup 2 1 6.957497 13.914994 7381 +thetest 2 1 6.957497 13.914994 7382 +programmingcsp 1 1 6.957497 6.957497 7383 +pascalintroductori 1 1 6.957497 6.957497 7384 +programminginstructor 1 1 6.957497 6.957497 7385 +gallagherwelcom 1 1 6.957497 6.957497 7386 +cspi 1 1 6.957497 6.957497 7387 +andso 1 1 6.957497 6.957497 7388 +otherdeadlin 1 1 6.957497 6.957497 7389 +thesyllabu 1 1 6.957497 6.957497 7390 +jenn 1 1 6.957497 6.957497 7391 +takethi 1 1 6.957497 6.957497 7392 +courseeach 1 1 6.957497 6.957497 7393 +howwel 1 1 6.957497 6.957497 7394 +deadlineto 1 1 6.957497 6.957497 7395 +thursdayeven 1 1 6.957497 6.957497 7396 +intosmal 1 1 6.957497 6.957497 7397 +ateach 1 1 6.957497 6.957497 7398 +thatlaboratori 1 1 6.957497 6.957497 7399 +thatgrad 1 1 6.957497 6.957497 7400 +thattest 1 1 6.957497 6.957497 7401 +limitedand 1 1 6.957497 6.957497 7402 +foravail 1 1 6.957497 6.957497 7403 +proctor 1 1 6.957497 6.957497 7404 +hoursbefor 1 1 6.957497 6.957497 7405 +andquizz 1 1 6.957497 6.957497 7406 +betaken 1 1 6.957497 6.957497 7407 +prescrib 1 1 6.957497 6.957497 7408 +openedfor 1 1 6.957497 6.957497 7409 +yourstud 1 1 6.957497 6.957497 7410 +orsak 1 1 6.957497 6.957497 7411 +weem 1 1 6.957497 6.957497 7412 +liabl 1 1 6.957497 6.957497 7413 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html new file mode 100644 index 00000000..c8a4bb1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^twang^cs387h^index.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +project 4 340 1.098612 4.394448 18 +student 2 343 1.098612 2.197224 19 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +data 8 170 1.791759 14.334072 49 +implement 2 152 1.791759 3.583518 52 +read 1 154 1.791759 1.791759 47 +file 4 132 1.945910 7.783640 70 +problem 3 147 1.945910 5.837730 75 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +databas 2 122 2.079442 4.158884 86 +pleas 2 113 2.197225 4.394450 114 +sinc 1 90 2.397895 2.397895 159 +comment 1 93 2.397895 2.397895 146 +solut 3 82 2.484907 7.454721 162 +homework 1 79 2.564949 2.564949 193 +test 3 66 2.708050 8.124150 252 +order 2 69 2.708050 5.416100 249 +differ 2 66 2.708050 5.416100 253 +syllabu 1 67 2.708050 2.708050 247 +dept 2 64 2.772589 5.545178 291 +reason 1 57 2.890372 2.890372 318 +sampl 6 53 2.944439 17.666634 339 +suggest 1 53 2.944439 2.944439 331 +run 1 51 2.995732 2.995732 347 +without 1 50 3.044522 3.044522 370 +anoth 1 45 3.135494 3.135494 408 +error 1 40 3.258097 3.258097 449 +transform 1 32 3.465736 3.465736 542 +turn 1 29 3.583519 3.583519 586 +pass 2 28 3.610918 7.221836 611 +measur 1 28 3.610918 3.610918 609 +retriev 1 27 3.637586 3.637586 621 +compar 1 26 3.688879 3.688879 648 +output 8 21 3.912023 31.296184 788 +wang 2 21 3.912023 7.824046 790 +wrote 1 20 3.951244 3.951244 830 +benchmark 2 19 4.007333 8.014666 859 +attribut 1 14 4.317488 4.317488 1092 +script 2 13 4.382027 8.764054 1171 +perl 1 11 4.553877 4.553877 1332 +recoveri 4 9 4.753590 19.014360 1474 +replac 1 8 4.875197 4.875197 1668 +fail 1 8 4.875197 4.875197 1655 +contest 2 5 5.347108 10.694216 2273 +tupl 2 5 5.347108 10.694216 2244 +batori 1 4 5.568345 5.568345 2690 +delet 1 4 5.568345 5.568345 2691 +tong 2 3 5.857933 11.715866 3258 +append 1 2 6.263398 6.263398 4295 +ret_into 1 1 6.957497 6.957497 7414 +mdb 1 1 6.957497 6.957497 7415 +diff 1 1 6.957497 6.957497 7416 +medec 1 1 6.957497 6.957497 7417 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html new file mode 100644 index 00000000..08daa449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs380l.html @@ -0,0 +1,303 @@ +term, tf, in documents count, idf, tfidf, wordid +page 30 705 0.000000 0.000000 3 +comput 16 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 42 443 0.693147 29.112174 6 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +time 9 293 1.098612 9.887508 17 +cours 7 273 1.098612 7.690284 15 +project 2 340 1.098612 2.197224 18 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +design 5 213 1.386294 6.931470 25 +gener 2 220 1.386294 2.772588 27 +softwar 1 220 1.386294 1.386294 30 +oper 21 180 1.609438 33.798198 34 +fall 3 181 1.609438 4.828314 40 +list 2 201 1.609438 3.218876 39 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +distribut 10 162 1.791759 17.917590 51 +implement 5 152 1.791759 8.958795 52 +read 3 154 1.791759 5.375277 47 +avail 2 169 1.791759 3.583518 48 +network 2 168 1.791759 3.583518 61 +parallel 1 169 1.791759 1.791759 60 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +file 7 132 1.945910 13.621370 70 +process 6 142 1.945910 11.675460 72 +support 4 132 1.945910 7.783640 83 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +schedul 4 119 2.079442 8.317768 85 +confer 3 126 2.079442 6.238326 100 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +spring 1 131 2.079442 2.079442 88 +instructor 2 108 2.197225 4.394450 107 +topic 2 114 2.197225 4.394450 110 +intern 2 108 2.197225 4.394450 128 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +advanc 4 99 2.302585 9.210340 130 +memori 2 101 2.302585 4.605170 139 +techniqu 1 99 2.302585 2.302585 138 +proceed 10 93 2.397895 23.978950 152 +real 6 93 2.397895 14.387370 144 +commun 6 95 2.397895 14.387370 157 +call 2 91 2.397895 4.795790 153 +grade 1 90 2.397895 2.397895 142 +present 1 91 2.397895 2.397895 145 +octob 1 89 2.397895 2.397895 156 +ieee 6 86 2.484907 14.909442 190 +environ 3 84 2.484907 7.454721 177 +requir 2 81 2.484907 4.969814 167 +novemb 2 81 2.484907 4.969814 179 +control 1 82 2.484907 2.484907 164 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +decemb 7 80 2.564949 17.954643 215 +issu 4 78 2.564949 10.259796 211 +april 3 77 2.564949 7.694847 196 +june 2 79 2.564949 5.129898 214 +symposium 3 72 2.639057 7.917171 238 +workshop 2 71 2.639057 5.278114 239 +august 2 66 2.708050 5.416100 257 +practic 1 70 2.708050 2.708050 246 +januari 3 62 2.772589 8.317767 264 +descript 2 64 2.772589 5.545178 271 +collect 1 65 2.772589 2.772589 268 +septemb 1 65 2.772589 2.772589 274 +foundat 1 62 2.772589 2.772589 286 +share 3 59 2.833213 8.499639 304 +juli 2 60 2.833213 5.666426 305 +march 2 61 2.833213 5.666426 295 +content 1 59 2.833213 2.833213 302 +unix 3 58 2.890372 8.671116 308 +summer 2 56 2.890372 5.780744 311 +overview 1 56 2.890372 2.890372 323 +februari 2 54 2.944439 5.888878 328 +undergradu 1 54 2.944439 2.944439 338 +cover 1 55 2.944439 2.944439 329 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +principl 1 48 3.044522 3.044522 357 +basic 1 50 3.044522 3.044522 360 +textbook 2 44 3.135494 6.270988 397 +made 1 44 3.135494 3.135494 398 +execut 1 45 3.135494 3.135494 404 +offer 1 43 3.178054 3.178054 414 +review 7 42 3.218876 22.532132 425 +cach 3 41 3.218876 9.656628 432 +examin 1 42 3.218876 3.218876 424 +fast 1 42 3.218876 3.218876 429 +transact 6 39 3.258097 19.548582 438 +theoret 1 39 3.258097 3.258097 446 +programm 1 39 3.258097 3.258097 445 +origin 1 38 3.295837 3.295837 472 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +procedur 2 36 3.367296 6.734592 488 +survei 2 35 3.401197 6.802394 513 +concurr 1 34 3.401197 3.401197 501 +articl 1 33 3.433987 3.433987 530 +toler 1 33 3.433987 3.433987 533 +concept 4 32 3.465736 13.862944 537 +idea 2 32 3.465736 6.931472 545 +fault 2 32 3.465736 6.931472 547 +chapter 1 32 3.465736 3.465736 536 +titl 1 31 3.496508 3.496508 556 +robert 1 30 3.555348 3.555348 567 +hard 1 30 3.555348 3.555348 563 +art 1 29 3.583519 3.583519 593 +multiprocessor 2 28 3.610918 7.221836 605 +determin 1 27 3.637586 3.637586 630 +altern 1 26 3.688879 3.688879 641 +proc 1 26 3.688879 3.688879 649 +constraint 1 26 3.688879 3.688879 636 +reliabl 1 25 3.737670 3.737670 674 +mobil 3 23 3.806662 11.419986 730 +thread 2 23 3.806662 7.613324 722 +highli 1 23 3.806662 3.806662 725 +springer 2 22 3.850148 7.700296 750 +verlag 2 22 3.850148 7.700296 751 +inth 1 22 3.850148 3.850148 741 +kernel 4 20 3.951244 15.804976 825 +andrew 2 19 4.007333 8.014666 849 +anderson 2 19 4.007333 8.014666 860 +prerequisit 1 19 4.007333 4.007333 846 +stand 1 18 4.060443 4.060443 891 +thoma 1 18 4.060443 4.060443 901 +bershad 1 18 4.060443 4.060443 902 +event 1 18 4.060443 4.060443 896 +germani 2 17 4.110874 8.221748 946 +protect 2 17 4.110874 8.221748 935 +earli 2 16 4.174387 8.348774 968 +young 1 16 4.174387 4.174387 991 +remot 2 15 4.248495 8.496990 1041 +princeton 2 15 4.248495 8.496990 1042 +levi 3 14 4.317488 12.952464 1093 +joint 1 13 4.382027 4.382027 1130 +usenix 3 12 4.465908 13.397724 1240 +gupta 1 12 4.465908 4.465908 1241 +bruce 1 12 4.465908 4.465908 1226 +denni 1 11 4.553877 4.553877 1321 +reness 1 11 4.553877 4.553877 1333 +impact 1 11 4.553877 4.553877 1334 +sosp 4 10 4.653960 18.615840 1416 +operatingsystem 2 10 4.653960 9.307920 1401 +henri 1 10 4.653960 4.653960 1417 +black 1 10 4.653960 4.653960 1418 +hint 1 10 4.653960 4.653960 1419 +familiar 1 9 4.753590 4.753590 1485 +robbert 1 9 4.753590 4.753590 1529 +inter 1 9 4.753590 4.753590 1530 +birman 1 9 4.753590 4.753590 1531 +kumar 1 9 4.753590 4.753590 1506 +mach 4 8 4.875197 19.500788 1669 +inproceed 1 8 4.875197 4.875197 1670 +presenc 1 8 4.875197 4.875197 1671 +harrick 3 7 5.010635 15.031905 1849 +beyond 2 7 5.010635 10.021270 1834 +peterson 1 7 5.010635 5.010635 1850 +migrat 1 7 5.010635 5.010635 1851 +encrypt 1 7 5.010635 5.010635 1835 +onoper 2 6 5.164786 10.329572 2048 +multiprogram 1 6 5.164786 5.164786 2010 +thompson 1 6 5.164786 5.164786 2049 +silberschatz 1 6 5.164786 5.164786 1978 +edward 1 6 5.164786 5.164786 2050 +internationalconfer 1 6 5.164786 5.164786 2051 +distributedsystem 1 6 5.164786 5.164786 2022 +versu 1 6 5.164786 5.164786 2052 +ousterhout 2 5 5.347108 10.694216 2301 +ofdistribut 1 5 5.347108 5.347108 2316 +theth 1 5 5.347108 5.347108 2325 +oncomput 1 5 5.347108 5.347108 2326 +joseph 1 5 5.347108 5.347108 2327 +authent 1 5 5.347108 5.347108 2306 +steer 1 5 5.347108 5.347108 2328 +wireless 3 4 5.568345 16.705035 2693 +lazowska 2 4 5.568345 11.136690 2694 +breadth 1 4 5.568345 5.568345 2695 +implic 1 4 5.568345 5.568345 2696 +fora 1 4 5.568345 5.568345 2697 +theacm 1 4 5.568345 5.568345 2698 +zhao 1 4 5.568345 5.568345 2699 +cheriton 3 3 5.857933 17.573799 3259 +synopsi 1 3 5.857933 5.857933 3260 +formobil 1 3 5.857933 5.857933 3261 +requirementsstud 1 3 5.857933 5.857933 3116 +theperform 1 3 5.857933 5.857933 3262 +berlin 1 3 5.857933 5.857933 3263 +terri 1 3 5.857933 5.857933 3264 +golub 1 3 5.857933 5.857933 3265 +tokuda 1 3 5.857933 5.857933 3266 +kistler 1 3 5.857933 5.857933 3267 +synopsisc 1 2 6.263398 6.263398 4296 +systemdesign 1 2 6.263398 6.263398 4297 +theinstructor 1 2 6.263398 6.263398 4298 +anexperiment 1 2 6.263398 6.263398 4299 +afip 1 2 6.263398 6.263398 4300 +hansen 1 2 6.263398 6.263398 4301 +nucleu 1 2 6.263398 6.263398 4302 +bensoussan 1 2 6.263398 6.263398 4303 +multic 1 2 6.263398 6.263398 4304 +virtualmemori 1 2 6.263398 6.263398 4305 +ritchi 1 2 6.263398 6.263398 4306 +tucker 1 2 6.263398 6.263398 4307 +bunt 1 2 6.263398 6.263398 4308 +barrera 1 2 6.263398 6.263398 4309 +acmtransact 1 2 6.263398 6.263398 4310 +nelson 1 2 6.263398 6.263398 4168 +cristian 1 2 6.263398 6.263398 4311 +offailur 1 2 6.263398 6.263398 4255 +systemsr 1 2 6.263398 6.263398 4312 +goldberg 1 2 6.263398 6.263398 4313 +rosenblum 1 2 6.263398 6.263398 4314 +ieeetransact 1 2 6.263398 6.263398 4315 +oninform 1 2 6.263398 6.263398 4316 +baron 1 2 6.263398 6.263398 4317 +rashid 1 2 6.263398 6.263398 4318 +preemptiv 1 2 6.263398 6.263398 4319 +ondistribut 1 2 6.263398 6.263398 4320 +dalei 2 1 6.957497 13.914994 7420 +formultiprogram 2 1 6.957497 13.914994 7421 +karshmer 2 1 6.957497 13.914994 7422 +nehmer 2 1 6.957497 13.914994 7423 +schroeder 2 1 6.957497 13.914994 7424 +needham 2 1 6.957497 13.914994 7425 +trigger 2 1 6.957497 13.914994 7426 +prerequisitegradu 1 1 6.957497 6.957497 7427 +systemssuch 1 1 6.957497 6.957497 7428 +materialin 1 1 6.957497 6.957497 7429 +andsilberschatz 1 1 6.957497 6.957497 7430 +coveringboth 1 1 6.957497 6.957497 7431 +anemphasi 1 1 6.957497 6.957497 7432 +anddiscuss 1 1 6.957497 6.957497 7433 +aterm 1 1 6.957497 6.957497 7434 +systemsfernando 1 1 6.957497 6.957497 7435 +corbato 1 1 6.957497 6.957497 7436 +marjori 1 1 6.957497 6.957497 7437 +merwin 1 1 6.957497 6.957497 7438 +daggett 1 1 6.957497 6.957497 7439 +brinch 1 1 6.957497 6.957497 7440 +clingen 1 1 6.957497 6.957497 7441 +tannenbaum 1 1 6.957497 6.957497 7442 +andexampl 1 1 6.957497 6.957497 7443 +managementa 1 1 6.957497 6.957497 7444 +forshar 1 1 6.957497 6.957497 7445 +schedulingr 1 1 6.957497 6.957497 7446 +communicationj 1 1 6.957497 6.957497 7447 +birel 1 1 6.957497 6.957497 7448 +rpc 1 1 6.957497 6.957497 7449 +lightweightremot 1 1 6.957497 6.957497 7450 +migrationf 1 1 6.957497 6.957497 7451 +dougli 1 1 6.957497 6.957497 7452 +spriteoper 1 1 6.957497 6.957497 7453 +theimer 1 1 6.957497 6.957497 7454 +lantz 1 1 6.957497 6.957497 7455 +preemptabl 1 1 6.957497 6.957497 7456 +tolerancef 1 1 6.957497 6.957497 7457 +sand 1 1 6.957497 6.957497 7458 +sandberg 1 1 6.957497 6.957497 7459 +kleiman 1 1 6.957497 6.957497 7460 +ofsun 1 1 6.957497 6.957497 7461 +mckusick 1 1 6.957497 6.957497 7462 +leffler 1 1 6.957497 6.957497 7463 +fabri 1 1 6.957497 6.957497 7464 +fastfil 1 1 6.957497 6.957497 7465 +alog 1 1 6.957497 6.957497 7466 +systemsm 1 1 6.957497 6.957497 7467 +gifford 1 1 6.957497 6.957497 7468 +securityr 1 1 6.957497 6.957497 7469 +inlarg 1 1 6.957497 6.957497 7470 +butler 1 1 6.957497 6.957497 7471 +lampson 1 1 6.957497 6.957497 7472 +accetta 1 1 6.957497 6.957497 7473 +boloski 1 1 6.957497 6.957497 7474 +tevanian 1 1 6.957497 6.957497 7475 +systemsh 1 1 6.957497 6.957497 7476 +kopetz 1 1 6.957497 6.957497 7477 +timesystem 1 1 6.957497 6.957497 7478 +layland 1 1 6.957497 6.957497 7479 +ramamritham 1 1 6.957497 6.957497 7480 +stankov 1 1 6.957497 6.957497 7481 +schedulingund 1 1 6.957497 6.957497 7482 +mercer 1 1 6.957497 6.957497 7483 +computingb 1 1 6.957497 6.957497 7484 +badrinath 1 1 6.957497 6.957497 7485 +acharya 1 1 6.957497 6.957497 7486 +imielinski 1 1 6.957497 6.957497 7487 +satyanarayanan 1 1 6.957497 6.957497 7488 +okasaki 1 1 6.957497 6.957497 7489 +siegel 1 1 6.957497 6.957497 7490 +coda 1 1 6.957497 6.957497 7491 +distributedworkst 1 1 6.957497 6.957497 7492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html new file mode 100644 index 00000000..35c232cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs384m.html @@ -0,0 +1,404 @@ +term, tf, in documents count, idf, tfidf, wordid +page 30 705 0.000000 0.000000 3 +comput 12 775 0.000000 0.000000 2 +system 19 443 0.693147 13.169793 6 +research 4 431 0.693147 2.772588 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 8 273 1.098612 8.788896 15 +time 5 293 1.098612 5.493060 17 +offic 3 299 1.098612 3.295836 13 +project 3 340 1.098612 3.295836 18 +student 2 343 1.098612 2.197224 19 +current 1 284 1.098612 1.098612 21 +design 11 213 1.386294 15.249234 25 +mail 2 238 1.386294 2.772588 22 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +oper 10 180 1.609438 16.094380 34 +list 5 201 1.609438 8.047190 39 +class 2 199 1.609438 3.218876 37 +fall 2 181 1.609438 3.218876 40 +utexa 2 189 1.609438 3.218876 44 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +network 10 168 1.791759 17.917590 61 +algorithm 8 162 1.791759 14.334072 57 +applic 4 170 1.791759 7.167036 56 +hour 3 165 1.791759 5.375277 46 +read 2 154 1.791759 3.583518 47 +implement 2 152 1.791759 3.583518 52 +phone 2 175 1.791759 3.583518 45 +base 2 165 1.791759 3.583518 50 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +support 7 132 1.945910 13.621370 83 +area 4 144 1.945910 7.783640 80 +architectur 3 139 1.945910 5.837730 77 +problem 2 147 1.945910 3.891820 75 +note 1 142 1.945910 1.945910 67 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +schedul 5 119 2.079442 10.397210 85 +analysi 3 124 2.079442 6.238326 98 +technolog 2 131 2.079442 4.158884 102 +introduct 2 126 2.079442 4.158884 87 +number 2 130 2.079442 4.158884 97 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +instructor 4 108 2.197225 8.788900 107 +manag 3 114 2.197225 6.591675 125 +intern 3 108 2.197225 6.591675 128 +teach 2 108 2.197225 4.394450 112 +place 2 106 2.197225 4.394450 124 +topic 2 114 2.197225 4.394450 110 +assist 1 112 2.197225 2.197225 113 +well 1 109 2.197225 2.197225 121 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +techniqu 7 99 2.302585 16.118095 138 +advanc 1 99 2.302585 2.302585 130 +proceed 20 93 2.397895 47.957900 152 +commun 11 95 2.397895 26.376845 157 +octob 10 89 2.397895 23.978950 156 +real 3 93 2.397895 7.193685 144 +select 3 91 2.397895 7.193685 154 +follow 1 92 2.397895 2.397895 143 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +novemb 9 81 2.484907 22.364163 179 +ieee 8 86 2.484907 19.879256 190 +journal 4 83 2.484907 9.939628 183 +control 3 82 2.484907 7.454721 164 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +resourc 1 81 2.484907 2.484907 172 +second 1 81 2.484907 2.484907 166 +environ 1 84 2.484907 2.484907 177 +server 11 76 2.564949 28.214439 204 +issu 8 78 2.564949 20.519592 211 +april 5 77 2.564949 12.824745 196 +decemb 2 80 2.564949 5.129898 215 +optim 2 79 2.564949 5.129898 197 +june 2 79 2.564949 5.129898 214 +dynam 2 76 2.564949 5.129898 194 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +effici 3 73 2.639057 7.917171 233 +symposium 2 72 2.639057 5.278114 238 +workshop 2 71 2.639057 5.278114 239 +servic 1 72 2.639057 2.639057 236 +multimedia 35 68 2.708050 94.781750 258 +august 5 66 2.708050 13.540250 257 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +septemb 9 65 2.772589 24.953301 274 +descript 2 64 2.772589 5.545178 271 +copi 2 63 2.772589 5.545178 284 +polici 2 64 2.772589 5.545178 279 +handout 1 64 2.772589 2.772589 263 +taylor 1 63 2.772589 2.772589 287 +collect 1 65 2.772589 2.772589 268 +januari 1 62 2.772589 2.772589 264 +content 1 59 2.833213 2.833213 302 +locat 1 59 2.833213 2.833213 303 +march 1 61 2.833213 2.833213 295 +simpl 1 60 2.833213 2.833213 298 +overview 2 56 2.890372 5.780744 323 +detail 1 57 2.890372 2.890372 321 +summer 1 56 2.890372 2.890372 311 +space 1 57 2.890372 2.890372 310 +cover 2 55 2.944439 5.888878 329 +processor 1 54 2.944439 2.944439 335 +digit 5 52 2.995732 14.978660 348 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +format 2 48 3.044522 6.089044 356 +basic 2 50 3.044522 6.089044 360 +appoint 2 49 3.044522 6.089044 358 +principl 2 48 3.044522 6.089044 357 +standard 2 48 3.044522 6.089044 365 +still 1 50 3.044522 3.044522 362 +possibl 1 47 3.091042 3.091042 378 +california 1 46 3.091042 3.091042 388 +adapt 1 46 3.091042 3.091042 387 +video 13 44 3.135494 40.761422 405 +protocol 4 45 3.135494 12.541976 407 +textbook 1 44 3.135494 3.135494 397 +discuss 1 45 3.135494 3.135494 399 +answer 1 45 3.135494 3.135494 391 +describ 1 45 3.135494 3.135494 400 +offer 1 43 3.178054 3.178054 414 +long 1 43 3.178054 3.178054 413 +third 1 43 3.178054 3.178054 412 +mechan 1 43 3.178054 3.178054 416 +review 3 42 3.218876 9.656628 425 +cach 2 41 3.218876 6.437752 432 +examin 1 42 3.218876 3.218876 424 +futur 1 41 3.218876 3.218876 427 +tutori 1 39 3.258097 3.258097 437 +multipl 1 39 3.258097 3.258097 453 +expect 2 37 3.332205 6.664410 484 +workstat 1 37 3.332205 3.332205 479 +multi 1 36 3.367296 3.367296 493 +ofth 1 36 3.367296 3.367296 491 +survei 3 35 3.401197 10.203591 513 +represent 1 35 3.401197 3.401197 512 +statist 1 35 3.401197 3.401197 521 +articl 1 33 3.433987 3.433987 530 +toler 1 33 3.433987 3.433987 533 +concept 2 32 3.465736 6.931472 537 +fault 1 32 3.465736 3.465736 547 +storag 4 31 3.496508 13.986032 553 +titl 1 31 3.496508 3.496508 556 +particip 1 29 3.583519 3.583519 589 +synchron 1 29 3.583519 3.583519 588 +scale 2 28 3.610918 7.221836 613 +framework 2 28 3.610918 7.221836 606 +packag 1 28 3.610918 3.610918 614 +retriev 2 27 3.637586 7.275172 621 +determin 2 27 3.637586 7.275172 630 +arrai 2 27 3.637586 7.275172 627 +session 1 26 3.688879 3.688879 643 +fundament 2 25 3.737670 7.475340 661 +magazin 2 24 3.761200 7.522400 704 +scalabl 1 24 3.761200 3.761200 705 +compress 8 23 3.806662 30.453296 719 +variabl 1 23 3.806662 3.806662 715 +disk 4 22 3.850148 15.400592 747 +emphasi 1 22 3.850148 3.850148 755 +reduc 1 22 3.850148 3.850148 759 +chen 2 21 3.912023 7.824046 791 +flexibl 1 21 3.912023 3.912023 792 +mpeg 3 20 3.951244 11.853732 831 +scheme 3 20 3.951244 11.853732 818 +sure 1 20 3.951244 3.951244 813 +qualiti 1 20 3.951244 3.951244 832 +media 3 19 4.007333 12.021999 861 +boston 2 19 4.007333 8.014666 862 +prerequisit 1 19 4.007333 4.007333 846 +comparison 1 19 4.007333 4.007333 863 +anderson 1 19 4.007333 4.007333 860 +stand 1 18 4.060443 4.060443 891 +failur 1 18 4.060443 4.060443 898 +layer 1 17 4.110874 4.110874 926 +zhang 3 16 4.174387 12.523161 980 +diego 2 16 4.174387 8.348774 992 +transfer 1 16 4.174387 4.174387 967 +hierarch 2 15 4.248495 8.496990 1018 +rate 2 15 4.248495 8.496990 1037 +contribut 1 15 4.248495 4.248495 1021 +stream 1 15 4.248495 4.248495 1015 +audio 4 14 4.317488 17.269952 1094 +francisco 2 14 4.317488 8.634976 1095 +incomput 1 14 4.317488 4.317488 1096 +demand 1 14 4.317488 4.317488 1073 +heterogen 1 14 4.317488 4.317488 1090 +consider 1 14 4.317488 4.317488 1076 +introduc 1 13 4.382027 4.382027 1139 +carri 1 13 4.382027 4.382027 1152 +resolut 1 13 4.382027 4.382027 1172 +sigmetr 1 13 4.382027 4.382027 1173 +buffer 2 12 4.465908 8.931816 1211 +uniqu 1 12 4.465908 4.465908 1228 +readi 1 12 4.465908 4.465908 1242 +scan 1 12 4.465908 4.465908 1243 +philadelphia 1 12 4.465908 4.465908 1244 +weight 1 12 4.465908 4.465908 1204 +placement 3 10 4.653960 13.961880 1420 +packet 3 10 4.653960 13.961880 1415 +traffic 1 10 4.653960 4.653960 1421 +operatingsystem 1 10 4.653960 4.653960 1401 +establish 1 9 4.753590 4.753590 1532 +familiar 1 9 4.753590 4.753590 1485 +recoveri 1 9 4.753590 4.753590 1474 +light 1 9 4.753590 4.753590 1533 +osdi 1 9 4.753590 4.753590 1534 +transport 3 8 4.875197 14.625591 1672 +convers 1 8 4.875197 4.875197 1673 +pacif 1 8 4.875197 4.875197 1674 +grove 1 8 4.875197 4.875197 1675 +harrick 4 7 5.010635 20.042540 1849 +trend 2 7 5.010635 10.021270 1842 +channel 2 7 5.010635 10.021270 1836 +keshav 2 7 5.010635 10.021270 1852 +peterson 2 7 5.010635 10.021270 1850 +chiang 1 7 5.010635 5.010635 1853 +supportfor 1 7 5.010635 5.010635 1854 +smooth 1 7 5.010635 5.010635 1855 +misra 1 7 5.010635 5.010635 1856 +conferenc 1 7 5.010635 5.010635 1857 +jpeg 2 6 5.164786 10.329572 2053 +internationalconfer 1 6 5.164786 5.164786 2051 +subsystem 1 6 5.164786 5.164786 2015 +symposiumon 1 6 5.164786 5.164786 2054 +sigcomm 6 5 5.347108 32.082648 2329 +row 1 5 5.347108 5.347108 2330 +proceedingsof 1 5 5.347108 5.347108 2331 +batch 2 4 5.568345 11.136690 2700 +multimediasystem 2 4 5.568345 11.136690 2701 +venkat 2 4 5.568345 11.136690 2702 +addition 1 4 5.568345 5.568345 2593 +forparallel 1 4 5.568345 5.568345 2703 +admiss 1 4 5.568345 5.568345 2704 +clark 1 4 5.568345 5.568345 2705 +floyd 1 4 5.568345 5.568345 2682 +buss 1 4 5.568345 5.568345 2649 +goyal 9 3 5.857933 52.721397 3268 +shenoi 5 3 5.857933 29.289665 3269 +rangan 2 3 5.857933 11.715866 3270 +anaheim 2 3 5.857933 11.715866 3271 +campbel 2 3 5.857933 11.715866 3272 +mccann 2 3 5.857933 11.715866 3273 +synopsi 1 3 5.857933 5.857933 3260 +informationcours 1 3 5.857933 5.857933 3167 +guadalup 1 3 5.857933 5.857933 3255 +multimediaappl 1 3 5.857933 5.857933 3274 +ftc 1 3 5.857933 5.857933 3275 +katz 1 3 5.857933 5.857933 3276 +reddi 1 3 5.857933 5.857933 3277 +deliveri 1 3 5.857933 5.857933 3278 +durham 1 3 5.857933 5.857933 3279 +hampshir 1 3 5.857933 5.857933 3280 +chow 1 3 5.857933 5.857933 3281 +london 1 3 5.857933 5.857933 3282 +kandlur 3 2 6.263398 18.790194 4321 +ofmultimedia 2 2 6.263398 12.526796 4322 +icmc 2 2 6.263398 12.526796 4323 +jacobson 2 2 6.263398 12.526796 4324 +prerequisitesgradu 1 2 6.263398 6.263398 4325 +synopsisc 1 2 6.263398 6.263398 4296 +madeavail 1 2 6.263398 6.263398 4326 +thetop 1 2 6.263398 6.263398 4327 +critiqu 1 2 6.263398 6.263398 4328 +ofpap 1 2 6.263398 6.263398 4329 +andclass 1 2 6.263398 6.263398 4330 +prashant 1 2 6.263398 6.263398 4331 +gemmel 1 2 6.263398 6.263398 4332 +ieeeintern 1 2 6.263398 6.263398 4333 +inmulti 1 2 6.263398 6.263398 4334 +annualintern 1 2 6.263398 6.263398 4335 +pasadena 1 2 6.263398 6.263398 4336 +multimediai 1 2 6.263398 6.263398 4337 +acmmultimedia 1 2 6.263398 6.263398 4338 +sanfrancisco 1 2 6.263398 6.263398 4339 +shenker 1 2 6.263398 6.263398 4340 +verma 1 2 6.263398 6.263398 4341 +delaybound 1 2 6.263398 6.263398 4342 +toappear 1 2 6.263398 6.263398 4343 +nossdav 1 2 6.263398 6.263398 4344 +acmsigcomm 1 2 6.263398 6.263398 4345 +andd 1 2 6.263398 6.263398 4346 +shepherd 1 2 6.263398 6.263398 4347 +basedcommun 1 2 6.263398 6.263398 4348 +incommun 1 2 6.263398 6.263398 4349 +govindan 1 2 6.263398 6.263398 4350 +forcontinu 1 2 6.263398 6.263398 4351 +formultimedia 1 2 6.263398 6.263398 4352 +zellweg 1 2 6.263398 6.263398 4353 +swinehart 1 2 6.263398 6.263398 4354 +etherphon 1 2 6.263398 6.263398 4355 +steinmetz 2 1 6.957497 13.914994 7493 +sitaram 2 1 6.957497 13.914994 7494 +coulson 2 1 6.957497 13.914994 7495 +descriptiongener 1 1 6.957497 6.957497 7496 +boththeoret 1 1 6.957497 6.957497 7497 +systemsupport 1 1 6.957497 6.957497 7498 +transportprotocol 1 1 6.957497 6.957497 7499 +designissu 1 1 6.957497 6.957497 7500 +textbooka 1 1 6.957497 6.957497 7501 +requirementsth 1 1 6.957497 6.957497 7502 +relatedpap 1 1 6.957497 6.957497 7503 +tounderstand 1 1 6.957497 6.957497 7504 +asemest 1 1 6.957497 6.957497 7505 +vintuesdai 1 1 6.957497 6.957497 7506 +assistantmr 1 1 6.957497 6.957497 7507 +eduread 1 1 6.957497 6.957497 7508 +cntain 1 1 6.957497 6.957497 7509 +theread 1 1 6.957497 6.957497 7510 +speedwai 1 1 6.957497 6.957497 7511 +dobi 1 1 6.957497 6.957497 7512 +mall 1 1 6.957497 6.957497 7513 +callthem 1 1 6.957497 6.957497 7514 +compressionr 1 1 6.957497 6.957497 7515 +wallac 1 1 6.957497 6.957497 7516 +gall 1 1 6.957497 6.957497 7517 +anastassi 1 1 6.957497 6.957497 7518 +digitaltelevis 1 1 6.957497 6.957497 7519 +serversoverview 1 1 6.957497 6.957497 7520 +serverdesign 1 1 6.957497 6.957497 7521 +chiueh 1 1 6.957497 6.957497 7522 +groupedsweep 1 1 6.957497 6.957497 7523 +ofthird 1 1 6.957497 6.957497 7524 +narasimha 1 1 6.957497 6.957497 7525 +wylli 1 1 6.957497 6.957497 7526 +admissioncontrol 1 1 6.957497 6.957497 7527 +designinglarg 1 1 6.957497 6.957497 7528 +inmultimedia 1 1 6.957497 6.957497 7529 +interactivevideo 1 1 6.957497 6.957497 7530 +playout 1 1 6.957497 6.957497 7531 +shahabuddin 1 1 6.957497 6.957497 7532 +foran 1 1 6.957497 6.957497 7533 +demandvideo 1 1 6.957497 6.957497 7534 +papadimitri 1 1 6.957497 6.957497 7535 +ramanathan 1 1 6.957497 6.957497 7536 +informationcach 1 1 6.957497 6.957497 7537 +homeentertain 1 1 6.957497 6.957497 7538 +multimedianetwork 1 1 6.957497 6.957497 7539 +ferrari 1 1 6.957497 6.957497 7540 +channelestablish 1 1 6.957497 6.957497 7541 +areasin 1 1 6.957497 6.957497 7542 +servicedisciplin 1 1 6.957497 6.957497 7543 +workshopon 1 1 6.957497 6.957497 7544 +losslesssmooth 1 1 6.957497 6.957497 7545 +salehi 1 1 6.957497 6.957497 7546 +kuros 1 1 6.957497 6.957497 7547 +towslei 1 1 6.957497 6.957497 7548 +storedvideo 1 1 6.957497 6.957497 7549 +requirementsthrough 1 1 6.957497 6.957497 7550 +grossglaus 1 1 6.957497 6.957497 7551 +rcbr 1 1 6.957497 6.957497 7552 +efficientservic 1 1 6.957497 6.957497 7553 +kanakia 1 1 6.957497 6.957497 7554 +reibman 1 1 6.957497 6.957497 7555 +congestioncontrol 1 1 6.957497 6.957497 7556 +tennenhous 1 1 6.957497 6.957497 7557 +newgener 1 1 6.957497 6.957497 7558 +hutchison 1 1 6.957497 6.957497 7559 +servicearchitectur 1 1 6.957497 6.957497 7560 +turner 1 1 6.957497 6.957497 7561 +reliablemulticast 1 1 6.957497 6.957497 7562 +levelfram 1 1 6.957497 6.957497 7563 +deffner 1 1 6.957497 6.957497 7564 +schulzrinn 1 1 6.957497 6.957497 7565 +blakowski 1 1 6.957497 6.957497 7566 +onselect 1 1 6.957497 6.957497 7567 +januaryoper 1 1 6.957497 6.957497 7568 +multimediag 1 1 6.957497 6.957497 7569 +robin 1 1 6.957497 6.957497 7570 +blair 1 1 6.957497 6.957497 7571 +papathoma 1 1 6.957497 6.957497 7572 +choru 1 1 6.957497 6.957497 7573 +druschel 1 1 6.957497 6.957497 7574 +abbott 1 1 6.957497 6.957497 7575 +pagel 1 1 6.957497 6.957497 7576 +systemssupport 1 1 6.957497 6.957497 7577 +conferencingh 1 1 6.957497 6.957497 7578 +venkatrangan 1 1 6.957497 6.957497 7579 +packetvideo 1 1 6.957497 6.957497 7580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html new file mode 100644 index 00000000..8bba4ccd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vin^cs395t.html @@ -0,0 +1,297 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 8 443 0.693147 5.545176 6 +inform 3 412 0.693147 2.079441 8 +research 1 431 0.693147 0.693147 10 +cours 8 273 1.098612 8.788896 15 +time 3 293 1.098612 3.295836 17 +offic 2 299 1.098612 2.197224 13 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +design 5 213 1.386294 6.931470 25 +gener 2 220 1.386294 2.772588 27 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +oper 3 180 1.609438 4.828314 34 +paper 3 205 1.609438 4.828314 38 +fall 2 181 1.609438 3.218876 40 +list 2 201 1.609438 3.218876 39 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +read 3 154 1.791759 5.375277 47 +base 3 165 1.791759 5.375277 50 +applic 2 170 1.791759 3.583518 56 +hour 1 165 1.791759 1.791759 46 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +support 2 132 1.945910 3.891820 83 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +databas 6 122 2.079442 12.476652 86 +schedul 4 119 2.079442 8.317768 85 +report 3 131 2.079442 6.238326 92 +confer 2 126 2.079442 4.158884 100 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +high 1 130 2.079442 2.079442 101 +seattl 1 120 2.079442 2.079442 103 +instructor 2 108 2.197225 4.394450 107 +topic 2 114 2.197225 4.394450 110 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +technic 3 100 2.302585 6.907755 140 +advanc 1 99 2.302585 2.302585 130 +proceed 17 93 2.397895 40.764215 152 +real 5 93 2.397895 11.989475 144 +commun 3 95 2.397895 7.193685 157 +pictur 3 89 2.397895 7.193685 160 +present 2 91 2.397895 4.795790 145 +grade 2 90 2.397895 4.795790 142 +imag 2 91 2.397895 4.795790 161 +octob 1 89 2.397895 2.397895 156 +requir 3 81 2.484907 7.454721 167 +internet 3 83 2.484907 7.454721 186 +novemb 3 81 2.484907 7.454721 179 +resourc 2 81 2.484907 4.969814 172 +build 1 85 2.484907 2.484907 184 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +larg 1 82 2.484907 2.484907 168 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +symposium 2 72 2.639057 5.278114 238 +servic 1 72 2.639057 2.639057 236 +meet 1 72 2.639057 2.639057 229 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +multimedia 10 68 2.708050 27.080500 258 +receiv 1 66 2.708050 2.708050 244 +simul 1 66 2.708050 2.708050 255 +august 1 66 2.708050 2.708050 257 +descript 2 64 2.772589 5.545178 271 +interact 2 62 2.772589 5.545178 270 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +share 5 59 2.833213 14.166065 304 +content 1 59 2.833213 2.833213 302 +locat 1 59 2.833213 2.833213 303 +semest 1 58 2.890372 2.890372 312 +processor 2 54 2.944439 5.888878 335 +februari 1 54 2.944439 2.944439 328 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +textbook 2 44 3.135494 6.270988 397 +protocol 2 45 3.135494 6.270988 407 +video 2 44 3.135494 6.270988 405 +fridai 1 44 3.135494 3.135494 390 +made 1 44 3.135494 3.135494 398 +discuss 1 45 3.135494 3.135494 399 +mechan 1 43 3.178054 3.178054 416 +http 1 41 3.218876 3.218876 420 +transact 2 39 3.258097 6.516194 438 +submit 1 39 3.258097 3.258097 440 +paul 1 38 3.295837 3.295837 471 +cost 1 37 3.332205 3.332205 480 +connect 1 37 3.332205 3.332205 485 +tree 2 36 3.367296 6.734592 492 +multi 1 36 3.367296 3.367296 493 +ofth 1 36 3.367296 3.367296 491 +queri 3 33 3.433987 10.301961 524 +articl 1 33 3.433987 3.433987 530 +concept 1 32 3.465736 3.465736 537 +collabor 1 32 3.465736 3.465736 543 +extend 1 32 3.465736 3.465736 539 +titl 1 31 3.496508 3.496508 556 +domain 1 30 3.555348 3.555348 564 +particip 1 29 3.583519 3.583519 589 +semant 1 29 3.583519 3.583519 587 +framework 1 28 3.610918 3.610918 606 +retriev 1 27 3.637586 3.637586 621 +berkelei 1 26 3.688879 3.688879 657 +reliabl 2 25 3.737670 7.475340 674 +scalabl 2 24 3.761200 7.522400 705 +frame 1 24 3.761200 3.761200 684 +mobil 1 23 3.806662 3.806662 730 +emphasi 1 22 3.850148 3.850148 755 +self 1 22 3.850148 3.850148 761 +color 1 22 3.850148 3.850148 762 +rout 8 21 3.912023 31.296184 793 +scheme 1 20 3.951244 3.951244 818 +boston 5 19 4.007333 20.036665 862 +prerequisit 1 19 4.007333 4.007333 846 +anderson 1 19 4.007333 4.007333 860 +media 1 19 4.007333 4.007333 861 +predict 1 19 4.007333 4.007333 855 +stand 1 18 4.060443 4.060443 891 +letter 1 16 4.174387 4.174387 981 +vector 1 16 4.174387 4.174387 961 +diego 1 16 4.174387 4.174387 992 +zhang 1 16 4.174387 4.174387 980 +latenc 1 16 4.174387 4.174387 993 +hierarch 1 15 4.248495 4.248495 1018 +francisco 3 14 4.317488 12.952464 1095 +audio 1 14 4.317488 4.317488 1094 +gupta 2 12 4.465908 8.931816 1241 +replic 1 12 4.465908 4.465908 1231 +shape 1 12 4.465908 4.465908 1245 +clock 1 11 4.553877 4.553877 1320 +packet 2 10 4.653960 9.307920 1415 +queue 1 10 4.653960 4.653960 1386 +sosp 1 10 4.653960 4.653960 1416 +familiar 1 9 4.753590 4.753590 1485 +inter 1 9 4.753590 4.753590 1530 +distanc 1 9 4.753590 4.753590 1500 +light 1 9 4.753590 4.753590 1533 +osdi 1 9 4.753590 4.753590 1534 +face 1 9 4.753590 4.753590 1501 +paradigm 2 8 4.875197 9.750394 1662 +transport 1 8 4.875197 4.875197 1672 +parti 1 8 4.875197 4.875197 1676 +textur 1 8 4.875197 4.875197 1677 +conferenc 4 7 5.010635 20.042540 1857 +harrick 3 7 5.010635 15.031905 1849 +core 1 7 5.010635 5.010635 1809 +channel 1 7 5.010635 5.010635 1836 +determinist 1 6 5.164786 5.164786 2034 +onoper 1 6 5.164786 5.164786 2048 +multicast 10 5 5.347108 53.471080 2305 +sigcomm 7 5 5.347108 37.429756 2329 +jain 2 5 5.347108 10.694216 2332 +fair 1 5 5.347108 5.347108 2333 +consum 1 5 5.347108 5.347108 2334 +multimediasystem 1 4 5.568345 5.568345 2701 +venkat 1 4 5.568345 5.568345 2702 +andevalu 1 4 5.568345 5.568345 2706 +floyd 1 4 5.568345 5.568345 2682 +theacm 1 4 5.568345 5.568345 2698 +ofinform 1 4 5.568345 5.568345 2707 +bach 1 4 5.568345 5.568345 2708 +mccann 2 3 5.857933 11.715866 3273 +cheriton 2 3 5.857933 11.715866 3259 +infocom 2 3 5.857933 11.715866 3283 +weihl 2 3 5.857933 11.715866 3284 +synopsi 1 3 5.857933 5.857933 3260 +networkprotocol 1 3 5.857933 5.857933 3285 +requirementsstud 1 3 5.857933 5.857933 3116 +aswel 1 3 5.857933 5.857933 3286 +kistler 1 3 5.857933 5.857933 3267 +rangan 1 3 5.857933 5.857933 3270 +franci 1 3 5.857933 5.857933 3287 +singhal 1 3 5.857933 5.857933 3098 +axiomat 1 3 5.857933 5.857933 3288 +how 1 3 5.857933 5.857933 3289 +moran 1 3 5.857933 5.857933 3151 +nguyen 1 3 5.857933 5.857933 3290 +nearbi 1 3 5.857933 5.857933 3291 +ofoper 1 3 5.857933 5.857933 3292 +proport 1 3 5.857933 5.857933 3293 +qbic 1 3 5.857933 5.857933 3294 +acmmultimedia 3 2 6.263398 18.790194 4338 +jacobson 2 2 6.263398 12.526796 4324 +deer 2 2 6.263398 12.526796 4356 +jeffai 2 2 6.263398 12.526796 4357 +prerequisitesgradu 1 2 6.263398 6.263398 4325 +formultimedia 1 2 6.263398 6.263398 4352 +andresearch 1 2 6.263398 6.263398 4247 +theinstructor 1 2 6.263398 6.263398 4298 +studentsenrol 1 2 6.263398 6.263398 4073 +redel 1 2 6.263398 6.263398 4358 +zellweg 1 2 6.263398 6.263398 4353 +swinehart 1 2 6.263398 6.263398 4354 +etherphon 1 2 6.263398 6.263398 4355 +lan 1 2 6.263398 6.263398 4359 +computersystem 1 2 6.263398 6.263398 4360 +mbone 1 2 6.263398 6.263398 4361 +shenker 1 2 6.263398 6.263398 4340 +acmsigcomm 1 2 6.263398 6.263398 4345 +resourcemanag 1 2 6.263398 6.263398 4266 +govindan 1 2 6.263398 6.263398 4350 +forcontinu 1 2 6.263398 6.263398 4351 +monterei 1 2 6.263398 6.263398 4362 +timeoper 1 2 6.263398 6.263398 4363 +niblack 1 2 6.263398 6.263398 4364 +managementsystem 1 2 6.263398 6.263398 4365 +knowledgeand 1 2 6.263398 6.263398 4366 +onveri 1 2 6.263398 6.263398 4367 +crowcroft 2 1 6.957497 13.914994 7581 +warldersburg 2 1 6.957497 13.914994 7582 +synopsisthi 1 1 6.957497 6.957497 7583 +bediscuss 1 1 6.957497 6.957497 7584 +andmultimedia 1 1 6.957497 6.957497 7585 +multimediadatabas 1 1 6.957497 6.957497 7586 +determinedbas 1 1 6.957497 6.957497 7587 +orcarri 1 1 6.957497 6.957497 7588 +hoursfridai 1 1 6.957497 6.957497 7589 +flexibleframework 1 1 6.957497 6.957497 7590 +handlei 1 1 6.957497 6.957497 7591 +wakeman 1 1 6.957497 6.957497 7592 +controlchannel 1 1 6.957497 6.957497 7593 +cccp 1 1 6.957497 6.957497 7594 +conferencecontrol 1 1 6.957497 6.957497 7595 +gajewska 1 1 6.957497 6.957497 7596 +manass 1 1 6.957497 6.957497 7597 +argo 1 1 6.957497 6.957497 7598 +systemfor 1 1 6.957497 6.957497 7599 +gong 1 1 6.957497 6.957497 7600 +multipoint 1 1 6.957497 6.957497 7601 +basedmultimedia 1 1 6.957497 6.957497 7602 +ieeecomput 1 1 6.957497 6.957497 7603 +datagraminternetwork 1 1 6.957497 6.957497 7604 +ballardi 1 1 6.957497 6.957497 7605 +thyagarajan 1 1 6.957497 6.957497 7606 +widyono 1 1 6.957497 6.957497 7607 +msthesi 1 1 6.957497 6.957497 7608 +kompella 1 1 6.957497 6.957497 7609 +pasqual 1 1 6.957497 6.957497 7610 +polyzo 1 1 6.957497 6.957497 7611 +multimediacommun 1 1 6.957497 6.957497 7612 +weightsess 1 1 6.957497 6.957497 7613 +ofacm 1 1 6.957497 6.957497 7614 +holbrook 1 1 6.957497 6.957497 7615 +fordistribut 1 1 6.957497 6.957497 7616 +herzog 1 1 6.957497 6.957497 7617 +estrin 1 1 6.957497 6.957497 7618 +timecommun 1 1 6.957497 6.957497 7619 +servicesj 1 1 6.957497 6.957497 7620 +guyton 1 1 6.957497 6.957497 7621 +schwartz 1 1 6.957497 6.957497 7622 +mogul 1 1 6.957497 6.957497 7623 +forpersist 1 1 6.957497 6.957497 7624 +supportc 1 1 6.957497 6.957497 7625 +lotteri 1 1 6.957497 6.957497 7626 +flexibleproport 1 1 6.957497 6.957497 7627 +mangement 1 1 6.957497 6.957497 7628 +strideschedul 1 1 6.957497 6.957497 7629 +golestani 1 1 6.957497 6.957497 7630 +speedappl 1 1 6.957497 6.957497 7631 +timeproduc 1 1 6.957497 6.957497 7632 +ofeffici 1 1 6.957497 6.957497 7633 +sigapp 1 1 6.957497 6.957497 7634 +intim 1 1 6.957497 6.957497 7635 +databasesw 1 1 6.957497 6.957497 7636 +contentus 1 1 6.957497 6.957497 7637 +cawkel 1 1 6.957497 6.957497 7638 +weymouth 1 1 6.957497 6.957497 7639 +vimsi 1 1 6.957497 6.957497 7640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vlr^f96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vlr^f96.html new file mode 100644 index 00000000..f4c11681 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^vlr^f96.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +home 6 672 0.000000 0.000000 1 +work 6 380 0.693147 4.158882 9 +time 6 293 1.098612 6.591672 17 +cours 3 273 1.098612 3.295836 15 +last 2 314 1.098612 2.197224 14 +us 1 329 1.098612 1.098612 16 +updat 3 191 1.609438 4.828314 41 +fall 2 181 1.609438 3.218876 40 +algorithm 2 162 1.791759 3.583518 57 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +problem 4 147 1.945910 7.783640 75 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +number 1 130 2.079442 2.079442 97 +final 4 116 2.197225 8.788900 108 +theori 2 111 2.197225 4.394450 127 +instructor 1 108 2.197225 2.197225 107 +well 1 109 2.197225 2.197225 121 +structur 1 106 2.197225 2.197225 105 +pleas 1 113 2.197225 2.197225 114 +techniqu 2 99 2.302585 4.605170 138 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +question 6 91 2.397895 14.387370 141 +exam 4 86 2.484907 9.939628 169 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +decemb 2 80 2.564949 5.129898 215 +refer 1 78 2.564949 2.564949 203 +mondai 1 77 2.564949 2.564949 206 +receiv 2 66 2.708050 5.416100 244 +differ 1 66 2.708050 2.708050 253 +handout 2 64 2.772589 5.545178 263 +descript 1 64 2.772589 2.772589 271 +think 1 57 2.890372 2.890372 314 +instruct 1 53 2.944439 2.944439 332 +case 1 51 2.995732 2.995732 351 +right 2 48 3.044522 6.089044 363 +set 1 50 3.044522 3.044522 361 +answer 5 45 3.135494 15.677470 391 +fridai 1 44 3.135494 3.135494 390 +littl 1 39 3.258097 3.258097 454 +respons 1 37 3.332205 3.332205 476 +word 1 34 3.401197 3.401197 508 +either 1 35 3.401197 3.401197 506 +chapter 1 32 3.465736 3.465736 536 +quot 1 29 3.583519 3.583519 582 +known 1 24 3.761200 3.761200 702 +size 2 23 3.806662 7.613324 713 +inth 1 22 3.850148 3.850148 741 +sent 1 22 3.850148 3.850148 763 +cycl 2 11 4.553877 9.107754 1335 +true 4 10 4.653960 18.615840 1422 +sentenc 3 10 4.653960 13.961880 1413 +total 2 10 4.653960 9.307920 1398 +label 2 10 4.653960 9.307920 1423 +equal 1 10 4.653960 4.653960 1424 +pose 1 9 4.753590 4.753590 1535 +face 1 9 4.753590 4.753590 1501 +entri 1 8 4.875197 4.875197 1678 +largest 2 7 5.010635 10.021270 1858 +meant 1 6 5.164786 5.164786 2055 +vertic 2 5 5.347108 10.694216 2270 +worst 1 5 5.347108 5.347108 2287 +vijaya 1 4 5.568345 5.568345 2677 +disjoint 1 4 5.568345 5.568345 2709 +denot 1 3 5.857933 5.857933 3147 +omega 3 2 6.263398 18.790194 4368 +sigma 1 2 6.263398 6.263398 4369 +amort 1 2 6.263398 6.263398 4370 +submatrix 3 1 6.957497 20.872491 7641 +ramachandranuniqu 1 1 6.957497 6.957497 7642 +onsigma 1 1 6.957497 6.957497 7643 +oroth 1 1 6.957497 6.957497 7644 +isther 1 1 6.957497 6.957497 7645 +paragraphof 1 1 6.957497 6.957497 7646 +containdistinct 1 1 6.957497 6.957497 7647 +cancontain 1 1 6.957497 6.957497 7648 +unclear 1 1 6.957497 6.957497 7649 +somek 1 1 6.957497 6.957497 7650 +bepost 1 1 6.957497 6.957497 7651 +youhav 1 1 6.957497 6.957497 7652 +yourbest 1 1 6.957497 6.957497 7653 +judgment 1 1 6.957497 6.957497 7654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html new file mode 100644 index 00000000..d90d3675 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^wilson^cs345.html @@ -0,0 +1,228 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +system 9 443 0.693147 6.238323 6 +program 6 374 0.693147 4.158882 7 +work 1 380 0.693147 0.693147 9 +us 7 329 1.098612 7.690284 16 +cours 6 273 1.098612 6.591672 15 +languag 4 227 1.386294 5.545176 26 +also 3 259 1.386294 4.158882 28 +gener 1 220 1.386294 1.386294 27 +class 8 199 1.609438 12.875504 37 +includ 2 208 1.609438 3.218876 42 +public 2 202 1.609438 3.218876 43 +list 1 201 1.609438 1.609438 39 +implement 8 152 1.791759 14.334072 52 +read 4 154 1.791759 7.167036 47 +base 2 165 1.791759 3.583518 50 +avail 2 169 1.791759 3.583518 48 +data 1 170 1.791759 1.791759 49 +object 8 138 1.945910 15.567280 79 +note 7 142 1.945910 13.621370 67 +first 3 140 1.945910 5.837730 71 +assign 3 135 1.945910 5.837730 66 +construct 2 139 1.945910 3.891820 82 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +machin 8 129 2.079442 16.635536 95 +document 3 121 2.079442 6.238326 89 +version 10 113 2.197225 21.972250 122 +code 5 108 2.197225 10.986125 116 +make 4 111 2.197225 8.788900 120 +find 2 111 2.197225 4.394450 111 +place 2 106 2.197225 4.394450 124 +look 2 107 2.197225 4.394450 115 +well 1 109 2.197225 2.197225 121 +structur 1 106 2.197225 2.197225 105 +take 3 97 2.302585 6.907755 134 +text 2 98 2.302585 4.605170 133 +question 3 91 2.397895 7.193685 141 +section 2 94 2.397895 4.795790 149 +pictur 2 89 2.397895 4.795790 160 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +thing 4 84 2.484907 9.939628 189 +chang 2 82 2.484907 4.969814 163 +start 2 83 2.484907 4.969814 173 +learn 2 86 2.484907 4.969814 170 +internet 2 83 2.484907 4.969814 186 +stuff 1 87 2.484907 2.484907 171 +help 1 83 2.484907 2.484907 175 +second 1 81 2.484907 2.484907 166 +solut 1 82 2.484907 2.484907 162 +homework 2 79 2.564949 5.129898 193 +orient 2 80 2.564949 5.129898 205 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +free 4 73 2.639057 10.556228 224 +html 3 75 2.639057 7.917171 235 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +logic 1 71 2.639057 2.639057 230 +practic 3 70 2.708050 8.124150 246 +test 2 66 2.708050 5.416100 252 +main 2 67 2.708050 5.416100 256 +window 2 68 2.708050 5.416100 242 +syllabu 1 67 2.708050 2.708050 247 +interact 1 62 2.772589 2.772589 270 +simpl 3 60 2.833213 8.499639 298 +plai 1 60 2.833213 2.833213 307 +type 1 61 2.833213 2.833213 296 +unix 4 58 2.890372 11.561488 308 +browser 2 56 2.890372 5.780744 313 +sever 2 56 2.890372 5.780744 322 +variou 2 56 2.890372 5.780744 317 +index 1 56 2.890372 2.890372 309 +reason 1 57 2.890372 2.890372 318 +cover 2 55 2.944439 5.888878 329 +suggest 1 53 2.944439 2.944439 331 +allow 1 53 2.944439 2.944439 333 +three 1 54 2.944439 2.944439 330 +run 9 51 2.995732 26.961588 347 +standard 3 48 3.044522 9.133566 365 +format 1 48 3.044522 3.044522 356 +get 2 46 3.091042 6.182084 380 +featur 1 46 3.091042 3.091042 386 +answer 6 45 3.135494 18.812964 391 +describ 2 45 3.135494 6.270988 400 +mark 2 44 3.135494 6.270988 403 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +show 1 43 3.178054 3.178054 417 +might 2 41 3.218876 6.437752 426 +past 1 42 3.218876 3.218876 428 +error 2 40 3.258097 6.516194 449 +tutori 1 39 3.258097 3.258097 437 +author 1 39 3.258097 3.258097 450 +littl 1 39 3.258097 3.258097 454 +correct 1 38 3.295837 3.295837 462 +especi 2 36 3.367296 6.734592 496 +procedur 1 36 3.367296 3.367296 488 +chapter 2 32 3.465736 6.931472 536 +express 1 32 3.465736 3.465736 540 +kind 1 32 3.465736 3.465736 541 +someth 2 31 3.496508 6.993016 554 +anim 1 31 3.496508 3.496508 557 +abl 2 30 3.555348 7.110696 566 +actual 1 28 3.610918 3.610918 604 +linux 2 27 3.637586 7.275172 631 +comp 4 26 3.688879 14.755516 650 +subject 1 26 3.688879 3.688879 647 +rather 1 26 3.688879 3.688879 642 +brows 2 23 3.806662 7.613324 726 +recommend 4 22 3.850148 15.400592 737 +instal 2 22 3.850148 7.700296 754 +sort 1 22 3.850148 3.850148 738 +self 1 22 3.850148 3.850148 761 +newsgroup 2 21 3.912023 7.824046 783 +theorem 1 21 3.912023 3.912023 786 +scheme 43 20 3.951244 169.903492 818 +sure 5 20 3.951244 19.756220 813 +definit 1 19 4.007333 4.007333 864 +lot 2 18 4.060443 8.120886 889 +along 1 18 4.060443 4.060443 878 +repositori 10 17 4.110874 41.108740 932 +regular 1 17 4.110874 4.110874 929 +quiz 2 16 4.174387 8.348774 990 +explan 2 16 4.174387 8.348774 985 +advantag 2 16 4.174387 8.348774 987 +later 4 15 4.248495 16.993980 1043 +pagec 1 15 4.248495 4.248495 1011 +goe 1 15 4.248495 4.248495 1044 +command 2 14 4.317488 8.634976 1083 +convent 1 14 4.317488 4.317488 1072 +draw 1 14 4.317488 4.317488 1086 +essenti 1 13 4.382027 4.382027 1137 +prolog 1 13 4.382027 4.382027 1155 +solari 4 12 4.465908 17.863632 1238 +reader 1 12 4.465908 4.465908 1246 +calculu 1 12 4.465908 4.465908 1203 +rice 2 11 4.553877 9.107754 1336 +see 1 11 4.553877 4.553877 1337 +instanc 1 11 4.553877 4.553877 1322 +subset 1 10 4.653960 4.653960 1425 +wilson 2 9 4.753590 9.507180 1536 +classifi 1 9 4.753590 4.753590 1537 +latter 1 9 4.753590 4.753590 1522 +illustr 2 8 4.875197 9.750394 1679 +marc 2 8 4.875197 9.750394 1680 +besid 2 8 4.875197 9.750394 1681 +reload 1 8 4.875197 4.875197 1682 +prover 1 8 4.875197 4.875197 1653 +feelei 4 7 5.010635 20.042540 1859 +sparc 2 7 5.010635 10.021270 1860 +bunch 2 7 5.010635 10.021270 1861 +exactli 1 7 5.010635 5.010635 1817 +merg 1 7 5.010635 5.010635 1862 +whichi 2 6 5.164786 10.329572 2056 +indiana 2 6 5.164786 10.329572 2057 +grammar 1 6 5.164786 5.164786 2058 +lang 4 5 5.347108 21.388432 2294 +default 2 5 5.347108 10.694216 2335 +pagethi 1 5 5.347108 5.347108 2336 +button 1 5 5.347108 5.347108 2337 +ahead 1 5 5.347108 5.347108 2338 +hardcopi 1 5 5.347108 5.347108 2246 +proposit 1 5 5.347108 5.347108 2339 +patch 2 4 5.568345 11.136690 2710 +devot 2 4 5.568345 11.136690 2711 +backward 1 4 5.568345 5.568345 2638 +chain 1 4 5.568345 5.568345 2712 +rscheme 12 3 5.857933 70.295196 3250 +inherit 2 3 5.857933 11.715866 3122 +qing 2 3 5.857933 11.715866 3295 +gambit 2 3 5.857933 11.715866 3227 +outof 1 3 5.857933 5.857933 3296 +ters 1 3 5.857933 5.857933 3297 +theoremprov 1 3 5.857933 5.857933 3298 +donovan 2 2 6.263398 12.526796 4371 +kolbl 2 2 6.263398 12.526796 4372 +youcan 2 2 6.263398 12.526796 4373 +indent 1 2 6.263398 6.263398 4374 +subtyp 1 2 6.263398 6.263398 4375 +meroon 4 1 6.957497 27.829988 7655 +runschem 2 1 6.957497 13.914994 7656 +orani 2 1 6.957497 13.914994 7657 +andinstal 2 1 6.957497 13.914994 7658 +itfrom 2 1 6.957497 13.914994 7659 +friendlier 2 1 6.957497 13.914994 7660 +fornewbi 2 1 6.957497 13.914994 7661 +gettinggambit 2 1 6.957497 13.914994 7662 +bestschem 2 1 6.957497 13.914994 7663 +guil 2 1 6.957497 13.914994 7664 +mzscheme 2 1 6.957497 13.914994 7665 +doingobject 2 1 6.957497 13.914994 7666 +tous 2 1 6.957497 13.914994 7667 +freeimplement 2 1 6.957497 13.914994 7668 +getinterest 2 1 6.957497 13.914994 7669 +paulwilson 1 1 6.957497 6.957497 7670 +yourbrows 1 1 6.957497 6.957497 7671 +mostrec 1 1 6.957497 6.957497 7672 +ondeclar 1 1 6.957497 6.957497 7673 +arereason 1 1 6.957497 6.957497 7674 +willchang 1 1 6.957497 6.957497 7675 +islik 1 1 6.957497 6.957497 7676 +adventur 1 1 6.957497 6.957497 7677 +usinga 1 1 6.957497 6.957497 7678 +throughchapt 1 1 6.957497 6.957497 7679 +sanoth 1 1 6.957497 6.957497 7680 +thanprint 1 1 6.957497 6.957497 7681 +weget 1 1 6.957497 6.957497 7682 +onlinebrows 1 1 6.957497 6.957497 7683 +coursenot 1 1 6.957497 6.957497 7684 +miscellanousfunct 1 1 6.957497 6.957497 7685 +shouldconsult 1 1 6.957497 6.957497 7686 +itsens 1 1 6.957497 6.957497 7687 +andnot 1 1 6.957497 6.957497 7688 +setofrul 1 1 6.957497 6.957497 7689 +ofanim 1 1 6.957497 6.957497 7690 +simpleobject 1 1 6.957497 6.957497 7691 +metaclass 1 1 6.957497 6.957497 7692 +circular 1 1 6.957497 6.957497 7693 +onclass 1 1 6.957497 6.957497 7694 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html new file mode 100644 index 00000000..c90a698c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^xfeng^cs105^cs105.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +cours 3 273 1.098612 3.295836 15 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +utexa 5 189 1.609438 8.047190 44 +class 4 199 1.609438 6.437752 37 +fall 2 181 1.609438 3.218876 40 +group 1 183 1.609438 1.609438 36 +austin 2 168 1.791759 3.583518 63 +contact 2 153 1.791759 3.583518 59 +hour 1 165 1.791759 1.791759 46 +texa 1 160 1.791759 1.791759 64 +file 9 132 1.945910 17.513190 70 +note 4 142 1.945910 7.783640 67 +model 3 145 1.945910 5.837730 69 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +postscript 7 131 2.079442 14.556094 90 +welcom 1 122 2.079442 2.079442 99 +provid 1 121 2.079442 2.079442 94 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +need 1 98 2.302585 2.302585 135 +section 3 94 2.397895 7.193685 149 +homepag 1 93 2.397895 2.397895 148 +solut 14 82 2.484907 34.788698 162 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +homework 16 79 2.564949 41.039184 193 +sourc 1 77 2.564949 2.564949 201 +mondai 1 77 2.564949 2.564949 206 +tuesdai 2 73 2.639057 5.278114 219 +test 2 66 2.708050 5.416100 252 +thursdai 2 70 2.708050 5.416100 241 +view 1 70 2.708050 2.708050 254 +new 2 64 2.772589 5.545178 262 +guid 1 63 2.772589 2.772589 267 +locat 1 59 2.833213 2.833213 303 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +found 1 53 2.944439 2.944439 337 +tabl 1 51 2.995732 2.995732 346 +maintain 1 51 2.995732 2.995732 342 +format 3 48 3.044522 9.133566 356 +possibl 1 47 3.091042 3.091042 378 +midterm 2 45 3.135494 6.270988 392 +fridai 1 44 3.135494 3.135494 390 +answer 1 45 3.135494 3.135494 391 +made 1 44 3.135494 3.135494 398 +review 4 42 3.218876 12.875504 425 +announc 1 40 3.258097 3.258097 441 +realli 1 40 3.258097 3.258097 444 +slide 3 38 3.295837 9.887511 467 +sciencesunivers 1 37 3.332205 3.332205 486 +download 1 36 3.367296 3.367296 489 +print 2 34 3.401197 6.802394 503 +taught 1 33 3.433987 3.433987 526 +linux 1 27 3.637586 3.637586 631 +session 5 26 3.688879 18.444395 643 +half 1 21 3.912023 3.912023 776 +wang 1 21 3.912023 3.912023 790 +exercis 1 19 4.007333 4.007333 842 +weekli 2 17 4.110874 8.221748 919 +adam 1 17 4.110874 4.110874 934 +modif 1 17 4.110874 4.110874 913 +georg 1 16 4.174387 4.174387 994 +score 1 15 4.248495 4.248495 1017 +station 2 13 4.382027 8.764054 1157 +kumar 5 9 4.753590 23.767950 1506 +surpris 1 7 5.010635 5.010635 1828 +tip 1 7 5.010635 5.010635 1863 +ajit 2 3 5.857933 11.715866 3299 +feng 2 3 5.857933 11.715866 3300 +warren 1 3 5.857933 5.857933 3301 +edudepart 1 3 5.857933 5.857933 3302 +xfeng 2 2 6.263398 12.526796 4376 +natarajan 1 2 6.263398 6.263398 4377 +decimalinteg 2 1 6.957497 13.914994 7695 +hexinteg 2 1 6.957497 13.914994 7696 +octalinteg 2 1 6.957497 13.914994 7697 +xunnow 1 1 6.957497 6.957497 7698 +homeworksreview 1 1 6.957497 6.957497 7699 +slidesth 1 1 6.957497 6.957497 7700 +onlineif 1 1 6.957497 6.957497 7701 +updatedhomework 1 1 6.957497 6.957497 7702 +filemidterm 1 1 6.957497 6.957497 7703 +webta 1 1 6.957497 6.957497 7704 +timetableta 1 1 6.957497 6.957497 7705 +guana 1 1 6.957497 6.957497 7706 +eduxun 1 1 6.957497 6.957497 7707 +wordlist 1 1 6.957497 6.957497 7708 +wwang 1 1 6.957497 6.957497 7709 +afternoon 1 1 6.957497 6.957497 7710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ new file mode 100644 index 00000000..c52a694e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^yangyang^cs352^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 3 374 0.693147 2.079441 7 +system 1 443 0.693147 0.693147 6 +offic 3 299 1.098612 3.295836 13 +email 2 220 1.386294 2.772588 29 +softwar 1 220 1.386294 1.386294 30 +utexa 3 189 1.609438 4.828314 44 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +group 1 183 1.609438 1.609438 36 +hour 3 165 1.791759 5.375277 46 +contact 2 153 1.791759 3.583518 59 +assign 3 135 1.945910 5.837730 66 +file 2 132 1.945910 3.891820 70 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +section 1 94 2.397895 2.397895 149 +comment 1 93 2.397895 2.397895 146 +solut 6 82 2.484907 14.909442 162 +resourc 1 81 2.484907 2.484907 172 +syllabu 1 67 2.708050 2.708050 247 +august 1 66 2.708050 2.708050 257 +new 1 64 2.772589 2.772589 262 +creat 1 63 2.772589 2.772589 277 +statist 4 35 3.401197 13.604788 521 +print 2 34 3.401197 6.802394 503 +station 2 13 4.382027 8.764054 1157 +classmat 1 9 4.753590 4.753590 1516 +yang 4 8 4.875197 19.500788 1652 +pagei 1 8 4.875197 4.875197 1683 +schwetman 2 1 6.957497 13.914994 7711 +mesquit 2 1 6.957497 13.914994 7712 +yangyang 2 1 6.957497 13.914994 7713 +herb 1 1 6.957497 6.957497 7714 +appointmentcontact 1 1 6.957497 6.957497 7715 +statisticsassign 1 1 6.957497 6.957497 7716 +asga 1 1 6.957497 6.957497 7717 +statisticsyour 1 1 6.957497 6.957497 7718 +gradesect 1 1 6.957497 6.957497 7719 +microsparc 1 1 6.957497 6.957497 7720 +datasheetonlin 1 1 6.957497 6.957497 7721 +ruiliu 1 1 6.957497 6.957497 7722 +postmessag 1 1 6.957497 6.957497 7723 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html new file mode 100644 index 00000000..7040d1ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.utexas.edu^users^yufeng^cs378.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +project 4 340 1.098612 4.394448 18 +offic 2 299 1.098612 2.197224 13 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +class 3 199 1.609438 4.828314 37 +group 3 183 1.609438 4.828314 36 +utexa 2 189 1.609438 3.218876 44 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +hour 2 165 1.791759 3.583518 46 +read 2 154 1.791759 3.583518 47 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +file 1 132 1.945910 1.945910 70 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +schedul 2 119 2.079442 4.158884 85 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +grade 1 90 2.397895 2.397895 142 +present 1 91 2.397895 2.397895 145 +internet 1 83 2.484907 2.484907 186 +solut 1 82 2.484907 2.484907 162 +info 1 85 2.484907 2.484907 176 +homework 6 79 2.564949 15.389694 193 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +java 1 70 2.708050 2.708050 248 +handout 13 64 2.772589 36.043657 263 +descript 1 64 2.772589 2.772589 271 +sampl 1 53 2.944439 2.944439 339 +protocol 3 45 3.135494 9.406482 407 +http 2 41 3.218876 6.437752 420 +tutori 1 39 3.258097 3.258097 437 +platform 1 29 3.583519 3.583519 591 +turn 1 29 3.583519 3.583519 586 +comp 1 26 3.688879 3.688879 650 +background 1 25 3.737670 3.737670 664 +mobil 1 23 3.806662 3.806662 730 +newsgroup 2 21 3.912023 7.824046 783 +rout 1 21 3.912023 3.912023 793 +alloc 1 20 3.951244 3.951244 821 +prerequisit 1 19 4.007333 4.007333 846 +configur 1 15 4.248495 4.248495 1012 +draft 2 14 4.317488 8.634976 1085 +station 1 13 4.382027 4.382027 1157 +individu 1 13 4.382027 4.382027 1126 +ring 1 8 4.875197 4.875197 1684 +digest 1 7 5.010635 5.010635 1864 +multicast 1 5 5.347108 5.347108 2305 +authent 1 5 5.347108 5.347108 2306 +edufing 1 4 5.568345 5.568345 2713 +csnet 2 1 6.957497 13.914994 7724 +wensdai 1 1 6.957497 6.957497 7725 +netsim 1 1 6.957497 6.957497 7726 +corejava 1 1 6.957497 6.957497 7727 +fengyufeng 1 1 6.957497 6.957497 7728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^135^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^135^ new file mode 100644 index 00000000..5522a064 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^135^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +home 4 672 0.000000 0.000000 1 +scienc 4 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +inform 5 412 0.693147 3.465735 8 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +engin 4 297 1.098612 4.394448 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +class 2 199 1.609438 3.218876 37 +read 2 154 1.791759 3.583518 47 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +assign 2 135 1.945910 3.891820 66 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +document 3 121 2.079442 6.238326 89 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +topic 1 114 2.197225 2.197225 110 +techniqu 1 99 2.302585 2.302585 138 +follow 1 92 2.397895 2.397895 143 +help 3 83 2.484907 7.454721 175 +wide 1 84 2.484907 2.484907 185 +chang 1 82 2.484907 2.484907 163 +resourc 1 81 2.484907 2.484907 172 +homework 2 79 2.564949 5.129898 193 +sourc 1 77 2.564949 2.564949 201 +html 1 75 2.639057 2.639057 235 +degre 2 69 2.708050 5.416100 259 +syllabu 1 67 2.708050 2.708050 247 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +colleg 2 61 2.833213 5.666426 300 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +visitor 1 49 3.044522 3.044522 371 +basic 1 50 3.044522 3.044522 360 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +offer 2 43 3.178054 6.356108 414 +announc 1 40 3.258097 3.258097 441 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +art 1 29 3.583519 3.583519 593 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +accur 1 25 3.737670 3.737670 680 +item 1 19 4.007333 4.007333 856 +hypertext 1 19 4.007333 4.007333 865 +offici 1 18 4.060443 4.060443 894 +charact 1 15 4.248495 4.248495 1028 +reprint 1 14 4.317488 4.317488 1097 +hypermedia 1 12 4.465908 4.465908 1247 +duli 1 12 4.465908 4.465908 1248 +regularli 1 11 4.553877 4.553877 1338 +nonprofit 1 11 4.553877 4.553877 1339 +mosaic 2 10 4.653960 9.307920 1426 +engr 2 10 4.653960 9.307920 1427 +weld 1 9 4.753590 4.753590 1538 +departmentof 1 9 4.753590 4.753590 1539 +uniform 1 7 5.010635 5.010635 1845 +markup 1 6 5.164786 5.164786 2059 +highlight 1 5 5.347108 5.347108 2340 +foracadem 1 5 5.347108 5.347108 2341 +whichcontain 1 4 5.568345 5.568345 2714 +bounti 1 4 5.568345 5.568345 2715 +mathematica 1 3 5.857933 5.857933 3303 +quotedand 1 3 5.857933 5.857933 3304 +quarterwelcom 1 2 6.263398 6.263398 4378 +thatthi 1 2 6.263398 6.263398 4379 +addedfrequ 1 2 6.263398 6.263398 4380 +personnel 1 2 6.263398 6.263398 4381 +mvi 1 2 6.263398 6.263398 4382 +usinglynx 1 2 6.263398 6.263398 4383 +pageclick 1 1 6.957497 6.957497 7729 +gradesoth 1 1 6.957497 6.957497 7730 +browserport 1 1 6.957497 6.957497 7731 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^142^95a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^142^95a^ new file mode 100644 index 00000000..de3298ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^142^95a^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +inform 3 412 0.693147 2.079441 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +engin 4 297 1.098612 4.394448 20 +cours 3 273 1.098612 3.295836 15 +last 2 314 1.098612 2.197224 14 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +washington 4 236 1.386294 5.545176 32 +mail 2 238 1.386294 2.772588 22 +link 1 247 1.386294 1.386294 24 +class 3 199 1.609438 4.828314 37 +updat 2 191 1.609438 3.218876 41 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +problem 2 147 1.945910 3.891820 75 +click 2 142 1.945910 3.891820 78 +like 2 132 1.945910 3.891820 81 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +document 2 121 2.079442 4.158884 89 +studi 2 120 2.079442 4.158884 91 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +send 2 114 2.197225 4.394450 109 +final 2 116 2.197225 4.394450 108 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +code 1 108 2.197225 2.197225 116 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +take 1 97 2.302585 2.302585 134 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +help 5 83 2.484907 12.424535 175 +exam 5 86 2.484907 12.424535 169 +solut 2 82 2.484907 4.969814 162 +activ 2 84 2.484907 4.969814 182 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +info 1 85 2.484907 2.484907 176 +messag 3 76 2.564949 7.694847 212 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +onlin 1 75 2.639057 2.639057 223 +degre 2 69 2.708050 5.416100 259 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +guid 2 63 2.772589 5.545178 267 +previou 1 62 2.772589 2.772589 290 +colleg 2 61 2.833213 5.666426 300 +think 2 57 2.890372 5.780744 314 +special 1 56 2.890372 2.890372 320 +summer 1 56 2.890372 2.890372 311 +major 1 56 2.890372 2.890372 315 +case 1 51 2.995732 2.995732 351 +run 1 51 2.995732 2.995732 347 +particular 1 51 2.995732 2.995732 352 +frequent 2 49 3.044522 6.089044 367 +quarter 2 47 3.091042 6.182084 389 +adapt 1 46 3.091042 3.091042 387 +netscap 3 44 3.135494 9.406482 395 +keep 1 44 3.135494 3.135494 409 +midterm 1 45 3.135494 3.135494 392 +textbook 1 44 3.135494 3.135494 397 +offer 2 43 3.178054 6.356108 414 +might 1 41 3.218876 3.218876 426 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +staff 1 36 3.367296 3.367296 490 +winter 1 36 3.367296 3.367296 500 +copyright 1 36 3.367296 3.367296 495 +word 1 34 3.401197 3.401197 508 +board 1 33 3.433987 3.433987 528 +ad 1 32 3.465736 3.465736 544 +autumn 4 31 3.496508 13.986032 558 +art 1 29 3.583519 3.583519 593 +consid 1 29 3.583519 3.583519 590 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +notic 1 25 3.737670 3.737670 675 +accur 1 25 3.737670 3.737670 680 +miscellan 1 23 3.806662 3.806662 731 +martin 1 21 3.912023 3.912023 794 +item 1 19 4.007333 4.007333 856 +demo 1 18 4.060443 4.060443 888 +less 1 18 4.060443 4.060443 892 +previous 1 17 4.110874 4.110874 923 +whole 1 17 4.110874 4.110874 940 +match 1 16 4.174387 4.174387 965 +portion 1 16 4.174387 4.174387 971 +webmast 3 15 4.248495 12.745485 1045 +reprint 1 14 4.317488 4.317488 1097 +earlier 1 13 4.382027 4.382027 1140 +menu 1 13 4.382027 4.382027 1156 +hypermedia 1 12 4.465908 4.465908 1247 +web 1 12 4.465908 4.465908 1249 +duli 1 12 4.465908 4.465908 1248 +regularli 1 11 4.553877 4.553877 1338 +instanc 1 11 4.553877 4.553877 1322 +nonprofit 1 11 4.553877 4.553877 1339 +engr 5 10 4.653960 23.269800 1427 +debugg 1 9 4.753590 4.753590 1493 +departmentof 1 9 4.753590 4.753590 1539 +tip 2 7 5.010635 10.021270 1863 +documentfor 1 7 5.010635 5.010635 1865 +theclass 1 6 5.164786 5.164786 2060 +handbook 1 6 5.164786 5.164786 2061 +newinform 1 5 5.347108 5.347108 2342 +highlight 1 5 5.347108 5.347108 2340 +mac 1 5 5.347108 5.347108 2292 +bulletin 1 5 5.347108 5.347108 2343 +foracadem 1 5 5.347108 5.347108 2341 +bounti 1 4 5.568345 5.568345 2715 +insensit 1 4 5.568345 5.568345 2716 +tompa 1 3 5.857933 5.857933 3305 +preview 1 3 5.857933 5.857933 3306 +quotedand 1 3 5.857933 5.857933 3304 +raini 2 2 6.263398 12.526796 4384 +intact 1 2 6.263398 6.263398 4385 +nonmajor 1 2 6.263398 6.263398 4386 +itemsund 1 2 6.263398 6.263398 4387 +balloon 1 2 6.263398 6.263398 4388 +dugan 1 1 6.957497 6.957497 7732 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ new file mode 100644 index 00000000..ae4f3993 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^142^CurrentQtr^ @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 3 273 1.098612 3.295836 15 +engin 3 297 1.098612 3.295836 20 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +hour 3 165 1.791759 5.375277 46 +avail 1 169 1.791759 1.791759 48 +lectur 2 135 1.945910 3.891820 73 +like 2 132 1.945910 3.891820 81 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +place 2 106 2.197225 4.394450 124 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +user 1 104 2.302585 2.302585 137 +comment 1 93 2.397895 2.397895 146 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +refer 1 78 2.564949 2.564949 203 +test 2 66 2.708050 5.416100 252 +degre 2 69 2.708050 5.416100 259 +syllabu 1 67 2.708050 2.708050 247 +guid 1 63 2.772589 2.772589 267 +colleg 2 61 2.833213 5.666426 300 +summer 2 56 2.890372 5.780744 311 +special 1 56 2.890372 2.890372 320 +major 1 56 2.890372 2.890372 315 +week 2 52 2.995732 5.991464 343 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +textbook 1 44 3.135494 3.135494 397 +examin 1 42 3.218876 3.218876 424 +might 1 41 3.218876 3.218876 426 +tutori 1 39 3.258097 3.258097 437 +slide 2 38 3.295837 6.591674 467 +short 1 36 3.367296 3.367296 499 +staff 1 36 3.367296 3.367296 490 +winter 1 36 3.367296 3.367296 500 +kind 1 32 3.465736 3.465736 541 +autumn 3 31 3.496508 10.489524 558 +richard 1 31 3.496508 3.496508 559 +art 1 29 3.583519 3.583519 593 +notic 1 25 3.737670 3.737670 675 +lab 1 24 3.761200 3.761200 698 +brows 1 23 3.806662 3.806662 726 +sort 1 22 3.850148 3.850148 738 +martin 1 21 3.912023 3.912023 794 +watch 1 21 3.912023 3.912023 789 +item 1 19 4.007333 4.007333 856 +demo 1 18 4.060443 4.060443 888 +less 1 18 4.060443 4.060443 892 +macintosh 1 17 4.110874 4.110874 920 +previous 1 17 4.110874 4.110874 923 +webmast 2 15 4.248495 8.496990 1045 +earlier 1 13 4.382027 4.382027 1140 +web 1 12 4.465908 4.465908 1249 +engr 3 10 4.653960 13.961880 1427 +invit 1 10 4.653960 4.653960 1428 +informationabout 1 9 4.753590 4.753590 1515 +andth 1 9 4.753590 4.753590 1481 +tip 1 7 5.010635 5.010635 1863 +ladner 1 6 5.164786 5.164786 2062 +highlight 1 5 5.347108 5.347108 2340 +bounti 1 4 5.568345 5.568345 2715 +moreinform 1 3 5.857933 5.857933 3307 +dickei 1 2 6.263398 6.263398 4389 +nonmajor 1 2 6.263398 6.263398 4386 +hypermediadocu 1 1 6.957497 6.957497 7733 +schedulesth 1 1 6.957497 6.957497 7734 +glanceweek 1 1 6.957497 6.957497 7735 +schedulecomput 1 1 6.957497 6.957497 7736 +includinglab 1 1 6.957497 6.957497 7737 +andta 1 1 6.957497 6.957497 7738 +audiofrom 1 1 6.957497 6.957497 7739 +midtermand 1 1 6.957497 6.957497 7740 +originallyschedul 1 1 6.957497 6.957497 7741 +andtim 1 1 6.957497 6.957497 7742 +usingth 1 1 6.957497 6.957497 7743 +intactand 1 1 6.957497 6.957497 7744 +forinst 1 1 6.957497 6.957497 7745 +andrel 1 1 6.957497 6.957497 7746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^143^CurrentQtr @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^321^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^321^ new file mode 100644 index 00000000..3f3abcc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^321^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 3 299 1.098612 3.295836 13 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +washington 4 236 1.386294 5.545176 32 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +hour 2 165 1.791759 3.583518 46 +phone 1 175 1.791759 1.791759 45 +assign 8 135 1.945910 15.567280 66 +spring 2 131 2.079442 4.158884 88 +postscript 1 131 2.079442 2.079442 90 +seattl 1 120 2.079442 2.079442 103 +structur 1 106 2.197225 2.197225 105 +teach 1 108 2.197225 2.197225 112 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +novemb 1 81 2.484907 2.484907 179 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +sieg 3 69 2.708050 8.124150 260 +thursdai 3 70 2.708050 8.124150 241 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 2 64 2.772589 5.545178 261 +handout 1 64 2.772589 2.772589 263 +previou 1 62 2.772589 2.772589 290 +browser 1 56 2.890372 2.890372 313 +sampl 1 53 2.944439 2.944439 339 +appoint 1 49 3.044522 3.044522 358 +set 1 50 3.044522 3.044522 361 +midterm 2 45 3.135494 6.270988 392 +winter 1 36 3.367296 3.367296 500 +autumn 2 31 3.496508 6.993016 558 +defin 1 22 3.850148 3.850148 746 +edulast 1 17 4.110874 4.110874 927 +discret 1 13 4.382027 4.382027 1165 +johnson 1 13 4.382027 4.382027 1162 +recurs 1 13 4.382027 4.382027 1127 +loew 1 12 4.465908 4.465908 1252 +reader 1 12 4.465908 4.465908 1246 +web 1 12 4.465908 4.465908 1249 +induct 1 11 4.553877 4.553877 1304 +leveson 1 9 4.753590 4.753590 1540 +acrobat 2 6 5.164786 10.329572 2063 +beam 4 5 5.347108 21.388432 2344 +karp 1 5 5.347108 5.347108 2284 +ruzzo 1 5 5.347108 5.347108 2345 +nowitz 2 2 6.263398 12.526796 4390 +ofyour 1 2 6.263398 6.263398 4063 +instructorpaul 1 1 6.957497 6.957497 7747 +edulectur 1 1 6.957497 6.957497 7748 +assistantjonathan 1 1 6.957497 6.957497 7749 +edusect 1 1 6.957497 6.957497 7750 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^322^96w^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^322^96w^ new file mode 100644 index 00000000..b5c657e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^322^96w^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +last 2 314 1.098612 2.197224 14 +cours 2 273 1.098612 2.197224 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +updat 2 191 1.609438 3.218876 41 +includ 1 208 1.609438 1.609438 42 +read 1 154 1.791759 1.791759 47 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +document 3 121 2.079442 6.238326 89 +provid 2 121 2.079442 4.158884 94 +postscript 1 131 2.079442 2.079442 90 +final 2 116 2.197225 4.394450 108 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +text 8 98 2.302585 18.420680 133 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +mani 1 92 2.397895 2.397895 150 +solut 11 82 2.484907 27.333977 162 +exam 4 86 2.484907 9.939628 169 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +homework 11 79 2.564949 28.214439 193 +state 3 76 2.564949 7.694847 207 +messag 1 76 2.564949 2.564949 212 +html 4 75 2.639057 10.556228 235 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +syllabu 1 67 2.708050 2.708050 247 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +handout 6 64 2.772589 16.635534 263 +complex 1 64 2.772589 2.772589 269 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +simpl 1 60 2.833213 2.833213 298 +browser 1 56 2.890372 2.890372 313 +three 1 54 2.944439 2.944439 330 +format 5 48 3.044522 15.222610 356 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +midterm 2 45 3.135494 6.270988 392 +math 1 44 3.135494 3.135494 402 +review 1 42 3.218876 3.218876 425 +origin 1 38 3.295837 3.295837 472 +formal 1 37 3.332205 3.332205 478 +winter 3 36 3.367296 10.101888 500 +express 1 32 3.465736 3.465736 540 +autumn 3 31 3.496508 10.489524 558 +richard 1 31 3.496508 3.496508 559 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +handl 1 24 3.761200 3.761200 685 +proof 1 23 3.806662 3.806662 720 +fact 1 21 3.912023 3.912023 780 +viewer 1 21 3.912023 3.912023 787 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +regular 2 17 4.110874 8.221748 929 +intro 1 17 4.110874 4.110874 915 +ascii 1 15 4.248495 4.248495 1032 +latex 31 14 4.317488 133.842128 1064 +command 1 14 4.317488 4.317488 1083 +convert 1 13 4.382027 4.382027 1122 +web 1 12 4.465908 4.465908 1249 +extra 1 11 4.553877 4.553877 1312 +regard 1 11 4.553877 4.553877 1309 +notat 1 9 4.753590 4.753590 1489 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +legibl 1 7 5.010635 5.010635 1866 +ghostscript 1 7 5.010635 5.010635 1867 +grammar 2 6 5.164786 10.329572 2058 +markup 1 6 5.164786 5.164786 2059 +strang 1 6 5.164786 5.164786 2064 +ladner 1 6 5.164786 5.164786 2062 +diagram 3 5 5.347108 16.041324 2346 +pars 1 5 5.347108 5.347108 2321 +latexhtml 1 5 5.347108 5.347108 2347 +rambl 1 3 5.857933 5.857933 3308 +ladnerclass 1 1 6.957497 6.957497 7751 +construc 1 1 6.957497 6.957497 7752 +halt 1 1 6.957497 6.957497 7753 +undecidableexam 1 1 6.957497 6.957497 7754 +edufix 1 1 6.957497 6.957497 7755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ new file mode 100644 index 00000000..1ed4d1b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^322^CurrentQtr^ @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +last 2 314 1.098612 2.197224 14 +mail 3 238 1.386294 4.158882 22 +washington 2 236 1.386294 2.772588 32 +email 1 220 1.386294 1.386294 29 +list 3 201 1.609438 4.828314 39 +fall 2 181 1.609438 3.218876 40 +class 2 199 1.609438 3.218876 37 +updat 2 191 1.609438 3.218876 41 +includ 1 208 1.609438 1.609438 42 +lectur 3 135 1.945910 5.837730 73 +model 2 145 1.945910 3.891820 69 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +check 2 115 2.197225 4.394450 118 +send 1 114 2.197225 2.197225 109 +solut 1 82 2.484907 2.484907 162 +exam 1 86 2.484907 2.484907 169 +messag 3 76 2.564949 7.694847 212 +homework 1 79 2.564949 2.564949 193 +previou 2 62 2.772589 5.545178 290 +handout 1 64 2.772589 2.772589 263 +septemb 1 65 2.772589 2.772589 274 +content 1 59 2.833213 2.833213 302 +set 1 50 3.044522 3.044522 361 +pointer 1 48 3.044522 3.044522 368 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +formal 2 37 3.332205 6.664410 478 +winter 2 36 3.367296 6.734592 500 +autumn 3 31 3.496508 10.489524 558 +sent 1 22 3.850148 3.850148 763 +log 1 19 4.007333 4.007333 857 +intro 1 17 4.110874 4.110874 915 +web 1 12 4.465908 4.465908 1249 +regularli 1 11 4.553877 4.553877 1338 +subscrib 2 9 4.753590 9.507180 1541 +upcom 2 8 4.875197 9.750394 1685 +ann 1 6 5.164786 5.164786 2065 +majordomo 1 6 5.164786 5.164786 2066 +willb 1 5 5.347108 5.347108 2277 +condon 2 3 5.857933 11.715866 3309 +findhomework 1 1 6.957497 6.957497 7756 +userid 1 1 6.957497 6.957497 7757 +edukaye 1 1 6.957497 6.957497 7758 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^326^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^326^ new file mode 100644 index 00000000..cabeb4e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^326^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +last 2 314 1.098612 2.197224 14 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +updat 2 191 1.609438 3.218876 41 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +spring 2 131 2.079442 4.158884 88 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +messag 1 76 2.564949 2.564949 212 +previou 1 62 2.772589 2.772589 290 +unix 1 58 2.890372 2.890372 308 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +winter 2 36 3.367296 6.734592 500 +autumn 3 31 3.496508 10.489524 558 +request 1 26 3.688879 3.688879 635 +martin 1 21 3.912023 3.912023 794 +web 1 12 4.465908 4.465908 1249 +tompaclass 1 3 5.857933 5.857933 3310 +informationlab 1 1 6.957497 6.957497 7759 +technot 1 1 6.957497 6.957497 7760 +questionnaireloc 1 1 6.957497 6.957497 7761 +cdeletemin 1 1 6.957497 6.957497 7762 +treeshomework 1 1 6.957497 6.957497 7763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ new file mode 100644 index 00000000..4ba58c01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^326^95sp^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +class 2 199 1.609438 3.218876 37 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +click 1 142 1.945910 1.945910 78 +lectur 1 135 1.945910 1.945910 73 +spring 3 131 2.079442 6.238326 88 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +seattl 1 120 2.079442 2.079442 103 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +exam 1 86 2.484907 2.484907 169 +messag 2 76 2.564949 5.129898 212 +homework 1 79 2.564949 2.564949 193 +sourc 1 77 2.564949 2.564949 201 +suggest 1 53 2.944439 2.944439 331 +frequent 2 49 3.044522 6.089044 367 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +taught 1 33 3.433987 3.433987 526 +ad 1 32 3.465736 3.465736 544 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +reprint 1 14 4.317488 4.317488 1097 +hypermedia 1 12 4.465908 4.465908 1247 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +departmentof 1 9 4.753590 4.753590 1539 +documentfor 1 7 5.010635 5.010635 1865 +ladner 2 6 5.164786 10.329572 2062 +theclass 1 6 5.164786 5.164786 2060 +newinform 1 5 5.347108 5.347108 2342 +foracadem 1 5 5.347108 5.347108 2341 +quotedand 1 3 5.857933 5.857933 3304 +fasulo 1 2 6.263398 6.263398 4391 +structuresrichard 1 1 6.957497 6.957497 7764 +instructordan 1 1 6.957497 6.957497 7765 +assistantthi 1 1 6.957497 6.957497 7766 +overheadsport 1 1 6.957497 6.957497 7767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^341^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^341^ new file mode 100644 index 00000000..a017f2bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^341^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 7 672 0.000000 0.000000 1 +page 7 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +languag 3 227 1.386294 4.158882 26 +washington 2 236 1.386294 2.772588 32 +spring 2 131 2.079442 4.158884 88 +provid 1 121 2.079442 2.079442 94 +sourc 1 77 2.564949 2.564949 201 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +offer 6 43 3.178054 19.068324 414 +purpos 1 37 3.332205 3.332205 481 +winter 2 36 3.367296 6.734592 500 +copyright 1 36 3.367296 3.367296 495 +everi 1 34 3.401197 3.401197 519 +autumn 2 31 3.496508 6.993016 558 +computersci 1 30 3.555348 3.555348 562 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +webmast 1 15 4.248495 4.248495 1045 +reprint 1 14 4.317488 4.317488 1097 +scienceand 1 5 5.347108 5.347108 2348 +ofcs 5 2 6.263398 31.316990 4392 +languagesfal 1 2 6.263398 6.263398 4161 +informationth 1 2 6.263398 6.263398 4393 +listinfo 1 2 6.263398 6.263398 4394 +pagehom 1 2 6.263398 6.263398 4395 +engineeringport 1 2 6.263398 6.263398 4396 +academicnonprofit 1 2 6.263398 6.263398 4397 +dulycredit 1 2 6.263398 6.263398 4398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html new file mode 100644 index 00000000..5c2cc2a5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^341^spring96^index.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +page 5 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +last 16 314 1.098612 17.577792 14 +us 5 329 1.098612 5.493060 16 +offic 2 299 1.098612 2.197224 13 +cours 2 273 1.098612 2.197224 15 +engin 1 297 1.098612 1.098612 20 +washington 4 236 1.386294 5.545176 32 +languag 3 227 1.386294 4.158882 26 +mail 2 238 1.386294 2.772588 22 +updat 16 191 1.609438 25.751008 41 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +read 9 154 1.791759 16.125831 47 +hour 2 165 1.791759 3.583518 46 +note 24 142 1.945910 46.701840 67 +assign 12 135 1.945910 23.350920 66 +lectur 1 135 1.945910 1.945910 73 +relat 1 139 1.945910 1.945910 68 +postscript 8 131 2.079442 16.635536 90 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +code 14 108 2.197225 30.761150 116 +final 2 116 2.197225 4.394450 108 +instructor 1 108 2.197225 2.197225 107 +section 2 94 2.397895 4.795790 149 +solut 4 82 2.484907 9.939628 162 +exam 2 86 2.484907 4.969814 169 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +build 1 85 2.484907 2.484907 184 +april 13 77 2.564949 33.344337 196 +method 4 80 2.564949 10.259796 213 +june 3 79 2.564949 7.694847 214 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +interfac 1 79 2.564949 2.564949 209 +sourc 1 77 2.564949 2.564949 201 +html 8 75 2.639057 21.112456 235 +tuesdai 1 73 2.639057 2.639057 219 +sieg 6 69 2.708050 16.248300 260 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +new 1 64 2.772589 2.772589 262 +march 2 61 2.833213 5.666426 295 +unix 1 58 2.890372 2.890372 308 +reason 1 57 2.890372 2.890372 318 +suggest 9 53 2.944439 26.499951 331 +sampl 3 53 2.944439 8.833317 339 +quarter 1 47 3.091042 3.091042 389 +electron 1 47 3.091042 3.091042 379 +done 1 47 3.091042 3.091042 381 +adapt 1 46 3.091042 3.091042 387 +netscap 1 44 3.135494 3.135494 395 +offer 4 43 3.178054 12.712216 414 +review 1 42 3.218876 3.218876 425 +hand 1 37 3.332205 3.332205 475 +purpos 1 37 3.332205 3.332205 481 +winter 2 36 3.367296 6.734592 500 +copyright 1 36 3.367296 3.367296 495 +everi 1 34 3.401197 3.401197 519 +eduoffic 2 33 3.433987 6.867974 531 +autumn 1 31 3.496508 3.496508 558 +computersci 1 30 3.555348 3.555348 562 +steve 1 29 3.583519 3.583519 594 +quot 1 29 3.583519 3.583519 582 +full 1 28 3.610918 3.610918 615 +administr 1 27 3.637586 3.637586 628 +pagecs 1 26 3.688879 3.688879 658 +session 1 26 3.688879 3.688879 643 +accur 1 25 3.737670 3.737670 680 +daili 1 24 3.761200 3.761200 706 +newsgroup 1 21 3.912023 3.912023 783 +miss 1 19 4.007333 4.007333 866 +lisp 6 18 4.060443 24.362658 897 +figur 2 18 4.060443 8.120886 903 +partial 1 18 4.060443 4.060443 900 +quiz 3 16 4.174387 12.523161 990 +webmast 1 15 4.248495 4.248495 1045 +dave 2 14 4.317488 8.634976 1098 +save 1 14 4.317488 4.317488 1099 +reprint 1 14 4.317488 4.317488 1097 +prolog 5 13 4.382027 21.910135 1155 +emac 3 13 4.382027 13.146081 1143 +hank 1 12 4.465908 4.465908 1253 +submiss 1 11 4.553877 4.553877 1298 +grove 3 8 4.875197 14.625591 1675 +clip 1 7 5.010635 5.010635 1868 +transcript 9 6 5.164786 46.483074 2067 +scienceand 1 5 5.347108 5.347108 2348 +turnin 1 4 5.568345 5.568345 2654 +employe 1 4 5.568345 5.568345 2717 +ofcs 4 2 6.263398 25.053592 4392 +overviewcours 1 2 6.263398 6.263398 4399 +informationth 1 2 6.263398 6.263398 4393 +listinfo 1 2 6.263398 6.263398 4394 +pagehom 1 2 6.263398 6.263398 4395 +engineeringport 1 2 6.263398 6.263398 4396 +academicnonprofit 1 2 6.263398 6.263398 4397 +dulycredit 1 2 6.263398 6.263398 4398 +smalltalk 11 1 6.957497 76.532467 7768 +transcipt 4 1 6.957497 27.829988 7769 +htmlpostscript 2 1 6.957497 13.914994 7770 +languagesspr 1 1 6.957497 6.957497 7771 +hanks 1 1 6.957497 6.957497 7772 +documentsgeneralintroduct 1 1 6.957497 6.957497 7773 +relatedrun 1 1 6.957497 6.957497 7774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^370^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^370^ new file mode 100644 index 00000000..127ac73d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^370^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +engin 4 297 1.098612 4.394448 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 4 236 1.386294 5.545176 32 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +updat 1 191 1.609438 1.609438 41 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +academ 1 82 2.484907 2.484907 178 +sourc 1 77 2.564949 2.564949 201 +degre 2 69 2.708050 5.416100 259 +previou 1 62 2.772589 2.772589 290 +colleg 2 61 2.833213 5.666426 300 +index 2 56 2.890372 5.780744 309 +quarter 2 47 3.091042 6.182084 389 +adapt 1 46 3.091042 3.091042 387 +offer 2 43 3.178054 6.356108 414 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +art 1 29 3.583519 3.583519 593 +quot 1 29 3.583519 3.583519 582 +accur 1 25 3.737670 3.737670 680 +less 1 18 4.060443 4.060443 892 +portion 1 16 4.174387 4.174387 971 +webmast 2 15 4.248495 8.496990 1045 +reprint 1 14 4.317488 4.317488 1097 +earlier 1 13 4.382027 4.382027 1140 +web 1 12 4.465908 4.465908 1249 +duli 1 12 4.465908 4.465908 1248 +instanc 1 11 4.553877 4.553877 1322 +nonprofit 1 11 4.553877 4.553877 1339 +intact 1 2 6.263398 6.263398 4385 +pagecurr 1 1 6.957497 6.957497 7775 +quarterth 1 1 6.957497 6.957497 7776 +quarterscours 1 1 6.957497 6.957497 7777 +younotic 1 1 6.957497 6.957497 7778 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ new file mode 100644 index 00000000..9a7e02a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^370^CurrentQtr^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +last 3 314 1.098612 3.295836 14 +us 2 329 1.098612 2.197224 16 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +design 4 213 1.386294 5.545176 25 +mail 4 238 1.386294 5.545176 22 +washington 3 236 1.386294 4.158882 32 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +class 5 199 1.609438 8.047190 37 +updat 3 191 1.609438 4.828314 41 +hour 2 165 1.791759 3.583518 46 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +lectur 4 135 1.945910 7.783640 73 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +document 2 121 2.079442 4.158884 89 +tool 2 117 2.079442 4.158884 93 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +instructor 4 108 2.197225 8.788900 107 +send 2 114 2.197225 4.394450 109 +final 2 116 2.197225 4.394450 108 +topic 2 114 2.197225 4.394450 110 +version 1 113 2.197225 2.197225 122 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +exam 2 86 2.484907 4.969814 169 +contain 1 81 2.484907 2.484907 174 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +sourc 1 77 2.564949 2.564949 201 +logic 2 71 2.639057 5.278114 230 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +sieg 2 69 2.708050 5.416100 260 +goal 1 66 2.708050 2.708050 250 +syllabu 1 67 2.708050 2.708050 247 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +previou 1 62 2.772589 2.772589 290 +publish 1 57 2.890372 2.890372 326 +think 1 57 2.890372 2.890372 314 +maintain 2 51 2.995732 5.991464 342 +digit 1 52 2.995732 2.995732 348 +frequent 1 49 3.044522 3.044522 367 +archiv 1 49 3.044522 3.044522 364 +quarter 2 47 3.091042 6.182084 389 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +even 1 45 3.135494 3.135494 393 +announc 3 40 3.258097 9.774291 441 +author 1 39 3.258097 3.258097 450 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +expect 1 37 3.332205 3.332205 484 +purpos 1 37 3.332205 3.332205 481 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +go 1 33 3.433987 3.433987 529 +ad 1 32 3.465736 3.465736 544 +collabor 1 32 3.465736 3.465736 543 +autumn 2 31 3.496508 6.993016 558 +quot 1 29 3.583519 3.583519 582 +administr 2 27 3.637586 7.275172 628 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +notic 1 25 3.737670 3.737670 675 +accur 1 25 3.737670 3.737670 680 +tell 1 21 3.912023 3.912023 777 +anderson 1 19 4.007333 4.007333 860 +feedback 1 19 4.007333 4.007333 854 +aid 1 18 4.060443 4.060443 904 +whole 1 17 4.110874 4.110874 940 +weekli 1 17 4.110874 4.110874 919 +weslei 2 16 4.174387 8.348774 983 +portion 1 16 4.174387 4.174387 971 +webmast 2 15 4.248495 8.496990 1045 +anonym 1 14 4.317488 4.317488 1100 +reprint 1 14 4.317488 4.317488 1097 +everyon 1 13 4.382027 4.382027 1148 +quizz 1 13 4.382027 4.382027 1151 +addison 2 12 4.465908 8.931816 1230 +workload 1 12 4.465908 4.465908 1210 +overal 1 12 4.465908 4.465908 1254 +duli 1 12 4.465908 4.465908 1248 +benjamin 2 11 4.553877 9.107754 1296 +evolut 1 11 4.553877 4.553877 1314 +nonprofit 1 11 4.553877 4.553877 1339 +cheat 1 10 4.653960 4.653960 1395 +desir 1 9 4.753590 4.753590 1542 +cum 2 8 4.875197 9.750394 1619 +bunch 1 7 5.010635 5.010635 1861 +gaetano 3 6 5.164786 15.494358 2068 +borriello 2 5 5.347108 10.694216 2349 +corei 2 4 5.568345 11.136690 2718 +contemporari 1 4 5.568345 5.568345 2719 +katz 2 3 5.857933 11.715866 3276 +corin 1 3 5.857933 5.857933 3311 +aweekli 1 3 5.857933 5.857933 3312 +andersonwelcom 1 2 6.263398 6.263398 4400 +tocs 1 2 6.263398 6.263398 4401 +messagess 1 2 6.263398 6.263398 4402 +synario 1 2 6.263398 6.263398 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^ new file mode 100644 index 00000000..c4428eaf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 3 412 0.693147 2.079441 8 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +updat 1 191 1.609438 1.609438 41 +data 2 170 1.791759 3.583518 49 +hour 2 165 1.791759 3.583518 46 +algorithm 1 162 1.791759 1.791759 57 +assign 4 135 1.945910 7.783640 66 +hall 2 146 1.945910 3.891820 65 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +structur 2 106 2.197225 4.394450 105 +assist 2 112 2.197225 4.394450 113 +final 2 116 2.197225 4.394450 108 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +grade 2 90 2.397895 4.795790 142 +requir 1 81 2.484907 2.484907 167 +sieg 1 69 2.708050 2.708050 260 +practic 1 70 2.708050 2.708050 246 +polici 1 64 2.772589 2.772589 279 +previou 1 62 2.772589 2.772589 290 +room 1 59 2.833213 2.833213 301 +unix 1 58 2.890372 2.890372 308 +publish 1 57 2.890372 2.890372 326 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +basic 1 50 3.044522 3.044522 360 +appoint 1 49 3.044522 3.044522 358 +midterm 2 45 3.135494 6.270988 392 +textbook 1 44 3.135494 3.135494 397 +keep 1 44 3.135494 3.135494 409 +offer 1 43 3.178054 3.178054 414 +late 2 40 3.258097 6.516194 439 +announc 1 40 3.258097 3.258097 441 +winter 1 36 3.367296 3.367296 500 +autumn 1 31 3.496508 3.496508 558 +option 2 30 3.555348 7.110696 575 +steve 1 29 3.583519 3.583519 594 +pagecs 1 26 3.688879 3.688879 658 +dai 1 22 3.850148 3.850148 753 +tent 1 22 3.850148 3.850148 739 +smith 1 20 3.951244 3.951244 820 +facil 1 20 3.951244 3.951244 814 +account 1 18 4.060443 4.060443 882 +lisp 1 18 4.060443 4.060443 897 +encourag 1 18 4.060443 4.060443 880 +deduct 1 12 4.465908 4.465908 1236 +tanimoto 2 10 4.653960 9.307920 1429 +penalti 1 10 4.653960 4.653960 1405 +prentic 1 7 5.010635 5.010635 1838 +aboutth 1 4 5.568345 5.568345 2720 +punctual 1 3 5.857933 5.857933 3313 +anhai 1 2 6.263398 6.263398 4404 +doan 1 2 6.263398 6.263398 4405 +mscc 1 2 6.263398 6.263398 4406 +breakdown 1 2 6.263398 6.263398 4407 +algorithmsautumn 1 1 6.957497 6.957497 7779 +shaffer 1 1 6.957497 6.957497 7780 +examinform 1 1 6.957497 6.957497 7781 +exambas 1 1 6.957497 6.957497 7782 +compilerassignmentssolut 1 1 6.957497 6.957497 7783 +assignmentsteach 1 1 6.957497 6.957497 7784 +informationscheduleweb 1 1 6.957497 6.957497 7785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a new file mode 100644 index 00000000..d1265709 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^95a^index.html.95a @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +engin 3 297 1.098612 3.295836 20 +cours 2 273 1.098612 2.197224 15 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +relat 1 139 1.945910 1.945910 68 +structur 1 106 2.197225 2.197225 105 +instructor 1 108 2.197225 2.197225 107 +exam 1 86 2.484907 2.484907 169 +help 1 83 2.484907 2.484907 175 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +materi 1 75 2.639057 2.639057 221 +degre 2 69 2.708050 5.416100 259 +syllabu 1 67 2.708050 2.708050 247 +colleg 2 61 2.833213 5.666426 300 +major 2 56 2.890372 5.780744 315 +offer 2 43 3.178054 6.356108 414 +art 1 29 3.583519 3.583519 593 +pagecs 1 26 3.688879 3.688879 658 +demo 1 18 4.060443 4.060443 888 +jonathan 1 13 4.382027 4.382027 1174 +mosaic 1 10 4.653960 4.653960 1426 +holden 2 3 5.857933 11.715866 3314 +alistair 1 3 5.857933 5.857933 3315 +nowitz 2 2 6.263398 12.526796 4390 +raini 1 2 6.263398 6.263398 4384 +algorithmsspr 1 1 6.957497 6.957497 7786 +funnowitz 1 1 6.957497 6.957497 7787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html new file mode 100644 index 00000000..0d702d4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^373^96w^w96index.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +program 3 374 0.693147 2.079441 7 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +email 2 220 1.386294 2.772588 29 +washington 2 236 1.386294 2.772588 32 +also 1 259 1.386294 1.386294 28 +data 2 170 1.791759 3.583518 49 +hour 2 165 1.791759 3.583518 46 +file 7 132 1.945910 13.621370 70 +assign 4 135 1.945910 7.783640 66 +note 1 142 1.945910 1.945910 67 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +search 2 95 2.397895 4.795790 155 +follow 1 92 2.397895 2.397895 143 +homework 3 79 2.564949 7.694847 193 +state 1 76 2.564949 2.564949 207 +come 1 78 2.564949 2.564949 202 +meet 2 72 2.639057 5.278114 229 +line 1 75 2.639057 2.639057 231 +test 7 66 2.708050 18.956350 252 +sieg 1 69 2.708050 2.708050 260 +set 1 50 3.044522 3.044522 361 +answer 1 45 3.135494 3.135494 391 +long 1 43 3.178054 3.178054 413 +tree 1 36 3.367296 3.367296 492 +next 2 34 3.401197 6.802394 517 +eduoffic 2 33 3.433987 6.867974 531 +quot 2 29 3.583519 7.167038 582 +pagecs 1 26 3.688879 3.688879 658 +valu 1 25 3.737670 3.737670 665 +begin 1 23 3.806662 3.806662 716 +sheet 1 16 4.174387 4.174387 973 +indic 3 15 4.248495 12.745485 1013 +charact 1 15 4.248495 4.248495 1028 +denis 1 12 4.465908 4.465908 1255 +string 1 11 4.553877 4.553877 1340 +linda 1 10 4.653960 4.653960 1394 +length 1 10 4.653960 4.653960 1400 +shapiro 1 8 4.875197 4.875197 1686 +insert 1 8 4.875197 4.875197 1687 +integ 1 8 4.875197 4.875197 1688 +assignmentsprogram 1 6 5.164786 5.164786 2019 +assignmentshomework 1 4 5.568345 5.568345 2721 +enclos 3 1 6.957497 20.872491 7788 +siegtelephon 2 1 6.957497 13.914994 7789 +algorithmswint 1 1 6.957497 6.957497 7790 +shapirooffic 1 1 6.957497 6.957497 7791 +pinneloffic 1 1 6.957497 6.957497 7792 +denisep 1 1 6.957497 6.957497 7793 +syllabustransparencieshomework 1 1 6.957497 6.957497 7794 +inquot 1 1 6.957497 6.957497 7795 +associatedvalu 1 1 6.957497 6.957497 7796 +linebegin 1 1 6.957497 6.957497 7797 +isfollow 1 1 6.957497 6.957497 7798 +graphimag 1 1 6.957497 6.957497 7799 +graphreview 1 1 6.957497 6.957497 7800 +listsfin 1 1 6.957497 6.957497 7801 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ new file mode 100644 index 00000000..3938a7eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^401^CurrentQuarter^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 2 412 0.693147 1.386294 8 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +class 2 199 1.609438 3.218876 37 +modifi 1 178 1.609438 1.609438 35 +assign 1 135 1.945910 1.945910 66 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +world 1 115 2.197225 2.197225 126 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +frequent 1 49 3.044522 3.044522 367 +keep 1 44 3.135494 3.135494 409 +announc 1 40 3.258097 3.258097 441 +ad 1 32 3.465736 3.465736 544 +static 1 27 3.637586 3.637586 619 +hypermedia 1 12 4.465908 4.465908 1247 +admin 1 9 4.753590 4.753590 1476 +documentfor 1 7 5.010635 5.010635 1865 +willb 1 5 5.347108 5.347108 2277 +urgent 1 3 5.857933 5.857933 3316 +classhomethi 1 1 6.957497 6.957497 7802 +inmind 1 1 6.957497 6.957497 7803 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^403X^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^403X^ new file mode 100644 index 00000000..d975e3db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^403X^ @@ -0,0 +1,235 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 7 443 0.693147 4.852029 6 +work 6 380 0.693147 4.158882 9 +program 4 374 0.693147 2.772588 7 +project 18 340 1.098612 19.775016 18 +engin 9 297 1.098612 9.887508 20 +student 5 343 1.098612 5.493060 19 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +softwar 17 220 1.386294 23.566998 30 +design 9 213 1.386294 12.476646 25 +also 2 259 1.386294 2.772588 28 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +includ 8 208 1.609438 12.875504 42 +class 7 199 1.609438 11.266066 37 +group 4 183 1.609438 6.437752 36 +updat 1 191 1.609438 1.609438 41 +develop 3 174 1.791759 5.375277 53 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +implement 1 152 1.791759 1.791759 52 +first 2 140 1.945910 3.891820 71 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +document 7 121 2.079442 14.556094 89 +provid 4 121 2.079442 8.317768 94 +analysi 3 124 2.079442 6.238326 98 +tool 2 117 2.079442 4.158884 93 +studi 1 120 2.079442 2.079442 91 +manag 11 114 2.197225 24.169475 125 +teach 3 108 2.197225 6.591675 112 +specif 3 106 2.197225 6.591675 106 +topic 2 114 2.197225 4.394450 110 +instructor 2 108 2.197225 4.394450 107 +make 2 111 2.197225 4.394450 120 +version 1 113 2.197225 2.197225 122 +assist 1 112 2.197225 2.197225 113 +user 4 104 2.302585 9.210340 137 +technic 2 100 2.302585 4.605170 140 +part 1 98 2.302585 2.302585 129 +take 1 97 2.302585 2.302585 134 +real 2 93 2.397895 4.795790 144 +commun 2 95 2.397895 4.795790 157 +search 1 95 2.397895 2.397895 155 +present 1 91 2.397895 2.397895 145 +learn 4 86 2.484907 9.939628 170 +requir 3 81 2.484907 7.454721 167 +control 3 82 2.484907 7.454721 164 +activ 2 84 2.484907 4.969814 182 +chang 2 82 2.484907 4.969814 163 +larg 1 82 2.484907 2.484907 168 +member 1 84 2.484907 2.484907 165 +exampl 2 77 2.564949 5.129898 195 +issu 2 78 2.564949 5.129898 211 +interfac 2 79 2.564949 5.129898 209 +method 1 80 2.564949 2.564949 213 +master 1 76 2.564949 2.564949 216 +appear 1 78 2.564949 2.564949 210 +meet 3 72 2.639057 7.917171 229 +addit 2 74 2.639057 5.278114 228 +write 2 72 2.639057 5.278114 222 +test 2 66 2.708050 5.416100 252 +sieg 1 69 2.708050 2.708050 260 +plan 8 65 2.772589 22.180712 272 +experi 6 64 2.772589 16.635534 283 +organ 4 65 2.772589 11.090356 265 +written 2 63 2.772589 5.545178 278 +interact 2 62 2.772589 5.545178 270 +evalu 2 64 2.772589 5.545178 266 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +creat 1 63 2.772589 2.772589 277 +guid 1 63 2.772589 2.772589 267 +sever 1 56 2.890372 2.890372 322 +reason 1 57 2.890372 2.890372 318 +allow 1 53 2.944439 2.944439 333 +profession 1 51 2.995732 2.995732 345 +hardwar 1 51 2.995732 2.995732 350 +set 2 50 3.044522 6.089044 361 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +quarter 3 47 3.091042 9.273126 389 +done 2 47 3.091042 6.182084 381 +possibl 1 47 3.091042 3.091042 378 +get 1 46 3.091042 3.091042 380 +understand 1 47 3.091042 3.091042 384 +discuss 1 45 3.135494 3.135494 399 +natur 1 44 3.135494 3.135494 406 +review 5 42 3.218876 16.094380 425 +howev 2 41 3.218876 6.437752 422 +industri 2 38 3.295837 6.591674 464 +prototyp 2 38 3.295837 6.591674 463 +correct 1 38 3.295837 3.295837 462 +respons 11 37 3.332205 36.654255 476 +cost 1 37 3.332205 3.332205 480 +short 1 36 3.367296 3.367296 499 +ofth 1 36 3.367296 3.367296 491 +survei 1 35 3.401197 3.401197 513 +manual 1 35 3.401197 3.401197 504 +product 4 33 3.433987 13.735948 527 +concept 2 32 3.465736 6.931472 537 +human 2 32 3.465736 6.931472 546 +often 1 31 3.496508 3.496508 551 +posit 1 31 3.496508 3.496508 552 +secur 3 30 3.555348 10.666044 577 +produc 1 30 3.555348 3.555348 572 +hard 1 30 3.555348 3.555348 563 +exist 1 30 3.555348 3.555348 569 +particip 3 29 3.583519 10.750557 589 +platform 1 29 3.583519 3.583519 591 +usual 1 28 3.610918 3.610918 608 +held 1 28 3.610918 3.610918 600 +releas 1 28 3.610918 3.610918 616 +team 2 27 3.637586 7.275172 625 +administr 1 27 3.637586 3.637586 628 +determin 1 27 3.637586 3.637586 630 +experiment 2 26 3.688879 7.377758 645 +pagecs 1 26 3.688879 3.688879 658 +session 1 26 3.688879 3.688879 643 +consist 1 26 3.688879 3.688879 651 +primari 8 25 3.737670 29.901360 669 +reliabl 2 25 3.737670 7.475340 674 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +handl 1 24 3.761200 3.761200 685 +head 1 23 3.806662 3.806662 732 +try 1 22 3.850148 3.850148 764 +qualiti 4 20 3.951244 15.804976 832 +verif 1 20 3.951244 3.951244 826 +safeti 1 20 3.951244 3.951244 817 +sure 1 20 3.951244 3.951244 813 +feedback 1 19 4.007333 4.007333 854 +statu 1 18 4.060443 4.060443 885 +regular 1 17 4.110874 4.110874 929 +outlin 1 17 4.110874 4.110874 914 +estim 1 17 4.110874 4.110874 930 +normal 1 16 4.174387 4.174387 995 +configur 4 15 4.248495 16.993980 1012 +enough 2 15 4.248495 8.496990 1040 +track 1 15 4.248495 4.248495 1029 +transit 1 15 4.248495 4.248495 1046 +role 4 14 4.317488 17.269952 1101 +conduct 2 14 4.317488 8.634976 1065 +embed 1 14 4.317488 4.317488 1102 +everyon 2 13 4.382027 8.764054 1148 +essenti 1 13 4.382027 4.382027 1137 +sai 1 13 4.382027 4.382027 1175 +necessari 1 13 4.382027 4.382027 1147 +overal 2 12 4.465908 8.931816 1254 +nanci 1 12 4.465908 4.465908 1256 +skill 1 12 4.465908 4.465908 1205 +characterist 1 12 4.465908 4.465908 1257 +readabl 1 12 4.465908 4.465908 1258 +valid 1 11 4.553877 4.553877 1299 +evolut 1 11 4.553877 4.553877 1314 +princip 1 10 4.653960 4.653960 1397 +mainten 3 9 4.753590 14.260770 1543 +leveson 2 9 4.753590 9.507180 1540 +latter 1 9 4.753590 4.753590 1522 +factor 1 9 4.753590 4.753590 1544 +respect 1 9 4.753590 4.753590 1545 +realist 1 8 4.875197 4.875197 1665 +risk 1 8 4.875197 4.875197 1689 +reus 1 8 4.875197 4.875197 1661 +architect 1 8 4.875197 4.875197 1624 +successfulli 1 7 5.010635 5.010635 1869 +attach 1 7 5.010635 5.010635 1785 +metric 1 7 5.010635 5.010635 1831 +ethic 1 7 5.010635 5.010635 1786 +creation 2 6 5.164786 10.329572 2069 +ensur 2 6 5.164786 10.329572 2012 +lack 1 6 5.164786 5.164786 1994 +phase 1 6 5.164786 5.164786 1977 +theproject 1 6 5.164786 5.164786 1981 +deliv 1 6 5.164786 5.164786 2070 +augment 1 5 5.347108 5.347108 2350 +assur 2 4 5.568345 11.136690 2722 +isthat 1 4 5.568345 5.568345 2723 +assess 1 4 5.568345 5.568345 2724 +employe 1 4 5.568345 5.568345 2717 +duti 7 3 5.857933 41.005531 3317 +boe 3 3 5.857933 17.573799 3318 +specialist 3 3 5.857933 17.573799 3319 +leadership 2 3 5.857933 11.715866 3320 +expertis 2 3 5.857933 11.715866 3321 +oral 1 3 5.857933 5.857933 3189 +listof 1 3 5.857933 5.857933 3322 +proper 1 3 5.857933 5.857933 3323 +interview 1 3 5.857933 5.857933 3324 +portfolio 3 2 6.263398 18.790194 4408 +educours 1 2 6.263398 6.263398 4409 +terminolog 1 2 6.263398 6.263398 4410 +thegroup 1 2 6.263398 6.263398 4054 +beavoid 1 2 6.263398 6.263398 4411 +thenorm 1 2 6.263398 6.263398 4412 +clariti 1 2 6.263398 6.263398 4413 +deliver 2 1 6.957497 13.914994 7804 +mockup 2 1 6.957497 13.914994 7805 +descriptioninstruct 1 1 6.957497 6.957497 7806 +softwaresystem 1 1 6.957497 6.957497 7807 +tocreat 1 1 6.957497 6.957497 7808 +effectiveor 1 1 6.957497 6.957497 7809 +topicsar 1 1 6.957497 6.957497 7810 +employersand 1 1 6.957497 6.957497 7811 +realbo 1 1 6.957497 6.957497 7812 +largegroup 1 1 6.957497 6.957497 7813 +cannotlearn 1 1 6.957497 6.957497 7814 +devotedto 1 1 6.957497 6.957497 7815 +isto 1 1 6.957497 6.957497 7816 +effectivelytogeth 1 1 6.957497 6.957497 7817 +disast 1 1 6.957497 6.957497 7818 +worktogeth 1 1 6.957497 6.957497 7819 +requirementsanalysi 1 1 6.957497 6.957497 7820 +areal 1 1 6.957497 6.957497 7821 +engineeringinstitut 1 1 6.957497 6.957497 7822 +providedat 1 1 6.957497 6.957497 7823 +playthat 1 1 6.957497 6.957497 7824 +projectso 1 1 6.957497 6.957497 7825 +softwaredevelop 1 1 6.957497 6.957497 7826 +responsiblefor 1 1 6.957497 6.957497 7827 +duri 1 1 6.957497 6.957497 7828 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^403^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^403^ new file mode 100644 index 00000000..429a509a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^403^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +cours 3 273 1.098612 3.295836 15 +time 2 293 1.098612 2.197224 17 +offic 2 299 1.098612 2.197224 13 +engin 2 297 1.098612 2.197224 20 +project 1 340 1.098612 1.098612 18 +softwar 5 220 1.386294 6.931470 30 +mail 3 238 1.386294 4.158882 22 +washington 3 236 1.386294 4.158882 32 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +includ 2 208 1.609438 3.218876 42 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +hour 2 165 1.791759 3.583518 46 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +assign 6 135 1.945910 11.675460 66 +note 5 142 1.945910 9.729550 67 +professor 1 137 1.945910 1.945910 76 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +document 1 121 2.079442 2.079442 89 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +specif 2 106 2.197225 4.394450 106 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +question 1 91 2.397895 2.397895 141 +requir 2 81 2.484907 4.969814 167 +larg 1 82 2.484907 2.484907 168 +mondai 1 77 2.564949 2.564949 206 +method 1 80 2.564949 2.564949 213 +sieg 2 69 2.708050 5.416100 260 +test 1 66 2.708050 2.708050 252 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 1 64 2.772589 2.772589 261 +creat 1 63 2.772589 2.772589 277 +complex 1 64 2.772589 2.772589 269 +new 1 64 2.772589 2.772589 262 +locat 1 59 2.833213 2.833213 303 +sampl 1 53 2.944439 2.944439 339 +appoint 2 49 3.044522 6.089044 358 +standard 1 48 3.044522 3.044522 365 +fridai 1 44 3.135494 3.135494 390 +winter 2 36 3.367296 6.734592 500 +concept 1 32 3.465736 3.465736 537 +produc 1 30 3.555348 3.555348 572 +pagecs 1 26 3.688879 3.688879 658 +comp 1 26 3.688879 3.688879 650 +request 1 26 3.688879 3.688879 635 +fundament 1 25 3.737670 3.737670 661 +newsgroup 1 21 3.912023 3.912023 783 +feedback 1 19 4.007333 4.007333 854 +adam 1 17 4.110874 4.110874 934 +coupl 1 17 4.110874 4.110874 939 +essenti 1 13 4.382027 4.382027 1137 +loew 1 12 4.465908 4.465908 1252 +nanci 1 12 4.465908 4.465908 1256 +leveson 2 9 4.753590 9.507180 1540 +risk 1 8 4.875197 4.875197 1689 +successfulli 1 7 5.010635 5.010635 1869 +prentic 1 7 5.010635 5.010635 1838 +carlson 2 5 5.347108 10.694216 2351 +consum 1 5 5.347108 5.347108 2334 +interview 1 3 5.857933 5.857933 3324 +axiomat 1 3 5.857933 5.857933 3288 +mailinglist 1 3 5.857933 5.857933 3325 +militari 1 3 5.857933 5.857933 3326 +defens 1 3 5.857933 5.857933 3327 +educours 1 2 6.263398 6.263398 4409 +petri 1 2 6.263398 6.263398 4414 +engineeringmeet 1 1 6.957497 6.957497 7829 +eduta 1 1 6.957497 6.957497 7830 +descriptionthi 1 1 6.957497 6.957497 7831 +textbookghezzi 1 1 6.957497 6.957497 7832 +jazayeri 1 1 6.957497 6.957497 7833 +mandrioli 1 1 6.957497 6.957497 7834 +cohes 1 1 6.957497 6.957497 7835 +departmentsuggest 1 1 6.957497 6.957497 7836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^413^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^413^ new file mode 100644 index 00000000..0d0a12d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^413^ @@ -0,0 +1,347 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +program 12 374 0.693147 8.317764 7 +system 6 443 0.693147 4.158882 6 +work 3 380 0.693147 2.079441 9 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +cours 8 273 1.098612 8.788896 15 +project 7 340 1.098612 7.690284 18 +student 6 343 1.098612 6.591672 19 +offic 3 299 1.098612 3.295836 13 +us 3 329 1.098612 3.295836 16 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +languag 10 227 1.386294 13.862940 26 +mail 5 238 1.386294 6.931470 22 +email 5 220 1.386294 6.931470 29 +also 3 259 1.386294 4.158882 28 +washington 2 236 1.386294 2.772588 32 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +list 5 201 1.609438 8.047190 39 +class 3 199 1.609438 4.828314 37 +includ 3 208 1.609438 4.828314 42 +group 1 183 1.609438 1.609438 36 +read 6 154 1.791759 10.750554 47 +implement 5 152 1.791759 8.958795 52 +hour 3 165 1.791759 5.375277 46 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +assign 9 135 1.945910 17.513190 66 +hall 5 146 1.945910 9.729550 65 +file 5 132 1.945910 9.729550 70 +support 3 132 1.945910 5.837730 83 +note 3 142 1.945910 5.837730 67 +click 2 142 1.945910 3.891820 78 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +construct 1 139 1.945910 1.945910 82 +schedul 5 119 2.079442 10.397210 85 +provid 3 121 2.079442 6.238326 94 +compil 3 122 2.079442 6.238326 96 +number 2 130 2.079442 4.158884 97 +seattl 1 120 2.079442 2.079442 103 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +final 6 116 2.197225 13.183350 108 +pleas 4 113 2.197225 8.788900 114 +topic 3 114 2.197225 6.591675 110 +world 2 115 2.197225 4.394450 126 +check 2 115 2.197225 4.394450 118 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +part 7 98 2.302585 16.118095 129 +book 6 99 2.302585 13.815510 131 +text 2 98 2.302585 4.605170 133 +access 2 102 2.302585 4.605170 136 +techniqu 2 99 2.302585 4.605170 138 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +mani 3 92 2.397895 7.193685 150 +center 3 88 2.397895 7.193685 158 +call 3 91 2.397895 7.193685 153 +question 2 91 2.397895 4.795790 141 +grade 1 90 2.397895 2.397895 142 +select 1 91 2.397895 2.397895 154 +section 1 94 2.397895 2.397895 149 +commun 1 95 2.397895 2.397895 157 +pictur 1 89 2.397895 2.397895 160 +exam 8 86 2.484907 19.879256 169 +info 4 85 2.484907 9.939628 176 +solut 4 82 2.484907 9.939628 162 +help 3 83 2.484907 7.454721 175 +wide 2 84 2.484907 4.969814 185 +librari 1 87 2.484907 2.484907 181 +build 1 85 2.484907 2.484907 184 +internet 1 83 2.484907 2.484907 186 +mondai 3 77 2.564949 7.694847 206 +refer 3 78 2.564949 7.694847 203 +issu 3 78 2.564949 7.694847 211 +homework 2 79 2.564949 5.129898 193 +messag 2 76 2.564949 5.129898 212 +sourc 1 77 2.564949 2.564949 201 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +complet 1 77 2.564949 2.564949 208 +tuesdai 8 73 2.639057 21.112456 219 +onlin 7 75 2.639057 18.473399 223 +materi 5 75 2.639057 13.195285 221 +intellig 3 72 2.639057 7.917171 225 +meet 2 72 2.639057 5.278114 229 +html 2 75 2.639057 5.278114 235 +free 2 73 2.639057 5.278114 224 +logic 1 71 2.639057 2.639057 230 +addit 1 74 2.639057 2.639057 228 +sieg 8 69 2.708050 21.664400 260 +thursdai 7 70 2.708050 18.956350 241 +window 4 68 2.708050 10.832200 242 +java 3 70 2.708050 8.124150 248 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +test 1 66 2.708050 2.708050 252 +receiv 1 66 2.708050 2.708050 244 +januari 13 62 2.772589 36.043657 264 +copi 9 63 2.772589 24.953301 284 +artifici 3 63 2.772589 8.317767 280 +wednesdai 2 64 2.772589 5.545178 261 +descript 2 64 2.772589 5.545178 271 +guid 1 63 2.772589 2.772589 267 +new 1 64 2.772589 2.772589 262 +import 1 65 2.772589 2.772589 282 +march 8 61 2.833213 22.665704 295 +room 5 59 2.833213 14.166065 301 +best 1 59 2.833213 2.833213 299 +locat 1 59 2.833213 2.833213 303 +sever 3 56 2.890372 8.671116 322 +unix 3 58 2.890372 8.671116 308 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +point 1 58 2.890372 2.890372 319 +detail 1 57 2.890372 2.890372 321 +februari 9 54 2.944439 26.499951 328 +cover 3 55 2.944439 8.833317 329 +extens 2 53 2.944439 5.888878 340 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +archiv 4 49 3.044522 12.178088 364 +standard 2 48 3.044522 6.089044 365 +visual 1 48 3.044522 3.044522 372 +format 1 48 3.044522 3.044522 356 +get 2 46 3.091042 6.182084 380 +understand 1 47 3.091042 3.091042 384 +quarter 1 47 3.091042 3.091042 389 +move 1 47 3.091042 3.091042 382 +midterm 3 45 3.135494 9.406482 392 +fridai 2 44 3.135494 6.270988 390 +answer 1 45 3.135494 3.135494 391 +anoth 1 45 3.135494 3.135494 408 +even 1 45 3.135494 3.135494 393 +mark 1 44 3.135494 3.135494 403 +offer 2 43 3.178054 6.356108 414 +show 1 43 3.178054 3.178054 417 +examin 4 42 3.218876 12.875504 424 +review 4 42 3.218876 12.875504 425 +edit 3 42 3.218876 9.656628 418 +past 1 42 3.218876 3.218876 428 +combin 1 42 3.218876 3.218876 421 +compani 1 41 3.218876 3.218876 423 +editor 1 41 3.218876 3.218876 433 +howev 1 41 3.218876 3.218876 422 +might 1 41 3.218876 3.218876 426 +tutori 4 39 3.258097 13.032388 437 +announc 1 40 3.258097 3.258097 441 +must 1 40 3.258097 3.258097 442 +multipl 1 39 3.258097 3.258097 453 +form 1 39 3.258097 3.258097 443 +close 2 38 3.295837 6.591674 465 +credit 1 38 3.295837 3.295837 460 +cost 1 37 3.332205 3.332205 480 +winter 2 36 3.367296 6.734592 500 +download 2 36 3.367296 6.734592 489 +post 2 35 3.401197 6.802394 505 +either 1 35 3.401197 3.401197 506 +approxim 1 35 3.401197 3.401197 509 +go 1 33 3.433987 3.433987 529 +given 2 32 3.465736 6.931472 538 +chapter 1 32 3.465736 3.465736 536 +common 9 30 3.555348 31.998132 574 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +hard 1 30 3.555348 3.555348 563 +option 1 30 3.555348 3.555348 575 +turn 3 29 3.583519 10.750557 586 +steve 2 29 3.583519 7.167038 594 +particip 1 29 3.583519 3.583519 589 +limit 1 29 3.583519 3.583519 585 +except 1 28 3.610918 3.610918 607 +full 1 28 3.610918 3.610918 615 +packag 1 28 3.610918 3.610918 614 +held 1 28 3.610918 3.610918 600 +symbol 1 27 3.637586 3.637586 620 +session 4 26 3.688879 14.755516 643 +pagecs 1 26 3.688879 3.688879 658 +rather 1 26 3.688879 3.688879 642 +relev 1 26 3.688879 3.688879 637 +altern 1 26 3.688879 3.688879 641 +although 1 25 3.737670 3.737670 667 +todai 1 25 3.737670 3.737670 672 +alwai 1 24 3.761200 3.761200 691 +interpret 1 24 3.761200 3.761200 686 +wish 1 24 3.761200 3.761200 692 +demonstr 1 24 3.761200 3.761200 694 +displai 1 23 3.806662 3.806662 712 +tent 1 22 3.850148 3.850148 739 +try 1 22 3.850148 3.850148 764 +varieti 1 22 3.850148 3.850148 740 +instead 1 22 3.850148 3.850148 756 +sent 1 22 3.850148 3.850148 763 +programminglanguag 1 21 3.912023 3.912023 782 +path 1 21 3.912023 3.912023 778 +viewer 1 21 3.912023 3.912023 787 +facil 5 20 3.951244 19.756220 814 +expert 1 20 3.951244 3.951244 833 +entir 1 20 3.951244 3.951244 811 +particularli 1 19 4.007333 4.007333 867 +lisp 22 18 4.060443 89.329746 897 +element 2 18 4.060443 8.120886 895 +seem 1 18 4.060443 4.060443 899 +macintosh 7 17 4.110874 28.776118 920 +stat 2 17 4.110874 8.221748 924 +whole 1 17 4.110874 4.110874 940 +regular 1 17 4.110874 4.110874 929 +modif 1 17 4.110874 4.110874 913 +transfer 1 16 4.174387 4.174387 967 +normal 1 16 4.174387 4.174387 995 +choic 1 16 4.174387 4.174387 979 +purchas 3 15 4.248495 12.745485 1030 +micro 1 15 4.248495 4.248495 1031 +easili 1 14 4.317488 4.317488 1077 +prolog 1 13 4.382027 4.382027 1155 +difficulti 1 13 4.382027 4.382027 1132 +emac 1 13 4.382027 4.382027 1143 +introduc 1 13 4.382027 4.382027 1139 +convert 1 13 4.382027 4.382027 1122 +host 2 11 4.553877 9.107754 1306 +transpar 1 11 4.553877 4.553877 1325 +abil 1 11 4.553877 4.553877 1341 +string 1 11 4.553877 4.553877 1340 +sens 1 11 4.553877 4.553877 1305 +tanimoto 2 10 4.653960 9.307920 1429 +bring 2 10 4.653960 9.307920 1430 +catalog 1 10 4.653960 4.653960 1431 +mainli 1 10 4.653960 4.653960 1432 +tradit 1 10 4.653960 4.653960 1404 +entitl 1 9 4.753590 4.753590 1490 +deadlin 1 9 4.753590 4.753590 1502 +attent 1 8 4.875197 4.875197 1651 +ruth 5 7 5.010635 25.053175 1870 +throughout 1 7 5.010635 5.010635 1871 +bookstor 1 7 5.010635 5.010635 1837 +fromth 1 7 5.010635 5.010635 1802 +fortun 1 7 5.010635 5.010635 1872 +remind 1 7 5.010635 5.010635 1799 +thompson 3 6 5.164786 15.494358 2049 +grammar 1 6 5.164786 5.164786 2058 +trail 1 6 5.164786 5.164786 2071 +fred 1 6 5.164786 5.164786 2072 +classroom 1 6 5.164786 5.164786 2006 +plu 1 6 5.164786 5.164786 2004 +mac 4 5 5.347108 21.388432 2292 +writeup 3 5 5.347108 16.041324 2352 +hardcopi 1 5 5.347108 5.347108 2246 +shell 1 5 5.347108 5.347108 2353 +respond 1 5 5.347108 5.347108 2354 +supplement 1 5 5.347108 5.347108 2355 +allegro 1 5 5.347108 5.347108 2314 +attract 1 5 5.347108 5.347108 2356 +freeman 1 4 5.568345 5.568345 2725 +basement 1 4 5.568345 5.568345 2663 +websit 1 4 5.568345 5.568345 2726 +andit 1 3 5.857933 5.857933 3328 +contentspag 1 3 5.857933 5.857933 3103 +orpostscript 1 3 5.857933 5.857933 3329 +mathematica 1 3 5.857933 5.857933 3303 +beginn 1 3 5.857933 5.857933 3330 +insieg 1 3 5.857933 5.857933 3331 +mscc 7 2 6.263398 43.843786 4406 +token 2 2 6.263398 12.526796 4415 +mileston 2 2 6.263398 12.526796 4416 +ofread 1 2 6.263398 6.263398 4417 +glossari 1 2 6.263398 6.263398 4418 +referenceon 1 2 6.263398 6.263398 4419 +usingcommon 1 2 6.263398 6.263398 4420 +themathemat 1 2 6.263398 6.263398 4421 +yacc 1 2 6.263398 6.263398 4422 +franz 1 2 6.263398 6.263398 4423 +thelaboratori 1 2 6.263398 6.263398 4424 +onthursdai 1 2 6.263398 6.263398 4425 +pencil 1 2 6.263398 6.263398 4426 +inthompson 2 1 6.957497 13.914994 7837 +koch 2 1 6.957497 13.914994 7838 +andersonmeet 1 1 6.957497 6.957497 7839 +andpars 1 1 6.957497 6.957497 7840 +incommon 1 1 6.957497 6.957497 7841 +purchasedsepar 1 1 6.957497 6.957497 7842 +fordigitool 1 1 6.957497 6.957497 7843 +thatmaintain 1 1 6.957497 6.957497 7844 +currentinform 1 1 6.957497 6.957497 7845 +introductionto 1 1 6.957497 6.957497 7846 +thatdoesn 1 1 6.957497 6.957497 7847 +promptli 1 1 6.957497 6.957497 7848 +theirimplement 1 1 6.957497 6.957497 7849 +buildingprogram 1 1 6.957497 6.957497 7850 +tointepret 1 1 6.957497 6.957497 7851 +alsolook 1 1 6.957497 6.957497 7852 +programmingfacil 1 1 6.957497 6.957497 7853 +thebas 1 1 6.957497 6.957497 7854 +allegrocommon 1 1 6.957497 6.957497 7855 +powerfulenviron 1 1 6.957497 6.957497 7856 +graphicsand 1 1 6.957497 6.957497 7857 +machinesof 1 1 6.957497 6.957497 7858 +theirown 1 1 6.957497 6.957497 7859 +xlisp 1 1 6.957497 6.957497 7860 +theseresourc 1 1 6.957497 6.957497 7861 +thatxlisp 1 1 6.957497 6.957497 7862 +bare 1 1 6.957497 6.957497 7863 +bone 1 1 6.957497 6.957497 7864 +nothav 1 1 6.957497 6.957497 7865 +disadvantag 1 1 6.957497 6.957497 7866 +labunless 1 1 6.957497 6.957497 7867 +fromdigitool 1 1 6.957497 6.957497 7868 +dealallow 1 1 6.957497 6.957497 7869 +lispfor 1 1 6.957497 6.957497 7870 +thistim 1 1 6.957497 6.957497 7871 +regardingread 1 1 6.957497 6.957497 7872 +printout 1 1 6.957497 6.957497 7873 +becov 1 1 6.957497 6.957497 7874 +announcedearli 1 1 6.957497 6.957497 7875 +snowflak 1 1 6.957497 6.957497 7876 +projectgener 1 1 6.957497 6.957497 7877 +aboutdemonstr 1 1 6.957497 6.957497 7878 +onmondai 1 1 6.957497 6.957497 7879 +exercisestokenizerassign 1 1 6.957497 6.957497 7880 +andpart 1 1 6.957497 6.957497 7881 +parsertokenizerpart 1 1 6.957497 6.957497 7882 +snowflakeassign 1 1 6.957497 6.957497 7883 +ondemonstr 1 1 6.957497 6.957497 7884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^415^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^415^ new file mode 100644 index 00000000..631e8d31 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^415^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 7 374 0.693147 4.852029 7 +inform 4 412 0.693147 2.772588 8 +offic 2 299 1.098612 2.197224 13 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +hour 3 165 1.791759 5.375277 46 +assign 8 135 1.945910 15.567280 66 +first 2 140 1.945910 3.891820 71 +professor 1 137 1.945910 1.945910 76 +note 1 142 1.945910 1.945910 67 +introduct 4 126 2.079442 8.317768 87 +postscript 3 131 2.079442 6.238326 90 +spring 1 131 2.079442 2.079442 88 +final 2 116 2.197225 4.394450 108 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +solut 3 82 2.484907 7.454721 162 +second 3 81 2.484907 7.454721 166 +exam 1 86 2.484907 2.484907 169 +april 4 77 2.564949 10.259796 196 +mondai 2 77 2.564949 5.129898 206 +homework 2 79 2.564949 5.129898 193 +june 1 79 2.564949 2.564949 214 +interfac 1 79 2.564949 2.564949 209 +intellig 2 72 2.639057 5.278114 225 +thursdai 1 70 2.708050 2.708050 241 +artifici 3 63 2.772589 8.317767 280 +type 1 61 2.833213 2.833213 296 +instruct 2 53 2.944439 5.888878 332 +sampl 1 53 2.944439 2.944439 339 +basic 1 50 3.044522 3.044522 360 +third 2 43 3.178054 6.356108 412 +download 1 36 3.367296 3.367296 489 +common 1 30 3.555348 3.555348 574 +lisp 1 18 4.060443 4.060443 897 +record 1 18 4.060443 4.060443 890 +outlin 1 17 4.110874 4.110874 914 +macintosh 1 17 4.110874 4.110874 920 +emac 2 13 4.382027 8.764054 1143 +rich 1 10 4.653960 4.653960 1396 +noon 1 7 5.010635 5.010635 1804 +thompson 1 6 5.164786 5.164786 2049 +allegro 2 5 5.347108 10.694216 2314 +gentl 1 5 5.347108 5.347108 2264 +csoffic 1 4 5.568345 5.568345 2727 +knight 1 4 5.568345 5.568345 2728 +turnin 1 4 5.568345 5.568345 2654 +holden 2 3 5.857933 11.715866 3314 +redston 2 3 5.857933 11.715866 3332 +alistair 1 3 5.857933 5.857933 3315 +joshua 1 3 5.857933 5.857933 3333 +noonta 1 2 6.263398 6.263398 4427 +secondedit 1 2 6.263398 6.263398 4096 +touretzki 1 2 6.263398 6.263398 4428 +refcard 2 1 6.957497 13.914994 7885 +intelligencecs 1 1 6.957497 6.957497 7886 +msoffic 1 1 6.957497 6.957497 7887 +symboliccomput 1 1 6.957497 6.957497 7888 +emacsinterfac 1 1 6.957497 6.957497 7889 +standalonelisp 1 1 6.957497 6.957497 7890 +gradesredston 1 1 6.957497 6.957497 7891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^421^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^421^ new file mode 100644 index 00000000..9415466f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^421^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +last 2 314 1.098612 2.197224 14 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +updat 2 191 1.609438 3.218876 41 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +algorithm 1 162 1.791759 1.791759 57 +read 1 154 1.791759 1.791759 47 +file 2 132 1.945910 3.891820 70 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +postscript 3 131 2.079442 6.238326 90 +provid 1 121 2.079442 2.079442 94 +check 1 115 2.197225 2.197225 118 +text 2 98 2.302585 4.605170 133 +book 1 99 2.302585 2.302585 131 +solut 3 82 2.484907 7.454721 162 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +free 2 73 2.639057 5.278114 224 +materi 1 75 2.639057 2.639057 221 +syllabu 3 67 2.708050 8.124150 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +previou 1 62 2.772589 2.772589 290 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +three 1 54 2.944439 2.944439 330 +format 4 48 3.044522 12.178088 356 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +math 1 44 3.135494 3.135494 402 +winter 3 36 3.367296 10.101888 500 +print 1 34 3.401197 3.401197 503 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +viewer 2 21 3.912023 7.824046 787 +martin 1 21 3.912023 3.912023 794 +latest 1 21 3.912023 3.912023 785 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +render 1 17 4.110874 4.110874 947 +ascii 1 15 4.248495 4.248495 1032 +latex 3 14 4.317488 12.952464 1064 +command 1 14 4.317488 4.317488 1083 +larri 1 13 4.382027 4.382027 1142 +karlin 1 13 4.382027 4.382027 1176 +web 1 12 4.465908 4.465908 1249 +errata 1 10 4.653960 4.653960 1403 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +legibl 2 7 5.010635 10.021270 1866 +adob 2 7 5.010635 10.021270 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +acrobat 5 6 5.164786 25.823930 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 2 5 5.347108 10.694216 2345 +thecours 1 4 5.568345 5.568345 2685 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +tompaclass 1 3 5.857933 5.857933 3310 +tompa 1 3 5.857933 5.857933 3305 +aberman 1 2 6.263398 6.263398 4429 +midtem 3 1 6.957497 20.872491 7892 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^431^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^431^ new file mode 100644 index 00000000..abc3f43d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^431^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +time 2 293 1.098612 2.197224 17 +offic 2 299 1.098612 2.197224 13 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +washington 3 236 1.386294 4.158882 32 +gener 2 220 1.386294 2.772588 27 +mail 2 238 1.386294 2.772588 22 +class 2 199 1.609438 3.218876 37 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +hour 2 165 1.791759 3.583518 46 +read 1 154 1.791759 1.791759 47 +problem 2 147 1.945910 3.891820 75 +file 2 132 1.945910 3.891820 70 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +spring 4 131 2.079442 8.317768 88 +postscript 2 131 2.079442 4.158884 90 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +final 6 116 2.197225 13.183350 108 +theori 2 111 2.197225 4.394450 127 +instructor 2 108 2.197225 4.394450 107 +text 1 98 2.302585 2.302585 133 +solut 1 82 2.484907 2.484907 162 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +sourc 1 77 2.564949 2.564949 201 +exampl 1 77 2.564949 2.564949 195 +free 2 73 2.639057 5.278114 224 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +syllabu 3 67 2.708050 8.124150 247 +sieg 2 69 2.708050 5.416100 260 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +three 1 54 2.944439 2.944439 330 +format 4 48 3.044522 12.178088 356 +midterm 6 45 3.135494 18.812964 392 +textbook 1 44 3.135494 3.135494 397 +math 1 44 3.135494 3.135494 402 +print 1 34 3.401197 3.401197 503 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +tent 1 22 3.850148 3.850148 739 +sent 1 22 3.850148 3.850148 763 +viewer 2 21 3.912023 7.824046 787 +latest 1 21 3.912023 3.912023 785 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +render 1 17 4.110874 4.110874 947 +ascii 1 15 4.248495 4.248495 1032 +latex 2 14 4.317488 8.634976 1064 +command 1 14 4.317488 4.317488 1083 +everyth 4 13 4.382027 17.528108 1169 +larri 2 13 4.382027 8.764054 1142 +web 1 12 4.465908 4.465908 1249 +errata 1 10 4.653960 4.653960 1403 +admin 1 9 4.753590 4.753590 1476 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +legibl 2 7 5.010635 10.021270 1866 +adob 2 7 5.010635 10.021270 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +acrobat 4 6 5.164786 20.659144 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 2 5 5.347108 10.694216 2345 +thecours 1 4 5.568345 5.568345 2685 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +sendmail 1 3 5.857933 5.857933 3099 +jayram 3 1 6.957497 20.872491 7893 +thathachar 1 1 6.957497 6.957497 7894 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^444^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^444^ new file mode 100644 index 00000000..ce64f5d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^444^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +offic 3 299 1.098612 3.295836 13 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +link 3 247 1.386294 4.158882 24 +washington 1 236 1.386294 1.386294 32 +hour 2 165 1.791759 3.583518 46 +assign 1 135 1.945910 1.945910 66 +click 1 142 1.945910 1.945910 78 +databas 3 122 2.079442 6.238326 86 +introduct 2 126 2.079442 4.158884 87 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +imag 2 91 2.397895 4.795790 161 +homework 1 79 2.564949 2.564949 193 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +sieg 2 69 2.708050 5.416100 260 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +prof 1 64 2.772589 2.772589 273 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +telephon 1 50 3.044522 3.044522 373 +quarter 1 47 3.091042 3.091042 389 +announc 1 40 3.258097 3.258097 441 +probabl 1 40 3.258097 3.258097 455 +word 1 34 3.401197 3.401197 508 +queri 1 33 3.433987 3.433987 524 +request 1 26 3.688879 3.688879 635 +left 1 19 4.007333 4.007333 851 +save 1 14 4.317488 4.317488 1099 +dbm 1 13 4.382027 4.382027 1136 +linda 1 10 4.653960 4.653960 1394 +shapiro 2 8 4.875197 9.750394 1686 +potenti 1 8 4.875197 4.875197 1690 +shift 1 5 5.347108 5.347108 2357 +systemsfal 1 4 5.568345 5.568345 2683 +patrick 1 3 5.857933 5.857933 3334 +qbic 1 3 5.857933 5.857933 3294 +systemscs 1 1 6.957497 6.957497 7895 +crowlei 1 1 6.957497 6.957497 7896 +pcrowlei 1 1 6.957497 6.957497 7897 +unisql 1 1 6.957497 6.957497 7898 +webcs 1 1 6.957497 6.957497 7899 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ new file mode 100644 index 00000000..62e9c9ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^451^CurrentQuarter^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +project 6 340 1.098612 6.591672 18 +cours 3 273 1.098612 3.295836 15 +offic 2 299 1.098612 2.197224 13 +washington 3 236 1.386294 4.158882 32 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 2 165 1.791759 3.583518 46 +avail 2 169 1.791759 3.583518 48 +lectur 4 135 1.945910 7.783640 73 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +relat 1 139 1.945910 1.945910 68 +click 1 142 1.945910 1.945910 78 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +instructor 3 108 2.197225 6.591675 107 +person 2 111 2.197225 4.394450 117 +send 1 114 2.197225 2.197225 109 +section 4 94 2.397895 9.591580 149 +grade 1 90 2.397895 2.397895 142 +solut 3 82 2.484907 7.454721 162 +info 1 85 2.484907 2.484907 176 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +materi 2 75 2.639057 5.278114 221 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +handout 1 64 2.772589 2.772589 263 +space 1 57 2.890372 2.890372 310 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +archiv 1 49 3.044522 3.044522 364 +midterm 2 45 3.135494 6.270988 392 +textbook 1 44 3.135494 3.135494 397 +brian 2 38 3.295837 6.591674 466 +slide 1 38 3.295837 3.295837 467 +word 1 34 3.401197 3.401197 508 +autumn 1 31 3.496508 3.496508 558 +scale 1 28 3.610918 3.610918 613 +actual 1 28 3.610918 3.610918 604 +administr 1 27 3.637586 3.637586 628 +wish 1 24 3.761200 3.761200 692 +sent 1 22 3.850148 3.850148 763 +feedback 3 19 4.007333 12.021999 854 +bershad 2 18 4.060443 8.120886 902 +intro 1 17 4.110874 4.110874 915 +outlin 1 17 4.110874 4.110874 914 +reflect 1 15 4.248495 4.248495 1034 +webmast 1 15 4.248495 4.248495 1045 +anonym 1 14 4.317488 4.317488 1100 +regularli 1 11 4.553877 4.553877 1338 +hint 1 10 4.653960 4.653960 1419 +sung 1 6 5.164786 5.164786 2075 +pace 1 6 5.164786 5.164786 2011 +carefulli 1 6 5.164786 5.164786 2045 +lost 1 5 5.347108 5.347108 2358 +choi 1 4 5.568345 5.568345 2732 +vital 1 4 5.568345 5.568345 2733 +surviv 1 4 5.568345 5.568345 2734 +aggress 1 3 5.857933 5.857933 3240 +andwil 1 3 5.857933 5.857933 3335 +wisdom 1 2 6.263398 6.263398 4430 +schedulewhat 1 2 6.263398 6.263398 4139 +adminth 1 1 6.957497 6.957497 7900 +andoth 1 1 6.957497 6.957497 7901 +projectsdescript 1 1 6.957497 6.957497 7902 +solutionsto 1 1 6.957497 6.957497 7903 +notesnot 1 1 6.957497 6.957497 7904 +watchthi 1 1 6.957497 6.957497 7905 +andgrad 1 1 6.957497 6.957497 7906 +onproject 1 1 6.957497 6.957497 7907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^457^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^457^ new file mode 100644 index 00000000..f8eec3a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^457^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 6 672 0.000000 0.000000 1 +page 5 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 2 273 1.098612 2.197224 15 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +class 1 199 1.609438 1.609438 37 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +relat 1 139 1.945910 1.945910 68 +document 2 121 2.079442 4.158884 89 +spring 2 131 2.079442 4.158884 88 +schedul 1 119 2.079442 2.079442 85 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +grade 1 90 2.397895 2.397895 142 +help 2 83 2.484907 4.969814 175 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +exampl 1 77 2.564949 2.564949 195 +write 1 72 2.639057 2.639057 222 +degre 2 69 2.708050 5.416100 259 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +instruct 1 53 2.944439 2.944439 332 +frequent 1 49 3.044522 3.044522 367 +visitor 1 49 3.044522 3.044522 371 +quarter 4 47 3.091042 12.364168 389 +netscap 2 44 3.135494 6.270988 395 +mean 1 37 3.332205 3.332205 477 +winter 1 36 3.367296 3.367296 500 +ad 1 32 3.465736 3.465736 544 +autumn 2 31 3.496508 6.993016 558 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +hypermedia 1 12 4.465908 4.465908 1247 +baker 1 7 5.010635 5.010635 1812 +silicon 1 6 5.164786 5.164786 2076 +opengl 1 5 5.347108 5.347108 2299 +bounti 1 4 5.568345 5.568345 2715 +pighin 1 4 5.568345 5.568345 2735 +assignmentshomework 1 4 5.568345 5.568345 2721 +thisdocu 1 3 5.857933 5.857933 3336 +quarterwelcom 1 2 6.263398 6.263398 4378 +indi 1 2 6.263398 6.263398 4431 +keepin 1 1 6.957497 6.957497 7908 +informationwil 1 1 6.957497 6.957497 7909 +classpersonnel 1 1 6.957497 6.957497 7910 +syllabuscours 1 1 6.957497 6.957497 7911 +calendarta 1 1 6.957497 6.957497 7912 +hourshandout 1 1 6.957497 6.957497 7913 +assignmentslectur 1 1 6.957497 6.957497 7914 +notesread 1 1 6.957497 6.957497 7915 +assignmentsprojectsproject 1 1 6.957497 6.957497 7916 +handoutsproject 1 1 6.957497 6.957497 7917 +artifactsproject 1 1 6.957497 6.957497 7918 +sessionsproject 1 1 6.957497 6.957497 7919 +policyproject 1 1 6.957497 6.957497 7920 +upslibui 1 1 6.957497 6.957497 7921 +documentationoth 1 1 6.957497 6.957497 7922 +informationget 1 1 6.957497 6.957497 7923 +classhearn 1 1 6.957497 6.957497 7924 +erratath 1 1 6.957497 6.957497 7925 +labus 1 1 6.957497 6.957497 7926 +pagegraph 1 1 6.957497 6.957497 7927 +linkssgi 1 1 6.957497 6.957497 7928 +surfgrafica 1 1 6.957497 6.957497 7929 +obscurasiggraphgrailgraph 1 1 6.957497 6.957497 7930 +indexoth 1 1 6.957497 6.957497 7931 +linksmvi 1 1 6.957497 6.957497 7932 +departmentth 1 1 6.957497 6.957497 7933 +programth 1 1 6.957497 6.957497 7934 +programweb 1 1 6.957497 6.957497 7935 +helpbas 1 1 6.957497 6.957497 7936 +helpmosa 1 1 6.957497 6.957497 7937 +lynxus 1 1 6.957497 6.957497 7938 +indyspighin 1 1 6.957497 6.957497 7939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^461^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^461^ new file mode 100644 index 00000000..50a7fdb8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^461^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +offic 5 299 1.098612 5.493060 13 +student 2 343 1.098612 2.197224 19 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +hour 4 165 1.791759 7.167036 46 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +document 3 121 2.079442 6.238326 89 +provid 2 121 2.079442 4.158884 94 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +html 4 75 2.639057 10.556228 235 +servic 2 72 2.639057 5.278114 236 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +sieg 1 69 2.708050 2.708050 260 +would 1 67 2.708050 2.708050 251 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +prof 1 64 2.772589 2.772589 273 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +browser 1 56 2.890372 2.890372 313 +cover 1 55 2.944439 2.944439 329 +three 1 54 2.944439 2.944439 330 +week 1 52 2.995732 2.995732 343 +format 5 48 3.044522 15.222610 356 +frequent 1 49 3.044522 3.044522 367 +discuss 1 45 3.135494 3.135494 399 +math 1 44 3.135494 3.135494 402 +might 1 41 3.218876 3.218876 426 +origin 1 38 3.295837 3.295837 472 +everi 1 34 3.401197 3.401197 519 +autumn 1 31 3.496508 3.496508 558 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +request 1 26 3.688879 3.688879 635 +consult 1 24 3.761200 3.761200 687 +handl 1 24 3.761200 3.761200 685 +thu 1 21 3.912023 3.912023 773 +fact 1 21 3.912023 3.912023 780 +viewer 1 21 3.912023 3.912023 787 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +letter 2 16 4.174387 8.348774 981 +indic 1 15 4.248495 4.248495 1013 +ascii 1 15 4.248495 4.248495 1032 +latex 2 14 4.317488 8.634976 1064 +command 1 14 4.317488 4.317488 1083 +convert 1 13 4.382027 4.382027 1122 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +larger 1 7 5.010635 5.010635 1875 +legibl 1 7 5.010635 5.010635 1866 +ghostscript 1 7 5.010635 5.010635 1867 +markup 1 6 5.164786 5.164786 2059 +strang 1 6 5.164786 5.164786 2064 +latexhtml 1 5 5.347108 5.347108 2347 +arun 1 4 5.568345 5.568345 2736 +csoffic 1 4 5.568345 5.568345 2727 +accommod 3 3 5.857933 17.573799 3337 +somani 3 2 6.263398 18.790194 4432 +cslectur 1 2 6.263398 6.263398 4433 +havea 1 2 6.263398 6.263398 4434 +disabl 4 1 6.957497 27.829988 7940 +jari 3 1 6.957497 20.872491 7941 +networksautumn 1 1 6.957497 6.957497 7942 +eebphon 1 1 6.957497 6.957497 7943 +kristensen 1 1 6.957497 6.957497 7944 +tomatch 1 1 6.957497 6.957497 7945 +andprovid 1 1 6.957497 6.957497 7946 +timewindow 1 1 6.957497 6.957497 7947 +overheadshomeworksprojectsinterest 1 1 6.957497 6.957497 7948 +stuffattentionif 1 1 6.957497 6.957497 7949 +pleasecontact 1 1 6.957497 6.957497 7950 +schmitz 1 1 6.957497 6.957497 7951 +requiresacadem 1 1 6.957497 6.957497 7952 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ new file mode 100644 index 00000000..2e6c8f20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^461^Sp96^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +hour 2 165 1.791759 3.583518 46 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +document 3 121 2.079442 6.238326 89 +provid 2 121 2.079442 4.158884 94 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +text 1 98 2.302585 2.302585 133 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +html 4 75 2.639057 10.556228 235 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +sieg 4 69 2.708050 10.832200 260 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +simpl 1 60 2.833213 2.833213 298 +browser 1 56 2.890372 2.890372 313 +three 1 54 2.944439 2.944439 330 +format 5 48 3.044522 15.222610 356 +frequent 1 49 3.044522 3.044522 367 +math 1 44 3.135494 3.135494 402 +origin 1 38 3.295837 3.295837 472 +richard 1 31 3.496508 3.496508 559 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +handl 1 24 3.761200 3.761200 685 +william 1 22 3.850148 3.850148 765 +fact 1 21 3.912023 3.912023 780 +viewer 1 21 3.912023 3.912023 787 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +intro 1 17 4.110874 4.110874 915 +ascii 1 15 4.248495 4.248495 1032 +latex 2 14 4.317488 8.634976 1064 +command 1 14 4.317488 4.317488 1083 +convert 1 13 4.382027 4.382027 1122 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +chan 1 7 5.010635 5.010635 1876 +legibl 1 7 5.010635 5.010635 1866 +ghostscript 1 7 5.010635 5.010635 1867 +ladner 3 6 5.164786 15.494358 2062 +markup 1 6 5.164786 5.164786 2059 +strang 1 6 5.164786 5.164786 2064 +latexhtml 1 5 5.347108 5.347108 2347 +csoffic 1 4 5.568345 5.568345 2727 +wchan 1 3 5.857933 5.857933 3338 +cslectur 1 2 6.263398 6.263398 4433 +noonta 1 2 6.263398 6.263398 4427 +eduwchan 1 2 6.263398 6.263398 4435 +networksspr 1 1 6.957497 6.957497 7953 +overheadshomeworksprojectsabout 1 1 6.957497 6.957497 7954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^467^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^467^ new file mode 100644 index 00000000..1c28a184 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^467^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +fall 2 181 1.609438 3.218876 40 +advanc 1 99 2.302585 2.302585 130 +found 1 53 2.944439 2.944439 337 +digit 1 52 2.995732 2.995732 348 +pagecs 1 26 3.688879 3.688879 658 +designt 1 2 6.263398 6.263398 4436 +kehl 1 2 6.263398 6.263398 4437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ new file mode 100644 index 00000000..754029d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^467^Fall96^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +data 4 170 1.791759 7.167036 49 +hour 2 165 1.791759 3.583518 46 +recent 1 167 1.791759 1.791759 58 +assign 1 135 1.945910 1.945910 66 +welcom 1 122 2.079442 2.079442 99 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +final 2 116 2.197225 4.394450 108 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +book 2 99 2.302585 4.605170 131 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +resourc 1 81 2.484907 2.484907 172 +academ 1 82 2.484907 2.484907 178 +homework 3 79 2.564949 7.694847 193 +server 2 76 2.564949 5.129898 204 +state 1 76 2.564949 2.564949 207 +sourc 1 77 2.564949 2.564949 201 +logic 4 71 2.639057 10.556228 230 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +test 1 66 2.708050 2.708050 252 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +laboratori 1 63 2.772589 2.772589 292 +written 1 63 2.772589 2.772589 278 +handout 1 64 2.772589 2.772589 263 +collect 1 65 2.772589 2.772589 268 +march 3 61 2.833213 8.499639 295 +februari 2 54 2.944439 5.888878 328 +sampl 2 53 2.944439 5.888878 339 +cover 1 55 2.944439 2.944439 329 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +adapt 1 46 3.091042 3.091042 387 +fridai 2 44 3.135494 6.270988 390 +mark 1 44 3.135494 3.135494 403 +combin 2 42 3.218876 6.437752 421 +review 1 42 3.218876 3.218876 425 +announc 1 40 3.258097 3.258097 441 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +staff 1 36 3.367296 3.367296 490 +copyright 1 36 3.367296 3.367296 495 +richard 1 31 3.496508 3.496508 559 +option 2 30 3.555348 7.110696 575 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +sequenti 2 22 3.850148 7.700296 745 +vlsi 1 21 3.912023 3.912023 795 +thur 1 19 4.007333 4.007333 847 +scott 1 18 4.060443 4.060443 884 +dilbert 1 16 4.174387 4.174387 996 +sheet 1 16 4.174387 4.174387 973 +portion 1 16 4.174387 4.174387 971 +comic 1 14 4.317488 4.317488 1103 +reprint 1 14 4.317488 4.317488 1097 +johnson 1 13 4.382027 4.382027 1162 +care 1 13 4.382027 4.382027 1177 +duli 1 12 4.465908 4.465908 1248 +tue 1 11 4.553877 4.553877 1308 +stephen 1 11 4.553877 4.553877 1342 +nonprofit 1 11 4.553877 4.553877 1339 +fpga 2 10 4.653960 9.307920 1433 +motorola 1 9 4.753590 4.753590 1546 +gaetano 1 6 5.164786 5.164786 2068 +philip 1 6 5.164786 5.164786 2005 +writeup 1 5 5.347108 5.347108 2352 +borriello 1 5 5.347108 5.347108 2349 +midnight 2 4 5.568345 11.136690 2599 +murphi 1 4 5.568345 5.568345 2737 +semiconductor 1 3 5.857933 5.857933 3339 +semiconduct 1 3 5.857933 5.857933 3340 +micron 1 3 5.857933 5.857933 3341 +kehl 2 2 6.263398 12.526796 4437 +designt 1 2 6.263398 6.263398 4436 +aaron 1 2 6.263398 6.263398 4438 +comprehensivelist 1 2 6.263398 6.263398 4439 +icmanufactur 1 2 6.263398 6.263398 4440 +optionlab 2 1 6.957497 13.914994 7955 +syllabusschedul 1 1 6.957497 6.957497 7956 +savoi 1 1 6.957497 6.957497 7957 +savac 1 1 6.957497 6.957497 7958 +chinn 1 1 6.957497 6.957497 7959 +richin 1 1 6.957497 6.957497 7960 +howard 1 1 6.957497 6.957497 7961 +shchang 1 1 6.957497 6.957497 7962 +csjason 1 1 6.957497 6.957497 7963 +quarterhomework 1 1 6.957497 6.957497 7964 +assignmentsweb 1 1 6.957497 6.957497 7965 +duehomework 1 1 6.957497 6.957497 7966 +abel 1 1 6.957497 6.957497 7967 +fixtur 1 1 6.957497 6.957497 7968 +communicationoth 1 1 6.957497 6.957497 7969 +sheetsth 1 1 6.957497 6.957497 7970 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^471^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^471^ new file mode 100644 index 00000000..396cf244 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^471^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 4 374 0.693147 2.772588 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +offic 2 299 1.098612 2.197224 13 +design 7 213 1.386294 9.704058 25 +mail 2 238 1.386294 2.772588 22 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +fall 2 181 1.609438 3.218876 40 +class 1 199 1.609438 1.609438 37 +read 10 154 1.791759 17.917590 47 +address 3 170 1.791759 5.375277 62 +hour 2 165 1.791759 3.583518 46 +data 2 170 1.791759 3.583518 49 +avail 1 169 1.791759 1.791759 48 +model 1 145 1.945910 1.945910 69 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +postscript 30 131 2.079442 62.383260 90 +instructor 1 108 2.197225 2.197225 107 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +memori 1 101 2.302585 2.302585 139 +follow 1 92 2.397895 2.397895 143 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +mondai 11 77 2.564949 28.214439 206 +homework 5 79 2.564949 12.824745 193 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +html 1 75 2.639057 2.639057 235 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +free 1 73 2.639057 2.639057 224 +sieg 3 69 2.708050 8.124150 260 +simul 2 66 2.708050 5.416100 255 +test 1 66 2.708050 2.708050 252 +wednesdai 11 64 2.772589 30.498479 261 +organ 2 65 2.772589 5.545178 265 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +previou 1 62 2.772589 2.772589 290 +simpl 1 60 2.833213 2.833213 298 +sampl 3 53 2.944439 8.833317 339 +instruct 2 53 2.944439 5.888878 332 +hardwar 1 51 2.995732 2.995732 350 +appoint 1 49 3.044522 3.044522 358 +quarter 1 47 3.091042 3.091042 389 +fridai 10 44 3.135494 31.354940 390 +answer 3 45 3.135494 9.406482 391 +midterm 1 45 3.135494 3.135494 392 +review 3 42 3.218876 9.656628 425 +fast 1 42 3.218876 3.218876 429 +form 1 39 3.258097 3.258097 443 +robert 1 30 3.555348 3.555348 567 +compon 1 30 3.555348 3.555348 570 +common 1 30 3.555348 3.555348 574 +revis 1 26 3.688879 3.688879 640 +hierarchi 1 22 3.850148 3.850148 744 +color 1 22 3.850148 3.850148 762 +unit 1 21 3.912023 3.912023 779 +binari 2 20 3.951244 7.902488 823 +prerequisit 1 19 4.007333 4.007333 846 +segment 2 17 4.110874 8.221748 931 +regist 1 17 4.110874 4.110874 938 +interconnect 1 17 4.110874 4.110874 937 +sheet 2 16 4.174387 8.348774 973 +transfer 1 16 4.174387 4.174387 967 +larri 1 13 4.382027 4.382027 1142 +assembl 3 12 4.465908 13.397724 1207 +holidai 2 12 4.465908 8.931816 1224 +loew 1 12 4.465908 4.465908 1252 +catalog 1 10 4.653960 4.653960 1431 +arithmet 1 10 4.653960 4.653960 1388 +modul 1 10 4.653960 4.653960 1434 +card 1 10 4.653960 4.653960 1435 +watson 1 8 4.875197 4.875197 1691 +pipelin 4 7 5.010635 20.042540 1830 +snyder 2 5 5.347108 10.694216 2359 +mip 2 4 5.568345 11.136690 2738 +microprogram 1 4 5.568345 5.568345 2604 +appendix 1 4 5.568345 5.568345 2739 +prog 1 4 5.568345 5.568345 2740 +verilog 4 2 6.263398 25.053592 4441 +judi 1 2 6.263398 6.263398 4442 +andorgan 1 2 6.263398 6.263398 4443 +skim 2 1 6.957497 13.914994 7971 +jwatson 1 1 6.957497 6.957497 7972 +chenoffic 1 1 6.957497 6.957497 7973 +thursdays 1 1 6.957497 6.957497 7974 +chensg 1 1 6.957497 6.957497 7975 +laboratoryproject 1 1 6.957497 6.957497 7976 +setprocessor 1 1 6.957497 6.957497 7977 +chap 1 1 6.957497 6.957497 7978 +referencesthi 1 1 6.957497 6.957497 7979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^473^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^473^ new file mode 100644 index 00000000..0ac7731e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^473^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 4 374 0.693147 2.772588 7 +system 3 443 0.693147 2.079441 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +cours 8 273 1.098612 8.788896 15 +us 5 329 1.098612 5.493060 16 +project 5 340 1.098612 5.493060 18 +offic 2 299 1.098612 2.197224 13 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +mail 4 238 1.386294 5.545176 22 +washington 2 236 1.386294 2.772588 32 +languag 2 227 1.386294 2.772588 26 +link 2 247 1.386294 2.772588 24 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +class 11 199 1.609438 17.703818 37 +list 2 201 1.609438 3.218876 39 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +hour 2 165 1.791759 3.583518 46 +implement 2 152 1.791759 3.583518 52 +read 2 154 1.791759 3.583518 47 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +assign 8 135 1.945910 15.567280 66 +hall 2 146 1.945910 3.891820 65 +lectur 2 135 1.945910 3.891820 73 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +schedul 4 119 2.079442 8.317768 85 +spring 2 131 2.079442 4.158884 88 +machin 2 129 2.079442 4.158884 95 +introduct 1 126 2.079442 2.079442 87 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +welcom 1 122 2.079442 2.079442 99 +report 1 131 2.079442 2.079442 92 +final 7 116 2.197225 15.380575 108 +topic 3 114 2.197225 6.591675 110 +send 2 114 2.197225 4.394450 109 +mathemat 2 108 2.197225 4.394450 123 +theori 2 111 2.197225 4.394450 127 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +world 1 115 2.197225 2.197225 126 +version 1 113 2.197225 2.197225 122 +book 4 99 2.302585 9.210340 131 +access 3 102 2.302585 6.907755 136 +part 2 98 2.302585 4.605170 129 +text 1 98 2.302585 2.302585 133 +take 1 97 2.302585 2.302585 134 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +search 2 95 2.397895 4.795790 155 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +exam 6 86 2.484907 14.909442 169 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +learn 1 86 2.484907 2.484907 170 +solut 1 82 2.484907 2.484907 162 +april 9 77 2.564949 23.084541 196 +mondai 6 77 2.564949 15.389694 206 +interfac 4 79 2.564949 10.259796 209 +messag 2 76 2.564949 5.129898 212 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +know 1 80 2.564949 2.564949 198 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +intellig 4 72 2.639057 10.556228 225 +tuesdai 4 73 2.639057 10.556228 219 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +meet 1 72 2.639057 2.639057 229 +sieg 7 69 2.708050 18.956350 260 +test 3 66 2.708050 8.124150 252 +window 2 68 2.708050 5.416100 242 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +wednesdai 5 64 2.772589 13.862945 261 +artifici 4 63 2.772589 11.090356 280 +creat 3 63 2.772589 8.317767 277 +new 2 64 2.772589 5.545178 262 +laboratori 1 63 2.772589 2.772589 292 +plan 1 65 2.772589 2.772589 272 +descript 1 64 2.772589 2.772589 271 +evalu 1 64 2.772589 2.772589 266 +march 3 61 2.833213 8.499639 295 +room 2 59 2.833213 5.666426 301 +best 1 59 2.833213 2.833213 299 +reason 2 57 2.890372 5.780744 318 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +archiv 1 49 3.044522 3.044522 364 +standard 1 48 3.044522 3.044522 365 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +featur 1 46 3.091042 3.091042 386 +understand 1 47 3.091042 3.091042 384 +move 1 47 3.091042 3.091042 382 +fridai 6 44 3.135494 18.812964 390 +midterm 3 45 3.135494 9.406482 392 +mark 2 44 3.135494 6.270988 403 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +term 2 43 3.178054 6.356108 411 +edit 3 42 3.218876 9.656628 418 +examin 3 42 3.218876 9.656628 424 +review 3 42 3.218876 9.656628 425 +vision 1 41 3.218876 3.218876 430 +form 2 39 3.258097 6.516194 443 +multipl 1 39 3.258097 3.258097 453 +announc 1 40 3.258097 3.258097 441 +continu 1 39 3.258097 3.258097 448 +credit 1 38 3.295837 3.295837 460 +close 1 38 3.295837 3.295837 465 +download 1 36 3.367296 3.367296 489 +staff 1 36 3.367296 3.367296 490 +post 4 35 3.401197 13.604788 505 +represent 2 35 3.401197 6.802394 512 +least 1 35 3.401197 3.401197 516 +either 1 35 3.401197 3.401197 506 +go 1 33 3.433987 3.433987 529 +kind 1 32 3.465736 3.465736 541 +given 1 32 3.465736 3.465736 538 +common 5 30 3.555348 17.776740 574 +neural 2 30 3.555348 7.110696 578 +hard 1 30 3.555348 3.555348 563 +option 1 30 3.555348 3.555348 575 +steve 2 29 3.583519 7.167038 594 +particip 1 29 3.583519 3.583519 589 +turn 1 29 3.583519 3.583519 586 +held 1 28 3.610918 3.610918 600 +propos 1 28 3.610918 3.610918 602 +progress 1 28 3.610918 3.610918 598 +session 2 26 3.688879 7.377758 643 +pagecs 1 26 3.688879 3.688879 658 +rather 1 26 3.688879 3.688879 642 +demonstr 1 24 3.761200 3.761200 694 +tent 1 22 3.850148 3.850148 739 +self 1 22 3.850148 3.850148 761 +try 1 22 3.850148 3.850148 764 +newsgroup 8 21 3.912023 31.296184 783 +expert 2 20 3.951244 7.902488 833 +entir 1 20 3.951244 3.951244 811 +facil 1 20 3.951244 3.951244 814 +separ 3 19 4.007333 12.021999 844 +excel 1 19 4.007333 4.007333 868 +exercis 1 19 4.007333 4.007333 842 +lisp 16 18 4.060443 64.967088 897 +element 2 18 4.060443 8.120886 895 +demo 2 18 4.060443 8.120886 888 +seem 1 18 4.060443 4.060443 899 +statu 1 18 4.060443 4.060443 885 +regular 1 17 4.110874 4.110874 929 +advantag 1 16 4.174387 4.174387 987 +choic 1 16 4.174387 4.174387 979 +explan 1 16 4.174387 4.174387 985 +portion 1 16 4.174387 4.174387 971 +purchas 2 15 4.248495 8.496990 1030 +workload 1 12 4.465908 4.465908 1210 +rest 1 12 4.465908 4.465908 1259 +holidai 1 12 4.465908 4.465908 1224 +sens 2 11 4.553877 9.107754 1305 +probabilist 1 11 4.553877 4.553877 1343 +tanimoto 2 10 4.653960 9.307920 1429 +bring 2 10 4.653960 9.307920 1430 +preliminari 2 9 4.753590 9.507180 1480 +implementationof 1 7 5.010635 5.010635 1813 +reduct 1 7 5.010635 5.010635 1877 +pentium 1 6 5.164786 5.164786 2077 +approv 1 6 5.164786 5.164786 2078 +jeremi 2 5 5.347108 10.694216 2360 +allegro 2 5 5.347108 10.694216 2314 +hardcopi 1 5 5.347108 5.347108 2246 +forprogram 1 5 5.347108 5.347108 2361 +attract 1 5 5.347108 5.347108 2356 +net 2 4 5.568345 11.136690 2741 +peer 2 4 5.568345 11.136690 2742 +freeman 1 4 5.568345 5.568345 2725 +screenshot 1 4 5.568345 5.568345 2743 +andit 1 3 5.857933 5.857933 3328 +contentspag 1 3 5.857933 5.857933 3103 +orpostscript 1 3 5.857933 5.857933 3329 +programmingtechniqu 1 3 5.857933 5.857933 3113 +insieg 1 3 5.857933 5.857933 3331 +evaluationof 1 3 5.857933 5.857933 3192 +assignmentsassign 1 3 5.857933 5.857933 3342 +youdon 1 2 6.263398 6.263398 4444 +referenceon 1 2 6.263398 6.263398 4419 +usingcommon 1 2 6.263398 6.263398 4420 +franz 1 2 6.263398 6.263398 4423 +inour 1 2 6.263398 6.263398 4445 +ofproject 1 2 6.263398 6.263398 4446 +csor 2 1 6.957497 13.914994 7980 +pnew 2 1 6.957497 13.914994 7981 +baermeet 1 1 6.957497 6.957497 7982 +windowsimplement 1 1 6.957497 6.957497 7983 +programdevelop 1 1 6.957497 6.957497 7984 +theintel 1 1 6.957497 6.957497 7985 +isfor 1 1 6.957497 6.957497 7986 +bedownload 1 1 6.957497 6.957497 7987 +givenaccord 1 1 6.957497 6.957497 7988 +alist 1 1 6.957497 6.957497 7989 +coversboth 1 1 6.957497 6.957497 7990 +logicalreason 1 1 6.957497 6.957497 7991 +clo 1 1 6.957497 6.957497 7992 +programmingpart 1 1 6.957497 6.957497 7993 +ofhow 1 1 6.957497 6.957497 7994 +circul 1 1 6.957497 6.957497 7995 +orturn 1 1 6.957497 6.957497 7996 +wrap 1 1 6.957497 6.957497 7997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^477^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^477^ new file mode 100644 index 00000000..a402967b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^477^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +offic 1 299 1.098612 1.098612 13 +washington 2 236 1.386294 2.772588 32 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +data 3 170 1.791759 5.375277 49 +hour 2 165 1.791759 3.583518 46 +recent 1 167 1.791759 1.791759 58 +assign 1 135 1.945910 1.945910 66 +area 1 144 1.945910 1.945910 80 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +seattl 1 120 2.079442 2.079442 103 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +manag 1 114 2.197225 2.197225 125 +book 2 99 2.302585 4.605170 131 +info 1 85 2.484907 2.484907 176 +resourc 1 81 2.484907 2.484907 172 +academ 1 82 2.484907 2.484907 178 +sourc 2 77 2.564949 5.129898 201 +server 1 76 2.564949 2.564949 204 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +syllabu 1 67 2.708050 2.708050 247 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +laboratori 1 63 2.772589 2.772589 292 +collect 1 65 2.772589 2.772589 268 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +adapt 1 46 3.091042 3.091042 387 +societi 2 40 3.258097 6.516194 456 +announc 1 40 3.258097 3.258097 441 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +robot 2 36 3.367296 6.734592 497 +staff 1 36 3.367296 3.367296 490 +copyright 1 36 3.367296 3.367296 495 +steve 1 29 3.583519 3.583519 594 +quot 1 29 3.583519 3.583519 582 +pagecs 1 26 3.688879 3.688879 658 +accur 1 25 3.737670 3.737670 680 +smith 1 20 3.951244 3.951244 820 +nice 1 20 3.951244 3.951244 809 +anderson 1 19 4.007333 4.007333 860 +dilbert 1 16 4.174387 4.174387 996 +sheet 1 16 4.174387 4.174387 973 +portion 1 16 4.174387 4.174387 971 +comic 1 14 4.317488 4.317488 1103 +reprint 1 14 4.317488 4.317488 1097 +care 1 13 4.382027 4.382027 1177 +loew 1 12 4.465908 4.465908 1252 +duli 1 12 4.465908 4.465908 1248 +stephen 1 11 4.553877 4.553877 1342 +nonprofit 1 11 4.553877 4.553877 1339 +motorola 1 9 4.753590 4.753590 1546 +portland 1 7 5.010635 5.010635 1878 +fred 1 6 5.164786 5.164786 2072 +gaetano 1 6 5.164786 5.164786 2068 +philip 1 6 5.164786 5.164786 2005 +borriello 1 5 5.347108 5.347108 2349 +kent 1 4 5.568345 5.568345 2744 +murphi 1 4 5.568345 5.568345 2737 +comprehens 1 4 5.568345 5.568345 2745 +semiconductor 1 3 5.857933 5.857933 3339 +semiconduct 1 3 5.857933 5.857933 3340 +burn 3 2 6.263398 18.790194 4447 +serverth 1 2 6.263398 6.263398 4448 +designstev 1 1 6.957497 6.957497 7998 +casei 1 1 6.957497 6.957497 7999 +studentslab 1 1 6.957497 6.957497 8000 +mchc 1 1 6.957497 6.957497 8001 +martinrobot 1 1 6.957497 6.957497 8002 +societyoth 1 1 6.957497 6.957497 8003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^490ani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^490ani^ new file mode 100644 index 00000000..4f8474bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^490ani^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +cours 2 273 1.098612 2.197224 15 +project 2 340 1.098612 2.197224 18 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +send 1 114 2.197225 2.197225 109 +final 1 116 2.197225 2.197225 108 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +handout 1 64 2.772589 2.772589 263 +march 1 61 2.833213 2.833213 295 +usual 1 28 3.610918 3.610918 608 +session 1 26 3.688879 3.688879 643 +pagewelcom 1 11 4.553877 4.553877 1344 +bug 1 7 5.010635 5.010635 1801 +encount 1 3 5.857933 5.857933 3128 +bevi 1 1 6.957497 6.957497 8004 +relatingto 1 1 6.957497 6.957497 8005 +frequentlychang 1 1 6.957497 6.957497 8006 +bswest 1 1 6.957497 6.957497 8007 +csif 1 1 6.957497 6.957497 8008 +classpersonnelsyllabuslectur 1 1 6.957497 6.957497 8009 +scheduleguest 1 1 6.957497 6.957497 8010 +scheduleoffic 1 1 6.957497 6.957497 8011 +hoursproject 1 1 6.957497 6.957497 8012 +projectoth 1 1 6.957497 6.957497 8013 +erratarefer 1 1 6.957497 6.957497 8014 +pagesmidterm 1 1 6.957497 6.957497 8015 +questionnairebswest 1 1 6.957497 6.957497 8016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^501^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^501^ new file mode 100644 index 00000000..c357e4ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^501^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +last 2 314 1.098612 2.197224 14 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +languag 4 227 1.386294 5.545176 26 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +list 3 201 1.609438 4.828314 39 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +avail 11 169 1.791759 19.709349 48 +hour 2 165 1.791759 3.583518 46 +read 2 154 1.791759 3.583518 47 +implement 1 152 1.791759 1.791759 52 +year 4 148 1.945910 7.783640 84 +assign 3 135 1.945910 5.837730 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +file 1 132 1.945910 1.945910 70 +compil 4 122 2.079442 8.317768 96 +postscript 4 131 2.079442 8.317768 90 +version 3 113 2.197225 6.591675 122 +final 2 116 2.197225 4.394450 108 +instructor 1 108 2.197225 2.197225 107 +question 1 91 2.397895 2.397895 141 +exam 4 86 2.484907 9.939628 169 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +refer 3 78 2.564949 7.694847 203 +homework 1 79 2.564949 2.564949 193 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +html 1 75 2.639057 2.639057 235 +sieg 2 69 2.708050 5.416100 260 +test 1 66 2.708050 2.708050 252 +import 1 65 2.772589 2.772589 282 +handout 1 64 2.772589 2.772589 263 +descript 1 64 2.772589 2.772589 271 +previou 1 62 2.772589 2.772589 290 +simpl 1 60 2.833213 2.833213 298 +sampl 1 53 2.944439 2.944439 339 +found 1 53 2.944439 2.944439 337 +week 1 52 2.995732 2.995732 343 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +quarter 2 47 3.091042 6.182084 389 +answer 3 45 3.135494 9.406482 391 +midterm 2 45 3.135494 6.270988 392 +slide 1 38 3.295837 3.295837 467 +manual 2 35 3.401197 6.802394 504 +kind 1 32 3.465736 3.465736 541 +full 1 28 3.610918 3.610918 615 +pagecs 1 26 3.688879 3.688879 658 +sent 1 22 3.850148 3.850148 763 +floor 1 14 4.317488 4.317488 1070 +front 1 13 4.382027 4.382027 1154 +cecil 6 9 4.753590 28.521540 1547 +chamber 2 8 4.875197 9.750394 1692 +leon 1 8 4.875197 4.875197 1631 +affect 1 6 5.164786 5.164786 2044 +textual 1 6 5.164786 5.164786 1979 +vortex 4 5 5.347108 21.388432 2362 +travers 2 5 5.347108 10.694216 2363 +litvinov 1 3 5.857933 5.857933 3343 +vass 2 2 6.263398 12.526796 4449 +informationmeet 1 2 6.263398 6.263398 4450 +cubicl 1 2 6.263398 6.263398 4451 +archivesslid 1 2 6.263398 6.263398 4452 +informationhandout 1 2 6.263398 6.263398 4163 +tutorialth 1 2 6.263398 6.263398 4453 +onmark 1 2 6.263398 6.263398 4454 +languageswint 1 1 6.957497 6.957497 8017 +craigchamb 1 1 6.957497 6.957497 8018 +archivedher 1 1 6.957497 6.957497 8019 +closedbook 1 1 6.957497 6.957497 8020 +wereask 1 1 6.957497 6.957497 8021 +tutorialsth 1 1 6.957497 6.957497 8022 +tutorialhow 1 1 6.957497 6.957497 8023 +enda 1 1 6.957497 6.957497 8024 +interestdead 1 1 6.957497 6.957497 8025 +elim 1 1 6.957497 6.957497 8026 +idfacfg 1 1 6.957497 6.957497 8027 +frameworkvortex 1 1 6.957497 6.957497 8028 +grammarcecil 1 1 6.957497 6.957497 8029 +documentationdocument 1 1 6.957497 6.957497 8030 +resourcesth 1 1 6.957497 6.957497 8031 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^501^95^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^501^95^ new file mode 100644 index 00000000..d1deb26a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^501^95^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +program 3 374 0.693147 2.079441 7 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +cours 2 273 1.098612 2.197224 15 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +languag 4 227 1.386294 5.545176 26 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +list 2 201 1.609438 3.218876 39 +class 1 199 1.609438 1.609438 37 +avail 5 169 1.791759 8.958795 48 +implement 2 152 1.791759 3.583518 52 +hour 2 165 1.791759 3.583518 46 +read 2 154 1.791759 3.583518 47 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +object 1 138 1.945910 1.945910 79 +compil 3 122 2.079442 6.238326 96 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +instructor 1 108 2.197225 2.197225 107 +find 1 111 2.197225 2.197225 111 +come 1 78 2.564949 2.564949 202 +messag 1 76 2.564949 2.564949 212 +orient 1 80 2.564949 2.564949 205 +sieg 2 69 2.708050 5.416100 260 +handout 1 64 2.772589 2.772589 263 +written 1 63 2.772589 2.772589 278 +room 1 59 2.833213 2.833213 301 +found 3 53 2.944439 8.833317 337 +archiv 1 49 3.044522 3.044522 364 +slide 1 38 3.295837 3.295837 467 +manual 1 35 3.401197 3.401197 504 +full 1 28 3.610918 3.610918 615 +pagecs 1 26 3.688879 3.688879 658 +jeff 1 25 3.737670 3.737670 673 +sent 1 22 3.850148 3.850148 763 +sort 1 22 3.850148 3.850148 738 +chateau 2 16 4.174387 8.348774 997 +dean 1 14 4.317488 4.317488 1104 +dave 1 14 4.317488 4.317488 1098 +cecil 3 9 4.753590 14.260770 1547 +chamber 2 8 4.875197 9.750394 1692 +grove 2 8 4.875197 9.750394 1675 +leon 1 8 4.875197 4.875197 1631 +craig 1 7 5.010635 5.010635 1879 +vortex 3 5 5.347108 16.041324 2362 +projectth 1 3 5.857933 5.857933 3344 +jdean 2 2 6.263398 12.526796 4455 +informationmeet 1 2 6.263398 6.263398 4450 +archivesslid 1 2 6.263398 6.263398 4452 +optimizingcompil 1 2 6.263398 6.263398 4456 +cecilproject 1 2 6.263398 6.263398 4457 +onmark 1 2 6.263398 6.263398 4454 +languagesimport 1 1 6.957497 6.957497 8032 +turori 1 1 6.957497 6.957497 8033 +andtransform 1 1 6.957497 6.957497 8034 +resourcesmor 1 1 6.957497 6.957497 8035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^503^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^503^ new file mode 100644 index 00000000..50a56f4b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^503^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +softwar 1 220 1.386294 1.386294 30 +washington 1 236 1.386294 1.386294 32 +assign 5 135 1.945910 9.729550 66 +spring 1 131 2.079442 2.079442 88 +handout 1 64 2.772589 2.772589 263 +sampl 1 53 2.944439 2.944439 339 +pagecs 1 26 3.688879 3.688879 658 +introductori 1 9 4.753590 4.753590 1479 +notkin 1 3 5.857933 5.857933 3345 +engineeringdavid 1 1 6.957497 6.957497 8036 +kwic 1 1 6.957497 6.957497 8037 +projectsnotkin 1 1 6.957497 6.957497 8038 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ new file mode 100644 index 00000000..7ff6febd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^505^CurrentQuarter^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +program 4 374 0.693147 2.772588 7 +research 2 431 0.693147 1.386294 10 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +languag 6 227 1.386294 8.317764 26 +mail 4 238 1.386294 5.545176 22 +washington 3 236 1.386294 4.158882 32 +list 5 201 1.609438 8.047190 39 +hour 2 165 1.791759 3.583518 46 +assign 1 135 1.945910 1.945910 66 +object 1 138 1.945910 1.945910 79 +postscript 1 131 2.079442 2.079442 90 +studi 1 120 2.079442 2.079442 91 +introduct 1 126 2.079442 2.079442 87 +send 3 114 2.197225 6.591675 109 +instructor 1 108 2.197225 2.197225 107 +question 1 91 2.397895 2.397895 141 +resourc 2 81 2.484907 4.969814 172 +info 1 85 2.484907 2.484907 176 +messag 2 76 2.564949 5.129898 212 +orient 2 80 2.564949 5.129898 205 +refer 1 78 2.564949 2.564949 203 +david 1 71 2.639057 2.639057 232 +html 1 75 2.639057 2.639057 235 +line 1 75 2.639057 2.639057 231 +sieg 1 69 2.708050 2.708050 260 +handout 2 64 2.772589 5.545178 263 +archiv 1 49 3.044522 3.044522 364 +standard 1 48 3.044522 3.044522 365 +mark 1 44 3.135494 3.135494 403 +singl 1 34 3.401197 3.401197 510 +concept 1 32 3.465736 3.465736 537 +administr 1 27 3.637586 3.637586 628 +pagecs 1 26 3.688879 3.688879 658 +subject 1 26 3.688879 3.688879 647 +wish 1 24 3.761200 3.761200 692 +yahoo 1 24 3.761200 3.761200 707 +thread 2 23 3.806662 7.613324 722 +sent 1 22 3.850148 3.850148 763 +self 1 22 3.850148 3.850148 761 +scheme 1 20 3.951244 3.951244 818 +excel 1 19 4.007333 4.007333 868 +previous 1 17 4.110874 4.110874 923 +floor 1 14 4.317488 4.317488 1070 +bodi 1 13 4.382027 4.382027 1178 +mellon 1 13 4.382027 4.382027 1179 +calculu 2 12 4.465908 8.931816 1203 +loew 1 12 4.465908 4.465908 1252 +carnegi 1 12 4.465908 4.465908 1260 +appl 1 11 4.553877 4.553877 1303 +subscrib 2 9 4.753590 9.507180 1541 +kurt 1 9 4.753590 4.753590 1548 +introductori 1 9 4.753590 4.753590 1479 +cecil 1 9 4.753590 4.753590 1547 +leon 1 8 4.875197 4.875197 1631 +dylan 1 8 4.875197 4.875197 1625 +majordomo 1 6 5.164786 5.164786 2066 +gentl 1 5 5.347108 5.347108 2264 +notkin 2 3 5.857933 11.715866 3345 +partridg 1 3 5.857933 5.857933 3346 +lambda 2 2 6.263398 12.526796 4458 +kepart 1 2 6.263398 6.263398 4459 +monash 1 2 6.263398 6.263398 4460 +languagesautumn 1 1 6.957497 6.957497 8039 +byappoint 1 1 6.957497 6.957497 8040 +cubiclescours 1 1 6.957497 6.957497 8041 +readingsmail 1 1 6.957497 6.957497 8042 +archivesw 1 1 6.957497 6.957497 8043 +instructionalpurpos 1 1 6.957497 6.957497 8044 +emailto 1 1 6.957497 6.957497 8045 +csegener 1 1 6.957497 6.957497 8046 +pagesprogram 1 1 6.957497 6.957497 8047 +critiquesgari 1 1 6.957497 6.957497 8048 +leaven 1 1 6.957497 6.957497 8049 +pagefunct 1 1 6.957497 6.957497 8050 +resourcesmit 1 1 6.957497 6.957497 8051 +pagecmu 1 1 6.957497 6.957497 8052 +pagea 1 1 6.957497 6.957497 8053 +mlhaskel 1 1 6.957497 6.957497 8054 +universityobject 1 1 6.957497 6.957497 8055 +geneva 1 1 6.957497 6.957497 8056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^505^fall94 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^505^fall94 new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^505^fall94 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^521^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^521^ new file mode 100644 index 00000000..1b2be26d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^521^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +project 3 340 1.098612 3.295836 18 +offic 2 299 1.098612 2.197224 13 +time 2 293 1.098612 2.197224 17 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +washington 4 236 1.386294 5.545176 32 +design 2 213 1.386294 2.772588 25 +gener 1 220 1.386294 1.386294 27 +class 4 199 1.609438 6.437752 37 +list 1 201 1.609438 1.609438 39 +hour 4 165 1.791759 7.167036 46 +algorithm 2 162 1.791759 3.583518 57 +base 1 165 1.791759 1.791759 50 +lectur 2 135 1.945910 3.891820 73 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +analysi 2 124 2.079442 4.158884 98 +confer 2 126 2.079442 4.158884 100 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +theori 1 111 2.197225 2.197225 127 +book 1 99 2.302585 2.302585 131 +question 1 91 2.397895 2.397895 141 +solut 8 82 2.484907 19.879256 162 +exam 6 86 2.484907 14.909442 169 +learn 1 86 2.484907 2.484907 170 +homework 14 79 2.564949 35.909286 193 +mondai 3 77 2.564949 7.694847 206 +know 1 80 2.564949 2.564949 198 +write 2 72 2.639057 5.278114 222 +tuesdai 1 73 2.639057 2.639057 219 +materi 1 75 2.639057 2.639057 221 +solv 1 73 2.639057 2.639057 234 +sieg 1 69 2.708050 2.708050 260 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +written 1 63 2.772589 2.772589 278 +room 2 59 2.833213 5.666426 301 +march 1 61 2.833213 2.833213 295 +undergradu 1 54 2.944439 2.944439 338 +suggest 1 53 2.944439 2.944439 331 +cover 1 55 2.944439 2.944439 329 +set 2 50 3.044522 6.089044 361 +appoint 1 49 3.044522 3.044522 358 +possibl 1 47 3.091042 3.091042 378 +could 1 46 3.091042 3.091042 383 +discuss 2 45 3.135494 6.270988 399 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +realli 1 40 3.258097 3.258097 444 +probabl 1 40 3.258097 3.258097 455 +must 1 40 3.258097 3.258097 442 +close 1 38 3.295837 3.295837 465 +winter 1 36 3.367296 3.367296 500 +soon 1 36 3.367296 3.367296 494 +short 1 36 3.367296 3.367296 499 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +richard 1 31 3.496508 3.496508 559 +particip 1 29 3.583519 3.583519 589 +rule 1 26 3.688879 3.688879 638 +background 1 25 3.737670 3.737670 664 +togeth 1 23 3.806662 3.806662 714 +william 1 22 3.850148 3.850148 765 +half 1 21 3.912023 3.912023 776 +anderson 4 19 4.007333 16.029332 860 +els 1 19 4.007333 4.007333 843 +prerequisit 1 19 4.007333 4.007333 846 +assum 1 19 4.007333 4.007333 845 +chateau 1 16 4.174387 4.174387 997 +alreadi 1 16 4.174387 4.174387 963 +quiz 1 16 4.174387 4.174387 990 +upon 1 16 4.174387 4.174387 978 +anyth 1 16 4.174387 4.174387 998 +floor 1 14 4.317488 4.317488 1070 +script 1 13 4.382027 4.382027 1171 +verifi 1 12 4.465908 4.465908 1261 +island 2 11 4.553877 9.107754 1345 +errata 1 10 4.653960 4.653960 1403 +classmat 1 9 4.753590 4.753590 1516 +equival 1 9 4.753590 4.753590 1496 +told 1 8 4.875197 4.875197 1658 +chan 1 7 5.010635 5.010635 1876 +wrong 1 6 5.164786 5.164786 2025 +lack 1 6 5.164786 5.164786 1994 +invok 1 6 5.164786 5.164786 2079 +understood 1 5 5.347108 5.347108 2364 +cancel 1 4 5.568345 5.568345 2746 +episod 1 4 5.568345 5.568345 2747 +wchan 1 3 5.857933 5.857933 3338 +preview 1 3 5.857933 5.857933 3306 +algorithmscs 1 2 6.263398 6.263398 4461 +seig 1 2 6.263398 6.263398 4462 +cubicl 1 2 6.263398 6.263398 4451 +somebodi 1 2 6.263398 6.263398 4463 +outer 1 2 6.263398 6.263398 4464 +okai 1 2 6.263398 6.263398 4465 +eduwchan 1 2 6.263398 6.263398 4435 +gilligan 3 1 6.957497 20.872491 8057 +readingtextbook 1 1 6.957497 6.957497 8058 +sapplet 1 1 6.957497 6.957497 8059 +willconsist 1 1 6.957497 6.957497 8060 +bureaucrat 1 1 6.957497 6.957497 8061 +stuffgrad 1 1 6.957497 6.957497 8062 +homeworkproblem 1 1 6.957497 6.957497 8063 +upindepend 1 1 6.957497 6.957497 8064 +betweenani 1 1 6.957497 6.957497 8065 +mustwatch 1 1 6.957497 6.957497 8066 +thatan 1 1 6.957497 6.957497 8067 +reboot 1 1 6.957497 6.957497 8068 +thatsurv 1 1 6.957497 6.957497 8069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^524^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^524^ new file mode 100644 index 00000000..2e83f1f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^524^ @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +cours 12 273 1.098612 13.183344 15 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +gener 2 220 1.386294 2.772588 27 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +paper 4 205 1.609438 6.437752 38 +list 2 201 1.609438 3.218876 39 +parallel 10 169 1.791759 17.917590 60 +algorithm 6 162 1.791759 10.750554 57 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +base 1 165 1.791759 1.791759 50 +problem 4 147 1.945910 7.783640 75 +note 3 142 1.945910 5.837730 67 +lectur 3 135 1.945910 5.837730 73 +model 2 145 1.945910 3.891820 69 +year 2 148 1.945910 3.891820 84 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +machin 3 129 2.079442 6.238326 95 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +introduct 1 126 2.079442 2.079442 87 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +topic 8 114 2.197225 17.577800 110 +theori 4 111 2.197225 8.788900 127 +instructor 2 108 2.197225 4.394450 107 +teach 2 108 2.197225 4.394450 112 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +specif 1 106 2.197225 2.197225 106 +memori 5 101 2.302585 11.512925 139 +book 3 99 2.302585 6.907755 131 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +text 1 98 2.302585 2.302585 133 +real 2 93 2.397895 4.795790 144 +select 1 91 2.397895 2.397895 154 +section 1 94 2.397895 2.397895 149 +present 1 91 2.397895 2.397895 145 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +homework 7 79 2.564949 17.954643 193 +april 3 77 2.564949 7.694847 196 +refer 1 78 2.564949 2.564949 203 +come 1 78 2.564949 2.564949 202 +effici 3 73 2.639057 7.917171 233 +meet 1 72 2.639057 2.639057 229 +addit 1 74 2.639057 2.639057 228 +tuesdai 1 73 2.639057 2.639057 219 +write 1 72 2.639057 2.639057 222 +thursdai 4 70 2.708050 10.832200 241 +sieg 2 69 2.708050 5.416100 260 +syllabu 2 67 2.708050 5.416100 247 +would 1 67 2.708050 2.708050 251 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +descript 3 64 2.772589 8.317767 271 +collect 1 65 2.772589 2.772589 268 +copi 1 63 2.772589 2.772589 284 +plan 1 65 2.772589 2.772589 272 +result 1 65 2.772589 2.772589 281 +share 3 59 2.833213 8.499639 304 +content 1 59 2.833213 2.833213 302 +major 1 56 2.890372 2.890372 315 +special 1 56 2.890372 2.890372 320 +think 1 57 2.890372 2.890372 314 +cover 1 55 2.944439 2.944439 329 +three 1 54 2.944439 2.944439 330 +particular 1 51 2.995732 2.995732 352 +pointer 2 48 3.044522 6.089044 368 +approach 1 48 3.044522 3.044522 366 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +could 4 46 3.091042 12.364168 383 +term 1 43 3.178054 3.178054 411 +howev 2 41 3.218876 6.437752 422 +fast 1 42 3.218876 3.218876 429 +probabl 1 40 3.258097 3.258097 455 +correct 1 38 3.295837 3.295837 462 +close 1 38 3.295837 3.295837 465 +origin 1 38 3.295837 3.295837 472 +open 1 38 3.295837 3.295837 469 +connect 2 37 3.332205 6.664410 485 +expect 2 37 3.332205 6.664410 484 +cost 1 37 3.332205 3.332205 480 +feel 1 37 3.332205 3.332205 483 +next 1 34 3.401197 3.401197 517 +go 2 33 3.433987 6.867974 529 +taught 1 33 3.433987 3.433987 526 +richard 1 31 3.496508 3.496508 559 +titl 1 31 3.496508 3.496508 556 +graph 1 30 3.555348 3.555348 576 +compon 1 30 3.555348 3.555348 570 +exist 1 30 3.555348 3.555348 569 +consid 2 29 3.583519 7.167038 590 +limit 1 29 3.583519 3.583519 585 +progress 1 28 3.610918 3.610918 598 +quit 1 27 3.637586 3.637586 633 +mind 1 27 3.637586 3.637586 632 +challeng 1 26 3.688879 3.688879 653 +although 2 25 3.737670 7.475340 667 +fundament 1 25 3.737670 3.737670 661 +supercomput 1 25 3.737670 3.737670 681 +other 1 24 3.761200 3.761200 697 +sort 1 22 3.850148 3.850148 738 +emphasi 1 22 3.850148 3.850148 755 +instead 1 22 3.850148 3.850148 756 +theorem 1 21 3.912023 3.912023 786 +flexibl 1 21 3.912023 3.912023 792 +half 1 21 3.912023 3.912023 776 +nice 2 20 3.951244 7.902488 809 +anderson 4 19 4.007333 16.029332 860 +prerequisit 1 19 4.007333 4.007333 846 +spend 1 19 4.007333 4.007333 850 +prove 1 19 4.007333 4.007333 848 +four 1 18 4.060443 4.060443 905 +matrix 1 17 4.110874 4.110874 933 +interconnect 1 17 4.110874 4.110874 937 +upon 1 16 4.174387 4.174387 978 +choic 1 16 4.174387 4.174387 979 +mayb 1 15 4.248495 4.248495 1014 +indic 1 15 4.248495 4.248495 1013 +purchas 1 15 4.248495 4.248495 1030 +rank 1 14 4.317488 4.317488 1063 +latex 1 14 4.317488 4.317488 1064 +topolog 1 14 4.317488 4.317488 1089 +consider 1 14 4.317488 4.317488 1076 +happi 1 14 4.317488 4.317488 1079 +insid 1 12 4.465908 4.465908 1262 +asynchron 1 12 4.465908 4.465908 1229 +transpar 1 11 4.553877 4.553877 1325 +sens 1 11 4.553877 4.553877 1305 +motiv 1 11 4.553877 4.553877 1346 +volum 1 11 4.553877 4.553877 1347 +catalog 1 10 4.653960 4.653960 1431 +arithmet 1 10 4.653960 4.653960 1388 +tradit 1 10 4.653960 4.653960 1404 +equival 1 9 4.753590 4.753590 1496 +routin 1 9 4.753590 4.753590 1549 +foc 1 7 5.010635 5.010635 1880 +uniform 1 7 5.010635 5.010635 1845 +plu 1 6 5.164786 5.164786 2004 +consensu 1 6 5.164786 5.164786 2080 +situat 1 5 5.347108 5.347108 2365 +volunt 1 5 5.347108 5.347108 2307 +algorithmsfor 1 4 5.568345 5.568345 2748 +ullman 1 4 5.568345 5.568345 2749 +union 1 4 5.568345 5.568345 2634 +buss 1 4 5.568345 5.568345 2649 +manuscript 1 4 5.568345 5.568345 2750 +wewil 1 4 5.568345 5.568345 2688 +cheap 1 4 5.568345 5.568345 2751 +chose 1 4 5.568345 5.568345 2629 +rambl 1 3 5.857933 5.857933 3308 +crew 1 3 5.857933 5.857933 3347 +impli 1 3 5.857933 5.857933 3348 +pertain 1 3 5.857933 5.857933 3208 +andyou 1 3 5.857933 5.857933 3256 +parallelalgorithm 1 3 5.857933 5.857933 3249 +influenc 1 3 5.857933 5.857933 3349 +algorithmscs 1 2 6.263398 6.263398 4461 +algorithmi 1 2 6.263398 6.263398 4208 +simpler 1 2 6.263398 6.263398 4210 +swap 1 2 6.263398 6.263398 4466 +exception 1 2 6.263398 6.263398 4467 +bake 1 2 6.263398 6.263398 4468 +ideason 1 2 6.263398 6.263398 4469 +appointment 1 1 6.957497 6.957497 8070 +developingfast 1 1 6.957497 6.957497 8071 +theirefficaci 1 1 6.957497 6.957497 8072 +commentsabout 1 1 6.957497 6.957497 8073 +analysisfor 1 1 6.957497 6.957497 8074 +referencesfor 1 1 6.957497 6.957497 8075 +erew 1 1 6.957497 6.957497 8076 +yannakaki 1 1 6.957497 6.957497 8077 +certifi 1 1 6.957497 6.957497 8078 +likelysometh 1 1 6.957497 6.957497 8079 +martel 1 1 6.957497 6.957497 8080 +whim 1 1 6.957497 6.957497 8081 +smpc 1 1 6.957497 6.957497 8082 +lookingat 1 1 6.957497 6.957497 8083 +isnon 1 1 6.957497 6.957497 8084 +notconsid 1 1 6.957497 6.957497 8085 +indevelop 1 1 6.957497 6.957497 8086 +algorithmswhich 1 1 6.957497 6.957497 8087 +conceiv 1 1 6.957497 6.957497 8088 +goingto 1 1 6.957497 6.957497 8089 +outsidework 1 1 6.957497 6.957497 8090 +befollow 1 1 6.957497 6.957497 8091 +youcould 1 1 6.957497 6.957497 8092 +textwould 1 1 6.957497 6.957497 8093 +artof 1 1 6.957497 6.957497 8094 +mychoic 1 1 6.957497 6.957497 8095 +interestingor 1 1 6.957497 6.957497 8096 +uninterest 1 1 6.957497 6.957497 8097 +aseith 1 1 6.957497 6.957497 8098 +researchcont 1 1 6.957497 6.957497 8099 +turninto 1 1 6.957497 6.957497 8100 +andenergi 1 1 6.957497 6.957497 8101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^ new file mode 100644 index 00000000..f12cce74 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +updat 1 191 1.609438 1.609438 41 +provid 1 121 2.079442 2.079442 94 +comment 1 93 2.397895 2.397895 146 +sourc 1 77 2.564949 2.564949 201 +complex 1 64 2.772589 2.772589 269 +move 1 47 3.091042 3.091042 382 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +autumn 2 31 3.496508 6.993016 558 +ofwashington 1 22 3.850148 3.850148 766 +portion 1 16 4.174387 4.174387 971 +webmast 1 15 4.248495 4.248495 1045 +reprint 1 14 4.317488 4.317488 1097 +automata 1 13 4.382027 4.382027 1135 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +foracadem 1 5 5.347108 5.347108 2341 +accuratelyquot 1 2 6.263398 6.263398 4470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ new file mode 100644 index 00000000..8dd2cd5b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^91a^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +postscript 3 131 2.079442 6.238326 90 +welcom 1 122 2.079442 2.079442 99 +instructor 1 108 2.197225 2.197225 107 +world 1 115 2.197225 2.197225 126 +final 1 116 2.197225 2.197225 108 +wide 1 84 2.484907 2.484907 185 +exam 1 86 2.484907 2.484907 169 +paul 1 38 3.295837 3.295837 471 +short 1 36 3.367296 3.367296 499 +quiz 2 16 4.174387 8.348774 990 +latex 1 14 4.317488 4.317488 1064 +hypermedia 1 12 4.465908 4.465908 1247 +documentfor 1 7 5.010635 5.010635 1865 +beam 2 5 5.347108 10.694216 2344 +automataautumn 1 1 6.957497 6.957497 8102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ new file mode 100644 index 00000000..bf2224ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^531^CurrentQtr^ @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +system 2 443 0.693147 1.386294 6 +cours 4 273 1.098612 4.394448 15 +last 2 314 1.098612 2.197224 14 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +mail 2 238 1.386294 2.772588 22 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +fall 2 181 1.609438 3.218876 40 +class 2 199 1.609438 3.218876 37 +updat 2 191 1.609438 3.218876 41 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +file 2 132 1.945910 3.891820 70 +assign 1 135 1.945910 1.945910 66 +support 1 132 1.945910 1.945910 83 +postscript 2 131 2.079442 4.158884 90 +provid 2 121 2.079442 4.158884 94 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +comment 1 93 2.397895 2.397895 146 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +sourc 2 77 2.564949 5.129898 201 +messag 1 76 2.564949 2.564949 212 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +free 2 73 2.639057 5.278114 224 +materi 1 75 2.639057 2.639057 221 +sieg 3 69 2.708050 8.124150 260 +syllabu 3 67 2.708050 8.124150 247 +window 1 68 2.708050 2.708050 242 +organ 3 65 2.772589 8.317767 265 +complex 2 64 2.772589 5.545178 269 +handout 1 64 2.772589 2.772589 263 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +three 1 54 2.944439 2.944439 330 +format 4 48 3.044522 12.178088 356 +adapt 1 46 3.091042 3.091042 387 +midterm 4 45 3.135494 12.541976 392 +textbook 1 44 3.135494 3.135494 397 +math 1 44 3.135494 3.135494 402 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +copyright 1 36 3.367296 3.367296 495 +print 1 34 3.401197 3.401197 503 +collabor 3 32 3.465736 10.397208 543 +autumn 2 31 3.496508 6.993016 558 +hard 1 30 3.555348 3.555348 563 +quit 1 27 3.637586 3.637586 633 +linux 1 27 3.637586 3.637586 631 +sent 1 22 3.850148 3.850148 763 +ofwashington 1 22 3.850148 3.850148 766 +viewer 2 21 3.912023 7.824046 787 +latest 1 21 3.912023 3.912023 785 +figur 1 18 4.060443 4.060443 903 +render 1 17 4.110874 4.110874 947 +portion 1 16 4.174387 4.174387 971 +ascii 1 15 4.248495 4.248495 1032 +webmast 1 15 4.248495 4.248495 1045 +latex 2 14 4.317488 8.634976 1064 +command 1 14 4.317488 4.317488 1083 +reprint 1 14 4.317488 4.317488 1097 +larri 2 13 4.382027 8.764054 1142 +automata 1 13 4.382027 4.382027 1135 +web 1 12 4.465908 4.465908 1249 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +errata 1 10 4.653960 4.653960 1403 +tuth 1 9 4.753590 4.753590 1519 +plain 1 9 4.753590 4.753590 1495 +imposs 1 9 4.753590 4.753590 1513 +perhap 1 8 4.875197 4.875197 1693 +legibl 2 7 5.010635 10.021270 1866 +adob 2 7 5.010635 10.021270 1873 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +acrobat 4 6 5.164786 20.659144 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 3 5 5.347108 16.041324 2345 +foracadem 1 5 5.347108 5.347108 2341 +sharma 1 4 5.568345 5.568345 2752 +thecours 1 4 5.568345 5.568345 2685 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +administrivia 1 3 5.857933 5.857933 3166 +ghostview 1 3 5.857933 5.857933 3163 +accuratelyquot 1 2 6.263398 6.263398 4470 +nitin 2 1 6.957497 13.914994 8103 +staffnameemailphoneoffic 1 1 6.957497 6.957497 8104 +csmw 1 1 6.957497 6.957497 8105 +acroread 1 1 6.957497 6.957497 8106 +aavail 1 1 6.957497 6.957497 8107 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^533^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^533^ new file mode 100644 index 00000000..df8024f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^533^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 3 443 0.693147 2.079441 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +paper 2 205 1.609438 3.218876 38 +algorithm 2 162 1.791759 3.583518 57 +implement 1 152 1.791759 1.791759 52 +first 2 140 1.945910 3.891820 71 +process 1 142 1.945910 1.945910 72 +well 3 109 2.197225 6.591675 121 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +search 2 95 2.397895 4.795790 155 +thing 1 84 2.484907 2.484907 189 +issu 3 78 2.564949 7.694847 211 +good 1 77 2.564949 2.564949 200 +june 1 79 2.564949 2.564949 214 +logic 5 71 2.639057 13.195285 230 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +involv 1 71 2.639057 2.639057 227 +test 3 66 2.708050 8.124150 252 +order 3 69 2.708050 8.124150 249 +practic 2 70 2.708050 5.416100 246 +thursdai 1 70 2.708050 2.708050 241 +complex 6 64 2.772589 16.635534 269 +talk 1 53 2.944439 2.944439 336 +give 1 50 3.044522 3.044522 359 +even 1 45 3.135494 3.135494 393 +autom 1 41 3.218876 3.218876 434 +examin 1 42 3.218876 3.218876 424 +theoret 2 39 3.258097 6.516194 446 +paul 1 38 3.295837 3.295837 471 +slide 1 38 3.295837 3.295837 467 +survei 1 35 3.401197 3.401197 513 +within 1 33 3.433987 3.433987 525 +autumn 1 31 3.496508 3.496508 558 +often 1 31 3.496508 3.496508 551 +consid 1 29 3.583519 3.583519 590 +compar 1 26 3.688879 3.688879 648 +strategi 2 25 3.737670 7.475340 682 +higher 2 24 3.761200 7.522400 690 +interpret 1 24 3.761200 3.761200 686 +proof 5 23 3.806662 19.033310 720 +instal 2 22 3.850148 7.700296 754 +varieti 1 22 3.850148 3.850148 740 +theorem 4 21 3.912023 15.648092 786 +vlsi 1 21 3.912023 3.912023 795 +verif 1 20 3.951244 3.951244 826 +prove 3 19 4.007333 12.021999 848 +concentr 2 18 4.060443 8.120886 906 +aid 1 18 4.060443 4.060443 904 +attempt 1 17 4.110874 4.110874 917 +moor 1 17 4.110874 4.110874 936 +choic 1 16 4.174387 4.174387 979 +side 1 15 4.248495 4.248495 1022 +anywai 1 15 4.248495 4.248495 1047 +decid 1 14 4.317488 4.317488 1075 +consider 1 14 4.317488 4.317488 1076 +loew 1 12 4.465908 4.465908 1252 +statement 1 11 4.553877 4.553877 1313 +rel 1 9 4.753590 4.753590 1487 +satisfi 4 8 4.875197 19.500788 1694 +prover 2 8 4.875197 9.750394 1653 +proposit 6 5 5.347108 32.082648 2339 +beam 1 5 5.347108 5.347108 2344 +amus 1 5 5.347108 5.347108 2366 +andsoftwar 1 4 5.568345 5.568345 2753 +tester 1 4 5.568345 5.568345 2754 +theoremprov 1 3 5.857933 5.857933 3298 +theoryand 1 3 5.857933 5.857933 3350 +scatter 1 3 5.857933 5.857933 3351 +truthof 1 1 6.957497 6.957497 8108 +casea 1 1 6.957497 6.957497 8109 +flip 1 1 6.957497 6.957497 8110 +oftheorem 1 1 6.957497 6.957497 8111 +finitedomain 1 1 6.957497 6.957497 8112 +thesequest 1 1 6.957497 6.957497 8113 +complexityand 1 1 6.957497 6.957497 8114 +anumb 1 1 6.957497 6.957497 8115 +urquhart 1 1 6.957497 6.957497 8116 +sato 1 1 6.957497 6.957497 8117 +andboy 1 1 6.957497 6.957497 8118 +gsat 1 1 6.957497 6.957497 8119 +thedirectori 1 1 6.957497 6.957497 8120 +proversther 1 1 6.957497 6.957497 8121 +ofinstal 1 1 6.957497 6.957497 8122 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^543^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^543^ new file mode 100644 index 00000000..bf9ba890 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^543^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +confer 1 126 2.079442 2.079442 100 +topic 1 114 2.197225 2.197225 110 +solut 1 82 2.484907 2.484907 162 +mondai 1 77 2.564949 2.564949 206 +meet 1 72 2.639057 2.639057 229 +wednesdai 1 64 2.772589 2.772589 261 +fridai 1 44 3.135494 3.135494 390 +packag 1 28 3.610918 3.610918 614 +measur 1 28 3.610918 3.610918 609 +pagecs 1 26 3.688879 3.688879 658 +sigmetr 1 13 4.382027 4.382027 1173 +loew 1 12 4.465908 4.465908 1252 +host 1 11 4.553877 4.553877 1306 +queue 1 10 4.653960 4.653960 1386 +systemperform 1 1 6.957497 6.957497 8123 +modelingspr 1 1 6.957497 6.957497 8124 +lazowskaandmaryvernonwelcom 1 1 6.957497 6.957497 8125 +performancemodel 1 1 6.957497 6.957497 8126 +hourstent 1 1 6.957497 6.957497 8127 +schedulecom 1 1 6.957497 6.957497 8128 +goingsassignmentsproject 1 1 6.957497 6.957497 8129 +informationmap 1 1 6.957497 6.957497 8130 +emailoth 1 1 6.957497 6.957497 8131 +computersystemsuw 1 1 6.957497 6.957497 8132 +engineeringlazowska 1 1 6.957497 6.957497 8133 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^548^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^548^ new file mode 100644 index 00000000..b94c0872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^548^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +inform 2 412 0.693147 1.386294 8 +offic 2 299 1.098612 2.197224 13 +cours 2 273 1.098612 2.197224 15 +current 1 284 1.098612 1.098612 21 +washington 2 236 1.386294 2.772588 32 +updat 1 191 1.609438 1.609438 41 +hour 2 165 1.791759 3.583518 46 +applic 1 170 1.791759 1.791759 56 +architectur 2 139 1.945910 3.891820 77 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +postscript 5 131 2.079442 10.397210 90 +tool 2 117 2.079442 4.158884 93 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +code 1 108 2.197225 2.197225 116 +user 1 104 2.302585 2.302585 137 +center 1 88 2.397895 2.397895 158 +info 2 85 2.484907 4.969814 176 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +simul 5 66 2.708050 13.540250 255 +sieg 3 69 2.708050 8.124150 260 +test 1 66 2.708050 2.708050 252 +previou 1 62 2.772589 2.772589 290 +overview 1 56 2.890372 2.890372 323 +instruct 1 53 2.944439 2.944439 332 +local 1 55 2.944439 2.944439 334 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +set 1 50 3.044522 3.044522 361 +execut 1 45 3.135494 3.135494 404 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +continu 1 39 3.258097 3.258097 448 +close 1 38 3.295837 3.295837 465 +manual 1 35 3.401197 3.401197 504 +multiprocessor 1 28 3.610918 3.610918 605 +binari 1 20 3.951244 3.951244 823 +histori 1 19 4.007333 4.007333 853 +benchmark 1 19 4.007333 4.007333 859 +analyz 1 17 4.110874 4.110874 925 +monitor 1 17 4.110874 4.110874 941 +rate 1 15 4.248495 4.248495 1037 +driven 1 15 4.248495 4.248495 1048 +neat 1 12 4.465908 4.465908 1263 +alpha 4 11 4.553877 18.215508 1348 +tuth 1 9 4.753590 4.753590 1519 +egger 2 8 4.875197 9.750394 1695 +uniprocessor 2 8 4.875197 9.750394 1696 +spec 1 8 4.875197 4.875197 1640 +sparc 2 7 5.010635 10.021270 1860 +shade 1 7 5.010635 5.010635 1881 +pentium 2 6 5.164786 10.329572 2077 +tullsen 1 6 5.164786 5.164786 2081 +superscalar 1 6 5.164786 5.164786 2082 +rewrit 1 5 5.347108 5.347108 2367 +etch 1 4 5.568345 5.568345 2755 +redston 2 3 5.857933 11.715866 3332 +specmark 1 2 6.263398 6.263398 4471 +atom 1 2 6.263398 6.263398 4472 +multiflow 1 2 6.263398 6.263398 4473 +powerpc 1 2 6.263398 6.263398 4238 +architecturewint 1 1 6.957497 6.957497 8134 +instructorsusan 1 1 6.957497 6.957497 8135 +tajoshua 1 1 6.957497 6.957497 8136 +instuct 1 1 6.957497 6.957497 8137 +pixi 1 1 6.957497 6.957497 8138 +dinero 1 1 6.957497 6.957497 8139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^551^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^551^ new file mode 100644 index 00000000..1b16db4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^551^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 3 412 0.693147 2.079441 8 +system 1 443 0.693147 0.693147 6 +offic 2 299 1.098612 2.197224 13 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +mail 2 238 1.386294 2.772588 22 +washington 1 236 1.386294 1.386294 32 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +hour 2 165 1.791759 3.583518 46 +assign 2 135 1.945910 3.891820 66 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +spring 1 131 2.079442 2.079442 88 +confer 1 126 2.079442 2.079442 100 +number 1 130 2.079442 2.079442 97 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +instructor 2 108 2.197225 4.394450 107 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +april 2 77 2.564949 5.129898 196 +meet 1 72 2.639057 2.639057 229 +room 1 59 2.833213 2.833213 301 +frequent 1 49 3.044522 3.044522 367 +keep 1 44 3.135494 3.135494 409 +announc 1 40 3.258097 3.258097 441 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +ad 1 32 3.465736 3.465736 544 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +pagecs 1 26 3.688879 3.688879 658 +unit 1 21 3.912023 3.912023 779 +chateau 1 16 4.174387 4.174387 997 +levi 1 14 4.317488 4.317488 1093 +hank 1 12 4.465908 4.465908 1253 +hypermedia 1 12 4.465908 4.465908 1247 +readi 1 12 4.465908 4.465908 1242 +pighin 2 4 5.568345 11.136690 2735 +thisdocu 2 3 5.857933 11.715866 3336 +freder 1 3 5.857933 5.857933 3352 +iti 1 2 6.263398 6.263398 4066 +forcs 1 1 6.957497 6.957497 8140 +classmessag 1 1 6.957497 6.957497 8141 +projectlevi 1 1 6.957497 6.957497 8142 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^557^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^557^ new file mode 100644 index 00000000..bd0726f1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^557^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +inform 6 412 0.693147 4.158882 8 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +engin 3 297 1.098612 3.295836 20 +last 2 314 1.098612 2.197224 14 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +class 2 199 1.609438 3.218876 37 +avail 2 169 1.791759 3.583518 48 +read 1 154 1.791759 1.791759 47 +base 1 165 1.791759 1.791759 50 +year 2 148 1.945910 3.891820 84 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +professor 1 137 1.945910 1.945910 76 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +document 3 121 2.079442 6.238326 89 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +grade 1 90 2.397895 2.397895 142 +imag 1 91 2.397895 2.397895 161 +follow 1 92 2.397895 2.397895 143 +help 3 83 2.484907 7.454721 175 +wide 1 84 2.484907 2.484907 185 +solut 1 82 2.484907 2.484907 162 +resourc 1 81 2.484907 2.484907 172 +homework 1 79 2.564949 2.564949 193 +addit 1 74 2.639057 2.639057 228 +html 1 75 2.639057 2.639057 235 +degre 2 69 2.708050 5.416100 259 +syllabu 1 67 2.708050 2.708050 247 +test 1 66 2.708050 2.708050 252 +written 1 63 2.772589 2.772589 278 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +colleg 2 61 2.833213 5.666426 300 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +browser 1 56 2.890372 2.890372 313 +instruct 1 53 2.944439 2.944439 332 +cool 1 49 3.044522 3.044522 374 +visitor 1 49 3.044522 3.044522 371 +basic 1 50 3.044522 3.044522 360 +quarter 1 47 3.091042 3.091042 389 +get 1 46 3.091042 3.091042 380 +keep 1 44 3.135494 3.135494 409 +offer 2 43 3.178054 6.356108 414 +art 1 29 3.583519 3.583519 593 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +pagecs 1 26 3.688879 3.688879 658 +hypertext 1 19 4.007333 4.007333 865 +charact 1 15 4.248495 4.248495 1028 +hypermedia 1 12 4.465908 4.465908 1247 +mosaic 2 10 4.653960 9.307920 1426 +uniform 1 7 5.010635 5.010635 1845 +markup 1 6 5.164786 5.164786 2059 +whichcontain 1 4 5.568345 5.568345 2714 +wealth 1 3 5.857933 5.857933 3353 +thatthi 1 2 6.263398 6.263398 4379 +addedfrequ 1 2 6.263398 6.263398 4380 +deros 1 2 6.263398 6.263398 4474 +indi 1 2 6.263398 6.263398 4431 +mvi 1 2 6.263398 6.263398 4382 +usinglynx 1 2 6.263398 6.263398 4383 +graphicsautumn 1 1 6.957497 6.957497 8143 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^567^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^567^ new file mode 100644 index 00000000..5b7d6a73 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^567^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +offic 3 299 1.098612 3.295836 13 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +design 2 213 1.386294 2.772588 25 +washington 2 236 1.386294 2.772588 32 +link 1 247 1.386294 1.386294 24 +class 8 199 1.609438 12.875504 37 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +data 4 170 1.791759 7.167036 49 +hour 3 165 1.791759 5.375277 46 +recent 1 167 1.791759 1.791759 58 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +site 1 106 2.197225 2.197225 119 +book 3 99 2.302585 6.907755 131 +text 1 98 2.302585 2.302585 133 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +exam 1 86 2.484907 2.484907 169 +academ 1 82 2.484907 2.484907 178 +homework 9 79 2.564949 23.084541 193 +mondai 3 77 2.564949 7.694847 206 +server 2 76 2.564949 5.129898 204 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +logic 4 71 2.639057 10.556228 230 +summari 1 73 2.639057 2.639057 237 +tuesdai 1 73 2.639057 2.639057 219 +nation 1 74 2.639057 2.639057 240 +sieg 3 69 2.708050 8.124150 260 +syllabu 1 67 2.708050 2.708050 247 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +wednesdai 3 64 2.772589 8.317767 261 +import 1 65 2.772589 2.772589 282 +handout 1 64 2.772589 2.772589 263 +cover 1 55 2.944439 2.944439 329 +digit 1 52 2.995732 2.995732 348 +principl 1 48 3.044522 3.044522 357 +still 1 50 3.044522 3.044522 362 +quarter 1 47 3.091042 3.091042 389 +adapt 1 46 3.091042 3.091042 387 +fridai 5 44 3.135494 15.677470 390 +combin 2 42 3.218876 6.437752 421 +review 1 42 3.218876 3.218876 425 +announc 1 40 3.258097 3.258097 441 +paul 2 38 3.295837 6.591674 471 +credit 1 38 3.295837 3.295837 460 +hand 1 37 3.332205 3.332205 475 +purpos 1 37 3.332205 3.332205 481 +staff 2 36 3.367296 6.734592 490 +copyright 1 36 3.367296 3.367296 495 +board 1 33 3.433987 3.433987 528 +quot 1 29 3.583519 3.583519 582 +accur 1 25 3.737670 3.737670 680 +begin 7 23 3.806662 26.646634 716 +sequenti 2 22 3.850148 7.700296 745 +vlsi 1 21 3.912023 3.912023 795 +synthesi 1 20 3.951244 3.951244 834 +mostli 1 19 4.007333 4.007333 869 +sheet 2 16 4.174387 8.348774 973 +dilbert 1 16 4.174387 4.174387 996 +portion 1 16 4.174387 4.174387 971 +carl 2 15 4.248495 8.496990 1024 +comic 1 14 4.317488 4.317488 1103 +reprint 1 14 4.317488 4.317488 1097 +larri 2 13 4.382027 8.764054 1142 +loew 1 12 4.465908 4.465908 1252 +duli 1 12 4.465908 4.465908 1248 +nonprofit 1 11 4.553877 4.553877 1339 +fpga 2 10 4.653960 9.307920 1433 +franklin 1 10 4.653960 4.653960 1436 +motorola 1 9 4.753590 4.753590 1546 +mother 1 6 5.164786 5.164786 2083 +philip 1 6 5.164786 5.164786 2005 +ebel 4 4 5.568345 22.273380 2756 +mcmurchi 1 4 5.568345 5.568345 2757 +murphi 1 4 5.568345 5.568345 2737 +semiconductor 1 3 5.857933 5.857933 3339 +semiconduct 1 3 5.857933 5.857933 3340 +micron 1 3 5.857933 5.857933 3341 +hine 1 2 6.263398 6.263398 4475 +guru 1 2 6.263398 6.263398 4476 +comprehensivelist 1 2 6.263398 6.263398 4439 +icmanufactur 1 2 6.263398 6.263398 4440 +hineskj 1 1 6.957497 6.957497 8144 +pamett 1 1 6.957497 6.957497 8145 +groupsfin 1 1 6.957497 6.957497 8146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^573^ new file mode 100644 index 00000000..f629cfcd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +hour 3 165 1.791759 5.375277 46 +address 1 170 1.791759 1.791759 62 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +topic 1 114 2.197225 2.197225 110 +question 2 91 2.397895 4.795790 141 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +build 1 85 2.484907 2.484907 184 +messag 1 76 2.564949 2.564949 212 +intellig 1 72 2.639057 2.639057 225 +sieg 3 69 2.708050 8.124150 260 +knowledg 1 67 2.708050 2.708050 243 +artifici 2 63 2.772589 5.545178 280 +plan 1 65 2.772589 2.772589 272 +index 1 56 2.890372 2.890372 309 +reason 1 57 2.890372 2.890372 318 +archiv 1 49 3.044522 3.044522 364 +quarter 1 47 3.091042 3.091042 389 +past 1 42 3.218876 3.218876 428 +staff 1 36 3.367296 3.367296 490 +represent 1 35 3.401197 3.401197 512 +pagecs 1 26 3.688879 3.688879 658 +fundament 1 25 3.737670 3.737670 661 +methodolog 1 23 3.806662 3.806662 733 +outlin 1 17 4.110874 4.110874 914 +nick 1 13 4.382027 4.382027 1180 +pose 1 9 4.753590 4.753590 1535 +depth 1 8 4.875197 4.875197 1636 +marc 1 8 4.875197 4.875197 1680 +uncertainti 1 7 5.010635 5.010635 1882 +machinelearn 1 6 5.164786 5.164786 2084 +anin 1 3 5.857933 5.857933 3354 +assignmentsassign 1 3 5.857933 5.857933 3342 +mailinglist 1 3 5.857933 5.857933 3325 +intelligencefal 1 2 6.263398 6.263398 4477 +andchalleng 1 2 6.263398 6.263398 4478 +intelligentmachin 1 2 6.263398 6.263398 4479 +agentarchitectur 1 2 6.263398 6.263398 4480 +weldweld 1 2 6.263398 6.263398 4481 +friedmanfriedman 1 2 6.263398 6.263398 4482 +kushmericknick 1 2 6.263398 6.263398 4483 +examsgradingresourcesth 1 2 6.263398 6.263398 4484 +topicsread 1 1 6.957497 6.957497 8147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^574^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^574^ new file mode 100644 index 00000000..5795ea80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^574^ @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +mail 2 238 1.386294 2.772588 22 +email 1 220 1.386294 1.386294 29 +class 2 199 1.609438 3.218876 37 +list 2 201 1.609438 3.218876 39 +paper 2 205 1.609438 3.218876 38 +updat 1 191 1.609438 1.609438 41 +read 5 154 1.791759 8.958795 47 +avail 2 169 1.791759 3.583518 48 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +postscript 1 131 2.079442 2.079442 90 +theori 4 111 2.197225 8.788900 127 +make 2 111 2.197225 4.394450 120 +look 2 107 2.197225 4.394450 115 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +book 2 99 2.302585 4.605170 131 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +graphic 1 90 2.397895 2.397895 147 +librari 2 87 2.484907 4.969814 181 +member 1 84 2.484907 2.484907 165 +requir 1 81 2.484907 2.484907 167 +refer 2 78 2.564949 5.129898 203 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +intellig 2 72 2.639057 5.278114 225 +summari 2 73 2.639057 5.278114 237 +materi 1 75 2.639057 2.639057 221 +logic 1 71 2.639057 2.639057 230 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +artifici 1 63 2.772589 2.772589 280 +collect 1 65 2.772589 2.772589 268 +foundat 1 62 2.772589 2.772589 286 +copi 1 63 2.772589 2.772589 284 +written 1 63 2.772589 2.772589 278 +juli 1 60 2.833213 2.833213 305 +reason 3 57 2.890372 8.671116 318 +sever 2 56 2.890372 5.780744 322 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +without 1 50 3.044522 3.044522 370 +math 2 44 3.135494 6.270988 402 +around 1 43 3.178054 3.178054 415 +edit 1 42 3.218876 3.218876 418 +probabl 3 40 3.258097 9.774291 455 +winter 1 36 3.367296 3.367296 500 +statist 1 35 3.401197 3.401197 521 +go 1 33 3.433987 3.433987 529 +chapter 1 32 3.465736 3.465736 536 +steve 1 29 3.583519 3.583519 594 +though 1 27 3.637586 3.637586 622 +request 1 26 3.688879 3.688879 635 +decis 3 23 3.806662 11.419986 728 +nice 2 20 3.951244 7.902488 809 +expert 1 20 3.951244 3.951244 833 +definit 1 19 4.007333 4.007333 864 +goe 1 15 4.248495 4.248495 1044 +signific 1 13 4.382027 4.382027 1125 +hank 3 12 4.465908 13.397724 1253 +probabilist 2 11 4.553877 9.107754 1343 +extrem 1 11 4.553877 4.553877 1330 +worth 1 11 4.553877 4.553877 1294 +perspect 1 10 4.653960 4.653960 1437 +uncertainti 3 7 5.010635 15.031905 1882 +whenev 1 7 5.010635 5.010635 1883 +heavi 1 7 5.010635 5.010635 1841 +secondari 1 7 5.010635 5.010635 1884 +histor 2 6 5.164786 10.329572 2085 +arrang 1 6 5.164786 5.164786 2023 +overlap 1 5 5.347108 5.347108 2368 +uncertain 1 4 5.568345 5.568345 2758 +cash 1 3 5.857933 5.857933 3355 +grail 1 3 5.857933 5.857933 3356 +alon 1 3 5.857933 5.857933 3139 +pearl 3 2 6.263398 18.790194 4485 +bui 1 2 6.263398 6.263398 4486 +algorithmsa 1 2 6.263398 6.263398 4487 +systemsthi 1 1 6.957497 6.957497 8148 +strappedfor 1 1 6.957497 6.957497 8149 +shafer 1 1 6.957497 6.957497 8150 +reasoningthi 1 1 6.957497 6.957497 8151 +jayn 1 1 6.957497 6.957497 8152 +fragmentari 1 1 6.957497 6.957497 8153 +foundationsof 1 1 6.957497 6.957497 8154 +beautifulli 1 1 6.957497 6.957497 8155 +neapolitan 1 1 6.957497 6.957497 8156 +propagationalgorithm 1 1 6.957497 6.957497 8157 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^576^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^576^ new file mode 100644 index 00000000..d4a8a405 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^576^ @@ -0,0 +1,272 @@ +term, tf, in documents count, idf, tfidf, wordid +home 6 672 0.000000 0.000000 1 +page 6 705 0.000000 0.000000 3 +comput 5 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 6 412 0.693147 4.158882 8 +interest 1 384 0.693147 0.693147 11 +cours 9 273 1.098612 9.887508 15 +engin 3 297 1.098612 3.295836 20 +student 2 343 1.098612 2.197224 19 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +also 2 259 1.386294 2.772588 28 +softwar 2 220 1.386294 2.772588 30 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +class 5 199 1.609438 8.047190 37 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +read 5 154 1.791759 8.958795 47 +avail 2 169 1.791759 3.583518 48 +applic 2 170 1.791759 3.583518 56 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +assign 7 135 1.945910 13.621370 66 +note 3 142 1.945910 5.837730 67 +first 2 140 1.945910 3.891820 71 +file 2 132 1.945910 3.891820 70 +process 2 142 1.945910 3.891820 72 +lectur 2 135 1.945910 3.891820 73 +problem 1 147 1.945910 1.945910 75 +document 2 121 2.079442 4.158884 89 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +make 2 111 2.197225 4.394450 120 +final 2 116 2.197225 4.394450 108 +version 2 113 2.197225 4.394450 122 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +topic 1 114 2.197225 2.197225 110 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +part 4 98 2.302585 9.210340 129 +take 2 97 2.302585 4.605170 134 +need 1 98 2.302585 2.302585 135 +imag 6 91 2.397895 14.387370 161 +follow 2 92 2.397895 4.795790 143 +select 2 91 2.397895 4.795790 154 +proceed 1 93 2.397895 2.397895 152 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +environ 3 84 2.484907 7.454721 177 +help 3 83 2.484907 7.454721 175 +contain 2 81 2.484907 4.969814 174 +exam 2 86 2.484907 4.969814 169 +start 2 83 2.484907 4.969814 173 +wide 1 84 2.484907 2.484907 185 +second 1 81 2.484907 2.484907 166 +educ 1 86 2.484907 2.484907 191 +level 1 87 2.484907 2.484907 180 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +librari 1 87 2.484907 2.484907 181 +resourc 1 81 2.484907 2.484907 172 +mondai 8 77 2.564949 20.519592 206 +april 6 77 2.564949 15.389694 196 +june 2 79 2.564949 5.129898 214 +messag 1 76 2.564949 2.564949 212 +know 1 80 2.564949 2.564949 198 +meet 4 72 2.639057 10.556228 229 +onlin 3 75 2.639057 7.917171 223 +materi 2 75 2.639057 5.278114 221 +name 1 72 2.639057 2.639057 220 +sieg 2 69 2.708050 5.416100 260 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +wednesdai 5 64 2.772589 13.862945 261 +copi 3 63 2.772589 8.317767 284 +experi 2 64 2.772589 5.545178 283 +plan 2 65 2.772589 5.545178 272 +import 1 65 2.772589 2.772589 282 +laboratori 1 63 2.772589 2.772589 292 +march 3 61 2.833213 8.499639 295 +room 1 59 2.833213 2.833213 301 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +index 1 56 2.890372 2.890372 309 +local 3 55 2.944439 8.833317 334 +undergradu 2 54 2.944439 5.888878 338 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +week 3 52 2.995732 8.987196 343 +run 2 51 2.995732 5.991464 347 +frequent 1 49 3.044522 3.044522 367 +get 1 46 3.091042 3.091042 380 +understand 1 47 3.091042 3.091042 384 +fridai 7 44 3.135494 21.948458 390 +midterm 2 45 3.135494 6.270988 392 +keep 1 44 3.135494 3.135494 409 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +review 1 42 3.218876 3.218876 425 +vision 1 41 3.218876 3.218876 430 +tutori 2 39 3.258097 6.516194 437 +announc 1 40 3.258097 3.258097 441 +form 1 39 3.258097 3.258097 443 +correct 1 38 3.295837 3.295837 462 +slide 1 38 3.295837 3.295837 467 +workstat 1 37 3.332205 3.332205 479 +copyright 2 36 3.367296 6.734592 495 +short 1 36 3.367296 3.367296 499 +especi 1 36 3.367296 3.367296 496 +ofth 1 36 3.367296 3.367296 491 +next 3 34 3.401197 10.203591 517 +least 1 35 3.401197 3.401197 516 +post 1 35 3.401197 3.401197 505 +either 1 35 3.401197 3.401197 506 +articl 2 33 3.433987 6.867974 530 +chapter 2 32 3.465736 6.931472 536 +ad 1 32 3.465736 3.465736 544 +transform 1 32 3.465736 3.465736 542 +turn 1 29 3.583519 3.583519 586 +univ 1 28 3.610918 3.610918 617 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +determin 1 27 3.637586 3.637586 630 +pagecs 1 26 3.688879 3.688879 658 +effort 1 26 3.688879 3.688879 652 +notic 1 25 3.737670 3.737670 675 +pattern 1 24 3.761200 3.761200 689 +store 1 24 3.761200 3.761200 693 +displai 1 23 3.806662 3.806662 712 +variabl 1 23 3.806662 3.806662 715 +recognit 1 23 3.806662 3.806662 723 +instal 3 22 3.850148 11.550444 754 +period 1 22 3.850148 3.850148 743 +path 2 21 3.912023 7.824046 778 +navig 1 21 3.912023 3.912023 796 +exercis 1 19 4.007333 4.007333 842 +comparison 1 19 4.007333 4.007333 863 +account 2 18 4.060443 8.120886 882 +appropri 1 18 4.060443 4.060443 883 +statu 1 18 4.060443 4.060443 885 +outlin 2 17 4.110874 8.221748 914 +regular 1 17 4.110874 4.110874 929 +intro 1 17 4.110874 4.110874 915 +sign 2 16 4.174387 8.348774 970 +georg 1 16 4.174387 4.174387 994 +spatial 1 16 4.174387 4.174387 988 +fourth 1 16 4.174387 4.174387 999 +permit 1 16 4.174387 4.174387 962 +alreadi 1 16 4.174387 4.174387 963 +earli 1 16 4.174387 4.174387 968 +overhead 1 15 4.248495 4.248495 1035 +floor 1 14 4.317488 4.317488 1070 +menu 2 13 4.382027 8.764054 1156 +resolut 1 13 4.382027 4.382027 1172 +introduc 1 13 4.382027 4.382027 1139 +care 1 13 4.382027 4.382027 1177 +hypermedia 1 12 4.465908 4.465908 1247 +noth 1 11 4.553877 4.553877 1328 +transpar 1 11 4.553877 4.553877 1325 +fill 1 11 4.553877 4.553877 1349 +thedepart 1 11 4.553877 4.553877 1350 +sundai 1 10 4.653960 4.653960 1387 +packet 1 10 4.653960 4.653960 1415 +prior 1 10 4.653960 4.653960 1438 +card 1 10 4.653960 4.653960 1435 +mosaic 1 10 4.653960 4.653960 1426 +login 2 9 4.753590 9.507180 1550 +classifi 1 9 4.753590 4.753590 1537 +pick 1 9 4.753590 4.753590 1498 +lock 1 9 4.753590 4.753590 1551 +ideal 1 8 4.875197 4.875197 1630 +evan 1 8 4.875197 4.875197 1633 +documentfor 1 7 5.010635 5.010635 1865 +remind 1 7 5.010635 5.010635 1799 +reed 2 6 5.164786 10.329572 2086 +arrang 2 6 5.164786 10.329572 2023 +theclass 1 6 5.164786 5.164786 2060 +mock 1 6 5.164786 5.164786 2087 +otherthan 1 6 5.164786 5.164786 2009 +conveni 1 6 5.164786 5.164786 2088 +onto 1 6 5.164786 5.164786 2089 +approv 1 6 5.164786 5.164786 2078 +temporari 1 6 5.164786 5.164786 2090 +contract 1 6 5.164786 5.164786 1985 +pentium 1 6 5.164786 5.164786 2077 +newinform 1 5 5.347108 5.347108 2342 +subjectto 1 5 5.347108 5.347108 2369 +ahead 1 5 5.347108 5.347108 2338 +cshrc 1 4 5.568345 5.568345 2759 +assignmentsand 1 4 5.568345 5.568345 2760 +cvpr 1 4 5.568345 5.568345 2761 +net 1 4 5.568345 5.568345 2741 +accompani 1 4 5.568345 5.568345 2666 +password 1 4 5.568345 5.568345 2594 +kept 1 4 5.568345 5.568345 2762 +insieg 1 3 5.857933 5.857933 3331 +weekend 1 3 5.857933 5.857933 3357 +khoro 8 2 6.263398 50.107184 4488 +cantata 5 2 6.263398 31.316990 4489 +sun 3 2 6.263398 18.790194 4490 +setenv 2 2 6.263398 12.526796 4491 +pmin 1 2 6.263398 6.263398 4492 +combinationof 1 2 6.263398 6.263398 4081 +includingth 1 2 6.263398 6.263398 4493 +onthursdai 1 2 6.263398 6.263398 4425 +itemsund 1 2 6.263398 6.263398 4387 +balloon 1 2 6.263398 6.263398 4388 +khoros_hom 2 1 6.957497 13.914994 8158 +msvc 2 1 6.957497 13.914994 8159 +rene 2 1 6.957497 13.914994 8160 +understandingwelcom 1 1 6.957497 6.957497 8161 +doexercis 1 1 6.957497 6.957497 8162 +torun 1 1 6.957497 6.957497 8163 +aslillith 1 1 6.957497 6.957497 8164 +containxhost 1 1 6.957497 6.957497 8165 +lilliththen 1 1 6.957497 6.957497 8166 +manpath 1 1 6.957497 6.957497 8167 +rlogin 1 1 6.957497 6.957497 8168 +lillith 1 1 6.957497 6.957497 8169 +rhost 1 1 6.957497 6.957497 8170 +typecantata 1 1 6.957497 6.957497 8171 +prompt 1 1 6.957497 6.957497 8172 +haskhoro 1 1 6.957497 6.957497 8173 +wwwhttp 1 1 6.957497 6.957497 8174 +htmland 1 1 6.957497 6.957497 8175 +itscours 1 1 6.957497 6.957497 8176 +twotop 1 1 6.957497 6.957497 8177 +pagesand 1 1 6.957497 6.957497 8178 +huerta 1 1 6.957497 6.957497 8179 +andnevatia 1 1 6.957497 6.957497 8180 +tolook 1 1 6.957497 6.957497 8181 +wolff 1 1 6.957497 6.957497 8182 +onneur 1 1 6.957497 6.957497 8183 +trainabl 1 1 6.957497 6.957497 8184 +ofmatlab 1 1 6.957497 6.957497 8185 +requirethat 1 1 6.957497 6.957497 8186 +mclain 1 1 6.957497 6.957497 8187 +documentexplain 1 1 6.957497 6.957497 8188 +withkhoro 1 1 6.957497 6.957497 8189 +accesskhoro 1 1 6.957497 6.957497 8190 +youraccount 1 1 6.957497 6.957497 8191 +itov 1 1 6.957497 6.957497 8192 +arelimit 1 1 6.957497 6.957497 8193 +andsh 1 1 6.957497 6.957497 8194 +knock 1 1 6.957497 6.957497 8195 +orhav 1 1 6.957497 6.957497 8196 +willhav 1 1 6.957497 6.957497 8197 +delft 1 1 6.957497 6.957497 8198 +brochur 1 1 6.957497 6.957497 8199 +brochuremosa 1 1 6.957497 6.957497 8200 +macmosa 1 1 6.957497 6.957497 8201 +itemund 1 1 6.957497 6.957497 8202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^581^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^581^ new file mode 100644 index 00000000..e7c41618 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^581^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +project 4 340 1.098612 4.394448 18 +cours 3 273 1.098612 3.295836 15 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +languag 2 227 1.386294 2.772588 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +class 6 199 1.609438 9.656628 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +parallel 5 169 1.791759 8.958795 60 +algorithm 5 162 1.791759 8.958795 57 +network 3 168 1.791759 5.375277 61 +implement 2 152 1.791759 3.583518 52 +avail 1 169 1.791759 1.791759 48 +architectur 2 139 1.945910 3.891820 77 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +schedul 4 119 2.079442 8.317768 85 +document 3 121 2.079442 6.238326 89 +machin 3 129 2.079442 6.238326 95 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +topic 5 114 2.197225 10.986125 110 +final 2 116 2.197225 4.394450 108 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +site 1 106 2.197225 2.197225 119 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +octob 5 89 2.397895 11.989475 156 +imag 2 91 2.397895 4.795790 161 +present 1 91 2.397895 2.397895 145 +center 1 88 2.397895 2.397895 158 +novemb 7 81 2.484907 17.394349 179 +start 2 83 2.484907 4.969814 173 +exam 2 86 2.484907 4.969814 169 +wide 1 84 2.484907 2.484907 185 +contain 1 81 2.484907 2.484907 174 +librari 1 87 2.484907 2.484907 181 +info 1 85 2.484907 2.484907 176 +resourc 1 81 2.484907 2.484907 172 +decemb 3 80 2.564949 7.694847 215 +complet 2 77 2.564949 5.129898 208 +good 1 77 2.564949 2.564949 200 +orient 1 80 2.564949 2.564949 205 +tuesdai 4 73 2.639057 10.556228 219 +meet 2 72 2.639057 5.278114 229 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +thursdai 2 70 2.708050 5.416100 241 +view 1 70 2.708050 2.708050 254 +sieg 1 69 2.708050 2.708050 260 +prof 1 64 2.772589 2.772589 273 +virtual 1 62 2.772589 2.772589 285 +written 1 63 2.772589 2.772589 278 +descript 1 64 2.772589 2.772589 271 +collect 1 65 2.772589 2.772589 268 +import 1 65 2.772589 2.772589 282 +wednesdai 1 64 2.772589 2.772589 261 +room 1 59 2.833213 2.833213 301 +overview 2 56 2.890372 5.780744 323 +found 1 53 2.944439 2.944439 337 +week 2 52 2.995732 5.991464 343 +digit 1 52 2.995732 2.995732 348 +approach 1 48 3.044522 3.044522 366 +keep 1 44 3.135494 3.135494 409 +discuss 1 45 3.135494 3.135494 399 +made 1 44 3.135494 3.135494 398 +fridai 1 44 3.135494 3.135494 390 +term 4 43 3.178054 12.712216 411 +review 1 42 3.218876 3.218876 425 +error 1 40 3.258097 3.258097 449 +close 1 38 3.295837 3.295837 465 +hand 1 37 3.332205 3.332205 475 +copyright 2 36 3.367296 6.734592 495 +short 1 36 3.367296 3.367296 499 +ofth 1 36 3.367296 3.367296 491 +ad 1 32 3.465736 3.465736 544 +titl 1 31 3.496508 3.496508 556 +neural 3 30 3.555348 10.666044 578 +scale 1 28 3.610918 3.610918 613 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +symbol 1 27 3.637586 3.637586 620 +arrai 1 27 3.637586 3.637586 627 +pagecs 1 26 3.688879 3.688879 658 +session 1 26 3.688879 3.688879 643 +supercomput 2 25 3.737670 7.475340 681 +notic 1 25 3.737670 3.737670 675 +begin 4 23 3.806662 15.226648 716 +recognit 1 23 3.806662 3.806662 723 +dai 1 22 3.850148 3.850148 753 +instal 1 22 3.850148 3.850148 754 +half 1 21 3.912023 3.912023 776 +theunivers 1 21 3.912023 3.912023 797 +demo 1 18 4.060443 4.060443 888 +segment 1 17 4.110874 4.110874 931 +intel 3 16 4.174387 12.523161 1000 +normal 2 16 4.174387 8.348774 995 +brief 1 16 4.174387 4.174387 1001 +permit 1 16 4.174387 4.174387 962 +embed 2 14 4.317488 8.634976 1102 +hong 1 14 4.317488 4.317488 1105 +heterogen 1 14 4.317488 4.317488 1090 +canada 1 13 4.382027 4.382027 1158 +guest 2 12 4.465908 8.931816 1220 +hypermedia 1 12 4.465908 4.465908 1247 +onth 1 12 4.465908 4.465908 1218 +mesh 1 11 4.553877 4.553877 1351 +simon 1 8 4.875197 4.875197 1697 +documentfor 1 7 5.010635 5.010635 1865 +sweden 1 7 5.010635 5.010635 1885 +friedman 1 7 5.010635 5.010635 1886 +theclass 1 6 5.164786 5.164786 2060 +conveni 1 6 5.164786 5.164786 2088 +otherthan 1 6 5.164786 5.164786 2009 +speaker 2 5 5.347108 10.694216 2370 +newinform 1 5 5.347108 5.347108 2342 +templat 1 5 5.347108 5.347108 2311 +subjectto 1 5 5.347108 5.347108 2369 +tennesse 1 4 5.568345 5.568345 2763 +pyramid 2 3 5.857933 11.715866 3358 +paragon 2 3 5.857933 11.715866 3359 +simd 1 3 5.857933 5.857933 3360 +mimd 1 3 5.857933 5.857933 3361 +icon 1 3 5.857933 5.857933 3362 +neal 1 3 5.857933 5.857933 3184 +maspar 2 2 6.263398 12.526796 4279 +informationon 1 2 6.263398 6.263398 4232 +burt 1 2 6.263398 6.263398 4494 +rosenfeld 1 2 6.263398 6.263398 4495 +inon 1 2 6.263398 6.263398 4496 +processingwelcom 1 1 6.957497 6.957497 8203 +hourearli 1 1 6.957497 6.957497 8204 +nian 1 1 6.957497 6.957497 8205 +fraser 1 1 6.957497 6.957497 8206 +burnabi 1 1 6.957497 6.957497 8207 +bharath 1 1 6.957497 6.957497 8208 +modayur 1 1 6.957497 6.957497 8209 +invariantoper 1 1 6.957497 6.957497 8210 +hierarchicalrelax 1 1 6.957497 6.957497 8211 +isodata 1 1 6.957497 6.957497 8212 +treatment 1 1 6.957497 6.957497 8213 +topicsdur 1 1 6.957497 6.957497 8214 +activelyexplor 1 1 6.957497 6.957497 8215 +writeupsi 1 1 6.957497 6.957497 8216 +resourcespvm 1 1 6.957497 6.957497 8217 +virtualmachin 1 1 6.957497 6.957497 8218 +layear 1 1 6.957497 6.957497 8219 +aviru 1 1 6.957497 6.957497 8220 +moreworkst 1 1 6.957497 6.957497 8221 +studydistribut 1 1 6.957497 6.957497 8222 +technicalpubl 1 1 6.957497 6.957497 8223 +paragonparallel 1 1 6.957497 6.957497 8224 +variousvendor 1 1 6.957497 6.957497 8225 +correctionsto 1 1 6.957497 6.957497 8226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590B^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590B^ new file mode 100644 index 00000000..15110427 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590B^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +program 3 374 0.693147 2.079441 7 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +paper 2 205 1.609438 3.218876 38 +modifi 1 178 1.609438 1.609438 35 +data 1 170 1.791759 1.791759 49 +find 1 111 2.197225 2.197225 111 +graphic 2 90 2.397895 4.795790 147 +method 4 80 2.564949 10.259796 213 +april 4 77 2.564949 10.259796 196 +optim 3 79 2.564949 7.694847 197 +exampl 2 77 2.564949 5.129898 195 +solv 1 73 2.639057 2.639057 234 +differ 1 66 2.708050 2.708050 253 +numer 1 49 3.044522 3.044522 369 +linear 5 41 3.218876 16.094380 431 +global 1 34 3.401197 3.401197 520 +valu 1 25 3.737670 3.737670 665 +mike 1 24 3.761200 3.761200 703 +equat 2 23 3.806662 7.613324 724 +properti 1 22 3.850148 3.850148 749 +definit 1 19 4.007333 4.007333 864 +eric 1 19 4.007333 4.007333 870 +element 1 18 4.060443 4.060443 895 +intro 4 17 4.110874 16.443496 915 +matrix 2 17 4.110874 8.221748 933 +differenti 1 17 4.110874 4.110874 921 +adam 1 17 4.110874 4.110874 934 +finit 2 14 4.317488 8.634976 1106 +nonlinear 1 14 4.317488 4.317488 1107 +chuck 1 14 4.317488 4.317488 1108 +discret 1 13 4.382027 4.382027 1165 +jonathan 1 13 4.382027 4.382027 1174 +brad 2 12 4.465908 8.931816 1264 +daniel 1 12 4.465908 4.465908 1233 +decomposit 1 10 4.653960 4.653960 1439 +arithmet 1 10 4.653960 4.653960 1388 +kevin 1 9 4.753590 4.753590 1482 +joel 1 8 4.875197 4.875197 1698 +root 1 8 4.875197 4.875197 1650 +constrain 1 6 5.164786 5.164786 2042 +fred 1 6 5.164786 5.164786 2072 +fit 1 5 5.347108 5.347108 2285 +invers 1 4 5.568345 5.568345 2764 +corei 1 4 5.568345 5.568345 2718 +eigenvalu 1 3 5.857933 5.857933 3364 +eigenvector 1 3 5.857933 5.857933 3365 +singular 1 3 5.857933 5.857933 3366 +conclus 1 3 5.857933 5.857933 3367 +ordinari 1 3 5.857933 5.857933 3233 +interv 1 3 5.857933 5.857933 3253 +quadrat 2 2 6.263398 12.526796 4497 +shuichi 1 2 6.263398 6.263398 4498 +unconstrain 1 2 6.263398 6.263398 4499 +kari 1 2 6.263398 6.263398 4500 +regress 1 2 6.263398 6.263398 4501 +calibr 1 2 6.263398 6.263398 4502 +joanna 1 2 6.263398 6.263398 4503 +radios 1 2 6.263398 6.263398 4504 +pde 1 2 6.263398 6.263398 4505 +seminarc 1 1 6.957497 6.957497 8228 +rspring 1 1 6.957497 6.957497 8229 +ronen 1 1 6.957497 6.957497 8230 +troi 1 1 6.957497 6.957497 8231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^ new file mode 100644 index 00000000..9fcdd0a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +graduat 1 215 1.386294 1.386294 31 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +process 2 142 1.945910 3.891820 72 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +mathemat 2 108 2.197225 4.394450 123 +topic 2 114 2.197225 4.394450 110 +site 1 106 2.197225 2.197225 119 +specif 1 106 2.197225 2.197225 106 +imag 2 91 2.397895 4.795790 161 +educ 2 86 2.484907 4.969814 191 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +materi 1 75 2.639057 2.639057 221 +view 1 70 2.708050 2.708050 254 +experi 2 64 2.772589 5.545178 283 +septemb 1 65 2.772589 2.772589 274 +special 1 56 2.890372 2.890372 320 +explor 1 58 2.890372 2.890372 324 +quarter 2 47 3.091042 6.182084 389 +seminar 1 38 3.295837 3.295837 470 +copyright 2 36 3.367296 6.734592 495 +winter 1 36 3.367296 3.367296 500 +collabor 1 32 3.465736 3.465736 543 +autumn 2 31 3.496508 6.993016 558 +notic 1 25 3.737670 3.737670 675 +instal 1 22 3.850148 3.850148 754 +varieti 1 22 3.850148 3.850148 740 +tanimoto 2 10 4.653960 9.307920 1429 +transcript 1 6 5.164786 5.164786 2067 +otherthan 1 6 5.164786 5.164786 2009 +subjectto 1 5 5.347108 5.347108 2369 +useof 1 3 5.857933 5.857933 3368 +quarterscs 1 1 6.957497 6.957497 8232 +topicssteven 1 1 6.957497 6.957497 8233 +instructorcs 1 1 6.957497 6.957497 8234 +varyfrom 1 1 6.957497 6.957497 8235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html new file mode 100644 index 00000000..6b2a4c1b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn95.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +inform 2 412 0.693147 1.386294 8 +time 2 293 1.098612 2.197224 17 +student 2 343 1.098612 2.197224 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +paper 10 205 1.609438 16.094380 38 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +read 8 154 1.791759 14.334072 47 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +first 2 140 1.945910 3.891820 71 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +document 1 121 2.079442 2.079442 89 +technolog 1 131 2.079442 2.079442 102 +world 2 115 2.197225 4.394450 126 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +technic 1 100 2.302585 2.302585 140 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +octob 4 89 2.397895 9.591580 156 +proceed 4 93 2.397895 9.591580 152 +present 3 91 2.397895 7.193685 145 +comment 1 93 2.397895 2.397895 146 +novemb 5 81 2.484907 12.424535 179 +educ 4 86 2.484907 9.939628 191 +wide 2 84 2.484907 4.969814 185 +contain 1 81 2.484907 2.484907 174 +environ 1 84 2.484907 2.484907 177 +second 1 81 2.484907 2.484907 166 +internet 1 83 2.484907 2.484907 186 +state 1 76 2.564949 2.564949 207 +html 4 75 2.639057 10.556228 235 +meet 3 72 2.639057 7.917171 229 +materi 2 75 2.639057 5.278114 221 +intellig 1 72 2.639057 2.639057 225 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +onlin 1 75 2.639057 2.639057 223 +line 1 75 2.639057 2.639057 231 +degre 2 69 2.708050 5.416100 259 +view 1 70 2.708050 2.708050 254 +descript 1 64 2.772589 2.772589 271 +import 1 65 2.772589 2.772589 282 +share 1 59 2.833213 2.833213 304 +possibl 2 47 3.091042 6.182084 378 +discuss 2 45 3.135494 6.270988 399 +keep 1 44 3.135494 3.135494 409 +describ 1 45 3.135494 3.135494 400 +netscap 1 44 3.135494 3.135494 395 +protocol 1 45 3.135494 3.135494 407 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +combin 1 42 3.218876 3.218876 421 +vision 1 41 3.218876 3.218876 430 +futur 1 41 3.218876 3.218876 427 +map 2 39 3.258097 6.516194 452 +littl 1 39 3.258097 3.258097 454 +tutori 1 39 3.258097 3.258097 437 +paul 1 38 3.295837 3.295837 471 +respons 1 37 3.332205 3.332205 476 +copyright 2 36 3.367296 6.734592 495 +short 1 36 3.367296 3.367296 499 +either 3 35 3.401197 10.203591 506 +michael 1 35 3.401197 3.401197 514 +john 1 33 3.433987 3.433987 532 +concept 2 32 3.465736 6.931472 537 +ad 1 32 3.465736 3.465736 544 +autumn 2 31 3.496508 6.993016 558 +someth 1 31 3.496508 3.496508 554 +option 1 30 3.555348 3.555348 575 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +enhanc 1 26 3.688879 3.688879 644 +notic 1 25 3.737670 3.737670 675 +brows 2 23 3.806662 7.613324 726 +instal 1 22 3.850148 3.850148 754 +geometri 1 22 3.850148 3.850148 752 +annot 1 21 3.912023 3.912023 775 +navig 1 21 3.912023 3.912023 796 +toolkit 2 20 3.951244 7.902488 835 +anderson 1 19 4.007333 4.007333 860 +particularli 1 19 4.007333 4.007333 867 +lower 1 18 4.060443 4.060443 886 +layer 2 17 4.110874 8.221748 926 +adam 1 17 4.110874 4.110874 934 +choic 1 16 4.174387 4.174387 979 +piec 1 15 4.248495 4.248495 1020 +achiev 1 14 4.317488 4.317488 1088 +hong 1 14 4.317488 4.317488 1105 +central 1 13 4.382027 4.382027 1160 +promot 2 12 4.465908 8.931816 1235 +hypermedia 1 12 4.465908 4.465908 1247 +infrastructur 1 12 4.465908 4.465908 1234 +noth 1 11 4.553877 4.553877 1328 +smart 1 11 4.553877 4.553877 1352 +baer 1 11 4.553877 4.553877 1353 +mosaic 1 10 4.653960 4.653960 1426 +tutor 1 9 4.753590 4.753590 1552 +beyond 2 7 5.010635 10.021270 1834 +documentfor 1 7 5.010635 5.010635 1865 +davi 1 7 5.010635 5.010635 1888 +baker 1 7 5.010635 5.010635 1812 +transcript 1 6 5.164786 5.164786 2067 +theclass 1 6 5.164786 5.164786 2060 +otherthan 1 6 5.164786 5.164786 2009 +plu 1 6 5.164786 5.164786 2004 +trail 1 6 5.164786 5.164786 2071 +newinform 1 5 5.347108 5.347108 2342 +subjectto 1 5 5.347108 5.347108 2369 +barton 1 5 5.347108 5.347108 2371 +jeremi 1 5 5.347108 5.347108 2360 +carlson 1 5 5.347108 5.347108 2351 +sandi 1 4 5.568345 5.568345 2765 +increasingli 1 4 5.568345 5.568345 2766 +ncsa 1 4 5.568345 5.568345 2767 +ward 2 2 6.263398 12.526796 4506 +tessa 1 2 6.263398 6.263398 4507 +learner 1 2 6.263398 6.263398 4508 +uiuc 1 2 6.263398 6.263398 4509 +marla 1 2 6.263398 6.263398 4510 +soap 1 2 6.263398 6.263398 4511 +presentor 5 1 6.957497 34.787485 8236 +labord 2 1 6.957497 13.914994 8237 +wwwwelcom 1 1 6.957497 6.957497 8238 +mccalla 1 1 6.957497 6.957497 8239 +importanceof 1 1 6.957497 6.957497 8240 +youngquist 1 1 6.957497 6.957497 8241 +aboutinternet 1 1 6.957497 6.957497 8242 +microworld 1 1 6.957497 6.957497 8243 +tointellig 1 1 6.957497 6.957497 8244 +bartel 1 1 6.957497 6.957497 8245 +mathematicsconnect 1 1 6.957497 6.957497 8246 +gari 1 1 6.957497 6.957497 8247 +ambiti 1 1 6.957497 6.957497 8248 +thethem 1 1 6.957497 6.957497 8249 +moresophist 1 1 6.957497 6.957497 8250 +elabor 1 1 6.957497 6.957497 8251 +ofwww 1 1 6.957497 6.957497 8252 +intechn 1 1 6.957497 6.957497 8253 +couldmak 1 1 6.957497 6.957497 8254 +applicationsthat 1 1 6.957497 6.957497 8255 +webhttp 1 1 6.957497 6.957497 8256 +empow 1 1 6.957497 6.957497 8257 +agehttp 1 1 6.957497 6.957497 8258 +communitieshttp 1 1 6.957497 6.957497 8259 +dietz 1 1 6.957497 6.957497 8260 +serviceshttp 1 1 6.957497 6.957497 8261 +dcewebkit 1 1 6.957497 6.957497 8262 +zhumeet 1 1 6.957497 6.957497 8263 +aboutcurriculum 1 1 6.957497 6.957497 8264 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html new file mode 100644 index 00000000..31c3610d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590D^autumn96.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +current 2 284 1.098612 2.197224 21 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +paper 3 205 1.609438 4.828314 38 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +technolog 3 131 2.079442 6.238326 102 +schedul 3 119 2.079442 6.238326 85 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +site 1 106 2.197225 2.197225 119 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +present 1 91 2.397895 2.397895 145 +learn 3 86 2.484907 7.454721 170 +internet 1 83 2.484907 2.484907 186 +school 1 84 2.484907 2.484907 188 +come 1 78 2.564949 2.564949 202 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +view 1 70 2.708050 2.708050 254 +descript 1 64 2.772589 2.772589 271 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +explor 2 58 2.890372 5.780744 324 +cover 1 55 2.944439 2.944439 329 +visual 1 48 3.044522 3.044522 372 +possibl 1 47 3.091042 3.091042 378 +move 1 47 3.091042 3.091042 382 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +seminar 1 38 3.295837 3.295837 470 +respons 1 37 3.332205 3.332205 476 +copyright 2 36 3.367296 6.734592 495 +collabor 1 32 3.465736 3.465736 543 +autumn 3 31 3.496508 10.489524 558 +particip 1 29 3.583519 3.583519 589 +depend 1 29 3.583519 3.583519 583 +notic 1 25 3.737670 3.737670 675 +wai 1 25 3.737670 3.737670 662 +togeth 1 23 3.806662 3.806662 714 +instal 1 22 3.850148 3.850148 754 +decid 1 14 4.317488 4.317488 1075 +context 1 13 4.382027 4.382027 1153 +subset 1 10 4.653960 4.653960 1425 +tanimoto 1 10 4.653960 4.653960 1429 +otherthan 1 6 5.164786 5.164786 2009 +middl 2 5 5.347108 10.694216 2372 +subjectto 1 5 5.347108 5.347108 2369 +evid 1 4 5.568345 5.568345 2768 +innew 1 2 6.263398 6.263398 4512 +learningwelcom 1 1 6.957497 6.957497 8265 +methodologiesfor 1 1 6.957497 6.957497 8266 +forcollabor 1 1 6.957497 6.957497 8267 +willtak 1 1 6.957497 6.957497 8268 +ofthes 1 1 6.957497 6.957497 8269 +ofai 1 1 6.957497 6.957497 8270 +ofstud 1 1 6.957497 6.957497 8271 +intopeopl 1 1 6.957497 6.957497 8272 +meani 1 1 6.957497 6.957497 8273 +schoolmai 1 1 6.957497 6.957497 8274 +participatingstud 1 1 6.957497 6.957497 8275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590MV^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590MV^ new file mode 100644 index 00000000..078f8078 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590MV^ @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +system 14 443 0.693147 9.704058 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +us 2 329 1.098612 2.197224 16 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +parallel 12 169 1.791759 21.501108 60 +distribut 6 162 1.791759 10.750554 51 +base 4 165 1.791759 7.167036 50 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +model 3 145 1.945910 5.837730 69 +process 2 142 1.945910 3.891820 72 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +schedul 17 119 2.079442 35.350514 85 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +memori 6 101 2.302585 13.815510 139 +techniqu 1 99 2.302585 2.302585 138 +technic 1 100 2.302585 2.302585 140 +follow 1 92 2.397895 2.397895 143 +octob 1 89 2.397895 2.397895 156 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +ieee 1 86 2.484907 2.484907 190 +environ 1 84 2.484907 2.484907 177 +requir 1 81 2.484907 2.484907 167 +activ 1 84 2.484907 2.484907 182 +dynam 2 76 2.564949 5.129898 194 +april 1 77 2.564949 2.564949 196 +server 1 76 2.564949 2.564949 204 +june 1 79 2.564949 2.564949 214 +appear 1 78 2.564949 2.564949 210 +workshop 4 71 2.639057 10.556228 239 +polici 3 64 2.772589 8.317767 279 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +share 4 59 2.833213 11.332852 304 +processor 11 54 2.944439 32.388829 335 +scientif 1 53 2.944439 2.944439 341 +cover 1 55 2.944439 2.944439 329 +principl 1 48 3.044522 3.044522 357 +set 1 50 3.044522 3.044522 361 +adapt 1 46 3.091042 3.091042 387 +discuss 1 45 3.135494 3.135494 399 +tutori 2 39 3.258097 6.516194 437 +transact 1 39 3.258097 3.258097 438 +theoret 1 39 3.258097 3.258097 446 +open 1 38 3.295837 3.295837 469 +workstat 1 37 3.332205 3.332205 479 +ofth 1 36 3.367296 3.367296 491 +global 1 34 3.401197 3.401197 520 +concurr 1 34 3.401197 3.401197 501 +product 1 33 3.433987 3.433987 527 +richard 1 31 3.496508 3.496508 559 +multiprocessor 6 28 3.610918 21.665508 605 +measur 4 28 3.610918 14.443672 609 +univ 1 28 3.610918 3.610918 617 +proc 14 26 3.688879 51.644306 649 +strategi 4 25 3.737670 14.950680 682 +supercomput 2 25 3.737670 7.475340 681 +ofwashington 1 22 3.850148 3.850148 766 +william 1 22 3.850148 3.850148 765 +programminglanguag 1 21 3.912023 3.912023 782 +alloc 7 20 3.951244 27.658708 821 +smith 1 20 3.951244 3.951244 820 +runtim 2 19 4.007333 8.014666 858 +eric 2 19 4.007333 8.014666 870 +anderson 1 19 4.007333 4.007333 860 +thoma 1 18 4.060443 4.060443 901 +scott 1 18 4.060443 4.060443 884 +expand 1 17 4.110874 4.110874 928 +miller 1 17 4.110874 4.110874 949 +asplo 1 17 4.110874 4.110874 948 +partit 1 16 4.174387 4.174387 984 +jose 1 16 4.174387 4.174387 976 +demand 3 14 4.317488 12.952464 1073 +conf 5 13 4.382027 21.910135 1181 +sigmetr 4 13 4.382027 17.528108 1173 +coordin 2 13 4.382027 8.764054 1182 +karlin 1 13 4.382027 4.382027 1176 +workload 3 12 4.465908 13.397724 1210 +mari 2 12 4.465908 8.931816 1266 +gupta 2 12 4.465908 8.931816 1241 +kenneth 2 12 4.465908 8.931816 1265 +characterist 1 12 4.465908 4.465908 1257 +philadelphia 1 12 4.465908 4.465908 1244 +bill 1 11 4.553877 4.553877 1297 +impact 1 11 4.553877 4.553877 1334 +wood 1 11 4.553877 4.553877 1355 +santa 6 10 4.653960 27.923760 1441 +ofcomput 2 10 4.653960 9.307920 1442 +patterson 1 9 4.753590 4.753590 1554 +vernon 1 9 4.753590 4.753590 1556 +job 1 8 4.875197 4.875197 1702 +migrat 2 7 5.010635 10.021270 1851 +burger 1 7 5.010635 5.010635 1889 +multiprogram 5 6 5.164786 25.823930 2010 +chandra 1 6 5.164786 5.164786 2091 +symp 2 5 5.347108 10.694216 2376 +ousterhout 1 5 5.347108 5.347108 2301 +leblanc 1 5 5.347108 5.347108 2377 +affin 1 5 5.347108 5.347108 2378 +parallelprogram 1 5 5.347108 5.347108 2379 +ofparallel 1 5 5.347108 5.347108 2380 +culler 1 5 5.347108 5.347108 2381 +hyder 1 4 5.568345 5.568345 2772 +anoop 1 4 5.568345 5.568345 2770 +identif 1 4 5.568345 5.568345 2773 +barbara 6 3 5.857933 35.147598 3380 +ipp 4 3 5.857933 23.431732 3381 +dusseau 2 3 5.857933 11.715866 3382 +nguyen 2 3 5.857933 11.715866 3290 +zahorjan 2 3 5.857933 11.715866 3383 +mccann 2 3 5.857933 11.715866 3273 +tran 1 3 5.857933 5.857933 3384 +saltz 1 3 5.857933 5.857933 3385 +am 1 3 5.857933 5.857933 3386 +patrick 1 3 5.857933 5.857933 3334 +weihl 1 3 5.857933 5.857933 3284 +tradeoff 1 3 5.857933 5.857933 3387 +parson 2 2 6.263398 12.526796 4528 +memorymultiprocessor 2 2 6.263398 12.526796 4529 +gang 1 2 6.263398 6.263398 4530 +inrd 1 2 6.263398 6.263398 4531 +andsequenti 1 2 6.263398 6.263398 4532 +tucker 1 2 6.263398 6.263398 4307 +shun 1 2 6.263398 6.263398 4533 +leung 1 2 6.263398 6.263398 4534 +han 1 2 6.263398 6.263398 4535 +agraw 1 2 6.263398 6.263398 4536 +derek 1 2 6.263398 6.263398 4537 +bunt 1 2 6.263398 6.263398 4308 +rosenblum 1 2 6.263398 6.263398 4314 +tera 1 2 6.263398 6.263398 4224 +computersystem 1 2 6.263398 6.263398 4360 +arpaci 2 1 6.957497 13.914994 8345 +vaswani 2 1 6.957497 13.914994 8346 +sevcik 2 1 6.957497 13.914994 8347 +feitelson 2 1 6.957497 13.914994 8348 +coschedul 2 1 6.957497 13.914994 8349 +mvmv 1 1 6.957497 6.957497 8350 +systemsprofessor 1 1 6.957497 6.957497 8351 +vernontim 1 1 6.957497 6.957497 8352 +pmlocat 1 1 6.957497 6.957497 8353 +now 1 1 6.957497 6.957497 8354 +vahdat 1 1 6.957497 6.957497 8355 +equi 1 1 6.957497 6.957497 8356 +issuesfor 1 1 6.957497 6.957497 8357 +workloadcharacterist 1 1 6.957497 6.957497 8358 +evangelo 1 1 6.957497 6.957497 8359 +markato 1 1 6.957497 6.957497 8360 +loopschedul 1 1 6.957497 6.957497 8361 +iniee 1 1 6.957497 6.957497 8362 +zima 1 1 6.957497 6.957497 8363 +chapman 1 1 6.957497 6.957497 8364 +edjlali 1 1 6.957497 6.957497 8365 +sussman 1 1 6.957497 6.957497 8366 +comparisonsshikharesh 1 1 6.957497 6.957497 8367 +majumdar 1 1 6.957497 6.957497 8368 +eager 1 1 6.957497 6.957497 8369 +variabilityservic 1 1 6.957497 6.957497 8370 +dror 1 1 6.957497 6.957497 8371 +nitzberg 1 1 6.957497 6.957497 8372 +thenasa 1 1 6.957497 6.957497 8373 +ipsc 1 1 6.957497 6.957497 8374 +leutenegg 1 1 6.957497 6.957497 8375 +sobalvarro 1 1 6.957497 6.957497 8376 +rohit 1 1 6.957497 6.957497 8377 +devin 1 1 6.957497 6.957497 8378 +verghes 1 1 6.957497 6.957497 8379 +mendel 1 1 6.957497 6.957497 8380 +multiprocessorcomput 1 1 6.957497 6.957497 8381 +alverson 1 1 6.957497 6.957497 8382 +kahan 1 1 6.957497 6.957497 8383 +korri 1 1 6.957497 6.957497 8384 +effectivedistribut 1 1 6.957497 6.957497 8385 +rudolph 1 1 6.957497 6.957497 8386 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590b b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590b new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590b @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590bi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590bi^ new file mode 100644 index 00000000..e1f00e57 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590bi^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +last 2 314 1.098612 2.197224 14 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +updat 2 191 1.609438 3.218876 41 +algorithm 1 162 1.791759 1.791759 57 +file 2 132 1.945910 3.891820 70 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +schedul 3 119 2.079442 6.238326 85 +postscript 2 131 2.079442 4.158884 90 +provid 1 121 2.079442 2.079442 94 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +html 2 75 2.639057 5.278114 235 +free 2 73 2.639057 5.278114 224 +materi 1 75 2.639057 2.639057 221 +syllabu 3 67 2.708050 8.124150 247 +window 1 68 2.708050 2.708050 242 +handout 1 64 2.772589 2.772589 263 +automat 1 61 2.833213 2.833213 306 +sever 1 56 2.890372 2.890372 322 +format 4 48 3.044522 12.178088 356 +fast 1 42 3.218876 3.218876 429 +slide 1 38 3.295837 3.295837 467 +origin 1 38 3.295837 3.295837 472 +winter 2 36 3.367296 6.734592 500 +print 1 34 3.401197 3.401197 503 +titl 2 31 3.496508 6.993016 556 +richard 1 31 3.496508 3.496508 559 +usual 2 28 3.610918 7.221836 608 +load 1 28 3.610918 3.610918 601 +administr 1 27 3.637586 3.637586 628 +linux 1 27 3.637586 3.637586 631 +viewer 2 21 3.912023 7.824046 787 +martin 1 21 3.912023 3.912023 794 +latest 1 21 3.912023 3.912023 785 +render 1 17 4.110874 4.110874 947 +biologi 1 15 4.248495 4.248495 1049 +draft 1 14 4.317488 4.317488 1085 +latex 1 14 4.317488 4.317488 1064 +translat 2 13 4.382027 8.764054 1164 +larri 1 13 4.382027 4.382027 1142 +readabl 1 12 4.465908 4.465908 1258 +adob 2 7 5.010635 10.021270 1873 +molecular 1 7 5.010635 5.010635 1887 +ghostscript 1 7 5.010635 5.010635 1867 +smaller 1 7 5.010635 5.010635 1874 +legibl 1 7 5.010635 5.010635 1866 +acrobat 4 6 5.164786 20.659144 2063 +greatest 1 6 5.164786 5.164786 2073 +fewer 1 6 5.164786 5.164786 2074 +ruzzo 2 5 5.347108 10.694216 2345 +karp 1 5 5.347108 5.347108 2284 +isprefer 1 4 5.568345 5.568345 2729 +isfast 1 4 5.568345 5.568345 2730 +ghostscriptcan 1 4 5.568345 5.568345 2731 +tompaclass 1 3 5.857933 5.857933 3310 +faith 1 3 5.857933 5.857933 3363 +bboard 1 1 6.957497 6.957497 8227 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590g^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590g^ new file mode 100644 index 00000000..2efcafd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590g^ @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +mail 3 238 1.386294 4.158882 22 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +paper 11 205 1.609438 17.703818 38 +list 2 201 1.609438 3.218876 39 +read 6 154 1.791759 10.750554 47 +avail 2 169 1.791759 3.583518 48 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +architectur 5 139 1.945910 9.729550 77 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +machin 2 129 2.079442 4.158884 95 +report 1 131 2.079442 2.079442 92 +welcom 1 122 2.079442 2.079442 99 +send 1 114 2.197225 2.197225 109 +memori 3 101 2.302585 6.907755 139 +technic 1 100 2.302585 2.302585 140 +present 3 91 2.397895 7.193685 145 +follow 2 92 2.397895 4.795790 143 +select 1 91 2.397895 2.397895 154 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +member 1 84 2.484907 2.484907 165 +start 1 83 2.484907 2.484907 173 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +tuesdai 7 73 2.639057 18.473399 219 +line 3 75 2.639057 7.917171 231 +meet 2 72 2.639057 5.278114 229 +workshop 1 71 2.639057 2.639057 239 +summari 1 73 2.639057 2.639057 237 +intellig 1 72 2.639057 2.639057 225 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +receiv 1 66 2.708050 2.708050 244 +organ 2 65 2.772589 5.545178 265 +previou 2 62 2.772589 5.545178 290 +copi 1 63 2.772589 2.772589 284 +prof 1 64 2.772589 2.772589 273 +juli 1 60 2.833213 2.833213 305 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +processor 2 54 2.944439 5.888878 335 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +week 3 52 2.995732 8.987196 343 +case 1 51 2.995732 2.995732 351 +format 2 48 3.044522 6.089044 356 +quarter 4 47 3.091042 12.364168 389 +discuss 2 45 3.135494 6.270988 399 +execut 1 45 3.135494 3.135494 404 +might 1 41 3.218876 3.218876 426 +continu 1 39 3.258097 3.258097 448 +author 1 39 3.258097 3.258097 450 +credit 2 38 3.295837 6.591674 460 +slide 1 38 3.295837 3.295837 467 +formal 1 37 3.332205 3.332205 478 +ofth 2 36 3.367296 6.734592 491 +short 1 36 3.367296 3.367296 499 +either 1 35 3.401197 3.401197 506 +bibliographi 1 34 3.401197 3.401197 518 +jame 1 35 3.401197 3.401197 507 +posit 3 31 3.496508 10.489524 552 +progress 1 28 3.610918 3.610918 598 +usual 1 28 3.610918 3.610918 608 +load 1 28 3.610918 3.610918 601 +subject 1 26 3.688879 3.688879 647 +valu 2 25 3.737670 7.475340 665 +lead 2 23 3.806662 7.613324 718 +begin 1 23 3.806662 3.806662 716 +thank 1 23 3.806662 3.806662 721 +variabl 1 23 3.806662 3.806662 715 +chip 1 21 3.912023 3.912023 770 +leav 1 21 3.912023 3.912023 772 +anderson 3 19 4.007333 12.021999 860 +predict 1 19 4.007333 4.007333 855 +miss 1 19 4.007333 4.007333 866 +asplo 3 17 4.110874 12.332622 948 +brown 1 16 4.174387 4.174387 977 +micro 1 15 4.248495 4.248495 1031 +hopefulli 1 14 4.317488 4.317488 1071 +sai 1 13 4.382027 4.382027 1175 +rememb 1 12 4.465908 4.465908 1217 +literatur 1 11 4.553877 4.553877 1300 +isca 1 11 4.553877 4.553877 1354 +baer 1 11 4.553877 4.553877 1353 +jean 1 10 4.653960 4.653960 1440 +subscrib 2 9 4.753590 9.507180 1541 +herefor 1 9 4.753590 4.753590 1483 +wall 1 9 4.753590 4.753590 1553 +patterson 1 9 4.753590 4.753590 1554 +readm 1 8 4.875197 4.875197 1699 +burger 2 7 5.010635 10.021270 1889 +ruth 1 7 5.010635 5.010635 1870 +multicomput 1 7 5.010635 5.010635 1890 +goodman 1 7 5.010635 5.010635 1891 +fortun 1 7 5.010635 5.010635 1872 +tobe 1 6 5.164786 5.164786 1995 +ifyou 1 6 5.164786 5.164786 1992 +majordomo 1 6 5.164786 5.164786 2066 +philipos 1 5 5.347108 5.347108 2373 +appreci 1 5 5.347108 5.347108 2374 +volunt 1 5 5.347108 5.347108 2307 +andrea 1 5 5.347108 5.347108 2375 +lunch 2 3 5.857933 11.715866 3369 +shen 1 3 5.857933 5.857933 3370 +pong 1 3 5.857933 5.857933 3371 +stefano 1 3 5.857933 5.857933 3372 +kaxira 1 3 5.857933 5.857933 3373 +yelick 1 3 5.857933 5.857933 3374 +shortli 1 3 5.857933 5.857933 3375 +heat 1 2 6.263398 6.263398 4113 +gershoni 1 2 6.263398 6.263398 4513 +matthai 1 2 6.263398 6.263398 4514 +tabular 1 2 6.263398 6.263398 4515 +guru 1 2 6.263398 6.263398 4476 +wilkerson 1 2 6.263398 6.263398 4516 +dalli 1 2 6.263398 6.263398 4517 +datascalar 1 2 6.263398 6.263398 4518 +spsd 1 2 6.263398 6.263398 4519 +iram 1 2 6.263398 6.263398 4520 +cseg 2 1 6.957497 13.914994 8276 +lunchcs 1 1 6.957497 6.957497 8277 +lunchcours 1 1 6.957497 6.957497 8278 +loupbaermeet 1 1 6.957497 6.957497 8279 +withalmost 1 1 6.957497 6.957497 8280 +discussedat 1 1 6.957497 6.957497 8281 +byesteem 1 1 6.957497 6.957497 8282 +mostlyw 1 1 6.957497 6.957497 8283 +discussionson 1 1 6.957497 6.957497 8284 +quartersi 1 1 6.957497 6.957497 8285 +fromparticip 1 1 6.957497 6.957497 8286 +oncrit 1 1 6.957497 6.957497 8287 +hereread 1 1 6.957497 6.957497 8288 +morethem 1 1 6.957497 6.957497 8289 +molli 1 1 6.957497 6.957497 8290 +thestud 1 1 6.957497 6.957497 8291 +informallyor 1 1 6.957497 6.957497 8292 +lipasti 1 1 6.957497 6.957497 8293 +advanceprogrami 1 1 6.957497 6.957497 8294 +thesaulsburi 1 1 6.957497 6.957497 8295 +readashlei 1 1 6.957497 6.957497 8296 +saulsburi 1 1 6.957497 6.957497 8297 +fong 1 1 6.957497 6.957497 8298 +nowatzyk 1 1 6.957497 6.957497 8299 +fillo 1 1 6.957497 6.957497 8300 +keckler 1 1 6.957497 6.957497 8301 +machinelink 1 1 6.957497 6.957497 8302 +readdoug 1 1 6.957497 6.957497 8303 +neton 1 1 6.957497 6.957497 8304 +cardwel 1 1 6.957497 6.957497 8305 +fromm 1 1 6.957497 6.957497 8306 +keeton 1 1 6.957497 6.957497 8307 +kozyraki 1 1 6.957497 6.957497 8308 +thomasand 1 1 6.957497 6.957497 8309 +availableher 1 1 6.957497 6.957497 8310 +themajordomo 1 1 6.957497 6.957497 8311 +shouldinclud 1 1 6.957497 6.957497 8312 +lineblank 1 1 6.957497 6.957497 8313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590h^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590h^ new file mode 100644 index 00000000..0c81bf37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590h^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +spring 1 131 2.079442 2.079442 88 +interact 1 62 2.772589 2.772589 270 +offer 1 43 3.178054 3.178054 414 +human 1 32 3.465736 3.465736 546 +pagecs 1 26 3.688879 3.688879 658 +experiment 1 26 3.688879 3.688879 645 +born 1 21 3.912023 3.912023 798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590k^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590k^ new file mode 100644 index 00000000..ea6c9aa3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590k^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 2 293 1.098612 2.197224 17 +gener 2 220 1.386294 2.772588 27 +mail 2 238 1.386294 2.772588 22 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +data 2 170 1.791759 3.583518 49 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +compil 4 122 2.079442 8.317768 96 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +welcom 1 122 2.079442 2.079442 99 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +memori 2 101 2.302585 4.605170 139 +second 1 81 2.484907 2.484907 166 +optim 2 79 2.564949 5.129898 197 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +line 2 75 2.639057 5.278114 231 +meet 1 72 2.639057 2.639057 229 +practic 1 70 2.708050 2.708050 246 +organ 1 65 2.772589 2.772589 265 +wednesdai 1 64 2.772589 2.772589 261 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +special 2 56 2.890372 5.780744 320 +week 9 52 2.995732 26.961588 343 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +cost 1 37 3.332205 3.332205 480 +represent 1 35 3.401197 3.401197 512 +michael 1 35 3.401197 3.401197 514 +graph 1 30 3.555348 3.555348 576 +depend 1 29 3.583519 3.583519 583 +framework 1 28 3.610918 3.610918 606 +arrai 1 27 3.637586 3.637586 627 +subject 1 26 3.688879 3.688879 647 +valu 1 25 3.737670 3.737670 665 +flow 1 24 3.761200 3.761200 700 +leav 1 21 3.912023 3.912023 772 +alloc 1 20 3.951244 3.951244 821 +anderson 2 19 4.007333 8.014666 860 +andrew 1 19 4.007333 4.007333 849 +offici 1 18 4.060443 4.060443 894 +thoma 1 18 4.060443 4.060443 901 +regist 2 17 4.110874 8.221748 938 +analyz 1 17 4.110874 4.110874 925 +georg 1 16 4.174387 4.174387 994 +susan 1 15 4.248495 4.248495 1050 +todd 1 15 4.248495 4.248495 1051 +floor 1 14 4.317488 4.317488 1070 +dean 1 14 4.317488 4.317488 1104 +charl 2 13 4.382027 8.764054 1149 +sai 1 13 4.382027 4.382027 1175 +gupta 2 12 4.465908 8.931816 1241 +loew 1 12 4.465908 4.465908 1252 +iter 1 12 4.465908 4.465908 1206 +kenneth 1 12 4.465908 4.465908 1265 +grant 1 12 4.465908 4.465908 1216 +minimum 1 9 4.753590 4.753590 1555 +strength 1 9 4.753590 4.753590 1494 +subscrib 1 9 4.753590 4.753590 1541 +paradigm 1 8 4.875197 4.875197 1662 +lewi 1 8 4.875197 4.875197 1700 +erik 1 8 4.875197 4.875197 1701 +grove 1 8 4.875197 4.875197 1675 +roger 2 7 5.010635 10.021270 1892 +multicomput 1 7 5.010635 5.010635 1890 +fischer 1 7 5.010635 5.010635 1893 +reduct 1 7 5.010635 5.010635 1877 +ruth 1 7 5.010635 5.010635 1870 +banerje 1 6 5.164786 5.164786 2018 +mock 1 6 5.164786 5.164786 2087 +tullsen 1 6 5.164786 5.164786 2081 +majordomo 1 6 5.164786 5.164786 2066 +bottleneck 1 4 5.568345 5.568345 2769 +anoop 1 4 5.568345 5.568345 2770 +interprocedur 1 4 5.568345 5.568345 2771 +noel 1 3 5.857933 5.857933 3376 +crew 1 3 5.857933 5.857933 3347 +litvinov 1 3 5.857933 5.857933 3343 +garrett 1 3 5.857933 5.857933 3377 +jen 1 3 5.857933 5.857933 3378 +blank 1 3 5.857933 5.857933 3379 +seminarcs 1 2 6.263398 6.263398 4521 +eggersand 1 2 6.263398 6.263398 4522 +francoi 1 2 6.263398 6.263398 4523 +taxat 1 2 6.263398 6.263398 4524 +ernst 1 2 6.263398 6.263398 4525 +secoski 1 2 6.263398 6.263398 4526 +lazi 1 2 6.263398 6.263398 4527 +seminarcours 1 1 6.957497 6.957497 8314 +craigchambersmeet 1 1 6.957497 6.957497 8315 +butreal 1 1 6.957497 6.957497 8316 +atrium 1 1 6.957497 6.957497 8317 +scheduleweek 1 1 6.957497 6.957497 8318 +memspi 1 1 6.957497 6.957497 8319 +margaretmartonosi 1 1 6.957497 6.957497 8320 +consel 1 1 6.957497 6.957497 8321 +itsus 1 1 6.957497 6.957497 8322 +evelyn 1 1 6.957497 6.957497 8323 +duesterwald 1 1 6.957497 6.957497 8324 +rajiv 1 1 6.957497 6.957497 8325 +maryl 1 1 6.957497 6.957497 8326 +soffa 1 1 6.957497 6.957497 8327 +danielweis 1 1 6.957497 6.957497 8328 +bjarn 1 1 6.957497 6.957497 8329 +steensgaard 1 1 6.957497 6.957497 8330 +coalesc 1 1 6.957497 6.957497 8331 +appel 1 1 6.957497 6.957497 8332 +hooverand 1 1 6.957497 6.957497 8333 +zadeck 1 1 6.957497 6.957497 8334 +byprivthviraj 1 1 6.957497 6.957497 8335 +stevenkurland 1 1 6.957497 6.957497 8336 +knoblock 1 1 6.957497 6.957497 8337 +knoop 1 1 6.957497 6.957497 8338 +oliv 1 1 6.957497 6.957497 8339 +andbernhard 1 1 6.957497 6.957497 8340 +steffen 1 1 6.957497 6.957497 8341 +subscribecsek 1 1 6.957497 6.957497 8342 +shortlyrec 1 1 6.957497 6.957497 8343 +melodi 1 1 6.957497 6.957497 8344 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590o^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590o^ new file mode 100644 index 00000000..9e1ab4a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590o^ @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 11 374 0.693147 7.624617 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 2 238 1.386294 2.772588 22 +gener 2 220 1.386294 2.772588 27 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +parallel 14 169 1.791759 25.084626 60 +data 11 170 1.791759 19.709349 49 +applic 5 170 1.791759 8.958795 56 +distribut 4 162 1.791759 7.167036 51 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +model 4 145 1.945910 7.783640 69 +perform 2 143 1.945910 3.891820 74 +object 1 138 1.945910 1.945910 79 +compil 9 122 2.079442 18.714978 96 +analysi 7 124 2.079442 14.556094 98 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +tool 1 117 2.079442 2.079442 93 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +mathemat 1 108 2.197225 2.197225 123 +access 1 102 2.302585 2.302585 136 +commun 5 95 2.397895 11.989475 157 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +environ 2 84 2.484907 4.969814 177 +librari 1 87 2.484907 2.484907 181 +larg 1 82 2.484907 2.484907 168 +optim 6 79 2.564949 15.389694 197 +messag 2 76 2.564949 5.129898 212 +mondai 1 77 2.564949 2.564949 206 +orient 1 80 2.564949 2.564949 205 +effici 1 73 2.639057 2.639057 233 +tuesdai 1 73 2.639057 2.639057 219 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +septemb 1 65 2.772589 2.772589 274 +local 2 55 2.944439 5.888878 334 +still 1 50 3.044522 3.044522 362 +numer 1 49 3.044522 3.044522 369 +quarter 3 47 3.091042 9.273126 389 +discuss 1 45 3.135494 3.135494 399 +cach 2 41 3.218876 6.437752 432 +live 1 40 3.258097 3.258097 451 +seminar 1 38 3.295837 3.295837 470 +open 1 38 3.295837 3.295837 469 +expect 1 37 3.332205 3.332205 484 +global 2 34 3.401197 6.802394 520 +task 2 25 3.737670 7.475340 678 +strategi 2 25 3.737670 7.475340 682 +supercomput 1 25 3.737670 3.737670 681 +scalabl 2 24 3.761200 7.522400 705 +input 2 23 3.806662 7.613324 727 +thread 2 23 3.806662 7.613324 722 +cooper 2 22 3.850148 7.700296 757 +output 2 21 3.912023 7.824046 788 +util 2 21 3.912023 7.824046 774 +exploit 2 20 3.951244 7.902488 836 +portabl 1 20 3.951244 3.951244 819 +eric 1 19 4.007333 4.007333 870 +runtim 1 19 4.007333 4.007333 858 +attend 1 18 4.060443 4.060443 893 +debug 1 17 4.110874 4.110874 944 +sign 1 16 4.174387 4.174387 970 +driven 1 15 4.248495 4.248495 1048 +stream 1 15 4.248495 4.248495 1015 +matlab 3 14 4.317488 12.952464 1081 +hopefulli 1 14 4.317488 4.317488 1071 +block 2 13 4.382027 8.764054 1183 +everyon 1 13 4.382027 4.382027 1148 +bodi 1 13 4.382027 4.382027 1178 +gupta 2 12 4.465908 8.931816 1241 +characterist 2 12 4.465908 8.931816 1257 +iter 2 12 4.465908 8.931816 1206 +loew 1 12 4.465908 4.465908 1252 +holidai 1 12 4.465908 4.465908 1224 +brad 1 12 4.465908 4.465908 1264 +loop 2 11 4.553877 9.107754 1310 +multithread 1 11 4.553877 4.553877 1315 +subscrib 1 9 4.753590 4.753590 1541 +rel 1 9 4.753590 4.753590 1487 +cross 2 8 4.875197 9.750394 1703 +reus 2 8 4.875197 9.750394 1661 +pldi 2 8 4.875197 9.750394 1704 +sean 1 8 4.875197 4.875197 1705 +core 2 7 5.010635 10.021270 1809 +spot 1 7 5.010635 5.010635 1894 +ruth 1 7 5.010635 5.010635 1870 +banerje 2 6 5.164786 10.329572 2018 +sung 2 6 5.164786 10.329572 2075 +reed 2 6 5.164786 10.329572 2086 +majordomo 1 6 5.164786 5.164786 2066 +zhou 1 6 5.164786 5.164786 2092 +icpp 4 5 5.347108 21.388432 2382 +cyclic 2 5 5.347108 10.694216 2383 +ppopp 6 4 5.568345 33.410070 2774 +choi 3 4 5.568345 16.705035 2732 +restructur 1 4 5.568345 5.568345 2775 +randal 1 4 5.568345 5.568345 2776 +ipp 4 3 5.857933 23.431732 3381 +atmospher 1 3 5.857933 5.857933 3388 +andwil 1 3 5.857933 5.857933 3335 +thepap 1 3 5.857933 5.857933 3254 +jason 1 3 5.857933 5.857933 3389 +blumof 1 3 5.857933 5.857933 3237 +foster 1 3 5.857933 5.857933 3159 +lcpc 4 2 6.263398 25.053592 4538 +kennedi 4 2 6.263398 25.053592 4539 +adv 2 2 6.263398 12.526796 4540 +chien 2 2 6.263398 12.526796 4541 +casual 1 2 6.263398 6.263398 4542 +subscribeto 1 2 6.263398 6.263398 4543 +deros 1 2 6.263398 6.263398 4474 +padua 1 2 6.263398 6.263398 4544 +kale 1 2 6.263398 6.263398 4545 +cilk 1 2 6.263398 6.263398 4242 +fritzson 1 2 6.263398 6.263398 4546 +potpourri 1 2 6.263398 6.263398 4547 +scalapack 2 1 6.957497 13.914994 8387 +ramaswami 2 1 6.957497 13.914994 8388 +hodg 2 1 6.957497 13.914994 8389 +mcintosh 2 1 6.957497 13.914994 8390 +chakarabarti 2 1 6.957497 13.914994 8391 +integer 2 1 6.957497 13.914994 8392 +crandal 2 1 6.957497 13.914994 8393 +aydt 2 1 6.957497 13.914994 8394 +bordawekar 2 1 6.957497 13.914994 8395 +choudahari 2 1 6.957497 13.914994 8396 +koelbel 2 1 6.957497 13.914994 8397 +paleczni 2 1 6.957497 13.914994 8398 +midkiff 2 1 6.957497 13.914994 8399 +fahring 2 1 6.957497 13.914994 8400 +hain 2 1 6.957497 13.914994 8401 +mehrotra 2 1 6.957497 13.914994 8402 +environmentslarri 1 1 6.957497 6.957497 8403 +snyderautumn 1 1 6.957497 6.957497 8404 +ten 1 1 6.957497 6.957497 8405 +ignit 1 1 6.957497 6.957497 8406 +hurri 1 1 6.957497 6.957497 8407 +cseo 1 1 6.957497 6.957497 8408 +datepaperpresentor 1 1 6.957497 6.957497 8409 +falcon 1 1 6.957497 6.957497 8410 +gallivan 1 1 6.957497 6.957497 8411 +gallopoulo 1 1 6.957497 6.957497 8412 +marsolf 1 1 6.957497 6.957497 8413 +ramkumar 1 1 6.957497 6.957497 8414 +forb 1 1 6.957497 6.957497 8415 +gotwal 1 1 6.957497 6.957497 8416 +sriniva 1 1 6.957497 6.957497 8417 +gannon 1 1 6.957497 6.957497 8418 +joerg 1 1 6.957497 6.957497 8419 +kuszmaul 1 1 6.957497 6.957497 8420 +leiserson 1 1 6.957497 6.957497 8421 +andersson 1 1 6.957497 6.957497 8422 +realign 1 1 6.957497 6.957497 8423 +kamachi 1 1 6.957497 6.957497 8424 +kusano 1 1 6.957497 6.957497 8425 +suehiro 1 1 6.957497 6.957497 8426 +tamura 1 1 6.957497 6.957497 8427 +sakon 1 1 6.957497 6.957497 8428 +rinard 1 1 6.957497 6.957497 8429 +abramson 1 1 6.957497 6.957497 8430 +michalak 1 1 6.957497 6.957497 8431 +sosic 1 1 6.957497 6.957497 8432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^ new file mode 100644 index 00000000..88e3b33c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +system 3 443 0.693147 2.079441 6 +mail 2 238 1.386294 2.772588 22 +list 2 201 1.609438 3.218876 39 +spring 1 131 2.079442 2.079442 88 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +messag 1 76 2.564949 2.564949 212 +line 1 75 2.639057 2.639057 231 +variou 1 56 2.890372 2.890372 317 +summer 1 56 2.890372 2.890372 311 +week 1 52 2.995732 2.995732 343 +seminar 2 38 3.295837 6.591674 470 +winter 1 36 3.367296 3.367296 500 +autumn 2 31 3.496508 6.993016 558 +request 1 26 3.688879 3.688879 635 +alreadi 1 16 4.174387 4.174387 963 +bodi 1 13 4.382027 4.382027 1178 +web 1 12 4.465908 4.465908 1249 +subscrib 1 9 4.753590 4.753590 1541 +bit 1 7 5.010635 5.010635 1833 +crucial 1 5 5.347108 5.347108 2384 +ofinform 1 4 5.568345 5.568345 2707 +cancel 1 4 5.568345 5.568345 2746 +preliminariesif 1 1 6.957497 6.957497 8433 +besent 1 1 6.957497 6.957497 8434 +systemsin 1 1 6.957497 6.957497 8435 +quarterli 1 1 6.957497 6.957497 8436 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html new file mode 100644 index 00000000..e2ad7c2c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590s^au95^index.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +system 4 443 0.693147 2.772588 6 +paper 2 205 1.609438 3.218876 38 +oper 2 180 1.609438 3.218876 34 +read 2 154 1.791759 3.583518 47 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +file 3 132 1.945910 5.837730 70 +perform 2 143 1.945910 3.891820 74 +architectur 1 139 1.945910 1.945910 77 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +manag 1 114 2.197225 2.197225 125 +memori 2 101 2.302585 4.605170 139 +access 1 102 2.302585 2.302585 136 +present 8 91 2.397895 19.183160 145 +appear 1 78 2.564949 2.564949 210 +meet 2 72 2.639057 5.278114 229 +symposium 1 72 2.639057 2.639057 238 +interact 1 62 2.772589 2.772589 270 +virtual 1 62 2.772589 2.772589 285 +summer 1 56 2.890372 2.890372 311 +principl 1 48 3.044522 3.044522 357 +quarter 2 47 3.091042 6.182084 389 +fridai 1 44 3.135494 3.135494 390 +discuss 1 45 3.135494 3.135494 399 +cach 1 41 3.218876 3.218876 432 +workstat 1 37 3.332205 3.332205 479 +connect 1 37 3.332205 3.332205 485 +global 1 34 3.401197 3.401197 520 +return 1 34 3.401197 3.401197 502 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +storag 1 31 3.496508 3.496508 553 +cluster 1 28 3.610918 3.610918 612 +mobil 1 23 3.806662 3.806662 730 +exploit 1 20 3.951244 3.951244 836 +log 1 19 4.007333 4.007333 857 +anderson 1 19 4.007333 4.007333 860 +hierarch 1 15 4.248495 4.248495 1018 +coher 1 14 4.317488 4.317488 1109 +weak 1 13 4.382027 4.382027 1159 +loew 1 12 4.465908 4.465908 1252 +impact 1 11 4.553877 4.553877 1334 +sosp 1 10 4.653960 4.653960 1416 +franklin 1 10 4.653960 4.653960 1436 +voelker 1 9 4.753590 4.753590 1557 +romer 1 8 4.875197 4.875197 1706 +feelei 1 7 5.010635 5.010635 1859 +chan 1 7 5.010635 5.010635 1876 +trend 1 7 5.010635 5.010635 1842 +wolman 1 6 5.164786 5.164786 2093 +tiwari 1 5 5.347108 5.347108 2385 +philipos 1 5 5.347108 5.347108 2373 +wewil 1 4 5.568345 5.568345 2688 +savag 1 4 5.568345 5.568345 2777 +serverless 1 3 5.857933 5.857933 3181 +litvinov 1 3 5.857933 5.857933 3343 +fiuczynski 1 3 5.857933 5.857933 3390 +wilk 1 2 6.263398 6.263398 4548 +hypervisor 1 2 6.263398 6.263398 4549 +sriram 1 2 6.263398 6.263398 4550 +quarterw 1 1 6.957497 6.957497 8437 +upcomingacm 1 1 6.957497 6.957497 8438 +havean 1 1 6.957497 6.957497 8439 +scheduleoct 1 1 6.957497 6.957497 8440 +autoraid 1 1 6.957497 6.957497 8441 +montgomeri 1 1 6.957497 6.957497 8442 +stackabl 1 1 6.957497 6.957497 8443 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ new file mode 100644 index 00000000..8d24e678 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^590zpl^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 11 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +program 8 374 0.693147 5.545176 7 +interest 1 384 0.693147 0.693147 11 +student 3 343 1.098612 3.295836 19 +us 3 329 1.098612 3.295836 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +mail 5 238 1.386294 6.931470 22 +languag 3 227 1.386294 4.158882 26 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +class 5 199 1.609438 8.047190 37 +list 4 201 1.609438 6.437752 39 +includ 2 208 1.609438 3.218876 42 +modifi 1 178 1.609438 1.609438 35 +parallel 3 169 1.791759 5.375277 60 +develop 3 174 1.791759 5.375277 53 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +perform 6 143 1.945910 11.675460 74 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +high 3 130 2.079442 6.238326 101 +machin 3 129 2.079442 6.238326 95 +compil 2 122 2.079442 4.158884 96 +welcom 1 122 2.079442 2.079442 99 +document 1 121 2.079442 2.079442 89 +send 2 114 2.197225 4.394450 109 +pleas 1 113 2.197225 2.197225 114 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +follow 2 92 2.397895 4.795790 143 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +learn 1 86 2.484907 2.484907 170 +messag 2 76 2.564949 5.129898 212 +want 1 79 2.564949 2.564949 199 +state 1 76 2.564949 2.564949 207 +write 2 72 2.639057 5.278114 222 +materi 1 75 2.639057 2.639057 221 +appli 1 71 2.639057 2.639057 226 +sieg 1 69 2.708050 2.708050 260 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 2 64 2.772589 5.545178 261 +written 1 63 2.772589 2.772589 278 +guid 1 63 2.772589 2.772589 267 +import 1 65 2.772589 2.772589 282 +index 1 56 2.890372 2.890372 309 +unix 1 58 2.890372 2.890372 308 +faculti 1 56 2.890372 2.890372 325 +scientif 7 53 2.944439 20.611073 341 +allow 1 53 2.944439 2.944439 333 +cover 1 55 2.944439 2.944439 329 +found 1 53 2.944439 2.944439 337 +run 1 51 2.995732 2.995732 347 +quarter 1 47 3.091042 3.091042 389 +effect 1 46 3.091042 3.091042 385 +fast 2 42 3.218876 6.437752 429 +join 1 39 3.258097 3.258097 457 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +close 1 38 3.295837 3.295837 465 +credit 1 38 3.295837 3.295837 460 +workstat 2 37 3.332205 6.664410 479 +staff 1 36 3.367296 3.367296 490 +ad 1 32 3.465736 3.465736 544 +scientist 2 31 3.496508 6.993016 560 +rang 1 30 3.555348 3.555348 565 +semant 1 29 3.583519 3.583519 587 +platform 1 29 3.583519 3.583519 591 +releas 2 28 3.610918 7.221836 616 +arrai 1 27 3.637586 3.637586 627 +supercomput 3 25 3.737670 11.213010 681 +variabl 1 23 3.806662 3.806662 715 +dai 1 22 3.850148 3.850148 753 +exploit 1 20 3.951244 3.951244 836 +kernel 1 20 3.951244 3.951244 825 +assum 1 19 4.007333 4.007333 845 +account 1 18 4.060443 4.060443 882 +previous 1 17 4.110874 4.110874 923 +debug 1 17 4.110874 4.110874 944 +whole 1 17 4.110874 4.110874 940 +modern 2 16 4.174387 8.348774 966 +fortran 1 15 4.248495 4.248495 1027 +easili 1 14 4.317488 4.317488 1077 +matlab 1 14 4.317488 4.317488 1081 +bodi 2 13 4.382027 8.764054 1178 +block 1 13 4.382027 4.382027 1183 +loew 1 12 4.465908 4.465908 1252 +grant 1 12 4.465908 4.465908 1216 +loop 2 11 4.553877 9.107754 1310 +faster 1 11 4.553877 4.553877 1323 +reli 1 10 4.653960 4.653960 1411 +subscrib 2 9 4.753590 9.507180 1541 +suitabl 2 9 4.753590 9.507180 1486 +informationabout 1 9 4.753590 4.753590 1515 +elimin 1 9 4.753590 4.753590 1558 +simpli 1 8 4.875197 4.875197 1626 +migrat 1 7 5.010635 5.010635 1851 +largest 1 7 5.010635 5.010635 1858 +majordomo 2 6 5.164786 10.329572 2066 +syntax 1 6 5.164786 5.164786 2030 +sung 1 6 5.164786 5.164786 2075 +snyder 1 5 5.347108 5.347108 2359 +toth 1 4 5.568345 5.568345 2595 +ncsa 1 4 5.568345 5.568345 2767 +choi 1 4 5.568345 5.568345 2732 +audit 1 3 5.857933 5.857933 3391 +disciplin 1 3 5.857933 5.857933 3392 +informationcours 1 3 5.857933 5.857933 3167 +subscribeto 1 2 6.263398 6.263398 4543 +inner 1 2 6.263398 6.263398 4551 +zphigh 1 1 6.957497 6.957497 8444 +zpllarri 1 1 6.957497 6.957497 8445 +teamautumn 1 1 6.957497 6.957497 8446 +csezpl 1 1 6.957497 6.957497 8447 +usersmail 1 1 6.957497 6.957497 8448 +librarai 1 1 6.957497 6.957497 8449 +relatedinform 1 1 6.957497 6.957497 8450 +descriptionzpl 1 1 6.957497 6.957497 8451 +scientificprogram 1 1 6.957497 6.957497 8452 +infortran 1 1 6.957497 6.957497 8453 +dramaticallysimplifi 1 1 6.957497 6.957497 8454 +nuisanc 1 1 6.957497 6.957497 8455 +andtrivi 1 1 6.957497 6.957497 8456 +byrecompil 1 1 6.957497 6.957497 8457 +wysiwyg 1 1 6.957497 6.957497 8458 +booknon 1 1 6.957497 6.957497 8459 +onin 1 1 6.957497 6.957497 8460 +zplprogram 1 1 6.957497 6.957497 8461 +prerequisitesfamiliar 1 1 6.957497 6.957497 8462 +ormatlab 1 1 6.957497 6.957497 8463 +remotezpl 1 1 6.957497 6.957497 8464 +compileroth 1 1 6.957497 6.957497 8465 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ new file mode 100644 index 00000000..03371819 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse370^CurrentQtr^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +last 4 314 1.098612 4.394448 14 +us 2 329 1.098612 2.197224 16 +cours 2 273 1.098612 2.197224 15 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +design 4 213 1.386294 5.545176 25 +mail 4 238 1.386294 5.545176 22 +washington 3 236 1.386294 4.158882 32 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +class 6 199 1.609438 9.656628 37 +updat 3 191 1.609438 4.828314 41 +hour 2 165 1.791759 3.583518 46 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +lectur 4 135 1.945910 7.783640 73 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +document 2 121 2.079442 4.158884 89 +tool 2 117 2.079442 4.158884 93 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +instructor 4 108 2.197225 8.788900 107 +send 2 114 2.197225 4.394450 109 +final 2 116 2.197225 4.394450 108 +topic 2 114 2.197225 4.394450 110 +version 1 113 2.197225 2.197225 122 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +exam 2 86 2.484907 4.969814 169 +contain 1 81 2.484907 2.484907 174 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +messag 1 76 2.564949 2.564949 212 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +logic 2 71 2.639057 5.278114 230 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +sieg 2 69 2.708050 5.416100 260 +goal 1 66 2.708050 2.708050 250 +syllabu 1 67 2.708050 2.708050 247 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +evalu 1 64 2.772589 2.772589 266 +previou 1 62 2.772589 2.772589 290 +publish 1 57 2.890372 2.890372 326 +think 1 57 2.890372 2.890372 314 +maintain 2 51 2.995732 5.991464 342 +digit 1 52 2.995732 2.995732 348 +frequent 1 49 3.044522 3.044522 367 +archiv 1 49 3.044522 3.044522 364 +quarter 2 47 3.091042 6.182084 389 +adapt 1 46 3.091042 3.091042 387 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +even 1 45 3.135494 3.135494 393 +announc 3 40 3.258097 9.774291 441 +author 1 39 3.258097 3.258097 450 +slide 1 38 3.295837 3.295837 467 +credit 1 38 3.295837 3.295837 460 +expect 1 37 3.332205 3.332205 484 +purpos 1 37 3.332205 3.332205 481 +especi 1 36 3.367296 3.367296 496 +copyright 1 36 3.367296 3.367296 495 +go 1 33 3.433987 3.433987 529 +ad 1 32 3.465736 3.465736 544 +collabor 1 32 3.465736 3.465736 543 +autumn 2 31 3.496508 6.993016 558 +quot 1 29 3.583519 3.583519 582 +administr 2 27 3.637586 7.275172 628 +mind 1 27 3.637586 3.637586 632 +static 1 27 3.637586 3.637586 619 +notic 1 25 3.737670 3.737670 675 +accur 1 25 3.737670 3.737670 680 +tell 1 21 3.912023 3.912023 777 +anderson 1 19 4.007333 4.007333 860 +feedback 1 19 4.007333 4.007333 854 +aid 1 18 4.060443 4.060443 904 +whole 1 17 4.110874 4.110874 940 +weekli 1 17 4.110874 4.110874 919 +weslei 2 16 4.174387 8.348774 983 +portion 1 16 4.174387 4.174387 971 +webmast 2 15 4.248495 8.496990 1045 +anonym 1 14 4.317488 4.317488 1100 +reprint 1 14 4.317488 4.317488 1097 +everyon 1 13 4.382027 4.382027 1148 +quizz 1 13 4.382027 4.382027 1151 +addison 2 12 4.465908 8.931816 1230 +workload 1 12 4.465908 4.465908 1210 +overal 1 12 4.465908 4.465908 1254 +duli 1 12 4.465908 4.465908 1248 +benjamin 2 11 4.553877 9.107754 1296 +evolut 1 11 4.553877 4.553877 1314 +nonprofit 1 11 4.553877 4.553877 1339 +cheat 1 10 4.653960 4.653960 1395 +desir 1 9 4.753590 4.753590 1542 +cum 2 8 4.875197 9.750394 1619 +bunch 1 7 5.010635 5.010635 1861 +gaetano 3 6 5.164786 15.494358 2068 +borriello 2 5 5.347108 10.694216 2349 +corei 2 4 5.568345 11.136690 2718 +contemporari 1 4 5.568345 5.568345 2719 +katz 2 3 5.857933 11.715866 3276 +corin 1 3 5.857933 5.857933 3311 +aweekli 1 3 5.857933 5.857933 3312 +andersonwelcom 1 2 6.263398 6.263398 4400 +tocs 1 2 6.263398 6.263398 4401 +messagess 1 2 6.263398 6.263398 4402 +synario 1 2 6.263398 6.263398 4403 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse403^95w @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse500^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse500^ new file mode 100644 index 00000000..55b3272f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse500^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +instructor 1 108 2.197225 2.197225 107 +present 1 91 2.397895 2.397895 145 +internet 1 83 2.484907 2.484907 186 +journal 1 83 2.484907 2.484907 183 +nation 1 74 2.639057 2.639057 240 +sieg 1 69 2.708050 2.708050 260 +futur 1 41 3.218876 3.218876 427 +societi 2 40 3.258097 6.516194 456 +focu 1 30 3.555348 3.555348 571 +relev 1 26 3.688879 3.688879 637 +born 1 21 3.912023 3.912023 798 +thur 1 19 4.007333 4.007333 847 +social 1 13 4.382027 4.382027 1123 +econom 1 13 4.382027 4.382027 1184 +alan 1 13 4.382027 4.382027 1146 +tue 1 11 4.553877 4.553877 1308 +ethic 1 7 5.010635 5.010635 1786 +legal 1 6 5.164786 5.164786 2094 +highwai 1 6 5.164786 5.164786 2095 +implic 1 4 5.568345 5.568345 2696 +societycs 1 1 6.957497 6.957497 8466 +societywelcom 1 1 6.957497 6.957497 8467 +wintercs 1 1 6.957497 6.957497 8468 +andglob 1 1 6.957497 6.957497 8469 +syllabusclass 1 1 6.957497 6.957497 8470 +schedulelink 1 1 6.957497 6.957497 8471 +sitesbook 1 1 6.957497 6.957497 8472 +referenceassignmentsassign 1 1 6.957497 6.957497 8473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse567 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse567 new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse567 @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse573^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse573^ new file mode 100644 index 00000000..ba5ebc47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^education^courses^cse573^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +cours 1 273 1.098612 1.098612 15 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +hour 3 165 1.791759 5.375277 46 +address 1 170 1.791759 1.791759 62 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +topic 1 114 2.197225 2.197225 110 +question 2 91 2.397895 4.795790 141 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +build 1 85 2.484907 2.484907 184 +messag 1 76 2.564949 2.564949 212 +intellig 1 72 2.639057 2.639057 225 +sieg 3 69 2.708050 8.124150 260 +knowledg 1 67 2.708050 2.708050 243 +artifici 2 63 2.772589 5.545178 280 +plan 1 65 2.772589 2.772589 272 +index 1 56 2.890372 2.890372 309 +reason 1 57 2.890372 2.890372 318 +archiv 1 49 3.044522 3.044522 364 +quarter 1 47 3.091042 3.091042 389 +past 1 42 3.218876 3.218876 428 +staff 1 36 3.367296 3.367296 490 +represent 1 35 3.401197 3.401197 512 +pagecs 1 26 3.688879 3.688879 658 +fundament 1 25 3.737670 3.737670 661 +methodolog 1 23 3.806662 3.806662 733 +outlin 1 17 4.110874 4.110874 914 +nick 1 13 4.382027 4.382027 1180 +pose 1 9 4.753590 4.753590 1535 +depth 1 8 4.875197 4.875197 1636 +marc 1 8 4.875197 4.875197 1680 +uncertainti 1 7 5.010635 5.010635 1882 +machinelearn 1 6 5.164786 5.164786 2084 +anin 1 3 5.857933 5.857933 3354 +assignmentsassign 1 3 5.857933 5.857933 3342 +mailinglist 1 3 5.857933 5.857933 3325 +intelligencefal 1 2 6.263398 6.263398 4477 +andchalleng 1 2 6.263398 6.263398 4478 +intelligentmachin 1 2 6.263398 6.263398 4479 +agentarchitectur 1 2 6.263398 6.263398 4480 +weldweld 1 2 6.263398 6.263398 4481 +friedmanfriedman 1 2 6.263398 6.263398 4482 +kushmericknick 1 2 6.263398 6.263398 4483 +examsgradingresourcesth 1 2 6.263398 6.263398 4484 +topicsprojectread 1 1 6.957497 6.957497 8474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ new file mode 100644 index 00000000..b9418671 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.washington.edu^research^projects^ai^590i^ @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 5 412 0.693147 3.465735 8 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +us 3 329 1.098612 3.295836 16 +current 1 284 1.098612 1.098612 21 +mail 4 238 1.386294 5.545176 22 +link 2 247 1.386294 2.772588 24 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +list 2 201 1.609438 3.218876 39 +class 2 199 1.609438 3.218876 37 +updat 1 191 1.609438 1.609438 41 +paper 1 205 1.609438 1.609438 38 +read 2 154 1.791759 3.583518 47 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +file 2 132 1.945910 3.891820 70 +first 2 140 1.945910 3.891820 71 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +provid 2 121 2.079442 4.158884 94 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +check 7 115 2.197225 15.380575 118 +make 2 111 2.197225 4.394450 120 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +user 2 104 2.302585 4.605170 137 +peopl 1 96 2.302585 2.302585 132 +comment 3 93 2.397895 7.193685 146 +follow 1 92 2.397895 2.397895 143 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +server 4 76 2.564949 10.259796 204 +want 2 79 2.564949 5.129898 199 +interfac 2 79 2.564949 5.129898 209 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +decemb 1 80 2.564949 2.564949 215 +html 4 75 2.639057 10.556228 235 +intellig 2 72 2.639057 5.278114 225 +meet 1 72 2.639057 2.639057 229 +tuesdai 1 73 2.639057 2.639057 219 +summari 1 73 2.639057 2.639057 237 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +back 2 60 2.833213 5.666426 297 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +index 5 56 2.890372 14.451860 309 +detail 1 57 2.890372 2.890372 321 +anoth 2 45 3.135494 6.270988 408 +execut 1 45 3.135494 3.135494 404 +discuss 1 45 3.135494 3.135494 399 +show 3 43 3.178054 9.534162 417 +futur 1 41 3.218876 3.218876 427 +review 1 42 3.218876 3.218876 425 +paul 2 38 3.295837 6.591674 471 +winter 1 36 3.367296 3.367296 500 +short 1 36 3.367296 3.367296 499 +manual 1 35 3.401197 3.401197 504 +articl 1 33 3.433987 3.433987 530 +ad 1 32 3.465736 3.465736 544 +idea 1 32 3.465736 3.465736 545 +releas 1 28 3.610918 3.610918 616 +except 1 28 3.610918 3.610918 607 +rather 1 26 3.688879 3.688879 642 +wai 1 25 3.737670 3.737670 662 +mike 1 24 3.761200 3.761200 703 +displai 1 23 3.806662 3.806662 712 +miscellan 1 23 3.806662 3.806662 731 +instal 1 22 3.850148 3.850148 754 +sent 1 22 3.850148 3.850148 763 +offici 1 18 4.060443 4.060443 894 +statu 1 18 4.060443 4.060443 885 +regist 3 17 4.110874 12.332622 938 +anyon 1 17 4.110874 4.110874 916 +side 1 15 4.248495 4.248495 1022 +someon 1 13 4.382027 4.382027 1128 +nick 1 13 4.382027 4.382027 1180 +rememb 2 12 4.465908 8.931816 1217 +usenix 1 12 4.465908 4.465908 1240 +extra 1 11 4.553877 4.553877 1312 +guess 1 10 4.653960 4.653960 1443 +mosaic 1 10 4.653960 4.653960 1426 +bring 1 10 4.653960 4.653960 1430 +perspect 1 10 4.653960 4.653960 1437 +kurt 1 9 4.753590 4.753590 1548 +filter 2 8 4.875197 9.750394 1641 +on 1 8 4.875197 4.875197 1628 +guidelin 1 7 5.010635 5.010635 1832 +impress 1 6 5.164786 5.164786 2096 +begun 1 5 5.347108 5.347108 2386 +older 1 5 5.347108 5.347108 2387 +cacm 1 5 5.347108 5.347108 2388 +glimps 4 4 5.568345 22.273380 2778 +ics 1 4 5.568345 5.568345 2779 +rambl 1 3 5.857933 5.857933 3308 +vagu 1 3 5.857933 5.857933 3393 +towrit 1 2 6.263398 6.263398 4207 +phoenix 1 2 6.263398 6.263398 4552 +belief 1 2 6.263398 6.263398 4553 +zephyr 4 1 6.957497 27.829988 8475 +siegcreat 1 1 6.957497 6.957497 8476 +scriptspleas 1 1 6.957497 6.957497 8477 +tothem 1 1 6.957497 6.957497 8478 +zwhere 1 1 6.957497 6.957497 8479 +mosiac 1 1 6.957497 6.957497 8480 +znol 1 1 6.957497 6.957497 8481 +zwatch 1 1 6.957497 6.957497 8482 +zlocat 1 1 6.957497 6.957497 8483 +releg 1 1 6.957497 6.957497 8484 +grumbl 1 1 6.957497 6.957497 8485 +luddit 1 1 6.957497 6.957497 8486 +itout 1 1 6.957497 6.957497 8487 +withci 1 1 6.957497 6.957497 8488 +theentir 1 1 6.957497 6.957497 8489 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html new file mode 100644 index 00000000..027e5cce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs354.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +studi 1 120 2.079442 2.079442 91 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +instructor 1 108 2.197225 2.197225 107 +memori 1 101 2.302585 2.302585 139 +commun 1 95 2.397895 2.397895 157 +control 1 82 2.484907 2.484907 164 +info 1 85 2.484907 2.484907 176 +involv 1 71 2.639057 2.639057 227 +organ 1 65 2.772589 2.772589 265 +detail 1 57 2.890372 2.890372 321 +processor 1 54 2.944439 2.944439 335 +small 1 39 3.258097 3.258097 447 +open 2 38 3.295837 6.591674 469 +credit 1 38 3.295837 3.295837 460 +taken 1 31 3.496508 3.496508 555 +prerequisit 1 19 4.007333 4.007333 846 +devic 1 16 4.174387 4.174387 1002 +consent 1 5 5.347108 5.347108 2389 +semesterli 1 4 5.568345 5.568345 2780 +freshmen 1 2 6.263398 6.263398 4554 +computerhardwar 1 1 6.957497 6.957497 8490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html new file mode 100644 index 00000000..266c7ded --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs552.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +design 3 213 1.386294 4.158882 25 +fall 1 181 1.609438 1.609438 40 +address 1 170 1.791759 1.791759 62 +architectur 1 139 1.945910 1.945910 77 +introduct 1 126 2.079442 2.079442 87 +structur 2 106 2.197225 4.394450 105 +manag 1 114 2.197225 2.197225 125 +memori 2 101 2.302585 4.605170 139 +control 1 82 2.484907 2.484907 164 +info 1 85 2.484907 2.484907 176 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +cach 1 41 3.218876 3.218876 432 +credit 1 38 3.295837 3.295837 460 +compon 1 30 3.555348 3.555348 570 +hierarchi 1 22 3.850148 3.850148 744 +prerequisit 1 19 4.007333 4.007333 846 +interrupt 1 7 5.010635 5.010635 1793 +microprogram 1 4 5.568345 5.568345 2604 +semesterli 1 4 5.568345 5.568345 2780 +andc 1 1 6.957497 6.957497 8491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html new file mode 100644 index 00000000..b1b294bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs752.html @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +fall 1 181 1.609438 1.609438 40 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +architectur 2 139 1.945910 3.891820 77 +process 1 142 1.945910 1.945910 72 +high 1 130 2.079442 2.079442 101 +advanc 2 99 2.302585 4.605170 130 +techniqu 1 99 2.302585 2.302585 138 +special 1 56 2.890372 2.890372 320 +processor 1 54 2.944439 2.944439 335 +credit 1 38 3.295837 3.295837 460 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +multiprocessor 1 28 3.610918 3.610918 605 +flow 1 24 3.761200 3.761200 700 +prerequisit 1 19 4.007333 4.007333 846 +semesterli 1 4 5.568345 5.568345 2780 +andpipelin 1 1 6.957497 6.957497 8492 +performancemachin 1 1 6.957497 6.957497 8493 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html new file mode 100644 index 00000000..80fcc490 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~arch^uwarch^courses^cs757.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +fall 1 181 1.609438 1.609438 40 +parallel 2 169 1.791759 3.583518 60 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +architectur 1 139 1.945910 1.945910 77 +machin 2 129 2.079442 4.158884 95 +instructor 1 108 2.197225 2.197225 107 +advanc 1 99 2.302585 2.302585 130 +info 1 85 2.484907 2.484907 176 +special 1 56 2.890372 2.890372 320 +principl 1 48 3.044522 3.044522 357 +credit 1 38 3.295837 3.295837 460 +multi 1 36 3.367296 3.367296 493 +detect 1 26 3.688879 3.688879 646 +prerequisit 1 19 4.007333 4.007333 846 +interconnect 1 17 4.110874 4.110874 937 +coher 1 14 4.317488 4.317488 1109 +dataflow 1 5 5.347108 5.347108 2390 +consent 1 5 5.347108 5.347108 2389 +semesterli 1 4 5.568345 5.568345 2780 +simd 1 3 5.857933 5.857933 3360 +mimd 1 3 5.857933 5.857933 3361 +vectorizingcompil 1 1 6.957497 6.957497 8494 +processorsynchron 1 1 6.957497 6.957497 8495 +purposeprocessor 1 1 6.957497 6.957497 8496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs537.html new file mode 100644 index 00000000..9962890c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs537.html @@ -0,0 +1,244 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 6 374 0.693147 4.158882 7 +system 5 443 0.693147 3.465735 6 +work 3 380 0.693147 2.079441 9 +us 7 329 1.098612 7.690284 16 +cours 4 273 1.098612 4.394448 15 +offic 3 299 1.098612 3.295836 13 +last 3 314 1.098612 3.295836 14 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +wisc 3 242 1.386294 4.158882 33 +softwar 1 220 1.386294 1.386294 30 +class 10 199 1.609438 16.094380 37 +oper 5 180 1.609438 8.047190 34 +group 2 183 1.609438 3.218876 36 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +avail 8 169 1.791759 14.334072 48 +read 4 154 1.791759 7.167036 47 +hour 3 165 1.791759 5.375277 46 +wisconsin 1 169 1.791759 1.791759 54 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +assign 11 135 1.945910 21.405010 66 +problem 9 147 1.945910 17.513190 75 +lectur 6 135 1.945910 11.675460 73 +note 3 142 1.945910 5.837730 67 +process 3 142 1.945910 5.837730 72 +first 2 140 1.945910 3.891820 71 +schedul 5 119 2.079442 10.397210 85 +spring 4 131 2.079442 8.317768 88 +introduct 3 126 2.079442 6.238326 87 +final 8 116 2.197225 17.577800 108 +topic 2 114 2.197225 4.394450 110 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +manag 1 114 2.197225 2.197225 125 +memori 3 101 2.302585 6.907755 139 +take 3 97 2.302585 6.907755 134 +need 2 98 2.302585 4.605170 135 +book 2 99 2.302585 4.605170 131 +advanc 2 99 2.302585 4.605170 130 +section 8 94 2.397895 19.183160 149 +grade 4 90 2.397895 9.591580 142 +follow 2 92 2.397895 4.795790 143 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +learn 4 86 2.484907 9.939628 170 +exam 3 86 2.484907 7.454721 169 +start 1 83 2.484907 2.484907 173 +second 1 81 2.484907 2.484907 166 +activ 1 84 2.484907 2.484907 182 +solut 1 82 2.484907 2.484907 162 +chang 1 82 2.484907 2.484907 163 +april 5 77 2.564949 12.824745 196 +mondai 2 77 2.564949 5.129898 206 +orient 2 80 2.564949 5.129898 205 +messag 2 76 2.564949 5.129898 212 +come 1 78 2.564949 2.564949 202 +homework 1 79 2.564949 2.564949 193 +dynam 1 76 2.564949 2.564949 194 +tuesdai 2 73 2.639057 5.278114 219 +solv 1 73 2.639057 2.639057 234 +goal 3 66 2.708050 8.124150 250 +thursdai 2 70 2.708050 5.416100 241 +simul 2 66 2.708050 5.416100 255 +differ 1 66 2.708050 2.708050 253 +receiv 1 66 2.708050 2.708050 244 +window 1 68 2.708050 2.708050 242 +wednesdai 5 64 2.772589 13.862945 261 +januari 2 62 2.772589 5.545178 264 +organ 1 65 2.772589 2.772589 265 +copi 1 63 2.772589 2.772589 284 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +handout 1 64 2.772589 2.772589 263 +virtual 1 62 2.772589 2.772589 285 +march 4 61 2.833213 11.332852 295 +room 1 59 2.833213 2.833213 301 +unix 3 58 2.890372 8.671116 308 +semest 2 58 2.890372 5.780744 312 +detail 1 57 2.890372 2.890372 321 +variou 1 56 2.890372 2.890372 317 +overview 1 56 2.890372 2.890372 323 +februari 9 54 2.944439 26.499951 328 +three 3 54 2.944439 8.833317 330 +week 9 52 2.995732 26.961588 343 +hardwar 1 51 2.995732 2.995732 350 +date 1 51 2.995732 2.995732 344 +run 1 51 2.995732 2.995732 347 +set 5 50 3.044522 15.222610 361 +appoint 1 49 3.044522 3.044522 358 +done 1 47 3.091042 3.091042 381 +could 1 46 3.091042 3.091042 383 +discuss 4 45 3.135494 12.541976 399 +answer 3 45 3.135494 9.406482 391 +fridai 2 44 3.135494 6.270988 390 +favorit 1 44 3.135494 3.135494 410 +midterm 1 45 3.135494 3.135494 392 +mark 1 44 3.135494 3.135494 403 +around 1 43 3.178054 3.178054 415 +past 2 42 3.218876 6.437752 428 +late 5 40 3.258097 16.290485 439 +probabl 2 40 3.258097 6.516194 455 +credit 1 38 3.295837 3.295837 460 +hand 2 37 3.332205 6.664410 475 +workstat 2 37 3.332205 6.664410 479 +ofth 1 36 3.367296 3.367296 491 +post 1 35 3.401197 3.401197 505 +print 1 34 3.401197 3.401197 503 +concurr 1 34 3.401197 3.401197 501 +eduoffic 3 33 3.433987 10.301961 531 +taught 1 33 3.433987 3.433987 526 +independ 1 32 3.465736 3.465736 548 +option 1 30 3.555348 3.555348 575 +rang 1 30 3.555348 3.555348 565 +secur 1 30 3.555348 3.555348 577 +turn 1 29 3.583519 3.583519 586 +though 1 27 3.637586 3.637586 622 +trace 1 25 3.737670 3.737670 677 +strategi 1 25 3.737670 3.737670 682 +store 1 24 3.761200 3.761200 693 +dai 4 22 3.850148 15.400592 753 +inth 1 22 3.850148 3.850148 741 +tent 1 22 3.850148 3.850148 739 +cooper 1 22 3.850148 3.850148 757 +disk 1 22 3.850148 3.850148 747 +leav 1 21 3.912023 3.912023 772 +alloc 2 20 3.951244 7.902488 821 +sure 1 20 3.951244 3.951244 813 +minut 1 20 3.951244 3.951244 810 +entir 1 20 3.951244 3.951244 811 +break 1 20 3.951244 3.951244 812 +attend 1 18 4.060443 4.060443 893 +accept 1 18 4.060443 4.060443 879 +account 1 18 4.060443 4.060443 882 +weekli 1 17 4.110874 4.110874 919 +monitor 1 17 4.110874 4.110874 941 +analyz 1 17 4.110874 4.110874 925 +regist 1 17 4.110874 4.110874 938 +debug 1 17 4.110874 4.110874 944 +segment 1 17 4.110874 4.110874 931 +quiz 3 16 4.174387 12.523161 990 +modern 1 16 4.174387 4.174387 966 +devic 1 16 4.174387 4.174387 1002 +purchas 1 15 4.248495 4.248495 1030 +drive 1 15 4.248495 4.248495 1052 +driven 1 15 4.248495 4.248495 1048 +happi 1 14 4.317488 4.317488 1079 +quizz 7 13 4.382027 30.674189 1151 +jonathan 1 13 4.382027 4.382027 1174 +cannot 1 13 4.382027 4.382027 1144 +solari 2 12 4.465908 8.931816 1238 +readi 1 12 4.465908 4.465908 1242 +count 1 12 4.465908 4.465908 1239 +extra 1 11 4.553877 4.553877 1312 +mainli 1 10 4.653960 4.653960 1432 +penalti 1 10 4.653960 4.653960 1405 +bart 3 9 4.753590 14.260770 1559 +recit 1 9 4.753590 4.753590 1475 +quantit 1 8 4.875197 4.875197 1654 +absolut 1 8 4.875197 4.875197 1646 +partner 1 8 4.875197 4.875197 1648 +replac 1 8 4.875197 4.875197 1668 +dispatch 1 7 5.010635 5.010635 1791 +whatev 1 6 5.164786 5.164786 2097 +transcript 1 6 5.164786 5.164786 2067 +drop 1 6 5.164786 5.164786 2008 +averag 1 6 5.164786 5.164786 2098 +madisoncomput 1 5 5.347108 5.347108 2391 +poorli 1 4 5.568345 5.568345 2781 +maximum 1 4 5.568345 5.568345 2632 +csphone 3 3 5.857933 17.573799 3394 +departmentc 1 3 5.857933 5.857933 3395 +millerc 1 3 5.857933 5.857933 3396 +tanenbaum 1 3 5.857933 5.857933 3397 +programmingassign 1 3 5.857933 5.857933 3398 +ofobject 1 3 5.857933 5.857933 3399 +thrash 1 3 5.857933 5.857933 3400 +bybart 1 3 5.857933 5.857933 3401 +semaphor 2 2 6.263398 12.526796 4555 +milleremail 1 2 6.263398 6.263398 4556 +noonor 1 2 6.263398 6.263398 4557 +weyer 1 2 6.263398 6.263398 4558 +notesar 1 2 6.263398 6.263398 4559 +materialcov 1 2 6.263398 6.263398 4140 +youdon 1 2 6.263398 6.263398 4444 +problemssolut 1 2 6.263398 6.263398 4241 +theproblem 1 2 6.263398 6.263398 4560 +andlook 1 2 6.263398 6.263398 4561 +havethre 1 2 6.263398 6.263398 4562 +daysof 1 2 6.263398 6.263398 4563 +eachof 1 2 6.263398 6.263398 4564 +lowest 1 2 6.263398 6.263398 4565 +breakweek 1 2 6.263398 6.263398 4566 +systemsnew 1 1 6.957497 6.957497 8497 +stufffin 1 1 6.957497 6.957497 8498 +staffinstructor 1 1 6.957497 6.957497 8499 +karuna 1 1 6.957497 6.957497 8500 +muthiahemail 1 1 6.957497 6.957497 8501 +muthiah 1 1 6.957497 6.957497 8502 +weyersemail 1 1 6.957497 6.957497 8503 +materialsth 1 1 6.957497 6.957497 8504 +thelectur 1 1 6.957497 6.957497 8505 +textbookmodern 1 1 6.957497 6.957497 8506 +pohl 1 1 6.957497 6.957497 8507 +systemsandobject 1 1 6.957497 6.957497 8508 +sectionslectur 1 1 6.957497 6.957497 8509 +sciencesdiscuss 1 1 6.957497 6.957497 8510 +nolandnot 1 1 6.957497 6.957497 8511 +occas 1 1 6.957497 6.957497 8512 +quizzesther 1 1 6.957497 6.957497 8513 +thediscuss 1 1 6.957497 6.957497 8514 +usetrac 1 1 6.957497 6.957497 8515 +setsdur 1 1 6.957497 6.957497 8516 +severalwritten 1 1 6.957497 6.957497 8517 +synchronizationprimit 1 1 6.957497 6.957497 8518 +workassign 1 1 6.957497 6.957497 8519 +assignmentthat 1 1 6.957497 6.957497 8520 +weekof 1 1 6.957497 6.957497 8521 +cheatingprogram 1 1 6.957497 6.957497 8522 +cheater 1 1 6.957497 6.957497 8523 +receivingan 1 1 6.957497 6.957497 8524 +facilitiesw 1 1 6.957497 6.957497 8525 +policyif 1 1 6.957497 6.957497 8526 +beno 1 1 6.957497 6.957497 8527 +scheduleth 1 1 6.957497 6.957497 8528 +processesweek 1 1 6.957497 6.957497 8529 +creationweek 1 1 6.957497 6.957497 8530 +synchronizationweek 1 1 6.957497 6.957497 8531 +semaphoresweek 1 1 6.957497 6.957497 8532 +monitorsweek 1 1 6.957497 6.957497 8533 +deadlocksweek 1 1 6.957497 6.957497 8534 +relocationweek 1 1 6.957497 6.957497 8535 +tlbsweek 1 1 6.957497 6.957497 8536 +filesweek 1 1 6.957497 6.957497 8537 +directoriesweek 1 1 6.957497 6.957497 8538 +protectionweek 1 1 6.957497 6.957497 8539 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs638.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs638.html new file mode 100644 index 00000000..d02ee4f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs638.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +spring 2 131 2.079442 4.158884 88 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +follow 1 92 2.397895 2.397895 143 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +internet 4 83 2.484907 9.939628 186 +requir 1 81 2.484907 2.484907 167 +chang 1 82 2.484907 2.484907 163 +april 5 77 2.564949 12.824745 196 +mondai 1 77 2.564949 2.564949 206 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +januari 2 62 2.772589 5.545178 264 +wednesdai 1 64 2.772589 2.772589 261 +written 1 63 2.772589 2.772589 278 +march 4 61 2.833213 11.332852 295 +februari 4 54 2.944439 11.777756 328 +week 5 52 2.995732 14.978660 343 +appoint 1 49 3.044522 3.044522 358 +right 1 48 3.044522 3.044522 363 +could 1 46 3.091042 3.091042 383 +fridai 1 44 3.135494 3.135494 390 +discuss 1 45 3.135494 3.135494 399 +procedur 1 36 3.367296 3.367296 488 +eduoffic 1 33 3.433987 3.433987 531 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +client 1 25 3.737670 3.737670 679 +honor 2 23 3.806662 7.613324 729 +mostli 1 19 4.007333 4.007333 869 +eric 1 19 4.007333 4.007333 870 +miller 3 17 4.110874 12.332622 949 +remot 1 15 4.248495 4.248495 1041 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +larri 1 13 4.382027 4.382027 1142 +wood 1 11 4.553877 4.553877 1355 +bart 6 9 4.753590 28.521540 1559 +laru 1 9 4.753590 4.753590 1560 +madisoncomput 1 5 5.347108 5.347108 2391 +bach 1 4 5.568345 5.568345 2708 +departmentc 1 3 5.857933 5.857933 3395 +millerc 1 3 5.857933 5.857933 3396 +csphone 1 3 5.857933 5.857933 3394 +landweb 1 3 5.857933 5.857933 3402 +bybart 1 3 5.857933 5.857933 3401 +milleremail 1 2 6.263398 6.263398 4556 +noonor 1 2 6.263398 6.263398 4557 +breakweek 1 2 6.263398 6.263398 4566 +seminarunivers 1 1 6.957497 6.957497 8540 +seminarinstructor 1 1 6.957497 6.957497 8541 +lectureslectur 1 1 6.957497 6.957497 8542 +sciencesclass 1 1 6.957497 6.957497 8543 +schedulether 1 1 6.957497 6.957497 8544 +attendal 1 1 6.957497 6.957497 8545 +overviewweek 1 1 6.957497 6.957497 8546 +protocolsweek 1 1 6.957497 6.957497 8547 +callsweek 1 1 6.957497 6.957497 8548 +securityweek 1 1 6.957497 6.957497 8549 +encryptionweek 1 1 6.957497 6.957497 8550 +netweek 1 1 6.957497 6.957497 8551 +systemsweek 1 1 6.957497 6.957497 8552 +supercomputerweek 1 1 6.957497 6.957497 8553 +javaweek 1 1 6.957497 6.957497 8554 +discussionslast 1 1 6.957497 6.957497 8555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs736.html new file mode 100644 index 00000000..41718f66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bart^cs736.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 6 443 0.693147 4.158882 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +project 4 340 1.098612 4.394448 18 +cours 3 273 1.098612 3.295836 15 +current 2 284 1.098612 2.197224 21 +us 1 329 1.098612 1.098612 16 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +paper 12 205 1.609438 19.313256 38 +class 8 199 1.609438 12.875504 37 +oper 4 180 1.609438 6.437752 34 +fall 2 181 1.609438 3.218876 40 +group 2 183 1.609438 3.218876 36 +modifi 1 178 1.609438 1.609438 35 +read 14 154 1.791759 25.084626 47 +wisconsin 1 169 1.791759 1.791759 54 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +hour 1 165 1.791759 1.791759 46 +first 3 140 1.945910 5.837730 71 +lectur 2 135 1.945910 3.891820 73 +assign 2 135 1.945910 3.891820 66 +file 1 132 1.945910 1.945910 70 +area 1 144 1.945910 1.945910 80 +schedul 1 119 2.079442 2.079442 85 +topic 5 114 2.197225 10.986125 110 +well 2 109 2.197225 4.394450 121 +final 2 116 2.197225 4.394450 108 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +structur 1 106 2.197225 2.197225 105 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +advanc 2 99 2.302585 4.605170 130 +memori 1 101 2.302585 2.302585 139 +text 1 98 2.302585 2.302585 133 +part 1 98 2.302585 2.302585 129 +comment 2 93 2.397895 4.795790 146 +grade 2 90 2.397895 4.795790 142 +proceed 1 93 2.397895 2.397895 152 +second 2 81 2.484907 4.969814 166 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +exam 1 86 2.484907 2.484907 169 +issu 1 78 2.564949 2.564949 211 +good 1 77 2.564949 2.564949 200 +write 4 72 2.639057 10.556228 222 +tuesdai 2 73 2.639057 5.278114 219 +name 1 72 2.639057 2.639057 220 +meet 1 72 2.639057 2.639057 229 +involv 1 71 2.639057 2.639057 227 +summari 1 73 2.639057 2.639057 237 +thursdai 2 70 2.708050 5.416100 241 +import 3 65 2.772589 8.317767 282 +handout 1 64 2.772589 2.772589 263 +detail 3 57 2.890372 8.671116 321 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +talk 1 53 2.944439 2.944439 336 +extens 1 53 2.944439 2.944439 340 +week 2 52 2.995732 5.991464 343 +give 2 50 3.044522 6.089044 359 +discuss 7 45 3.135494 21.948458 399 +textbook 1 44 3.135494 3.135494 397 +anoth 1 45 3.135494 3.135494 408 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +review 2 42 3.218876 6.437752 425 +form 2 39 3.258097 6.516194 443 +realli 1 40 3.258097 3.258097 444 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +short 1 36 3.367296 3.367296 499 +post 2 35 3.401197 6.802394 505 +least 1 35 3.401197 3.401197 516 +articl 1 33 3.433987 3.433987 530 +idea 2 32 3.465736 6.931472 545 +secur 1 30 3.555348 3.555348 577 +abl 1 30 3.555348 3.555348 566 +particip 2 29 3.583519 7.167038 589 +synchron 1 29 3.583519 3.583519 588 +intend 1 28 3.610918 3.610918 599 +pass 1 28 3.610918 3.610918 611 +relev 1 26 3.688879 3.688879 637 +revis 1 26 3.688879 3.688879 640 +daili 1 24 3.761200 3.761200 706 +try 1 22 3.850148 3.850148 764 +busi 1 21 3.912023 3.912023 784 +kernel 1 20 3.951244 3.951244 825 +longer 1 20 3.951244 3.951244 816 +els 1 19 4.007333 4.007333 843 +listen 1 18 4.060443 4.060443 907 +protect 1 17 4.110874 4.110874 935 +sheet 1 16 4.174387 4.174387 973 +choos 1 16 4.174387 4.174387 964 +critic 1 16 4.174387 4.174387 982 +purchas 1 15 4.248495 4.248495 1030 +score 1 15 4.248495 4.248495 1017 +doit 1 14 4.317488 4.317488 1111 +someon 1 13 4.382027 4.382027 1128 +rest 1 12 4.465908 4.465908 1259 +reader 1 12 4.465908 4.465908 1246 +broad 1 11 4.553877 4.553877 1302 +literatur 1 11 4.553877 4.553877 1300 +success 1 10 4.653960 4.653960 1390 +bart 1 9 4.753590 4.753590 1559 +classmat 1 9 4.753590 4.753590 1516 +theme 1 8 4.875197 4.875197 1707 +opinion 1 8 4.875197 4.875197 1708 +refere 1 7 5.010635 5.010635 1895 +fromth 1 7 5.010635 5.010635 1802 +carefulli 1 6 5.164786 5.164786 2045 +madisoncomput 1 5 5.347108 5.347108 2391 +understood 1 5 5.347108 5.347108 2364 +twice 2 4 5.568345 11.136690 2614 +exposur 1 4 5.568345 5.568345 2598 +will 1 4 5.568345 5.568345 2782 +writer 1 4 5.568345 5.568345 2783 +csoffic 1 4 5.568345 5.568345 2727 +departmentc 1 3 5.857933 5.857933 3395 +millerc 1 3 5.857933 5.857933 3396 +advancedoper 1 3 5.857933 5.857933 3403 +focal 1 3 5.857933 5.857933 3404 +formula 1 3 5.857933 5.857933 3405 +thepap 1 3 5.857933 5.857933 3254 +andon 1 3 5.857933 5.857933 3115 +bybart 1 3 5.857933 5.857933 3401 +satisfactori 1 2 6.263398 6.263398 4567 +andconfer 1 2 6.263398 6.263398 4568 +willinstead 1 2 6.263398 6.263398 4569 +adiscuss 1 2 6.263398 6.263398 4570 +geta 1 2 6.263398 6.263398 4571 +quietli 1 2 6.263398 6.263398 4572 +examsther 1 2 6.263398 6.263398 4149 +assignmenti 1 2 6.263398 6.263398 4573 +availbl 2 1 6.957497 13.914994 8556 +systemssummarythi 1 1 6.957497 6.957497 8557 +textther 1 1 6.957497 6.957497 8558 +operatingsystemsclass 1 1 6.957497 6.957497 8559 +meetonc 1 1 6.957497 6.957497 8560 +listaccord 1 1 6.957497 6.957497 8561 +papersindepend 1 1 6.957497 6.957497 8562 +identifyth 1 1 6.957497 6.957497 8563 +discussionsclass 1 1 6.957497 6.957497 8564 +besupport 1 1 6.957497 6.957497 8565 +beveri 1 1 6.957497 6.957497 8566 +unhappi 1 1 6.957497 6.957497 8567 +papersdur 1 1 6.957497 6.957497 8568 +paperwil 1 1 6.957497 6.957497 8569 +facilityand 1 1 6.957497 6.957497 8570 +summaryof 1 1 6.957497 6.957497 8571 +aselect 1 1 6.957497 6.957497 8572 +topicsfrom 1 1 6.957497 6.957497 8573 +fellowstud 1 1 6.957497 6.957497 8574 +giveth 1 1 6.957497 6.957497 8575 +gradesscor 1 1 6.957497 6.957497 8576 +proposalsi 1 1 6.957497 6.957497 8577 +gradesar 1 1 6.957497 6.957497 8578 +detailstim 1 1 6.957497 6.957497 8579 +noonlast 1 1 6.957497 6.957497 8580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html new file mode 100644 index 00000000..e00e108c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs110^cs110.html @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 12 775 0.000000 0.000000 2 +home 5 672 0.000000 0.000000 1 +scienc 4 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +program 10 374 0.693147 6.931470 7 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +cours 5 273 1.098612 5.493060 15 +student 3 343 1.098612 3.295836 19 +engin 3 297 1.098612 3.295836 20 +us 3 329 1.098612 3.295836 16 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +languag 2 227 1.386294 2.772588 26 +also 2 259 1.386294 2.772588 28 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +click 8 142 1.945910 15.567280 78 +lectur 4 135 1.945910 7.783640 73 +problem 3 147 1.945910 5.837730 75 +assign 2 135 1.945910 3.891820 66 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +structur 2 106 2.197225 4.394450 105 +instructor 2 108 2.197225 4.394450 107 +pleas 2 113 2.197225 4.394450 114 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +question 7 91 2.397895 16.785265 141 +section 3 94 2.397895 7.193685 149 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +solut 9 82 2.484907 22.364163 162 +requir 1 81 2.484907 2.484907 167 +school 1 84 2.484907 2.484907 188 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +want 1 79 2.564949 2.564949 199 +mondai 1 77 2.564949 2.564949 206 +write 3 72 2.639057 7.917171 222 +solv 3 73 2.639057 7.917171 234 +materi 1 75 2.639057 2.639057 221 +window 2 68 2.708050 5.416100 242 +knowledg 1 67 2.708050 2.708050 243 +receiv 1 66 2.708050 2.708050 244 +syllabu 1 67 2.708050 2.708050 247 +copi 2 63 2.772589 5.545178 284 +experi 1 64 2.772589 2.772589 283 +descript 1 64 2.772589 2.772589 271 +import 1 65 2.772589 2.772589 282 +polici 1 64 2.772589 2.772589 279 +function 1 62 2.772589 2.772589 275 +simpl 1 60 2.833213 2.833213 298 +march 1 61 2.833213 2.833213 295 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +cover 3 55 2.944439 8.833317 329 +instruct 1 53 2.944439 2.944439 332 +week 7 52 2.995732 20.970124 343 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +basic 2 50 3.044522 6.089044 360 +pointer 2 48 3.044522 6.089044 368 +done 1 47 3.091042 3.091042 381 +even 1 45 3.135494 3.135494 393 +textbook 1 44 3.135494 3.135494 397 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +howev 2 41 3.218876 6.437752 422 +littl 1 39 3.258097 3.258097 454 +announc 1 40 3.258097 3.258097 441 +probabl 1 40 3.258097 3.258097 455 +microsoft 5 38 3.295837 16.479185 468 +credit 1 38 3.295837 3.295837 460 +open 1 38 3.295837 3.295837 469 +copyright 1 36 3.367296 3.367296 495 +statist 1 35 3.401197 3.401197 521 +taught 1 33 3.433987 3.433987 526 +depend 1 29 3.583519 3.583519 583 +intend 2 28 3.610918 7.221836 599 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +enabl 1 26 3.688879 3.688879 655 +lab 1 24 3.761200 3.761200 698 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +prepar 1 20 3.951244 3.951244 824 +entir 1 20 3.951244 3.951244 811 +exercis 3 19 4.007333 12.021999 842 +assum 1 19 4.007333 4.007333 845 +lyco 1 19 4.007333 4.007333 871 +across 1 16 4.174387 4.174387 974 +dilbert 1 16 4.174387 4.174387 996 +fortran 10 15 4.248495 42.484950 1027 +pagec 1 15 4.248495 4.248495 1011 +psycholog 1 15 4.248495 4.248495 1054 +purchas 1 15 4.248495 4.248495 1030 +comic 1 14 4.317488 4.317488 1103 +primarili 2 13 4.382027 8.764054 1185 +menu 1 13 4.382027 4.382027 1156 +vectra 2 12 4.465908 8.931816 1267 +overal 1 12 4.465908 4.465908 1254 +holidai 1 12 4.465908 4.465908 1224 +insid 1 12 4.465908 4.465908 1262 +keyword 1 11 4.553877 4.553877 1356 +night 1 11 4.553877 4.553877 1319 +prior 1 10 4.653960 4.653960 1438 +packard 1 10 4.653960 4.653960 1444 +certain 1 10 4.653960 4.653960 1393 +seven 1 9 4.753590 4.753590 1561 +hewlett 1 8 4.875197 4.875197 1709 +printer 1 8 4.875197 4.875197 1621 +elementari 2 7 5.010635 10.021270 1825 +bestor 4 6 5.164786 20.659144 2099 +gareth 3 5 5.347108 16.041324 2392 +relief 1 4 5.568345 5.568345 2784 +punctual 1 3 5.857933 5.857933 3313 +labyou 1 3 5.857933 5.857933 3406 +dorm 1 3 5.857933 5.857933 3407 +lahei 1 3 5.857933 5.857933 3408 +programmingsect 1 2 6.263398 6.263398 4574 +disturb 1 2 6.263398 6.263398 4575 +subroutin 1 2 6.263398 6.263398 4576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html new file mode 100644 index 00000000..2e76fba9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~bestor^cs302^cs302.html @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 13 775 0.000000 0.000000 2 +home 7 672 0.000000 0.000000 1 +scienc 5 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +program 20 374 0.693147 13.862940 7 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +offic 5 299 1.098612 5.493060 13 +cours 4 273 1.098612 4.394448 15 +us 4 329 1.098612 4.394448 16 +engin 2 297 1.098612 2.197224 20 +student 2 343 1.098612 2.197224 19 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +mail 6 238 1.386294 8.317764 22 +languag 5 227 1.386294 6.931470 26 +gener 2 220 1.386294 2.772588 27 +also 2 259 1.386294 2.772588 28 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +class 10 199 1.609438 16.094380 37 +includ 3 208 1.609438 4.828314 42 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +hour 4 165 1.791759 7.167036 46 +read 3 154 1.791759 5.375277 47 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +algorithm 1 162 1.791759 1.791759 57 +click 14 142 1.945910 27.242740 78 +assign 13 135 1.945910 25.296830 66 +problem 12 147 1.945910 23.350920 75 +lectur 8 135 1.945910 15.567280 73 +note 5 142 1.945910 9.729550 67 +first 2 140 1.945910 3.891820 71 +perform 1 143 1.945910 1.945910 74 +hall 1 146 1.945910 1.945910 65 +compil 3 122 2.079442 6.238326 96 +final 7 116 2.197225 15.380575 108 +pleas 4 113 2.197225 8.788900 114 +instructor 3 108 2.197225 6.591675 107 +send 3 114 2.197225 6.591675 109 +well 2 109 2.197225 4.394450 121 +topic 1 114 2.197225 2.197225 110 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +text 4 98 2.302585 9.210340 133 +need 2 98 2.302585 4.605170 135 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +question 16 91 2.397895 38.366320 141 +grade 11 90 2.397895 26.376845 142 +section 6 94 2.397895 14.387370 149 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +exam 18 86 2.484907 44.728326 169 +solut 14 82 2.484907 34.788698 162 +help 3 83 2.484907 7.454721 175 +learn 2 86 2.484907 4.969814 170 +academ 1 82 2.484907 2.484907 178 +requir 1 81 2.484907 2.484907 167 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +complet 4 77 2.564949 10.259796 208 +good 3 77 2.564949 7.694847 200 +mondai 3 77 2.564949 7.694847 206 +april 2 77 2.564949 5.129898 196 +come 2 78 2.564949 5.129898 202 +want 2 79 2.564949 5.129898 199 +exampl 1 77 2.564949 2.564949 195 +messag 1 76 2.564949 2.564949 212 +solv 9 73 2.639057 23.751513 234 +write 3 72 2.639057 7.917171 222 +line 2 75 2.639057 5.278114 231 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +onlin 1 75 2.639057 2.639057 223 +name 1 72 2.639057 2.639057 220 +receiv 3 66 2.708050 8.124150 244 +syllabu 2 67 2.708050 5.416100 247 +window 2 68 2.708050 5.416100 242 +would 1 67 2.708050 2.708050 251 +import 4 65 2.772589 11.090356 282 +polici 3 64 2.772589 8.317767 279 +copi 3 63 2.772589 8.317767 284 +wednesdai 2 64 2.772589 5.545178 261 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +room 1 59 2.833213 2.833213 301 +locat 1 59 2.833213 2.833213 303 +major 1 56 2.890372 2.890372 315 +semest 1 58 2.890372 2.890372 312 +think 1 57 2.890372 2.890372 314 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +cover 4 55 2.944439 11.777756 329 +februari 1 54 2.944439 2.944439 328 +three 1 54 2.944439 2.944439 330 +week 33 52 2.995732 98.859156 343 +particular 2 51 2.995732 5.991464 352 +run 1 51 2.995732 2.995732 347 +pointer 2 48 3.044522 6.089044 368 +without 1 50 3.044522 3.044522 370 +appoint 1 49 3.044522 3.044522 358 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +understand 2 47 3.091042 6.182084 384 +fridai 4 44 3.135494 12.541976 390 +even 3 45 3.135494 9.406482 393 +answer 2 45 3.135494 6.270988 391 +netscap 2 44 3.135494 6.270988 395 +algebra 1 45 3.135494 3.135494 394 +directori 1 45 3.135494 3.135494 396 +textbook 1 44 3.135494 3.135494 397 +term 5 43 3.178054 15.890270 411 +long 3 43 3.178054 9.534162 413 +show 1 43 3.178054 3.178054 417 +howev 2 41 3.218876 6.437752 422 +edit 1 42 3.218876 3.218876 418 +must 3 40 3.258097 9.774291 442 +error 2 40 3.258097 6.516194 449 +announc 1 40 3.258097 3.258097 441 +realli 1 40 3.258097 3.258097 444 +late 1 40 3.258097 3.258097 439 +programm 1 39 3.258097 3.258097 445 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +microsoft 5 38 3.295837 16.479185 468 +close 1 38 3.295837 3.295837 465 +open 1 38 3.295837 3.295837 469 +hand 4 37 3.332205 13.328820 475 +mean 1 37 3.332205 3.332205 477 +respons 1 37 3.332205 3.332205 476 +short 1 36 3.367296 3.367296 499 +copyright 1 36 3.367296 3.367296 495 +approxim 3 35 3.401197 10.203591 509 +everi 1 34 3.401197 3.401197 519 +statist 1 35 3.401197 3.401197 521 +taught 1 33 3.433987 3.433987 526 +board 1 33 3.433987 3.433987 528 +dissert 1 32 3.465736 3.465736 549 +someth 1 31 3.496508 3.496508 554 +abl 2 30 3.555348 7.110696 566 +rang 1 30 3.555348 3.555348 565 +depend 1 29 3.583519 3.583519 583 +pass 2 28 3.610918 7.221836 611 +except 2 28 3.610918 7.221836 607 +intend 1 28 3.610918 3.610918 599 +though 2 27 3.637586 7.275172 622 +campu 1 27 3.637586 3.637586 623 +relev 1 26 3.688879 3.688879 637 +consult 4 24 3.761200 15.044800 687 +lab 1 24 3.761200 3.761200 698 +tent 1 22 3.850148 3.850148 739 +instead 1 22 3.850148 3.850148 756 +try 1 22 3.850148 3.850148 764 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +entir 1 20 3.951244 3.951244 811 +longer 1 20 3.951244 3.951244 816 +exercis 4 19 4.007333 16.029332 842 +thur 1 19 4.007333 4.007333 847 +lyco 1 19 4.007333 4.007333 871 +attempt 1 17 4.110874 4.110874 917 +weekli 1 17 4.110874 4.110874 919 +anyth 1 16 4.174387 4.174387 998 +misconduct 1 16 4.174387 4.174387 1003 +explan 1 16 4.174387 4.174387 985 +normal 1 16 4.174387 4.174387 995 +across 1 16 4.174387 4.174387 974 +dilbert 1 16 4.174387 4.174387 996 +fortran 10 15 4.248495 42.484950 1027 +contribut 2 15 4.248495 8.496990 1021 +pagec 1 15 4.248495 4.248495 1011 +psycholog 1 15 4.248495 4.248495 1054 +score 1 15 4.248495 4.248495 1017 +overhead 1 15 4.248495 4.248495 1035 +doesn 1 15 4.248495 4.248495 1055 +purchas 1 15 4.248495 4.248495 1030 +easili 1 14 4.317488 4.317488 1077 +comic 1 14 4.317488 4.317488 1103 +primarili 2 13 4.382027 8.764054 1185 +menu 1 13 4.382027 4.382027 1156 +wait 1 13 4.382027 4.382027 1168 +necessari 1 13 4.382027 4.382027 1147 +step 1 13 4.382027 4.382027 1138 +skill 4 12 4.465908 17.863632 1205 +vectra 2 12 4.465908 8.931816 1267 +stai 1 12 4.465908 4.465908 1215 +calcul 1 12 4.465908 4.465908 1268 +outsid 1 12 4.465908 4.465908 1219 +overal 1 12 4.465908 4.465908 1254 +holidai 1 12 4.465908 4.465908 1224 +insid 1 12 4.465908 4.465908 1262 +extra 1 11 4.553877 4.553877 1312 +regularli 1 11 4.553877 4.553877 1338 +distinguish 1 11 4.553877 4.553877 1357 +keyword 1 11 4.553877 4.553877 1356 +night 1 11 4.553877 4.553877 1319 +bring 1 10 4.653960 4.653960 1430 +cheat 1 10 4.653960 4.653960 1395 +packard 1 10 4.653960 4.653960 1444 +certain 1 10 4.653960 4.653960 1393 +seven 2 9 4.753590 9.507180 1561 +login 2 9 4.753590 9.507180 1550 +discov 1 9 4.753590 4.753590 1562 +didn 1 9 4.753590 4.753590 1563 +familiar 1 9 4.753590 4.753590 1485 +curv 2 8 4.875197 9.750394 1656 +printer 2 8 4.875197 9.750394 1621 +risk 1 8 4.875197 4.875197 1689 +fail 1 8 4.875197 4.875197 1655 +matter 1 8 4.875197 4.875197 1627 +hewlett 1 8 4.875197 4.875197 1709 +friedman 1 7 5.010635 5.010635 1886 +zero 1 7 5.010635 5.010635 1896 +tag 1 7 5.010635 5.010635 1821 +therefor 1 7 5.010635 5.010635 1822 +bestor 4 6 5.164786 20.659144 2099 +constitut 1 6 5.164786 5.164786 2026 +ensur 1 6 5.164786 5.164786 2012 +syntax 1 6 5.164786 5.164786 2030 +gareth 3 5 5.347108 16.041324 2392 +substitut 1 5 5.347108 5.347108 2247 +handin 1 5 5.347108 5.347108 2393 +identif 1 4 5.568345 5.568345 2773 +wear 1 4 5.568345 5.568345 2785 +trivial 1 4 5.568345 5.568345 2786 +relief 1 4 5.568345 5.568345 2784 +punctual 1 3 5.857933 5.857933 3313 +projector 1 3 5.857933 5.857933 3409 +duti 1 3 5.857933 5.857933 3317 +labyou 1 3 5.857933 5.857933 3406 +dorm 1 3 5.857933 5.857933 3407 +lahei 1 3 5.857933 5.857933 3408 +programmingsect 1 2 6.263398 6.263398 4574 +disturb 1 2 6.263398 6.263398 4575 +lowest 1 2 6.263398 6.263398 4565 +regardless 1 2 6.263398 6.263398 4577 +pencil 1 2 6.263398 6.263398 4426 +subroutin 1 2 6.263398 6.263398 4576 +elig 2 1 6.957497 13.914994 8581 +amclick 1 1 6.957497 6.957497 8582 +unsur 1 1 6.957497 6.957497 8583 +notestext 1 1 6.957497 6.957497 8584 +koffman 1 1 6.957497 6.957497 8585 +assignmentsther 1 1 6.957497 6.957497 8586 +gradesheet 1 1 6.957497 6.957497 8587 +pmhow 1 1 6.957497 6.957497 8588 +modem 1 1 6.957497 6.957497 8589 +exerciseson 1 1 6.957497 6.957497 8590 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html new file mode 100644 index 00000000..f740568a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~burnett^cs302.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +program 4 374 0.693147 2.772588 7 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +fall 2 181 1.609438 3.218876 40 +updat 2 191 1.609438 3.218876 41 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +note 1 142 1.945910 1.945910 67 +object 1 138 1.945910 1.945910 79 +section 2 94 2.397895 4.795790 149 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +name 1 72 2.639057 2.639057 220 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +handout 1 64 2.772589 2.772589 263 +publish 1 57 2.890372 2.890372 326 +week 1 52 2.995732 2.995732 343 +algebra 1 45 3.135494 3.135494 394 +compani 1 41 3.218876 3.218876 423 +announc 1 40 3.258097 3.258097 441 +error 1 40 3.258097 3.258097 449 +origin 1 38 3.295837 3.295837 472 +correct 1 38 3.295837 3.295837 462 +eduoffic 1 33 3.433987 3.433987 531 +ask 1 28 3.610918 3.610918 597 +valu 1 25 3.737670 3.737670 665 +known 1 24 3.761200 3.761200 702 +greg 1 24 3.761200 3.761200 695 +dai 1 22 3.850148 3.850148 753 +output 1 21 3.912023 3.912023 788 +walter 1 17 4.110874 4.110874 950 +quiz 1 16 4.174387 4.174387 990 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +dave 2 14 4.317488 8.634976 1098 +scan 1 12 4.465908 4.465908 1243 +informationemail 1 9 4.753590 4.753590 1564 +sharp 1 6 5.164786 5.164786 2100 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +prog 1 4 5.568345 5.568345 2740 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +objectivesvectra 1 3 5.857933 5.857933 3410 +homeclass 1 3 5.857933 5.857933 3411 +policyl 1 3 5.857933 5.857933 3412 +policyacadem 1 3 5.857933 5.857933 3413 +burnett 2 2 6.263398 12.526796 4578 +consultantssyllabuswork 1 2 6.263398 6.263398 4579 +archivepolici 1 2 6.263398 6.263398 4580 +eggleston 1 2 6.263398 6.263398 4581 +egglestonemail 1 1 6.957497 6.957497 8591 +hourlywork 1 1 6.957497 6.957497 8592 +classread 1 1 6.957497 6.957497 8593 +gradeshomeworkexam 1 1 6.957497 6.957497 8594 +quizzesmiscellan 1 1 6.957497 6.957497 8595 +policytextproblem 1 1 6.957497 6.957497 8596 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html new file mode 100644 index 00000000..e57676a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cao^cs736^cs736.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 8 443 0.693147 5.545176 6 +research 3 431 0.693147 2.079441 10 +project 10 340 1.098612 10.986120 18 +cours 3 273 1.098612 3.295836 15 +us 3 329 1.098612 3.295836 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +paper 7 205 1.609438 11.266066 38 +oper 3 180 1.609438 4.828314 34 +class 2 199 1.609438 3.218876 37 +list 1 201 1.609438 1.609438 39 +implement 4 152 1.791759 7.167036 52 +read 3 154 1.791759 5.375277 47 +distribut 1 162 1.791759 1.791759 51 +hour 1 165 1.791759 1.791759 46 +assign 5 135 1.945910 9.729550 66 +lectur 4 135 1.945910 7.783640 73 +first 3 140 1.945910 5.837730 71 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +spring 2 131 2.079442 4.158884 88 +schedul 2 119 2.079442 4.158884 85 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +topic 3 114 2.197225 6.591675 110 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +text 2 98 2.302585 4.605170 133 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +need 1 98 2.302585 2.302585 135 +peopl 1 96 2.302585 2.302585 132 +grade 2 90 2.397895 4.795790 142 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +requir 1 81 2.484907 2.484907 167 +solut 1 82 2.484907 2.484907 162 +info 1 85 2.484907 2.484907 176 +activ 1 84 2.484907 2.484907 182 +exam 1 86 2.484907 2.484907 169 +second 1 81 2.484907 2.484907 166 +come 1 78 2.564949 2.564949 202 +involv 2 71 2.639057 5.278114 227 +tuesdai 2 73 2.639057 5.278114 219 +summari 1 73 2.639057 2.639057 237 +thursdai 1 70 2.708050 2.708050 241 +differ 1 66 2.708050 2.708050 253 +window 1 68 2.708050 2.708050 242 +import 1 65 2.772589 2.772589 282 +previou 1 62 2.772589 2.772589 290 +copi 1 63 2.772589 2.772589 284 +semest 2 58 2.890372 5.780744 312 +detail 1 57 2.890372 2.890372 321 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +cover 1 55 2.944439 2.944439 329 +suggest 1 53 2.944439 2.944439 331 +allow 1 53 2.944439 2.944439 333 +case 1 51 2.995732 2.995732 351 +give 1 50 3.044522 3.044522 359 +appoint 1 49 3.044522 3.044522 358 +discuss 5 45 3.135494 15.677470 399 +review 1 42 3.218876 3.218876 425 +slide 2 38 3.295837 6.591674 467 +prototyp 1 38 3.295837 3.295837 463 +hand 1 37 3.332205 3.332205 475 +either 1 35 3.401197 3.401197 506 +compon 1 30 3.555348 3.555348 570 +secur 1 30 3.555348 3.555348 577 +particip 2 29 3.583519 7.167038 589 +synchron 1 29 3.583519 3.583519 588 +propos 2 28 3.610918 7.221836 602 +intend 1 28 3.610918 3.610918 599 +measur 1 28 3.610918 3.610918 609 +linux 1 27 3.637586 3.637586 631 +team 1 27 3.637586 3.637586 625 +relev 1 26 3.688879 3.688879 637 +instead 1 22 3.850148 3.850148 756 +tent 1 22 3.850148 3.850148 739 +benchmark 1 19 4.007333 4.007333 859 +encourag 1 18 4.060443 4.060443 880 +protect 1 17 4.110874 4.110874 935 +choos 1 16 4.174387 4.174387 964 +purchas 1 15 4.248495 4.248495 1030 +classic 1 14 4.317488 4.317488 1084 +doit 1 14 4.317488 4.317488 1111 +suit 1 13 4.382027 4.382027 1129 +count 3 12 4.465908 13.397724 1239 +solari 1 12 4.465908 4.465908 1238 +broad 1 11 4.553877 4.553877 1302 +strongli 1 10 4.653960 4.653960 1406 +total 1 10 4.653960 4.653960 1398 +theme 1 8 4.875197 4.875197 1707 +formerli 1 5 5.347108 5.347108 2397 +exposur 1 4 5.568345 5.568345 2598 +suno 1 4 5.568345 5.568345 2790 +advancedoper 1 3 5.857933 5.857933 3403 +focal 1 3 5.857933 5.857933 3404 +macc 1 3 5.857933 5.857933 3414 +ofvari 1 2 6.263398 6.263398 4582 +anexperiment 1 2 6.263398 6.263398 4299 +halloffic 1 2 6.263398 6.263398 4583 +deskfor 1 2 6.263398 6.263398 4584 +performanceof 1 2 6.263398 6.263398 4585 +topicsinclud 1 1 6.957497 6.957497 8597 +tochoos 1 1 6.957497 6.957497 8598 +rathera 1 1 6.957497 6.957497 8599 +manya 1 1 6.957497 6.957497 8600 +assig 1 1 6.957497 6.957497 8601 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html new file mode 100644 index 00000000..cbcf43c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs110^cs110.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +cours 3 273 1.098612 3.295836 15 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +design 1 213 1.386294 1.386294 25 +fall 2 181 1.609438 3.218876 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +lectur 15 135 1.945910 29.188650 73 +problem 1 147 1.945910 1.945910 75 +first 1 140 1.945910 1.945910 71 +introduct 1 126 2.079442 2.079442 87 +need 1 98 2.302585 2.302585 135 +section 1 94 2.397895 2.397895 149 +materi 2 75 2.639057 5.278114 221 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +simpl 1 60 2.833213 2.833213 298 +cover 2 55 2.944439 5.888878 329 +basic 1 50 3.044522 3.044522 360 +credit 1 38 3.295837 3.295837 460 +michael 2 35 3.401197 6.802394 514 +jeff 2 25 3.737670 7.475340 673 +half 1 21 3.912023 3.912023 776 +martin 1 21 3.912023 3.912023 794 +prepar 1 20 3.951244 3.951244 824 +fortran 2 15 4.248495 8.496990 1027 +essenti 1 13 4.382027 4.382027 1137 +russel 2 9 4.753590 9.507180 1507 +suffici 1 7 5.010635 5.010635 1897 +lampert 2 5 5.347108 10.694216 2398 +birk 2 4 5.568345 11.136690 2791 +anthoni 1 4 5.568345 5.568345 2792 +toni 4 3 5.857933 23.431732 3415 +hummert 4 3 5.857933 23.431732 3416 +man 2 3 5.857933 11.715866 3417 +silva 5 2 6.263398 31.316990 4586 +sidnei 4 2 6.263398 25.053592 4587 +programmingstructur 1 1 6.957497 6.957497 8602 +elementaryengin 1 1 6.957497 6.957497 8603 +enableth 1 1 6.957497 6.957497 8604 +inelementari 1 1 6.957497 6.957497 8605 +reameslast 1 1 6.957497 6.957497 8606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html new file mode 100644 index 00000000..c7dc69a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-1^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +offic 34 299 1.098612 37.352808 13 +us 1 329 1.098612 1.098612 16 +wisc 12 242 1.386294 16.635528 33 +email 11 220 1.386294 15.249234 29 +link 2 247 1.386294 2.772588 24 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +list 2 201 1.609438 3.218876 39 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +hour 12 165 1.791759 21.501108 46 +phone 12 175 1.791759 21.501108 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +databas 1 122 2.079442 2.079442 86 +site 4 106 2.197225 8.788900 119 +world 2 115 2.197225 4.394450 126 +teach 1 108 2.197225 2.197225 112 +find 1 111 2.197225 2.197225 111 +section 11 94 2.397895 26.376845 149 +grade 10 90 2.397895 23.978950 142 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +larg 1 82 2.484907 2.484907 168 +mondai 1 77 2.564949 2.564949 206 +name 11 72 2.639057 29.029627 220 +wednesdai 1 64 2.772589 2.772589 261 +dept 1 64 2.772589 2.772589 291 +virtual 1 62 2.772589 2.772589 285 +creat 1 63 2.772589 2.772589 277 +explor 1 58 2.890372 2.890372 324 +maintain 2 51 2.995732 5.991464 342 +hardwar 1 51 2.995732 2.995732 350 +appoint 1 49 3.044522 3.044522 358 +cool 1 49 3.044522 3.044522 374 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +microsoft 1 38 3.295837 3.295837 468 +origin 1 38 3.295837 3.295837 472 +especi 1 36 3.367296 3.367296 496 +jame 1 35 3.401197 3.401197 507 +yahoo 1 24 3.761200 3.761200 707 +andrew 1 19 4.007333 4.007333 849 +lyco 1 19 4.007333 4.007333 871 +excel 1 19 4.007333 4.007333 868 +intel 1 16 4.174387 4.174387 1000 +novel 1 15 4.248495 4.248495 1039 +borland 1 14 4.317488 4.317488 1067 +whose 1 13 4.382027 4.382027 1166 +gupta 1 12 4.465908 4.465908 1241 +packard 1 10 4.653960 4.653960 1444 +classifi 1 9 4.753590 4.753590 1537 +hewlett 1 8 4.875197 4.875197 1709 +tourist 1 8 4.875197 4.875197 1710 +chiang 1 7 5.010635 5.010635 1853 +sciencesoffic 1 6 5.164786 5.164786 2101 +mother 1 6 5.164786 5.164786 2083 +alphabet 1 6 5.164786 5.164786 1980 +teitelbaum 1 6 5.164786 5.164786 2102 +categori 1 5 5.347108 5.347108 2261 +lookup 1 5 5.347108 5.347108 2399 +kelli 3 4 5.568345 16.705035 2793 +nathan 1 4 5.568345 5.568345 2794 +tsioli 3 3 5.857933 17.573799 3418 +ratliff 2 3 5.857933 11.715866 3419 +bockrath 2 3 5.857933 11.715866 3420 +ashraf 2 3 5.857933 11.715866 3421 +geeri 2 3 5.857933 11.715866 3422 +jyothi 2 3 5.857933 11.715866 3423 +thano 2 3 5.857933 11.715866 3424 +fink 1 3 5.857933 5.857933 3425 +aboulnaga 1 3 5.857933 5.857933 3426 +jherro 1 3 5.857933 5.857933 3427 +abhinav 1 3 5.857933 5.857933 3428 +agupta 1 3 5.857933 5.857933 3429 +suhui 1 3 5.857933 5.857933 3430 +enorm 1 3 5.857933 5.857933 3431 +rehnuma 2 2 6.263398 12.526796 4588 +keyinstructorprofessor 1 2 6.263398 6.263398 4589 +desautelsoffic 1 2 6.263398 6.263398 4590 +assistantsfollow 1 2 6.263398 6.263398 4591 +rahman 1 2 6.263398 6.263398 4592 +jaim 1 2 6.263398 6.263398 4593 +jfink 1 2 6.263398 6.263398 4594 +herro 1 2 6.263398 6.263398 4595 +krothap 1 2 6.263398 6.263398 4596 +gradesexplor 1 2 6.263398 6.263398 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html new file mode 100644 index 00000000..c7dc69a9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-2^cs132.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +offic 34 299 1.098612 37.352808 13 +us 1 329 1.098612 1.098612 16 +wisc 12 242 1.386294 16.635528 33 +email 11 220 1.386294 15.249234 29 +link 2 247 1.386294 2.772588 24 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +list 2 201 1.609438 3.218876 39 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +hour 12 165 1.791759 21.501108 46 +phone 12 175 1.791759 21.501108 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +databas 1 122 2.079442 2.079442 86 +site 4 106 2.197225 8.788900 119 +world 2 115 2.197225 4.394450 126 +teach 1 108 2.197225 2.197225 112 +find 1 111 2.197225 2.197225 111 +section 11 94 2.397895 26.376845 149 +grade 10 90 2.397895 23.978950 142 +search 1 95 2.397895 2.397895 155 +exam 1 86 2.484907 2.484907 169 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +larg 1 82 2.484907 2.484907 168 +mondai 1 77 2.564949 2.564949 206 +name 11 72 2.639057 29.029627 220 +wednesdai 1 64 2.772589 2.772589 261 +dept 1 64 2.772589 2.772589 291 +virtual 1 62 2.772589 2.772589 285 +creat 1 63 2.772589 2.772589 277 +explor 1 58 2.890372 2.890372 324 +maintain 2 51 2.995732 5.991464 342 +hardwar 1 51 2.995732 2.995732 350 +appoint 1 49 3.044522 3.044522 358 +cool 1 49 3.044522 3.044522 374 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +microsoft 1 38 3.295837 3.295837 468 +origin 1 38 3.295837 3.295837 472 +especi 1 36 3.367296 3.367296 496 +jame 1 35 3.401197 3.401197 507 +yahoo 1 24 3.761200 3.761200 707 +andrew 1 19 4.007333 4.007333 849 +lyco 1 19 4.007333 4.007333 871 +excel 1 19 4.007333 4.007333 868 +intel 1 16 4.174387 4.174387 1000 +novel 1 15 4.248495 4.248495 1039 +borland 1 14 4.317488 4.317488 1067 +whose 1 13 4.382027 4.382027 1166 +gupta 1 12 4.465908 4.465908 1241 +packard 1 10 4.653960 4.653960 1444 +classifi 1 9 4.753590 4.753590 1537 +hewlett 1 8 4.875197 4.875197 1709 +tourist 1 8 4.875197 4.875197 1710 +chiang 1 7 5.010635 5.010635 1853 +sciencesoffic 1 6 5.164786 5.164786 2101 +mother 1 6 5.164786 5.164786 2083 +alphabet 1 6 5.164786 5.164786 1980 +teitelbaum 1 6 5.164786 5.164786 2102 +categori 1 5 5.347108 5.347108 2261 +lookup 1 5 5.347108 5.347108 2399 +kelli 3 4 5.568345 16.705035 2793 +nathan 1 4 5.568345 5.568345 2794 +tsioli 3 3 5.857933 17.573799 3418 +ratliff 2 3 5.857933 11.715866 3419 +bockrath 2 3 5.857933 11.715866 3420 +ashraf 2 3 5.857933 11.715866 3421 +geeri 2 3 5.857933 11.715866 3422 +jyothi 2 3 5.857933 11.715866 3423 +thano 2 3 5.857933 11.715866 3424 +fink 1 3 5.857933 5.857933 3425 +aboulnaga 1 3 5.857933 5.857933 3426 +jherro 1 3 5.857933 5.857933 3427 +abhinav 1 3 5.857933 5.857933 3428 +agupta 1 3 5.857933 5.857933 3429 +suhui 1 3 5.857933 5.857933 3430 +enorm 1 3 5.857933 5.857933 3431 +rehnuma 2 2 6.263398 12.526796 4588 +keyinstructorprofessor 1 2 6.263398 6.263398 4589 +desautelsoffic 1 2 6.263398 6.263398 4590 +assistantsfollow 1 2 6.263398 6.263398 4591 +rahman 1 2 6.263398 6.263398 4592 +jaim 1 2 6.263398 6.263398 4593 +jfink 1 2 6.263398 6.263398 4594 +herro 1 2 6.263398 6.263398 4595 +krothap 1 2 6.263398 6.263398 4596 +gradesexplor 1 2 6.263398 6.263398 4597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html new file mode 100644 index 00000000..ba37ed7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-3^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 4 443 0.693147 2.772588 6 +program 3 374 0.693147 2.079441 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 6 329 1.098612 6.591672 16 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +gener 2 220 1.386294 2.772588 27 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +class 3 199 1.609438 4.828314 37 +includ 2 208 1.609438 3.218876 42 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +avail 2 169 1.791759 3.583518 48 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +lectur 8 135 1.945910 15.567280 73 +assign 3 135 1.945910 5.837730 66 +click 2 142 1.945910 3.891820 78 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +databas 2 122 2.079442 4.158884 86 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +topic 2 114 2.197225 4.394450 110 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +part 3 98 2.302585 6.907755 129 +text 2 98 2.302585 4.605170 133 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +section 4 94 2.397895 9.591580 149 +follow 2 92 2.397895 4.795790 143 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +octob 1 89 2.397895 2.397895 156 +info 2 85 2.484907 4.969814 176 +exam 2 86 2.484907 4.969814 169 +wide 1 84 2.484907 2.484907 185 +learn 1 86 2.484907 2.484907 170 +contain 1 81 2.484907 2.484907 174 +issu 1 78 2.564949 2.564949 211 +tuesdai 1 73 2.639057 2.639057 219 +intellig 1 72 2.639057 2.639057 225 +addit 1 74 2.639057 2.639057 228 +syllabu 2 67 2.708050 5.416100 247 +thursdai 1 70 2.708050 2.708050 241 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +point 1 58 2.890372 2.890372 319 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +detail 1 57 2.890372 2.890372 321 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +electron 1 47 3.091042 3.091042 379 +discuss 3 45 3.135494 9.406482 399 +netscap 1 44 3.135494 3.135494 395 +term 1 43 3.178054 3.178054 411 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +word 3 34 3.401197 10.203591 508 +eduoffic 1 33 3.433987 3.433987 531 +taught 1 33 3.433987 3.433987 526 +storag 1 31 3.496508 3.496508 553 +compon 1 30 3.555348 3.555348 570 +held 1 28 3.610918 3.610918 600 +background 2 25 3.737670 7.475340 664 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +newsgroup 1 21 3.912023 3.912023 783 +expert 1 20 3.951244 3.951244 833 +qualiti 1 20 3.951244 3.951244 832 +excel 1 19 4.007333 4.007333 868 +macintosh 3 17 4.110874 12.332622 920 +regular 1 17 4.110874 4.110874 929 +devic 1 16 4.174387 4.174387 1002 +draw 1 14 4.317488 4.317488 1086 +quizz 2 13 4.382027 8.764054 1151 +social 1 13 4.382027 4.382027 1123 +necessari 1 13 4.382027 4.382027 1147 +skill 1 12 4.465908 4.465908 1205 +desktop 1 10 4.653960 4.653960 1445 +rich 1 10 4.653960 4.653960 1396 +telecommun 1 9 4.753590 4.753590 1565 +zero 1 7 5.010635 5.010635 1896 +shot 1 7 5.010635 5.010635 1898 +necessarili 1 7 5.010635 5.010635 1899 +thegoal 1 6 5.164786 5.164786 2033 +lloyd 1 6 5.164786 5.164786 2103 +paint 1 5 5.347108 5.347108 2400 +bodner 1 5 5.347108 5.347108 2401 +chart 1 4 5.568345 5.568345 2653 +assignmentsand 1 4 5.568345 5.568345 2760 +glanc 1 4 5.568345 5.568345 2652 +salli 2 3 5.857933 11.715866 3432 +facstaff 1 3 5.857933 5.857933 3433 +drag 1 3 5.857933 5.857933 3434 +crack 1 3 5.857933 5.857933 3435 +macintoshcomput 1 3 5.857933 5.857933 3228 +iici 1 3 5.857933 5.857933 3436 +scanner 1 3 5.857933 5.857933 3437 +leavi 1 3 5.857933 5.857933 3438 +sharenow 1 3 5.857933 5.857933 3439 +swander 1 3 5.857933 5.857933 3440 +thayer 1 3 5.857933 5.857933 3441 +varghes 1 3 5.857933 5.857933 3442 +weinberg 1 3 5.857933 5.857933 3443 +spreadsheet 2 2 6.263398 12.526796 4598 +aldu 2 2 6.263398 12.526796 4599 +computersinstructor 1 2 6.263398 6.263398 4600 +petersonoffic 1 2 6.263398 6.263398 4601 +sciencephon 1 2 6.263398 6.263398 4602 +slpeter 1 2 6.263398 6.263398 4603 +appointmentvit 1 2 6.263398 6.263398 4604 +halllectur 1 2 6.263398 6.263398 4605 +laudon 1 2 6.263398 6.263398 4606 +traver 1 2 6.263398 6.263398 4607 +laudonlab 1 2 6.263398 6.263398 4608 +petersoncours 1 2 6.263398 6.263398 4609 +computersto 1 2 6.263398 6.263398 4610 +throughcolleg 1 2 6.263398 6.263398 4611 +arena 1 2 6.263398 6.263398 4612 +csuse 1 2 6.263398 6.263398 4613 +experienceon 1 2 6.263398 6.263398 4614 +eudora 1 2 6.263398 6.263398 4615 +superpaint 1 2 6.263398 6.263398 4616 +filemak 1 2 6.263398 6.263398 4617 +hypercard 1 2 6.263398 6.263398 4618 +pagemak 1 2 6.263398 6.263398 4619 +educationalexperi 1 2 6.263398 6.263398 4620 +namesectiontimedai 1 2 6.263398 6.263398 4621 +mwnick 1 2 6.263398 6.263398 4622 +mwtrshannon 1 2 6.263398 6.263398 4623 +trtrjeff 1 2 6.263398 6.263398 4624 +reminga 1 2 6.263398 6.263398 4625 +mwfmwira 1 2 6.263398 6.263398 4626 +trtrbrian 1 2 6.263398 6.263398 4627 +mwfmwfbrad 1 2 6.263398 6.263398 4628 +mwfmwfjoe 1 2 6.263398 6.263398 4629 +trtrgeoff 1 2 6.263398 6.263398 4630 +mwftrmaria 1 2 6.263398 6.263398 4631 +yuin 1 2 6.263398 6.263398 4632 +mwfmwrecommend 1 2 6.263398 6.263398 4633 +nitti 1 2 6.263398 6.263398 4634 +gritti 1 2 6.263398 6.263398 4635 +superpaintassign 1 2 6.263398 6.263398 4636 +excellast 1 2 6.263398 6.263398 4637 +jonbodn 1 2 6.263398 6.263398 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html new file mode 100644 index 00000000..ba37ed7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs132-4^cs132.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 4 443 0.693147 2.772588 6 +program 3 374 0.693147 2.079441 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 6 329 1.098612 6.591672 16 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +gener 2 220 1.386294 2.772588 27 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +class 3 199 1.609438 4.828314 37 +includ 2 208 1.609438 3.218876 42 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +avail 2 169 1.791759 3.583518 48 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +lectur 8 135 1.945910 15.567280 73 +assign 3 135 1.945910 5.837730 66 +click 2 142 1.945910 3.891820 78 +relat 1 139 1.945910 1.945910 68 +process 1 142 1.945910 1.945910 72 +databas 2 122 2.079442 4.158884 86 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +topic 2 114 2.197225 4.394450 110 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +part 3 98 2.302585 6.907755 129 +text 2 98 2.302585 4.605170 133 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +section 4 94 2.397895 9.591580 149 +follow 2 92 2.397895 4.795790 143 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +octob 1 89 2.397895 2.397895 156 +info 2 85 2.484907 4.969814 176 +exam 2 86 2.484907 4.969814 169 +wide 1 84 2.484907 2.484907 185 +learn 1 86 2.484907 2.484907 170 +contain 1 81 2.484907 2.484907 174 +issu 1 78 2.564949 2.564949 211 +tuesdai 1 73 2.639057 2.639057 219 +intellig 1 72 2.639057 2.639057 225 +addit 1 74 2.639057 2.639057 228 +syllabu 2 67 2.708050 5.416100 247 +thursdai 1 70 2.708050 2.708050 241 +knowledg 1 67 2.708050 2.708050 243 +order 1 69 2.708050 2.708050 249 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +point 1 58 2.890372 2.890372 319 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +detail 1 57 2.890372 2.890372 321 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +electron 1 47 3.091042 3.091042 379 +discuss 3 45 3.135494 9.406482 399 +netscap 1 44 3.135494 3.135494 395 +term 1 43 3.178054 3.178054 411 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +word 3 34 3.401197 10.203591 508 +eduoffic 1 33 3.433987 3.433987 531 +taught 1 33 3.433987 3.433987 526 +storag 1 31 3.496508 3.496508 553 +compon 1 30 3.555348 3.555348 570 +held 1 28 3.610918 3.610918 600 +background 2 25 3.737670 7.475340 664 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +newsgroup 1 21 3.912023 3.912023 783 +expert 1 20 3.951244 3.951244 833 +qualiti 1 20 3.951244 3.951244 832 +excel 1 19 4.007333 4.007333 868 +macintosh 3 17 4.110874 12.332622 920 +regular 1 17 4.110874 4.110874 929 +devic 1 16 4.174387 4.174387 1002 +draw 1 14 4.317488 4.317488 1086 +quizz 2 13 4.382027 8.764054 1151 +social 1 13 4.382027 4.382027 1123 +necessari 1 13 4.382027 4.382027 1147 +skill 1 12 4.465908 4.465908 1205 +desktop 1 10 4.653960 4.653960 1445 +rich 1 10 4.653960 4.653960 1396 +telecommun 1 9 4.753590 4.753590 1565 +zero 1 7 5.010635 5.010635 1896 +shot 1 7 5.010635 5.010635 1898 +necessarili 1 7 5.010635 5.010635 1899 +thegoal 1 6 5.164786 5.164786 2033 +lloyd 1 6 5.164786 5.164786 2103 +paint 1 5 5.347108 5.347108 2400 +bodner 1 5 5.347108 5.347108 2401 +chart 1 4 5.568345 5.568345 2653 +assignmentsand 1 4 5.568345 5.568345 2760 +glanc 1 4 5.568345 5.568345 2652 +salli 2 3 5.857933 11.715866 3432 +facstaff 1 3 5.857933 5.857933 3433 +drag 1 3 5.857933 5.857933 3434 +crack 1 3 5.857933 5.857933 3435 +macintoshcomput 1 3 5.857933 5.857933 3228 +iici 1 3 5.857933 5.857933 3436 +scanner 1 3 5.857933 5.857933 3437 +leavi 1 3 5.857933 5.857933 3438 +sharenow 1 3 5.857933 5.857933 3439 +swander 1 3 5.857933 5.857933 3440 +thayer 1 3 5.857933 5.857933 3441 +varghes 1 3 5.857933 5.857933 3442 +weinberg 1 3 5.857933 5.857933 3443 +spreadsheet 2 2 6.263398 12.526796 4598 +aldu 2 2 6.263398 12.526796 4599 +computersinstructor 1 2 6.263398 6.263398 4600 +petersonoffic 1 2 6.263398 6.263398 4601 +sciencephon 1 2 6.263398 6.263398 4602 +slpeter 1 2 6.263398 6.263398 4603 +appointmentvit 1 2 6.263398 6.263398 4604 +halllectur 1 2 6.263398 6.263398 4605 +laudon 1 2 6.263398 6.263398 4606 +traver 1 2 6.263398 6.263398 4607 +laudonlab 1 2 6.263398 6.263398 4608 +petersoncours 1 2 6.263398 6.263398 4609 +computersto 1 2 6.263398 6.263398 4610 +throughcolleg 1 2 6.263398 6.263398 4611 +arena 1 2 6.263398 6.263398 4612 +csuse 1 2 6.263398 6.263398 4613 +experienceon 1 2 6.263398 6.263398 4614 +eudora 1 2 6.263398 6.263398 4615 +superpaint 1 2 6.263398 6.263398 4616 +filemak 1 2 6.263398 6.263398 4617 +hypercard 1 2 6.263398 6.263398 4618 +pagemak 1 2 6.263398 6.263398 4619 +educationalexperi 1 2 6.263398 6.263398 4620 +namesectiontimedai 1 2 6.263398 6.263398 4621 +mwnick 1 2 6.263398 6.263398 4622 +mwtrshannon 1 2 6.263398 6.263398 4623 +trtrjeff 1 2 6.263398 6.263398 4624 +reminga 1 2 6.263398 6.263398 4625 +mwfmwira 1 2 6.263398 6.263398 4626 +trtrbrian 1 2 6.263398 6.263398 4627 +mwfmwfbrad 1 2 6.263398 6.263398 4628 +mwfmwfjoe 1 2 6.263398 6.263398 4629 +trtrgeoff 1 2 6.263398 6.263398 4630 +mwftrmaria 1 2 6.263398 6.263398 4631 +yuin 1 2 6.263398 6.263398 4632 +mwfmwrecommend 1 2 6.263398 6.263398 4633 +nitti 1 2 6.263398 6.263398 4634 +gritti 1 2 6.263398 6.263398 4635 +superpaintassign 1 2 6.263398 6.263398 4636 +excellast 1 2 6.263398 6.263398 4637 +jonbodn 1 2 6.263398 6.263398 4638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^ new file mode 100644 index 00000000..b05f68a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +languag 2 227 1.386294 2.772588 26 +email 2 220 1.386294 2.772588 29 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +introduct 2 126 2.079442 4.158884 87 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +section 4 94 2.397895 9.591580 149 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +academ 1 82 2.484907 2.484907 178 +window 3 68 2.708050 8.124150 242 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +creat 1 63 2.772589 2.772589 277 +overview 1 56 2.890372 2.890372 323 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +algebra 1 45 3.135494 3.135494 394 +netscap 1 44 3.135494 3.135494 395 +offer 1 43 3.178054 3.178054 414 +microsoft 1 38 3.295837 3.295837 468 +ask 1 28 3.610918 3.610918 597 +jeff 1 25 3.737670 3.737670 673 +consult 2 24 3.761200 7.522400 687 +feedback 1 19 4.007333 4.007333 854 +misconduct 1 16 4.174387 4.174387 1003 +borland 2 14 4.317488 8.634976 1067 +coordin 2 13 4.382027 8.764054 1182 +savitch 1 12 4.465908 4.465908 1269 +mainli 1 10 4.653960 4.653960 1432 +hint 1 10 4.653960 4.653960 1419 +tutor 1 9 4.753590 4.753590 1552 +pagecomput 1 7 5.010635 5.010635 1900 +skrentni 4 6 5.164786 20.659144 2104 +lampert 1 5 5.347108 5.347108 2398 +complaint 1 4 5.568345 5.568345 2795 +microcomput 1 3 5.857933 5.857933 3444 +instructorsw 1 2 6.263398 6.263398 4639 +csinform 1 2 6.263398 6.263398 4640 +subdirectoriesc 1 2 6.263398 6.263398 4641 +environmentfortran 1 2 6.263398 6.263398 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^course.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^course.html new file mode 100644 index 00000000..bcd84310 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^course.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +languag 2 227 1.386294 2.772588 26 +algorithm 1 162 1.791759 1.791759 57 +construct 1 139 1.945910 1.945910 82 +problem 1 147 1.945910 1.945910 75 +high 1 130 2.079442 2.079442 101 +mathemat 2 108 2.197225 4.394450 123 +instructor 1 108 2.197225 2.197225 107 +advanc 2 99 2.302585 4.605170 130 +techniqu 1 99 2.302585 2.302585 138 +school 1 84 2.484907 2.484907 188 +orient 1 80 2.564949 2.564949 205 +solv 1 73 2.639057 2.639057 234 +logic 1 71 2.639057 2.639057 230 +experi 1 64 2.772589 2.772589 283 +colleg 1 61 2.833213 2.833213 300 +undergradu 1 54 2.944439 2.944439 338 +instruct 1 53 2.944439 2.944439 332 +open 1 38 3.295837 3.295837 469 +procedur 1 36 3.367296 3.367296 488 +least 1 35 3.401197 3.401197 516 +survei 1 35 3.401197 3.401197 513 +statist 1 35 3.401197 3.401197 521 +prepar 1 20 3.951244 3.951244 824 +fortran 1 15 4.248495 4.248495 1027 +pascal 1 12 4.465908 4.465908 1213 +consent 1 5 5.347108 5.347108 2389 +prereq 1 3 5.857933 5.857933 3178 +infocours 1 2 6.263398 6.263398 4212 +guidebook 1 2 6.263398 6.263398 4643 +cscours 1 1 6.957497 6.957497 8607 +descriptionfrom 1 1 6.957497 6.957497 8608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html new file mode 100644 index 00000000..b05f68a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs302^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +languag 2 227 1.386294 2.772588 26 +email 2 220 1.386294 2.772588 29 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +introduct 2 126 2.079442 4.158884 87 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +section 4 94 2.397895 9.591580 149 +comment 1 93 2.397895 2.397895 146 +question 1 91 2.397895 2.397895 141 +academ 1 82 2.484907 2.484907 178 +window 3 68 2.708050 8.124150 242 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +creat 1 63 2.772589 2.772589 277 +overview 1 56 2.890372 2.890372 323 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +algebra 1 45 3.135494 3.135494 394 +netscap 1 44 3.135494 3.135494 395 +offer 1 43 3.178054 3.178054 414 +microsoft 1 38 3.295837 3.295837 468 +ask 1 28 3.610918 3.610918 597 +jeff 1 25 3.737670 3.737670 673 +consult 2 24 3.761200 7.522400 687 +feedback 1 19 4.007333 4.007333 854 +misconduct 1 16 4.174387 4.174387 1003 +borland 2 14 4.317488 8.634976 1067 +coordin 2 13 4.382027 8.764054 1182 +savitch 1 12 4.465908 4.465908 1269 +mainli 1 10 4.653960 4.653960 1432 +hint 1 10 4.653960 4.653960 1419 +tutor 1 9 4.753590 4.753590 1552 +pagecomput 1 7 5.010635 5.010635 1900 +skrentni 4 6 5.164786 20.659144 2104 +lampert 1 5 5.347108 5.347108 2398 +complaint 1 4 5.568345 5.568345 2795 +microcomput 1 3 5.857933 5.857933 3444 +instructorsw 1 2 6.263398 6.263398 4639 +csinform 1 2 6.263398 6.263398 4640 +subdirectoriesc 1 2 6.263398 6.263398 4641 +environmentfortran 1 2 6.263398 6.263398 4642 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html new file mode 100644 index 00000000..926109cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs310-1^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 4 412 0.693147 2.772588 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +includ 4 208 1.609438 6.437752 42 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +assign 3 135 1.945910 5.837730 66 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +document 2 121 2.079442 4.158884 89 +postscript 2 131 2.079442 4.158884 90 +check 3 115 2.197225 6.591675 118 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +servic 2 72 2.639057 5.278114 236 +solv 1 73 2.639057 2.639057 234 +syllabu 1 67 2.708050 2.708050 247 +copi 3 63 2.772589 8.317767 284 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +direct 1 57 2.890372 2.890372 316 +local 2 55 2.944439 5.888878 334 +suggest 1 53 2.944439 2.944439 331 +directori 1 45 3.135494 3.135494 396 +examin 1 42 3.218876 3.218876 424 +past 1 42 3.218876 3.218876 428 +obtain 1 33 3.433987 3.433987 534 +lab 1 24 3.761200 3.761200 698 +viewer 1 21 3.912023 3.912023 787 +explan 1 16 4.174387 4.174387 985 +readm 1 8 4.875197 4.875197 1699 +ghost 1 2 6.263398 6.263398 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html new file mode 100644 index 00000000..926109cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs310-2^cs310.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 4 412 0.693147 2.772588 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +includ 4 208 1.609438 6.437752 42 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +assign 3 135 1.945910 5.837730 66 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +document 2 121 2.079442 4.158884 89 +postscript 2 131 2.079442 4.158884 90 +check 3 115 2.197225 6.591675 118 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +mani 1 92 2.397895 2.397895 150 +section 1 94 2.397895 2.397895 149 +exam 1 86 2.484907 2.484907 169 +servic 2 72 2.639057 5.278114 236 +solv 1 73 2.639057 2.639057 234 +syllabu 1 67 2.708050 2.708050 247 +copi 3 63 2.772589 8.317767 284 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +direct 1 57 2.890372 2.890372 316 +local 2 55 2.944439 5.888878 334 +suggest 1 53 2.944439 2.944439 331 +directori 1 45 3.135494 3.135494 396 +examin 1 42 3.218876 3.218876 424 +past 1 42 3.218876 3.218876 428 +obtain 1 33 3.433987 3.433987 534 +lab 1 24 3.761200 3.761200 698 +viewer 1 21 3.912023 3.912023 787 +explan 1 16 4.174387 4.174387 985 +readm 1 8 4.875197 4.875197 1699 +ghost 1 2 6.263398 6.263398 4644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html new file mode 100644 index 00000000..1084d59d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs354-1^cs354.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 8 374 0.693147 5.545176 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +offic 7 299 1.098612 7.690284 13 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 5 238 1.386294 6.931470 22 +wisc 5 242 1.386294 6.931470 33 +class 13 199 1.609438 20.922694 37 +fall 8 181 1.609438 12.875504 40 +updat 1 191 1.609438 1.609438 41 +hour 7 165 1.791759 12.542313 46 +phone 5 175 1.791759 8.958795 45 +data 2 170 1.791759 3.583518 49 +assign 12 135 1.945910 23.350920 66 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +spring 3 131 2.079442 6.238326 88 +postscript 2 131 2.079442 4.158884 90 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +instructor 2 108 2.197225 4.394450 107 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +book 1 99 2.302585 2.302585 131 +section 18 94 2.397895 43.162110 149 +grade 3 90 2.397895 7.193685 142 +octob 3 89 2.397895 7.193685 156 +question 1 91 2.397895 2.397895 141 +graphic 1 90 2.397895 2.397895 147 +exam 12 86 2.484907 29.818884 169 +novemb 3 81 2.484907 7.454721 179 +solut 2 82 2.484907 4.969814 162 +help 2 83 2.484907 4.969814 175 +homework 4 79 2.564949 10.259796 193 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +interfac 1 79 2.564949 2.564949 209 +html 10 75 2.639057 26.390570 235 +simul 2 66 2.708050 5.416100 255 +syllabu 2 67 2.708050 5.416100 247 +septemb 3 65 2.772589 8.317767 274 +handout 2 64 2.772589 5.545178 263 +wednesdai 2 64 2.772589 5.545178 261 +previou 1 62 2.772589 2.772589 290 +overview 1 56 2.890372 2.890372 323 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +case 1 51 2.995732 2.995732 351 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +featur 1 46 3.091042 3.091042 386 +fridai 7 44 3.135494 21.948458 390 +answer 2 45 3.135494 6.270988 391 +midterm 1 45 3.135494 3.135494 392 +programm 1 39 3.258097 3.258097 445 +probabl 1 40 3.258097 3.258097 455 +open 1 38 3.295837 3.295837 469 +procedur 1 36 3.367296 3.367296 488 +manual 1 35 3.401197 3.401197 504 +represent 1 35 3.401197 3.401197 512 +chapter 15 32 3.465736 51.986040 536 +ask 1 28 3.610918 3.610918 597 +except 1 28 3.610918 3.610918 607 +revis 1 26 3.688879 3.688879 640 +miller 2 17 4.110874 8.221748 949 +regist 1 17 4.110874 4.110874 938 +quiz 12 16 4.174387 50.092644 990 +psycholog 1 15 4.248495 4.248495 1054 +quizz 1 13 4.382027 4.382027 1151 +calcul 1 12 4.465908 4.465908 1268 +assembl 1 12 4.465908 4.465908 1207 +arithmet 2 10 4.653960 9.307920 1388 +tuth 2 9 4.753590 9.507180 1519 +float 1 9 4.753590 4.753590 1504 +integ 1 8 4.875197 4.875197 1688 +lookup 1 5 5.347108 5.347108 2399 +karen 4 4 5.568345 22.273380 2796 +cancel 3 4 5.568345 16.705035 2746 +jerri 5 3 5.857933 29.289665 3445 +suen 1 3 5.857933 5.857933 3446 +asgarian 1 3 5.857933 5.857933 3447 +architecur 1 3 5.857933 5.857933 3448 +tusch 1 2 6.263398 6.263398 4645 +tutsch 1 2 6.263398 6.263398 4646 +execpc 1 2 6.263398 6.263398 4647 +nolandsect 1 2 6.263398 6.263398 4648 +smoler 1 2 6.263398 6.263398 4649 +sunlung 1 2 6.263398 6.263398 4650 +ssuen 1 2 6.263398 6.263398 4651 +edusridevi 1 2 6.263398 6.263398 4652 +bhamidipati 1 2 6.263398 6.263398 4653 +bsri 1 2 6.263398 6.263398 4654 +edumohammad 1 2 6.263398 6.263398 4655 +programs 1 2 6.263398 6.263398 4656 +examsal 1 2 6.263398 6.263398 4657 +noteskaren 1 2 6.263398 6.263398 4658 +updatedmondai 1 2 6.263398 6.263398 4659 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html new file mode 100644 index 00000000..95330849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs354-3^cs354.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 9 374 0.693147 6.238323 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +offic 7 299 1.098612 7.690284 13 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 5 238 1.386294 6.931470 22 +wisc 5 242 1.386294 6.931470 33 +class 13 199 1.609438 20.922694 37 +fall 8 181 1.609438 12.875504 40 +updat 1 191 1.609438 1.609438 41 +hour 7 165 1.791759 12.542313 46 +phone 5 175 1.791759 8.958795 45 +data 2 170 1.791759 3.583518 49 +assign 14 135 1.945910 27.242740 66 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +spring 3 131 2.079442 6.238326 88 +postscript 2 131 2.079442 4.158884 90 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +final 3 116 2.197225 6.591675 108 +instructor 2 108 2.197225 4.394450 107 +structur 1 106 2.197225 2.197225 105 +book 1 99 2.302585 2.302585 131 +advanc 1 99 2.302585 2.302585 130 +section 20 94 2.397895 47.957900 149 +grade 4 90 2.397895 9.591580 142 +octob 3 89 2.397895 7.193685 156 +question 1 91 2.397895 2.397895 141 +graphic 1 90 2.397895 2.397895 147 +exam 12 86 2.484907 29.818884 169 +novemb 3 81 2.484907 7.454721 179 +solut 2 82 2.484907 4.969814 162 +help 2 83 2.484907 4.969814 175 +homework 5 79 2.564949 12.824745 193 +decemb 3 80 2.564949 7.694847 215 +mondai 1 77 2.564949 2.564949 206 +interfac 1 79 2.564949 2.564949 209 +html 11 75 2.639057 29.029627 235 +simul 2 66 2.708050 5.416100 255 +syllabu 2 67 2.708050 5.416100 247 +thursdai 1 70 2.708050 2.708050 241 +septemb 3 65 2.772589 8.317767 274 +handout 2 64 2.772589 5.545178 263 +wednesdai 2 64 2.772589 5.545178 261 +previou 1 62 2.772589 2.772589 290 +overview 1 56 2.890372 2.890372 323 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +case 1 51 2.995732 2.995732 351 +frequent 1 49 3.044522 3.044522 367 +format 1 48 3.044522 3.044522 356 +featur 1 46 3.091042 3.091042 386 +fridai 7 44 3.135494 21.948458 390 +answer 2 45 3.135494 6.270988 391 +midterm 1 45 3.135494 3.135494 392 +offer 1 43 3.178054 3.178054 414 +programm 1 39 3.258097 3.258097 445 +probabl 1 40 3.258097 3.258097 455 +open 1 38 3.295837 3.295837 469 +procedur 1 36 3.367296 3.367296 488 +manual 1 35 3.401197 3.401197 504 +represent 1 35 3.401197 3.401197 512 +chapter 15 32 3.465736 51.986040 536 +option 1 30 3.555348 3.555348 575 +ask 1 28 3.610918 3.610918 597 +except 1 28 3.610918 3.610918 607 +revis 1 26 3.688879 3.688879 640 +miller 2 17 4.110874 8.221748 949 +regist 1 17 4.110874 4.110874 938 +quiz 14 16 4.174387 58.441418 990 +sign 1 16 4.174387 4.174387 970 +psycholog 1 15 4.248495 4.248495 1054 +quizz 1 13 4.382027 4.382027 1151 +calcul 1 12 4.465908 4.465908 1268 +assembl 1 12 4.465908 4.465908 1207 +arithmet 2 10 4.653960 9.307920 1388 +tuth 2 9 4.753590 9.507180 1519 +float 1 9 4.753590 4.753590 1504 +rais 1 8 4.875197 4.875197 1711 +integ 1 8 4.875197 4.875197 1688 +difficult 1 6 5.164786 5.164786 2035 +lookup 1 5 5.347108 5.347108 2399 +karen 4 4 5.568345 22.273380 2796 +cancel 3 4 5.568345 16.705035 2746 +jerri 5 3 5.857933 29.289665 3445 +suen 1 3 5.857933 5.857933 3446 +asgarian 1 3 5.857933 5.857933 3447 +architecur 1 3 5.857933 5.857933 3448 +tusch 1 2 6.263398 6.263398 4645 +tutsch 1 2 6.263398 6.263398 4646 +execpc 1 2 6.263398 6.263398 4647 +nolandsect 1 2 6.263398 6.263398 4648 +smoler 1 2 6.263398 6.263398 4649 +sunlung 1 2 6.263398 6.263398 4650 +ssuen 1 2 6.263398 6.263398 4651 +edusridevi 1 2 6.263398 6.263398 4652 +bhamidipati 1 2 6.263398 6.263398 4653 +bsri 1 2 6.263398 6.263398 4654 +edumohammad 1 2 6.263398 6.263398 4655 +programs 1 2 6.263398 6.263398 4656 +examsal 1 2 6.263398 6.263398 4657 +noteskaren 1 2 6.263398 6.263398 4658 +updatedmondai 1 2 6.263398 6.263398 4659 +programa 1 1 6.957497 6.957497 8609 +programb 1 1 6.957497 6.957497 8610 +cumul 1 1 6.957497 6.957497 8611 +desperateto 1 1 6.957497 6.957497 8612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html new file mode 100644 index 00000000..a471e073 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-1^cs367.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 17 705 0.000000 0.000000 3 +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +offic 6 299 1.098612 6.591672 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +wisc 4 242 1.386294 5.545176 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +list 4 201 1.609438 6.437752 39 +updat 1 191 1.609438 1.609438 41 +oper 1 180 1.609438 1.609438 34 +hour 3 165 1.791759 5.375277 46 +data 2 170 1.791759 3.583518 49 +read 2 154 1.791759 3.583518 47 +algorithm 2 162 1.791759 3.583518 57 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +lectur 16 135 1.945910 31.134560 73 +assign 5 135 1.945910 9.729550 66 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +code 6 108 2.197225 13.183350 116 +place 3 106 2.197225 6.591675 124 +structur 1 106 2.197225 2.197225 105 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +need 1 98 2.302585 2.302585 135 +memori 1 101 2.302585 2.302585 139 +search 4 95 2.397895 9.591580 155 +exam 5 86 2.484907 12.424535 169 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +dynam 1 76 2.564949 2.564949 194 +onlin 6 75 2.639057 15.834342 223 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +line 1 75 2.639057 2.639057 231 +simul 1 66 2.708050 2.708050 255 +copi 3 63 2.772589 8.317767 284 +creat 1 63 2.772589 2.772589 277 +wednesdai 1 64 2.772589 2.772589 261 +locat 1 59 2.833213 2.833213 303 +browser 1 56 2.890372 2.890372 313 +unix 1 58 2.890372 2.890372 308 +sampl 10 53 2.944439 29.444390 339 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +tabl 2 51 2.995732 5.991464 346 +basic 3 50 3.044522 9.133566 360 +pointer 1 48 3.044522 3.044522 368 +get 3 46 3.091042 9.273126 380 +done 1 47 3.091042 3.091042 381 +directori 1 45 3.135494 3.135494 396 +discuss 1 45 3.135494 3.135494 399 +futur 2 41 3.218876 6.437752 427 +cach 1 41 3.218876 3.218876 432 +announc 5 40 3.258097 16.290485 441 +error 3 40 3.258097 9.774291 449 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +tree 3 36 3.367296 10.101888 492 +either 1 35 3.401197 3.401197 506 +jame 1 35 3.401197 3.401197 507 +chapter 13 32 3.465736 45.054568 536 +common 1 30 3.555348 3.555348 574 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +becom 1 28 3.610918 3.610918 603 +lab 2 24 3.761200 7.522400 698 +sort 2 22 3.850148 7.700296 738 +binari 1 20 3.951244 3.951244 823 +reserv 1 20 3.951244 3.951244 808 +alloc 1 20 3.951244 3.951244 821 +thur 2 19 4.007333 8.014666 847 +comparison 1 19 4.007333 4.007333 863 +attend 2 18 4.060443 8.120886 893 +otherwis 1 17 4.110874 4.110874 922 +balanc 1 14 4.317488 4.317488 1112 +recurs 1 13 4.382027 4.382027 1127 +magic 1 11 4.553877 4.553877 1358 +queue 2 10 4.653960 9.307920 1386 +stack 2 10 4.653960 9.307920 1389 +wendt 1 10 4.653960 4.653960 1446 +cheng 1 10 4.653960 4.653960 1381 +kurt 1 9 4.753590 4.753590 1548 +unusu 1 9 4.753590 4.753590 1566 +forget 1 8 4.875197 4.875197 1712 +reload 1 8 4.875197 4.875197 1682 +hash 1 8 4.875197 4.875197 1618 +skrentni 4 6 5.164786 20.659144 2104 +skip 2 5 5.347108 10.694216 2402 +handin 1 5 5.347108 5.347108 2393 +overload 1 5 5.347108 5.347108 2403 +billi 1 5 5.347108 5.347108 2404 +outdat 1 4 5.568345 5.568345 2797 +appendix 1 4 5.568345 5.568345 2739 +makeup 1 3 5.857933 5.857933 3449 +vega 1 3 5.857933 5.857933 3450 +stale 2 2 6.263398 12.526796 4660 +lec 1 2 6.263398 6.263398 4661 +structureslectur 1 2 6.263398 6.263398 4662 +psychologylectur 1 2 6.263398 6.263398 4663 +psychologycours 1 2 6.263398 6.263398 4664 +baicheng 1 2 6.263398 6.263398 4665 +liao 1 2 6.263398 6.263398 4666 +bail 1 2 6.263398 6.263398 4667 +jiacheng 1 2 6.263398 6.263398 4668 +pmcopyright 1 2 6.263398 6.263398 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html new file mode 100644 index 00000000..5a575bc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-2^cs367.html @@ -0,0 +1,549 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 9 775 0.000000 0.000000 2 +home 7 672 0.000000 0.000000 1 +scienc 7 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +program 38 374 0.693147 26.339586 7 +inform 8 412 0.693147 5.545176 8 +work 3 380 0.693147 2.079441 9 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +cours 12 273 1.098612 13.183344 15 +time 7 293 1.098612 7.690284 17 +offic 6 299 1.098612 6.591672 13 +student 4 343 1.098612 4.394448 19 +us 3 329 1.098612 3.295836 16 +last 1 314 1.098612 1.098612 14 +wisc 6 242 1.386294 8.317764 33 +email 5 220 1.386294 6.931470 29 +also 5 259 1.386294 6.931470 28 +gener 4 220 1.386294 5.545176 27 +mail 3 238 1.386294 4.158882 22 +languag 2 227 1.386294 2.772588 26 +graduat 2 215 1.386294 2.772588 31 +design 2 213 1.386294 2.772588 25 +includ 5 208 1.609438 8.047190 42 +class 4 199 1.609438 6.437752 37 +group 3 183 1.609438 4.828314 36 +list 2 201 1.609438 3.218876 39 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +data 7 170 1.791759 12.542313 49 +hour 6 165 1.791759 10.750554 46 +avail 6 169 1.791759 10.750554 48 +read 5 154 1.791759 8.958795 47 +address 4 170 1.791759 7.167036 62 +algorithm 4 162 1.791759 7.167036 57 +develop 3 174 1.791759 5.375277 53 +phone 1 175 1.791759 1.791759 45 +base 1 165 1.791759 1.791759 50 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +assign 14 135 1.945910 27.242740 66 +lectur 10 135 1.945910 19.459100 73 +note 10 142 1.945910 19.459100 67 +problem 5 147 1.945910 9.729550 75 +like 3 132 1.945910 5.837730 81 +first 3 140 1.945910 5.837730 71 +file 2 132 1.945910 3.891820 70 +construct 1 139 1.945910 1.945910 82 +document 7 121 2.079442 14.556094 89 +compil 6 122 2.079442 12.476652 96 +machin 5 129 2.079442 10.397210 95 +schedul 3 119 2.079442 6.238326 85 +introduct 2 126 2.079442 4.158884 87 +dayton 2 119 2.079442 4.158884 104 +provid 2 121 2.079442 4.158884 94 +studi 1 120 2.079442 2.079442 91 +welcom 1 122 2.079442 2.079442 99 +code 5 108 2.197225 10.986125 116 +final 4 116 2.197225 8.788900 108 +intern 3 108 2.197225 6.591675 128 +well 3 109 2.197225 6.591675 121 +make 3 111 2.197225 6.591675 120 +find 2 111 2.197225 4.394450 111 +topic 2 114 2.197225 4.394450 110 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +send 1 114 2.197225 2.197225 109 +pleas 1 113 2.197225 2.197225 114 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +text 6 98 2.302585 13.815510 133 +take 5 97 2.302585 11.512925 134 +need 3 98 2.302585 6.907755 135 +book 2 99 2.302585 4.605170 131 +user 2 104 2.302585 4.605170 137 +peopl 1 96 2.302585 2.302585 132 +comment 7 93 2.397895 16.785265 146 +follow 5 92 2.397895 11.989475 143 +grade 4 90 2.397895 9.591580 142 +section 3 94 2.397895 7.193685 149 +call 2 91 2.397895 4.795790 153 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +exam 10 86 2.484907 24.849070 169 +help 7 83 2.484907 17.394349 175 +chang 3 82 2.484907 7.454721 163 +requir 2 81 2.484907 4.969814 167 +build 2 85 2.484907 4.969814 184 +start 2 83 2.484907 4.969814 173 +wide 1 84 2.484907 2.484907 185 +activ 1 84 2.484907 2.484907 182 +contain 1 81 2.484907 2.484907 174 +stuff 1 87 2.484907 2.484907 171 +thing 1 84 2.484907 2.484907 189 +learn 1 86 2.484907 2.484907 170 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +want 4 79 2.564949 10.259796 199 +complet 2 77 2.564949 5.129898 208 +know 2 80 2.564949 5.129898 198 +sourc 1 77 2.564949 2.564949 201 +decemb 1 80 2.564949 2.564949 215 +good 1 77 2.564949 2.564949 200 +state 1 76 2.564949 2.564949 207 +appear 1 78 2.564949 2.564949 210 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +name 5 72 2.639057 13.195285 220 +materi 4 75 2.639057 10.556228 221 +tuesdai 3 73 2.639057 7.917171 219 +effici 3 73 2.639057 7.917171 233 +line 3 75 2.639057 7.917171 231 +write 2 72 2.639057 5.278114 222 +addit 2 74 2.639057 5.278114 228 +html 1 75 2.639057 2.639057 235 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +appli 1 71 2.639057 2.639057 226 +would 2 67 2.708050 5.416100 251 +test 2 66 2.708050 5.416100 252 +main 2 67 2.708050 5.416100 256 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +function 3 62 2.772589 8.317767 275 +copi 3 63 2.772589 8.317767 284 +experi 2 64 2.772589 5.545178 283 +descript 2 64 2.772589 5.545178 271 +new 1 64 2.772589 2.772589 262 +polici 1 64 2.772589 2.772589 279 +abstract 1 62 2.772589 2.772589 276 +written 1 63 2.772589 2.772589 278 +street 1 63 2.772589 2.772589 293 +creat 1 63 2.772589 2.772589 277 +handout 1 64 2.772589 2.772589 263 +wednesdai 1 64 2.772589 2.772589 261 +previou 1 62 2.772589 2.772589 290 +result 1 65 2.772589 2.772589 281 +room 2 59 2.833213 5.666426 301 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +share 1 59 2.833213 2.833213 304 +best 1 59 2.833213 2.833213 299 +type 1 61 2.833213 2.833213 296 +unix 6 58 2.890372 17.342232 308 +semest 5 58 2.890372 14.451860 312 +major 3 56 2.890372 8.671116 315 +faculti 1 56 2.890372 2.890372 325 +detail 1 57 2.890372 2.890372 321 +sever 1 56 2.890372 2.890372 322 +reason 1 57 2.890372 2.890372 318 +special 1 56 2.890372 2.890372 320 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +cover 3 55 2.944439 8.833317 329 +suggest 2 53 2.944439 5.888878 331 +sampl 1 53 2.944439 2.944439 339 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +local 1 55 2.944439 2.944439 334 +instruct 1 53 2.944439 2.944439 332 +case 2 51 2.995732 5.991464 351 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +much 1 52 2.995732 2.995732 349 +still 2 50 3.044522 6.089044 362 +give 2 50 3.044522 6.089044 359 +appoint 1 49 3.044522 3.044522 358 +basic 1 50 3.044522 3.044522 360 +right 1 48 3.044522 3.044522 363 +numer 1 49 3.044522 3.044522 369 +format 1 48 3.044522 3.044522 356 +possibl 5 47 3.091042 15.455210 378 +done 3 47 3.091042 9.273126 381 +understand 3 47 3.091042 9.273126 384 +electron 2 47 3.091042 6.182084 379 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +describ 2 45 3.135494 6.270988 400 +midterm 1 45 3.135494 3.135494 392 +anoth 1 45 3.135494 3.135494 408 +even 1 45 3.135494 3.135494 393 +answer 1 45 3.135494 3.135494 391 +made 1 44 3.135494 3.135494 398 +long 2 43 3.178054 6.356108 413 +show 1 43 3.178054 3.178054 417 +http 2 41 3.218876 6.437752 420 +edit 2 42 3.218876 6.437752 418 +howev 1 41 3.218876 3.218876 422 +must 6 40 3.258097 19.548582 442 +late 4 40 3.258097 13.032388 439 +continu 3 39 3.258097 9.774291 448 +error 2 40 3.258097 6.516194 449 +littl 1 39 3.258097 3.258097 454 +multipl 1 39 3.258097 3.258097 453 +tutori 1 39 3.258097 3.258097 437 +correct 2 38 3.295837 6.591674 462 +open 1 38 3.295837 3.295837 469 +mean 1 37 3.332205 3.332205 477 +field 1 37 3.332205 3.332205 482 +respons 1 37 3.332205 3.332205 476 +expect 1 37 3.332205 3.332205 484 +purpos 1 37 3.332205 3.332205 481 +short 1 36 3.367296 3.367296 499 +download 1 36 3.367296 3.367296 489 +soon 1 36 3.367296 3.367296 494 +print 3 34 3.401197 10.203591 503 +statist 1 35 3.401197 3.401197 521 +either 1 35 3.401197 3.401197 506 +approxim 1 35 3.401197 3.401197 509 +everi 1 34 3.401197 3.401197 519 +singl 1 34 3.401197 3.401197 510 +next 1 34 3.401197 3.401197 517 +product 1 33 3.433987 3.433987 527 +within 1 33 3.433987 3.433987 525 +go 1 33 3.433987 3.433987 529 +express 1 32 3.465736 3.465736 540 +ad 1 32 3.465736 3.465736 544 +given 1 32 3.465736 3.465736 538 +often 4 31 3.496508 13.986032 551 +posit 1 31 3.496508 3.496508 552 +computersci 1 30 3.555348 3.555348 562 +abl 1 30 3.555348 3.555348 566 +hard 1 30 3.555348 3.555348 563 +turn 3 29 3.583519 10.750557 586 +limit 3 29 3.583519 10.750557 585 +actual 1 28 3.610918 3.610918 604 +load 1 28 3.610918 3.610918 601 +except 1 28 3.610918 3.610918 607 +intend 1 28 3.610918 3.610918 599 +full 1 28 3.610918 3.610918 615 +becom 1 28 3.610918 3.610918 603 +held 1 28 3.610918 3.610918 600 +manipul 1 27 3.637586 3.637586 624 +determin 1 27 3.637586 3.637586 630 +administr 1 27 3.637586 3.637586 628 +quit 1 27 3.637586 3.637586 633 +consist 2 26 3.688879 7.377758 651 +subject 1 26 3.688879 3.688879 647 +bound 1 26 3.688879 3.688879 659 +comp 1 26 3.688879 3.688879 650 +session 1 26 3.688879 3.688879 643 +valu 2 25 3.737670 7.475340 665 +reliabl 1 25 3.737670 3.737670 674 +never 1 25 3.737670 3.737670 671 +aspect 1 25 3.737670 3.737670 663 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +handl 1 24 3.761200 3.761200 685 +sometim 1 24 3.761200 3.761200 696 +wish 1 24 3.761200 3.761200 692 +variabl 3 23 3.806662 11.419986 715 +begin 2 23 3.806662 7.613324 716 +size 1 23 3.806662 3.806662 713 +input 1 23 3.806662 3.806662 727 +identifi 3 22 3.850148 11.550444 760 +period 1 22 3.850148 3.850148 743 +recommend 1 22 3.850148 3.850148 737 +almost 1 22 3.850148 3.850148 742 +sort 1 22 3.850148 3.850148 738 +inth 1 22 3.850148 3.850148 741 +dai 1 22 3.850148 3.850148 753 +avoid 3 21 3.912023 11.736069 799 +tell 2 21 3.912023 7.824046 777 +output 1 21 3.912023 3.912023 788 +binari 2 20 3.951244 7.902488 823 +sure 2 20 3.951244 7.902488 813 +prepar 1 20 3.951244 3.951244 824 +grad 1 20 3.951244 3.951244 837 +minut 1 20 3.951244 3.951244 810 +qualiti 1 20 3.951244 3.951244 832 +scheme 1 20 3.951244 3.951244 818 +break 1 20 3.951244 3.951244 812 +assum 2 19 4.007333 8.014666 845 +item 1 19 4.007333 4.007333 856 +ever 1 19 4.007333 4.007333 872 +separ 1 19 4.007333 4.007333 844 +exercis 1 19 4.007333 4.007333 842 +log 1 19 4.007333 4.007333 857 +five 1 19 4.007333 4.007333 841 +particularli 1 19 4.007333 4.007333 867 +account 4 18 4.060443 16.241772 882 +encourag 3 18 4.060443 12.181329 880 +record 1 18 4.060443 4.060443 890 +accept 1 18 4.060443 4.060443 879 +along 1 18 4.060443 4.060443 878 +behavior 1 18 4.060443 4.060443 881 +four 1 18 4.060443 4.060443 905 +element 1 18 4.060443 4.060443 895 +attend 1 18 4.060443 4.060443 893 +debug 3 17 4.110874 12.332622 944 +segment 2 17 4.110874 8.221748 931 +outlin 1 17 4.110874 4.110874 914 +macintosh 1 17 4.110874 4.110874 920 +women 5 16 4.174387 20.871935 1004 +earli 2 16 4.174387 8.348774 968 +normal 1 16 4.174387 4.174387 995 +easi 1 16 4.174387 4.174387 969 +style 4 15 4.248495 16.993980 1036 +later 2 15 4.248495 8.496990 1043 +purchas 1 15 4.248495 4.248495 1030 +capabl 1 15 4.248495 4.248495 1016 +doit 2 14 4.317488 8.634976 1111 +near 2 14 4.317488 8.634976 1091 +incomput 1 14 4.317488 4.317488 1096 +consider 1 14 4.317488 4.317488 1076 +floor 1 14 4.317488 4.317488 1070 +manner 1 14 4.317488 4.317488 1074 +easili 1 14 4.317488 4.317488 1077 +convent 1 14 4.317488 4.317488 1072 +necessari 2 13 4.382027 8.764054 1147 +menu 1 13 4.382027 4.382027 1156 +someon 1 13 4.382027 4.382027 1128 +wait 1 13 4.382027 4.382027 1168 +incorpor 1 13 4.382027 4.382027 1163 +dewitt 5 12 4.465908 22.329540 1270 +readi 2 12 4.465908 8.931816 1242 +pascal 2 12 4.465908 8.931816 1213 +grow 1 12 4.465908 4.465908 1209 +onth 1 12 4.465908 4.465908 1218 +weight 1 12 4.465908 4.465908 1204 +count 1 12 4.465908 4.465908 1239 +robust 1 12 4.465908 4.465908 1271 +reader 1 12 4.465908 4.465908 1246 +cycl 3 11 4.553877 13.661631 1335 +extra 3 11 4.553877 13.661631 1312 +statement 3 11 4.553877 13.661631 1313 +extrem 1 11 4.553877 4.553877 1330 +arbitrari 1 11 4.553877 4.553877 1359 +loop 1 11 4.553877 4.553877 1310 +typic 1 11 4.553877 4.553877 1360 +summar 1 11 4.553877 4.553877 1295 +submiss 1 11 4.553877 4.553877 1298 +true 3 10 4.653960 13.961880 1422 +cheat 2 10 4.653960 9.307920 1395 +modular 2 10 4.653960 9.307920 1392 +equal 1 10 4.653960 4.653960 1424 +hint 1 10 4.653960 4.653960 1419 +awai 1 10 4.653960 4.653960 1447 +certain 1 10 4.653960 4.653960 1393 +thecomput 1 10 4.653960 4.653960 1408 +label 1 10 4.653960 4.653960 1423 +strongli 1 10 4.653960 4.653960 1406 +wall 3 9 4.753590 14.260770 1553 +notat 2 9 4.753590 9.507180 1489 +assumpt 2 9 4.753590 9.507180 1514 +declar 2 9 4.753590 9.507180 1526 +end 1 9 4.753590 4.753590 1567 +frank 1 9 4.753590 4.753590 1568 +mention 1 9 4.753590 4.753590 1569 +andth 1 9 4.753590 4.753590 1481 +familiar 1 9 4.753590 4.753590 1485 +equival 1 9 4.753590 4.753590 1496 +prefer 1 9 4.753590 4.753590 1491 +criteria 1 9 4.753590 4.753590 1477 +correctli 1 9 4.753590 4.753590 1478 +informationabout 1 9 4.753590 4.753590 1515 +unusu 1 9 4.753590 4.753590 1566 +clear 1 9 4.753590 4.753590 1488 +pick 1 9 4.753590 4.753590 1498 +yanni 5 8 4.875197 24.375985 1713 +simpli 2 8 4.875197 9.750394 1626 +ioannidi 1 8 4.875197 4.875197 1714 +paramet 3 7 5.010635 15.031905 1796 +explain 2 7 5.010635 10.021270 1816 +header 2 7 5.010635 10.021270 1787 +isbn 1 7 5.010635 5.010635 1901 +exactli 1 7 5.010635 5.010635 1817 +pursu 1 7 5.010635 5.010635 1902 +whenev 1 7 5.010635 5.010635 1883 +bug 1 7 5.010635 5.010635 1801 +extern 2 6 5.164786 10.329572 2105 +mirror 2 6 5.164786 10.329572 2028 +sciencesoffic 1 6 5.164786 5.164786 2101 +notifi 1 6 5.164786 5.164786 2106 +wrong 1 6 5.164786 5.164786 2025 +approv 1 6 5.164786 5.164786 2078 +troubl 1 6 5.164786 5.164786 2002 +desk 2 5 5.347108 10.694216 2297 +situat 2 5 5.347108 10.694216 2365 +supplement 1 5 5.347108 5.347108 2355 +chemistri 1 5 5.347108 5.347108 2405 +sparcstat 1 5 5.347108 5.347108 2406 +caus 1 5 5.347108 5.347108 2298 +respond 1 5 5.347108 5.347108 2354 +blow 1 5 5.347108 5.347108 2407 +skip 1 5 5.347108 5.347108 2402 +thiscours 2 4 5.568345 11.136690 2601 +expens 1 4 5.568345 5.568345 2678 +repeat 1 4 5.568345 5.568345 2798 +suppli 1 4 5.568345 5.568345 2611 +tire 1 4 5.568345 5.568345 2799 +thec 2 3 5.857933 11.715866 3132 +neg 2 3 5.857933 11.715866 3451 +ghostview 1 3 5.857933 5.857933 3163 +eduand 1 3 5.857933 5.857933 3452 +tremend 1 3 5.857933 5.857933 3453 +narr 1 3 5.857933 5.857933 3454 +gradingther 1 3 5.857933 5.857933 3455 +programmingassign 1 3 5.857933 5.857933 3398 +thesear 1 3 5.857933 5.857933 3456 +duedat 1 3 5.857933 5.857933 3105 +helpif 1 3 5.857933 5.857933 3126 +confus 1 3 5.857933 5.857933 3144 +ineffici 1 3 5.857933 5.857933 3457 +useof 1 3 5.857933 5.857933 3368 +meaning 1 3 5.857933 5.857933 3458 +argument 1 3 5.857933 5.857933 3120 +briefli 1 3 5.857933 5.857933 3459 +urg 1 3 5.857933 5.857933 3212 +comfort 1 3 5.857933 5.857933 3136 +pain 1 3 5.857933 5.857933 3460 +clariti 4 2 6.263398 25.053592 4413 +behav 2 2 6.263398 12.526796 4670 +indent 2 2 6.263398 12.526796 4374 +amoffic 1 2 6.263398 6.263398 4671 +femal 1 2 6.263398 6.263398 4672 +wic 1 2 6.263398 6.263398 4673 +oneof 1 2 6.263398 6.263398 4674 +tomak 1 2 6.263398 6.263398 4675 +startup 1 2 6.263398 6.263398 4676 +textth 1 2 6.263398 6.263398 4677 +carrano 1 2 6.263398 6.263398 4678 +lecturenot 1 2 6.263398 6.263398 4679 +notesar 1 2 6.263398 6.263398 4559 +invalu 1 2 6.263398 6.263398 4680 +nonetheless 1 2 6.263398 6.263398 4681 +thatyou 1 2 6.263398 6.263398 4682 +youwork 1 2 6.263398 6.263398 4083 +provis 1 2 6.263398 6.263398 4683 +excus 1 2 6.263398 6.263398 4684 +datastructur 1 2 6.263398 6.263398 4685 +tovisit 1 2 6.263398 6.263398 4686 +andlog 1 2 6.263398 6.263398 4104 +facet 1 2 6.263398 6.263398 4687 +unnecessarili 1 2 6.263398 6.263398 4688 +liter 1 2 6.263398 6.263398 4689 +convei 1 2 6.263398 6.263398 4690 +beavoid 1 2 6.263398 6.263398 4411 +thefirst 1 2 6.263398 6.263398 4092 +outputfil 4 1 6.957497 27.829988 8613 +suzan 2 1 6.957497 13.914994 8614 +inputfil 2 1 6.957497 13.914994 8615 +structuresfal 1 1 6.957497 6.957497 8616 +htmlinstructor 1 1 6.957497 6.957497 8617 +newsassign 1 1 6.957497 6.957497 8618 +statisticssom 1 1 6.957497 6.957497 8619 +median 1 1 6.957497 6.957497 8620 +midterma 1 1 6.957497 6.957497 8621 +oldmidterm 1 1 6.957497 6.957497 8622 +ownmidterm 1 1 6.957497 6.957497 8623 +searchth 1 1 6.957497 6.957497 8624 +filemenu 1 1 6.957497 6.957497 8625 +andchoos 1 1 6.957497 6.957497 8626 +sciencesom 1 1 6.957497 6.957497 8627 +haveform 1 1 6.957497 6.957497 8628 +becomecomput 1 1 6.957497 6.957497 8629 +thisclass 1 1 6.957497 6.957497 8630 +withtheir 1 1 6.957497 6.957497 8631 +classwork 1 1 6.957497 6.957497 8632 +stodder 1 1 6.957497 6.957497 8633 +theodd 1 1 6.957497 6.957497 8634 +statementi 1 1 6.957497 6.957497 8635 +aniniti 1 1 6.957497 6.957497 8636 +exceptionsy 1 1 6.957497 6.957497 8637 +isdata 1 1 6.957497 6.957497 8638 +notnecessari 1 1 6.957497 6.957497 8639 +isveri 1 1 6.957497 6.957497 8640 +whichar 1 1 6.957497 6.957497 8641 +entranceof 1 1 6.957497 6.957497 8642 +needsom 1 1 6.957497 6.957497 8643 +handoutc 1 1 6.957497 6.957497 8644 +althoughi 1 1 6.957497 6.957497 8645 +courseof 1 1 6.957497 6.957497 8646 +apoint 1 1 6.957497 6.957497 8647 +prerequisitecours 1 1 6.957497 6.957497 8648 +certainrestrict 1 1 6.957497 6.957497 8649 +emailand 1 1 6.957497 6.957497 8650 +toyour 1 1 6.957497 6.957497 8651 +runwith 1 1 6.957497 6.957497 8652 +inassign 1 1 6.957497 6.957497 8653 +allelectron 1 1 6.957497 6.957497 8654 +policyno 1 1 6.957497 6.957497 8655 +coincid 1 1 6.957497 6.957497 8656 +oneach 1 1 6.957497 6.957497 8657 +thelast 1 1 6.957497 6.957497 8658 +cheatingth 1 1 6.957497 6.957497 8659 +linest 1 1 6.957497 6.957497 8660 +tocommun 1 1 6.957497 6.957497 8661 +butther 1 1 6.957497 6.957497 8662 +obei 1 1 6.957497 6.957497 8663 +policiesgovern 1 1 6.957497 6.957497 8664 +policiesif 1 1 6.957497 6.957497 8665 +currenthard 1 1 6.957497 6.957497 8666 +conceptsthat 1 1 6.957497 6.957497 8667 +emailsever 1 1 6.957497 6.957497 8668 +gradingprogram 1 1 6.957497 6.957497 8669 +typicalinput 1 1 6.957497 6.957497 8670 +projectspecif 1 1 6.957497 6.957497 8671 +shoulddemonstr 1 1 6.957497 6.957497 8672 +includingunusu 1 1 6.957497 6.957497 8673 +considerationof 1 1 6.957497 6.957497 8674 +orcomplex 1 1 6.957497 6.957497 8675 +definedconst 1 1 6.957497 6.957497 8676 +thosevalu 1 1 6.957497 6.957497 8677 +styleus 1 1 6.957497 6.957497 8678 +variable_nam 1 1 6.957497 6.957497 8679 +function_nam 1 1 6.957497 6.957497 8680 +const 1 1 6.957497 6.957497 8681 +defined_const 1 1 6.957497 6.957497 8682 +enum 1 1 6.957497 6.957497 8683 +enumtyp 1 1 6.957497 6.957497 8684 +classnam 1 1 6.957497 6.957497 8685 +notesfor 1 1 6.957497 6.957497 8686 +meaningfulli 1 1 6.957497 6.957497 8687 +documentationthi 1 1 6.957497 6.957497 8688 +yourprogram 1 1 6.957497 6.957497 8689 +someonewho 1 1 6.957497 6.957497 8690 +superfici 1 1 6.957497 6.957497 8691 +unawar 1 1 6.957497 6.957497 8692 +descriptionne 1 1 6.957497 6.957497 8693 +thensuffici 1 1 6.957497 6.957497 8694 +documentationther 1 1 6.957497 6.957497 8695 +structuresshould 1 1 6.957497 6.957497 8696 +membershould 1 1 6.957497 6.957497 8697 +sname 1 1 6.957497 6.957497 8698 +withoutmak 1 1 6.957497 6.957497 8699 +stackyou 1 1 6.957497 6.957497 8700 +tricki 1 1 6.957497 6.957497 8701 +opaqu 1 1 6.957497 6.957497 8702 +commentcan 1 1 6.957497 6.957497 8703 +clarifi 1 1 6.957497 6.957497 8704 +outlineof 1 1 6.957497 6.957497 8705 +vimani 1 1 6.957497 6.957497 8706 +becomecomfort 1 1 6.957497 6.957497 8707 +youronli 1 1 6.957497 6.957497 8708 +macpasc 1 1 6.957497 6.957497 8709 +withunix 1 1 6.957497 6.957497 8710 +wellspent 1 1 6.957497 6.957497 8711 +thefollow 1 1 6.957497 6.957497 8712 +tbayou 1 1 6.957497 6.957497 8713 +goto 1 1 6.957497 6.957497 8714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html new file mode 100644 index 00000000..ca058017 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-3^cs367.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +program 20 374 0.693147 13.862940 7 +inform 6 412 0.693147 4.158882 8 +cours 11 273 1.098612 12.084732 15 +us 5 329 1.098612 5.493060 16 +offic 3 299 1.098612 3.295836 13 +student 1 343 1.098612 1.098612 19 +wisc 6 242 1.386294 8.317764 33 +also 6 259 1.386294 8.317764 28 +languag 5 227 1.386294 6.931470 26 +email 2 220 1.386294 2.772588 29 +mail 2 238 1.386294 2.772588 22 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +fall 3 181 1.609438 4.828314 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +data 12 170 1.791759 21.501108 49 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +address 2 170 1.791759 3.583518 62 +avail 2 169 1.791759 3.583518 48 +read 2 154 1.791759 3.583518 47 +base 1 165 1.791759 1.791759 50 +assign 20 135 1.945910 38.918200 66 +lectur 15 135 1.945910 29.188650 73 +note 9 142 1.945910 17.513190 67 +first 2 140 1.945910 3.891820 71 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +introduct 2 126 2.079442 4.158884 87 +dayton 2 119 2.079442 4.158884 104 +schedul 2 119 2.079442 4.158884 85 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +provid 1 121 2.079442 2.079442 94 +structur 10 106 2.197225 21.972250 105 +teach 3 108 2.197225 6.591675 112 +final 3 116 2.197225 6.591675 108 +place 2 106 2.197225 4.394450 124 +topic 2 114 2.197225 4.394450 110 +assist 1 112 2.197225 2.197225 113 +well 1 109 2.197225 2.197225 121 +text 8 98 2.302585 18.420680 133 +book 3 99 2.302585 6.907755 131 +need 1 98 2.302585 2.302585 135 +advanc 1 99 2.302585 2.302585 130 +section 4 94 2.397895 9.591580 149 +grade 4 90 2.397895 9.591580 142 +follow 4 92 2.397895 9.591580 143 +present 2 91 2.397895 4.795790 145 +select 1 91 2.397895 2.397895 154 +question 1 91 2.397895 2.397895 141 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +exam 11 86 2.484907 27.333977 169 +build 2 85 2.484907 4.969814 184 +requir 2 81 2.484907 4.969814 167 +contain 2 81 2.484907 4.969814 174 +second 2 81 2.484907 4.969814 166 +wide 1 84 2.484907 2.484907 185 +activ 1 84 2.484907 2.484907 182 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +stuff 1 87 2.484907 2.484907 171 +school 1 84 2.484907 2.484907 188 +homework 1 79 2.564949 2.564949 193 +mondai 1 77 2.564949 2.564949 206 +complet 1 77 2.564949 2.564949 208 +want 1 79 2.564949 2.564949 199 +appear 1 78 2.564949 2.564949 210 +know 1 80 2.564949 2.564949 198 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +materi 5 75 2.639057 13.195285 221 +tuesdai 4 73 2.639057 10.556228 219 +write 4 72 2.639057 10.556228 222 +html 3 75 2.639057 7.917171 235 +line 3 75 2.639057 7.917171 231 +addit 2 74 2.639057 5.278114 228 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +free 1 73 2.639057 2.639057 224 +name 1 72 2.639057 2.639057 220 +thursdai 2 70 2.708050 5.416100 241 +abstract 4 62 2.772589 11.090356 276 +wednesdai 2 64 2.772589 5.545178 261 +written 2 63 2.772589 5.545178 278 +experi 2 64 2.772589 5.545178 283 +street 1 63 2.772589 2.772589 293 +creat 1 63 2.772589 2.772589 277 +handout 1 64 2.772589 2.772589 263 +complex 1 64 2.772589 2.772589 269 +function 1 62 2.772589 2.772589 275 +previou 1 62 2.772589 2.772589 290 +type 2 61 2.833213 5.666426 296 +simpl 2 60 2.833213 5.666426 298 +detail 2 57 2.890372 5.780744 321 +unix 1 58 2.890372 2.890372 308 +semest 1 58 2.890372 2.890372 312 +index 1 56 2.890372 2.890372 309 +summer 1 56 2.890372 2.890372 311 +cover 6 55 2.944439 17.666634 329 +maintain 2 51 2.995732 5.991464 342 +run 1 51 2.995732 2.995732 347 +tabl 1 51 2.995732 2.995732 346 +principl 2 48 3.044522 6.089044 357 +give 1 50 3.044522 3.044522 359 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +electron 3 47 3.091042 9.273126 379 +even 4 45 3.135494 12.541976 393 +fridai 2 44 3.135494 6.270988 390 +answer 1 45 3.135494 3.135494 391 +textbook 1 44 3.135494 3.135494 397 +anoth 1 45 3.135494 3.135494 408 +describ 1 45 3.135494 3.135494 400 +long 1 43 3.178054 3.178054 413 +http 3 41 3.218876 9.656628 420 +edit 1 42 3.218876 3.218876 418 +littl 2 39 3.258097 6.516194 454 +must 1 40 3.258097 3.258097 442 +correct 1 38 3.295837 3.295837 462 +close 1 38 3.295837 3.295837 465 +connect 1 37 3.332205 3.332205 485 +feel 1 37 3.332205 3.332205 483 +respons 1 37 3.332205 3.332205 476 +short 1 36 3.367296 3.367296 499 +tree 1 36 3.367296 3.367296 492 +jame 2 35 3.401197 6.802394 507 +approxim 1 35 3.401197 3.401197 509 +taught 1 33 3.433987 3.433987 526 +concept 2 32 3.465736 6.931472 537 +given 1 32 3.465736 3.465736 538 +often 3 31 3.496508 10.489524 551 +photo 1 31 3.496508 3.496508 561 +abl 1 30 3.555348 3.555348 566 +produc 1 30 3.555348 3.555348 572 +turn 1 29 3.583519 3.583519 586 +determin 2 27 3.637586 7.275172 630 +manipul 1 27 3.637586 3.637586 624 +revis 1 26 3.688879 3.688879 640 +bound 1 26 3.688879 3.688879 659 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +background 1 25 3.737670 3.737670 664 +alwai 1 24 3.761200 3.761200 691 +size 1 23 3.806662 3.806662 713 +sequenc 1 23 3.806662 3.806662 734 +identifi 1 22 3.850148 3.850148 760 +emphasi 1 22 3.850148 3.850148 755 +recommend 1 22 3.850148 3.850148 737 +sort 1 22 3.850148 3.850148 738 +color 1 22 3.850148 3.850148 762 +basi 1 20 3.951244 3.951244 828 +binari 1 20 3.951244 3.951244 823 +tenni 1 20 3.951244 3.951244 838 +exercis 2 19 4.007333 8.014666 842 +separ 1 19 4.007333 4.007333 844 +log 1 19 4.007333 4.007333 857 +assum 1 19 4.007333 4.007333 845 +five 1 19 4.007333 4.007333 841 +appropri 1 18 4.060443 4.060443 883 +wind 1 18 4.060443 4.060443 908 +account 1 18 4.060443 4.060443 882 +attend 1 18 4.060443 4.060443 893 +record 1 18 4.060443 4.060443 890 +debug 1 17 4.110874 4.110874 944 +outlin 1 17 4.110874 4.110874 914 +white 1 17 4.110874 4.110874 951 +zhang 1 16 4.174387 4.174387 980 +portion 1 16 4.174387 4.174387 971 +purchas 1 15 4.248495 4.248495 1030 +psycholog 1 15 4.248495 4.248495 1054 +later 1 15 4.248495 4.248495 1043 +photograph 1 15 4.248495 4.248495 1056 +score 1 15 4.248495 4.248495 1017 +doit 2 14 4.317488 8.634976 1111 +happi 1 14 4.317488 4.317488 1079 +trip 1 14 4.317488 4.317488 1113 +necessari 2 13 4.382027 8.764054 1147 +block 1 13 4.382027 4.382027 1183 +introduc 1 13 4.382027 4.382027 1139 +essenti 1 13 4.382027 4.382027 1137 +dewitt 3 12 4.465908 13.397724 1270 +weight 1 12 4.465908 4.465908 1204 +count 1 12 4.465908 4.465908 1239 +regularli 2 11 4.553877 9.107754 1338 +modular 1 10 4.653960 4.653960 1392 +sundai 1 10 4.653960 4.653960 1387 +true 1 10 4.653960 4.653960 1422 +strongli 1 10 4.653960 4.653960 1406 +hint 1 10 4.653960 4.653960 1419 +equal 1 10 4.653960 4.653960 1424 +card 1 10 4.653960 4.653960 1435 +black 1 10 4.653960 4.653960 1418 +laru 2 9 4.753590 9.507180 1560 +clear 1 9 4.753590 4.753590 1488 +wall 1 9 4.753590 4.753590 1553 +frank 1 9 4.753590 4.753590 1568 +mention 1 9 4.753590 4.753590 1569 +debugg 1 9 4.753590 4.753590 1493 +notat 1 9 4.753590 4.753590 1489 +login 1 9 4.753590 4.753590 1550 +absolut 1 8 4.875197 4.875197 1646 +integ 1 8 4.875197 4.875197 1688 +hash 1 8 4.875197 4.875197 1618 +isbn 1 7 5.010635 5.010635 1901 +scout 1 7 5.010635 5.010635 1903 +sciencesoffic 2 6 5.164786 10.329572 2101 +strong 1 6 5.164786 5.164786 2029 +troubl 1 6 5.164786 5.164786 2002 +mirror 1 6 5.164786 5.164786 2028 +notifi 1 6 5.164786 5.164786 2106 +skrentni 1 6 5.164786 5.164786 2104 +difficult 1 6 5.164786 5.164786 2035 +rough 1 6 5.164786 5.164786 2107 +byte 1 6 5.164786 5.164786 2108 +chin 4 5 5.347108 21.388432 2408 +tang 2 5 5.347108 10.694216 2409 +desk 2 5 5.347108 10.694216 2297 +situat 1 5 5.347108 5.347108 2365 +skip 1 5 5.347108 5.347108 2402 +crucial 1 5 5.347108 5.347108 2384 +chemistri 1 5 5.347108 5.347108 2405 +girl 1 5 5.347108 5.347108 2410 +assignmentsand 1 4 5.568345 5.568345 2760 +unless 1 4 5.568345 5.568345 2607 +birthdai 1 4 5.568345 5.568345 2800 +fora 1 4 5.568345 5.568345 2697 +reiter 1 3 5.857933 5.857933 3461 +narr 1 3 5.857933 5.857933 3454 +gradingther 1 3 5.857933 5.857933 3455 +freshman 1 3 5.857933 5.857933 3462 +cchin 3 2 6.263398 18.790194 4691 +compuer 1 2 6.263398 6.263398 4692 +weiz 1 2 6.263398 6.263398 4693 +amoffic 1 2 6.263398 6.263398 4671 +textth 1 2 6.263398 6.263398 4677 +carrano 1 2 6.263398 6.263398 4678 +needless 1 2 6.263398 6.263398 4694 +sophomor 1 2 6.263398 6.263398 4695 +databaseof 1 2 6.263398 6.263398 4696 +larusinstructor 1 1 6.957497 6.957497 8715 +laruslaru 1 1 6.957497 6.957497 8716 +amcontentsteach 1 1 6.957497 6.957497 8717 +assistantstextlectur 1 1 6.957497 6.957497 8718 +informationelectron 1 1 6.957497 6.957497 8719 +mailth 1 1 6.957497 6.957497 8720 +languagegradingexamscours 1 1 6.957497 6.957497 8721 +scheduleassign 1 1 6.957497 6.957497 8722 +assignmentscours 1 1 6.957497 6.957497 8723 +objectivesc 1 1 6.957497 6.957497 8724 +assistantswei 1 1 6.957497 6.957497 8725 +forthi 1 1 6.957497 6.957497 8726 +theassign 1 1 6.957497 6.957497 8727 +zhangoffic 1 1 6.957497 6.957497 8728 +entranc 1 1 6.957497 6.957497 8729 +maili 1 1 6.957497 6.957497 8730 +gdbthere 1 1 6.957497 6.957497 8731 +administrationbas 1 1 6.957497 6.957497 8732 +storagelectur 1 1 6.957497 6.957497 8733 +listslectur 1 1 6.957497 6.957497 8734 +stackslectur 1 1 6.957497 6.957497 8735 +queueslectur 1 1 6.957497 6.957497 8736 +hashinglectur 1 1 6.957497 6.957497 8737 +recursionlectur 1 1 6.957497 6.957497 8738 +treesbinari 1 1 6.957497 6.957497 8739 +searchlectur 1 1 6.957497 6.957497 8740 +treesgraphslectur 1 1 6.957497 6.957497 8741 +sortinglectur 1 1 6.957497 6.957497 8742 +tbaassign 1 1 6.957497 6.957497 8743 +nameyear 1 1 6.957497 6.957497 8744 +coursesprevi 1 1 6.957497 6.957497 8745 +experiencerec 1 1 6.957497 6.957497 8746 +tournament 1 1 6.957497 6.957497 8747 +aconcord 1 1 6.957497 6.957497 8748 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html new file mode 100644 index 00000000..6808a555 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs367-4^cs367.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +page 15 705 0.000000 0.000000 3 +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +offic 6 299 1.098612 6.591672 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +wisc 4 242 1.386294 5.545176 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +list 4 201 1.609438 6.437752 39 +updat 1 191 1.609438 1.609438 41 +oper 1 180 1.609438 1.609438 34 +hour 3 165 1.791759 5.375277 46 +data 2 170 1.791759 3.583518 49 +read 2 154 1.791759 3.583518 47 +algorithm 2 162 1.791759 3.583518 57 +recent 1 167 1.791759 1.791759 58 +lectur 14 135 1.945910 27.242740 73 +assign 5 135 1.945910 9.729550 66 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +code 5 108 2.197225 10.986125 116 +place 3 106 2.197225 6.591675 124 +structur 1 106 2.197225 2.197225 105 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +need 1 98 2.302585 2.302585 135 +memori 1 101 2.302585 2.302585 139 +search 2 95 2.397895 4.795790 155 +exam 5 86 2.484907 12.424535 169 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +dynam 1 76 2.564949 2.564949 194 +onlin 5 75 2.639057 13.195285 223 +materi 1 75 2.639057 2.639057 221 +addit 1 74 2.639057 2.639057 228 +line 1 75 2.639057 2.639057 231 +simul 1 66 2.708050 2.708050 255 +copi 3 63 2.772589 8.317767 284 +creat 1 63 2.772589 2.772589 277 +wednesdai 1 64 2.772589 2.772589 261 +locat 1 59 2.833213 2.833213 303 +browser 1 56 2.890372 2.890372 313 +unix 1 58 2.890372 2.890372 308 +sampl 8 53 2.944439 23.555512 339 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +basic 3 50 3.044522 9.133566 360 +pointer 1 48 3.044522 3.044522 368 +get 3 46 3.091042 9.273126 380 +done 1 47 3.091042 3.091042 381 +directori 1 45 3.135494 3.135494 396 +cach 1 41 3.218876 3.218876 432 +futur 1 41 3.218876 3.218876 427 +announc 5 40 3.258097 16.290485 441 +error 3 40 3.258097 9.774291 449 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +tree 1 36 3.367296 3.367296 492 +either 1 35 3.401197 3.401197 506 +jame 1 35 3.401197 3.401197 507 +chapter 11 32 3.465736 38.123096 536 +common 1 30 3.555348 3.555348 574 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +becom 1 28 3.610918 3.610918 603 +lab 2 24 3.761200 7.522400 698 +sort 2 22 3.850148 7.700296 738 +reserv 1 20 3.951244 3.951244 808 +alloc 1 20 3.951244 3.951244 821 +thur 2 19 4.007333 8.014666 847 +attend 2 18 4.060443 8.120886 893 +otherwis 1 17 4.110874 4.110874 922 +recurs 1 13 4.382027 4.382027 1127 +magic 1 11 4.553877 4.553877 1358 +queue 2 10 4.653960 9.307920 1386 +stack 2 10 4.653960 9.307920 1389 +wendt 1 10 4.653960 4.653960 1446 +cheng 1 10 4.653960 4.653960 1381 +kurt 1 9 4.753590 4.753590 1548 +unusu 1 9 4.753590 4.753590 1566 +forget 1 8 4.875197 4.875197 1712 +reload 1 8 4.875197 4.875197 1682 +hash 1 8 4.875197 4.875197 1618 +skrentni 4 6 5.164786 20.659144 2104 +skip 2 5 5.347108 10.694216 2402 +handin 1 5 5.347108 5.347108 2393 +overload 1 5 5.347108 5.347108 2403 +billi 1 5 5.347108 5.347108 2404 +outdat 1 4 5.568345 5.568345 2797 +appendix 1 4 5.568345 5.568345 2739 +makeup 1 3 5.857933 5.857933 3449 +vega 1 3 5.857933 5.857933 3450 +stale 2 2 6.263398 12.526796 4660 +lec 1 2 6.263398 6.263398 4661 +structureslectur 1 2 6.263398 6.263398 4662 +psychologylectur 1 2 6.263398 6.263398 4663 +psychologycours 1 2 6.263398 6.263398 4664 +baicheng 1 2 6.263398 6.263398 4665 +liao 1 2 6.263398 6.263398 4666 +bail 1 2 6.263398 6.263398 4667 +jiacheng 1 2 6.263398 6.263398 4668 +pmcopyright 1 2 6.263398 6.263398 4669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html new file mode 100644 index 00000000..5cfe8620 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs520-1^cs520.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +offic 4 299 1.098612 4.394448 13 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +hour 2 165 1.791759 3.583518 46 +madison 2 165 1.791759 3.583518 55 +lectur 3 135 1.945910 5.837730 73 +assign 2 135 1.945910 3.891820 66 +introduct 3 126 2.079442 6.238326 87 +schedul 1 119 2.079442 2.079442 85 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +mondai 1 77 2.564949 2.564949 206 +state 1 76 2.564949 2.564949 207 +david 1 71 2.639057 2.639057 232 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +written 1 63 2.772589 2.772589 278 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +archiv 1 49 3.044522 3.044522 364 +fridai 1 44 3.135494 3.135494 390 +term 1 43 3.178054 3.178054 411 +examin 2 42 3.218876 6.437752 424 +theoret 2 39 3.258097 6.516194 446 +brian 2 38 3.295837 6.591674 466 +john 1 33 3.433987 3.433987 532 +hill 1 25 3.737670 3.737670 670 +tent 1 22 3.850148 3.850148 739 +martin 1 21 3.912023 3.912023 794 +north 1 19 4.007333 4.007333 873 +isbn 1 7 5.010635 5.010635 1901 +mcgraw 1 5 5.347108 5.347108 2262 +clarif 1 5 5.347108 5.347108 2253 +sundaram 2 3 5.857933 11.715866 3463 +cole 2 2 6.263398 12.526796 4697 +stukel 1 2 6.263398 6.263398 4698 +dakota 1 1 6.957497 6.957497 8749 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html new file mode 100644 index 00000000..e521eb47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs525-1^cs525.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +inform 2 412 0.693147 1.386294 8 +offic 4 299 1.098612 4.394448 13 +cours 4 273 1.098612 4.394448 15 +engin 2 297 1.098612 2.197224 20 +time 2 293 1.098612 2.197224 17 +project 1 340 1.098612 1.098612 18 +also 2 259 1.386294 2.772588 28 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +link 1 247 1.386294 1.386294 24 +fall 2 181 1.609438 3.218876 40 +updat 1 191 1.609438 1.609438 41 +hour 2 165 1.791759 3.583518 46 +madison 1 165 1.791759 1.791759 55 +lectur 1 135 1.945910 1.945910 73 +schedul 1 119 2.079442 2.079442 85 +databas 1 122 2.079442 2.079442 86 +final 4 116 2.197225 8.788900 108 +site 2 106 2.197225 4.394450 119 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +mathemat 1 108 2.197225 2.197225 123 +book 3 99 2.302585 6.907755 131 +octob 7 89 2.397895 16.785265 156 +exam 11 86 2.484907 27.333977 169 +solut 4 82 2.484907 9.939628 162 +novemb 3 81 2.484907 7.454721 179 +librari 1 87 2.484907 2.484907 181 +homework 12 79 2.564949 30.779388 193 +decemb 3 80 2.564949 7.694847 215 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +septemb 4 65 2.772589 11.090356 274 +wednesdai 2 64 2.772589 5.545178 261 +march 4 61 2.833213 11.332852 295 +locat 2 59 2.833213 5.666426 303 +semest 2 58 2.890372 5.780744 312 +overview 1 56 2.890372 2.890372 323 +variou 1 56 2.890372 2.890372 317 +sampl 3 53 2.944439 8.833317 339 +date 2 51 2.995732 5.991464 344 +telephon 1 50 3.044522 3.044522 373 +midterm 7 45 3.135494 21.948458 392 +math 2 44 3.135494 6.270988 402 +textbook 1 44 3.135494 3.135494 397 +mechan 2 43 3.178054 6.356108 416 +linear 2 41 3.218876 6.437752 431 +open 2 38 3.295837 6.591674 469 +comp 2 26 3.688879 7.377758 650 +relev 1 26 3.688879 3.688879 637 +period 1 22 3.850148 3.850148 743 +reserv 1 20 3.951244 3.951244 808 +item 1 19 4.007333 4.007333 856 +stat 4 17 4.110874 16.443496 924 +matlab 2 14 4.317488 8.634976 1081 +doit 1 14 4.317488 4.317488 1111 +wendt 1 10 4.653960 4.653960 1446 +mangasarian 2 9 4.753590 9.507180 1570 +preliminari 1 9 4.753590 4.753590 1480 +kurt 1 9 4.753590 4.753590 1548 +ferri 1 8 4.875197 4.875197 1715 +olvi 2 6 5.164786 10.329572 2109 +setup 1 2 6.263398 6.263398 4211 +bibliograph 1 2 6.263398 6.263398 4699 +programmingfal 1 1 6.957497 6.957497 8750 +pphone 1 1 6.957497 6.957497 8751 +searchabl 1 1 6.957497 6.957497 8752 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html new file mode 100644 index 00000000..7ede2193 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs536-1^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 3 374 0.693147 2.079441 7 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +offic 4 299 1.098612 4.394448 13 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +hour 2 165 1.791759 3.583518 46 +read 1 154 1.791759 1.791759 47 +lectur 2 135 1.945910 3.891820 73 +assign 2 135 1.945910 3.891820 66 +note 1 142 1.945910 1.945910 67 +compil 2 122 2.079442 4.158884 96 +tool 2 117 2.079442 4.158884 93 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +techniqu 1 99 2.302585 2.302585 138 +grade 2 90 2.397895 4.795790 142 +octob 1 89 2.397895 2.397895 156 +librari 1 87 2.484907 2.484907 181 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +overview 1 56 2.890372 2.890372 323 +date 1 51 2.995732 2.995732 344 +telephon 2 50 3.044522 6.089044 373 +appoint 2 49 3.044522 6.089044 358 +principl 1 48 3.044522 3.044522 357 +get 1 46 3.091042 3.091042 380 +fridai 1 44 3.135494 3.135494 390 +examin 1 42 3.218876 3.218876 424 +late 1 40 3.258097 3.258097 439 +comp 1 26 3.688879 3.688879 650 +reserv 1 20 3.951244 3.951244 808 +stat 1 17 4.110874 4.110874 924 +month 1 15 4.248495 4.248495 1025 +psycholog 1 15 4.248495 4.248495 1054 +susan 1 15 4.248495 4.248495 1050 +stori 1 14 4.317488 4.317488 1087 +regularli 1 11 4.553877 4.553877 1338 +wendt 1 10 4.653960 4.653960 1446 +tuth 1 9 4.753590 4.753590 1519 +recit 1 9 4.753590 4.753590 1475 +fischer 1 7 5.010635 5.010635 1893 +horwitz 2 5 5.347108 10.694216 2411 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +ullman 1 4 5.568345 5.568345 2749 +rahul 2 3 5.857933 11.715866 3464 +compilersspr 1 2 6.263398 6.263398 4700 +kapoor 1 2 6.263398 6.263398 4701 +sethi 1 2 6.263398 6.263398 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html new file mode 100644 index 00000000..c9bd5021 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs537-1^cs537.html @@ -0,0 +1,527 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +page 6 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +program 11 374 0.693147 7.624617 7 +system 8 443 0.693147 5.545176 6 +work 2 380 0.693147 1.386294 9 +depart 2 457 0.693147 1.386294 12 +inform 2 412 0.693147 1.386294 8 +project 23 340 1.098612 25.268076 18 +cours 6 273 1.098612 6.591672 15 +time 4 293 1.098612 4.394448 17 +us 4 329 1.098612 4.394448 16 +student 3 343 1.098612 3.295836 19 +offic 2 299 1.098612 2.197224 13 +last 1 314 1.098612 1.098612 14 +languag 9 227 1.386294 12.476646 26 +wisc 4 242 1.386294 5.545176 33 +also 3 259 1.386294 4.158882 28 +email 2 220 1.386294 2.772588 29 +softwar 2 220 1.386294 2.772588 30 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +oper 6 180 1.609438 9.656628 34 +class 6 199 1.609438 9.656628 37 +public 6 202 1.609438 9.656628 43 +updat 3 191 1.609438 4.828314 41 +fall 2 181 1.609438 3.218876 40 +includ 2 208 1.609438 3.218876 42 +list 1 201 1.609438 1.609438 39 +avail 12 169 1.791759 21.501108 48 +implement 8 152 1.791759 14.334072 52 +algorithm 3 162 1.791759 5.375277 57 +hour 2 165 1.791759 3.583518 46 +phone 2 175 1.791759 3.583518 45 +address 2 170 1.791759 3.583518 62 +data 2 170 1.791759 3.583518 49 +read 2 154 1.791759 3.583518 47 +network 1 168 1.791759 1.791759 61 +file 12 132 1.945910 23.350920 70 +process 8 142 1.945910 15.567280 72 +assign 7 135 1.945910 13.621370 66 +lectur 7 135 1.945910 13.621370 73 +note 6 142 1.945910 11.675460 67 +first 6 140 1.945910 11.675460 71 +relat 2 139 1.945910 3.891820 68 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +schedul 10 119 2.079442 20.794420 85 +introduct 8 126 2.079442 16.635536 87 +number 3 130 2.079442 6.238326 97 +compil 1 122 2.079442 2.079442 96 +provid 1 121 2.079442 2.079442 94 +specif 8 106 2.197225 17.577800 106 +manag 7 114 2.197225 15.380575 125 +final 4 116 2.197225 8.788900 108 +place 3 106 2.197225 6.591675 124 +topic 3 114 2.197225 6.591675 110 +code 2 108 2.197225 4.394450 116 +make 2 111 2.197225 4.394450 120 +structur 2 106 2.197225 4.394450 105 +look 1 107 2.197225 2.197225 115 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +memori 7 101 2.302585 16.118095 139 +take 4 97 2.302585 9.210340 134 +text 3 98 2.302585 6.907755 133 +book 2 99 2.302585 4.605170 131 +access 2 102 2.302585 4.605170 136 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +section 7 94 2.397895 16.785265 149 +grade 6 90 2.397895 14.387370 142 +call 3 91 2.397895 7.193685 153 +follow 3 92 2.397895 7.193685 143 +octob 2 89 2.397895 4.795790 156 +comment 1 93 2.397895 2.397895 146 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +exam 7 86 2.484907 17.394349 169 +start 4 83 2.484907 9.939628 173 +help 3 83 2.484907 7.454721 175 +contain 2 81 2.484907 4.969814 174 +second 2 81 2.484907 4.969814 166 +requir 2 81 2.484907 4.969814 167 +environ 2 84 2.484907 4.969814 177 +resourc 2 81 2.484907 4.969814 172 +larg 1 82 2.484907 2.484907 168 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +member 1 84 2.484907 2.484907 165 +wide 1 84 2.484907 2.484907 185 +level 1 87 2.484907 2.484907 180 +librari 1 87 2.484907 2.484907 181 +learn 1 86 2.484907 2.484907 170 +messag 4 76 2.564949 10.259796 212 +refer 4 78 2.564949 10.259796 203 +issu 2 78 2.564949 5.129898 211 +sourc 2 77 2.564949 5.129898 201 +mondai 2 77 2.564949 5.129898 206 +exampl 1 77 2.564949 2.564949 195 +orient 1 80 2.564949 2.564949 205 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +decemb 1 80 2.564949 2.564949 215 +state 1 76 2.564949 2.564949 207 +come 1 78 2.564949 2.564949 202 +summari 4 73 2.639057 10.556228 237 +onlin 3 75 2.639057 7.917171 223 +meet 2 72 2.639057 5.278114 229 +tuesdai 2 73 2.639057 5.278114 219 +addit 2 74 2.639057 5.278114 228 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +materi 1 75 2.639057 2.639057 221 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +java 26 70 2.708050 70.409300 248 +differ 4 66 2.708050 10.832200 253 +receiv 3 66 2.708050 8.124150 244 +thursdai 2 70 2.708050 5.416100 241 +main 2 67 2.708050 5.416100 256 +view 2 70 2.708050 5.416100 254 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +simul 1 66 2.708050 2.708050 255 +knowledg 1 67 2.708050 2.708050 243 +creat 3 63 2.772589 8.317767 277 +wednesdai 3 64 2.772589 8.317767 261 +collect 3 65 2.772589 8.317767 268 +new 2 64 2.772589 5.545178 262 +import 2 65 2.772589 5.545178 282 +virtual 2 62 2.772589 5.545178 285 +copi 1 63 2.772589 2.772589 284 +room 5 59 2.833213 14.166065 301 +type 2 61 2.833213 5.666426 296 +back 2 60 2.833213 5.666426 297 +content 1 59 2.833213 2.833213 302 +share 1 59 2.833213 2.833213 304 +semest 4 58 2.890372 11.561488 312 +point 4 58 2.890372 11.561488 319 +detail 3 57 2.890372 8.671116 321 +unix 3 58 2.890372 8.671116 308 +space 1 57 2.890372 2.890372 310 +direct 1 57 2.890372 2.890372 316 +think 1 57 2.890372 2.890372 314 +sever 1 56 2.890372 2.890372 322 +reason 1 57 2.890372 2.890372 318 +three 3 54 2.944439 8.833317 330 +processor 3 54 2.944439 8.833317 335 +cover 2 55 2.944439 5.888878 329 +sampl 2 53 2.944439 5.888878 339 +local 2 55 2.944439 5.888878 334 +run 2 51 2.995732 5.991464 347 +date 1 51 2.995732 2.995732 344 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +archiv 1 49 3.044522 3.044522 364 +set 1 50 3.044522 3.044522 361 +pointer 1 48 3.044522 3.044522 368 +standard 1 48 3.044522 3.044522 365 +frequent 1 49 3.044522 3.044522 367 +right 1 48 3.044522 3.044522 363 +get 2 46 3.091042 6.182084 380 +move 1 47 3.091042 3.091042 382 +electron 1 47 3.091042 3.091042 379 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +midterm 7 45 3.135494 21.948458 392 +discuss 4 45 3.135494 12.541976 399 +directori 4 45 3.135494 12.541976 396 +answer 2 45 3.135494 6.270988 391 +describ 1 45 3.135494 3.135494 400 +even 1 45 3.135494 3.135494 393 +long 1 43 3.178054 3.178054 413 +show 1 43 3.178054 3.178054 417 +term 1 43 3.178054 3.178054 411 +edit 1 42 3.218876 3.218876 418 +fast 1 42 3.218876 3.218876 429 +howev 1 41 3.218876 3.218876 422 +late 3 40 3.258097 9.774291 439 +error 2 40 3.258097 6.516194 449 +tutori 2 39 3.258097 6.516194 437 +must 2 40 3.258097 6.516194 442 +littl 1 39 3.258097 3.258097 454 +author 1 39 3.258097 3.258097 450 +programm 1 39 3.258097 3.258097 445 +correct 4 38 3.295837 13.183348 462 +seminar 3 38 3.295837 9.887511 470 +credit 1 38 3.295837 3.295837 460 +hand 3 37 3.332205 9.996615 475 +workstat 2 37 3.332205 6.664410 479 +respons 1 37 3.332205 3.332205 476 +feel 1 37 3.332205 3.332205 483 +procedur 1 36 3.367296 3.367296 488 +short 1 36 3.367296 3.367296 499 +copyright 1 36 3.367296 3.367296 495 +post 1 35 3.401197 3.401197 505 +either 1 35 3.401197 3.401197 506 +least 1 35 3.401197 3.401197 516 +jame 1 35 3.401197 3.401197 507 +random 1 34 3.401197 3.401197 511 +concurr 1 34 3.401197 3.401197 501 +manual 1 35 3.401197 3.401197 504 +statist 1 35 3.401197 3.401197 521 +go 1 33 3.433987 3.433987 529 +chapter 4 32 3.465736 13.862944 536 +ad 1 32 3.465736 3.465736 544 +kind 1 32 3.465736 3.465736 541 +storag 1 31 3.496508 3.496508 553 +graph 3 30 3.555348 10.666044 576 +secur 2 30 3.555348 7.110696 577 +specifi 1 30 3.555348 3.555348 568 +option 1 30 3.555348 3.555348 575 +focu 1 30 3.555348 3.555348 571 +synchron 6 29 3.583519 21.501114 588 +built 1 29 3.583519 3.583519 592 +becom 2 28 3.610918 7.221836 603 +intend 1 28 3.610918 3.610918 599 +packag 1 28 3.610918 3.610918 614 +except 1 28 3.610918 3.610918 607 +quit 3 27 3.637586 10.912758 633 +determin 1 27 3.637586 3.637586 630 +arrai 1 27 3.637586 3.637586 627 +comp 2 26 3.688879 7.377758 650 +request 1 26 3.688879 3.688879 635 +session 1 26 3.688879 3.688879 643 +bound 1 26 3.688879 3.688879 659 +detect 1 26 3.688879 3.688879 646 +primari 1 25 3.737670 3.737670 669 +although 1 25 3.737670 3.737670 667 +other 2 24 3.761200 7.522400 697 +thank 4 23 3.806662 15.226648 721 +initi 2 23 3.806662 7.613324 717 +begin 2 23 3.806662 7.613324 716 +input 1 23 3.806662 3.806662 727 +variabl 1 23 3.806662 3.806662 715 +thread 1 23 3.806662 3.806662 722 +togeth 1 23 3.806662 3.806662 714 +disk 4 22 3.850148 15.400592 747 +dai 3 22 3.850148 11.550444 753 +finish 2 22 3.850148 7.700296 748 +sent 1 22 3.850148 3.850148 763 +recommend 1 22 3.850148 3.850148 737 +varieti 1 22 3.850148 3.850148 740 +tent 1 22 3.850148 3.850148 739 +avoid 3 21 3.912023 11.736069 799 +path 2 21 3.912023 7.824046 778 +watch 1 21 3.912023 3.912023 789 +latest 1 21 3.912023 3.912023 785 +theunivers 1 21 3.912023 3.912023 797 +among 1 21 3.912023 3.912023 781 +output 1 21 3.912023 3.912023 788 +programminglanguag 1 21 3.912023 3.912023 782 +entir 1 20 3.951244 3.951244 811 +alloc 1 20 3.951244 3.951244 821 +reserv 1 20 3.951244 3.951244 808 +thur 2 19 4.007333 8.014666 847 +histori 2 19 4.007333 8.014666 853 +definit 1 19 4.007333 4.007333 864 +separ 1 19 4.007333 4.007333 844 +andrew 1 19 4.007333 4.007333 849 +five 1 19 4.007333 4.007333 841 +exercis 1 19 4.007333 4.007333 842 +runtim 1 19 4.007333 4.007333 858 +excel 1 19 4.007333 4.007333 868 +assum 1 19 4.007333 4.007333 845 +lot 1 18 4.060443 4.060443 889 +behavior 1 18 4.060443 4.060443 881 +encourag 1 18 4.060443 4.060443 880 +four 1 18 4.060443 4.060443 905 +sept 17 17 4.110874 69.884858 952 +monitor 3 17 4.110874 12.332622 941 +protect 2 17 4.110874 8.221748 935 +anyon 2 17 4.110874 8.221748 916 +regist 1 17 4.110874 4.110874 938 +weekli 1 17 4.110874 4.110874 919 +segment 1 17 4.110874 4.110874 931 +outlin 1 17 4.110874 4.110874 914 +devic 2 16 4.174387 8.348774 1002 +zhang 1 16 4.174387 4.174387 980 +modern 1 16 4.174387 4.174387 966 +weslei 1 16 4.174387 4.174387 983 +transfer 1 16 4.174387 4.174387 967 +easi 1 16 4.174387 4.174387 969 +choos 1 16 4.174387 4.174387 964 +condit 1 16 4.174387 4.174387 975 +critic 1 16 4.174387 4.174387 982 +later 2 15 4.248495 8.496990 1043 +todd 2 15 4.248495 8.496990 1051 +score 1 15 4.248495 4.248495 1017 +enough 1 15 4.248495 4.248495 1040 +demand 1 14 4.317488 4.317488 1073 +warn 1 14 4.317488 4.317488 1068 +shown 1 14 4.317488 4.317488 1080 +topolog 1 14 4.317488 4.317488 1089 +central 1 13 4.382027 4.382027 1160 +directli 1 13 4.382027 4.382027 1141 +forth 1 13 4.382027 4.382027 1186 +everyth 1 13 4.382027 4.382027 1169 +count 3 12 4.465908 13.397724 1239 +solari 2 12 4.465908 8.931816 1238 +minor 1 12 4.465908 4.465908 1237 +rememb 1 12 4.465908 4.465908 1217 +readi 1 12 4.465908 4.465908 1242 +addison 1 12 4.465908 4.465908 1230 +grow 1 12 4.465908 4.465908 1209 +readabl 1 12 4.465908 4.465908 1258 +buffer 1 12 4.465908 4.465908 1211 +string 4 11 4.553877 18.215508 1340 +fix 2 11 4.553877 9.107754 1327 +tue 1 11 4.553877 4.553877 1308 +regard 1 11 4.553877 4.553877 1309 +extrem 1 11 4.553877 4.553877 1330 +faster 1 11 4.553877 4.553877 1323 +market 1 11 4.553877 4.553877 1361 +placement 2 10 4.653960 9.307920 1420 +strongli 2 10 4.653960 9.307920 1406 +grain 1 10 4.653960 4.653960 1448 +paragraph 1 10 4.653960 4.653960 1449 +hint 1 10 4.653960 4.653960 1419 +cheat 1 10 4.653960 4.653960 1395 +recoveri 2 9 4.753590 9.507180 1474 +familiar 2 9 4.753590 9.507180 1485 +pair 2 9 4.753590 9.507180 1503 +correctli 1 9 4.753590 4.753590 1478 +mention 1 9 4.753590 4.753590 1569 +said 1 9 4.753590 4.753590 1571 +introductori 1 9 4.753590 4.753590 1479 +solomon 5 8 4.875197 24.375985 1716 +star 2 8 4.875197 9.750394 1717 +replac 1 8 4.875197 4.875197 1668 +simpli 1 8 4.875197 4.875197 1626 +rais 1 8 4.875197 4.875197 1711 +partner 1 8 4.875197 4.875197 1648 +crash 1 8 4.875197 4.875197 1616 +switch 1 8 4.875197 4.875197 1718 +gather 1 8 4.875197 4.875197 1719 +peterson 2 7 5.010635 10.021270 1850 +philosoph 2 7 5.010635 10.021270 1904 +bookstor 2 7 5.010635 10.021270 1837 +prevent 2 7 5.010635 10.021270 1827 +bug 1 7 5.010635 5.010635 1801 +slightli 1 7 5.010635 5.010635 1795 +chan 1 7 5.010635 5.010635 1876 +occasion 1 7 5.010635 5.010635 1905 +awar 1 7 5.010635 5.010635 1800 +prentic 1 7 5.010635 5.010635 1838 +spot 1 7 5.010635 5.010635 1894 +fortun 1 7 5.010635 5.010635 1872 +bottom 1 7 5.010635 5.010635 1906 +compact 1 7 5.010635 5.010635 1907 +theproject 3 6 5.164786 15.494358 1981 +sciencesoffic 2 6 5.164786 10.329572 2101 +garbag 2 6 5.164786 10.329572 1986 +notifi 1 6 5.164786 5.164786 2106 +nine 1 6 5.164786 5.164786 2047 +mistak 1 6 5.164786 5.164786 2110 +creation 1 6 5.164786 5.164786 2069 +handi 1 6 5.164786 5.164786 2111 +neither 1 6 5.164786 5.164786 1990 +caus 2 5 5.347108 10.694216 2298 +salt 1 5 5.347108 5.347108 2413 +forprogram 1 5 5.347108 5.347108 2361 +sparcstat 1 5 5.347108 5.347108 2406 +favor 1 5 5.347108 5.347108 2414 +commod 1 5 5.347108 5.347108 2415 +eas 1 5 5.347108 5.347108 2267 +anda 1 5 5.347108 5.347108 2416 +remain 1 5 5.347108 5.347108 2278 +race 1 5 5.347108 5.347108 2417 +deadlock 7 4 5.568345 38.978415 2641 +fork 4 4 5.568345 22.273380 2801 +makefil 3 4 5.568345 16.705035 2662 +popular 2 4 5.568345 11.136690 2802 +cshrc 2 4 5.568345 11.136690 2759 +theprogram 1 4 5.568345 5.568345 2686 +multitask 1 4 5.568345 5.568345 2803 +systemsand 1 4 5.568345 5.568345 2804 +usedto 1 4 5.568345 5.568345 2643 +subsequ 1 4 5.568345 5.568345 2665 +withth 1 4 5.568345 5.568345 2805 +marvin 1 4 5.568345 5.568345 2806 +argument 2 3 5.857933 11.715866 3120 +caught 2 3 5.857933 11.715866 3465 +omit 1 3 5.857933 5.857933 3466 +offset 1 3 5.857933 5.857933 3467 +urgent 1 3 5.857933 5.857933 3316 +listof 1 3 5.857933 5.857933 3322 +sendmail 1 3 5.857933 5.857933 3099 +tanenbaum 1 3 5.857933 5.857933 3397 +dialect 1 3 5.857933 5.857933 3226 +acquaint 1 3 5.857933 5.857933 3468 +subscript 1 3 5.857933 5.857933 3469 +easier 1 3 5.857933 5.857933 3470 +timet 1 3 5.857933 5.857933 3471 +dine 1 3 5.857933 5.857933 3472 +avaiabl 3 2 6.263398 18.790194 4703 +thejava 2 2 6.263398 12.526796 4704 +swap 2 2 6.263398 12.526796 4466 +arnold 2 2 6.263398 12.526796 4705 +semaphor 2 2 6.263398 12.526796 4555 +troffic 1 2 6.263398 6.263398 4706 +mellencamp 1 2 6.263398 6.263398 4707 +mellen 1 2 6.263398 6.263398 4708 +breakdown 1 2 6.263398 6.263398 4407 +typo 1 2 6.263398 6.263398 4180 +tung 1 2 6.263398 6.263398 4709 +preemptiv 1 2 6.263398 6.263398 4319 +colloquia 1 2 6.263398 6.263398 4710 +sciencesand 1 2 6.263398 6.263398 4711 +tutorialth 1 2 6.263398 6.263398 4453 +designedto 1 2 6.263398 6.263398 4712 +havethre 1 2 6.263398 6.263398 4562 +daysof 1 2 6.263398 6.263398 4563 +eachof 1 2 6.263398 6.263398 4564 +congeni 1 2 6.263398 6.263398 4713 +null 1 2 6.263398 6.263398 4714 +mysteri 1 2 6.263398 6.263398 4715 +char 1 2 6.263398 6.263398 4716 +trendi 1 2 6.263398 6.263398 4717 +coursewil 1 2 6.263398 6.263398 4718 +primer 1 2 6.263398 6.263398 4719 +manualfor 1 2 6.263398 6.263398 4720 +yourgrad 1 2 6.263398 6.263398 4121 +terminolog 1 2 6.263398 6.263398 4410 +eduthu 1 2 6.263398 6.263398 4721 +threadschedul 4 1 6.957497 27.829988 8753 +forproject 3 1 6.957497 20.872491 8754 +graphcontain 3 1 6.957497 20.872491 8755 +sched 2 1 6.957497 13.914994 8756 +substr 2 1 6.957497 13.914994 8757 +thejavaprogram 2 1 6.957497 13.914994 8758 +languagebi 2 1 6.957497 13.914994 8759 +gosl 2 1 6.957497 13.914994 8760 +systemssect 1 1 6.957497 6.957497 8761 +instructormarvin 1 1 6.957497 6.957497 8762 +tarob 1 1 6.957497 6.957497 8763 +mwfoffic 1 1 6.957497 6.957497 8764 +distributioni 1 1 6.957497 6.957497 8765 +typograph 1 1 6.957497 6.957497 8766 +importantli 1 1 6.957497 6.957497 8767 +arraywa 1 1 6.957497 6.957497 8768 +isavail 1 1 6.957497 6.957497 8769 +courseus 1 1 6.957497 6.957497 8770 +likelyb 1 1 6.957497 6.957497 8771 +presentedin 1 1 6.957497 6.957497 8772 +givefork 1 1 6.957497 6.957497 8773 +specificationshould 1 1 6.957497 6.957497 8774 +garbl 1 1 6.957497 6.957497 8775 +jake 1 1 6.957497 6.957497 8776 +dawlei 1 1 6.957497 6.957497 8777 +carr 1 1 6.957497 6.957497 8778 +detailssect 1 1 6.957497 6.957497 8779 +lipe 1 1 6.957497 6.957497 8780 +srccontain 1 1 6.957497 6.957497 8781 +javacontain 1 1 6.957497 6.957497 8782 +classgraphdescrib 1 1 6.957497 6.957497 8783 +petersoncycl 1 1 6.957497 6.957497 8784 +notacycl 1 1 6.957497 6.957497 8785 +petersonacycl 1 1 6.957497 6.957497 8786 +acycl 1 1 6.957497 6.957497 8787 +sharingfork 1 1 6.957497 6.957497 8788 +jenner 1 1 6.957497 6.957497 8789 +maxthink 1 1 6.957497 6.957497 8790 +maxeat 1 1 6.957497 6.957497 8791 +versionha 1 1 6.957497 6.957497 8792 +argumenti 1 1 6.957497 6.957497 8793 +charactersin 1 1 6.957497 6.957497 8794 +franco 1 1 6.957497 6.957497 8795 +maketo 1 1 6.957497 6.957497 8796 +compilewithout 1 1 6.957497 6.957497 8797 +computershav 1 1 6.957497 6.957497 8798 +tutoriali 1 1 6.957497 6.957497 8799 +onthread 1 1 6.957497 6.957497 8800 +checkth 1 1 6.957497 6.957497 8801 +ajava 1 1 6.957497 6.957497 8802 +afil 1 1 6.957497 6.957497 8803 +onelin 1 1 6.957497 6.957497 8804 +localor 1 1 6.957497 6.957497 8805 +csmon 1 1 6.957497 6.957497 8806 +cslast 1 1 6.957497 6.957497 8807 +beprocess 1 1 6.957497 6.957497 8808 +replacementalgorithm 1 1 6.957497 6.957497 8809 +statisticsdiscuss 1 1 6.957497 6.957497 8810 +psychologyth 1 1 6.957497 6.957497 8811 +anyquest 1 1 6.957497 6.957497 8812 +thetext 1 1 6.957497 6.957497 8813 +systemsbi 1 1 6.957497 6.957497 8814 +specificationjava 1 1 6.957497 6.957497 8815 +documentationwatch 1 1 6.957497 6.957497 8816 +unixoper 1 1 6.957497 6.957497 8817 +anycomput 1 1 6.957497 6.957497 8818 +requireddata 1 1 6.957497 6.957497 8819 +involveprocess 1 1 6.957497 6.957497 8820 +butyou 1 1 6.957497 6.957497 8821 +vigor 1 1 6.957497 6.957497 8822 +punish 1 1 6.957497 6.957497 8823 +dateind 1 1 6.957497 6.957497 8824 +uniniti 1 1 6.957497 6.957497 8825 +runtimerath 1 1 6.957497 6.957497 8826 +byproduct 1 1 6.957497 6.957497 8827 +withlanguag 1 1 6.957497 6.957497 8828 +alwaysa 1 1 6.957497 6.957497 8829 +disloc 1 1 6.957497 6.957497 8830 +thetransit 1 1 6.957497 6.957497 8831 +amazingli 1 1 6.957497 6.957497 8832 +youalreadi 1 1 6.957497 6.957497 8833 +arefer 1 1 6.957497 6.957497 8834 +manuali 1 1 6.957497 6.957497 8835 +wayfrom 1 1 6.957497 6.957497 8836 +sophisticatedprogram 1 1 6.957497 6.957497 8837 +ofoth 1 1 6.957497 6.957497 8838 +niceonlin 1 1 6.957497 6.957497 8839 +tutorialabout 1 1 6.957497 6.957497 8840 +javaoct 1 1 6.957497 6.957497 8841 +synchronizationoct 1 1 6.957497 6.957497 8842 +schedulingoct 1 1 6.957497 6.957497 8843 +schedulingdec 1 1 6.957497 6.957497 8844 +systemsdec 1 1 6.957497 6.957497 8845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html new file mode 100644 index 00000000..9cb883f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs537-2^cs537.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +system 4 443 0.693147 2.772588 6 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +address 2 170 1.791759 3.583518 62 +distribut 2 162 1.791759 3.583518 51 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +file 2 132 1.945910 3.891820 70 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +introduct 2 126 2.079442 4.158884 87 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +manag 2 114 2.197225 4.394450 125 +instructor 1 108 2.197225 2.197225 107 +memori 2 101 2.302585 4.605170 139 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +solut 1 82 2.484907 2.484907 162 +tuesdai 1 73 2.639057 2.639057 219 +name 1 72 2.639057 2.639057 220 +thursdai 2 70 2.708050 5.416100 241 +java 2 70 2.708050 5.416100 248 +virtual 1 62 2.772589 2.772589 285 +space 1 57 2.890372 2.890372 310 +date 1 51 2.995732 2.995732 344 +discuss 2 45 3.135494 6.270988 399 +fridai 2 44 3.135494 6.270988 390 +textbook 1 44 3.135494 3.135494 397 +cach 1 41 3.218876 3.218876 432 +review 1 42 3.218876 3.218876 425 +procedur 1 36 3.367296 3.367296 488 +concurr 2 34 3.401197 6.802394 501 +survei 1 35 3.401197 3.401197 513 +global 1 34 3.401197 3.401197 520 +chapter 1 32 3.465736 3.465736 536 +secur 1 30 3.555348 3.555348 577 +synchron 1 29 3.583519 3.583519 588 +thread 3 23 3.806662 11.419986 722 +cooper 1 22 3.850148 3.850148 757 +protect 2 17 4.110874 8.221748 935 +monitor 1 17 4.110874 4.110874 941 +quiz 1 16 4.174387 4.174387 990 +pagec 1 15 4.248495 4.248495 1011 +remot 1 15 4.248495 4.248495 1041 +demand 1 14 4.317488 4.317488 1073 +quizz 1 13 4.382027 4.382027 1151 +translat 1 13 4.382027 4.382027 1164 +host 1 11 4.553877 4.553877 1306 +vernon 1 9 4.753590 4.753590 1556 +core 1 7 5.010635 5.010635 1809 +mutual 1 5 5.347108 5.347108 2418 +systemsfal 1 4 5.568345 5.568345 2683 +deadlock 1 4 5.568345 5.568345 2641 +thanksgiv 1 2 6.263398 6.263398 4185 +maryvernon 1 1 6.957497 6.957497 8846 +andkarunamuthiah 1 1 6.957497 6.957497 8847 +beinterchang 1 1 6.957497 6.957497 8848 +archiveapproxim 1 1 6.957497 6.957497 8849 +topicsweek 1 1 6.957497 6.957497 8850 +oftopicsreadingsep 1 1 6.957497 6.957497 8851 +processeschapt 1 1 6.957497 6.957497 8852 +threadschapt 1 1 6.957497 6.957497 8853 +exclusioncont 1 1 6.957497 6.957497 8854 +semaphorescont 1 1 6.957497 6.957497 8855 +summarycont 1 1 6.957497 6.957497 8856 +doct 1 1 6.957497 6.957497 8857 +schedulingchapt 1 1 6.957497 6.957497 8858 +tlbschapter 1 1 6.957497 6.957497 8859 +memorycont 1 1 6.957497 6.957497 8860 +systemschapt 1 1 6.957497 6.957497 8861 +directorieschapt 1 1 6.957497 6.957497 8862 +methodstbanov 1 1 6.957497 6.957497 8863 +reviewchapt 1 1 6.957497 6.957497 8864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html new file mode 100644 index 00000000..96a972bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs552-2^cs552.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 3 412 0.693147 2.079441 8 +project 5 340 1.098612 5.493060 18 +offic 4 299 1.098612 4.394448 13 +cours 3 273 1.098612 3.295836 15 +time 2 293 1.098612 2.197224 17 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +fall 4 181 1.609438 6.437752 40 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +hour 4 165 1.791759 7.167036 46 +phone 2 175 1.791759 3.583518 45 +assign 11 135 1.945910 21.405010 66 +lectur 1 135 1.945910 1.945910 73 +spring 6 131 2.079442 12.476652 88 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +instructor 2 108 2.197225 4.394450 107 +check 1 115 2.197225 2.197225 118 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +select 3 91 2.397895 7.193685 154 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +exam 4 86 2.484907 9.939628 169 +solut 3 82 2.484907 7.454721 162 +start 2 83 2.484907 4.969814 173 +help 1 83 2.484907 2.484907 175 +decemb 1 80 2.564949 2.564949 215 +tuesdai 4 73 2.639057 10.556228 219 +david 2 71 2.639057 5.278114 232 +thursdai 2 70 2.708050 5.416100 241 +simul 1 66 2.708050 2.708050 255 +handout 2 64 2.772589 5.545178 263 +wednesdai 2 64 2.772589 5.545178 261 +descript 2 64 2.772589 5.545178 271 +previou 1 62 2.772589 2.772589 290 +room 2 59 2.833213 5.666426 301 +locat 1 59 2.833213 2.833213 303 +sampl 1 53 2.944439 2.944439 339 +appoint 2 49 3.044522 6.089044 358 +get 2 46 3.091042 6.182084 380 +midterm 8 45 3.135494 25.083952 392 +answer 4 45 3.135494 12.541976 391 +error 1 40 3.258097 3.258097 449 +correct 1 38 3.295837 3.295837 462 +demonstr 1 24 3.761200 3.761200 694 +wood 1 11 4.553877 4.553877 1355 +deadlin 1 9 4.753590 4.753590 1502 +phil 1 5 5.347108 5.347108 2419 +mentor 1 4 5.568345 5.568345 2591 +atkinson 2 2 6.263398 12.526796 4722 +vhdl 2 1 6.957497 13.914994 8865 +mentorassign 1 1 6.957497 6.957497 8866 +projectthi 1 1 6.957497 6.957497 8867 +examsth 1 1 6.957497 6.957497 8868 +endterm 1 1 6.957497 6.957497 8869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html new file mode 100644 index 00000000..3e78f1cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs564-1^cs564.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +inform 2 412 0.693147 1.386294 8 +offic 6 299 1.098612 6.591672 13 +last 6 314 1.098612 6.591672 14 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +mail 3 238 1.386294 4.158882 22 +design 2 213 1.386294 2.772588 25 +languag 1 227 1.386294 1.386294 26 +updat 3 191 1.609438 4.828314 41 +class 2 199 1.609438 3.218876 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +hour 4 165 1.791759 7.167036 46 +phone 2 175 1.791759 3.583518 45 +assign 11 135 1.945910 21.405010 66 +year 2 148 1.945910 3.891820 84 +lectur 2 135 1.945910 3.891820 73 +first 1 140 1.945910 1.945910 71 +construct 1 139 1.945910 1.945910 82 +postscript 8 131 2.079442 16.635536 90 +databas 2 122 2.079442 4.158884 86 +manag 2 114 2.197225 4.394450 125 +version 1 113 2.197225 2.197225 122 +pleas 1 113 2.197225 2.197225 114 +topic 1 114 2.197225 2.197225 110 +check 1 115 2.197225 2.197225 118 +code 1 108 2.197225 2.197225 116 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +text 1 98 2.302585 2.302585 133 +grade 2 90 2.397895 4.795790 142 +info 4 85 2.484907 9.939628 176 +solut 2 82 2.484907 4.969814 162 +chang 1 82 2.484907 2.484907 163 +help 1 83 2.484907 2.484907 175 +resourc 1 81 2.484907 2.484907 172 +issu 1 78 2.564949 2.564949 211 +html 2 75 2.639057 5.278114 235 +handout 6 64 2.772589 16.635534 263 +import 2 65 2.772589 5.545178 282 +polici 1 64 2.772589 2.772589 279 +experi 1 64 2.772589 2.772589 283 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +date 1 51 2.995732 2.995732 344 +fridai 2 44 3.135494 6.270988 390 +midterm 2 45 3.135494 6.270988 392 +discuss 1 45 3.135494 3.135494 399 +tutori 1 39 3.258097 3.258097 437 +tree 2 36 3.367296 6.734592 492 +print 1 34 3.401197 3.401197 503 +chapter 2 32 3.465736 6.931472 536 +yahoo 1 24 3.761200 3.761200 707 +instead 1 22 3.850148 3.850148 756 +exercis 2 19 4.007333 8.014666 842 +prerequisit 1 19 4.007333 4.007333 846 +thur 1 19 4.007333 4.007333 847 +sept 1 17 4.110874 4.110874 952 +ramakrishnan 1 16 4.174387 4.174387 972 +convent 1 14 4.317488 4.317488 1072 +raghu 2 12 4.465908 8.931816 1212 +tue 1 11 4.553877 4.553877 1308 +debugg 1 9 4.753590 4.753590 1493 +entri 1 8 4.875197 4.875197 1678 +minibas 1 4 5.568345 5.568345 2608 +dont 1 3 5.857933 5.857933 3473 +sybas 2 2 6.263398 12.526796 4723 +xbao 2 1 6.957497 13.914994 8870 +implementationc 1 1 6.957497 6.957497 8871 +implementationcours 1 1 6.957497 6.957497 8872 +assignmentoth 1 1 6.957497 6.957497 8873 +ingraham 1 1 6.957497 6.957497 8874 +xuemei 1 1 6.957497 6.957497 8875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html new file mode 100644 index 00000000..f365748a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs564-2^cs564.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +lectur 7 135 1.945910 13.621370 73 +assign 2 135 1.945910 3.891820 66 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +place 2 106 2.197225 4.394450 124 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +thing 1 84 2.484907 2.484907 189 +chang 1 82 2.484907 2.484907 163 +exam 1 86 2.484907 2.484907 169 +know 2 80 2.564949 5.129898 198 +meet 1 72 2.639057 2.639057 229 +import 1 65 2.772589 2.772589 282 +wednesdai 1 64 2.772589 2.772589 261 +semest 1 58 2.890372 2.890372 312 +particular 1 51 2.995732 2.995732 352 +still 1 50 3.044522 3.044522 362 +discuss 3 45 3.135494 9.406482 399 +fridai 1 44 3.135494 3.135494 390 +probabl 1 40 3.258097 3.258097 455 +close 1 38 3.295837 3.295837 465 +taught 1 33 3.433987 3.433987 526 +option 2 30 3.555348 7.110696 575 +progress 1 28 3.610918 3.610918 598 +jeff 1 25 3.737670 3.737670 673 +lab 1 24 3.761200 3.761200 698 +cooper 1 22 3.850148 3.850148 757 +fact 1 21 3.912023 3.912023 780 +psycholog 2 15 4.248495 8.496990 1054 +naughton 1 10 4.653960 4.653960 1450 +russel 2 9 4.753590 9.507180 1507 +minibas 1 4 5.568345 5.568345 2608 +obvious 1 3 5.857933 5.857933 3474 +addinginform 1 1 6.957497 6.957497 8876 +meetingroom 1 1 6.957497 6.957497 8877 +labsfor 1 1 6.957497 6.957497 8878 +beenmov 1 1 6.957497 6.957497 8879 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html new file mode 100644 index 00000000..4214c516 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs577-1^cs577.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 1 412 0.693147 0.693147 8 +cours 4 273 1.098612 4.394448 15 +offic 3 299 1.098612 3.295836 13 +mail 4 238 1.386294 5.545176 22 +wisc 3 242 1.386294 4.158882 33 +phone 3 175 1.791759 5.375277 45 +hour 3 165 1.791759 5.375277 46 +algorithm 2 162 1.791759 3.583518 57 +introduct 2 126 2.079442 4.158884 87 +teach 2 108 2.197225 4.394450 112 +assist 2 112 2.197225 4.394450 113 +instructor 1 108 2.197225 2.197225 107 +book 1 99 2.302585 2.302585 131 +exam 1 86 2.484907 2.484907 169 +solut 1 82 2.484907 2.484907 162 +homework 6 79 2.564949 15.389694 193 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +descript 1 64 2.772589 2.772589 271 +organ 1 65 2.772589 2.772589 265 +archiv 1 49 3.044522 3.044522 364 +midterm 1 45 3.135494 3.135494 392 +graph 1 30 3.555348 3.555348 576 +reserv 1 20 3.951244 3.951244 808 +eric 1 19 4.007333 4.007333 870 +bill 1 11 4.553877 4.553877 1297 +appt 1 5 5.347108 5.347108 2312 +bach 2 4 5.568345 11.136690 2708 +fractal 1 3 5.857933 5.857933 3475 +behaviour 1 2 6.263398 6.263398 4724 +raji 2 1 6.957497 13.914994 8880 +donaldson 1 1 6.957497 6.957497 8881 +gopalakrishnan 1 1 6.957497 6.957497 8882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html new file mode 100644 index 00000000..239255cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs640-1^cs640.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +program 4 374 0.693147 2.772588 7 +inform 1 412 0.693147 0.693147 8 +cours 6 273 1.098612 6.591672 15 +project 5 340 1.098612 5.493060 18 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +email 5 220 1.386294 6.931470 29 +wisc 5 242 1.386294 6.931470 33 +mail 3 238 1.386294 4.158882 22 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +fall 5 181 1.609438 8.047190 40 +class 1 199 1.609438 1.609438 37 +network 8 168 1.791759 14.334072 61 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +implement 1 152 1.791759 1.791759 52 +assign 4 135 1.945910 7.783640 66 +lectur 2 135 1.945910 3.891820 73 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +postscript 4 131 2.079442 8.317768 90 +document 2 121 2.079442 4.158884 89 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +teach 3 108 2.197225 6.591675 112 +assist 3 112 2.197225 6.591675 113 +version 2 113 2.197225 4.394450 122 +intern 1 108 2.197225 2.197225 128 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +advanc 2 99 2.302585 4.605170 130 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +select 1 91 2.397895 2.397895 154 +pictur 1 89 2.397895 2.397895 160 +grade 1 90 2.397895 2.397895 142 +exam 2 86 2.484907 4.969814 169 +refer 3 78 2.564949 7.694847 203 +interfac 2 79 2.564949 5.129898 209 +mondai 1 77 2.564949 2.564949 206 +complet 1 77 2.564949 2.564949 208 +free 2 73 2.639057 5.278114 224 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +syllabu 2 67 2.708050 5.416100 247 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +evalu 1 64 2.772589 2.772589 266 +content 1 59 2.833213 2.833213 302 +overview 1 56 2.890372 2.890372 323 +unix 1 58 2.890372 2.890372 308 +tabl 1 51 2.995732 2.995732 346 +archiv 3 49 3.044522 9.133566 364 +appoint 2 49 3.044522 6.089044 358 +adapt 1 46 3.091042 3.091042 387 +midterm 3 45 3.135494 9.406482 392 +offer 1 43 3.178054 3.178054 414 +term 1 43 3.178054 3.178054 411 +review 1 42 3.218876 3.218876 425 +form 2 39 3.258097 6.516194 443 +error 1 40 3.258097 3.258097 449 +slide 3 38 3.295837 9.887511 467 +feel 2 37 3.332205 6.664410 483 +connect 1 37 3.332205 3.332205 485 +eduoffic 3 33 3.433987 10.301961 531 +richard 1 31 3.496508 3.496508 559 +option 1 30 3.555348 3.555348 575 +packag 1 28 3.610918 3.610918 614 +comp 2 26 3.688879 7.377758 650 +reliabl 1 25 3.737670 3.737670 674 +latest 1 21 3.912023 3.912023 785 +annot 1 21 3.912023 3.912023 775 +partial 1 18 4.060443 4.060443 900 +layer 2 17 4.110874 8.221748 926 +steven 1 17 4.110874 4.110874 953 +warn 1 14 4.317488 4.317488 1068 +prior 1 10 4.653960 4.653960 1438 +criteria 1 9 4.753590 4.753590 1477 +lawrenc 1 7 5.010635 5.010635 1908 +prentic 1 7 5.010635 5.010635 1838 +isbn 1 7 5.010635 5.010635 1901 +conveni 2 6 5.164786 10.329572 2088 +moder 1 6 5.164786 5.164786 2112 +landweb 1 3 5.857933 5.857933 3402 +hereto 1 3 5.857933 5.857933 3476 +gradingmidterm 1 3 5.857933 5.857933 3230 +socket 2 2 6.263398 12.526796 4725 +statphon 1 2 6.263398 6.263398 4726 +ipng 1 2 6.263398 6.263398 4727 +powerpoint 2 1 6.957497 13.914994 8883 +networksintroduct 1 1 6.957497 6.957497 8884 +readingsclick 1 1 6.957497 6.957497 8885 +networkingcours 1 1 6.957497 6.957497 8886 +madisoncours 1 1 6.957497 6.957497 8887 +informationlecturetim 1 1 6.957497 6.957497 8888 +mwfplace 1 1 6.957497 6.957497 8889 +statclass 1 1 6.957497 6.957497 8890 +listinstructor 1 1 6.957497 6.957497 8891 +landweberoffic 1 1 6.957497 6.957497 8892 +srinivasa 1 1 6.957497 6.957497 8893 +narayananoffic 1 1 6.957497 6.957497 8894 +teitelbaumoffic 1 1 6.957497 6.957497 8895 +naemail 1 1 6.957497 6.957497 8896 +garbler 1 1 6.957497 6.957497 8897 +bibliographyread 1 1 6.957497 6.957497 8898 +icmp 1 1 6.957497 6.957497 8899 +ospf 1 1 6.957497 6.957497 8900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html new file mode 100644 index 00000000..251b1c25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs720-1^cs720.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +project 2 340 1.098612 2.197224 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +wisc 1 242 1.386294 1.386294 33 +fall 2 181 1.609438 3.218876 40 +hour 1 165 1.791759 1.791759 46 +note 2 142 1.945910 3.891820 67 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +part 2 98 2.302585 4.605170 129 +solut 2 82 2.484907 4.969814 162 +homework 4 79 2.564949 10.259796 193 +descript 1 64 2.772589 2.772589 271 +robert 1 30 3.555348 3.555348 567 +option 1 30 3.555348 3.555348 575 +comp 1 26 3.688879 3.688879 650 +meyer 1 2 6.263398 6.263398 4728 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^ new file mode 100644 index 00000000..e244bdba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 6 374 0.693147 4.158882 7 +inform 1 412 0.693147 0.693147 8 +cours 5 273 1.098612 5.493060 15 +offic 4 299 1.098612 4.394448 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +wisc 4 242 1.386294 5.545176 33 +mail 3 238 1.386294 4.158882 22 +gener 2 220 1.386294 2.772588 27 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 2 165 1.791759 3.583518 46 +algorithm 1 162 1.791759 1.791759 57 +assign 4 135 1.945910 7.783640 66 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +theori 2 111 2.197225 4.394450 127 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +text 2 98 2.302585 4.605170 133 +book 1 99 2.302585 2.302585 131 +grade 4 90 2.397895 9.591580 142 +second 2 81 2.484907 4.969814 166 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +homework 6 79 2.564949 15.389694 193 +optim 4 79 2.564949 10.259796 197 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +order 2 69 2.708050 5.416100 249 +thursdai 1 70 2.708050 2.708050 241 +function 3 62 2.772589 8.317767 275 +wednesdai 2 64 2.772589 5.545178 261 +publish 1 57 2.890372 2.890372 326 +overview 1 56 2.890372 2.890372 323 +semest 1 58 2.890372 2.890372 312 +scientif 1 53 2.944439 2.944439 341 +week 1 52 2.995732 2.995732 343 +telephon 2 50 3.044522 6.089044 373 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +midterm 1 45 3.135494 3.135494 392 +examin 2 42 3.218876 6.437752 424 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +york 1 41 3.218876 3.218876 435 +linear 1 41 3.218876 3.218876 431 +michael 1 35 3.401197 3.401197 514 +altern 1 26 3.688879 3.688879 641 +period 1 22 3.850148 3.850148 743 +siam 1 21 3.912023 3.912023 800 +theorem 1 21 3.912023 3.912023 786 +reserv 1 20 3.951244 3.951244 808 +differenti 4 17 4.110874 16.443496 921 +stat 1 17 4.110874 4.110874 924 +condit 1 16 4.174387 4.174387 975 +nonlinear 5 14 4.317488 21.587440 1107 +philadelphia 1 12 4.465908 4.465908 1244 +penalti 1 10 4.653960 4.653960 1405 +wendt 1 10 4.653960 4.653960 1446 +criteria 3 9 4.753590 14.260770 1477 +mangasarian 1 9 4.753590 4.753590 1570 +exact 1 9 4.753590 4.753590 1509 +kurt 1 9 4.753590 4.753590 1548 +ferri 2 8 4.875197 9.750394 1715 +olvi 1 6 5.164786 5.164786 2109 +inequ 1 6 5.164786 5.164786 2113 +augment 1 5 5.347108 5.347108 2350 +convex 4 4 5.568345 22.273380 2807 +concav 2 4 5.568345 11.136690 2808 +wilei 1 4 5.568345 5.568345 2669 +bertseka 1 3 5.857933 5.857933 3477 +lagrangian 1 3 5.857933 5.857933 3478 +gradient 1 3 5.857933 5.857933 3479 +applicationsfal 1 2 6.263398 6.263398 4729 +bazaraa 1 2 6.263398 6.263398 4730 +sherali 1 2 6.263398 6.263398 4731 +shetti 1 2 6.263398 6.263398 4732 +athena 1 2 6.263398 6.263398 4733 +saddlepoint 1 2 6.263398 6.263398 4734 +dualiti 1 2 6.263398 6.263398 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html new file mode 100644 index 00000000..e244bdba --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs726-1^cs726.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 6 374 0.693147 4.158882 7 +inform 1 412 0.693147 0.693147 8 +cours 5 273 1.098612 5.493060 15 +offic 4 299 1.098612 4.394448 13 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +wisc 4 242 1.386294 5.545176 33 +mail 3 238 1.386294 4.158882 22 +gener 2 220 1.386294 2.772588 27 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +hour 2 165 1.791759 3.583518 46 +algorithm 1 162 1.791759 1.791759 57 +assign 4 135 1.945910 7.783640 66 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +theori 2 111 2.197225 4.394450 127 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +text 2 98 2.302585 4.605170 133 +book 1 99 2.302585 2.302585 131 +grade 4 90 2.397895 9.591580 142 +second 2 81 2.484907 4.969814 166 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +homework 6 79 2.564949 15.389694 193 +optim 4 79 2.564949 10.259796 197 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +order 2 69 2.708050 5.416100 249 +thursdai 1 70 2.708050 2.708050 241 +function 3 62 2.772589 8.317767 275 +wednesdai 2 64 2.772589 5.545178 261 +publish 1 57 2.890372 2.890372 326 +overview 1 56 2.890372 2.890372 323 +semest 1 58 2.890372 2.890372 312 +scientif 1 53 2.944439 2.944439 341 +week 1 52 2.995732 2.995732 343 +telephon 2 50 3.044522 6.089044 373 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +midterm 1 45 3.135494 3.135494 392 +examin 2 42 3.218876 6.437752 424 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +york 1 41 3.218876 3.218876 435 +linear 1 41 3.218876 3.218876 431 +michael 1 35 3.401197 3.401197 514 +altern 1 26 3.688879 3.688879 641 +period 1 22 3.850148 3.850148 743 +siam 1 21 3.912023 3.912023 800 +theorem 1 21 3.912023 3.912023 786 +reserv 1 20 3.951244 3.951244 808 +differenti 4 17 4.110874 16.443496 921 +stat 1 17 4.110874 4.110874 924 +condit 1 16 4.174387 4.174387 975 +nonlinear 5 14 4.317488 21.587440 1107 +philadelphia 1 12 4.465908 4.465908 1244 +penalti 1 10 4.653960 4.653960 1405 +wendt 1 10 4.653960 4.653960 1446 +criteria 3 9 4.753590 14.260770 1477 +mangasarian 1 9 4.753590 4.753590 1570 +exact 1 9 4.753590 4.753590 1509 +kurt 1 9 4.753590 4.753590 1548 +ferri 2 8 4.875197 9.750394 1715 +olvi 1 6 5.164786 5.164786 2109 +inequ 1 6 5.164786 5.164786 2113 +augment 1 5 5.347108 5.347108 2350 +convex 4 4 5.568345 22.273380 2807 +concav 2 4 5.568345 11.136690 2808 +wilei 1 4 5.568345 5.568345 2669 +bertseka 1 3 5.857933 5.857933 3477 +lagrangian 1 3 5.857933 5.857933 3478 +gradient 1 3 5.857933 5.857933 3479 +applicationsfal 1 2 6.263398 6.263398 4729 +bazaraa 1 2 6.263398 6.263398 4730 +sherali 1 2 6.263398 6.263398 4731 +shetti 1 2 6.263398 6.263398 4732 +athena 1 2 6.263398 6.263398 4733 +saddlepoint 1 2 6.263398 6.263398 4734 +dualiti 1 2 6.263398 6.263398 4735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html new file mode 100644 index 00000000..8c963fc7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs737-1^cs737.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +wisc 3 242 1.386294 4.158882 33 +softwar 2 220 1.386294 2.772588 30 +mail 2 238 1.386294 2.772588 22 +public 1 202 1.609438 1.609438 43 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +avail 1 169 1.791759 1.791759 48 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +professor 1 137 1.945910 1.945910 76 +postscript 3 131 2.079442 6.238326 90 +pleas 2 113 2.197225 4.394450 114 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +text 2 98 2.302585 4.605170 133 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +comment 1 93 2.397895 2.397895 146 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +exampl 1 77 2.564949 2.564949 195 +html 4 75 2.639057 10.556228 235 +onlin 1 75 2.639057 2.639057 223 +evalu 1 64 2.772589 2.772589 266 +new 1 64 2.772589 2.772589 262 +instruct 1 53 2.944439 2.944439 332 +suggest 1 53 2.944439 2.944439 331 +tutori 1 39 3.258097 3.258097 437 +manual 1 35 3.401197 3.401197 504 +print 1 34 3.401197 3.401197 503 +least 1 35 3.401197 3.401197 516 +initi 1 23 3.806662 3.806662 717 +half 1 21 3.912023 3.912023 776 +sept 2 17 4.110874 8.221748 952 +livni 1 15 4.248495 4.248495 1053 +miron 2 14 4.317488 8.634976 1110 +devis 1 10 4.653960 4.653960 1451 +chan 1 7 5.010635 5.010635 1876 +yong 1 4 5.568345 5.568345 2809 +chee 1 3 5.857933 5.857933 3480 +mimic 3 2 6.263398 18.790194 4736 +cychan 2 2 6.263398 12.526796 4737 +qnet 1 1 6.957497 6.957497 8901 +devc 1 1 6.957497 6.957497 8902 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html new file mode 100644 index 00000000..c58bdef4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~cs838-2^cs838.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +student 3 343 1.098612 3.295836 19 +cours 2 273 1.098612 2.197224 15 +last 1 314 1.098612 1.098612 14 +email 2 220 1.386294 2.772588 29 +design 1 213 1.386294 1.386294 25 +wisc 1 242 1.386294 1.386294 33 +class 2 199 1.609438 3.218876 37 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +lectur 1 135 1.945910 1.945910 73 +relat 1 139 1.945910 1.945910 68 +assign 1 135 1.945910 1.945910 66 +postscript 3 131 2.079442 6.238326 90 +technolog 1 131 2.079442 2.079442 102 +schedul 1 119 2.079442 2.079442 85 +find 2 111 2.197225 4.394450 111 +world 1 115 2.197225 2.197225 126 +topic 1 114 2.197225 2.197225 110 +part 2 98 2.302585 4.605170 129 +techniqu 1 99 2.302585 2.302585 138 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +resourc 2 81 2.484907 4.969814 172 +activ 1 84 2.484907 2.484907 182 +wide 1 84 2.484907 2.484907 185 +complet 1 77 2.564949 2.564949 208 +knowledg 1 67 2.708050 2.708050 243 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +room 1 59 2.833213 2.833213 301 +overview 2 56 2.890372 5.780744 323 +major 1 56 2.890372 2.890372 315 +suggest 1 53 2.944439 2.944439 331 +much 1 52 2.995732 2.995732 349 +understand 1 47 3.091042 3.091042 384 +represent 1 35 3.401197 3.401197 512 +taken 1 31 3.496508 3.496508 555 +consid 1 29 3.583519 3.583519 590 +univ 1 28 3.610918 3.610918 617 +retriev 2 27 3.637586 7.275172 621 +tent 1 22 3.850148 3.850148 739 +minut 1 20 3.951244 3.951244 810 +thur 1 19 4.007333 4.007333 847 +seek 1 17 4.110874 4.110874 954 +sept 1 17 4.110874 4.110874 952 +onth 1 12 4.465908 4.465908 1218 +excit 1 11 4.553877 4.553877 1329 +underli 1 10 4.653960 4.653960 1410 +occur 1 9 4.753590 4.753590 1572 +compos 1 9 4.753590 4.753590 1527 +digest 1 7 5.010635 5.010635 1864 +machinelearn 1 6 5.164786 5.164786 2084 +proce 1 6 5.164786 5.164786 2114 +polit 1 6 5.164786 5.164786 2115 +anda 1 5 5.347108 5.347108 2416 +aboutth 1 4 5.568345 5.568345 2720 +thesear 1 3 5.857933 5.857933 3456 +uwisc 1 2 6.263398 6.263398 4738 +belew 1 2 6.263398 6.263398 4739 +knowledgerichard 1 1 6.957497 6.957497 8903 +belewvisit 1 1 6.957497 6.957497 8904 +professorc 1 1 6.957497 6.957497 8905 +departmentfal 1 1 6.957497 6.957497 8906 +acal 1 1 6.957497 6.957497 8907 +engrthi 1 1 6.957497 6.957497 8908 +coures 1 1 6.957497 6.957497 8909 +canse 1 1 6.957497 6.957497 8910 +asyllabu 1 1 6.957497 6.957497 8911 +mapof 1 1 6.957497 6.957497 8912 +semesterwil 1 1 6.957497 6.957497 8913 +infidel 1 1 6.957497 6.957497 8914 +hypermai 1 1 6.957497 6.957497 8915 +classrel 1 1 6.957497 6.957497 8916 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html new file mode 100644 index 00000000..d9449def --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs412.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 3 343 1.098612 3.295836 19 +last 2 314 1.098612 2.197224 14 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +also 3 259 1.386294 4.158882 28 +email 2 220 1.386294 2.772588 29 +class 4 199 1.609438 6.437752 37 +list 3 201 1.609438 4.828314 39 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +avail 2 169 1.791759 3.583518 48 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +assign 4 135 1.945910 7.783640 66 +note 2 142 1.945910 3.891820 67 +problem 2 147 1.945910 3.891820 75 +file 1 132 1.945910 1.945910 70 +relat 1 139 1.945910 1.945910 68 +introduct 3 126 2.079442 6.238326 87 +machin 1 129 2.079442 2.079442 95 +well 3 109 2.197225 6.591675 121 +look 2 107 2.197225 4.394450 115 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +book 3 99 2.302585 6.907755 131 +access 2 102 2.302585 4.605170 136 +text 1 98 2.302585 2.302585 133 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +question 3 91 2.397895 7.193685 141 +sinc 1 90 2.397895 2.397895 159 +present 1 91 2.397895 2.397895 145 +grade 1 90 2.397895 2.397895 142 +chang 8 82 2.484907 19.879256 163 +contain 1 81 2.484907 2.484907 174 +solut 1 82 2.484907 2.484907 162 +method 1 80 2.564949 2.564949 213 +orient 1 80 2.564949 2.564949 205 +good 1 77 2.564949 2.564949 200 +materi 3 75 2.639057 7.917171 221 +addit 2 74 2.639057 5.278114 228 +name 2 72 2.639057 5.278114 220 +syllabu 2 67 2.708050 5.416100 247 +order 2 69 2.708050 5.416100 249 +complex 1 64 2.772589 2.772589 269 +organ 1 65 2.772589 2.772589 265 +plan 1 65 2.772589 2.772589 272 +handout 1 64 2.772589 2.772589 263 +simpl 1 60 2.833213 2.833213 298 +unix 2 58 2.890372 5.780744 308 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +numer 2 49 3.044522 6.089044 369 +frequent 1 49 3.044522 3.044522 367 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +edit 2 42 3.218876 6.437752 418 +might 1 41 3.218876 3.218876 426 +error 1 40 3.258097 3.258097 449 +tutori 1 39 3.258097 3.258097 437 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +approxim 1 35 3.401197 3.401197 509 +word 1 34 3.401197 3.401197 508 +chapter 1 32 3.465736 3.465736 536 +ask 1 28 3.610918 3.610918 597 +mind 1 27 3.637586 3.637586 632 +though 1 27 3.637586 3.637586 622 +session 2 26 3.688879 7.377758 643 +subject 1 26 3.688879 3.688879 647 +rather 1 26 3.688879 3.688879 642 +concern 2 25 3.737670 7.475340 666 +todai 1 25 3.737670 3.737670 672 +sometim 1 24 3.761200 3.761200 696 +wish 1 24 3.761200 3.761200 692 +begin 2 23 3.806662 7.613324 716 +tent 2 22 3.850148 7.700296 739 +cooper 1 22 3.850148 3.850148 757 +lower 1 18 4.060443 4.060443 886 +four 1 18 4.060443 4.060443 905 +condit 1 16 4.174387 4.174387 975 +score 2 15 4.248495 8.496990 1017 +carl 1 15 4.248495 4.248495 1024 +fortran 1 15 4.248495 4.248495 1027 +matlab 6 14 4.317488 25.904928 1081 +squar 1 14 4.317488 4.317488 1082 +doit 1 14 4.317488 4.317488 1111 +conduct 1 14 4.317488 4.317488 1065 +total 1 10 4.653960 4.653960 1398 +errata 1 10 4.653960 4.653960 1403 +mention 2 9 4.753590 9.507180 1569 +smile 1 7 5.010635 5.010635 1807 +slightli 1 7 5.010635 5.010635 1795 +awar 1 7 5.010635 5.010635 1800 +supplement 1 5 5.347108 5.347108 2355 +rick 1 4 5.568345 5.568345 2646 +areavail 1 4 5.568345 5.568345 2810 +andp 1 4 5.568345 5.568345 2811 +preprint 1 3 5.857933 5.857933 3481 +diari 4 2 6.263398 25.053592 4740 +residu 1 2 6.263398 6.263398 4741 +kermit 1 2 6.263398 6.263398 4742 +primer 1 2 6.263398 6.263398 4719 +overviewcours 1 2 6.263398 6.263398 4399 +linksyou 1 2 6.263398 6.263398 4743 +csdepart 1 2 6.263398 6.263398 4130 +telnet 2 1 6.957497 13.914994 8917 +methodsthi 1 1 6.957497 6.957497 8918 +orderli 1 1 6.957497 6.957497 8919 +assignmentson 1 1 6.957497 6.957497 8920 +numericalanalysi 1 1 6.957497 6.957497 8921 +foremostmathematician 1 1 6.957497 6.957497 8922 +trickytop 1 1 6.957497 6.957497 8923 +textmai 1 1 6.957497 6.957497 8924 +byaddit 1 1 6.957497 6.957497 8925 +capitallett 1 1 6.957497 6.957497 8926 +caselett 1 1 6.957497 6.957497 8927 +sigmon 1 1 6.957497 6.957497 8928 +reaction 1 1 6.957497 6.957497 8929 +winor 1 1 6.957497 6.957497 8930 +referenceviva 1 1 6.957497 6.957497 8931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html new file mode 100644 index 00000000..00bae96c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^cs717.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +email 2 220 1.386294 2.772588 29 +wisc 1 242 1.386294 1.386294 33 +class 2 199 1.609438 3.218876 37 +fall 1 181 1.609438 1.609438 40 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +hour 1 165 1.791759 1.791759 46 +relat 2 139 1.945910 3.891820 68 +note 1 142 1.945910 1.945910 67 +assign 1 135 1.945910 1.945910 66 +analysi 1 124 2.079442 2.079442 98 +version 1 113 2.197225 2.197225 122 +well 1 109 2.197225 2.197225 121 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +question 1 91 2.397895 2.397895 141 +chang 1 82 2.484907 2.484907 163 +contain 1 81 2.484907 2.484907 174 +homework 1 79 2.564949 2.564949 193 +line 1 75 2.639057 2.639057 231 +function 2 62 2.772589 5.545178 275 +copi 1 63 2.772589 2.772589 284 +locat 1 59 2.833213 2.833213 303 +index 1 56 2.890372 2.890372 309 +explor 1 58 2.890372 2.890372 324 +numer 2 49 3.044522 6.089044 369 +math 1 44 3.135494 3.135494 402 +directori 1 45 3.135494 3.135494 396 +might 1 41 3.218876 3.218876 426 +announc 1 40 3.258097 3.258097 441 +post 1 35 3.401197 3.401197 505 +hard 1 30 3.555348 3.555348 563 +concern 1 25 3.737670 3.737670 666 +wish 1 24 3.761200 3.761200 692 +stat 1 17 4.110874 4.110874 924 +carl 1 15 4.248495 4.248495 1024 +doit 1 14 4.317488 4.317488 1111 +none 1 7 5.010635 5.010635 1811 +boor 1 3 5.857933 5.857933 3482 +deboor 1 2 6.263398 6.263398 4744 +linksyou 1 2 6.263398 6.263398 4743 +analysisthi 1 1 6.957497 6.957497 8932 +statlectur 1 1 6.957497 6.957497 8933 +classnot 1 1 6.957497 6.957497 8934 +viii 1 1 6.957497 6.957497 8935 +courseoff 1 1 6.957497 6.957497 8936 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html new file mode 100644 index 00000000..0d2a1631 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~deboor^ma887.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +cours 2 273 1.098612 2.197224 15 +last 1 314 1.098612 1.098612 14 +note 1 142 1.945910 1.945910 67 +spring 1 131 2.079442 2.079442 88 +theori 1 111 2.197225 2.197225 127 +version 1 113 2.197225 2.197225 122 +chang 1 82 2.484907 2.484907 163 +contain 1 81 2.484907 2.484907 174 +math 1 44 3.135494 3.135494 402 +approxim 2 35 3.401197 6.802394 509 +theorythi 1 1 6.957497 6.957497 8937 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html new file mode 100644 index 00000000..ddb56446 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dsilva^cs110.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 6 374 0.693147 4.158882 7 +inform 3 412 0.693147 2.079441 8 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +introduct 2 126 2.079442 4.158884 87 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +section 1 94 2.397895 2.397895 149 +grade 1 90 2.397895 2.397895 142 +solut 3 82 2.484907 7.454721 162 +academ 1 82 2.484907 2.484907 178 +tuesdai 3 73 2.639057 7.917171 219 +solv 1 73 2.639057 2.639057 234 +window 2 68 2.708050 5.416100 242 +thursdai 2 70 2.708050 5.416100 241 +syllabu 1 67 2.708050 2.708050 247 +handout 2 64 2.772589 5.545178 263 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +semest 1 58 2.890372 2.890372 312 +appoint 1 49 3.044522 3.044522 358 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +statist 2 35 3.401197 6.802394 521 +specifi 1 30 3.555348 3.555348 568 +comp 1 26 3.688879 3.688879 650 +tent 1 22 3.850148 3.850148 739 +sept 2 17 4.110874 8.221748 952 +walter 1 17 4.110874 4.110874 950 +stat 1 17 4.110874 4.110874 924 +quiz 2 16 4.174387 8.348774 990 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +quizz 1 13 4.382027 4.382027 1151 +savitch 2 12 4.465908 8.931816 1269 +criteria 1 9 4.753590 4.753590 1477 +noland 1 5 5.347108 5.347108 2420 +anthoni 1 4 5.568345 5.568345 2792 +toni 2 3 5.857933 11.715866 3415 +textbookproblem 1 3 5.857933 5.857933 3483 +timet 1 3 5.857933 5.857933 3471 +windowshint 1 3 5.857933 5.857933 3484 +compilersth 1 3 5.857933 5.857933 3485 +systememailmosaicnetscap 1 3 5.857933 5.857933 3486 +languageth 1 3 5.857933 5.857933 3487 +silva 3 2 6.263398 18.790194 4586 +chamberlin 1 2 6.263398 6.263398 4745 +dsilva 2 1 6.957497 13.914994 8938 +sectioncsm 1 1 6.957497 6.957497 8939 +firstdai 1 1 6.957497 6.957497 8940 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html new file mode 100644 index 00000000..e8f961ad --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dyer^cs766.html @@ -0,0 +1,433 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +univers 5 571 0.000000 0.000000 5 +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +program 4 374 0.693147 2.772588 7 +system 3 443 0.693147 2.079441 6 +inform 2 412 0.693147 1.386294 8 +interest 2 384 0.693147 1.386294 11 +us 10 329 1.098612 10.986120 16 +project 8 340 1.098612 8.788896 18 +cours 6 273 1.098612 6.591672 15 +student 6 343 1.098612 6.591672 19 +engin 2 297 1.098612 2.197224 20 +time 1 293 1.098612 1.098612 17 +email 5 220 1.386294 6.931470 29 +wisc 3 242 1.386294 4.158882 33 +also 3 259 1.386294 4.158882 28 +softwar 3 220 1.386294 4.158882 30 +gener 2 220 1.386294 2.772588 27 +link 2 247 1.386294 2.772588 24 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +paper 10 205 1.609438 16.094380 38 +includ 4 208 1.609438 6.437752 42 +class 4 199 1.609438 6.437752 37 +public 2 202 1.609438 3.218876 43 +list 2 201 1.609438 3.218876 39 +modifi 1 178 1.609438 1.609438 35 +oper 1 180 1.609438 1.609438 34 +read 8 154 1.791759 14.334072 47 +algorithm 6 162 1.791759 10.750554 57 +avail 5 169 1.791759 8.958795 48 +base 3 165 1.791759 5.375277 50 +hour 2 165 1.791759 3.583518 46 +applic 2 170 1.791759 3.583518 56 +parallel 2 169 1.791759 3.583518 60 +develop 1 174 1.791759 1.791759 53 +process 9 142 1.945910 17.513190 72 +assign 5 135 1.945910 9.729550 66 +note 3 142 1.945910 5.837730 67 +model 2 145 1.945910 3.891820 69 +file 2 132 1.945910 3.891820 70 +first 1 140 1.945910 1.945910 71 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +tool 8 117 2.079442 16.635536 93 +spring 4 131 2.079442 8.317768 88 +document 3 121 2.079442 6.238326 89 +introduct 2 126 2.079442 4.158884 87 +analysi 2 124 2.079442 4.158884 98 +machin 2 129 2.079442 4.158884 95 +databas 2 122 2.079442 4.158884 86 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +send 4 114 2.197225 8.788900 109 +code 4 108 2.197225 8.788900 116 +instructor 2 108 2.197225 4.394450 107 +well 2 109 2.197225 4.394450 121 +topic 2 114 2.197225 4.394450 110 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +part 1 98 2.302585 2.302585 129 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +imag 51 91 2.397895 122.292645 161 +graphic 3 90 2.397895 7.193685 147 +grade 2 90 2.397895 4.795790 142 +select 2 91 2.397895 4.795790 154 +octob 2 89 2.397895 4.795790 156 +follow 2 92 2.397895 4.795790 143 +proceed 1 93 2.397895 2.397895 152 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +exam 11 86 2.484907 27.333977 169 +environ 4 84 2.484907 9.939628 177 +activ 3 84 2.484907 7.454721 182 +novemb 3 81 2.484907 7.454721 179 +start 3 83 2.484907 7.454721 173 +larg 3 82 2.484907 7.454721 168 +requir 2 81 2.484907 4.969814 167 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +journal 1 83 2.484907 2.484907 183 +thing 1 84 2.484907 2.484907 189 +learn 1 86 2.484907 2.484907 170 +chang 1 82 2.484907 2.484907 163 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +help 1 83 2.484907 2.484907 175 +solut 1 82 2.484907 2.484907 162 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +method 9 80 2.564949 23.084541 213 +homework 9 79 2.564949 23.084541 193 +exampl 3 77 2.564949 7.694847 195 +want 2 79 2.564949 5.129898 199 +mondai 1 77 2.564949 2.564949 206 +good 1 77 2.564949 2.564949 200 +decemb 1 80 2.564949 2.564949 215 +interfac 1 79 2.564949 2.564949 209 +line 3 75 2.639057 7.917171 231 +onlin 2 75 2.639057 5.278114 223 +free 2 73 2.639057 5.278114 224 +appli 2 71 2.639057 5.278114 226 +addit 2 74 2.639057 5.278114 228 +tuesdai 1 73 2.639057 2.639057 219 +html 1 75 2.639057 2.639057 235 +name 1 72 2.639057 2.639057 220 +test 6 66 2.708050 16.248300 252 +thursdai 4 70 2.708050 10.832200 241 +window 3 68 2.708050 8.124150 242 +differ 2 66 2.708050 5.416100 253 +syllabu 1 67 2.708050 2.708050 247 +order 1 69 2.708050 2.708050 249 +main 1 67 2.708050 2.708050 256 +result 4 65 2.772589 11.090356 281 +handout 3 64 2.772589 8.317767 263 +interact 3 62 2.772589 8.317767 270 +collect 2 65 2.772589 5.545178 268 +wednesdai 1 64 2.772589 2.772589 261 +copi 1 63 2.772589 2.772589 284 +function 1 62 2.772589 2.772589 275 +improv 1 62 2.772589 2.772589 289 +evalu 1 64 2.772589 2.772589 266 +experi 1 64 2.772589 2.772589 283 +abstract 1 62 2.772589 2.772589 276 +virtual 1 62 2.772589 2.772589 285 +locat 3 59 2.833213 8.499639 303 +type 2 61 2.833213 5.666426 296 +room 2 59 2.833213 5.666426 301 +simpl 1 60 2.833213 2.833213 298 +point 2 58 2.890372 5.780744 319 +space 2 57 2.890372 5.780744 310 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +unix 1 58 2.890372 2.890372 308 +three 2 54 2.944439 5.888878 330 +found 1 53 2.944439 2.944439 337 +cover 1 55 2.944439 2.944439 329 +talk 1 53 2.944439 2.944439 336 +digit 3 52 2.995732 8.987196 348 +date 1 51 2.995732 2.995732 344 +case 1 51 2.995732 2.995732 351 +finger 1 52 2.995732 2.995732 354 +format 5 48 3.044522 15.222610 356 +basic 2 50 3.044522 6.089044 360 +numer 2 49 3.044522 6.089044 369 +principl 1 48 3.044522 3.044522 357 +right 1 48 3.044522 3.044522 363 +approach 1 48 3.044522 3.044522 366 +visual 1 48 3.044522 3.044522 372 +get 1 46 3.091042 3.091042 380 +understand 1 47 3.091042 3.091042 384 +done 1 47 3.091042 3.091042 381 +directori 4 45 3.135494 12.541976 396 +execut 4 45 3.135494 12.541976 404 +fridai 1 44 3.135494 3.135494 390 +algebra 1 45 3.135494 3.135494 394 +midterm 1 45 3.135494 3.135494 392 +textbook 1 44 3.135494 3.135494 397 +video 1 44 3.135494 3.135494 405 +around 1 43 3.178054 3.178054 415 +long 1 43 3.178054 3.178054 413 +vision 23 41 3.218876 74.034148 430 +might 3 41 3.218876 9.656628 426 +fast 3 42 3.218876 9.656628 429 +examin 2 42 3.218876 6.437752 424 +linear 1 41 3.218876 3.218876 431 +york 1 41 3.218876 3.218876 435 +editor 1 41 3.218876 3.218876 433 +edit 1 42 3.218876 3.218876 418 +howev 1 41 3.218876 3.218876 422 +probabl 1 40 3.258097 3.258097 455 +small 1 39 3.258097 3.258097 447 +origin 2 38 3.295837 6.591674 472 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +hand 3 37 3.332205 9.996615 475 +feel 1 37 3.332205 3.332205 483 +especi 1 36 3.367296 3.367296 496 +print 4 34 3.401197 13.604788 503 +least 3 35 3.401197 10.203591 516 +manual 2 35 3.401197 6.802394 504 +committe 1 34 3.401197 3.401197 522 +eduoffic 2 33 3.433987 6.867974 531 +board 1 33 3.433987 3.433987 528 +product 1 33 3.433987 3.433987 527 +chapter 8 32 3.465736 27.725888 536 +transform 1 32 3.465736 3.465736 542 +idea 1 32 3.465736 3.465736 545 +photo 1 31 3.496508 3.496508 561 +posit 1 31 3.496508 3.496508 552 +titl 1 31 3.496508 3.496508 556 +focu 2 30 3.555348 7.110696 571 +option 1 30 3.555348 3.555348 575 +produc 1 30 3.555348 3.555348 572 +particip 1 29 3.583519 3.583519 589 +except 1 28 3.610918 3.610918 607 +packag 1 28 3.610918 3.610918 614 +held 1 28 3.610918 3.610918 600 +ask 1 28 3.610918 3.610918 597 +determin 1 27 3.637586 3.637586 630 +detect 3 26 3.688879 11.066637 646 +enhanc 2 26 3.688879 7.377758 644 +altern 1 26 3.688879 3.688879 641 +relev 1 26 3.688879 3.688879 637 +fundament 1 25 3.737670 3.737670 661 +hill 1 25 3.737670 3.737670 670 +although 1 25 3.737670 3.737670 667 +pattern 2 24 3.761200 7.522400 689 +motion 1 24 3.761200 3.761200 699 +wish 1 24 3.761200 3.761200 692 +store 1 24 3.761200 3.761200 693 +other 1 24 3.761200 3.761200 697 +displai 2 23 3.806662 7.613324 712 +recognit 1 23 3.806662 3.806662 723 +head 1 23 3.806662 3.806662 732 +compress 1 23 3.806662 3.806662 719 +proof 1 23 3.806662 3.806662 720 +highli 1 23 3.806662 3.806662 725 +miscellan 1 23 3.806662 3.806662 731 +color 2 22 3.850148 7.700296 762 +varieti 2 22 3.850148 7.700296 740 +defin 1 22 3.850148 3.850148 746 +instead 1 22 3.850148 3.850148 756 +william 1 22 3.850148 3.850148 765 +disk 1 22 3.850148 3.850148 747 +sent 1 22 3.850148 3.850148 763 +recommend 1 22 3.850148 3.850148 737 +output 2 21 3.912023 7.824046 788 +tell 1 21 3.912023 3.912023 777 +fact 1 21 3.912023 3.912023 780 +wang 1 21 3.912023 3.912023 790 +sure 2 20 3.951244 7.902488 813 +entir 1 20 3.951244 3.951244 811 +toolkit 1 20 3.951244 3.951244 835 +prerequisit 1 19 4.007333 4.007333 846 +citi 1 19 4.007333 4.007333 874 +comparison 1 19 4.007333 4.007333 863 +boston 1 19 4.007333 4.007333 862 +account 4 18 4.060443 16.241772 882 +dimension 2 18 4.060443 8.120886 909 +four 2 18 4.060443 8.120886 905 +demo 2 18 4.060443 8.120886 888 +segment 2 17 4.110874 8.221748 931 +modif 2 17 4.110874 8.221748 913 +matrix 2 17 4.110874 8.221748 933 +stanford 2 17 4.110874 8.221748 955 +estim 1 17 4.110874 4.110874 930 +regular 1 17 4.110874 4.110874 929 +condit 2 16 4.174387 8.348774 975 +zhang 1 16 4.174387 4.174387 980 +earli 1 16 4.174387 4.174387 968 +sheet 1 16 4.174387 4.174387 973 +track 1 15 4.248495 4.248495 1029 +score 1 15 4.248495 4.248495 1017 +transit 1 15 4.248495 4.248495 1046 +goe 1 15 4.248495 4.248495 1044 +side 1 15 4.248495 4.248495 1022 +rate 1 15 4.248495 4.248495 1037 +doit 8 14 4.317488 34.539904 1111 +chuck 2 14 4.317488 8.634976 1108 +scene 2 14 4.317488 8.634976 1114 +matlab 2 14 4.317488 8.634976 1081 +save 1 14 4.317488 4.317488 1099 +manner 1 14 4.317488 4.317488 1074 +consider 1 14 4.317488 4.317488 1076 +command 1 14 4.317488 4.317488 1083 +block 2 13 4.382027 8.764054 1183 +primarili 1 13 4.382027 4.382027 1185 +convert 1 13 4.382027 4.382027 1122 +emac 1 13 4.382027 4.382027 1143 +everyon 1 13 4.382027 4.382027 1148 +shape 3 12 4.465908 13.397724 1245 +calculu 1 12 4.465908 4.465908 1203 +overal 1 12 4.465908 4.465908 1254 +count 1 12 4.465908 4.465908 1239 +optic 1 12 4.465908 4.465908 1221 +realiti 1 12 4.465908 4.465908 1272 +qualit 1 11 4.553877 4.553877 1362 +appl 1 11 4.553877 4.553877 1303 +vista 7 10 4.653960 32.577720 1452 +mosaic 5 10 4.653960 23.269800 1426 +modul 2 10 4.653960 9.307920 1434 +hint 2 10 4.653960 9.307920 1419 +queue 2 10 4.653960 9.307920 1386 +rapid 1 10 4.653960 4.653960 1453 +bring 1 10 4.653960 4.653960 1430 +dyer 3 9 4.753590 14.260770 1573 +face 2 9 4.753590 9.507180 1501 +distanc 2 9 4.753590 9.507180 1500 +recoveri 1 9 4.753590 4.753590 1474 +printer 4 8 4.875197 19.500788 1621 +depth 2 8 4.875197 9.750394 1636 +convers 2 8 4.875197 9.750394 1673 +edg 1 8 4.875197 4.875197 1647 +contrast 1 8 4.875197 4.875197 1637 +job 1 8 4.875197 4.875197 1702 +virginia 1 8 4.875197 4.875197 1659 +shade 2 7 5.010635 10.021270 1881 +stereo 2 7 5.010635 10.021270 1818 +prevent 1 7 5.010635 5.010635 1827 +corner 1 7 5.010635 5.010635 1909 +header 1 7 5.010635 5.010635 1787 +compact 1 7 5.010635 5.010635 1907 +signal 1 7 5.010635 5.010635 1910 +sweden 1 7 5.010635 5.010635 1885 +spline 3 6 5.164786 15.494358 2007 +viewpoint 1 6 5.164786 5.164786 2116 +gzip 1 6 5.164786 5.164786 2117 +invok 1 6 5.164786 5.164786 2079 +classroom 1 6 5.164786 5.164786 2006 +televis 1 6 5.164786 5.164786 2118 +spie 1 6 5.164786 5.164786 2119 +rotat 2 5 5.347108 10.694216 2295 +snake 2 5 5.347108 10.694216 2281 +bryan 1 5 5.347108 5.347108 2421 +jain 1 5 5.347108 5.347108 2332 +mcgraw 1 5 5.347108 5.347108 2262 +adjust 1 5 5.347108 5.347108 2422 +button 1 5 5.347108 5.347108 2337 +constant 1 5 5.347108 5.347108 2251 +multiresolut 1 5 5.347108 5.347108 2423 +sparcstat 1 5 5.347108 5.347108 2406 +shortest 1 5 5.347108 5.347108 2424 +grand 1 5 5.347108 5.347108 2425 +contour 3 4 5.568345 16.705035 2812 +sold 2 4 5.568345 11.136690 2813 +delet 2 4 5.568345 11.136690 2691 +assignmentshomework 1 4 5.568345 5.568345 2721 +shah 1 4 5.568345 5.568345 2814 +thin 5 3 5.857933 29.289665 3488 +pyramid 2 3 5.857933 11.715866 3358 +tran 2 3 5.857933 11.715866 3384 +faq 2 3 5.857933 11.715866 3216 +visionc 1 3 5.857933 5.857933 3489 +histogram 1 3 5.857933 5.857933 3490 +portrait 1 3 5.857933 5.857933 3491 +gradient 1 3 5.857933 5.857933 3479 +surround 1 3 5.857933 5.857933 3492 +suen 1 3 5.857933 5.857933 3446 +toolbox 1 3 5.857933 5.857933 3112 +quicktim 1 3 5.857933 5.857933 3493 +qbic 1 3 5.857933 5.857933 3294 +cardiff 1 3 5.857933 5.857933 3154 +khoro 5 2 6.263398 31.316990 4488 +comm 3 2 6.263398 18.790194 4746 +laser 3 2 6.263398 18.790194 4747 +skeleton 2 2 6.263398 12.526796 4225 +disappear 2 2 6.263398 12.526796 4748 +burt 2 2 6.263398 12.526796 4494 +visionfal 1 2 6.263398 6.263398 4749 +shoulder 1 2 6.263398 6.263398 4750 +altogeth 1 2 6.263398 6.263398 4751 +supplementari 1 2 6.263398 6.263398 4752 +quota 1 2 6.263398 6.263398 4753 +caution 1 2 6.263398 6.263398 4754 +cantata 1 2 6.263398 6.263398 4489 +panoram 1 2 6.263398 6.263398 4755 +royal 1 2 6.263398 6.263398 4756 +adelson 3 1 6.957497 20.872491 8941 +csstelephon 2 1 6.957497 13.914994 8942 +ubyt 2 1 6.957497 13.914994 8943 +imgstar 2 1 6.957497 13.914994 8944 +hdtv 2 1 6.957497 13.914994 8945 +atsc 2 1 6.957497 13.914994 8946 +dyeroffic 1 1 6.957497 6.957497 8947 +appointmentteach 1 1 6.957497 6.957497 8948 +sooffic 1 1 6.957497 6.957497 8949 +appointmentstud 1 1 6.957497 6.957497 8950 +informationfundament 1 1 6.957497 6.957497 8951 +featuredetect 1 1 6.957497 6.957497 8952 +forreconstruct 1 1 6.957497 6.957497 8953 +usingtechniqu 1 1 6.957497 6.957497 8954 +asshap 1 1 6.957497 6.957497 8955 +andocclud 1 1 6.957497 6.957497 8956 +kasturi 1 1 6.957497 6.957497 8957 +schunck 1 1 6.957497 6.957497 8958 +readingsfrom 1 1 6.957497 6.957497 8959 +batchessupplementari 1 1 6.957497 6.957497 8960 +sourcesonlin 1 1 6.957497 6.957497 8961 +informationmost 1 1 6.957497 6.957497 8962 +urlhttp 1 1 6.957497 6.957497 8963 +byfirst 1 1 6.957497 6.957497 8964 +crop 1 1 6.957497 6.957497 8965 +theintens 1 1 6.957497 6.957497 8966 +thewindow 1 1 6.957497 6.957497 8967 +colorif 1 1 6.957497 6.957497 8968 +grayscal 1 1 6.957497 6.957497 8969 +transformationsav 1 1 6.957497 6.957497 8970 +andput 1 1 6.957497 6.957497 8971 +whereth 1 1 6.957497 6.957497 8972 +whatintens 1 1 6.957497 6.957497 8973 +qualityof 1 1 6.957497 6.957497 8974 +ownweb 1 1 6.957497 6.957497 8975 +infin 1 1 6.957497 6.957497 8976 +chessboard 1 1 6.957497 6.957497 8977 +vconvert 1 1 6.957497 6.957497 8978 +clean 1 1 6.957497 6.957497 8979 +repn 1 1 6.957497 6.957497 8980 +component_interp 1 1 6.957497 6.957497 8981 +low_threshold 1 1 6.957497 6.957497 8982 +high_threshold 1 1 6.957497 6.957497 8983 +vlink 1 1 6.957497 6.957497 8984 +vsegedg 1 1 6.957497 6.957497 8985 +laplacian 1 1 6.957497 6.957497 8986 +kass 1 1 6.957497 6.957497 8987 +witkin 1 1 6.957497 6.957497 8988 +terzopoulo 1 1 6.957497 6.957497 8989 +curvatur 1 1 6.957497 6.957497 8990 +laserprint 1 1 6.957497 6.957497 8991 +netpbm 1 1 6.957497 6.957497 8992 +pbmplu 1 1 6.957497 6.957497 8993 +wandel 1 1 6.957497 6.957497 8994 +allianc 1 1 6.957497 6.957497 8995 +panoramix 1 1 6.957497 6.957497 8996 +decfac 1 1 6.957497 6.957497 8997 +synthet 1 1 6.957497 6.957497 8998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html new file mode 100644 index 00000000..7b245641 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~dzimm^cs302.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 3 374 0.693147 2.079441 7 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +fall 2 181 1.609438 3.218876 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +base 1 165 1.791759 1.791759 50 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +number 1 130 2.079442 2.079442 97 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +section 2 94 2.397895 4.795790 149 +octob 1 89 2.397895 2.397895 156 +novemb 1 81 2.484907 2.484907 179 +meet 1 72 2.639057 2.639057 229 +solv 1 73 2.639057 2.639057 234 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +publish 1 57 2.890372 2.890372 326 +digit 1 52 2.995732 2.995732 348 +algebra 1 45 3.135494 3.135494 394 +fridai 1 44 3.135494 3.135494 390 +compani 1 41 3.218876 3.218876 423 +known 1 24 3.761200 3.761200 702 +greg 1 24 3.761200 3.761200 695 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +style 1 15 4.248495 4.248495 1036 +dave 2 14 4.317488 8.634976 1098 +readi 1 12 4.465908 4.465908 1242 +informationemail 1 9 4.753590 4.753590 1564 +sharp 2 6 5.164786 10.329572 2100 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +objectivesvectra 1 3 5.857933 5.857933 3410 +homeclass 1 3 5.857933 5.857933 3411 +policyl 1 3 5.857933 5.857933 3412 +policyacadem 1 3 5.857933 5.857933 3413 +consultantssyllabuswork 1 2 6.263398 6.263398 4579 +referenc 1 2 6.263398 6.263398 4757 +dzimm 2 1 6.957497 13.914994 8999 +zimmermannemail 1 1 6.957497 6.957497 9000 +educlass 1 1 6.957497 6.957497 9001 +nolandoffic 1 1 6.957497 6.957497 9002 +announcementsprogram 1 1 6.957497 6.957497 9003 +handoutsprogramsexam 1 1 6.957497 6.957497 9004 +quizzeslectur 1 1 6.957497 6.957497 9005 +notesgreg 1 1 6.957497 6.957497 9006 +guidegrad 1 1 6.957497 6.957497 9007 +quizzesprogramsexam 1 1 6.957497 6.957497 9008 +policytext 1 1 6.957497 6.957497 9009 +zimmermann 1 1 6.957497 6.957497 9010 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html new file mode 100644 index 00000000..0e2225f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs525-all.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +program 5 374 0.693147 3.465735 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +cours 3 273 1.098612 3.295836 15 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +fall 2 181 1.609438 3.218876 40 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +spring 2 131 2.079442 4.158884 88 +instructor 1 108 2.197225 2.197225 107 +theori 1 111 2.197225 2.197225 127 +larg 1 82 2.484907 2.484907 168 +method 2 80 2.564949 5.129898 213 +januari 1 62 2.772589 2.772589 264 +semest 1 58 2.890372 2.890372 312 +variou 1 56 2.890372 2.890372 317 +offer 1 43 3.178054 3.178054 414 +linear 2 41 3.218876 6.437752 431 +michael 1 35 3.401197 3.401197 514 +flow 1 24 3.761200 3.761200 700 +spars 1 16 4.174387 4.174387 989 +nonlinear 2 14 4.317488 8.634976 1107 +mangasarian 1 9 4.753590 4.753590 1570 +ferri 1 8 4.875197 4.875197 1715 +integ 1 8 4.875197 4.875197 1688 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html new file mode 100644 index 00000000..4878676f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs719.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +inform 2 412 0.693147 1.386294 8 +cours 5 273 1.098612 5.493060 15 +offic 4 299 1.098612 4.394448 13 +us 2 329 1.098612 2.197224 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +wisc 3 242 1.386294 4.158882 33 +gener 2 220 1.386294 2.772588 27 +class 4 199 1.609438 6.437752 37 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +network 4 168 1.791759 7.167036 61 +hour 2 165 1.791759 3.583518 46 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +assign 3 135 1.945910 5.837730 66 +first 2 140 1.945910 3.891820 71 +lectur 1 135 1.945910 1.945910 73 +hall 1 146 1.945910 1.945910 65 +machin 2 129 2.079442 4.158884 95 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +text 2 98 2.302585 4.605170 133 +user 2 104 2.302585 4.605170 137 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +grade 4 90 2.397895 9.591580 142 +question 1 91 2.397895 2.397895 141 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +homework 9 79 2.564949 23.084541 193 +mondai 6 77 2.564949 15.389694 206 +april 3 77 2.564949 7.694847 196 +orient 2 80 2.564949 5.129898 205 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 3 70 2.708050 8.124150 241 +knowledg 1 67 2.708050 2.708050 243 +wednesdai 2 64 2.772589 5.545178 261 +januari 1 62 2.772589 2.772589 264 +march 4 61 2.833213 11.332852 295 +room 2 59 2.833213 5.666426 301 +unix 5 58 2.890372 14.451860 308 +overview 1 56 2.890372 2.890372 323 +local 3 55 2.944439 8.833317 334 +februari 1 54 2.944439 2.944439 328 +allow 1 53 2.944439 2.944439 333 +week 2 52 2.995732 5.991464 343 +telephon 2 50 3.044522 6.089044 373 +set 1 50 3.044522 3.044522 361 +fridai 7 44 3.135494 21.948458 390 +directori 1 45 3.135494 3.135494 396 +linear 3 41 3.218876 9.656628 431 +press 1 42 3.218876 3.218876 419 +examin 1 42 3.218876 3.218876 424 +close 1 38 3.295837 3.295837 465 +cost 1 37 3.332205 3.332205 480 +workstat 1 37 3.332205 3.332205 479 +tree 1 36 3.367296 3.367296 492 +michael 1 35 3.401197 3.401197 514 +jame 1 35 3.401197 3.401197 507 +except 1 28 3.610918 3.610918 607 +held 1 28 3.610918 3.610918 600 +session 2 26 3.688879 7.377758 643 +repres 1 26 3.688879 3.688879 656 +flow 4 24 3.761200 15.044800 700 +path 3 21 3.912023 11.736069 778 +minut 1 20 3.951244 3.951244 810 +prerequisit 1 19 4.007333 4.007333 846 +thoma 1 18 4.060443 4.060443 901 +appropri 1 18 4.060443 4.060443 883 +previous 1 17 4.110874 4.110874 923 +sheet 1 16 4.174387 4.174387 973 +solari 1 12 4.465908 4.465908 1238 +cycl 1 11 4.553877 4.553877 1335 +login 1 9 4.753590 4.753590 1550 +ferri 2 8 4.875197 9.750394 1715 +prentic 1 7 5.010635 5.010635 1838 +relax 1 6 5.164786 5.164786 2120 +shortest 1 5 5.347108 5.347108 2424 +cshrc 3 4 5.568345 16.705035 2759 +freeman 1 4 5.568345 5.568345 2725 +convex 1 4 5.568345 5.568345 2807 +novic 1 4 5.568345 5.568345 2815 +ahuja 1 3 5.857933 5.857933 3494 +bertseka 1 3 5.857933 5.857933 3477 +lagrangian 1 3 5.857933 5.857933 3478 +gam 2 2 6.263398 12.526796 4758 +leei 1 2 6.263398 6.263398 4759 +equilibria 1 2 6.263398 6.263398 4760 +multicommod 1 2 6.263398 6.263398 4761 +flowsspr 1 1 6.957497 6.957497 9011 +ravindra 1 1 6.957497 6.957497 9012 +magnanti 1 1 6.957497 6.957497 9013 +orlin 1 1 6.957497 6.957497 9014 +chvatal 1 1 6.957497 6.957497 9015 +simplex 1 1 6.957497 6.957497 9016 +alter 1 1 6.957497 6.957497 9017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html new file mode 100644 index 00000000..c3a254f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ferris^cs733.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +cours 5 273 1.098612 5.493060 15 +offic 4 299 1.098612 4.394448 13 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +wisc 3 242 1.386294 4.158882 33 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +class 4 199 1.609438 6.437752 37 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +hour 2 165 1.791759 3.583518 46 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +assign 3 135 1.945910 5.837730 66 +first 2 140 1.945910 3.891820 71 +lectur 1 135 1.945910 1.945910 73 +introduct 2 126 2.079442 4.158884 87 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +instructor 2 108 2.197225 4.394450 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +user 2 104 2.302585 4.605170 137 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +grade 4 90 2.397895 9.591580 142 +question 1 91 2.397895 2.397895 141 +second 2 81 2.484907 4.969814 166 +larg 1 82 2.484907 2.484907 168 +ieee 1 86 2.484907 2.484907 190 +homework 10 79 2.564949 25.649490 193 +mondai 7 77 2.564949 17.954643 206 +april 3 77 2.564949 7.694847 196 +method 2 80 2.564949 5.129898 213 +orient 2 80 2.564949 5.129898 205 +optim 1 79 2.564949 2.564949 197 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 3 70 2.708050 8.124150 241 +handout 3 64 2.772589 8.317767 263 +wednesdai 2 64 2.772589 5.545178 261 +januari 1 62 2.772589 2.772589 264 +march 4 61 2.833213 11.332852 295 +room 2 59 2.833213 5.666426 301 +unix 5 58 2.890372 14.451860 308 +direct 1 57 2.890372 2.890372 316 +space 1 57 2.890372 2.890372 310 +overview 1 56 2.890372 2.890372 323 +februari 2 54 2.944439 5.888878 328 +local 1 55 2.944439 2.944439 334 +allow 1 53 2.944439 2.944439 333 +instruct 1 53 2.944439 2.944439 332 +week 2 52 2.995732 5.991464 343 +telephon 2 50 3.044522 6.089044 373 +fridai 7 44 3.135494 21.948458 390 +math 2 44 3.135494 6.270988 402 +textbook 1 44 3.135494 3.135494 397 +press 1 42 3.218876 3.218876 419 +edit 1 42 3.218876 3.218876 418 +linear 1 41 3.218876 3.218876 431 +examin 1 42 3.218876 3.218876 424 +error 1 40 3.258097 3.258097 449 +close 1 38 3.295837 3.295837 465 +workstat 1 37 3.332205 3.332205 479 +michael 1 35 3.401197 3.401197 514 +least 1 35 3.401197 3.401197 516 +john 1 33 3.433987 3.433987 532 +storag 1 31 3.496508 3.496508 553 +except 1 28 3.610918 3.610918 607 +held 1 28 3.610918 3.610918 600 +session 2 26 3.688879 7.377758 643 +repres 1 26 3.688879 3.688879 656 +strategi 1 25 3.737670 3.737670 682 +equat 1 23 3.806662 3.806662 724 +recommend 1 22 3.850148 3.850148 737 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +scheme 1 20 3.951244 3.951244 818 +minut 1 20 3.951244 3.951244 810 +prerequisit 1 19 4.007333 4.007333 846 +dimension 1 18 4.060443 4.060443 909 +matrix 2 17 4.110874 8.221748 933 +modif 1 17 4.110874 4.110874 913 +previous 1 17 4.110874 4.110874 923 +spars 6 16 4.174387 25.046322 989 +vector 1 16 4.174387 4.174387 961 +sheet 1 16 4.174387 4.174387 973 +finit 1 14 4.317488 4.317488 1106 +squar 1 14 4.317488 4.317488 1082 +nonlinear 1 14 4.317488 4.317488 1107 +matlab 1 14 4.317488 4.317488 1081 +iter 1 12 4.465908 4.465908 1206 +matric 1 10 4.653960 4.653960 1399 +arithmet 1 10 4.653960 4.653960 1388 +elimin 1 9 4.753590 4.753590 1558 +ferri 2 8 4.875197 9.750394 1715 +solver 1 7 5.010635 5.010635 1911 +oxford 1 6 5.164786 5.164786 2121 +dens 1 6 5.164786 5.164786 2122 +pivot 1 5 5.347108 5.347108 2426 +consent 1 5 5.347108 5.347108 2389 +novic 1 4 5.568345 5.568345 2815 +golub 1 3 5.857933 5.857933 3265 +eigenvalu 1 3 5.857933 5.857933 3364 +eigenvector 1 3 5.857933 5.857933 3365 +systemsspr 1 2 6.263398 6.263398 4762 +leei 1 2 6.263398 6.263398 4759 +loan 1 2 6.263398 6.263398 4147 +gaussian 1 2 6.263398 6.263398 4763 +hopkinsunivers 1 1 6.957497 6.957497 9018 +duff 1 1 6.957497 6.957497 9019 +erisman 1 1 6.957497 6.957497 9020 +reid 1 1 6.957497 6.957497 9021 +halmo 1 1 6.957497 6.957497 9022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html new file mode 100644 index 00000000..e9335970 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~fischer^cs701.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +offic 6 299 1.098612 6.591672 13 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +mail 3 238 1.386294 4.158882 22 +wisc 3 242 1.386294 4.158882 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +hour 3 165 1.791759 5.375277 46 +read 1 154 1.791759 1.791759 47 +lectur 2 135 1.945910 3.891820 73 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +note 1 142 1.945910 1.945910 67 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +teach 2 108 2.197225 4.394450 112 +assist 2 112 2.197225 4.394450 113 +instructor 1 108 2.197225 2.197225 107 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +grade 1 90 2.397895 2.397895 142 +start 1 83 2.484907 2.484907 173 +mondai 2 77 2.564949 5.129898 206 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +wednesdai 2 64 2.772589 5.545178 261 +handout 1 64 2.772589 2.772589 263 +overview 1 56 2.890372 2.890372 323 +date 1 51 2.995732 2.995732 344 +telephon 3 50 3.044522 9.133566 373 +appoint 2 49 3.044522 6.089044 358 +get 1 46 3.091042 3.091042 380 +fridai 2 44 3.135494 6.270988 390 +examin 1 42 3.218876 3.218876 424 +richard 1 31 3.496508 3.496508 559 +charl 2 13 4.382027 8.764054 1149 +benjamin 1 11 4.553877 4.553877 1296 +regularli 1 11 4.553877 4.553877 1338 +tuth 1 9 4.753590 4.753590 1519 +cum 1 8 4.875197 4.875197 1619 +fischer 3 7 5.010635 15.031905 1893 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +krishna 1 3 5.857933 5.857933 3495 +weyer 2 2 6.263398 12.526796 4558 +compilersfal 1 2 6.263398 6.263398 4223 +csst 1 2 6.263398 6.263398 4764 +krisna 1 2 6.263398 6.263398 4765 +kunchithapadam 1 1 6.957497 6.957497 9023 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~greg^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~greg^cs302.html new file mode 100644 index 00000000..2fe9a933 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~greg^cs302.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +wisc 4 242 1.386294 5.545176 33 +languag 1 227 1.386294 1.386294 26 +fall 2 181 1.609438 3.218876 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +object 1 138 1.945910 1.945910 79 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +section 2 94 2.397895 4.795790 149 +solv 1 73 2.639057 2.639057 234 +html 1 75 2.639057 2.639057 235 +polici 1 64 2.772589 2.772589 279 +publish 1 57 2.890372 2.890372 326 +algebra 1 45 3.135494 3.135494 394 +compani 1 41 3.218876 3.218876 423 +http 1 41 3.218876 3.218876 420 +must 1 40 3.258097 3.258097 442 +eduoffic 1 33 3.433987 3.433987 531 +greg 5 24 3.761200 18.806000 695 +known 1 24 3.761200 3.761200 702 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +informationemail 1 9 4.753590 4.753590 1564 +appt 1 5 5.347108 5.347108 2312 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +grader 1 3 5.857933 5.857933 3165 +krishna 1 3 5.857933 5.857933 3495 +objectivesvectra 1 3 5.857933 5.857933 3410 +policyl 1 3 5.857933 5.857933 3412 +policyacadem 1 3 5.857933 5.857933 3413 +sharpemail 1 2 6.263398 6.263398 4766 +krisna 1 2 6.263398 6.263398 4765 +archivepolici 1 2 6.263398 6.263398 4580 +sharpgreg 1 2 6.263398 6.263398 4767 +kunchithapadamemail 1 1 6.957497 6.957497 9024 +edugener 1 1 6.957497 6.957497 9025 +consultantssyllabuscours 1 1 6.957497 6.957497 9026 +difficultywork 1 1 6.957497 6.957497 9027 +homenewsstartup 1 1 6.957497 6.957497 9028 +informationclass 1 1 6.957497 6.957497 9029 +noteshomeworkexam 1 1 6.957497 6.957497 9030 +quizzesstyl 1 1 6.957497 6.957497 9031 +guideemail 1 1 6.957497 6.957497 9032 +textproblem 1 1 6.957497 6.957497 9033 +porgrammingwalt 1 1 6.957497 6.957497 9034 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html new file mode 100644 index 00000000..b5e2947c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~hcl^cs302.html @@ -0,0 +1,160 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +program 9 374 0.693147 6.238323 7 +inform 4 412 0.693147 2.772588 8 +work 2 380 0.693147 1.386294 9 +us 2 329 1.098612 2.197224 16 +project 2 340 1.098612 2.197224 18 +time 1 293 1.098612 1.098612 17 +also 3 259 1.386294 4.158882 28 +link 2 247 1.386294 2.772588 24 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +class 7 199 1.609438 11.266066 37 +fall 2 181 1.609438 3.218876 40 +public 2 202 1.609438 3.218876 43 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +read 2 154 1.791759 3.583518 47 +assign 3 135 1.945910 5.837730 66 +problem 2 147 1.945910 3.891820 75 +note 2 142 1.945910 3.891820 67 +file 2 132 1.945910 3.891820 70 +object 1 138 1.945910 1.945910 79 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +compil 2 122 2.079442 4.158884 96 +introduct 2 126 2.079442 4.158884 87 +version 3 113 2.197225 6.591675 122 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +code 1 108 2.197225 2.197225 116 +need 4 98 2.302585 9.210340 135 +text 2 98 2.302585 4.605170 133 +part 1 98 2.302585 2.302585 129 +section 4 94 2.397895 9.591580 149 +grade 3 90 2.397895 7.193685 142 +call 2 91 2.397895 4.795790 153 +question 2 91 2.397895 4.795790 141 +comment 1 93 2.397895 2.397895 146 +mani 1 92 2.397895 2.397895 150 +chang 3 82 2.484907 7.454721 163 +help 1 83 2.484907 2.484907 175 +start 1 83 2.484907 2.484907 173 +requir 1 81 2.484907 2.484907 167 +stuff 1 87 2.484907 2.484907 171 +academ 1 82 2.484907 2.484907 178 +second 1 81 2.484907 2.484907 166 +want 2 79 2.564949 5.129898 199 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +name 5 72 2.639057 13.195285 220 +line 4 75 2.639057 10.556228 231 +tuesdai 1 73 2.639057 2.639057 219 +solv 1 73 2.639057 2.639057 234 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +test 2 66 2.708050 5.416100 252 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +polici 5 64 2.772589 13.862945 279 +import 2 65 2.772589 5.545178 282 +copi 2 63 2.772589 5.545178 284 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +descript 1 64 2.772589 2.772589 271 +handout 1 64 2.772589 2.772589 263 +type 2 61 2.833213 5.666426 296 +semest 1 58 2.890372 2.890372 312 +publish 1 57 2.890372 2.890372 326 +sampl 2 53 2.944439 5.888878 339 +suggest 1 53 2.944439 2.944439 331 +basic 1 50 3.044522 3.044522 360 +understand 1 47 3.091042 3.091042 384 +directori 2 45 3.135494 6.270988 396 +algebra 1 45 3.135494 3.135494 394 +midterm 1 45 3.135494 3.135494 392 +answer 1 45 3.135494 3.135494 391 +might 1 41 3.218876 3.218876 426 +compani 1 41 3.218876 3.218876 423 +announc 1 40 3.258097 3.258097 441 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +mean 3 37 3.332205 9.996615 477 +either 1 35 3.401197 3.401197 506 +within 1 33 3.433987 3.433987 525 +ad 1 32 3.465736 3.465736 544 +someth 1 31 3.496508 3.496508 554 +hard 1 30 3.555348 3.555348 563 +great 1 27 3.637586 3.637586 626 +administr 1 27 3.637586 3.637586 628 +consist 2 26 3.688879 7.377758 651 +valu 1 25 3.737670 3.737670 665 +consult 1 24 3.761200 3.761200 687 +togeth 1 23 3.806662 3.806662 714 +defin 1 22 3.850148 3.850148 746 +tent 1 22 3.850148 3.850148 739 +sure 1 20 3.951244 3.951244 813 +entir 1 20 3.951244 3.951244 811 +fine 1 20 3.951244 3.951244 822 +prepar 1 20 3.951244 3.951244 824 +definit 1 19 4.007333 4.007333 864 +attend 1 18 4.060443 4.060443 893 +attempt 1 17 4.110874 4.110874 917 +walter 1 17 4.110874 4.110874 950 +earli 1 16 4.174387 4.174387 968 +weslei 1 16 4.174387 4.174387 983 +misconduct 1 16 4.174387 4.174387 1003 +piec 1 15 4.248495 4.248495 1020 +style 1 15 4.248495 4.248495 1036 +shown 1 14 4.317488 4.317488 1080 +borland 1 14 4.317488 4.317488 1067 +everyth 2 13 4.382027 8.764054 1169 +quizz 2 13 4.382027 8.764054 1151 +bodi 1 13 4.382027 4.382027 1178 +outsid 1 12 4.465908 4.465908 1219 +insid 1 12 4.465908 4.465908 1262 +readi 1 12 4.465908 4.465908 1242 +savitch 1 12 4.465908 4.465908 1269 +addison 1 12 4.465908 4.465908 1230 +vectra 1 12 4.465908 4.465908 1267 +tue 1 11 4.553877 4.553877 1308 +chri 1 11 4.553877 4.553877 1311 +noth 1 11 4.553877 4.553877 1328 +extra 1 11 4.553877 4.553877 1312 +correspond 1 10 4.653960 4.653960 1382 +bring 1 10 4.653960 4.653960 1430 +declar 1 9 4.753590 4.753590 1526 +lane 2 8 4.875197 9.750394 1720 +matter 1 8 4.875197 4.875197 1627 +bottom 1 7 5.010635 5.010635 1906 +throughout 1 7 5.010635 5.010635 1871 +sharp 2 6 5.164786 10.329572 2100 +recogn 1 5 5.347108 5.347108 2302 +crucial 1 5 5.347108 5.347108 2384 +prog 7 4 5.568345 38.978415 2740 +shouldn 1 4 5.568345 5.568345 2606 +thumb 1 4 5.568345 5.568345 2816 +enumer 2 3 5.857933 11.715866 3244 +privat 2 3 5.857933 11.715866 3496 +bump 1 3 5.857933 5.857933 3497 +obsolet 1 3 5.857933 5.857933 3196 +freshman 1 3 5.857933 5.857933 3462 +chad 2 2 6.263398 12.526796 4768 +forgot 1 2 6.263398 6.263398 4769 +weaver 1 2 6.263398 6.263398 4770 +freshmen 1 2 6.263398 6.263398 4554 +disregard 1 2 6.263398 6.263398 4189 +tribbl 2 1 6.957497 13.914994 9035 +randomintinrang 1 1 6.957497 6.957497 9036 +uppercas 1 1 6.957497 6.957497 9037 +overwrit 1 1 6.957497 6.957497 9038 +discrep 1 1 6.957497 6.957497 9039 +solutionscours 1 1 6.957497 6.957497 9040 +vleck 1 1 6.957497 6.957497 9041 +guidelast 1 1 6.957497 6.957497 9042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html new file mode 100644 index 00000000..7ede2193 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~horwitz^cs536^cs536.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +program 3 374 0.693147 2.079441 7 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +offic 4 299 1.098612 4.394448 13 +cours 2 273 1.098612 2.197224 15 +us 1 329 1.098612 1.098612 16 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +hour 2 165 1.791759 3.583518 46 +read 1 154 1.791759 1.791759 47 +lectur 2 135 1.945910 3.891820 73 +assign 2 135 1.945910 3.891820 66 +note 1 142 1.945910 1.945910 67 +compil 2 122 2.079442 4.158884 96 +tool 2 117 2.079442 4.158884 93 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +techniqu 1 99 2.302585 2.302585 138 +grade 2 90 2.397895 4.795790 142 +octob 1 89 2.397895 2.397895 156 +librari 1 87 2.484907 2.484907 181 +exam 1 86 2.484907 2.484907 169 +start 1 83 2.484907 2.484907 173 +mondai 1 77 2.564949 2.564949 206 +homework 1 79 2.564949 2.564949 193 +tuesdai 1 73 2.639057 2.639057 219 +wednesdai 1 64 2.772589 2.772589 261 +polici 1 64 2.772589 2.772589 279 +overview 1 56 2.890372 2.890372 323 +date 1 51 2.995732 2.995732 344 +telephon 2 50 3.044522 6.089044 373 +appoint 2 49 3.044522 6.089044 358 +principl 1 48 3.044522 3.044522 357 +get 1 46 3.091042 3.091042 380 +fridai 1 44 3.135494 3.135494 390 +examin 1 42 3.218876 3.218876 424 +late 1 40 3.258097 3.258097 439 +comp 1 26 3.688879 3.688879 650 +reserv 1 20 3.951244 3.951244 808 +stat 1 17 4.110874 4.110874 924 +month 1 15 4.248495 4.248495 1025 +psycholog 1 15 4.248495 4.248495 1054 +susan 1 15 4.248495 4.248495 1050 +stori 1 14 4.317488 4.317488 1087 +regularli 1 11 4.553877 4.553877 1338 +wendt 1 10 4.653960 4.653960 1446 +tuth 1 9 4.753590 4.753590 1519 +recit 1 9 4.753590 4.753590 1475 +fischer 1 7 5.010635 5.010635 1893 +horwitz 2 5 5.347108 10.694216 2411 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +ullman 1 4 5.568345 5.568345 2749 +rahul 2 3 5.857933 11.715866 3464 +compilersspr 1 2 6.263398 6.263398 4700 +kapoor 1 2 6.263398 6.263398 4701 +sethi 1 2 6.263398 6.263398 4702 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html new file mode 100644 index 00000000..c002c233 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~hummert^cs110^cs110.html @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 3 374 0.693147 2.079441 7 +inform 3 412 0.693147 2.079441 8 +offic 3 299 1.098612 3.295836 13 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +introduct 2 126 2.079442 4.158884 87 +instructor 1 108 2.197225 2.197225 107 +text 1 98 2.302585 2.302585 133 +grade 2 90 2.397895 4.795790 142 +section 1 94 2.397895 2.397895 149 +academ 1 82 2.484907 2.484907 178 +mondai 1 77 2.564949 2.564949 206 +solv 1 73 2.639057 2.639057 234 +window 2 68 2.708050 5.416100 242 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +semest 1 58 2.890372 2.890372 312 +announc 1 40 3.258097 3.258097 441 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +statist 2 35 3.401197 6.802394 521 +tent 1 22 3.850148 3.850148 739 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +savitch 2 12 4.465908 8.931816 1269 +criteria 1 9 4.753590 4.753590 1477 +hummert 3 3 5.857933 17.573799 3416 +psych 2 3 5.857933 11.715866 3498 +textbookproblem 1 3 5.857933 5.857933 3483 +windowshint 1 3 5.857933 5.857933 3484 +compilersth 1 3 5.857933 5.857933 3485 +systememailmosaicnetscap 1 3 5.857933 5.857933 3486 +languageth 1 3 5.857933 5.857933 3487 +sectionsc 1 1 6.957497 6.957497 9043 +viewgraph 1 1 6.957497 6.957497 9044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html new file mode 100644 index 00000000..50476cd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~jonb^cs132^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 3 299 1.098612 3.295836 13 +last 1 314 1.098612 1.098612 14 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +modifi 1 178 1.609438 1.609438 35 +hour 2 165 1.791759 3.583518 46 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +madison 1 165 1.791759 1.791759 55 +click 3 142 1.945910 5.837730 78 +hall 1 146 1.945910 1.945910 65 +file 1 132 1.945910 1.945910 70 +number 1 130 2.079442 2.079442 97 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +section 3 94 2.397895 7.193685 149 +question 1 91 2.397895 2.397895 141 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +thing 1 84 2.484907 2.484907 189 +mondai 1 77 2.564949 2.564949 206 +name 1 72 2.639057 2.639057 220 +thursdai 1 70 2.708050 2.708050 241 +copi 1 63 2.772589 2.772589 284 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +keep 1 44 3.135494 3.135494 409 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +mind 1 27 3.637586 3.637586 632 +stop 1 17 4.110874 4.110874 942 +choos 1 16 4.174387 4.174387 964 +doit 1 14 4.317488 4.317488 1111 +menu 1 13 4.382027 4.382027 1156 +bodner 1 5 5.347108 5.347108 2401 +eduher 1 3 5.857933 5.857933 3499 +jonb 2 2 6.263398 12.526796 4771 +infoc 1 2 6.263398 6.263398 4772 +mound 1 2 6.263398 6.263398 4773 +bodnersect 1 1 6.957497 6.957497 9045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html new file mode 100644 index 00000000..e54f983f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~kunen^cs540.html @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 10 374 0.693147 6.931470 7 +inform 4 412 0.693147 2.772588 8 +cours 5 273 1.098612 5.493060 15 +time 3 293 1.098612 3.295836 17 +us 3 329 1.098612 3.295836 16 +last 2 314 1.098612 2.197224 14 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +languag 2 227 1.386294 2.772588 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +avail 4 169 1.791759 7.167036 48 +hour 1 165 1.791759 1.791759 46 +network 1 168 1.791759 1.791759 61 +assign 3 135 1.945910 5.837730 66 +click 3 142 1.945910 5.837730 78 +problem 2 147 1.945910 3.891820 75 +like 1 132 1.945910 1.945910 81 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +introduct 3 126 2.079442 6.238326 87 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +topic 3 114 2.197225 6.591675 110 +final 2 116 2.197225 4.394450 108 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +code 1 108 2.197225 2.197225 116 +teach 1 108 2.197225 2.197225 112 +book 2 99 2.302585 4.605170 131 +section 4 94 2.397895 9.591580 149 +search 2 95 2.397895 4.795790 155 +sinc 2 90 2.397895 4.795790 159 +grade 1 90 2.397895 2.397895 142 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +question 1 91 2.397895 2.397895 141 +exam 9 86 2.484907 22.364163 169 +learn 1 86 2.484907 2.484907 170 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +refer 2 78 2.564949 5.129898 203 +appear 1 78 2.564949 2.564949 210 +line 5 75 2.639057 13.195285 231 +intellig 2 72 2.639057 5.278114 225 +logic 2 71 2.639057 5.278114 230 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +artifici 2 63 2.772589 5.545178 280 +previou 2 62 2.772589 5.545178 290 +plan 1 65 2.772589 2.772589 272 +plai 1 60 2.833213 2.833213 307 +best 1 59 2.833213 2.833213 299 +detail 1 57 2.890372 2.890372 321 +reason 1 57 2.890372 2.890372 318 +three 1 54 2.944439 2.944439 330 +cover 1 55 2.944439 2.944439 329 +appoint 1 49 3.044522 3.044522 358 +approach 1 48 3.044522 3.044522 366 +give 1 50 3.044522 3.044522 359 +still 1 50 3.044522 3.044522 362 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +directori 4 45 3.135494 12.541976 396 +discuss 2 45 3.135494 6.270988 399 +natur 1 44 3.135494 3.135494 406 +anoth 1 45 3.135494 3.135494 408 +textbook 1 44 3.135494 3.135494 397 +answer 1 45 3.135494 3.135494 391 +edit 1 42 3.218876 3.218876 418 +review 1 42 3.218876 3.218876 425 +late 2 40 3.258097 6.516194 439 +probabl 2 40 3.258097 6.516194 455 +game 1 36 3.367296 3.367296 498 +eduoffic 1 33 3.433987 3.433987 531 +within 1 33 3.433987 3.433987 525 +common 5 30 3.555348 17.776740 574 +neural 1 30 3.555348 3.555348 578 +turn 1 29 3.583519 3.583519 586 +usual 1 28 3.610918 3.610918 608 +session 1 26 3.688879 3.688879 643 +notic 1 25 3.737670 3.737670 675 +begin 1 23 3.806662 3.806662 716 +entir 1 20 3.951244 3.951244 811 +minut 1 20 3.951244 3.951244 810 +lisp 10 18 4.060443 40.604430 897 +four 1 18 4.060443 4.060443 905 +attend 1 18 4.060443 4.060443 893 +intro 1 17 4.110874 4.110874 915 +stat 1 17 4.110874 4.110874 924 +ultim 1 17 4.110874 4.110874 943 +modern 1 16 4.174387 4.174387 966 +later 1 15 4.248495 4.248495 1043 +prolog 1 13 4.382027 4.382027 1155 +count 2 12 4.465908 8.931816 1239 +deduct 1 12 4.465908 4.465908 1236 +alpha 1 11 4.553877 4.553877 1348 +engr 1 10 4.653960 4.653960 1427 +hint 1 10 4.653960 4.653960 1419 +recit 2 9 4.753590 9.507180 1475 +russel 1 9 4.753590 4.753590 1507 +beta 1 6 5.164786 5.164786 1993 +supplement 1 5 5.347108 5.347108 2355 +older 1 5 5.347108 5.347108 2387 +midnight 1 4 5.568345 5.568345 2599 +uncertain 1 4 5.568345 5.568345 2758 +graham 1 4 5.568345 5.568345 2817 +steel 1 4 5.568345 5.568345 2818 +kunen 4 3 5.857933 23.431732 3500 +ansi 1 3 5.857933 5.857933 3198 +psych 1 3 5.857933 5.857933 3498 +coursewil 2 2 6.263398 12.526796 4718 +loos 1 2 6.263398 6.263398 4774 +buti 1 2 6.263398 6.263398 4775 +assignmenti 1 2 6.263398 6.263398 4573 +sun 1 2 6.263398 6.263398 4490 +kunenoffic 1 1 6.957497 6.957497 9046 +buildingtelephon 1 1 6.957497 6.957497 9047 +thirdexam 1 1 6.957497 6.957497 9048 +thedai 1 1 6.957497 6.957497 9049 +usefulto 1 1 6.957497 6.957497 9050 +lecturesand 1 1 6.957497 6.957497 9051 +manypaperback 1 1 6.957497 6.957497 9052 +lispcraft 1 1 6.957497 6.957497 9053 +wilenski 1 1 6.957497 6.957497 9054 +norvig 1 1 6.957497 6.957497 9055 +essentiallli 1 1 6.957497 6.957497 9056 +alpha_beta 1 1 6.957497 6.957497 9057 +astar 1 1 6.957497 6.957497 9058 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html new file mode 100644 index 00000000..24b37750 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~markhill^cs752^fall94-95^cs752.html @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +inform 2 412 0.693147 1.386294 8 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +paper 4 205 1.609438 6.437752 38 +class 2 199 1.609438 3.218876 37 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +parallel 3 169 1.791759 5.375277 60 +hour 2 165 1.791759 3.583518 46 +wisconsin 1 169 1.791759 1.791759 54 +assign 7 135 1.945910 13.621370 66 +architectur 5 139 1.945910 9.729550 77 +note 3 142 1.945910 5.837730 67 +lectur 2 135 1.945910 3.891820 73 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +spring 3 131 2.079442 6.238326 88 +introduct 1 126 2.079442 2.079442 87 +report 1 131 2.079442 2.079442 92 +instructor 1 108 2.197225 2.197225 107 +final 1 116 2.197225 2.197225 108 +world 1 115 2.197225 2.197225 126 +part 6 98 2.302585 13.815510 129 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +question 1 91 2.397895 2.397895 141 +solut 4 82 2.484907 9.939628 162 +level 2 87 2.484907 4.969814 180 +novemb 1 81 2.484907 2.484907 179 +exam 1 86 2.484907 2.484907 169 +wide 1 84 2.484907 2.484907 185 +homework 7 79 2.564949 17.954643 193 +decemb 2 80 2.564949 5.129898 215 +mondai 1 77 2.564949 2.564949 206 +sourc 1 77 2.564949 2.564949 201 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +content 5 59 2.833213 14.166065 302 +instruct 3 53 2.944439 8.833317 332 +talk 2 53 2.944439 5.888878 336 +tabl 5 51 2.995732 14.978660 346 +appoint 2 49 3.044522 6.089044 358 +give 2 50 3.044522 6.089044 359 +basic 2 50 3.044522 6.089044 360 +set 1 50 3.044522 3.044522 361 +mark 1 44 3.135494 3.135494 403 +fridai 1 44 3.135494 3.135494 390 +midterm 1 45 3.135494 3.135494 392 +offer 1 43 3.178054 3.178054 414 +cach 1 41 3.218876 3.218876 432 +edit 1 42 3.218876 3.218876 418 +seminar 1 38 3.295837 3.295837 470 +cost 1 37 3.332205 3.332205 480 +eduoffic 2 33 3.433987 6.867974 531 +chapter 12 32 3.465736 41.588832 536 +hard 1 30 3.555348 3.555348 563 +full 4 28 3.610918 14.443672 615 +propos 1 28 3.610918 3.610918 602 +comp 2 26 3.688879 7.377758 650 +hill 1 25 3.737670 3.737670 670 +input 1 23 3.806662 3.806662 727 +output 1 21 3.912023 3.912023 788 +interconnect 1 17 4.110874 4.110874 937 +doit 4 14 4.317488 17.269952 1111 +reader 5 12 4.465908 22.329540 1246 +patterson 1 9 4.753590 4.753590 1554 +qualifi 1 8 4.875197 4.875197 1721 +pipelin 2 7 5.010635 10.021270 1830 +hennessi 1 5 5.347108 5.347108 2289 +markhil 1 4 5.568345 5.568345 2819 +talluri 1 4 5.568345 5.568345 2820 +ifal 1 2 6.263398 6.263398 4776 +statphon 1 2 6.263398 6.263398 4726 +hilloffic 1 1 6.957497 6.957497 9059 +statemail 1 1 6.957497 6.957497 9060 +shenoffic 1 1 6.957497 6.957497 9061 +mshen 1 1 6.957497 6.957497 9062 +miscellaneawhat 1 1 6.957497 6.957497 9063 +talksread 1 1 6.957497 6.957497 9064 +solutionproject 1 1 6.957497 6.957497 9065 +noonmiscellanea 1 1 6.957497 6.957497 9066 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ new file mode 100644 index 00000000..73c4920e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs110^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +program 3 374 0.693147 2.079441 7 +inform 1 412 0.693147 0.693147 8 +us 6 329 1.098612 6.591672 16 +offic 4 299 1.098612 4.394448 13 +time 1 293 1.098612 1.098612 17 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +class 5 199 1.609438 8.047190 37 +oper 2 180 1.609438 3.218876 34 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +assign 2 135 1.945910 3.891820 66 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +introduct 2 126 2.079442 4.158884 87 +number 1 130 2.079442 2.079442 97 +instructor 1 108 2.197225 2.197225 107 +structur 1 106 2.197225 2.197225 105 +text 1 98 2.302585 2.302585 133 +section 2 94 2.397895 4.795790 149 +grade 2 90 2.397895 4.795790 142 +academ 1 82 2.484907 2.484907 178 +exampl 5 77 2.564949 12.824745 195 +dynam 2 76 2.564949 5.129898 194 +solv 1 73 2.639057 2.639057 234 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +complex 2 64 2.772589 5.545178 269 +polici 1 64 2.772589 2.772589 279 +room 1 59 2.833213 2.833213 301 +simpl 1 60 2.833213 2.833213 298 +point 1 58 2.890372 2.890372 319 +appoint 1 49 3.044522 3.044522 358 +standard 1 48 3.044522 3.044522 365 +anoth 1 45 3.135494 3.135494 408 +late 1 40 3.258097 3.258097 439 +tutori 1 39 3.258097 3.258097 437 +microsoft 1 38 3.295837 3.295837 468 +procedur 1 36 3.367296 3.367296 488 +michael 1 35 3.401197 3.401197 514 +administr 1 27 3.637586 3.637586 628 +comp 2 26 3.688879 7.377758 650 +repres 1 26 3.688879 3.688879 656 +consult 1 24 3.761200 3.761200 687 +size 1 23 3.806662 3.806662 713 +alloc 2 20 3.951244 7.902488 821 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +psycholog 1 15 4.248495 4.248495 1054 +borland 1 14 4.317488 4.317488 1067 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +string 1 11 4.553877 4.553877 1340 +cheat 1 10 4.653960 4.653960 1395 +float 1 9 4.753590 4.753590 1504 +debugg 1 9 4.753590 4.753590 1493 +overload 2 5 5.347108 10.694216 2403 +handin 1 5 5.347108 5.347108 2393 +ration 1 5 5.347108 5.347108 2427 +birk 1 4 5.568345 5.568345 2791 +struct 1 4 5.568345 5.568345 2821 +mbirk 2 3 5.857933 11.715866 3501 +intstack 1 2 6.263398 6.263398 4777 +unlimit 1 2 6.263398 6.263398 4778 +classinfo 1 2 6.263398 6.263398 4779 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ new file mode 100644 index 00000000..b1ece7f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mbirk^cs302^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +program 11 374 0.693147 7.624617 7 +inform 1 412 0.693147 0.693147 8 +us 6 329 1.098612 6.591672 16 +offic 4 299 1.098612 4.394448 13 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +class 7 199 1.609438 11.266066 37 +oper 2 180 1.609438 3.218876 34 +fall 1 181 1.609438 1.609438 40 +avail 2 169 1.791759 3.583518 48 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +data 1 170 1.791759 1.791759 49 +assign 3 135 1.945910 5.837730 66 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +number 2 130 2.079442 4.158884 97 +spring 2 131 2.079442 4.158884 88 +introduct 2 126 2.079442 4.158884 87 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +text 1 98 2.302585 2.302585 133 +section 2 94 2.397895 4.795790 149 +grade 2 90 2.397895 4.795790 142 +octob 1 89 2.397895 2.397895 156 +exam 2 86 2.484907 4.969814 169 +second 1 81 2.484907 2.484907 166 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +exampl 5 77 2.564949 12.824745 195 +dynam 2 76 2.564949 5.129898 194 +mondai 1 77 2.564949 2.564949 206 +come 1 78 2.564949 2.564949 202 +onlin 1 75 2.639057 2.639057 223 +meet 1 72 2.639057 2.639057 229 +solv 1 73 2.639057 2.639057 234 +tuesdai 1 73 2.639057 2.639057 219 +syllabu 2 67 2.708050 5.416100 247 +test 1 66 2.708050 2.708050 252 +thursdai 1 70 2.708050 2.708050 241 +window 1 68 2.708050 2.708050 242 +polici 2 64 2.772589 5.545178 279 +complex 2 64 2.772589 5.545178 269 +room 3 59 2.833213 8.499639 301 +simpl 1 60 2.833213 2.833213 298 +point 1 58 2.890372 2.890372 319 +instruct 2 53 2.944439 5.888878 332 +week 2 52 2.995732 5.991464 343 +case 1 51 2.995732 2.995732 351 +digit 1 52 2.995732 2.995732 348 +appoint 1 49 3.044522 3.044522 358 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +discuss 1 45 3.135494 3.135494 399 +anoth 1 45 3.135494 3.135494 408 +past 1 42 3.218876 3.218876 428 +late 2 40 3.258097 6.516194 439 +announc 1 40 3.258097 3.258097 441 +tutori 1 39 3.258097 3.258097 437 +microsoft 1 38 3.295837 3.295837 468 +soon 1 36 3.367296 3.367296 494 +procedur 1 36 3.367296 3.367296 488 +michael 1 35 3.401197 3.401197 514 +print 1 34 3.401197 3.401197 503 +administr 1 27 3.637586 3.637586 628 +comp 5 26 3.688879 18.444395 650 +repres 1 26 3.688879 3.688879 656 +consult 1 24 3.761200 3.761200 687 +size 1 23 3.806662 3.806662 713 +output 1 21 3.912023 3.912023 788 +alloc 2 20 3.951244 7.902488 821 +four 1 18 4.060443 4.060443 905 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +rank 1 14 4.317488 4.317488 1063 +borland 1 14 4.317488 4.317488 1067 +outsid 1 12 4.465908 4.465908 1219 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +eight 2 11 4.553877 9.107754 1331 +string 1 11 4.553877 4.553877 1340 +cheat 1 10 4.653960 4.653960 1395 +float 1 9 4.753590 4.753590 1504 +debugg 1 9 4.753590 4.753590 1493 +overload 2 5 5.347108 10.694216 2403 +handin 1 5 5.347108 5.347108 2393 +ration 1 5 5.347108 5.347108 2427 +birk 1 4 5.568345 5.568345 2791 +struct 1 4 5.568345 5.568345 2821 +mbirk 2 3 5.857933 11.715866 3501 +intstack 1 2 6.263398 6.263398 4777 +unlimit 1 2 6.263398 6.263398 4778 +classinfo 1 2 6.263398 6.263398 4779 +dice 1 1 6.957497 6.957497 9067 +hangman 1 1 6.957497 6.957497 9068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~melski^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~melski^cs302.html new file mode 100644 index 00000000..29360908 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~melski^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +us 2 329 1.098612 2.197224 16 +last 1 314 1.098612 1.098612 14 +email 4 220 1.386294 5.545176 29 +link 2 247 1.386294 2.772588 24 +wisc 2 242 1.386294 2.772588 33 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +assign 2 135 1.945910 3.891820 66 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +code 1 108 2.197225 2.197225 116 +check 1 115 2.197225 2.197225 118 +text 1 98 2.302585 2.302585 133 +section 7 94 2.397895 16.785265 149 +grade 1 90 2.397895 2.397895 142 +info 3 85 2.484907 7.454721 176 +stuff 2 87 2.484907 4.969814 171 +help 2 83 2.484907 4.969814 175 +chang 1 82 2.484907 2.484907 163 +start 1 83 2.484907 2.484907 173 +academ 1 82 2.484907 2.484907 178 +solut 1 82 2.484907 2.484907 162 +refer 1 78 2.564949 2.564949 203 +david 2 71 2.639057 5.278114 232 +meet 2 72 2.639057 5.278114 229 +solv 1 73 2.639057 2.639057 234 +materi 1 75 2.639057 2.639057 221 +window 1 68 2.708050 2.708050 242 +syllabu 1 67 2.708050 2.708050 247 +polici 1 64 2.772589 2.772589 279 +handout 1 64 2.772589 2.772589 263 +share 1 59 2.833213 2.833213 304 +archiv 2 49 3.044522 6.089044 364 +frequent 1 49 3.044522 3.044522 367 +get 1 46 3.091042 3.091042 380 +press 1 42 3.218876 3.218876 419 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +statist 1 35 3.401197 3.401197 521 +often 1 31 3.496508 3.496508 551 +rule 1 26 3.688879 3.688879 638 +daili 1 24 3.761200 3.761200 706 +consult 1 24 3.761200 3.761200 687 +walter 1 17 4.110874 4.110874 950 +alreadi 1 16 4.174387 4.174387 963 +misconduct 1 16 4.174387 4.174387 1003 +psycholog 1 15 4.248495 4.248495 1054 +dave 1 14 4.317488 4.317488 1098 +floor 1 14 4.317488 4.317488 1070 +essenti 1 13 4.382027 4.382027 1137 +savitch 1 12 4.465908 4.465908 1269 +addison 1 12 4.465908 4.465908 1230 +andth 1 9 4.753590 4.753590 1481 +tutor 1 9 4.753590 4.753590 1552 +reload 1 8 4.875197 4.875197 1682 +attach 1 7 5.010635 5.010635 1785 +rough 1 6 5.164786 5.164786 2107 +button 1 5 5.347108 5.347108 2337 +noland 1 5 5.347108 5.347108 2420 +thumb 1 4 5.568345 5.568345 2816 +melski 4 2 6.263398 25.053592 4780 +melskithes 1 1 6.957497 6.957497 9069 +uncomfort 1 1 6.957497 6.957497 9070 +wesleypublish 1 1 6.957497 6.957497 9071 +usingborland 1 1 6.957497 6.957497 9072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~milo^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~milo^cs302.html new file mode 100644 index 00000000..274a3b91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~milo^cs302.html @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +home 5 672 0.000000 0.000000 1 +program 14 374 0.693147 9.704058 7 +inform 2 412 0.693147 1.386294 8 +us 3 329 1.098612 3.295836 16 +time 2 293 1.098612 2.197224 17 +offic 2 299 1.098612 2.197224 13 +current 2 284 1.098612 2.197224 21 +project 1 340 1.098612 1.098612 18 +languag 3 227 1.386294 4.158882 26 +wisc 3 242 1.386294 4.158882 33 +gener 2 220 1.386294 2.772588 27 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +class 8 199 1.609438 12.875504 37 +fall 2 181 1.609438 3.218876 40 +list 2 201 1.609438 3.218876 39 +updat 1 191 1.609438 1.609438 41 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +file 3 132 1.945910 5.837730 70 +assign 1 135 1.945910 1.945910 66 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +document 3 121 2.079442 6.238326 89 +postscript 3 131 2.079442 6.238326 90 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +code 7 108 2.197225 15.380575 116 +check 2 115 2.197225 4.394450 118 +version 2 113 2.197225 4.394450 122 +instructor 1 108 2.197225 2.197225 107 +place 1 106 2.197225 2.197225 124 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +take 3 97 2.302585 6.907755 134 +user 2 104 2.302585 4.605170 137 +octob 9 89 2.397895 21.581055 156 +section 3 94 2.397895 7.193685 149 +call 3 91 2.397895 7.193685 153 +homepag 2 93 2.397895 4.795790 148 +grade 2 90 2.397895 4.795790 142 +novemb 10 81 2.484907 24.849070 179 +solut 7 82 2.484907 17.394349 162 +exam 5 86 2.484907 12.424535 169 +chang 1 82 2.484907 2.484907 163 +academ 1 82 2.484907 2.484907 178 +requir 1 81 2.484907 2.484907 167 +mondai 6 77 2.564949 15.389694 206 +refer 4 78 2.564949 10.259796 203 +exampl 2 77 2.564949 5.129898 195 +decemb 2 80 2.564949 5.129898 215 +good 1 77 2.564949 2.564949 200 +html 5 75 2.639057 13.195285 235 +tuesdai 2 73 2.639057 5.278114 219 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +test 1 66 2.708050 2.708050 252 +syllabu 1 67 2.708050 2.708050 247 +main 1 67 2.708050 2.708050 256 +wednesdai 10 64 2.772589 27.725890 261 +septemb 5 65 2.772589 13.862945 274 +guid 4 63 2.772589 11.090356 267 +function 3 62 2.772589 8.317767 275 +creat 1 63 2.772589 2.772589 277 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +publish 1 57 2.890372 2.890372 326 +case 1 51 2.995732 2.995732 351 +standard 2 48 3.044522 6.089044 365 +appoint 1 49 3.044522 3.044522 358 +frequent 1 49 3.044522 3.044522 367 +fridai 2 44 3.135494 6.270988 390 +algebra 1 45 3.135494 3.135494 394 +textbook 1 44 3.135494 3.135494 397 +compani 1 41 3.218876 3.218876 423 +form 1 39 3.258097 3.258097 443 +respons 1 37 3.332205 3.332205 476 +survei 1 35 3.401197 3.401197 513 +eduoffic 1 33 3.433987 3.433987 531 +ad 2 32 3.465736 6.931472 544 +given 1 32 3.465736 3.465736 538 +often 1 31 3.496508 3.496508 551 +titl 1 31 3.496508 3.496508 556 +specifi 1 30 3.555348 3.555348 568 +turn 1 29 3.583519 3.583519 586 +releas 1 28 3.610918 3.610918 616 +todai 1 25 3.737670 3.737670 672 +valu 1 25 3.737670 3.737670 665 +consult 2 24 3.761200 7.522400 687 +known 1 24 3.761200 3.761200 702 +input 2 23 3.806662 7.613324 727 +martin 3 21 3.912023 11.736069 794 +latest 1 21 3.912023 3.912023 785 +sure 1 20 3.951244 3.951244 813 +coupl 1 17 4.110874 4.110874 939 +walter 1 17 4.110874 4.110874 950 +quiz 9 16 4.174387 37.569483 990 +sheet 1 16 4.174387 4.174387 973 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +score 7 15 4.248495 29.739465 1017 +style 1 15 4.248495 4.248495 1036 +introduc 1 13 4.382027 4.382027 1139 +onth 1 12 4.465908 4.465908 1218 +vectra 1 12 4.465908 4.465908 1267 +errata 1 10 4.653960 4.653960 1403 +minimum 1 9 4.753590 4.753590 1555 +remind 1 7 5.010635 5.010635 1799 +ethic 1 7 5.010635 5.010635 1786 +savitchaddison 1 5 5.347108 5.347108 2396 +struct 1 4 5.568345 5.568345 2821 +maximum 1 4 5.568345 5.568345 2632 +toth 1 4 5.568345 5.568345 2595 +beginn 1 3 5.857933 5.857933 3330 +milo 6 2 6.263398 37.580388 4781 +viru 1 2 6.263398 6.263398 4782 +bankaccount 7 1 6.957497 48.702479 9073 +withprompt 2 1 6.957497 13.914994 9074 +psychologyinstructor 1 1 6.957497 6.957497 9075 +announcementsthi 1 1 6.957497 6.957497 9076 +scheduledfor 1 1 6.957497 6.957497 9077 +haseveryth 1 1 6.957497 6.957497 9078 +isaccur 1 1 6.957497 6.957497 9079 +withinform 1 1 6.957497 6.957497 9080 +linksar 1 1 6.957497 6.957497 9081 +onfridai 1 1 6.957497 6.957497 9082 +policyclass 1 1 6.957497 6.957497 9083 +minmax 1 1 6.957497 6.957497 9084 +findth 1 1 6.957497 6.957497 9085 +enteredfrom 1 1 6.957497 6.957497 9086 +stdin 1 1 6.957497 6.957497 9087 +formlett 1 1 6.957497 6.957497 9088 +theopen_fil 1 1 6.957497 6.957497 9089 +hoax 1 1 6.957497 6.957497 9090 +andprofession 1 1 6.957497 6.957497 9091 +conductassign 1 1 6.957497 6.957497 9092 +questionar 1 1 6.957497 6.957497 9093 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html new file mode 100644 index 00000000..c8753ce5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~mreames^cs302.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +program 11 374 0.693147 7.624617 7 +inform 3 412 0.693147 2.079441 8 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +class 3 199 1.609438 4.828314 37 +list 2 201 1.609438 3.218876 39 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +code 1 108 2.197225 2.197225 116 +instructor 1 108 2.197225 2.197225 107 +section 6 94 2.397895 14.387370 149 +grade 1 90 2.397895 2.397895 142 +exam 3 86 2.484907 7.454721 169 +contain 1 81 2.484907 2.484907 174 +april 4 77 2.564949 10.259796 196 +messag 1 76 2.564949 2.564949 212 +tuesdai 7 73 2.639057 18.473399 219 +solv 1 73 2.639057 2.639057 234 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +thursdai 6 70 2.708050 16.248300 241 +window 1 68 2.708050 2.708050 242 +syllabu 1 67 2.708050 2.708050 247 +januari 1 62 2.772589 2.772589 264 +polici 1 64 2.772589 2.772589 279 +march 3 61 2.833213 8.499639 295 +share 1 59 2.833213 2.833213 304 +semest 2 58 2.890372 5.780744 312 +detail 1 57 2.890372 2.890372 321 +februari 3 54 2.944439 8.833317 328 +talk 1 53 2.944439 2.944439 336 +run 1 51 2.995732 2.995732 347 +appoint 1 49 3.044522 3.044522 358 +algebra 1 45 3.135494 3.135494 394 +made 1 44 3.135494 3.135494 398 +late 1 40 3.258097 3.258097 439 +statist 1 35 3.401197 3.401197 521 +rule 1 26 3.688879 3.688879 638 +sent 1 22 3.850148 3.850148 763 +tent 1 22 3.850148 3.850148 739 +martin 3 21 3.912023 11.736069 794 +walter 1 17 4.110874 4.110874 950 +former 1 17 4.110874 4.110874 956 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +extra 1 11 4.553877 4.553877 1312 +criteria 1 9 4.753590 4.753590 1477 +absolut 1 8 4.875197 4.875197 1646 +calendar 1 8 4.875197 4.875197 1649 +noland 2 5 5.347108 10.694216 2420 +informationc 1 5 5.347108 5.347108 2394 +thumb 1 4 5.568345 5.568345 2816 +textbookproblem 1 3 5.857933 5.857933 3483 +ream 3 2 6.263398 18.790194 4783 +mream 2 2 6.263398 12.526796 4784 +csst 1 2 6.263398 6.263398 4764 +classc 1 1 6.957497 6.957497 9094 +programmingspr 1 1 6.957497 6.957497 9095 +nothingeverydai 1 1 6.957497 6.957497 9096 +pagescommon 1 1 6.957497 6.957497 9097 +programmingmistakesarch 1 1 6.957497 6.957497 9098 +placeto 1 1 6.957497 6.957497 9099 +announcedcours 1 1 6.957497 6.957497 9100 +andborland 1 1 6.957497 6.957497 9101 +academicmisconduct 1 1 6.957497 6.957497 9102 +anyform 1 1 6.957497 6.957497 9103 +bigtodd 1 1 6.957497 6.957497 9104 +thielwendi 1 1 6.957497 6.957497 9105 +staatsabout 1 1 6.957497 6.957497 9106 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html new file mode 100644 index 00000000..653647f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~msteele^cs302.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 6 374 0.693147 4.158882 7 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +wisc 2 242 1.386294 2.772588 33 +mail 2 238 1.386294 2.772588 22 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +fall 2 181 1.609438 3.218876 40 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +read 2 154 1.791759 3.583518 47 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +note 2 142 1.945910 3.891820 67 +object 1 138 1.945910 1.945910 79 +check 2 115 2.197225 4.394450 118 +pleas 1 113 2.197225 2.197225 114 +find 1 111 2.197225 2.197225 111 +section 2 94 2.397895 4.795790 149 +grade 2 90 2.397895 4.795790 142 +novemb 2 81 2.484907 4.969814 179 +stuff 1 87 2.484907 2.484907 171 +academ 1 82 2.484907 2.484907 178 +exampl 3 77 2.564949 7.694847 195 +refer 1 78 2.564949 2.564949 203 +tuesdai 2 73 2.639057 5.278114 219 +line 1 75 2.639057 2.639057 231 +solv 1 73 2.639057 2.639057 234 +syllabu 1 67 2.708050 2.708050 247 +polici 2 64 2.772589 5.545178 279 +import 1 65 2.772589 2.772589 282 +function 1 62 2.772589 2.772589 275 +publish 1 57 2.890372 2.890372 326 +sampl 1 53 2.944439 2.944439 339 +week 1 52 2.995732 2.995732 343 +appoint 1 49 3.044522 3.044522 358 +understand 1 47 3.091042 3.091042 384 +algebra 1 45 3.135494 3.135494 394 +even 1 45 3.135494 3.135494 393 +compani 1 41 3.218876 3.218876 423 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +hand 1 37 3.332205 3.332205 475 +eduoffic 1 33 3.433987 3.433987 531 +extend 1 32 3.465736 3.465736 539 +taken 1 31 3.496508 3.496508 555 +comp 2 26 3.688879 7.377758 650 +mike 2 24 3.761200 7.522400 703 +greg 1 24 3.761200 3.761200 695 +known 1 24 3.761200 3.761200 702 +miss 1 19 4.007333 4.007333 866 +stat 2 17 4.110874 8.221748 924 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +weslei 1 16 4.174387 4.174387 983 +near 1 14 4.317488 4.317488 1091 +borland 1 14 4.317488 4.317488 1067 +everyth 1 13 4.382027 4.382027 1169 +rememb 1 12 4.465908 4.465908 1217 +vectra 1 12 4.465908 4.465908 1267 +fill 1 11 4.553877 4.553877 1349 +deadlin 2 9 4.753590 9.507180 1502 +didn 1 9 4.753590 4.753590 1563 +informationemail 1 9 4.753590 4.753590 1564 +bottom 1 7 5.010635 5.010635 1906 +ifyou 1 6 5.164786 5.164786 1992 +sharp 1 6 5.164786 5.164786 2100 +clarif 1 5 5.347108 5.347108 2253 +informationc 1 5 5.347108 5.347108 2394 +pagecours 1 5 5.347108 5.347108 2395 +savitchaddison 1 5 5.347108 5.347108 2396 +labc 1 4 5.568345 5.568345 2787 +policygrad 1 4 5.568345 5.568345 2788 +erratalast 1 4 5.568345 5.568345 2789 +steel 1 4 5.568345 5.568345 2818 +programmingassign 1 3 5.857933 5.857933 3398 +homeclass 1 3 5.857933 5.857933 3411 +nolandinstructor 1 2 6.263398 6.263398 4785 +msteel 2 1 6.957497 13.914994 9107 +steeleemail 1 1 6.957497 6.957497 9108 +buildingoffic 1 1 6.957497 6.957497 9109 +soffic 1 1 6.957497 6.957497 9110 +announcementsi 1 1 6.957497 6.957497 9111 +thenew 1 1 6.957497 6.957497 9112 +informationmidterm 1 1 6.957497 6.957497 9113 +pastfew 1 1 6.957497 6.957497 9114 +gloss 1 1 6.957497 6.957497 9115 +makefulli 1 1 6.957497 6.957497 9116 +objectivesabout 1 1 6.957497 6.957497 9117 +consultantscours 1 1 6.957497 6.957497 9118 +assignmentsnot 1 1 6.957497 6.957497 9119 +handoutsprogram 1 1 6.957497 6.957497 9120 +assignmentsexam 1 1 6.957497 6.957497 9121 +quizzessom 1 1 6.957497 6.957497 9122 +examplespolici 1 1 6.957497 6.957497 9123 +policyus 1 1 6.957497 6.957497 9124 +pagesintroduct 1 1 6.957497 6.957497 9125 +windowsintroduct 1 1 6.957497 6.957497 9126 +styleguid 1 1 6.957497 6.957497 9127 +codetextproblem 1 1 6.957497 6.957497 9128 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~prock^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~prock^cs302.html new file mode 100644 index 00000000..fc4ca681 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~prock^cs302.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 4 299 1.098612 4.394448 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +wisc 3 242 1.386294 4.158882 33 +email 3 220 1.386294 4.158882 29 +link 2 247 1.386294 2.772588 24 +gener 2 220 1.386294 2.772588 27 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +fall 2 181 1.609438 3.218876 40 +class 2 199 1.609438 3.218876 37 +modifi 1 178 1.609438 1.609438 35 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +read 1 154 1.791759 1.791759 47 +assign 5 135 1.945910 9.729550 66 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +introduct 2 126 2.079442 4.158884 87 +welcom 1 122 2.079442 2.079442 99 +check 3 115 2.197225 6.591675 118 +final 2 116 2.197225 4.394450 108 +instructor 1 108 2.197225 2.197225 107 +pleas 1 113 2.197225 2.197225 114 +topic 1 114 2.197225 2.197225 110 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +code 1 108 2.197225 2.197225 116 +text 2 98 2.302585 4.605170 133 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +grade 6 90 2.397895 14.387370 142 +section 6 94 2.397895 14.387370 149 +question 2 91 2.397895 4.795790 141 +mani 1 92 2.397895 2.397895 150 +info 2 85 2.484907 4.969814 176 +exam 1 86 2.484907 2.484907 169 +level 1 87 2.484907 2.484907 180 +requir 1 81 2.484907 2.484907 167 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +second 1 81 2.484907 2.484907 166 +good 1 77 2.564949 2.564949 200 +decemb 1 80 2.564949 2.564949 215 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +onlin 4 75 2.639057 10.556228 223 +materi 2 75 2.639057 5.278114 221 +tuesdai 1 73 2.639057 2.639057 219 +solv 1 73 2.639057 2.639057 234 +test 4 66 2.708050 10.832200 252 +knowledg 1 67 2.708050 2.708050 243 +syllabu 1 67 2.708050 2.708050 247 +window 1 68 2.708050 2.708050 242 +polici 5 64 2.772589 13.862945 279 +copi 1 63 2.772589 2.772589 284 +result 1 65 2.772589 2.772589 281 +import 1 65 2.772589 2.772589 282 +guid 1 63 2.772589 2.772589 267 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +think 1 57 2.890372 2.890372 314 +sampl 1 53 2.944439 2.944439 339 +archiv 2 49 3.044522 6.089044 364 +give 1 50 3.044522 3.044522 359 +done 1 47 3.091042 3.091042 381 +made 1 44 3.135494 3.135494 398 +anoth 1 45 3.135494 3.135494 408 +mark 1 44 3.135494 3.135494 403 +answer 1 45 3.135494 3.135494 391 +error 1 40 3.258097 3.258097 449 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +feel 1 37 3.332205 3.332205 483 +statist 3 35 3.401197 10.203591 521 +eduoffic 2 33 3.433987 6.867974 531 +ad 1 32 3.465736 3.465736 544 +idea 1 32 3.465736 3.465736 545 +rang 1 30 3.555348 3.555348 565 +administr 1 27 3.637586 3.637586 628 +session 1 26 3.688879 3.688879 643 +todai 2 25 3.737670 7.475340 672 +notic 1 25 3.737670 3.737670 675 +consult 2 24 3.761200 7.522400 687 +tent 1 22 3.850148 3.850148 739 +sure 1 20 3.951244 3.951244 813 +andrew 1 19 4.007333 4.007333 849 +five 1 19 4.007333 4.007333 841 +seem 1 18 4.060443 4.060443 899 +modif 1 17 4.110874 4.110874 913 +walter 1 17 4.110874 4.110874 950 +quiz 2 16 4.174387 8.348774 990 +alreadi 1 16 4.174387 4.174387 963 +score 1 15 4.248495 4.248495 1017 +style 1 15 4.248495 4.248495 1036 +borland 1 14 4.317488 4.317488 1067 +everyon 1 13 4.382027 4.382027 1148 +verifi 1 12 4.465908 4.465908 1261 +minor 1 12 4.465908 4.465908 1237 +rememb 1 12 4.465908 4.465908 1217 +savitch 1 12 4.465908 4.465908 1269 +vectra 1 12 4.465908 4.465908 1267 +extra 1 11 4.553877 4.553877 1312 +calendar 1 8 4.875197 4.875197 1649 +carefulli 1 6 5.164786 5.164786 2045 +ahead 1 5 5.347108 5.347108 2338 +crazi 2 4 5.568345 11.136690 2822 +webpag 1 4 5.568345 5.568345 2660 +prock 2 2 6.263398 12.526796 4786 +infoc 1 2 6.263398 6.263398 4772 +sessionalgebra 1 1 6.957497 6.957497 9129 +prockemail 1 1 6.957497 6.957497 9130 +thgrader 1 1 6.957497 6.957497 9131 +haihong 1 1 6.957497 6.957497 9132 +wangemail 1 1 6.957497 6.957497 9133 +mtwrannounc 1 1 6.957497 6.957497 9134 +gotton 1 1 6.957497 6.957497 9135 +perus 1 1 6.957497 6.957497 9136 +assing 1 1 6.957497 6.957497 9137 +misconductcours 1 1 6.957497 6.957497 9138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ratliff^132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ratliff^132.html new file mode 100644 index 00000000..5f809fc4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~ratliff^132.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +softwar 3 220 1.386294 4.158882 30 +mail 2 238 1.386294 2.772588 22 +link 2 247 1.386294 2.772588 24 +also 1 259 1.386294 1.386294 28 +avail 1 169 1.791759 1.791759 48 +file 1 132 1.945910 1.945910 70 +site 3 106 2.197225 6.591675 119 +world 2 115 2.197225 4.394450 126 +need 2 98 2.302585 4.605170 135 +grade 1 90 2.397895 2.397895 142 +search 1 95 2.397895 2.397895 155 +info 9 85 2.484907 22.364163 176 +wide 1 84 2.484907 2.484907 185 +stuff 1 87 2.484907 2.484907 171 +resourc 1 81 2.484907 2.484907 172 +internet 1 83 2.484907 2.484907 186 +messag 2 76 2.564949 5.129898 212 +exampl 1 77 2.564949 2.564949 195 +addit 1 74 2.639057 2.639057 228 +function 2 62 2.772589 5.545178 275 +copi 1 63 2.772589 2.772589 284 +virtual 1 62 2.772589 2.772589 285 +visit 1 63 2.772589 2.772589 288 +type 1 61 2.833213 2.833213 296 +best 1 59 2.833213 2.833213 299 +semest 1 58 2.890372 2.890372 312 +index 1 56 2.890372 2.890372 309 +space 1 57 2.890372 2.890372 310 +maintain 1 51 2.995732 2.995732 342 +archiv 3 49 3.044522 9.133566 364 +without 1 50 3.044522 3.044522 370 +might 1 41 3.218876 3.218876 426 +everi 1 34 3.401197 3.401197 519 +word 1 34 3.401197 3.401197 508 +post 1 35 3.401197 3.401197 505 +quot 1 29 3.583519 3.583519 582 +usual 1 28 3.610918 3.610918 608 +compress 1 23 3.806662 3.806662 719 +sent 1 22 3.850148 3.850148 763 +disk 1 22 3.850148 3.850148 747 +try 1 22 3.850148 3.850148 764 +command 1 14 4.317488 4.317488 1083 +keyword 1 11 4.553877 4.553877 1356 +enter 1 10 4.653960 4.653960 1454 +paragraph 1 10 4.653960 4.653960 1449 +tourist 1 8 4.875197 4.875197 1710 +usenet 2 7 5.010635 10.021270 1839 +nine 1 6 5.164786 5.164786 2047 +ignor 1 5 5.347108 5.347108 2288 +clickher 1 5 5.347108 5.347108 2428 +kelli 2 4 5.568345 11.136690 2793 +backup 1 4 5.568345 5.568345 2645 +oracl 1 4 5.568345 5.568345 2823 +ratliff 1 3 5.857933 5.857933 3419 +badger 1 3 5.857933 5.857933 3502 +sharewar 1 3 5.857933 5.857933 3503 +freewar 1 3 5.857933 5.857933 3504 +shuttl 1 2 6.263398 6.263398 4787 +clickabl 1 2 6.263398 6.263398 4788 +herald 1 2 6.263398 6.263398 4789 +biggest 1 2 6.263398 6.263398 4790 +desautel 1 2 6.263398 6.263398 4791 +simtel 3 1 6.957497 20.872491 9139 +wildcard 1 1 6.957497 6.957497 9140 +filesviru 1 1 6.957497 6.957497 9141 +faqfun 1 1 6.957497 6.957497 9142 +mapth 1 1 6.957497 6.957497 9143 +comicshumor 1 1 6.957497 6.957497 9144 +abort 1 1 6.957497 6.957497 9145 +retri 1 1 6.957497 6.957497 9146 +usersfin 1 1 6.957497 6.957497 9147 +weeklab 1 1 6.957497 6.957497 9148 +jokesget 1 1 6.957497 6.957497 9149 +tryingsom 1 1 6.957497 6.957497 9150 +somecompress 1 1 6.957497 6.957497 9151 +unpack 1 1 6.957497 6.957497 9152 +reviewsom 1 1 6.957497 6.957497 9153 +minclud 1 1 6.957497 6.957497 9154 +infocompress 1 1 6.957497 6.957497 9155 +infofavorit 1 1 6.957497 6.957497 9156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html new file mode 100644 index 00000000..308ac038 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~rmanning^cs110.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 8 374 0.693147 5.545176 7 +inform 3 412 0.693147 2.079441 8 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +hour 1 165 1.791759 1.791759 46 +like 1 132 1.945910 1.945910 81 +lectur 1 135 1.945910 1.945910 73 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +assign 1 135 1.945910 1.945910 66 +introduct 3 126 2.079442 6.238326 87 +instructor 1 108 2.197225 2.197225 107 +find 1 111 2.197225 2.197225 111 +final 1 116 2.197225 2.197225 108 +grade 1 90 2.397895 2.397895 142 +section 1 94 2.397895 2.397895 149 +novemb 2 81 2.484907 4.969814 179 +build 1 85 2.484907 2.484907 184 +academ 1 82 2.484907 2.484907 178 +come 1 78 2.564949 2.564949 202 +mondai 1 77 2.564949 2.564949 206 +solv 1 73 2.639057 2.639057 234 +window 2 68 2.708050 5.416100 242 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 1 64 2.772589 2.772589 261 +handout 1 64 2.772589 2.772589 263 +polici 1 64 2.772589 2.772589 279 +import 1 65 2.772589 2.772589 282 +room 2 59 2.833213 5.666426 301 +semest 1 58 2.890372 2.890372 312 +keep 1 44 3.135494 3.135494 409 +textbook 1 44 3.135494 3.135494 397 +compani 1 41 3.218876 3.218876 423 +late 1 40 3.258097 3.258097 439 +microsoft 1 38 3.295837 3.295837 468 +game 1 36 3.367296 3.367296 498 +statist 1 35 3.401197 3.401197 521 +eduoffic 1 33 3.433987 3.433987 531 +except 1 28 3.610918 3.610918 607 +although 1 25 3.737670 3.737670 667 +walter 1 17 4.110874 4.110874 950 +misconduct 1 16 4.174387 4.174387 1003 +borland 1 14 4.317488 4.317488 1067 +quizz 1 13 4.382027 4.382027 1151 +savitch 2 12 4.465908 8.931816 1269 +vectra 1 12 4.465908 4.465908 1267 +sundai 1 10 4.653960 4.653960 1387 +russel 1 9 4.753590 4.753590 1507 +saturdai 1 7 5.010635 5.010635 1794 +footbal 1 7 5.010635 5.010635 1912 +prioriti 1 7 5.010635 5.010635 1792 +none 1 7 5.010635 5.010635 1811 +basement 1 4 5.568345 5.568345 2663 +man 1 3 5.857933 5.857933 3417 +csc 1 3 5.857933 5.857933 3183 +windowshint 1 3 5.857933 5.857933 3484 +compilersth 1 3 5.857933 5.857933 3485 +systememailmosaicnetscap 1 3 5.857933 5.857933 3486 +languageth 1 3 5.857933 5.857933 3487 +russ 1 1 6.957497 6.957497 9157 +manningemail 1 1 6.957497 6.957497 9158 +rman 1 1 6.957497 6.957497 9159 +scienceoffic 1 1 6.957497 6.957497 9160 +rotc 1 1 6.957497 6.957497 9161 +textold 1 1 6.957497 6.957497 9162 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html new file mode 100644 index 00000000..60e216d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs540-all.html @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +cours 3 273 1.098612 3.295836 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 8 242 1.386294 11.090352 33 +languag 2 227 1.386294 2.772588 26 +link 2 247 1.386294 2.772588 24 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +fall 4 181 1.609438 6.437752 40 +group 4 183 1.609438 6.437752 36 +includ 2 208 1.609438 3.218876 42 +modifi 1 178 1.609438 1.609438 35 +network 2 168 1.791759 3.583518 61 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +problem 2 147 1.945910 3.891820 75 +relat 2 139 1.945910 3.891820 68 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +spring 6 131 2.079442 12.476652 88 +introduct 2 126 2.079442 4.158884 87 +machin 2 129 2.079442 4.158884 95 +topic 1 114 2.197225 2.197225 110 +instructor 1 108 2.197225 2.197225 107 +techniqu 1 99 2.302585 2.302585 138 +search 3 95 2.397895 7.193685 155 +section 1 94 2.397895 2.397895 149 +learn 3 86 2.484907 7.454721 170 +academ 1 82 2.484907 2.484907 178 +exam 1 86 2.484907 2.484907 169 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +intellig 2 72 2.639057 5.278114 225 +solv 2 73 2.639057 5.278114 234 +logic 1 71 2.639057 2.639057 230 +knowledg 3 67 2.708050 8.124150 243 +artifici 2 63 2.772589 5.545178 280 +plan 2 65 2.772589 5.545178 272 +previou 1 62 2.772589 2.772589 290 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +best 1 59 2.833213 2.833213 299 +plai 1 60 2.833213 2.833213 307 +content 1 59 2.833213 2.833213 302 +semest 1 58 2.890372 2.890372 312 +variou 1 56 2.890372 2.890372 317 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +principl 1 48 3.044522 3.044522 357 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +natur 1 44 3.135494 3.135494 406 +offer 1 43 3.178054 3.178054 414 +vision 3 41 3.218876 9.656628 430 +autom 1 41 3.218876 3.218876 434 +seminar 1 38 3.295837 3.295837 470 +robot 3 36 3.367296 10.101888 497 +game 1 36 3.367296 3.367296 498 +represent 1 35 3.401197 3.401197 512 +semant 1 29 3.583519 3.583519 587 +rule 1 26 3.688879 3.688879 638 +frame 1 24 3.761200 3.761200 684 +motion 1 24 3.761200 3.761200 699 +expert 1 20 3.951244 3.951244 833 +mostli 1 19 4.007333 4.007333 869 +lisp 1 18 4.060443 4.060443 897 +biologi 1 15 4.248495 4.248495 1049 +chuck 1 14 4.317488 4.317488 1108 +prolog 1 13 4.382027 4.382027 1155 +deduct 2 12 4.465908 8.931816 1236 +readabl 1 12 4.465908 4.465908 1258 +alpha 1 11 4.553877 4.553877 1348 +wendt 1 10 4.653960 4.653960 1446 +dyer 1 9 4.753590 4.753590 1573 +qualifi 1 8 4.875197 4.875197 1721 +predic 1 7 5.010635 5.010635 1806 +jude 2 6 5.164786 10.329572 2123 +beta 1 6 5.164786 5.164786 1993 +extern 1 6 5.164786 5.164786 2105 +shavlik 3 5 5.347108 16.041324 2429 +connectionist 1 5 5.347108 5.347108 2430 +bryan 1 5 5.347108 5.347108 2421 +sabbat 1 4 5.568345 5.568345 2824 +kunen 1 3 5.857933 5.857933 3500 +thefal 1 1 6.957497 6.957497 9163 +assumedprerequisit 1 1 6.957497 6.957497 9164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html new file mode 100644 index 00000000..2c0891a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~shavlik^cs760.html @@ -0,0 +1,217 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +cours 5 273 1.098612 5.493060 15 +us 4 329 1.098612 4.394448 16 +offic 3 299 1.098612 3.295836 13 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 11 242 1.386294 15.249234 33 +link 4 247 1.386294 5.545176 24 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +class 3 199 1.609438 4.828314 37 +paper 3 205 1.609438 4.828314 38 +group 3 183 1.609438 4.828314 36 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +read 14 154 1.791759 25.084626 47 +network 7 168 1.791759 12.542313 61 +algorithm 3 162 1.791759 5.375277 57 +base 2 165 1.791759 3.583518 50 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +assign 17 135 1.945910 33.080470 66 +relat 3 139 1.945910 5.837730 68 +lectur 1 135 1.945910 1.945910 73 +spring 10 131 2.079442 20.794420 88 +postscript 8 131 2.079442 16.635536 90 +machin 5 129 2.079442 10.397210 95 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +databas 1 122 2.079442 2.079442 86 +theori 3 111 2.197225 6.591675 127 +instructor 1 108 2.197225 2.197225 107 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +code 1 108 2.197225 2.197225 116 +peopl 1 96 2.302585 2.302585 132 +question 2 91 2.397895 4.795790 141 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +learn 16 86 2.484907 39.758512 170 +journal 8 83 2.484907 19.879256 183 +librari 2 87 2.484907 4.969814 181 +resourc 2 81 2.484907 4.969814 172 +institut 2 84 2.484907 4.969814 187 +help 2 83 2.484907 4.969814 175 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +start 1 83 2.484907 2.484907 173 +exam 1 86 2.484907 2.484907 169 +stuff 1 87 2.484907 2.484907 171 +ieee 1 86 2.484907 2.484907 190 +homework 15 79 2.564949 38.474235 193 +april 10 77 2.564949 25.649490 196 +mondai 7 77 2.564949 17.954643 206 +server 3 76 2.564949 7.694847 204 +refer 2 78 2.564949 5.129898 203 +free 2 73 2.639057 5.278114 224 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +materi 1 75 2.639057 2.639057 221 +line 1 75 2.639057 2.639057 231 +write 1 72 2.639057 2.639057 222 +knowledg 3 67 2.708050 8.124150 243 +syllabu 1 67 2.708050 2.708050 247 +wednesdai 5 64 2.772589 13.862945 261 +januari 4 62 2.772589 11.090356 264 +polici 2 64 2.772589 5.545178 279 +artifici 1 63 2.772589 2.772589 280 +creat 1 63 2.772589 2.772589 277 +previou 1 62 2.772589 2.772589 290 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +march 5 61 2.833213 14.166065 295 +best 1 59 2.833213 2.833213 299 +content 1 59 2.833213 2.833213 302 +space 3 57 2.890372 8.671116 310 +semest 2 58 2.890372 5.780744 312 +overview 1 56 2.890372 2.890372 323 +sever 1 56 2.890372 2.890372 322 +februari 6 54 2.944439 17.666634 328 +local 2 55 2.944439 5.888878 334 +suggest 1 53 2.944439 2.944439 331 +cover 1 55 2.944439 2.944439 329 +week 1 52 2.995732 2.995732 343 +tabl 1 51 2.995732 2.995732 346 +archiv 2 49 3.044522 6.089044 364 +frequent 2 49 3.044522 6.089044 367 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +adapt 1 46 3.091042 3.091042 387 +textbook 8 44 3.135494 25.083952 397 +answer 1 45 3.135494 3.135494 391 +math 1 44 3.135494 3.135494 402 +edit 1 42 3.218876 3.218876 418 +author 8 39 3.258097 26.064776 450 +late 4 40 3.258097 13.032388 439 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +connect 1 37 3.332205 3.332205 485 +workstat 1 37 3.332205 3.332205 479 +tree 2 36 3.367296 6.734592 492 +bibliographi 2 34 3.401197 6.802394 518 +print 2 34 3.401197 6.802394 503 +next 1 34 3.401197 3.401197 517 +manual 1 35 3.401197 3.401197 504 +articl 5 33 3.433987 17.169935 530 +chapter 8 32 3.465736 27.725888 536 +concept 3 32 3.465736 10.397208 537 +idea 1 32 3.465736 3.465736 545 +neural 8 30 3.555348 28.442784 578 +common 2 30 3.555348 7.110696 574 +ask 2 28 3.610918 7.221836 597 +measur 1 28 3.610918 3.610918 609 +progress 1 28 3.610918 3.610918 598 +compar 2 26 3.688879 7.377758 648 +experiment 2 26 3.688879 7.377758 645 +comp 1 26 3.688879 3.688879 650 +lab 1 24 3.761200 3.761200 698 +decis 2 23 3.806662 7.613324 728 +lead 1 23 3.806662 3.806662 718 +methodolog 1 23 3.806662 3.806662 733 +instead 1 22 3.850148 3.850148 756 +dai 1 22 3.850148 3.850148 753 +sure 1 20 3.951244 3.951244 813 +department 1 20 3.951244 3.951244 839 +feedback 8 19 4.007333 32.058664 854 +definit 1 19 4.007333 4.007333 864 +five 1 19 4.007333 4.007333 841 +mostli 1 19 4.007333 4.007333 869 +benchmark 1 19 4.007333 4.007333 859 +lisp 6 18 4.060443 24.362658 897 +accept 1 18 4.060443 4.060443 879 +behavior 1 18 4.060443 4.060443 881 +stat 1 17 4.110874 4.110874 924 +analyz 1 17 4.110874 4.110874 925 +previous 1 17 4.110874 4.110874 923 +sheet 1 16 4.174387 4.174387 973 +explan 1 16 4.174387 4.174387 985 +biologi 1 15 4.248495 4.248495 1049 +train 2 14 4.317488 8.634976 1066 +emac 1 13 4.382027 4.382027 1143 +readabl 2 12 4.465908 8.931816 1258 +refin 1 11 4.553877 4.553877 1363 +summar 1 11 4.553877 4.553877 1295 +council 1 11 4.553877 4.553877 1364 +genet 1 10 4.653960 4.653960 1409 +sentenc 1 10 4.653960 4.653960 1413 +penalti 1 10 4.653960 4.653960 1405 +wendt 1 10 4.653960 4.653960 1446 +moonei 2 9 4.753590 9.507180 1520 +debugg 1 9 4.753590 4.753590 1493 +empir 2 8 4.875197 9.750394 1722 +irvin 1 8 4.875197 4.875197 1660 +printer 1 8 4.875197 4.875197 1621 +noon 2 7 5.010635 10.021270 1804 +tip 2 7 5.010635 10.021270 1863 +analyt 1 7 5.010635 5.010635 1913 +migrat 1 7 5.010635 5.010635 1851 +dataset 1 7 5.010635 5.010635 1914 +discoveri 1 7 5.010635 5.010635 1915 +jude 2 6 5.164786 10.329572 2123 +geoff 1 6 5.164786 5.164786 2124 +highwai 1 6 5.164786 5.164786 2095 +heurist 1 6 5.164786 5.164786 2125 +extern 1 6 5.164786 5.164786 2105 +shavlik 6 5 5.347108 32.082648 2429 +reinforc 3 4 5.568345 16.705035 2674 +basement 1 4 5.568345 5.568345 2663 +exhaust 1 4 5.568345 5.568345 2825 +novic 1 4 5.568345 5.568345 2815 +steel 1 4 5.568345 5.568345 2818 +weinberg 1 3 5.857933 5.857933 3443 +geoffrei 1 3 5.857933 5.857933 3505 +soar 1 3 5.857933 5.857933 3506 +backpropag 1 3 5.857933 5.857933 3507 +weekend 1 3 5.857933 5.857933 3357 +canadian 1 3 5.857933 5.857933 3508 +mitchel 7 2 6.263398 43.843786 4792 +towel 4 2 6.263398 25.053592 4793 +fisher 2 2 6.263398 12.526796 4794 +induc 2 2 6.263398 12.526796 4795 +akcl 2 2 6.263398 12.526796 4796 +quinlan 1 2 6.263398 6.263398 4797 +unsupervis 1 2 6.263398 6.263398 4233 +cogsci 1 2 6.263398 6.263398 4798 +tractabl 1 2 6.263398 6.263398 4799 +rumelhart 2 1 6.957497 13.914994 9165 +backprop 2 1 6.957497 13.914994 9166 +cobweb 2 1 6.957497 13.914994 9167 +austrian 2 1 6.957497 13.914994 9168 +chunk 1 1 6.957497 6.957497 9169 +laird 1 1 6.957497 6.957497 9170 +rosenbloom 1 1 6.957497 6.957497 9171 +newel 1 1 6.957497 6.957497 9172 +dietterich 1 1 6.957497 6.957497 9173 +zipser 1 1 6.957497 6.957497 9174 +lenat 1 1 6.957497 6.957497 9175 +kibler 1 1 6.957497 6.957497 9176 +kbann 1 1 6.957497 6.957497 9177 +brr 1 1 6.957497 6.957497 9178 +ineedagoodicon 1 1 6.957497 6.957497 9179 +nip 1 1 6.957497 6.957497 9180 +premier 1 1 6.957497 6.957497 9181 +shavlikshavlik 1 1 6.957497 6.957497 9182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html new file mode 100644 index 00000000..9d402d25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~solomon^cs736.html @@ -0,0 +1,248 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +system 4 443 0.693147 2.772588 6 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +project 16 340 1.098612 17.577792 18 +cours 5 273 1.098612 5.493060 15 +time 2 293 1.098612 2.197224 17 +us 2 329 1.098612 2.197224 16 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +wisc 2 242 1.386294 2.772588 33 +design 2 213 1.386294 2.772588 25 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +paper 8 205 1.609438 12.875504 38 +class 5 199 1.609438 8.047190 37 +updat 2 191 1.609438 3.218876 41 +list 2 201 1.609438 3.218876 39 +group 2 183 1.609438 3.218876 36 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +read 5 154 1.791759 8.958795 47 +avail 2 169 1.791759 3.583518 48 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +lectur 4 135 1.945910 7.783640 73 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +schedul 4 119 2.079442 8.317768 85 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +topic 5 114 2.197225 10.986125 110 +final 3 116 2.197225 6.591675 108 +make 3 111 2.197225 6.591675 120 +manag 2 114 2.197225 4.394450 125 +place 1 106 2.197225 2.197225 124 +version 1 113 2.197225 2.197225 122 +structur 1 106 2.197225 2.197225 105 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +memori 2 101 2.302585 4.605170 139 +text 2 98 2.302585 4.605170 133 +advanc 1 99 2.302585 2.302585 130 +present 5 91 2.397895 11.989475 145 +grade 3 90 2.397895 7.193685 142 +octob 1 89 2.397895 2.397895 156 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +exam 5 86 2.484907 12.424535 169 +activ 2 84 2.484907 4.969814 182 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +requir 1 81 2.484907 2.484907 167 +mondai 2 77 2.564949 5.129898 206 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +good 1 77 2.564949 2.564949 200 +complet 1 77 2.564949 2.564949 208 +summari 2 73 2.639057 5.278114 237 +write 2 72 2.639057 5.278114 222 +tuesdai 1 73 2.639057 2.639057 219 +involv 1 71 2.639057 2.639057 227 +meet 1 72 2.639057 2.639057 229 +thursdai 1 70 2.708050 2.708050 241 +simul 1 66 2.708050 2.708050 255 +new 2 64 2.772589 5.545178 262 +copi 2 63 2.772589 5.545178 284 +wednesdai 1 64 2.772589 2.772589 261 +previou 1 62 2.772589 2.772589 290 +result 1 65 2.772589 2.772589 281 +room 3 59 2.833213 8.499639 301 +content 2 59 2.833213 5.666426 302 +semest 3 58 2.890372 8.671116 312 +detail 3 57 2.890372 8.671116 321 +space 1 57 2.890372 2.890372 310 +major 1 56 2.890372 2.890372 315 +point 1 58 2.890372 2.890372 319 +suggest 3 53 2.944439 8.833317 331 +sampl 1 53 2.944439 2.944439 339 +cover 1 55 2.944439 2.944439 329 +week 1 52 2.995732 2.995732 343 +much 1 52 2.995732 2.995732 349 +case 1 51 2.995732 2.995732 351 +give 1 50 3.044522 3.044522 359 +standard 1 48 3.044522 3.044522 365 +done 1 47 3.091042 3.091042 381 +midterm 3 45 3.135494 9.406482 392 +discuss 2 45 3.135494 6.270988 399 +fridai 1 44 3.135494 3.135494 390 +answer 1 45 3.135494 3.135494 391 +textbook 1 44 3.135494 3.135494 397 +term 2 43 3.178054 6.356108 411 +around 1 43 3.178054 3.178054 415 +past 1 42 3.218876 3.218876 428 +review 1 42 3.218876 3.218876 425 +must 2 40 3.258097 6.516194 442 +realli 1 40 3.258097 3.258097 444 +expect 1 37 3.332205 3.332205 484 +statist 2 35 3.401197 6.802394 521 +survei 1 35 3.401197 3.401197 513 +approxim 1 35 3.401197 3.401197 509 +jame 1 35 3.401197 3.401197 507 +michael 1 35 3.401197 3.401197 514 +articl 1 33 3.433987 3.433987 530 +richard 1 31 3.496508 3.496508 559 +secur 1 30 3.555348 3.555348 577 +compon 1 30 3.555348 3.555348 570 +particip 2 29 3.583519 7.167038 589 +synchron 1 29 3.583519 3.583519 588 +intend 1 28 3.610918 3.610918 599 +measur 1 28 3.610918 3.610918 609 +experiment 2 26 3.688879 7.377758 645 +relev 1 26 3.688879 3.688879 637 +daili 1 24 3.761200 3.761200 706 +tent 1 22 3.850148 3.850148 739 +wang 3 21 3.912023 11.736069 790 +chen 3 21 3.912023 11.736069 791 +watch 1 21 3.912023 3.912023 789 +latest 1 21 3.912023 3.912023 785 +similar 1 21 3.912023 3.912023 771 +basi 1 20 3.951244 3.951244 828 +qualiti 1 20 3.951244 3.951244 832 +assum 1 19 4.007333 4.007333 845 +andrew 1 19 4.007333 4.007333 849 +eric 1 19 4.007333 4.007333 870 +encourag 2 18 4.060443 8.120886 880 +figur 1 18 4.060443 4.060443 903 +listen 1 18 4.060443 4.060443 907 +protect 1 17 4.110874 4.110874 935 +normal 1 16 4.174387 4.174387 995 +zhang 1 16 4.174387 4.174387 980 +purchas 1 15 4.248495 4.248495 1030 +todd 1 15 4.248495 4.248495 1051 +classic 1 14 4.317488 4.317488 1084 +doit 1 14 4.317488 4.317488 1111 +individu 1 13 4.382027 4.382027 1126 +readabl 1 12 4.465908 4.465908 1258 +verifi 1 12 4.465908 4.465908 1261 +literatur 2 11 4.553877 9.107754 1300 +broad 1 11 4.553877 4.553877 1302 +worth 1 11 4.553877 4.553877 1294 +summar 1 11 4.553877 4.553877 1295 +strongli 2 10 4.653960 9.307920 1406 +operatingsystem 1 10 4.653960 4.653960 1401 +total 1 10 4.653960 4.653960 1398 +exact 1 9 4.753590 4.753590 1509 +informationabout 1 9 4.753590 4.753590 1515 +equival 1 9 4.753590 4.753590 1496 +herefor 1 9 4.753590 4.753590 1483 +solomon 2 8 4.875197 9.750394 1716 +theme 1 8 4.875197 4.875197 1707 +noon 1 7 5.010635 5.010635 1804 +suffici 1 7 5.010635 5.010635 1897 +larger 1 7 5.010635 5.010635 1875 +smaller 1 7 5.010635 5.010635 1874 +craig 1 7 5.010635 5.010635 1879 +sciencesoffic 1 6 5.164786 5.164786 2101 +onoper 1 6 5.164786 5.164786 2048 +carefulli 1 6 5.164786 5.164786 2045 +approv 1 6 5.164786 5.164786 2078 +prasad 1 6 5.164786 5.164786 2126 +formerli 1 5 5.347108 5.347108 2397 +deshpand 1 5 5.347108 5.347108 2431 +systemsfal 1 4 5.568345 5.568345 2683 +marvin 1 4 5.568345 5.568345 2806 +exposur 1 4 5.568345 5.568345 2598 +ident 1 4 5.568345 5.568345 2826 +will 1 4 5.568345 5.568345 2782 +raman 1 4 5.568345 5.568345 2827 +advancedoper 1 3 5.857933 5.857933 3403 +macc 1 3 5.857933 5.857933 3414 +focal 1 3 5.857933 5.857933 3404 +gradingther 1 3 5.857933 5.857933 3455 +franci 1 3 5.857933 5.857933 3287 +pang 1 3 5.857933 5.857933 3509 +avinash 1 3 5.857933 5.857933 3510 +rajesh 1 3 5.857933 5.857933 3511 +troffic 1 2 6.263398 6.263398 4706 +pmin 1 2 6.263398 6.263398 4492 +avaiabl 1 2 6.263398 6.263398 4703 +multic 1 2 6.263398 6.263398 4304 +interprocess 1 2 6.263398 6.263398 4174 +satisfactori 1 2 6.263398 6.263398 4567 +usea 1 2 6.263398 6.263398 4800 +andconfer 1 2 6.263398 6.263398 4568 +deskfor 1 2 6.263398 6.263398 4584 +youto 1 2 6.263398 6.263398 4093 +willinstead 1 2 6.263398 6.263398 4569 +adiscuss 1 2 6.263398 6.263398 4570 +geta 1 2 6.263398 6.263398 4571 +quietli 1 2 6.263398 6.263398 4572 +thoroughli 1 2 6.263398 6.263398 4801 +salmon 1 2 6.263398 6.263398 4802 +chien 1 2 6.263398 6.263398 4541 +sodani 1 2 6.263398 6.263398 4803 +basnei 1 2 6.263398 6.263398 4804 +biswadeep 1 2 6.263398 6.263398 4805 +taxiao 1 2 6.263398 6.263398 4806 +sridhar 1 2 6.263398 6.263398 4807 +eduthu 1 2 6.263398 6.263398 4721 +inroom 1 1 6.957497 6.957497 9183 +examtogeth 1 1 6.957497 6.957497 9184 +bedetermin 1 1 6.957497 6.957497 9185 +inconsider 1 1 6.957497 6.957497 9186 +sciencestextther 1 1 6.957497 6.957497 9187 +papersa 1 1 6.957497 6.957497 9188 +thoseof 1 1 6.957497 6.957497 9189 +lessout 1 1 6.957497 6.957497 9190 +projecty 1 1 6.957497 6.957497 9191 +implementationsof 1 1 6.957497 6.957497 9192 +unvalid 1 1 6.957497 6.957497 9193 +ashort 1 1 6.957497 6.957497 9194 +presentationabout 1 1 6.957497 6.957497 9195 +presentationsher 1 1 6.957497 6.957497 9196 +presen 1 1 6.957497 6.957497 9197 +manyan 1 1 6.957497 6.957497 9198 +stubb 1 1 6.957497 6.957497 9199 +bigg 1 1 6.957497 6.957497 9200 +gunawan 1 1 6.957497 6.957497 9201 +agu 1 1 6.957497 6.957497 9202 +qingmin 1 1 6.957497 6.957497 9203 +larsen 1 1 6.957497 6.957497 9204 +conroi 1 1 6.957497 6.957497 9205 +fritz 1 1 6.957497 6.957497 9206 +jordan 1 1 6.957497 6.957497 9207 +yanm 1 1 6.957497 6.957497 9208 +xinyu 1 1 6.957497 6.957497 9209 +munson 1 1 6.957497 6.957497 9210 +wenjun 1 1 6.957497 6.957497 9211 +xinyi 1 1 6.957497 6.957497 9212 +yufei 1 1 6.957497 6.957497 9213 +zeyu 1 1 6.957497 6.957497 9214 +gopal 1 1 6.957497 6.957497 9215 +leesolomon 1 1 6.957497 6.957497 9216 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html new file mode 100644 index 00000000..55b8662f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~suhui^cs132.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +time 1 293 1.098612 1.098612 17 +homepag 2 93 2.397895 4.795790 148 +chiang 1 7 5.010635 5.010635 1853 +gradesgo 1 1 6.957497 6.957497 9217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~tick^cs110.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~tick^cs110.html new file mode 100644 index 00000000..7dbd044f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~tick^cs110.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 3 329 1.098612 3.295836 16 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +also 2 259 1.386294 2.772588 28 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +modifi 2 178 1.609438 3.218876 35 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +machin 2 129 2.079442 4.158884 95 +compil 1 122 2.079442 2.079442 96 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +solut 2 82 2.484907 4.969814 162 +contain 1 81 2.484907 2.484907 174 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +want 1 79 2.564949 2.564949 199 +mondai 1 77 2.564949 2.564949 206 +write 2 72 2.639057 5.278114 222 +solv 1 73 2.639057 2.639057 234 +window 2 68 2.708050 5.416100 242 +copi 2 63 2.772589 5.545178 284 +room 3 59 2.833213 8.499639 301 +locat 1 59 2.833213 2.833213 303 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +cover 1 55 2.944439 2.944439 329 +particular 2 51 2.995732 5.991464 352 +run 1 51 2.995732 2.995732 347 +week 1 52 2.995732 2.995732 343 +pointer 1 48 3.044522 3.044522 368 +even 1 45 3.135494 3.135494 393 +textbook 1 44 3.135494 3.135494 397 +netscap 1 44 3.135494 3.135494 395 +long 1 43 3.178054 3.178054 413 +howev 2 41 3.218876 6.437752 422 +must 1 40 3.258097 3.258097 442 +probabl 1 40 3.258097 3.258097 455 +microsoft 5 38 3.295837 16.479185 468 +open 1 38 3.295837 3.295837 469 +hand 1 37 3.332205 3.332205 475 +copyright 1 36 3.367296 3.367296 495 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +either 1 35 3.401197 3.401197 506 +go 1 33 3.433987 3.433987 529 +depend 1 29 3.583519 3.583519 583 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +toward 1 25 3.737670 3.737670 668 +jeff 1 25 3.737670 3.737670 673 +lab 1 24 3.761200 3.761200 698 +dai 1 22 3.850148 3.850148 753 +avoid 1 21 3.912023 3.912023 799 +increas 1 20 3.951244 3.951244 829 +exercis 2 19 4.007333 8.014666 842 +left 1 19 4.007333 4.007333 851 +lyco 1 19 4.007333 4.007333 871 +along 1 18 4.060443 4.060443 878 +across 1 16 4.174387 4.174387 974 +dilbert 1 16 4.174387 4.174387 996 +fortran 7 15 4.248495 29.739465 1027 +configur 1 15 4.248495 4.248495 1012 +purchas 1 15 4.248495 4.248495 1030 +comic 1 14 4.317488 4.317488 1103 +primarili 1 13 4.382027 4.382027 1185 +vectra 3 12 4.465908 13.397724 1267 +overal 1 12 4.465908 4.465908 1254 +holidai 1 12 4.465908 4.465908 1224 +insid 1 12 4.465908 4.465908 1262 +keyword 1 11 4.553877 4.553877 1356 +night 1 11 4.553877 4.553877 1319 +packard 1 10 4.653960 4.653960 1444 +certain 1 10 4.653960 4.653960 1393 +seven 1 9 4.753590 4.753590 1561 +prefer 1 9 4.753590 4.753590 1491 +correctli 1 9 4.753590 4.753590 1478 +wall 1 9 4.753590 4.753590 1553 +hewlett 1 8 4.875197 4.875197 1709 +printer 1 8 4.875197 4.875197 1621 +bestor 2 6 5.164786 10.329572 2099 +lampert 1 5 5.347108 5.347108 2398 +gareth 1 5 5.347108 5.347108 2392 +closest 1 4 5.568345 5.568345 2828 +relief 1 4 5.568345 5.568345 2784 +labyou 1 3 5.857933 5.857933 3406 +aren 1 3 5.857933 5.857933 3512 +dorm 1 3 5.857933 5.857933 3407 +lahei 1 3 5.857933 5.857933 3408 +quota 3 2 6.263398 18.790194 4753 +exce 1 1 6.957497 6.957497 9218 +bewar 1 1 6.957497 6.957497 9219 +outsidehallwai 1 1 6.957497 6.957497 9220 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html new file mode 100644 index 00000000..69476f38 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~tmunson^302^home.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +wisc 2 242 1.386294 2.772588 33 +gener 1 220 1.386294 1.386294 27 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +provid 1 121 2.079442 2.079442 94 +document 1 121 2.079442 2.079442 89 +check 1 115 2.197225 2.197225 118 +homepag 2 93 2.397895 4.795790 148 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +chang 1 82 2.484907 2.484907 163 +solv 1 73 2.639057 2.639057 234 +frequent 1 49 3.044522 3.044522 367 +textbook 1 44 3.135494 3.135494 397 +purpos 1 37 3.332205 3.332205 481 +eduoffic 1 33 3.433987 3.433987 531 +often 1 31 3.496508 3.496508 551 +walter 1 17 4.110874 4.110874 950 +todd 1 15 4.248495 4.248495 1051 +assignmentsprogram 1 6 5.164786 5.164786 2019 +assignmentshomework 1 4 5.568345 5.568345 2721 +pertain 1 3 5.857933 5.857933 3208 +homepagewelcom 1 2 6.263398 6.263398 4808 +tmunson 1 2 6.263398 6.263398 4809 +statisticsoffic 1 2 6.263398 6.263398 4810 +homepagec 1 1 6.957497 6.957497 9221 +responsibilityto 1 1 6.957497 6.957497 9222 +informationinstructor 1 1 6.957497 6.957497 9223 +munsonemail 1 1 6.957497 6.957497 9224 +appointmentsect 1 1 6.957497 6.957497 9225 +savitchclass 1 1 6.957497 6.957497 9226 +informationexpectationssyllabusexam 1 1 6.957497 6.957497 9227 +schedule 1 1 6.957497 6.957497 9228 +mailgradingl 1 1 6.957497 6.957497 9229 +assignmentsextra 1 1 6.957497 6.957497 9230 +creditpoliciesconsult 1 1 6.957497 6.957497 9231 +responsibilitiesacadem 1 1 6.957497 6.957497 9232 +misconductoth 1 1 6.957497 6.957497 9233 +informationdaili 1 1 6.957497 6.957497 9234 +classoth 1 1 6.957497 6.957497 9235 +resourcesc 1 1 6.957497 6.957497 9236 +homepagetmunson 1 1 6.957497 6.957497 9237 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html new file mode 100644 index 00000000..421b448a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~turnidge^cs302.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 5 412 0.693147 3.465735 8 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +cours 3 273 1.098612 3.295836 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +gener 2 220 1.386294 2.772588 27 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +class 3 199 1.609438 4.828314 37 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +hour 2 165 1.791759 3.583518 46 +avail 2 169 1.791759 3.583518 48 +read 2 154 1.791759 3.583518 47 +assign 2 135 1.945910 3.891820 66 +note 1 142 1.945910 1.945910 67 +provid 1 121 2.079442 2.079442 94 +place 1 106 2.197225 2.197225 124 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +instructor 1 108 2.197225 2.197225 107 +text 2 98 2.302585 4.605170 133 +section 4 94 2.397895 9.591580 149 +grade 1 90 2.397895 2.397895 142 +start 2 83 2.484907 4.969814 173 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +syllabu 2 67 2.708050 5.416100 247 +window 1 68 2.708050 2.708050 242 +handout 4 64 2.772589 11.090356 263 +polici 1 64 2.772589 2.772589 279 +collect 1 65 2.772589 2.772589 268 +date 1 51 2.995732 2.995732 344 +get 2 46 3.091042 6.182084 380 +netscap 1 44 3.135494 3.135494 395 +consult 1 24 3.761200 3.761200 687 +tent 1 22 3.850148 3.850148 739 +facil 1 20 3.951244 3.951244 814 +whole 1 17 4.110874 4.110874 940 +todd 2 15 4.248495 8.496990 1051 +difficulti 1 13 4.382027 4.382027 1132 +tutor 1 9 4.753590 4.753590 1552 +turnidg 2 4 5.568345 11.136690 2829 +struct 1 4 5.568345 5.568345 2821 +nolandinstructor 1 2 6.263398 6.263398 4785 +muchinform 1 2 6.263398 6.263398 4811 +turnidgeoffic 1 1 6.957497 6.957497 9238 +tbalab 1 1 6.957497 6.957497 9239 +tbaannouncementsclass 1 1 6.957497 6.957497 9240 +classa 1 1 6.957497 6.957497 9241 +byother 1 1 6.957497 6.957497 9242 +gregorysharp 1 1 6.957497 6.957497 9243 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html new file mode 100644 index 00000000..ed03b9eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.cs.wisc.edu^~weaver^cs302.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 4 374 0.693147 2.772588 7 +depart 1 457 0.693147 0.693147 12 +offic 6 299 1.098612 6.591672 13 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 3 220 1.386294 4.158882 29 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +read 1 154 1.791759 1.791759 47 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +assign 2 135 1.945910 3.891820 66 +first 1 140 1.945910 1.945910 71 +problem 1 147 1.945910 1.945910 75 +object 1 138 1.945910 1.945910 79 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +instructor 1 108 2.197225 2.197225 107 +code 1 108 2.197225 2.197225 116 +text 2 98 2.302585 4.605170 133 +section 4 94 2.397895 9.591580 149 +grade 2 90 2.397895 4.795790 142 +exam 2 86 2.484907 4.969814 169 +info 1 85 2.484907 2.484907 176 +chang 1 82 2.484907 2.484907 163 +sourc 1 77 2.564949 2.564949 201 +homework 1 79 2.564949 2.564949 193 +exampl 1 77 2.564949 2.564949 195 +solv 1 73 2.639057 2.639057 234 +thursdai 1 70 2.708050 2.708050 241 +syllabu 1 67 2.708050 2.708050 247 +polici 3 64 2.772589 8.317767 279 +handout 2 64 2.772589 5.545178 263 +locat 1 59 2.833213 2.833213 303 +week 2 52 2.995732 5.991464 343 +appoint 1 49 3.044522 3.044522 358 +still 1 50 3.044522 3.044522 362 +algebra 1 45 3.135494 3.135494 394 +announc 1 40 3.258097 3.258097 441 +late 1 40 3.258097 3.258097 439 +statist 3 35 3.401197 10.203591 521 +everi 1 34 3.401197 3.401197 519 +print 1 34 3.401197 3.401197 503 +consult 1 24 3.761200 3.761200 687 +dai 1 22 3.850148 3.850148 753 +walter 1 17 4.110874 4.110874 950 +zhang 1 16 4.174387 4.174387 980 +weslei 1 16 4.174387 4.174387 983 +misconduct 1 16 4.174387 4.174387 1003 +quiz 1 16 4.174387 4.174387 990 +style 1 15 4.248495 4.248495 1036 +vectra 1 12 4.465908 4.465908 1267 +savitch 1 12 4.465908 4.465908 1269 +addison 1 12 4.465908 4.465908 1230 +chri 3 11 4.553877 13.661631 1311 +statement 1 11 4.553877 4.553877 1313 +errata 1 10 4.653960 4.653960 1403 +login 1 9 4.753590 4.753590 1550 +seven 1 9 4.753590 4.753590 1561 +reload 1 8 4.875197 4.875197 1682 +isbn 1 7 5.010635 5.010635 1901 +guidelin 1 7 5.010635 5.010635 1832 +rough 1 6 5.164786 5.164786 2107 +noland 2 5 5.347108 10.694216 2420 +psych 1 3 5.857933 5.857933 3498 +grader 1 3 5.857933 5.857933 3165 +weaver 4 2 6.263398 25.053592 4770 +kei 1 2 6.263398 6.263398 4812 +staf 1 1 6.957497 6.957497 9244 +policyassign 1 1 6.957497 6.957497 9245 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.ece.wisc.edu^~jes^ece752.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.ece.wisc.edu^~jes^ece752.html new file mode 100644 index 00000000..a57b4091 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.ece.wisc.edu^~jes^ece752.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +offic 6 299 1.098612 6.591672 13 +project 5 340 1.098612 5.493060 18 +cours 2 273 1.098612 2.197224 15 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +paper 4 205 1.609438 6.437752 38 +list 1 201 1.609438 1.609438 39 +read 5 154 1.791759 8.958795 47 +network 4 168 1.791759 7.167036 61 +hour 3 165 1.791759 5.375277 46 +phone 2 175 1.791759 3.583518 45 +address 1 170 1.791759 1.791759 62 +assign 6 135 1.945910 11.675460 66 +lectur 4 135 1.945910 7.783640 73 +note 3 142 1.945910 5.837730 67 +architectur 2 139 1.945910 3.891820 77 +hall 2 146 1.945910 3.891820 65 +perform 1 143 1.945910 1.945910 74 +spring 2 131 2.079442 4.158884 88 +technolog 2 131 2.079442 4.158884 102 +introduct 1 126 2.079442 2.079442 87 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +final 2 116 2.197225 4.394450 108 +instructor 1 108 2.197225 2.197225 107 +part 6 98 2.302585 13.815510 129 +advanc 5 99 2.302585 11.512925 130 +memori 3 101 2.302585 6.907755 139 +solut 5 82 2.484907 12.424535 162 +exam 3 86 2.484907 7.454721 169 +homework 14 79 2.564949 35.909286 193 +summari 1 73 2.639057 2.639057 237 +main 2 67 2.708050 5.416100 256 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +content 5 59 2.833213 14.166065 302 +special 1 56 2.890372 2.890372 320 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +instruct 1 53 2.944439 2.944439 332 +tabl 5 51 2.995732 14.978660 346 +set 1 50 3.044522 3.044522 361 +midterm 2 45 3.135494 6.270988 392 +offer 1 43 3.178054 3.178054 414 +cach 4 41 3.218876 12.875504 432 +review 1 42 3.218876 3.218876 425 +cost 1 37 3.332205 3.332205 480 +jame 1 35 3.401197 3.401197 507 +limit 1 29 3.583519 3.583519 585 +consid 1 29 3.583519 3.583519 590 +full 4 28 3.610918 14.443672 615 +multiprocessor 4 28 3.610918 14.443672 605 +arrai 2 27 3.637586 7.275172 627 +trace 1 25 3.737670 3.737670 677 +miscellan 1 23 3.806662 3.806662 731 +disk 2 22 3.850148 7.700296 747 +rout 2 21 3.912023 7.824046 793 +smith 1 20 3.951244 3.951244 820 +reserv 1 20 3.951244 3.951244 808 +thur 1 19 4.007333 4.007333 847 +interconnect 2 17 4.110874 8.221748 937 +vector 1 16 4.174387 4.174387 961 +doit 4 14 4.317488 17.269952 1111 +station 1 13 4.382027 4.382027 1157 +tue 1 11 4.553877 4.553877 1308 +clock 1 11 4.553877 4.553877 1320 +pipelin 4 7 5.010635 20.042540 1830 +biochemistri 1 3 5.857933 5.857933 3513 +vliw 1 3 5.857933 5.857933 3514 +harm 1 3 5.857933 5.857933 3515 +princ 1 2 6.263398 6.263398 4813 +specmark 1 2 6.263398 6.263398 4471 +princeoffic 1 1 6.957497 6.957497 9246 +miscellaneousnew 1 1 6.957497 6.957497 9247 +soln 1 1 6.957497 6.957497 9248 +pmread 1 1 6.957497 6.957497 9249 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ new file mode 100644 index 00000000..0a6e014f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs417^ @@ -0,0 +1,245 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +univers 5 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +system 2 443 0.693147 1.386294 6 +cours 9 273 1.098612 9.887508 15 +project 6 340 1.098612 6.591672 18 +student 2 343 1.098612 2.197224 19 +last 2 314 1.098612 2.197224 14 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +cornel 8 215 1.386294 11.090352 23 +also 2 259 1.386294 2.772588 28 +gener 1 220 1.386294 1.386294 27 +group 3 183 1.609438 4.828314 36 +list 2 201 1.609438 3.218876 39 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +data 3 170 1.791759 5.375277 49 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +object 6 138 1.945910 11.675460 79 +model 3 145 1.945910 5.837730 69 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +first 1 140 1.945910 1.945910 71 +lectur 1 135 1.945910 1.945910 73 +schedul 4 119 2.079442 8.317768 85 +spring 2 131 2.079442 4.158884 88 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +welcom 1 122 2.079442 2.079442 99 +theori 3 111 2.197225 6.591675 127 +topic 3 114 2.197225 6.591675 110 +final 3 116 2.197225 6.591675 108 +mathemat 2 108 2.197225 4.394450 123 +make 1 111 2.197225 2.197225 120 +instructor 1 108 2.197225 2.197225 107 +part 1 98 2.302585 2.302585 129 +graphic 8 90 2.397895 19.183160 147 +center 3 88 2.397895 7.193685 158 +comment 2 93 2.397895 4.795790 146 +associ 1 93 2.397895 2.397895 151 +follow 1 92 2.397895 2.397895 143 +imag 1 91 2.397895 2.397895 161 +exam 2 86 2.484907 4.969814 169 +level 1 87 2.484907 2.484907 180 +requir 1 81 2.484907 2.484907 167 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +homework 12 79 2.564949 30.779388 193 +dynam 2 76 2.564949 5.129898 194 +method 2 80 2.564949 5.129898 213 +april 2 77 2.564949 5.129898 196 +state 1 76 2.564949 2.564949 207 +upson 4 71 2.639057 10.556228 218 +summari 2 73 2.639057 5.278114 237 +line 2 75 2.639057 5.278114 231 +tuesdai 1 73 2.639057 2.639057 219 +onlin 1 75 2.639057 2.639057 223 +view 2 70 2.708050 5.416100 254 +test 2 66 2.708050 5.416100 252 +main 1 67 2.708050 2.708050 256 +practic 1 70 2.708050 2.708050 246 +simul 1 66 2.708050 2.708050 255 +order 1 69 2.708050 2.708050 249 +complex 1 64 2.772589 2.772589 269 +guid 1 63 2.772589 2.772589 267 +march 3 61 2.833213 8.499639 295 +content 2 59 2.833213 5.666426 302 +simpl 1 60 2.833213 2.833213 298 +colleg 1 61 2.833213 2.833213 300 +space 2 57 2.890372 5.780744 310 +faculti 1 56 2.890372 2.890372 325 +scientif 4 53 2.944439 11.777756 341 +cover 1 55 2.944439 2.944439 329 +visual 4 48 3.044522 12.178088 372 +principl 1 48 3.044522 3.044522 357 +standard 1 48 3.044522 3.044522 365 +california 1 46 3.091042 3.091042 388 +math 5 44 3.135494 15.677470 402 +textbook 2 44 3.135494 6.270988 397 +vision 2 41 3.218876 6.437752 430 +combin 1 42 3.218876 3.218876 421 +examin 1 42 3.218876 3.218876 424 +error 2 40 3.258097 6.516194 449 +map 2 39 3.258097 6.516194 452 +form 1 39 3.258097 3.258097 443 +prototyp 1 38 3.295837 3.295837 463 +field 5 37 3.332205 16.661025 482 +connect 1 37 3.332205 3.332205 485 +mean 1 37 3.332205 3.332205 477 +staff 2 36 3.367296 6.734592 490 +copyright 1 36 3.367296 3.367296 495 +taught 1 33 3.433987 3.433987 526 +product 1 33 3.433987 3.433987 527 +transform 3 32 3.465736 10.397208 542 +human 2 32 3.465736 6.931472 546 +anim 4 31 3.496508 13.986032 557 +focus 1 29 3.583519 3.583519 584 +limit 1 29 3.583519 3.583519 585 +relev 1 26 3.688879 3.688879 637 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +other 1 24 3.761200 3.761200 697 +begin 1 23 3.806662 3.806662 716 +equat 1 23 3.806662 3.806662 724 +color 4 22 3.850148 15.400592 762 +deal 1 22 3.850148 3.850148 736 +properti 1 22 3.850148 3.850148 749 +serv 1 22 3.850148 3.850148 758 +period 1 22 3.850148 3.850148 743 +sent 1 22 3.850148 3.850148 763 +among 1 21 3.912023 3.912023 781 +viewer 1 21 3.912023 3.912023 787 +break 1 20 3.951244 3.951244 812 +prepar 1 20 3.951244 3.951244 824 +geometr 2 19 4.007333 8.014666 852 +miss 1 19 4.007333 4.007333 866 +dimension 2 18 4.060443 8.120886 909 +figur 1 18 4.060443 4.060443 903 +render 2 17 4.110874 8.221748 947 +differenti 1 17 4.110874 4.110874 921 +modif 1 17 4.110874 4.110874 913 +vector 2 16 4.174387 8.348774 961 +normal 1 16 4.174387 4.174387 995 +devic 1 16 4.174387 4.174387 1002 +atth 2 15 4.248495 8.496990 1019 +hierarch 2 15 4.248495 8.496990 1018 +style 1 15 4.248495 4.248495 1036 +camera 2 14 4.317488 8.634976 1115 +hopefulli 1 14 4.317488 4.317488 1071 +scene 1 14 4.317488 4.317488 1114 +alan 2 13 4.382027 8.764054 1146 +composit 1 13 4.382027 4.382027 1150 +coordin 1 13 4.382027 4.382027 1182 +automata 1 13 4.382027 4.382027 1135 +difficulti 1 13 4.382027 4.382027 1132 +opportun 1 13 4.382027 4.382027 1161 +prelim 3 12 4.465908 13.397724 1201 +skill 2 12 4.465908 8.931816 1205 +holidai 2 12 4.465908 8.931816 1224 +bruce 2 12 4.465908 8.931816 1226 +land 2 12 4.465908 8.931816 1273 +huang 2 12 4.465908 8.931816 1202 +evolv 1 12 4.465908 4.465908 1223 +optic 1 12 4.465908 4.465908 1221 +remov 1 12 4.465908 4.465908 1225 +buffer 1 12 4.465908 4.465908 1211 +scan 1 12 4.465908 4.465908 1243 +volum 3 11 4.553877 13.661631 1347 +transpar 1 11 4.553877 4.553877 1325 +statement 1 11 4.553877 4.553877 1313 +forc 1 10 4.653960 4.653960 1384 +perspect 1 10 4.653960 4.653960 1437 +facilit 1 10 4.653960 4.653960 1412 +surfac 13 9 4.753590 61.796670 1574 +light 2 9 4.753590 9.507180 1533 +incomplet 1 9 4.753590 4.753590 1575 +leader 1 9 4.753590 4.753590 1576 +explicit 1 9 4.753590 4.753590 1525 +screen 1 9 4.753590 4.753590 1577 +wall 1 9 4.753590 4.753590 1553 +observ 1 9 4.753590 4.753590 1578 +rhode 1 9 4.753590 4.753590 1579 +polygon 2 8 4.875197 9.750394 1723 +textur 2 8 4.875197 9.750394 1677 +convers 1 8 4.875197 4.875197 1673 +parametr 2 7 5.010635 10.021270 1819 +clip 2 7 5.010635 10.021270 1868 +shade 2 7 5.010635 10.021270 1881 +pagecomput 1 7 5.010635 5.010635 1900 +stereo 1 7 5.010635 5.010635 1818 +channel 1 7 5.010635 5.010635 1836 +justin 1 7 5.010635 5.010635 1789 +davi 1 7 5.010635 5.010635 1888 +artist 1 6 5.164786 5.164786 2127 +onto 1 6 5.164786 5.164786 2089 +hidden 1 6 5.164786 5.164786 1987 +notifi 1 6 5.164786 5.164786 2106 +rotat 1 5 5.347108 5.347108 2295 +rigid 1 5 5.347108 5.347108 2432 +cellular 1 5 5.347108 5.347108 2433 +anti 1 5 5.347108 5.347108 2434 +hyper 1 5 5.347108 5.347108 2435 +particl 1 5 5.347108 5.347108 2436 +oregon 1 5 5.347108 5.347108 2437 +implicit 1 4 5.568345 5.568345 2830 +invers 1 4 5.568345 5.568345 2764 +pixel 1 4 5.568345 5.568345 2831 +contour 1 4 5.568345 5.568345 2812 +administrivia 2 3 5.857933 11.715866 3166 +kinemat 2 3 5.857933 11.715866 3516 +computergraph 1 3 5.857933 5.857933 3517 +wave 1 3 5.857933 5.857933 3518 +shadow 1 3 5.857933 5.857933 3519 +bump 1 3 5.857933 5.857933 3497 +arrow 1 3 5.857933 5.857933 3520 +makeup 1 3 5.857933 5.857933 3449 +jing 1 3 5.857933 5.857933 3521 +mccune 1 3 5.857933 5.857933 3522 +waterloo 1 3 5.857933 5.857933 3523 +cardiff 1 3 5.857933 5.857933 3154 +watt 2 2 6.263398 12.526796 4814 +scalar 2 2 6.263398 12.526796 4815 +religi 2 2 6.263398 12.526796 4816 +folei 1 2 6.263398 6.263398 4817 +bruceland 1 2 6.263398 6.263398 4818 +designedto 1 2 6.263398 6.263398 4712 +illumin 1 2 6.263398 6.263398 4819 +blobbi 1 2 6.263398 6.263398 4820 +homogen 1 2 6.263398 6.263398 4821 +mimic 1 2 6.263398 6.263398 4736 +phong 1 2 6.263398 6.263398 4822 +alias 1 2 6.263398 6.263398 4823 +tomak 1 2 6.263398 6.263398 4675 +belief 1 2 6.263398 6.263398 4553 +inord 1 2 6.263398 6.263398 4824 +absent 1 2 6.263398 6.263398 4825 +deviat 1 2 6.263398 6.263398 4826 +wale 1 2 6.263398 6.263398 4827 +manchest 1 2 6.263398 6.263398 4828 +todoc 1 2 6.263398 6.263398 4829 +quadric 2 1 6.957497 13.914994 9250 +swept 1 1 6.957497 6.957497 9251 +tensor 1 1 6.957497 6.957497 9252 +tessel 1 1 6.957497 6.957497 9253 +gourand 1 1 6.957497 6.957497 9254 +vernier 1 1 6.957497 6.957497 9255 +acuiti 1 1 6.957497 6.957497 9256 +mispercept 1 1 6.957497 6.957497 9257 +advect 1 1 6.957497 6.957497 9258 +multiparamet 1 1 6.957497 6.957497 9259 +educationlaw 1 1 6.957497 6.957497 9260 +mandat 1 1 6.957497 6.957497 9261 +intendingto 1 1 6.957497 6.957497 9262 +requestedto 1 1 6.957497 6.957497 9263 +jmccune 1 1 6.957497 6.957497 9264 +csrelev 1 1 6.957497 6.957497 9265 +universityrel 1 1 6.957497 6.957497 9266 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ new file mode 100644 index 00000000..10266202 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs418^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +current 3 284 1.098612 3.295836 21 +project 3 340 1.098612 3.295836 18 +student 2 343 1.098612 2.197224 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cornel 4 215 1.386294 5.545176 23 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +base 1 165 1.791759 1.791759 50 +model 2 145 1.945910 3.891820 69 +relat 2 139 1.945910 3.891820 68 +first 1 140 1.945910 1.945910 71 +object 1 138 1.945910 1.945910 79 +note 1 142 1.945910 1.945910 67 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +welcom 1 122 2.079442 2.079442 99 +theori 3 111 2.197225 6.591675 127 +topic 2 114 2.197225 4.394450 110 +site 1 106 2.197225 2.197225 119 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +graphic 3 90 2.397895 7.193685 147 +center 3 88 2.397895 7.193685 158 +comment 2 93 2.397895 4.795790 146 +section 1 94 2.397895 2.397895 149 +commun 1 95 2.397895 2.397895 157 +contain 1 81 2.484907 2.484907 174 +level 1 87 2.484907 2.484907 180 +start 1 83 2.484907 2.484907 173 +build 1 85 2.484907 2.484907 184 +refer 1 78 2.564949 2.564949 203 +materi 2 75 2.639057 5.278114 221 +onlin 1 75 2.639057 2.639057 223 +main 1 67 2.708050 2.708050 256 +practic 1 70 2.708050 2.708050 246 +order 1 69 2.708050 2.708050 249 +laboratori 2 63 2.772589 5.545178 292 +result 1 65 2.772589 2.772589 281 +virtual 1 62 2.772589 2.772589 285 +scientif 2 53 2.944439 5.888878 341 +visual 4 48 3.044522 12.178088 372 +principl 1 48 3.044522 3.044522 357 +get 1 46 3.091042 3.091042 380 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +mark 2 44 3.135494 6.270988 403 +map 1 39 3.258097 3.258097 452 +procedur 2 36 3.367296 6.734592 488 +copyright 1 36 3.367296 3.367296 495 +taught 1 33 3.433987 3.433987 526 +transform 2 32 3.465736 6.931472 542 +anim 2 31 3.496508 6.993016 557 +deal 1 22 3.850148 3.850148 736 +sent 1 22 3.850148 3.850148 763 +facil 1 20 3.951244 3.951244 814 +exercis 4 19 4.007333 16.029332 842 +atth 1 15 4.248495 4.248495 1019 +camera 2 14 4.317488 8.634976 1115 +train 1 14 4.317488 4.317488 1066 +land 1 12 4.465908 4.465908 1273 +statement 1 11 4.553877 4.553877 1313 +perspect 1 10 4.653960 4.653960 1437 +surfac 2 9 4.753590 9.507180 1574 +leader 1 9 4.753590 4.753590 1576 +light 1 9 4.753590 4.753590 1533 +textur 2 8 4.875197 9.750394 1677 +competit 1 8 4.875197 4.875197 1635 +polygon 1 8 4.875197 4.875197 1723 +pagecomput 1 7 5.010635 5.010635 1900 +parametr 1 7 5.010635 5.010635 1819 +chat 1 6 5.164786 5.164786 2128 +restrict 1 6 5.164786 5.164786 2129 +implicit 1 4 5.568345 5.568345 2830 +enrol 1 4 5.568345 5.568345 2613 +computergraph 2 3 5.857933 11.715866 3517 +bump 1 3 5.857933 5.857933 3497 +folei 1 2 6.263398 6.263398 4817 +watt 1 2 6.263398 6.263398 4814 +bruceland 1 2 6.263398 6.263398 4818 +todoc 1 2 6.263398 6.263398 4829 +exercisesthi 1 1 6.957497 6.957497 9267 +universityundergradu 1 1 6.957497 6.957497 9268 +dcomput 1 1 6.957497 6.957497 9269 +sigucc 1 1 6.957497 6.957497 9270 +basededuc 1 1 6.957497 6.957497 9271 +areinclud 1 1 6.957497 6.957497 9272 +aboutc 1 1 6.957497 6.957497 9273 +semesteraccess 1 1 6.957497 6.957497 9274 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ new file mode 100644 index 00000000..b9acda33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/c/http_^^www.tc.cornell.edu^Visualization^Education^cs718^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +student 2 343 1.098612 2.197224 19 +us 2 329 1.098612 2.197224 16 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +fall 2 181 1.609438 3.218876 40 +group 2 183 1.609438 3.218876 36 +paper 1 205 1.609438 1.609438 38 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +model 2 145 1.945910 3.891820 69 +lectur 1 135 1.945910 1.945910 73 +document 1 121 2.079442 2.079442 89 +topic 4 114 2.197225 8.788900 110 +code 1 108 2.197225 2.197225 116 +imag 2 91 2.397895 4.795790 161 +graphic 1 90 2.397895 2.397895 147 +window 1 68 2.708050 2.708050 242 +content 1 59 2.833213 2.833213 302 +sampl 1 53 2.944439 2.944439 339 +visual 1 48 3.044522 3.044522 372 +video 1 44 3.135494 3.135494 405 +michael 1 35 3.401197 3.401197 514 +human 1 32 3.465736 3.465736 546 +express 1 32 3.465736 3.465736 540 +retriev 1 27 3.637586 3.637586 621 +appropri 1 18 4.060443 4.060443 883 +spatial 1 16 4.174387 4.174387 988 +huang 1 12 4.465908 4.465908 1202 +explicit 1 9 4.753590 4.753590 1525 +sean 1 8 4.875197 4.875197 1705 +justin 1 7 5.010635 5.010635 1789 +deliv 1 6 5.164786 5.164786 2070 +chosen 1 6 5.164786 5.164786 1984 +alex 1 6 5.164786 5.164786 2130 +facial 1 5 5.347108 5.347108 2438 +interior 1 5 5.347108 5.347108 2439 +particl 1 5 5.347108 5.347108 2436 +chose 1 4 5.568345 5.568345 2629 +arun 1 4 5.568345 5.568345 2736 +computergraph 1 3 5.857933 5.857933 3517 +hung 1 3 5.857933 5.857933 3524 +mccune 1 3 5.857933 5.857933 3522 +landscap 1 3 5.857933 5.857933 3525 +landi 1 2 6.263398 6.263398 4830 +tsai 1 2 6.263398 6.263398 4831 +stochast 1 2 6.263398 6.263398 4832 +semestereach 1 1 6.957497 6.957497 9275 +anddocu 1 1 6.957497 6.957497 9276 +metabal 1 1 6.957497 6.957497 9277 +arcuri 1 1 6.957497 6.957497 9278 +benton 1 1 6.957497 6.957497 9279 +interdepend 1 1 6.957497 6.957497 9280 +diffus 1 1 6.957497 6.957497 9281 +pollut 1 1 6.957497 6.957497 9282 +modelsfu 1 1 6.957497 6.957497 9283 +antialias 1 1 6.957497 6.957497 9284 +vermach 1 1 6.957497 6.957497 9285 +hsun 1 1 6.957497 6.957497 9286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cam.cornell.edu^ph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cam.cornell.edu^ph^index.html new file mode 100644 index 00000000..06695ce2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cam.cornell.edu^ph^index.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +applic 1 170 1.791759 1.791759 56 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +center 1 88 2.397895 2.397895 158 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +resum 1 79 2.564949 2.564949 217 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +nation 1 74 2.639057 2.639057 240 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +goal 1 66 2.708050 2.708050 250 +laboratori 1 63 2.772589 2.772589 292 +thesi 2 57 2.890372 5.780744 327 +scientif 2 53 2.944439 5.888878 341 +advisor 1 51 2.995732 2.995732 355 +numer 1 49 3.044522 3.044522 369 +least 2 35 3.401197 6.802394 516 +committe 1 34 3.401197 3.401197 522 +tech 1 35 3.401197 3.401197 515 +steve 1 29 3.583519 3.583519 594 +hous 1 21 3.912023 3.912023 801 +siam 1 21 3.912023 3.912023 800 +matrix 1 17 4.110874 4.110874 933 +squar 2 14 4.317488 8.634976 1082 +weight 1 12 4.465908 4.465908 1204 +statement 1 11 4.553877 4.553877 1313 +decomposit 1 10 4.653960 4.653960 1439 +rhode 1 9 4.753590 4.753590 1579 +juan 1 9 4.753590 4.753590 1580 +postdoc 1 8 4.875197 4.875197 1724 +cornellunivers 1 7 5.010635 5.010635 1916 +whichi 1 6 5.164786 5.164786 2056 +stabl 1 5 5.347108 5.347108 2309 +interior 1 5 5.347108 5.347108 2439 +orthogon 1 4 5.568345 5.568345 2832 +vavasi 2 3 5.857933 11.715866 3526 +hough 1 3 5.857933 5.857933 3527 +linearalgebra 1 2 6.263398 6.263398 4833 +anal 1 2 6.263398 6.263398 4834 +pointmethod 1 2 6.263398 6.263398 4835 +sandia 2 1 6.957497 13.914994 9287 +livermor 2 1 6.957497 13.914994 9288 +patti 1 1 6.957497 6.957497 9289 +houghpatti 1 1 6.957497 6.957497 9290 +frankh 1 1 6.957497 6.957497 9291 +nicktrefethen 1 1 6.957497 6.957497 9292 +schatz 1 1 6.957497 6.957497 9293 +optimizationi 1 1 6.957497 6.957497 9294 +meza 1 1 6.957497 6.957497 9295 +nationallaboratori 1 1 6.957497 6.957497 9296 +ofweight 1 1 6.957497 6.957497 9297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cam.cornell.edu^~baggett^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cam.cornell.edu^~baggett^index.html new file mode 100644 index 00000000..4e768456 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cam.cornell.edu^~baggett^index.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 3 431 0.693147 2.079441 10 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +applic 1 170 1.791759 1.791759 56 +year 2 148 1.945910 3.891820 84 +like 2 132 1.945910 3.891820 81 +model 2 145 1.945910 3.891820 69 +hall 1 146 1.945910 1.945910 65 +postscript 6 131 2.079442 12.476652 90 +mathemat 2 108 2.197225 4.394450 123 +theori 1 111 2.197225 2.197225 127 +center 1 88 2.397895 2.397895 158 +activ 1 84 2.484907 2.484907 182 +dynam 2 76 2.564949 5.129898 194 +april 1 77 2.564949 2.564949 196 +exampl 1 77 2.564949 2.564949 195 +appli 1 71 2.639057 2.639057 226 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +ithaca 1 65 2.772589 2.772589 294 +type 1 61 2.833213 2.833213 296 +thesi 2 57 2.890372 5.780744 327 +summer 1 56 2.890372 2.890372 311 +detail 1 57 2.890372 2.890372 321 +scientif 1 53 2.944439 2.944439 341 +physic 2 47 3.091042 6.182084 377 +mechan 1 43 3.178054 3.178054 416 +linear 1 41 3.218876 3.218876 431 +submit 2 39 3.258097 6.516194 440 +continu 1 39 3.258097 3.258097 448 +vita 1 38 3.295837 3.295837 473 +expect 1 37 3.332205 3.332205 484 +next 1 34 3.401197 3.401197 517 +curriculum 1 33 3.433987 3.433987 535 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +propos 1 28 3.610918 3.610918 602 +jeff 1 25 3.737670 3.737670 673 +background 1 25 3.737670 3.737670 664 +hill 1 25 3.737670 3.737670 670 +equat 1 23 3.806662 3.806662 724 +finish 1 22 3.850148 3.850148 748 +supervis 1 20 3.951244 3.951244 840 +mostli 1 19 4.007333 4.007333 869 +dimension 1 18 4.060443 4.060443 909 +seek 1 17 4.110874 4.110874 954 +outlin 1 17 4.110874 4.110874 914 +coupl 1 17 4.110874 4.110874 939 +normal 1 16 4.174387 4.174387 995 +transit 2 15 4.248495 8.496990 1046 +nick 1 13 4.382027 4.382027 1180 +misc 1 13 4.382027 4.382027 1124 +frank 1 9 4.753590 4.753590 1568 +rhode 1 9 4.753590 4.753590 1579 +unusu 1 9 4.753590 4.753590 1566 +sixth 1 7 5.010635 5.010635 1917 +atcornel 1 6 5.164786 5.164786 2131 +versu 1 6 5.164786 5.164786 2052 +fluid 3 5 5.347108 16.041324 2440 +stabil 1 5 5.347108 5.347108 2286 +satish 1 4 5.568345 5.568345 2833 +trefethen 3 3 5.857933 17.573799 3528 +exponenti 1 3 5.857933 5.857933 3529 +driscol 1 2 6.263398 6.263398 4836 +spectral 1 2 6.263398 6.263398 4837 +baggett 2 1 6.957497 13.914994 9298 +turbul 2 1 6.957497 13.914994 9299 +baggettjeff 1 1 6.957497 6.957497 9300 +hydrodynam 1 1 6.957497 6.957497 9301 +blend 1 1 6.957497 6.957497 9302 +iwould 1 1 6.957497 6.957497 9303 +abscissa 1 1 6.957497 6.957497 9304 +andphillip 1 1 6.957497 6.957497 9305 +subcrit 1 1 6.957497 6.957497 9306 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cs-tr.cs.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cs-tr.cs.cornell.edu new file mode 100644 index 00000000..b3b84faf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^cs-tr.cs.cornell.edu @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +cornel 2 215 1.386294 2.772588 23 +email 1 220 1.386294 1.386294 29 +list 2 201 1.609438 3.218876 39 +network 2 168 1.791759 3.583518 61 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +report 5 131 2.079442 10.397210 92 +document 2 121 2.079442 4.158884 89 +specif 1 106 2.197225 2.197225 106 +send 1 114 2.197225 2.197225 109 +technic 3 100 2.302585 6.907755 140 +search 5 95 2.397895 11.989475 155 +institut 2 84 2.484907 4.969814 187 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +want 1 79 2.564949 2.564949 199 +goal 1 66 2.708050 2.708050 250 +collect 3 65 2.772589 8.317767 268 +laboratori 1 63 2.772589 2.772589 292 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +sever 2 56 2.890372 5.780744 322 +allow 1 53 2.944439 2.944439 333 +made 1 44 3.135494 3.135494 398 +form 1 39 3.258097 3.258097 443 +author 1 39 3.258097 3.258097 450 +join 1 39 3.258097 3.258097 457 +industri 1 38 3.295837 3.295837 464 +field 2 37 3.332205 6.664410 482 +word 2 34 3.401197 6.802394 508 +titl 1 31 3.496508 3.496508 556 +limit 1 29 3.583519 3.583519 585 +particip 1 29 3.583519 3.583519 589 +packag 1 28 3.610918 3.610918 614 +background 1 25 3.737670 3.737670 664 +brows 2 23 3.806662 7.613324 726 +among 1 21 3.912023 3.912023 781 +tell 1 21 3.912023 3.912023 777 +offici 1 18 4.060443 4.060443 894 +commerci 1 16 4.174387 4.174387 1005 +whose 1 13 4.382027 4.382027 1166 +enter 1 10 4.653960 4.653960 1454 +govern 1 9 4.753590 4.753590 1581 +pronounc 1 7 5.010635 5.010635 1918 +ncstrl 7 3 5.857933 41.005531 3530 +interoper 1 2 6.263398 6.263398 4838 +andorgan 1 2 6.263398 6.263398 4443 +bibliograph 1 2 6.263398 6.263398 4699 +libraryncstrl 1 1 6.957497 6.957497 9307 +ancestr 1 1 6.957497 6.957497 9308 +internationalcollect 1 1 6.957497 6.957497 9309 +departmentsand 1 1 6.957497 6.957497 9310 +availablefor 1 1 6.957497 6.957497 9311 +eduat 1 1 6.957497 6.957497 9312 +ncstrlcollect 1 1 6.957497 6.957497 9313 +serversoper 1 1 6.957497 6.957497 9314 +participatinginstitut 1 1 6.957497 6.957497 9315 +ncstrlpress 1 1 6.957497 6.957497 9316 +theparticip 1 1 6.957497 6.957497 9317 +moreread 1 1 6.957497 6.957497 9318 +forinstitut 1 1 6.957497 6.957497 9319 +informationfind 1 1 6.957497 6.957497 9320 +snew 1 1 6.957497 6.957497 9321 +totech 1 1 6.957497 6.957497 9322 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^dri.cornell.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^dri.cornell.edu new file mode 100644 index 00000000..c88763b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^dri.cornell.edu @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +design 2 213 1.386294 2.772588 25 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +file 1 132 1.945910 1.945910 70 +process 1 142 1.945910 1.945910 72 +report 1 131 2.079442 2.079442 92 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +technic 2 100 2.302585 4.605170 140 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +institut 2 84 2.484907 4.969814 187 +server 1 76 2.564949 2.564949 204 +integr 1 67 2.708050 2.708050 245 +anoth 1 45 3.135494 3.135494 408 +mike 1 24 3.761200 3.761200 703 +brows 1 23 3.806662 3.806662 726 +enterpris 1 2 6.263398 6.263398 4839 +informationand 1 2 6.263398 6.263398 4840 +instituteabout 1 1 6.957497 6.957497 9323 +researchersat 1 1 6.957497 6.957497 9324 +searchal 1 1 6.957497 6.957497 9325 +reportssearch 1 1 6.957497 6.957497 9326 +ipic 1 1 6.957497 6.957497 9327 +itisingapor 1 1 6.957497 6.957497 9328 +altavistaforum 1 1 6.957497 6.957497 9329 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^dri.cornell.edu^pub^People^davis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^dri.cornell.edu^pub^People^davis.html new file mode 100644 index 00000000..81d9a333 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^dri.cornell.edu^pub^People^davis.html @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 3 384 0.693147 2.079441 11 +work 3 380 0.693147 2.079441 9 +system 2 443 0.693147 1.386294 6 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +us 6 329 1.098612 6.591672 16 +project 3 340 1.098612 3.295836 18 +cours 3 273 1.098612 3.295836 15 +student 3 343 1.098612 3.295836 19 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +design 3 213 1.386294 4.158882 25 +gener 3 220 1.386294 4.158882 27 +also 3 259 1.386294 4.158882 28 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +group 2 183 1.609438 3.218876 36 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +note 1 142 1.945910 1.945910 67 +report 2 131 2.079442 4.158884 92 +document 2 121 2.079442 4.158884 89 +machin 2 129 2.079442 4.158884 95 +tool 1 117 2.079442 2.079442 93 +structur 2 106 2.197225 4.394450 105 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +access 3 102 2.302585 6.907755 136 +technic 3 100 2.302585 6.907755 140 +memori 2 101 2.302585 4.605170 139 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +question 4 91 2.397895 9.591580 141 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +resourc 4 81 2.484907 9.939628 172 +institut 2 84 2.484907 4.969814 187 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +learn 1 86 2.484907 2.484907 170 +messag 1 76 2.564949 2.564949 212 +server 1 76 2.564949 2.564949 204 +sourc 1 77 2.564949 2.564949 201 +resum 1 79 2.564949 2.564949 217 +write 1 72 2.639057 2.639057 222 +onlin 1 75 2.639057 2.639057 223 +knowledg 3 67 2.708050 8.124150 243 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +improv 1 62 2.772589 2.772589 289 +copi 1 63 2.772589 2.772589 284 +collect 1 65 2.772589 2.772589 268 +share 2 59 2.833213 5.666426 304 +content 1 59 2.833213 2.833213 302 +explor 1 58 2.890372 2.890372 324 +run 1 51 2.995732 2.995732 347 +investig 1 51 2.995732 2.995732 353 +set 1 50 3.044522 3.044522 361 +without 1 50 3.044522 3.044522 370 +understand 1 47 3.091042 3.091042 384 +answer 2 45 3.135494 6.270988 391 +even 1 45 3.135494 3.135494 393 +made 1 44 3.135494 3.135494 398 +natur 1 44 3.135494 3.135494 406 +futur 2 41 3.218876 6.437752 427 +might 1 41 3.218876 3.218876 426 +author 1 39 3.258097 3.258097 450 +mean 2 37 3.332205 6.664410 477 +staff 2 36 3.367296 6.734592 490 +ofth 1 36 3.367296 3.367296 491 +especi 1 36 3.367296 3.367296 496 +product 2 33 3.433987 6.867974 527 +obtain 1 33 3.433987 3.433987 534 +collabor 1 32 3.465736 3.465736 543 +often 1 31 3.496508 3.496508 551 +produc 1 30 3.555348 3.555348 572 +abl 1 30 3.555348 3.555348 566 +ask 1 28 3.610918 3.610918 597 +manipul 1 27 3.637586 3.637586 624 +effort 1 26 3.688879 3.688879 652 +sport 1 25 3.737670 3.737670 683 +decis 1 23 3.806662 3.806662 728 +initi 1 23 3.806662 3.806662 717 +thank 1 23 3.806662 3.806662 721 +reduc 1 22 3.850148 3.850148 759 +corpor 2 21 3.912023 7.824046 802 +among 1 21 3.912023 3.912023 781 +annot 1 21 3.912023 3.912023 775 +increas 2 20 3.951244 7.902488 829 +qualiti 1 20 3.951244 3.951244 832 +fine 1 20 3.951244 3.951244 822 +media 1 19 4.007333 4.007333 861 +feedback 1 19 4.007333 4.007333 854 +seem 2 18 4.060443 8.120886 899 +agent 1 18 4.060443 4.060443 910 +whether 3 17 4.110874 12.332622 918 +remot 2 15 4.248495 8.496990 1041 +believ 1 13 4.382027 4.382027 1187 +captur 1 12 4.465908 4.465908 1232 +safe 1 12 4.465908 4.465908 1274 +market 1 11 4.553877 4.553877 1361 +end 1 9 4.753590 4.753590 1567 +correctli 1 9 4.753590 4.753590 1478 +risk 1 8 4.875197 4.875197 1689 +xerox 1 8 4.875197 4.875197 1725 +davi 2 7 5.010635 10.021270 1888 +intellectu 1 7 5.010635 5.010635 1847 +dead 1 7 5.010635 5.010635 1840 +foreign 1 7 5.010635 5.010635 1919 +edumi 1 6 5.164786 5.164786 2132 +sponsor 1 6 5.164786 5.164786 2133 +whichi 1 6 5.164786 5.164786 2056 +contract 1 6 5.164786 5.164786 1985 +huttenloch 1 6 5.164786 5.164786 1983 +begun 1 5 5.347108 5.347108 2386 +clarif 1 5 5.347108 5.347108 2253 +medium 1 4 5.568345 5.568345 2834 +transmit 1 4 5.568345 5.568345 2835 +lawyer 1 4 5.568345 5.568345 2836 +evid 1 4 5.568345 5.568345 2768 +isthat 1 4 5.568345 5.568345 2723 +owner 1 3 5.857933 5.857933 3531 +narr 1 3 5.857933 5.857933 3454 +worker 1 2 6.263398 6.263398 4841 +institutejim 1 1 6.957497 6.957497 9330 +davisxerox 1 1 6.957497 6.957497 9331 +corporationphd 1 1 6.957497 6.957497 9332 +improvecommun 1 1 6.957497 6.957497 9333 +andcont 1 1 6.957497 6.957497 9334 +reformat 1 1 6.957497 6.957497 9335 +inhypertext 1 1 6.957497 6.957497 9336 +thecstr 1 1 6.957497 6.957497 9337 +anarpa 1 1 6.957497 6.957497 9338 +moreeasili 1 1 6.957497 6.957497 9339 +electronicsystem 1 1 6.957497 6.957497 9340 +ofor 1 1 6.957497 6.957497 9341 +memoryinclud 1 1 6.957497 6.957497 9342 +sscreenplai 1 1 6.957497 6.957497 9343 +producedth 1 1 6.957497 6.957497 9344 +andjustif 1 1 6.957497 6.957497 9345 +developingcorpor 1 1 6.957497 6.957497 9346 +sharedannot 1 1 6.957497 6.957497 9347 +howpeopl 1 1 6.957497 6.957497 9348 +inelectron 1 1 6.957497 6.957497 9349 +prototypeimplement 1 1 6.957497 6.957497 9350 +shareddocu 1 1 6.957497 6.957497 9351 +nnotat 1 1 6.957497 6.957497 9352 +berequest 1 1 6.957497 6.957497 9353 +orcorrect 1 1 6.957497 6.957497 9354 +aus 1 1 6.957497 6.957497 9355 +willfind 1 1 6.957497 6.957497 9356 +whetherstud 1 1 6.957497 6.957497 9357 +usefulmean 1 1 6.957497 6.957497 9358 +designof 1 1 6.957497 6.957497 9359 +proxi 1 1 6.957497 6.957497 9360 +reliablycarri 1 1 6.957497 6.957497 9361 +toeither 1 1 6.957497 6.957497 9362 +alsopap 1 1 6.957497 6.957497 9363 +publicatiion 1 1 6.957497 6.957497 9364 +thedrimi 1 1 6.957497 6.957497 9365 +meprofession 1 1 6.957497 6.957497 9366 +historythi 1 1 6.957497 6.957497 9367 +improvisationi 1 1 6.957497 6.957497 9368 +resumeno 1 1 6.957497 6.957497 9369 +likeit 1 1 6.957497 6.957497 9370 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^metacrawler.cs.washington.edu_8080 b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^metacrawler.cs.washington.edu_8080 new file mode 100644 index 00000000..4d6bba6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^metacrawler.cs.washington.edu_8080 @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +person 1 111 2.197225 2.197225 117 +search 3 95 2.397895 7.193685 155 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +result 1 65 2.772589 2.772589 281 +right 1 48 3.044522 3.044522 363 +word 1 34 3.401197 3.401197 508 +relev 1 26 3.688879 3.688879 637 +greg 2 24 3.761200 7.522400 695 +reserv 1 20 3.951244 3.951244 808 +configur 1 15 4.248495 4.248495 1012 +metacrawl 2 10 4.653960 9.307920 1455 +erik 2 8 4.875197 9.750394 1701 +oren 2 6 5.164786 10.329572 2134 +etzioni 1 6 5.164786 5.164786 2135 +selberg 2 5 5.347108 10.694216 2441 +phrase 1 5 5.347108 5.347108 2242 +ahoi 1 3 5.857933 5.857933 3532 +searchingmetacrawlerbi 1 1 6.957497 6.957497 9371 +lauckhartand 1 1 6.957497 6.957497 9372 +etzioniif 1 1 6.957497 6.957497 9373 +wordssort 1 1 6.957497 6.957497 9374 +locationcontrol 1 1 6.957497 6.957497 9375 +problemswebmast 1 1 6.957497 6.957497 9376 +comcopyright 1 1 6.957497 6.957497 9377 +lauckhart 1 1 6.957497 6.957497 9378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^robios8.me.wisc.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^robios8.me.wisc.edu^ new file mode 100644 index 00000000..8108ae5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^robios8.me.wisc.edu^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +research 3 431 0.693147 2.079441 10 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +engin 2 297 1.098612 2.197224 20 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +link 2 247 1.386294 2.772588 24 +wisc 1 242 1.386294 1.386294 33 +group 2 183 1.609438 3.218876 36 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +madison 5 165 1.791759 8.958795 55 +base 3 165 1.791759 5.375277 50 +wisconsin 1 169 1.791759 1.791759 54 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +relat 2 139 1.945910 3.891820 68 +model 1 145 1.945910 1.945910 69 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +peopl 1 96 2.302585 2.302585 132 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +search 1 95 2.397895 2.397895 155 +center 1 88 2.397895 2.397895 158 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +ieee 3 86 2.484907 7.454721 190 +activ 1 84 2.484907 2.484907 182 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +resourc 1 81 2.484907 2.484907 172 +dynam 1 76 2.564949 2.564949 194 +server 1 76 2.564949 2.564949 204 +intellig 1 72 2.639057 2.639057 225 +simul 1 66 2.708050 2.708050 255 +plan 4 65 2.772589 11.090356 272 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +special 1 56 2.890372 2.890372 320 +local 1 55 2.944439 2.944439 334 +suggest 1 53 2.944439 2.944439 331 +frequent 1 49 3.044522 3.044522 367 +effect 1 46 3.091042 3.091042 385 +mechan 1 43 3.178054 3.178054 416 +review 1 42 3.218876 3.218876 425 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +societi 1 40 3.258097 3.258097 456 +seminar 1 38 3.295837 3.295837 470 +robot 13 36 3.367296 43.774848 497 +tech 1 35 3.401197 3.401197 515 +committe 1 34 3.401197 3.401197 522 +human 1 32 3.465736 3.465736 546 +robert 1 30 3.555348 3.555348 567 +ask 1 28 3.610918 3.610918 597 +motion 3 24 3.761200 11.283600 699 +compress 1 23 3.806662 3.806662 719 +director 1 22 3.850148 3.850148 767 +geometri 1 22 3.850148 3.850148 752 +path 1 21 3.912023 3.912023 778 +brief 1 16 4.174387 4.174387 1001 +chuck 1 14 4.317488 4.317488 1108 +nasa 1 13 4.382027 4.382027 1188 +vladimir 1 11 4.553877 4.553877 1324 +sens 1 11 4.553877 4.553877 1305 +errata 1 10 4.653960 4.653960 1403 +dyer 1 9 4.753590 4.753590 1573 +sensit 1 8 4.875197 4.875197 1726 +manufactur 1 8 4.875197 4.875197 1634 +sensor 4 7 5.010635 20.042540 1920 +jude 1 6 5.164786 5.164786 2123 +actuat 1 5 5.347108 5.347108 2442 +shavlik 1 5 5.347108 5.347108 2429 +lumelski 1 4 5.568345 5.568345 2837 +underwat 1 4 5.568345 5.568345 2838 +redund 1 4 5.568345 5.568345 2839 +skin 1 4 5.568345 5.568345 2840 +neil 1 4 5.568345 5.568345 2841 +kinemat 2 3 5.857933 11.715866 3516 +avenuemadison 1 2 6.263398 6.263398 4842 +maze 1 2 6.263398 6.263398 4843 +tether 1 2 6.263398 6.263398 4844 +duffi 1 2 6.263398 6.263398 4845 +lorenz 1 2 6.263398 6.263398 4846 +telerobot 1 2 6.263398 6.263398 4847 +hert 1 2 6.263398 6.263398 4848 +jogger 1 1 6.957497 6.957497 9379 +decentr 1 1 6.957497 6.957497 9380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html new file mode 100644 index 00000000..25c41eb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^robios8.me.wisc.edu^~lumelsky^lumelsky.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 5 297 1.098612 5.493060 20 +project 1 340 1.098612 1.098612 18 +link 2 247 1.386294 2.772588 24 +graduat 1 215 1.386294 1.386294 31 +wisconsin 1 169 1.791759 1.791759 54 +mathemat 2 108 2.197225 4.394450 123 +center 1 88 2.397895 2.397895 158 +ieee 3 86 2.484907 7.454721 190 +institut 1 84 2.484907 2.484907 187 +dept 4 64 2.772589 11.090356 291 +plan 1 65 2.772589 2.772589 272 +colleg 1 61 2.833213 2.833213 300 +mechan 1 43 3.178054 3.178054 416 +autom 1 41 3.218876 3.218876 434 +societi 1 40 3.258097 3.258097 456 +electr 1 38 3.295837 3.295837 461 +robot 4 36 3.367296 13.469184 497 +global 1 34 3.401197 3.401197 520 +tech 1 35 3.401197 3.401197 515 +committe 1 34 3.401197 3.401197 522 +human 1 32 3.465736 3.465736 546 +motion 1 24 3.761200 3.761200 699 +geometri 1 22 3.850148 3.850148 752 +path 1 21 3.912023 3.912023 778 +grant 1 12 4.465908 4.465908 1216 +vladimir 2 11 4.553877 9.107754 1324 +sensit 1 8 4.875197 4.875197 1726 +lumelski 1 4 5.568345 5.568345 2837 +underwat 1 4 5.568345 5.568345 2838 +redund 1 4 5.568345 5.568345 2839 +skin 1 4 5.568345 5.568345 2840 +kinemat 1 3 5.857933 5.857933 3516 +mace 1 2 6.263398 6.263398 4849 +lumelskyprofessormechan 1 1 6.957497 6.957497 9381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ new file mode 100644 index 00000000..c456ea64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^simon.cs.cornell.edu^Info^People^vogels^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 10 443 0.693147 6.931470 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +project 2 340 1.098612 2.197224 18 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +cornel 3 215 1.386294 4.158882 23 +design 3 213 1.386294 4.158882 25 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +group 3 183 1.609438 4.828314 36 +oper 2 180 1.609438 3.218876 34 +public 1 202 1.609438 1.609438 43 +network 6 168 1.791759 10.750554 61 +distribut 5 162 1.791759 8.958795 51 +applic 2 170 1.791759 3.583518 56 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +support 2 132 1.945910 3.891820 83 +perform 2 143 1.945910 3.891820 74 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +high 8 130 2.079442 16.635536 101 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +find 1 111 2.197225 2.197225 111 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +structur 1 106 2.197225 2.197225 105 +need 3 98 2.302585 6.907755 135 +take 2 97 2.302585 4.605170 134 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +commun 6 95 2.397895 14.387370 157 +proceed 4 93 2.397895 9.591580 152 +real 2 93 2.397895 4.795790 144 +level 3 87 2.484907 7.454721 180 +environ 2 84 2.484907 4.969814 177 +requir 1 81 2.484907 2.484907 167 +thing 1 84 2.484907 2.484907 189 +wide 1 84 2.484907 2.484907 185 +ieee 1 86 2.484907 2.484907 190 +messag 3 76 2.564949 7.694847 212 +issu 2 78 2.564949 5.129898 211 +appear 2 78 2.564949 5.129898 210 +method 1 80 2.564949 2.564949 213 +want 1 79 2.564949 2.564949 199 +decemb 1 80 2.564949 2.564949 215 +upson 1 71 2.639057 2.639057 218 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +meet 1 72 2.639057 2.639057 229 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +integr 1 67 2.708050 2.708050 245 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +septemb 2 65 2.772589 5.545178 274 +ithaca 1 65 2.772589 2.772589 294 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +function 1 62 2.772589 2.772589 275 +virtual 1 62 2.772589 2.772589 285 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +think 2 57 2.890372 5.780744 314 +major 1 56 2.890372 2.890372 315 +reason 1 57 2.890372 2.890372 318 +explor 1 58 2.890372 2.890372 324 +adapt 1 46 3.091042 3.091042 387 +done 1 47 3.091042 3.091042 381 +protocol 3 45 3.135494 9.406482 407 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +video 1 44 3.135494 3.135494 405 +mark 1 44 3.135494 3.135494 403 +mechan 3 43 3.178054 9.534162 416 +third 1 43 3.178054 3.178054 412 +small 1 39 3.258097 3.258097 447 +brian 1 38 3.295837 3.295837 466 +respons 1 37 3.332205 3.332205 476 +abl 2 30 3.555348 7.110696 566 +focu 1 30 3.555348 3.555348 571 +cluster 3 28 3.610918 10.832754 612 +pass 1 28 3.610918 3.610918 611 +packag 1 28 3.610918 3.610918 614 +detect 2 26 3.688879 7.377758 646 +bound 1 26 3.688879 3.688879 659 +reliabl 1 25 3.737670 3.737670 674 +reach 1 24 3.761200 3.761200 688 +pattern 1 24 3.761200 3.761200 689 +highli 2 23 3.806662 7.613324 725 +lead 1 23 3.806662 3.806662 718 +sciencecornel 1 22 3.850148 3.850148 768 +deal 1 22 3.850148 3.850148 736 +cooper 1 22 3.850148 3.850148 757 +tell 1 21 3.912023 3.912023 777 +flexibl 1 21 3.912023 3.912023 792 +exploit 1 20 3.951244 3.951244 836 +smith 1 20 3.951244 3.951244 820 +left 2 19 4.007333 8.014666 851 +predict 1 19 4.007333 4.007333 855 +miss 1 19 4.007333 4.007333 866 +speed 4 18 4.060443 16.241772 911 +failur 3 18 4.060443 12.181329 898 +anyon 1 17 4.110874 4.110874 916 +latenc 2 16 4.174387 8.348774 993 +transfer 1 16 4.174387 4.174387 967 +devic 1 16 4.174387 4.174387 1002 +practicum 1 16 4.174387 4.174387 960 +horu 3 14 4.317488 12.952464 1116 +achiev 1 14 4.317488 4.317488 1088 +demand 1 14 4.317488 4.317488 1073 +eicken 3 13 4.382027 13.146081 1134 +thorsten 2 13 4.382027 8.764054 1133 +kenneth 1 12 4.465908 4.465908 1265 +brad 1 12 4.465908 4.465908 1264 +reness 3 11 4.553877 13.661631 1333 +noth 2 11 4.553877 9.107754 1328 +bandwidth 1 11 4.553877 4.553877 1365 +node 1 11 4.553877 4.553877 1326 +werner 7 10 4.653960 32.577720 1385 +guarante 2 10 4.653960 9.307920 1391 +awai 1 10 4.653960 4.653960 1447 +mountain 1 10 4.653960 4.653960 1456 +robbert 3 9 4.753590 14.260770 1529 +birman 2 9 4.753590 9.507180 1531 +desir 1 9 4.753590 4.753590 1542 +deadlin 1 9 4.753590 4.753590 1502 +vogel 8 8 4.875197 39.001576 1622 +sigop 2 8 4.875197 9.750394 1727 +extract 1 8 4.875197 4.875197 1728 +vineet 1 8 4.875197 4.875197 1639 +perfect 1 7 5.010635 5.010635 1921 +gave 1 7 5.010635 5.010635 1922 +synchroni 1 7 5.010635 5.010635 1923 +implementationof 1 7 5.010635 5.010635 1813 +deliv 1 6 5.164786 5.164786 2070 +subsystem 1 6 5.164786 5.164786 2015 +alex 1 6 5.164786 5.164786 2130 +situat 1 5 5.347108 5.347108 2365 +scope 1 5 5.347108 5.347108 2296 +buch 1 5 5.347108 5.347108 2272 +myresearch 1 4 5.568345 5.568345 2842 +behind 1 4 5.568345 5.568345 2610 +basu 1 4 5.568345 5.568345 2843 +hayden 1 4 5.568345 5.568345 2844 +hickei 1 4 5.568345 5.568345 2845 +vaysburd 1 4 5.568345 5.568345 2846 +concert 1 3 5.857933 5.857933 3533 +interfacefor 1 3 5.857933 5.857933 3534 +anindya 1 3 5.857933 5.857933 3535 +copper 1 3 5.857933 5.857933 3536 +glade 1 3 5.857933 5.857933 3537 +takako 1 3 5.857933 5.857933 3538 +amwork 1 2 6.263398 6.263398 4850 +regardless 1 2 6.263398 6.263398 4577 +katherin 1 2 6.263398 6.263398 4851 +dalia 1 2 6.263398 6.263398 4852 +malki 1 2 6.263398 6.263398 4853 +workshopconnamoran 2 1 6.957497 13.914994 9382 +ierland 2 1 6.957497 13.914994 9383 +researchera 1 1 6.957497 6.957497 9384 +halldept 1 1 6.957497 6.957497 9385 +thehorusand 1 1 6.957497 6.957497 9386 +bandwith 1 1 6.957497 6.957497 9387 +horuswith 1 1 6.957497 6.957497 9388 +fallen 1 1 6.957497 6.957497 9389 +latencyfor 1 1 6.957497 6.957497 9390 +protocolsar 1 1 6.957497 6.957497 9391 +structureand 1 1 6.957497 6.957497 9392 +guarant 1 1 6.957497 6.957497 9393 +acur 1 1 6.957497 6.957497 9394 +aglob 1 1 6.957497 6.957497 9395 +supportfailur 1 1 6.957497 6.957497 9396 +suspis 1 1 6.957497 6.957497 9397 +workwith 1 1 6.957497 6.957497 9398 +middlewar 1 1 6.957497 6.957497 9399 +brainchild 1 1 6.957497 6.957497 9400 +andken 1 1 6.957497 6.957497 9401 +withthorsten 1 1 6.957497 6.957497 9402 +horusexperi 1 1 6.957497 6.957497 9403 +lectureson 1 1 6.957497 6.957497 9404 +virtuallysynchron 1 1 6.957497 6.957497 9405 +princpl 1 1 6.957497 6.957497 9406 +hpc 1 1 6.957497 6.957497 9407 +kati 1 1 6.957497 6.957497 9408 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ new file mode 100644 index 00000000..88018458 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^simon.cs.cornell.edu^Info^Projects^HORUS^ @@ -0,0 +1,266 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 9 443 0.693147 6.238323 6 +research 1 431 0.693147 0.693147 10 +project 5 340 1.098612 5.493060 18 +us 2 329 1.098612 2.197224 16 +time 1 293 1.098612 1.098612 17 +softwar 3 220 1.386294 4.158882 30 +design 2 213 1.386294 2.772588 25 +gener 2 220 1.386294 2.772588 27 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +develop 5 174 1.791759 8.958795 53 +distribut 4 162 1.791759 7.167036 51 +applic 3 170 1.791759 5.375277 56 +data 2 170 1.791759 3.583518 49 +network 2 168 1.791759 3.583518 61 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +high 2 130 2.079442 4.158884 101 +introduct 2 126 2.079442 4.158884 87 +provid 1 121 2.079442 2.079442 94 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +version 2 113 2.197225 4.394450 122 +manag 1 114 2.197225 2.197225 125 +theori 1 111 2.197225 2.197225 127 +code 1 108 2.197225 2.197225 116 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +book 1 99 2.302585 2.302585 131 +commun 4 95 2.397895 9.591580 157 +real 1 93 2.397895 2.397895 144 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +follow 1 92 2.397895 2.397895 143 +present 1 91 2.397895 2.397895 145 +comment 1 93 2.397895 2.397895 146 +requir 2 81 2.484907 4.969814 167 +activ 2 84 2.484907 4.969814 182 +environ 2 84 2.484907 4.969814 177 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +practic 1 70 2.708050 2.708050 246 +java 1 70 2.708050 2.708050 248 +multimedia 1 68 2.708050 2.708050 258 +collect 1 65 2.772589 2.772589 268 +virtual 1 62 2.772589 2.772589 285 +written 1 63 2.772589 2.772589 278 +type 1 61 2.833213 2.833213 296 +special 1 56 2.890372 2.890372 320 +overview 1 56 2.890372 2.890372 323 +publish 1 57 2.890372 2.890372 326 +found 2 53 2.944439 5.888878 337 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +processor 1 54 2.944439 2.944439 335 +scientif 1 53 2.944439 2.944439 341 +much 1 52 2.995732 2.995732 349 +life 2 50 3.044522 6.089044 375 +set 1 50 3.044522 3.044522 361 +standard 1 48 3.044522 3.044522 365 +effect 1 46 3.091042 3.091042 385 +made 2 44 3.135494 6.270988 398 +describ 1 45 3.135494 3.135494 400 +better 1 45 3.135494 3.135494 401 +compani 2 41 3.218876 6.437752 423 +cach 1 41 3.218876 3.218876 432 +origin 1 38 3.295837 3.295837 472 +close 1 38 3.295837 3.295837 465 +open 1 38 3.295837 3.295837 469 +purpos 2 37 3.332205 6.664410 481 +cost 1 37 3.332205 3.332205 480 +workstat 1 37 3.332205 3.332205 479 +ofth 1 36 3.367296 3.367296 491 +everi 1 34 3.401197 3.401197 519 +word 1 34 3.401197 3.401197 508 +toler 2 33 3.433987 6.867974 533 +within 1 33 3.433987 3.433987 525 +articl 1 33 3.433987 3.433987 530 +fault 1 32 3.465736 3.465736 547 +independ 1 32 3.465736 3.465736 548 +collabor 1 32 3.465736 3.465736 543 +idea 1 32 3.465736 3.465736 545 +secur 1 30 3.555348 3.555348 577 +exist 1 30 3.555348 3.555348 569 +power 1 30 3.555348 3.555348 573 +platform 1 29 3.583519 3.583519 591 +framework 2 28 3.610918 7.221836 606 +cluster 1 28 3.610918 3.610918 612 +american 1 27 3.637586 3.637586 634 +effort 2 26 3.688879 7.377758 652 +toward 1 25 3.737670 3.737670 668 +never 1 25 3.737670 3.737670 671 +reliabl 1 25 3.737670 3.737670 674 +wish 1 24 3.761200 3.761200 692 +seri 1 24 3.761200 3.761200 708 +initi 1 23 3.806662 3.806662 717 +varieti 1 22 3.850148 3.850148 740 +unit 1 21 3.912023 3.912023 779 +thu 1 21 3.912023 3.912023 773 +born 1 21 3.912023 3.912023 798 +runtim 1 19 4.007333 4.007333 858 +minim 1 18 4.060443 4.060443 887 +earli 1 16 4.174387 4.174387 968 +style 1 15 4.248495 4.248495 1036 +contribut 1 15 4.248495 4.248495 1021 +horu 15 14 4.317488 64.762320 1116 +attribut 1 14 4.317488 4.317488 1092 +coher 1 14 4.317488 4.317488 1109 +becam 1 14 4.317488 4.317488 1117 +whose 2 13 4.382027 8.764054 1166 +bodi 2 13 4.382027 8.764054 1178 +brother 1 13 4.382027 4.382027 1189 +replic 2 12 4.465908 8.931816 1231 +kenneth 2 12 4.465908 8.931816 1265 +robust 1 12 4.465908 4.465908 1271 +evolv 1 12 4.465908 4.465908 1223 +weight 1 12 4.465908 4.465908 1204 +rest 1 12 4.465908 4.465908 1259 +reness 2 11 4.553877 9.107754 1333 +faster 1 11 4.553877 4.553877 1323 +volum 1 11 4.553877 4.553877 1347 +death 2 10 4.653960 9.307920 1457 +modular 1 10 4.653960 4.653960 1392 +modul 1 10 4.653960 4.653960 1434 +length 1 10 4.653960 4.653960 1400 +sentenc 1 10 4.653960 4.653960 1413 +werner 1 10 4.653960 4.653960 1385 +birman 2 9 4.753590 9.507180 1531 +robbert 2 9 4.753590 9.507180 1529 +light 1 9 4.753590 4.753590 1533 +heart 1 8 4.875197 4.875197 1729 +gain 1 8 4.875197 4.875197 1730 +vogel 1 8 4.875197 4.875197 1622 +dead 2 7 5.010635 10.021270 1840 +aris 1 7 5.010635 5.010635 1924 +exactli 1 7 5.010635 5.010635 1817 +synchroni 1 7 5.010635 5.010635 1923 +usabl 1 7 5.010635 5.010635 1810 +conferenc 1 7 5.010635 5.010635 1857 +brought 1 7 5.010635 5.010635 1925 +restrict 1 6 5.164786 5.164786 2129 +outstand 1 6 5.164786 5.164786 2136 +mother 1 6 5.164786 5.164786 2083 +greatest 1 6 5.164786 5.164786 2073 +isi 4 5 5.347108 21.388432 2443 +elsewher 1 5 5.347108 5.347108 2444 +circumst 1 5 5.347108 5.347108 2283 +knew 1 5 5.347108 5.347108 2445 +hair 1 5 5.347108 5.347108 2446 +firm 1 4 5.568345 5.568345 2684 +areavail 1 4 5.568345 5.568345 2810 +projectth 1 3 5.857933 5.857933 3344 +woman 1 3 5.857933 5.857933 3539 +redesign 1 3 5.857933 5.857933 3540 +greatli 1 3 5.857933 5.857933 3541 +child 1 3 5.857933 5.857933 3542 +london 1 3 5.857933 5.857933 3282 +ensembl 3 2 6.263398 18.790194 4854 +oppos 2 2 6.263398 12.526796 4855 +egypt 1 2 6.263398 6.263398 4856 +groupwar 1 2 6.263398 6.263398 4857 +toconstruct 1 2 6.263398 6.263398 4858 +communicationarchitectur 1 2 6.263398 6.263398 4859 +ofreleas 1 2 6.263398 6.263398 4860 +transi 1 2 6.263398 6.263398 4861 +froma 1 2 6.263398 6.263398 4862 +mighti 1 2 6.263398 6.263398 4863 +wing 1 2 6.263398 6.263398 4864 +stir 1 2 6.263398 6.263398 4865 +lament 1 2 6.263398 6.263398 4866 +papersand 1 2 6.263398 6.263398 4867 +silvano 1 2 6.263398 6.263398 4868 +mytholog 1 2 6.263398 6.263398 4869 +court 1 2 6.263398 6.263398 4870 +osiri 6 1 6.957497 41.744982 9409 +egyptian 3 1 6.957497 20.872491 9410 +god 2 1 6.957497 13.914994 9411 +rejoic 2 1 6.957497 13.914994 9412 +groupcommun 2 1 6.957497 13.914994 9413 +triumphant 1 1 6.957497 6.957497 9414 +ofisi 1 1 6.957497 6.957497 9415 +heir 1 1 6.957497 6.957497 9416 +appealedstrongli 1 1 6.957497 6.957497 9417 +becausein 1 1 6.957497 6.957497 9418 +possess 1 1 6.957497 6.957497 9419 +renew 1 1 6.957497 6.957497 9420 +movementa 1 1 6.957497 6.957497 9421 +inact 1 1 6.957497 6.957497 9422 +applicationsbas 1 1 6.957497 6.957497 9423 +infault 1 1 6.957497 6.957497 9424 +thatexploit 1 1 6.957497 6.957497 9425 +theoveral 1 1 6.957497 6.957497 9426 +applicationprotocol 1 1 6.957497 6.957497 9427 +applicationrequir 1 1 6.957497 6.957497 9428 +launch 1 1 6.957497 6.957497 9429 +theisi 1 1 6.957497 6.957497 9430 +robustdistribut 1 1 6.957497 6.957497 9431 +unsuit 1 1 6.957497 6.957497 9432 +asappl 1 1 6.957497 6.957497 9433 +besidesth 1 1 6.957497 6.957497 9434 +usedfor 1 1 6.957497 6.957497 9435 +sametim 1 1 6.957497 6.957497 9436 +lighter 1 1 6.957497 6.957497 9437 +beus 1 1 6.957497 6.957497 9438 +commericalright 1 1 6.957497 6.957497 9439 +manyoth 1 1 6.957497 6.957497 9440 +nofe 1 1 6.957497 6.957497 9441 +ensemblewil 1 1 6.957497 6.957497 9442 +groupwareappl 1 1 6.957497 6.957497 9443 +differentclass 1 1 6.957497 6.957497 9444 +onnext 1 1 6.957497 6.957497 9445 +speedcommun 1 1 6.957497 6.957497 9446 +systemsproject 1 1 6.957497 6.957497 9447 +navtech 1 1 6.957497 6.957497 9448 +stormcast 1 1 6.957497 6.957497 9449 +tacomaproject 1 1 6.957497 6.957497 9450 +thesepag 1 1 6.957497 6.957497 9451 +begotten 1 1 6.957497 6.957497 9452 +sorrow 1 1 6.957497 6.957497 9453 +herhusband 1 1 6.957497 6.957497 9454 +goddess 1 1 6.957497 6.957497 9455 +distress 1 1 6.957497 6.957497 9456 +equippedwith 1 1 6.957497 6.957497 9457 +utter 1 1 6.957497 6.957497 9458 +mighthav 1 1 6.957497 6.957497 9459 +secret 1 1 6.957497 6.957497 9460 +suckl 1 1 6.957497 6.957497 9461 +rear 1 1 6.957497 6.957497 9462 +horusvisit 1 1 6.957497 6.957497 9463 +abstractpag 1 1 6.957497 6.957497 9464 +relatedto 1 1 6.957497 6.957497 9465 +maffei 1 1 6.957497 6.957497 9466 +flexiblegroup 1 1 6.957497 6.957497 9467 +hyme 1 1 6.957497 6.957497 9468 +osirisfrom 1 1 6.957497 6.957497 9469 +papyru 1 1 6.957497 6.957497 9470 +walli 1 1 6.957497 6.957497 9471 +budg 1 1 6.957497 6.957497 9472 +studiesin 1 1 6.957497 6.957497 9473 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^ new file mode 100644 index 00000000..e3aae6fe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +depart 10 457 0.693147 6.931470 12 +research 4 431 0.693147 2.772588 10 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +project 2 340 1.098612 2.197224 18 +cours 2 273 1.098612 2.197224 15 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 6 215 1.386294 8.317764 23 +gener 3 220 1.386294 4.158882 27 +also 2 259 1.386294 2.772588 28 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +read 1 154 1.791759 1.791759 47 +report 5 131 2.079442 10.397210 92 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +find 2 111 2.197225 4.394450 111 +check 2 115 2.197225 4.394450 118 +site 1 106 2.197225 2.197225 119 +person 1 111 2.197225 2.197225 117 +peopl 2 96 2.302585 4.605170 132 +technic 1 100 2.302585 2.302585 140 +comment 1 93 2.397895 2.397895 146 +info 2 85 2.484907 4.969814 176 +academ 1 82 2.484907 2.484907 178 +activ 1 84 2.484907 2.484907 182 +know 2 80 2.564949 5.129898 198 +server 2 76 2.564949 5.129898 204 +master 1 76 2.564949 2.564949 216 +appear 1 78 2.564949 2.564949 210 +free 1 73 2.639057 2.639057 224 +degre 1 69 2.708050 2.708050 259 +locat 1 59 2.833213 2.833213 303 +faculti 3 56 2.890372 8.671116 325 +semest 1 58 2.890372 2.890372 312 +direct 1 57 2.890372 2.890372 316 +undergradu 1 54 2.944439 2.944439 338 +standard 1 48 3.044522 3.044522 365 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +annual 3 40 3.258097 9.774291 458 +feel 1 37 3.332205 3.332205 483 +staff 1 36 3.367296 3.367296 490 +tech 1 35 3.401197 3.401197 515 +go 2 33 3.433987 6.867974 529 +taught 1 33 3.433987 3.433987 526 +collabor 1 32 3.465736 3.465736 543 +team 1 27 3.637586 3.637586 625 +doctor 1 24 3.761200 3.761200 709 +brows 1 23 3.806662 3.806662 726 +size 1 23 3.806662 3.806662 713 +offici 1 18 4.060443 4.060443 894 +anonym 1 14 4.317488 4.317488 1100 +outstand 1 6 5.164786 5.164786 2136 +disclaim 1 4 5.568345 5.568345 2847 +projector 1 3 5.857933 5.857933 3409 +universitydepart 1 2 6.263398 6.263398 4871 +infoget 1 1 6.957497 6.957497 9474 +contactswithin 1 1 6.957497 6.957497 9475 +facultyfind 1 1 6.957497 6.957497 9476 +ortheir 1 1 6.957497 6.957497 9477 +researchcheck 1 1 6.957497 6.957497 9478 +aboutour 1 1 6.957497 6.957497 9479 +publicationsfind 1 1 6.957497 6.957497 9480 +researcherseith 1 1 6.957497 6.957497 9481 +degreeslook 1 1 6.957497 6.957497 9482 +orundergradu 1 1 6.957497 6.957497 9483 +academicsrefer 1 1 6.957497 6.957497 9484 +webfor 1 1 6.957497 6.957497 9485 +generalcoursedescript 1 1 6.957497 6.957497 9486 +peopleget 1 1 6.957497 6.957497 9487 +directorylist 1 1 6.957497 6.957497 9488 +activitiesfind 1 1 6.957497 6.957497 9489 +theassoci 1 1 6.957497 6.957497 9490 +excellenthockei 1 1 6.957497 6.957497 9491 +serverscheck 1 1 6.957497 6.957497 9492 +gopherserv 1 1 6.957497 6.957497 9493 +ftpserver 1 1 6.957497 6.957497 9494 +sitesquest 1 1 6.957497 6.957497 9495 +informationpres 1 1 6.957497 6.957497 9496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html new file mode 100644 index 00000000..d7df744e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Faculty^Salton.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 11 412 0.693147 7.624617 8 +research 5 431 0.693147 3.465735 10 +system 5 443 0.693147 3.465735 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +us 5 329 1.098612 5.493060 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 4 215 1.386294 5.545176 23 +gener 3 220 1.386294 4.158882 27 +link 3 247 1.386294 4.158882 24 +washington 2 236 1.386294 2.772588 32 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +oper 2 180 1.609438 3.218876 34 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 3 174 1.791759 5.375277 53 +avail 3 169 1.791759 5.375277 48 +base 3 165 1.791759 5.375277 50 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +process 4 142 1.945910 7.783640 72 +file 2 132 1.945910 3.891820 70 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +model 1 145 1.945910 1.945910 69 +analysi 5 124 2.079442 10.397210 98 +document 3 121 2.079442 6.238326 89 +databas 3 122 2.079442 6.238326 86 +high 2 130 2.079442 4.158884 101 +confer 2 126 2.079442 4.158884 100 +seattl 2 120 2.079442 4.158884 103 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +structur 2 106 2.197225 4.394450 105 +version 2 113 2.197225 4.394450 122 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +pleas 1 113 2.197225 2.197225 114 +text 26 98 2.302585 59.867210 133 +user 2 104 2.302585 4.605170 137 +access 1 102 2.302585 2.302585 136 +search 3 95 2.397895 7.193685 155 +proceed 3 93 2.397895 7.193685 152 +octob 2 89 2.397895 4.795790 156 +associ 2 93 2.397895 4.795790 151 +section 1 94 2.397895 2.397895 149 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +larg 5 82 2.484907 12.424535 168 +novemb 4 81 2.484907 9.939628 179 +librari 3 87 2.484907 7.454721 181 +environ 1 84 2.484907 2.484907 177 +institut 1 84 2.484907 2.484907 187 +june 2 79 2.564949 5.129898 214 +method 1 80 2.564949 2.564949 213 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +nation 4 74 2.639057 10.556228 240 +addit 1 74 2.639057 2.639057 228 +servic 1 72 2.639057 2.639057 236 +meet 1 72 2.639057 2.639057 229 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +free 1 73 2.639057 2.639057 224 +main 2 67 2.708050 5.416100 256 +differ 2 66 2.708050 5.416100 253 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +would 1 67 2.708050 2.708050 251 +degre 1 69 2.708050 2.708050 259 +test 1 66 2.708050 2.708050 252 +multimedia 1 68 2.708050 2.708050 258 +collect 2 65 2.772589 5.545178 268 +import 1 65 2.772589 2.772589 282 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +copi 1 63 2.772589 2.772589 284 +automat 7 61 2.833213 19.832491 306 +colleg 1 61 2.833213 2.833213 300 +unix 1 58 2.890372 2.890372 308 +publish 1 57 2.890372 2.890372 326 +sever 1 56 2.890372 2.890372 322 +faculti 1 56 2.890372 2.890372 325 +extens 1 53 2.944439 2.944439 340 +cover 1 55 2.944439 2.944439 329 +februari 1 54 2.944439 2.944439 328 +without 1 50 3.044522 3.044522 370 +approach 1 48 3.044522 3.044522 366 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +natur 1 44 3.135494 3.135494 406 +made 1 44 3.135494 3.135494 398 +third 1 43 3.178054 3.178054 412 +around 1 43 3.178054 3.178054 415 +york 2 41 3.218876 6.437752 435 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +annual 3 40 3.258097 9.774291 458 +continu 1 39 3.258097 3.258097 448 +transact 1 39 3.258097 3.258097 438 +microsoft 1 38 3.295837 3.295837 468 +purpos 3 37 3.332205 9.996615 481 +field 1 37 3.332205 3.332205 482 +mean 1 37 3.332205 3.332205 477 +respons 1 37 3.332205 3.332205 476 +word 1 34 3.401197 3.401197 508 +statist 1 35 3.401197 3.401197 521 +represent 1 35 3.401197 3.401197 512 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +queri 2 33 3.433987 6.867974 524 +articl 1 33 3.433987 3.433987 530 +express 1 32 3.465736 3.465736 540 +storag 1 31 3.496508 3.496508 553 +exist 2 30 3.555348 7.110696 569 +abl 1 30 3.555348 3.555348 566 +semant 1 29 3.583519 3.583519 587 +full 3 28 3.610918 10.832754 615 +progress 1 28 3.610918 3.610918 598 +retriev 17 27 3.637586 61.838962 621 +determin 2 27 3.637586 7.275172 630 +subject 2 26 3.688879 7.377758 647 +consist 2 26 3.688879 7.377758 651 +store 1 24 3.761200 3.761200 693 +handl 1 24 3.761200 3.761200 685 +size 1 23 3.806662 3.806662 713 +brows 1 23 3.806662 3.806662 726 +util 3 21 3.912023 11.736069 774 +similar 2 21 3.912023 7.824046 771 +flexibl 1 21 3.912023 3.912023 792 +corpor 1 21 3.912023 3.912023 802 +department 1 20 3.951244 3.951244 839 +hypertext 6 19 4.007333 24.043998 865 +media 1 19 4.007333 4.007333 861 +item 1 19 4.007333 4.007333 856 +appropri 1 18 4.060443 4.060443 883 +germani 2 17 4.110874 8.221748 946 +expand 1 17 4.110874 4.110874 928 +analyz 1 17 4.110874 4.110874 925 +moor 1 17 4.110874 4.110874 936 +vector 1 16 4.174387 4.174387 961 +capabl 2 15 4.248495 8.496990 1016 +piec 1 15 4.248495 4.248495 1020 +demand 1 14 4.317488 4.317488 1073 +conduct 1 14 4.317488 4.317488 1065 +rank 1 14 4.317488 4.317488 1063 +context 1 13 4.382027 4.382027 1153 +station 1 13 4.382027 4.382027 1157 +denis 2 12 4.465908 8.931816 1255 +readabl 1 12 4.465908 4.465908 1258 +optic 1 12 4.465908 4.465908 1221 +hypermedia 1 12 4.465908 4.465908 1247 +realiti 1 12 4.465908 4.465908 1272 +pageif 1 12 4.465908 4.465908 1275 +smart 3 11 4.553877 13.661631 1352 +refin 1 11 4.553877 4.553877 1363 +probabilist 1 11 4.553877 4.553877 1343 +rapid 1 10 4.653960 4.653960 1453 +paragraph 1 10 4.653960 4.653960 1449 +sentenc 1 10 4.653960 4.653960 1413 +equip 1 10 4.653960 4.653960 1459 +criteria 1 9 4.753590 4.753590 1477 +charg 1 9 4.753590 4.753590 1582 +hundr 1 9 4.753590 4.753590 1528 +mass 1 8 4.875197 4.875197 1732 +formul 1 8 4.875197 4.875197 1733 +matter 1 8 4.875197 4.875197 1627 +colloquium 1 8 4.875197 4.875197 1734 +harvard 1 7 5.010635 5.010635 1926 +densiti 1 7 5.010635 5.010635 1927 +vehicl 1 7 5.010635 5.010635 1928 +sparc 1 7 5.010635 5.010635 1860 +maryland 2 6 5.164786 10.329572 2140 +furthermor 1 6 5.164786 5.164786 2141 +restrict 1 6 5.164786 5.164786 2129 +ohio 3 5 5.347108 16.041324 2447 +corpu 2 5 5.347108 10.694216 2282 +medicin 2 5 5.347108 10.694216 2448 +eduph 1 5 5.347108 5.347108 2449 +accuraci 1 5 5.347108 5.347108 2450 +feder 1 5 5.347108 5.347108 2266 +travers 1 5 5.347108 5.347108 2363 +allan 4 4 5.568345 22.273380 2849 +rapidli 2 4 5.568345 11.136690 2850 +machineri 2 4 5.568345 11.136690 2851 +termin 1 4 5.568345 5.568345 2852 +ireland 1 4 5.568345 5.568345 2853 +sophist 2 3 5.857933 11.715866 3545 +genom 2 3 5.857933 11.715866 3546 +trec 1 3 5.857933 5.857933 3547 +gigabyt 1 3 5.857933 5.857933 3548 +activitiesmemb 1 3 5.857933 5.857933 3549 +zurich 1 3 5.857933 5.857933 3550 +switzerland 1 3 5.857933 5.857933 3551 +vega 1 3 5.857933 5.857933 3450 +softwareth 1 3 5.857933 5.857933 3552 +sigir 2 2 6.263398 12.526796 4873 +bucklei 2 2 6.263398 12.526796 4874 +nevada 2 2 6.263398 12.526796 4875 +gerard 1 2 6.263398 6.263398 4876 +decreas 1 2 6.263398 6.263398 4877 +absenc 1 2 6.263398 6.263398 4878 +unrestrict 1 2 6.263398 6.263398 4879 +excerpt 1 2 6.263398 6.263398 4880 +activitiesassoci 1 2 6.263398 6.263398 4881 +systemsprogram 1 2 6.263398 6.263398 4882 +dublin 1 2 6.263398 6.263398 4883 +moscow 1 2 6.263398 6.263398 4884 +encyclopedia 3 1 6.957497 20.872491 9505 +bethesda 2 1 6.957497 13.914994 9506 +columbu 2 1 6.957497 13.914994 9507 +saltongerard 1 1 6.957497 6.957497 9508 +saltonprofessorg 1 1 6.957497 6.957497 9509 +cheapli 1 1 6.957497 6.957497 9510 +funk 1 1 6.957497 6.957497 9511 +wagnal 1 1 6.957497 6.957497 9512 +committeeprofession 1 1 6.957497 6.957497 9513 +seventeenth 1 1 6.957497 6.957497 9514 +darmstadt 1 1 6.957497 6.957497 9515 +lecturesautomat 1 1 6.957497 6.957497 9516 +konstanz 1 1 6.957497 6.957497 9517 +asi 1 1 6.957497 6.957497 9518 +publicationsapproach 1 1 6.957497 6.957497 9519 +passag 1 1 6.957497 6.957497 9520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html new file mode 100644 index 00000000..48e0d12d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Li.html @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +interest 3 384 0.693147 2.079441 11 +research 2 431 0.693147 1.386294 10 +engin 3 297 1.098612 3.295836 20 +us 2 329 1.098612 2.197224 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +gener 3 220 1.386294 4.158882 27 +also 1 259 1.386294 1.386294 28 +includ 2 208 1.609438 3.218876 42 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +problem 5 147 1.945910 9.729550 75 +confer 2 126 2.079442 4.158884 100 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +world 1 115 2.197225 2.197225 126 +theori 1 111 2.197225 2.197225 127 +pleas 1 113 2.197225 2.197225 114 +techniqu 2 99 2.302585 4.605170 138 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +proceed 1 93 2.397895 2.397895 152 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +method 5 80 2.564949 12.824745 213 +optim 4 79 2.564949 10.259796 197 +solv 2 73 2.639057 5.278114 234 +appli 1 71 2.639057 2.639057 226 +august 1 66 2.708050 2.708050 257 +function 1 62 2.772589 2.772589 275 +scientif 3 53 2.944439 8.833317 341 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +numer 1 49 3.044522 3.044522 369 +approach 1 48 3.044522 3.044522 366 +annual 1 40 3.258097 3.258097 458 +china 1 37 3.332205 3.332205 487 +global 1 34 3.401197 3.401197 520 +return 1 34 3.401197 3.401197 502 +scientist 2 31 3.496508 6.993016 560 +exist 1 30 3.555348 3.555348 569 +chines 2 29 3.583519 7.167038 595 +consid 1 29 3.583519 3.583519 590 +scale 1 28 3.610918 3.610918 613 +subject 2 26 3.688879 7.377758 647 +bound 2 26 3.688879 7.377758 659 +enhanc 1 26 3.688879 3.688879 644 +siam 1 21 3.912023 3.912023 800 +department 1 20 3.951244 3.951244 839 +region 5 19 4.007333 20.036665 875 +beij 1 19 4.007333 4.007333 876 +minim 6 18 4.060443 24.362658 887 +accept 1 18 4.060443 4.060443 879 +moor 1 17 4.110874 4.110874 936 +young 2 16 4.174387 8.348774 991 +condit 1 16 4.174387 4.174387 975 +reflect 1 15 4.248495 4.248495 1034 +nonlinear 3 14 4.317488 12.952464 1107 +denis 2 12 4.465908 8.931816 1255 +pageif 1 12 4.465908 4.465908 1275 +penalti 1 10 4.653960 4.653960 1405 +trust 5 9 4.753590 23.767950 1583 +exact 1 9 4.753590 4.753590 1509 +converg 2 7 5.010635 10.021270 1844 +constrain 3 6 5.164786 15.494358 2042 +eduph 1 5 5.347108 5.347108 2449 +affin 1 5 5.347108 5.347108 2378 +interior 1 5 5.347108 5.347108 2439 +waterloo 1 3 5.857933 5.857933 3523 +unconstrain 1 2 6.263398 6.263398 4499 +publicationsa 1 2 6.263398 6.263398 4885 +nonlinearli 3 1 6.957497 20.872491 9521 +yui 1 1 6.957497 6.957497 9522 +liyui 1 1 6.957497 6.957497 9523 +liresearch 1 1 6.957497 6.957497 9524 +associateyui 1 1 6.957497 6.957497 9525 +lecturesan 1 1 6.957497 6.957497 9526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html new file mode 100644 index 00000000..d4b296c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual94^Researchers^Zippel.html @@ -0,0 +1,179 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +depart 5 457 0.693147 3.465735 12 +system 4 443 0.693147 2.772588 6 +research 3 431 0.693147 2.079441 10 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +us 4 329 1.098612 4.394448 16 +engin 3 297 1.098612 3.295836 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +softwar 2 220 1.386294 2.772588 30 +gener 2 220 1.386294 2.772588 27 +languag 2 227 1.386294 2.772588 26 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +algorithm 3 162 1.791759 5.375277 57 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +object 5 138 1.945910 9.729550 79 +architectur 4 139 1.945910 7.783640 77 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +studi 2 120 2.079442 4.158884 91 +provid 2 121 2.079442 4.158884 94 +number 2 130 2.079442 4.158884 97 +databas 2 122 2.079442 4.158884 86 +machin 1 129 2.079442 2.079442 95 +introduct 1 126 2.079442 2.079442 87 +report 1 131 2.079442 2.079442 92 +mathemat 6 108 2.197225 13.183350 123 +intern 3 108 2.197225 6.591675 128 +code 2 108 2.197225 4.394450 116 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +pleas 1 113 2.197225 2.197225 114 +techniqu 2 99 2.302585 4.605170 138 +present 4 91 2.397895 9.591580 145 +call 1 91 2.397895 2.397895 153 +center 1 88 2.397895 2.397895 158 +proceed 1 93 2.397895 2.397895 152 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +journal 2 83 2.484907 4.969814 183 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +novemb 1 81 2.484907 2.484907 179 +decemb 3 80 2.564949 7.694847 215 +dynam 2 76 2.564949 5.129898 194 +orient 2 80 2.564949 5.129898 205 +method 1 80 2.564949 2.564949 213 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +workshop 3 71 2.639057 7.917171 239 +meet 3 72 2.639057 7.917171 229 +symposium 2 72 2.639057 5.278114 238 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +januari 3 62 2.772589 8.317767 264 +function 2 62 2.772589 5.545178 275 +septemb 2 65 2.772589 5.545178 274 +ithaca 1 65 2.772589 2.772589 294 +type 1 61 2.833213 2.833213 296 +special 1 56 2.890372 2.890372 320 +space 1 57 2.890372 2.890372 310 +reason 1 57 2.890372 2.890372 318 +publish 1 57 2.890372 2.890372 326 +scientif 3 53 2.944439 8.833317 341 +allow 1 53 2.944439 2.944439 333 +profession 1 51 2.995732 2.995732 345 +numer 1 49 3.044522 3.044522 369 +principl 1 48 3.044522 3.044522 357 +done 1 47 3.091042 3.091042 381 +effect 1 46 3.091042 3.091042 385 +algebra 4 45 3.135494 12.541976 394 +mechan 1 43 3.178054 3.178054 416 +york 5 41 3.218876 16.094380 435 +autom 1 41 3.218876 3.218876 434 +review 1 42 3.218876 3.218876 425 +societi 3 40 3.258097 9.774291 456 +transact 1 39 3.258097 3.258097 438 +error 1 40 3.258097 3.258097 449 +annual 1 40 3.258097 3.258097 458 +electr 2 38 3.295837 6.591674 461 +correct 1 38 3.295837 3.295837 462 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +board 1 33 3.433987 3.433987 528 +extend 1 32 3.465736 3.465736 539 +richard 1 31 3.496508 3.496508 559 +scientist 1 31 3.496508 3.496508 560 +specifi 1 30 3.555348 3.555348 568 +compon 1 30 3.555348 3.555348 570 +common 1 30 3.555348 3.555348 574 +focus 2 29 3.583519 7.167038 584 +symbol 3 27 3.637586 10.912758 620 +american 3 27 3.637586 10.912758 634 +challeng 1 26 3.688879 3.688879 653 +constraint 1 26 3.688879 3.688879 636 +equat 2 23 3.806662 7.613324 724 +toolkit 3 20 3.951244 11.853732 835 +department 1 20 3.951244 3.951244 839 +region 3 19 4.007333 12.021999 875 +north 1 19 4.007333 4.007333 873 +boston 1 19 4.007333 4.007333 862 +lisp 1 18 4.060443 4.060443 897 +differenti 2 17 4.110874 8.221748 921 +layer 1 17 4.110874 4.110874 926 +moor 1 17 4.110874 4.110874 936 +modern 1 16 4.174387 4.174387 966 +vector 1 16 4.174387 4.174387 961 +susan 2 15 4.248495 8.496990 1050 +polynomi 4 14 4.317488 17.269952 1069 +dean 3 14 4.317488 12.952464 1104 +massachusett 1 14 4.317488 4.317488 1118 +joint 2 13 4.382027 8.764054 1130 +convert 1 13 4.382027 4.382027 1122 +opportun 1 13 4.382027 4.382027 1161 +denis 2 12 4.465908 8.931816 1255 +calcul 1 12 4.465908 4.465908 1268 +deduct 1 12 4.465908 4.465908 1236 +pageif 1 12 4.465908 4.465908 1275 +israel 4 11 4.553877 18.215508 1366 +vista 2 10 4.653960 9.307920 1452 +matric 1 10 4.653960 4.653960 1399 +decomposit 1 10 4.653960 4.653960 1439 +modular 1 10 4.653960 4.653960 1392 +factor 1 9 4.753590 4.753590 1544 +ring 1 8 4.875197 4.875197 1684 +ideal 1 8 4.875197 4.875197 1630 +aris 1 7 5.010635 5.010635 1924 +boundari 1 7 5.010635 5.010635 1929 +pursu 1 7 5.010635 5.010635 1902 +refere 1 7 5.010635 5.010635 1895 +interpol 1 7 5.010635 5.010635 1823 +rubinfeld 2 6 5.164786 10.329572 1998 +carolina 1 6 5.164786 5.164786 2142 +kluwer 1 6 5.164786 5.164786 2143 +fluid 2 5 5.347108 10.694216 2440 +ronitt 2 5 5.347108 10.694216 2265 +eduph 1 5 5.347108 5.347108 2449 +colleagu 1 5 5.347108 5.347108 2304 +ration 1 5 5.347108 5.347108 2427 +synthes 1 5 5.347108 5.347108 2451 +suni 1 5 5.347108 5.347108 2452 +weyl 2 4 5.568345 11.136690 2854 +dexter 2 4 5.568345 11.136690 2855 +kozen 2 4 5.568345 11.136690 2619 +technion 2 4 5.568345 11.136690 2856 +suppli 1 4 5.568345 5.568345 2611 +substrat 1 4 5.568345 5.568345 2857 +weizmann 1 4 5.568345 5.568345 2858 +syracus 3 3 5.857933 17.573799 3553 +haifa 3 3 5.857933 17.573799 3554 +aerospac 1 3 5.857933 5.857933 3555 +durham 1 3 5.857933 5.857933 3279 +dawson 3 2 6.263398 18.790194 4886 +microstorag 2 2 6.263398 12.526796 4887 +activitieseditori 1 2 6.263398 6.263398 4888 +softwareprogram 1 2 6.263398 6.263398 4889 +irreduc 1 2 6.263398 6.263398 4890 +rehovot 1 2 6.263398 6.263398 4891 +albani 1 2 6.263398 6.263398 4892 +multivari 1 2 6.263398 6.263398 4151 +landau 2 1 6.957497 13.914994 9527 +zippelrichard 1 1 6.957497 6.957497 9528 +zippelsenior 1 1 6.957497 6.957497 9529 +associaterz 1 1 6.957497 6.957497 9530 +computationlecturesalgebra 1 1 6.957497 6.957497 9531 +publicationseffect 1 1 6.957497 6.957497 9532 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html new file mode 100644 index 00000000..0846b9d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Birman.html @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 9 443 0.693147 6.238323 6 +work 5 380 0.693147 3.465735 9 +program 4 374 0.693147 2.772588 7 +depart 3 457 0.693147 2.079441 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +us 4 329 1.098612 4.394448 16 +time 2 293 1.098612 2.197224 17 +student 2 343 1.098612 2.197224 19 +engin 2 297 1.098612 2.197224 20 +last 1 314 1.098612 1.098612 14 +softwar 3 220 1.386294 4.158882 30 +also 3 259 1.386294 4.158882 28 +cornel 3 215 1.386294 4.158882 23 +graduat 2 215 1.386294 2.772588 31 +languag 1 227 1.386294 1.386294 26 +group 5 183 1.609438 8.047190 36 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +distribut 9 162 1.791759 16.125831 51 +develop 6 174 1.791759 10.750554 53 +avail 2 169 1.791759 3.583518 48 +parallel 2 169 1.791759 3.583518 60 +implement 2 152 1.791759 3.583518 52 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +model 3 145 1.945910 5.837730 69 +process 3 142 1.945910 5.837730 72 +problem 2 147 1.945910 3.891820 75 +support 2 132 1.945910 3.891820 83 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +tool 3 117 2.079442 6.238326 93 +technolog 2 131 2.079442 4.158884 102 +high 2 130 2.079442 4.158884 101 +studi 2 120 2.079442 4.158884 91 +provid 1 121 2.079442 2.079442 94 +report 1 131 2.079442 2.079442 92 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +pleas 1 113 2.197225 2.197225 114 +techniqu 2 99 2.302585 4.605170 138 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +commun 4 95 2.397895 9.591580 157 +real 2 93 2.397895 4.795790 144 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +member 3 84 2.484907 7.454721 165 +ieee 2 86 2.484907 4.969814 190 +start 1 83 2.484907 2.484907 173 +wide 1 84 2.484907 2.484907 185 +activ 1 84 2.484907 2.484907 182 +academ 1 82 2.484907 2.484907 178 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +dynam 1 76 2.564949 2.564949 194 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +refer 1 78 2.564949 2.564949 203 +orient 1 80 2.564949 2.564949 205 +master 1 76 2.564949 2.564949 216 +decemb 1 80 2.564949 2.564949 215 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +nation 1 74 2.639057 2.639057 240 +practic 2 70 2.708050 5.416100 246 +goal 1 66 2.708050 2.708050 250 +multimedia 1 68 2.708050 2.708050 258 +integr 1 67 2.708050 2.708050 245 +virtual 3 62 2.772589 8.317767 285 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +polici 1 64 2.772589 2.772589 279 +reason 1 57 2.890372 2.890372 318 +special 1 56 2.890372 2.890372 320 +major 1 56 2.890372 2.890372 315 +faculti 1 56 2.890372 2.890372 325 +extens 2 53 2.944439 5.888878 340 +much 1 52 2.995732 2.995732 349 +profession 1 51 2.995732 2.995732 345 +set 1 50 3.044522 3.044522 361 +basic 1 50 3.044522 3.044522 360 +approach 1 48 3.044522 3.044522 366 +california 2 46 3.091042 6.182084 388 +featur 2 46 3.091042 6.182084 386 +done 1 47 3.091042 3.091042 381 +execut 1 45 3.135494 3.135494 404 +mark 1 44 3.135494 3.135494 403 +protocol 1 45 3.135494 3.135494 407 +combin 1 42 3.218876 3.218876 421 +press 1 42 3.218876 3.218876 419 +theoret 1 39 3.258097 3.258097 446 +transact 1 39 3.258097 3.258097 438 +societi 1 40 3.258097 3.258097 456 +annual 1 40 3.258097 3.258097 458 +origin 1 38 3.295837 3.295837 472 +brian 1 38 3.295837 3.295837 466 +purpos 1 37 3.332205 3.332205 481 +committe 3 34 3.401197 10.203591 522 +singl 1 34 3.401197 3.401197 510 +return 1 34 3.401197 3.401197 502 +toler 3 33 3.433987 10.301961 533 +within 1 33 3.433987 3.433987 525 +fault 3 32 3.465736 10.397208 547 +collabor 2 32 3.465736 6.931472 543 +idea 1 32 3.465736 3.465736 545 +ad 1 32 3.465736 3.465736 544 +scientist 1 31 3.496508 3.496508 560 +secur 4 30 3.555348 14.221392 577 +focu 1 30 3.555348 3.555348 571 +option 1 30 3.555348 3.555348 575 +specifi 1 30 3.555348 3.555348 568 +synchron 1 29 3.583519 3.583519 588 +limit 1 29 3.583519 3.583519 585 +univ 1 28 3.610918 3.610918 617 +intend 1 28 3.610918 3.610918 599 +actual 1 28 3.610918 3.610918 604 +packag 1 28 3.610918 3.610918 614 +effort 3 26 3.688879 11.066637 652 +consist 2 26 3.688879 7.377758 651 +berkelei 1 26 3.688879 3.688879 657 +reliabl 3 25 3.737670 11.213010 674 +concern 1 25 3.737670 3.737670 666 +although 1 25 3.737670 3.737670 667 +supercomput 1 25 3.737670 3.737670 681 +fundament 1 25 3.737670 3.737670 661 +aspect 1 25 3.737670 3.737670 663 +mike 1 24 3.761200 3.761200 703 +head 2 23 3.806662 7.613324 732 +cooper 1 22 3.850148 3.850148 757 +properti 1 22 3.850148 3.850148 749 +flexibl 1 21 3.912023 3.912023 792 +toolkit 2 20 3.951244 7.902488 835 +exploit 1 20 3.951244 3.951244 836 +smith 1 20 3.951244 3.951244 820 +department 1 20 3.951244 3.951244 839 +prove 2 19 4.007333 8.014666 848 +runtim 1 19 4.007333 4.007333 858 +failur 1 18 4.060443 4.060443 898 +speed 1 18 4.060443 4.060443 911 +element 1 18 4.060443 4.060443 895 +layer 2 17 4.110874 8.221748 926 +seek 1 17 4.110874 4.110874 954 +former 1 17 4.110874 4.110874 956 +moor 1 17 4.110874 4.110874 936 +upon 1 16 4.174387 4.174387 978 +permit 1 16 4.174387 4.174387 962 +critic 1 16 4.174387 4.174387 982 +side 3 15 4.248495 12.745485 1022 +action 1 15 4.248495 4.248495 1038 +horu 10 14 4.317488 43.174880 1116 +becam 1 14 4.317488 4.317488 1117 +coordin 1 13 4.382027 4.382027 1182 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +denis 2 12 4.465908 8.931816 1255 +kenneth 1 12 4.465908 4.465908 1265 +replic 1 12 4.465908 4.465908 1231 +robust 1 12 4.465908 4.465908 1271 +infrastructur 1 12 4.465908 4.465908 1234 +pageif 1 12 4.465908 4.465908 1275 +reness 2 11 4.553877 9.107754 1333 +broad 1 11 4.553877 4.553877 1302 +probabilist 1 11 4.553877 4.553877 1343 +primit 1 11 4.553877 4.553877 1317 +certain 1 10 4.653960 4.653960 1393 +nuprl 1 10 4.653960 4.653960 1402 +guarante 1 10 4.653960 4.653960 1391 +werner 1 10 4.653960 4.653960 1385 +unusu 1 9 4.753590 4.753590 1566 +latter 1 9 4.753590 4.753590 1522 +correctli 1 9 4.753590 4.753590 1478 +robbert 1 9 4.753590 4.753590 1529 +birman 1 9 4.753590 4.753590 1531 +vogel 1 8 4.875197 4.875197 1622 +synchroni 2 7 5.010635 10.021270 1923 +friedman 2 7 5.010635 10.021270 1886 +chief 2 7 5.010635 10.021270 1829 +privaci 2 6 5.164786 10.329572 2144 +emerg 1 6 5.164786 5.164786 2038 +recruit 1 6 5.164786 5.164786 2145 +isi 5 5 5.347108 26.735540 2443 +notabl 2 5 5.347108 10.694216 2276 +broadcast 1 5 5.347108 5.347108 2453 +activitieseditor 1 5 5.347108 5.347108 2454 +popular 1 4 5.568345 5.568345 2802 +hayden 1 4 5.568345 5.568345 2844 +publicationsth 1 4 5.568345 5.568345 2859 +reconfigur 2 3 5.857933 11.715866 3556 +leverag 1 3 5.857933 5.857933 3153 +embodi 1 3 5.857933 5.857933 3236 +reiter 1 3 5.857933 5.857933 3461 +constabl 1 3 5.857933 5.857933 3186 +act 1 3 5.857933 5.857933 3557 +leadership 1 3 5.857933 5.857933 3320 +alamito 1 3 5.857933 5.857933 3558 +glade 1 3 5.857933 5.857933 3537 +benign 1 2 6.263398 6.263398 4893 +activitieschair 1 2 6.263398 6.263398 4894 +isat 1 2 6.263398 6.263398 4895 +birmankenneth 1 1 6.957497 6.957497 9533 +birmanprofessorphd 1 1 6.957497 6.957497 9534 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html new file mode 100644 index 00000000..6376e13a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Greenberg.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 9 775 0.000000 0.000000 2 +scienc 7 640 0.000000 0.000000 4 +univers 6 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 5 431 0.693147 3.465735 10 +program 2 374 0.693147 1.386294 7 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +gener 2 220 1.386294 2.772588 27 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +develop 2 174 1.791759 3.583518 53 +algorithm 2 162 1.791759 3.583518 57 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +model 7 145 1.945910 13.621370 69 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +technolog 5 131 2.079442 10.397210 102 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +topic 2 114 2.197225 4.394450 110 +well 1 109 2.197225 2.197225 121 +pleas 1 113 2.197225 2.197225 114 +techniqu 3 99 2.302585 6.907755 138 +graphic 10 90 2.397895 23.978950 147 +imag 4 91 2.397895 9.591580 161 +center 3 88 2.397895 7.193685 158 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +environ 2 84 2.484907 4.969814 177 +institut 2 84 2.484907 4.969814 187 +activ 1 84 2.484907 2.484907 182 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +journal 1 83 2.484907 2.484907 183 +novemb 1 81 2.484907 2.484907 179 +method 2 80 2.564949 5.129898 213 +dynam 1 76 2.564949 2.564949 194 +nation 3 74 2.639057 7.917171 240 +involv 2 71 2.639057 5.278114 227 +effici 1 73 2.639057 2.639057 233 +differ 1 66 2.708050 2.708050 253 +complex 2 64 2.772589 5.545178 269 +foundat 2 62 2.772589 5.545178 286 +prof 1 64 2.772589 2.772589 273 +descript 1 64 2.772589 2.772589 271 +evalu 1 64 2.772589 2.772589 266 +creat 1 63 2.772589 2.772589 277 +laboratori 1 63 2.772589 2.772589 292 +interact 1 62 2.772589 2.772589 270 +improv 1 62 2.772589 2.772589 289 +scientif 4 53 2.944439 11.777756 341 +three 1 54 2.944439 2.944439 330 +found 1 53 2.944439 2.944439 337 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +visual 3 48 3.044522 9.133566 372 +california 1 46 3.091042 3.091042 388 +made 1 44 3.135494 3.135494 398 +past 1 42 3.218876 3.218876 428 +annual 1 40 3.258097 3.258097 458 +multi 1 36 3.367296 3.367296 493 +within 1 33 3.433987 3.433987 525 +board 1 33 3.433987 3.433987 528 +anim 1 31 3.496508 3.496508 557 +rang 1 30 3.555348 3.555348 565 +focu 1 30 3.555348 3.555348 571 +particip 1 29 3.583519 3.583519 589 +progress 1 28 3.610918 3.610918 598 +determin 1 27 3.637586 3.637586 630 +american 1 27 3.637586 3.637586 634 +constraint 1 26 3.688879 3.688879 636 +strategi 2 25 3.737670 7.475340 682 +trace 1 25 3.737670 3.737670 677 +hill 1 25 3.737670 3.737670 670 +fellow 2 24 3.761200 7.522400 701 +motion 1 24 3.761200 3.761200 699 +displai 3 23 3.806662 11.419986 712 +input 2 23 3.806662 7.613324 727 +director 2 22 3.850148 7.700296 767 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +geometri 1 22 3.850148 3.850148 752 +flexibl 1 21 3.912023 3.912023 792 +synthesi 1 20 3.951244 3.951244 834 +facil 1 20 3.951244 3.951244 814 +department 1 20 3.951244 3.951244 839 +geometr 1 19 4.007333 4.007333 852 +media 1 19 4.007333 4.007333 861 +north 1 19 4.007333 4.007333 873 +dimension 1 18 4.060443 4.060443 909 +medic 2 17 4.110874 8.221748 958 +previous 1 17 4.110874 4.110874 923 +render 1 17 4.110874 4.110874 947 +moor 1 17 4.110874 4.110874 936 +spatial 1 16 4.174387 4.174387 988 +brown 1 16 4.174387 4.174387 977 +reflect 2 15 4.248495 8.496990 1034 +micro 1 15 4.248495 4.248495 1031 +conduct 2 14 4.317488 8.634976 1065 +denis 2 12 4.465908 8.931816 1255 +pageif 1 12 4.465908 4.465908 1275 +host 1 11 4.553877 4.553877 1306 +volum 1 11 4.553877 4.553877 1347 +modular 1 10 4.653960 4.653960 1392 +surfac 4 9 4.753590 19.014360 1574 +donald 1 9 4.753590 4.753590 1510 +routin 1 9 4.753590 4.753590 1549 +tempor 1 9 4.753590 4.753590 1584 +light 1 9 4.753590 4.753590 1533 +utah 1 9 4.753590 4.753590 1585 +realist 2 8 4.875197 9.750394 1665 +polygon 1 8 4.875197 4.875197 1723 +textur 1 8 4.875197 4.875197 1677 +academi 1 8 4.875197 4.875197 1735 +clip 1 7 5.010635 5.010635 1868 +parametr 1 7 5.010635 5.010635 1819 +suffici 1 7 5.010635 5.010635 1897 +core 1 7 5.010635 5.010635 1809 +hidden 1 6 5.164786 5.164786 1987 +photographi 1 6 5.164786 5.164786 2146 +carolina 1 6 5.164786 5.164786 2142 +biolog 1 6 5.164786 5.164786 2147 +decad 1 5 5.347108 5.347108 2455 +testb 1 5 5.347108 5.347108 2456 +anti 1 5 5.347108 5.347108 2434 +chapel 1 5 5.347108 5.347108 2457 +jacob 1 4 5.568345 5.568345 2667 +gould 1 3 5.857933 5.857933 3559 +alias 2 2 6.263398 12.526796 4823 +activitieseditori 1 2 6.263398 6.263398 4888 +greenbergdonald 1 1 6.957497 6.957497 9535 +greenberg 1 1 6.957497 6.957497 9536 +schurman 1 1 6.957497 6.957497 9537 +perceptu 1 1 6.957497 6.957497 9538 +activitiesdirector 1 1 6.957497 6.957497 9539 +visualizationprofession 1 1 6.957497 6.957497 9540 +acmreturn 1 1 6.957497 6.957497 9541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html new file mode 100644 index 00000000..72804459 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hartmanis.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 31 775 0.000000 0.000000 2 +scienc 20 640 0.000000 0.000000 4 +univers 8 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 3 431 0.693147 2.079441 10 +system 3 443 0.693147 2.079441 6 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 5 297 1.098612 5.493060 20 +time 3 293 1.098612 3.295836 17 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +also 1 259 1.386294 1.386294 28 +class 4 199 1.609438 6.437752 37 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +problem 4 147 1.945910 7.783640 75 +lectur 4 135 1.945910 7.783640 73 +professor 1 137 1.945910 1.945910 76 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +technolog 2 131 2.079442 4.158884 102 +studi 2 120 2.079442 4.158884 91 +report 1 131 2.079442 2.079442 92 +intern 3 108 2.197225 6.591675 128 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +structur 1 106 2.197225 2.197225 105 +world 1 115 2.197225 2.197225 126 +pleas 1 113 2.197225 2.197225 114 +need 2 98 2.302585 4.605170 135 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +associ 2 93 2.397895 4.795790 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +journal 8 83 2.484907 19.879256 183 +resourc 4 81 2.484907 9.939628 172 +member 3 84 2.484907 7.454721 165 +novemb 2 81 2.484907 4.969814 179 +institut 1 84 2.484907 2.484907 187 +requir 1 81 2.484907 2.484907 167 +school 1 84 2.484907 2.484907 188 +chang 1 82 2.484907 2.484907 163 +state 1 76 2.564949 2.564949 207 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +solv 3 73 2.639057 7.917171 234 +nation 3 74 2.639057 7.917171 240 +logic 1 71 2.639057 2.639057 230 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +degre 1 69 2.708050 2.708050 259 +august 1 66 2.708050 2.708050 257 +complex 8 64 2.772589 22.180712 269 +foundat 4 62 2.772589 11.090356 286 +guid 1 63 2.772589 2.772589 267 +interact 1 62 2.772589 2.772589 270 +visit 1 63 2.772589 2.772589 288 +variou 2 56 2.890372 5.780744 317 +explor 1 58 2.890372 2.890372 324 +faculti 1 56 2.890372 2.890372 325 +februari 2 54 2.944439 5.888878 328 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +profession 1 51 2.995732 2.995732 345 +advisor 1 51 2.995732 2.995732 355 +electron 2 47 3.091042 6.182084 379 +california 1 46 3.091042 3.091042 388 +understand 1 47 3.091042 3.091042 384 +physic 1 47 3.091042 3.091042 377 +natur 2 44 3.135494 6.270988 406 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +futur 1 41 3.218876 3.218876 427 +theoret 2 39 3.258097 6.516194 446 +annual 1 40 3.258097 3.258097 458 +committe 4 34 3.401197 13.604788 522 +random 2 34 3.401197 6.802394 511 +award 2 34 3.401197 6.802394 523 +return 1 34 3.401197 3.401197 502 +board 5 33 3.433987 17.169935 528 +within 1 33 3.433987 3.433987 525 +given 1 32 3.465736 3.465736 538 +richard 1 31 3.496508 3.496508 559 +hard 1 30 3.555348 3.555348 563 +art 1 29 3.583519 3.583519 593 +american 2 27 3.637586 7.275172 634 +consist 1 26 3.688879 3.688879 651 +bound 1 26 3.688879 3.688879 659 +fellow 4 24 3.761200 15.044800 701 +seri 2 24 3.761200 7.522400 708 +doctor 1 24 3.761200 3.761200 709 +springer 2 22 3.850148 7.700296 750 +verlag 2 22 3.850148 7.700296 751 +sequenti 1 22 3.850148 3.850148 745 +director 1 22 3.850148 3.850148 767 +siam 1 21 3.912023 3.912023 800 +divis 1 21 3.912023 3.912023 803 +exploit 1 20 3.951244 3.951244 836 +department 1 20 3.951244 3.951244 839 +walter 1 17 4.110874 4.110874 950 +germani 1 17 4.110874 4.110874 946 +moor 1 17 4.110874 4.110874 936 +georg 1 16 4.174387 4.174387 994 +brown 1 16 4.174387 4.174387 977 +contribut 1 15 4.248495 4.248495 1021 +topolog 1 14 4.317488 4.317488 1089 +essenti 1 13 4.382027 4.382027 1137 +denis 2 12 4.465908 8.931816 1255 +grow 1 12 4.465908 4.465908 1209 +amount 1 12 4.465908 4.465908 1208 +speech 1 12 4.465908 4.465908 1222 +weight 1 12 4.465908 4.465908 1204 +pageif 1 12 4.465908 4.465908 1275 +council 4 11 4.553877 18.215508 1364 +distinguish 2 11 4.553877 9.107754 1357 +rice 1 11 4.553877 4.553877 1336 +govern 1 9 4.753590 4.753590 1581 +classifi 1 9 4.753590 4.753590 1537 +classif 1 9 4.753590 4.753590 1586 +telecommun 1 9 4.753590 4.753590 1565 +observ 1 9 4.753590 4.753590 1578 +academi 5 8 4.875197 24.375985 1735 +quantit 1 8 4.875197 4.875197 1654 +gain 1 8 4.875197 4.875197 1730 +attent 1 8 4.875197 4.875197 1651 +virginia 1 8 4.875197 4.875197 1659 +trade 1 7 5.010635 5.010635 1815 +foreign 1 7 5.010635 5.010635 1919 +dimens 1 7 5.010635 5.010635 1930 +ture 2 6 5.164786 10.329572 1997 +advisori 2 6 5.164786 10.329572 2148 +chicago 2 6 5.164786 10.329572 2149 +recruit 1 6 5.164786 5.164786 2145 +prize 1 6 5.164786 5.164786 2150 +yield 1 5 5.347108 5.347108 2458 +activitieseditor 1 5 5.347108 5.347108 2454 +ifip 1 5 5.347108 5.347108 2459 +houston 1 5 5.347108 5.347108 2460 +scope 1 5 5.347108 5.347108 2296 +bulletin 1 5 5.347108 5.347108 2343 +comprehens 1 4 5.568345 5.568345 2745 +monograph 1 4 5.568345 5.568345 2860 +peer 1 4 5.568345 5.568345 2742 +tennesse 1 4 5.568345 5.568345 2763 +oracl 1 4 5.568345 5.568345 2823 +hypothesi 1 4 5.568345 5.568345 2650 +fals 1 4 5.568345 5.568345 2861 +hausdorff 1 4 5.568345 5.568345 2633 +explos 1 3 5.857933 5.857933 3138 +deeper 1 3 5.857933 5.857933 3146 +off 1 3 5.857933 5.857933 3170 +nondeterminist 1 3 5.857933 5.857933 3560 +activitiesmemb 1 3 5.857933 5.857933 3549 +law 1 2 6.263398 6.263398 4896 +har 1 2 6.263398 6.263398 4252 +aaa 1 2 6.263398 6.263398 4897 +banquet 1 2 6.263398 6.263398 4898 +publicationson 1 2 6.263398 6.263398 4899 +johan 1 2 6.263398 6.263398 4900 +eatc 3 1 6.957497 20.872491 9542 +juri 1 1 6.957497 6.957497 9543 +hartmanisjuri 1 1 6.957497 6.957497 9544 +hartmani 1 1 6.957497 6.957497 9545 +strateg 1 1 6.957497 6.957497 9546 +representativeschair 1 1 6.957497 6.957497 9547 +committeehonorsacm 1 1 6.957497 6.957497 9548 +stearn 1 1 6.957497 6.957497 9549 +latvian 1 1 6.957497 6.957497 9550 +charter 1 1 6.957497 6.957497 9551 +presseditori 1 1 6.957497 6.957497 9552 +sciencegoedel 1 1 6.957497 6.957497 9553 +awardshonorari 1 1 6.957497 6.957497 9554 +dortmund 1 1 6.957497 6.957497 9555 +lecturessom 1 1 6.957497 6.957497 9556 +benni 1 1 6.957497 6.957497 9557 +chor 1 1 6.957497 6.957497 9558 +od 1 1 6.957497 6.957497 9559 +goldreich 1 1 6.957497 6.957497 9560 +hastad 1 1 6.957497 6.957497 9561 +desh 1 1 6.957497 6.957497 9562 +ranjan 1 1 6.957497 6.957497 9563 +pankaj 1 1 6.957497 6.957497 9564 +rohatgi 1 1 6.957497 6.957497 9565 +kolmogorov 1 1 6.957497 6.957497 9566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html new file mode 100644 index 00000000..77cd57e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Hopcroft.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 7 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 7 431 0.693147 4.852029 10 +inform 4 412 0.693147 2.772588 8 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +engin 3 297 1.098612 3.295836 20 +last 1 314 1.098612 1.098612 14 +design 2 213 1.386294 2.772588 25 +cornel 2 215 1.386294 2.772588 23 +modifi 1 178 1.609438 1.609438 35 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +technolog 4 131 2.079442 8.317768 102 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +intern 2 108 2.197225 4.394450 128 +well 1 109 2.197225 2.197225 121 +manag 1 114 2.197225 2.197225 125 +mathemat 1 108 2.197225 2.197225 123 +pleas 1 113 2.197225 2.197225 114 +access 2 102 2.302585 4.605170 136 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +associ 2 93 2.397895 4.795790 151 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +institut 3 84 2.484907 7.454721 187 +journal 3 83 2.484907 7.454721 183 +academ 1 82 2.484907 2.484907 178 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +interfac 1 79 2.564949 2.564949 209 +state 1 76 2.564949 2.564949 207 +nation 3 74 2.639057 7.917171 240 +involv 1 71 2.639057 2.639057 227 +david 1 71 2.639057 2.639057 232 +simul 1 66 2.708050 2.708050 255 +multimedia 1 68 2.708050 2.708050 258 +knowledg 1 67 2.708050 2.708050 243 +januari 1 62 2.772589 2.772589 264 +organ 1 65 2.772589 2.772589 265 +colleg 2 61 2.833213 5.666426 300 +variou 1 56 2.890372 2.890372 317 +scientif 2 53 2.944439 5.888878 341 +profession 1 51 2.995732 2.995732 345 +appoint 1 49 3.044522 3.044522 358 +physic 1 47 3.091042 3.091042 377 +electron 1 47 3.091042 3.091042 379 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +continu 1 39 3.258097 3.258097 448 +annual 1 40 3.258097 3.258097 458 +electr 1 38 3.295837 3.295837 461 +represent 1 35 3.401197 3.401197 512 +committe 1 34 3.401197 3.401197 522 +board 3 33 3.433987 10.301961 528 +john 1 33 3.433987 3.433987 532 +within 1 33 3.433987 3.433987 525 +collabor 2 32 3.465736 6.931472 543 +storag 1 31 3.496508 3.496508 553 +art 1 29 3.583519 3.583519 593 +american 2 27 3.637586 7.275172 634 +supercomput 1 25 3.737670 3.737670 681 +fellow 2 24 3.761200 7.522400 701 +seri 1 24 3.761200 3.761200 708 +geometri 1 22 3.850148 3.850148 752 +unit 2 21 3.912023 7.824046 779 +among 1 21 3.912023 3.912023 781 +siam 1 21 3.912023 3.912023 800 +department 1 20 3.951244 3.951244 839 +geometr 1 19 4.007333 4.007333 852 +stanford 1 17 4.110874 4.110874 955 +moor 1 17 4.110874 4.110874 936 +remot 1 15 4.248495 4.248495 1041 +dean 2 14 4.317488 8.634976 1104 +heterogen 1 14 4.317488 4.317488 1090 +discret 1 13 4.382027 4.382027 1165 +captur 2 12 4.465908 8.931816 1232 +denis 2 12 4.465908 8.931816 1255 +robust 1 12 4.465908 4.465908 1271 +pageif 1 12 4.465908 4.465908 1275 +persist 1 11 4.553877 4.553877 1367 +council 1 11 4.553877 4.553877 1364 +facilit 1 10 4.653960 4.653960 1412 +packard 1 10 4.653960 4.653960 1444 +fellowship 1 10 4.653960 4.653960 1460 +academi 2 8 4.875197 9.750394 1735 +advisori 2 6 5.164786 10.329572 2148 +oxford 1 6 5.164786 5.164786 2121 +compris 1 4 5.568345 5.568345 2862 +activitiesmemb 1 3 5.857933 5.857933 3549 +defens 1 3 5.857933 5.857933 3327 +algorithmica 1 3 5.857933 5.857933 3561 +commiss 1 2 6.263398 6.263398 4901 +engineeringfellow 1 2 6.263398 6.263398 4902 +sciencesfellow 1 2 6.263398 6.263398 4903 +aaa 1 2 6.263398 6.263398 4897 +hopcroftjohn 1 1 6.957497 6.957497 9567 +hopcroftjoseph 1 1 6.957497 6.957497 9568 +silbert 1 1 6.957497 6.957497 9569 +engineeringprofessor 1 1 6.957497 6.957497 9570 +sciencephd 1 1 6.957497 6.957497 9571 +overse 1 1 6.957497 6.957497 9572 +applicationsmemb 1 1 6.957497 6.957497 9573 +boardmemb 1 1 6.957497 6.957497 9574 +forcememb 1 1 6.957497 6.957497 9575 +machinerychairman 1 1 6.957497 6.957497 9576 +trusteesmemb 1 1 6.957497 6.957497 9577 +lucil 1 1 6.957497 6.957497 9578 +foundationmemb 1 1 6.957497 6.957497 9579 +sloan 1 1 6.957497 6.957497 9580 +committeeadvisori 1 1 6.957497 6.957497 9581 +analysiseditor 1 1 6.957497 6.957497 9582 +geometryassoci 1 1 6.957497 6.957497 9583 +sciencesreturn 1 1 6.957497 6.957497 9584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html new file mode 100644 index 00000000..64a37421 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Pingali.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 14 374 0.693147 9.704058 7 +work 5 380 0.693147 3.465735 9 +research 4 431 0.693147 2.772588 10 +system 2 443 0.693147 1.386294 6 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 3 329 1.098612 3.295836 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +languag 3 227 1.386294 4.158882 26 +gener 2 220 1.386294 2.772588 27 +cornel 2 215 1.386294 2.772588 23 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +paper 3 205 1.609438 4.828314 38 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +parallel 10 169 1.791759 17.917590 60 +develop 9 174 1.791759 16.125831 53 +algorithm 5 162 1.791759 8.958795 57 +data 4 170 1.791759 7.167036 49 +applic 3 170 1.791759 5.375277 56 +distribut 3 162 1.791759 5.375277 51 +recent 2 167 1.791759 3.583518 58 +implement 2 152 1.791759 3.583518 52 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +problem 3 147 1.945910 5.837730 75 +area 2 144 1.945910 3.891820 80 +architectur 2 139 1.945910 3.891820 77 +like 2 132 1.945910 3.891820 81 +perform 1 143 1.945910 1.945910 74 +assign 1 135 1.945910 1.945910 66 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +compil 12 122 2.079442 24.953304 96 +tool 2 117 2.079442 4.158884 93 +technolog 2 131 2.079442 4.158884 102 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +code 5 108 2.197225 10.986125 116 +structur 2 106 2.197225 4.394450 105 +intern 1 108 2.197225 2.197225 128 +pleas 1 113 2.197225 2.197225 114 +techniqu 4 99 2.302585 9.210340 138 +memori 4 101 2.302585 9.210340 139 +access 3 102 2.302585 6.907755 136 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +question 2 91 2.397895 4.795790 141 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +comment 1 93 2.397895 2.397895 146 +control 4 82 2.484907 9.939628 164 +journal 3 83 2.484907 7.454721 183 +larg 2 82 2.484907 4.969814 168 +librari 2 87 2.484907 4.969814 181 +member 2 84 2.484907 4.969814 165 +ieee 2 86 2.484907 4.969814 190 +solut 1 82 2.484907 2.484907 162 +requir 1 81 2.484907 2.484907 167 +build 1 85 2.484907 2.484907 184 +institut 1 84 2.484907 2.484907 187 +novemb 1 81 2.484907 2.484907 179 +optim 4 79 2.564949 10.259796 197 +refer 2 78 2.564949 5.129898 203 +june 2 79 2.564949 5.129898 214 +method 1 80 2.564949 2.564949 213 +good 1 77 2.564949 2.564949 200 +messag 1 76 2.564949 2.564949 212 +exampl 1 77 2.564949 2.564949 195 +state 1 76 2.564949 2.564949 207 +involv 2 71 2.639057 5.278114 227 +solv 1 73 2.639057 2.639057 234 +line 1 75 2.639057 2.639057 231 +symposium 1 72 2.639057 2.639057 238 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +david 1 71 2.639057 2.639057 232 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +test 1 66 2.708050 2.708050 252 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +result 3 65 2.772589 8.317767 281 +foundat 2 62 2.772589 5.545178 286 +experi 1 64 2.772589 2.772589 283 +organ 1 65 2.772589 2.772589 265 +januari 1 62 2.772589 2.772589 264 +laboratori 1 63 2.772589 2.772589 292 +ithaca 1 65 2.772589 2.772589 294 +best 3 59 2.833213 8.499639 299 +automat 1 61 2.833213 2.833213 306 +space 1 57 2.890372 2.890372 310 +direct 1 57 2.890372 2.890372 316 +faculti 1 56 2.890372 2.890372 325 +local 5 55 2.944439 14.722195 334 +scientif 1 53 2.944439 2.944439 341 +processor 1 54 2.944439 2.944439 335 +februari 1 54 2.944439 2.944439 328 +investig 2 51 2.995732 5.991464 353 +profession 1 51 2.995732 2.995732 345 +approach 3 48 3.044522 9.133566 366 +numer 1 49 3.044522 3.044522 369 +principl 1 48 3.044522 3.044522 357 +adapt 1 46 3.091042 3.091042 387 +possibl 1 47 3.091042 3.091042 378 +algebra 2 45 3.135494 6.270988 394 +made 1 44 3.135494 3.135494 398 +better 1 45 3.135494 3.135494 401 +answer 1 45 3.135494 3.135494 391 +math 1 44 3.135494 3.135494 402 +show 1 43 3.178054 3.178054 417 +linear 3 41 3.218876 9.656628 431 +review 1 42 3.218876 3.218876 425 +must 3 40 3.258097 9.774291 442 +annual 2 40 3.258097 6.516194 458 +small 1 39 3.258097 3.258097 447 +form 1 39 3.258097 3.258097 443 +transact 1 39 3.258097 3.258097 438 +microsoft 2 38 3.295837 6.591674 468 +open 1 38 3.295837 3.295837 469 +paul 1 38 3.295837 3.295837 471 +hand 1 37 3.332205 3.332205 475 +award 2 34 3.401197 6.802394 523 +singl 1 34 3.401197 3.401197 510 +return 1 34 3.401197 3.401197 502 +product 1 33 3.433987 3.433987 527 +board 1 33 3.433987 3.433987 528 +transform 2 32 3.465736 6.931472 542 +extend 1 32 3.465736 3.465736 539 +independ 1 32 3.465736 3.465736 548 +produc 3 30 3.555348 10.666044 572 +exist 1 30 3.555348 3.555348 569 +graph 1 30 3.555348 3.555348 576 +depend 5 29 3.583519 17.917595 583 +multiprocessor 1 28 3.610918 3.610918 605 +framework 1 28 3.610918 3.610918 606 +static 1 27 3.637586 3.637586 619 +enabl 1 26 3.688879 3.688879 655 +supercomput 1 25 3.737670 3.737670 681 +lab 2 24 3.761200 7.522400 698 +alwai 1 24 3.761200 3.761200 691 +known 1 24 3.761200 3.761200 702 +flow 1 24 3.761200 3.761200 700 +consult 1 24 3.761200 3.761200 687 +equat 2 23 3.806662 7.613324 724 +togeth 1 23 3.806662 3.806662 714 +almost 2 22 3.850148 7.700296 742 +deal 1 22 3.850148 3.850148 736 +sequenti 1 22 3.850148 3.850148 745 +corpor 2 21 3.912023 7.824046 802 +increas 1 20 3.951244 3.951244 829 +toolkit 1 20 3.951244 3.951244 835 +department 1 20 3.951244 3.951244 839 +partial 1 18 4.060443 4.060443 900 +matrix 4 17 4.110874 16.443496 933 +asplo 2 17 4.110874 8.221748 948 +differenti 1 17 4.110874 4.110874 921 +moor 1 17 4.110874 4.110874 936 +spars 4 16 4.174387 16.697548 989 +transfer 3 16 4.174387 12.523161 967 +young 2 16 4.174387 8.348774 991 +match 1 16 4.174387 4.174387 965 +normal 1 16 4.174387 4.174387 995 +permit 1 16 4.174387 4.174387 962 +intel 1 16 4.174387 4.174387 1000 +novel 1 15 4.248495 4.248495 1039 +fortran 1 15 4.248495 4.248495 1027 +massachusett 1 14 4.317488 4.317488 1118 +earlier 2 13 4.382027 8.764054 1140 +block 2 13 4.382027 8.764054 1183 +incorpor 2 13 4.382027 8.764054 1163 +unfortun 1 13 4.382027 4.382027 1170 +sigplan 1 13 4.382027 4.382027 1190 +denis 2 12 4.465908 8.931816 1255 +pageif 1 12 4.465908 4.465908 1275 +loop 3 11 4.553877 13.661631 1310 +mesh 1 11 4.553877 4.553877 1351 +refin 1 11 4.553877 4.553877 1363 +faster 1 11 4.553877 4.553877 1323 +summar 1 11 4.553877 4.553877 1295 +michigan 1 11 4.553877 4.553877 1368 +vladimir 1 11 4.553877 4.553877 1324 +matric 3 10 4.653960 13.961880 1399 +packard 3 10 4.653960 13.961880 1444 +preliminari 1 9 4.753590 4.753590 1480 +prefer 1 9 4.753590 4.753590 1491 +jersei 1 9 4.753590 4.753590 1587 +hewlett 3 8 4.875197 14.625591 1709 +poor 1 8 4.875197 4.875197 1736 +competit 1 8 4.875197 4.875197 1635 +potenti 1 8 4.875197 4.875197 1690 +uniprocessor 1 8 4.875197 4.875197 1696 +presidenti 1 8 4.875197 4.875197 1737 +wayn 1 8 4.875197 4.875197 1738 +pldi 1 8 4.875197 4.875197 1704 +keshav 1 7 5.010635 5.010635 1852 +solver 1 7 5.010635 5.010635 1911 +uniform 1 7 5.010635 5.010635 1845 +refere 1 7 5.010635 5.010635 1895 +elementari 1 7 5.010635 5.010635 1825 +dens 2 6 5.164786 10.329572 2122 +prize 2 6 5.164786 10.329572 2150 +ensur 1 6 5.164786 5.164786 2012 +nest 1 6 5.164786 5.164786 2151 +argonn 1 5 5.347108 5.347108 2461 +compet 1 5 5.347108 5.347108 2462 +decad 1 5 5.347108 5.347108 2455 +panel 1 5 5.347108 5.347108 2463 +seventh 1 5 5.347108 5.347108 2464 +restructur 4 4 5.568345 22.273380 2775 +align 2 4 5.568345 11.136690 2863 +flavor 1 4 5.568345 5.568345 2625 +indupraka 1 4 5.568345 5.568345 2639 +kodukula 1 4 5.568345 5.568345 2640 +stodghil 1 4 5.568345 5.568345 2864 +armi 1 3 5.857933 5.857933 3562 +topla 1 3 5.857933 5.857933 3563 +chelmsford 1 3 5.857933 5.857933 3564 +detroit 1 3 5.857933 5.857933 3565 +rutger 1 3 5.857933 5.857933 3566 +brunswick 1 3 5.857933 5.857933 3567 +redmond 1 3 5.857933 5.857933 3568 +professorphd 1 2 6.263398 6.263398 4904 +numa 1 2 6.263398 6.263398 4905 +lambda 1 2 6.263398 6.263398 4458 +knit 1 2 6.263398 6.263398 4906 +lcpc 1 2 6.263398 6.263398 4538 +kotlyar 1 2 6.263398 6.263398 4907 +pingalikeshav 1 1 6.957497 6.957497 9585 +pingaliassoci 1 1 6.957497 6.957497 9586 +radic 1 1 6.957497 6.957497 9587 +sparsiti 1 1 6.957497 6.957497 9588 +krylov 1 1 6.957497 6.957497 9589 +petsc 1 1 6.957497 6.957497 9590 +activitiespanel 1 1 6.957497 6.957497 9591 +ballist 1 1 6.957497 6.957497 9592 +odyssei 1 1 6.957497 6.957497 9593 +computereditori 1 1 6.957497 6.957497 9594 +awardsn 1 1 6.957497 6.957497 9595 +lecturesfast 1 1 6.957497 6.957497 9596 +publicationssolv 1 1 6.957497 6.957497 9597 +gianfranco 1 1 6.957497 6.957497 9598 +bilardi 1 1 6.957497 6.957497 9599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html new file mode 100644 index 00000000..dcc92537 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Schneider.html @@ -0,0 +1,367 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 16 775 0.000000 0.000000 2 +univers 8 571 0.000000 0.000000 5 +scienc 8 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 17 443 0.693147 11.783499 6 +program 12 374 0.693147 8.317764 7 +research 6 431 0.693147 4.158882 10 +inform 5 412 0.693147 3.465735 8 +work 3 380 0.693147 2.079441 9 +time 9 293 1.098612 9.887508 17 +project 2 340 1.098612 2.197224 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +softwar 5 220 1.386294 6.931470 30 +washington 4 236 1.386294 5.545176 32 +cornel 3 215 1.386294 4.158882 23 +design 2 213 1.386294 2.772588 25 +also 1 259 1.386294 1.386294 28 +oper 3 180 1.609438 4.828314 34 +group 3 183 1.609438 4.828314 36 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +implement 7 152 1.791759 12.542313 52 +distribut 5 162 1.791759 8.958795 51 +applic 5 170 1.791759 8.958795 56 +base 3 165 1.791759 5.375277 50 +data 3 170 1.791759 5.375277 49 +algorithm 3 162 1.791759 5.375277 57 +develop 2 174 1.791759 3.583518 53 +network 2 168 1.791759 3.583518 61 +avail 1 169 1.791759 1.791759 48 +read 1 154 1.791759 1.791759 47 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +lectur 4 135 1.945910 7.783640 73 +architectur 3 139 1.945910 5.837730 77 +support 3 132 1.945910 5.837730 83 +process 3 142 1.945910 5.837730 72 +note 3 142 1.945910 5.837730 67 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +confer 3 126 2.079442 6.238326 100 +machin 2 129 2.079442 4.158884 95 +high 2 130 2.079442 4.158884 101 +studi 2 120 2.079442 4.158884 91 +tool 2 117 2.079442 4.158884 93 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +intern 9 108 2.197225 19.775025 128 +teach 4 108 2.197225 8.788900 112 +specif 3 106 2.197225 6.591675 106 +manag 2 114 2.197225 4.394450 125 +final 2 116 2.197225 4.394450 108 +mathemat 2 108 2.197225 4.394450 123 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +site 1 106 2.197225 2.197225 119 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +pleas 1 113 2.197225 2.197225 114 +techniqu 7 99 2.302585 16.118095 138 +technic 3 100 2.302585 6.907755 140 +advanc 2 99 2.302585 4.605170 130 +access 1 102 2.302585 2.302585 136 +text 1 98 2.302585 2.302585 133 +real 9 93 2.397895 21.581055 144 +center 4 88 2.397895 9.591580 158 +proceed 3 93 2.397895 7.193685 152 +associ 2 93 2.397895 4.795790 151 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +member 7 84 2.484907 17.394349 165 +environ 5 84 2.484907 12.424535 177 +school 4 84 2.484907 9.939628 188 +build 2 85 2.484907 4.969814 184 +ieee 2 86 2.484907 4.969814 190 +educ 2 86 2.484907 4.969814 191 +second 1 81 2.484907 2.484907 166 +start 1 83 2.484907 2.484907 173 +control 1 82 2.484907 2.484907 164 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +novemb 1 81 2.484907 2.484907 179 +state 3 76 2.564949 7.694847 207 +complet 3 77 2.564949 7.694847 208 +issu 2 78 2.564949 5.129898 211 +april 2 77 2.564949 5.129898 196 +june 2 79 2.564949 5.129898 214 +messag 1 76 2.564949 2.564949 212 +workshop 6 71 2.639057 15.834342 239 +logic 5 71 2.639057 13.195285 230 +symposium 5 72 2.639057 13.195285 238 +meet 2 72 2.639057 5.278114 229 +involv 1 71 2.639057 2.639057 227 +appli 1 71 2.639057 2.639057 226 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +write 1 72 2.639057 2.639057 222 +materi 1 75 2.639057 2.639057 221 +summari 1 73 2.639057 2.639057 237 +order 2 69 2.708050 5.416100 249 +integr 2 67 2.708050 5.416100 245 +differ 1 66 2.708050 2.708050 253 +view 1 70 2.708050 2.708050 254 +virtual 4 62 2.772589 11.090356 285 +polici 2 64 2.772589 5.545178 279 +import 1 65 2.772589 2.772589 282 +evalu 1 64 2.772589 2.772589 266 +function 1 62 2.772589 2.772589 275 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +septemb 1 65 2.772589 2.772589 274 +foundat 1 62 2.772589 2.772589 286 +march 5 61 2.833213 14.166065 295 +juli 4 60 2.833213 11.332852 305 +reason 3 57 2.890372 8.671116 318 +summer 3 56 2.890372 8.671116 311 +publish 1 57 2.890372 2.890372 326 +instruct 3 53 2.944439 8.833317 332 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +investig 2 51 2.995732 5.991464 353 +run 1 51 2.995732 2.995732 347 +hardwar 1 51 2.995732 2.995732 350 +particular 1 51 2.995732 2.995732 352 +profession 1 51 2.995732 2.995732 345 +standard 1 48 3.044522 3.044522 365 +approach 1 48 3.044522 3.044522 366 +understand 1 47 3.091042 3.091042 384 +physic 1 47 3.091042 3.091042 377 +move 1 47 3.091042 3.091042 382 +could 1 46 3.091042 3.091042 383 +possibl 1 47 3.091042 3.091042 378 +protocol 3 45 3.135494 9.406482 407 +execut 3 45 3.135494 9.406482 404 +textbook 1 44 3.135494 3.135494 397 +term 2 43 3.178054 6.356108 411 +third 1 43 3.178054 3.178054 412 +past 3 42 3.218876 9.656628 428 +york 2 41 3.218876 6.437752 435 +editor 1 41 3.218876 3.218876 433 +continu 1 39 3.258097 3.258097 448 +programm 1 39 3.258097 3.258097 445 +transact 1 39 3.258097 3.258097 438 +annual 1 40 3.258097 3.258097 458 +prototyp 1 38 3.295837 3.295837 463 +origin 1 38 3.295837 3.295837 472 +formal 5 37 3.332205 16.661025 478 +respons 1 37 3.332205 3.332205 476 +hand 1 37 3.332205 3.332205 475 +committe 9 34 3.401197 30.610773 522 +concurr 3 34 3.401197 10.203591 501 +least 1 35 3.401197 3.401197 516 +singl 1 34 3.401197 3.401197 510 +everi 1 34 3.401197 3.401197 519 +global 1 34 3.401197 3.401197 520 +return 1 34 3.401197 3.401197 502 +toler 9 33 3.433987 30.905883 533 +fault 10 32 3.465736 34.657360 547 +given 2 32 3.465736 6.931472 538 +ad 2 32 3.465736 6.931472 544 +exist 1 30 3.555348 3.555348 569 +depend 2 29 3.583519 7.167038 583 +synchron 2 29 3.583519 7.167038 588 +focus 1 29 3.583519 3.583519 584 +art 1 29 3.583519 3.583519 593 +becom 3 28 3.610918 10.832754 603 +pass 2 28 3.610918 7.221836 611 +univ 1 28 3.610918 3.610918 617 +american 1 27 3.637586 3.637586 634 +detect 2 26 3.688879 7.377758 646 +subject 1 26 3.688879 3.688879 647 +rule 1 26 3.688879 3.688879 638 +relev 1 26 3.688879 3.688879 637 +compar 1 26 3.688879 3.688879 648 +hill 2 25 3.737670 7.475340 670 +concern 1 25 3.737670 3.737670 666 +task 1 25 3.737670 3.737670 678 +handl 1 24 3.761200 3.761200 685 +seri 1 24 3.761200 3.761200 708 +proof 4 23 3.806662 15.226648 720 +equat 2 23 3.806662 7.613324 724 +mobil 2 23 3.806662 7.613324 730 +sequenc 1 23 3.806662 3.806662 734 +methodolog 1 23 3.806662 3.806662 733 +springer 3 22 3.850148 11.550444 750 +verlag 2 22 3.850148 7.700296 751 +reduc 1 22 3.850148 3.850148 759 +leav 1 21 3.912023 3.912023 772 +avoid 1 21 3.912023 3.912023 799 +exploit 5 20 3.951244 19.756220 836 +verif 2 20 3.951244 7.902488 826 +safeti 1 20 3.951244 3.951244 817 +department 1 20 3.951244 3.951244 839 +north 4 19 4.007333 16.029332 873 +agent 6 18 4.060443 24.362658 910 +partial 2 18 4.060443 8.120886 900 +along 1 18 4.060443 4.060443 878 +thoma 1 18 4.060443 4.060443 901 +scott 1 18 4.060443 4.060443 884 +germani 8 17 4.110874 32.886992 946 +sept 5 17 4.110874 20.554370 952 +outlin 4 17 4.110874 16.443496 914 +analyz 1 17 4.110874 4.110874 925 +otherwis 1 17 4.110874 4.110874 922 +whether 1 17 4.110874 4.110874 918 +moor 1 17 4.110874 4.110874 936 +critic 4 16 4.174387 16.697548 982 +georg 1 16 4.174387 4.174387 994 +letter 1 16 4.174387 4.174387 981 +hybrid 3 15 4.248495 12.745485 1057 +coordin 5 13 4.382027 21.910135 1182 +carri 1 13 4.382027 4.382027 1152 +nasa 1 13 4.382027 4.382027 1188 +discret 1 13 4.382027 4.382027 1165 +asynchron 2 12 4.465908 8.931816 1229 +verifi 2 12 4.465908 8.931816 1261 +denis 2 12 4.465908 8.931816 1255 +replic 1 12 4.465908 4.465908 1231 +speech 1 12 4.465908 4.465908 1222 +pageif 1 12 4.465908 4.465908 1275 +reness 2 11 4.553877 9.107754 1333 +refin 2 11 4.553877 9.107754 1363 +israel 2 11 4.553877 9.107754 1366 +bandwidth 1 11 4.553877 4.553877 1365 +typic 1 11 4.553877 4.553877 1360 +distinguish 1 11 4.553877 4.553877 1357 +arpa 1 11 4.553877 4.553877 1369 +volum 1 11 4.553877 4.553877 1347 +island 1 11 4.553877 4.553877 1345 +forc 2 10 4.653960 9.307920 1384 +resid 1 10 4.653960 4.653960 1461 +tradit 1 10 4.653960 4.653960 1404 +invit 1 10 4.653960 4.653960 1428 +traffic 1 10 4.653960 4.653960 1421 +robbert 2 9 4.753590 9.507180 1529 +compos 1 9 4.753590 4.753590 1527 +realiz 2 8 4.875197 9.750394 1739 +filter 1 8 4.875197 4.875197 1641 +satisfi 1 8 4.875197 4.875197 1694 +virginia 1 8 4.875197 4.875197 1659 +colloquium 1 8 4.875197 4.875197 1734 +paradigm 1 8 4.875197 4.875197 1662 +fifth 2 7 5.010635 10.021270 1931 +predic 1 7 5.010635 5.010635 1806 +chief 1 7 5.010635 5.010635 1829 +merg 1 7 5.010635 5.010635 1862 +carolina 4 6 5.164786 20.659144 2142 +moder 2 6 5.164786 10.329572 2112 +fred 1 6 5.164786 5.164786 2072 +brook 1 6 5.164786 5.164786 2152 +infer 1 6 5.164786 5.164786 2040 +risc 1 6 5.164786 5.164786 2016 +ensur 1 6 5.164786 5.164786 2012 +mistak 1 6 5.164786 5.164786 2110 +kluwer 1 6 5.164786 5.164786 2143 +causal 1 6 5.164786 5.164786 2024 +mission 2 5 5.347108 10.694216 2465 +ifip 2 5 5.347108 10.694216 2459 +chapel 2 5 5.347108 10.694216 2457 +attract 1 5 5.347108 5.347108 2356 +activitieseditor 1 5 5.347108 5.347108 2454 +merit 1 5 5.347108 5.347108 2466 +speaker 1 5 5.347108 5.347108 2370 +panel 1 5 5.347108 5.347108 2463 +proposit 1 5 5.347108 5.347108 2339 +bulletin 1 5 5.347108 5.347108 2343 +sigcs 3 4 5.568345 16.705035 2865 +stoller 2 4 5.568345 11.136690 2866 +nashvil 2 4 5.568345 11.136690 2867 +tennesse 2 4 5.568345 11.136690 2763 +schneider 1 4 5.568345 5.568345 2868 +increasingli 1 4 5.568345 5.568345 2766 +ident 1 4 5.568345 5.568345 2826 +suffic 1 4 5.568345 5.568345 2869 +conserv 1 4 5.568345 5.568345 2870 +monograph 1 4 5.568345 5.568345 2860 +assur 1 4 5.568345 5.568345 2722 +dagstuhl 1 4 5.568345 5.568345 2871 +technion 1 4 5.568345 5.568345 2856 +aircraft 1 4 5.568345 5.568345 2872 +newslett 1 4 5.568345 5.568345 2873 +gri 5 3 5.857933 29.289665 3569 +defens 2 3 5.857933 11.715866 3327 +munich 2 3 5.857933 11.715866 3570 +stoni 1 3 5.857933 5.857933 3571 +heavili 1 3 5.857933 5.857933 3572 +streamlin 1 3 5.857933 5.857933 3573 +jointli 1 3 5.857933 5.857933 3118 +dimac 1 3 5.857933 5.857933 3574 +reactiv 1 3 5.857933 5.857933 3575 +am 1 3 5.857933 5.857933 3386 +haifa 1 3 5.857933 5.857933 3554 +successor 1 3 5.857933 5.857933 3576 +hoto 1 3 5.857933 5.857933 3577 +orca 1 3 5.857933 5.857933 3578 +hypervisor 6 2 6.263398 37.580388 4549 +replica 4 2 6.263398 25.053592 4206 +norwai 3 2 6.263398 18.790194 4908 +systemsprogram 3 2 6.263398 18.790194 4882 +tacoma 2 2 6.263398 12.526796 4909 +isat 2 2 6.263398 12.526796 4895 +warfar 2 2 6.263398 12.526796 4910 +professorphd 1 2 6.263398 6.263398 4904 +widespread 1 2 6.263398 6.263398 4911 +annal 1 2 6.263398 6.263398 4912 +hebrew 1 2 6.263398 6.263398 4913 +banquet 1 2 6.263398 6.263398 4898 +grante 1 2 6.263398 6.263398 4914 +contractor 1 2 6.263398 6.263398 4915 +mason 1 2 6.263398 6.263398 4916 +airplan 1 2 6.263398 6.263398 4917 +jerusalem 1 2 6.263398 6.263398 4918 +marzullo 1 2 6.263398 6.263398 4919 +trom 6 1 6.957497 41.744982 9600 +marktoberdorf 3 1 6.957497 20.872491 9601 +lubeck 3 1 6.957497 20.872491 9602 +johansen 2 1 6.957497 13.914994 9603 +engineeringeditor 2 1 6.957497 13.914994 9604 +schneiderfr 1 1 6.957497 6.957497 9605 +assert 1 1 6.957497 6.957497 9606 +undefin 1 1 6.957497 6.957497 9607 +bressoud 1 1 6.957497 6.957497 9608 +instantli 1 1 6.957497 6.957497 9609 +freed 1 1 6.957497 6.957497 9610 +roam 1 1 6.957497 6.957497 9611 +activitiessabbat 1 1 6.957497 6.957497 9612 +computingeditor 1 1 6.957497 6.957497 9613 +letterseditor 1 1 6.957497 6.957497 9614 +systemseditor 1 1 6.957497 6.957497 9615 +surveysco 1 1 6.957497 6.957497 9616 +verlagprogram 1 1 6.957497 6.957497 9617 +constructionprogram 1 1 6.957497 6.957497 9618 +resili 1 1 6.957497 6.957497 9619 +applicationsprogram 1 1 6.957497 6.957497 9620 +sixteenth 1 1 6.957497 6.957497 9621 +symposiumprogram 1 1 6.957497 6.957497 9622 +systemsst 1 1 6.957497 6.957497 9623 +chissa 1 1 6.957497 6.957497 9624 +technologymemb 1 1 6.957497 6.957497 9625 +agencyreview 1 1 6.957497 6.957497 9626 +leibniz 1 1 6.957497 6.957497 9627 +universitymemb 1 1 6.957497 6.957497 9628 +awardsfellow 1 1 6.957497 6.957497 9629 +sciencefellow 1 1 6.957497 6.957497 9630 +machinerylecturesproof 1 1 6.957497 6.957497 9631 +afosr 1 1 6.957497 6.957497 9632 +panelist 1 1 6.957497 6.957497 9633 +publicationsreason 1 1 6.957497 6.957497 9634 +icalp 1 1 6.957497 6.957497 9635 +boll 1 1 6.957497 6.957497 9636 +limor 1 1 6.957497 6.957497 9637 +ultradepend 1 1 6.957497 6.957497 9638 +dehn 1 1 6.957497 6.957497 9639 +primu 1 1 6.957497 6.957497 9640 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html new file mode 100644 index 00000000..460cd736 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^VanLoan.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 7 571 0.000000 0.000000 5 +scienc 6 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +last 2 314 1.098612 2.197224 14 +current 1 284 1.098612 1.098612 21 +cornel 2 215 1.386294 2.772588 23 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +develop 2 174 1.791759 3.583518 53 +applic 2 170 1.791759 3.583518 56 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +process 2 142 1.945910 3.891820 72 +perform 2 143 1.945910 3.891820 74 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +pleas 1 113 2.197225 2.197225 114 +text 1 98 2.302585 2.302585 133 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +member 3 84 2.484907 7.454721 165 +journal 2 83 2.484907 4.969814 183 +build 2 85 2.484907 4.969814 184 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +method 2 80 2.564949 5.129898 213 +state 2 76 2.564949 5.129898 207 +april 2 77 2.564949 5.129898 196 +solv 2 73 2.639057 5.278114 234 +januari 3 62 2.772589 8.317767 264 +descript 2 64 2.772589 5.545178 271 +result 1 65 2.772589 2.772589 281 +import 1 65 2.772589 2.772589 282 +organ 1 65 2.772589 2.772589 265 +plai 1 60 2.833213 2.833213 307 +variou 2 56 2.890372 5.780744 317 +semest 1 58 2.890372 2.890372 312 +undergradu 2 54 2.944439 5.888878 338 +particular 1 51 2.995732 2.995732 352 +profession 1 51 2.995732 2.995732 345 +adapt 1 46 3.091042 3.091042 387 +fast 1 42 3.218876 3.218876 429 +continu 1 39 3.258097 3.258097 448 +multipl 1 39 3.258097 3.258097 453 +societi 1 40 3.258097 3.258097 456 +annual 1 40 3.258097 3.258097 458 +close 1 38 3.295837 3.295837 465 +committe 4 34 3.401197 13.604788 522 +return 1 34 3.401197 3.401197 502 +product 6 33 3.433987 20.603922 527 +curriculum 1 33 3.433987 3.433987 535 +transform 1 32 3.465736 3.465736 542 +art 2 29 3.583519 7.167038 593 +chair 1 29 3.583519 3.583519 596 +subject 1 26 3.688879 3.688879 647 +constraint 1 26 3.688879 3.688879 636 +repres 1 26 3.688879 3.688879 656 +siam 3 21 3.912023 11.736069 800 +department 1 20 3.951244 3.951244 839 +four 1 18 4.060443 4.060443 905 +matrix 1 17 4.110874 4.110874 933 +moor 1 17 4.110874 4.110874 936 +role 1 14 4.317488 4.317488 1101 +charl 1 13 4.382027 4.382027 1149 +deriv 1 13 4.382027 4.382027 1145 +translat 1 13 4.382027 4.382027 1164 +optic 2 12 4.465908 8.931816 1221 +denis 2 12 4.465908 8.931816 1255 +reader 1 12 4.465908 4.465908 1246 +pageif 1 12 4.465908 4.465908 1275 +michigan 1 11 4.553877 4.553877 1368 +loop 1 11 4.553877 4.553877 1310 +bandwidth 1 11 4.553877 4.553877 1365 +america 1 11 4.553877 4.553877 1370 +matric 1 10 4.653960 4.653960 1399 +factor 1 9 4.753590 4.753590 1544 +sweden 3 7 5.010635 15.031905 1885 +signal 1 7 5.010635 5.010635 1910 +prize 3 6 5.164786 15.494358 2150 +proce 1 6 5.164786 5.164786 2114 +ohio 2 5 5.347108 10.694216 2447 +markov 1 5 5.347108 5.347108 2280 +activitieseditor 1 5 5.347108 5.347108 2454 +wavelet 1 4 5.568345 5.568345 2874 +analog 1 4 5.568345 5.568345 2875 +admiss 1 4 5.568345 5.568345 2704 +wilkinson 1 3 5.857933 5.857933 3579 +pitsiani 1 3 5.857933 5.857933 3175 +household 2 2 6.263398 12.526796 4920 +intuit 2 2 6.263398 12.526796 4921 +nearest 1 2 6.263398 6.263398 4922 +anticip 1 2 6.263398 6.263398 4268 +activitiescomput 1 2 6.263398 6.263398 4923 +kroneck 5 1 6.957497 34.787485 9641 +umea 2 1 6.957497 13.914994 9642 +loancharl 1 1 6.957497 6.957497 9643 +loanprofessorphd 1 1 6.957497 6.957497 9644 +inhomogen 1 1 6.957497 6.957497 9645 +committeedepart 1 1 6.957497 6.957497 9646 +meetingfreshman 1 1 6.957497 6.957497 9647 +analysismemb 1 1 6.957497 6.957497 9648 +diprima 1 1 6.957497 6.957497 9649 +lecturesappl 1 1 6.957497 6.957497 9650 +linkop 1 1 6.957497 6.957497 9651 +publicationsoptim 1 1 6.957497 6.957497 9652 +ellerbroek 1 1 6.957497 6.957497 9653 +plemmon 1 1 6.957497 6.957497 9654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html new file mode 100644 index 00000000..4383900d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Department^Annual95^Faculty^Wagner.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +cours 3 273 1.098612 3.295836 15 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +report 1 131 2.079442 2.079442 92 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +mathemat 1 108 2.197225 2.197225 123 +pleas 1 113 2.197225 2.197225 114 +associ 2 93 2.397895 4.795790 151 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +level 1 87 2.484907 2.484907 180 +institut 1 84 2.484907 2.484907 187 +novemb 1 81 2.484907 2.484907 179 +logic 1 71 2.639057 2.639057 230 +undergradu 1 54 2.944439 2.944439 338 +profession 1 51 2.995732 2.995732 345 +electron 1 47 3.091042 3.091042 379 +annual 1 40 3.258097 3.258097 458 +electr 1 38 3.295837 3.295837 461 +respons 1 37 3.332205 3.332205 476 +committe 1 34 3.401197 3.401197 522 +return 1 34 3.401197 3.401197 502 +curriculum 2 33 3.433987 6.867974 535 +symbol 1 27 3.637586 3.637586 620 +revis 1 26 3.688879 3.688879 640 +primari 1 25 3.737670 3.737670 669 +other 1 24 3.761200 3.761200 697 +prepar 1 20 3.951244 3.951244 824 +department 1 20 3.951244 3.951244 839 +lower 1 18 4.060443 4.060443 886 +moor 1 17 4.110874 4.110874 936 +women 1 16 4.174387 4.174387 1004 +denis 2 12 4.465908 8.931816 1255 +pageif 1 12 4.465908 4.465908 1275 +introductori 1 9 4.753590 4.753590 1479 +machineri 1 4 5.568345 5.568345 2851 +activitiescomput 1 2 6.263398 6.263398 4923 +activitiesassoci 1 2 6.263398 6.263398 4881 +catherin 1 1 6.957497 6.957497 9655 +wagnercatherin 1 1 6.957497 6.957497 9656 +wagnersenior 1 1 6.957497 6.957497 9657 +lecturerphd 1 1 6.957497 6.957497 9658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html new file mode 100644 index 00000000..3484f9e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^Brian_Smith.html @@ -0,0 +1,351 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +research 24 431 0.693147 16.635528 10 +system 6 443 0.693147 4.158882 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +us 5 329 1.098612 5.493060 16 +offic 3 299 1.098612 3.295836 13 +cours 3 273 1.098612 3.295836 15 +current 2 284 1.098612 2.197224 21 +project 1 340 1.098612 1.098612 18 +softwar 4 220 1.386294 5.545176 30 +also 4 259 1.386294 5.545176 28 +languag 4 227 1.386294 5.545176 26 +cornel 3 215 1.386294 4.158882 23 +gener 2 220 1.386294 2.772588 27 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +paper 3 205 1.609438 4.828314 38 +class 2 199 1.609438 3.218876 37 +oper 2 180 1.609438 3.218876 34 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +data 13 170 1.791759 23.292867 49 +network 10 168 1.791759 17.917590 61 +avail 6 169 1.791759 10.750554 48 +distribut 3 162 1.791759 5.375277 51 +develop 3 174 1.791759 5.375277 53 +algorithm 3 162 1.791759 5.375277 57 +applic 2 170 1.791759 3.583518 56 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +process 16 142 1.945910 31.134560 72 +first 4 140 1.945910 7.783640 71 +support 2 132 1.945910 3.891820 83 +file 2 132 1.945910 3.891820 70 +architectur 2 139 1.945910 3.891820 77 +area 2 144 1.945910 3.891820 80 +perform 2 143 1.945910 3.891820 74 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +technolog 3 131 2.079442 6.238326 102 +confer 3 126 2.079442 6.238326 100 +provid 2 121 2.079442 4.158884 94 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +tool 1 117 2.079442 2.079442 93 +databas 1 122 2.079442 2.079442 86 +intern 4 108 2.197225 8.788900 128 +make 2 111 2.197225 4.394450 120 +well 2 109 2.197225 4.394450 121 +version 2 113 2.197225 4.394450 122 +teach 1 108 2.197225 2.197225 112 +site 1 106 2.197225 2.197225 119 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +commun 3 95 2.397895 7.193685 157 +imag 3 91 2.397895 7.193685 161 +center 2 88 2.397895 4.795790 158 +call 2 91 2.397895 4.795790 153 +graphic 2 90 2.397895 4.795790 147 +select 2 91 2.397895 4.795790 154 +mani 1 92 2.397895 2.397895 150 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +proceed 1 93 2.397895 2.397895 152 +environ 5 84 2.484907 12.424535 177 +build 3 85 2.484907 7.454721 184 +resourc 2 81 2.484907 4.969814 172 +novemb 2 81 2.484907 4.969814 179 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +ieee 1 86 2.484907 2.484907 190 +server 9 76 2.564949 23.084541 204 +method 3 80 2.564949 7.694847 213 +june 2 79 2.564949 5.129898 214 +sourc 1 77 2.564949 2.564949 201 +onlin 4 75 2.639057 10.556228 223 +workshop 3 71 2.639057 7.917171 239 +servic 2 72 2.639057 5.278114 236 +html 2 75 2.639057 5.278114 235 +upson 1 71 2.639057 2.639057 218 +nation 1 74 2.639057 2.639057 240 +materi 1 75 2.639057 2.639057 221 +line 1 75 2.639057 2.639057 231 +david 1 71 2.639057 2.639057 232 +name 1 72 2.639057 2.639057 220 +symposium 1 72 2.639057 2.639057 238 +multimedia 7 68 2.708050 18.956350 258 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +august 1 66 2.708050 2.708050 257 +laboratori 2 63 2.772589 5.545178 292 +experi 1 64 2.772589 2.772589 283 +previou 1 62 2.772589 2.772589 290 +copi 1 63 2.772589 2.772589 284 +septemb 1 65 2.772589 2.772589 274 +best 3 59 2.833213 8.499639 299 +type 2 61 2.833213 5.666426 296 +plai 1 60 2.833213 2.833213 307 +automat 1 61 2.833213 2.833213 306 +locat 1 59 2.833213 2.833213 303 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +faculti 1 56 2.890372 2.890372 325 +semest 1 58 2.890372 2.890372 312 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +sever 1 56 2.890372 2.890372 322 +explor 1 58 2.890372 2.890372 324 +talk 6 53 2.944439 17.666634 336 +local 2 55 2.944439 5.888878 334 +found 1 53 2.944439 2.944439 337 +undergradu 1 54 2.944439 2.944439 338 +februari 1 54 2.944439 2.944439 328 +hardwar 2 51 2.995732 5.991464 350 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +digit 1 52 2.995732 2.995732 348 +approach 3 48 3.044522 9.133566 366 +format 3 48 3.044522 9.133566 356 +without 1 50 3.044522 3.044522 370 +california 2 46 3.091042 6.182084 388 +electron 2 47 3.091042 6.182084 379 +possibl 1 47 3.091042 3.091042 378 +effect 1 46 3.091042 3.091042 385 +video 27 44 3.135494 84.658338 405 +protocol 6 45 3.135494 18.812964 407 +describ 3 45 3.135494 9.406482 400 +anoth 1 45 3.135494 3.135494 408 +made 1 44 3.135494 3.135494 398 +third 3 43 3.178054 9.534162 412 +around 1 43 3.178054 3.178054 415 +review 3 42 3.218876 9.656628 425 +music 1 42 3.218876 3.218876 436 +fast 1 42 3.218876 3.218876 429 +continu 5 39 3.258097 16.290485 448 +must 2 40 3.258097 6.516194 442 +brian 12 38 3.295837 39.550044 466 +slide 1 38 3.295837 3.295837 467 +workstat 4 37 3.332205 13.328820 479 +connect 3 37 3.332205 9.996615 485 +represent 1 35 3.401197 3.401197 512 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +queri 1 33 3.433987 3.433987 524 +independ 3 32 3.465736 10.397208 548 +ad 2 32 3.465736 6.931472 544 +idea 2 32 3.465736 6.931472 545 +storag 2 31 3.496508 6.993016 553 +common 2 30 3.555348 7.110696 574 +exist 2 30 3.555348 7.110696 569 +specifi 2 30 3.555348 7.110696 568 +domain 1 30 3.555348 3.555348 564 +built 1 29 3.583519 3.583519 592 +load 2 28 3.610918 7.221836 601 +retriev 1 27 3.637586 3.637586 621 +manipul 1 27 3.637586 3.637586 624 +proc 6 26 3.688879 22.133274 649 +effort 4 26 3.688879 14.755516 652 +berkelei 2 26 3.688879 7.377758 657 +compar 1 26 3.688879 3.688879 648 +client 7 25 3.737670 26.163690 679 +toward 1 25 3.737670 3.737670 668 +store 1 24 3.761200 3.761200 693 +scalabl 1 24 3.761200 3.761200 705 +motion 1 24 3.761200 3.761200 699 +compress 4 23 3.806662 15.226648 719 +initi 2 23 3.806662 7.613324 717 +lead 1 23 3.806662 3.806662 718 +decis 1 23 3.806662 3.806662 728 +famili 1 23 3.806662 3.806662 735 +serv 1 22 3.850148 3.850148 758 +almost 1 22 3.850148 3.850148 742 +sent 1 22 3.850148 3.850148 763 +util 1 21 3.912023 3.912023 774 +programminglanguag 1 21 3.912023 3.912023 782 +thu 1 21 3.912023 3.912023 773 +smith 12 20 3.951244 47.414928 820 +qualiti 2 20 3.951244 7.902488 832 +mpeg 2 20 3.951244 7.902488 831 +reserv 1 20 3.951244 3.951244 808 +basi 1 20 3.951244 3.951244 828 +media 5 19 4.007333 20.036665 861 +thur 1 19 4.007333 4.007333 847 +appropri 1 18 4.060443 4.060443 883 +speed 1 18 4.060443 4.060443 911 +across 2 16 4.174387 8.348774 974 +advantag 1 16 4.174387 4.174387 987 +earli 1 16 4.174387 4.174387 968 +jose 1 16 4.174387 4.174387 976 +diego 1 16 4.174387 4.174387 992 +driven 1 15 4.248495 4.248495 1048 +audio 3 14 4.317488 12.952464 1094 +francisco 2 14 4.317488 8.634976 1095 +balanc 1 14 4.317488 4.317488 1112 +heterogen 1 14 4.317488 4.317488 1090 +camera 1 14 4.317488 4.317488 1115 +suit 2 13 4.382027 8.764054 1129 +jonathan 2 13 4.382027 8.764054 1174 +resolut 2 13 4.382027 8.764054 1172 +central 1 13 4.382027 4.382027 1160 +translat 1 13 4.382027 4.382027 1164 +canada 1 13 4.382027 4.382027 1158 +misc 1 13 4.382027 4.382027 1124 +verifi 1 12 4.465908 4.465908 1261 +infrastructur 1 12 4.465908 4.465908 1234 +promot 1 12 4.465908 4.465908 1235 +remov 1 12 4.465908 4.465908 1225 +amount 1 12 4.465908 4.465908 1208 +readi 1 12 4.465908 4.465908 1242 +tue 1 11 4.553877 4.553877 1308 +peter 1 11 4.553877 4.553877 1316 +stephen 1 11 4.553877 4.553877 1342 +player 1 11 4.553877 4.553877 1371 +interestsmi 1 10 4.653960 4.653960 1462 +operatingsystem 1 10 4.653960 4.653960 1401 +equal 1 10 4.653960 4.653960 1424 +transmiss 2 9 4.753590 9.507180 1588 +establish 1 9 4.753590 4.753590 1532 +charg 1 9 4.753590 4.753590 1582 +xerox 3 8 4.875197 14.625591 1725 +contrast 2 8 4.875197 9.750394 1637 +transport 1 8 4.875197 4.875197 1672 +rivl 1 8 4.875197 4.875197 1632 +theme 1 8 4.875197 4.875197 1707 +colloquium 1 8 4.875197 4.875197 1734 +lawrenc 6 7 5.010635 30.063810 1908 +usabl 1 7 5.010635 5.010635 1810 +suffici 1 7 5.010635 5.010635 1897 +pronounc 1 7 5.010635 5.010635 1918 +prioriti 1 7 5.010635 5.010635 1792 +supportfor 1 7 5.010635 5.010635 1854 +invest 2 6 5.164786 10.329572 2153 +jpeg 2 6 5.164786 10.329572 2053 +patel 2 6 5.164786 10.329572 2154 +simultan 1 6 5.164786 5.164786 2155 +quickli 1 6 5.164786 5.164786 2000 +toronto 1 6 5.164786 5.164786 2156 +spie 1 6 5.164786 5.164786 2119 +internationalconfer 1 6 5.164786 5.164786 2051 +row 6 5 5.347108 32.082648 2330 +cyclic 4 5 5.347108 21.388432 2383 +adopt 1 5 5.347108 5.347108 2467 +consum 1 5 5.347108 5.347108 2334 +computerarchitectur 1 5 5.347108 5.347108 2290 +webster 1 5 5.347108 5.347108 2468 +minnesota 1 5 5.347108 5.347108 2469 +ofworkst 2 4 5.568345 11.136690 2679 +publicationsresearch 1 4 5.568345 5.568345 2876 +isthat 1 4 5.568345 5.568345 2723 +hypothesi 1 4 5.568345 5.568345 2650 +poorli 1 4 5.568345 5.568345 2781 +commonli 1 4 5.568345 5.568345 2877 +fold 1 4 5.568345 5.568345 2615 +swartz 1 4 5.568345 5.568345 2878 +zeno 4 3 5.857933 23.431732 3580 +networkprotocol 1 3 5.857933 5.857933 3285 +thetim 1 3 5.857933 5.857933 3581 +magnitud 1 3 5.857933 5.857933 3582 +rival 1 3 5.857933 5.857933 3583 +quicktim 1 3 5.857933 5.857933 3493 +anaheim 1 3 5.857933 5.857933 3271 +intereststeachingselect 1 2 6.263398 6.263398 4924 +andprocess 1 2 6.263398 6.263398 4925 +withlarg 1 2 6.263398 6.263398 4926 +needto 1 2 6.263398 6.263398 4927 +thecommun 1 2 6.263398 6.263398 4928 +builton 1 2 6.263398 6.263398 4094 +availableonlin 1 2 6.263398 6.263398 4929 +aredevelop 1 2 6.263398 6.263398 4930 +todramat 1 2 6.263398 6.263398 4250 +animplement 1 2 6.263398 6.263398 4931 +insoftwar 1 2 6.263398 6.263398 4932 +asif 1 2 6.263398 6.263398 4933 +ghia 1 2 6.263398 6.263398 4934 +chamberlin 1 2 6.263398 6.263398 4745 +hum 1 2 6.263398 6.263398 4935 +sanfrancisco 1 2 6.263398 6.263398 4339 +decod 1 2 6.263398 6.263398 4936 +fileserv 2 1 6.957497 13.914994 9659 +playback 2 1 6.957497 13.914994 9660 +decompress 2 1 6.957497 13.914994 9661 +transcod 2 1 6.957497 13.914994 9662 +ketan 2 1 6.957497 13.914994 9663 +bsmith 1 1 6.957497 6.957497 9664 +talksmisc 1 1 6.957497 6.957497 9665 +linksresearch 1 1 6.957497 6.957497 9666 +ourcomput 1 1 6.957497 6.957497 9667 +commercialand 1 1 6.957497 6.957497 9668 +aredesign 1 1 6.957497 6.957497 9669 +premis 1 1 6.957497 6.957497 9670 +infrastructurei 1 1 6.957497 6.957497 9671 +andappl 1 1 6.957497 6.957497 9672 +workingsystem 1 1 6.957497 6.957497 9673 +zenodistribut 1 1 6.957497 6.957497 9674 +anethernet 1 1 6.957497 6.957497 9675 +serverof 1 1 6.957497 6.957497 9676 +videostor 1 1 6.957497 6.957497 9677 +severalserv 1 1 6.957497 6.957497 9678 +effortdeliveri 1 1 6.957497 6.957497 9679 +resourcereserv 1 1 6.957497 6.957497 9680 +communicationinfrastructur 1 1 6.957497 6.957497 9681 +forbandwidth 1 1 6.957497 6.957497 9682 +networkenviron 1 1 6.957497 6.957497 9683 +accessiblebi 1 1 6.957497 6.957497 9684 +latterenviron 1 1 6.957497 6.957497 9685 +datagram 1 1 6.957497 6.957497 9686 +audioand 1 1 6.957497 6.957497 9687 +metropolitan 1 1 6.957497 6.957497 9688 +andwid 1 1 6.957497 6.957497 9689 +todeliv 1 1 6.957497 6.957497 9690 +compressedrepresent 1 1 6.957497 6.957497 9691 +reducesth 1 1 6.957497 6.957497 9692 +indicatesthat 1 1 6.957497 6.957497 9693 +fasterthan 1 1 6.957497 6.957497 9694 +currentlyextend 1 1 6.957497 6.957497 9695 +onecompress 1 1 6.957497 6.957497 9696 +compresseddomain 1 1 6.957497 6.957497 9697 +simplifyexperiment 1 1 6.957497 6.957497 9698 +calledrivl 1 1 6.957497 6.957497 9699 +allowsvideo 1 1 6.957497 6.957497 9700 +resolutionand 1 1 6.957497 6.957497 9701 +whatpostscript 1 1 6.957497 6.957497 9702 +resolutionindepend 1 1 6.957497 6.957497 9703 +sameprogram 1 1 6.957497 6.957497 9704 +whileedit 1 1 6.957497 6.957497 9705 +qualityfinish 1 1 6.957497 6.957497 9706 +bepreview 1 1 6.957497 6.957497 9707 +dpiprint 1 1 6.957497 6.957497 9708 +onvideo 1 1 6.957497 6.957497 9709 +videous 1 1 6.957497 6.957497 9710 +teachingat 1 1 6.957497 6.957497 9711 +logan 1 1 6.957497 6.957497 9712 +ontario 1 1 6.957497 6.957497 9713 +annett 1 1 6.957497 6.957497 9714 +hanna 1 1 6.957497 6.957497 9715 +mmcn 1 1 6.957497 6.957497 9716 +documentationth 1 1 6.957497 6.957497 9717 +priceweb 1 1 6.957497 6.957497 9718 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html new file mode 100644 index 00000000..8743b34c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Faculty^rc^rc.html @@ -0,0 +1,280 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +scienc 9 640 0.000000 0.000000 4 +univers 6 571 0.000000 0.000000 5 +system 13 443 0.693147 9.010911 6 +program 7 374 0.693147 4.852029 7 +us 9 329 1.098612 9.887508 16 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +softwar 6 220 1.386294 8.317764 30 +design 5 213 1.386294 6.931470 25 +languag 3 227 1.386294 4.158882 26 +cornel 2 215 1.386294 2.772588 23 +also 2 259 1.386294 2.772588 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +implement 3 152 1.791759 5.375277 52 +develop 3 174 1.791759 5.375277 53 +algorithm 2 162 1.791759 3.583518 57 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +year 2 148 1.945910 3.891820 84 +construct 2 139 1.945910 3.891820 82 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +studi 4 120 2.079442 8.317768 91 +provid 1 121 2.079442 2.079442 94 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +theori 8 111 2.197225 17.577800 127 +mathemat 5 108 2.197225 10.986125 123 +make 3 111 2.197225 6.591675 120 +version 2 113 2.197225 4.394450 122 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +final 1 116 2.197225 2.197225 108 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +associ 1 93 2.397895 2.397895 151 +build 3 85 2.484907 7.454721 184 +wide 2 84 2.484907 4.969814 185 +journal 2 83 2.484907 4.969814 183 +second 1 81 2.484907 2.484907 166 +academ 1 82 2.484907 2.484907 178 +control 1 82 2.484907 2.484907 164 +method 3 80 2.564949 7.694847 213 +decemb 2 80 2.564949 5.129898 215 +state 2 76 2.564949 5.129898 207 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +logic 8 71 2.639057 21.112456 230 +involv 3 71 2.639057 7.917171 227 +line 3 75 2.639057 7.917171 231 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +write 1 72 2.639057 2.639057 222 +symposium 1 72 2.639057 2.639057 238 +meet 1 72 2.639057 2.639057 229 +appli 1 71 2.639057 2.639057 226 +differ 1 66 2.708050 2.708050 253 +januari 2 62 2.772589 5.545178 264 +improv 1 62 2.772589 2.772589 289 +descript 1 64 2.772589 2.772589 271 +experi 1 64 2.772589 2.772589 283 +type 5 61 2.833213 14.166065 296 +march 2 61 2.833213 5.666426 295 +content 1 59 2.833213 2.833213 302 +major 2 56 2.890372 5.780744 315 +explor 2 58 2.890372 5.780744 324 +three 1 54 2.944439 2.944439 330 +scientif 1 53 2.944439 2.944439 341 +hardwar 1 51 2.995732 2.995732 350 +profession 1 51 2.995732 2.995732 345 +principl 1 48 3.044522 3.044522 357 +effect 1 46 3.091042 3.091042 385 +possibl 1 47 3.091042 3.091042 378 +natur 1 44 3.135494 3.135494 406 +mark 1 44 3.135494 3.135494 403 +algebra 1 45 3.135494 3.135494 394 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +mechan 1 43 3.178054 3.178054 416 +futur 2 41 3.218876 6.437752 427 +york 2 41 3.218876 6.437752 435 +past 1 42 3.218876 3.218876 428 +review 1 42 3.218876 3.218876 425 +theoret 3 39 3.258097 9.774291 446 +continu 1 39 3.258097 3.258097 448 +annual 1 40 3.258097 3.258097 458 +paul 1 38 3.295837 3.295837 471 +electr 1 38 3.295837 3.295837 461 +formal 5 37 3.332205 16.661025 478 +connect 3 37 3.332205 9.996615 485 +hand 1 37 3.332205 3.332205 475 +expect 1 37 3.332205 3.332205 484 +especi 2 36 3.367296 6.734592 496 +soon 1 36 3.367296 3.367296 494 +committe 3 34 3.401197 10.203591 522 +transform 1 32 3.465736 3.465736 542 +collabor 1 32 3.465736 3.465736 543 +richard 3 31 3.496508 10.489524 559 +robert 1 30 3.555348 3.555348 567 +power 1 30 3.555348 3.555348 573 +compon 1 30 3.555348 3.555348 570 +domain 1 30 3.555348 3.555348 564 +chair 2 29 3.583519 7.167038 596 +built 1 29 3.583519 3.583519 592 +hope 2 28 3.610918 7.221836 610 +packag 1 28 3.610918 3.610918 614 +symbol 2 27 3.637586 7.275172 620 +great 1 27 3.637586 3.637586 626 +american 1 27 3.637586 3.637586 634 +experiment 1 26 3.688879 3.688879 645 +enabl 1 26 3.688879 3.688879 655 +effort 1 26 3.688879 3.688879 652 +fundament 2 25 3.737670 7.475340 661 +wai 1 25 3.737670 3.737670 662 +aspect 1 25 3.737670 3.737670 663 +seri 1 24 3.761200 3.761200 708 +proof 1 23 3.806662 3.806662 720 +togeth 1 23 3.806662 3.806662 714 +varieti 2 22 3.850148 7.700296 740 +springer 2 22 3.850148 7.700296 750 +verlag 2 22 3.850148 7.700296 751 +william 1 22 3.850148 3.850148 765 +deal 1 22 3.850148 3.850148 736 +inth 1 22 3.850148 3.850148 741 +theorem 5 21 3.912023 19.560115 786 +divis 1 21 3.912023 3.912023 803 +entir 1 20 3.951244 3.951244 811 +synthesi 1 20 3.951244 3.951244 834 +verif 1 20 3.951244 3.951244 826 +expert 1 20 3.951244 3.951244 833 +facil 1 20 3.951244 3.951244 814 +prove 5 19 4.007333 20.036665 848 +definit 1 19 4.007333 4.007333 864 +north 1 19 4.007333 4.007333 873 +minim 1 18 4.060443 4.060443 887 +explan 1 16 4.174387 4.174387 985 +stream 1 15 4.248495 4.248495 1015 +contribut 1 15 4.248495 4.248495 1021 +style 1 15 4.248495 4.248495 1036 +consider 1 14 4.317488 4.317488 1076 +incomput 1 14 4.317488 4.317488 1096 +near 1 14 4.317488 4.317488 1091 +circuit 3 13 4.382027 13.146081 1131 +joint 2 13 4.382027 8.764054 1130 +weak 1 13 4.382027 4.382027 1159 +canada 1 13 4.382027 4.382027 1158 +captur 1 12 4.465908 4.465908 1232 +franc 1 12 4.465908 4.465908 1276 +philadelphia 1 12 4.465908 4.465908 1244 +israel 3 11 4.553877 13.661631 1366 +excit 1 11 4.553877 4.553877 1329 +refin 1 11 4.553877 4.553877 1363 +nuprl 12 10 4.653960 55.847520 1402 +modular 1 10 4.653960 4.653960 1392 +devis 1 10 4.653960 4.653960 1451 +suitabl 1 9 4.753590 4.753590 1486 +colloquium 1 8 4.875197 4.875197 1734 +pennsylvania 2 7 5.010635 10.021270 1932 +beyond 1 7 5.010635 5.010635 1834 +feasibl 1 6 5.164786 5.164786 2157 +handbook 1 6 5.164786 5.164786 2061 +oxford 1 6 5.164786 5.164786 2121 +recruit 1 6 5.164786 5.164786 2145 +pari 1 6 5.164786 5.164786 2158 +indiana 1 6 5.164786 5.164786 2057 +allen 3 5 5.347108 16.041324 2470 +eduph 1 5 5.347108 5.347108 2449 +dougla 1 5 5.347108 5.347108 2471 +begun 1 5 5.347108 5.347108 2386 +activitieseditor 1 5 5.347108 5.347108 2454 +weyl 3 4 5.568345 16.705035 2854 +zippel 3 4 5.568345 16.705035 2879 +notr 2 4 5.568345 11.136690 2880 +dame 2 4 5.568345 11.136690 2881 +stuart 2 3 5.857933 11.715866 3584 +how 2 3 5.857933 11.715866 3289 +gri 2 3 5.857933 11.715866 3569 +theoremprov 1 3 5.857933 5.857933 3298 +moreov 1 3 5.857933 5.857933 3200 +predecessor 1 3 5.857933 5.857933 3585 +jackson 1 3 5.857933 5.857933 3586 +boolean 1 3 5.857933 5.857933 3202 +nato 1 3 5.857933 5.857933 3587 +engag 2 2 6.263398 12.526796 4937 +ventur 2 2 6.263398 12.526796 4938 +polya 2 2 6.263398 12.526796 4939 +programmingand 1 2 6.263398 6.263398 4940 +theform 1 2 6.263398 6.263398 4245 +aitken 1 2 6.263398 6.263398 4941 +possibleto 1 2 6.263398 6.263398 4942 +aprogram 1 2 6.263398 6.263398 4943 +thiswil 1 2 6.263398 6.263398 4944 +activitieschair 1 2 6.263398 6.263398 4894 +anniversari 1 2 6.263398 6.263398 4945 +celebr 1 2 6.263398 6.263398 4946 +buffalo 1 2 6.263398 6.263398 4947 +bensoussan 1 2 6.263398 6.263398 4303 +andmathemat 1 2 6.263398 6.263398 4948 +manfr 1 2 6.263398 6.263398 4949 +leeser 3 1 6.957497 20.872491 9719 +eaton 2 1 6.957497 13.914994 9720 +computationeditor 2 1 6.957497 13.914994 9721 +betweencomput 2 1 6.957497 13.914994 9722 +constablerobert 1 1 6.957497 6.957497 9723 +constabledepart 1 1 6.957497 6.957497 9724 +professorrc 1 1 6.957497 6.957497 9725 +researchw 1 1 6.957497 6.957497 9726 +providemechan 1 1 6.957497 6.957497 9727 +implementedthre 1 1 6.957497 6.957497 9728 +lispprogram 1 1 6.957497 6.957497 9729 +sucha 1 1 6.957497 6.957497 9730 +canexpress 1 1 6.957497 6.957497 9731 +asmetalevel 1 1 6.957497 6.957497 9732 +canevalu 1 1 6.957497 6.957497 9733 +nuprli 1 1 6.957497 6.957497 9734 +fomal 1 1 6.957497 6.957497 9735 +iscal 1 1 6.957497 6.957497 9736 +termeditor 1 1 6.957497 6.957497 9737 +itsintern 1 1 6.957497 6.957497 9738 +hedefinit 1 1 6.957497 6.957497 9739 +inconstruct 1 1 6.957497 6.957497 9740 +mechanismha 1 1 6.957497 6.957497 9741 +rebuilt 1 1 6.957497 6.957497 9742 +thework 1 1 6.957497 6.957497 9743 +isan 1 1 6.957497 6.957497 9744 +builtprincip 1 1 6.957497 6.957497 9745 +internaldescript 1 1 6.957497 6.957497 9746 +withmiriam 1 1 6.957497 6.957497 9747 +davidgri 1 1 6.957497 6.957497 9748 +richardzippel 1 1 6.957497 6.957497 9749 +withless 1 1 6.957497 6.957497 9750 +aagard 1 1 6.957497 6.957497 9751 +thecorrect 1 1 6.957497 6.957497 9752 +bedrocsystem 1 1 6.957497 6.957497 9753 +widelyus 1 1 6.957497 6.957497 9754 +efforttaught 1 1 6.957497 6.957497 9755 +themann 1 1 6.957497 6.957497 9756 +programmingprocess 1 1 6.957497 6.957497 9757 +givn 1 1 6.957497 6.957497 9758 +ofpolya 1 1 6.957497 6.957497 9759 +tryingto 1 1 6.957497 6.957497 9760 +thepolya 1 1 6.957497 6.957497 9761 +conal 1 1 6.957497 6.957497 9762 +mannion 1 1 6.957497 6.957497 9763 +ofus 1 1 6.957497 6.957497 9764 +discussingproblem 1 1 6.957497 6.957497 9765 +ssymbol 1 1 6.957497 6.957497 9766 +computingsoftwar 1 1 6.957497 6.957497 9767 +isbuild 1 1 6.957497 6.957497 9768 +presseditor 1 1 6.957497 6.957497 9769 +pressgener 1 1 6.957497 6.957497 9770 +licsprogram 1 1 6.957497 6.957497 9771 +jumelageprogram 1 1 6.957497 6.957497 9772 +softwarerefere 1 1 6.957497 6.957497 9773 +nserc 1 1 6.957497 6.957497 9774 +scienceunivers 1 1 6.957497 6.957497 9775 +committeecomput 1 1 6.957497 6.957497 9776 +committeeprovost 1 1 6.957497 6.957497 9777 +mathematicslecturesform 1 1 6.957497 6.957497 9778 +inria 1 1 6.957497 6.957497 9779 +bengurion 1 1 6.957497 6.957497 9780 +sheva 1 1 6.957497 6.957497 9781 +aviv 1 1 6.957497 6.957497 9782 +metaprogram 1 1 6.957497 6.957497 9783 +engineeringworkshop 1 1 6.957497 6.957497 9784 +publicationsform 1 1 6.957497 6.957497 9785 +tendenc 1 1 6.957497 6.957497 9786 +verju 1 1 6.957497 6.957497 9787 +metalevel 1 1 6.957497 6.957497 9788 +broi 1 1 6.957497 6.957497 9789 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html new file mode 100644 index 00000000..f1f8e0e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aflorenc^home.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +analysi 1 124 2.079442 2.079442 98 +find 1 111 2.197225 2.197225 111 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +upson 1 71 2.639057 2.639057 218 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +numer 1 49 3.044522 3.044522 369 +correct 1 38 3.295837 3.295837 462 +adam 2 17 4.110874 8.221748 934 +sept 1 17 4.110874 4.110874 952 +incomput 1 14 4.317488 4.317488 1096 +athlet 1 7 5.010635 5.010635 1933 +atcornel 1 6 5.164786 5.164786 2131 +florenc 2 2 6.263398 12.526796 4950 +aflorenc 1 1 6.957497 6.957497 9790 +professionalinterest 1 1 6.957497 6.957497 9791 +academicsresearchworkinterest 1 1 6.957497 6.957497 9792 +mewith 1 1 6.957497 6.957497 9793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html new file mode 100644 index 00000000..9ee252db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aguilera^home.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +cornel 4 215 1.386294 5.545176 23 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +check 1 115 2.197225 2.197225 118 +second 1 81 2.484907 2.484907 166 +solv 1 73 2.639057 2.639057 234 +ithaca 1 65 2.772589 2.772589 294 +suggest 1 53 2.944439 2.944439 331 +approach 1 48 3.044522 3.044522 366 +detect 1 26 3.688879 3.688879 646 +failur 1 18 4.060443 4.060443 898 +hybrid 1 15 4.248495 4.248495 1057 +warn 1 14 4.317488 4.317488 1068 +tour 1 11 4.553877 4.553877 1307 +perman 1 11 4.553877 4.553877 1372 +marco 3 4 5.568345 16.705035 2589 +aguilera 6 2 6.263398 37.580388 4052 +kawazo 4 1 6.957497 27.829988 9794 +algorithmsrandom 1 1 6.957497 6.957497 9795 +consensusgo 1 1 6.957497 6.957497 9796 +brazil 1 1 6.957497 6.957497 9797 +constructionmarco 1 1 6.957497 6.957497 9798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html new file mode 100644 index 00000000..4b67ddf3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahong^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +cornel 2 215 1.386294 2.772588 23 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +requir 1 81 2.484907 2.484907 167 +stuff 1 87 2.484907 2.484907 171 +upson 1 71 2.639057 2.639057 218 +new 2 64 2.772589 5.545178 262 +ithaca 1 65 2.772589 2.772589 294 +get 1 46 3.091042 3.091042 380 +chines 2 29 3.583519 7.167038 595 +annot 1 21 3.912023 3.912023 775 +taiwan 1 16 4.174387 4.174387 1006 +hong 2 14 4.317488 8.634976 1105 +essenti 1 13 4.382027 4.382027 1137 +usaoffic 1 6 5.164786 5.164786 2159 +corba 1 5 5.347108 5.347108 2320 +alfr 2 4 5.568345 11.136690 2882 +sinanet 1 4 5.568345 5.568345 2883 +worthwhil 1 2 6.263398 6.263398 4951 +dizzi 1 1 6.957497 6.957497 9799 +nandonet 1 1 6.957497 6.957497 9800 +sunworld 1 1 6.957497 6.957497 9801 +javaworldcours 1 1 6.957497 6.957497 9802 +bibliographyc 1 1 6.957497 6.957497 9803 +reportalfr 1 1 6.957497 6.957497 9804 +ahong 1 1 6.957497 6.957497 9805 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html new file mode 100644 index 00000000..587cbf4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ahuja^ahuja.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +cornel 3 215 1.386294 4.158882 23 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +network 2 168 1.791759 3.583518 61 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +architectur 2 139 1.945910 3.891820 77 +perform 2 143 1.945910 3.891820 74 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +object 1 138 1.945910 1.945910 79 +high 3 130 2.079442 6.238326 101 +postscript 2 131 2.079442 4.158884 90 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +final 2 116 2.197225 4.394450 108 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +academ 1 82 2.484907 2.484907 178 +orient 1 80 2.564949 2.564949 205 +resum 1 79 2.564949 2.564949 217 +html 1 75 2.639057 2.639057 235 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +automata 1 13 4.382027 4.382027 1135 +vineet 1 8 4.875197 4.875197 1639 +capac 1 8 4.875197 4.875197 1740 +engg 1 4 5.568345 5.568345 2884 +ahuja 1 3 5.857933 5.857933 3494 +coursework 1 3 5.857933 5.857933 3588 +hasbrouck 1 2 6.263398 6.263398 4952 +pagevineet 1 1 6.957497 6.957497 9806 +ahujam 1 1 6.957497 6.957497 9807 +apt 1 1 6.957497 6.957497 9808 +reportfal 1 1 6.957497 6.957497 9809 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html new file mode 100644 index 00000000..5e99f4c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alan^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +address 1 170 1.791759 1.791759 62 +construct 1 139 1.945910 1.945910 82 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +telephon 1 50 3.044522 3.044522 373 +still 1 50 3.044522 3.044522 362 +pair 1 9 4.753590 4.753590 1503 +ching 2 1 6.957497 13.914994 9810 +chinglan 1 1 6.957497 6.957497 9811 +edumast 1 1 6.957497 6.957497 9812 +beau 1 1 6.957497 6.957497 9813 +seneca 1 1 6.957497 6.957497 9814 +examplegraph 1 1 6.957497 6.957497 9815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html new file mode 100644 index 00000000..251048cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^alexey^alexey.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +cornel 7 215 1.386294 9.704058 23 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +group 2 183 1.609438 3.218876 36 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +distribut 2 162 1.791759 3.583518 51 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +report 3 131 2.079442 6.238326 92 +tool 1 117 2.079442 2.079442 93 +machin 1 129 2.079442 2.079442 95 +technic 3 100 2.302585 6.907755 140 +commun 1 95 2.397895 2.397895 157 +search 1 95 2.397895 2.397895 155 +larg 1 82 2.484907 2.484907 168 +wide 1 84 2.484907 2.484907 185 +orient 1 80 2.564949 2.564949 205 +state 1 76 2.564949 2.564949 207 +new 2 64 2.772589 5.545178 262 +ithaca 1 65 2.772589 2.772589 294 +direct 1 57 2.890372 2.890372 316 +directori 1 45 3.135494 3.135494 396 +music 1 42 3.218876 3.218876 436 +staff 1 36 3.367296 3.367296 490 +within 1 33 3.433987 3.433987 525 +art 1 29 3.583519 3.583519 593 +weather 2 28 3.610918 7.221836 618 +scale 1 28 3.610918 3.610918 613 +seri 1 24 3.761200 3.761200 708 +lead 1 23 3.806662 3.806662 718 +lyco 1 19 4.007333 4.007333 871 +event 1 18 4.060443 4.060443 896 +condit 1 16 4.174387 4.174387 975 +brief 1 16 4.174387 4.174387 1001 +horu 1 14 4.317488 4.317488 1116 +replic 1 12 4.465908 4.465908 1231 +reness 1 11 4.553877 4.553877 1333 +birman 1 9 4.753590 4.753590 1531 +friedman 1 7 5.010635 5.010635 1886 +gopher 1 6 5.164786 5.164786 1982 +broadcast 1 5 5.347108 5.347108 2453 +vaysburd 1 4 5.568345 5.568345 2846 +cuinfo 1 4 5.568345 5.568345 2626 +androbbert 1 2 6.263398 6.263398 4953 +partition 1 2 6.263398 6.263398 4954 +lausann 1 2 6.263398 6.263398 4955 +hebrew 1 2 6.263398 6.263398 4913 +transi 1 2 6.263398 6.263398 4861 +alexei 2 1 6.957497 13.914994 9816 +pagealexei 1 1 6.957497 6.957497 9817 +vaysburdalexei 1 1 6.957497 6.957497 9818 +andobject 1 1 6.957497 6.957497 9819 +ecol 1 1 6.957497 6.957497 9820 +polytechniqu 1 1 6.957497 6.957497 9821 +federal 1 1 6.957497 6.957497 9822 +cornellcornel 1 1 6.957497 6.957497 9823 +directorycornel 1 1 6.957497 6.957497 9824 +directorycours 1 1 6.957497 6.957497 9825 +examscornel 1 1 6.957497 6.957497 9826 +calendarcornel 1 1 6.957497 6.957497 9827 +musicbailei 1 1 6.957497 6.957497 9828 +concertscornel 1 1 6.957497 6.957497 9829 +ithacaworld 1 1 6.957497 6.957497 9830 +odessa 1 1 6.957497 6.957497 9831 +odessaweb 1 1 6.957497 6.957497 9832 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html new file mode 100644 index 00000000..07c063f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^amith^yam.html @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +us 3 329 1.098612 3.295836 16 +engin 2 297 1.098612 2.197224 20 +project 2 340 1.098612 2.197224 18 +cours 2 273 1.098612 2.197224 15 +softwar 2 220 1.386294 2.772588 30 +cornel 1 215 1.386294 1.386294 23 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +includ 2 208 1.609438 3.218876 42 +data 3 170 1.791759 5.375277 49 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +perform 1 143 1.945910 1.945910 74 +high 1 130 2.079442 2.079442 101 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +look 1 107 2.197225 2.197225 115 +memori 2 101 2.302585 4.605170 139 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +graphic 4 90 2.397895 9.591580 147 +imag 3 91 2.397895 7.193685 161 +present 1 91 2.397895 2.397895 145 +environ 2 84 2.484907 4.969814 177 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +second 1 81 2.484907 2.484907 166 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +involv 1 71 2.639057 2.639057 227 +simul 5 66 2.708050 13.540250 255 +view 1 70 2.708050 2.708050 254 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +descript 1 64 2.772589 2.772589 271 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +written 1 63 2.772589 2.772589 278 +content 1 59 2.833213 2.833213 302 +processor 1 54 2.944439 2.944439 335 +sampl 1 53 2.944439 2.944439 339 +give 1 50 3.044522 3.044522 359 +california 1 46 3.091042 3.091042 388 +done 1 47 3.091042 3.091042 381 +better 2 45 3.135494 6.270988 401 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +might 1 41 3.218876 3.218876 426 +past 1 42 3.218876 3.218876 428 +futur 1 41 3.218876 3.218876 427 +game 2 36 3.367296 6.734592 498 +ofth 1 36 3.367296 3.367296 491 +kind 1 32 3.465736 3.465736 541 +propos 1 28 3.610918 3.610918 602 +hope 1 28 3.610918 3.610918 610 +though 1 27 3.637586 3.637586 622 +bookmark 1 26 3.688879 3.688879 639 +trace 1 25 3.737670 3.737670 677 +pattern 2 24 3.761200 7.522400 689 +store 1 24 3.761200 3.761200 693 +compress 5 23 3.806662 19.033310 719 +chip 5 21 3.912023 19.560115 770 +watch 1 21 3.912023 3.912023 789 +vlsi 1 21 3.912023 3.912023 795 +mpeg 1 20 3.951244 3.951244 831 +speed 1 18 4.060443 4.060443 911 +render 1 17 4.110874 4.110874 947 +track 1 15 4.248495 4.248495 1029 +stream 1 15 4.248495 4.248495 1015 +rate 1 15 4.248495 4.248495 1037 +transit 1 15 4.248495 4.248495 1046 +train 3 14 4.317488 12.952464 1066 +station 1 13 4.382027 4.382027 1157 +suit 1 13 4.382027 4.382027 1129 +avenu 2 12 4.465908 8.931816 1277 +realiti 1 12 4.465908 4.465908 1272 +death 1 10 4.653960 4.653960 1457 +earth 1 10 4.653960 4.653960 1463 +mountain 1 10 4.653960 4.653960 1456 +santa 1 10 4.653960 4.653960 1441 +wall 1 9 4.753590 4.753590 1553 +occur 1 9 4.753590 4.753590 1572 +routin 1 9 4.753590 4.753590 1549 +screen 1 9 4.753590 4.753590 1577 +ride 3 8 4.875197 14.625591 1741 +switch 1 8 4.875197 4.875197 1718 +accord 1 7 5.010635 5.010635 1826 +monei 1 7 5.010635 5.010635 1934 +stereo 1 7 5.010635 5.010635 1818 +microsystem 2 6 5.164786 10.329572 2160 +railroad 1 6 5.164786 5.164786 2161 +silicon 1 6 5.164786 5.164786 2076 +extern 1 6 5.164786 5.164786 2105 +byte 1 6 5.164786 5.164786 2108 +snapshot 1 5 5.347108 5.347108 2303 +hell 2 4 5.568345 11.136690 2885 +mess 1 4 5.568345 5.568345 2886 +engg 1 4 5.568345 5.568345 2884 +heaven 2 3 5.857933 11.715866 3589 +landscap 2 3 5.857933 11.715866 3525 +hindu 1 3 5.857933 5.857933 3590 +xlib 1 3 5.857933 5.857933 3204 +fernandez 1 3 5.857933 5.857933 3591 +cupertino 2 2 6.263398 12.526796 4956 +javasoft 2 2 6.263398 12.526796 4220 +pyramania 2 2 6.263398 12.526796 4957 +mytholog 1 2 6.263398 6.263398 4869 +clara 1 2 6.263398 6.263398 4958 +amith 1 2 6.263398 6.263398 4053 +yamasani 1 2 6.263398 6.263398 4062 +cscomput 1 2 6.263398 6.263398 4195 +cave 1 2 6.263398 6.263398 4959 +softwarei 1 2 6.263398 6.263398 4960 +nano 1 2 6.263398 6.263398 4961 +snap 1 2 6.263398 6.263398 4962 +thed 1 2 6.263398 6.263398 4963 +pal 1 2 6.263398 6.263398 4964 +joselui 1 2 6.263398 6.263398 4965 +yama 3 1 6.957497 20.872491 9833 +coursesvlsi 2 1 6.957497 13.914994 9834 +downto 1 1 6.957497 6.957497 9835 +imparti 1 1 6.957497 6.957497 9836 +amithyamasanim 1 1 6.957497 6.957497 9837 +yorki 1 1 6.957497 6.957497 9838 +garcia 1 1 6.957497 6.957497 9839 +mailstop 1 1 6.957497 6.957497 9840 +ucup 1 1 6.957497 6.957497 9841 +warburton 1 1 6.957497 6.957497 9842 +comi 1 1 6.957497 6.957497 9843 +currentlyemploi 1 1 6.957497 6.957497 9844 +workingin 1 1 6.957497 6.957497 9845 +javamedia 1 1 6.957497 6.957497 9846 +groupeducationfal 1 1 6.957497 6.957497 9847 +cssoftwar 1 1 6.957497 6.957497 9848 +csspring 1 1 6.957497 6.957497 9849 +csproject 1 1 6.957497 6.957497 9850 +railroadsystem 1 1 6.957497 6.957497 9851 +onyx 1 1 6.957497 6.957497 9852 +openinventord 1 1 6.957497 6.957497 9853 +through 1 1 6.957497 6.957497 9854 +documentimag 1 1 6.957497 6.957497 9855 +chipdevelop 1 1 6.957497 6.957497 9856 +basicallycompress 1 1 6.957497 6.957497 9857 +thisalgorithm 1 1 6.957497 6.957497 9858 +cachecam 1 1 6.957497 6.957497 9859 +inputstream 1 1 6.957497 6.957497 9860 +capableof 1 1 6.957497 6.957497 9861 +nowai 1 1 6.957497 6.957497 9862 +rivlproposalpresentationand 1 1 6.957497 6.957497 9863 +dthi 1 1 6.957497 6.957497 9864 +itswritten 1 1 6.957497 6.957497 9865 +parallelomania 1 1 6.957497 6.957497 9866 +resumehtmlpostscript 1 1 6.957497 6.957497 9867 +satyaprasad 1 1 6.957497 6.957497 9868 +avinashgupta 1 1 6.957497 6.957497 9869 +kartikh 1 1 6.957497 6.957497 9870 +kapadia 1 1 6.957497 6.957497 9871 +hrishikeshdixit 1 1 6.957497 6.957497 9872 +vineetahuja 1 1 6.957497 6.957497 9873 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html new file mode 100644 index 00000000..8dd19262 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ankit^index.html @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +cours 2 273 1.098612 2.197224 15 +project 2 340 1.098612 2.197224 18 +time 1 293 1.098612 1.098612 17 +cornel 2 215 1.386294 2.772588 23 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +read 1 154 1.791759 1.791759 47 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +person 1 111 2.197225 2.197225 117 +assist 1 112 2.197225 2.197225 113 +homepag 1 93 2.397895 2.397895 148 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +real 1 93 2.397895 2.397895 144 +school 1 84 2.484907 2.484907 188 +wide 1 84 2.484907 2.484907 185 +state 1 76 2.564949 2.564949 207 +david 1 71 2.639057 2.639057 232 +multimedia 2 68 2.708050 5.416100 258 +order 1 69 2.708050 2.708050 249 +virtual 2 62 2.772589 5.545178 285 +street 1 63 2.772589 2.772589 293 +ithaca 1 65 2.772589 2.772589 294 +prof 1 64 2.772589 2.772589 273 +interact 1 62 2.772589 2.772589 270 +summer 1 56 2.890372 2.890372 311 +understand 1 47 3.091042 3.091042 384 +video 1 44 3.135494 3.135494 405 +vita 1 38 3.295837 3.295837 473 +brian 1 38 3.295837 3.295837 466 +industri 1 38 3.295837 3.295837 464 +bibliographi 1 34 3.401197 3.401197 518 +curriculum 1 33 3.433987 3.433987 535 +independ 1 32 3.465736 3.465736 548 +photo 1 31 3.496508 3.496508 561 +common 1 30 3.555348 3.555348 574 +platform 1 29 3.583519 3.583519 591 +limit 1 29 3.583519 3.583519 585 +request 1 26 3.688879 3.688879 635 +annot 1 21 3.912023 3.912023 775 +smith 1 20 3.951244 3.951244 820 +toolkit 1 20 3.951244 3.951244 835 +kernel 1 20 3.951244 3.951244 825 +mellon 1 13 4.382027 4.382027 1179 +realiti 2 12 4.465908 8.931816 1272 +carnegi 1 12 4.465908 4.465908 1260 +total 1 10 4.653960 4.653960 1398 +patel 2 6 5.164786 10.329572 2154 +causal 1 6 5.164786 5.164786 2024 +east 1 5 5.347108 5.347108 2472 +corba 1 5 5.347108 5.347108 2320 +dale 1 4 5.568345 5.568345 2687 +zeno 1 3 5.857933 5.857933 3580 +cheriton 1 3 5.857933 5.857933 3259 +ankit 2 2 6.263398 12.526796 4966 +endpoint 1 2 6.263398 6.263398 4967 +broker 1 2 6.263398 6.263398 4968 +critiqu 1 2 6.263398 6.263398 4328 +apatel 1 1 6.957497 6.957497 9874 +galleria 1 1 6.957497 6.957497 9875 +chronologia 1 1 6.957497 6.957497 9876 +universityresumedepart 1 1 6.957497 6.957497 9877 +enrolledgradu 1 1 6.957497 6.957497 9878 +canvasd 1 1 6.957497 6.957497 9879 +conferencingmultimedia 1 1 6.957497 6.957497 9880 +assignmentsproject 1 1 6.957497 6.957497 9881 +netan 1 1 6.957497 6.957497 9882 +skeen 1 1 6.957497 6.957497 9883 +scienceworld 1 1 6.957497 6.957497 9884 +multimediamaharaja 1 1 6.957497 6.957497 9885 +sayajirao 1 1 6.957497 6.957497 9886 +academicsfriend 1 1 6.957497 6.957497 9887 +techoreli 1 1 6.957497 6.957497 9888 +limitedjob 1 1 6.957497 6.957497 9889 +profilelif 1 1 6.957497 6.957497 9890 +relianc 1 1 6.957497 6.957497 9891 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html new file mode 100644 index 00000000..15886de1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ashish^ashish.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +languag 1 227 1.386294 1.386294 26 +databas 1 122 2.079442 2.079442 86 +softwareengin 1 6 5.164786 5.164786 2162 +ashish 1 5 5.347108 5.347108 2473 +sciencemast 1 2 6.263398 6.263398 4969 +jhaveriashish 1 1 6.957497 6.957497 9892 +jhaveridepart 1 1 6.957497 6.957497 9893 +engineeringresumehtmlpost 1 1 6.957497 6.957497 9894 +scriptcourseworkadvanc 1 1 6.957497 6.957497 9895 +systemscsmultimedia 1 1 6.957497 6.957497 9896 +systemscsengin 1 1 6.957497 6.957497 9897 +networkscsprogram 1 1 6.957497 6.957497 9898 +csashish 1 1 6.957497 6.957497 9899 +jhaveri 1 1 6.957497 6.957497 9900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html new file mode 100644 index 00000000..ef2eb95d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^aswin^aswin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +student 2 343 1.098612 2.197224 19 +cornel 3 215 1.386294 4.158882 23 +graduat 2 215 1.386294 2.772588 31 +languag 1 227 1.386294 1.386294 26 +phone 2 175 1.791759 3.583518 45 +hall 1 146 1.945910 1.945910 65 +person 2 111 2.197225 4.394450 117 +pictur 1 89 2.397895 2.397895 160 +server 1 76 2.564949 2.564949 204 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +guid 1 63 2.772589 2.772589 267 +life 1 50 3.044522 3.044522 375 +eduoffic 1 33 3.433987 3.433987 531 +transform 2 32 3.465736 6.931472 542 +famili 1 23 3.806662 3.806662 735 +record 1 18 4.060443 4.060443 890 +drive 1 15 4.248495 4.248495 1052 +club 1 15 4.248495 4.248495 1058 +danc 3 12 4.465908 13.397724 1278 +jean 1 10 4.653960 4.653960 1440 +atcornel 1 6 5.164786 5.164786 2131 +swing 4 4 5.568345 22.273380 2887 +album 1 4 5.568345 5.568345 2888 +dutch 1 3 5.857933 5.857933 3592 +berg 2 2 6.263398 12.526796 4970 +aswin 4 1 6.957497 27.829988 9901 +skyacr 1 1 6.957497 6.957497 9902 +systemmi 1 1 6.957497 6.957497 9903 +annek 1 1 6.957497 6.957497 9904 +deejay 1 1 6.957497 6.957497 9905 +isdn 1 1 6.957497 6.957497 9906 +hop 1 1 6.957497 6.957497 9907 +nederlands 1 1 6.957497 6.957497 9908 +clubi 1 1 6.957497 6.957497 9909 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html new file mode 100644 index 00000000..aca04b0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^avinash^avinash.html @@ -0,0 +1,123 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +system 6 443 0.693147 4.158882 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +gener 2 220 1.386294 2.772588 27 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +oper 3 180 1.609438 4.828314 34 +distribut 2 162 1.791759 3.583518 51 +network 2 168 1.791759 3.583518 61 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +like 2 132 1.945910 3.891820 81 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +first 1 140 1.945910 1.945910 71 +construct 1 139 1.945910 1.945910 82 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +make 1 111 2.197225 2.197225 120 +user 3 104 2.302585 6.907755 137 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +graphic 1 90 2.397895 2.397895 147 +sinc 1 90 2.397895 2.397895 159 +requir 2 81 2.484907 4.969814 167 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +interfac 2 79 2.564949 5.129898 209 +appli 1 71 2.639057 2.639057 226 +multimedia 2 68 2.708050 5.416100 258 +main 1 67 2.708050 2.708050 256 +window 1 68 2.708050 2.708050 242 +virtual 1 62 2.772589 2.772589 285 +guid 1 63 2.772589 2.772589 267 +point 4 58 2.890372 11.561488 319 +space 2 57 2.890372 5.780744 310 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +still 1 50 3.044522 3.044522 362 +friend 1 48 3.044522 3.044522 376 +featur 1 46 3.091042 3.091042 386 +effect 1 46 3.091042 3.091042 385 +even 1 45 3.135494 3.135494 393 +protocol 1 45 3.135494 3.135494 407 +multipl 1 39 3.258097 3.258097 453 +mean 1 37 3.332205 3.332205 477 +game 4 36 3.367296 13.469184 498 +next 2 34 3.401197 6.802394 517 +either 1 35 3.401197 3.401197 506 +everi 1 34 3.401197 3.401197 519 +board 1 33 3.433987 3.433987 528 +abl 1 30 3.555348 3.555348 566 +full 1 28 3.610918 3.610918 615 +subject 2 26 3.688879 7.377758 647 +universityithaca 1 24 3.761200 3.761200 710 +magazin 1 24 3.761200 3.761200 704 +reach 1 24 3.761200 3.761200 688 +brows 1 23 3.806662 3.806662 726 +sciencecornel 1 22 3.850148 3.850148 768 +almost 1 22 3.850148 3.850148 742 +chip 2 21 3.912023 7.824046 770 +five 4 19 4.007333 16.029332 841 +sign 1 16 4.174387 4.174387 970 +piec 5 15 4.248495 21.242475 1020 +transit 1 15 4.248495 4.248495 1046 +stream 1 15 4.248495 4.248495 1015 +hierarch 1 15 4.248495 4.248495 1018 +scene 1 14 4.317488 4.317488 1114 +skill 1 12 4.465908 4.465908 1205 +player 2 11 4.553877 9.107754 1371 +earth 1 10 4.653960 4.653960 1463 +pair 1 9 4.753590 4.753590 1503 +rivl 1 8 4.875197 4.875197 1632 +pronounc 1 7 5.010635 5.010635 1918 +earn 1 7 5.010635 5.010635 1788 +hoca 2 5 5.347108 10.694216 2241 +hypothet 1 5 5.347108 5.347108 2474 +vertic 1 5 5.347108 5.347108 2270 +guestbook 1 5 5.347108 5.347108 2475 +engg 1 4 5.568345 5.568345 2884 +multitask 1 4 5.568345 5.568345 2803 +screenshot 1 4 5.568345 5.568345 2743 +avinash 1 3 5.857933 5.857933 3510 +win 1 3 5.857933 5.857933 3593 +ipng 2 2 6.263398 12.526796 4727 +resumemi 1 2 6.263398 6.263398 4971 +hodja 1 2 6.263398 6.263398 4972 +fledg 1 2 6.263398 6.263398 4973 +horizont 1 2 6.263398 6.263398 4117 +diagon 1 2 6.263398 6.263398 4974 +caveat 1 2 6.263398 6.263398 4975 +trap 3 1 6.957497 20.872491 9910 +welcomeavinash 1 1 6.957497 6.957497 9911 +guptam 1 1 6.957497 6.957497 9912 +streetcambridg 1 1 6.957497 6.957497 9913 +thecia 1 1 6.957497 6.957497 9914 +presentationpent 1 1 6.957497 6.957497 9915 +skillpent 1 1 6.957497 6.957497 9916 +oppon 1 1 6.957497 6.957497 9917 +gamedownload 1 1 6.957497 6.957497 9918 +ipvimpl 1 1 6.957497 6.957497 9919 +proposalprogress 1 1 6.957497 6.957497 9920 +reportsam 1 1 6.957497 6.957497 9921 +pageon 1 1 6.957497 6.957497 9922 +internethytelnetth 1 1 6.957497 6.957497 9923 +catalogeinet 1 1 6.957497 6.957497 9924 +galaxyplanet 1 1 6.957497 6.957497 9925 +pagejoel 1 1 6.957497 6.957497 9926 +indexyahoo 1 1 6.957497 6.957497 9927 +wwwwebcrawlerlycosmi 1 1 6.957497 6.957497 9928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html new file mode 100644 index 00000000..9707ce53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^barber^Welcome.html @@ -0,0 +1,258 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +work 4 380 0.693147 2.772588 9 +research 3 431 0.693147 2.079441 10 +program 3 374 0.693147 2.079441 7 +interest 2 384 0.693147 1.386294 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +cours 3 273 1.098612 3.295836 15 +project 3 340 1.098612 3.295836 18 +student 2 343 1.098612 2.197224 19 +engin 2 297 1.098612 2.197224 20 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +also 5 259 1.386294 6.931470 28 +cornel 4 215 1.386294 5.545176 23 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +list 3 201 1.609438 4.828314 39 +paper 2 205 1.609438 3.218876 38 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +implement 3 152 1.791759 5.375277 52 +develop 2 174 1.791759 3.583518 53 +parallel 2 169 1.791759 3.583518 60 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +perform 2 143 1.945910 3.891820 74 +like 2 132 1.945910 3.891820 81 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +high 4 130 2.079442 8.317768 101 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +final 1 116 2.197225 2.197225 108 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +peopl 7 96 2.302585 16.118095 132 +need 2 98 2.302585 4.605170 135 +take 1 97 2.302585 2.302585 134 +call 3 91 2.397895 7.193685 153 +graphic 2 90 2.397895 4.795790 147 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +chang 1 82 2.484907 2.484907 163 +school 1 84 2.484907 2.484907 188 +good 3 77 2.564949 7.694847 200 +master 2 76 2.564949 5.129898 216 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +upson 2 71 2.639057 5.278114 218 +write 2 72 2.639057 5.278114 222 +name 1 72 2.639057 2.639057 220 +line 1 75 2.639057 2.639057 231 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +would 1 67 2.708050 2.708050 251 +window 1 68 2.708050 2.708050 242 +prof 1 64 2.772589 2.772589 273 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +previou 1 62 2.772589 2.772589 290 +locat 2 59 2.833213 5.666426 303 +plai 1 60 2.833213 2.833213 307 +share 1 59 2.833213 2.833213 304 +room 1 59 2.833213 2.833213 301 +summer 3 56 2.890372 8.671116 311 +semest 2 58 2.890372 5.780744 312 +found 1 53 2.944439 2.944439 337 +week 1 52 2.995732 2.995732 343 +digit 1 52 2.995732 2.995732 348 +friend 2 48 3.044522 6.089044 376 +cool 1 49 3.044522 3.044522 374 +still 1 50 3.044522 3.044522 362 +electron 1 47 3.091042 3.091042 379 +favorit 3 44 3.135494 9.406482 410 +protocol 2 45 3.135494 6.270988 407 +video 2 44 3.135494 6.270988 405 +keep 2 44 3.135494 6.270988 409 +algebra 1 45 3.135494 3.135494 394 +futur 3 41 3.218876 9.656628 427 +past 2 42 3.218876 6.437752 428 +linear 1 41 3.218876 3.218876 431 +live 4 40 3.258097 13.032388 451 +continu 1 39 3.258097 3.258097 448 +form 1 39 3.258097 3.258097 443 +realli 1 40 3.258097 3.258097 444 +hand 1 37 3.332205 3.332205 475 +feel 1 37 3.332205 3.332205 483 +soon 1 36 3.367296 3.367296 494 +least 1 35 3.401197 3.401197 516 +product 2 33 3.433987 6.867974 527 +taught 1 33 3.433987 3.433987 526 +kind 1 32 3.465736 3.465736 541 +concept 1 32 3.465736 3.465736 537 +independ 1 32 3.465736 3.465736 548 +taken 2 31 3.496508 6.993016 555 +computersci 1 30 3.555348 3.555348 562 +hard 1 30 3.555348 3.555348 563 +depend 1 29 3.583519 3.583519 583 +consid 1 29 3.583519 3.583519 590 +usual 1 28 3.610918 3.610918 608 +framework 1 28 3.610918 3.610918 606 +actual 1 28 3.610918 3.610918 604 +campu 1 27 3.637586 3.637586 623 +session 1 26 3.688879 3.688879 643 +spent 1 25 3.737670 3.737670 676 +fundament 1 25 3.737670 3.737670 661 +greg 1 24 3.761200 3.761200 695 +frame 1 24 3.761200 3.761200 684 +brows 1 23 3.806662 3.806662 726 +finish 3 22 3.850148 11.550444 748 +dai 1 22 3.850148 3.850148 753 +hous 2 21 3.912023 7.824046 801 +half 1 21 3.912023 3.912023 776 +mpeg 1 20 3.951244 3.951244 831 +nice 1 20 3.951244 3.951244 809 +media 1 19 4.007333 4.007333 861 +offici 1 18 4.060443 4.060443 894 +coupl 1 17 4.110874 4.110874 939 +thought 1 17 4.110874 4.110874 945 +otherwis 1 17 4.110874 4.110874 922 +georg 2 16 4.174387 8.348774 994 +intel 1 16 4.174387 4.174387 1000 +goe 2 15 4.248495 8.496990 1044 +fortran 1 15 4.248495 4.248495 1027 +stream 1 15 4.248495 4.248495 1015 +trip 1 14 4.317488 4.317488 1113 +hopefulli 1 14 4.317488 4.317488 1071 +camera 1 14 4.317488 4.317488 1115 +decid 1 14 4.317488 4.317488 1075 +dave 1 14 4.317488 4.317488 1098 +jonathan 3 13 4.382027 13.146081 1174 +believ 1 13 4.382027 4.382027 1187 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +resolut 1 13 4.382027 4.382027 1172 +unfortun 1 13 4.382027 4.382027 1170 +pretti 1 13 4.382027 4.382027 1191 +went 1 12 4.465908 4.465908 1279 +pageif 1 12 4.465908 4.465908 1275 +lake 1 11 4.553877 4.553877 1373 +bandwidth 1 11 4.553877 4.553877 1365 +smart 1 11 4.553877 4.553877 1352 +perman 1 11 4.553877 4.553877 1372 +road 1 11 4.553877 4.553877 1374 +sentenc 1 10 4.653960 4.653960 1413 +town 1 10 4.653960 4.653960 1458 +packet 1 10 4.653960 4.653960 1415 +bring 1 10 4.653960 4.653960 1430 +forc 1 10 4.653960 4.653960 1384 +undergrad 1 9 4.753590 4.753590 1589 +jersei 1 9 4.753590 4.753590 1587 +introductori 1 9 4.753590 4.753590 1479 +trust 1 9 4.753590 4.753590 1583 +grew 1 8 4.875197 4.875197 1742 +mile 1 8 4.875197 4.875197 1743 +filter 1 8 4.875197 4.875197 1641 +rivl 1 8 4.875197 4.875197 1632 +encrypt 2 7 5.010635 10.021270 1835 +cornellunivers 1 7 5.010635 5.010635 1916 +portland 1 7 5.010635 5.010635 1878 +conveni 1 6 5.164786 5.164786 2088 +corp 1 6 5.164786 5.164786 2139 +lucki 1 6 5.164786 5.164786 2163 +oregon 2 5 5.347108 10.694216 2437 +ahead 1 5 5.347108 5.347108 2338 +clarif 1 5 5.347108 5.347108 2253 +fork 1 4 5.568345 5.568345 2801 +skin 1 4 5.568345 5.568345 2840 +cheap 1 4 5.568345 5.568345 2751 +ultra 1 4 5.568345 5.568345 2889 +height 1 4 5.568345 5.568345 2890 +gear 1 4 5.568345 5.568345 2891 +birth 1 3 5.857933 5.857933 3594 +greek 1 3 5.857933 5.857933 3595 +labor 1 3 5.857933 5.857933 3195 +weber 1 3 5.857933 5.857933 3156 +bright 1 3 5.857933 5.857933 3596 +pack 1 3 5.857933 5.857933 3597 +urg 1 3 5.857933 5.857933 3212 +sugata 2 2 6.263398 12.526796 4976 +dude 1 2 6.263398 6.263398 4977 +felt 1 2 6.263398 6.263398 4978 +fratern 1 2 6.263398 6.263398 4979 +border 1 2 6.263398 6.263398 4980 +mukhopadhyai 1 2 6.263398 6.263398 4981 +surfer 1 2 6.263398 6.263398 4982 +captain 1 2 6.263398 6.263398 4983 +barber 3 1 6.957497 20.872491 9929 +bulli 3 1 6.957497 20.872491 9930 +ponch 2 1 6.957497 13.914994 9931 +inde 2 1 6.957497 13.914994 9932 +bush 2 1 6.957497 13.914994 9933 +fleshpooooooooooooooch 1 1 6.957497 6.957497 9934 +inclin 1 1 6.957497 6.957497 9935 +callm 1 1 6.957497 6.957497 9936 +orpooch 1 1 6.957497 6.957497 9937 +guppi 1 1 6.957497 6.957497 9938 +mama 1 1 6.957497 6.957497 9939 +phin 1 1 6.957497 6.957497 9940 +attendedmontgomeri 1 1 6.957497 6.957497 9941 +collegetown 1 1 6.957497 6.957497 9942 +adjac 1 1 6.957497 6.957497 9943 +sublet 1 1 6.957497 6.957497 9944 +oncolleg 1 1 6.957497 6.957497 9945 +radiu 1 1 6.957497 6.957497 9946 +epsilon 1 1 6.957497 6.957497 9947 +cayuga 1 1 6.957497 6.957497 9948 +thefilt 1 1 6.957497 6.957497 9949 +ofc 1 1 6.957497 6.957497 9950 +intereststhi 1 1 6.957497 6.957497 9951 +rivlan 1 1 6.957497 6.957497 9952 +tracker 1 1 6.957497 6.957497 9953 +rivli 1 1 6.957497 6.957497 9954 +smpd 1 1 6.957497 6.957497 9955 +generatorfor 1 1 6.957497 6.957497 9956 +webar 1 1 6.957497 6.957497 9957 +buddi 1 1 6.957497 6.957497 9958 +resourceful 1 1 6.957497 6.957497 9959 +pipe 1 1 6.957497 6.957497 9960 +meanth 1 1 6.957497 6.957497 9961 +comrad 1 1 6.957497 6.957497 9962 +ofhi 1 1 6.957497 6.957497 9963 +swirl 1 1 6.957497 6.957497 9964 +nefari 1 1 6.957497 6.957497 9965 +toilet 1 1 6.957497 6.957497 9966 +mukhopadyai 1 1 6.957497 6.957497 9967 +bonei 1 1 6.957497 6.957497 9968 +magoo 1 1 6.957497 6.957497 9969 +fletop 1 1 6.957497 6.957497 9970 +bigro 1 1 6.957497 6.957497 9971 +koster 1 1 6.957497 6.957497 9972 +bot 1 1 6.957497 6.957497 9973 +tffl 1 1 6.957497 6.957497 9974 +pageuuencod 1 1 6.957497 6.957497 9975 +pagetar 1 1 6.957497 6.957497 9976 +zip 1 1 6.957497 6.957497 9977 +downloadsgraphicsbarb 1 1 6.957497 6.957497 9978 +gifponch 1 1 6.957497 6.957497 9979 +htmlres_htmlres_curemmittemmitt 1 1 6.957497 6.957497 9980 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html new file mode 100644 index 00000000..95be207b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^basu^home.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +work 4 380 0.693147 2.772588 9 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +student 2 343 1.098612 2.197224 19 +us 2 329 1.098612 2.197224 16 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +cornel 5 215 1.386294 6.931470 23 +also 3 259 1.386294 4.158882 28 +graduat 2 215 1.386294 2.772588 31 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +parallel 3 169 1.791759 5.375277 60 +phone 2 175 1.791759 3.583518 45 +develop 2 174 1.791759 3.583518 53 +implement 2 152 1.791759 3.583518 52 +network 2 168 1.791759 3.583518 61 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +perform 4 143 1.945910 7.783640 74 +like 3 132 1.945910 5.837730 81 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +look 1 107 2.197225 2.197225 115 +final 1 116 2.197225 2.197225 108 +user 1 104 2.302585 2.302585 137 +commun 5 95 2.397895 11.989475 157 +homepag 2 93 2.397895 4.795790 148 +proceed 2 93 2.397895 4.795790 152 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +activ 4 84 2.484907 9.939628 182 +stuff 2 87 2.484907 4.969814 171 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +level 1 87 2.484907 2.484907 180 +messag 5 76 2.564949 12.824745 212 +complet 2 77 2.564949 5.129898 208 +appear 2 78 2.564949 5.129898 210 +good 1 77 2.564949 2.564949 200 +state 1 76 2.564949 2.564949 207 +interfac 1 79 2.564949 2.564949 209 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +addit 1 74 2.639057 2.639057 228 +david 1 71 2.639057 2.639057 232 +would 2 67 2.708050 5.416100 251 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +ithaca 2 65 2.772589 5.545178 294 +locat 1 59 2.833213 2.833213 303 +back 1 60 2.833213 2.833213 297 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +februari 1 54 2.944439 2.944439 328 +advisor 1 51 2.995732 2.995732 355 +run 1 51 2.995732 2.995732 347 +cool 1 49 3.044522 3.044522 374 +archiv 1 49 3.044522 3.044522 364 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +fast 2 42 3.218876 6.437752 429 +review 1 42 3.218876 3.218876 425 +realli 2 40 3.258097 6.516194 444 +live 1 40 3.258097 3.258097 451 +especi 1 36 3.367296 3.367296 496 +singl 1 34 3.401197 3.401197 510 +collabor 1 32 3.465736 3.465736 543 +someth 1 31 3.496508 3.496508 554 +turn 1 29 3.583519 3.583519 586 +cluster 4 28 3.610918 14.443672 612 +hope 1 28 3.610918 3.610918 610 +team 1 27 3.637586 3.637586 625 +compar 1 26 3.688879 3.688879 648 +berkelei 1 26 3.688879 3.688879 657 +enabl 1 26 3.688879 3.688879 655 +known 1 24 3.761200 3.761200 702 +size 1 23 3.806662 3.806662 713 +indian 1 22 3.850148 3.850148 769 +love 2 21 3.912023 7.824046 804 +listen 1 18 4.060443 4.060443 907 +layer 2 17 4.110874 8.221748 926 +segment 1 17 4.110874 4.110874 931 +interconnect 1 17 4.110874 4.110874 937 +latenc 2 16 4.174387 8.348774 993 +photograph 2 15 4.248495 8.496990 1056 +micro 1 15 4.248495 4.248495 1031 +split 1 14 4.317488 4.317488 1078 +thorsten 4 13 4.382027 17.528108 1133 +eicken 4 13 4.382027 17.528108 1134 +avenu 1 12 4.465908 4.465908 1277 +went 1 12 4.465908 4.465908 1279 +philadelphia 1 12 4.465908 4.465908 1244 +scienceat 1 11 4.553877 4.553877 1375 +see 1 11 4.553877 4.553877 1337 +motiv 1 11 4.553877 4.553877 1346 +cook 1 10 4.653960 4.653960 1464 +werner 1 10 4.653960 4.653960 1385 +sosp 1 10 4.653960 4.653960 1416 +calvin 1 9 4.753590 4.753590 1518 +trust 1 9 4.753590 4.753590 1583 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +vineet 2 8 4.875197 9.750394 1639 +kanpur 1 8 4.875197 4.875197 1744 +realiz 1 8 4.875197 4.875197 1739 +gold 1 8 4.875197 4.875197 1745 +coast 1 8 4.875197 4.875197 1746 +vogel 1 8 4.875197 4.875197 1622 +centuri 1 7 5.010635 5.010635 1935 +happen 1 7 5.010635 5.010635 1790 +rock 1 6 5.164786 5.164786 2164 +dream 1 6 5.164786 5.164786 2165 +tri 1 6 5.164786 5.164786 2166 +south 1 6 5.164786 5.164786 2167 +goldstein 1 6 5.164786 5.164786 2168 +buch 2 5 5.347108 10.694216 2272 +truli 1 5 5.347108 5.347108 2476 +aim 1 5 5.347108 5.347108 2477 +culler 1 5 5.347108 5.347108 2381 +symp 1 5 5.347108 5.347108 2376 +australia 1 5 5.347108 5.347108 2478 +basu 3 4 5.568345 16.705035 2843 +thecornel 1 4 5.568345 5.568345 2892 +hobb 1 4 5.568345 5.568345 2893 +writer 1 4 5.568345 5.568345 2783 +cuinfo 1 4 5.568345 5.568345 2626 +ofworkst 1 4 5.568345 5.568345 2679 +withth 1 4 5.568345 5.568345 2805 +anindya 7 3 5.857933 41.005531 3535 +experienc 1 3 5.857933 5.857933 3203 +asian 1 3 5.857933 5.857933 3598 +mpp 1 3 5.857933 5.857933 3194 +schauser 1 3 5.857933 5.857933 3599 +avula 1 3 5.857933 5.857933 3600 +mugshot 1 2 6.263398 6.263398 4984 +goof 1 2 6.263398 6.263398 4985 +projectwith 1 2 6.263398 6.263398 4986 +thegreat 1 2 6.263398 6.263398 4987 +pelham 1 2 6.263398 6.263398 4988 +grenvil 1 2 6.263398 6.263398 4989 +wodehous 1 2 6.263398 6.263398 4990 +metallica 1 2 6.263398 6.263398 4991 +fanci 1 2 6.263398 6.263398 4992 +monti 1 2 6.263398 6.263398 4993 +python 1 2 6.263398 6.263398 4994 +beavi 1 2 6.263398 6.263398 4995 +meiko 1 2 6.263398 6.263398 4996 +untrust 1 2 6.263398 6.263398 4997 +seth 1 2 6.263398 6.263398 4998 +klau 1 2 6.263398 6.263398 4999 +veena 1 2 6.263398 6.263398 5000 +homepagelast 1 2 6.263398 6.263398 5001 +delawar 1 1 6.957497 6.957497 9981 +eduwhat 1 1 6.957497 6.957497 9982 +musicor 1 1 6.957497 6.957497 9983 +coollik 1 1 6.957497 6.957497 9984 +indiawho 1 1 6.957497 6.957497 9985 +hardpink 1 1 6.957497 6.957497 9986 +floydfanat 1 1 6.957497 6.957497 9987 +childhood 1 1 6.957497 6.957497 9988 +livelast 1 1 6.957497 6.957497 9989 +plum 1 1 6.957497 6.957497 9990 +unwash 1 1 6.957497 6.957497 9991 +attendedwoodstock 1 1 6.957497 6.957497 9992 +onlinewoodstock 1 1 6.957497 6.957497 9993 +woodstock 1 1 6.957497 6.957497 9994 +thesocc 1 1 6.957497 6.957497 9995 +worldcup 1 1 6.957497 6.957497 9996 +butunfortun 1 1 6.957497 6.957497 9997 +putsomerecip 1 1 6.957497 6.957497 9998 +connoisseurof 1 1 6.957497 6.957497 9999 +whiski 1 1 6.957497 6.957497 10000 +malt 1 1 6.957497 6.957497 10001 +cheer 1 1 6.957497 6.957497 10002 +buttheadoth 1 1 6.957497 6.957497 10003 +gopherand 1 1 6.957497 6.957497 10004 +projectwhich 1 1 6.957497 6.957497 10005 +acheiv 1 1 6.957497 6.957497 10006 +passinglay 1 1 6.957497 6.957497 10007 +thatshow 1 1 6.957497 6.957497 10008 +saturateth 1 1 6.957497 6.957497 10009 +fibr 1 1 6.957497 6.957497 10010 +specificationfor 1 1 6.957497 6.957497 10011 +processesboth 1 1 6.957497 6.957497 10012 +abridgedvers 1 1 6.957497 6.957497 10013 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html new file mode 100644 index 00000000..c2236849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhao^bhao.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cornel 5 215 1.386294 6.931470 23 +gener 1 220 1.386294 1.386294 27 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +area 2 144 1.945910 3.891820 80 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +studi 1 120 2.079442 2.079442 91 +check 3 115 2.197225 6.591675 118 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +intern 1 108 2.197225 2.197225 128 +internet 3 83 2.484907 7.454721 186 +librari 2 87 2.484907 4.969814 181 +info 2 85 2.484907 4.969814 176 +stuff 1 87 2.484907 2.484907 171 +upson 1 71 2.639057 2.639057 218 +line 1 75 2.639057 2.639057 231 +view 1 70 2.708050 2.708050 254 +interact 2 62 2.772589 5.545178 270 +ithaca 1 65 2.772589 2.772589 294 +dept 1 64 2.772589 2.772589 291 +simpl 1 60 2.833213 2.833213 298 +overview 1 56 2.890372 2.890372 323 +processor 1 54 2.944439 2.944439 335 +local 1 55 2.944439 2.944439 334 +scientif 1 53 2.944439 2.944439 341 +advisor 1 51 2.995732 2.995732 355 +life 1 50 3.044522 3.044522 375 +music 2 42 3.218876 6.437752 436 +movi 1 40 3.258097 3.258097 459 +map 1 39 3.258097 3.258097 452 +tutori 1 39 3.258097 3.258097 437 +seminar 1 38 3.295837 3.295837 470 +global 1 34 3.401197 3.401197 520 +richard 1 31 3.496508 3.496508 559 +weather 3 28 3.610918 10.832754 618 +magazin 1 24 3.761200 3.761200 704 +famili 1 23 3.806662 3.806662 735 +geometri 1 22 3.850148 3.850148 752 +navig 2 21 3.912023 7.824046 796 +synthesi 1 20 3.951244 3.951244 834 +georg 1 16 4.174387 4.174387 994 +stock 1 16 4.174387 4.174387 1007 +massiv 1 15 4.248495 4.248495 1026 +incomput 1 14 4.317488 4.317488 1096 +francisco 1 14 4.317488 4.317488 1095 +levi 1 14 4.317488 4.317488 1093 +forth 1 13 4.382027 4.382027 1186 +galleri 1 13 4.382027 4.382027 1192 +insid 1 12 4.465908 4.465908 1262 +newspap 1 12 4.465908 4.465908 1280 +neat 1 12 4.465908 4.465908 1263 +congress 1 9 4.753590 4.753590 1592 +forget 1 8 4.875197 4.875197 1712 +wire 1 8 4.875197 4.875197 1747 +microsystem 1 6 5.164786 5.164786 2160 +peek 1 6 5.164786 5.164786 2169 +frog 1 5 5.347108 5.347108 2479 +hallithaca 1 4 5.568345 5.568345 2894 +zippel 1 4 5.568345 5.568345 2879 +heard 1 4 5.568345 5.568345 2895 +aboutth 1 4 5.568345 5.568345 2720 +wander 1 4 5.568345 5.568345 2896 +educornel 1 3 5.857933 5.857933 3601 +universitydept 1 3 5.857933 5.857933 3602 +galaxi 1 3 5.857933 5.857933 3603 +underground 1 3 5.857933 5.857933 3604 +spider 1 3 5.857933 5.857933 3605 +intertext 1 2 6.263398 6.263398 5002 +solar 1 2 6.263398 6.263398 5003 +martial 1 2 6.263398 6.263398 5004 +jpop 3 1 6.957497 20.872491 10014 +homepageben 1 1 6.957497 6.957497 10015 +haogradu 1 1 6.957497 6.957497 10016 +studentbhao 1 1 6.957497 6.957497 10017 +flea 1 1 6.957497 6.957497 10018 +taylorwhen 1 1 6.957497 6.957497 10019 +itsgorg 1 1 6.957497 6.957497 10020 +cornellwhat 1 1 6.957497 6.957497 10021 +dissectionmagazin 1 1 6.957497 6.957497 10022 +magazinea 1 1 6.957497 6.957497 10023 +shoemak 1 1 6.957497 6.957497 10024 +weblouvr 1 1 6.957497 6.957497 10025 +xmorphia 1 1 6.957497 6.957497 10026 +kaleidospac 1 1 6.957497 6.957497 10027 +bonsai 1 1 6.957497 6.957497 10028 +seiyuu 1 1 6.957497 6.957497 10029 +archivenetwork 1 1 6.957497 6.957497 10030 +edgelibrari 1 1 6.957497 6.957497 10031 +infonih 1 1 6.957497 6.957497 10032 +courseth 1 1 6.957497 6.957497 10033 +guidecern 1 1 6.957497 6.957497 10034 +bhao 1 1 6.957497 6.957497 10035 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html new file mode 100644 index 00000000..a918f0e7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^bhardwaj^bhardwaj.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +network 1 168 1.791759 1.791759 61 +ithaca 1 65 2.772589 2.772589 294 +york 1 41 3.218876 3.218876 435 +apart 1 7 5.010635 5.010635 1936 +aastha 1 2 6.263398 6.263398 5005 +sciencemast 1 2 6.263398 6.263398 4969 +hasbrouck 1 2 6.263398 6.263398 4952 +pageaastha 1 1 6.957497 6.957497 10036 +bhardwajdepart 1 1 6.957497 6.957497 10037 +ofengineeeringresumehtmlpost 1 1 6.957497 6.957497 10038 +scriptcourseworkadvanceddatabas 1 1 6.957497 6.957497 10039 +csmultimediasystem 1 1 6.957497 6.957497 10040 +csengineeringcomput 1 1 6.957497 6.957497 10041 +cssoftwareengin 1 1 6.957497 6.957497 10042 +cscontact 1 1 6.957497 6.957497 10043 +bhardwaj 1 1 6.957497 6.957497 10044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html new file mode 100644 index 00000000..61b361c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^brd^brd.html @@ -0,0 +1,284 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +inform 6 412 0.693147 4.158882 8 +system 3 443 0.693147 2.079441 6 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +us 4 329 1.098612 4.394448 16 +student 2 343 1.098612 2.197224 19 +cornel 9 215 1.386294 12.476646 23 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +paper 7 205 1.609438 11.266066 38 +group 3 183 1.609438 4.828314 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +algorithm 7 162 1.791759 12.542313 57 +distribut 4 162 1.791759 7.167036 51 +parallel 3 169 1.791759 5.375277 60 +develop 2 174 1.791759 3.583518 53 +recent 2 167 1.791759 3.583518 58 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +click 5 142 1.945910 9.729550 78 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +confer 5 126 2.079442 10.397210 100 +report 1 131 2.079442 2.079442 92 +tool 1 117 2.079442 2.079442 93 +intern 9 108 2.197225 19.775025 128 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +part 5 98 2.302585 11.512925 129 +peopl 3 96 2.302585 6.907755 132 +book 2 99 2.302585 4.605170 131 +access 1 102 2.302585 2.302585 136 +pictur 5 89 2.397895 11.989475 160 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +search 1 95 2.397895 2.397895 155 +ieee 7 86 2.484907 17.394349 190 +journal 3 83 2.484907 7.454721 183 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +librari 1 87 2.484907 2.484907 181 +stuff 1 87 2.484907 2.484907 171 +level 1 87 2.484907 2.484907 180 +april 2 77 2.564949 5.129898 196 +optim 2 79 2.564949 5.129898 197 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +workshop 4 71 2.639057 10.556228 239 +intellig 2 72 2.639057 5.278114 225 +nation 1 74 2.639057 2.639057 240 +write 1 72 2.639057 2.639057 222 +symposium 1 72 2.639057 2.639057 238 +onlin 1 75 2.639057 2.639057 223 +view 1 70 2.708050 2.708050 254 +plan 6 65 2.772589 16.635534 272 +laboratori 4 63 2.772589 11.090356 292 +foundat 3 62 2.772589 8.317767 286 +artifici 1 63 2.772589 2.772589 280 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +ithaca 1 65 2.772589 2.772589 294 +plai 2 60 2.833213 5.666426 307 +automat 1 61 2.833213 2.833213 306 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +found 1 53 2.944439 2.944439 337 +februari 1 54 2.944439 2.944439 328 +without 1 50 3.044522 3.044522 370 +visual 1 48 3.044522 3.044522 372 +california 2 46 3.091042 6.182084 388 +could 1 46 3.091042 3.091042 383 +move 1 47 3.091042 3.091042 382 +video 2 44 3.135494 6.270988 405 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +around 1 43 3.178054 3.178054 415 +vision 7 41 3.218876 22.532132 430 +autom 3 41 3.218876 9.656628 434 +press 1 42 3.218876 3.218876 419 +programm 4 39 3.258097 13.032388 445 +small 2 39 3.258097 6.516194 447 +societi 1 40 3.258097 3.258097 456 +open 1 38 3.295837 3.295837 469 +field 3 37 3.332205 9.996615 482 +robot 30 36 3.367296 101.018880 497 +approxim 2 35 3.401197 6.802394 509 +singl 1 34 3.401197 3.401197 510 +post 1 35 3.401197 3.401197 505 +tech 1 35 3.401197 3.401197 515 +return 1 34 3.401197 3.401197 502 +board 1 33 3.433987 3.433987 528 +built 1 29 3.583519 3.583519 592 +weather 1 28 3.610918 3.610918 618 +scale 1 28 3.610918 3.610918 613 +arrai 6 27 3.637586 21.825516 627 +manipul 6 27 3.637586 21.825516 624 +team 2 27 3.637586 7.275172 625 +bound 3 26 3.688879 11.066637 659 +proc 3 26 3.688879 11.066637 649 +experiment 2 26 3.688879 7.377758 645 +detect 1 26 3.688879 3.688879 646 +revis 1 26 3.688879 3.688879 640 +task 2 25 3.737670 7.475340 678 +motion 1 24 3.761200 3.761200 699 +sometim 1 24 3.761200 3.761200 696 +mobil 5 23 3.806662 19.033310 730 +famili 1 23 3.806662 3.806662 735 +lead 1 23 3.806662 3.806662 718 +cooper 1 22 3.850148 3.850148 757 +chip 2 21 3.912023 7.824046 770 +vlsi 1 21 3.912023 3.912023 795 +department 1 20 3.951244 3.951244 839 +mpeg 1 20 3.951244 3.951244 831 +scheme 1 20 3.951244 3.951244 818 +boston 1 19 4.007333 4.007333 862 +demo 2 18 4.060443 8.120886 888 +offici 1 18 4.060443 4.060443 894 +lower 1 18 4.060443 4.060443 886 +minim 1 18 4.060443 4.060443 887 +agent 1 18 4.060443 4.060443 910 +stanford 1 17 4.110874 4.110874 955 +vector 3 16 4.174387 12.523161 961 +diego 3 16 4.174387 12.523161 992 +explan 1 16 4.174387 4.174387 985 +micro 4 15 4.248495 16.993980 1031 +massiv 3 15 4.248495 12.745485 1026 +track 1 15 4.248495 4.248495 1029 +configur 1 15 4.248495 4.248495 1012 +researchmi 1 14 4.317488 4.317488 1119 +draft 1 14 4.317488 4.317488 1085 +train 1 14 4.317488 4.317488 1066 +anonym 1 14 4.317488 4.317488 1100 +cannot 2 13 4.382027 8.764054 1144 +jonathan 1 13 4.382027 4.382027 1174 +franc 3 12 4.465908 13.397724 1276 +bruce 1 12 4.465908 4.465908 1226 +walk 1 12 4.465908 4.465908 1281 +target 1 12 4.465908 4.465908 1282 +peter 1 11 4.553877 4.553877 1316 +donald 6 9 4.753590 28.521540 1510 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +face 1 9 4.753590 4.753590 1501 +classif 1 9 4.753590 4.753590 1586 +entitl 1 9 4.753590 4.753590 1490 +wilson 1 9 4.753590 4.753590 1536 +herefor 1 9 4.753590 4.753590 1483 +invari 5 8 4.875197 24.375985 1748 +autonom 1 8 4.875197 4.875197 1749 +sensor 3 7 5.010635 15.031905 1920 +morph 1 7 5.010635 5.010635 1937 +pittsburgh 1 7 5.010635 5.010635 1938 +beyond 1 7 5.010635 5.010635 1834 +huttenloch 2 6 5.164786 10.329572 1983 +silicon 1 6 5.164786 5.164786 2076 +lili 6 5 5.347108 32.082648 2240 +actuat 4 5 5.347108 21.388432 2442 +minneapoli 2 5 5.347108 10.694216 2480 +minnesota 2 5 5.347108 10.694216 2469 +eduph 1 5 5.347108 5.347108 2449 +upper 1 5 5.347108 5.347108 2481 +these 1 5 5.347108 5.347108 2482 +rotat 1 5 5.347108 5.347108 2295 +poem 1 5 5.347108 5.347108 2483 +clickher 1 5 5.347108 5.347108 2428 +randal 2 4 5.568345 11.136690 2776 +thecornel 2 4 5.568345 11.136690 2892 +chase 2 4 5.568345 11.136690 2897 +decoupl 1 4 5.568345 5.568345 2898 +chain 1 4 5.568345 5.568345 2712 +push 1 4 5.568345 5.568345 2635 +alfr 1 4 5.568345 5.568345 2882 +bhringer 5 3 5.857933 29.289665 3606 +jen 4 3 5.857933 23.431732 3378 +hereto 3 3 5.857933 17.573799 3476 +noel 2 3 5.857933 11.715866 3376 +fabric 2 3 5.857933 11.715866 3607 +algorithmica 2 3 5.857933 11.715866 3561 +artificialintellig 2 3 5.857933 11.715866 3608 +scream 1 3 5.857933 5.857933 3609 +microfabr 1 3 5.857933 5.857933 3610 +daniela 1 3 5.857933 5.857933 3611 +portrait 1 3 5.857933 5.857933 3491 +harm 1 3 5.857933 5.857933 3515 +macdonald 6 2 6.263398 37.580388 5006 +mem 3 2 6.263398 18.790194 5007 +brigg 2 2 6.263398 12.526796 5008 +ree 2 2 6.263398 12.526796 5009 +nanofabr 1 2 6.263398 6.263398 5010 +toconstruct 1 2 6.263398 6.263398 4858 +ofmobil 1 2 6.263398 6.263398 5011 +internationalworkshop 1 2 6.263398 6.263398 5012 +crystal 1 2 6.263398 6.263398 5013 +electro 1 2 6.263398 6.263398 5014 +reif 1 2 6.263398 6.263398 5015 +furnitur 1 2 6.263398 6.263398 5016 +actuatorarrai 1 2 6.263398 6.263398 5017 +mihailovich 1 2 6.263398 6.263398 5018 +automationnic 1 2 6.263398 6.263398 5019 +andj 1 2 6.263398 6.263398 5020 +latomb 1 2 6.263398 6.263398 5021 +doc 1 2 6.263398 6.263398 5022 +catalogc 1 2 6.263398 6.263398 5023 +apictur 1 2 6.263398 6.263398 5024 +drawn 1 2 6.263398 6.263398 4215 +swallow 1 2 6.263398 6.263398 5025 +tommi 6 1 6.957497 41.744982 10045 +feeder 3 1 6.957497 20.872491 10046 +vibratori 3 1 6.957497 20.872491 10047 +kinodynam 3 1 6.957497 20.872491 10048 +xavier 3 1 6.957497 20.872491 10049 +ourlab 2 1 6.957497 13.914994 10050 +toulous 2 1 6.957497 13.914994 10051 +icra 2 1 6.957497 13.914994 10052 +provablygood 2 1 6.957497 13.914994 10053 +couch 2 1 6.957497 13.914994 10054 +donaldbruc 1 1 6.957497 6.957497 10055 +donaldassoci 1 1 6.957497 6.957497 10056 +professorbrd 1 1 6.957497 6.957497 10057 +laboratorydan 1 1 6.957497 6.957497 10058 +microactu 1 1 6.957497 6.957497 10059 +arrayi 1 1 6.957497 6.957497 10060 +squarecentemet 1 1 6.957497 6.957497 10061 +sensoryfeedback 1 1 6.957497 6.957497 10062 +buildself 1 1 6.957497 6.957497 10063 +propel 1 1 6.957497 6.957497 10064 +amybrigg 1 1 6.957497 6.957497 10065 +surveil 1 1 6.957497 6.957497 10066 +andintercept 1 1 6.957497 6.957497 10067 +developedbi 1 1 6.957497 6.957497 10068 +informationalon 1 1 6.957497 6.957497 10069 +andlow 1 1 6.957497 6.957497 10070 +memsand 1 1 6.957497 6.957497 10071 +thealgorithm 1 1 6.957497 6.957497 10072 +robustgeometr 1 1 6.957497 6.957497 10073 +andimprov 1 1 6.957497 6.957497 10074 +partsfeed 1 1 6.957497 6.957497 10075 +partii 1 1 6.957497 6.957497 10076 +robotswith 1 1 6.957497 6.957497 10077 +forcartesian 1 1 6.957497 6.957497 10078 +canni 1 1 6.957497 6.957497 10079 +inpress 1 1 6.957497 6.957497 10080 +supermodular 1 1 6.957497 6.957497 10081 +andtheoret 1 1 6.957497 6.957497 10082 +jetai 1 1 6.957497 6.957497 10083 +firstquart 1 1 6.957497 6.957497 10084 +inminim 1 1 6.957497 6.957497 10085 +iser 1 1 6.957497 6.957497 10086 +automon 1 1 6.957497 6.957497 10087 +ofjapan 1 1 6.957497 6.957497 10088 +iro 1 1 6.957497 6.957497 10089 +sensorlessmanipul 1 1 6.957497 6.957497 10090 +andautom 1 1 6.957497 6.957497 10091 +ofrobot 1 1 6.957497 6.957497 10092 +otherpubl 1 1 6.957497 6.957497 10093 +dinesh 1 1 6.957497 6.957497 10094 +aval 1 1 6.957497 6.957497 10095 +indexobtain 1 1 6.957497 6.957497 10096 +paperscopi 1 1 6.957497 6.957497 10097 +teamof 1 1 6.957497 6.957497 10098 +movefurnitur 1 1 6.957497 6.957497 10099 +mobot 1 1 6.957497 6.957497 10100 +loretta 1 1 6.957497 6.957497 10101 +pompilio 1 1 6.957497 6.957497 10102 +discoverychannel 1 1 6.957497 6.957497 10103 +funa 1 1 6.957497 6.957497 10104 +moreoth 1 1 6.957497 6.957497 10105 +tallest 1 1 6.957497 6.957497 10106 +darkest 1 1 6.957497 6.957497 10107 +hollywood 1 1 6.957497 6.957497 10108 +merian 1 1 6.957497 6.957497 10109 +wrai 1 1 6.957497 6.957497 10110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html new file mode 100644 index 00000000..b39101b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^cardie^cardie.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 6 431 0.693147 4.158882 10 +system 6 443 0.693147 4.158882 6 +work 4 380 0.693147 2.772588 9 +inform 3 412 0.693147 2.079441 8 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +us 6 329 1.098612 6.591672 16 +project 4 340 1.098612 4.394448 18 +current 1 284 1.098612 1.098612 21 +languag 10 227 1.386294 13.862940 26 +cornel 3 215 1.386294 4.158882 23 +link 2 247 1.386294 2.772588 24 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +group 2 183 1.609438 3.218876 36 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +base 6 165 1.791759 10.750554 50 +develop 2 174 1.791759 3.583518 53 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +process 4 142 1.945910 7.783640 72 +note 3 142 1.945910 5.837730 67 +area 2 144 1.945910 3.891820 80 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +lectur 1 135 1.945910 1.945910 73 +file 1 132 1.945910 1.945910 70 +machin 7 129 2.079442 14.556094 95 +confer 7 126 2.079442 14.556094 100 +analysi 4 124 2.079442 8.317768 98 +report 2 131 2.079442 4.158884 92 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +specif 3 106 2.197225 6.591675 106 +intern 2 108 2.197225 4.394450 128 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +techniqu 4 99 2.302585 9.210340 138 +part 2 98 2.302585 4.605170 129 +technic 2 100 2.302585 4.605170 140 +text 2 98 2.302585 4.605170 133 +user 1 104 2.302585 2.302585 137 +proceed 8 93 2.397895 19.183160 152 +select 2 91 2.397895 4.795790 154 +associ 2 93 2.397895 4.795790 151 +present 1 91 2.397895 2.397895 145 +learn 13 86 2.484907 32.303791 170 +contain 2 81 2.484907 4.969814 174 +build 1 85 2.484907 2.484907 184 +activ 1 84 2.484907 2.484907 182 +resourc 1 81 2.484907 2.484907 172 +method 1 80 2.564949 2.564949 213 +intellig 8 72 2.639057 21.112456 225 +nation 3 74 2.639057 7.917171 240 +workshop 2 71 2.639057 5.278114 239 +upson 1 71 2.639057 2.639057 218 +effici 1 73 2.639057 2.639057 233 +addit 1 74 2.639057 2.639057 228 +knowledg 6 67 2.708050 16.248300 243 +goal 1 66 2.708050 2.708050 250 +artifici 7 63 2.772589 19.408123 280 +guid 2 63 2.772589 5.545178 267 +import 1 65 2.772589 2.772589 282 +evalu 1 64 2.772589 2.772589 266 +foundat 1 62 2.772589 2.772589 286 +improv 1 62 2.772589 2.772589 289 +content 1 59 2.833213 2.833213 302 +thesi 4 57 2.890372 11.561488 327 +reason 1 57 2.890372 2.890372 318 +variou 1 56 2.890372 2.890372 317 +case 4 51 2.995732 11.982928 351 +finger 1 52 2.995732 2.995732 354 +investig 1 51 2.995732 2.995732 353 +approach 5 48 3.044522 15.222610 366 +archiv 1 49 3.044522 3.044522 364 +pointer 1 48 3.044522 3.044522 368 +understand 3 47 3.091042 9.273126 384 +featur 3 46 3.091042 9.273126 386 +effect 1 46 3.091042 3.091042 385 +natur 8 44 3.135494 25.083952 406 +mechan 1 43 3.178054 3.178054 416 +offer 1 43 3.178054 3.178054 414 +press 7 42 3.218876 22.532132 419 +autom 1 41 3.218876 3.218876 434 +combin 1 42 3.218876 3.218876 421 +annual 4 40 3.258097 13.032388 458 +societi 2 40 3.258097 6.516194 456 +seminar 1 38 3.295837 3.295837 470 +origin 1 38 3.295837 3.295837 472 +expect 1 37 3.332205 3.332205 484 +cost 1 37 3.332205 3.332205 480 +tree 1 36 3.367296 3.367296 492 +statist 1 35 3.401197 3.401197 521 +print 1 34 3.401197 3.401197 503 +within 1 33 3.433987 3.433987 525 +chapter 2 32 3.465736 6.931472 536 +extend 1 32 3.465736 3.465736 539 +domain 4 30 3.555348 14.221392 564 +focu 2 30 3.555348 7.110696 571 +compon 1 30 3.555348 3.555348 570 +focus 1 29 3.583519 3.583519 584 +semant 1 29 3.583519 3.583519 587 +framework 1 28 3.610918 3.610918 606 +symbol 1 27 3.637586 3.637586 620 +determin 1 27 3.637586 3.637586 630 +task 1 25 3.737670 3.737670 678 +handl 1 24 3.761200 3.761200 685 +brows 1 23 3.806662 3.806662 726 +decis 1 23 3.806662 3.806662 728 +springer 1 22 3.850148 3.850148 750 +stat 1 17 4.110874 4.110874 924 +analyz 1 17 4.110874 4.110874 925 +repositori 1 17 4.110874 4.110874 932 +cognit 4 16 4.174387 16.697548 986 +jose 2 16 4.174387 8.348774 976 +condit 1 16 4.174387 4.174387 975 +practicum 1 16 4.174387 4.174387 960 +cambridg 1 16 4.174387 4.174387 1008 +massachusett 3 14 4.317488 12.952464 1118 +train 1 14 4.317488 4.317488 1066 +embed 1 14 4.317488 4.317488 1102 +primarili 1 13 4.382027 4.382027 1185 +context 1 13 4.382027 4.382027 1153 +robust 1 12 4.465908 4.465908 1271 +speech 1 12 4.465908 4.465908 1222 +lake 1 11 4.553877 4.553877 1373 +tour 1 11 4.553877 4.553877 1307 +acquisit 5 10 4.653960 23.269800 1465 +sentenc 4 10 4.653960 18.615840 1413 +underli 1 10 4.653960 4.653960 1410 +prior 1 10 4.653960 4.653960 1438 +linguist 4 9 4.753590 19.014360 1593 +rel 3 9 4.753590 14.260770 1487 +softbal 1 9 4.753590 4.753590 1594 +conferenceon 1 9 4.753590 4.753590 1595 +introductori 1 9 4.753590 4.753590 1479 +morgan 1 9 4.753590 4.753590 1484 +aaai 5 8 4.875197 24.375985 1750 +extract 2 8 4.875197 9.750394 1728 +entri 1 8 4.875197 4.875197 1678 +span 1 8 4.875197 4.875197 1751 +empir 1 8 4.875197 4.875197 1722 +tag 2 7 5.010635 10.021270 1821 +larger 2 7 5.010635 10.021270 1875 +lawrenc 2 7 5.010635 10.021270 1908 +pennsylvania 1 7 5.010635 5.010635 1932 +machinelearn 1 6 5.164786 5.164786 2084 +heurist 1 6 5.164786 5.164786 2125 +syntax 1 6 5.164786 5.164786 2030 +amherst 3 5 5.347108 16.041324 2484 +connectionist 1 5 5.347108 5.347108 2430 +kaufmann 1 5 5.347108 5.347108 2254 +corpu 1 5 5.347108 5.347108 2282 +disambigu 2 4 5.568345 11.136690 2899 +clair 1 4 5.568345 5.568345 2605 +hallphon 1 4 5.568345 5.568345 2900 +compris 1 4 5.568345 5.568345 2862 +ijcai 1 4 5.568345 5.568345 2901 +complic 1 4 5.568345 5.568345 2902 +educlick 1 3 5.857933 5.857933 3612 +tosupport 1 3 5.857933 5.857933 3613 +teachingc 1 3 5.857933 5.857933 3614 +agener 1 3 5.857933 5.857933 3213 +conceptu 1 3 5.857933 5.857933 3214 +fourteenth 1 3 5.857933 5.857933 3615 +ninth 1 3 5.857933 5.857933 3616 +anaheim 1 3 5.857933 5.857933 3271 +citat 1 3 5.857933 5.857933 3617 +penn 1 3 5.857933 5.857933 3094 +corpora 2 2 6.263398 12.526796 4269 +interestscours 1 2 6.263398 6.263398 5026 +tandem 1 2 6.263398 6.263398 5027 +learningtechniqu 1 2 6.263398 6.263398 5028 +gabriel 1 2 6.263398 6.263398 5029 +jointconfer 1 2 6.263398 6.263398 5030 +eleventh 1 2 6.263398 6.263398 5031 +newark 1 2 6.263398 6.263398 5032 +bias 1 2 6.263398 6.263398 5033 +bloomington 1 2 6.263398 6.263398 5034 +twelfth 1 2 6.263398 6.263398 5035 +treebank 1 2 6.263398 6.263398 4138 +cardi 13 1 6.957497 90.447461 10111 +kenmor 3 1 6.957497 20.872491 10112 +pronoun 3 1 6.957497 20.872491 10113 +naturallanguag 2 1 6.957497 13.914994 10114 +knowledgeacquisit 2 1 6.957497 13.914994 10115 +riloff 2 1 6.957497 13.914994 10116 +tenth 2 1 6.957497 13.914994 10117 +erlbaumassoci 2 1 6.957497 13.914994 10118 +lehnert 2 1 6.957497 13.914994 10119 +cardieclair 1 1 6.957497 6.957497 10120 +teachselect 1 1 6.957497 6.957497 10121 +publicationsnlp 1 1 6.957497 6.957497 10122 +amalgam 1 1 6.957497 6.957497 10123 +westi 1 1 6.957497 6.957497 10124 +interestsalthough 1 1 6.957497 6.957497 10125 +subfield 1 1 6.957497 6.957497 10126 +cognitivemodel 1 1 6.957497 6.957497 10127 +forexplor 1 1 6.957497 6.957497 10128 +tworel 1 1 6.957497 6.957497 10129 +reliablyextract 1 1 6.957497 6.957497 10130 +cstr 1 1 6.957497 6.957497 10131 +kenmoreacquir 1 1 6.957497 6.957497 10132 +tworeal 1 1 6.957497 6.957497 10133 +andconcept 1 1 6.957497 6.957497 10134 +anteced 1 1 6.957497 6.957497 10135 +disambiguationtask 1 1 6.957497 6.957497 10136 +learningcompon 1 1 6.957497 6.957497 10137 +isembed 1 1 6.957497 6.957497 10138 +inartifici 1 1 6.957497 6.957497 10139 +understandingselect 1 1 6.957497 6.957497 10140 +publicationsautom 1 1 6.957497 6.957497 10141 +wermter 1 1 6.957497 6.957497 10142 +scheler 1 1 6.957497 6.957497 10143 +andsymbol 1 1 6.957497 6.957497 10144 +tolearn 1 1 6.957497 6.957497 10145 +conceptualsent 1 1 6.957497 6.957497 10146 +cmpsci 1 1 6.957497 6.957497 10147 +onconstrain 1 1 6.957497 6.957497 10148 +plausibl 1 1 6.957497 6.957497 10149 +linkscomput 1 1 6.957497 6.957497 10150 +linguistics 1 1 6.957497 6.957497 10151 +aclspeci 1 1 6.957497 6.957497 10152 +learningmachin 1 1 6.957497 6.957497 10153 +digestmachinelearn 1 1 6.957497 6.957497 10154 +researchersmachin 1 1 6.957497 6.957497 10155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html new file mode 100644 index 00000000..8fde4d47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chew^chew.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +us 2 329 1.098612 2.197224 16 +engin 1 297 1.098612 1.098612 20 +gener 3 220 1.386294 4.158882 27 +cornel 2 215 1.386294 2.772588 23 +softwar 2 220 1.386294 2.772588 30 +applic 2 170 1.791759 3.583518 56 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +click 1 142 1.945910 1.945910 78 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +mathemat 3 108 2.197225 6.591675 123 +version 1 113 2.197225 2.197225 122 +make 1 111 2.197225 2.197225 120 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +level 2 87 2.484907 4.969814 180 +environ 1 84 2.484907 2.484907 177 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +java 1 70 2.708050 2.708050 248 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +creat 2 63 2.772589 5.545178 277 +plan 1 65 2.772589 2.772589 272 +ithaca 1 65 2.772589 2.772589 294 +automat 1 61 2.833213 2.833213 306 +browser 1 56 2.890372 2.890372 313 +point 1 58 2.890372 2.890372 319 +scientif 1 53 2.944439 2.944439 341 +natur 1 44 3.135494 3.135494 406 +vision 1 41 3.218876 3.218876 430 +paul 2 38 3.295837 6.591674 471 +tech 1 35 3.401197 3.401197 515 +concept 1 32 3.465736 3.465736 537 +transform 1 32 3.465736 3.465736 542 +express 1 32 3.465736 3.465736 540 +specifi 1 30 3.555348 3.555348 568 +symbol 1 27 3.637586 3.637586 620 +primari 1 25 3.737670 3.737670 669 +motion 1 24 3.761200 3.761200 699 +equat 1 23 3.806662 3.806662 724 +emphasi 1 22 3.850148 3.850148 755 +thu 1 21 3.912023 3.912023 773 +applet 1 20 3.951244 3.951244 827 +geometr 3 19 4.007333 12.021999 852 +comparison 1 19 4.007333 4.007333 863 +senior 1 14 4.317488 4.317488 1120 +shape 1 12 4.465908 4.465908 1245 +mesh 2 11 4.553877 9.107754 1351 +sens 1 11 4.553877 4.553877 1305 +purdu 1 10 4.653960 4.653960 1466 +rhode 1 9 4.753590 4.753590 1579 +rais 1 8 4.875197 4.875197 1711 +canb 1 7 5.010635 5.010635 1846 +beta 1 6 5.164786 5.164786 1993 +compat 1 5 5.347108 5.347108 2485 +diagram 1 5 5.347108 5.347108 2346 +triangul 1 4 5.568345 5.568345 2903 +chew 3 3 5.857933 17.573799 3618 +delaunai 2 3 5.857933 11.715866 3619 +implicitli 1 3 5.857933 5.857933 3620 +voronoi 1 2 6.263398 6.263398 5036 +agenda 1 2 6.263398 6.263398 5037 +scientificsoftwar 1 2 6.263398 6.263398 5038 +acollect 1 2 6.263398 6.263398 5039 +associatephd 1 1 6.957497 6.957497 10156 +eduappletsy 1 1 6.957497 6.957497 10157 +asnetscap 1 1 6.957497 6.957497 10158 +avoronoi 1 1 6.957497 6.957497 10159 +onpract 1 1 6.957497 6.957497 10160 +includedplac 1 1 6.957497 6.957497 10161 +thataris 1 1 6.957497 6.957497 10162 +isspecifi 1 1 6.957497 6.957497 10163 +ofphys 1 1 6.957497 6.957497 10164 +techniquesar 1 1 6.957497 6.957497 10165 +effectiveprogram 1 1 6.957497 6.957497 10166 +myonlin 1 1 6.957497 6.957497 10167 +reportscornel 1 1 6.957497 6.957497 10168 +computerscienceth 1 1 6.957497 6.957497 10169 +simlabprojectaddress 1 1 6.957497 6.957497 10170 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html new file mode 100644 index 00000000..da10a8d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^chichao^chichao.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +student 2 343 1.098612 2.197224 19 +cornel 5 215 1.386294 6.931470 23 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +network 2 168 1.791759 3.583518 61 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +perform 1 143 1.945910 1.945910 74 +number 1 130 2.079442 2.079442 97 +compil 1 122 2.079442 2.079442 96 +world 1 115 2.197225 2.197225 126 +commun 1 95 2.397895 2.397895 157 +chang 3 82 2.484907 7.454721 163 +activ 2 84 2.484907 4.969814 182 +stuff 1 87 2.484907 2.484907 171 +messag 1 76 2.564949 2.564949 212 +server 1 76 2.564949 2.564949 204 +effici 1 73 2.639057 2.639057 233 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +guid 1 63 2.772589 2.772589 267 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +processor 1 54 2.944439 2.944439 335 +advisor 1 51 2.995732 2.995732 355 +cool 1 49 3.044522 3.044522 374 +multipl 1 39 3.258097 3.258097 453 +microsoft 1 38 3.295837 3.295837 468 +concurr 1 34 3.401197 3.401197 501 +toward 1 25 3.737670 3.737670 668 +sport 1 25 3.737670 3.737670 683 +latest 1 21 3.912023 3.912023 785 +runtim 1 19 4.007333 4.007333 858 +anyon 1 17 4.110874 4.110874 916 +latenc 1 16 4.174387 4.174387 993 +matlab 1 14 4.317488 4.317488 1081 +edui 1 13 4.382027 4.382027 1193 +eicken 1 13 4.382027 4.382027 1134 +composit 1 13 4.382027 4.382027 1150 +thedepart 1 11 4.553877 4.553877 1350 +scienceat 1 11 4.553877 4.553877 1375 +soccer 4 8 4.875197 19.500788 1752 +chao 3 8 4.875197 14.625591 1753 +risc 1 6 5.164786 5.164786 2016 +chess 1 5 5.347108 5.347108 2486 +andoper 1 3 5.857933 5.857933 3621 +messageslow 1 2 6.263398 6.263398 5040 +homepagelast 1 2 6.263398 6.263398 5001 +chichao 2 1 6.957497 13.914994 10171 +isthorsten 1 1 6.957497 6.957497 10172 +addressesand 1 1 6.957497 6.957497 10173 +overheterogen 1 1 6.957497 6.957497 10174 +tham 1 1 6.957497 6.957497 10175 +multimatlab 1 1 6.957497 6.957497 10176 +newsbraziliansocc 1 1 6.957497 6.957497 10177 +portugues 1 1 6.957497 6.957497 10178 +andhomepagesoliv 1 1 6.957497 6.957497 10179 +lubrasa 1 1 6.957497 6.957497 10180 +luso 1 1 6.957497 6.957497 10181 +brazilian 1 1 6.957497 6.957497 10182 +associationu 1 1 6.957497 6.957497 10183 +centerjorn 1 1 6.957497 6.957497 10184 +brasilmi 1 1 6.957497 6.957497 10185 +carstockmasterjayhawk 1 1 6.957497 6.957497 10186 +basketballwww 1 1 6.957497 6.957497 10187 +tennisserverback 1 1 6.957497 6.957497 10188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html new file mode 100644 index 00000000..0f990bf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ckliau^ckliau.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +graduat 2 215 1.386294 2.772588 31 +cornel 2 215 1.386294 2.772588 23 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +avail 1 169 1.791759 1.791759 48 +construct 1 139 1.945910 1.945910 82 +databas 1 122 2.079442 2.079442 86 +technolog 1 131 2.079442 2.079442 102 +find 1 111 2.197225 2.197225 111 +advanc 1 99 2.302585 2.302585 130 +call 1 91 2.397895 2.397895 153 +homepag 1 93 2.397895 2.397895 148 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +master 1 76 2.564949 2.564949 216 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +ithaca 2 65 2.772589 5.545178 294 +improv 1 62 2.772589 2.772589 289 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +programm 1 39 3.258097 3.258097 445 +small 1 39 3.258097 3.258097 447 +soon 1 36 3.367296 3.367296 494 +edulast 1 17 4.110874 4.110874 927 +countri 1 15 4.248495 4.248495 1059 +hopefulli 1 14 4.317488 4.317488 1071 +forth 1 13 4.382027 4.382027 1186 +avenu 1 12 4.465908 4.465908 1277 +mapl 1 11 4.553877 4.553877 1376 +apolog 1 6 5.164786 5.164786 2046 +singapor 2 5 5.347108 10.694216 2487 +intelligencec 1 4 5.568345 5.568345 2673 +engineeringc 1 4 5.568345 5.568345 2904 +chee 2 3 5.857933 11.715866 3480 +tokyo 2 3 5.857933 11.715866 3622 +keong 2 1 6.957497 13.914994 10189 +liau 1 1 6.957497 6.957497 10190 +liauwelcom 1 1 6.957497 6.957497 10191 +networksc 1 1 6.957497 6.957497 10192 +systemsbaccalaur 1 1 6.957497 6.957497 10193 +japanhomei 1 1 6.957497 6.957497 10194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html new file mode 100644 index 00000000..1dc784c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^coleman^coleman.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 6 431 0.693147 4.158882 10 +interest 2 384 0.693147 1.386294 11 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +project 2 340 1.098612 2.197224 18 +student 2 343 1.098612 2.197224 19 +current 1 284 1.098612 1.098612 21 +cornel 5 215 1.386294 6.931470 23 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +group 2 183 1.609438 3.218876 36 +paper 1 205 1.609438 1.609438 38 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +problem 2 147 1.945910 3.891820 75 +hall 1 146 1.945910 1.945910 65 +tool 1 117 2.079442 2.079442 93 +mathemat 2 108 2.197225 4.394450 123 +theori 1 111 2.197225 2.197225 127 +final 1 116 2.197225 2.197225 108 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +center 2 88 2.397895 4.795790 158 +imag 2 91 2.397895 4.795790 161 +associ 1 93 2.397895 2.397895 151 +larg 2 82 2.484907 4.969814 168 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +activ 1 84 2.484907 2.484907 182 +optim 3 79 2.564949 7.694847 197 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +practic 1 70 2.708050 2.708050 246 +ithaca 1 65 2.772589 2.772589 294 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +numer 1 49 3.044522 3.044522 369 +understand 1 47 3.091042 3.091042 384 +linear 2 41 3.218876 6.437752 431 +http 1 41 3.218876 3.218876 420 +york 1 41 3.218876 3.218876 435 +continu 2 39 3.258097 6.516194 448 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +scale 2 28 3.610918 7.221836 613 +constraint 1 26 3.688879 3.688879 636 +concern 1 25 3.737670 3.737670 666 +primari 1 25 3.737670 3.737670 669 +director 1 22 3.850148 3.850148 767 +thoma 2 18 4.060443 8.120886 901 +minim 1 18 4.060443 4.060443 887 +differenti 1 17 4.110874 4.110874 921 +former 1 17 4.110874 4.110874 956 +match 1 16 4.174387 4.174387 965 +nonlinear 1 14 4.317488 4.317488 1107 +affili 1 13 4.382027 4.382027 1194 +discret 1 13 4.382027 4.382027 1165 +equal 1 10 4.653960 4.653960 1424 +rhode 1 9 4.753590 4.753590 1579 +postdoc 2 8 4.875197 9.750394 1724 +strong 1 6 5.164786 5.164786 2029 +reconstruct 1 6 5.164786 5.164786 2170 +inequ 1 6 5.164786 5.164786 2113 +biomed 1 4 5.568345 5.568345 2905 +coleman 2 2 6.263398 12.526796 5041 +colemanthoma 1 1 6.957497 6.957497 10195 +colemancornel 1 1 6.957497 6.957497 10196 +universityi 1 1 6.957497 6.957497 10197 +professcp 1 1 6.957497 6.957497 10198 +ccop 1 1 6.957497 6.957497 10199 +broadfield 1 1 6.957497 6.957497 10200 +programmi 1 1 6.957497 6.957497 10201 +computationalmethod 1 1 6.957497 6.957497 10202 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html new file mode 100644 index 00000000..1bac5f01 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^crary^home.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 7 374 0.693147 4.852029 7 +interest 3 384 0.693147 2.079441 11 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +project 2 340 1.098612 2.197224 18 +languag 7 227 1.386294 9.704058 26 +cornel 4 215 1.386294 5.545176 23 +also 4 259 1.386294 5.545176 28 +design 1 213 1.386294 1.386294 25 +implement 2 152 1.791759 3.583518 52 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +compil 4 122 2.079442 8.317768 96 +report 1 131 2.079442 2.079442 92 +theori 6 111 2.197225 13.183350 127 +mathemat 2 108 2.197225 4.394450 123 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +resourc 1 81 2.484907 2.484907 172 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +name 1 72 2.639057 2.639057 220 +logic 1 71 2.639057 2.639057 230 +onlin 1 75 2.639057 2.639057 223 +view 2 70 2.708050 5.416100 254 +practic 1 70 2.708050 2.708050 246 +import 2 65 2.772589 5.545178 282 +function 1 62 2.772589 2.772589 275 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +collect 1 65 2.772589 2.772589 268 +guid 1 63 2.772589 2.772589 267 +type 8 61 2.833213 22.665704 296 +maintain 1 51 2.995732 2.995732 342 +set 1 50 3.044522 3.044522 361 +standard 1 48 3.044522 3.044522 365 +life 1 50 3.044522 3.044522 375 +featur 1 46 3.091042 3.091042 386 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +answer 1 45 3.135494 3.135494 391 +mark 1 44 3.135494 3.135494 403 +combin 1 42 3.218876 3.218876 421 +theoret 4 39 3.258097 13.032388 446 +map 1 39 3.258097 3.258097 452 +form 1 39 3.258097 3.258097 443 +origin 1 38 3.295837 3.295837 472 +close 1 38 3.295837 3.295837 465 +formal 1 37 3.332205 3.332205 478 +approxim 2 35 3.401197 6.802394 509 +committe 1 34 3.401197 3.401197 522 +eduoffic 1 33 3.433987 3.433987 531 +often 2 31 3.496508 6.993016 551 +semant 3 29 3.583519 10.750557 587 +mind 1 27 3.637586 3.637586 632 +consist 1 26 3.688879 3.688879 651 +primari 1 25 3.737670 3.737670 669 +strategi 1 25 3.737670 3.737670 682 +seri 1 24 3.761200 3.761200 708 +interpret 1 24 3.761200 3.761200 686 +greg 1 24 3.761200 3.761200 695 +emphasi 1 22 3.850148 3.850148 755 +programminglanguag 2 21 3.912023 7.824046 782 +love 2 21 3.912023 7.824046 804 +martin 1 21 3.912023 3.912023 794 +safeti 1 20 3.951244 3.951244 817 +grad 1 20 3.951244 3.951244 837 +particularli 1 19 4.007333 4.007333 867 +lower 1 18 4.060443 4.060443 886 +whole 1 17 4.110874 4.110874 940 +embed 1 14 4.317488 4.317488 1102 +command 1 14 4.317488 4.317488 1083 +translat 2 13 4.382027 8.764054 1164 +care 1 13 4.382027 4.382027 1177 +speak 1 12 4.465908 4.465908 1283 +calculu 1 12 4.465908 4.465908 1203 +israel 1 11 4.553877 4.553877 1366 +modular 1 10 4.653960 4.653960 1392 +relationship 1 10 4.653960 4.653960 1383 +correspond 1 10 4.653960 4.653960 1382 +guarante 1 10 4.653960 4.653960 1391 +nuprl 1 10 4.653960 4.653960 1402 +intermedi 3 9 4.753590 14.260770 1497 +andth 1 9 4.753590 4.753590 1481 +strength 1 9 4.753590 4.753590 1494 +formul 3 8 4.875197 14.625591 1733 +convers 1 8 4.875197 4.875197 1673 +paradigm 1 8 4.875197 4.875197 1662 +invari 1 8 4.875197 4.875197 1748 +leon 1 8 4.875197 4.875197 1631 +babylon 1 8 4.875197 4.875197 1731 +heart 1 8 4.875197 4.875197 1729 +pageth 1 7 5.010635 5.010635 1939 +hear 1 7 5.010635 5.010635 1940 +understood 1 5 5.347108 5.347108 2364 +stage 1 5 5.347108 5.347108 2488 +morrisett 1 5 5.347108 5.347108 2263 +lord 2 4 5.568345 11.136690 2906 +dexter 1 4 5.568345 5.568345 2855 +kozen 1 4 5.568345 5.568345 2619 +ofprogram 1 4 5.568345 5.568345 2624 +soul 1 4 5.568345 5.568345 2907 +karl 1 3 5.857933 5.857933 3623 +constabl 1 3 5.857933 5.857933 3186 +jesu 1 3 5.857933 5.857933 3624 +atyp 2 2 6.263398 12.526796 5042 +tractabl 2 2 6.263398 12.526796 4799 +pagekarl 1 2 6.263398 6.263398 5043 +halloffic 1 2 6.263398 6.263398 4583 +subtyp 1 2 6.263398 6.263398 4375 +intract 1 2 6.263398 6.263398 5044 +anapproxim 1 2 6.263398 6.263398 5045 +unavail 1 2 6.263398 6.263398 5046 +thenuprl 1 2 6.263398 6.263398 5047 +hereat 1 2 6.263398 6.263398 5048 +papersoth 1 2 6.263398 6.263398 5049 +lurker 1 2 6.263398 6.263398 5050 +andwith 1 2 6.263398 6.263398 5051 +thesecond 1 2 6.263398 6.263398 4128 +pagedepart 1 2 6.263398 6.263398 5052 +calculi 2 1 6.957497 13.914994 10203 +crari 1 1 6.957497 6.957497 10204 +crarycrari 1 1 6.957497 6.957497 10205 +researchbroadli 1 1 6.957497 6.957497 10206 +implementationand 1 1 6.957497 6.957497 10207 +kmlwhich 1 1 6.957497 6.957497 10208 +richworld 1 1 6.957497 6.957497 10209 +newprogram 1 1 6.957497 6.957497 10210 +aminterest 1 1 6.957497 6.957497 10211 +deepen 1 1 6.957497 6.957497 10212 +mitig 1 1 6.957497 6.957497 10213 +modelallow 1 1 6.957497 6.957497 10214 +allowsth 1 1 6.957497 6.957497 10215 +andcorrect 1 1 6.957497 6.957497 10216 +additionaloptim 1 1 6.957497 6.957497 10217 +automatedreason 1 1 6.957497 6.957497 10218 +ofrobert 1 1 6.957497 6.957497 10219 +jasonhickei 1 1 6.957497 6.957497 10220 +linksmark 1 1 6.957497 6.957497 10221 +cansearch 1 1 6.957497 6.957497 10222 +biblestudi 1 1 6.957497 6.957497 10223 +thelord 1 1 6.957497 6.957497 10224 +neighbor 1 1 6.957497 6.957497 10225 +commandmentgreat 1 1 6.957497 6.957497 10226 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html new file mode 100644 index 00000000..82532154 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^csun^sun.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 11 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 4 431 0.693147 2.772588 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cornel 7 215 1.386294 9.704058 23 +softwar 2 220 1.386294 2.772588 30 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +parallel 15 169 1.791759 26.876385 60 +distribut 5 162 1.791759 8.958795 51 +algorithm 2 162 1.791759 3.583518 57 +data 1 170 1.791759 1.791759 49 +phone 1 175 1.791759 1.791759 45 +problem 8 147 1.945910 15.567280 75 +process 1 142 1.945910 1.945910 72 +confer 4 126 2.079442 8.317768 100 +report 2 131 2.079442 4.158884 92 +welcom 1 122 2.079442 2.079442 99 +theori 4 111 2.197225 8.788900 127 +mathemat 1 108 2.197225 2.197225 123 +memori 5 101 2.302585 11.512925 139 +advanc 2 99 2.302585 4.605170 130 +technic 2 100 2.302585 4.605170 140 +center 4 88 2.397895 9.591580 158 +proceed 3 93 2.397895 7.193685 152 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +select 1 91 2.397895 2.397895 154 +solut 8 82 2.484907 19.879256 162 +institut 2 84 2.484907 4.969814 187 +journal 2 83 2.484907 4.969814 183 +larg 2 82 2.484907 4.969814 168 +contain 1 81 2.484907 2.484907 174 +second 1 81 2.484907 2.484907 166 +optim 2 79 2.564949 5.129898 197 +state 1 76 2.564949 2.564949 207 +decemb 1 80 2.564949 2.564949 215 +septemb 1 65 2.772589 2.772589 274 +ithaca 1 65 2.772589 2.772589 294 +scientif 7 53 2.944439 20.611073 341 +februari 1 54 2.944439 2.944439 328 +numer 2 49 3.044522 6.089044 369 +algebra 1 45 3.135494 3.135494 394 +linear 8 41 3.218876 25.751008 431 +map 1 39 3.258097 3.258097 452 +close 1 38 3.295837 3.295837 465 +tree 2 36 3.367296 6.734592 492 +least 8 35 3.401197 27.209576 516 +posit 1 31 3.496508 3.496508 552 +multiprocessor 5 28 3.610918 18.054590 605 +packag 1 28 3.610918 3.610918 614 +scale 1 28 3.610918 3.610918 613 +bound 1 26 3.688879 3.688879 659 +deal 1 22 3.850148 3.850148 736 +siam 11 21 3.912023 43.032253 800 +definit 1 19 4.007333 4.007333 864 +matrix 3 17 4.110874 12.332622 933 +spars 16 16 4.174387 66.790192 989 +squar 8 14 4.317488 34.539904 1082 +rank 1 14 4.317488 4.317488 1063 +francisco 1 14 4.317488 4.317488 1095 +affili 1 13 4.382027 4.382027 1194 +philadelphia 4 12 4.465908 17.863632 1244 +matric 1 10 4.653960 4.653960 1399 +factor 4 9 4.753590 19.014360 1544 +conferenceon 1 9 4.753590 4.753590 1595 +simon 1 8 4.875197 4.875197 1697 +watson 1 8 4.875197 4.875197 1691 +univeristi 1 8 4.875197 4.875197 1754 +pennsylvania 1 7 5.010635 5.010635 1932 +cornellunivers 1 7 5.010635 5.010635 1916 +sixth 1 7 5.010635 5.010635 1917 +fifth 1 7 5.010635 5.010635 1931 +compact 1 7 5.010635 5.010635 1907 +dens 2 6 5.164786 10.329572 2122 +reed 1 6 5.164786 5.164786 2086 +row 2 5 5.347108 10.694216 2330 +seventh 2 5 5.347108 10.694216 2464 +orthogon 2 4 5.568345 11.136690 2832 +thecornel 1 4 5.568345 5.568345 2892 +symmetr 1 4 5.568345 5.568345 2908 +ctctr 2 3 5.857933 11.715866 3625 +parallelprocess 1 3 5.857933 5.857933 3626 +coleman 3 2 6.263398 18.790194 5041 +professorthoma 1 2 6.263398 6.263398 5053 +defici 1 2 6.263398 6.263398 5054 +idaho 1 2 6.263398 6.263398 5055 +ondistribut 1 2 6.263398 6.263398 4320 +solutionof 1 2 6.263398 6.263398 5056 +key 1 2 6.263398 6.263398 5057 +dongarra 1 2 6.263398 6.263398 5058 +kennedi 1 2 6.263398 6.263398 4539 +multifront 3 1 6.957497 20.872491 10227 +pothen 3 1 6.957497 20.872491 10228 +chunguang 2 1 6.957497 13.914994 10229 +processingfor 2 1 6.957497 13.914994 10230 +cliqu 2 1 6.957497 13.914994 10231 +sunchunguang 1 1 6.957497 6.957497 10232 +sunphd 1 1 6.957497 6.957497 10233 +ppcx 1 1 6.957497 6.957497 10234 +pssl 1 1 6.957497 6.957497 10235 +psspd 1 1 6.957497 6.957497 10236 +systemsrec 1 1 6.957497 6.957497 10237 +lecturesparallel 1 1 6.957497 6.957497 10238 +coeur 1 1 6.957497 6.957497 10239 +alen 1 1 6.957497 6.957497 10240 +bailei 1 1 6.957497 6.957497 10241 +bjorstad 1 1 6.957497 6.957497 10242 +gilbert 1 1 6.957497 6.957497 10243 +mascagni 1 1 6.957497 6.957497 10244 +schreiber 1 1 6.957497 6.957497 10245 +torczon 1 1 6.957497 6.957497 10246 +choleskyfactor 1 1 6.957497 6.957497 10247 +matriceson 1 1 6.957497 6.957497 10248 +sinovec 1 1 6.957497 6.957497 10249 +leuz 1 1 6.957497 6.957497 10250 +petzold 1 1 6.957497 6.957497 10251 +messina 1 1 6.957497 6.957497 10252 +sorensen 1 1 6.957497 6.957497 10253 +voigt 1 1 6.957497 6.957497 10254 +structuresin 1 1 6.957497 6.957497 10255 +csun 1 1 6.957497 6.957497 10256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html new file mode 100644 index 00000000..1d0736f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dcooper^dcooper.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +research 4 431 0.693147 2.772588 10 +inform 3 412 0.693147 2.079441 8 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +current 3 284 1.098612 3.295836 21 +us 3 329 1.098612 3.295836 16 +design 3 213 1.386294 4.158882 25 +cornel 2 215 1.386294 2.772588 23 +also 2 259 1.386294 2.772588 28 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +group 15 183 1.609438 24.141570 36 +public 1 202 1.609438 1.609438 43 +implement 5 152 1.791759 8.958795 52 +network 5 168 1.791759 8.958795 61 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +process 7 142 1.945910 13.621370 72 +architectur 7 139 1.945910 13.621370 77 +first 2 140 1.945910 3.891820 71 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +problem 1 147 1.945910 1.945910 75 +provid 2 121 2.079442 4.158884 94 +tool 1 117 2.079442 2.079442 93 +send 2 114 2.197225 4.394450 109 +make 1 111 2.197225 2.197225 120 +version 1 113 2.197225 2.197225 122 +user 4 104 2.302585 9.210340 137 +commun 2 95 2.397895 4.795790 157 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +sinc 1 90 2.397895 2.397895 159 +proceed 1 93 2.397895 2.397895 152 +member 3 84 2.484907 7.454721 165 +solut 2 82 2.484907 4.969814 162 +ieee 1 86 2.484907 2.484907 190 +messag 5 76 2.564949 12.824745 212 +server 3 76 2.564949 7.694847 204 +exampl 1 77 2.564949 2.564949 195 +david 6 71 2.639057 15.834342 232 +servic 4 72 2.639057 10.556228 236 +involv 2 71 2.639057 5.278114 227 +upson 1 71 2.639057 2.639057 218 +addit 1 74 2.639057 2.639057 228 +symposium 1 72 2.639057 2.639057 238 +would 3 67 2.708050 8.124150 251 +order 2 69 2.708050 5.416100 249 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +august 1 66 2.708050 2.708050 257 +virtual 2 62 2.772589 5.545178 285 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +type 2 61 2.833213 5.666426 296 +locat 1 59 2.833213 2.833213 303 +thesi 2 57 2.890372 5.780744 327 +allow 3 53 2.944439 8.833317 333 +februari 1 54 2.944439 2.944439 328 +maintain 2 51 2.995732 5.991464 342 +set 2 50 3.044522 6.089044 361 +basic 1 50 3.044522 3.044522 360 +keep 1 44 3.135494 3.135494 409 +made 1 44 3.135494 3.135494 398 +protocol 1 45 3.135494 3.135494 407 +howev 3 41 3.218876 9.656628 422 +might 1 41 3.218876 3.218876 426 +join 3 39 3.258097 9.774291 457 +origin 5 38 3.295837 16.479185 472 +respons 1 37 3.332205 3.332205 476 +within 2 33 3.433987 6.867974 525 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +dissert 1 32 3.465736 3.465736 549 +secur 5 30 3.555348 17.776740 577 +semant 2 29 3.583519 7.167038 587 +limit 1 29 3.583519 3.583519 585 +propos 2 28 3.610918 7.221836 602 +static 1 27 3.637586 3.637586 619 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +client 3 25 3.737670 11.213010 679 +wish 2 24 3.761200 7.522400 692 +mike 1 24 3.761200 3.761200 703 +higher 1 24 3.761200 3.761200 690 +mobil 3 23 3.806662 11.419986 730 +cooper 5 22 3.850148 19.250740 757 +scheme 3 20 3.951244 11.853732 818 +accept 1 18 4.060443 4.060443 879 +along 1 18 4.060443 4.060443 878 +layer 1 17 4.110874 4.110874 926 +devic 3 16 4.174387 12.523161 1002 +permit 2 16 4.174387 8.348774 962 +overhead 1 15 4.248495 4.248495 1035 +horu 5 14 4.317488 21.587440 1116 +command 1 14 4.317488 4.317488 1083 +necessari 1 13 4.382027 4.382027 1147 +whose 1 13 4.382027 4.382027 1166 +carri 1 13 4.382027 4.382027 1152 +kenneth 2 12 4.465908 8.931816 1265 +arbitrari 1 11 4.553877 4.553877 1359 +ofcomput 1 10 4.653960 4.653960 1442 +trust 3 9 4.753590 14.260770 1583 +birman 3 9 4.753590 14.260770 1531 +desir 2 9 4.753590 9.507180 1542 +assumpt 1 9 4.753590 4.753590 1514 +screen 1 9 4.753590 4.753590 1577 +informationabout 1 9 4.753590 4.753590 1515 +crash 1 8 4.875197 4.875197 1616 +accomplish 1 8 4.875197 4.875197 1755 +synchroni 3 7 5.010635 15.031905 1923 +prevent 2 7 5.010635 10.021270 1827 +fromth 1 7 5.010635 5.010635 1802 +slightli 1 7 5.010635 5.010635 1795 +encrypt 1 7 5.010635 5.010635 1835 +privaci 5 6 5.164786 25.823930 2144 +extern 1 6 5.164786 5.164786 2105 +authent 2 5 5.347108 10.694216 2306 +trivial 1 4 5.568345 5.568345 2786 +witha 1 4 5.568345 5.568345 2617 +complic 1 4 5.568345 5.568345 2902 +ofinform 1 4 5.568345 5.568345 2707 +reveal 1 4 5.568345 5.568345 2647 +wireless 1 4 5.568345 5.568345 2693 +anthoni 1 4 5.568345 5.568345 2792 +privat 3 3 5.857933 17.573799 3496 +reiter 1 3 5.857933 5.857933 3461 +proper 1 3 5.857933 5.857933 3323 +recipi 1 3 5.857933 5.857933 3627 +owner 1 3 5.857933 5.857933 3531 +attack 1 3 5.857933 5.857933 3168 +preserv 1 3 5.857933 5.857933 3628 +mobilecomput 1 3 5.857933 5.857933 3629 +untrust 2 2 6.263398 12.526796 4997 +postdoctor 1 2 6.263398 6.263398 5059 +honest 1 2 6.263398 6.263398 5060 +ofvirtu 1 2 6.263398 6.263398 5061 +communicatewith 1 2 6.263398 6.263398 5062 +unlik 1 2 6.263398 6.263398 5063 +sender 1 2 6.263398 6.263398 5064 +adversari 1 2 6.263398 6.263398 5065 +ofmobil 1 2 6.263398 6.263398 5011 +securityand 1 2 6.263398 6.263398 5066 +relationshipsamong 2 1 6.957497 13.914994 10257 +dcooper 1 1 6.957497 6.957497 10258 +securityarchitectur 1 1 6.957497 6.957497 10259 +horuswhich 1 1 6.957497 6.957497 10260 +kerberosnetwork 1 1 6.957497 6.957497 10261 +cryptograph 1 1 6.957497 6.957497 10262 +toprovid 1 1 6.957497 6.957497 10263 +originalimplement 1 1 6.957497 6.957497 10264 +failuremodel 1 1 6.957497 6.957497 10265 +anyprocess 1 1 6.957497 6.957497 10266 +isposs 1 1 6.957497 6.957497 10267 +weaker 1 1 6.957497 6.957497 10268 +untrustedprocess 1 1 6.957497 6.957497 10269 +clientsto 1 1 6.957497 6.957497 10270 +horussecur 1 1 6.957497 6.957497 10271 +keymanag 1 1 6.957497 6.957497 10272 +impersonateanoth 1 1 6.957497 6.957497 10273 +achieveth 1 1 6.957497 6.957497 10274 +asclient 1 1 6.957497 6.957497 10275 +inherentin 1 1 6.957497 6.957497 10276 +contentsof 1 1 6.957497 6.957497 10277 +hiddenwith 1 1 6.957497 6.957497 10278 +outsidersfrom 1 1 6.957497 6.957497 10279 +maintainingth 1 1 6.957497 6.957497 10280 +unlink 1 1 6.957497 6.957497 10281 +chaum 1 1 6.957497 6.957497 10282 +severaloth 1 1 6.957497 6.957497 10283 +staticnetwork 1 1 6.957497 6.957497 10284 +mobilecommun 1 1 6.957497 6.957497 10285 +themessag 1 1 6.957497 6.957497 10286 +advisorken 1 1 6.957497 6.957497 10287 +internaland 1 1 6.957497 6.957497 10288 +apriv 1 1 6.957497 6.957497 10289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html new file mode 100644 index 00000000..5abb8c68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ddhung^index.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +project 3 340 1.098612 3.295836 18 +model 2 145 1.945910 3.891820 69 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +make 1 111 2.197225 2.197225 120 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +question 1 91 2.397895 2.397895 141 +mani 1 92 2.397895 2.397895 150 +resum 1 79 2.564949 2.564949 217 +would 1 67 2.708050 2.708050 251 +type 1 61 2.833213 2.833213 296 +space 1 57 2.890372 2.890372 310 +direct 1 57 2.890372 2.890372 316 +digit 1 52 2.995732 2.995732 348 +hand 1 37 3.332205 3.332205 475 +field 1 37 3.332205 3.332205 482 +game 1 36 3.367296 3.367296 498 +word 1 34 3.401197 3.401197 508 +express 1 32 3.465736 3.465736 540 +fault 1 32 3.465736 3.465736 547 +human 1 32 3.465736 3.465736 546 +scientist 1 31 3.496508 3.496508 560 +travel 1 30 3.555348 3.555348 579 +hope 3 28 3.610918 10.832754 610 +releas 1 28 3.610918 3.610918 616 +mine 2 26 3.688879 7.377758 654 +never 1 25 3.737670 3.737670 671 +reach 1 24 3.761200 3.761200 688 +instead 2 22 3.850148 7.700296 756 +love 3 21 3.912023 11.736069 804 +born 1 21 3.912023 3.912023 798 +ever 3 19 4.007333 12.021999 872 +brief 1 16 4.174387 4.174387 1001 +role 1 14 4.317488 4.317488 1101 +sai 1 13 4.382027 4.382027 1175 +pretti 1 13 4.382027 4.382027 1191 +cannot 1 13 4.382027 4.382027 1144 +rest 1 12 4.465908 4.465908 1259 +surf 1 11 4.553877 4.553877 1301 +shore 1 11 4.553877 4.553877 1377 +night 1 11 4.553877 4.553877 1319 +road 1 11 4.553877 4.553877 1374 +true 1 10 4.653960 4.653960 1422 +poetri 1 9 4.753590 4.753590 1596 +told 1 8 4.875197 4.875197 1658 +heart 1 8 4.875197 4.875197 1729 +hold 1 8 4.875197 4.875197 1645 +prize 1 6 5.164786 5.164786 2150 +gentl 1 5 5.347108 5.347108 2264 +facial 1 5 5.347108 5.347108 2438 +unknown 1 5 5.347108 5.347108 2318 +favor 1 5 5.347108 5.347108 2414 +suffer 1 5 5.347108 5.347108 2268 +cyber 2 4 5.568345 11.136690 2909 +lawyer 1 4 5.568345 5.568345 2836 +uncertain 1 4 5.568345 5.568345 2758 +fals 1 4 5.568345 5.568345 2861 +dark 1 4 5.568345 5.568345 2910 +soul 1 4 5.568345 5.568345 2907 +fear 1 4 5.568345 5.568345 2911 +faith 2 3 5.857933 11.715866 3363 +dread 2 3 5.857933 11.715866 3630 +wise 1 3 5.857933 5.857933 3631 +romanc 1 3 5.857933 5.857933 3632 +passion 1 3 5.857933 5.857933 3633 +tortur 1 3 5.857933 5.857933 3634 +diseas 1 3 5.857933 5.857933 3635 +pain 1 3 5.857933 5.857933 3460 +blame 1 3 5.857933 5.857933 3636 +cold 1 3 5.857933 5.857933 3637 +burn 2 2 6.263398 12.526796 4447 +blink 1 2 6.263398 6.263398 5067 +ey 1 2 6.263398 6.263398 5068 +kei 1 2 6.263398 6.263398 4812 +mice 1 2 6.263398 6.263398 5069 +autobiographi 1 2 6.263398 6.263398 5070 +concret 1 2 6.263398 6.263398 4276 +ear 1 2 6.263398 6.263398 5071 +soft 1 2 6.263398 6.263398 5072 +belov 1 2 6.263398 6.263398 5073 +broken 1 2 6.263398 6.263398 5074 +horror 1 2 6.263398 6.263398 5075 +tear 1 2 6.263398 6.263398 5076 +deed 1 2 6.263398 6.263398 5077 +frozen 1 2 6.263398 6.263398 5078 +deidr 2 1 6.957497 13.914994 10290 +pandora 2 1 6.957497 13.914994 10291 +abodedan 1 1 6.957497 6.957497 10292 +abodegreet 1 1 6.957497 6.957497 10293 +humbl 1 1 6.957497 6.957497 10294 +prithe 1 1 6.957497 6.957497 10295 +teari 1 1 6.957497 6.957497 10296 +weari 1 1 6.957497 6.957497 10297 +thyselv 1 1 6.957497 6.957497 10298 +abod 1 1 6.957497 6.957497 10299 +emot 1 1 6.957497 6.957497 10300 +simnet 1 1 6.957497 6.957497 10301 +builder 1 1 6.957497 6.957497 10302 +faiththei 1 1 6.957497 6.957497 10303 +hardli 1 1 6.957497 6.957497 10304 +ferro 1 1 6.957497 6.957497 10305 +scorn 1 1 6.957497 6.957497 10306 +bend 1 1 6.957497 6.957497 10307 +tone 1 1 6.957497 6.957497 10308 +unseen 1 1 6.957497 6.957497 10309 +unheard 1 1 6.957497 6.957497 10310 +untouch 1 1 6.957497 6.957497 10311 +silenc 1 1 6.957497 6.957497 10312 +yearn 1 1 6.957497 6.957497 10313 +lordlovewarm 1 1 6.957497 6.957497 10314 +friendship 1 1 6.957497 6.957497 10315 +mindless 1 1 6.957497 6.957497 10316 +infatu 1 1 6.957497 6.957497 10317 +sensual 1 1 6.957497 6.957497 10318 +sigh 1 1 6.957497 6.957497 10319 +hopemyth 1 1 6.957497 6.957497 10320 +beauteou 1 1 6.957497 6.957497 10321 +demon 1 1 6.957497 6.957497 10322 +astrai 1 1 6.957497 6.957497 10323 +glimmer 1 1 6.957497 6.957497 10324 +tread 1 1 6.957497 6.957497 10325 +amidst 1 1 6.957497 6.957497 10326 +thorn 1 1 6.957497 6.957497 10327 +filthi 1 1 6.957497 6.957497 10328 +miseri 1 1 6.957497 6.957497 10329 +etern 1 1 6.957497 6.957497 10330 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html new file mode 100644 index 00000000..58c2039d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^dean.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +depart 1 457 0.693147 0.693147 12 +engin 2 297 1.098612 2.197224 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +cornel 3 215 1.386294 4.158882 23 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +network 2 168 1.791759 3.583518 61 +tool 3 117 2.079442 6.238326 93 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +site 1 106 2.197225 2.197225 119 +intern 1 108 2.197225 2.197225 128 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +search 4 95 2.397895 9.591580 155 +select 1 91 2.397895 2.397895 154 +imag 1 91 2.397895 2.397895 161 +librari 4 87 2.484907 9.939628 181 +institut 1 84 2.484907 2.484907 187 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +server 5 76 2.564949 12.824745 204 +refer 2 78 2.564949 5.129898 203 +collect 1 65 2.772589 2.772589 268 +ithaca 1 65 2.772589 2.772589 294 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +digit 4 52 2.995732 11.982928 348 +understand 1 47 3.091042 3.091042 384 +directori 1 45 3.135494 3.135494 396 +music 1 42 3.218876 3.218876 436 +global 1 34 3.401197 3.401197 520 +secur 1 30 3.555348 3.555348 577 +weather 2 28 3.610918 7.221836 618 +berkelei 1 26 3.688879 3.688879 657 +recognit 1 23 3.806662 3.806662 723 +togeth 1 23 3.806662 3.806662 714 +head 1 23 3.806662 3.806662 732 +siam 1 21 3.912023 3.912023 800 +navig 1 21 3.912023 3.912023 796 +lyco 1 19 4.007333 4.007333 871 +excel 1 19 4.007333 4.007333 868 +stanford 1 17 4.110874 4.110874 955 +whole 1 17 4.110874 4.110874 940 +charact 1 15 4.248495 4.248495 1028 +dean 1 14 4.317488 4.317488 1104 +anonym 1 14 4.317488 4.317488 1100 +audio 1 14 4.317488 4.317488 1094 +captur 1 12 4.465908 4.465908 1232 +michigan 1 11 4.553877 4.553877 1368 +earth 1 10 4.653960 4.653960 1463 +catalog 1 10 4.653960 4.653960 1431 +folk 1 9 4.753590 4.753590 1597 +illinoi 1 7 5.010635 5.010635 1941 +gatewai 1 7 5.010635 5.010635 1942 +scout 1 7 5.010635 5.010635 1903 +gopher 2 6 5.164786 10.329572 1982 +legal 1 6 5.164786 5.164786 2094 +forecast 1 6 5.164786 5.164786 2171 +elsewher 1 5 5.347108 5.347108 2444 +cuinfo 1 4 5.568345 5.568345 2626 +planet 1 4 5.568345 5.568345 2912 +gear 1 4 5.568345 5.568345 2891 +krafft 1 3 5.857933 5.857933 3638 +archi 1 3 5.857933 5.857933 3639 +cern 1 2 6.263398 6.263398 5079 +urlsdean 1 1 6.957497 6.957497 10331 +interestcornel 1 1 6.957497 6.957497 10332 +dimund 1 1 6.957497 6.957497 10333 +librarysearch 1 1 6.957497 6.957497 10334 +veronica 1 1 6.957497 6.957497 10335 +faqsvari 1 1 6.957497 6.957497 10336 +folkbook 1 1 6.957497 6.957497 10337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html new file mode 100644 index 00000000..c253b424 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dean^home.html @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 6 431 0.693147 4.158882 10 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 4 340 1.098612 4.394448 18 +current 1 284 1.098612 1.098612 21 +cornel 5 215 1.386294 6.931470 23 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +avail 2 169 1.791759 3.583518 48 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +report 3 131 2.079442 6.238326 92 +number 1 130 2.079442 2.079442 97 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +technic 2 100 2.302585 4.605170 140 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +select 1 91 2.397895 2.397895 154 +build 1 85 2.484907 2.484907 184 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +issu 1 78 2.564949 2.564949 211 +upson 1 71 2.639057 2.639057 218 +servic 1 72 2.639057 2.639057 236 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +materi 1 75 2.639057 2.639057 221 +investig 1 51 2.995732 2.995732 353 +protocol 1 45 3.135494 3.135494 407 +secur 1 30 3.555348 3.555348 577 +intend 1 28 3.610918 3.610918 599 +administr 1 27 3.637586 3.637586 628 +universityithaca 1 24 3.761200 3.761200 710 +initi 1 23 3.806662 3.806662 717 +director 1 22 3.850148 3.850148 767 +sciencecornel 1 22 3.850148 3.850148 768 +serv 1 22 3.850148 3.850148 758 +inth 1 22 3.850148 3.850148 741 +fund 1 21 3.912023 3.912023 805 +similar 1 21 3.912023 3.912023 771 +facil 2 20 3.951244 7.902488 814 +break 1 20 3.951244 3.951244 812 +five 1 19 4.007333 4.007333 841 +side 1 15 4.248495 4.248495 1022 +carl 1 15 4.248495 4.248495 1024 +dean 3 14 4.317488 12.952464 1104 +emploi 1 12 4.465908 4.465908 1284 +arpa 1 11 4.553877 4.553877 1369 +eight 1 11 4.553877 4.553877 1331 +consortium 2 10 4.653960 9.307920 1467 +princip 1 10 4.653960 4.653960 1397 +rapid 1 10 4.653960 4.653960 1453 +researchi 1 8 4.875197 4.875197 1756 +xerox 1 8 4.875197 4.875197 1725 +davi 1 7 5.010635 5.010635 1888 +sciencedepart 1 6 5.164786 5.164786 2172 +interestedin 1 5 5.347108 5.347108 2260 +employe 1 4 5.568345 5.568345 2717 +krafft 2 3 5.857933 11.715866 3638 +dienst 2 3 5.857933 11.715866 3640 +halldepart 1 3 5.857933 5.857933 3641 +dissemin 1 2 6.263398 6.263398 5080 +thedesign 1 2 6.263398 6.263398 4251 +lagoz 1 2 6.263398 6.263398 5081 +facilitiesaddress 1 1 6.957497 6.957497 10338 +guis 1 1 6.957497 6.957497 10339 +anadministr 1 1 6.957497 6.957497 10340 +andworri 1 1 6.957497 6.957497 10341 +spart 1 1 6.957497 6.957497 10342 +thecorpor 1 1 6.957497 6.957497 10343 +cnri 1 1 6.957497 6.957497 10344 +technicalresearch 1 1 6.957497 6.957497 10345 +theexist 1 1 6.957497 6.957497 10346 +disseminationov 1 1 6.957497 6.957497 10347 +atechn 1 1 6.957497 6.957497 10348 +ondienst 1 1 6.957497 6.957497 10349 +togethera 1 1 6.957497 6.957497 10350 +url 1 1 6.957497 6.957497 10351 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html new file mode 100644 index 00000000..9b678a5a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^divakar^divakar.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +cornel 3 215 1.386294 4.158882 23 +graduat 1 215 1.386294 1.386294 31 +address 1 170 1.791759 1.791759 62 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +analysi 2 124 2.079442 4.158884 98 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +good 1 77 2.564949 2.564949 200 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +numer 1 49 3.044522 3.044522 369 +advis 1 6 5.164786 5.164786 2173 +divakar 1 1 6.957497 6.957497 10352 +pagedivakar 1 1 6.957497 6.957497 10353 +viswanathdivakar 1 1 6.957497 6.957497 10354 +isnumer 1 1 6.957497 6.957497 10355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html new file mode 100644 index 00000000..6edb7572 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^diyu^y.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +cornel 3 215 1.386294 4.158882 23 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +databas 2 122 2.079442 4.158884 86 +compil 2 122 2.079442 4.158884 96 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +advanc 1 99 2.302585 2.302585 130 +institut 1 84 2.484907 2.484907 187 +know 1 80 2.564949 2.564949 198 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +appli 1 71 2.639057 2.639057 226 +html 1 75 2.639057 2.639057 235 +receiv 2 66 2.708050 5.416100 244 +practic 1 70 2.708050 2.708050 246 +ithaca 2 65 2.772589 5.545178 294 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +locat 1 59 2.833213 2.833213 303 +friend 1 48 3.044522 3.044522 376 +physic 2 47 3.091042 6.182084 377 +york 1 41 3.218876 3.218876 435 +littl 1 39 3.258097 3.258097 454 +live 1 40 3.258097 3.258097 451 +china 2 37 3.332205 6.664410 487 +winter 1 36 3.367296 3.367296 500 +except 1 28 3.610918 3.610918 607 +campu 1 27 3.637586 3.637586 623 +beij 1 19 4.007333 4.007333 876 +miss 1 19 4.007333 4.007333 866 +beauti 1 18 4.060443 4.060443 912 +miller 1 17 4.110874 4.110874 949 +practicum 1 16 4.174387 4.174387 960 +mayb 1 15 4.248495 4.248495 1014 +anywai 1 15 4.248495 4.248495 1047 +translat 1 13 4.382027 4.382027 1164 +central 1 13 4.382027 4.382027 1160 +tsinghua 1 13 4.382027 4.382027 1195 +realiti 1 12 4.465908 4.465908 1272 +jersei 2 9 4.753590 9.507180 1587 +brought 1 7 5.010635 5.010635 1925 +railroad 1 6 5.164786 5.164786 2161 +coursesc 1 4 5.568345 5.568345 2692 +engineeringc 1 4 5.568345 5.568345 2904 +doubt 1 3 5.857933 5.857933 3119 +gorgeou 1 2 6.263398 6.263398 5082 +newark 1 2 6.263398 6.263398 5032 +diyu 2 1 6.957497 13.914994 10356 +pagediyu 1 1 6.957497 6.957497 10357 +daisi 1 1 6.957497 6.957497 10358 +translatorsfal 1 1 6.957497 6.957497 10359 +systemc 1 1 6.957497 6.957497 10360 +systemsel 1 1 6.957497 6.957497 10361 +telecommunicationsm 1 1 6.957497 6.957497 10362 +projectorigin 1 1 6.957497 6.957497 10363 +projectsinc 1 1 6.957497 6.957497 10364 +unviers 1 1 6.957497 6.957497 10365 +linksjava 1 1 6.957497 6.957497 10366 +tkfavorit 1 1 6.957497 6.957497 10367 +sitestimecnnlondon 1 1 6.957497 6.957497 10368 +timeswashington 1 1 6.957497 6.957497 10369 +postchines 1 1 6.957497 6.957497 10370 +digestchina 1 1 6.957497 6.957497 10371 +digestfeng 1 1 6.957497 6.957497 10372 +yuanxin 1 1 6.957497 6.957497 10373 +siart 1 1 6.957497 6.957497 10374 +chinaloc 1 1 6.957497 6.957497 10375 +connectionsctc 1 1 6.957497 6.957497 10376 +sunlabweathermovi 1 1 6.957497 6.957497 10377 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html new file mode 100644 index 00000000..56eb639e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dph^dph.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +interest 2 384 0.693147 1.386294 11 +program 2 374 0.693147 1.386294 7 +us 3 329 1.098612 3.295836 16 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +also 2 259 1.386294 2.772588 28 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +algorithm 2 162 1.791759 3.583518 57 +implement 2 152 1.791759 3.583518 52 +avail 2 169 1.791759 3.583518 48 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +area 3 144 1.945910 5.837730 80 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +problem 1 147 1.945910 1.945910 75 +document 6 121 2.079442 12.476652 89 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +teach 2 108 2.197225 4.394450 112 +structur 1 106 2.197225 2.197225 105 +techniqu 1 99 2.302585 2.302585 138 +imag 3 91 2.397895 7.193685 161 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +activ 2 84 2.484907 4.969814 182 +wide 1 84 2.484907 2.484907 185 +educ 1 86 2.484907 2.484907 191 +start 1 83 2.484907 2.484907 173 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +method 2 80 2.564949 5.129898 213 +june 1 79 2.564949 2.564949 214 +main 1 67 2.708050 2.708050 256 +view 1 70 2.708050 2.708050 254 +evalu 1 64 2.772589 2.772589 266 +type 1 61 2.833213 2.833213 296 +share 1 59 2.833213 2.833213 304 +index 1 56 2.890372 2.890372 309 +profession 1 51 2.995732 2.995732 345 +investig 1 51 2.995732 2.995732 353 +visual 3 48 3.044522 9.133566 372 +format 1 48 3.044522 3.044522 356 +without 1 50 3.044522 3.044522 370 +cool 1 49 3.044522 3.044522 374 +electron 2 47 3.091042 6.182084 379 +video 2 44 3.135494 6.270988 405 +favorit 1 44 3.135494 3.135494 410 +offer 1 43 3.178054 3.178054 414 +vision 2 41 3.218876 6.437752 430 +fast 1 42 3.218876 3.218876 429 +theoret 1 39 3.258097 3.258097 446 +author 1 39 3.258097 3.258097 450 +small 1 39 3.258097 3.258097 447 +brian 1 38 3.295837 3.295837 466 +approxim 1 35 3.401197 3.401197 509 +collabor 3 32 3.465736 10.397208 543 +rang 1 30 3.555348 3.555348 565 +chair 1 29 3.583519 3.583519 596 +held 1 28 3.610918 3.610918 600 +compar 1 26 3.688879 3.688879 648 +sport 1 25 3.737670 3.737670 683 +pattern 1 24 3.761200 3.761200 689 +recognit 5 23 3.806662 19.033310 723 +highli 1 23 3.806662 3.806662 725 +geometri 1 22 3.850148 3.850148 752 +smith 1 20 3.951244 3.951244 820 +geometr 1 19 4.007333 4.007333 852 +monitor 2 17 4.110874 8.221748 941 +match 3 16 4.174387 12.523161 965 +remot 2 15 4.248495 8.496990 1041 +track 1 15 4.248495 4.248495 1029 +matlab 1 14 4.317488 4.317488 1081 +daniel 1 12 4.465908 4.465908 1233 +target 1 12 4.465908 4.465908 1282 +extrem 1 11 4.553877 4.553877 1330 +mountain 1 10 4.653960 4.653960 1456 +bike 1 10 4.653960 4.653960 1468 +juan 1 9 4.753590 4.753590 1580 +xerox 1 8 4.875197 4.875197 1725 +compact 1 7 5.010635 5.010635 1907 +huttenloch 2 6 5.164786 10.329572 1983 +fraction 1 5 5.347108 5.347108 2259 +conot 1 5 5.347108 5.347108 2245 +stupid 1 5 5.347108 5.347108 2489 +hausdorff 2 4 5.568345 11.136690 2633 +identif 1 4 5.568345 5.568345 2773 +cvpr 1 4 5.568345 5.568345 2761 +geek 1 2 6.263398 6.263398 5083 +snowboard 1 2 6.263398 6.263398 5084 +professordph 1 1 6.957497 6.957497 10378 +eigenspac 1 1 6.957497 6.957497 10379 +digipap 1 1 6.957497 6.957497 10380 +viewabl 1 1 6.957497 6.957497 10381 +parc 1 1 6.957497 6.957497 10382 +attitud 1 1 6.957497 6.957497 10383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html new file mode 100644 index 00000000..9a7553dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^dsouza^dsouza.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +work 3 380 0.693147 2.079441 9 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +design 2 213 1.386294 2.772588 25 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +process 8 142 1.945910 15.567280 72 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +tool 3 117 2.079442 6.238326 93 +postscript 2 131 2.079442 4.158884 90 +technolog 1 131 2.079442 2.079442 102 +specif 2 106 2.197225 4.394450 106 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +mani 1 92 2.397895 2.397895 150 +present 1 91 2.397895 2.397895 145 +wide 1 84 2.484907 2.484907 185 +method 1 80 2.564949 2.564949 213 +exampl 1 77 2.564949 2.564949 195 +june 1 79 2.564949 2.564949 214 +addit 1 74 2.639057 2.639057 228 +order 1 69 2.708050 2.708050 249 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +simpl 1 60 2.833213 2.833213 298 +thesi 1 57 2.890372 2.890372 327 +investig 1 51 2.995732 2.995732 353 +basic 1 50 3.044522 3.044522 360 +understand 1 47 3.091042 3.091042 384 +algebra 7 45 3.135494 21.948458 394 +better 1 45 3.135494 3.135494 401 +form 1 39 3.258097 3.258097 443 +theoret 1 39 3.258097 3.258097 446 +prototyp 1 38 3.295837 3.295837 463 +concurr 1 34 3.401197 3.401197 501 +express 2 32 3.465736 6.931472 540 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +computersci 1 30 3.555348 3.555348 562 +semant 1 29 3.583519 3.583519 587 +becom 1 28 3.610918 3.610918 603 +full 1 28 3.610918 3.610918 615 +effort 1 26 3.688879 3.688879 652 +compar 1 26 3.688879 3.688879 648 +input 1 23 3.806662 3.806662 727 +verif 1 20 3.951244 3.951244 826 +aid 1 18 4.060443 4.060443 904 +former 1 17 4.110874 4.110874 956 +edui 1 13 4.382027 4.382027 1193 +calculu 1 12 4.465908 4.465908 1203 +verifi 1 12 4.465908 4.465908 1261 +custom 1 10 4.653960 4.653960 1414 +latter 1 9 4.753590 4.753590 1522 +bloom 1 4 5.568345 5.568345 2913 +commonli 1 4 5.568345 5.568345 2877 +metatheori 1 3 5.857933 5.857933 3642 +allevi 1 3 5.857933 5.857933 3643 +checker 1 3 5.857933 5.857933 3644 +lnc 2 2 6.263398 12.526796 5085 +theproblem 1 2 6.263398 6.263398 4560 +inher 1 2 6.263398 6.263398 5086 +dsouza 3 1 6.957497 20.872491 10384 +ashvin 2 1 6.957497 13.914994 10385 +bard 1 1 6.957497 6.957497 10386 +oftool 1 1 6.957497 6.957497 10387 +andverif 1 1 6.957497 6.957497 10388 +withrespect 1 1 6.957497 6.957497 10389 +immediatelyavail 1 1 6.957497 6.957497 10390 +duplic 1 1 6.957497 6.957497 10391 +gso 1 1 6.957497 6.957497 10392 +loto 1 1 6.957497 6.957497 10393 +exploringappl 1 1 6.957497 6.957497 10394 +bdd 1 1 6.957497 6.957497 10395 +algebraterm 1 1 6.957497 6.957497 10396 +postscipt 1 1 6.957497 6.957497 10397 +lite 1 1 6.957497 6.957497 10398 +presentedth 1 1 6.957497 6.957497 10399 +compass 1 1 6.957497 6.957497 10400 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html new file mode 100644 index 00000000..fc04f1c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ergun^ergun.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +cornel 2 215 1.386294 2.772588 23 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +sinc 1 90 2.397895 2.397895 159 +stuff 1 87 2.484907 2.484907 171 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +improv 1 62 2.772589 2.772589 289 +dept 1 64 2.772589 2.772589 291 +prof 1 64 2.772589 2.772589 273 +written 1 63 2.772589 2.772589 278 +visit 1 63 2.772589 2.772589 288 +might 1 41 3.218876 3.218876 426 +origin 1 38 3.295837 3.295837 472 +art 1 29 3.583519 3.583519 593 +alwai 1 24 3.761200 3.761200 691 +fine 1 20 3.951244 3.951244 822 +ever 1 19 4.007333 4.007333 872 +warn 1 14 4.317488 4.317488 1068 +minor 1 12 4.465908 4.465908 1237 +undergrad 1 9 4.753590 4.753590 1589 +risk 1 8 4.875197 4.875197 1689 +heavi 1 7 5.010635 5.010635 1841 +rubinfeld 1 6 5.164786 5.164786 1998 +ronitt 1 5 5.347108 5.347108 2265 +paint 1 5 5.347108 5.347108 2400 +turkei 2 4 5.568345 11.136690 2914 +funda 2 3 5.857933 11.715866 3645 +ergun 2 2 6.263398 12.526796 5087 +angri 1 2 6.263398 6.263398 5088 +dog 1 2 6.263398 6.263398 5089 +pagefunda 1 1 6.957497 6.957497 10401 +ergn 1 1 6.957497 6.957497 10402 +eduhi 1 1 6.957497 6.957497 10403 +studentin 1 1 6.957497 6.957497 10404 +programcheck 1 1 6.957497 6.957497 10405 +researchpag 1 1 6.957497 6.957497 10406 +izmir 1 1 6.957497 6.957497 10407 +bilkentunivers 1 1 6.957497 6.957497 10408 +ankara 1 1 6.957497 6.957497 10409 +encounterpag 1 1 6.957497 6.957497 10410 +turkish 1 1 6.957497 6.957497 10411 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html new file mode 100644 index 00000000..e3574390 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^eva^eva.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 11 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +research 6 431 0.693147 4.158882 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +current 3 284 1.098612 3.295836 21 +time 1 293 1.098612 1.098612 17 +cornel 2 215 1.386294 2.772588 23 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +paper 3 205 1.609438 4.828314 38 +oper 2 180 1.609438 3.218876 34 +public 2 202 1.609438 3.218876 43 +includ 1 208 1.609438 1.609438 42 +algorithm 12 162 1.791759 21.501108 57 +network 4 168 1.791759 7.167036 61 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +problem 7 147 1.945910 13.621370 75 +professor 1 137 1.945910 1.945910 76 +click 1 142 1.945910 1.945910 78 +assign 1 135 1.945910 1.945910 66 +high 1 130 2.079442 2.079442 101 +theori 8 111 2.197225 17.577800 127 +version 7 113 2.197225 15.380575 122 +mathemat 1 108 2.197225 2.197225 123 +find 1 111 2.197225 2.197225 111 +intern 1 108 2.197225 2.197225 128 +proceed 9 93 2.397895 21.581055 152 +mani 1 92 2.397895 2.397895 150 +activ 2 84 2.484907 4.969814 182 +school 2 84 2.484907 4.969814 188 +journal 2 83 2.484907 4.969814 183 +ieee 1 86 2.484907 2.484907 190 +appear 9 78 2.564949 23.084541 210 +optim 2 79 2.564949 5.129898 197 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +symposium 11 72 2.639057 29.029627 238 +upson 1 71 2.639057 2.639057 218 +practic 1 70 2.708050 2.708050 246 +januari 3 62 2.772589 8.317767 264 +complex 2 64 2.772589 5.545178 269 +improv 2 62 2.772589 5.545178 289 +foundat 1 62 2.772589 2.772589 286 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +summer 1 56 2.890372 2.890372 311 +cover 1 55 2.944439 2.944439 329 +particular 1 51 2.995732 2.995732 352 +fast 4 42 3.218876 12.875504 429 +linear 1 41 3.218876 3.218876 431 +annual 8 40 3.258097 26.064776 458 +industri 1 38 3.295837 3.295837 464 +approxim 7 35 3.401197 23.808379 509 +concurr 1 34 3.401197 3.401197 501 +survei 1 35 3.401197 3.401197 513 +bibliographi 1 34 3.401197 3.401197 518 +graph 2 30 3.555348 7.110696 576 +computersci 1 30 3.555348 3.555348 562 +bound 1 26 3.688879 3.688879 659 +proc 1 26 3.688879 3.688879 649 +aspect 1 25 3.737670 3.737670 663 +flow 7 24 3.761200 26.328400 700 +universityithaca 1 24 3.761200 3.761200 710 +springer 2 22 3.850148 7.700296 750 +verlag 2 22 3.850148 7.700296 751 +siam 5 21 3.912023 19.560115 800 +path 2 21 3.912023 7.824046 778 +theorem 2 21 3.912023 7.824046 786 +unit 1 21 3.912023 3.912023 779 +rout 1 21 3.912023 3.912023 793 +vlsi 1 21 3.912023 3.912023 795 +annot 1 21 3.912023 3.912023 775 +north 2 19 4.007333 8.014666 873 +separ 1 19 4.007333 4.007333 844 +sept 1 17 4.110874 4.110874 952 +spars 1 16 4.174387 4.174387 989 +polynomi 2 14 4.317488 8.634976 1069 +embed 1 14 4.317488 4.317488 1102 +discret 2 13 4.382027 8.764054 1165 +speak 1 12 4.465908 4.465908 1283 +arbitrari 1 11 4.553877 4.553877 1359 +itali 1 11 4.553877 4.553877 1378 +packet 1 10 4.653960 4.653960 1415 +strongli 1 10 4.653960 4.653960 1406 +preliminari 5 9 4.753590 23.767950 1480 +congress 1 9 4.753590 4.753590 1592 +combinatori 3 8 4.875197 14.625591 1629 +hallcornel 1 8 4.875197 4.875197 1757 +integ 1 8 4.875197 4.875197 1688 +capac 1 8 4.875197 4.875197 1740 +switch 1 8 4.875197 4.875197 1718 +daughter 1 7 5.010635 5.010635 1943 +maxim 1 7 5.010635 5.010635 1944 +handbook 2 6 5.164786 10.329572 2061 +rebecca 1 6 5.164786 5.164786 2174 +dens 1 6 5.164786 5.164786 2122 +inequ 1 6 5.164786 5.164786 2113 +holland 2 5 5.347108 10.694216 2490 +oncomput 1 5 5.347108 5.347108 2326 +stoc 1 5 5.347108 5.347108 2491 +fraction 1 5 5.347108 5.347108 2259 +proceedingsof 1 5 5.347108 5.347108 2331 +combinator 3 4 5.568345 16.705035 2915 +disjoint 2 4 5.568345 11.136690 2709 +graham 2 4 5.568345 11.136690 2817 +cut 1 4 5.568345 5.568345 2620 +stein 2 3 5.857933 11.715866 3646 +planar 2 3 5.857933 11.715866 3647 +thegener 1 3 5.857933 5.857933 3648 +combinatorica 1 3 5.857933 5.857933 3649 +pack 1 3 5.857933 5.857933 3597 +violat 1 3 5.857933 5.857933 3211 +tokyo 1 3 5.857933 5.857933 3622 +netherland 1 3 5.857933 5.857933 3650 +tardo 20 2 6.263398 125.267960 5090 +multicommod 4 2 6.263398 25.053592 4761 +lovasz 3 2 6.263398 18.790194 5091 +goldberg 2 2 6.263398 12.526796 4313 +hopp 2 2 6.263398 12.526796 5092 +kleinberg 2 2 6.263398 12.526796 5093 +julia 1 2 6.263398 6.263398 5094 +broadli 1 2 6.263398 6.263398 5095 +programmingproblem 1 2 6.263398 6.263398 4082 +appearedin 1 2 6.263398 6.263398 5096 +leighton 1 2 6.263398 6.263398 5097 +inmathemat 1 2 6.263398 6.263398 5098 +hasappear 1 2 6.263398 6.263398 5099 +goeman 1 2 6.263398 6.263398 5100 +williamson 1 2 6.263398 6.263398 5101 +diamet 1 2 6.263398 6.263398 5102 +tarjan 1 2 6.263398 6.263398 4278 +ori 10 1 6.957497 69.574970 10412 +shmoi 6 1 6.957497 41.744982 10413 +plotkin 6 1 6.957497 41.744982 10414 +approximationalgorithm 4 1 6.957497 27.829988 10415 +klein 2 1 6.957497 13.914994 10416 +grotschel 2 1 6.957497 13.914994 10417 +tardosassoci 1 1 6.957497 6.957497 10418 +engineeringphon 1 1 6.957497 6.957497 10419 +researchrec 1 1 6.957497 6.957497 10420 +mostlywork 1 1 6.957497 6.957497 10421 +networkproblem 1 1 6.957497 6.957497 10422 +paperssurvei 1 1 6.957497 6.957497 10423 +cutratio 1 1 6.957497 6.957497 10424 +fasterapproxim 1 1 6.957497 6.957497 10425 +problemwith 1 1 6.957497 6.957497 10426 +makedon 1 1 6.957497 6.957497 10427 +tragouda 1 1 6.957497 6.957497 10428 +flowproblem 1 1 6.957497 6.957497 10429 +annualacm 1 1 6.957497 6.957497 10430 +thefound 1 1 6.957497 6.957497 10431 +designproblem 1 1 6.957497 6.957497 10432 +discretealgorithm 1 1 6.957497 6.957497 10433 +someevacu 1 1 6.957497 6.957497 10434 +ondiscret 1 1 6.957497 6.957497 10435 +quickest 1 1 6.957497 6.957497 10436 +transship 1 1 6.957497 6.957497 10437 +theproceed 1 1 6.957497 6.957497 10438 +steiner 1 1 6.957497 6.957497 10439 +multicut 1 1 6.957497 6.957497 10440 +pathsproblem 1 1 6.957497 6.957497 10441 +annualiee 1 1 6.957497 6.957497 10442 +rabani 1 1 6.957497 6.957497 10443 +fleischer 1 1 6.957497 6.957497 10444 +comb 1 1 6.957497 6.957497 10445 +ipco 1 1 6.957497 6.957497 10446 +kort 1 1 6.957497 6.957497 10447 +lovaszand 1 1 6.957497 6.957497 10448 +schrijver 1 1 6.957497 6.957497 10449 +inoptim 1 1 6.957497 6.957497 10450 +ofmathematician 1 1 6.957497 6.957497 10451 +kyoto 1 1 6.957497 6.957497 10452 +inproc 1 1 6.957497 6.957497 10453 +maastricht 1 1 6.957497 6.957497 10454 +networkoptim 1 1 6.957497 6.957497 10455 +netflow 1 1 6.957497 6.957497 10456 +miniato 1 1 6.957497 6.957497 10457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html new file mode 100644 index 00000000..73b73237 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fcc^fcc.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +california 1 46 3.091042 3.091042 388 +departmentcornel 1 5 5.347108 5.347108 2275 +franci 2 3 5.857933 11.715866 3287 +universitycomput 1 3 5.857933 5.857933 3651 +berkeleymathemat 1 1 6.957497 6.957497 10458 +departmentcomput 1 1 6.957497 6.957497 10459 +departmenthumorfcc 1 1 6.957497 6.957497 10460 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html new file mode 100644 index 00000000..592c7022 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^flung^flung.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +world 1 115 2.197225 2.197225 126 +felix 1 2 6.263398 6.263398 5103 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html new file mode 100644 index 00000000..e08f6487 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fms^fms.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +link 2 247 1.386294 2.772588 24 +languag 2 227 1.386294 2.772588 26 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +click 1 142 1.945910 1.945910 78 +report 2 131 2.079442 4.158884 92 +introduct 1 126 2.079442 2.079442 87 +analysi 1 124 2.079442 2.079442 98 +person 1 111 2.197225 2.197225 117 +technic 2 100 2.302585 4.605170 140 +take 1 97 2.302585 2.302585 134 +homepag 2 93 2.397895 4.795790 148 +complet 1 77 2.564949 2.564949 208 +april 1 77 2.564949 2.564949 196 +upson 1 71 2.639057 2.639057 218 +test 2 66 2.708050 5.416100 252 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +complex 1 64 2.772589 2.772589 269 +juli 1 60 2.833213 2.833213 305 +algebra 2 45 3.135494 6.270988 394 +math 1 44 3.135494 3.135494 402 +semant 1 29 3.583519 3.583519 587 +universityithaca 1 24 3.761200 3.761200 710 +greg 1 24 3.761200 3.761200 695 +sciencecornel 1 22 3.850148 3.850148 768 +smith 4 20 3.951244 15.804976 820 +grad 1 20 3.951244 3.951244 837 +decid 1 14 4.317488 4.317488 1075 +food 1 12 4.465908 4.465908 1285 +sundai 1 10 4.653960 4.653960 1387 +morrisett 1 5 5.347108 5.347108 2263 +dexter 3 4 5.568345 16.705035 2855 +kozen 3 4 5.568345 16.705035 2619 +catch 1 4 5.568345 5.568345 2602 +halldepart 1 3 5.857933 5.857933 3641 +cohen 1 3 5.857933 5.857933 3652 +erni 1 2 6.263398 6.263398 5104 +epicuri 1 2 6.263398 6.263398 5105 +frederick 3 1 6.957497 20.872491 10461 +kleen 2 1 6.957497 13.914994 10462 +homepagefrederick 1 1 6.957497 6.957497 10463 +zine 1 1 6.957497 6.957497 10464 +cartalk 1 1 6.957497 6.957497 10465 +clack 1 1 6.957497 6.957497 10466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html new file mode 100644 index 00000000..80e3fce8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^frank^frank.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +updat 1 191 1.609438 1.609438 41 +phone 1 175 1.791759 1.791759 45 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +improv 1 62 2.772589 2.772589 289 +electron 1 47 3.091042 3.091042 379 +either 1 35 3.401197 3.401197 506 +post 1 35 3.401197 3.401197 505 +actual 1 28 3.610918 3.610918 604 +doctor 1 24 3.761200 3.761200 709 +happi 1 14 4.317488 4.317488 1079 +frank 7 9 4.753590 33.275130 1568 +matter 1 8 4.875197 4.875197 1627 +xerox 1 8 4.875197 4.875197 1725 +planet 1 4 5.568345 5.568345 2912 +adelstein 1 1 6.957497 6.957497 10467 +checkout 1 1 6.957497 6.957497 10468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html new file mode 100644 index 00000000..65a6772d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^fred^fred.html @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 2 297 1.098612 2.197224 20 +current 2 284 1.098612 2.197224 21 +project 1 340 1.098612 1.098612 18 +cornel 4 215 1.386294 5.545176 23 +languag 3 227 1.386294 4.158882 26 +softwar 2 220 1.386294 2.772588 30 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +model 1 145 1.945910 1.945910 69 +year 1 148 1.945910 1.945910 84 +postscript 1 131 2.079442 2.079442 90 +theori 2 111 2.197225 4.394450 127 +version 1 113 2.197225 2.197225 122 +peopl 1 96 2.302585 2.302585 132 +center 2 88 2.397895 4.795790 158 +section 1 94 2.397895 2.397895 149 +resum 1 79 2.564949 2.564949 217 +appear 1 78 2.564949 2.564949 210 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +previou 1 62 2.772589 2.772589 290 +visual 2 48 3.044522 6.089044 372 +http 2 41 3.218876 6.437752 420 +soon 2 36 3.367296 6.734592 494 +next 1 34 3.401197 3.401197 517 +anim 1 31 3.496508 3.496508 557 +berkelei 1 26 3.688879 3.688879 657 +other 1 24 3.761200 3.761200 697 +sciencecornel 1 22 3.850148 3.850148 768 +hobbi 1 16 4.174387 4.174387 1009 +mayb 1 15 4.248495 4.248495 1014 +drive 1 15 4.248495 4.248495 1052 +shown 1 14 4.317488 4.317488 1080 +massachusett 1 14 4.317488 4.317488 1118 +affili 2 13 4.382027 8.764054 1194 +hewlett 1 8 4.875197 4.875197 1709 +guitar 1 8 4.875197 4.875197 1758 +lawrenc 1 7 5.010635 5.010635 1908 +fred 7 6 5.164786 36.153502 2072 +photographi 1 6 5.164786 5.164786 2146 +feet 1 5 5.347108 5.347108 2492 +snail 1 4 5.568345 5.568345 2916 +yuan 2 3 5.857933 11.715866 3653 +chelmsford 2 3 5.857933 11.715866 3564 +binghamton 1 3 5.857933 5.857933 3544 +apollo 3 1 6.957497 20.872491 10469 +scramo 1 1 6.957497 6.957497 10470 +midi 1 1 6.957497 6.957497 10471 +choreograph 1 1 6.957497 6.957497 10472 +vpla 1 1 6.957497 6.957497 10473 +animationlink 1 1 6.957497 6.957497 10474 +packardlink 1 1 6.957497 6.957497 10475 +laboratoryinterest 1 1 6.957497 6.957497 10476 +cello 1 1 6.957497 6.957497 10477 +aquarium 1 1 6.957497 6.957497 10478 +burl 1 1 6.957497 6.957497 10479 +fredhsu 1 1 6.957497 6.957497 10480 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html new file mode 100644 index 00000000..41481867 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gbd^gbd.html @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 5 640 0.000000 0.000000 4 +page 4 705 0.000000 0.000000 3 +univers 3 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +engin 4 297 1.098612 4.394448 20 +cours 3 273 1.098612 3.295836 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +cornel 3 215 1.386294 4.158882 23 +graduat 2 215 1.386294 2.772588 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +postscript 1 131 2.079442 2.079442 90 +compil 1 122 2.079442 2.079442 96 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +well 3 109 2.197225 6.591675 121 +world 3 115 2.197225 6.591675 126 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +final 1 116 2.197225 2.197225 108 +part 2 98 2.302585 4.605170 129 +advanc 1 99 2.302585 2.302585 130 +commun 1 95 2.397895 2.397895 157 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +call 1 91 2.397895 2.397895 153 +school 3 84 2.484907 7.454721 188 +educ 1 86 2.484907 2.484907 191 +start 1 83 2.484907 2.484907 173 +novemb 1 81 2.484907 2.484907 179 +chang 1 82 2.484907 2.484907 163 +master 2 76 2.564949 5.129898 216 +resum 1 79 2.564949 2.564949 217 +complet 1 77 2.564949 2.564949 208 +june 1 79 2.564949 2.564949 214 +want 1 79 2.564949 2.564949 199 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +intellig 1 72 2.639057 2.639057 225 +involv 1 71 2.639057 2.639057 227 +name 1 72 2.639057 2.639057 220 +nation 1 74 2.639057 2.639057 240 +main 4 67 2.708050 10.832200 256 +multimedia 4 68 2.708050 10.832200 258 +degre 2 69 2.708050 5.416100 259 +prof 4 64 2.772589 11.090356 273 +artifici 1 63 2.772589 2.772589 280 +back 4 60 2.833213 11.332852 297 +colleg 2 61 2.833213 5.666426 300 +major 3 56 2.890372 8.671116 315 +semest 1 58 2.890372 2.890372 312 +detail 1 57 2.890372 2.890372 321 +undergradu 2 54 2.944439 5.888878 338 +still 1 50 3.044522 3.044522 362 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +get 1 46 3.091042 3.091042 380 +better 1 45 3.135494 3.135494 401 +anoth 1 45 3.135494 3.135494 408 +long 3 43 3.178054 9.534162 413 +littl 1 39 3.258097 3.258097 454 +probabl 1 40 3.258097 3.258097 455 +brian 1 38 3.295837 3.295837 466 +mean 1 37 3.332205 3.332205 477 +michael 1 35 3.401197 3.401197 514 +approxim 1 35 3.401197 3.401197 509 +india 2 32 3.465736 6.931472 550 +taken 1 31 3.496508 3.496508 555 +actual 2 28 3.610918 7.221836 604 +hope 1 28 3.610918 3.610918 610 +never 1 25 3.737670 3.737670 671 +sciencecornel 1 22 3.850148 3.850148 768 +deal 1 22 3.850148 3.850148 736 +born 3 21 3.912023 11.736069 798 +leav 1 21 3.912023 3.912023 772 +smith 1 20 3.951244 3.951244 820 +wonder 1 20 3.951244 3.951244 815 +region 1 19 4.007333 4.007333 875 +miss 1 19 4.007333 4.007333 866 +four 2 18 4.060443 8.120886 905 +upon 1 16 4.174387 4.174387 978 +took 1 16 4.174387 4.174387 1010 +match 1 16 4.174387 4.174387 965 +goe 1 15 4.248495 4.248495 1044 +precis 1 15 4.248495 4.248495 1023 +earlier 1 13 4.382027 4.382027 1140 +someon 1 13 4.382027 4.382027 1128 +land 1 12 4.465908 4.465908 1273 +outsid 1 12 4.465908 4.465908 1219 +went 1 12 4.465908 4.465908 1279 +holidai 1 12 4.465908 4.465908 1224 +noth 1 11 4.553877 4.553877 1328 +light 1 9 4.753590 4.753590 1533 +vineet 1 8 4.875197 4.875197 1639 +pursu 2 7 5.010635 10.021270 1902 +seshadri 1 7 5.010635 5.010635 1803 +keshav 1 7 5.010635 5.010635 1852 +lucki 2 6 5.164786 10.329572 2163 +praveen 1 6 5.164786 5.164786 1996 +srinivasan 1 6 5.164786 5.164786 2175 +somewher 1 6 5.164786 5.164786 2176 +babi 1 5 5.347108 5.347108 2493 +interfer 1 5 5.347108 5.347108 2494 +greater 1 5 5.347108 5.347108 2258 +ashish 1 5 5.347108 5.347108 2473 +engineeringdepart 1 4 5.568345 5.568345 2917 +dive 1 3 5.857933 5.857933 3654 +straight 1 3 5.857933 5.857933 3655 +indira 1 3 5.857933 5.857933 3656 +karnataka 1 2 6.263398 6.263398 5106 +bharat 1 2 6.263398 6.263398 5107 +cute 1 2 6.263398 6.263398 5108 +incident 1 2 6.263398 6.263398 5109 +bangalor 1 2 6.263398 6.263398 5110 +that 1 2 6.263398 6.263398 5111 +conquer 1 2 6.263398 6.263398 5112 +aastha 1 2 6.263398 6.263398 5005 +ankit 1 2 6.263398 6.263398 4966 +deepak 4 1 6.957497 27.829988 10481 +balakrishna 2 1 6.957497 13.914994 10482 +balakrishnamast 1 1 6.957497 6.957497 10483 +resumeeducationcoursesperson 1 1 6.957497 6.957497 10484 +surathk 1 1 6.957497 6.957497 10485 +specialis 1 1 6.957497 6.957497 10486 +godfrei 1 1 6.957497 6.957497 10487 +chubbi 1 1 6.957497 6.957497 10488 +weigh 1 1 6.957497 6.957497 10489 +pound 1 1 6.957497 6.957497 10490 +divin 1 1 6.957497 6.957497 10491 +aishwarya 1 1 6.957497 6.957497 10492 +miniscul 1 1 6.957497 6.957497 10493 +krec 1 1 6.957497 6.957497 10494 +here 1 1 6.957497 6.957497 10495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html new file mode 100644 index 00000000..0e3d621f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ghias^home.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 6 443 0.693147 4.158882 6 +interest 3 384 0.693147 2.079441 11 +work 2 380 0.693147 1.386294 9 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +engin 3 297 1.098612 3.295836 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 3 215 1.386294 4.158882 23 +includ 1 208 1.609438 1.609438 42 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +manag 1 114 2.197225 2.197225 125 +sinc 1 90 2.397895 2.397895 159 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +solut 1 82 2.484907 2.484907 162 +educ 1 86 2.484907 2.484907 191 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +onlin 1 75 2.639057 2.639057 223 +multimedia 1 68 2.708050 2.708050 258 +degre 1 69 2.708050 2.708050 259 +plan 1 65 2.772589 2.772589 272 +unix 1 58 2.890372 2.890372 308 +music 1 42 3.218876 3.218876 436 +respons 1 37 3.332205 3.332205 476 +global 1 34 3.401197 3.401197 520 +administr 1 27 3.637586 3.637586 628 +leav 1 21 3.912023 3.912023 772 +bachelor 1 17 4.110874 4.110874 957 +warn 1 14 4.317488 4.317488 1068 +hopefulli 1 14 4.317488 4.317488 1071 +cricket 1 7 5.010635 5.010635 1945 +publicationsth 1 4 5.568345 5.568345 2859 +ghia 3 2 6.263398 18.790194 4934 +asif 1 2 6.263398 6.263398 4933 +mywww 1 2 6.263398 6.263398 5113 +uddin 3 1 6.957497 20.872491 10496 +ghiasasif 1 1 6.957497 6.957497 10497 +constructioni 1 1 6.957497 6.957497 10498 +karachi 1 1 6.957497 6.957497 10499 +pakistan 1 1 6.957497 6.957497 10500 +installationso 1 1 6.957497 6.957497 10501 +astronomyasif 1 1 6.957497 6.957497 10502 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html new file mode 100644 index 00000000..22c9f81b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^glaser^home.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +visit 1 63 2.772589 2.772589 288 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +berkelei 1 26 3.688879 3.688879 657 +million 1 5 5.347108 5.347108 2495 +dglaser 1 1 6.957497 6.957497 10503 +htmlpleas 1 1 6.957497 6.957497 10504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html new file mode 100644 index 00000000..057cef77 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^gries^gries.html @@ -0,0 +1,334 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 9 775 0.000000 0.000000 2 +scienc 7 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +program 6 374 0.693147 4.158882 7 +depart 3 457 0.693147 2.079441 12 +system 3 443 0.693147 2.079441 6 +work 3 380 0.693147 2.079441 9 +interest 2 384 0.693147 1.386294 11 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +us 4 329 1.098612 4.394448 16 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cornel 5 215 1.386294 6.931470 23 +languag 2 227 1.386294 2.772588 26 +also 2 259 1.386294 2.772588 28 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +softwar 1 220 1.386294 1.386294 30 +paper 4 205 1.609438 6.437752 38 +includ 1 208 1.609438 1.609438 42 +implement 1 152 1.791759 1.791759 52 +read 1 154 1.791759 1.791759 47 +hour 1 165 1.791759 1.791759 46 +year 3 148 1.945910 5.837730 84 +professor 2 137 1.945910 3.891820 76 +area 2 144 1.945910 3.891820 80 +relat 1 139 1.945910 1.945910 68 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +like 1 132 1.945910 1.945910 81 +technolog 2 131 2.079442 4.158884 102 +tool 2 117 2.079442 4.158884 93 +document 2 121 2.079442 4.158884 89 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +teach 3 108 2.197225 6.591675 112 +topic 3 114 2.197225 6.591675 110 +assist 1 112 2.197225 2.197225 113 +make 1 111 2.197225 2.197225 120 +text 2 98 2.302585 4.605170 133 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +sinc 2 90 2.397895 4.795790 159 +associ 2 93 2.397895 4.795790 151 +follow 1 92 2.397895 2.397895 143 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +educ 5 86 2.484907 12.424535 191 +institut 2 84 2.484907 4.969814 187 +help 2 83 2.484907 4.969814 175 +level 1 87 2.484907 2.484907 180 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +know 2 80 2.564949 5.129898 198 +master 1 76 2.564949 2.564949 216 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +good 1 77 2.564949 2.564949 200 +complet 1 77 2.564949 2.564949 208 +logic 4 71 2.639057 10.556228 230 +david 2 71 2.639057 5.278114 232 +symposium 2 72 2.639057 5.278114 238 +upson 1 71 2.639057 2.639057 218 +write 1 72 2.639057 2.639057 222 +effici 1 73 2.639057 2.639057 233 +servic 1 72 2.639057 2.639057 236 +receiv 5 66 2.708050 13.540250 244 +goal 1 66 2.708050 2.708050 250 +written 1 63 2.772589 2.772589 278 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +foundat 1 62 2.772589 2.772589 286 +import 1 65 2.772589 2.772589 282 +colleg 3 61 2.833213 8.499639 300 +best 2 59 2.833213 5.666426 299 +plai 1 60 2.833213 2.833213 307 +content 1 59 2.833213 2.833213 302 +thesi 1 57 2.890372 2.890372 327 +three 1 54 2.944439 2.944439 330 +tabl 3 51 2.995732 8.987196 346 +particular 1 51 2.995732 2.995732 352 +numer 1 49 3.044522 3.044522 369 +telephon 1 50 3.044522 3.044522 373 +give 1 50 3.044522 3.044522 359 +move 2 47 3.091042 6.182084 382 +understand 1 47 3.091042 3.091042 384 +effect 1 46 3.091042 3.091042 385 +math 2 44 3.135494 6.270988 402 +made 2 44 3.135494 6.270988 398 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +editor 2 41 3.218876 6.437752 433 +york 1 41 3.218876 3.218876 435 +edit 1 42 3.218876 3.218876 418 +howev 1 41 3.218876 3.218876 422 +announc 1 40 3.258097 3.258097 441 +programm 1 39 3.258097 3.258097 445 +societi 1 40 3.258097 3.258097 456 +author 1 39 3.258097 3.258097 450 +late 1 40 3.258097 3.258097 439 +paul 2 38 3.295837 6.591674 471 +vita 1 38 3.295837 3.295837 473 +correct 1 38 3.295837 3.295837 462 +open 1 38 3.295837 3.295837 469 +formal 2 37 3.332205 6.664410 478 +respons 1 37 3.332205 3.332205 476 +china 1 37 3.332205 3.332205 487 +short 1 36 3.367296 3.367296 499 +award 7 34 3.401197 23.808379 523 +survei 3 35 3.401197 10.203591 513 +return 2 34 3.401197 6.802394 502 +curriculum 1 33 3.433987 3.433987 535 +go 1 33 3.433987 3.433987 529 +articl 1 33 3.433987 3.433987 530 +board 1 33 3.433987 3.433987 528 +obtain 1 33 3.433987 3.433987 534 +dissert 1 32 3.465736 3.465736 549 +concept 1 32 3.465736 3.465736 537 +taken 1 31 3.496508 3.496508 555 +abl 1 30 3.555348 3.555348 566 +produc 1 30 3.555348 3.555348 572 +chair 2 29 3.583519 7.167038 596 +semant 1 29 3.583519 3.583519 587 +art 1 29 3.583519 3.583519 593 +weather 2 28 3.610918 7.221836 618 +full 1 28 3.610918 3.610918 615 +usual 1 28 3.610918 3.610918 608 +mine 1 26 3.688879 3.688879 654 +repres 1 26 3.688879 3.688879 656 +compar 1 26 3.688879 3.688879 648 +enjoi 1 26 3.688879 3.688879 660 +concern 1 25 3.737670 3.737670 666 +spent 1 25 3.737670 3.737670 676 +toward 1 25 3.737670 3.737670 668 +aspect 1 25 3.737670 3.737670 663 +sport 1 25 3.737670 3.737670 683 +universityithaca 1 24 3.761200 3.761200 710 +doctor 1 24 3.761200 3.761200 709 +known 1 24 3.761200 3.761200 702 +interpret 1 24 3.761200 3.761200 686 +methodolog 1 23 3.806662 3.806662 733 +proof 1 23 3.806662 3.806662 720 +serv 2 22 3.850148 7.700296 758 +period 2 22 3.850148 7.700296 743 +almost 1 22 3.850148 3.850148 742 +william 1 22 3.850148 3.850148 765 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +born 3 21 3.912023 11.736069 798 +programminglanguag 1 21 3.912023 3.912023 782 +fact 1 21 3.912023 3.912023 780 +busi 1 21 3.912023 3.912023 784 +hous 1 21 3.912023 3.912023 801 +tenni 1 20 3.951244 3.951244 838 +item 1 19 4.007333 4.007333 856 +left 1 19 4.007333 4.007333 851 +ever 1 19 4.007333 4.007333 872 +figur 1 18 4.060443 4.060443 903 +four 1 18 4.060443 4.060443 905 +stand 1 18 4.060443 4.060443 891 +stanford 3 17 4.110874 12.332622 955 +germani 1 17 4.110874 4.110874 946 +whether 1 17 4.110874 4.110874 918 +alreadi 1 16 4.174387 4.174387 963 +later 2 15 4.248495 8.496990 1043 +susan 1 15 4.248495 4.248495 1050 +contribut 1 15 4.248495 4.248495 1021 +rate 1 15 4.248495 4.248495 1037 +becam 1 14 4.317488 4.317488 1117 +latex 1 14 4.317488 4.317488 1064 +manner 1 14 4.317488 4.317488 1074 +conduct 1 14 4.317488 4.317488 1065 +wife 2 13 4.382027 8.764054 1196 +essenti 1 13 4.382027 4.382027 1137 +individu 1 13 4.382027 4.382027 1126 +believ 1 13 4.382027 4.382027 1187 +went 2 12 4.465908 8.931816 1279 +speak 1 12 4.465908 4.465908 1283 +grant 1 12 4.465908 4.465908 1216 +excit 1 11 4.553877 4.553877 1329 +fellowship 1 10 4.653960 4.653960 1460 +thecomput 1 10 4.653960 4.653960 1408 +end 1 9 4.753590 4.753590 1567 +respect 1 9 4.753590 4.753590 1545 +softbal 1 9 4.753590 4.753590 1594 +volleybal 1 9 4.753590 4.753590 1598 +swim 1 9 4.753590 4.753590 1599 +mention 1 9 4.753590 4.753590 1569 +lewi 1 8 4.875197 4.875197 1700 +hallcornel 1 8 4.875197 4.875197 1757 +guggenheim 1 8 4.875197 4.875197 1759 +told 1 8 4.875197 4.875197 1658 +joke 1 8 4.875197 4.875197 1620 +wire 1 8 4.875197 4.875197 1747 +illinoi 2 7 5.010635 10.021270 1941 +pagecomput 1 7 5.010635 5.010635 1900 +marri 1 7 5.010635 5.010635 1946 +notion 1 7 5.010635 5.010635 1947 +explain 1 7 5.010635 5.010635 1816 +snow 1 6 5.164786 5.164786 2031 +outstand 1 6 5.164786 5.164786 2136 +spare 1 6 5.164786 5.164786 2177 +golf 1 6 5.164786 5.164786 2178 +truth 1 6 5.164786 5.164786 2179 +elain 2 5 5.347108 10.694216 2496 +joseph 1 5 5.347108 5.347108 2327 +these 1 5 5.347108 5.347108 2482 +plant 1 5 5.347108 5.347108 2497 +feder 1 5 5.347108 5.347108 2266 +ofparallel 1 5 5.347108 5.347108 2380 +interfer 1 5 5.347108 5.347108 2494 +began 1 5 5.347108 5.347108 2498 +rewrit 1 5 5.347108 5.347108 2367 +sing 1 5 5.347108 5.347108 2499 +yield 1 5 5.347108 5.347108 2458 +proud 2 4 5.568345 11.136690 2918 +raman 2 4 5.568345 11.136690 2827 +queen 1 4 5.568345 5.568345 2919 +naval 1 4 5.568345 5.568345 2920 +birthdai 1 4 5.568345 5.568345 2800 +bloom 1 4 5.568345 5.568345 2913 +substanti 1 4 5.568345 5.568345 2921 +schneider 1 4 5.568345 5.568345 2868 +ping 1 4 5.568345 5.568345 2922 +gri 4 3 5.857933 23.431732 3569 +munich 3 3 5.857933 17.573799 3570 +twin 3 3 5.857933 17.573799 3657 +biographi 2 3 5.857933 11.715866 3658 +laugh 2 3 5.857933 11.715866 3659 +freshman 1 3 5.857933 5.857933 3462 +dimac 1 3 5.857933 5.857933 3574 +assistantship 1 3 5.857933 5.857933 3660 +langaug 1 3 5.857933 5.857933 3661 +blind 1 3 5.857933 5.857933 3662 +serious 1 3 5.857933 5.857933 3663 +researchassoci 1 3 5.857933 5.857933 3664 +pong 1 3 5.857933 5.857933 3371 +audienc 1 3 5.857933 5.857933 3180 +pagedavid 1 2 6.263398 6.263398 5114 +sophomor 1 2 6.263398 6.263398 4695 +polya 1 2 6.263398 6.263398 4939 +weapon 1 2 6.263398 6.263398 5115 +degreein 1 2 6.263398 6.263398 5116 +manfr 1 2 6.263398 6.263398 4949 +bauer 1 2 6.263398 6.263398 5117 +cake 1 2 6.263398 6.263398 5118 +booth 1 2 6.263398 6.263398 5119 +theamerican 1 2 6.263398 6.263398 5120 +afip 1 2 6.263398 6.263398 4300 +andt 1 2 6.263398 6.263398 5121 +spoken 1 2 6.263398 6.263398 5122 +researchinterest 1 2 6.263398 6.263398 5123 +acta 1 2 6.263398 6.263398 5124 +informatica 1 2 6.263398 6.263398 5125 +andtool 1 2 6.263398 6.263398 5126 +grieswilliam 1 1 6.957497 6.957497 10505 +engineeringdr 1 1 6.957497 6.957497 10506 +formaldevelop 1 1 6.957497 6.957497 10507 +asinterest 1 1 6.957497 6.957497 10508 +researchin 1 1 6.957497 6.957497 10509 +taughta 1 1 6.957497 6.957497 10510 +anoverrid 1 1 6.957497 6.957497 10511 +edushort 1 1 6.957497 6.957497 10512 +griesi 1 1 6.957497 6.957497 10513 +flush 1 1 6.957497 6.957497 10514 +iescap 1 1 6.957497 6.957497 10515 +workfor 1 1 6.957497 6.957497 10516 +civilian 1 1 6.957497 6.957497 10517 +amathematician 1 1 6.957497 6.957497 10518 +fewmonth 1 1 6.957497 6.957497 10519 +twogerman 1 1 6.957497 6.957497 10520 +ruedig 1 1 6.957497 6.957497 10521 +wiehl 1 1 6.957497 6.957497 10522 +algol 1 1 6.957497 6.957497 10523 +compilerfor 1 1 6.957497 6.957497 10524 +implementrecurs 1 1 6.957497 6.957497 10525 +stoer 1 1 6.957497 6.957497 10526 +wasin 1 1 6.957497 6.957497 10527 +notyet 1 1 6.957497 6.957497 10528 +kosher 1 1 6.957497 6.957497 10529 +thebirthdai 1 1 6.957497 6.957497 10530 +intown 1 1 6.957497 6.957497 10531 +whichha 1 1 6.957497 6.957497 10532 +wasdepart 1 1 6.957497 6.957497 10533 +lewisprofessor 1 1 6.957497 6.957497 10534 +contentsi 1 1 6.957497 6.957497 10535 +mytext 1 1 6.957497 6.957497 10536 +writingand 1 1 6.957497 6.957497 10537 +thewond 1 1 6.957497 6.957497 10538 +wherey 1 1 6.957497 6.957497 10539 +contributionsto 1 1 6.957497 6.957497 10540 +sigcseaward 1 1 6.957497 6.957497 10541 +clarkaward 1 1 6.957497 6.957497 10542 +advise 1 1 6.957497 6.957497 10543 +susanowicki 1 1 6.957497 6.957497 10544 +laid 1 1 6.957497 6.957497 10545 +freeness 1 1 6.957497 6.957497 10546 +bestpap 1 1 6.957497 6.957497 10547 +sthesi 1 1 6.957497 6.957497 10548 +designedand 1 1 6.957497 6.957497 10549 +printedor 1 1 6.957497 6.957497 10550 +speakmathemat 1 1 6.957497 6.957497 10551 +audiocassett 1 1 6.957497 6.957497 10552 +officein 1 1 6.957497 6.957497 10553 +taulbe 1 1 6.957497 6.957497 10554 +responsesfrom 1 1 6.957497 6.957497 10555 +noother 1 1 6.957497 6.957497 10556 +itrequir 1 1 6.957497 6.957497 10557 +sendin 1 1 6.957497 6.957497 10558 +questionnair 1 1 6.957497 6.957497 10559 +forchair 1 1 6.957497 6.957497 10560 +andrespons 1 1 6.957497 6.957497 10561 +takean 1 1 6.957497 6.957497 10562 +willsuggest 1 1 6.957497 6.957497 10563 +servewher 1 1 6.957497 6.957497 10564 +fredb 1 1 6.957497 6.957497 10565 +andmonograph 1 1 6.957497 6.957497 10566 +isplit 1 1 6.957497 6.957497 10567 +pant 1 1 6.957497 6.957497 10568 +alectur 1 1 6.957497 6.957497 10569 +turnedaround 1 1 6.957497 6.957497 10570 +spoke 1 1 6.957497 6.957497 10571 +everyonelaugh 1 1 6.957497 6.957497 10572 +justsaid 1 1 6.957497 6.957497 10573 +barbershop 1 1 6.957497 6.957497 10574 +andgilbert 1 1 6.957497 6.957497 10575 +sullivan 1 1 6.957497 6.957497 10576 +carpentri 1 1 6.957497 6.957497 10577 +remodel 1 1 6.957497 6.957497 10578 +considerablesatisfact 1 1 6.957497 6.957497 10579 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html new file mode 100644 index 00000000..af917ad8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grinzayd^home.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +cornel 2 215 1.386294 2.772588 23 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +first 1 140 1.945910 1.945910 71 +learn 1 86 2.484907 2.484907 170 +html 1 75 2.639057 2.639057 235 +week 1 52 2.995732 2.995732 343 +express 1 32 3.465736 3.465736 540 +sciencecornel 1 22 3.850148 3.850148 768 +shop 1 10 4.653960 4.653960 1469 +bore 1 7 5.010635 5.010635 1948 +alex 1 6 5.164786 5.164786 2130 +grinzayd 2 1 6.957497 13.914994 10580 +homepagealex 1 1 6.957497 6.957497 10581 +grinzaydm 1 1 6.957497 6.957497 10582 +universitytel 1 1 6.957497 6.957497 10583 +necx 1 1 6.957497 6.957497 10584 +directinternet 1 1 6.957497 6.957497 10585 +networkcomput 1 1 6.957497 6.957497 10586 +damarkwarn 1 1 6.957497 6.957497 10587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html new file mode 100644 index 00000000..d30fc454 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^grzes^grzes.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +homepag 1 93 2.397895 2.397895 148 +second 1 81 2.484907 2.484907 166 +activ 1 84 2.484907 2.484907 182 +novemb 1 81 2.484907 2.484907 179 +complet 1 77 2.564949 2.564949 208 +master 1 76 2.564949 2.564949 216 +messag 1 76 2.564949 2.564949 212 +involv 1 71 2.639057 2.639057 227 +degre 1 69 2.708050 2.708050 259 +ithaca 1 65 2.772589 2.772589 294 +sever 1 56 2.890372 2.890372 322 +advisor 1 51 2.995732 2.995732 355 +york 1 41 3.218876 3.218876 435 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +split 1 14 4.317488 4.317488 1078 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +scienceat 1 11 4.553877 4.553877 1375 +charg 1 9 4.753590 4.753590 1582 +grzegorz 2 4 5.568345 11.136690 2923 +czajkowski 1 4 5.568345 5.568345 2924 +cuc 1 4 5.568345 5.568345 2630 +poland 1 3 5.857933 5.857933 3665 +grze 2 1 6.957497 13.914994 10588 +czajkowskidepart 1 1 6.957497 6.957497 10589 +krakow 1 1 6.957497 6.957497 10590 +administ 1 1 6.957497 6.957497 10591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ new file mode 100644 index 00000000..1988a931 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^halpern^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +work 2 380 0.693147 1.386294 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +also 2 259 1.386294 2.772588 28 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +paper 2 205 1.609438 3.218876 38 +fall 2 181 1.609438 3.218876 40 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +distribut 2 162 1.791759 3.583518 51 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +like 1 132 1.945910 1.945910 81 +number 1 130 2.079442 2.079442 97 +studi 1 120 2.079442 2.079442 91 +teach 2 108 2.197225 4.394450 112 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +check 1 115 2.197225 2.197225 118 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +activ 1 84 2.484907 2.484907 182 +resum 1 79 2.564949 2.564949 217 +upson 1 71 2.639057 2.639057 218 +knowledg 2 67 2.708050 5.416100 243 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +septemb 1 65 2.772589 2.772589 274 +best 1 59 2.833213 2.833213 299 +reason 3 57 2.890372 8.671116 318 +detail 2 57 2.890372 5.780744 321 +talk 2 53 2.944439 5.888878 336 +case 1 51 2.995732 2.995732 351 +give 2 50 3.044522 6.089044 359 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +describ 1 45 3.135494 3.135494 400 +continu 1 39 3.258097 3.258097 448 +probabl 1 40 3.258097 3.258097 455 +field 1 37 3.332205 3.332205 482 +game 1 36 3.367296 3.367296 498 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +scientist 1 31 3.496508 3.496508 560 +focus 1 29 3.583519 3.583519 584 +semant 1 29 3.583519 3.583519 587 +subject 1 26 3.688879 3.688879 647 +although 1 25 3.737670 3.737670 667 +princeton 1 15 4.248495 4.248495 1042 +econom 1 13 4.382027 4.382027 1184 +someon 1 13 4.382027 4.382027 1128 +mainli 1 10 4.653960 4.653960 1432 +sentenc 1 10 4.653960 4.653960 1413 +uncertainti 2 7 5.010635 10.021270 1882 +boundari 1 7 5.010635 5.010635 1929 +gave 1 7 5.010635 5.010635 1922 +philosoph 1 7 5.010635 5.010635 1904 +li 1 5 5.347108 5.347108 2500 +hallithaca 1 4 5.568345 5.568345 2894 +universitycomput 1 3 5.857933 5.857933 3651 +halpern 4 1 6.957497 27.829988 10592 +pagejoseph 1 1 6.957497 6.957497 10593 +professorcornel 1 1 6.957497 6.957497 10594 +economist 1 1 6.957497 6.957497 10595 +abouta 1 1 6.957497 6.957497 10596 +sequel 1 1 6.957497 6.957497 10597 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html new file mode 100644 index 00000000..bd284d0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hashi^hashi.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +cornel 1 215 1.386294 1.386294 23 +construct 1 139 1.945910 1.945910 82 +tsuneshi 2 1 6.957497 13.914994 10598 +hashimoto 2 1 6.957497 13.914994 10599 +hashimototsuneshi 1 1 6.957497 6.957497 10600 +hashimotothi 1 1 6.957497 6.957497 10601 +cstsuneshi 1 1 6.957497 6.957497 10602 +hashi 1 1 6.957497 6.957497 10603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html new file mode 100644 index 00000000..73438da6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hayden^hayden.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +distribut 2 162 1.791759 3.583518 51 +develop 1 174 1.791759 1.791759 53 +teach 1 108 2.197225 2.197225 112 +commun 2 95 2.397895 4.795790 157 +novemb 1 81 2.484907 2.484907 179 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +unix 1 58 2.890372 2.890372 308 +mark 2 44 3.135494 6.270988 403 +proof 1 23 3.806662 3.806662 720 +horu 1 14 4.317488 4.317488 1116 +nuprl 1 10 4.653960 4.653960 1402 +hockei 1 8 4.875197 4.875197 1760 +hayden 3 4 5.568345 16.705035 2844 +tast 1 3 5.857933 5.857933 3666 +ensembl 1 2 6.263398 6.263398 4854 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html new file mode 100644 index 00000000..f6603e89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hejik^hejik.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +improv 1 62 2.772589 2.772589 289 +cyber 1 4 5.568345 5.568345 2909 +pond 1 2 6.263398 6.263398 5127 +heji 2 1 6.957497 13.914994 10604 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html new file mode 100644 index 00000000..9673b14c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^hu^hu.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +student 2 343 1.098612 2.197224 19 +last 1 314 1.098612 1.098612 14 +cornel 4 215 1.386294 5.545176 23 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +year 1 148 1.945910 1.945910 84 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +faculti 1 56 2.890372 2.890372 325 +undergradu 1 54 2.944439 2.944439 338 +advisor 1 51 2.995732 2.995732 355 +third 1 43 3.178054 3.178054 412 +china 1 37 3.332205 3.332205 487 +berkelei 1 26 3.688879 3.688879 657 +born 1 21 3.912023 3.912023 798 +thorsten 1 13 4.382027 4.382027 1133 +eicken 1 13 4.382027 4.382027 1134 +hallithaca 1 4 5.568345 5.568345 2894 +shanghai 1 4 5.568345 5.568345 2925 +universitydept 1 3 5.857933 5.857933 3602 +deyu 3 1 6.957497 20.872491 10606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html new file mode 100644 index 00000000..62fa0cb4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^huang^huang.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +cornel 6 215 1.386294 8.317764 23 +link 1 247 1.386294 1.386294 24 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +machin 1 129 2.079442 2.079442 95 +mathemat 1 108 2.197225 2.197225 123 +check 1 115 2.197225 2.197225 118 +imag 1 91 2.397895 2.397895 161 +center 1 88 2.397895 2.397895 158 +homepag 1 93 2.397895 2.397895 148 +academ 1 82 2.484907 2.484907 178 +learn 1 86 2.484907 2.484907 170 +resourc 1 81 2.484907 2.484907 172 +master 1 76 2.564949 2.564949 216 +optim 1 79 2.564949 2.564949 197 +upson 1 71 2.639057 2.639057 218 +appli 1 71 2.639057 2.639057 226 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +back 1 60 2.833213 2.833213 297 +video 1 44 3.135494 3.135494 405 +vision 2 41 3.218876 6.437752 430 +bibliographi 1 34 3.401197 3.401197 518 +chines 3 29 3.583519 10.750557 595 +retriev 1 27 3.637586 3.637586 621 +motion 1 24 3.761200 3.761200 699 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +annot 1 21 3.912023 3.912023 775 +beij 1 19 4.007333 4.007333 876 +track 1 15 4.248495 4.248495 1029 +tsinghua 1 13 4.382027 4.382027 1195 +huang 3 12 4.465908 13.397724 1202 +thedepart 1 11 4.553877 4.553877 1350 +scienceat 1 11 4.553877 4.553877 1375 +fellowship 2 10 4.653960 9.307920 1460 +christian 3 7 5.010635 15.031905 1949 +ramin 1 7 5.010635 5.010635 1820 +zabih 1 6 5.164786 5.164786 2138 +mission 1 5 5.347108 5.347108 2465 +jing 2 3 5.857933 11.715866 3521 +bachelorand 1 2 6.263398 6.263398 5128 +chinami 1 2 6.263398 6.263398 5129 +evangel 1 1 6.957497 6.957497 10605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html new file mode 100644 index 00000000..021ab2e3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^icchen^icchen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 2 443 0.693147 1.386294 6 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +address 2 170 1.791759 3.583518 62 +distribut 2 162 1.791759 3.583518 51 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +databas 1 122 2.079442 2.079442 86 +manag 1 114 2.197225 2.197225 125 +graphic 1 90 2.397895 2.397895 147 +resum 1 79 2.564949 2.564949 217 +practic 2 70 2.708050 5.416100 246 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +china 1 37 3.332205 3.332205 487 +chen 2 21 3.912023 7.824046 791 +taiwan 1 16 4.174387 4.174387 1006 +practicum 1 16 4.174387 4.174387 960 +mapl 1 11 4.553877 4.553877 1376 +perman 1 11 4.553877 4.553877 1372 +sung 1 6 5.164786 5.164786 2075 +chin 2 5 5.347108 10.694216 2408 +taipei 1 4 5.568345 5.568345 2926 +album 1 4 5.568345 5.568345 2888 +icchen 1 1 6.957497 6.957497 10607 +nctu 1 1 6.957497 6.957497 10608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html new file mode 100644 index 00000000..0fc1bd32 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^imalik^indira.html @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 3 297 1.098612 3.295836 20 +cours 1 273 1.098612 1.098612 15 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +network 1 168 1.791759 1.791759 61 +construct 1 139 1.945910 1.945910 82 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +advanc 1 99 2.302585 2.302585 130 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +multimedia 1 68 2.708050 2.708050 258 +visit 1 63 2.772589 2.772589 288 +post 1 35 3.401197 3.401197 505 +script 1 13 4.382027 4.382027 1171 +indira 1 3 5.857933 5.857933 3656 +malik 1 1 6.957497 6.957497 10609 +imalik 1 1 6.957497 6.957497 10610 +tap 1 1 6.957497 6.957497 10611 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html new file mode 100644 index 00000000..5241d4ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^indira^homepage.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 5 443 0.693147 3.465735 6 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 4 340 1.098612 4.394448 18 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +oper 3 180 1.609438 4.828314 34 +updat 1 191 1.609438 1.609438 41 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +graphic 3 90 2.397895 7.193685 147 +homepag 1 93 2.397895 2.397895 148 +java 3 70 2.708050 8.124150 248 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +polici 1 64 2.772589 2.772589 279 +colleg 1 61 2.833213 2.833213 300 +cool 1 49 3.044522 3.044522 374 +directori 1 45 3.135494 3.135494 396 +india 1 32 3.465736 3.465736 550 +independ 1 32 3.465736 3.465736 548 +taken 1 31 3.496508 3.496508 555 +anim 1 31 3.496508 3.496508 557 +sciencecornel 1 22 3.850148 3.850148 768 +indian 1 22 3.850148 3.850148 769 +applet 2 20 3.951244 7.902488 827 +toolkit 1 20 3.951244 3.951244 835 +practicum 1 16 4.174387 4.174387 960 +drive 1 15 4.248495 4.248495 1052 +camera 2 14 4.317488 8.634976 1115 +audio 1 14 4.317488 4.317488 1094 +galleri 1 13 4.382027 4.382027 1192 +magic 1 11 4.553877 4.553877 1358 +wood 1 11 4.553877 4.553877 1355 +perspect 1 10 4.653960 4.653960 1437 +hoca 1 5 5.347108 5.347108 2241 +hobb 1 4 5.568345 5.568345 2893 +nashvil 1 4 5.568345 5.568345 2867 +tennesse 1 4 5.568345 5.568345 2763 +indira 3 3 5.857933 17.573799 3656 +engineeringclass 1 3 5.857933 5.857933 3667 +recip 1 3 5.857933 5.857933 3668 +coimbator 1 2 6.263398 6.263398 5130 +cornelluniversityfal 1 2 6.263398 6.263398 5131 +cspracticum 1 2 6.263398 6.263398 5132 +carpet 1 2 6.263398 6.263398 5133 +colloqium 1 2 6.263398 6.263398 5134 +manageri 1 2 6.263398 6.263398 5135 +vidyaprakash 1 1 6.957497 6.957497 10612 +vidyaprakashmast 1 1 6.957497 6.957497 10613 +universitywelcom 1 1 6.957497 6.957497 10614 +financesumm 1 1 6.957497 6.957497 10615 +tracingin 1 1 6.957497 6.957497 10616 +perspectivetransform 1 1 6.957497 6.957497 10617 +myresumeclick 1 1 6.957497 6.957497 10618 +transformssom 1 1 6.957497 6.957497 10619 +sgamelan 1 1 6.957497 6.957497 10620 +calvinand 1 1 6.957497 6.957497 10621 +gif 1 1 6.957497 6.957497 10622 +chicker 1 1 6.957497 6.957497 10623 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html new file mode 100644 index 00000000..6262ddeb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ioi^home.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 3 329 1.098612 3.295836 16 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +cornel 2 215 1.386294 2.772588 23 +softwar 2 220 1.386294 2.772588 30 +includ 1 208 1.609438 1.609438 42 +base 2 165 1.791759 3.583518 50 +parallel 1 169 1.791759 1.791759 60 +file 2 132 1.945910 3.891820 70 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +code 1 108 2.197225 2.197225 116 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +user 1 104 2.302585 2.302585 137 +section 1 94 2.397895 2.397895 149 +environ 1 84 2.484907 2.484907 177 +start 1 83 2.484907 2.484907 173 +solut 1 82 2.484907 2.484907 162 +come 2 78 2.564949 5.129898 202 +good 1 77 2.564949 2.564949 200 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +homework 1 79 2.564949 2.564949 193 +write 3 72 2.639057 7.917171 222 +test 2 66 2.708050 5.416100 252 +knowledg 1 67 2.708050 2.708050 243 +prof 1 64 2.772589 2.772589 273 +descript 1 64 2.772589 2.772589 271 +virtual 1 62 2.772589 2.772589 285 +detail 2 57 2.890372 5.780744 321 +index 1 56 2.890372 2.890372 309 +instruct 1 53 2.944439 2.944439 332 +extens 1 53 2.944439 2.944439 340 +cover 1 55 2.944439 2.944439 329 +much 1 52 2.995732 2.995732 349 +week 1 52 2.995732 2.995732 343 +set 1 50 3.044522 3.044522 361 +right 1 48 3.044522 3.044522 363 +get 1 46 3.091042 3.091042 380 +directori 1 45 3.135494 3.135494 396 +video 1 44 3.135494 3.135494 405 +protocol 1 45 3.135494 3.135494 407 +http 1 41 3.218876 3.218876 420 +programm 1 39 3.258097 3.258097 445 +tutori 1 39 3.258097 3.258097 437 +brian 1 38 3.295837 3.295837 466 +prototyp 1 38 3.295837 3.295837 463 +short 2 36 3.367296 6.734592 499 +multi 1 36 3.367296 3.367296 493 +manual 3 35 3.401197 10.203591 504 +packag 2 28 3.610918 7.221836 614 +togeth 1 23 3.806662 3.806662 714 +try 1 22 3.850148 3.850148 764 +smith 1 20 3.951244 3.951244 820 +mpeg 1 20 3.951244 3.951244 831 +media 1 19 4.007333 4.007333 861 +repositori 1 17 4.110874 4.110874 932 +doesn 1 15 4.248495 4.248495 1055 +remot 1 15 4.248495 4.248495 1041 +script 1 13 4.382027 4.382027 1171 +suit 1 13 4.382027 4.382027 1129 +realiti 1 12 4.465908 4.465908 1272 +guidelin 1 7 5.010635 5.010635 1832 +conferenc 1 7 5.010635 5.010635 1857 +put 1 6 5.164786 5.164786 2017 +valuabl 1 5 5.347108 5.347108 2256 +templat 1 5 5.347108 5.347108 2311 +spam 1 4 5.568345 5.568345 2927 +knowledgebas 1 2 6.263398 6.263398 5136 +pageioi 2 1 6.957497 13.914994 10624 +homeless 1 1 6.957497 6.957497 10625 +lamioi 1 1 6.957497 6.957497 10626 +multim 1 1 6.957497 6.957497 10627 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html new file mode 100644 index 00000000..bbef5d4a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jackson^jackson.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 4 215 1.386294 5.545176 23 +design 4 213 1.386294 5.545176 25 +softwar 2 220 1.386294 2.772588 30 +mail 1 238 1.386294 1.386294 22 +list 3 201 1.609438 4.828314 39 +includ 2 208 1.609438 3.218876 42 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +develop 4 174 1.791759 7.167036 53 +avail 4 169 1.791759 7.167036 48 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +relat 1 139 1.945910 1.945910 68 +postscript 3 131 2.079442 6.238326 90 +confer 2 126 2.079442 4.158884 100 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +introduct 1 126 2.079442 2.079442 87 +theori 4 111 2.197225 8.788900 127 +intern 1 108 2.197225 2.197225 128 +world 1 115 2.197225 2.197225 126 +make 1 111 2.197225 2.197225 120 +text 3 98 2.302585 6.907755 133 +access 2 102 2.302585 4.605170 136 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +proceed 2 93 2.397895 4.795790 152 +commun 1 95 2.397895 2.397895 157 +info 1 85 2.484907 2.484907 176 +environ 1 84 2.484907 2.484907 177 +wide 1 84 2.484907 2.484907 185 +method 1 80 2.564949 2.564949 213 +june 1 79 2.564949 2.564949 214 +upson 1 71 2.639057 2.639057 218 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +workshop 1 71 2.639057 2.639057 239 +summari 1 73 2.639057 2.639057 237 +abstract 5 62 2.772589 13.862945 276 +ithaca 1 65 2.772589 2.772589 294 +collect 1 65 2.772589 2.772589 268 +type 1 61 2.833213 2.833213 296 +thesi 3 57 2.890372 8.671116 327 +explor 1 58 2.890372 2.890372 324 +point 1 58 2.890372 2.890372 319 +scientif 1 53 2.944439 2.944439 341 +hardwar 2 51 2.995732 5.991464 350 +format 4 48 3.044522 12.178088 356 +basic 1 50 3.044522 3.044522 360 +still 1 50 3.044522 3.044522 362 +algebra 3 45 3.135494 9.406482 394 +editor 2 41 3.218876 6.437752 433 +http 1 41 3.218876 3.218876 420 +live 1 40 3.258097 3.258097 451 +paul 5 38 3.295837 16.479185 471 +correct 1 38 3.295837 3.295837 462 +formal 1 37 3.332205 3.332205 478 +next 2 34 3.401197 6.802394 517 +post 1 35 3.401197 3.401197 505 +full 3 28 3.610918 10.832754 615 +load 1 28 3.610918 3.610918 601 +enhanc 1 26 3.688879 3.688879 644 +session 1 26 3.688879 3.688879 643 +doctor 1 24 3.761200 3.761200 709 +sometim 1 24 3.761200 3.761200 696 +proof 3 23 3.806662 11.419986 720 +methodolog 1 23 3.806662 3.806662 733 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +dai 1 22 3.850148 3.850148 753 +theorem 1 21 3.912023 3.912023 786 +synthesi 1 20 3.951244 3.951244 834 +toolkit 1 20 3.951244 3.951244 835 +prove 1 19 4.007333 4.007333 848 +north 1 19 4.007333 4.007333 873 +els 1 19 4.007333 4.007333 843 +hypertext 1 19 4.007333 4.007333 865 +definit 1 19 4.007333 4.007333 864 +coupl 1 17 4.110874 4.110874 939 +month 1 15 4.248495 4.248495 1025 +circuit 2 13 4.382027 8.764054 1131 +someon 1 13 4.382027 4.382027 1128 +moment 1 11 4.553877 4.553877 1379 +nuprl 6 10 4.653960 27.923760 1402 +usaphon 1 9 4.753590 4.753590 1600 +entitl 1 9 4.753590 4.753590 1490 +inter 1 9 4.753590 4.753590 1530 +float 1 9 4.753590 4.753590 1504 +prover 1 8 4.875197 4.875197 1653 +attent 1 8 4.875197 4.875197 1651 +ifip 1 5 5.347108 5.347108 2459 +holland 1 5 5.347108 5.347108 2490 +jackson 10 3 5.857933 58.579330 3586 +pagepaul 1 3 5.857933 5.857933 3669 +bout 1 3 5.857933 5.857933 3670 +elsevi 1 3 5.857933 5.857933 3671 +pai 1 3 5.857933 5.857933 3672 +shouldb 1 3 5.857933 5.857933 3673 +associatecornel 1 2 6.263398 6.263398 5137 +eduwww 1 2 6.263398 6.263398 5138 +linkag 1 2 6.263398 6.263398 5139 +thenuprl 1 2 6.263398 6.263398 5047 +workon 1 2 6.263398 6.263398 4280 +htmladdress 1 1 6.957497 6.957497 10628 +intereststheorem 1 1 6.957497 6.957497 10629 +andhardwar 1 1 6.957497 6.957497 10630 +informationmi 1 1 6.957497 6.957497 10631 +developmentsystem 1 1 6.957497 6.957497 10632 +bundi 1 1 6.957497 6.957497 10633 +automateddeduct 1 1 6.957497 6.957497 10634 +artif 1 1 6.957497 6.957497 10635 +stavrid 1 1 6.957497 6.957497 10636 +melham 1 1 6.957497 6.957497 10637 +transactionsa 1 1 6.957497 6.957497 10638 +theadvanc 1 1 6.957497 6.957497 10639 +nuprlth 1 1 6.957497 6.957497 10640 +getround 1 1 6.957497 6.957497 10641 +thetheori 1 1 6.957497 6.957497 10642 +foreach 1 1 6.957497 6.957497 10643 +andtheorem 1 1 6.957497 6.957497 10644 +thepolynomi 1 1 6.957497 6.957497 10645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html new file mode 100644 index 00000000..25bda0fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jamjoom^index.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +hani 1 2 6.263398 6.263398 5140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html new file mode 100644 index 00000000..82b68d1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janosi^home.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +cornel 2 215 1.386294 2.772588 23 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +site 1 106 2.197225 2.197225 119 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +perman 1 11 4.553877 4.553877 1372 +usaoffic 1 6 5.164786 5.164786 2159 +janosi 1 3 5.857933 5.857933 3149 +mywww 1 2 6.263398 6.263398 5113 +tibor 1 1 6.957497 6.957497 10646 +jnositibor 1 1 6.957497 6.957497 10647 +jnosiwelcom 1 1 6.957497 6.957497 10648 +constructionoffic 1 1 6.957497 6.957497 10649 +zenotibor 1 1 6.957497 6.957497 10650 +jnosi 1 1 6.957497 6.957497 10651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html new file mode 100644 index 00000000..fec650a6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^janwun^janwun.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 1 215 1.386294 1.386294 23 +address 1 170 1.791759 1.791759 62 +master 1 76 2.564949 2.564949 216 +ithaca 1 65 2.772589 2.772589 294 +telephon 1 50 3.044522 3.044522 373 +avenu 1 12 4.465908 4.465908 1277 +mapl 1 11 4.553877 4.553877 1376 +janwun 3 1 6.957497 20.872491 10652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html new file mode 100644 index 00000000..36b8ede4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jgm^home.html @@ -0,0 +1,184 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 13 374 0.693147 9.010911 7 +research 9 431 0.693147 6.238323 10 +system 5 443 0.693147 3.465735 6 +interest 3 384 0.693147 2.079441 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +us 3 329 1.098612 3.295836 16 +offic 2 299 1.098612 2.197224 13 +time 2 293 1.098612 2.197224 17 +project 2 340 1.098612 2.197224 18 +languag 16 227 1.386294 22.180704 26 +also 4 259 1.386294 5.545176 28 +cornel 3 215 1.386294 4.158882 23 +softwar 3 220 1.386294 4.158882 30 +design 2 213 1.386294 2.772588 25 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +implement 3 152 1.791759 5.375277 52 +phone 2 175 1.791759 3.583518 45 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +construct 2 139 1.945910 3.891820 82 +support 2 132 1.945910 3.891820 83 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +compil 6 122 2.079442 12.476652 96 +high 3 130 2.079442 6.238326 101 +report 3 131 2.079442 6.238326 92 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +version 4 113 2.197225 8.788900 122 +code 2 108 2.197225 4.394450 116 +manag 2 114 2.197225 4.394450 125 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +technic 4 100 2.302585 9.210340 140 +memori 3 101 2.302585 6.907755 139 +advanc 1 99 2.302585 2.302585 130 +proceed 2 93 2.397895 4.795790 152 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +level 3 87 2.484907 7.454721 180 +resourc 1 81 2.484907 2.484907 172 +member 1 84 2.484907 2.484907 165 +appear 3 78 2.564949 7.694847 210 +decemb 2 80 2.564949 5.129898 215 +june 2 79 2.564949 5.129898 214 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +state 1 76 2.564949 2.564949 207 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +symposium 3 72 2.639057 7.917171 238 +workshop 2 71 2.639057 5.278114 239 +upson 1 71 2.639057 2.639057 218 +line 1 75 2.639057 2.639057 231 +practic 1 70 2.708050 2.708050 246 +ithaca 2 65 2.772589 5.545178 294 +abstract 2 62 2.772589 5.545178 276 +evalu 1 64 2.772589 2.772589 266 +septemb 1 65 2.772589 2.772589 274 +function 1 62 2.772589 2.772589 275 +januari 1 62 2.772589 2.772589 264 +type 5 61 2.833213 14.166065 296 +content 1 59 2.833213 2.833213 302 +juli 1 60 2.833213 2.833213 305 +publish 3 57 2.890372 8.671116 326 +direct 2 57 2.890372 5.780744 316 +faculti 1 56 2.890372 2.890372 325 +thesi 1 57 2.890372 2.890372 327 +tabl 1 51 2.995732 2.995732 346 +standard 5 48 3.044522 15.222610 365 +principl 3 48 3.044522 9.133566 357 +mark 1 44 3.135494 3.135494 403 +late 1 40 3.258097 3.258097 439 +annual 1 40 3.258097 3.258097 458 +bibliographi 1 34 3.401197 3.401197 518 +extend 2 32 3.465736 6.931472 539 +ad 1 32 3.465736 3.465736 544 +robert 4 30 3.555348 14.221392 567 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +semant 2 29 3.583519 7.167038 587 +focus 1 29 3.583519 3.583519 584 +platform 1 29 3.583519 3.583519 591 +proc 2 26 3.688879 7.377758 649 +primari 1 25 3.737670 3.737670 669 +greg 7 24 3.761200 26.328400 695 +interpret 1 24 3.761200 3.761200 686 +store 1 24 3.761200 3.761200 693 +thread 1 23 3.806662 3.806662 722 +cooper 1 22 3.850148 3.850148 757 +portabl 2 20 3.951244 7.902488 819 +andrew 2 19 4.007333 8.014666 849 +particularli 1 19 4.007333 4.007333 867 +eric 1 19 4.007333 4.007333 870 +concentr 1 18 4.060443 4.060443 906 +less 1 18 4.060443 4.060443 892 +partial 1 18 4.060443 4.060443 900 +fourth 1 16 4.174387 4.174387 999 +diego 1 16 4.174387 4.174387 992 +princeton 1 15 4.248495 4.248495 1042 +francisco 1 14 4.317488 4.317488 1095 +sigplan 3 13 4.382027 13.146081 1190 +conf 1 13 4.382027 4.382027 1181 +mellon 1 13 4.382027 4.382027 1179 +onth 1 12 4.465908 4.465908 1218 +carnegi 1 12 4.465908 4.465908 1260 +faster 1 11 4.553877 4.553877 1323 +refin 1 11 4.553877 4.553877 1363 +road 1 11 4.553877 4.553877 1374 +cheng 2 10 4.653960 9.307920 1381 +interestsmi 1 10 4.653960 4.653960 1462 +operatingsystem 1 10 4.653960 4.653960 1401 +bring 1 10 4.653960 4.653960 1430 +jersei 2 9 4.753590 9.507180 1587 +lock 1 9 4.753590 4.753590 1551 +closur 1 8 4.875197 4.875197 1643 +convers 1 8 4.875197 4.875197 1673 +andcomput 1 8 4.875197 4.875197 1623 +leon 1 8 4.875197 4.875197 1631 +hack 1 7 5.010635 5.010635 1950 +bit 1 7 5.010635 5.010635 1833 +gzip 1 6 5.164786 5.164786 2117 +morrisett 14 5 5.347108 74.859512 2263 +interestedin 1 5 5.347108 5.347108 2260 +consum 1 5 5.347108 5.347108 2334 +optimist 1 5 5.347108 5.347108 2501 +gregori 4 4 5.568345 22.273380 2928 +polymorph 2 4 5.568345 11.136690 2627 +kept 1 4 5.568345 5.568345 2762 +stone 2 3 5.857933 11.715866 3674 +informationresearch 1 3 5.857933 5.857933 3675 +teachingc 1 3 5.857933 5.857933 3614 +denmark 1 3 5.857933 5.857933 3676 +warren 1 3 5.857933 5.857933 3301 +harper 6 2 6.263398 37.580388 5141 +multiprocess 2 2 6.263398 12.526796 5142 +intereststeachingselect 1 2 6.263398 6.263398 4924 +linksperson 1 2 6.263398 6.263398 5143 +herlihi 1 2 6.263398 6.263398 5144 +copenhagen 1 2 6.263398 6.263398 5145 +tarditi 2 1 6.957497 13.914994 10653 +tolmach 2 1 6.957497 13.914994 10654 +papersrel 1 1 6.957497 6.957497 10655 +ofadvanc 1 1 6.957497 6.957497 10656 +forbuild 1 1 6.957497 6.957497 10657 +safelanguag 1 1 6.957497 6.957497 10658 +toolsfrom 1 1 6.957497 6.957497 10659 +systemssoftwar 1 1 6.957497 6.957497 10660 +paperssemant 1 1 6.957497 6.957497 10661 +safetythrough 1 1 6.957497 6.957497 10662 +yasuhiko 1 1 6.957497 6.957497 10663 +minamid 1 1 6.957497 6.957497 10664 +matthia 1 1 6.957497 6.957497 10665 +felleisen 1 1 6.957497 6.957497 10666 +reportcmu 1 1 6.957497 6.957497 10667 +notecmu 1 1 6.957497 6.957497 10668 +intensionaltyp 1 1 6.957497 6.957497 10669 +parallelizationgreg 1 1 6.957497 6.957497 10670 +mauric 1 1 6.957497 6.957497 10671 +scienceperson 1 1 6.957497 6.957497 10672 +informationhom 1 1 6.957497 6.957497 10673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html new file mode 100644 index 00000000..09419962 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhlin^index.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +address 2 170 1.791759 3.583518 62 +resum 1 79 2.564949 2.564949 217 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +taiwan 1 16 4.174387 4.174387 1006 +avenu 1 12 4.465908 4.465908 1277 +mapl 1 11 4.553877 4.553877 1376 +perman 1 11 4.553877 4.553877 1372 +taipei 1 4 5.568345 5.568345 2926 +shing 1 2 6.263398 6.263398 5146 +jiun 2 1 6.957497 13.914994 10674 +jhlin 1 1 6.957497 6.957497 10675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ new file mode 100644 index 00000000..c7f78a97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jhsu^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +homepag 1 93 2.397895 2.397895 148 +jerri 2 3 5.857933 11.715866 3445 +edujerri 1 1 6.957497 6.957497 10676 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html new file mode 100644 index 00000000..ade417b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jiawang^jiawang.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 8 571 0.000000 0.000000 5 +scienc 7 640 0.000000 0.000000 4 +comput 5 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 3 343 1.098612 3.295836 19 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +cornel 5 215 1.386294 6.931470 23 +graduat 4 215 1.386294 5.545176 31 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +phone 2 175 1.791759 3.583518 45 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +mathemat 2 108 2.197225 4.394450 123 +site 2 106 2.197225 4.394450 119 +homepag 2 93 2.397895 4.795790 148 +pictur 1 89 2.397895 2.397895 160 +internet 2 83 2.484907 4.969814 186 +educ 1 86 2.484907 2.484907 191 +school 1 84 2.484907 2.484907 188 +ieee 1 86 2.484907 2.484907 190 +state 1 76 2.564949 2.564949 207 +nation 2 74 2.639057 5.278114 240 +servic 2 72 2.639057 5.278114 236 +upson 1 71 2.639057 2.639057 218 +degre 1 69 2.708050 2.708050 259 +window 1 68 2.708050 2.708050 242 +new 3 64 2.772589 8.317767 262 +foundat 1 62 2.772589 2.772589 286 +guid 1 63 2.772589 2.772589 267 +virtual 1 62 2.772589 2.772589 285 +best 1 59 2.833213 2.833213 299 +cool 1 49 3.044522 3.044522 374 +directori 1 45 3.135494 3.135494 396 +music 2 42 3.218876 6.437752 436 +york 1 41 3.218876 3.218876 435 +review 1 42 3.218876 3.218876 425 +futur 1 41 3.218876 3.218876 427 +societi 1 40 3.258097 3.258097 456 +china 12 37 3.332205 39.986460 487 +award 1 34 3.401197 3.401197 523 +chines 9 29 3.583519 32.251671 595 +art 1 29 3.583519 3.583519 593 +weather 1 28 3.610918 3.610918 618 +magazin 2 24 3.761200 7.522400 704 +daili 1 24 3.761200 3.761200 706 +yahoo 1 24 3.761200 3.761200 707 +honor 1 23 3.806662 3.806662 729 +famili 1 23 3.806662 3.806662 735 +wang 1 21 3.912023 3.912023 790 +beij 1 19 4.007333 4.007333 876 +histori 1 19 4.007333 4.007333 853 +taiwan 3 16 4.174387 12.523161 1006 +transfer 1 16 4.174387 4.174387 967 +rank 3 14 4.317488 12.952464 1063 +incomput 1 14 4.317488 4.317488 1096 +entertain 1 12 4.465908 4.465908 1286 +tour 1 11 4.553877 4.553877 1307 +america 1 11 4.553877 4.553877 1370 +fellowship 2 10 4.653960 9.307920 1460 +sister 2 9 4.753590 9.507180 1524 +film 1 8 4.875197 4.875197 1761 +tourist 1 8 4.875197 4.875197 1710 +digest 2 7 5.010635 10.021270 1864 +cultur 1 7 5.010635 5.010635 1951 +monei 1 7 5.010635 5.010635 1934 +peterson 1 7 5.010635 5.010635 1850 +christian 1 7 5.010635 5.010635 1949 +scholar 1 6 5.164786 5.164786 2180 +forum 1 6 5.164786 5.164786 2027 +postcard 1 6 5.164786 5.164786 2181 +brook 1 6 5.164786 5.164786 2152 +suni 2 5 5.347108 10.694216 2452 +ucla 1 5 5.347108 5.347108 2502 +hallithaca 1 4 5.568345 5.568345 2894 +binghamton 3 3 5.857933 17.573799 3544 +twin 2 3 5.857933 11.715866 3657 +hongkong 1 3 5.857933 5.857933 3677 +stamp 1 3 5.857933 5.857933 3678 +ryan 1 3 5.857933 5.857933 3679 +tian 1 3 5.857933 5.857933 3680 +stoni 1 3 5.857933 5.857933 3571 +nankai 1 2 6.263398 6.263398 5147 +tianjin 1 2 6.263398 6.263398 5148 +barri 1 2 6.263398 6.263398 5149 +sciencefound 1 2 6.263398 6.263398 5150 +chinaand 1 2 6.263398 6.263398 5151 +sceneri 1 2 6.263398 6.263398 5152 +sheng 1 2 6.263398 6.263398 5153 +liber 1 2 6.263398 6.263398 5154 +wangphd 1 1 6.957497 6.957497 10677 +jiawang 1 1 6.957497 6.957497 10678 +goldwat 1 1 6.957497 6.957497 10679 +cbnet 1 1 6.957497 6.957497 10680 +chinanet 1 1 6.957497 6.957497 10681 +chinesecalendar 1 1 6.957497 6.957497 10682 +mediainform 1 1 6.957497 6.957497 10683 +hongkonglaserdisccent 1 1 6.957497 6.957497 10684 +internetdistribut 1 1 6.957497 6.957497 10685 +multilingu 1 1 6.957497 6.957497 10686 +smovieplex 1 1 6.957497 6.957497 10687 +diwww 1 1 6.957497 6.957497 10688 +thesenior 1 1 6.957497 6.957497 10689 +worldmap 1 1 6.957497 6.957497 10690 +mandarin 1 1 6.957497 6.957497 10691 +cssa 1 1 6.957497 6.957497 10692 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html new file mode 100644 index 00000000..027920af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmiller^jmiller.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +process 1 142 1.945910 1.945910 72 +machin 1 129 2.079442 2.079442 95 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +mani 2 92 2.397895 4.795790 150 +imag 1 91 2.397895 2.397895 161 +level 1 87 2.484907 2.484907 180 +info 1 85 2.484907 2.484907 176 +master 1 76 2.564949 2.564949 216 +prof 1 64 2.772589 2.772589 273 +colleg 1 61 2.833213 2.833213 300 +semest 1 58 2.890372 2.890372 312 +found 1 53 2.944439 2.944439 337 +long 1 43 3.178054 3.178054 413 +vision 3 41 3.218876 9.656628 430 +robot 1 36 3.367296 3.367296 497 +primari 1 25 3.737670 3.737670 669 +particularli 1 19 4.007333 4.007333 867 +miller 2 17 4.110874 8.221748 949 +night 1 11 4.553877 4.553877 1319 +justin 2 7 5.010635 10.021270 1789 +uniform 1 7 5.010635 5.010635 1845 +ramin 1 7 5.010635 5.010635 1820 +zabih 1 6 5.164786 5.164786 2138 +csrvl 1 3 5.857933 5.857933 3543 +navi 1 2 6.263398 6.263398 5155 +com 1 2 6.263398 6.263398 5156 +ofengin 1 1 6.957497 6.957497 10693 +assistantwork 1 1 6.957497 6.957497 10694 +ismachin 1 1 6.957497 6.957497 10695 +informationsom 1 1 6.957497 6.957497 10696 +rant 1 1 6.957497 6.957497 10697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html new file mode 100644 index 00000000..0c202260 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jmm^jmm.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +softwar 3 220 1.386294 4.158882 30 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +updat 1 191 1.609438 1.609438 41 +distribut 2 162 1.791759 3.583518 51 +parallel 2 169 1.791759 3.583518 60 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +high 3 130 2.079442 6.238326 101 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +seattl 1 120 2.079442 2.079442 103 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +school 2 84 2.484907 4.969814 188 +west 1 83 2.484907 2.484907 192 +internet 1 83 2.484907 2.484907 186 +resum 1 79 2.564949 2.564949 217 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +free 1 73 2.639057 2.639057 224 +receiv 1 66 2.708050 2.708050 244 +practic 1 70 2.708050 2.708050 246 +multimedia 1 68 2.708050 2.708050 258 +main 1 67 2.708050 2.708050 256 +new 4 64 2.772589 11.090356 262 +creat 1 63 2.772589 2.772589 277 +street 1 63 2.772589 2.772589 293 +januari 1 62 2.772589 2.772589 264 +cool 2 49 3.044522 6.089044 374 +york 2 41 3.218876 6.437752 435 +press 2 42 3.218876 6.437752 419 +seminar 1 38 3.295837 3.295837 470 +formal 1 37 3.332205 3.332205 478 +game 2 36 3.367296 6.734592 498 +product 1 33 3.433987 3.433987 527 +campu 1 27 3.637586 3.637586 623 +jeff 1 25 3.737670 3.737670 673 +daili 1 24 3.761200 3.761200 706 +divis 1 21 3.912023 3.912023 803 +mpeg 2 20 3.951244 7.902488 831 +anyon 1 17 4.110874 4.110874 916 +intel 1 16 4.174387 4.174387 1000 +practicum 1 16 4.174387 4.174387 960 +jose 1 16 4.174387 4.174387 976 +francisco 1 14 4.317488 4.317488 1095 +went 1 12 4.465908 4.465908 1279 +entertain 1 12 4.465908 4.465908 1286 +newspap 1 12 4.465908 4.465908 1280 +systemsc 1 11 4.553877 4.553877 1293 +mapl 1 11 4.553877 4.553877 1376 +purdu 1 10 4.653960 4.653960 1466 +sundai 1 10 4.653960 4.653960 1387 +leader 1 9 4.753590 4.753590 1576 +sister 1 9 4.753590 4.753590 1524 +portland 1 7 5.010635 5.010635 1878 +chronicl 1 7 5.010635 5.010635 1952 +indiana 1 6 5.164786 5.164786 2057 +oregon 1 5 5.347108 5.347108 2437 +thrive 1 5 5.347108 5.347108 2257 +revolut 1 5 5.347108 5.347108 2315 +encod 2 4 5.568345 11.136690 2929 +dalla 1 4 5.568345 5.568345 2930 +classesc 1 3 5.857933 5.857933 3681 +detroit 1 3 5.857933 5.857933 3565 +counti 1 3 5.857933 5.857933 3682 +cornellopoli 1 2 6.263398 6.263398 5157 +techniquec 1 2 6.263398 6.263398 5158 +methodsc 1 2 6.263398 6.263398 5159 +colloquiumc 1 2 6.263398 6.263398 5160 +magazinepc 1 2 6.263398 6.263398 5161 +morn 1 2 6.263398 6.263398 5162 +orang 1 2 6.263398 6.263398 5163 +herald 1 2 6.263398 6.263398 4789 +hillsboro 2 1 6.957497 13.914994 10698 +moorejeff 1 1 6.957497 6.957497 10699 +moorewel 1 1 6.957497 6.957497 10700 +mastersof 1 1 6.957497 6.957497 10701 +lafayett 1 1 6.957497 6.957497 10702 +suburb 1 1 6.957497 6.957497 10703 +employmentmi 1 1 6.957497 6.957497 10704 +classesnba 1 1 6.957497 6.957497 10705 +sectorc 1 1 6.957497 6.957497 10706 +researchfal 1 1 6.957497 6.957497 10707 +paperc 1 1 6.957497 6.957497 10708 +opendoc 1 1 6.957497 6.957497 10709 +mfcoptim 1 1 6.957497 6.957497 10710 +researchsoftwar 1 1 6.957497 6.957497 10711 +companiesmicrosoftnetscapelotusnovellwordperfectcoreloracleappleibmhardwar 1 1 6.957497 6.957497 10712 +companiesintelsilicon 1 1 6.957497 6.957497 10713 +graphicsibmsunapplemagazinespc 1 1 6.957497 6.957497 10714 +weekpc 1 1 6.957497 6.957497 10715 +computingcomput 1 1 6.957497 6.957497 10716 +shopperwindow 1 1 6.957497 6.957497 10717 +sourcescomput 1 1 6.957497 6.957497 10718 +lifemacusermacweekinteract 1 1 6.957497 6.957497 10719 +weekfamili 1 1 6.957497 6.957497 10720 +pccomput 1 1 6.957497 6.957497 10721 +worldelectron 1 1 6.957497 6.957497 10722 +newspapersusa 1 1 6.957497 6.957497 10723 +todaywal 1 1 6.957497 6.957497 10724 +journalnew 1 1 6.957497 6.957497 10725 +timesphiladelphia 1 1 6.957497 6.957497 10726 +onlineth 1 1 6.957497 6.957497 10727 +worldwideth 1 1 6.957497 6.957497 10728 +opinionsth 1 1 6.957497 6.957497 10729 +gopherth 1 1 6.957497 6.957497 10730 +knoxvil 1 1 6.957497 6.957497 10731 +sentinelth 1 1 6.957497 6.957497 10732 +onlinelat 1 1 6.957497 6.957497 10733 +serviceth 1 1 6.957497 6.957497 10734 +nugget 1 1 6.957497 6.957497 10735 +oregonrworld 1 1 6.957497 6.957497 10736 +registerth 1 1 6.957497 6.957497 10737 +examinersan 1 1 6.957497 6.957497 10738 +mercuryth 1 1 6.957497 6.957497 10739 +timesnando 1 1 6.957497 6.957497 10740 +netusa 1 1 6.957497 6.957497 10741 +todayboston 1 1 6.957497 6.957497 10742 +globeportland 1 1 6.957497 6.957497 10743 +telegramvisitor 1 1 6.957497 6.957497 10744 +fdithaca 1 1 6.957497 6.957497 10745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html new file mode 100644 index 00000000..c551991e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^joseluis^joseluis.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +project 7 340 1.098612 7.690284 18 +us 2 329 1.098612 2.197224 16 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +distribut 2 162 1.791759 3.583518 51 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +click 1 142 1.945910 1.945910 78 +report 1 131 2.079442 2.079442 92 +number 1 130 2.079442 2.079442 97 +code 1 108 2.197225 2.197225 116 +text 1 98 2.302585 2.302585 133 +memori 1 101 2.302585 2.302585 139 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +resum 1 79 2.564949 2.564949 217 +exampl 1 77 2.564949 2.564949 195 +java 3 70 2.708050 8.124150 248 +would 2 67 2.708050 5.416100 251 +view 2 70 2.708050 5.416100 254 +multimedia 1 68 2.708050 2.708050 258 +simul 1 66 2.708050 2.708050 255 +virtual 1 62 2.772589 2.772589 285 +browser 3 56 2.890372 8.671116 313 +direct 1 57 2.890372 2.890372 316 +friend 1 48 3.044522 3.044522 376 +visitor 1 49 3.044522 3.044522 371 +video 2 44 3.135494 6.270988 405 +better 1 45 3.135494 3.135494 401 +movi 1 40 3.258097 3.258097 459 +vita 1 38 3.295837 3.295837 473 +game 1 36 3.367296 3.367296 498 +curriculum 1 33 3.433987 3.433987 535 +anim 2 31 3.496508 6.993016 557 +enabl 3 26 3.688879 11.066637 655 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +jose 1 16 4.174387 4.174387 976 +sign 1 16 4.174387 4.174387 970 +hobbi 1 16 4.174387 4.174387 1009 +transit 1 15 4.248495 4.248495 1046 +avenu 1 12 4.465908 4.465908 1277 +meng 1 12 4.465908 4.465908 1214 +clock 2 11 4.553877 9.107754 1320 +mapl 1 11 4.553877 4.553877 1376 +bill 1 11 4.553877 4.553877 1297 +rivl 1 8 4.875197 4.875197 1632 +autonom 1 8 4.875197 4.875197 1749 +vehicl 1 7 5.010635 5.010635 1928 +courtesi 1 7 5.010635 5.010635 1953 +photographi 1 6 5.164786 5.164786 2146 +recruit 1 6 5.164786 5.164786 2145 +hoca 1 5 5.347108 5.347108 2241 +multitask 1 4 5.568345 5.568345 2803 +crazi 1 4 5.568345 5.568345 2822 +fernandez 1 3 5.857933 5.857933 3591 +lui 3 2 6.263398 18.790194 5164 +joselui 1 2 6.263398 6.263398 4965 +pyramania 1 2 6.263398 6.263398 4957 +actor 1 2 6.263398 6.263398 4240 +pagejos 1 1 6.957497 6.957497 10746 +fernandezjos 1 1 6.957497 6.957497 10747 +fernandezmast 1 1 6.957497 6.957497 10748 +ebithaca 1 1 6.957497 6.957497 10749 +scroll 1 1 6.957497 6.957497 10750 +presentationc 1 1 6.957497 6.957497 10751 +spaceship 1 1 6.957497 6.957497 10752 +battl 1 1 6.957497 6.957497 10753 +picturesmusiccomputerswrit 1 1 6.957497 6.957497 10754 +giel 1 1 6.957497 6.957497 10755 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html new file mode 100644 index 00000000..ca40705a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jrdiazh^jrdiazh.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 2 297 1.098612 2.197224 20 +project 2 340 1.098612 2.197224 18 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +distribut 1 162 1.791759 1.791759 51 +click 1 142 1.945910 1.945910 78 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +simul 1 66 2.708050 2.708050 255 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +administr 1 27 3.637586 3.637586 628 +busi 1 21 3.912023 3.912023 784 +johnson 1 13 4.382027 4.382027 1162 +linda 1 10 4.653960 4.653960 1394 +autonom 1 8 4.875197 4.875197 1749 +vehicl 1 7 5.010635 5.010635 1928 +hurtado 2 1 6.957497 13.914994 10756 +julin 1 1 6.957497 6.957497 10757 +pagejulin 1 1 6.957497 6.957497 10758 +universitymast 1 1 6.957497 6.957497 10759 +managementmast 1 1 6.957497 6.957497 10760 +science 1 1 6.957497 6.957497 10761 +colombia 1 1 6.957497 6.957497 10762 +er 1 1 6.957497 6.957497 10763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html new file mode 100644 index 00000000..9c12a813 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jreich^jreich.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +current 2 284 1.098612 2.197224 21 +cornel 2 215 1.386294 2.772588 23 +email 1 220 1.386294 1.386294 29 +group 1 183 1.609438 1.609438 36 +texa 2 160 1.791759 3.583518 64 +welcom 1 122 2.079442 2.079442 99 +send 1 114 2.197225 2.197225 109 +thing 1 84 2.484907 2.484907 189 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +august 1 66 2.708050 2.708050 257 +septemb 1 65 2.772589 2.772589 274 +favorit 1 44 3.135494 3.135494 410 +join 1 39 3.258097 3.258097 457 +ad 1 32 3.465736 3.465736 544 +instrument 1 7 5.010635 5.010635 1954 +edumi 1 6 5.164786 5.164786 2132 +dalla 1 4 5.568345 5.568345 2930 +janeen 1 1 6.957497 6.957497 10764 +homepagejaneen 1 1 6.957497 6.957497 10765 +reich 1 1 6.957497 6.957497 10766 +jreich 1 1 6.957497 6.957497 10767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm new file mode 100644 index 00000000..a47bb1f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jshapiro^jshapiro.htm @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +home 5 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +engin 3 297 1.098612 3.295836 20 +cours 2 273 1.098612 2.197224 15 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +list 4 201 1.609438 6.437752 39 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +high 2 130 2.079442 4.158884 101 +machin 1 129 2.079442 2.079442 95 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +master 2 76 2.564949 5.129898 216 +come 2 78 2.564949 5.129898 202 +resum 1 79 2.564949 2.564949 217 +dynam 1 76 2.564949 2.564949 194 +receiv 1 66 2.708050 2.708050 244 +multimedia 1 68 2.708050 2.708050 258 +window 1 68 2.708050 2.708050 242 +main 1 67 2.708050 2.708050 256 +septemb 2 65 2.772589 5.545178 274 +januari 1 62 2.772589 2.772589 264 +visit 1 63 2.772589 2.772589 288 +still 1 50 3.044522 3.044522 362 +get 1 46 3.091042 3.091042 380 +cost 1 37 3.332205 3.332205 480 +synchron 1 29 3.583519 3.583519 588 +particip 1 29 3.583519 3.583519 589 +although 1 25 3.737670 3.737670 667 +recognit 1 23 3.806662 3.806662 723 +chip 1 21 3.912023 3.912023 770 +portabl 1 20 3.951244 3.951244 819 +speed 1 18 4.060443 4.060443 911 +stock 2 16 4.174387 8.348774 1007 +massachusett 1 14 4.317488 4.317488 1118 +bodi 3 13 4.382027 13.146081 1178 +meng 1 12 4.465908 4.465908 1214 +speech 1 12 4.465908 4.465908 1222 +grant 1 12 4.465908 4.465908 1216 +systemsc 1 11 4.553877 4.553877 1293 +desktop 1 10 4.653960 4.653960 1445 +capac 1 8 4.875197 4.875197 1740 +filter 1 8 4.875197 4.875197 1641 +mile 1 8 4.875197 4.875197 1743 +ground 1 7 5.010635 5.010635 1955 +facial 2 5 5.347108 10.694216 2438 +amherst 1 5 5.347108 5.347108 2484 +thrive 1 5 5.347108 5.347108 2257 +stage 1 5 5.347108 5.347108 2488 +steer 1 5 5.347108 5.347108 2328 +car 2 4 5.568345 11.136690 2931 +ford 2 4 5.568345 11.136690 2636 +sold 1 4 5.568345 5.568345 2813 +exhaust 1 4 5.568345 5.568345 2825 +gear 1 4 5.568345 5.568345 2891 +visionc 1 3 5.857933 5.857933 3489 +obvious 1 3 5.857933 5.857933 3474 +memberof 1 3 5.857933 5.857933 3169 +bought 1 2 6.263398 6.263398 5165 +accel 1 2 6.263398 6.263398 5166 +plug 1 2 6.263398 6.263398 5167 +camaro 3 1 6.957497 20.872491 10768 +chevi 3 1 6.957497 20.872491 10769 +jodi 1 1 6.957497 6.957497 10770 +shapirojodi 1 1 6.957497 6.957497 10771 +shapiroeduc 1 1 6.957497 6.957497 10772 +engineeringe 1 1 6.957497 6.957497 10773 +telecommunicationc 1 1 6.957497 6.957497 10774 +researchspr 1 1 6.957497 6.957497 10775 +systemse 1 1 6.957497 6.957497 10776 +networksnba 1 1 6.957497 6.957497 10777 +revolutionc 1 1 6.957497 6.957497 10778 +researchma 1 1 6.957497 6.957497 10779 +automot 1 1 6.957497 6.957497 10780 +engineeringinterest 1 1 6.957497 6.957497 10781 +animationlow 1 1 6.957497 6.957497 10782 +videoconferenc 1 1 6.957497 6.957497 10783 +recognitioninterest 1 1 6.957497 6.957497 10784 +firebird 1 1 6.957497 6.957497 10785 +yourselfelectron 1 1 6.957497 6.957497 10786 +fuel 1 1 6.957497 6.957497 10787 +inject 1 1 6.957497 6.957497 10788 +alwayshav 1 1 6.957497 6.957497 10789 +designingan 1 1 6.957497 6.957497 10790 +pageefi 1 1 6.957497 6.957497 10791 +pagethes 1 1 6.957497 6.957497 10792 +gearsmodif 1 1 6.957497 6.957497 10793 +hypertech 1 1 6.957497 6.957497 10794 +flowmast 1 1 6.957497 6.957497 10795 +hurst 1 1 6.957497 6.957497 10796 +shifter 1 1 6.957497 6.957497 10797 +wheel 1 1 6.957497 6.957497 10798 +mustang 1 1 6.957497 6.957497 10799 +speedmodif 1 1 6.957497 6.957497 10800 +motorsport 1 1 6.957497 6.957497 10801 +wiresbest 1 1 6.957497 6.957497 10802 +mphbest 1 1 6.957497 6.957497 10803 +pagenumb 1 1 6.957497 6.957497 10804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html new file mode 100644 index 00000000..85cab4e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^juliak^juliak.html @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +cornel 1 215 1.386294 1.386294 23 +continu 1 39 3.258097 3.258097 448 +eduto 1 7 5.010635 5.010635 1956 +julia 1 2 6.263398 6.263398 5094 +pagejulia 1 1 6.957497 6.957497 10805 +komissarchik 1 1 6.957497 6.957497 10806 +juliak 1 1 6.957497 6.957497 10807 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html new file mode 100644 index 00000000..8bde9462 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^julian^julian.html @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +engin 3 297 1.098612 3.295836 20 +offic 2 299 1.098612 2.197224 13 +current 2 284 1.098612 2.197224 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 4 215 1.386294 5.545176 23 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +paper 1 205 1.609438 1.609438 38 +network 4 168 1.791759 7.167036 61 +avail 2 169 1.791759 3.583518 48 +develop 2 174 1.791759 3.583518 53 +implement 2 152 1.791759 3.583518 52 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +high 1 130 2.079442 2.079442 101 +theori 3 111 2.197225 6.591675 127 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +techniqu 2 99 2.302585 4.605170 138 +need 1 98 2.302585 2.302585 135 +center 2 88 2.397895 4.795790 158 +pictur 1 89 2.397895 2.397895 160 +search 1 95 2.397895 2.397895 155 +wide 1 84 2.484907 2.484907 185 +master 1 76 2.564949 2.564949 216 +complet 1 77 2.564949 2.564949 208 +server 1 76 2.564949 2.564949 204 +upson 1 71 2.639057 2.639057 218 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +addit 1 74 2.639057 2.639057 228 +ithaca 1 65 2.772589 2.772589 294 +hardwar 1 51 2.995732 2.995732 350 +pointer 1 48 3.044522 3.044522 368 +video 3 44 3.135494 9.406482 405 +better 1 45 3.135494 3.135494 401 +describ 1 45 3.135494 3.135494 400 +fast 2 42 3.218876 6.437752 429 +http 1 41 3.218876 3.218876 420 +realli 1 40 3.258097 3.258097 444 +workstat 2 37 3.332205 6.664410 479 +global 1 34 3.401197 3.401197 520 +toler 2 33 3.433987 6.867974 533 +fault 2 32 3.465736 6.931472 547 +independ 1 32 3.465736 3.465736 548 +produc 1 30 3.555348 3.555348 572 +platform 1 29 3.583519 3.583519 591 +full 3 28 3.610918 10.832754 615 +becom 1 28 3.610918 3.610918 603 +cluster 1 28 3.610918 3.610918 612 +campu 1 27 3.637586 3.637586 623 +administr 1 27 3.637586 3.637586 628 +compress 1 23 3.806662 3.806662 719 +color 1 22 3.850148 3.850148 762 +toolkit 1 20 3.951244 3.951244 835 +increas 1 20 3.951244 3.951244 829 +commerci 1 16 4.174387 4.174387 1005 +critic 1 16 4.174387 4.174387 982 +topolog 1 14 4.317488 4.317488 1089 +demand 1 14 4.317488 4.317488 1073 +horu 1 14 4.317488 4.317488 1116 +achiev 1 14 4.317488 4.317488 1088 +grow 2 12 4.465908 8.931816 1209 +faster 1 11 4.553877 4.553877 1323 +screen 1 9 4.753590 4.753590 1577 +hallcornel 1 8 4.875197 4.875197 1757 +capit 1 7 5.010635 5.010635 1957 +thegoal 1 6 5.164786 5.164786 2033 +sparcstat 1 5 5.347108 5.347108 2406 +fulfil 1 4 5.568345 5.568345 2932 +innov 1 4 5.568345 5.568345 2933 +emilio 1 3 5.857933 5.857933 3683 +summit 1 3 5.857933 5.857933 3684 +adress 1 2 6.263398 6.263398 5168 +occup 1 2 6.263398 6.263398 5169 +fulltim 1 2 6.263398 6.263398 5170 +ethernet 1 2 6.263398 6.263398 5171 +blast 1 2 6.263398 6.263398 5172 +julian 3 1 6.957497 20.872491 10808 +pelenur 2 1 6.957497 13.914994 10809 +centerithaca 1 1 6.957497 6.957497 10810 +wfinger 1 1 6.957497 6.957497 10811 +cyberserv 1 1 6.957497 6.957497 10812 +httpserver 1 1 6.957497 6.957497 10813 +prvf 1 1 6.957497 6.957497 10814 +poss 1 1 6.957497 6.957497 10815 +screenmot 1 1 6.957497 6.957497 10816 +showthat 1 1 6.957497 6.957497 10817 +snarf 1 1 6.957497 6.957497 10818 +transferwith 1 1 6.957497 6.957497 10819 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html new file mode 100644 index 00000000..229a9238 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^jyh^jyh.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +cornel 4 215 1.386294 5.545176 23 +languag 3 227 1.386294 4.158882 26 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +paper 3 205 1.609438 4.828314 38 +includ 2 208 1.609438 3.218876 42 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +relat 1 139 1.945910 1.945910 68 +perform 1 143 1.945910 1.945910 74 +tool 1 117 2.079442 2.079442 93 +schedul 1 119 2.079442 2.079442 85 +theori 3 111 2.197225 6.591675 127 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +technic 1 100 2.302585 2.302585 140 +take 1 97 2.302585 2.302585 134 +center 1 88 2.397895 2.397895 158 +resourc 3 81 2.484907 7.454721 172 +info 1 85 2.484907 2.484907 176 +want 1 79 2.564949 2.564949 199 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +practic 1 70 2.708050 2.708050 246 +abstract 1 62 2.772589 2.772589 276 +type 3 61 2.833213 8.499639 296 +back 1 60 2.833213 2.833213 297 +publish 1 57 2.890372 2.890372 326 +overview 1 56 2.890372 2.890372 323 +talk 1 53 2.944439 2.944439 336 +done 1 47 3.091042 3.091042 381 +slide 2 38 3.295837 6.591674 467 +seminar 1 38 3.295837 3.295837 470 +especi 1 36 3.367296 3.367296 496 +bibliographi 1 34 3.401197 3.401197 518 +art 3 29 3.583519 10.750557 593 +great 1 27 3.637586 3.637586 626 +mine 1 26 3.688879 3.688879 654 +higher 1 24 3.761200 3.761200 690 +sequenc 1 23 3.806662 3.806662 734 +fine 2 20 3.951244 7.902488 822 +supervis 1 20 3.951244 3.951244 840 +verif 1 20 3.951244 3.951244 826 +statu 1 18 4.060443 4.060443 885 +pretti 1 13 4.382027 4.382027 1191 +forth 1 13 4.382027 4.382027 1186 +nuprl 2 10 4.653960 9.307920 1402 +mainli 1 10 4.653960 4.653960 1432 +equip 1 10 4.653960 4.653960 1459 +hockei 2 8 4.875197 9.750394 1760 +forum 1 6 5.164786 5.164786 2027 +czar 3 5 5.347108 16.041324 2503 +hickei 1 4 5.568345 5.568345 2845 +identif 1 4 5.568345 5.568345 2773 +jason 1 3 5.857933 5.857933 3389 +orth 1 3 5.857933 5.857933 3685 +backcountri 1 3 5.857933 5.857933 3686 +publicli 1 3 5.857933 5.857933 3687 +theatr 2 2 6.263398 12.526796 5173 +universitydepart 1 2 6.263398 6.263398 4871 +bellcor 1 2 6.263398 6.263398 5174 +robertconst 1 1 6.957497 6.957497 10820 +thefox 1 1 6.957497 6.957497 10821 +markleon 1 1 6.957497 6.957497 10822 +formalsystem 1 1 6.957497 6.957497 10823 +levelmodul 1 1 6.957497 6.957497 10824 +cornella 1 1 6.957497 6.957497 10825 +publishedat 1 1 6.957497 6.957497 10826 +havegiven 1 1 6.957497 6.957497 10827 +theygiv 1 1 6.957497 6.957497 10828 +galleryof 1 1 6.957497 6.957497 10829 +tryth 1 1 6.957497 6.957497 10830 +fineart 1 1 6.957497 6.957497 10831 +cucshockei 1 1 6.957497 6.957497 10832 +thebackcountri 1 1 6.957497 6.957497 10833 +maintainedsoftwar 1 1 6.957497 6.957497 10834 +hockeyfor 1 1 6.957497 6.957497 10835 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html new file mode 100644 index 00000000..d11969b1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kamijo^kamijo.html @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +thing 1 84 2.484907 2.484907 189 +ithaca 1 65 2.772589 2.772589 294 +juli 2 60 2.833213 5.666426 305 +back 2 60 2.833213 5.666426 297 +go 1 33 3.433987 3.433987 529 +reach 1 24 3.761200 3.761200 688 +accept 1 18 4.060443 4.060443 879 +english 2 15 4.248495 8.496990 1033 +japan 2 8 4.875197 9.750394 1762 +japanes 2 4 5.568345 11.136690 2934 +sell 1 4 5.568345 5.568345 2935 +sold 1 4 5.568345 5.568345 2813 +sale 1 3 5.857933 5.857933 3688 +kamijo 4 1 6.957497 27.829988 10836 +koichi 2 1 6.957497 13.914994 10837 +kamijokoichi 1 1 6.957497 6.957497 10838 +papershometownseduc 1 1 6.957497 6.957497 10839 +experienceskoichi 1 1 6.957497 6.957497 10840 +muriel 1 1 6.957497 6.957497 10841 +kkamijoh 1 1 6.957497 6.957497 10842 +vnet 1 1 6.957497 6.957497 10843 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html new file mode 100644 index 00000000..4d705c69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karl^home.html @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +cornel 6 215 1.386294 8.317764 23 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +document 1 121 2.079442 2.079442 89 +confer 1 126 2.079442 2.079442 100 +structur 1 106 2.197225 2.197225 105 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +build 2 85 2.484907 4.969814 184 +level 2 87 2.484907 4.969814 180 +upson 1 71 2.639057 2.639057 218 +laboratori 2 63 2.772589 5.545178 292 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +previou 1 62 2.772589 2.772589 290 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +investig 1 51 2.995732 2.995732 353 +advisor 1 51 2.995732 2.995732 355 +better 1 45 3.135494 3.135494 401 +video 1 44 3.135494 3.135494 405 +autom 1 41 3.218876 3.218876 434 +vision 1 41 3.218876 3.218876 430 +york 1 41 3.218876 3.218876 435 +announc 1 40 3.258097 3.258097 441 +close 1 38 3.295837 3.295837 465 +field 1 37 3.332205 3.332205 482 +robot 3 36 3.367296 10.101888 497 +next 1 34 3.401197 3.401197 517 +articl 1 33 3.433987 3.433987 530 +collabor 1 32 3.465736 3.465736 543 +anim 1 31 3.496508 3.496508 557 +graph 1 30 3.555348 3.555348 576 +manipul 3 27 3.637586 10.912758 624 +arrai 1 27 3.637586 3.637586 627 +strategi 2 25 3.737670 7.475340 682 +handl 1 24 3.761200 3.761200 685 +magazin 1 24 3.761200 3.761200 704 +higher 1 24 3.761200 3.761200 690 +director 1 22 3.850148 3.850148 767 +navig 1 21 3.912023 3.912023 796 +facil 1 20 3.951244 3.951244 814 +stanford 2 17 4.110874 8.221748 955 +germani 1 17 4.110874 4.110874 946 +devic 1 16 4.174387 4.174387 1002 +vector 1 16 4.174387 4.174387 961 +micro 3 15 4.248495 12.745485 1031 +club 1 15 4.248495 4.248495 1058 +earlier 1 13 4.382027 4.382027 1140 +forc 1 10 4.653960 4.653960 1384 +donald 1 9 4.753590 4.753590 1510 +frank 1 9 4.753590 4.753590 1568 +wall 1 9 4.753590 4.753590 1553 +wire 1 8 4.875197 4.875197 1747 +gate 1 6 5.164786 5.164786 2182 +layout 1 6 5.164786 5.164786 2183 +lloyd 1 6 5.164786 5.164786 2103 +educurr 1 5 5.347108 5.347108 2504 +actuat 1 5 5.347108 5.347108 2442 +climb 1 4 5.568345 5.568345 2936 +karl 3 3 5.857933 17.573799 3623 +bhringer 2 3 5.857933 11.715866 3606 +karlsruh 2 3 5.857933 11.715866 3689 +microfabr 2 3 5.857933 11.715866 3610 +noel 1 3 5.857933 5.857933 3376 +kwon 1 3 5.857933 5.857933 3690 +deeper 1 3 5.857933 5.857933 3146 +friedrich 2 2 6.263398 12.526796 5175 +nanofabr 1 2 6.263398 6.263398 5010 +innew 1 2 6.263398 6.263398 4512 +sculptur 1 2 6.263398 6.263398 5176 +wright 1 2 6.263398 6.263398 5177 +nano 1 2 6.263398 6.263398 4961 +pagekarl 1 2 6.263398 6.263398 5043 +andassembl 2 1 6.957497 13.914994 10844 +dipl 1 1 6.957497 6.957497 10845 +implementmicro 1 1 6.957497 6.957497 10846 +withprogramm 1 1 6.957497 6.957497 10847 +professorbruc 1 1 6.957497 6.957497 10848 +founder 1 1 6.957497 6.957497 10849 +macdonaldand 1 1 6.957497 6.957497 10850 +hisresearch 1 1 6.957497 6.957497 10851 +invis 1 1 6.957497 6.957497 10852 +cantilev 1 1 6.957497 6.957497 10853 +fallingwat 1 1 6.957497 6.957497 10854 +outin 1 1 6.957497 6.957497 10855 +lindseth 1 1 6.957497 6.957497 10856 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html new file mode 100644 index 00000000..e6fd657a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^karr^karr.html @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +interest 8 384 0.693147 5.545176 11 +work 3 380 0.693147 2.079441 9 +research 3 431 0.693147 2.079441 10 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +project 3 340 1.098612 3.295836 18 +engin 3 297 1.098612 3.295836 20 +us 2 329 1.098612 2.197224 16 +time 2 293 1.098612 2.197224 17 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cornel 5 215 1.386294 6.931470 23 +gener 2 220 1.386294 2.772588 27 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +includ 2 208 1.609438 3.218876 42 +updat 1 191 1.609438 1.609438 41 +distribut 5 162 1.791759 8.958795 51 +applic 4 170 1.791759 7.167036 56 +develop 3 174 1.791759 5.375277 53 +network 2 168 1.791759 3.583518 61 +implement 1 152 1.791759 1.791759 52 +avail 1 169 1.791759 1.791759 48 +problem 4 147 1.945910 7.783640 75 +area 2 144 1.945910 3.891820 80 +hall 1 146 1.945910 1.945910 65 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +provid 4 121 2.079442 8.317768 94 +high 1 130 2.079442 2.079442 101 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +mathemat 2 108 2.197225 4.394450 123 +site 2 106 2.197225 4.394450 119 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +world 1 115 2.197225 2.197225 126 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +techniqu 2 99 2.302585 4.605170 138 +part 2 98 2.302585 4.605170 129 +user 1 104 2.302585 2.302585 137 +commun 3 95 2.397895 7.193685 157 +call 1 91 2.397895 2.397895 153 +mani 1 92 2.397895 2.397895 150 +wide 3 84 2.484907 7.454721 185 +help 1 83 2.484907 2.484907 175 +control 1 82 2.484907 2.484907 164 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +larg 1 82 2.484907 2.484907 168 +activ 1 84 2.484907 2.484907 182 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +messag 2 76 2.564949 5.129898 212 +method 1 80 2.564949 2.564949 213 +appear 1 78 2.564949 2.564949 210 +exampl 1 77 2.564949 2.564949 195 +june 1 79 2.564949 2.564949 214 +david 2 71 2.639057 5.278114 232 +html 2 75 2.639057 5.278114 235 +upson 1 71 2.639057 2.639057 218 +logic 1 71 2.639057 2.639057 230 +write 1 72 2.639057 2.639057 222 +java 3 70 2.708050 8.124150 248 +order 2 69 2.708050 5.416100 249 +differ 1 66 2.708050 2.708050 253 +ithaca 1 65 2.772589 2.772589 294 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +browser 1 56 2.890372 2.890372 313 +profession 1 51 2.995732 2.995732 345 +give 1 50 3.044522 3.044522 359 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +protocol 8 45 3.135494 25.083952 407 +even 1 45 3.135494 3.135494 393 +execut 1 45 3.135494 3.135494 404 +offer 1 43 3.178054 3.178054 414 +might 1 41 3.218876 3.218876 426 +programm 1 39 3.258097 3.258097 445 +multipl 1 39 3.258097 3.258097 453 +correct 1 38 3.295837 3.295837 462 +field 1 37 3.332205 3.332205 482 +cost 1 37 3.332205 3.332205 480 +feel 1 37 3.332205 3.332205 483 +respons 1 37 3.332205 3.332205 476 +download 1 36 3.367296 3.367296 489 +statist 2 35 3.401197 6.802394 521 +concurr 1 34 3.401197 3.401197 501 +random 1 34 3.401197 3.401197 511 +dissert 1 32 3.465736 3.465736 549 +kind 1 32 3.465736 3.465736 541 +specifi 4 30 3.555348 14.221392 568 +abl 1 30 3.555348 3.555348 566 +depend 1 29 3.583519 3.583519 583 +intend 1 28 3.610918 3.610918 599 +propos 1 28 3.610918 3.610918 602 +becom 1 28 3.610918 3.610918 603 +progress 1 28 3.610918 3.610918 598 +measur 1 28 3.610918 3.610918 609 +load 1 28 3.610918 3.610918 601 +except 1 28 3.610918 3.610918 607 +though 1 27 3.637586 3.637586 622 +consist 3 26 3.688879 11.066637 651 +revis 1 26 3.688879 3.688879 640 +reliabl 1 25 3.737670 3.737670 674 +fundament 1 25 3.737670 3.737670 661 +wai 1 25 3.737670 3.737670 662 +demonstr 1 24 3.761200 3.761200 694 +initi 1 23 3.806662 3.806662 717 +properti 7 22 3.850148 26.951036 749 +sciencecornel 1 22 3.850148 3.850148 768 +avoid 1 21 3.912023 3.912023 799 +verif 2 20 3.951244 7.902488 826 +applet 2 20 3.951244 7.902488 827 +basi 1 20 3.951244 3.951244 828 +assum 1 19 4.007333 4.007333 845 +hypertext 1 19 4.007333 4.007333 865 +concentr 3 18 4.060443 12.181329 906 +failur 1 18 4.060443 4.060443 898 +behavior 1 18 4.060443 4.060443 881 +encourag 1 18 4.060443 4.060443 880 +layer 12 17 4.110874 49.330488 926 +ultim 1 17 4.110874 4.110874 943 +condit 1 16 4.174387 4.174387 975 +portion 1 16 4.174387 4.174387 971 +action 1 15 4.248495 4.248495 1038 +horu 8 14 4.317488 34.539904 1116 +weak 3 13 4.382027 13.146081 1159 +whose 2 13 4.382027 8.764054 1166 +edui 1 13 4.382027 4.382027 1193 +suit 1 13 4.382027 4.382027 1129 +verifi 2 12 4.465908 8.931816 1261 +kenneth 1 12 4.465908 4.465908 1265 +minor 1 12 4.465908 4.465908 1237 +emploi 1 12 4.465908 4.465908 1284 +calcul 1 12 4.465908 4.465908 1268 +scienceat 1 11 4.553877 4.553877 1375 +reness 1 11 4.553877 4.553877 1333 +host 1 11 4.553877 4.553877 1306 +stack 10 10 4.653960 46.539600 1389 +guarante 2 10 4.653960 9.307920 1391 +certain 1 10 4.653960 4.653960 1393 +equal 1 10 4.653960 4.653960 1424 +desir 2 9 4.753590 9.507180 1542 +robbert 1 9 4.753590 4.753590 1529 +tempor 1 9 4.753590 4.753590 1584 +crash 1 8 4.875197 4.875197 1616 +notion 1 7 5.010635 5.010635 1947 +appar 1 7 5.010635 5.010635 1958 +dedic 1 7 5.010635 5.010635 1843 +hack 1 7 5.010635 5.010635 1950 +furthermor 1 6 5.164786 5.164786 2141 +rough 1 6 5.164786 5.164786 2107 +studentdepart 1 5 5.347108 5.347108 2505 +unnecessari 1 5 5.347108 5.347108 2506 +lost 1 5 5.347108 5.347108 2358 +ofdistribut 1 5 5.347108 5.347108 2316 +notabl 1 5 5.347108 5.347108 2276 +puzzl 1 5 5.347108 5.347108 2507 +disconnect 2 4 5.568345 11.136690 2664 +clearli 1 4 5.568345 5.568345 2590 +formula 1 3 5.857933 5.857933 3405 +omit 1 3 5.857933 5.857933 3466 +confid 1 3 5.857933 5.857933 3691 +temporarili 1 3 5.857933 5.857933 3692 +parallelmachin 1 3 5.857933 5.857933 3693 +lego 1 3 5.857933 5.857933 3188 +prone 2 2 6.263398 12.526796 5178 +thehoru 1 2 6.263398 6.263398 5179 +withprofessor 1 2 6.263398 6.263398 5180 +ofhoru 1 2 6.263398 6.263398 5181 +straightforward 1 2 6.263398 6.263398 4272 +thetop 1 2 6.263398 6.263398 4327 +atyp 1 2 6.263398 6.263398 5042 +sufficientto 1 2 6.263398 6.263398 4261 +haswork 1 2 6.263398 6.263398 5182 +distributedenviron 1 2 6.263398 6.263398 5183 +toi 1 2 6.263398 6.263398 5184 +linksfor 1 2 6.263398 6.263398 5185 +karr 3 1 6.957497 20.872491 10857 +karrdavid 1 1 6.957497 6.957497 10858 +karrphd 1 1 6.957497 6.957497 10859 +birmananddr 1 1 6.957497 6.957497 10860 +protocolsmi 1 1 6.957497 6.957497 10861 +formalspecif 1 1 6.957497 6.957497 10862 +variousinterest 1 1 6.957497 6.957497 10863 +usedin 1 1 6.957497 6.957497 10864 +stylefor 1 1 6.957497 6.957497 10865 +itsinterfac 1 1 6.957497 6.957497 10866 +andbelow 1 1 6.957497 6.957497 10867 +agiven 1 1 6.957497 6.957497 10868 +unusualcombin 1 1 6.957497 6.957497 10869 +systemsshould 1 1 6.957497 6.957497 10870 +constructcustom 1 1 6.957497 6.957497 10871 +theirassoci 1 1 6.957497 6.957497 10872 +thesecur 1 1 6.957497 6.957497 10873 +harden 1 1 6.957497 6.957497 10874 +ofverifi 1 1 6.957497 6.957497 10875 +stem 1 1 6.957497 6.957497 10876 +thepromis 1 1 6.957497 6.957497 10877 +variousguarante 1 1 6.957497 6.957497 10878 +passingenviron 1 1 6.957497 6.957497 10879 +delayedor 1 1 6.957497 6.957497 10880 +componentswer 1 1 6.957497 6.957497 10881 +considerablepromis 1 1 6.957497 6.957497 10882 +consistencywhil 1 1 6.957497 6.957497 10883 +filesin 1 1 6.957497 6.957497 10884 +partitionedinto 1 1 6.957497 6.957497 10885 +wouldallow 1 1 6.957497 6.957497 10886 +performancemi 1 1 6.957497 6.957497 10887 +andeffici 1 1 6.957497 6.957497 10888 +ofsystem 1 1 6.957497 6.957497 10889 +suscept 1 1 6.957497 6.957497 10890 +javath 1 1 6.957497 6.957497 10891 +applicationwith 1 1 6.957497 6.957497 10892 +tonavig 1 1 6.957497 6.957497 10893 +myweb 1 1 6.957497 6.957497 10894 +anetscap 1 1 6.957497 6.957497 10895 +abirthdai 1 1 6.957497 6.957497 10896 +forverifi 1 1 6.957497 6.957497 10897 +affiliationsi 1 1 6.957497 6.957497 10898 +andmaa 1 1 6.957497 6.957497 10899 +informationseemi 1 1 6.957497 6.957497 10900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html new file mode 100644 index 00000000..437ba001 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kartikhk^homepg.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 7 443 0.693147 4.852029 6 +work 4 380 0.693147 2.772588 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +engin 3 297 1.098612 3.295836 20 +cours 2 273 1.098612 2.197224 15 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +softwar 2 220 1.386294 2.772588 30 +languag 2 227 1.386294 2.772588 26 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +oper 3 180 1.609438 4.828314 34 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +distribut 2 162 1.791759 3.583518 51 +network 2 168 1.791759 3.583518 61 +develop 2 174 1.791759 3.583518 53 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +click 5 142 1.945910 9.729550 78 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +postscript 2 131 2.079442 4.158884 90 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +code 2 108 2.197225 4.394450 116 +site 1 106 2.197225 2.197225 119 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +graphic 4 90 2.397895 9.591580 147 +present 1 91 2.397895 2.397895 145 +help 1 83 2.484907 2.484907 175 +interfac 2 79 2.564949 5.129898 209 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +multimedia 2 68 2.708050 5.416100 258 +view 2 70 2.708050 5.416100 254 +main 1 67 2.708050 2.708050 256 +window 1 68 2.708050 2.708050 242 +simul 1 66 2.708050 2.708050 255 +virtual 2 62 2.772589 5.545178 285 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +scientif 1 53 2.944439 2.944439 341 +cool 2 49 3.044522 6.089044 374 +visual 1 48 3.044522 3.044522 372 +effect 2 46 3.091042 6.182084 385 +california 1 46 3.091042 3.091042 388 +featur 1 46 3.091042 3.091042 386 +favorit 2 44 3.135494 6.270988 410 +video 1 44 3.135494 3.135494 405 +combin 1 42 3.218876 3.218876 421 +music 1 42 3.218876 3.218876 436 +brian 1 38 3.295837 3.295837 466 +open 1 38 3.295837 3.295837 469 +game 2 36 3.367296 6.734592 498 +singl 1 34 3.401197 3.401197 510 +independ 1 32 3.465736 3.465736 548 +platform 1 29 3.583519 3.583519 591 +full 1 28 3.610918 3.610918 615 +enhanc 1 26 3.688879 3.688879 644 +aspect 1 25 3.737670 3.737670 663 +universityithaca 1 24 3.761200 3.761200 710 +motion 1 24 3.761200 3.761200 699 +sciencecornel 1 22 3.850148 3.850148 768 +chip 1 21 3.912023 3.912023 770 +break 2 20 3.951244 7.902488 812 +smith 1 20 3.951244 3.951244 820 +facil 1 20 3.951244 3.951244 814 +mpeg 1 20 3.951244 3.951244 831 +excel 1 19 4.007333 4.007333 868 +lot 2 18 4.060443 8.120886 889 +stand 1 18 4.060443 4.060443 891 +took 2 16 4.174387 8.348774 1010 +diego 1 16 4.174387 4.174387 992 +transit 2 15 4.248495 8.496990 1046 +drive 1 15 4.248495 4.248495 1052 +track 1 15 4.248495 4.248495 1029 +scene 2 14 4.317488 8.634976 1114 +resolut 1 13 4.382027 4.382027 1172 +jonathan 1 13 4.382027 4.382027 1174 +incorpor 1 13 4.382027 4.382027 1163 +entertain 1 12 4.465908 4.465908 1286 +captur 1 12 4.465908 4.465908 1232 +realiti 1 12 4.465908 4.465908 1272 +player 1 11 4.553877 4.553877 1371 +primit 1 11 4.553877 4.553877 1317 +screen 1 9 4.753590 4.753590 1577 +rivl 2 8 4.875197 9.750394 1632 +capac 1 8 4.875197 4.875197 1740 +star 1 8 4.875197 4.875197 1717 +pronounc 1 7 5.010635 5.010635 1918 +vehicl 1 7 5.010635 5.010635 1928 +clip 1 7 5.010635 5.010635 1868 +quick 1 6 5.164786 5.164786 2184 +railroad 1 6 5.164786 5.164786 2161 +hypothet 1 5 5.347108 5.347108 2474 +opengl 1 5 5.347108 5.347108 2299 +screenshot 2 4 5.568345 11.136690 2743 +multitask 1 4 5.568345 5.568345 2803 +enjoy 1 4 5.568345 5.568345 2937 +swartz 1 4 5.568345 5.568345 2878 +engineeringclass 1 3 5.857933 5.857933 3667 +lai 1 3 5.857933 5.857933 3694 +inventor 1 3 5.857933 5.857933 3695 +hodja 1 2 6.263398 6.263398 4972 +fledg 1 2 6.263398 6.263398 4973 +resumesom 1 2 6.263398 6.263398 5186 +hogman 3 1 6.957497 20.872491 10901 +qualcomm 2 1 6.957497 13.914994 10902 +pagekartik 1 1 6.957497 6.957497 10903 +kapadiamast 1 1 6.957497 6.957497 10904 +dabnei 1 1 6.957497 6.957497 10905 +kkapadia 1 1 6.957497 6.957497 10906 +comcurr 1 1 6.957497 6.957497 10907 +incorporatedmi 1 1 6.957497 6.957497 10908 +projectshoca 1 1 6.957497 6.957497 10909 +chiphoca 1 1 6.957497 6.957497 10910 +gameboard 1 1 6.957497 6.957497 10911 +rivlrivl 1 1 6.957497 6.957497 10912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html new file mode 100644 index 00000000..97cb45a2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kettnake^kettnake.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +sinc 1 90 2.397895 2.397895 159 +center 1 88 2.397895 2.397895 158 +thing 2 84 2.484907 4.969814 189 +school 1 84 2.484907 2.484907 188 +wide 1 84 2.484907 2.484907 185 +write 2 72 2.639057 5.278114 222 +onlin 1 75 2.639057 2.639057 223 +written 1 63 2.772589 2.772589 278 +back 1 60 2.833213 2.833213 297 +publish 4 57 2.890372 11.561488 326 +think 1 57 2.890372 2.890372 314 +much 1 52 2.995732 2.995732 349 +keep 1 44 3.135494 3.135494 409 +press 1 42 3.218876 3.218876 419 +howev 1 41 3.218876 3.218876 422 +editor 1 41 3.218876 3.218876 433 +go 1 33 3.433987 3.433987 529 +mine 1 26 3.688879 3.688879 654 +rather 1 26 3.688879 3.688879 642 +other 1 24 3.761200 3.761200 697 +wrote 2 20 3.951244 7.902488 830 +wonder 1 20 3.951244 3.951244 815 +letter 4 16 4.174387 16.697548 981 +went 1 12 4.465908 4.465908 1279 +newspap 1 12 4.465908 4.465908 1280 +true 1 10 4.653960 4.653960 1422 +rapid 1 10 4.653960 4.653960 1453 +poetri 1 9 4.753590 4.753590 1596 +mile 1 8 4.875197 4.875197 1743 +conflict 1 6 5.164786 5.164786 2041 +famou 1 6 5.164786 5.164786 2185 +grand 1 5 5.347108 5.347108 2425 +mess 1 4 5.568345 5.568345 2886 +flame 1 3 5.857933 5.857933 3696 +arm 1 3 5.857933 5.857933 3697 +argu 1 3 5.857933 5.857933 3698 +dread 1 3 5.857933 5.857933 3630 +suspect 2 2 6.263398 12.526796 5187 +anyhow 1 2 6.263398 6.263398 5188 +mathematician 1 2 6.263398 6.263398 5189 +terrorist 1 2 6.263398 6.263398 5190 +writingsi 1 1 6.957497 6.957497 10913 +proudof 1 1 6.957497 6.957497 10914 +morethought 1 1 6.957497 6.957497 10915 +conscienti 1 1 6.957497 6.957497 10916 +objector 1 1 6.957497 6.957497 10917 +myfirst 1 1 6.957497 6.957497 10918 +fewyear 1 1 6.957497 6.957497 10919 +gulf 1 1 6.957497 6.957497 10920 +vestart 1 1 6.957497 6.957497 10921 +lest 1 1 6.957497 6.957497 10922 +dprobabl 1 1 6.957497 6.957497 10923 +essayist 1 1 6.957497 6.957497 10924 +byron 1 1 6.957497 6.957497 10925 +asuburb 1 1 6.957497 6.957497 10926 +unabomb 1 1 6.957497 6.957497 10927 +mathematiciansar 1 1 6.957497 6.957497 10928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html new file mode 100644 index 00000000..e2a5a1c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kguo^home.html @@ -0,0 +1,269 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +scienc 4 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +system 4 443 0.693147 2.772588 6 +work 2 380 0.693147 1.386294 9 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 6 215 1.386294 8.317764 23 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 3 183 1.609438 4.828314 36 +modifi 1 178 1.609438 1.609438 35 +austin 4 168 1.791759 7.167036 63 +distribut 3 162 1.791759 5.375277 51 +network 2 168 1.791759 3.583518 61 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +texa 1 160 1.791759 1.791759 64 +hall 2 146 1.945910 3.891820 65 +relat 2 139 1.945910 3.891820 68 +lectur 2 135 1.945910 3.891820 73 +file 1 132 1.945910 1.945910 70 +report 2 131 2.079442 4.158884 92 +databas 2 122 2.079442 4.158884 86 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +confer 1 126 2.079442 2.079442 100 +place 2 106 2.197225 4.394450 124 +structur 1 106 2.197225 2.197225 105 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +commun 2 95 2.397895 4.795790 157 +proceed 2 93 2.397895 4.795790 152 +imag 2 91 2.397895 4.795790 161 +octob 1 89 2.397895 2.397895 156 +search 1 95 2.397895 2.397895 155 +info 5 85 2.484907 12.424535 176 +ieee 2 86 2.484907 4.969814 190 +journal 2 83 2.484907 4.969814 183 +school 2 84 2.484907 4.969814 188 +institut 1 84 2.484907 2.484907 187 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +appear 2 78 2.564949 5.129898 210 +sourc 2 77 2.564949 5.129898 201 +interfac 1 79 2.564949 2.564949 209 +refer 1 78 2.564949 2.564949 203 +html 5 75 2.639057 13.195285 235 +upson 1 71 2.639057 2.639057 218 +workshop 1 71 2.639057 2.639057 239 +servic 1 72 2.639057 2.639057 236 +david 1 71 2.639057 2.639057 232 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +ithaca 4 65 2.772589 11.090356 294 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +dept 1 64 2.772589 2.772589 291 +plan 1 65 2.772589 2.772589 272 +new 1 64 2.772589 2.772589 262 +street 1 63 2.772589 2.772589 293 +march 1 61 2.833213 2.833213 295 +simpl 1 60 2.833213 2.833213 298 +content 1 59 2.833213 2.833213 302 +direct 1 57 2.890372 2.890372 316 +explor 1 58 2.890372 2.890372 324 +overview 1 56 2.890372 2.890372 323 +index 1 56 2.890372 2.890372 309 +tabl 1 51 2.995732 2.995732 346 +cool 1 49 3.044522 3.044522 374 +pointer 1 48 3.044522 3.044522 368 +life 1 50 3.044522 3.044522 375 +electron 2 47 3.091042 6.182084 379 +mark 1 44 3.135494 3.135494 403 +netscap 1 44 3.135494 3.135494 395 +music 2 42 3.218876 6.437752 436 +map 1 39 3.258097 3.258097 452 +industri 2 38 3.295837 6.591674 464 +open 1 38 3.295837 3.295837 469 +china 1 37 3.332205 3.332205 487 +global 1 34 3.401197 3.401197 520 +common 1 30 3.555348 3.555348 574 +synchron 1 29 3.583519 3.583519 588 +weather 1 28 3.610918 3.610918 618 +bound 1 26 3.688879 3.688879 659 +reliabl 2 25 3.737670 7.475340 674 +sport 1 25 3.737670 3.737670 683 +other 2 24 3.761200 7.522400 697 +scalabl 1 24 3.761200 3.761200 705 +yahoo 1 24 3.761200 3.761200 707 +flexibl 1 21 3.912023 3.912023 792 +navig 1 21 3.912023 3.912023 796 +kernel 1 20 3.951244 3.951244 825 +binari 1 20 3.951244 3.951244 823 +lyco 1 19 4.007333 4.007333 871 +women 2 16 4.174387 8.348774 1004 +commerci 1 16 4.174387 4.174387 1005 +brief 1 16 4.174387 4.174387 1001 +stock 1 16 4.174387 4.174387 1007 +horu 2 14 4.317488 8.634976 1116 +canada 1 13 4.382027 4.382027 1158 +alan 1 13 4.382027 4.382027 1146 +brad 2 12 4.465908 8.931816 1264 +weight 1 12 4.465908 4.465908 1204 +kenneth 1 12 4.465908 4.465908 1265 +reness 4 11 4.553877 18.215508 1333 +transpar 1 11 4.553877 4.553877 1325 +lake 1 11 4.553877 4.553877 1373 +itali 1 11 4.553877 4.553877 1378 +werner 3 10 4.653960 13.961880 1385 +cook 2 10 4.653960 9.307920 1464 +consortium 1 10 4.653960 4.653960 1467 +mosaic 1 10 4.653960 4.653960 1426 +sosp 1 10 4.653960 4.653960 1416 +mountain 1 10 4.653960 4.653960 1456 +jean 1 10 4.653960 4.653960 1440 +cheng 1 10 4.653960 4.653960 1381 +robbert 4 9 4.753590 19.014360 1529 +birman 3 9 4.753590 14.260770 1531 +wall 2 9 4.753590 9.507180 1553 +light 1 9 4.753590 4.753590 1533 +motorola 1 9 4.753590 4.753590 1546 +vogel 3 8 4.875197 14.625591 1622 +sigop 2 8 4.875197 9.750394 1727 +european 1 8 4.875197 4.875197 1763 +heart 1 8 4.875197 4.875197 1729 +grove 1 8 4.875197 4.875197 1675 +wire 1 8 4.875197 4.875197 1747 +edg 1 8 4.875197 4.875197 1647 +synchroni 1 7 5.010635 5.010635 1923 +gatewai 1 7 5.010635 5.010635 1942 +discoveri 1 7 5.010635 5.010635 1915 +hunt 1 7 5.010635 5.010635 1798 +scout 1 7 5.010635 5.010635 1903 +antonio 1 6 5.164786 5.164786 2186 +symposiumon 1 6 5.164786 5.164786 2054 +alex 1 6 5.164786 5.164786 2130 +theproject 1 6 5.164786 5.164786 1981 +postcard 1 6 5.164786 5.164786 2181 +atlant 1 5 5.347108 5.347108 2508 +advic 1 5 5.347108 5.347108 2509 +colorado 2 4 5.568345 11.136690 2938 +ncsa 2 4 5.568345 11.136690 2767 +ireland 1 4 5.568345 5.568345 2853 +hayden 1 4 5.568345 5.568345 2844 +hickei 1 4 5.568345 5.568345 2845 +vaysburd 1 4 5.568345 5.568345 2846 +insur 1 4 5.568345 5.568345 2939 +glade 2 3 5.857933 11.715866 3537 +fashion 2 3 5.857933 11.715866 3699 +educornel 1 3 5.857933 5.857933 3601 +universitydept 1 3 5.857933 5.857933 3602 +takako 1 3 5.857933 5.857933 3538 +systemscomput 1 3 5.857933 5.857933 3148 +arizona 1 3 5.857933 5.857933 3700 +beginn 1 3 5.857933 5.857933 3330 +hotjava 1 3 5.857933 5.857933 3220 +ftc 1 3 5.857933 5.857933 3275 +elsevi 1 3 5.857933 5.857933 3671 +copper 1 3 5.857933 5.857933 3536 +summit 1 3 5.857933 5.857933 3684 +counti 1 3 5.857933 5.857933 3682 +crew 1 3 5.857933 5.857933 3347 +underground 1 3 5.857933 5.857933 3604 +spider 1 3 5.857933 5.857933 3605 +katherin 5 2 6.263398 31.316990 4851 +ofvirtu 1 2 6.263398 6.263398 5061 +lui 1 2 6.263398 6.263398 5164 +dalia 1 2 6.263398 6.263398 4852 +malki 1 2 6.263398 6.263398 4853 +uiuc 1 2 6.263398 6.263398 4509 +cern 1 2 6.263398 6.263398 5079 +icdc 1 2 6.263398 6.263398 5191 +ucsd 1 2 6.263398 6.263398 5192 +amazon 1 2 6.263398 6.263398 5193 +dessert 1 2 6.263398 6.263398 5194 +intertext 1 2 6.263398 6.263398 5002 +infoth 1 2 6.263398 6.263398 5195 +infodistribut 2 1 6.957497 13.914994 10929 +infocompani 2 1 6.957497 13.914994 10930 +lisboa 2 1 6.957497 13.914994 10931 +httpd 2 1 6.957497 13.914994 10932 +xmosaic 2 1 6.957497 13.914994 10933 +guokguo 1 1 6.957497 6.957497 10934 +multicastprotocol 1 1 6.957497 6.957497 10935 +publicationskatherin 1 1 6.957497 6.957497 10936 +connemara 1 1 6.957497 6.957497 10937 +rodrigu 1 1 6.957497 6.957497 10938 +sargento 1 1 6.957497 6.957497 10939 +paulo 1 1 6.957497 6.957497 10940 +verisimo 1 1 6.957497 6.957497 10941 +niagara 1 1 6.957497 6.957497 10942 +networkscool 1 1 6.957497 6.957497 10943 +toolsbibliographyconferencesjournalsacademia 1 1 6.957497 6.957497 10944 +infoschool 1 1 6.957497 6.957497 10945 +infojob 1 1 6.957497 6.957497 10946 +searchinterest 1 1 6.957497 6.957497 10947 +infoartbookscardscookingfashionfriendsinsurancelibrarymagazinesmailingmapmusicnewssportsstockweatherhunt 1 1 6.957497 6.957497 10948 +inforesearch 1 1 6.957497 6.957497 10949 +systempointershoru 1 1 6.957497 6.957497 10950 +productspringtotemtransisx 1 1 6.957497 6.957497 10951 +microsystemslab 1 1 6.957497 6.957497 10952 +networksmulticast 1 1 6.957497 6.957497 10953 +protocolsn 1 1 6.957497 6.957497 10954 +fromlblgun 1 1 6.957497 6.957497 10955 +sguid 1 1 6.957497 6.957497 10956 +quickrefer 1 1 6.957497 6.957497 10957 +htmldocument 1 1 6.957497 6.957497 10958 +finder 1 1 6.957497 6.957497 10959 +bibliographybibliographi 1 1 6.957497 6.957497 10960 +oldindex 1 1 6.957497 6.957497 10961 +hpdc 1 1 6.957497 6.957497 10962 +srd 1 1 6.957497 6.957497 10963 +jsac 1 1 6.957497 6.957497 10964 +scienceacademia 1 1 6.957497 6.957497 10965 +openingsibmdelltandemtiapplebel 1 1 6.957497 6.957497 10966 +gradschool 1 1 6.957497 6.957497 10967 +gradjob 1 1 6.957497 6.957497 10968 +ukinterest 1 1 6.957497 6.957497 10969 +moviesbailei 1 1 6.957497 6.957497 10970 +concertslibrari 1 1 6.957497 6.957497 10971 +hightechin 1 1 6.957497 6.957497 10972 +inesc 1 1 6.957497 6.957497 10973 +resort 1 1 6.957497 6.957497 10974 +coloradooth 1 1 6.957497 6.957497 10975 +infoart 1 1 6.957497 6.957497 10976 +weblouvreth 1 1 6.957497 6.957497 10977 +linebook 1 1 6.957497 6.957497 10978 +calvinhobb 1 1 6.957497 6.957497 10979 +archivecardsmagicchinaart 1 1 6.957497 6.957497 10980 +gourmetl 1 1 6.957497 6.957497 10981 +cordonbleu 1 1 6.957497 6.957497 10982 +fashional 1 1 6.957497 6.957497 10983 +linksa 1 1 6.957497 6.957497 10984 +cjlutz 1 1 6.957497 6.957497 10985 +wwweb 1 1 6.957497 6.957497 10986 +pagewith 1 1 6.957497 6.957497 10987 +tmexpressfirst 1 1 6.957497 6.957497 10988 +wireirc 1 1 6.957497 6.957497 10989 +faqfashion 1 1 6.957497 6.957497 10990 +nethair 1 1 6.957497 6.957497 10991 +diesel 1 1 6.957497 6.957497 10992 +guessfriend 1 1 6.957497 6.957497 10993 +deng 1 1 6.957497 6.957497 10994 +shiji 1 1 6.957497 6.957497 10995 +edulibrari 1 1 6.957497 6.957497 10996 +congressmagazin 1 1 6.957497 6.957497 10997 +timegeorg 1 1 6.957497 6.957497 10998 +gilder 1 1 6.957497 6.957497 10999 +archivesinanet 1 1 6.957497 6.957497 11000 +newsworld 1 1 6.957497 6.957497 11001 +olymp 1 1 6.957497 6.957497 11002 +streetheadlin 1 1 6.957497 6.957497 11003 +weatherhunt 1 1 6.957497 6.957497 11004 +informationglob 1 1 6.957497 6.957497 11005 +navigatorhom 1 1 6.957497 6.957497 11006 +wanderersand 1 1 6.957497 6.957497 11007 +kguo 1 1 6.957497 6.957497 11008 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html new file mode 100644 index 00000000..1c5dfab1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kityan^project.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +program 3 374 0.693147 2.079441 7 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +object 1 138 1.945910 1.945910 79 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +user 1 104 2.302585 2.302585 137 +graphic 1 90 2.397895 2.397895 147 +learn 1 86 2.484907 2.484907 170 +environ 1 84 2.484907 2.484907 177 +chang 1 82 2.484907 2.484907 163 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +java 4 70 2.708050 10.832200 248 +creat 2 63 2.772589 5.545178 277 +written 1 63 2.772589 2.772589 278 +interact 1 62 2.772589 2.772589 270 +simpl 1 60 2.833213 2.833213 298 +understand 1 47 3.091042 3.091042 384 +form 1 39 3.258097 3.258097 443 +multi 1 36 3.367296 3.367296 493 +power 1 30 3.555348 3.555348 573 +platform 1 29 3.583519 3.583519 591 +input 1 23 3.806662 3.806662 727 +portabl 1 20 3.951244 3.951244 819 +applet 1 20 3.951244 3.951244 827 +safe 1 12 4.465908 4.465908 1274 +polygon 1 8 4.875197 4.875197 1723 +rotat 2 5 5.347108 10.694216 2295 +vertic 1 5 5.347108 5.347108 2270 +cube 1 4 5.568345 5.568345 2940 +introductionthi 1 2 6.263398 6.263398 4056 +tetra 1 2 6.263398 6.263398 5196 +wirefram 2 1 6.957497 13.914994 11009 +desgin 1 1 6.957497 6.957497 11010 +threader 1 1 6.957497 6.957497 11011 +speific 1 1 6.957497 6.957497 11012 +react 1 1 6.957497 6.957497 11013 +cone 1 1 6.957497 6.957497 11014 +cylind 1 1 6.957497 6.957497 11015 +toru 1 1 6.957497 6.957497 11016 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html new file mode 100644 index 00000000..2c8bd4b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kleinber^kleinber.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 29 775 0.000000 0.000000 2 +scienc 14 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +research 4 431 0.693147 2.772588 10 +program 3 374 0.693147 2.079441 7 +inform 3 412 0.693147 2.079441 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +project 3 340 1.098612 3.295836 18 +time 1 293 1.098612 1.098612 17 +cornel 6 215 1.386294 8.317764 23 +link 1 247 1.386294 1.386294 24 +group 2 183 1.609438 3.218876 36 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +algorithm 12 162 1.791759 21.501108 57 +network 5 168 1.791759 8.958795 61 +distribut 3 162 1.791759 5.375277 51 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +problem 6 147 1.945910 11.675460 75 +process 2 142 1.945910 3.891820 72 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +confer 3 126 2.079442 6.238326 100 +report 2 131 2.079442 4.158884 92 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +theori 10 111 2.197225 21.972250 127 +assist 1 112 2.197225 2.197225 113 +technic 2 100 2.302585 4.605170 140 +book 1 99 2.302585 2.302585 131 +associ 2 93 2.397895 4.795790 151 +homepag 1 93 2.397895 2.397895 148 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +ieee 5 86 2.484907 12.424535 190 +resourc 3 81 2.484907 7.454721 172 +academ 2 82 2.484907 4.969814 178 +internet 2 83 2.484907 4.969814 186 +librari 1 87 2.484907 2.484907 181 +optim 5 79 2.564949 12.824745 197 +appear 4 78 2.564949 10.259796 210 +server 2 76 2.564949 5.129898 204 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +master 1 76 2.564949 2.564949 216 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +symposium 14 72 2.639057 36.946798 238 +line 4 75 2.639057 10.556228 231 +david 1 71 2.639057 2.639057 232 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +foundat 7 62 2.772589 19.408123 286 +ithaca 1 65 2.772589 2.772589 294 +visit 1 63 2.772589 2.772589 288 +result 1 65 2.772589 2.772589 281 +virtual 1 62 2.772589 2.772589 285 +interact 1 62 2.772589 2.772589 270 +content 1 59 2.833213 2.833213 302 +juli 1 60 2.833213 2.833213 305 +point 2 58 2.890372 5.780744 319 +thesi 1 57 2.890372 2.890372 327 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +talk 1 53 2.944439 2.944439 336 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +set 1 50 3.044522 3.044522 361 +protocol 2 45 3.135494 6.270988 407 +math 1 44 3.135494 3.135494 402 +combin 1 42 3.218876 3.218876 421 +error 1 40 3.258097 3.258097 449 +connect 1 37 3.332205 3.332205 485 +short 1 36 3.367296 3.367296 499 +robot 1 36 3.367296 3.367296 497 +approxim 3 35 3.401197 10.203591 509 +bibliographi 3 34 3.401197 10.203591 518 +singl 1 34 3.401197 3.401197 510 +posit 1 31 3.496508 3.496508 552 +graph 2 30 3.555348 7.110696 576 +secur 2 30 3.555348 7.110696 577 +computersci 1 30 3.555348 3.555348 562 +proc 15 26 3.688879 55.333185 649 +bound 1 26 3.688879 3.688879 659 +compar 1 26 3.688879 3.688879 648 +berkelei 1 26 3.688879 3.688879 657 +jeff 1 25 3.737670 3.737670 673 +flow 1 24 3.761200 3.761200 700 +motion 1 24 3.761200 3.761200 699 +yahoo 1 24 3.761200 3.761200 707 +universityithaca 1 24 3.761200 3.761200 710 +geometri 4 22 3.850148 15.400592 752 +emphasi 1 22 3.850148 3.850148 755 +siam 4 21 3.912023 15.648092 800 +path 3 21 3.912023 11.736069 778 +rout 2 21 3.912023 7.824046 793 +vlsi 1 21 3.912023 3.912023 795 +tenni 1 20 3.951244 3.951244 838 +geometr 4 19 4.007333 16.029332 852 +definit 2 19 4.007333 8.014666 864 +spend 1 19 4.007333 4.007333 850 +andrew 1 19 4.007333 4.007333 849 +hypertext 1 19 4.007333 4.007333 865 +lower 1 18 4.060443 4.060443 886 +dimension 1 18 4.060443 4.060443 909 +analyz 1 17 4.110874 4.110874 925 +expand 1 17 4.110874 4.110874 928 +segment 1 17 4.110874 4.110874 931 +stanford 1 17 4.110874 4.110874 955 +letter 2 16 4.174387 8.348774 981 +latenc 1 16 4.174387 4.174387 993 +biologi 3 15 4.248495 12.745485 1049 +princeton 1 15 4.248495 4.248495 1042 +embed 1 14 4.317488 4.317488 1102 +discret 4 13 4.382027 17.528108 1165 +resolut 1 13 4.382027 4.382027 1172 +safe 1 12 4.465908 4.465908 1274 +probabilist 1 11 4.553877 4.553877 1343 +node 1 11 4.553877 4.553877 1326 +mesh 1 11 4.553877 4.553877 1351 +israel 1 11 4.553877 4.553877 1366 +arbitrari 1 11 4.553877 4.553877 1359 +excit 1 11 4.553877 4.553877 1329 +queue 2 10 4.653960 9.307920 1386 +minimum 2 9 4.753590 9.507180 1555 +assumpt 1 9 4.753590 4.753590 1514 +distanc 1 9 4.753590 4.753590 1500 +yellow 1 9 4.753590 4.753590 1601 +cryptographi 1 9 4.753590 4.753590 1512 +combinatori 3 8 4.875197 14.625591 1629 +hallcornel 1 8 4.875197 4.875197 1757 +molecular 2 7 5.010635 10.021270 1887 +trade 2 7 5.010635 10.021270 1815 +foc 1 7 5.010635 5.010635 1880 +huttenloch 3 6 5.164786 15.494358 1983 +rubinfeld 1 6 5.164786 5.164786 1998 +dens 1 6 5.164786 5.164786 2122 +layout 1 6 5.164786 5.164786 2183 +relax 1 6 5.164786 5.164786 2120 +consensu 1 6 5.164786 5.164786 2080 +reconstruct 1 6 5.164786 5.164786 2170 +plane 1 6 5.164786 5.164786 2187 +symposiumon 1 6 5.164786 5.164786 2054 +infoseek 1 6 5.164786 5.164786 2188 +soda 1 6 5.164786 5.164786 2189 +corp 1 6 5.164786 5.164786 2139 +semi 2 5 5.347108 10.694216 2510 +almaden 1 5 5.347108 5.347108 2511 +stabil 1 5 5.347108 5.347108 2286 +diagram 1 5 5.347108 5.347108 2346 +stoc 1 5 5.347108 5.347108 2491 +chess 1 5 5.347108 5.347108 2486 +conform 1 4 5.568345 5.568345 2941 +disjoint 1 4 5.568345 5.568345 2709 +ratio 1 4 5.568345 5.568345 2942 +hausdorff 1 4 5.568345 5.568345 2633 +glimps 1 4 5.568345 5.568345 2778 +planar 1 3 5.857933 5.857933 3647 +formobil 1 3 5.857933 5.857933 3261 +fernandez 1 3 5.857933 5.857933 3591 +deliveri 1 3 5.857933 5.857933 3278 +onprincipl 1 3 5.857933 5.857933 3701 +berger 1 3 5.857933 5.857933 3702 +universitycomput 1 3 5.857933 5.857933 3651 +ncstrl 1 3 5.857933 5.857933 3530 +jone 1 3 5.857933 5.857933 3703 +rivest 1 3 5.857933 5.857933 3248 +kleinberg 22 2 6.263398 137.794756 5093 +adversari 2 2 6.263398 12.526796 5065 +tardo 2 2 6.263398 12.526796 5090 +williamson 2 2 6.263398 12.526796 5101 +goeman 2 2 6.263398 12.526796 5100 +leighton 2 2 6.263398 12.526796 5097 +diamet 1 2 6.263398 6.263398 5102 +lovasz 1 2 6.263398 6.263398 5091 +greedi 1 2 6.263398 6.263398 4143 +attiya 1 2 6.263398 6.263398 5197 +voronoi 1 2 6.263398 6.263398 5036 +euclidean 1 2 6.263398 6.263398 5198 +sdsc 1 2 6.263398 6.263398 5199 +kleinber 2 1 6.957497 13.914994 11017 +disjointpath 2 1 6.957497 13.914994 11018 +anddisjoint 1 1 6.957497 6.957497 11019 +stabilityof 1 1 6.957497 6.957497 11020 +particularlyth 1 1 6.957497 6.957497 11021 +seeselect 1 1 6.957497 6.957497 11022 +publicationsmiscellan 1 1 6.957497 6.957497 11023 +linkspapersapproxim 1 1 6.957497 6.957497 11024 +unsplitt 1 1 6.957497 6.957497 11025 +aggarw 1 1 6.957497 6.957497 11026 +improvedapproxim 1 1 6.957497 6.957497 11027 +thetafunct 1 1 6.957497 6.957497 11028 +vertex 1 1 6.957497 6.957497 11029 +simplepolygon 1 1 6.957497 6.957497 11030 +serverbalanc 1 1 6.957497 6.957497 11031 +yaniv 1 1 6.957497 6.957497 11032 +serveralgorithm 1 1 6.957497 6.957497 11033 +robotnavig 1 1 6.957497 6.957497 11034 +awerbuch 1 1 6.957497 6.957497 11035 +borodin 1 1 6.957497 6.957497 11036 +raghavan 1 1 6.957497 6.957497 11037 +sudan 1 1 6.957497 6.957497 11038 +lynch 1 1 6.957497 6.957497 11039 +offsbetween 1 1 6.957497 6.957497 11040 +quiesc 1 1 6.957497 6.957497 11041 +managementprotocol 1 1 6.957497 6.957497 11042 +mullainathan 1 1 6.957497 6.957497 11043 +boundsand 1 1 6.957497 6.957497 11044 +athre 1 1 6.957497 6.957497 11045 +kedem 1 1 6.957497 6.957497 11046 +pointset 1 1 6.957497 6.957497 11047 +invariantsof 1 1 6.957497 6.957497 11048 +linkssearch 1 1 6.957497 6.957497 11049 +bibliographiesaltavista 1 1 6.957497 6.957497 11050 +nynex 1 1 6.957497 6.957497 11051 +sitescornel 1 1 6.957497 6.957497 11052 +computingtc 1 1 6.957497 6.957497 11053 +crescenzi 1 1 6.957497 6.957497 11054 +kann 1 1 6.957497 6.957497 11055 +compendium 1 1 6.957497 6.957497 11056 +biologycomput 1 1 6.957497 6.957497 11057 +carb 1 1 6.957497 6.957497 11058 +biocomput 1 1 6.957497 6.957497 11059 +geometrydavid 1 1 6.957497 6.957497 11060 +eppstein 1 1 6.957497 6.957497 11061 +junkyard 1 1 6.957497 6.957497 11062 +erickson 1 1 6.957497 6.957497 11063 +securitymitr 1 1 6.957497 6.957497 11064 +miscellaneousnetscap 1 1 6.957497 6.957497 11065 +intellicast 1 1 6.957497 6.957497 11066 +kleinbergdepart 1 1 6.957497 6.957497 11067 +scienceupson 1 1 6.957497 6.957497 11068 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html new file mode 100644 index 00000000..6fd5aed8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kota^kota.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +cornel 3 215 1.386294 4.158882 23 +construct 1 139 1.945910 1.945910 82 +assign 1 135 1.945910 1.945910 66 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +homepag 3 93 2.397895 7.193685 148 +pictur 1 89 2.397895 2.397895 160 +start 1 83 2.484907 2.484907 173 +thing 1 84 2.484907 2.484907 189 +master 1 76 2.564949 2.564949 216 +come 1 78 2.564949 2.564949 202 +want 1 79 2.564949 2.564949 199 +html 1 75 2.639057 2.639057 235 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +back 1 60 2.833213 2.833213 297 +march 1 61 2.833213 2.833213 295 +think 1 57 2.890372 2.890372 314 +februari 1 54 2.944439 2.944439 328 +move 1 47 3.091042 3.091042 382 +music 1 42 3.218876 3.218876 436 +electr 1 38 3.295837 3.295837 461 +return 1 34 3.401197 3.401197 502 +corpor 1 21 3.912023 3.912023 802 +worth 1 11 4.553877 4.553877 1294 +forc 1 10 4.653960 4.653960 1384 +japan 2 8 4.875197 9.750394 1762 +cornellunivers 1 7 5.010635 5.010635 1916 +superhighwai 1 4 5.568345 5.568345 2943 +sell 1 4 5.568345 5.568345 2935 +tokyo 1 3 5.857933 5.857933 3622 +acquaint 1 3 5.857933 5.857933 3468 +sale 1 3 5.857933 5.857933 3688 +melco 1 2 6.263398 6.263398 5200 +advert 1 2 6.263398 6.263398 5201 +kazushi 1 1 6.957497 6.957497 11069 +otakota 1 1 6.957497 6.957497 11070 +edukazushi 1 1 6.957497 6.957497 11071 +mitusbishi 1 1 6.957497 6.957497 11072 +isund 1 1 6.957497 6.957497 11073 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html new file mode 100644 index 00000000..933e65fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kozen^kozen.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 3 431 0.693147 2.079441 10 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cornel 3 215 1.386294 4.158882 23 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +algorithm 2 162 1.791759 3.583518 57 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +technic 1 100 2.302585 2.302585 140 +pictur 1 89 2.397895 2.397895 160 +logic 2 71 2.639057 5.278114 230 +complex 2 64 2.772589 5.545178 269 +type 1 61 2.833213 2.833213 296 +faculti 1 56 2.890372 2.890372 325 +algebra 2 45 3.135494 6.270988 394 +york 1 41 3.218876 3.218876 435 +constraint 1 26 3.688879 3.688879 636 +interpret 1 24 3.761200 3.761200 686 +universityithaca 1 24 3.761200 3.761200 710 +decis 1 23 3.806662 3.806662 728 +automata 1 13 4.382027 4.382027 1135 +interestsmi 1 10 4.653960 4.653960 1462 +hallcornel 1 8 4.875197 4.875197 1757 +newton 1 7 5.010635 5.010635 1824 +infer 1 6 5.164786 5.164786 2040 +dexter 1 4 5.568345 5.568345 2855 +andsemant 1 3 5.857933 5.857933 3246 +kozendext 1 1 6.957497 6.957497 11074 +kozenjoseph 1 1 6.957497 6.957497 11075 +engineeringphd 1 1 6.957497 6.957497 11076 +especiallycomplex 1 1 6.957497 6.957497 11077 +onlinekleen 1 1 6.957497 6.957497 11078 +algebraautomata 1 1 6.957497 6.957497 11079 +logicbibliographylist 1 1 6.957497 6.957497 11080 +reportscours 1 1 6.957497 6.957497 11081 +notesc 1 1 6.957497 6.957497 11082 +programsc 1 1 6.957497 6.957497 11083 +theoryfun 1 1 6.957497 6.957497 11084 +stufffamili 1 1 6.957497 6.957497 11085 +rugbi 1 1 6.957497 6.957497 11086 +effectcomput 1 1 6.957497 6.957497 11087 +departmentupson 1 1 6.957497 6.957497 11088 +usakozen 1 1 6.957497 6.957497 11089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html new file mode 100644 index 00000000..04b482df --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kreitz^index.html @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +theori 2 111 2.197225 4.394450 127 +topic 1 114 2.197225 2.197225 110 +teach 1 108 2.197225 2.197225 112 +person 1 111 2.197225 2.197225 117 +pictur 1 89 2.397895 2.397895 160 +associ 1 93 2.397895 2.397895 151 +learn 1 86 2.484907 2.484907 170 +novemb 1 81 2.484907 2.484907 179 +upson 1 71 2.639057 2.639057 218 +ithaca 1 65 2.772589 2.772589 294 +type 1 61 2.833213 2.833213 296 +autom 1 41 3.218876 3.218876 434 +soon 2 36 3.367296 6.734592 494 +synthesi 1 20 3.951244 3.951244 834 +deduct 1 12 4.465908 4.465908 1236 +german 1 6 5.164786 5.164786 2190 +christoph 3 5 5.347108 16.041324 2512 +kreitz 5 1 6.957497 34.787485 11090 +lehr 1 1 6.957497 6.957497 11091 +lernen 1 1 6.957497 6.957497 11092 +vorlesungsskript 1 1 6.957497 6.957497 11093 +medienunterst 1 1 6.957497 6.957497 11094 +uumltzt 1 1 6.957497 6.957497 11095 +lehren 1 1 6.957497 6.957497 11096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html new file mode 100644 index 00000000..89837071 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^kuen^kuen.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +cornel 2 215 1.386294 2.772588 23 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +place 1 106 2.197225 2.197225 124 +master 1 76 2.564949 2.564949 216 +multimedia 1 68 2.708050 2.708050 258 +would 1 67 2.708050 2.708050 251 +street 1 63 2.772589 2.772589 293 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +telephon 1 50 3.044522 3.044522 373 +still 1 50 3.044522 3.044522 362 +enjoi 1 26 3.688879 3.688879 660 +daili 1 24 3.761200 3.761200 706 +taiwan 1 16 4.174387 4.174387 1006 +countri 1 15 4.248495 4.248495 1059 +grove 1 8 4.875197 4.875197 1675 +newton 1 7 5.010635 5.010635 1824 +isi 1 5 5.347108 5.347108 2443 +heng 2 2 6.263398 12.526796 5202 +kuen 4 1 6.957497 27.829988 11097 +myproject 1 1 6.957497 6.957497 11098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html new file mode 100644 index 00000000..940ef425 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lagoze^home.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 9 775 0.000000 0.000000 2 +scienc 9 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +research 7 431 0.693147 4.852029 10 +work 4 380 0.693147 2.772588 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +project 2 340 1.098612 2.197224 18 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cornel 7 215 1.386294 9.704058 23 +also 3 259 1.386294 4.158882 28 +design 2 213 1.386294 2.772588 25 +group 2 183 1.609438 3.218876 36 +paper 2 205 1.609438 3.218876 38 +oper 1 180 1.609438 1.609438 34 +implement 4 152 1.791759 7.167036 52 +distribut 3 162 1.791759 5.375277 51 +develop 2 174 1.791759 3.583518 53 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +read 1 154 1.791759 1.791759 47 +contact 1 153 1.791759 1.791759 59 +object 4 138 1.945910 7.783640 79 +architectur 2 139 1.945910 3.891820 77 +area 2 144 1.945910 3.891820 80 +hall 1 146 1.945910 1.945910 65 +report 9 131 2.079442 18.714978 92 +number 3 130 2.079442 6.238326 97 +technolog 2 131 2.079442 4.158884 102 +confer 2 126 2.079442 4.158884 100 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +person 3 111 2.197225 6.591675 117 +world 3 115 2.197225 6.591675 126 +final 2 116 2.197225 4.394450 108 +manag 1 114 2.197225 2.197225 125 +intern 1 108 2.197225 2.197225 128 +find 1 111 2.197225 2.197225 111 +site 1 106 2.197225 2.197225 119 +technic 9 100 2.302585 20.723265 140 +part 3 98 2.302585 6.907755 129 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +commun 1 95 2.397895 2.397895 157 +pictur 1 89 2.397895 2.397895 160 +librari 11 87 2.484907 27.333977 181 +wide 2 84 2.484907 4.969814 185 +internet 1 83 2.484907 2.484907 186 +build 1 85 2.484907 2.484907 184 +member 1 84 2.484907 2.484907 165 +server 3 76 2.564949 7.694847 204 +issu 2 78 2.564949 5.129898 211 +refer 2 78 2.564949 5.129898 203 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +know 1 80 2.564949 2.564949 198 +workshop 3 71 2.639057 7.917171 239 +servic 2 72 2.639057 5.278114 236 +meet 2 72 2.639057 5.278114 229 +upson 1 71 2.639057 2.639057 218 +involv 1 71 2.639057 2.639057 227 +nation 1 74 2.639057 2.639057 240 +publish 1 57 2.890372 2.890372 326 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +digit 11 52 2.995732 32.953052 348 +maintain 1 51 2.995732 2.995732 342 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +life 2 50 3.044522 6.089044 375 +give 1 50 3.044522 3.044522 359 +protocol 2 45 3.135494 6.270988 407 +fast 1 42 3.218876 3.218876 429 +author 5 39 3.258097 16.290485 450 +littl 1 39 3.258097 3.258097 454 +open 1 38 3.295837 3.295837 469 +mean 1 37 3.332205 3.332205 477 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +extend 2 32 3.465736 6.931472 539 +chapter 1 32 3.465736 3.465736 536 +collabor 1 32 3.465736 3.465736 543 +posit 1 31 3.496508 3.496508 552 +secur 2 30 3.555348 7.110696 577 +framework 2 28 3.610918 7.221836 606 +releas 1 28 3.610918 3.610918 616 +hope 1 28 3.610918 3.610918 610 +enabl 1 26 3.688879 3.688879 655 +rule 1 26 3.688879 3.688879 638 +challeng 1 26 3.688879 3.688879 653 +never 2 25 3.737670 7.475340 671 +primari 1 25 3.737670 3.737670 669 +universityithaca 1 24 3.761200 3.761200 710 +store 1 24 3.761200 3.761200 693 +magazin 1 24 3.761200 3.761200 704 +sometim 1 24 3.761200 3.761200 696 +lead 1 23 3.806662 3.806662 718 +mobil 1 23 3.806662 3.806662 730 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +defin 1 22 3.850148 3.850148 746 +corpor 1 21 3.912023 3.912023 802 +fund 1 21 3.912023 3.912023 805 +tell 1 21 3.912023 3.912023 777 +spend 1 19 4.007333 4.007333 850 +ever 1 19 4.007333 4.007333 872 +beauti 1 18 4.060443 4.060443 912 +repositori 2 17 4.110874 8.221748 932 +carl 3 15 4.248495 12.745485 1024 +charact 1 15 4.248495 4.248495 1028 +edui 1 13 4.382027 4.382027 1193 +joint 1 13 4.382027 4.382027 1130 +infrastructur 1 12 4.465908 4.465908 1234 +road 1 11 4.553877 4.553877 1374 +consortium 1 10 4.653960 4.653960 1467 +bike 1 10 4.653960 4.653960 1468 +leader 1 9 4.753590 4.753590 1576 +desir 1 9 4.753590 4.753590 1542 +poor 1 8 4.875197 4.875197 1736 +pagei 1 8 4.875197 4.875197 1683 +davi 1 7 5.010635 5.010635 1888 +daughter 1 7 5.010635 5.010635 1943 +drop 1 6 5.164786 5.164786 2008 +trail 1 6 5.164786 5.164786 2071 +substitut 2 5 5.347108 10.694216 2247 +departmentat 1 5 5.347108 5.347108 2513 +constant 1 5 5.347108 5.347108 2251 +outdoor 1 5 5.347108 5.347108 2514 +interfer 1 5 5.347108 5.347108 2494 +darpa 1 4 5.568345 5.568345 2944 +metadata 1 4 5.568345 5.568345 2945 +breath 1 4 5.568345 5.568345 2946 +dienst 5 3 5.857933 29.289665 3640 +ncstrl 2 3 5.857933 11.715866 3530 +worldwid 1 3 5.857933 5.857933 3704 +luci 1 3 5.857933 5.857933 3705 +fresh 1 3 5.857933 5.857933 3706 +lagoz 3 2 6.263398 18.790194 5081 +interoper 2 2 6.263398 12.526796 4838 +quiet 2 2 6.263398 12.526796 5203 +protocolsfor 1 2 6.263398 6.263398 5204 +developeda 1 2 6.263398 6.263398 5205 +interfacesand 1 2 6.263398 6.263398 5206 +cano 1 2 6.263398 6.263398 5207 +joi 1 2 6.263398 6.263398 5208 +fight 1 2 6.263398 6.263398 5209 +groupin 1 1 6.957497 6.957497 11099 +ourgroup 1 1 6.957497 6.957497 11100 +adistribut 1 1 6.957497 6.957497 11101 +collaborateson 1 1 6.957497 6.957497 11102 +thedienstsoftwar 1 1 6.957497 6.957497 11103 +providesdistribut 1 1 6.957497 6.957497 11104 +initiativesto 1 1 6.957497 6.957497 11105 +iso 1 1 6.957497 6.957497 11106 +dlib 1 1 6.957497 6.957497 11107 +dlibwork 1 1 6.957497 6.957497 11108 +iiin 1 1 6.957497 6.957497 11109 +warwick 1 1 6.957497 6.957497 11110 +amveri 1 1 6.957497 6.957497 11111 +distributedobject 1 1 6.957497 6.957497 11112 +paperfor 1 1 6.957497 6.957497 11113 +codeworkshop 1 1 6.957497 6.957497 11114 +meetm 1 1 6.957497 6.957497 11115 +moreabout 1 1 6.957497 6.957497 11116 +outsideof 1 1 6.957497 6.957497 11117 +toddler 1 1 6.957497 6.957497 11118 +lucyg 1 1 6.957497 6.957497 11119 +avid 1 1 6.957497 6.957497 11120 +movingwat 1 1 6.957497 6.957497 11121 +lakeand 1 1 6.957497 6.957497 11122 +itch 1 1 6.957497 6.957497 11123 +ridingalong 1 1 6.957497 6.957497 11124 +backwood 1 1 6.957497 6.957497 11125 +sparehour 1 1 6.957497 6.957497 11126 +shoe 1 1 6.957497 6.957497 11127 +deeplyth 1 1 6.957497 6.957497 11128 +physicalnor 1 1 6.957497 6.957497 11129 +itspreserv 1 1 6.957497 6.957497 11130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html new file mode 100644 index 00000000..e9d0f254 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ldzhou^index.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 3 443 0.693147 2.079441 6 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +cours 1 273 1.098612 1.098612 15 +cornel 3 215 1.386294 4.158882 23 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +distribut 2 162 1.791759 3.583518 51 +network 2 168 1.791759 3.583518 61 +applic 1 170 1.791759 1.791759 56 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +report 1 131 2.079442 2.079442 92 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +homepag 7 93 2.397895 16.785265 148 +center 1 88 2.397895 2.397895 158 +internet 2 83 2.484907 4.969814 186 +level 1 87 2.484907 2.484907 180 +school 1 84 2.484907 2.484907 188 +materi 2 75 2.639057 5.278114 221 +servic 2 72 2.639057 5.278114 236 +onlin 1 75 2.639057 2.639057 223 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +guid 3 63 2.772589 8.317767 267 +colleg 1 61 2.833213 2.833213 300 +back 1 60 2.833213 2.833213 297 +local 1 55 2.944439 2.944439 334 +friend 2 48 3.044522 6.089044 376 +standard 1 48 3.044522 3.044522 365 +basic 1 50 3.044522 3.044522 360 +author 1 39 3.258097 3.258097 450 +tutori 1 39 3.258097 3.258097 437 +open 1 38 3.295837 3.295837 469 +robot 1 36 3.367296 3.367296 497 +concept 1 32 3.465736 3.465736 537 +secur 2 30 3.555348 7.110696 577 +travel 1 30 3.555348 3.555348 579 +chines 1 29 3.583519 3.583519 595 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +grad 1 20 3.951244 3.951244 837 +ultim 1 17 4.110874 4.110874 943 +cambridg 1 16 4.174387 4.174387 1008 +massiv 1 15 4.248495 4.248495 1026 +rank 1 14 4.317488 4.317488 1063 +opportun 1 13 4.382027 4.382027 1161 +career 4 12 4.465908 17.863632 1287 +safe 1 12 4.465908 4.465908 1274 +classmat 1 9 4.753590 4.753590 1516 +yellow 1 9 4.753590 4.753590 1601 +sigop 1 8 4.875197 4.875197 1727 +soccer 1 8 4.875197 4.875197 1752 +zhou 2 6 5.164786 10.329572 2092 +legal 1 6 5.164786 5.164786 2094 +authent 1 5 5.347108 5.347108 2306 +exclus 1 4 5.568345 5.568345 2947 +insur 1 4 5.568345 5.568345 2939 +surviv 1 4 5.568345 5.568345 2734 +fudan 2 3 5.857933 11.715866 3707 +legion 1 3 5.857933 5.857933 3708 +automobil 1 3 5.857933 5.857933 3709 +buyer 2 2 6.263398 12.526796 5210 +resours 1 2 6.263398 6.263398 5211 +sunris 1 2 6.263398 6.263398 5212 +edmund 1 2 6.263398 6.263398 5213 +succe 1 2 6.263398 6.263398 5214 +lidong 2 1 6.957497 13.914994 11131 +auto 2 1 6.957497 13.914994 11132 +oasi 1 1 6.957497 6.957497 11133 +adag 1 1 6.957497 6.957497 11134 +sirac 1 1 6.957497 6.957497 11135 +kerbero 1 1 6.957497 6.957497 11136 +ocaml 1 1 6.957497 6.957497 11137 +jobtrak 1 1 6.957497 6.957497 11138 +hunter 1 1 6.957497 6.957497 11139 +careermosa 1 1 6.957497 6.957497 11140 +jobweb 1 1 6.957497 6.957497 11141 +xjob 1 1 6.957497 6.957497 11142 +yingjun 1 1 6.957497 6.957497 11143 +isso 1 1 6.957497 6.957497 11144 +autosit 1 1 6.957497 6.957497 11145 +agenc 1 1 6.957497 6.957497 11146 +indexlast 1 1 6.957497 6.957497 11147 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html new file mode 100644 index 00000000..b2a6c731 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lhwang^lhwang.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +project 4 340 1.098612 4.394448 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +link 4 247 1.386294 5.545176 24 +cornel 2 215 1.386294 2.772588 23 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +distribut 3 162 1.791759 5.375277 51 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +object 2 138 1.945910 3.891820 79 +construct 1 139 1.945910 1.945910 82 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +final 2 116 2.197225 4.394450 108 +site 2 106 2.197225 4.394450 119 +world 1 115 2.197225 2.197225 126 +look 1 107 2.197225 2.197225 115 +book 1 99 2.302585 2.302585 131 +take 1 97 2.302585 2.302585 134 +octob 1 89 2.397895 2.397895 156 +info 1 85 2.484907 2.484907 176 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +line 1 75 2.639057 2.639057 231 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +collect 1 65 2.772589 2.772589 268 +best 1 59 2.833213 2.833213 299 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +publish 1 57 2.890372 2.890372 326 +digit 1 52 2.995732 2.995732 348 +advisor 1 51 2.995732 2.995732 355 +cool 2 49 3.044522 6.089044 374 +still 1 50 3.044522 3.044522 362 +visual 1 48 3.044522 3.044522 372 +move 1 47 3.091042 3.091042 382 +video 1 44 3.135494 3.135494 405 +natur 1 44 3.135494 3.135494 406 +vision 1 41 3.218876 3.218876 430 +autom 1 41 3.218876 3.218876 434 +york 1 41 3.218876 3.218876 435 +small 1 39 3.258097 3.258097 447 +annual 1 40 3.258097 3.258097 458 +movi 1 40 3.258097 3.258097 459 +connect 1 37 3.332205 3.332205 485 +chapter 1 32 3.465736 3.465736 536 +anim 3 31 3.496508 10.489524 557 +quot 1 29 3.583519 3.583519 582 +chines 1 29 3.583519 3.583519 595 +releas 1 28 3.610918 3.610918 616 +linux 1 27 3.637586 3.637586 631 +berkelei 1 26 3.688879 3.688879 657 +wang 1 21 3.912023 3.912023 790 +born 1 21 3.912023 3.912023 798 +annot 1 21 3.912023 3.912023 775 +wind 1 18 4.060443 4.060443 908 +edulast 1 17 4.110874 4.110874 927 +taiwan 2 16 4.174387 8.348774 1006 +practicum 1 16 4.174387 4.174387 960 +track 1 15 4.248495 4.248495 1029 +scene 1 14 4.317488 4.317488 1114 +classic 1 14 4.317488 4.317488 1084 +hong 1 14 4.317488 4.317488 1105 +galleri 1 13 4.382027 4.382027 1192 +remov 2 12 4.465908 8.931816 1225 +scienceat 2 11 4.553877 9.107754 1375 +moment 1 11 4.553877 4.553877 1379 +alpha 1 11 4.553877 4.553877 1348 +earth 1 10 4.653960 4.653960 1463 +kong 1 9 4.753590 4.753590 1602 +jump 1 9 4.753590 4.753590 1603 +coast 1 8 4.875197 4.875197 1746 +edg 1 8 4.875197 4.875197 1647 +bridg 1 8 4.875197 4.875197 1764 +ramin 1 7 5.010635 5.010635 1820 +bookstor 1 7 5.010635 5.010635 1837 +vallei 1 7 5.010635 5.010635 1959 +southern 1 6 5.164786 5.164786 2191 +ohio 1 5 5.347108 5.347108 2447 +carlo 1 5 5.347108 5.347108 2515 +swartz 2 4 5.568345 11.136690 2878 +sinanet 1 4 5.568345 5.568345 2883 +sell 1 4 5.568345 5.568345 2935 +japanes 1 4 5.568345 5.568345 2934 +classesc 2 3 5.857933 11.715866 3681 +visionc 1 3 5.857933 5.857933 3489 +audit 1 3 5.857933 5.857933 3391 +headlin 1 3 5.857933 5.857933 3710 +hongkong 1 3 5.857933 5.857933 3677 +hero 1 3 5.857933 5.857933 3711 +ming 1 3 5.857933 5.857933 3712 +villag 1 2 6.263398 6.263398 5215 +computingc 1 2 6.263398 6.263398 5216 +linksfor 1 2 6.263398 6.263398 5185 +castl 1 2 6.263398 6.263398 5217 +nausicaa 1 2 6.263398 6.263398 5218 +galact 1 2 6.263398 6.263398 5219 +hsian 2 1 6.957497 13.914994 11148 +orwel 2 1 6.957497 13.914994 11149 +totoro 2 1 6.957497 13.914994 11150 +wangthi 1 1 6.957497 6.957497 11151 +constructionlin 1 1 6.957497 6.957497 11152 +fangliao 1 1 6.957497 6.957497 11153 +videoe 1 1 6.957497 6.957497 11154 +networkse 1 1 6.957497 6.957497 11155 +amidonc 1 1 6.957497 6.957497 11156 +transcrib 1 1 6.957497 6.957497 11157 +zabihspr 1 1 6.957497 6.957497 11158 +processingc 1 1 6.957497 6.957497 11159 +managementc 1 1 6.957497 6.957497 11160 +colloquimc 1 1 6.957497 6.957497 11161 +webspac 1 1 6.957497 6.957497 11162 +stuffscornel 1 1 6.957497 6.957497 11163 +reportiee 1 1 6.957497 6.957497 11164 +societytaiwan 1 1 6.957497 6.957497 11165 +comth 1 1 6.957497 6.957497 11166 +musicmovi 1 1 6.957497 6.957497 11167 +movieweb 1 1 6.957497 6.957497 11168 +moviemania 1 1 6.957497 6.957497 11169 +picturesth 1 1 6.957497 6.957497 11170 +linkstcl 1 1 6.957497 6.957497 11171 +hacksth 1 1 6.957497 6.957497 11172 +pagemiscellan 1 1 6.957497 6.957497 11173 +cja 1 1 6.957497 6.957497 11174 +calanimag 1 1 6.957497 6.957497 11175 +pagelaputa 1 1 6.957497 6.957497 11176 +conan 1 1 6.957497 6.957497 11177 +slump 1 1 6.957497 6.957497 11178 +kiki 1 1 6.957497 6.957497 11179 +legend 1 1 6.957497 6.957497 11180 +pagecampu 1 1 6.957497 6.957497 11181 +uptown 1 1 6.957497 6.957497 11182 +eithaca 1 1 6.957497 6.957497 11183 +linhsian 1 1 6.957497 6.957497 11184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html new file mode 100644 index 00000000..a70e8c6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^libby^home.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +updat 1 191 1.609438 1.609438 41 +read 1 154 1.791759 1.791759 47 +look 2 107 2.197225 4.394450 115 +check 1 115 2.197225 2.197225 118 +take 2 97 2.302585 4.605170 134 +thing 1 84 2.484907 2.484907 189 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +collect 1 65 2.772589 2.772589 268 +septemb 1 65 2.772589 2.772589 274 +digit 1 52 2.995732 2.995732 348 +video 1 44 3.135494 3.135494 405 +show 2 43 3.178054 6.356108 417 +http 1 41 3.218876 3.218876 420 +littl 1 39 3.258097 3.258097 454 +download 1 36 3.367296 3.367296 489 +thought 3 17 4.110874 12.332622 945 +sept 1 17 4.110874 4.110874 952 +pagewelcom 1 11 4.553877 4.553877 1344 +song 1 11 4.553877 4.553877 1380 +theme 1 8 4.875197 4.875197 1707 +counter 1 8 4.875197 4.875197 1765 +clip 1 7 5.010635 5.010635 1868 +courtesi 1 7 5.010635 5.010635 1953 +essai 2 4 5.568345 11.136690 2948 +libbi 2 1 6.957497 13.914994 11185 +lista 1 1 6.957497 6.957497 11186 +projectemail 1 1 6.957497 6.957497 11187 +mehit 1 1 6.957497 6.957497 11188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html new file mode 100644 index 00000000..ad986d65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lili^ll.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +system 1 443 0.693147 0.693147 6 +offic 3 299 1.098612 3.295836 13 +cours 1 273 1.098612 1.098612 15 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +oper 1 180 1.609438 1.609438 34 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +take 1 97 2.302585 2.302585 134 +name 1 72 2.639057 2.639057 220 +upson 1 71 2.639057 2.639057 218 +lili 1 5 5.347108 5.347108 2240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html new file mode 100644 index 00000000..d1361b3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lnt^lnt.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 2 343 1.098612 2.197224 19 +cornel 4 215 1.386294 5.545176 23 +also 1 259 1.386294 1.386294 28 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +recent 3 167 1.791759 5.375277 58 +applic 1 170 1.791759 1.791759 56 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +analysi 1 124 2.079442 2.079442 98 +theori 3 111 2.197225 6.591675 127 +mathemat 2 108 2.197225 4.394450 123 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +center 1 88 2.397895 2.397895 158 +method 1 80 2.564949 2.564949 213 +appli 1 71 2.639057 2.639057 226 +view 1 70 2.708050 2.708050 254 +differ 1 66 2.708050 2.708050 253 +scientif 1 53 2.944439 2.944439 341 +much 1 52 2.995732 2.995732 349 +numer 5 49 3.044522 15.222610 369 +appoint 1 49 3.044522 3.044522 358 +algebra 2 45 3.135494 6.270988 394 +textbook 2 44 3.135494 6.270988 397 +mechan 1 43 3.178054 3.178054 416 +linear 3 41 3.218876 9.656628 431 +map 1 39 3.258097 3.258097 452 +multipl 1 39 3.258097 3.258097 453 +field 1 37 3.332205 3.332205 482 +mean 1 37 3.332205 3.332205 477 +approxim 1 35 3.401197 3.401197 509 +bibliographi 1 34 3.401197 3.401197 518 +jeff 1 25 3.737670 3.737670 673 +siam 1 21 3.912023 3.912023 800 +walter 1 17 4.110874 4.110874 950 +normal 1 16 4.174387 4.174387 995 +matlab 1 14 4.317488 4.317488 1081 +affili 1 13 4.382027 4.382027 1194 +whose 1 13 4.382027 4.382027 1166 +nick 1 13 4.382027 4.382027 1180 +iter 1 12 4.465908 4.465908 1206 +peter 1 11 4.553877 4.553877 1316 +matric 2 10 4.653960 9.307920 1399 +thecomput 1 10 4.653960 4.653960 1408 +lloyd 2 6 5.164786 10.329572 2103 +edumi 1 6 5.164786 5.164786 2132 +fluid 1 5 5.347108 5.347108 2440 +thecornel 1 4 5.568345 5.568345 2892 +conform 1 4 5.568345 5.568345 2941 +hasbeen 1 4 5.568345 5.568345 2661 +trefethen 2 3 5.857933 11.715866 3528 +eigenvector 1 3 5.857933 5.857933 3365 +vicki 1 3 5.857933 5.857933 3187 +reddi 1 3 5.857933 5.857933 3277 +havea 1 2 6.263398 6.263398 4434 +spectral 1 2 6.263398 6.263398 4837 +papersoth 1 2 6.263398 6.263398 5049 +loui 1 2 6.263398 6.263398 5220 +trefethenprofessorlnt 1 1 6.957497 6.957497 11189 +thecent 1 1 6.957497 6.957497 11190 +numericalsolut 1 1 6.957497 6.957497 11191 +notorthogon 1 1 6.957497 6.957497 11192 +textbooksfinit 1 1 6.957497 6.957497 11193 +papersmultimatlab 1 1 6.957497 6.957497 11194 +processorsmatrix 1 1 6.957497 6.957497 11195 +gap 1 1 6.957497 6.957497 11196 +betweenpotenti 1 1 6.957497 6.957497 11197 +convergencepseudospectra 1 1 6.957497 6.957497 11198 +operatorssom 1 1 6.957497 6.957497 11199 +itemsclass 1 1 6.957497 6.957497 11200 +analysiscurriculum 1 1 6.957497 6.957497 11201 +vitaepseudospectra 1 1 6.957497 6.957497 11202 +alfeldcurr 1 1 6.957497 6.957497 11203 +howlegubjrn 1 1 6.957497 6.957497 11204 +jnsson 1 1 6.957497 6.957497 11205 +yohan 1 1 6.957497 6.957497 11206 +kimdivakar 1 1 6.957497 6.957497 11207 +viswanathprevi 1 1 6.957497 6.957497 11208 +baggetttobi 1 1 6.957497 6.957497 11209 +driscollalan 1 1 6.957497 6.957497 11210 +edelman 1 1 6.957497 6.957497 11211 +howel 1 1 6.957497 6.957497 11212 +mascarenhasnoel 1 1 6.957497 6.957497 11213 +nachtigalsatish 1 1 6.957497 6.957497 11214 +chuan 1 1 6.957497 6.957497 11215 +tohsom 1 1 6.957497 6.957497 11216 +colleaguesjim 1 1 6.957497 6.957497 11217 +demmelann 1 1 6.957497 6.957497 11218 +greenbaummartin 1 1 6.957497 6.957497 11219 +gutknechtd 1 1 6.957497 6.957497 11220 +highamann 1 1 6.957497 6.957497 11221 +trefethenandr 1 1 6.957497 6.957497 11222 +weideman 1 1 6.957497 6.957497 11223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html new file mode 100644 index 00000000..aa454da7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lucy^lucy.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 6 443 0.693147 4.158882 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +cornel 3 215 1.386294 4.158882 23 +softwar 2 220 1.386294 2.772588 30 +languag 2 227 1.386294 2.772588 26 +mail 1 238 1.386294 1.386294 22 +class 2 199 1.609438 3.218876 37 +oper 2 180 1.609438 3.218876 34 +fall 1 181 1.609438 1.609438 40 +network 4 168 1.791759 7.167036 61 +distribut 3 162 1.791759 5.375277 51 +applic 1 170 1.791759 1.791759 56 +read 1 154 1.791759 1.791759 47 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +databas 3 122 2.079442 6.238326 86 +spring 2 131 2.079442 4.158884 88 +welcom 1 122 2.079442 2.079442 99 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +manag 2 114 2.197225 4.394450 125 +site 2 106 2.197225 4.394450 119 +topic 1 114 2.197225 2.197225 110 +person 1 111 2.197225 2.197225 117 +search 1 95 2.397895 2.397895 155 +internet 3 83 2.484907 7.454721 186 +stuff 2 87 2.484907 4.969814 171 +librari 1 87 2.484907 2.484907 181 +resum 1 79 2.564949 2.564949 217 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +archiv 1 49 3.044522 3.044522 364 +favorit 1 44 3.135494 3.135494 410 +netscap 1 44 3.135494 3.135494 395 +music 2 42 3.218876 6.437752 436 +compani 1 41 3.218876 3.218876 423 +microsoft 1 38 3.295837 3.295837 468 +china 2 37 3.332205 6.664410 487 +connect 1 37 3.332205 3.332205 485 +product 1 33 3.433987 3.433987 527 +travel 2 30 3.555348 7.110696 579 +chines 2 29 3.583519 7.167038 595 +programminglanguag 1 21 3.912023 3.912023 782 +busi 1 21 3.912023 3.912023 784 +hobbi 1 16 4.174387 4.174387 1009 +stock 1 16 4.174387 4.174387 1007 +practicum 1 16 4.174387 4.174387 960 +photograph 1 15 4.248495 4.248495 1056 +novel 1 15 4.248495 4.248495 1039 +misc 1 13 4.382027 4.382027 1124 +galleri 1 13 4.382027 4.382027 1192 +tune 1 12 4.465908 4.465908 1227 +catalog 1 10 4.653960 4.653960 1431 +swim 1 9 4.753590 4.753590 1599 +corba 1 5 5.347108 5.347108 2320 +ping 1 4 5.568345 5.568345 2922 +vrml 1 4 5.568345 5.568345 2949 +cube 1 4 5.568345 5.568345 2940 +luci 2 3 5.857933 11.715866 3705 +pong 1 3 5.857933 5.857933 3371 +underground 1 3 5.857933 5.857933 3604 +badminton 1 2 6.263398 6.263398 5221 +silvano 1 2 6.263398 6.263398 4868 +sunlab 1 2 6.263398 6.263398 5222 +caltech 1 2 6.263398 6.263398 5223 +whiz 1 1 6.957497 6.957497 11224 +systemscontact 1 1 6.957497 6.957497 11225 +yuwu 1 1 6.957497 6.957497 11226 +tkcgi 1 1 6.957497 6.957497 11227 +securitypc 1 1 6.957497 6.957497 11228 +lube 1 1 6.957497 6.957497 11229 +ipngip_atmcomput 1 1 6.957497 6.957497 11230 +sapient 1 1 6.957497 6.957497 11231 +jobtrack 1 1 6.957497 6.957497 11232 +artvark 1 1 6.957497 6.957497 11233 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html new file mode 100644 index 00000000..9df79c59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^lxwu^home.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 3 215 1.386294 4.158882 23 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +network 2 168 1.791759 3.583518 61 +click 1 142 1.945910 1.945910 78 +sinc 1 90 2.397895 2.397895 159 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +receiv 1 66 2.708050 2.708050 244 +main 1 67 2.708050 2.708050 256 +window 1 68 2.708050 2.708050 242 +multimedia 1 68 2.708050 2.708050 258 +digit 1 52 2.995732 2.995732 348 +electron 1 47 3.091042 3.091042 379 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +layer 1 17 4.110874 4.110874 926 +massachusett 1 14 4.317488 4.317488 1118 +linda 1 10 4.653960 4.653960 1394 +equip 1 10 4.653960 4.653960 1459 +stack 1 10 4.653960 4.653960 1389 +corp 1 6 5.164786 5.164786 2139 +nativ 1 6 5.164786 5.164786 2192 +multicast 1 5 5.347108 5.347108 2305 +commerc 1 3 5.857933 5.857933 3209 +lowel 1 2 6.263398 6.263398 5224 +coursesfal 1 2 6.263398 6.263398 5225 +universitylinda 1 1 6.957497 6.957497 11234 +lxwu 1 1 6.957497 6.957497 11235 +univsers 1 1 6.957497 6.957497 11236 +banyan 1 1 6.957497 6.957497 11237 +mulitimedia 1 1 6.957497 6.957497 11238 +kramer 1 1 6.957497 6.957497 11239 +mart 1 1 6.957497 6.957497 11240 +photoesus 1 1 6.957497 6.957497 11241 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html new file mode 100644 index 00000000..f4b962ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mateev^mateev.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +last 1 314 1.098612 1.098612 14 +cornel 1 215 1.386294 1.386294 23 +updat 1 191 1.609438 1.609438 41 +upson 1 71 2.639057 2.639057 218 +juli 1 60 2.833213 2.833213 305 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +halldepart 1 3 5.857933 5.857933 3641 +nikolai 1 2 6.263398 6.263398 4087 +mateevnikolai 1 1 6.957497 6.957497 11242 +mateevgradu 1 1 6.957497 6.957497 11243 +studentmateev 1 1 6.957497 6.957497 11244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html new file mode 100644 index 00000000..89d4b54d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^matthew^matthew.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 3 340 1.098612 3.295836 18 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 2 215 1.386294 2.772588 23 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +year 1 148 1.945910 1.945910 84 +databas 2 122 2.079442 4.158884 86 +document 1 121 2.079442 2.079442 89 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +select 1 91 2.397895 2.397895 154 +academ 1 82 2.484907 2.484907 178 +come 1 78 2.564949 2.564949 202 +multimedia 1 68 2.708050 2.708050 258 +visit 1 63 2.772589 2.772589 288 +laboratori 1 63 2.772589 2.772589 292 +summer 1 56 2.890372 2.890372 311 +soon 1 36 3.367296 3.367296 494 +scientist 1 31 3.496508 3.496508 560 +universityithaca 1 24 3.761200 3.761200 710 +fellow 1 24 3.761200 3.761200 701 +fund 1 21 3.912023 3.912023 805 +supervis 1 20 3.951244 3.951244 840 +stop 1 17 4.110874 4.110874 942 +heterogen 1 14 4.317488 4.317488 1090 +arpa 2 11 4.553877 9.107754 1369 +princip 1 10 4.653960 4.653960 1397 +xerox 1 8 4.875197 4.875197 1725 +matthew 1 6 5.164786 5.164786 2193 +chat 1 6 5.164786 5.164786 2128 +metadata 1 4 5.568345 5.568345 2945 +morgenstern 2 1 6.957497 13.914994 11245 +pagematthew 1 1 6.957497 6.957497 11246 +morgensternresearch 1 1 6.957497 6.957497 11247 +leaderaddress 1 1 6.957497 6.957497 11248 +centerxerox 1 1 6.957497 6.957497 11249 +institutecornel 1 1 6.957497 6.957497 11250 +edustatu 1 1 6.957497 6.957497 11251 +scienceproject 1 1 6.957497 6.957497 11252 +fundedresearch 1 1 6.957497 6.957497 11253 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html new file mode 100644 index 00000000..a69cc16f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhr^mhr.html @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +algorithm 3 162 1.791759 5.375277 57 +data 2 170 1.791759 3.583518 49 +recent 1 167 1.791759 1.791759 58 +structur 2 106 2.197225 4.394450 105 +theori 1 111 2.197225 2.197225 127 +homepag 1 93 2.397895 2.397895 148 +dynam 2 76 2.564949 5.129898 194 +digit 1 52 2.995732 2.995732 348 +especi 1 36 3.367296 3.367296 496 +random 1 34 3.401197 3.401197 511 +committe 1 34 3.401197 3.401197 522 +graph 4 30 3.555348 14.221392 576 +bound 1 26 3.688879 3.688879 659 +universityithaca 1 24 3.761200 3.761200 710 +leav 1 21 3.912023 3.912023 772 +corpor 1 21 3.912023 3.912023 802 +lower 1 18 4.060443 4.060443 886 +eduphon 1 15 4.248495 4.248495 1060 +equip 1 10 4.653960 4.653960 1459 +soda 1 6 5.164786 5.164786 2189 +departmentcornel 1 5 5.347108 5.347108 2275 +stoc 1 5 5.347108 5.347108 2491 +henzing 1 3 5.857933 5.857933 3713 +professorcomput 1 3 5.857933 5.857933 3714 +monika 1 2 6.263398 6.263398 4141 +rauch 1 2 6.263398 6.263398 4142 +homepagemonika 1 1 6.957497 6.957497 11254 +henzingerassist 1 1 6.957497 6.957497 11255 +centerhomepageresearch 1 1 6.957497 6.957497 11256 +interestscombinatori 1 1 6.957497 6.957497 11257 +pageprogram 1 1 6.957497 6.957497 11258 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html new file mode 100644 index 00000000..1ed8507b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mhuang^stanley.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 3 384 0.693147 2.079441 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +project 3 340 1.098612 3.295836 18 +engin 2 297 1.098612 2.197224 20 +cornel 3 215 1.386294 4.158882 23 +link 1 247 1.386294 1.386294 24 +paper 3 205 1.609438 4.828314 38 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +distribut 2 162 1.791759 3.583518 51 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +area 2 144 1.945910 3.891820 80 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +final 1 116 2.197225 2.197225 108 +user 2 104 2.302585 4.605170 137 +technic 1 100 2.302585 2.302585 140 +exam 1 86 2.484907 2.484907 169 +level 1 87 2.484907 2.484907 180 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +integr 1 67 2.708050 2.708050 245 +plan 2 65 2.772589 5.545178 272 +ithaca 1 65 2.772589 2.772589 294 +collect 1 65 2.772589 2.772589 268 +share 2 59 2.833213 5.666426 304 +movi 1 40 3.258097 3.258097 459 +field 1 37 3.332205 3.332205 482 +travel 1 30 3.555348 3.555348 579 +tenni 1 20 3.951244 3.951244 838 +bachelor 1 17 4.110874 4.110874 957 +horu 1 14 4.317488 4.317488 1116 +huang 2 12 4.465908 8.931816 1202 +reness 1 11 4.553877 4.553877 1333 +werner 1 10 4.653960 4.653960 1385 +ride 1 8 4.875197 4.875197 1741 +vogel 1 8 4.875197 4.875197 1622 +sheldon 1 2 6.263398 6.263398 5226 +stanlei 3 1 6.957497 20.872491 11259 +kentucki 2 1 6.957497 13.914994 11260 +huangmast 1 1 6.957497 6.957497 11261 +studentmhuang 1 1 6.957497 6.957497 11262 +courtcornel 1 1 6.957497 6.957497 11263 +systemsdistribut 1 1 6.957497 6.957497 11264 +systemsdatabas 1 1 6.957497 6.957497 11265 +retrievalgraph 1 1 6.957497 6.957497 11266 +interfacesoth 1 1 6.957497 6.957497 11267 +horse_back 1 1 6.957497 6.957497 11268 +myadvisor 1 1 6.957497 6.957497 11269 +robbertvan 1 1 6.957497 6.957497 11270 +planplan 1 1 6.957497 6.957497 11271 +distributionplan 1 1 6.957497 6.957497 11272 +updateplan 1 1 6.957497 6.957497 11273 +faqhorusc 1 1 6.957497 6.957497 11274 +memorydistribut 1 1 6.957497 6.957497 11275 +memorysom 1 1 6.957497 6.957497 11276 +communicationsnapshotu 1 1 6.957497 6.957497 11277 +architecturejobscar 1 1 6.957497 6.957497 11278 +pathbai 1 1 6.957497 6.957497 11279 +jobscyberezumescar 1 1 6.957497 6.957497 11280 +opportunitiesus 1 1 6.957497 6.957497 11281 +stufftechn 1 1 6.957497 6.957497 11282 +searchbel 1 1 6.957497 6.957497 11283 +labsspbsd 1 1 6.957497 6.957497 11284 +sourcesjavarfclast 1 1 6.957497 6.957497 11285 +mhuang 1 1 6.957497 6.957497 11286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html new file mode 100644 index 00000000..d6101bcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^millett^home.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +last 2 314 1.098612 2.197224 14 +cornel 2 215 1.386294 2.772588 23 +link 1 247 1.386294 1.386294 24 +list 2 201 1.609438 3.218876 39 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +public 1 202 1.609438 1.609438 43 +develop 1 174 1.791759 1.791759 53 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +person 2 111 2.197225 4.394450 117 +find 1 111 2.197225 2.197225 111 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +homepag 2 93 2.397895 4.795790 148 +pictur 1 89 2.397895 2.397895 160 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +comment 1 93 2.397895 2.397895 146 +second 1 81 2.484907 2.484907 166 +west 1 83 2.484907 2.484907 192 +know 1 80 2.564949 2.564949 198 +copi 1 63 2.772589 2.772589 284 +copyright 1 36 3.367296 3.367296 495 +express 1 32 3.465736 3.465736 540 +abl 1 30 3.555348 3.555348 566 +challeng 1 26 3.688879 3.688879 653 +never 1 25 3.737670 3.737670 671 +universityithaca 1 24 3.761200 3.761200 710 +decis 1 23 3.806662 3.806662 728 +sciencecornel 1 22 3.850148 3.850148 768 +protect 1 17 4.110874 4.110874 935 +differenti 1 17 4.110874 4.110874 921 +precis 1 15 4.248495 4.248495 1023 +script 1 13 4.382027 4.382027 1171 +speech 1 12 4.465908 4.465908 1222 +holidai 1 12 4.465908 4.465908 1224 +mass 1 8 4.875197 4.875197 1732 +parti 1 8 4.875197 4.875197 1676 +cat 1 6 5.164786 5.164786 2194 +rebecca 1 6 5.164786 5.164786 2174 +highest 1 4 5.568345 5.568345 2950 +cuc 1 4 5.568345 5.568345 2630 +government 1 2 6.263398 6.263398 4248 +aclu 1 2 6.263398 6.263398 5227 +reno 1 2 6.263398 6.263398 5228 +millett 4 1 6.957497 27.829988 11287 +lynett 3 1 6.957497 20.872491 11288 +millettdepart 1 1 6.957497 6.957497 11289 +participatoryform 1 1 6.957497 6.957497 11290 +internetdeserv 1 1 6.957497 6.957497 11291 +intrus 1 1 6.957497 6.957497 11292 +skit 1 1 6.957497 6.957497 11293 +femin 1 1 6.957497 6.957497 11294 +feminist 1 1 6.957497 6.957497 11295 +whenver 1 1 6.957497 6.957497 11296 +sentiment 1 1 6.957497 6.957497 11297 +doormat 1 1 6.957497 6.957497 11298 +prostitut 1 1 6.957497 6.957497 11299 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html new file mode 100644 index 00000000..3d5008d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mishaal^home.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +link 4 247 1.386294 5.545176 24 +cornel 3 215 1.386294 4.158882 23 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +network 1 168 1.791759 1.791759 61 +perform 1 143 1.945910 1.945910 74 +high 2 130 2.079442 4.158884 101 +machin 1 129 2.079442 2.079442 95 +databas 1 122 2.079442 2.079442 86 +place 1 106 2.197225 2.197225 124 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +involv 1 71 2.639057 2.639057 227 +servic 1 72 2.639057 2.639057 236 +practic 1 70 2.708050 2.708050 246 +ithaca 1 65 2.772589 2.772589 294 +new 1 64 2.772589 2.772589 262 +major 1 56 2.890372 2.890372 315 +reason 1 57 2.890372 2.890372 318 +extens 1 53 2.944439 2.944439 340 +cool 1 49 3.044522 3.044522 374 +video 1 44 3.135494 3.135494 405 +offer 1 43 3.178054 3.178054 414 +vision 1 41 3.218876 3.218876 430 +electr 1 38 3.295837 3.295837 461 +soon 1 36 3.367296 3.367296 494 +computersci 1 30 3.555348 3.555348 562 +hope 1 28 3.610918 3.610918 610 +weather 1 28 3.610918 3.610918 618 +latest 1 21 3.912023 3.912023 785 +sure 1 20 3.951244 3.951244 813 +ever 1 19 4.007333 4.007333 872 +practicum 1 16 4.174387 4.174387 960 +stock 1 16 4.174387 4.174387 1007 +menu 1 13 4.382027 4.382027 1156 +everyth 1 13 4.382027 4.382027 1169 +meng 1 12 4.465908 4.465908 1214 +earth 1 10 4.653960 4.653960 1463 +transmiss 1 9 4.753590 4.753590 1588 +andcomput 1 8 4.875197 4.875197 1623 +capac 1 8 4.875197 4.875197 1740 +film 1 8 4.875197 4.875197 1761 +temporari 1 6 5.164786 5.164786 2090 +wrong 1 6 5.164786 5.164786 2025 +conot 1 5 5.347108 5.347108 2245 +doubl 1 4 5.568345 5.568345 2951 +festiv 1 4 5.568345 5.568345 2952 +polytechn 1 3 5.857933 5.857933 3222 +educornel 1 3 5.857933 5.857933 3601 +coolest 1 2 6.263398 6.263398 5229 +newgroup 1 2 6.263398 6.263398 4191 +pagemi 1 2 6.263398 6.263398 5230 +nerd 1 2 6.263398 6.263398 5231 +newgroupc 3 1 6.957497 20.872491 11300 +mishaal 2 1 6.957497 13.914994 11301 +pagemisha 1 1 6.957497 6.957497 11302 +kuwaiti 1 1 6.957497 6.957497 11303 +mengc 1 1 6.957497 6.957497 11304 +worcest 1 1 6.957497 6.957497 11305 +inworcest 1 1 6.957497 6.957497 11306 +bearaccess 1 1 6.957497 6.957497 11307 +newgroupnba 1 1 6.957497 6.957497 11308 +newgroupoptim 1 1 6.957497 6.957497 11309 +kuwait 1 1 6.957497 6.957497 11310 +quotescool 1 1 6.957497 6.957497 11311 +cann 1 1 6.957497 6.957497 11312 +accus 1 1 6.957497 6.957497 11313 +almashanmisha 1 1 6.957497 6.957497 11314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html new file mode 100644 index 00000000..6d03afe3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mkorby^main1.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +last 2 314 1.098612 2.197224 14 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +link 4 247 1.386294 5.545176 24 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +list 2 201 1.609438 3.218876 39 +class 1 199 1.609438 1.609438 37 +recent 1 167 1.791759 1.791759 58 +like 2 132 1.945910 3.891820 81 +click 2 142 1.945910 3.891820 78 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +number 1 130 2.079442 2.079442 97 +pleas 3 113 2.197225 6.591675 114 +make 3 111 2.197225 6.591675 120 +look 3 107 2.197225 6.591675 115 +version 1 113 2.197225 2.197225 122 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +graphic 1 90 2.397895 2.397895 147 +homepag 1 93 2.397895 2.397895 148 +imag 1 91 2.397895 2.397895 161 +stuff 3 87 2.484907 7.454721 171 +thing 3 84 2.484907 7.454721 189 +member 1 84 2.484907 2.484907 165 +internet 1 83 2.484907 2.484907 186 +know 2 80 2.564949 5.129898 198 +complet 2 77 2.564949 5.129898 208 +want 1 79 2.564949 2.564949 199 +come 1 78 2.564949 2.564949 202 +name 2 72 2.639057 5.278114 220 +onlin 1 75 2.639057 2.639057 223 +line 1 75 2.639057 2.639057 231 +would 2 67 2.708050 5.416100 251 +result 1 65 2.772589 2.772589 281 +plai 1 60 2.833213 2.833213 307 +browser 1 56 2.890372 2.890372 313 +date 2 51 2.995732 5.991464 344 +much 1 52 2.995732 2.995732 349 +cool 5 49 3.044522 15.222610 374 +right 1 48 3.044522 3.044522 363 +visitor 1 49 3.044522 3.044522 371 +better 1 45 3.135494 3.135494 401 +mark 1 44 3.135494 3.135494 403 +made 1 44 3.135494 3.135494 398 +keep 1 44 3.135494 3.135494 409 +realli 3 40 3.258097 9.774291 444 +movi 1 40 3.258097 3.258097 459 +join 1 39 3.258097 3.258097 457 +connect 1 37 3.332205 3.332205 485 +hand 1 37 3.332205 3.332205 475 +statist 1 35 3.401197 3.401197 521 +random 1 34 3.401197 3.401197 511 +idea 1 32 3.465736 3.465736 545 +anim 1 31 3.496508 3.496508 557 +someth 1 31 3.496508 3.496508 554 +quit 1 27 3.637586 3.637586 633 +mike 2 24 3.761200 7.522400 703 +other 1 24 3.761200 3.761200 697 +togeth 1 23 3.806662 3.806662 714 +thank 1 23 3.806662 3.806662 721 +love 1 21 3.912023 3.912023 804 +mpeg 1 20 3.951244 3.951244 831 +andrew 1 19 4.007333 4.007333 849 +coupl 2 17 4.110874 8.221748 939 +stop 1 17 4.110874 4.110874 942 +expand 1 17 4.110874 4.110874 928 +whole 1 17 4.110874 4.110874 940 +sign 1 16 4.174387 4.174387 970 +anyth 1 16 4.174387 4.174387 998 +pretti 3 13 4.382027 13.146081 1191 +count 1 12 4.465908 4.465908 1239 +danc 1 12 4.465908 4.465908 1278 +guess 1 10 4.653960 4.653960 1443 +card 1 10 4.653960 4.653960 1435 +kevin 1 9 4.753590 4.753590 1482 +pick 1 9 4.753590 4.753590 1498 +opinion 1 8 4.875197 4.875197 1708 +attent 1 8 4.875197 4.875197 1651 +chanc 1 7 5.010635 5.010635 1960 +brought 1 7 5.010635 5.010635 1925 +bunch 1 7 5.010635 5.010635 1861 +golf 2 6 5.164786 10.329572 2178 +yeah 1 6 5.164786 5.164786 2195 +kid 2 5 5.347108 10.694216 2516 +pagethi 1 5 5.347108 5.347108 2336 +frog 1 5 5.347108 5.347108 2479 +exchang 1 5 5.347108 5.347108 2310 +sing 1 5 5.347108 5.347108 2499 +everybodi 1 5 5.347108 5.347108 2517 +dark 1 4 5.568345 5.568345 2910 +vote 1 4 5.568345 5.568345 2953 +maria 1 4 5.568345 5.568345 2954 +amaz 1 4 5.568345 5.568345 2600 +stockholm 1 3 5.857933 5.857933 3715 +ryan 1 3 5.857933 5.857933 3679 +nicknam 1 3 5.857933 5.857933 3716 +lame 1 3 5.857933 5.857933 3717 +beavi 2 2 6.263398 12.526796 4995 +suck 1 2 6.263398 6.263398 5232 +donnel 1 2 6.263398 6.263398 5233 +spirit 1 2 6.263398 6.263398 5234 +harmoni 1 2 6.263398 6.263398 5235 +reset 1 2 6.263398 6.263398 5236 +korbi 2 1 6.957497 13.914994 11315 +myguestbook 1 1 6.957497 6.957497 11316 +poll 1 1 6.957497 6.957497 11317 +vitya 1 1 6.957497 6.957497 11318 +corbett 1 1 6.957497 6.957497 11319 +eryn 1 1 6.957497 6.957497 11320 +crave 1 1 6.957497 6.957497 11321 +guttermouth 1 1 6.957497 6.957497 11322 +byjust 1 1 6.957497 6.957497 11323 +peic 1 1 6.957497 6.957497 11324 +accuar 1 1 6.957497 6.957497 11325 +atmak 1 1 6.957497 6.957497 11326 +edubas 1 1 6.957497 6.957497 11327 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html new file mode 100644 index 00000000..0e29fd84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^mukai^mukai2.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +fall 1 181 1.609438 1.609438 40 +construct 1 139 1.945910 1.945910 82 +spring 1 131 2.079442 2.079442 88 +graphic 1 90 2.397895 2.397895 147 +multimedia 1 68 2.708050 2.708050 258 +semest 2 58 2.890372 5.780744 312 +effect 1 46 3.091042 3.091042 385 +made 1 44 3.135494 3.135494 398 +anim 1 31 3.496508 3.496508 557 +titl 1 31 3.496508 3.496508 556 +jpeg 1 6 5.164786 5.164786 2053 +nobuhiko 2 1 6.957497 13.914994 11328 +mukai 2 1 6.957497 13.914994 11329 +mukainobuhiko 1 1 6.957497 6.957497 11330 +compressionon 1 1 6.957497 6.957497 11331 +magicon 1 1 6.957497 6.957497 11332 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html new file mode 100644 index 00000000..9d28e722 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nihowe^nihowe.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +person 1 111 2.197225 2.197225 117 +homepag 1 93 2.397895 2.397895 148 +imag 1 91 2.397895 2.397895 161 +info 1 85 2.484907 2.484907 176 +resum 1 79 2.564949 2.564949 217 +refer 1 78 2.564949 2.564949 203 +upson 1 71 2.639057 2.639057 218 +eduoffic 1 33 3.433987 3.433987 531 +photo 1 31 3.496508 3.496508 561 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +studentdepart 1 5 5.347108 5.347108 2505 +nichola 2 3 5.857933 11.715866 3252 +how 2 3 5.857933 11.715866 3289 +nihow 1 1 6.957497 6.957497 11333 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html new file mode 100644 index 00000000..36721bda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikos^nikos.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +cornel 2 215 1.386294 2.772588 23 +public 1 202 1.609438 1.609438 43 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +sinc 1 90 2.397895 2.397895 159 +upson 1 71 2.639057 2.639057 218 +java 1 70 2.708050 2.708050 248 +dept 1 64 2.772589 2.772589 291 +ithaca 1 65 2.772589 2.772589 294 +pointer 1 48 3.044522 3.044522 368 +visitor 1 49 3.044522 3.044522 371 +vita 1 38 3.295837 3.295837 473 +niko 3 4 5.568345 16.705035 2637 +pitsiani 1 3 5.857933 5.857933 3175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html new file mode 100644 index 00000000..bf167ced --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^nikosc.html @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 13 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +research 2 431 0.693147 1.386294 10 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +project 5 340 1.098612 5.493060 18 +student 3 343 1.098612 3.295836 19 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +softwar 4 220 1.386294 5.545176 30 +gener 4 220 1.386294 5.545176 27 +cornel 4 215 1.386294 5.545176 23 +design 1 213 1.386294 1.386294 25 +public 2 202 1.609438 3.218876 43 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +parallel 17 169 1.791759 30.459903 60 +algorithm 4 162 1.791759 7.167036 57 +data 4 170 1.791759 7.167036 49 +distribut 3 162 1.791759 5.375277 51 +implement 2 152 1.791759 3.583518 52 +base 1 165 1.791759 1.791759 50 +phone 1 175 1.791759 1.791759 45 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +hall 1 146 1.945910 1.945910 65 +confer 4 126 2.079442 8.317768 100 +tool 2 117 2.079442 4.158884 93 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +machin 1 129 2.079442 2.079442 95 +intern 3 108 2.197225 6.591675 128 +mathemat 2 108 2.197225 4.394450 123 +well 1 109 2.197225 2.197225 121 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +advanc 2 99 2.302585 4.605170 130 +proceed 5 93 2.397895 11.989475 152 +mani 2 92 2.397895 4.795790 150 +center 2 88 2.397895 4.795790 158 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +environ 5 84 2.484907 12.424535 177 +journal 3 83 2.484907 7.454721 183 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +dynam 4 76 2.564949 10.259796 194 +appear 3 78 2.564949 7.694847 210 +method 3 80 2.564949 7.694847 213 +master 2 76 2.564949 5.129898 216 +messag 1 76 2.564949 2.564949 212 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +state 1 76 2.564949 2.564949 207 +solv 1 73 2.639057 2.639057 234 +workshop 1 71 2.639057 2.639057 239 +appli 1 71 2.639057 2.639057 226 +nation 1 74 2.639057 2.639057 240 +symposium 1 72 2.639057 2.639057 238 +simul 3 66 2.708050 8.124150 255 +foundat 1 62 2.772589 2.772589 286 +ithaca 1 65 2.772589 2.772589 294 +back 1 60 2.833213 2.833213 297 +room 1 59 2.833213 2.833213 301 +special 1 56 2.890372 2.890372 320 +undergradu 1 54 2.944439 2.944439 338 +scientif 1 53 2.944439 2.944439 341 +numer 4 49 3.044522 12.178088 369 +adapt 2 46 3.091042 6.182084 387 +map 4 39 3.258097 13.032388 452 +field 2 37 3.332205 6.664410 482 +domain 2 30 3.555348 7.110696 564 +compon 1 30 3.555348 3.555348 570 +load 3 28 3.610918 10.832754 601 +challeng 1 26 3.688879 3.688879 653 +altern 1 26 3.688879 3.688879 641 +task 1 25 3.737670 3.737670 678 +known 1 24 3.761200 3.761200 702 +scalabl 1 24 3.761200 3.761200 705 +equat 1 23 3.806662 3.806662 724 +geometri 1 22 3.850148 3.850148 752 +siam 1 21 3.912023 3.912023 800 +wang 1 21 3.912023 3.912023 790 +portabl 1 20 3.951244 3.951244 819 +binari 1 20 3.951244 3.951244 823 +toolkit 1 20 3.951244 3.951244 835 +runtim 2 19 4.007333 8.014666 858 +partial 2 18 4.060443 8.120886 900 +former 1 17 4.110874 4.110874 956 +differenti 1 17 4.110874 4.110874 921 +partit 1 16 4.174387 4.174387 984 +balanc 3 14 4.317488 12.952464 1112 +incomput 1 14 4.317488 4.317488 1096 +menu 1 13 4.382027 4.382027 1156 +iter 2 12 4.465908 8.931816 1206 +touch 1 12 4.465908 4.465908 1288 +characterist 1 12 4.465908 4.465908 1257 +rice 4 11 4.553877 18.215508 1336 +multithread 1 11 4.553877 4.553877 1315 +black 2 10 4.653960 9.307920 1418 +purdu 1 10 4.653960 4.653960 1466 +decomposit 1 10 4.653960 4.653960 1439 +rhode 1 9 4.753590 4.753590 1579 +sensit 1 8 4.875197 4.875197 1726 +vineet 1 8 4.875197 4.875197 1639 +watson 1 8 4.875197 4.875197 1691 +yang 1 8 4.875197 4.875197 1652 +univeristi 1 8 4.875197 4.875197 1754 +multicomput 2 7 5.010635 10.021270 1890 +solver 2 7 5.010635 10.021270 1911 +thompson 1 6 5.164786 5.164786 2049 +heurist 1 6 5.164786 5.164786 2125 +hole 2 5 5.347108 10.694216 2518 +fluid 2 5 5.347108 10.694216 2440 +grand 1 5 5.347108 5.347108 2425 +niko 3 4 5.568345 16.705035 2637 +bernoulli 1 4 5.568345 5.568345 2955 +kodukula 1 4 5.568345 5.568345 2640 +indupraka 1 4 5.568345 5.568345 2639 +pingali 1 4 5.568345 5.568345 2956 +contemporari 1 4 5.568345 5.568345 2719 +colorado 1 4 5.568345 5.568345 2938 +knight 1 4 5.568345 5.568345 2728 +richter 1 4 5.568345 5.568345 2957 +architecur 1 3 5.857933 5.857933 3448 +ahuja 1 3 5.857933 5.857933 3494 +ctctr 1 3 5.857933 5.857933 3625 +imac 1 3 5.857933 5.857933 3718 +brunswick 1 3 5.857933 5.857933 3567 +mimd 1 3 5.857933 5.857933 3361 +chrisochoid 13 2 6.263398 81.424174 5237 +grid 4 2 6.263398 25.053592 4228 +facet 1 2 6.263398 6.263398 4687 +prema 1 2 6.263398 6.263398 5238 +kale 1 2 6.263398 6.263398 4545 +key 1 2 6.263398 6.263398 5057 +aiaa 1 2 6.263398 6.263398 5239 +moscow 1 2 6.263398 6.263398 4884 +programmingenviron 1 2 6.263398 6.263398 5240 +and 1 2 6.263398 6.263398 5241 +nikosc 1 2 6.263398 6.263398 5242 +housti 4 1 6.957497 27.829988 11334 +ellpack 3 1 6.957497 20.872491 11335 +sukup 2 1 6.957497 13.914994 11336 +mississippi 2 1 6.957497 13.914994 11337 +papachi 2 1 6.957497 13.914994 11338 +florian 1 1 6.957497 6.957497 11339 +reza 1 1 6.957497 6.957497 11340 +behforooz 1 1 6.957497 6.957497 11341 +animesh 1 1 6.957497 6.957497 11342 +chatterje 1 1 6.957497 6.957497 11343 +rajani 1 1 6.957497 6.957497 11344 +vaidyanathan 1 1 6.957497 6.957497 11345 +bowyer 1 1 6.957497 6.957497 11346 +offifth 1 1 6.957497 6.957497 11347 +kohl 1 1 6.957497 6.957497 11348 +yellick 1 1 6.957497 6.957497 11349 +unstructur 1 1 6.957497 6.957497 11350 +collid 1 1 6.957497 6.957497 11351 +haupt 1 1 6.957497 6.957497 11352 +scalableparallel 1 1 6.957497 6.957497 11353 +engineeringresearch 1 1 6.957497 6.957497 11354 +parallelhardwar 1 1 6.957497 6.957497 11355 +differentialequ 1 1 6.957497 6.957497 11356 +vichnevetski 1 1 6.957497 6.957497 11357 +decompos 1 1 6.957497 6.957497 11358 +kortesi 1 1 6.957497 6.957497 11359 +domaindecomposit 1 1 6.957497 6.957497 11360 +ussr 1 1 6.957497 6.957497 11361 +glowinski 1 1 6.957497 6.957497 11362 +karathanas 1 1 6.957497 6.957497 11363 +samartzi 1 1 6.957497 6.957497 11364 +vavali 1 1 6.957497 6.957497 11365 +weerawarana 1 1 6.957497 6.957497 11366 +onsupercomput 1 1 6.957497 6.957497 11367 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html new file mode 100644 index 00000000..ff89ec26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^nikosc^projects^prema^index.html @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +cornel 4 215 1.386294 5.545176 23 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +parallel 5 169 1.791759 8.958795 60 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +model 3 145 1.945910 5.837730 69 +support 2 132 1.945910 3.891820 83 +architectur 2 139 1.945910 3.891820 77 +relat 1 139 1.945910 1.945910 68 +compil 2 122 2.079442 4.158884 96 +welcom 1 122 2.079442 2.079442 99 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +center 1 88 2.397895 2.397895 158 +environ 3 84 2.484907 7.454721 177 +institut 1 84 2.484907 2.484907 187 +build 1 85 2.484907 2.484907 184 +journal 1 83 2.484907 2.484907 183 +dynam 2 76 2.564949 5.129898 194 +solv 1 73 2.639057 2.639057 234 +appli 1 71 2.639057 2.639057 226 +copi 1 63 2.772589 2.772589 284 +automat 1 61 2.833213 2.833213 306 +share 1 59 2.833213 2.833213 304 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +scientif 1 53 2.944439 2.944439 341 +numer 1 49 3.044522 3.044522 369 +adapt 1 46 3.091042 3.091042 387 +execut 1 45 3.135494 3.135494 404 +mechan 1 43 3.178054 3.178054 416 +multi 1 36 3.367296 3.367296 493 +copyright 1 36 3.367296 3.367296 495 +global 1 34 3.401197 3.401197 520 +load 2 28 3.610918 7.221836 601 +task 1 25 3.737670 3.737670 678 +thread 1 23 3.806662 3.806662 722 +varieti 1 22 3.850148 3.850148 740 +portabl 3 20 3.951244 11.853732 819 +runtim 6 19 4.007333 24.043998 858 +style 1 15 4.248495 4.248495 1036 +balanc 2 14 4.317488 8.634976 1112 +target 1 12 4.465908 4.465908 1282 +multithread 1 11 4.553877 4.553877 1315 +consortium 1 10 4.653960 4.653960 1467 +port 2 8 4.875197 9.750394 1766 +multicomput 2 7 5.010635 10.021270 1890 +niko 2 4 5.568345 11.136690 2637 +ctctr 1 3 5.857933 5.857933 3625 +prema 6 2 6.263398 37.580388 5238 +chrisochoid 2 2 6.263398 12.526796 5237 +suppot 1 2 6.263398 6.263398 5243 +nikosc 1 2 6.263398 6.263398 5242 +andproblem 1 1 6.957497 6.957497 11368 +computingappl 1 1 6.957497 6.957497 11369 +pdecomput 1 1 6.957497 6.957497 11370 +pcrc 1 1 6.957497 6.957497 11371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html new file mode 100644 index 00000000..570c28a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pavel^Welcome.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +welcom 1 122 2.079442 2.079442 99 +look 2 107 2.197225 4.394450 115 +world 1 115 2.197225 2.197225 126 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +real 1 93 2.397895 2.397895 144 +learn 1 86 2.484907 2.484907 170 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +written 1 63 2.772589 2.772589 278 +locat 1 59 2.833213 2.833213 303 +game 1 36 3.367296 3.367296 498 +photo 1 31 3.496508 3.496508 561 +galleri 1 13 4.382027 4.382027 1192 +rest 1 12 4.465908 4.465908 1259 +invit 1 10 4.653960 4.653960 1428 +nuprl 1 10 4.653960 4.653960 1402 +sundai 1 10 4.653960 4.653960 1387 +tire 1 4 5.568345 5.568345 2799 +cyberspac 1 3 5.857933 5.857933 3719 +pavel 1 2 6.263398 6.263398 4164 +cinema 1 2 6.263398 6.263398 5244 +naumov 1 1 6.957497 6.957497 11372 +orplai 1 1 6.957497 6.957497 11373 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html new file mode 100644 index 00000000..c1c31b1c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pearson^pearson.html @@ -0,0 +1,195 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 12 775 0.000000 0.000000 2 +scienc 6 640 0.000000 0.000000 4 +program 4 374 0.693147 2.772588 7 +work 3 380 0.693147 2.079441 9 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +time 4 293 1.098612 4.394448 17 +current 2 284 1.098612 2.197224 21 +us 1 329 1.098612 1.098612 16 +design 5 213 1.386294 6.931470 25 +languag 4 227 1.386294 5.545176 26 +cornel 3 215 1.386294 4.158882 23 +gener 2 220 1.386294 2.772588 27 +softwar 1 220 1.386294 1.386294 30 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +algorithm 10 162 1.791759 17.917590 57 +parallel 7 169 1.791759 12.542313 60 +implement 3 152 1.791759 5.375277 52 +network 2 168 1.791759 3.583518 61 +architectur 4 139 1.945910 7.783640 77 +problem 3 147 1.945910 5.837730 75 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +area 1 144 1.945910 1.945910 80 +machin 3 129 2.079442 6.238326 95 +confer 2 126 2.079442 4.158884 100 +technolog 2 131 2.079442 4.158884 102 +report 2 131 2.079442 4.158884 92 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +schedul 1 119 2.079442 2.079442 85 +intern 2 108 2.197225 4.394450 128 +find 2 111 2.197225 4.394450 111 +structur 2 106 2.197225 4.394450 105 +place 1 106 2.197225 2.197225 124 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +need 1 98 2.302585 2.302585 135 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +select 1 91 2.397895 2.397895 154 +ieee 3 86 2.484907 7.454721 190 +journal 2 83 2.484907 4.969814 183 +control 2 82 2.484907 4.969814 164 +requir 1 81 2.484907 2.484907 167 +wide 1 84 2.484907 2.484907 185 +thing 1 84 2.484907 2.484907 189 +chang 1 82 2.484907 2.484907 163 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +david 2 71 2.639057 5.278114 232 +effici 2 73 2.639057 5.278114 233 +materi 1 75 2.639057 2.639057 221 +workshop 1 71 2.639057 2.639057 239 +upson 1 71 2.639057 2.639057 218 +goal 1 66 2.708050 2.708050 250 +practic 1 70 2.708050 2.708050 246 +simul 1 66 2.708050 2.708050 255 +guid 1 63 2.772589 2.772589 267 +foundat 1 62 2.772589 2.772589 286 +simpl 1 60 2.833213 2.833213 298 +publish 3 57 2.890372 8.671116 326 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +detail 1 57 2.890372 2.890372 321 +processor 2 54 2.944439 5.888878 335 +instruct 2 53 2.944439 5.888878 332 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +set 1 50 3.044522 3.044522 361 +physic 1 47 3.091042 3.091042 377 +could 1 46 3.091042 3.091042 383 +video 1 44 3.135494 3.135494 405 +long 1 43 3.178054 3.178054 413 +offer 1 43 3.178054 3.178054 414 +vision 2 41 3.218876 6.437752 430 +fast 2 42 3.218876 6.437752 429 +linear 2 41 3.218876 6.437752 431 +futur 1 41 3.218876 3.218876 427 +press 1 42 3.218876 3.218876 419 +york 1 41 3.218876 3.218876 435 +theoret 2 39 3.258097 6.516194 446 +realli 2 40 3.258097 6.516194 444 +must 1 40 3.258097 3.258097 442 +societi 1 40 3.258097 3.258097 456 +transact 1 39 3.258097 3.258097 438 +connect 2 37 3.332205 6.664410 485 +cost 2 37 3.332205 6.664410 480 +purpos 2 37 3.332205 6.664410 481 +game 1 36 3.367296 3.367296 498 +tree 1 36 3.367296 3.367296 492 +singl 2 34 3.401197 6.802394 510 +tech 2 35 3.401197 6.802394 515 +global 2 34 3.401197 6.802394 520 +taken 1 31 3.496508 3.496508 555 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +multiprocessor 2 28 3.610918 7.221836 605 +becom 1 28 3.610918 3.610918 603 +campu 1 27 3.637586 3.637586 623 +challeng 1 26 3.688879 3.688879 653 +notic 1 25 3.737670 3.737670 675 +scalabl 1 24 3.761200 3.761200 705 +universityithaca 1 24 3.761200 3.761200 710 +highli 1 23 3.806662 3.806662 725 +sequenti 1 22 3.850148 3.850148 745 +chip 1 21 3.912023 3.912023 770 +vlsi 1 21 3.912023 3.912023 795 +alloc 2 20 3.951244 7.902488 821 +prepar 1 20 3.951244 3.951244 824 +exploit 1 20 3.951244 3.951244 836 +region 3 19 4.007333 12.021999 875 +dimension 1 18 4.060443 4.060443 909 +speed 1 18 4.060443 4.060443 911 +element 1 18 4.060443 4.060443 895 +regist 2 17 4.110874 8.221748 938 +estim 1 17 4.110874 4.110874 930 +sept 1 17 4.110874 4.110874 952 +spatial 1 16 4.174387 4.174387 988 +reflect 1 15 4.248495 4.248495 1034 +near 1 14 4.317488 4.317488 1091 +polynomi 1 14 4.317488 4.317488 1069 +believ 2 13 4.382027 8.764054 1187 +johnson 2 13 4.382027 8.764054 1162 +sigplan 2 13 4.382027 8.764054 1190 +mesh 2 11 4.553877 9.107754 1351 +desktop 1 10 4.653960 4.653960 1445 +placement 1 10 4.653960 4.653960 1420 +cryptographi 1 9 4.753590 4.753590 1512 +realiz 2 8 4.875197 9.750394 1739 +perhap 1 8 4.875197 4.875197 1693 +attent 1 8 4.875197 4.875197 1651 +character 1 8 4.875197 4.875197 1767 +entri 1 8 4.875197 4.875197 1678 +pldi 1 8 4.875197 4.875197 1704 +irregular 1 8 4.875197 4.875197 1768 +hallcornel 1 8 4.875197 4.875197 1757 +maxim 2 7 5.010635 10.021270 1944 +henc 1 7 5.010635 5.010635 1805 +pursu 1 7 5.010635 5.010635 1902 +sensor 1 7 5.010635 5.010635 1920 +hidden 1 6 5.164786 5.164786 1987 +feasibl 1 6 5.164786 5.164786 2157 +cellular 1 5 5.347108 5.347108 2433 +grand 1 5 5.347108 5.347108 2425 +pingali 2 4 5.568345 11.136690 2956 +sold 1 4 5.568345 5.568345 2813 +compcon 1 4 5.568345 5.568345 2958 +zippel 1 4 5.568345 5.568345 2879 +neumann 2 3 5.857933 11.715866 3720 +simd 2 3 5.857933 11.715866 3360 +lattic 1 3 5.857933 5.857933 3721 +parallelmachin 1 3 5.857933 5.857933 3693 +arm 1 3 5.857933 5.857933 3697 +exit 1 3 5.857933 5.857933 3124 +usaemail 1 3 5.857933 5.857933 3722 +pearson 13 2 6.263398 81.424174 5245 +crystal 1 2 6.263398 6.263398 5013 +molecul 1 2 6.263398 6.263398 5246 +succe 1 2 6.263398 6.263398 5214 +vazirani 2 1 6.957497 13.914994 11374 +bipartit 2 1 6.957497 13.914994 11375 +consistingof 1 1 6.957497 6.957497 11376 +theubiquit 1 1 6.957497 6.957497 11377 +heed 1 1 6.957497 6.957497 11378 +lawsof 1 1 6.957497 6.957497 11379 +layoutand 1 1 6.957497 6.957497 11380 +accomplishedbi 1 1 6.957497 6.957497 11381 +ihav 1 1 6.957497 6.957497 11382 +couldb 1 1 6.957497 6.957497 11383 +thisarchitectur 1 1 6.957497 6.957497 11384 +designfor 1 1 6.957497 6.957497 11385 +proteinstructur 1 1 6.957497 6.957497 11386 +parallelcomput 1 1 6.957497 6.957497 11387 +commodityand 1 1 6.957497 6.957497 11388 +architectureand 1 1 6.957497 6.957497 11389 +hideth 1 1 6.957497 6.957497 11390 +underlyingvon 1 1 6.957497 6.957497 11391 +architectureha 1 1 6.957497 6.957497 11392 +easyto 1 1 6.957497 6.957497 11393 +dunten 1 1 6.957497 6.957497 11394 +kiewit 1 1 6.957497 6.957497 11395 +pillai 1 1 6.957497 6.957497 11396 +irregularli 1 1 6.957497 6.957497 11397 +allerton 1 1 6.957497 6.957497 11398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html new file mode 100644 index 00000000..5cd5b01e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^peskin^Welcome.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +comput 4 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 4 380 0.693147 2.772588 9 +program 4 374 0.693147 2.772588 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 4 297 1.098612 4.394448 20 +student 3 343 1.098612 3.295836 19 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +cornel 7 215 1.386294 9.704058 23 +mail 4 238 1.386294 5.545176 22 +also 2 259 1.386294 2.772588 28 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +contact 2 153 1.791759 3.583518 59 +develop 2 174 1.791759 3.583518 53 +year 4 148 1.945910 7.783640 84 +construct 2 139 1.945910 3.891820 82 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +site 2 106 2.197225 4.394450 119 +intern 2 108 2.197225 4.394450 128 +manag 1 114 2.197225 2.197225 125 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +part 1 98 2.302585 2.302585 129 +follow 2 92 2.397895 4.795790 143 +sinc 2 90 2.397895 4.795790 159 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +school 1 84 2.484907 2.484907 188 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +master 2 76 2.564949 5.129898 216 +server 2 76 2.564949 5.129898 204 +resum 1 79 2.564949 2.564949 217 +complet 1 77 2.564949 2.564949 208 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +java 3 70 2.708050 8.124150 248 +receiv 2 66 2.708050 5.416100 244 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +knowledg 1 67 2.708050 2.708050 243 +practic 1 70 2.708050 2.708050 246 +window 1 68 2.708050 2.708050 242 +experi 2 64 2.772589 5.545178 283 +creat 2 63 2.772589 5.545178 277 +content 2 59 2.833213 5.666426 302 +colleg 2 61 2.833213 5.666426 300 +best 1 59 2.833213 2.833213 299 +semest 2 58 2.890372 5.780744 312 +found 1 53 2.944439 2.944439 337 +februari 1 54 2.944439 2.944439 328 +run 1 51 2.995732 2.995732 347 +case 1 51 2.995732 2.995732 351 +visitor 1 49 3.044522 3.044522 371 +still 1 50 3.044522 3.044522 362 +possibl 1 47 3.091042 3.091042 378 +favorit 2 44 3.135494 6.270988 410 +offer 1 43 3.178054 3.178054 414 +compani 1 41 3.218876 3.218876 423 +soon 1 36 3.367296 3.367296 494 +chapter 4 32 3.465736 13.862944 536 +taken 1 31 3.496508 3.496508 555 +abl 1 30 3.555348 3.555348 566 +quot 1 29 3.583519 3.583519 582 +becom 1 28 3.610918 3.610918 603 +hope 1 28 3.610918 3.610918 610 +administr 1 27 3.637586 3.637586 628 +request 1 26 3.688879 3.688879 635 +enabl 1 26 3.688879 3.688879 655 +enhanc 1 26 3.688879 3.688879 644 +client 3 25 3.737670 11.213010 679 +reach 1 24 3.761200 3.761200 688 +alwai 1 24 3.761200 3.761200 691 +cooper 1 22 3.850148 3.850148 757 +busi 1 21 3.912023 3.912023 784 +applet 3 20 3.951244 11.853732 827 +mpeg 1 20 3.951244 3.951244 831 +wonder 1 20 3.951244 3.951244 815 +log 1 19 4.007333 4.007333 857 +stock 1 16 4.174387 4.174387 1007 +todd 3 15 4.248495 12.745485 1051 +joint 1 13 4.382027 4.382027 1130 +johnson 1 13 4.382027 4.382027 1162 +brother 1 13 4.382027 4.382027 1189 +meng 2 12 4.465908 8.931816 1214 +round 1 8 4.875197 4.875197 1769 +presid 1 6 5.164786 5.164786 2196 +quickli 1 6 5.164786 5.164786 2000 +classroom 1 6 5.164786 5.164786 2006 +microsystem 1 6 5.164786 5.164786 2160 +junior 1 5 5.347108 5.347108 2519 +supplement 1 5 5.347108 5.347108 2355 +suppli 1 4 5.568345 5.568345 2611 +tape 1 4 5.568345 5.568345 2959 +permiss 1 4 5.568345 5.568345 2642 +jointli 1 3 5.857933 5.857933 3118 +eduand 1 3 5.857933 5.857933 3452 +roll 1 3 5.857933 5.857933 3723 +espn 1 3 5.857933 5.857933 3724 +borrow 1 3 5.857933 5.857933 3725 +fratern 1 2 6.263398 6.263398 4979 +throughth 1 2 6.263398 6.263398 4065 +ticker 1 2 6.263398 6.263398 5247 +peskin 3 1 6.957497 20.872491 11399 +acacia 2 1 6.957497 13.914994 11400 +andyour 1 1 6.957497 6.957497 11401 +workeda 1 1 6.957497 6.957497 11402 +cornellundergradu 1 1 6.957497 6.957497 11403 +theirfield 1 1 6.957497 6.957497 11404 +isrun 1 1 6.957497 6.957497 11405 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html new file mode 100644 index 00000000..84476df1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^pierce^pierce.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +student 2 343 1.098612 2.197224 19 +offic 1 299 1.098612 1.098612 13 +cornel 3 215 1.386294 4.158882 23 +list 1 201 1.609438 1.609438 39 +read 2 154 1.791759 3.583518 47 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +mani 1 92 2.397895 2.397895 150 +stuff 4 87 2.484907 9.939628 171 +second 1 81 2.484907 2.484907 166 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +david 2 71 2.639057 5.278114 232 +upson 1 71 2.639057 2.639057 218 +ithaca 2 65 2.772589 5.545178 294 +without 1 50 3.044522 3.044522 370 +favorit 1 44 3.135494 3.135494 410 +howev 1 41 3.218876 3.218876 422 +approxim 1 35 3.401197 3.401197 509 +go 1 33 3.433987 3.433987 529 +quot 1 29 3.583519 3.583519 582 +great 1 27 3.637586 3.637586 626 +although 1 25 3.737670 3.737670 667 +citi 1 19 4.007333 4.007333 874 +otherwis 1 17 4.110874 4.110874 922 +alreadi 1 16 4.174387 4.174387 963 +month 1 15 4.248495 4.248495 1025 +dave 1 14 4.317488 4.317488 1098 +philadelphia 1 12 4.465908 4.465908 1244 +resid 1 10 4.653960 4.653960 1461 +shop 1 10 4.653960 4.653960 1469 +imposs 1 9 4.753590 4.753590 1513 +pittsburgh 2 7 5.010635 10.021270 1938 +pennsylvania 2 7 5.010635 10.021270 1932 +famou 1 6 5.164786 5.164786 2185 +pierc 4 4 5.568345 22.273380 2623 +outlet 1 2 6.263398 6.263398 5248 +valentin 1 1 6.957497 6.957497 11406 +familycurr 1 1 6.957497 6.957497 11407 +halfwai 1 1 6.957497 6.957497 11408 +andharrisburg 1 1 6.957497 6.957497 11409 +younev 1 1 6.957497 6.957497 11410 +sinceit 1 1 6.957497 6.957497 11411 +throughpittsburgh 1 1 6.957497 6.957497 11412 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html new file mode 100644 index 00000000..c2e94013 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^prakas^home.html @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +work 2 380 0.693147 1.386294 9 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 5 215 1.386294 6.931470 23 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +group 3 183 1.609438 4.828314 36 +public 2 202 1.609438 3.218876 43 +applic 2 170 1.791759 3.583518 56 +architectur 2 139 1.945910 3.891820 77 +support 2 132 1.945910 3.891820 83 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +construct 1 139 1.945910 1.945910 82 +compil 3 122 2.079442 6.238326 96 +technolog 1 131 2.079442 2.079442 102 +theori 2 111 2.197225 4.394450 127 +well 1 109 2.197225 2.197225 121 +code 1 108 2.197225 2.197225 116 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +center 3 88 2.397895 7.193685 158 +present 3 91 2.397895 7.193685 145 +imag 1 91 2.397895 2.397895 161 +octob 1 89 2.397895 2.397895 156 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +level 1 87 2.484907 2.484907 180 +info 1 85 2.484907 2.484907 176 +april 1 77 2.564949 2.564949 196 +multimedia 1 68 2.708050 2.708050 258 +ithaca 1 65 2.772589 2.772589 294 +prof 1 64 2.772589 2.772589 273 +summer 2 56 2.890372 5.780744 311 +talk 5 53 2.944439 14.722195 336 +undergradu 1 54 2.944439 2.944439 338 +extens 1 53 2.944439 2.944439 340 +maintain 1 51 2.995732 2.995732 342 +paul 1 38 3.295837 3.295837 471 +seminar 1 38 3.295837 3.295837 470 +multi 1 36 3.367296 3.367296 493 +random 1 34 3.401197 3.401197 511 +taught 1 33 3.433987 3.433987 526 +transform 2 32 3.465736 6.931472 542 +given 1 32 3.465736 3.465736 538 +abl 2 30 3.555348 7.110696 566 +packag 2 28 3.610918 7.221836 614 +framework 1 28 3.610918 3.610918 606 +seri 1 24 3.761200 3.761200 708 +handl 1 24 3.761200 3.761200 685 +lab 1 24 3.761200 3.761200 698 +deal 1 22 3.850148 3.850148 736 +instal 1 22 3.850148 3.850148 754 +runtim 1 19 4.007333 4.007333 858 +affili 1 13 4.382027 4.382027 1194 +deriv 1 13 4.382027 4.382027 1145 +block 1 13 4.382027 4.382027 1183 +loop 2 11 4.553877 9.107754 1310 +vladimir 1 11 4.553877 4.553877 1324 +regard 1 11 4.553877 4.553877 1309 +prior 1 10 4.653960 4.653960 1438 +tradit 1 10 4.653960 4.653960 1404 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +madra 1 8 4.875197 4.875197 1770 +watson 1 8 4.875197 4.875197 1691 +keshav 1 7 5.010635 5.010635 1852 +tip 1 7 5.010635 5.010635 1863 +nest 2 6 5.164786 10.329572 2151 +dens 1 6 5.164786 5.164786 2122 +handi 1 6 5.164786 5.164786 2111 +czar 1 5 5.347108 5.347108 2503 +licens 1 5 5.347108 5.347108 2520 +indupraka 2 4 5.568345 11.136690 2639 +kodukula 2 4 5.568345 11.136690 2640 +bernoulli 1 4 5.568345 5.568345 2955 +pingali 1 4 5.568345 5.568345 2956 +vijai 1 4 5.568345 5.568345 2960 +stodghil 1 4 5.568345 5.568345 2864 +trivial 1 4 5.568345 5.568345 2786 +dagstuhl 1 4 5.568345 5.568345 2871 +vliw 2 3 5.857933 11.715866 3514 +interplai 1 3 5.857933 5.857933 3726 +chelmsford 1 3 5.857933 5.857933 3564 +schloss 1 3 5.857933 5.857933 3727 +useof 1 3 5.857933 5.857933 3368 +andoper 1 3 5.857933 5.857933 3621 +praka 1 2 6.263398 6.263398 4155 +nawaaz 1 2 6.263398 6.263398 4153 +ahm 1 2 6.263398 6.263398 4154 +kotlyar 1 2 6.263398 6.263398 4907 +menon 1 2 6.263398 6.263398 5249 +imperfectli 2 1 6.957497 13.914994 11413 +tothat 1 1 6.957497 6.957497 11414 +andmultiprocessor 1 1 6.957497 6.957497 11415 +fromscientif 1 1 6.957497 6.957497 11416 +withibm 1 1 6.957497 6.957497 11417 +hasinterest 1 1 6.957497 6.957497 11418 +athp 1 1 6.957497 6.957497 11419 +wasabout 1 1 6.957497 6.957497 11420 +necess 1 1 6.957497 6.957497 11421 +looptransform 1 1 6.957497 6.957497 11422 +loopparallel 1 1 6.957497 6.957497 11423 +regardingdata 1 1 6.957497 6.957497 11424 +centric 1 1 6.957497 6.957497 11425 +availableund 1 1 6.957497 6.957497 11426 +departmentmachin 1 1 6.957497 6.957497 11427 +andfind 1 1 6.957497 6.957497 11428 +alsofind 1 1 6.957497 6.957497 11429 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html new file mode 100644 index 00000000..88e84751 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^praveen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +project 2 340 1.098612 2.197224 18 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +data 2 170 1.791759 3.583518 49 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +databas 2 122 2.079442 4.158884 86 +postscript 1 131 2.079442 2.079442 90 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +know 1 80 2.564949 2.564949 198 +upson 1 71 2.639057 2.639057 218 +order 1 69 2.708050 2.708050 249 +ithaca 2 65 2.772589 5.545178 294 +abstract 1 62 2.772589 2.772589 276 +type 1 61 2.833213 2.833213 296 +thesi 1 57 2.890372 2.890372 327 +case 1 51 2.995732 2.995732 351 +profession 1 51 2.995732 2.995732 345 +format 1 48 3.044522 3.044522 356 +tree 1 36 3.367296 3.367296 492 +enhanc 1 26 3.688879 3.688879 644 +sequenc 1 23 3.806662 3.806662 734 +sigmod 1 19 4.007333 4.007333 877 +save 1 14 4.317488 4.317488 1099 +dbm 1 13 4.382027 4.382027 1136 +submiss 1 11 4.553877 4.553877 1298 +road 1 11 4.553877 4.553877 1374 +seshadri 2 7 5.010635 10.021270 1803 +praveen 2 6 5.164786 10.329572 1996 +green 1 4 5.568345 5.568345 2848 +predat 1 3 5.857933 5.857933 3135 +warren 1 3 5.857933 5.857933 3301 +packer 1 3 5.857933 5.857933 3728 +adt 1 1 6.957497 6.957497 11430 +ranjani 1 1 6.957497 6.957497 11431 +ramamurthi 1 1 6.957497 6.957497 11432 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html new file mode 100644 index 00000000..73960e17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 4 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +system 10 443 0.693147 6.931470 6 +inform 2 412 0.693147 1.386294 8 +work 2 380 0.693147 1.386294 9 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +project 8 340 1.098612 8.788896 18 +us 7 329 1.098612 7.690284 16 +time 6 293 1.098612 6.591672 17 +last 2 314 1.098612 2.197224 14 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +languag 6 227 1.386294 8.317764 26 +wisc 4 242 1.386294 5.545176 33 +also 3 259 1.386294 4.158882 28 +gener 2 220 1.386294 2.772588 27 +design 1 213 1.386294 1.386294 25 +oper 13 180 1.609438 20.922694 34 +includ 4 208 1.609438 6.437752 42 +group 3 183 1.609438 4.828314 36 +paper 2 205 1.609438 3.218876 38 +modifi 1 178 1.609438 1.609438 35 +data 23 170 1.791759 41.210457 49 +implement 4 152 1.791759 7.167036 52 +recent 3 167 1.791759 5.375277 58 +madison 3 165 1.791759 5.375277 55 +base 2 165 1.791759 3.583518 50 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +relat 13 139 1.945910 25.296830 68 +model 11 145 1.945910 21.405010 69 +support 5 132 1.945910 9.729550 83 +process 5 142 1.945910 9.729550 72 +object 4 138 1.945910 7.783640 79 +click 4 142 1.945910 7.783640 78 +like 3 132 1.945910 5.837730 81 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +databas 13 122 2.079442 27.032746 86 +confer 3 126 2.079442 6.238326 100 +document 2 121 2.079442 4.158884 89 +postscript 2 131 2.079442 4.158884 90 +provid 2 121 2.079442 4.158884 94 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +manag 6 114 2.197225 13.183350 125 +version 2 113 2.197225 4.394450 122 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +techniqu 4 99 2.302585 9.210340 138 +need 3 98 2.302585 6.907755 135 +user 2 104 2.302585 4.605170 137 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +mani 3 92 2.397895 7.193685 150 +proceed 3 93 2.397895 7.193685 152 +call 2 91 2.397895 4.795790 153 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +requir 4 81 2.484907 9.939628 167 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +optim 10 79 2.564949 25.649490 197 +orient 3 80 2.564949 7.694847 205 +server 3 76 2.564949 7.694847 204 +exampl 2 77 2.564949 5.129898 195 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +effici 6 73 2.639057 15.834342 233 +involv 3 71 2.639057 7.917171 227 +order 7 69 2.708050 18.956350 249 +view 3 70 2.708050 8.124150 254 +would 2 67 2.708050 5.416100 251 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +knowledg 1 67 2.708050 2.708050 243 +practic 1 70 2.708050 2.708050 246 +evalu 5 64 2.772589 13.862945 266 +import 4 65 2.772589 11.090356 282 +complex 4 64 2.772589 11.090356 269 +plan 3 65 2.772589 8.317767 272 +collect 2 65 2.772589 5.545178 268 +previou 2 62 2.772589 5.545178 290 +result 2 65 2.772589 5.545178 281 +abstract 1 62 2.772589 2.772589 276 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +street 1 63 2.772589 2.772589 293 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +march 1 61 2.833213 2.833213 295 +variou 5 56 2.890372 14.451860 317 +detail 2 57 2.890372 5.780744 321 +publish 2 57 2.890372 5.780744 326 +explor 2 58 2.890372 5.780744 324 +sever 1 56 2.890372 2.890372 322 +allow 3 53 2.944439 8.833317 333 +extens 3 53 2.944439 8.833317 340 +case 1 51 2.995732 2.995732 351 +set 2 50 3.044522 6.089044 361 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +could 2 46 3.091042 6.182084 383 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +answer 2 45 3.135494 6.270988 391 +natur 1 44 3.135494 3.135494 406 +algebra 1 45 3.135494 3.135494 394 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +even 1 45 3.135494 3.135494 393 +combin 1 42 3.218876 3.218876 421 +howev 1 41 3.218876 3.218876 422 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +form 2 39 3.258097 6.516194 443 +join 2 39 3.258097 6.516194 457 +map 2 39 3.258097 6.516194 452 +probabl 2 40 3.258097 6.516194 455 +theoret 1 39 3.258097 3.258097 446 +littl 1 39 3.258097 3.258097 454 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +mean 1 37 3.332205 3.332205 477 +cost 1 37 3.332205 3.332205 480 +multi 1 36 3.367296 3.367296 493 +next 3 34 3.401197 10.203591 517 +singl 3 34 3.401197 10.203591 510 +either 1 35 3.401197 3.401197 506 +michael 1 35 3.401197 3.401197 514 +queri 38 33 3.433987 130.491506 524 +express 5 32 3.465736 17.328680 540 +kind 4 32 3.465736 13.862944 541 +idea 1 32 3.465736 3.465736 545 +given 1 32 3.465736 3.465736 538 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +posit 6 31 3.496508 20.979048 552 +storag 4 31 3.496508 13.986032 553 +scientist 1 31 3.496508 3.496508 560 +domain 3 30 3.555348 10.666044 564 +exist 3 30 3.555348 10.666044 569 +specifi 1 30 3.555348 3.555348 568 +built 3 29 3.583519 10.750557 592 +consid 1 29 3.583519 3.583519 590 +propos 3 28 3.610918 10.832754 602 +weather 1 28 3.610918 3.610918 618 +ask 1 28 3.610918 3.610918 597 +scale 1 28 3.610918 3.610918 613 +except 1 28 3.610918 3.610918 607 +framework 1 28 3.610918 3.610918 606 +client 2 25 3.737670 7.475340 679 +valu 1 25 3.737670 3.737670 665 +wai 1 25 3.737670 3.737670 662 +strategi 1 25 3.737670 3.737670 682 +demonstr 2 24 3.761200 7.522400 694 +store 2 24 3.761200 7.522400 693 +daili 1 24 3.761200 3.761200 706 +sequenc 41 23 3.806662 156.073142 734 +input 1 23 3.806662 3.806662 727 +thread 1 23 3.806662 3.806662 722 +defin 3 22 3.850148 11.550444 746 +sequenti 2 22 3.850148 7.700296 745 +sort 2 22 3.850148 7.700296 738 +serv 1 22 3.850148 3.850148 758 +identifi 1 22 3.850148 3.850148 760 +disk 1 22 3.850148 3.850148 747 +deal 1 22 3.850148 3.850148 736 +instead 1 22 3.850148 3.850148 756 +similar 4 21 3.912023 15.648092 771 +util 1 21 3.912023 3.912023 774 +sigmod 1 19 4.007333 4.007333 877 +record 11 18 4.060443 44.664873 890 +statu 1 18 4.060443 4.060443 885 +event 1 18 4.060443 4.060443 896 +account 1 18 4.060443 4.060443 882 +expand 2 17 4.110874 8.221748 928 +medic 1 17 4.110874 4.110874 958 +monitor 1 17 4.110874 4.110874 941 +weekli 1 17 4.110874 4.110874 919 +estim 1 17 4.110874 4.110874 930 +ramakrishnan 5 16 4.174387 20.871935 972 +advantag 2 16 4.174387 8.348774 987 +easi 1 16 4.174387 4.174387 969 +livni 5 15 4.248495 21.242475 1053 +indic 1 15 4.248495 4.248495 1013 +stream 1 15 4.248495 4.248495 1015 +miron 5 14 4.317488 21.587440 1110 +manner 2 14 4.317488 8.634976 1074 +embed 2 14 4.317488 8.634976 1102 +convent 1 14 4.317488 4.317488 1072 +econom 1 13 4.382027 4.382027 1184 +social 1 13 4.382027 4.382027 1123 +opportun 1 13 4.382027 4.382027 1161 +composit 1 13 4.382027 4.382027 1150 +step 1 13 4.382027 4.382027 1138 +front 1 13 4.382027 4.382027 1154 +raghu 4 12 4.465908 17.863632 1212 +scan 3 12 4.465908 13.397724 1243 +buffer 2 12 4.465908 8.931816 1211 +insid 2 12 4.465908 8.931816 1262 +amount 1 12 4.465908 4.465908 1208 +uniqu 1 12 4.465908 4.465908 1228 +shore 3 11 4.553877 13.661631 1377 +regard 1 11 4.553877 4.553877 1309 +motiv 1 11 4.553877 4.553877 1346 +instanc 1 11 4.553877 4.553877 1322 +devis 2 10 4.653960 9.307920 1451 +relationship 1 10 4.653960 4.653960 1383 +reli 1 10 4.653960 4.653960 1411 +subset 1 10 4.653960 4.653960 1425 +vldb 1 10 4.653960 4.653960 1470 +cheng 1 10 4.653960 4.653960 1381 +declar 2 9 4.753590 9.507180 1526 +tempor 2 9 4.753590 9.507180 1584 +strength 2 9 4.753590 9.507180 1494 +compos 1 9 4.753590 4.753590 1527 +vice 1 9 4.753590 4.753590 1604 +lock 1 9 4.753590 4.753590 1551 +respect 1 9 4.753590 4.753590 1545 +meta 1 9 4.753590 4.753590 1505 +intermedi 1 9 4.753590 4.753590 1497 +observ 1 9 4.753590 4.753590 1578 +mode 1 9 4.753590 4.753590 1492 +pose 1 9 4.753590 4.753590 1535 +seshadri 6 7 5.010635 30.063810 1803 +pageth 1 7 5.010635 5.010635 1939 +notion 1 7 5.010635 5.010635 1947 +merg 1 7 5.010635 5.010635 1862 +whenev 1 7 5.010635 5.010635 1883 +therefor 1 7 5.010635 5.010635 1822 +supportfor 1 7 5.010635 5.010635 1854 +praveen 6 6 5.164786 30.988716 1996 +nest 4 6 5.164786 20.659144 2151 +feasibl 2 6 5.164786 10.329572 2157 +financi 1 6 5.164786 5.164786 2197 +histor 1 6 5.164786 5.164786 2085 +consequ 1 6 5.164786 5.164786 1989 +temporari 1 6 5.164786 5.164786 2090 +greater 2 5 5.347108 10.694216 2258 +treat 1 5 5.347108 5.347108 2521 +correl 1 5 5.347108 5.347108 2279 +dual 1 5 5.347108 5.347108 2522 +distinct 1 5 5.347108 5.347108 2319 +overlap 1 5 5.347108 5.347108 2368 +complementari 1 5 5.347108 5.347108 2523 +educomput 1 5 5.347108 5.347108 2524 +zoom 3 4 5.568345 16.705035 2961 +phenomena 2 4 5.568345 11.136690 2962 +flavor 2 4 5.568345 11.136690 2625 +richter 1 4 5.568345 5.568345 2957 +collaps 2 3 5.857933 11.715866 3729 +inadequ 1 3 5.857933 5.857933 3730 +tediou 1 3 5.857933 5.857933 3731 +ineffici 1 3 5.857933 5.857933 3457 +megabyt 1 3 5.857933 5.857933 3732 +claus 1 3 5.857933 5.857933 3733 +offset 1 3 5.857933 5.857933 3467 +hourli 1 3 5.857933 5.857933 3734 +thathav 1 3 5.857933 5.857933 3735 +serverarchitectur 1 3 5.857933 5.857933 3736 +comad 1 3 5.857933 5.857933 3737 +informationfor 1 3 5.857933 5.857933 3738 +sequin 7 2 6.263398 43.843786 5250 +earthquak 4 2 6.263398 25.053592 5251 +volcano 3 2 6.263398 18.790194 5252 +meteorolog 2 2 6.263398 12.526796 5253 +aredescrib 2 2 6.263398 12.526796 5254 +objectivescurr 1 2 6.263398 6.263398 5255 +statusmotiv 1 2 6.263398 6.263398 5256 +exampleseq 1 2 6.263398 6.263398 5257 +languageoptim 1 2 6.263398 6.263398 5258 +techniquesseq 1 2 6.263398 6.263398 5259 +developmentpublicationsrel 1 2 6.263398 6.263398 5260 +workcontact 1 2 6.263398 6.263398 5261 +informationproject 1 2 6.263398 6.263398 5262 +processingof 1 2 6.263398 6.263398 5263 +theseappl 1 2 6.263398 6.263398 5264 +metereolog 1 2 6.263398 6.263398 5265 +andbiolog 1 2 6.263398 6.263398 5266 +semanticstak 1 2 6.263398 6.263398 5267 +evaluationintegr 1 2 6.263398 6.263398 5268 +canstor 1 2 6.263398 6.263398 5269 +sequencesthes 1 2 6.263398 6.263398 5270 +themost 1 2 6.263398 6.263398 5271 +statusth 1 2 6.263398 6.263398 5272 +algebraicqueri 1 2 6.263398 6.263398 5273 +analogousto 1 2 6.263398 6.263398 5274 +candeclar 1 2 6.263398 6.263398 5275 +likesql 1 2 6.263398 6.263398 5276 +versa 1 2 6.263398 6.263398 5277 +querya 1 2 6.263398 6.263398 5278 +occurr 1 2 6.263398 6.263398 5279 +erupt 1 2 6.263398 6.263398 5280 +didth 1 2 6.263398 6.263398 5281 +groupbi 1 2 6.263398 6.263398 5282 +subqueri 1 2 6.263398 6.263398 5283 +aggregatefunct 1 2 6.263398 6.263398 5284 +sequencesord 1 2 6.263398 6.263398 5285 +modelth 1 2 6.263398 6.263398 5286 +gist 1 2 6.263398 6.263398 5287 +ordereddomain 1 2 6.263398 6.263398 5288 +andposit 1 2 6.263398 6.263398 5289 +recordsmap 1 2 6.263398 6.263398 5290 +rise 1 2 6.263398 6.263398 5291 +relationaloper 1 2 6.263398 6.263398 5292 +andaggreg 1 2 6.263398 6.263398 5293 +researchersin 1 2 6.263398 6.263398 5294 +movingaggreg 1 2 6.263398 6.263398 5295 +worldsitu 1 2 6.263398 6.263398 5296 +extensionof 1 2 6.263398 6.263398 5297 +ofseq 1 2 6.263398 6.263398 5298 +languagew 1 2 6.263398 6.263398 5299 +usingwhich 1 2 6.263398 6.263398 5300 +languagei 1 2 6.263398 6.263398 5301 +queriesa 1 2 6.263398 6.263398 5302 +techniquesw 1 2 6.263398 6.263398 5303 +developmentth 1 2 6.263398 6.263398 5304 +viaa 1 2 6.263398 6.263398 5305 +ontop 1 2 6.263398 6.263398 5306 +languageswhich 1 2 6.263398 6.263398 5307 +arbitrarylevel 1 2 6.263398 6.263398 5308 +viceversa 1 2 6.263398 6.263398 5309 +detailson 1 2 6.263398 6.263398 5310 +publicationssequ 1 2 6.263398 6.263398 5311 +datapraveen 1 2 6.263398 6.263398 5312 +systempraveen 1 2 6.263398 6.263398 5313 +queriesraghu 1 2 6.263398 6.263398 5314 +workthedevis 1 2 6.263398 6.263398 5315 +visualizationenviron 1 2 6.263398 6.263398 5316 +servercontact 1 2 6.263398 6.263398 5317 +eduraghu 1 2 6.263398 6.263398 5318 +edumiron 1 2 6.263398 6.263398 5319 +seshadripraveen 1 2 6.263398 6.263398 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html new file mode 100644 index 00000000..df6475f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ralph^index.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 2 343 1.098612 2.197224 19 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +analysi 1 124 2.079442 2.079442 98 +advanc 1 99 2.302585 2.302585 130 +member 1 84 2.484907 2.484907 165 +upson 1 71 2.639057 2.639057 218 +august 1 66 2.708050 2.708050 257 +knowledg 1 67 2.708050 2.708050 243 +reason 1 57 2.890372 2.890372 318 +taken 1 31 3.496508 3.496508 555 +fellow 2 24 3.761200 7.522400 701 +germani 1 17 4.110874 4.110874 946 +stori 1 14 4.317488 4.317488 1087 +german 1 6 5.164786 5.164786 2190 +exchang 1 5 5.347108 5.347108 2310 +fulbright 1 4 5.568345 5.568345 2963 +karlsruh 1 3 5.857933 5.857933 3689 +ralph 2 1 6.957497 13.914994 11433 +benzingerralph 1 1 6.957497 6.957497 11434 +benzingerw 1 1 6.957497 6.957497 11435 +sich 1 1 6.957497 6.957497 11436 +seinen 1 1 6.957497 6.957497 11437 +lorbeeren 1 1 6.957497 6.957497 11438 +ausruht 1 1 6.957497 6.957497 11439 +trgt 1 1 6.957497 6.957497 11440 +derfalschen 1 1 6.957497 6.957497 11441 +stell 1 1 6.957497 6.957497 11442 +studienstiftung 1 1 6.957497 6.957497 11443 +deutschen 1 1 6.957497 6.957497 11444 +volk 1 1 6.957497 6.957497 11445 +siemen 1 1 6.957497 6.957497 11446 +international 1 1 6.957497 6.957497 11447 +studentenkrei 1 1 6.957497 6.957497 11448 +alumnusat 1 1 6.957497 6.957497 11449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html new file mode 100644 index 00000000..ed35878a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^raman^aster^demo.html @@ -0,0 +1,292 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +us 8 329 1.098612 8.788896 16 +last 1 314 1.098612 1.098612 14 +gener 4 220 1.386294 5.545176 27 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +base 3 165 1.791759 5.375277 50 +avail 2 169 1.791759 3.583518 48 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +first 5 140 1.945910 9.729550 71 +file 2 132 1.945910 3.891820 70 +process 2 142 1.945910 3.891820 72 +construct 1 139 1.945910 1.945910 82 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +postscript 68 131 2.079442 141.402056 90 +document 4 121 2.079442 8.317768 89 +structur 6 106 2.197225 13.183350 105 +mathemat 4 108 2.197225 8.788900 123 +version 3 113 2.197225 6.591675 122 +specif 2 106 2.197225 4.394450 106 +look 1 107 2.197225 2.197225 115 +place 1 106 2.197225 2.197225 124 +well 1 109 2.197225 2.197225 121 +book 4 99 2.302585 9.210340 131 +technic 2 100 2.302585 4.605170 140 +user 2 104 2.302585 4.605170 137 +text 2 98 2.302585 4.605170 133 +peopl 1 96 2.302585 2.302585 132 +take 1 97 2.302585 2.302585 134 +section 23 94 2.397895 55.151585 149 +present 3 91 2.397895 7.193685 145 +follow 2 92 2.397895 4.795790 143 +sinc 2 90 2.397895 4.795790 159 +imag 1 91 2.397895 2.397895 161 +second 6 81 2.484907 14.909442 166 +level 3 87 2.484907 7.454721 180 +contain 2 81 2.484907 4.969814 174 +school 1 84 2.484907 2.484907 188 +exampl 11 77 2.564949 28.214439 195 +refer 3 78 2.564949 7.694847 203 +want 1 79 2.564949 2.564949 199 +state 1 76 2.564949 2.564949 207 +complet 1 77 2.564949 2.564949 208 +good 1 77 2.564949 2.564949 200 +interfac 1 79 2.564949 2.564949 209 +name 2 72 2.639057 5.278114 220 +integr 4 67 2.708050 10.832200 245 +would 1 67 2.708050 2.708050 251 +complex 3 64 2.772589 8.317767 269 +written 2 63 2.772589 5.545178 278 +guid 1 63 2.772589 2.772589 267 +abstract 1 62 2.772589 2.772589 276 +interact 1 62 2.772589 2.772589 270 +simpl 2 60 2.833213 5.666426 298 +space 2 57 2.890372 5.780744 310 +thesi 1 57 2.890372 2.890372 327 +overview 1 56 2.890372 2.890372 323 +sever 1 56 2.890372 2.890372 322 +talk 3 53 2.944439 8.833317 336 +allow 3 53 2.944439 8.833317 333 +three 2 54 2.944439 5.888878 330 +suggest 1 53 2.944439 2.944439 331 +digit 1 52 2.995732 2.995732 348 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +format 4 48 3.044522 12.178088 356 +visual 2 48 3.044522 6.089044 372 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +still 1 50 3.044522 3.044522 362 +numer 1 49 3.044522 3.044522 369 +effect 3 46 3.091042 9.273126 385 +move 2 47 3.091042 6.182084 382 +featur 1 46 3.091042 3.091042 386 +made 2 44 3.135494 6.270988 398 +algebra 2 45 3.135494 6.270988 394 +even 1 45 3.135494 3.135494 393 +show 1 43 3.178054 3.178054 417 +long 1 43 3.178054 3.178054 413 +continu 2 39 3.258097 6.516194 448 +probabl 1 40 3.258097 3.258097 455 +error 1 40 3.258097 3.258097 449 +origin 1 38 3.295837 3.295837 472 +correct 1 38 3.295837 3.295837 462 +short 1 36 3.367296 3.367296 499 +tree 1 36 3.367296 3.367296 492 +soon 1 36 3.367296 3.367296 494 +approxim 1 35 3.401197 3.401197 509 +print 1 34 3.401197 3.401197 503 +singl 1 34 3.401197 3.401197 510 +express 13 32 3.465736 45.054568 540 +independ 2 32 3.465736 6.931472 548 +concept 1 32 3.465736 3.465736 537 +human 1 32 3.465736 3.465736 546 +taken 2 31 3.496508 6.993016 555 +titl 1 31 3.496508 3.496508 556 +produc 8 30 3.555348 28.442784 572 +power 2 30 3.555348 7.110696 573 +hard 2 30 3.555348 7.110696 563 +compon 1 30 3.555348 3.555348 570 +limit 1 29 3.583519 3.583519 585 +progress 2 28 3.610918 7.221836 598 +load 1 28 3.610918 3.610918 601 +full 1 28 3.610918 3.610918 615 +symbol 1 27 3.637586 3.637586 620 +determin 1 27 3.637586 3.637586 630 +enhanc 1 26 3.688879 3.688879 644 +rule 1 26 3.688879 3.688879 638 +enabl 1 26 3.688879 3.688879 655 +challeng 1 26 3.688879 3.688879 653 +request 1 26 3.688879 3.688879 635 +constraint 1 26 3.688879 3.688879 636 +notic 8 25 3.737670 29.901360 675 +demonstr 4 24 3.761200 15.044800 694 +interpret 3 24 3.761200 11.283600 686 +seri 1 24 3.761200 3.761200 708 +variabl 2 23 3.806662 7.613324 715 +input 1 23 3.806662 3.806662 727 +initi 1 23 3.806662 3.806662 717 +brows 1 23 3.806662 3.806662 726 +equat 1 23 3.806662 3.806662 724 +size 1 23 3.806662 3.806662 713 +head 1 23 3.806662 3.806662 732 +begin 1 23 3.806662 3.806662 716 +sequenti 1 22 3.850148 3.850148 745 +defin 1 22 3.850148 3.850148 746 +reduc 1 22 3.850148 3.850148 759 +identifi 1 22 3.850148 3.850148 760 +output 3 21 3.912023 11.736069 788 +voic 1 21 3.912023 3.912023 806 +util 1 21 3.912023 3.912023 774 +hypertext 1 19 4.007333 4.007333 865 +left 1 19 4.007333 4.007333 851 +separ 1 19 4.007333 4.007333 844 +listen 4 18 4.060443 16.241772 907 +demo 3 18 4.060443 12.181329 888 +along 3 18 4.060443 12.181329 878 +record 1 18 4.060443 4.060443 890 +dimension 1 18 4.060443 4.060443 909 +element 1 18 4.060443 4.060443 895 +lower 1 18 4.060443 4.060443 886 +render 15 17 4.110874 61.663110 947 +matrix 1 17 4.110874 4.110874 933 +choic 1 16 4.174387 4.174387 979 +cognit 1 16 4.174387 4.174387 986 +took 1 16 4.174387 4.174387 1010 +upon 1 16 4.174387 4.174387 978 +piec 1 15 4.248495 4.248495 1020 +later 1 15 4.248495 4.248495 1043 +audio 84 14 4.317488 362.668992 1094 +latex 70 14 4.317488 302.224160 1064 +attribut 1 14 4.317488 4.317488 1092 +squar 1 14 4.317488 4.317488 1082 +shown 1 14 4.317488 4.317488 1080 +context 2 13 4.382027 8.764054 1153 +emac 1 13 4.382027 4.382027 1143 +directli 1 13 4.382027 4.382027 1141 +reader 4 12 4.465908 17.863632 1246 +speak 3 12 4.465908 13.397724 1283 +calcul 2 12 4.465908 8.931816 1268 +speech 1 12 4.465908 4.465908 1222 +shape 1 12 4.465908 4.465908 1245 +typic 1 11 4.553877 4.553877 1360 +ofcomput 1 10 4.653960 4.653960 1442 +donald 1 9 4.753590 4.753590 1510 +notat 1 9 4.753590 4.753590 1489 +imposs 1 9 4.753590 4.753590 1513 +distanc 1 9 4.753590 4.753590 1500 +equival 1 9 4.753590 4.753590 1496 +cross 2 8 4.875197 9.750394 1703 +root 1 8 4.875197 4.875197 1650 +wire 1 8 4.875197 4.875197 1747 +illustr 1 8 4.875197 4.875197 1679 +forget 1 8 4.875197 4.875197 1712 +replac 1 8 4.875197 4.875197 1668 +dimens 3 7 5.010635 15.031905 1930 +stereo 2 7 5.010635 10.021270 1818 +hear 2 7 5.010635 10.021270 1940 +dedic 1 7 5.010635 5.010635 1843 +notion 1 7 5.010635 5.010635 1947 +nest 4 6 5.164786 20.659144 2151 +vari 2 6 5.164786 10.329572 2001 +difficult 1 6 5.164786 5.164786 2035 +quick 1 6 5.164786 5.164786 2184 +heurist 1 6 5.164786 5.164786 2125 +chosen 1 6 5.164786 5.164786 1984 +meant 1 6 5.164786 5.164786 2055 +fraction 4 5 5.347108 21.388432 2259 +recogn 3 5 5.347108 16.041324 2302 +quantifi 2 5 5.347108 10.694216 2525 +substitut 2 5 5.347108 10.694216 2247 +mutual 1 5 5.347108 5.347108 2418 +raman 2 4 5.568345 11.136690 2827 +inlin 1 4 5.568345 5.568345 2964 +encod 1 4 5.568345 5.568345 2929 +paus 1 4 5.568345 5.568345 2965 +orthogon 1 4 5.568345 5.568345 2832 +compris 1 4 5.568345 5.568345 2862 +vital 1 4 5.568345 5.568345 2733 +ident 1 4 5.568345 5.568345 2826 +customiz 1 4 5.568345 5.568345 2966 +trick 1 4 5.568345 5.568345 2967 +thati 1 4 5.568345 5.568345 2616 +heard 1 4 5.568345 5.568345 2895 +formula 6 3 5.857933 35.147598 3405 +percept 3 3 5.857933 17.573799 3739 +subscript 2 3 5.857933 11.715866 3469 +tripl 2 3 5.857933 11.715866 3160 +meaning 2 3 5.857933 11.715866 3458 +blind 1 3 5.857933 5.857933 3662 +forthes 1 3 5.857933 5.857933 3199 +experienc 1 3 5.857933 5.857933 3203 +exponenti 1 3 5.857933 5.857933 3529 +thetim 1 3 5.857933 5.857933 3581 +proper 1 3 5.857933 5.857933 3323 +orpostscript 1 3 5.857933 5.857933 3329 +convei 7 2 6.263398 43.843786 4690 +spoken 3 2 6.263398 18.790194 5122 +succinctli 2 2 6.263398 12.526796 4275 +monoton 2 2 6.263398 12.526796 5321 +logarithm 2 2 6.263398 12.526796 5322 +expon 1 2 6.263398 6.263398 5323 +absenc 1 2 6.263398 6.263398 4878 +oppos 1 2 6.263398 6.263398 4855 +ofintegr 1 2 6.263398 6.263398 5324 +summat 1 2 6.263398 6.263398 5325 +referenc 1 2 6.263398 6.263398 4757 +justa 1 2 6.263398 6.263398 5326 +glori 1 2 6.263398 6.263398 5327 +aster 22 1 6.957497 153.064934 11450 +bruno 4 1 6.957497 27.829988 11451 +superscript 3 1 6.957497 20.872491 11452 +knuth 3 1 6.957497 20.872491 11453 +unambigu 2 1 6.957497 13.914994 11454 +inton 2 1 6.957497 13.914994 11455 +intermix 2 1 6.957497 13.914994 11456 +demonstrationi 1 1 6.957497 6.957497 11457 +forrend 1 1 6.957497 6.957497 11458 +myphd 1 1 6.957497 6.957497 11459 +dectalk 1 1 6.957497 6.957497 11460 +mulaw 1 1 6.957497 6.957497 11461 +mono 1 1 6.957497 6.957497 11462 +dvip 1 1 6.957497 6.957497 11463 +andround 1 1 6.957497 6.957497 11464 +faad 1 1 6.957497 6.957497 11465 +casey 1 1 6.957497 6.957497 11466 +examplessinc 1 1 6.957497 6.957497 11467 +inflect 1 1 6.957497 6.957497 11468 +toconvei 1 1 6.957497 6.957497 11469 +renderingsub 1 1 6.957497 6.957497 11470 +audiost 1 1 6.957497 6.957497 11471 +dimensionus 1 1 6.957497 6.957497 11472 +verbatim 1 1 6.957497 6.957497 11473 +layoutoper 1 1 6.957497 6.957497 11474 +verydiffer 1 1 6.957497 6.957497 11475 +monotonicchang 1 1 6.957497 6.957497 11476 +trigonometr 1 1 6.957497 6.957497 11477 +ambigu 1 1 6.957497 6.957497 11478 +parenthesi 1 1 6.957497 6.957497 11479 +asexpon 1 1 6.957497 6.957497 11480 +isfulli 1 1 6.957497 6.957497 11481 +innocu 1 1 6.957497 6.957497 11482 +mostdifficult 1 1 6.957497 6.957497 11483 +theintegr 1 1 6.957497 6.957497 11484 +ofhuman 1 1 6.957497 6.957497 11485 +ofcross 1 1 6.957497 6.957497 11486 +referenceableobject 1 1 6.957497 6.957497 11487 +latercross 1 1 6.957497 6.957497 11488 +followingdeepli 1 1 6.957497 6.957497 11489 +fledgedsymbol 1 1 6.957497 6.957497 11490 +thematrix 1 1 6.957497 6.957497 11491 +commenc 1 1 6.957497 6.957497 11492 +aseach 1 1 6.957497 6.957497 11493 +secondsto 1 1 6.957497 6.957497 11494 +spacenot 1 1 6.957497 6.957497 11495 +changeth 1 1 6.957497 6.957497 11496 +techniquefor 1 1 6.957497 6.957497 11497 +renderingsconvei 1 1 6.957497 6.957497 11498 +thesub 1 1 6.957497 6.957497 11499 +denomin 1 1 6.957497 6.957497 11500 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html new file mode 100644 index 00000000..9042ab64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ravi^home.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +cornel 2 215 1.386294 2.772588 23 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +parallel 2 169 1.791759 3.583518 60 +process 2 142 1.945910 3.891820 72 +confer 3 126 2.079442 6.238326 100 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +check 2 115 2.197225 4.394450 118 +theori 2 111 2.197225 4.394450 127 +intern 1 108 2.197225 2.197225 128 +octob 2 89 2.397895 4.795790 156 +learn 3 86 2.484907 7.454721 170 +ieee 2 86 2.484907 4.969814 190 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +effici 1 73 2.639057 2.639057 233 +test 2 66 2.708050 5.416100 252 +august 1 66 2.708050 2.708050 257 +foundat 3 62 2.772589 8.317767 286 +ithaca 1 65 2.772589 2.772589 294 +function 1 62 2.772589 2.772589 275 +juli 1 60 2.833213 2.833213 305 +extens 1 53 2.944439 2.944439 340 +without 1 50 3.044522 3.044522 370 +linear 1 41 3.218876 3.218876 431 +theoret 1 39 3.258097 3.258097 446 +correct 1 38 3.295837 3.295837 462 +approxim 2 35 3.401197 6.802394 509 +bound 1 26 3.688879 3.688879 659 +scalabl 1 24 3.761200 3.761200 705 +equat 1 23 3.806662 3.806662 724 +self 3 22 3.850148 11.550444 761 +polynomi 1 14 4.317488 4.317488 1069 +squar 1 14 4.317488 4.317488 1082 +branch 1 11 4.553877 4.553877 1318 +kumar 2 9 4.753590 9.507180 1506 +russel 1 9 4.753590 4.753590 1507 +rubinfeld 2 6 5.164786 10.329572 1998 +ronitt 2 5 5.347108 10.694216 2265 +combinator 2 4 5.568345 11.136690 2915 +bottleneck 1 4 5.568345 5.568345 2769 +shah 1 4 5.568345 5.568345 2814 +ravi 4 3 5.857933 23.431732 3185 +funda 2 3 5.857933 11.715866 3645 +recurr 1 3 5.857933 5.857933 3740 +latin 1 3 5.857933 5.857933 3741 +sundaram 1 3 5.857933 5.857933 3463 +ramachandran 1 3 5.857933 5.857933 3742 +lnc 2 2 6.263398 12.526796 5085 +width 1 2 6.263398 6.263398 5328 +alexand 1 2 6.263398 6.263398 5329 +uumln 2 1 6.957497 13.914994 11501 +sivakumar 2 1 6.957497 13.914994 11502 +jeyakumar 1 1 6.957497 6.957497 11503 +muthukumarasami 1 1 6.957497 6.957497 11504 +umakishor 1 1 6.957497 6.957497 11505 +gautam 1 1 6.957497 6.957497 11506 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html new file mode 100644 index 00000000..72c350b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rdz^rdz.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +interest 3 384 0.693147 2.079441 11 +program 3 374 0.693147 2.079441 7 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +current 2 284 1.098612 2.197224 21 +us 2 329 1.098612 2.197224 16 +cours 2 273 1.098612 2.197224 15 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +also 3 259 1.386294 4.158882 28 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +avail 4 169 1.791759 7.167036 48 +base 3 165 1.791759 5.375277 50 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +note 1 142 1.945910 1.945910 67 +lectur 1 135 1.945910 1.945910 73 +confer 4 126 2.079442 8.317768 100 +introduct 2 126 2.079442 4.158884 87 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +teach 2 108 2.197225 4.394450 112 +access 1 102 2.302585 2.302585 136 +imag 4 91 2.397895 9.591580 161 +search 1 95 2.397895 2.397895 155 +ieee 3 86 2.484907 7.454721 190 +novemb 3 81 2.484907 7.454721 179 +librari 1 87 2.484907 2.484907 181 +method 1 80 2.564949 2.564949 213 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +workshop 2 71 2.639057 5.278114 239 +free 1 73 2.639057 2.639057 224 +multimedia 3 68 2.708050 8.124150 258 +differ 1 66 2.708050 2.708050 253 +organ 1 65 2.772589 2.772589 265 +content 2 59 2.833213 5.666426 302 +march 1 61 2.833213 2.833213 295 +think 1 57 2.890372 2.890372 314 +variou 1 56 2.890372 2.890372 317 +undergradu 2 54 2.944439 5.888878 338 +local 1 55 2.944439 2.944439 334 +profession 1 51 2.995732 2.995732 345 +format 1 48 3.044522 3.044522 356 +visual 1 48 3.044522 3.044522 372 +electron 1 47 3.091042 3.091042 379 +featur 1 46 3.091042 3.091042 386 +california 1 46 3.091042 3.091042 388 +video 1 44 3.135494 3.135494 405 +third 2 43 3.178054 6.356108 412 +vision 5 41 3.218876 16.094380 430 +committe 1 34 3.401197 3.401197 522 +john 1 33 3.433987 3.433987 532 +taught 1 33 3.433987 3.433987 526 +transform 1 32 3.465736 3.465736 542 +pass 3 28 3.610918 10.832754 611 +held 2 28 3.610918 7.221836 600 +retriev 1 27 3.637586 3.637586 621 +subject 1 26 3.688879 3.688879 647 +compar 1 26 3.688879 3.688879 648 +detect 1 26 3.688879 3.688879 646 +greg 3 24 3.761200 11.283600 695 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +break 1 20 3.951244 3.951244 812 +spend 1 19 4.007333 4.007333 850 +boston 1 19 4.007333 4.007333 862 +scott 1 18 4.060443 4.060443 884 +miller 3 17 4.110874 12.332622 949 +vector 1 16 4.174387 4.174387 961 +fourth 1 16 4.174387 4.174387 999 +researchmi 1 14 4.317488 4.317488 1119 +coher 1 14 4.317488 4.317488 1109 +massachusett 1 14 4.317488 4.317488 1118 +scene 1 14 4.317488 4.317488 1114 +francisco 1 14 4.317488 4.317488 1095 +econom 1 13 4.382027 4.382027 1184 +huang 1 12 4.465908 4.465908 1202 +amount 1 12 4.465908 4.465908 1208 +reader 1 12 4.465908 4.465908 1246 +impact 1 11 4.553877 4.553877 1334 +refin 1 11 4.553877 4.553877 1363 +princip 1 10 4.653960 4.653960 1397 +correspond 1 10 4.653960 4.653960 1382 +observ 1 9 4.753590 4.753590 1578 +classifi 1 9 4.753590 4.753590 1537 +kevin 1 9 4.753590 4.753590 1482 +juan 1 9 4.753590 4.753590 1580 +european 1 8 4.875197 4.875197 1763 +ramin 5 7 5.010635 25.053175 1820 +justin 4 7 5.010635 20.042540 1789 +adob 1 7 5.010635 5.010635 1873 +parametr 1 7 5.010635 5.010635 1819 +sweden 1 7 5.010635 5.010635 1885 +courtesi 1 7 5.010635 5.010635 1953 +zabih 5 6 5.164786 25.823930 2138 +freeli 1 6 5.164786 5.164786 2014 +price 1 6 5.164786 5.164786 1999 +acrobat 1 6 5.164786 5.164786 2063 +phil 1 5 5.347108 5.347108 2419 +fair 1 5 5.347108 5.347108 2333 +florida 1 5 5.347108 5.347108 2526 +cvpr 2 4 5.568345 11.136690 2761 +essai 1 4 5.568345 5.568345 2948 +newslett 1 4 5.568345 5.568345 2873 +scribe 1 4 5.568345 5.568345 2631 +jing 1 3 5.857933 5.857933 3521 +voskuhl 1 3 5.857933 5.857933 3109 +szewczyk 1 3 5.857933 5.857933 3108 +histogram 1 3 5.857933 5.857933 3490 +stockholm 1 3 5.857933 5.857933 3715 +conjunct 1 3 5.857933 5.857933 3743 +cytacki 1 2 6.263398 6.263398 5330 +pageramin 1 1 6.957497 6.957497 11507 +zabihassist 1 1 6.957497 6.957497 11508 +professorrdz 1 1 6.957497 6.957497 11509 +agr 1 1 6.957497 6.957497 11510 +studentsi 1 1 6.957497 6.957497 11511 +vera 1 1 6.957497 6.957497 11512 +kettnak 1 1 6.957497 6.957497 11513 +olga 1 1 6.957497 6.957497 11514 +veksler 1 1 6.957497 6.957497 11515 +publicationsmost 1 1 6.957497 6.957497 11516 +sarasota 1 1 6.957497 6.957497 11517 +woodfil 1 1 6.957497 6.957497 11518 +teachingi 1 1 6.957497 6.957497 11519 +activitiesi 1 1 6.957497 6.957497 11520 +comitte 1 1 6.957497 6.957497 11521 +acknowledgementsthi 1 1 6.957497 6.957497 11522 +huttenlocherlast 1 1 6.957497 6.957497 11523 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html new file mode 100644 index 00000000..a6e08e84 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rodmoten^home.html @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +roderick 1 1 6.957497 6.957497 11524 +moten 1 1 6.957497 6.957497 11525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html new file mode 100644 index 00000000..920c3f0b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ronitt^homepage.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +fall 2 181 1.609438 3.218876 40 +paper 1 205 1.609438 1.609438 38 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +homepag 1 93 2.397895 2.397895 148 +upson 1 71 2.639057 2.639057 218 +result 1 65 2.772589 2.772589 281 +talk 1 53 2.944439 2.944439 336 +telephon 1 50 3.044522 3.044522 373 +describ 1 45 3.135494 3.135494 400 +york 1 41 3.218876 3.218876 435 +random 1 34 3.401197 3.401197 511 +universityithaca 1 24 3.761200 3.761200 710 +kumar 1 9 4.753590 4.753590 1506 +hallcornel 1 8 4.875197 4.875197 1757 +rubinfeld 2 6 5.164786 10.329572 1998 +ronitt 3 5 5.347108 16.041324 2265 +fair 1 5 5.347108 5.347108 2333 +funda 1 3 5.857933 5.857933 3645 +ravi 1 3 5.857933 5.857933 3185 +ergun 1 2 6.263398 6.263398 5087 +wasserman 1 2 6.263398 6.263398 5331 +nephew 1 2 6.263398 6.263398 5332 +homepageronitt 1 1 6.957497 6.957497 11526 +rubinfeldi 1 1 6.957497 6.957497 11527 +rubinfeldcomput 1 1 6.957497 6.957497 11528 +edupictur 1 1 6.957497 6.957497 11529 +eitan 1 1 6.957497 6.957497 11530 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html new file mode 100644 index 00000000..62e8fd4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^roy^roy.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 3 457 0.693147 2.079441 12 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cornel 2 215 1.386294 2.772588 23 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +distribut 3 162 1.791759 5.375277 51 +implement 2 152 1.791759 3.583518 52 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +technolog 3 131 2.079442 6.238326 102 +report 2 131 2.079442 4.158884 92 +machin 1 129 2.079442 2.079442 95 +technic 2 100 2.302585 4.605170 140 +memori 1 101 2.302585 2.302585 139 +associ 2 93 2.397895 4.795790 151 +commun 1 95 2.397895 2.397895 157 +institut 2 84 2.484907 4.969814 187 +appear 1 78 2.564949 2.564949 210 +state 1 76 2.564949 2.564949 207 +involv 1 71 2.639057 2.639057 227 +receiv 1 66 2.708050 2.708050 244 +share 1 59 2.833213 2.833213 304 +thesi 1 57 2.890372 2.890372 327 +advisor 1 51 2.995732 2.995732 355 +post 1 35 3.401197 3.401197 505 +titl 1 31 3.496508 3.496508 556 +full 1 28 3.610918 3.610918 615 +consist 1 26 3.688879 3.688879 651 +reliabl 1 25 3.737670 3.737670 674 +doctor 2 24 3.761200 7.522400 709 +condit 1 16 4.174387 4.174387 975 +edui 1 13 4.382027 4.382027 1193 +replic 1 12 4.465908 4.465908 1231 +thedepart 2 11 4.553877 9.107754 1350 +israel 2 11 4.553877 9.107754 1366 +mainli 1 10 4.653960 4.653960 1432 +birman 3 9 4.753590 14.260770 1531 +friedman 3 7 5.010635 15.031905 1886 +cornellunivers 2 7 5.010635 10.021270 1916 +trade 1 7 5.010635 5.010635 1815 +clickher 1 5 5.347108 5.347108 2428 +vaysburd 1 4 5.568345 5.568345 2846 +tina 1 3 5.857933 5.857933 3744 +scienceatcornel 1 2 6.263398 6.263398 5333 +withken 1 2 6.263398 6.263398 5334 +androbbert 1 2 6.263398 6.263398 4953 +thehoru 1 2 6.263398 6.263398 5179 +attiya 1 2 6.263398 6.263398 5197 +partition 1 2 6.263398 6.263398 4954 +thetechnion 2 1 6.957497 13.914994 11531 +friedmanroi 1 1 6.957497 6.957497 11532 +friedmanpost 1 1 6.957497 6.957497 11533 +universityroi 1 1 6.957497 6.957497 11534 +rennessein 1 1 6.957497 6.957497 11535 +washagit 1 1 6.957497 6.957497 11536 +wasconsist 1 1 6.957497 6.957497 11537 +themilliped 1 1 6.957497 6.957497 11538 +withassaf 1 1 6.957497 6.957497 11539 +schuster 1 1 6.957497 6.957497 11540 +papersr 1 1 6.957497 6.957497 11541 +scalabledistribut 1 1 6.957497 6.957497 11542 +coprocessor 1 1 6.957497 6.957497 11543 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html new file mode 100644 index 00000000..28066a8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rus^home.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +cornel 3 215 1.386294 4.158882 23 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +report 1 131 2.079442 2.079442 92 +version 1 113 2.197225 2.197225 122 +associ 1 93 2.397895 2.397895 151 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +simul 1 66 2.708050 2.708050 255 +ithaca 1 65 2.772589 2.772589 294 +dept 1 64 2.772589 2.772589 291 +tech 1 35 3.401197 3.401197 515 +photograph 1 15 4.248495 4.248495 1056 +daniela 2 3 5.857933 11.715866 3611 +catalogc 1 2 6.263398 6.263398 5023 +infodesign 1 1 6.957497 6.957497 11544 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html new file mode 100644 index 00000000..65d31276 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^rvr^rvr.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 8 443 0.693147 5.545176 6 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +design 2 213 1.386294 2.772588 25 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +group 3 183 1.609438 4.828314 36 +oper 1 180 1.609438 1.609438 34 +network 2 168 1.791759 3.583518 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +perform 2 143 1.945910 3.891820 74 +support 2 132 1.945910 3.891820 83 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +version 2 113 2.197225 4.394450 122 +place 1 106 2.197225 2.197225 124 +commun 3 95 2.397895 7.193685 157 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +contain 1 81 2.484907 2.484907 174 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +html 1 75 2.639057 2.639057 235 +ithaca 3 65 2.772589 8.317767 294 +virtual 1 62 2.772589 2.772589 285 +complex 1 64 2.772589 2.772589 269 +scientif 1 53 2.944439 2.944439 341 +advisor 1 51 2.995732 2.995732 355 +protocol 2 45 3.135494 6.270988 407 +multi 1 36 3.367296 3.367296 493 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +secur 1 30 3.555348 3.555348 577 +framework 1 28 3.610918 3.610918 606 +american 1 27 3.637586 3.637586 634 +reliabl 1 25 3.737670 3.737670 674 +flow 1 24 3.761200 3.761200 700 +mobil 1 23 3.806662 3.806662 730 +hous 1 21 3.912023 3.912023 801 +flexibl 1 21 3.912023 3.912023 792 +applet 1 20 3.951244 3.951244 827 +media 1 19 4.007333 4.007333 861 +agent 1 18 4.060443 4.060443 910 +club 1 15 4.248495 4.248495 1058 +horu 8 14 4.317488 34.539904 1116 +senior 1 14 4.317488 4.317488 1120 +composit 2 13 4.382027 8.764054 1150 +edui 1 13 4.382027 4.382027 1193 +incorpor 1 13 4.382027 4.382027 1163 +weak 1 13 4.382027 4.382027 1159 +danc 1 12 4.465908 4.465908 1278 +market 1 11 4.553877 4.553877 1361 +interestsmi 1 10 4.653960 4.653960 1462 +robbert 1 9 4.753590 4.753590 1529 +guitar 1 8 4.875197 4.875197 1758 +synchroni 1 7 5.010635 5.010635 1923 +band 1 6 5.164786 5.164786 2198 +strong 1 6 5.164786 5.164786 2029 +jazz 3 5 5.347108 16.041324 2527 +babi 1 5 5.347108 5.347108 2493 +girl 1 5 5.347108 5.347108 2410 +swing 1 4 5.568345 5.568345 2887 +dutch 3 3 5.857933 17.573799 3592 +lightweight 2 3 5.857933 11.715866 3234 +tanenbaum 1 3 5.857933 5.857933 3397 +sharewar 1 3 5.857933 5.857933 3503 +netherland 1 3 5.857933 5.857933 3650 +associatecornel 1 2 6.263398 6.263398 5137 +scienceatcornel 1 2 6.263398 6.263398 5333 +withken 1 2 6.263398 6.263398 5334 +tacoma 1 2 6.263398 6.263398 4909 +brand 2 1 6.957497 13.914994 11545 +renesserobbert 1 1 6.957497 6.957497 11546 +renessesenior 1 1 6.957497 6.957497 11547 +universityrvr 1 1 6.957497 6.957497 11548 +universityinithaca 1 1 6.957497 6.957497 11549 +birmanin 1 1 6.957497 6.957497 11550 +wasandi 1 1 6.957497 6.957497 11551 +caml 1 1 6.957497 6.957497 11552 +nynetth 1 1 6.957497 6.957497 11553 +ageless 1 1 6.957497 6.957497 11554 +accordion 1 1 6.957497 6.957497 11555 +stuffcornel 1 1 6.957497 6.957497 11556 +ithacaithacanet 1 1 6.957497 6.957497 11557 +spinner 1 1 6.957497 6.957497 11558 +paperssoftwar 1 1 6.957497 6.957497 11559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html new file mode 100644 index 00000000..7e3ebe97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sabel^sabel.html @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +page 4 705 0.000000 0.000000 3 +scienc 4 640 0.000000 0.000000 4 +system 5 443 0.693147 3.465735 6 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +cornel 4 215 1.386294 5.545176 23 +public 1 202 1.609438 1.609438 43 +distribut 4 162 1.791759 7.167036 51 +algorithm 1 162 1.791759 1.791759 57 +click 3 142 1.945910 5.837730 78 +process 1 142 1.945910 1.945910 72 +report 3 131 2.079442 6.238326 92 +postscript 3 131 2.079442 6.238326 90 +version 2 113 2.197225 4.394450 122 +final 1 116 2.197225 2.197225 108 +intern 1 108 2.197225 2.197225 128 +technic 3 100 2.302585 6.907755 140 +proceed 3 93 2.397895 7.193685 152 +octob 2 89 2.397895 4.795790 156 +appear 2 78 2.564949 5.129898 210 +june 1 79 2.564949 2.564949 214 +symposium 2 72 2.639057 5.278114 238 +workshop 1 71 2.639057 2.639057 239 +free 1 73 2.639057 2.639057 224 +august 1 66 2.708050 2.708050 257 +copi 3 63 2.772589 8.317767 284 +experi 1 64 2.772589 2.772589 283 +guid 1 63 2.772589 2.772589 267 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +februari 1 54 2.944439 2.944439 328 +sampl 1 53 2.944439 2.944439 339 +profession 1 51 2.995732 2.995732 345 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +electron 1 47 3.091042 3.091042 379 +answer 1 45 3.135494 3.135494 391 +submit 1 39 3.258097 3.258097 440 +annual 1 40 3.258097 3.258097 458 +especi 1 36 3.367296 3.367296 496 +approxim 2 35 3.401197 6.802394 509 +award 1 34 3.401197 3.401197 523 +survei 1 35 3.401197 3.401197 513 +detect 3 26 3.688879 11.066637 646 +revis 1 26 3.688879 3.688879 640 +consist 1 26 3.688879 3.688879 651 +reliabl 1 25 3.737670 3.737670 674 +doctor 1 24 3.761200 3.761200 709 +seri 1 24 3.761200 3.761200 708 +finish 1 22 3.850148 3.850148 748 +properti 1 22 3.850148 3.850148 749 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +failur 4 18 4.060443 16.241772 898 +expand 1 17 4.110874 4.110874 928 +diego 1 16 4.174387 4.174387 992 +letter 1 16 4.174387 4.174387 981 +asynchron 2 12 4.465908 8.931816 1229 +elect 1 8 4.875197 4.875197 1771 +perfect 2 7 5.010635 10.021270 1921 +chandra 1 6 5.164786 5.164786 2091 +consensu 1 6 5.164786 5.164786 2080 +prize 1 6 5.164786 5.164786 2150 +keith 3 5 5.347108 16.041324 2528 +stabl 1 5 5.347108 5.347108 2309 +blow 1 5 5.347108 5.347108 2407 +spam 1 4 5.568345 5.568345 2927 +bean 1 4 5.568345 5.568345 2968 +detector 3 3 5.857933 17.573799 3745 +horizon 1 3 5.857933 5.857933 3746 +zone 1 3 5.857933 5.857933 3747 +cash 1 3 5.857933 5.857933 3355 +marzullo 4 2 6.263398 25.053592 4919 +formor 1 2 6.263398 6.263398 5335 +distributedcomput 1 2 6.263398 6.263398 5336 +lecturenot 1 2 6.263398 6.263398 4679 +sabel 6 1 6.957497 41.744982 11560 +laura 5 1 6.957497 34.787485 11561 +asynchronousdistribut 2 1 6.957497 13.914994 11562 +jelli 2 1 6.957497 13.914994 11563 +bingo 2 1 6.957497 13.914994 11564 +professorkeith 1 1 6.957497 6.957497 11565 +tushar 1 1 6.957497 6.957497 11566 +sfailur 1 1 6.957497 6.957497 11567 +subcut 1 1 6.957497 6.957497 11568 +wdag 1 1 6.957497 6.957497 11569 +cow 1 1 6.957497 6.957497 11570 +strawberri 1 1 6.957497 6.957497 11571 +tart 1 1 6.957497 6.957497 11572 +torch 1 1 6.957497 6.957497 11573 +alpacanet 1 1 6.957497 6.957497 11574 +gourmet 1 1 6.957497 6.957497 11575 +thebobbi 1 1 6.957497 6.957497 11576 +belli 1 1 6.957497 6.957497 11577 +canplai 1 1 6.957497 6.957497 11578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html new file mode 100644 index 00000000..4e0c5a25 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sam^sam.html @@ -0,0 +1,169 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 11 443 0.693147 7.624617 6 +research 3 431 0.693147 2.079441 10 +work 3 380 0.693147 2.079441 9 +interest 2 384 0.693147 1.386294 11 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +distribut 7 162 1.791759 12.542313 51 +algorithm 4 162 1.791759 7.167036 57 +implement 3 152 1.791759 5.375277 52 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +object 7 138 1.945910 13.621370 79 +process 4 142 1.945910 7.783640 72 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +databas 2 122 2.079442 4.158884 86 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +well 1 109 2.197225 2.197225 121 +memori 2 101 2.302585 4.605170 139 +need 1 98 2.302585 2.302585 135 +proceed 3 93 2.397895 7.193685 152 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +journal 4 83 2.484907 9.939628 183 +level 3 87 2.484907 7.454721 180 +ieee 1 86 2.484907 2.484907 190 +messag 2 76 2.564949 5.129898 212 +optim 1 79 2.564949 2.564949 197 +free 7 73 2.639057 18.473399 224 +solv 5 73 2.639057 13.195285 234 +symposium 3 72 2.639057 7.917171 238 +effici 1 73 2.639057 2.639057 233 +august 2 66 2.708050 5.416100 257 +goal 1 66 2.708050 2.708050 250 +simul 1 66 2.708050 2.708050 255 +knowledg 1 67 2.708050 2.708050 243 +result 2 65 2.772589 5.545178 281 +foundat 1 62 2.772589 2.772589 286 +share 3 59 2.833213 8.499639 304 +type 2 61 2.833213 5.666426 296 +automat 1 61 2.833213 2.833213 306 +explor 3 58 2.890372 8.671116 324 +faculti 1 56 2.890372 2.890372 325 +major 1 56 2.890372 2.890372 315 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +maintain 1 51 2.995732 2.995732 342 +principl 2 48 3.044522 6.089044 357 +even 1 45 3.135494 3.135494 393 +protocol 1 45 3.135494 3.135494 407 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +show 1 43 3.178054 3.178054 417 +theoret 1 39 3.258097 3.258097 446 +continu 1 39 3.258097 3.258097 448 +transact 1 39 3.258097 3.258097 438 +correct 2 38 3.295837 6.591674 462 +respons 1 37 3.332205 3.332205 476 +least 1 35 3.401197 3.401197 516 +concurr 1 34 3.401197 3.401197 501 +toler 4 33 3.433987 13.735948 533 +fault 4 32 3.465736 13.862944 547 +collabor 1 32 3.465736 3.465736 543 +abl 1 30 3.555348 3.555348 566 +exist 1 30 3.555348 3.555348 569 +common 1 30 3.555348 3.555348 574 +synchron 2 29 3.583519 7.167038 588 +pass 2 28 3.610918 7.221836 611 +determin 2 27 3.637586 7.275172 630 +consist 1 26 3.688879 3.688879 651 +fundament 1 25 3.737670 3.737670 661 +known 1 24 3.761200 3.761200 702 +methodolog 1 23 3.806662 3.806662 733 +hierarchi 4 22 3.850148 15.400592 744 +thu 1 21 3.912023 3.912023 773 +increas 1 20 3.951244 3.951244 829 +prove 1 19 4.007333 4.007333 848 +failur 5 18 4.060443 20.302215 898 +whether 1 17 4.110874 4.110874 918 +partit 1 16 4.174387 4.174387 984 +princeton 1 15 4.248495 4.248495 1042 +shown 1 14 4.317488 4.317488 1080 +wait 7 13 4.382027 30.674189 1168 +canada 2 13 4.382027 8.764054 1158 +cannot 1 13 4.382027 4.382027 1144 +difficulti 1 13 4.382027 4.382027 1132 +necessari 1 13 4.382027 4.382027 1147 +asynchron 4 12 4.465908 17.863632 1229 +robust 1 12 4.465908 4.465908 1271 +replic 1 12 4.465908 4.465908 1231 +clock 2 11 4.553877 9.107754 1320 +abil 1 11 4.553877 4.553877 1341 +interestsmi 1 10 4.653960 4.653960 1462 +reli 1 10 4.653960 4.653960 1411 +guarante 1 10 4.653960 4.653960 1391 +informationabout 1 9 4.753590 4.753590 1515 +crash 2 8 4.875197 9.750394 1616 +paradigm 1 8 4.875197 4.875197 1662 +bridg 1 8 4.875197 4.875197 1764 +exactli 1 7 5.010635 5.010635 1817 +suffici 1 7 5.010635 5.010635 1897 +montreal 1 7 5.010635 5.010635 1961 +pittsburgh 1 7 5.010635 5.010635 1938 +pennsylvania 1 7 5.010635 5.010635 1932 +consensu 6 6 5.164786 30.988716 2080 +chandra 3 6 5.164786 15.494358 2091 +prasad 2 6 5.164786 10.329572 2126 +mistak 1 6 5.164786 5.164786 2110 +broadcast 1 5 5.347108 5.347108 2453 +infinit 1 4 5.568345 5.568345 2596 +detector 2 3 5.857933 11.715866 3745 +forfault 1 3 5.857933 5.857933 3748 +ofobject 1 3 5.857933 5.857933 3399 +toueg 10 2 6.263398 62.633980 5339 +inher 1 2 6.263398 6.263398 5086 +mere 1 2 6.263398 6.263398 5340 +slow 1 2 6.263398 6.263398 5341 +inour 1 2 6.263398 6.263398 4445 +systemswith 1 2 6.263398 6.263398 5342 +muchinform 1 2 6.263398 6.263398 4811 +amajor 1 2 6.263398 6.263398 5343 +herlihi 1 2 6.263398 6.263398 5144 +anobject 1 2 6.263398 6.263398 4267 +hadzilaco 1 2 6.263398 6.263398 5338 +jayanti 3 1 6.957497 20.872491 11583 +failuredetector 3 1 6.957497 20.872491 11584 +unreli 2 1 6.957497 13.914994 11585 +weakest 2 1 6.957497 13.914994 11586 +neiger 2 1 6.957497 13.914994 11587 +professorph 1 1 6.957497 6.957497 11588 +toleranceand 1 1 6.957497 6.957497 11589 +andshar 1 1 6.957497 6.957497 11590 +gapbetween 1 1 6.957497 6.957497 11591 +practicalsolut 1 1 6.957497 6.957497 11592 +withtushar 1 1 6.957497 6.957497 11593 +chandraand 1 1 6.957497 6.957497 11594 +onunreli 1 1 6.957497 6.957497 11595 +computingst 1 1 6.957497 6.957497 11596 +adeterminist 1 1 6.957497 6.957497 11597 +impossibilityresult 1 1 6.957497 6.957497 11598 +aprocess 1 1 6.957497 6.957497 11599 +wefirst 1 1 6.957497 6.957497 11600 +canmak 1 1 6.957497 6.957497 11601 +solveconsensu 1 1 6.957497 6.957497 11602 +practicalityof 1 1 6.957497 6.957497 11603 +theircorrect 1 1 6.957497 6.957497 11604 +sharedobject 1 1 6.957497 6.957497 11605 +accessesthi 1 1 6.957497 6.957497 11606 +otherprocess 1 1 6.957497 6.957497 11607 +thatcorrespond 1 1 6.957497 6.957497 11608 +atani 1 1 6.957497 6.957497 11609 +whetherrobust 1 1 6.957497 6.957497 11610 +bracha 1 1 6.957497 6.957497 11611 +srikanth 1 1 6.957497 6.957497 11612 +abbadi 1 1 6.957497 6.957497 11613 +detectorfor 1 1 6.957497 6.957497 11614 +vancouv 1 1 6.957497 6.957497 11615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ new file mode 100644 index 00000000..cf6bce7f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^samuel^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +comput 4 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +program 4 374 0.693147 2.772588 7 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +interest 1 384 0.693147 0.693147 11 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +cornel 5 215 1.386294 6.931470 23 +design 3 213 1.386294 4.158882 25 +softwar 2 220 1.386294 2.772588 30 +languag 2 227 1.386294 2.772588 26 +email 1 220 1.386294 1.386294 29 +public 3 202 1.609438 4.828314 43 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +distribut 2 162 1.791759 3.583518 51 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +professor 2 137 1.945910 3.891820 76 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +compil 6 122 2.079442 12.476652 96 +confer 3 126 2.079442 6.238326 100 +report 2 131 2.079442 4.158884 92 +technolog 1 131 2.079442 2.079442 102 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +assist 3 112 2.197225 6.591675 113 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +technic 2 100 2.302585 4.605170 140 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +proceed 2 93 2.397895 4.795790 152 +journal 1 83 2.484907 2.484907 183 +master 2 76 2.564949 5.129898 216 +messag 2 76 2.564949 5.129898 212 +upson 1 71 2.639057 2.639057 218 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +complex 2 64 2.772589 5.545178 269 +foundat 1 62 2.772589 2.772589 286 +septemb 1 65 2.772589 2.772589 274 +thesi 2 57 2.890372 5.780744 327 +algebra 2 45 3.135494 6.270988 394 +editor 2 41 3.218876 6.437752 433 +submit 2 39 3.258097 6.516194 440 +small 1 39 3.258097 3.258097 447 +formal 1 37 3.332205 3.332205 478 +concurr 1 34 3.401197 3.401197 501 +semant 4 29 3.583519 14.334076 587 +bound 1 26 3.688879 3.688879 659 +director 1 22 3.850148 3.850148 767 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +vlsi 1 21 3.912023 3.912023 795 +verif 1 20 3.951244 3.951244 826 +scheme 1 20 3.951244 3.951244 818 +binari 1 20 3.951244 3.951244 823 +exercis 1 19 4.007333 4.007333 842 +failur 1 18 4.060443 4.060443 898 +brown 3 16 4.174387 12.523161 977 +circuit 1 13 4.382027 4.382027 1131 +sigplan 1 13 4.382027 4.382027 1190 +verifi 2 12 4.465908 8.931816 1261 +calculu 1 12 4.465908 4.465908 1203 +meta 1 9 4.753590 4.753590 1505 +crash 1 8 4.875197 4.875197 1616 +delai 1 7 5.010635 5.010635 1848 +cornellunivers 1 7 5.010635 5.010635 1916 +seshadri 1 7 5.010635 5.010635 1803 +silicon 4 6 5.164786 20.659144 2076 +toronto 1 6 5.164786 5.164786 2156 +educurr 1 5 5.347108 5.347108 2504 +bloom 4 4 5.568345 22.273380 2913 +hallphon 1 4 5.568345 5.568345 2900 +insensit 1 4 5.568345 5.568345 2716 +knight 1 4 5.568345 5.568345 2728 +savag 1 4 5.568345 5.568345 2777 +weber 11 3 5.857933 64.437263 3156 +samuel 5 3 5.857933 29.289665 3155 +act 2 3 5.857933 11.715866 3557 +agreement 2 3 5.857933 11.715866 3207 +metatheori 1 3 5.857933 5.857933 3642 +byzantin 2 2 6.263398 12.526796 4203 +bakker 1 2 6.263398 6.263398 5337 +hadzilaco 1 2 6.263398 6.263398 5338 +roever 1 1 6.957497 6.957497 11579 +rozenberg 1 1 6.957497 6.957497 11580 +amdur 1 1 6.957497 6.957497 11581 +wortman 1 1 6.957497 6.957497 11582 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html new file mode 100644 index 00000000..e707a387 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^scl^sean.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 5 443 0.693147 3.465735 6 +interest 3 384 0.693147 2.079441 11 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +project 5 340 1.098612 5.493060 18 +engin 3 297 1.098612 3.295836 20 +current 2 284 1.098612 2.197224 21 +cours 2 273 1.098612 2.197224 15 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +design 2 213 1.386294 2.772588 25 +modifi 1 178 1.609438 1.609438 35 +base 2 165 1.791759 3.583518 50 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +object 3 138 1.945910 5.837730 79 +databas 1 122 2.079442 2.079442 86 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +technolog 1 131 2.079442 2.079442 102 +final 1 116 2.197225 2.197225 108 +version 1 113 2.197225 2.197225 122 +topic 1 114 2.197225 2.197225 110 +person 1 111 2.197225 2.197225 117 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +graphic 5 90 2.397895 11.989475 147 +imag 1 91 2.397895 2.397895 161 +educ 1 86 2.484907 2.484907 191 +master 3 76 2.564949 7.694847 216 +orient 2 80 2.564949 5.129898 205 +resum 1 79 2.564949 2.564949 217 +window 3 68 2.708050 8.124150 242 +content 1 59 2.833213 2.833213 302 +plai 1 60 2.833213 2.833213 307 +sampl 1 53 2.944439 2.944439 339 +favorit 1 44 3.135494 3.135494 410 +past 1 42 3.218876 3.218876 428 +combin 1 42 3.218876 3.218876 421 +product 1 33 3.433987 3.433987 527 +releas 1 28 3.610918 3.610918 616 +team 2 27 3.637586 7.275172 625 +retriev 1 27 3.637586 3.637586 621 +request 1 26 3.688879 3.688879 635 +reach 1 24 3.761200 3.761200 688 +lead 1 23 3.806662 3.806662 718 +color 1 22 3.850148 3.850148 762 +divis 1 21 3.912023 3.912023 803 +render 2 17 4.110874 8.221748 947 +analyz 1 17 4.110874 4.110874 925 +sheet 1 16 4.174387 4.174387 973 +draw 2 14 4.317488 8.634976 1086 +systemsc 1 11 4.553877 4.553877 1293 +ski 1 10 4.653960 4.653960 1471 +card 1 10 4.653960 4.653960 1435 +softbal 1 9 4.753590 4.753590 1594 +sean 2 8 4.875197 9.750394 1705 +golf 1 6 5.164786 5.164786 2178 +isi 5 5 5.347108 26.735540 2443 +interior 1 5 5.347108 5.347108 2439 +basebal 2 4 5.568345 11.136690 2969 +percept 1 3 5.857933 5.857933 3739 +compliant 1 3 5.857933 5.857933 3245 +landi 2 2 6.263398 12.526796 4830 +clickherefor 1 2 6.263398 6.263398 5344 +stratu 1 2 6.263398 6.263398 5345 +broker 1 2 6.263398 6.263398 4968 +orbix 3 1 6.957497 20.872491 11616 +landissean 1 1 6.957497 6.957497 11617 +sciencewelcom 1 1 6.957497 6.957497 11618 +weanalyz 1 1 6.957497 6.957497 11619 +patternsprofession 1 1 6.957497 6.957497 11620 +acorba 1 1 6.957497 6.957497 11621 +iona 1 1 6.957497 6.957497 11622 +alpin 1 1 6.957497 6.957497 11623 +collectingi 1 1 6.957497 6.957497 11624 +comeduc 1 1 6.957497 6.957497 11625 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html new file mode 100644 index 00000000..f87e312c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^seena^homepage.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +system 5 443 0.693147 3.465735 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +project 4 340 1.098612 4.394448 18 +engin 3 297 1.098612 3.295836 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +oper 3 180 1.609438 4.828314 34 +updat 1 191 1.609438 1.609438 41 +distribut 1 162 1.791759 1.791759 51 +model 1 145 1.945910 1.945910 69 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +graphic 3 90 2.397895 7.193685 147 +june 1 79 2.564949 2.564949 214 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +ithaca 2 65 2.772589 5.545178 294 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +summer 1 56 2.890372 2.890372 311 +york 1 41 3.218876 3.218876 435 +tech 1 35 3.401197 3.401197 515 +post 1 35 3.401197 3.401197 505 +india 1 32 3.465736 3.465736 550 +taken 1 31 3.496508 3.496508 555 +anim 1 31 3.496508 3.496508 557 +equat 1 23 3.806662 3.806662 724 +sciencecornel 1 22 3.850148 3.850148 768 +viewer 1 21 3.912023 3.912023 787 +practicum 1 16 4.174387 4.174387 960 +magic 1 11 4.553877 4.553877 1358 +mapl 1 11 4.553877 4.553877 1376 +reduct 1 7 5.010635 5.010635 1877 +parametr 1 7 5.010635 5.010635 1819 +jpeg 1 6 5.164786 5.164786 2053 +myresum 1 6 5.164786 5.164786 2199 +hoca 1 5 5.347108 5.347108 2241 +engineeringclass 1 3 5.857933 5.857933 3667 +kerala 1 3 5.857933 5.857933 3749 +cornelluniversityfal 1 2 6.263398 6.263398 5131 +artifact 1 2 6.263398 6.263398 5346 +cspracticum 1 2 6.263398 6.263398 5132 +carpet 1 2 6.263398 6.263398 5133 +colloqium 1 2 6.263398 6.263398 5134 +seena 3 1 6.957497 20.872491 11626 +cherangara 1 1 6.957497 6.957497 11627 +cherangaramast 1 1 6.957497 6.957497 11628 +homepagecurr 1 1 6.957497 6.957497 11629 +trivandrum 1 1 6.957497 6.957497 11630 +processingalgorithm 1 1 6.957497 6.957497 11631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html new file mode 100644 index 00000000..dbe20e60 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sharma^index.html @@ -0,0 +1,239 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +us 7 329 1.098612 7.690284 16 +student 2 343 1.098612 2.197224 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +languag 5 227 1.386294 6.931470 26 +cornel 2 215 1.386294 2.772588 23 +link 2 247 1.386294 2.772588 24 +softwar 2 220 1.386294 2.772588 30 +also 2 259 1.386294 2.772588 28 +gener 2 220 1.386294 2.772588 27 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +class 2 199 1.609438 3.218876 37 +paper 2 205 1.609438 3.218876 38 +oper 2 180 1.609438 3.218876 34 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +develop 3 174 1.791759 5.375277 53 +applic 2 170 1.791759 3.583518 56 +data 2 170 1.791759 3.583518 49 +implement 2 152 1.791759 3.583518 52 +network 1 168 1.791759 1.791759 61 +like 6 132 1.945910 11.675460 81 +file 4 132 1.945910 7.783640 70 +support 2 132 1.945910 3.891820 83 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +first 1 140 1.945910 1.945910 71 +postscript 2 131 2.079442 4.158884 90 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +make 2 111 2.197225 4.394450 120 +site 1 106 2.197225 2.197225 119 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +look 1 107 2.197225 2.197225 115 +part 2 98 2.302585 4.605170 129 +text 2 98 2.302585 4.605170 133 +peopl 2 96 2.302585 4.605170 132 +access 1 102 2.302585 2.302585 136 +imag 10 91 2.397895 23.978950 161 +call 3 91 2.397895 7.193685 153 +question 1 91 2.397895 2.397895 141 +internet 4 83 2.484907 9.939628 186 +stuff 2 87 2.484907 4.969814 171 +start 1 83 2.484907 2.484907 173 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +method 1 80 2.564949 2.564949 213 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +name 5 72 2.639057 13.195285 220 +write 2 72 2.639057 5.278114 222 +upson 1 71 2.639057 2.639057 218 +effici 1 73 2.639057 2.639057 233 +multimedia 1 68 2.708050 2.708050 258 +test 1 66 2.708050 2.708050 252 +window 1 68 2.708050 2.708050 242 +written 2 63 2.772589 5.545178 278 +virtual 1 62 2.772589 2.772589 285 +dept 1 64 2.772589 2.772589 291 +creat 1 63 2.772589 2.772589 277 +type 1 61 2.833213 2.833213 296 +sever 1 56 2.890372 2.890372 322 +thesi 1 57 2.890372 2.890372 327 +instruct 1 53 2.944439 2.944439 332 +allow 1 53 2.944439 2.944439 333 +cool 2 49 3.044522 6.089044 374 +give 2 50 3.044522 6.089044 359 +format 1 48 3.044522 3.044522 356 +friend 1 48 3.044522 3.044522 376 +quarter 1 47 3.091042 3.091042 389 +video 7 44 3.135494 21.948458 405 +protocol 3 45 3.135494 9.406482 407 +directori 1 45 3.135494 3.135494 396 +term 1 43 3.178054 3.178054 411 +compani 1 41 3.218876 3.218876 423 +fast 1 42 3.218876 3.218876 429 +live 1 40 3.258097 3.258097 451 +small 1 39 3.258097 3.258097 447 +prototyp 1 38 3.295837 3.295837 463 +slide 1 38 3.295837 3.295837 467 +microsoft 1 38 3.295837 3.295837 468 +mean 3 37 3.332205 9.996615 477 +statist 1 35 3.401197 3.401197 521 +transform 1 32 3.465736 3.465736 542 +given 1 32 3.465736 3.465736 538 +someth 1 31 3.496508 3.496508 554 +often 1 31 3.496508 3.496508 551 +steve 1 29 3.583519 3.583519 594 +limit 1 29 3.583519 3.583519 585 +consid 1 29 3.583519 3.583519 590 +semant 1 29 3.583519 3.583519 587 +chines 1 29 3.583519 3.583519 595 +releas 2 28 3.610918 7.221836 616 +ask 1 28 3.610918 3.610918 597 +manipul 2 27 3.637586 7.275172 624 +altern 1 26 3.688879 3.688879 641 +enhanc 1 26 3.688879 3.688879 644 +spent 1 25 3.737670 3.737670 676 +client 1 25 3.737670 3.737670 679 +interpret 2 24 3.761200 7.522400 686 +motion 1 24 3.761200 3.761200 699 +initi 1 23 3.806662 3.806662 717 +recognit 1 23 3.806662 3.806662 723 +variabl 1 23 3.806662 3.806662 715 +sequenc 1 23 3.806662 3.806662 734 +displai 1 23 3.806662 3.806662 712 +indian 2 22 3.850148 7.700296 769 +fact 1 21 3.912023 3.912023 780 +similar 1 21 3.912023 3.912023 771 +viewer 1 21 3.912023 3.912023 787 +wonder 1 20 3.951244 3.951244 815 +media 2 19 4.007333 8.014666 861 +ever 1 19 4.007333 4.007333 872 +record 2 18 4.060443 8.120886 890 +event 1 18 4.060443 4.060443 896 +lot 1 18 4.060443 4.060443 889 +stanford 3 17 4.110874 12.332622 955 +coupl 1 17 4.110874 4.110874 939 +layer 1 17 4.110874 4.110874 926 +segment 1 17 4.110874 4.110874 931 +bachelor 1 17 4.110874 4.110874 957 +upon 1 16 4.174387 4.174387 978 +portion 1 16 4.174387 4.174387 971 +stream 2 15 4.248495 8.496990 1015 +remot 1 15 4.248495 4.248495 1041 +charact 1 15 4.248495 4.248495 1028 +rate 1 15 4.248495 4.248495 1037 +audio 1 14 4.317488 4.317488 1094 +script 3 13 4.382027 13.146081 1171 +came 1 13 4.382027 4.382027 1197 +asynchron 1 12 4.465908 4.465908 1229 +gupta 1 12 4.465908 4.465908 1241 +optic 1 12 4.465908 4.465908 1221 +bill 1 11 4.553877 4.553877 1297 +player 1 11 4.553877 4.553877 1371 +fix 1 11 4.553877 4.553877 1327 +smart 1 11 4.553877 4.553877 1352 +mode 2 9 4.753590 9.507180 1492 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +leader 1 9 4.753590 4.753590 1576 +claim 2 8 4.875197 9.750394 1664 +character 1 8 4.875197 4.875197 1767 +vallei 1 7 5.010635 5.010635 1959 +signal 1 7 5.010635 5.010635 1910 +keshav 1 7 5.010635 5.010635 1852 +conferenc 1 7 5.010635 5.010635 1857 +nativ 2 6 5.164786 10.329572 2192 +classroom 1 6 5.164786 5.164786 2006 +televis 1 6 5.164786 5.164786 2118 +silicon 1 6 5.164786 5.164786 2076 +mix 1 6 5.164786 5.164786 2200 +multicast 3 5 5.347108 16.041324 2305 +shell 2 5 5.347108 10.694216 2353 +sigcomm 1 5 5.347108 5.347108 2329 +affin 1 5 5.347108 5.347108 2378 +hate 1 5 5.347108 5.347108 2529 +hole 1 5 5.347108 5.347108 2518 +stupid 1 5 5.347108 5.347108 2489 +sharma 8 4 5.568345 44.546760 2752 +anoop 1 4 5.568345 5.568345 2770 +height 1 4 5.568345 5.568345 2890 +deploi 1 3 5.857933 5.857933 3750 +greatli 1 3 5.857933 5.857933 3541 +deliveri 1 3 5.857933 5.857933 3278 +membership 1 3 5.857933 5.857933 3751 +predecessor 1 3 5.857933 5.857933 3585 +motif 1 3 5.857933 5.857933 3752 +fractal 1 3 5.857933 5.857933 3475 +hindi 1 3 5.857933 5.857933 3753 +nicknam 1 3 5.857933 5.857933 3716 +widget 3 2 6.263398 18.790194 5347 +hors 2 2 6.263398 12.526796 5348 +stumbl 1 2 6.263398 6.263398 5349 +leadto 1 2 6.263398 6.263398 5350 +navin 1 2 6.263398 6.263398 5351 +agarw 1 2 6.263398 6.263398 5352 +deer 1 2 6.263398 6.263398 4356 +width 1 2 6.263398 6.263398 5328 +yacc 1 2 6.263398 6.263398 4422 +coolest 1 2 6.263398 6.263398 5229 +fool 1 2 6.263398 6.263398 5353 +frozen 1 2 6.263398 6.263398 5078 +rosen 8 1 6.957497 55.659976 11632 +sharmila 4 1 6.957497 27.829988 11633 +vxtreme 2 1 6.957497 13.914994 11634 +imagefram 2 1 6.957497 13.914994 11635 +modifiedigmp 1 1 6.957497 6.957497 11636 +unicast 1 1 6.957497 6.957497 11637 +sitn 1 1 6.957497 6.957497 11638 +microwav 1 1 6.957497 6.957497 11639 +chaddha 1 1 6.957497 6.957497 11640 +avneesh 1 1 6.957497 6.957497 11641 +asilomar 1 1 6.957497 6.957497 11642 +igmp 1 1 6.957497 6.957497 11643 +internetdraft 1 1 6.957497 6.957497 11644 +fenner 1 1 6.957497 6.957497 11645 +niten 1 1 6.957497 6.957497 11646 +malhan 1 1 6.957497 6.957497 11647 +delhiunpublish 1 1 6.957497 6.957497 11648 +preform 1 1 6.957497 6.957497 11649 +blur 1 1 6.957497 6.957497 11650 +speckl 1 1 6.957497 6.957497 11651 +subband 1 1 6.957497 6.957497 11652 +estmat 1 1 6.957497 6.957497 11653 +writen 1 1 6.957497 6.957497 11654 +flavour 1 1 6.957497 6.957497 11655 +ifram 1 1 6.957497 6.957497 11656 +nodisplai 1 1 6.957497 6.957497 11657 +filenam 1 1 6.957497 6.957497 11658 +putimageincanva 1 1 6.957497 6.957497 11659 +dummi 1 1 6.957497 6.957497 11660 +snooper 1 1 6.957497 6.957497 11661 +doesnt 1 1 6.957497 6.957497 11662 +replai 1 1 6.957497 6.957497 11663 +kludg 1 1 6.957497 6.957497 11664 +dissalow 1 1 6.957497 6.957497 11665 +gaveth 1 1 6.957497 6.957497 11666 +tongu 1 1 6.957497 6.957497 11667 +sharm 1 1 6.957497 6.957497 11668 +shyness 1 1 6.957497 6.957497 11669 +actress 1 1 6.957497 6.957497 11670 +tagor 1 1 6.957497 6.957497 11671 +ealri 1 1 6.957497 6.957497 11672 +jewish 1 1 6.957497 6.957497 11673 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html new file mode 100644 index 00000000..faa68e41 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^shim^shim.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +univers 3 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +project 3 340 1.098612 3.295836 18 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +cornel 3 215 1.386294 4.158882 23 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +version 2 113 2.197225 4.394450 122 +final 1 116 2.197225 2.197225 108 +check 1 115 2.197225 2.197225 118 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +graphic 1 90 2.397895 2.397895 147 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +homepag 1 93 2.397895 2.397895 148 +school 1 84 2.484907 2.484907 188 +know 1 80 2.564949 2.564949 198 +resum 1 79 2.564949 2.564949 217 +view 2 70 2.708050 5.416100 254 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +abstract 1 62 2.772589 2.772589 276 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +friend 3 48 3.044522 9.133566 376 +still 1 50 3.044522 3.044522 362 +california 1 46 3.091042 3.091042 388 +favorit 1 44 3.135494 3.135494 410 +music 3 42 3.218876 9.656628 436 +futur 1 41 3.218876 3.218876 427 +movi 2 40 3.258097 6.516194 459 +soon 1 36 3.367296 3.367296 494 +john 1 33 3.433987 3.433987 532 +transform 1 32 3.465736 3.465736 542 +sciencecornel 1 22 3.850148 3.850148 768 +love 1 21 3.912023 3.912023 804 +watch 1 21 3.912023 3.912023 789 +eric 3 19 4.007333 12.021999 870 +citi 1 19 4.007333 4.007333 874 +miss 1 19 4.007333 4.007333 866 +listen 1 18 4.060443 4.060443 907 +young 1 16 4.174387 4.174387 991 +camera 1 14 4.317488 4.317488 1115 +hong 1 14 4.317488 4.317488 1105 +near 1 14 4.317488 4.317488 1091 +meng 2 12 4.465908 8.931816 1214 +went 1 12 4.465908 4.465908 1279 +pagewelcom 1 11 4.553877 4.553877 1344 +french 1 9 4.753590 4.753590 1511 +guitar 1 8 4.875197 4.875197 1758 +mile 1 8 4.875197 4.875197 1743 +instrument 1 7 5.010635 5.010635 1954 +davi 1 7 5.010635 5.010635 1888 +piano 1 6 5.164786 5.164786 2201 +antonio 1 6 5.164786 5.164786 2186 +jazz 2 5 5.347108 10.694216 2527 +carlo 1 5 5.347108 5.347108 2515 +middl 1 5 5.347108 5.347108 2372 +keyboard 1 4 5.568345 5.568345 2970 +korea 1 4 5.568345 5.568345 2971 +cyberspac 1 3 5.857933 5.857933 3719 +korean 3 2 6.263398 18.790194 5354 +acoust 2 2 6.263398 12.526796 5355 +kwan 2 2 6.263398 12.526796 4126 +sang 1 2 6.263398 6.263398 5356 +onthi 1 2 6.263398 6.263398 5357 +chopin 1 2 6.263398 6.263398 5358 +cinema 1 2 6.263398 6.263398 5244 +miser 1 2 6.263398 6.263398 5359 +melco 1 2 6.263398 6.263398 5200 +kang 1 2 6.263398 6.263398 5360 +shim 2 1 6.957497 13.914994 11674 +shimmast 1 1 6.957497 6.957497 11675 +dryden 1 1 6.957497 6.957497 11676 +irvinestudi 1 1 6.957497 6.957497 11677 +classi 1 1 6.957497 6.957497 11678 +stan 1 1 6.957497 6.957497 11679 +getz 1 1 6.957497 6.957497 11680 +jobim 1 1 6.957497 6.957497 11681 +coltran 1 1 6.957497 6.957497 11682 +earl 1 1 6.957497 6.957497 11683 +klugh 1 1 6.957497 6.957497 11684 +metheni 1 1 6.957497 6.957497 11685 +archemi 1 1 6.957497 6.957497 11686 +paradiso 1 1 6.957497 6.957497 11687 +kiss 1 1 6.957497 6.957497 11688 +saigon 1 1 6.957497 6.957497 11689 +newswant 1 1 6.957497 6.957497 11690 +anybodi 1 1 6.957497 6.957497 11691 +hana 1 1 6.957497 6.957497 11692 +jung 1 1 6.957497 6.957497 11693 +hwan 1 1 6.957497 6.957497 11694 +victor 1 1 6.957497 6.957497 11695 +jiyang 1 1 6.957497 6.957497 11696 +timessinc 1 1 6.957497 6.957497 11697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html new file mode 100644 index 00000000..f1c16c72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^singhal^singhal.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +research 4 431 0.693147 2.772588 10 +inform 4 412 0.693147 2.772588 8 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 5 329 1.098612 5.493060 16 +student 3 343 1.098612 3.295836 19 +current 2 284 1.098612 2.197224 21 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +cornel 3 215 1.386294 4.158882 23 +gener 2 220 1.386294 2.772588 27 +also 1 259 1.386294 1.386294 28 +paper 5 205 1.609438 8.047190 38 +group 4 183 1.609438 6.437752 36 +modifi 1 178 1.609438 1.609438 35 +public 1 202 1.609438 1.609438 43 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +process 2 142 1.945910 3.891820 72 +area 1 144 1.945910 1.945910 80 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +document 4 121 2.079442 8.317768 89 +confer 3 126 2.079442 6.238326 100 +provid 2 121 2.079442 4.158884 94 +analysi 2 124 2.079442 4.158884 98 +postscript 1 131 2.079442 2.079442 90 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +structur 5 106 2.197225 10.986125 105 +text 16 98 2.302585 36.841360 133 +techniqu 2 99 2.302585 4.605170 138 +need 1 98 2.302585 2.302585 135 +select 2 91 2.397895 4.795790 154 +proceed 1 93 2.397895 2.397895 152 +associ 1 93 2.397895 2.397895 151 +sinc 1 90 2.397895 2.397895 159 +help 1 83 2.484907 2.484907 175 +appear 2 78 2.564949 5.129898 210 +resum 1 79 2.564949 2.564949 217 +come 1 78 2.564949 2.564949 202 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +involv 1 71 2.639057 2.639057 227 +david 1 71 2.639057 2.639057 232 +prof 2 64 2.772589 5.545178 273 +copi 1 63 2.772589 2.772589 284 +improv 1 62 2.772589 2.772589 289 +evalu 1 64 2.772589 2.772589 266 +collect 1 65 2.772589 2.772589 268 +visit 1 63 2.772589 2.772589 288 +automat 7 61 2.833213 19.832491 306 +best 1 59 2.833213 2.833213 299 +thesi 2 57 2.890372 5.780744 327 +variou 1 56 2.890372 2.890372 317 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +advisor 1 51 2.995732 2.995732 355 +approach 1 48 3.044522 3.044522 366 +visitor 1 49 3.044522 3.044522 371 +effect 1 46 3.091042 3.091042 385 +term 1 43 3.178054 3.178054 411 +show 1 43 3.178054 3.178054 417 +third 1 43 3.178054 3.178054 412 +late 1 40 3.258097 3.258097 439 +field 2 37 3.332205 6.664410 482 +soon 1 36 3.367296 3.367296 494 +ofth 1 36 3.367296 3.367296 491 +jame 3 35 3.401197 10.203591 507 +queri 1 33 3.433987 3.433987 524 +independ 1 32 3.465736 3.465736 548 +exist 1 30 3.555348 3.555348 569 +propos 2 28 3.610918 7.221836 602 +usual 1 28 3.610918 3.610918 608 +retriev 7 27 3.637586 25.463102 621 +relev 1 26 3.688879 3.688879 637 +other 1 24 3.761200 3.761200 697 +size 2 23 3.806662 7.613324 713 +brows 1 23 3.806662 3.806662 726 +thank 1 23 3.806662 3.806662 721 +hypertext 1 19 4.007333 4.007333 865 +segment 1 17 4.110874 4.110874 931 +normal 3 16 4.174387 12.523161 995 +eduphon 1 15 4.248495 4.248495 1060 +senior 1 14 4.317488 4.317488 1120 +weight 1 12 4.465908 4.465908 1204 +reader 1 12 4.465908 4.465908 1246 +chri 9 11 4.553877 40.984893 1311 +smart 5 11 4.553877 22.769385 1352 +arpa 1 11 4.553877 4.553877 1369 +summar 1 11 4.553877 4.553877 1295 +length 2 10 4.653960 9.307920 1400 +decomposit 2 10 4.653960 9.307920 1439 +theme 2 8 4.875197 9.750394 1707 +counter 1 8 4.875197 4.875197 1765 +vari 1 6 5.164786 5.164786 2001 +pivot 2 5 5.347108 10.694216 2426 +fairli 1 5 5.347108 5.347108 2322 +yield 1 5 5.347108 5.347108 2458 +testb 1 5 5.347108 5.347108 2456 +circumst 1 5 5.347108 5.347108 2283 +proceedingsof 1 5 5.347108 5.347108 2331 +dual 1 5 5.347108 5.347108 2522 +travers 1 5 5.347108 5.347108 2363 +amit 12 4 5.568345 66.820140 2972 +allan 3 4 5.568345 16.705035 2849 +nist 2 4 5.568345 11.136690 2973 +commonli 1 4 5.568345 5.568345 2877 +substanti 1 4 5.568345 5.568345 2921 +singhal 12 3 5.857933 70.295196 3098 +trec 5 3 5.857933 29.289665 3547 +supervisor 1 3 5.857933 5.857933 3754 +likelihood 1 3 5.857933 5.857933 3172 +expans 1 3 5.857933 5.857933 3755 +gerard 10 2 6.263398 62.633980 4876 +salton 10 2 6.263398 62.633980 4060 +bucklei 9 2 6.263398 56.370582 4874 +foremost 1 2 6.263398 6.263398 5361 +excerpt 1 2 6.263398 6.263398 4880 +degrad 1 2 6.263398 6.263398 5362 +amitsingh 1 2 6.263398 6.263398 4061 +slowli 1 2 6.263398 6.263398 5363 +mandar 3 1 6.957497 20.872491 11698 +gerardsalton 2 1 6.957497 13.914994 11699 +lengthnorm 2 1 6.957497 13.914994 11700 +mandarmitra 2 1 6.957497 13.914994 11701 +mitra 2 1 6.957497 13.914994 11702 +pageamit 1 1 6.957497 6.957497 11703 +singhaldepart 1 1 6.957497 6.957497 11704 +universitysingh 1 1 6.957497 6.957497 11705 +andtext 1 1 6.957497 6.957497 11706 +clairecardieher 1 1 6.957497 6.957497 11707 +beenon 1 1 6.957497 6.957497 11708 +informationretriev 1 1 6.957497 6.957497 11709 +thirti 1 1 6.957497 6.957497 11710 +thateffect 1 1 6.957497 6.957497 11711 +chancessimilar 1 1 6.957497 6.957497 11712 +normalizationfunct 1 1 6.957497 6.957497 11713 +retrievaleffect 1 1 6.957497 6.957497 11714 +normalizationtechniqu 1 1 6.957497 6.957497 11715 +trecparticipationtext 1 1 6.957497 6.957497 11716 +sponsoredeffort 1 1 6.957497 6.957497 11717 +retrievaltechniqu 1 1 6.957497 6.957497 11718 +hasconsist 1 1 6.957497 6.957497 11719 +somepap 1 1 6.957497 6.957497 11720 +summarizationnon 1 1 6.957497 6.957497 11721 +expositori 1 1 6.957497 6.957497 11722 +tocov 1 1 6.957497 6.957497 11723 +selectiveaccess 1 1 6.957497 6.957497 11724 +toanalyz 1 1 6.957497 6.957497 11725 +texttravers 1 1 6.957497 6.957497 11726 +papersnorm 1 1 6.957497 6.957497 11727 +documentlength 1 1 6.957497 6.957497 11728 +mitraand 1 1 6.957497 6.957497 11729 +usingsmart 1 1 6.957497 6.957497 11730 +textthem 1 1 6.957497 6.957497 11731 +andmanag 1 1 6.957497 6.957497 11732 +vectorspac 1 1 6.957497 6.957497 11733 +machineread 1 1 6.957497 6.957497 11734 +groupmemb 1 1 6.957497 6.957497 11735 +fluctuat 1 1 6.957497 6.957497 11736 +iinstal 1 1 6.957497 6.957497 11737 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ new file mode 100644 index 00000000..ecb1bcce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skeshav^ @@ -0,0 +1,154 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 3 380 0.693147 2.079441 9 +depart 2 457 0.693147 1.386294 12 +last 2 314 1.098612 2.197224 14 +current 1 284 1.098612 1.098612 21 +cornel 4 215 1.386294 5.545176 23 +also 2 259 1.386294 2.772588 28 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +paper 2 205 1.609438 3.218876 38 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +network 2 168 1.791759 3.583518 61 +base 2 165 1.791759 3.583518 50 +avail 2 169 1.791759 3.583518 48 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +postscript 1 131 2.079442 2.079442 90 +version 4 113 2.197225 8.788900 122 +site 2 106 2.197225 4.394450 119 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +peopl 2 96 2.302585 4.605170 132 +part 1 98 2.302585 2.302585 129 +sinc 2 90 2.397895 4.795790 159 +real 2 93 2.397895 4.795790 144 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +build 3 85 2.484907 7.454721 184 +wide 1 84 2.484907 2.484907 185 +internet 1 83 2.484907 2.484907 186 +level 1 87 2.484907 2.484907 180 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +name 7 72 2.639057 18.473399 220 +upson 1 71 2.639057 2.639057 218 +write 1 72 2.639057 2.639057 222 +simul 2 66 2.708050 5.416100 255 +ithaca 2 65 2.772589 5.545178 294 +virtual 1 62 2.772589 2.772589 285 +reason 1 57 2.890372 2.890372 318 +talk 1 53 2.944439 2.944439 336 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +still 1 50 3.044522 3.044522 362 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +littl 1 39 3.258097 3.258097 454 +everi 1 34 3.401197 3.401197 519 +least 1 35 3.401197 3.401197 516 +word 1 34 3.401197 3.401197 508 +independ 1 32 3.465736 3.465736 548 +idea 1 32 3.465736 3.465736 545 +india 1 32 3.465736 3.465736 550 +anim 1 31 3.496508 3.496508 557 +domain 1 30 3.555348 3.555348 564 +built 1 29 3.583519 3.583519 592 +quot 1 29 3.583519 3.583519 582 +actual 1 28 3.610918 3.610918 604 +releas 1 28 3.610918 3.610918 616 +sometim 1 24 3.761200 3.761200 696 +head 1 23 3.806662 3.806662 732 +instal 1 22 3.850148 3.850148 754 +latest 1 21 3.912023 3.912023 785 +thu 1 21 3.912023 3.912023 773 +driven 1 15 4.248495 4.248495 1048 +remot 1 15 4.248495 4.248495 1041 +goe 1 15 4.248495 4.248495 1044 +squar 1 14 4.317488 4.317488 1082 +edui 1 13 4.382027 4.382027 1193 +directli 1 13 4.382027 4.382027 1141 +circuit 1 13 4.382027 4.382027 1131 +unfortun 1 13 4.382027 4.382027 1170 +speak 1 12 4.465908 4.465908 1283 +song 1 11 4.553877 4.553877 1380 +equip 1 10 4.653960 4.653960 1459 +stack 1 10 4.653960 4.653960 1389 +packet 1 10 4.653960 4.653960 1415 +custom 1 10 4.653960 4.653960 1414 +mode 2 9 4.753590 9.507180 1492 +lane 1 8 4.875197 4.875197 1720 +router 1 8 4.875197 4.875197 1772 +switch 1 8 4.875197 4.875197 1718 +span 1 8 4.875197 4.875197 1751 +round 1 8 4.875197 4.875197 1769 +keshav 2 7 5.010635 10.021270 1852 +signal 1 7 5.010635 5.010635 1910 +canb 1 7 5.010635 5.010635 1846 +hear 1 7 5.010635 5.010635 1940 +nativ 2 6 5.164786 10.329572 2192 +south 1 6 5.164786 5.164786 2167 +srinivasan 1 6 5.164786 5.164786 2175 +christoph 1 5 5.347108 5.347108 2512 +delhi 1 5 5.347108 5.347108 2530 +facial 1 5 5.347108 5.347108 2438 +hole 1 5 5.347108 5.347108 2518 +poem 1 5 5.347108 5.347108 2483 +systemsand 1 4 5.568345 5.568345 2804 +scratch 1 3 5.857933 5.857933 3140 +compliant 1 3 5.857933 5.857933 3245 +district 1 3 5.857933 5.857933 3756 +father 1 3 5.857933 5.857933 3757 +johann 1 3 5.857933 5.857933 3758 +goof 1 2 6.263398 6.263398 4985 +snoop 1 2 6.263398 6.263398 5364 +mbone 1 2 6.263398 6.263398 4361 +pagemi 1 2 6.263398 6.263398 5230 +villag 1 2 6.263398 6.263398 5215 +ought 1 2 6.263398 6.263398 5365 +goeth 1 2 6.263398 6.263398 5366 +skeshav 2 1 6.957497 13.914994 11738 +idlinet 2 1 6.957497 13.914994 11739 +keshavemail 1 1 6.957497 6.957497 11740 +spentfiv 1 1 6.957497 6.957497 11741 +xunet 1 1 6.957497 6.957497 11742 +incollabor 1 1 6.957497 6.957497 11743 +fore 1 1 6.957497 6.957497 11744 +zeitnet 1 1 6.957497 6.957497 11745 +idlinetsourc 1 1 6.957497 6.957497 11746 +applicationget 1 1 6.957497 6.957497 11747 +linkspapersher 1 1 6.957497 6.957497 11748 +linkto 1 1 6.957497 6.957497 11749 +reali 1 1 6.957497 6.957497 11750 +beout 1 1 6.957497 6.957497 11751 +native_mod 1 1 6.957497 6.957497 11752 +namein 1 1 6.957497 6.957497 11753 +thanjavur 1 1 6.957497 6.957497 11754 +beprecis 1 1 6.957497 6.957497 11755 +prefix 1 1 6.957497 6.957497 11756 +sonli 1 1 6.957497 6.957497 11757 +surnam 1 1 6.957497 6.957497 11758 +myfath 1 1 6.957497 6.957497 11759 +intoth 1 1 6.957497 6.957497 11760 +beconfus 1 1 6.957497 6.957497 11761 +quotabl 1 1 6.957497 6.957497 11762 +wolfgang 1 1 6.957497 6.957497 11763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html new file mode 100644 index 00000000..f535862e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^skl^skl.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +cornel 4 215 1.386294 5.545176 23 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +construct 1 139 1.945910 1.945910 82 +place 1 106 2.197225 2.197225 124 +sinc 1 90 2.397895 2.397895 159 +prof 1 64 2.772589 2.772589 273 +electr 1 38 3.295837 3.295837 461 +alwai 1 24 3.761200 3.761200 691 +came 1 13 4.382027 4.382027 1197 +kenneth 2 12 4.465908 8.931816 1265 +meng 1 12 4.465908 4.465908 1214 +road 1 11 4.553877 4.553877 1374 +success 1 10 4.653960 4.653960 1390 +zabih 1 6 5.164786 5.164786 2138 +sunlab 1 2 6.263398 6.263398 5222 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html new file mode 100644 index 00000000..e23f5421 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^snowman^index.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 4 380 0.693147 2.772588 9 +us 4 329 1.098612 4.394448 16 +last 3 314 1.098612 3.295836 14 +time 2 293 1.098612 2.197224 17 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +also 4 259 1.386294 5.545176 28 +link 2 247 1.386294 2.772588 24 +cornel 2 215 1.386294 2.772588 23 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +read 2 154 1.791759 3.583518 47 +recent 1 167 1.791759 1.791759 58 +like 4 132 1.945910 7.783640 81 +year 2 148 1.945910 3.891820 84 +object 1 138 1.945910 1.945910 79 +high 1 130 2.079442 2.079442 101 +well 3 109 2.197225 6.591675 121 +look 3 107 2.197225 6.591675 115 +place 2 106 2.197225 4.394450 124 +topic 2 114 2.197225 4.394450 110 +world 2 115 2.197225 4.394450 126 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +teach 1 108 2.197225 2.197225 112 +book 2 99 2.302585 4.605170 131 +peopl 2 96 2.302585 4.605170 132 +pictur 4 89 2.397895 9.591580 160 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +mani 1 92 2.397895 2.397895 150 +commun 1 95 2.397895 2.397895 157 +start 5 83 2.484907 12.424535 173 +school 3 84 2.484907 7.454721 188 +internet 2 83 2.484907 4.969814 186 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +thing 1 84 2.484907 2.484907 189 +novemb 1 81 2.484907 2.484907 179 +come 2 78 2.564949 5.129898 202 +want 1 79 2.564949 2.564949 199 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +june 1 79 2.564949 2.564949 214 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +visit 3 63 2.772589 8.317767 288 +plan 1 65 2.772589 2.772589 272 +written 1 63 2.772589 2.772589 278 +dept 1 64 2.772589 2.772589 291 +best 1 59 2.833213 2.833213 299 +summer 2 56 2.890372 5.780744 311 +direct 1 57 2.890372 2.890372 316 +point 1 58 2.890372 2.890372 319 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +talk 3 53 2.944439 8.833317 336 +much 5 52 2.995732 14.978660 349 +finger 2 52 2.995732 5.991464 354 +week 1 52 2.995732 2.995732 343 +friend 3 48 3.044522 9.133566 376 +right 1 48 3.044522 3.044522 363 +still 1 50 3.044522 3.044522 362 +could 1 46 3.091042 3.091042 383 +featur 1 46 3.091042 3.091042 386 +better 2 45 3.135494 6.270988 401 +even 1 45 3.135494 3.135494 393 +around 3 43 3.178054 9.534162 415 +music 1 42 3.218876 3.218876 436 +small 2 39 3.258097 6.516194 447 +probabl 2 40 3.258097 6.516194 455 +must 1 40 3.258097 3.258097 442 +late 1 40 3.258097 3.258097 439 +theoret 1 39 3.258097 3.258097 446 +live 1 40 3.258097 3.258097 451 +brian 1 38 3.295837 3.295837 466 +connect 1 37 3.332205 3.332205 485 +tree 2 36 3.367296 6.734592 492 +especi 2 36 3.367296 6.734592 496 +winter 1 36 3.367296 3.367296 500 +next 2 34 3.401197 6.802394 517 +random 1 34 3.401197 3.401197 511 +either 1 35 3.401197 3.401197 506 +go 1 33 3.433987 3.433987 529 +kind 1 32 3.465736 3.465736 541 +power 1 30 3.555348 3.555348 573 +actual 1 28 3.610918 3.610918 604 +quit 4 27 3.637586 14.550344 633 +great 1 27 3.637586 3.637586 626 +rather 1 26 3.688879 3.688879 642 +never 1 25 3.737670 3.737670 671 +spent 1 25 3.737670 3.737670 676 +magazin 1 24 3.761200 3.761200 704 +alwai 1 24 3.761200 3.761200 691 +pattern 1 24 3.761200 3.761200 689 +sometim 1 24 3.761200 3.761200 696 +togeth 1 23 3.806662 3.806662 714 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +inth 1 22 3.850148 3.850148 741 +instead 1 22 3.850148 3.850148 756 +leav 2 21 3.912023 7.824046 772 +half 2 21 3.912023 7.824046 776 +busi 2 21 3.912023 7.824046 784 +path 1 21 3.912023 3.912023 778 +hous 1 21 3.912023 3.912023 801 +tell 1 21 3.912023 3.912023 777 +theorem 1 21 3.912023 3.912023 786 +nice 1 20 3.951244 3.951244 809 +left 2 19 4.007333 8.014666 851 +mostli 1 19 4.007333 4.007333 869 +seem 7 18 4.060443 28.423101 899 +figur 2 18 4.060443 8.120886 903 +listen 1 18 4.060443 4.060443 907 +coupl 1 17 4.110874 4.110874 939 +stop 1 17 4.110874 4.110874 942 +sign 1 16 4.174387 4.174387 970 +modern 1 16 4.174387 4.174387 966 +brown 1 16 4.174387 4.174387 977 +side 1 15 4.248495 4.248495 1022 +score 1 15 4.248495 4.248495 1017 +english 1 15 4.248495 4.248495 1033 +indic 1 15 4.248495 4.248495 1013 +near 1 14 4.317488 4.317488 1091 +warn 1 14 4.317488 4.317488 1068 +stori 1 14 4.317488 4.317488 1087 +sai 2 13 4.382027 8.764054 1175 +someon 2 13 4.382027 8.764054 1128 +step 1 13 4.382027 4.382027 1138 +front 1 13 4.382027 4.382027 1154 +everyth 1 13 4.382027 4.382027 1169 +nick 1 13 4.382027 4.382027 1180 +insid 2 12 4.465908 8.931816 1262 +walk 1 12 4.465908 4.465908 1281 +tune 1 12 4.465908 4.465908 1227 +went 1 12 4.465908 4.465908 1279 +daniel 1 12 4.465908 4.465908 1233 +neat 1 12 4.465908 4.465908 1263 +america 1 11 4.553877 4.553877 1370 +label 2 10 4.653960 9.307920 1423 +mountain 1 10 4.653960 4.653960 1456 +traffic 1 10 4.653960 4.653960 1421 +hello 1 10 4.653960 4.653960 1407 +paragraph 1 10 4.653960 4.653960 1449 +awai 1 10 4.653960 4.653960 1447 +clear 5 9 4.753590 23.767950 1488 +sound 5 9 4.753590 23.767950 1605 +distanc 1 9 4.753590 4.753590 1500 +ring 3 8 4.875197 14.625591 1684 +realiz 1 8 4.875197 4.875197 1739 +perhap 1 8 4.875197 4.875197 1693 +contrast 1 8 4.875197 4.875197 1637 +gold 1 8 4.875197 4.875197 1745 +switch 1 8 4.875197 4.875197 1718 +manufactur 1 8 4.875197 4.875197 1634 +japan 1 8 4.875197 4.875197 1762 +ideal 1 8 4.875197 4.875197 1630 +hear 1 7 5.010635 5.010635 1940 +throughout 1 7 5.010635 5.010635 1871 +corner 1 7 5.010635 5.010635 1909 +smile 1 7 5.010635 5.010635 1807 +golden 1 7 5.010635 5.010635 1962 +centuri 1 7 5.010635 5.010635 1935 +intellectu 1 7 5.010635 5.010635 1847 +occasion 1 7 5.010635 5.010635 1905 +pronounc 1 7 5.010635 5.010635 1918 +largest 1 7 5.010635 5.010635 1858 +happen 1 7 5.010635 5.010635 1790 +snow 2 6 5.164786 10.329572 2031 +rain 1 6 5.164786 5.164786 2137 +strang 1 6 5.164786 5.164786 2064 +mother 1 6 5.164786 5.164786 2083 +seen 1 6 5.164786 5.164786 2202 +ifyou 1 6 5.164786 5.164786 1992 +polit 1 6 5.164786 5.164786 2115 +hair 2 5 5.347108 10.694216 2446 +fairli 1 5 5.347108 5.347108 2322 +respond 1 5 5.347108 5.347108 2354 +door 1 5 5.347108 5.347108 2291 +own 1 5 5.347108 5.347108 2531 +isth 1 5 5.347108 5.347108 2532 +sing 1 5 5.347108 5.347108 2499 +wear 3 4 5.568345 16.705035 2785 +somewhat 2 4 5.568345 11.136690 2659 +somehow 1 4 5.568345 5.568345 2974 +hell 1 4 5.568345 5.568345 2885 +glanc 1 4 5.568345 5.568345 2652 +tick 1 4 5.568345 5.568345 2975 +heard 1 4 5.568345 5.568345 2895 +paus 1 4 5.568345 5.568345 2965 +dark 1 4 5.568345 5.568345 2910 +usedto 1 4 5.568345 5.568345 2643 +gotten 1 4 5.568345 5.568345 2628 +hire 1 4 5.568345 5.568345 2976 +couldn 1 4 5.568345 5.568345 2977 +glass 2 3 5.857933 11.715866 3759 +vagu 2 3 5.857933 11.715866 3393 +surround 1 3 5.857933 5.857933 3492 +stone 1 3 5.857933 5.857933 3674 +scatter 1 3 5.857933 5.857933 3351 +worri 1 3 5.857933 5.857933 3130 +thin 1 3 5.857933 5.857933 3488 +bright 1 3 5.857933 5.857933 3596 +twentieth 1 3 5.857933 5.857933 3760 +wave 1 3 5.857933 5.857933 3518 +europ 1 3 5.857933 5.857933 3761 +child 1 3 5.857933 5.857933 3542 +blame 1 3 5.857933 5.857933 3636 +dread 1 3 5.857933 5.857933 3630 +wise 1 3 5.857933 5.857933 3631 +pyramid 1 3 5.857933 5.857933 3358 +maker 1 3 5.857933 5.857933 3164 +obviou 2 2 6.263398 12.526796 5367 +forest 1 2 6.263398 6.263398 5368 +shack 1 2 6.263398 6.263398 5369 +withno 1 2 6.263398 6.263398 5370 +pile 1 2 6.263398 6.263398 5371 +hum 1 2 6.263398 6.263398 4935 +purpl 1 2 6.263398 6.263398 5372 +silk 1 2 6.263398 6.263398 5373 +silver 1 2 6.263398 6.263398 5374 +ocean 1 2 6.263398 6.263398 5375 +andlook 1 2 6.263398 6.263398 4561 +altogeth 1 2 6.263398 6.263398 4751 +nowadai 1 2 6.263398 6.263398 5376 +pointcast 1 2 6.263398 6.263398 5377 +inner 1 2 6.263398 6.263398 4551 +furnitur 1 2 6.263398 6.263398 5016 +haveth 1 2 6.263398 6.263398 5378 +hani 1 2 6.263398 6.263398 5140 +neededto 1 2 6.263398 6.263398 5379 +agood 1 2 6.263398 6.263398 5380 +presum 2 1 6.957497 13.914994 11764 +heha 2 1 6.957497 13.914994 11765 +beaver 2 1 6.957497 13.914994 11766 +perri 2 1 6.957497 13.914994 11767 +clearinglook 1 1 6.957497 6.957497 11768 +thanyou 1 1 6.957497 6.957497 11769 +onal 1 1 6.957497 6.957497 11770 +theweath 1 1 6.957497 6.957497 11771 +overcast 1 1 6.957497 6.957497 11772 +ifit 1 1 6.957497 6.957497 11773 +quiteclear 1 1 6.957497 6.957497 11774 +bird 1 1 6.957497 6.957497 11775 +chirp 1 1 6.957497 6.957497 11776 +theymai 1 1 6.957497 6.957497 11777 +louder 1 1 6.957497 6.957497 11778 +nearbywaterfal 1 1 6.957497 6.957497 11779 +gotta 1 1 6.957497 6.957497 11780 +apath 1 1 6.957497 6.957497 11781 +asign 1 1 6.957497 6.957497 11782 +hillschool 1 1 6.957497 6.957497 11783 +wormhol 1 1 6.957497 6.957497 11784 +nearbyhous 1 1 6.957497 6.957497 11785 +clearinginsid 1 1 6.957497 6.957497 11786 +offand 1 1 6.957497 6.957497 11787 +theclear 1 1 6.957497 6.957497 11788 +rhyme 1 1 6.957497 6.957497 11789 +reasonto 1 1 6.957497 6.957497 11790 +sortsof 1 1 6.957497 6.957497 11791 +betteridea 1 1 6.957497 6.957497 11792 +itseem 1 1 6.957497 6.957497 11793 +importantth 1 1 6.957497 6.957497 11794 +clearingh 1 1 6.957497 6.957497 11795 +oftendescrib 1 1 6.957497 6.957497 11796 +mostdistinct 1 1 6.957497 6.957497 11797 +quitelong 1 1 6.957497 6.957497 11798 +elfin 1 1 6.957497 6.957497 11799 +peoplebefor 1 1 6.957497 6.957497 11800 +theresoon 1 1 6.957497 6.957497 11801 +startstel 1 1 6.957497 6.957497 11802 +whynichola 1 1 6.957497 6.957497 11803 +negropont 1 1 6.957497 6.957497 11804 +moron 1 1 6.957497 6.957497 11805 +thenh 1 1 6.957497 6.957497 11806 +obscur 1 1 6.957497 6.957497 11807 +hetend 1 1 6.957497 6.957497 11808 +appearanceinstead 1 1 6.957497 6.957497 11809 +turquois 1 1 6.957497 6.957497 11810 +linen 1 1 6.957497 6.957497 11811 +imageof 1 1 6.957497 6.957497 11812 +fromhim 1 1 6.957497 6.957497 11813 +pewter 1 1 6.957497 6.957497 11814 +pentacl 1 1 6.957497 6.957497 11815 +neck 1 1 6.957497 6.957497 11816 +hippi 1 1 6.957497 6.957497 11817 +asclass 1 1 6.957497 6.957497 11818 +clearingdan 1 1 6.957497 6.957497 11819 +briani 1 1 6.957497 6.957497 11820 +anundergrad 1 1 6.957497 6.957497 11821 +newsprovid 1 1 6.957497 6.957497 11822 +ancamosoiu 1 1 6.957497 6.957497 11823 +schwa 1 1 6.957497 6.957497 11824 +backwhen 1 1 6.957497 6.957497 11825 +twoand 1 1 6.957497 6.957497 11826 +wegradu 1 1 6.957497 6.957497 11827 +shejust 1 1 6.957497 6.957497 11828 +onewav 1 1 6.957497 6.957497 11829 +issomeon 1 1 6.957497 6.957497 11830 +severalmonth 1 1 6.957497 6.957497 11831 +becamemuch 1 1 6.957497 6.957497 11832 +eedepart 1 1 6.957497 6.957497 11833 +multimediastud 1 1 6.957497 6.957497 11834 +friendof 1 1 6.957497 6.957497 11835 +finlei 1 1 6.957497 6.957497 11836 +notanymor 1 1 6.957497 6.957497 11837 +steelcas 1 1 6.957497 6.957497 11838 +dserver 1 1 6.957497 6.957497 11839 +kinda 1 1 6.957497 6.957497 11840 +cheesi 1 1 6.957497 6.957497 11841 +thebuild 1 1 6.957497 6.957497 11842 +ius 1 1 6.957497 6.957497 11843 +programcal 1 1 6.957497 6.957497 11844 +graduatedfrom 1 1 6.957497 6.957497 11845 +cuter 1 1 6.957497 6.957497 11846 +thanth 1 1 6.957497 6.957497 11847 +blurri 1 1 6.957497 6.957497 11848 +dreamer 1 1 6.957497 6.957497 11849 +ofdream 1 1 6.957497 6.957497 11850 +aphex 1 1 6.957497 6.957497 11851 +twindan 1 1 6.957497 6.957497 11852 +snowman 1 1 6.957497 6.957497 11853 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html new file mode 100644 index 00000000..a6058c6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sonia^My.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +inform 1 412 0.693147 0.693147 8 +cornel 1 215 1.386294 1.386294 23 +take 1 97 2.302585 2.302585 134 +cool 1 49 3.044522 3.044522 374 +keep 1 44 3.135494 3.135494 409 +long 1 43 3.178054 3.178054 413 +soon 1 36 3.367296 3.367296 494 +lot 1 18 4.060443 4.060443 889 +rate 1 15 4.248495 4.248495 1037 +ashish 1 5 5.347108 5.347108 2473 +ultra 1 4 5.568345 5.568345 2889 +autobiographi 1 2 6.263398 6.263398 5070 +lookin 2 1 6.957497 13.914994 11854 +upkeep 1 1 6.957497 6.957497 11855 +setuup 1 1 6.957497 6.957497 11856 +doingajaymanishanujmom 1 1 6.957497 6.957497 11857 +daddepart 1 1 6.957497 6.957497 11858 +sciencesearch 1 1 6.957497 6.957497 11859 +netentertain 1 1 6.957497 6.957497 11860 +weeklycricket 1 1 6.957497 6.957497 11861 +soni 1 1 6.957497 6.957497 11862 +sonia 1 1 6.957497 6.957497 11863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html new file mode 100644 index 00000000..ea8c79c1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^spdawson^alpha.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +would 1 67 2.708050 2.708050 251 +browser 1 56 2.890372 2.890372 313 +better 1 45 3.135494 3.135494 401 +form 1 39 3.258097 3.258097 443 +scott 1 18 4.060443 4.060443 884 +capabl 1 15 4.248495 4.248495 1016 +dawson 1 2 6.263398 6.263398 4886 +padif 1 1 6.957497 6.957497 11864 +scottdawson 1 1 6.957497 6.957497 11865 +shomebas 1 1 6.957497 6.957497 11866 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html new file mode 100644 index 00000000..b0daedf9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stodghil^home.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +cornel 2 215 1.386294 2.772588 23 +hall 1 146 1.945910 1.945910 65 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +center 1 88 2.397895 2.397895 158 +institut 1 84 2.484907 2.484907 187 +paul 1 38 3.295837 3.295837 471 +scheme 1 20 3.951244 3.951244 818 +ultim 1 17 4.110874 4.110874 943 +affili 1 13 4.382027 4.382027 1194 +rhode 1 9 4.753590 4.753590 1579 +hockei 1 8 4.875197 4.875197 1760 +atcornel 1 6 5.164786 5.164786 2131 +stodghil 1 4 5.568345 5.568345 2864 +bernoulli 1 4 5.568345 5.568345 2955 +pagepaul 1 3 5.857933 5.857933 3669 +stodghillstodghil 1 1 6.957497 6.957497 11867 +acri 1 1 6.957497 6.957497 11868 +projectinterest 1 1 6.957497 6.957497 11869 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html new file mode 100644 index 00000000..d2f80534 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^stoller^stoller.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +updat 1 191 1.609438 1.609438 41 +august 1 66 2.708050 2.708050 257 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +scott 1 18 4.060443 4.060443 884 +former 2 17 4.110874 8.221748 956 +indiana 1 6 5.164786 5.164786 2057 +stoller 3 4 5.568345 16.705035 2866 +pagescott 1 4 5.568345 5.568345 2978 +hyplan 1 1 6.957497 6.957497 11870 +htmllast 1 1 6.957497 6.957497 11871 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html new file mode 100644 index 00000000..9145891f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sugata^home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 3 380 0.693147 2.079441 9 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +cornel 3 215 1.386294 4.158882 23 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +perform 2 143 1.945910 3.891820 74 +architectur 1 139 1.945910 1.945910 77 +high 2 130 2.079442 4.158884 101 +welcom 1 122 2.079442 2.079442 99 +spring 1 131 2.079442 2.079442 88 +compil 1 122 2.079442 2.079442 96 +person 1 111 2.197225 2.197225 117 +theori 1 111 2.197225 2.197225 127 +take 2 97 2.302585 4.605170 134 +advanc 1 99 2.302585 2.302585 130 +multimedia 1 68 2.708050 2.708050 258 +prof 1 64 2.772589 2.772589 273 +previou 1 62 2.772589 2.772589 290 +semest 1 58 2.890372 2.890372 312 +brian 1 38 3.295837 3.295837 466 +seminar 1 38 3.295837 3.295837 470 +option 1 30 3.555348 3.555348 575 +progress 1 28 3.610918 3.610918 598 +smith 1 20 3.951244 3.951244 820 +wonder 1 20 3.951244 3.951244 815 +ofcomput 1 10 4.653960 4.653960 1442 +earth 1 10 4.653960 4.653960 1463 +marri 1 7 5.010635 5.010635 1946 +price 1 6 5.164786 5.164786 1999 +czar 1 5 5.347108 5.347108 2503 +mehom 1 4 5.568345 5.568345 2979 +eduaddress 1 3 5.857933 5.857933 3762 +sugata 3 2 6.263398 18.790194 4976 +mukhopadhyai 2 2 6.263398 12.526796 4981 +ritu 1 1 6.957497 6.957497 11872 +mailsugata 1 1 6.957497 6.957497 11873 +hichori 1 1 6.957497 6.957497 11874 +estat 1 1 6.957497 6.957497 11875 +owego 1 1 6.957497 6.957497 11876 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html new file mode 100644 index 00000000..06671fcb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sukhpal^sukhpal.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 2 215 1.386294 2.772588 23 +need 1 98 2.302585 2.302585 135 +present 1 91 2.397895 2.397895 145 +resum 1 79 2.564949 2.564949 217 +java 1 70 2.708050 2.708050 248 +view 1 70 2.708050 2.708050 254 +ithaca 1 65 2.772589 2.772589 294 +browser 1 56 2.890372 2.890372 313 +life 1 50 3.044522 3.044522 375 +physic 1 47 3.091042 3.091042 377 +paul 2 38 3.295837 6.591674 471 +anim 1 31 3.496508 3.496508 557 +background 1 25 3.737670 3.737670 664 +rout 1 21 3.912023 3.912023 793 +capabl 1 15 4.248495 4.248495 1016 +philosophi 1 13 4.382027 4.382027 1167 +clock 1 11 4.553877 4.553877 1320 +tick 1 4 5.568345 5.568345 2975 +carleton 1 2 6.263398 6.263398 5381 +sukhpal 3 1 6.957497 20.872491 11877 +sanghera 2 1 6.957497 13.914994 11878 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html new file mode 100644 index 00000000..6b8e76bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sumedh^home.html @@ -0,0 +1,133 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +work 2 380 0.693147 1.386294 9 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +read 2 154 1.791759 3.583518 47 +first 3 140 1.945910 5.837730 71 +high 1 130 2.079442 2.079442 101 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +place 1 106 2.197225 2.197225 124 +part 3 98 2.302585 6.907755 129 +peopl 1 96 2.302585 2.302585 132 +mani 1 92 2.397895 2.397895 150 +want 1 79 2.564949 2.564949 199 +orient 1 80 2.564949 2.564949 205 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +summer 1 56 2.890372 2.890372 311 +space 1 57 2.890372 2.890372 310 +found 1 53 2.944439 2.944439 337 +week 1 52 2.995732 2.995732 343 +run 1 51 2.995732 2.995732 347 +understand 1 47 3.091042 3.091042 384 +video 1 44 3.135494 3.135494 405 +show 1 43 3.178054 3.178054 417 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +realli 1 40 3.258097 3.258097 444 +microsoft 3 38 3.295837 9.887511 468 +industri 1 38 3.295837 3.295837 464 +feel 1 37 3.332205 3.332205 483 +hand 1 37 3.332205 3.332205 475 +post 1 35 3.401197 3.401197 505 +print 1 34 3.401197 3.401197 503 +everi 1 34 3.401197 3.401197 519 +within 1 33 3.433987 3.433987 525 +kind 1 32 3.465736 3.465736 541 +progress 1 28 3.610918 3.610918 598 +enjoi 1 26 3.688879 3.688879 660 +session 1 26 3.688879 3.688879 643 +notic 1 25 3.737670 3.737670 675 +never 1 25 3.737670 3.737670 671 +toward 1 25 3.737670 3.737670 668 +seri 1 24 3.761200 3.761200 708 +recommend 1 22 3.850148 3.850148 737 +corpor 1 21 3.912023 3.912023 802 +stand 1 18 4.060443 4.060443 891 +weekli 2 17 4.110874 8.221748 919 +whether 1 17 4.110874 4.110874 918 +attempt 1 17 4.110874 4.110874 917 +sign 1 16 4.174387 4.174387 970 +anywai 1 15 4.248495 4.248495 1047 +comic 1 14 4.317488 4.317488 1103 +social 1 13 4.382027 4.382027 1123 +everyon 1 13 4.382027 4.382027 1148 +regularli 1 11 4.553877 4.553877 1338 +fill 1 11 4.553877 4.553877 1349 +strongli 1 10 4.653960 4.653960 1406 +didn 1 9 4.753590 4.753590 1563 +parti 3 8 4.875197 14.625591 1676 +empir 1 8 4.875197 4.875197 1722 +illustr 1 8 4.875197 4.875197 1679 +told 1 8 4.875197 4.875197 1658 +strip 3 6 5.164786 15.494358 2203 +tri 1 6 5.164786 5.164786 2166 +viewpoint 1 6 5.164786 5.164786 2116 +gate 1 6 5.164786 5.164786 2182 +begun 1 5 5.347108 5.347108 2386 +suffer 1 5 5.347108 5.347108 2268 +fit 1 5 5.347108 5.347108 2285 +episod 5 4 5.568345 27.841725 2747 +lord 4 4 5.568345 22.273380 2906 +employe 3 4 5.568345 16.705035 2717 +drew 2 4 5.568345 11.136690 2980 +theintern 1 4 5.568345 5.568345 2981 +newslett 1 4 5.568345 5.568345 2873 +sumedh 1 3 5.857933 5.857933 3101 +thesumm 1 3 5.857933 5.857933 3763 +redmond 1 3 5.857933 5.857933 3568 +internship 1 3 5.857933 5.857933 3764 +flame 1 3 5.857933 5.857933 3696 +galact 2 2 6.263398 12.526796 5219 +eduthi 1 2 6.263398 6.263398 5382 +alia 1 2 6.263398 6.263398 5383 +arriv 1 2 6.263398 6.263398 4132 +persuad 1 2 6.263398 6.263398 5384 +declin 1 2 6.263398 6.263398 5385 +portrai 1 2 6.263398 6.263398 5386 +bitter 1 2 6.263398 6.263398 5387 +rebel 1 2 6.263398 6.263398 5388 +imperi 1 2 6.263398 6.263398 5389 +gater 6 1 6.957497 41.744982 11879 +bilth 2 1 6.957497 13.914994 11880 +empirewritten 1 1 6.957497 6.957497 11881 +kanetkaremail 1 1 6.957497 6.957497 11882 +kanetkar 1 1 6.957497 6.957497 11883 +atmicrosoft 1 1 6.957497 6.957497 11884 +artwork 1 1 6.957497 6.957497 11885 +leak 1 1 6.957497 6.957497 11886 +theful 1 1 6.957497 6.957497 11887 +stripi 1 1 6.957497 6.957497 11888 +themicrosoft 1 1 6.957497 6.957497 11889 +perceiv 1 1 6.957497 6.957497 11890 +problemand 1 1 6.957497 6.957497 11891 +evilempir 1 1 6.957497 6.957497 11892 +comicstrip 1 1 6.957497 6.957497 11893 +theoutsid 1 1 6.957497 6.957497 11894 +eitherbil 1 1 6.957497 6.957497 11895 +heck 1 1 6.957497 6.957497 11896 +summersof 1 1 6.957497 6.957497 11897 +anyoneinterest 1 1 6.957497 6.957497 11898 +thateveri 1 1 6.957497 6.957497 11899 +theyshow 1 1 6.957497 6.957497 11900 +trivia 1 1 6.957497 6.957497 11901 +makey 1 1 6.957497 6.957497 11902 +disclosur 1 1 6.957497 6.957497 11903 +agreeement 1 1 6.957497 6.957497 11904 +theymad 1 1 6.957497 6.957497 11905 +theirheart 1 1 6.957497 6.957497 11906 +pledg 1 1 6.957497 6.957497 11907 +alleig 1 1 6.957497 6.957497 11908 +everydesk 1 1 6.957497 6.957497 11909 +roosterepisod 1 1 6.957497 6.957497 11910 +threatepisod 1 1 6.957497 6.957497 11911 +insigniaepisod 1 1 6.957497 6.957497 11912 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html new file mode 100644 index 00000000..c6246b14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^summers^summers.html @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +system 1 443 0.693147 0.693147 6 +student 2 343 1.098612 2.197224 19 +us 2 329 1.098612 2.197224 16 +current 1 284 1.098612 1.098612 21 +cornel 3 215 1.386294 4.158882 23 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +paper 2 205 1.609438 3.218876 38 +group 1 183 1.609438 1.609438 36 +address 1 170 1.791759 1.791759 62 +algorithm 1 162 1.791759 1.791759 57 +support 2 132 1.945910 3.891820 83 +hall 1 146 1.945910 1.945910 65 +like 1 132 1.945910 1.945910 81 +problem 1 147 1.945910 1.945910 75 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +document 13 121 2.079442 27.032746 89 +report 2 131 2.079442 4.158884 92 +number 2 130 2.079442 4.158884 97 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +structur 5 106 2.197225 10.986125 105 +version 1 113 2.197225 2.197225 122 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +technic 2 100 2.302585 4.605170 140 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +memori 1 101 2.302585 2.302585 139 +proceed 3 93 2.397895 7.193685 152 +section 1 94 2.397895 2.397895 149 +question 1 91 2.397895 2.397895 141 +novemb 1 81 2.484907 2.484907 179 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +issu 3 78 2.564949 7.694847 211 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +logic 4 71 2.639057 10.556228 230 +upson 1 71 2.639057 2.639057 218 +effici 1 73 2.639057 2.639057 233 +workshop 1 71 2.639057 2.639057 239 +goal 2 66 2.708050 5.416100 250 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +august 1 66 2.708050 2.708050 257 +evalu 1 64 2.772589 2.772589 266 +previou 1 62 2.772589 2.772589 290 +descript 1 64 2.772589 2.772589 271 +experi 1 64 2.772589 2.772589 283 +type 2 61 2.833213 5.666426 296 +best 1 59 2.833213 2.833213 299 +summer 3 56 2.890372 8.671116 311 +index 2 56 2.890372 5.780744 309 +space 1 57 2.890372 2.890372 310 +publish 1 57 2.890372 2.890372 326 +digit 1 52 2.995732 2.995732 348 +principl 1 48 3.044522 3.044522 357 +electron 4 47 3.091042 12.364168 379 +term 1 43 3.178054 3.178054 411 +autom 2 41 3.218876 6.437752 434 +editor 1 41 3.218876 3.218876 433 +theoret 1 39 3.258097 3.258097 446 +author 1 39 3.258097 3.258097 450 +close 1 38 3.295837 3.295837 465 +correct 1 38 3.295837 3.295837 462 +return 1 34 3.401197 3.401197 502 +given 1 32 3.465736 3.465736 538 +chapter 1 32 3.465736 3.465736 536 +abl 1 30 3.555348 3.555348 566 +compon 1 30 3.555348 3.555348 570 +limit 1 29 3.583519 3.583519 585 +retriev 2 27 3.637586 7.275172 621 +relev 1 26 3.688879 3.688879 637 +enabl 1 26 3.688879 3.688879 655 +primari 2 25 3.737670 7.475340 669 +task 1 25 3.737670 3.737670 678 +toward 1 25 3.737670 3.737670 668 +magazin 1 24 3.761200 3.761200 704 +handl 1 24 3.761200 3.761200 685 +seri 1 24 3.761200 3.761200 708 +brows 3 23 3.806662 11.419986 726 +input 1 23 3.806662 3.806662 727 +head 1 23 3.806662 3.806662 732 +recognit 1 23 3.806662 3.806662 723 +identifi 1 22 3.850148 3.850148 760 +hierarchi 1 22 3.850148 3.850148 744 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +flexibl 3 21 3.912023 11.736069 792 +output 1 21 3.912023 3.912023 788 +busi 1 21 3.912023 3.912023 784 +geometr 1 19 4.007333 4.007333 852 +boston 1 19 4.007333 4.007333 862 +segment 1 17 4.110874 4.110874 931 +stop 1 17 4.110874 4.110874 942 +adam 1 17 4.110874 4.110874 934 +white 1 17 4.110874 4.110874 951 +letter 1 16 4.174387 4.174387 981 +piec 3 15 4.248495 12.745485 1020 +hierarch 1 15 4.248495 4.248495 1018 +near 1 14 4.317488 4.317488 1091 +bodi 1 13 4.382027 4.382027 1178 +johnson 1 13 4.382027 4.382027 1162 +captur 1 12 4.465908 4.465908 1232 +scan 1 12 4.465908 4.465908 1243 +bruce 1 12 4.465908 4.465908 1226 +arbitrari 1 11 4.553877 4.553877 1359 +paragraph 1 10 4.653960 4.653960 1449 +discov 1 9 4.753590 4.753590 1562 +donald 1 9 4.753590 4.753590 1510 +classif 1 9 4.753590 4.753590 1586 +rais 1 8 4.875197 4.875197 1711 +tobe 1 6 5.164786 5.164786 1995 +textual 1 6 5.164786 5.164786 1979 +superhighwai 1 4 5.568345 5.568345 2943 +similarli 1 3 5.857933 5.857933 3241 +categor 1 3 5.857933 5.857933 3765 +daniela 1 3 5.857933 5.857933 3611 +recipi 1 3 5.857933 5.857933 3627 +andclassif 1 2 6.263398 6.263398 5390 +cue 1 2 6.263398 6.263398 5391 +bharat 1 2 6.263398 6.263398 5107 +forthcom 1 2 6.263398 6.263398 5392 +dartmouth 1 2 6.263398 6.263398 5393 +montral 1 2 6.263398 6.263398 5394 +kristen 2 1 6.957497 13.914994 11913 +divid 2 1 6.957497 13.914994 11914 +accessresearch 1 1 6.957497 6.957497 11915 +mylong 1 1 6.957497 6.957497 11916 +forsophist 1 1 6.957497 6.957497 11917 +manipulationtool 1 1 6.957497 6.957497 11918 +logicalstructur 1 1 6.957497 6.957497 11919 +documentrepresent 1 1 6.957497 6.957497 11920 +hierarchyof 1 1 6.957497 6.957497 11921 +postscriptvers 1 1 6.957497 6.957497 11922 +croft 1 1 6.957497 6.957497 11923 +determininglog 1 1 6.957497 6.957497 11924 +soin 1 1 6.957497 6.957497 11925 +ofmultipl 1 1 6.957497 6.957497 11926 +browsingco 1 1 6.957497 6.957497 11927 +nabil 1 1 6.957497 6.957497 11928 +bhargava 1 1 6.957497 6.957497 11929 +yelena 1 1 6.957497 6.957497 11930 +yesha 1 1 6.957497 6.957497 11931 +seeheim 1 1 6.957497 6.957497 11932 +podp 1 1 6.957497 6.957497 11933 +taxonomi 1 1 6.957497 6.957497 11934 +structureselectron 1 1 6.957497 6.957497 11935 +dag 1 1 6.957497 6.957497 11936 +scholaraward 1 1 6.957497 6.957497 11937 +wordless 1 1 6.957497 6.957497 11938 +analysisand 1 1 6.957497 6.957497 11939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html new file mode 100644 index 00000000..73d97ee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^suzuki^suzuki.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +system 5 443 0.693147 3.465735 6 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 4 340 1.098612 4.394448 18 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +oper 3 180 1.609438 4.828314 34 +fall 1 181 1.609438 1.609438 40 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +model 1 145 1.945910 1.945910 69 +spring 2 131 2.079442 4.158884 88 +introduct 2 126 2.079442 4.158884 87 +databas 2 122 2.079442 4.158884 86 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +structur 1 106 2.197225 2.197225 105 +site 1 106 2.197225 2.197225 119 +manag 1 114 2.197225 2.197225 125 +techniqu 1 99 2.302585 2.302585 138 +graphic 2 90 2.397895 4.795790 147 +optim 1 79 2.564949 2.564949 197 +resum 1 79 2.564949 2.564949 217 +java 2 70 2.708050 5.416100 248 +would 1 67 2.708050 2.708050 251 +simul 1 66 2.708050 2.708050 255 +multimedia 1 68 2.708050 2.708050 258 +polici 1 64 2.772589 2.772589 279 +browser 1 56 2.890372 2.890372 313 +summer 1 56 2.890372 2.890372 311 +case 1 51 2.995732 2.995732 351 +probabl 1 40 3.258097 3.258097 455 +prototyp 1 38 3.295837 3.295837 463 +statist 1 35 3.401197 3.401197 521 +independ 1 32 3.465736 3.465736 548 +displai 1 23 3.806662 3.806662 712 +applet 1 20 3.951244 3.951244 827 +telecommun 1 9 4.753590 4.753590 1565 +polygon 1 8 4.875197 4.875197 1723 +thrive 1 5 5.347108 5.347108 2257 +revolut 1 5 5.347108 5.347108 2315 +sector 1 3 5.857933 5.857933 3766 +suppot 1 2 6.263398 6.263398 5243 +stochast 1 2 6.263398 6.263398 4832 +masafumi 2 1 6.957497 13.914994 11940 +suzukither 1 1 6.957497 6.957497 11941 +suzukisuzuki 1 1 6.957497 6.957497 11942 +educlassesfal 1 1 6.957497 6.957497 11943 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html new file mode 100644 index 00000000..cd24f4e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^swartz^swartz.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +cornel 3 215 1.386294 4.158882 23 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +phone 1 175 1.791759 1.791759 45 +process 1 142 1.945910 1.945910 72 +number 1 130 2.079442 2.079442 97 +multimedia 1 68 2.708050 2.708050 258 +littl 1 39 3.258097 3.258097 454 +movi 1 40 3.258097 3.258097 459 +spend 1 19 4.007333 4.007333 850 +jonathan 3 13 4.382027 13.146081 1174 +edui 1 13 4.382027 4.382027 1193 +departmentof 1 9 4.753590 4.753590 1539 +humor 1 5 5.347108 5.347108 2533 +swartz 5 4 5.568345 27.841725 2878 +heredevelopingrivl 1 1 6.957497 6.957497 11944 +myaddress 1 1 6.957497 6.957497 11945 +brighten 1 1 6.957497 6.957497 11946 +dayjon 1 1 6.957497 6.957497 11947 +connectioncool 1 1 6.957497 6.957497 11948 +siteslast 1 1 6.957497 6.957497 11949 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html new file mode 100644 index 00000000..0f063d65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^sxsrivas^sxsrivas.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +class 2 199 1.609438 3.218876 37 +person 1 111 2.197225 2.197225 117 +send 1 114 2.197225 2.197225 109 +question 1 91 2.397895 2.397895 141 +academ 1 82 2.484907 2.484907 178 +pagewelcom 1 11 4.553877 4.553877 1344 +studentcomput 1 7 5.010635 5.010635 1963 +departmentcornel 1 5 5.347108 5.347108 2275 +srivastava 1 2 6.263398 6.263398 5395 +sunil 2 1 6.957497 13.914994 11950 +srivastavamast 1 1 6.957497 6.957497 11951 +linkscom 1 1 6.957497 6.957497 11952 +sxsriva 1 1 6.957497 6.957497 11953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html new file mode 100644 index 00000000..310ceea8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^szuwen^szuwen.html @@ -0,0 +1,219 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 3 374 0.693147 2.079441 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +cours 3 273 1.098612 3.295836 15 +project 2 340 1.098612 2.197224 18 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +also 2 259 1.386294 2.772588 28 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +design 1 213 1.386294 1.386294 25 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +read 2 154 1.791759 3.583518 47 +network 2 168 1.791759 3.583518 61 +avail 1 169 1.791759 1.791759 48 +year 5 148 1.945910 9.729550 84 +model 5 145 1.945910 9.729550 69 +like 2 132 1.945910 3.891820 81 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +find 1 111 2.197225 2.197225 111 +version 1 113 2.197225 2.197225 122 +check 1 115 2.197225 2.197225 118 +part 1 98 2.302585 2.302585 129 +mani 2 92 2.397895 4.795790 150 +pictur 1 89 2.397895 2.397895 160 +grade 1 90 2.397895 2.397895 142 +graphic 1 90 2.397895 2.397895 147 +octob 1 89 2.397895 2.397895 156 +build 2 85 2.484907 4.969814 184 +second 1 81 2.484907 2.484907 166 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +want 2 79 2.564949 5.129898 199 +good 1 77 2.564949 2.564949 200 +orient 1 80 2.564949 2.564949 205 +master 1 76 2.564949 2.564949 216 +involv 2 71 2.639057 5.278114 227 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +practic 1 70 2.708050 2.708050 246 +import 1 65 2.772589 2.772589 282 +septemb 1 65 2.772589 2.772589 274 +street 1 63 2.772589 2.772589 293 +colleg 1 61 2.833213 2.833213 300 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +unix 1 58 2.890372 2.890372 308 +found 2 53 2.944439 5.888878 337 +three 1 54 2.944439 2.944439 330 +run 1 51 2.995732 2.995732 347 +life 2 50 3.044522 6.089044 375 +effect 1 46 3.091042 3.091042 385 +made 1 44 3.135494 3.135494 398 +natur 1 44 3.135494 3.135494 406 +anoth 1 45 3.135494 3.135494 408 +around 1 43 3.178054 3.178054 415 +long 1 43 3.178054 3.178054 413 +music 1 42 3.218876 3.218876 436 +editor 1 41 3.218876 3.218876 433 +live 1 40 3.258097 3.258097 451 +mean 1 37 3.332205 3.332205 477 +field 1 37 3.332205 3.332205 482 +survei 1 35 3.401197 3.401197 513 +human 1 32 3.465736 3.465736 546 +independ 1 32 3.465736 3.465736 548 +becom 1 28 3.610918 3.610918 603 +quit 3 27 3.637586 10.912758 633 +though 1 27 3.637586 3.637586 622 +linux 1 27 3.637586 3.637586 631 +rather 1 26 3.688879 3.688879 642 +higher 1 24 3.761200 3.761200 690 +other 1 24 3.761200 3.761200 697 +wish 1 24 3.761200 3.761200 692 +almost 1 22 3.850148 3.850148 742 +self 1 22 3.850148 3.850148 761 +thu 2 21 3.912023 7.824046 773 +love 2 21 3.912023 7.824046 804 +watch 2 21 3.912023 7.824046 789 +nice 1 20 3.951244 3.951244 809 +supervis 1 20 3.951244 3.951244 840 +ever 1 19 4.007333 4.007333 872 +four 1 18 4.060443 4.060443 905 +listen 1 18 4.060443 4.060443 907 +concentr 1 18 4.060443 4.060443 906 +steven 2 17 4.110874 8.221748 953 +taiwan 1 16 4.174387 4.174387 1006 +hobbi 1 16 4.174387 4.174387 1009 +anyth 1 16 4.174387 4.174387 998 +intel 1 16 4.174387 4.174387 1000 +later 1 15 4.248495 4.248495 1043 +enough 1 15 4.248495 4.248495 1040 +becam 1 14 4.317488 4.317488 1117 +stori 1 14 4.317488 4.317488 1087 +draw 1 14 4.317488 4.317488 1086 +happi 1 14 4.317488 4.317488 1079 +audio 1 14 4.317488 4.317488 1094 +everyth 1 13 4.382027 4.382027 1169 +resolut 1 13 4.382027 4.382027 1172 +huang 2 12 4.465908 8.931816 1202 +career 2 12 4.465908 8.931816 1287 +promot 1 12 4.465908 4.465908 1235 +bruce 1 12 4.465908 4.465908 1226 +land 1 12 4.465908 4.465908 1273 +island 1 11 4.553877 4.553877 1345 +night 1 11 4.553877 4.553877 1319 +host 1 11 4.553877 4.553877 1306 +literatur 1 11 4.553877 4.553877 1300 +broad 1 11 4.553877 4.553877 1302 +ofcomput 2 10 4.653960 9.307920 1442 +rich 1 10 4.653960 4.653960 1396 +seven 2 9 4.753590 9.507180 1561 +calvin 1 9 4.753590 4.753590 1518 +face 1 9 4.753590 4.753590 1501 +ideal 1 8 4.875197 4.875197 1630 +film 1 8 4.875197 4.875197 1761 +exactli 1 7 5.010635 5.010635 1817 +hunt 1 7 5.010635 5.010635 1798 +brought 1 7 5.010635 5.010635 1925 +migrat 1 7 5.010635 5.010635 1851 +discoveri 1 7 5.010635 5.010635 1915 +channel 1 7 5.010635 5.010635 1836 +truth 1 6 5.164786 5.164786 2179 +parent 1 6 5.164786 5.164786 2204 +south 1 6 5.164786 5.164786 2167 +lucki 1 6 5.164786 5.164786 2163 +chat 1 6 5.164786 5.164786 2128 +railroad 1 6 5.164786 5.164786 2161 +myresum 1 6 5.164786 5.164786 2199 +freeli 1 6 5.164786 5.164786 2014 +ship 1 5 5.347108 5.347108 2534 +tang 1 5 5.347108 5.347108 2409 +knew 1 5 5.347108 5.347108 2445 +ofinterest 1 5 5.347108 5.347108 2323 +champion 1 4 5.568345 5.568345 2982 +proud 1 4 5.568345 5.568345 2918 +assur 1 4 5.568345 5.568345 2722 +somewhat 1 4 5.568345 5.568345 2659 +fulfil 1 4 5.568345 5.568345 2932 +hobb 1 4 5.568345 5.568345 2893 +children 1 3 5.857933 5.857933 3767 +talent 1 3 5.857933 5.857933 3768 +peac 1 3 5.857933 5.857933 3769 +pai 1 3 5.857933 5.857933 3672 +asid 1 3 5.857933 5.857933 3770 +lego 1 3 5.857933 5.857933 3188 +dick 1 2 6.263398 6.263398 5396 +tender 1 2 6.263398 6.263398 5397 +tropic 1 2 6.263398 6.263398 5398 +fifteen 1 2 6.263398 6.263398 5399 +marvel 1 2 6.263398 6.263398 5400 +defeat 1 2 6.263398 6.263398 5401 +reward 1 2 6.263398 6.263398 5402 +andwork 1 2 6.263398 6.263398 5403 +relai 1 2 6.263398 6.263398 5404 +weapon 1 2 6.263398 6.263398 5115 +spectrum 1 2 6.263398 6.263398 5405 +blobbi 1 2 6.263398 6.263398 4820 +huangszu 1 1 6.957497 6.957497 11954 +defend 1 1 6.957497 6.957497 11955 +justic 1 1 6.957497 6.957497 11956 +nevermind 1 1 6.957497 6.957497 11957 +iarriv 1 1 6.957497 6.957497 11958 +soundslik 1 1 6.957497 6.957497 11959 +mobi 1 1 6.957497 6.957497 11960 +nointent 1 1 6.957497 6.957497 11961 +whale 1 1 6.957497 6.957497 11962 +digress 1 1 6.957497 6.957497 11963 +wholefamili 1 1 6.957497 6.957497 11964 +philippin 1 1 6.957497 6.957497 11965 +aroundsix 1 1 6.957497 6.957497 11966 +fluentli 1 1 6.957497 6.957497 11967 +bilingu 1 1 6.957497 6.957497 11968 +thepoetri 1 1 6.957497 6.957497 11969 +dynasti 1 1 6.957497 6.957497 11970 +arabian 1 1 6.957497 6.957497 11971 +doveright 1 1 6.957497 6.957497 11972 +ienter 1 1 6.957497 6.957497 11973 +philippineswith 1 1 6.957497 6.957497 11974 +unabashedli 1 1 6.957497 6.957497 11975 +alsoin 1 1 6.957497 6.957497 11976 +whirlwind 1 1 6.957497 6.957497 11977 +awoman 1 1 6.957497 6.957497 11978 +effortlessli 1 1 6.957497 6.957497 11979 +eek 1 1 6.957497 6.957497 11980 +blunt 1 1 6.957497 6.957497 11981 +ienrol 1 1 6.957497 6.957497 11982 +segreg 1 1 6.957497 6.957497 11983 +everydaygeek 1 1 6.957497 6.957497 11984 +fromactu 1 1 6.957497 6.957497 11985 +happili 1 1 6.957497 6.957497 11986 +myspar 1 1 6.957497 6.957497 11987 +linuxnet 1 1 6.957497 6.957497 11988 +plastic 1 1 6.957497 6.957497 11989 +suspens 1 1 6.957497 6.957497 11990 +thriller 1 1 6.957497 6.957497 11991 +sting 1 1 6.957497 6.957497 11992 +sesam 1 1 6.957497 6.957497 11993 +offend 1 1 6.957497 6.957497 11994 +bysom 1 1 6.957497 6.957497 11995 +blatant 1 1 6.957497 6.957497 11996 +highlyinterest 1 1 6.957497 6.957497 11997 +compatiblecomput 1 1 6.957497 6.957497 11998 +metaballsund 1 1 6.957497 6.957497 11999 +techniquesin 1 1 6.957497 6.957497 12000 +andport 1 1 6.957497 6.957497 12001 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html new file mode 100644 index 00000000..2d5b37e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^hytech.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +technolog 1 131 2.079442 2.079442 102 +move 1 47 3.091042 3.091042 382 +hybrid 1 15 4.248495 4.248495 1057 +henzing 1 3 5.857933 5.857933 3713 +hytechhytech 1 1 6.957497 6.957497 12002 +toolw 1 1 6.957497 6.957497 12003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html new file mode 100644 index 00000000..41a9fecf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tah^tah.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 3 443 0.693147 2.079441 6 +program 1 374 0.693147 0.693147 7 +time 6 293 1.098612 6.591672 17 +cornel 2 215 1.386294 2.772588 23 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +analysi 5 124 2.079442 10.397210 98 +advanc 1 99 2.302585 2.302585 130 +real 3 93 2.397895 7.193685 144 +control 1 82 2.484907 2.484907 164 +logic 1 71 2.639057 2.639057 230 +septemb 1 65 2.772589 2.772589 274 +linear 1 41 3.218876 3.218876 431 +formal 4 37 3.332205 13.328820 478 +concurr 2 34 3.401197 6.802394 501 +symbol 1 27 3.637586 3.637586 620 +universityithaca 1 24 3.761200 3.761200 710 +methodolog 4 23 3.806662 15.226648 733 +verif 1 20 3.951244 3.951244 826 +aid 1 18 4.060443 4.060443 904 +hybrid 2 15 4.248495 8.496990 1057 +eduphon 1 15 4.248495 4.248495 1060 +transit 1 15 4.248495 4.248495 1046 +embed 2 14 4.317488 8.634976 1102 +automata 2 13 4.382027 8.764054 1135 +modul 1 10 4.653960 4.653960 1434 +departmentcornel 1 5 5.347108 5.347108 2275 +henzing 1 3 5.857933 5.857933 3713 +professorcomput 1 3 5.857933 5.857933 3714 +worldwid 1 3 5.857933 5.857933 3704 +checker 1 3 5.857933 5.857933 3644 +systemsr 1 2 6.263398 6.263398 4312 +bibtex 1 2 6.263398 6.263398 5406 +henzingerthoma 1 1 6.957497 6.957497 12004 +movedassist 1 1 6.957497 6.957497 12005 +researchform 1 1 6.957497 6.957497 12006 +researchat 1 1 6.957497 6.957497 12007 +cornelland 1 1 6.957497 6.957497 12008 +resumepublicationsreact 1 1 6.957497 6.957497 12009 +systemsclock 1 1 6.957497 6.957497 12010 +systemshybrid 1 1 6.957497 6.957497 12011 +systemsbibliographi 1 1 6.957497 6.957497 12012 +publicationstoolshytech 1 1 6.957497 6.957497 12013 +systemscoursesc 1 1 6.957497 6.957497 12014 +languagesconferenceshybrid 1 1 6.957497 6.957497 12015 +systemscav 1 1 6.957497 6.957497 12016 +verificationlast 1 1 6.957497 6.957497 12017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html new file mode 100644 index 00000000..08e6d7e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^takako^home.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +manag 1 114 2.197225 2.197225 125 +homepag 1 93 2.397895 2.397895 148 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +upson 1 71 2.639057 2.639057 218 +previou 1 62 2.772589 2.772589 290 +life 1 50 3.044522 3.044522 375 +eduoffic 1 33 3.433987 3.433987 531 +quot 1 29 3.583519 3.583519 582 +psycholog 1 15 4.248495 4.248495 1054 +horu 1 14 4.317488 4.317488 1116 +social 1 13 4.382027 4.382027 1123 +reness 1 11 4.553877 4.553877 1333 +hockei 1 8 4.875197 4.875197 1760 +atcornel 1 6 5.164786 5.164786 2131 +advis 1 6 5.164786 5.164786 2173 +hickei 2 4 5.568345 11.136690 2845 +hallphon 1 4 5.568345 5.568345 2900 +schneider 1 4 5.568345 5.568345 2868 +takako 3 3 5.857933 17.573799 3538 +backcountri 1 3 5.857933 5.857933 3686 +byrobbert 1 1 6.957497 6.957497 12018 +andfr 1 1 6.957497 6.957497 12019 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html new file mode 100644 index 00000000..82fcb834 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tt^Tim_Teitelbaum.html @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +languag 1 227 1.386294 1.386294 26 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +professor 1 137 1.945910 1.945910 76 +compil 1 122 2.079442 2.079442 96 +environ 1 84 2.484907 2.484907 177 +editor 1 41 3.218876 3.218876 433 +transform 1 32 3.465736 3.465736 542 +attribut 1 14 4.317488 4.317488 1092 +eduresearch 1 6 5.164786 5.164786 2205 +increment 1 6 5.164786 5.164786 2206 +grammar 1 6 5.164786 5.164786 2058 +tim_teitelbaum 1 1 6.957497 6.957497 12020 +teitelbaumassoci 1 1 6.957497 6.957497 12021 +adavita 1 1 6.957497 6.957497 12022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html new file mode 100644 index 00000000..1959ebf5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^tve^tve.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +program 6 374 0.693147 4.158882 7 +system 3 443 0.693147 2.079441 6 +work 1 380 0.693147 0.693147 9 +us 2 329 1.098612 2.197224 16 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +languag 3 227 1.386294 4.158882 26 +email 1 220 1.386294 1.386294 29 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +fall 4 181 1.609438 6.437752 40 +paper 3 205 1.609438 4.828314 38 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +parallel 13 169 1.791759 23.292867 60 +network 3 168 1.791759 5.375277 61 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +architectur 9 139 1.945910 17.513190 77 +support 2 132 1.945910 3.891820 83 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +lectur 1 135 1.945910 1.945910 73 +compil 4 122 2.079442 8.317768 96 +high 3 130 2.079442 6.238326 101 +report 3 131 2.079442 6.238326 92 +machin 3 129 2.079442 6.238326 95 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +analysi 1 124 2.079442 2.079442 98 +version 2 113 2.197225 4.394450 122 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +user 3 104 2.302585 6.907755 137 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +proceed 5 93 2.397895 11.989475 152 +commun 4 95 2.397895 9.591580 157 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +activ 6 84 2.484907 14.909442 182 +level 3 87 2.484907 7.454721 180 +novemb 2 81 2.484907 4.969814 179 +control 2 82 2.484907 4.969814 164 +ieee 1 86 2.484907 2.484907 190 +journal 1 83 2.484907 2.484907 183 +messag 6 76 2.564949 15.389694 212 +appear 2 78 2.564949 5.129898 210 +june 2 79 2.564949 5.129898 214 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +upson 1 71 2.639057 2.639057 218 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +abstract 2 62 2.772589 5.545178 276 +evalu 1 64 2.772589 2.772589 266 +function 1 62 2.772589 2.772589 275 +simpl 1 60 2.833213 2.833213 298 +share 1 59 2.833213 2.833213 304 +automat 1 61 2.833213 2.833213 306 +juli 1 60 2.833213 2.833213 305 +sever 2 56 2.890372 5.780744 322 +think 1 57 2.890372 2.890372 314 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +talk 3 53 2.944439 8.833317 336 +run 1 51 2.995732 2.995732 347 +digit 1 52 2.995732 2.995732 348 +without 1 50 3.044522 3.044522 370 +principl 1 48 3.044522 3.044522 357 +california 1 46 3.091042 3.091042 388 +mechan 3 43 3.178054 9.534162 416 +offer 1 43 3.178054 3.178054 414 +annual 3 40 3.258097 9.774291 458 +slide 2 38 3.295837 6.591674 467 +workstat 2 37 3.332205 6.664410 479 +extend 1 32 3.465736 3.465736 539 +platform 1 29 3.583519 3.583519 591 +limit 1 29 3.583519 3.583519 585 +cluster 2 28 3.610918 7.221836 612 +multiprocessor 1 28 3.610918 3.610918 605 +proc 4 26 3.688879 14.755516 649 +berkelei 2 26 3.688879 7.377758 657 +supercomput 1 25 3.737670 3.737670 681 +toward 1 25 3.737670 3.737670 668 +fundament 1 25 3.737670 3.737670 661 +magazin 1 24 3.761200 3.761200 704 +lead 1 23 3.806662 3.806662 718 +thread 1 23 3.806662 3.806662 722 +fine 3 20 3.951244 11.853732 822 +department 1 20 3.951244 3.951244 839 +speed 1 18 4.060443 4.060443 911 +interconnect 2 17 4.110874 8.221748 937 +diego 2 16 4.174387 8.348774 992 +latenc 1 16 4.174387 4.174387 993 +fourth 1 16 4.174387 4.174387 999 +cambridg 1 16 4.174387 4.174387 1008 +month 1 15 4.248495 4.248495 1025 +micro 1 15 4.248495 4.248495 1031 +split 3 14 4.317488 12.952464 1078 +eicken 14 13 4.382027 61.348378 1134 +thorsten 3 13 4.382027 13.146081 1133 +conf 2 13 4.382027 8.764054 1181 +sigplan 1 13 4.382027 4.382027 1190 +guest 1 12 4.465908 4.465908 1220 +multithread 2 11 4.553877 9.107754 1315 +bandwidth 1 11 4.553877 4.553877 1365 +grain 3 10 4.653960 13.961880 1448 +werner 1 10 4.653960 4.653960 1385 +santa 1 10 4.653960 4.653960 1441 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +patterson 1 9 4.753590 4.753590 1554 +port 2 8 4.875197 9.750394 1766 +entri 2 8 4.875197 9.750394 1678 +gold 2 8 4.875197 9.750394 1745 +coast 2 8 4.875197 9.750394 1746 +vineet 1 8 4.875197 4.875197 1639 +vogel 1 8 4.875197 4.875197 1622 +andcomput 1 8 4.875197 4.875197 1623 +realist 1 8 4.875197 4.875197 1665 +goldstein 5 6 5.164786 25.823930 2168 +forum 2 6 5.164786 10.329572 2027 +fish 1 6 5.164786 5.164786 2207 +greec 1 6 5.164786 5.164786 2208 +culler 10 5 5.347108 53.471080 2381 +symp 4 5 5.347108 21.388432 2376 +buch 2 5 5.347108 10.694216 2272 +australia 2 5 5.347108 10.694216 2478 +water 1 5 5.347108 5.347108 2535 +plant 1 5 5.347108 5.347108 2497 +dataflow 1 5 5.347108 5.347108 2390 +karp 1 5 5.347108 5.347108 2284 +ifip 1 5 5.347108 5.347108 2459 +basu 2 4 5.568345 11.136690 2843 +hallphon 1 4 5.568345 5.568345 2900 +coursesc 1 4 5.568345 5.568345 2692 +tire 1 4 5.568345 5.568345 2799 +password 1 4 5.568345 5.568345 2594 +medium 1 4 5.568345 5.568345 2834 +schauser 8 3 5.857933 46.863464 3599 +interfacefor 1 3 5.857933 5.857933 3534 +frontier 1 3 5.857933 5.857933 3771 +anindya 1 3 5.857933 5.857933 3535 +avula 1 3 5.857933 5.857933 3600 +abridg 1 3 5.857933 5.857933 3772 +dusseau 1 3 5.857933 5.857933 3382 +yelick 1 3 5.857933 5.857933 3374 +crete 1 3 5.857933 5.857933 3773 +lan 1 2 6.263398 6.263398 4359 +includingth 1 2 6.263398 6.263398 4493 +pond 1 2 6.263398 6.263398 5127 +firewal 1 2 6.263398 6.263398 5407 +distributedcomput 1 2 6.263398 6.263398 5336 +communicationarchitectur 1 2 6.263398 6.263398 4859 +krishnamurthi 1 2 6.263398 6.263398 5408 +lumetta 1 2 6.263398 6.263398 5409 +dalli 1 2 6.263398 6.263398 4517 +logp 1 2 6.263398 6.263398 4227 +orlando 1 2 6.263398 6.263398 5410 +clara 1 2 6.263398 6.263398 4958 +barrera 1 2 6.263398 6.263398 4309 +departement 2 1 6.957497 13.914994 12023 +eickenassist 1 1 6.957497 6.957497 12024 +eduprojectsth 1 1 6.957497 6.957497 12025 +architectureprovid 1 1 6.957497 6.957497 12026 +latencyand 1 1 6.957497 6.957497 12027 +currentimplement 1 1 6.957497 6.957497 12028 +tonon 1 1 6.957497 6.957497 12029 +spmd 1 1 6.957497 6.957497 12030 +extensionto 1 1 6.957497 6.957497 12031 +newplatform 1 1 6.957497 6.957497 12032 +multprocessor 1 1 6.957497 6.957497 12033 +computerorgan 1 1 6.957497 6.957497 12034 +maynd 1 1 6.957497 6.957497 12035 +pagestv 1 1 6.957497 6.957497 12036 +macpppwhich 1 1 6.957497 6.957497 12037 +everhav 1 1 6.957497 6.957497 12038 +passwordssuddenli 1 1 6.957497 6.957497 12039 +installationinstruct 1 1 6.957497 6.957497 12040 +publicationsu 1 1 6.957497 6.957497 12041 +atmnetwork 1 1 6.957497 6.957497 12042 +controlledthread 1 1 6.957497 6.957497 12043 +spertu 1 1 6.957497 6.957497 12044 +modelof 1 1 6.957497 6.957497 12045 +sahai 1 1 6.957497 6.957497 12046 +santo 1 1 6.957497 6.957497 12047 +subramonian 1 1 6.957497 6.957497 12048 +dataflowmultiprocess 1 1 6.957497 6.957497 12049 +forintegr 1 1 6.957497 6.957497 12050 +forleni 1 1 6.957497 6.957497 12051 +minimalhardwar 1 1 6.957497 6.957497 12052 +wawrzynek 1 1 6.957497 6.957497 12053 +architecturesfor 1 1 6.957497 6.957497 12054 +saavedra 1 1 6.957497 6.957497 12055 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html new file mode 100644 index 00000000..a9e91991 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ulfar^index.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +contact 1 153 1.791759 1.791759 59 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +pleas 1 113 2.197225 2.197225 114 +real 1 93 2.397895 2.397895 144 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +appli 1 71 2.639057 2.639057 226 +date 1 51 2.995732 2.995732 344 +done 1 47 3.091042 3.091042 381 +often 1 31 3.496508 3.496508 551 +actual 1 28 3.610918 3.610918 604 +enjoi 1 26 3.688879 3.688879 660 +assum 1 19 4.007333 4.007333 845 +incomput 1 14 4.317488 4.317488 1096 +touch 1 12 4.465908 4.465908 1288 +moment 1 11 4.553877 4.553877 1379 +apart 1 7 5.010635 5.010635 1936 +somewhat 1 4 5.568345 5.568345 2659 +disclaim 1 4 5.568345 5.568345 2847 +erlingsson 3 2 6.263398 18.790194 4107 +lfar 2 2 6.263398 12.526796 4106 +pagelfar 1 1 6.957497 6.957497 12056 +specificationi 1 1 6.957497 6.957497 12057 +incongruousiceland 1 1 6.957497 6.957497 12058 +implementationbackgroundwher 1 1 6.957497 6.957497 12059 +activitieswhat 1 1 6.957497 6.957497 12060 +schedulewher 1 1 6.957497 6.957497 12061 +researchwhat 1 1 6.957497 6.957497 12062 +interestswhat 1 1 6.957497 6.957497 12063 +acquaintancesthos 1 1 6.957497 6.957497 12064 +infohow 1 1 6.957497 6.957497 12065 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html new file mode 100644 index 00000000..76aba5d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^qmg-home.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +gener 5 220 1.386294 6.931470 27 +softwar 3 220 1.386294 4.158882 30 +cornel 2 215 1.386294 2.772588 23 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +relat 1 139 1.945910 1.945910 68 +model 1 145 1.945910 1.945910 69 +center 1 88 2.397895 2.397895 158 +novemb 1 81 2.484907 2.484907 179 +resourc 1 81 2.484907 2.484907 172 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +ithaca 1 65 2.772589 2.772589 294 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +run 1 51 2.995732 2.995732 347 +robert 1 30 3.555348 3.555348 567 +packag 3 28 3.610918 10.832754 614 +releas 3 28 3.610918 10.832754 616 +geometri 1 22 3.850148 3.850148 752 +geometr 2 19 4.007333 8.014666 852 +element 3 18 4.060443 12.181329 895 +finit 2 14 4.317488 8.634976 1106 +jonathan 1 13 4.382027 4.382027 1174 +mesh 4 11 4.553877 18.215508 1351 +stephen 1 11 4.553877 4.553877 1342 +solver 1 7 5.010635 5.010635 1911 +minnesota 1 5 5.347108 5.347108 2469 +websit 1 4 5.568345 5.568345 2726 +schneider 1 4 5.568345 5.568345 2868 +vavasi 3 3 5.857933 17.573799 3526 +threedimens 1 1 6.957497 6.957497 12066 +themesh 1 1 6.957497 6.957497 12067 +softwaredownload 1 1 6.957497 6.957497 12068 +andqmg 1 1 6.957497 6.957497 12069 +mcphedran 1 1 6.957497 6.957497 12070 +offinit 1 1 6.957497 6.957497 12071 +ofsoftwar 1 1 6.957497 6.957497 12072 +computationalgeometri 1 1 6.957497 6.957497 12073 +shewchuk 1 1 6.957497 6.957497 12074 +triangl 1 1 6.957497 6.957497 12075 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html new file mode 100644 index 00000000..f8c52297 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vavasis^vavasis.html @@ -0,0 +1,148 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +gener 4 220 1.386294 5.545176 27 +cornel 2 215 1.386294 2.772588 23 +email 2 220 1.386294 2.772588 29 +softwar 2 220 1.386294 2.772588 30 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +algorithm 3 162 1.791759 5.375277 57 +phone 2 175 1.791759 3.583518 45 +recent 2 167 1.791759 3.583518 58 +avail 2 169 1.791759 3.583518 48 +base 1 165 1.791759 1.791759 50 +click 4 142 1.945910 7.783640 78 +problem 2 147 1.945910 3.891820 75 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +object 1 138 1.945910 1.945910 79 +analysi 2 124 2.079442 4.158884 98 +document 1 121 2.079442 2.079442 89 +code 4 108 2.197225 8.788900 116 +well 2 109 2.197225 4.394450 121 +pleas 1 113 2.197225 2.197225 114 +specif 1 106 2.197225 2.197225 106 +intern 1 108 2.197225 2.197225 128 +associ 1 93 2.397895 2.397895 151 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +level 2 87 2.484907 4.969814 180 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +method 3 80 2.564949 7.694847 213 +complet 2 77 2.564949 5.129898 208 +sourc 2 77 2.564949 5.129898 201 +optim 1 79 2.564949 2.564949 197 +line 2 75 2.639057 5.278114 231 +nation 1 74 2.639057 2.639057 240 +solv 1 73 2.639057 2.639057 234 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +ithaca 1 65 2.772589 2.772589 294 +laboratori 1 63 2.772589 2.772589 292 +complex 1 64 2.772589 2.772589 269 +creat 1 63 2.772589 2.772589 277 +unix 1 58 2.890372 2.890372 308 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +numer 4 49 3.044522 12.178088 369 +effect 1 46 3.091042 3.091042 385 +featur 1 46 3.091042 3.091042 386 +map 1 39 3.258097 3.258097 452 +annual 1 40 3.258097 3.258097 458 +microsoft 1 38 3.295837 3.295837 468 +least 1 35 3.401197 3.401197 516 +domain 1 30 3.555348 3.555348 564 +depend 1 29 3.583519 3.583519 583 +packag 2 28 3.610918 7.221836 614 +releas 1 28 3.610918 3.610918 616 +bound 1 26 3.688879 3.688879 659 +valu 1 25 3.737670 3.737670 665 +aspect 1 25 3.737670 3.737670 663 +period 1 22 3.850148 3.850148 743 +divis 1 21 3.912023 3.912023 803 +grad 1 20 3.951244 3.951244 837 +geometr 1 19 4.007333 4.007333 852 +element 1 18 4.060443 4.060443 895 +scott 1 18 4.060443 4.060443 884 +matrix 1 17 4.110874 4.110874 933 +anonym 2 14 4.317488 8.634976 1100 +matlab 2 14 4.317488 8.634976 1081 +squar 1 14 4.317488 4.317488 1082 +finit 1 14 4.317488 4.317488 1106 +topolog 1 14 4.317488 4.317488 1089 +whose 1 13 4.382027 4.382027 1166 +forth 1 13 4.382027 4.382027 1186 +weight 1 12 4.465908 4.465908 1204 +mesh 5 11 4.553877 22.769385 1351 +stephen 2 11 4.553877 9.107754 1342 +faster 1 11 4.553877 4.553877 1323 +rhode 1 9 4.753590 4.753590 1579 +cross 1 8 4.875197 4.875197 1703 +boundari 3 7 5.010635 15.031905 1929 +aris 1 7 5.010635 5.010635 1924 +dimens 1 7 5.010635 5.010635 1930 +argonn 2 5 5.347108 10.694216 2461 +colleagu 1 5 5.347108 5.347108 2304 +dual 1 5 5.347108 5.347108 2522 +hole 1 5 5.347108 5.347108 2518 +compat 1 5 5.347108 5.347108 2485 +ratio 2 4 5.568345 11.136690 2942 +triangul 2 4 5.568345 11.136690 2903 +bldg 1 4 5.568345 5.568345 2983 +manuscript 1 4 5.568345 5.568345 2750 +orthogon 1 4 5.568345 5.568345 2832 +conform 1 4 5.568345 5.568345 2941 +vrml 1 4 5.568345 5.568345 2949 +vavasi 9 3 5.857933 52.721397 3526 +aren 1 3 5.857933 5.857933 3512 +trefethen 1 3 5.857933 5.857933 3528 +hough 1 3 5.857933 5.857933 3527 +delaunai 1 3 5.857933 5.857933 3619 +ellipt 1 3 5.857933 5.857933 3774 +cleaner 1 3 5.857933 5.857933 3775 +mitchel 2 2 6.263398 12.526796 4792 +acceler 1 2 6.263398 6.263398 5411 +driscol 1 2 6.263398 6.263398 4836 +polyhedr 1 2 6.263398 6.263398 5412 +andautomat 1 2 6.263398 6.263398 5413 +onsabbat 1 1 6.957497 6.957497 12076 +cass 1 1 6.957497 6.957497 12077 +tsure 1 1 6.957497 6.957497 12078 +essaybi 1 1 6.957497 6.957497 12079 +issuesnumer 1 1 6.957497 6.957497 12080 +problemsgeometr 1 1 6.957497 6.957497 12081 +computingspars 1 1 6.957497 6.957497 12082 +computationsi 1 1 6.957497 6.957497 12083 +primal 1 1 6.957497 6.957497 12084 +interiorpoint 1 1 6.957497 6.957497 12085 +decompositionfor 1 1 6.957497 6.957497 12086 +gridcut 1 1 6.957497 6.957497 12087 +hyperplan 1 1 6.957497 6.957497 12088 +packagei 1 1 6.957497 6.957497 12089 +verycompl 1 1 6.957497 6.957497 12090 +unstructuredtetrahedr 1 1 6.957497 6.957497 12091 +boundaryvalu 1 1 6.957497 6.957497 12092 +iswritten 1 1 6.957497 6.957497 12093 +distributedfor 1 1 6.957497 6.957497 12094 +distributionbegan 1 1 6.957497 6.957497 12095 +manyimprov 1 1 6.957497 6.957497 12096 +compatibilitywith 1 1 6.957497 6.957497 12097 +pleasese 1 1 6.957497 6.957497 12098 +reportback 1 1 6.957497 6.957497 12099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html new file mode 100644 index 00000000..26ed3678 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^verma^verma.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +support 1 132 1.945910 1.945910 83 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +browser 1 56 2.890372 2.890372 313 +netscap 1 44 3.135494 3.135494 395 +frame 1 24 3.761200 3.761200 684 +higher 1 24 3.761200 3.761200 690 +arun 1 4 5.568345 5.568345 2736 +verma 1 2 6.263398 6.263398 4341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html new file mode 100644 index 00000000..08df42ca --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vince^vince.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +read 1 154 1.791759 1.791759 47 +browser 1 56 2.890372 2.890372 313 +date 1 51 2.995732 2.995732 344 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +thank 1 23 3.806662 3.806662 721 +vinc 1 2 6.263398 6.263398 5414 +suck 1 2 6.263398 6.263398 5232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html new file mode 100644 index 00000000..96ae839b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vitrano^vitrano.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +engin 1 297 1.098612 1.098612 20 +cornel 1 215 1.386294 1.386294 23 +databas 1 122 2.079442 2.079442 86 +advanc 1 99 2.302585 2.302585 130 +start 1 83 2.484907 2.484907 173 +thing 1 84 2.484907 2.484907 189 +internet 1 83 2.484907 2.484907 186 +multimedia 1 68 2.708050 2.708050 258 +give 1 50 3.044522 3.044522 359 +break 1 20 3.951244 3.951244 812 +pagec 2 15 4.248495 8.496990 1011 +pageer 1 3 5.857933 5.857933 3776 +vitrano 4 1 6.957497 27.829988 12100 +pagehei 1 1 6.957497 6.957497 12101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html new file mode 100644 index 00000000..00302db9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vivek^vivek.html @@ -0,0 +1,102 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +time 4 293 1.098612 4.394448 17 +us 3 329 1.098612 3.295836 16 +current 1 284 1.098612 1.098612 21 +mail 1 238 1.386294 1.386294 22 +cornel 1 215 1.386294 1.386294 23 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +specif 2 106 2.197225 4.394450 106 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +info 1 85 2.484907 2.484907 176 +larg 1 82 2.484907 2.484907 168 +know 2 80 2.564949 5.129898 198 +good 1 77 2.564949 2.564949 200 +java 3 70 2.708050 8.124150 248 +view 2 70 2.708050 5.416100 254 +window 1 68 2.708050 2.708050 242 +function 1 62 2.772589 2.772589 275 +collect 1 65 2.772589 2.772589 268 +visit 1 63 2.772589 2.772589 288 +back 1 60 2.833213 2.833213 297 +browser 4 56 2.890372 11.561488 313 +think 1 57 2.890372 2.890372 314 +maintain 1 51 2.995732 2.995732 342 +right 1 48 3.044522 3.044522 363 +visitor 1 49 3.044522 3.044522 371 +life 1 50 3.044522 3.044522 375 +effect 1 46 3.091042 3.091042 385 +favorit 1 44 3.135494 3.135494 410 +made 1 44 3.135494 3.135494 398 +around 1 43 3.178054 3.178054 415 +might 1 41 3.218876 3.218876 426 +small 1 39 3.258097 3.258097 447 +respons 1 37 3.332205 3.332205 476 +expect 1 37 3.332205 3.332205 484 +copyright 1 36 3.367296 3.367296 495 +word 1 34 3.401197 3.401197 508 +given 1 32 3.465736 3.465736 538 +anim 2 31 3.496508 6.993016 557 +someth 1 31 3.496508 3.496508 554 +held 1 28 3.610918 3.610918 600 +relev 1 26 3.688879 3.688879 637 +reach 2 24 3.761200 7.522400 688 +yahoo 1 24 3.761200 3.761200 707 +wonder 2 20 3.951244 7.902488 815 +reserv 1 20 3.951244 3.951244 808 +applet 1 20 3.951244 3.951244 827 +qualiti 1 20 3.951244 3.951244 832 +els 1 19 4.007333 4.007333 843 +capabl 1 15 4.248495 4.248495 1016 +happi 1 14 4.317488 4.317488 1079 +deriv 1 13 4.382027 4.382027 1145 +clock 1 11 4.553877 4.553877 1320 +regard 1 11 4.553877 4.553877 1309 +vista 1 10 4.653960 4.653960 1452 +ground 1 7 5.010635 5.010635 1955 +usabl 1 7 5.010635 5.010635 1810 +heavi 1 7 5.010635 5.010635 1841 +usag 1 6 5.164786 5.164786 2209 +vivek 1 6 5.164786 5.164786 2210 +promis 1 6 5.164786 5.164786 2037 +million 1 5 5.347108 5.347108 2495 +settimeout 1 5 5.347108 5.347108 2536 +wast 1 5 5.347108 5.347108 2537 +seed 5 4 5.568345 27.841725 2984 +timertwo 1 4 5.568345 5.568345 2985 +transmit 1 4 5.568345 5.568345 2835 +fulli 1 4 5.568345 5.568345 2986 +dont 2 3 5.857933 11.715866 3473 +impli 1 3 5.857933 5.857933 3348 +kolla 2 1 6.957497 13.914994 12102 +scrollit 2 1 6.957497 13.914994 12103 +unwant 1 1 6.957497 6.957497 12104 +warrante 1 1 6.957497 6.957497 12105 +zillion 1 1 6.957497 6.957497 12106 +thoughtsfriend 1 1 6.957497 6.957497 12107 +foeslinksa 1 1 6.957497 6.957497 12108 +tryalta 1 1 6.957497 6.957497 12109 +theinktomiresumein 1 1 6.957497 6.957497 12110 +htmlin 1 1 6.957497 6.957497 12111 +postscriptin 1 1 6.957497 6.957497 12112 +perfectin 1 1 6.957497 6.957497 12113 +asciith 1 1 6.957497 6.957497 12114 +wanna 1 1 6.957497 6.957497 12115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html new file mode 100644 index 00000000..0b378336 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vladimir^vladimir.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 4 380 0.693147 2.772588 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +professor 4 137 1.945910 7.783640 76 +like 2 132 1.945910 3.891820 81 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +number 2 130 2.079442 4.158884 97 +compil 1 122 2.079442 2.079442 96 +high 1 130 2.079442 2.079442 101 +find 2 111 2.197225 4.394450 111 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +part 1 98 2.302585 2.302585 129 +comment 1 93 2.397895 2.397895 146 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +member 1 84 2.484907 2.484907 165 +solut 1 82 2.484907 2.484907 162 +david 1 71 2.639057 2.639057 232 +differ 1 66 2.708050 2.708050 253 +prof 1 64 2.772589 2.772589 273 +function 1 62 2.772589 2.772589 275 +simpl 1 60 2.833213 2.833213 298 +particular 1 51 2.995732 2.995732 352 +friend 2 48 3.044522 6.089044 376 +might 1 41 3.218876 3.218876 426 +paul 1 38 3.295837 3.295837 471 +everi 2 34 3.401197 6.802394 519 +either 1 35 3.401197 3.401197 506 +seem 1 18 4.060443 4.060443 899 +matrix 1 17 4.110874 4.110874 933 +spars 1 16 4.174387 4.174387 989 +difficulti 3 13 4.382027 13.146081 1132 +guess 1 10 4.653960 4.653960 1443 +henri 1 10 4.653960 4.653960 1417 +said 2 9 4.753590 9.507180 1571 +govern 1 9 4.753590 4.753590 1581 +judg 1 8 4.875197 4.875197 1644 +cornellunivers 1 7 5.010635 5.010635 1916 +keshav 1 7 5.010635 5.010635 1852 +remind 1 7 5.010635 5.010635 1799 +encrypt 1 7 5.010635 5.010635 1835 +legal 1 6 5.164786 5.164786 2094 +privaci 1 6 5.164786 5.164786 2144 +strong 1 6 5.164786 5.164786 2029 +lawyer 4 4 5.568345 22.273380 2836 +pingali 1 4 5.568345 5.568345 2956 +bernoulli 1 4 5.568345 5.568345 2955 +stodghil 1 4 5.568345 5.568345 2864 +lord 1 4 5.568345 5.568345 2906 +wherea 1 4 5.568345 5.568345 2597 +functionof 1 2 6.263398 6.263398 5415 +todayth 1 2 6.263398 6.263398 5416 +vlad 1 1 6.957497 6.957497 12116 +pagevladimir 1 1 6.957497 6.957497 12117 +kotlyarvladimir 1 1 6.957497 6.957497 12118 +wereteach 1 1 6.957497 6.957497 12119 +andindu 1 1 6.957497 6.957497 12120 +kodukulapubl 1 1 6.957497 6.957497 12121 +kissing 1 1 6.957497 6.957497 12122 +profess 1 1 6.957497 6.957497 12123 +abritish 1 1 6.957497 6.957497 12124 +sveri 1 1 6.957497 6.957497 12125 +den 1 1 6.957497 6.957497 12126 +asolut 1 1 6.957497 6.957497 12127 +outpac 1 1 6.957497 6.957497 12128 +ofsolut 1 1 6.957497 6.957497 12129 +hardenough 1 1 6.957497 6.957497 12130 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html new file mode 100644 index 00000000..0e7deb64 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^vsm^home.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +updat 1 191 1.609438 1.609438 41 +address 2 170 1.791759 3.583518 62 +hall 1 146 1.945910 1.945910 65 +august 1 66 2.708050 2.708050 257 +ithaca 2 65 2.772589 5.545178 294 +mapl 1 11 4.553877 4.553877 1376 +rhode 1 9 4.753590 4.753590 1579 +vijai 1 4 5.568345 5.568345 2960 +menon 1 2 6.263398 6.263398 5249 +menonvijai 1 1 6.957497 6.957497 12131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html new file mode 100644 index 00000000..ca3fa69b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weichen^weichen.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +depart 3 457 0.693147 2.079441 12 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +academ 1 82 2.484907 2.484907 178 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +upson 1 71 2.639057 2.639057 218 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +august 1 66 2.708050 2.708050 257 +third 1 43 3.178054 3.178054 412 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +computersci 1 30 3.555348 3.555348 562 +detect 1 26 3.688879 3.688879 646 +bookmark 1 26 3.688879 3.688879 639 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +chen 2 21 3.912023 7.824046 791 +beij 1 19 4.007333 4.007333 876 +failur 1 18 4.060443 4.060443 898 +tsinghua 1 13 4.382027 4.382027 1195 +soccer 1 8 4.875197 4.875197 1752 +spare 2 6 5.164786 10.329572 2177 +distributedsystem 1 6 5.164786 5.164786 2022 +membership 1 3 5.857933 5.857933 3751 +bachelorand 1 2 6.263398 6.263398 5128 +chinami 1 2 6.263398 6.263398 5129 +toueg 1 2 6.263398 6.263398 5339 +pagewei 1 1 6.957497 6.957497 12132 +weichen 1 1 6.957497 6.957497 12133 +inpartition 1 1 6.957497 6.957497 12134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html new file mode 100644 index 00000000..7816e7c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^weitsang^index.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +site 1 106 2.197225 2.197225 119 +homepag 1 93 2.397895 2.397895 148 +window 1 68 2.708050 2.708050 242 +movi 1 40 3.258097 3.258097 459 +newspap 1 12 4.465908 4.465908 1280 +weitsang 1 2 6.263398 6.263398 4088 +lwhere 1 1 6.957497 6.957497 12135 +fromwhat 1 1 6.957497 6.957497 12136 +watchwhat 1 1 6.957497 6.957497 12137 +likec 1 1 6.957497 6.957497 12138 +wrotepictur 1 1 6.957497 6.957497 12139 +drawa 1 1 6.957497 6.957497 12140 +motifcomput 1 1 6.957497 6.957497 12141 +theoryhom 1 1 6.957497 6.957497 12142 +vimi 1 1 6.957497 6.957497 12143 +tsearch 1 1 6.957497 6.957497 12144 +webcoolest 1 1 6.957497 6.957497 12145 +sitessharewar 1 1 6.957497 6.957497 12146 +archivem 1 1 6.957497 6.957497 12147 +onlineunivers 1 1 6.957497 6.957497 12148 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html new file mode 100644 index 00000000..1874b105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^whkao^whkao.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +project 2 340 1.098612 2.197224 18 +graduat 2 215 1.386294 2.772588 31 +cornel 2 215 1.386294 2.772588 23 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +develop 1 174 1.791759 1.791759 53 +relat 1 139 1.945910 1.945910 68 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +final 1 116 2.197225 2.197225 108 +technic 1 100 2.302585 2.302585 140 +graphic 1 90 2.397895 2.397895 147 +academ 1 82 2.484907 2.484907 178 +resum 1 79 2.564949 2.564949 217 +nation 1 74 2.639057 2.639057 240 +multimedia 2 68 2.708050 5.416100 258 +window 1 68 2.708050 2.708050 242 +java 1 70 2.708050 2.708050 248 +ithaca 1 65 2.772589 2.772589 294 +plan 1 65 2.772589 2.772589 272 +result 1 65 2.772589 2.772589 281 +extens 1 53 2.944439 2.944439 340 +tabl 1 51 2.995732 2.995732 346 +telephon 1 50 3.044522 3.044522 373 +still 1 50 3.044522 3.044522 362 +understand 1 47 3.091042 3.091042 384 +favorit 2 44 3.135494 6.270988 410 +vision 1 41 3.218876 3.218876 430 +field 1 37 3.332205 3.332205 482 +photo 1 31 3.496508 3.496508 561 +except 1 28 3.610918 3.610918 607 +team 1 27 3.637586 3.637586 625 +background 1 25 3.737670 3.737670 664 +sport 1 25 3.737670 3.737670 683 +other 1 24 3.761200 3.761200 697 +tenni 2 20 3.951244 7.902488 838 +partial 1 18 4.060443 4.060443 900 +taiwan 1 16 4.174387 4.174387 1006 +drive 1 15 4.248495 4.248495 1052 +avenu 1 12 4.465908 4.465908 1277 +basketbal 1 12 4.465908 4.465908 1289 +danc 1 12 4.465908 4.465908 1278 +skill 1 12 4.465908 4.465908 1205 +calcul 1 12 4.465908 4.465908 1268 +meng 1 12 4.465908 4.465908 1214 +mapl 1 11 4.553877 4.553877 1376 +magic 1 11 4.553877 4.553877 1358 +player 1 11 4.553877 4.553877 1371 +market 1 11 4.553877 4.553877 1361 +swim 1 9 4.753590 4.753590 1599 +volleybal 1 9 4.753590 4.753590 1598 +rivl 1 8 4.875197 4.875197 1632 +job 1 8 4.875197 4.875197 1702 +morph 1 7 5.010635 5.010635 1937 +financi 1 6 5.164786 5.164786 2197 +sing 1 5 5.347108 5.347108 2499 +hung 2 3 5.857933 11.715866 3524 +habit 1 3 5.857933 5.857933 3777 +atlanta 1 3 5.857933 5.857933 3778 +bowl 1 2 6.263398 6.263398 5417 +orlando 1 2 6.263398 6.263398 5410 +glavin 2 1 6.957497 13.914994 12149 +billiard 1 1 6.957497 6.957497 12150 +brave 1 1 6.957497 6.957497 12151 +anferne 1 1 6.957497 6.957497 12152 +hardawai 1 1 6.957497 6.957497 12153 +warp 1 1 6.957497 6.957497 12154 +webpaint 1 1 6.957497 6.957497 12155 +whkao 1 1 6.957497 6.957497 12156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html new file mode 100644 index 00000000..cc1c2265 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^wwlee^wwlee.html @@ -0,0 +1,151 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 4 431 0.693147 2.772588 10 +system 3 443 0.693147 2.079441 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +cornel 2 215 1.386294 2.772588 23 +softwar 2 220 1.386294 2.772588 30 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +parallel 3 169 1.791759 5.375277 60 +network 2 168 1.791759 3.583518 61 +distribut 2 162 1.791759 3.583518 51 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +databas 2 122 2.079442 4.158884 86 +spring 2 131 2.079442 4.158884 88 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +present 1 91 2.397895 2.397895 145 +search 1 95 2.397895 2.397895 155 +school 3 84 2.484907 7.454721 188 +west 1 83 2.484907 2.484907 192 +optim 3 79 2.564949 7.694847 197 +master 1 76 2.564949 2.564949 216 +write 1 72 2.639057 2.639057 222 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +practic 1 70 2.708050 2.708050 246 +collect 2 65 2.772589 5.545178 268 +copi 1 63 2.772589 2.772589 284 +plai 1 60 2.833213 2.833213 307 +type 1 61 2.833213 2.833213 296 +reason 1 57 2.890372 2.890372 318 +week 1 52 2.995732 2.995732 343 +visitor 1 49 3.044522 3.044522 371 +cool 1 49 3.044522 3.044522 374 +could 1 46 3.091042 3.091042 383 +get 1 46 3.091042 3.091042 380 +even 1 45 3.135494 3.135494 393 +york 1 41 3.218876 3.218876 435 +probabl 2 40 3.258097 6.516194 455 +live 1 40 3.258097 3.258097 451 +seminar 1 38 3.295837 3.295837 470 +formal 1 37 3.332205 3.332205 478 +robot 1 36 3.367296 3.367296 497 +campu 1 27 3.637586 3.637586 623 +enjoi 1 26 3.688879 3.688879 660 +spent 1 25 3.737670 3.737670 676 +never 1 25 3.737670 3.737670 671 +william 2 22 3.850148 7.700296 765 +mpeg 3 20 3.951244 11.853732 831 +tenni 2 20 3.951244 7.902488 838 +minut 1 20 3.951244 3.951244 810 +wrote 1 20 3.951244 3.951244 830 +beauti 1 18 4.060443 4.060443 912 +weekli 1 17 4.110874 4.110874 919 +match 1 16 4.174387 4.174387 965 +practicum 1 16 4.174387 4.174387 960 +princeton 1 15 4.248495 4.248495 1042 +mellon 1 13 4.382027 4.382027 1179 +scienceat 1 11 4.553877 4.553877 1375 +systemsc 1 11 4.553877 4.553877 1293 +mapl 1 11 4.553877 4.553877 1376 +awai 1 10 4.653960 4.653960 1447 +guess 1 10 4.653960 4.653960 1443 +jersei 1 9 4.753590 4.753590 1587 +motorola 1 9 4.753590 4.753590 1546 +besid 1 8 4.875197 4.875197 1681 +partner 1 8 4.875197 4.875197 1648 +parti 1 8 4.875197 4.875197 1676 +on 1 8 4.875197 4.875197 1628 +south 2 6 5.164786 10.329572 2167 +piano 2 6 5.164786 10.329572 2201 +sleep 1 6 5.164786 5.164786 2211 +florida 3 5 5.347108 16.041324 2526 +compet 1 5 5.347108 5.347108 2462 +coral 1 5 5.347108 5.347108 2538 +quantifi 1 5 5.347108 5.347108 2525 +thrive 1 5 5.347108 5.347108 2257 +revolut 1 5 5.347108 5.347108 2315 +encod 3 4 5.568345 16.705035 2929 +somehow 1 4 5.568345 5.568345 2974 +essai 1 4 5.568345 5.568345 2948 +wart 1 4 5.568345 5.568345 2987 +classesc 2 3 5.857933 11.715866 3681 +exit 1 3 5.857933 5.857933 3124 +engineeringand 1 3 5.857933 5.857933 3779 +hpux 1 3 5.857933 5.857933 3780 +sector 1 3 5.857933 5.857933 3766 +cornellopoli 2 2 6.263398 12.526796 5157 +chopin 1 2 6.263398 6.263398 5358 +reject 1 2 6.263398 6.263398 5418 +techniquec 1 2 6.263398 6.263398 5158 +methodsc 1 2 6.263398 6.263398 5159 +colloquiumc 1 2 6.263398 6.263398 5160 +computingc 1 2 6.263398 6.263398 5216 +concerto 3 1 6.957497 20.872491 12157 +sciencefrom 1 1 6.957497 6.957497 12158 +carneig 1 1 6.957497 6.957497 12159 +didresearch 1 1 6.957497 6.957497 12160 +institu 1 1 6.957497 6.957497 12161 +xsro 1 1 6.957497 6.957497 12162 +atft 1 1 6.957497 6.957497 12163 +lauderdal 1 1 6.957497 6.957497 12164 +usta 1 1 6.957497 6.957497 12165 +tournment 1 1 6.957497 6.957497 12166 +faviorit 1 1 6.957497 6.957497 12167 +boca 1 1 6.957497 6.957497 12168 +ratonkei 1 1 6.957497 6.957497 12169 +beethoven 1 1 6.957497 6.957497 12170 +gershwin 1 1 6.957497 6.957497 12171 +liszt 1 1 6.957497 6.957497 12172 +mendelssohn 1 1 6.957497 6.957497 12173 +mozart 1 1 6.957497 6.957497 12174 +rachmaninoff 1 1 6.957497 6.957497 12175 +ravel 1 1 6.957497 6.957497 12176 +tchaikovski 1 1 6.957497 6.957497 12177 +violinconcerto 1 1 6.957497 6.957497 12178 +purifi 1 1 6.957497 6.957497 12179 +computerc 1 1 6.957497 6.957497 12180 +sectorcool 1 1 6.957497 6.957497 12181 +links_leap 1 1 6.957497 6.957497 12182 +frogski 1 1 6.957497 6.957497 12183 +serverident 1 1 6.957497 6.957497 12184 +crisi 1 1 6.957497 6.957497 12185 +testweath 1 1 6.957497 6.957497 12186 +undergroundinktomi 1 1 6.957497 6.957497 12187 +enginequest 1 1 6.957497 6.957497 12188 +archiveslast 1 1 6.957497 6.957497 12189 +ecithaca 1 1 6.957497 6.957497 12190 +wwlee 1 1 6.957497 6.957497 12191 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html new file mode 100644 index 00000000..05218895 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xichun^xichun.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +current 2 284 1.098612 2.197224 21 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +network 2 168 1.791759 3.583518 61 +hall 1 146 1.945910 1.945910 65 +welcom 2 122 2.079442 4.158884 99 +spring 1 131 2.079442 2.079442 88 +high 1 130 2.079442 2.079442 101 +databas 1 122 2.079442 2.079442 86 +site 1 106 2.197225 2.197225 119 +manag 1 114 2.197225 2.197225 125 +graphic 1 90 2.397895 2.397895 147 +school 1 84 2.484907 2.484907 188 +master 2 76 2.564949 5.129898 216 +upson 1 71 2.639057 2.639057 218 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +multimedia 1 68 2.708050 2.708050 258 +china 1 37 3.332205 3.332205 487 +taken 1 31 3.496508 3.496508 555 +universityithaca 1 24 3.761200 3.761200 710 +sciencecornel 1 22 3.850148 3.850148 768 +alumni 1 21 3.912023 3.912023 807 +bachelor 1 17 4.110874 4.110874 957 +edui 1 13 4.382027 4.382027 1193 +systemsc 1 11 4.553877 4.553877 1293 +capac 1 8 4.875197 4.875197 1740 +shade 2 7 5.010635 10.021270 1881 +atcornel 1 6 5.164786 5.164786 2131 +engineeringc 1 4 5.568345 5.568345 2904 +phong 1 2 6.263398 6.263398 4822 +xichun 3 1 6.957497 20.872491 12192 +zhejiang 3 1 6.957497 20.872491 12193 +jennif 2 1 6.957497 13.914994 12194 +hangzhou 1 1 6.957497 6.957497 12195 +javaworldsunhigh 1 1 6.957497 6.957497 12196 +alumnimeng 1 1 6.957497 6.957497 12197 +gouraud 1 1 6.957497 6.957497 12198 +systeme 1 1 6.957497 6.957497 12199 +communicationby 1 1 6.957497 6.957497 12200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html new file mode 100644 index 00000000..22cb19b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^xliu^home.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +topic 1 114 2.197225 2.197225 110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html new file mode 100644 index 00000000..4bbaac94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^cachet.html @@ -0,0 +1,81 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 12 374 0.693147 8.317764 7 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +languag 2 227 1.386294 2.772588 26 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +cornel 1 215 1.386294 1.386294 23 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +base 4 165 1.791759 7.167036 50 +relat 1 139 1.945910 1.945910 68 +analysi 3 124 2.079442 6.238326 98 +confer 1 126 2.079442 2.079442 100 +techniqu 1 99 2.302585 2.302585 138 +proceed 3 93 2.397895 7.193685 152 +select 1 91 2.397895 2.397895 154 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +june 1 79 2.564949 2.564949 214 +optim 1 79 2.564949 2.564949 197 +effici 3 73 2.639057 7.917171 233 +symposium 2 72 2.639057 5.278114 238 +knowledg 1 67 2.708050 2.708050 243 +improv 3 62 2.772589 8.317767 289 +interact 2 62 2.772589 5.545178 270 +written 1 63 2.772589 2.772589 278 +function 1 62 2.772589 2.772589 275 +result 1 65 2.772589 2.772589 281 +evalu 1 64 2.772589 2.772589 266 +januari 1 62 2.772589 2.772589 264 +juli 1 60 2.833213 2.833213 305 +februari 1 54 2.944439 2.944439 328 +principl 2 48 3.044522 6.089044 357 +approach 1 48 3.044522 3.044522 366 +california 1 46 3.091042 3.091042 388 +cach 1 41 3.218876 3.218876 432 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +societi 1 40 3.258097 3.258097 456 +transform 5 32 3.465736 17.328680 542 +semant 1 29 3.583519 3.583519 587 +manipul 1 27 3.637586 3.637586 624 +boston 1 19 4.007333 4.007333 862 +partial 1 18 4.060443 4.060443 900 +attribut 2 14 4.317488 8.634976 1092 +massachusett 1 14 4.317488 4.317488 1118 +deriv 2 13 4.382027 8.764054 1145 +sigplan 2 13 4.382027 8.764054 1190 +intermedi 1 9 4.753590 4.753590 1497 +discov 1 9 4.753590 4.753590 1562 +strength 1 9 4.753590 4.753590 1494 +reduct 1 7 5.010635 5.010635 1877 +increment 10 6 5.164786 51.647860 2206 +teitelbaum 3 6 5.164786 15.494358 2102 +sigact 1 6 5.164786 5.164786 2212 +florida 1 5 5.347108 5.347108 2526 +jolla 1 4 5.568345 5.568345 2988 +stoller 1 4 5.568345 5.568345 2866 +petersburg 1 4 5.568345 5.568345 2989 +systemat 2 3 5.857933 11.715866 3781 +beach 1 3 5.857933 5.857933 3782 +cachet 3 2 6.263398 18.790194 5419 +anni 2 2 6.263398 12.526796 5420 +auxiliari 1 2 6.263398 6.263398 5421 +yanhong 1 2 6.263398 6.263398 5422 +computationderiv 1 1 6.957497 6.957497 12201 +programsa 1 1 6.957497 6.957497 12202 +themeprogram 1 1 6.957497 6.957497 12203 +usessystemat 1 1 6.957497 6.957497 12204 +deriveincrement 1 1 6.957497 6.957497 12205 +peoplei 1 1 6.957497 6.957497 12206 +liutim 1 1 6.957497 6.957497 12207 +teitelbaumkeyword 1 1 6.957497 6.957497 12208 +cacheti 1 1 6.957497 6.957497 12209 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html new file mode 100644 index 00000000..18ca4db5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yanhong^index-postdoc.html @@ -0,0 +1,192 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 24 775 0.000000 0.000000 2 +scienc 11 640 0.000000 0.000000 4 +univers 9 571 0.000000 0.000000 5 +page 4 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +program 26 374 0.693147 18.021822 7 +system 7 443 0.693147 4.852029 6 +depart 6 457 0.693147 4.158882 12 +research 5 431 0.693147 3.465735 10 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +engin 2 297 1.098612 2.197224 20 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 8 215 1.386294 11.090352 23 +softwar 6 220 1.386294 8.317764 30 +gener 3 220 1.386294 4.158882 27 +languag 3 227 1.386294 4.158882 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +public 3 202 1.609438 4.828314 43 +updat 1 191 1.609438 1.609438 41 +base 7 165 1.791759 12.542313 50 +algorithm 3 162 1.791759 5.375277 57 +develop 3 174 1.791759 5.375277 53 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +problem 2 147 1.945910 3.891820 75 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +report 9 131 2.079442 18.714978 92 +confer 8 126 2.079442 16.635536 100 +analysi 2 124 2.079442 4.158884 98 +compil 1 122 2.079442 2.079442 96 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +intern 7 108 2.197225 15.380575 128 +world 1 115 2.197225 2.197225 126 +technic 7 100 2.302585 16.118095 140 +techniqu 4 99 2.302585 9.210340 138 +peopl 1 96 2.302585 2.302585 132 +proceed 7 93 2.397895 16.785265 152 +select 4 91 2.397895 9.591580 154 +center 3 88 2.397895 7.193685 158 +associ 2 93 2.397895 4.795790 151 +octob 2 89 2.397895 4.795790 156 +present 1 91 2.397895 2.397895 145 +novemb 3 81 2.484907 7.454721 179 +institut 2 84 2.484907 4.969814 187 +ieee 1 86 2.484907 2.484907 190 +build 1 85 2.484907 2.484907 184 +optim 2 79 2.564949 5.129898 197 +june 2 79 2.564949 5.129898 214 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +effici 5 73 2.639057 13.195285 233 +symposium 5 72 2.639057 13.195285 238 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +upson 1 71 2.639057 2.639057 218 +html 1 75 2.639057 2.639057 235 +august 5 66 2.708050 13.540250 257 +knowledg 3 67 2.708050 8.124150 243 +test 1 66 2.708050 2.708050 252 +improv 5 62 2.772589 13.862945 289 +ithaca 5 65 2.772589 13.862945 294 +result 5 65 2.772589 13.862945 281 +januari 3 62 2.772589 8.317767 264 +evalu 3 64 2.772589 8.317767 266 +interact 2 62 2.772589 5.545178 270 +descript 1 64 2.772589 2.772589 271 +septemb 1 65 2.772589 2.772589 274 +laboratori 1 63 2.772589 2.772589 292 +juli 6 60 2.833213 16.999278 305 +march 2 61 2.833213 5.666426 295 +automat 1 61 2.833213 2.833213 306 +reason 2 57 2.890372 5.780744 318 +publish 1 57 2.890372 2.890372 326 +explor 1 58 2.890372 2.890372 324 +februari 1 54 2.944439 2.944439 328 +talk 1 53 2.944439 2.944439 336 +profession 1 51 2.995732 2.995732 345 +approach 3 48 3.044522 9.133566 366 +principl 3 48 3.044522 9.133566 357 +california 3 46 3.091042 9.273126 388 +move 1 47 3.091042 3.091042 382 +york 7 41 3.218876 22.532132 435 +cach 5 41 3.218876 16.094380 432 +press 3 42 3.218876 9.656628 419 +combin 3 42 3.218876 9.656628 421 +annual 2 40 3.258097 6.516194 458 +societi 1 40 3.258097 3.258097 456 +map 1 39 3.258097 3.258097 452 +author 1 39 3.258097 3.258097 450 +seminar 1 38 3.295837 3.295837 470 +china 5 37 3.332205 16.661025 487 +formal 1 37 3.332205 3.332205 478 +multi 2 36 3.367296 6.734592 493 +post 1 35 3.401197 3.401197 505 +concurr 1 34 3.401197 3.401197 501 +survei 1 35 3.401197 3.401197 513 +manual 1 35 3.401197 3.401197 504 +obtain 1 33 3.433987 3.433987 534 +transform 6 32 3.465736 20.794416 542 +dissert 1 32 3.465736 3.465736 549 +scientist 4 31 3.496508 13.986032 560 +compon 1 30 3.555348 3.555348 570 +semant 3 29 3.583519 10.750557 587 +manipul 2 27 3.637586 7.275172 624 +revis 1 26 3.688879 3.688879 640 +doctor 1 24 3.761200 3.761200 709 +universityithaca 1 24 3.761200 3.761200 710 +wang 3 21 3.912023 11.736069 790 +hous 1 21 3.912023 3.912023 801 +expert 2 20 3.951244 7.902488 833 +break 1 20 3.951244 3.951244 812 +department 1 20 3.951244 3.951244 839 +beij 6 19 4.007333 24.043998 876 +boston 2 19 4.007333 8.014666 862 +partial 2 18 4.060443 8.120886 900 +germani 1 17 4.110874 4.110874 946 +young 5 16 4.174387 20.871935 991 +zhang 3 16 4.174387 12.523161 980 +partit 1 16 4.174387 4.174387 984 +attribut 2 14 4.317488 8.634976 1092 +massachusett 2 14 4.317488 8.634976 1118 +deriv 11 13 4.382027 48.202297 1145 +sigplan 3 13 4.382027 13.146081 1190 +tsinghua 1 13 4.382027 4.382027 1195 +huang 1 12 4.465908 4.465908 1202 +qualit 2 11 4.553877 9.107754 1362 +song 1 11 4.553877 4.553877 1380 +ofcomput 1 10 4.653960 4.653960 1442 +intermedi 5 9 4.753590 23.767950 1497 +discov 2 9 4.753590 9.507180 1562 +factor 2 9 4.753590 9.507180 1544 +mainten 1 9 4.753590 4.753590 1543 +congress 1 9 4.753590 4.753590 1592 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +compos 1 9 4.753590 4.753590 1527 +quantit 2 8 4.875197 9.750394 1654 +xerox 2 8 4.875197 9.750394 1725 +hallcornel 1 8 4.875197 4.875197 1757 +refere 1 7 5.010635 5.010635 1895 +uncertainti 1 7 5.010635 5.010635 1882 +increment 22 6 5.164786 113.625292 2206 +teitelbaum 8 6 5.164786 41.318288 2102 +sigact 2 6 5.164786 10.329572 2212 +indiana 2 6 5.164786 10.329572 2057 +usag 1 6 5.164786 5.164786 2209 +webster 4 5 5.347108 21.388432 2468 +florida 2 5 5.347108 10.694216 2526 +peke 1 5 5.347108 5.347108 2539 +petersburg 2 4 5.568345 11.136690 2989 +jolla 2 4 5.568345 11.136690 2988 +dagstuhl 2 4 5.568345 11.136690 2871 +stoller 1 4 5.568345 5.568345 2866 +kestrel 1 4 5.568345 5.568345 2990 +systemat 7 3 5.857933 41.005531 3781 +beach 2 3 5.857933 11.715866 3782 +schloss 1 3 5.857933 5.857933 3727 +tocomput 1 3 5.857933 5.857933 3162 +yanhong 3 2 6.263398 18.790194 5422 +cachet 3 2 6.263398 18.790194 5419 +anni 2 2 6.263398 12.526796 5420 +auxiliari 2 2 6.263398 12.526796 5421 +fuzzi 1 2 6.263398 6.263398 5423 +eduhttp 1 2 6.263398 6.263398 5424 +pageyanhong 1 1 6.957497 6.957497 12210 +forincrement 1 1 6.957497 6.957497 12211 +interactivesystem 1 1 6.957497 6.957497 12212 +systemorgan 1 1 6.957497 6.957497 12213 +talksph 1 1 6.957497 6.957497 12214 +basedsystemat 1 1 6.957497 6.957497 12215 +abstractjourn 1 1 6.957497 6.957497 12216 +inexact 1 1 6.957497 6.957497 12217 +wakayama 1 1 6.957497 6.957497 12218 +oggeb 1 1 6.957497 6.957497 12219 +basin 1 1 6.957497 6.957497 12220 +ri 1 1 6.957497 6.957497 12221 +tshinghua 1 1 6.957497 6.957497 12222 +lindlei 1 1 6.957497 6.957497 12223 +hallindiana 1 1 6.957497 6.957497 12224 +universitybloomington 1 1 6.957497 6.957497 12225 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html new file mode 100644 index 00000000..23a66bd9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychuang^ychuang.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +student 1 343 1.098612 1.098612 19 +cornel 4 215 1.386294 5.545176 23 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +hall 1 146 1.945910 1.945910 65 +upson 1 71 2.639057 2.639057 218 +onlin 1 75 2.639057 2.639057 223 +ithaca 1 65 2.772589 2.772589 294 +favorit 1 44 3.135494 3.135494 410 +edui 1 13 4.382027 4.382027 1193 +huang 2 12 4.465908 8.931816 1202 +cheng 3 10 4.653960 13.961880 1381 +ychuang 2 3 5.857933 11.715866 3093 +huangyi 1 1 6.957497 6.957497 12226 +documentscoursesprojectaccess 1 1 6.957497 6.957497 12227 +byvisitorslast 1 1 6.957497 6.957497 12228 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html new file mode 100644 index 00000000..a38bfdfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^ychung^ychung.html @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +engin 2 297 1.098612 2.197224 20 +us 2 329 1.098612 2.197224 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +email 2 220 1.386294 2.772588 29 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +like 5 132 1.945910 9.729550 81 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +construct 1 139 1.945910 1.945910 82 +high 4 130 2.079442 8.317768 101 +studi 2 120 2.079442 4.158884 91 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +world 1 115 2.197225 2.197225 126 +version 1 113 2.197225 2.197225 122 +person 1 111 2.197225 2.197225 117 +search 2 95 2.397895 4.795790 155 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +school 7 84 2.484907 17.394349 188 +chang 2 82 2.484907 4.969814 163 +thing 1 84 2.484907 2.484907 189 +wide 1 84 2.484907 2.484907 185 +novemb 1 81 2.484907 2.484907 179 +april 2 77 2.564949 5.129898 196 +master 2 76 2.564949 5.129898 216 +resum 2 79 2.564949 5.129898 217 +want 1 79 2.564949 2.564949 199 +name 1 72 2.639057 2.639057 220 +free 1 73 2.639057 2.639057 224 +onlin 1 75 2.639057 2.639057 223 +java 3 70 2.708050 8.124150 248 +degre 2 69 2.708050 5.416100 259 +would 2 67 2.708050 5.416100 251 +plai 2 60 2.833213 5.666426 307 +automat 1 61 2.833213 2.833213 306 +browser 1 56 2.890372 2.890372 313 +life 1 50 3.044522 3.044522 375 +still 1 50 3.044522 3.044522 362 +better 1 45 3.135494 3.135494 401 +music 4 42 3.218876 12.875504 436 +programm 1 39 3.258097 3.258097 445 +origin 1 38 3.295837 3.295837 472 +word 1 34 3.401197 3.401197 508 +kind 1 32 3.465736 3.465736 541 +titl 1 31 3.496508 3.496508 556 +actual 1 28 3.610918 3.610918 604 +brows 1 23 3.806662 3.806662 726 +sciencecornel 1 22 3.850148 3.850148 768 +love 1 21 3.912023 3.912023 804 +wonder 1 20 3.951244 3.951244 815 +beauti 1 18 4.060443 4.060443 912 +listen 1 18 4.060443 4.060443 907 +women 1 16 4.174387 4.174387 1004 +georg 1 16 4.174387 4.174387 994 +classic 1 14 4.317488 4.317488 1084 +came 3 13 4.382027 13.146081 1197 +forth 1 13 4.382027 4.382027 1186 +went 1 12 4.465908 4.465908 1279 +meng 1 12 4.465908 4.465908 1214 +america 2 11 4.553877 9.107754 1370 +pagewelcom 1 11 4.553877 4.553877 1344 +moment 1 11 4.553877 4.553877 1379 +virginia 2 8 4.875197 9.750394 1659 +chung 1 7 5.010635 5.010635 1964 +elementari 1 7 5.010635 5.010635 1825 +marri 1 7 5.010635 5.010635 1946 +perfect 1 7 5.010635 5.010635 1921 +piano 2 6 5.164786 10.329572 2201 +sung 1 6 5.164786 5.164786 2075 +emerg 1 6 5.164786 5.164786 2038 +junior 2 5 5.347108 10.694216 2519 +everybodi 1 5 5.347108 5.347108 2517 +korea 4 4 5.568345 22.273380 2971 +keyboard 2 4 5.568345 11.136690 2970 +moon 1 4 5.568345 5.568345 2991 +hire 1 4 5.568345 5.568345 2976 +seoul 1 3 5.857933 5.857933 3783 +forward 1 3 5.857933 5.857933 3784 +korean 2 2 6.263398 12.526796 5354 +sang 1 2 6.263398 6.263398 5356 +kang 1 2 6.263398 6.263398 5360 +mason 1 2 6.263398 6.263398 4916 +infom 1 2 6.263398 6.263398 5425 +ilbo 3 1 6.957497 20.872491 12229 +myoung 2 1 6.957497 13.914994 12230 +husband 2 1 6.957497 13.914994 12231 +chungyou 1 1 6.957497 6.957497 12232 +thvisitor 1 1 6.957497 6.957497 12233 +universitywher 1 1 6.957497 6.957497 12234 +kindergarten 1 1 6.957497 6.957497 12235 +universityin 1 1 6.957497 6.957497 12236 +happiest 1 1 6.957497 6.957497 12237 +forsaic 1 1 6.957497 6.957497 12238 +shin 1 1 6.957497 6.957497 12239 +seung 1 1 6.957497 6.957497 12240 +hoon 1 1 6.957497 6.957497 12241 +newpap 1 1 6.957497 6.957497 12242 +hangook 1 1 6.957497 6.957497 12243 +chosun 1 1 6.957497 6.957497 12244 +joongang 1 1 6.957497 6.957497 12245 +appletyoosun 1 1 6.957497 6.957497 12246 +triphamm 1 1 6.957497 6.957497 12247 +sbithaca 1 1 6.957497 6.957497 12248 +ychung 1 1 6.957497 6.957497 12249 +yooschung 1 1 6.957497 6.957497 12250 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html new file mode 100644 index 00000000..551746e5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yminsky^yminsky.html @@ -0,0 +1,222 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 3 293 1.098612 3.295836 17 +student 2 343 1.098612 2.197224 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +cornel 3 215 1.386294 4.158882 23 +graduat 2 215 1.386294 2.772588 31 +also 1 259 1.386294 1.386294 28 +public 3 202 1.609438 4.828314 43 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +hour 1 165 1.791759 1.791759 46 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +place 2 106 2.197225 4.394450 124 +make 2 111 2.197225 4.394450 120 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +version 1 113 2.197225 2.197225 122 +take 3 97 2.302585 6.907755 134 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +real 1 93 2.397895 2.397895 144 +center 1 88 2.397895 2.397895 158 +follow 1 92 2.397895 2.397895 143 +comment 1 93 2.397895 2.397895 146 +school 2 84 2.484907 4.969814 188 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +internet 1 83 2.484907 2.484907 186 +west 1 83 2.484907 2.484907 192 +thing 1 84 2.484907 2.484907 189 +good 4 77 2.564949 10.259796 200 +know 3 80 2.564949 7.694847 198 +want 1 79 2.564949 2.564949 199 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +upson 1 71 2.639057 2.639057 218 +free 1 73 2.639057 2.639057 224 +order 1 69 2.708050 2.708050 249 +ithaca 2 65 2.772589 5.545178 294 +virtual 1 62 2.772589 2.772589 285 +plai 3 60 2.833213 8.499639 307 +simpl 1 60 2.833213 2.833213 298 +unix 1 58 2.890372 2.890372 308 +direct 1 57 2.890372 2.890372 316 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +much 3 52 2.995732 8.987196 349 +particular 1 51 2.995732 2.995732 352 +date 1 51 2.995732 2.995732 344 +run 1 51 2.995732 2.995732 347 +maintain 1 51 2.995732 2.995732 342 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +right 1 48 3.044522 3.044522 363 +effect 1 46 3.091042 3.091042 385 +better 2 45 3.135494 6.270988 401 +execut 1 45 3.135494 3.135494 404 +howev 1 41 3.218876 3.218876 422 +york 1 41 3.218876 3.218876 435 +compani 1 41 3.218876 3.218876 423 +live 3 40 3.258097 9.774291 451 +movi 3 40 3.258097 9.774291 459 +microsoft 1 38 3.295837 3.295837 468 +expect 1 37 3.332205 3.332205 484 +game 2 36 3.367296 6.734592 498 +download 1 36 3.367296 3.367296 489 +everi 2 34 3.401197 6.802394 519 +michael 1 35 3.401197 3.401197 514 +toler 2 33 3.433987 6.867974 533 +go 1 33 3.433987 3.433987 529 +within 1 33 3.433987 3.433987 525 +often 1 31 3.496508 3.496508 551 +someth 1 31 3.496508 3.496508 554 +computersci 1 30 3.555348 3.555348 562 +focus 1 29 3.583519 3.583519 584 +hope 1 28 3.610918 3.610918 610 +great 5 27 3.637586 18.187930 626 +linux 2 27 3.637586 7.275172 631 +though 1 27 3.637586 3.637586 622 +rule 1 26 3.688879 3.688879 638 +compar 1 26 3.688879 3.688879 648 +notic 1 25 3.737670 3.737670 675 +strategi 1 25 3.737670 3.737670 682 +other 1 24 3.761200 3.761200 697 +yahoo 1 24 3.761200 3.761200 707 +magazin 1 24 3.761200 3.761200 704 +highli 1 23 3.806662 3.806662 725 +begin 1 23 3.806662 3.806662 716 +recommend 1 22 3.850148 3.850148 737 +almost 1 22 3.850148 3.850148 742 +instal 1 22 3.850148 3.850148 754 +love 2 21 3.912023 7.824046 804 +longer 2 20 3.951244 7.902488 816 +agent 1 18 4.060443 4.060443 910 +medic 2 17 4.110874 8.221748 958 +attempt 1 17 4.110874 4.110874 917 +intro 1 17 4.110874 4.110874 915 +thought 1 17 4.110874 4.110874 945 +easi 2 16 4.174387 8.348774 969 +critic 1 16 4.174387 4.174387 982 +rate 2 15 4.248495 8.496990 1037 +save 1 14 4.317488 4.317488 1099 +block 2 13 4.382027 8.764054 1183 +front 1 13 4.382027 4.382027 1154 +wife 1 13 4.382027 4.382027 1196 +brother 1 13 4.382027 4.382027 1189 +emac 1 13 4.382027 4.382027 1143 +uniqu 1 12 4.465908 4.465908 1228 +town 1 10 4.653960 4.653960 1458 +yellow 2 9 4.753590 9.507180 1601 +seven 1 9 4.753590 4.753590 1561 +crash 1 8 4.875197 4.875197 1616 +satisfi 1 8 4.875197 4.875197 1694 +qualifi 1 8 4.875197 4.875197 1721 +contrast 1 8 4.875197 4.875197 1637 +fail 1 8 4.875197 4.875197 1655 +slightli 1 7 5.010635 5.010635 1795 +marri 1 7 5.010635 5.010635 1946 +perfect 1 7 5.010635 5.010635 1921 +bookstor 1 7 5.010635 5.010635 1837 +tri 2 6 5.164786 10.329572 2166 +seen 1 6 5.164786 5.164786 2202 +impress 1 6 5.164786 5.164786 2096 +plu 1 6 5.164786 5.164786 2004 +slate 1 6 5.164786 5.164786 2021 +suni 1 5 5.347108 5.347108 2452 +twenti 1 5 5.347108 5.347108 2540 +fairli 1 5 5.347108 5.347108 2322 +hate 1 5 5.347108 5.347108 2529 +solid 1 5 5.347108 5.347108 2255 +webpag 1 4 5.568345 5.568345 2660 +complic 1 4 5.568345 5.568345 2902 +trivial 1 4 5.568345 5.568345 2786 +closest 1 4 5.568345 5.568345 2828 +syracus 2 3 5.857933 11.715866 3553 +edudepart 1 3 5.857933 5.857933 3302 +forfault 1 3 5.857933 5.857933 3748 +outof 1 3 5.857933 5.857933 3296 +nota 1 3 5.857933 5.857933 3785 +newli 1 3 5.857933 5.857933 3786 +health 1 3 5.857933 5.857933 3787 +advertis 1 3 5.857933 5.857933 3788 +yaron 2 2 6.263398 12.526796 4122 +minski 2 2 6.263398 12.526796 4123 +veggi 1 2 6.263398 6.263398 5426 +coop 1 2 6.263398 6.263398 4213 +nowadai 1 2 6.263398 6.263398 5376 +lisa 1 2 6.263398 6.263398 5427 +theidea 1 2 6.263398 6.263398 5428 +resumesom 1 2 6.263398 6.263398 5186 +miser 1 2 6.263398 6.263398 5359 +admit 1 2 6.263398 6.263398 5429 +amazon 1 2 6.263398 6.263398 5193 +flapdragon 2 1 6.957497 13.914994 12251 +yminski 1 1 6.957497 6.957497 12252 +comstock 1 1 6.957497 6.957497 12253 +onfault 1 1 6.957497 6.957497 12254 +thetacoma 1 1 6.957497 6.957497 12255 +livether 1 1 6.957497 6.957497 12256 +anopen 1 1 6.957497 6.957497 12257 +recommendit 1 1 6.957497 6.957497 12258 +ancientchines 1 1 6.957497 6.957497 12259 +extremlysimpl 1 1 6.957497 6.957497 12260 +thannoth 1 1 6.957497 6.957497 12261 +cgoban 1 1 6.957497 6.957497 12262 +nicest 1 1 6.957497 6.957497 12263 +goboard 1 1 6.957497 6.957497 12264 +thenet 1 1 6.957497 6.957497 12265 +minutesof 1 1 6.957497 6.957497 12266 +favoritepoem 1 1 6.957497 6.957497 12267 +lafiglia 1 1 6.957497 6.957497 12268 +piang 1 1 6.957497 6.957497 12269 +advicefor 1 1 6.957497 6.957497 12270 +interestinglink 1 1 6.957497 6.957497 12271 +alarmingli 1 1 6.957497 6.957497 12272 +firefli 1 1 6.957497 6.957497 12273 +bakeri 1 1 6.957497 6.957497 12274 +bigbook 1 1 6.957497 6.957497 12275 +bigyellow 1 1 6.957497 6.957497 12276 +kinslei 1 1 6.957497 6.957497 12277 +discount 1 1 6.957497 6.957497 12278 +booksel 1 1 6.957497 6.957497 12279 +mailcrypt 1 1 6.957497 6.957497 12280 +interfacemqbtazgjohoaaaedalfhlgjmdg 1 1 6.957497 6.957497 12281 +vhtnclpaifbwdcotmhzismfgcekuiegnpqqswfzbjwxdtyftcgilgjqvea 1 1 6.957497 6.957497 12282 +rbylf 1 1 6.957497 6.957497 12283 +zwqujcioczoecv 1 1 6.957497 6.957497 12284 +eykbnyxtncqafebqowwfybgtsgtwluctidxbwluctqgnzlmnvcmlbgwuzwrpokadqmfedgjohoykbnyxtncqebmsc 1 1 6.957497 6.957497 12285 +gkgarsokrinnoazihja 1 1 6.957497 6.957497 12286 +gcjsajjxnoertfeylbvhpnjtsweabogzkxaixpnhv 1 1 6.957497 6.957497 12287 +wumjgzsnvispwkrvzgdrojswmc 1 1 6.957497 6.957497 12288 +eigsqsb 1 1 6.957497 6.957497 12289 +bsbpw 1 1 6.957497 6.957497 12290 +jcwz 1 1 6.957497 6.957497 12291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html new file mode 100644 index 00000000..7950458b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^People^yuichi^yuichi.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +current 2 284 1.098612 2.197224 21 +last 1 314 1.098612 1.098612 14 +cornel 3 215 1.386294 4.158882 23 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +introduct 2 126 2.079442 4.158884 87 +compil 2 122 2.079442 4.158884 96 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +welcom 1 122 2.079442 2.079442 99 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +peopl 2 96 2.302585 4.605170 132 +info 2 85 2.484907 4.969814 176 +novemb 1 81 2.484907 2.484907 179 +state 1 76 2.564949 2.564949 207 +resum 1 79 2.564949 2.564949 217 +html 1 75 2.639057 2.639057 235 +artifici 2 63 2.772589 5.545178 280 +foundat 1 62 2.772589 2.772589 286 +semest 1 58 2.890372 2.890372 312 +format 2 48 3.044522 6.089044 356 +http 1 41 3.218876 3.218876 420 +unit 1 21 3.912023 3.912023 779 +modif 1 17 4.110874 4.110874 913 +practicum 2 16 4.174387 8.348774 960 +systemsc 1 11 4.553877 4.553877 1293 +pagecours 1 5 5.347108 5.347108 2395 +intelligencec 2 4 5.568345 11.136690 2673 +visionfal 1 2 6.263398 6.263398 4749 +eduhttp 1 2 6.263398 6.263398 5424 +yuichi 5 1 6.957497 34.787485 12292 +tsuchimoto 3 1 6.957497 20.872491 12293 +translatorsc 2 1 6.957497 13.914994 12294 +pageyuichi 1 1 6.957497 6.957497 12295 +workfal 1 1 6.957497 6.957497 12296 +engineeringspr 1 1 6.957497 6.957497 12297 +computingi 1 1 6.957497 6.957497 12298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ new file mode 100644 index 00000000..f8cd43cd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^CAM^ @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +us 5 329 1.098612 5.493060 16 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +design 5 213 1.386294 6.931470 25 +cornel 3 215 1.386294 4.158882 23 +softwar 3 220 1.386294 4.158882 30 +gener 2 220 1.386294 2.772588 27 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +paper 7 205 1.609438 11.266066 38 +includ 2 208 1.609438 3.218876 42 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +public 1 202 1.609438 1.609438 43 +implement 8 152 1.791759 14.334072 52 +network 7 168 1.791759 12.542313 61 +read 3 154 1.791759 5.375277 47 +avail 2 169 1.791759 3.583518 48 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +perform 5 143 1.945910 9.729550 74 +architectur 4 139 1.945910 7.783640 77 +first 2 140 1.945910 3.891820 71 +object 1 138 1.945910 1.945910 79 +file 1 132 1.945910 1.945910 70 +model 1 145 1.945910 1.945910 69 +machin 6 129 2.079442 12.476652 95 +high 4 130 2.079442 8.317768 101 +report 2 131 2.079442 4.158884 92 +document 1 121 2.079442 2.079442 89 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +version 5 113 2.197225 10.986125 122 +pleas 3 113 2.197225 6.591675 114 +specif 2 106 2.197225 4.394450 106 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +well 1 109 2.197225 2.197225 121 +part 3 98 2.302585 6.907755 129 +technic 2 100 2.302585 4.605170 140 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +commun 11 95 2.397895 26.376845 157 +select 2 91 2.397895 4.795790 154 +present 2 91 2.397895 4.795790 145 +proceed 1 93 2.397895 2.397895 152 +activ 19 84 2.484907 47.213233 182 +chang 4 82 2.484907 9.939628 163 +novemb 2 81 2.484907 4.969814 179 +build 1 85 2.484907 2.484907 184 +ieee 1 86 2.484907 2.484907 190 +level 1 87 2.484907 2.484907 180 +control 1 82 2.484907 2.484907 164 +larg 1 82 2.484907 2.484907 168 +messag 20 76 2.564949 51.298980 212 +know 2 80 2.564949 5.129898 198 +appear 2 78 2.564949 5.129898 210 +interfac 2 79 2.564949 5.129898 209 +sourc 1 77 2.564949 2.564949 201 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +state 1 76 2.564949 2.564949 207 +dynam 1 76 2.564949 2.564949 194 +order 2 69 2.708050 5.416100 249 +differ 1 66 2.708050 2.708050 253 +integr 1 67 2.708050 2.708050 245 +abstract 3 62 2.772589 8.317767 276 +evalu 2 64 2.772589 5.545178 266 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +share 1 59 2.833213 2.833213 304 +detail 2 57 2.890372 5.780744 321 +major 1 56 2.890372 2.890372 315 +direct 1 57 2.890372 2.890372 316 +think 1 57 2.890372 2.890372 314 +thesi 1 57 2.890372 2.890372 327 +processor 4 54 2.944439 11.777756 335 +allow 3 53 2.944439 8.833317 333 +februari 1 54 2.944439 2.944439 328 +talk 1 53 2.944439 2.944439 336 +investig 1 51 2.995732 2.995732 353 +hardwar 1 51 2.995732 2.995732 350 +standard 3 48 3.044522 9.133566 365 +without 2 50 3.044522 6.089044 370 +adapt 1 46 3.091042 3.091042 387 +effect 1 46 3.091042 3.091042 385 +california 1 46 3.091042 3.091042 388 +describ 4 45 3.135494 12.541976 400 +made 1 44 3.135494 3.135494 398 +even 1 45 3.135494 3.135494 393 +show 4 43 3.178054 12.712216 417 +mechan 3 43 3.178054 9.534162 416 +offer 1 43 3.178054 3.178054 414 +fast 1 42 3.218876 3.218876 429 +examin 1 42 3.218876 3.218876 424 +slide 1 38 3.295837 3.295837 467 +prototyp 1 38 3.295837 3.295837 463 +cost 4 37 3.332205 13.328820 480 +workstat 3 37 3.332205 9.996615 479 +connect 1 37 3.332205 3.332205 485 +word 1 34 3.401197 3.401197 508 +toler 1 33 3.433987 3.433987 533 +concept 1 32 3.465736 3.465736 537 +someth 1 31 3.496508 3.496508 554 +power 3 30 3.555348 10.666044 573 +compon 1 30 3.555348 3.555348 570 +exist 1 30 3.555348 3.555348 569 +rang 1 30 3.555348 3.555348 565 +releas 4 28 3.610918 14.443672 616 +cluster 4 28 3.610918 14.443672 612 +multiprocessor 4 28 3.610918 14.443672 605 +pass 2 28 3.610918 7.221836 611 +packag 1 28 3.610918 3.610918 614 +intend 1 28 3.610918 3.610918 599 +progress 1 28 3.610918 3.610918 598 +propos 1 28 3.610918 3.610918 602 +measur 1 28 3.610918 3.610918 609 +scale 1 28 3.610918 3.610918 613 +becom 1 28 3.610918 3.610918 603 +though 1 27 3.637586 3.637586 622 +compar 3 26 3.688879 11.066637 648 +berkelei 2 26 3.688879 7.377758 657 +altern 1 26 3.688879 3.688879 641 +challeng 1 26 3.688879 3.688879 653 +reliabl 1 25 3.737670 3.737670 674 +concern 1 25 3.737670 3.737670 666 +demonstr 2 24 3.761200 7.522400 694 +higher 1 24 3.761200 3.761200 690 +magazin 1 24 3.761200 3.761200 704 +flow 1 24 3.761200 3.761200 700 +reduc 2 22 3.850148 7.700296 759 +instal 1 22 3.850148 3.850148 754 +defin 1 22 3.850148 3.850148 746 +varieti 1 22 3.850148 3.850148 740 +flexibl 1 21 3.912023 3.912023 792 +portabl 1 20 3.951244 3.951244 819 +benchmark 2 19 4.007333 8.014666 859 +comparison 1 19 4.007333 4.007333 863 +lower 1 18 4.060443 4.060443 886 +layer 5 17 4.110874 20.554370 926 +interconnect 2 17 4.110874 8.221748 937 +whether 1 17 4.110874 4.110874 918 +protect 1 17 4.110874 4.110874 935 +outlin 1 17 4.110874 4.110874 914 +latenc 8 16 4.174387 33.395096 993 +commerci 1 16 4.174387 4.174387 1005 +across 1 16 4.174387 4.174387 974 +overhead 5 15 4.248495 21.242475 1035 +driven 2 15 4.248495 8.496990 1048 +micro 1 15 4.248495 4.248495 1031 +split 5 14 4.317488 21.587440 1078 +trip 2 14 4.317488 8.634976 1113 +achiev 1 14 4.317488 4.317488 1088 +eicken 7 13 4.382027 30.674189 1134 +thorsten 4 13 4.382027 17.528108 1133 +block 1 13 4.382027 4.382027 1183 +directli 1 13 4.382027 4.382027 1141 +signific 1 13 4.382027 4.382027 1125 +earlier 1 13 4.382027 4.382027 1140 +carri 1 13 4.382027 4.382027 1152 +coordin 1 13 4.382027 4.382027 1182 +introduc 1 13 4.382027 4.382027 1139 +characterist 2 12 4.465908 8.931816 1257 +onth 1 12 4.465908 4.465908 1218 +buffer 1 12 4.465908 4.465908 1211 +bandwidth 3 11 4.553877 13.661631 1365 +chri 1 11 4.553877 4.553877 1311 +primit 1 11 4.553877 4.553877 1317 +underli 1 10 4.653960 4.653960 1410 +equip 1 10 4.653960 4.653960 1459 +equival 1 9 4.753590 4.753590 1496 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +transmiss 1 9 4.753590 4.753590 1588 +significantli 1 9 4.753590 4.753590 1508 +desir 1 9 4.753590 4.753590 1542 +chao 3 8 4.875197 14.625591 1753 +spec 2 8 4.875197 9.750394 1640 +round 2 8 4.875197 9.750394 1769 +readm 1 8 4.875197 4.875197 1699 +ring 1 8 4.875197 4.875197 1684 +vineet 1 8 4.875197 4.875197 1639 +andcomput 1 8 4.875197 4.875197 1623 +gold 1 8 4.875197 4.875197 1745 +coast 1 8 4.875197 4.875197 1746 +poor 1 8 4.875197 4.875197 1736 +pittsburgh 1 7 5.010635 5.010635 1938 +larger 1 7 5.010635 5.010635 1875 +suffici 1 7 5.010635 5.010635 1897 +freeli 1 6 5.164786 5.164786 2014 +lack 1 6 5.164786 5.164786 1994 +affect 1 6 5.164786 5.164786 2044 +goldstein 1 6 5.164786 5.164786 2168 +phase 1 6 5.164786 5.164786 1977 +older 1 5 5.347108 5.347108 2387 +buch 1 5 5.347108 5.347108 2272 +culler 1 5 5.347108 5.347108 2381 +symp 1 5 5.347108 5.347108 2376 +australia 1 5 5.347108 5.347108 2478 +grzegorz 3 4 5.568345 16.705035 2923 +czajkowski 3 4 5.568345 16.705035 2924 +conform 2 4 5.568345 11.136690 2941 +theus 1 4 5.568345 5.568345 2992 +throughput 1 4 5.568345 5.568345 2993 +andevalu 1 4 5.568345 5.568345 2706 +asymptot 1 4 5.568345 5.568345 2676 +basu 1 4 5.568345 5.568345 2843 +forparallel 1 4 5.568345 5.568345 2703 +mpp 2 3 5.857933 11.715866 3194 +neta 1 3 5.857933 5.857933 3789 +thegener 1 3 5.857933 5.857933 3648 +moreinform 1 3 5.857933 5.857933 3307 +let 1 3 5.857933 5.857933 3790 +avula 1 3 5.857933 5.857933 3600 +abridg 1 3 5.857933 5.857933 3772 +magnitud 1 3 5.857933 5.857933 3582 +roughli 1 3 5.857933 5.857933 3097 +schauser 1 3 5.857933 5.857933 3599 +tremend 1 3 5.857933 5.857933 3453 +thegam 1 2 6.263398 6.263398 5430 +differencebetween 1 2 6.263398 6.263398 5431 +pleaseclick 1 2 6.263398 6.263398 5432 +messageslow 1 2 6.263398 6.263398 5040 +meiko 1 2 6.263398 6.263398 4996 +focuseson 1 2 6.263398 6.263398 5433 +veena 1 2 6.263398 6.263398 5000 +thecommun 1 2 6.263398 6.263398 4928 +thesetechniqu 1 2 6.263398 6.263398 4263 +thenetwork 1 2 6.263398 6.263398 5434 +incommun 1 2 6.263398 6.263398 4349 +microsecond 1 2 6.263398 6.263398 5435 +tominim 1 2 6.263398 6.263398 5436 +unnecessarili 1 2 6.263398 6.263398 4688 +mainstream 1 2 6.263398 6.263398 5437 +contactthorsten 1 2 6.263398 6.263398 5438 +activemessag 3 1 6.957497 20.872491 12299 +secondpart 2 1 6.957497 13.914994 12300 +messagescornel 1 1 6.957497 6.957497 12301 +implementationsact 1 1 6.957497 6.957497 12302 +codereleas 1 1 6.957497 6.957497 12303 +instructionson 1 1 6.957497 6.957497 12304 +releasenot 1 1 6.957497 6.957497 12305 +fileto 1 1 6.957497 6.957497 12306 +currentvers 1 1 6.957497 6.957497 12307 +libmpci 1 1 6.957497 6.957497 12308 +thedistribut 1 1 6.957497 6.957497 12309 +fordetail 1 1 6.957497 6.957497 12310 +briefnot 1 1 6.957497 6.957497 12311 +ibmrisc 1 1 6.957497 6.957497 12312 +hawblitzel 1 1 6.957497 6.957497 12313 +ieeesupercomput 1 1 6.957497 6.957497 12314 +spiteof 1 1 6.957497 6.957497 12315 +scommun 1 1 6.957497 6.957497 12316 +inferior 1 1 6.957497 6.957497 12317 +tmccm 1 1 6.957497 6.957497 12318 +standardmessag 1 1 6.957497 6.957497 12319 +tooffer 1 1 6.957497 6.957497 12320 +networkadapt 1 1 6.957497 6.957497 12321 +yieldsa 1 1 6.957497 6.957497 12322 +communicationsubstr 1 1 6.957497 6.957497 12323 +cbenchmark 1 1 6.957497 6.957497 12324 +lowmessag 1 1 6.957497 6.957497 12325 +compens 1 1 6.957497 6.957497 12326 +networklat 1 1 6.957497 6.957497 12327 +availablempich 1 1 6.957497 6.957497 12328 +implementationbenchmark 1 1 6.957497 6.957497 12329 +firmwar 1 1 6.957497 6.957497 12330 +butdo 1 1 6.957497 6.957497 12331 +assumefamiliar 1 1 6.957497 6.957497 12332 +mainperform 1 1 6.957497 6.957497 12333 +timeof 1 1 6.957497 6.957497 12334 +smessag 1 1 6.957497 6.957497 12335 +theu 1 1 6.957497 6.957497 12336 +themeiko 1 1 6.957497 6.957497 12337 +thehpam 1 1 6.957497 6.957497 12338 +fddi 1 1 6.957497 6.957497 12339 +theparagon 1 1 6.957497 6.957497 12340 +thesp 1 1 6.957497 6.957497 12341 +networksus 1 1 6.957497 6.957497 12342 +anyndia 1 1 6.957497 6.957497 12343 +ascompar 1 1 6.957497 6.957497 12344 +anatm 1 1 6.957497 6.957497 12345 +systemsoftwar 1 1 6.957497 6.957497 12346 +streamcommun 1 1 6.957497 6.957497 12347 +flowcontrol 1 1 6.957497 6.957497 12348 +builtfrom 1 1 6.957497 6.957497 12349 +artmultiprocessor 1 1 6.957497 6.957497 12350 +systemcoordin 1 1 6.957497 6.957497 12351 +andrequir 1 1 6.957497 6.957497 12352 +clusterinterconnect 1 1 6.957497 6.957497 12353 +showappl 1 1 6.957497 6.957497 12354 +smallmessag 1 1 6.957497 6.957497 12355 +messagesimplement 1 1 6.957497 6.957497 12356 +abstractth 1 1 6.957497 6.957497 12357 +overlapcomput 1 1 6.957497 6.957497 12358 +sacrificingprocessor 1 1 6.957497 6.957497 12359 +passingmultiprocessor 1 1 6.957497 6.957497 12360 +researchprototyp 1 1 6.957497 6.957497 12361 +communicationoverhead 1 1 6.957497 6.957497 12362 +simplecommun 1 1 6.957497 6.957497 12363 +isintrins 1 1 6.957497 6.957497 12364 +thehardwar 1 1 6.957497 6.957497 12365 +ncube 1 1 6.957497 6.957497 12366 +memoryextens 1 1 6.957497 6.957497 12367 +messagesar 1 1 6.957497 6.957497 12368 +forwhich 1 1 6.957497 6.957497 12369 +hardwaresupport 1 1 6.957497 6.957497 12370 +ofenhanc 1 1 6.957497 6.957497 12371 +efficientcommun 1 1 6.957497 6.957497 12372 +sitesact 1 1 6.957497 6.957497 12373 +messagesin 1 1 6.957497 6.957497 12374 +projectfor 1 1 6.957497 6.957497 12375 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html new file mode 100644 index 00000000..718065f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^HORUS^ARPA^arpa.html @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 10 443 0.693147 6.931470 6 +program 3 374 0.693147 2.079441 7 +work 3 380 0.693147 2.079441 9 +research 1 431 0.693147 0.693147 10 +us 2 329 1.098612 2.197224 16 +project 2 340 1.098612 2.197224 18 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +cornel 6 215 1.386294 8.317764 23 +also 2 259 1.386294 2.772588 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +applic 8 170 1.791759 14.334072 56 +distribut 3 162 1.791759 5.375277 51 +develop 3 174 1.791759 5.375277 53 +base 2 165 1.791759 3.583518 50 +network 1 168 1.791759 1.791759 61 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +perform 3 143 1.945910 5.837730 74 +year 2 148 1.945910 3.891820 84 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +technolog 6 131 2.079442 12.476652 102 +high 3 130 2.079442 6.238326 101 +provid 1 121 2.079442 2.079442 94 +manag 1 114 2.197225 2.197225 125 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +user 3 104 2.302585 6.907755 137 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +commun 2 95 2.397895 4.795790 157 +control 4 82 2.484907 9.939628 164 +environ 2 84 2.484907 4.969814 177 +wide 2 84 2.484907 4.969814 185 +chang 2 82 2.484907 4.969814 163 +resourc 1 81 2.484907 2.484907 172 +view 1 70 2.708050 2.708050 254 +would 1 67 2.708050 2.708050 251 +integr 1 67 2.708050 2.708050 245 +plan 3 65 2.772589 8.317767 272 +virtual 2 62 2.772589 5.545178 285 +interact 1 62 2.772589 2.772589 270 +creat 1 63 2.772589 2.772589 277 +dept 1 64 2.772589 2.772589 291 +space 2 57 2.890372 5.780744 310 +sever 1 56 2.890372 2.890372 322 +explor 1 58 2.890372 2.890372 324 +direct 1 57 2.890372 2.890372 316 +detail 1 57 2.890372 2.890372 321 +variou 1 56 2.890372 2.890372 317 +approach 1 48 3.044522 3.044522 366 +featur 2 46 3.091042 6.182084 386 +electron 1 47 3.091042 3.091042 379 +possibl 1 47 3.091042 3.091042 378 +offer 1 43 3.178054 3.178054 414 +futur 2 41 3.218876 6.437752 427 +combin 1 42 3.218876 3.218876 421 +might 1 41 3.218876 3.218876 426 +autom 1 41 3.218876 3.218876 434 +howev 1 41 3.218876 3.218876 422 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +expect 1 37 3.332205 3.332205 484 +multi 1 36 3.367296 3.367296 493 +manual 1 35 3.401197 3.401197 504 +next 1 34 3.401197 3.401197 517 +toler 4 33 3.433987 13.735948 533 +within 1 33 3.433987 3.433987 525 +fault 3 32 3.465736 10.397208 547 +extend 1 32 3.465736 3.465736 539 +rang 2 30 3.555348 7.110696 565 +exist 1 30 3.555348 3.555348 569 +option 1 30 3.555348 3.555348 575 +compon 1 30 3.555348 3.555348 570 +secur 1 30 3.555348 3.555348 577 +becom 1 28 3.610918 3.610918 603 +hope 1 28 3.610918 3.610918 610 +effort 3 26 3.688879 11.066637 652 +enhanc 1 26 3.688879 3.688879 644 +reliabl 1 25 3.737670 3.737670 674 +higher 1 24 3.761200 3.761200 690 +demonstr 1 24 3.761200 3.761200 694 +initi 1 23 3.806662 3.806662 717 +varieti 1 22 3.850148 3.850148 740 +thu 2 21 3.912023 7.824046 773 +similar 1 21 3.912023 3.912023 771 +flexibl 1 21 3.912023 3.912023 792 +among 1 21 3.912023 3.912023 781 +toolkit 1 20 3.951244 3.951244 835 +media 2 19 4.007333 8.014666 861 +element 1 18 4.060443 4.060443 895 +speed 1 18 4.060443 4.060443 911 +failur 1 18 4.060443 4.060443 898 +commerci 1 16 4.174387 4.174387 1005 +latenc 1 16 4.174387 4.174387 993 +stock 1 16 4.174387 4.174387 1007 +permit 1 16 4.174387 4.174387 962 +remot 2 15 4.248495 8.496990 1041 +novel 1 15 4.248495 4.248495 1039 +capabl 1 15 4.248495 4.248495 1016 +transit 1 15 4.248495 4.248495 1046 +action 1 15 4.248495 4.248495 1038 +horu 9 14 4.317488 38.857392 1116 +demand 5 14 4.317488 21.587440 1073 +floor 1 14 4.317488 4.317488 1070 +signific 1 13 4.382027 4.382027 1125 +coordin 1 13 4.382027 4.382027 1182 +kenneth 1 12 4.465908 4.465908 1265 +branch 2 11 4.553877 9.107754 1318 +reness 1 11 4.553877 4.553877 1333 +extrem 1 11 4.553877 4.553877 1330 +market 1 11 4.553877 4.553877 1361 +impact 1 11 4.553877 4.553877 1334 +prior 2 10 4.653960 9.307920 1438 +success 2 10 4.653960 9.307920 1390 +traffic 1 10 4.653960 4.653960 1421 +birman 1 9 4.753590 4.753590 1531 +robbert 1 9 4.753590 4.753590 1529 +telecommun 1 9 4.753590 4.753590 1565 +govern 1 9 4.753590 4.753590 1581 +occur 1 9 4.753590 4.753590 1572 +manufactur 1 8 4.875197 4.875197 1634 +illustr 1 8 4.875197 4.875197 1679 +synchroni 1 7 5.010635 5.010635 1923 +ground 1 7 5.010635 5.010635 1955 +privaci 1 6 5.164786 5.164786 2144 +benefit 1 6 5.164786 5.164786 2213 +isi 7 5 5.347108 37.429756 2443 +matur 1 5 5.347108 5.347108 2269 +isth 1 5 5.347108 5.347108 2532 +licens 1 5 5.347108 5.347108 2520 +mission 1 5 5.347108 5.347108 2465 +respond 1 5 5.347108 5.347108 2354 +substanti 1 4 5.568345 5.568345 2921 +visibl 1 4 5.568345 5.568345 2994 +naval 1 4 5.568345 5.568345 2920 +rapidli 1 4 5.568345 5.568345 2850 +militari 5 3 5.857933 29.289665 3326 +reconfigur 1 3 5.857933 5.857933 3556 +ofhoru 1 2 6.263398 6.263398 5181 +offersa 1 2 6.263398 6.263398 4071 +securityand 1 2 6.263398 6.263398 5066 +retain 1 2 6.263398 6.263398 5443 +basedcommun 1 2 6.263398 6.263398 4348 +stratu 1 2 6.263398 6.263398 5345 +isdescrib 1 2 6.263398 6.263398 5444 +groupwar 1 2 6.263398 6.263398 4857 +theatr 1 2 6.263398 6.263398 5173 +environmenthoru 1 1 6.957497 6.957497 12408 +shoru 1 1 6.957497 6.957497 12409 +reliabledistribut 1 1 6.957497 6.957497 12410 +demonstrategroupwar 1 1 6.957497 6.957497 12411 +foundto 1 1 6.957497 6.957497 12412 +synchronousprocess 1 1 6.957497 6.957497 12413 +importantresearch 1 1 6.957497 6.957497 12414 +performancer 1 1 6.957497 6.957497 12415 +calledact 1 1 6.957497 6.957497 12416 +messageswith 1 1 6.957497 6.957497 12417 +playbacksystem 1 1 6.957497 6.957497 12418 +calledcontinu 1 1 6.957497 6.957497 12419 +multimediaserv 1 1 6.957497 6.957497 12420 +telemedicin 1 1 6.957497 6.957497 12421 +videoon 1 1 6.957497 6.957497 12422 +andsecur 1 1 6.957497 6.957497 12423 +expectrapid 1 1 6.957497 6.957497 12424 +uptak 1 1 6.957497 6.957497 12425 +spana 1 1 6.957497 6.957497 12426 +financialtrad 1 1 6.957497 6.957497 12427 +factori 1 1 6.957497 6.957497 12428 +fordiscret 1 1 6.957497 6.957497 12429 +beingexplor 1 1 6.957497 6.957497 12430 +othernon 1 1 6.957497 6.957497 12431 +hiper 1 1 6.957497 6.957497 12432 +systemthat 1 1 6.957497 6.957497 12433 +aegi 1 1 6.957497 6.957497 12434 +battleradar 1 1 6.957497 6.957497 12435 +benefitfrom 1 1 6.957497 6.957497 12436 +migrateisi 1 1 6.957497 6.957497 12437 +communityin 1 1 6.957497 6.957497 12438 +agreementswith 1 1 6.957497 6.957497 12439 +subsidiari 1 1 6.957497 6.957497 12440 +mixtur 1 1 6.957497 6.957497 12441 +technologieswil 1 1 6.957497 6.957497 12442 +beseen 1 1 6.957497 6.957497 12443 +belowshow 1 1 6.957497 6.957497 12444 +andus 1 1 6.957497 6.957497 12445 +asset 1 1 6.957497 6.957497 12446 +thissort 1 1 6.957497 6.957497 12447 +utmost 1 1 6.957497 6.957497 12448 +whilealso 1 1 6.957497 6.957497 12449 +civilianand 1 1 6.957497 6.957497 12450 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ new file mode 100644 index 00000000..b77f3a95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^MediaNet^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +cornel 2 215 1.386294 2.772588 23 +group 3 183 1.609438 4.828314 36 +applic 4 170 1.791759 7.167036 56 +network 3 168 1.791759 5.375277 61 +develop 2 174 1.791759 3.583518 53 +data 1 170 1.791759 1.791759 49 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +perform 3 143 1.945910 5.837730 74 +process 2 142 1.945910 3.891820 72 +architectur 1 139 1.945910 1.945910 77 +high 2 130 2.079442 4.158884 101 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +user 2 104 2.302585 4.605170 137 +access 1 102 2.302585 2.302585 136 +commun 3 95 2.397895 7.193685 157 +level 2 87 2.484907 4.969814 180 +build 1 85 2.484907 2.484907 184 +multimedia 4 68 2.708050 10.832200 258 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +improv 2 62 2.772589 5.545178 289 +approach 1 48 3.044522 3.044522 366 +adapt 1 46 3.091042 3.091042 387 +video 2 44 3.135494 6.270988 405 +combin 2 42 3.218876 6.437752 421 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +brian 1 38 3.295837 3.295837 466 +workstat 1 37 3.332205 3.332205 479 +secur 1 30 3.555348 3.555348 577 +platform 1 29 3.583519 3.583519 591 +cluster 1 28 3.610918 3.610918 612 +reliabl 2 25 3.737670 7.475340 674 +store 1 24 3.761200 3.761200 693 +flexibl 1 21 3.912023 3.912023 792 +fund 1 21 3.912023 3.912023 805 +toolkit 2 20 3.951244 7.902488 835 +portabl 1 20 3.951244 3.951244 819 +smith 1 20 3.951244 3.951244 820 +media 1 19 4.007333 4.007333 861 +critic 1 16 4.174387 4.174387 982 +commerci 1 16 4.174387 4.174387 1005 +horu 2 14 4.317488 8.634976 1116 +audio 1 14 4.317488 4.317488 1094 +eicken 1 13 4.382027 4.382027 1134 +primit 1 11 4.553877 4.553877 1317 +facilit 2 10 4.653960 9.307920 1412 +rapid 1 10 4.653960 4.653960 1453 +strength 1 9 4.753590 4.753590 1494 +transport 1 8 4.875197 4.875197 1672 +fromth 1 7 5.010635 5.010635 1802 +contract 1 6 5.164786 5.164786 1985 +testb 1 5 5.347108 5.347108 2456 +darpa 1 4 5.568345 5.568345 2944 +dramat 1 3 5.857933 5.857933 3239 +magnitud 1 3 5.857933 5.857933 3582 +militari 1 3 5.857933 5.857933 3326 +multimediaappl 1 3 5.857933 5.857933 3274 +todevelop 1 2 6.263398 6.263398 5448 +communicationprimit 1 2 6.263398 6.263398 5449 +thorstenvon 1 2 6.263398 6.263398 5450 +medianet 3 1 6.957497 20.872491 12468 +projectmedianet 1 1 6.957497 6.957497 12469 +protocolsth 1 1 6.957497 6.957497 12470 +communicationmak 1 1 6.957497 6.957497 12471 +foradvanc 1 1 6.957497 6.957497 12472 +includeaudio 1 1 6.957497 6.957497 12473 +technologyofficefor 1 1 6.957497 6.957497 12474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html new file mode 100644 index 00000000..7a17e0bb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^NuPrl^nuprl.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +project 3 340 1.098612 3.295836 18 +cornel 2 215 1.386294 2.772588 23 +link 2 247 1.386294 2.772588 24 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +introduct 1 126 2.079442 2.079442 87 +document 1 121 2.079442 2.079442 89 +theori 3 111 2.197225 6.591675 127 +user 1 104 2.302585 2.302585 137 +mani 1 92 2.397895 2.397895 150 +help 1 83 2.484907 2.484907 175 +main 1 67 2.708050 2.708050 256 +written 1 63 2.772589 2.772589 278 +browser 2 56 2.890372 5.780744 313 +reason 1 57 2.890372 2.890372 318 +index 1 56 2.890372 2.890372 309 +suggest 1 53 2.944439 2.944439 331 +autom 1 41 3.218876 3.218876 434 +announc 1 40 3.258097 3.258097 441 +articl 2 33 3.433987 6.867974 530 +linux 1 27 3.637586 3.637586 631 +theorem 1 21 3.912023 3.912023 786 +feedback 1 19 4.007333 4.007333 854 +nuprl 11 10 4.653960 51.193560 1402 +curiou 1 5 5.347108 5.347108 2541 +vaughn 1 1 6.957497 6.957497 12475 +askaltavista 1 1 6.957497 6.957497 12476 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ new file mode 100644 index 00000000..3bc21e49 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SP-2^ @@ -0,0 +1,218 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 13 374 0.693147 9.010911 7 +inform 3 412 0.693147 2.079441 8 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 3 329 1.098612 3.295836 16 +also 3 259 1.386294 4.158882 28 +softwar 2 220 1.386294 2.772588 30 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +includ 6 208 1.609438 9.656628 42 +oper 1 180 1.609438 1.609438 34 +parallel 4 169 1.791759 7.167036 60 +read 3 154 1.791759 5.375277 47 +avail 2 169 1.791759 3.583518 48 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +file 7 132 1.945910 13.621370 70 +first 2 140 1.945910 3.891820 71 +perform 2 143 1.945910 3.891820 74 +like 1 132 1.945910 1.945910 81 +assign 1 135 1.945910 1.945910 66 +process 1 142 1.945910 1.945910 72 +compil 4 122 2.079442 8.317768 96 +machin 3 129 2.079442 6.238326 95 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +look 2 107 2.197225 4.394450 115 +pleas 2 113 2.197225 4.394450 114 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +make 1 111 2.197225 2.197225 120 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +commun 3 95 2.397895 7.193685 157 +follow 2 92 2.397895 4.795790 143 +call 1 91 2.397895 2.397895 153 +center 1 88 2.397895 2.397895 158 +section 1 94 2.397895 2.397895 149 +mani 1 92 2.397895 2.397895 150 +activ 5 84 2.484907 12.424535 182 +librari 3 87 2.484907 7.454721 181 +info 2 85 2.484907 4.969814 176 +contain 1 81 2.484907 2.484907 174 +messag 7 76 2.564949 17.954643 212 +exampl 4 77 2.564949 10.259796 195 +sourc 4 77 2.564949 10.259796 201 +want 1 79 2.564949 2.564949 199 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +addit 1 74 2.639057 2.639057 228 +main 1 67 2.708050 2.708050 256 +experi 2 64 2.772589 5.545178 283 +creat 1 63 2.772589 2.772589 277 +previou 1 62 2.772589 2.772589 290 +locat 4 59 2.833213 11.332852 303 +type 3 61 2.833213 8.499639 296 +simpl 1 60 2.833213 2.833213 298 +space 1 57 2.890372 2.890372 310 +variou 1 56 2.890372 2.890372 317 +local 3 55 2.944439 8.833317 334 +found 2 53 2.944439 5.888878 337 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +sampl 1 53 2.944439 2.944439 339 +processor 1 54 2.944439 2.944439 335 +hardwar 1 51 2.995732 2.995732 350 +run 1 51 2.995732 2.995732 347 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +directori 3 45 3.135494 9.406482 396 +execut 2 45 3.135494 6.270988 404 +describ 1 45 3.135494 3.135494 400 +offer 1 43 3.178054 3.178054 414 +programm 1 39 3.258097 3.258097 445 +must 1 40 3.258097 3.258097 442 +continu 1 39 3.258097 3.258097 448 +open 1 38 3.295837 3.295837 469 +return 2 34 3.401197 6.802394 502 +global 1 34 3.401197 3.401197 520 +word 1 34 3.401197 3.401197 508 +built 1 29 3.583519 3.583519 592 +releas 1 28 3.610918 3.610918 616 +pass 1 28 3.610918 3.610918 611 +though 1 27 3.637586 3.637586 622 +proc 1 26 3.688879 3.688879 649 +frame 1 24 3.761200 3.761200 684 +displai 1 23 3.806662 3.806662 712 +instal 3 22 3.850148 11.550444 754 +instead 1 22 3.850148 3.850148 756 +path 1 21 3.912023 3.912023 778 +portabl 1 20 3.951244 3.951244 819 +sure 1 20 3.951244 3.951244 813 +debug 6 17 4.110874 24.665244 944 +regular 1 17 4.110874 4.110874 929 +stop 1 17 4.110874 4.110874 942 +layer 1 17 4.110874 4.110874 926 +remot 2 15 4.248495 8.496990 1041 +configur 1 15 4.248495 4.248495 1012 +overhead 1 15 4.248495 4.248495 1035 +fortran 1 15 4.248495 4.248495 1027 +split 15 14 4.317488 64.762320 1078 +command 2 14 4.317488 8.634976 1083 +matlab 1 14 4.317488 4.317488 1081 +script 2 13 4.382027 8.764054 1171 +step 1 13 4.382027 4.382027 1138 +emac 1 13 4.382027 4.382027 1143 +difficulti 1 13 4.382027 4.382027 1132 +remov 1 12 4.465908 4.465908 1225 +insid 1 12 4.465908 4.465908 1262 +characterist 1 12 4.465908 4.465908 1257 +replic 1 12 4.465908 4.465908 1231 +node 7 11 4.553877 31.877139 1326 +statement 2 11 4.553877 9.107754 1313 +eight 1 11 4.553877 4.553877 1331 +bandwidth 1 11 4.553877 4.553877 1365 +enter 1 10 4.653960 4.653960 1454 +stack 1 10 4.653960 4.653960 1389 +login 2 9 4.753590 9.507180 1550 +informationabout 1 9 4.753590 4.753590 1515 +readm 1 8 4.875197 4.875197 1699 +job 1 8 4.875197 4.875197 1702 +insert 1 8 4.875197 4.875197 1687 +round 1 8 4.875197 4.875197 1769 +header 2 7 5.010635 10.021270 1787 +hit 1 7 5.010635 5.010635 1965 +attach 1 7 5.010635 5.010635 1785 +exactli 1 7 5.010635 5.010635 1817 +usag 1 6 5.164786 5.164786 2209 +neither 1 6 5.164786 5.164786 1990 +phase 1 6 5.164786 5.164786 1977 +onto 1 6 5.164786 5.164786 2089 +proce 1 6 5.164786 5.164786 2114 +nativ 1 6 5.164786 5.164786 2192 +whichi 1 6 5.164786 5.164786 2056 +shell 3 5 5.347108 16.041324 2353 +overlap 1 5 5.347108 5.347108 2368 +theth 1 5 5.347108 5.347108 2325 +czar 1 5 5.347108 5.347108 2503 +cuc 3 4 5.568345 16.705035 2630 +makefil 3 4 5.568345 16.705035 2662 +spam 2 4 5.568345 11.136690 2927 +arch 1 4 5.568345 5.568345 2995 +forparallel 1 4 5.568345 5.568345 2703 +hide 1 4 5.568345 5.568345 2996 +commonli 1 4 5.568345 5.568345 2877 +asymptot 1 4 5.568345 5.568345 2676 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +andyou 1 3 5.857933 5.857933 3256 +setenv 4 2 6.263398 25.053592 4491 +haveth 1 2 6.263398 6.263398 5378 +properli 1 2 6.263398 6.263398 5454 +animplement 1 2 6.263398 6.263398 4931 +granita 6 1 6.957497 41.744982 12493 +bench 4 1 6.957497 27.829988 12494 +am_run 4 1 6.957497 27.829988 12495 +tcsh 2 1 6.957497 13.914994 12496 +bash 2 1 6.957497 13.914994 12497 +gmake 2 1 6.957497 13.914994 12498 +ampicc 2 1 6.957497 13.914994 12499 +granitathrough 1 1 6.957497 6.957497 12500 +asinteract 1 1 6.957497 6.957497 12501 +problemsdur 1 1 6.957497 6.957497 12502 +stufffrom 1 1 6.957497 6.957497 12503 +unam 1 1 6.957497 6.957497 12504 +manyou 1 1 6.957497 6.957497 12505 +infoexplor 1 1 6.957497 6.957497 12506 +commandsand 1 1 6.957497 6.957497 12507 +activemassag 1 1 6.957497 6.957497 12508 +peor 1 1 6.957497 6.957497 12509 +messagesor 1 1 6.957497 6.957497 12510 +homegrown 1 1 6.957497 6.957497 12511 +softwarein 1 1 6.957497 6.957497 12512 +besur 1 1 6.957497 6.957497 12513 +csplit 1 1 6.957497 6.957497 12514 +globalpoint 1 1 6.957497 6.957497 12515 +dereferenc 1 1 6.957497 6.957497 12516 +latencyof 1 1 6.957497 6.957497 12517 +shellsshould 1 1 6.957497 6.957497 12518 +asact 1 1 6.957497 6.957497 12519 +scriptsloc 1 1 6.957497 6.957497 12520 +programfoo 1 1 6.957497 6.957497 12521 +foodebug 1 1 6.957497 6.957497 12522 +splitc_debug 1 1 6.957497 6.957497 12523 +aftersplitc_main 1 1 6.957497 6.957497 12524 +ongranita 1 1 6.957497 6.957497 12525 +youwant 1 1 6.957497 6.957497 12526 +thenattach 1 1 6.957497 6.957497 12527 +breakpoint 1 1 6.957497 6.957497 12528 +messagesact 1 1 6.957497 6.957497 12529 +layerthat 1 1 6.957497 6.957497 12530 +triplat 1 1 6.957497 6.957497 12531 +libspgam 1 1 6.957497 6.957497 12532 +aand 1 1 6.957497 6.957497 12533 +beforerun 1 1 6.957497 6.957497 12534 +runningprgm 1 1 6.957497 6.957497 12535 +mpimpi 1 1 6.957497 6.957497 12536 +popularmessag 1 1 6.957497 6.957497 12537 +mpich 1 1 6.957497 6.957497 12538 +overact 1 1 6.957497 6.957497 12539 +easiest 1 1 6.957497 6.957497 12540 +fooyou 1 1 6.957497 6.957497 12541 +lookat 1 1 6.957497 6.957497 12542 +examplesin 1 1 6.957497 6.957497 12543 +ampi 1 1 6.957497 6.957497 12544 +likeordinari 1 1 6.957497 6.957497 12545 +softwaresoftwar 1 1 6.957497 6.957497 12546 +xpdbx 1 1 6.957497 6.957497 12547 +bison 1 1 6.957497 6.957497 12548 +problemsif 1 1 6.957497 6.957497 12549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html new file mode 100644 index 00000000..07564b53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^SimLab^index.html @@ -0,0 +1,106 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 4 431 0.693147 2.772588 10 +system 3 443 0.693147 2.079441 6 +program 2 374 0.693147 1.386294 7 +project 4 340 1.098612 4.394448 18 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +languag 4 227 1.386294 5.545176 26 +cornel 3 215 1.386294 4.158882 23 +gener 2 220 1.386294 2.772588 27 +softwar 2 220 1.386294 2.772588 30 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +model 4 145 1.945910 7.783640 69 +architectur 1 139 1.945910 1.945910 77 +technolog 1 131 2.079442 2.079442 102 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +version 2 113 2.197225 4.394450 122 +code 1 108 2.197225 2.197225 116 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +peopl 1 96 2.302585 2.302585 132 +select 3 91 2.397895 7.193685 154 +present 2 91 2.397895 4.795790 145 +commun 1 95 2.397895 2.397895 157 +activ 1 84 2.484907 2.484907 182 +environ 1 84 2.484907 2.484907 177 +method 1 80 2.564949 2.564949 213 +simul 3 66 2.708050 8.124150 255 +creat 3 63 2.772589 8.317767 277 +overview 1 56 2.890372 2.890372 323 +direct 1 57 2.890372 2.890372 316 +scientif 2 53 2.944439 5.888878 341 +hardwar 1 51 2.995732 2.995732 350 +numer 1 49 3.044522 3.044522 369 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +discuss 1 45 3.135494 3.135494 399 +paul 1 38 3.295837 3.295837 471 +collabor 2 32 3.465736 6.931472 543 +transform 1 32 3.465736 3.465736 542 +richard 1 31 3.496508 3.496508 559 +compon 1 30 3.555348 3.555348 570 +semant 1 29 3.583519 3.583519 587 +propos 1 28 3.610918 3.610918 602 +effort 2 26 3.688879 7.377758 652 +defin 1 22 3.850148 3.850148 746 +geometri 1 22 3.850148 3.850148 752 +reduc 1 22 3.850148 3.850148 759 +qualiti 1 20 3.951244 3.951244 832 +longer 1 20 3.951244 3.951244 816 +particularli 1 19 4.007333 4.007333 867 +geometr 1 19 4.007333 4.007333 852 +exercis 1 19 4.007333 4.007333 842 +brief 1 16 4.174387 4.174387 1001 +topolog 1 14 4.317488 4.317488 1089 +mesh 1 11 4.553877 4.553877 1351 +guarante 1 10 4.653960 4.653960 1391 +rais 1 8 4.875197 4.875197 1711 +manufactur 1 8 4.875197 4.875197 1634 +thegoal 1 6 5.164786 5.164786 2033 +proce 1 6 5.164786 5.164786 2114 +synthes 1 5 5.347108 5.347108 2451 +weyl 1 4 5.568345 5.568345 2854 +substrat 1 4 5.568345 5.568345 2857 +nist 1 4 5.568345 5.568345 2973 +zippel 1 4 5.568345 5.568345 2879 +theus 1 4 5.568345 5.568345 2992 +rick 1 4 5.568345 5.568345 2646 +chew 2 3 5.857933 11.715866 3618 +enorm 1 3 5.857933 5.857933 3431 +expend 1 2 6.263398 6.263398 5451 +scientificsoftwar 1 2 6.263398 6.263398 5038 +andform 1 2 6.263398 6.263398 4274 +levelprogram 1 2 6.263398 6.263398 5452 +insystem 1 2 6.263398 6.263398 4172 +ideason 1 2 6.263398 6.263398 4469 +microstorag 1 2 6.263398 6.263398 4887 +palmer 1 2 6.263398 6.263398 5453 +simlab 3 1 6.957497 20.872491 12477 +oncomplex 1 1 6.957497 6.957497 12478 +bringingtogeth 1 1 6.957497 6.957497 12479 +symbolicmathemat 1 1 6.957497 6.957497 12480 +levelat 1 1 6.957497 6.957497 12481 +softwarepackag 1 1 6.957497 6.957497 12482 +microstoragearchitectur 1 1 6.957497 6.957497 12483 +computeralgebra 1 1 6.957497 6.957497 12484 +thechain 1 1 6.957497 6.957497 12485 +thearpa 1 1 6.957497 6.957497 12486 +madefast 1 1 6.957497 6.957497 12487 +ofnon 1 1 6.957497 6.957497 12488 +contemporan 1 1 6.957497 6.957497 12489 +chainsprogram 1 1 6.957497 6.957497 12490 +complextopolog 1 1 6.957497 6.957497 12491 +numericalalgorithm 1 1 6.957497 6.957497 12492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ new file mode 100644 index 00000000..49d951c4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^Split-C^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +inform 5 412 0.693147 3.465735 8 +program 1 374 0.693147 0.693147 7 +cornel 2 215 1.386294 2.772588 23 +public 1 202 1.609438 1.609438 43 +contact 1 153 1.791759 1.791759 59 +code 3 108 2.197225 6.591675 116 +memori 1 101 2.302585 2.302585 139 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +sourc 3 77 2.564949 7.694847 201 +share 1 59 2.833213 2.833213 304 +releas 3 28 3.610918 10.832754 616 +multiprocessor 1 28 3.610918 3.610918 605 +berkelei 1 26 3.688879 3.688879 657 +supercomput 1 25 3.737670 3.737670 681 +prepar 2 20 3.951244 7.902488 824 +split 10 14 4.317488 43.174880 1078 +eicken 4 13 4.382027 17.528108 1134 +thorsten 1 13 4.382027 4.382027 1133 +chao 1 8 4.875197 4.875197 1753 +goldstein 1 6 5.164786 5.164786 2168 +culler 1 5 5.347108 5.347108 2381 +spam 1 4 5.568345 5.568345 2927 +grzegorz 1 4 5.568345 5.568345 2923 +czajkowski 1 4 5.568345 5.568345 2924 +neta 1 3 5.857933 5.857933 3789 +dusseau 1 3 5.857933 5.857933 3382 +yelick 1 3 5.857933 5.857933 3374 +thorstenvon 1 2 6.263398 6.263398 5450 +multiprocessorsa 1 2 6.263398 6.263398 5455 +krishnamurthi 1 2 6.263398 6.263398 5408 +lumetta 1 2 6.263398 6.263398 5409 +contactthorsten 1 2 6.263398 6.263398 5438 +ccornel 1 1 6.957497 6.957497 12550 +implementationssplit 1 1 6.957497 6.957497 12551 +isimpl 1 1 6.957497 6.957497 12552 +messagesfor 1 1 6.957497 6.957497 12553 +ofsplit 1 1 6.957497 6.957497 12554 +distr 1 1 6.957497 6.957497 12555 +implementedon 1 1 6.957497 6.957497 12556 +contactchi 1 1 6.957497 6.957497 12557 +runningsolari 1 1 6.957497 6.957497 12558 +mattwelsh 1 1 6.957497 6.957497 12559 +cparallel 1 1 6.957497 6.957497 12560 +abstractproject 1 1 6.957497 6.957497 12561 +sitessplit 1 1 6.957497 6.957497 12562 +chome 1 1 6.957497 6.957497 12563 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ new file mode 100644 index 00000000..b61312f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^U-Net^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +cornel 1 215 1.386294 1.386294 23 +second 1 81 2.484907 2.484907 166 +html 1 75 2.639057 2.639057 235 +browser 1 56 2.890372 2.890372 313 +move 1 47 3.091042 3.091042 382 +http 1 41 3.218876 3.218876 420 +default 1 5 5.347108 5.347108 2335 +redirect 1 1 6.957497 6.957497 12564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html new file mode 100644 index 00000000..d221f96a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^csrvl^csrvl.html @@ -0,0 +1,221 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +cornel 9 215 1.386294 12.476646 23 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +paper 6 205 1.609438 9.656628 38 +list 5 201 1.609438 8.047190 39 +includ 2 208 1.609438 3.218876 42 +public 1 202 1.609438 1.609438 43 +applic 3 170 1.791759 5.375277 56 +base 3 165 1.791759 5.375277 50 +parallel 3 169 1.791759 5.375277 60 +avail 3 169 1.791759 5.375277 48 +distribut 2 162 1.791759 3.583518 51 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +professor 3 137 1.945910 5.837730 76 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +report 3 131 2.079442 6.238326 92 +confer 3 126 2.079442 6.238326 100 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +intern 3 108 2.197225 6.591675 128 +pleas 1 113 2.197225 2.197225 114 +theori 1 111 2.197225 2.197225 127 +structur 1 106 2.197225 2.197225 105 +assist 1 112 2.197225 2.197225 113 +technic 2 100 2.302585 4.605170 140 +proceed 3 93 2.397895 7.193685 152 +follow 2 92 2.397895 4.795790 143 +select 2 91 2.397895 4.795790 154 +mani 2 92 2.397895 4.795790 150 +associ 2 93 2.397895 4.795790 151 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +octob 1 89 2.397895 2.397895 156 +search 1 95 2.397895 2.397895 155 +ieee 5 86 2.484907 12.424535 190 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +server 3 76 2.564949 7.694847 204 +sourc 1 77 2.564949 2.564949 201 +master 1 76 2.564949 2.564949 216 +dynam 1 76 2.564949 2.564949 194 +symposium 2 72 2.639057 5.278114 238 +workshop 2 71 2.639057 5.278114 239 +involv 1 71 2.639057 2.639057 227 +intellig 1 72 2.639057 2.639057 225 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +laboratori 3 63 2.772589 8.317767 292 +prof 3 64 2.772589 8.317767 273 +foundat 2 62 2.772589 5.545178 286 +ithaca 1 65 2.772589 2.772589 294 +complex 1 64 2.772589 2.772589 269 +artifici 1 63 2.772589 2.772589 280 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +abstract 1 62 2.772589 2.772589 276 +automat 2 61 2.833213 5.666426 306 +locat 1 59 2.833213 2.833213 303 +type 1 61 2.833213 2.833213 296 +direct 2 57 2.890372 5.780744 316 +browser 1 56 2.890372 2.890372 313 +unix 1 58 2.890372 2.890372 308 +index 1 56 2.890372 2.890372 309 +three 1 54 2.944439 2.944439 330 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +approach 1 48 3.044522 3.044522 366 +done 2 47 3.091042 6.182084 381 +move 2 47 3.091042 6.182084 382 +video 3 44 3.135494 9.406482 405 +discuss 1 45 3.135494 3.135494 399 +mechan 2 43 3.178054 6.356108 416 +vision 6 41 3.218876 19.313256 430 +autom 2 41 3.218876 6.437752 434 +submit 1 39 3.258097 3.258097 440 +author 1 39 3.258097 3.258097 450 +microsoft 1 38 3.295837 3.295837 468 +robot 11 36 3.367296 37.040256 497 +global 1 34 3.401197 3.401197 520 +tech 1 35 3.401197 3.401197 515 +queri 1 33 3.433987 3.433987 524 +extend 1 32 3.465736 3.465736 539 +titl 1 31 3.496508 3.496508 556 +hard 1 30 3.555348 3.555348 563 +full 1 28 3.610918 3.610918 615 +cluster 1 28 3.610918 3.610918 612 +multiprocessor 1 28 3.610918 3.610918 605 +hope 1 28 3.610918 3.610918 610 +pass 1 28 3.610918 3.610918 611 +manipul 4 27 3.637586 14.550344 624 +arrai 1 27 3.637586 3.637586 627 +proc 2 26 3.688879 7.377758 649 +revis 2 26 3.688879 7.377758 640 +detect 1 26 3.688879 3.688879 646 +task 1 25 3.737670 3.737670 678 +greg 2 24 3.761200 7.522400 695 +motion 1 24 3.761200 3.761200 699 +frame 1 24 3.761200 3.761200 684 +thank 1 23 3.806662 3.806662 721 +mobil 1 23 3.806662 3.806662 730 +break 2 20 3.951244 7.902488 812 +supervis 1 20 3.951244 3.951244 840 +mpeg 1 20 3.951244 3.951244 831 +scheme 1 20 3.951244 3.951244 818 +boston 1 19 4.007333 4.007333 862 +scott 1 18 4.060443 4.060443 884 +miller 2 17 4.110874 8.221748 949 +match 2 16 4.174387 8.348774 965 +diego 2 16 4.174387 8.348774 992 +micro 2 15 4.248495 8.496990 1031 +configur 1 15 4.248495 4.248495 1012 +massiv 1 15 4.248495 4.248495 1026 +scene 1 14 4.317488 4.317488 1114 +split 1 14 4.317488 4.317488 1078 +anonym 1 14 4.317488 4.317488 1100 +canada 1 13 4.382027 4.382027 1158 +daniel 3 12 4.465908 13.397724 1233 +bruce 2 12 4.465908 8.931816 1226 +franc 1 12 4.465908 4.465908 1276 +tour 1 11 4.553877 4.553877 1307 +peter 1 11 4.553877 4.553877 1316 +keyword 1 11 4.553877 4.553877 1356 +donald 9 9 4.753590 42.782310 1510 +transmiss 1 9 4.753590 4.753590 1588 +juan 1 9 4.753590 4.753590 1580 +wilson 1 9 4.753590 4.753590 1536 +invari 2 8 4.875197 9.750394 1748 +potenti 1 8 4.875197 4.875197 1690 +japan 1 8 4.875197 4.875197 1762 +siggraph 1 8 4.875197 4.875197 1773 +justin 3 7 5.010635 15.031905 1789 +ramin 2 7 5.010635 10.021270 1820 +sensor 1 7 5.010635 5.010635 1920 +zabih 3 6 5.164786 15.494358 2138 +huttenloch 2 6 5.164786 10.329572 1983 +actuat 1 5 5.347108 5.347108 2442 +solid 1 5 5.347108 5.347108 2255 +symmetr 1 4 5.568345 5.568345 2908 +publicationsth 1 4 5.568345 5.568345 2859 +triangul 1 4 5.568345 5.568345 2903 +csrvl 3 3 5.857933 17.573799 3543 +bhringer 3 3 5.857933 17.573799 3606 +microfabr 2 3 5.857933 11.715866 3610 +jen 1 3 5.857933 5.857933 3378 +ryan 1 3 5.857933 5.857933 3679 +michel 1 3 5.857933 5.857933 3791 +szewczyk 1 3 5.857933 5.857933 3108 +voskuhl 1 3 5.857933 5.857933 3109 +matt 1 3 5.857933 5.857933 3792 +electro 2 2 6.263398 12.526796 5014 +mihailovich 2 2 6.263398 12.526796 5018 +macdonald 2 2 6.263398 12.526796 5006 +laboratorywelcom 1 2 6.263398 6.263398 5439 +mem 1 2 6.263398 6.263398 5007 +andclassif 1 2 6.263398 6.263398 5390 +windowsnt 1 2 6.263398 6.263398 5440 +ree 1 2 6.263398 6.263398 5009 +automationnic 1 2 6.263398 6.263398 5019 +homolog 1 2 6.263398 6.263398 5441 +andj 1 2 6.263398 6.263398 5020 +latomb 1 2 6.263398 6.263398 5021 +brigg 1 2 6.263398 6.263398 5008 +actuatorarrai 1 2 6.263398 6.263398 5017 +hing 1 2 6.263398 6.263398 5442 +montral 1 2 6.263398 6.263398 5394 +cytacki 1 2 6.263398 6.263398 5330 +aaron 1 2 6.263398 6.263398 4438 +csrvlcornel 1 1 6.957497 6.957497 12376 +nich 1 1 6.957497 6.957497 12377 +rrentli 1 1 6.957497 6.957497 12378 +ofresearch 1 1 6.957497 6.957497 12379 +pictor 1 1 6.957497 6.957497 12380 +projectsth 1 1 6.957497 6.957497 12381 +byramin 1 1 6.957497 6.957497 12382 +allowingscen 1 1 6.957497 6.957497 12383 +onplatform 1 1 6.957497 6.957497 12384 +nynet 1 1 6.957497 6.957497 12385 +foru 1 1 6.957497 6.957497 12386 +sproject 1 1 6.957497 6.957497 12387 +currentlyconsid 1 1 6.957497 6.957497 12388 +theissuesher 1 1 6.957497 6.957497 12389 +thecsrvl 1 1 6.957497 6.957497 12390 +serverar 1 1 6.957497 6.957497 12391 +sensorless 1 1 6.957497 6.957497 12392 +oiso 1 1 6.957497 6.957497 12393 +micromechan 1 1 6.957497 6.957497 12394 +quebc 1 1 6.957497 6.957497 12395 +authorthes 1 1 6.957497 6.957497 12396 +pedro 1 1 6.957497 6.957497 12397 +felzenszwalb 1 1 6.957497 6.957497 12398 +lilien 1 1 6.957497 6.957497 12399 +maharbiz 1 1 6.957497 6.957497 12400 +scharstein 1 1 6.957497 6.957497 12401 +stump 1 1 6.957497 6.957497 12402 +fernando 1 1 6.957497 6.957497 12403 +viton 1 1 6.957497 6.957497 12404 +wayt 1 1 6.957497 6.957497 12405 +welsh 1 1 6.957497 6.957497 12406 +whelan 1 1 6.957497 6.957497 12407 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html new file mode 100644 index 00000000..445f3229 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^icap.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +inform 5 412 0.693147 3.465735 8 +research 4 431 0.693147 2.772588 10 +work 2 380 0.693147 1.386294 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +cornel 2 215 1.386294 2.772588 23 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +group 3 183 1.609438 4.828314 36 +fall 1 181 1.609438 1.609438 40 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +construct 2 139 1.945910 3.891820 82 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +document 2 121 2.079442 4.158884 89 +report 2 131 2.079442 4.158884 92 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +structur 2 106 2.197225 4.394450 105 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +manag 1 114 2.197225 2.197225 125 +access 3 102 2.302585 6.907755 136 +user 1 104 2.302585 2.302585 137 +text 1 98 2.302585 2.302585 133 +technic 1 100 2.302585 2.302585 140 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +activ 1 84 2.484907 2.484907 182 +build 1 85 2.484907 2.484907 184 +server 1 76 2.564949 2.564949 204 +materi 2 75 2.639057 5.278114 221 +onlin 1 75 2.639057 2.639057 223 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +order 1 69 2.708050 2.708050 249 +test 1 66 2.708050 2.708050 252 +knowledg 1 67 2.708050 2.708050 243 +collect 3 65 2.772589 8.317767 268 +locat 1 59 2.833213 2.833213 303 +overview 1 56 2.890372 2.890372 323 +found 1 53 2.944439 2.944439 337 +undergradu 1 54 2.944439 2.944439 338 +digit 1 52 2.995732 2.995732 348 +form 1 39 3.258097 3.258097 443 +john 1 33 3.433987 3.433987 532 +computersci 1 30 3.555348 3.555348 562 +determin 1 27 3.637586 3.637586 630 +consist 1 26 3.688879 3.688879 651 +util 1 21 3.912023 3.912023 774 +increas 1 20 3.951244 3.951244 829 +longer 1 20 3.951244 3.951244 816 +ever 1 19 4.007333 4.007333 872 +dean 1 14 4.317488 4.317488 1104 +captur 3 12 4.465908 13.397724 1232 +volum 1 11 4.553877 4.553877 1347 +explicit 1 9 4.753590 4.753590 1525 +extract 1 8 4.875197 4.875197 1728 +begun 1 5 5.347108 5.347108 2386 +allan 1 4 5.568345 5.568345 2849 +thisdocu 1 3 5.857933 5.857933 3336 +krafft 1 3 5.857933 5.857933 3638 +waysthat 1 2 6.263398 6.263398 5445 +tabular 1 2 6.263398 6.263398 4515 +informationag 1 2 6.263398 6.263398 5446 +hyperlink 1 2 6.263398 6.263398 5447 +oninform 1 2 6.263398 6.263398 4316 +projectinform 1 1 6.957497 6.957497 12451 +accessth 1 1 6.957497 6.957497 12452 +ofonlin 1 1 6.957497 6.957497 12453 +forhuman 1 1 6.957497 6.957497 12454 +hopcroft 1 1 6.957497 6.957497 12455 +davisin 1 1 6.957497 6.957497 12456 +researchextract 1 1 6.957497 6.957497 12457 +thestructur 1 1 6.957497 6.957497 12458 +extractinginform 1 1 6.957497 6.957497 12459 +collectionsof 1 1 6.957497 6.957497 12460 +nationwid 1 1 6.957497 6.957497 12461 +sciencetechn 1 1 6.957497 6.957497 12462 +moreaccess 1 1 6.957497 6.957497 12463 +toit 1 1 6.957497 6.957497 12464 +visitingscientist 1 1 6.957497 6.957497 12465 +jimdavi 1 1 6.957497 6.957497 12466 +jrdpublicationsjam 1 1 6.957497 6.957497 12467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html new file mode 100644 index 00000000..13cd5bfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^Info^Projects^zeno^zeno.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +research 2 431 0.693147 1.386294 10 +project 1 340 1.098612 1.098612 18 +cornel 1 215 1.386294 1.386294 23 +softwar 1 220 1.386294 1.386294 30 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +develop 1 174 1.791759 1.791759 53 +peopl 1 96 2.302585 2.302585 132 +multimedia 2 68 2.708050 5.416100 258 +direct 1 57 2.890372 2.890372 316 +mission 1 5 5.347108 5.347108 2465 +zeno 1 3 5.857933 5.857933 3580 +potpourri 1 2 6.263398 6.263398 4547 +groupzeno 1 1 6.957497 6.957497 12565 +curricula 1 1 6.957497 6.957497 12566 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html new file mode 100644 index 00000000..9e9df506 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.cornell.edu^cgi-bin^ssis^Info^People^kmai^kmai.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 3 380 0.693147 2.079441 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +topic 1 114 2.197225 2.197225 110 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +control 1 82 2.484907 2.484907 164 +stuff 1 87 2.484907 2.484907 171 +come 2 78 2.564949 5.129898 202 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +master 1 76 2.564949 2.564949 216 +degre 2 69 2.708050 5.416100 259 +receiv 1 66 2.708050 2.708050 244 +ithaca 1 65 2.772589 2.772589 294 +plan 1 65 2.772589 2.772589 272 +prof 1 64 2.772589 2.772589 273 +januari 1 62 2.772589 2.772589 264 +back 2 60 2.833213 5.666426 297 +date 1 51 2.995732 2.995732 344 +still 1 50 3.044522 3.044522 362 +california 1 46 3.091042 3.091042 388 +video 2 44 3.135494 6.270988 405 +long 1 43 3.178054 3.178054 413 +york 1 41 3.218876 3.218876 435 +vision 1 41 3.218876 3.218876 430 +compani 1 41 3.218876 3.218876 423 +robot 1 36 3.367296 3.367296 497 +game 1 36 3.367296 3.367296 498 +actual 1 28 3.610918 3.610918 604 +quit 1 27 3.637586 3.637586 633 +challeng 1 26 3.688879 3.688879 653 +hill 1 25 3.737670 3.737670 670 +frame 1 24 3.761200 3.761200 684 +motion 1 24 3.761200 3.761200 699 +viewer 1 21 3.912023 3.912023 787 +leav 1 21 3.912023 3.912023 772 +martin 1 21 3.912023 3.912023 794 +left 1 19 4.007333 4.007333 851 +miss 1 19 4.007333 4.007333 866 +wind 1 18 4.060443 4.060443 908 +bachelor 1 17 4.110874 4.110874 957 +segment 1 17 4.110874 4.110874 931 +jose 1 16 4.174387 4.174387 976 +month 1 15 4.248495 4.248495 1025 +mayb 1 15 4.248495 4.248495 1014 +enough 1 15 4.248495 4.248495 1040 +anywai 1 15 4.248495 4.248495 1047 +decid 1 14 4.317488 4.317488 1075 +land 1 12 4.465908 4.465908 1273 +meng 1 12 4.465908 4.465908 1214 +pagewelcom 1 11 4.553877 4.553877 1344 +see 1 11 4.553877 4.553877 1337 +island 1 11 4.553877 4.553877 1345 +santa 2 10 4.653960 9.307920 1441 +town 1 10 4.653960 4.653960 1458 +babylon 1 8 4.875197 4.875197 1731 +ramin 1 7 5.010635 5.010635 1820 +rain 2 6 5.164786 10.329572 2137 +snow 1 6 5.164786 5.164786 2031 +zabih 1 6 5.164786 5.164786 2138 +corp 1 6 5.164786 5.164786 2139 +green 1 4 5.568345 5.568345 2848 +barbara 2 3 5.857933 11.715866 3380 +csrvl 1 3 5.857933 5.857933 3543 +binghamton 1 3 5.857933 5.857933 3544 +season 1 2 6.263398 6.263398 4872 +syosset 1 1 6.957497 6.957497 9497 +californialockhe 1 1 6.957497 6.957497 9498 +yorkaltera 1 1 6.957497 6.957497 9499 +californiafun 1 1 6.957497 6.957497 9500 +domainvth 1 1 6.957497 6.957497 9501 +siteoth 1 1 6.957497 6.957497 9502 +worldcareermosaictop 1 1 6.957497 6.957497 9503 +kmai 1 1 6.957497 6.957497 9504 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^ new file mode 100644 index 00000000..5ce0f52c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +link 1 247 1.386294 1.386294 24 +public 2 202 1.609438 3.218876 43 +group 1 183 1.609438 1.609438 36 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +relat 1 139 1.945910 1.945910 68 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +peopl 2 96 2.302585 4.605170 132 +academ 1 82 2.484907 2.484907 178 +requir 1 81 2.484907 2.484907 167 +descript 1 64 2.772589 2.772589 271 +organ 1 65 2.772589 2.772589 265 +faculti 2 56 2.890372 5.780744 325 +overview 1 56 2.890372 2.890372 323 +talk 1 53 2.944439 2.944439 336 +visitor 1 49 3.044522 3.044522 371 +seminar 1 38 3.295837 3.295837 470 +staff 1 36 3.367296 3.367296 490 +utc 2 27 3.637586 7.275172 629 +alumni 1 21 3.912023 3.912023 807 +facil 1 20 3.951244 3.951244 814 +event 1 18 4.060443 4.060443 896 +catalog 1 10 4.653960 4.653960 1431 +upcom 1 8 4.875197 4.875197 1685 +calendar 1 8 4.875197 4.875197 1649 +recruit 1 6 5.164786 5.164786 2145 +admiss 1 4 5.568345 5.568345 2704 +pagegener 1 1 6.957497 6.957497 12567 +schedulespag 1 1 6.957497 6.957497 12568 +directoryth 1 1 6.957497 6.957497 12569 +universitywww 1 1 6.957497 6.957497 12570 +informationgrip 1 1 6.957497 6.957497 12571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html new file mode 100644 index 00000000..7b7e9394 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^bledsoe.html @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +professor 1 137 1.945910 1.945910 76 +confer 2 126 2.079442 4.158884 100 +intern 4 108 2.197225 8.788900 128 +mathemat 3 108 2.197225 6.591675 123 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +associ 1 93 2.397895 2.397895 151 +journal 1 83 2.484907 2.484907 183 +learn 1 86 2.484907 2.484907 170 +exampl 1 77 2.564949 2.564949 195 +intellig 3 72 2.639057 7.917171 225 +servic 2 72 2.639057 5.278114 236 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +artifici 2 63 2.772589 5.545178 280 +previou 1 62 2.772589 2.772589 290 +automat 1 61 2.833213 2.833213 306 +index 1 56 2.890372 2.890372 309 +profession 1 51 2.995732 2.995732 345 +california 1 46 3.091042 3.091042 388 +third 1 43 3.178054 3.178054 412 +autom 2 41 3.218876 6.437752 434 +editor 1 41 3.218876 3.218876 433 +societi 1 40 3.258097 3.258097 456 +award 3 34 3.401197 10.203591 523 +next 1 34 3.401197 3.401197 517 +board 3 33 3.433987 10.301961 528 +profil 2 30 3.555348 7.110696 581 +chair 2 29 3.583519 7.167038 596 +focus 1 29 3.583519 3.583519 584 +american 1 27 3.637586 3.637586 634 +berkelei 1 26 3.688879 3.688879 657 +higher 1 24 3.761200 3.761200 690 +honor 1 23 3.806662 3.806662 729 +theorem 3 21 3.912023 11.736069 786 +prove 3 19 4.007333 12.021999 848 +citi 1 19 4.007333 4.007333 874 +researchmi 1 14 4.317488 4.317488 1119 +joint 2 13 4.382027 8.764054 1130 +lake 1 11 4.553877 4.553877 1373 +distinguish 1 11 4.553877 4.553877 1357 +utah 1 9 4.753590 4.753590 1585 +centenni 1 7 5.010635 5.010635 1967 +presid 1 6 5.164786 5.164786 2196 +heurist 1 6 5.164786 5.164786 2125 +emeritu 2 5 5.347108 10.694216 2544 +salt 1 5 5.347108 5.347108 2413 +analog 2 4 5.568345 11.136690 2875 +bledso 1 4 5.568345 5.568345 2999 +presentarea 1 4 5.568345 5.568345 3026 +artificialintellig 3 3 5.857933 17.573799 3608 +truste 2 3 5.857933 11.715866 3900 +alsointerest 1 3 5.857933 5.857933 3813 +donnel 1 2 6.263398 6.263398 5233 +mileston 1 2 6.263398 6.263398 4416 +jointconfer 1 2 6.263398 6.263398 5030 +woodrow 2 1 6.957497 13.914994 14287 +bledsoepet 1 1 6.957497 6.957497 14288 +americanmathemat 1 1 6.957497 6.957497 14289 +onartifici 1 1 6.957497 6.957497 14290 +interestautomat 1 1 6.957497 6.957497 14291 +theoremproof 1 1 6.957497 6.957497 14292 +levelplan 1 1 6.957497 6.957497 14293 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html new file mode 100644 index 00000000..1a38b97a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1994^profiles^jwerth.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 4 297 1.098612 4.394448 20 +current 1 284 1.098612 1.098612 21 +softwar 4 220 1.386294 5.545176 30 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +oper 2 180 1.609438 3.218876 34 +parallel 7 169 1.791759 12.542313 60 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +lectur 2 135 1.945910 3.891820 73 +model 2 145 1.945910 3.891820 69 +process 2 142 1.945910 3.891820 72 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +confer 3 126 2.079442 6.238326 100 +compil 2 122 2.079442 4.158884 96 +schedul 2 119 2.079442 4.158884 85 +mathemat 3 108 2.197225 6.591675 123 +intern 3 108 2.197225 6.591675 128 +code 1 108 2.197225 2.197225 116 +technic 1 100 2.302585 2.302585 140 +proceed 3 93 2.397895 7.193685 152 +associ 2 93 2.397895 4.795790 151 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +educ 5 86 2.484907 12.424535 191 +ieee 2 86 2.484907 4.969814 190 +environ 2 84 2.484907 4.969814 177 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +august 2 66 2.708050 5.416100 257 +practic 1 70 2.708050 2.708050 246 +interact 1 62 2.772589 2.772589 270 +previou 1 62 2.772589 2.772589 290 +direct 2 57 2.890372 5.780744 316 +index 1 56 2.890372 2.890372 309 +local 1 55 2.944439 2.944439 334 +profession 1 51 2.995732 2.995732 345 +set 1 50 3.044522 3.044522 361 +york 1 41 3.218876 3.218876 435 +societi 1 40 3.258097 3.258097 456 +multipl 1 39 3.258097 3.258097 453 +formal 1 37 3.332205 3.332205 478 +committe 1 34 3.401197 3.401197 522 +concurr 1 34 3.401197 3.401197 501 +next 1 34 3.401197 3.401197 517 +board 3 33 3.433987 10.301961 528 +john 2 33 3.433987 6.867974 532 +scientist 1 31 3.496508 3.496508 560 +profil 2 30 3.555348 7.110696 581 +computersci 1 30 3.555348 3.555348 562 +chair 3 29 3.583519 10.750557 596 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +debug 1 17 4.110874 4.110874 944 +brown 4 16 4.174387 16.697548 977 +researchmi 1 14 4.317488 4.317488 1119 +incomput 1 14 4.317488 4.317488 1096 +vice 1 9 4.753590 4.753590 1604 +unifi 1 8 4.875197 4.875197 1774 +newton 1 7 5.010635 5.010635 1824 +softwareengin 1 6 5.164786 5.164786 2162 +jain 3 5 5.347108 16.041324 2332 +parallelprogram 1 5 5.347108 5.347108 2379 +werth 7 4 5.568345 38.978415 3004 +andimplement 1 4 5.568345 5.568345 3029 +hyder 1 4 5.568345 5.568345 2772 +interestparallel 1 3 5.857933 5.857933 3806 +publicationss 1 2 6.263398 6.263398 5732 +thirteenth 1 2 6.263398 6.263398 5733 +werthsenior 1 1 6.957497 6.957497 14294 +emori 1 1 6.957497 6.957497 14295 +accredit 1 1 6.957497 6.957497 14296 +sobek 1 1 6.957497 6.957497 14297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html new file mode 100644 index 00000000..d18d69cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^adale.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +distribut 2 162 1.791759 3.583518 51 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +architectur 3 139 1.945910 5.837730 77 +area 2 144 1.945910 3.891820 80 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +databas 2 122 2.079442 4.158884 86 +manag 2 114 2.197225 4.394450 125 +person 1 111 2.197225 2.197225 117 +member 1 84 2.484907 2.484907 165 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +colleg 1 61 2.833213 2.833213 300 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +index 1 56 2.890372 2.890372 309 +algebra 1 45 3.135494 3.135494 394 +multi 1 36 3.367296 3.367296 493 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +strategi 1 25 3.737670 3.737670 682 +researchmi 1 14 4.317488 4.317488 1119 +fromindividu 1 12 4.465908 4.465908 1290 +oxford 1 6 5.164786 5.164786 2121 +regent 1 5 5.347108 5.347108 2551 +emeritu 1 5 5.347108 5.347108 2544 +england 1 5 5.347108 5.347108 2557 +alfr 2 4 5.568345 11.136690 2882 +dale 1 4 5.568345 5.568345 2687 +crow 1 3 5.857933 5.857933 3845 +trammel 1 2 6.263398 6.263398 5562 +andmap 1 2 6.263398 6.263398 4258 +daleno 1 1 6.957497 6.957497 14298 +exet 1 1 6.957497 6.957497 14299 +interestdatabas 1 1 6.957497 6.957497 14300 +stagei 1 1 6.957497 6.957497 14301 +studiedinclud 1 1 6.957497 6.957497 14302 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html new file mode 100644 index 00000000..e5fc0ddc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^brumfield.html @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 5 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 7 443 0.693147 4.852029 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +distribut 4 162 1.791759 7.167036 51 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +texa 1 160 1.791759 1.791759 64 +model 4 145 1.945910 7.783640 69 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +databas 2 122 2.079442 4.158884 86 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +mathemat 2 108 2.197225 4.394450 123 +teach 1 108 2.197225 2.197225 112 +intern 1 108 2.197225 2.197225 128 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +solut 1 82 2.484907 2.484907 162 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +member 1 84 2.484907 2.484907 165 +orient 1 80 2.564949 2.564949 205 +decemb 1 80 2.564949 2.564949 215 +symposium 2 72 2.639057 5.278114 238 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +visual 1 48 3.044522 3.044522 372 +math 1 44 3.135494 3.135494 402 +natur 1 44 3.135494 3.135494 406 +respons 1 37 3.332205 3.332205 476 +award 2 34 3.401197 6.802394 523 +concurr 1 34 3.401197 3.401197 501 +obtain 1 33 3.433987 3.433987 534 +exist 1 30 3.555348 3.555348 569 +profil 1 30 3.555348 3.555348 581 +propos 1 28 3.610918 3.610918 602 +repres 1 26 3.688879 3.688879 656 +honor 1 23 3.806662 3.806662 729 +excel 1 19 4.007333 4.007333 868 +miller 1 17 4.110874 4.110874 949 +role 1 14 4.317488 4.317488 1101 +fromindividu 1 12 4.465908 4.465908 1290 +queue 3 10 4.653960 13.961880 1386 +purdu 1 10 4.653960 4.653960 1466 +length 1 10 4.653960 4.653960 1400 +jeffrei 2 9 4.753590 9.507180 1612 +modula 1 9 4.753590 4.753590 1613 +researchi 1 8 4.875197 4.875197 1756 +inproceed 1 8 4.875197 4.875197 1670 +distributedsystem 1 6 5.164786 5.164786 2022 +ofparallel 1 5 5.347108 5.347108 2380 +bulletin 1 5 5.347108 5.347108 2343 +sigcs 2 4 5.568345 11.136690 2865 +throughput 1 4 5.568345 5.568345 2993 +richter 1 4 5.568345 5.568345 2957 +chou 1 4 5.568345 5.568345 3033 +georgia 1 3 5.857933 5.857933 3834 +publicationsj 1 3 5.857933 5.857933 3808 +shen 1 3 5.857933 5.857933 3370 +performanceof 1 2 6.263398 6.263398 4585 +modelingof 1 2 6.263398 6.263398 5734 +loui 1 2 6.263398 6.263398 5220 +brumfield 4 1 6.957497 27.829988 14303 +brumfieldsenior 1 1 6.957497 6.957497 14304 +interestperform 1 1 6.957497 6.957497 14305 +designersof 1 1 6.957497 6.957497 14306 +eachresourc 1 1 6.957497 6.957497 14307 +tasksawait 1 1 6.957497 6.957497 14308 +computationof 1 1 6.957497 6.957497 14309 +graf 1 1 6.957497 6.957497 14310 +verdi 1 1 6.957497 6.957497 14311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html new file mode 100644 index 00000000..b60b5d8a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^cline.html @@ -0,0 +1,114 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 2 384 0.693147 1.386294 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +softwar 5 220 1.386294 6.931470 30 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +professor 2 137 1.945910 3.891820 76 +problem 2 147 1.945910 3.891820 75 +area 1 144 1.945910 1.945910 80 +analysi 3 124 2.079442 6.238326 98 +number 2 130 2.079442 4.158884 97 +tool 1 117 2.079442 2.079442 93 +schedul 1 119 2.079442 2.079442 85 +mathemat 8 108 2.197225 17.577800 123 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +journal 4 83 2.484907 9.939628 183 +solut 1 82 2.484907 2.484907 162 +member 1 84 2.484907 2.484907 165 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +appli 2 71 2.639057 5.278114 226 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +special 1 56 2.890372 2.890372 320 +explor 1 58 2.890372 2.890372 324 +major 1 56 2.890372 2.890372 315 +scientif 4 53 2.944439 11.777756 341 +three 1 54 2.944439 2.944439 330 +profession 2 51 2.995732 5.991464 345 +particular 1 51 2.995732 2.995732 352 +numer 4 49 3.044522 12.178088 369 +editor 2 41 3.218876 6.437752 433 +transact 2 39 3.258097 6.516194 438 +statist 1 35 3.401197 3.401197 521 +board 1 33 3.433987 3.433987 528 +obtain 1 33 3.433987 3.433987 534 +transform 1 32 3.465736 3.465736 542 +profil 1 30 3.555348 3.555348 581 +packag 1 28 3.610918 3.610918 614 +symbol 1 27 3.637586 3.637586 620 +methodolog 1 23 3.806662 3.806662 733 +director 2 22 3.850148 7.700296 767 +siam 4 21 3.912023 15.648092 800 +rout 1 21 3.912023 3.912023 793 +expert 1 20 3.951244 3.951244 833 +region 1 19 4.007333 4.007333 875 +north 1 19 4.007333 4.007333 873 +dimension 1 18 4.060443 4.060443 909 +estim 2 17 4.110874 8.221748 930 +matrix 1 17 4.110874 4.110874 933 +condit 2 16 4.174387 8.348774 975 +alan 2 13 4.382027 8.764054 1146 +emploi 1 12 4.465908 4.465908 1284 +fromindividu 1 12 4.465908 4.465908 1290 +michigan 1 11 4.553877 4.553877 1368 +node 1 11 4.553877 4.553877 1326 +editori 1 9 4.753590 4.753590 1611 +hundr 1 9 4.753590 4.753590 1528 +researchi 1 8 4.875197 4.875197 1756 +curv 1 8 4.875197 4.875197 1656 +coast 1 8 4.875197 4.875197 1746 +presenc 1 8 4.875197 4.875197 1671 +counter 1 8 4.875197 4.875197 1765 +centenni 1 7 5.010635 5.010635 1967 +constrain 2 6 5.164786 10.329572 2042 +southern 1 6 5.164786 5.164786 2191 +spline 1 6 5.164786 5.164786 2007 +fit 2 5 5.347108 10.694216 2285 +holland 1 5 5.347108 5.347108 2490 +triangul 2 4 5.568345 11.136690 2903 +closest 1 4 5.568345 5.568345 2828 +cline 6 3 5.857933 35.147598 3218 +interestmathemat 1 3 5.857933 5.857933 3860 +scatter 1 3 5.857933 5.857933 3351 +delaunai 1 3 5.857933 5.857933 3619 +imac 1 3 5.857933 5.857933 3718 +wilkinson 1 3 5.857933 5.857933 3579 +subprogram 1 2 6.263398 6.263398 5618 +andsurfac 1 2 6.263398 6.263398 5735 +publicationsr 1 2 6.263398 6.263398 5736 +king 1 2 6.263398 6.263398 5737 +meyer 1 2 6.263398 6.263398 4728 +guard 1 2 6.263398 6.263398 5738 +tender 1 2 6.263398 6.263398 5397 +stewart 1 2 6.263398 6.263398 5739 +renka 2 1 6.957497 13.914994 14312 +clinedavid 1 1 6.957497 6.957497 14313 +bruton 1 1 6.957497 6.957497 14314 +statisticalcomput 1 1 6.957497 6.957497 14315 +socialrespons 1 1 6.957497 6.957497 14316 +whichcan 1 1 6.957497 6.957497 14317 +constructionof 1 1 6.957497 6.957497 14318 +formathemat 1 1 6.957497 6.957497 14319 +developmentha 1 1 6.957497 6.957497 14320 +tension 1 1 6.957497 6.957497 14321 +buoi 1 1 6.957497 6.957497 14322 +barrier 1 1 6.957497 6.957497 14323 +moler 1 1 6.957497 6.957497 14324 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html new file mode 100644 index 00000000..10827fb5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^dijkstra.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +list 1 201 1.609438 1.609438 39 +algorithm 1 162 1.791759 1.791759 57 +area 1 144 1.945910 1.945910 80 +mathemat 3 108 2.197225 6.591675 123 +memori 1 101 2.302585 2.302585 139 +techniqu 1 99 2.302585 2.302585 138 +member 2 84 2.484907 4.969814 165 +good 1 77 2.564949 2.564949 200 +addit 1 74 2.639057 2.639057 228 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +reason 1 57 2.890372 2.890372 318 +particular 1 51 2.995732 2.995732 352 +physic 2 47 3.091042 6.182084 377 +theoret 1 39 3.258097 3.258097 446 +correct 1 38 3.295837 3.295837 462 +award 2 34 3.401197 6.802394 523 +obtain 1 33 3.433987 3.433987 534 +power 1 30 3.555348 3.555348 573 +profil 1 30 3.555348 3.555348 581 +art 2 29 3.583519 7.167038 593 +chair 1 29 3.583519 3.583519 596 +focus 1 29 3.583519 3.583519 584 +american 1 27 3.637586 3.637586 634 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +methodolog 1 23 3.806662 3.806662 733 +increas 1 20 3.951244 3.951244 829 +fromindividu 1 12 4.465908 4.465908 1290 +academi 2 8 4.875197 9.750394 1735 +centenni 1 7 5.010635 5.010635 1967 +foreign 1 7 5.010635 5.010635 1919 +ture 1 6 5.164786 5.164786 1997 +british 1 5 5.347108 5.347108 2546 +harri 1 4 5.568345 5.568345 3034 +queen 1 4 5.568345 5.568345 2919 +dijkstra 1 3 5.857933 5.857933 3173 +netherland 1 3 5.857933 5.857933 3650 +streamlin 1 3 5.857933 5.857933 3573 +edsger 2 2 6.263398 12.526796 5740 +honorari 1 2 6.263398 6.263398 5741 +sciencesmemb 1 2 6.263398 6.263398 5742 +royal 1 2 6.263398 6.263398 4756 +wybe 2 1 6.957497 13.914994 14325 +dijkstraschlumberg 1 1 6.957497 6.957497 14326 +sciencesprofessor 1 1 6.957497 6.957497 14327 +mathematicskandidaatsexamen 1 1 6.957497 6.957497 14328 +doctora 1 1 6.957497 6.957497 14329 +examen 1 1 6.957497 6.957497 14330 +leydenph 1 1 6.957497 6.957497 14331 +amsterdamhonor 1 1 6.957497 6.957497 14332 +awardsacm 1 1 6.957497 6.957497 14333 +sciencesdistinguish 1 1 6.957497 6.957497 14334 +societyafip 1 1 6.957497 6.957497 14335 +honori 1 1 6.957497 6.957497 14336 +causa 1 1 6.957497 6.957497 14337 +belfastarea 1 1 6.957497 6.957497 14338 +systemssummari 1 1 6.957497 6.957497 14339 +argumentso 1 1 6.957497 6.957497 14340 +ofform 1 1 6.957497 6.957497 14341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html new file mode 100644 index 00000000..97e0aa47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^edmondson.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +network 2 168 1.791759 3.583518 61 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +databas 1 122 2.079442 2.079442 86 +high 1 130 2.079442 2.079442 101 +mathemat 1 108 2.197225 2.197225 123 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +theori 1 111 2.197225 2.197225 127 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +educ 1 86 2.484907 2.484907 191 +larg 1 82 2.484907 2.484907 168 +internet 1 83 2.484907 2.484907 186 +member 1 84 2.484907 2.484907 165 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +profession 1 51 2.995732 2.995732 345 +protocol 2 45 3.135494 6.270988 407 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +mobil 2 23 3.806662 7.613324 730 +speed 1 18 4.060443 4.060443 911 +researchmi 1 14 4.317488 4.317488 1119 +fromindividu 1 12 4.465908 4.465908 1290 +chri 2 11 4.553877 9.107754 1311 +secretari 1 8 4.875197 4.875197 1775 +inproceed 1 8 4.875197 4.875197 1670 +sigcomm 1 5 5.347108 5.347108 2329 +gouda 1 4 5.568345 5.568345 3021 +treasur 1 3 5.857933 5.857933 3229 +andm 1 3 5.857933 5.857933 3901 +edmondson 3 2 6.263398 18.790194 4182 +yurkanan 2 2 6.263398 12.526796 4175 +interestcomput 1 2 6.263398 6.263398 5743 +yurkananlectur 1 1 6.957497 6.957497 14342 +internetwork 1 1 6.957497 6.957497 14343 +cobb 1 1 6.957497 6.957497 14344 +informaticsconfer 1 1 6.957497 6.957497 14345 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html new file mode 100644 index 00000000..617586fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^gallagher.html @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +list 1 201 1.609438 1.609438 39 +area 3 144 1.945910 5.837730 80 +process 1 142 1.945910 1.945910 72 +confer 3 126 2.079442 6.238326 100 +techniqu 1 99 2.302585 2.302585 138 +educ 2 86 2.484907 4.969814 191 +academ 1 82 2.484907 2.484907 178 +librari 1 87 2.484907 2.484907 181 +school 1 84 2.484907 2.484907 188 +member 1 84 2.484907 2.484907 165 +servic 2 72 2.639057 5.278114 236 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +improv 1 62 2.772589 2.772589 289 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +local 1 55 2.944439 2.944439 334 +profession 1 51 2.995732 2.995732 345 +committe 3 34 3.401197 10.203591 522 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +retriev 1 27 3.637586 3.637586 621 +women 1 16 4.174387 4.174387 1004 +researchmi 1 14 4.317488 4.317488 1119 +coordin 1 13 4.382027 4.382027 1182 +minor 1 12 4.465908 4.465908 1237 +fromindividu 1 12 4.465908 4.465908 1290 +secondari 2 7 5.010635 10.021270 1884 +recruit 1 6 5.164786 5.164786 2145 +sigcs 1 4 5.568345 5.568345 2865 +louisiana 1 3 5.857933 5.857933 3902 +suzi 2 2 6.263398 12.526796 4288 +gallagh 1 2 6.263398 6.263398 4293 +southwestern 1 2 6.263398 6.263398 5744 +interestcomput 1 2 6.263398 6.263398 5743 +gallagherlectur 1 1 6.957497 6.957497 14346 +loyola 1 1 6.957497 6.957497 14347 +necc 1 1 6.957497 6.957497 14348 +andretent 1 1 6.957497 6.957497 14349 +scienceeduc 1 1 6.957497 6.957497 14350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html new file mode 100644 index 00000000..f80f3b28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^jenevein.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 4 443 0.693147 2.772588 6 +work 2 380 0.693147 1.386294 9 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +engin 2 297 1.098612 2.197224 20 +design 1 213 1.386294 1.386294 25 +list 1 201 1.609438 1.609438 39 +network 3 168 1.791759 5.375277 61 +parallel 2 169 1.791759 3.583518 60 +recent 2 167 1.791759 3.583518 58 +applic 2 170 1.791759 3.583518 56 +develop 1 174 1.791759 1.791759 53 +perform 3 143 1.945910 5.837730 74 +architectur 2 139 1.945910 3.891820 77 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +confer 2 126 2.079442 4.158884 100 +machin 1 129 2.079442 2.079442 95 +intern 2 108 2.197225 4.394450 128 +techniqu 1 99 2.302585 2.302585 138 +proceed 2 93 2.397895 4.795790 152 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +ieee 2 86 2.484907 4.969814 190 +journal 1 83 2.484907 2.484907 183 +member 1 84 2.484907 2.484907 165 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +decemb 1 80 2.564949 2.564949 215 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +integr 1 67 2.708050 2.708050 245 +guid 1 63 2.772589 2.772589 267 +januari 1 62 2.772589 2.772589 264 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +special 1 56 2.890372 2.890372 320 +processor 3 54 2.944439 8.833317 335 +press 1 42 3.218876 3.218876 419 +transact 2 39 3.258097 6.516194 438 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +toler 3 33 3.433987 10.301961 533 +obtain 1 33 3.433987 3.433987 534 +fault 4 32 3.465736 13.862944 547 +profil 1 30 3.555348 3.555348 581 +focus 1 29 3.583519 3.583519 584 +scale 3 28 3.610918 10.832754 613 +measur 3 28 3.610918 10.832754 609 +multiprocessor 1 28 3.610918 3.610918 605 +repres 1 26 3.688879 3.688879 656 +strategi 1 25 3.737670 3.737670 682 +reliabl 1 25 3.737670 3.737670 674 +methodolog 2 23 3.806662 7.613324 733 +lead 1 23 3.806662 3.806662 718 +properti 1 22 3.850148 3.850148 749 +scheme 1 20 3.951244 3.951244 818 +qualiti 1 20 3.951244 3.951244 832 +failur 1 18 4.060443 4.060443 898 +appropri 1 18 4.060443 4.060443 883 +interconnect 6 17 4.110874 24.665244 937 +across 1 16 4.174387 4.174387 974 +precis 1 15 4.248495 4.248495 1023 +senior 1 14 4.317488 4.317488 1120 +researchmi 1 14 4.317488 4.317488 1119 +johnson 1 13 4.382027 4.382027 1162 +optic 5 12 4.465908 22.329540 1221 +fromindividu 1 12 4.465908 4.465908 1290 +abil 1 11 4.553877 4.553877 1341 +impact 1 11 4.553877 4.553877 1334 +success 1 10 4.653960 4.653960 1390 +devis 1 10 4.653960 4.653960 1451 +true 1 10 4.653960 4.653960 1422 +contrast 1 8 4.875197 4.875197 1637 +multicomput 1 7 5.010635 5.010635 1890 +predic 1 7 5.010635 5.010635 1806 +nest 2 6 5.164786 10.329572 2151 +chemistri 2 5 5.347108 10.694216 2405 +orlean 1 5 5.347108 5.347108 2550 +buss 1 4 5.568345 5.568345 2649 +louisiana 1 3 5.857933 5.857933 3902 +thedevelop 1 3 5.857933 5.857933 3903 +wave 1 3 5.857933 5.857933 3518 +campbel 1 3 5.857933 5.857933 3272 +laser 1 2 6.263398 6.263398 4747 +beinginvestig 1 2 6.263398 6.263398 5745 +parallelsystem 1 2 6.263398 6.263398 5746 +publicationsr 1 2 6.263398 6.263398 5736 +jenevein 7 1 6.957497 48.702479 14351 +wafer 4 1 6.957497 27.829988 14352 +menez 2 1 6.957497 13.914994 14353 +malek 2 1 6.957497 13.914994 14354 +interestinterconnect 1 1 6.957497 6.957497 14355 +interconnectionnetwork 1 1 6.957497 6.957497 14356 +restsin 1 1 6.957497 6.957497 14357 +interconnectionstructur 1 1 6.957497 6.957497 14358 +kindof 1 1 6.957497 6.957497 14359 +beingappli 1 1 6.957497 6.957497 14360 +communicationswitch 1 1 6.957497 6.957497 14361 +iscontinu 1 1 6.957497 6.957497 14362 +performanceport 1 1 6.957497 6.957497 14363 +tobenchmark 1 1 6.957497 6.957497 14364 +memorysystem 1 1 6.957497 6.957497 14365 +kyklo 1 1 6.957497 6.957497 14366 +laranjeira 1 1 6.957497 6.957497 14367 +ullah 1 1 6.957497 6.957497 14368 +metrix 1 1 6.957497 6.957497 14369 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html new file mode 100644 index 00000000..41c74320 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^martin.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +applic 1 170 1.791759 1.791759 56 +architectur 2 139 1.945910 3.891820 77 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +structur 1 106 2.197225 2.197225 105 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +center 1 88 2.397895 2.397895 158 +activ 1 84 2.484907 2.484907 182 +member 1 84 2.484907 2.484907 165 +complet 1 77 2.564949 2.564949 208 +logic 6 71 2.639057 15.834342 230 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +abstract 1 62 2.772589 2.772589 276 +function 1 62 2.772589 2.772589 275 +back 1 60 2.833213 2.833213 297 +space 2 57 2.890372 5.780744 310 +faculti 2 56 2.890372 5.780744 325 +california 1 46 3.091042 3.091042 388 +especi 1 36 3.367296 3.367296 496 +obtain 1 33 3.433987 3.433987 534 +profil 1 30 3.555348 3.555348 581 +valu 1 25 3.737670 3.737670 665 +martin 1 21 3.912023 3.912023 794 +concentr 1 18 4.060443 4.060443 906 +track 1 15 4.248495 4.248495 1029 +researchmi 1 14 4.317488 4.317488 1119 +classic 1 14 4.317488 4.317488 1084 +philosophi 2 13 4.382027 8.764054 1167 +deduct 1 12 4.465908 4.465908 1236 +scan 1 12 4.465908 4.465908 1243 +fromindividu 1 12 4.465908 4.465908 1290 +closur 2 8 4.875197 9.750394 1643 +angel 1 8 4.875197 4.875197 1779 +notion 1 7 5.010635 5.010635 1947 +vehicl 1 7 5.010635 5.010635 1928 +delai 1 7 5.010635 5.010635 1848 +chicago 1 6 5.164786 5.164786 2149 +emeritu 2 5 5.347108 10.694216 2544 +interestmathemat 1 3 5.857933 5.857933 3860 +andon 1 3 5.857933 5.857933 3115 +metatheori 1 3 5.857933 5.857933 3642 +norman 2 1 6.957497 13.914994 14370 +martinprofessor 1 1 6.957497 6.957497 14371 +ofphilosophi 1 1 6.957497 6.957497 14372 +asinterpret 1 1 6.957497 6.957497 14373 +whichexploit 1 1 6.957497 6.957497 14374 +intension 1 1 6.957497 6.957497 14375 +significantearli 1 1 6.957497 6.957497 14376 +missil 1 1 6.957497 6.957497 14377 +trackingalgorithm 1 1 6.957497 6.957497 14378 +radar 1 1 6.957497 6.957497 14379 +inmani 1 1 6.957497 6.957497 14380 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html new file mode 100644 index 00000000..0c11f100 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^mok.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 18 443 0.693147 12.476646 6 +program 2 374 0.693147 1.386294 7 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +time 20 293 1.098612 21.972240 17 +engin 5 297 1.098612 5.493060 20 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +softwar 5 220 1.386294 6.931470 30 +design 3 213 1.386294 4.158882 25 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +develop 3 174 1.791759 5.375277 53 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +tool 2 117 2.079442 4.158884 93 +confer 2 126 2.079442 4.158884 100 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +analysi 1 124 2.079442 2.079442 98 +intern 6 108 2.197225 13.183350 128 +specif 2 106 2.197225 4.394450 106 +technic 2 100 2.302585 4.605170 140 +techniqu 1 99 2.302585 2.302585 138 +real 14 93 2.397895 33.570530 144 +proceed 4 93 2.397895 9.591580 152 +present 3 91 2.397895 7.193685 145 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +ieee 3 86 2.484907 7.454721 190 +control 3 82 2.484907 7.454721 164 +journal 2 83 2.484907 4.969814 183 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +academ 1 82 2.484907 2.484907 178 +member 1 84 2.484907 2.484907 165 +method 3 80 2.564949 7.694847 213 +decemb 2 80 2.564949 5.129898 215 +symposium 2 72 2.639057 5.278114 238 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +addit 1 74 2.639057 2.639057 228 +goal 1 66 2.708050 2.708050 250 +foundat 1 62 2.772589 2.772589 286 +automat 1 61 2.833213 2.833213 306 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +publish 1 57 2.890372 2.890372 326 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +understand 1 47 3.091042 3.091042 384 +adapt 1 46 3.091042 3.091042 387 +mechan 1 43 3.178054 3.178054 416 +autom 3 41 3.218876 9.656628 434 +editor 1 41 3.218876 3.218876 433 +electr 1 38 3.295837 3.295837 461 +formal 5 37 3.332205 16.661025 478 +respons 2 37 3.332205 6.664410 476 +robot 1 36 3.367296 3.367296 497 +committe 3 34 3.401197 10.203591 522 +board 1 33 3.433987 3.433987 528 +toler 1 33 3.433987 3.433987 533 +obtain 1 33 3.433987 3.433987 534 +hard 1 30 3.555348 3.555348 563 +profil 1 30 3.555348 3.555348 581 +chair 2 29 3.583519 7.167038 596 +framework 1 28 3.610918 3.610918 606 +load 1 28 3.610918 3.610918 601 +constraint 1 26 3.688879 3.688879 636 +bound 1 26 3.688879 3.688879 659 +fundament 1 25 3.737670 3.737670 661 +primari 1 25 3.737670 3.737670 669 +concern 1 25 3.737670 3.737670 666 +toward 1 25 3.737670 3.737670 668 +fellow 1 24 3.761200 3.761200 701 +highli 1 23 3.806662 3.806662 725 +wang 3 21 3.912023 11.736069 790 +fund 1 21 3.912023 3.912023 805 +synthesi 1 20 3.951244 3.951244 834 +expert 1 20 3.951244 3.951244 833 +aid 2 18 4.060443 8.120886 904 +critic 1 16 4.174387 4.174387 982 +taiwan 1 16 4.174387 4.174387 1006 +brown 1 16 4.174387 4.174387 977 +massachusett 1 14 4.317488 4.317488 1118 +conduct 1 14 4.317488 4.317488 1065 +nasa 1 13 4.382027 4.382027 1188 +robust 2 12 4.465908 8.931816 1271 +asynchron 1 12 4.465908 4.465908 1229 +fromindividu 1 12 4.465908 4.465908 1290 +editori 1 9 4.753590 4.753590 1611 +vice 1 9 4.753590 4.753590 1604 +researchi 1 8 4.875197 4.875197 1756 +fifth 1 7 5.010635 5.010635 1931 +montreal 1 7 5.010635 5.010635 1961 +kluwer 1 6 5.164786 5.164786 2143 +antonio 1 6 5.164786 5.164786 2186 +ofdistribut 1 5 5.347108 5.347108 2316 +emerson 1 5 5.347108 5.347108 2547 +adjust 1 5 5.347108 5.347108 2422 +orlean 1 5 5.347108 5.347108 2550 +presentarea 1 4 5.568345 5.568345 3026 +avion 1 4 5.568345 5.568345 3018 +melbourn 1 4 5.568345 5.568345 3035 +sigsoft 1 4 5.568345 5.568345 3036 +aloysiu 3 3 5.857933 17.573799 3829 +systemdesign 1 2 6.263398 6.263398 4297 +stringent 1 2 6.263398 6.263398 5523 +areasinclud 1 2 6.263398 6.263398 5747 +publicationsa 1 2 6.263398 6.263398 4885 +clement 1 2 6.263398 6.263398 5526 +tsou 1 2 6.263398 6.263398 5525 +mokassoci 1 1 6.957497 6.957497 14381 +professorfaculti 1 1 6.957497 6.957497 14382 +federationof 1 1 6.957497 6.957497 14383 +interestfault 1 1 6.957497 6.957497 14384 +includespecif 1 1 6.957497 6.957497 14385 +forguarante 1 1 6.957497 6.957497 14386 +thetrad 1 1 6.957497 6.957497 14387 +criticalsystem 1 1 6.957497 6.957497 14388 +theanalysi 1 1 6.957497 6.957497 14389 +industrialprocess 1 1 6.957497 6.957497 14390 +ofnav 1 1 6.957497 6.957497 14391 +forreal 1 1 6.957497 6.957497 14392 +tilborg 1 1 6.957497 6.957497 14393 +heitmey 1 1 6.957497 6.957497 14394 +labaw 1 1 6.957497 6.957497 14395 +aptl 1 1 6.957497 6.957497 14396 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html new file mode 100644 index 00000000..3446eca5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^richards.html @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 5 374 0.693147 3.465735 7 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +languag 2 227 1.386294 2.772588 26 +list 1 201 1.609438 1.609438 39 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +process 2 142 1.945910 3.891820 72 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +teach 1 108 2.197225 2.197225 112 +book 1 99 2.302585 2.302585 131 +real 1 93 2.397895 2.397895 144 +member 1 84 2.484907 2.484907 165 +state 1 76 2.564949 2.564949 207 +orient 1 80 2.564949 2.564949 205 +appli 1 71 2.639057 2.639057 226 +addit 1 74 2.639057 2.639057 228 +function 4 62 2.772589 11.090356 275 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +reason 1 57 2.890372 2.890372 318 +undergradu 1 54 2.944439 2.944439 338 +maintain 1 51 2.995732 2.995732 342 +physic 1 47 3.091042 3.091042 377 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +editor 1 41 3.218876 3.218876 433 +formal 1 37 3.332205 3.332205 478 +concurr 2 34 3.401197 6.802394 501 +obtain 1 33 3.433987 3.433987 534 +richard 2 31 3.496508 6.993016 559 +profil 1 30 3.555348 3.555348 581 +seri 1 24 3.761200 3.761200 708 +longer 1 20 3.951244 3.951244 816 +stand 1 18 4.060443 4.060443 891 +stanford 1 17 4.110874 4.110874 955 +weslei 1 16 4.174387 4.174387 983 +permit 1 16 4.174387 4.174387 962 +senior 1 14 4.317488 4.317488 1120 +addison 1 12 4.465908 4.465908 1230 +fromindividu 1 12 4.465908 4.465908 1290 +suitabl 1 9 4.753590 4.753590 1486 +harvard 1 7 5.010635 5.010635 1926 +iowa 1 7 5.010635 5.010635 1971 +implementationof 1 7 5.010635 5.010635 1813 +microcomput 1 3 5.857933 5.857933 3444 +hamilton 2 2 6.263398 12.526796 5719 +collegem 1 2 6.263398 6.263398 5563 +astronaut 1 2 6.263398 6.263398 5748 +universityph 1 2 6.263398 6.263398 5604 +lecturerb 1 1 6.957497 6.957497 14397 +aero 1 1 6.957497 6.957497 14398 +universityprofession 1 1 6.957497 6.957497 14399 +servicecoordin 1 1 6.957497 6.957497 14400 +vol 1 1 6.957497 6.957497 14401 +educationsummari 1 1 6.957497 6.957497 14402 +potentialfor 1 1 6.957497 6.957497 14403 +infal 1 1 6.957497 6.957497 14404 +sectionof 1 1 6.957497 6.957497 14405 +onfunct 1 1 6.957497 6.957497 14406 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html new file mode 100644 index 00000000..7941d6ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^silberschatz.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 10 443 0.693147 6.931470 6 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +gener 1 220 1.386294 1.386294 27 +paper 2 205 1.609438 3.218876 38 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +parallel 2 169 1.791759 3.583518 60 +data 2 170 1.791759 3.583518 49 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +read 1 154 1.791759 1.791759 47 +area 3 144 1.945910 5.837730 80 +process 2 142 1.945910 3.891820 72 +perform 2 143 1.945910 3.891820 74 +model 1 145 1.945910 1.945910 69 +databas 8 122 2.079442 16.635536 86 +confer 4 126 2.079442 8.317768 100 +high 2 130 2.079442 4.158884 101 +intern 4 108 2.197225 8.788900 128 +manag 2 114 2.197225 4.394450 125 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +ieee 4 86 2.484907 9.939628 190 +larg 3 82 2.484907 7.454721 168 +journal 1 83 2.484907 2.484907 183 +member 1 84 2.484907 2.484907 165 +server 1 76 2.564949 2.564949 204 +june 1 79 2.564949 2.564949 214 +nation 1 74 2.639057 2.639057 240 +intellig 1 72 2.639057 2.639057 225 +workshop 1 71 2.639057 2.639057 239 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +knowledg 3 67 2.708050 8.124150 243 +main 2 67 2.708050 5.416100 256 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +septemb 2 65 2.772589 5.545178 274 +foundat 1 62 2.772589 2.772589 286 +organ 1 65 2.772589 2.772589 265 +back 1 60 2.833213 2.833213 297 +faculti 2 56 2.890372 5.780744 325 +special 1 56 2.890372 2.890372 320 +processor 1 54 2.944439 2.944439 335 +profession 1 51 2.995732 2.995732 345 +principl 1 48 3.044522 3.044522 357 +continu 2 39 3.258097 6.516194 448 +transact 2 39 3.258097 6.516194 438 +societi 1 40 3.258097 3.258097 456 +map 1 39 3.258097 3.258097 452 +movi 1 40 3.258097 3.258097 459 +cost 1 37 3.332205 3.332205 480 +robot 1 36 3.367296 3.367296 497 +multi 1 36 3.367296 3.367296 493 +award 2 34 3.401197 6.802394 523 +committe 1 34 3.401197 3.401197 522 +concurr 1 34 3.401197 3.401197 501 +obtain 1 33 3.433987 3.433987 534 +storag 2 31 3.496508 6.993016 553 +profil 1 30 3.555348 3.555348 581 +chair 2 29 3.583519 7.167038 596 +framework 1 28 3.610918 3.610918 606 +retriev 1 27 3.637586 3.637586 621 +divis 1 21 3.912023 3.912023 803 +media 2 19 4.007333 8.014666 861 +sigmod 1 19 4.007333 4.007333 877 +concentr 1 18 4.060443 4.060443 906 +demand 1 14 4.317488 4.317488 1073 +resolut 1 13 4.382027 4.382027 1172 +fromindividu 1 12 4.465908 4.465908 1290 +invit 1 10 4.653960 4.653960 1428 +conferenceon 1 9 4.753590 4.753590 1595 +databasesystem 1 8 4.875197 4.875197 1617 +silberschatz 6 6 5.164786 30.988716 1978 +outstand 1 6 5.164786 5.164786 2136 +advisori 1 6 5.164786 5.164786 2148 +sigact 1 6 5.164786 5.164786 2212 +symposiumon 1 6 5.164786 5.164786 2054 +distributedsystem 1 6 5.164786 5.164786 2022 +internationalconfer 1 6 5.164786 5.164786 2051 +seventh 1 5 5.347108 5.347108 2464 +multiresolut 1 5 5.347108 5.347108 2423 +fussel 1 5 5.347108 5.347108 2300 +abraham 2 4 5.568345 11.136690 2644 +ullman 1 4 5.568345 5.568345 2749 +stoni 1 3 5.857933 5.857933 3571 +sudarshan 1 3 5.857933 5.857933 3885 +ozden 2 2 6.263398 12.526796 5749 +eighth 1 2 6.263398 6.263398 5750 +publicationss 1 2 6.263398 6.263398 5732 +knowledgeand 1 2 6.263398 6.263398 4366 +onveri 1 2 6.263398 6.263398 4367 +rastogi 3 1 6.957497 20.872491 14407 +pod 2 1 6.957497 13.914994 14408 +silberschatzprofessorship 1 1 6.957497 6.957497 14409 +sciencesm 1 1 6.957497 6.957497 14410 +brookhonor 1 1 6.957497 6.957497 14411 +serviceiee 1 1 6.957497 6.957497 14412 +futureof 1 1 6.957497 6.957497 14413 +basedsystemssummari 1 1 6.957497 6.957497 14414 +recentresearch 1 1 6.957497 6.957497 14415 +multidatabas 1 1 6.957497 6.957497 14416 +transactionmanag 1 1 6.957497 6.957497 14417 +ganguli 1 1 6.957497 6.957497 14418 +tsur 1 1 6.957497 6.957497 14419 +datalog 1 1 6.957497 6.957497 14420 +programexecut 1 1 6.957497 6.957497 14421 +jagadish 1 1 6.957497 6.957497 14422 +lieuwen 1 1 6.957497 6.957497 14423 +dali 1 1 6.957497 6.957497 14424 +biliri 1 1 6.957497 6.957497 14425 +storageserv 1 1 6.957497 6.957497 14426 +storageand 1 1 6.957497 6.957497 14427 +relationaldata 1 1 6.957497 6.957497 14428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html new file mode 100644 index 00000000..222460d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^UTCS^report^1995^profiles^simmons.html @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +list 1 201 1.609438 1.609438 39 +professor 1 137 1.945910 1.945910 76 +novemb 1 81 2.484907 2.484907 179 +back 1 60 2.833213 2.833213 297 +faculti 1 56 2.890372 2.890372 325 +robert 2 30 3.555348 7.110696 567 +profil 1 30 3.555348 3.555348 581 +rememb 1 12 4.465908 4.465908 1217 +centenni 1 7 5.010635 5.010635 1967 +emeritu 1 5 5.347108 5.347108 2544 +bledso 1 4 5.568345 5.568345 2999 +simmon 2 2 6.263398 12.526796 5460 +simmonsquinci 1 1 6.957497 6.957497 14429 +professoremeritu 1 1 6.957497 6.957497 14430 +psychologymai 1 1 6.957497 6.957497 14431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^adams^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^adams^ new file mode 100644 index 00000000..945c5f8c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^adams^ @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +email 2 220 1.386294 2.772588 29 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +austin 1 168 1.791759 1.791759 63 +hour 1 165 1.791759 1.791759 46 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +read 1 154 1.791759 1.791759 47 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +check 1 115 2.197225 2.197225 118 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +new 1 64 2.772589 2.772589 262 +type 1 61 2.833213 2.833213 296 +thesi 1 57 2.890372 2.890372 327 +think 1 57 2.890372 2.890372 314 +undergradu 1 54 2.944439 2.944439 338 +could 1 46 3.091042 3.091042 383 +semant 1 29 3.583519 3.583519 587 +progress 1 28 3.610918 3.610918 598 +rule 1 26 3.688879 3.688879 638 +altern 1 26 3.688879 3.688879 641 +yahoo 1 24 3.761200 3.761200 707 +log 1 19 4.007333 4.007333 857 +adam 2 17 4.110874 8.221748 934 +happi 1 14 4.317488 4.317488 1079 +touch 1 12 4.465908 4.465908 1288 +claim 1 8 4.875197 4.875197 1664 +core 1 7 5.010635 5.010635 1809 +gzip 2 6 5.164786 10.329572 2117 +gradual 1 4 5.568345 5.568345 2997 +vrml 1 4 5.568345 5.568345 2949 +aweekli 1 3 5.857933 5.857933 3312 +fame 1 3 5.857933 5.857933 3793 +knowwhat 1 2 6.263398 6.263398 5456 +junki 1 2 6.263398 6.263398 5457 +nando 1 2 6.263398 6.263398 5458 +seligman 2 1 6.957497 13.914994 12572 +pageadam 1 1 6.957497 6.957497 12573 +specifiedth 1 1 6.957497 6.957497 12574 +fileor 1 1 6.957497 6.957497 12575 +pagemart 1 1 6.957497 6.957497 12576 +fromreut 1 1 6.957497 6.957497 12577 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^agapito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^agapito^ new file mode 100644 index 00000000..8d7759bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^agapito^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +scienc 5 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 4 160 1.791759 7.167036 64 +austin 3 168 1.791759 5.375277 63 +machin 1 129 2.079442 2.079442 95 +specif 1 106 2.197225 2.197225 106 +learn 1 86 2.484907 2.484907 170 +colleg 1 61 2.833213 2.833213 300 +california 1 46 3.091042 3.091042 388 +eduphon 1 15 4.248495 4.248495 1060 +hopefulli 1 14 4.317488 4.317488 1071 +station 1 13 4.382027 4.382027 1157 +acquisit 1 10 4.653960 4.653960 1465 +santa 1 10 4.653960 4.653960 1441 +commonsens 1 4 5.568345 5.568345 2998 +barbara 1 3 5.857933 5.857933 3380 +chill 1 2 6.263398 6.263398 4244 +agapito 3 1 6.957497 20.872491 12578 +sustaita 2 1 6.957497 13.914994 12579 +austincognit 1 1 6.957497 6.957497 12580 +connection 1 1 6.957497 6.957497 12581 +reasoningschoolingph 1 1 6.957497 6.957497 12582 +miscellaneouspost 1 1 6.957497 6.957497 12583 +addressth 1 1 6.957497 6.957497 12584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ai-lab^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ai-lab^ new file mode 100644 index 00000000..ed428215 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ai-lab^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +base 2 165 1.791759 3.583518 50 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +report 4 131 2.079442 8.317768 92 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +world 1 115 2.197225 2.197225 126 +technic 3 100 2.302585 6.907755 140 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +intellig 3 72 2.639057 7.917171 225 +nation 1 74 2.639057 2.639057 240 +logic 1 71 2.639057 2.639057 230 +solv 1 73 2.639057 2.639057 234 +knowledg 1 67 2.708050 2.708050 243 +artifici 3 63 2.772589 8.317767 280 +laboratori 1 63 2.772589 2.772589 292 +new 1 64 2.772589 2.772589 262 +foundat 1 62 2.772589 2.772589 286 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +faculti 3 56 2.890372 8.671116 325 +reason 2 57 2.890372 5.780744 318 +physic 1 47 3.091042 3.091042 377 +directori 1 45 3.135494 3.135494 396 +autom 3 41 3.218876 9.656628 434 +close 1 38 3.295837 3.295837 465 +multi 1 36 3.367296 3.367296 493 +robert 2 30 3.555348 7.110696 567 +neural 1 30 3.555348 3.555348 578 +rule 1 26 3.688879 3.688879 638 +lab 1 24 3.761200 3.761200 698 +theorem 3 21 3.912023 11.736069 786 +fund 1 21 3.912023 3.912023 805 +prove 3 19 4.007333 12.021999 848 +histori 1 19 4.007333 4.007333 853 +excel 1 19 4.007333 4.007333 868 +atth 1 15 4.248495 4.248495 1019 +action 1 15 4.248495 4.248495 1038 +rank 1 14 4.317488 4.317488 1063 +bruce 1 12 4.465908 4.465908 1226 +distinguish 1 11 4.553877 4.553877 1357 +benjamin 1 11 4.553877 4.553877 1296 +qualit 1 11 4.553877 4.553877 1362 +vladimir 1 11 4.553877 4.553877 1324 +peter 1 11 4.553877 4.553877 1316 +novak 3 9 4.753590 14.260770 1521 +moonei 2 9 4.753590 9.507180 1520 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 2 8 4.875197 9.750394 1667 +aaai 1 8 4.875197 4.875197 1750 +presidenti 1 8 4.875197 4.875197 1737 +postdoc 1 8 4.875197 4.875197 1724 +philosoph 1 7 5.010635 5.010635 1904 +boyer 1 6 5.164786 5.164786 2013 +gordon 1 6 5.164786 5.164786 2032 +dream 1 6 5.164786 5.164786 2165 +lifschitz 1 5 5.347108 5.347108 2542 +mirank 1 5 5.347108 5.347108 2543 +porter 1 5 5.347108 5.347108 2293 +emeritu 1 5 5.347108 5.347108 2544 +bledso 1 4 5.568345 5.568345 2999 +clark 1 4 5.568345 5.568345 2705 +kuiper 2 3 5.857933 11.715866 3794 +souther 1 3 5.857933 5.857933 3795 +woodi 1 2 6.263398 6.263398 5459 +simmon 1 2 6.263398 6.263398 5460 +laboratoryut 1 1 6.957497 6.957497 12585 +laboratoryth 1 1 6.957497 6.957497 12586 +austinha 1 1 6.957497 6.957497 12587 +andgradu 1 1 6.957497 6.957497 12588 +causei 1 1 6.957497 6.957497 12589 +deceas 1 1 6.957497 6.957497 12590 +memoriam 1 1 6.957497 6.957497 12591 +porterpoint 1 1 6.957497 6.957497 12592 +agenciescontact 1 1 6.957497 6.957497 12593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ajohn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ajohn^ new file mode 100644 index 00000000..96e3ba6e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ajohn^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +parallel 3 169 1.791759 5.375277 60 +austin 3 168 1.791759 5.375277 63 +texa 1 160 1.791759 1.791759 64 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +compil 1 122 2.079442 2.079442 96 +code 1 108 2.197225 2.197225 116 +want 1 79 2.564949 2.564949 199 +main 1 67 2.708050 2.708050 256 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +advisor 1 51 2.995732 2.995732 355 +john 1 33 3.433987 3.433987 532 +postal 1 30 3.555348 3.555348 580 +constraint 1 26 3.688879 3.688879 636 +translat 1 13 4.382027 4.382027 1164 +usavoic 1 13 4.382027 4.382027 1198 +hello 1 10 4.653960 4.653960 1407 +candid 1 9 4.753590 4.753590 1606 +routin 1 9 4.753590 4.753590 1549 +ajita 1 2 6.263398 6.263398 5461 +papersmi 1 2 6.263398 6.263398 5462 +johnajita 1 1 6.957497 6.957497 12594 +programmingframework 1 1 6.957497 6.957497 12595 +parallelprocedur 1 1 6.957497 6.957497 12596 +brownemi 1 1 6.957497 6.957497 12597 +ajohn 1 1 6.957497 6.957497 12598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html new file mode 100644 index 00000000..96d82c7b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^almstrum^welcome.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 6 640 0.000000 0.000000 4 +home 5 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +univers 3 571 0.000000 0.000000 5 +interest 5 384 0.693147 3.465735 11 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +group 3 183 1.609438 4.828314 36 +includ 2 208 1.609438 3.218876 42 +utexa 2 189 1.609438 3.218876 44 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +austin 6 168 1.791759 10.750554 63 +texa 3 160 1.791759 5.375277 64 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +technolog 2 131 2.079442 4.158884 102 +confer 1 126 2.079442 2.079442 100 +mathemat 2 108 2.197225 4.394450 123 +teach 2 108 2.197225 4.394450 112 +topic 1 114 2.197225 2.197225 110 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +pictur 1 89 2.397895 2.397895 160 +associ 1 93 2.397895 2.397895 151 +learn 3 86 2.484907 7.454721 170 +educ 2 86 2.484907 4.969814 191 +institut 1 84 2.484907 2.484907 187 +method 2 80 2.564949 5.129898 213 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +logic 1 71 2.639057 2.639057 230 +addit 1 74 2.639057 2.639057 228 +integr 1 67 2.708050 2.708050 245 +main 1 67 2.708050 2.708050 256 +evalu 1 64 2.772589 2.772589 266 +organ 1 65 2.772589 2.772589 265 +special 2 56 2.890372 5.780744 320 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +direct 1 57 2.890372 2.890372 316 +maintain 1 51 2.995732 2.995732 342 +profession 1 51 2.995732 2.995732 345 +understand 2 47 3.091042 6.182084 384 +electron 1 47 3.091042 3.091042 379 +electr 1 38 3.295837 3.295837 461 +formal 1 37 3.332205 3.332205 478 +field 1 37 3.332205 3.332205 482 +connect 1 37 3.332205 3.332205 485 +scientist 1 31 3.496508 3.496508 560 +computersci 1 30 3.555348 3.555348 562 +travel 1 30 3.555348 3.555348 579 +limit 1 29 3.583519 3.583519 585 +utc 1 27 3.637586 3.637586 629 +other 1 24 3.761200 3.761200 697 +alwai 1 24 3.761200 3.761200 691 +leav 1 21 3.912023 3.912023 772 +particularli 1 19 4.007333 4.007333 867 +excel 1 19 4.007333 4.007333 868 +encourag 1 18 4.060443 4.060443 880 +social 1 13 4.382027 4.382027 1123 +jump 1 9 4.753590 4.753590 1603 +sweden 1 7 5.010635 5.010635 1885 +interestedin 1 5 5.347108 5.347108 2260 +craft 1 5 5.347108 5.347108 2412 +camp 1 5 5.347108 5.347108 2545 +suffer 1 5 5.347108 5.347108 2268 +novic 1 4 5.568345 5.568345 2815 +mentor 1 4 5.568345 5.568345 2591 +sigcs 1 4 5.568345 5.568345 2865 +vicki 2 3 5.857933 11.715866 3187 +mathematicallog 1 3 5.857933 5.857933 3796 +belong 1 3 5.857933 5.857933 3797 +almstrum 3 2 6.263398 18.790194 4165 +woodwork 1 2 6.263398 6.263398 5463 +spurt 1 2 6.263398 6.263398 5464 +plenti 1 2 6.263398 6.263398 5465 +uppsala 2 1 6.957497 13.914994 12599 +almstrumabout 1 1 6.957497 6.957497 12600 +doctoralresearch 1 1 6.957497 6.957497 12601 +ispent 1 1 6.957497 6.957497 12602 +pagether 1 1 6.957497 6.957497 12603 +garden 1 1 6.957497 6.957497 12604 +sew 1 1 6.957497 6.957497 12605 +hubbi 1 1 6.957497 6.957497 12606 +torgni 1 1 6.957497 6.957497 12607 +stadler 1 1 6.957497 6.957497 12608 +itics 1 1 6.957497 6.957497 12609 +educationjun 1 1 6.957497 6.957497 12610 +swedenoth 1 1 6.957497 6.957497 12611 +frenzi 1 1 6.957497 6.957497 12612 +educationsigsoft 1 1 6.957497 6.957497 12613 +engineeringacm 1 1 6.957497 6.957497 12614 +machineryieeeth 1 1 6.957497 6.957497 12615 +engineerscpsrcomput 1 1 6.957497 6.957497 12616 +responsibilityconnect 1 1 6.957497 6.957497 12617 +elsewhereto 1 1 6.957497 6.957497 12618 +seldom 1 1 6.957497 6.957497 12619 +forewarn 1 1 6.957497 6.957497 12620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^anthony^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^anthony^ new file mode 100644 index 00000000..124e0664 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^anthony^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +hour 1 165 1.791759 1.791759 46 +compil 1 122 2.079442 2.079442 96 +mondai 1 77 2.564949 2.564949 206 +wednesdai 1 64 2.772589 2.772589 261 +anthoni 4 4 5.568345 22.273380 2792 +pang 2 3 5.857933 11.715866 3509 +hung 1 3 5.857933 5.857933 3524 +hing 2 2 6.263398 12.526796 5442 +pagehung 1 1 6.957497 6.957497 12621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^aruna^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^aruna^ new file mode 100644 index 00000000..a5514eb2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^aruna^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +fall 2 181 1.609438 3.218876 40 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +studi 1 120 2.079442 2.079442 91 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +come 1 78 2.564949 2.564949 202 +taylor 1 63 2.772589 2.772589 287 +experi 1 64 2.772589 2.772589 283 +colleg 1 61 2.833213 2.833213 300 +sciencesunivers 1 37 3.332205 3.332205 486 +bachelor 1 17 4.110874 4.110874 957 +aruna 3 1 6.957497 20.872491 12622 +addalacurr 1 1 6.957497 6.957497 12623 +studentth 1 1 6.957497 6.957497 12624 +sciencess 1 1 6.957497 6.957497 12625 +engineeringmysorework 1 1 6.957497 6.957497 12626 +mysoreindiai 1 1 6.957497 6.957497 12627 +mysor 1 1 6.957497 6.957497 12628 +cityindiato 1 1 6.957497 6.957497 12629 +eduvoic 1 1 6.957497 6.957497 12630 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ashis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ashis^ new file mode 100644 index 00000000..27843130 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ashis^ @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 2 299 1.098612 2.197224 13 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +pleas 1 113 2.197225 2.197225 114 +main 1 67 2.708050 2.708050 256 +get 1 46 3.091042 3.091042 380 +exist 1 30 3.555348 3.555348 569 +known 1 24 3.761200 3.761200 702 +usavoic 1 13 4.382027 4.382027 1198 +mepost 1 10 4.653960 4.653960 1472 +round 1 8 4.875197 4.875197 1769 +let 1 3 5.857933 5.857933 3790 +patienc 1 2 6.263398 6.263398 5466 +ashi 2 1 6.957497 13.914994 12631 +tarafdarashi 1 1 6.957497 6.957497 12632 +tarafdarabout 1 1 6.957497 6.957497 12633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bayardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bayardo^ new file mode 100644 index 00000000..38e13593 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bayardo^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +work 2 380 0.693147 1.386294 9 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +mail 2 238 1.386294 2.772588 22 +also 1 259 1.386294 1.386294 28 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +address 2 170 1.791759 3.583518 62 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +activ 1 84 2.484907 2.484907 182 +complet 1 77 2.564949 2.564949 208 +line 1 75 2.639057 2.639057 231 +solv 1 73 2.639057 2.639057 234 +prof 1 64 2.772589 2.772589 273 +dept 1 64 2.772589 2.772589 291 +taylor 1 63 2.772589 2.772589 287 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +date 1 51 2.995732 2.995732 344 +advisor 1 51 2.995732 2.995732 355 +electr 1 38 3.295837 3.295837 461 +expect 1 37 3.332205 3.332205 484 +within 1 33 3.433987 3.433987 525 +queri 1 33 3.433987 3.433987 524 +hard 1 30 3.555348 3.555348 563 +campu 1 27 3.637586 3.637586 623 +mine 1 26 3.688879 3.688879 654 +constraint 1 26 3.688879 3.688879 636 +expert 1 20 3.951244 3.951244 833 +toolkit 1 20 3.951244 3.951244 835 +histori 1 19 4.007333 4.007333 853 +along 1 18 4.060443 4.060443 878 +coordin 1 13 4.382027 4.382027 1182 +daniel 1 12 4.465908 4.465908 1233 +instanc 1 11 4.553877 4.553877 1322 +candid 1 9 4.753590 4.753590 1606 +mirank 1 5 5.347108 5.347108 2543 +bayardo 3 2 6.263398 18.790194 5467 +roberto 1 2 6.263398 6.263398 5468 +exception 1 2 6.263398 6.263398 4467 +pageroberto 1 1 6.957497 6.957497 12634 +infosleuth 1 1 6.957497 6.957497 12635 +satisfactionmi 1 1 6.957497 6.957497 12636 +generatingand 1 1 6.957497 6.957497 12637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bert^ new file mode 100644 index 00000000..e0cfab19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bert^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +address 2 170 1.791759 3.583518 62 +austin 2 168 1.791759 3.583518 63 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +model 2 145 1.945910 3.891820 69 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +abstract 1 62 2.772589 2.772589 276 +taylor 1 63 2.772589 2.772589 287 +reason 1 57 2.890372 2.890372 318 +physic 1 47 3.091042 3.091042 377 +past 1 42 3.218876 3.218876 428 +ofth 1 36 3.367296 3.367296 491 +dissert 1 32 3.465736 3.465736 549 +retriev 1 27 3.637586 3.637586 621 +behavior 1 18 4.060443 4.060443 881 +month 2 15 4.248495 8.496990 1025 +refin 2 11 4.553877 9.107754 1363 +entitl 1 9 4.753590 4.753590 1490 +drink 1 9 4.753590 4.753590 1607 +informationemail 1 9 4.753590 4.753590 1564 +overviewof 1 2 6.263398 6.263398 5469 +bert 3 1 6.957497 20.872491 12638 +imprecis 2 1 6.957497 13.914994 12639 +kayresearch 1 1 6.957497 6.957497 12640 +vitami 1 1 6.957497 6.957497 12641 +stuffsonia 1 1 6.957497 6.957497 12642 +andnina 1 1 6.957497 6.957497 12643 +springbank 1 1 6.957497 6.957497 12644 +scotchdrinksof 1 1 6.957497 6.957497 12645 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bhanu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bhanu^ new file mode 100644 index 00000000..90e6f9a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bhanu^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +project 1 340 1.098612 1.098612 18 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +introduct 1 126 2.079442 2.079442 87 +databas 1 122 2.079442 2.079442 86 +mathemat 1 108 2.197225 2.197225 123 +manag 1 114 2.197225 2.197225 125 +homepag 1 93 2.397895 2.397895 148 +school 1 84 2.484907 2.484907 188 +logic 1 71 2.639057 2.639057 230 +multimedia 1 68 2.708050 2.708050 258 +visit 1 63 2.772589 2.772589 288 +semest 1 58 2.890372 2.890372 312 +visitor 1 49 3.044522 3.044522 371 +third 1 43 3.178054 3.178054 412 +term 1 43 3.178054 3.178054 411 +vladimir 1 11 4.553877 4.553877 1324 +harrick 1 7 5.010635 5.010635 1849 +lifschitz 1 5 5.347108 5.347108 2542 +coursesc 1 4 5.568345 5.568345 2692 +vinc 1 2 6.263398 6.263398 5414 +bhanu 2 1 6.957497 13.914994 12646 +homepagethi 1 1 6.957497 6.957497 12647 +akhil 1 1 6.957497 6.957497 12648 +reddythank 1 1 6.957497 6.957497 12649 +austinm 1 1 6.957497 6.957497 12650 +datacommun 1 1 6.957497 6.957497 12651 +anitish 1 1 6.957497 6.957497 12652 +barua 1 1 6.957497 6.957497 12653 +schwetmani 1 1 6.957497 6.957497 12654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bogo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bogo^ new file mode 100644 index 00000000..f2296d69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^bogo^ @@ -0,0 +1,229 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 7 374 0.693147 4.852029 7 +work 2 380 0.693147 1.386294 9 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +updat 3 191 1.609438 4.828314 41 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +list 1 201 1.609438 1.609438 39 +austin 2 168 1.791759 3.583518 63 +read 1 154 1.791759 1.791759 47 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +process 3 142 1.945910 5.837730 72 +problem 2 147 1.945910 3.891820 75 +first 1 140 1.945910 1.945910 71 +construct 1 139 1.945910 1.945910 82 +number 3 130 2.079442 6.238326 97 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +make 4 111 2.197225 8.788900 120 +well 2 109 2.197225 4.394450 121 +final 1 116 2.197225 2.197225 108 +theori 1 111 2.197225 2.197225 127 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +peopl 8 96 2.302585 18.420680 132 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +pictur 1 89 2.397895 2.397895 160 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +real 1 93 2.397895 2.397895 144 +level 1 87 2.484907 2.484907 180 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +complet 1 77 2.564949 2.564949 208 +involv 1 71 2.639057 2.639057 227 +free 1 73 2.639057 2.639057 224 +would 4 67 2.708050 10.832200 251 +differ 1 66 2.708050 2.708050 253 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +written 1 63 2.772589 2.772589 278 +colleg 1 61 2.833213 2.833213 300 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +telephon 2 50 3.044522 6.089044 373 +physic 1 47 3.091042 3.091042 377 +quarter 1 47 3.091042 3.091042 389 +made 1 44 3.135494 3.135494 398 +answer 1 45 3.135494 3.135494 391 +anoth 1 45 3.135494 3.135494 408 +even 1 45 3.135494 3.135494 393 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +probabl 6 40 3.258097 19.548582 455 +error 1 40 3.258097 3.258097 449 +correct 1 38 3.295837 3.295837 462 +feel 2 37 3.332205 6.664410 483 +expect 1 37 3.332205 3.332205 484 +connect 1 37 3.332205 3.332205 485 +soon 1 36 3.367296 3.367296 494 +michael 1 35 3.401197 3.401197 514 +return 1 34 3.401197 3.401197 502 +either 1 35 3.401197 3.401197 506 +articl 1 33 3.433987 3.433987 530 +human 2 32 3.465736 6.931472 546 +taken 1 31 3.496508 3.496508 555 +hard 1 30 3.555348 3.555348 563 +postal 1 30 3.555348 3.555348 580 +depend 1 29 3.583519 3.583519 583 +built 1 29 3.583519 3.583519 592 +quot 1 29 3.583519 3.583519 582 +intend 1 28 3.610918 3.610918 599 +ask 1 28 3.610918 3.610918 597 +symbol 1 27 3.637586 3.637586 620 +altern 1 26 3.688879 3.688879 641 +although 1 25 3.737670 3.737670 667 +accur 1 25 3.737670 3.737670 680 +sometim 2 24 3.761200 7.522400 696 +frame 1 24 3.761200 3.761200 684 +interpret 1 24 3.761200 3.761200 686 +decis 3 23 3.806662 11.419986 728 +lead 1 23 3.806662 3.806662 718 +almost 1 22 3.850148 3.850148 742 +util 1 21 3.912023 3.912023 774 +theorem 1 21 3.912023 3.912023 786 +prepar 1 20 3.951244 3.951244 824 +assum 1 19 4.007333 4.007333 845 +accept 1 18 4.060443 4.060443 879 +account 1 18 4.060443 4.060443 882 +estim 1 17 4.110874 4.110874 930 +differenti 1 17 4.110874 4.110874 921 +choic 1 16 4.174387 4.174387 979 +transfer 1 16 4.174387 4.174387 967 +cognit 1 16 4.174387 4.174387 986 +psycholog 1 15 4.248495 4.248495 1054 +mayb 1 15 4.248495 4.248495 1014 +save 2 14 4.317488 8.634976 1099 +manner 1 14 4.317488 4.317488 1074 +balanc 1 14 4.317488 4.317488 1112 +stori 1 14 4.317488 4.317488 1087 +econom 1 13 4.382027 4.382027 1184 +rememb 1 12 4.465908 4.465908 1217 +sound 2 9 4.753590 9.507180 1605 +unusu 1 9 4.753590 4.753590 1566 +exact 1 9 4.753590 4.753590 1509 +charg 1 9 4.753590 4.753590 1582 +risk 4 8 4.875197 19.500788 1689 +brain 2 8 4.875197 9.750394 1638 +formul 1 8 4.875197 4.875197 1733 +wire 1 8 4.875197 4.875197 1747 +simpli 1 8 4.875197 4.875197 1626 +prover 1 8 4.875197 4.875197 1653 +insert 1 8 4.875197 4.875197 1687 +filter 1 8 4.875197 4.875197 1641 +explain 2 7 5.010635 10.021270 1816 +maxim 1 7 5.010635 5.010635 1944 +prevent 1 7 5.010635 5.010635 1827 +gave 1 7 5.010635 5.010635 1922 +remind 1 7 5.010635 5.010635 1799 +wrong 2 6 5.164786 10.329572 2025 +consequ 1 6 5.164786 5.164786 1989 +biolog 1 6 5.164786 5.164786 2147 +mistak 1 6 5.164786 5.164786 2110 +postcard 1 6 5.164786 5.164786 2181 +promis 1 6 5.164786 5.164786 2037 +hidden 1 6 5.164786 5.164786 1987 +quantum 1 6 5.164786 5.164786 2214 +adopt 4 5 5.347108 21.388432 2467 +amherst 1 5 5.347108 5.347108 2484 +fair 1 5 5.347108 5.347108 2333 +respond 1 5 5.347108 5.347108 2354 +favor 1 5 5.347108 5.347108 2414 +ahead 1 5 5.347108 5.347108 2338 +puzzl 1 5 5.347108 5.347108 2507 +stupid 1 5 5.347108 5.347108 2489 +analog 3 4 5.568345 16.705035 2875 +kill 1 4 5.568345 5.568345 3000 +fire 1 4 5.568345 5.568345 3001 +hypothesi 1 4 5.568345 5.568345 2650 +suppos 1 4 5.568345 5.568345 3002 +neuron 3 3 5.857933 17.573799 3798 +coin 2 3 5.857933 11.715866 3799 +diseas 2 3 5.857933 11.715866 3635 +scream 1 3 5.857933 5.857933 3609 +wasn 1 3 5.857933 5.857933 3800 +incorrect 1 3 5.857933 5.857933 3134 +cogsci 2 2 6.263398 12.526796 4798 +toss 2 2 6.263398 12.526796 5470 +reject 2 2 6.263398 12.526796 5418 +bogu 2 2 6.263398 12.526796 5471 +advert 1 2 6.263398 6.263398 5201 +belov 1 2 6.263398 6.263398 5073 +imagin 1 2 6.263398 6.263398 5472 +combat 1 2 6.263398 6.263398 5473 +nobodi 1 2 6.263398 6.263398 5474 +voltag 1 2 6.263398 6.263398 5475 +invalid 1 2 6.263398 6.263398 5476 +append 1 2 6.263398 6.263398 4295 +informationthi 1 2 6.263398 6.263398 5477 +empti 1 2 6.263398 6.263398 5478 +bogon 3 1 6.957497 20.872491 12655 +avers 2 1 6.957497 13.914994 12656 +outcom 2 1 6.957497 13.914994 12657 +bogo 2 1 6.957497 13.914994 12658 +bogomolnymichael 1 1 6.957497 6.957497 12659 +bogomolni 1 1 6.957497 6.957497 12660 +interestsnot 1 1 6.957497 6.957497 12661 +jenef 1 1 6.957497 6.957497 12662 +husman 1 1 6.957497 6.957497 12663 +bet 1 1 6.957497 6.957497 12664 +diminish 1 1 6.957497 6.957497 12665 +tverski 1 1 6.957497 6.957497 12666 +kahneman 1 1 6.957497 6.957497 12667 +verbatimfrom 1 1 6.957497 6.957497 12668 +outbreak 1 1 6.957497 6.957497 12669 +beenpropos 1 1 6.957497 6.957497 12670 +programsar 1 1 6.957497 6.957497 12671 +besav 1 1 6.957497 6.957497 12672 +digitalif 1 1 6.957497 6.957497 12673 +electrochem 1 1 6.957497 6.957497 12674 +axon 1 1 6.957497 6.957497 12675 +shaki 1 1 6.957497 6.957497 12676 +inaccur 1 1 6.957497 6.957497 12677 +subtract 1 1 6.957497 6.957497 12678 +checkbook 1 1 6.957497 6.957497 12679 +nevertheless 1 1 6.957497 6.957497 12680 +misfir 1 1 6.957497 6.957497 12681 +italic 1 1 6.957497 6.957497 12682 +researchcognit 1 1 6.957497 6.957497 12683 +sciencearitifici 1 1 6.957497 6.957497 12684 +intelligencemathemat 1 1 6.957497 6.957497 12685 +logictopolog 1 1 6.957497 6.957497 12686 +ghrist 1 1 6.957497 6.957497 12687 +wilshir 1 1 6.957497 6.957497 12688 +parkwai 1 1 6.957497 6.957497 12689 +talentsdefinit 1 1 6.957497 6.957497 12690 +bogodynamicsdefinit 1 1 6.957497 6.957497 12691 +sortwhil 1 1 6.957497 6.957497 12692 +bogos 1 1 6.957497 6.957497 12693 +bogomet 1 1 6.957497 6.957497 12694 +flux 1 1 6.957497 6.957497 12695 +bogotifi 1 1 6.957497 6.957497 12696 +autobogotiphobia 1 1 6.957497 6.957497 12697 +blinkenlight 1 1 6.957497 6.957497 12698 +lasher 1 1 6.957497 6.957497 12699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^boyer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^boyer^ new file mode 100644 index 00000000..66951a8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^boyer^ @@ -0,0 +1,187 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 6 640 0.000000 0.000000 4 +univers 6 571 0.000000 0.000000 5 +comput 5 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +time 2 293 1.098612 2.197224 17 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +note 1 142 1.945910 1.945910 67 +mathemat 2 108 2.197225 4.394450 123 +teach 1 108 2.197225 2.197225 112 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +educ 1 86 2.484907 2.484907 191 +build 1 85 2.484907 2.484907 184 +state 1 76 2.564949 2.564949 207 +method 1 80 2.564949 2.564949 213 +decemb 1 80 2.564949 2.564949 215 +logic 1 71 2.639057 2.639057 230 +view 1 70 2.708050 2.708050 254 +dept 1 64 2.772589 2.772589 291 +result 1 65 2.772589 2.772589 281 +polici 1 64 2.772589 2.772589 279 +share 1 59 2.833213 2.833213 304 +detail 1 57 2.890372 2.890372 321 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +undergradu 1 54 2.944439 2.944439 338 +found 1 53 2.944439 2.944439 337 +much 1 52 2.995732 2.995732 349 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +physic 1 47 3.091042 3.091042 377 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +natur 1 44 3.135494 3.135494 406 +press 1 42 3.218876 3.218876 419 +close 1 38 3.295837 3.295837 465 +formal 1 37 3.332205 3.332205 478 +mean 1 37 3.332205 3.332205 477 +short 1 36 3.367296 3.367296 499 +articl 2 33 3.433987 6.867974 530 +john 1 33 3.433987 3.433987 532 +taken 2 31 3.496508 6.993016 555 +photo 1 31 3.496508 3.496508 561 +posit 1 31 3.496508 3.496508 552 +richard 1 31 3.496508 3.496508 559 +scientist 1 31 3.496508 3.496508 560 +robert 3 30 3.555348 10.666044 567 +univ 1 28 3.610918 3.610918 617 +pass 1 28 3.610918 3.610918 611 +symbol 3 27 3.637586 10.912758 620 +repres 1 26 3.688879 3.688879 656 +bound 1 26 3.688879 3.688879 659 +fundament 2 25 3.737670 7.475340 661 +reach 1 24 3.761200 3.761200 688 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +recommend 1 22 3.850148 3.850148 737 +properti 1 22 3.850148 3.850148 749 +divis 1 21 3.912023 3.912023 803 +theorem 1 21 3.912023 3.912023 786 +fund 1 21 3.912023 3.912023 805 +verif 1 20 3.951244 3.951244 826 +wonder 1 20 3.951244 3.951244 815 +wrote 1 20 3.951244 3.951244 830 +offici 1 18 4.060443 4.060443 894 +moor 1 17 4.110874 4.110874 936 +everyth 1 13 4.382027 4.382027 1169 +introduc 1 13 4.382027 4.382027 1139 +shape 1 12 4.465908 4.465908 1245 +stephen 1 11 4.553877 4.553877 1342 +smart 1 11 4.553877 4.553877 1352 +noth 1 11 4.553877 4.553877 1328 +peter 1 11 4.553877 4.553877 1316 +instanc 1 11 4.553877 4.553877 1322 +rice 1 11 4.553877 4.553877 1336 +death 1 10 4.653960 4.653960 1457 +govern 3 9 4.753590 14.260770 1581 +float 1 9 4.753590 4.753590 1504 +end 1 9 4.753590 4.753590 1567 +said 1 9 4.753590 4.753590 1571 +ball 1 9 4.753590 4.753590 1608 +prover 1 8 4.875197 4.875197 1653 +joke 1 8 4.875197 4.875197 1620 +pageth 1 7 5.010635 5.010635 1939 +microprocessor 1 7 5.010635 5.010635 1808 +zero 1 7 5.010635 5.010635 1896 +intellectu 1 7 5.010635 5.010635 1847 +discoveri 1 7 5.010635 5.010635 1915 +boyer 4 6 5.164786 20.659144 2013 +furthermor 1 6 5.164786 5.164786 2141 +licens 2 5 5.347108 10.694216 2520 +own 1 5 5.347108 5.347108 2531 +despit 1 5 5.347108 5.347108 2317 +oftexa 2 4 5.568345 11.136690 3003 +vote 1 4 5.568345 5.568345 2953 +disclaim 1 4 5.568345 5.568345 2847 +fire 1 4 5.568345 5.568345 3001 +subsequ 1 4 5.568345 5.568345 2665 +usaemail 1 3 5.857933 5.857933 3722 +mccune 1 3 5.857933 5.857933 3522 +enumer 1 3 5.857933 5.857933 3244 +tenur 1 3 5.857933 5.857933 3801 +shouldb 1 3 5.857933 5.857933 3673 +deutsch 1 3 5.857933 5.857933 3802 +harold 1 3 5.857933 5.857933 3803 +carbon 1 3 5.857933 5.857933 3804 +loss 1 3 5.857933 5.857933 3805 +edufax 1 2 6.263398 6.263398 5479 +knowna 1 2 6.263398 6.263398 5480 +webth 1 2 6.263398 6.263398 5481 +projectmi 1 2 6.263398 6.263398 5482 +andsom 1 2 6.263398 6.263398 5483 +thegreat 1 2 6.263398 6.263398 4987 +thereof 1 2 6.263398 6.263398 5484 +steal 1 2 6.263398 6.263398 5485 +riski 1 2 6.263398 6.263398 4291 +peano 1 2 6.263398 6.263398 4234 +rebel 1 2 6.263398 6.263398 5388 +amor 1 2 6.263398 6.263398 5486 +congeni 1 2 6.263398 6.263398 4713 +rudi 1 2 6.263398 6.263398 5487 +verg 1 2 6.263398 6.263398 5488 +atom 1 2 6.263398 6.263398 4472 +lament 1 2 6.263398 6.263398 4866 +texan 1 2 6.263398 6.263398 5489 +boyerhom 1 1 6.957497 6.957497 12700 +philosophydepart 1 1 6.957497 6.957497 12701 +austinhow 1 1 6.957497 6.957497 12702 +mepap 1 1 6.957497 6.957497 12703 +locationsclassescurriculum 1 1 6.957497 6.957497 12704 +vitaeperson 1 1 6.957497 6.957497 12705 +dataeducationpublicationshonorsjobsgradu 1 1 6.957497 6.957497 12706 +studentsth 1 1 6.957497 6.957497 12707 +nqthm 1 1 6.957497 6.957497 12708 +mccarthi 1 1 6.957497 6.957497 12709 +moffett 1 1 6.957497 6.957497 12710 +controversyni 1 1 6.957497 6.957497 12711 +robbin 1 1 6.957497 6.957497 12712 +permitsth 1 1 6.957497 6.957497 12713 +administrativeoverhead 1 1 6.957497 6.957497 12714 +howthi 1 1 6.957497 6.957497 12715 +confess 1 1 6.957497 6.957497 12716 +acanon 1 1 6.957497 6.957497 12717 +thumper 1 1 6.957497 6.957497 12718 +universitiesstandard 1 1 6.957497 6.957497 12719 +aweb 1 1 6.957497 6.957497 12720 +anind 1 1 6.957497 6.957497 12721 +endors 1 1 6.957497 6.957497 12722 +habitu 1 1 6.957497 6.957497 12723 +hislectur 1 1 6.957497 6.957497 12724 +militaryacademi 1 1 6.957497 6.957497 12725 +incens 1 1 6.957497 6.957497 12726 +hisformalist 1 1 6.957497 6.957497 12727 +hispromis 1 1 6.957497 6.957497 12728 +turin 1 1 6.957497 6.957497 12729 +sincomplet 1 1 6.957497 6.957497 12730 +rucker 1 1 6.957497 6.957497 12731 +extinct 1 1 6.957497 6.957497 12732 +kroto 1 1 6.957497 6.957497 12733 +britain 1 1 6.957497 6.957497 12734 +sussex 1 1 6.957497 6.957497 12735 +chemistrypr 1 1 6.957497 6.957497 12736 +curl 1 1 6.957497 6.957497 12737 +smallei 1 1 6.957497 6.957497 12738 +inhouston 1 1 6.957497 6.957497 12739 +asocc 1 1 6.957497 6.957497 12740 +upup 1 1 6.957497 6.957497 12741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^browne^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^browne^ new file mode 100644 index 00000000..2c2fd005 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^browne^ @@ -0,0 +1,157 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 12 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 16 374 0.693147 11.090352 7 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +engin 2 297 1.098612 2.197224 20 +languag 6 227 1.386294 8.317764 26 +gener 2 220 1.386294 2.772588 27 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +paper 4 205 1.609438 6.437752 38 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +parallel 20 169 1.791759 35.835180 60 +texa 2 160 1.791759 3.583518 64 +develop 2 174 1.791759 3.583518 53 +base 2 165 1.791759 3.583518 50 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +distribut 1 162 1.791759 1.791759 51 +austin 1 168 1.791759 1.791759 63 +implement 1 152 1.791759 1.791759 52 +process 3 142 1.945910 5.837730 72 +professor 2 137 1.945910 3.891820 76 +model 2 145 1.945910 3.891820 69 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +high 4 130 2.079442 8.317768 101 +confer 4 126 2.079442 8.317768 100 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +code 8 108 2.197225 17.577800 116 +intern 3 108 2.197225 6.591675 128 +specif 2 106 2.197225 4.394450 106 +structur 2 106 2.197225 4.394450 105 +version 2 113 2.197225 4.394450 122 +technic 1 100 2.302585 2.302585 140 +proceed 4 93 2.397895 9.591580 152 +graphic 3 90 2.397895 7.193685 147 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +environ 7 84 2.484907 17.394349 177 +level 4 87 2.484907 9.939628 180 +ieee 2 86 2.484907 4.969814 190 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +refer 1 78 2.564949 2.564949 203 +april 1 77 2.564949 2.564949 196 +good 1 77 2.564949 2.564949 200 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +august 3 66 2.708050 8.124150 257 +integr 2 67 2.708050 5.416100 245 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +interact 1 62 2.772589 2.772589 270 +juli 1 60 2.833213 2.833213 305 +major 1 56 2.890372 2.890372 315 +three 1 54 2.944439 2.944439 330 +visual 5 48 3.044522 15.222610 372 +physic 2 47 3.091042 6.182084 377 +california 1 46 3.091042 3.091042 388 +describ 1 45 3.135494 3.135494 400 +societi 2 40 3.258097 6.516194 456 +electr 1 38 3.295837 3.295837 461 +prototyp 1 38 3.295837 3.295837 463 +formal 1 37 3.332205 3.332205 478 +ofth 1 36 3.367296 3.367296 491 +jame 2 35 3.401197 6.802394 507 +award 1 34 3.401197 3.401197 523 +concurr 1 34 3.401197 3.401197 501 +extend 1 32 3.465736 3.465736 539 +idea 1 32 3.465736 3.465736 545 +focu 1 30 3.555348 3.555348 571 +specifi 1 30 3.555348 3.555348 568 +graph 1 30 3.555348 3.555348 576 +chair 1 29 3.583519 3.583519 596 +univ 1 28 3.610918 3.610918 617 +american 1 27 3.637586 3.637586 634 +compar 2 26 3.688879 7.377758 648 +experiment 1 26 3.688879 3.688879 645 +rule 1 26 3.688879 3.688879 638 +proc 1 26 3.688879 3.688879 649 +supercomput 1 25 3.737670 3.737670 681 +fellow 2 24 3.761200 7.522400 701 +flow 1 24 3.761200 3.761200 700 +displai 1 23 3.806662 3.806662 712 +siam 1 21 3.912023 3.912023 800 +longer 1 20 3.951244 3.951244 816 +debug 4 17 4.110874 16.443496 944 +moor 2 17 4.110874 8.221748 936 +brown 10 16 4.174387 41.743870 977 +partit 1 16 4.174387 4.174387 984 +brief 1 16 4.174387 4.174387 1001 +conf 1 13 4.382027 4.382027 1181 +evolv 1 12 4.465908 4.465908 1223 +robust 1 12 4.465908 4.465908 1271 +volum 1 11 4.553877 4.553877 1347 +broad 1 11 4.553877 4.553877 1302 +declar 1 9 4.753590 4.753590 1526 +notat 1 9 4.753590 4.753590 1489 +researchi 1 8 4.875197 4.875197 1756 +unifi 1 8 4.875197 4.875197 1774 +newton 3 7 5.010635 15.031905 1824 +henc 2 7 5.010635 10.021270 1805 +ongo 1 6 5.164786 5.164786 2215 +british 1 5 5.347108 5.347108 2546 +jain 1 5 5.347108 5.347108 2332 +mirank 1 5 5.347108 5.347108 2543 +remain 1 5 5.347108 5.347108 2278 +hyder 3 4 5.568345 16.705035 2772 +werth 3 4 5.568345 16.705035 3004 +interestparallel 1 3 5.857933 5.857933 3806 +narrow 1 3 5.857933 5.857933 3807 +publicationsj 1 3 5.857933 5.857933 3808 +baltimor 1 3 5.857933 5.857933 3809 +dongarra 2 2 6.263398 12.526796 5058 +hendrix 1 2 6.263398 6.263398 5490 +anabstract 1 2 6.263398 6.263398 5491 +brownereg 1 1 6.957497 6.957497 12742 +collegeph 1 1 6.957497 6.957497 12743 +austinhonor 1 1 6.957497 6.957497 12744 +societyarea 1 1 6.957497 6.957497 12745 +sciencewith 1 1 6.957497 6.957497 12746 +tenyear 1 1 6.957497 6.957497 12747 +computation 1 1 6.957497 6.957497 12748 +includesmethod 1 1 6.957497 6.957497 12749 +highlevel 1 1 6.957497 6.957497 12750 +throughdata 1 1 6.957497 6.957497 12751 +compositionalapproach 1 1 6.957497 6.957497 12752 +intelligenceprocess 1 1 6.957497 6.957497 12753 +fluiddynam 1 1 6.957497 6.957497 12754 +domaincompil 1 1 6.957497 6.957497 12755 +basedlanguag 1 1 6.957497 6.957497 12756 +timedecis 1 1 6.957497 6.957497 12757 +andpract 1 1 6.957497 6.957497 12758 +fourthworkshop 1 1 6.957497 6.957497 12759 +santacruz 1 1 6.957497 6.957497 12760 +theeffect 1 1 6.957497 6.957497 12761 +parallelizingcompil 1 1 6.957497 6.957497 12762 +kleyn 1 1 6.957497 6.957497 12763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cad^cad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cad^cad.html new file mode 100644 index 00000000..3ffd0150 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cad^cad.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 3 384 0.693147 2.079441 11 +depart 2 457 0.693147 1.386294 12 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +link 2 247 1.386294 2.772588 24 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +group 5 183 1.609438 8.047190 36 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +recent 1 167 1.791759 1.791759 58 +area 2 144 1.945910 3.891820 80 +architectur 2 139 1.945910 3.891820 77 +perform 1 143 1.945910 1.945910 74 +high 1 130 2.079442 2.079442 101 +peopl 1 96 2.302585 2.302585 132 +comment 2 93 2.397895 4.795790 146 +follow 1 92 2.397895 2.397895 143 +member 1 84 2.484907 2.484907 165 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +ieee 1 86 2.484907 2.484907 190 +institut 1 84 2.484907 2.484907 187 +issu 1 78 2.564949 2.564949 211 +logic 1 71 2.639057 2.639057 230 +prof 1 64 2.772589 2.772589 273 +abstract 1 62 2.772589 2.772589 276 +special 1 56 2.890372 2.890372 320 +found 1 53 2.944439 2.944439 337 +electron 1 47 3.091042 3.091042 379 +autom 1 41 3.218876 3.218876 434 +electr 1 38 3.295837 3.295837 461 +rang 1 30 3.555348 3.555348 565 +utc 1 27 3.637586 3.637586 629 +trace 1 25 3.737670 3.737670 677 +vlsi 3 21 3.912023 11.736069 795 +martin 1 21 3.912023 3.912023 794 +chen 1 21 3.912023 3.912023 791 +rout 1 21 3.912023 3.912023 793 +supervis 1 20 3.951244 3.951244 840 +synthesi 1 20 3.951244 3.951244 834 +partit 1 16 4.174387 4.174387 984 +fpga 2 10 4.653960 9.307920 1433 +placement 1 10 4.653960 4.653960 1420 +wong 1 9 4.753590 4.753590 1609 +classifi 1 9 4.753590 4.753590 1537 +chung 1 7 5.010635 5.010635 1964 +zhou 1 6 5.164786 5.164786 2092 +ping 2 4 5.568345 11.136690 2922 +ming 1 3 5.857933 5.857933 3712 +researchth 1 2 6.263398 6.263398 5492 +broadli 1 2 6.263398 6.263398 5095 +sigda 1 2 6.263398 6.263398 5493 +thakur 2 1 6.957497 13.914994 12764 +addressdepart 1 1 6.957497 6.957497 12765 +chenyao 1 1 6.957497 6.957497 12766 +yung 1 1 6.957497 6.957497 12767 +fang 1 1 6.957497 6.957497 12768 +shashidhar 1 1 6.957497 6.957497 12769 +groupcan 1 1 6.957497 6.957497 12770 +austinclick 1 1 6.957497 6.957497 12771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^canfield^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^canfield^ new file mode 100644 index 00000000..ff6ef1dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^canfield^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +work 3 380 0.693147 2.079441 9 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +distribut 2 162 1.791759 3.583518 51 +avail 1 169 1.791759 1.791759 48 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +professor 1 137 1.945910 1.945910 76 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +mani 1 92 2.397895 2.397895 150 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +april 1 77 2.564949 2.564949 196 +prof 1 64 2.772589 2.772589 273 +variou 1 56 2.890372 2.890372 317 +talk 1 53 2.944439 2.944439 336 +hardwar 1 51 2.995732 2.995732 350 +give 1 50 3.044522 3.044522 359 +cool 1 49 3.044522 3.044522 374 +done 1 47 3.091042 3.091042 381 +slide 1 38 3.295837 3.295837 467 +photo 1 31 3.496508 3.496508 561 +travel 1 30 3.555348 3.555348 579 +built 1 29 3.583519 3.583519 592 +effort 1 26 3.688879 3.688879 652 +highli 1 23 3.806662 3.806662 725 +thank 1 23 3.806662 3.806662 721 +divis 1 21 3.912023 3.912023 803 +verif 1 20 3.951244 3.951244 826 +women 1 16 4.174387 4.174387 1004 +todd 1 15 4.248495 4.248495 1051 +wife 1 13 4.382027 4.382027 1196 +land 1 12 4.465908 4.465908 1273 +speak 1 12 4.465908 4.465908 1283 +bill 1 11 4.553877 4.553877 1297 +peter 1 11 4.553877 4.553877 1316 +label 1 10 4.653960 4.653960 1423 +poetri 1 9 4.753590 4.753590 1596 +andth 1 9 4.753590 4.753590 1481 +mach 1 8 4.875197 4.875197 1669 +daughter 1 7 5.010635 5.010635 1943 +ruth 1 7 5.010635 5.010635 1870 +prioriti 1 7 5.010635 5.010635 1792 +foreign 1 7 5.010635 5.010635 1919 +beer 2 6 5.164786 10.329572 2216 +somewher 1 6 5.164786 5.164786 2176 +approv 1 6 5.164786 5.164786 2078 +humor 2 5 5.347108 10.694216 2533 +allen 1 5 5.347108 5.347108 2470 +emerson 1 5 5.347108 5.347108 2547 +li 1 5 5.347108 5.347108 2500 +substitut 1 5 5.347108 5.347108 2247 +ti 1 4 5.568345 5.568345 3005 +clair 1 4 5.568345 5.568345 2605 +enjoy 1 4 5.568345 5.568345 2937 +guangtian 1 3 5.857933 5.857933 3810 +haiku 1 3 5.857933 5.857933 3811 +cristian 1 2 6.263398 6.263398 4311 +sourcesth 1 2 6.263398 6.263398 4219 +disinform 1 2 6.263398 6.263398 5494 +dole 1 2 6.263398 6.263398 4067 +canfieldhom 1 1 6.957497 6.957497 12772 +businessmi 1 1 6.957497 6.957497 12773 +flaviu 1 1 6.957497 6.957497 12774 +ther 1 1 6.957497 6.957497 12775 +pleasuredomest 1 1 6.957497 6.957497 12776 +bliss 1 1 6.957497 6.957497 12777 +carla 1 1 6.957497 6.957497 12778 +newborn 1 1 6.957497 6.957497 12779 +parenthood 1 1 6.957497 6.957497 12780 +struck 1 1 6.957497 6.957497 12781 +peel 1 1 6.957497 6.957497 12782 +bottl 1 1 6.957497 6.957497 12783 +sofaspher 1 1 6.957497 6.957497 12784 +olestra 1 1 6.957497 6.957497 12785 +canfield 1 1 6.957497 6.957497 12786 +peterst 1 1 6.957497 6.957497 12787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^carruth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^carruth^ new file mode 100644 index 00000000..ac921674 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^carruth^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 2 293 1.098612 2.197224 17 +offic 2 299 1.098612 2.197224 13 +mail 2 238 1.386294 2.772588 22 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +professor 2 137 1.945910 3.891820 76 +hall 1 146 1.945910 1.945910 65 +introduct 1 126 2.079442 2.079442 87 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +question 1 91 2.397895 2.397895 141 +real 1 93 2.397895 2.397895 144 +member 1 84 2.484907 2.484907 165 +wide 1 84 2.484907 2.484907 185 +order 1 69 2.708050 2.708050 249 +function 1 62 2.772589 2.772589 275 +taylor 1 63 2.772589 2.772589 287 +suggest 1 53 2.944439 2.944439 331 +autom 1 41 3.218876 3.218876 434 +extend 1 32 3.465736 3.465736 539 +express 1 32 3.465736 3.465736 540 +computersci 1 30 3.555348 3.555348 562 +usual 1 28 3.610918 3.610918 608 +progress 1 28 3.610918 3.610918 598 +theorem 1 21 3.912023 3.912023 786 +supervis 1 20 3.951244 3.951244 840 +safeti 1 20 3.951244 3.951244 817 +prove 1 19 4.007333 4.007333 848 +partial 1 18 4.060443 4.060443 900 +finit 1 14 4.317488 4.317488 1106 +candid 1 9 4.753590 4.753590 1606 +misra 2 7 5.010635 10.021270 1856 +jayadev 1 4 5.568345 5.568345 3006 +uniti 3 3 5.857933 17.573799 3812 +alsointerest 1 3 5.857933 5.857933 3813 +carruth 4 2 6.263398 25.053592 5495 +mydissert 1 2 6.263398 6.263398 5496 +carruthpleas 1 1 6.957497 6.957497 12788 +boundson 1 1 6.957497 6.957497 12789 +ordersemant 1 1 6.957497 6.957497 12790 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ccp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ccp^ new file mode 100644 index 00000000..ae97637c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ccp^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +utexa 2 189 1.609438 3.218876 44 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hour 1 165 1.791759 1.791759 46 +problem 1 147 1.945910 1.945910 75 +schedul 2 119 2.079442 4.158884 85 +intern 1 108 2.197225 2.197225 128 +send 1 114 2.197225 2.197225 109 +grade 1 90 2.397895 2.397895 142 +exam 1 86 2.484907 2.484907 169 +homework 1 79 2.564949 2.564949 193 +syllabu 1 67 2.708050 2.708050 247 +polici 1 64 2.772589 2.772589 279 +new 1 64 2.772589 2.772589 262 +improv 1 62 2.772589 2.772589 289 +locat 1 59 2.833213 2.833213 303 +summer 1 56 2.890372 2.890372 311 +suggest 1 53 2.944439 2.944439 331 +idea 1 32 3.465736 3.465736 545 +chen 2 21 3.912023 7.824046 791 +exercis 1 19 4.007333 4.007333 842 +intel 1 16 4.174387 4.174387 1000 +meng 1 12 4.465908 4.465908 1214 +chung 2 7 5.010635 10.021270 1964 +ping 3 4 5.568345 16.705035 2922 +fiance 1 2 6.263398 6.263398 5497 +tsai 1 2 6.263398 6.263398 4831 +bufferinsert 1 1 6.957497 6.957497 12791 +syllabustopicschung 1 1 6.957497 6.957497 12792 +clen 1 1 6.957497 6.957497 12793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cdj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cdj^ new file mode 100644 index 00000000..be2753b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cdj^ @@ -0,0 +1,37 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +austin 4 168 1.791759 7.167036 63 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +well 1 109 2.197225 2.197225 121 +homepag 1 93 2.397895 2.397895 148 +main 1 67 2.708050 2.708050 256 +anoth 2 45 3.135494 6.270988 408 +third 1 43 3.178054 3.178054 412 +chen 1 21 3.912023 3.912023 791 +break 1 20 3.951244 3.951244 812 +item 1 19 4.007333 4.007333 856 +easi 1 16 4.174387 4.174387 969 +lake 1 11 4.553877 4.553877 1373 +paragraph 2 10 4.653960 9.307920 1449 +mepost 1 10 4.653960 4.653960 1472 +usaphon 1 9 4.753590 4.753590 1600 +forget 1 8 4.875197 4.875197 1712 +shanghai 1 4 5.568345 5.568345 2925 +blvd 1 4 5.568345 5.568345 3007 +deji 2 2 6.263398 12.526796 5498 +chenabout 1 2 6.263398 6.263398 5499 +bullet 1 2 6.263398 6.263398 5500 +mehello 1 1 6.957497 6.957497 12794 +tongji 1 1 6.957497 6.957497 12795 +chinaa 1 1 6.957497 6.957497 12796 +usahom 1 1 6.957497 6.957497 12797 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chaput^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chaput^ new file mode 100644 index 00000000..73d77643 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chaput^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +softwar 1 220 1.386294 1.386294 30 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +peopl 1 96 2.302585 2.302585 132 +educ 2 86 2.484907 4.969814 191 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +school 1 84 2.484907 2.484907 188 +write 2 72 2.639057 5.278114 222 +simul 1 66 2.708050 2.708050 255 +artifici 1 63 2.772589 2.772589 280 +new 1 64 2.772589 2.772589 262 +plai 2 60 2.833213 5.666426 307 +visual 1 48 3.044522 3.044522 372 +life 1 50 3.044522 3.044522 375 +archiv 1 49 3.044522 3.044522 364 +programm 1 39 3.258097 3.258097 445 +movi 1 40 3.258097 3.258097 459 +microsoft 1 38 3.295837 3.295837 468 +mean 1 37 3.332205 3.332205 477 +robot 1 36 3.367296 3.367296 497 +represent 1 35 3.401197 3.401197 512 +common 1 30 3.555348 3.555348 574 +symbol 2 27 3.637586 7.275172 620 +spent 1 25 3.737670 3.737670 676 +client 1 25 3.737670 3.737670 679 +corpor 1 21 3.912023 3.912023 802 +martin 1 21 3.912023 3.912023 794 +watch 1 21 3.912023 3.912023 789 +voic 1 21 3.912023 3.912023 806 +portabl 1 20 3.951244 3.951244 819 +scheme 1 20 3.951244 3.951244 818 +left 1 19 4.007333 4.007333 851 +lisp 1 18 4.060443 4.060443 897 +listen 1 18 4.060443 4.060443 907 +macintosh 2 17 4.110874 8.221748 920 +anywai 1 15 4.248495 4.248495 1047 +emploi 1 12 4.465908 4.465908 1284 +tour 1 11 4.553877 4.553877 1307 +wood 1 11 4.553877 4.553877 1355 +road 1 11 4.553877 4.553877 1374 +bike 1 10 4.653960 4.653960 1468 +hang 1 9 4.753590 4.753590 1499 +brain 1 8 4.875197 4.875197 1638 +ride 1 8 4.875197 4.875197 1741 +sleep 1 6 5.164786 5.164786 2211 +dream 1 6 5.164786 5.164786 2165 +fiction 1 6 5.164786 5.164786 2217 +emerg 1 6 5.164786 5.164786 2038 +hair 1 5 5.347108 5.347108 2446 +worst 1 5 5.347108 5.347108 2287 +webster 1 5 5.347108 5.347108 2468 +phrase 1 5 5.347108 5.347108 2242 +austindepart 1 4 5.568345 5.568345 3008 +catch 1 4 5.568345 5.568345 2602 +sciencestaylor 1 3 5.857933 5.857933 3814 +republican 1 3 5.857933 5.857933 3815 +softwareth 1 3 5.857933 5.857933 3552 +stone 1 3 5.857933 5.857933 3674 +cliff 4 2 6.263398 25.053592 4285 +mstk 2 2 6.263398 12.526796 5501 +northwestern 1 2 6.263398 6.263398 5502 +captain 1 2 6.263398 6.263398 4983 +webth 1 2 6.263398 6.263398 5481 +weird 1 2 6.263398 6.263398 5503 +chaputcliff 1 1 6.957497 6.957497 12798 +chaputth 1 1 6.957497 6.957497 12799 +robotlab 1 1 6.957497 6.957497 12800 +dullchaput 1 1 6.957497 6.957497 12801 +gothimself 1 1 6.957497 6.957497 12802 +anemail 1 1 6.957497 6.957497 12803 +odesta 1 1 6.957497 6.957497 12804 +thelearn 1 1 6.957497 6.957497 12805 +hewrot 1 1 6.957497 6.957497 12806 +trane 1 1 6.957497 6.957497 12807 +thenimpl 1 1 6.957497 6.957497 12808 +studentscal 1 1 6.957497 6.957497 12809 +gamesproject 1 1 6.957497 6.957497 12810 +labannoi 1 1 6.957497 6.957497 12811 +farka 1 1 6.957497 6.957497 12812 +medeski 1 1 6.957497 6.957497 12813 +rerun 1 1 6.957497 6.957497 12814 +korg 1 1 6.957497 6.957497 12815 +turnon 1 1 6.957497 6.957497 12816 +breakfast 1 1 6.957497 6.957497 12817 +raspi 1 1 6.957497 6.957497 12818 +starfleet 1 1 6.957497 6.957497 12819 +turnoff 1 1 6.957497 6.957497 12820 +hangov 1 1 6.957497 6.957497 12821 +fave 1 1 6.957497 6.957497 12822 +eventsdaili 1 1 6.957497 6.957497 12823 +reutersintellicast 1 1 6.957497 6.957497 12824 +weatheraustin 1 1 6.957497 6.957497 12825 +txchicago 1 1 6.957497 6.957497 12826 +ilperiodicalssucksalonmirski 1 1 6.957497 6.957497 12827 +onionmacweekmacuserreferencehypertext 1 1 6.957497 6.957497 12828 +interfaceyahooalta 1 1 6.957497 6.957497 12829 +vistacardiff 1 1 6.957497 6.957497 12830 +databaselyco 1 1 6.957497 6.957497 12831 +mapalt 1 1 6.957497 6.957497 12832 +culturemacintosh 1 1 6.957497 6.957497 12833 +dataappl 1 1 6.957497 6.957497 12834 +computercyberdogquicktimequickdraw 1 1 6.957497 6.957497 12835 +dappl 1 1 6.957497 6.957497 12836 +supportmacintouchmacintosh 1 1 6.957497 6.957497 12837 +resourcecyberdog 1 1 6.957497 6.957497 12838 +poundinfo 1 1 6.957497 6.957497 12839 +rootcool 1 1 6.957497 6.957497 12840 +stufffringewareth 1 1 6.957497 6.957497 12841 +actlabpbsnprnow 1 1 6.957497 6.957497 12842 +catalogpap 1 1 6.957497 6.957497 12843 +rsumsymbol 1 1 6.957497 6.957497 12844 +groundingrobotmap 1 1 6.957497 6.957497 12845 +peopledav 1 1 6.957497 6.957497 12846 +falooncharl 1 1 6.957497 6.957497 12847 +lewisjeff 1 1 6.957497 6.957497 12848 +lindjeff 1 1 6.957497 6.957497 12849 +sherwoodbrian 1 1 6.957497 6.957497 12850 +slatorsandi 1 1 6.957497 6.957497 12851 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chjwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chjwang^ new file mode 100644 index 00000000..578855cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chjwang^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +cours 1 273 1.098612 1.098612 15 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +read 1 154 1.791759 1.791759 47 +like 2 132 1.945910 3.891820 81 +welcom 1 122 2.079442 2.079442 99 +place 2 106 2.197225 4.394450 124 +check 2 115 2.197225 4.394450 118 +find 2 111 2.197225 4.394450 111 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +search 2 95 2.397895 4.795790 155 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +chang 1 82 2.484907 2.484907 163 +thing 1 84 2.484907 2.484907 189 +internet 1 83 2.484907 2.484907 186 +journal 1 83 2.484907 2.484907 183 +come 1 78 2.564949 2.564949 202 +orient 1 80 2.564949 2.564949 205 +know 1 80 2.564949 2.564949 198 +complet 1 77 2.564949 2.564949 208 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +view 2 70 2.708050 5.416100 254 +would 1 67 2.708050 2.708050 251 +detail 1 57 2.890372 2.890372 321 +unix 1 58 2.890372 2.890372 308 +without 1 50 3.044522 3.044522 370 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +origin 1 38 3.295837 3.295837 472 +china 2 37 3.332205 6.664410 487 +word 1 34 3.401197 3.401197 508 +return 1 34 3.401197 3.401197 502 +hard 1 30 3.555348 3.555348 563 +univ 1 28 3.610918 3.610918 617 +mind 1 27 3.637586 3.637586 632 +enjoi 1 26 3.688879 3.688879 660 +magazin 1 24 3.761200 3.761200 704 +wang 2 21 3.912023 7.824046 790 +among 1 21 3.912023 3.912023 781 +break 1 20 3.951244 3.951244 812 +fine 1 20 3.951244 3.951244 822 +thought 1 17 4.110874 4.110874 945 +countri 1 15 4.248495 4.248495 1059 +decid 1 14 4.317488 4.317488 1075 +came 1 13 4.382027 4.382027 1197 +tsinghua 1 13 4.382027 4.382027 1195 +captur 1 12 4.465908 4.465908 1232 +moment 1 11 4.553877 4.553877 1379 +surf 1 11 4.553877 4.553877 1301 +earth 1 10 4.653960 4.653960 1463 +end 1 9 4.753590 4.753590 1567 +jump 1 9 4.753590 4.753590 1603 +unusu 1 9 4.753590 4.753590 1566 +opinion 1 8 4.875197 4.875197 1708 +philosoph 1 7 5.010635 5.010635 1904 +televis 1 6 5.164786 5.164786 2118 +rock 1 6 5.164786 5.164786 2164 +million 1 5 5.347108 5.347108 2495 +provinc 1 4 5.568345 5.568345 3009 +gloriou 1 3 5.857933 5.857933 3816 +hometown 1 3 5.857933 5.857933 3817 +tower 1 3 5.857933 5.857933 3818 +fresh 1 3 5.857933 5.857933 3706 +nifti 1 2 6.263398 6.263398 5504 +numb 1 2 6.263398 6.263398 5505 +pope 1 2 6.263398 6.263398 5506 +chuanjun 2 1 6.957497 13.914994 12852 +diamond 2 1 6.957497 13.914994 12853 +stun 1 1 6.957497 6.957497 12854 +hubei 1 1 6.957497 6.957497 12855 +beautifulunivers 1 1 6.957497 6.957497 12856 +faceless 1 1 6.957497 6.957497 12857 +brilliant 1 1 6.957497 6.957497 12858 +miner 1 1 6.957497 6.957497 12859 +unemploi 1 1 6.957497 6.957497 12860 +dobb 1 1 6.957497 6.957497 12861 +prose 1 1 6.957497 6.957497 12862 +porsch 1 1 6.957497 6.957497 12863 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chuang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chuang^ new file mode 100644 index 00000000..3b59a929 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^chuang^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +updat 1 191 1.609438 1.609438 41 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +process 2 142 1.945910 3.891820 72 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +assist 1 112 2.197225 2.197225 113 +person 1 111 2.197225 2.197225 117 +user 2 104 2.302585 4.605170 137 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +interfac 1 79 2.564949 2.564949 209 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +degre 1 69 2.708050 2.708050 259 +knowledg 1 67 2.708050 2.708050 243 +dept 1 64 2.772589 2.772589 291 +improv 1 62 2.772589 2.772589 289 +result 1 65 2.772589 2.772589 281 +automat 1 61 2.833213 2.833213 306 +major 1 56 2.890372 2.890372 315 +allow 1 53 2.944439 2.944439 333 +tabl 1 51 2.995732 2.995732 346 +visitor 1 49 3.044522 3.044522 371 +natur 1 44 3.135494 3.135494 406 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +york 1 41 3.218876 3.218876 435 +continu 1 39 3.258097 3.258097 448 +word 1 34 3.401197 3.401197 508 +human 1 32 3.465736 3.465736 546 +chines 1 29 3.583519 3.583519 595 +reach 1 24 3.761200 3.761200 688 +proof 1 23 3.806662 3.806662 720 +tenni 1 20 3.951244 3.951244 838 +ever 1 19 4.007333 4.007333 872 +segment 1 17 4.110874 4.110874 931 +taiwan 2 16 4.174387 8.348774 1006 +capabl 1 15 4.248495 4.248495 1016 +train 1 14 4.317488 4.317488 1066 +huang 1 12 4.465908 4.465908 1202 +basketbal 1 12 4.465908 4.465908 1289 +literatur 1 11 4.553877 4.553877 1300 +tag 2 7 5.010635 10.021270 1821 +academia 1 6 5.164786 5.164786 2036 +chin 1 5 5.347108 5.347108 2408 +categori 1 5 5.347108 5.347108 2261 +markov 1 5 5.347108 5.347108 2280 +accuraci 1 5 5.347108 5.347108 2450 +atlant 1 5 5.347108 5.347108 2508 +taipei 1 4 5.568345 5.568345 2926 +worki 1 4 5.568345 5.568345 3010 +basebal 1 4 5.568345 5.568345 2969 +edufing 1 4 5.568345 5.568345 2713 +sinica 1 3 5.857933 5.857933 3819 +expans 1 3 5.857933 5.857933 3755 +instituteof 1 2 6.263398 6.263398 5507 +pinbal 1 2 6.263398 6.263398 5508 +todayth 1 2 6.263398 6.263398 5416 +tser 1 1 6.957497 6.957497 12864 +systemsexperiencei 1 1 6.957497 6.957497 12865 +usinghidden 1 1 6.957497 6.957497 12866 +friendli 1 1 6.957497 6.957497 12867 +toexecut 1 1 6.957497 6.957497 12868 +automatictag 1 1 6.957497 6.957497 12869 +improvedbecaus 1 1 6.957497 6.957497 12870 +interestsmovi 1 1 6.957497 6.957497 12871 +semiolog 1 1 6.957497 6.957497 12872 +siteschina 1 1 6.957497 6.957497 12873 +timesminsheng 1 1 6.957497 6.957497 12874 +dailyth 1 1 6.957497 6.957497 12875 +timesusa 1 1 6.957497 6.957497 12876 +economistth 1 1 6.957497 6.957497 12877 +monthlymak 1 1 6.957497 6.957497 12878 +chuang 1 1 6.957497 6.957497 12879 +meyou 1 1 6.957497 6.957497 12880 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cilk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cilk^ new file mode 100644 index 00000000..02a28058 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cilk^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +august 1 66 2.708050 2.708050 257 +robert 1 30 3.555348 3.555348 567 +runtim 1 19 4.007333 4.007333 858 +multithread 1 11 4.553877 4.553877 1315 +pronounc 1 7 5.010635 5.010635 1918 +blumoferdb 1 5 5.347108 5.347108 2324 +silk 1 2 6.263398 6.263398 5373 +inthi 1 2 6.263398 6.263398 5509 +cilkcilkcilk 1 1 6.957497 6.957497 12881 +languageand 1 1 6.957497 6.957497 12882 +thecilk 1 1 6.957497 6.957497 12883 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ckpoon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ckpoon^ new file mode 100644 index 00000000..dde6a523 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ckpoon^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +high 1 130 2.079442 2.079442 101 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +school 1 84 2.484907 2.484907 188 +plan 1 65 2.772589 2.772589 272 +complex 1 64 2.772589 2.772589 269 +thesi 1 57 2.890372 2.890372 327 +theoret 1 39 3.258097 3.258097 446 +sciencesunivers 1 37 3.332205 3.332205 486 +connect 1 37 3.332205 3.332205 485 +hong 1 14 4.317488 4.317488 1105 +kong 1 9 4.753590 4.753590 1602 +chung 2 7 5.010635 10.021270 1964 +austinaustin 1 7 5.010635 5.010635 1966 +edumi 1 6 5.164786 5.164786 2132 +fish 1 6 5.164786 5.164786 2207 +poon 1 3 5.857933 5.857933 3820 +ckpoon 1 2 6.263398 6.263398 5510 +hungri 1 2 6.263398 6.263398 5511 +keung 2 1 6.957497 13.914994 12884 +poondepart 1 1 6.957497 6.957497 12885 +askvinc 1 1 6.957497 6.957497 12886 +gogan 1 1 6.957497 6.957497 12887 +problemsom 1 1 6.957497 6.957497 12888 +harmonica 1 1 6.957497 6.957497 12889 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ckwong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ckwong^ new file mode 100644 index 00000000..c1a3fa28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ckwong^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +project 3 340 1.098612 3.295836 18 +student 2 343 1.098612 2.197224 19 +offic 2 299 1.098612 2.197224 13 +link 2 247 1.386294 2.772588 24 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +modifi 1 178 1.609438 1.609438 35 +austin 4 168 1.791759 7.167036 63 +texa 1 160 1.791759 1.791759 64 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +relat 1 139 1.945910 1.945910 68 +send 1 114 2.197225 2.197225 109 +access 1 102 2.302585 2.302585 136 +associ 1 93 2.397895 2.397895 151 +member 1 84 2.484907 2.484907 165 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +dept 1 64 2.772589 2.772589 291 +secur 4 30 3.555348 14.221392 577 +chines 1 29 3.583519 3.583519 595 +linux 1 27 3.637586 3.637586 631 +head 1 23 3.806662 3.806662 732 +divis 1 21 3.912023 3.912023 803 +role 1 14 4.317488 4.317488 1101 +hong 1 14 4.317488 4.317488 1105 +usavoic 1 13 4.382027 4.382027 1198 +thedepart 1 11 4.553877 4.553877 1350 +cryptographi 1 9 4.753590 4.753590 1512 +kong 1 9 4.753590 4.753590 1602 +simon 1 8 4.875197 4.875197 1697 +chung 1 7 5.010635 5.010635 1964 +park 1 6 5.164786 5.164786 2218 +mission 1 5 5.347108 5.347108 2465 +nist 1 4 5.568345 5.568345 2973 +church 1 4 5.568345 5.568345 3011 +rivest 1 3 5.857933 5.857933 3248 +meemail 1 3 5.857933 5.857933 3821 +edupost 1 3 5.857933 5.857933 3822 +thenetwork 1 2 6.263398 6.263398 5434 +byprof 1 2 6.263398 6.263398 5512 +wongchung 1 1 6.957497 6.957497 12890 +wonglast 1 1 6.957497 6.957497 12891 +labwhich 1 1 6.957497 6.957497 12892 +clearinghous 1 1 6.957497 6.957497 12893 +rbac 1 1 6.957497 6.957497 12894 +ckwong 1 1 6.957497 6.957497 12895 +hyde 1 1 6.957497 6.957497 12896 +baptist 1 1 6.957497 6.957497 12897 +netbsd 1 1 6.957497 6.957497 12898 +freebsd 1 1 6.957497 6.957497 12899 +openbsd 1 1 6.957497 6.957497 12900 +tockwong 1 1 6.957497 6.957497 12901 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^clancy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^clancy^ new file mode 100644 index 00000000..a8a9b458 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^clancy^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +address 3 170 1.791759 5.375277 62 +develop 2 174 1.791759 3.583518 53 +austin 2 168 1.791759 3.583518 63 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +techniqu 3 99 2.302585 6.907755 138 +real 1 93 2.397895 2.397895 144 +larg 2 82 2.484907 4.969814 168 +requir 1 81 2.484907 2.484907 167 +build 1 85 2.484907 2.484907 184 +dynam 1 76 2.564949 2.564949 194 +issu 1 78 2.564949 2.564949 211 +appli 1 71 2.639057 2.639057 226 +simul 4 66 2.708050 10.832200 255 +knowledg 2 67 2.708050 5.416100 243 +integr 1 67 2.708050 2.708050 245 +abstract 2 62 2.772589 5.545178 276 +complex 1 64 2.772589 2.772589 269 +result 1 65 2.772589 2.772589 281 +descript 1 64 2.772589 2.772589 271 +taylor 1 63 2.772589 2.772589 287 +automat 2 61 2.833213 5.666426 306 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +particular 1 51 2.995732 2.995732 352 +finger 1 52 2.995732 2.995732 354 +possibl 1 47 3.091042 3.091042 378 +netscap 1 44 3.135494 3.135494 395 +vita 1 38 3.295837 3.295837 473 +focu 1 30 3.555348 3.555348 571 +scale 1 28 3.610918 3.610918 613 +retriev 1 27 3.637586 3.637586 621 +constraint 1 26 3.688879 3.688879 636 +variabl 1 23 3.806662 3.806662 715 +behavior 2 18 4.060443 8.120886 881 +hotlist 1 13 4.382027 4.382027 1199 +qualit 2 11 4.553877 9.107754 1362 +facilit 1 10 4.653960 4.653960 1412 +incomplet 1 9 4.753590 4.753590 1575 +elimin 1 9 4.753590 4.753590 1558 +informationemail 1 9 4.753590 4.753590 1564 +aggreg 2 6 5.164786 10.329572 2219 +irrelev 1 3 5.857933 5.857933 3823 +descriptionof 1 2 6.263398 6.263398 5513 +intract 1 2 6.263398 6.263398 5044 +thiswil 1 2 6.263398 6.263398 4944 +withlarg 1 2 6.263398 6.263398 4926 +followingtechniqu 1 2 6.263398 6.263398 5514 +clanci 2 1 6.957497 13.914994 12902 +clancyresearch 1 1 6.957497 6.957497 12903 +containinga 1 1 6.957497 6.957497 12904 +frequentlyi 1 1 6.957497 6.957497 12905 +incomprehens 1 1 6.957497 6.957497 12906 +simulationto 1 1 6.957497 6.957497 12907 +distinctionsof 1 1 6.957497 6.957497 12908 +whichaddress 1 1 6.957497 6.957497 12909 +abstractiontechniqu 1 1 6.957497 6.957497 12910 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^clsy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^clsy^ new file mode 100644 index 00000000..9453d798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^clsy^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +three 1 54 2.944439 2.944439 330 +five 1 19 4.007333 4.007333 841 +four 1 18 4.060443 4.060443 905 +eight 1 11 4.553877 4.553877 1331 +seven 1 9 4.753590 4.753590 1561 +nine 1 6 5.164786 5.164786 2047 +eleven 1 3 5.857933 5.857933 3824 +jimbo 1 1 6.957497 6.957497 12911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cnchu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cnchu^ new file mode 100644 index 00000000..630cd25c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cnchu^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 6 168 1.791759 10.750554 63 +address 2 170 1.791759 3.583518 62 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +call 1 91 2.397895 2.397895 153 +taylor 1 63 2.772589 2.772589 287 +januari 1 62 2.772589 2.772589 264 +author 1 39 3.258097 3.258097 450 +china 1 37 3.332205 3.332205 487 +photo 1 31 3.496508 3.496508 561 +chines 2 29 3.583519 7.167038 595 +campu 1 27 3.637586 3.637586 623 +edulast 1 17 4.110874 4.110874 927 +hong 1 14 4.317488 4.317488 1105 +chri 3 11 4.553877 13.661631 1311 +fellowship 1 10 4.653960 4.653960 1460 +kong 1 9 4.753590 4.753590 1602 +christian 1 7 5.010635 5.010635 1949 +church 1 4 5.568345 5.568345 3011 +chuwelcom 1 1 6.957497 6.957497 12912 +myselfmi 1 1 6.957497 6.957497 12913 +chuemail 1 1 6.957497 6.957497 12914 +cnchu 1 1 6.957497 6.957497 12915 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^code^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^code^ new file mode 100644 index 00000000..a4b9224f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^code^ @@ -0,0 +1,180 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 8 374 0.693147 5.545176 7 +system 4 443 0.693147 2.772588 6 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +us 2 329 1.098612 2.197224 16 +last 2 314 1.098612 2.197224 14 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +mail 6 238 1.386294 8.317764 22 +softwar 3 220 1.386294 4.158882 30 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +list 4 201 1.609438 6.437752 39 +includ 2 208 1.609438 3.218876 42 +public 2 202 1.609438 3.218876 43 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +parallel 7 169 1.791759 12.542313 60 +avail 3 169 1.791759 5.375277 48 +base 2 165 1.791759 3.583518 50 +address 2 170 1.791759 3.583518 62 +contact 2 153 1.791759 3.583518 59 +austin 2 168 1.791759 3.583518 63 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +texa 1 160 1.791759 1.791759 64 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +click 3 142 1.945910 5.837730 78 +architectur 2 139 1.945910 3.891820 77 +model 2 145 1.945910 3.891820 69 +relat 2 139 1.945910 3.891820 68 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +document 3 121 2.079442 6.238326 89 +provid 2 121 2.079442 4.158884 94 +postscript 2 131 2.079442 4.158884 90 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +code 23 108 2.197225 50.536175 116 +version 5 113 2.197225 10.986125 122 +send 2 114 2.197225 4.394450 109 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +specif 1 106 2.197225 2.197225 106 +manag 1 114 2.197225 2.197225 125 +user 5 104 2.302585 11.512925 137 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +member 3 84 2.484907 7.454721 165 +journal 1 83 2.484907 2.484907 183 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +interfac 3 79 2.564949 7.694847 209 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +dynam 1 76 2.564949 2.564949 194 +line 3 75 2.639057 7.917171 231 +name 2 72 2.639057 5.278114 220 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +previou 2 62 2.772589 5.545178 290 +written 1 63 2.772589 2.772589 278 +improv 1 62 2.772589 2.772589 289 +januari 1 62 2.772589 2.772589 264 +automat 1 61 2.833213 2.833213 306 +direct 1 57 2.890372 2.890372 316 +major 1 56 2.890372 2.890372 315 +browser 1 56 2.890372 2.890372 313 +publish 1 57 2.890372 2.890372 326 +faculti 1 56 2.890372 2.890372 325 +overview 1 56 2.890372 2.890372 323 +allow 1 53 2.944439 2.944439 333 +visual 3 48 3.044522 9.133566 372 +still 1 50 3.044522 3.044522 362 +featur 2 46 3.091042 6.182084 386 +directori 1 45 3.135494 3.135494 396 +made 1 44 3.135494 3.135494 398 +edit 1 42 3.218876 3.218876 418 +tutori 3 39 3.258097 9.774291 437 +announc 2 40 3.258097 6.516194 441 +multipl 1 39 3.258097 3.258097 453 +join 1 39 3.258097 3.258097 457 +form 1 39 3.258097 3.258097 443 +connect 1 37 3.332205 3.332205 485 +download 2 36 3.367296 6.734592 489 +manual 3 35 3.401197 10.203591 504 +singl 1 34 3.401197 3.401197 510 +jame 1 35 3.401197 3.401197 507 +articl 2 33 3.433987 6.867974 530 +john 2 33 3.433987 6.867974 532 +independ 1 32 3.465736 3.465736 548 +kind 1 32 3.465736 3.465736 541 +ad 1 32 3.465736 3.465736 544 +produc 2 30 3.555348 7.110696 572 +graph 1 30 3.555348 3.555348 576 +compon 1 30 3.555348 3.555348 570 +releas 3 28 3.610918 10.832754 616 +quit 1 27 3.637586 3.637586 633 +repres 1 26 3.688879 3.688879 656 +revis 1 26 3.688879 3.688879 640 +constraint 1 26 3.688879 3.688879 636 +flow 1 24 3.761200 3.761200 700 +compress 1 23 3.806662 3.806662 719 +sequenti 1 22 3.850148 3.850148 745 +varieti 1 22 3.850148 3.850148 740 +hierarchi 1 22 3.850148 3.850148 744 +alumni 1 21 3.912023 3.912023 807 +prepar 1 20 3.951244 3.951244 824 +brown 1 16 4.174387 4.174387 977 +partit 1 16 4.174387 4.174387 984 +enough 1 15 4.248495 4.248495 1040 +affili 1 13 4.382027 4.382027 1194 +incorpor 1 13 4.382027 4.382027 1163 +instanc 1 11 4.553877 4.553877 1322 +node 1 11 4.553877 4.553877 1326 +fill 1 11 4.553877 4.553877 1349 +regard 1 11 4.553877 4.553877 1309 +screen 2 9 4.753590 9.507180 1577 +compos 1 9 4.753590 4.753590 1527 +entitl 1 9 4.753590 4.753590 1490 +shot 2 7 5.010635 10.021270 1898 +notifi 1 6 5.164786 5.164786 2106 +banerje 1 6 5.164786 5.164786 2018 +parallelprogram 1 5 5.347108 5.347108 2379 +stage 1 5 5.347108 5.347108 2488 +despit 1 5 5.347108 5.347108 2317 +alfr 1 4 5.568345 5.568345 2882 +lord 1 4 5.568345 5.568345 2906 +crai 1 4 5.568345 5.568345 3012 +prospect 1 4 5.568345 5.568345 3013 +snail 1 4 5.568345 5.568345 2916 +werth 1 4 5.568345 5.568345 3004 +preced 1 3 5.857933 5.857933 3107 +sophist 1 3 5.857933 5.857933 3545 +easier 1 3 5.857933 5.857933 3470 +pleasant 1 3 5.857933 5.857933 3825 +informationfor 1 3 5.857933 5.857933 3738 +berger 1 3 5.857933 5.857933 3702 +dwip 1 3 5.857933 5.857933 3197 +emeri 4 2 6.263398 25.053592 5515 +wilder 1 2 6.263398 6.263398 5516 +symmetri 1 2 6.263398 6.263398 5517 +newest 1 2 6.263398 6.263398 5518 +reproduc 1 2 6.263398 6.263398 5519 +publicationscod 1 2 6.263398 6.263398 5520 +ajita 1 2 6.263398 6.263398 5461 +systemmast 1 1 6.957497 6.957497 12916 +lawless 1 1 6.957497 6.957497 12917 +codeless 1 1 6.957497 6.957497 12918 +myriad 1 1 6.957497 6.957497 12919 +tennysoncod 1 1 6.957497 6.957497 12920 +sequentialprogram 1 1 6.957497 6.957497 12921 +wheredata 1 1 6.957497 6.957497 12922 +arc 1 1 6.957497 6.957497 12923 +thesequenti 1 1 6.957497 6.957497 12924 +sequent 1 1 6.957497 6.957497 12925 +smp 1 1 6.957497 6.957497 12926 +macdraw 1 1 6.957497 6.957497 12927 +subgraph 1 1 6.957497 6.957497 12928 +hpcwire 1 1 6.957497 6.957497 12929 +backend 1 1 6.957497 6.957497 12930 +xcodelib 1 1 6.957497 6.957497 12931 +lieu 1 1 6.957497 6.957497 12932 +groupgroup 1 1 6.957497 6.957497 12933 +leaderprofessor 1 1 6.957497 6.957497 12934 +bergerstud 1 1 6.957497 6.957497 12935 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^correl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^correl^ new file mode 100644 index 00000000..1e657066 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^correl^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +mail 2 238 1.386294 2.772588 22 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +texa 2 160 1.791759 3.583518 64 +base 1 165 1.791759 1.791759 50 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +site 1 106 2.197225 2.197225 119 +search 5 95 2.397895 11.989475 155 +addit 1 74 2.639057 2.639057 228 +knowledg 1 67 2.708050 2.708050 243 +taylor 1 63 2.772589 2.772589 287 +tech 1 35 3.401197 3.401197 515 +steve 1 29 3.583519 3.583519 594 +hotlist 1 13 4.382027 4.382027 1199 +correl 2 5 5.347108 10.694216 2279 +multifunct 1 3 5.857933 5.857933 3826 +correlstev 1 1 6.957497 6.957497 12936 +correlresearchph 1 1 6.957497 6.957497 12937 +reportcontact 1 1 6.957497 6.957497 12938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^ new file mode 100644 index 00000000..99d54256 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +system 4 443 0.693147 2.772588 6 +program 3 374 0.693147 2.079441 7 +interest 1 384 0.693147 0.693147 11 +time 4 293 1.098612 4.394448 17 +offic 2 299 1.098612 2.197224 13 +languag 3 227 1.386294 4.158882 26 +gener 1 220 1.386294 1.386294 27 +group 3 183 1.609438 4.828314 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +hall 1 146 1.945910 1.945910 65 +check 2 115 2.197225 4.394450 118 +specif 1 106 2.197225 2.197225 106 +part 1 98 2.302585 2.302585 129 +real 3 93 2.397895 7.193685 144 +control 1 82 2.484907 2.484907 164 +method 1 80 2.564949 2.564949 213 +written 1 63 2.772589 2.772589 278 +taylor 1 63 2.772589 2.772589 287 +origin 1 38 3.295837 3.295837 472 +respons 1 37 3.332205 3.332205 476 +formal 1 37 3.332205 3.332205 478 +robot 2 36 3.367296 6.734592 497 +synchron 1 29 3.583519 3.583519 588 +packag 1 28 3.610918 3.610918 614 +utc 1 27 3.637586 3.637586 629 +linux 1 27 3.637586 3.637586 631 +properti 1 22 3.850148 3.850148 749 +inth 1 22 3.850148 3.850148 741 +half 1 21 3.912023 3.912023 776 +latest 1 21 3.912023 3.912023 785 +wrote 1 20 3.951244 3.951244 830 +lot 1 18 4.060443 4.060443 889 +devic 1 16 4.174387 4.174387 1002 +driver 2 8 4.875197 9.750394 1657 +carlo 1 5 5.347108 5.347108 2515 +tempest 1 5 5.347108 5.347108 2548 +theth 1 5 5.347108 5.347108 2325 +toolset 1 4 5.568345 5.568345 3014 +austindepart 1 4 5.568345 5.568345 3008 +provinc 1 4 5.568345 5.568345 3009 +reactiv 1 3 5.857933 5.857933 3575 +publicationsi 1 3 5.857933 5.857933 3827 +softwareth 1 3 5.857933 5.857933 3552 +sciencesaustin 1 3 5.857933 5.857933 3828 +grabber 1 2 6.263398 6.263398 5521 +spain 1 2 6.263398 6.263398 5522 +esterel 2 1 6.957497 13.914994 12939 +pucholcarlo 1 1 6.957497 6.957497 12940 +pucholresearch 1 1 6.957497 6.957497 12941 +mawl 1 1 6.957497 6.957497 12942 +forbrows 1 1 6.957497 6.957497 12943 +verifyingsafeti 1 1 6.957497 6.957497 12944 +thequantavisionfram 1 1 6.957497 6.957497 12945 +thejoystickdevic 1 1 6.957497 6.957497 12946 +informationoffic 1 1 6.957497 6.957497 12947 +dreal 1 1 6.957497 6.957497 12948 +phun 1 1 6.957497 6.957497 12949 +interestsmemb 1 1 6.957497 6.957497 12950 +interesti 1 1 6.957497 6.957497 12951 +fromgandia 1 1 6.957497 6.957497 12952 +valencia 1 1 6.957497 6.957497 12953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ new file mode 100644 index 00000000..182c86eb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cpg^RTS^ @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +time 8 293 1.098612 8.788896 17 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +group 2 183 1.609438 3.218876 36 +paper 1 205 1.609438 1.609438 38 +base 1 165 1.791759 1.791759 50 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +real 7 93 2.397895 16.785265 144 +follow 1 92 2.397895 2.397895 143 +build 1 85 2.484907 2.484907 184 +member 1 84 2.484907 2.484907 165 +logic 1 71 2.639057 2.639057 230 +simul 1 66 2.708050 2.708050 255 +foundat 2 62 2.772589 5.545178 286 +reason 1 57 2.890372 2.890372 318 +three 1 54 2.944439 2.944439 330 +past 1 42 3.218876 3.218876 428 +editor 1 41 3.218876 3.218876 433 +theoret 1 39 3.258097 3.258097 446 +paul 1 38 3.295837 3.295837 471 +ofth 1 36 3.367296 3.367296 491 +utc 1 27 3.637586 3.637586 629 +constraint 1 26 3.688879 3.688879 636 +toward 1 25 3.737670 3.737670 668 +head 1 23 3.806662 3.806662 732 +properti 3 22 3.850148 11.550444 749 +wang 3 21 3.912023 11.736069 790 +chen 1 21 3.912023 3.912023 791 +verif 1 20 3.951244 3.951244 826 +synthesi 1 20 3.951244 3.951244 834 +precis 1 15 4.248495 4.248495 1023 +verifi 1 12 4.465908 4.465908 1261 +establish 1 9 4.753590 4.753590 1532 +doug 1 9 4.753590 4.753590 1517 +formul 1 8 4.875197 4.875197 1733 +canb 1 7 5.010635 5.010635 1846 +chung 1 7 5.010635 5.010635 1964 +groupth 1 5 5.347108 5.347108 2549 +carlo 1 5 5.347108 5.347108 2515 +firm 1 4 5.568345 5.568345 2684 +systemsand 1 4 5.568345 5.568345 2804 +toolset 1 4 5.568345 5.568345 3014 +aloysiu 1 3 5.857933 5.857933 3829 +lai 1 3 5.857933 5.857933 3694 +categor 1 3 5.857933 5.857933 3765 +stuart 1 3 5.857933 5.857933 3584 +guangtian 1 3 5.857933 5.857933 3810 +byprof 1 2 6.263398 6.263398 5512 +stringent 1 2 6.263398 6.263398 5523 +scenario 1 2 6.263398 6.263398 5524 +availableonlin 1 2 6.263398 6.263398 4929 +deji 1 2 6.263398 6.263398 5498 +tsou 1 2 6.263398 6.263398 5525 +clement 1 2 6.263398 6.263398 5526 +modechart 5 1 6.957497 34.787485 12954 +groundworkfor 1 1 6.957497 6.957497 12955 +enforc 1 1 6.957497 6.957497 12956 +timetool 1 1 6.957497 6.957497 12957 +languagepublicationsabstract 1 1 6.957497 6.957497 12958 +puchol 1 1 6.957497 6.957497 12959 +yangalumni 1 1 6.957497 6.957497 12960 +chih 1 1 6.957497 6.957497 12961 +farn 1 1 6.957497 6.957497 12962 +supoj 1 1 6.957497 6.957497 12963 +suthandavibul 1 1 6.957497 6.957497 12964 +farnam 1 1 6.957497 6.957497 12965 +jahanian 1 1 6.957497 6.957497 12966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cthomp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cthomp^ new file mode 100644 index 00000000..ed713cfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cthomp^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +page 7 705 0.000000 0.000000 3 +scienc 6 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 3 672 0.000000 0.000000 1 +research 5 431 0.693147 3.465735 10 +system 3 443 0.693147 2.079441 6 +inform 3 412 0.693147 2.079441 8 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +also 2 259 1.386294 2.772588 28 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +machin 2 129 2.079442 4.158884 95 +spring 1 131 2.079442 2.079442 88 +specif 1 106 2.197225 2.197225 106 +associ 2 93 2.397895 4.795790 151 +mani 1 92 2.397895 2.397895 150 +pictur 1 89 2.397895 2.397895 160 +learn 5 86 2.484907 12.424535 170 +resourc 3 81 2.484907 7.454721 172 +internet 2 83 2.484907 4.969814 186 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +educ 1 86 2.484907 2.484907 191 +start 1 83 2.484907 2.484907 173 +journal 1 83 2.484907 2.484907 183 +stuff 1 87 2.484907 2.484907 171 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +intellig 4 72 2.639057 10.556228 225 +would 1 67 2.708050 2.708050 251 +knowledg 1 67 2.708050 2.708050 243 +artifici 3 63 2.772589 8.317767 280 +collect 2 65 2.772589 5.545178 268 +taylor 1 63 2.772589 2.772589 287 +laboratori 1 63 2.772589 2.772589 292 +evalu 1 64 2.772589 2.772589 266 +thesi 1 57 2.890372 2.890372 327 +semest 1 58 2.890372 2.890372 312 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +finger 1 52 2.995732 2.995732 354 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +robot 2 36 3.367296 6.734592 497 +bibliographi 1 34 3.401197 3.401197 518 +tech 1 35 3.401197 3.401197 515 +produc 1 30 3.555348 3.555348 572 +postal 1 30 3.555348 3.555348 580 +particip 1 29 3.583519 3.583519 589 +propos 1 28 3.610918 3.610918 602 +rule 1 26 3.688879 3.688879 638 +task 1 25 3.737670 3.737670 678 +input 1 23 3.806662 3.806662 727 +mobil 1 23 3.806662 3.806662 730 +miscellan 1 23 3.806662 3.806662 731 +increas 1 20 3.951244 3.951244 829 +wrote 1 20 3.951244 3.951244 830 +expert 1 20 3.951244 3.951244 833 +north 1 19 4.007333 4.007333 873 +agent 1 18 4.060443 4.060443 910 +repositori 1 17 4.110874 4.110874 932 +expand 1 17 4.110874 4.110874 928 +women 2 16 4.174387 8.348774 1004 +across 1 16 4.174387 4.174387 974 +cognit 1 16 4.174387 4.174387 986 +researchmi 1 14 4.317488 4.317488 1119 +hotlist 1 13 4.382027 4.382027 1199 +misc 1 13 4.382027 4.382027 1124 +acquisit 2 10 4.653960 9.307920 1465 +sentenc 1 10 4.653960 4.653960 1413 +consortium 1 10 4.653960 4.653960 1467 +suitabl 1 9 4.753590 4.753590 1486 +linguist 1 9 4.753590 4.753590 1593 +folk 1 9 4.753590 4.753590 1597 +awar 1 7 5.010635 5.010635 1800 +carolina 1 6 5.164786 5.164786 2142 +truth 1 6 5.164786 5.164786 2179 +interestedin 1 5 5.347108 5.347108 2260 +corpu 1 5 5.347108 5.347108 2282 +cindi 1 3 5.857933 5.857933 3830 +groupunivers 1 3 5.857933 5.857933 3831 +primarilyin 1 3 5.857933 5.857933 3832 +diagnost 1 3 5.857933 5.857933 3833 +georgia 1 3 5.857933 5.857933 3834 +horizon 1 3 5.857933 5.857933 3746 +austini 1 2 6.263398 6.263398 5527 +deep 1 2 6.263398 6.263398 5528 +exhibit 1 2 6.263398 6.263398 5529 +cthomp 1 2 6.263398 6.263398 5530 +lexic 2 1 6.957497 13.914994 12967 +thompsoncindi 1 1 6.957497 6.957497 12968 +thompsonmachin 1 1 6.957497 6.957497 12969 +candlelight 1 1 6.957497 6.957497 12970 +vigil 1 1 6.957497 6.957497 12971 +internetto 1 1 6.957497 6.957497 12972 +violenc 1 1 6.957497 6.957497 12973 +semanticrepresent 1 1 6.957497 6.957497 12974 +atrobofest 1 1 6.957497 6.957497 12975 +wolv 1 1 6.957497 6.957497 12976 +counsel 1 1 6.957497 6.957497 12977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cxh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cxh^ new file mode 100644 index 00000000..8a9649c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^cxh^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +utexa 1 189 1.609438 1.609438 44 +support 1 132 1.945910 1.945910 83 +welcom 1 122 2.079442 2.079442 99 +school 1 84 2.484907 2.484907 188 +want 1 79 2.564949 2.564949 199 +browser 1 56 2.890372 2.890372 313 +friend 1 48 3.044522 3.044522 376 +netscap 1 44 3.135494 3.135494 395 +frame 1 24 3.761200 3.761200 684 +famili 1 23 3.806662 3.806662 735 +latest 1 21 3.912023 3.912023 785 +seem 1 18 4.060443 4.060443 899 +doesn 1 15 4.248495 4.248495 1055 +xingshan 2 1 6.957497 13.914994 12978 +downloadth 1 1 6.957497 6.957497 12979 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dahlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dahlin^ new file mode 100644 index 00000000..7dce0329 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dahlin^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +oper 2 180 1.609438 3.218876 34 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +hall 2 146 1.945910 3.891820 65 +professor 1 137 1.945910 1.945910 76 +file 1 132 1.945910 1.945910 70 +technolog 1 131 2.079442 2.079442 102 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +memori 1 101 2.302585 2.302585 139 +internet 1 83 2.484907 2.484907 186 +want 1 79 2.564949 2.564949 199 +taylor 2 63 2.772589 5.545178 287 +probabl 1 40 3.258097 3.258097 455 +electr 1 38 3.295837 3.295837 461 +postal 1 30 3.555348 3.555348 580 +berkelei 2 26 3.688879 7.377758 657 +experiment 1 26 3.688879 3.688879 645 +mike 2 24 3.761200 7.522400 703 +disk 1 22 3.850148 3.850148 747 +less 1 18 4.060443 4.060443 892 +seem 1 18 4.060443 4.060443 899 +classic 1 14 4.317488 4.317488 1084 +rice 1 11 4.553877 4.553877 1336 +operatingsystem 1 10 4.653960 4.653960 1401 +architect 1 8 4.875197 4.875197 1624 +gather 1 8 4.875197 4.875197 1719 +capac 1 8 4.875197 4.875197 1740 +root 1 8 4.875197 4.875197 1650 +trend 2 7 5.010635 10.021270 1842 +bore 1 7 5.010635 5.010635 1948 +austinaustin 1 7 5.010635 5.010635 1966 +price 2 6 5.164786 10.329572 1999 +pagethi 1 5 5.347108 5.347108 2336 +serverless 1 3 5.857933 5.857933 3181 +systemsth 1 3 5.857933 5.857933 3835 +informationtechnolog 1 3 5.857933 5.857933 3836 +informationassist 1 2 6.263398 6.263398 5531 +teachingfal 1 2 6.263398 6.263398 5532 +systemsspr 1 2 6.263398 6.263398 4762 +dahlin 2 1 6.957497 13.914994 12980 +dahlingener 1 1 6.957497 6.957497 12981 +architectureeveryon 1 1 6.957497 6.957497 12982 +researchxf 1 1 6.957497 6.957497 12983 +systemweb 1 1 6.957497 6.957497 12984 +pagesummar 1 1 6.957497 6.957497 12985 +compter 1 1 6.957497 6.957497 12986 +includinghistor 1 1 6.957497 6.957497 12987 +informationif 1 1 6.957497 6.957497 12988 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^damani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^damani^ new file mode 100644 index 00000000..306bdd85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^damani^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +distribut 2 162 1.791759 3.583518 51 +read 1 154 1.791759 1.791759 47 +parallel 1 169 1.791759 1.791759 60 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +activ 1 84 2.484907 2.484907 182 +servic 1 72 2.639057 2.639057 236 +laboratori 1 63 2.772589 2.772589 292 +dept 1 64 2.772589 2.772589 291 +probabl 1 40 3.258097 3.258097 455 +feel 1 37 3.332205 3.332205 483 +honor 1 23 3.806662 3.806662 729 +busi 1 21 3.912023 3.912023 784 +anyth 1 16 4.174387 4.174387 998 +regularli 1 11 4.553877 4.553877 1338 +tradit 1 10 4.653960 4.653960 1404 +meant 1 6 5.164786 5.164786 2055 +phrase 1 5 5.347108 5.347108 2242 +suffic 1 4 5.568345 5.568345 2869 +crazi 1 4 5.568345 5.568345 2822 +sytem 1 4 5.568345 5.568345 3015 +vijai 1 4 5.568345 5.568345 2960 +mehom 1 4 5.568345 5.568345 2979 +lazi 1 2 6.263398 6.263398 4527 +appeal 1 2 6.263398 6.263398 4186 +garg 1 2 6.263398 6.263398 5533 +damani 2 1 6.957497 13.914994 12989 +howdi 1 1 6.957497 6.957497 12990 +pagal 1 1 6.957497 6.957497 12991 +dekho 1 1 6.957497 6.957497 12992 +updateth 1 1 6.957497 6.957497 12993 +oblig 1 1 6.957497 6.957497 12994 +providesometh 1 1 6.957497 6.957497 12995 +guadulp 1 1 6.957497 6.957497 12996 +austinphon 1 1 6.957497 6.957497 12997 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dane^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dane^ new file mode 100644 index 00000000..791685c2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dane^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +depart 2 457 0.693147 1.386294 12 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +austin 5 168 1.791759 8.958795 63 +texa 3 160 1.791759 5.375277 64 +address 3 170 1.791759 5.375277 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +model 1 145 1.945910 1.945910 69 +area 1 144 1.945910 1.945910 80 +machin 1 129 2.079442 2.079442 95 +make 1 111 2.197225 2.197225 120 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +school 1 84 2.484907 2.484907 188 +appli 1 71 2.639057 2.639057 226 +view 2 70 2.708050 5.416100 254 +main 1 67 2.708050 2.708050 256 +complex 1 64 2.772589 2.772589 269 +laboratori 1 63 2.772589 2.772589 292 +autom 1 41 3.218876 3.218876 434 +tree 1 36 3.367296 3.367296 492 +global 1 34 3.401197 3.401197 520 +equat 1 23 3.806662 3.806662 724 +nice 1 20 3.951244 3.951244 809 +render 1 17 4.110874 4.110874 947 +happi 1 14 4.317488 4.317488 1079 +station 1 13 4.382027 4.382027 1157 +mainten 1 9 4.753590 4.753590 1543 +plane 1 6 5.164786 5.164786 2187 +multiresolut 1 5 5.347108 5.347108 2423 +escap 1 4 5.568345 5.568345 3016 +unrel 1 3 5.857933 5.857933 3837 +dane 2 2 6.263398 12.526796 5534 +pinbal 2 2 6.263398 12.526796 5508 +marshal 1 2 6.263398 6.263398 4118 +illumin 1 2 6.263398 6.263398 4819 +probe 1 2 6.263398 6.263398 5535 +marshalldan 1 1 6.957497 6.957497 12998 +electromechan 1 1 6.957497 6.957497 12999 +thelogist 1 1 6.957497 6.957497 13000 +attractor 1 1 6.957497 6.957497 13001 +burnet 1 1 6.957497 6.957497 13002 +pastur 1 1 6.957497 6.957497 13003 +jupit 1 1 6.957497 6.957497 13004 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dastuart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dastuart^ new file mode 100644 index 00000000..069653cf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dastuart^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +us 2 329 1.098612 2.197224 16 +link 2 247 1.386294 2.772588 24 +gener 1 220 1.386294 1.386294 27 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +provid 2 121 2.079442 4.158884 94 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +well 2 109 2.197225 4.394450 121 +place 1 106 2.197225 2.197225 124 +person 1 111 2.197225 2.197225 117 +access 1 102 2.302585 2.302585 136 +know 1 80 2.564949 2.564949 198 +good 1 77 2.564949 2.564949 200 +share 1 59 2.833213 2.833213 304 +simpl 1 60 2.833213 2.833213 298 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +archiv 1 49 3.044522 3.044522 364 +keep 1 44 3.135494 3.135494 409 +video 1 44 3.135494 3.135494 405 +go 1 33 3.433987 3.433987 529 +idea 1 32 3.465736 3.465736 545 +someth 1 31 3.496508 3.496508 554 +sort 1 22 3.850148 3.850148 738 +sure 2 20 3.951244 7.902488 813 +easi 1 16 4.174387 4.174387 969 +save 2 14 4.317488 8.634976 1099 +manner 1 14 4.317488 4.317488 1074 +pagewelcom 1 11 4.553877 4.553877 1344 +guess 1 10 4.653960 4.653960 1443 +doug 1 9 4.753590 4.753590 1517 +perhap 1 8 4.875197 4.875197 1693 +fiction 1 6 5.164786 5.164786 2217 +latexhtml 1 5 5.347108 5.347108 2347 +orlean 1 5 5.347108 5.347108 2550 +bear 1 4 5.568345 5.568345 2651 +stuart 2 3 5.857933 11.715866 3584 +justa 1 2 6.263398 6.263398 5326 +pagedoug 1 1 6.957497 6.957497 13005 +oflinksto 1 1 6.957497 6.957497 13006 +aboutsport 1 1 6.957497 6.957497 13007 +booksin 1 1 6.957497 6.957497 13008 +fewjok 1 1 6.957497 6.957497 13009 +testof 1 1 6.957497 6.957497 13010 +aweath 1 1 6.957497 6.957497 13011 +mapandcondit 1 1 6.957497 6.957497 13012 +austinandnew 1 1 6.957497 6.957497 13013 +amgraci 1 1 6.957497 6.957497 13014 +puttingit 1 1 6.957497 6.957497 13015 +webbrows 1 1 6.957497 6.957497 13016 +thisi 1 1 6.957497 6.957497 13017 +stuffmom 1 1 6.957497 6.957497 13018 +calendarlink 1 1 6.957497 6.957497 13019 +fictionbooksjokessportsfoodvideout 1 1 6.957497 6.957497 13020 +libraryresumelast 1 1 6.957497 6.957497 13021 +dasdastuart 1 1 6.957497 6.957497 13022 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dhs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dhs^ new file mode 100644 index 00000000..0c23c3a8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dhs^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +doug 1 9 4.753590 4.753590 1517 +swhich 1 1 6.957497 6.957497 13023 +annoi 1 1 6.957497 6.957497 13024 +thisorthi 1 1 6.957497 6.957497 13025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dianelaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dianelaw^ new file mode 100644 index 00000000..36448f85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dianelaw^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 4 571 0.000000 0.000000 5 +home 4 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +link 2 247 1.386294 2.772588 24 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +austin 6 168 1.791759 10.750554 63 +texa 4 160 1.791759 7.167036 64 +network 3 168 1.791759 5.375277 61 +algorithm 3 162 1.791759 5.375277 57 +address 2 170 1.791759 3.583518 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +taylor 1 63 2.772589 2.772589 287 +local 1 55 2.944439 2.944439 334 +archiv 1 49 3.044522 3.044522 364 +robot 1 36 3.367296 3.367296 497 +neural 3 30 3.555348 10.666044 578 +postal 1 30 3.555348 3.555348 580 +art 1 29 3.583519 3.583519 593 +utc 2 27 3.637586 7.275172 629 +symbol 1 27 3.637586 3.637586 620 +fine 1 20 3.951244 3.951244 822 +agent 1 18 4.060443 4.060443 910 +universityof 2 15 4.248495 8.496990 1061 +evolv 1 12 4.465908 4.465908 1223 +literatur 1 11 4.553877 4.553877 1300 +michigan 1 11 4.553877 4.553877 1368 +genet 3 10 4.653960 13.961880 1409 +santa 1 10 4.653960 4.653960 1441 +ground 1 7 5.010635 5.010635 1955 +digest 1 7 5.010635 5.010635 1864 +theus 1 4 5.568345 5.568345 2992 +spanish 1 4 5.568345 5.568345 3017 +intereststh 1 3 5.857933 5.857933 3838 +dian 1 2 6.263398 6.263398 5536 +lawdian 1 1 6.957497 6.957497 13026 +problemnavig 1 1 6.957497 6.957497 13027 +washingtonst 1 1 6.957497 6.957497 13028 +stateunivers 1 1 6.957497 6.957497 13029 +dianelaw 1 1 6.957497 6.957497 13030 +gann 1 1 6.957497 6.957497 13031 +illig 1 1 6.957497 6.957497 13032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dionisis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dionisis^ new file mode 100644 index 00000000..abf340e6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dionisis^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +work 1 380 0.693147 0.693147 9 +student 3 343 1.098612 3.295836 19 +engin 2 297 1.098612 2.197224 20 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +resourc 1 81 2.484907 2.484907 172 +homework 1 79 2.564949 2.564949 193 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +undergradu 1 54 2.944439 2.944439 338 +mine 1 26 3.688879 3.688879 654 +alwai 1 24 3.761200 3.761200 691 +unit 1 21 3.912023 3.912023 779 +monitor 1 17 4.110874 4.110874 941 +athlet 1 7 5.010635 5.010635 1933 +greec 2 6 5.164786 10.329572 2208 +informat 1 3 5.857933 5.857933 3839 +patra 2 2 6.263398 12.526796 5537 +reasearch 1 2 6.263398 6.263398 5538 +dionisi 2 1 6.957497 13.914994 13033 +papadopoulosdionisi 1 1 6.957497 6.957497 13034 +papadopoulosabout 1 1 6.957497 6.957497 13035 +medionisi 1 1 6.957497 6.957497 13036 +papadopoulo 1 1 6.957497 6.957497 13037 +panhellen 1 1 6.957497 6.957497 13038 +associationpanathinaiko 1 1 6.957497 6.957497 13039 +clubgreek 1 1 6.957497 6.957497 13040 +newshellen 1 1 6.957497 6.957497 13041 +networkeveryth 1 1 6.957497 6.957497 13042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^diz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^diz^ new file mode 100644 index 00000000..462faf22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^diz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +public 2 202 1.609438 3.218876 43 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +applic 2 170 1.791759 3.583518 56 +algorithm 2 162 1.791759 3.583518 57 +contact 1 153 1.791759 1.791759 59 +recent 1 167 1.791759 1.791759 58 +construct 2 139 1.945910 3.891820 82 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +problem 1 147 1.945910 1.945910 75 +report 2 131 2.079442 4.158884 92 +high 1 130 2.079442 2.079442 101 +version 7 113 2.197225 15.380575 122 +structur 2 106 2.197225 4.394450 105 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +technic 1 100 2.302585 2.302585 140 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +appear 5 78 2.564949 12.824745 210 +sourc 2 77 2.564949 5.129898 201 +complet 2 77 2.564949 5.129898 208 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +simul 2 66 2.708050 5.416100 255 +complex 2 64 2.772589 5.545178 269 +taylor 1 63 2.772589 2.772589 287 +descript 1 64 2.772589 2.772589 271 +abstract 1 62 2.772589 2.772589 276 +visit 1 63 2.772589 2.772589 288 +space 1 57 2.890372 2.890372 310 +sampl 1 53 2.944439 2.944439 339 +cover 1 55 2.944439 2.944439 329 +local 1 55 2.944439 2.944439 334 +finger 1 52 2.995732 2.995732 354 +linear 1 41 3.218876 3.218876 431 +annual 1 40 3.258097 3.258097 458 +multipl 1 39 3.258097 3.258097 453 +small 1 39 3.258097 3.258097 447 +correct 1 38 3.295837 3.295837 462 +random 8 34 3.401197 27.209576 511 +product 1 33 3.433987 3.433987 527 +graph 2 30 3.555348 7.110696 576 +postal 1 30 3.555348 3.555348 580 +load 1 28 3.610918 3.610918 601 +utc 1 27 3.637586 3.637586 629 +revis 2 26 3.688879 7.377758 640 +bound 2 26 3.688879 7.377758 659 +lower 1 18 4.060443 4.060443 886 +expand 2 17 4.110874 8.221748 928 +role 1 14 4.317488 4.317488 1101 +balanc 1 14 4.317488 4.317488 1112 +weak 2 13 4.382027 8.764054 1159 +walk 1 12 4.465908 4.465908 1281 +paragraph 1 10 4.653960 4.653960 1449 +preliminari 6 9 4.753590 28.521540 1480 +cryptographi 1 9 4.753590 4.753590 1512 +leader 1 9 4.753590 4.753590 1576 +explicit 1 9 4.753590 4.753590 1525 +insert 1 8 4.875197 4.875197 1687 +elect 1 8 4.875197 4.875197 1771 +analys 1 8 4.875197 4.875197 1666 +combinatori 1 8 4.875197 4.875197 1629 +foc 2 7 5.010635 10.021270 1880 +hit 1 7 5.010635 5.010635 1965 +dimens 1 7 5.010635 5.010635 1930 +soda 1 6 5.164786 5.164786 2189 +determinist 1 6 5.164786 5.164786 2034 +stoc 6 5 5.347108 32.082648 2491 +mutual 1 5 5.347108 5.347108 2418 +asymptot 1 4 5.568345 5.568345 2676 +delet 1 4 5.568345 5.568345 2691 +exclus 1 4 5.568345 5.568345 2947 +combinatorica 2 3 5.857933 11.715866 3649 +intereststh 1 3 5.857933 5.857933 3838 +algorithmica 1 3 5.857933 5.857933 3561 +beat 1 3 5.857933 5.857933 3840 +eigenvalu 1 3 5.857933 5.857933 3364 +sicomp 2 1 6.957497 13.914994 13043 +zuckermandavid 1 1 6.957497 6.957497 13044 +zuckermanassist 1 1 6.957497 6.957497 13045 +cryptographyresearch 1 1 6.957497 6.957497 13046 +myprofil 1 1 6.957497 6.957497 13047 +transposit 1 1 6.957497 6.957497 13048 +extractor 1 1 6.957497 6.957497 13049 +jcss 1 1 6.957497 6.957497 13050 +logspac 1 1 6.957497 6.957497 13051 +tight 1 1 6.957497 6.957497 13052 +derandom 1 1 6.957497 6.957497 13053 +constructionand 1 1 6.957497 6.957497 13054 +setfor 1 1 6.957497 6.957497 13055 +rectangl 1 1 6.957497 6.957497 13056 +unapproxim 1 1 6.957497 6.957497 13057 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dmcl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dmcl^ new file mode 100644 index 00000000..46738ea6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dmcl^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 5 431 0.693147 3.465735 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +design 1 213 1.386294 1.386294 25 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +distribut 4 162 1.791759 7.167036 51 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +network 1 168 1.791759 1.791759 61 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +report 1 131 2.079442 2.079442 92 +send 1 114 2.197225 2.197225 109 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +call 1 91 2.397895 2.397895 153 +wide 1 84 2.484907 2.484907 185 +member 1 84 2.484907 2.484907 165 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +nation 1 74 2.639057 2.639057 240 +multimedia 7 68 2.708050 18.956350 258 +main 1 67 2.708050 2.708050 256 +would 1 67 2.708050 2.708050 251 +laboratori 5 63 2.772589 13.862945 292 +foundat 2 62 2.772589 5.545178 286 +content 1 59 2.833213 2.833213 302 +variou 1 56 2.890372 2.890372 317 +suggest 1 53 2.944439 2.944439 331 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +tabl 1 51 2.995732 2.995732 346 +protocol 1 45 3.135494 3.135494 407 +video 1 44 3.135494 3.135494 405 +industri 1 38 3.295837 3.295837 464 +microsoft 1 38 3.295837 3.295837 468 +electr 1 38 3.295837 3.295837 461 +storag 1 31 3.496508 3.496508 553 +focus 1 29 3.583519 3.583519 584 +relev 1 26 3.688879 3.688879 637 +intel 1 16 4.174387 4.174387 1000 +audio 1 14 4.317488 4.317488 1094 +carri 1 13 4.382027 4.382027 1152 +nasa 1 13 4.382027 4.382027 1188 +departmentof 1 9 4.753590 4.753590 1539 +transport 1 8 4.875197 4.875197 1672 +hear 1 7 5.010635 5.010635 1940 +sponsor 2 6 5.164786 10.329572 2133 +multimediacomput 2 3 5.857933 11.715866 3841 +mitsubishi 1 3 5.857933 5.857933 3842 +merl 1 3 5.857933 5.857933 3843 +rangeof 1 2 6.263398 6.263398 4076 +federalinstitut 1 2 6.263398 6.263398 5539 +agenda 1 2 6.263398 6.263398 5037 +currentresearch 1 1 6.957497 6.957497 13058 +andmultiresolut 1 1 6.957497 6.957497 13059 +dmcl 1 1 6.957497 6.957497 13060 +microsystemsinc 1 1 6.957497 6.957497 13061 +yourcom 1 1 6.957497 6.957497 13062 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dsb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dsb^ new file mode 100644 index 00000000..4543c9f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dsb^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 4 431 0.693147 2.772588 10 +program 3 374 0.693147 2.079441 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +student 2 343 1.098612 2.197224 19 +current 2 284 1.098612 2.197224 21 +project 2 340 1.098612 2.197224 18 +offic 2 299 1.098612 2.197224 13 +softwar 4 220 1.386294 5.545176 30 +gener 3 220 1.386294 4.158882 27 +languag 3 227 1.386294 4.158882 26 +design 2 213 1.386294 2.772588 25 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +address 2 170 1.791759 3.583518 62 +austin 2 168 1.791759 3.583518 63 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +specif 2 106 2.197225 4.394450 106 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +build 2 85 2.484907 4.969814 184 +larg 1 82 2.484907 2.484907 168 +orient 1 80 2.564949 2.564949 205 +appli 1 71 2.639057 2.639057 226 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +improv 1 62 2.772589 2.772589 289 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +extens 3 53 2.944439 8.833317 340 +investig 1 51 2.995732 2.995732 353 +programm 1 39 3.258097 3.258097 445 +microsoft 1 38 3.295837 3.295837 468 +cost 1 37 3.332205 3.332205 480 +product 1 33 3.433987 3.433987 527 +domain 4 30 3.555348 14.221392 564 +compon 1 30 3.555348 3.555348 570 +postal 1 30 3.555348 3.555348 580 +scale 1 28 3.610918 3.610918 613 +framework 1 28 3.610918 3.610918 606 +enhanc 1 26 3.688879 3.688879 644 +subject 1 26 3.688879 3.688879 647 +wai 1 25 3.737670 3.737670 662 +pattern 1 24 3.761200 3.761200 689 +methodolog 1 23 3.806662 3.806662 733 +reduc 1 22 3.850148 3.850148 759 +fund 1 21 3.912023 3.912023 805 +synthesi 1 20 3.951244 3.951244 834 +mainten 1 9 4.753590 4.753590 1543 +realiz 1 8 4.875197 4.875197 1739 +span 1 8 4.875197 4.875197 1751 +avion 1 4 5.568345 5.568345 3018 +darpa 1 4 5.568345 5.568345 2944 +batori 1 4 5.568345 5.568345 2690 +preprocessor 1 3 5.857933 5.857933 3844 +parameter 1 2 6.263398 6.263398 5540 +encapsul 1 2 6.263398 6.263398 5541 +jakarta 2 1 6.957497 13.914994 13063 +batorydon 1 1 6.957497 6.957497 13064 +batorysoftwar 1 1 6.957497 6.957497 13065 +pluggabl 1 1 6.957497 6.957497 13066 +schlumberg 1 1 6.957497 6.957497 13067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dwip^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dwip^ new file mode 100644 index 00000000..e9c04431 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^dwip^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +offic 2 299 1.098612 2.197224 13 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +austin 3 168 1.791759 5.375277 63 +parallel 2 169 1.791759 3.583518 60 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +process 1 142 1.945910 1.945910 72 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +present 1 91 2.397895 2.397895 145 +info 2 85 2.484907 4.969814 176 +know 1 80 2.564949 2.564949 198 +symposium 1 72 2.639057 2.639057 238 +main 1 67 2.708050 2.708050 256 +favorit 1 44 3.135494 3.135494 410 +methodolog 1 23 3.806662 3.806662 733 +partit 1 16 4.174387 4.174387 984 +photograph 1 15 4.248495 4.248495 1056 +usavoic 2 13 4.382027 8.764054 1198 +road 1 11 4.553877 4.553877 1374 +insert 1 8 4.875197 4.875197 1687 +banerje 1 6 5.164786 5.164786 2018 +theintern 1 4 5.568345 5.568345 2981 +dwip 3 3 5.857933 17.573799 3197 +banerjeeabout 1 1 6.957497 6.957497 13068 +methi 1 1 6.957497 6.957497 13069 +programminggroup 1 1 6.957497 6.957497 13070 +graphicalparallel 1 1 6.957497 6.957497 13071 +departmentpost 1 1 6.957497 6.957497 13072 +homepost 1 1 6.957497 6.957497 13073 +enfield 1 1 6.957497 6.957497 13074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ecamahor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ecamahor^ new file mode 100644 index 00000000..4c11b1ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ecamahor^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +make 1 111 2.197225 2.197225 120 +thing 1 84 2.484907 2.484907 189 +come 1 78 2.564949 2.564949 202 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +main 1 67 2.708050 2.708050 256 +previou 1 62 2.772589 2.772589 290 +januari 1 62 2.772589 2.772589 264 +taylor 1 63 2.772589 2.772589 287 +summer 1 56 2.890372 2.890372 311 +think 1 57 2.890372 2.890372 314 +set 1 50 3.044522 3.044522 361 +right 1 48 3.044522 3.044522 363 +anoth 3 45 3.135494 9.406482 408 +third 1 43 3.178054 3.178054 412 +multipl 1 39 3.258097 3.258097 453 +finish 1 22 3.850148 3.850148 748 +break 1 20 3.951244 3.951244 812 +els 1 19 4.007333 4.007333 843 +left 1 19 4.007333 4.007333 851 +item 1 19 4.007333 4.007333 856 +usavoic 1 13 4.382027 4.382027 1198 +paragraph 2 10 4.653960 9.307920 1449 +mepost 1 10 4.653960 4.653960 1472 +deadlin 1 9 4.753590 4.753590 1502 +siggraph 1 8 4.875197 4.875197 1773 +forget 1 8 4.875197 4.875197 1712 +promis 2 6 5.164786 10.329572 2037 +complaint 1 4 5.568345 5.568345 2795 +emilio 2 3 5.857933 11.715866 3683 +bout 1 3 5.857933 5.857933 3670 +credibl 1 3 5.857933 5.857933 3210 +decent 1 2 6.263398 6.263398 5542 +excus 1 2 6.263398 6.263398 4684 +camahort 2 1 6.957497 13.914994 13075 +gurrea 2 1 6.957497 13.914994 13076 +mmmmm 1 1 6.957497 6.957497 13077 +lose 1 1 6.957497 6.957497 13078 +ecamahor 1 1 6.957497 6.957497 13079 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ejp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ejp^ new file mode 100644 index 00000000..370d5870 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ejp^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +support 1 132 1.945910 1.945910 83 +summari 1 73 2.639057 2.639057 237 +view 2 70 2.708050 5.416100 254 +multimedia 1 68 2.708050 2.708050 258 +laboratori 1 63 2.772589 2.772589 292 +greg 1 24 3.761200 3.761200 695 +head 1 23 3.806662 3.806662 732 +supervis 1 20 3.951244 3.951244 840 +eduphon 1 15 4.248495 4.248495 1060 +consortium 1 10 4.653960 4.653960 1467 +harrick 1 7 5.010635 5.010635 1849 +multimediacomput 1 3 5.857933 5.857933 3841 +lavend 1 3 5.857933 5.857933 3217 +posnak 3 1 6.957497 20.872491 13080 +isod 1 1 6.957497 6.957497 13081 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emerson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emerson^ new file mode 100644 index 00000000..d4977360 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emerson^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +time 3 293 1.098612 3.295836 17 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +theori 1 111 2.197225 2.197225 127 +real 3 93 2.397895 7.193685 144 +journal 2 83 2.484907 4.969814 183 +ieee 1 86 2.484907 2.484907 190 +method 1 80 2.564949 2.564949 213 +logic 3 71 2.639057 7.917171 230 +effici 1 73 2.639057 2.639057 233 +symposium 1 72 2.639057 2.639057 238 +practic 1 70 2.708050 2.708050 246 +taylor 1 63 2.772589 2.772589 287 +foundat 1 62 2.772589 2.772589 286 +direct 1 57 2.890372 2.890372 316 +reason 1 57 2.890372 2.890372 318 +york 1 41 3.218876 3.218876 435 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +theoret 1 39 3.258097 3.258097 446 +formal 1 37 3.332205 3.332205 478 +tree 1 36 3.367296 3.367296 492 +concurr 1 34 3.401197 3.401197 501 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +verif 1 20 3.951244 3.951244 826 +aid 1 18 4.060443 4.060443 904 +cambridg 1 16 4.174387 4.174387 1008 +automata 2 13 4.382027 8.764054 1135 +calculu 2 12 4.465908 8.931816 1203 +tempor 4 9 4.753590 19.014360 1584 +juan 1 9 4.753590 4.753590 1580 +secretari 1 8 4.875197 4.875197 1775 +quantit 1 8 4.875197 4.875197 1654 +satisfi 1 8 4.875197 4.875197 1694 +mass 1 8 4.875197 4.875197 1732 +centenni 1 7 5.010635 5.010635 1967 +foc 1 7 5.010635 5.010635 1880 +srinivasan 2 6 5.164786 10.329572 2175 +handbook 1 6 5.164786 5.164786 2061 +emerson 7 5 5.347108 37.429756 2547 +allen 2 5 5.347108 10.694216 2470 +infinit 1 4 5.568345 5.568345 2596 +elsevi 1 3 5.857933 5.857933 3671 +systemsselect 1 2 6.263398 6.263398 4049 +bakker 1 2 6.263398 6.263398 5337 +leeuwen 1 2 6.263398 6.263398 5543 +emersonbruton 1 1 6.957497 6.957497 13082 +publications 1 1 6.957497 6.957497 13083 +sistla 1 1 6.957497 6.957497 13084 +sadler 1 1 6.957497 6.957497 13085 +jutla 1 1 6.957497 6.957497 13086 +determinaci 1 1 6.957497 6.957497 13087 +modal 1 1 6.957497 6.957497 13088 +amsterdam 1 1 6.957497 6.957497 13089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emery^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emery^ new file mode 100644 index 00000000..d60e98f2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emery^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 7 374 0.693147 4.852029 7 +work 4 380 0.693147 2.772588 9 +system 3 443 0.693147 2.079441 6 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +utexa 3 189 1.609438 4.828314 44 +group 3 183 1.609438 4.828314 36 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +parallel 4 169 1.791759 7.167036 60 +austin 3 168 1.791759 5.375277 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +object 1 138 1.945910 1.945910 79 +problem 1 147 1.945910 1.945910 75 +tool 1 117 2.079442 2.079442 93 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +code 5 108 2.197225 10.986125 116 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +user 1 104 2.302585 2.302585 137 +search 1 95 2.397895 2.397895 155 +select 1 91 2.397895 2.397895 154 +graphic 1 90 2.397895 2.397895 147 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +info 2 85 2.484907 4.969814 176 +academ 1 82 2.484907 2.484907 178 +orient 1 80 2.564949 2.564949 205 +materi 2 75 2.639057 5.278114 221 +name 1 72 2.639057 2.639057 220 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +function 3 62 2.772589 8.317767 275 +dept 1 64 2.772589 2.772589 291 +taylor 1 63 2.772589 2.772589 287 +abstract 1 62 2.772589 2.772589 276 +type 1 61 2.833213 2.833213 296 +unix 1 58 2.890372 2.890372 308 +extens 1 53 2.944439 2.944439 340 +visual 2 48 3.044522 6.089044 372 +describ 1 45 3.135494 3.135494 400 +textbook 1 44 3.135494 3.135494 397 +http 1 41 3.218876 3.218876 420 +join 1 39 3.258097 3.258097 457 +concept 1 32 3.465736 3.465736 537 +framework 1 28 3.610918 3.610918 606 +repres 1 26 3.688879 3.688879 656 +known 1 24 3.761200 3.761200 702 +compress 1 23 3.806662 3.806662 719 +lyco 1 19 4.007333 4.007333 871 +analyz 1 17 4.110874 4.110874 925 +macintosh 1 17 4.110874 4.110874 920 +near 1 14 4.317488 4.317488 1091 +composit 2 13 4.382027 8.764054 1150 +affili 1 13 4.382027 4.382027 1194 +brother 1 13 4.382027 4.382027 1189 +believ 1 13 4.382027 4.382027 1187 +doug 2 9 4.753590 9.507180 1517 +pure 1 8 4.875197 4.875197 1776 +accomplish 1 8 4.875197 4.875197 1755 +mirror 1 6 5.164786 5.164786 2028 +artist 1 6 5.164786 5.164786 2127 +haskel 2 4 5.568345 11.136690 2618 +aspir 1 4 5.568345 5.568345 3019 +berger 3 3 5.857933 17.573799 3702 +add 1 3 5.857933 5.857933 3131 +emeri 4 2 6.263398 25.053592 5515 +groupi 2 2 6.263398 12.526796 5544 +linksth 1 2 6.263398 6.263398 5545 +analyst 2 1 6.957497 13.914994 13090 +ticam 2 1 6.957497 13.914994 13091 +evangelist 2 1 6.957497 13.914994 13092 +pageemeri 1 1 6.957497 6.957497 13093 +randomli 1 1 6.957497 6.957497 13094 +uttr 1 1 6.957497 6.957497 13095 +othermi 1 1 6.957497 6.957497 13096 +youngest 1 1 6.957497 6.957497 13097 +handiwork 1 1 6.957497 6.957497 13098 +systemtexbook 1 1 6.957497 6.957497 13099 +exchangegrac 1 1 6.957497 6.957497 13100 +macaddict 1 1 6.957497 6.957497 13101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emmawu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emmawu^ new file mode 100644 index 00000000..72b4341e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^emmawu^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +welcom 1 122 2.079442 2.079442 99 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +find 1 111 2.197225 2.197225 111 +intern 1 108 2.197225 2.197225 128 +part 1 98 2.302585 2.302585 129 +librari 2 87 2.484907 4.969814 181 +solut 1 82 2.484907 2.484907 162 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +come 1 78 2.564949 2.564949 202 +nation 1 74 2.639057 2.639057 240 +onlin 1 75 2.639057 2.639057 223 +august 1 66 2.708050 2.708050 257 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +visit 1 63 2.772589 2.772589 288 +januari 1 62 2.772589 2.772589 264 +semest 1 58 2.890372 2.890372 312 +date 1 51 2.995732 2.995732 344 +compani 1 41 3.218876 3.218876 423 +small 1 39 3.258097 3.258097 447 +programm 1 39 3.258097 3.258097 445 +tutori 1 39 3.258097 3.258097 437 +industri 1 38 3.295837 3.295837 464 +china 3 37 3.332205 9.996615 487 +chines 1 29 3.583519 3.583519 595 +repres 1 26 3.688879 3.688879 656 +try 1 22 3.850148 3.850148 764 +alumni 1 21 3.912023 3.912023 807 +love 1 21 3.912023 3.912023 804 +nice 1 20 3.951244 3.951244 809 +media 1 19 4.007333 4.007333 861 +attend 1 18 4.060443 4.060443 893 +lot 1 18 4.060443 4.060443 889 +jose 1 16 4.174387 4.174387 976 +club 1 15 4.248495 4.248495 1058 +fortran 1 15 4.248495 4.248495 1027 +eduphon 1 15 4.248495 4.248495 1060 +becam 1 14 4.317488 4.317488 1117 +francisco 1 14 4.317488 4.317488 1095 +opportun 1 13 4.382027 4.382027 1161 +market 1 11 4.553877 4.553877 1361 +branch 1 11 4.553877 4.553877 1318 +enter 1 10 4.653960 4.653960 1454 +telecommun 1 9 4.753590 4.753590 1565 +surpris 1 7 5.010635 5.010635 1828 +capit 1 7 5.010635 5.010635 1957 +vallei 1 7 5.010635 5.010635 1959 +south 1 6 5.164786 5.164786 2167 +deliv 1 6 5.164786 5.164786 2070 +silicon 1 6 5.164786 5.164786 2076 +girl 1 5 5.347108 5.347108 2410 +orlean 1 5 5.347108 5.347108 2550 +houston 1 5 5.347108 5.347108 2460 +planet 1 4 5.568345 5.568345 2912 +immedi 1 3 5.857933 5.857933 3117 +peac 1 3 5.857933 5.857933 3769 +emma 4 2 6.263398 25.053592 5546 +zhongshan 2 2 6.263398 12.526796 5547 +mini 1 2 6.263398 6.263398 5548 +magazinepc 1 2 6.263398 6.263398 5161 +wuabout 1 1 6.957497 6.957497 13102 +myselfhi 1 1 6.957497 6.957497 13103 +inibm 1 1 6.957497 6.957497 13104 +costom 1 1 6.957497 6.957497 13105 +manyalumni 1 1 6.957497 6.957497 13106 +instrumentsinc 1 1 6.957497 6.957497 13107 +computingmanag 1 1 6.957497 6.957497 13108 +informationautomat 1 1 6.957497 6.957497 13109 +baseyahoogalaxi 1 1 6.957497 6.957497 13110 +universityyellow 1 1 6.957497 6.957497 13111 +infoleisur 1 1 6.957497 6.957497 13112 +timenewspagepeopl 1 1 6.957497 6.957497 13113 +dailyartstim 1 1 6.957497 6.957497 13114 +magazinechines 1 1 6.957497 6.957497 13115 +magazinec 1 1 6.957497 6.957497 13116 +antoniosan 1 1 6.957497 6.957497 13117 +franciscomarina 1 1 6.957497 6.957497 13118 +citysan 1 1 6.957497 6.957497 13119 +pointemail 1 1 6.957497 6.957497 13120 +emmawu 1 1 6.957497 6.957497 13121 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^eremolin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^eremolin^ new file mode 100644 index 00000000..f19bdbd0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^eremolin^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +offic 2 299 1.098612 2.197224 13 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +first 2 140 1.945910 3.891820 71 +well 1 109 2.197225 2.197225 121 +main 1 67 2.708050 2.708050 256 +anoth 2 45 3.135494 6.270988 408 +third 1 43 3.178054 3.178054 412 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +break 1 20 3.951244 3.951244 812 +item 1 19 4.007333 4.007333 856 +easi 1 16 4.174387 4.174387 969 +usavoic 1 13 4.382027 4.382027 1198 +paragraph 2 10 4.653960 9.307920 1449 +mepost 1 10 4.653960 4.653960 1472 +forget 1 8 4.875197 4.875197 1712 +emilio 2 3 5.857933 11.715866 3683 +mehi 1 2 6.263398 6.263398 5549 +bullet 1 2 6.263398 6.263398 5500 +remolinaemilio 1 1 6.957497 6.957497 13122 +remolinaabout 1 1 6.957497 6.957497 13123 +eremolin 1 1 6.957497 6.957497 13124 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^erkok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^erkok^ new file mode 100644 index 00000000..1fe44259 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^erkok^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +locat 1 59 2.833213 2.833213 303 +http 1 41 3.218876 3.218876 420 +reach 1 24 3.761200 3.761200 688 +thank 1 23 3.806662 3.806662 721 +former 1 17 4.110874 4.110874 956 +universityof 1 15 4.248495 4.248495 1061 +sciencesat 1 7 5.010635 5.010635 1968 +levent 2 1 6.957497 13.914994 13125 +sayfasi 1 1 6.957497 6.957497 13126 +erkok 1 1 6.957497 6.957497 13127 +inturkei 1 1 6.957497 6.957497 13128 +ceng 1 1 6.957497 6.957497 13129 +metu 1 1 6.957497 6.957497 13130 +erkokto 1 1 6.957497 6.957497 13131 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^esra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^esra^ new file mode 100644 index 00000000..5487a7d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^esra^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +area 1 144 1.945910 1.945910 80 +machin 1 129 2.079442 2.079442 95 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +homepag 1 93 2.397895 2.397895 148 +educ 1 86 2.484907 2.484907 191 +logic 1 71 2.639057 2.639057 230 +reason 2 57 2.890372 5.780744 318 +postal 1 30 3.555348 3.555348 580 +mind 1 27 3.637586 3.637586 632 +voic 1 21 3.912023 3.912023 806 +cognit 1 16 4.174387 4.174387 986 +thedepart 1 11 4.553877 4.553877 1350 +turkei 1 4 5.568345 5.568345 2914 +commonsens 1 4 5.568345 5.568345 2998 +children 1 3 5.857933 5.857933 3767 +monoton 2 2 6.263398 12.526796 5321 +andinform 1 2 6.263398 6.263398 5550 +esra 6 1 6.957497 41.744982 13132 +erdem 4 1 6.957497 27.829988 13133 +bilkent 1 1 6.957497 6.957497 13134 +learninginduct 1 1 6.957497 6.957497 13135 +sciencelearningreason 1 1 6.957497 6.957497 13136 +reasoningknowledg 1 1 6.957497 6.957497 13137 +representationemotionsphilosophi 1 1 6.957497 6.957497 13138 +mindcontact 1 1 6.957497 6.957497 13139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^esteban^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^esteban^ new file mode 100644 index 00000000..eb6ba442 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^esteban^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 2 343 1.098612 2.197224 19 +offic 2 299 1.098612 2.197224 13 +current 1 284 1.098612 1.098612 21 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +perform 1 143 1.945910 1.945910 74 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +thing 1 84 2.484907 2.484907 189 +wide 1 84 2.484907 2.484907 185 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +degre 1 69 2.708050 2.708050 259 +main 1 67 2.708050 2.708050 256 +collect 1 65 2.772589 2.772589 268 +interact 1 62 2.772589 2.772589 270 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +extens 1 53 2.944439 2.944439 340 +life 1 50 3.044522 3.044522 375 +describ 1 45 3.135494 3.135494 400 +random 1 34 3.401197 3.401197 511 +art 1 29 3.583519 3.583519 593 +toward 1 25 3.737670 3.737670 668 +sport 1 25 3.737670 3.737670 683 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +scheme 1 20 3.951244 3.951244 818 +item 1 19 4.007333 4.007333 856 +carl 2 15 4.248495 8.496990 1024 +believ 1 13 4.382027 4.382027 1187 +usavoic 1 13 4.382027 4.382027 1198 +rice 2 11 4.553877 9.107754 1336 +worth 1 11 4.553877 4.553877 1294 +chronicl 1 7 5.010635 5.010635 1952 +athlet 1 7 5.010635 5.010635 1933 +myresum 1 6 5.164786 5.164786 2199 +snow 1 6 5.164786 5.164786 2031 +houston 1 5 5.347108 5.347108 2460 +wasn 1 3 5.857933 5.857933 3800 +dust 1 2 6.263398 6.263398 5551 +syntact 1 2 6.263398 6.263398 5552 +linksth 1 2 6.263398 6.263398 5545 +household 1 2 6.263398 6.263398 4920 +peak 1 2 6.263398 6.263398 5553 +pagestephen 1 1 6.957497 6.957497 13140 +carlpardon 1 1 6.957497 6.957497 13141 +planmi 1 1 6.957497 6.957497 13142 +interestsa 1 1 6.957497 6.957497 13143 +psuedo 1 1 6.957497 6.957497 13144 +dose 1 1 6.957497 6.957497 13145 +pike 1 1 6.957497 6.957497 13146 +bandget 1 1 6.957497 6.957497 13147 +touchpost 1 1 6.957497 6.957497 13148 +esteban 1 1 6.957497 6.957497 13149 +edureturn 1 1 6.957497 6.957497 13150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^estlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^estlin^ new file mode 100644 index 00000000..e08e2630 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^estlin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +us 2 329 1.098612 2.197224 16 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +schedul 1 119 2.079442 2.079442 85 +machin 1 129 2.079442 2.079442 95 +check 1 115 2.197225 2.197225 118 +techniqu 1 99 2.302585 2.302585 138 +learn 2 86 2.484907 4.969814 170 +solut 1 82 2.484907 2.484907 162 +control 1 82 2.484907 2.484907 164 +educ 1 86 2.484907 2.484907 191 +method 1 80 2.564949 2.564949 213 +effici 1 73 2.639057 2.639057 233 +knowledg 1 67 2.708050 2.708050 243 +improv 2 62 2.772589 5.545178 289 +plan 1 65 2.772589 2.772589 272 +descript 1 64 2.772589 2.772589 271 +taylor 1 63 2.772589 2.772589 287 +detail 1 57 2.890372 2.890372 321 +combin 1 42 3.218876 3.218876 421 +vita 1 38 3.295837 3.295837 473 +postal 1 30 3.555348 3.555348 580 +accur 1 25 3.737670 3.737670 680 +induct 1 11 4.553877 4.553877 1304 +solver 1 7 5.010635 5.010635 1911 +analyt 1 7 5.010635 5.010635 1913 +machinelearn 1 6 5.164786 5.164786 2084 +groupth 1 5 5.347108 5.347108 2549 +myresearch 1 4 5.568345 5.568345 2842 +theperform 1 3 5.857933 5.857933 3262 +estlin 2 2 6.263398 12.526796 5554 +tara 1 2 6.263398 6.263398 5555 +researchinvolv 1 2 6.263398 6.263398 5556 +acquir 1 2 6.263398 6.263398 5557 +amparticularli 1 2 6.263398 6.263398 5558 +tulan 1 2 6.263398 6.263398 5559 +estlintara 1 1 6.957497 6.957497 13151 +estlinmachin 1 1 6.957497 6.957497 13152 +austinresearchcontrol 1 1 6.957497 6.957497 13153 +byguid 1 1 6.957497 6.957497 13154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^francois^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^francois^ new file mode 100644 index 00000000..46a4a906 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^francois^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +research 3 431 0.693147 2.079441 10 +interest 3 384 0.693147 2.079441 11 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +mail 1 238 1.386294 1.386294 22 +class 2 199 1.609438 3.218876 37 +utexa 2 189 1.609438 3.218876 44 +austin 2 168 1.791759 3.583518 63 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +machin 1 129 2.079442 2.079442 95 +find 1 111 2.197225 2.197225 111 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +theori 1 111 2.197225 2.197225 127 +user 1 104 2.302585 2.302585 137 +real 1 93 2.397895 2.397895 144 +issu 1 78 2.564949 2.564949 211 +would 1 67 2.708050 2.708050 251 +street 1 63 2.772589 2.772589 293 +locat 1 59 2.833213 2.833213 303 +plai 1 60 2.833213 2.833213 307 +suggest 1 53 2.944439 2.944439 331 +directori 1 45 3.135494 3.135494 396 +join 1 39 3.258097 3.258097 457 +chines 1 29 3.583519 3.583519 595 +utc 2 27 3.637586 7.275172 629 +todai 2 25 3.737670 7.475340 672 +watch 1 21 3.912023 3.912023 789 +voic 1 21 3.912023 3.912023 806 +assum 1 19 4.007333 4.007333 845 +stop 1 17 4.110874 4.110874 942 +women 1 16 4.174387 4.174387 1004 +action 1 15 4.248495 4.248495 1038 +trip 1 14 4.317488 4.317488 1113 +central 3 13 4.382027 13.146081 1160 +suit 1 13 4.382027 4.382027 1129 +food 1 12 4.465908 4.465908 1285 +basketbal 1 12 4.465908 4.465908 1289 +market 3 11 4.553877 13.661631 1361 +noth 1 11 4.553877 4.553877 1328 +black 1 10 4.653960 4.653960 1418 +shop 1 10 4.653960 4.653960 1469 +mepost 1 10 4.653960 4.653960 1472 +swim 1 9 4.753590 4.753590 1599 +japan 1 8 4.875197 4.875197 1762 +foreign 1 7 5.010635 5.010635 1919 +troubl 1 6 5.164786 5.164786 2002 +knew 1 5 5.347108 5.347108 2445 +pack 1 3 5.857933 5.857933 3597 +legion 1 3 5.857933 5.857933 3708 +guadalup 1 3 5.857933 5.857933 3255 +francoi 5 2 6.263398 31.316990 4523 +polic 2 2 6.263398 12.526796 5560 +forest 1 2 6.263398 6.263398 5368 +cake 1 2 6.263398 6.263398 5118 +forthcom 1 2 6.263398 6.263398 5392 +crawl 1 2 6.263398 6.263398 5561 +wisdom 1 2 6.263398 6.263398 4430 +barbanson 3 1 6.957497 20.872491 13155 +tank 2 1 6.957497 13.914994 13156 +versionhom 1 1 6.957497 6.957497 13157 +versionthi 1 1 6.957497 6.957497 13158 +spool 1 1 6.957497 6.957497 13159 +francoisabout 1 1 6.957497 6.957497 13160 +mecurr 1 1 6.957497 6.957497 13161 +genuin 1 1 6.957497 6.957497 13162 +pastri 1 1 6.957497 6.957497 13163 +fruit 1 1 6.957497 6.957497 13164 +mouss 1 1 6.957497 6.957497 13165 +groceri 1 1 6.957497 6.957497 13166 +shed 1 1 6.957497 6.957497 13167 +lighton 1 1 6.957497 6.957497 13168 +hyogo 1 1 6.957497 6.957497 13169 +atdominion 1 1 6.957497 6.957497 13170 +hqcheck 1 1 6.957497 6.957497 13171 +dilberti 1 1 6.957497 6.957497 13172 +mentionthat 1 1 6.957497 6.957497 13173 +edufrancoi 1 1 6.957497 6.957497 13174 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^fussell^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^fussell^ new file mode 100644 index 00000000..22a5f59e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^fussell^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +group 2 183 1.609438 3.218876 36 +utexa 2 189 1.609438 3.218876 44 +public 2 202 1.609438 3.218876 43 +texa 3 160 1.791759 5.375277 64 +austin 2 168 1.791759 3.583518 63 +phone 1 175 1.791759 1.791759 45 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +technolog 2 131 2.079442 4.158884 102 +databas 1 122 2.079442 2.079442 86 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +mathemat 2 108 2.197225 4.394450 123 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +graphic 3 90 2.397895 7.193685 147 +center 1 88 2.397895 2.397895 158 +member 1 84 2.484907 2.484907 165 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +appli 2 71 2.639057 5.278114 226 +laboratori 1 63 2.772589 2.772589 292 +http 1 41 3.218876 3.218876 420 +autom 1 41 3.218876 3.218876 434 +electr 1 38 3.295837 3.295837 461 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +progress 1 28 3.610918 3.610918 598 +director 1 22 3.850148 3.850148 767 +divis 1 21 3.912023 3.912023 803 +former 1 17 4.110874 4.110874 956 +social 1 13 4.382027 4.382027 1123 +donald 2 9 4.753590 9.507180 1510 +fussel 3 5 5.347108 16.041324 2300 +regent 1 5 5.347108 5.347108 2551 +dalla 1 4 5.568345 5.568345 2930 +crow 1 3 5.857933 5.857933 3845 +trammel 1 2 6.263398 6.263398 5562 +dartmouth 1 2 6.263398 6.263398 5393 +collegem 1 2 6.263398 6.263398 5563 +eduinform 1 1 6.957497 6.957497 13175 +fussellb 1 1 6.957497 6.957497 13176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gajit^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gajit^ new file mode 100644 index 00000000..8b5e7d2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gajit^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +last 1 314 1.098612 1.098612 14 +gener 2 220 1.386294 2.772588 27 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +address 1 170 1.791759 1.791759 62 +austin 1 168 1.791759 1.791759 63 +recent 1 167 1.791759 1.791759 58 +file 2 132 1.945910 3.891820 70 +construct 1 139 1.945910 1.945910 82 +document 1 121 2.079442 2.079442 89 +find 1 111 2.197225 2.197225 111 +start 1 83 2.484907 2.484907 173 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +go 1 33 3.433987 3.433987 529 +someth 1 31 3.496508 3.496508 554 +anyth 1 16 4.174387 4.174387 998 +georg 1 16 4.174387 4.174387 994 +lane 1 8 4.875197 4.875197 1720 +pagethi 1 5 5.347108 5.347108 2336 +ajit 2 3 5.857933 11.715866 3299 +eduher 1 3 5.857933 5.857933 3499 +useless 2 2 6.263398 12.526796 5564 +odd 1 2 6.263398 6.263398 5565 +georgemi 1 1 6.957497 6.957497 13177 +youand 1 1 6.957497 6.957497 13178 +goodthat 1 1 6.957497 6.957497 13179 +wickersham 1 1 6.957497 6.957497 13180 +gajit 1 1 6.957497 6.957497 13181 +foundus 1 1 6.957497 6.957497 13182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^geeta^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^geeta^ new file mode 100644 index 00000000..e2dd5165 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^geeta^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +technolog 1 131 2.079442 2.079442 102 +institut 1 84 2.484907 2.484907 187 +still 1 50 3.044522 3.044522 362 +india 1 32 3.465736 3.465736 550 +art 1 29 3.583519 3.583519 593 +try 1 22 3.850148 3.850148 764 +indian 1 22 3.850148 3.850148 769 +medic 1 17 4.110874 4.110874 958 +undergrad 1 9 4.753590 4.753590 1589 +kanpur 1 8 4.875197 4.875197 1744 +river 1 6 5.164786 5.164786 2220 +arora 1 4 5.568345 5.568345 2658 +mehom 1 4 5.568345 5.568345 2979 +oak 1 2 6.263398 6.263398 5566 +geeta 2 1 6.957497 13.914994 13183 +tofigur 1 1 6.957497 6.957497 13184 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gokul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gokul^ new file mode 100644 index 00000000..5f376a61 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gokul^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +last 1 314 1.098612 1.098612 14 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +click 2 142 1.945910 3.891820 78 +number 1 130 2.079442 2.079442 97 +send 2 114 2.197225 4.394450 109 +final 1 116 2.197225 2.197225 108 +world 1 115 2.197225 2.197225 126 +comment 3 93 2.397895 7.193685 146 +help 1 83 2.484907 2.484907 175 +novemb 1 81 2.484907 2.484907 179 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +receiv 1 66 2.708050 2.708050 244 +suggest 1 53 2.944439 2.944439 331 +visitor 1 49 3.044522 3.044522 371 +could 1 46 3.091042 3.091042 383 +kind 1 32 3.465736 3.465736 541 +art 1 29 3.583519 3.583519 593 +actual 1 28 3.610918 3.610918 604 +quit 1 27 3.637586 3.637586 633 +wish 1 24 3.761200 3.761200 692 +head 1 23 3.806662 3.806662 732 +medic 1 17 4.110874 4.110874 958 +critic 2 16 4.174387 8.348774 982 +easi 1 16 4.174387 4.174387 969 +mayb 1 15 4.248495 4.248495 1014 +decid 1 14 4.317488 4.317488 1075 +earlier 1 13 4.382027 4.382027 1140 +minimum 1 9 4.753590 4.753590 1555 +mass 1 8 4.875197 4.875197 1732 +risk 1 8 4.875197 4.875197 1689 +sleep 1 6 5.164786 5.164786 2211 +merit 1 5 5.347108 5.347108 2466 +gokul 2 4 5.568345 11.136690 2668 +countless 1 4 5.568345 5.568345 3020 +flame 2 3 5.857933 11.715866 3696 +gripe 1 3 5.857933 5.857933 3257 +democrat 1 2 6.263398 6.263398 5567 +plakal 1 2 6.263398 6.263398 5568 +perfectli 1 2 6.263398 6.263398 5569 +theexcess 1 1 6.957497 6.957497 13185 +verbos 1 1 6.957497 6.957497 13186 +thecollect 1 1 6.957497 6.957497 13187 +putonli 1 1 6.957497 6.957497 13188 +barest 1 1 6.957497 6.957497 13189 +adieu 1 1 6.957497 6.957497 13190 +outpour 1 1 6.957497 6.957497 13191 +hag 1 1 6.957497 6.957497 13192 +hopey 1 1 6.957497 6.957497 13193 +untroubl 1 1 6.957497 6.957497 13194 +conscienc 1 1 6.957497 6.957497 13195 +untim 1 1 6.957497 6.957497 13196 +demis 1 1 6.957497 6.957497 13197 +vitriol 1 1 6.957497 6.957497 13198 +reinstat 1 1 6.957497 6.957497 13199 +signin 1 1 6.957497 6.957497 13200 +lesscrit 1 1 6.957497 6.957497 13201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gooty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gooty^ new file mode 100644 index 00000000..749b3be8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gooty^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +also 4 259 1.386294 5.545176 28 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 5 168 1.791759 8.958795 63 +read 2 154 1.791759 3.583518 47 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +like 9 132 1.945910 17.513190 81 +year 1 148 1.945910 1.945910 84 +number 1 130 2.079442 2.079442 97 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +well 2 109 2.197225 4.394450 121 +place 2 106 2.197225 4.394450 124 +check 2 115 2.197225 4.394450 118 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +peopl 4 96 2.302585 9.210340 132 +book 3 99 2.302585 6.907755 131 +text 1 98 2.302585 2.302585 133 +present 2 91 2.397895 4.795790 145 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +thing 4 84 2.484907 9.939628 189 +start 1 83 2.484907 2.484907 173 +school 1 84 2.484907 2.484907 188 +control 1 82 2.484907 2.484907 164 +know 4 80 2.564949 10.259796 198 +state 1 76 2.564949 2.564949 207 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +order 1 69 2.708050 2.708050 249 +would 1 67 2.708050 2.708050 251 +plai 2 60 2.833213 5.666426 307 +colleg 1 61 2.833213 2.833213 300 +best 1 59 2.833213 2.833213 299 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +finger 1 52 2.995732 2.995732 354 +friend 4 48 3.044522 12.178088 376 +life 2 50 3.044522 6.089044 375 +visitor 1 49 3.044522 3.044522 371 +cool 1 49 3.044522 3.044522 374 +without 1 50 3.044522 3.044522 370 +made 2 44 3.135494 6.270988 398 +keep 1 44 3.135494 3.135494 409 +anoth 1 45 3.135494 3.135494 408 +around 2 43 3.178054 6.356108 415 +futur 2 41 3.218876 6.437752 427 +past 1 42 3.218876 3.218876 428 +continu 1 39 3.258097 3.258097 448 +join 1 39 3.258097 3.258097 457 +game 3 36 3.367296 10.101888 498 +winter 1 36 3.367296 3.367296 500 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +board 1 33 3.433987 3.433987 528 +india 1 32 3.465736 3.465736 550 +travel 1 30 3.555348 3.555348 579 +option 1 30 3.555348 3.555348 575 +power 1 30 3.555348 3.555348 573 +art 1 29 3.583519 3.583519 593 +except 1 28 3.610918 3.610918 607 +weather 1 28 3.610918 3.610918 618 +usual 1 28 3.610918 3.610918 608 +propos 1 28 3.610918 3.610918 602 +great 2 27 3.637586 7.275172 626 +american 1 27 3.637586 3.637586 634 +background 1 25 3.737670 3.737670 664 +todai 1 25 3.737670 3.737670 672 +alwai 2 24 3.761200 7.522400 691 +famili 1 23 3.806662 3.806662 735 +love 3 21 3.912023 11.736069 804 +born 1 21 3.912023 3.912023 798 +watch 1 21 3.912023 3.912023 789 +tenni 2 20 3.951244 7.902488 838 +els 1 19 4.007333 4.007333 843 +ever 1 19 4.007333 4.007333 872 +spend 1 19 4.007333 4.007333 850 +listen 1 18 4.060443 4.060443 907 +intro 2 17 4.110874 8.221748 915 +sept 1 17 4.110874 4.110874 952 +bachelor 1 17 4.110874 4.110874 957 +thought 1 17 4.110874 4.110874 945 +medic 1 17 4.110874 4.110874 958 +transfer 1 16 4.174387 4.174387 967 +sign 1 16 4.174387 4.174387 970 +goe 1 15 4.248495 4.248495 1044 +hopefulli 1 14 4.317488 4.317488 1071 +becam 1 14 4.317488 4.317488 1117 +attribut 1 14 4.317488 4.317488 1092 +came 2 13 4.382027 8.764054 1197 +care 1 13 4.382027 4.382027 1177 +sai 1 13 4.382027 4.382027 1175 +guest 1 12 4.465908 4.465908 1220 +song 1 11 4.553877 4.553877 1380 +fill 1 11 4.553877 4.553877 1349 +success 1 10 4.653960 4.653960 1390 +hang 1 9 4.753590 4.753590 1499 +kumar 1 9 4.753590 4.753590 1506 +ball 1 9 4.753590 4.753590 1608 +gold 1 8 4.875197 4.875197 1745 +soccer 1 8 4.875197 4.875197 1752 +footbal 1 7 5.010635 5.010635 1912 +cricket 1 7 5.010635 5.010635 1945 +bore 1 7 5.010635 5.010635 1948 +golden 1 7 5.010635 5.010635 1962 +happen 1 7 5.010635 5.010635 1790 +southern 1 6 5.164786 5.164786 2191 +vari 1 6 5.164786 5.164786 2001 +alphabet 1 6 5.164786 5.164786 1980 +chat 1 6 5.164786 5.164786 2128 +curiou 1 5 5.347108 5.347108 2541 +chemic 1 5 5.347108 5.347108 2552 +valuabl 1 5 5.347108 5.347108 2256 +proud 1 4 5.568345 5.568345 2918 +batch 1 4 5.568345 5.568345 2700 +enrol 1 4 5.568345 5.568345 2613 +abraham 1 4 5.568345 5.568345 2644 +gokul 1 4 5.568345 5.568345 2668 +hide 1 4 5.568345 5.568345 2996 +bold 1 3 5.857933 5.857933 3846 +cold 1 3 5.857933 5.857933 3637 +acad 1 3 5.857933 5.857933 3847 +hindi 1 3 5.857933 5.857933 3753 +narrow 1 3 5.857933 5.857933 3807 +gooti 4 2 6.263398 25.053592 4281 +subramanyam 2 2 6.263398 12.526796 4282 +somebodi 1 2 6.263398 6.263398 4463 +hyderabad 1 2 6.263398 6.263398 5570 +andhra 1 2 6.263398 6.263398 5571 +pradesh 1 2 6.263398 6.263398 5572 +osmania 1 2 6.263398 6.263398 5573 +nebraska 1 2 6.263398 6.263398 5574 +lincoln 1 2 6.263398 6.263398 5575 +addict 1 2 6.263398 6.263398 5576 +neeraj 1 2 6.263398 6.263398 5577 +shailesh 1 2 6.263398 6.263398 5578 +vipin 1 2 6.263398 6.263398 5579 +hideout 1 1 6.957497 6.957497 13202 +wont 1 1 6.957497 6.957497 13203 +disappoint 1 1 6.957497 6.957497 13204 +geographi 1 1 6.957497 6.957497 13205 +gala 1 1 6.957497 6.957497 13206 +wasjust 1 1 6.957497 6.957497 13207 +mehul 1 1 6.957497 6.957497 13208 +shantanu 1 1 6.957497 6.957497 13209 +likechess 1 1 6.957497 6.957497 13210 +carrom 1 1 6.957497 6.957497 13211 +racquet 1 1 6.957497 6.957497 13212 +definetli 1 1 6.957497 6.957497 13213 +horoscop 1 1 6.957497 6.957497 13214 +compatabil 1 1 6.957497 6.957497 13215 +destini 1 1 6.957497 6.957497 13216 +hardwork 1 1 6.957497 6.957497 13217 +dispos 1 1 6.957497 6.957497 13218 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gouda^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gouda^ new file mode 100644 index 00000000..1813280f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gouda^ @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +utexa 3 189 1.609438 4.828314 44 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +texa 2 160 1.791759 3.583518 64 +implement 2 152 1.791759 3.583518 52 +network 2 168 1.791759 3.583518 61 +austin 1 168 1.791759 1.791759 63 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +first 1 140 1.945910 1.945910 71 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +tool 1 117 2.079442 2.079442 93 +provid 1 121 2.079442 2.079442 94 +specif 4 106 2.197225 8.788900 106 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +octob 2 89 2.397895 4.795790 156 +associ 1 93 2.397895 2.397895 151 +decemb 2 80 2.564949 5.129898 215 +june 1 79 2.564949 2.564949 214 +sourc 1 77 2.564949 2.564949 201 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +copi 4 63 2.772589 11.090356 284 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +date 3 51 2.995732 8.987196 344 +without 1 50 3.044522 3.044522 370 +protocol 5 45 3.135494 15.677470 407 +made 1 44 3.135494 3.135494 398 +term 1 43 3.178054 3.178054 411 +http 3 41 3.218876 9.656628 420 +must 1 40 3.258097 3.258097 442 +credit 1 38 3.295837 3.295837 460 +formal 1 37 3.332205 3.332205 478 +copyright 2 36 3.367296 6.734592 495 +survei 3 35 3.401197 10.203591 513 +word 1 34 3.401197 3.401197 508 +post 1 35 3.401197 3.401197 505 +posit 1 31 3.496508 3.496508 552 +profil 1 30 3.555348 3.555348 581 +hard 1 30 3.555348 3.555348 563 +full 1 28 3.610918 3.610918 615 +utc 1 27 3.637586 3.637586 629 +revis 1 26 3.688879 3.688879 640 +request 1 26 3.688879 3.688879 635 +notic 1 25 3.737670 3.737670 675 +other 1 24 3.761200 3.761200 697 +honor 1 23 3.806662 3.806662 729 +accept 1 18 4.060443 4.060443 879 +otherwis 1 17 4.110874 4.110874 922 +commerci 1 16 4.174387 4.174387 1005 +advantag 1 16 4.174387 4.174387 987 +permit 1 16 4.174387 4.174387 962 +evolv 1 12 4.465908 4.465908 1223 +grant 1 12 4.465908 4.465908 1216 +statement 2 11 4.553877 9.107754 1313 +submiss 1 11 4.553877 4.553877 1298 +exact 3 9 4.753590 14.260770 1509 +phrase 1 5 5.347108 5.347108 2242 +own 1 5 5.347108 5.347108 2531 +permiss 4 4 5.568345 22.273380 2642 +gouda 2 4 5.568345 11.136690 3021 +machineri 1 4 5.568345 5.568345 2851 +citat 3 3 5.857933 17.573799 3617 +sciencesaustin 1 3 5.857933 5.857933 3828 +argu 1 3 5.857933 5.857933 3698 +networkprotocol 1 3 5.857933 5.857933 3285 +moham 1 3 5.857933 5.857933 3848 +fornetwork 1 2 6.263398 6.263398 5580 +ordistribut 1 2 6.263398 6.263398 5581 +redistribut 1 2 6.263398 6.263398 5582 +pragmat 3 1 6.957497 20.872491 13219 +goudanetwork 2 1 6.957497 13.914994 13220 +goudaacm 1 1 6.957497 6.957497 13221 +surveysa 1 1 6.957497 6.957497 13222 +specificationsand 1 1 6.957497 6.957497 13223 +implementationsmoham 1 1 6.957497 6.957497 13224 +goudath 1 1 6.957497 6.957497 13225 +usagouda 1 1 6.957497 6.957497 13226 +htmlabstract 1 1 6.957497 6.957497 13227 +bridgeth 1 1 6.957497 6.957497 13228 +implementationsaddit 1 1 6.957497 6.957497 13229 +methodologypubl 1 1 6.957497 6.957497 13230 +digitalor 1 1 6.957497 6.957497 13231 +classroomus 1 1 6.957497 6.957497 13232 +profit 1 1 6.957497 6.957497 13233 +bearthi 1 1 6.957497 6.957497 13234 +forcompon 1 1 6.957497 6.957497 13235 +torepublish 1 1 6.957497 6.957497 13236 +requiresprior 1 1 6.957497 6.957497 13237 +frompubl 1 1 6.957497 6.957497 13238 +orpermiss 1 1 6.957497 6.957497 13239 +goudagouda 1 1 6.957497 6.957497 13240 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gunnels^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gunnels^ new file mode 100644 index 00000000..480aa9ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gunnels^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +class 1 199 1.609438 1.609438 37 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +data 1 170 1.791759 1.791759 49 +assign 2 135 1.945910 3.891820 66 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +report 2 131 2.079442 4.158884 92 +look 2 107 2.197225 4.394450 115 +code 1 108 2.197225 2.197225 116 +check 1 115 2.197225 2.197225 118 +take 1 97 2.302585 2.302585 134 +imag 1 91 2.397895 2.397895 161 +meet 1 72 2.639057 2.639057 229 +would 1 67 2.708050 2.708050 251 +test 1 66 2.708050 2.708050 252 +plan 2 65 2.772589 5.545178 272 +collect 1 65 2.772589 2.772589 268 +best 1 59 2.833213 2.833213 299 +talk 1 53 2.944439 2.944439 336 +case 2 51 2.995732 5.991464 351 +much 1 52 2.995732 2.995732 349 +friend 1 48 3.044522 3.044522 376 +visitor 1 49 3.044522 3.044522 371 +long 1 43 3.178054 3.178054 413 +author 1 39 3.258097 3.258097 450 +connect 1 37 3.332205 3.332205 485 +john 2 33 3.433987 6.867974 532 +product 1 33 3.433987 3.433987 527 +except 1 28 3.610918 3.610918 607 +minut 1 20 3.951244 3.951244 810 +log 1 19 4.007333 4.007333 857 +less 1 18 4.060443 4.060443 892 +matrix 1 17 4.110874 4.110874 933 +spars 1 16 4.174387 4.174387 989 +doesn 1 15 4.248495 4.248495 1055 +central 1 13 4.382027 4.382027 1160 +land 1 12 4.465908 4.465908 1273 +rememb 1 12 4.465908 4.465908 1217 +guess 1 10 4.653960 4.653960 1443 +latter 1 9 4.753590 4.753590 1522 +bore 1 7 5.010635 5.010635 1948 +oregon 1 5 5.347108 5.347108 2437 +glimps 1 4 5.568345 5.568345 2778 +plapack 1 3 5.857933 5.857933 3849 +redmond 1 3 5.857933 5.857933 3568 +mysteri 1 2 6.263398 6.263398 4715 +hail 1 2 6.263398 6.263398 5583 +gunnel 3 1 6.957497 20.872491 13241 +transpos 2 1 6.957497 13.914994 13242 +drank 1 1 6.957497 6.957497 13243 +depict 1 1 6.957497 6.957497 13244 +computationsif 1 1 6.957497 6.957497 13245 +pageam 1 1 6.957497 6.957497 13246 +towni 1 1 6.957497 6.957497 13247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gunther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gunther^ new file mode 100644 index 00000000..6c1b2fb1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gunther^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +student 2 343 1.098612 2.197224 19 +graduat 1 215 1.386294 1.386294 31 +utexa 2 189 1.609438 3.218876 44 +oper 1 180 1.609438 1.609438 34 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +year 1 148 1.945910 1.945910 84 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +undergradu 1 54 2.944439 2.944439 338 +curriculum 1 33 3.433987 3.433987 535 +mellon 1 13 4.382027 4.382027 1179 +frank 2 9 4.753590 9.507180 1568 +junior 1 5 5.347108 5.347108 2519 +tropschuhfrank 1 1 6.957497 6.957497 13248 +tropschuh 1 1 6.957497 6.957497 13249 +gunther 1 1 6.957497 6.957497 13250 +schweiz 1 1 6.957497 6.957497 13251 +clayton 1 1 6.957497 6.957497 13252 +waldhofstrass 1 1 6.957497 6.957497 13253 +rheinfelden 1 1 6.957497 6.957497 13254 +vitaeenglishdeutschlinkscarnegi 1 1 6.957497 6.957497 13255 +universitterlangen 1 1 6.957497 6.957497 13256 +nrnberg 1 1 6.957497 6.957497 13257 +abroad 1 1 6.957497 6.957497 13258 +mathematisch 1 1 6.957497 6.957497 13259 +maschinen 1 1 6.957497 6.957497 13260 +datenverarbeitung 1 1 6.957497 6.957497 13261 +tropschuhgunth 1 1 6.957497 6.957497 13262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gyx^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gyx^ new file mode 100644 index 00000000..014e3a6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gyx^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +place 1 106 2.197225 2.197225 124 +master 1 76 2.564949 2.564949 216 +name 1 72 2.639057 2.639057 220 +servic 1 72 2.639057 2.639057 236 +degre 1 69 2.708050 2.708050 259 +juli 1 60 2.833213 2.833213 305 +tabl 1 51 2.995732 2.995732 346 +directori 1 45 3.135494 3.135494 396 +china 1 37 3.332205 3.332205 487 +mine 1 26 3.688879 3.688879 654 +background 1 25 3.737670 3.737670 664 +hobbi 1 16 4.174387 4.174387 1009 +grant 1 12 4.465908 4.465908 1216 +birthdai 1 4 5.568345 5.568345 2800 +birth 1 3 5.857933 5.857933 3594 +addresspictur 1 2 6.263398 6.263398 5584 +ceremoni 1 2 6.263398 6.263398 5585 +jiangsu 1 2 6.263398 6.263398 5586 +yongxiang 2 1 6.957497 13.914994 13263 +pagemerri 1 1 6.957497 6.957497 13264 +christmashappi 1 1 6.957497 6.957497 13265 +homepagegao 1 1 6.957497 6.957497 13266 +yongxiangsever 1 1 6.957497 6.957497 13267 +pointsto 1 1 6.957497 6.957497 13268 +chinadepart 1 1 6.957497 6.957497 13269 +male 1 1 6.957497 6.957497 13270 +huanan 1 1 6.957497 6.957497 13271 +tenniseduc 1 1 6.957497 6.957497 13272 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gzhang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gzhang^ new file mode 100644 index 00000000..39203ff4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^gzhang^ @@ -0,0 +1,27 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +distribut 1 162 1.791759 1.791759 51 +databas 1 122 2.079442 2.079442 86 +manag 2 114 2.197225 4.394450 125 +second 1 81 2.484907 2.484907 166 +term 1 43 3.178054 3.178054 411 +queri 1 33 3.433987 3.433987 524 +stop 1 17 4.110874 4.110874 942 +zhang 2 16 4.174387 8.348774 980 +gzhang 1 2 6.263398 6.263398 4183 +schoolth 1 1 6.957497 6.957497 13273 +semestercoursesc 1 1 6.957497 6.957497 13274 +linc 1 1 6.957497 6.957497 13275 +alvis 1 1 6.957497 6.957497 13276 +mirankerfil 1 1 6.957497 6.957497 13277 +databs 1 1 6.957497 6.957497 13278 +formthank 1 1 6.957497 6.957497 13279 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^haizhou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^haizhou^ new file mode 100644 index 00000000..28d1cd3e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^haizhou^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 5 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +design 3 213 1.386294 4.158882 25 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +algorithm 2 162 1.791759 3.583518 57 +applic 1 170 1.791759 1.791759 56 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +find 1 111 2.197225 2.197225 111 +mathemat 1 108 2.197225 2.197225 123 +techniqu 1 99 2.302585 2.302585 138 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +institut 2 84 2.484907 4.969814 187 +ieee 2 86 2.484907 4.969814 190 +academ 1 82 2.484907 2.484907 178 +internet 1 83 2.484907 2.484907 186 +optim 2 79 2.564949 5.129898 197 +refer 2 78 2.564949 5.129898 203 +new 1 64 2.772589 2.772589 262 +complex 1 64 2.772589 2.772589 269 +prof 1 64 2.772589 2.772589 273 +visit 1 63 2.772589 2.772589 288 +room 2 59 2.833213 5.666426 301 +undergradu 1 54 2.944439 2.944439 338 +even 1 45 3.135494 3.135494 393 +directori 1 45 3.135494 3.135494 396 +theoret 1 39 3.258097 3.258097 446 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +industri 1 38 3.295837 3.295837 464 +china 1 37 3.332205 3.332205 487 +sciencesunivers 1 37 3.332205 3.332205 486 +staff 1 36 3.367296 3.367296 490 +award 1 34 3.401197 3.401197 523 +given 1 32 3.465736 3.465736 538 +focus 1 29 3.583519 3.583519 584 +chines 1 29 3.583519 3.583519 595 +campu 2 27 3.637586 7.275172 623 +constraint 1 26 3.688879 3.688879 636 +head 1 23 3.806662 3.806662 732 +size 1 23 3.806662 3.806662 713 +period 1 22 3.850148 3.850148 743 +vlsi 2 21 3.912023 7.824046 795 +martin 1 21 3.912023 3.912023 794 +rout 1 21 3.912023 3.912023 793 +chen 1 21 3.912023 3.912023 791 +voic 1 21 3.912023 3.912023 806 +aid 2 18 4.060443 8.120886 904 +edulast 1 17 4.110874 4.110874 927 +jose 2 16 4.174387 8.348774 976 +tsinghua 1 13 4.382027 4.382027 1195 +israel 1 11 4.553877 4.553877 1366 +wong 3 9 4.753590 14.260770 1609 +classmat 1 9 4.753590 4.753590 1516 +combinatori 1 8 4.875197 4.875197 1629 +wire 1 8 4.875197 4.875197 1747 +uniform 1 7 5.010635 5.010635 1845 +delai 1 7 5.010635 5.010635 1848 +zhou 4 6 5.164786 20.659144 2092 +ture 1 6 5.164786 5.164786 1997 +alex 1 6 5.164786 5.164786 2130 +internationalconfer 1 6 5.164786 5.164786 2051 +bulletin 1 5 5.347108 5.347108 2343 +weizmann 1 4 5.568345 5.568345 2858 +kept 1 4 5.568345 5.568345 2762 +zhao 1 4 5.568345 5.568345 2699 +headlin 1 3 5.857933 5.857933 3710 +amir 1 3 5.857933 5.857933 3850 +mathematicallog 1 3 5.857933 5.857933 3796 +pagealan 1 2 6.263398 6.263398 5587 +compuer 1 2 6.263398 6.263398 4692 +researchgroup 1 2 6.263398 6.263398 5588 +pnueli 1 1 6.957497 6.957497 13280 +aprofessor 1 1 6.957497 6.957497 13281 +prestig 1 1 6.957497 6.957497 13282 +incompletelist 1 1 6.957497 6.957497 13283 +publicationshai 1 1 6.957497 6.957497 13284 +forriv 1 1 6.957497 6.957497 13285 +crosstalk 1 1 6.957497 6.957497 13286 +optimalnon 1 1 6.957497 6.957497 13287 +elmor 1 1 6.957497 6.957497 13288 +acmintern 1 1 6.957497 6.957497 13289 +austintaylor 1 1 6.957497 6.957497 13290 +staustin 1 1 6.957497 6.957497 13291 +haizhou 1 1 6.957497 6.957497 13292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^haosun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^haosun^ new file mode 100644 index 00000000..2248513a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^haosun^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +welcom 1 122 2.079442 2.079442 99 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +educ 1 86 2.484907 2.484907 191 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +nation 1 74 2.639057 2.639057 240 +dept 2 64 2.772589 5.545178 291 +visitor 1 49 3.044522 3.044522 371 +china 3 37 3.332205 9.996615 487 +univ 3 28 3.610918 10.832754 617 +alumni 1 21 3.912023 3.912023 807 +sept 1 17 4.110874 4.110874 952 +universityof 1 15 4.248495 4.248495 1061 +wait 1 13 4.382027 4.382027 1168 +departmentof 1 9 4.753590 4.753590 1539 +hear 1 7 5.010635 5.010635 1940 +wuhan 3 2 6.263398 18.790194 5589 +pal 1 2 6.263398 6.263398 4964 +myselfnow 1 1 6.957497 6.957497 13293 +pre 1 1 6.957497 6.957497 13294 +alumnihom 1 1 6.957497 6.957497 13295 +pagecontact 1 1 6.957497 6.957497 13296 +haosun 1 1 6.957497 6.957497 13297 +edunow 1 1 6.957497 6.957497 13298 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hewett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hewett^ new file mode 100644 index 00000000..d5da5335 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hewett^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +fall 2 181 1.609438 3.218876 40 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +address 2 170 1.791759 3.583518 62 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +first 2 140 1.945910 3.891820 71 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +mathemat 2 108 2.197225 4.394450 123 +place 2 106 2.197225 4.394450 124 +intern 1 108 2.197225 2.197225 128 +call 1 91 2.397895 2.397895 153 +educ 1 86 2.484907 2.484907 191 +want 1 79 2.564949 2.564949 199 +nation 1 74 2.639057 2.639057 240 +view 1 70 2.708050 2.708050 254 +visit 1 63 2.772589 2.772589 288 +taylor 1 63 2.772589 2.772589 287 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +faculti 2 56 2.890372 5.780744 325 +summer 2 56 2.890372 5.780744 311 +might 1 41 3.218876 3.218876 426 +author 1 39 3.258097 3.258097 450 +open 2 38 3.295837 6.591674 469 +electr 1 38 3.295837 3.295837 461 +utc 1 27 3.637586 3.637586 629 +honor 2 23 3.806662 7.613324 729 +finish 2 22 3.850148 7.700296 748 +divis 5 21 3.912023 19.560115 803 +grad 2 20 3.951244 7.902488 837 +minut 1 20 3.951244 3.951244 810 +thoma 1 18 4.060443 4.060443 901 +edulast 1 17 4.110874 4.110874 927 +fourth 1 16 4.174387 4.174387 999 +universityof 3 15 4.248495 12.745485 1061 +club 1 15 4.248495 4.248495 1058 +central 1 13 4.382027 4.382027 1160 +volleybal 5 9 4.753590 23.767950 1598 +departmentof 1 9 4.753590 4.753590 1539 +motorola 1 9 4.753590 4.753590 1546 +competit 1 8 4.875197 4.875197 1635 +contest 1 5 5.347108 5.347108 2273 +ioanni 1 5 5.347108 5.347108 2553 +champion 7 4 5.568345 38.978415 2982 +lanc 1 4 5.568345 5.568345 3022 +educlick 1 3 5.857933 5.857933 3612 +sawada 1 3 5.857933 5.857933 3190 +smaragdaki 1 3 5.857933 5.857933 3851 +tower 1 3 5.857933 5.857933 3818 +myfavorit 1 3 5.857933 5.857933 3852 +intramur 5 2 6.263398 31.316990 5590 +kansa 2 2 6.263398 12.526796 5591 +bowl 2 2 6.263398 12.526796 5417 +marathon 1 2 6.263398 6.263398 5592 +micheal 3 1 6.957497 20.872491 13299 +hewett 2 1 6.957497 13.914994 13300 +hewetthewett 1 1 6.957497 6.957497 13301 +fingerm 1 1 6.957497 6.957497 13302 +stanfordunivers 1 1 6.957497 6.957497 13303 +washburnunivers 1 1 6.957497 6.957497 13304 +collegiateprogram 1 1 6.957497 6.957497 13305 +wahlutc 1 1 6.957497 6.957497 13306 +hanoi 1 1 6.957497 6.957497 13307 +tokudaut 1 1 6.957497 6.957497 13308 +locatem 1 1 6.957497 6.957497 13309 +learnabout 1 1 6.957497 6.957497 13310 +downloadmi 1 1 6.957497 6.957497 13311 +learnmor 1 1 6.957497 6.957497 13312 +timefax 1 1 6.957497 6.957497 13313 +hewettemail 1 1 6.957497 6.957497 13314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hiep^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hiep^ new file mode 100644 index 00000000..6ec2fb90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hiep^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 6 380 0.693147 4.158882 9 +interest 3 384 0.693147 2.079441 11 +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +research 1 431 0.693147 0.693147 10 +current 6 284 1.098612 6.591672 21 +us 3 329 1.098612 3.295836 16 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +softwar 4 220 1.386294 5.545176 30 +also 3 259 1.386294 4.158882 28 +languag 2 227 1.386294 2.772588 26 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +austin 5 168 1.791759 8.958795 63 +texa 2 160 1.791759 3.583518 64 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +like 2 132 1.945910 3.891820 81 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +databas 5 122 2.079442 10.397210 86 +compil 1 122 2.079442 2.079442 96 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +graphic 3 90 2.397895 7.193685 147 +center 1 88 2.397895 2.397895 158 +real 1 93 2.397895 2.397895 144 +search 1 95 2.397895 2.397895 155 +activ 2 84 2.484907 4.969814 182 +solut 2 82 2.484907 4.969814 162 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +educ 1 86 2.484907 2.484907 191 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +larg 1 82 2.484907 2.484907 168 +novemb 1 81 2.484907 2.484907 179 +resum 4 79 2.564949 10.259796 217 +state 1 76 2.564949 2.564949 207 +orient 1 80 2.564949 2.564949 205 +interfac 1 79 2.564949 2.564949 209 +onlin 1 75 2.639057 2.639057 223 +write 1 72 2.639057 2.639057 222 +java 5 70 2.708050 13.540250 248 +receiv 2 66 2.708050 5.416100 244 +simul 1 66 2.708050 2.708050 255 +window 1 68 2.708050 2.708050 242 +main 1 67 2.708050 2.708050 256 +function 2 62 2.772589 5.545178 275 +written 2 63 2.772589 5.545178 278 +virtual 1 62 2.772589 2.772589 285 +best 1 59 2.833213 2.833213 299 +detail 1 57 2.890372 2.890372 321 +explor 1 58 2.890372 2.890372 324 +allow 1 53 2.944439 2.944439 333 +processor 1 54 2.944439 2.944439 335 +life 1 50 3.044522 3.044522 375 +video 1 44 3.135494 3.135494 405 +natur 1 44 3.135494 3.135494 406 +http 4 41 3.218876 12.875504 420 +might 1 41 3.218876 3.218876 426 +fast 1 42 3.218876 3.218876 429 +live 1 40 3.258097 3.258097 451 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +robot 5 36 3.367296 16.836480 497 +game 2 36 3.367296 6.734592 498 +especi 1 36 3.367296 3.367296 496 +product 1 33 3.433987 3.433987 527 +board 1 33 3.433987 3.433987 528 +john 1 33 3.433987 3.433987 532 +anim 1 31 3.496508 3.496508 557 +common 2 30 3.555348 7.110696 574 +rang 1 30 3.555348 3.555348 565 +platform 1 29 3.583519 3.583519 591 +packag 4 28 3.610918 14.443672 614 +american 1 27 3.637586 3.637586 634 +client 1 25 3.737670 3.737670 679 +flow 1 24 3.761200 3.761200 700 +methodolog 1 23 3.806662 3.806662 733 +born 1 21 3.912023 3.912023 798 +unit 1 21 3.912023 3.912023 779 +busi 1 21 3.912023 3.912023 784 +theunivers 1 21 3.912023 3.912023 797 +five 1 19 4.007333 4.007333 841 +lisp 2 18 4.060443 8.120886 897 +seek 1 17 4.110874 4.110874 954 +edulast 1 17 4.110874 4.110874 927 +easi 1 16 4.174387 4.174387 969 +reflect 1 15 4.248495 4.248495 1034 +came 1 13 4.382027 4.382027 1197 +usavoic 1 13 4.382027 4.382027 1198 +assembl 1 12 4.465908 4.465908 1207 +realiti 1 12 4.465908 4.465908 1272 +resid 1 10 4.653960 4.653960 1461 +placement 1 10 4.653960 4.653960 1420 +mepost 1 10 4.653960 4.653960 1472 +novak 1 9 4.753590 4.753590 1521 +poetri 1 9 4.753590 4.753590 1596 +port 2 8 4.875197 9.750394 1766 +cross 1 8 4.875197 4.875197 1703 +realist 1 8 4.875197 4.875197 1665 +textur 1 8 4.875197 4.875197 1677 +spot 1 7 5.010635 5.010635 1894 +contract 3 6 5.164786 15.494358 1985 +gordon 1 6 5.164786 5.164786 2032 +ousterhout 1 5 5.347108 5.347108 2301 +outdoor 1 5 5.347108 5.347108 2514 +havedevelop 1 4 5.568345 5.568345 2681 +vrml 1 4 5.568345 5.568345 2949 +legion 1 3 5.857933 5.857933 3708 +flat 1 3 5.857933 5.857933 3853 +moredetail 1 3 5.857933 5.857933 3854 +expertis 1 3 5.857933 5.857933 3321 +leverag 1 3 5.857933 5.857933 3153 +vietnames 1 2 6.263398 6.263398 5593 +occup 1 2 6.263398 6.263398 5169 +akcl 1 2 6.263398 6.263398 4796 +standalon 1 2 6.263398 6.263398 4077 +researchwith 1 2 6.263398 6.263398 5594 +sdsc 1 2 6.263398 6.263398 5199 +hiep 2 1 6.957497 13.914994 13315 +xwindow 2 1 6.957497 13.914994 13316 +gunu 2 1 6.957497 13.914994 13317 +netrek 2 1 6.957497 13.914994 13318 +factoryx 2 1 6.957497 13.914994 13319 +nguyenhiep 1 1 6.957497 6.957497 13320 +nguyenabout 1 1 6.957497 6.957497 13321 +meabout 1 1 6.957497 6.957497 13322 +texasfor 1 1 6.957497 6.957497 13323 +providinghigh 1 1 6.957497 6.957497 13324 +hypertextresum 1 1 6.957497 6.957497 13325 +con 1 1 6.957497 6.957497 13326 +nsplace 1 1 6.957497 6.957497 13327 +rexi 1 1 6.957497 6.957497 13328 +emptiv 1 1 6.957497 6.957497 13329 +gdraw 1 1 6.957497 6.957497 13330 +specular 1 1 6.957497 6.957497 13331 +sonar 1 1 6.957497 6.957497 13332 +xgcl 1 1 6.957497 6.957497 13333 +xakcl 1 1 6.957497 6.957497 13334 +anonlin 1 1 6.957497 6.957497 13335 +andmaintain 1 1 6.957497 6.957497 13336 +currentlyact 1 1 6.957497 6.957497 13337 +internetsoftwar 1 1 6.957497 6.957497 13338 +mappingroutin 1 1 6.957497 6.957497 13339 +potteri 1 1 6.957497 6.957497 13340 +vrmlto 1 1 6.957497 6.957497 13341 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hqliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hqliu^ new file mode 100644 index 00000000..f502dd66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hqliu^ @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +depart 3 457 0.693147 2.079441 12 +research 2 431 0.693147 1.386294 10 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +tool 1 117 2.079442 2.079442 93 +welcom 1 122 2.079442 2.079442 99 +world 3 115 2.197225 6.591675 126 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +book 2 99 2.302585 4.605170 131 +search 1 95 2.397895 2.397895 155 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +internet 2 83 2.484907 4.969814 186 +member 1 84 2.484907 2.484907 165 +stuff 1 87 2.484907 2.484907 171 +ieee 1 86 2.484907 2.484907 190 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +java 2 70 2.708050 5.416100 248 +guid 1 63 2.772589 2.772589 267 +new 1 64 2.772589 2.772589 262 +virtual 1 62 2.772589 2.772589 285 +collect 1 65 2.772589 2.772589 268 +taylor 1 63 2.772589 2.772589 287 +unix 1 58 2.890372 2.890372 308 +directori 1 45 3.135494 3.135494 396 +societi 1 40 3.258097 3.258097 456 +expect 1 37 3.332205 3.332205 484 +manual 1 35 3.401197 3.401197 504 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +bookmark 1 26 3.688879 3.688879 639 +yahoo 1 24 3.761200 3.761200 707 +vlsi 1 21 3.912023 3.912023 795 +martin 1 21 3.912023 3.912023 794 +nice 1 20 3.951244 3.951244 809 +citi 1 19 4.007333 4.007333 874 +career 2 12 4.465908 8.931816 1287 +entertain 1 12 4.465908 4.465908 1286 +tour 1 11 4.553877 4.553877 1307 +perl 1 11 4.553877 4.553877 1332 +mosaic 1 10 4.653960 4.653960 1426 +wong 1 9 4.753590 4.753590 1609 +infoseek 1 6 5.164786 5.164786 2188 +giant 1 3 5.857933 5.857933 3137 +huiqun 2 2 6.263398 12.526796 4200 +rosett 2 2 6.263398 12.526796 5595 +hqliu 2 2 6.263398 12.526796 4199 +sunris 1 2 6.263398 6.263398 5212 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hudson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hudson^ new file mode 100644 index 00000000..e2f041d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hudson^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 7 168 1.791759 12.542313 63 +avail 3 169 1.791759 5.375277 48 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +postscript 1 131 2.079442 2.079442 90 +librari 1 87 2.484907 2.484907 181 +onlin 2 75 2.639057 5.278114 223 +knowledg 1 67 2.708050 2.708050 243 +main 1 67 2.708050 2.708050 256 +colleg 1 61 2.833213 2.833213 300 +thesi 1 57 2.890372 2.890372 327 +reason 1 57 2.890372 2.890372 318 +advisor 1 51 2.995732 2.995732 355 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +expect 1 37 3.332205 3.332205 484 +represent 1 35 3.401197 3.401197 512 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +rule 1 26 3.688879 3.688879 638 +action 1 15 4.248495 4.248495 1038 +english 1 15 4.248495 4.248495 1033 +draft 1 14 4.317488 4.317488 1085 +philosophi 1 13 4.382027 4.382027 1167 +usavoic 1 13 4.382027 4.382027 1198 +vladimir 1 11 4.553877 4.553877 1324 +mepost 1 10 4.653960 4.653960 1472 +tempor 1 9 4.753590 4.753590 1584 +colloquium 1 8 4.875197 4.875197 1734 +sciencesat 1 7 5.010635 5.010635 1968 +infer 1 6 5.164786 5.164786 2040 +causal 1 6 5.164786 5.164786 2024 +lifschitz 1 5 5.347108 5.347108 2542 +nonmonoton 1 4 5.568345 5.568345 3023 +interestscommonsens 1 2 6.263398 6.263398 5596 +actionlog 1 2 6.263398 6.263398 5597 +reasoningmi 1 2 6.263398 6.263398 5598 +hudson 2 1 6.957497 13.914994 13342 +pagehudson 1 1 6.957497 6.957497 13343 +turnerphd 1 1 6.957497 6.957497 13344 +ofcommonsens 1 1 6.957497 6.957497 13345 +msc 1 1 6.957497 6.957497 13346 +mli 1 1 6.957497 6.957497 13347 +linkseuropean 1 1 6.957497 6.957497 13348 +spatialand 1 1 6.957497 6.957497 13349 +reasoningto 1 1 6.957497 6.957497 13350 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hyanbin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hyanbin^ new file mode 100644 index 00000000..bf8dc9da --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^hyanbin^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +comput 4 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +time 3 293 1.098612 3.295836 17 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 2 215 1.386294 2.772588 31 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +modifi 1 178 1.609438 1.609438 35 +austin 5 168 1.791759 8.958795 63 +texa 2 160 1.791759 3.583518 64 +address 2 170 1.791759 3.583518 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +welcom 2 122 2.079442 4.158884 99 +spring 2 131 2.079442 4.158884 88 +postscript 1 131 2.079442 2.079442 90 +number 1 130 2.079442 2.079442 97 +world 2 115 2.197225 4.394450 126 +send 1 114 2.197225 2.197225 109 +part 1 98 2.302585 2.302585 129 +center 1 88 2.397895 2.397895 158 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +comment 1 93 2.397895 2.397895 146 +ieee 1 86 2.484907 2.484907 190 +help 1 83 2.484907 2.484907 175 +internet 1 83 2.484907 2.484907 186 +librari 1 87 2.484907 2.484907 181 +resum 1 79 2.564949 2.564949 217 +onlin 1 75 2.639057 2.639057 223 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +format 1 48 3.044522 3.044522 356 +compani 2 41 3.218876 6.437752 423 +littl 1 39 3.258097 3.258097 454 +travel 1 30 3.555348 3.555348 579 +full 2 28 3.610918 7.221836 615 +campu 1 27 3.637586 3.637586 623 +left 1 19 4.007333 4.007333 851 +beauti 1 18 4.060443 4.060443 912 +seek 1 17 4.110874 4.110874 954 +zhang 2 16 4.174387 8.348774 980 +career 1 12 4.465908 4.465908 1287 +lake 1 11 4.553877 4.553877 1373 +allen 1 5 5.347108 5.347108 2470 +blvd 1 4 5.568345 5.568345 3007 +yanbin 2 2 6.263398 12.526796 5599 +hyanbin 2 1 6.957497 13.914994 13351 +cutti 1 1 6.957497 6.957497 13352 +webmuseum 1 1 6.957497 6.957497 13353 +homeland 1 1 6.957497 6.957497 13354 +tarlor 1 1 6.957497 6.957497 13355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^isheldon^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^isheldon^ new file mode 100644 index 00000000..08118a72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^isheldon^ @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +inform 4 412 0.693147 2.772588 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +utexa 3 189 1.609438 4.828314 44 +updat 1 191 1.609438 1.609438 41 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +construct 1 139 1.945910 1.945910 82 +tool 1 117 2.079442 2.079442 93 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +graphic 1 90 2.397895 2.397895 147 +master 1 76 2.564949 2.564949 216 +decemb 1 80 2.564949 2.564949 215 +degre 1 69 2.708050 2.708050 259 +creat 1 63 2.772589 2.772589 277 +summer 1 56 2.890372 2.890372 311 +undergradu 1 54 2.944439 2.944439 338 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +http 1 41 3.218876 3.218876 420 +compani 1 41 3.218876 3.218876 423 +small 1 39 3.258097 3.258097 447 +framework 1 28 3.610918 3.610918 606 +trace 1 25 3.737670 3.737670 677 +geometri 1 22 3.850148 3.850148 752 +concentr 1 18 4.060443 4.060443 906 +modular 1 10 4.653960 4.653960 1392 +univeristi 1 8 4.875197 4.875197 1754 +mass 1 8 4.875197 4.875197 1732 +solid 1 5 5.347108 5.347108 2255 +babi 1 5 5.347108 5.347108 2493 +isaac 3 3 5.857933 17.573799 3855 +coursework 1 3 5.857933 5.857933 3588 +sheldon 3 2 6.263398 18.790194 5226 +reciev 1 2 6.263398 6.263398 5600 +lowel 1 2 6.263398 6.263398 5224 +isheldon 3 1 6.957497 20.872491 13356 +reciv 1 1 6.957497 6.957497 13357 +unives 1 1 6.957497 6.957497 13358 +schlaeor 1 1 6.957497 6.957497 13359 +mellor 1 1 6.957497 6.957497 13360 +bsptree 1 1 6.957497 6.957497 13361 +butt 1 1 6.957497 6.957497 13362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jadair^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jadair^ new file mode 100644 index 00000000..64f3f956 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jadair^ @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +book 1 99 2.302585 2.302585 131 +homepag 1 93 2.397895 2.397895 148 +internet 1 83 2.484907 2.484907 186 +taylor 2 63 2.772589 5.545178 287 +back 2 60 2.833213 5.666426 297 +friend 1 48 3.044522 3.044522 376 +get 1 46 3.091042 3.091042 380 +live 2 40 3.258097 6.516194 451 +jame 1 35 3.401197 3.401197 507 +john 2 33 3.433987 6.867974 532 +steve 1 29 3.583519 3.583519 594 +consult 1 24 3.761200 3.761200 687 +doctor 1 24 3.761200 3.761200 709 +alumni 1 21 3.912023 3.912023 807 +white 1 17 4.110874 4.110874 951 +carl 1 15 4.248495 4.248495 1024 +wife 1 13 4.382027 4.382027 1196 +rice 1 11 4.553877 4.553877 1336 +evan 1 8 4.875197 4.875197 1633 +matthew 1 6 5.164786 5.164786 2193 +fish 1 6 5.164786 5.164786 2207 +holli 1 2 6.263398 6.263398 5601 +dejanew 1 2 6.263398 6.263398 5602 +adair 2 1 6.957497 13.914994 13363 +crinkum 1 1 6.957497 6.957497 13364 +crankum 1 1 6.957497 6.957497 13365 +compound 1 1 6.957497 6.957497 13366 +eileen 1 1 6.957497 6.957497 13367 +mengerink 1 1 6.957497 6.957497 13368 +fanat 1 1 6.957497 6.957497 13369 +traylen 1 1 6.957497 6.957497 13370 +jadair 1 1 6.957497 6.957497 13371 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jbc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jbc^ new file mode 100644 index 00000000..e91ca247 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jbc^ @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +oper 2 180 1.609438 3.218876 34 +paper 1 205 1.609438 1.609438 38 +texa 2 160 1.791759 3.583518 64 +austin 1 168 1.791759 1.791759 63 +physic 1 47 3.091042 3.091042 377 +vita 1 38 3.295837 3.295837 473 +john 1 33 3.433987 3.433987 532 +chamber 1 8 4.875197 4.875197 1692 +yale 1 6 5.164786 5.164786 2003 +pagejohn 1 2 6.263398 6.263398 5603 +universityph 1 2 6.263398 6.263398 5604 +chamberssenior 1 1 6.957497 6.957497 13372 +specialistb 1 1 6.957497 6.957497 13373 +paso 1 1 6.957497 6.957497 13374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jbednar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jbednar^ new file mode 100644 index 00000000..a050f0b6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jbednar^ @@ -0,0 +1,165 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +univers 5 571 0.000000 0.000000 5 +scienc 4 640 0.000000 0.000000 4 +research 7 431 0.693147 4.852029 10 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +us 2 329 1.098612 2.197224 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +also 2 259 1.386294 2.772588 28 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +austin 9 168 1.791759 16.125831 63 +texa 8 160 1.791759 14.334072 64 +avail 2 169 1.791759 3.583518 48 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +model 5 145 1.945910 9.729550 69 +process 4 142 1.945910 7.783640 72 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +postscript 2 131 2.079442 4.158884 90 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +make 2 111 2.197225 4.394450 120 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +advanc 2 99 2.302585 4.605170 130 +call 2 91 2.397895 4.795790 153 +level 1 87 2.484907 2.484907 180 +decemb 1 80 2.564949 2.564949 215 +master 1 76 2.564949 2.564949 216 +complet 1 77 2.564949 2.564949 208 +orient 1 80 2.564949 2.564949 205 +resum 1 79 2.564949 2.564949 217 +line 1 75 2.639057 2.639057 231 +appli 1 71 2.639057 2.639057 226 +simul 2 66 2.708050 5.416100 255 +goal 1 66 2.708050 2.708050 250 +test 1 66 2.708050 2.708050 252 +receiv 1 66 2.708050 2.708050 244 +differ 1 66 2.708050 2.708050 253 +organ 4 65 2.772589 11.090356 265 +result 4 65 2.772589 11.090356 281 +interact 3 62 2.772589 8.317767 270 +function 2 62 2.772589 5.545178 275 +dept 1 64 2.772589 2.772589 291 +artifici 1 63 2.772589 2.772589 280 +abstract 1 62 2.772589 2.772589 276 +juli 1 60 2.833213 2.833213 305 +thesi 3 57 2.890372 8.671116 327 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +extens 1 53 2.944439 2.944439 340 +particular 1 51 2.995732 2.995732 352 +case 1 51 2.995732 2.995732 351 +finger 1 52 2.995732 2.995732 354 +visual 7 48 3.044522 21.311654 372 +principl 1 48 3.044522 3.044522 357 +format 1 48 3.044522 3.044522 356 +understand 1 47 3.091042 3.091042 384 +effect 1 46 3.091042 3.091042 385 +long 1 43 3.178054 3.178054 413 +offer 1 43 3.178054 3.178054 414 +past 1 42 3.218876 3.218876 428 +vision 1 41 3.218876 3.218876 430 +might 1 41 3.218876 3.218876 426 +examin 1 42 3.218876 3.218876 424 +probabl 1 40 3.258097 3.258097 455 +electr 1 38 3.295837 3.295837 461 +soon 1 36 3.367296 3.367296 494 +ofth 1 36 3.367296 3.367296 491 +human 2 32 3.465736 6.931472 546 +neural 1 30 3.555348 3.555348 578 +power 1 30 3.555348 3.555348 573 +domain 1 30 3.555348 3.555348 564 +propos 2 28 3.610918 7.221836 602 +measur 1 28 3.610918 3.610918 609 +mind 1 27 3.637586 3.637586 632 +enabl 1 26 3.688879 3.688879 655 +rather 1 26 3.688879 3.688879 642 +primari 3 25 3.737670 11.213010 669 +demonstr 1 24 3.761200 3.761200 694 +doctor 1 24 3.761200 3.761200 709 +input 1 23 3.806662 3.806662 727 +begin 1 23 3.806662 3.806662 716 +self 4 22 3.850148 15.400592 761 +serv 1 22 3.850148 3.850148 758 +thu 1 21 3.912023 3.912023 773 +similar 1 21 3.912023 3.912023 771 +basi 1 20 3.951244 3.951244 828 +predict 1 19 4.007333 4.007333 855 +log 1 19 4.007333 4.007333 857 +concentr 1 18 4.060443 4.060443 906 +failur 1 18 4.060443 4.060443 898 +figur 1 18 4.060443 4.060443 903 +seek 1 17 4.110874 4.110874 954 +thought 1 17 4.110874 4.110874 945 +explan 1 16 4.174387 4.174387 985 +spatial 1 16 4.174387 4.174387 988 +later 3 15 4.248495 12.745485 1043 +drive 1 15 4.248495 4.248495 1052 +ascii 1 15 4.248495 4.248495 1032 +command 1 14 4.317488 4.317488 1083 +philosophi 1 13 4.382027 4.382027 1167 +necessari 1 13 4.382027 4.382027 1147 +carri 1 13 4.382027 4.382027 1152 +incorpor 1 13 4.382027 4.382027 1163 +overal 1 12 4.465908 4.465908 1254 +qualit 1 11 4.553877 4.553877 1362 +equal 1 10 4.653960 4.653960 1424 +candid 1 9 4.753590 4.753590 1606 +preliminari 1 9 4.753590 4.753590 1480 +brain 3 8 4.875197 14.625591 1638 +realist 2 8 4.875197 9.750394 1665 +empir 1 8 4.875197 4.875197 1722 +miikkulainen 1 8 4.875197 4.875197 1667 +quantit 1 8 4.875197 4.875197 1654 +centuri 1 7 5.010635 5.010635 1935 +appar 1 7 5.010635 5.010635 1958 +aris 1 7 5.010635 5.010635 1924 +biolog 1 6 5.164786 5.164786 2147 +illus 4 4 5.568345 22.273380 2603 +insight 1 4 5.568345 5.568345 3024 +outdat 1 4 5.568345 5.568345 2797 +cortex 3 3 5.857933 17.573799 3856 +dramat 2 3 5.857933 11.715866 3239 +useth 1 3 5.857933 5.857933 3110 +cortic 1 3 5.857933 5.857933 3857 +neuron 1 3 5.857933 5.857933 3798 +frequenc 1 3 5.857933 5.857933 3206 +lissom 3 2 6.263398 18.790194 5605 +jbednar 2 2 6.263398 12.526796 4284 +bednar 1 2 6.263398 6.263398 4283 +testabl 1 2 6.263398 6.263398 5606 +hypothes 1 2 6.263398 6.263398 5607 +nearli 1 2 6.263398 6.263398 5608 +sirosh 1 2 6.263398 6.263398 5609 +aftereffect 7 1 6.957497 48.702479 13375 +tilt 4 1 6.957497 27.829988 13376 +bednarjim 1 1 6.957497 6.957497 13377 +ofcognit 1 1 6.957497 6.957497 13378 +fewdecad 1 1 6.957497 6.957497 13379 +thehuman 1 1 6.957497 6.957497 13380 +beavail 1 1 6.957497 6.957497 13381 +becomingpract 1 1 6.957497 6.957497 13382 +refut 1 1 6.957497 6.957497 13383 +makecognit 1 1 6.957497 6.957497 13384 +purelyphilosoph 1 1 6.957497 6.957497 13385 +psychologist 1 1 6.957497 6.957497 13386 +inhibit 1 1 6.957497 6.957497 13387 +indirect 1 1 6.957497 6.957497 13388 +visualbehavior 1 1 6.957497 6.957497 13389 +departmentmi 1 1 6.957497 6.957497 13390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jfang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jfang^ new file mode 100644 index 00000000..56e3e8c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jfang^ @@ -0,0 +1,96 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +cours 2 273 1.098612 2.197224 15 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +network 1 168 1.791759 1.791759 61 +construct 2 139 1.945910 3.891820 82 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +file 1 132 1.945910 1.945910 70 +like 1 132 1.945910 1.945910 81 +document 2 121 2.079442 4.158884 89 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +number 1 130 2.079442 2.079442 97 +topic 2 114 2.197225 4.394450 110 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +take 1 97 2.302585 2.302585 134 +homepag 3 93 2.397895 7.193685 148 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +librari 2 87 2.484907 4.969814 181 +info 2 85 2.484907 4.969814 176 +help 2 83 2.484907 4.969814 175 +academ 1 82 2.484907 2.484907 178 +stuff 1 87 2.484907 2.484907 171 +learn 1 86 2.484907 2.484907 170 +start 1 83 2.484907 2.484907 173 +resum 1 79 2.564949 2.564949 217 +sourc 1 77 2.564949 2.564949 201 +state 1 76 2.564949 2.564949 207 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +html 2 75 2.639057 5.278114 235 +name 1 72 2.639057 2.639057 220 +free 1 73 2.639057 2.639057 224 +test 3 66 2.708050 8.124150 252 +java 3 70 2.708050 8.124150 248 +visit 3 63 2.772589 8.317767 288 +guid 1 63 2.772589 2.772589 267 +special 1 56 2.890372 2.890372 320 +unix 1 58 2.890372 2.890372 308 +summer 1 56 2.890372 2.890372 311 +format 1 48 3.044522 3.044522 356 +visitor 1 49 3.044522 3.044522 371 +tutori 1 39 3.258097 3.258097 437 +domain 1 30 3.555348 3.555348 564 +packag 1 28 3.610918 3.610918 614 +linux 1 27 3.637586 3.637586 631 +utc 1 27 3.637586 3.637586 629 +administr 1 27 3.637586 3.637586 628 +jeff 1 25 3.737670 3.737670 673 +frame 1 24 3.761200 3.761200 684 +applet 1 20 3.951244 3.951244 827 +north 1 19 4.007333 4.007333 873 +excel 1 19 4.007333 4.007333 868 +debug 1 17 4.110874 4.110874 944 +transfer 1 16 4.174387 4.174387 967 +ascii 1 15 4.248495 4.248495 1032 +latex 1 14 4.317488 4.317488 1064 +pretti 1 13 4.382027 4.382027 1191 +america 1 11 4.553877 4.553877 1370 +catalog 1 10 4.653960 4.653960 1431 +novak 1 9 4.753590 4.753590 1521 +largest 1 7 5.010635 5.010635 1858 +privaci 1 6 5.164786 5.164786 2144 +ross 1 5 5.347108 5.347108 2243 +florida 1 5 5.347108 5.347108 2526 +automobil 1 3 5.857933 5.857933 3709 +ethernet 1 2 6.263398 6.263398 5171 +edmund 1 2 6.263398 6.263398 5213 +buyer 1 2 6.263398 6.263398 5210 +sceneri 1 2 6.263398 6.263398 5152 +ofjunfanghi 1 1 6.957497 6.957497 13391 +sysadm 1 1 6.957497 6.957497 13392 +unixish 1 1 6.957497 6.957497 13393 +kristina 1 1 6.957497 6.957497 13394 +jfang 1 1 6.957497 6.957497 13395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jprior^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jprior^ new file mode 100644 index 00000000..a295ae93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jprior^ @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +start 1 83 2.484907 2.484907 173 +good 5 77 2.564949 12.824745 200 +resum 1 79 2.564949 2.564949 217 +knowledg 1 67 2.708050 2.708050 243 +long 1 43 3.178054 3.178054 413 +probabl 1 40 3.258097 3.258097 455 +john 2 33 3.433987 6.867974 532 +someth 1 31 3.496508 3.496508 554 +chip 1 21 3.912023 3.912023 770 +enough 1 15 4.248495 4.248495 1040 +beer 2 6 5.164786 10.329572 2216 +sleep 1 6 5.164786 5.164786 2211 +chew 1 3 5.857933 5.857933 3618 +dog 1 2 6.263398 6.263398 5089 +swallow 1 2 6.263398 6.263398 5025 +jprior 2 1 6.957497 13.914994 13396 +priorjohn 1 1 6.957497 6.957497 13397 +priormi 1 1 6.957497 6.957497 13398 +accumul 1 1 6.957497 6.957497 13399 +hurt 1 1 6.957497 6.957497 13400 +nacho 1 1 6.957497 6.957497 13401 +swisher 1 1 6.957497 6.957497 13402 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jthomas^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jthomas^ new file mode 100644 index 00000000..c060a7ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jthomas^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 1 168 1.791759 1.791759 63 +technolog 1 131 2.079442 2.079442 102 +site 1 106 2.197225 2.197225 119 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +internet 1 83 2.484907 2.484907 186 +laboratori 1 63 2.772589 2.772589 292 +jeff 2 25 3.737670 7.475340 673 +thoma 3 18 4.060443 12.181329 901 +ultim 1 17 4.110874 4.110874 943 +keyword 1 11 4.553877 4.553877 1356 +trade 1 7 5.010635 5.010635 1815 +homepagejeff 1 1 6.957497 6.957497 13403 +homepagecontact 1 1 6.957497 6.957497 13404 +informationpublicationssoftwar 1 1 6.957497 6.957497 13405 +groupphoto 1 1 6.957497 6.957497 13406 +albumfavorit 1 1 6.957497 6.957497 13407 +sitesuniversityof 1 1 6.957497 6.957497 13408 +departmentappliedresearch 1 1 6.957497 6.957497 13409 +electricaland 1 1 6.957497 6.957497 13410 +departmentedsfinanci 1 1 6.957497 6.957497 13411 +fttc 1 1 6.957497 6.957497 13412 +utacademiccalendarsut 1 1 6.957497 6.957497 13413 +sportshook 1 1 6.957497 6.957497 13414 +longhorn 1 1 6.957497 6.957497 13415 +utfootbal 1 1 6.957497 6.957497 13416 +scheduleaustintexa 1 1 6.957497 6.957497 13417 +jthoma 1 1 6.957497 6.957497 13418 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jyluo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jyluo^ new file mode 100644 index 00000000..82a9eaa5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^jyluo^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 5 640 0.000000 0.000000 4 +univers 5 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +student 2 343 1.098612 2.197224 19 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +octob 1 89 2.397895 2.397895 156 +stuff 1 87 2.484907 2.484907 171 +journal 1 83 2.484907 2.484907 183 +collect 1 65 2.772589 2.772589 268 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +china 2 37 3.332205 6.664410 487 +weather 1 28 3.610918 3.610918 618 +todai 1 25 3.737670 3.737670 672 +highli 1 23 3.806662 3.806662 725 +alumni 1 21 3.912023 3.912023 807 +beij 1 19 4.007333 4.007333 876 +lot 1 18 4.060443 4.060443 889 +excit 1 11 4.553877 4.553877 1329 +perl 1 11 4.553877 4.553877 1332 +ataustin 1 9 4.753590 4.753590 1610 +pagecomput 1 7 5.010635 5.010635 1900 +peke 3 5 5.347108 16.041324 2539 +appreci 1 5 5.347108 5.347108 2374 +meyour 1 3 5.857933 5.857933 3858 +homepagewelcom 1 2 6.263398 6.263398 4808 +novelschines 1 2 6.263398 6.263398 5610 +registrar 1 2 6.263398 6.263398 5611 +gradaut 1 2 6.263398 6.263398 5612 +studiesut 1 2 6.263398 6.263398 5613 +magzin 1 2 6.263398 6.263398 5614 +technicalreport 1 2 6.263398 6.263398 5615 +visitorsinc 1 2 6.263398 6.263398 5616 +jiani 2 1 6.957497 13.914994 13419 +indepart 1 1 6.957497 6.957497 13420 +ofpek 1 1 6.957497 6.957497 13421 +chinesechines 1 1 6.957497 6.957497 13422 +scenerychines 1 1 6.957497 6.957497 13423 +classicschines 1 1 6.957497 6.957497 13424 +magazineschines 1 1 6.957497 6.957497 13425 +newspapersus 1 1 6.957497 6.957497 13426 +libraryut 1 1 6.957497 6.957497 13427 +campusutaccessabout 1 1 6.957497 6.957497 13428 +citylimit 1 1 6.957497 6.957497 13429 +miscellaneousyahoojava 1 1 6.957497 6.957497 13430 +sunjavascript 1 1 6.957497 6.957497 13431 +netscapeth 1 1 6.957497 6.957497 13432 +associationcomput 1 1 6.957497 6.957497 13433 +webnetwork 1 1 6.957497 6.957497 13434 +libraryth 1 1 6.957497 6.957497 13435 +bibliographiesintern 1 1 6.957497 6.957497 13436 +jyluo 1 1 6.957497 6.957497 13437 +suggestionswould 1 1 6.957497 6.957497 13438 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kedar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kedar^ new file mode 100644 index 00000000..b2505da7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kedar^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +interest 2 384 0.693147 1.386294 11 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +know 1 80 2.564949 2.564949 198 +logic 1 71 2.639057 2.639057 230 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +would 1 67 2.708050 2.708050 251 +street 1 63 2.772589 2.772589 293 +reason 1 57 2.890372 2.890372 318 +advisor 1 51 2.995732 2.995732 355 +concurr 2 34 3.401197 6.802394 501 +semant 1 29 3.583519 3.583519 587 +todai 1 25 3.737670 3.737670 672 +doctor 1 24 3.761200 3.761200 709 +thank 1 23 3.806662 3.806662 721 +indian 1 22 3.850148 3.850148 769 +wonder 1 20 3.951244 3.951244 815 +lot 1 18 4.060443 4.060443 889 +bachelor 1 17 4.110874 4.110874 957 +came 1 13 4.382027 4.382027 1197 +tempor 1 9 4.753590 4.753590 1584 +madra 2 8 4.875197 9.750394 1770 +allen 1 5 5.347108 5.347108 2470 +emerson 1 5 5.347108 5.347108 2547 +mehi 1 2 6.263398 6.263398 5549 +kedar 1 1 6.957497 6.957497 13439 +namjoshiabout 1 1 6.957497 6.957497 13440 +distributedalgorithm 1 1 6.957497 6.957497 13441 +automatatheori 1 1 6.957497 6.957497 13442 +amul 1 1 6.957497 6.957497 13443 +adkedar 1 1 6.957497 6.957497 13444 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kharker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kharker^ new file mode 100644 index 00000000..356c0933 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kharker^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +utexa 3 189 1.609438 4.828314 44 +public 2 202 1.609438 3.218876 43 +updat 1 191 1.609438 1.609438 41 +texa 4 160 1.791759 7.167036 64 +austin 4 168 1.791759 7.167036 63 +hall 1 146 1.945910 1.945910 65 +provid 1 121 2.079442 2.079442 94 +world 1 115 2.197225 2.197225 126 +academ 1 82 2.484907 2.484907 178 +stuff 1 87 2.484907 2.484907 171 +wide 1 84 2.484907 2.484907 185 +resum 1 79 2.564949 2.564949 217 +servic 1 72 2.639057 2.639057 236 +view 2 70 2.708050 5.416100 254 +taylor 1 63 2.772589 2.772589 287 +faculti 1 56 2.890372 2.890372 325 +author 1 39 3.258097 3.258097 450 +respons 1 37 3.332205 3.332205 476 +staff 1 36 3.367296 3.367296 490 +board 1 33 3.433987 3.433987 528 +express 1 32 3.465736 3.465736 540 +linux 1 27 3.637586 3.637586 631 +facil 1 20 3.951244 3.951244 814 +reflect 1 15 4.248495 4.248495 1034 +kenneth 3 12 4.465908 13.397724 1265 +guest 1 12 4.465908 4.465908 1220 +opinion 2 8 4.875197 9.750394 1708 +babylon 1 8 4.875197 4.875197 1731 +necessarili 1 7 5.010635 5.010635 1899 +polit 1 6 5.164786 5.164786 2115 +regent 1 5 5.347108 5.347108 2551 +radio 1 4 5.568345 5.568345 3025 +sole 1 4 5.568345 5.568345 2592 +cyberspac 1 3 5.857933 5.857933 3719 +harker 3 1 6.957497 20.872491 13445 +kharker 2 1 6.957497 13.914994 13446 +amateur 1 1 6.957497 6.957497 13447 +rocketri 1 1 6.957497 6.957497 13448 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kincaid^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kincaid^ new file mode 100644 index 00000000..774344c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kincaid^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +softwar 2 220 1.386294 2.772588 30 +develop 2 174 1.791759 3.583518 53 +algorithm 2 162 1.791759 3.583518 57 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +area 2 144 1.945910 3.891820 80 +perform 1 143 1.945910 1.945910 74 +analysi 2 124 2.079442 4.158884 98 +high 1 130 2.079442 2.079442 101 +world 3 115 2.197225 6.591675 126 +mathemat 3 108 2.197225 6.591675 123 +topic 1 114 2.197225 2.197225 110 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +larg 2 82 2.484907 4.969814 168 +solut 2 82 2.484907 4.969814 162 +second 1 81 2.484907 2.484907 166 +method 1 80 2.564949 2.564949 213 +david 2 71 2.639057 5.278114 232 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +degre 1 69 2.708050 2.708050 259 +organ 1 65 2.772589 2.772589 265 +sever 1 56 2.890372 2.890372 322 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +numer 4 49 3.044522 12.178088 369 +basic 1 50 3.044522 3.044522 360 +algebra 3 45 3.135494 9.406482 394 +anoth 1 45 3.135494 3.135494 408 +linear 5 41 3.218876 16.094380 431 +award 1 34 3.401197 3.401197 523 +committe 1 34 3.401197 3.401197 522 +focus 1 29 3.583519 3.583519 584 +session 1 26 3.688879 3.688879 643 +equat 2 23 3.806662 7.613324 724 +honor 1 23 3.806662 3.806662 729 +recognit 1 23 3.806662 3.806662 723 +variabl 1 23 3.806662 3.806662 715 +director 1 22 3.850148 3.850148 767 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +young 2 16 4.174387 8.348774 991 +spars 1 16 4.174387 4.174387 989 +senior 1 14 4.317488 4.317488 1120 +researchmi 1 14 4.317488 4.317488 1119 +polynomi 1 14 4.317488 4.317488 1069 +nasa 1 13 4.382027 4.382027 1188 +iter 1 12 4.465908 4.465908 1206 +matric 1 10 4.653960 4.653960 1399 +congress 2 9 4.753590 9.507180 1592 +jersei 1 9 4.753590 4.753590 1587 +creativ 1 8 4.875197 4.875197 1777 +pacif 1 8 4.875197 4.875197 1674 +grove 1 8 4.875197 4.875197 1675 +edg 1 8 4.875197 4.875197 1647 +aris 1 7 5.010635 5.010635 1924 +brook 1 6 5.164786 5.164786 2152 +river 1 6 5.164786 5.164786 2220 +imac 3 3 5.857933 17.573799 3718 +certif 1 3 5.857933 5.857933 3859 +interestmathemat 1 3 5.857933 5.857933 3860 +ellipt 1 3 5.857933 5.857933 3774 +atlanta 1 3 5.857933 5.857933 3778 +stationari 1 3 5.857933 5.857933 3861 +kincaid 5 2 6.263398 31.316990 5617 +subprogram 1 2 6.263398 6.263398 5618 +cole 1 2 6.263398 6.263398 4697 +itpack 1 2 6.263398 6.263398 5619 +rassia 1 2 6.263398 6.263398 5620 +lecturerassoci 1 1 6.957497 6.957497 13449 +lamar 1 1 6.957497 6.957497 13450 +technicalinnov 1 1 6.957497 6.957497 13451 +andappli 1 1 6.957497 6.957497 13452 +coeffici 1 1 6.957497 6.957497 13453 +publicationsw 1 1 6.957497 6.957497 13454 +chenei 1 1 6.957497 6.957497 13455 +hay 1 1 6.957497 6.957497 13456 +coput 1 1 6.957497 6.957497 13457 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kistler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kistler^ new file mode 100644 index 00000000..f2ec93a4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kistler^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +page 3 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +parallel 3 169 1.791759 5.375277 60 +texa 1 160 1.791759 1.791759 64 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +analysi 1 124 2.079442 2.079442 98 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +academ 2 82 2.484907 4.969814 178 +school 1 84 2.484907 2.484907 188 +master 1 76 2.564949 2.564949 216 +prof 2 64 2.772589 5.545178 273 +collect 1 65 2.772589 2.772589 268 +juli 1 60 2.833213 2.833213 305 +numer 1 49 3.044522 3.044522 369 +visitor 1 49 3.044522 3.044522 371 +york 2 41 3.218876 6.437752 435 +press 1 42 3.218876 3.218876 419 +linear 1 41 3.218876 3.218876 431 +random 1 34 3.401197 3.401197 511 +administr 1 27 3.637586 3.637586 628 +mike 2 24 3.761200 7.522400 703 +busi 1 21 3.912023 3.912023 784 +particularli 1 19 4.007333 4.007333 867 +commerci 1 16 4.174387 4.174387 1005 +alan 1 13 4.382027 4.382027 1146 +emploi 1 12 4.465908 4.465908 1284 +walk 1 12 4.465908 4.465908 1281 +thedepart 1 11 4.553877 4.553877 1350 +ataustin 1 9 4.753590 4.753590 1610 +interestsi 1 7 5.010635 5.010635 1969 +misra 1 7 5.010635 5.010635 1856 +jayadev 1 4 5.568345 5.568345 3006 +kistler 3 3 5.857933 17.573799 3267 +syracus 2 3 5.857933 11.715866 3553 +cline 1 3 5.857933 5.857933 3218 +coursesfal 1 2 6.263398 6.263398 5225 +theperson 1 1 6.957497 6.957497 13458 +productsdivis 1 1 6.957497 6.957497 13459 +backgroundba 1 1 6.957497 6.957497 13460 +susquehanna 1 1 6.957497 6.957497 13461 +selinsgrov 1 1 6.957497 6.957497 13462 +stern 1 1 6.957497 6.957497 13463 +businessnew 1 1 6.957497 6.957497 13464 +iwith 1 1 6.957497 6.957497 13465 +algebrawith 1 1 6.957497 6.957497 13466 +pflugervil 1 1 6.957497 6.957497 13467 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kornerup^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kornerup^ new file mode 100644 index 00000000..36fafb63 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kornerup^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +time 1 293 1.098612 1.098612 17 +welcom 1 122 2.079442 2.079442 99 +sinc 1 90 2.397895 2.397895 159 +march 1 61 2.833213 2.833213 295 +jacob 2 4 5.568345 11.136690 2667 +kornerup 2 3 5.857933 11.715866 3215 +kornerupjacob 1 1 6.957497 6.957497 13468 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kuipers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kuipers^ new file mode 100644 index 00000000..068c34f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^kuipers^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 4 431 0.693147 2.772588 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +model 2 145 1.945910 3.891820 69 +professor 1 137 1.945910 1.945910 76 +spring 2 131 2.079442 4.158884 88 +mathemat 2 108 2.197225 4.394450 123 +topic 1 114 2.197225 2.197225 110 +place 1 106 2.197225 2.197225 124 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +build 2 85 2.484907 4.969814 184 +intellig 2 72 2.639057 5.278114 225 +logic 1 71 2.639057 2.639057 230 +knowledg 5 67 2.708050 13.540250 243 +simul 2 66 2.708050 5.416100 255 +plan 1 65 2.772589 2.772589 272 +prof 1 64 2.772589 2.772589 273 +colleg 1 61 2.833213 2.833213 300 +reason 2 57 2.890372 5.780744 318 +detail 1 57 2.890372 2.890372 321 +explor 1 58 2.890372 2.890372 324 +finger 1 52 2.995732 2.995732 354 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +describ 1 45 3.135494 3.135494 400 +press 1 42 3.218876 3.218876 419 +map 1 39 3.258097 3.258097 452 +robot 1 36 3.367296 3.367296 497 +represent 2 35 3.401197 6.802394 512 +limit 1 29 3.583519 3.583519 585 +strategi 1 25 3.737670 3.737670 682 +recognit 1 23 3.806662 3.806662 723 +emphasi 1 22 3.850148 3.850148 755 +expert 1 20 3.951244 3.951244 833 +agent 2 18 4.060443 8.120886 910 +spatial 1 16 4.174387 4.174387 988 +cognit 1 16 4.174387 4.174387 986 +cambridg 1 16 4.174387 4.174387 1008 +consider 1 14 4.317488 4.317488 1076 +benjamin 2 11 4.553877 9.107754 1296 +qualit 2 11 4.553877 9.107754 1362 +tour 1 11 4.553877 4.553877 1307 +incomplet 2 9 4.753590 9.507180 1575 +accomplish 1 8 4.875197 4.875197 1755 +centenni 1 7 5.010635 5.010635 1967 +distinct 1 5 5.347108 5.347108 2319 +commonsens 2 4 5.568345 11.136690 2998 +kuiper 3 3 5.857933 17.573799 3794 +qsim 1 3 5.857933 5.857933 3862 +swarthmor 1 2 6.263398 6.263398 5621 +thequalit 1 2 6.263398 6.263398 5622 +kuipersbenjamin 1 1 6.957497 6.957497 13469 +kuipersbruton 1 1 6.957497 6.957497 13470 +withparticular 1 1 6.957497 6.957497 13471 +grouphom 1 1 6.957497 6.957497 13472 +andavail 1 1 6.957497 6.957497 13473 +qualitativereason 1 1 6.957497 6.957497 13474 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lam^ new file mode 100644 index 00000000..2b05aa12 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lam^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +email 2 220 1.386294 2.772588 29 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +utexa 2 189 1.609438 3.218876 44 +fall 1 181 1.609438 1.609438 40 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +network 2 168 1.791759 3.583518 61 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +postscript 2 131 2.079442 4.158884 90 +spring 1 131 2.079442 2.079442 88 +assist 2 112 2.197225 4.394450 113 +ieee 1 86 2.484907 2.484907 190 +internet 1 83 2.484907 2.484907 186 +taylor 1 63 2.772589 2.772589 287 +laboratori 1 63 2.772589 2.772589 292 +februari 1 54 2.944439 2.944439 328 +electron 1 47 3.091042 3.091042 379 +transact 1 39 3.258097 3.258097 438 +sciencesunivers 1 37 3.332205 3.332205 486 +photo 1 31 3.496508 3.496508 561 +profil 1 30 3.555348 3.555348 581 +turn 1 29 3.583519 3.583519 586 +campu 1 27 3.637586 3.637586 623 +administr 1 27 3.637586 3.637586 628 +american 1 27 3.637586 3.637586 634 +compress 2 23 3.806662 7.613324 719 +eduphon 2 15 4.248495 8.496990 1060 +front 1 13 4.382027 4.382027 1154 +tune 1 12 4.465908 4.465908 1227 +editori 1 9 4.753590 4.753590 1611 +simon 2 8 4.875197 9.750394 1697 +clip 1 7 5.010635 5.010635 1868 +sciencesdepart 1 6 5.164786 5.164786 2020 +carbon 1 3 5.857933 5.857933 3804 +cont 1 3 5.857933 5.857933 3171 +toss 1 2 6.263398 6.263398 5470 +kata 2 1 6.957497 13.914994 13475 +submissionnew 1 1 6.957497 6.957497 13476 +empt 1 1 6.957497 6.957497 13477 +statesman 1 1 6.957497 6.957497 13478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ new file mode 100644 index 00000000..7d211e2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lam^NRL^ @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 9 431 0.693147 6.238323 10 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +network 6 168 1.791759 10.750554 61 +austin 2 168 1.791759 3.583518 63 +texa 2 160 1.791759 3.583518 64 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +perform 2 143 1.945910 3.891820 74 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +commun 1 95 2.397895 2.397895 157 +activ 1 84 2.484907 2.484907 182 +chang 1 82 2.484907 2.484907 163 +novemb 1 81 2.484907 2.484907 179 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +test 1 66 2.708050 2.708050 252 +integr 1 67 2.708050 2.708050 245 +laboratori 4 63 2.772589 11.090356 292 +foundat 2 62 2.772589 5.545178 286 +protocol 3 45 3.135494 9.406482 407 +secur 1 30 3.555348 3.555348 577 +fund 1 21 3.912023 3.912023 805 +entir 1 20 3.951244 3.951244 811 +verif 1 20 3.951244 3.951244 826 +supervis 1 20 3.951244 3.951244 840 +tune 1 12 4.465908 4.465908 1227 +cycl 1 11 4.553877 4.553877 1335 +underli 1 10 4.653960 4.653960 1410 +span 1 8 4.875197 4.875197 1751 +simon 1 8 4.875197 4.875197 1697 +lockhe 1 3 5.857933 5.857933 3863 +currentinterest 1 1 6.957497 6.957497 13479 +nsaunivers 1 1 6.957497 6.957497 13480 +videoservic 1 1 6.957497 6.957497 13481 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^landrum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^landrum^ new file mode 100644 index 00000000..58c7cfc5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^landrum^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +octob 1 89 2.397895 2.397895 156 +ofth 1 36 3.367296 3.367296 491 +robert 1 30 3.555348 3.555348 567 +edulast 1 17 4.110874 4.110874 927 +stori 1 14 4.317488 4.317488 1087 +convent 1 14 4.317488 4.317488 1072 +council 1 11 4.553877 4.553877 1364 +mountain 1 10 4.653960 4.653960 1456 +awai 1 10 4.653960 4.653960 1447 +christian 2 7 5.010635 10.021270 1949 +pageth 1 7 5.010635 5.010635 1939 +gordon 1 6 5.164786 5.164786 2032 +graham 1 4 5.568345 5.568345 2817 +republican 1 3 5.857933 5.857933 3815 +backbon 1 2 6.263398 6.263398 5623 +landrum 2 1 6.957497 13.914994 13482 +viruspictur 1 1 6.957497 6.957497 13483 +empirepch 1 1 6.957497 6.957497 13484 +retreattexa 1 1 6.957497 6.957497 13485 +rockrsumfamilyinterest 1 1 6.957497 6.957497 13486 +councillandrum 1 1 6.957497 6.957497 13487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lavender^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lavender^ new file mode 100644 index 00000000..af67bc15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lavender^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +welcom 1 122 2.079442 2.079442 99 +assist 1 112 2.197225 2.197225 113 +activ 1 84 2.484907 2.484907 182 +improv 1 62 2.772589 2.772589 289 +electr 1 38 3.295837 3.295837 461 +greg 2 24 3.761200 7.522400 695 +recommend 1 22 3.850148 3.850148 737 +lavend 2 3 5.857933 11.715866 3217 +professordepart 1 2 6.263398 6.263398 5624 +biograph 1 2 6.263398 6.263398 5625 +austinr 1 1 6.957497 6.957497 13488 +lavenderadjunct 1 1 6.957497 6.957497 13489 +anddepart 1 1 6.957497 6.957497 13490 +engineeringth 1 1 6.957497 6.957497 13491 +informationsuggest 1 1 6.957497 6.957497 13492 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^leekk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^leekk^ new file mode 100644 index 00000000..e286a33e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^leekk^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +home 4 672 0.000000 0.000000 1 +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +welcom 2 122 2.079442 4.158884 99 +look 1 107 2.197225 2.197225 115 +structur 1 106 2.197225 2.197225 105 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +internet 1 83 2.484907 2.484907 186 +activ 1 84 2.484907 2.484907 182 +come 2 78 2.564949 5.129898 202 +master 1 76 2.564949 2.564949 216 +know 1 80 2.564949 2.564949 198 +nation 2 74 2.639057 5.278114 240 +degre 1 69 2.708050 2.708050 259 +taylor 1 63 2.772589 2.772589 287 +locat 1 59 2.833213 2.833213 303 +much 1 52 2.995732 2.995732 349 +small 1 39 3.258097 3.258097 447 +jame 2 35 3.401197 6.802394 507 +particip 1 29 3.583519 3.583519 589 +campu 1 27 3.637586 3.637586 623 +equat 1 23 3.806662 3.806662 724 +vlsi 1 21 3.912023 3.912023 795 +north 1 19 4.007333 4.007333 873 +bachelor 1 17 4.110874 4.110874 957 +normal 1 16 4.174387 4.174387 995 +atth 1 15 4.248495 4.248495 1019 +month 1 15 4.248495 4.248495 1025 +hong 1 14 4.317488 4.317488 1105 +wife 1 13 4.382027 4.382027 1196 +island 2 11 4.553877 9.107754 1345 +kong 1 9 4.753590 4.753590 1602 +sciencesat 1 7 5.010635 5.010635 1968 +smile 1 7 5.010635 5.010635 1807 +singapor 3 5 5.347108 16.041324 2487 +aliv 1 3 5.857933 5.857933 3864 +disc 1 2 6.263398 6.263398 5626 +tropic 1 2 6.263398 6.263398 5398 +aboutthi 1 2 6.263398 6.263398 5627 +addr 1 2 6.263398 6.263398 5628 +pageyeap 1 1 6.957497 6.957497 13493 +designalgorithm 1 1 6.957497 6.957497 13494 +communityi 1 1 6.957497 6.957497 13495 +lovesto 1 1 6.957497 6.957497 13496 +leekk 1 1 6.957497 6.957497 13497 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^less^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^less^ new file mode 100644 index 00000000..fa51de65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^less^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +softwar 4 220 1.386294 5.545176 30 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +parallel 1 169 1.791759 1.791759 60 +austin 1 168 1.791759 1.791759 63 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +high 1 130 2.079442 2.079442 101 +decemb 1 80 2.564949 2.564949 215 +main 1 67 2.708050 2.708050 256 +laboratori 3 63 2.772589 8.317767 292 +investig 1 51 2.995732 2.995732 353 +seminar 1 38 3.295837 3.295837 470 +computersci 1 30 3.555348 3.555348 562 +robert 1 30 3.555348 3.555348 567 +experiment 3 26 3.688879 11.066637 645 +wai 1 25 3.737670 3.737670 662 +less 4 18 4.060443 16.241772 892 +apart 1 7 5.010635 5.010635 1936 +distributedsystem 1 6 5.164786 5.164786 2022 +blumoferdb 1 5 5.347108 5.347108 2324 +oftexa 1 4 5.568345 5.568345 3003 +buildreli 1 1 6.957497 6.957497 13498 +projectsmemb 1 1 6.957497 6.957497 13499 +lablessss 1 1 6.957497 6.957497 13500 +seriessponsorslast 1 1 6.957497 6.957497 13501 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lin^ new file mode 100644 index 00000000..aced5806 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lin^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +languag 3 227 1.386294 4.158882 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +parallel 5 169 1.791759 8.958795 60 +implement 2 152 1.791759 3.583518 52 +address 2 170 1.791759 3.583518 62 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +intern 2 108 2.197225 4.394450 128 +mathemat 1 108 2.197225 2.197225 123 +memori 1 101 2.302585 2.302585 139 +proceed 2 93 2.397895 4.795790 152 +select 1 91 2.397895 2.397895 154 +thing 1 84 2.484907 2.484907 189 +journal 1 83 2.484907 2.484907 183 +april 1 77 2.564949 2.564949 196 +symposium 1 72 2.639057 2.639057 238 +import 1 65 2.772589 2.772589 282 +taylor 1 63 2.772589 2.772589 287 +plai 1 60 2.833213 2.833213 307 +simpl 1 60 2.833213 2.833213 298 +share 1 59 2.833213 2.833213 304 +scientif 1 53 2.944439 2.944439 341 +postal 1 30 3.555348 3.555348 580 +multiprocessor 1 28 3.610918 3.610918 605 +arrai 1 27 3.637586 3.637586 627 +supercomput 1 25 3.737670 3.737670 681 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +portabl 2 20 3.951244 7.902488 819 +comparison 1 19 4.007333 4.007333 863 +novel 1 15 4.248495 4.248495 1039 +conf 1 13 4.382027 4.382027 1181 +decomposit 1 10 4.653960 4.653960 1439 +calvin 1 9 4.753590 4.753590 1518 +austinaustin 1 7 5.010635 5.010635 1966 +banerje 1 6 5.164786 5.164786 2018 +snyder 4 5 5.347108 21.388432 2359 +explicitli 1 5 5.347108 5.347108 2308 +parallelprogram 1 5 5.347108 5.347108 2379 +publicationsth 1 4 5.568345 5.568345 2859 +polymorph 1 4 5.568345 5.568345 2627 +pete 1 3 5.857933 5.857933 3865 +accommod 1 3 5.857933 5.857933 3337 +parallelprocess 1 3 5.857933 5.857933 3626 +performanceanalysi 1 2 6.263398 6.263398 5629 +padua 1 2 6.263398 6.263398 4544 +sciencesth 2 1 6.957497 13.914994 13502 +lincalvin 1 1 6.957497 6.957497 13503 +linassist 1 1 6.957497 6.957497 13504 +iswhat 1 1 6.957497 6.957497 13505 +_study_ 1 1 6.957497 6.957497 13506 +_play_ 1 1 6.957497 6.957497 13507 +carrilresearch 1 1 6.957497 6.957497 13508 +interestscompil 1 1 6.957497 6.957497 13509 +biologyalgorithm 1 1 6.957497 6.957497 13510 +dikaiako 1 1 6.957497 6.957497 13511 +manoussaki 1 1 6.957497 6.957497 13512 +woodward 1 1 6.957497 6.957497 13513 +internationalparallel 1 1 6.957497 6.957497 13514 +sublanguag 1 1 6.957497 6.957497 13515 +compilersfor 1 1 6.957497 6.957497 13516 +gelernt 1 1 6.957497 6.957497 13517 +nicolau 1 1 6.957497 6.957497 13518 +withl 1 1 6.957497 6.957497 13519 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^liugt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^liugt^ new file mode 100644 index 00000000..e90ee923 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^liugt^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +current 2 284 1.098612 2.197224 21 +time 2 293 1.098612 2.197224 17 +last 2 314 1.098612 2.197224 14 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +construct 1 139 1.945910 1.945910 82 +professor 1 137 1.945910 1.945910 76 +perform 1 143 1.945910 1.945910 74 +welcom 1 122 2.079442 2.079442 99 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +real 2 93 2.397895 4.795790 144 +comment 1 93 2.397895 2.397895 146 +member 1 84 2.484907 2.484907 165 +good 1 77 2.564949 2.564949 200 +knowledg 1 67 2.708050 2.708050 243 +view 1 70 2.708050 2.708050 254 +result 1 65 2.772589 2.772589 281 +content 1 59 2.833213 2.833213 302 +summer 1 56 2.890372 2.890372 311 +mine 1 26 3.688879 3.688879 654 +theunivers 1 21 3.912023 3.912023 797 +permit 1 16 4.174387 4.174387 962 +replic 1 12 4.465908 4.465908 1231 +incomplet 1 9 4.753590 4.753590 1575 +researchi 1 8 4.875197 4.875197 1756 +apolog 1 6 5.164786 5.164786 2046 +guangtian 2 3 5.857933 11.715866 3810 +inconveni 1 3 5.857933 5.857933 3866 +internship 1 3 5.857933 5.857933 3764 +liugt 2 1 6.957497 13.914994 13520 +homepagehi 1 1 6.957497 6.957497 13521 +timeschedul 1 1 6.957497 6.957497 13522 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lorenzo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lorenzo^ new file mode 100644 index 00000000..f017f636 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lorenzo^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +cornel 2 215 1.386294 2.772588 23 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +fall 2 181 1.609438 3.218876 40 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +distribut 3 162 1.791759 5.375277 51 +phone 1 175 1.791759 1.791759 45 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +topic 1 114 2.197225 2.197225 110 +taylor 1 63 2.772589 2.772589 287 +locat 1 59 2.833213 2.833213 303 +special 1 56 2.890372 2.890372 320 +physic 1 47 3.091042 3.091042 377 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +photo 1 31 3.496508 3.496508 561 +emphasi 1 22 3.850148 3.850148 755 +itali 1 11 4.553877 4.553877 1378 +interestsi 1 7 5.010635 5.010635 1969 +lorenzo 5 4 5.568345 27.841725 2588 +sytem 1 4 5.568345 5.568345 3015 +maria 1 4 5.568345 5.568345 2954 +alvisi 3 3 5.857933 17.573799 3095 +universit 1 2 6.263398 6.263398 5630 +bologna 1 2 6.263398 6.263398 5631 +laurea 1 1 6.957497 6.957497 13523 +agrav 1 1 6.957497 6.957497 13524 +taylorhal 1 1 6.957497 6.957497 13525 +campusshow 1 1 6.957497 6.957497 13526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^luxue^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^luxue^ new file mode 100644 index 00000000..a0c98f3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^luxue^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +current 3 284 1.098612 3.295836 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +link 2 247 1.386294 2.772588 24 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +site 1 106 2.197225 2.197225 119 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +internet 1 83 2.484907 2.484907 186 +ieee 1 86 2.484907 2.484907 190 +come 1 78 2.564949 2.564949 202 +onlin 1 75 2.639057 2.639057 223 +dept 1 64 2.772589 2.772589 291 +undergradu 1 54 2.944439 2.944439 338 +cool 1 49 3.044522 3.044522 374 +compani 1 41 3.218876 3.218876 423 +music 1 42 3.218876 3.218876 436 +live 1 40 3.258097 3.258097 451 +microsoft 2 38 3.295837 6.591674 468 +china 1 37 3.332205 3.332205 487 +chines 1 29 3.583519 3.583519 595 +weather 1 28 3.610918 3.610918 618 +univ 1 28 3.610918 3.610918 617 +campu 1 27 3.637586 3.637586 623 +thank 1 23 3.806662 3.806662 721 +corpor 1 21 3.912023 3.912023 802 +sigmod 1 19 4.007333 4.007333 877 +tsinghua 1 13 4.382027 4.382027 1195 +shop 1 10 4.653960 4.653960 1469 +siggraph 1 8 4.875197 4.875197 1773 +dictionari 1 8 4.875197 4.875197 1642 +hunt 1 7 5.010635 5.010635 1798 +sigcomm 1 5 5.347108 5.347108 2329 +sigir 1 2 6.263398 6.263398 4873 +addr 1 2 6.263398 6.263398 5628 +luxu 2 1 6.957497 13.914994 13527 +networksoth 1 1 6.957497 6.957497 13528 +studyut 1 1 6.957497 6.957497 13529 +universityaustin 1 1 6.957497 6.957497 13530 +siglink 1 1 6.957497 6.957497 13531 +sigmm 1 1 6.957497 6.957497 13532 +newsjob 1 1 6.957497 6.957497 13533 +forcast 1 1 6.957497 6.957497 13534 +xuelu 1 1 6.957497 6.957497 13535 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lwerth^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lwerth^ new file mode 100644 index 00000000..96141337 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^lwerth^ @@ -0,0 +1,90 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +engin 7 297 1.098612 7.690284 20 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +softwar 9 220 1.386294 12.476646 30 +link 2 247 1.386294 2.772588 24 +fall 2 181 1.609438 3.218876 40 +class 2 199 1.609438 3.218876 37 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +develop 2 174 1.791759 3.583518 53 +hour 1 165 1.791759 1.791759 46 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +texa 1 160 1.791759 1.791759 64 +object 2 138 1.945910 3.891820 79 +area 1 144 1.945910 1.945910 80 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +process 1 142 1.945910 1.945910 72 +tool 2 117 2.079442 4.158884 93 +confer 1 126 2.079442 2.079442 100 +technic 1 100 2.302585 2.302585 140 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +educ 4 86 2.484907 9.939628 191 +ieee 2 86 2.484907 4.969814 190 +journal 1 83 2.484907 2.484907 183 +orient 2 80 2.564949 5.129898 205 +issu 1 78 2.564949 2.564949 211 +interfac 1 79 2.564949 2.564949 209 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +august 1 66 2.708050 2.708050 257 +taylor 1 63 2.772589 2.772589 287 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +direct 2 57 2.890372 5.780744 316 +semest 1 58 2.890372 2.890372 312 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +transact 1 39 3.258097 3.258097 438 +industri 1 38 3.295837 3.295837 464 +committe 2 34 3.401197 6.802394 522 +john 1 33 3.433987 3.433987 532 +human 1 32 3.465736 3.465736 546 +chair 3 29 3.583519 10.750557 596 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +qualiti 1 20 3.951244 3.951244 832 +macintosh 1 17 4.110874 4.110874 920 +cognit 1 16 4.174387 4.174387 986 +researchmi 1 14 4.317488 4.317488 1119 +strength 1 9 4.753590 4.753590 1494 +metric 1 7 5.010635 5.010635 1831 +educurr 1 5 5.347108 5.347108 2504 +werth 6 4 5.568345 33.410070 3004 +engineeringc 1 4 5.568345 5.568345 2904 +contemporari 1 4 5.568345 5.568345 2719 +presentarea 1 4 5.568345 5.568345 3026 +andsoftwar 1 4 5.568345 5.568345 2753 +assur 1 4 5.568345 5.568345 2722 +ics 1 4 5.568345 5.568345 2779 +lauri 1 3 5.857933 5.857933 3867 +honour 2 2 6.263398 12.526796 5632 +werthlauri 1 1 6.957497 6.957497 13536 +werthlectur 1 1 6.957497 6.957497 13537 +lwerth 1 1 6.957497 6.957497 13538 +scienceprofession 1 1 6.957497 6.957497 13539 +servicevic 1 1 6.957497 6.957497 13540 +presentco 1 1 6.957497 6.957497 13541 +interestsoftwar 1 1 6.957497 6.957497 13542 +andenviron 1 1 6.957497 6.957497 13543 +publicationsl 1 1 6.957497 6.957497 13544 +tomayko 1 1 6.957497 6.957497 13545 +pagefaculti 1 1 6.957497 6.957497 13546 +profilesc 1 1 6.957497 6.957497 13547 +classeslast 1 1 6.957497 6.957497 13548 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^madhukar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^madhukar^ new file mode 100644 index 00000000..46563508 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^madhukar^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +offic 2 299 1.098612 2.197224 13 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +austin 3 168 1.791759 5.375277 63 +texa 3 160 1.791759 5.375277 64 +hall 1 146 1.945910 1.945910 65 +theori 1 111 2.197225 2.197225 127 +site 1 106 2.197225 2.197225 119 +info 1 85 2.484907 2.484907 176 +taylor 1 63 2.772589 2.772589 287 +dept 1 64 2.772589 2.772589 291 +complex 1 64 2.772589 2.772589 269 +interact 1 62 2.772589 2.772589 270 +author 1 39 3.258097 3.258097 450 +india 1 32 3.465736 3.465736 550 +univ 1 28 3.610918 3.610918 617 +comp 1 26 3.688879 3.688879 650 +reach 1 24 3.761200 3.761200 688 +offici 2 18 4.060443 8.120886 894 +avenu 1 12 4.465908 4.465908 1277 +madra 1 8 4.875197 4.875197 1770 +colloquium 1 8 4.875197 4.875197 1734 +cricket 1 7 5.010635 5.010635 1945 +oncomput 1 5 5.347108 5.347108 2326 +reddi 2 3 5.857933 11.715866 3277 +worldwid 1 3 5.857933 5.857933 3704 +madhukar 5 2 6.263398 31.316990 5633 +espnet 1 2 6.263398 6.263398 5634 +korupoluwelcom 1 1 6.957497 6.957497 13549 +ahom 1 1 6.957497 6.957497 13550 +madrashomepag 1 1 6.957497 6.957497 13551 +ganga 1 1 6.957497 6.957497 13552 +alumniclass 1 1 6.957497 6.957497 13553 +utalgorithm 1 1 6.957497 6.957497 13554 +sportszon 1 1 6.957497 6.957497 13555 +batchu 1 1 6.957497 6.957497 13556 +korupoluemail 1 1 6.957497 6.957497 13557 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mallory^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mallory^ new file mode 100644 index 00000000..2cf07e76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mallory^ @@ -0,0 +1,24 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +taylor 1 63 2.772589 2.772589 287 +simpl 1 60 2.833213 2.833213 298 +natur 1 44 3.135494 3.135494 406 +richard 1 31 3.496508 3.496508 559 +produc 1 30 3.555348 3.555348 572 +explan 1 16 4.174387 4.174387 985 +mallori 1 2 6.263398 6.263398 5635 +malloryrichard 1 1 6.957497 6.957497 13558 +malloryresearchthesi 1 1 6.957497 6.957497 13559 +quasi 1 1 6.957497 6.957497 13560 +qsimsimul 1 1 6.957497 6.957497 13561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^marco^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^marco^ new file mode 100644 index 00000000..0af02ad3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^marco^ @@ -0,0 +1,121 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 9 443 0.693147 6.238323 6 +inform 3 412 0.693147 2.079441 8 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +time 2 293 1.098612 2.197224 17 +offic 1 299 1.098612 1.098612 13 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +texa 2 160 1.791759 3.583518 64 +network 2 168 1.791759 3.583518 61 +distribut 2 162 1.791759 3.583518 51 +austin 2 168 1.791759 3.583518 63 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +confer 1 126 2.079442 2.079442 100 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +memori 1 101 2.302585 2.302585 139 +proceed 3 93 2.397895 7.193685 152 +real 1 93 2.397895 2.397895 144 +octob 1 89 2.397895 2.397895 156 +academ 1 82 2.484907 2.484907 178 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +internet 1 83 2.484907 2.484907 186 +state 5 76 2.564949 12.824745 207 +appear 2 78 2.564949 5.129898 210 +workshop 2 71 2.639057 5.278114 239 +symposium 1 72 2.639057 2.639057 238 +taylor 1 63 2.772589 2.772589 287 +march 1 61 2.833213 2.833213 295 +publish 1 57 2.890372 2.890372 326 +talk 1 53 2.944439 2.944439 336 +particular 1 51 2.995732 2.995732 352 +principl 1 48 3.044522 3.044522 357 +protocol 1 45 3.135494 3.135494 407 +execut 1 45 3.135494 3.135494 404 +third 2 43 3.178054 6.356108 412 +author 6 39 3.258097 19.548582 450 +annual 1 40 3.258097 3.258097 458 +submit 1 39 3.258097 3.258097 440 +correct 1 38 3.295837 3.295837 462 +vita 1 38 3.295837 3.295837 473 +respons 2 37 3.332205 6.664410 476 +tree 2 36 3.367296 6.734592 492 +survei 1 35 3.401197 3.401197 513 +toler 1 33 3.433987 3.433987 533 +fault 2 32 3.465736 6.931472 547 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +postal 1 30 3.555348 3.555348 580 +consid 1 29 3.583519 3.583519 590 +intend 1 28 3.610918 3.610918 599 +toward 1 25 3.737670 3.737670 668 +flow 5 24 3.761200 18.806000 700 +initi 1 23 3.806662 3.806662 717 +decis 1 23 3.806662 3.806662 728 +self 6 22 3.850148 23.100888 761 +identifi 1 22 3.850148 3.850148 760 +rout 4 21 3.912023 15.648092 793 +prepar 3 20 3.951244 11.853732 824 +finit 1 14 4.317488 4.317488 1106 +step 2 13 4.382027 8.764054 1138 +earlier 1 13 4.382027 4.382027 1140 +joint 1 13 4.382027 4.382027 1130 +stai 1 12 4.465908 4.465908 1215 +label 1 10 4.653960 4.653960 1423 +guarante 1 10 4.653960 4.653960 1391 +invit 1 10 4.653960 4.653960 1428 +minimum 2 9 4.753590 9.507180 1555 +candid 1 9 4.753590 4.753590 1606 +occur 1 9 4.753590 4.753590 1572 +said 1 9 4.753590 4.753590 1571 +depth 1 8 4.875197 4.875197 1636 +span 1 8 4.875197 4.875197 1751 +converg 1 7 5.010635 5.010635 1844 +kluwer 1 6 5.164786 5.164786 2143 +stabil 9 5 5.347108 48.123972 2286 +gouda 6 4 5.568345 33.410070 3021 +marco 2 4 5.568345 11.136690 2589 +maximum 2 4 5.568345 11.136690 2632 +implicit 1 4 5.568345 5.568345 2830 +arora 1 4 5.568345 5.568345 2658 +moham 6 3 5.857933 35.147598 3848 +fifteenth 1 3 5.857933 5.857933 3868 +forev 1 2 6.263398 6.263398 5636 +legitim 3 1 6.957497 20.872491 13562 +illegitim 3 1 6.957497 20.872491 13563 +schneidermarco 1 1 6.957497 6.957497 13564 +schneiderph 1 1 6.957497 6.957497 13565 +austinresearchth 1 1 6.957497 6.957497 13566 +itsstat 1 1 6.957497 6.957497 13567 +whenregardless 1 1 6.957497 6.957497 13568 +systemwhich 1 1 6.957497 6.957497 13569 +tolerantr 1 1 6.957497 6.957497 13570 +anish 1 1 6.957497 6.957497 13571 +silent 1 1 6.957497 6.957497 13572 +shlomi 1 1 6.957497 6.957497 13573 +dolev 1 1 6.957497 6.957497 13574 +ctaylor 1 1 6.957497 6.957497 13575 +usamarco 1 1 6.957497 6.957497 13576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markj^ new file mode 100644 index 00000000..1105f389 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markj^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +depart 3 457 0.693147 2.079441 12 +research 3 431 0.693147 2.079441 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +class 2 199 1.609438 3.218876 37 +utexa 1 189 1.609438 1.609438 44 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +avail 2 169 1.791759 3.583518 48 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +hall 2 146 1.945910 3.891820 65 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +postscript 1 131 2.079442 2.079442 90 +pleas 2 113 2.197225 4.394450 114 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +librari 1 87 2.484907 2.484907 181 +stuff 1 87 2.484907 2.484907 171 +orient 1 80 2.564949 2.564949 205 +addit 2 74 2.639057 5.278114 228 +taylor 2 63 2.772589 5.545178 287 +copi 1 63 2.772589 2.772589 284 +descript 1 64 2.772589 2.772589 271 +best 1 59 2.833213 2.833213 299 +semest 1 58 2.890372 2.890372 312 +allow 1 53 2.944439 2.944439 333 +finger 1 52 2.995732 2.995732 354 +run 1 51 2.995732 2.995732 347 +mark 2 44 3.135494 6.270988 403 +compani 1 41 3.218876 3.218876 423 +taught 2 33 3.433987 6.867974 526 +postal 1 30 3.555348 3.555348 580 +usual 1 28 3.610918 3.610918 608 +full 1 28 3.610918 3.610918 615 +linux 1 27 3.637586 3.637586 631 +reach 1 24 3.761200 3.761200 688 +alloc 1 20 3.951244 3.951244 821 +along 1 18 4.060443 4.060443 878 +brief 1 16 4.174387 4.174387 1001 +intel 1 16 4.174387 4.174387 1000 +precis 1 15 4.248495 4.248495 1023 +appl 2 11 4.553877 9.107754 1303 +routin 2 9 4.753590 9.507180 1549 +motorola 1 9 4.753590 4.753590 1546 +oop 1 8 4.875197 4.875197 1778 +pentium 1 6 5.164786 5.164786 2077 +glenn 1 3 5.857933 5.857933 3869 +down 1 3 5.857933 5.857933 3870 +informationi 1 3 5.857933 5.857933 3871 +listof 1 3 5.857933 5.857933 3322 +publicli 1 3 5.857933 5.857933 3687 +isvia 1 2 6.263398 6.263398 5637 +johnston 1 2 6.263398 6.263398 5638 +somerset 1 2 6.263398 6.263398 5639 +johnstonemark 1 1 6.957497 6.957497 13577 +johnstonecontact 1 1 6.957497 6.957497 13578 +markj 1 1 6.957497 6.957497 13579 +byrichard 1 1 6.957497 6.957497 13580 +brice 1 1 6.957497 6.957497 13581 +analysisclass 1 1 6.957497 6.957497 13582 +somersetdesign 1 1 6.957497 6.957497 13583 +centerresearch 1 1 6.957497 6.957497 13584 +garbagecollector 1 1 6.957497 6.957497 13585 +ofstudi 1 1 6.957497 6.957497 13586 +dissertationpropos 1 1 6.957497 6.957497 13587 +timingof 1 1 6.957497 6.957497 13588 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markng^ new file mode 100644 index 00000000..7919bc02 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markng^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +click 1 142 1.945910 1.945910 78 +visit 1 63 2.772589 2.772589 288 +point 1 58 2.890372 2.890372 319 +friend 1 48 3.044522 3.044522 376 +mark 1 44 3.135494 3.135494 403 +markng 1 1 6.957497 6.957497 13589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markus^ new file mode 100644 index 00000000..94717851 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^markus^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +link 3 247 1.386294 4.158882 24 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +phone 2 175 1.791759 3.583518 45 +address 2 170 1.791759 3.583518 62 +austin 2 168 1.791759 3.583518 63 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +hall 2 146 1.945910 3.891820 65 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +place 2 106 2.197225 4.394450 124 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +member 1 84 2.484907 2.484907 165 +internet 1 83 2.484907 2.484907 186 +wide 1 84 2.484907 2.484907 185 +state 1 76 2.564949 2.564949 207 +logic 1 71 2.639057 2.639057 230 +prof 2 64 2.772589 5.545178 273 +taylor 2 63 2.772589 5.545178 287 +result 1 65 2.772589 2.772589 281 +reason 1 57 2.890372 2.890372 318 +thesi 1 57 2.890372 2.890372 327 +archiv 2 49 3.044522 6.089044 364 +postal 1 30 3.555348 3.555348 580 +macintosh 1 17 4.110874 4.110874 920 +permit 1 16 4.174387 4.174387 962 +finit 1 14 4.317488 4.317488 1106 +verifi 1 12 4.465908 4.465908 1261 +worth 1 11 4.553877 4.553877 1294 +appl 1 11 4.553877 4.553877 1303 +incomplet 1 9 4.753590 4.753590 1575 +entri 1 8 4.875197 4.875197 1678 +researchi 1 8 4.875197 4.875197 1756 +misra 1 7 5.010635 5.010635 1856 +emerson 1 5 5.347108 5.347108 2547 +proposit 1 5 5.347108 5.347108 2339 +comprehens 1 4 5.568345 5.568345 2745 +marku 3 3 5.857933 17.573799 3872 +uniti 3 3 5.857933 17.573799 3812 +andwil 1 3 5.857933 5.857933 3335 +inconveni 1 3 5.857933 5.857933 3866 +groupand 1 3 5.857933 5.857933 3873 +isalso 1 2 6.263398 6.263398 5640 +kaltenbachmarku 1 1 6.957497 6.957497 13590 +kaltenbachintroductionwelcom 1 1 6.957497 6.957497 13591 +iapolog 1 1 6.957497 6.957497 13592 +spsp 1 1 6.957497 6.957497 13593 +stempor 1 1 6.957497 6.957497 13594 +checkerfor 1 1 6.957497 6.957497 13595 +avisit 1 1 6.957497 6.957497 13596 +theut 1 1 6.957497 6.957497 13597 +departmenthom 1 1 6.957497 6.957497 13598 +archivefor 1 1 6.957497 6.957497 13599 +sworld 1 1 6.957497 6.957497 13600 +supporthom 1 1 6.957497 6.957497 13601 +actansit 1 1 6.957497 6.957497 13602 +theatt 1 1 6.957497 6.957497 13603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^martym^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^martym^ new file mode 100644 index 00000000..412ab592 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^martym^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 4 571 0.000000 0.000000 5 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +austin 6 168 1.791759 10.750554 63 +texa 4 160 1.791759 7.167036 64 +address 2 170 1.791759 3.583518 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +read 1 154 1.791759 1.791759 47 +hall 1 146 1.945910 1.945910 65 +click 1 142 1.945910 1.945910 78 +homepag 1 93 2.397895 2.397895 148 +stuff 1 87 2.484907 2.484907 171 +educ 1 86 2.484907 2.484907 191 +resum 1 79 2.564949 2.564949 217 +taylor 1 63 2.772589 2.772589 287 +guid 1 63 2.772589 2.772589 267 +virtual 1 62 2.772589 2.772589 285 +local 1 55 2.944439 2.944439 334 +math 1 44 3.135494 3.135494 402 +kind 1 32 3.465736 3.465736 541 +postal 1 30 3.555348 3.555348 580 +neural 1 30 3.555348 3.555348 578 +utc 2 27 3.637586 7.275172 629 +demonstr 1 24 3.761200 3.761200 694 +daili 1 24 3.761200 3.761200 706 +displai 1 23 3.806662 3.806662 712 +applet 1 20 3.951244 3.951244 827 +hotlist 1 13 4.382027 4.382027 1199 +paus 1 4 5.568345 5.568345 2965 +mayberri 1 2 6.263398 6.263398 5641 +downtown 1 2 6.263398 6.263398 5642 +texan 1 2 6.263398 6.263398 5489 +memarti 1 1 6.957497 6.957497 13604 +researchal 1 1 6.957497 6.957497 13605 +martym 1 1 6.957497 6.957497 13606 +anywher 1 1 6.957497 6.957497 13607 +virtualc 1 1 6.957497 6.957497 13608 +internetrestaur 1 1 6.957497 6.957497 13609 +tnstechnolog 1 1 6.957497 6.957497 13610 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mccain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mccain^ new file mode 100644 index 00000000..f7b3c2ae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mccain^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 4 168 1.791759 7.167036 63 +avail 2 169 1.791759 3.583518 48 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +postscript 1 131 2.079442 2.079442 90 +onlin 2 75 2.639057 5.278114 223 +main 1 67 2.708050 2.708050 256 +colleg 1 61 2.833213 2.833213 300 +reason 2 57 2.890372 5.780744 318 +thesi 1 57 2.890372 2.890372 327 +advisor 1 51 2.995732 2.995732 355 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +expect 1 37 3.332205 3.332205 484 +titl 1 31 3.496508 3.496508 556 +action 1 15 4.248495 4.248495 1038 +philosophi 1 13 4.382027 4.382027 1167 +usavoic 1 13 4.382027 4.382027 1198 +vladimir 1 11 4.553877 4.553877 1324 +mepost 1 10 4.653960 4.653960 1472 +sciencesat 1 7 5.010635 5.010635 1968 +baker 1 7 5.010635 5.010635 1812 +causal 1 6 5.164786 5.164786 2024 +lifschitz 1 5 5.347108 5.347108 2542 +commonsens 1 4 5.568345 5.568345 2998 +nonmonoton 1 4 5.568345 5.568345 3023 +norm 2 2 6.263398 12.526796 5643 +kansa 1 2 6.263398 6.263398 5591 +interestscommonsens 1 2 6.263398 6.263398 5596 +actionlog 1 2 6.263398 6.263398 5597 +reasoningmi 1 2 6.263398 6.263398 5598 +mccain 2 1 6.957497 13.914994 13611 +mccainabout 1 1 6.957497 6.957497 13612 +mephd 1 1 6.957497 6.957497 13613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mecaliff^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mecaliff^ new file mode 100644 index 00000000..9321999e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mecaliff^ @@ -0,0 +1,43 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +address 2 170 1.791759 3.583518 62 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +machin 1 129 2.079442 2.079442 95 +check 1 115 2.197225 2.197225 118 +learn 2 86 2.484907 4.969814 170 +info 1 85 2.484907 2.484907 176 +educ 1 86 2.484907 2.484907 191 +logic 1 71 2.639057 2.639057 230 +taylor 1 63 2.772589 2.772589 287 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +postal 1 30 3.555348 3.555348 580 +english 2 15 4.248495 8.496990 1033 +mari 1 12 4.465908 4.465908 1266 +acquisit 1 10 4.653960 4.653960 1465 +elain 2 5 5.347108 10.694216 2496 +groupunivers 1 3 5.857933 5.857933 3831 +austinresearchmi 1 2 6.263398 6.263398 5644 +formor 1 2 6.263398 6.263398 5335 +mecaliff 1 2 6.263398 6.263398 5645 +baylor 3 1 6.957497 20.872491 13614 +califfmari 1 1 6.957497 6.957497 13615 +califfmachin 1 1 6.957497 6.957497 13616 +especiallyinduct 1 1 6.957497 6.957497 13617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mfkb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mfkb^ new file mode 100644 index 00000000..a0c47e99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mfkb^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 6 443 0.693147 4.158882 6 +research 5 431 0.693147 3.465735 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +us 6 329 1.098612 6.591672 16 +project 3 340 1.098612 3.295836 18 +current 1 284 1.098612 1.098612 21 +gener 4 220 1.386294 5.545176 27 +languag 2 227 1.386294 2.772588 26 +utexa 9 189 1.609438 14.484942 44 +group 3 183 1.609438 4.828314 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +base 15 165 1.791759 26.876385 50 +develop 3 174 1.791759 5.375277 53 +austin 2 168 1.791759 3.583518 63 +distribut 2 162 1.791759 3.583518 51 +recent 2 167 1.791759 3.583518 58 +texa 1 160 1.791759 1.791759 64 +phone 1 175 1.791759 1.791759 45 +model 5 145 1.945910 9.729550 69 +construct 3 139 1.945910 5.837730 82 +perform 2 143 1.945910 3.891820 74 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +technolog 1 131 2.079442 2.079442 102 +assist 2 112 2.197225 4.394450 113 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +world 1 115 2.197225 2.197225 126 +text 2 98 2.302585 4.605170 133 +part 1 98 2.302585 2.302585 129 +question 5 91 2.397895 11.989475 141 +sinc 1 90 2.397895 2.397895 159 +select 1 91 2.397895 2.397895 154 +help 4 83 2.484907 9.939628 175 +larg 3 82 2.484907 7.454721 168 +contain 3 81 2.484907 7.454721 174 +requir 2 81 2.484907 4.969814 167 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +method 2 80 2.564949 5.129898 213 +complet 1 77 2.564949 2.564949 208 +addit 1 74 2.639057 2.639057 228 +knowledg 16 67 2.708050 43.328800 243 +would 2 67 2.708050 5.416100 251 +test 2 66 2.708050 5.416100 252 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +simul 1 66 2.708050 2.708050 255 +result 2 65 2.772589 5.545178 281 +plan 2 65 2.772589 5.545178 272 +improv 1 62 2.772589 2.772589 289 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +collect 1 65 2.772589 2.772589 268 +automat 2 61 2.833213 5.666426 306 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +reason 1 57 2.890372 2.890372 318 +detail 1 57 2.890372 2.890372 321 +found 1 53 2.944439 2.944439 337 +extens 1 53 2.944439 2.944439 340 +numer 1 49 3.044522 3.044522 369 +pointer 1 48 3.044522 3.044522 368 +answer 4 45 3.135494 12.541976 391 +anoth 1 45 3.135494 3.135494 408 +natur 1 44 3.135494 3.135494 406 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +past 1 42 3.218876 3.218876 428 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +multipl 2 39 3.258097 6.516194 453 +littl 1 39 3.258097 3.258097 454 +ofth 1 36 3.367296 3.367296 491 +especi 1 36 3.367296 3.367296 496 +jame 2 35 3.401197 6.802394 507 +represent 1 35 3.401197 3.401197 512 +concept 1 32 3.465736 3.465736 537 +kind 1 32 3.465736 3.465736 541 +express 1 32 3.465736 3.465736 540 +extend 1 32 3.465736 3.465736 539 +domain 6 30 3.555348 21.332088 564 +steve 2 29 3.583519 7.167038 594 +built 2 29 3.583519 7.167038 592 +retriev 1 27 3.637586 3.637586 621 +task 3 25 3.737670 11.213010 678 +concern 2 25 3.737670 7.475340 666 +jeff 2 25 3.737670 7.475340 673 +begin 1 23 3.806662 3.806662 716 +varieti 2 22 3.850148 7.700296 740 +fact 1 21 3.912023 3.912023 780 +similar 1 21 3.912023 3.912023 771 +alumni 1 21 3.912023 3.912023 807 +expert 1 20 3.951244 3.951244 833 +predict 2 19 4.007333 8.014666 855 +encourag 1 18 4.060443 4.060443 880 +event 1 18 4.060443 4.060443 896 +appropri 1 18 4.060443 4.060443 883 +aid 1 18 4.060443 4.060443 904 +expand 1 17 4.110874 4.110874 928 +otherwis 1 17 4.110874 4.110874 922 +explan 4 16 4.174387 16.697548 985 +normal 1 16 4.174387 4.174387 995 +carl 2 15 4.248495 8.496990 1024 +biologi 2 15 4.248495 8.496990 1049 +english 1 15 4.248495 4.248495 1033 +shown 1 14 4.317488 4.317488 1080 +charl 2 13 4.382027 8.764054 1149 +composit 2 13 4.382027 8.764054 1150 +bruce 2 12 4.465908 8.931816 1226 +brad 1 12 4.465908 4.465908 1264 +peter 2 11 4.553877 9.107754 1316 +eight 1 11 4.553877 4.553877 1331 +qualit 1 11 4.553877 4.553877 1362 +rich 2 10 4.653960 9.307920 1396 +custom 1 10 4.653960 4.653960 1414 +significantli 1 9 4.753590 4.753590 1508 +tutor 1 9 4.753590 4.753590 1552 +mainten 1 9 4.753590 4.753590 1543 +herefor 1 9 4.753590 4.753590 1483 +erik 1 8 4.875197 4.875197 1701 +largest 1 7 5.010635 5.010635 1858 +fred 2 6 5.164786 10.329572 2072 +viewpoint 2 6 5.164786 10.329572 2116 +biolog 1 6 5.164786 5.164786 2147 +ongo 1 6 5.164786 5.164786 2215 +porter 4 5 5.347108 21.388432 2293 +correl 3 5 5.347108 16.041324 2279 +desk 3 5 5.347108 16.041324 2297 +oncomput 1 5 5.347108 5.347108 2326 +notabl 1 5 5.347108 5.347108 2276 +colleagu 1 5 5.347108 5.347108 2304 +focuss 1 5 5.347108 5.347108 2271 +clark 2 4 5.568345 11.136690 2705 +knight 1 4 5.568345 5.568345 2728 +souther 3 3 5.857933 17.573799 3795 +karl 2 3 5.857933 11.715866 3623 +multifunct 1 3 5.857933 5.857933 3826 +implicitli 1 3 5.857933 5.857933 3620 +qsim 1 3 5.857933 5.857933 3862 +proport 1 3 5.857933 5.857933 3293 +boe 1 3 5.857933 5.857933 3318 +mallori 3 2 6.263398 18.790194 5635 +bareiss 2 2 6.263398 12.526796 5646 +murrai 2 2 6.263398 12.526796 5647 +rickel 2 2 6.263398 12.526796 5648 +forconstruct 1 2 6.263398 6.263398 5649 +inon 1 2 6.263398 6.263398 4496 +arealso 1 2 6.263398 6.263398 5650 +knowledgebas 1 2 6.263398 6.263398 5136 +adequ 1 2 6.263398 6.263398 4116 +lexicon 1 2 6.263398 6.263398 5651 +brant 1 2 6.263398 6.263398 5652 +aroundth 1 2 6.263398 6.263398 5653 +prado 3 1 6.957497 20.872491 13618 +lester 3 1 6.957497 20.872491 13619 +callawai 2 1 6.957497 13.914994 13620 +andersen 2 1 6.957497 13.914994 13621 +acker 2 1 6.957497 13.914994 13622 +eilert 2 1 6.957497 13.914994 13623 +groupknowledg 1 1 6.957497 6.957497 13624 +overviewour 1 1 6.957497 6.957497 13625 +atuniv 1 1 6.957497 6.957497 13626 +currentexpert 1 1 6.957497 6.957497 13627 +broadknowledg 1 1 6.957497 6.957497 13628 +toexplain 1 1 6.957497 6.957497 13629 +answeringa 1 1 6.957497 6.957497 13630 +formallyrepres 1 1 6.957497 6.957497 13631 +thebiolog 1 1 6.957497 6.957497 13632 +andthos 1 1 6.957497 6.957497 13633 +beanswer 1 1 6.957497 6.957497 13634 +jeffrickel 1 1 6.957497 6.957497 13635 +taskof 1 1 6.957497 6.957497 13636 +thesimplest 1 1 6.957497 6.957497 13637 +dauntingrequir 1 1 6.957497 6.957497 13638 +manymodel 1 1 6.957497 6.957497 13639 +compilerand 1 1 6.957497 6.957497 13640 +bybuild 1 1 6.957497 6.957497 13641 +computingenviron 1 1 6.957497 6.957497 13642 +deskassist 1 1 6.957497 6.957497 13643 +squestion 1 1 6.957497 6.957497 13644 +projectsour 1 1 6.957497 6.957497 13645 +kned 1 1 6.957497 6.957497 13646 +kastl 1 1 6.957497 6.957497 13647 +fare 1 1 6.957497 6.957497 13648 +lex 1 1 6.957497 6.957497 13649 +tripel 1 1 6.957497 6.957497 13650 +theorist 1 1 6.957497 6.957497 13651 +searcher 1 1 6.957497 6.957497 13652 +alumna 1 1 6.957497 6.957497 13653 +lian 1 1 6.957497 6.957497 13654 +blumenth 1 1 6.957497 6.957497 13655 +eolu 1 1 6.957497 6.957497 13656 +uwyo 1 1 6.957497 6.957497 13657 +clarkp 1 1 6.957497 6.957497 13658 +redwood 1 1 6.957497 6.957497 13659 +ncsu 1 1 6.957497 6.957497 13660 +publicationsclick 1 1 6.957497 6.957497 13661 +projectsclick 1 1 6.957497 6.957497 13662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^miranker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^miranker^ new file mode 100644 index 00000000..a215e030 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^miranker^ @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 4 343 1.098612 4.394448 19 +current 3 284 1.098612 3.295836 21 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +algorithm 2 162 1.791759 3.583518 57 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +note 1 142 1.945910 1.945910 67 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +text 3 98 2.302585 6.907755 133 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +david 1 71 2.639057 2.639057 232 +goal 1 66 2.708050 2.708050 250 +knowledg 1 67 2.708050 2.708050 243 +execut 1 45 3.135494 3.135494 404 +past 2 42 3.218876 6.437752 428 +soon 1 36 3.367296 3.367296 494 +either 1 35 3.401197 3.401197 506 +bibliographi 1 34 3.401197 3.401197 518 +queri 1 33 3.433987 3.433987 524 +someth 1 31 3.496508 3.496508 554 +robert 1 30 3.555348 3.555348 567 +rule 4 26 3.688879 14.755516 638 +constraint 1 26 3.688879 3.688879 636 +sometim 1 24 3.761200 3.761200 696 +finish 1 22 3.850148 3.850148 748 +basi 1 20 3.951244 3.951244 828 +render 1 17 4.110874 4.110874 947 +match 2 16 4.174387 8.348774 965 +warn 1 14 4.317488 4.317488 1068 +daniel 2 12 4.465908 8.931816 1233 +evolv 1 12 4.465908 4.465908 1223 +candid 2 9 4.753590 9.507180 1606 +presenc 1 8 4.875197 4.875197 1671 +hold 1 8 4.875197 4.875197 1645 +lane 1 8 4.875197 4.875197 1720 +yang 1 8 4.875197 4.875197 1652 +wouldn 1 7 5.010635 5.010635 1970 +srinivasan 1 6 5.164786 5.164786 2175 +mirank 4 5 5.347108 21.388432 2543 +treat 2 5 5.347108 10.694216 2521 +breath 1 4 5.568345 5.568345 2946 +lanc 1 4 5.568345 5.568345 3022 +obsolet 1 3 5.857933 5.857933 3196 +byth 1 3 5.857933 5.857933 3874 +archi 1 3 5.857933 5.857933 3639 +ming 1 3 5.857933 5.857933 3712 +bibtex 3 2 6.263398 18.790194 5406 +leap 1 2 6.263398 6.263398 5654 +venu 1 2 6.263398 6.263398 5655 +usea 1 2 6.263398 6.263398 4800 +satisfact 1 2 6.263398 6.263398 5656 +roberto 1 2 6.263398 6.263398 5468 +bayardo 1 2 6.263398 6.263398 5467 +obermey 1 2 6.263398 6.263398 5657 +vaidyaraman 1 2 6.263398 6.263398 5658 +warshaw 1 2 6.263398 6.263398 5659 +rete 2 1 6.957497 13.914994 13663 +belat 1 1 6.957497 6.957497 13664 +fashionwai 1 1 6.957497 6.957497 13665 +itscomparison 1 1 6.957497 6.957497 13666 +encompass 1 1 6.957497 6.957497 13667 +fundamentalcomput 1 1 6.957497 6.957497 13668 +corollari 1 1 6.957497 6.957497 13669 +thatgoal 1 1 6.957497 6.957497 13670 +gadboi 1 1 6.957497 6.957497 13671 +vasili 1 1 6.957497 6.957497 13672 +samoladi 1 1 6.957497 6.957497 13673 +schrag 1 1 6.957497 6.957497 13674 +andrewsdavid 1 1 6.957497 6.957497 13675 +brantchin 1 1 6.957497 6.957497 13676 +kuoshiow 1 1 6.957497 6.957497 13677 +salvator 1 1 6.957497 6.957497 13678 +stolfo 1 1 6.957497 6.957497 13679 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^misra^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^misra^ new file mode 100644 index 00000000..7d70a9c6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^misra^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +design 2 213 1.386294 2.772588 25 +gener 1 220 1.386294 1.386294 27 +group 2 183 1.609438 3.218876 36 +parallel 2 169 1.791759 3.583518 60 +recent 1 167 1.791759 1.791759 58 +process 3 142 1.945910 5.837730 72 +hall 1 146 1.945910 1.945910 65 +technolog 1 131 2.079442 2.079442 102 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +access 1 102 2.302585 2.302585 136 +select 1 91 2.397895 2.397895 154 +homepag 1 93 2.397895 2.397895 148 +institut 1 84 2.484907 2.484907 187 +ieee 1 86 2.484907 2.484907 190 +method 1 80 2.564949 2.564949 213 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +appli 1 71 2.639057 2.639057 226 +practic 1 70 2.708050 2.708050 246 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +reason 1 57 2.890372 2.890372 318 +profession 1 51 2.995732 2.995732 345 +electron 1 47 3.091042 3.091042 379 +futur 1 41 3.218876 3.218876 427 +formal 2 37 3.332205 6.664410 478 +tech 1 35 3.401197 3.401197 515 +award 1 34 3.401197 3.401197 523 +john 2 33 3.433987 6.867974 532 +chair 2 29 3.583519 7.167038 596 +synchron 2 29 3.583519 7.167038 588 +mind 1 27 3.637586 3.637586 632 +aspect 1 25 3.737670 3.737670 663 +fellow 2 24 3.761200 7.522400 701 +honor 2 23 3.806662 7.613324 729 +equat 1 23 3.806662 3.806662 724 +indian 1 22 3.850148 3.850148 769 +inth 1 22 3.850148 3.850148 741 +particularli 1 19 4.007333 4.007333 867 +north 1 19 4.007333 4.007333 873 +coupl 1 17 4.110874 4.110874 939 +letter 1 16 4.174387 4.174387 981 +weslei 1 16 4.174387 4.174387 983 +researchmi 1 14 4.317488 4.317488 1119 +classic 1 14 4.317488 4.317488 1084 +recurs 1 13 4.382027 4.382027 1127 +addison 1 12 4.465908 4.465908 1230 +kanpur 1 8 4.875197 4.875197 1744 +simon 1 8 4.875197 4.875197 1697 +guggenheim 1 8 4.875197 4.875197 1759 +misra 6 7 5.010635 30.063810 1856 +prentic 1 7 5.010635 5.010635 1838 +phase 1 6 5.164786 5.164786 1977 +holland 1 5 5.347108 5.347108 2490 +jayadev 2 4 5.568345 11.136690 3006 +essai 1 4 5.568345 5.568345 2948 +interestparallel 1 3 5.857933 5.857933 3806 +publicationsj 1 3 5.857933 5.857933 3808 +hoar 1 3 5.857933 5.857933 3875 +nondeterminist 1 3 5.857933 5.857933 3560 +powerlist 1 2 6.263398 6.263398 5660 +loos 1 2 6.263398 6.263398 4774 +chandi 1 2 6.263398 6.263398 5661 +seuss 1 2 6.263398 6.263398 5662 +misrareg 1 1 6.957497 6.957497 13680 +hopkin 1 1 6.957497 6.957497 13681 +fellowarea 1 1 6.957497 6.957497 13682 +asynchronoussystem 1 1 6.957497 6.957497 13683 +otherpap 1 1 6.957497 6.957497 13684 +anoverview 1 1 6.957497 6.957497 13685 +apostscript 1 1 6.957497 6.957497 13686 +versionaccess 1 1 6.957497 6.957497 13687 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ml^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ml^ new file mode 100644 index 00000000..f49599a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ml^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 7 571 0.000000 0.000000 5 +comput 4 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 10 431 0.693147 6.931470 10 +system 10 443 0.693147 6.931470 6 +program 6 374 0.693147 4.158882 7 +interest 2 384 0.693147 1.386294 11 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +student 2 343 1.098612 2.197224 19 +languag 6 227 1.386294 8.317764 26 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +group 14 183 1.609438 22.532132 36 +utexa 6 189 1.609438 9.656628 44 +public 2 202 1.609438 3.218876 43 +list 2 201 1.609438 3.218876 39 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +base 4 165 1.791759 7.167036 50 +texa 2 160 1.791759 3.583518 64 +data 2 170 1.791759 3.583518 49 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +austin 1 168 1.791759 1.791759 63 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +model 2 145 1.945910 3.891820 69 +first 2 140 1.945910 3.891820 71 +professor 1 137 1.945910 1.945910 76 +click 1 142 1.945910 1.945910 78 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +problem 1 147 1.945910 1.945910 75 +machin 8 129 2.079442 16.635536 95 +confer 5 126 2.079442 10.397210 100 +theori 3 111 2.197225 6.591675 127 +intern 2 108 2.197225 4.394450 128 +site 1 106 2.197225 2.197225 119 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +associ 3 93 2.397895 7.193685 151 +pictur 1 89 2.397895 2.397895 160 +search 1 95 2.397895 2.397895 155 +learn 19 86 2.484907 47.213233 170 +journal 5 83 2.484907 12.424535 183 +control 1 82 2.484907 2.484907 164 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +intellig 7 72 2.639057 18.473399 225 +logic 4 71 2.639057 10.556228 230 +nation 1 74 2.639057 2.639057 240 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +order 3 69 2.708050 8.124150 249 +knowledg 1 67 2.708050 2.708050 243 +artifici 5 63 2.772589 13.862945 280 +plan 4 65 2.772589 11.090356 272 +special 2 56 2.890372 5.780744 320 +index 2 56 2.890372 5.780744 309 +reason 1 57 2.890372 2.890372 318 +scientif 1 53 2.944439 2.944439 341 +standard 1 48 3.044522 3.044522 365 +archiv 1 49 3.044522 3.044522 364 +natur 4 44 3.135494 12.541976 406 +combin 1 42 3.218876 3.218876 421 +autom 1 41 3.218876 3.218876 434 +form 1 39 3.258097 3.258097 443 +paul 1 38 3.295837 3.295837 471 +john 1 33 3.433987 3.433987 532 +queri 1 33 3.433987 3.433987 524 +ad 1 32 3.465736 3.465736 544 +richard 1 31 3.496508 3.496508 559 +utc 1 27 3.637586 3.637586 629 +american 1 27 3.637586 3.637586 634 +revis 2 26 3.688879 7.377758 640 +rule 2 26 3.688879 7.377758 638 +experiment 1 26 3.688879 3.688879 645 +subject 1 26 3.688879 3.688879 647 +jeff 1 25 3.737670 3.737670 673 +decis 1 23 3.806662 3.806662 728 +alumni 1 21 3.912023 3.912023 807 +supervis 1 20 3.951244 3.951244 840 +comparison 1 19 4.007333 4.007333 863 +partial 1 18 4.060443 4.060443 900 +repositori 1 17 4.110874 4.110874 932 +fourth 1 16 4.174387 4.174387 999 +explan 1 16 4.174387 4.174387 985 +atth 1 15 4.248495 4.248495 1019 +prolog 1 13 4.382027 4.382027 1155 +joint 1 13 4.382027 4.382027 1130 +mellon 1 13 4.382027 4.382027 1179 +mari 1 12 4.465908 4.465908 1266 +carnegi 1 12 4.465908 4.465908 1260 +induct 7 11 4.553877 31.877139 1304 +qualit 1 11 4.553877 4.553877 1362 +refin 1 11 4.553877 4.553877 1363 +acquisit 3 10 4.653960 13.961880 1465 +linguist 2 9 4.753590 9.507180 1593 +moonei 1 9 4.753590 4.753590 1520 +ataustin 1 9 4.753590 4.753590 1610 +tutor 1 9 4.753590 4.753590 1552 +classif 1 9 4.753590 4.753590 1586 +aaai 2 8 4.875197 9.750394 1750 +european 2 8 4.875197 9.750394 1763 +empir 1 8 4.875197 4.875197 1722 +irvin 1 8 4.875197 4.875197 1660 +illinoi 2 7 5.010635 10.021270 1941 +predic 1 7 5.010635 5.010635 1806 +planner 1 7 5.010635 5.010635 1797 +thompson 1 6 5.164786 5.164786 2049 +neither 1 6 5.164786 5.164786 1990 +machinelearn 1 6 5.164786 5.164786 2084 +oxford 1 6 5.164786 5.164786 2121 +bradlei 2 5 5.347108 10.694216 2554 +elain 1 5 5.347108 5.347108 2496 +proposit 1 5 5.347108 5.347108 2339 +sowmya 2 4 5.568345 11.136690 2670 +diagnosi 1 4 5.568345 5.568345 3027 +uncertain 1 4 5.568345 5.568345 2758 +invent 1 4 5.568345 5.568345 3028 +ijcai 1 4 5.568345 5.568345 2901 +hermjakob 1 3 5.857933 5.857933 3876 +ramachandran 1 3 5.857933 5.857933 3742 +cindi 1 3 5.857933 5.857933 3830 +acad 1 3 5.857933 5.857933 3847 +signll 1 3 5.857933 5.857933 3877 +ucpop 1 3 5.857933 5.857933 3878 +estlin 3 2 6.263398 18.790194 5554 +abduct 2 2 6.263398 12.526796 5663 +focuseson 1 2 6.263398 6.263398 5433 +califf 1 2 6.263398 6.263398 5664 +mecaliff 1 2 6.263398 6.263398 5645 +tara 1 2 6.263398 6.263398 5555 +cthomp 1 2 6.263398 6.263398 5530 +dirk 1 2 6.263398 6.263398 5665 +subramanian 1 2 6.263398 6.263398 5666 +georgetown 1 2 6.263398 6.263398 5667 +drake 1 2 6.263398 6.263398 5668 +accel 1 2 6.263398 6.263398 5166 +foidl 1 2 6.263398 6.263398 4270 +icml 1 2 6.263398 6.263398 5669 +quinlan 1 2 6.263398 6.263398 4797 +learner 1 2 6.263398 6.263398 4508 +prodigi 1 2 6.263398 6.263398 5670 +baff 2 1 6.957497 13.914994 13688 +mahonei 2 1 6.957497 13.914994 13689 +speedup 2 1 6.957497 13.914994 13690 +knowledgerefin 1 1 6.957497 6.957497 13691 +scicomp 1 1 6.957497 6.957497 13692 +firstadvisor 1 1 6.957497 6.957497 13693 +hwee 1 1 6.957497 6.957497 13694 +nhweetou 1 1 6.957497 6.957497 13695 +trantor 1 1 6.957497 6.957497 13696 +ourston 1 1 6.957497 6.957497 13697 +dirk_ourston 1 1 6.957497 6.957497 13698 +cpqm 1 1 6.957497 6.957497 13699 +saic 1 1 6.957497 6.957497 13700 +furtwangen 1 1 6.957497 6.957497 13701 +siddarth 1 1 6.957497 6.957497 13702 +zell 1 1 6.957497 6.957497 13703 +reasoningher 1 1 6.957497 6.957497 13704 +fort 1 1 6.957497 6.957497 13705 +chillin 1 1 6.957497 6.957497 13706 +dolphin 1 1 6.957497 6.957497 13707 +ilpnet 1 1 6.957497 6.957497 13708 +sigart 1 1 6.957497 6.957497 13709 +aritfici 1 1 6.957497 6.957497 13710 +biblio 1 1 6.957497 6.957497 13711 +jair 1 1 6.957497 6.957497 13712 +foil 1 1 6.957497 6.957497 13713 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mooney^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mooney^ new file mode 100644 index 00000000..d89eaee9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mooney^ @@ -0,0 +1,111 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 7 571 0.000000 0.000000 5 +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 4 412 0.693147 2.772588 8 +program 3 374 0.693147 2.079441 7 +research 2 431 0.693147 1.386294 10 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 5 160 1.791759 8.958795 64 +austin 5 168 1.791759 8.958795 63 +base 3 165 1.791759 5.375277 50 +address 3 170 1.791759 5.375277 62 +network 2 168 1.791759 3.583518 61 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +machin 3 129 2.079442 6.238326 95 +theori 1 111 2.197225 2.197225 127 +search 1 95 2.397895 2.397895 155 +learn 6 86 2.484907 14.909442 170 +control 1 82 2.484907 2.484907 164 +start 1 83 2.484907 2.484907 173 +exampl 2 77 2.564949 5.129898 195 +decemb 1 80 2.564949 2.564949 215 +complet 1 77 2.564949 2.564949 208 +intellig 2 72 2.639057 5.278114 225 +logic 1 71 2.639057 2.639057 230 +effici 1 73 2.639057 2.639057 233 +knowledg 1 67 2.708050 2.708050 243 +degre 1 69 2.708050 2.708050 259 +artifici 2 63 2.772589 5.545178 280 +improv 1 62 2.772589 2.772589 289 +plan 1 65 2.772589 2.772589 272 +prof 1 64 2.772589 2.772589 273 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +finger 1 52 2.995732 2.995732 354 +natur 1 44 3.135494 3.135494 406 +combin 1 42 3.218876 3.218876 421 +small 1 39 3.258097 3.258097 447 +vita 1 38 3.295837 3.295837 473 +word 1 34 3.401197 3.401197 508 +obtain 1 33 3.433987 3.433987 534 +posit 1 31 3.496508 3.496508 552 +computersci 1 30 3.555348 3.555348 562 +neural 1 30 3.555348 3.555348 578 +postal 1 30 3.555348 3.555348 580 +symbol 2 27 3.637586 7.275172 620 +rule 1 26 3.688879 3.688879 638 +compar 1 26 3.688879 3.688879 648 +qualiti 1 20 3.951244 3.951244 832 +lisp 1 18 4.060443 4.060443 897 +attend 1 18 4.060443 4.060443 893 +explan 1 16 4.174387 4.174387 985 +prolog 1 13 4.382027 4.382027 1155 +went 1 12 4.465908 4.465908 1279 +sens 1 11 4.553877 4.553877 1305 +induct 1 11 4.553877 4.553877 1304 +refin 1 11 4.553877 4.553877 1363 +acquisit 2 10 4.653960 9.307920 1465 +interestsmi 1 10 4.653960 4.653960 1462 +town 1 10 4.653960 4.653960 1458 +moonei 2 9 4.753590 9.507180 1520 +extract 1 8 4.875197 4.875197 1728 +empir 1 8 4.875197 4.875197 1722 +grew 1 8 4.875197 4.875197 1742 +illinoi 5 7 5.010635 25.053175 1941 +raymond 1 5 5.347108 5.347108 2313 +began 1 5 5.347108 5.347108 2498 +disambigu 1 4 5.568345 5.568345 2899 +bayesian 1 4 5.568345 5.568345 2671 +urbana 3 3 5.857933 17.573799 3879 +primarilyin 1 3 5.857933 5.857933 3832 +parser 1 3 5.857933 5.857933 3141 +myph 1 3 5.857933 5.857933 3880 +champaign 4 2 6.263398 25.053592 5671 +lexicon 1 2 6.263398 6.263398 5651 +highschool 1 2 6.263398 6.263398 5672 +homepageraymond 1 1 6.957497 6.957497 13714 +mooneyassoci 1 1 6.957497 6.957497 13715 +informationfal 1 1 6.957497 6.957497 13716 +learningspr 1 1 6.957497 6.957497 13717 +iiperson 1 1 6.957497 6.957497 13718 +historyi 1 1 6.957497 6.957497 13719 +fallon 1 1 6.957497 6.957497 13720 +wherestart 1 1 6.957497 6.957497 13721 +fallontownship 1 1 6.957497 6.957497 13722 +urbanato 1 1 6.957497 6.957497 13723 +learninggroup 1 1 6.957497 6.957497 13724 +gerald 1 1 6.957497 6.957497 13725 +dejong 1 1 6.957497 6.957497 13726 +meadowfir 1 1 6.957497 6.957497 13727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^moriarty^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^moriarty^ new file mode 100644 index 00000000..08f1351d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^moriarty^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +system 2 443 0.693147 1.386294 6 +research 2 431 0.693147 1.386294 10 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +link 5 247 1.386294 6.931470 24 +email 1 220 1.386294 1.386294 29 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +austin 5 168 1.791759 8.958795 63 +texa 3 160 1.791759 5.375277 64 +network 2 168 1.791759 3.583518 61 +address 2 170 1.791759 3.583518 62 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +number 1 130 2.079442 2.079442 97 +world 1 115 2.197225 2.197225 126 +specif 1 106 2.197225 2.197225 106 +select 2 91 2.397895 4.795790 154 +mani 1 92 2.397895 2.397895 150 +real 1 93 2.397895 2.397895 144 +follow 1 92 2.397895 2.397895 143 +homepag 1 93 2.397895 2.397895 148 +control 2 82 2.484907 4.969814 164 +resourc 1 81 2.484907 2.484907 172 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +appear 1 78 2.564949 2.564949 210 +dynam 1 76 2.564949 2.564949 194 +state 1 76 2.564949 2.564949 207 +intellig 1 72 2.639057 2.639057 225 +practic 1 70 2.708050 2.708050 246 +knowledg 1 67 2.708050 2.708050 243 +taylor 1 63 2.772589 2.772589 287 +plai 1 60 2.833213 2.833213 307 +local 1 55 2.944439 2.944439 334 +visitor 1 49 3.044522 3.044522 371 +must 1 40 3.258097 3.258097 442 +game 1 36 3.367296 3.367296 498 +return 2 34 3.401197 6.802394 502 +obtain 1 33 3.433987 3.433987 534 +neural 2 30 3.555348 7.110696 578 +domain 1 30 3.555348 3.555348 564 +postal 1 30 3.555348 3.555348 580 +utc 2 27 3.637586 7.275172 629 +task 4 25 3.737670 14.950680 678 +sport 1 25 3.737670 3.737670 683 +decis 4 23 3.806662 15.226648 728 +sequenc 1 23 3.806662 3.806662 734 +sequenti 1 22 3.850148 3.850148 745 +rout 1 21 3.912023 3.912023 793 +alloc 1 20 3.951244 3.951244 821 +agent 2 18 4.060443 8.120886 910 +upon 1 16 4.174387 4.174387 978 +action 3 15 4.248495 12.745485 1038 +universityof 1 15 4.248495 4.248495 1061 +dave 1 14 4.317488 4.317488 1098 +finit 1 14 4.317488 4.317488 1106 +misc 1 13 4.382027 4.382027 1124 +evolv 1 12 4.465908 4.465908 1223 +enter 1 10 4.653960 4.653960 1454 +total 1 10 4.653960 4.653960 1398 +observ 1 9 4.753590 4.753590 1578 +character 1 8 4.875197 4.875197 1767 +canb 1 7 5.010635 5.010635 1846 +highest 1 4 5.568345 5.568345 2950 +thesystem 1 3 5.857933 5.857933 3881 +scenario 1 2 6.263398 6.263398 5524 +geneticalgorithm 1 2 6.263398 6.263398 5673 +amparticularli 1 2 6.263398 6.263398 5558 +unavail 1 2 6.263398 6.263398 5046 +tulan 1 2 6.263398 6.263398 5559 +moriarti 2 1 6.957497 13.914994 13728 +moriartydav 1 1 6.957497 6.957497 13729 +researchsequenti 1 1 6.957497 6.957497 13730 +problemsinclud 1 1 6.957497 6.957497 13731 +stateof 1 1 6.957497 6.957497 13732 +selectanoth 1 1 6.957497 6.957497 13733 +payoff 1 1 6.957497 6.957497 13734 +madeor 1 1 6.957497 6.957497 13735 +thesequ 1 1 6.957497 6.957497 13736 +cumulativepayoff 1 1 6.957497 6.957497 13737 +iscurr 1 1 6.957497 6.957497 13738 +costli 1 1 6.957497 6.957497 13739 +havestudi 1 1 6.957497 6.957497 13740 +constraintsatisfact 1 1 6.957497 6.957497 13741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ new file mode 100644 index 00000000..bf9b5db1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^mwbarnes^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +utexa 3 189 1.609438 4.828314 44 +updat 1 191 1.609438 1.609438 41 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +educ 1 86 2.484907 2.484907 191 +mondai 1 77 2.564949 2.564949 206 +decemb 1 80 2.564949 2.564949 215 +map 1 39 3.258097 3.258097 452 +campu 1 27 3.637586 3.637586 623 +mine 2 26 3.688879 7.377758 654 +reach 1 24 3.761200 3.761200 688 +edulast 1 17 4.110874 4.110874 927 +utah 2 9 4.753590 9.507180 1585 +eduresearch 1 6 5.164786 5.164786 2205 +trail 1 6 5.164786 5.164786 2071 +mehom 1 4 5.568345 5.568345 2979 +wade 3 1 6.957497 20.872491 13742 +mwbarn 2 1 6.957497 13.914994 13743 +barnesm 1 1 6.957497 6.957497 13744 +barnesmwbarn 1 1 6.957497 6.957497 13745 +workhelp 1 1 6.957497 6.957497 13746 +pagestyp 1 1 6.957497 6.957497 13747 +literatureliteratur 1 1 6.957497 6.957497 13748 +notesclassesbackground 1 1 6.957497 6.957497 13749 +informationph 1 1 6.957497 6.957497 13750 +tanglebriar 1 1 6.957497 6.957497 13751 +yete 1 1 6.957497 6.957497 13752 +eduauthor 1 1 6.957497 6.957497 13753 +barnesemail 1 1 6.957497 6.957497 13754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ndale^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ndale^ new file mode 100644 index 00000000..8998beea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ndale^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +professor 2 137 1.945910 3.891820 76 +lectur 1 135 1.945910 1.945910 73 +spring 1 131 2.079442 2.079442 88 +document 1 121 2.079442 2.079442 89 +teach 2 108 2.197225 4.394450 112 +pleas 2 113 2.197225 4.394450 114 +person 1 111 2.197225 2.197225 117 +assist 1 112 2.197225 2.197225 113 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +sinc 1 90 2.397895 2.397895 159 +contain 3 81 2.484907 7.454721 174 +resum 1 79 2.564949 2.564949 217 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +window 1 68 2.708050 2.708050 242 +receiv 1 66 2.708050 2.708050 244 +abstract 1 62 2.772589 2.772589 276 +creat 1 63 2.772589 2.772589 277 +room 5 59 2.833213 14.166065 301 +summer 2 56 2.890372 5.780744 311 +faculti 1 56 2.890372 2.890372 325 +direct 1 57 2.890372 2.890372 316 +profession 1 51 2.995732 2.995732 345 +right 1 48 3.044522 3.044522 363 +author 2 39 3.258097 6.516194 450 +vita 1 38 3.295837 3.295837 473 +feel 1 37 3.332205 3.332205 483 +bibliographi 1 34 3.401197 3.401197 518 +curriculum 1 33 3.433987 3.433987 535 +travel 1 30 3.555348 3.555348 579 +chair 1 29 3.583519 3.583519 596 +full 1 28 3.610918 3.610918 615 +load 1 28 3.610918 3.610918 601 +reach 1 24 3.761200 3.761200 688 +brows 1 23 3.806662 3.806662 726 +reserv 1 20 3.951244 3.951244 808 +els 1 19 4.007333 4.007333 843 +spend 1 19 4.007333 4.007333 850 +account 1 18 4.060443 4.060443 882 +senior 1 14 4.317488 4.317488 1120 +settimeout 1 5 5.347108 5.347108 2536 +dale 4 4 5.568345 22.273380 2687 +seed 3 4 5.568345 16.705035 2984 +websit 1 4 5.568345 5.568345 2726 +timertwo 1 4 5.568345 5.568345 2985 +oftexa 1 4 5.568345 5.568345 3003 +whichcontain 1 4 5.568345 5.568345 2714 +scrollit_rl 1 3 5.857933 5.857933 3882 +retir 2 2 6.263398 12.526796 5674 +nell 3 1 6.957497 20.872491 13755 +pagesunivers 1 1 6.957497 6.957497 13756 +departmentwelcom 1 1 6.957497 6.957497 13757 +utaustin 1 1 6.957497 6.957497 13758 +fromful 1 1 6.957497 6.957497 13759 +falland 1 1 6.957497 6.957497 13760 +ofdissert 1 1 6.957497 6.957497 13761 +memento 1 1 6.957497 6.957497 13762 +nontechn 1 1 6.957497 6.957497 13763 +anycorrespond 1 1 6.957497 6.957497 13764 +ndale 1 1 6.957497 6.957497 13765 +profilepublicationsresearch 1 1 6.957497 6.957497 13766 +interestsperson 1 1 6.957497 6.957497 13767 +interestsnel 1 1 6.957497 6.957497 13768 +westlak 1 1 6.957497 6.957497 13769 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^neerajm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^neerajm^ new file mode 100644 index 00000000..78d0a604 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^neerajm^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +design 1 213 1.386294 1.386294 25 +support 1 132 1.945910 1.945910 83 +note 1 142 1.945910 1.945910 67 +pleas 1 113 2.197225 2.197225 114 +view 3 70 2.708050 8.124150 254 +browser 2 56 2.890372 5.780744 313 +without 1 50 3.044522 3.044522 370 +netscap 3 44 3.135494 9.406482 395 +keep 1 44 3.135494 3.135494 409 +download 1 36 3.367296 3.367296 489 +either 1 35 3.401197 3.401197 506 +mind 1 27 3.637586 3.637586 632 +background 1 25 3.737670 3.737670 664 +frame 2 24 3.761200 7.522400 684 +color 2 22 3.850148 7.700296 762 +navig 1 21 3.912023 3.912023 796 +choos 1 16 4.174387 4.174387 964 +pretti 1 13 4.382027 4.382027 1191 +latter 1 9 4.753590 4.753590 1522 +chosen 1 6 5.164786 5.164786 1984 +blame 1 3 5.857933 5.857933 3636 +neeraj 1 2 6.263398 6.263398 5577 +obnoxi 1 1 6.957497 6.957497 13770 +chartreus 1 1 6.957497 6.957497 13771 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ngk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ngk^ new file mode 100644 index 00000000..a09dfc6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ngk^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 2 160 1.791759 3.583518 64 +austin 1 168 1.791759 1.791759 63 +sciencesunivers 1 37 3.332205 3.332205 486 +log 1 19 4.007333 4.007333 857 +kumar 1 9 4.753590 4.753590 1506 +sciencedepart 1 6 5.164786 5.164786 2172 +natarajan 1 2 6.263398 6.263398 4377 +austini 1 2 6.263398 6.263398 5527 +gnan 1 1 6.957497 6.957497 13772 +pagegnana 1 1 6.957497 6.957497 13773 +edufind 1 1 6.957497 6.957497 13774 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nimar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nimar^ new file mode 100644 index 00000000..c017a6ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nimar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +number 1 130 2.079442 2.079442 97 +look 2 107 2.197225 4.394450 115 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +visitor 1 49 3.044522 3.044522 371 +term 1 43 3.178054 3.178054 411 +queri 1 33 3.433987 3.433987 524 +art 1 29 3.583519 3.583519 593 +quit 1 27 3.637586 3.637586 633 +altern 1 26 3.688879 3.688879 641 +output 1 21 3.912023 3.912023 788 +medic 1 17 4.110874 4.110874 958 +doesn 1 15 4.248495 4.248495 1055 +score 1 15 4.248495 4.248495 1017 +typic 1 11 4.553877 4.553877 1360 +hit 1 7 5.010635 5.010635 1965 +arora 2 4 5.568345 11.136690 2658 +ters 1 3 5.857933 5.857933 3297 +nimar 2 2 6.263398 12.526796 4188 +singh 1 2 6.263398 6.263398 5675 +knowwhat 1 2 6.263398 6.263398 5456 +clearer 1 2 6.263398 6.263398 5676 +bookmarksto 1 1 6.957497 6.957497 13775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nl-acq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nl-acq^ new file mode 100644 index 00000000..cd7b7a0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nl-acq^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +current 2 284 1.098612 2.197224 21 +languag 5 227 1.386294 6.931470 26 +group 4 183 1.609438 6.437752 36 +paper 3 205 1.609438 4.828314 38 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +network 1 168 1.791759 1.791759 61 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +machin 1 129 2.079442 2.079442 95 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +associ 1 93 2.397895 2.397895 151 +learn 3 86 2.484907 7.454721 170 +second 1 81 2.484907 2.484907 166 +novemb 1 81 2.484907 2.484907 179 +resum 1 79 2.564949 2.564949 217 +meet 2 72 2.639057 5.278114 229 +prof 2 64 2.772589 5.545178 273 +wednesdai 1 64 2.772589 2.772589 261 +januari 1 62 2.772589 2.772589 264 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +week 1 52 2.995732 2.995732 343 +archiv 1 49 3.044522 3.044522 364 +natur 3 44 3.135494 9.406482 406 +discuss 2 45 3.135494 6.270988 399 +third 1 43 3.178054 3.178054 412 +probabl 1 40 3.258097 3.258097 455 +close 1 38 3.295837 3.295837 465 +ofth 1 36 3.367296 3.367296 491 +everi 1 34 3.401197 3.401197 519 +print 1 34 3.401197 3.401197 503 +neural 1 30 3.555348 3.555348 578 +particip 1 29 3.583519 3.583519 589 +usual 1 28 3.610918 3.610918 608 +propos 1 28 3.610918 3.610918 602 +utc 3 27 3.637586 10.912758 629 +previous 1 17 4.110874 4.110874 923 +coordin 1 13 4.382027 4.382027 1182 +mari 1 12 4.465908 4.465908 1266 +acquisit 1 10 4.653960 4.653960 1465 +moonei 1 9 4.753590 4.753590 1520 +risto 1 9 4.753590 4.753590 1523 +linguist 1 9 4.753590 4.753590 1593 +miikkulainen 1 8 4.875197 4.875197 1667 +thompson 1 6 5.164786 5.164786 2049 +elain 1 5 5.347108 5.347108 2496 +tang 1 5 5.347108 5.347108 2409 +hermjakob 3 3 5.857933 17.573799 3876 +poon 1 3 5.857933 5.857933 3820 +cindi 1 3 5.857933 5.857933 3830 +signll 1 3 5.857933 5.857933 3877 +groupat 1 2 6.263398 6.263398 5677 +bobbi 1 2 6.263398 6.263398 5678 +califf 1 2 6.263398 6.263398 5664 +marti 1 2 6.263398 6.263398 5679 +mayberri 1 2 6.263398 6.263398 5641 +rupert 1 2 6.263398 6.263398 5680 +acquist 1 1 6.957497 6.957497 13776 +groupnatur 1 1 6.957497 6.957497 13777 +austinw 1 1 6.957497 6.957497 13778 +acquisitionand 1 1 6.957497 6.957497 13779 +havedrawn 1 1 6.957497 6.957497 13780 +bryant 1 1 6.957497 6.957497 13781 +ataustinlast 1 1 6.957497 6.957497 13782 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nn^ new file mode 100644 index 00000000..9e0c16a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^nn^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 3 431 0.693147 2.079441 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +group 2 183 1.609438 3.218876 36 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +structur 1 106 2.197225 2.197225 105 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +book 1 99 2.302585 2.302585 131 +sourc 1 77 2.564949 2.564949 201 +intellig 2 72 2.639057 5.278114 225 +artifici 2 63 2.772589 5.545178 280 +prof 1 64 2.772589 2.772589 273 +organ 1 65 2.772589 2.772589 265 +interact 1 62 2.772589 2.772589 270 +function 1 62 2.772589 2.772589 275 +detail 1 57 2.890372 2.890372 321 +visitor 1 49 3.044522 3.044522 371 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +neural 3 30 3.555348 10.666044 578 +utc 2 27 3.637586 7.275172 629 +decis 1 23 3.806662 3.806662 728 +self 1 22 3.850148 3.850148 761 +alumni 1 21 3.912023 3.912023 807 +newsgroup 1 21 3.912023 3.912023 783 +supervis 1 20 3.951244 3.951244 840 +hypertext 1 19 4.007333 4.007333 865 +concentr 1 18 4.060443 4.060443 906 +demo 1 18 4.060443 4.060443 888 +later 1 15 4.248495 4.248495 1043 +evolv 1 12 4.465908 4.465908 1223 +genet 1 10 4.653960 4.653960 1409 +ataustin 1 9 4.753590 4.753590 1610 +risto 1 9 4.753590 4.753590 1523 +miikkulainen 1 8 4.875197 4.875197 1667 +poster 1 7 5.010635 5.010635 1814 +schema 1 6 5.164786 5.164786 1988 +groupth 1 5 5.347108 5.347108 2549 +net 2 4 5.568345 11.136690 2741 +episod 1 4 5.568345 5.568345 2747 +cortic 1 3 5.857933 5.857933 3857 +privat 1 3 5.857933 5.857933 3496 +andcognit 1 2 6.263398 6.263398 5681 +ristomiikkulainen 1 1 6.957497 6.957497 13783 +basedvis 1 1 6.957497 6.957497 13784 +mapbelow 1 1 6.957497 6.957497 13785 +thecortex 1 1 6.957497 6.957497 13786 +linkswusagemartym 1 1 6.957497 6.957497 13787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^novak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^novak^ new file mode 100644 index 00000000..c88406d7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^novak^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +program 4 374 0.693147 2.772588 7 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +us 2 329 1.098612 2.197224 16 +offic 2 299 1.098612 2.197224 13 +softwar 1 220 1.386294 1.386294 30 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +texa 3 160 1.791759 5.375277 64 +austin 2 168 1.791759 3.583518 63 +problem 2 147 1.945910 3.891820 75 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +graphic 2 90 2.397895 4.795790 147 +associ 1 93 2.397895 2.397895 151 +internet 1 83 2.484907 2.484907 186 +activ 1 84 2.484907 2.484907 182 +server 2 76 2.564949 5.129898 204 +interfac 1 79 2.564949 2.564949 209 +state 1 76 2.564949 2.564949 207 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +solv 1 73 2.639057 2.639057 234 +interact 3 62 2.772589 8.317767 270 +artifici 2 63 2.772589 5.545178 280 +creat 1 63 2.772589 2.772589 277 +automat 3 61 2.833213 8.499639 306 +physic 3 47 3.091042 9.273126 377 +vita 1 38 3.295837 3.295837 473 +connect 1 37 3.332205 3.332205 485 +common 1 30 3.555348 3.555348 574 +measur 1 28 3.610918 3.610918 609 +univ 1 28 3.610918 3.610918 617 +honor 1 23 3.806662 3.806662 729 +director 1 22 3.850148 3.850148 767 +unit 3 21 3.912023 11.736069 779 +expert 1 20 3.951244 3.951244 833 +lisp 6 18 4.060443 24.362658 897 +demo 5 18 4.060443 20.302215 888 +atth 1 15 4.248495 4.248495 1019 +english 1 15 4.248495 4.248495 1033 +draw 2 14 4.317488 8.634976 1086 +convert 1 13 4.382027 4.382027 1122 +speech 1 12 4.465908 4.465908 1222 +ofcomput 1 10 4.653960 4.653960 1442 +novak 3 9 4.753590 14.260770 1521 +reus 1 8 4.875197 4.875197 1661 +convers 1 8 4.875197 4.875197 1673 +gordon 3 6 5.164786 15.494358 2032 +shell 1 5 5.347108 5.347108 2353 +diagram 1 5 5.347108 5.347108 2346 +highest 1 4 5.568345 5.568345 2950 +intelligencec 1 4 5.568345 5.568345 2673 +isaac 1 3 5.857933 5.857933 3855 +compilersc 1 2 6.263398 6.263398 4237 +intelligencelaboratori 1 1 6.957497 6.957497 13788 +genericalgorithmssolv 1 1 6.957497 6.957497 13789 +specifiedinformallyartifici 1 1 6.957497 6.957497 13790 +intelligencecurriculum 1 1 6.957497 6.957497 13791 +publicationsemploymentgrantsprofession 1 1 6.957497 6.957497 13792 +honorscurriculum 1 1 6.957497 6.957497 13793 +vitaefre 1 1 6.957497 6.957497 13794 +tmycin 1 1 6.957497 6.957497 13795 +emycin 1 1 6.957497 6.957497 13796 +lispconvers 1 1 6.957497 6.957497 13797 +measurementsoftwar 1 1 6.957497 6.957497 13798 +schemec 1 1 6.957497 6.957497 13799 +programmingweb 1 1 6.957497 6.957497 13800 +linksweatheraddress 1 1 6.957497 6.957497 13801 +ctai 1 1 6.957497 6.957497 13802 +austinaustintexa 1 1 6.957497 6.957497 13803 +faxnovak 1 1 6.957497 6.957497 13804 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^obrien^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^obrien^ new file mode 100644 index 00000000..d36d6db1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^obrien^ @@ -0,0 +1,52 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +welcom 1 122 2.079442 2.079442 99 +pleas 1 113 2.197225 2.197225 114 +mani 1 92 2.397895 2.397895 150 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +come 1 78 2.564949 2.564949 202 +want 1 79 2.564949 2.564949 199 +resum 1 79 2.564949 2.564949 217 +free 1 73 2.639057 2.639057 224 +august 1 66 2.708050 2.708050 257 +date 1 51 2.995732 2.995732 344 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +get 1 46 3.091042 3.091042 380 +paul 1 38 3.295837 3.295837 471 +feel 1 37 3.332205 3.332205 483 +download 1 36 3.367296 3.367296 489 +hope 1 28 3.610918 3.610918 610 +except 1 28 3.610918 3.610918 607 +thank 1 23 3.806662 3.806662 721 +size 1 23 3.806662 3.806662 713 +sent 1 22 3.850148 3.850148 763 +beauti 1 18 4.060443 4.060443 912 +anyon 1 17 4.110874 4.110874 916 +stock 1 16 4.174387 4.174387 1007 +wait 1 13 4.382027 4.382027 1168 +remov 1 12 4.465908 4.465908 1225 +enter 1 10 4.653960 4.653960 1454 +chanc 1 7 5.010635 5.010635 1960 +poster 1 7 5.010635 5.010635 1814 +hear 1 7 5.010635 5.010635 1940 +marri 1 7 5.010635 5.010635 1946 +feet 1 5 5.347108 5.347108 2492 +blow 1 5 5.347108 5.347108 2407 +complaint 1 4 5.568345 5.568345 2795 +queen 1 4 5.568345 5.568345 2919 +laugh 1 3 5.857933 5.857933 3659 +panic 2 2 6.263398 12.526796 5682 +gorgeou 1 2 6.263398 6.263398 5082 +meghan 2 1 6.957497 13.914994 13805 +insult 2 1 6.957497 13.914994 13806 +brienhi 1 1 6.957497 6.957497 13807 +wipe 1 1 6.957497 6.957497 13808 +crappi 1 1 6.957497 6.957497 13809 +obrien 1 1 6.957497 6.957497 13810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^oguer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^oguer^ new file mode 100644 index 00000000..3744584c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^oguer^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +databas 1 122 2.079442 2.079442 86 +theunivers 1 21 3.912023 3.912023 797 +systemsth 1 3 5.857933 5.857933 3835 +oguer 2 1 6.957497 13.914994 13811 +gutierrezogu 1 1 6.957497 6.957497 13812 +gutierrezth 1 1 6.957497 6.957497 13813 +austinprojectsomioswwhlinksconfer 1 1 6.957497 6.957497 13814 +worldemail 1 1 6.957497 6.957497 13815 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^oops^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^oops^ new file mode 100644 index 00000000..67df7ead --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^oops^ @@ -0,0 +1,210 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 5 443 0.693147 3.465735 6 +research 4 431 0.693147 2.772588 10 +interest 3 384 0.693147 2.079441 11 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +time 5 293 1.098612 5.493060 17 +us 3 329 1.098612 3.295836 16 +cours 2 273 1.098612 2.197224 15 +also 4 259 1.386294 5.545176 28 +languag 3 227 1.386294 4.158882 26 +gener 2 220 1.386294 2.772588 27 +graduat 1 215 1.386294 1.386294 31 +paper 6 205 1.609438 9.656628 38 +group 3 183 1.609438 4.828314 36 +includ 3 208 1.609438 4.828314 42 +list 2 201 1.609438 3.218876 39 +utexa 1 189 1.609438 1.609438 44 +avail 5 169 1.791759 8.958795 48 +distribut 2 162 1.791759 3.583518 51 +develop 2 174 1.791759 3.583518 53 +implement 2 152 1.791759 3.583518 52 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +object 4 138 1.945910 7.783640 79 +file 3 132 1.945910 5.837730 70 +note 2 142 1.945910 3.891820 67 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +like 1 132 1.945910 1.945910 81 +compil 4 122 2.079442 8.317768 96 +introduct 3 126 2.079442 6.238326 87 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +manag 3 114 2.197225 6.591675 125 +code 3 108 2.197225 6.591675 116 +site 3 106 2.197225 6.591675 119 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +memori 7 101 2.302585 16.118095 139 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +real 3 93 2.397895 7.193685 144 +call 1 91 2.397895 2.397895 153 +homepag 1 93 2.397895 2.397895 148 +larg 2 82 2.484907 4.969814 168 +info 1 85 2.484907 2.484907 176 +contain 1 81 2.484907 2.484907 174 +thing 1 84 2.484907 2.484907 189 +master 3 76 2.564949 7.694847 216 +sourc 3 77 2.564949 7.694847 201 +interfac 2 79 2.564949 5.129898 209 +orient 2 80 2.564949 5.129898 205 +good 2 77 2.564949 5.129898 200 +dynam 1 76 2.564949 2.564949 194 +refer 1 78 2.564949 2.564949 203 +effici 2 73 2.639057 5.278114 233 +write 1 72 2.639057 2.639057 222 +onlin 1 75 2.639057 2.639057 223 +materi 1 75 2.639057 2.639057 221 +workshop 1 71 2.639057 2.639057 239 +free 1 73 2.639057 2.639057 224 +integr 1 67 2.708050 2.708050 245 +collect 4 65 2.772589 11.090356 268 +virtual 2 62 2.772589 5.545178 285 +descript 2 64 2.772589 5.545178 271 +prof 1 64 2.772589 2.772589 273 +written 1 63 2.772589 2.772589 278 +improv 1 62 2.772589 2.772589 289 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +thesi 3 57 2.890372 8.671116 327 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +extens 3 53 2.944439 8.833317 340 +three 1 54 2.944439 2.944439 330 +local 1 55 2.944439 2.944439 334 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +pointer 2 48 3.044522 6.089044 368 +standard 2 48 3.044522 6.089044 365 +basic 1 50 3.044522 3.044522 360 +done 1 47 3.091042 3.091042 381 +adapt 1 46 3.091042 3.091042 387 +mark 1 44 3.135494 3.135494 403 +keep 1 44 3.135494 3.135494 409 +anoth 1 45 3.135494 3.135494 408 +cach 1 41 3.218876 3.218876 432 +small 1 39 3.258097 3.258097 447 +form 1 39 3.258097 3.258097 443 +paul 4 38 3.295837 13.183348 471 +open 1 38 3.295837 3.295837 469 +especi 3 36 3.367296 10.101888 496 +survei 2 35 3.401197 6.802394 513 +michael 1 35 3.401197 3.401197 514 +bibliographi 1 34 3.401197 3.401197 518 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +extend 1 32 3.465736 3.465736 539 +storag 1 31 3.496508 3.496508 553 +hard 1 30 3.555348 3.555348 563 +travel 1 30 3.555348 3.555348 579 +releas 1 28 3.610918 3.610918 616 +progress 1 28 3.610918 3.610918 598 +great 1 27 3.637586 3.637586 626 +although 1 25 3.737670 3.737670 667 +store 3 24 3.761200 11.283600 693 +interpret 2 24 3.761200 7.522400 686 +mike 1 24 3.761200 3.761200 703 +highli 1 23 3.806662 3.806662 725 +thread 1 23 3.806662 3.806662 722 +brows 1 23 3.806662 3.806662 726 +hierarchi 1 22 3.850148 3.850148 744 +try 1 22 3.850148 3.850148 764 +scheme 8 20 3.951244 31.609952 818 +alloc 3 20 3.951244 11.853732 821 +supervis 1 20 3.951244 3.951244 840 +portabl 1 20 3.951244 3.951244 819 +mostli 1 19 4.007333 4.007333 869 +scott 1 18 4.060443 4.060443 884 +behavior 1 18 4.060443 4.060443 881 +debug 1 17 4.110874 4.110874 944 +coupl 1 17 4.110874 4.110874 939 +intro 1 17 4.110874 4.110874 915 +georg 1 16 4.174387 4.174387 994 +stock 1 16 4.174387 4.174387 1007 +brief 1 16 4.174387 4.174387 1001 +carl 3 15 4.248495 12.745485 1024 +anywai 1 15 4.248495 4.248495 1047 +ascii 1 15 4.248495 4.248495 1032 +draft 2 14 4.317488 8.634976 1085 +anonym 1 14 4.317488 4.317488 1100 +stephen 3 11 4.553877 13.661631 1342 +persist 3 11 4.553877 13.661631 1367 +smart 1 11 4.553877 4.553877 1352 +alpha 1 11 4.553877 4.553877 1348 +henri 1 10 4.653960 4.653960 1417 +wilson 4 9 4.753590 19.014360 1536 +oop 2 8 4.875197 9.750394 1778 +besid 1 8 4.875197 4.875197 1681 +readm 1 8 4.875197 4.875197 1699 +baker 1 7 5.010635 5.010635 1812 +garbag 5 6 5.164786 25.823930 1986 +oopsla 1 6 5.164786 5.164786 2221 +dougla 1 5 5.347108 5.347108 2471 +decad 1 5 5.347108 5.347108 2455 +overload 1 5 5.347108 5.347108 2403 +whichcontain 2 4 5.568345 11.136690 2714 +ajit 1 3 5.857933 5.857933 3299 +qing 1 3 5.857933 5.857933 3295 +swizzl 1 3 5.857933 5.857933 3883 +andoper 1 3 5.857933 5.857933 3621 +forfault 1 3 5.857933 5.857933 3748 +rscheme 1 3 5.857933 5.857933 3250 +tosupport 1 3 5.857933 5.857933 3613 +providesa 1 3 5.857933 5.857933 3884 +heap 1 3 5.857933 5.857933 3123 +collector 3 2 6.263398 18.790194 5683 +sheetal 2 2 6.263398 12.526796 5684 +kakkad 2 2 6.263398 12.526796 5685 +donovan 2 2 6.263398 12.526796 4371 +kolbl 2 2 6.263398 12.526796 4372 +macro 2 2 6.263398 12.526796 5686 +johnston 1 2 6.263398 6.263398 5638 +repair 1 2 6.263398 6.263398 4198 +damag 1 2 6.263398 6.263398 5687 +checkpoint 1 2 6.263398 6.263398 4205 +programmingsystem 1 2 6.263398 6.263398 5688 +socket 1 2 6.263398 6.263398 4725 +materiali 1 2 6.263398 6.263398 4214 +subdirectori 1 2 6.263398 6.263398 4133 +han 1 2 6.263398 6.263398 4535 +neeli 2 1 6.957497 13.914994 13816 +groupoop 1 1 6.957497 6.957497 13817 +groupthi 1 1 6.957497 6.957497 13818 +studentsin 1 1 6.957497 6.957497 13819 +kaplan 1 1 6.957497 6.957497 13820 +wieren 1 1 6.957497 6.957497 13821 +toimplement 1 1 6.957497 6.957497 13822 +whichattempt 1 1 6.957497 6.957497 13823 +unsoundstudi 1 1 6.957497 6.957497 13824 +generationaland 1 1 6.957497 6.957497 13825 +ongarbag 1 1 6.957497 6.957497 13826 +managementfor 1 1 6.957497 6.957497 13827 +andcompress 1 1 6.957497 6.957497 13828 +noteson 1 1 6.957497 6.957497 13829 +rawascii 1 1 6.957497 6.957497 13830 +andrschemear 1 1 6.957497 6.957497 13831 +thesiscontain 1 1 6.957497 6.957497 13832 +whicharen 1 1 6.957497 6.957497 13833 +sometimesoon 1 1 6.957497 6.957497 13834 +htmlformat 1 1 6.957497 6.957497 13835 +materialfrom 1 1 6.957497 6.957497 13836 +expandedpresent 1 1 6.957497 6.957497 13837 +texinfo 1 1 6.957497 6.957497 13838 +metaobject 1 1 6.957497 6.957497 13839 +backgroundread 1 1 6.957497 6.957497 13840 +fortexa 1 1 6.957497 6.957497 13841 +sftp 1 1 6.957497 6.957497 13842 +notb 1 1 6.957497 6.957497 13843 +boehm 1 1 6.957497 6.957497 13844 +severalgarbag 1 1 6.957497 6.957497 13845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^otu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^otu^ new file mode 100644 index 00000000..a18566fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^otu^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +phone 1 175 1.791759 1.791759 45 +robert 1 30 3.555348 3.555348 567 +otuomagieaddress 1 1 6.957497 6.957497 13846 +emailotu 1 1 6.957497 6.957497 13847 +eduuniververs 1 1 6.957497 6.957497 13848 +infouniversityth 1 1 6.957497 6.957497 13849 +txa 1 1 6.957497 6.957497 13850 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^padgett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^padgett^ new file mode 100644 index 00000000..bede486f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^padgett^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 2 380 0.693147 1.386294 9 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +us 2 329 1.098612 2.197224 16 +current 1 284 1.098612 1.098612 21 +languag 4 227 1.386294 5.545176 26 +softwar 3 220 1.386294 4.158882 30 +also 2 259 1.386294 2.772588 28 +design 1 213 1.386294 1.386294 25 +class 2 199 1.609438 3.218876 37 +utexa 1 189 1.609438 1.609438 44 +avail 5 169 1.791759 8.958795 48 +implement 2 152 1.791759 3.583518 52 +austin 2 168 1.791759 3.583518 63 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +file 10 132 1.945910 19.459100 70 +construct 3 139 1.945910 5.837730 82 +professor 1 137 1.945910 1.945910 76 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +postscript 1 131 2.079442 2.079442 90 +specif 5 106 2.197225 10.986125 106 +follow 2 92 2.397895 4.795790 143 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +control 2 82 2.484907 4.969814 164 +environ 1 84 2.484907 2.484907 177 +contain 1 81 2.484907 2.484907 174 +requir 1 81 2.484907 2.484907 167 +interfac 4 79 2.564949 10.259796 209 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +view 3 70 2.708050 8.124150 254 +window 1 68 2.708050 2.708050 242 +virtual 7 62 2.772589 19.408123 285 +creat 1 63 2.772589 2.772589 277 +variou 1 56 2.890372 2.890372 317 +investig 1 51 2.995732 2.995732 353 +physic 2 47 3.091042 6.182084 377 +featur 1 46 3.091042 3.091042 386 +prototyp 1 38 3.295837 3.295837 463 +microsoft 1 38 3.295837 3.295837 468 +manual 1 35 3.401197 3.401197 504 +dissert 1 32 3.465736 3.465736 549 +compon 5 30 3.555348 17.776740 570 +domain 2 30 3.555348 7.110696 564 +focu 1 30 3.555348 3.555348 571 +specifi 1 30 3.555348 3.555348 568 +effort 1 26 3.688879 3.688879 652 +reduc 1 22 3.850148 3.850148 759 +thu 1 21 3.912023 3.912023 773 +devic 9 16 4.174387 37.569483 1002 +brown 1 16 4.174387 4.174387 977 +draft 1 14 4.317488 4.317488 1085 +transpar 1 11 4.553877 4.553877 1325 +devis 1 10 4.653960 4.653960 1451 +researchi 1 8 4.875197 4.875197 1756 +driver 1 8 4.875197 4.875197 1657 +counter 1 8 4.875197 4.875197 1765 +creation 1 6 5.164786 5.164786 2069 +andimplement 1 4 5.568345 5.568345 3029 +multifunct 1 3 5.857933 5.857933 3826 +meemail 1 3 5.857933 5.857933 3821 +edupost 1 3 5.857933 5.857933 3822 +export 2 2 6.263398 12.526796 5689 +manualfor 1 2 6.263398 6.263398 4720 +padgett 2 1 6.957497 13.914994 13851 +padgettdon 1 1 6.957497 6.957497 13852 +softar 1 1 6.957497 6.957497 13853 +powerpointvers 1 1 6.957497 6.957497 13854 +usafax 1 1 6.957497 6.957497 13855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pahardin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pahardin^ new file mode 100644 index 00000000..d5b673d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pahardin^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 3 431 0.693147 2.079441 10 +system 2 443 0.693147 1.386294 6 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +softwar 2 220 1.386294 2.772588 30 +gener 1 220 1.386294 1.386294 27 +utexa 2 189 1.609438 3.218876 44 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +austin 3 168 1.791759 5.375277 63 +avail 1 169 1.791759 1.791759 48 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +model 2 145 1.945910 3.891820 69 +file 1 132 1.945910 1.945910 70 +area 1 144 1.945910 1.945910 80 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +person 1 111 2.197225 2.197225 117 +access 1 102 2.302585 2.302585 136 +imag 1 91 2.397895 2.397895 161 +graphic 1 90 2.397895 2.397895 147 +librari 2 87 2.484907 4.969814 181 +school 1 84 2.484907 2.484907 188 +want 1 79 2.564949 2.564949 199 +server 1 76 2.564949 2.564949 204 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +window 1 68 2.708050 2.708050 242 +main 1 67 2.708050 2.708050 256 +plan 2 65 2.772589 5.545178 272 +automat 1 61 2.833213 2.833213 306 +space 1 57 2.890372 2.890372 310 +standard 1 48 3.044522 3.044522 365 +cool 1 49 3.044522 3.044522 374 +get 1 46 3.091042 3.091042 380 +execut 1 45 3.135494 3.135494 404 +realli 1 40 3.258097 3.258097 444 +game 2 36 3.367296 6.734592 498 +tree 2 36 3.367296 6.734592 492 +go 1 33 3.433987 3.433987 529 +handl 1 24 3.761200 3.761200 685 +inth 1 22 3.850148 3.850148 741 +binari 2 20 3.951244 7.902488 823 +wrote 1 20 3.951244 3.951244 830 +geometr 2 19 4.007333 8.014666 852 +partit 1 16 4.174387 4.174387 984 +unfortun 1 13 4.382027 4.382027 1170 +solari 1 12 4.465908 4.465908 1238 +guess 1 10 4.653960 4.653960 1443 +elimin 1 9 4.753590 4.753590 1558 +fail 1 8 4.875197 4.875197 1655 +port 1 8 4.875197 4.875197 1766 +reus 1 8 4.875197 4.875197 1661 +bug 1 7 5.010635 5.010635 1801 +philip 1 6 5.164786 5.164786 2005 +templat 1 5 5.347108 5.347108 2311 +anda 1 5 5.347108 5.347108 2416 +suno 1 4 5.568345 5.568345 2790 +screenshot 1 4 5.568345 5.568345 2743 +campbel 1 3 5.857933 5.857933 3272 +meemail 1 3 5.857933 5.857933 3821 +edupost 1 3 5.857933 5.857933 3822 +everywher 1 2 6.263398 6.263398 5690 +curli 1 2 6.263398 6.263398 5691 +battlebal 3 1 6.957497 20.872491 13856 +hardinphilip 1 1 6.957497 6.957497 13857 +hardinabout 1 1 6.957497 6.957497 13858 +fallback 1 1 6.957497 6.957497 13859 +multiplay 1 1 6.957497 6.957497 13860 +runsund 1 1 6.957497 6.957497 13861 +graphicssoftwar 1 1 6.957497 6.957497 13862 +programmingto 1 1 6.957497 6.957497 13863 +pahardin 1 1 6.957497 6.957497 13864 +usanetrek 1 1 6.957497 6.957497 13865 +pita 1 1 6.957497 6.957497 13866 +digitaldisast 1 1 6.957497 6.957497 13867 +plaster 1 1 6.957497 6.957497 13868 +congradul 1 1 6.957497 6.957497 13869 +smartest 1 1 6.957497 6.957497 13870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^paulmcq^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^paulmcq^ new file mode 100644 index 00000000..fb42bf5f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^paulmcq^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +utexa 1 189 1.609438 1.609438 44 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +schedul 1 119 2.079442 2.079442 85 +check 1 115 2.197225 2.197225 118 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +learn 2 86 2.484907 4.969814 170 +librari 1 87 2.484907 2.484907 181 +exampl 1 77 2.564949 2.564949 195 +addit 1 74 2.639057 2.639057 228 +intellig 1 72 2.639057 2.639057 225 +onlin 1 75 2.639057 2.639057 223 +knowledg 1 67 2.708050 2.708050 243 +interact 1 62 2.772589 2.772589 270 +taylor 1 63 2.772589 2.772589 287 +artifici 1 63 2.772589 2.772589 280 +think 1 57 2.890372 2.890372 314 +pointer 1 48 3.044522 3.044522 368 +natur 1 44 3.135494 3.135494 406 +mechan 1 43 3.178054 3.178054 416 +might 1 41 3.218876 3.218876 426 +paul 1 38 3.295837 3.295837 471 +postal 1 30 3.555348 3.555348 580 +neural 1 30 3.555348 3.555348 578 +usual 1 28 3.610918 3.610918 608 +head 1 23 3.806662 3.806662 732 +reflect 1 15 4.248495 4.248495 1034 +dave 1 14 4.317488 4.317488 1098 +hotlist 1 13 4.382027 4.382027 1199 +pascal 1 12 4.465908 4.465908 1213 +evolut 4 11 4.553877 18.215508 1314 +surf 1 11 4.553877 4.553877 1301 +death 1 10 4.653960 4.653960 1457 +handi 1 6 5.164786 5.164786 2111 +mix 1 6 5.164786 5.164786 2200 +studentdepart 1 5 5.347108 5.347108 2505 +explicitli 1 5 5.347108 5.347108 2308 +seriou 1 5 5.347108 5.347108 2252 +wast 1 5 5.347108 5.347108 2537 +austindepart 1 4 5.568345 5.568345 3008 +websit 1 4 5.568345 5.568345 2726 +neuro 1 2 6.263398 6.263398 4265 +mcquestenpaul 1 1 6.957497 6.957497 13871 +mcquestenphd 1 1 6.957497 6.957497 13872 +bepract 1 1 6.957497 6.957497 13873 +paulmcq 1 1 6.957497 6.957497 13874 +forcsp 1 1 6.957497 6.957497 13875 +programmingmor 1 1 6.957497 6.957497 13876 +inmoriarti 1 1 6.957497 6.957497 13877 +atcnr 1 1 6.957497 6.957497 13878 +rome 1 1 6.957497 6.957497 13879 +tout 1 1 6.957497 6.957497 13880 +winer 1 1 6.957497 6.957497 13881 +cynb 1 1 6.957497 6.957497 13882 +humong 1 1 6.957497 6.957497 13883 +knick 1 1 6.957497 6.957497 13884 +knack 1 1 6.957497 6.957497 13885 +nut 1 1 6.957497 6.957497 13886 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pawang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pawang^ new file mode 100644 index 00000000..8e9cf696 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pawang^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +utexa 3 189 1.609438 4.828314 44 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +group 1 183 1.609438 1.609438 36 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +network 1 168 1.791759 1.791759 61 +file 1 132 1.945910 1.945910 70 +check 1 115 2.197225 2.197225 118 +comment 1 93 2.397895 2.397895 146 +summari 1 73 2.639057 2.639057 237 +multimedia 4 68 2.708050 10.832200 258 +finger 1 52 2.995732 2.995732 354 +get 1 46 3.091042 3.091042 380 +protocol 1 45 3.135494 3.135494 407 +log 1 19 4.007333 4.007333 857 +affili 1 13 4.382027 4.382027 1194 +touch 1 12 4.465908 4.465908 1288 +goyal 1 3 5.857933 5.857933 3268 +pawang 3 1 6.957497 20.872491 13887 +pawan 1 1 6.957497 6.957497 13888 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pecina^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pecina^ new file mode 100644 index 00000000..ce86c44c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pecina^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +depart 3 457 0.693147 2.079441 12 +interest 3 384 0.693147 2.079441 11 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +current 3 284 1.098612 3.295836 21 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +also 3 259 1.386294 4.158882 28 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +group 4 183 1.609438 6.437752 36 +utexa 2 189 1.609438 3.218876 44 +paper 1 205 1.609438 1.609438 38 +austin 7 168 1.791759 12.542313 63 +texa 3 160 1.791759 5.375277 64 +parallel 2 169 1.791759 3.583518 60 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +theori 3 111 2.197225 6.591675 127 +center 1 88 2.397895 2.397895 158 +journal 1 83 2.484907 2.484907 183 +complet 1 77 2.564949 2.564949 208 +master 1 76 2.564949 2.564949 216 +want 1 79 2.564949 2.564949 199 +main 1 67 2.708050 2.708050 256 +previou 1 62 2.772589 2.772589 290 +explor 2 58 2.890372 5.780744 324 +thesi 1 57 2.890372 2.890372 327 +publish 1 57 2.890372 2.890372 326 +scientif 2 53 2.944439 5.888878 341 +three 1 54 2.944439 2.944439 330 +advisor 2 51 2.995732 5.991464 355 +numer 2 49 3.044522 6.089044 369 +visitor 1 49 3.044522 3.044522 371 +physic 5 47 3.091042 15.455210 377 +algebra 1 45 3.135494 3.135494 394 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +societi 1 40 3.258097 3.258097 456 +open 1 38 3.295837 3.295837 469 +vita 1 38 3.295837 3.295837 473 +field 1 37 3.332205 3.332205 482 +represent 2 35 3.401197 6.802394 512 +print 1 34 3.401197 3.401197 503 +obtain 2 33 3.433987 6.867974 534 +curriculum 1 33 3.433987 3.433987 535 +dissert 2 32 3.465736 6.931472 549 +posit 1 31 3.496508 3.496508 552 +spent 1 25 3.737670 3.737670 676 +finish 1 22 3.850148 3.850148 748 +sequenti 1 22 3.850148 3.850148 745 +half 1 21 3.912023 3.912023 776 +previous 1 17 4.110874 4.110874 923 +germani 1 17 4.110874 4.110874 946 +jose 2 16 4.174387 8.348774 976 +georg 1 16 4.174387 4.174387 994 +joint 1 13 4.382027 4.382027 1130 +econom 1 13 4.382027 4.382027 1184 +mellon 1 13 4.382027 4.382027 1179 +usavoic 1 13 4.382027 4.382027 1198 +calcul 1 12 4.465908 4.465908 1268 +carnegi 1 12 4.465908 4.465908 1260 +fill 1 11 4.553877 4.553877 1349 +cryptographi 1 9 4.753590 4.753590 1512 +rel 1 9 4.753590 4.753590 1487 +invari 2 8 4.875197 9.750394 1748 +pittsburgh 1 7 5.010635 5.010635 1938 +quantum 2 6 5.164786 10.329572 2214 +particl 1 5 5.347108 5.347108 2436 +invers 1 4 5.568345 5.568345 2764 +sudarshan 1 3 5.857933 5.857933 3885 +supervisor 1 3 5.857933 5.857933 3754 +quantiz 1 2 6.263398 6.263398 5692 +irreduc 1 2 6.263398 6.263398 4890 +lemk 1 2 6.263398 6.263398 5693 +thephys 1 2 6.263398 6.263398 5694 +symmetri 1 2 6.263398 6.263398 5517 +pecina 2 1 6.957497 13.914994 13889 +orpecina 2 1 6.957497 13.914994 13890 +pecinaabout 1 1 6.957497 6.957497 13891 +innuclear 1 1 6.957497 6.957497 13892 +workedinvestig 1 1 6.957497 6.957497 13893 +gaug 1 1 6.957497 6.957497 13894 +graviti 1 1 6.957497 6.957497 13895 +gravit 1 1 6.957497 6.957497 13896 +unitari 1 1 6.957497 6.957497 13897 +yuval 1 1 6.957497 6.957497 13898 +eman 1 1 6.957497 6.957497 13899 +jurgen 1 1 6.957497 6.957497 13900 +fromcologn 1 1 6.957497 6.957497 13901 +bureau 1 1 6.957497 6.957497 13902 +geologi 1 1 6.957497 6.957497 13903 +seismic 1 1 6.957497 6.957497 13904 +tomographi 1 1 6.957497 6.957497 13905 +hardag 1 1 6.957497 6.957497 13906 +geophys 1 1 6.957497 6.957497 13907 +geophysicist 1 1 6.957497 6.957497 13908 +comerci 1 1 6.957497 6.957497 13909 +solutionsin 1 1 6.957497 6.957497 13910 +chromodynamicsmi 1 1 6.957497 6.957497 13911 +defo 1 1 6.957497 6.957497 13912 +phy 1 1 6.957497 6.957497 13913 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pkn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pkn^ new file mode 100644 index 00000000..b3730f65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^pkn^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +file 1 132 1.945910 1.945910 70 +welcom 1 122 2.079442 2.079442 99 +find 2 111 2.197225 4.394450 111 +make 1 111 2.197225 2.197225 120 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +stuff 1 87 2.484907 2.484907 171 +chang 1 82 2.484907 2.484907 163 +know 3 80 2.564949 7.694847 198 +want 1 79 2.564949 2.564949 199 +name 1 72 2.639057 2.639057 220 +would 2 67 2.708050 5.416100 251 +window 1 68 2.708050 2.708050 242 +unix 2 58 2.890372 5.780744 308 +think 1 57 2.890372 2.890372 314 +cool 1 49 3.044522 3.044522 374 +could 1 46 3.091042 3.091042 383 +realli 1 40 3.258097 3.258097 444 +mean 1 37 3.332205 3.332205 477 +short 1 36 3.367296 3.367296 499 +staff 1 36 3.367296 3.367296 490 +experiment 1 26 3.688879 3.688879 645 +instal 1 22 3.850148 3.850148 754 +increas 1 20 3.951244 3.951244 829 +anyon 1 17 4.110874 4.110874 916 +protect 1 17 4.110874 4.110874 935 +drive 1 15 4.248495 4.248495 1052 +floor 1 14 4.317488 4.317488 1070 +stori 1 14 4.317488 4.317488 1087 +comic 1 14 4.317488 4.317488 1103 +neat 1 12 4.465908 4.465908 1263 +true 1 10 4.653960 4.653960 1422 +vista 1 10 4.653960 4.653960 1452 +chanc 1 7 5.010635 5.010635 1960 +escap 1 4 5.568345 5.568345 3016 +meyour 1 3 5.857933 5.857933 3858 +blah 1 2 6.263398 6.263398 5695 +drastic 1 2 6.263398 6.263398 4201 +woof 2 1 6.957497 13.914994 13914 +nettl 1 1 6.957497 6.957497 13915 +cornerinfolik 1 1 6.957497 6.957497 13916 +blahblah 1 1 6.957497 6.957497 13917 +eeek 1 1 6.957497 6.957497 13918 +ibm 1 1 6.957497 6.957497 13919 +afteri 1 1 6.957497 6.957497 13920 +theinnoc 1 1 6.957497 6.957497 13921 +buena 1 1 6.957497 6.957497 13922 +movieplex 1 1 6.957497 6.957497 13923 +employan 1 1 6.957497 6.957497 13924 +improb 1 1 6.957497 6.957497 13925 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^plaxton^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^plaxton^ new file mode 100644 index 00000000..815ce9f7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^plaxton^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +utexa 2 189 1.609438 3.218876 44 +modifi 1 178 1.609438 1.609438 35 +texa 2 160 1.791759 3.583518 64 +hall 2 146 1.945910 3.891820 65 +report 1 131 2.079442 2.079442 92 +decemb 1 80 2.564949 2.564949 215 +taylor 1 63 2.772589 2.772589 287 +annual 1 40 3.258097 3.258097 458 +postal 1 30 3.555348 3.555348 580 +greg 2 24 3.761200 7.522400 695 +eduphon 1 15 4.248495 4.248495 1060 +informationemail 1 9 4.753590 4.753590 1564 +austinaustin 1 7 5.010635 5.010635 1966 +plaxton 1 3 5.857933 5.857933 3886 +plaxtongreg 2 1 6.957497 13.914994 13926 +plaxtoncontact 1 1 6.957497 6.957497 13927 +sciencetaylor 1 1 6.957497 6.957497 13928 +profilepubl 1 1 6.957497 6.957497 13929 +plaxtonplaxton 1 1 6.957497 6.957497 13930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^porter^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^porter^ new file mode 100644 index 00000000..68d4424a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^porter^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +public 2 202 1.609438 3.218876 43 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +base 3 165 1.791759 5.375277 50 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +machin 3 129 2.079442 6.238326 95 +postscript 1 131 2.079442 2.079442 90 +theori 1 111 2.197225 2.197225 127 +site 1 106 2.197225 2.197225 119 +search 5 95 2.397895 11.989475 155 +question 2 91 2.397895 4.795790 141 +select 2 91 2.397895 4.795790 154 +learn 5 86 2.484907 12.424535 170 +build 1 85 2.484907 2.484907 184 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +method 1 80 2.564949 2.564949 213 +intellig 2 72 2.639057 5.278114 225 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +knowledg 3 67 2.708050 8.124150 243 +abstract 2 62 2.772589 5.545178 276 +artifici 1 63 2.772589 2.772589 280 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +investig 1 51 2.995732 2.995732 353 +case 1 51 2.995732 2.995732 351 +california 1 46 3.091042 3.091042 388 +answer 2 45 3.135494 6.270988 391 +editor 1 41 3.218876 3.218876 433 +autom 1 41 3.218876 3.218876 434 +press 1 42 3.218876 3.218876 419 +award 1 34 3.401197 3.401197 523 +tech 1 35 3.401197 3.401197 515 +concept 1 32 3.465736 3.465736 537 +domain 1 30 3.555348 3.555348 564 +scale 1 28 3.610918 3.610918 613 +rule 1 26 3.688879 3.688879 638 +fellow 1 24 3.761200 3.761200 701 +honor 1 23 3.806662 3.806662 729 +predict 1 19 4.007333 4.007333 855 +young 1 16 4.174387 4.174387 991 +cambridg 1 16 4.174387 4.174387 1008 +weak 1 13 4.382027 4.382027 1159 +hotlist 1 13 4.382027 4.382027 1199 +bruce 2 12 4.465908 8.931816 1226 +classif 1 9 4.753590 4.753590 1586 +aaai 2 8 4.875197 9.750394 1750 +irvin 1 8 4.875197 4.875197 1660 +presidenti 1 8 4.875197 4.875197 1737 +boundari 1 7 5.010635 5.010635 1929 +heurist 1 6 5.164786 5.164786 2125 +porter 4 5 5.347108 21.388432 2293 +complementari 1 5 5.347108 5.347108 2523 +presentarea 1 4 5.568345 5.568345 3026 +thetim 1 3 5.857933 5.857933 3581 +preced 1 3 5.857933 5.857933 3107 +researchinterest 1 2 6.263398 6.263398 5123 +rickel 1 2 6.263398 6.263398 5648 +andpostscript 1 2 6.263398 6.263398 5696 +brant 1 2 6.263398 6.263398 5652 +warrant 1 2 6.263398 6.263398 5697 +bareiss 1 2 6.263398 6.263398 5646 +porterassoci 1 1 6.957497 6.957497 13931 +interestartifici 1 1 6.957497 6.957497 13932 +researchhead 1 1 6.957497 6.957497 13933 +basesand 1 1 6.957497 6.957497 13934 +aait 1 1 6.957497 6.957497 13935 +holt 1 1 6.957497 6.957497 13936 +abstractand 1 1 6.957497 6.957497 13937 +reportport 1 1 6.957497 6.957497 13938 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^psp^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^psp^ new file mode 100644 index 00000000..08c46053 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^psp^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 8 374 0.693147 5.545176 7 +work 4 380 0.693147 2.772588 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +current 2 284 1.098612 2.197224 21 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +graduat 6 215 1.386294 8.317764 31 +design 2 213 1.386294 2.772588 25 +link 1 247 1.386294 1.386294 24 +languag 1 227 1.386294 1.386294 26 +group 5 183 1.609438 8.047190 36 +paper 4 205 1.609438 6.437752 38 +list 3 201 1.609438 4.828314 39 +includ 2 208 1.609438 3.218876 42 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +parallel 5 169 1.791759 8.958795 60 +avail 2 169 1.791759 3.583518 48 +network 2 168 1.791759 3.583518 61 +texa 1 160 1.791759 1.791759 64 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +area 2 144 1.945910 3.891820 80 +note 2 142 1.945910 3.891820 67 +model 1 145 1.945910 1.945910 69 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +introduct 2 126 2.079442 4.158884 87 +studi 2 120 2.079442 4.158884 91 +compil 2 122 2.079442 4.158884 96 +theori 3 111 2.197225 6.591675 127 +structur 2 106 2.197225 4.394450 105 +specif 1 106 2.197225 2.197225 106 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +book 4 99 2.302585 9.210340 131 +present 6 91 2.397895 14.387370 145 +mani 2 92 2.397895 4.795790 150 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +member 1 84 2.484907 2.484907 165 +refer 1 78 2.564949 2.564949 203 +state 1 76 2.564949 2.564949 207 +exampl 1 77 2.564949 2.564949 195 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +logic 2 71 2.639057 5.278114 230 +write 1 72 2.639057 2.639057 222 +effici 1 73 2.639057 2.639057 233 +differ 2 66 2.708050 5.416100 253 +written 2 63 2.772589 5.545178 278 +foundat 1 62 2.772589 2.772589 286 +result 1 65 2.772589 2.772589 281 +improv 1 62 2.772589 2.772589 289 +simpl 1 60 2.833213 2.833213 298 +reason 1 57 2.890372 2.890372 318 +variou 1 56 2.890372 2.890372 317 +sever 1 56 2.890372 2.890372 322 +detail 1 57 2.890372 2.890372 321 +overview 1 56 2.890372 2.890372 323 +thesi 1 57 2.890372 2.890372 327 +found 1 53 2.944439 2.944439 337 +give 1 50 3.044522 3.044522 359 +basic 1 50 3.044522 3.044522 360 +possibl 1 47 3.091042 3.091042 378 +electron 1 47 3.091042 3.091042 379 +understand 1 47 3.091042 3.091042 384 +mark 1 44 3.135494 3.135494 403 +made 1 44 3.135494 3.135494 398 +describ 1 45 3.135494 3.135494 400 +fast 1 42 3.218876 3.218876 429 +map 1 39 3.258097 3.258097 452 +correct 2 38 3.295837 6.591674 462 +concurr 1 34 3.401197 3.401197 501 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +chapter 1 32 3.465736 3.465736 536 +specifi 2 30 3.555348 7.110696 568 +power 1 30 3.555348 3.555348 573 +compon 1 30 3.555348 3.555348 570 +synchron 1 29 3.583519 3.583519 588 +aspect 1 25 3.737670 3.737670 663 +seri 1 24 3.761200 3.761200 708 +proof 3 23 3.806662 11.419986 720 +emphasi 1 22 3.850148 3.850148 755 +inth 1 22 3.850148 3.850148 741 +sort 1 22 3.850148 3.850148 738 +divis 1 21 3.912023 3.912023 803 +safeti 1 20 3.951244 3.951244 817 +assum 1 19 4.007333 4.007333 845 +prove 1 19 4.007333 4.007333 848 +stand 1 18 4.060443 4.060443 891 +adam 2 17 4.110874 8.221748 934 +former 1 17 4.110874 4.110874 956 +weslei 1 16 4.174387 4.174387 983 +reflect 1 15 4.248495 4.248495 1034 +hybrid 1 15 4.248495 4.248495 1057 +manner 1 14 4.317488 4.317488 1074 +balanc 1 14 4.317488 4.317488 1112 +circuit 2 13 4.382027 8.764054 1131 +recurs 1 13 4.382027 4.382027 1127 +composit 1 13 4.382027 4.382027 1150 +addison 1 12 4.465908 4.465908 1230 +verifi 1 12 4.465908 4.465908 1261 +summar 1 11 4.553877 4.553877 1295 +length 1 10 4.653960 4.653960 1400 +equal 1 10 4.653960 4.653960 1424 +arithmet 1 10 4.653960 4.653960 1388 +notat 5 9 4.753590 23.767950 1489 +ataustin 1 9 4.753590 4.753590 1610 +tempor 1 9 4.753590 4.753590 1584 +misra 7 7 5.010635 35.074445 1856 +restrict 1 6 5.164786 5.164786 2129 +multiprogram 1 6 5.164786 5.164786 2010 +jayadev 5 4 5.568345 27.841725 3006 +jacob 2 4 5.568345 11.136690 2667 +rigor 1 4 5.568345 5.568345 3030 +anddistribut 1 4 5.568345 5.568345 3031 +ofprogram 1 4 5.568345 5.568345 2624 +monograph 1 4 5.568345 5.568345 2860 +uniti 12 3 5.857933 70.295196 3812 +kornerup 3 3 5.857933 17.573799 3215 +marku 2 3 5.857933 11.715866 3872 +cohen 1 3 5.857933 5.857933 3652 +rajeev 1 3 5.857933 5.857933 3152 +checker 1 3 5.857933 5.857933 3644 +parallelalgorithm 1 3 5.857933 5.857933 3249 +alsoavail 1 3 5.857933 5.857933 3887 +powerlist 6 2 6.263398 37.580388 5660 +seuss 6 2 6.263398 37.580388 5662 +carruth 2 2 6.263398 12.526796 5495 +ofpap 2 2 6.263398 12.526796 4329 +erni 1 2 6.263398 6.263398 5104 +joshi 1 2 6.263398 6.263398 4202 +chandi 1 2 6.263398 6.263398 5661 +fourier 1 2 6.263398 6.263398 5698 +offspr 1 2 6.263398 6.263398 5699 +froma 1 2 6.263398 6.263398 4862 +basedprogram 1 2 6.263398 6.263398 5700 +kaltenbach 2 1 6.957497 13.914994 13939 +thepowerlist 2 1 6.957497 13.914994 13940 +austinpsp 1 1 6.957497 6.957497 13941 +austinthi 1 1 6.957497 6.957497 13942 +deriveparallel 1 1 6.957497 6.957497 13943 +issupervis 1 1 6.957497 6.957497 13944 +groupinclud 1 1 6.957497 6.957497 13945 +edgar 1 1 6.957497 6.957497 13946 +knapp 1 1 6.957497 6.957497 13947 +ingolf 1 1 6.957497 6.957497 13948 +krger 1 1 6.957497 6.957497 13949 +josyula 1 1 6.957497 6.957497 13950 +staskauska 1 1 6.957497 6.957497 13951 +publicationsbelow 1 1 6.957497 6.957497 13952 +wherev 1 1 6.957497 6.957497 13953 +topap 1 1 6.957497 6.957497 13954 +thenot 1 1 6.957497 6.957497 13955 +inchandi 1 1 6.957497 6.957497 13956 +amanuscript 1 1 6.957497 6.957497 13957 +newun 1 1 6.957497 6.957497 13958 +operatorco 1 1 6.957497 6.957497 13959 +forrefer 1 1 6.957497 6.957497 13960 +asymbol 1 1 6.957497 6.957497 13961 +forfinit 1 1 6.957497 6.957497 13962 +unityverifi 1 1 6.957497 6.957497 13963 +toinclud 1 1 6.957497 6.957497 13964 +twodiffer 1 1 6.957497 6.957497 13965 +succinct 1 1 6.957497 6.957497 13966 +givesnumer 1 1 6.957497 6.957497 13967 +batcher 1 1 6.957497 6.957497 13968 +asadd 1 1 6.957497 6.957497 13969 +multipli 1 1 6.957497 6.957497 13970 +addercircuit 1 1 6.957497 6.957497 13971 +programscan 1 1 6.957497 6.957497 13972 +speciallyhypercub 1 1 6.957497 6.957497 13973 +caninterfer 1 1 6.957497 6.957497 13974 +adisciplin 1 1 6.957497 6.957497 13975 +genrat 1 1 6.957497 6.957497 13976 +callsfor 1 1 6.957497 6.957497 13977 +anexperi 1 1 6.957497 6.957497 13978 +ingolfkrg 1 1 6.957497 6.957497 13979 +thepsp 1 1 6.957497 6.957497 13980 +sitejacob 1 1 6.957497 6.957497 13981 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qiming^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qiming^ new file mode 100644 index 00000000..495e0f51 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qiming^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +inform 3 412 0.693147 2.079441 8 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +email 2 220 1.386294 2.772588 29 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +modifi 1 178 1.609438 1.609438 35 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +phone 1 175 1.791759 1.791759 45 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +construct 1 139 1.945910 1.945910 82 +spring 1 131 2.079442 2.079442 88 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +center 2 88 2.397895 4.795790 158 +comment 2 93 2.397895 4.795790 146 +commun 1 95 2.397895 2.397895 157 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +server 1 76 2.564949 2.564949 204 +appli 1 71 2.639057 2.639057 226 +line 1 75 2.639057 2.639057 231 +name 1 72 2.639057 2.639057 220 +visit 1 63 2.772589 2.772589 288 +room 1 59 2.833213 2.833213 301 +march 1 61 2.833213 2.833213 295 +electr 1 38 3.295837 3.295837 461 +sciencesunivers 1 37 3.332205 3.332205 486 +connect 1 37 3.332205 3.332205 485 +client 1 25 3.737670 3.737670 679 +sign 1 16 4.174387 4.174387 970 +guest 4 12 4.465908 17.863632 1220 +huang 2 12 4.465908 8.931816 1202 +card 1 10 4.653960 4.653960 1435 +placement 1 10 4.653960 4.653960 1420 +telecommun 1 9 4.753590 4.753590 1565 +job 1 8 4.875197 4.875197 1702 +postcard 1 6 5.164786 5.164786 2181 +attract 1 5 5.347108 5.347108 2356 +hawaii 1 3 5.857933 5.857933 3888 +csc 1 3 5.857933 5.857933 3183 +nankai 1 2 6.263398 6.263398 5147 +tianjin 1 2 6.263398 6.263398 5148 +junk 1 2 6.263398 6.263398 5701 +qime 4 1 6.957497 27.829988 13982 +edureceiv 1 1 6.957497 6.957497 13983 +univeris 1 1 6.957497 6.957497 13984 +manoa 1 1 6.957497 6.957497 13985 +hawaiiwork 1 1 6.957497 6.957497 13986 +austincours 1 1 6.957497 6.957497 13987 +teamweb 1 1 6.957497 6.957497 13988 +utcssadaili 1 1 6.957497 6.957497 13989 +texanstock 1 1 6.957497 6.957497 13990 +picturesimageschines 1 1 6.957497 6.957497 13991 +popsend 1 1 6.957497 6.957497 13992 +jobtrakut 1 1 6.957497 6.957497 13993 +gopherftp 1 1 6.957497 6.957497 13994 +newstelnet 1 1 6.957497 6.957497 13995 +cschen 1 1 6.957497 6.957497 13996 +staffyour 1 1 6.957497 6.957497 13997 +commentsguest 1 1 6.957497 6.957497 13998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qr^ new file mode 100644 index 00000000..1dd1e1e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qr^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 5 431 0.693147 3.465735 10 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +paper 5 205 1.609438 8.047190 38 +group 2 183 1.609438 3.218876 36 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +applic 1 170 1.791759 1.791759 56 +texa 1 160 1.791759 1.791759 64 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +professor 1 137 1.945910 1.945910 76 +document 1 121 2.079442 2.079442 89 +world 2 115 2.197225 4.394450 126 +access 2 102 2.302585 4.605170 136 +user 1 104 2.302585 2.302585 137 +part 1 98 2.302585 2.302585 129 +book 1 99 2.302585 2.302585 131 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +abstract 1 62 2.772589 2.772589 276 +visit 1 63 2.772589 2.772589 288 +reason 7 57 2.890372 20.232604 318 +index 3 56 2.890372 8.671116 309 +sever 1 56 2.890372 2.890372 322 +pointer 1 48 3.044522 3.044522 368 +visitor 1 49 3.044522 3.044522 371 +directori 2 45 3.135494 6.270988 396 +robot 1 36 3.367296 3.367296 497 +bibliographi 3 34 3.401197 10.203591 518 +represent 1 35 3.401197 3.401197 512 +dissert 2 32 3.465736 6.931472 549 +limit 1 29 3.583519 3.583519 585 +alumni 1 21 3.912023 3.912023 807 +supervis 1 20 3.951244 3.951244 840 +spatial 1 16 4.174387 4.174387 988 +atth 1 15 4.248495 4.248495 1019 +easili 1 14 4.317488 4.317488 1077 +qualit 5 11 4.553877 22.769385 1362 +tour 1 11 4.553877 4.553877 1307 +ataustin 1 9 4.753590 4.753590 1610 +yellow 1 9 4.753590 4.753590 1601 +qsim 3 3 5.857933 17.573799 3862 +kuiper 1 3 5.857933 5.857933 3794 +thephys 1 2 6.263398 6.263398 5694 +ourresearch 3 1 6.957497 20.872491 13999 +utexasqualit 1 1 6.957497 6.957497 14000 +utexasth 1 1 6.957497 6.957497 14001 +intelligentrobot 1 1 6.957497 6.957497 14002 +knowledgerepresent 1 1 6.957497 6.957497 14003 +algernon 1 1 6.957497 6.957497 14004 +benjaminkuip 1 1 6.957497 6.957497 14005 +areadescript 1 1 6.957497 6.957497 14006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qzuo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qzuo^ new file mode 100644 index 00000000..07c2966f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^qzuo^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +click 1 142 1.945910 1.945910 78 +welcom 2 122 2.079442 4.158884 99 +introduct 1 126 2.079442 2.079442 87 +pleas 3 113 2.197225 6.591675 114 +technic 1 100 2.302585 2.302585 140 +comment 1 93 2.397895 2.397895 146 +sinc 1 90 2.397895 2.397895 159 +graphic 1 90 2.397895 2.397895 147 +institut 1 84 2.484907 2.484907 187 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +free 1 73 2.639057 2.639057 224 +window 2 68 2.708050 5.416100 242 +java 1 70 2.708050 2.708050 248 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +back 1 60 2.833213 2.833213 297 +unix 1 58 2.890372 2.890372 308 +suggest 1 53 2.944439 2.944439 331 +still 1 50 3.044522 3.044522 362 +around 1 43 3.178054 3.178054 415 +feel 1 37 3.332205 3.332205 483 +game 1 36 3.367296 3.367296 498 +copyright 1 36 3.367296 3.367296 495 +john 2 33 3.433987 6.867974 532 +linux 1 27 3.637586 3.637586 631 +sport 1 25 3.737670 3.737670 683 +thank 2 23 3.806662 7.613324 721 +brows 1 23 3.806662 3.806662 726 +self 1 22 3.850148 3.850148 761 +leav 1 21 3.912023 3.912023 772 +els 1 19 4.007333 4.007333 843 +modif 1 17 4.110874 4.110874 913 +joke 1 8 4.875197 4.875197 1620 +settimeout 1 5 5.347108 5.347108 2536 +guestbook 1 5 5.347108 5.347108 2475 +seed 3 4 5.568345 16.705035 2984 +timertwo 1 4 5.568345 5.568345 2985 +scrollit_rl 1 3 5.857933 5.857933 3882 +underconstruct 1 3 5.857933 5.857933 3889 +com 1 2 6.263398 6.263398 5156 +qiang 3 1 6.957497 20.872491 14007 +seriousjunk 1 1 6.957497 6.957497 14008 +realjunk 1 1 6.957497 6.957497 14009 +struggleforliv 1 1 6.957497 6.957497 14010 +qzuo 1 1 6.957497 6.957497 14011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rdb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rdb^ new file mode 100644 index 00000000..b7f6a5db --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rdb^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 4 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +last 2 314 1.098612 2.197224 14 +offic 1 299 1.098612 1.098612 13 +gener 2 220 1.386294 2.772588 27 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +paper 2 205 1.609438 3.218876 38 +utexa 2 189 1.609438 3.218876 44 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +texa 3 160 1.791759 5.375277 64 +austin 1 168 1.791759 1.791759 63 +avail 1 169 1.791759 1.791759 48 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +technolog 2 131 2.079442 4.158884 102 +compil 1 122 2.079442 2.079442 96 +document 1 121 2.079442 2.079442 89 +spring 1 131 2.079442 2.079442 88 +teach 1 108 2.197225 2.197225 112 +institut 2 84 2.484907 4.969814 187 +decemb 1 80 2.564949 2.564949 215 +name 1 72 2.639057 2.639057 220 +laboratori 1 63 2.772589 2.772589 292 +abstract 1 62 2.772589 2.772589 276 +taylor 1 63 2.772589 2.772589 287 +type 1 61 2.833213 2.833213 296 +semest 1 58 2.890372 2.890372 312 +directori 1 45 3.135494 3.135494 396 +robert 2 30 3.555348 7.110696 567 +experiment 1 26 3.688879 3.688879 645 +runtim 1 19 4.007333 4.007333 858 +less 1 18 4.060443 4.060443 892 +brown 1 16 4.174387 4.174387 977 +eduphon 1 15 4.248495 4.248495 1060 +massachusett 2 14 4.317488 8.634976 1118 +ofcomput 1 10 4.653960 4.653960 1442 +informationemail 1 9 4.753590 4.753590 1564 +pronounc 1 7 5.010635 5.010635 1918 +austinaustin 1 7 5.010635 5.010635 1966 +blumoferdb 1 5 5.347108 5.347108 2324 +bloom 1 4 5.568345 5.568345 2913 +sciencestaylor 1 3 5.857933 5.857933 3814 +bobbi 1 2 6.263398 6.263398 5678 +informationassist 1 2 6.263398 6.263398 5531 +blumoferobert 1 1 6.957497 6.957497 14012 +blumofei 1 1 6.957497 6.957497 14013 +cilkmultithread 1 1 6.957497 6.957497 14014 +hallpost 1 1 6.957497 6.957497 14015 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rhwang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rhwang^ new file mode 100644 index 00000000..6c35c27a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rhwang^ @@ -0,0 +1,42 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 3 293 1.098612 3.295836 17 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +construct 1 139 1.945910 1.945910 82 +analysi 1 124 2.079442 2.079442 98 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +homepag 1 93 2.397895 2.397895 148 +real 1 93 2.397895 2.397895 144 +comment 1 93 2.397895 2.397895 146 +intellig 1 72 2.639057 2.639057 225 +artifici 1 63 2.772589 2.772589 280 +content 1 59 2.833213 2.833213 302 +rule 1 26 3.688879 3.688879 638 +brows 1 23 3.806662 3.806662 726 +wang 1 21 3.912023 3.912023 790 +theunivers 1 21 3.912023 3.912023 797 +permit 1 16 4.174387 4.174387 962 +candid 1 9 4.753590 4.753590 1606 +publicationsi 1 3 5.857933 5.857933 3827 +wangwelcom 1 1 6.957497 6.957497 14016 +rhwang 1 1 6.957497 6.957497 14017 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^risto^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^risto^ new file mode 100644 index 00000000..70437837 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^risto^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 3 431 0.693147 2.079441 10 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +cours 5 273 1.098612 5.493060 15 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +graduat 4 215 1.386294 5.545176 31 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +fall 4 181 1.609438 6.437752 40 +group 2 183 1.609438 3.218876 36 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +network 5 168 1.791759 8.958795 61 +austin 3 168 1.791759 5.375277 63 +address 2 170 1.791759 3.583518 62 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +texa 1 160 1.791759 1.791759 64 +lectur 5 135 1.945910 9.729550 73 +model 2 145 1.945910 3.891820 69 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +spring 2 131 2.079442 4.158884 88 +technolog 1 131 2.079442 2.079442 102 +mathemat 1 108 2.197225 2.197225 123 +memori 1 101 2.302585 2.302585 139 +intellig 3 72 2.639057 7.917171 225 +appli 1 71 2.639057 2.639057 226 +solv 1 73 2.639057 2.639057 234 +goal 1 66 2.708050 2.708050 250 +artifici 4 63 2.772589 11.090356 280 +organ 1 65 2.772589 2.772589 265 +taylor 1 63 2.772589 2.772589 287 +automat 1 61 2.833213 2.833213 306 +detail 1 57 2.890372 2.890372 321 +undergradu 3 54 2.944439 8.833317 338 +finger 1 52 2.995732 2.995732 354 +visual 1 48 3.044522 3.044522 372 +vision 1 41 3.218876 3.218876 430 +seminar 2 38 3.295837 6.591674 470 +robot 1 36 3.367296 3.367296 497 +neural 6 30 3.555348 21.332088 578 +computersci 1 30 3.555348 3.555348 562 +postal 1 30 3.555348 3.555348 580 +utc 1 27 3.637586 3.637586 629 +strategi 1 25 3.737670 3.737670 682 +decis 1 23 3.806662 3.806662 728 +self 1 22 3.850148 3.850148 761 +concentr 1 18 4.060443 4.060443 906 +cognit 2 16 4.174387 8.348774 986 +evolv 1 12 4.465908 4.465908 1223 +genet 1 10 4.653960 4.653960 1409 +risto 2 9 4.753590 9.507180 1523 +schema 1 6 5.164786 5.164786 1988 +ucla 1 5 5.347108 5.347108 2502 +oftexa 1 4 5.568345 5.568345 3003 +episod 1 4 5.568345 5.568345 2747 +intereststh 1 3 5.857933 5.857933 3838 +cortex 1 3 5.857933 5.857933 3856 +helsinki 1 2 6.263398 6.263398 5702 +miikkulainenristo 1 1 6.957497 6.957497 14018 +miikkulainenassoci 1 1 6.957497 6.957497 14019 +processeswith 1 1 6.957497 6.957497 14020 +languageacquisit 1 1 6.957497 6.957497 14021 +networkswith 1 1 6.957497 6.957497 14022 +discoversequenti 1 1 6.957497 6.957497 14023 +classessumm 1 1 6.957497 6.957497 14024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rou^ new file mode 100644 index 00000000..873848b7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rou^ @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +rong 1 2 6.263398 6.263398 5703 +bigfoot 1 1 6.957497 6.957497 14025 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rraj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rraj^ new file mode 100644 index 00000000..58caa27f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rraj^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +algorithm 3 162 1.791759 5.375277 57 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +spring 1 131 2.079442 2.079442 88 +theori 1 111 2.197225 2.197225 127 +member 1 84 2.484907 2.484907 165 +complet 1 77 2.564949 2.564949 208 +onlin 1 75 2.639057 2.639057 223 +plan 1 65 2.772589 2.772589 272 +vita 1 38 3.295837 3.295837 473 +random 1 34 3.401197 3.401197 511 +curriculum 1 33 3.433987 3.433987 535 +postal 1 30 3.555348 3.555348 580 +particularli 1 19 4.007333 4.007333 867 +atth 1 15 4.248495 4.248495 1019 +ataustin 1 9 4.753590 4.753590 1610 +andcomput 1 8 4.875197 4.875197 1623 +supervisor 1 3 5.857933 5.857933 3754 +rajaraman 2 2 6.263398 12.526796 5704 +rraj 2 2 6.263398 12.526796 5705 +rajmohan 1 2 6.263398 6.263398 5706 +mydissert 1 2 6.263398 6.263398 5496 +mypubl 1 2 6.263398 6.263398 5707 +linkscontact 1 2 6.263398 6.263398 5708 +pagerajmohan 1 1 6.957497 6.957497 14026 +gregplaxton 1 1 6.957497 6.957497 14027 +incombinator 1 1 6.957497 6.957497 14028 +sciencemiscellan 1 1 6.957497 6.957497 14029 +ephon 1 1 6.957497 6.957497 14030 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rtan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rtan^ new file mode 100644 index 00000000..73093400 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rtan^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 2 215 1.386294 2.772588 31 +fall 1 181 1.609438 1.609438 40 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +austin 4 168 1.791759 7.167036 63 +texa 1 160 1.791759 1.791759 64 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +introduct 2 126 2.079442 4.158884 87 +technolog 1 131 2.079442 2.079442 102 +final 1 116 2.197225 2.197225 108 +mathemat 1 108 2.197225 2.197225 123 +peopl 1 96 2.302585 2.302585 132 +homepag 3 93 2.397895 7.193685 148 +librari 1 87 2.484907 2.484907 181 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +colleg 1 61 2.833213 2.833213 300 +telephon 1 50 3.044522 3.044522 373 +movi 4 40 3.258097 13.032388 459 +live 1 40 3.258097 3.258097 451 +ofth 1 36 3.367296 3.367296 491 +computersci 1 30 3.555348 3.555348 562 +weather 1 28 3.610918 3.610918 618 +spent 1 25 3.737670 3.737670 676 +yahoo 1 24 3.761200 3.761200 707 +beij 1 19 4.007333 4.007333 876 +five 1 19 4.007333 4.007333 841 +citi 1 19 4.007333 4.007333 874 +tsinghua 1 13 4.382027 4.382027 1195 +replic 1 12 4.465908 4.465908 1231 +entertain 1 12 4.465908 4.465908 1286 +catalog 1 10 4.653960 4.653960 1431 +login 1 9 4.753590 4.753590 1550 +film 1 8 4.875197 4.875197 1761 +capit 2 7 5.010635 10.021270 1957 +apart 1 7 5.010635 5.010635 1936 +chronicl 1 7 5.010635 5.010635 1952 +river 1 6 5.164786 5.164786 2220 +provinc 1 4 5.568345 5.568345 3009 +republ 1 4 5.568345 5.568345 3032 +sinanet 1 4 5.568345 5.568345 2883 +rong 2 2 6.263398 12.526796 5703 +zhai 1 2 6.263398 6.263398 5709 +utaccess 2 1 6.957497 13.914994 14031 +homepagea 1 1 6.957497 6.957497 14032 +chinachina 1 1 6.957497 6.957497 14033 +jinan 1 1 6.957497 6.957497 14034 +myhometown 1 1 6.957497 6.957497 14035 +shandong 1 1 6.957497 6.957497 14036 +gotmi 1 1 6.957497 6.957497 14037 +ofchina 1 1 6.957497 6.957497 14038 +texasaustin 1 1 6.957497 6.957497 14039 +rent 1 1 6.957497 6.957497 14040 +utcsth 1 1 6.957497 6.957497 14041 +utnetcat 1 1 6.957497 6.957497 14042 +browsabl 1 1 6.957497 6.957497 14043 +austininform 1 1 6.957497 6.957497 14044 +utcat 1 1 6.957497 6.957497 14045 +systemsdynam 1 1 6.957497 6.957497 14046 +graphicsc 1 1 6.957497 6.957497 14047 +logicc 1 1 6.957497 6.957497 14048 +moviesaustin 1 1 6.957497 6.957497 14049 +filmsmicrosoft 1 1 6.957497 6.957497 14050 +cinemania 1 1 6.957497 6.957497 14051 +onlineal 1 1 6.957497 6.957497 14052 +guidehollywood 1 1 6.957497 6.957497 14053 +onlineinternet 1 1 6.957497 6.957497 14054 +databaserog 1 1 6.957497 6.957497 14055 +ebert 1 1 6.957497 6.957497 14056 +moviesvisit 1 1 6.957497 6.957497 14057 +contactmail 1 1 6.957497 6.957497 14058 +aaustin 1 1 6.957497 6.957497 14059 +emailrtan 1 1 6.957497 6.957497 14060 +fingerclick 1 1 6.957497 6.957497 14061 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rupert^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rupert^ new file mode 100644 index 00000000..09d24f89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rupert^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +machin 1 129 2.079442 2.079442 95 +teach 1 108 2.197225 2.197225 112 +need 1 98 2.302585 2.302585 135 +academ 1 82 2.484907 2.484907 178 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +know 1 80 2.564949 2.564949 198 +meet 1 72 2.639057 2.639057 229 +servic 1 72 2.639057 2.639057 236 +would 2 67 2.708050 5.416100 251 +differ 1 66 2.708050 2.708050 253 +dept 1 64 2.772589 2.772589 291 +think 2 57 2.890372 5.780744 314 +much 1 52 2.995732 2.995732 349 +life 2 50 3.044522 6.089044 375 +cool 1 49 3.044522 3.044522 374 +littl 2 39 3.258097 6.516194 454 +realli 1 40 3.258097 3.258097 444 +actual 1 28 3.610918 3.610918 604 +altern 1 26 3.688879 3.688879 641 +alwai 2 24 3.761200 7.522400 691 +wish 1 24 3.761200 3.761200 692 +almost 1 22 3.850148 3.850148 742 +entir 1 20 3.951244 3.951244 811 +nice 1 20 3.951244 3.951244 809 +modern 1 16 4.174387 4.174387 966 +anyth 1 16 4.174387 4.174387 998 +choos 1 16 4.174387 4.174387 964 +opportun 1 13 4.382027 4.382027 1161 +besid 1 8 4.875197 4.875197 1681 +tang 2 5 5.347108 10.694216 2409 +plant 1 5 5.347108 5.347108 2497 +aspir 1 4 5.568345 5.568345 3019 +fear 1 4 5.568345 5.568345 2911 +freedom 1 3 5.857933 5.857933 3890 +rupert 3 2 6.263398 18.790194 5680 +miracl 1 2 6.263398 6.263398 5710 +holi 1 2 6.263398 6.263398 5711 +stimul 1 2 6.263398 6.263398 5712 +empti 1 2 6.263398 6.263398 5478 +truck 1 2 6.263398 6.263398 5713 +wash 1 2 6.263398 6.263398 5714 +strangl 1 1 6.957497 6.957497 14062 +curious 1 1 6.957497 6.957497 14063 +inquiri 1 1 6.957497 6.957497 14064 +delic 1 1 6.957497 6.957497 14065 +depriv 1 1 6.957497 6.957497 14066 +distast 1 1 6.957497 6.957497 14067 +deni 1 1 6.957497 6.957497 14068 +duress 1 1 6.957497 6.957497 14069 +fate 1 1 6.957497 6.957497 14070 +messi 1 1 6.957497 6.957497 14071 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ruweihu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ruweihu^ new file mode 100644 index 00000000..f60895ce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ruweihu^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +homepag 1 93 2.397895 2.397895 148 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +frame 3 24 3.761200 11.283600 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 +ruwei 1 1 6.957497 6.957497 14072 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rvdg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rvdg^ new file mode 100644 index 00000000..70a62f53 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^rvdg^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +project 3 340 1.098612 3.295836 18 +current 2 284 1.098612 2.197224 21 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +public 3 202 1.609438 4.828314 43 +utexa 2 189 1.609438 3.218876 44 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +parallel 4 169 1.791759 7.167036 60 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +develop 2 174 1.791759 3.583518 53 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +analysi 1 124 2.079442 2.079442 98 +introduct 1 126 2.079442 2.079442 87 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +schedul 1 119 2.079442 2.079442 85 +mathemat 3 108 2.197225 6.591675 123 +well 1 109 2.197225 2.197225 121 +techniqu 2 99 2.302585 4.605170 138 +user 1 104 2.302585 2.302585 137 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +institut 1 84 2.484907 2.484907 187 +environ 1 84 2.484907 2.484907 177 +journal 1 83 2.484907 2.484907 183 +method 4 80 2.564949 10.259796 213 +april 1 77 2.564949 2.564949 196 +appli 2 71 2.639057 5.278114 226 +summari 1 73 2.639057 2.639057 237 +workshop 1 71 2.639057 2.639057 239 +meet 1 72 2.639057 2.639057 229 +evalu 1 64 2.772589 2.772589 266 +colleg 1 61 2.833213 2.833213 300 +variou 1 56 2.890372 2.890372 317 +major 1 56 2.890372 2.890372 315 +scientif 1 53 2.944439 2.944439 341 +case 2 51 2.995732 5.991464 351 +numer 2 49 3.044522 6.089044 369 +better 1 45 3.135494 3.135494 401 +http 1 41 3.218876 3.218876 420 +continu 1 39 3.258097 3.258097 448 +tutori 1 39 3.258097 3.258097 437 +robert 2 30 3.555348 7.110696 567 +effort 1 26 3.688879 3.688879 652 +supercomput 1 25 3.737670 3.737670 681 +famili 1 23 3.806662 3.806662 735 +prove 1 19 4.007333 4.007333 848 +former 1 17 4.110874 4.110874 956 +easili 1 14 4.317488 4.317488 1077 +infrastructur 1 12 4.465908 4.465908 1234 +forc 1 10 4.653960 4.653960 1384 +maryland 1 6 5.164786 5.164786 2140 +park 1 6 5.164786 5.164786 2218 +plapack 1 3 5.857933 5.857933 3849 +geijn 1 2 6.263398 6.263398 5715 +appliedmathemat 1 2 6.263398 6.263398 5716 +interestnumer 1 2 6.263398 6.263398 5717 +researchth 1 2 6.263398 6.263398 5492 +rvdg 2 1 6.957497 13.914994 14073 +geijnassoci 1 1 6.957497 6.957497 14074 +oftradit 1 1 6.957497 6.957497 14075 +sequentialmachin 1 1 6.957497 6.957497 14076 +inoth 1 1 6.957497 6.957497 14077 +researchconcentr 1 1 6.957497 6.957497 14078 +forimpl 1 1 6.957497 6.957497 14079 +allowssuch 1 1 6.957497 6.957497 14080 +parallelprocessor 1 1 6.957497 6.957497 14081 +intercom 1 1 6.957497 6.957497 14082 +sl_librari 1 1 6.957497 6.957497 14083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sak^ new file mode 100644 index 00000000..fa786495 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sak^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +time 2 293 1.098612 2.197224 17 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +contact 1 153 1.791759 1.791759 59 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +know 1 80 2.564949 2.564949 198 +window 1 68 2.708050 2.708050 242 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +undergradu 1 54 2.944439 2.944439 338 +visitor 1 49 3.044522 3.044522 371 +india 3 32 3.465736 10.397208 550 +computersci 1 30 3.555348 3.555348 562 +els 1 19 4.007333 4.007333 843 +region 1 19 4.007333 4.007333 875 +universityof 1 15 4.248495 4.248495 1061 +countri 1 15 4.248495 4.248495 1059 +settimeout 1 5 5.347108 5.347108 2536 +seed 3 4 5.568345 16.705035 2984 +abraham 1 4 5.568345 5.568345 2644 +timertwo 1 4 5.568345 5.568345 2985 +engg 1 4 5.568345 5.568345 2884 +scrollit_rl 1 3 5.857933 5.857933 3882 +kerala 1 3 5.857933 5.857933 3749 +sciencesand 1 2 6.263398 6.263398 4711 +hail 1 2 6.263398 6.263398 5583 +sundeep 1 1 6.957497 6.957497 14084 +sundeepabraham 1 1 6.957497 6.957497 14085 +calicut 1 1 6.957497 6.957497 14086 +tinkerwith 1 1 6.957497 6.957497 14087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sammy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sammy^ new file mode 100644 index 00000000..b2e3ec33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sammy^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +univers 4 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 3 457 0.693147 2.079441 12 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +project 2 340 1.098612 2.197224 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +utexa 2 189 1.609438 3.218876 44 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +texa 5 160 1.791759 8.958795 64 +austin 5 168 1.791759 8.958795 63 +parallel 2 169 1.791759 3.583518 60 +address 2 170 1.791759 3.583518 62 +distribut 1 162 1.791759 1.791759 51 +construct 2 139 1.945910 3.891820 82 +hall 1 146 1.945910 1.945910 65 +schedul 1 119 2.079442 2.079442 85 +compil 1 122 2.079442 2.079442 96 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +world 2 115 2.197225 4.394450 126 +make 1 111 2.197225 2.197225 120 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +peopl 1 96 2.302585 2.302585 132 +advanc 1 99 2.302585 2.302585 130 +wide 2 84 2.484907 4.969814 185 +librari 2 87 2.484907 4.969814 181 +activ 1 84 2.484907 2.484907 182 +info 1 85 2.484907 2.484907 176 +refer 2 78 2.564949 5.129898 203 +state 1 76 2.564949 2.564949 207 +server 1 76 2.564949 2.564949 204 +free 1 73 2.639057 2.639057 224 +intellig 1 72 2.639057 2.639057 225 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +prof 2 64 2.772589 5.545178 273 +artifici 1 63 2.772589 2.772589 280 +experi 1 64 2.772589 2.772589 283 +virtual 1 62 2.772589 2.772589 285 +guid 1 63 2.772589 2.772589 267 +taylor 1 63 2.772589 2.772589 287 +right 1 48 3.044522 3.044522 363 +friend 1 48 3.044522 3.044522 376 +algebra 1 45 3.135494 3.135494 394 +press 1 42 3.218876 3.218876 419 +linear 1 41 3.218876 3.218876 431 +map 1 39 3.258097 3.258097 452 +express 1 32 3.465736 3.465736 540 +robert 1 30 3.555348 3.555348 567 +weather 1 28 3.610918 3.610918 618 +packag 1 28 3.610918 3.610918 614 +great 1 27 3.637586 3.637586 626 +mine 1 26 3.688879 3.688879 654 +repres 1 26 3.688879 3.688879 656 +jeff 1 25 3.737670 3.737670 673 +hill 1 25 3.737670 3.737670 670 +yahoo 1 24 3.761200 3.761200 707 +famili 1 23 3.806662 3.806662 735 +miscellan 1 23 3.806662 3.806662 731 +unit 1 21 3.912023 3.912023 779 +lyco 1 19 4.007333 4.007333 871 +exercis 1 19 4.007333 4.007333 842 +hypertext 1 19 4.007333 4.007333 865 +boston 1 19 4.007333 4.007333 862 +white 1 17 4.110874 4.110874 951 +sign 1 16 4.174387 4.174387 970 +hotlist 1 13 4.382027 4.382027 1199 +entertain 1 12 4.465908 4.465908 1286 +america 1 11 4.553877 4.553877 1370 +consortium 1 10 4.653960 4.653960 1467 +swim 3 9 4.753590 14.260770 1599 +yellow 2 9 4.753590 9.507180 1601 +congress 1 9 4.753590 4.753590 1592 +respect 1 9 4.753590 4.753590 1545 +establish 1 9 4.753590 4.753590 1532 +govern 1 9 4.753590 4.753590 1581 +calvin 1 9 4.753590 4.753590 1518 +yanni 1 8 4.875197 4.875197 1713 +hockei 1 8 4.875197 4.875197 1760 +opinion 1 8 4.875197 4.875197 1708 +chronicl 1 7 5.010635 5.010635 1952 +necessarili 1 7 5.010635 5.010635 1899 +altavista 1 6 5.164786 5.164786 2222 +constitut 1 6 5.164786 5.164786 2026 +quick 1 6 5.164786 5.164786 2184 +andrea 1 5 5.347108 5.347108 2375 +shall 1 3 5.857933 5.857933 3891 +abridg 1 3 5.857933 5.857933 3772 +freedom 1 3 5.857933 5.857933 3890 +plapack 1 3 5.857933 5.857933 3849 +health 1 3 5.857933 5.857933 3787 +concert 1 3 5.857933 5.857933 3533 +thereof 1 2 6.263398 6.263398 5484 +geijn 1 2 6.263398 6.263398 5715 +musician 1 2 6.263398 6.263398 5718 +hamilton 1 2 6.263398 6.263398 5719 +guyer 1 2 6.263398 6.263398 4171 +northwestern 1 2 6.263398 6.263398 5502 +nate 1 2 6.263398 6.263398 5720 +dell 1 2 6.263398 6.263398 4193 +fring 1 2 6.263398 6.263398 5721 +sammi 3 1 6.957497 20.872491 14088 +startingpoint 1 1 6.957497 6.957497 14089 +religion 1 1 6.957497 6.957497 14090 +orprohibit 1 1 6.957497 6.957497 14091 +ofspeech 1 1 6.957497 6.957497 14092 +peaceabl 1 1 6.957497 6.957497 14093 +toassembl 1 1 6.957497 6.957497 14094 +petit 1 1 6.957497 6.957497 14095 +redress 1 1 6.957497 6.957497 14096 +grievanc 1 1 6.957497 6.957497 14097 +herbarium 1 1 6.957497 6.957497 14098 +anagram 1 1 6.957497 6.957497 14099 +nil 1 1 6.957497 6.957497 14100 +reker 1 1 6.957497 6.957497 14101 +pop 1 1 6.957497 6.957497 14102 +anthropolog 1 1 6.957497 6.957497 14103 +kate 1 1 6.957497 6.957497 14104 +showbiz 1 1 6.957497 6.957497 14105 +pollstar 1 1 6.957497 6.957497 14106 +ryder 1 1 6.957497 6.957497 14107 +laptop 1 1 6.957497 6.957497 14108 +traveloc 1 1 6.957497 6.957497 14109 +eduth 1 1 6.957497 6.957497 14110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sawada^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sawada^ new file mode 100644 index 00000000..7d402544 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sawada^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +depart 2 457 0.693147 1.386294 12 +offic 1 299 1.098612 1.098612 13 +time 1 293 1.098612 1.098612 17 +mail 2 238 1.386294 2.772588 22 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +utexa 1 189 1.609438 1.609438 44 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +address 3 170 1.791759 5.375277 62 +austin 3 168 1.791759 5.375277 63 +texa 1 160 1.791759 1.791759 64 +file 2 132 1.945910 3.891820 70 +hall 1 146 1.945910 1.945910 65 +report 1 131 2.079442 2.079442 92 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +method 2 80 2.564949 5.129898 213 +logic 1 71 2.639057 2.639057 230 +main 1 67 2.708050 2.708050 256 +taylor 1 63 2.772589 2.772589 287 +abstract 1 62 2.772589 2.772589 276 +frequent 1 49 3.044522 3.044522 367 +around 1 43 3.178054 3.178054 415 +edit 1 42 3.218876 3.218876 418 +formal 2 37 3.332205 6.664410 478 +dissert 1 32 3.465736 3.465736 549 +common 1 30 3.555348 3.555348 574 +propos 2 28 3.610918 7.221836 602 +univ 1 28 3.610918 3.610918 617 +fellow 1 24 3.761200 3.761200 701 +lisp 1 18 4.060443 4.060443 897 +boyer 1 6 5.164786 5.164786 2013 +sawada 1 3 5.857933 5.857933 3190 +oral 1 3 5.857933 5.857933 3189 +teacher 1 3 5.857933 5.857933 3892 +supplementari 1 2 6.263398 6.263398 4752 +bowen 1 2 6.263398 6.263398 4170 +sawadajun 1 1 6.957497 6.957497 14111 +sawadacontact 1 1 6.957497 6.957497 14112 +wooten 1 1 6.957497 6.957497 14113 +kbresourc 1 1 6.957497 6.957497 14114 +pvsother 1 1 6.957497 6.957497 14115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^schwartz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^schwartz^ new file mode 100644 index 00000000..70b2a094 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^schwartz^ @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +system 8 443 0.693147 5.545176 6 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +softwar 13 220 1.386294 18.021822 30 +gener 8 220 1.386294 11.090352 27 +design 3 213 1.386294 4.158882 25 +graduat 1 215 1.386294 1.386294 31 +group 2 183 1.609438 3.218876 36 +utexa 2 189 1.609438 3.218876 44 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +applic 3 170 1.791759 5.375277 56 +distribut 2 162 1.791759 3.583518 51 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +avail 1 169 1.791759 1.791759 48 +perform 2 143 1.945910 3.891820 74 +architectur 2 139 1.945910 3.891820 77 +lectur 2 135 1.945910 3.891820 73 +note 2 142 1.945910 3.891820 67 +file 2 132 1.945910 3.891820 70 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +like 1 132 1.945910 1.945910 81 +object 1 138 1.945910 1.945910 79 +tool 1 117 2.079442 2.079442 93 +databas 1 122 2.079442 2.079442 86 +postscript 1 131 2.079442 2.079442 90 +look 2 107 2.197225 4.394450 115 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +specif 1 106 2.197225 2.197225 106 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +take 1 97 2.302585 2.302585 134 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +mani 3 92 2.397895 7.193685 150 +question 1 91 2.397895 2.397895 141 +follow 1 92 2.397895 2.397895 143 +start 4 83 2.484907 9.939628 173 +member 3 84 2.484907 7.454721 165 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +librari 1 87 2.484907 2.484907 181 +contain 1 81 2.484907 2.484907 174 +decemb 2 80 2.564949 5.129898 215 +orient 1 80 2.564949 2.564949 205 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +order 2 69 2.708050 5.416100 249 +differ 1 66 2.708050 2.708050 253 +simul 1 66 2.708050 2.708050 255 +complex 2 64 2.772589 5.545178 269 +result 2 65 2.772589 5.545178 281 +improv 2 62 2.772589 5.545178 289 +visit 1 63 2.772589 2.772589 288 +creat 1 63 2.772589 2.772589 277 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +share 1 59 2.833213 2.833213 304 +simpl 1 60 2.833213 2.833213 298 +index 1 56 2.890372 2.890372 309 +overview 1 56 2.890372 2.890372 323 +basic 2 50 3.044522 6.089044 360 +approach 1 48 3.044522 3.044522 366 +get 3 46 3.091042 9.273126 380 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +term 1 43 3.178054 3.178054 411 +must 1 40 3.258097 3.258097 442 +tutori 1 39 3.258097 3.258097 437 +feel 1 37 3.332205 3.332205 483 +product 2 33 3.433987 6.867974 527 +independ 1 32 3.465736 3.465736 548 +transform 1 32 3.465736 3.465736 542 +titl 1 31 3.496508 3.496508 556 +compon 5 30 3.555348 17.776740 570 +domain 4 30 3.555348 14.221392 564 +scale 1 28 3.610918 3.610918 613 +releas 1 28 3.610918 3.610918 616 +utc 1 27 3.637586 3.637586 629 +manipul 1 27 3.637586 3.637586 624 +subject 1 26 3.688879 3.688879 647 +pattern 2 24 3.761200 7.522400 689 +demonstr 1 24 3.761200 3.761200 694 +scalabl 1 24 3.761200 3.761200 705 +equat 1 23 3.806662 3.806662 724 +compress 1 23 3.806662 3.806662 719 +defin 1 22 3.850148 3.850148 746 +recommend 1 22 3.850148 3.850148 737 +period 1 22 3.850148 3.850148 743 +thu 1 21 3.912023 3.912023 773 +reflect 1 15 4.248495 4.248495 1034 +goe 1 15 4.248495 4.248495 1044 +composit 1 13 4.382027 4.382027 1150 +dbm 1 13 4.382027 4.382027 1136 +assembl 1 12 4.465908 4.465908 1207 +refin 2 11 4.553877 9.107754 1363 +pagewelcom 1 11 4.553877 4.553877 1344 +typic 1 11 4.553877 4.553877 1360 +valid 1 11 4.553877 4.553877 1299 +evolut 1 11 4.553877 4.553877 1314 +modul 1 10 4.653960 4.653960 1434 +relationship 1 10 4.653960 4.653960 1383 +bart 1 9 4.753590 4.753590 1559 +reus 1 8 4.875197 4.875197 1661 +successfulli 1 7 5.010635 5.010635 1869 +beyond 1 7 5.010635 5.010635 1834 +deliv 1 6 5.164786 5.164786 2070 +batori 5 4 5.568345 27.841725 2690 +avion 1 4 5.568345 5.568345 3018 +substanti 1 4 5.568345 5.568345 2921 +metadata 1 4 5.568345 5.568345 2945 +breadth 1 4 5.568345 5.568345 2695 +interchang 1 3 5.857933 5.857933 3893 +tokuda 1 3 5.857933 5.857933 3266 +smaragdaki 1 3 5.857933 5.857933 3851 +lightweight 1 3 5.857933 5.857933 3234 +encapsul 2 2 6.263398 12.526796 5541 +reusabl 1 2 6.263398 6.263398 4218 +marti 1 2 6.263398 6.263398 5679 +genvoca 4 1 6.957497 27.829988 14116 +ssgrg 1 1 6.957497 6.957497 14117 +professorangela 1 1 6.957497 6.957497 14118 +dappert 1 1 6.957497 6.957497 14119 +studentguillermo 1 1 6.957497 6.957497 14120 +jimenez 1 1 6.957497 6.957497 14121 +perezph 1 1 6.957497 6.957497 14122 +studentjeff 1 1 6.957497 6.957497 14123 +thomasph 1 1 6.957497 6.957497 14124 +studentl 1 1 6.957497 6.957497 14125 +studentyanni 1 1 6.957497 6.957497 14126 +studentk 1 1 6.957497 6.957497 14127 +shepherdresearch 1 1 6.957497 6.957497 14128 +associateform 1 1 6.957497 6.957497 14129 +datesdinesh 1 1 6.957497 6.957497 14130 +dasph 1 1 6.957497 6.957497 14131 +milli 1 1 6.957497 6.957497 14132 +villarrealph 1 1 6.957497 6.957497 14133 +geracipostdoc 1 1 6.957497 6.957497 14134 +sirkinph 1 1 6.957497 6.957497 14135 +sankar 1 1 6.957497 6.957497 14136 +dasarim 1 1 6.957497 6.957497 14137 +starter 1 1 6.957497 6.957497 14138 +reengin 1 1 6.957497 6.957497 14139 +generatorsautom 1 1 6.957497 6.957497 14140 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ new file mode 100644 index 00000000..f2a9c61d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sfkaplan^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +home 5 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +inform 3 412 0.693147 2.079441 8 +work 3 380 0.693147 2.079441 9 +research 3 431 0.693147 2.079441 10 +interest 2 384 0.693147 1.386294 11 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +gener 1 220 1.386294 1.386294 27 +group 2 183 1.609438 3.218876 36 +class 2 199 1.609438 3.218876 37 +updat 1 191 1.609438 1.609438 41 +utexa 1 189 1.609438 1.609438 44 +includ 1 208 1.609438 1.609438 42 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +like 2 132 1.945910 3.891820 81 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +machin 1 129 2.079442 2.079442 95 +look 2 107 2.197225 4.394450 115 +place 2 106 2.197225 4.394450 124 +check 2 115 2.197225 4.394450 118 +instructor 1 108 2.197225 2.197225 107 +well 1 109 2.197225 2.197225 121 +send 1 114 2.197225 2.197225 109 +peopl 2 96 2.302585 4.605170 132 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +stuff 4 87 2.484907 9.939628 171 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +want 3 79 2.564949 7.694847 199 +orient 2 80 2.564949 5.129898 205 +good 1 77 2.564949 2.564949 200 +name 1 72 2.639057 2.639057 220 +free 1 73 2.639057 2.639057 224 +differ 1 66 2.708050 2.708050 253 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +guid 1 63 2.772589 2.772589 267 +import 1 65 2.772589 2.772589 282 +colleg 2 61 2.833213 5.666426 300 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +think 3 57 2.890372 8.671116 314 +summer 1 56 2.890372 2.890372 311 +publish 1 57 2.890372 2.890372 326 +undergradu 1 54 2.944439 2.944439 338 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +maintain 1 51 2.995732 2.995732 342 +basic 2 50 3.044522 6.089044 360 +right 2 48 3.044522 6.089044 363 +cool 1 49 3.044522 3.044522 374 +without 1 50 3.044522 3.044522 370 +still 1 50 3.044522 3.044522 362 +could 1 46 3.091042 3.091042 383 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +futur 1 41 3.218876 3.218876 427 +might 1 41 3.218876 3.218876 426 +realli 2 40 3.258097 6.516194 444 +littl 2 39 3.258097 6.516194 454 +probabl 1 40 3.258097 3.258097 455 +mean 1 37 3.332205 3.332205 477 +expect 1 37 3.332205 3.332205 484 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +within 1 33 3.433987 3.433987 525 +kind 1 32 3.465736 3.465736 541 +actual 1 28 3.610918 3.610918 604 +load 1 28 3.610918 3.610918 601 +utc 1 27 3.637586 3.637586 629 +great 1 27 3.637586 3.637586 626 +linux 1 27 3.637586 3.637586 631 +spent 2 25 3.737670 7.475340 676 +miscellan 1 23 3.806662 3.806662 731 +try 2 22 3.850148 7.700296 764 +finish 1 22 3.850148 3.850148 748 +dai 1 22 3.850148 3.850148 753 +longer 1 20 3.951244 3.951244 816 +sure 1 20 3.951244 3.951244 813 +minut 1 20 3.951244 3.951244 810 +spend 1 19 4.007333 4.007333 850 +item 1 19 4.007333 4.007333 856 +five 1 19 4.007333 4.007333 841 +scott 1 18 4.060443 4.060443 884 +hobbi 1 16 4.174387 4.174387 1009 +doesn 2 15 4.248495 8.496990 1055 +wait 1 13 4.382027 4.382027 1168 +neat 2 12 4.465908 8.931816 1263 +appl 1 11 4.553877 4.553877 1303 +noth 1 11 4.553877 4.553877 1328 +santa 1 10 4.653960 4.653960 1441 +didn 1 9 4.753590 4.753590 1563 +oop 1 8 4.875197 4.875197 1778 +forget 1 8 4.875197 4.875197 1712 +perfect 1 7 5.010635 5.010635 1921 +pageth 1 7 5.010635 5.010635 1939 +encrypt 1 7 5.010635 5.010635 1835 +squash 1 6 5.164786 5.164786 2223 +beer 1 6 5.164786 5.164786 2216 +sharp 1 6 5.164786 5.164786 2100 +amherst 3 5 5.347108 16.041324 2484 +humor 1 5 5.347108 5.347108 2533 +amaz 2 4 5.568345 11.136690 2600 +pagescott 1 4 5.568345 5.568345 2978 +slight 1 3 5.857933 5.857933 3894 +glenn 1 3 5.857933 5.857933 3869 +down 1 3 5.857933 5.857933 3870 +wine 1 3 5.857933 5.857933 3895 +maker 1 3 5.857933 5.857933 3164 +dine 1 3 5.857933 5.857933 3472 +citizen 1 3 5.857933 5.857933 3238 +iici 1 3 5.857933 5.857933 3436 +bright 1 3 5.857933 5.857933 3596 +fanci 1 2 6.263398 6.263398 4992 +unpredict 1 2 6.263398 6.263398 5722 +stuffit 1 2 6.263398 6.263398 4127 +invalu 1 2 6.263398 6.263398 4680 +forev 1 2 6.263398 6.263398 5636 +grab 1 2 6.263398 6.263398 5723 +pageokai 1 1 6.957497 6.957497 14141 +overdu 1 1 6.957497 6.957497 14142 +mead 1 1 6.957497 6.957497 14143 +psion 1 1 6.957497 6.957497 14144 +palmtop 1 1 6.957497 6.957497 14145 +anastasi 1 1 6.957497 6.957497 14146 +poke 1 1 6.957497 6.957497 14147 +ala 1 1 6.957497 6.957497 14148 +bebox 1 1 6.957497 6.957497 14149 +sfkaplan 1 1 6.957497 6.957497 14150 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^shenoy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^shenoy^ new file mode 100644 index 00000000..2b5b50d2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^shenoy^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +utexa 3 189 1.609438 4.828314 44 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +comment 1 93 2.397895 2.397895 146 +build 1 85 2.484907 2.484907 184 +onlin 1 75 2.639057 2.639057 223 +main 2 67 2.708050 5.416100 256 +multimedia 1 68 2.708050 2.708050 258 +finger 1 52 2.995732 2.995732 354 +get 1 46 3.091042 3.091042 380 +log 1 19 4.007333 4.007333 857 +floor 1 14 4.317488 4.317488 1070 +touch 1 12 4.465908 4.465908 1288 +river 1 6 5.164786 5.164786 2220 +shenoi 5 3 5.857933 29.289665 3269 +tower 1 3 5.857933 5.857933 3818 +prashant 2 2 6.263398 12.526796 4331 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^shma^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^shma^ new file mode 100644 index 00000000..7a673659 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^shma^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +austin 3 168 1.791759 5.375277 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +final 1 116 2.197225 2.197225 108 +make 1 111 2.197225 2.197225 120 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +tabl 1 51 2.995732 2.995732 346 +finger 1 52 2.995732 2.995732 354 +campu 1 27 3.637586 3.637586 623 +vlsi 1 21 3.912023 3.912023 795 +citi 1 19 4.007333 4.007333 874 +vallei 1 7 5.010635 5.010635 1959 +ongo 1 6 5.164786 5.164786 2215 +coffe 1 5 5.347108 5.347108 2556 +pleasant 1 3 5.857933 5.857933 3825 +bookshelf 1 2 6.263398 6.263398 5724 +shaob 2 1 6.957497 13.914994 14151 +cyberhom 2 1 6.957497 13.914994 14152 +hardvar 1 1 6.957497 6.957497 14153 +verifc 1 1 6.957497 6.957497 14154 +shma 1 1 6.957497 6.957497 14155 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^skumar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^skumar^ new file mode 100644 index 00000000..f6877304 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^skumar^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +austin 4 168 1.791759 7.167036 63 +texa 3 160 1.791759 5.375277 64 +network 2 168 1.791759 3.583518 61 +algorithm 2 162 1.791759 3.583518 57 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +object 1 138 1.945910 1.945910 79 +machin 2 129 2.079442 4.158884 95 +confer 1 126 2.079442 2.079442 100 +tool 1 117 2.079442 2.079442 93 +mathemat 1 108 2.197225 2.197225 123 +proceed 1 93 2.397895 2.397895 152 +learn 3 86 2.484907 7.454721 170 +internet 2 83 2.484907 4.969814 186 +dynam 1 76 2.564949 2.564949 194 +intellig 1 72 2.639057 2.639057 225 +logic 1 71 2.639057 2.639057 230 +line 1 75 2.639057 2.639057 231 +appli 1 71 2.639057 2.639057 226 +symposium 1 72 2.639057 2.639057 238 +artifici 2 63 2.772589 5.545178 280 +processor 1 54 2.944439 2.944439 335 +life 1 50 3.044522 3.044522 375 +adapt 1 46 3.091042 3.091042 387 +music 1 42 3.218876 3.218876 436 +annual 1 40 3.258097 3.258097 458 +india 1 32 3.465736 3.465736 550 +neural 2 30 3.555348 7.110696 578 +qualiti 1 20 3.951244 3.951244 832 +massiv 1 15 4.248495 4.248495 1026 +nonlinear 1 14 4.317488 4.317488 1107 +affili 1 13 4.382027 4.382027 1194 +automata 1 13 4.382027 4.382027 1135 +avenu 1 12 4.465908 4.465908 1277 +itali 1 11 4.553877 4.553877 1378 +evolut 1 11 4.553877 4.553877 1314 +genet 2 10 4.653960 9.307920 1409 +kumar 2 9 4.753590 9.507180 1506 +risto 1 9 4.753590 4.753590 1523 +chao 1 8 4.875197 4.875197 1753 +miikkulainen 1 8 4.875197 4.875197 1667 +signal 1 7 5.010635 5.010635 1910 +edumi 1 6 5.164786 5.164786 2132 +cellular 1 5 5.347108 5.347108 2433 +dual 1 5 5.347108 5.347108 2522 +austindepart 1 4 5.568345 5.568345 3008 +reinforc 1 4 5.568345 5.568345 2674 +snail 1 4 5.568345 5.568345 2916 +sciencestaylor 1 3 5.857933 5.857933 3814 +patrick 1 3 5.857933 5.857933 3334 +shailesh 3 2 6.263398 18.790194 5578 +fuzzi 1 2 6.263398 6.263398 5423 +publicationson 1 2 6.263398 6.263398 4899 +singh 1 2 6.263398 6.263398 5675 +kumarshailesh 1 1 6.957497 6.957497 14156 +kumarth 1 1 6.957497 6.957497 14157 +skumar 1 1 6.957497 6.957497 14158 +resumeresearch 1 1 6.957497 6.957497 14159 +publicationscontact 1 1 6.957497 6.957497 14160 +mesrcm 1 1 6.957497 6.957497 14161 +spiritu 1 1 6.957497 6.957497 14162 +offersom 1 1 6.957497 6.957497 14163 +linkscognit 1 1 6.957497 6.957497 14164 +scienceutc 1 1 6.957497 6.957497 14165 +researchutc 1 1 6.957497 6.957497 14166 +groupresearch 1 1 6.957497 6.957497 14167 +neuroevolut 1 1 6.957497 6.957497 14168 +predistort 1 1 6.957497 6.957497 14169 +goetz 1 1 6.957497 6.957497 14170 +bari 1 1 6.957497 6.957497 14171 +bord 1 1 6.957497 6.957497 14172 +aprl 1 1 6.957497 6.957497 14173 +whiti 1 1 6.957497 6.957497 14174 +offernet 1 1 6.957497 6.957497 14175 +assistancesearch 1 1 6.957497 6.957497 14176 +institutewww 1 1 6.957497 6.957497 14177 +infoindia 1 1 6.957497 6.957497 14178 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^smaragd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^smaragd^ new file mode 100644 index 00000000..4c0eda5f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^smaragd^ @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +interest 2 384 0.693147 1.386294 11 +program 2 374 0.693147 1.386294 7 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 2 343 1.098612 2.197224 19 +project 2 340 1.098612 2.197224 18 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +applic 1 170 1.791759 1.791759 56 +texa 1 160 1.791759 1.791759 64 +phone 1 175 1.791759 1.791759 45 +area 1 144 1.945910 1.945910 80 +make 1 111 2.197225 2.197225 120 +world 1 115 2.197225 2.197225 126 +main 1 67 2.708050 2.708050 256 +plan 1 65 2.772589 2.772589 272 +favorit 1 44 3.135494 3.135494 410 +posit 1 31 3.496508 3.496508 552 +someth 1 31 3.496508 3.496508 554 +photo 1 31 3.496508 3.496508 561 +turn 1 29 3.583519 3.583519 586 +utc 2 27 3.637586 7.275172 629 +lead 1 23 3.806662 3.806662 718 +scheme 1 20 3.951244 3.951244 818 +particularli 1 19 4.007333 4.007333 867 +success 1 10 4.653960 4.653960 1390 +meta 1 9 4.753590 4.753590 1505 +yanni 2 8 4.875197 9.750394 1713 +gold 1 8 4.875197 4.875197 1745 +dictionari 1 8 4.875197 4.875197 1642 +moder 1 6 5.164786 5.164786 2112 +arrang 1 6 5.164786 5.164786 2023 +webster 1 5 5.347108 5.347108 2468 +album 1 4 5.568345 5.568345 2888 +smaragdaki 2 3 5.857933 11.715866 3851 +serious 1 3 5.857933 5.857933 3663 +alchemi 1 1 6.957497 6.957497 14179 +sitessmaragd 1 1 6.957497 6.957497 14180 +eduyanni 1 1 6.957497 6.957497 14181 +smaragdakisunivers 1 1 6.957497 6.957497 14182 +departmenttai 1 1 6.957497 6.957497 14183 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^son^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^son^ new file mode 100644 index 00000000..ba46fb6a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^son^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 1 384 0.693147 0.693147 11 +link 3 247 1.386294 4.158882 24 +email 2 220 1.386294 2.772588 29 +fall 1 181 1.609438 1.609438 40 +utexa 1 189 1.609438 1.609438 44 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +like 2 132 1.945910 3.891820 81 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +note 1 142 1.945910 1.945910 67 +pleas 2 113 2.197225 4.394450 114 +site 1 106 2.197225 2.197225 119 +person 1 111 2.197225 2.197225 117 +send 1 114 2.197225 2.197225 109 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +center 1 88 2.397895 2.397895 158 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +addit 1 74 2.639057 2.639057 228 +experi 1 64 2.772589 2.772589 283 +previou 1 62 2.772589 2.772589 290 +life 1 50 3.044522 3.044522 375 +visitor 1 49 3.044522 3.044522 371 +mean 2 37 3.332205 6.664410 477 +field 1 37 3.332205 3.332205 482 +effort 1 26 3.688879 3.688879 652 +tell 1 21 3.912023 3.912023 777 +basi 1 20 3.951244 3.951244 828 +ever 1 19 4.007333 4.007333 872 +wind 1 18 4.060443 4.060443 908 +stream 1 15 4.248495 4.248495 1015 +trip 1 14 4.317488 4.317488 1113 +bodi 1 13 4.382027 4.382027 1178 +danc 1 12 4.465908 4.465908 1278 +duli 1 12 4.465908 4.465908 1248 +absolut 1 8 4.875197 4.875197 1646 +wouldn 1 7 5.010635 5.010635 1970 +edward 1 6 5.164786 5.164786 2050 +greatest 1 6 5.164786 5.164786 2073 +ignor 1 5 5.347108 5.347108 2288 +shadow 1 3 5.857933 5.857933 3519 +haiku 1 3 5.857933 5.857933 3811 +eddi 1 3 5.857933 5.857933 3896 +danger 1 2 6.263398 6.263398 5725 +strictli 1 2 6.263398 6.263398 5726 +stimul 1 2 6.263398 6.263398 5712 +minion 3 1 6.957497 20.872491 14184 +asphalt 1 1 6.957497 6.957497 14185 +moonlight 1 1 6.957497 6.957497 14186 +nerv 1 1 6.957497 6.957497 14187 +dy 1 1 6.957497 6.957497 14188 +pania 1 1 6.957497 6.957497 14189 +leaf 1 1 6.957497 6.957497 14190 +afloat 1 1 6.957497 6.957497 14191 +waterfal 1 1 6.957497 6.957497 14192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^souther^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^souther^ new file mode 100644 index 00000000..b1aac978 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^souther^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +email 2 220 1.386294 2.772588 29 +mail 1 238 1.386294 1.386294 22 +utexa 2 189 1.609438 3.218876 44 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +site 1 106 2.197225 2.197225 119 +search 5 95 2.397895 11.989475 155 +member 1 84 2.484907 2.484907 165 +knowledg 1 67 2.708050 2.708050 243 +tech 1 35 3.401197 3.401197 515 +hotlist 1 13 4.382027 4.382027 1199 +souther 1 3 5.857933 5.857933 3795 +southerart 1 1 6.957497 6.957497 14193 +southerresearchbuild 1 1 6.957497 6.957497 14194 +reportsouth 1 1 6.957497 6.957497 14195 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sowmya^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sowmya^ new file mode 100644 index 00000000..b935275f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sowmya^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +address 3 170 1.791759 5.375277 62 +texa 2 160 1.791759 3.583518 64 +network 2 168 1.791759 3.583518 61 +austin 2 168 1.791759 3.583518 63 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +area 1 144 1.945910 1.945910 80 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +theori 1 111 2.197225 2.197225 127 +techniqu 1 99 2.302585 2.302585 138 +learn 3 86 2.484907 7.454721 170 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +exampl 1 77 2.564949 2.564949 195 +resum 1 79 2.564949 2.564949 217 +intellig 1 72 2.639057 2.639057 225 +appli 1 71 2.639057 2.639057 226 +creat 1 63 2.772589 2.772589 277 +taylor 1 63 2.772589 2.772589 287 +approach 1 48 3.044522 3.044522 366 +field 1 37 3.332205 3.332205 482 +tech 1 35 3.401197 3.401197 515 +india 1 32 3.465736 3.465736 550 +postal 1 30 3.555348 3.555348 580 +symbol 1 27 3.637586 3.637586 620 +challeng 1 26 3.688879 3.688879 653 +revis 1 26 3.688879 3.688879 640 +variabl 1 23 3.806662 3.806662 715 +indian 1 22 3.850148 3.850148 769 +madra 1 8 4.875197 4.875197 1770 +connectionist 1 5 5.347108 5.347108 2430 +sowmya 2 4 5.568345 11.136690 2670 +bayesian 1 4 5.568345 5.568345 2671 +groupunivers 1 3 5.857933 5.857933 3831 +multimediaappl 1 3 5.857933 5.857933 3274 +rutger 1 3 5.857933 5.857933 3566 +austinresearchmi 1 2 6.263398 6.263398 5644 +ramachandransowmya 1 1 6.957497 6.957497 14196 +ramachandranmachin 1 1 6.957497 6.957497 14197 +ofartif 1 1 6.957497 6.957497 14198 +learningbayesian 1 1 6.957497 6.957497 14199 +withhidden 1 1 6.957497 6.957497 14200 +thisproblem 1 1 6.957497 6.957497 14201 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sriram^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sriram^ new file mode 100644 index 00000000..661a5f07 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sriram^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +design 1 213 1.386294 1.386294 25 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +utexa 1 189 1.609438 1.609438 44 +texa 4 160 1.791759 7.167036 64 +austin 3 168 1.791759 5.375277 63 +implement 1 152 1.791759 1.791759 52 +click 2 142 1.945910 3.891820 78 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +comment 1 93 2.397895 2.397895 146 +involv 1 71 2.639057 2.639057 227 +free 1 73 2.639057 2.639057 224 +multimedia 1 68 2.708050 2.708050 258 +prof 1 64 2.772589 2.772589 273 +locat 1 59 2.833213 2.833213 303 +advisor 1 51 2.995732 2.995732 355 +hill 1 25 3.737670 3.737670 670 +countri 1 15 4.248495 4.248495 1059 +central 1 13 4.382027 4.382027 1160 +herefor 1 9 4.753590 4.753590 1483 +informationabout 1 9 4.753590 4.753590 1515 +austinaustin 1 7 5.010635 5.010635 1966 +capit 1 7 5.010635 5.010635 1957 +sciencesdepart 1 6 5.164786 5.164786 2020 +isth 1 5 5.347108 5.347108 2532 +edudepart 1 3 5.857933 5.857933 3302 +sriram 3 2 6.263398 18.790194 4550 +multimediai 1 2 6.263398 6.263398 4337 +raocurr 1 1 6.957497 6.957497 14202 +systemoper 1 1 6.957497 6.957497 14203 +multimediagroup 1 1 6.957497 6.957497 14204 +harrickvinpublicationsminegroupcontact 1 1 6.957497 6.957497 14205 +informationofficetai 1 1 6.957497 6.957497 14206 +miscellaneousotherinterest 1 1 6.957497 6.957497 14207 +pagespicturesof 1 1 6.957497 6.957497 14208 +toweraustin 1 1 6.957497 6.957497 14209 +kannada 1 1 6.957497 6.957497 14210 +koota 1 1 6.957497 6.957497 14211 +tamil 1 1 6.957497 6.957497 14212 +sangam 1 1 6.957497 6.957497 14213 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ssinha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ssinha^ new file mode 100644 index 00000000..85418976 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ssinha^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +challeng 1 26 3.688879 3.688879 653 +frame 3 24 3.761200 11.283600 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 +tiger 1 3 5.857933 5.857933 3897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sunghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sunghee^ new file mode 100644 index 00000000..b697b778 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^sunghee^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +engin 2 297 1.098612 2.197224 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +updat 1 191 1.609438 1.609438 41 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +click 2 142 1.945910 3.891820 78 +construct 1 139 1.945910 1.945910 82 +machin 1 129 2.079442 2.079442 95 +assist 1 112 2.197225 2.197225 113 +present 1 91 2.397895 2.397895 145 +real 1 93 2.397895 2.397895 144 +educ 1 86 2.484907 2.484907 191 +master 1 76 2.564949 2.564949 216 +nation 1 74 2.639057 2.639057 240 +august 1 66 2.708050 2.708050 257 +experi 1 64 2.772589 2.772589 283 +prof 1 64 2.772589 2.772589 273 +septemb 1 65 2.772589 2.772589 274 +finger 1 52 2.995732 2.995732 354 +author 1 39 3.258097 3.258097 450 +utc 2 27 3.637586 7.275172 629 +administr 1 27 3.637586 3.637586 628 +log 1 19 4.007333 4.007333 857 +edulast 1 17 4.110874 4.110874 927 +chemic 1 5 5.347108 5.347108 2552 +korea 1 4 5.568345 5.568345 2971 +seoul 2 3 5.857933 11.715866 3783 +aloysiu 1 3 5.857933 5.857933 3829 +choiwelcom 1 2 6.263398 6.263398 5727 +sunghe 3 1 6.957497 20.872491 14214 +choisunghe 1 1 6.957497 6.957497 14215 +nuec 1 1 6.957497 6.957497 14216 +choiemail 1 1 6.957497 6.957497 14217 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^svkakkad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^svkakkad^ new file mode 100644 index 00000000..52d0d622 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^svkakkad^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 5 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +depart 3 457 0.693147 2.079441 12 +research 3 431 0.693147 2.079441 10 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +utexa 1 189 1.609438 1.609438 44 +texa 4 160 1.791759 7.167036 64 +austin 4 168 1.791759 7.167036 63 +address 2 170 1.791759 3.583518 62 +avail 2 169 1.791759 3.583518 48 +implement 1 152 1.791759 1.791759 52 +hall 2 146 1.945910 3.891820 65 +support 1 132 1.945910 1.945910 83 +provid 1 121 2.079442 2.079442 94 +postscript 1 131 2.079442 2.079442 90 +pleas 1 113 2.197225 2.197225 114 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +call 2 91 2.397895 4.795790 153 +center 1 88 2.397895 2.397895 158 +member 1 84 2.484907 2.484907 165 +larg 1 82 2.484907 2.484907 168 +effici 1 73 2.639057 2.639057 233 +taylor 2 63 2.772589 5.545178 287 +descript 1 64 2.772589 2.772589 271 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +best 1 59 2.833213 2.833213 299 +finger 1 52 2.995732 2.995732 354 +hardwar 1 51 2.995732 2.995732 350 +pointer 1 48 3.044522 3.044522 368 +standard 1 48 3.044522 3.044522 365 +postal 1 30 3.555348 3.555348 580 +usual 1 28 3.610918 3.610918 608 +full 1 28 3.610918 3.610918 615 +reach 1 24 3.761200 3.761200 688 +inth 1 22 3.850148 3.850148 741 +along 1 18 4.060443 4.060443 878 +easi 1 16 4.174387 4.174387 969 +brief 1 16 4.174387 4.174387 1001 +novel 1 15 4.248495 4.248495 1039 +persist 2 11 4.553877 9.107754 1367 +motorola 1 9 4.753590 4.753590 1546 +oop 1 8 4.875197 4.875197 1778 +myresum 1 6 5.164786 5.164786 2199 +informationi 1 3 5.857933 5.857933 3871 +swizzl 1 3 5.857933 5.857933 3883 +sheetal 2 2 6.263398 12.526796 5684 +isvia 1 2 6.263398 6.263398 5637 +mypubl 1 2 6.263398 6.263398 5707 +somerset 1 2 6.263398 6.263398 5639 +kakkad 1 2 6.263398 6.263398 5685 +kakkadsheet 1 1 6.957497 6.957497 14218 +kakkadcontact 1 1 6.957497 6.957497 14219 +storagesystem 1 1 6.957497 6.957497 14220 +faulttim 1 1 6.957497 6.957497 14221 +whilefinish 1 1 6.957497 6.957497 14222 +svkakkad 1 1 6.957497 6.957497 14223 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^syu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^syu^ new file mode 100644 index 00000000..8c57619d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^syu^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +student 2 343 1.098612 2.197224 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +check 1 115 2.197225 2.197225 118 +comment 1 93 2.397895 2.397895 146 +octob 1 89 2.397895 2.397895 156 +school 1 84 2.484907 2.484907 188 +want 1 79 2.564949 2.564949 199 +know 1 80 2.564949 2.564949 198 +suggest 1 53 2.944439 2.944439 331 +finger 1 52 2.995732 2.995732 354 +china 1 37 3.332205 3.332205 487 +chines 2 29 3.583519 7.167038 595 +art 1 29 3.583519 3.583519 593 +weather 1 28 3.610918 3.610918 618 +campu 1 27 3.637586 3.637586 623 +todai 1 25 3.737670 3.737670 672 +highli 1 23 3.806662 3.806662 725 +voic 1 21 3.912023 3.912023 806 +item 1 19 4.007333 4.007333 856 +medic 1 17 4.110874 4.110874 958 +academi 1 8 4.875197 4.875197 1735 +scholar 1 6 5.164786 5.164786 2180 +appreci 1 5 5.347108 5.347108 2374 +sale 1 3 5.857933 5.857933 3688 +meyour 1 3 5.857933 5.857933 3858 +registrar 1 2 6.263398 6.263398 5611 +gradaut 1 2 6.263398 6.263398 5612 +studiesut 1 2 6.263398 6.263398 5613 +novelschines 1 2 6.263398 6.263398 5610 +visitorsinc 1 2 6.263398 6.263398 5616 +shengm 2 1 6.957497 13.914994 14224 +homepageabout 1 1 6.957497 6.957497 14225 +classmatesclass 1 1 6.957497 6.957497 14226 +ustc 1 1 6.957497 6.957497 14227 +sciencesus 1 1 6.957497 6.957497 14228 +linksut 1 1 6.957497 6.957497 14229 +libraryutaccesschines 1 1 6.957497 6.957497 14230 +associationchina 1 1 6.957497 6.957497 14231 +chinesechinainternet 1 1 6.957497 6.957497 14232 +magazinestsinghua 1 1 6.957497 6.957497 14233 +bbsncic 1 1 6.957497 6.957497 14234 +bbschines 1 1 6.957497 6.957497 14235 +classicsabout 1 1 6.957497 6.957497 14236 +austinwhat 1 1 6.957497 6.957497 14237 +citylimitsclassifi 1 1 6.957497 6.957497 14238 +austinto 1 1 6.957497 6.957497 14239 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^taowang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^taowang^ new file mode 100644 index 00000000..ba06471c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^taowang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +frame 3 24 3.761200 11.283600 684 +wang 1 21 3.912023 3.912023 790 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tarun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tarun^ new file mode 100644 index 00000000..ac0419d1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tarun^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tewari^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tewari^ new file mode 100644 index 00000000..857b08d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tewari^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +austin 3 168 1.791759 5.375277 63 +texa 1 160 1.791759 1.791759 64 +site 1 106 2.197225 2.197225 119 +send 1 114 2.197225 2.197225 109 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +name 1 72 2.639057 2.639057 220 +multimedia 1 68 2.708050 2.708050 258 +dept 1 64 2.772589 2.772589 291 +plai 1 60 2.833213 2.833213 307 +done 1 47 3.091042 3.091042 381 +option 1 30 3.555348 3.555348 575 +watson 1 8 4.875197 4.875197 1691 +bore 1 7 5.010635 5.010635 1948 +internship 1 3 5.857933 5.857933 3764 +renu 1 1 6.957497 6.957497 14240 +tewarirenu 1 1 6.957497 6.957497 14241 +tewariwhat 1 1 6.957497 6.957497 14242 +addresshom 1 1 6.957497 6.957497 14243 +tewari 1 1 6.957497 6.957497 14244 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tumlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tumlin^ new file mode 100644 index 00000000..b5211b20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^tumlin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 3 431 0.693147 2.079441 10 +system 3 443 0.693147 2.079441 6 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +inform 2 412 0.693147 1.386294 8 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +email 1 220 1.386294 1.386294 29 +paper 2 205 1.609438 3.218876 38 +utexa 2 189 1.609438 3.218876 44 +updat 1 191 1.609438 1.609438 41 +distribut 3 162 1.791759 5.375277 51 +develop 2 174 1.791759 3.583518 53 +address 2 170 1.791759 3.583518 62 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +austin 1 168 1.791759 1.791759 63 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +studi 1 120 2.079442 2.079442 91 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +specif 1 106 2.197225 2.197225 106 +assist 1 112 2.197225 2.197225 113 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +control 2 82 2.484907 4.969814 164 +novemb 2 81 2.484907 4.969814 179 +stuff 1 87 2.484907 2.484907 171 +resourc 1 81 2.484907 2.484907 172 +come 1 78 2.564949 2.564949 202 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +messag 1 76 2.564949 2.564949 212 +state 1 76 2.564949 2.564949 207 +resum 1 79 2.564949 2.564949 217 +logic 1 71 2.639057 2.639057 230 +addit 1 74 2.639057 2.639057 228 +appli 1 71 2.639057 2.639057 226 +html 1 75 2.639057 2.639057 235 +laboratori 1 63 2.772589 2.772589 292 +taylor 1 63 2.772589 2.772589 287 +investig 1 51 2.995732 2.995732 353 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +formal 2 37 3.332205 6.664410 478 +mean 1 37 3.332205 3.332205 477 +soon 1 36 3.367296 3.367296 494 +random 1 34 3.401197 3.401197 511 +photo 1 31 3.496508 3.496508 561 +secur 1 30 3.555348 3.555348 577 +postal 1 30 3.555348 3.555348 580 +client 1 25 3.737670 3.737670 679 +synthesi 3 20 3.951244 11.853732 834 +verif 1 20 3.951244 3.951244 826 +analyz 1 17 4.110874 4.110874 925 +draft 1 14 4.317488 4.317488 1085 +finit 1 14 4.317488 4.317488 1106 +stai 1 12 4.465908 4.465908 1215 +tune 1 12 4.465908 4.465908 1227 +genet 1 10 4.653960 4.653960 1409 +metric 1 7 5.010635 5.010635 1831 +photographi 1 6 5.164786 5.164786 2146 +pierc 2 4 5.568345 11.136690 2623 +queu 1 4 5.568345 5.568345 2648 +blvd 1 4 5.568345 5.568345 3007 +evolutionari 1 3 5.857933 5.857933 3898 +amwork 1 2 6.263398 6.263398 4850 +communicatewith 1 2 6.263398 6.263398 5062 +tumlin 4 1 6.957497 27.829988 14245 +brenda 1 1 6.957497 6.957497 14246 +ladd 1 1 6.957497 6.957497 14247 +authenticationprotocol 1 1 6.957497 6.957497 14248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^twang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^twang^ new file mode 100644 index 00000000..9ebeef2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^twang^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +page 8 705 0.000000 0.000000 3 +comput 5 775 0.000000 0.000000 2 +univers 5 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +system 6 443 0.693147 4.158882 6 +work 3 380 0.693147 2.079441 9 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +cours 4 273 1.098612 4.394448 15 +project 4 340 1.098612 4.394448 18 +time 3 293 1.098612 3.295836 17 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +design 1 213 1.386294 1.386294 25 +fall 4 181 1.609438 6.437752 40 +oper 1 180 1.609438 1.609438 34 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +austin 6 168 1.791759 10.750554 63 +distribut 3 162 1.791759 5.375277 51 +network 3 168 1.791759 5.375277 61 +implement 2 152 1.791759 3.583518 52 +texa 1 160 1.791759 1.791759 64 +algorithm 1 162 1.791759 1.791759 57 +perform 2 143 1.945910 3.891820 74 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +technolog 3 131 2.079442 6.238326 102 +databas 3 122 2.079442 6.238326 86 +spring 2 131 2.079442 4.158884 88 +postscript 1 131 2.079442 2.079442 90 +introduct 1 126 2.079442 2.079442 87 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +theori 2 111 2.197225 4.394450 127 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +associ 2 93 2.397895 4.795790 151 +homepag 1 93 2.397895 2.397895 148 +commun 1 95 2.397895 2.397895 157 +graphic 1 90 2.397895 2.397895 147 +present 1 91 2.397895 2.397895 145 +real 1 93 2.397895 2.397895 144 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +school 1 84 2.484907 2.484907 188 +learn 1 86 2.484907 2.484907 170 +info 1 85 2.484907 2.484907 176 +resum 1 79 2.564949 2.564949 217 +april 1 77 2.564949 2.564949 196 +know 1 80 2.564949 2.564949 198 +html 1 75 2.639057 2.639057 235 +java 5 70 2.708050 13.540250 248 +view 1 70 2.708050 2.708050 254 +multimedia 1 68 2.708050 2.708050 258 +differ 1 66 2.708050 2.708050 253 +prof 9 64 2.772589 24.953301 273 +visit 2 63 2.772589 5.545178 288 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +plan 1 65 2.772589 2.772589 272 +laboratori 1 63 2.772589 2.772589 292 +creat 1 63 2.772589 2.772589 277 +plai 1 60 2.833213 2.833213 307 +unix 4 58 2.890372 11.561488 308 +semest 1 58 2.890372 2.890372 312 +sampl 1 53 2.944439 2.944439 339 +maintain 1 51 2.995732 2.995732 342 +friend 4 48 3.044522 12.178088 376 +format 2 48 3.044522 6.089044 356 +life 1 50 3.044522 3.044522 375 +standard 1 48 3.044522 3.044522 365 +still 1 50 3.044522 3.044522 362 +visitor 1 49 3.044522 3.044522 371 +netscap 2 44 3.135494 6.270988 395 +anoth 1 45 3.135494 3.135494 408 +protocol 1 45 3.135494 3.135494 407 +made 1 44 3.135494 3.135494 398 +favorit 1 44 3.135494 3.135494 410 +compani 2 41 3.218876 6.437752 423 +music 1 42 3.218876 3.218876 436 +form 1 39 3.258097 3.258097 443 +movi 1 40 3.258097 3.258097 459 +tutori 1 39 3.258097 3.258097 437 +china 4 37 3.332205 13.328820 487 +robot 1 36 3.367296 3.367296 497 +copyright 1 36 3.367296 3.367296 495 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +kind 1 32 3.465736 3.465736 541 +anim 2 31 3.496508 6.993016 557 +compon 1 30 3.555348 3.555348 570 +chines 4 29 3.583519 14.334076 595 +synchron 1 29 3.583519 3.583519 588 +full 1 28 3.610918 3.610918 615 +quit 1 27 3.637586 3.637586 633 +never 1 25 3.737670 3.737670 671 +background 1 25 3.737670 3.737670 664 +reach 1 24 3.761200 3.761200 688 +mobil 1 23 3.806662 3.806662 730 +thank 1 23 3.806662 3.806662 721 +recommend 2 22 3.850148 7.700296 737 +wang 3 21 3.912023 11.736069 790 +viewer 2 21 3.912023 7.824046 787 +chen 1 21 3.912023 3.912023 791 +leav 1 21 3.912023 3.912023 772 +mpeg 4 20 3.951244 15.804976 831 +applet 1 20 3.951244 3.951244 827 +beij 1 19 4.007333 4.007333 876 +demo 2 18 4.060443 8.120886 888 +listen 1 18 4.060443 4.060443 907 +seek 1 17 4.110874 4.110874 954 +normal 1 16 4.174387 4.174387 995 +tsinghua 2 13 4.382027 8.764054 1195 +misc 1 13 4.382027 4.382027 1124 +clock 1 11 4.553877 4.553877 1320 +host 1 11 4.553877 4.553877 1306 +player 1 11 4.553877 4.553877 1371 +perl 1 11 4.553877 4.553877 1332 +lake 1 11 4.553877 4.553877 1373 +song 1 11 4.553877 4.553877 1380 +jersei 1 9 4.753590 4.753590 1587 +sound 1 9 4.753590 4.753590 1605 +trust 1 9 4.753590 4.753590 1583 +pure 1 8 4.875197 4.875197 1776 +univeristi 1 8 4.875197 4.875197 1754 +counter 1 8 4.875197 4.875197 1765 +misra 2 7 5.010635 10.021270 1856 +clip 1 7 5.010635 5.010635 1868 +attach 1 7 5.010635 5.010635 1785 +accord 1 7 5.010635 5.010635 1826 +bell 1 6 5.164786 5.164786 2224 +troubl 1 6 5.164786 5.164786 2002 +fussel 1 5 5.347108 5.347108 2300 +opengl 1 5 5.347108 5.347108 2299 +lang 1 5 5.347108 5.347108 2294 +shanghai 3 4 5.568345 16.705035 2925 +republ 1 4 5.568345 5.568345 3032 +restructur 1 4 5.568345 5.568345 2775 +gouda 1 4 5.568345 5.568345 3021 +batori 1 4 5.568345 5.568345 2690 +blvd 1 4 5.568345 5.568345 3007 +tong 5 3 5.857933 29.289665 3258 +zuckerman 1 3 5.857933 5.857933 3205 +underconstruct 1 3 5.857933 5.857933 3889 +nanj 2 2 6.263398 12.526796 5728 +mini 1 2 6.263398 6.263398 5548 +decod 1 2 6.263398 6.263398 4936 +zodiac 1 2 6.263398 6.263398 5729 +twang 1 2 6.263398 6.263398 5730 +eagl 1 2 6.263398 6.263398 5731 +jiao 2 1 6.957497 13.914994 14249 +lucent 2 1 6.957497 13.914994 14250 +nank 1 1 6.957497 6.957497 14251 +summerluc 1 1 6.957497 6.957497 14252 +thissumm 1 1 6.957497 6.957497 14253 +plexton 1 1 6.957497 6.957497 14254 +libari 1 1 6.957497 6.957497 14255 +glut 1 1 6.957497 6.957497 14256 +mariah 1 1 6.957497 6.957497 14257 +boyz 1 1 6.957497 6.957497 14258 +babyfac 1 1 6.957497 6.957497 14259 +haiq 1 1 6.957497 6.957497 14260 +shenfeng 1 1 6.957497 6.957497 14261 +deskmat 1 1 6.957497 6.957497 14262 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ulf^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ulf^ new file mode 100644 index 00000000..6e3a1dd5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ulf^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +languag 3 227 1.386294 4.158882 26 +graduat 1 215 1.386294 1.386294 31 +group 2 183 1.609438 3.218876 36 +updat 1 191 1.609438 1.609438 41 +texa 1 160 1.791759 1.791759 64 +base 1 165 1.791759 1.791759 50 +austin 1 168 1.791759 1.791759 63 +address 1 170 1.791759 1.791759 62 +machin 2 129 2.079442 4.158884 95 +welcom 1 122 2.079442 2.079442 99 +make 1 111 2.197225 2.197225 120 +place 1 106 2.197225 2.197225 124 +well 1 109 2.197225 2.197225 121 +associ 2 93 2.397895 4.795790 151 +search 1 95 2.397895 2.397895 155 +learn 2 86 2.484907 4.969814 170 +activ 1 84 2.484907 2.484907 182 +thing 1 84 2.484907 2.484907 189 +resourc 1 81 2.484907 2.484907 172 +orient 1 80 2.564949 2.564949 205 +prof 1 64 2.772589 2.772589 273 +new 1 64 2.772589 2.772589 262 +januari 1 62 2.772589 2.772589 264 +special 1 56 2.890372 2.890372 320 +archiv 1 49 3.044522 3.044522 364 +natur 2 44 3.135494 6.270988 406 +york 1 41 3.218876 3.218876 435 +print 1 34 3.401197 3.401197 503 +dissert 1 32 3.465736 3.465736 549 +focu 1 30 3.555348 3.555348 571 +consid 1 29 3.583519 3.583519 590 +weather 1 28 3.610918 3.610918 618 +utc 2 27 3.637586 7.275172 629 +yahoo 1 24 3.761200 3.761200 707 +decis 1 23 3.806662 3.806662 728 +voic 1 21 3.912023 3.912023 806 +supervis 1 20 3.951244 3.951244 840 +lyco 1 19 4.007333 4.007333 871 +context 1 13 4.382027 4.382027 1153 +perman 1 11 4.553877 4.553877 1372 +acquisit 1 10 4.653960 4.653960 1465 +moonei 1 9 4.753590 4.753590 1520 +linguist 1 9 4.753590 4.753590 1593 +european 1 8 4.875197 4.875197 1763 +altavista 1 6 5.164786 5.164786 2222 +infoseek 1 6 5.164786 5.164786 2188 +pars 1 5 5.347108 5.347108 2321 +raymond 1 5 5.347108 5.347108 2313 +hermjakob 1 3 5.857933 5.857933 3876 +groupand 1 3 5.857933 5.857933 3873 +signll 1 3 5.857933 5.857933 3877 +galaxi 1 3 5.857933 5.857933 3603 +deutsch 1 3 5.857933 5.857933 3802 +pageulf 1 1 6.957497 6.957497 14263 +hermjakobhello 1 1 6.957497 6.957497 14264 +thedept 1 1 6.957497 6.957497 14265 +austinand 1 1 6.957497 6.957497 14266 +aboutexampl 1 1 6.957497 6.957497 14267 +translationund 1 1 6.957497 6.957497 14268 +einet 1 1 6.957497 6.957497 14269 +dernir 1 1 6.957497 6.957497 14270 +nouvel 1 1 6.957497 6.957497 14271 +alsac 1 1 6.957497 6.957497 14272 +spiegel 1 1 6.957497 6.957497 14273 +svenska 1 1 6.957497 6.957497 14274 +dagbladet 1 1 6.957497 6.957497 14275 +tagesspiegel 1 1 6.957497 6.957497 14276 +vanguardia 1 1 6.957497 6.957497 14277 +welt 1 1 6.957497 6.957497 14278 +zeitplusacm 1 1 6.957497 6.957497 14279 +moltkestr 1 1 6.957497 6.957497 14280 +bnde 1 1 6.957497 6.957497 14281 +germanyphon 1 1 6.957497 6.957497 14282 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^unicron^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^unicron^ new file mode 100644 index 00000000..e69c6db3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^unicron^ @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +softwar 2 220 1.386294 2.772588 30 +gener 1 220 1.386294 1.386294 27 +group 1 183 1.609438 1.609438 36 +utexa 1 189 1.609438 1.609438 44 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +schedul 2 119 2.079442 4.158884 85 +peopl 1 96 2.302585 2.302585 132 +refer 1 78 2.564949 2.564949 203 +new 1 64 2.772589 2.772589 262 +organ 1 65 2.772589 2.772589 265 +taylor 1 63 2.772589 2.772589 287 +street 1 63 2.772589 2.772589 293 +directori 1 45 3.135494 3.135494 396 +sport 3 25 3.737670 11.213010 683 +entertain 1 12 4.465908 4.465908 1286 +magic 1 11 4.553877 4.553877 1358 +perman 1 11 4.553877 4.553877 1372 +gather 1 8 4.875197 4.875197 1719 +lanc 2 4 5.568345 11.136690 3022 +champion 1 4 5.568345 5.568345 2982 +tokuda 2 3 5.857933 11.715866 3266 +twelv 1 3 5.857933 5.857933 3899 +hawaii 1 3 5.857933 5.857933 3888 +intramur 2 2 6.263398 12.526796 5590 +unicron 1 1 6.957497 6.957497 14283 +financ 1 1 6.957497 6.957497 14284 +heeia 1 1 6.957497 6.957497 14285 +kaneoh 1 1 6.957497 6.957497 14286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vbb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vbb^ new file mode 100644 index 00000000..0b319b28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vbb^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +email 1 220 1.386294 1.386294 29 +utexa 2 189 1.609438 3.218876 44 +austin 2 168 1.791759 3.583518 63 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +hall 2 146 1.945910 3.891820 65 +click 1 142 1.945910 1.945910 78 +send 1 114 2.197225 2.197225 109 +find 1 111 2.197225 2.197225 111 +taylor 1 63 2.772589 2.772589 287 +finger 1 52 2.995732 2.995732 354 +telephon 1 50 3.044522 3.044522 373 +eduoffic 1 33 3.433987 3.433987 531 +postal 1 30 3.555348 3.555348 580 +log 1 19 4.007333 4.007333 857 +account 1 18 4.060443 4.060443 882 +whether 1 17 4.110874 4.110874 918 +informationemail 1 9 4.753590 4.753590 1564 +painter 1 2 6.263398 6.263398 4187 +balayoghanv 1 1 6.957497 6.957497 14432 +balayoghancontact 1 1 6.957497 6.957497 14433 +ineosdi 1 1 6.957497 6.957497 14434 +bookmarksvbb 1 1 6.957497 6.957497 14435 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vin^ new file mode 100644 index 00000000..d54dd30d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vin^ @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 13 775 0.000000 0.000000 2 +scienc 6 640 0.000000 0.000000 4 +univers 5 571 0.000000 0.000000 5 +page 4 705 0.000000 0.000000 3 +system 12 443 0.693147 8.317764 6 +research 4 431 0.693147 2.772588 10 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +oper 2 180 1.609438 3.218876 34 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +utexa 1 189 1.609438 1.609438 44 +distribut 5 162 1.791759 8.958795 51 +network 5 168 1.791759 8.958795 61 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +develop 2 174 1.791759 3.583518 53 +applic 2 170 1.791759 3.583518 56 +algorithm 2 162 1.791759 3.583518 57 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +area 2 144 1.945910 3.891820 80 +object 2 138 1.945910 3.891820 79 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +confer 3 126 2.079442 6.238326 100 +databas 3 122 2.079442 6.238326 86 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +intern 3 108 2.197225 6.591675 128 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +advanc 1 99 2.302585 2.302585 130 +proceed 3 93 2.397895 7.193685 152 +commun 2 95 2.397895 4.795790 157 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +ieee 2 86 2.484907 4.969814 190 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +novemb 1 81 2.484907 2.484907 179 +second 1 81 2.484907 2.484907 166 +wide 1 84 2.484907 2.484907 185 +larg 1 82 2.484907 2.484907 168 +server 3 76 2.564949 7.694847 204 +state 1 76 2.564949 2.564949 207 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +optim 1 79 2.564949 2.564949 197 +april 1 77 2.564949 2.564949 196 +workshop 2 71 2.639057 5.278114 239 +symposium 2 72 2.639057 5.278114 238 +effici 2 73 2.639057 5.278114 233 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +multimedia 18 68 2.708050 48.744900 258 +main 1 67 2.708050 2.708050 256 +integr 1 67 2.708050 2.708050 245 +laboratori 2 63 2.772589 5.545178 292 +taylor 1 63 2.772589 2.772589 287 +march 1 61 2.833213 2.833213 295 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +profession 1 51 2.995732 2.995732 345 +digit 1 52 2.995732 2.995732 348 +california 2 46 3.091042 6.182084 388 +video 2 44 3.135494 6.270988 405 +third 1 43 3.178054 3.178054 412 +tutori 1 39 3.258097 3.258097 437 +industri 1 38 3.295837 3.295837 464 +china 1 37 3.332205 3.332205 487 +award 8 34 3.401197 27.209576 523 +committe 3 34 3.401197 10.203591 522 +tech 1 35 3.401197 3.401197 515 +board 1 33 3.433987 3.433987 528 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +storag 1 31 3.496508 3.496508 553 +rang 1 30 3.555348 3.555348 565 +chair 3 29 3.583519 10.750557 596 +scale 1 28 3.610918 3.610918 613 +arrai 1 27 3.637586 3.637586 627 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +supercomput 1 25 3.737670 3.737670 681 +initi 2 23 3.806662 7.613324 717 +honor 1 23 3.806662 3.806662 729 +disk 2 22 3.850148 7.700296 747 +director 1 22 3.850148 3.850148 767 +indian 1 22 3.850148 3.850148 769 +beij 1 19 4.007333 4.007333 876 +speed 1 18 4.060443 4.060443 911 +failur 1 18 4.060443 4.060443 898 +germani 1 17 4.110874 4.110874 946 +diego 2 16 4.174387 8.348774 992 +taiwan 1 16 4.174387 4.174387 1006 +intel 1 16 4.174387 4.174387 1000 +atth 1 15 4.248495 4.248495 1019 +audio 2 14 4.317488 8.634976 1094 +heterogen 1 14 4.317488 4.317488 1090 +nasa 1 13 4.382027 4.382027 1188 +career 2 12 4.465908 8.931816 1287 +placement 1 10 4.653960 4.653960 1420 +ataustin 1 9 4.753590 4.753590 1610 +editori 1 9 4.753590 4.753590 1611 +vice 1 9 4.753590 4.753590 1604 +transmiss 1 9 4.753590 4.753590 1588 +recoveri 1 9 4.753590 4.753590 1474 +creativ 1 8 4.875197 4.875197 1777 +harrick 1 7 5.010635 5.010635 1849 +bombai 1 7 5.010635 5.010635 1972 +sponsor 2 6 5.164786 10.329572 2133 +internationalconfer 1 6 5.164786 5.164786 2051 +microsystem 1 6 5.164786 5.164786 2160 +ofdistribut 1 5 5.347108 5.347108 2316 +row 1 5 5.347108 5.347108 2330 +colorado 1 4 5.568345 5.568345 2938 +innov 1 4 5.568345 5.568345 2933 +multimediasystem 1 4 5.568345 5.568345 2701 +venkat 1 4 5.568345 5.568345 2702 +goyal 4 3 5.857933 23.431732 3268 +multimediacomput 1 3 5.857933 5.857933 3841 +mobilecomput 1 3 5.857933 5.857933 3629 +shenoi 1 3 5.857933 5.857933 3269 +ftc 1 3 5.857933 5.857933 3275 +rangan 1 3 5.857933 5.857933 3270 +durham 1 3 5.857933 5.857933 3279 +hampshir 1 3 5.857933 5.857933 3280 +mitsubishi 1 3 5.857933 5.857933 3842 +merl 1 3 5.857933 5.857933 3843 +andnetwork 2 2 6.263398 12.526796 5751 +icdc 1 2 6.263398 6.263398 5191 +protocolsfor 1 2 6.263398 6.263398 5204 +inmulti 1 2 6.263398 6.263398 4334 +annualintern 1 2 6.263398 6.263398 4335 +pasadena 1 2 6.263398 6.263398 4336 +gemmel 1 2 6.263398 6.263398 4332 +kandlur 1 2 6.263398 6.263398 4321 +ofmultimedia 1 2 6.263398 6.263398 4322 +ieeeintern 1 2 6.263398 6.263398 4333 +icmc 1 2 6.263398 6.263398 4323 +delaybound 1 2 6.263398 6.263398 4342 +fordigit 1 2 6.263398 6.263398 5752 +nossdav 1 2 6.263398 6.263398 4344 +federalinstitut 1 2 6.263398 6.263398 5539 +eurograph 2 1 6.957497 13.914994 14436 +vinharrick 1 1 6.957497 6.957497 14437 +electronicimag 1 1 6.957497 6.957497 14438 +kaohsiung 1 1 6.957497 6.957497 14439 +rostock 1 1 6.957497 6.957497 14440 +interestmultimedia 1 1 6.957497 6.957497 14441 +anend 1 1 6.957497 6.957497 14442 +thintern 1 1 6.957497 6.957497 14443 +designingmultimedia 1 1 6.957497 6.957497 14444 +foundationresearch 1 1 6.957497 6.957497 14445 +electricresearch 1 1 6.957497 6.957497 14446 +electrospacesystem 1 1 6.957497 6.957497 14447 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vipin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vipin^ new file mode 100644 index 00000000..97a38662 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vipin^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +number 2 130 2.079442 4.158884 97 +report 1 131 2.079442 2.079442 92 +pleas 2 113 2.197225 4.394450 114 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +thing 1 84 2.484907 2.484907 189 +activ 1 84 2.484907 2.484907 182 +academ 1 82 2.484907 2.484907 178 +know 1 80 2.564949 2.564949 198 +resum 1 79 2.564949 2.564949 217 +street 1 63 2.772589 2.772589 293 +semest 1 58 2.890372 2.890372 312 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +give 1 50 3.044522 3.044522 359 +visitor 1 49 3.044522 3.044522 371 +soon 1 36 3.367296 3.367296 494 +go 1 33 3.433987 3.433987 529 +india 1 32 3.465736 3.465736 550 +art 1 29 3.583519 3.583519 593 +pass 1 28 3.610918 3.610918 611 +though 1 27 3.637586 3.637586 622 +never 1 25 3.737670 3.737670 671 +color 1 22 3.850148 3.850148 762 +increas 1 20 3.951244 3.951244 829 +sure 1 20 3.951244 3.951244 813 +log 1 19 4.007333 4.007333 857 +less 1 18 4.060443 4.060443 892 +medic 1 17 4.110874 4.110874 958 +match 1 16 4.174387 4.174387 965 +sign 1 16 4.174387 4.174387 970 +guest 1 12 4.465908 4.465908 1220 +incomplet 1 9 4.753590 4.753590 1575 +risk 1 8 4.875197 4.875197 1689 +yeah 1 6 5.164786 5.164786 2195 +put 1 6 5.164786 5.164786 2017 +guestbook 2 5 5.347108 10.694216 2475 +delhi 1 5 5.347108 5.347108 2530 +haven 2 4 5.568345 11.136690 3037 +shall 1 3 5.857933 5.857933 3891 +vipin 2 2 6.263398 12.526796 5579 +interestscours 1 2 6.263398 6.263398 5026 +reset 1 2 6.263398 6.263398 5236 +decreas 1 2 6.263398 6.263398 4877 +undergraduatefrom 1 1 6.957497 6.957497 14448 +interestsreportsy 1 1 6.957497 6.957497 14449 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vl^ new file mode 100644 index 00000000..fe9791fb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vl^ @@ -0,0 +1,156 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +paper 3 205 1.609438 4.828314 38 +utexa 1 189 1.609438 1.609438 44 +austin 1 168 1.791759 1.791759 63 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +texa 1 160 1.791759 1.791759 64 +note 2 142 1.945910 3.891820 67 +object 1 138 1.945910 1.945910 79 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +lectur 1 135 1.945910 1.945910 73 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +mathemat 4 108 2.197225 8.788900 123 +place 1 106 2.197225 2.197225 124 +world 1 115 2.197225 2.197225 126 +intern 1 108 2.197225 2.197225 128 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +school 1 84 2.484907 2.484907 188 +state 1 76 2.564949 2.564949 207 +line 1 75 2.639057 2.639057 231 +new 2 64 2.772589 5.545178 262 +import 1 65 2.772589 2.772589 282 +taylor 1 63 2.772589 2.772589 287 +reason 1 57 2.890372 2.890372 318 +three 1 54 2.944439 2.944439 330 +profession 1 51 2.995732 2.995732 345 +right 1 48 3.044522 3.044522 363 +california 1 46 3.091042 3.091042 388 +better 1 45 3.135494 3.135494 401 +favorit 1 44 3.135494 3.135494 410 +live 1 40 3.258097 3.258097 451 +paul 1 38 3.295837 3.295837 471 +close 1 38 3.295837 3.295837 465 +feel 2 37 3.332205 6.664410 483 +sciencesunivers 1 37 3.332205 3.332205 486 +survei 1 35 3.401197 3.401197 513 +human 2 32 3.465736 6.931472 546 +dissert 1 32 3.465736 3.465736 549 +taken 1 31 3.496508 3.496508 555 +scientist 1 31 3.496508 3.496508 560 +postal 1 30 3.555348 3.555348 580 +turn 1 29 3.583519 3.583519 586 +quot 1 29 3.583519 3.583519 582 +mind 1 27 3.637586 3.637586 632 +fellow 1 24 3.761200 3.761200 701 +famili 1 23 3.806662 3.806662 735 +initi 1 23 3.806662 3.806662 717 +finish 1 22 3.850148 3.850148 748 +wang 1 21 3.912023 3.912023 790 +appropri 1 18 4.060443 4.060443 883 +germani 2 17 4.110874 8.221748 946 +white 1 17 4.110874 4.110874 951 +precis 1 15 4.248495 4.248495 1023 +countri 1 15 4.248495 4.248495 1059 +incomput 1 14 4.317488 4.317488 1096 +becam 1 14 4.317488 4.317488 1117 +stori 1 14 4.317488 4.317488 1087 +daniel 1 12 4.465908 4.465908 1233 +vladimir 2 11 4.553877 9.107754 1324 +america 1 11 4.553877 4.553877 1370 +black 2 10 4.653960 9.307920 1418 +sentenc 1 10 4.653960 4.653960 1413 +gain 1 8 4.875197 4.875197 1730 +secretari 1 8 4.875197 4.875197 1775 +elect 1 8 4.875197 4.875197 1771 +joke 1 8 4.875197 4.875197 1620 +centenni 1 7 5.010635 5.010635 1967 +sciencesat 1 7 5.010635 5.010635 1968 +austinaustin 1 7 5.010635 5.010635 1966 +race 2 5 5.347108 10.694216 2417 +lifschitz 1 5 5.347108 5.347108 2542 +ortega 1 5 5.347108 5.347108 2559 +lost 1 5 5.347108 5.347108 2358 +petersburg 2 4 5.568345 11.136690 2989 +insight 1 4 5.568345 5.568345 3024 +nonmonoton 1 4 5.568345 5.568345 3023 +evid 1 4 5.568345 5.568345 2768 +ratio 1 4 5.568345 5.568345 2942 +silli 1 4 5.568345 5.568345 3038 +dijkstra 1 3 5.857933 5.857933 3173 +armi 1 3 5.857933 5.857933 3562 +prison 1 3 5.857933 5.857933 3907 +tortur 1 3 5.857933 5.857933 3634 +district 1 3 5.857933 5.857933 3756 +civil 1 3 5.857933 5.857933 3908 +russia 2 2 6.263398 12.526796 5756 +spirit 1 2 6.263398 6.263398 5234 +theamerican 1 2 6.263398 6.263398 5120 +programmingand 1 2 6.263398 6.263398 4940 +edsger 1 2 6.263398 6.263398 5740 +convoc 1 2 6.263398 6.263398 5757 +nomin 1 2 6.263398 6.263398 5758 +helm 1 2 6.263398 6.263398 4217 +burton 1 2 6.263398 6.263398 5759 +polic 1 2 6.263398 6.263398 5560 +democrat 1 2 6.263398 6.263398 5567 +admit 1 2 6.263398 6.263398 5429 +neutral 1 2 6.263398 6.263398 5760 +lifschitzwhen 1 1 6.957497 6.957497 14488 +burden 1 1 6.957497 6.957497 14489 +downcast 1 1 6.957497 6.957497 14490 +gladli 1 1 6.957497 6.957497 14491 +therealm 1 1 6.957497 6.957497 14492 +lucid 1 1 6.957497 6.957497 14493 +grasp 1 1 6.957497 6.957497 14494 +isobtain 1 1 6.957497 6.957497 14495 +pleasantli 1 1 6.957497 6.957497 14496 +conceptform 1 1 6.957497 6.957497 14497 +bernai 1 1 6.957497 6.957497 14498 +lifschitzgottesman 1 1 6.957497 6.957497 14499 +texasat 1 1 6.957497 6.957497 14500 +forartifici 1 1 6.957497 6.957497 14501 +intelligenceb 1 1 6.957497 6.957497 14502 +branchof 1 1 6.957497 6.957497 14503 +steklov 1 1 6.957497 6.957497 14504 +interesttempor 1 1 6.957497 6.957497 14505 +reasoningand 1 1 6.957497 6.957497 14506 +aboutactionslog 1 1 6.957497 6.957497 14507 +reasoningteachingoth 1 1 6.957497 6.957497 14508 +activitiespap 1 1 6.957497 6.957497 14509 +bylifschitz 1 1 6.957497 6.957497 14510 +studentsrecommend 1 1 6.957497 6.957497 14511 +speechgood 1 1 6.957497 6.957497 14512 +madelein 1 1 6.957497 6.957497 14513 +albright 1 1 6.957497 6.957497 14514 +regain 1 1 6.957497 6.957497 14515 +soviet 1 1 6.957497 6.957497 14516 +recycl 1 1 6.957497 6.957497 14517 +actbad 1 1 6.957497 6.957497 14518 +sequest 1 1 6.957497 6.957497 14519 +archeolog 1 1 6.957497 6.957497 14520 +societynot 1 1 6.957497 6.957497 14521 +redrawn 1 1 6.957497 6.957497 14522 +basisoth 1 1 6.957497 6.957497 14523 +amnesti 1 1 6.957497 6.957497 14524 +monthcontact 1 1 6.957497 6.957497 14525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^ new file mode 100644 index 00000000..98fb5d8e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +design 3 213 1.386294 4.158882 25 +email 1 220 1.386294 1.386294 29 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +utexa 1 189 1.609438 1.609438 44 +algorithm 4 162 1.791759 7.167036 57 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +parallel 3 169 1.791759 5.375277 60 +address 2 170 1.791759 3.583518 62 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +hall 1 146 1.945910 1.945910 65 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +theori 1 111 2.197225 2.197225 127 +access 1 102 2.302585 2.302585 136 +sinc 1 90 2.397895 2.397895 159 +complet 1 77 2.564949 2.564949 208 +april 1 77 2.564949 2.564949 196 +effici 1 73 2.639057 2.639057 233 +evalu 2 64 2.772589 5.545178 266 +copi 1 63 2.772589 2.772589 284 +taylor 1 63 2.772589 2.772589 287 +visit 1 63 2.772589 2.772589 288 +faculti 1 56 2.890372 2.890372 325 +vita 1 38 3.295837 3.295837 473 +profil 1 30 3.555348 3.555348 581 +postal 1 30 3.555348 3.555348 580 +experiment 1 26 3.688879 3.688879 645 +mine 1 26 3.688879 3.688879 654 +sequenti 1 22 3.850148 3.850148 745 +offici 1 18 4.060443 4.060443 894 +princeton 1 15 4.248495 4.248495 1042 +interestsmi 1 10 4.653960 4.653960 1462 +regent 1 5 5.347108 5.347108 2551 +vijaya 1 4 5.568345 5.568345 2677 +primarilyin 1 3 5.857933 5.857933 3832 +ramachandranvijaya 1 1 6.957497 6.957497 14450 +ramachandranblakemor 1 1 6.957497 6.957497 14451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html new file mode 100644 index 00000000..b1b96240 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vlr^sac.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 19 775 0.000000 0.000000 2 +scienc 6 640 0.000000 0.000000 4 +univers 6 571 0.000000 0.000000 5 +research 5 431 0.693147 3.465735 10 +interest 3 384 0.693147 2.079441 11 +program 1 374 0.693147 0.693147 7 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +mail 9 238 1.386294 12.476646 22 +design 4 213 1.386294 5.545176 25 +also 2 259 1.386294 2.772588 28 +utexa 11 189 1.609438 17.703818 44 +list 9 201 1.609438 14.484942 39 +group 5 183 1.609438 8.047190 36 +includ 2 208 1.609438 3.218876 42 +fall 1 181 1.609438 1.609438 40 +algorithm 15 162 1.791759 26.876385 57 +parallel 4 169 1.791759 7.167036 60 +texa 3 160 1.791759 5.375277 64 +austin 2 168 1.791759 3.583518 63 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +analysi 3 124 2.079442 6.238326 98 +number 2 130 2.079442 4.158884 97 +machin 1 129 2.079442 2.079442 95 +spring 1 131 2.079442 2.079442 88 +schedul 1 119 2.079442 2.079442 85 +confer 1 126 2.079442 2.079442 100 +theori 12 111 2.197225 26.366700 127 +send 2 114 2.197225 4.394450 109 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +mani 1 92 2.397895 2.397895 150 +activ 2 84 2.484907 4.969814 182 +ieee 2 86 2.484907 4.969814 190 +resourc 1 81 2.484907 2.484907 172 +novemb 1 81 2.484907 2.484907 179 +member 1 84 2.484907 2.484907 165 +messag 3 76 2.564949 7.694847 212 +state 2 76 2.564949 5.129898 207 +method 1 80 2.564949 2.564949 213 +symposium 5 72 2.639057 13.195285 238 +effici 2 73 2.639057 5.278114 233 +meet 2 72 2.639057 5.278114 229 +solv 1 73 2.639057 2.639057 234 +david 1 71 2.639057 2.639057 232 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +differ 2 66 2.708050 5.416100 253 +complex 4 64 2.772589 11.090356 269 +foundat 3 62 2.772589 8.317767 286 +result 1 65 2.772589 2.772589 281 +organ 1 65 2.772589 2.772589 265 +import 1 65 2.772589 2.772589 282 +virtual 1 62 2.772589 2.772589 285 +dept 1 64 2.772589 2.772589 291 +locat 2 59 2.833213 5.666426 303 +faculti 2 56 2.890372 5.780744 325 +sever 2 56 2.890372 5.780744 322 +major 1 56 2.890372 2.890372 315 +space 1 57 2.890372 2.890372 310 +semest 1 58 2.890372 2.890372 312 +special 1 56 2.890372 2.890372 320 +processor 1 54 2.944439 2.944439 335 +talk 1 53 2.944439 2.944439 336 +pointer 1 48 3.044522 3.044522 368 +electron 2 47 3.091042 6.182084 379 +execut 1 45 3.135494 3.135494 404 +term 1 43 3.178054 3.178054 411 +theoret 3 39 3.258097 9.774291 446 +announc 3 40 3.258097 9.774291 441 +seminar 1 38 3.295837 3.295837 470 +random 4 34 3.401197 13.604788 511 +bibliographi 2 34 3.401197 6.802394 518 +post 1 35 3.401197 3.401197 505 +next 1 34 3.401197 3.401197 517 +committe 1 34 3.401197 3.401197 522 +express 1 32 3.465736 3.465736 540 +ad 1 32 3.465736 3.465736 544 +often 1 31 3.496508 3.496508 551 +scientist 1 31 3.496508 3.496508 560 +graph 4 30 3.555348 14.221392 576 +focu 1 30 3.555348 3.555348 571 +focus 1 29 3.583519 3.583519 584 +measur 1 28 3.610918 3.610918 609 +held 1 28 3.610918 3.610918 600 +becom 1 28 3.610918 3.610918 603 +bound 1 26 3.688879 3.688879 659 +berkelei 1 26 3.688879 3.688879 657 +request 1 26 3.688879 3.688879 635 +consist 1 26 3.688879 3.688879 651 +fundament 1 25 3.737670 3.737670 661 +greg 2 24 3.761200 7.522400 695 +sent 2 22 3.850148 7.700296 763 +serv 1 22 3.850148 3.850148 758 +alumni 1 21 3.912023 3.912023 807 +siam 1 21 3.912023 3.912023 800 +region 1 19 4.007333 4.007333 875 +north 1 19 4.007333 4.007333 873 +hypertext 1 19 4.007333 4.007333 865 +stand 2 18 4.060443 8.120886 891 +lower 1 18 4.060443 4.060443 886 +attend 1 18 4.060443 4.060443 893 +affili 1 13 4.382027 4.382027 1194 +discret 1 13 4.382027 4.382027 1165 +remov 2 12 4.465908 8.931816 1225 +walk 1 12 4.465908 4.465908 1281 +outsid 1 12 4.465908 4.465908 1219 +distinguish 2 11 4.553877 9.107754 1357 +probabilist 1 11 4.553877 4.553877 1343 +typic 1 11 4.553877 4.553877 1360 +regard 1 11 4.553877 4.553877 1309 +cryptographi 1 9 4.753590 4.753590 1512 +folk 1 9 4.753590 4.753590 1597 +postdoc 1 8 4.875197 4.875197 1724 +elect 1 8 4.875197 4.875197 1771 +calendar 1 8 4.875197 4.875197 1649 +colloquium 1 8 4.875197 4.875197 1734 +bit 1 7 5.010635 5.010635 1833 +foc 1 7 5.010635 5.010635 1880 +sigact 4 6 5.164786 20.659144 2212 +zhou 1 6 5.164786 5.164786 2092 +forum 1 6 5.164786 5.164786 2027 +southern 1 6 5.164786 5.164786 2191 +pool 1 6 5.164786 5.164786 2225 +arrang 1 6 5.164786 5.164786 2023 +sponsor 1 6 5.164786 5.164786 2133 +soda 1 6 5.164786 5.164786 2189 +groupth 1 5 5.347108 5.347108 2549 +provabl 1 5 5.347108 5.347108 2558 +phil 1 5 5.347108 5.347108 2419 +speaker 1 5 5.347108 5.347108 2370 +stoc 1 5 5.347108 5.347108 2491 +vijaya 3 4 5.568345 16.705035 2677 +dalla 2 4 5.568345 11.136690 2930 +combinator 1 4 5.568345 5.568345 2915 +twice 1 4 5.568345 5.568345 2614 +algorithmsand 1 4 5.568345 5.568345 2680 +warm 8 3 5.857933 46.863464 3904 +plaxton 3 3 5.857933 17.573799 3886 +ramachandran 3 3 5.857933 17.573799 3742 +louisiana 2 3 5.857933 11.715866 3902 +zuckerman 1 3 5.857933 5.857933 3205 +sinica 1 3 5.857933 5.857933 3819 +poon 1 3 5.857933 5.857933 3820 +dozen 1 3 5.857933 5.857933 3905 +gripe 1 3 5.857933 5.857933 3257 +surround 1 3 5.857933 5.857933 3492 +spaa 1 3 5.857933 5.857933 3906 +madhukar 2 2 6.263398 12.526796 5633 +baruah 1 2 6.263398 6.263398 5753 +sheng 1 2 6.263398 6.263398 5153 +ckpoon 1 2 6.263398 6.263398 5510 +rajmohan 1 2 6.263398 6.263398 5706 +rajaraman 1 2 6.263398 6.263398 5704 +rraj 1 2 6.263398 6.263398 5705 +sinha 1 2 6.263398 6.263398 5754 +southwestern 1 2 6.263398 6.263398 5744 +andarchitectur 1 2 6.263398 6.263398 5755 +sanjoi 2 1 6.957497 13.914994 14452 +kelsen 2 1 6.957497 13.914994 14453 +ramgop 2 1 6.957497 13.914994 14454 +suel 2 1 6.957497 13.914994 14455 +yuke 2 1 6.957497 13.914994 14456 +grouput 1 1 6.957497 6.957497 14457 +emba 1 1 6.957497 6.957497 14458 +tsan 1 1 6.957497 6.957497 14459 +tshsu 1 1 6.957497 6.957497 14460 +pierr 1 1 6.957497 6.957497 14461 +korupolu 1 1 6.957497 6.957497 14462 +mackenzi 1 1 6.957497 6.957497 14463 +philmac 1 1 6.957497 6.957497 14464 +idbsu 1 1 6.957497 6.957497 14465 +mettu 1 1 6.957497 6.957497 14466 +santanu 1 1 6.957497 6.957497 14467 +ssinha 1 1 6.957497 6.957497 14468 +torsten 1 1 6.957497 6.957497 14469 +lowvolum 1 1 6.957497 6.957497 14470 +themidsouth 1 1 6.957497 6.957497 14471 +midsouthwest 1 1 6.957497 6.957497 14472 +keynot 1 1 6.957497 6.957497 14473 +atut 1 1 6.957497 6.957497 14474 +organizedanoth 1 1 6.957497 6.957497 14475 +methodist 1 1 6.957497 6.957497 14476 +oklahoma 1 1 6.957497 6.957497 14477 +beheld 1 1 6.957497 6.957497 14478 +algorithmsmail 1 1 6.957497 6.957497 14479 +usuallytri 1 1 6.957497 6.957497 14480 +ofaustin 1 1 6.957497 6.957497 14481 +thatinclud 1 1 6.957497 6.957497 14482 +sponsorsth 1 1 6.957497 6.957497 14483 +interestar 1 1 6.957497 6.957497 14484 +thesigact 1 1 6.957497 6.957497 14485 +eccc 1 1 6.957497 6.957497 14486 +rolodex 1 1 6.957497 6.957497 14487 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vsr^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vsr^ new file mode 100644 index 00000000..45a2a5b2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vsr^ @@ -0,0 +1,8 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +offic 2 299 1.098612 2.197224 13 +email 1 220 1.386294 1.386294 29 +utexa 1 189 1.609438 1.609438 44 +phone 2 175 1.791759 3.583518 45 +srinivasan 2 6 5.164786 10.329572 2175 +vaidyaraman 2 2 6.263398 12.526796 5658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vurgun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vurgun^ new file mode 100644 index 00000000..6fb85b95 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^vurgun^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 2 168 1.791759 3.583518 63 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +topic 1 114 2.197225 2.197225 110 +theori 1 111 2.197225 2.197225 127 +memori 1 101 2.302585 2.302585 139 +search 1 95 2.397895 2.397895 155 +learn 1 86 2.484907 2.484907 170 +intellig 1 72 2.639057 2.639057 225 +solv 1 73 2.639057 2.639057 234 +order 1 69 2.708050 2.708050 249 +knowledg 1 67 2.708050 2.708050 243 +artifici 1 63 2.772589 2.772589 280 +visual 1 48 3.044522 3.044522 372 +approach 1 48 3.044522 3.044522 366 +understand 1 47 3.091042 3.091042 384 +term 1 43 3.178054 3.178054 411 +represent 1 35 3.401197 3.401197 512 +neural 1 30 3.555348 3.555348 578 +retriev 1 27 3.637586 3.637586 621 +background 1 25 3.737670 3.737670 664 +cognit 1 16 4.174387 4.174387 986 +usavoic 1 13 4.382027 4.382027 1198 +mepost 1 10 4.653960 4.653960 1472 +attent 1 8 4.875197 4.875197 1651 +interestsi 1 7 5.010635 5.010635 1969 +connectionist 1 5 5.347108 5.347108 2430 +howto 1 2 6.263398 6.263398 5761 +vurgun 2 1 6.957497 13.914994 14526 +sengul 1 1 6.957497 6.957497 14527 +sengulvurgun 1 1 6.957497 6.957497 14528 +ammainli 1 1 6.957497 6.957497 14529 +evolutionaryalgorithm 1 1 6.957497 6.957497 14530 +ofprefer 1 1 6.957497 6.957497 14531 +skillacquisit 1 1 6.957497 6.957497 14532 +mindto 1 1 6.957497 6.957497 14533 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^walbourn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^walbourn^ new file mode 100644 index 00000000..46db29b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^walbourn^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +person 1 111 2.197225 2.197225 117 +server 1 76 2.564949 2.564949 204 +locat 1 59 2.833213 2.833213 303 +chuck 1 14 4.317488 4.317488 1108 +enterpris 1 2 6.263398 6.263398 4839 +walbourn 1 1 6.957497 6.957497 14534 +walbournmi 1 1 6.957497 6.957497 14535 +charybdi 1 1 6.957497 6.957497 14536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^walkerh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^walkerh^ new file mode 100644 index 00000000..60ebf021 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^walkerh^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +fall 1 181 1.609438 1.609438 40 +texa 2 160 1.791759 3.583518 64 +austin 2 168 1.791759 3.583518 63 +avail 1 169 1.791759 1.791759 48 +professor 2 137 1.945910 3.891820 76 +lectur 1 135 1.945910 1.945910 73 +year 1 148 1.945910 1.945910 84 +mathemat 2 108 2.197225 4.394450 123 +follow 1 92 2.397895 2.397895 143 +member 1 84 2.484907 2.484907 165 +activ 1 84 2.484907 2.484907 182 +academ 1 82 2.484907 2.484907 178 +complet 1 77 2.564949 2.564949 208 +visit 2 63 2.772589 5.545178 288 +septemb 2 65 2.772589 5.545178 274 +creat 1 63 2.772589 2.772589 277 +colleg 3 61 2.833213 8.499639 300 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +summer 1 56 2.890372 2.890372 311 +profession 1 51 2.995732 2.995732 345 +appoint 1 49 3.044522 3.044522 358 +math 2 44 3.135494 6.270988 402 +http 1 41 3.218876 3.218876 420 +formal 1 37 3.332205 3.332205 478 +revis 1 26 3.688879 3.688879 640 +period 1 22 3.850148 3.850148 743 +regular 1 17 4.110874 4.110874 929 +photograph 1 15 4.248495 4.248495 1056 +senior 1 14 4.317488 4.317488 1120 +henri 2 10 4.653960 9.307920 1417 +jack 1 8 4.875197 4.875197 1780 +walker 5 3 5.857933 29.289665 3161 +tenur 1 3 5.857933 5.857933 3801 +mackai 2 2 6.263398 12.526796 5762 +grinnel 2 2 6.263398 12.526796 5763 +edua 1 2 6.263398 6.263398 5764 +grin 2 1 6.957497 13.914994 14537 +professorwalk 1 1 6.957497 6.957497 14538 +teachand 1 1 6.957497 6.957497 14539 +atgrinnel 1 1 6.957497 6.957497 14540 +robertson 1 1 6.957497 6.957497 14541 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^warshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^warshaw^ new file mode 100644 index 00000000..dce2ec9b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^warshaw^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 3 431 0.693147 2.079441 10 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +graduat 2 215 1.386294 2.772588 31 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +utexa 1 189 1.609438 1.609438 44 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +base 2 165 1.791759 3.583518 50 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +first 1 140 1.945910 1.945910 71 +studi 2 120 2.079442 4.158884 91 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +manag 1 114 2.197225 2.197225 125 +follow 1 92 2.397895 2.397895 143 +school 1 84 2.484907 2.484907 188 +activ 1 84 2.484907 2.484907 182 +appli 2 71 2.639057 5.278114 226 +knowledg 1 67 2.708050 2.708050 243 +laboratori 1 63 2.772589 2.772589 292 +case 2 51 2.995732 5.991464 351 +maintain 1 51 2.995732 2.995732 342 +cool 1 49 3.044522 3.044522 374 +anoth 1 45 3.135494 3.135494 408 +third 1 43 3.178054 3.178054 412 +posit 1 31 3.496508 3.496508 552 +rule 4 26 3.688879 14.755516 638 +mike 1 24 3.761200 3.761200 703 +basi 2 20 3.951244 7.902488 828 +item 1 19 4.007333 4.007333 856 +accept 1 18 4.060443 4.060443 879 +senior 1 14 4.317488 4.317488 1120 +usavoic 1 13 4.382027 4.382027 1198 +modul 2 10 4.653960 9.307920 1434 +mepost 1 10 4.653960 4.653960 1472 +declar 2 9 4.753590 9.507180 1526 +lane 1 8 4.875197 4.875197 1720 +unpublish 1 6 5.164786 5.164786 2226 +mirank 3 5 5.347108 16.041324 2543 +lanc 1 4 5.568345 5.568345 3022 +warshaw 4 2 6.263398 25.053592 5659 +venu 3 2 6.263398 18.790194 5655 +developedat 1 2 6.263398 6.263398 4078 +obermey 1 2 6.263398 6.263398 5657 +warshawlan 1 1 6.957497 6.957497 14542 +laboratoryinvolv 1 1 6.957497 6.957497 14543 +andat 1 1 6.957497 6.957497 14544 +arlut 1 1 6.957497 6.957497 14545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wchen^ new file mode 100644 index 00000000..f3df696d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wchen^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +utexa 3 189 1.609438 4.828314 44 +austin 5 168 1.791759 8.958795 63 +texa 2 160 1.791759 3.583518 64 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +analysi 1 124 2.079442 2.079442 98 +mathemat 2 108 2.197225 4.394450 123 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +book 1 99 2.302585 2.302585 131 +center 1 88 2.397895 2.397895 158 +comment 1 93 2.397895 2.397895 146 +master 1 76 2.564949 2.564949 216 +decemb 1 80 2.564949 2.564949 215 +refer 1 78 2.564949 2.564949 203 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +java 2 70 2.708050 5.416100 248 +august 1 66 2.708050 2.708050 257 +dept 1 64 2.772589 2.772589 291 +juli 1 60 2.833213 2.833213 305 +unix 1 58 2.890372 2.890372 308 +sampl 1 53 2.944439 2.944439 339 +numer 1 49 3.044522 3.044522 369 +math 1 44 3.135494 3.135494 402 +china 1 37 3.332205 3.332205 487 +expect 1 37 3.332205 3.332205 484 +manual 1 35 3.401197 3.401197 504 +common 1 30 3.555348 3.555348 574 +load 1 28 3.610918 3.610918 601 +chen 2 21 3.912023 7.824046 791 +demo 1 18 4.060443 4.060443 888 +perl 1 11 4.553877 4.553877 1332 +gatewai 1 7 5.010635 5.010635 1942 +fudan 1 3 5.857933 5.857933 3707 +rosett 2 2 6.263398 12.526796 5595 +wchen 2 1 6.957497 13.914994 14546 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wilson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wilson^ new file mode 100644 index 00000000..90a501e2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wilson^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 3 457 0.693147 2.079441 12 +research 3 431 0.693147 2.079441 10 +program 3 374 0.693147 2.079441 7 +inform 1 412 0.693147 0.693147 8 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +languag 2 227 1.386294 2.772588 26 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +group 2 183 1.609438 3.218876 36 +utexa 1 189 1.609438 1.609438 44 +fall 1 181 1.609438 1.609438 40 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +implement 1 152 1.791759 1.791759 52 +hall 2 146 1.945910 3.891820 65 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +memori 1 101 2.302585 2.302585 139 +section 1 94 2.397895 2.397895 149 +orient 1 80 2.564949 2.564949 205 +taylor 2 63 2.772589 5.545178 287 +best 1 59 2.833213 2.833213 299 +paul 2 38 3.295837 6.591674 471 +postal 1 30 3.555348 3.555348 580 +usual 1 28 3.610918 3.610918 608 +reach 1 24 3.761200 3.761200 688 +lead 1 23 3.806662 3.806662 718 +thought 1 17 4.110874 4.110874 945 +wilson 2 9 4.753590 9.507180 1536 +oop 2 8 4.875197 9.750394 1778 +cross 1 8 4.875197 4.875197 1703 +informationi 1 3 5.857933 5.857933 3871 +novelti 1 2 6.263398 6.263398 5765 +ltwilson 1 1 6.957497 6.957497 14547 +headshot 1 1 6.957497 6.957497 14548 +workson 1 1 6.957497 6.957497 14549 +teachingin 1 1 6.957497 6.957497 14550 +sciencesnot 1 1 6.957497 6.957497 14551 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wkmak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wkmak^ new file mode 100644 index 00000000..40ce3629 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wkmak^ @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +also 4 259 1.386294 5.545176 28 +class 1 199 1.609438 1.609438 37 +utexa 1 189 1.609438 1.609438 44 +relat 1 139 1.945910 1.945910 68 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +need 1 98 2.302585 2.302585 135 +homepag 1 93 2.397895 2.397895 148 +school 1 84 2.484907 2.484907 188 +thing 1 84 2.484907 2.484907 189 +start 1 83 2.484907 2.484907 173 +good 2 77 2.564949 5.129898 200 +know 2 80 2.564949 5.129898 198 +come 1 78 2.564949 2.564949 202 +differ 1 66 2.708050 2.708050 253 +think 2 57 2.890372 5.780744 314 +major 1 56 2.890372 2.890372 315 +life 2 50 3.044522 6.089044 375 +friend 1 48 3.044522 3.044522 376 +without 1 50 3.044522 3.044522 370 +give 1 50 3.044522 3.044522 359 +made 1 44 3.135494 3.135494 398 +howev 2 41 3.218876 6.437752 422 +past 1 42 3.218876 3.218876 428 +live 1 40 3.258097 3.258097 451 +realli 1 40 3.258097 3.258097 444 +mean 1 37 3.332205 3.332205 477 +ofth 1 36 3.367296 3.367296 491 +john 1 33 3.433987 3.433987 532 +abl 2 30 3.555348 7.110696 566 +becom 4 28 3.610918 14.443672 603 +though 2 27 3.637586 7.275172 622 +quit 1 27 3.637586 3.637586 633 +decis 1 23 3.806662 3.806662 728 +lead 1 23 3.806662 3.806662 718 +dai 1 22 3.850148 3.850148 753 +born 1 21 3.912023 3.912023 798 +attend 1 18 4.060443 4.060443 893 +record 1 18 4.060443 4.060443 890 +listen 1 18 4.060443 4.060443 907 +accept 1 18 4.060443 4.060443 879 +thought 3 17 4.110874 12.332622 945 +seek 2 17 4.110874 8.221748 954 +whether 1 17 4.110874 4.110874 918 +whole 1 17 4.110874 4.110874 940 +earli 1 16 4.174387 4.174387 968 +month 1 15 4.248495 4.248495 1025 +enough 1 15 4.248495 4.248495 1040 +becam 2 14 4.317488 8.634976 1117 +hong 1 14 4.317488 4.317488 1105 +decid 1 14 4.317488 4.317488 1075 +believ 1 13 4.382027 4.382027 1187 +came 1 13 4.382027 4.382027 1197 +weak 1 13 4.382027 4.382027 1159 +opportun 1 13 4.382027 4.382027 1161 +count 1 12 4.465908 4.465908 1239 +true 2 10 4.653960 9.307920 1422 +reli 1 10 4.653960 4.653960 1411 +strength 2 9 4.753590 9.507180 1494 +kong 1 9 4.753590 4.753590 1602 +clear 1 9 4.753590 4.753590 1488 +trust 1 9 4.753590 4.753590 1583 +said 1 9 4.753590 4.753590 1571 +matter 1 8 4.875197 4.875197 1627 +realiz 1 8 4.875197 4.875197 1739 +christian 7 7 5.010635 35.074445 1949 +therefor 1 7 5.010635 5.010635 1822 +wrong 2 6 5.164786 10.329572 2025 +matthew 1 6 5.164786 5.164786 2193 +church 1 4 5.568345 5.568345 3011 +jesu 4 3 5.857933 23.431732 3624 +faith 2 3 5.857933 11.715866 3363 +bibl 1 3 5.857933 5.857933 3143 +credibl 1 3 5.857933 5.857933 3210 +shouldb 1 3 5.857933 5.857933 3673 +theywil 1 3 5.857933 5.857933 3102 +doubt 1 3 5.857933 5.857933 3119 +nota 1 3 5.857933 5.857933 3785 +holi 5 2 6.263398 31.316990 5711 +christ 3 2 6.263398 18.790194 5766 +arthur 1 2 6.263398 6.263398 5767 +religi 1 2 6.263398 6.263398 4816 +intent 1 2 6.263398 6.263398 5768 +stumbl 1 2 6.263398 6.263398 5349 +hei 1 2 6.263398 6.263398 5769 +forgiv 1 2 6.263398 6.263398 5770 +andto 1 2 6.263398 6.263398 5771 +differencebetween 1 2 6.263398 6.263398 5431 +deed 1 2 6.263398 6.263398 5077 +wedo 1 2 6.263398 6.263398 5772 +sick 1 2 6.263398 6.263398 5773 +ought 1 2 6.263398 6.263398 5365 +hesit 1 2 6.263398 6.263398 5774 +sin 4 1 6.957497 27.829988 14552 +cent 1 1 6.957497 6.957497 14553 +christiani 1 1 6.957497 6.957497 14554 +alittl 1 1 6.957497 6.957497 14555 +totallyunexpect 1 1 6.957497 6.957497 14556 +compulsori 1 1 6.957497 6.957497 14557 +thechristian 1 1 6.957497 6.957497 14558 +tobecom 1 1 6.957497 6.957497 14559 +slife 1 1 6.957497 6.957497 14560 +deepli 1 1 6.957497 6.957497 14561 +mylif 1 1 6.957497 6.957497 14562 +misconcept 1 1 6.957497 6.957497 14563 +christianwa 1 1 6.957497 6.957497 14564 +christianand 1 1 6.957497 6.957497 14565 +lovedeveri 1 1 6.957497 6.957497 14566 +achristian 1 1 6.957497 6.957497 14567 +virtuou 1 1 6.957497 6.957497 14568 +thefellowship 1 1 6.957497 6.957497 14569 +flesh 1 1 6.957497 6.957497 14570 +sinless 1 1 6.957497 6.957497 14571 +sympath 1 1 6.957497 6.957497 14572 +weconfess 1 1 6.957497 6.957497 14573 +cleans 1 1 6.957497 6.957497 14574 +unright 1 1 6.957497 6.957497 14575 +astheir 1 1 6.957497 6.957497 14576 +saviour 1 1 6.957497 6.957497 14577 +gratefulli 1 1 6.957497 6.957497 14578 +redempt 1 1 6.957497 6.957497 14579 +fortheir 1 1 6.957497 6.957497 14580 +justifi 1 1 6.957497 6.957497 14581 +roman 1 1 6.957497 6.957497 14582 +thecontrari 1 1 6.957497 6.957497 14583 +givesu 1 1 6.957497 6.957497 14584 +physician 1 1 6.957497 6.957497 14585 +onour 1 1 6.957497 6.957497 14586 +thetruth 1 1 6.957497 6.957497 14587 +thankgod 1 1 6.957497 6.957497 14588 +wkmak 1 1 6.957497 6.957497 14589 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wylee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wylee^ new file mode 100644 index 00000000..78825f0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^wylee^ @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +page 8 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 8 431 0.693147 5.545176 10 +interest 8 384 0.693147 5.545176 11 +work 6 380 0.693147 4.158882 9 +system 3 443 0.693147 2.079441 6 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +email 2 220 1.386294 2.772588 29 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +group 6 183 1.609438 9.656628 36 +utexa 4 189 1.609438 6.437752 44 +public 2 202 1.609438 3.218876 43 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +austin 5 168 1.791759 8.958795 63 +contact 2 153 1.791759 3.583518 59 +network 2 168 1.791759 3.583518 61 +phone 2 175 1.791759 3.583518 45 +texa 2 160 1.791759 3.583518 64 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +area 1 144 1.945910 1.945910 80 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +machin 2 129 2.079442 4.158884 95 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +topic 3 114 2.197225 6.591675 110 +send 2 114 2.197225 4.394450 109 +world 2 115 2.197225 4.394450 126 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +well 1 109 2.197225 2.197225 121 +user 1 104 2.302585 2.302585 137 +comment 3 93 2.397895 7.193685 146 +pictur 2 89 2.397895 4.795790 160 +graphic 1 90 2.397895 2.397895 147 +real 1 93 2.397895 2.397895 144 +question 1 91 2.397895 2.397895 141 +homepag 1 93 2.397895 2.397895 148 +associ 1 93 2.397895 2.397895 151 +learn 3 86 2.484907 7.454721 170 +internet 3 83 2.484907 7.454721 186 +resourc 3 81 2.484907 7.454721 172 +educ 2 86 2.484907 4.969814 191 +control 2 82 2.484907 4.969814 164 +member 2 84 2.484907 4.969814 165 +start 1 83 2.484907 2.484907 173 +wide 1 84 2.484907 2.484907 185 +interfac 2 79 2.564949 5.129898 209 +state 1 76 2.564949 2.564949 207 +intellig 3 72 2.639057 7.917171 225 +onlin 3 75 2.639057 7.917171 223 +multimedia 1 68 2.708050 2.708050 258 +artifici 1 63 2.772589 2.772589 280 +organ 1 65 2.772589 2.772589 265 +taylor 1 63 2.772589 2.772589 287 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +explor 3 58 2.890372 8.671116 324 +reason 3 57 2.890372 8.671116 318 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +approach 1 48 3.044522 3.044522 366 +frequent 1 49 3.044522 3.044522 367 +physic 1 47 3.091042 3.091042 377 +vision 1 41 3.218876 3.218876 430 +fast 1 42 3.218876 3.218876 429 +movi 1 40 3.258097 3.258097 459 +robot 21 36 3.367296 70.713216 497 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +rang 3 30 3.555348 10.666044 565 +neural 1 30 3.555348 3.555348 578 +common 1 30 3.555348 3.555348 574 +built 3 29 3.583519 10.750557 592 +semant 2 29 3.583519 7.167038 587 +art 2 29 3.583519 7.167038 593 +ask 1 28 3.610918 3.610918 597 +manipul 2 27 3.637586 7.275172 624 +doctor 1 24 3.761200 3.761200 709 +yahoo 1 24 3.761200 3.761200 707 +mobil 2 23 3.806662 7.613324 730 +miscellan 1 23 3.806662 3.806662 731 +hierarchi 2 22 3.850148 7.700296 744 +navig 2 21 3.912023 7.824046 796 +unit 1 21 3.912023 3.912023 779 +love 1 21 3.912023 3.912023 804 +offici 1 18 4.060443 4.060443 894 +spatial 2 16 4.174387 8.348774 988 +commerci 1 16 4.174387 4.174387 1005 +remot 1 15 4.248495 4.248495 1041 +embed 1 14 4.317488 4.317488 1102 +hotlist 2 13 4.382027 8.764054 1199 +primarili 1 13 4.382027 4.382027 1185 +qualit 2 11 4.553877 9.107754 1362 +player 1 11 4.553877 4.553877 1371 +hello 1 10 4.653960 4.653960 1407 +catalog 1 10 4.653960 4.653960 1431 +meta 1 9 4.753590 4.753590 1505 +ring 2 8 4.875197 9.750394 1684 +guitar 2 8 4.875197 9.750394 1758 +autonom 1 8 4.875197 4.875197 1749 +sensor 2 7 5.010635 10.021270 1920 +spot 1 7 5.010635 5.010635 1894 +usenet 1 7 5.010635 5.010635 1839 +race 1 5 5.347108 5.347108 2417 +car 2 4 5.568345 11.136690 2931 +worki 1 4 5.568345 5.568345 3010 +fora 1 4 5.568345 5.568345 2697 +ncsa 1 4 5.568345 5.568345 2767 +motor 1 3 5.857933 5.857933 3909 +badminton 4 2 6.263398 25.053592 5221 +martial 2 2 6.263398 12.526796 5004 +worm 1 2 6.263398 6.263398 5775 +eduperson 1 2 6.263398 6.263398 5776 +ultrason 2 1 6.957497 13.914994 14590 +rhino 2 1 6.957497 13.914994 14591 +robokreta 2 1 6.957497 13.914994 14592 +wyle 2 1 6.957497 13.914994 14593 +clarinet 2 1 6.957497 13.914994 14594 +mobilerobot 1 1 6.957497 6.957497 14595 +rover 1 1 6.957497 6.957497 14596 +tall 1 1 6.957497 6.957497 14597 +robocac 1 1 6.957497 6.957497 14598 +robofest 1 1 6.957497 6.957497 14599 +besar 1 1 6.957497 6.957497 14600 +kicik 1 1 6.957497 6.957497 14601 +chassi 1 1 6.957497 6.957497 14602 +andqualit 1 1 6.957497 6.957497 14603 +malaysia 1 1 6.957497 6.957497 14604 +interestsavid 1 1 6.957497 6.957497 14605 +usba 1 1 6.957497 6.957497 14606 +miscellaneousinterest 1 1 6.957497 6.957497 14607 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xfeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xfeng^ new file mode 100644 index 00000000..d87ff7a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xfeng^ @@ -0,0 +1,9 @@ +term, tf, in documents count, idf, tfidf, wordid +utexa 1 189 1.609438 1.609438 44 +address 2 170 1.791759 3.583518 62 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +austin 1 168 1.791759 1.791759 63 +west 1 83 2.484907 2.484907 192 +xfeng 1 2 6.263398 6.263398 4376 +qaustin 1 1 6.957497 6.957497 14608 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xguo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xguo^ new file mode 100644 index 00000000..8b897bf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xguo^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 2 380 0.693147 1.386294 9 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +student 2 343 1.098612 2.197224 19 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +welcom 1 122 2.079442 2.079442 99 +well 2 109 2.197225 4.394450 121 +find 1 111 2.197225 2.197225 111 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +pictur 3 89 2.397895 7.193685 160 +search 3 95 2.397895 7.193685 155 +present 1 91 2.397895 2.397895 145 +imag 1 91 2.397895 2.397895 161 +stuff 1 87 2.484907 2.484907 171 +nation 1 74 2.639057 2.639057 240 +line 1 75 2.639057 2.639057 231 +multimedia 1 68 2.708050 2.708050 258 +creat 2 63 2.772589 5.545178 277 +major 1 56 2.890372 2.890372 315 +visual 1 48 3.044522 3.044522 372 +right 1 48 3.044522 3.044522 363 +frequent 1 49 3.044522 3.044522 367 +pointer 1 48 3.044522 3.044522 368 +around 1 43 3.178054 3.178054 415 +music 1 42 3.218876 3.218876 436 +york 1 41 3.218876 3.218876 435 +realli 1 40 3.258097 3.258097 444 +feel 1 37 3.332205 3.332205 483 +china 1 37 3.332205 3.332205 487 +soon 1 36 3.367296 3.367296 494 +express 1 32 3.465736 3.465736 540 +travel 2 30 3.555348 7.110696 579 +hard 1 30 3.555348 3.555348 563 +american 1 27 3.637586 3.637586 634 +sport 1 25 3.737670 3.737670 683 +head 1 23 3.806662 3.806662 732 +watch 1 21 3.912023 3.912023 789 +unit 1 21 3.912023 3.912023 779 +goe 1 15 4.248495 4.248495 1044 +club 1 15 4.248495 4.248495 1058 +classic 1 14 4.317488 4.317488 1084 +audio 1 14 4.317488 4.317488 1094 +rank 1 14 4.317488 4.317488 1063 +hopefulli 1 14 4.317488 4.317488 1071 +pretti 1 13 4.382027 4.382027 1191 +walk 1 12 4.465908 4.465908 1281 +newspap 1 12 4.465908 4.465908 1280 +string 1 11 4.553877 4.553877 1340 +keyword 1 11 4.553877 4.553877 1356 +vista 1 10 4.653960 4.653960 1452 +card 1 10 4.653960 4.653960 1435 +hang 1 9 4.753590 4.753590 1499 +soccer 1 8 4.875197 4.875197 1752 +surpris 2 7 5.010635 10.021270 1828 +harrick 1 7 5.010635 5.010635 1849 +photographi 1 6 5.164786 5.164786 2146 +infoseek 1 6 5.164786 5.164786 2188 +financi 1 6 5.164786 5.164786 2197 +atlant 1 5 5.347108 5.347108 2508 +alta 1 4 5.568345 5.568345 3039 +leagu 1 4 5.568345 5.568345 3040 +aswel 1 3 5.857933 5.857933 3286 +serious 1 3 5.857933 5.857933 3663 +monthli 1 3 5.857933 5.857933 3910 +imagin 1 2 6.263398 6.263398 5472 +clearer 1 2 6.263398 6.263398 5676 +marvel 1 2 6.263398 6.263398 5400 +morn 1 2 6.263398 6.263398 5162 +xingang 2 1 6.957497 13.914994 14609 +delight 1 1 6.957497 6.957497 14610 +temporaryresort 1 1 6.957497 6.957497 14611 +llgradual 1 1 6.957497 6.957497 14612 +havesometh 1 1 6.957497 6.957497 14613 +foliag 1 1 6.957497 6.957497 14614 +miata 1 1 6.957497 6.957497 14615 +xguo 1 1 6.957497 6.957497 14616 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xie^ new file mode 100644 index 00000000..dbad7cfb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^xie^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +address 1 170 1.791759 1.791759 62 +geoffrei 1 3 5.857933 5.857933 3505 +pagemov 1 1 6.957497 6.957497 14617 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yangyang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yangyang^ new file mode 100644 index 00000000..38403469 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yangyang^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +challeng 1 26 3.688879 3.688879 653 +frame 3 24 3.761200 11.283600 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +yang 2 8 4.875197 9.750394 1652 +alert 1 5 5.347108 5.347108 2555 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yau^ new file mode 100644 index 00000000..07ecd95b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yau^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +also 1 259 1.386294 1.386294 28 +utexa 1 189 1.609438 1.609438 44 +austin 3 168 1.791759 5.375277 63 +contact 1 153 1.791759 1.791759 59 +texa 1 160 1.791759 1.791759 64 +first 1 140 1.945910 1.945910 71 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +make 1 111 2.197225 2.197225 120 +sinc 1 90 2.397895 2.397895 159 +homepag 1 93 2.397895 2.397895 148 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +david 1 71 2.639057 2.639057 232 +nation 1 74 2.639057 2.639057 240 +street 1 63 2.772589 2.772589 293 +still 1 50 3.044522 3.044522 362 +life 1 50 3.044522 3.044522 375 +get 1 46 3.091042 3.091042 380 +favorit 1 44 3.135494 3.135494 410 +must 1 40 3.258097 3.258097 442 +word 1 34 3.401197 3.401197 508 +quot 1 29 3.583519 3.583519 582 +art 1 29 3.583519 3.583519 593 +hope 1 28 3.610918 3.610918 610 +utc 3 27 3.637586 10.912758 629 +team 1 27 3.637586 3.637586 625 +daili 1 24 3.761200 3.761200 706 +busi 1 21 3.912023 3.912023 784 +tell 1 21 3.912023 3.912023 777 +beauti 1 18 4.060443 4.060443 912 +sept 1 17 4.110874 4.110874 952 +took 1 16 4.174387 4.174387 1010 +classic 2 14 4.317488 8.634976 1084 +hong 1 14 4.317488 4.317488 1105 +stori 1 14 4.317488 4.317488 1087 +introduc 1 13 4.382027 4.382027 1139 +outsid 1 12 4.465908 4.465908 1219 +franc 1 12 4.465908 4.465908 1276 +hello 1 10 4.653960 4.653960 1407 +rich 1 10 4.653960 4.653960 1396 +label 1 10 4.653960 4.653960 1423 +establish 1 9 4.753590 4.753590 1532 +kong 1 9 4.753590 4.753590 1602 +french 1 9 4.753590 4.753590 1511 +guitar 3 8 4.875197 14.625591 1758 +grew 1 8 4.875197 4.875197 1742 +corner 2 7 5.010635 10.021270 1909 +footbal 1 7 5.010635 5.010635 1912 +whatev 1 6 5.164786 5.164786 2097 +artist 1 6 5.164786 5.164786 2127 +seriou 1 5 5.347108 5.347108 2252 +christoph 1 5 5.347108 5.347108 2512 +festiv 1 4 5.568345 5.568345 2952 +tire 1 4 5.568345 5.568345 2799 +align 1 4 5.568345 5.568345 2863 +concert 1 3 5.857933 5.857933 3533 +byth 1 3 5.857933 5.857933 3874 +jesu 1 3 5.857933 5.857933 3624 +passion 1 3 5.857933 5.857933 3633 +michel 1 3 5.857933 5.857933 3791 +medit 1 2 6.263398 6.263398 5777 +retir 1 2 6.263398 6.263398 5674 +christ 1 2 6.263398 6.263398 5766 +wwwdavid 1 1 6.957497 6.957497 14618 +assad 1 1 6.957497 6.957497 14619 +brothersin 1 1 6.957497 6.957497 14620 +parkeningi 1 1 6.957497 6.957497 14621 +guitarist 1 1 6.957497 6.957497 14622 +ofconcert 1 1 6.957497 6.957497 14623 +reconcili 1 1 6.957497 6.957497 14624 +rekindl 1 1 6.957497 6.957497 14625 +theamsterdam 1 1 6.957497 6.957497 14626 +trio 1 1 6.957497 6.957497 14627 +flair 1 1 6.957497 6.957497 14628 +platini 1 1 6.957497 6.957497 14629 +magazinepublish 1 1 6.957497 6.957497 14630 +minist 1 1 6.957497 6.957497 14631 +absmiddl 1 1 6.957497 6.957497 14632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ybliu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ybliu^ new file mode 100644 index 00000000..b8d12e28 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ybliu^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +document 1 121 2.079442 2.079442 89 +version 1 113 2.197225 2.197225 122 +messag 1 76 2.564949 2.564949 212 +browser 1 56 2.890372 2.890372 313 +could 1 46 3.091042 3.091042 383 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +challeng 1 26 3.688879 3.688879 653 +frame 3 24 3.761200 11.283600 684 +navig 1 21 3.912023 3.912023 796 +see 1 11 4.553877 4.553877 1337 +alert 1 5 5.347108 5.347108 2555 +yanbin 1 2 6.263398 6.263398 5599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ykpei^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ykpei^ new file mode 100644 index 00000000..a2f0f3bc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^ykpei^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +us 1 329 1.098612 1.098612 16 +click 1 142 1.945910 1.945910 78 +pleas 1 113 2.197225 2.197225 114 +browser 1 56 2.890372 2.890372 313 +continu 1 39 3.258097 3.258097 448 +oop 1 8 4.875197 4.875197 1778 +yuan 1 3 5.857933 5.857933 3653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yonglu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yonglu^ new file mode 100644 index 00000000..157e6584 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yonglu^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +austin 2 168 1.791759 3.583518 63 +texa 1 160 1.791759 1.791759 64 +network 1 168 1.791759 1.791759 61 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +number 1 130 2.079442 2.079442 97 +mathemat 1 108 2.197225 2.197225 123 +place 1 106 2.197225 2.197225 124 +sinc 1 90 2.397895 2.397895 159 +state 1 76 2.564949 2.564949 207 +servic 2 72 2.639057 5.278114 236 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +china 2 37 3.332205 6.664410 487 +travel 1 30 3.555348 3.555348 579 +postal 1 30 3.555348 3.555348 580 +unit 2 21 3.912023 7.824046 779 +beij 2 19 4.007333 8.014666 876 +beauti 1 18 4.060443 4.060443 912 +wife 1 13 4.382027 4.382027 1196 +tsinghua 1 13 4.382027 4.382027 1195 +stai 1 12 4.465908 4.465908 1215 +jersei 1 9 4.753590 4.753590 1587 +heavi 1 7 5.010635 5.010635 1841 +river 1 6 5.164786 5.164786 2220 +yong 4 4 5.568345 22.273380 2809 +rutger 1 3 5.857933 5.857933 3566 +brunswick 1 3 5.857933 5.857933 3567 +settl 1 2 6.263398 6.263398 5778 +homepageto 1 1 6.957497 6.957497 14633 +homepagey 1 1 6.957497 6.957497 14634 +milanitalian 1 1 6.957497 6.957497 14635 +soccerk 1 1 6.957497 6.957497 14636 +soccernba 1 1 6.957497 6.957497 14637 +sitefox 1 1 6.957497 6.957497 14638 +sportschicago 1 1 6.957497 6.957497 14639 +bullsmichael 1 1 6.957497 6.957497 14640 +jordannflnhlc 1 1 6.957497 6.957497 14641 +rankingmarri 1 1 6.957497 6.957497 14642 +childrenseinfeldcomput 1 1 6.957497 6.957497 14643 +sciencesutilitieshtml 1 1 6.957497 6.957497 14644 +convertersimag 1 1 6.957497 6.957497 14645 +collectionssystemshtmllatexcgitcl 1 1 6.957497 6.957497 14646 +tkjavasoftjavaworldgamelanperlmotiforganizationsacmieeecompaniessunmicrosoftibmat 1 1 6.957497 6.957497 14647 +tmiscinternet 1 1 6.957497 6.957497 14648 +parcel 1 1 6.957497 6.957497 14649 +usp 1 1 6.957497 6.957497 14650 +fedexus 1 1 6.957497 6.957497 14651 +guidefun 1 1 6.957497 6.957497 14652 +todayu 1 1 6.957497 6.957497 14653 +newsstarwavesupermodel 1 1 6.957497 6.957497 14654 +yonglu 1 1 6.957497 6.957497 14655 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^young^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^young^ new file mode 100644 index 00000000..146d85ea --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^young^ @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +system 6 443 0.693147 4.158882 6 +research 4 431 0.693147 2.772588 10 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +parallel 5 169 1.791759 8.958795 60 +applic 3 170 1.791759 5.375277 56 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +professor 3 137 1.945910 5.837730 76 +architectur 1 139 1.945910 1.945910 77 +area 1 144 1.945910 1.945910 80 +analysi 2 124 2.079442 4.158884 98 +high 1 130 2.079442 2.079442 101 +mathemat 4 108 2.197225 8.788900 123 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +memori 1 101 2.302585 2.302585 139 +associ 4 93 2.397895 9.591580 151 +search 2 95 2.397895 4.795790 155 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +larg 5 82 2.484907 12.424535 168 +solut 3 82 2.484907 7.454721 162 +journal 2 83 2.484907 4.969814 183 +institut 1 84 2.484907 2.484907 187 +activ 1 84 2.484907 2.484907 182 +second 1 81 2.484907 2.484907 166 +academ 1 82 2.484907 2.484907 178 +level 1 87 2.484907 2.484907 180 +method 6 80 2.564949 15.389694 213 +david 2 71 2.639057 5.278114 232 +appli 2 71 2.639057 5.278114 226 +solv 2 73 2.639057 5.278114 234 +servic 1 72 2.639057 2.639057 236 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +differ 1 66 2.708050 2.708050 253 +degre 1 69 2.708050 2.708050 259 +previou 1 62 2.772589 2.772589 290 +share 1 59 2.833213 2.833213 304 +sever 2 56 2.890372 5.780744 322 +special 1 56 2.890372 2.890372 320 +index 1 56 2.890372 2.890372 309 +scientif 1 53 2.944439 2.944439 341 +profession 1 51 2.995732 2.995732 345 +numer 3 49 3.044522 9.133566 369 +algebra 3 45 3.135494 9.406482 394 +linear 6 41 3.218876 19.313256 431 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +press 1 42 3.218876 3.218876 419 +procedur 1 36 3.367296 3.367296 488 +award 2 34 3.401197 6.802394 523 +committe 1 34 3.401197 3.401197 522 +next 1 34 3.401197 3.401197 517 +board 1 33 3.433987 3.433987 528 +john 1 33 3.433987 3.433987 532 +profil 2 30 3.555348 7.110696 581 +chair 1 29 3.583519 3.583519 596 +focus 1 29 3.583519 3.583519 584 +packag 1 28 3.610918 3.610918 614 +american 2 27 3.637586 7.275172 634 +supercomput 1 25 3.737670 3.737670 681 +fellow 1 24 3.761200 3.761200 701 +equat 3 23 3.806662 11.419986 724 +honor 1 23 3.806662 3.806662 729 +variabl 1 23 3.806662 3.806662 715 +director 1 22 3.850148 3.850148 767 +siam 1 21 3.912023 3.912023 800 +smith 1 20 3.951244 3.951244 820 +partial 1 18 4.060443 4.060443 900 +differenti 1 17 4.110874 4.110874 921 +young 6 16 4.174387 25.046322 991 +spars 3 16 4.174387 12.523161 989 +vector 1 16 4.174387 4.174387 961 +contribut 1 15 4.248495 4.248495 1021 +researchmi 1 14 4.317488 4.317488 1119 +finit 1 14 4.317488 4.317488 1106 +polynomi 1 14 4.317488 4.317488 1069 +iter 6 12 4.465908 26.795448 1206 +matric 2 10 4.653960 9.307920 1399 +suitabl 1 9 4.753590 4.753590 1486 +carei 2 8 4.875197 9.750394 1781 +harvard 1 7 5.010635 5.010635 1926 +converg 1 7 5.010635 5.010635 1844 +solver 1 7 5.010635 5.010635 1911 +outstand 1 6 5.164786 5.164786 2136 +argonn 1 5 5.347108 5.347108 2461 +singapor 1 5 5.347108 5.347108 2487 +ration 1 5 5.347108 5.347108 2427 +minneapoli 1 5 5.347108 5.347108 2480 +crai 2 4 5.568345 11.136690 3012 +naval 1 4 5.568345 5.568345 2920 +rapidli 1 4 5.568345 5.568345 2850 +graham 1 4 5.568345 5.568345 2817 +wilei 1 4 5.568345 5.568345 2669 +truste 1 3 5.857933 5.857933 3900 +stationari 1 3 5.857933 5.857933 3861 +kincaid 3 2 6.263398 18.790194 5617 +ofmathemat 1 2 6.263398 6.263398 4167 +interestnumer 1 2 6.263398 6.263398 5717 +methodsand 1 2 6.263398 6.263398 5779 +itpack 1 2 6.263398 6.263398 5619 +rassia 1 2 6.263398 6.263398 5620 +omega 1 2 6.263398 6.263398 4368 +pde 1 2 6.263398 6.263398 4505 +youngashbel 1 1 6.957497 6.957497 14656 +webb 1 1 6.957497 6.957497 14657 +issueded 1 1 6.957497 6.957497 14658 +mathematicalsocieti 1 1 6.957497 6.957497 14659 +matrixappl 1 1 6.957497 6.957497 14660 +numericallinear 1 1 6.957497 6.957497 14661 +partialdifferenti 1 1 6.957497 6.957497 14662 +oflinear 1 1 6.957497 6.957497 14663 +andspars 1 1 6.957497 6.957497 14664 +basedon 1 1 6.957497 6.957497 14665 +beingextend 1 1 6.957497 6.957497 14666 +distributedmemori 1 1 6.957497 6.957497 14667 +methodsbas 1 1 6.957497 6.957497 14668 +multilevel 1 1 6.957497 6.957497 14669 +beingdevelop 1 1 6.957497 6.957497 14670 +publicationsd 1 1 6.957497 6.957497 14671 +srivasiava 1 1 6.957497 6.957497 14672 +yanushauska 1 1 6.957497 6.957497 14673 +publ 1 1 6.957497 6.957497 14674 +vona 1 1 6.957497 6.957497 14675 +sepehrnoori 1 1 6.957497 6.957497 14676 +son 1 1 6.957497 6.957497 14677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yschoe^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yschoe^ new file mode 100644 index 00000000..564a4049 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yschoe^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 6 640 0.000000 0.000000 4 +univers 6 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +research 5 431 0.693147 3.465735 10 +depart 3 457 0.693147 2.079441 12 +work 2 380 0.693147 1.386294 9 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +email 1 220 1.386294 1.386294 29 +mail 1 238 1.386294 1.386294 22 +group 5 183 1.609438 8.047190 36 +public 3 202 1.609438 4.828314 43 +utexa 2 189 1.609438 3.218876 44 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +austin 7 168 1.791759 12.542313 63 +texa 5 160 1.791759 8.958795 64 +network 2 168 1.791759 3.583518 61 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +model 3 145 1.945910 5.837730 69 +object 3 138 1.945910 5.837730 79 +relat 2 139 1.945910 3.891820 68 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +report 2 131 2.079442 4.158884 92 +welcom 1 122 2.079442 2.079442 99 +structur 3 106 2.197225 6.591675 105 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +book 2 99 2.302585 4.605170 131 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +comment 1 93 2.397895 2.397895 146 +decemb 1 80 2.564949 2.564949 215 +appear 1 78 2.564949 2.564949 210 +master 1 76 2.564949 2.564949 216 +html 1 75 2.639057 2.639057 235 +summari 1 73 2.639057 2.639057 237 +august 2 66 2.708050 5.416100 257 +differ 1 66 2.708050 2.708050 253 +organ 5 65 2.772589 13.862945 265 +written 2 63 2.772589 5.545178 278 +prof 2 64 2.772589 5.545178 273 +interact 2 62 2.772589 5.545178 270 +function 2 62 2.772589 5.545178 275 +dept 1 64 2.772589 2.772589 291 +septemb 1 65 2.772589 2.772589 274 +thesi 1 57 2.890372 2.890372 327 +digit 3 52 2.995732 8.987196 348 +maintain 1 51 2.995732 2.995732 342 +visitor 1 49 3.044522 3.044522 371 +featur 2 46 3.091042 6.182084 386 +electron 1 47 3.091042 3.091042 379 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +press 1 42 3.218876 3.218876 419 +multipl 1 39 3.258097 3.258097 453 +map 1 39 3.258097 3.258097 452 +hand 2 37 3.332205 6.664410 475 +connect 1 37 3.332205 3.332205 485 +within 1 33 3.433987 3.433987 525 +ad 1 32 3.465736 3.465736 544 +extend 1 32 3.465736 3.465736 539 +photo 1 31 3.496508 3.496508 561 +neural 3 30 3.555348 10.666044 578 +synchron 1 29 3.583519 3.583519 588 +utc 3 27 3.637586 10.912758 629 +repres 2 26 3.688879 7.377758 656 +recognit 3 23 3.806662 11.419986 723 +self 4 22 3.850148 15.400592 761 +newsgroup 1 21 3.912023 3.912023 783 +event 1 18 4.060443 4.060443 896 +interconnect 3 17 4.110874 12.332622 937 +segment 2 17 4.110874 8.221748 931 +outlin 1 17 4.110874 4.110874 914 +cambridg 1 16 4.174387 4.174387 1008 +later 5 15 4.248495 21.242475 1043 +total 1 10 4.653960 4.653960 1398 +risto 4 9 4.753590 19.014360 1523 +miikkulainen 5 8 4.875197 24.375985 1667 +isbn 1 7 5.010635 5.010635 1901 +bunch 1 7 5.010635 5.010635 1861 +joseph 4 5 5.347108 21.388432 2327 +korea 1 4 5.568345 5.568345 2971 +cortex 2 3 5.857933 11.715866 3856 +seoul 1 3 5.857933 5.857933 3783 +cortic 1 3 5.857933 5.857933 3857 +neuron 1 3 5.857933 5.857933 3798 +yoonsuck 7 2 6.263398 43.843786 4177 +choe 7 2 6.263398 43.843786 4178 +lissom 4 2 6.263398 25.053592 5605 +sirosh 4 2 6.263398 25.053592 5609 +yschoe 2 2 6.263398 12.526796 4179 +touretzki 1 2 6.263398 6.263398 4428 +spike 4 1 6.957497 27.829988 14678 +yonsei 1 1 6.957497 6.957497 14679 +systembas 1 1 6.957497 6.957497 14680 +laterali 1 1 6.957497 6.957497 14681 +synerget 1 1 6.957497 6.957497 14682 +actualspik 1 1 6.957497 6.957497 14683 +slissom 1 1 6.957497 6.957497 14684 +beself 1 1 6.957497 6.957497 14685 +retinabi 1 1 6.957497 6.957497 14686 +desynchron 1 1 6.957497 6.957497 14687 +mozer 1 1 6.957497 6.957497 14688 +hasselmo 1 1 6.957497 6.957497 14689 +handwritten 1 1 6.957497 6.957497 14690 +techic 1 1 6.957497 6.957497 14691 +unord 1 1 6.957497 6.957497 14692 +interestingcontact 1 1 6.957497 6.957497 14693 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ new file mode 100644 index 00000000..e7f002f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yuanjing^ @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +student 1 343 1.098612 1.098612 19 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +austin 3 168 1.791759 5.375277 63 +texa 2 160 1.791759 3.583518 64 +develop 1 174 1.791759 1.791759 53 +object 1 138 1.945910 1.945910 79 +spring 1 131 2.079442 2.079442 88 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +mathemat 1 108 2.197225 2.197225 123 +select 1 91 2.397895 2.397895 154 +associ 1 93 2.397895 2.397895 151 +institut 1 84 2.484907 2.484907 187 +orient 1 80 2.564949 2.564949 205 +server 1 76 2.564949 2.564949 204 +interfac 1 79 2.564949 2.564949 209 +logic 1 71 2.639057 2.639057 230 +java 2 70 2.708050 5.416100 248 +previou 1 62 2.772589 2.772589 290 +prof 1 64 2.772589 2.772589 273 +semest 1 58 2.890372 2.890372 312 +publish 1 57 2.890372 2.890372 326 +tabl 1 51 2.995732 2.995732 346 +china 4 37 3.332205 13.328820 487 +common 1 30 3.555348 3.555348 574 +chines 1 29 3.583519 3.583519 595 +client 1 25 3.737670 3.737670 679 +yahoo 1 24 3.761200 3.761200 707 +famili 2 23 3.806662 7.613324 735 +alumni 1 21 3.912023 3.912023 807 +wang 1 21 3.912023 3.912023 790 +chen 1 21 3.912023 3.912023 791 +break 1 20 3.951244 3.951244 812 +synthesi 1 20 3.951244 3.951244 834 +beij 1 19 4.007333 4.007333 876 +brown 1 16 4.174387 4.174387 977 +universityof 1 15 4.248495 4.248495 1061 +hong 1 14 4.317488 4.317488 1105 +nick 1 13 4.382027 4.382027 1180 +perl 1 11 4.553877 4.553877 1332 +engr 1 10 4.653960 4.653960 1427 +academi 1 8 4.875197 4.875197 1735 +yang 1 8 4.875197 4.875197 1652 +gatewai 1 7 5.010635 5.010635 1942 +munich 1 3 5.857933 5.857933 3570 +yuan 1 3 5.857933 5.857933 3653 +manchest 2 2 6.263398 12.526796 4828 +addresspictur 1 2 6.263398 6.263398 5584 +schedulec 1 2 6.263398 6.263398 4190 +gang 1 2 6.263398 6.263398 4530 +yuanj 1 1 6.957497 6.957497 14694 +xuwint 1 1 6.957497 6.957497 14695 +aziz 1 1 6.957497 6.957497 14696 +pagechines 1 1 6.957497 6.957497 14697 +hefei 1 1 6.957497 6.957497 14698 +chinaunivers 1 1 6.957497 6.957497 14699 +atmunich 1 1 6.957497 6.957497 14700 +germanyunivers 1 1 6.957497 6.957497 14701 +higham 1 1 6.957497 6.957497 14702 +lifan 1 1 6.957497 6.957497 14703 +guizhongustc 1 1 6.957497 6.957497 14704 +hailiang 1 1 6.957497 6.957497 14705 +yuhongfriend 1 1 6.957497 6.957497 14706 +linsoftwar 1 1 6.957497 6.957497 14707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yufeng^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yufeng^ new file mode 100644 index 00000000..09f2554f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^yufeng^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +last 1 314 1.098612 1.098612 14 +utexa 1 189 1.609438 1.609438 44 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +number 1 130 2.079442 2.079442 97 +sinc 1 90 2.397895 2.397895 159 +visitor 1 49 3.044522 3.044522 371 +ring 1 8 4.875197 4.875197 1684 +edufing 1 4 5.568345 5.568345 2713 +feng 2 3 5.857933 11.715866 3300 +yufeng 1 1 6.957497 6.957497 14708 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zchen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zchen^ new file mode 100644 index 00000000..1c7f8755 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zchen^ @@ -0,0 +1,69 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +fall 2 181 1.609438 3.218876 40 +utexa 1 189 1.609438 1.609438 44 +modifi 1 178 1.609438 1.609438 35 +austin 3 168 1.791759 5.375277 63 +texa 1 160 1.791759 1.791759 64 +contact 1 153 1.791759 1.791759 59 +click 1 142 1.945910 1.945910 78 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +spring 2 131 2.079442 4.158884 88 +pleas 1 113 2.197225 2.197225 114 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +peopl 1 96 2.302585 2.302585 132 +associ 2 93 2.397895 4.795790 151 +sinc 1 90 2.397895 2.397895 159 +wide 1 84 2.484907 2.484907 185 +info 1 85 2.484907 2.484907 176 +resum 2 79 2.564949 5.129898 217 +master 1 76 2.564949 2.564949 216 +view 1 70 2.708050 2.708050 254 +differ 1 66 2.708050 2.708050 253 +creat 1 63 2.772589 2.772589 277 +maintain 1 51 2.995732 2.995732 342 +format 1 48 3.044522 3.044522 356 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +still 1 50 3.044522 3.044522 362 +visitor 1 49 3.044522 3.044522 371 +china 3 37 3.332205 9.996615 487 +copyright 1 36 3.367296 3.367296 495 +john 1 33 3.433987 3.433987 532 +kind 1 32 3.465736 3.465736 541 +anim 2 31 3.496508 6.993016 557 +chines 2 29 3.583519 7.167038 595 +full 1 28 3.610918 3.610918 615 +thank 1 23 3.806662 3.806662 721 +chen 2 21 3.912023 7.824046 791 +els 1 19 4.007333 4.007333 843 +seek 1 17 4.110874 4.110874 954 +misc 1 13 4.382027 4.382027 1124 +calculu 1 12 4.465908 4.465908 1203 +counter 1 8 4.875197 4.875197 1765 +accord 1 7 5.010635 5.010635 1826 +republ 1 4 5.568345 5.568345 3032 +zhongshan 2 2 6.263398 12.526796 5547 +chenabout 1 2 6.263398 6.263398 5499 +postcript 1 2 6.263398 6.263398 4050 +zodiac 1 2 6.263398 6.263398 5729 +burton 1 2 6.263398 6.263398 5759 +zhii 2 1 6.957497 13.914994 14709 +guangzhou 2 1 6.957497 13.914994 14710 +pagezhii 1 1 6.957497 6.957497 14711 +mefrom 1 1 6.957497 6.957497 14712 +canton 1 1 6.957497 6.957497 14713 +dong 1 1 6.957497 6.957497 14714 +zchen 1 1 6.957497 6.957497 14715 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ new file mode 100644 index 00000000..81561446 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zhouxiao^ @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +univers 3 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +work 2 380 0.693147 1.386294 9 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +utexa 3 189 1.609438 4.828314 44 +fall 2 181 1.609438 3.218876 40 +modifi 1 178 1.609438 1.609438 35 +austin 4 168 1.791759 7.167036 63 +texa 2 160 1.791759 3.583518 64 +algorithm 1 162 1.791759 1.791759 57 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +phone 1 175 1.791759 1.791759 45 +address 1 170 1.791759 1.791759 62 +process 1 142 1.945910 1.945910 72 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +welcom 1 122 2.079442 2.079442 99 +world 3 115 2.197225 6.591675 126 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +internet 2 83 2.484907 4.969814 186 +educ 1 86 2.484907 2.484907 191 +ieee 1 86 2.484907 2.484907 190 +librari 1 87 2.484907 2.484907 181 +build 1 85 2.484907 2.484907 184 +onlin 2 75 2.639057 5.278114 223 +html 1 75 2.639057 2.639057 235 +multimedia 1 68 2.708050 2.708050 258 +main 1 67 2.708050 2.708050 256 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +guid 1 63 2.772589 2.772589 267 +taylor 1 63 2.772589 2.772589 287 +room 1 59 2.833213 2.833213 301 +life 1 50 3.044522 3.044522 375 +directori 1 45 3.135494 3.135494 396 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +http 1 41 3.218876 3.218876 420 +movi 1 40 3.258097 3.258097 459 +societi 1 40 3.258097 3.258097 456 +china 3 37 3.332205 9.996615 487 +chines 1 29 3.583519 3.583519 595 +campu 3 27 3.637586 10.912758 623 +magazin 3 24 3.761200 11.283600 704 +daili 1 24 3.761200 3.761200 706 +yahoo 1 24 3.761200 3.761200 707 +beij 1 19 4.007333 4.007333 876 +lyco 1 19 4.007333 4.007333 871 +beauti 1 18 4.060443 4.060443 912 +sept 1 17 4.110874 4.110874 952 +script 1 13 4.382027 4.382027 1171 +land 1 12 4.465908 4.465908 1273 +entertain 1 12 4.465908 4.465908 1286 +career 1 12 4.465908 4.465908 1287 +sciencesat 1 7 5.010635 5.010635 1968 +digest 1 7 5.010635 5.010635 1864 +zhou 2 6 5.164786 10.329572 2092 +assistantship 1 3 5.857933 5.857933 3660 +stamp 1 3 5.857933 5.857933 3678 +giant 1 3 5.857933 5.857933 3137 +galaxi 1 3 5.857933 5.857933 3603 +kaleidoscop 1 2 6.263398 6.263398 5780 +zhai 1 2 6.263398 6.263398 5709 +zhouxiao 3 1 6.957497 20.872491 14716 +maggi 2 1 6.957497 13.914994 14717 +xiao 2 1 6.957497 13.914994 14718 +buaa 1 1 6.957497 6.957497 14719 +newspag 1 1 6.957497 6.957497 14720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ new file mode 100644 index 00000000..5e7a6c5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.utexas.edu^users^zhuqing^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +student 2 343 1.098612 2.197224 19 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +utexa 1 189 1.609438 1.609438 44 +texa 3 160 1.791759 5.375277 64 +austin 3 168 1.791759 5.375277 63 +like 1 132 1.945910 1.945910 81 +construct 1 139 1.945910 1.945910 82 +document 4 121 2.079442 8.317768 89 +welcom 1 122 2.079442 2.079442 99 +number 1 130 2.079442 2.079442 97 +site 3 106 2.197225 6.591675 119 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +peopl 1 96 2.302585 2.302585 132 +homepag 2 93 2.397895 4.795790 148 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +dynam 4 76 2.564949 10.259796 194 +know 2 80 2.564949 5.129898 198 +meet 1 72 2.639057 2.639057 229 +html 1 75 2.639057 2.639057 235 +degre 1 69 2.708050 2.708050 259 +best 1 59 2.833213 2.833213 299 +maintain 1 51 2.995732 2.995732 342 +friend 3 48 3.044522 9.133566 376 +visitor 1 49 3.044522 3.044522 371 +netscap 2 44 3.135494 6.270988 395 +anoth 1 45 3.135494 3.135494 408 +live 1 40 3.258097 3.258097 451 +china 3 37 3.332205 9.996615 487 +titl 1 31 3.496508 3.496508 556 +travel 1 30 3.555348 3.555348 579 +enjoi 1 26 3.688879 3.688879 660 +inth 1 22 3.850148 3.850148 741 +born 1 21 3.912023 3.912023 798 +alumni 1 21 3.912023 3.912023 807 +beij 1 19 4.007333 4.007333 876 +citi 1 19 4.007333 4.007333 874 +lot 1 18 4.060443 4.060443 889 +former 2 17 4.110874 8.221748 956 +bachelor 1 17 4.110874 4.110874 957 +danc 1 12 4.465908 4.465908 1278 +classmat 2 9 4.753590 9.507180 1516 +capit 1 7 5.010635 5.010635 1957 +peek 1 6 5.164786 5.164786 2169 +peke 2 5 5.347108 10.694216 2539 +valuabl 1 5 5.347108 5.347108 2256 +qing 1 3 5.857933 5.857933 3295 +ofmi 1 3 5.857933 5.857933 3911 +sciencesaustin 1 3 5.857933 5.857933 3828 +oversea 1 2 6.263398 6.263398 5781 +informationand 1 2 6.263398 6.263398 4840 +clike 2 1 6.957497 13.914994 14721 +qinghi 1 1 6.957497 6.957497 14722 +scinc 1 1 6.957497 6.957497 14723 +findmor 1 1 6.957497 6.957497 14724 +pekingunivers 1 1 6.957497 6.957497 14725 +professionalinternetpc 1 1 6.957497 6.957497 14726 +relatedmac 1 1 6.957497 6.957497 14727 +relatedaft 1 1 6.957497 6.957497 14728 +worknetscap 1 1 6.957497 6.957497 14729 +testtwin 1 1 6.957497 6.957497 14730 +eldertwin 1 1 6.957497 6.957497 14731 +youngernetscap 1 1 6.957497 6.957497 14732 +testanim 1 1 6.957497 6.957497 14733 +titledanc 1 1 6.957497 6.957497 14734 +testanoth 1 1 6.957497 6.957497 14735 +testyet 1 1 6.957497 6.957497 14736 +testfriendsthi 1 1 6.957497 6.957497 14737 +xiaohai 1 1 6.957497 6.957497 14738 +shan 1 1 6.957497 6.957497 14739 +shinan 1 1 6.957497 6.957497 14740 +qingunivers 1 1 6.957497 6.957497 14741 +zhuqe 1 1 6.957497 6.957497 14742 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^ new file mode 100644 index 00000000..8be57250 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^ @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +student 3 343 1.098612 3.295836 19 +cours 1 273 1.098612 1.098612 15 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +click 1 142 1.945910 1.945910 78 +technolog 1 131 2.079442 2.079442 102 +seattl 1 120 2.079442 2.079442 103 +intern 1 108 2.197225 2.197225 128 +peopl 1 96 2.302585 2.302585 132 +comment 1 93 2.397895 2.397895 146 +educ 1 86 2.484907 2.484907 191 +refer 1 78 2.564949 2.564949 203 +nation 1 74 2.639057 2.639057 240 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +laboratori 1 63 2.772589 2.772589 292 +faculti 2 56 2.890372 5.780744 325 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +visitor 1 49 3.044522 3.044522 371 +live 1 40 3.258097 3.258097 451 +staff 1 36 3.367296 3.367296 490 +autumn 1 31 3.496508 3.496508 558 +univ 1 28 3.610918 3.610918 617 +progress 1 28 3.610918 3.610918 598 +handl 1 24 3.761200 3.761200 685 +half 1 21 3.912023 3.912023 776 +voic 1 21 3.912023 3.912023 806 +region 2 19 4.007333 8.014666 875 +intel 1 16 4.174387 4.174387 1000 +webmast 1 15 4.248495 4.248495 1045 +desktop 1 10 4.653960 4.653960 1445 +perspect 1 10 4.653960 4.653960 1437 +deadlin 1 9 4.753590 4.753590 1502 +pacif 1 8 4.875197 4.875197 1674 +centuri 1 7 5.010635 5.010635 1935 +elsewher 1 5 5.347108 5.347108 2444 +highlight 1 5 5.347108 5.347108 2340 +medal 1 3 5.857933 5.857933 3912 +theimpact 1 3 5.857933 5.857933 3179 +exponenti 1 3 5.857933 5.857933 3529 +organizationsinclud 1 1 6.957497 6.957497 14743 +ouraffili 1 1 6.957497 6.957497 14744 +regioninclud 1 1 6.957497 6.957497 14745 +spotlightuwwin 1 1 6.957497 6.957497 14746 +programmingcontesttwovideo 1 1 6.957497 6.957497 14747 +initiativesourcolloquia 1 1 6.957497 6.957497 14748 +mbonemajordon 1 1 6.957497 6.957497 14749 +corporationdickkarp 1 1 6.957497 6.957497 14750 +scienceprofessionalmast 1 1 6.957497 6.957497 14751 +departmentoverview 1 1 6.957497 6.957497 14752 +staffposit 1 1 6.957497 6.957497 14753 +newscan 1 1 6.957497 6.957497 14754 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^aberman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^aberman^ new file mode 100644 index 00000000..e57c179b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^aberman^ @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +data 1 170 1.791759 1.791759 49 +avail 1 169 1.791759 1.791759 48 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +postscript 2 131 2.079442 4.158884 90 +confer 1 126 2.079442 2.079442 100 +specif 1 106 2.197225 2.197225 106 +structur 1 106 2.197225 2.197225 105 +proceed 1 93 2.397895 2.397895 152 +imag 1 91 2.397895 2.397895 161 +appear 1 78 2.564949 2.564949 210 +effici 1 73 2.639057 2.639057 233 +unix 1 58 2.890372 2.890372 308 +special 1 56 2.890372 2.890372 320 +format 2 48 3.044522 6.089044 356 +fast 1 42 3.218876 3.218876 429 +multipl 1 39 3.258097 3.258097 453 +winter 1 36 3.367296 3.367296 500 +approxim 1 35 3.401197 3.401197 509 +measur 1 28 3.610918 3.610918 609 +retriev 1 27 3.637586 3.637586 621 +bookmark 1 26 3.688879 3.688879 639 +miscellan 1 23 3.806662 3.806662 731 +andrew 3 19 4.007333 12.021999 849 +beauti 1 18 4.060443 4.060443 912 +protect 1 17 4.110874 4.110874 935 +match 1 16 4.174387 4.174387 965 +wife 1 13 4.382027 4.382027 1196 +usenix 1 12 4.465908 4.465908 1240 +linda 1 10 4.653960 4.653960 1394 +distanc 1 9 4.753590 4.753590 1500 +erik 1 8 4.875197 4.875197 1701 +shapiro 1 8 4.875197 4.875197 1686 +daughter 1 7 5.010635 5.010635 1943 +spie 1 6 5.164786 5.164786 2119 +educomput 1 5 5.347108 5.347108 2524 +selberg 1 5 5.347108 5.347108 2441 +stupid 1 5 5.347108 5.347108 2489 +aberman 1 2 6.263398 6.263398 4429 +bourassa 1 2 6.263398 6.263398 5782 +virgil 1 2 6.263398 6.263398 5783 +melani 1 2 6.263398 6.263398 5784 +berman 4 1 6.957497 27.829988 14755 +debbi 2 1 6.957497 13.914994 14756 +pageandrew 1 1 6.957497 6.957497 14757 +tron 1 1 6.957497 6.957497 14758 +bothpostscript 1 1 6.957497 6.957497 14759 +andhtml 1 1 6.957497 6.957497 14760 +poison 1 1 6.957497 6.957497 14761 +donut 1 1 6.957497 6.957497 14762 +stupidmi 1 1 6.957497 6.957497 14763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^adam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^adam^ new file mode 100644 index 00000000..39c45e90 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^adam^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +washington 3 236 1.386294 4.158882 32 +softwar 3 220 1.386294 4.158882 30 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +recent 2 167 1.791759 3.583518 58 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +seattl 2 120 2.079442 4.158884 103 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +specif 1 106 2.197225 2.197225 106 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +imag 3 91 2.397895 7.193685 161 +graphic 1 90 2.397895 2.397895 147 +sinc 1 90 2.397895 2.397895 159 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +start 1 83 2.484907 2.484907 173 +good 1 77 2.564949 2.564949 200 +view 1 70 2.708050 2.708050 254 +visit 2 63 2.772589 5.545178 288 +plan 1 65 2.772589 2.772589 272 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +undergradu 1 54 2.944439 2.944439 338 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +cool 1 49 3.044522 3.044522 374 +quarter 1 47 3.091042 3.091042 389 +physic 1 47 3.091042 3.091042 377 +made 2 44 3.135494 6.270988 398 +video 1 44 3.135494 3.135494 405 +live 1 40 3.258097 3.258097 451 +join 1 39 3.258097 3.258097 457 +post 1 35 3.401197 3.401197 505 +least 1 35 3.401197 3.401197 516 +someth 1 31 3.496508 3.496508 554 +photo 1 31 3.496508 3.496508 561 +taken 1 31 3.496508 3.496508 555 +pass 1 28 3.610918 3.610918 611 +team 1 27 3.637586 3.637586 625 +great 1 27 3.637586 3.637586 626 +doctor 1 24 3.761200 3.761200 709 +finish 1 22 3.850148 3.850148 748 +hous 1 21 3.912023 3.912023 801 +wrote 1 20 3.951244 3.951244 830 +excel 1 19 4.007333 4.007333 868 +adam 3 17 4.110874 12.332622 934 +ultim 1 17 4.110874 4.110874 943 +earli 1 16 4.174387 4.174387 968 +stock 1 16 4.174387 4.174387 1007 +across 1 16 4.174387 4.174387 974 +bodi 1 13 4.382027 4.382027 1178 +menu 1 13 4.382027 4.382027 1156 +night 1 11 4.553877 4.553877 1319 +thecomput 1 10 4.653960 4.653960 1408 +drink 1 9 4.753590 4.753590 1607 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +trade 1 7 5.010635 5.010635 1815 +occasion 1 7 5.010635 5.010635 1905 +coffe 2 5 5.347108 10.694216 2556 +hair 1 5 5.347108 5.347108 2446 +formerli 1 5 5.347108 5.347108 2397 +multiresolut 1 5 5.347108 5.347108 2423 +frisbe 1 5 5.347108 5.347108 2560 +glass 1 3 5.857933 5.857933 3759 +swarthmor 1 2 6.263398 6.263398 5621 +comet 1 2 6.263398 6.263398 5785 +sculptur 1 2 6.263398 6.263398 5176 +mona 1 2 6.263398 6.263398 5786 +gothic 1 2 6.263398 6.263398 5787 +finkelstein 2 1 6.957497 13.914994 14764 +photocopi 2 1 6.957497 13.914994 14765 +cup 1 1 6.957497 6.957497 14766 +limp 1 1 6.957497 6.957497 14767 +andlack 1 1 6.957497 6.957497 14768 +atprinceton 1 1 6.957497 6.957497 14769 +tibco 1 1 6.957497 6.957497 14770 +teknekron 1 1 6.957497 6.957497 14771 +alarg 1 1 6.957497 6.957497 14772 +calledumatata 1 1 6.957497 6.957497 14773 +thehilari 1 1 6.957497 6.957497 14774 +caff 1 1 6.957497 6.957497 14775 +lardo 1 1 6.957497 6.957497 14776 +chilli 1 1 6.957497 6.957497 14777 +snoqualmi 1 1 6.957497 6.957497 14778 +hyakutak 1 1 6.957497 6.957497 14779 +marcu 1 1 6.957497 6.957497 14780 +dither 1 1 6.957497 6.957497 14781 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^ new file mode 100644 index 00000000..abdf35af --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^ @@ -0,0 +1,147 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +system 6 443 0.693147 4.158882 6 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +engin 2 297 1.098612 2.197224 20 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +design 2 213 1.386294 2.772588 25 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +paper 4 205 1.609438 6.437752 38 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +data 13 170 1.791759 23.292867 49 +parallel 12 169 1.791759 21.501108 60 +algorithm 5 162 1.791759 8.958795 57 +base 4 165 1.791759 7.167036 50 +address 2 170 1.791759 3.583518 62 +applic 2 170 1.791759 3.583518 56 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +perform 4 143 1.945910 7.783640 74 +model 4 145 1.945910 7.783640 69 +process 2 142 1.945910 3.891820 72 +databas 5 122 2.079442 10.397210 86 +machin 2 129 2.079442 4.158884 95 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +provid 1 121 2.079442 2.079442 94 +manag 3 114 2.197225 6.591675 125 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +need 1 98 2.302585 2.302585 135 +present 3 91 2.397895 7.193685 145 +proceed 1 93 2.397895 2.397895 152 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +environ 1 84 2.484907 2.484907 177 +second 1 81 2.484907 2.484907 166 +novemb 1 81 2.484907 2.484907 179 +requir 1 81 2.484907 2.484907 167 +larg 1 82 2.484907 2.484907 168 +method 2 80 2.564949 5.129898 213 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +nation 1 74 2.639057 2.639057 240 +workshop 1 71 2.639057 2.639057 239 +involv 1 71 2.639057 2.639057 227 +august 1 66 2.708050 2.708050 257 +experi 3 64 2.772589 8.317767 283 +laboratori 2 63 2.772589 5.545178 292 +improv 2 62 2.772589 5.545178 289 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +share 1 59 2.833213 2.833213 304 +simpl 1 60 2.833213 2.833213 298 +scientif 4 53 2.944439 11.777756 341 +februari 1 54 2.944439 2.944439 328 +found 1 53 2.944439 2.944439 337 +visual 5 48 3.044522 15.222610 372 +set 2 50 3.044522 6.089044 361 +effect 3 46 3.091042 9.273126 385 +possibl 1 47 3.091042 3.091042 378 +describ 2 45 3.135494 6.270988 400 +show 1 43 3.178054 3.178054 417 +vision 5 41 3.218876 16.094380 430 +fast 3 42 3.218876 9.656628 429 +cost 4 37 3.332205 13.328820 480 +jame 7 35 3.401197 23.808379 507 +post 1 35 3.401197 3.401197 505 +queri 1 33 3.433987 3.433987 524 +scientist 1 31 3.496508 3.496508 560 +load 9 28 3.610918 32.498262 601 +intend 1 28 3.610918 3.610918 599 +experiment 1 26 3.688879 3.688879 645 +enabl 1 26 3.688879 3.688879 655 +supercomput 1 25 3.737670 3.737670 681 +handl 1 24 3.761200 3.761200 685 +highli 1 23 3.806662 3.806662 725 +displai 1 23 3.806662 3.806662 712 +ofwashington 1 22 3.850148 3.850148 766 +avoid 1 21 3.912023 3.912023 799 +facil 1 20 3.951244 3.951244 814 +toolkit 1 20 3.951244 3.951244 835 +longer 1 20 3.951244 3.951244 816 +aid 1 18 4.060443 4.060443 904 +render 6 17 4.110874 24.665244 947 +steven 1 17 4.110874 4.110874 953 +massiv 1 15 4.248495 4.248495 1026 +balanc 7 14 4.317488 30.222416 1112 +save 1 14 4.317488 4.317488 1099 +charl 3 13 4.382027 13.146081 1149 +directli 1 13 4.382027 4.382027 1141 +promot 1 12 4.465908 4.465908 1235 +target 1 12 4.465908 4.465908 1282 +amount 1 12 4.465908 4.465908 1208 +typic 1 11 4.553877 4.553877 1360 +motiv 1 11 4.553877 4.553877 1346 +extrem 1 11 4.553877 4.553877 1330 +mesh 1 11 4.553877 4.553877 1351 +linda 1 10 4.653960 4.653960 1394 +tanimoto 1 10 4.653960 4.653960 1429 +occur 1 9 4.753590 4.753590 1572 +factor 1 9 4.753590 4.753590 1544 +frank 1 9 4.753590 4.753590 1568 +transmiss 1 9 4.753590 4.753590 1588 +polygon 5 8 4.875197 24.375985 1723 +shapiro 1 8 4.875197 4.875197 1686 +lewi 1 8 4.875197 4.875197 1700 +unifi 1 8 4.875197 4.875197 1774 +dataset 4 7 5.010635 20.042540 1914 +shade 2 7 5.010635 10.021270 1881 +huge 1 6 5.164786 5.164786 1991 +unnecessari 1 5 5.347108 5.347108 2506 +ortega 1 5 5.347108 5.347108 2559 +icpp 1 5 5.347108 5.347108 2382 +loss 1 3 5.857933 5.857933 3805 +jakobovit 1 3 5.857933 5.857933 3913 +lara 1 3 5.857933 5.857933 3914 +ahren 7 2 6.263398 43.843786 5788 +redistribut 4 2 6.263398 25.053592 5582 +hansen 3 2 6.263398 18.790194 4301 +alamo 1 2 6.263398 6.263398 4243 +degrad 1 2 6.263398 6.263398 5362 +brinklei 1 2 6.263398 6.263398 5789 +notebook 1 2 6.263398 6.263398 5790 +arbitrarili 1 2 6.263398 6.263398 5791 +onunbalanc 2 1 6.957497 13.914994 14782 +whichperform 1 1 6.957497 6.957497 14783 +outweighth 1 1 6.957497 6.957497 14784 +polygonrender 1 1 6.957497 6.957497 14785 +percent 1 1 6.957497 6.957497 14786 +onbalanc 1 1 6.957497 6.957497 14787 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ new file mode 100644 index 00000000..36a06ca4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ahrens^DEVR^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 6 431 0.693147 4.158882 10 +system 2 443 0.693147 1.386294 6 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +base 3 165 1.791759 5.375277 50 +data 2 170 1.791759 3.583518 49 +implement 1 152 1.791759 1.791759 52 +model 2 145 1.945910 3.891820 69 +relat 2 139 1.945910 3.891820 68 +support 1 132 1.945910 1.945910 83 +databas 7 122 2.079442 14.556094 86 +technolog 2 131 2.079442 4.158884 102 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +manag 2 114 2.197225 4.394450 125 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +imag 3 91 2.397895 7.193685 161 +graphic 2 90 2.397895 4.795790 147 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +proceed 1 93 2.397895 2.397895 152 +present 1 91 2.397895 2.397895 145 +environ 4 84 2.484907 9.939628 177 +level 2 87 2.484907 4.969814 180 +contain 1 81 2.484907 2.484907 174 +interfac 2 79 2.564949 5.129898 209 +symposium 2 72 2.639057 5.278114 238 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +differ 1 66 2.708050 2.708050 253 +experi 2 64 2.772589 5.545178 283 +interact 1 62 2.772589 2.772589 270 +laboratori 1 63 2.772589 2.772589 292 +organ 1 65 2.772589 2.772589 265 +share 1 59 2.833213 2.833213 304 +overview 2 56 2.890372 5.780744 323 +februari 3 54 2.944439 8.833317 328 +scientif 2 53 2.944439 5.888878 341 +investig 1 51 2.995732 2.995732 353 +visual 5 48 3.044522 15.222610 372 +electron 2 47 3.091042 6.182084 379 +describ 1 45 3.135494 3.135494 400 +vision 9 41 3.218876 28.969884 430 +multi 2 36 3.367296 6.734592 493 +jame 3 35 3.401197 10.203591 507 +queri 2 33 3.433987 6.867974 524 +experiment 1 26 3.688879 3.688879 645 +store 1 24 3.761200 3.761200 693 +highli 1 23 3.806662 3.806662 725 +among 1 21 3.912023 3.912023 781 +definit 1 19 4.007333 4.007333 864 +aid 1 18 4.060443 4.060443 904 +steven 4 17 4.110874 16.443496 953 +hierarch 1 15 4.248495 4.248495 1018 +attribut 1 14 4.317488 4.317488 1092 +motiv 1 11 4.553877 4.553877 1346 +linda 4 10 4.653960 18.615840 1394 +tanimoto 4 10 4.653960 18.615840 1429 +princip 1 10 4.653960 4.653960 1397 +shapiro 4 8 4.875197 19.500788 1686 +lewi 3 8 4.875197 14.625591 1700 +databasesystem 1 8 4.875197 4.875197 1617 +spie 2 6 5.164786 10.329572 2119 +schema 1 6 5.164786 5.164786 1988 +scienceand 1 5 5.347108 5.347108 2348 +jakobovit 3 3 5.857933 17.573799 3913 +lara 3 3 5.857933 17.573799 3914 +entiti 2 3 5.857933 11.715866 3096 +ahren 3 2 6.263398 18.790194 5788 +brinklei 2 2 6.263398 12.526796 5789 +notebook 1 2 6.263398 6.263398 5790 +databaseenviron 1 2 6.263398 6.263398 5792 +datastructur 1 2 6.263398 6.263398 4685 +devr 2 1 6.957497 13.914994 14788 +wasdesign 1 1 6.957497 6.957497 14789 +andintend 1 1 6.957497 6.957497 14790 +unifieddata 1 1 6.957497 6.957497 14791 +queryfacil 1 1 6.957497 6.957497 14792 +andpromot 1 1 6.957497 6.957497 14793 +ofproperti 1 1 6.957497 6.957497 14794 +thepart 1 1 6.957497 6.957497 14795 +buildinst 1 1 6.957497 6.957497 14796 +inmodel 1 1 6.957497 6.957497 14797 +secondcad 1 1 6.957497 6.957497 14798 +flexibledata 1 1 6.957497 6.957497 14799 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ajb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ajb^ new file mode 100644 index 00000000..868086f9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ajb^ @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +link 4 247 1.386294 5.545176 24 +graduat 3 215 1.386294 4.158882 31 +washington 2 236 1.386294 2.772588 32 +design 1 213 1.386294 1.386294 25 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +graphic 1 90 2.397895 2.397895 147 +center 1 88 2.397895 2.397895 158 +school 2 84 2.484907 4.969814 188 +educ 2 86 2.484907 4.969814 191 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +nation 1 74 2.639057 2.639057 240 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +back 1 60 2.833213 2.833213 297 +allow 1 53 2.944439 2.944439 333 +undergradu 1 54 2.944439 2.944439 338 +still 1 50 3.044522 3.044522 362 +principl 1 48 3.044522 3.044522 357 +cool 1 49 3.044522 3.044522 374 +quarter 1 47 3.091042 3.091042 389 +math 1 44 3.135494 3.135494 402 +past 1 42 3.218876 3.218876 428 +seminar 1 38 3.295837 3.295837 470 +autumn 1 31 3.496508 3.496508 558 +great 2 27 3.637586 7.275172 626 +william 1 22 3.850148 3.850148 765 +programminglanguag 1 21 3.912023 3.912023 782 +spend 1 19 4.007333 4.007333 850 +scott 1 18 4.060443 4.060443 884 +ultim 1 17 4.110874 4.110874 943 +women 2 16 4.174387 8.348774 1004 +front 1 13 4.382027 4.382027 1154 +automata 1 13 4.382027 4.382027 1135 +opportun 1 13 4.382027 4.382027 1161 +undergrad 1 9 4.753590 4.753590 1589 +christian 1 7 5.010635 5.010635 1949 +peterson 1 7 5.010635 5.010635 1850 +park 1 6 5.164786 5.164786 2218 +outdoor 2 5 5.347108 10.694216 2514 +frisbe 1 5 5.347108 5.347108 2560 +mentor 1 4 5.568345 5.568345 2591 +pile 1 2 6.263398 6.263398 5371 +blast 1 2 6.263398 6.263398 5172 +femal 1 2 6.263398 6.263398 4672 +pagelast 1 2 6.263398 6.263398 5793 +mentorship 2 1 6.957497 13.914994 14800 +bernheim 1 1 6.957497 6.957497 14801 +ofdigit 1 1 6.957497 6.957497 14802 +gorp 1 1 6.957497 6.957497 14803 +guideto 1 1 6.957497 6.957497 14804 +recreationfun 1 1 6.957497 6.957497 14805 +abig 1 1 6.957497 6.957497 14806 +scoobi 1 1 6.957497 6.957497 14807 +dooeduc 1 1 6.957497 6.957497 14808 +summerwork 1 1 6.957497 6.957497 14809 +highlyrecommend 1 1 6.957497 6.957497 14810 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^amir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^amir^ new file mode 100644 index 00000000..a8e32154 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^amir^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +softwar 3 220 1.386294 4.158882 30 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +algorithm 3 162 1.791759 5.375277 57 +recent 1 167 1.791759 1.791759 58 +like 2 132 1.945910 3.891820 81 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +postscript 1 131 2.079442 2.079442 90 +teach 2 108 2.197225 4.394450 112 +make 2 111 2.197225 4.394450 120 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +part 1 98 2.302585 2.302585 129 +graphic 1 90 2.397895 2.397895 147 +mani 1 92 2.397895 2.397895 150 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +master 2 76 2.564949 5.129898 216 +optim 1 79 2.564949 2.564949 197 +write 1 72 2.639057 2.639057 222 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +would 1 67 2.708050 2.708050 251 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +simpl 1 60 2.833213 2.833213 298 +thesi 1 57 2.890372 2.890372 327 +explor 1 58 2.890372 2.890372 324 +publish 1 57 2.890372 2.890372 326 +undergradu 1 54 2.944439 2.944439 338 +found 1 53 2.944439 2.944439 337 +particular 1 51 2.995732 2.995732 352 +better 1 45 3.135494 3.135494 401 +might 1 41 3.218876 3.218876 426 +error 2 40 3.258097 6.516194 449 +tree 1 36 3.367296 3.367296 492 +obtain 1 33 3.433987 3.433987 534 +scientist 3 31 3.496508 10.489524 560 +anim 1 31 3.496508 3.496508 557 +often 1 31 3.496508 3.496508 551 +built 1 29 3.583519 3.583519 592 +consid 1 29 3.583519 3.583519 590 +wai 2 25 3.737670 7.475340 662 +proof 4 23 3.806662 15.226648 720 +compress 1 23 3.806662 3.806662 719 +theorem 1 21 3.912023 3.912023 786 +avoid 1 21 3.912023 3.912023 799 +binari 1 20 3.951244 3.951244 823 +wrote 1 20 3.951244 3.951244 830 +seem 1 18 4.060443 4.060443 899 +whether 1 17 4.110874 4.110874 918 +universityof 1 15 4.248495 4.248495 1061 +hierarch 1 15 4.248495 4.248495 1018 +style 1 15 4.248495 4.248495 1036 +balanc 1 14 4.317488 4.317488 1112 +believ 1 13 4.382027 4.382027 1187 +unfortun 1 13 4.382027 4.382027 1170 +care 1 13 4.382027 4.382027 1177 +motiv 2 11 4.553877 9.107754 1346 +scienceat 1 11 4.553877 4.553877 1375 +ring 1 8 4.875197 4.875197 1684 +told 1 8 4.875197 4.875197 1658 +refere 1 7 5.010635 5.010635 1895 +toronto 1 6 5.164786 5.164786 2156 +meant 1 6 5.164786 5.164786 2055 +fewer 1 6 5.164786 5.164786 2074 +mistak 1 6 5.164786 5.164786 2110 +broadcast 1 5 5.347108 5.347108 2453 +shift 1 5 5.347108 5.347108 2357 +tend 1 4 5.568345 5.568345 3041 +conserv 1 4 5.568345 5.568345 2870 +will 1 4 5.568345 5.568345 2782 +glad 1 4 5.568345 5.568345 2657 +fear 1 4 5.568345 5.568345 2911 +reveal 1 4 5.568345 5.568345 2647 +complic 1 4 5.568345 5.568345 2902 +amir 2 3 5.857933 11.715866 3850 +hereto 1 3 5.857933 5.857933 3476 +quotat 1 3 5.857933 5.857933 3121 +theywil 1 3 5.857933 5.857933 3102 +incorrect 1 3 5.857933 5.857933 3134 +caught 1 3 5.857933 5.857933 3465 +obvious 1 3 5.857933 5.857933 3474 +hoar 1 3 5.857933 5.857933 3875 +mathematician 2 2 6.263398 12.526796 5189 +defici 2 2 6.263398 12.526796 5054 +persuad 1 2 6.263398 6.263398 5384 +obviou 1 2 6.263398 6.263398 5367 +michail 1 1 6.957497 6.957497 14811 +michailgradu 1 1 6.957497 6.957497 14812 +studenti 1 1 6.957497 6.957497 14813 +followingarea 1 1 6.957497 6.957497 14814 +summationfor 1 1 6.957497 6.957497 14815 +opsi 1 1 6.957497 6.957497 14816 +appletdesign 1 1 6.957497 6.957497 14817 +combinesprogram 1 1 6.957497 6.957497 14818 +lunar 1 1 6.957497 6.957497 14819 +lander 1 1 6.957497 6.957497 14820 +gamethat 1 1 6.957497 6.957497 14821 +unwillingto 1 1 6.957497 6.957497 14822 +embarrass 1 1 6.957497 6.957497 14823 +publishedincorrect 1 1 6.957497 6.957497 14824 +unconvent 1 1 6.957497 6.957497 14825 +proofstyl 1 1 6.957497 6.957497 14826 +theyhav 1 1 6.957497 6.957497 14827 +wasnot 1 1 6.957497 6.957497 14828 +stylethat 1 1 6.957497 6.957497 14829 +lesli 1 1 6.957497 6.957497 14830 +lamport 1 1 6.957497 6.957497 14831 +wayi 1 1 6.957497 6.957497 14832 +theother 1 1 6.957497 6.957497 14833 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^anderson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^anderson^ new file mode 100644 index 00000000..24672da9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^anderson^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 3 431 0.693147 2.079441 10 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +project 3 340 1.098612 3.295836 18 +engin 1 297 1.098612 1.098612 20 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +algorithm 2 162 1.791759 3.583518 57 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +professor 2 137 1.945910 3.891820 76 +year 2 148 1.945910 3.891820 84 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +associ 1 93 2.397895 2.397895 151 +pictur 1 89 2.397895 2.397895 160 +institut 3 84 2.484907 7.454721 187 +academ 1 82 2.484907 2.484907 178 +resum 1 79 2.564949 2.564949 217 +receiv 1 66 2.708050 2.708050 244 +main 1 67 2.708050 2.708050 256 +visit 2 63 2.772589 5.545178 288 +evalu 1 64 2.772589 2.772589 266 +colleg 1 61 2.833213 2.833213 300 +scientif 1 53 2.944439 2.944439 341 +join 1 39 3.258097 3.258097 457 +award 1 34 3.401197 3.401197 523 +india 1 32 3.465736 3.465736 550 +richard 3 31 3.496508 10.489524 559 +travel 2 30 3.555348 7.110696 579 +progress 1 28 3.610918 3.610918 598 +spent 1 25 3.737670 3.737670 676 +indian 1 22 3.850148 3.850148 769 +geometri 1 22 3.850148 3.850148 752 +anderson 3 19 4.007333 12.021999 860 +postdoc 1 8 4.875197 4.875197 1724 +presidenti 1 8 4.875197 4.875197 1737 +qualifi 1 8 4.875197 4.875197 1721 +tourist 1 8 4.875197 4.875197 1710 +implementationof 1 7 5.010635 5.010635 1813 +reed 1 6 5.164786 5.164786 2086 +inmathemat 1 2 6.263398 6.263398 5098 +younginvestig 1 2 6.263398 6.263398 5794 +bangalor 1 2 6.263398 6.263398 5110 +theindian 1 2 6.263398 6.263398 5795 +stanfordin 1 1 6.957497 6.957497 14834 +aon 1 1 6.957497 6.957497 14835 +inberkelei 1 1 6.957497 6.957497 14836 +yeara 1 1 6.957497 6.957497 14837 +talksanderson 1 1 6.957497 6.957497 14838 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^anhai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^anhai^ new file mode 100644 index 00000000..032f9ffe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^anhai^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +interest 6 384 0.693147 4.158882 11 +research 4 431 0.693147 2.772588 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +last 1 314 1.098612 1.098612 14 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +first 2 140 1.945910 3.891820 71 +process 1 142 1.945910 1.945910 72 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +theori 1 111 2.197225 2.197225 127 +teach 1 108 2.197225 2.197225 112 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +librari 2 87 2.484907 4.969814 181 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +academ 1 82 2.484907 2.484907 178 +thing 1 84 2.484907 2.484907 189 +educ 1 86 2.484907 2.484907 191 +name 3 72 2.639057 7.917171 220 +intellig 1 72 2.639057 2.639057 225 +write 1 72 2.639057 2.639057 222 +receiv 1 66 2.708050 2.708050 244 +knowledg 1 67 2.708050 2.708050 243 +plan 2 65 2.772589 5.545178 272 +artifici 1 63 2.772589 2.772589 280 +content 1 59 2.833213 2.833213 302 +locat 1 59 2.833213 2.833213 303 +investig 1 51 2.995732 2.995732 353 +life 1 50 3.044522 3.044522 375 +made 1 44 3.135494 3.135494 398 +show 1 43 3.178054 3.178054 417 +music 2 42 3.218876 6.437752 436 +combin 1 42 3.218876 3.218876 421 +theoret 1 39 3.258097 3.258097 446 +vita 1 38 3.295837 3.295837 473 +mean 1 37 3.332205 3.332205 477 +purpos 1 37 3.332205 3.332205 481 +soon 1 36 3.367296 3.367296 494 +represent 1 35 3.401197 3.401197 512 +award 1 34 3.401197 3.401197 523 +curriculum 1 33 3.433987 3.433987 535 +travel 2 30 3.555348 7.110696 579 +art 1 29 3.583519 3.583519 593 +decis 4 23 3.806662 15.226648 728 +honor 1 23 3.806662 3.806662 729 +finish 1 22 3.850148 3.850148 748 +ofwashington 1 22 3.850148 3.850148 766 +born 1 21 3.912023 3.912023 798 +mostli 2 19 4.007333 8.014666 869 +histori 1 19 4.007333 4.007333 853 +listen 1 18 4.060443 4.060443 907 +thought 1 17 4.110874 4.110874 945 +brother 1 13 4.382027 4.382027 1189 +employ 1 12 4.465908 4.465908 1291 +qualit 1 11 4.553877 4.553877 1362 +probabilist 1 11 4.553877 4.553877 1343 +literatur 1 11 4.553877 4.553877 1300 +creativ 1 8 4.875197 4.875197 1777 +switch 1 8 4.875197 4.875197 1718 +brought 1 7 5.010635 5.010635 1925 +gave 1 7 5.010635 5.010635 1922 +foreign 1 7 5.010635 5.010635 1919 +reconstruct 1 6 5.164786 5.164786 2170 +blue 1 6 5.164786 5.164786 2227 +mother 1 6 5.164786 5.164786 2083 +markov 1 5 5.347108 5.347108 2280 +jazz 1 5 5.347108 5.347108 2527 +paint 1 5 5.347108 5.347108 2400 +andengin 1 4 5.568345 5.568345 3042 +ofmi 2 3 5.857933 11.715866 3911 +revisit 1 3 5.857933 5.857933 3915 +father 1 3 5.857933 5.857933 3757 +birth 1 3 5.857933 5.857933 3594 +affair 1 3 5.857933 5.857933 3916 +anhai 2 2 6.263398 12.526796 4404 +doan 2 2 6.263398 12.526796 4405 +andscienc 1 2 6.263398 6.263398 5796 +milwauke 1 2 6.263398 6.263398 5797 +amcurr 1 2 6.263398 6.263398 5798 +vietnames 1 2 6.263398 6.263398 5593 +syllabl 3 1 6.957497 20.872491 14839 +hungari 2 1 6.957497 13.914994 14840 +birthplac 2 1 6.957497 13.914994 14841 +homepageanhai 1 1 6.957497 6.957497 14842 +vietnam 1 1 6.957497 6.957497 14843 +iwent 1 1 6.957497 6.957497 14844 +kossuth 1 1 6.957497 6.957497 14845 +lajo 1 1 6.957497 6.957497 14846 +debrecen 1 1 6.957497 6.957497 14847 +underuncertainti 1 1 6.957497 6.957497 14848 +calm 1 1 6.957497 6.957497 14849 +invietnames 1 1 6.957497 6.957497 14850 +nghean 1 1 6.957497 6.957497 14851 +haiphong 1 1 6.957497 6.957497 14852 +folkswer 1 1 6.957497 6.957497 14853 +younger 1 1 6.957497 6.957497 14854 +theysimpli 1 1 6.957497 6.957497 14855 +namehaian 1 1 6.957497 6.957497 14856 +comtemporari 1 1 6.957497 6.957497 14857 +snapshotsanhai 1 1 6.957497 6.957497 14858 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ausland^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ausland^ new file mode 100644 index 00000000..ae651fbc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ausland^ @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +last 2 314 1.098612 2.197224 14 +project 2 340 1.098612 2.197224 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 3 236 1.386294 4.158882 32 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +paper 3 205 1.609438 4.828314 38 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +click 1 142 1.945910 1.945910 78 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +compil 3 122 2.079442 6.238326 96 +seattl 2 120 2.079442 4.158884 103 +spring 1 131 2.079442 2.079442 88 +code 1 108 2.197225 2.197225 116 +site 1 106 2.197225 2.197225 119 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +final 1 116 2.197225 2.197225 108 +find 1 111 2.197225 2.197225 111 +place 1 106 2.197225 2.197225 124 +graphic 2 90 2.397895 4.795790 147 +pictur 1 89 2.397895 2.397895 160 +follow 1 92 2.397895 2.397895 143 +control 1 82 2.484907 2.484907 164 +dynam 2 76 2.564949 5.129898 194 +complet 1 77 2.564949 2.564949 208 +resum 1 79 2.564949 2.564949 217 +optim 1 79 2.564949 2.564949 197 +workshop 1 71 2.639057 2.639057 239 +sieg 1 69 2.708050 2.708050 260 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +automat 2 61 2.833213 5.666426 306 +februari 1 54 2.944439 2.944439 328 +without 2 50 3.044522 6.089044 370 +quarter 1 47 3.091042 3.091042 389 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +mark 1 44 3.135494 3.135494 403 +better 1 45 3.135494 3.135494 401 +show 1 43 3.178054 3.178054 417 +fast 1 42 3.218876 3.218876 429 +small 1 39 3.258097 3.258097 447 +origin 1 38 3.295837 3.295837 472 +slide 1 38 3.295837 3.295837 467 +seminar 1 38 3.295837 3.295837 470 +short 1 36 3.367296 3.367296 499 +anim 3 31 3.496508 10.489524 557 +autumn 1 31 3.496508 3.496508 558 +hope 1 28 3.610918 3.610918 610 +univ 1 28 3.610918 3.610918 617 +valu 1 25 3.737670 3.737670 665 +motion 3 24 3.761200 11.283600 699 +magazin 1 24 3.761200 3.761200 704 +compress 1 23 3.806662 3.806662 719 +sequenc 1 23 3.806662 3.806662 734 +togeth 1 23 3.806662 3.806662 714 +try 1 22 3.850148 3.850148 764 +mpeg 1 20 3.951244 3.951244 831 +synthesi 1 20 3.951244 3.951244 834 +figur 4 18 4.060443 16.241772 903 +bershad 1 18 4.060443 4.060443 902 +event 1 18 4.060443 4.060443 896 +speed 1 18 4.060443 4.060443 911 +brown 1 16 4.174387 4.174387 977 +took 1 16 4.174387 4.174387 1010 +qual 1 15 4.248495 4.248495 1062 +goe 1 15 4.248495 4.248495 1044 +piec 1 15 4.248495 4.248495 1020 +consider 1 14 4.317488 4.317488 1076 +benjamin 1 11 4.553877 4.553877 1296 +decomposit 1 10 4.653960 4.653960 1439 +jump 1 9 4.753590 4.753590 1603 +joel 3 8 4.875197 14.625591 1698 +chamber 2 8 4.875197 9.750394 1692 +egger 2 8 4.875197 9.750394 1695 +pldi 1 8 4.875197 4.875197 1704 +switch 1 8 4.875197 4.875197 1718 +dispatch 1 7 5.010635 5.010635 1791 +gave 1 7 5.010635 5.010635 1922 +mock 1 6 5.164786 5.164786 2087 +philipos 2 5 5.347108 10.694216 2373 +engineeringat 1 5 5.347108 5.347108 2561 +andp 1 4 5.568345 5.568345 2811 +pardyak 1 4 5.568345 5.568345 3043 +doubl 1 4 5.568345 5.568345 2951 +ausland 8 3 5.857933 46.863464 3917 +super 1 3 5.857933 5.857933 3918 +singular 1 3 5.857933 5.857933 3366 +multiflow 1 2 6.263398 6.263398 4473 +articul 1 2 6.263398 6.263398 5799 +acmtransact 1 2 6.263398 6.263398 4310 +wilkerson 1 2 6.263398 6.263398 4516 +mywork 1 2 6.263398 6.263398 5800 +orang 1 2 6.263398 6.263398 5163 +onit 1 1 6.957497 6.957497 14859 +andb 1 1 6.957497 6.957497 14860 +inextens 1 1 6.957497 6.957497 14861 +compilersupport 1 1 6.957497 6.957497 14862 +synthesisfor 1 1 6.957497 6.957497 14863 +fukunaga 1 1 6.957497 6.957497 14864 +partovi 1 1 6.957497 6.957497 14865 +christensen 1 1 6.957497 6.957497 14866 +reiss 1 1 6.957497 6.957497 14867 +shuman 1 1 6.957497 6.957497 14868 +leapfrog 1 1 6.957497 6.957497 14869 +lossili 1 1 6.957497 6.957497 14870 +animationthat 1 1 6.957497 6.957497 14871 +cartwheel 1 1 6.957497 6.957497 14872 +andshuffl 1 1 6.957497 6.957497 14873 +andcollaps 1 1 6.957497 6.957497 14874 +isjust 1 1 6.957497 6.957497 14875 +tosmooth 1 1 6.957497 6.957497 14876 +thetalk 1 1 6.957497 6.957497 14877 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bam^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bam^ new file mode 100644 index 00000000..5b3fd734 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bam^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +offic 2 299 1.098612 2.197224 13 +current 2 284 1.098612 2.197224 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +number 1 130 2.079442 2.079442 97 +look 2 107 2.197225 4.394450 115 +site 1 106 2.197225 2.197225 119 +find 1 111 2.197225 2.197225 111 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +search 1 95 2.397895 2.397895 155 +info 4 85 2.484907 9.939628 176 +second 1 81 2.484907 2.484907 166 +institut 1 84 2.484907 2.484907 187 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +refer 1 78 2.564949 2.564949 203 +complet 1 77 2.564949 2.564949 208 +name 2 72 2.639057 5.278114 220 +onlin 1 75 2.639057 2.639057 223 +sieg 1 69 2.708050 2.708050 260 +view 1 70 2.708050 2.708050 254 +test 1 66 2.708050 2.708050 252 +visit 1 63 2.772589 2.772589 288 +dept 1 64 2.772589 2.772589 291 +locat 1 59 2.833213 2.833213 303 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +friend 1 48 3.044522 3.044522 376 +get 1 46 3.091042 3.091042 380 +favorit 2 44 3.135494 6.270988 410 +edit 1 42 3.218876 3.218876 418 +past 1 42 3.218876 3.218876 428 +brian 4 38 3.295837 13.183348 466 +origin 1 38 3.295837 3.295837 472 +random 1 34 3.401197 3.401197 511 +idea 1 32 3.465736 3.465736 545 +quot 1 29 3.583519 3.583519 582 +actual 1 28 3.610918 3.610918 604 +stop 1 17 4.110874 4.110874 942 +alreadi 1 16 4.174387 4.174387 963 +doesn 1 15 4.248495 4.248495 1055 +hotlist 1 13 4.382027 4.382027 1199 +touch 1 12 4.465908 4.465908 1288 +brad 1 12 4.465908 4.465908 1264 +artist 1 6 5.164786 5.164786 2127 +band 1 6 5.164786 5.164786 2198 +girl 1 5 5.347108 5.347108 2410 +worst 1 5 5.347108 5.347108 2287 +poem 1 5 5.347108 5.347108 2483 +guestbook 1 5 5.347108 5.347108 2475 +snapshot 1 5 5.347108 5.347108 2303 +washingtonseattl 1 4 5.568345 5.568345 3044 +ling 1 4 5.568345 5.568345 3045 +confus 1 3 5.857933 5.857933 3144 +thrash 1 3 5.857933 5.857933 3400 +mossi 1 2 6.263398 6.263398 5801 +liber 1 2 6.263398 6.263398 5154 +glorifi 1 2 6.263398 6.263398 4114 +mental 1 2 6.263398 6.263398 5802 +stolen 1 2 6.263398 6.263398 5803 +boinge 3 1 6.957497 20.872491 14878 +michalowskidepart 1 1 6.957497 6.957497 14879 +engineeringmail 1 1 6.957497 6.957497 14880 +bitsthank 1 1 6.957497 6.957497 14881 +headscapewhenev 1 1 6.957497 6.957497 14882 +gradstud 1 1 6.957497 6.957497 14883 +inlinguist 1 1 6.957497 6.957497 14884 +ultrahotlist 1 1 6.957497 6.957497 14885 +ofal 1 1 6.957497 6.957497 14886 +forsometh 1 1 6.957497 6.957497 14887 +thave 1 1 6.957497 6.957497 14888 +urouletteto 1 1 6.957497 6.957497 14889 +ofwhich 1 1 6.957497 6.957497 14890 +songsand 1 1 6.957497 6.957497 14891 +fictiti 1 1 6.957497 6.957497 14892 +puriti 1 1 6.957497 6.957497 14893 +tokeep 1 1 6.957497 6.957497 14894 +pagesfrom 1 1 6.957497 6.957497 14895 +aslfingerspel 1 1 6.957497 6.957497 14896 +blatantli 1 1 6.957497 6.957497 14897 +chamberlain 1 1 6.957497 6.957497 14898 +michalowski 1 1 6.957497 6.957497 14899 +sanityerad 1 1 6.957497 6.957497 14900 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html new file mode 100644 index 00000000..8a02a99e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^beame^beame.html @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +washington 3 236 1.386294 4.158882 32 +paper 1 205 1.609438 1.609438 38 +recent 2 167 1.791759 3.583518 58 +distribut 1 162 1.791759 1.791759 51 +professor 1 137 1.945910 1.945910 76 +mathemat 1 108 2.197225 2.197225 123 +theori 1 111 2.197225 2.197225 127 +associ 2 93 2.397895 4.795790 151 +receiv 2 66 2.708050 5.416100 244 +complex 1 64 2.772589 2.772589 269 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +join 1 39 3.258097 3.258097 457 +theoret 1 39 3.258097 3.258097 446 +paul 4 38 3.295837 13.183348 471 +connect 1 37 3.332205 3.332205 485 +post 1 35 3.401197 3.401197 505 +award 1 34 3.401197 3.401197 523 +autumn 1 31 3.496508 3.496508 558 +enjoi 1 26 3.688879 3.688879 660 +concern 1 25 3.737670 3.737670 666 +aspect 1 25 3.737670 3.737670 663 +sport 1 25 3.737670 3.737670 683 +doctor 1 24 3.761200 3.761200 709 +proof 3 23 3.806662 11.419986 720 +theunivers 1 21 3.912023 3.912023 797 +concentr 1 18 4.060443 4.060443 906 +qual 1 15 4.248495 4.248495 1062 +primarili 1 13 4.382027 4.382027 1185 +thedepart 1 11 4.553877 4.553877 1350 +softbal 1 9 4.753590 4.753590 1594 +toronto 1 6 5.164786 5.164786 2156 +squash 1 6 5.164786 5.164786 2223 +lack 1 6 5.164786 5.164786 1994 +beam 2 5 5.347108 10.694216 2344 +engineeringat 1 5 5.347108 5.347108 2561 +talent 1 3 5.857933 5.857933 3768 +sciencein 1 2 6.263398 6.263398 5804 +paralleland 1 2 6.263398 6.263398 5805 +beamepaul 2 1 6.957497 13.914994 14901 +computationalcomplex 2 1 6.957497 13.914994 14902 +academicyear 1 1 6.957497 6.957497 14903 +presidentialyoung 1 1 6.957497 6.957497 14904 +inproposit 1 1 6.957497 6.957497 14905 +enthusiasm 1 1 6.957497 6.957497 14906 +cancompens 1 1 6.957497 6.957497 14907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^becker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^becker^ new file mode 100644 index 00000000..22b9344e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^becker^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +interest 2 384 0.693147 1.386294 11 +system 1 443 0.693147 0.693147 6 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +oper 1 180 1.609438 1.609438 34 +develop 1 174 1.791759 1.791759 53 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +confer 2 126 2.079442 4.158884 100 +code 2 108 2.197225 4.394450 116 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +real 1 93 2.397895 2.397895 144 +info 1 85 2.484907 2.484907 176 +stuff 1 87 2.484907 2.484907 171 +level 1 87 2.484907 2.484907 180 +build 1 85 2.484907 2.484907 184 +environ 1 84 2.484907 2.484907 177 +librari 1 87 2.484907 2.484907 181 +optim 1 79 2.564949 2.564949 197 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +free 1 73 2.639057 2.639057 224 +function 1 62 2.772589 2.772589 275 +colleg 1 61 2.833213 2.833213 300 +plai 1 60 2.833213 2.833213 307 +much 1 52 2.995732 2.995732 349 +done 1 47 3.091042 3.091042 381 +mark 1 44 3.135494 3.135494 403 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +respons 1 37 3.332205 3.332205 476 +platform 1 29 3.583519 3.583519 591 +measur 1 28 3.610918 3.610918 609 +team 1 27 3.637586 3.637586 625 +sport 2 25 3.737670 7.475340 683 +grad 1 20 3.951244 3.951244 837 +particularli 1 19 4.007333 4.007333 867 +ultim 2 17 4.110874 8.221748 943 +devic 2 16 4.174387 8.348774 1002 +goe 1 15 4.248495 4.248495 1044 +spin 1 14 4.317488 4.317488 1121 +econom 1 13 4.382027 4.382027 1184 +jump 1 9 4.753590 4.753590 1603 +volleybal 1 9 4.753590 4.753590 1598 +trust 1 9 4.753590 4.753590 1583 +spot 1 7 5.010635 5.010635 1894 +bunch 1 7 5.010635 5.010635 1861 +athlet 1 7 5.010635 5.010635 1933 +tri 1 6 5.164786 5.164786 2166 +railroad 1 6 5.164786 5.164786 2161 +frisbe 2 5 5.347108 10.694216 2560 +minnesota 1 5 5.347108 5.347108 2469 +anti 1 5 5.347108 5.347108 2434 +champion 2 4 5.568345 11.136690 2982 +skate 1 4 5.568345 5.568345 3046 +borrow 1 3 5.857933 5.857933 3725 +somedai 1 3 5.857933 5.857933 3919 +tripl 1 3 5.857933 5.857933 3160 +bank 1 3 5.857933 5.857933 3920 +getto 1 2 6.263398 6.263398 5806 +locomot 1 2 6.263398 6.263398 5807 +beckerdavid 1 1 6.957497 6.957497 14908 +beckercontact 1 1 6.957497 6.957497 14909 +makingspina 1 1 6.957497 6.957497 14910 +drvier 1 1 6.957497 6.957497 14911 +bethel 1 1 6.957497 6.957497 14912 +men 1 1 6.957497 6.957497 14913 +ultimatesoftballvolleyballbasketballbroomballfootballsoccerteam 1 1 6.957497 6.957497 14914 +handbal 1 1 6.957497 6.957497 14915 +playracquetballgolftenni 1 1 6.957497 6.957497 14916 +bridgecampingcanoeingdisc 1 1 6.957497 6.957497 14917 +golffoosballhikingpaintballskisnorkelingsnowboardtrackwallyballwat 1 1 6.957497 6.957497 14918 +skiingweightliftingwhitewat 1 1 6.957497 6.957497 14919 +raftinghorseback 1 1 6.957497 6.957497 14920 +ridingmountain 1 1 6.957497 6.957497 14921 +bikingin 1 1 6.957497 6.957497 14922 +currenc 1 1 6.957497 6.957497 14923 +ssto 1 1 6.957497 6.957497 14924 +rlv 1 1 6.957497 6.957497 14925 +theologi 1 1 6.957497 6.957497 14926 +centurai 1 1 6.957497 6.957497 14927 +boot 1 1 6.957497 6.957497 14928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bershad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bershad^ new file mode 100644 index 00000000..a6ccc11c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bershad^ @@ -0,0 +1,281 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 25 443 0.693147 17.328675 6 +inform 3 412 0.693147 2.079441 8 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +project 5 340 1.098612 5.493060 18 +us 5 329 1.098612 5.493060 16 +engin 1 297 1.098612 1.098612 20 +softwar 5 220 1.386294 6.931470 30 +design 3 213 1.386294 4.158882 25 +washington 2 236 1.386294 2.772588 32 +languag 2 227 1.386294 2.772588 26 +link 1 247 1.386294 1.386294 24 +oper 11 180 1.609438 17.703818 34 +paper 5 205 1.609438 8.047190 38 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +network 4 168 1.791759 7.167036 61 +distribut 3 162 1.791759 5.375277 51 +parallel 2 169 1.791759 3.583518 60 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +perform 7 143 1.945910 13.621370 74 +support 6 132 1.945910 11.675460 83 +architectur 3 139 1.945910 5.837730 77 +relat 2 139 1.945910 3.891820 68 +professor 1 137 1.945910 1.945910 76 +click 1 142 1.945910 1.945910 78 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +compil 5 122 2.079442 10.397210 96 +seattl 3 120 2.079442 6.238326 103 +confer 3 126 2.079442 6.238326 100 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +manag 4 114 2.197225 8.788900 125 +structur 2 106 2.197225 4.394450 105 +specif 2 106 2.197225 4.394450 106 +assist 1 112 2.197225 2.197225 113 +look 1 107 2.197225 2.197225 115 +memori 4 101 2.302585 9.210340 139 +octob 3 89 2.397895 7.193685 156 +associ 2 93 2.397895 4.795790 151 +commun 2 95 2.397895 4.795790 157 +sinc 1 90 2.397895 2.397895 159 +larg 2 82 2.484907 4.969814 168 +ieee 1 86 2.484907 2.484907 190 +dynam 5 76 2.564949 12.824745 194 +appear 5 78 2.564949 12.824745 210 +master 2 76 2.564949 5.129898 216 +issu 1 78 2.564949 2.564949 211 +messag 1 76 2.564949 2.564949 212 +workshop 7 71 2.639057 18.473399 239 +symposium 2 72 2.639057 5.278114 238 +write 2 72 2.639057 5.278114 222 +servic 2 72 2.639057 5.278114 236 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +street 1 63 2.772589 2.772589 293 +experi 1 64 2.772589 2.772589 283 +polici 1 64 2.772589 2.772589 279 +virtual 1 62 2.772589 2.772589 285 +evalu 1 64 2.772589 2.772589 266 +plai 1 60 2.833213 2.833213 307 +automat 1 61 2.833213 2.833213 306 +share 1 59 2.833213 2.833213 304 +publish 1 57 2.890372 2.890372 326 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +extens 7 53 2.944439 20.611073 340 +februari 4 54 2.944439 11.777756 328 +local 1 55 2.944439 2.944439 334 +hardwar 2 51 2.995732 5.991464 350 +run 1 51 2.995732 2.995732 347 +life 1 50 3.044522 3.044522 375 +standard 1 48 3.044522 3.044522 365 +effect 1 46 3.091042 3.091042 385 +protocol 2 45 3.135494 6.270988 407 +fast 3 42 3.218876 9.656628 429 +cach 2 41 3.218876 6.437752 432 +map 1 39 3.258097 3.258097 452 +multipl 1 39 3.258097 3.258097 453 +continu 1 39 3.258097 3.258097 448 +brian 10 38 3.295837 32.958370 466 +industri 1 38 3.295837 3.295837 464 +winter 2 36 3.367296 6.734592 500 +post 1 35 3.401197 3.401197 505 +return 1 34 3.401197 3.401197 502 +concurr 1 34 3.401197 3.401197 501 +detect 1 26 3.688879 3.688879 646 +consist 1 26 3.688879 3.688879 651 +although 1 25 3.737670 3.737670 667 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +interpret 1 24 3.761200 3.761200 686 +greg 1 24 3.761200 3.761200 695 +mobil 3 23 3.806662 11.419986 730 +thread 1 23 3.806662 3.806662 722 +ofwashington 2 22 3.850148 7.700296 766 +reduc 1 22 3.850148 3.850148 759 +inth 1 22 3.850148 3.850148 741 +chen 3 21 3.912023 11.736069 791 +avoid 1 21 3.912023 3.912023 799 +hous 1 21 3.912023 3.912023 801 +kernel 2 20 3.951244 7.902488 825 +binari 1 20 3.951244 3.951244 823 +safeti 1 20 3.951244 3.951244 817 +increas 1 20 3.951244 3.951244 829 +comparison 1 19 4.007333 4.007333 863 +andrew 1 19 4.007333 4.007333 849 +miss 1 19 4.007333 4.007333 866 +citi 1 19 4.007333 4.007333 874 +bershad 36 18 4.060443 146.175948 902 +seem 1 18 4.060443 4.060443 899 +event 1 18 4.060443 4.060443 896 +asplo 5 17 4.110874 20.554370 948 +stock 2 16 4.174387 8.348774 1007 +took 1 16 4.174387 4.174387 1010 +brief 1 16 4.174387 4.174387 1001 +qual 2 15 4.248495 8.496990 1062 +driven 1 15 4.248495 4.248495 1048 +overhead 1 15 4.248495 4.248495 1035 +micro 1 15 4.248495 4.248495 1031 +spin 3 14 4.317488 12.952464 1121 +save 1 14 4.317488 4.317488 1099 +levi 1 14 4.317488 4.317488 1093 +consider 1 14 4.317488 4.317488 1076 +dean 1 14 4.317488 4.317488 1104 +karlin 2 13 4.382027 8.764054 1176 +mellon 1 13 4.382027 4.382027 1179 +charl 1 13 4.382027 4.382027 1149 +block 1 13 4.382027 4.382027 1183 +usenix 6 12 4.465908 26.795448 1240 +carnegi 1 12 4.465908 4.465908 1260 +anna 1 12 4.465908 4.465908 1292 +mari 1 12 4.465908 4.465908 1266 +safe 1 12 4.465908 4.465908 1274 +promot 1 12 4.465908 4.465908 1235 +isca 2 11 4.553877 9.107754 1354 +denni 1 11 4.553877 4.553877 1321 +baer 1 11 4.553877 4.553877 1353 +systemsc 1 11 4.553877 4.553877 1293 +impact 1 11 4.553877 4.553877 1334 +primit 1 11 4.553877 4.553877 1317 +arpa 1 11 4.553877 4.553877 1369 +sosp 5 10 4.653960 23.269800 1416 +jean 1 10 4.653960 4.653960 1440 +henri 1 10 4.653960 4.653960 1417 +packet 1 10 4.653960 4.653960 1415 +decomposit 1 10 4.653960 4.653960 1439 +osdi 3 9 4.753590 14.260770 1534 +voelker 2 9 4.753590 9.507180 1557 +hang 1 9 4.753590 4.753590 1499 +patterson 1 9 4.753590 4.753590 1554 +wong 1 9 4.753590 4.753590 1609 +modula 1 9 4.753590 4.753590 1613 +wilson 1 9 4.753590 4.753590 1536 +rel 1 9 4.753590 4.753590 1487 +romer 4 8 4.875197 19.500788 1706 +marc 3 8 4.875197 14.625591 1680 +sigop 2 8 4.875197 9.750394 1727 +chamber 2 8 4.875197 9.750394 1692 +egger 2 8 4.875197 9.750394 1695 +mach 2 8 4.875197 9.750394 1669 +besid 1 8 4.875197 4.875197 1681 +wayn 1 8 4.875197 4.875197 1738 +watson 1 8 4.875197 4.875197 1691 +uniprocessor 1 8 4.875197 4.875197 1696 +hash 1 8 4.875197 4.875197 1618 +cultur 1 7 5.010635 5.010635 1951 +northwest 1 7 5.010635 5.010635 1973 +instrument 1 7 5.010635 5.010635 1954 +dispatch 1 7 5.010635 5.010635 1791 +interrupt 1 7 5.010635 5.010635 1793 +prioriti 1 7 5.010635 5.010635 1792 +onoper 2 6 5.164786 10.329572 2048 +squash 1 6 5.164786 5.164786 2223 +prefetch 1 6 5.164786 5.164786 2039 +edward 1 6 5.164786 5.164786 2050 +wolman 1 6 5.164786 5.164786 2093 +loup 1 6 5.164786 5.164786 2228 +mock 1 6 5.164786 5.164786 2087 +conflict 1 6 5.164786 5.164786 2041 +rain 1 6 5.164786 5.164786 2137 +philipos 2 5 5.347108 10.694216 2373 +coffe 1 5 5.347108 5.347108 2556 +toc 1 5 5.347108 5.347108 2562 +bind 1 5 5.347108 5.347108 2250 +alec 1 5 5.347108 5.347108 2563 +ohlrich 1 5 5.347108 5.347108 2564 +mutual 1 5 5.347108 5.347108 2418 +pardyak 4 4 5.568345 22.273380 3043 +microkernel 4 4 5.568345 22.273380 3047 +savag 3 4 5.568345 16.705035 2777 +rocki 1 4 5.568345 5.568345 3048 +etch 1 4 5.568345 5.568345 2755 +compcon 1 4 5.568345 5.568345 2958 +exclus 1 4 5.568345 5.568345 2947 +fiuczynski 3 3 5.857933 17.573799 3390 +ausland 2 3 5.857933 11.715866 3917 +stefan 2 3 5.857933 11.715866 3921 +northeast 1 3 5.857933 5.857933 3922 +cachingtraci 1 3 5.857933 5.857933 3923 +kimbrel 1 3 5.857933 5.857933 3924 +felten 1 3 5.857933 5.857933 3925 +geoffrei 1 3 5.857933 5.857933 3505 +dynamiccompil 1 3 5.857933 5.857933 3926 +garrett 1 3 5.857933 5.857933 3377 +mobisa 1 3 5.857933 5.857933 3927 +mappedcach 1 3 5.857933 5.857933 3928 +forappl 1 3 5.857933 5.857933 3929 +irrelev 1 3 5.857933 5.857933 3823 +golub 1 3 5.857933 5.857933 3265 +alien 1 3 5.857933 5.857933 3930 +przemyslaw 3 2 6.263398 18.790194 5808 +implemen 2 2 6.263398 12.526796 5809 +emin 2 2 6.263398 12.526796 5810 +sirer 2 2 6.263398 12.526796 5811 +wwo 2 2 6.263398 12.526796 5812 +eduwork 1 2 6.263398 6.263398 5813 +hasappear 1 2 6.263398 6.263398 5099 +tomkin 1 2 6.263398 6.263398 5814 +hugo 1 2 6.263398 6.263398 5815 +garth 1 2 6.263398 6.263398 5816 +gibson 1 2 6.263398 6.263398 5817 +hsieh 1 2 6.263398 6.263398 5818 +onlinesuperpag 1 2 6.263398 6.263398 5819 +appearedin 1 2 6.263398 6.263398 5096 +endpoint 1 2 6.263398 6.263398 4967 +moss 1 2 6.263398 6.263398 5820 +redel 1 2 6.263398 6.263398 4358 +elli 1 2 6.263398 6.263398 4216 +baron 1 2 6.263398 6.263398 4317 +microbenchmark 1 2 6.263398 6.263398 5821 +rashid 1 2 6.263398 6.263398 4318 +abduct 1 2 6.263398 6.263398 5663 +maeda 3 1 6.957497 20.872491 14929 +midwai 2 1 6.957497 13.914994 14930 +zekauska 2 1 6.957497 13.914994 14931 +sawdon 2 1 6.957497 13.914994 14932 +machnix 2 1 6.957497 13.914994 14933 +drave 2 1 6.957497 13.914994 14934 +forin 2 1 6.957497 13.914994 14935 +respit 1 1 6.957497 6.957497 14936 +asigmetr 1 1 6.957497 6.957497 14937 +thestairmast 1 1 6.957497 6.957497 14938 +extensibleoper 1 1 6.957497 6.957497 14939 +parallelnetwork 1 1 6.957497 6.957497 14940 +thesequel 1 1 6.957497 6.957497 14941 +optimizationcours 1 1 6.957497 6.957497 14942 +youmight 1 1 6.957497 6.957497 14943 +extensiblesystem 1 1 6.957497 6.957497 14944 +theodor 1 1 6.957497 6.957497 14945 +implementationj 1 1 6.957497 6.957497 14946 +defouw 1 1 6.957497 6.957497 14947 +alapat 1 1 6.957497 6.957497 14948 +becker 1 1 6.957497 6.957497 14949 +sharedmemori 1 1 6.957497 6.957497 14950 +conflictresolut 1 1 6.957497 6.957497 14951 +uwtechn 1 1 6.957497 6.957497 14952 +demultiplex 1 1 6.957497 6.957497 14953 +yuhara 1 1 6.957497 6.957497 14954 +andmostli 1 1 6.957497 6.957497 14955 +moblic 1 1 6.957497 6.957497 14956 +wheeler 1 1 6.957497 6.957497 14957 +ginsburg 1 1 6.957497 6.957497 14958 +inoper 1 1 6.957497 6.957497 14959 +harrier 1 1 6.957497 6.957497 14960 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bobd^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bobd^ new file mode 100644 index 00000000..748518b5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bobd^ @@ -0,0 +1,153 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +scienc 3 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +current 2 284 1.098612 2.197224 21 +project 2 340 1.098612 2.197224 18 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +washington 3 236 1.386294 4.158882 32 +link 2 247 1.386294 2.772588 24 +also 1 259 1.386294 1.386294 28 +list 3 201 1.609438 4.828314 39 +network 1 168 1.791759 1.791759 61 +hall 1 146 1.945910 1.945910 65 +seattl 3 120 2.079442 6.238326 103 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +world 2 115 2.197225 4.394450 126 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +person 1 111 2.197225 2.197225 117 +text 1 98 2.302585 2.302585 133 +search 6 95 2.397895 14.387370 155 +select 1 91 2.397895 2.397895 154 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +good 1 77 2.564949 2.564949 200 +david 1 71 2.639057 2.639057 232 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +sieg 1 69 2.708050 2.708050 260 +view 1 70 2.708050 2.708050 254 +new 5 64 2.772589 13.862945 262 +previou 1 62 2.772589 2.772589 290 +collect 1 65 2.772589 2.772589 268 +guid 1 63 2.772589 2.772589 267 +thesi 1 57 2.890372 2.890372 327 +index 1 56 2.890372 2.890372 309 +scientif 1 53 2.944439 2.944439 341 +particular 1 51 2.995732 2.995732 352 +pointer 2 48 3.044522 6.089044 368 +cool 1 49 3.044522 3.044522 374 +telephon 1 50 3.044522 3.044522 373 +life 1 50 3.044522 3.044522 375 +directori 3 45 3.135494 9.406482 396 +past 1 42 3.218876 3.218876 428 +open 1 38 3.295837 3.295837 469 +hand 1 37 3.332205 3.332205 475 +post 1 35 3.401197 3.401197 505 +scientist 1 31 3.496508 3.496508 560 +steve 1 29 3.583519 3.583519 594 +american 3 27 3.637586 10.912758 634 +todai 2 25 3.737670 7.475340 672 +sport 2 25 3.737670 7.475340 683 +yahoo 2 24 3.761200 7.522400 707 +daili 1 24 3.761200 3.761200 706 +magazin 1 24 3.761200 3.761200 704 +miscellan 1 23 3.806662 3.806662 731 +voic 1 21 3.912023 3.912023 806 +hous 1 21 3.912023 3.912023 801 +lyco 1 19 4.007333 4.007333 871 +boston 1 19 4.007333 4.007333 862 +agent 1 18 4.060443 4.060443 910 +white 2 17 4.110874 8.221748 951 +engineeringunivers 1 17 4.110874 4.110874 959 +repositori 1 17 4.110874 4.110874 932 +dilbert 1 16 4.174387 4.174387 996 +hierarch 1 15 4.248495 4.248495 1018 +balanc 1 14 4.317488 4.317488 1112 +washingtonbox 1 13 4.382027 4.382027 1200 +suit 1 13 4.382027 4.382027 1129 +canada 1 13 4.382027 4.382027 1158 +social 1 13 4.382027 4.382027 1123 +hank 1 12 4.465908 4.465908 1253 +excit 1 11 4.553877 4.553877 1329 +arpa 1 11 4.553877 4.553877 1369 +clock 1 11 4.553877 4.553877 1320 +shop 1 10 4.653960 4.653960 1469 +metacrawl 1 10 4.653960 4.653960 1455 +vista 1 10 4.653960 4.653960 1452 +yellow 2 9 4.753590 9.507180 1601 +weld 1 9 4.753590 4.753590 1538 +meta 1 9 4.753590 4.753590 1505 +govern 1 9 4.753590 4.753590 1581 +congress 1 9 4.753590 4.753590 1592 +entitl 1 9 4.753590 4.753590 1490 +postdoc 1 8 4.875197 4.875197 1724 +span 1 8 4.875197 4.875197 1751 +upcom 1 8 4.875197 4.875197 1685 +softbot 1 7 5.010635 5.010635 1974 +pittsburgh 1 7 5.010635 5.010635 1938 +strip 1 6 5.164786 5.164786 2203 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +gopher 1 6 5.164786 5.164786 1982 +infoseek 1 6 5.164786 5.164786 2188 +slate 1 6 5.164786 5.164786 2021 +atlant 1 5 5.347108 5.347108 2508 +feder 1 5 5.347108 5.347108 2266 +union 1 4 5.568345 5.568345 2634 +alta 1 4 5.568345 5.568345 3039 +birthdai 1 4 5.568345 5.568345 2800 +letterman 1 3 5.857933 5.857933 3931 +shortcut 1 3 5.857933 5.857933 3932 +soar 1 3 5.857933 5.857933 3506 +headlin 1 3 5.857933 5.857933 3710 +monthli 1 3 5.857933 5.857933 3910 +espn 1 3 5.857933 5.857933 3724 +zone 1 3 5.857933 5.857933 3747 +museum 1 3 5.857933 5.857933 3933 +comedi 1 2 6.263398 6.263398 5822 +geeki 1 2 6.263398 6.263398 5823 +shopbot 1 2 6.263398 6.263398 5824 +sigma 1 2 6.263398 6.263398 4369 +magellan 1 2 6.263398 6.263398 5825 +reuter 1 2 6.263398 6.263398 4099 +cafe 1 2 6.263398 6.263398 5826 +salon 1 2 6.263398 6.263398 5827 +harper 1 2 6.263398 6.263398 5141 +commiss 1 2 6.263398 6.263398 4901 +reform 1 2 6.263398 6.263398 5828 +budget 3 1 6.957497 20.872491 14961 +doorenbo 2 1 6.957497 13.914994 14962 +pagebob 1 1 6.957497 6.957497 14963 +bobd 1 1 6.957497 6.957497 14964 +netbot 1 1 6.957497 6.957497 14965 +boffo 1 1 6.957497 6.957497 14966 +zdnet 1 1 6.957497 6.957497 14967 +anchordesk 1 1 6.957497 6.957497 14968 +savvysearch 1 1 6.957497 6.957497 14969 +inktomi 1 1 6.957497 6.957497 14970 +crawler 1 1 6.957497 6.957497 14971 +hotbot 1 1 6.957497 6.957497 14972 +pointcom 1 1 6.957497 6.957497 14973 +switchboard 1 1 6.957497 6.957497 14974 +cnnfn 1 1 6.957497 6.957497 14975 +newshour 1 1 6.957497 6.957497 14976 +globe 1 1 6.957497 6.957497 14977 +feed 1 1 6.957497 6.957497 14978 +fedworld 1 1 6.957497 6.957497 14979 +deficit 1 1 6.957497 6.957497 14980 +debt 1 1 6.957497 6.957497 14981 +concord 1 1 6.957497 6.957497 14982 +coalit 1 1 6.957497 6.957497 14983 +bipartisan 1 1 6.957497 6.957497 14984 +andfun 1 1 6.957497 6.957497 14985 +pagebobd 1 1 6.957497 6.957497 14986 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^borning^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^borning^ new file mode 100644 index 00000000..abe29b96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^borning^ @@ -0,0 +1,101 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 4 571 0.000000 0.000000 5 +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +program 3 374 0.693147 2.079441 7 +research 3 431 0.693147 2.079441 10 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +languag 5 227 1.386294 6.931470 26 +washington 4 236 1.386294 5.545176 32 +graduat 2 215 1.386294 2.772588 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +recent 2 167 1.791759 3.583518 58 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +seattl 1 120 2.079442 2.079442 103 +code 1 108 2.197225 2.197225 116 +mathemat 1 108 2.197225 2.197225 123 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +orient 2 80 2.564949 5.129898 205 +logic 1 71 2.639057 2.639057 230 +intellig 1 72 2.639057 2.639057 225 +solv 1 73 2.639057 2.639057 234 +receiv 2 66 2.708050 5.416100 244 +degre 1 69 2.708050 2.708050 259 +interact 1 62 2.772589 2.772589 270 +artifici 1 63 2.772589 2.772589 280 +dept 1 64 2.772589 2.772589 291 +colleg 1 61 2.833213 2.833213 300 +done 1 47 3.091042 3.091042 381 +mechan 1 43 3.178054 3.178054 416 +http 1 41 3.218876 3.218876 420 +societi 2 40 3.258097 6.516194 456 +join 1 39 3.258097 3.258097 457 +winter 1 36 3.367296 3.367296 500 +post 1 35 3.401197 3.401197 505 +taught 1 33 3.433987 3.433987 526 +human 2 32 3.465736 6.931472 546 +idea 1 32 3.465736 3.465736 545 +concept 1 32 3.465736 3.465736 537 +dissert 1 32 3.465736 3.465736 549 +autumn 2 31 3.496508 6.993016 558 +constraint 3 26 3.688879 11.066637 636 +spent 2 25 3.737670 7.475340 676 +concern 1 25 3.737670 3.737670 666 +born 4 21 3.912023 15.648092 798 +media 1 19 4.007333 4.007333 861 +cambridg 1 16 4.174387 4.174387 1008 +alan 1 13 4.382027 4.382027 1146 +xerox 2 8 4.875197 9.750394 1725 +pagei 1 8 4.875197 4.875197 1683 +grew 1 8 4.875197 4.875197 1742 +reed 1 6 5.164786 5.164786 2086 +england 1 5 5.347108 5.347108 2557 +scotland 1 4 5.568345 5.568345 3049 +sabbat 1 4 5.568345 5.568345 2824 +groupuw 1 3 5.857933 5.857933 3934 +atstanford 1 3 5.857933 5.857933 3935 +pagealan 1 2 6.263398 6.263398 5587 +computerinteract 1 2 6.263398 6.263398 5829 +satisfact 1 2 6.263398 6.263398 5656 +idaho 1 2 6.263398 6.263398 5055 +havebeen 1 2 6.263398 6.263398 5830 +eduwww 1 2 6.263398 6.263398 5138 +principalresearch 1 1 6.957497 6.957497 14987 +activitiesuwconstraint 1 1 6.957497 6.957497 14988 +domainsourc 1 1 6.957497 6.957497 14989 +democraci 1 1 6.957497 6.957497 14990 +qualsproject 1 1 6.957497 6.957497 14991 +teachingher 1 1 6.957497 6.957497 14992 +informationhistori 1 1 6.957497 6.957497 14993 +paloalto 1 1 6.957497 6.957497 14994 +simulationlaboratori 1 1 6.957497 6.957497 14995 +doctoralfellow 1 1 6.957497 6.957497 14996 +ofedinburgh 1 1 6.957497 6.957497 14997 +symbolicalgebra 1 1 6.957497 6.957497 14998 +andexcept 1 1 6.957497 6.957497 14999 +europarc 1 1 6.957497 6.957497 15000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^brad^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^brad^ new file mode 100644 index 00000000..be8d2992 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^brad^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +address 1 170 1.791759 1.791759 62 +like 1 132 1.945910 1.945910 81 +thing 3 84 2.484907 7.454721 189 +probabl 1 40 3.258097 3.258097 455 +credit 1 38 3.295837 3.295837 460 +ofth 1 36 3.367296 3.367296 491 +ad 1 32 3.465736 3.465736 544 +mike 1 24 3.761200 3.761200 703 +less 1 18 4.060443 4.060443 892 +care 1 13 4.382027 4.382027 1177 +brad 2 12 4.465908 8.931816 1264 +subset 1 10 4.653960 4.653960 1425 +couldn 1 4 5.568345 5.568345 2977 +pagebrad 1 1 6.957497 6.957497 15001 +chamberlainphoto 1 1 6.957497 6.957497 15002 +perkowitzth 1 1 6.957497 6.957497 15003 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^brendan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^brendan^ new file mode 100644 index 00000000..d87eacf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^brendan^ @@ -0,0 +1,126 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +us 2 329 1.098612 2.197224 16 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +washington 2 236 1.386294 2.772588 32 +also 2 259 1.386294 2.772588 28 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +fall 2 181 1.609438 3.218876 40 +paper 1 205 1.609438 1.609438 38 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +problem 2 147 1.945910 3.891820 75 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +first 1 140 1.945910 1.945910 71 +postscript 4 131 2.079442 8.317768 90 +confer 2 126 2.079442 4.158884 100 +look 1 107 2.197225 2.197225 115 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +mathemat 1 108 2.197225 2.197225 123 +version 1 113 2.197225 2.197225 122 +part 1 98 2.302585 2.302585 129 +present 3 91 2.397895 7.193685 145 +call 1 91 2.397895 2.397895 153 +academ 1 82 2.484907 2.484907 178 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +html 5 75 2.639057 13.195285 235 +appli 1 71 2.639057 2.639057 226 +onlin 1 75 2.639057 2.639057 223 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +written 1 63 2.772589 2.772589 278 +locat 1 59 2.833213 2.833213 303 +plai 1 60 2.833213 2.833213 307 +reason 1 57 2.890372 2.890372 318 +physic 2 47 3.091042 6.182084 377 +done 1 47 3.091042 3.091042 381 +math 1 44 3.135494 3.135494 402 +around 1 43 3.178054 3.178054 415 +map 2 39 3.258097 6.516194 452 +theoret 1 39 3.258097 3.258097 446 +error 1 40 3.258097 3.258097 449 +expect 1 37 3.332205 3.332205 484 +tree 1 36 3.367296 3.367296 492 +survei 1 35 3.401197 3.401197 513 +curriculum 1 33 3.433987 3.433987 535 +scientist 1 31 3.496508 3.496508 560 +photo 1 31 3.496508 3.496508 561 +power 1 30 3.555348 3.555348 573 +produc 1 30 3.555348 3.555348 572 +cluster 1 28 3.610918 3.610918 612 +hope 1 28 3.610918 3.610918 610 +challeng 1 26 3.688879 3.688879 653 +bound 1 26 3.688879 3.688879 659 +bookmark 1 26 3.688879 3.688879 639 +aspect 1 25 3.737670 3.737670 663 +sometim 1 24 3.761200 3.761200 696 +theunivers 1 21 3.912023 3.912023 797 +siam 1 21 3.912023 3.912023 800 +binari 1 20 3.951244 3.951244 823 +lower 1 18 4.060443 4.060443 886 +biologi 1 15 4.248495 4.248495 1049 +incomput 1 14 4.317488 4.317488 1096 +galleri 2 13 4.382027 8.764054 1192 +speak 1 12 4.465908 4.465908 1283 +readabl 1 12 4.465908 4.465908 1258 +scienceat 1 11 4.553877 4.553877 1375 +moment 1 11 4.553877 4.553877 1379 +cycl 1 11 4.553877 4.553877 1335 +enter 1 10 4.653960 4.653960 1454 +ski 1 10 4.653960 4.653960 1471 +drink 1 9 4.753590 4.753590 1607 +bridg 1 8 4.875197 4.875197 1764 +interestsi 1 7 5.010635 5.010635 1969 +poster 1 7 5.010635 5.010635 1814 +rough 1 6 5.164786 5.164786 2107 +quickli 1 6 5.164786 5.164786 2000 +alphabet 1 6 5.164786 5.164786 1980 +soda 1 6 5.164786 5.164786 2189 +overlap 1 5 5.347108 5.347108 2368 +upper 1 5 5.347108 5.347108 2481 +latexhtml 1 5 5.347108 5.347108 2347 +older 1 5 5.347108 5.347108 2387 +fulfil 1 4 5.568345 5.568345 2932 +climb 1 4 5.568345 5.568345 2936 +genom 1 3 5.857933 5.857933 3546 +astrophys 1 3 5.857933 5.857933 3936 +dimac 1 3 5.857933 5.857933 3574 +edufor 1 2 6.263398 6.263398 5831 +hpcc 1 2 6.263398 6.263398 5832 +clone 1 2 6.263398 6.263398 5833 +ismb 1 2 6.263398 6.263398 5834 +probe 1 2 6.263398 6.263398 5535 +mumei 4 1 6.957497 27.829988 15004 +brendan 2 1 6.957497 13.914994 15005 +pagebrendan 1 1 6.957497 6.957497 15006 +mumeyi 1 1 6.957497 6.957497 15007 +information 1 1 6.957497 6.957497 15008 +vitaein 1 1 6.957497 6.957497 15009 +htmlorpostscriptformat 1 1 6.957497 6.957497 15010 +landmark 1 1 6.957497 6.957497 15011 +tosolv 1 1 6.957497 6.957497 15012 +groupher 1 1 6.957497 6.957497 15013 +papersb 1 1 6.957497 6.957497 15014 +candidaci 1 1 6.957497 6.957497 15015 +klaw 1 1 6.957497 6.957497 15016 +ofdiscret 1 1 6.957497 6.957497 15017 +containsom 1 1 6.957497 6.957497 15018 +recreationhik 1 1 6.957497 6.957497 15019 +coffeeto 1 1 6.957497 6.957497 15020 +sailingand 1 1 6.957497 6.957497 15021 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bricker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bricker^ new file mode 100644 index 00000000..de89d4d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^bricker^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 3 384 0.693147 2.079441 11 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +project 4 340 1.098612 4.394448 18 +us 3 329 1.098612 3.295836 16 +cours 3 273 1.098612 3.295836 15 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +year 1 148 1.945910 1.945910 84 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +spring 1 131 2.079442 2.079442 88 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +final 1 116 2.197225 2.197225 108 +need 1 98 2.302585 2.302585 135 +user 1 104 2.302585 2.302585 137 +graphic 2 90 2.397895 4.795790 147 +imag 1 91 2.397895 2.397895 161 +learn 2 86 2.484907 4.969814 170 +info 1 85 2.484907 2.484907 176 +school 1 84 2.484907 2.484907 188 +method 1 80 2.564949 2.564949 213 +interfac 1 79 2.564949 2.564949 209 +mondai 1 77 2.564949 2.564949 206 +june 1 79 2.564949 2.564949 214 +goal 1 66 2.708050 2.708050 250 +interact 1 62 2.772589 2.772589 270 +extens 1 53 2.944439 2.944439 340 +particular 1 51 2.995732 2.995732 352 +quarter 1 47 3.091042 3.091042 389 +even 1 45 3.135494 3.135494 393 +ask 1 28 3.610918 3.610918 597 +rather 1 26 3.688879 3.688879 642 +enjoi 1 26 3.688879 3.688879 660 +although 1 25 3.737670 3.737670 667 +self 1 22 3.850148 3.850148 761 +busi 1 21 3.912023 3.912023 784 +hobbi 1 16 4.174387 4.174387 1009 +excit 1 11 4.553877 4.553877 1329 +junior 1 5 5.347108 5.347108 2519 +kid 1 5 5.347108 5.347108 2516 +fairli 1 5 5.347108 5.347108 2322 +writeup 1 5 5.347108 5.347108 2352 +bricker 2 4 5.568345 11.136690 3050 +asystem 1 4 5.568345 5.568345 2612 +lauren 1 3 5.857933 5.857933 3251 +metip 1 3 5.857933 5.857933 3937 +workin 1 3 5.857933 5.857933 3938 +groupi 1 2 6.263398 6.263398 5544 +stevetanimoto 1 2 6.263398 6.263398 5835 +ofthi 1 2 6.263398 6.263398 5836 +cscl 1 2 6.263398 6.263398 5837 +inthi 1 2 6.263398 6.263398 5509 +studio 1 2 6.263398 6.263398 5838 +brickerlauren 1 1 6.957497 6.957497 15022 +clue 1 1 6.957497 6.957497 15023 +primarli 1 1 6.957497 6.957497 15024 +userinterfac 1 1 6.957497 6.957497 15025 +proclaim 1 1 6.957497 6.957497 15026 +mathematicsexperi 1 1 6.957497 6.957497 15027 +usingexploratori 1 1 6.957497 6.957497 15028 +rote 1 1 6.957497 6.957497 15029 +minterest 1 1 6.957497 6.957497 15030 +supportedcollabor 1 1 6.957497 6.957497 15031 +lawk 1 1 6.957497 6.957497 15032 +dawg 1 1 6.957497 6.957497 15033 +interfacea 1 1 6.957497 6.957497 15034 +resumeschool 1 1 6.957497 6.957497 15035 +dazethi 1 1 6.957497 6.957497 15036 +quarterdoth 1 1 6.957497 6.957497 15037 +quartershuman 1 1 6.957497 6.957497 15038 +writeupwhat 1 1 6.957497 6.957497 15039 +insocieti 1 1 6.957497 6.957497 15040 +lifesportscookingpotteri 1 1 6.957497 6.957497 15041 +garag 1 1 6.957497 6.957497 15042 +stuffbecaus 1 1 6.957497 6.957497 15043 +itaddress 1 1 6.957497 6.957497 15044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^carlson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^carlson^ new file mode 100644 index 00000000..611a3a0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^carlson^ @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +washington 2 236 1.386294 2.772588 32 +adam 1 17 4.110874 4.110874 934 +carlson 3 5 5.347108 16.041324 2351 +carlsonadam 1 1 6.957497 6.957497 15045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^certain^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^certain^ new file mode 100644 index 00000000..bab84366 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^certain^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +modifi 2 178 1.609438 3.218876 35 +paper 1 205 1.609438 1.609438 38 +model 2 145 1.945910 3.891820 69 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +look 2 107 2.197225 4.394450 115 +follow 1 92 2.397895 2.397895 143 +build 1 85 2.484907 2.484907 184 +requir 1 81 2.484907 2.484907 167 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +direct 1 57 2.890372 2.890372 316 +browser 1 56 2.890372 2.890372 313 +give 1 50 3.044522 3.044522 359 +netscap 1 44 3.135494 3.135494 395 +workstat 1 37 3.332205 3.332205 479 +download 2 36 3.367296 6.734592 489 +viewer 2 21 3.912023 7.824046 787 +similar 1 21 3.912023 3.912023 771 +andrew 2 19 4.007333 8.014666 849 +fix 1 11 4.553877 4.553877 1327 +certain 3 10 4.653960 13.961880 1393 +werner 1 10 4.653960 4.653960 1385 +salesin 1 4 5.568345 5.568345 3051 +tonyderos 1 2 6.263398 6.263398 5839 +stuetzl 1 2 6.263398 6.263398 5840 +duchamp 1 2 6.263398 6.263398 5841 +jovan 1 2 6.263398 6.263398 5842 +theview 1 1 6.957497 6.957497 15046 +popov 1 1 6.957497 6.957497 15047 +scanningproject 1 1 6.957497 6.957497 15048 +sgigraph 1 1 6.957497 6.957497 15049 +shouldalso 1 1 6.957497 6.957497 15050 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^chou^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^chou^ new file mode 100644 index 00000000..eeb295dd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^chou^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +fall 2 181 1.609438 3.218876 40 +list 1 201 1.609438 1.609438 39 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +relat 1 139 1.945910 1.945910 68 +like 1 132 1.945910 1.945910 81 +welcom 1 122 2.079442 2.079442 99 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +school 2 84 2.484907 4.969814 188 +info 1 85 2.484907 2.484907 176 +requir 1 81 2.484907 2.484907 167 +line 2 75 2.639057 5.278114 231 +workshop 1 71 2.639057 2.639057 239 +java 2 70 2.708050 5.416100 248 +best 1 59 2.833213 2.833213 299 +think 1 57 2.890372 2.890372 314 +browser 1 56 2.890372 2.890372 313 +right 1 48 3.044522 3.044522 363 +cool 1 49 3.044522 3.044522 374 +quarter 1 47 3.091042 3.091042 389 +favorit 1 44 3.135494 3.135494 410 +netscap 1 44 3.135494 3.135494 395 +open 1 38 3.295837 3.295837 469 +computersci 1 30 3.555348 3.555348 562 +chines 1 29 3.583519 3.583519 595 +though 1 27 3.637586 3.637586 622 +enjoi 1 26 3.688879 3.688879 660 +experiment 1 26 3.688879 3.688879 645 +enabl 1 26 3.688879 3.688879 655 +instal 1 22 3.850148 3.850148 754 +grad 1 20 3.951244 3.951244 837 +applet 1 20 3.951244 3.951244 827 +taiwan 1 16 4.174387 4.174387 1006 +charact 1 15 4.248495 4.248495 1028 +promot 1 12 4.465908 4.465908 1235 +rice 1 11 4.553877 4.553877 1336 +cook 1 10 4.653960 4.653960 1464 +yellow 1 9 4.753590 4.753590 1601 +ball 1 9 4.753590 4.753590 1608 +absolut 1 8 4.875197 4.875197 1646 +chinook 1 6 5.164786 5.164786 2229 +greec 1 6 5.164786 5.164786 2208 +restaur 1 6 5.164786 5.164786 2230 +ross 1 5 5.347108 5.347108 2243 +chou 3 4 5.568345 16.705035 3033 +recip 1 3 5.857933 5.857933 3668 +infoth 1 2 6.263398 6.263398 5195 +projectmi 1 2 6.263398 6.263398 5482 +schedulemi 1 2 6.263398 6.263398 5843 +publicationscod 1 2 6.263398 6.263398 5520 +stir 1 2 6.263398 6.263398 4865 +fri 1 2 6.263398 6.263398 5844 +geek 1 2 6.263398 6.263398 5083 +toi 1 2 6.263398 6.263398 5184 +font 1 2 6.263398 6.263398 5845 +purpl 1 2 6.263398 6.263398 5372 +scari 1 1 6.957497 6.957497 15051 +codesignpersonalperson 1 1 6.957497 6.957497 15052 +resumefoodi 1 1 6.957497 6.957497 15053 +ofpeopl 1 1 6.957497 6.957497 15054 +dish 1 1 6.957497 6.957497 15055 +noodl 1 1 6.957497 6.957497 15056 +beefskew 1 1 6.957497 6.957497 15057 +toysb 1 1 6.957497 6.957497 15058 +computersand 1 1 6.957497 6.957497 15059 +taiwanesei 1 1 6.957497 6.957497 15060 +taiwaneselanguag 1 1 6.957497 6.957497 15061 +taiwanes 1 1 6.957497 6.957497 15062 +sureto 1 1 6.957497 6.957497 15063 +taiwanesedictionari 1 1 6.957497 6.957497 15064 +viewedif 1 1 6.957497 6.957497 15065 +beabl 1 1 6.957497 6.957497 15066 +bouncingov 1 1 6.957497 6.957497 15067 +barnei 1 1 6.957497 6.957497 15068 +dynosaur 1 1 6.957497 6.957497 15069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^corin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^corin^ new file mode 100644 index 00000000..e84c4f97 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^corin^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +engin 2 297 1.098612 2.197224 20 +washington 2 236 1.386294 2.772588 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +read 1 154 1.791759 1.791759 47 +problem 3 147 1.945910 5.837730 75 +year 1 148 1.945910 1.945910 84 +spring 2 131 2.079442 4.158884 88 +schedul 1 119 2.079442 2.079442 85 +final 2 116 2.197225 4.394450 108 +version 2 113 2.197225 4.394450 122 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +search 2 95 2.397895 4.795790 155 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +internet 2 83 2.484907 4.969814 186 +thing 2 84 2.484907 4.969814 189 +info 1 85 2.484907 2.484907 176 +help 1 83 2.484907 2.484907 175 +refer 2 78 2.564949 5.129898 203 +april 1 77 2.564949 2.564949 196 +good 1 77 2.564949 2.564949 200 +mondai 1 77 2.564949 2.564949 206 +html 3 75 2.639057 7.917171 235 +meet 2 72 2.639057 5.278114 229 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +tuesdai 1 73 2.639057 2.639057 219 +august 1 66 2.708050 2.708050 257 +thursdai 1 70 2.708050 2.708050 241 +result 2 65 2.772589 5.545178 281 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +guid 1 63 2.772589 2.772589 267 +wednesdai 1 64 2.772589 2.772589 261 +automat 1 61 2.833213 2.833213 306 +plai 1 60 2.833213 2.833213 307 +colleg 1 61 2.833213 2.833213 300 +found 1 53 2.944439 2.944439 337 +set 2 50 3.044522 6.089044 361 +archiv 1 49 3.044522 3.044522 364 +done 2 47 3.091042 6.182084 381 +adapt 1 46 3.091042 3.091042 387 +featur 1 46 3.091042 3.091042 386 +math 6 44 3.135494 18.812964 402 +netscap 1 44 3.135494 3.135494 395 +fridai 1 44 3.135494 3.135494 390 +term 1 43 3.178054 3.178054 411 +review 1 42 3.218876 3.218876 425 +late 1 40 3.258097 3.258097 439 +programm 1 39 3.258097 3.258097 445 +correct 1 38 3.295837 3.295837 462 +open 1 38 3.295837 3.295837 469 +statist 1 35 3.401197 3.401197 521 +tech 1 35 3.401197 3.401197 515 +chapter 1 32 3.465736 3.465736 536 +autumn 1 31 3.496508 3.496508 558 +linux 2 27 3.637586 7.275172 631 +lab 1 24 3.761200 3.761200 698 +color 2 22 3.850148 7.700296 762 +self 1 22 3.850148 3.850148 761 +hous 1 21 3.912023 3.912023 801 +region 1 19 4.007333 4.007333 875 +macintosh 1 17 4.110874 4.110874 920 +dilbert 1 16 4.174387 4.174387 996 +month 1 15 4.248495 4.248495 1025 +contribut 1 15 4.248495 4.248495 1021 +brother 1 13 4.382027 4.382027 1189 +tune 1 12 4.465908 4.465908 1227 +insid 1 12 4.465908 4.465908 1262 +fpga 1 10 4.653960 4.653960 1433 +pacif 1 8 4.875197 4.875197 1674 +babylon 1 8 4.875197 4.875197 1731 +duke 1 6 5.164786 5.164786 2231 +usag 1 6 5.164786 5.164786 2209 +peek 1 6 5.164786 5.164786 2169 +contest 4 5 5.347108 21.388432 2273 +compet 1 5 5.347108 5.347108 2462 +corei 1 4 5.568345 5.568345 2718 +wavelet 1 4 5.568345 5.568345 2874 +percept 1 3 5.857933 5.857933 3739 +rsum 1 3 5.857933 5.857933 3939 +zone 1 3 5.857933 5.857933 3747 +corin 1 3 5.857933 5.857933 3311 +induc 1 2 6.263398 6.263398 4795 +lurker 1 2 6.263398 6.263398 5050 +andersoncorei 1 1 6.957497 6.957497 15070 +andersonth 1 1 6.957497 6.957497 15071 +localtalk 1 1 6.957497 6.957497 15072 +collegi 1 1 6.957497 6.957497 15073 +univser 1 1 6.957497 6.957497 15074 +highlin 1 1 6.957497 6.957497 15075 +polli 1 1 6.957497 6.957497 15076 +treasuri 1 1 6.957497 6.957497 15077 +providercool 1 1 6.957497 6.957497 15078 +sunsit 1 1 6.957497 6.957497 15079 +pageus 1 1 6.957497 6.957497 15080 +washinton 1 1 6.957497 6.957497 15081 +uwtv 1 1 6.957497 6.957497 15082 +notesmi 1 1 6.957497 6.957497 15083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^csk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^csk^ new file mode 100644 index 00000000..826e6033 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^csk^ @@ -0,0 +1,66 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +address 2 170 1.791759 3.583518 62 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +well 1 109 2.197225 2.197225 121 +text 1 98 2.302585 2.302585 133 +homepag 1 93 2.397895 2.397895 148 +second 1 81 2.484907 2.484907 166 +start 1 83 2.484907 2.484907 173 +know 1 80 2.564949 2.564949 198 +experi 2 64 2.772589 5.545178 283 +locat 1 59 2.833213 2.833213 303 +found 2 53 2.944439 5.888878 337 +visitor 1 49 3.044522 3.044522 371 +express 1 32 3.465736 3.465736 540 +photo 1 31 3.496508 3.496508 561 +repres 1 26 3.688879 3.688879 656 +never 1 25 3.737670 3.737670 671 +grad 1 20 3.951244 3.951244 837 +wonder 1 20 3.951244 3.951244 815 +minut 1 20 3.951244 3.951244 810 +appropri 1 18 4.060443 4.060443 883 +anyon 1 17 4.110874 4.110874 916 +choos 1 16 4.174387 4.174387 964 +universityof 1 15 4.248495 4.248495 1061 +near 1 14 4.317488 4.317488 1091 +cannot 1 13 4.382027 4.382027 1144 +sai 1 13 4.382027 4.382027 1175 +ball 2 9 4.753590 9.507180 1608 +occur 1 9 4.753590 4.753590 1572 +didn 1 9 4.753590 4.753590 1563 +craig 1 7 5.010635 5.010635 1879 +fromth 1 7 5.010635 5.010635 1802 +saturdai 1 7 5.010635 5.010635 1794 +parent 1 6 5.164786 5.164786 2204 +situat 1 5 5.347108 5.347108 2365 +curiou 1 5 5.347108 5.347108 2541 +enjoy 2 4 5.568345 11.136690 2937 +waterloo 1 3 5.857933 5.857933 3523 +tomi 1 2 6.263398 6.263398 5846 +convoc 1 2 6.263398 6.263398 5757 +honour 1 2 6.263398 6.263398 5632 +felt 1 2 6.263398 6.263398 4978 +incident 1 2 6.263398 6.263398 5109 +ceremoni 1 2 6.263398 6.263398 5585 +forgiv 1 2 6.263398 6.263398 5770 +valedictorian 3 1 6.957497 20.872491 15084 +experiencecraig 1 1 6.957497 6.957497 15085 +kaplancurr 1 1 6.957497 6.957497 15086 +copyof 1 1 6.957497 6.957497 15087 +undergraduatewa 1 1 6.957497 6.957497 15088 +cskaplan 1 1 6.957497 6.957497 15089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ new file mode 100644 index 00000000..c9612186 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ctkwok^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +welcom 2 122 2.079442 4.158884 99 +peopl 1 96 2.302585 2.302585 132 +take 1 97 2.302585 2.302585 134 +sinc 1 90 2.397895 2.397895 159 +thing 1 84 2.484907 2.484907 189 +java 1 70 2.708050 2.708050 248 +plan 1 65 2.772589 2.772589 272 +think 1 57 2.890372 2.890372 314 +visitor 1 49 3.044522 3.044522 371 +numer 1 49 3.044522 3.044522 369 +futur 1 41 3.218876 3.218876 427 +vision 1 41 3.218876 3.218876 430 +mean 1 37 3.332205 3.332205 477 +anim 1 31 3.496508 3.496508 557 +load 1 28 3.610918 3.610918 601 +applet 1 20 3.951244 3.951244 827 +agent 1 18 4.060443 4.060443 910 +wind 1 18 4.060443 4.060443 908 +figur 1 18 4.060443 4.060443 903 +thought 1 17 4.110874 4.110874 945 +edui 1 13 4.382027 4.382027 1193 +weld 1 9 4.753590 4.753590 1538 +vallei 2 7 5.010635 10.021270 1959 +chung 1 7 5.010635 5.010635 1964 +softbot 1 7 5.010635 5.010635 1974 +etzioni 1 6 5.164786 5.164786 2135 +andsoftwar 1 4 5.568345 5.568345 2753 +arch 1 4 5.568345 5.568345 2995 +codi 3 3 5.857933 17.573799 3940 +kwok 3 3 5.857933 17.573799 3941 +aliv 1 3 5.857933 5.857933 3864 +nausicaa 2 2 6.263398 12.526796 5218 +ingram 1 2 6.263398 6.263398 5847 +castl 1 2 6.263398 6.263398 5217 +doom 1 2 6.263398 6.263398 5848 +sanctuari 2 1 6.957497 13.914994 15090 +asami 1 1 6.957497 6.957497 15091 +chiaki 1 1 6.957497 6.957497 15092 +ctkwok 1 1 6.957497 6.957497 15093 +andoren 1 1 6.957497 6.957497 15094 +aiuw 1 1 6.957497 6.957497 15095 +informationleisur 1 1 6.957497 6.957497 15096 +windlaputa 1 1 6.957497 6.957497 15097 +skyhyp 1 1 6.957497 6.957497 15098 +gunnm 1 1 6.957497 6.957497 15099 +vile 1 1 6.957497 6.957497 15100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^darrenc^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^darrenc^ new file mode 100644 index 00000000..7ae02d93 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^darrenc^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +updat 1 191 1.609438 1.609438 41 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +postscript 1 131 2.079442 2.079442 90 +homepag 1 93 2.397895 2.397895 148 +resum 2 79 2.564949 5.129898 217 +complet 1 77 2.564949 2.564949 208 +html 1 75 2.639057 2.639057 235 +plan 1 65 2.772589 2.772589 272 +vita 1 38 3.295837 3.295837 473 +curriculum 2 33 3.433987 6.867974 535 +employ 1 12 4.465908 4.465908 1291 +rest 1 12 4.465908 4.465908 1259 +darren 2 5 5.347108 10.694216 2565 +cronquist 2 3 5.857933 11.715866 3942 +myph 1 3 5.857933 5.857933 3880 +underconstruct 1 3 5.857933 5.857933 3889 +darrenc 1 1 6.957497 6.957497 15101 +vitaperson 1 1 6.957497 6.957497 15102 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dbc1^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dbc1^ new file mode 100644 index 00000000..ff7083f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dbc1^ @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 3 380 0.693147 2.079441 9 +interest 2 384 0.693147 1.386294 11 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +also 3 259 1.386294 4.158882 28 +washington 2 236 1.386294 2.772588 32 +graduat 2 215 1.386294 2.772588 31 +recent 2 167 1.791759 3.583518 58 +develop 2 174 1.791759 3.583518 53 +read 1 154 1.791759 1.791759 47 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +studi 2 120 2.079442 4.158884 91 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +assist 3 112 2.197225 6.591675 113 +check 2 115 2.197225 4.394450 118 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +find 1 111 2.197225 2.197225 111 +user 3 104 2.302585 6.907755 137 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +second 2 81 2.484907 4.969814 166 +activ 2 84 2.484907 4.969814 182 +build 2 85 2.484907 4.969814 184 +control 1 82 2.484907 2.484907 164 +novemb 1 81 2.484907 2.484907 179 +thing 1 84 2.484907 2.484907 189 +school 1 84 2.484907 2.484907 188 +librari 1 87 2.484907 2.484907 181 +help 1 83 2.484907 2.484907 175 +interfac 4 79 2.564949 10.259796 209 +appear 1 78 2.564949 2.564949 210 +david 3 71 2.639057 7.917171 232 +intellig 1 72 2.639057 2.639057 225 +line 1 75 2.639057 2.639057 231 +symposium 1 72 2.639057 2.639057 238 +nation 1 74 2.639057 2.639057 240 +knowledg 1 67 2.708050 2.708050 243 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +plan 5 65 2.772589 13.862945 272 +interact 2 62 2.772589 5.545178 270 +experi 1 64 2.772589 2.772589 283 +import 1 65 2.772589 2.772589 282 +automat 3 61 2.833213 8.499639 306 +simpl 2 60 2.833213 5.666426 298 +variou 1 56 2.890372 2.890372 317 +local 2 55 2.944439 5.888878 334 +undergradu 1 54 2.944439 2.944439 338 +week 1 52 2.995732 2.995732 343 +even 1 45 3.135494 3.135494 393 +favorit 1 44 3.135494 3.135494 410 +third 1 43 3.178054 3.178054 412 +fast 1 42 3.218876 3.218876 429 +programm 1 39 3.258097 3.258097 445 +live 1 40 3.258097 3.258097 451 +map 1 39 3.258097 3.258097 452 +realli 1 40 3.258097 3.258097 444 +prototyp 1 38 3.295837 3.295837 463 +feel 1 37 3.332205 3.332205 483 +respons 1 37 3.332205 3.332205 476 +game 1 36 3.367296 3.367296 498 +represent 1 35 3.401197 3.401197 512 +michael 1 35 3.401197 3.401197 514 +everi 1 34 3.401197 3.401197 519 +manual 1 35 3.401197 3.401197 504 +product 1 33 3.433987 3.433987 527 +human 1 32 3.465736 3.465736 546 +collabor 1 32 3.465736 3.465736 543 +domain 1 30 3.555348 3.555348 564 +built 1 29 3.583519 3.583519 592 +rather 1 26 3.688879 3.688879 642 +client 1 25 3.737670 3.737670 679 +spent 1 25 3.737670 3.737670 676 +store 1 24 3.761200 3.761200 693 +demonstr 1 24 3.761200 3.761200 694 +initi 1 23 3.806662 3.806662 717 +recognit 1 23 3.806662 3.806662 723 +famili 1 23 3.806662 3.806662 735 +william 1 22 3.850148 3.850148 765 +theunivers 1 21 3.912023 3.912023 797 +navig 1 21 3.912023 3.912023 796 +watch 1 21 3.912023 3.912023 789 +anderson 2 19 4.007333 8.014666 860 +boston 1 19 4.007333 4.007333 862 +agent 1 18 4.060443 4.060443 910 +adam 1 17 4.110874 4.110874 934 +match 1 16 4.174387 4.174387 965 +atth 1 15 4.248495 4.248495 1019 +mayb 1 15 4.248495 4.248495 1014 +camera 2 14 4.317488 8.634976 1115 +dave 2 14 4.317488 8.634976 1098 +senior 1 14 4.317488 4.317488 1120 +context 1 13 4.382027 4.382027 1153 +whose 1 13 4.382027 4.382027 1166 +sai 1 13 4.382027 4.382027 1175 +touch 1 12 4.465908 4.465908 1288 +grow 1 12 4.465908 4.465908 1209 +career 1 12 4.465908 4.465908 1287 +appl 1 11 4.553877 4.553877 1303 +michigan 1 11 4.553877 4.553877 1368 +surf 1 11 4.553877 4.553877 1301 +shop 2 10 4.653960 9.307920 1469 +death 1 10 4.653960 4.653960 1457 +weld 2 9 4.753590 9.507180 1538 +russel 1 9 4.753590 4.753590 1507 +debugg 1 9 4.753590 4.753590 1493 +declar 1 9 4.753590 4.753590 1526 +leader 1 9 4.753590 4.753590 1576 +juan 1 9 4.753590 4.753590 1580 +sean 1 8 4.875197 4.875197 1705 +aaai 1 8 4.875197 4.875197 1750 +edg 1 8 4.875197 4.875197 1647 +planner 1 7 5.010635 5.010635 1797 +sensor 1 7 5.010635 5.010635 1920 +fortun 1 7 5.010635 5.010635 1872 +spare 1 6 5.164786 5.164786 2177 +mix 1 6 5.164786 5.164786 2200 +chicago 1 6 5.164786 5.164786 2149 +babi 1 5 5.347108 5.347108 2493 +pars 1 5 5.347108 5.347108 2321 +darren 1 5 5.347108 5.347108 2565 +compet 1 5 5.347108 5.347108 2462 +salesin 2 4 5.568345 11.136690 3051 +midnight 1 4 5.568345 5.568345 2599 +gotten 1 4 5.568345 5.568345 2628 +cut 1 4 5.568345 5.568345 2620 +tick 1 4 5.568345 5.568345 2975 +cohen 2 3 5.857933 11.715866 3652 +workin 1 3 5.857933 5.857933 3938 +harold 1 3 5.857933 5.857933 3803 +gloriou 1 3 5.857933 5.857933 3816 +hero 1 3 5.857933 5.857933 3711 +fame 1 3 5.857933 5.857933 3793 +straight 1 3 5.857933 5.857933 3655 +evil 1 3 5.857933 5.857933 3943 +christianson 3 2 6.263398 18.790194 5849 +till 2 2 6.263398 12.526796 5850 +nowher 1 2 6.263398 6.263398 4292 +shopbot 1 2 6.263398 6.263398 5824 +chicken 1 2 6.263398 6.263398 5851 +theanim 1 2 6.263398 6.263398 5852 +thechateau 1 2 6.263398 6.263398 5853 +cynic 1 2 6.263398 6.263398 5854 +duel 1 2 6.263398 6.263398 5855 +christiansondbc 1 1 6.957497 6.957497 15103 +inaiand 1 1 6.957497 6.957497 15104 +graphicsa 1 1 6.957497 6.957497 15105 +directedbehavior 1 1 6.957497 6.957497 15106 +buzzwordacquisit 1 1 6.957497 6.957497 15107 +bobdoorenbo 1 1 6.957497 6.957497 15108 +somehowintegr 1 1 6.957497 6.957497 15109 +applicationthat 1 1 6.957497 6.957497 15110 +basket 1 1 6.957497 6.957497 15111 +determinewhat 1 1 6.957497 6.957497 15112 +moviethat 1 1 6.957497 6.957497 15113 +technologyinto 1 1 6.957497 6.957497 15114 +perpetr 1 1 6.957497 6.957497 15115 +theucpop 1 1 6.957497 6.957497 15116 +isher 1 1 6.957497 6.957497 15117 +carboload 1 1 6.957497 6.957497 15118 +publicationschristianson 1 1 6.957497 6.957497 15119 +cinematographi 1 1 6.957497 6.957497 15120 +firbi 1 1 6.957497 6.957497 15121 +mcdougal 1 1 6.957497 6.957497 15122 +fusion 1 1 6.957497 6.957497 15123 +withfreder 1 1 6.957497 6.957497 15124 +judo 1 1 6.957497 6.957497 15125 +sibl 1 1 6.957497 6.957497 15126 +sisterjust 1 1 6.957497 6.957497 15127 +supercollid 1 1 6.957497 6.957497 15128 +slack 1 1 6.957497 6.957497 15129 +mirski 1 1 6.957497 6.957497 15130 +youth 1 1 6.957497 6.957497 15131 +wwwf 1 1 6.957497 6.957497 15132 +grudg 1 1 6.957497 6.957497 15133 +doomgat 1 1 6.957497 6.957497 15134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dbj^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dbj^ new file mode 100644 index 00000000..c1562be2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dbj^ @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +modifi 1 178 1.609438 1.609438 35 +seattl 1 120 2.079442 2.079442 103 +assist 1 112 2.197225 2.197225 113 +look 1 107 2.197225 2.197225 115 +take 2 97 2.302585 4.605170 134 +activ 1 84 2.484907 2.484907 182 +mondai 1 77 2.564949 2.564949 206 +david 1 71 2.639057 2.639057 232 +creat 1 63 2.772589 2.772589 277 +give 1 50 3.044522 3.044522 359 +tutori 1 39 3.258097 3.258097 437 +form 1 39 3.258097 3.258097 443 +togeth 1 23 3.806662 3.806662 714 +navig 1 21 3.912023 3.912023 796 +hypertext 1 19 4.007333 4.007333 865 +quiz 2 16 4.174387 8.348774 990 +dave 1 14 4.317488 4.317488 1098 +johnson 2 13 4.382027 8.764054 1162 +script 1 13 4.382027 4.382027 1171 +basketbal 1 12 4.465908 4.465908 1289 +softbal 1 9 4.753590 4.753590 1594 +golf 1 6 5.164786 5.164786 2178 +fit 1 5 5.347108 5.347108 2285 +racquetbal 1 4 5.568345 5.568345 3052 +assess 1 4 5.568345 5.568345 2724 +readersproject 1 1 6.957497 6.957497 15135 +theracquetbal 1 1 6.957497 6.957497 15136 +thecreat 1 1 6.957497 6.957497 15137 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ddion^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ddion^ new file mode 100644 index 00000000..78a6612a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ddion^ @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 5 443 0.693147 3.465735 6 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +program 1 374 0.693147 0.693147 7 +current 3 284 1.098612 3.295836 21 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +languag 2 227 1.386294 2.772588 26 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +first 2 140 1.945910 3.891820 71 +like 1 132 1.945910 1.945910 81 +year 1 148 1.945910 1.945910 84 +construct 1 139 1.945910 1.945910 82 +perform 1 143 1.945910 1.945910 74 +problem 1 147 1.945910 1.945910 75 +seattl 6 120 2.079442 12.476652 103 +studi 1 120 2.079442 2.079442 91 +world 3 115 2.197225 6.591675 126 +person 2 111 2.197225 4.394450 117 +manag 1 114 2.197225 2.197225 125 +site 1 106 2.197225 2.197225 119 +user 2 104 2.302585 4.605170 137 +memori 1 101 2.302585 2.302585 139 +call 1 91 2.397895 2.397895 153 +commun 1 95 2.397895 2.397895 157 +homepag 1 93 2.397895 2.397895 148 +section 1 94 2.397895 2.397895 149 +real 1 93 2.397895 2.397895 144 +level 3 87 2.484907 7.454721 180 +stuff 2 87 2.484907 4.969814 171 +academ 1 82 2.484907 2.484907 178 +environ 1 84 2.484907 2.484907 177 +help 1 83 2.484907 2.484907 175 +institut 1 84 2.484907 2.484907 187 +learn 1 86 2.484907 2.484907 170 +server 3 76 2.564949 7.694847 204 +dynam 2 76 2.564949 5.129898 194 +know 1 80 2.564949 2.564949 198 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +david 3 71 2.639057 7.917171 232 +solv 1 73 2.639057 2.639057 234 +line 1 75 2.639057 2.639057 231 +view 1 70 2.708050 2.708050 254 +main 1 67 2.708050 2.708050 256 +visit 2 63 2.772589 5.545178 288 +written 1 63 2.772589 2.772589 278 +previou 1 62 2.772589 2.772589 290 +guid 1 63 2.772589 2.772589 267 +unix 3 58 2.890372 8.671116 308 +major 2 56 2.890372 5.780744 315 +think 1 57 2.890372 2.890372 314 +reason 1 57 2.890372 2.890372 318 +extens 4 53 2.944439 11.777756 340 +undergradu 4 54 2.944439 11.777756 338 +maintain 1 51 2.995732 2.995732 342 +run 1 51 2.995732 2.995732 347 +date 1 51 2.995732 2.995732 344 +life 2 50 3.044522 6.089044 375 +right 1 48 3.044522 3.044522 363 +cool 1 49 3.044522 3.044522 374 +featur 1 46 3.091042 3.091042 386 +netscap 1 44 3.135494 3.135494 395 +around 1 43 3.178054 3.178054 415 +past 1 42 3.218876 3.218876 428 +movi 2 40 3.258097 6.516194 459 +littl 1 39 3.258097 3.258097 454 +brian 1 38 3.295837 3.295837 466 +respons 1 37 3.332205 3.332205 476 +soon 1 36 3.367296 3.367296 494 +manual 1 35 3.401197 3.401197 504 +extend 1 32 3.465736 3.465736 539 +weather 1 28 3.610918 3.610918 618 +campu 1 27 3.637586 3.637586 623 +team 1 27 3.637586 3.637586 625 +primari 1 25 3.737670 3.737670 669 +sport 1 25 3.737670 3.737670 683 +todai 1 25 3.737670 3.737670 672 +other 1 24 3.761200 3.761200 697 +dai 1 22 3.850148 3.850148 753 +recommend 1 22 3.850148 3.850148 737 +kernel 2 20 3.951244 7.902488 825 +safeti 1 20 3.951244 3.951244 817 +fine 1 20 3.951244 3.951244 822 +region 1 19 4.007333 4.007333 875 +excel 1 19 4.007333 4.007333 868 +bershad 1 18 4.060443 4.060443 902 +statu 1 18 4.060443 4.060443 885 +protect 1 17 4.110874 4.110874 935 +debug 1 17 4.110874 4.110874 944 +condit 2 16 4.174387 8.348774 975 +anyth 1 16 4.174387 4.174387 998 +dilbert 1 16 4.174387 4.174387 996 +mayb 1 15 4.248495 4.248495 1014 +spin 6 14 4.317488 25.904928 1121 +achiev 1 14 4.317488 4.317488 1088 +stai 2 12 4.465908 8.931816 1215 +touch 2 12 4.465908 8.931816 1288 +rest 1 12 4.465908 4.465908 1259 +surf 2 11 4.553877 9.107754 1301 +traffic 2 10 4.653960 9.307920 1421 +shop 1 10 4.653960 4.653960 1469 +modula 2 9 4.753590 9.507180 1613 +mach 2 8 4.875197 9.750394 1669 +transport 2 8 4.875197 9.750394 1672 +claim 1 8 4.875197 4.875197 1664 +hold 1 8 4.875197 4.875197 1645 +wouldn 1 7 5.010635 5.010635 1970 +occasion 1 7 5.010635 5.010635 1905 +athlet 1 7 5.010635 5.010635 1933 +yeah 1 6 5.164786 5.164786 2195 +impress 1 6 5.164786 5.164786 2096 +variant 1 6 5.164786 5.164786 2043 +subsystem 1 6 5.164786 5.164786 2015 +band 1 6 5.164786 5.164786 2198 +restaur 1 6 5.164786 5.164786 2230 +notr 4 4 5.568345 22.273380 2880 +dame 4 4 5.568345 22.273380 2881 +afraid 1 4 5.568345 5.568345 3053 +breath 1 4 5.568345 5.568345 2946 +countless 1 4 5.568345 5.568345 3020 +racquetbal 1 4 5.568345 5.568345 3052 +leagu 1 4 5.568345 5.568345 3040 +basebal 1 4 5.568345 5.568345 2969 +bean 1 4 5.568345 5.568345 2968 +slight 1 3 5.857933 5.857933 3894 +emul 1 3 5.857933 5.857933 3944 +meanwhil 1 3 5.857933 5.857933 3129 +distract 1 3 5.857933 5.857933 3945 +trumpet 1 3 5.857933 5.857933 3946 +espn 1 3 5.857933 5.857933 3724 +marin 1 3 5.857933 5.857933 3947 +dion 3 2 6.263398 18.790194 5856 +okai 1 2 6.263398 6.263398 4465 +occupi 1 2 6.263398 6.263398 5857 +ladder 1 2 6.263398 6.263398 5858 +outlet 1 2 6.263398 6.263398 5248 +infam 1 2 6.263398 6.263398 5859 +ddion 1 1 6.957497 6.957497 15138 +thespinoper 1 1 6.957497 6.957497 15139 +intercept 1 1 6.957497 6.957497 15140 +havework 1 1 6.957497 6.957497 15141 +ipromis 1 1 6.957497 6.957497 15142 +enhancedthi 1 1 6.957497 6.957497 15143 +vast 1 1 6.957497 6.957497 15144 +sportzon 1 1 6.957497 6.957497 15145 +eateri 1 1 6.957497 6.957497 15146 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^derrick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^derrick^ new file mode 100644 index 00000000..024b550d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^derrick^ @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +project 3 340 1.098612 3.295836 18 +student 2 343 1.098612 2.197224 19 +engin 2 297 1.098612 2.197224 20 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +washington 2 236 1.386294 2.772588 32 +graduat 2 215 1.386294 2.772588 31 +design 2 213 1.386294 2.772588 25 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +fall 2 181 1.609438 3.218876 40 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +parallel 4 169 1.791759 7.167036 60 +data 3 170 1.791759 5.375277 49 +develop 2 174 1.791759 3.583518 53 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +address 1 170 1.791759 1.791759 62 +perform 2 143 1.945910 3.891820 74 +tool 2 117 2.079442 4.158884 93 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +topic 2 114 2.197225 4.394450 110 +teach 2 108 2.197225 4.394450 112 +assist 2 112 2.197225 4.394450 113 +final 1 116 2.197225 2.197225 108 +person 1 111 2.197225 2.197225 117 +advanc 3 99 2.302585 6.907755 130 +commun 2 95 2.397895 4.795790 157 +mani 1 92 2.397895 2.397895 150 +center 1 88 2.397895 2.397895 158 +environ 1 84 2.484907 2.484907 177 +start 1 83 2.484907 2.484907 173 +server 1 76 2.564949 2.564949 204 +issu 1 78 2.564949 2.564949 211 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +experi 2 64 2.772589 5.545178 283 +collect 2 65 2.772589 5.545178 268 +interact 1 62 2.772589 2.772589 270 +colleg 3 61 2.833213 8.499639 300 +share 1 59 2.833213 2.833213 304 +index 1 56 2.890372 2.890372 309 +summer 1 56 2.890372 2.890372 311 +extens 3 53 2.944439 8.833317 340 +undergradu 1 54 2.944439 2.944439 338 +run 1 51 2.995732 2.995732 347 +protocol 1 45 3.135494 3.135494 407 +offer 2 43 3.178054 6.356108 414 +around 1 43 3.178054 3.178054 415 +howev 1 41 3.218876 3.218876 422 +field 1 37 3.332205 3.332205 482 +award 1 34 3.401197 3.401197 523 +curriculum 1 33 3.433987 3.433987 535 +independ 1 32 3.465736 3.465736 548 +dissert 1 32 3.465736 3.465736 549 +often 1 31 3.496508 3.496508 551 +administr 3 27 3.637586 10.912758 628 +campu 1 27 3.637586 3.637586 623 +arrai 1 27 3.637586 3.637586 627 +challeng 1 26 3.688879 3.688879 653 +task 1 25 3.737670 3.737670 678 +portabl 2 20 3.951244 7.902488 819 +beauti 1 18 4.060443 4.060443 912 +event 1 18 4.060443 4.060443 896 +sheet 1 16 4.174387 4.174387 973 +women 1 16 4.174387 4.174387 1004 +career 1 12 4.465908 4.465908 1287 +skill 1 12 4.465908 4.465908 1205 +asynchron 1 12 4.465908 4.465908 1229 +host 1 11 4.553877 4.553877 1306 +typic 1 11 4.553877 4.553877 1360 +extra 1 11 4.553877 4.553877 1312 +interestsmi 1 10 4.653960 4.653960 1462 +tutor 1 9 4.753590 4.753590 1552 +heart 1 8 4.875197 4.875197 1729 +ring 1 8 4.875197 4.875197 1684 +on 1 8 4.875197 4.875197 1628 +pursu 1 7 5.010635 5.010635 1902 +divers 1 6 5.164786 5.164786 2232 +averag 1 6 5.164786 5.164786 2098 +conveni 1 6 5.164786 5.164786 2088 +li 1 5 5.347108 5.347108 2500 +suffer 1 5 5.347108 5.347108 2268 +spaa 1 3 5.857933 5.857933 3906 +certif 1 3 5.857933 5.857933 3859 +token 1 2 6.263398 6.263398 4415 +foremost 1 2 6.263398 6.263398 5361 +ordistribut 1 2 6.263398 6.263398 5581 +distributedenviron 1 2 6.263398 6.263398 5183 +comm 1 2 6.263398 6.263398 4746 +newslet 1 2 6.263398 6.263398 5860 +derrick 3 1 6.957497 20.872491 15147 +weathersbi 1 1 6.957497 6.957497 15148 +bullssupersonicsi 1 1 6.957497 6.957497 15149 +phdin 1 1 6.957497 6.957497 15150 +ofseattl 1 1 6.957497 6.957497 15151 +prei 1 1 6.957497 6.957497 15152 +therebyextend 1 1 6.957497 6.957497 15153 +interestssignific 1 1 6.957497 6.957497 15154 +securityresearch 1 1 6.957497 6.957497 15155 +challengespres 1 1 6.957497 6.957497 15156 +theseenviron 1 1 6.957497 6.957497 15157 +daunt 1 1 6.957497 6.957497 15158 +projectacadem 1 1 6.957497 6.957497 15159 +achievementsinstructor 1 1 6.957497 6.957497 15160 +collegeinstructor 1 1 6.957497 6.957497 15161 +minoritystud 1 1 6.957497 6.957497 15162 +engineeringoutstand 1 1 6.957497 6.957497 15163 +cnnfinanciala 1 1 6.957497 6.957497 15164 +javaw 1 1 6.957497 6.957497 15165 +weathersbyderrick 1 1 6.957497 6.957497 15166 +edutu 1 1 6.957497 6.957497 15167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dewey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dewey^ new file mode 100644 index 00000000..7cbb9ee0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dewey^ @@ -0,0 +1,124 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +washington 2 236 1.386294 2.772588 32 +link 2 247 1.386294 2.772588 24 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +read 3 154 1.791759 5.375277 47 +avail 2 169 1.791759 3.583518 48 +develop 1 174 1.791759 1.791759 53 +note 4 142 1.945910 7.783640 67 +like 2 132 1.945910 3.891820 81 +relat 2 139 1.945910 3.891820 68 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +postscript 4 131 2.079442 8.317768 90 +provid 1 121 2.079442 2.079442 94 +pleas 2 113 2.197225 4.394450 114 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +need 2 98 2.302585 4.605170 135 +book 1 99 2.302585 2.302585 131 +imag 1 91 2.397895 2.397895 161 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +know 1 80 2.564949 2.564949 198 +june 1 79 2.564949 2.564949 214 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +html 1 75 2.639057 2.639057 235 +tuesdai 1 73 2.639057 2.639057 219 +goal 1 66 2.708050 2.708050 250 +plai 1 60 2.833213 2.833213 307 +think 1 57 2.890372 2.890372 314 +summer 1 56 2.890372 2.890372 311 +finger 1 52 2.995732 2.995732 354 +much 1 52 2.995732 2.995732 349 +format 1 48 3.044522 3.044522 356 +frequent 1 49 3.044522 3.044522 367 +possibl 1 47 3.091042 3.091042 378 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +york 1 41 3.218876 3.218876 435 +late 1 40 3.258097 3.258097 439 +brian 2 38 3.295837 6.591674 466 +china 2 37 3.332205 6.664410 487 +feel 1 37 3.332205 3.332205 483 +game 4 36 3.367296 13.469184 498 +soon 1 36 3.367296 3.367296 494 +ofth 1 36 3.367296 3.367296 491 +statist 2 35 3.401197 6.802394 521 +return 1 34 3.401197 3.401197 502 +bibliographi 1 34 3.401197 3.401197 518 +anim 1 31 3.496508 3.496508 557 +progress 1 28 3.610918 3.610918 598 +color 1 22 3.850148 3.850148 762 +period 1 22 3.850148 3.850148 743 +feedback 1 19 4.007333 4.007333 854 +histori 1 19 4.007333 4.007333 853 +encourag 1 18 4.060443 4.060443 880 +edulast 1 17 4.110874 4.110874 927 +letter 1 16 4.174387 4.174387 981 +alreadi 1 16 4.174387 4.174387 963 +draft 3 14 4.317488 12.952464 1085 +hopefulli 2 14 4.317488 8.634976 1071 +trip 1 14 4.317488 4.317488 1113 +near 1 14 4.317488 4.317488 1091 +carri 2 13 4.382027 8.764054 1152 +readabl 1 12 4.465908 4.465908 1258 +bike 1 10 4.653960 4.653960 1468 +death 1 10 4.653960 4.653960 1457 +drink 1 9 4.753590 4.753590 1607 +ride 1 8 4.875197 4.875197 1741 +blue 1 6 5.164786 5.164786 2227 +seen 1 6 5.164786 5.164786 2202 +scienceand 1 5 5.347108 5.347108 2348 +atlant 1 5 5.347108 5.347108 2508 +semi 1 5 5.347108 5.347108 2510 +chaotic 1 5 5.347108 5.347108 2566 +ireland 3 4 5.568345 16.705035 2853 +pagebrian 1 4 5.568345 5.568345 3054 +myfavorit 1 3 5.857933 5.857933 3852 +interview 1 3 5.857933 5.857933 3324 +fascin 1 3 5.857933 5.857933 3948 +northern 4 2 6.263398 25.053592 5861 +terrorist 2 2 6.263398 12.526796 5190 +thorough 2 2 6.263398 12.526796 4134 +ocean 1 2 6.263398 6.263398 5375 +shoulder 1 2 6.263398 6.263398 4750 +thecurr 1 2 6.263398 6.263398 5862 +addict 1 2 6.263398 6.263398 5576 +dewei 3 1 6.957497 20.872491 15168 +deweyabout 1 1 6.957497 6.957497 15169 +doyou 1 1 6.957497 6.957497 15170 +ilov 1 1 6.957497 6.957497 15171 +oldroomm 1 1 6.957497 6.957497 15172 +irelandi 1 1 6.957497 6.957497 15173 +belfast 1 1 6.957497 6.957497 15174 +sixti 1 1 6.957497 6.957497 15175 +pagesof 1 1 6.957497 6.957497 15176 +luggag 1 1 6.957497 6.957497 15177 +getthos 1 1 6.957497 6.957497 15178 +enlighteningformat 1 1 6.957497 6.957497 15179 +thisproject 1 1 6.957497 6.957497 15180 +sinn 1 1 6.957497 6.957497 15181 +fein 1 1 6.957497 6.957497 15182 +injuri 1 1 6.957497 6.957497 15183 +recuri 1 1 6.957497 6.957497 15184 +ancient 1 1 6.957497 6.957497 15185 +imageek 1 1 6.957497 6.957497 15186 +cuni 1 1 6.957497 6.957497 15187 +jansteen 1 1 6.957497 6.957497 15188 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ new file mode 100644 index 00000000..81163be7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dfasulo^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +work 2 380 0.693147 1.386294 9 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 4 236 1.386294 5.545176 32 +graduat 2 215 1.386294 2.772588 31 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +area 1 144 1.945910 1.945910 80 +seattl 2 120 2.079442 4.158884 103 +theori 2 111 2.197225 4.394450 127 +mathemat 1 108 2.197225 2.197225 123 +find 1 111 2.197225 2.197225 111 +person 1 111 2.197225 2.197225 117 +look 1 107 2.197225 2.197225 115 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +graphic 1 90 2.397895 2.397895 147 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +homepag 1 93 2.397895 2.397895 148 +thing 2 84 2.484907 4.969814 189 +contain 1 81 2.484907 2.484907 174 +academ 1 82 2.484907 2.484907 178 +appli 1 71 2.639057 2.639057 226 +write 1 72 2.639057 2.639057 222 +would 1 67 2.708050 2.708050 251 +order 1 69 2.708050 2.708050 249 +goal 1 66 2.708050 2.708050 250 +written 1 63 2.772589 2.772589 278 +abstract 1 62 2.772589 2.772589 276 +plai 1 60 2.833213 2.833213 307 +publish 1 57 2.890372 2.890372 326 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +friend 1 48 3.044522 3.044522 376 +favorit 1 44 3.135494 3.135494 410 +better 1 45 3.135494 3.135494 401 +third 1 43 3.178054 3.178054 412 +press 1 42 3.218876 3.218876 419 +futur 1 41 3.218876 3.218876 427 +probabl 1 40 3.258097 3.258097 455 +random 2 34 3.401197 6.802394 511 +given 1 32 3.465736 3.465736 538 +taken 1 31 3.496508 3.496508 555 +computersci 1 30 3.555348 3.555348 562 +depend 1 29 3.583519 3.583519 583 +ask 1 28 3.610918 3.610918 597 +mine 1 26 3.688879 3.688879 654 +seri 1 24 3.761200 3.761200 708 +fellow 1 24 3.761200 3.761200 701 +recommend 2 22 3.850148 7.700296 737 +ofwashington 1 22 3.850148 3.850148 766 +identifi 1 22 3.850148 3.850148 760 +william 1 22 3.850148 3.850148 765 +fact 1 21 3.912023 3.912023 780 +tenni 1 20 3.951244 3.951244 838 +grad 1 20 3.951244 3.951244 837 +ever 1 19 4.007333 4.007333 872 +otherwis 1 17 4.110874 4.110874 922 +anyon 1 17 4.110874 4.110874 916 +former 1 17 4.110874 4.110874 956 +whether 1 17 4.110874 4.110874 918 +chateau 1 16 4.174387 4.174387 997 +explan 1 16 4.174387 4.174387 985 +biologi 1 15 4.248495 4.248495 1049 +charact 1 15 4.248495 4.248495 1028 +role 1 14 4.317488 4.317488 1101 +pagewelcom 1 11 4.553877 4.553877 1344 +poetri 1 9 4.753590 4.753590 1596 +distanc 1 9 4.753590 4.753590 1500 +illustr 1 8 4.875197 4.875197 1679 +babylon 1 8 4.875197 4.875197 1731 +creativ 1 8 4.875197 4.875197 1777 +absolut 1 8 4.875197 4.875197 1646 +sean 1 8 4.875197 4.875197 1705 +roger 1 7 5.010635 5.010635 1892 +chronicl 1 7 5.010635 5.010635 1952 +athlet 1 7 5.010635 5.010635 1933 +fiction 3 6 5.164786 15.494358 2217 +cat 1 6 5.164786 5.164786 2194 +fantasi 1 4 5.568345 5.568345 3055 +sandi 1 4 5.568345 5.568345 2765 +portrait 1 3 5.857933 5.857933 3491 +slight 1 3 5.857933 5.857933 3894 +kwon 1 3 5.857933 5.857933 3690 +woman 1 3 5.857933 5.857933 3539 +fasulo 1 2 6.263398 6.263398 4391 +honest 1 2 6.263398 6.263398 5060 +alumnu 1 2 6.263398 6.263398 5863 +wendi 1 2 6.263398 6.263398 5864 +belluomini 1 2 6.263398 6.263398 5865 +worthwhil 1 2 6.263398 6.263398 4951 +dfasulo 3 1 6.957497 20.872491 15189 +amber 2 1 6.957497 13.914994 15190 +williamscolleg 1 1 6.957497 6.957497 15191 +inaccuraci 1 1 6.957497 6.957497 15192 +eastlak 1 1 6.957497 6.957497 15193 +merlin 1 1 6.957497 6.957497 15194 +corwin 1 1 6.957497 6.957497 15195 +zelazni 1 1 6.957497 6.957497 15196 +drpg 1 1 6.957497 6.957497 15197 +phage 1 1 6.957497 6.957497 15198 +dress 1 1 6.957497 6.957497 15199 +dogbert 1 1 6.957497 6.957497 15200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dickey^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dickey^ new file mode 100644 index 00000000..bc5508a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dickey^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +washington 1 236 1.386294 1.386294 32 +seattl 1 120 2.079442 2.079442 103 +site 1 106 2.197225 2.197225 119 +internet 1 83 2.484907 2.484907 186 +resum 1 79 2.564949 2.564949 217 +tuesdai 1 73 2.639057 2.639057 219 +java 1 70 2.708050 2.708050 248 +septemb 1 65 2.772589 2.772589 274 +plai 1 60 2.833213 2.833213 307 +favorit 1 44 3.135494 3.135494 410 +autumn 1 31 3.496508 3.496508 558 +martin 2 21 3.912023 7.824046 794 +engineeringunivers 1 17 4.110874 4.110874 959 +weekli 1 17 4.110874 4.110874 919 +script 1 13 4.382027 4.382027 1171 +engr 1 10 4.653960 4.653960 1427 +sister 1 9 4.753590 4.753590 1524 +coffe 1 5 5.347108 5.347108 2556 +eduupd 1 4 5.568345 5.568345 3056 +dickei 2 2 6.263398 12.526796 4389 +garg 1 2 6.263398 6.263398 5533 +dickeycomput 1 1 6.957497 6.957497 15201 +washingtonwelcom 1 1 6.957497 6.957497 15202 +schedulenarr 1 1 6.957497 6.957497 15203 +blurbcs 1 1 6.957497 6.957497 15204 +housesfavorit 1 1 6.957497 6.957497 15205 +bookspirograph 1 1 6.957497 6.957497 15206 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dlee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dlee^ new file mode 100644 index 00000000..712d521d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dlee^ @@ -0,0 +1,188 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +washington 4 236 1.386294 5.545176 32 +gener 1 220 1.386294 1.386294 27 +paper 2 205 1.609438 3.218876 38 +recent 1 167 1.791759 1.791759 58 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +seattl 3 120 2.079442 6.238326 103 +high 2 130 2.079442 4.158884 101 +report 1 131 2.079442 2.079442 92 +provid 1 121 2.079442 2.079442 94 +manag 1 114 2.197225 2.197225 125 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +assist 1 112 2.197225 2.197225 113 +peopl 5 96 2.302585 11.512925 132 +search 1 95 2.397895 2.397895 155 +present 1 91 2.397895 2.397895 145 +larg 1 82 2.484907 2.484907 168 +internet 1 83 2.484907 2.484907 186 +thing 1 84 2.484907 2.484907 189 +help 1 83 2.484907 2.484907 175 +issu 2 78 2.564949 5.129898 211 +dynam 2 76 2.564949 5.129898 194 +good 2 77 2.564949 5.129898 200 +come 1 78 2.564949 2.564949 202 +meet 1 72 2.639057 2.639057 229 +materi 1 75 2.639057 2.639057 221 +would 2 67 2.708050 5.416100 251 +differ 1 66 2.708050 2.708050 253 +sieg 1 69 2.708050 2.708050 260 +polici 2 64 2.772589 5.545178 279 +guid 1 63 2.772589 2.772589 267 +plan 1 65 2.772589 2.772589 272 +juli 1 60 2.833213 2.833213 305 +back 1 60 2.833213 2.833213 297 +direct 1 57 2.890372 2.890372 316 +index 1 56 2.890372 2.890372 309 +point 1 58 2.890372 2.890372 319 +explor 1 58 2.890372 2.890372 324 +instruct 2 53 2.944439 5.888878 332 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +pointer 1 48 3.044522 3.044522 368 +appoint 1 49 3.044522 3.044522 358 +still 1 50 3.044522 3.044522 362 +without 1 50 3.044522 3.044522 370 +effect 1 46 3.091042 3.091042 385 +move 1 47 3.091042 3.091042 382 +could 1 46 3.091042 3.091042 383 +execut 1 45 3.135494 3.135494 404 +around 1 43 3.178054 3.178054 415 +show 1 43 3.178054 3.178054 417 +cach 3 41 3.218876 9.656628 432 +map 1 39 3.258097 3.258097 452 +realli 1 40 3.258097 3.258097 444 +live 1 40 3.258097 3.258097 451 +brian 1 38 3.295837 3.295837 466 +next 1 34 3.401197 3.401197 517 +go 1 33 3.433987 3.433987 529 +taught 1 33 3.433987 3.433987 526 +idea 1 32 3.465736 3.465736 545 +consid 1 29 3.583519 3.583519 590 +mind 1 27 3.637586 3.637586 632 +rather 1 26 3.688879 3.688879 642 +enjoi 1 26 3.688879 3.688879 660 +primari 1 25 3.737670 3.737670 669 +concern 1 25 3.737670 3.737670 666 +never 1 25 3.737670 3.737670 671 +alwai 2 24 3.761200 7.522400 691 +yahoo 1 24 3.761200 3.761200 707 +begin 2 23 3.806662 7.613324 716 +thread 1 23 3.806662 3.806662 722 +initi 1 23 3.806662 3.806662 717 +decis 1 23 3.806662 3.806662 728 +almost 1 22 3.850148 3.850148 742 +sort 1 22 3.850148 3.850148 738 +chen 3 21 3.912023 11.736069 791 +avoid 1 21 3.912023 3.912023 799 +among 1 21 3.912023 3.912023 781 +miss 1 19 4.007333 4.007333 866 +lyco 1 19 4.007333 4.007333 871 +definit 1 19 4.007333 4.007333 864 +bershad 3 18 4.060443 12.181329 902 +four 1 18 4.060443 4.060443 905 +element 1 18 4.060443 4.060443 895 +event 1 18 4.060443 4.060443 896 +asplo 1 17 4.110874 4.110874 948 +otherwis 1 17 4.110874 4.110874 922 +whole 1 17 4.110874 4.110874 940 +choic 1 16 4.174387 4.174387 979 +stream 1 15 4.248495 4.248495 1015 +draw 1 14 4.317488 4.317488 1086 +manner 1 14 4.317488 4.317488 1074 +alan 1 13 4.382027 4.382027 1146 +hotlist 1 13 4.382027 4.382027 1199 +brad 2 12 4.465908 8.931816 1264 +denni 2 11 4.553877 9.107754 1321 +baer 2 11 4.553877 9.107754 1353 +smart 1 11 4.553877 4.553877 1352 +isca 1 11 4.553877 4.553877 1354 +moment 1 11 4.553877 4.553877 1379 +magic 1 11 4.553877 4.553877 1358 +occur 2 9 4.753590 9.507180 1572 +osdi 1 9 4.753590 4.753590 1534 +yellow 1 9 4.753590 4.753590 1601 +russel 1 9 4.753590 4.753590 1507 +romer 3 8 4.875197 14.625591 1706 +qualifi 1 8 4.875197 4.875197 1721 +entri 1 8 4.875197 4.875197 1678 +gold 1 8 4.875197 4.875197 1745 +rais 1 8 4.875197 4.875197 1711 +baker 1 7 5.010635 5.010635 1812 +chanc 1 7 5.010635 5.010635 1960 +commit 3 6 5.164786 15.494358 2233 +conflict 2 6 5.164786 10.329572 2041 +dream 2 6 5.164786 10.329572 2165 +loup 1 6 5.164786 5.164786 2228 +presid 1 6 5.164786 5.164786 2196 +truth 1 6 5.164786 5.164786 2179 +whatev 1 6 5.164786 5.164786 2097 +fetch 1 5 5.347108 5.347108 2567 +million 1 5 5.347108 5.347108 2495 +lesson 1 5 5.347108 5.347108 2568 +ignor 1 5 5.347108 5.347108 2288 +favor 1 5 5.347108 5.347108 2414 +vote 3 4 5.568345 16.705035 2953 +ford 1 4 5.568345 5.568345 2636 +kill 1 4 5.568345 5.568345 3000 +countless 1 4 5.568345 5.568345 3020 +dlee 2 3 5.857933 11.715866 3949 +energi 1 3 5.857933 5.857933 3950 +specul 1 3 5.857933 5.857933 3951 +mappedcach 1 3 5.857933 5.857933 3928 +reorder 1 3 5.857933 5.857933 3952 +evil 1 3 5.857933 5.857933 3943 +act 1 3 5.857933 5.857933 3557 +researchwith 1 2 6.263398 6.263398 5594 +eustac 1 2 6.263398 6.263398 5866 +dirk 1 2 6.263398 6.263398 5665 +andt 1 2 6.263398 6.263398 5121 +resolutionon 1 2 6.263398 6.263398 5867 +nixon 1 2 6.263398 6.263398 5868 +court 1 2 6.263398 6.263398 4870 +silver 1 2 6.263398 6.263398 5374 +theblack 1 2 6.263398 6.263398 5869 +hesit 1 2 6.263398 6.263398 5774 +incid 1 2 6.263398 6.263398 5870 +goeth 1 2 6.263398 6.263398 5366 +calder 2 1 6.957497 13.914994 15207 +grunwald 2 1 6.957497 13.914994 15208 +huberthumphrei 2 1 6.957497 13.914994 15209 +likejean 1 1 6.957497 6.957497 15210 +enginefor 1 1 6.957497 6.957497 15211 +sensibl 1 1 6.957497 6.957497 15212 +conced 1 1 6.957497 6.957497 15213 +thatpolit 1 1 6.957497 6.957497 15214 +lesser 1 1 6.957497 6.957497 15215 +tweedledumand 1 1 6.957497 6.957497 15216 +tweedlede 1 1 6.957497 6.957497 15217 +abstain 1 1 6.957497 6.957497 15218 +theyar 1 1 6.957497 6.957497 15219 +torummag 1 1 6.957497 6.957497 15220 +allth 1 1 6.957497 6.957497 15221 +stew 1 1 6.957497 6.957497 15222 +humphrei 1 1 6.957497 6.957497 15223 +suprem 1 1 6.957497 6.957497 15224 +whentricia 1 1 6.957497 6.957497 15225 +flummeri 1 1 6.957497 6.957497 15226 +ineffect 1 1 6.957497 6.957497 15227 +splendid 1 1 6.957497 6.957497 15228 +unforeseen 1 1 6.957497 6.957497 15229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dougz^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dougz^ new file mode 100644 index 00000000..abb9299b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dougz^ @@ -0,0 +1,120 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +class 1 199 1.609438 1.609438 37 +contact 2 153 1.791759 3.583518 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +databas 1 122 2.079442 2.079442 86 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +user 1 104 2.302585 2.302585 137 +homepag 2 93 2.397895 4.795790 148 +member 1 84 2.484907 2.484907 165 +novemb 1 81 2.484907 2.484907 179 +state 1 76 2.564949 2.564949 207 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +test 1 66 2.708050 2.708050 252 +thursdai 1 70 2.708050 2.708050 241 +much 1 52 2.995732 2.995732 349 +date 1 51 2.995732 2.995732 344 +anoth 1 45 3.135494 3.135494 408 +math 1 44 3.135494 3.135494 402 +execut 1 45 3.135494 3.135494 404 +edit 1 42 3.218876 3.218876 418 +realli 1 40 3.258097 3.258097 444 +feel 1 37 3.332205 3.332205 483 +actual 2 28 3.610918 7.221836 604 +usual 1 28 3.610918 3.610918 608 +intend 1 28 3.610918 3.610918 599 +todai 2 25 3.737670 7.475340 672 +background 1 25 3.737670 3.737670 664 +brows 1 23 3.806662 3.806662 726 +ofwashington 1 22 3.850148 3.850148 766 +sure 1 20 3.951244 3.951244 813 +grad 1 20 3.951244 3.951244 837 +anywai 1 15 4.248495 4.248495 1047 +club 1 15 4.248495 4.248495 1058 +player 1 11 4.553877 4.553877 1371 +michigan 1 11 4.553877 4.553877 1368 +strongli 1 10 4.653960 4.653960 1406 +death 1 10 4.653960 4.653960 1457 +doug 2 9 4.753590 9.507180 1517 +mention 1 9 4.753590 4.753590 1569 +unusu 1 9 4.753590 4.753590 1566 +vice 1 9 4.753590 4.753590 1604 +charg 1 9 4.753590 4.753590 1582 +screen 1 9 4.753590 4.753590 1577 +sister 1 9 4.753590 4.753590 1524 +ground 1 7 5.010635 5.010635 1955 +hit 1 7 5.010635 5.010635 1965 +lucki 1 6 5.164786 5.164786 2163 +presid 1 6 5.164786 5.164786 2196 +emerg 1 6 5.164786 5.164786 2038 +highwai 1 6 5.164786 5.164786 2095 +wast 2 5 5.347108 10.694216 2537 +unnecessari 1 5 5.347108 5.347108 2506 +keeper 1 5 5.347108 5.347108 2569 +bryan 1 5 5.347108 5.347108 2421 +worst 1 5 5.347108 5.347108 2287 +exhaust 1 4 5.568345 5.568345 2825 +engineeringdepart 1 4 5.568345 5.568345 2917 +backup 1 4 5.568345 5.568345 2645 +gear 1 4 5.568345 5.568345 2891 +bold 1 3 5.857933 5.857933 3846 +treasur 1 3 5.857933 5.857933 3229 +sit 1 3 5.857933 5.857933 3953 +tast 1 3 5.857933 5.857933 3666 +labor 1 3 5.857933 5.857933 3195 +sarcasm 2 2 6.263398 12.526796 5871 +casual 1 2 6.263398 6.263398 4542 +buti 1 2 6.263398 6.263398 4775 +meth 1 2 6.263398 6.263398 5872 +useless 1 2 6.263398 6.263398 5564 +caveat 1 2 6.263398 6.263398 4975 +apprentic 1 2 6.263398 6.263398 5873 +cart 1 2 6.263398 6.263398 5874 +stolen 1 2 6.263398 6.263398 5803 +zongker 2 1 6.957497 13.914994 15230 +pagececi 1 1 6.957497 6.957497 15231 +noless 1 1 6.957497 6.957497 15232 +classeshow 1 1 6.957497 6.957497 15233 +toxic 1 1 6.957497 6.957497 15234 +custard 1 1 6.957497 6.957497 15235 +filesth 1 1 6.957497 6.957497 15236 +mento 1 1 6.957497 6.957497 15237 +galleryvisit 1 1 6.957497 6.957497 15238 +supercolliderth 1 1 6.957497 6.957497 15239 +cron 1 1 6.957497 6.957497 15240 +avirtu 1 1 6.957497 6.957497 15241 +trove 1 1 6.957497 6.957497 15242 +whichmai 1 1 6.957497 6.957497 15243 +imajor 1 1 6.957497 6.957497 15244 +andminor 1 1 6.957497 6.957497 15245 +dubiou 1 1 6.957497 6.957497 15246 +honorsjunior 1 1 6.957497 6.957497 15247 +brotherhood 1 1 6.957497 6.957497 15248 +crouton 1 1 6.957497 6.957497 15249 +pizzicato 1 1 6.957497 6.957497 15250 +clicker 1 1 6.957497 6.957497 15251 +cruis 1 1 6.957497 6.957497 15252 +inhigh 1 1 6.957497 6.957497 15253 +buttstar 1 1 6.957497 6.957497 15254 +wheremi 1 1 6.957497 6.957497 15255 +dougz 1 1 6.957497 6.957497 15256 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dylan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dylan^ new file mode 100644 index 00000000..df293a59 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^dylan^ @@ -0,0 +1,93 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 10 443 0.693147 6.931470 6 +work 3 380 0.693147 2.079441 9 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +current 2 284 1.098612 2.197224 21 +us 2 329 1.098612 2.197224 16 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +oper 7 180 1.609438 11.266066 34 +modifi 1 178 1.609438 1.609438 35 +implement 3 152 1.791759 5.375277 52 +applic 2 170 1.791759 3.583518 56 +perform 2 143 1.945910 3.891820 74 +schedul 3 119 2.079442 6.238326 85 +seattl 1 120 2.079442 2.079442 103 +databas 1 122 2.079442 2.079442 86 +person 1 111 2.197225 2.197225 117 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +activ 2 84 2.484907 4.969814 182 +help 1 83 2.484907 2.484907 175 +level 1 87 2.484907 2.484907 180 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +come 1 78 2.564949 2.564949 202 +addit 1 74 2.639057 2.639057 228 +java 2 70 2.708050 5.416100 248 +differ 1 66 2.708050 2.708050 253 +interact 3 62 2.772589 8.317767 270 +polici 1 64 2.772589 2.772589 279 +experi 1 64 2.772589 2.772589 283 +virtual 1 62 2.772589 2.772589 285 +improv 1 62 2.772589 2.772589 289 +thesi 1 57 2.890372 2.890372 327 +allow 1 53 2.944439 2.944439 333 +investig 1 51 2.995732 2.995732 353 +done 1 47 3.091042 3.091042 381 +quarter 1 47 3.091042 3.091042 389 +mechan 1 43 3.178054 3.178054 416 +seminar 1 38 3.295837 3.295837 470 +slide 1 38 3.295837 3.295837 467 +winter 1 36 3.367296 3.367296 500 +jame 1 35 3.401197 3.401197 507 +idea 1 32 3.465736 3.465736 545 +built 1 29 3.583519 3.583519 592 +demonstr 1 24 3.761200 3.761200 694 +thread 1 23 3.806662 3.806662 722 +lead 1 23 3.806662 3.806662 718 +kernel 1 20 3.951244 3.951244 825 +concentr 1 18 4.060443 4.060443 906 +spin 1 14 4.317488 4.317488 1121 +carri 1 13 4.382027 4.382027 1152 +signific 1 13 4.382027 4.382027 1125 +folk 1 9 4.753590 4.753590 1597 +voelker 1 9 4.753590 4.753590 1557 +dylan 3 8 4.875197 14.625591 1625 +slightli 1 7 5.010635 5.010635 1795 +dedic 1 7 5.010635 5.010635 1843 +gave 1 7 5.010635 5.010635 1922 +geoff 1 6 5.164786 5.164786 2124 +caus 1 5 5.347108 5.347108 2298 +commod 1 5 5.347108 5.347108 2415 +poorli 1 4 5.568345 5.568345 2781 +opal 1 4 5.568345 5.568345 3057 +oodb 2 3 5.857933 11.715866 3954 +mcname 2 2 6.263398 12.526796 5875 +properli 1 2 6.263398 6.263398 5454 +architecturethat 1 2 6.263398 6.263398 5876 +applicationsand 1 1 6.957497 6.957497 15257 +mechanismthat 1 1 6.957497 6.957497 15258 +replacementpolici 1 1 6.957497 6.957497 15259 +machoper 1 1 6.957497 6.957497 15260 +thathelp 1 1 6.957497 6.957497 15261 +kernelthread 1 1 6.957497 6.957497 15262 +tailor 1 1 6.957497 6.957497 15263 +betweenobject 1 1 6.957497 6.957497 15264 +achiv 1 1 6.957497 6.957497 15265 +improvementscan 1 1 6.957497 6.957497 15266 +betterserv 1 1 6.957497 6.957497 15267 +paperscv 1 1 6.957497 6.957497 15268 +lectureintroduc 1 1 6.957497 6.957497 15269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^echris^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^echris^ new file mode 100644 index 00000000..26925235 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^echris^ @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 2 215 1.386294 2.772588 31 +washington 2 236 1.386294 2.772588 32 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +hour 1 165 1.791759 1.791759 46 +seattl 1 120 2.079442 2.079442 103 +make 1 111 2.197225 2.197225 120 +mondai 1 77 2.564949 2.564949 206 +sieg 1 69 2.708050 2.708050 260 +dept 1 64 2.772589 2.772589 291 +wednesdai 1 64 2.772589 2.772589 261 +septemb 1 65 2.772589 2.772589 274 +could 1 46 3.091042 3.091042 383 +tent 1 22 3.850148 3.850148 739 +thur 1 19 4.007333 4.007333 847 +hello 1 10 4.653960 4.653960 1407 +lewi 3 8 4.875197 14.625591 1700 +christoph 3 5 5.347108 16.041324 2512 +glad 1 4 5.568345 5.568345 2657 +echri 1 1 6.957497 6.957497 15270 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ new file mode 100644 index 00000000..13e02af1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ecrocke^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +washington 1 236 1.386294 1.386294 32 +blank 1 3 5.857933 5.857933 3379 +ecrock 1 1 6.957497 6.957497 15271 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^edhong^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^edhong^ new file mode 100644 index 00000000..a98a5c66 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^edhong^ @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +time 2 293 1.098612 2.197224 17 +current 2 284 1.098612 2.197224 21 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +mail 2 238 1.386294 2.772588 22 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +includ 2 208 1.609438 3.218876 42 +list 1 201 1.609438 1.609438 39 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +compil 2 122 2.079442 4.158884 96 +seattl 1 120 2.079442 2.079442 103 +well 2 109 2.197225 4.394450 121 +place 2 106 2.197225 4.394450 124 +find 2 111 2.197225 4.394450 111 +specif 1 106 2.197225 2.197225 106 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +peopl 2 96 2.302585 4.605170 132 +text 1 98 2.302585 2.302585 133 +access 1 102 2.302585 2.302585 136 +book 1 99 2.302585 2.302585 131 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +help 1 83 2.484907 2.484907 175 +know 3 80 2.564949 7.694847 198 +want 2 79 2.564949 5.129898 199 +resum 1 79 2.564949 2.564949 217 +dynam 1 76 2.564949 2.564949 194 +come 1 78 2.564949 2.564949 202 +line 1 75 2.639057 2.639057 231 +free 1 73 2.639057 2.639057 224 +name 1 72 2.639057 2.639057 220 +august 1 66 2.708050 2.708050 257 +plan 1 65 2.772589 2.772589 272 +creat 1 63 2.772589 2.772589 277 +visit 1 63 2.772589 2.772589 288 +import 1 65 2.772589 2.772589 282 +type 1 61 2.833213 2.833213 296 +room 1 59 2.833213 2.833213 301 +variou 1 56 2.890372 2.890372 317 +think 1 57 2.890372 2.890372 314 +found 1 53 2.944439 2.944439 337 +give 1 50 3.044522 3.044522 359 +friend 1 48 3.044522 3.044522 376 +anoth 1 45 3.135494 3.135494 408 +keep 1 44 3.135494 3.135494 409 +better 1 45 3.135494 3.135494 401 +howev 1 41 3.218876 3.218876 422 +littl 1 39 3.258097 3.258097 454 +form 1 39 3.258097 3.258097 443 +theoret 1 39 3.258097 3.258097 446 +industri 1 38 3.295837 3.295837 464 +hand 1 37 3.332205 3.332205 475 +power 1 30 3.555348 3.555348 573 +domain 1 30 3.555348 3.555348 564 +sometim 1 24 3.761200 3.761200 696 +alwai 1 24 3.761200 3.761200 691 +busi 1 21 3.912023 3.912023 784 +fact 1 21 3.912023 3.912023 780 +stand 1 18 4.060443 4.060443 891 +macintosh 1 17 4.110874 4.110874 920 +qual 1 15 4.248495 4.248495 1062 +countri 1 15 4.248495 4.248495 1059 +hong 1 14 4.317488 4.317488 1105 +karlin 1 13 4.382027 4.382027 1176 +anna 1 12 4.465908 4.465908 1292 +appl 1 11 4.553877 4.553877 1303 +guess 1 10 4.653960 4.653960 1443 +traffic 1 10 4.653960 4.653960 1421 +plain 1 9 4.753590 4.753590 1495 +sister 1 9 4.753590 4.753590 1524 +chamber 1 8 4.875197 4.875197 1692 +craig 1 7 5.010635 5.010635 1879 +happen 1 7 5.010635 5.010635 1790 +histor 1 6 5.164786 5.164786 2085 +mac 1 5 5.347108 5.347108 2292 +advic 1 5 5.347108 5.347108 2509 +insight 1 4 5.568345 5.568345 3024 +eddi 1 3 5.857933 5.857933 3896 +studentat 1 2 6.263398 6.263398 5877 +seig 1 2 6.263398 6.263398 4462 +commentari 1 2 6.263398 6.263398 4287 +bias 1 2 6.263398 6.263398 5033 +tosai 1 1 6.957497 6.957497 15272 +hadto 1 1 6.957497 6.957497 15273 +postcriptand 1 1 6.957497 6.957497 15274 +workingon 1 1 6.957497 6.957497 15275 +fordynam 1 1 6.957497 6.957497 15276 +activit 1 1 6.957497 6.957497 15277 +vine 1 1 6.957497 6.957497 15278 +branchesmi 1 1 6.957497 6.957497 15279 +knowof 1 1 6.957497 6.957497 15280 +daveneti 1 1 6.957497 6.957497 15281 +towardslik 1 1 6.957497 6.957497 15282 +eveneasi 1 1 6.957497 6.957497 15283 +freewai 1 1 6.957497 6.957497 15284 +worldher 1 1 6.957497 6.957497 15285 +edhong 1 1 6.957497 6.957497 15286 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^eggers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^eggers^ new file mode 100644 index 00000000..9746fe83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^eggers^ @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +research 4 431 0.693147 2.772588 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 4 340 1.098612 4.394448 18 +current 2 284 1.098612 2.197224 21 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +architectur 2 139 1.945910 3.891820 77 +hall 1 146 1.945910 1.945910 65 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +compil 5 122 2.079442 10.397210 96 +schedul 2 119 2.079442 4.158884 85 +seattl 1 120 2.079442 2.079442 103 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +memori 1 101 2.302585 2.302585 139 +call 1 91 2.397895 2.397895 153 +homepag 1 93 2.397895 2.397895 148 +dynam 2 76 2.564949 5.129898 194 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +sieg 1 69 2.708050 2.708050 260 +degre 1 69 2.708050 2.708050 259 +new 1 64 2.772589 2.772589 262 +share 3 59 2.833213 8.499639 304 +back 1 60 2.833213 2.833213 297 +thesi 1 57 2.890372 2.890372 327 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +http 1 41 3.218876 3.218876 420 +cach 1 41 3.218876 3.218876 432 +might 1 41 3.218876 3.218876 426 +committe 1 34 3.401197 3.401197 522 +computersci 1 30 3.555348 3.555348 562 +miscellan 1 23 3.806662 3.806662 731 +begin 1 23 3.806662 3.806662 716 +emphasi 1 22 3.850148 3.850148 755 +reduc 1 22 3.850148 3.850148 759 +voic 1 21 3.912023 3.912023 806 +asplo 3 17 4.110874 12.332622 948 +susan 2 15 4.248495 8.496990 1050 +qual 1 15 4.248495 4.248495 1062 +coher 1 14 4.317488 4.317488 1109 +workload 1 12 4.465908 4.465908 1210 +multithread 1 11 4.553877 4.553877 1315 +egger 4 8 4.875197 19.500788 1695 +prefetch 1 6 5.164786 5.164786 2039 +fals 1 4 5.568345 5.568345 2861 +amast 1 3 5.857933 5.857933 3955 +onexperiment 1 1 6.957497 6.957497 15287 +incompil 1 1 6.957497 6.957497 15288 +optimizationsand 1 1 6.957497 6.957497 15289 +multithreadedarchitectur 1 1 6.957497 6.957497 15290 +spinprevi 1 1 6.957497 6.957497 15291 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^egs^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^egs^ new file mode 100644 index 00000000..1eaafdfa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^egs^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 12 443 0.693147 8.317764 6 +work 3 380 0.693147 2.079441 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +us 3 329 1.098612 3.295836 16 +current 2 284 1.098612 2.197224 21 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +washington 5 236 1.386294 6.931470 32 +link 3 247 1.386294 4.158882 24 +softwar 3 220 1.386294 4.158882 30 +also 2 259 1.386294 2.772588 28 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +oper 9 180 1.609438 14.484942 34 +paper 3 205 1.609438 4.828314 38 +implement 4 152 1.791759 7.167036 52 +develop 2 174 1.791759 3.583518 53 +data 2 170 1.791759 3.583518 49 +applic 2 170 1.791759 3.583518 56 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +support 3 132 1.945910 5.837730 83 +perform 3 143 1.945910 5.837730 74 +year 2 148 1.945910 3.891820 84 +object 1 138 1.945910 1.945910 79 +first 1 140 1.945910 1.945910 71 +schedul 2 119 2.079442 4.158884 85 +provid 2 121 2.079442 4.158884 94 +high 2 130 2.079442 4.158884 101 +seattl 1 120 2.079442 2.079442 103 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +code 2 108 2.197225 4.394450 116 +specif 2 106 2.197225 4.394450 106 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +call 2 91 2.397895 4.795790 153 +center 1 88 2.397895 2.397895 158 +present 1 91 2.397895 2.397895 145 +follow 1 92 2.397895 2.397895 143 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +activ 1 84 2.484907 2.484907 182 +educ 1 86 2.484907 2.484907 191 +dynam 4 76 2.564949 10.259796 194 +interfac 3 79 2.564949 7.694847 209 +want 1 79 2.564949 2.564949 199 +server 1 76 2.564949 2.564949 204 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +servic 2 72 2.639057 5.278114 236 +workshop 2 71 2.639057 5.278114 239 +write 1 72 2.639057 2.639057 222 +simul 2 66 2.708050 5.416100 255 +receiv 1 66 2.708050 2.708050 244 +goal 1 66 2.708050 2.708050 250 +main 1 67 2.708050 2.708050 256 +prof 1 64 2.772589 2.772589 273 +experi 1 64 2.772589 2.772589 283 +share 2 59 2.833213 5.666426 304 +march 1 61 2.833213 2.833213 295 +summer 2 56 2.890372 5.780744 311 +extens 9 53 2.944439 26.499951 340 +instruct 1 53 2.944439 2.944439 332 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +adapt 1 46 3.091042 3.091042 387 +possibl 1 47 3.091042 3.091042 378 +describ 3 45 3.135494 9.406482 400 +mechan 2 43 3.178054 6.356108 416 +third 1 43 3.178054 3.178054 412 +http 1 41 3.218876 3.218876 420 +review 1 42 3.218876 3.218876 425 +brian 1 38 3.295837 3.295837 466 +prototyp 1 38 3.295837 3.295837 463 +slide 1 38 3.295837 3.295837 467 +mean 1 37 3.332205 3.332205 477 +fault 1 32 3.465736 3.465736 547 +independ 1 32 3.465736 3.465736 548 +posit 1 31 3.496508 3.496508 552 +domain 2 30 3.555348 7.110696 564 +synchron 1 29 3.583519 3.583519 588 +limit 1 29 3.583519 3.583519 585 +load 1 28 3.610918 3.610918 601 +measur 1 28 3.610918 3.610918 609 +symbol 1 27 3.637586 3.637586 620 +compar 1 26 3.688879 3.688879 648 +aspect 2 25 3.737670 7.475340 663 +toward 1 25 3.737670 3.737670 668 +spent 1 25 3.737670 3.737670 676 +thread 3 23 3.806662 11.419986 722 +displai 1 23 3.806662 3.806662 712 +miscellan 1 23 3.806662 3.806662 731 +reduc 1 22 3.850148 3.850148 759 +path 1 21 3.912023 3.912023 778 +kernel 3 20 3.951244 11.853732 825 +wrote 2 20 3.951244 7.902488 830 +fine 2 20 3.951244 7.902488 822 +safeti 1 20 3.951244 3.951244 817 +benchmark 1 19 4.007333 4.007333 859 +bershad 1 18 4.060443 4.060443 902 +protect 4 17 4.110874 16.443496 935 +coupl 1 17 4.110874 4.110874 939 +latenc 1 16 4.174387 4.174387 993 +princeton 3 15 4.248495 12.745485 1042 +novel 2 15 4.248495 8.496990 1039 +overhead 1 15 4.248495 4.248495 1035 +enough 1 15 4.248495 4.248495 1040 +spin 10 14 4.317488 43.174880 1121 +achiev 1 14 4.317488 4.317488 1088 +senior 1 14 4.317488 4.317488 1120 +opportun 1 13 4.382027 4.382027 1161 +safe 4 12 4.465908 17.863632 1274 +robust 1 12 4.465908 4.465908 1271 +arbitrari 1 11 4.553877 4.553877 1359 +grain 2 10 4.653960 9.307920 1448 +sosp 1 10 4.653960 4.653960 1416 +ski 1 10 4.653960 4.653960 1471 +jersei 2 9 4.753590 9.507180 1587 +modula 2 9 4.753590 9.507180 1613 +grew 1 8 4.875197 4.875197 1742 +mach 1 8 4.875197 4.875197 1669 +isol 1 8 4.875197 4.875197 1663 +spec 1 8 4.875197 4.875197 1640 +sigop 1 8 4.875197 4.875197 1727 +european 1 8 4.875197 4.875197 1763 +dylan 1 8 4.875197 4.875197 1625 +interrupt 1 7 5.010635 5.010635 1793 +bell 1 6 5.164786 5.164786 2224 +subsystem 1 6 5.164786 5.164786 2015 +simultan 1 6 5.164786 5.164786 2155 +spinproject 1 5 5.347108 5.347108 2570 +compat 1 5 5.347108 5.347108 2485 +sail 1 5 5.347108 5.347108 2571 +outdoor 1 5 5.347108 5.347108 2514 +microkernel 2 4 5.568345 11.136690 3047 +turkei 1 4 5.568345 5.568345 2914 +hide 1 4 5.568345 5.568345 2996 +mip 1 4 5.568345 5.568345 2738 +wcsss 3 3 5.857933 17.573799 3956 +thesumm 1 3 5.857933 5.857933 3763 +namespac 1 3 5.857933 5.857933 3957 +hoto 1 3 5.857933 5.857933 3577 +arizona 1 3 5.857933 5.857933 3700 +dive 1 3 5.857933 5.857933 3654 +emin 2 2 6.263398 12.526796 5810 +sirer 1 2 6.263398 6.263398 5811 +backgroundi 1 2 6.263398 6.263398 5878 +andsom 1 2 6.263398 6.263398 5483 +schedulingpolici 1 2 6.263398 6.263398 5879 +strand 1 2 6.263398 6.263398 5880 +ofnew 1 2 6.263398 6.263398 5881 +mipsi 1 2 6.263398 6.263398 5882 +tucson 1 2 6.263398 6.263398 5883 +cloth 1 2 6.263398 6.263398 5884 +sirereg 1 1 6.957497 6.957497 15292 +istanbul 1 1 6.957497 6.957497 15293 +labswork 1 1 6.957497 6.957497 15294 +theplan 1 1 6.957497 6.957497 15295 +thevesta 1 1 6.957497 6.957497 15296 +projectsmi 1 1 6.957497 6.957497 15297 +andprotect 1 1 6.957497 6.957497 15298 +specificaspect 1 1 6.957497 6.957497 15299 +alarm 1 1 6.957497 6.957497 15300 +ofextend 1 1 6.957497 6.957497 15301 +allowsu 1 1 6.957497 6.957497 15302 +isdesign 1 1 6.957497 6.957497 15303 +allowsisol 1 1 6.957497 6.957497 15304 +withconflict 1 1 6.957497 6.957497 15305 +beassur 1 1 6.957497 6.957497 15306 +clincher 1 1 6.957497 6.957497 15307 +extensionsthat 1 1 6.957497 6.957497 15308 +protectionenforc 1 1 6.957497 6.957497 15309 +performanceweb 1 1 6.957497 6.957497 15310 +networkingstack 1 1 6.957497 6.957497 15311 +andminim 1 1 6.957497 6.957497 15312 +calledmipsi 1 1 6.957497 6.957497 15313 +researchplatform 1 1 6.957497 6.957497 15314 +featuresand 1 1 6.957497 6.957497 15315 +talkslanguag 1 1 6.957497 6.957497 15316 +interestswhenev 1 1 6.957497 6.957497 15317 +windsurf 1 1 6.957497 6.957497 15318 +bikingmak 1 1 6.957497 6.957497 15319 +andhik 1 1 6.957497 6.957497 15320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^eric^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^eric^ new file mode 100644 index 00000000..e58f0076 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^eric^ @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +work 3 380 0.693147 2.079441 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +project 4 340 1.098612 4.394448 18 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +recent 2 167 1.791759 3.583518 58 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +like 2 132 1.945910 3.891820 81 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +seattl 3 120 2.079442 6.238326 103 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +final 1 116 2.197225 2.197225 108 +take 1 97 2.302585 2.302585 134 +text 1 98 2.302585 2.302585 133 +pictur 1 89 2.397895 2.397895 160 +graphic 1 90 2.397895 2.397895 147 +section 1 94 2.397895 2.397895 149 +solut 2 82 2.484907 4.969814 162 +method 3 80 2.564949 7.694847 213 +june 1 79 2.564949 2.564949 214 +master 1 76 2.564949 2.564949 216 +state 1 76 2.564949 2.564949 207 +solv 2 73 2.639057 5.278114 234 +write 1 72 2.639057 2.639057 222 +involv 1 71 2.639057 2.639057 227 +materi 1 75 2.639057 2.639057 221 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +prof 3 64 2.772589 8.317767 273 +street 1 63 2.772589 2.772589 293 +automat 1 61 2.833213 2.833213 306 +thesi 1 57 2.890372 2.890372 327 +particular 1 51 2.995732 2.995732 352 +still 1 50 3.044522 3.044522 362 +numer 1 49 3.044522 3.044522 369 +could 1 46 3.091042 3.091042 383 +third 1 43 3.178054 3.178054 412 +music 1 42 3.218876 3.218876 436 +probabl 1 40 3.258097 3.258097 455 +realli 1 40 3.258097 3.258097 444 +small 1 39 3.258097 3.258097 447 +mean 1 37 3.332205 3.332205 477 +idea 1 32 3.465736 3.465736 545 +richard 1 31 3.496508 3.496508 559 +power 1 30 3.555348 3.555348 573 +weather 1 28 3.610918 3.610918 618 +mind 1 27 3.637586 3.637586 632 +honor 1 23 3.806662 3.806662 729 +famili 1 23 3.806662 3.806662 735 +try 1 22 3.850148 3.850148 764 +finish 1 22 3.850148 3.850148 748 +fact 1 21 3.912023 3.912023 780 +longer 2 20 3.951244 7.902488 816 +applet 1 20 3.951244 3.951244 827 +eric 3 19 4.007333 12.021999 870 +els 2 19 4.007333 8.014666 843 +anderson 1 19 4.007333 4.007333 860 +mostli 1 19 4.007333 4.007333 869 +prove 1 19 4.007333 4.007333 848 +matrix 1 17 4.110874 4.110874 933 +sept 1 17 4.110874 4.110874 952 +fourth 1 16 4.174387 4.174387 999 +spatial 1 16 4.174387 4.174387 988 +spars 1 16 4.174387 4.174387 989 +qual 1 15 4.248495 4.248495 1062 +nonlinear 1 14 4.317488 4.317488 1107 +step 1 13 4.382027 4.382027 1138 +care 1 13 4.382027 4.382027 1177 +resolut 1 13 4.382027 4.382027 1172 +necessari 1 13 4.382027 4.382027 1147 +bodi 1 13 4.382027 4.382027 1178 +karlin 1 13 4.382027 4.382027 1176 +brother 1 13 4.382027 4.382027 1189 +speech 1 12 4.465908 4.465908 1222 +anna 1 12 4.465908 4.465908 1292 +black 2 10 4.653960 9.307920 1418 +henri 1 10 4.653960 4.653960 1417 +explicit 1 9 4.753590 4.753590 1525 +signal 2 7 5.010635 10.021270 1910 +newton 1 7 5.010635 5.010635 1824 +commit 1 6 5.164786 5.164786 2233 +duke 1 6 5.164786 5.164786 2231 +restrict 1 6 5.164786 5.164786 2129 +transcript 1 6 5.164786 5.164786 2067 +everybodi 2 5 5.347108 10.694216 2517 +stabil 1 5 5.347108 5.347108 2286 +aim 1 5 5.347108 5.347108 2477 +isth 1 5 5.347108 5.347108 2532 +snapshot 1 5 5.347108 5.347108 2303 +implicit 2 4 5.568345 11.136690 2830 +backward 1 4 5.568345 5.568345 2638 +freedom 1 3 5.857933 5.857933 3890 +euler 1 3 5.857933 5.857933 3174 +interplai 1 3 5.857933 5.857933 3726 +astrophys 1 3 5.857933 5.857933 3936 +aclu 1 2 6.263398 6.263398 5227 +reno 1 2 6.263398 6.263398 5228 +mere 1 2 6.263398 6.263398 5340 +panic 1 2 6.263398 6.263398 5682 +criterion 1 2 6.263398 6.263398 5885 +acoust 1 2 6.263398 6.263398 5355 +musician 1 2 6.263398 6.263398 5718 +preparedfor 1 2 6.263398 6.263398 5886 +meander 1 2 6.263398 6.263398 5887 +andersonwher 1 1 6.957497 6.957497 15321 +decisionin 1 1 6.957497 6.957497 15322 +thedecis 1 1 6.957497 6.957497 15323 +interim 1 1 6.957497 6.957497 15324 +feloni 1 1 6.957497 6.957497 15325 +themarketplac 1 1 6.957497 6.957497 15326 +imostli 1 1 6.957497 6.957497 15327 +greensideof 1 1 6.957497 6.957497 15328 +onsteadi 1 1 6.957497 6.957497 15329 +biharmon 1 1 6.957497 6.957497 15330 +timesteppingmethod 1 1 6.957497 6.957497 15331 +analysisissu 1 1 6.957497 6.957497 15332 +nonlinearequ 1 1 6.957497 6.957497 15333 +newtonstep 1 1 6.957497 6.957497 15334 +spiffi 1 1 6.957497 6.957497 15335 +structuresbi 1 1 6.957497 6.957497 15336 +andersoni 1 1 6.957497 6.957497 15337 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^etzioni^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^etzioni^ new file mode 100644 index 00000000..4568a739 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^etzioni^ @@ -0,0 +1,198 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +inform 8 412 0.693147 5.545176 8 +program 3 374 0.693147 2.079441 7 +research 2 431 0.693147 1.386294 10 +us 2 329 1.098612 2.197224 16 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +washington 3 236 1.386294 4.158882 32 +softwar 2 220 1.386294 2.772588 30 +mail 1 238 1.386294 1.386294 22 +paper 2 205 1.609438 3.218876 38 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +hall 1 146 1.945910 1.945910 65 +first 1 140 1.945910 1.945910 71 +machin 5 129 2.079442 10.397210 95 +databas 2 122 2.079442 4.158884 86 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +high 1 130 2.079442 2.079442 101 +postscript 1 131 2.079442 2.079442 90 +world 2 115 2.197225 4.394450 126 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +search 3 95 2.397895 7.193685 155 +select 1 91 2.397895 2.397895 154 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +learn 9 86 2.484907 22.364163 170 +internet 6 83 2.484907 14.909442 186 +contain 2 81 2.484907 4.969814 174 +second 1 81 2.484907 2.484907 166 +wide 1 84 2.484907 2.484907 185 +ieee 1 86 2.484907 2.484907 190 +resourc 1 81 2.484907 2.484907 172 +master 6 76 2.564949 15.389694 216 +state 1 76 2.564949 2.564949 207 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +servic 6 72 2.639057 15.834342 236 +effici 2 73 2.639057 5.278114 233 +intellig 2 72 2.639057 5.278114 225 +html 1 75 2.639057 2.639057 235 +addit 1 74 2.639057 2.639057 228 +knowledg 2 67 2.708050 5.416100 243 +sieg 1 69 2.708050 2.708050 260 +august 1 66 2.708050 2.708050 257 +plan 2 65 2.772589 5.545178 272 +juli 2 60 2.833213 5.666426 305 +room 1 59 2.833213 2.833213 301 +thesi 6 57 2.890372 17.342232 327 +reason 1 57 2.890372 2.890372 318 +unix 1 58 2.890372 2.890372 308 +undergradu 1 54 2.944439 2.944439 338 +without 1 50 3.044522 3.044522 370 +understand 2 47 3.091042 6.182084 384 +move 1 47 3.091042 3.091042 382 +error 1 40 3.258097 3.258097 449 +close 1 38 3.295837 3.295837 465 +field 1 37 3.332205 3.332205 482 +multi 2 36 3.367296 6.734592 493 +robot 2 36 3.367296 6.734592 497 +statist 1 35 3.401197 3.401197 521 +human 1 32 3.465736 3.465736 546 +photo 1 31 3.496508 3.496508 561 +robert 1 30 3.555348 3.555348 567 +neural 1 30 3.555348 3.555348 578 +determin 1 27 3.637586 3.637586 630 +enabl 2 26 3.688879 7.377758 655 +relev 2 26 3.688879 7.377758 637 +request 1 26 3.688879 3.688879 635 +magazin 1 24 3.761200 3.761200 704 +greg 1 24 3.761200 3.761200 695 +ofwashington 1 22 3.850148 3.850148 766 +william 1 22 3.850148 3.850148 765 +voic 1 21 3.912023 3.912023 806 +fact 1 21 3.912023 3.912023 780 +expert 1 20 3.951244 3.951244 833 +comparison 3 19 4.007333 12.021999 863 +agent 3 18 4.060443 12.181329 910 +repositori 2 17 4.110874 8.221748 932 +white 1 17 4.110874 4.110874 951 +analyz 1 17 4.110874 4.110874 925 +brief 1 16 4.174387 4.174387 1001 +choic 1 16 4.174387 4.174387 979 +indic 1 15 4.248495 4.248495 1013 +jonathan 1 13 4.382027 4.382027 1174 +food 1 12 4.465908 4.465908 1285 +bruce 1 12 4.465908 4.465908 1226 +count 1 12 4.465908 4.465908 1239 +stephen 1 11 4.553877 4.553877 1342 +induct 1 11 4.553877 4.553877 1304 +metacrawl 4 10 4.653960 18.615840 1455 +packard 2 10 4.653960 9.307920 1444 +shop 1 10 4.653960 4.653960 1469 +sound 1 9 4.753590 4.753590 1605 +incomplet 1 9 4.753590 4.753590 1575 +aaai 2 8 4.875197 9.750394 1750 +hewlett 2 8 4.875197 9.750394 1709 +irvin 2 8 4.875197 9.750394 1660 +accomplish 1 8 4.875197 4.875197 1755 +satisfi 1 8 4.875197 4.875197 1694 +gather 1 8 4.875197 4.875197 1719 +autonom 1 8 4.875197 4.875197 1749 +erik 1 8 4.875197 4.875197 1701 +softbot 9 7 5.010635 45.095715 1974 +discoveri 2 7 5.010635 10.021270 1915 +sparc 1 7 5.010635 5.010635 1860 +foc 1 7 5.010635 5.010635 1880 +planner 1 7 5.010635 5.010635 1797 +golden 1 7 5.010635 5.010635 1962 +hunt 1 7 5.010635 5.010635 1798 +illinoi 1 7 5.010635 5.010635 1941 +usenet 1 7 5.010635 5.010635 1839 +etzioni 4 6 5.164786 20.659144 2135 +oren 2 6 5.164786 10.329572 2134 +advis 2 6 5.164786 10.329572 2173 +fiction 1 6 5.164786 5.164786 2217 +forecast 1 6 5.164786 5.164786 2171 +brook 1 6 5.164786 5.164786 2152 +german 1 6 5.164786 5.164786 2190 +accuraci 1 5 5.347108 5.347108 2450 +cacm 1 5 5.347108 5.347108 2388 +keith 1 5 5.347108 5.347108 2528 +selberg 1 5 5.347108 5.347108 2441 +amherst 1 5 5.347108 5.347108 2484 +disambigu 1 4 5.568345 5.568345 2899 +innov 1 4 5.568345 5.568345 2933 +chain 1 4 5.568345 5.568345 2712 +repli 1 4 5.568345 5.568345 2689 +ijcai 1 4 5.568345 5.568345 2901 +sophist 1 3 5.857933 5.857933 3545 +ahoi 1 3 5.857933 5.857933 3532 +deploi 1 3 5.857933 5.857933 3750 +neal 1 3 5.857933 5.857933 3184 +lockhe 1 3 5.857933 5.857933 3863 +faq 1 3 5.857933 5.857933 3216 +pageoren 1 2 6.263398 6.263398 5888 +pagedepart 1 2 6.263398 6.263398 5052 +anddynam 1 2 6.263398 6.263398 5889 +finalist 1 2 6.263398 6.263398 5890 +discoveraward 1 2 6.263398 6.263398 5891 +brute 1 2 6.263398 6.263398 5892 +hypothes 1 2 6.263398 6.263398 5607 +ascal 1 2 6.263398 6.263398 5893 +toappear 1 2 6.263398 6.263398 4343 +bernard 1 2 6.263398 6.263398 5894 +lesh 1 2 6.263398 6.263398 5895 +goan 1 2 6.263398 6.263398 5896 +zamir 1 2 6.263398 6.263398 5897 +shake 1 2 6.263398 6.263398 5898 +umass 1 2 6.263398 6.263398 5899 +bioand 1 1 6.957497 6.957497 15338 +heor 1 1 6.957497 6.957497 15339 +searchmultipl 1 1 6.957497 6.957497 15340 +pruningopt 1 1 6.957497 6.957497 15341 +netrecommend 1 1 6.957497 6.957497 15342 +locatesindividu 1 1 6.957497 6.957497 15343 +bruteforc 1 1 6.957497 6.957497 15344 +whenrun 1 1 6.957497 6.957497 15345 +theweb 1 1 6.957497 6.957497 15346 +richardseg 1 1 6.957497 6.957497 15347 +fileretriev 1 1 6.957497 6.957497 15348 +universalquantif 1 1 6.957497 6.957497 15349 +terranc 1 1 6.957497 6.957497 15350 +mikeperkowitz 1 1 6.957497 6.957497 15351 +soderland 1 1 6.957497 6.957497 15352 +roomi 1 1 6.957497 6.957497 15353 +lesourd 1 1 6.957497 6.957497 15354 +spiger 1 1 6.957497 6.957497 15355 +alford 1 1 6.957497 6.957497 15356 +fitchenholtz 1 1 6.957497 6.957497 15357 +guido 1 1 6.957497 6.957497 15358 +dymitr 1 1 6.957497 6.957497 15359 +mozdyniewicz 1 1 6.957497 6.957497 15360 +quark 1 1 6.957497 6.957497 15361 +minecontain 1 1 6.957497 6.957497 15362 +neuroprosearch 1 1 6.957497 6.957497 15363 +statlib 1 1 6.957497 6.957497 15364 +learningtoolbox 1 1 6.957497 6.957497 15365 +bonn 1 1 6.957497 6.957497 15366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^feeley^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^feeley^ new file mode 100644 index 00000000..28478c69 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^feeley^ @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +manag 1 114 2.197225 2.197225 125 +memori 1 101 2.302585 2.302585 139 +juli 1 60 2.833213 2.833213 305 +thesi 1 57 2.890372 2.890372 327 +faculti 1 56 2.890372 2.890372 325 +done 1 47 3.091042 3.091042 381 +join 1 39 3.258097 3.258097 457 +soon 1 36 3.367296 3.367296 494 +global 1 34 3.401197 3.401197 520 +concern 1 25 3.737670 3.737670 666 +mike 2 24 3.761200 7.522400 703 +finish 1 22 3.850148 3.850148 748 +feelei 3 7 5.010635 15.031905 1859 +british 1 5 5.347108 5.347108 2546 +columbia 1 2 6.263398 6.263398 5900 +papersmi 1 2 6.263398 6.263398 5462 +pagemik 1 1 6.957497 6.957497 15367 +workstationclust 1 1 6.957497 6.957497 15368 +opalproject 1 1 6.957497 6.957497 15369 +injanuari 1 1 6.957497 6.957497 15370 +summarycvsoutheast 1 1 6.957497 6.957497 15371 +idaholast 1 1 6.957497 6.957497 15372 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fisher^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fisher^ new file mode 100644 index 00000000..757d2e47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fisher^ @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +current 1 284 1.098612 1.098612 21 +washington 3 236 1.386294 4.158882 32 +mail 1 238 1.386294 1.386294 22 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +seattl 1 120 2.079442 2.079442 103 +pictur 1 89 2.397895 2.397895 160 +sieg 1 69 2.708050 2.708050 260 +room 1 59 2.833213 2.833213 301 +return 1 34 3.401197 3.401197 502 +voic 1 21 3.912023 3.912023 806 +chri 2 11 4.553877 9.107754 1311 +fisher 3 2 6.263398 18.790194 4794 +fisherdepart 1 1 6.957497 6.957497 15373 +engineeringbox 1 1 6.957497 6.957497 15374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fix^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fix^ new file mode 100644 index 00000000..b70e95e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fix^ @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +cours 1 273 1.098612 1.098612 15 +washington 3 236 1.386294 4.158882 32 +gener 1 220 1.386294 1.386294 27 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +read 1 154 1.791759 1.791759 47 +algorithm 1 162 1.791759 1.791759 57 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +hall 1 146 1.945910 1.945910 65 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +place 2 106 2.197225 4.394450 124 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +site 1 106 2.197225 2.197225 119 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +exam 1 86 2.484907 2.484907 169 +librari 1 87 2.484907 2.484907 181 +thing 1 84 2.484907 2.484907 189 +server 1 76 2.564949 2.564949 204 +html 1 75 2.639057 2.639057 235 +sieg 1 69 2.708050 2.708050 260 +main 1 67 2.708050 2.708050 256 +found 1 53 2.944439 2.944439 337 +quarter 1 47 3.091042 3.091042 389 +around 1 43 3.178054 3.178054 415 +might 2 41 3.218876 6.437752 426 +music 1 42 3.218876 3.218876 436 +paul 1 38 3.295837 3.295837 471 +autumn 1 31 3.496508 3.496508 558 +mind 1 27 3.637586 3.637586 632 +rather 1 26 3.688879 3.688879 642 +geometri 1 22 3.850148 3.850148 752 +fund 1 21 3.912023 3.912023 805 +nice 1 20 3.951244 3.951244 809 +listen 1 18 4.060443 4.060443 907 +engineeringunivers 1 17 4.110874 4.110874 959 +otherwis 1 17 4.110874 4.110874 922 +chateau 1 16 4.174387 4.174387 997 +drive 1 15 4.248495 4.248495 1052 +galleri 1 13 4.382027 4.382027 1192 +captur 1 12 4.465908 4.465908 1232 +denni 1 11 4.553877 4.553877 1321 +moment 1 11 4.553877 4.553877 1379 +vista 1 10 4.653960 4.653960 1452 +seven 1 9 4.753590 4.753590 1561 +somewher 1 6 5.164786 5.164786 2176 +wolman 1 6 5.164786 5.164786 2093 +alec 1 5 5.347108 5.347108 2563 +lost 1 5 5.347108 5.347108 2358 +ta 1 4 5.568345 5.568345 3058 +soul 1 4 5.568345 5.568345 2907 +luci 1 3 5.857933 5.857933 3705 +schedulethi 1 2 6.263398 6.263398 4068 +meander 1 2 6.263398 6.263398 5887 +booth 1 2 6.263398 6.263398 5119 +scheduleto 1 1 6.957497 6.957497 15375 +probablyb 1 1 6.957497 6.957497 15376 +activitiesmi 1 1 6.957497 6.957497 15377 +areasof 1 1 6.957497 6.957497 15378 +outta 1 1 6.957497 6.957497 15379 +pea 1 1 6.957497 6.957497 15380 +mofo 1 1 6.957497 6.957497 15381 +peach 1 1 6.957497 6.957497 15382 +ruel 1 1 6.957497 6.957497 15383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fm^ new file mode 100644 index 00000000..6cb47440 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^fm^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 5 431 0.693147 3.465735 10 +interest 4 384 0.693147 2.772588 11 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +current 2 284 1.098612 2.197224 21 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +washington 2 236 1.386294 2.772588 32 +softwar 2 220 1.386294 2.772588 30 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +cornel 1 215 1.386294 1.386294 23 +languag 1 227 1.386294 1.386294 26 +public 1 202 1.609438 1.609438 43 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +model 2 145 1.945910 3.891820 69 +hall 1 146 1.945910 1.945910 65 +relat 1 139 1.945910 1.945910 68 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +welcom 1 122 2.079442 2.079442 99 +machin 1 129 2.079442 2.079442 95 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +user 1 104 2.302585 2.302585 137 +real 1 93 2.397895 2.397895 144 +activ 2 84 2.484907 4.969814 182 +educ 1 86 2.484907 2.484907 191 +interfac 1 79 2.564949 2.564949 209 +onlin 1 75 2.639057 2.639057 223 +summari 1 73 2.639057 2.639057 237 +symposium 1 72 2.639057 2.639057 238 +line 1 75 2.639057 2.639057 231 +sieg 1 69 2.708050 2.708050 260 +main 1 67 2.708050 2.708050 256 +august 1 66 2.708050 2.708050 257 +interact 1 62 2.772589 2.772589 270 +polici 1 64 2.772589 2.772589 279 +previou 1 62 2.772589 2.772589 290 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +profession 1 51 2.995732 2.995732 345 +basic 1 50 3.044522 3.044522 360 +formal 2 37 3.332205 6.664410 478 +concurr 1 34 3.401197 3.401197 501 +human 2 32 3.465736 6.931472 546 +collabor 1 32 3.465736 3.465736 543 +chair 2 29 3.583519 7.167038 596 +demonstr 1 24 3.761200 3.761200 694 +verif 1 20 3.951244 3.951244 826 +safeti 1 20 3.951244 3.951244 817 +engineeringunivers 1 17 4.110874 4.110874 959 +anyth 1 16 4.174387 4.174387 998 +mellon 2 13 4.382027 8.764054 1179 +washingtonbox 1 13 4.382027 4.382027 1200 +opportun 1 13 4.382027 4.382027 1161 +carnegi 2 12 4.465908 8.931816 1260 +ski 1 10 4.653960 4.653960 1471 +cook 1 10 4.653960 4.653960 1464 +cultur 1 7 5.010635 5.010635 1951 +spanish 1 4 5.568345 5.568345 3017 +ofmi 1 3 5.857933 5.857933 3911 +uist 1 2 6.263398 6.263398 5901 +vegetarian 1 2 6.263398 6.263398 5902 +greet 1 2 6.263398 6.263398 5903 +modugno 2 1 6.957497 13.914994 15384 +francesmari 1 1 6.957497 6.957497 15385 +pagefrancesmari 1 1 6.957497 6.957497 15386 +algorthim 1 1 6.957497 6.957497 15387 +includecycl 1 1 6.957497 6.957497 15388 +previouslyitalian 1 1 6.957497 6.957497 15389 +elleri 1 1 6.957497 6.957497 15390 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^forman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^forman^ new file mode 100644 index 00000000..b745da7e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^forman^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +compil 1 122 2.079442 2.079442 96 +pictur 1 89 2.397895 2.397895 160 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +optim 1 79 2.564949 2.564949 197 +free 1 73 2.639057 2.639057 224 +receiv 1 66 2.708050 2.708050 244 +written 1 63 2.772589 2.772589 278 +dept 1 64 2.772589 2.772589 291 +finger 1 52 2.995732 2.995732 354 +netscap 1 44 3.135494 3.135494 395 +live 1 40 3.258097 3.258097 451 +word 1 34 3.401197 3.401197 508 +someth 1 31 3.496508 3.496508 554 +anim 1 31 3.496508 3.496508 557 +weather 1 28 3.610918 3.610918 618 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +pattern 1 24 3.761200 3.761200 689 +mobil 1 23 3.806662 3.806662 730 +georg 2 16 4.174387 8.348774 994 +match 1 16 4.174387 4.174387 965 +script 1 13 4.382027 4.382027 1171 +song 1 11 4.553877 4.553877 1380 +debugg 1 9 4.753590 4.753590 1493 +pagei 1 8 4.875197 4.875197 1683 +handi 1 6 5.164786 5.164786 2111 +puzzl 1 5 5.347108 5.347108 2507 +water 1 5 5.347108 5.347108 2535 +forman 3 2 6.263398 18.790194 5904 +hyperlink 1 2 6.263398 6.263398 5447 +pagegeorg 1 1 6.957497 6.957497 15391 +ariadn 1 1 6.957497 6.957497 15392 +gforman 1 1 6.957497 6.957497 15393 +comhom 1 1 6.957497 6.957497 15394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^friedman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^friedman^ new file mode 100644 index 00000000..990c07f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^friedman^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +cours 1 273 1.098612 1.098612 15 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +file 1 132 1.945910 1.945910 70 +tool 1 117 2.079442 2.079442 93 +sinc 1 90 2.397895 2.397895 159 +chang 1 82 2.484907 2.484907 163 +refer 1 78 2.564949 2.564949 203 +intellig 1 72 2.639057 2.639057 225 +artifici 1 63 2.772589 2.772589 280 +life 1 50 3.044522 3.044522 375 +visitor 1 49 3.044522 3.044522 371 +favorit 1 44 3.135494 3.135494 410 +netscap 1 44 3.135494 3.135494 395 +everi 1 34 3.401197 3.401197 519 +collabor 1 32 3.465736 3.465736 543 +quot 1 29 3.583519 3.583519 582 +bookmark 1 26 3.688879 3.688879 639 +applet 1 20 3.951244 3.951244 827 +agent 1 18 4.060443 4.060443 910 +english 2 15 4.248495 8.496990 1033 +trip 1 14 4.317488 4.317488 1113 +bike 1 10 4.653960 4.653960 1468 +poetri 1 9 4.753590 4.753590 1596 +weld 1 9 4.753590 4.753590 1538 +marc 3 8 4.875197 14.625591 1680 +dictionari 1 8 4.875197 4.875197 1642 +gather 1 8 4.875197 4.875197 1719 +friedman 4 7 5.010635 20.042540 1886 +golden 1 7 5.010635 5.010635 1962 +planner 1 7 5.010635 5.010635 1797 +keith 2 5 5.347108 10.694216 2528 +camp 1 5 5.347108 5.347108 2545 +elsewher 1 5 5.347108 5.347108 2444 +spanish 1 4 5.568345 5.568345 3017 +codi 1 3 5.857933 5.857933 3940 +kwok 1 3 5.857933 5.857933 3941 +ucpop 1 3 5.857933 5.857933 3878 +watercolor 1 1 6.957497 6.957497 15395 +checklist 1 1 6.957497 6.957497 15396 +occam 1 1 6.957497 6.957497 15397 +wordbot 1 1 6.957497 6.957497 15398 +nietzschein 1 1 6.957497 6.957497 15399 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^gaetano b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^gaetano new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^gaetano @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^garrett^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^garrett^ new file mode 100644 index 00000000..80d20849 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^garrett^ @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +address 1 170 1.791759 1.791759 62 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +file 1 132 1.945910 1.945910 70 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +graphic 1 90 2.397895 2.397895 147 +member 1 84 2.484907 2.484907 165 +line 1 75 2.639057 2.639057 231 +plai 1 60 2.833213 2.833213 307 +game 1 36 3.367296 3.367296 498 +neural 1 30 3.555348 3.555348 578 +ofwashington 1 22 3.850148 3.850148 766 +audio 1 14 4.317488 4.317488 1094 +genet 2 10 4.653960 9.307920 1409 +cecil 1 9 4.753590 4.753590 1547 +garrett 3 3 5.857933 17.573799 3377 +charli 2 2 6.263398 12.526796 5905 +bookshelf 1 2 6.263398 6.263398 5724 +algorithmspap 1 1 6.957497 6.957497 15400 +algorithmsformerli 1 1 6.957497 6.957497 15401 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^georgew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^georgew^ new file mode 100644 index 00000000..cde78872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^georgew^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +seattl 1 120 2.079442 2.079442 103 +techniqu 1 99 2.302585 2.302585 138 +graphic 2 90 2.397895 4.795790 147 +imag 2 91 2.397895 4.795790 161 +follow 1 92 2.397895 2.397895 143 +david 1 71 2.639057 2.639057 232 +appli 1 71 2.639057 2.639057 226 +multimedia 1 68 2.708050 2.708050 258 +laboratori 1 63 2.772589 2.772589 292 +thesi 1 57 2.890372 2.890372 327 +three 1 54 2.944439 2.944439 330 +found 1 53 2.944439 2.944439 337 +prototyp 1 38 3.295837 3.295837 463 +deal 1 22 3.850148 3.850148 736 +supervis 1 20 3.951244 3.951244 840 +dimension 1 18 4.060443 4.060443 909 +render 2 17 4.110874 8.221748 947 +engineeringunivers 1 17 4.110874 4.110874 959 +georg 2 16 4.174387 8.348774 994 +eduphon 1 15 4.248495 4.248495 1060 +washingtonbox 1 13 4.382027 4.382027 1200 +galleri 1 13 4.382027 4.382027 1192 +wife 1 13 4.382027 4.382027 1196 +tradit 1 10 4.653960 4.653960 1404 +illustr 1 8 4.875197 4.875197 1679 +salesin 1 4 5.568345 5.568345 3051 +grail 1 3 5.857933 5.857933 3356 +winkenbach 1 1 6.957497 6.957497 15402 +winkenbachdepart 1 1 6.957497 6.957497 15403 +georgew 1 1 6.957497 6.957497 15404 +doneund 1 1 6.957497 6.957497 15405 +theautomat 1 1 6.957497 6.957497 15406 +imagescr 1 1 6.957497 6.957497 15407 +taweewan 1 1 6.957497 6.957497 15408 +siwadun 1 1 6.957497 6.957497 15409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^gjb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^gjb^ new file mode 100644 index 00000000..e5c4e68f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^gjb^ @@ -0,0 +1,212 @@ +term, tf, in documents count, idf, tfidf, wordid +home 9 672 0.000000 0.000000 1 +page 8 705 0.000000 0.000000 3 +scienc 5 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +work 3 380 0.693147 2.079441 9 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 3 297 1.098612 3.295836 20 +time 2 293 1.098612 2.197224 17 +cours 2 273 1.098612 2.197224 15 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +washington 5 236 1.386294 6.931470 32 +graduat 3 215 1.386294 4.158882 31 +link 3 247 1.386294 4.158882 24 +email 2 220 1.386294 2.772588 29 +languag 2 227 1.386294 2.772588 26 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +updat 1 191 1.609438 1.609438 41 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +recent 2 167 1.791759 3.583518 58 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +file 4 132 1.945910 7.783640 70 +first 2 140 1.945910 3.891820 71 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +seattl 3 120 2.079442 6.238326 103 +welcom 2 122 2.079442 4.158884 99 +spring 2 131 2.079442 4.158884 88 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +mathemat 3 108 2.197225 6.591675 123 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +place 1 106 2.197225 2.197225 124 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +part 2 98 2.302585 4.605170 129 +user 1 104 2.302585 2.302585 137 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +commun 1 95 2.397895 2.397895 157 +imag 1 91 2.397895 2.397895 161 +stuff 1 87 2.484907 2.484907 171 +contain 1 81 2.484907 2.484907 174 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +complet 1 77 2.564949 2.564949 208 +issu 1 78 2.564949 2.564949 211 +server 1 76 2.564949 2.564949 204 +nation 2 74 2.639057 5.278114 240 +free 1 73 2.639057 2.639057 224 +write 1 72 2.639057 2.639057 222 +html 1 75 2.639057 2.639057 235 +materi 1 75 2.639057 2.639057 221 +degre 1 69 2.708050 2.708050 259 +practic 1 70 2.708050 2.708050 246 +view 1 70 2.708050 2.708050 254 +foundat 2 62 2.772589 5.545178 286 +creat 1 63 2.772589 2.772589 277 +virtual 1 62 2.772589 2.772589 285 +back 1 60 2.833213 2.833213 297 +locat 1 59 2.833213 2.833213 303 +best 1 59 2.833213 2.833213 299 +simpl 1 60 2.833213 2.833213 298 +plai 1 60 2.833213 2.833213 307 +major 1 56 2.890372 2.890372 315 +variou 1 56 2.890372 2.890372 317 +unix 1 58 2.890372 2.890372 308 +index 1 56 2.890372 2.890372 309 +sampl 1 53 2.944439 2.944439 339 +date 1 51 2.995732 2.995732 344 +archiv 2 49 3.044522 6.089044 364 +numer 1 49 3.044522 3.044522 369 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +keep 1 44 3.135494 3.135494 409 +show 1 43 3.178054 3.178054 417 +compani 1 41 3.218876 3.218876 423 +music 1 42 3.218876 3.218876 436 +small 1 39 3.258097 3.258097 447 +author 1 39 3.258097 3.258097 450 +origin 1 38 3.295837 3.295837 472 +seminar 1 38 3.295837 3.295837 470 +microsoft 1 38 3.295837 3.295837 468 +feel 1 37 3.332205 3.332205 483 +connect 1 37 3.332205 3.332205 485 +especi 2 36 3.367296 6.734592 496 +winter 1 36 3.367296 3.367296 500 +game 1 36 3.367296 3.367296 498 +procedur 1 36 3.367296 3.367296 488 +random 1 34 3.401197 3.401197 511 +global 1 34 3.401197 3.401197 520 +articl 2 33 3.433987 6.867974 530 +express 1 32 3.465736 3.465736 540 +scientist 1 31 3.496508 3.496508 560 +computersci 1 30 3.555348 3.555348 562 +full 1 28 3.610918 3.610918 615 +progress 1 28 3.610918 3.610918 598 +univ 1 28 3.610918 3.610918 617 +linux 1 27 3.637586 3.637586 631 +pagecs 3 26 3.688879 11.066637 658 +greg 2 24 3.761200 7.522400 695 +alwai 1 24 3.761200 3.761200 691 +magazin 1 24 3.761200 3.761200 704 +yahoo 1 24 3.761200 3.761200 707 +daili 1 24 3.761200 3.761200 706 +ofwashington 1 22 3.850148 3.850148 766 +recommend 1 22 3.850148 3.850148 737 +busi 1 21 3.912023 3.912023 784 +corpor 1 21 3.912023 3.912023 802 +navig 1 21 3.912023 3.912023 796 +tenni 1 20 3.951244 3.951244 838 +feedback 1 19 4.007333 4.007333 854 +lyco 1 19 4.007333 4.007333 871 +hobbi 1 16 4.174387 4.174387 1009 +devic 1 16 4.174387 4.174387 1002 +upon 1 16 4.174387 4.174387 978 +configur 5 15 4.248495 21.242475 1012 +reflect 1 15 4.248495 4.248495 1034 +incomput 1 14 4.317488 4.317488 1096 +senior 1 14 4.317488 4.317488 1120 +emac 2 13 4.382027 8.764054 1143 +philosophi 1 13 4.382027 4.382027 1167 +misc 1 13 4.382027 4.382027 1124 +newspap 2 12 4.465908 8.931816 1280 +emploi 1 12 4.465908 4.465908 1284 +basketbal 1 12 4.465908 4.465908 1289 +magic 1 11 4.553877 4.553877 1358 +perl 1 11 4.553877 4.553877 1332 +hello 1 10 4.653960 4.653960 1407 +desktop 1 10 4.653960 4.653960 1445 +ski 1 10 4.653960 4.653960 1471 +fellowship 1 10 4.653960 4.653960 1460 +volleybal 1 9 4.753590 4.753590 1598 +inter 1 9 4.753590 4.753590 1530 +competit 2 8 4.875197 9.750394 1635 +readm 2 8 4.875197 9.750394 1699 +joel 2 8 4.875197 9.750394 1698 +entri 1 8 4.875197 4.875197 1678 +extract 1 8 4.875197 4.875197 1728 +opinion 1 8 4.875197 4.875197 1708 +chronicl 2 7 5.010635 10.021270 1952 +gatewai 1 7 5.010635 5.010635 1942 +necessarili 1 7 5.010635 5.010635 1899 +duke 5 6 5.164786 25.823930 2231 +piano 1 6 5.164786 5.164786 2201 +histor 1 6 5.164786 5.164786 2085 +vertic 1 5 5.347108 5.347108 2270 +hole 1 5 5.347108 5.347108 2518 +billi 1 5 5.347108 5.347108 2404 +doubl 1 4 5.568345 5.568345 2951 +patch 1 4 5.568345 5.568345 2710 +drew 1 4 5.568345 5.568345 2980 +cube 1 4 5.568345 5.568345 2940 +jackson 2 3 5.857933 11.715866 3586 +eduaddress 1 3 5.857933 5.857933 3762 +rsum 1 3 5.857933 5.857933 3939 +freewar 1 3 5.857933 5.857933 3504 +hotjava 1 3 5.857933 5.857933 3220 +seinfeld 1 3 5.857933 5.857933 3958 +conclus 1 3 5.857933 5.857933 3367 +pagegreg 1 2 6.263398 6.263398 5906 +amcurr 1 2 6.263398 6.263398 5798 +bermuda 1 2 6.263398 6.263398 5907 +seminarcs 1 2 6.263398 6.263398 4521 +geneticalgorithm 1 2 6.263398 6.263398 5673 +ncaa 1 2 6.263398 6.263398 5908 +unoffici 1 2 6.263398 6.263398 5909 +unif 1 2 6.263398 6.263398 5910 +badro 3 1 6.957497 20.872491 15410 +zshell 2 1 6.957497 13.914994 15411 +nesbit 1 1 6.957497 6.957497 15412 +isuppos 1 1 6.957497 6.957497 15413 +excitingfeatur 1 1 6.957497 6.957497 15414 +dukeunivers 1 1 6.957497 6.957497 15415 +fortransworld 1 1 6.957497 6.957497 15416 +indurham 1 1 6.957497 6.957497 15417 +headquart 1 1 6.957497 6.957497 15418 +myapart 1 1 6.957497 6.957497 15419 +newer 1 1 6.957497 6.957497 15420 +fvwm 1 1 6.957497 6.957497 15421 +redhat 1 1 6.957497 6.957497 15422 +transworldnumer 1 1 6.957497 6.957497 15423 +ieeenat 1 1 6.957497 6.957497 15424 +victori 1 1 6.957497 6.957497 15425 +bycomput 1 1 6.957497 6.957497 15426 +canterburi 1 1 6.957497 6.957497 15427 +definitelynot 1 1 6.957497 6.957497 15428 +juggl 1 1 6.957497 6.957497 15429 +rubik 1 1 6.957497 6.957497 15430 +sarahmclachlan 1 1 6.957497 6.957497 15431 +parliamentari 1 1 6.957497 6.957497 15432 +sgml 1 1 6.957497 6.957497 15433 +sitcom 1 1 6.957497 6.957497 15434 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^glinden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^glinden^ new file mode 100644 index 00000000..f3e5b571 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^glinden^ @@ -0,0 +1,295 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +scienc 5 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 3 374 0.693147 2.079441 7 +interest 3 384 0.693147 2.079441 11 +work 3 380 0.693147 2.079441 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +us 4 329 1.098612 4.394448 16 +time 2 293 1.098612 2.197224 17 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +graduat 4 215 1.386294 5.545176 31 +softwar 3 220 1.386294 4.158882 30 +link 2 247 1.386294 2.772588 24 +also 2 259 1.386294 2.772588 28 +washington 2 236 1.386294 2.772588 32 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +develop 2 174 1.791759 3.583518 53 +avail 2 169 1.791759 3.583518 48 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +model 4 145 1.945910 7.783640 69 +year 2 148 1.945910 3.891820 84 +first 1 140 1.945910 1.945910 71 +professor 1 137 1.945910 1.945910 76 +file 1 132 1.945910 1.945910 70 +tool 1 117 2.079442 2.079442 93 +seattl 1 120 2.079442 2.079442 103 +look 2 107 2.197225 4.394450 115 +code 2 108 2.197225 4.394450 116 +check 1 115 2.197225 2.197225 118 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +intern 1 108 2.197225 2.197225 128 +book 2 99 2.302585 4.605170 131 +user 2 104 2.302585 4.605170 137 +techniqu 1 99 2.302585 2.302585 138 +imag 2 91 2.397895 4.795790 161 +search 1 95 2.397895 2.397895 155 +real 1 93 2.397895 2.397895 144 +associ 1 93 2.397895 2.397895 151 +graphic 1 90 2.397895 2.397895 147 +call 1 91 2.397895 2.397895 153 +learn 2 86 2.484907 4.969814 170 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +environ 1 84 2.484907 2.484907 177 +requir 1 81 2.484907 2.484907 167 +thing 1 84 2.484907 2.484907 189 +second 1 81 2.484907 2.484907 166 +stuff 1 87 2.484907 2.484907 171 +info 1 85 2.484907 2.484907 176 +complet 1 77 2.564949 2.564949 208 +decemb 1 80 2.564949 2.564949 215 +resum 1 79 2.564949 2.564949 217 +sourc 1 77 2.564949 2.564949 201 +addit 1 74 2.639057 2.639057 228 +servic 1 72 2.639057 2.639057 236 +appli 1 71 2.639057 2.639057 226 +java 11 70 2.708050 29.788550 248 +goal 1 66 2.708050 2.708050 250 +artifici 2 63 2.772589 5.545178 280 +function 1 62 2.772589 2.772589 275 +foundat 1 62 2.772589 2.772589 286 +evalu 1 64 2.772589 2.772589 266 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +best 1 59 2.833213 2.833213 299 +major 1 56 2.890372 2.890372 315 +publish 1 57 2.890372 2.890372 326 +browser 1 56 2.890372 2.890372 313 +summer 1 56 2.890372 2.890372 311 +thesi 1 57 2.890372 2.890372 327 +reason 1 57 2.890372 2.890372 318 +found 2 53 2.944439 5.888878 337 +allow 1 53 2.944439 2.944439 333 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +cool 5 49 3.044522 15.222610 374 +life 2 50 3.044522 6.089044 375 +give 1 50 3.044522 3.044522 359 +archiv 1 49 3.044522 3.044522 364 +california 1 46 3.091042 3.091042 388 +could 1 46 3.091042 3.091042 383 +made 2 44 3.135494 6.270988 398 +even 1 45 3.135494 3.135494 393 +third 2 43 3.178054 6.356108 412 +show 1 43 3.178054 3.178054 417 +autom 1 41 3.218876 3.218876 434 +linear 1 41 3.218876 3.218876 431 +might 1 41 3.218876 3.218876 426 +movi 3 40 3.258097 9.774291 459 +submit 2 39 3.258097 6.516194 440 +multipl 1 39 3.258097 3.258097 453 +prototyp 1 38 3.295837 3.295837 463 +origin 1 38 3.295837 3.295837 472 +expect 1 37 3.332205 3.332205 484 +tree 4 36 3.367296 13.469184 492 +staff 3 36 3.367296 10.101888 490 +ofth 1 36 3.367296 3.367296 491 +return 1 34 3.401197 3.401197 502 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +concept 1 32 3.465736 3.465736 537 +posit 2 31 3.496508 6.993016 552 +scientist 1 31 3.496508 3.496508 560 +anim 1 31 3.496508 3.496508 557 +travel 2 30 3.555348 7.110696 579 +graph 1 30 3.555348 3.555348 576 +neural 1 30 3.555348 3.555348 578 +quit 1 27 3.637586 3.637586 633 +though 1 27 3.637586 3.637586 622 +manipul 1 27 3.637586 3.637586 624 +constraint 1 26 3.688879 3.688879 636 +toward 1 25 3.737670 3.737670 668 +client 1 25 3.737670 3.737670 679 +trace 1 25 3.737670 3.737670 677 +greg 2 24 3.761200 7.522400 695 +demonstr 1 24 3.761200 3.761200 694 +pattern 1 24 3.761200 3.761200 689 +famili 1 23 3.806662 3.806662 735 +displai 1 23 3.806662 3.806662 712 +brows 1 23 3.806662 3.806662 726 +highli 1 23 3.806662 3.806662 725 +thread 1 23 3.806662 3.806662 722 +honor 1 23 3.806662 3.806662 729 +compress 1 23 3.806662 3.806662 719 +ofwashington 1 22 3.850148 3.850148 766 +instead 1 22 3.850148 3.850148 756 +love 1 21 3.912023 3.912023 804 +leav 1 21 3.912023 3.912023 772 +avoid 1 21 3.912023 3.912023 799 +applet 9 20 3.951244 35.561196 827 +qualiti 3 20 3.951244 11.853732 832 +wrote 1 20 3.951244 3.951244 830 +repositori 2 17 4.110874 8.221748 932 +thought 1 17 4.110874 4.110874 945 +adam 1 17 4.110874 4.110874 934 +diego 1 16 4.174387 4.174387 992 +earli 1 16 4.174387 4.174387 968 +spatial 1 16 4.174387 4.174387 988 +anyth 1 16 4.174387 4.174387 998 +dilbert 1 16 4.174387 4.174387 996 +cognit 1 16 4.174387 4.174387 986 +chateau 1 16 4.174387 4.174387 997 +reflect 2 15 4.248495 8.496990 1034 +rate 1 15 4.248495 4.248495 1037 +capabl 1 15 4.248495 4.248495 1016 +enough 1 15 4.248495 4.248495 1040 +doesn 1 15 4.248495 4.248495 1055 +draw 1 14 4.317488 4.317488 1086 +econom 1 13 4.382027 4.382027 1184 +iter 2 12 4.465908 8.931816 1206 +buffer 1 12 4.465908 4.465908 1211 +entertain 1 12 4.465908 4.465908 1286 +hank 1 12 4.465908 4.465908 1253 +assembl 1 12 4.465908 4.465908 1207 +transpar 2 11 4.553877 9.107754 1325 +keyword 1 11 4.553877 4.553877 1356 +enter 2 10 4.653960 9.307920 1454 +thecomput 1 10 4.653960 4.653960 1408 +awai 1 10 4.653960 4.653960 1447 +metacrawl 1 10 4.653960 4.653960 1455 +pick 3 9 4.753590 14.260770 1498 +prefer 2 9 4.753590 9.507180 1491 +congress 1 9 4.753590 4.753590 1592 +oop 1 8 4.875197 4.875197 1778 +realist 1 8 4.875197 4.875197 1665 +judg 1 8 4.875197 4.875197 1644 +guggenheim 1 8 4.875197 4.875197 1759 +hit 1 7 5.010635 5.010635 1965 +occasion 1 7 5.010635 5.010635 1905 +polit 2 6 5.164786 10.329572 2115 +impress 1 6 5.164786 5.164786 2096 +unpublish 1 6 5.164786 5.164786 2226 +stage 1 5 5.347108 5.347108 2488 +contest 1 5 5.347108 5.347108 2273 +particl 1 5 5.347108 5.347108 2436 +carlson 1 5 5.347108 5.347108 2351 +chess 1 5 5.347108 5.347108 2486 +annex 1 5 5.347108 5.347108 2572 +doubl 2 4 5.568345 11.136690 2951 +sorri 2 4 5.568345 11.136690 3059 +gradual 1 4 5.568345 5.568345 2997 +mess 1 4 5.568345 5.568345 2886 +jar 3 3 5.857933 17.573799 3223 +prison 2 3 5.857933 11.715866 3907 +slave 1 3 5.857933 5.857933 3959 +emul 1 3 5.857933 5.857933 3944 +cleaner 1 3 5.857933 5.857933 3775 +civil 1 3 5.857933 5.857933 3908 +evolutionari 1 3 5.857933 5.857933 3898 +boolean 1 3 5.857933 5.857933 3202 +recurr 1 3 5.857933 5.857933 3740 +sujai 1 3 5.857933 5.857933 3960 +parekh 1 3 5.857933 5.857933 3961 +shadow 1 3 5.857933 5.857933 3519 +inventor 1 3 5.857933 5.857933 3695 +quicktim 1 3 5.857933 5.857933 3493 +gamelan 6 2 6.263398 37.580388 4221 +elicit 2 2 6.263398 12.526796 4294 +flight 2 2 6.263398 12.526796 5911 +ucsd 2 2 6.263398 12.526796 5192 +scienceher 1 2 6.263398 6.263398 5912 +thejava 1 2 6.263398 6.263398 4704 +certainli 1 2 6.263398 6.263398 4090 +belew 1 2 6.263398 6.263398 4739 +lesh 1 2 6.263398 6.263398 5895 +tracer 1 2 6.263398 6.263398 5913 +inc 1 2 6.263398 6.263398 5914 +duel 1 2 6.263398 6.263398 5855 +rai 1 2 6.263398 6.263398 5915 +theanim 1 2 6.263398 6.263398 5852 +thed 1 2 6.263398 6.263398 4963 +mbquicktim 1 2 6.263398 6.263398 5916 +linden 6 1 6.957497 41.744982 15435 +webview 5 1 6.957497 34.787485 15436 +wasrat 2 1 6.957497 13.914994 15437 +andwa 2 1 6.957497 13.914994 15438 +neuralnetwork 2 1 6.957497 13.914994 15439 +headless 2 1 6.957497 13.914994 15440 +horseman 2 1 6.957497 13.914994 15441 +lindenmi 1 1 6.957497 6.957497 15442 +wifecorina 1 1 6.957497 6.957497 15443 +lofti 1 1 6.957497 6.957497 15444 +undergraduatedegre 1 1 6.957497 6.957497 15445 +anodd 1 1 6.957497 6.957497 15446 +mactiv 1 1 6.957497 6.957497 15447 +orset 1 1 6.957497 6.957497 15448 +altavistawebviewand 1 1 6.957497 6.957497 15449 +metawebview 1 1 6.957497 6.957497 15450 +foraltavista 1 1 6.957497 6.957497 15451 +searchservic 1 1 6.957497 6.957497 15452 +dialog 1 1 6.957497 6.957497 15453 +travelag 1 1 6.957497 6.957497 15454 +whileallow 1 1 6.957497 6.957497 15455 +andjar 1 1 6.957497 6.957497 15456 +altavistawebview 1 1 6.957497 6.957497 15457 +winner 1 1 6.957497 6.957497 15458 +walsh 1 1 6.957497 6.957497 15459 +meilleur 1 1 6.957497 6.957497 15460 +ballet 1 1 6.957497 6.957497 15461 +flicker 1 1 6.957497 6.957497 15462 +standardsto 1 1 6.957497 6.957497 15463 +mylgramm 1 1 6.957497 6.957497 15464 +lgrammer 1 1 6.957497 6.957497 15465 +theparticletre 1 1 6.957497 6.957497 15466 +thejar 1 1 6.957497 6.957497 15467 +dawn 1 1 6.957497 6.957497 15468 +ademonstr 1 1 6.957497 6.957497 15469 +cansuccessfulli 1 1 6.957497 6.957497 15470 +myriadsoftwar 1 1 6.957497 6.957497 15471 +filippo 1 1 6.957497 6.957497 15472 +menzer 1 1 6.957497 6.957497 15473 +latentenergi 1 1 6.957497 6.957497 15474 +developingartifici 1 1 6.957497 6.957497 15475 +enviro 1 1 6.957497 6.957497 15476 +theautom 1 1 6.957497 6.957497 15477 +assit 1 1 6.957497 6.957497 15478 +majeski 1 1 6.957497 6.957497 15479 +spitzer 1 1 6.957497 6.957497 15480 +localizedinteract 1 1 6.957497 6.957497 15481 +dilemma 1 1 6.957497 6.957497 15482 +krishnamoorthi 1 1 6.957497 6.957497 15483 +paturi 1 1 6.957497 6.957497 15484 +blume 1 1 6.957497 6.957497 15485 +liden 1 1 6.957497 6.957497 15486 +esen 1 1 6.957497 6.957497 15487 +hardwaretradeoff 1 1 6.957497 6.957497 15488 +sdilemma 1 1 6.957497 6.957497 15489 +funrai 1 1 6.957497 6.957497 15490 +closeup 1 1 6.957497 6.957497 15491 +sphere 1 1 6.957497 6.957497 15492 +withreflect 1 1 6.957497 6.957497 15493 +adaptivesampl 1 1 6.957497 6.957497 15494 +thespher 1 1 6.957497 6.957497 15495 +causingth 1 1 6.957497 6.957497 15496 +refract 1 1 6.957497 6.957497 15497 +surfaceand 1 1 6.957497 6.957497 15498 +strike 1 1 6.957497 6.957497 15499 +alow 1 1 6.957497 6.957497 15500 +anyfurth 1 1 6.957497 6.957497 15501 +resembl 1 1 6.957497 6.957497 15502 +glinden 1 1 6.957497 6.957497 15503 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^grant^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^grant^ new file mode 100644 index 00000000..a7d76b4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^grant^ @@ -0,0 +1,28 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +relat 1 139 1.945910 1.945910 68 +compil 1 122 2.079442 2.079442 96 +octob 1 89 2.397895 2.397895 156 +homework 1 79 2.564949 2.564949 193 +brian 2 38 3.295837 6.591674 466 +computersci 1 30 3.555348 3.555348 562 +trip 1 14 4.317488 4.317488 1113 +grant 3 12 4.465908 13.397724 1216 +awai 1 10 4.653960 4.653960 1447 +daughter 1 7 5.010635 5.010635 1943 +pagebrian 1 4 5.568345 5.568345 3054 +groupuw 1 3 5.857933 5.857933 3934 +kri 2 1 6.957497 13.914994 15504 +infowork 1 1 6.957497 6.957497 15505 +backgrounduwdynam 1 1 6.957497 6.957497 15506 +engineeringperson 1 1 6.957497 6.957497 15507 +stuffperson 1 1 6.957497 6.957497 15508 +backgroundmi 1 1 6.957497 6.957497 15509 +isismi 1 1 6.957497 6.957497 15510 +singaporemi 1 1 6.957497 6.957497 15511 +bookmarksmi 1 1 6.957497 6.957497 15512 +keylast 1 1 6.957497 6.957497 15513 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^grove^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^grove^ new file mode 100644 index 00000000..97ded491 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^grove^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +us 3 329 1.098612 3.295836 16 +offic 2 299 1.098612 2.197224 13 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +washington 2 236 1.386294 2.772588 32 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +paper 1 205 1.609438 1.609438 38 +object 1 138 1.945910 1.945910 79 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +compil 1 122 2.079442 2.079442 96 +number 1 130 2.079442 2.079442 97 +manag 1 114 2.197225 2.197225 125 +pictur 1 89 2.397895 2.397895 160 +environ 1 84 2.484907 2.484907 177 +orient 1 80 2.564949 2.564949 205 +optim 1 79 2.564949 2.564949 197 +complet 1 77 2.564949 2.564949 208 +sieg 1 69 2.708050 2.708050 260 +integr 1 67 2.708050 2.708050 245 +plai 2 60 2.833213 5.666426 307 +back 1 60 2.833213 2.833213 297 +summer 1 56 2.890372 2.890372 311 +much 1 52 2.995732 2.995732 349 +frequent 1 49 3.044522 3.044522 367 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +cool 1 49 3.044522 3.044522 374 +anoth 1 45 3.135494 3.135494 408 +author 2 39 3.258097 6.516194 450 +littl 1 39 3.258097 3.258097 454 +close 1 38 3.295837 3.295837 465 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +taken 1 31 3.496508 3.496508 555 +someth 1 31 3.496508 3.496508 554 +actual 1 28 3.610918 3.610918 604 +team 1 27 3.637586 3.637586 625 +consult 1 24 3.761200 3.761200 687 +sometim 1 24 3.761200 3.761200 696 +spend 3 19 4.007333 12.021999 850 +less 1 18 4.060443 4.060443 892 +along 1 18 4.060443 4.060443 878 +attempt 1 17 4.110874 4.110874 917 +white 1 17 4.110874 4.110874 951 +chateau 1 16 4.174387 4.174387 997 +took 1 16 4.174387 4.174387 1010 +month 1 15 4.248495 4.248495 1025 +dave 2 14 4.317488 8.634976 1098 +trip 2 14 4.317488 8.634976 1113 +council 1 11 4.553877 4.553877 1364 +cecil 2 9 4.753590 9.507180 1547 +hang 1 9 4.753590 4.753590 1499 +grove 3 8 4.875197 14.625591 1675 +pure 1 8 4.875197 4.875197 1776 +vehicl 1 7 5.010635 5.010635 1928 +wouldn 1 7 5.010635 5.010635 1970 +footbal 1 7 5.010635 5.010635 1912 +strip 1 6 5.164786 5.164786 2203 +toronto 1 6 5.164786 5.164786 2156 +spinproject 1 5 5.347108 5.347108 2570 +gui 1 5 5.347108 5.347108 2573 +water 1 5 5.347108 5.347108 2535 +worki 1 4 5.568345 5.568345 3010 +fantasi 1 4 5.568345 5.568345 3055 +silli 1 4 5.568345 5.568345 3038 +raft 1 4 5.568345 5.568345 3060 +langaug 1 3 5.857933 5.857933 3661 +hampshir 1 3 5.857933 5.857933 3280 +kick 1 3 5.857933 5.857933 3962 +hord 1 2 6.263398 6.263398 5917 +aroundth 1 2 6.263398 6.263398 5653 +fring 1 2 6.263398 6.263398 5721 +boi 1 2 6.263398 6.263398 5918 +toseattl 1 2 6.263398 6.263398 5919 +soonish 1 1 6.957497 6.957497 15514 +dilbertfix 1 1 6.957497 6.957497 15515 +thathit 1 1 6.957497 6.957497 15516 +underacheiv 1 1 6.957497 6.957497 15517 +scoutreserv 1 1 6.957497 6.957497 15518 +greaterlowel 1 1 6.957497 6.957497 15519 +casunset 1 1 6.957497 6.957497 15520 +cabin 1 1 6.957497 6.957497 15521 +drove 1 1 6.957497 6.957497 15522 +detour 1 1 6.957497 6.957497 15523 +somehihglight 1 1 6.957497 6.957497 15524 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^hauck^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^hauck^ new file mode 100644 index 00000000..8f0f8c99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^hauck^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 5 443 0.693147 3.465735 6 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +current 3 284 1.098612 3.295836 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +design 4 213 1.386294 5.545176 25 +washington 3 236 1.386294 4.158882 32 +graduat 2 215 1.386294 2.772588 31 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +develop 2 174 1.791759 3.583518 53 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +architectur 4 139 1.945910 7.783640 77 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +seattl 1 120 2.079442 2.079442 103 +well 2 109 2.197225 4.394450 121 +person 1 111 2.197225 2.197225 117 +level 2 87 2.484907 4.969814 180 +educ 1 86 2.484907 2.484907 191 +june 1 79 2.564949 2.564949 214 +simul 1 66 2.708050 2.708050 255 +experi 1 64 2.772589 2.772589 283 +improv 1 62 2.772589 2.772589 289 +hardwar 1 51 2.995732 2.995732 350 +prototyp 3 38 3.295837 9.887511 463 +multi 3 36 3.367296 10.101888 493 +survei 1 35 3.401197 3.401197 513 +board 2 33 3.433987 6.867974 528 +curriculum 1 33 3.433987 3.433987 535 +methodolog 1 23 3.806662 3.806662 733 +rout 1 21 3.912023 3.912023 793 +synthesi 1 20 3.951244 3.951244 834 +scott 2 18 4.060443 8.120886 884 +commerci 1 16 4.174387 4.174387 1005 +partit 1 16 4.174387 4.174387 984 +topolog 1 14 4.317488 4.317488 1089 +embed 1 14 4.317488 4.317488 1102 +circuit 3 13 4.382027 13.146081 1131 +asynchron 4 12 4.465908 17.863632 1229 +fpga 8 10 4.653960 37.231680 1433 +rapid 3 10 4.653960 13.961880 1453 +densiti 1 7 5.010635 5.010635 1927 +chinook 1 6 5.164786 5.164786 2229 +triptych 2 4 5.568345 11.136690 3061 +biographi 1 3 5.857933 5.857933 3658 +hauck 3 2 6.263398 18.790194 5920 +montag 2 2 6.263398 12.526796 5921 +springbok 1 2 6.263398 6.263398 5922 +thoughi 1 1 6.957497 6.957497 15525 +vitaeresearch 1 1 6.957497 6.957497 15526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ new file mode 100644 index 00000000..a76f3330 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^hinshaw^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +kevin 1 9 4.753590 4.753590 1482 +hinshaw 1 1 6.957497 6.957497 15527 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ian^ new file mode 100644 index 00000000..6bb0b988 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ian^ @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +time 2 293 1.098612 2.197224 17 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +also 2 259 1.386294 2.772588 28 +washington 1 236 1.386294 1.386294 32 +place 2 106 2.197225 4.394450 124 +part 1 98 2.302585 2.302585 129 +imag 1 91 2.397895 2.397895 161 +stuff 2 87 2.484907 4.969814 171 +chang 1 82 2.484907 2.484907 163 +decemb 1 80 2.564949 2.564949 215 +master 1 76 2.564949 2.564949 216 +name 1 72 2.639057 2.639057 220 +html 1 75 2.639057 2.639057 235 +view 1 70 2.708050 2.708050 254 +dept 1 64 2.772589 2.772589 291 +back 1 60 2.833213 2.833213 297 +thesi 1 57 2.890372 2.890372 327 +week 2 52 2.995732 5.991464 343 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +pointer 1 48 3.044522 3.044522 368 +possibl 1 47 3.091042 3.091042 378 +around 1 43 3.178054 3.178054 415 +profil 1 30 3.555348 3.555348 581 +pass 1 28 3.610918 3.610918 611 +univ 1 28 3.610918 3.610918 617 +subject 1 26 3.688879 3.688879 647 +notic 1 25 3.737670 3.737670 675 +head 1 23 3.806662 3.806662 732 +brows 1 23 3.806662 3.806662 726 +half 1 21 3.912023 3.912023 776 +busi 1 21 3.912023 3.912023 784 +unfortun 1 13 4.382027 4.382027 1170 +rememb 1 12 4.465908 4.465908 1217 +neat 1 12 4.465908 4.465908 1263 +daughter 1 7 5.010635 5.010635 1943 +chinook 1 6 5.164786 5.164786 2229 +upper 1 5 5.347108 5.347108 2481 +wast 1 5 5.347108 5.347108 2537 +silli 1 4 5.568345 5.568345 3038 +washingtonseattl 1 4 5.568345 5.568345 3044 +macduff 3 2 6.263398 18.790194 5923 +emma 1 2 6.263398 6.263398 5546 +obsess 1 2 6.263398 6.263398 5924 +ultrasound 1 1 6.957497 6.957497 15528 +elspeth 1 1 6.957497 6.957497 15529 +unborn 1 1 6.957497 6.957497 15530 +fromconcept 1 1 6.957497 6.957497 15531 +ripe 1 1 6.957497 6.957497 15532 +inmid 1 1 6.957497 6.957497 15533 +ly 1 1 6.957497 6.957497 15534 +lookingup 1 1 6.957497 6.957497 15535 +torso 1 1 6.957497 6.957497 15536 +theleft 1 1 6.957497 6.957497 15537 +impend 1 1 6.957497 6.957497 15538 +fatherhood 1 1 6.957497 6.957497 15539 +myspam 1 1 6.957497 6.957497 15540 +usingwebcrawl 1 1 6.957497 6.957497 15541 +frogstv 1 1 6.957497 6.957497 15542 +nationpenn 1 1 6.957497 6.957497 15543 +tellermus 1 1 6.957497 6.957497 15544 +lyricsian 1 1 6.957497 6.957497 15545 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ new file mode 100644 index 00000000..903f0be9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jamrozik^ @@ -0,0 +1,146 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 4 443 0.693147 2.772588 6 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 3 236 1.386294 4.158882 32 +languag 2 227 1.386294 2.772588 26 +mail 1 238 1.386294 1.386294 22 +softwar 1 220 1.386294 1.386294 30 +oper 1 180 1.609438 1.609438 34 +network 7 168 1.791759 12.542313 61 +distribut 2 162 1.791759 3.583518 51 +phone 1 175 1.791759 1.791759 45 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +object 2 138 1.945910 3.891820 79 +hall 1 146 1.945910 1.945910 65 +file 1 132 1.945910 1.945910 70 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +area 1 144 1.945910 1.945910 80 +high 3 130 2.079442 6.238326 101 +provid 2 121 2.079442 4.158884 94 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +memori 11 101 2.302585 25.328435 139 +access 3 102 2.302585 6.907755 136 +need 2 98 2.302585 4.605170 135 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +pictur 2 89 2.397895 4.795790 160 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +environ 2 84 2.484907 4.969814 177 +level 1 87 2.484907 2.484907 180 +orient 2 80 2.564949 5.129898 205 +decemb 1 80 2.564949 2.564949 215 +name 1 72 2.639057 2.639057 220 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +sieg 1 69 2.708050 2.708050 260 +main 1 67 2.708050 2.708050 256 +order 1 69 2.708050 2.708050 249 +septemb 1 65 2.772589 2.772589 274 +virtual 1 62 2.772589 2.772589 285 +visit 1 63 2.772589 2.772589 288 +back 1 60 2.833213 2.833213 297 +march 1 61 2.833213 2.833213 295 +thesi 1 57 2.890372 2.890372 327 +local 1 55 2.944439 2.944439 334 +processor 1 54 2.944439 2.944439 335 +cach 2 41 3.218876 6.437752 432 +small 1 39 3.258097 3.258097 447 +map 1 39 3.258097 3.258097 452 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +mean 1 37 3.332205 3.332205 477 +global 3 34 3.401197 10.203591 520 +cluster 1 28 3.610918 3.610918 612 +load 1 28 3.610918 3.610918 601 +primari 1 25 3.737670 3.737670 669 +fundament 1 25 3.737670 3.737670 661 +mike 1 24 3.761200 3.761200 703 +store 1 24 3.761200 3.761200 693 +size 4 23 3.806662 15.226648 713 +reduc 3 22 3.850148 11.550444 759 +disk 3 22 3.850148 11.550444 747 +hierarchi 1 22 3.850148 3.850148 744 +unit 1 21 3.912023 3.912023 779 +thu 1 21 3.912023 3.912023 773 +increas 1 20 3.951244 3.951244 829 +speed 3 18 4.060443 12.181329 911 +encourag 1 18 4.060443 4.060443 880 +engineeringunivers 1 17 4.110874 4.110874 959 +debug 1 17 4.110874 4.110874 944 +transfer 3 16 4.174387 12.523161 967 +latenc 3 16 4.174387 12.523161 993 +modern 1 16 4.174387 4.174387 966 +remot 2 15 4.248495 8.496990 1041 +levi 2 14 4.317488 8.634976 1093 +karlin 2 13 4.382027 8.764054 1176 +washingtonbox 1 13 4.382027 4.382027 1200 +introduc 1 13 4.382027 4.382027 1139 +unfortun 1 13 4.382027 4.382027 1170 +galleri 1 13 4.382027 4.382027 1192 +hank 1 12 4.465908 4.465908 1253 +mari 1 12 4.465908 4.465908 1266 +anna 1 12 4.465908 4.465908 1292 +franc 1 12 4.465908 4.465908 1276 +node 2 11 4.553877 9.107754 1326 +extrem 1 11 4.553877 4.553877 1330 +vernon 2 9 4.753590 9.507180 1556 +voelker 2 9 4.753590 9.507180 1557 +factor 1 9 4.753590 4.753590 1544 +postdoc 1 8 4.875197 4.875197 1724 +evan 1 8 4.875197 4.875197 1633 +inproceed 1 8 4.875197 4.875197 1670 +feelei 2 7 5.010635 10.021270 1859 +trend 1 7 5.010635 5.010635 1842 +geoff 1 6 5.164786 5.164786 2124 +temporari 1 6 5.164786 5.164786 2090 +li 1 5 5.347108 5.347108 2500 +seventh 1 5 5.347108 5.347108 2464 +joseph 1 5 5.347108 5.347108 2327 +coverag 1 4 5.568345 5.568345 2656 +greatli 1 3 5.857933 5.857933 3541 +europ 1 3 5.857933 5.857933 3761 +jamrozik 4 2 6.263398 25.053592 5925 +subpag 2 2 6.263398 12.526796 5926 +amort 1 2 6.263398 6.263398 4370 +odd 1 2 6.263398 6.263398 5565 +theuniversit 1 2 6.263398 6.263398 5927 +fourier 1 2 6.263398 6.263398 5698 +grenobl 1 2 6.263398 6.263398 5928 +laboratoir 1 2 6.263398 6.263398 5929 +herv 1 1 6.957497 6.957497 15546 +jamrozikherv 1 1 6.957497 6.957497 15547 +memoi 1 1 6.957497 6.957497 15548 +therebi 1 1 6.957497 6.957497 15549 +intens 1 1 6.957497 6.957497 15550 +lightli 1 1 6.957497 6.957497 15551 +guideproject 1 1 6.957497 6.957497 15552 +bull 1 1 6.957497 6.957497 15553 +imaginstitut 1 1 6.957497 6.957497 15554 +snot 1 1 6.957497 6.957497 15555 +louvr 1 1 6.957497 6.957497 15556 +somefamili 1 1 6.957497 6.957497 15557 +somefriend 1 1 6.957497 6.957497 15558 +eduv 1 1 6.957497 6.957497 15559 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jasons^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jasons^ new file mode 100644 index 00000000..d0933ad0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jasons^ @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +modifi 1 178 1.609438 1.609438 35 +hall 1 146 1.945910 1.945910 65 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +frequent 1 49 3.044522 3.044522 367 +weather 2 28 3.610918 7.221836 618 +channel 1 7 5.010635 5.010635 1836 +forecast 1 6 5.164786 5.164786 2171 +jason 3 3 5.857933 17.573799 3389 +eduaddress 1 3 5.857933 5.857933 3762 +secoski 2 2 6.263398 12.526796 4526 +pagejason 1 1 6.957497 6.957497 15560 +cunivers 1 1 6.957497 6.957497 15561 +boxseattl 1 1 6.957497 6.957497 15562 +projectseattl 1 1 6.957497 6.957497 15563 +secoskylast 1 1 6.957497 6.957497 15564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jbaer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jbaer^ new file mode 100644 index 00000000..ba82c30b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jbaer^ @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +current 2 284 1.098612 2.197224 21 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +washington 2 236 1.386294 2.772588 32 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +look 1 107 2.197225 2.197225 115 +question 1 91 2.397895 2.397895 141 +educ 2 86 2.484907 4.969814 191 +stuff 2 87 2.484907 4.969814 171 +school 1 84 2.484907 2.484907 188 +intellig 1 72 2.639057 2.639057 225 +multimedia 1 68 2.708050 2.708050 258 +java 1 70 2.708050 2.708050 248 +artifici 1 63 2.772589 2.772589 280 +virtual 1 62 2.772589 2.772589 285 +copi 1 63 2.772589 2.772589 284 +cool 1 49 3.044522 3.044522 374 +effect 1 46 3.091042 3.091042 385 +mark 1 44 3.135494 3.135494 403 +made 1 44 3.135494 3.135494 398 +music 1 42 3.218876 3.218876 436 +realli 1 40 3.258097 3.258097 444 +littl 1 39 3.258097 3.258097 454 +industri 1 38 3.295837 3.295837 464 +feel 1 37 3.332205 3.332205 483 +download 1 36 3.367296 3.367296 489 +human 1 32 3.465736 3.465736 546 +travel 1 30 3.555348 3.555348 579 +progress 1 28 3.610918 3.610918 598 +static 1 27 3.637586 3.637586 619 +mine 1 26 3.688879 3.688879 654 +experiment 1 26 3.688879 3.688879 645 +never 1 25 3.737670 3.737670 671 +william 1 22 3.850148 3.850148 765 +applet 3 20 3.951244 11.853732 827 +wrote 1 20 3.951244 3.951244 830 +spend 1 19 4.007333 4.007333 850 +demo 1 18 4.060443 4.060443 888 +layer 1 17 4.110874 4.110874 926 +macintosh 1 17 4.110874 4.110874 920 +signific 1 13 4.382027 4.382027 1125 +baer 2 11 4.553877 9.107754 1353 +scienceat 1 11 4.553877 4.553877 1375 +eight 1 11 4.553877 4.553877 1331 +metacrawl 1 10 4.653960 4.653960 1455 +creativ 1 8 4.875197 4.875197 1777 +dream 1 6 5.164786 5.164786 2165 +jeremi 2 5 5.347108 10.694216 2360 +interfer 1 5 5.347108 5.347108 2494 +puzzl 1 5 5.347108 5.347108 2507 +silli 1 4 5.568345 5.568345 3038 +thati 1 4 5.568345 5.568345 2616 +museum 1 3 5.857933 5.857933 3933 +computerinteract 1 2 6.263398 6.263398 5829 +stress 1 2 6.263398 6.263398 4146 +baerjeremi 1 1 6.957497 6.957497 15565 +twain 1 1 6.957497 6.957497 15566 +shakespearei 1 1 6.957497 6.957497 15567 +engineeringtool 1 1 6.957497 6.957497 15568 +pierian 1 1 6.957497 6.957497 15569 +softwareoregon 1 1 6.957497 6.957497 15570 +omsi 1 1 6.957497 6.957497 15571 +pomona 1 1 6.957497 6.957497 15572 +collegeher 1 1 6.957497 6.957497 15573 +searchcopyright 1 1 6.957497 6.957497 15574 +jbaer 1 1 6.957497 6.957497 15575 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ new file mode 100644 index 00000000..222b3cfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jbuhler^ @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 4 236 1.386294 5.545176 32 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +phone 2 175 1.791759 3.583518 45 +address 1 170 1.791759 1.791759 62 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +note 1 142 1.945910 1.945910 67 +postscript 1 131 2.079442 2.079442 90 +control 2 82 2.484907 4.969814 164 +institut 1 84 2.484907 2.484907 187 +activ 1 84 2.484907 2.484907 182 +come 1 78 2.564949 2.564949 202 +sieg 1 69 2.708050 2.708050 260 +import 1 65 2.772589 2.772589 282 +foundat 1 62 2.772589 2.772589 286 +browser 2 56 2.890372 5.780744 313 +undergradu 1 54 2.944439 2.944439 338 +finger 1 52 2.995732 2.995732 354 +electron 1 47 3.091042 3.091042 379 +tree 1 36 3.367296 3.367296 492 +soon 1 36 3.367296 3.367296 494 +return 1 34 3.401197 3.401197 502 +statu 1 18 4.060443 4.060443 885 +attempt 1 17 4.110874 4.110874 917 +latex 1 14 4.317488 4.317488 1064 +rice 1 11 4.553877 4.553877 1336 +transmiss 1 9 4.753590 4.753590 1588 +jeremi 2 5 5.347108 10.694216 2360 +adjust 1 5 5.347108 5.347108 2422 +frontier 1 3 5.857933 5.857933 3771 +alma 1 3 5.857933 5.857933 3963 +schedulemi 1 2 6.263398 6.263398 5843 +mater 1 2 6.263398 6.263398 5930 +buhler 3 1 6.957497 20.872491 15576 +jbuhler 2 1 6.957497 13.914994 15577 +pagejeremi 1 1 6.957497 6.957497 15578 +pagedo 1 1 6.957497 6.957497 15579 +tako 1 1 6.957497 6.957497 15580 +stufflectur 1 1 6.957497 6.957497 15581 +suffix 1 1 6.957497 6.957497 15582 +keycyb 1 1 6.957497 6.957497 15583 +grinsrecommend 1 1 6.957497 6.957497 15584 +readingmi 1 1 6.957497 6.957497 15585 +universityquot 1 1 6.957497 6.957497 15586 +quotesmi 1 1 6.957497 6.957497 15587 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jdean^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jdean^ new file mode 100644 index 00000000..55802bce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jdean^ @@ -0,0 +1,271 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 6 431 0.693147 4.158882 10 +program 6 374 0.693147 4.158882 7 +work 3 380 0.693147 2.079441 9 +system 3 443 0.693147 2.079441 6 +interest 3 384 0.693147 2.079441 11 +depart 2 457 0.693147 1.386294 12 +offic 2 299 1.098612 2.197224 13 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +languag 8 227 1.386294 11.090352 26 +also 4 259 1.386294 5.545176 28 +design 4 213 1.386294 5.545176 25 +washington 2 236 1.386294 2.772588 32 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +develop 4 174 1.791759 7.167036 53 +implement 3 152 1.791759 5.375277 52 +applic 2 170 1.791759 3.583518 56 +recent 2 167 1.791759 3.583518 58 +base 1 165 1.791759 1.791759 50 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +object 3 138 1.945910 5.837730 79 +area 1 144 1.945910 1.945910 80 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +compil 9 122 2.079442 18.714978 96 +seattl 4 120 2.079442 8.317768 103 +postscript 2 131 2.079442 4.158884 90 +analysi 2 124 2.079442 4.158884 98 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +code 1 108 2.197225 2.197225 116 +look 1 107 2.197225 2.197225 115 +techniqu 5 99 2.302585 11.512925 138 +access 1 102 2.302585 2.302585 136 +part 1 98 2.302585 2.302585 129 +real 1 93 2.397895 2.397895 144 +larg 2 82 2.484907 4.969814 168 +build 1 85 2.484907 2.484907 184 +environ 1 84 2.484907 2.484907 177 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +level 1 87 2.484907 2.484907 180 +contain 1 81 2.484907 2.484907 174 +optim 10 79 2.564949 25.649490 197 +orient 4 80 2.564949 10.259796 205 +exampl 2 77 2.564949 5.129898 195 +dynam 1 76 2.564949 2.564949 194 +line 2 75 2.639057 5.278114 231 +effici 2 73 2.639057 5.278114 233 +appli 2 71 2.639057 5.278114 226 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +meet 1 72 2.639057 2.639057 229 +would 3 67 2.708050 8.124150 251 +view 2 70 2.708050 5.416100 254 +integr 2 67 2.708050 5.416100 245 +sieg 1 69 2.708050 2.708050 260 +goal 1 66 2.708050 2.708050 250 +receiv 1 66 2.708050 2.708050 244 +guid 2 63 2.772589 5.545178 267 +plan 1 65 2.772589 2.772589 272 +laboratori 1 63 2.772589 2.772589 292 +experi 1 64 2.772589 2.772589 283 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +interact 1 62 2.772589 2.772589 270 +result 1 65 2.772589 2.772589 281 +back 1 60 2.833213 2.833213 297 +explor 2 58 2.890372 5.780744 324 +summer 1 56 2.890372 2.890372 311 +direct 1 57 2.890372 2.890372 316 +extens 1 53 2.944439 2.944439 340 +three 1 54 2.944439 2.944439 330 +much 1 52 2.995732 2.995732 349 +particular 1 51 2.995732 2.995732 352 +run 1 51 2.995732 2.995732 347 +adapt 1 46 3.091042 3.091042 387 +quarter 1 47 3.091042 3.091042 389 +featur 1 46 3.091042 3.091042 386 +keep 1 44 3.135494 3.135494 409 +made 1 44 3.135494 3.135494 398 +better 1 45 3.135494 3.135494 401 +even 1 45 3.135494 3.135494 393 +around 1 43 3.178054 3.178054 415 +futur 1 41 3.218876 3.218876 427 +combin 1 42 3.218876 3.218876 421 +author 2 39 3.258097 6.516194 450 +join 1 39 3.258097 3.258097 457 +littl 1 39 3.258097 3.258097 454 +probabl 1 40 3.258097 3.258097 455 +realli 1 40 3.258097 3.258097 444 +vita 1 38 3.295837 3.295837 473 +seminar 1 38 3.295837 3.295837 470 +feel 1 37 3.332205 3.332205 483 +singl 1 34 3.401197 3.401197 510 +approxim 1 35 3.401197 3.401197 509 +word 1 34 3.401197 3.401197 508 +curriculum 1 33 3.433987 3.433987 535 +obtain 1 33 3.433987 3.433987 534 +kind 1 32 3.465736 3.465736 541 +independ 1 32 3.465736 3.465736 548 +someth 1 31 3.496508 3.496508 554 +exist 1 30 3.555348 3.555348 569 +profil 1 30 3.555348 3.555348 581 +specifi 1 30 3.555348 3.555348 568 +travel 1 30 3.555348 3.555348 579 +pass 6 28 3.610918 21.665508 611 +scale 1 28 3.610918 3.610918 613 +becom 1 28 3.610918 3.610918 603 +framework 1 28 3.610918 3.610918 606 +effort 1 26 3.688879 3.688879 652 +consist 1 26 3.688879 3.688879 651 +enjoi 1 26 3.688879 3.688879 660 +rather 1 26 3.688879 3.688879 642 +jeff 2 25 3.737670 7.475340 673 +spent 2 25 3.737670 7.475340 676 +concern 1 25 3.737670 3.737670 666 +toward 1 25 3.737670 3.737670 668 +wai 1 25 3.737670 3.737670 662 +client 1 25 3.737670 3.737670 679 +never 1 25 3.737670 3.737670 671 +highli 1 23 3.806662 3.806662 725 +defin 1 22 3.850148 3.850148 746 +recommend 1 22 3.850148 3.850148 737 +hous 1 21 3.912023 3.912023 801 +programminglanguag 1 21 3.912023 3.912023 782 +flexibl 1 21 3.912023 3.912023 792 +love 1 21 3.912023 3.912023 804 +entir 1 20 3.951244 3.951244 811 +nice 1 20 3.951244 3.951244 809 +minut 1 20 3.951244 3.951244 810 +predict 1 19 4.007333 4.007333 855 +four 1 18 4.060443 4.060443 905 +whole 2 17 4.110874 8.221748 940 +permit 4 16 4.174387 16.697548 962 +chateau 1 16 4.174387 4.174387 997 +letter 1 16 4.174387 4.174387 981 +anyth 1 16 4.174387 4.174387 998 +took 1 16 4.174387 4.174387 1010 +track 1 15 4.248495 4.248495 1029 +enough 1 15 4.248495 4.248495 1040 +doesn 1 15 4.248495 4.248495 1055 +dean 2 14 4.317488 8.634976 1104 +spin 1 14 4.317488 4.317488 1121 +split 1 14 4.317488 4.317488 1078 +wife 3 13 4.382027 13.146081 1196 +primarili 1 13 4.382027 4.382027 1185 +composit 1 13 4.382027 4.382027 1150 +hotlist 1 13 4.382027 4.382027 1199 +uniqu 1 12 4.465908 4.465908 1228 +iter 1 12 4.465908 4.465908 1206 +food 1 12 4.465908 4.465908 1285 +walk 1 12 4.465908 4.465908 1281 +valid 1 11 4.553877 4.553877 1299 +moment 1 11 4.553877 4.553877 1379 +true 1 10 4.653960 4.653960 1422 +guess 1 10 4.653960 4.653960 1443 +cecil 4 9 4.753590 19.014360 1547 +palo 1 9 4.753590 4.753590 1590 +alto 1 9 4.753590 4.753590 1591 +hang 1 9 4.753590 4.753590 1499 +inter 1 9 4.753590 4.753590 1530 +compos 1 9 4.753590 4.753590 1527 +sound 1 9 4.753590 4.753590 1605 +ride 2 8 4.875197 9.750394 1741 +pure 1 8 4.875197 4.875197 1776 +isol 1 8 4.875197 4.875197 1663 +closur 1 8 4.875197 4.875197 1643 +bug 1 7 5.010635 5.010635 1801 +dead 1 7 5.010635 5.010635 1840 +daughter 1 7 5.010635 5.010635 1943 +affect 2 6 5.164786 10.329572 2044 +park 1 6 5.164786 5.164786 2218 +increment 1 6 5.164786 5.164786 2206 +creation 1 6 5.164786 5.164786 2069 +vortex 4 5 5.347108 21.388432 2362 +spinproject 1 5 5.347108 5.347108 2570 +unnecessari 1 5 5.347108 5.347108 2506 +lesson 1 5 5.347108 5.347108 2568 +western 1 4 5.568345 5.568345 3062 +usedto 1 4 5.568345 5.568345 2643 +inlin 1 4 5.568345 5.568345 2964 +enjoy 1 4 5.568345 5.568345 2937 +insur 1 4 5.568345 5.568345 2939 +coverag 1 4 5.568345 5.568345 2656 +nearbi 1 3 5.857933 5.857933 3291 +langaug 1 3 5.857933 5.857933 3661 +stillmaintain 1 3 5.857933 5.857933 3964 +ofobject 1 3 5.857933 5.857933 3399 +forobject 1 3 5.857933 5.857933 3965 +kick 1 3 5.857933 5.857933 3962 +habit 1 3 5.857933 5.857933 3777 +somedai 1 3 5.857933 5.857933 3919 +fantast 1 3 5.857933 5.857933 3966 +flight 2 2 6.263398 12.526796 5911 +bought 1 2 6.263398 6.263398 5165 +projectsi 1 2 6.263398 6.263398 5931 +andto 1 2 6.263398 6.263398 5771 +vortexcompil 1 2 6.263398 6.263398 5932 +interfacesand 1 2 6.263398 6.263398 5206 +andhow 1 2 6.263398 6.263398 5933 +intraprocedur 1 2 6.263398 6.263398 5934 +coke 1 2 6.263398 6.263398 5935 +caffein 1 2 6.263398 6.263398 5936 +galvin 1 2 6.263398 6.263398 4160 +fly 1 2 6.263398 6.263398 5937 +anymor 1 2 6.263398 6.263398 5938 +downtown 1 2 6.263398 6.263398 5642 +wing 1 2 6.263398 6.263398 4864 +lengthi 1 2 6.263398 6.263398 4273 +jdean 1 2 6.263398 6.263398 4455 +biplan 2 1 6.957497 13.914994 15588 +dang 1 1 6.957497 6.957497 15589 +weren 1 1 6.957497 6.957497 15590 +plansi 1 1 6.957497 6.957497 15591 +sunni 1 1 6.957497 6.957497 15592 +menlo 1 1 6.957497 6.957497 15593 +avehicl 1 1 6.957497 6.957497 15594 +weintend 1 1 6.957497 6.957497 15595 +codein 1 1 6.957497 6.957497 15596 +systemmicrokernel 1 1 6.957497 6.957497 15597 +especiallyprofil 1 1 6.957497 6.957497 15598 +howwhol 1 1 6.957497 6.957497 15599 +assumedthat 1 1 6.957497 6.957497 15600 +manycompromis 1 1 6.957497 6.957497 15601 +wholeprogram 1 1 6.957497 6.957497 15602 +underlyingimplement 1 1 6.957497 6.957497 15603 +principaldesign 1 1 6.957497 6.957497 15604 +independentintermedi 1 1 6.957497 6.957497 15605 +ishigh 1 1 6.957497 6.957497 15606 +messagesend 1 1 6.957497 6.957497 15607 +wayof 1 1 6.957497 6.957497 15608 +repeatedli 1 1 6.957497 6.957497 15609 +passessepar 1 1 6.957497 6.957497 15610 +classanalysi 1 1 6.957497 6.957497 15611 +aliasanalysi 1 1 6.957497 6.957497 15612 +structuringoptim 1 1 6.957497 6.957497 15613 +stillallow 1 1 6.957497 6.957497 15614 +eachoth 1 1 6.957497 6.957497 15615 +flowanalys 1 1 6.957497 6.957497 15616 +withrel 1 1 6.957497 6.957497 15617 +assignmentelimin 1 1 6.957497 6.957497 15618 +publicationssom 1 1 6.957497 6.957497 15619 +personali 1 1 6.957497 6.957497 15620 +spici 1 1 6.957497 6.957497 15621 +mild 1 1 6.957497 6.957497 15622 +heidi 1 1 6.957497 6.957497 15623 +victoria 1 1 6.957497 6.957497 15624 +honeymoon 1 1 6.957497 6.957497 15625 +kauai 1 1 6.957497 6.957497 15626 +hurrican 1 1 6.957497 6.957497 15627 +iniki 1 1 6.957497 6.957497 15628 +puget 1 1 6.957497 6.957497 15629 +dare 1 1 6.957497 6.957497 15630 +sadli 1 1 6.957497 6.957497 15631 +passeng 1 1 6.957497 6.957497 15632 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jdreese^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jdreese^ new file mode 100644 index 00000000..5b743af2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jdreese^ @@ -0,0 +1,177 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +system 5 443 0.693147 3.465735 6 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +us 2 329 1.098612 2.197224 16 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +softwar 12 220 1.386294 16.635528 30 +languag 3 227 1.386294 4.158882 26 +washington 2 236 1.386294 2.772588 32 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +design 1 213 1.386294 1.386294 25 +public 2 202 1.609438 3.218876 43 +oper 2 180 1.609438 3.218876 34 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +develop 3 174 1.791759 5.375277 53 +applic 2 170 1.791759 3.583518 56 +base 2 165 1.791759 3.583518 50 +avail 2 169 1.791759 3.583518 48 +phone 1 175 1.791759 1.791759 45 +process 2 142 1.945910 3.891820 72 +problem 1 147 1.945910 1.945910 75 +analysi 8 124 2.079442 16.635536 98 +tool 3 117 2.079442 6.238326 93 +studi 2 120 2.079442 4.158884 91 +postscript 2 131 2.079442 4.158884 90 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +machin 1 129 2.079442 2.079442 95 +confer 1 126 2.079442 2.079442 100 +document 1 121 2.079442 2.079442 89 +high 1 130 2.079442 2.079442 101 +specif 4 106 2.197225 8.788900 106 +place 2 106 2.197225 4.394450 124 +make 2 111 2.197225 4.394450 120 +structur 1 106 2.197225 2.197225 105 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +techniqu 1 99 2.302585 2.302585 138 +commun 1 95 2.397895 2.397895 157 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +requir 7 81 2.484907 17.394349 167 +control 3 82 2.484907 7.454721 164 +ieee 2 86 2.484907 4.969814 190 +academ 1 82 2.484907 2.484907 178 +school 1 84 2.484907 2.484907 188 +state 3 76 2.564949 7.694847 207 +exampl 2 77 2.564949 5.129898 195 +come 1 78 2.564949 2.564949 202 +dynam 1 76 2.564949 2.564949 194 +name 1 72 2.639057 2.639057 220 +html 1 75 2.639057 2.639057 235 +write 1 72 2.639057 2.639057 222 +workshop 1 71 2.639057 2.639057 239 +integr 1 67 2.708050 2.708050 245 +complex 1 64 2.772589 2.772589 269 +improv 1 62 2.772589 2.772589 289 +septemb 1 65 2.772589 2.772589 274 +januari 1 62 2.772589 2.772589 264 +experi 1 64 2.772589 2.772589 283 +best 1 59 2.833213 2.833213 299 +reason 1 57 2.890372 2.890372 318 +thesi 1 57 2.890372 2.890372 327 +three 1 54 2.944439 2.944439 330 +februari 1 54 2.944439 2.944439 328 +hardwar 1 51 2.995732 2.995732 350 +possibl 1 47 3.091042 3.091042 378 +california 1 46 3.091042 3.091042 388 +discuss 1 45 3.135494 3.135494 399 +transact 2 39 3.258097 6.516194 438 +industri 2 38 3.295837 6.591674 464 +expect 1 37 3.332205 3.332205 484 +especi 2 36 3.367296 6.734592 496 +procedur 2 36 3.367296 6.734592 488 +articl 1 33 3.433987 3.433987 530 +dissert 2 32 3.465736 6.931472 549 +concept 1 32 3.465736 3.465736 537 +rang 1 30 3.555348 3.555348 565 +specifi 1 30 3.555348 3.555348 568 +semant 3 29 3.583519 10.750557 587 +becom 1 28 3.610918 3.610918 603 +great 1 27 3.637586 3.637586 626 +doctor 2 24 3.761200 7.522400 709 +interpret 1 24 3.761200 3.761200 686 +displai 1 23 3.806662 3.806662 712 +flexibl 1 21 3.912023 3.912023 792 +safeti 4 20 3.951244 15.804976 817 +histori 1 19 4.007333 4.007333 853 +less 1 18 4.060443 4.060443 892 +behavior 1 18 4.060443 4.060443 881 +concentr 1 18 4.060443 4.060443 906 +engineeringunivers 1 17 4.110874 4.110874 959 +steven 1 17 4.110874 4.110874 953 +critic 3 16 4.174387 12.523161 982 +advantag 2 16 4.174387 8.348774 987 +chateau 1 16 4.174387 4.174387 997 +devic 1 16 4.174387 4.174387 1002 +convent 1 14 4.317488 4.317488 1072 +draft 1 14 4.317488 4.317488 1085 +washingtonbox 1 13 4.382027 4.382027 1200 +difficulti 1 13 4.382027 4.382027 1132 +signific 1 13 4.382027 4.382027 1125 +nanci 1 12 4.465908 4.465908 1256 +island 1 11 4.553877 4.553877 1345 +valid 1 11 4.553877 4.553877 1299 +summar 1 11 4.553877 4.553877 1295 +alpha 1 11 4.553877 4.553877 1348 +rice 1 11 4.553877 4.553877 1336 +itali 1 11 4.553877 4.553877 1378 +success 1 10 4.653960 4.653960 1390 +kurt 2 9 4.753590 9.507180 1548 +leveson 2 9 4.753590 9.507180 1540 +respect 1 9 4.753590 4.753590 1545 +linguist 1 9 4.753590 4.753590 1593 +guggenheim 1 8 4.875197 4.875197 1759 +fail 1 8 4.875197 4.875197 1655 +perhap 1 8 4.875197 4.875197 1693 +mile 1 8 4.875197 4.875197 1743 +sean 1 8 4.875197 4.875197 1705 +irvin 1 8 4.875197 4.875197 1660 +curv 1 8 4.875197 4.875197 1656 +awar 1 7 5.010635 5.010635 1800 +henc 1 7 5.010635 5.010635 1805 +sixth 1 7 5.010635 5.010635 1917 +price 1 6 5.164786 5.164786 1999 +emerg 1 6 5.164786 5.164786 2038 +transcript 1 6 5.164786 5.164786 2067 +variant 1 6 5.164786 5.164786 2043 +annex 1 5 5.347108 5.347108 2572 +caus 1 5 5.347108 5.347108 2298 +stage 1 5 5.347108 5.347108 2488 +colleagu 1 5 5.347108 5.347108 2304 +ortega 1 5 5.347108 5.347108 2559 +expens 1 4 5.568345 5.568345 2678 +avion 1 4 5.568345 5.568345 3018 +invent 1 4 5.568345 5.568345 3028 +sandi 1 4 5.568345 5.568345 2765 +rsml 6 3 5.857933 35.147598 3967 +hazard 2 3 5.857933 11.715866 3191 +partridg 2 3 5.857933 11.715866 3346 +diagnos 1 3 5.857933 5.857933 3968 +borrow 1 3 5.857933 5.857933 3725 +publicli 1 3 5.857933 5.857933 3687 +diagnost 1 3 5.857933 5.857933 3833 +deviat 8 2 6.263398 50.107184 4826 +rees 4 2 6.263398 25.053592 5939 +heimdahl 2 2 6.263398 12.526796 5940 +unpredict 1 2 6.263398 6.263398 5722 +incid 1 2 6.263398 6.263398 5870 +tca 1 2 6.263398 6.263398 5941 +mat 1 2 6.263398 6.263398 5942 +holli 1 2 6.263398 6.263398 5601 +damon 2 1 6.957497 13.914994 15633 +jdrees 2 1 6.957497 13.914994 15634 +hazop 2 1 6.957497 13.914994 15635 +waxahachi 2 1 6.957497 13.914994 15636 +hildreth 2 1 6.957497 13.914994 15637 +pagejon 1 1 6.957497 6.957497 15638 +reesepost 1 1 6.957497 6.957497 15639 +groupdepart 1 1 6.957497 6.957497 15640 +catastroph 1 1 6.957497 6.957497 15641 +wider 1 1 6.957497 6.957497 15642 +siang 1 1 6.957497 6.957497 15643 +dolin 1 1 6.957497 6.957497 15644 +statechart 1 1 6.957497 6.957497 15645 +como 1 1 6.957497 6.957497 15646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jlo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jlo^ new file mode 100644 index 00000000..0fd3d24b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jlo^ @@ -0,0 +1,105 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +washington 3 236 1.386294 4.158882 32 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +parallel 4 169 1.791759 7.167036 60 +implement 2 152 1.791759 3.583518 52 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +architectur 3 139 1.945910 5.837730 77 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +compil 7 122 2.079442 14.556094 96 +postscript 4 131 2.079442 8.317768 90 +schedul 4 119 2.079442 8.317768 85 +report 2 131 2.079442 4.158884 92 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +proceed 3 93 2.397895 7.193685 152 +pictur 3 89 2.397895 7.193685 160 +level 4 87 2.484907 9.939628 180 +issu 4 78 2.564949 10.259796 211 +optim 2 79 2.564949 5.129898 197 +dynam 2 76 2.564949 5.129898 194 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +abstract 3 62 2.772589 8.317767 276 +written 2 63 2.772589 5.545178 278 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +juli 1 60 2.833213 2.833213 305 +room 1 59 2.833213 2.833213 301 +instruct 4 53 2.944439 11.777756 332 +processor 4 54 2.944439 11.777756 335 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +california 1 46 3.091042 3.091042 388 +examin 3 42 3.218876 9.656628 424 +submit 1 39 3.258097 3.258097 440 +annual 1 40 3.258097 3.258097 458 +static 2 27 3.637586 7.275172 619 +compar 1 26 3.688879 3.688879 648 +thread 1 23 3.806662 3.806662 722 +exploit 1 20 3.951244 3.951244 836 +increas 1 20 3.951244 3.951244 829 +stanford 1 17 4.110874 4.110874 955 +coupl 1 17 4.110874 4.110874 939 +choic 1 16 4.174387 4.174387 979 +susan 4 15 4.248495 16.993980 1050 +levi 3 14 4.317488 12.952464 1093 +balanc 2 14 4.317488 8.634976 1112 +dean 1 14 4.317488 4.317488 1104 +convert 1 13 4.382027 4.382027 1122 +sigplan 1 13 4.382027 4.382027 1190 +philadelphia 1 12 4.465908 4.465908 1244 +multithread 6 11 4.553877 27.323262 1315 +henri 3 10 4.653960 13.961880 1417 +franklin 1 10 4.653960 4.653960 1436 +jack 8 8 4.875197 39.001576 1780 +egger 4 8 4.875197 19.500788 1695 +joel 2 8 4.875197 9.750394 1698 +qualifi 1 8 4.875197 4.875197 1721 +simultan 5 6 5.164786 25.823930 2155 +tullsen 3 6 5.164786 15.494358 2081 +rebecca 2 6 5.164786 10.329572 2174 +superscalar 2 6 5.164786 10.329572 2082 +fetch 1 5 5.347108 5.347108 2567 +jolla 1 4 5.568345 5.568345 2988 +emer 2 3 5.857933 11.715866 3969 +stamm 2 3 5.857933 11.715866 3970 +vliw 1 3 5.857933 5.857933 3514 +lojlo 1 2 6.263398 6.263398 5943 +suif 1 2 6.263398 6.263398 5944 +anddean 2 1 6.957497 13.914994 15647 +lojack 1 1 6.957497 6.957497 15648 +loph 1 1 6.957497 6.957497 15649 +eseattl 1 1 6.957497 6.957497 15650 +orsieg 1 1 6.957497 6.957497 15651 +paintbal 1 1 6.957497 6.957497 15652 +yahoojlo 1 1 6.957497 6.957497 15653 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^joebob^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^joebob^ new file mode 100644 index 00000000..a0300d4c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^joebob^ @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +topic 1 114 2.197225 2.197225 110 +send 1 114 2.197225 2.197225 109 +user 3 104 2.302585 6.907755 137 +activ 1 84 2.484907 2.484907 182 +interfac 3 79 2.564949 7.694847 209 +want 1 79 2.564949 2.564949 199 +automat 1 61 2.833213 2.833213 306 +browser 1 56 2.890372 2.890372 313 +local 1 55 2.944439 2.944439 334 +suggest 1 53 2.944439 2.944439 331 +visual 1 48 3.044522 3.044522 372 +directori 1 45 3.135494 3.135494 396 +might 1 41 3.218876 3.218876 426 +survei 1 35 3.401197 3.401197 513 +navig 1 21 3.912023 3.912023 796 +engineeringunivers 1 17 4.110874 4.110874 959 +washingtonbox 1 13 4.382027 4.382027 1200 +impress 1 6 5.164786 5.164786 2096 +sherman 1 1 6.957497 6.957497 15654 +shermanjoebob 1 1 6.957497 6.957497 15655 +usami 1 1 6.957497 6.957497 15656 +designinform 1 1 6.957497 6.957497 15657 +useclass 1 1 6.957497 6.957497 15658 +hcreat 1 1 6.957497 6.957497 15659 +pagequ 1 1 6.957497 6.957497 15660 +sarahsoftballstuff 1 1 6.957497 6.957497 15661 +pagesif 1 1 6.957497 6.957497 15662 +tojoebob 1 1 6.957497 6.957497 15663 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^josh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^josh^ new file mode 100644 index 00000000..9be6b5bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^josh^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +current 2 284 1.098612 2.197224 21 +washington 1 236 1.386294 1.386294 32 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +year 1 148 1.945910 1.945910 84 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +present 1 91 2.397895 2.397895 145 +learn 1 86 2.484907 2.484907 170 +start 1 83 2.484907 2.484907 173 +test 1 66 2.708050 2.708050 252 +abstract 1 62 2.772589 2.772589 276 +organ 1 65 2.772589 2.772589 265 +colleg 1 61 2.833213 2.833213 300 +sever 2 56 2.890372 5.780744 322 +discuss 1 45 3.135494 3.135494 399 +futur 1 41 3.218876 3.218876 427 +expect 1 37 3.332205 3.332205 484 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +neural 1 30 3.555348 3.555348 578 +travel 1 30 3.555348 3.555348 579 +task 1 25 3.737670 3.737670 678 +demonstr 1 24 3.761200 3.761200 694 +theunivers 1 21 3.912023 3.912023 797 +thought 1 17 4.110874 4.110874 945 +cognit 1 16 4.174387 4.174387 986 +achiev 1 14 4.317488 4.317488 1088 +context 1 13 4.382027 4.382027 1153 +accomplish 1 8 4.875197 4.875197 1755 +potenti 1 8 4.875197 4.875197 1690 +creativ 1 8 4.875197 4.875197 1777 +successfulli 1 7 5.010635 5.010635 1869 +earn 1 7 5.010635 5.010635 1788 +biolog 1 6 5.164786 5.164786 2147 +slate 1 6 5.164786 5.164786 2021 +addition 1 4 5.568345 5.568345 2593 +joshua 3 3 5.857933 17.573799 3333 +blank 1 3 5.857933 5.857933 3379 +emul 1 3 5.857933 5.857933 3944 +josh 3 2 6.263398 18.790194 5945 +overviewof 1 2 6.263398 6.263398 5469 +seim 3 1 6.957497 20.872491 15664 +begunin 1 1 6.957497 6.957497 15665 +lockean 1 1 6.957497 6.957497 15666 +observedbehavior 1 1 6.957497 6.957497 15667 +graduatingfrom 1 1 6.957497 6.957497 15668 +volit 1 1 6.957497 6.957497 15669 +taskw 1 1 6.957497 6.957497 15670 +ambulatori 1 1 6.957497 6.957497 15671 +academichierarchi 1 1 6.957497 6.957497 15672 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jovan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jovan^ new file mode 100644 index 00000000..5f994d0e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jovan^ @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +move 1 47 3.091042 3.091042 382 +jovan 2 2 6.263398 12.526796 5842 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jpower^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jpower^ new file mode 100644 index 00000000..f1009926 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jpower^ @@ -0,0 +1,68 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +recent 1 167 1.791759 1.791759 58 +site 1 106 2.197225 2.197225 119 +graphic 2 90 2.397895 4.795790 147 +imag 1 91 2.397895 2.397895 161 +proceed 1 93 2.397895 2.397895 152 +real 1 93 2.397895 2.397895 144 +academ 2 82 2.484907 4.969814 178 +school 1 84 2.484907 2.484907 188 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +david 1 71 2.639057 2.639057 232 +main 2 67 2.708050 5.416100 256 +major 1 56 2.890372 2.890372 315 +york 1 41 3.218876 3.218876 435 +especi 1 36 3.367296 3.367296 496 +power 1 30 3.555348 3.555348 573 +color 1 22 3.850148 3.850148 762 +grad 1 20 3.951244 3.951244 837 +eric 1 19 4.007333 4.007333 870 +women 1 16 4.174387 4.174387 1004 +biologi 2 15 4.248495 8.496990 1049 +comic 1 14 4.317488 4.317488 1103 +jonathan 1 13 4.382027 4.382027 1174 +brad 1 12 4.465908 4.465908 1264 +interestsmi 1 10 4.653960 4.653960 1462 +genet 1 10 4.653960 4.653960 1409 +gain 1 8 4.875197 4.875197 1730 +siggraph 1 8 4.875197 4.875197 1773 +sean 1 8 4.875197 4.875197 1705 +molecular 1 7 5.010635 5.010635 1887 +cat 1 6 5.164786 5.164786 2194 +salesin 1 4 5.568345 5.568345 3051 +alma 2 3 5.857933 11.715866 3963 +joanna 3 2 6.263398 18.790194 4503 +reproduc 1 2 6.263398 6.263398 5519 +powerjoanna 1 1 6.957497 6.957497 15673 +pagehi 1 1 6.957497 6.957497 15674 +uwneat 1 1 6.957497 6.957497 15675 +matercool 1 1 6.957497 6.957497 15676 +shadegraph 1 1 6.957497 6.957497 15677 +uwduoton 1 1 6.957497 6.957497 15678 +reproductionmi 1 1 6.957497 6.957497 15679 +matermost 1 1 6.957497 6.957497 15680 +employmentpubl 1 1 6.957497 6.957497 15681 +stollnitz 1 1 6.957497 6.957497 15682 +duoton 1 1 6.957497 6.957497 15683 +lifepast 1 1 6.957497 6.957497 15684 +homesdiversionsgend 1 1 6.957497 6.957497 15685 +issuesstatu 1 1 6.957497 6.957497 15686 +sciencenow 1 1 6.957497 6.957497 15687 +pagefeminist 1 1 6.957497 6.957497 15688 +onlineultim 1 1 6.957497 6.957497 15689 +frisbeefun 1 1 6.957497 6.957497 15690 +stufffroggi 1 1 6.957497 6.957497 15691 +quotesbrad 1 1 6.957497 6.957497 15692 +musicevan 1 1 6.957497 6.957497 15693 +jokes 1 1 6.957497 6.957497 15694 +pagesmi 1 1 6.957497 6.957497 15695 +herojpow 1 1 6.957497 6.957497 15696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jshakes^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jshakes^ new file mode 100644 index 00000000..e714cb09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^jshakes^ @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +updat 1 191 1.609438 1.609438 41 +hall 1 146 1.945910 1.945910 65 +seattl 1 120 2.079442 2.079442 103 +homepag 1 93 2.397895 2.397895 148 +sieg 1 69 2.708050 2.708050 260 +august 1 66 2.708050 2.708050 257 +jonathan 2 13 4.382027 8.764054 1174 +ahoi 1 3 5.857933 5.857933 3532 +shake 2 2 6.263398 12.526796 5898 +finderresumlinkslast 1 1 6.957497 6.957497 15697 +jshake 1 1 6.957497 6.957497 15698 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^juanito^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^juanito^ new file mode 100644 index 00000000..66e8c83d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^juanito^ @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +juan 1 9 4.753590 4.753590 1580 +alemanyjuan 1 1 6.957497 6.957497 15699 +alemani 1 1 6.957497 6.957497 15700 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kapu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kapu^ new file mode 100644 index 00000000..8922188d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kapu^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +work 5 380 0.693147 3.465735 9 +research 2 431 0.693147 1.386294 10 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +project 2 340 1.098612 2.197224 18 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +class 2 199 1.609438 3.218876 37 +modifi 1 178 1.609438 1.609438 35 +data 3 170 1.791759 5.375277 49 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +year 2 148 1.945910 3.891820 84 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +note 1 142 1.945910 1.945910 67 +hall 1 146 1.945910 1.945910 65 +report 2 131 2.079442 4.158884 92 +seattl 2 120 2.079442 4.158884 103 +find 2 111 2.197225 4.394450 111 +mathemat 1 108 2.197225 2.197225 123 +topic 1 114 2.197225 2.197225 110 +technic 2 100 2.302585 4.605170 140 +graphic 2 90 2.397895 4.795790 147 +imag 1 91 2.397895 2.397895 161 +present 1 91 2.397895 2.397895 145 +sieg 1 69 2.708050 2.708050 260 +function 1 62 2.772589 2.772589 275 +understand 1 47 3.091042 3.091042 384 +third 1 43 3.178054 3.178054 412 +vision 2 41 3.218876 6.437752 430 +combin 1 42 3.218876 3.218876 421 +examin 1 42 3.218876 3.218876 424 +multipl 1 39 3.258097 3.258097 453 +slide 3 38 3.295837 9.887511 467 +microsoft 1 38 3.295837 3.295837 468 +statist 1 35 3.401197 3.401197 521 +taught 3 33 3.433987 10.301961 526 +obtain 1 33 3.433987 3.433987 534 +rang 1 30 3.555348 3.555348 565 +actual 1 28 3.610918 3.610918 604 +pass 1 28 3.610918 3.610918 611 +aspect 1 25 3.737670 3.737670 663 +ofwashington 1 22 3.850148 3.850148 766 +try 1 22 3.850148 3.850148 764 +geometri 1 22 3.850148 3.850148 752 +left 1 19 4.007333 4.007333 851 +steven 1 17 4.110874 4.110874 953 +qual 1 15 4.248495 4.248495 1062 +universityof 1 15 4.248495 4.248495 1061 +reflect 1 15 4.248495 4.248495 1034 +remov 1 12 4.465908 4.465908 1225 +werner 1 10 4.653960 4.653960 1385 +linda 1 10 4.653960 4.653960 1394 +tanimoto 1 10 4.653960 4.653960 1429 +surfac 3 9 4.753590 14.260770 1574 +folk 1 9 4.753590 4.753590 1597 +siggraph 1 8 4.875197 4.875197 1773 +theclass 2 6 5.164786 10.329572 2060 +speaker 1 5 5.347108 5.347108 2370 +engineeringdepart 1 4 5.568345 5.568345 2917 +closest 1 4 5.568345 5.568345 2828 +addition 1 4 5.568345 5.568345 2593 +rick 1 4 5.568345 5.568345 2646 +wavelet 1 4 5.568345 5.568345 2874 +union 1 4 5.568345 5.568345 2634 +kari 2 2 6.263398 12.526796 4500 +andmathemat 1 2 6.263398 6.263398 4948 +tonyderos 1 2 6.263398 6.263398 5839 +stuetzl 1 2 6.263398 6.263398 5840 +duchamp 1 2 6.263398 6.263398 5841 +hopp 1 2 6.263398 6.263398 5092 +sketch 1 2 6.263398 6.263398 5946 +getto 1 2 6.263398 6.263398 5806 +herear 1 2 6.263398 6.263398 5947 +pulli 2 1 6.957497 13.914994 15701 +antero 2 1 6.957497 13.914994 15702 +subdivis 2 1 6.957497 13.914994 15703 +pagekari 1 1 6.957497 6.957497 15704 +pullii 1 1 6.957497 6.957497 15705 +thesedisciplin 1 1 6.957497 6.957497 15706 +uwfor 1 1 6.957497 6.957497 15707 +pixar 1 1 6.957497 6.957497 15708 +lindashapiro 1 1 6.957497 6.957497 15709 +andjohn 1 1 6.957497 6.957497 15710 +mcdonald 1 1 6.957497 6.957497 15711 +andhugu 1 1 6.957497 6.957497 15712 +szeliski 1 1 6.957497 6.957497 15713 +tribor 1 1 6.957497 6.957497 15714 +triplet 1 1 6.957497 6.957497 15715 +recognitionsystem 1 1 6.957497 6.957497 15716 +surfacereconstruct 1 1 6.957497 6.957497 15717 +baselin 1 1 6.957497 6.957497 15718 +camerasystem 1 1 6.957497 6.957497 15719 +waveletanalysi 1 1 6.957497 6.957497 15720 +rigidregistr 1 1 6.957497 6.957497 15721 +architecturesystem 1 1 6.957497 6.957497 15722 +susanegg 1 1 6.957497 6.957497 15723 +brianbershad 1 1 6.957497 6.957497 15724 +eacutesum 1 1 6.957497 6.957497 15725 +eacut 1 1 6.957497 6.957497 15726 +kapu 1 1 6.957497 6.957497 15727 +takavainionti 1 1 6.957497 6.957497 15728 +oulu 1 1 6.957497 6.957497 15729 +finland 1 1 6.957497 6.957497 15730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^karlin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^karlin^ new file mode 100644 index 00000000..e2d18cfd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^karlin^ @@ -0,0 +1,18 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 3 236 1.386294 4.158882 32 +professor 1 137 1.945910 1.945910 76 +seattl 1 120 2.079442 2.079442 103 +anna 1 12 4.465908 4.465908 1292 +karlinanna 1 1 6.957497 6.957497 15731 +rochel 1 1 6.957497 6.957497 15732 +karlinassoci 1 1 6.957497 6.957497 15733 +sincejuli 1 1 6.957497 6.957497 15734 +paperskarlin 1 1 6.957497 6.957497 15735 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kayee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kayee^ new file mode 100644 index 00000000..14f203a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kayee^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +email 1 220 1.386294 1.386294 29 +back 1 60 2.833213 2.833213 297 +yeunghom 1 1 6.957497 6.957497 15736 +yeungperson 1 1 6.957497 6.957497 15737 +infomi 1 1 6.957497 6.957497 15738 +picturemi 1 1 6.957497 6.957497 15739 +researchtelnet 1 1 6.957497 6.957497 15740 +machinessend 1 1 6.957497 6.957497 15741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kepart^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kepart^ new file mode 100644 index 00000000..0b0c2a4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kepart^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +softwar 4 220 1.386294 5.545176 30 +washington 3 236 1.386294 4.158882 32 +graduat 2 215 1.386294 2.772588 31 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +area 1 144 1.945910 1.945910 80 +postscript 3 131 2.079442 6.238326 90 +confer 1 126 2.079442 2.079442 100 +seattl 1 120 2.079442 2.079442 103 +specif 3 106 2.197225 6.591675 106 +user 1 104 2.302585 2.302585 137 +environ 2 84 2.484907 4.969814 177 +academ 1 82 2.484907 2.484907 178 +start 1 83 2.484907 2.484907 173 +school 1 84 2.484907 2.484907 188 +method 1 80 2.564949 2.564949 213 +interfac 1 79 2.564949 2.564949 209 +complet 1 77 2.564949 2.564949 208 +html 1 75 2.639057 2.639057 235 +name 1 72 2.639057 2.639057 220 +java 1 70 2.708050 2.708050 248 +interact 2 62 2.772589 5.545178 270 +visual 1 48 3.044522 3.044522 372 +life 1 50 3.044522 3.044522 375 +right 1 48 3.044522 3.044522 363 +describ 1 45 3.135494 3.135494 400 +live 1 40 3.258097 3.258097 451 +formal 1 37 3.332205 3.332205 478 +human 1 32 3.465736 3.465736 546 +manipul 1 27 3.637586 3.637586 624 +berkelei 1 26 3.688879 3.688879 657 +background 1 25 3.737670 3.737670 664 +other 1 24 3.761200 3.761200 697 +love 1 21 3.912023 3.912023 804 +voic 1 21 3.912023 3.912023 806 +safeti 3 20 3.951244 11.853732 817 +binari 1 20 3.951244 3.951244 823 +qualiti 1 20 3.951244 3.951244 832 +critic 1 16 4.174387 4.174387 982 +nasa 1 13 4.382027 4.382027 1188 +readabl 1 12 4.465908 4.465908 1258 +nanci 1 12 4.465908 4.465908 1256 +kurt 5 9 4.753590 23.767950 1548 +leveson 1 9 4.753590 4.753590 1540 +sister 1 9 4.753590 4.753590 1524 +wayn 1 8 4.875197 4.875197 1738 +poster 2 7 5.010635 10.021270 1814 +usabl 1 7 5.010635 5.010635 1810 +corner 1 7 5.010635 5.010635 1909 +vivek 1 6 5.164786 5.164786 2210 +parent 1 6 5.164786 5.164786 2204 +ohlrich 1 5 5.347108 5.347108 2564 +humor 1 5 5.347108 5.347108 2533 +partridg 4 3 5.857933 23.431732 3346 +dabbl 1 3 5.857933 5.857933 3971 +preview 1 3 5.857933 5.857933 3306 +bauer 1 2 6.263398 6.263398 5117 +mat 1 2 6.263398 6.263398 5942 +heimdahl 1 2 6.263398 6.263398 5940 +ratan 1 2 6.263398 6.263398 5948 +rees 1 2 6.263398 6.263398 5939 +thousand 1 2 6.263398 6.263398 5949 +oak 1 2 6.263398 6.263398 5566 +kepart 1 2 6.263398 6.263398 4459 +bddtcl 1 1 6.957497 6.957497 15742 +decisiondiagram 1 1 6.957497 6.957497 15743 +suburban 1 1 6.957497 6.957497 15744 +oti 1 1 6.957497 6.957497 15745 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kgolden^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kgolden^ new file mode 100644 index 00000000..4fcac04d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kgolden^ @@ -0,0 +1,51 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +list 1 201 1.609438 1.609438 39 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +stuff 1 87 2.484907 2.484907 171 +complet 1 77 2.564949 2.564949 208 +advisor 1 51 2.995732 2.995732 355 +friend 1 48 3.044522 3.044522 376 +natur 1 44 3.135494 3.135494 406 +vita 1 38 3.295837 3.295837 473 +collabor 1 32 3.465736 3.465736 543 +suit 1 13 4.382027 4.382027 1129 +tour 1 11 4.553877 4.553877 1307 +ofcomput 1 10 4.653960 4.653960 1442 +weld 1 9 4.753590 4.753590 1538 +dictionari 1 8 4.875197 4.875197 1642 +golden 2 7 5.010635 10.021270 1962 +photographi 1 6 5.164786 5.164786 2146 +oren 1 6 5.164786 5.164786 2134 +etzioni 1 6 5.164786 5.164786 2135 +keith 2 5 5.347108 10.694216 2528 +paint 1 5 5.347108 5.347108 2400 +coffe 1 5 5.347108 5.347108 2556 +lawyer 1 4 5.568345 5.568345 2836 +car 1 4 5.568345 5.568345 2931 +bicycl 1 2 6.263398 6.263398 5950 +questa 1 1 6.957497 6.957497 15746 +pagina 1 1 6.957497 6.957497 15747 +anch 1 1 6.957497 6.957497 15748 +italiano 1 1 6.957497 6.957497 15749 +researchsoftbotsplanningkrselect 1 1 6.957497 6.957497 15750 +publicationscurriculum 1 1 6.957497 6.957497 15751 +inpostscriptrandom 1 1 6.957497 6.957497 15752 +hackingwordbot 1 1 6.957497 6.957497 15753 +godless 1 1 6.957497 6.957497 15754 +pinko 1 1 6.957497 6.957497 15755 +dislik 1 1 6.957497 6.957497 15756 +ellenmarcruben 1 1 6.957497 6.957497 15757 +laurennickrich 1 1 6.957497 6.957497 15758 +joannavivek 1 1 6.957497 6.957497 15759 +keithgolden 1 1 6.957497 6.957497 15760 +kgolden 1 1 6.957497 6.957497 15761 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kingsum^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kingsum^ new file mode 100644 index 00000000..7da34e2a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kingsum^ @@ -0,0 +1,144 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 9 571 0.000000 0.000000 5 +page 4 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +engin 3 297 1.098612 3.295836 20 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +softwar 9 220 1.386294 12.476646 30 +washington 5 236 1.386294 6.931470 32 +graduat 1 215 1.386294 1.386294 31 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +seattl 2 120 2.079442 4.158884 103 +confer 2 126 2.079442 4.158884 100 +schedul 1 119 2.079442 2.079442 85 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +technolog 1 131 2.079442 2.079442 102 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +manag 1 114 2.197225 2.197225 125 +version 1 113 2.197225 2.197225 122 +intern 1 108 2.197225 2.197225 128 +site 1 106 2.197225 2.197225 119 +world 1 115 2.197225 2.197225 126 +technic 2 100 2.302585 4.605170 140 +text 1 98 2.302585 2.302585 133 +book 1 99 2.302585 2.302585 131 +proceed 1 93 2.397895 2.397895 152 +center 1 88 2.397895 2.397895 158 +sinc 1 90 2.397895 2.397895 159 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +wide 1 84 2.484907 2.484907 185 +school 1 84 2.484907 2.484907 188 +resum 2 79 2.564949 5.129898 217 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +server 1 76 2.564949 2.564949 204 +david 3 71 2.639057 7.917171 232 +line 2 75 2.639057 5.278114 231 +workshop 2 71 2.639057 5.278114 239 +onlin 2 75 2.639057 5.278114 223 +servic 1 72 2.639057 2.639057 236 +polici 1 64 2.772589 2.772589 279 +new 1 64 2.772589 2.772589 262 +visit 1 63 2.772589 2.772589 288 +content 1 59 2.833213 2.833213 302 +automat 1 61 2.833213 2.833213 306 +march 1 61 2.833213 2.833213 295 +suggest 1 53 2.944439 2.944439 331 +tabl 1 51 2.995732 2.995732 346 +advisor 1 51 2.995732 2.995732 355 +date 1 51 2.995732 2.995732 344 +format 1 48 3.044522 3.044522 356 +editor 1 41 3.218876 3.218876 433 +movi 3 40 3.258097 9.774291 459 +respons 2 37 3.332205 6.664410 476 +china 1 37 3.332205 3.332205 487 +manual 1 35 3.401197 3.401197 504 +transform 2 32 3.465736 6.931472 542 +dissert 1 32 3.465736 3.465736 549 +specifi 1 30 3.555348 3.555348 568 +quot 2 29 3.583519 7.167038 582 +chines 1 29 3.583519 3.583519 595 +campu 1 27 3.637586 3.637586 623 +revis 1 26 3.688879 3.688879 640 +experiment 1 26 3.688879 3.688879 645 +alwai 1 24 3.761200 3.761200 691 +william 1 22 3.850148 3.850148 765 +wang 1 21 3.912023 3.912023 790 +watch 1 21 3.912023 3.912023 789 +fund 1 21 3.912023 3.912023 805 +qualiti 1 20 3.951244 3.951244 832 +minut 1 20 3.951244 3.951244 810 +citi 1 19 4.007333 4.007333 874 +thoma 1 18 4.060443 4.060443 901 +stock 1 16 4.174387 4.174387 1007 +driven 1 15 4.248495 4.248495 1048 +style 1 15 4.248495 4.248495 1036 +hong 8 14 4.317488 34.539904 1105 +asynchron 3 12 4.465908 13.397724 1229 +readi 1 12 4.465908 4.465908 1242 +evolut 4 11 4.553877 18.215508 1314 +market 1 11 4.553877 4.553877 1361 +kong 6 9 4.753590 28.521540 1602 +mainten 1 9 4.753590 4.753590 1543 +upcom 1 8 4.875197 4.875197 1685 +bridg 1 8 4.875197 4.875197 1764 +pacif 1 8 4.875197 4.875197 1674 +delai 1 7 5.010635 5.010635 1848 +highwai 1 6 5.164786 5.164786 2095 +invest 1 6 5.164786 5.164786 2153 +educomput 1 5 5.347108 5.347108 2524 +semi 1 5 5.347108 5.347108 2510 +these 1 5 5.347108 5.347108 2482 +mutual 1 5 5.347108 5.347108 2418 +commod 1 5 5.347108 5.347108 2415 +ics 1 4 5.568345 5.568345 2779 +chart 1 4 5.568345 5.568345 2653 +chow 6 3 5.857933 35.147598 3281 +notkin 3 3 5.857933 17.573799 3345 +polytechn 1 3 5.857933 5.857933 3222 +usathi 1 2 6.263398 6.263398 5951 +glossari 1 2 6.263398 6.263398 4418 +asia 1 2 6.263398 6.263398 5952 +alumnu 1 2 6.263398 6.263398 5863 +kingsum 8 1 6.957497 55.659976 15762 +pcct 3 1 6.957497 20.872491 15763 +feedbackresearchmi 1 1 6.957497 6.957497 15764 +toolspap 1 1 6.957497 6.957497 15765 +icsm 1 1 6.957497 6.957497 15766 +griswold 1 1 6.957497 6.957497 15767 +sorcererpcct 1 1 6.957497 6.957497 15768 +terrenc 1 1 6.957497 6.957497 15769 +parr 1 1 6.957497 6.957497 15770 +newbiesresumepleasedrop 1 1 6.957497 6.957497 15771 +mailto 1 1 6.957497 6.957497 15772 +kongchines 1 1 6.957497 6.957497 15773 +kongsingapor 1 1 6.957497 6.957497 15774 +sitessingapor 1 1 6.957497 6.957497 15775 +websom 1 1 6.957497 6.957497 15776 +friendstom 1 1 6.957497 6.957497 15777 +liew 1 1 6.957497 6.957497 15778 +fook 1 1 6.957497 6.957497 15779 +jiang 1 1 6.957497 6.957497 15780 +weidongu 1 1 6.957497 6.957497 15781 +relatedunivers 1 1 6.957497 6.957497 15782 +webserv 1 1 6.957497 6.957497 15783 +storeinvestmentsfre 1 1 6.957497 6.957497 15784 +analysismisc 1 1 6.957497 6.957497 15785 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kwb^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kwb^ new file mode 100644 index 00000000..a68b4fda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^kwb^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +engin 2 297 1.098612 2.197224 20 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +washington 2 236 1.386294 2.772588 32 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +parallel 1 169 1.791759 1.791759 60 +like 1 132 1.945910 1.945910 81 +professor 1 137 1.945910 1.945910 76 +seattl 4 120 2.079442 8.317768 103 +high 1 130 2.079442 2.079442 101 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +assist 1 112 2.197225 2.197225 113 +teach 1 108 2.197225 2.197225 112 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +search 1 95 2.397895 2.397895 155 +real 1 93 2.397895 2.397895 144 +build 1 85 2.484907 2.484907 184 +want 1 79 2.564949 2.564949 199 +previou 1 62 2.772589 2.772589 290 +written 1 63 2.772589 2.772589 278 +visit 1 63 2.772589 2.772589 288 +profession 1 51 2.995732 2.995732 345 +case 1 51 2.995732 2.995732 351 +life 1 50 3.044522 3.044522 375 +archiv 1 49 3.044522 3.044522 364 +get 1 46 3.091042 3.091042 380 +adapt 1 46 3.091042 3.091042 387 +anoth 1 45 3.135494 3.135494 408 +form 1 39 3.258097 3.258097 443 +electr 2 38 3.295837 6.591674 461 +ofth 1 36 3.367296 3.367296 491 +photo 1 31 3.496508 3.496508 561 +rather 1 26 3.688879 3.688879 642 +task 1 25 3.737670 3.737670 678 +ofwashington 1 22 3.850148 3.850148 766 +rout 2 21 3.912023 7.824046 793 +spend 1 19 4.007333 4.007333 850 +speed 1 18 4.060443 4.060443 911 +minim 1 18 4.060443 4.060443 887 +took 1 16 4.174387 4.174387 1010 +doesn 1 15 4.248495 4.248495 1055 +signific 1 13 4.382027 4.382027 1125 +kevin 2 9 4.753590 9.507180 1482 +suitabl 1 9 4.753590 4.753590 1486 +pacif 2 8 4.875197 9.750394 1674 +root 1 8 4.875197 4.875197 1650 +brain 1 8 4.875197 4.875197 1638 +router 1 8 4.875197 4.875197 1772 +spot 1 7 5.010635 5.010635 1894 +explain 1 7 5.010635 5.010635 1816 +multicomput 1 7 5.010635 5.010635 1890 +rock 1 6 5.164786 5.164786 2164 +chaotic 2 5 5.347108 10.694216 2566 +coral 1 5 5.347108 5.347108 2538 +engineeringat 1 5 5.347108 5.347108 2561 +wander 1 4 5.568345 5.568345 2896 +chaoticrout 1 4 5.568345 5.568345 3063 +bold 1 3 5.857933 5.857933 3846 +tenur 1 3 5.857933 5.857933 3801 +researchassoci 1 3 5.857933 5.857933 3664 +nervou 1 2 6.263398 6.263398 5953 +conscious 1 2 6.263398 6.263398 5954 +boldingkwb 1 1 6.957497 6.957497 15786 +juvenil 1 1 6.957497 6.957497 15787 +squirt 1 1 6.957497 6.957497 15788 +hunk 1 1 6.957497 6.957497 15789 +cling 1 1 6.957497 6.957497 15790 +rudimentari 1 1 6.957497 6.957497 15791 +eat 1 1 6.957497 6.957497 15792 +dennett 1 1 6.957497 6.957497 15793 +latencylan 1 1 6.957497 6.957497 15794 +researchha 1 1 6.957497 6.957497 15795 +formass 1 1 6.957497 6.957497 15796 +comethyakutak 1 1 6.957497 6.957497 15797 +moustach 1 1 6.957497 6.957497 15798 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ladner^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ladner^ new file mode 100644 index 00000000..0e3fd124 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ladner^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +washington 3 236 1.386294 4.158882 32 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +introduct 2 126 2.079442 4.158884 87 +spring 2 131 2.079442 4.158884 88 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +structur 1 106 2.197225 2.197225 105 +commun 1 95 2.397895 2.397895 157 +sieg 1 69 2.708050 2.708050 260 +room 1 59 2.833213 2.833213 301 +quarter 4 47 3.091042 12.364168 389 +formal 1 37 3.332205 3.332205 478 +short 1 36 3.367296 3.367296 499 +winter 1 36 3.367296 3.367296 500 +richard 1 31 3.496508 3.496508 559 +ladner 2 6 5.164786 10.329572 2062 +ladnerrichard 1 1 6.957497 6.957497 15799 +ladnerprofessor 1 1 6.957497 6.957497 15800 +biographyresearch 1 1 6.957497 6.957497 15801 +studentsteachingcomput 1 1 6.957497 6.957497 15802 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^larry^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^larry^index.html new file mode 100644 index 00000000..628d09c0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^larry^index.html @@ -0,0 +1,113 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +work 3 380 0.693147 2.079441 9 +system 3 443 0.693147 2.079441 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +washington 4 236 1.386294 5.545176 32 +design 2 213 1.386294 2.772588 25 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +develop 2 174 1.791759 3.583518 53 +recent 1 167 1.791759 1.791759 58 +area 2 144 1.945910 3.891820 80 +hall 1 146 1.945910 1.945910 65 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +technic 1 100 2.302585 2.302585 140 +sinc 1 90 2.397895 2.397895 159 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +integr 3 67 2.708050 8.124150 245 +sieg 1 69 2.708050 2.708050 260 +test 1 66 2.708050 2.708050 252 +laboratori 3 63 2.772589 8.317767 292 +function 2 62 2.772589 5.545178 275 +evalu 1 64 2.772589 2.772589 266 +creat 1 63 2.772589 2.772589 277 +room 1 59 2.833213 2.833213 301 +allow 1 53 2.944439 2.944439 333 +small 1 39 3.258097 3.258097 447 +join 1 39 3.258097 3.258097 457 +purpos 1 37 3.332205 3.332205 481 +cost 1 37 3.332205 3.332205 480 +staff 2 36 3.367296 6.734592 490 +ofth 1 36 3.367296 3.367296 491 +return 1 34 3.401197 3.401197 502 +board 1 33 3.433987 3.433987 528 +articl 1 33 3.433987 3.433987 530 +focu 1 30 3.555348 3.555348 571 +packag 1 28 3.610918 3.610918 614 +symbol 1 27 3.637586 3.637586 620 +primari 1 25 3.737670 3.737670 669 +ofwashington 1 22 3.850148 3.850148 766 +director 1 22 3.850148 3.850148 767 +chip 2 21 3.912023 7.824046 770 +voic 1 21 3.912023 3.912023 806 +vlsi 1 21 3.912023 3.912023 795 +supervis 1 20 3.951244 3.951244 840 +spars 1 16 4.174387 4.174387 989 +later 1 15 4.248495 4.248495 1043 +driven 1 15 4.248495 4.248495 1048 +larri 6 13 4.382027 26.292162 1142 +calcul 1 12 4.465908 4.465908 1268 +captur 1 12 4.465908 4.465908 1232 +fpga 3 10 4.653960 13.961880 1433 +matric 1 10 4.653960 4.653960 1399 +router 1 8 4.875197 4.875197 1772 +upcom 1 8 4.875197 4.875197 1685 +northwest 1 7 5.010635 5.010635 1973 +densiti 1 7 5.010635 5.010635 1927 +quantum 1 6 5.164786 5.164786 2214 +chemistri 3 5 5.347108 16.041324 2405 +mcmurchi 2 4 5.568345 11.136690 2757 +western 1 4 5.568345 5.568345 3062 +comprehens 1 4 5.568345 5.568345 2745 +andengin 1 4 5.568345 5.568345 3042 +coauthor 1 4 5.568345 5.568345 3064 +tester 1 4 5.568345 5.568345 2754 +triptych 1 4 5.568345 5.568345 3061 +mactest 2 3 5.857933 11.715866 3972 +ofintegr 1 2 6.263398 6.263398 5324 +gaussian 1 2 6.263398 6.263398 4763 +molecul 1 2 6.263398 6.263398 5246 +representationof 1 2 6.263398 6.263398 4119 +andha 1 2 6.263398 6.263398 5955 +mcmurchiedepart 1 1 6.957497 6.957497 15803 +integratedsystem 1 1 6.957497 6.957497 15804 +hework 1 1 6.957497 6.957497 15805 +theconstruct 1 1 6.957497 6.957497 15806 +hamiltonian 1 1 6.957497 6.957497 15807 +coauthorof 1 1 6.957497 6.957497 15808 +meld 1 1 6.957497 6.957497 15809 +abinitio 1 1 6.957497 6.957497 15810 +wirec 1 1 6.957497 6.957497 15811 +aschemat 1 1 6.957497 6.957497 15812 +withschemat 1 1 6.957497 6.957497 15813 +concis 1 1 6.957497 6.957497 15814 +parameteriz 1 1 6.957497 6.957497 15815 +andcommerci 1 1 6.957497 6.957497 15816 +hardwareenviron 1 1 6.957497 6.957497 15817 +andsubsystem 1 1 6.957497 6.957497 15818 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^leveson^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^leveson^ new file mode 100644 index 00000000..d54bc0d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^leveson^ @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 8 443 0.693147 5.545176 6 +research 4 431 0.693147 2.772588 10 +inform 3 412 0.693147 2.079441 8 +work 2 380 0.693147 1.386294 9 +engin 4 297 1.098612 4.394448 20 +project 3 340 1.098612 3.295836 18 +student 2 343 1.098612 2.197224 19 +current 2 284 1.098612 2.197224 21 +time 1 293 1.098612 1.098612 17 +softwar 10 220 1.386294 13.862940 30 +design 4 213 1.386294 5.545176 25 +languag 3 227 1.386294 4.158882 26 +gener 3 220 1.386294 4.158882 27 +washington 2 236 1.386294 2.772588 32 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +paper 3 205 1.609438 4.828314 38 +includ 2 208 1.609438 3.218876 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +recent 3 167 1.791759 5.375277 58 +avail 3 169 1.791759 5.375277 48 +read 1 154 1.791759 1.791759 47 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +model 6 145 1.945910 11.675460 69 +professor 4 137 1.945910 7.783640 76 +year 2 148 1.945910 3.891820 84 +area 2 144 1.945910 3.891820 80 +hall 1 146 1.945910 1.945910 65 +problem 1 147 1.945910 1.945910 75 +like 1 132 1.945910 1.945910 81 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +analysi 4 124 2.079442 8.317768 98 +machin 3 129 2.079442 6.238326 95 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +specif 4 106 2.197225 8.788900 106 +topic 2 114 2.197225 4.394450 110 +pleas 1 113 2.197225 2.197225 114 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +real 2 93 2.397895 4.795790 144 +search 1 95 2.397895 2.397895 155 +question 1 91 2.397895 2.397895 141 +associ 1 93 2.397895 2.397895 151 +follow 1 92 2.397895 2.397895 143 +call 1 91 2.397895 2.397895 153 +member 3 84 2.484907 7.454721 165 +build 2 85 2.484907 4.969814 184 +requir 2 81 2.484907 4.969814 167 +start 1 83 2.484907 2.484907 173 +learn 1 86 2.484907 2.484907 170 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +interfac 3 79 2.564949 7.694847 209 +come 1 78 2.564949 2.564949 202 +state 1 76 2.564949 2.564949 207 +issu 1 78 2.564949 2.564949 211 +nation 2 74 2.639057 5.278114 240 +appli 2 71 2.639057 5.278114 226 +involv 1 71 2.639057 2.639057 227 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +goal 1 66 2.708050 2.708050 250 +practic 1 70 2.708050 2.708050 246 +result 1 65 2.772589 2.772589 281 +polici 1 64 2.772589 2.772589 279 +evalu 1 64 2.772589 2.772589 266 +copi 1 63 2.772589 2.772589 284 +interact 1 62 2.772589 2.772589 270 +space 2 57 2.890372 5.780744 310 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +publish 1 57 2.890372 2.890372 326 +finger 2 52 2.995732 5.991464 354 +life 2 50 3.044522 6.089044 375 +principl 1 48 3.044522 3.044522 357 +california 2 46 3.091042 6.182084 388 +favorit 1 44 3.135494 3.135494 410 +autom 1 41 3.218876 3.218876 434 +editor 1 41 3.218876 3.218876 433 +join 1 39 3.258097 3.258097 457 +form 1 39 3.258097 3.258097 443 +late 1 40 3.258097 3.258097 439 +transact 1 39 3.258097 3.258097 438 +field 1 37 3.332205 3.332205 482 +tree 1 36 3.367296 3.367296 492 +committe 1 34 3.401197 3.401197 522 +award 1 34 3.401197 3.401197 523 +toler 1 33 3.433987 3.433987 533 +board 1 33 3.433987 3.433987 528 +human 4 32 3.465736 13.862944 546 +fault 2 32 3.465736 6.931472 547 +express 1 32 3.465736 3.465736 540 +titl 1 31 3.496508 3.496508 556 +specifi 2 30 3.555348 7.110696 568 +produc 1 30 3.555348 3.555348 572 +chair 1 29 3.583519 3.583519 596 +except 1 28 3.610918 3.610918 607 +actual 1 28 3.610918 3.610918 604 +though 1 27 3.637586 3.637586 622 +determin 1 27 3.637586 3.637586 630 +spent 1 25 3.737670 3.737670 676 +concern 1 25 3.737670 3.737670 666 +never 1 25 3.737670 3.737670 671 +wai 1 25 3.737670 3.737670 662 +fellow 1 24 3.761200 3.761200 701 +properti 1 22 3.850148 3.850148 749 +director 1 22 3.850148 3.850148 767 +avoid 2 21 3.912023 7.824046 799 +fact 1 21 3.912023 3.912023 780 +safeti 7 20 3.951244 27.658708 817 +verif 1 20 3.951244 3.951244 826 +citi 1 19 4.007333 4.007333 874 +failur 1 18 4.060443 4.060443 898 +seem 1 18 4.060443 4.060443 899 +behavior 1 18 4.060443 4.060443 881 +engineeringunivers 1 17 4.110874 4.110874 959 +analyz 1 17 4.110874 4.110874 925 +anyth 2 16 4.174387 8.348774 998 +advantag 1 16 4.174387 4.174387 987 +commerci 1 16 4.174387 4.174387 1005 +weslei 1 16 4.174387 4.174387 983 +anywai 1 15 4.248495 4.248495 1047 +contribut 1 15 4.248495 4.248495 1021 +qual 1 15 4.248495 4.248495 1062 +style 1 15 4.248495 4.248495 1036 +train 1 14 4.317488 4.317488 1066 +deriv 2 13 4.382027 8.764054 1145 +washingtonbox 1 13 4.382027 4.382027 1200 +conf 1 13 4.382027 4.382027 1181 +nanci 3 12 4.465908 13.397724 1256 +safe 2 12 4.465908 8.931816 1274 +addison 1 12 4.465908 4.465908 1230 +council 2 11 4.553877 9.107754 1364 +valid 1 11 4.553877 4.553877 1299 +leveson 7 9 4.753590 33.275130 1540 +mode 1 9 4.753590 4.753590 1492 +irvin 1 8 4.875197 4.875197 1660 +matter 1 8 4.875197 4.875197 1627 +claim 1 8 4.875197 4.875197 1664 +elect 1 8 4.875197 4.875197 1771 +analys 1 8 4.875197 4.875197 1666 +perhap 1 8 4.875197 4.875197 1693 +chief 1 7 5.010635 5.010635 1829 +awar 1 7 5.010635 5.010635 1800 +rain 1 6 5.164786 5.164786 2137 +highwai 1 6 5.164786 5.164786 2095 +softwareengin 1 6 5.164786 5.164786 2162 +ucla 1 5 5.347108 5.347108 2502 +lesson 1 5 5.347108 5.347108 2568 +adopt 1 5 5.347108 5.347108 2467 +aircraft 4 4 5.568345 22.273380 2872 +melbourn 1 4 5.568345 5.568345 3035 +rsml 2 3 5.857933 11.715866 3967 +loss 1 3 5.857933 5.857933 3805 +automobil 1 3 5.857933 5.857933 3709 +aerospac 1 3 5.857933 5.857933 3555 +hazard 1 3 5.857933 5.857933 3191 +tca 2 2 6.263398 12.526796 5941 +collis 2 2 6.263398 12.526796 5956 +nobodi 1 2 6.263398 6.263398 5474 +thatyou 1 2 6.263398 6.263398 4682 +computingresearch 1 2 6.263398 6.263398 5957 +shuttl 1 2 6.263398 6.263398 4787 +aiaa 1 2 6.263398 6.263398 5239 +aeronaut 1 2 6.263398 6.263398 5958 +andscienc 1 2 6.263398 6.263398 5796 +safewar 1 2 6.263398 6.263398 5959 +isalso 1 2 6.263398 6.263398 5640 +pressur 1 2 6.263398 6.263398 5960 +accid 1 2 6.263398 6.263398 5961 +airport 1 2 6.263398 6.263398 5962 +levesondepart 1 1 6.957497 6.957497 15819 +mathand 1 1 6.957497 6.957497 15820 +misanthrop 1 1 6.957497 6.957497 15821 +aform 1 1 6.957497 6.957497 15822 +airspac 1 1 6.957497 6.957497 15823 +theiroffici 1 1 6.957497 6.957497 15824 +safetyresearch 1 1 6.957497 6.957497 15825 +subtop 1 1 6.957497 6.957497 15826 +commissionon 1 1 6.957497 6.957497 15827 +levesoni 1 1 6.957497 6.957497 15828 +systemsaward 1 1 6.957497 6.957497 15829 +promotingrespons 1 1 6.957497 6.957497 15830 +propertyar 1 1 6.957497 6.957497 15831 +stake 1 1 6.957497 6.957497 15832 +keynoteaddress 1 1 6.957497 6.957497 15833 +steam 1 1 6.957497 6.957497 15834 +hazardanalysi 1 1 6.957497 6.957497 15835 +writtenin 1 1 6.957497 6.957497 15836 +newrequir 1 1 6.957497 6.957497 15837 +cockpit 1 1 6.957497 6.957497 15838 +problemsand 1 1 6.957497 6.957497 15839 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^levy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^levy^ new file mode 100644 index 00000000..69fbd352 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^levy^ @@ -0,0 +1,203 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 17 443 0.693147 11.783499 6 +research 3 431 0.693147 2.079441 10 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 3 343 1.098612 3.295836 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +languag 4 227 1.386294 5.545176 26 +washington 3 236 1.386294 4.158882 32 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +oper 12 180 1.609438 19.313256 34 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +distribut 4 162 1.791759 7.167036 51 +parallel 2 169 1.791759 3.583518 60 +recent 2 167 1.791759 3.583518 58 +address 2 170 1.791759 3.583518 62 +implement 2 152 1.791759 3.583518 52 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +data 1 170 1.791759 1.791759 49 +architectur 7 139 1.945910 13.621370 77 +support 4 132 1.945910 7.783640 83 +professor 1 137 1.945910 1.945910 76 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +tool 1 117 2.079442 2.079442 93 +seattl 1 120 2.079442 2.079442 103 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +intern 2 108 2.197225 4.394450 128 +manag 1 114 2.197225 2.197225 125 +memori 2 101 2.302585 4.605170 139 +book 1 99 2.302585 2.302585 131 +octob 3 89 2.397895 7.193685 156 +select 1 91 2.397895 2.397895 154 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +environ 2 84 2.484907 4.969814 177 +novemb 2 81 2.484907 4.969814 179 +academ 1 82 2.484907 2.484907 178 +help 1 83 2.484907 2.484907 175 +control 1 82 2.484907 2.484907 164 +optim 1 79 2.564949 2.564949 197 +come 1 78 2.564949 2.564949 202 +master 1 76 2.564949 2.564949 216 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +symposium 5 72 2.639057 13.195285 238 +effici 1 73 2.639057 2.639057 233 +integr 1 67 2.708050 2.708050 245 +plai 1 60 2.833213 2.833213 307 +share 1 59 2.833213 2.833213 304 +space 2 57 2.890372 5.780744 310 +faculti 1 56 2.890372 2.890372 325 +special 1 56 2.890372 2.890372 320 +major 1 56 2.890372 2.890372 315 +sampl 1 53 2.944439 2.944439 339 +instruct 1 53 2.944439 2.944439 332 +processor 1 54 2.944439 2.944439 335 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +principl 3 48 3.044522 9.133566 357 +numer 1 49 3.044522 3.044522 369 +execut 1 45 3.135494 3.135494 404 +join 1 39 3.258097 3.258097 457 +author 1 39 3.258097 3.258097 450 +annual 1 40 3.258097 3.258097 458 +transact 1 39 3.258097 3.258097 438 +workstat 3 37 3.332205 9.996615 479 +michael 3 35 3.401197 10.203591 514 +singl 2 34 3.401197 6.802394 510 +global 2 34 3.401197 6.802394 520 +award 1 34 3.401197 3.401197 523 +posit 1 31 3.496508 3.496508 552 +focu 1 30 3.555348 3.555348 571 +produc 1 30 3.555348 3.555348 572 +rang 1 30 3.555348 3.555348 565 +chair 2 29 3.583519 7.167038 596 +held 1 28 3.610918 3.610918 600 +usual 1 28 3.610918 3.610918 608 +cluster 1 28 3.610918 3.610918 612 +except 1 28 3.610918 3.610918 607 +team 1 27 3.637586 3.637586 625 +proc 6 26 3.688879 22.133274 649 +consult 1 24 3.761200 3.761200 687 +fellow 1 24 3.761200 3.761200 701 +lab 1 24 3.761200 3.761200 698 +handl 1 24 3.761200 3.761200 685 +lead 1 23 3.806662 3.806662 718 +deal 1 22 3.850148 3.850148 736 +william 1 22 3.850148 3.850148 765 +corpor 1 21 3.912023 3.912023 802 +chip 1 21 3.912023 3.912023 770 +binari 1 20 3.951244 3.951244 823 +supervis 1 20 3.951244 3.951244 840 +tenni 1 20 3.951244 3.951244 838 +exploit 1 20 3.951244 3.951244 836 +particularli 1 19 4.007333 4.007333 867 +separ 1 19 4.007333 4.007333 844 +four 1 18 4.060443 4.060443 905 +asplo 2 17 4.110874 8.221748 948 +former 1 17 4.110874 4.110874 956 +protect 1 17 4.110874 4.110874 935 +latenc 1 16 4.174387 4.174387 993 +choic 1 16 4.174387 4.174387 979 +transfer 1 16 4.174387 4.174387 967 +susan 2 15 4.248495 8.496990 1050 +levi 12 14 4.317488 51.809856 1093 +dean 2 14 4.317488 8.634976 1104 +coher 1 14 4.317488 4.317488 1109 +karlin 2 13 4.382027 8.764054 1176 +conf 2 13 4.382027 8.764054 1181 +mellon 1 13 4.382027 4.382027 1179 +hank 4 12 4.465908 17.863632 1253 +carnegi 1 12 4.465908 4.465908 1260 +anna 1 12 4.465908 4.465908 1292 +multithread 3 11 4.553877 13.661631 1315 +thedepart 1 11 4.553877 4.553877 1350 +henri 6 10 4.653960 27.923760 1417 +equip 1 10 4.653960 4.653960 1459 +ski 1 10 4.653960 4.653960 1471 +bike 1 10 4.653960 4.653960 1468 +death 1 10 4.653960 4.653960 1457 +jeffrei 2 9 4.753590 9.507180 1612 +softbal 1 9 4.753590 4.753590 1594 +voelker 1 9 4.753590 4.753590 1557 +vernon 1 9 4.753590 4.753590 1556 +morgan 1 9 4.753590 4.753590 1484 +egger 2 8 4.875197 9.750394 1695 +sigop 1 8 4.875197 4.875197 1727 +hold 1 8 4.875197 4.875197 1645 +span 1 8 4.875197 4.875197 1751 +evan 1 8 4.875197 4.875197 1633 +inproceed 1 8 4.875197 4.875197 1670 +jack 1 8 4.875197 4.875197 1780 +feelei 4 7 5.010635 20.042540 1859 +instrument 1 7 5.010635 5.010635 1954 +smile 1 7 5.010635 5.010635 1807 +maxim 1 7 5.010635 5.010635 1944 +simultan 2 6 5.164786 10.329572 2155 +tullsen 2 6 5.164786 10.329572 2081 +outstand 1 6 5.164786 5.164786 2136 +onoper 1 6 5.164786 5.164786 2048 +tobe 1 6 5.164786 5.164786 1995 +scholar 1 6 5.164786 5.164786 2180 +nine 1 6 5.164786 5.164786 2047 +rebecca 1 6 5.164786 5.164786 2174 +vivek 1 6 5.164786 5.164786 2210 +theth 1 5 5.347108 5.347108 2325 +seventh 1 5 5.347108 5.347108 2464 +fetch 1 5 5.347108 5.347108 2567 +chase 2 4 5.568345 11.136690 2897 +lazowska 2 4 5.568345 11.136690 2694 +arch 2 4 5.568345 11.136690 2995 +prog 2 4 5.568345 11.136690 2740 +opal 1 4 5.568345 5.568345 3057 +fulbright 1 4 5.568345 5.568345 2963 +escap 1 4 5.568345 5.568345 3016 +pighin 1 4 5.568345 5.568345 2735 +narasayya 1 4 5.568345 5.568345 3065 +thekkath 3 3 5.857933 17.573799 3973 +recipi 1 3 5.857933 5.857933 3627 +eleven 1 3 5.857933 5.857933 3824 +freder 1 3 5.857933 5.857933 3352 +emer 1 3 5.857933 5.857933 3969 +stamm 1 3 5.857933 5.857933 3970 +dessert 2 2 6.263398 12.526796 5194 +projecti 1 2 6.263398 6.263398 5963 +befound 1 2 6.263398 6.263398 5964 +infam 1 2 6.263398 6.263398 5859 +subpag 1 2 6.263398 6.263398 5926 +jamrozik 1 2 6.263398 6.263398 5925 +chandramohan 1 2 6.263398 6.263398 5965 +projectcal 1 1 6.957497 6.957497 15840 +theetch 1 1 6.957497 6.957497 15841 +consecutiveacm 1 1 6.957497 6.957497 15842 +symposia 1 1 6.957497 6.957497 15843 +universityand 1 1 6.957497 6.957497 15844 +machineryand 1 1 6.957497 6.957497 15845 +survivedlevi 1 1 6.957497 6.957497 15846 +haveal 1 1 6.957497 6.957497 15847 +glu 1 1 6.957497 6.957497 15848 +potato 1 1 6.957497 6.957497 15849 +parlor 1 1 6.957497 6.957497 15850 +publicationsreduc 1 1 6.957497 6.957497 15851 +implementablesimultan 1 1 6.957497 6.957497 15852 +joen 1 1 6.957497 6.957497 15853 +edwardd 1 1 6.957497 6.957497 15854 +recover 1 1 6.957497 6.957497 15855 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html new file mode 100644 index 00000000..504c9b6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^levy^opal^opal.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +time 2 293 1.098612 2.197224 17 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +oper 3 180 1.609438 4.828314 34 +list 2 201 1.609438 3.218876 39 +paper 1 205 1.609438 1.609438 38 +address 5 170 1.791759 8.958795 62 +data 2 170 1.791759 3.583518 49 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +avail 1 169 1.791759 1.791759 48 +relat 2 139 1.945910 3.891820 68 +object 1 138 1.945910 1.945910 79 +perform 1 143 1.945910 1.945910 74 +provid 1 121 2.079442 2.079442 94 +structur 4 106 2.197225 8.788900 105 +code 1 108 2.197225 2.197225 116 +need 2 98 2.302585 4.605170 135 +access 2 102 2.302585 4.605170 136 +larg 2 82 2.484907 4.969814 168 +chang 1 82 2.484907 2.484907 163 +member 1 84 2.484907 2.484907 165 +dynam 1 76 2.564949 2.564949 194 +sourc 1 77 2.564949 2.564949 201 +differ 1 66 2.708050 2.708050 253 +complex 2 64 2.772589 5.545178 269 +virtual 1 62 2.772589 2.772589 285 +result 1 65 2.772589 2.772589 281 +organ 1 65 2.772589 2.772589 265 +share 4 59 2.833213 11.332852 304 +space 4 57 2.890372 11.561488 310 +explor 1 58 2.890372 2.890372 324 +faculti 1 56 2.890372 2.890372 325 +much 1 52 2.995732 2.995732 349 +right 2 48 3.044522 6.089044 363 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +archiv 1 49 3.044522 3.044522 364 +execut 1 45 3.135494 3.135494 404 +anoth 1 45 3.135494 3.135494 408 +prototyp 1 38 3.295837 3.295837 463 +ofth 1 36 3.367296 3.367296 491 +singl 4 34 3.401197 13.604788 510 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +storag 1 31 3.496508 3.496508 553 +exist 1 30 3.555348 3.555348 569 +option 1 30 3.555348 3.555348 575 +depend 1 29 3.583519 3.583519 583 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +manipul 1 27 3.637586 3.637586 624 +enhanc 1 26 3.688879 3.688879 644 +jeff 1 25 3.737670 3.737670 673 +interpret 1 24 3.761200 3.761200 686 +mike 1 24 3.761200 3.761200 703 +thread 1 23 3.806662 3.806662 722 +cooper 2 22 3.850148 7.700296 757 +defin 1 22 3.850148 3.850148 746 +thu 1 21 3.912023 3.912023 773 +flexibl 1 21 3.912023 3.912023 792 +protect 3 17 4.110874 12.332622 935 +permit 1 16 4.174387 4.174387 962 +easili 1 14 4.317488 4.317488 1077 +levi 1 14 4.317488 4.317488 1093 +directli 1 13 4.382027 4.382027 1141 +translat 1 13 4.382027 4.382027 1164 +believ 1 13 4.382027 4.382027 1187 +uniqu 1 12 4.465908 4.465908 1228 +hank 1 12 4.465908 4.465908 1253 +alpha 2 11 4.553877 9.107754 1348 +persist 1 11 4.553877 4.553877 1367 +trust 1 9 4.753590 4.753590 1583 +parti 1 8 4.875197 4.875197 1676 +mach 1 8 4.875197 4.875197 1669 +dylan 1 8 4.875197 4.875197 1625 +secondari 1 7 5.010635 5.010635 1884 +feelei 1 7 5.010635 5.010635 1859 +huge 1 6 5.164786 5.164786 1991 +bestor 1 6 5.164786 5.164786 2099 +risc 1 6 5.164786 5.164786 2016 +duke 1 6 5.164786 5.164786 2231 +vivek 1 6 5.164786 5.164786 2210 +tiwari 1 5 5.347108 5.347108 2385 +opal 7 4 5.568345 38.978415 3057 +simplifi 1 4 5.568345 5.568345 3066 +mip 1 4 5.568345 5.568345 2738 +transmit 1 4 5.568345 5.568345 2835 +lazowska 1 4 5.568345 5.568345 2694 +chase 1 4 5.568345 5.568345 2897 +narasayya 1 4 5.568345 5.568345 3065 +databaseof 1 2 6.263398 6.263398 4696 +ashutosh 1 2 6.263398 6.263398 5966 +mcname 1 2 6.263398 6.263398 5875 +projectop 1 1 6.957497 6.957497 15856 +tunedto 1 1 6.957497 6.957497 15857 +numberof 1 1 6.957497 6.957497 15858 +andcooper 1 1 6.957497 6.957497 15859 +directlycommun 1 1 6.957497 6.957497 15860 +addressspac 1 1 6.957497 6.957497 15861 +domainthat 1 1 6.957497 6.957497 15862 +oneprocess 1 1 6.957497 6.957497 15863 +protectionstructur 1 1 6.957497 6.957497 15864 +relationshipbetween 1 1 6.957497 6.957497 15865 +canimprov 1 1 6.957497 6.957497 15866 +cooperatingappl 1 1 6.957497 6.957497 15867 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^lopez^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^lopez^ new file mode 100644 index 00000000..975d57fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^lopez^ @@ -0,0 +1,140 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +program 4 374 0.693147 2.772588 7 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +engin 3 297 1.098612 3.295836 20 +student 1 343 1.098612 1.098612 19 +languag 4 227 1.386294 5.545176 26 +washington 3 236 1.386294 4.158882 32 +also 2 259 1.386294 2.772588 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +implement 2 152 1.791759 3.583518 52 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +object 6 138 1.945910 11.675460 79 +hall 1 146 1.945910 1.945910 65 +year 1 148 1.945910 1.945910 84 +note 1 142 1.945910 1.945910 67 +confer 3 126 2.079442 6.238326 100 +technolog 2 131 2.079442 4.158884 102 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +high 1 130 2.079442 2.079442 101 +topic 2 114 2.197225 4.394450 110 +well 1 109 2.197225 2.197225 121 +advanc 2 99 2.302585 4.605170 130 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +octob 1 89 2.397895 2.397895 156 +school 1 84 2.484907 2.484907 188 +orient 5 80 2.564949 12.824745 205 +issu 1 78 2.564949 2.564949 211 +meet 1 72 2.639057 2.639057 229 +sieg 1 69 2.708050 2.708050 260 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +virtual 1 62 2.772589 2.772589 285 +previou 1 62 2.772589 2.772589 290 +juli 1 60 2.833213 2.833213 305 +march 1 61 2.833213 2.833213 295 +direct 1 57 2.890372 2.890372 316 +cover 1 55 2.944439 2.944439 329 +archiv 1 49 3.044522 3.044522 364 +electron 2 47 3.091042 6.182084 379 +california 1 46 3.091042 3.091042 388 +editor 1 41 3.218876 3.218876 433 +past 1 42 3.218876 3.218876 428 +tutori 8 39 3.258097 26.064776 437 +submit 1 39 3.258097 3.258097 440 +vita 1 38 3.295837 3.295837 473 +brian 1 38 3.295837 3.295837 466 +industri 1 38 3.295837 3.295837 464 +respons 1 37 3.332205 3.332205 476 +curriculum 1 33 3.433987 3.433987 535 +chair 2 29 3.583519 7.167038 596 +consid 1 29 3.583519 3.583519 590 +propos 4 28 3.610918 14.443672 602 +constraint 5 26 3.688879 18.444395 636 +request 1 26 3.688879 3.688879 635 +aspect 1 25 3.737670 3.737670 663 +seri 1 24 3.761200 3.761200 708 +known 1 24 3.761200 3.761200 702 +lead 1 23 3.806662 3.806662 718 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +born 3 21 3.912023 11.736069 798 +qualiti 1 20 3.951244 3.951244 832 +accept 3 18 4.060443 12.181329 879 +encourag 2 18 4.060443 8.120886 880 +anyon 1 17 4.110874 4.110874 916 +jose 1 16 4.174387 4.174387 976 +alan 3 13 4.382027 13.146081 1146 +speak 1 12 4.465908 4.465908 1283 +readi 1 12 4.465908 4.465908 1242 +submiss 2 11 4.553877 9.107754 1298 +itali 1 11 4.553877 4.553877 1378 +inproceed 2 8 4.875197 9.750394 1670 +european 1 8 4.875197 4.875197 1763 +upcom 1 8 4.875197 4.875197 1685 +depth 1 8 4.875197 4.875197 1636 +edg 1 8 4.875197 4.875197 1647 +star 1 8 4.875197 4.875197 1717 +portland 1 7 5.010635 5.010635 1878 +oopsla 6 6 5.164786 30.988716 2221 +oregon 1 5 5.347108 5.347108 2437 +imper 3 4 5.568345 16.705035 3067 +freeman 3 4 5.568345 16.705035 2725 +ident 1 4 5.568345 5.568345 2826 +andsoftwar 1 4 5.568345 5.568345 2753 +breadth 1 4 5.568345 5.568345 2695 +green 1 4 5.568345 5.568345 2848 +nato 1 3 5.857933 5.857933 3587 +kaleidoscop 2 2 6.263398 12.526796 5780 +bologna 1 2 6.263398 6.263398 5631 +programmingsystem 1 2 6.263398 6.263398 5688 +hotlin 1 2 6.263398 6.263398 5967 +hendrix 1 2 6.263398 6.263398 5490 +grave 1 2 6.263398 6.263398 5968 +war 1 2 6.263398 6.263398 5969 +collector 1 2 6.263398 6.263398 5683 +lopez 3 1 6.957497 20.872491 15868 +bjorn 3 1 6.957497 20.872491 15869 +benson 3 1 6.957497 20.872491 15870 +lopezgu 1 1 6.957497 6.957497 15871 +lopezlopez 1 1 6.957497 6.957497 15872 +dissertationresearch 1 1 6.957497 6.957497 15873 +publicationsgu 1 1 6.957497 6.957497 15874 +mayoh 1 1 6.957497 6.957497 15875 +tougu 1 1 6.957497 6.957497 15876 +jann 1 1 6.957497 6.957497 15877 +penjam 1 1 6.957497 6.957497 15878 +constraintprogram 1 1 6.957497 6.957497 15879 +instituteseri 1 1 6.957497 6.957497 15880 +publisheda 1 1 6.957497 6.957497 15881 +tutorialsi 1 1 6.957497 6.957497 15882 +conferencein 1 1 6.957497 6.957497 15883 +itsextens 1 1 6.957497 6.957497 15884 +tutorialshav 1 1 6.957497 6.957497 15885 +introductorysurvei 1 1 6.957497 6.957497 15886 +academicresearch 1 1 6.957497 6.957497 15887 +attende 1 1 6.957497 6.957497 15888 +weespeci 1 1 6.957497 6.957497 15889 +requestguidelin 1 1 6.957497 6.957497 15890 +theoopsla 1 1 6.957497 6.957497 15891 +enthusiast 1 1 6.957497 6.957497 15892 +proposalswithout 1 1 6.957497 6.957497 15893 +notif 1 1 6.957497 6.957497 15894 +withcamera 1 1 6.957497 6.957497 15895 +jimi 1 1 6.957497 6.957497 15896 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^madani^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^madani^ new file mode 100644 index 00000000..b2ddba98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^madani^ @@ -0,0 +1,41 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +washington 2 236 1.386294 2.772588 32 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +year 1 148 1.945910 1.945910 84 +like 1 132 1.945910 1.945910 81 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +look 1 107 2.197225 2.197225 115 +graphic 1 90 2.397895 2.397895 147 +academ 1 82 2.484907 2.484907 178 +want 1 79 2.564949 2.564949 199 +best 1 59 2.833213 2.833213 299 +browser 1 56 2.890372 2.890372 313 +life 1 50 3.044522 3.044522 375 +keep 1 44 3.135494 3.135494 409 +citi 1 19 4.007333 4.007333 874 +chateau 1 16 4.174387 4.174387 997 +fourth 1 16 4.174387 4.174387 999 +countri 1 15 4.248495 4.248495 1059 +suit 1 13 4.382027 4.382027 1129 +touch 1 12 4.465908 4.465908 1288 +curiou 1 5 5.347108 5.347108 2541 +areasinclud 1 2 6.263398 6.263398 5747 +omid 2 1 6.957497 13.914994 15897 +madani 2 1 6.957497 13.914994 15898 +bhello 1 1 6.957497 6.957497 15899 +enjoytheori 1 1 6.957497 6.957497 15900 +islamicarchitectur 1 1 6.957497 6.957497 15901 +isfahan 1 1 6.957497 6.957497 15902 +nomine 1 1 6.957497 6.957497 15903 +iran 1 1 6.957497 6.957497 15904 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^map^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^map^ new file mode 100644 index 00000000..d740431a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^map^ @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +washington 1 236 1.386294 1.386294 32 +area 1 144 1.945910 1.945910 80 +written 1 63 2.772589 2.772589 278 +music 1 42 3.218876 3.218876 436 +mike 3 24 3.761200 11.283600 703 +goe 1 15 4.248495 4.248495 1044 +creativ 1 8 4.875197 4.875197 1777 +academia 1 6 5.164786 5.164786 2036 +perkowitz 2 2 6.263398 12.526796 5970 +perkowitznewsflash 1 1 6.957497 6.957497 15905 +blond 1 1 6.957497 6.957497 15906 +randomfavorit 1 1 6.957497 6.957497 15907 +sheba 1 1 6.957497 6.957497 15908 +voyeur 1 1 6.957497 6.957497 15909 +grooveneedl 1 1 6.957497 6.957497 15910 +espressoresumemik 1 1 6.957497 6.957497 15911 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^marclang^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^marclang^ new file mode 100644 index 00000000..b5671a3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^marclang^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +depart 3 457 0.693147 2.079441 12 +program 1 374 0.693147 0.693147 7 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +washington 5 236 1.386294 6.931470 32 +email 2 220 1.386294 2.772588 29 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +check 1 115 2.197225 2.197225 118 +pleas 1 113 2.197225 2.197225 114 +access 1 102 2.302585 2.302585 136 +follow 1 92 2.397895 2.397895 143 +resum 1 79 2.564949 2.564949 217 +sieg 1 69 2.708050 2.708050 260 +visit 1 63 2.772589 2.772589 288 +septemb 1 65 2.772589 2.772589 274 +back 1 60 2.833213 2.833213 297 +browser 1 56 2.890372 2.890372 313 +tabl 1 51 2.995732 2.995732 346 +http 1 41 3.218876 3.218876 420 +short 1 36 3.367296 3.367296 499 +spent 1 25 3.737670 3.737670 676 +finish 1 22 3.850148 3.850148 748 +theunivers 1 21 3.912023 3.912023 797 +germani 1 17 4.110874 4.110874 946 +marc 1 8 4.875197 4.875197 1680 +german 1 6 5.164786 5.164786 2190 +langheinrich 3 1 6.957497 20.872491 15912 +bielefeld 3 1 6.957497 20.872491 15913 +marclang 3 1 6.957497 20.872491 15914 +homepagemarc 2 1 6.957497 13.914994 15915 +langheinrichuniversitt 1 1 6.957497 6.957497 15916 +washingtontechnisch 1 1 6.957497 6.957497 15917 +fakultt 1 1 6.957497 6.957497 15918 +scienceemail 1 1 6.957497 6.957497 15919 +imlangh 1 1 6.957497 6.957497 15920 +techfak 1 1 6.957497 6.957497 15921 +eduabout 1 1 6.957497 6.957497 15922 +myselfi 1 1 6.957497 6.957497 15923 +thefulbright 1 1 6.957497 6.957497 15924 +depthinform 1 1 6.957497 6.957497 15925 +biopost 1 1 6.957497 6.957497 15926 +addressa 1 1 6.957497 6.957497 15927 +mastersat 1 1 6.957497 6.957497 15928 +homeschoolgermanyringstra 1 1 6.957497 6.957497 15929 +maintalphon 1 1 6.957497 6.957497 15930 +paulusplatz 1 1 6.957497 6.957497 15931 +bielefeldphon 1 1 6.957497 6.957497 15932 +woodlawn 1 1 6.957497 6.957497 15933 +formatmarc 1 1 6.957497 6.957497 15934 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^marla^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^marla^ new file mode 100644 index 00000000..e23b5d89 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^marla^ @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +program 5 374 0.693147 3.465735 7 +work 4 380 0.693147 2.772588 9 +system 3 443 0.693147 2.079441 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +engin 2 297 1.098612 2.197224 20 +current 2 284 1.098612 2.197224 21 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +softwar 5 220 1.386294 6.931470 30 +languag 5 227 1.386294 6.931470 26 +washington 3 236 1.386294 4.158882 32 +also 1 259 1.386294 1.386294 28 +applic 1 170 1.791759 1.791759 56 +object 2 138 1.945910 3.891820 79 +support 1 132 1.945910 1.945910 83 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +check 1 115 2.197225 2.197225 118 +user 2 104 2.302585 4.605170 137 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +graphic 1 90 2.397895 2.397895 147 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +imag 1 91 2.397895 2.397895 161 +educ 2 86 2.484907 4.969814 191 +journal 2 83 2.484907 4.969814 183 +larg 2 82 2.484907 4.969814 168 +academ 1 82 2.484907 2.484907 178 +learn 1 86 2.484907 2.484907 170 +activ 1 84 2.484907 2.484907 182 +ieee 1 86 2.484907 2.484907 190 +environ 1 84 2.484907 2.484907 177 +interfac 2 79 2.564949 5.129898 209 +resum 1 79 2.564949 2.564949 217 +june 1 79 2.564949 2.564949 214 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +order 1 69 2.708050 2.708050 249 +goal 1 66 2.708050 2.708050 250 +differ 1 66 2.708050 2.708050 253 +sieg 1 69 2.708050 2.708050 260 +interact 2 62 2.772589 5.545178 270 +septemb 1 65 2.772589 2.772589 274 +share 1 59 2.833213 2.833213 304 +march 1 61 2.833213 2.833213 295 +explor 1 58 2.890372 2.890372 324 +space 1 57 2.890372 2.890372 310 +special 1 56 2.890372 2.890372 320 +investig 1 51 2.995732 2.995732 353 +visual 9 48 3.044522 27.400698 372 +editor 1 41 3.218876 3.218876 433 +compani 1 41 3.218876 3.218876 423 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +tutori 1 39 3.258097 3.258097 437 +collabor 2 32 3.465736 6.931472 543 +human 1 32 3.465736 3.465736 546 +given 1 32 3.465736 3.465736 538 +transform 1 32 3.465736 3.465736 542 +scale 1 28 3.610918 3.610918 613 +manipul 1 27 3.637586 3.637586 624 +wai 2 25 3.737670 7.475340 662 +displai 1 23 3.806662 3.806662 712 +cooper 1 22 3.850148 3.850148 757 +geometr 1 19 4.007333 4.007333 852 +hierarch 1 15 4.248495 4.248495 1018 +stephen 3 11 4.553877 13.661631 1342 +fill 1 11 4.553877 4.553877 1349 +itali 1 11 4.553877 4.553877 1378 +packard 1 10 4.653960 4.653960 1444 +classif 1 9 4.753590 4.753590 1586 +partner 1 8 4.875197 4.875197 1648 +yang 1 8 4.875197 4.875197 1652 +baker 7 7 5.010635 35.074445 1812 +chief 1 7 5.010635 5.010635 1829 +bell 1 6 5.164786 5.164786 2224 +carlson 1 5 5.347108 5.347108 2351 +patent 1 5 5.347108 5.347108 2574 +bricker 1 4 5.568345 5.568345 3050 +assess 1 4 5.568345 5.568345 2724 +lauren 1 3 5.857933 5.857933 3251 +metip 1 3 5.857933 5.857933 3937 +marla 8 2 6.263398 50.107184 4510 +eick 4 2 6.263398 25.053592 5971 +burnett 2 2 6.263398 12.526796 4578 +crime 1 2 6.263398 6.263398 5972 +cscl 1 2 6.263398 6.263398 5837 +stevetanimoto 1 2 6.263398 6.263398 5835 +bentlei 1 1 6.957497 6.957497 15935 +interestsgraph 1 1 6.957497 6.957497 15936 +coimag 1 1 6.957497 6.957497 15937 +devleop 1 1 6.957497 6.957497 15938 +contol 1 1 6.957497 6.957497 15939 +cansimultan 1 1 6.957497 6.957497 15940 +publicationsbak 1 1 6.957497 6.957497 15941 +bohu 1 1 6.957497 6.957497 15942 +margaret 1 1 6.957497 6.957497 15943 +sorento 1 1 6.957497 6.957497 15944 +apparatu 1 1 6.957497 6.957497 15945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^matthai^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^matthai^ new file mode 100644 index 00000000..f231e0d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^matthai^ @@ -0,0 +1,88 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +work 3 380 0.693147 2.079441 9 +system 2 443 0.693147 1.386294 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +applic 1 170 1.791759 1.791759 56 +phone 1 175 1.791759 1.791759 45 +implement 1 152 1.791759 1.791759 52 +like 2 132 1.945910 3.891820 81 +support 2 132 1.945910 3.891820 83 +architectur 1 139 1.945910 1.945910 77 +compil 6 122 2.079442 12.476652 96 +seattl 2 120 2.079442 4.158884 103 +confer 1 126 2.079442 2.079442 100 +code 2 108 2.197225 4.394450 116 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +stuff 2 87 2.484907 4.969814 171 +dynam 4 76 2.564949 10.259796 194 +optim 1 79 2.564949 2.564949 197 +good 1 77 2.564949 2.564949 200 +workshop 1 71 2.639057 2.639057 239 +java 1 70 2.708050 2.708050 248 +plai 1 60 2.833213 2.833213 307 +think 1 57 2.890372 2.890372 314 +browser 1 56 2.890372 2.890372 313 +processor 1 54 2.944439 2.944439 335 +extens 1 53 2.944439 2.944439 340 +februari 1 54 2.944439 2.944439 328 +local 1 55 2.944439 2.944439 334 +effect 1 46 3.091042 3.091042 385 +fast 1 42 3.218876 3.218876 429 +past 1 42 3.218876 3.218876 428 +produc 1 30 3.555348 3.555348 572 +constraint 1 26 3.688879 3.688879 636 +bookmark 1 26 3.688879 3.688879 639 +interpret 1 24 3.761200 3.761200 686 +runtim 4 19 4.007333 16.029332 858 +figur 1 18 4.060443 4.060443 903 +bershad 1 18 4.060443 4.060443 902 +event 1 18 4.060443 4.060443 896 +modern 1 16 4.174387 4.174387 966 +side 1 15 4.248495 4.248495 1022 +goe 1 15 4.248495 4.248495 1044 +susan 1 15 4.248495 4.248495 1050 +black 1 10 4.653960 4.653960 1418 +chamber 3 8 4.875197 14.625591 1692 +egger 2 8 4.875197 9.750394 1695 +wire 1 8 4.875197 4.875197 1747 +craig 1 7 5.010635 5.010635 1879 +dispatch 1 7 5.010635 5.010635 1791 +mock 1 6 5.164786 5.164786 2087 +blue 1 6 5.164786 5.164786 2227 +philipos 4 5 5.347108 21.388432 2373 +asystem 1 4 5.568345 5.568345 2612 +andp 1 4 5.568345 5.568345 2811 +pardyak 1 4 5.568345 5.568345 3043 +ausland 2 3 5.857933 11.715866 3917 +matthai 3 2 6.263398 18.790194 4514 +withprofessor 1 2 6.263398 6.263398 5180 +eggersand 1 2 6.263398 6.263398 4522 +ribbon 1 2 6.263398 6.263398 5973 +compileri 1 1 6.957497 6.957497 15946 +beast 1 1 6.957497 6.957497 15947 +shortterm 1 1 6.957497 6.957497 15948 +basedsystem 1 1 6.957497 6.957497 15949 +canbenefit 1 1 6.957497 6.957497 15950 +onprogram 1 1 6.957497 6.957497 15951 +automaticdynam 1 1 6.957497 6.957497 15952 +frequentlymiscellan 1 1 6.957497 6.957497 15953 +importancefrom 1 1 6.957497 6.957497 15954 +abuwhi 1 1 6.957497 6.957497 15955 +campaign 1 1 6.957497 6.957497 15956 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ new file mode 100644 index 00000000..70b964e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mckenzie^ @@ -0,0 +1,175 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +inform 3 412 0.693147 2.079441 8 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +project 3 340 1.098612 3.295836 18 +current 3 284 1.098612 3.295836 21 +us 3 329 1.098612 3.295836 16 +time 2 293 1.098612 2.197224 17 +last 2 314 1.098612 2.197224 14 +student 1 343 1.098612 1.098612 19 +link 4 247 1.386294 5.545176 24 +mail 2 238 1.386294 2.772588 22 +design 2 213 1.386294 2.772588 25 +washington 2 236 1.386294 2.772588 32 +graduat 1 215 1.386294 1.386294 31 +public 2 202 1.609438 3.218876 43 +updat 2 191 1.609438 3.218876 41 +list 1 201 1.609438 1.609438 39 +contact 2 153 1.791759 3.583518 59 +implement 2 152 1.791759 3.583518 52 +network 2 168 1.791759 3.583518 61 +phone 1 175 1.791759 1.791759 45 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +parallel 1 169 1.791759 1.791759 60 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +seattl 3 120 2.079442 6.238326 103 +tool 2 117 2.079442 4.158884 93 +person 2 111 2.197225 4.394450 117 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +place 1 106 2.197225 2.197225 124 +user 1 104 2.302585 2.302585 137 +memori 1 101 2.302585 2.302585 139 +real 1 93 2.397895 2.397895 144 +call 1 91 2.397895 2.397895 153 +proceed 1 93 2.397895 2.397895 152 +pictur 1 89 2.397895 2.397895 160 +octob 1 89 2.397895 2.397895 156 +contain 1 81 2.484907 2.484907 174 +messag 2 76 2.564949 5.129898 212 +interfac 2 79 2.564949 5.129898 209 +exampl 1 77 2.564949 2.564949 195 +involv 1 71 2.639057 2.639057 227 +free 1 73 2.639057 2.639057 224 +workshop 1 71 2.639057 2.639057 239 +laboratori 1 63 2.772589 2.772589 292 +guid 1 63 2.772589 2.772589 267 +march 1 61 2.833213 2.833213 295 +juli 1 60 2.833213 2.833213 305 +index 1 56 2.890372 2.890372 309 +faculti 1 56 2.890372 2.890372 325 +summer 1 56 2.890372 2.890372 311 +advisor 1 51 2.995732 2.995732 355 +maintain 1 51 2.995732 2.995732 342 +adapt 1 46 3.091042 3.091042 387 +answer 1 45 3.135494 3.135494 391 +past 2 42 3.218876 6.437752 428 +futur 1 41 3.218876 3.218876 427 +live 1 40 3.258097 3.258097 451 +electr 1 38 3.295837 3.295837 461 +industri 1 38 3.295837 3.295837 464 +game 1 36 3.367296 3.367296 498 +dissert 1 32 3.465736 3.465736 549 +graph 1 30 3.555348 3.555348 576 +produc 1 30 3.555348 3.555348 572 +pass 2 28 3.610918 7.221836 611 +bookmark 1 26 3.688879 3.688879 639 +head 1 23 3.806662 3.806662 732 +rout 4 21 3.912023 15.648092 793 +chip 1 21 3.912023 3.912023 770 +hous 1 21 3.912023 3.912023 801 +fine 1 20 3.951244 3.951244 822 +render 1 17 4.110874 4.110874 947 +medic 1 17 4.110874 4.110874 958 +cambridg 1 16 4.174387 4.174387 1008 +carl 2 15 4.248495 8.496990 1024 +countri 1 15 4.248495 4.248495 1059 +princeton 1 15 4.248495 4.248495 1042 +floor 1 14 4.317488 4.317488 1070 +massachusett 1 14 4.317488 4.317488 1118 +larri 3 13 4.382027 13.146081 1142 +menu 1 13 4.382027 4.382027 1156 +canada 1 13 4.382027 4.382027 1158 +speak 1 12 4.465908 4.465908 1283 +volum 1 11 4.553877 4.553877 1347 +mesh 1 11 4.553877 4.553877 1351 +packet 1 10 4.653960 4.653960 1415 +label 1 10 4.653960 4.653960 1423 +purdu 1 10 4.653960 4.653960 1466 +coast 2 8 4.875197 9.750394 1746 +angel 2 8 4.875197 9.750394 1779 +mile 1 8 4.875197 4.875197 1743 +creativ 1 8 4.875197 4.875197 1777 +virginia 1 8 4.875197 4.875197 1659 +shot 2 7 5.010635 10.021270 1898 +marri 1 7 5.010635 5.010635 1946 +adob 1 7 5.010635 5.010635 1873 +layout 1 6 5.164786 5.164786 2183 +east 3 5 5.347108 16.041324 2472 +chaotic 2 5 5.347108 10.694216 2566 +snyder 1 5 5.347108 5.347108 2359 +remain 1 5 5.347108 5.347108 2278 +amus 1 5 5.347108 5.347108 2366 +neil 4 4 5.568345 22.273380 2841 +ebel 1 4 5.568345 5.568345 2756 +tester 1 4 5.568345 5.568345 2754 +mcmurchi 1 4 5.568345 5.568345 2757 +gregori 1 4 5.568345 5.568345 2928 +merl 2 3 5.857933 11.715866 3843 +dine 1 3 5.857933 5.857933 3472 +mitsubishi 1 3 5.857933 5.857933 3842 +mactest 1 3 5.857933 5.857933 3972 +fashion 1 3 5.857933 5.857933 3699 +neighborhood 1 3 5.857933 5.857933 3242 +jar 1 3 5.857933 5.857933 3223 +mckenzi 7 2 6.263398 43.843786 5974 +gemini 2 2 6.263398 12.526796 5975 +andwork 1 2 6.263398 6.263398 5403 +projectsi 1 2 6.263398 6.263398 5931 +isomorph 1 2 6.263398 6.263398 5976 +knowna 1 2 6.263398 6.263398 5480 +shirt 1 2 6.263398 6.263398 5977 +farm 1 2 6.263398 6.263398 4115 +broadwai 1 1 6.957497 6.957497 15957 +projectsgonna 1 1 6.957497 6.957497 15958 +teenag 1 1 6.957497 6.957497 15959 +lobotomi 1 1 6.957497 6.957497 15960 +ramonesi 1 1 6.957497 6.957497 15961 +projectconcern 1 1 6.957497 6.957497 15962 +copiou 1 1 6.957497 6.957497 15963 +expatri 1 1 6.957497 6.957497 15964 +onchaot 1 1 6.957497 6.957497 15965 +routingwith 1 1 6.957497 6.957497 15966 +torusnetwork 1 1 6.957497 6.957497 15967 +thecranium 1 1 6.957497 6.957497 15968 +compatiblewith 1 1 6.957497 6.957497 15969 +netlist 1 1 6.957497 6.957497 15970 +calledgemini 1 1 6.957497 6.957497 15971 +schemat 1 1 6.957497 6.957497 15972 +cranium 1 1 6.957497 6.957497 15973 +packetrout 1 1 6.957497 6.957497 15974 +andcommun 1 1 6.957497 6.957497 15975 +tomactest 1 1 6.957497 6.957497 15976 +arlington 1 1 6.957497 6.957497 15977 +livein 1 1 6.957497 6.957497 15978 +ofballard 1 1 6.957497 6.957497 15979 +artworkcr 1 1 6.957497 6.957497 15980 +photoshop 1 1 6.957497 6.957497 15981 +ownedthi 1 1 6.957497 6.957497 15982 +onlyth 1 1 6.957497 6.957497 15983 +correctlyguess 1 1 6.957497 6.957497 15984 +toriddl 1 1 6.957497 6.957497 15985 +jour 1 1 6.957497 6.957497 15986 +honei 1 1 6.957497 6.957497 15987 +myuncl 1 1 6.957497 6.957497 15988 +edmonton 1 1 6.957497 6.957497 15989 +alberta 1 1 6.957497 6.957497 15990 +linkschairman 1 1 6.957497 6.957497 15991 +linksnorm 1 1 6.957497 6.957497 15992 +halcyon 1 1 6.957497 6.957497 15993 +eugen 1 1 6.957497 6.957497 15994 +spafford 1 1 6.957497 6.957497 15995 +randi 1 1 6.957497 6.957497 15996 +pausch 1 1 6.957497 6.957497 15997 +wallach 1 1 6.957497 6.957497 15998 +scool 1 1 6.957497 6.957497 15999 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mef^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mef^ new file mode 100644 index 00000000..5e630f22 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mef^ @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +system 11 443 0.693147 7.624617 6 +work 3 380 0.693147 2.079441 9 +depart 2 457 0.693147 1.386294 12 +engin 3 297 1.098612 3.295836 20 +us 2 329 1.098612 2.197224 16 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +washington 6 236 1.386294 8.317764 32 +softwar 3 220 1.386294 4.158882 30 +design 2 213 1.386294 2.772588 25 +languag 2 227 1.386294 2.772588 26 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +oper 8 180 1.609438 12.875504 34 +paper 5 205 1.609438 8.047190 38 +applic 4 170 1.791759 7.167036 56 +network 3 168 1.791759 5.375277 61 +implement 2 152 1.791759 3.583518 52 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +data 1 170 1.791759 1.791759 49 +architectur 3 139 1.945910 5.837730 77 +perform 3 143 1.945910 5.837730 74 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +analysi 2 124 2.079442 4.158884 98 +seattl 1 120 2.079442 2.079442 103 +report 1 131 2.079442 2.079442 92 +specif 2 106 2.197225 4.394450 106 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +manag 1 114 2.197225 2.197225 125 +topic 1 114 2.197225 2.197225 110 +proceed 3 93 2.397895 7.193685 152 +ieee 2 86 2.484907 4.969814 190 +interfac 3 79 2.564949 7.694847 209 +appear 2 78 2.564949 5.129898 210 +dynam 2 76 2.564949 5.129898 194 +decemb 1 80 2.564949 2.564949 215 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +servic 2 72 2.639057 5.278114 236 +symposium 1 72 2.639057 2.639057 238 +workshop 1 71 2.639057 2.639057 239 +receiv 1 66 2.708050 2.708050 244 +creat 2 63 2.772589 5.545178 277 +improv 1 62 2.772589 2.772589 289 +sever 1 56 2.890372 2.890372 322 +summer 1 56 2.890372 2.890372 311 +point 1 58 2.890372 2.890372 319 +extens 7 53 2.944439 20.611073 340 +processor 1 54 2.944439 2.944439 335 +allow 1 53 2.944439 2.944439 333 +run 2 51 2.995732 5.991464 347 +hardwar 1 51 2.995732 2.995732 350 +telephon 2 50 3.044522 6.089044 373 +principl 1 48 3.044522 3.044522 357 +adapt 1 46 3.091042 3.091042 387 +protocol 4 45 3.135494 12.541976 407 +describ 3 45 3.135494 9.406482 400 +mechan 1 43 3.178054 3.178054 416 +http 2 41 3.218876 6.437752 420 +transact 1 39 3.258097 3.258097 438 +winter 1 36 3.367296 3.367296 500 +toler 1 33 3.433987 3.433987 533 +fault 1 32 3.465736 3.465736 547 +posit 1 31 3.496508 3.496508 552 +rang 1 30 3.555348 3.555348 565 +graph 1 30 3.555348 3.555348 576 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +load 1 28 3.610918 3.610918 601 +compar 3 26 3.688879 11.066637 648 +request 2 26 3.688879 7.377758 635 +enabl 1 26 3.688879 3.688879 655 +spent 2 25 3.737670 7.475340 676 +primari 1 25 3.737670 3.737670 669 +demonstr 2 24 3.761200 7.522400 694 +lab 1 24 3.761200 3.761200 698 +flow 1 24 3.761200 3.761200 700 +deal 1 22 3.850148 3.850148 736 +corpor 1 21 3.912023 3.912023 802 +similar 1 21 3.912023 3.912023 771 +kernel 2 20 3.951244 7.902488 825 +safeti 1 20 3.951244 3.951244 817 +spend 1 19 4.007333 4.007333 850 +region 1 19 4.007333 4.007333 875 +protect 2 17 4.110874 8.221748 935 +germani 1 17 4.110874 4.110874 946 +anyon 1 17 4.110874 4.110874 916 +commerci 1 16 4.174387 4.174387 1005 +princeton 1 15 4.248495 4.248495 1042 +contribut 1 15 4.248495 4.248495 1021 +spin 5 14 4.317488 21.587440 1121 +achiev 2 14 4.317488 8.634976 1088 +near 1 14 4.317488 4.317488 1091 +happi 1 14 4.317488 4.317488 1079 +pretti 1 13 4.382027 4.382027 1191 +safe 3 12 4.465908 13.397724 1274 +usenix 1 12 4.465908 4.465908 1240 +abil 1 11 4.553877 4.553877 1341 +custom 1 10 4.653960 4.653960 1414 +elimin 1 9 4.753590 4.753590 1558 +marc 3 8 4.875197 14.625591 1680 +hack 1 7 5.010635 5.010635 1950 +fifth 1 7 5.010635 5.010635 1931 +bell 1 6 5.164786 5.164786 2224 +isth 1 5 5.347108 5.347108 2532 +sole 1 4 5.568345 5.568345 2592 +sell 1 4 5.568345 5.568345 2935 +fiuczynski 3 3 5.857933 17.573799 3390 +forappl 2 3 5.857933 11.715866 3929 +scratch 1 3 5.857933 5.857933 3140 +anin 1 3 5.857933 5.857933 3354 +fifteenth 1 3 5.857933 5.857933 3868 +linker 1 3 5.857933 5.857933 3157 +namespac 1 3 5.857933 5.857933 3957 +shortcom 2 2 6.263398 12.526796 5978 +backgroundi 1 2 6.263398 6.263398 5878 +highschool 1 2 6.263398 6.263398 5672 +ofproject 1 2 6.263398 6.263398 4446 +inord 1 2 6.263398 6.263398 4824 +linkabl 1 2 6.263398 6.263398 5979 +andcollect 1 2 6.263398 6.263398 4249 +contacthttp 2 1 6.957497 13.914994 16000 +grewup 1 1 6.957497 6.957497 16001 +sseldorf 1 1 6.957497 6.957497 16002 +fromrutg 1 1 6.957497 6.957497 16003 +mitr 1 1 6.957497 6.957497 16004 +proprietor 1 1 6.957497 6.957497 16005 +companythat 1 1 6.957497 6.957497 16006 +setof 1 1 6.957497 6.957497 16007 +chasi 1 1 6.957497 6.957497 16008 +univoic 1 1 6.957497 6.957497 16009 +cardsand 1 1 6.957497 6.957497 16010 +vxwork 1 1 6.957497 6.957497 16011 +compellingperform 1 1 6.957497 6.957497 16012 +tosimilar 1 1 6.957497 6.957497 16013 +anextens 1 1 6.957497 6.957497 16014 +betterperform 1 1 6.957497 6.957497 16015 +conventionaloper 1 1 6.957497 6.957497 16016 +technicalconfer 1 1 6.957497 6.957497 16017 +describeshow 1 1 6.957497 6.957497 16018 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mel^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mel^ new file mode 100644 index 00000000..d899bf4f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mel^ @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +last 1 314 1.098612 1.098612 14 +washington 1 236 1.386294 1.386294 32 +modifi 1 178 1.609438 1.609438 35 +algorithm 2 162 1.791759 3.583518 57 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +model 2 145 1.945910 3.891820 69 +perform 1 143 1.945910 1.945910 74 +machin 1 129 2.079442 2.079442 95 +real 1 93 2.397895 2.397895 144 +help 1 83 2.484907 2.484907 175 +requir 1 81 2.484907 2.484907 167 +method 1 80 2.564949 2.564949 213 +practic 1 70 2.708050 2.708050 246 +compar 1 26 3.688879 3.688879 648 +bound 1 26 3.688879 3.688879 659 +sort 1 22 3.850148 3.850148 738 +rout 5 21 3.912023 19.560115 793 +predict 1 19 4.007333 4.007333 855 +minim 2 18 4.060443 8.120886 887 +lower 1 18 4.060443 4.060443 886 +topolog 1 14 4.317488 4.317488 1089 +mesh 1 11 4.553877 4.553877 1351 +router 1 8 4.875197 4.875197 1772 +versu 1 6 5.164786 5.164786 2052 +upper 1 5 5.347108 5.347108 2481 +melani 1 2 6.263398 6.263398 5784 +deflect 2 1 6.957497 13.914994 16019 +fulgham 1 1 6.957497 6.957497 16020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^menghee^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^menghee^ new file mode 100644 index 00000000..ecc047a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^menghee^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +year 1 148 1.945910 1.945910 84 +problem 1 147 1.945910 1.945910 75 +databas 1 122 2.079442 2.079442 86 +provid 1 121 2.079442 2.079442 94 +take 1 97 2.302585 2.302585 134 +imag 2 91 2.397895 4.795790 161 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +stuff 1 87 2.484907 2.484907 171 +exampl 1 77 2.564949 2.564949 195 +nation 1 74 2.639057 2.639057 240 +main 1 67 2.708050 2.708050 256 +guid 1 63 2.772589 2.772589 267 +plan 1 65 2.772589 2.772589 272 +visit 1 63 2.772589 2.772589 288 +done 1 47 3.091042 3.091042 381 +kind 1 32 3.465736 3.465736 541 +transform 1 32 3.465736 3.465736 542 +retriev 1 27 3.637586 3.637586 621 +try 1 22 3.850148 3.850148 764 +similar 1 21 3.912023 3.912023 771 +fact 1 21 3.912023 3.912023 780 +engineeringunivers 1 17 4.110874 4.110874 959 +commerci 1 16 4.174387 4.174387 1005 +english 1 15 4.248495 4.248495 1033 +trip 1 14 4.317488 4.317488 1113 +meng 2 12 4.465908 8.931816 1214 +newspap 1 12 4.465908 4.465908 1280 +scienceat 1 11 4.553877 4.553877 1375 +island 1 11 4.553877 4.553877 1345 +undergrad 1 9 4.753590 4.753590 1589 +charg 1 9 4.753590 4.753590 1582 +pennsylvania 1 7 5.010635 5.010635 1932 +interestsi 1 7 5.010635 5.010635 1969 +huge 1 6 5.164786 5.164786 1991 +singapor 3 5 5.347108 16.041324 2487 +snapshot 1 5 5.347108 5.347108 2303 +washingtonseattl 1 4 5.568345 5.568345 3044 +heng 1 2 6.263398 6.263398 5202 +strait 1 2 6.263398 6.263398 5980 +homepagemenghe 1 1 6.957497 6.957497 16021 +edubox 1 1 6.957497 6.957497 16022 +findimag 1 1 6.957497 6.957497 16023 +virag 1 1 6.957497 6.957497 16024 +andqbicar 1 1 6.957497 6.957497 16025 +singaporesingapor 1 1 6.957497 6.957497 16026 +infomap 1 1 6.957497 6.957497 16027 +andstatist 1 1 6.957497 6.957497 16028 +singaporeonlin 1 1 6.957497 6.957497 16029 +boardi 1 1 6.957497 6.957497 16030 +anintellig 1 1 6.957497 6.957497 16031 +menghe 1 1 6.957497 6.957497 16032 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mernst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mernst^ new file mode 100644 index 00000000..d2d3fa72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mernst^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 2 343 1.098612 2.197224 19 +graduat 2 215 1.386294 2.772588 31 +washington 2 236 1.386294 2.772588 32 +link 1 247 1.386294 1.386294 24 +includ 3 208 1.609438 4.828314 42 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +list 1 201 1.609438 1.609438 39 +parallel 1 169 1.791759 1.791759 60 +lectur 1 135 1.945910 1.945910 73 +area 1 144 1.945910 1.945910 80 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +code 1 108 2.197225 2.197225 116 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +technic 1 100 2.302585 2.302585 140 +real 1 93 2.397895 2.397895 144 +optim 1 79 2.564949 2.564949 197 +workshop 2 71 2.639057 5.278114 239 +laboratori 1 63 2.772589 2.772589 292 +organ 1 65 2.772589 2.772589 265 +maintain 2 51 2.995732 5.991464 342 +frequent 1 49 3.044522 3.044522 367 +life 1 50 3.044522 3.044522 375 +microsoft 1 38 3.295837 3.295837 468 +game 1 36 3.367296 3.367296 498 +michael 2 35 3.401197 6.802394 514 +represent 1 35 3.401197 3.401197 512 +chair 1 29 3.583519 3.583519 596 +static 1 27 3.637586 3.637586 619 +properti 1 22 3.850148 3.850148 749 +particularli 1 19 4.007333 4.007333 867 +previous 1 17 4.110874 4.110874 923 +debug 1 17 4.110874 4.110874 944 +philosophi 1 13 4.382027 4.382027 1167 +carri 1 13 4.382027 4.382027 1152 +awai 1 10 4.653960 4.653960 1447 +intermedi 1 9 4.753590 4.753590 1497 +cryptographi 1 9 4.753590 4.753590 1512 +serial 1 7 5.010635 5.010635 1975 +intellectu 1 7 5.010635 5.010635 1847 +occasion 1 7 5.010635 5.010635 1905 +sciencedepart 1 6 5.164786 5.164786 2172 +slice 1 4 5.568345 5.568345 2622 +popl 1 4 5.568345 5.568345 3068 +denot 1 3 5.857933 5.857933 3147 +ernst 2 2 6.263398 12.526796 4525 +eec 1 2 6.263398 6.263398 5981 +pagemichael 1 1 6.957497 6.957497 16033 +ernsti 1 1 6.957497 6.957497 16034 +riceunivers 1 1 6.957497 6.957497 16035 +programanalysi 1 1 6.957497 6.957497 16036 +coloc 1 1 6.957497 6.957497 16037 +semanticsi 1 1 6.957497 6.957497 16038 +resourcesfor 1 1 6.957497 6.957497 16039 +slip 1 1 6.957497 6.957497 16040 +possibleinterest 1 1 6.957497 6.957497 16041 +mernst 1 1 6.957497 6.957497 16042 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mock^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mock^ new file mode 100644 index 00000000..73f7351d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^mock^ @@ -0,0 +1,125 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +home 5 672 0.000000 0.000000 1 +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 3 384 0.693147 2.079441 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +current 2 284 1.098612 2.197224 21 +time 2 293 1.098612 2.197224 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +link 5 247 1.386294 6.931470 24 +washington 2 236 1.386294 2.772588 32 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +compil 2 122 2.079442 4.158884 96 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +check 1 115 2.197225 2.197225 118 +well 1 109 2.197225 2.197225 121 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +search 2 95 2.397895 4.795790 155 +sinc 1 90 2.397895 2.397895 159 +stuff 2 87 2.484907 4.969814 171 +journal 1 83 2.484907 2.484907 183 +come 2 78 2.564949 5.129898 202 +state 1 76 2.564949 2.564949 207 +orient 1 80 2.564949 2.564949 205 +dynam 1 76 2.564949 2.564949 194 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +main 1 67 2.708050 2.708050 256 +view 1 70 2.708050 2.708050 254 +still 1 50 3.044522 3.044522 362 +cool 1 49 3.044522 3.044522 374 +even 1 45 3.135494 3.135494 393 +live 2 40 3.258097 6.516194 451 +movi 1 40 3.258097 3.258097 459 +electr 1 38 3.295837 3.295837 461 +expect 1 37 3.332205 3.332205 484 +field 1 37 3.332205 3.332205 482 +obtain 1 33 3.433987 3.433987 534 +travel 1 30 3.555348 3.555348 579 +quot 1 29 3.583519 3.583519 582 +consid 1 29 3.583519 3.583519 590 +american 1 27 3.637586 3.637586 634 +arrai 1 27 3.637586 3.637586 627 +spent 1 25 3.737670 3.737670 676 +grad 1 20 3.951244 3.951244 837 +left 1 19 4.007333 4.007333 851 +els 1 19 4.007333 4.007333 843 +event 1 18 4.060443 4.060443 896 +squar 1 14 4.317488 4.317488 1082 +spin 1 14 4.317488 4.317488 1121 +danc 1 12 4.465908 4.465908 1278 +grow 1 12 4.465908 4.465908 1209 +metacrawl 1 10 4.653960 4.653960 1455 +grew 1 8 4.875197 4.875197 1742 +solomon 1 8 4.875197 4.875197 1716 +cultur 1 7 5.010635 5.010635 1951 +bit 1 7 5.010635 5.010635 1833 +wouldn 1 7 5.010635 5.010635 1970 +mock 4 6 5.164786 20.659144 2087 +whichi 1 6 5.164786 5.164786 2056 +oopsla 1 6 5.164786 5.164786 2221 +altavista 1 6 5.164786 5.164786 2222 +neither 1 6 5.164786 5.164786 1990 +matthew 1 6 5.164786 5.164786 2193 +chess 3 5 5.347108 16.041324 2486 +upper 1 5 5.347108 5.347108 2481 +volunt 1 5 5.347108 5.347108 2307 +lili 1 5 5.347108 5.347108 2240 +fulbright 1 4 5.568345 5.568345 2963 +spanish 1 4 5.568345 5.568345 3017 +marku 2 3 5.857933 11.715866 3872 +district 1 3 5.857933 5.857933 3756 +karlsruh 1 3 5.857933 5.857933 3689 +latin 1 3 5.857933 5.857933 3741 +deutsch 1 3 5.857933 5.857933 3802 +diplom 1 2 6.263398 6.263398 5982 +umass 1 2 6.263398 6.263398 5899 +grante 1 2 6.263398 6.263398 4914 +labyrinth 1 2 6.263398 6.263398 5983 +mainstream 1 2 6.263398 6.263398 5437 +salsa 1 2 6.263398 6.263398 5984 +colloquia 1 2 6.263398 6.263398 4710 +mossi 1 2 6.263398 6.263398 5801 +rttemberg 1 1 6.957497 6.957497 16043 +anotherpart 1 1 6.957497 6.957497 16044 +biberach 1 1 6.957497 6.957497 16045 +swabia 1 1 6.957497 6.957497 16046 +oberschwaben 1 1 6.957497 6.957497 16047 +solitud 1 1 6.957497 6.957497 16048 +dieangst 1 1 6.957497 6.957497 16049 +torwart 1 1 6.957497 6.957497 16050 +beim 1 1 6.957497 6.957497 16051 +elfmet 1 1 6.957497 6.957497 16052 +handk 1 1 6.957497 6.957497 16053 +merengu 1 1 6.957497 6.957497 16054 +publicationssepar 1 1 6.957497 6.957497 16055 +olympiad 1 1 6.957497 6.957497 16056 +yerewan 1 1 6.957497 6.957497 16057 +csek 1 1 6.957497 6.957497 16058 +csebi 1 1 6.957497 6.957497 16059 +cse 1 1 6.957497 6.957497 16060 +studentsimag 1 1 6.957497 6.957497 16061 +engineeringy 1 1 6.957497 6.957497 16062 +realaudio 1 1 6.957497 6.957497 16063 +linksand 1 1 6.957497 6.957497 16064 +toil 1 1 6.957497 6.957497 16065 +unto 1 1 6.957497 6.957497 16066 +glorywa 1 1 6.957497 6.957497 16067 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^montgmry b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^montgmry new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^montgmry @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nara^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nara^ new file mode 100644 index 00000000..c8c48ea0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nara^ @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +vivek 2 6 5.164786 10.329572 2210 +narasayya 2 4 5.568345 11.136690 3065 +informationresearch 1 3 5.857933 5.857933 3675 +nara 1 1 6.957497 6.957497 16068 +interestspap 1 1 6.957497 6.957497 16069 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ new file mode 100644 index 00000000..bf87bb99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nautiyal^ @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +engin 2 297 1.098612 2.197224 20 +offic 2 299 1.098612 2.197224 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +seattl 1 120 2.079442 2.079442 103 +place 1 106 2.197225 2.197225 124 +take 1 97 2.302585 2.302585 134 +search 1 95 2.397895 2.397895 155 +internet 2 83 2.484907 4.969814 186 +name 2 72 2.639057 5.278114 220 +intellig 1 72 2.639057 2.639057 225 +sieg 1 69 2.708050 2.708050 260 +order 1 69 2.708050 2.708050 249 +artifici 1 63 2.772589 2.772589 280 +collect 1 65 2.772589 2.772589 268 +digit 1 52 2.995732 2.995732 348 +finger 1 52 2.995732 2.995732 354 +much 1 52 2.995732 2.995732 349 +principl 1 48 3.044522 3.044522 357 +friend 1 48 3.044522 3.044522 376 +quarter 1 47 3.091042 3.091042 389 +favorit 1 44 3.135494 3.135494 410 +movi 1 40 3.258097 3.258097 459 +tech 1 35 3.401197 3.401197 515 +india 2 32 3.465736 6.931472 550 +autumn 1 31 3.496508 3.496508 558 +travel 1 30 3.555348 3.555348 579 +finish 1 22 3.850148 3.850148 748 +stop 1 17 4.110874 4.110874 942 +adam 1 17 4.110874 4.110874 934 +cook 1 10 4.653960 4.653960 1464 +sound 1 9 4.753590 4.753590 1605 +heavi 1 7 5.010635 5.010635 1841 +alphabet 1 6 5.164786 5.164786 1980 +dougla 1 5 5.347108 5.347108 2471 +delhi 1 5 5.347108 5.347108 2530 +radio 1 4 5.568345 5.568345 3025 +skate 1 4 5.568345 5.568345 3046 +terri 1 3 5.857933 5.857933 3264 +impli 1 3 5.857933 5.857933 3348 +astronomi 1 3 5.857933 5.857933 3974 +coin 1 3 5.857933 5.857933 3799 +pelham 1 2 6.263398 6.263398 4988 +grenvil 1 2 6.263398 6.263398 4989 +himanshu 3 1 6.957497 20.872491 16070 +nautiy 3 1 6.957497 20.872491 16071 +pagehimanshu 1 1 6.957497 6.957497 16072 +nautiyalthi 1 1 6.957497 6.957497 16073 +nautiyaldept 1 1 6.957497 6.957497 16074 +edugod 1 1 6.957497 6.957497 16075 +gift 1 1 6.957497 6.957497 16076 +personkind 1 1 6.957497 6.957497 16077 +pratchett 1 1 6.957497 6.957497 16078 +wodehouseth 1 1 6.957497 6.957497 16079 +aviat 1 1 6.957497 6.957497 16080 +numismat 1 1 6.957497 6.957497 16081 +profound 1 1 6.957497 6.957497 16082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nick^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nick^ new file mode 100644 index 00000000..2a98e1a0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^nick^ @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +mail 1 238 1.386294 1.386294 22 +paper 1 205 1.609438 1.609438 38 +avail 3 169 1.791759 5.375277 48 +contact 1 153 1.791759 1.791759 59 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +technolog 2 131 2.079442 4.158884 102 +provid 1 121 2.079442 2.079442 94 +number 1 130 2.079442 2.079442 97 +seattl 1 120 2.079442 2.079442 103 +version 1 113 2.197225 2.197225 122 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +need 3 98 2.302585 6.907755 135 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +comment 1 93 2.397895 2.397895 146 +resourc 1 81 2.484907 2.484907 172 +stuff 1 87 2.484907 2.484907 171 +know 1 80 2.564949 2.564949 198 +intellig 1 72 2.639057 2.639057 225 +servic 1 72 2.639057 2.639057 236 +line 1 75 2.639057 2.639057 231 +involv 1 71 2.639057 2.639057 227 +artifici 1 63 2.772589 2.772589 280 +automat 1 61 2.833213 2.833213 306 +sever 1 56 2.890372 2.890372 322 +browser 1 56 2.890372 2.890372 313 +date 1 51 2.995732 2.995732 344 +week 1 52 2.995732 2.995732 343 +favorit 1 44 3.135494 3.135494 410 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +random 1 34 3.401197 3.401197 511 +return 1 34 3.401197 3.401197 502 +great 1 27 3.637586 3.637586 626 +enabl 1 26 3.688879 3.688879 655 +bookmark 1 26 3.688879 3.688879 639 +alwai 1 24 3.761200 3.761200 691 +color 1 22 3.850148 3.850148 762 +tell 1 21 3.912023 3.912023 777 +anonym 1 14 4.317488 4.317488 1100 +easili 1 14 4.317488 4.317488 1077 +preliminari 1 9 4.753590 4.753590 1480 +wilson 1 9 4.753590 4.753590 1536 +awar 1 7 5.010635 5.010635 1800 +guidelin 1 7 5.010635 5.010635 1832 +divers 1 6 5.164786 5.164786 2232 +handi 1 6 5.164786 5.164786 2111 +lost 1 5 5.347108 5.347108 2358 +alsointerest 1 3 5.857933 5.857933 3813 +shortli 1 3 5.857933 5.857933 3375 +surgeri 1 3 5.857933 5.857933 3975 +javascript 1 3 5.857933 5.857933 3221 +republican 1 3 5.857933 5.857933 3815 +miscellani 1 3 5.857933 5.857933 3976 +nichola 1 3 5.857933 5.857933 3252 +uwcs 1 3 5.857933 5.857933 3977 +labyrinth 1 2 6.263398 6.263398 5983 +cynic 1 2 6.263398 6.263398 5854 +andcognit 1 2 6.263398 6.263398 5681 +temperatur 1 2 6.263398 6.263398 5985 +bitter 1 2 6.263398 6.263398 5387 +ironi 1 2 6.263398 6.263398 5986 +nonetheless 1 2 6.263398 6.263398 4681 +madeavail 1 2 6.263398 6.263398 4326 +mediocr 1 1 6.957497 6.957497 16083 +bemoan 1 1 6.957497 6.957497 16084 +hype 1 1 6.957497 6.957497 16085 +skeptic 1 1 6.957497 6.957497 16086 +automaticconstruct 1 1 6.957497 6.957497 16087 +wrapper 1 1 6.957497 6.957497 16088 +beeninvolv 1 1 6.957497 6.957497 16089 +glbal 1 1 6.957497 6.957497 16090 +infrmatin 1 1 6.957497 6.957497 16091 +sperhighwai 1 1 6.957497 6.957497 16092 +meter 1 1 6.957497 6.957497 16093 +ronald 1 1 6.957497 6.957497 16094 +reagan 1 1 6.957497 6.957497 16095 +wendel 1 1 6.957497 6.957497 16096 +berri 1 1 6.957497 6.957497 16097 +constitutesgood 1 1 6.957497 6.957497 16098 +kushmerick 1 1 6.957497 6.957497 16099 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^notkin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^notkin new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^notkin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ new file mode 100644 index 00000000..31a50b3a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ohlrich^ @@ -0,0 +1,76 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 5 412 0.693147 3.465735 8 +research 3 431 0.693147 2.079441 10 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +us 3 329 1.098612 3.295836 16 +project 2 340 1.098612 2.197224 18 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +seattl 2 120 2.079442 4.158884 103 +analysi 1 124 2.079442 2.079442 98 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +memori 3 101 2.302585 6.907755 139 +octob 1 89 2.397895 2.397895 156 +contain 2 81 2.484907 4.969814 174 +onlin 1 75 2.639057 2.639057 223 +test 1 66 2.708050 2.708050 252 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +march 1 61 2.833213 2.833213 295 +sever 1 56 2.890372 2.890372 322 +summer 1 56 2.890372 2.890372 311 +local 1 55 2.944439 2.944439 334 +found 1 53 2.944439 2.944439 337 +brian 1 38 3.295837 3.295837 466 +game 1 36 3.367296 3.367296 498 +known 1 24 3.761200 3.761200 702 +reduc 1 22 3.850148 3.850148 759 +sort 1 22 3.850148 3.850148 738 +safeti 1 20 3.951244 3.951244 817 +wonder 1 20 3.951244 3.951244 815 +bershad 2 18 4.060443 8.120886 902 +chateau 1 16 4.174387 4.174387 997 +overhead 1 15 4.248495 4.248495 1035 +karlin 2 13 4.382027 8.764054 1176 +nanci 1 12 4.465908 4.465908 1256 +anna 1 12 4.465908 4.465908 1292 +promot 1 12 4.465908 4.465908 1235 +isca 2 11 4.553877 9.107754 1354 +itali 1 11 4.553877 4.553877 1378 +leveson 2 9 4.753590 9.507180 1540 +wayn 3 8 4.875197 14.625591 1738 +romer 2 8 4.875197 9.750394 1706 +guggenheim 1 8 4.875197 4.875197 1759 +invest 2 6 5.164786 10.329572 2153 +spare 1 6 5.164786 5.164786 2177 +ohlrich 4 5 5.347108 21.388432 2564 +annex 1 5 5.347108 5.347108 2572 +sytem 1 4 5.568345 5.568345 3015 +superpag 1 3 5.857933 5.857933 3978 +damag 1 2 6.263398 6.263398 5687 +debut 1 1 6.957497 6.957497 16100 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ori^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ori^ new file mode 100644 index 00000000..8f0be433 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ori^ @@ -0,0 +1,87 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +graduat 2 215 1.386294 2.772588 31 +design 1 213 1.386294 1.386294 25 +link 1 247 1.386294 1.386294 24 +class 2 199 1.609438 3.218876 37 +hour 1 165 1.791759 1.791759 46 +year 3 148 1.945910 5.837730 84 +like 1 132 1.945910 1.945910 81 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +seattl 2 120 2.079442 4.158884 103 +welcom 1 122 2.079442 2.079442 99 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +mathemat 1 108 2.197225 2.197225 123 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +resum 1 79 2.564949 2.564949 217 +summari 1 73 2.639057 2.639057 237 +sieg 2 69 2.708050 5.416100 260 +practic 1 70 2.708050 2.708050 246 +august 1 66 2.708050 2.708050 257 +wednesdai 1 64 2.772589 2.772589 261 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +plai 1 60 2.833213 2.833213 307 +summer 2 56 2.890372 5.780744 311 +three 1 54 2.944439 2.944439 330 +cool 1 49 3.044522 3.044522 374 +california 2 46 3.091042 6.182084 388 +move 1 47 3.091042 3.091042 382 +quarter 1 47 3.091042 3.091042 389 +made 1 44 3.135494 3.135494 398 +york 1 41 3.218876 3.218876 435 +live 2 40 3.258097 6.516194 451 +origin 1 38 3.295837 3.295837 472 +seminar 1 38 3.295837 3.295837 470 +option 1 30 3.555348 3.555348 575 +usual 1 28 3.610918 3.610918 608 +berkelei 2 26 3.688879 7.377758 657 +departmentunivers 1 24 3.761200 3.761200 711 +daili 1 24 3.761200 3.761200 706 +ofwashington 1 22 3.850148 3.850148 766 +whole 1 17 4.110874 4.110874 940 +took 1 16 4.174387 4.174387 1010 +basketbal 1 12 4.465908 4.465908 1289 +employ 1 12 4.465908 4.465908 1291 +realiti 1 12 4.465908 4.465908 1272 +israel 2 11 4.553877 9.107754 1366 +seven 1 9 4.753590 4.753590 1561 +angel 1 8 4.875197 4.875197 1779 +potenti 1 8 4.875197 4.875197 1690 +studentcomput 1 7 5.010635 5.010635 1963 +bunch 1 7 5.010635 5.010635 1861 +hike 1 6 5.164786 5.164786 2234 +northeast 1 3 5.857933 5.857933 3922 +haifa 1 3 5.857933 5.857933 3554 +kwon 1 3 5.857933 5.857933 3690 +gershoni 4 2 6.263398 25.053592 4513 +shirt 1 2 6.263398 6.263398 5977 +washingtonoffic 1 1 6.957497 6.957497 16101 +fouryear 1 1 6.957497 6.957497 16102 +lancast 1 1 6.957497 6.957497 16103 +holon 1 1 6.957497 6.957497 16104 +amta 1 1 6.957497 6.957497 16105 +aremondai 1 1 6.957497 6.957497 16106 +tose 1 1 6.957497 6.957497 16107 +graphicsprogram 1 1 6.957497 6.957497 16108 +riderlink 1 1 6.957497 6.957497 16109 +seattletransport 1 1 6.957497 6.957497 16110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ortega^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ortega^ new file mode 100644 index 00000000..e602fa91 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ortega^ @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +project 2 340 1.098612 2.197224 18 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +tool 1 117 2.079442 2.079442 93 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +find 1 111 2.197225 2.197225 111 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +section 1 94 2.397895 2.397895 149 +pictur 1 89 2.397895 2.397895 160 +school 1 84 2.484907 2.484907 188 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +educ 1 86 2.484907 2.484907 191 +would 2 67 2.708050 5.416100 251 +sieg 1 69 2.708050 2.708050 260 +experi 1 64 2.772589 2.772589 283 +advisor 1 51 2.995732 2.995732 355 +profession 1 51 2.995732 2.995732 345 +realli 1 40 3.258097 3.258097 444 +go 1 33 3.433987 3.433987 529 +sometim 1 24 3.761200 3.761200 696 +head 1 23 3.806662 3.806662 732 +color 2 22 3.850148 7.700296 762 +try 1 22 3.850148 3.850148 764 +leav 1 21 3.912023 3.912023 772 +boston 1 19 4.007333 4.007333 862 +offici 1 18 4.060443 4.060443 894 +decid 1 14 4.317488 4.317488 1075 +embed 1 14 4.317488 4.317488 1102 +came 1 13 4.382027 4.382027 1197 +jean 1 10 4.653960 4.653960 1440 +northwest 1 7 5.010635 5.010635 1973 +accord 1 7 5.010635 5.010635 1826 +hack 1 7 5.010635 5.010635 1950 +chinook 2 6 5.164786 10.329572 2229 +gaetano 1 6 5.164786 5.164786 2068 +beer 1 6 5.164786 5.164786 2216 +german 1 6 5.164786 5.164786 2190 +myresum 1 6 5.164786 5.164786 2199 +ortega 3 5 5.347108 16.041324 2559 +ross 2 5 5.347108 10.694216 2243 +knew 1 5 5.347108 5.347108 2445 +borriello 1 5 5.347108 5.347108 2349 +wear 1 4 5.568345 5.568345 2785 +albert 1 2 6.263398 6.263398 5987 +unoffici 1 2 6.263398 6.263398 5909 +brew 1 2 6.263398 6.263398 5988 +shepherd 1 2 6.263398 6.263398 4347 +behav 1 2 6.263398 6.263398 4670 +tequila 4 1 6.957497 27.829988 16111 +einstein 1 1 6.957497 6.957497 16112 +pageaft 1 1 6.957497 6.957497 16113 +puppi 1 1 6.957497 6.957497 16114 +updatedthu 1 1 6.957497 6.957497 16115 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pardo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pardo^ new file mode 100644 index 00000000..ed00548a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pardo^ @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +washington 2 236 1.386294 2.772588 32 +also 2 259 1.386294 2.772588 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +class 1 199 1.609438 1.609438 37 +list 1 201 1.609438 1.609438 39 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +relat 1 139 1.945910 1.945910 68 +seattl 4 120 2.079442 8.317768 103 +document 1 121 2.079442 2.079442 89 +tool 1 117 2.079442 2.079442 93 +site 2 106 2.197225 4.394450 119 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +need 1 98 2.302585 2.302585 135 +take 1 97 2.302585 2.302585 134 +graphic 1 90 2.397895 2.397895 147 +search 1 95 2.397895 2.397895 155 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +free 1 73 2.639057 2.639057 224 +nation 1 74 2.639057 2.639057 240 +simul 1 66 2.708050 2.708050 255 +share 1 59 2.833213 2.833213 304 +think 2 57 2.890372 5.780744 314 +thesi 1 57 2.890372 2.890372 327 +instruct 1 53 2.944439 2.944439 332 +week 1 52 2.995732 2.995732 343 +particular 1 51 2.995732 2.995732 352 +featur 1 46 3.091042 3.091042 386 +show 2 43 3.178054 6.356108 417 +around 1 43 3.178054 3.178054 415 +http 1 41 3.218876 3.218876 420 +movi 1 40 3.258097 3.258097 459 +everi 1 34 3.401197 3.401197 519 +consid 1 29 3.583519 3.583519 590 +weather 3 28 3.610918 10.832754 618 +though 1 27 3.637586 3.637586 622 +background 1 25 3.737670 3.737670 664 +trace 1 25 3.737670 3.737670 677 +sometim 2 24 3.761200 7.522400 696 +disk 1 22 3.850148 3.850148 747 +item 2 19 4.007333 8.014666 856 +particularli 1 19 4.007333 4.007333 867 +runtim 1 19 4.007333 4.007333 858 +log 1 19 4.007333 4.007333 857 +engineeringunivers 1 17 4.110874 4.110874 959 +white 1 17 4.110874 4.110874 951 +regular 1 17 4.110874 4.110874 929 +letter 1 16 4.174387 4.174387 981 +month 1 15 4.248495 4.248495 1025 +doesn 1 15 4.248495 4.248495 1055 +stori 1 14 4.317488 4.317488 1087 +washingtonbox 1 13 4.382027 4.382027 1200 +weak 1 13 4.382027 4.382027 1159 +minor 1 12 4.465908 4.465908 1237 +newspap 1 12 4.465908 4.465908 1280 +bill 1 11 4.553877 4.553877 1297 +black 1 10 4.653960 4.653960 1418 +telecommun 1 9 4.753590 4.753590 1565 +transport 1 8 4.875197 4.875197 1672 +film 1 8 4.875197 4.875197 1761 +ethic 1 7 5.010635 5.010635 1786 +courtesi 1 7 5.010635 5.010635 1953 +blue 1 6 5.164786 5.164786 2227 +quick 1 6 5.164786 5.164786 2184 +famou 1 6 5.164786 5.164786 2185 +legal 1 6 5.164786 5.164786 2094 +privaci 1 6 5.164786 5.164786 2144 +everybodi 1 5 5.347108 5.347108 2517 +oncomput 1 5 5.347108 5.347108 2326 +truli 1 5 5.347108 5.347108 2476 +festiv 1 4 5.568345 5.568345 2952 +flat 1 3 5.857933 5.857933 3853 +icon 1 3 5.857933 5.857933 3362 +unrel 1 3 5.857933 5.857933 3837 +ribbon 1 2 6.263398 6.263398 5973 +quiet 1 2 6.263398 6.263398 5203 +likewis 1 2 6.263398 6.263398 4100 +blink 1 2 6.263398 6.263398 5067 +anymor 1 2 6.263398 6.263398 5938 +gross 1 2 6.263398 6.263398 5989 +beth 2 1 6.957497 13.914994 16116 +pardo 2 1 6.957497 13.914994 16117 +courtesei 2 1 6.957497 13.914994 16118 +untitl 1 1 6.957497 6.957497 16119 +morri 1 1 6.957497 6.957497 16120 +pardodepart 1 1 6.957497 6.957497 16121 +washingtonusapardo 1 1 6.957497 6.957497 16122 +edunot 1 1 6.957497 6.957497 16123 +asimgsrc 1 1 6.957497 6.957497 16124 +blueribbon 1 1 6.957497 6.957497 16125 +rib_trn_plain_sm 1 1 6.957497 6.957497 16126 +opposit 1 1 6.957497 6.957497 16127 +speechprohibit 1 1 6.957497 6.957497 16128 +academicsom 1 1 6.957497 6.957497 16129 +papersi 1 1 6.957497 6.957497 16130 +rtcg 1 1 6.957497 6.957497 16131 +architectureandcompil 1 1 6.957497 6.957497 16132 +otherpeopl 1 1 6.957497 6.957497 16133 +stylenon 1 1 6.957497 6.957497 16134 +academicfeatur 1 1 6.957497 6.957497 16135 +itemsbicyclesbusinessescomputersfoodhumori 1 1 6.957497 6.957497 16136 +weirdnesslinux 1 1 6.957497 6.957497 16137 +journalmusicgoofi 1 1 6.957497 6.957497 16138 +politicssci 1 1 6.957497 6.957497 16139 +dant 1 1 6.957497 6.957497 16140 +trepan 1 1 6.957497 6.957497 16141 +wors 1 1 6.957497 6.957497 16142 +newhous 1 1 6.957497 6.957497 16143 +yesterdai 1 1 6.957497 6.957497 16144 +stuffpardo 1 1 6.957497 6.957497 16145 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pardy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pardy^ new file mode 100644 index 00000000..123f8884 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pardy^ @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 4 431 0.693147 2.772588 10 +system 4 443 0.693147 2.772588 6 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +engin 1 297 1.098612 1.098612 20 +washington 4 236 1.386294 5.545176 32 +also 2 259 1.386294 2.772588 28 +languag 2 227 1.386294 2.772588 26 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +group 2 183 1.609438 3.218876 36 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +phone 2 175 1.791759 3.583518 45 +distribut 1 162 1.791759 1.791759 51 +base 1 165 1.791759 1.791759 50 +relat 3 139 1.945910 5.837730 68 +year 2 148 1.945910 3.891820 84 +object 2 138 1.945910 3.891820 79 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +seattl 3 120 2.079442 6.238326 103 +compil 1 122 2.079442 2.079442 96 +schedul 1 119 2.079442 2.079442 85 +find 1 111 2.197225 2.197225 111 +book 1 99 2.302585 2.302585 131 +commun 1 95 2.397895 2.397895 157 +school 3 84 2.484907 7.454721 188 +activ 1 84 2.484907 2.484907 182 +resourc 1 81 2.484907 2.484907 172 +resum 1 79 2.564949 2.564949 217 +descript 1 64 2.772589 2.772589 271 +variou 1 56 2.890372 2.890372 317 +extens 1 53 2.944439 2.944439 340 +week 1 52 2.995732 2.995732 343 +life 1 50 3.044522 3.044522 375 +mechan 2 43 3.178054 6.356108 416 +third 1 43 3.178054 3.178054 412 +music 1 42 3.218876 3.218876 436 +brian 1 38 3.295837 3.295837 466 +connect 1 37 3.332205 3.332205 485 +short 1 36 3.367296 3.367296 499 +built 1 29 3.583519 3.583519 592 +enjoi 1 26 3.688879 3.688879 660 +mine 1 26 3.688879 3.688879 654 +miscellan 1 23 3.806662 3.806662 731 +theunivers 1 21 3.912023 3.912023 797 +busi 1 21 3.912023 3.912023 784 +grad 2 20 3.951244 7.902488 837 +bershad 1 18 4.060443 4.060443 902 +coupl 1 17 4.110874 4.110874 939 +later 1 15 4.248495 4.248495 1043 +fill 1 11 4.553877 4.553877 1349 +besid 1 8 4.875197 4.875197 1681 +hike 1 6 5.164786 5.164786 2234 +outdoor 1 5 5.347108 5.347108 2514 +pardyak 2 4 5.568345 11.136690 3043 +withth 1 4 5.568345 5.568345 2805 +outdat 1 4 5.568345 5.568345 2797 +somehow 1 4 5.568345 5.568345 2974 +ofoper 1 3 5.857933 5.857933 3292 +gloriou 1 3 5.857933 5.857933 3816 +leadership 1 3 5.857933 5.857933 3320 +emerald 1 3 5.857933 5.857933 3979 +poland 1 3 5.857933 5.857933 3665 +unrel 1 3 5.857933 5.857933 3837 +przemyslaw 1 2 6.263398 6.263398 5808 +basedprogram 1 2 6.263398 6.263398 5700 +pardi 2 1 6.957497 13.914994 16146 +drizzl 2 1 6.957497 13.914994 16147 +przemek 1 1 6.957497 6.957497 16148 +interast 1 1 6.957497 6.957497 16149 +notbusi 1 1 6.957497 6.957497 16150 +happenswhen 1 1 6.957497 6.957497 16151 +projectsspinan 1 1 6.957497 6.957497 16152 +systemsgroup 1 1 6.957497 6.957497 16153 +polish 1 1 6.957497 6.957497 16154 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^paul^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^paul^ new file mode 100644 index 00000000..bde70015 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^paul^ @@ -0,0 +1,168 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +work 2 380 0.693147 1.386294 9 +research 1 431 0.693147 0.693147 10 +student 3 343 1.098612 3.295836 19 +us 3 329 1.098612 3.295836 16 +time 2 293 1.098612 2.197224 17 +engin 2 297 1.098612 2.197224 20 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +graduat 2 215 1.386294 2.772588 31 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +updat 1 191 1.609438 1.609438 41 +public 1 202 1.609438 1.609438 43 +contact 1 153 1.791759 1.791759 59 +recent 1 167 1.791759 1.791759 58 +year 8 148 1.945910 15.567280 84 +first 2 140 1.945910 3.891820 71 +professor 1 137 1.945910 1.945910 76 +like 1 132 1.945910 1.945910 81 +seattl 2 120 2.079442 4.158884 103 +high 2 130 2.079442 4.158884 101 +schedul 1 119 2.079442 2.079442 85 +document 1 121 2.079442 2.079442 89 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +school 3 84 2.484907 7.454721 188 +second 2 81 2.484907 4.969814 166 +stuff 1 87 2.484907 2.484907 171 +come 1 78 2.564949 2.564949 202 +april 1 77 2.564949 2.564949 196 +june 1 79 2.564949 2.564949 214 +know 1 80 2.564949 2.564949 198 +html 3 75 2.639057 7.917171 235 +sieg 1 69 2.708050 2.708050 260 +receiv 1 66 2.708050 2.708050 244 +collect 1 65 2.772589 2.772589 268 +best 1 59 2.833213 2.833213 299 +march 1 61 2.833213 2.833213 295 +variou 1 56 2.890372 2.890372 317 +undergradu 1 54 2.944439 2.944439 338 +friend 1 48 3.044522 3.044522 376 +move 1 47 3.091042 3.091042 382 +done 1 47 3.091042 3.091042 381 +anoth 1 45 3.135494 3.135494 408 +better 1 45 3.135494 3.135494 401 +made 1 44 3.135494 3.135494 398 +around 2 43 3.178054 6.356108 415 +might 1 41 3.218876 3.218876 426 +live 1 40 3.258097 3.258097 451 +annual 1 40 3.258097 3.258097 458 +paul 1 38 3.295837 3.295837 471 +electr 1 38 3.295837 3.295837 461 +jame 1 35 3.401197 3.401197 507 +return 1 34 3.401197 3.401197 502 +everi 1 34 3.401197 3.401197 519 +taught 3 33 3.433987 10.301961 526 +express 1 32 3.465736 3.465736 540 +photo 1 31 3.496508 3.496508 561 +someth 1 31 3.496508 3.496508 554 +travel 1 30 3.555348 3.555348 579 +univ 1 28 3.610918 3.610918 617 +usual 1 28 3.610918 3.610918 608 +hope 1 28 3.610918 3.610918 610 +concern 1 25 3.737670 3.737670 666 +hill 1 25 3.737670 3.737670 670 +never 1 25 3.737670 3.737670 671 +togeth 1 23 3.806662 3.806662 714 +dai 2 22 3.850148 7.700296 753 +inth 1 22 3.850148 3.850148 741 +love 1 21 3.912023 3.912023 804 +tell 1 21 3.912023 3.912023 777 +rout 1 21 3.912023 3.912023 793 +longer 1 20 3.951244 3.951244 816 +north 1 19 4.007333 4.007333 873 +particularli 1 19 4.007333 4.007333 867 +offici 1 18 4.060443 4.060443 894 +lot 1 18 4.060443 4.060443 889 +took 1 16 4.174387 4.174387 1010 +easi 1 16 4.174387 4.174387 969 +prolog 3 13 4.382027 13.146081 1155 +someon 1 13 4.382027 4.382027 1128 +everyon 1 13 4.382027 4.382027 1148 +brother 1 13 4.382027 4.382027 1189 +scan 1 12 4.465908 4.465908 1243 +rememb 1 12 4.465908 4.465908 1217 +rest 1 12 4.465908 4.465908 1259 +danc 1 12 4.465908 4.465908 1278 +chri 3 11 4.553877 13.661631 1311 +bike 6 10 4.653960 27.923760 1468 +franklin 2 10 4.653960 9.307920 1436 +rapid 1 10 4.653960 4.653960 1453 +town 1 10 4.653960 4.653960 1458 +folk 1 9 4.753590 4.753590 1597 +pagei 1 8 4.875197 4.875197 1683 +character 1 8 4.875197 4.875197 1767 +on 1 8 4.875197 4.875197 1628 +andcomput 1 8 4.875197 4.875197 1623 +partner 1 8 4.875197 4.875197 1648 +ride 1 8 4.875197 4.875197 1741 +davi 4 7 5.010635 20.042540 1888 +fortun 1 7 5.010635 5.010635 1872 +necessarili 1 7 5.010635 5.010635 1899 +throughout 1 7 5.010635 5.010635 1871 +portland 1 7 5.010635 5.010635 1878 +header 1 7 5.010635 5.010635 1787 +somewher 1 6 5.164786 5.164786 2176 +restaur 1 6 5.164786 5.164786 2230 +drop 1 6 5.164786 5.164786 2008 +poem 1 5 5.347108 5.347108 2483 +exchang 1 5 5.347108 5.347108 2310 +hire 1 4 5.568345 5.568345 2976 +glad 1 4 5.568345 5.568345 2657 +tend 1 4 5.568345 5.568345 3041 +kept 1 4 5.568345 5.568345 2762 +pagepaul 1 3 5.857933 5.857933 3669 +thesumm 1 3 5.857933 5.857933 3763 +matt 1 3 5.857933 5.857933 3792 +flat 1 3 5.857933 5.857933 3853 +aboutthi 1 2 6.263398 6.263398 5627 +oneof 1 2 6.263398 6.263398 4674 +diploma 1 2 6.263398 6.263398 5990 +bergen 1 2 6.263398 6.263398 5991 +itin 1 2 6.263398 6.263398 5992 +toseattl 1 2 6.263398 6.263398 5919 +myoffic 1 1 6.957497 6.957497 16155 +iliv 1 1 6.957497 6.957497 16156 +norwegian 1 1 6.957497 6.957497 16157 +likea 1 1 6.957497 6.957497 16158 +localchines 1 1 6.957497 6.957497 16159 +mundan 1 1 6.957497 6.957497 16160 +stuffi 1 1 6.957497 6.957497 16161 +hotlink 1 1 6.957497 6.957497 16162 +pagesstuff 1 1 6.957497 6.957497 16163 +maintainmi 1 1 6.957497 6.957497 16164 +mewher 1 1 6.957497 6.957497 16165 +inmorgan 1 1 6.957497 6.957497 16166 +fromuc 1 1 6.957497 6.957497 16167 +andy 1 1 6.957497 6.957497 16168 +ididn 1 1 6.957497 6.957497 16169 +ialso 1 1 6.957497 6.957497 16170 +stuffwhil 1 1 6.957497 6.957497 16171 +relatedact 1 1 6.957497 6.957497 16172 +evengot 1 1 6.957497 6.957497 16173 +marriag 1 1 6.957497 6.957497 16174 +joann 1 1 6.957497 6.957497 16175 +anexcus 1 1 6.957497 6.957497 16176 +ofbergen 1 1 6.957497 6.957497 16177 +hillier 1 1 6.957497 6.957497 16178 +returnedto 1 1 6.957497 6.957497 16179 +rollerblad 1 1 6.957497 6.957497 16180 +wasnow 1 1 6.957497 6.957497 16181 +hewlettpackard 1 1 6.957497 6.957497 16182 +vengeanc 1 1 6.957497 6.957497 16183 +intwo 1 1 6.957497 6.957497 16184 +inseason 1 1 6.957497 6.957497 16185 +justcommut 1 1 6.957497 6.957497 16186 +lindyhop 1 1 6.957497 6.957497 16187 +linethat 1 1 6.957497 6.957497 16188 +doctyp 1 1 6.957497 6.957497 16189 +ietf 1 1 6.957497 6.957497 16190 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pighin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pighin^ new file mode 100644 index 00000000..5f902a65 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^pighin^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +washington 1 236 1.386294 1.386294 32 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +implement 1 152 1.791759 1.791759 52 +like 2 132 1.945910 3.891820 81 +note 2 142 1.945910 3.891820 67 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +memori 1 101 2.302585 2.302585 139 +take 1 97 2.302585 2.302585 134 +pictur 2 89 2.397895 4.795790 160 +graphic 1 90 2.397895 2.397895 147 +proceed 1 93 2.397895 2.397895 152 +decemb 1 80 2.564949 2.564949 215 +name 1 72 2.639057 2.639057 220 +symposium 1 72 2.639057 2.639057 238 +collect 1 65 2.772589 2.772589 268 +found 1 53 2.944439 2.944439 337 +much 1 52 2.995732 2.995732 349 +principl 1 48 3.044522 3.044522 357 +quarter 1 47 3.091042 3.091042 389 +even 1 45 3.135494 3.135494 393 +might 3 41 3.218876 9.656628 426 +live 2 40 3.258097 6.516194 451 +movi 1 40 3.258097 3.258097 459 +workstat 1 37 3.332205 3.332205 479 +everi 2 34 3.401197 6.802394 519 +global 1 34 3.401197 3.401197 520 +michael 1 35 3.401197 3.401197 514 +taught 1 33 3.433987 3.433987 526 +often 1 31 3.496508 3.496508 551 +cluster 1 28 3.610918 3.610918 612 +although 1 25 3.737670 3.737670 667 +william 1 22 3.850148 3.850148 765 +minut 2 20 3.951244 7.902488 810 +wonder 1 20 3.951244 3.951244 815 +mpeg 1 20 3.951244 3.951244 831 +supervis 1 20 3.951244 3.951244 840 +citi 1 19 4.007333 4.007333 874 +five 1 19 4.007333 4.007333 841 +otherwis 1 17 4.110874 4.110874 922 +action 2 15 4.248495 8.496990 1038 +qual 1 15 4.248495 4.248495 1062 +comic 1 14 4.317488 4.317488 1103 +levi 1 14 4.317488 4.317488 1093 +squar 1 14 4.317488 4.317488 1082 +karlin 3 13 4.382027 13.146081 1176 +nick 1 13 4.382027 4.382027 1180 +anna 3 12 4.465908 13.397724 1292 +rest 1 12 4.465908 4.465908 1259 +song 1 11 4.553877 4.553877 1380 +henri 1 10 4.653960 4.653960 1417 +swim 1 9 4.753590 4.753590 1599 +french 1 9 4.753590 4.753590 1511 +morgan 1 9 4.753590 4.753590 1484 +juan 1 9 4.753590 4.753590 1580 +tourist 1 8 4.875197 4.875197 1710 +surpris 1 7 5.010635 5.010635 1828 +feelei 1 7 5.010635 5.010635 1859 +rain 2 6 5.164786 10.329572 2137 +cat 1 6 5.164786 5.164786 2194 +pari 1 6 5.164786 5.164786 2158 +lucki 1 6 5.164786 5.164786 2163 +gui 1 5 5.347108 5.347108 2573 +british 1 5 5.347108 5.347108 2546 +formerli 1 5 5.347108 5.347108 2397 +door 1 5 5.347108 5.347108 2291 +darren 1 5 5.347108 5.347108 2565 +pighin 3 4 5.568345 16.705035 2735 +ta 1 4 5.568345 5.568345 3058 +breath 1 4 5.568345 5.568345 2946 +dark 1 4 5.568345 5.568345 2910 +berlin 1 3 5.857933 5.857933 3263 +marin 1 3 5.857933 5.857933 3947 +freder 1 3 5.857933 5.857933 3352 +thekkath 1 3 5.857933 5.857933 3973 +monti 1 2 6.263398 6.263398 4993 +python 1 2 6.263398 6.263398 4994 +cave 1 2 6.263398 6.263398 4959 +italian 1 2 6.263398 6.263398 5993 +simpson 1 2 6.263398 6.263398 5994 +chandramohan 1 2 6.263398 6.263398 5965 +refresh 2 1 6.957497 13.914994 16191 +frdric 1 1 6.957497 6.957497 16192 +lcommun 1 1 6.957497 6.957497 16193 +dani 1 1 6.957497 6.957497 16194 +corto 1 1 6.957497 6.957497 16195 +maltes 1 1 6.957497 6.957497 16196 +venis 1 1 6.957497 6.957497 16197 +traditionn 1 1 6.957497 6.957497 16198 +systemher 1 1 6.957497 6.957497 16199 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rea^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rea^ new file mode 100644 index 00000000..89823af2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rea^ @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +seattl 1 120 2.079442 2.079442 103 +map 1 39 3.258097 3.258097 452 +anderson 2 19 4.007333 8.014666 860 +brother 1 13 4.382027 4.382027 1189 +ruth 2 7 5.010635 10.021270 1870 +wxyc 1 1 6.957497 6.957497 16200 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^redstone^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^redstone^ new file mode 100644 index 00000000..999f900f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^redstone^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 3 236 1.386294 4.158882 32 +seattl 1 120 2.079442 2.079442 103 +redston 3 3 5.857933 17.573799 3332 +joshua 1 3 5.857933 5.857933 3333 +josh 1 2 6.263398 6.263398 5945 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rex^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rex^ new file mode 100644 index 00000000..615b9449 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rex^ @@ -0,0 +1,163 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +base 2 165 1.791759 3.583518 50 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +professor 2 137 1.945910 3.891820 76 +construct 1 139 1.945910 1.945910 82 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +seattl 2 120 2.079442 4.158884 103 +databas 2 122 2.079442 4.158884 86 +manag 2 114 2.197225 4.394450 125 +site 1 106 2.197225 2.197225 119 +make 1 111 2.197225 2.197225 120 +part 1 98 2.302585 2.302585 129 +advanc 1 99 2.302585 2.302585 130 +book 1 99 2.302585 2.302585 131 +imag 2 91 2.397895 4.795790 161 +pictur 1 89 2.397895 2.397895 160 +build 2 85 2.484907 4.969814 184 +librari 2 87 2.484907 4.969814 181 +good 1 77 2.564949 2.564949 200 +onlin 2 75 2.639057 5.278114 223 +line 1 75 2.639057 2.639057 231 +write 1 72 2.639057 2.639057 222 +knowledg 1 67 2.708050 2.708050 243 +would 1 67 2.708050 2.708050 251 +java 1 70 2.708050 2.708050 248 +interact 1 62 2.772589 2.772589 270 +visit 1 63 2.772589 2.772589 288 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +local 1 55 2.944439 2.944439 334 +extens 1 53 2.944439 2.944439 340 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +run 1 51 2.995732 2.995732 347 +profession 1 51 2.995732 2.995732 345 +get 1 46 3.091042 3.091042 380 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +live 2 40 3.258097 6.516194 451 +probabl 1 40 3.258097 3.258097 455 +map 1 39 3.258097 3.258097 452 +movi 1 40 3.258097 3.258097 459 +multi 1 36 3.367296 3.367296 493 +everi 1 34 3.401197 3.401197 519 +jame 1 35 3.401197 3.401197 507 +word 1 34 3.401197 3.401197 508 +taught 1 33 3.433987 3.433987 526 +human 1 32 3.465736 3.465736 546 +framework 1 28 3.610918 3.610918 606 +team 1 27 3.637586 3.637586 625 +great 1 27 3.637586 3.637586 626 +todai 1 25 3.737670 3.737670 672 +alwai 1 24 3.761200 3.761200 691 +daili 1 24 3.761200 3.761200 706 +famili 1 23 3.806662 3.806662 735 +ofwashington 1 22 3.850148 3.850148 766 +color 1 22 3.850148 3.850148 762 +wonder 1 20 3.951244 3.951244 815 +nice 1 20 3.951244 3.951244 809 +toolkit 1 20 3.951244 3.951244 835 +expert 1 20 3.951244 3.951244 833 +wrote 1 20 3.951244 3.951244 830 +citi 1 19 4.007333 4.007333 874 +media 1 19 4.007333 4.007333 861 +els 1 19 4.007333 4.007333 843 +beauti 1 18 4.060443 4.060443 912 +stand 1 18 4.060443 4.060443 891 +repositori 1 17 4.110874 4.110874 932 +stat 1 17 4.110874 4.110874 924 +stock 1 16 4.174387 4.174387 1007 +psycholog 2 15 4.248495 8.496990 1054 +camera 2 14 4.317488 8.634976 1115 +happi 1 14 4.317488 4.317488 1079 +bodi 1 13 4.382027 4.382027 1178 +script 1 13 4.382027 4.382027 1171 +step 1 13 4.382027 4.382027 1138 +outsid 1 12 4.465908 4.465908 1219 +perl 1 11 4.553877 4.553877 1332 +magic 1 11 4.553877 4.553877 1358 +market 1 11 4.553877 4.553877 1361 +town 1 10 4.653960 4.653960 1458 +traffic 1 10 4.653960 4.653960 1421 +departmentof 1 9 4.753590 4.753590 1539 +clear 1 9 4.753590 4.753590 1488 +brain 1 8 4.875197 4.875197 1638 +rais 1 8 4.875197 4.875197 1711 +leon 1 8 4.875197 4.875197 1631 +chanc 1 7 5.010635 5.010635 1960 +vallei 1 7 5.010635 5.010635 1959 +footbal 1 7 5.010635 5.010635 1912 +rain 1 6 5.164786 5.164786 2137 +peek 1 6 5.164786 5.164786 2169 +camp 1 5 5.347108 5.347108 2545 +poem 1 5 5.347108 5.347108 2483 +fountain 1 4 5.568345 5.568345 3069 +catch 1 4 5.568345 5.568345 2602 +glimps 1 4 5.568345 5.568345 2778 +proud 1 4 5.568345 5.568345 2918 +leagu 1 4 5.568345 5.568345 3040 +fantasi 1 4 5.568345 5.568345 3055 +jakobovit 4 3 5.857933 23.431732 3913 +hawaii 2 3 5.857933 11.715866 3888 +foster 1 3 5.857933 5.857933 3159 +eddi 1 3 5.857933 5.857933 3896 +drag 1 3 5.857933 5.857933 3434 +mount 1 2 6.263398 6.263398 5995 +youcan 1 2 6.263398 6.263398 4373 +glori 1 2 6.263398 6.263398 5327 +consol 1 2 6.263398 6.263398 4048 +atla 1 2 6.263398 6.263398 5996 +databaseenviron 1 2 6.263398 6.263398 5792 +persistentprogram 1 2 6.263398 6.263398 5997 +creator 1 2 6.263398 6.263398 5998 +ladder 1 2 6.263398 6.263398 5858 +newslet 1 2 6.263398 6.263398 5860 +dian 1 2 6.263398 6.263398 5536 +judi 1 2 6.263398 6.263398 4442 +usai 1 1 6.957497 6.957497 16201 +thisup 1 1 6.957497 6.957497 16202 +weatherreport 1 1 6.957497 6.957497 16203 +sneak 1 1 6.957497 6.957497 16204 +drumhel 1 1 6.957497 6.957497 16205 +rainier 1 1 6.957497 6.957497 16206 +cleardai 1 1 6.957497 6.957497 16207 +astructur 1 1 6.957497 6.957497 16208 +anatomist 1 1 6.957497 6.957497 16209 +internetracquetbal 1 1 6.957497 6.957497 16210 +rotisseriebasebal 1 1 6.957497 6.957497 16211 +fromusa 1 1 6.957497 6.957497 16212 +africancichlid 1 1 6.957497 6.957497 16213 +honolulu 1 1 6.957497 6.957497 16214 +kalalau 1 1 6.957497 6.957497 16215 +gambl 1 1 6.957497 6.957497 16216 +darn 1 1 6.957497 6.957497 16217 +javafamili 1 1 6.957497 6.957497 16218 +mydad 1 1 6.957497 6.957497 16219 +whoi 1 1 6.957497 6.957497 16220 +polem 1 1 6.957497 6.957497 16221 +emanuel 1 1 6.957497 6.957497 16222 +swedenborg 1 1 6.957497 6.957497 16223 +nahl 1 1 6.957497 6.957497 16224 +whoprovid 1 1 6.957497 6.957497 16225 +realtor 1 1 6.957497 6.957497 16226 +uncl 1 1 6.957497 6.957497 16227 +bioscienc 1 1 6.957497 6.957497 16228 +bookmarksif 1 1 6.957497 6.957497 16229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ new file mode 100644 index 00000000..7e2587cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rgrimm^ @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +welcom 1 122 2.079442 2.079442 99 +right 1 48 3.044522 3.044522 363 +robert 1 30 3.555348 3.555348 567 +head 1 23 3.806662 3.806662 732 +reserv 1 20 3.951244 3.951244 808 +five 1 19 4.007333 4.007333 841 +photograph 1 15 4.248495 4.248495 1056 +galleri 1 13 4.382027 4.382027 1192 +twenti 1 5 5.347108 5.347108 2540 +twilight 1 1 6.957497 6.957497 16230 +grimm 1 1 6.957497 6.957497 16231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^ new file mode 100644 index 00000000..84a023e8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^ @@ -0,0 +1,143 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +also 1 259 1.386294 1.386294 28 +paper 2 205 1.609438 3.218876 38 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +perform 3 143 1.945910 5.837730 74 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +memori 2 101 2.302585 4.605170 139 +peopl 1 96 2.302585 2.302585 132 +larg 1 82 2.484907 2.484907 168 +learn 1 86 2.484907 2.484907 170 +journal 1 83 2.484907 2.484907 183 +dynam 2 76 2.564949 5.129898 194 +appear 1 78 2.564949 2.564949 210 +addit 1 74 2.639057 2.639057 228 +sieg 1 69 2.708050 2.708050 260 +would 1 67 2.708050 2.708050 251 +polici 1 64 2.772589 2.772589 279 +abstract 1 62 2.772589 2.772589 276 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +direct 1 57 2.890372 2.890372 316 +three 1 54 2.944439 2.944439 330 +hardwar 1 51 2.995732 2.995732 350 +friend 2 48 3.044522 6.089044 376 +standard 1 48 3.044522 3.044522 365 +could 1 46 3.091042 3.091042 383 +physic 1 47 3.091042 3.091042 377 +mark 1 44 3.135494 3.135494 403 +cach 1 41 3.218876 3.218876 432 +edit 1 42 3.218876 3.218876 418 +realli 1 40 3.258097 3.258097 444 +map 1 39 3.258097 3.258097 452 +origin 1 38 3.295837 3.295837 472 +bibliographi 1 34 3.401197 3.401197 518 +eduoffic 1 33 3.433987 3.433987 531 +scientist 2 31 3.496508 6.993016 560 +travel 1 30 3.555348 3.555348 579 +american 1 27 3.637586 3.637586 634 +symbol 1 27 3.637586 3.637586 620 +subject 1 26 3.688879 3.688879 647 +hill 1 25 3.737670 3.737670 670 +interpret 1 24 3.761200 3.761200 686 +togeth 1 23 3.806662 3.806662 714 +reduc 1 22 3.850148 3.850148 759 +chen 3 21 3.912023 11.736069 791 +avoid 1 21 3.912023 3.912023 799 +hous 1 21 3.912023 3.912023 801 +wrote 1 20 3.951244 3.951244 830 +miss 1 19 4.007333 4.007333 866 +els 1 19 4.007333 4.007333 843 +bershad 5 18 4.060443 20.302215 902 +attend 1 18 4.060443 4.060443 893 +asplo 2 17 4.110874 8.221748 948 +thought 1 17 4.110874 4.110874 945 +chateau 1 16 4.174387 4.174387 997 +took 1 16 4.174387 4.174387 1010 +overhead 1 15 4.248495 4.248495 1035 +countri 1 15 4.248495 4.248495 1059 +levi 1 14 4.317488 4.317488 1093 +conduct 1 14 4.317488 4.317488 1065 +karlin 2 13 4.382027 8.764054 1176 +alan 1 13 4.382027 4.382027 1146 +brad 1 12 4.465908 4.465908 1264 +anna 1 12 4.465908 4.465908 1292 +promot 1 12 4.465908 4.465908 1235 +food 1 12 4.465908 4.465908 1285 +speak 1 12 4.465908 4.465908 1283 +isca 2 11 4.553877 9.107754 1354 +smart 1 11 4.553877 4.553877 1352 +denni 1 11 4.553877 4.553877 1321 +baer 1 11 4.553877 4.553877 1353 +wong 2 9 4.753590 9.507180 1609 +osdi 1 9 4.753590 4.753590 1534 +voelker 1 9 4.753590 4.753590 1557 +said 1 9 4.753590 4.753590 1571 +didn 1 9 4.753590 4.753590 1563 +romer 10 8 4.875197 48.751970 1706 +wayn 1 8 4.875197 4.875197 1738 +judg 1 8 4.875197 4.875197 1644 +interestsi 1 7 5.010635 5.010635 1969 +supportfor 1 7 5.010635 5.010635 1854 +tip 1 7 5.010635 5.010635 1863 +conflict 2 6 5.164786 10.329572 2041 +theproject 1 6 5.164786 5.164786 1981 +wolman 1 6 5.164786 5.164786 2093 +edward 1 6 5.164786 5.164786 2050 +ohlrich 2 5 5.347108 10.694216 2564 +hair 1 5 5.347108 5.347108 2446 +ration 1 5 5.347108 5.347108 2427 +unknown 1 5 5.347108 5.347108 2318 +tuft 1 5 5.347108 5.347108 2575 +couldn 1 4 5.568345 5.568345 2977 +accompani 1 4 5.568345 5.568345 2666 +mappedcach 1 3 5.857933 5.857933 3928 +knee 1 3 5.857933 5.857933 3980 +surgeri 1 3 5.857933 5.857933 3975 +europ 1 3 5.857933 5.857933 3761 +lunch 1 3 5.857933 5.857933 3369 +father 1 3 5.857933 5.857933 3757 +systemswith 1 2 6.263398 6.263398 5342 +eustac 1 2 6.263398 6.263398 5866 +onlinesuperpag 1 2 6.263398 6.263398 5819 +resolutionon 1 2 6.263398 6.263398 5867 +stuffa 1 2 6.263398 6.263398 5999 +rai 1 2 6.263398 6.263398 5915 +ticker 1 2 6.263398 6.263398 5247 +likebrian 1 1 6.957497 6.957497 16232 +andwayn 1 1 6.957497 6.957497 16233 +ofinterpret 1 1 6.957497 6.957497 16234 +rockyhom 1 1 6.957497 6.957497 16235 +lobo 1 1 6.957497 6.957497 16236 +listrandom 1 1 6.957497 6.957497 16237 +limb 1 1 6.957497 6.957497 16238 +arthroscop 1 1 6.957497 6.957497 16239 +wrist 1 1 6.957497 6.957497 16240 +dylansaid 1 1 6.957497 6.957497 16241 +flowbe 1 1 6.957497 6.957497 16242 +beingexperiment 1 1 6.957497 6.957497 16243 +somepictur 1 1 6.957497 6.957497 16244 +eatsomeon 1 1 6.957497 6.957497 16245 +sincer 1 1 6.957497 6.957497 16246 +forexampl 1 1 6.957497 6.957497 16247 +leftth 1 1 6.957497 6.957497 16248 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html new file mode 100644 index 00000000..50c8035f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^memsys^index.html @@ -0,0 +1,158 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +univers 4 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 10 443 0.693147 6.931470 6 +research 3 431 0.693147 2.079441 10 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +project 6 340 1.098612 6.591672 18 +us 4 329 1.098612 4.394448 16 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +washington 10 236 1.386294 13.862940 32 +also 1 259 1.386294 1.386294 28 +paper 2 205 1.609438 3.218876 38 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +applic 3 170 1.791759 5.375277 56 +algorithm 2 162 1.791759 3.583518 57 +recent 1 167 1.791759 1.791759 58 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +perform 6 143 1.945910 11.675460 74 +support 2 132 1.945910 3.891820 83 +construct 2 139 1.945910 3.891820 82 +problem 1 147 1.945910 1.945910 75 +architectur 1 139 1.945910 1.945910 77 +report 2 131 2.079442 4.158884 92 +seattl 1 120 2.079442 2.079442 103 +welcom 1 122 2.079442 2.079442 99 +intern 2 108 2.197225 4.394450 128 +look 1 107 2.197225 2.197225 115 +code 1 108 2.197225 2.197225 116 +memori 10 101 2.302585 23.025850 139 +techniqu 1 99 2.302585 2.302585 138 +follow 1 92 2.397895 2.397895 143 +larg 1 82 2.484907 2.484907 168 +dynam 4 76 2.564949 10.259796 194 +appear 2 78 2.564949 5.129898 210 +sourc 1 77 2.564949 2.564949 201 +refer 1 78 2.564949 2.564949 203 +good 1 77 2.564949 2.564949 200 +master 1 76 2.564949 2.564949 216 +onlin 2 75 2.639057 5.278114 223 +order 1 69 2.708050 2.708050 249 +simul 1 66 2.708050 2.708050 255 +would 1 67 2.708050 2.708050 251 +differ 1 66 2.708050 2.708050 253 +polici 5 64 2.772589 13.862945 279 +improv 3 62 2.772589 8.317767 289 +descript 2 64 2.772589 5.545178 271 +collect 1 65 2.772589 2.772589 268 +result 1 65 2.772589 2.772589 281 +copi 1 63 2.772589 2.772589 284 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +sever 1 56 2.890372 2.890372 322 +space 1 57 2.890372 2.890372 310 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +cover 1 55 2.944439 2.944439 329 +instruct 1 53 2.944439 2.944439 332 +hardwar 2 51 2.995732 5.991464 350 +investig 1 51 2.995732 2.995732 353 +without 1 50 3.044522 3.044522 370 +standard 1 48 3.044522 3.044522 365 +featur 1 46 3.091042 3.091042 386 +adapt 1 46 3.091042 3.091042 387 +effect 1 46 3.091042 3.091042 385 +mechan 2 43 3.178054 6.356108 416 +cach 4 41 3.218876 12.875504 432 +combin 1 42 3.218876 3.218876 421 +futur 1 41 3.218876 3.218876 427 +small 2 39 3.258097 6.516194 447 +map 2 39 3.258097 6.516194 452 +brian 1 38 3.295837 3.295837 466 +cost 2 37 3.332205 6.664410 480 +bibliographi 1 34 3.401197 3.401197 518 +compon 1 30 3.555348 3.555348 570 +pattern 1 24 3.761200 3.761200 689 +size 3 23 3.806662 11.419986 713 +identifi 3 22 3.850148 11.550444 760 +reduc 2 22 3.850148 7.700296 759 +chen 2 21 3.912023 7.824046 791 +avoid 1 21 3.912023 3.912023 799 +miss 4 19 4.007333 16.029332 866 +region 2 19 4.007333 8.014666 875 +runtim 1 19 4.007333 4.007333 858 +comparison 1 19 4.007333 4.007333 863 +bershad 5 18 4.060443 20.302215 902 +behavior 1 18 4.060443 4.060443 881 +monitor 4 17 4.110874 16.443496 941 +modif 1 17 4.110874 4.110874 913 +asplo 1 17 4.110874 4.110874 948 +overhead 4 15 4.248495 16.993980 1035 +qual 3 15 4.248495 12.745485 1062 +karlin 3 13 4.382027 13.146081 1176 +whose 1 13 4.382027 4.382027 1166 +someon 1 13 4.382027 4.382027 1128 +resolut 1 13 4.382027 4.382027 1172 +promot 2 12 4.465908 8.931816 1235 +overal 1 12 4.465908 4.465908 1254 +anna 1 12 4.465908 4.465908 1292 +isca 2 11 4.553877 9.107754 1354 +denni 1 11 4.553877 4.553877 1321 +alpha 1 11 4.553877 4.553877 1348 +operatingsystem 1 10 4.653960 4.653960 1401 +reli 1 10 4.653960 4.653960 1411 +wong 2 9 4.753590 9.507180 1609 +significantli 1 9 4.753590 4.753590 1508 +osdi 1 9 4.753590 4.753590 1534 +romer 7 8 4.875197 34.126379 1706 +wayn 2 8 4.875197 9.750394 1738 +poor 1 8 4.875197 4.875197 1736 +potenti 1 8 4.875197 4.875197 1690 +delai 1 7 5.010635 5.010635 1848 +larger 1 7 5.010635 5.010635 1875 +reduct 1 7 5.010635 5.010635 1877 +conflict 2 6 5.164786 10.329572 2041 +benefit 1 6 5.164786 5.164786 2213 +ohlrich 3 5 5.347108 16.041324 2564 +resolv 1 4 5.568345 5.568345 2675 +bottleneck 1 4 5.568345 5.568345 2769 +mip 1 4 5.568345 5.568345 2738 +superpag 6 3 5.857933 35.147598 3978 +peoplefaculti 1 3 5.857933 5.857933 3981 +dlee 1 3 5.857933 5.857933 3949 +waynew 1 3 5.857933 5.857933 3982 +reorder 1 3 5.857933 5.857933 3952 +fragment 2 2 6.263398 12.526796 6000 +contigu 1 2 6.263398 6.263398 6001 +warrant 1 2 6.263398 6.263398 5697 +washingtonmemori 1 1 6.957497 6.957497 16249 +researchdepart 1 1 6.957497 6.957497 16250 +sharesth 1 1 6.957497 6.957497 16251 +incur 1 1 6.957497 6.957497 16252 +monitorappl 1 1 6.957497 6.957497 16253 +resolvetlb 1 1 6.957497 6.957497 16254 +tlbi 1 1 6.957497 6.957497 16255 +severalmodern 1 1 6.957497 6.957497 16256 +amultipl 1 1 6.957497 6.957497 16257 +tlbperform 1 1 6.957497 6.957497 16258 +ofwast 1 1 6.957497 6.957497 16259 +todiffer 1 1 6.957497 6.957497 16260 +constructingsuperpag 1 1 6.957497 6.957497 16261 +ofmemori 1 1 6.957497 6.957497 16262 +balancesth 1 1 6.957497 6.957497 16263 +tlbmiss 1 1 6.957497 6.957497 16264 +memorycopi 1 1 6.957497 6.957497 16265 +misspattern 1 1 6.957497 6.957497 16266 +attain 1 1 6.957497 6.957497 16267 +largepag 1 1 6.957497 6.957497 16268 +makea 1 1 6.957497 6.957497 16269 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ new file mode 100644 index 00000000..d3020294 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^romer^rocky^ @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +project 4 340 1.098612 4.394448 18 +last 2 314 1.098612 2.197224 14 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +washington 9 236 1.386294 12.476646 32 +languag 6 227 1.386294 8.317764 26 +updat 1 191 1.609438 1.609438 41 +implement 2 152 1.791759 3.583518 52 +avail 2 169 1.791759 3.583518 48 +applic 1 170 1.791759 1.791759 56 +read 1 154 1.791759 1.791759 47 +perform 6 143 1.945910 11.675460 74 +architectur 3 139 1.945910 5.837730 77 +year 1 148 1.945910 1.945910 84 +file 1 132 1.945910 1.945910 70 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +document 1 121 2.079442 2.079442 89 +intern 2 108 2.197225 4.394450 128 +structur 1 106 2.197225 2.197225 105 +part 1 98 2.302585 2.302585 129 +techniqu 1 99 2.302585 2.302585 138 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +environ 2 84 2.484907 4.969814 177 +build 1 85 2.484907 2.484907 184 +appear 1 78 2.564949 2.564949 210 +sourc 1 77 2.564949 2.564949 201 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +differ 1 66 2.708050 2.708050 253 +collect 2 65 2.772589 5.545178 268 +descript 1 64 2.772589 2.772589 271 +evalu 1 64 2.772589 2.772589 266 +juli 1 60 2.833213 2.833213 305 +sever 3 56 2.890372 8.671116 322 +variou 1 56 2.890372 2.890372 317 +processor 1 54 2.944439 2.944439 335 +execut 1 45 3.135494 3.135494 404 +examin 1 42 3.218876 3.218876 424 +brian 1 38 3.295837 3.295837 466 +becom 1 28 3.610918 3.610918 603 +measur 1 28 3.610918 3.610918 609 +strategi 1 25 3.737670 3.737670 682 +trace 1 25 3.737670 3.737670 677 +interpret 7 24 3.761200 26.328400 686 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +portabl 1 20 3.951244 3.951244 819 +safeti 1 20 3.951244 3.951244 817 +basi 1 20 3.951244 3.951244 828 +binari 1 20 3.951244 3.951244 823 +benchmark 2 19 4.007333 8.014666 859 +bershad 3 18 4.060443 12.181329 902 +asplo 1 17 4.110874 4.110874 948 +levi 3 14 4.317488 12.952464 1093 +demand 1 14 4.317488 4.317488 1073 +characterist 1 12 4.465908 4.465908 1257 +baer 3 11 4.553877 13.661631 1353 +perl 1 11 4.553877 4.553877 1332 +denni 1 11 4.553877 4.553877 1321 +perspect 1 10 4.653960 4.653960 1437 +jean 1 10 4.653960 4.653960 1440 +henri 1 10 4.653960 4.653960 1417 +voelker 3 9 4.753590 14.260770 1557 +wong 2 9 4.753590 9.507180 1609 +romer 3 8 4.875197 14.625591 1706 +gain 1 8 4.875197 4.875197 1730 +wayn 1 8 4.875197 4.875197 1738 +instrument 1 7 5.010635 5.010635 1954 +wolman 3 6 5.164786 15.494358 2093 +loup 1 6 5.164786 5.164786 2228 +geoff 1 6 5.164786 5.164786 2124 +eas 1 5 5.347108 5.347108 2267 +alec 1 5 5.347108 5.347108 2563 +rewrit 1 5 5.347108 5.347108 2367 +rocki 2 4 5.568345 11.136690 3048 +etch 2 4 5.568345 11.136690 2755 +increasingli 1 4 5.568345 5.568345 2766 +popular 1 4 5.568345 5.568345 2802 +insight 1 4 5.568345 5.568345 3024 +peoplefaculti 1 3 5.857933 5.857933 3981 +dlee 1 3 5.857933 5.857933 3949 +waynew 1 3 5.857933 5.857933 3982 +microbenchmark 1 2 6.263398 6.263398 5821 +mipsi 1 2 6.263398 6.263398 5882 +papersrom 1 1 6.957497 6.957497 16270 +abstractpostscriptjava 1 1 6.957497 6.957497 16271 +xjava 1 1 6.957497 6.957497 16272 +benchmarkstoolsto 1 1 6.957497 6.957497 16273 +vebeen 1 1 6.957497 6.957497 16274 +yetpublicli 1 1 6.957497 6.957497 16275 +etchhom 1 1 6.957497 6.957497 16276 +documentationproject 1 1 6.957497 6.957497 16277 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rrogers^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rrogers^ new file mode 100644 index 00000000..3af34c81 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^rrogers^ @@ -0,0 +1,99 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 6 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +work 2 380 0.693147 1.386294 9 +program 2 374 0.693147 1.386294 7 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +washington 3 236 1.386294 4.158882 32 +softwar 3 220 1.386294 4.158882 30 +also 2 259 1.386294 2.772588 28 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +phone 2 175 1.791759 3.583518 45 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +process 2 142 1.945910 3.891820 72 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +seattl 3 120 2.079442 6.238326 103 +document 2 121 2.079442 4.158884 89 +machin 1 129 2.079442 2.079442 95 +compil 1 122 2.079442 2.079442 96 +databas 1 122 2.079442 2.079442 86 +imag 2 91 2.397895 4.795790 161 +commun 1 95 2.397895 2.397895 157 +center 1 88 2.397895 2.397895 158 +grade 1 90 2.397895 2.397895 142 +librari 1 87 2.484907 2.484907 181 +help 1 83 2.484907 2.484907 175 +educ 1 86 2.484907 2.484907 191 +intellig 2 72 2.639057 5.278114 225 +nation 2 74 2.639057 5.278114 240 +sieg 1 69 2.708050 2.708050 260 +simul 1 66 2.708050 2.708050 255 +order 1 69 2.708050 2.708050 249 +foundat 1 62 2.772589 2.772589 286 +best 1 59 2.833213 2.833213 299 +summer 1 56 2.890372 2.890372 311 +februari 1 54 2.944439 2.944439 328 +basic 1 50 3.044522 3.044522 360 +offer 1 43 3.178054 3.178054 414 +long 1 43 3.178054 3.178054 413 +obtain 1 33 3.433987 3.433987 534 +richard 2 31 3.496508 6.993016 559 +produc 1 30 3.555348 3.555348 572 +arrai 1 27 3.637586 3.637586 627 +departmentunivers 1 24 3.761200 3.761200 711 +recognit 1 23 3.806662 3.806662 723 +director 1 22 3.850148 3.850148 767 +fund 1 21 3.912023 3.912023 805 +facil 1 20 3.951244 3.951244 814 +increas 1 20 3.951244 3.951244 829 +beauti 1 18 4.060443 4.060443 912 +chateau 1 16 4.174387 4.174387 997 +massiv 1 15 4.248495 4.248495 1026 +charact 1 15 4.248495 4.248495 1028 +optic 1 12 4.465908 4.465908 1221 +minor 1 12 4.465908 4.465908 1237 +island 1 11 4.553877 4.553877 1345 +length 1 10 4.653960 4.653960 1400 +juan 1 9 4.753590 4.753590 1580 +extract 1 8 4.875197 4.875197 1728 +roger 1 7 5.010635 5.010635 1892 +northwest 1 7 5.010635 5.010635 1973 +usaoffic 1 6 5.164786 5.164786 2159 +layout 1 6 5.164786 5.164786 2183 +camp 2 5 5.347108 10.694216 2545 +educomput 1 5 5.347108 5.347108 2524 +cellular 1 5 5.347108 5.347108 2433 +girl 1 5 5.347108 5.347108 2410 +snake 1 5 5.347108 5.347108 2281 +radio 1 4 5.568345 5.568345 3025 +bake 1 2 6.263398 6.263398 4468 +scam 2 1 6.957497 13.914994 16278 +splash 2 1 6.957497 13.914994 16279 +rogersrrog 1 1 6.957497 6.957497 16280 +laboratri 1 1 6.957497 6.957497 16281 +systol 1 1 6.957497 6.957497 16282 +morpholog 1 1 6.957497 6.957497 16283 +groundtruth 1 1 6.957497 6.957497 16284 +environment 1 1 6.957497 6.957497 16285 +ncee 1 1 6.957497 6.957497 16286 +ag 1 1 6.957497 6.957497 16287 +corn 1 1 6.957497 6.957497 16288 +jessica 1 1 6.957497 6.957497 16289 +squishi 1 1 6.957497 6.957497 16290 +kuow 1 1 6.957497 6.957497 16291 +stationi 1 1 6.957497 6.957497 16292 +pecan 1 1 6.957497 6.957497 16293 +seattlelast 1 1 6.957497 6.957497 16294 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ruzzo b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ruzzo new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^ruzzo @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^salesin b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^salesin new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^salesin @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^salisbur^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^salisbur^ new file mode 100644 index 00000000..d11e093a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^salisbur^ @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +seattl 1 120 2.079442 2.079442 103 +school 1 84 2.484907 2.484907 188 +stuff 1 87 2.484907 2.484907 171 +sieg 1 69 2.708050 2.708050 260 +friend 1 48 3.044522 3.044522 376 +mike 2 24 3.761200 7.522400 703 +chateau 1 16 4.174387 4.174387 997 +usaoffic 1 6 5.164786 5.164786 2159 +salisburysalisbur 1 1 6.957497 6.957497 16295 +lifehistori 1 1 6.957497 6.957497 16296 +vitacool 1 1 6.957497 6.957497 16297 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^savage^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^savage^ new file mode 100644 index 00000000..d5075bf0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^savage^ @@ -0,0 +1,178 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 22 443 0.693147 15.249234 6 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +time 3 293 1.098612 3.295836 17 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +washington 3 236 1.386294 4.158882 32 +softwar 3 220 1.386294 4.158882 30 +design 2 213 1.386294 2.772588 25 +languag 1 227 1.386294 1.386294 26 +oper 17 180 1.609438 27.360446 34 +paper 6 205 1.609438 9.656628 38 +applic 6 170 1.791759 10.750554 56 +implement 1 152 1.791759 1.791759 52 +first 5 140 1.945910 9.729550 71 +support 4 132 1.945910 7.783640 83 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +construct 1 139 1.945910 1.945910 82 +seattl 2 120 2.079442 4.158884 103 +compil 2 122 2.079442 4.158884 96 +report 2 131 2.079442 4.158884 92 +confer 2 126 2.079442 4.158884 100 +provid 1 121 2.079442 2.079442 94 +version 2 113 2.197225 4.394450 122 +specif 2 106 2.197225 4.394450 106 +find 1 111 2.197225 2.197225 111 +code 1 108 2.197225 2.197225 116 +topic 1 114 2.197225 2.197225 110 +intern 1 108 2.197225 2.197225 128 +manag 1 114 2.197225 2.197225 125 +need 2 98 2.302585 4.605170 135 +technic 1 100 2.302585 2.302585 140 +user 1 104 2.302585 2.302585 137 +proceed 10 93 2.397895 23.978950 152 +real 2 93 2.397895 4.795790 144 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +control 1 82 2.484907 2.484907 164 +thing 1 84 2.484907 2.484907 189 +novemb 1 81 2.484907 2.484907 179 +ieee 1 86 2.484907 2.484907 190 +issu 2 78 2.564949 5.129898 211 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +decemb 1 80 2.564949 2.564949 215 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +workshop 5 71 2.639057 13.195285 239 +symposium 3 72 2.639057 7.917171 238 +servic 2 72 2.639057 5.278114 236 +write 1 72 2.639057 2.639057 222 +multimedia 3 68 2.708050 8.124150 258 +integr 1 67 2.708050 2.708050 245 +abstract 2 62 2.772589 5.545178 276 +januari 2 62 2.772589 5.545178 264 +march 1 61 2.833213 2.833213 295 +best 1 59 2.833213 2.833213 299 +direct 1 57 2.890372 2.890372 316 +extens 6 53 2.944439 17.666634 340 +talk 4 53 2.944439 11.777756 336 +processor 4 54 2.944439 11.777756 335 +sampl 1 53 2.944439 2.944439 339 +right 1 48 3.044522 3.044522 363 +principl 1 48 3.044522 3.044522 357 +frequent 1 49 3.044522 3.044522 367 +adapt 1 46 3.091042 3.091042 387 +third 1 43 3.178054 3.178054 412 +review 1 42 3.218876 3.218876 425 +music 1 42 3.218876 3.218876 436 +slide 3 38 3.295837 9.887511 467 +industri 2 38 3.295837 6.591674 464 +brian 1 38 3.295837 3.295837 466 +workstat 1 37 3.332205 3.332205 479 +winter 1 36 3.367296 3.367296 500 +tech 2 35 3.401197 6.802394 515 +post 1 35 3.401197 3.401197 505 +independ 1 32 3.465736 3.465736 548 +platform 1 29 3.583519 3.583519 591 +american 2 27 3.637586 7.275172 634 +quit 1 27 3.637586 3.637586 633 +arrai 1 27 3.637586 3.637586 627 +disk 1 22 3.850148 3.850148 747 +similar 1 21 3.912023 3.912023 771 +reserv 4 20 3.951244 15.804976 808 +safeti 1 20 3.951244 3.951244 817 +longer 1 20 3.951244 3.951244 816 +histori 1 19 4.007333 4.007333 853 +boston 1 19 4.007333 4.007333 862 +bershad 2 18 4.060443 8.120886 902 +protect 1 17 4.110874 4.110874 935 +modern 1 16 4.174387 4.174387 966 +match 1 16 4.174387 4.174387 965 +diego 1 16 4.174387 4.174387 992 +fourth 1 16 4.174387 4.174387 999 +spin 4 14 4.317488 17.269952 1121 +rank 1 14 4.317488 4.317488 1063 +mellon 1 13 4.382027 4.382027 1179 +usenix 3 12 4.465908 13.397724 1240 +rest 1 12 4.465908 4.465908 1259 +carnegi 1 12 4.465908 4.465908 1260 +statement 1 11 4.553877 4.553877 1313 +island 1 11 4.553877 4.553877 1345 +rich 1 10 4.653960 4.653960 1396 +sosp 1 10 4.653960 4.653960 1416 +mountain 1 10 4.653960 4.653960 1456 +santa 1 10 4.653960 4.653960 1441 +inter 1 9 4.753590 4.753590 1530 +modula 1 9 4.753590 4.753590 1613 +osdi 1 9 4.753590 4.753590 1534 +capac 3 8 4.875197 14.625591 1740 +mach 3 8 4.875197 14.625591 1669 +ride 1 8 4.875197 4.875197 1741 +isol 1 8 4.875197 4.875197 1663 +sigop 1 8 4.875197 4.875197 1727 +european 1 8 4.875197 4.875197 1763 +cultur 1 7 5.010635 5.010635 1951 +migrat 1 7 5.010635 5.010635 1851 +centuri 1 7 5.010635 5.010635 1935 +microprocessor 1 7 5.010635 5.010635 1808 +trend 1 7 5.010635 5.010635 1842 +fifth 1 7 5.010635 5.010635 1931 +sixth 1 7 5.010635 5.010635 1917 +band 1 6 5.164786 5.164786 2198 +unpublish 1 6 5.164786 5.164786 2226 +usag 1 6 5.164786 5.164786 2209 +favor 1 5 5.347108 5.347108 2414 +panel 1 5 5.347108 5.347108 2463 +savag 3 4 5.568345 16.705035 2777 +microkernel 2 4 5.568345 11.136690 3047 +afraid 2 4 5.568345 11.136690 3053 +gradual 1 4 5.568345 5.568345 2997 +witha 1 4 5.568345 5.568345 2617 +firm 1 4 5.568345 5.568345 2684 +peer 1 4 5.568345 5.568345 2742 +andimplement 1 4 5.568345 5.568345 3029 +redund 1 4 5.568345 5.568345 2839 +stefan 2 3 5.857933 11.715866 3921 +wcsss 2 3 5.857933 11.715866 3956 +caught 1 3 5.857933 5.857933 3465 +irrelev 1 3 5.857933 5.857933 3823 +inconveni 1 3 5.857933 5.857933 3866 +distract 1 3 5.857933 5.857933 3945 +stillmaintain 1 3 5.857933 5.857933 3964 +copper 1 3 5.857933 5.857933 3536 +hoto 1 3 5.857933 5.857933 3577 +orca 1 3 5.857933 5.857933 3578 +tucson 2 2 6.263398 12.526796 5883 +fool 1 2 6.263398 6.263398 5353 +ofappl 1 2 6.263398 6.263398 6002 +whichsupport 1 2 6.263398 6.263398 6003 +monterei 1 2 6.263398 6.263398 4362 +wwo 1 2 6.263398 6.263398 5812 +export 1 2 6.263398 6.263398 5689 +pittsburghfor 1 1 6.957497 6.957497 16298 +mnow 1 1 6.957497 6.957497 16299 +strongbackground 1 1 6.957497 6.957497 16300 +trash 1 1 6.957497 6.957497 16301 +tocqeuvil 1 1 6.957497 6.957497 16302 +tiresom 1 1 6.957497 6.957497 16303 +exercisepolit 1 1 6.957497 6.957497 16304 +tocurr 1 1 6.957497 6.957497 16305 +merri 1 1 6.957497 6.957497 16306 +onan 1 1 6.957497 6.957497 16307 +projectsspinspin 1 1 6.957497 6.957497 16308 +omnifemtokernel 1 1 6.957497 6.957497 16309 +writingspin 1 1 6.957497 6.957497 16310 +napa 1 1 6.957497 6.957497 16311 +timer 1 1 6.957497 6.957497 16312 +hikingthi 1 1 6.957497 6.957497 16313 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sds^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sds^ new file mode 100644 index 00000000..a2e2aa70 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sds^ @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +david 1 71 2.639057 2.639057 232 +juli 1 60 2.833213 2.833213 305 +revis 1 26 3.688879 3.688879 640 +sean 2 8 4.875197 9.750394 1705 +sandi 2 4 5.568345 11.136690 2765 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^segal^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^segal^ new file mode 100644 index 00000000..05474321 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^segal^ @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +washington 1 236 1.386294 1.386294 32 +public 1 202 1.609438 1.609438 43 +seattl 1 120 2.079442 2.079442 103 +postscript 1 131 2.079442 2.079442 90 +person 1 111 2.197225 2.197225 117 +pictur 1 89 2.397895 2.397895 160 +internet 1 83 2.484907 2.484907 186 +overview 1 56 2.890372 2.890372 323 +better 1 45 3.135494 3.135494 401 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +richard 2 31 3.496508 6.993016 559 +famili 1 23 3.806662 3.806662 735 +half 1 21 3.912023 3.912023 776 +washingtonbox 1 13 4.382027 4.382027 1200 +ski 1 10 4.653960 4.653960 1471 +softbal 1 9 4.753590 4.753590 1594 +softbot 1 7 5.010635 5.010635 1974 +amus 1 5 5.347108 5.347108 2366 +racquetbal 1 4 5.568345 5.568345 3052 +biographi 1 3 5.857933 5.857933 3658 +brute 1 2 6.263398 6.263398 5892 +bicycl 1 2 6.263398 6.263398 5950 +segal 2 1 6.957497 13.914994 16314 +segaldepart 1 1 6.957497 6.957497 16315 +archeri 1 1 6.957497 6.957497 16316 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sgberg^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sgberg^ new file mode 100644 index 00000000..6ae8d4fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sgberg^ @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +work 2 380 0.693147 1.386294 9 +inform 2 412 0.693147 1.386294 8 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +project 8 340 1.098612 8.788896 18 +current 2 284 1.098612 2.197224 21 +time 1 293 1.098612 1.098612 17 +washington 4 236 1.386294 5.545176 32 +softwar 2 220 1.386294 2.772588 30 +email 1 220 1.386294 1.386294 29 +phone 2 175 1.791759 3.583518 45 +implement 2 152 1.791759 3.583518 52 +address 1 170 1.791759 1.791759 62 +algorithm 1 162 1.791759 1.791759 57 +avail 1 169 1.791759 1.791759 48 +hall 1 146 1.945910 1.945910 65 +like 1 132 1.945910 1.945910 81 +seattl 3 120 2.079442 6.238326 103 +high 2 130 2.079442 4.158884 101 +spring 1 131 2.079442 2.079442 88 +machin 1 129 2.079442 2.079442 95 +studi 1 120 2.079442 2.079442 91 +compil 1 122 2.079442 2.079442 96 +teach 3 108 2.197225 6.591675 112 +assist 3 112 2.197225 6.591675 113 +code 1 108 2.197225 2.197225 116 +peopl 1 96 2.302585 2.302585 132 +pictur 2 89 2.397895 4.795790 160 +activ 4 84 2.484907 9.939628 182 +school 2 84 2.484907 4.969814 188 +solut 2 82 2.484907 4.969814 162 +contain 1 81 2.484907 2.484907 174 +complet 2 77 2.564949 5.129898 208 +come 2 78 2.564949 5.129898 202 +sourc 1 77 2.564949 2.564949 201 +resum 1 79 2.564949 2.564949 217 +line 1 75 2.639057 2.639057 231 +receiv 2 66 2.708050 5.416100 244 +main 2 67 2.708050 5.416100 256 +sieg 1 69 2.708050 2.708050 260 +share 2 59 2.833213 5.666426 304 +content 1 59 2.833213 2.833213 302 +hardwar 2 51 2.995732 5.991464 350 +finger 1 52 2.995732 2.995732 354 +date 1 51 2.995732 2.995732 344 +without 1 50 3.044522 3.044522 370 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +even 1 45 3.135494 3.135494 393 +around 1 43 3.178054 3.178054 415 +past 2 42 3.218876 6.437752 428 +linear 1 41 3.218876 3.218876 431 +field 1 37 3.332205 3.332205 482 +expect 1 37 3.332205 3.332205 484 +print 2 34 3.401197 6.802394 503 +return 1 34 3.401197 3.401197 502 +weather 1 28 3.610918 3.610918 618 +bookmark 1 26 3.688879 3.688879 639 +toward 1 25 3.737670 3.737670 668 +sometim 1 24 3.761200 3.761200 696 +honor 1 23 3.806662 3.806662 729 +sort 1 22 3.850148 3.850148 738 +born 1 21 3.912023 3.912023 798 +unit 1 21 3.912023 3.912023 779 +els 4 19 4.007333 16.029332 843 +comparison 2 19 4.007333 8.014666 863 +north 1 19 4.007333 4.007333 873 +germani 1 17 4.110874 4.110874 946 +bachelor 1 17 4.110874 4.110874 957 +qual 1 15 4.248495 4.248495 1062 +squar 1 14 4.317488 4.317488 1082 +warn 1 14 4.317488 4.317488 1068 +excit 1 11 4.553877 4.553877 1329 +moment 1 11 4.553877 4.553877 1379 +didn 1 9 4.753590 4.753590 1563 +exact 1 9 4.753590 4.753590 1509 +reduct 1 7 5.010635 5.010635 1877 +byte 2 6 5.164786 10.329572 2108 +indiana 1 6 5.164786 5.164786 2057 +trail 1 6 5.164786 5.164786 2071 +fewer 1 6 5.164786 5.164786 2074 +fals 2 4 5.568345 11.136690 2861 +raft 1 4 5.568345 5.568345 3060 +crazi 1 4 5.568345 5.568345 2822 +shouldn 1 4 5.568345 5.568345 2606 +stefan 4 3 5.857933 23.431732 3921 +char 6 2 6.263398 37.580388 4716 +berg 1 2 6.263398 6.263398 4970 +reif 1 2 6.263398 6.263398 5015 +diploma 1 2 6.263398 6.263398 5990 +bloomington 1 2 6.263398 6.263398 5034 +itin 1 2 6.263398 6.263398 5992 +printf 4 1 6.957497 27.829988 16317 +cologn 2 1 6.957497 13.914994 16318 +putchar 2 1 6.957497 13.914994 16319 +bergstefan 1 1 6.957497 6.957497 16320 +sgberg 1 1 6.957497 6.957497 16321 +mittler 1 1 6.957497 6.957497 16322 +thgrade 1 1 6.957497 6.957497 16323 +schillergymnasium 1 1 6.957497 6.957497 16324 +statesto 1 1 6.957497 6.957497 16325 +distinctionin 1 1 6.957497 6.957497 16326 +fromindiana 1 1 6.957497 6.957497 16327 +momenth 1 1 6.957497 6.957497 16328 +thiscenturi 1 1 6.957497 6.957497 16329 +yourselfsometh 1 1 6.957497 6.957497 16330 +particularsolut 1 1 6.957497 6.957497 16331 +sall 1 1 6.957497 6.957497 16332 +carriag 1 1 6.957497 6.957497 16333 +inpostscript 1 1 6.957497 6.957497 16334 +andtex 1 1 6.957497 6.957497 16335 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shade^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shade^ new file mode 100644 index 00000000..85ac7ae7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shade^ @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +recent 1 167 1.791759 1.791759 58 +contact 1 153 1.791759 1.791759 59 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +schedul 1 119 2.079442 2.079442 85 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +make 1 111 2.197225 2.197225 120 +pictur 2 89 2.397895 4.795790 160 +follow 1 92 2.397895 2.397895 143 +mani 1 92 2.397895 2.397895 150 +graphic 1 90 2.397895 2.397895 147 +imag 1 91 2.397895 2.397895 161 +thing 2 84 2.484907 4.969814 189 +info 1 85 2.484907 2.484907 176 +wide 1 84 2.484907 2.484907 185 +differ 1 66 2.708050 2.708050 253 +interact 1 62 2.772589 2.772589 270 +complex 1 64 2.772589 2.772589 269 +laboratori 1 63 2.772589 2.772589 292 +plan 1 65 2.772589 2.772589 272 +browser 1 56 2.890372 2.890372 313 +get 1 46 3.091042 3.091042 380 +done 1 47 3.091042 3.091042 381 +third 1 43 3.178054 3.178054 412 +least 1 35 3.401197 3.401197 516 +go 1 33 3.433987 3.433987 529 +anim 1 31 3.496508 3.496508 557 +travel 1 30 3.555348 3.555348 579 +daili 1 24 3.761200 3.761200 706 +grad 1 20 3.951244 3.951244 837 +left 1 19 4.007333 4.007333 851 +lower 1 18 4.060443 4.060443 886 +lot 1 18 4.060443 4.060443 889 +scene 1 14 4.317488 4.317488 1114 +island 1 11 4.553877 4.553877 1345 +siggraph 1 8 4.875197 4.875197 1773 +corner 1 7 5.010635 5.010635 1909 +pixel 1 4 5.568345 5.568345 2831 +shortcut 1 3 5.857933 5.857933 3932 +ward 1 2 6.263398 6.263398 4506 +hereat 1 2 6.263398 6.263398 5048 +shadegreet 1 1 6.957497 6.957497 16336 +salut 1 1 6.957497 6.957497 16337 +dubcs 1 1 6.957497 6.957497 16338 +renderingof 1 1 6.957497 6.957497 16339 +walkthruproject 1 1 6.957497 6.957497 16340 +amonglot 1 1 6.957497 6.957497 16341 +paperdescrib 1 1 6.957497 6.957497 16342 +thepictur 1 1 6.957497 6.957497 16343 +aspectsof 1 1 6.957497 6.957497 16344 +thegraph 1 1 6.957497 6.957497 16345 +scrunch 1 1 6.957497 6.957497 16346 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shapiro b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shapiro new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shapiro @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shuntak^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shuntak^ new file mode 100644 index 00000000..3a2de348 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^shuntak^ @@ -0,0 +1,30 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +seattl 1 120 2.079442 2.079442 103 +summari 1 73 2.639057 2.639057 237 +prof 1 64 2.772589 2.772589 273 +januari 1 62 2.772589 2.772589 264 +pointer 1 48 3.044522 3.044522 368 +vita 1 38 3.295837 3.295837 473 +curriculum 1 33 3.433987 3.433987 535 +request 1 26 3.688879 3.688879 635 +ofwashington 1 22 3.850148 3.850148 766 +upon 1 16 4.174387 4.174387 978 +andengin 1 4 5.568345 5.568345 3042 +shun 2 2 6.263398 12.526796 4533 +leung 2 2 6.263398 12.526796 4534 +johnzahorjan 1 2 6.263398 6.263398 6004 +leungshun 1 1 6.957497 6.957497 16347 +shuntak 1 1 6.957497 6.957497 16348 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^skoga^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^skoga^ new file mode 100644 index 00000000..42ef385c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^skoga^ @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +graduat 3 215 1.386294 4.158882 31 +also 2 259 1.386294 2.772588 28 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +relat 1 139 1.945910 1.945910 68 +studi 3 120 2.079442 6.238326 91 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +mathemat 1 108 2.197225 2.197225 123 +look 1 107 2.197225 2.197225 115 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +call 1 91 2.397895 2.397895 153 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +start 1 83 2.484907 2.484907 173 +info 1 85 2.484907 2.484907 176 +interfac 1 79 2.564949 2.564949 209 +involv 2 71 2.639057 5.278114 227 +name 1 72 2.639057 2.639057 220 +degre 1 69 2.708050 2.708050 259 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +origin 1 38 3.295837 3.295837 472 +mean 1 37 3.332205 3.332205 477 +dissert 1 32 3.465736 3.465736 549 +quit 1 27 3.637586 3.637586 633 +notic 1 25 3.737670 3.737670 675 +departmentunivers 1 24 3.761200 3.761200 711 +ofwashington 1 22 3.850148 3.850148 766 +hypertext 1 19 4.007333 4.007333 865 +figur 1 18 4.060443 4.060443 903 +less 1 18 4.060443 4.060443 892 +universityof 1 15 4.248495 4.248495 1061 +anywai 1 15 4.248495 4.248495 1047 +neat 1 12 4.465908 4.465908 1263 +virginia 1 8 4.875197 4.875197 1659 +studentcomput 1 7 5.010635 5.010635 1963 +foreign 1 7 5.010635 5.010635 1919 +smaller 1 7 5.010635 5.010635 1874 +hunt 1 7 5.010635 5.010635 1798 +bug 1 7 5.010635 5.010635 1801 +slate 1 6 5.164786 5.164786 2021 +sciencedepart 1 6 5.164786 5.164786 2172 +haven 2 4 5.568345 11.136690 3037 +asian 2 3 5.857933 11.715866 3598 +heavili 1 3 5.857933 5.857933 3572 +groupand 1 3 5.857933 5.857933 3873 +shuichi 5 2 6.263398 31.316990 4498 +myqual 1 2 6.263398 6.263398 6005 +degreein 1 2 6.263398 6.263398 5116 +koga 3 1 6.957497 20.872491 16349 +skoga 2 1 6.957497 13.914994 16350 +bynow 1 1 6.957497 6.957497 16351 +alsoheavili 1 1 6.957497 6.957497 16352 +andgovern 1 1 6.957497 6.957497 16353 +alic 1 1 6.957497 6.957497 16354 +destroi 1 1 6.957497 6.957497 16355 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^soha^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^soha^ new file mode 100644 index 00000000..ab3fa282 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^soha^ @@ -0,0 +1,49 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 3 284 1.098612 3.295836 21 +engin 2 297 1.098612 2.197224 20 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +year 1 148 1.945910 1.945910 84 +professor 1 137 1.945910 1.945910 76 +schedul 1 119 2.079442 2.079442 85 +seattl 1 120 2.079442 2.079442 103 +site 1 106 2.197225 2.197225 119 +school 1 84 2.484907 2.484907 188 +educ 1 86 2.484907 2.484907 191 +dept 2 64 2.772589 5.545178 291 +previou 1 62 2.772589 2.772589 290 +experi 1 64 2.772589 2.772589 283 +profession 1 51 2.995732 2.995732 345 +littl 1 39 3.258097 3.258097 454 +photo 1 31 3.496508 3.496508 561 +busi 1 21 3.912023 3.912023 784 +vlsi 1 21 3.912023 3.912023 795 +weekli 1 17 4.110874 4.110874 919 +circuit 1 13 4.382027 4.382027 1131 +galleri 1 13 4.382027 4.382027 1192 +chao 1 8 4.875197 4.875197 1753 +patent 1 5 5.347108 5.347108 2574 +soha 3 2 6.263398 18.790194 6006 +hassoun 1 2 6.263398 6.263398 6007 +retim 1 2 6.263398 6.263398 6008 +hassounit 1 1 6.957497 6.957497 16356 +whoturn 1 1 6.957497 6.957497 16357 +onarchitectur 1 1 6.957497 6.957497 16358 +carlebel 1 1 6.957497 6.957497 16359 +deede 1 1 6.957497 6.957497 16360 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sparekh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sparekh^ new file mode 100644 index 00000000..f50555d8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sparekh^ @@ -0,0 +1,116 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +design 1 213 1.386294 1.386294 25 +cornel 1 215 1.386294 1.386294 23 +class 1 199 1.609438 1.609438 37 +contact 1 153 1.791759 1.791759 59 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +hall 1 146 1.945910 1.945910 65 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +seattl 2 120 2.079442 4.158884 103 +report 1 131 2.079442 2.079442 92 +pleas 1 113 2.197225 2.197225 114 +person 1 111 2.197225 2.197225 117 +search 1 95 2.397895 2.397895 155 +associ 1 93 2.397895 2.397895 151 +control 1 82 2.484907 2.484907 164 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +west 1 83 2.484907 2.484907 192 +issu 1 78 2.564949 2.564949 211 +resum 1 79 2.564949 2.564949 217 +sieg 1 69 2.708050 2.708050 260 +evalu 1 64 2.772589 2.772589 266 +particular 1 51 2.995732 2.995732 352 +right 1 48 3.044522 3.044522 363 +quarter 1 47 3.091042 3.091042 389 +done 1 47 3.091042 3.091042 381 +favorit 4 44 3.135494 12.541976 410 +keep 1 44 3.135494 3.135494 409 +long 1 43 3.178054 3.178054 413 +music 1 42 3.218876 3.218876 436 +seminar 2 38 3.295837 6.591674 470 +procedur 1 36 3.367296 3.367296 488 +random 1 34 3.401197 3.401197 511 +bookmark 1 26 3.688879 3.688879 639 +sport 1 25 3.737670 3.737670 683 +sort 1 22 3.850148 3.850148 738 +fund 1 21 3.912023 3.912023 805 +corpor 1 21 3.912023 3.912023 802 +tenni 1 20 3.951244 3.951244 838 +chateau 1 16 4.174387 4.174387 997 +cognit 1 16 4.174387 4.174387 986 +action 1 15 4.248495 4.248495 1038 +track 1 15 4.248495 4.248495 1029 +psycholog 1 15 4.248495 4.248495 1054 +convent 1 14 4.317488 4.317488 1072 +philosophi 1 13 4.382027 4.382027 1167 +danc 2 12 4.465908 8.931816 1278 +food 2 12 4.465908 8.931816 1285 +emploi 1 12 4.465908 4.465908 1284 +remov 1 12 4.465908 4.465908 1225 +island 1 11 4.553877 4.553877 1345 +peter 1 11 4.553877 4.553877 1316 +french 2 9 4.753590 9.507180 1511 +volleybal 1 9 4.753590 4.753590 1598 +soccer 2 8 4.875197 9.750394 1752 +guggenheim 1 8 4.875197 4.875197 1759 +simon 1 8 4.875197 4.875197 1697 +coast 1 8 4.875197 4.875197 1746 +softbot 2 7 5.010635 10.021270 1974 +squash 1 6 5.164786 5.164786 2223 +rock 1 6 5.164786 5.164786 2164 +annex 1 5 5.347108 5.347108 2572 +sail 1 5 5.347108 5.347108 2571 +east 1 5 5.347108 5.347108 2472 +phil 1 5 5.347108 5.347108 2419 +oracl 1 4 5.568345 5.568345 2823 +swing 1 4 5.568345 5.568345 2887 +floyd 1 4 5.568345 5.568345 2682 +sujai 2 3 5.857933 11.715866 3960 +parekh 2 3 5.857933 11.715866 3961 +ballroom 1 3 5.857933 5.857933 3983 +spud 1 2 6.263398 6.263398 6009 +chicken 1 2 6.263398 6.263398 5851 +tango 1 2 6.263398 6.263398 6010 +salsa 1 2 6.263398 6.263398 5984 +strait 1 2 6.263398 6.263398 5980 +genesi 1 2 6.263398 6.263398 6011 +gabriel 1 2 6.263398 6.263398 5029 +simultaneousmultithread 1 1 6.957497 6.957497 16361 +tomultithread 1 1 6.957497 6.957497 16362 +controlsystem 1 1 6.957497 6.957497 16363 +patio 1 1 6.957497 6.957497 16364 +workspac 1 1 6.957497 6.957497 16365 +stottler 1 1 6.957497 6.957497 16366 +henk 1 1 6.957497 6.957497 16367 +oondhiu 1 1 6.957497 6.957497 16368 +mango 1 1 6.957497 6.957497 16369 +phad 1 1 6.957497 6.957497 16370 +thai 1 1 6.957497 6.957497 16371 +kung 1 1 6.957497 6.957497 16372 +beverag 1 1 6.957497 6.957497 16373 +screwdriv 1 1 6.957497 6.957497 16374 +scotch 1 1 6.957497 6.957497 16375 +ic 1 1 6.957497 6.957497 16376 +dire 1 1 6.957497 6.957497 16377 +pink 1 1 6.957497 6.957497 16378 +collin 1 1 6.957497 6.957497 16379 +petti 1 1 6.957497 6.957497 16380 +sparekh 1 1 6.957497 6.957497 16381 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^speed^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^speed^ new file mode 100644 index 00000000..29d74952 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^speed^ @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +washington 1 236 1.386294 1.386294 32 +link 1 247 1.386294 1.386294 24 +address 1 170 1.791759 1.791759 62 +machin 1 129 2.079442 2.079442 95 +peopl 1 96 2.302585 2.302585 132 +academ 1 82 2.484907 2.484907 178 +name 1 72 2.639057 2.639057 220 +improv 1 62 2.772589 2.772589 289 +colleg 1 61 2.833213 2.833213 300 +friend 1 48 3.044522 3.044522 376 +compani 1 41 3.218876 3.218876 423 +live 1 40 3.258097 3.258097 451 +power 1 30 3.555348 3.555348 573 +sport 1 25 3.737670 3.737670 683 +famili 1 23 3.806662 3.806662 735 +almost 1 22 3.850148 3.850148 742 +hous 1 21 3.912023 3.912023 801 +speed 3 18 4.060443 12.181329 911 +ultim 1 17 4.110874 4.110874 943 +drive 1 15 4.248495 4.248495 1052 +comic 1 14 4.317488 4.317488 1103 +food 1 12 4.465908 4.465908 1285 +mari 1 12 4.465908 4.465908 1266 +lake 1 11 4.553877 4.553877 1373 +mountain 1 10 4.653960 4.653960 1456 +bike 1 10 4.653960 4.653960 1468 +ski 1 10 4.653960 4.653960 1471 +drink 1 9 4.753590 4.753590 1607 +softbal 1 9 4.753590 4.753590 1594 +utah 1 9 4.753590 4.753590 1585 +erik 3 8 4.875197 14.625591 1701 +lewi 1 8 4.875197 4.875197 1700 +star 1 8 4.875197 4.875197 1717 +brain 1 8 4.875197 4.875197 1638 +babylon 1 8 4.875197 4.875197 1731 +cultur 1 7 5.010635 5.010635 1951 +fish 1 6 5.164786 5.164786 2207 +selberg 1 5 5.347108 5.347108 2441 +salt 1 5 5.347108 5.347108 2413 +bean 1 4 5.568345 5.568345 2968 +lara 1 3 5.857933 5.857933 3914 +disc 1 2 6.263398 6.263398 5626 +spud 1 2 6.263398 6.263398 6009 +raquetbal 1 2 6.263398 6.263398 6012 +pepper 1 2 6.263398 6.263398 6013 +war 1 2 6.263398 6.263398 5969 +toon 1 2 6.263398 6.263398 4120 +fishcam 1 1 6.957497 6.957497 16382 +memorialhappi 1 1 6.957497 6.957497 16383 +kay 1 1 6.957497 6.957497 16384 +pasti 1 1 6.957497 6.957497 16385 +ur 1 1 6.957497 6.957497 16386 +pro 1 1 6.957497 6.957497 16387 +wedgwood 1 1 6.957497 6.957497 16388 +diet 1 1 6.957497 6.957497 16389 +roast 1 1 6.957497 6.957497 16390 +bagel 1 1 6.957497 6.957497 16391 +racer 1 1 6.957497 6.957497 16392 +tini 1 1 6.957497 6.957497 16393 +pinki 1 1 6.957497 6.957497 16394 +phantom 1 1 6.957497 6.957497 16395 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sungeun^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sungeun^ new file mode 100644 index 00000000..5dccd43b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^sungeun^ @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 2 380 0.693147 1.386294 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +engin 1 297 1.098612 1.098612 20 +washington 3 236 1.386294 4.158882 32 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +includ 1 208 1.609438 1.609438 42 +parallel 3 169 1.791759 5.375277 60 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +read 1 154 1.791759 1.791759 47 +like 2 132 1.945910 3.891820 81 +year 1 148 1.945910 1.945910 84 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +seattl 1 120 2.079442 2.079442 103 +final 1 116 2.197225 2.197225 108 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +world 1 115 2.197225 2.197225 126 +take 2 97 2.302585 4.605170 134 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +graphic 1 90 2.397895 2.397895 147 +sinc 1 90 2.397895 2.397895 159 +second 1 81 2.484907 2.484907 166 +good 2 77 2.564949 5.129898 200 +optim 1 79 2.564949 2.564949 197 +come 1 78 2.564949 2.564949 202 +involv 1 71 2.639057 2.639057 227 +addit 1 74 2.639057 2.639057 228 +simul 3 66 2.708050 8.124150 255 +goal 1 66 2.708050 2.708050 250 +would 1 67 2.708050 2.708050 251 +experi 1 64 2.772589 2.772589 283 +improv 1 62 2.772589 2.772589 289 +result 1 65 2.772589 2.772589 281 +plai 2 60 2.833213 5.666426 307 +colleg 1 61 2.833213 2.833213 300 +think 1 57 2.890372 2.890372 314 +processor 1 54 2.944439 2.944439 335 +visual 1 48 3.044522 3.044522 372 +quarter 1 47 3.091042 3.091042 389 +anoth 1 45 3.135494 3.135494 408 +late 1 40 3.258097 3.258097 439 +littl 1 39 3.258097 3.258097 454 +movi 1 40 3.258097 3.258097 459 +must 1 40 3.258097 3.258097 442 +game 1 36 3.367296 3.367296 498 +independ 1 32 3.465736 3.465736 548 +quit 1 27 3.637586 3.637586 633 +enjoi 1 26 3.688879 3.688879 660 +primari 1 25 3.737670 3.737670 669 +cooper 1 22 3.850148 3.850148 757 +instead 1 22 3.850148 3.850148 756 +try 1 22 3.850148 3.850148 764 +watch 2 21 3.912023 7.824046 789 +divis 2 21 3.912023 7.824046 803 +mostli 1 19 4.007333 4.007333 869 +exercis 1 19 4.007333 4.007333 842 +left 1 19 4.007333 4.007333 851 +listen 1 18 4.060443 4.060443 907 +front 1 13 4.382027 4.382027 1154 +came 1 13 4.382027 4.382027 1197 +unfortun 1 13 4.382027 4.382027 1170 +weight 1 12 4.465908 4.465908 1204 +hang 1 9 4.753590 4.753590 1499 +drink 1 9 4.753590 4.753590 1607 +swim 1 9 4.753590 4.753590 1599 +router 1 8 4.875197 4.875197 1772 +soccer 1 8 4.875197 4.875197 1752 +sung 2 6 5.164786 10.329572 2075 +superscalar 1 6 5.164786 5.164786 2082 +seen 1 6 5.164786 5.164786 2202 +yeah 1 6 5.164786 5.164786 2195 +ta 1 4 5.568345 5.568345 3058 +choi 1 4 5.568345 5.568345 2732 +astronomi 1 3 5.857933 5.857933 3974 +comfort 1 3 5.857933 5.857933 3136 +wine 1 3 5.857933 5.857933 3895 +knee 1 3 5.857933 5.857933 3980 +choiwelcom 1 2 6.263398 6.263398 5727 +ironman 1 2 6.263398 6.263398 4226 +vegetarian 1 2 6.263398 6.263398 5902 +season 1 2 6.263398 6.263398 4872 +scrub 2 1 6.957497 13.914994 16396 +thehomepag 1 1 6.957497 6.957497 16397 +ofsung 1 1 6.957497 6.957497 16398 +eunchoi 1 1 6.957497 6.957497 16399 +myschoollifemi 1 1 6.957497 6.957497 16400 +zplcompil 1 1 6.957497 6.957497 16401 +beenspend 1 1 6.957497 6.957497 16402 +communicationgener 1 1 6.957497 6.957497 16403 +architechtur 1 1 6.957497 6.957497 16404 +communicationlibrari 1 1 6.957497 6.957497 16405 +programson 1 1 6.957497 6.957497 16406 +nodeperform 1 1 6.957497 6.957497 16407 +alsobeen 1 1 6.957497 6.957497 16408 +chaosrout 1 1 6.957497 6.957497 16409 +thatexperi 1 1 6.957497 6.957497 16410 +inzpl 1 1 6.957497 6.957497 16411 +myjunior 1 1 6.957497 6.957497 16412 +dinner 1 1 6.957497 6.957497 16413 +samewithout 1 1 6.957497 6.957497 16414 +twosoccerteam 1 1 6.957497 6.957497 16415 +cousin 1 1 6.957497 6.957497 16416 +recdivis 1 1 6.957497 6.957497 16417 +andcoop 1 1 6.957497 6.957497 16418 +sacrifiedmi 1 1 6.957497 6.957497 16419 +usualstep 1 1 6.957497 6.957497 16420 +aerobicsclass 1 1 6.957497 6.957497 16421 +trainingclass 1 1 6.957497 6.957497 16422 +abit 1 1 6.957497 6.957497 16423 +shakespear 1 1 6.957497 6.957497 16424 +publictelevis 1 1 6.957497 6.957497 16425 +classicalmus 1 1 6.957497 6.957497 16426 +myotherlif 1 1 6.957497 6.957497 16427 +sungeun 1 1 6.957497 6.957497 16428 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^thu^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^thu^ new file mode 100644 index 00000000..42e848d5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^thu^ @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 5 443 0.693147 3.465735 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +support 1 132 1.945910 1.945910 83 +schedul 2 119 2.079442 4.158884 85 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +world 1 115 2.197225 2.197225 126 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +real 1 93 2.397895 2.397895 144 +help 1 83 2.484907 2.484907 175 +build 1 85 2.484907 2.484907 184 +complet 1 77 2.564949 2.564949 208 +advisor 1 51 2.995732 2.995732 355 +run 1 51 2.995732 2.995732 347 +visual 1 48 3.044522 3.044522 372 +execut 1 45 3.135494 3.135494 404 +workstat 1 37 3.332205 3.332205 479 +secur 1 30 3.555348 3.555348 577 +measur 1 28 3.610918 3.610918 609 +decis 1 23 3.806662 3.806662 728 +runtim 2 19 4.007333 8.014666 858 +partial 1 18 4.060443 4.060443 900 +engineeringunivers 1 17 4.110874 4.110874 959 +washingtonbox 1 13 4.382027 4.382027 1200 +characterist 1 12 4.465908 4.465908 1257 +multiprogram 1 6 5.164786 5.164786 2010 +nguyen 2 3 5.857933 11.715866 3290 +andparallel 1 2 6.263398 6.263398 6014 +johnzahorjan 1 2 6.263398 6.263398 6004 +soft 1 2 6.263398 6.263398 5072 +idl 1 2 6.263398 6.263398 4256 +ofappl 1 2 6.263398 6.263398 6002 +tominim 1 2 6.263398 6.263398 5436 +multiprocessorsenviron 2 1 6.957497 13.914994 16429 +frommi 1 1 6.957497 6.957497 16430 +timeappl 1 1 6.957497 6.957497 16431 +innow 1 1 6.957497 6.957497 16432 +uniprogram 1 1 6.957497 6.957497 16433 +goodglob 1 1 6.957497 6.957497 16434 +cvpublic 1 1 6.957497 6.957497 16435 +worldvietnameseresourc 1 1 6.957497 6.957497 16436 +netcyclingplayground 1 1 6.957497 6.957497 16437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tian^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tian^ new file mode 100644 index 00000000..b1a4986b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tian^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +click 1 142 1.945910 1.945910 78 +part 1 98 2.302585 2.302585 129 +homepag 1 93 2.397895 2.397895 148 +html 1 75 2.639057 2.639057 235 +browser 1 56 2.890372 2.890372 313 +standard 1 48 3.044522 3.044522 365 +even 1 45 3.135494 3.135494 393 +frame 2 24 3.761200 7.522400 684 +yellow 1 9 4.753590 4.753590 1601 +turkei 1 4 5.568345 5.568345 2914 +tian 1 3 5.857933 5.857933 3680 +homepageyour 1 1 6.957497 6.957497 16438 +rusti 1 1 6.957497 6.957497 16439 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tiwary^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tiwary^ new file mode 100644 index 00000000..434abfac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tiwary^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 14 443 0.693147 9.704058 6 +work 8 380 0.693147 5.545176 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +us 4 329 1.098612 4.394448 16 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +softwar 4 220 1.386294 5.545176 30 +washington 2 236 1.386294 2.772588 32 +also 2 259 1.386294 2.772588 28 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +oper 4 180 1.609438 6.437752 34 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +distribut 9 162 1.791759 16.125831 51 +address 4 170 1.791759 7.167036 62 +applic 4 170 1.791759 7.167036 56 +parallel 1 169 1.791759 1.791759 60 +object 14 138 1.945910 27.242740 79 +area 2 144 1.945910 3.891820 80 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +databas 3 122 2.079442 6.238326 86 +seattl 1 120 2.079442 2.079442 103 +technolog 1 131 2.079442 2.079442 102 +machin 1 129 2.079442 2.079442 95 +intern 1 108 2.197225 2.197225 128 +user 2 104 2.302585 4.605170 137 +techniqu 1 99 2.302585 2.302585 138 +octob 2 89 2.397895 4.795790 156 +follow 1 92 2.397895 2.397895 143 +proceed 1 93 2.397895 2.397895 152 +larg 5 82 2.484907 12.424535 168 +build 3 85 2.484907 7.454721 184 +environ 1 84 2.484907 2.484907 177 +orient 2 80 2.564949 5.129898 205 +interfac 1 79 2.564949 2.564949 209 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +workshop 7 71 2.639057 18.473399 239 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +organ 2 65 2.772589 5.545178 265 +creat 1 63 2.772589 2.772589 277 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +evalu 1 64 2.772589 2.772589 266 +juli 1 60 2.833213 2.833213 305 +space 2 57 2.890372 5.780744 310 +sever 1 56 2.890372 2.890372 322 +advisor 2 51 2.995732 5.991464 355 +profession 1 51 2.995732 2.995732 345 +past 1 42 3.218876 3.218876 428 +multipl 1 39 3.258097 3.258097 453 +close 1 38 3.295837 3.295837 465 +singl 2 34 3.401197 6.802394 510 +global 1 34 3.401197 3.401197 520 +measur 3 28 3.610918 10.832754 609 +except 2 28 3.610918 7.221836 607 +full 1 28 3.610918 3.610918 615 +proc 1 26 3.688879 3.688879 649 +jeff 1 25 3.737670 3.737670 673 +primari 1 25 3.737670 3.737670 669 +handl 2 24 3.761200 7.522400 685 +basi 1 20 3.951244 3.951244 828 +benchmark 2 19 4.007333 8.014666 859 +mostli 1 19 4.007333 4.007333 869 +behavior 2 18 4.060443 8.120886 881 +fourth 1 16 4.174387 4.174387 999 +across 1 16 4.174387 4.174387 974 +levi 4 14 4.317488 17.269952 1093 +opportun 1 13 4.382027 4.382027 1161 +workload 2 12 4.465908 8.931816 1210 +infrastructur 1 12 4.465908 4.465908 1234 +hank 1 12 4.465908 4.465908 1253 +career 1 12 4.465908 4.465908 1287 +persist 4 11 4.553877 18.215508 1367 +architect 1 8 4.875197 4.875197 1624 +character 1 8 4.875197 4.875197 1767 +oop 1 8 4.875197 4.875197 1778 +oopsla 6 6 5.164786 30.988716 2221 +spare 1 6 5.164786 5.164786 2177 +tiwari 7 5 5.347108 37.429756 2385 +opal 5 4 5.568345 27.841725 3057 +chase 2 4 5.568345 11.136690 2897 +narasayya 1 4 5.568345 5.568345 3065 +boe 1 3 5.857933 5.857933 3318 +addendum 1 3 5.857933 5.857933 3150 +ashutosh 2 2 6.263398 12.526796 5966 +projectsopali 1 1 6.957497 6.957497 16440 +thisexperi 1 1 6.957497 6.957497 16441 +distrbut 1 1 6.957497 6.957497 16442 +ecoop 1 1 6.957497 6.957497 16443 +bosch 1 1 6.957497 6.957497 16444 +messeng 1 1 6.957497 6.957497 16445 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tlau^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tlau^ new file mode 100644 index 00000000..524da683 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tlau^ @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +current 2 284 1.098612 2.197224 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +also 3 259 1.386294 4.158882 28 +washington 2 236 1.386294 2.772588 32 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +avail 1 169 1.791759 1.791759 48 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +seattl 1 120 2.079442 2.079442 103 +person 1 111 2.197225 2.197225 117 +take 1 97 2.302585 2.302585 134 +advanc 1 99 2.302585 2.302585 130 +search 1 95 2.397895 2.397895 155 +pictur 1 89 2.397895 2.397895 160 +second 1 81 2.484907 2.484907 166 +requir 1 81 2.484907 2.484907 167 +onlin 1 75 2.639057 2.639057 223 +java 2 70 2.708050 5.416100 248 +share 1 59 2.833213 2.833213 304 +plai 1 60 2.833213 2.833213 307 +simpl 1 60 2.833213 2.833213 298 +found 1 53 2.944439 2.944439 337 +digit 1 52 2.995732 2.995732 348 +maintain 1 51 2.995732 2.995732 342 +still 1 50 3.044522 3.044522 362 +frequent 1 49 3.044522 3.044522 367 +quarter 1 47 3.091042 3.091042 389 +anoth 1 45 3.135494 3.135494 408 +game 1 36 3.367296 3.367296 498 +copyright 1 36 3.367296 3.367296 495 +platform 1 29 3.583519 3.583519 591 +linux 3 27 3.637586 10.912758 631 +great 1 27 3.637586 3.637586 626 +known 1 24 3.761200 3.761200 702 +honor 1 23 3.806662 3.806662 729 +sort 1 22 3.850148 3.850148 738 +applet 1 20 3.951244 3.951244 827 +histori 1 19 4.007333 4.007333 853 +seek 1 17 4.110874 4.110874 954 +attempt 1 17 4.110874 4.110874 917 +qual 1 15 4.248495 4.248495 1062 +pretti 1 13 4.382027 4.382027 1191 +employ 1 12 4.465908 4.465908 1291 +gain 1 8 4.875197 4.875197 1730 +dead 1 7 5.010635 5.010635 1840 +apart 1 7 5.010635 5.010635 1936 +appar 1 7 5.010635 5.010635 1958 +bookstor 1 7 5.010635 5.010635 1837 +myresum 1 6 5.164786 5.164786 2199 +cat 1 6 5.164786 5.164786 2194 +commit 1 6 5.164786 5.164786 2233 +curiou 1 5 5.347108 5.347108 2541 +superhighwai 1 4 5.568345 5.568345 2943 +scotland 1 4 5.568345 5.568345 3049 +fulfil 1 4 5.568345 5.568345 2932 +breadth 1 4 5.568345 5.568345 2695 +gambit 1 3 5.857933 5.857933 3227 +tessa 2 2 6.263398 12.526796 4507 +yeargradu 1 2 6.263398 6.263398 6015 +maze 1 2 6.263398 6.263398 4843 +knit 1 2 6.263398 6.263398 4906 +relatedgoodi 1 1 6.957497 6.957497 16446 +clio 1 1 6.957497 6.957497 16447 +andbrows 1 1 6.957497 6.957497 16448 +kittyi 1 1 6.957497 6.957497 16449 +siames 1 1 6.957497 6.957497 16450 +memor 1 1 6.957497 6.957497 16451 +therear 1 1 6.957497 6.957497 16452 +tofind 1 1 6.957497 6.957497 16453 +alsor 1 1 6.957497 6.957497 16454 +classesi 1 1 6.957497 6.957497 16455 +ofeight 1 1 6.957497 6.957497 16456 +seminarlinux 1 1 6.957497 6.957497 16457 +gameseverybodi 1 1 6.957497 6.957497 16458 +gametom 1 1 6.957497 6.957497 16459 +coolgam 1 1 6.957497 6.957497 16460 +sleepingi 1 1 6.957497 6.957497 16461 +crochet 1 1 6.957497 6.957497 16462 +tlau 1 1 6.957497 6.957497 16463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tompa^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tompa^ new file mode 100644 index 00000000..7b19ad45 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tompa^ @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +washington 3 236 1.386294 4.158882 32 +phone 1 175 1.791759 1.791759 45 +recent 1 167 1.791759 1.791759 58 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +seattl 1 120 2.079442 2.079442 103 +provid 1 121 2.079442 2.079442 94 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +build 1 85 2.484907 2.484907 184 +educ 1 86 2.484907 2.484907 191 +resourc 1 81 2.484907 2.484907 172 +finger 1 52 2.995732 2.995732 354 +electron 1 47 3.091042 3.091042 379 +articl 1 33 3.433987 3.433987 530 +collabor 1 32 3.465736 3.465736 543 +photo 1 31 3.496508 3.496508 561 +martin 4 21 3.912023 15.648092 794 +among 1 21 3.912023 3.912023 781 +across 1 16 4.174387 4.174387 974 +photograph 1 15 4.248495 4.248495 1056 +holidai 1 12 4.465908 4.465908 1224 +lane 1 8 4.875197 4.875197 1720 +courtesi 1 7 5.010635 5.010635 1953 +moon 1 4 5.568345 5.568345 2991 +pierc 1 4 5.568345 5.568345 2623 +tompa 4 3 5.857933 23.431732 3305 +health 1 3 5.857933 5.857933 3787 +trajectori 1 2 6.263398 6.263398 4260 +pearl 1 2 6.263398 6.263398 4485 +wash 1 2 6.263398 6.263398 5714 +receptionist 1 1 6.957497 6.957497 16464 +thelma 1 1 6.957497 6.957497 16465 +louis 1 1 6.957497 6.957497 16466 +oyster 1 1 6.957497 6.957497 16467 +surrealist 1 1 6.957497 6.957497 16468 +propheci 1 1 6.957497 6.957497 16469 +carol 1 1 6.957497 6.957497 16470 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tracyk^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tracyk^ new file mode 100644 index 00000000..23e148e0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tracyk^ @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +washington 3 236 1.386294 4.158882 32 +design 1 213 1.386294 1.386294 25 +list 1 201 1.609438 1.609438 39 +parallel 3 169 1.791759 5.375277 60 +algorithm 2 162 1.791759 3.583518 57 +base 1 165 1.791759 1.791759 50 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +seattl 2 120 2.079442 4.158884 103 +confer 1 126 2.079442 2.079442 100 +version 1 113 2.197225 2.197225 122 +associ 1 93 2.397895 2.397895 151 +proceed 1 93 2.397895 2.397895 152 +institut 1 84 2.484907 2.484907 187 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +appear 2 78 2.564949 5.129898 210 +optim 1 79 2.564949 2.564949 197 +free 1 73 2.639057 2.639057 224 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +foundat 1 62 2.772589 2.772589 286 +abstract 1 62 2.772589 2.772589 276 +detail 1 57 2.890372 2.890372 321 +without 1 50 3.044522 3.044522 370 +move 1 47 3.091042 3.091042 382 +done 1 47 3.091042 3.091042 381 +cach 1 41 3.218876 3.218876 432 +vita 1 38 3.295837 3.295837 473 +brian 1 38 3.295837 3.295837 466 +return 1 34 3.401197 3.401197 502 +random 1 34 3.401197 3.401197 511 +curriculum 1 33 3.433987 3.433987 535 +product 1 33 3.433987 3.433987 527 +extend 1 32 3.465736 3.465736 539 +held 3 28 3.610918 10.832754 600 +trace 1 25 3.737670 3.737670 677 +longer 1 20 3.951244 3.951244 816 +histori 1 19 4.007333 4.007333 853 +comparison 1 19 4.007333 4.007333 863 +andrew 1 19 4.007333 4.007333 849 +bershad 1 18 4.060443 4.060443 902 +matrix 1 17 4.110874 4.110874 933 +letter 1 16 4.174387 4.174387 981 +driven 1 15 4.248495 4.248495 1048 +near 1 14 4.317488 4.317488 1091 +squar 1 14 4.317488 4.317488 1082 +karlin 3 13 4.382027 13.146081 1176 +sigmetr 1 13 4.382027 4.382027 1173 +anna 3 12 4.465908 13.397724 1292 +usenix 1 12 4.465908 4.465908 1240 +verifi 1 12 4.465908 4.465908 1261 +statement 1 11 4.553877 4.553877 1313 +probabilist 1 11 4.553877 4.553877 1343 +forc 2 10 4.653960 9.307920 1384 +charg 1 9 4.753590 4.753590 1582 +patterson 1 9 4.753590 4.753590 1554 +kumar 1 9 4.753590 4.753590 1506 +sigop 1 8 4.875197 4.875197 1727 +bit 1 7 5.010635 5.010635 1833 +prefetch 3 6 5.164786 15.494358 2039 +edward 2 6 5.164786 10.329572 2050 +promis 1 6 5.164786 5.164786 2037 +onoper 1 6 5.164786 5.164786 2048 +escap 1 4 5.568345 5.568345 3016 +kimbrel 7 3 5.857933 41.005531 3924 +traci 5 3 5.857933 29.289665 3984 +prison 4 3 5.857933 23.431732 3907 +cachingtraci 2 3 5.857933 11.715866 3923 +felten 2 3 5.857933 11.715866 3925 +eduher 1 3 5.857933 5.857933 3499 +tomanufactur 1 2 6.263398 6.263398 6016 +airplan 1 2 6.263398 6.263398 4917 +tomkin 1 2 6.263398 6.263398 5814 +hugo 1 2 6.263398 6.263398 5815 +garth 1 2 6.263398 6.263398 5816 +gibson 1 2 6.263398 6.263398 5817 +implemen 1 2 6.263398 6.263398 5809 +rakesh 1 2 6.263398 6.263398 6017 +sinha 1 2 6.263398 6.263398 5754 +imprison 2 1 6.957497 13.914994 16471 +captor 2 1 6.957497 13.914994 16472 +washingtonsinc 1 1 6.957497 6.957497 16473 +trial 1 1 6.957497 6.957497 16474 +toanoth 1 1 6.957497 6.957497 16475 +inmat 1 1 6.957497 6.957497 16476 +wasrecaptur 1 1 6.957497 6.957497 16477 +hisplight 1 1 6.957497 6.957497 16478 +rescu 1 1 6.957497 6.957497 16479 +ofwhat 1 1 6.957497 6.957497 16480 +tracyk 1 1 6.957497 6.957497 16481 +ieeesymposium 1 1 6.957497 6.957497 16482 +measurementand 1 1 6.957497 6.957497 16483 +usingo 1 1 6.957497 6.957497 16484 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^travis^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^travis^ new file mode 100644 index 00000000..3b17dacf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^travis^ @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +time 4 293 1.098612 4.394448 17 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +modifi 1 178 1.609438 1.609438 35 +seattl 1 120 2.079442 2.079442 103 +take 1 97 2.302585 2.302585 134 +real 2 93 2.397895 4.795790 144 +help 1 83 2.484907 2.484907 175 +run 1 51 2.995732 2.995732 347 +quarter 1 47 3.091042 3.091042 389 +understand 1 47 3.091042 3.091042 384 +keep 1 44 3.135494 3.135494 409 +mechan 1 43 3.178054 3.178054 416 +cach 1 41 3.218876 3.218876 432 +press 1 42 3.218876 3.218876 419 +dissert 1 32 3.465736 3.465736 549 +half 1 21 3.912023 3.912023 776 +corpor 1 21 3.912023 3.912023 802 +latest 1 21 3.912023 3.912023 785 +predict 1 19 4.007333 4.007333 855 +statu 1 18 4.060443 4.060443 885 +side 1 15 4.248495 4.248495 1022 +spin 1 14 4.317488 4.317488 1121 +lock 1 9 4.753590 4.753590 1551 +craig 3 7 5.010635 15.031905 1879 +pool 1 6 5.164786 5.164786 2225 +consum 1 5 5.347108 5.347108 2334 +queu 1 4 5.568345 5.568345 2648 +travi 4 3 5.857933 23.431732 3985 +motor 1 3 5.857933 5.857933 3909 +submarin 1 2 6.263398 6.263398 6018 +restor 1 1 6.957497 6.957497 16485 +arctic 1 1 6.957497 6.957497 16486 +esca 1 1 6.957497 6.957497 16487 +volvo 1 1 6.957497 6.957497 16488 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tullsen^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tullsen^ new file mode 100644 index 00000000..095e9026 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^tullsen^ @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +download 1 36 3.367296 3.367296 489 +bibliographi 1 34 3.401197 3.401197 518 +hobbi 1 16 4.174387 4.174387 1009 +dean 2 14 4.317488 8.634976 1104 +tullsen 2 6 5.164786 10.329572 2081 +biograph 1 2 6.263398 6.263398 5625 +resumemi 1 2 6.263398 6.263398 4971 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ new file mode 100644 index 00000000..709dff9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vanhilst^ @@ -0,0 +1,236 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 7 380 0.693147 4.852029 9 +program 3 374 0.693147 2.079441 7 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +time 5 293 1.098612 5.493060 17 +student 3 343 1.098612 3.295836 19 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +softwar 5 220 1.386294 6.931470 30 +washington 4 236 1.386294 5.545176 32 +also 3 259 1.386294 4.158882 28 +languag 2 227 1.386294 2.772588 26 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +group 2 183 1.609438 3.218876 36 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +data 2 170 1.791759 3.583518 49 +recent 2 167 1.791759 3.583518 58 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +year 4 148 1.945910 7.783640 84 +object 2 138 1.945910 3.891820 79 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +seattl 2 120 2.079442 4.158884 103 +schedul 2 119 2.079442 4.158884 85 +analysi 1 124 2.079442 2.079442 98 +machin 1 129 2.079442 2.079442 95 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +send 1 114 2.197225 2.197225 109 +look 1 107 2.197225 2.197225 115 +part 2 98 2.302585 4.605170 129 +user 1 104 2.302585 2.302585 137 +advanc 1 99 2.302585 2.302585 130 +present 3 91 2.397895 7.193685 145 +commun 2 95 2.397895 4.795790 157 +call 1 91 2.397895 2.397895 153 +imag 1 91 2.397895 2.397895 161 +associ 1 93 2.397895 2.397895 151 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +start 3 83 2.484907 7.454721 173 +learn 2 86 2.484907 4.969814 170 +novemb 2 81 2.484907 4.969814 179 +librari 1 87 2.484907 2.484907 181 +thing 1 84 2.484907 2.484907 189 +activ 1 84 2.484907 2.484907 182 +larg 1 82 2.484907 2.484907 168 +come 2 78 2.564949 5.129898 202 +messag 1 76 2.564949 2.564949 212 +orient 1 80 2.564949 2.564949 205 +symposium 3 72 2.639057 7.917171 238 +workshop 1 71 2.639057 2.639057 239 +solv 1 73 2.639057 2.639057 234 +free 1 73 2.639057 2.639057 224 +would 1 67 2.708050 2.708050 251 +degre 1 69 2.708050 2.708050 259 +differ 1 66 2.708050 2.708050 253 +guid 1 63 2.772589 2.772589 267 +foundat 1 62 2.772589 2.772589 286 +improv 1 62 2.772589 2.772589 289 +visit 1 63 2.772589 2.772589 288 +locat 2 59 2.833213 5.666426 303 +summer 2 56 2.890372 5.780744 311 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +extens 1 53 2.944439 2.944439 340 +talk 1 53 2.944439 2.944439 336 +hardwar 2 51 2.995732 5.991464 350 +life 1 50 3.044522 3.044522 375 +still 1 50 3.044522 3.044522 362 +quarter 1 47 3.091042 3.091042 389 +could 1 46 3.091042 3.091042 383 +get 1 46 3.091042 3.091042 380 +made 1 44 3.135494 3.135494 398 +anoth 1 45 3.135494 3.135494 408 +fridai 1 44 3.135494 3.135494 390 +around 1 43 3.178054 3.178054 415 +compani 1 41 3.218876 3.218876 423 +programm 2 39 3.258097 6.516194 445 +continu 2 39 3.258097 6.516194 448 +littl 1 39 3.258097 3.258097 454 +realli 1 40 3.258097 3.258097 444 +small 1 39 3.258097 3.258097 447 +winter 1 36 3.367296 3.367296 500 +staff 1 36 3.367296 3.367296 490 +soon 1 36 3.367296 3.367296 494 +michael 2 35 3.401197 6.802394 514 +post 1 35 3.401197 3.401197 505 +within 1 33 3.433987 3.433987 525 +taught 1 33 3.433987 3.433987 526 +product 1 33 3.433987 3.433987 527 +kind 1 32 3.465736 3.465736 541 +chapter 1 32 3.465736 3.465736 536 +taken 1 31 3.496508 3.496508 555 +photo 1 31 3.496508 3.496508 561 +particip 2 29 3.583519 7.167038 589 +steve 1 29 3.583519 3.583519 594 +enjoi 2 26 3.688879 7.377758 660 +subject 1 26 3.688879 3.688879 647 +valu 1 25 3.737670 3.737670 665 +mike 14 24 3.761200 52.656800 703 +other 1 24 3.761200 3.761200 697 +doctor 1 24 3.761200 3.761200 709 +thank 3 23 3.806662 11.419986 721 +begin 1 23 3.806662 3.806662 716 +finish 1 22 3.850148 3.850148 748 +director 1 22 3.850148 3.850148 767 +born 1 21 3.912023 3.912023 798 +wrote 4 20 3.951244 15.804976 830 +wonder 1 20 3.951244 3.951244 815 +citi 2 19 4.007333 8.014666 874 +eric 1 19 4.007333 4.007333 870 +lot 1 18 4.060443 4.060443 889 +demo 1 18 4.060443 4.060443 888 +debug 1 17 4.110874 4.110874 944 +took 1 16 4.174387 4.174387 1010 +contribut 1 15 4.248495 4.248495 1021 +countri 1 15 4.248495 4.248495 1059 +hopefulli 1 14 4.317488 4.317488 1071 +wife 1 13 4.382027 4.382027 1196 +front 1 13 4.382027 4.382027 1154 +earlier 1 13 4.382027 4.382027 1140 +forth 1 13 4.382027 4.382027 1186 +stai 1 12 4.465908 4.465908 1215 +franc 1 12 4.465908 4.465908 1276 +skill 1 12 4.465908 4.465908 1205 +bill 2 11 4.553877 9.107754 1297 +fix 1 11 4.553877 4.553877 1327 +america 1 11 4.553877 4.553877 1370 +motiv 1 11 4.553877 4.553877 1346 +chri 1 11 4.553877 4.553877 1311 +lake 1 11 4.553877 4.553877 1373 +prior 1 10 4.653960 4.653960 1438 +acquisit 1 10 4.653960 4.653960 1465 +ski 1 10 4.653960 4.653960 1471 +correctli 1 9 4.753590 4.753590 1478 +doug 1 9 4.753590 4.753590 1517 +mention 1 9 4.753590 4.753590 1569 +french 1 9 4.753590 4.753590 1511 +folk 1 9 4.753590 4.753590 1597 +screen 1 9 4.753590 4.753590 1577 +swim 1 9 4.753590 4.753590 1599 +cross 1 8 4.875197 4.875197 1703 +harvard 2 7 5.010635 10.021270 1926 +brought 1 7 5.010635 5.010635 1925 +poster 1 7 5.010635 5.010635 1814 +earn 1 7 5.010635 5.010635 1788 +iowa 1 7 5.010635 5.010635 1971 +oopsla 5 6 5.164786 25.823930 2221 +pari 3 6 5.164786 15.494358 2158 +nativ 1 6 5.164786 5.164786 2192 +south 1 6 5.164786 5.164786 2167 +hike 1 6 5.164786 5.164786 2234 +truli 1 5 5.347108 5.347108 2476 +sail 1 5 5.347108 5.347108 2571 +observatori 1 4 5.568345 5.568345 3070 +countless 1 4 5.568345 5.568345 3020 +theintern 1 4 5.568345 5.568345 2981 +theacm 1 4 5.568345 5.568345 2698 +sigsoft 1 4 5.568345 5.568345 3036 +ti 1 4 5.568345 5.568345 3005 +marco 1 4 5.568345 5.568345 2589 +luck 1 3 5.857933 5.857933 3201 +immedi 1 3 5.857933 5.857933 3117 +motif 1 3 5.857933 5.857933 3752 +astrophys 1 3 5.857933 5.857933 3936 +schwarz 1 3 5.857933 5.857933 3986 +talent 1 3 5.857933 5.857933 3768 +traci 1 3 5.857933 5.857933 3984 +harold 1 3 5.857933 5.857933 3803 +scanner 1 3 5.857933 5.857933 3437 +eduperson 1 2 6.263398 6.263398 5776 +contractor 1 2 6.263398 6.263398 4915 +widget 1 2 6.263398 6.263398 5347 +convinc 1 2 6.263398 6.263398 6019 +calibr 1 2 6.263398 6.263398 4502 +francais 1 2 6.263398 6.263398 6020 +uist 1 2 6.263398 6.263398 5901 +grinnel 1 2 6.263398 6.263398 5763 +alexand 1 2 6.263398 6.263398 5329 +smithsonian 3 1 6.957497 20.872491 16489 +uwin 3 1 6.957497 20.872491 16490 +vanhilst 2 1 6.957497 13.914994 16491 +angela 2 1 6.957497 13.914994 16492 +vanhilstmichael 1 1 6.957497 6.957497 16493 +vanhilstvanhilst 1 1 6.957497 6.957497 16494 +edumvh 1 1 6.957497 6.957497 16495 +usaclick 1 1 6.957497 6.957497 16496 +personalmik 1 1 6.957497 6.957497 16497 +theend 1 1 6.957497 6.957497 16498 +udub 1 1 6.957497 6.957497 16499 +atibm 1 1 6.957497 6.957497 16500 +unterfac 1 1 6.957497 6.957497 16501 +sdata 1 1 6.957497 6.957497 16502 +maintainingcomput 1 1 6.957497 6.957497 16503 +saoimagewhich 1 1 6.957497 6.957497 16504 +astronom 1 1 6.957497 6.957497 16505 +saoimag 1 1 6.957497 6.957497 16506 +gnudistribut 1 1 6.957497 6.957497 16507 +wyatt 1 1 6.957497 6.957497 16508 +mandel 1 1 6.957497 6.957497 16509 +minkfor 1 1 6.957497 6.957497 16510 +seismologistsin 1 1 6.957497 6.957497 16511 +theallianc 1 1 6.957497 6.957497 16512 +colombiain 1 1 6.957497 6.957497 16513 +studentsbrows 1 1 6.957497 6.957497 16514 +pine 1 1 6.957497 6.957497 16515 +shirei 1 1 6.957497 6.957497 16516 +stenvik 1 1 6.957497 6.957497 16517 +frommicrosoft 1 1 6.957497 6.957497 16518 +sacrif 1 1 6.957497 6.957497 16519 +isota 1 1 6.957497 6.957497 16520 +inarchitectur 1 1 6.957497 6.957497 16521 +wooden 1 1 6.957497 6.957497 16522 +planningfrom 1 1 6.957497 6.957497 16523 +mitand 1 1 6.957497 6.957497 16524 +visualdesign 1 1 6.957497 6.957497 16525 +andkayak 1 1 6.957497 6.957497 16526 +bronson 1 1 6.957497 6.957497 16527 +sebastien 1 1 6.957497 6.957497 16528 +hilst 1 1 6.957497 6.957497 16529 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vass^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vass^ new file mode 100644 index 00000000..8bd2f2b4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vass^ @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +link 4 247 1.386294 5.545176 24 +washington 2 236 1.386294 2.772588 32 +also 2 259 1.386294 2.772588 28 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +paper 1 205 1.609438 1.609438 38 +data 2 170 1.791759 3.583518 49 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +object 1 138 1.945910 1.945910 79 +relat 1 139 1.945910 1.945910 68 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +databas 1 122 2.079442 2.079442 86 +person 1 111 2.197225 2.197225 117 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +check 1 115 2.197225 2.197225 118 +access 1 102 2.302585 2.302585 136 +peopl 1 96 2.302585 2.302585 132 +sinc 1 90 2.397895 2.397895 159 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +come 1 78 2.564949 2.564949 202 +orient 1 80 2.564949 2.564949 205 +appear 1 78 2.564949 2.564949 210 +write 1 72 2.639057 2.639057 222 +html 1 75 2.639057 2.639057 235 +collect 3 65 2.772589 8.317767 268 +written 1 63 2.772589 2.772589 278 +best 1 59 2.833213 2.833213 299 +room 1 59 2.833213 2.833213 301 +overview 1 56 2.890372 2.890372 323 +cool 1 49 3.044522 3.044522 374 +visitor 1 49 3.044522 3.044522 371 +even 1 45 3.135494 3.135494 393 +http 1 41 3.218876 3.218876 420 +live 1 40 3.258097 3.258097 451 +realli 1 40 3.258097 3.258097 444 +join 1 39 3.258097 3.258097 457 +slide 1 38 3.295837 3.295837 467 +staff 2 36 3.367296 6.734592 490 +download 1 36 3.367296 3.367296 489 +short 1 36 3.367296 3.367296 499 +graph 2 30 3.555348 7.110696 576 +quit 1 27 3.637586 3.637586 633 +arrai 1 27 3.637586 3.637586 627 +repres 1 26 3.688879 3.688879 656 +handl 1 24 3.761200 3.761200 685 +thu 1 21 3.912023 3.912023 773 +expand 2 17 4.110874 8.221748 928 +young 1 16 4.174387 4.174387 991 +mayb 1 15 4.248495 4.248495 1014 +qual 1 15 4.248495 4.248495 1062 +infrastructur 1 12 4.465908 4.465908 1234 +hello 2 10 4.653960 9.307920 1407 +mosaic 1 10 4.653960 4.653960 1426 +cecil 1 9 4.753590 4.753590 1547 +pure 1 8 4.875197 4.875197 1776 +irregular 1 8 4.875197 4.875197 1768 +mirror 1 6 5.164786 5.164786 2028 +shortest 1 5 5.347108 5.347108 2424 +writeup 1 5 5.347108 5.347108 2352 +vass 2 2 6.263398 12.526796 4449 +pageuw 1 2 6.263398 6.263398 6021 +pagerec 1 2 6.263398 6.263398 6022 +cecilproject 1 2 6.263398 6.263398 4457 +cooler 1 2 6.263398 6.263398 6023 +anddynam 1 2 6.263398 6.263398 5889 +myqual 1 2 6.263398 6.263398 6005 +closer 1 2 6.263398 6.263398 6024 +vassilylong 1 1 6.957497 6.957497 16530 +linki 1 1 6.957497 6.957497 16531 +fewfil 1 1 6.957497 6.957497 16532 +thisstuff 1 1 6.957497 6.957497 16533 +quotesrussian 1 1 6.957497 6.957497 16534 +pagesvari 1 1 6.957497 6.957497 16535 +linksguid 1 1 6.957497 6.957497 16536 +formsoth 1 1 6.957497 6.957497 16537 +pagencsa 1 1 6.957497 6.957497 16538 +andvortex 1 1 6.957497 6.957497 16539 +befast 1 1 6.957497 6.957497 16540 +themvi 1 1 6.957497 6.957497 16541 +ourdepartment 1 1 6.957497 6.957497 16542 +beenupgrad 1 1 6.957497 6.957497 16543 +thezpl 1 1 6.957497 6.957497 16544 +languageto 1 1 6.957497 6.957497 16545 +repartit 1 1 6.957497 6.957497 16546 +theslidesfrom 1 1 6.957497 6.957497 16547 +toresourc 1 1 6.957497 6.957497 16548 +eduobject 1 1 6.957497 6.957497 16549 +pastor 1 1 6.957497 6.957497 16550 +vybrasyvalsya 1 1 6.957497 6.957497 16551 +okna 1 1 6.957497 6.957497 16552 +pyatyi 1 1 6.957497 6.957497 16553 +deystvov 1 1 6.957497 6.957497 16554 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^virgil^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^virgil^ new file mode 100644 index 00000000..9310a02f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^virgil^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +engin 3 297 1.098612 3.295836 20 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +washington 8 236 1.386294 11.090352 32 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +seattl 2 120 2.079442 4.158884 103 +present 1 91 2.397895 2.397895 145 +educ 1 86 2.484907 2.484907 191 +resum 1 79 2.564949 2.564949 217 +servic 1 72 2.639057 2.639057 236 +receiv 1 66 2.708050 2.708050 244 +profession 1 51 2.995732 2.995732 345 +join 1 39 3.258097 3.258097 457 +electr 1 38 3.295837 3.295837 461 +scientist 1 31 3.496508 3.496508 560 +recommend 1 22 3.850148 3.850148 737 +theunivers 2 21 3.912023 7.824046 797 +divis 1 21 3.912023 3.912023 803 +histori 1 19 4.007333 4.007333 853 +letter 1 16 4.174387 4.174387 981 +achiev 1 14 4.317488 4.317488 1088 +evan 1 8 4.875197 4.875197 1633 +patent 1 5 5.347108 5.347108 2574 +invent 1 4 5.568345 5.568345 3028 +arizona 1 3 5.857933 5.857933 3700 +electricalengin 1 3 5.857933 5.857933 3987 +expertis 1 3 5.857933 5.857933 3321 +virgil 4 2 6.263398 25.053592 5783 +bourassa 3 2 6.263398 18.790194 5782 +uwvirgil 1 1 6.957497 6.957497 16555 +bourassavirgil 1 1 6.957497 6.957497 16556 +interestsinclud 1 1 6.957497 6.957497 16557 +boeingin 1 1 6.957497 6.957497 16558 +scienceorgan 1 1 6.957497 6.957497 16559 +bellevu 1 1 6.957497 6.957497 16560 +arizonast 1 1 6.957497 6.957497 16561 +temp 1 1 6.957497 6.957497 16562 +accesswhat 1 1 6.957497 6.957497 16563 +statusoccasion 1 1 6.957497 6.957497 16564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vivek^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vivek^ new file mode 100644 index 00000000..6d57169c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^vivek^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 6 571 0.000000 0.000000 5 +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +work 6 380 0.693147 4.158882 9 +interest 5 384 0.693147 3.465735 11 +system 5 443 0.693147 3.465735 6 +depart 2 457 0.693147 1.386294 12 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +current 3 284 1.098612 3.295836 21 +last 3 314 1.098612 3.295836 14 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +washington 5 236 1.386294 6.931470 32 +also 4 259 1.386294 5.545176 28 +graduat 2 215 1.386294 2.772588 31 +softwar 2 220 1.386294 2.772588 30 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +email 1 220 1.386294 1.386294 29 +link 1 247 1.386294 1.386294 24 +group 3 183 1.609438 4.828314 36 +public 2 202 1.609438 3.218876 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +avail 7 169 1.791759 12.542313 48 +distribut 5 162 1.791759 8.958795 51 +applic 3 170 1.791759 5.375277 56 +develop 2 174 1.791759 3.583518 53 +recent 2 167 1.791759 3.583518 58 +read 1 154 1.791759 1.791759 47 +year 4 148 1.945910 7.783640 84 +like 2 132 1.945910 3.891820 81 +model 2 145 1.945910 3.891820 69 +area 1 144 1.945910 1.945910 80 +high 5 130 2.079442 10.397210 101 +seattl 3 120 2.079442 6.238326 103 +provid 1 121 2.079442 2.079442 94 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +look 2 107 2.197225 4.394450 115 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +pleas 1 113 2.197225 2.197225 114 +place 1 106 2.197225 2.197225 124 +intern 1 108 2.197225 2.197225 128 +topic 1 114 2.197225 2.197225 110 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +follow 1 92 2.397895 2.397895 143 +center 1 88 2.397895 2.397895 158 +learn 2 86 2.484907 4.969814 170 +academ 1 82 2.484907 2.484907 178 +activ 1 84 2.484907 2.484907 182 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +issu 2 78 2.564949 5.129898 211 +server 1 76 2.564949 2.564949 204 +state 1 76 2.564949 2.564949 207 +involv 2 71 2.639057 5.278114 227 +nation 2 74 2.639057 5.278114 240 +servic 1 72 2.639057 2.639057 236 +simul 3 66 2.708050 8.124150 255 +receiv 1 66 2.708050 2.708050 244 +written 1 63 2.772589 2.772589 278 +organ 1 65 2.772589 2.772589 265 +collect 1 65 2.772589 2.772589 268 +visit 1 63 2.772589 2.772589 288 +plai 2 60 2.833213 5.666426 307 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +detail 2 57 2.890372 5.780744 321 +found 2 53 2.944439 5.888878 337 +undergradu 1 54 2.944439 2.944439 338 +particular 1 51 2.995732 2.995732 352 +much 1 52 2.995732 2.995732 349 +standard 1 48 3.044522 3.044522 365 +right 1 48 3.044522 3.044522 363 +physic 1 47 3.091042 3.091042 377 +directori 1 45 3.135494 3.135494 396 +math 1 44 3.135494 3.135494 402 +mechan 1 43 3.178054 3.178054 416 +continu 1 39 3.258097 3.258097 448 +game 2 36 3.367296 6.734592 498 +tree 1 36 3.367296 3.367296 492 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +toler 1 33 3.433987 3.433987 533 +fault 2 32 3.465736 6.931472 547 +chapter 2 32 3.465736 6.931472 536 +india 1 32 3.465736 3.465736 550 +taken 1 31 3.496508 3.496508 555 +often 1 31 3.496508 3.496508 551 +secur 1 30 3.555348 3.555348 577 +particip 1 29 3.583519 3.583519 589 +held 1 28 3.610918 3.610918 600 +effort 1 26 3.688879 3.688879 652 +enhanc 1 26 3.688879 3.688879 644 +session 1 26 3.688879 3.688879 643 +highli 1 23 3.806662 3.806662 725 +methodolog 1 23 3.806662 3.806662 733 +head 1 23 3.806662 3.806662 732 +period 1 22 3.850148 3.850148 743 +unit 2 21 3.912023 7.824046 779 +leav 1 21 3.912023 3.912023 772 +born 1 21 3.912023 3.912023 798 +half 1 21 3.912023 3.912023 776 +safeti 4 20 3.951244 15.804976 817 +scheme 1 20 3.951244 3.951244 818 +tenni 1 20 3.951244 3.951244 838 +exploit 1 20 3.951244 3.951244 836 +failur 1 18 4.060443 4.060443 898 +attend 1 18 4.060443 4.060443 893 +english 1 15 4.248495 4.248495 1033 +came 1 13 4.382027 4.382027 1197 +danc 2 12 4.465908 8.931816 1278 +replic 1 12 4.465908 4.465908 1231 +nanci 1 12 4.465908 4.465908 1256 +eight 1 11 4.553877 4.553877 1331 +council 1 11 4.553877 4.553877 1364 +literatur 1 11 4.553877 4.553877 1300 +leveson 1 9 4.753590 4.753590 1540 +poetri 1 9 4.753590 4.753590 1596 +simpli 1 8 4.875197 4.875197 1626 +presenc 1 8 4.875197 4.875197 1671 +coast 1 8 4.875197 4.875197 1746 +cricket 3 7 5.010635 15.031905 1945 +brought 1 7 5.010635 5.010635 1925 +whenev 1 7 5.010635 5.010635 1883 +occasion 1 7 5.010635 5.010635 1905 +saturdai 1 7 5.010635 5.010635 1794 +throughout 1 7 5.010635 5.010635 1871 +vivek 3 6 5.164786 15.494358 2210 +squash 1 6 5.164786 5.164786 2223 +band 1 6 5.164786 5.164786 2198 +corba 1 5 5.347108 5.347108 2320 +focuss 1 5 5.347108 5.347108 2271 +cell 1 5 5.347108 5.347108 2274 +nuclear 1 5 5.347108 5.347108 2576 +toolset 1 4 5.568345 5.568345 3014 +murphi 1 4 5.568345 5.568345 2737 +racquetbal 1 4 5.568345 5.568345 3052 +suffic 1 4 5.568345 5.568345 2869 +ultra 1 4 5.568345 5.568345 2889 +swing 1 4 5.568345 5.568345 2887 +restructur 1 4 5.568345 5.568345 2775 +tend 1 4 5.568345 5.568345 3041 +rsml 2 3 5.857933 11.715866 3967 +wesleyan 1 3 5.857933 5.857933 3988 +marin 1 3 5.857933 5.857933 3947 +ballroom 1 3 5.857933 5.857933 3983 +dabbl 1 3 5.857933 5.857933 3971 +bank 1 3 5.857933 5.857933 3920 +mirza 1 3 5.857933 5.857933 3989 +bellcor 3 2 6.263398 18.790194 5174 +ratan 2 2 6.263398 12.526796 5948 +adher 1 2 6.263398 6.263398 6025 +tango 1 2 6.263398 6.263398 6010 +reform 1 2 6.263398 6.263398 5828 +growth 1 2 6.263398 6.263398 4084 +angelo 2 1 6.957497 13.914994 16565 +scientistat 1 1 6.957497 6.957497 16566 +morristown 1 1 6.957497 6.957497 16567 +researchwork 1 1 6.957497 6.957497 16568 +distributedsoftwar 1 1 6.957497 6.957497 16569 +anatida 1 1 6.957497 6.957497 16570 +indc 1 1 6.957497 6.957497 16571 +foundher 1 1 6.957497 6.957497 16572 +integrationof 1 1 6.957497 6.957497 16573 +bydr 1 1 6.957497 6.957497 16574 +fromrequir 1 1 6.957497 6.957497 16575 +middletown 1 1 6.957497 6.957497 16576 +purus 1 1 6.957497 6.957497 16577 +lesserext 1 1 6.957497 6.957497 16578 +ardent 1 1 6.957497 6.957497 16579 +folow 1 1 6.957497 6.957497 16580 +superson 1 1 6.957497 6.957497 16581 +cowboi 1 1 6.957497 6.957497 16582 +keen 1 1 6.957497 6.957497 16583 +waltz 1 1 6.957497 6.957497 16584 +foxtrot 1 1 6.957497 6.957497 16585 +chacha 1 1 6.957497 6.957497 16586 +rhumba 1 1 6.957497 6.957497 16587 +mambo 1 1 6.957497 6.957497 16588 +ecosoc 1 1 6.957497 6.957497 16589 +rapidpopul 1 1 6.957497 6.957497 16590 +prolifer 1 1 6.957497 6.957497 16591 +ghalib 1 1 6.957497 6.957497 16592 +centuryindian 1 1 6.957497 6.957497 16593 +poet 1 1 6.957497 6.957497 16594 +romant 1 1 6.957497 6.957497 16595 +victorian 1 1 6.957497 6.957497 16596 +obligatori 1 1 6.957497 6.957497 16597 +sitesthat 1 1 6.957497 6.957497 16598 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^voelker^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^voelker^ new file mode 100644 index 00000000..1a49d55c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^voelker^ @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +call 1 91 2.397895 2.397895 153 +master 1 76 2.564949 2.564949 216 +window 2 68 2.708050 5.416100 242 +thesi 2 57 2.890372 5.780744 327 +mobil 1 23 3.806662 3.806662 730 +avoid 1 21 3.912023 3.912023 799 +emac 1 13 4.382027 4.382027 1143 +voelker 2 9 4.753590 9.507180 1557 +guggenheim 1 8 4.875197 4.875197 1759 +geoff 2 6 5.164786 10.329572 2124 +annex 1 5 5.347108 5.347108 2572 +wireless 1 4 5.568345 5.568345 2693 +washingtonseattl 1 4 5.568345 5.568345 3044 +mobisa 1 3 5.857933 5.857933 3927 +inseattl 1 2 6.263398 6.263398 6026 +whati 1 2 6.263398 6.263398 6027 +andbuild 1 2 6.263398 6.263398 6028 +settl 1 2 6.263398 6.263398 5778 +skywhoi 1 1 6.957497 6.957497 16599 +wherechateau 1 1 6.957497 6.957497 16600 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^waynew^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^waynew^ new file mode 100644 index 00000000..30be3d10 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^waynew^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +washington 3 236 1.386294 4.158882 32 +also 1 259 1.386294 1.386294 28 +list 3 201 1.609438 4.828314 39 +paper 1 205 1.609438 1.609438 38 +perform 1 143 1.945910 1.945910 74 +seattl 1 120 2.079442 2.079442 103 +look 2 107 2.197225 4.394450 115 +version 1 113 2.197225 2.197225 122 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +memori 1 101 2.302585 2.302585 139 +peopl 1 96 2.302585 2.302585 132 +thing 2 84 2.484907 4.969814 189 +stuff 1 87 2.484907 2.484907 171 +test 1 66 2.708050 2.708050 252 +organ 1 65 2.772589 2.772589 265 +particular 1 51 2.995732 2.995732 352 +investig 1 51 2.995732 2.995732 353 +cool 1 49 3.044522 3.044522 374 +keep 1 44 3.135494 3.135494 409 +howev 1 41 3.218876 3.218876 422 +littl 1 39 3.258097 3.258097 454 +actual 1 28 3.610918 3.610918 604 +interpret 1 24 3.761200 3.761200 686 +other 1 24 3.761200 3.761200 697 +earli 1 16 4.174387 4.174387 968 +baer 1 11 4.553877 4.553877 1353 +denni 1 11 4.553877 4.553877 1321 +alpha 1 11 4.553877 4.553877 1348 +jean 1 10 4.653960 4.653960 1440 +jump 1 9 4.753590 4.753590 1603 +wong 1 9 4.753590 4.753590 1609 +wayn 1 8 4.875197 4.875197 1738 +loup 1 6 5.164786 5.164786 2228 +geoff 1 6 5.164786 5.164786 2124 +fish 1 6 5.164786 5.164786 2207 +alec 1 5 5.347108 5.347108 2563 +rocki 1 4 5.568345 5.568345 3048 +waynew 2 3 5.857933 11.715866 3982 +differentmemori 1 1 6.957497 6.957497 16601 +beingdon 1 1 6.957497 6.957497 16602 +rightnow 1 1 6.957497 6.957497 16603 +peoplewho 1 1 6.957497 6.957497 16604 +testwayn 1 1 6.957497 6.957497 16605 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wchan^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wchan^ new file mode 100644 index 00000000..b2442639 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wchan^ @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +time 2 293 1.098612 2.197224 17 +washington 1 236 1.386294 1.386294 32 +william 1 22 3.850148 3.850148 765 +spend 1 19 4.007333 4.007333 850 +hang 1 9 4.753590 4.753590 1499 +pagei 1 8 4.875197 4.875197 1683 +chan 2 7 5.010635 10.021270 1876 +spare 1 6 5.164786 5.164786 2177 +hell 1 4 5.568345 5.568345 2885 +heaven 1 3 5.857933 5.857933 3589 +wchan 1 3 5.857933 5.857933 3338 +pagewilliam 1 1 6.957497 6.957497 16606 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html new file mode 100644 index 00000000..2d18f87f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^weld^weld.html @@ -0,0 +1,141 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 3 431 0.693147 2.079441 10 +inform 2 412 0.693147 1.386294 8 +interest 2 384 0.693147 1.386294 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +offic 3 299 1.098612 3.295836 13 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +washington 2 236 1.386294 2.772588 32 +softwar 2 220 1.386294 2.772588 30 +mail 1 238 1.386294 1.386294 22 +also 1 259 1.386294 1.386294 28 +paper 2 205 1.609438 3.218876 38 +group 1 183 1.609438 1.609438 36 +base 2 165 1.791759 3.583518 50 +data 1 170 1.791759 1.791759 49 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +professor 1 137 1.945910 1.945910 76 +hall 1 146 1.945910 1.945910 65 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +report 1 131 2.079442 2.079442 92 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +world 1 115 2.197225 2.197225 126 +book 1 99 2.302585 2.302585 131 +technic 1 100 2.302585 2.302585 140 +associ 1 93 2.397895 2.397895 151 +mani 1 92 2.397895 2.397895 150 +select 1 91 2.397895 2.397895 154 +journal 1 83 2.484907 2.484907 183 +control 1 82 2.484907 2.484907 164 +wide 1 84 2.484907 2.484907 185 +internet 1 83 2.484907 2.484907 186 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +intellig 3 72 2.639057 7.917171 225 +nation 1 74 2.639057 2.639057 240 +receiv 2 66 2.708050 5.416100 244 +degre 1 69 2.708050 2.708050 259 +sieg 1 69 2.708050 2.708050 260 +august 1 66 2.708050 2.708050 257 +plan 4 65 2.772589 11.090356 272 +artifici 2 63 2.772589 5.545178 280 +dept 1 64 2.772589 2.772589 291 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +visit 1 63 2.772589 2.772589 288 +juli 1 60 2.833213 2.833213 305 +plai 1 60 2.833213 2.833213 307 +publish 1 57 2.890372 2.890372 326 +found 1 53 2.944439 2.944439 337 +investig 1 51 2.995732 2.995732 353 +electron 1 47 3.091042 3.091042 379 +favorit 1 44 3.135494 3.135494 410 +editor 1 41 3.218876 3.218876 433 +edit 1 42 3.218876 3.218876 418 +past 1 42 3.218876 3.218876 428 +winter 1 36 3.367296 3.367296 500 +award 2 34 3.401197 6.802394 523 +least 1 35 3.401197 3.401197 516 +board 1 33 3.433987 3.433987 528 +travel 1 30 3.555348 3.555348 579 +chair 1 29 3.583519 3.583519 596 +repres 1 26 3.688879 3.688879 656 +enjoi 1 26 3.688879 3.688879 660 +reach 1 24 3.761200 3.761200 688 +magazin 1 24 3.761200 3.761200 704 +ofwashington 1 22 3.850148 3.850148 766 +almost 1 22 3.850148 3.850148 742 +comparison 1 19 4.007333 4.007333 863 +agent 3 18 4.060443 12.181329 910 +bachelor 1 17 4.110874 4.110874 957 +adam 1 17 4.110874 4.110874 934 +young 1 16 4.174387 4.174387 991 +action 1 15 4.248495 4.248495 1038 +photograph 1 15 4.248495 4.248495 1056 +role 1 14 4.317488 4.317488 1101 +stori 1 14 4.317488 4.317488 1087 +galleri 1 13 4.382027 4.382027 1192 +daniel 2 12 4.465908 8.931816 1233 +land 1 12 4.465908 4.465908 1273 +guest 1 12 4.465908 4.465908 1220 +infrastructur 1 12 4.465908 4.465908 1234 +sens 1 11 4.553877 4.553877 1305 +shop 1 10 4.653960 4.653960 1469 +invit 1 10 4.653960 4.653960 1428 +weld 9 9 4.753590 42.782310 1538 +hundr 1 9 4.753590 4.753590 1528 +aaai 3 8 4.875197 14.625591 1750 +presidenti 1 8 4.875197 4.875197 1737 +gather 1 8 4.875197 4.875197 1719 +pacif 1 8 4.875197 4.875197 1674 +illustr 1 8 4.875197 4.875197 1679 +planner 1 7 5.010635 5.010635 1797 +ground 1 7 5.010635 5.010635 1955 +softbot 1 7 5.010635 5.010635 1974 +northwest 1 7 5.010635 5.010635 1973 +yale 1 6 5.164786 5.164786 2003 +commit 1 6 5.164786 5.164786 2233 +engineeringat 1 5 5.347108 5.347108 2561 +middl 1 5 5.347108 5.347108 2372 +cacm 1 5 5.347108 5.347108 2388 +allegro 1 5 5.347108 5.347108 2314 +naval 1 4 5.568345 5.568345 2920 +climb 1 4 5.568345 5.568345 2936 +biochemistri 1 3 5.857933 5.857933 3513 +ucpop 1 3 5.857933 5.857933 3878 +revisit 1 3 5.857933 5.857933 3915 +recreat 1 3 5.857933 5.857933 3990 +theworld 1 3 5.857933 5.857933 3158 +twin 1 3 5.857933 5.857933 3657 +younginvestig 1 2 6.263398 6.263398 5794 +ascal 1 2 6.263398 6.263398 5893 +anintroduct 1 2 6.263398 6.263398 4156 +absent 1 2 6.263398 6.263398 4825 +cafe 1 2 6.263398 6.263398 5826 +boi 1 2 6.263398 6.263398 5918 +wilder 1 2 6.263398 6.263398 5516 +theadvisori 1 1 6.957497 6.957497 16607 +airesearch 1 1 6.957497 6.957497 16608 +ofintellig 1 1 6.957497 6.957497 16609 +isco 1 1 6.957497 6.957497 16610 +scad 1 1 6.957497 6.957497 16611 +seattlewa 1 1 6.957497 6.957497 16612 +sitesworldwid 1 1 6.957497 6.957497 16613 +arehi 1 1 6.957497 6.957497 16614 +aip 1 1 6.957497 6.957497 16615 +exhaustivelist 1 1 6.957497 6.957497 16616 +stormymountain 1 1 6.957497 6.957497 16617 +galen 1 1 6.957497 6.957497 16618 +desert 1 1 6.957497 6.957497 16619 +morocco 1 1 6.957497 6.957497 16620 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wendy^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wendy^ new file mode 100644 index 00000000..7a4e48dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wendy^ @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +master 1 76 2.564949 2.564949 216 +univ 1 28 3.610918 3.610918 617 +utah 1 9 4.753590 4.753590 1585 +wendi 3 2 6.263398 18.790194 5864 +belluomini 2 2 6.263398 12.526796 5865 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^whsieh^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^whsieh^ new file mode 100644 index 00000000..3087b680 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^whsieh^ @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +engin 3 297 1.098612 3.295836 20 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +washington 3 236 1.386294 4.158882 32 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +architectur 1 139 1.945910 1.945910 77 +compil 2 122 2.079442 4.158884 96 +seattl 1 120 2.079442 2.079442 103 +select 1 91 2.397895 2.397895 154 +member 1 84 2.484907 2.484907 165 +chang 1 82 2.484907 2.484907 163 +receiv 1 66 2.708050 2.708050 244 +sieg 1 69 2.708050 2.708050 260 +interact 1 62 2.772589 2.772589 270 +advisor 1 51 2.995732 2.995732 355 +move 1 47 3.091042 3.091042 382 +electr 1 38 3.295837 3.295837 461 +theunivers 1 21 3.912023 3.912023 797 +among 1 21 3.912023 3.912023 781 +programminglanguag 1 21 3.912023 3.912023 782 +voic 1 21 3.912023 3.912023 806 +runtim 1 19 4.007333 4.007333 858 +thedepart 2 11 4.553877 9.107754 1350 +wilson 1 9 4.753590 4.753590 1536 +postdoc 1 8 4.875197 4.875197 1724 +myresearch 1 4 5.568345 5.568345 2842 +weihl 1 3 5.857933 5.857933 3284 +inseattl 1 2 6.263398 6.263398 6026 +thespin 1 2 6.263398 6.263398 6029 +sciencein 1 2 6.263398 6.263398 5804 +thelaboratori 1 2 6.263398 6.263398 4424 +linksperson 1 2 6.263398 6.263398 5143 +hsieh 1 2 6.263398 6.263398 5818 +hsiehwilson 1 1 6.957497 6.957497 16621 +hsiehi 1 1 6.957497 6.957497 16622 +theschool 1 1 6.957497 6.957497 16623 +engineeringatmit 1 1 6.957497 6.957497 16624 +werefran 1 1 6.957497 6.957497 16625 +kaashoekandbil 1 1 6.957497 6.957497 16626 +publicationsselect 1 1 6.957497 6.957497 16627 +interestswilson 1 1 6.957497 6.957497 16628 +numberha 1 1 6.957497 6.957497 16629 +whsieh 1 1 6.957497 6.957497 16630 +keyoctob 1 1 6.957497 6.957497 16631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wolman^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wolman^ new file mode 100644 index 00000000..ebc6c798 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^wolman^ @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +network 3 168 1.791759 5.375277 61 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +perform 2 143 1.945910 3.891820 74 +architectur 1 139 1.945910 1.945910 77 +seattl 2 120 2.079442 4.158884 103 +analysi 1 124 2.079442 2.079442 98 +structur 1 106 2.197225 2.197225 105 +school 1 84 2.484907 2.484907 188 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +line 1 75 2.639057 2.639057 231 +plai 1 60 2.833213 2.833213 307 +summer 1 56 2.890372 2.890372 311 +realli 1 40 3.258097 3.258097 444 +winter 1 36 3.367296 3.367296 500 +idea 1 32 3.465736 3.465736 545 +interpret 2 24 3.761200 7.522400 686 +departmentunivers 1 24 3.761200 3.761200 711 +scalabl 1 24 3.761200 3.761200 705 +binari 1 20 3.951244 3.951244 823 +bershad 1 18 4.060443 4.060443 902 +asplo 1 17 4.110874 4.110874 948 +cambridg 1 16 4.174387 4.174387 1008 +latenc 1 16 4.174387 4.174387 993 +levi 1 14 4.317488 4.317488 1093 +washingtonbox 1 13 4.382027 4.382027 1200 +usenix 2 12 4.465908 8.931816 1240 +baer 1 11 4.553877 4.553877 1353 +thecomput 1 10 4.653960 4.653960 1408 +equip 1 10 4.653960 4.653960 1459 +voelker 2 9 4.753590 9.507180 1557 +wong 1 9 4.753590 4.753590 1609 +romer 1 8 4.875197 4.875197 1706 +guitar 1 8 4.875197 4.875197 1758 +instrument 1 7 5.010635 5.010635 1954 +wolman 7 6 5.164786 36.153502 2093 +corp 1 6 5.164786 5.164786 2139 +strang 1 6 5.164786 5.164786 2064 +alec 2 5 5.347108 10.694216 2563 +departmentat 1 5 5.347108 5.347108 2513 +treat 1 5 5.347108 5.347108 2521 +gradual 1 4 5.568345 5.568345 2997 +etch 1 4 5.568345 5.568345 2755 +thekkath 1 3 5.857933 5.857933 3973 +habit 1 3 5.857933 5.857933 3777 +thechateau 1 2 6.263398 6.263398 5853 +fordigit 1 2 6.263398 6.263398 5752 +firewal 1 2 6.263398 6.263398 5407 +relai 1 2 6.263398 6.263398 5404 +hungri 1 2 6.263398 6.263398 5511 +otter 1 2 6.263398 6.263398 4166 +nervou 1 2 6.263398 6.263398 5953 +pressur 1 2 6.263398 6.263398 5960 +wolmanwolman 1 1 6.957497 6.957497 16632 +eduworkcomput 1 1 6.957497 6.957497 16633 +isroom 1 1 6.957497 6.957497 16634 +executablesrocki 1 1 6.957497 6.957497 16635 +performanceon 1 1 6.957497 6.957497 16636 +trees 1 1 6.957497 6.957497 16637 +fixha 1 1 6.957497 6.957497 16638 +hallwolman 1 1 6.957497 6.957497 16639 +diseasewolman 1 1 6.957497 6.957497 16640 +lumber 1 1 6.957497 6.957497 16641 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^xqin^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^xqin^ new file mode 100644 index 00000000..6e3dc452 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^xqin^ @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +engin 4 297 1.098612 4.394448 20 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +parallel 5 169 1.791759 8.958795 60 +distribut 2 162 1.791759 3.583518 51 +phone 1 175 1.791759 1.791759 45 +base 1 165 1.791759 1.791759 50 +implement 1 152 1.791759 1.791759 52 +perform 6 143 1.945910 11.675460 74 +architectur 2 139 1.945910 3.891820 77 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +confer 2 126 2.079442 4.158884 100 +seattl 1 120 2.079442 2.079442 103 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +intern 1 108 2.197225 2.197225 128 +proceed 2 93 2.397895 4.795790 152 +school 1 84 2.484907 2.484907 188 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +stuff 1 87 2.484907 2.484907 171 +method 1 80 2.564949 2.564949 213 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +symposium 1 72 2.639057 2.639057 238 +simul 5 66 2.708050 13.540250 255 +sieg 1 69 2.708050 2.708050 260 +goal 1 66 2.708050 2.708050 250 +evalu 3 64 2.772589 8.317767 266 +dept 1 64 2.772589 2.772589 291 +visual 1 48 3.044522 3.044522 372 +possibl 1 47 3.091042 3.091042 378 +term 1 43 3.178054 3.178054 411 +cach 1 41 3.218876 3.218876 432 +submit 1 39 3.258097 3.258097 440 +short 1 36 3.367296 3.367296 499 +soon 1 36 3.367296 3.367296 494 +award 1 34 3.401197 3.401197 523 +tech 1 35 3.401197 3.401197 515 +photo 1 31 3.496508 3.496508 561 +graph 1 30 3.555348 3.555348 576 +multiprocessor 2 28 3.610918 7.221836 605 +cluster 1 28 3.610918 3.610918 612 +univ 1 28 3.610918 3.610918 617 +compar 1 26 3.688879 3.688879 648 +trace 3 25 3.737670 11.213010 677 +predict 1 19 4.007333 4.007333 855 +monitor 1 17 4.110874 4.110874 941 +zhang 2 16 4.174387 8.348774 980 +driven 3 15 4.248495 12.745485 1048 +coher 1 14 4.317488 4.317488 1109 +baer 6 11 4.553877 27.323262 1353 +jean 1 10 4.653960 4.653960 1440 +explicit 1 9 4.753590 4.753590 1525 +loup 1 6 5.164786 5.164786 2228 +optimist 2 5 5.347108 10.694216 2501 +conserv 1 4 5.568345 5.568345 2870 +tran 1 3 5.857933 5.857933 3384 +communicationprimit 1 2 6.263398 6.263398 5449 +hpca 1 2 6.263398 6.263398 6030 +toolfor 1 2 6.263398 6.263398 6031 +numa 1 2 6.263398 6.263398 4905 +xiaohan 2 1 6.957497 13.914994 16642 +xqin 1 1 6.957497 6.957497 16643 +basedmultiprocessor 1 1 6.957497 6.957497 16644 +nalluri 1 1 6.957497 6.957497 16645 +processingon 1 1 6.957497 6.957497 16646 +chinaread 1 1 6.957497 6.957497 16647 +chinesesearch 1 1 6.957497 6.957497 16648 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^yasushi^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^yasushi^ new file mode 100644 index 00000000..d6949073 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^yasushi^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +washington 3 236 1.386294 4.158882 32 +link 3 247 1.386294 4.158882 24 +graduat 1 215 1.386294 1.386294 31 +oper 1 180 1.609438 1.609438 34 +address 1 170 1.791759 1.791759 62 +year 1 148 1.945910 1.945910 84 +seattl 1 120 2.079442 2.079442 103 +document 1 121 2.079442 2.079442 89 +schedul 1 119 2.079442 2.079442 85 +intern 1 108 2.197225 2.197225 128 +text 1 98 2.302585 2.302585 133 +info 3 85 2.484907 7.454721 176 +second 1 81 2.484907 2.484907 166 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +servic 1 72 2.639057 2.639057 236 +type 1 61 2.833213 2.833213 296 +index 2 56 2.890372 5.780744 309 +talk 1 53 2.944439 2.944439 336 +finger 1 52 2.995732 2.995732 354 +transact 1 39 3.258097 3.258097 438 +brian 1 38 3.295837 3.295837 466 +connect 1 37 3.332205 3.332205 485 +tech 1 35 3.401197 3.401197 515 +survei 1 35 3.401197 3.401197 513 +random 1 34 3.401197 3.401197 511 +linux 1 27 3.637586 3.637586 631 +yahoo 1 24 3.761200 3.761200 707 +lyco 1 19 4.007333 4.007333 871 +bershad 1 18 4.060443 4.060443 902 +qual 1 15 4.248495 4.248495 1062 +spin 2 14 4.317488 8.634976 1121 +touch 1 12 4.465908 4.465908 1288 +perl 1 11 4.553877 4.553877 1332 +desktop 2 10 4.653960 9.307920 1445 +metacrawl 1 10 4.653960 4.653960 1455 +vista 1 10 4.653960 4.653960 1452 +meta 1 9 4.753590 4.753590 1505 +modula 1 9 4.753590 4.753590 1613 +japan 1 8 4.875197 4.875197 1762 +gatewai 1 7 5.010635 5.010635 1942 +lesson 1 5 5.347108 5.347108 2568 +alta 1 4 5.568345 5.568345 3039 +japanes 1 4 5.568345 5.568345 2934 +patch 1 4 5.568345 5.568345 2710 +archi 1 3 5.857933 5.857933 3639 +javascript 1 3 5.857933 5.857933 3221 +thespin 1 2 6.263398 6.263398 6029 +apprentic 1 2 6.263398 6.263398 5873 +yasushi 2 1 6.957497 13.914994 16649 +saitoyasushi 1 1 6.957497 6.957497 16650 +saito 1 1 6.957497 6.957497 16651 +atdepart 1 1 6.957497 6.957497 16652 +workingwith 1 1 6.957497 6.957497 16653 +andperson 1 1 6.957497 6.957497 16654 +sightse 1 1 6.957497 6.957497 16655 +trainer 1 1 6.957497 6.957497 16656 +dvorak 1 1 6.957497 6.957497 16657 +trycanva 1 1 6.957497 6.957497 16658 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^zamir^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^zamir^ new file mode 100644 index 00000000..dc08232a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^homes^zamir^ @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +washington 3 236 1.386294 4.158882 32 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +like 1 132 1.945910 1.945910 81 +seattl 2 120 2.079442 4.158884 103 +document 2 121 2.079442 4.158884 89 +mathemat 1 108 2.197225 2.197225 123 +part 1 98 2.302585 2.302585 129 +search 2 95 2.397895 4.795790 155 +pictur 2 89 2.397895 4.795790 160 +internet 2 83 2.484907 4.969814 186 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +resum 1 79 2.564949 2.564949 217 +intellig 1 72 2.639057 2.639057 225 +line 1 75 2.639057 2.639057 231 +servic 1 72 2.639057 2.639057 236 +degre 1 69 2.708050 2.708050 259 +artifici 1 63 2.772589 2.772589 280 +result 1 65 2.772589 2.772589 281 +basic 1 50 3.044522 3.044522 360 +physic 1 47 3.091042 3.091042 377 +done 1 47 3.091042 3.091042 381 +field 1 37 3.332205 3.332205 482 +idea 1 32 3.465736 3.465736 545 +computersci 1 30 3.555348 3.555348 562 +cluster 1 28 3.610918 3.610918 612 +retriev 1 27 3.637586 3.637586 621 +along 1 18 4.060443 4.060443 878 +engineeringunivers 1 17 4.110874 4.110874 959 +chateau 1 16 4.174387 4.174387 997 +trip 1 14 4.317488 4.317488 1113 +washingtonbox 1 13 4.382027 4.382027 1200 +edui 1 13 4.382027 4.382027 1193 +israel 1 11 4.553877 4.553877 1366 +metacrawl 1 10 4.653960 4.653960 1455 +ski 1 10 4.653960 4.653960 1471 +hundr 1 9 4.753590 4.753590 1528 +erik 1 8 4.875197 4.875197 1701 +oren 1 6 5.164786 5.164786 2134 +softwareengin 1 6 5.164786 5.164786 2162 +selberg 1 5 5.347108 5.347108 2441 +algorithmsfor 1 4 5.568345 5.568345 2748 +worki 1 4 5.568345 5.568345 3010 +raft 1 4 5.568345 5.568345 3060 +dive 1 3 5.857933 5.857933 3654 +zamir 4 2 6.263398 25.053592 5897 +pageoren 1 2 6.263398 6.263398 5888 +jerusalem 1 2 6.263398 6.263398 4918 +isra 1 1 6.957497 6.957497 16659 +myundergradu 1 1 6.957497 6.957497 16660 +hebrewunivers 1 1 6.957497 6.957497 16661 +userwith 1 1 6.957497 6.957497 16662 +orenetzioni 1 1 6.957497 6.957497 16663 +sinai 1 1 6.957497 6.957497 16664 +jeeptour 1 1 6.957497 6.957497 16665 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html new file mode 100644 index 00000000..1c3f4a1e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^baer.html @@ -0,0 +1,127 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 3 431 0.693147 2.079441 10 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 2 297 1.098612 2.197224 20 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +also 4 259 1.386294 5.545176 28 +washington 1 236 1.386294 1.386294 32 +languag 1 227 1.386294 1.386294 26 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +paper 1 205 1.609438 1.609438 38 +parallel 3 169 1.791759 5.375277 60 +distribut 2 162 1.791759 3.583518 51 +recent 1 167 1.791759 1.791759 58 +architectur 6 139 1.945910 11.675460 77 +professor 2 137 1.945910 3.891820 76 +process 1 142 1.945910 1.945910 72 +lectur 1 135 1.945910 1.945910 73 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +look 1 107 2.197225 2.197225 115 +present 1 91 2.397895 2.397895 145 +ieee 3 86 2.484907 7.454721 190 +journal 2 83 2.484907 4.969814 183 +member 1 84 2.484907 2.484907 165 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +nation 1 74 2.639057 2.639057 240 +involv 1 71 2.639057 2.639057 227 +simul 2 66 2.708050 5.416100 255 +receiv 1 66 2.708050 2.708050 244 +improv 1 62 2.772589 2.772589 289 +direct 1 57 2.890372 2.890372 316 +digit 1 52 2.995732 2.995732 348 +hardwar 1 51 2.995732 2.995732 350 +approach 1 48 3.044522 3.044522 366 +protocol 2 45 3.135494 6.270988 407 +textbook 1 44 3.135494 3.135494 397 +execut 1 45 3.135494 3.135494 404 +cach 3 41 3.218876 9.656628 432 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +author 2 39 3.258097 6.516194 450 +join 1 39 3.258097 3.258097 457 +electr 1 38 3.295837 3.295837 461 +industri 1 38 3.295837 3.295837 464 +ofth 1 36 3.367296 3.367296 491 +singl 1 34 3.401197 3.401197 510 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +chair 1 29 3.583519 3.583519 596 +cluster 2 28 3.610918 7.221836 612 +multiprocessor 1 28 3.610918 3.610918 605 +although 1 25 3.737670 3.737670 667 +trace 1 25 3.737670 3.737670 677 +fellow 2 24 3.761200 7.522400 701 +serv 2 22 3.850148 7.700296 758 +comparison 1 19 4.007333 4.007333 863 +asplo 1 17 4.110874 4.110874 948 +driven 1 15 4.248495 4.248495 1048 +coher 1 14 4.317488 4.317488 1109 +difficulti 1 13 4.382027 4.382027 1132 +block 1 13 4.382027 4.382027 1183 +franc 1 12 4.465908 4.465908 1276 +baer 3 11 4.553877 13.661631 1353 +cycl 1 11 4.553877 4.553877 1335 +primit 1 11 4.553877 4.553877 1317 +isca 1 11 4.553877 4.553877 1354 +impact 1 11 4.553877 4.553877 1334 +denni 1 11 4.553877 4.553877 1321 +jean 1 10 4.653960 4.653960 1440 +prior 1 10 4.653960 4.653960 1438 +french 1 9 4.753590 4.753590 1511 +guggenheim 1 8 4.875197 4.875197 1759 +uniprocessor 1 8 4.875197 4.875197 1696 +prefetch 2 6 5.164786 10.329572 2039 +loup 1 6 5.164786 5.164786 2228 +ucla 2 5 5.347108 10.694216 2502 +icpp 1 5 5.347108 5.347108 2382 +anddistribut 1 4 5.568345 5.568345 3031 +coauthor 1 4 5.568345 5.568345 3064 +conserv 1 4 5.568345 5.568345 2870 +chairman 3 3 5.857933 17.573799 3991 +electricalengin 1 3 5.857933 5.857933 3987 +parallelprocess 1 3 5.857933 5.857933 3626 +twelv 1 3 5.857933 5.857933 3899 +specul 1 3 5.857933 5.857933 3951 +grenobl 2 2 6.263398 12.526796 5928 +internationalsymposium 2 2 6.263398 12.526796 6032 +adjunct 1 2 6.263398 6.263398 6033 +diplom 1 2 6.263398 6.263398 5982 +theuniversit 1 2 6.263398 6.263398 5927 +laboratoir 1 2 6.263398 6.263398 5929 +universit 1 2 6.263398 6.263398 5630 +retain 1 2 6.263398 6.263398 5443 +hpca 1 2 6.263398 6.263398 6030 +professorand 1 1 6.957497 6.957497 16666 +ingnieur 1 1 6.957497 6.957497 16667 +doctorat 1 1 6.957497 6.957497 16668 +decalcul 1 1 6.957497 6.957497 16669 +technologygroup 1 1 6.957497 6.957497 16670 +thesearea 1 1 6.957497 6.957497 16671 +distinguishedvisitor 1 1 6.957497 6.957497 16672 +asprogram 1 1 6.957497 6.957497 16673 +sigarch 1 1 6.957497 6.957497 16674 +eighteen 1 1 6.957497 6.957497 16675 +professorba 1 1 6.957497 6.957497 16676 +laboratoriesand 1 1 6.957497 6.957497 16677 +inacademia 1 1 6.957497 6.957497 16678 +hashad 1 1 6.957497 6.957497 16679 +accent 1 1 6.957497 6.957497 16680 +comparisonwith 1 1 6.957497 6.957497 16681 +andisca 1 1 6.957497 6.957497 16682 +optimisticapproach 1 1 6.957497 6.957497 16683 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html new file mode 100644 index 00000000..d266904f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^chambers.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +languag 3 227 1.386294 4.158882 26 +washington 2 236 1.386294 2.772588 32 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +applic 2 170 1.791759 3.583518 56 +implement 1 152 1.791759 1.791759 52 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +object 3 138 1.945910 5.837730 79 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +area 1 144 1.945910 1.945910 80 +hall 1 146 1.945910 1.945910 65 +high 1 130 2.079442 2.079442 101 +seattl 1 120 2.079442 2.079442 103 +assist 1 112 2.197225 2.197225 113 +advanc 1 99 2.302585 2.302585 130 +member 2 84 2.484907 4.969814 165 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +requir 1 81 2.484907 2.484907 167 +orient 3 80 2.564949 7.694847 205 +optim 2 79 2.564949 5.129898 197 +method 1 80 2.564949 2.564949 213 +dynam 1 76 2.564949 2.564949 194 +interfac 1 79 2.564949 2.564949 209 +april 1 77 2.564949 2.564949 196 +effici 1 73 2.639057 2.639057 233 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +sieg 1 69 2.708050 2.708050 260 +guid 1 63 2.772589 2.772589 267 +street 1 63 2.772589 2.772589 293 +type 1 61 2.833213 2.833213 296 +room 1 59 2.833213 2.833213 301 +faculti 1 56 2.890372 2.890372 325 +direct 1 57 2.890372 2.890372 316 +extens 2 53 2.944439 5.888878 340 +undergradu 1 54 2.944439 2.944439 338 +investig 2 51 2.995732 5.991464 353 +maintain 1 51 2.995732 2.995732 342 +still 1 50 3.044522 3.044522 362 +pointer 1 48 3.044522 3.044522 368 +featur 1 46 3.091042 3.091042 386 +adapt 1 46 3.091042 3.091042 387 +join 1 39 3.258097 3.258097 457 +multi 1 36 3.367296 3.367296 493 +express 1 32 3.465736 3.465736 540 +profil 1 30 3.555348 3.555348 581 +static 2 27 3.637586 7.275172 619 +lead 1 23 3.806662 3.806662 718 +self 1 22 3.850148 3.850148 761 +programminglanguag 1 21 3.912023 3.912023 782 +util 1 21 3.912023 3.912023 774 +kernel 1 20 3.951244 3.951244 825 +fine 1 20 3.951244 3.951244 822 +stanford 1 17 4.110874 4.110874 955 +previous 1 17 4.110874 4.110874 923 +engineeringunivers 1 17 4.110874 4.110874 959 +spin 2 14 4.317488 8.634976 1121 +achiev 1 14 4.317488 4.317488 1088 +incorpor 2 13 4.382027 8.764054 1163 +washingtonbox 1 13 4.382027 4.382027 1200 +safe 1 12 4.465908 4.465908 1274 +modul 1 10 4.653960 4.653960 1434 +reli 1 10 4.653960 4.653960 1411 +cecil 2 9 4.753590 9.507180 1547 +end 1 9 4.753590 4.753590 1567 +modula 1 9 4.753590 4.753590 1613 +herefor 1 9 4.753590 4.753590 1483 +chamber 6 8 4.875197 29.251182 1692 +pure 1 8 4.875197 4.875197 1776 +analys 1 8 4.875197 4.875197 1666 +isol 1 8 4.875197 4.875197 1663 +craig 3 7 5.010635 15.031905 1879 +implementationof 1 7 5.010635 5.010635 1813 +vehicl 1 7 5.010635 5.010635 1928 +vortex 2 5 5.347108 10.694216 2362 +spinproject 1 5 5.347108 5.347108 2570 +despit 1 5 5.347108 5.347108 2317 +languagesand 1 4 5.568345 5.568345 3071 +microkernel 1 4 5.568345 5.568345 3047 +intra 1 3 5.857933 5.857933 3243 +dialect 1 3 5.857933 5.857933 3226 +dynamiccompil 1 3 5.857933 5.857933 3926 +programmingenviron 1 2 6.263398 6.263398 5240 +whichsupport 1 2 6.263398 6.263398 6003 +ceciland 1 1 6.957497 6.957497 16684 +languageserv 1 1 6.957497 6.957497 16685 +compilersystem 1 1 6.957497 6.957497 16686 +andinterprocedur 1 1 6.957497 6.957497 16687 +withfront 1 1 6.957497 6.957497 16688 +chamberswa 1 1 6.957497 6.957497 16689 +implementationsund 1 1 6.957497 6.957497 16690 +systemintegr 1 1 6.957497 6.957497 16691 +themodula 1 1 6.957497 6.957497 16692 +spinalso 1 1 6.957497 6.957497 16693 +grainedextens 1 1 6.957497 6.957497 16694 +researchproject 1 1 6.957497 6.957497 16695 +informationprof 1 1 6.957497 6.957497 16696 +chambersdepart 1 1 6.957497 6.957497 16697 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html new file mode 100644 index 00000000..9e940a9e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^ebeling.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +project 4 340 1.098612 4.394448 18 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +student 1 343 1.098612 1.098612 19 +washington 3 236 1.386294 4.158882 32 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +algorithm 1 162 1.791759 1.791759 57 +hour 1 165 1.791759 1.791759 46 +hall 1 146 1.945910 1.945910 65 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +high 1 130 2.079442 2.079442 101 +confer 1 126 2.079442 2.079442 100 +advanc 1 99 2.302585 2.302585 130 +associ 1 93 2.397895 2.397895 151 +graphic 1 90 2.397895 2.397895 147 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +journal 1 83 2.484907 2.484907 183 +method 1 80 2.564949 2.564949 213 +optim 1 79 2.564949 2.564949 197 +mondai 1 77 2.564949 2.564949 206 +april 1 77 2.564949 2.564949 196 +involv 1 71 2.639057 2.639057 227 +logic 1 71 2.639057 2.639057 230 +workshop 1 71 2.639057 2.639057 239 +sieg 1 69 2.708050 2.708050 260 +thursdai 1 70 2.708050 2.708050 241 +integr 1 67 2.708050 2.708050 245 +laboratori 1 63 2.772589 2.772589 292 +room 1 59 2.833213 2.833213 301 +digit 1 52 2.995732 2.995732 348 +physic 1 47 3.091042 3.091042 377 +join 1 39 3.258097 3.258097 457 +programm 1 39 3.258097 3.258097 445 +paul 1 38 3.295837 3.295837 471 +field 1 37 3.332205 3.332205 482 +articl 1 33 3.433987 3.433987 530 +focu 1 30 3.555348 3.555348 571 +travel 1 30 3.555348 3.555348 579 +arrai 1 27 3.637586 3.637586 627 +ofwashington 1 22 3.850148 3.850148 766 +voic 1 21 3.912023 3.912023 806 +vlsi 1 21 3.912023 3.912023 795 +chip 1 21 3.912023 3.912023 770 +rout 1 21 3.912023 3.912023 793 +particularli 1 19 4.007333 4.007333 867 +aid 1 18 4.060443 4.060443 904 +carl 4 15 4.248495 16.993980 1024 +draw 1 14 4.317488 4.317488 1086 +circuit 1 13 4.382027 4.382027 1131 +carnegi 1 12 4.465908 4.465908 1260 +fpga 2 10 4.653960 9.307920 1433 +franklin 1 10 4.653960 4.653960 1436 +chao 2 8 4.875197 9.750394 1753 +curv 1 8 4.875197 4.875197 1656 +sensit 1 8 4.875197 4.875197 1726 +router 1 8 4.875197 4.875197 1772 +multicomput 1 7 5.010635 5.010635 1890 +northwest 1 7 5.010635 5.010635 1973 +densiti 1 7 5.010635 5.010635 1927 +southern 1 6 5.164786 5.164786 2191 +spline 1 6 5.164786 5.164786 2007 +gate 1 6 5.164786 5.164786 2182 +categori 1 5 5.347108 5.347108 2261 +darren 1 5 5.347108 5.347108 2565 +ebel 4 4 5.568345 22.273380 2756 +triptych 3 4 5.568345 16.705035 3061 +neil 1 4 5.568345 5.568345 2841 +theperform 1 3 5.857933 5.857933 3262 +cronquist 1 3 5.857933 5.857933 3942 +haswork 1 2 6.263398 6.263398 5182 +andsurfac 1 2 6.263398 6.263398 5735 +hei 1 2 6.263398 6.263398 5769 +latch 1 2 6.263398 6.263398 6034 +soha 1 2 6.263398 6.263398 6006 +hassoun 1 2 6.263398 6.263398 6007 +mckenzi 1 2 6.263398 6.263398 5974 +ebelingdepart 1 1 6.957497 6.957497 16698 +wheatoncolleg 1 1 6.957497 6.957497 16699 +illinoisunivers 1 1 6.957497 6.957497 16700 +mellonunivers 1 1 6.957497 6.957497 16701 +vlsiarchitectur 1 1 6.957497 6.957497 16702 +hitech 1 1 6.957497 6.957497 16703 +chessmachin 1 1 6.957497 6.957497 16704 +apex 1 1 6.957497 6.957497 16705 +routingnetwork 1 1 6.957497 6.957497 16706 +placementand 1 1 6.957497 6.957497 16707 +teachingspr 1 1 6.957497 6.957497 16708 +designoffic 1 1 6.957497 6.957497 16709 +fccm 1 1 6.957497 6.957497 16710 +napamai 1 1 6.957497 6.957497 16711 +burlington 1 1 6.957497 6.957497 16712 +chicagojun 1 1 6.957497 6.957497 16713 +vegasresearch 1 1 6.957497 6.957497 16714 +amara 1 1 6.957497 6.957497 16715 +galleryelan 1 1 6.957497 6.957497 16716 +galleryebel 1 1 6.957497 6.957497 16717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html new file mode 100644 index 00000000..69203962 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^hanks.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +inform 2 412 0.693147 1.386294 8 +last 1 314 1.098612 1.098612 14 +link 2 247 1.386294 2.772588 24 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +seattl 3 120 2.079442 6.238326 103 +schedul 2 119 2.079442 4.158884 85 +spring 1 131 2.079442 2.079442 88 +confer 1 126 2.079442 2.079442 100 +server 1 76 2.564949 2.564949 204 +new 1 64 2.772589 2.772589 262 +photo 1 31 3.496508 3.496508 561 +steve 1 29 3.583519 3.583519 594 +magazin 1 24 3.761200 3.761200 704 +tenni 1 20 3.951244 3.951244 838 +agent 1 18 4.060443 4.060443 910 +hank 1 12 4.465908 4.465908 1253 +uncertainti 2 7 5.010635 10.021270 1882 +restaur 1 6 5.164786 5.164786 2230 +seriou 1 5 5.347108 5.347108 2252 +carlo 1 5 5.347108 5.347108 2515 +maria 1 4 5.568345 5.568345 2954 +wine 1 3 5.857933 5.857933 3895 +hanksunivers 1 1 6.957497 6.957497 16718 +washingtondepart 1 1 6.957497 6.957497 16719 +architecturesai 1 1 6.957497 6.957497 16720 +symphoni 1 1 6.957497 6.957497 16721 +opera 1 1 6.957497 6.957497 16722 +edita 1 1 6.957497 6.957497 16723 +gruberova 1 1 6.957497 6.957497 16724 +giulini 1 1 6.957497 6.957497 16725 +discographi 1 1 6.957497 6.957497 16726 +sumac 1 1 6.957497 6.957497 16727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html new file mode 100644 index 00000000..ece1bb68 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^holden.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +engin 2 297 1.098612 2.197224 20 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +group 2 183 1.609438 3.218876 36 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +professor 2 137 1.945910 3.891820 76 +year 2 148 1.945910 3.891820 84 +take 1 97 2.302585 2.302585 134 +school 1 84 2.484907 2.484907 188 +intellig 1 72 2.639057 2.639057 225 +degre 2 69 2.708050 5.416100 259 +knowledg 1 67 2.708050 2.708050 243 +integr 1 67 2.708050 2.708050 245 +artifici 1 63 2.772589 2.772589 280 +colleg 1 61 2.833213 2.833213 300 +faculti 1 56 2.890372 2.890372 325 +understand 1 47 3.091042 3.091042 384 +math 1 44 3.135494 3.135494 402 +electr 1 38 3.295837 3.295837 461 +origin 1 38 3.295837 3.295837 472 +within 1 33 3.433987 3.433987 525 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +neural 1 30 3.555348 3.555348 578 +symbol 1 27 3.637586 3.637586 620 +spent 3 25 3.737670 11.213010 676 +initi 1 23 3.806662 3.806662 717 +divis 2 21 3.912023 7.824046 803 +corpor 1 21 3.912023 3.912023 802 +verif 1 20 3.951244 3.951244 826 +expert 1 20 3.951244 3.951244 833 +mostli 1 19 4.007333 4.007333 869 +aid 1 18 4.060443 4.060443 904 +speech 1 12 4.465908 4.465908 1222 +fellowship 1 10 4.653960 4.653960 1460 +yale 1 6 5.164786 5.164786 2003 +british 1 5 5.347108 5.347108 2546 +broadcast 1 5 5.347108 5.347108 2453 +began 1 5 5.347108 5.347108 2498 +scotland 1 4 5.568345 5.568345 3049 +withth 1 4 5.568345 5.568345 2805 +alistair 1 3 5.857933 5.857933 3315 +holden 1 3 5.857933 5.857933 3314 +london 1 3 5.857933 5.857933 3282 +imperi 1 2 6.263398 6.263398 5389 +highland 1 1 6.957497 6.957497 16728 +receivedhi 1 1 6.957497 6.957497 16729 +glasgow 1 1 6.957497 6.957497 16730 +graduateapprentic 1 1 6.957497 6.957497 16731 +edison 1 1 6.957497 6.957497 16732 +phddegre 1 1 6.957497 6.957497 16733 +learningin 1 1 6.957497 6.957497 16734 +coursefrom 1 1 6.957497 6.957497 16735 +colin 1 1 6.957497 6.957497 16736 +cherri 1 1 6.957497 6.957497 16737 +thebbc 1 1 6.957497 6.957497 16738 +theuw 1 1 6.957497 6.957497 16739 +departmentsform 1 1 6.957497 6.957497 16740 +netmethodolog 1 1 6.957497 6.957497 16741 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html new file mode 100644 index 00000000..701c538d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^karp.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 4 640 0.000000 0.000000 4 +univers 4 571 0.000000 0.000000 5 +comput 4 775 0.000000 0.000000 2 +research 5 431 0.693147 3.465735 10 +program 2 374 0.693147 1.386294 7 +engin 1 297 1.098612 1.098612 20 +washington 2 236 1.386294 2.772588 32 +oper 2 180 1.609438 3.218876 34 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +algorithm 2 162 1.791759 3.583518 57 +lectur 4 135 1.945910 7.783640 73 +problem 4 147 1.945910 7.783640 75 +professor 3 137 1.945910 5.837730 76 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +mathemat 4 108 2.197225 8.788900 123 +manag 2 114 2.197225 4.394450 125 +teach 1 108 2.197225 2.197225 112 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +advanc 2 99 2.302585 4.605170 130 +part 1 98 2.302585 2.302585 129 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +institut 4 84 2.484907 9.939628 187 +member 2 84 2.484907 4.969814 165 +academ 1 82 2.484907 2.484907 178 +journal 1 83 2.484907 2.484907 183 +nation 3 74 2.639057 7.917171 240 +effici 1 73 2.639057 2.639057 233 +complex 2 64 2.772589 5.545178 269 +improv 1 62 2.772589 2.772589 289 +faculti 1 56 2.890372 2.890372 325 +profession 1 51 2.995732 2.995732 345 +press 1 42 3.218876 3.218876 419 +societi 5 40 3.258097 16.290485 456 +theoret 1 39 3.258097 3.258097 446 +industri 1 38 3.295837 3.295837 464 +respons 1 37 3.332205 3.332205 476 +award 2 34 3.401197 6.802394 523 +random 2 34 3.401197 6.802394 511 +board 3 33 3.433987 10.301961 528 +john 2 33 3.433987 6.867974 532 +travel 2 30 3.555348 7.110696 579 +art 1 29 3.583519 3.583519 593 +chair 1 29 3.583519 3.583519 596 +held 1 28 3.610918 3.610918 600 +american 3 27 3.637586 10.912758 634 +berkelei 4 26 3.688879 14.755516 657 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +flow 1 24 3.761200 3.761200 700 +ofwashington 1 22 3.850148 3.850148 766 +reduc 1 22 3.850148 3.850148 759 +among 1 21 3.912023 3.912023 781 +miller 1 17 4.110874 4.110874 949 +match 1 16 4.174387 4.174387 965 +partit 1 16 4.174387 4.174387 984 +massachusett 1 14 4.317488 4.317488 1118 +america 2 11 4.553877 9.107754 1370 +probabilist 1 11 4.553877 4.553877 1343 +minimum 1 9 4.753590 4.753590 1555 +academi 3 8 4.875197 14.625591 1735 +combinatori 1 8 4.875197 4.875197 1629 +pennsylvania 1 7 5.010635 5.010635 1932 +perfect 1 7 5.010635 5.010635 1921 +prize 4 6 5.164786 20.659144 2150 +ture 1 6 5.164786 5.164786 1997 +advisori 1 6 5.164786 5.164786 2148 +plane 1 6 5.164786 5.164786 2187 +karp 2 5 5.347108 10.694216 2284 +weyl 1 4 5.568345 5.568345 2854 +technion 1 4 5.568345 5.568345 2856 +weizmann 1 4 5.568345 5.568345 2858 +combinator 1 4 5.568345 5.568345 2915 +theacm 1 4 5.568345 5.568345 2698 +neumann 2 3 5.857933 11.715866 3720 +medal 1 3 5.857933 5.857933 3912 +truste 1 3 5.857933 5.857933 3900 +combinatorica 1 3 5.857933 5.857933 3649 +ofoper 1 3 5.857933 5.857933 3292 +dick 1 2 6.263398 6.263398 5396 +sciencesmemb 1 2 6.263398 6.263398 5742 +engineeringfellow 1 2 6.263398 6.263398 4902 +sciencesfellow 1 2 6.263398 6.263398 4903 +appliedmathemat 1 2 6.263398 6.263398 5716 +honorari 1 2 6.263398 6.263398 5741 +georgetown 1 2 6.263398 6.263398 5667 +wigderson 1 2 6.263398 6.263398 6035 +fornetwork 1 2 6.263398 6.263398 5580 +edmond 1 2 6.263398 6.263398 4144 +plenum 1 2 6.263398 6.263398 6036 +presentmemb 2 1 6.957497 13.914994 16742 +salesman 2 1 6.957497 13.914994 16743 +karprichard 1 1 6.957497 6.957497 16744 +karpprofessor 1 1 6.957497 6.957497 16745 +ofcomputersci 1 1 6.957497 6.957497 16746 +andadjunct 1 1 6.957497 6.957497 16747 +ofmolecularbiotechnologyunivers 1 1 6.957497 6.957497 16748 +eduaward 1 1 6.957497 6.957497 16749 +membershipsn 1 1 6.957497 6.957497 16750 +babbag 1 1 6.957497 6.957497 16751 +sciencedistinguish 1 1 6.957497 6.957497 16752 +senat 1 1 6.957497 6.957497 16753 +berkeleylanchest 1 1 6.957497 6.957497 16754 +fulkerson 1 1 6.957497 6.957497 16755 +hermann 1 1 6.957497 6.957497 16756 +forsoci 1 1 6.957497 6.957497 16757 +governor 1 1 6.957497 6.957497 16758 +scienceinstitut 1 1 6.957497 6.957497 16759 +presentselect 1 1 6.957497 6.957497 16760 +turingaward 1 1 6.957497 6.957497 16761 +upfal 1 1 6.957497 6.957497 16762 +spanningtre 1 1 6.957497 6.957497 16763 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ new file mode 100644 index 00000000..ad0fc31f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^lazowska^ @@ -0,0 +1,251 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 11 775 0.000000 0.000000 2 +univers 10 571 0.000000 0.000000 5 +scienc 10 640 0.000000 0.000000 4 +home 4 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +inform 7 412 0.693147 4.852029 8 +research 5 431 0.693147 3.465735 10 +program 5 374 0.693147 3.465735 7 +system 4 443 0.693147 2.772588 6 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 3 297 1.098612 3.295836 20 +student 2 343 1.098612 2.197224 19 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +washington 9 236 1.386294 12.476646 32 +graduat 2 215 1.386294 2.772588 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +recent 2 167 1.791759 3.583518 58 +data 1 170 1.791759 1.791759 49 +lectur 3 135 1.945910 5.837730 73 +perform 3 143 1.945910 5.837730 74 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +professor 1 137 1.945910 1.945910 76 +technolog 5 131 2.079442 10.397210 102 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +person 2 111 2.197225 4.394450 117 +version 1 113 2.197225 2.197225 122 +memori 2 101 2.302585 4.605170 139 +technic 1 100 2.302585 2.302585 140 +select 2 91 2.397895 4.795790 154 +grade 2 90 2.397895 4.795790 142 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +member 5 84 2.484907 12.424535 165 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +complet 2 77 2.564949 5.129898 208 +master 1 76 2.564949 2.564949 216 +issu 1 78 2.564949 2.564949 211 +april 1 77 2.564949 2.564949 196 +nation 3 74 2.639057 7.917171 240 +servic 1 72 2.639057 2.639057 236 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +test 1 66 2.708050 2.708050 252 +degre 1 69 2.708050 2.708050 259 +polici 1 64 2.772589 2.772589 279 +foundat 1 62 2.772589 2.772589 286 +colleg 1 61 2.833213 2.833213 300 +faculti 3 56 2.890372 8.671116 325 +reason 1 57 2.890372 2.890372 318 +major 1 56 2.890372 2.890372 315 +think 1 57 2.890372 2.890372 314 +talk 1 53 2.944439 2.944439 336 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +profession 1 51 2.995732 2.995732 345 +frequent 1 49 3.044522 3.044522 367 +visitor 1 49 3.044522 3.044522 371 +principl 1 48 3.044522 3.044522 357 +california 2 46 3.091042 6.182084 388 +execut 1 45 3.135494 3.135494 404 +review 3 42 3.218876 9.656628 425 +examin 1 42 3.218876 3.218876 424 +http 1 41 3.218876 3.218876 420 +transact 1 39 3.258097 3.258097 438 +annual 1 40 3.258097 3.258097 458 +industri 2 38 3.295837 6.591674 464 +electr 1 38 3.295837 3.295837 461 +field 1 37 3.332205 3.332205 482 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +committe 10 34 3.401197 34.011970 522 +award 2 34 3.401197 6.802394 523 +board 6 33 3.433987 20.603922 528 +product 1 33 3.433987 3.433987 527 +chair 8 29 3.583519 28.668152 596 +chines 2 29 3.583519 7.167038 595 +intend 1 28 3.610918 3.610918 599 +berkelei 3 26 3.688879 11.066637 657 +concern 3 25 3.737670 11.213010 666 +doctor 1 24 3.761200 3.761200 709 +sometim 1 24 3.761200 3.761200 696 +miscellan 1 23 3.806662 3.806662 731 +famili 1 23 3.806662 3.806662 735 +serv 3 22 3.850148 11.550444 758 +director 2 22 3.850148 7.700296 767 +corpor 1 21 3.912023 3.912023 802 +fund 1 21 3.912023 3.912023 805 +theunivers 1 21 3.912023 3.912023 797 +hous 1 21 3.912023 3.912023 801 +region 1 19 4.007333 4.007333 875 +seem 2 18 4.060443 8.120886 899 +appropri 1 18 4.060443 4.060443 883 +lot 1 18 4.060443 4.060443 889 +stand 1 18 4.060443 4.060443 891 +record 1 18 4.060443 4.060443 890 +demo 1 18 4.060443 4.060443 888 +universityof 3 15 4.248495 12.745485 1061 +drive 1 15 4.248495 4.248495 1052 +club 1 15 4.248495 4.248495 1058 +trip 2 14 4.317488 8.634976 1113 +essenti 1 13 4.382027 4.382027 1137 +sigmetr 1 13 4.382027 4.382027 1173 +forth 1 13 4.382027 4.382027 1186 +speech 2 12 4.465908 8.931816 1222 +thedepart 4 11 4.553877 18.215508 1350 +council 2 11 4.553877 9.107754 1364 +host 1 11 4.553877 4.553877 1306 +player 1 11 4.553877 4.553877 1371 +cook 2 10 4.653960 9.307920 1464 +perspect 1 10 4.653960 4.653960 1437 +congress 1 9 4.753590 4.753590 1592 +pick 1 9 4.753590 4.753590 1498 +govern 1 9 4.753590 4.753590 1581 +telecommun 1 9 4.753590 4.753590 1565 +andth 1 9 4.753590 4.753590 1481 +vice 1 9 4.753590 4.753590 1604 +lane 1 8 4.875197 4.875197 1720 +mile 1 8 4.875197 4.875197 1743 +virginia 1 8 4.875197 4.875197 1659 +driver 1 8 4.875197 4.875197 1657 +centuri 2 7 5.010635 10.021270 1935 +surpris 1 7 5.010635 5.010635 1828 +molecular 1 7 5.010635 5.010635 1887 +advisori 4 6 5.164786 20.659144 2148 +ture 1 6 5.164786 5.164786 1997 +brook 1 6 5.164786 5.164786 2152 +deliv 1 6 5.164786 5.164786 2070 +highwai 1 6 5.164786 5.164786 2095 +presid 1 6 5.164786 5.164786 2196 +duke 1 6 5.164786 5.164786 2231 +lazowska 10 4 5.568345 55.683450 2694 +invent 2 4 5.568345 11.136690 3028 +machineri 2 4 5.568345 11.136690 2851 +push 1 4 5.568345 5.568345 2635 +andengin 1 4 5.568345 5.568345 3042 +rack 1 3 5.857933 5.857933 3176 +researchassoci 1 3 5.857933 5.857933 3664 +affair 1 3 5.857933 5.857933 3916 +belong 1 3 5.857933 5.857933 3797 +atstanford 1 3 5.857933 5.857933 3935 +hongkong 1 3 5.857933 5.857933 3677 +theimpact 1 3 5.857933 5.857933 3179 +uwcs 1 3 5.857933 5.857933 3977 +informationtechnolog 1 3 5.857933 5.857933 3836 +down 1 3 5.857933 5.857933 3870 +celebr 2 2 6.263398 12.526796 4946 +onthi 1 2 6.263398 6.263398 5357 +mbquicktim 1 2 6.263398 6.263398 5916 +advisorycommitte 1 2 6.263398 6.263398 6037 +ofdata 1 2 6.263398 6.263398 6038 +ventur 1 2 6.263398 6.263398 4938 +andha 1 2 6.263398 6.263398 5955 +theinstitut 1 2 6.263398 6.263398 6039 +hpcc 1 2 6.263398 6.263398 5832 +pagerec 1 2 6.263398 6.263398 6022 +ahalf 2 1 6.957497 13.914994 16764 +exponentialprogress 2 1 6.957497 13.914994 16765 +annualfaculti 2 1 6.957497 13.914994 16766 +vicepresid 2 1 6.957497 13.914994 16767 +gore 2 1 6.957497 13.914994 16768 +eniac 2 1 6.957497 13.914994 16769 +thanniversari 2 1 6.957497 13.914994 16770 +nathanmyhrvold 2 1 6.957497 13.914994 16771 +joinsedlazowska 2 1 6.957497 13.914994 16772 +theuwcs 2 1 6.957497 13.914994 16773 +testimonyto 2 1 6.957497 13.914994 16774 +georgejetson 1 1 6.957497 6.957497 16775 +forfr 1 1 6.957497 6.957497 16776 +flintston 1 1 6.957497 6.957497 16777 +mostlywearsti 1 1 6.957497 6.957497 16778 +flier 1 1 6.957497 6.957497 16779 +healso 1 1 6.957497 6.957497 16780 +havefunnynos 1 1 6.957497 6.957497 16781 +allgradu 1 1 6.957497 6.957497 16782 +laboratoriesin 1 1 6.957497 6.957497 16783 +ofcra 1 1 6.957497 6.957497 16784 +scomputersci 1 1 6.957497 6.957497 16785 +formicrosoft 1 1 6.957497 6.957497 16786 +personnationalsemiconductor 1 1 6.957497 6.957497 16787 +academicadvisori 1 1 6.957497 6.957497 16788 +forcabl 1 1 6.957497 6.957497 16789 +hows 1 1 6.957497 6.957497 16790 +cascadia 1 1 6.957497 6.957497 16791 +committeesfor 1 1 6.957497 6.957497 16792 +eecsat 1 1 6.957497 6.957497 16793 +councilpanel 1 1 6.957497 6.957497 16794 +agencyhigh 1 1 6.957497 6.957497 16795 +computingand 1 1 6.957497 6.957497 16796 +sutherland 1 1 6.957497 6.957497 16797 +examinersfor 1 1 6.957497 6.957497 16798 +sspecial 1 1 6.957497 6.957497 16799 +chairof 1 1 6.957497 6.957497 16800 +andeditor 1 1 6.957497 6.957497 16801 +servinga 1 1 6.957497 6.957497 16802 +onacadem 1 1 6.957497 6.957497 16803 +thecommitte 1 1 6.957497 6.957497 16804 +deanship 1 1 6.957497 6.957497 16805 +artsand 1 1 6.957497 6.957497 16806 +biotechnolog 1 1 6.957497 6.957497 16807 +amemb 1 1 6.957497 6.957497 16808 +deanof 1 1 6.957497 6.957497 16809 +fellowof 1 1 6.957497 6.957497 16810 +associationfor 1 1 6.957497 6.957497 16811 +andelectron 1 1 6.957497 6.957497 16812 +seventeenph 1 1 6.957497 6.957497 16813 +studentshav 1 1 6.957497 6.957497 16814 +integratedoverview 1 1 6.957497 6.957497 16815 +apersuas 1 1 6.957497 6.957497 16816 +forloc 1 1 6.957497 6.957497 16817 +consumpt 1 1 6.957497 6.957497 16818 +persuas 1 1 6.957497 6.957497 16819 +playertopten 1 1 6.957497 6.957497 16820 +csebuild 1 1 6.957497 6.957497 16821 +abbrevi 1 1 6.957497 6.957497 16822 +cvcomputingresearch 1 1 6.957497 6.957497 16823 +forwardmassi 1 1 6.957497 6.957497 16824 +goldmanreport 1 1 6.957497 6.957497 16825 +alleg 1 1 6.957497 6.957497 16826 +cseph 1 1 6.957497 6.957497 16827 +flaw 1 1 6.957497 6.957497 16828 +medianyear 1 1 6.957497 6.957497 16829 +boardstudi 1 1 6.957497 6.957497 16830 +saturdayseminar 1 1 6.957497 6.957497 16831 +houseappropri 1 1 6.957497 6.957497 16832 +interestinghom 1 1 6.957497 6.957497 16833 +odeto 1 1 6.957497 6.957497 16834 +tallman 1 1 6.957497 6.957497 16835 +trask 1 1 6.957497 6.957497 16836 +departsfor 1 1 6.957497 6.957497 16837 +lanelazowska 1 1 6.957497 6.957497 16838 +pagedirect 1 1 6.957497 6.957497 16839 +houseshilshol 1 1 6.957497 6.957497 16840 +aquat 1 1 6.957497 6.957497 16841 +discoveredreview 1 1 6.957497 6.957497 16842 +poetryfing 1 1 6.957497 6.957497 16843 +scheduleinform 1 1 6.957497 6.957497 16844 +reflector 1 1 6.957497 6.957497 16845 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html new file mode 100644 index 00000000..4d79b921 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^shaw.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +comput 3 775 0.000000 0.000000 2 +system 5 443 0.693147 3.465735 6 +research 3 431 0.693147 2.079441 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +engin 3 297 1.098612 3.295836 20 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +washington 1 236 1.386294 1.386294 32 +cornel 1 215 1.386294 1.386294 23 +includ 2 208 1.609438 3.218876 42 +oper 1 180 1.609438 1.609438 34 +professor 6 137 1.945910 11.675460 76 +document 1 121 2.079442 2.079442 89 +mathemat 1 108 2.197225 2.197225 123 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +book 2 99 2.302585 4.605170 131 +text 1 98 2.302585 2.302585 133 +associ 2 93 2.397895 4.795790 151 +real 2 93 2.397895 4.795790 144 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +journal 1 83 2.484907 2.484907 183 +ieee 1 86 2.484907 2.484907 190 +thing 1 84 2.484907 2.484907 189 +academ 1 82 2.484907 2.484907 178 +method 1 80 2.564949 2.564949 213 +good 1 77 2.564949 2.564949 200 +addit 1 74 2.639057 2.639057 228 +visit 1 63 2.772589 2.772589 288 +physic 1 47 3.091042 3.091042 377 +textbook 1 44 3.135494 3.135494 397 +linear 1 41 3.218876 3.218876 431 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +transact 1 39 3.258097 3.258097 438 +live 1 40 3.258097 3.258097 451 +committe 2 34 3.401197 6.802394 522 +award 1 34 3.401197 3.401197 523 +dissert 2 32 3.465736 6.931472 549 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +serv 1 22 3.850148 3.850148 758 +half 2 21 3.912023 7.824046 776 +corpor 1 21 3.912023 3.912023 802 +among 1 21 3.912023 3.912023 781 +prepar 1 20 3.951244 3.951244 824 +supervis 1 20 3.951244 3.951244 840 +tenni 1 20 3.951244 3.951244 838 +stanford 3 17 4.110874 12.332622 955 +bachelor 1 17 4.110874 4.110874 957 +former 1 17 4.110874 4.110874 956 +hobbi 2 16 4.174387 8.348774 1009 +atth 1 15 4.248495 4.248495 1019 +incomput 1 14 4.317488 4.317488 1096 +alan 1 13 4.382027 4.382027 1146 +guest 1 12 4.465908 4.465908 1220 +food 1 12 4.465908 4.465908 1285 +distinguish 1 11 4.553877 4.553877 1357 +bike 1 10 4.653960 4.653960 1468 +introductori 1 9 4.753590 4.753590 1479 +editori 1 9 4.753590 4.753590 1611 +toronto 1 6 5.164786 5.164786 2156 +scholar 1 6 5.164786 5.164786 2180 +pari 1 6 5.164786 5.164786 2158 +softwareengin 1 6 5.164786 5.164786 2162 +hike 1 6 5.164786 5.164786 2234 +these 1 5 5.347108 5.347108 2482 +fulbright 2 4 5.568345 11.136690 2963 +hasbeen 1 4 5.568345 5.568345 2661 +amast 1 3 5.857933 5.857933 3955 +informat 1 3 5.857933 5.857933 3839 +zurich 1 3 5.857933 5.857933 3550 +memberof 1 3 5.857933 5.857933 3169 +trumpet 1 3 5.857933 5.857933 3946 +sdegre 1 2 6.263398 6.263398 6040 +acceler 1 2 6.263398 6.263398 5411 +fifteen 1 2 6.263398 6.263398 5399 +shaw 3 1 6.957497 20.872491 16846 +facultyappoint 1 1 6.957497 6.957497 16847 +theibm 1 1 6.957497 6.957497 16848 +publicationsinclud 1 1 6.957497 6.957497 16849 +andan 1 1 6.957497 6.957497 16850 +sciencescreen 1 1 6.957497 6.957497 16851 +associateeditor 1 1 6.957497 6.957497 16852 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html new file mode 100644 index 00000000..a74d6bae --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^snyder.html @@ -0,0 +1,98 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +research 3 431 0.693147 2.079441 10 +program 3 374 0.693147 2.079441 7 +system 3 443 0.693147 2.079441 6 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +washington 1 236 1.386294 1.386294 32 +design 1 213 1.386294 1.386294 25 +parallel 6 169 1.791759 10.750554 60 +professor 4 137 1.945910 7.783640 76 +architectur 2 139 1.945910 3.891820 77 +area 1 144 1.945910 1.945910 80 +first 1 140 1.945910 1.945910 71 +mathemat 1 108 2.197225 2.197225 123 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +journal 2 83 2.484907 4.969814 183 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +complet 2 77 2.564949 5.129898 208 +master 1 76 2.564949 2.564949 216 +nation 2 74 2.639057 5.278114 240 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +receiv 2 66 2.708050 5.416100 244 +visit 2 63 2.772589 5.545178 288 +polici 1 64 2.772589 2.772589 279 +guid 1 63 2.772589 2.772589 267 +faculti 2 56 2.890372 5.780744 325 +direct 2 57 2.890372 5.780744 316 +investig 1 51 2.995732 2.995732 353 +numer 2 49 3.044522 6.089044 369 +quarter 1 47 3.091042 3.091042 389 +editor 3 41 3.218876 9.656628 433 +futur 1 41 3.218876 3.218876 427 +join 1 39 3.258097 3.258097 457 +transact 1 39 3.258097 3.258097 438 +ofth 1 36 3.367296 3.367296 491 +committe 2 34 3.401197 6.802394 522 +singl 1 34 3.401197 3.401197 510 +award 1 34 3.401197 3.401197 523 +titl 1 31 3.496508 3.496508 556 +computersci 1 30 3.555348 3.555348 562 +rang 1 30 3.555348 3.555348 565 +chair 2 29 3.583519 7.167038 596 +particip 1 29 3.583519 3.583519 589 +doctor 1 24 3.761200 3.761200 709 +proof 1 23 3.806662 3.806662 720 +highli 1 23 3.806662 3.806662 725 +serv 3 22 3.850148 11.550444 758 +properti 1 22 3.850148 3.850148 749 +chip 3 21 3.912023 11.736069 770 +divis 1 21 3.912023 3.912023 803 +bachelor 1 17 4.110874 4.110874 957 +configur 1 15 4.248495 4.248495 1012 +econom 1 13 4.382027 4.382027 1184 +mellon 1 13 4.382027 4.382027 1179 +carnegi 1 12 4.465908 4.465908 1260 +onth 1 12 4.465908 4.465908 1218 +perman 1 11 4.553877 4.553877 1372 +distinguish 1 11 4.553877 4.553877 1357 +purdu 1 10 4.653960 4.653960 1466 +andcomput 1 8 4.875197 4.875197 1623 +lawrenc 1 7 5.010635 5.010635 1908 +iowa 1 7 5.010635 5.010635 1971 +harvard 1 7 5.010635 5.010635 1926 +microprocessor 1 7 5.010635 5.010635 1808 +scholar 1 6 5.164786 5.164786 2180 +yale 1 6 5.164786 5.164786 2003 +blue 1 6 5.164786 5.164786 2227 +snyder 4 5 5.347108 21.388432 2359 +chaoticrout 1 4 5.568345 5.568345 3063 +anddistribut 1 4 5.568345 5.568345 3031 +algorithmsand 1 4 5.568345 5.568345 2680 +cmo 1 3 5.857933 5.857933 3992 +inventor 1 3 5.857933 5.857933 3695 +orca 1 3 5.857933 5.857933 3578 +dozen 1 3 5.857933 5.857933 3905 +sdegre 1 2 6.263398 6.263398 6040 +developmentof 1 2 6.263398 6.263398 6041 +hors 1 2 6.263398 6.263398 5348 +advisorycommitte 1 2 6.263398 6.263398 6037 +andin 1 1 6.957497 6.957497 16853 +scholarat 1 1 6.957497 6.957497 16854 +theundecid 1 1 6.957497 6.957497 16855 +hecreat 1 1 6.957497 6.957497 16856 +thepok 1 1 6.957497 6.957497 16857 +nowprincip 1 1 6.957497 6.957497 16858 +nwli 1 1 6.957497 6.957497 16859 +computerand 1 1 6.957497 6.957497 16860 +foundationadvisori 1 1 6.957497 6.957497 16861 +doctoraldissert 1 1 6.957497 6.957497 16862 +degreesund 1 1 6.957497 6.957497 16863 +seniorproject 1 1 6.957497 6.957497 16864 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html new file mode 100644 index 00000000..97147b09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^somani.html @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 6 443 0.693147 4.158882 6 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +engin 2 297 1.098612 2.197224 20 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +design 3 213 1.386294 4.158882 25 +network 3 168 1.791759 5.375277 61 +parallel 3 169 1.791759 5.375277 60 +develop 2 174 1.791759 3.583518 53 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +base 1 165 1.791759 1.791759 50 +professor 3 137 1.945910 5.837730 76 +architectur 2 139 1.945910 3.891820 77 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +object 1 138 1.945910 1.945910 79 +like 1 132 1.945910 1.945910 81 +high 1 130 2.079442 2.079442 101 +tool 1 117 2.079442 2.079442 93 +memori 1 101 2.302585 2.302585 139 +control 1 82 2.484907 2.484907 164 +issu 1 78 2.564949 2.564949 211 +involv 1 71 2.639057 2.639057 227 +degre 1 69 2.708050 2.708050 259 +integr 1 67 2.708050 2.708050 245 +evalu 1 64 2.772589 2.772589 266 +plai 1 60 2.833213 2.833213 307 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +scientif 1 53 2.944439 2.944439 341 +three 1 54 2.944439 2.944439 330 +tabl 1 51 2.995732 2.995732 346 +autom 1 41 3.218876 3.218876 434 +electr 2 38 3.295837 6.591674 461 +toler 2 33 3.433987 6.867974 533 +india 1 32 3.465736 3.465736 550 +fault 1 32 3.465736 3.465736 547 +multiprocessor 1 28 3.610918 3.610918 605 +indian 2 22 3.850148 7.700296 769 +period 1 22 3.850148 3.850148 743 +tenni 2 20 3.951244 7.902488 838 +interconnect 2 17 4.110874 8.221748 937 +canada 1 13 4.382027 4.382027 1158 +food 1 12 4.465908 4.465908 1285 +prior 1 10 4.653960 4.653960 1438 +grain 1 10 4.653960 4.653960 1448 +cook 1 10 4.653960 4.653960 1464 +respect 1 9 4.753590 4.753590 1545 +classif 1 9 4.753590 4.753590 1586 +bridg 1 8 4.875197 4.875197 1764 +earn 1 7 5.010635 5.010635 1788 +montreal 1 7 5.010635 5.010635 1961 +hike 1 6 5.164786 5.164786 2234 +delhi 1 5 5.347108 5.347108 2530 +anti 1 5 5.347108 5.347108 2434 +arun 2 4 5.568345 11.136690 2736 +redund 1 4 5.568345 5.568345 2839 +congest 1 3 5.857933 5.857933 3993 +reconfigur 1 3 5.857933 5.857933 3556 +somani 3 2 6.263398 18.790194 4432 +submarin 1 2 6.263398 6.263398 6018 +warfar 1 2 6.263398 6.263398 4910 +navi 1 2 6.263398 6.263398 5155 +proteu 2 1 6.957497 13.914994 16865 +msee 1 1 6.957497 6.957497 16866 +mcgill 1 1 6.957497 6.957497 16867 +govt 1 1 6.957497 6.957497 16868 +offault 1 1 6.957497 6.957497 16869 +tocach 1 1 6.957497 6.957497 16870 +broadband 1 1 6.957497 6.957497 16871 +generalizedenhanc 1 1 6.957497 6.957497 16872 +hypercub 1 1 6.957497 6.957497 16873 +coars 1 1 6.957497 6.957497 16874 +dpcnl 1 1 6.957497 6.957497 16875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html new file mode 100644 index 00000000..6a5f29d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^tanimoto.html @@ -0,0 +1,174 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 13 775 0.000000 0.000000 2 +univers 6 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +research 3 431 0.693147 2.079441 10 +program 3 374 0.693147 2.079441 7 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +languag 3 227 1.386294 4.158882 26 +gener 2 220 1.386294 2.772588 27 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +parallel 2 169 1.791759 3.583518 60 +develop 1 174 1.791759 1.791759 53 +professor 3 137 1.945910 5.837730 76 +process 3 142 1.945910 5.837730 72 +year 1 148 1.945910 1.945910 84 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +confer 4 126 2.079442 8.317768 100 +analysi 3 124 2.079442 6.238326 98 +machin 2 129 2.079442 4.158884 95 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +seattl 1 120 2.079442 2.079442 103 +mathemat 2 108 2.197225 4.394450 123 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +person 1 111 2.197225 2.197225 117 +structur 1 106 2.197225 2.197225 105 +intern 1 108 2.197225 2.197225 128 +take 1 97 2.302585 2.302585 134 +book 1 99 2.302585 2.302585 131 +imag 4 91 2.397895 9.591580 161 +sinc 1 90 2.397895 2.397895 159 +commun 1 95 2.397895 2.397895 157 +grade 1 90 2.397895 2.397895 142 +ieee 4 86 2.484907 9.939628 190 +journal 2 83 2.484907 4.969814 183 +institut 1 84 2.484907 2.484907 187 +member 1 84 2.484907 2.484907 165 +educ 1 86 2.484907 2.484907 191 +activ 1 84 2.484907 2.484907 182 +june 2 79 2.564949 5.129898 214 +intellig 3 72 2.639057 7.917171 225 +addit 2 74 2.639057 5.278114 228 +meet 1 72 2.639057 2.639057 229 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +visit 4 63 2.772589 11.090356 288 +artifici 2 63 2.772589 5.545178 280 +experi 1 64 2.772589 2.772589 283 +written 1 63 2.772589 2.772589 278 +organ 1 65 2.772589 2.772589 265 +polici 1 64 2.772589 2.772589 279 +colleg 2 61 2.833213 5.666426 300 +plai 1 60 2.833213 2.833213 307 +faculti 1 56 2.890372 2.890372 325 +think 1 57 2.890372 2.890372 314 +variou 1 56 2.890372 2.890372 317 +publish 1 57 2.890372 2.890372 326 +processor 1 54 2.944439 2.944439 335 +visual 4 48 3.044522 12.178088 372 +numer 1 49 3.044522 3.044522 369 +understand 1 47 3.091042 3.091042 384 +textbook 1 44 3.135494 3.135494 397 +edit 2 42 3.218876 6.437752 418 +vision 2 41 3.218876 6.437752 430 +editor 1 41 3.218876 3.218876 433 +music 1 42 3.218876 3.218876 436 +societi 2 40 3.258097 6.516194 456 +join 1 39 3.258097 3.258097 457 +programm 1 39 3.258097 3.258097 445 +author 1 39 3.258097 3.258097 450 +committe 2 34 3.401197 6.802394 522 +scientist 2 31 3.496508 6.993016 560 +common 1 30 3.555348 3.555348 574 +chair 6 29 3.583519 21.501114 596 +steve 1 29 3.583519 3.583519 594 +held 1 28 3.610918 3.610918 600 +subject 1 26 3.688879 3.688879 647 +enjoi 1 26 3.688879 3.688879 660 +pattern 5 24 3.761200 18.806000 689 +fellow 1 24 3.761200 3.761200 701 +recognit 3 23 3.806662 11.419986 723 +serv 7 22 3.850148 26.951036 758 +theunivers 1 21 3.912023 3.912023 797 +corpor 1 21 3.912023 3.912023 802 +particularli 1 19 4.007333 4.007333 867 +lisp 2 18 4.060443 8.120886 897 +element 1 18 4.060443 4.060443 895 +steven 2 17 4.110874 8.221748 953 +cambridg 1 16 4.174387 4.174387 1008 +princeton 1 15 4.248495 4.248495 1042 +atth 1 15 4.248495 4.248495 1019 +massachusett 1 14 4.317488 4.317488 1118 +whose 1 13 4.382027 4.382027 1166 +franc 3 12 4.465908 13.397724 1276 +outsid 1 12 4.465908 4.465908 1219 +motiv 1 11 4.553877 4.553877 1346 +council 1 11 4.553877 4.553877 1364 +tanimoto 6 10 4.653960 27.923760 1429 +conferenceon 2 9 4.753590 9.507180 1595 +entitl 1 9 4.753590 4.753590 1490 +vice 1 9 4.753590 4.753590 1604 +japan 1 8 4.875197 4.875197 1762 +elect 1 8 4.875197 4.875197 1771 +sweden 2 7 5.010635 10.021270 1885 +chief 1 7 5.010635 5.010635 1829 +pari 1 6 5.164786 5.164786 2158 +scholar 1 6 5.164786 5.164786 2180 +sponsor 1 6 5.164786 5.164786 2133 +piano 1 6 5.164786 5.164786 2201 +anda 2 5 5.347108 10.694216 2416 +ofparallel 1 5 5.347108 5.347108 2380 +steer 1 5 5.347108 5.347108 2328 +jazz 1 5 5.347108 5.347108 2527 +devot 1 4 5.568345 5.568345 2711 +coauthor 1 4 5.568345 5.568345 3064 +electricalengin 2 3 5.857933 11.715866 3987 +chairman 1 3 5.857933 5.857933 3991 +adjunct 2 2 6.263398 12.526796 6033 +theinstitut 1 2 6.263398 6.263398 6039 +internationalworkshop 1 2 6.263398 6.263398 5012 +bergen 1 2 6.263398 6.263398 5991 +norwai 1 2 6.263398 6.263398 4908 +programcommitte 1 2 6.263398 6.263398 6042 +theieee 1 2 6.263398 6.263398 6043 +ieeetransact 1 2 6.263398 6.263398 4315 +andclass 1 2 6.263398 6.263398 4330 +linkp 2 1 6.957497 13.914994 16876 +fromharvard 1 1 6.957497 6.957497 16877 +connecticut 1 1 6.957497 6.957497 16878 +professorat 1 1 6.957497 6.957497 16879 +hasalso 1 1 6.957497 6.957497 16880 +atkob 1 1 6.957497 6.957497 16881 +enseign 1 1 6.957497 6.957497 16882 +superieur 1 1 6.957497 6.957497 16883 +techniquesd 1 1 6.957497 6.957497 16884 +electroniqu 1 1 6.957497 6.957497 16885 +irest 1 1 6.957497 6.957497 16886 +nant 1 1 6.957497 6.957497 16887 +hasrec 1 1 6.957497 6.957497 16888 +forimag 1 1 6.957497 6.957497 16889 +processingand 1 1 6.957497 6.957497 16890 +bordeaux 1 1 6.957497 6.957497 16891 +ofimag 1 1 6.957497 6.957497 16892 +currentlydirect 1 1 6.957497 6.957497 16893 +throughimag 1 1 6.957497 6.957497 16894 +softwarethat 1 1 6.957497 6.957497 16895 +thebook 1 1 6.957497 6.957497 16896 +introductionus 1 1 6.957497 6.957497 16897 +accompanyingsoftwar 1 1 6.957497 6.957497 16898 +serveda 1 1 6.957497 6.957497 16899 +subconfer 1 1 6.957497 6.957497 16900 +patternrecognit 1 1 6.957497 6.957497 16901 +societyworkshop 1 1 6.957497 6.957497 16902 +machineintellig 1 1 6.957497 6.957497 16903 +symposiaon 1 1 6.957497 6.957497 16904 +editorialboard 1 1 6.957497 6.957497 16905 +cvgip 1 1 6.957497 6.957497 16906 +engineeringeduc 1 1 6.957497 6.957497 16907 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^young.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^young.html new file mode 100644 index 00000000..bf703464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^young.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 15 775 0.000000 0.000000 2 +scienc 10 640 0.000000 0.000000 4 +univers 5 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +current 2 284 1.098612 2.197224 21 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +also 3 259 1.386294 4.158882 28 +washington 1 236 1.386294 1.386294 32 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +cornel 1 215 1.386294 1.386294 23 +paper 1 205 1.609438 1.609438 38 +algorithm 2 162 1.791759 3.583518 57 +read 1 154 1.791759 1.791759 47 +professor 3 137 1.945910 5.837730 76 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +confer 1 126 2.079442 2.079442 100 +theori 5 111 2.197225 10.986125 127 +mathemat 1 108 2.197225 2.197225 123 +structur 1 106 2.197225 2.197225 105 +associ 2 93 2.397895 4.795790 151 +question 1 91 2.397895 2.397895 141 +journal 2 83 2.484907 4.969814 183 +member 1 84 2.484907 2.484907 165 +ieee 1 86 2.484907 2.484907 190 +control 1 82 2.484907 2.484907 164 +state 1 76 2.564949 2.564949 207 +issu 1 78 2.564949 2.564949 211 +complet 1 77 2.564949 2.564949 208 +nation 2 74 2.639057 5.278114 240 +receiv 1 66 2.708050 2.708050 244 +foundat 4 62 2.772589 11.090356 286 +complex 2 64 2.772589 5.545178 269 +visit 1 63 2.772589 2.772589 288 +colleg 3 61 2.833213 8.499639 300 +faculti 3 56 2.890372 8.671116 325 +special 2 56 2.890372 5.780744 320 +direct 1 57 2.890372 2.890372 316 +sever 1 56 2.890372 2.890372 322 +three 1 54 2.944439 2.944439 330 +california 1 46 3.091042 3.091042 388 +textbook 1 44 3.135494 3.135494 397 +theoret 2 39 3.258097 6.516194 446 +societi 2 40 3.258097 6.516194 456 +author 1 39 3.258097 3.258097 450 +paul 1 38 3.295837 3.295837 471 +industri 1 38 3.295837 3.295837 464 +connect 1 37 3.332205 3.332205 485 +ofth 1 36 3.367296 3.367296 491 +committe 5 34 3.401197 17.005985 522 +board 2 33 3.433987 6.867974 528 +taught 1 33 3.433987 3.433987 526 +dissert 1 32 3.465736 3.465736 549 +titl 1 31 3.496508 3.496508 556 +posit 1 31 3.496508 3.496508 552 +chair 1 29 3.583519 3.583519 596 +berkelei 2 26 3.688879 7.377758 657 +fellow 1 24 3.761200 3.761200 701 +doctor 1 24 3.761200 3.761200 709 +serv 10 22 3.850148 38.501480 758 +emphasi 1 22 3.850148 3.850148 755 +unit 1 21 3.912023 3.912023 779 +theunivers 1 21 3.912023 3.912023 797 +divis 1 21 3.912023 3.912023 803 +facil 1 20 3.951244 3.951244 814 +histori 1 19 4.007333 4.007333 853 +young 4 16 4.174387 16.697548 991 +earli 1 16 4.174387 4.174387 968 +becam 1 14 4.317488 4.317488 1117 +dean 1 14 4.317488 4.317488 1104 +employ 1 12 4.465908 4.465908 1291 +eight 1 11 4.553877 4.553877 1331 +ofcomput 1 10 4.653960 4.653960 1442 +editori 2 9 4.753590 9.507180 1611 +vice 1 9 4.753590 4.753590 1604 +hold 1 8 4.875197 4.875197 1645 +foc 1 7 5.010635 5.010635 1880 +reed 1 6 5.164786 5.164786 2086 +symposiumon 1 6 5.164786 5.164786 2054 +sigact 1 6 5.164786 5.164786 2212 +chosen 1 6 5.164786 5.164786 1984 +twice 1 4 5.568345 5.568345 2614 +coauthor 1 4 5.568345 5.568345 3064 +notr 1 4 5.568345 5.568345 2880 +dame 1 4 5.568345 5.568345 2881 +gone 1 4 5.568345 5.568345 3072 +chairman 4 3 5.857933 23.431732 3991 +atstanford 1 3 5.857933 5.857933 3935 +briefli 1 3 5.857933 5.857933 3459 +thegener 1 3 5.857933 5.857933 3648 +mathematicallog 1 3 5.857933 5.857933 3796 +eleven 1 3 5.857933 5.857933 3824 +postdoctor 1 2 6.263398 6.263398 5059 +mexico 1 2 6.263398 6.263398 6044 +nomin 1 2 6.263398 6.263398 5758 +programcommitte 1 2 6.263398 6.263398 6042 +annal 1 2 6.263398 6.263398 4912 +underprofessor 1 2 6.263398 6.263398 6045 +ratherthan 1 2 6.263398 6.263398 6046 +graduateof 1 1 6.957497 6.957497 16908 +antioch 1 1 6.957497 6.957497 16909 +hejoin 1 1 6.957497 6.957497 16910 +seventeen 1 1 6.957497 6.957497 16911 +atpurdu 1 1 6.957497 6.957497 16912 +inperhap 1 1 6.957497 6.957497 16913 +aschairman 1 1 6.957497 6.957497 16914 +professorin 1 1 6.957497 6.957497 16915 +iscoauthor 1 1 6.957497 6.957497 16916 +executivecommitte 1 1 6.957497 6.957497 16917 +interestgroup 1 1 6.957497 6.957497 16918 +chairmanof 1 1 6.957497 6.957497 16919 +annualsymposium 1 1 6.957497 6.957497 16920 +hasserv 1 1 6.957497 6.957497 16921 +stechnic 1 1 6.957497 6.957497 16922 +advisorysubcommitte 1 1 6.957497 6.957497 16923 +thiscommitte 1 1 6.957497 6.957497 16924 +formallog 1 1 6.957497 6.957497 16925 +dopostdoctor 1 1 6.957497 6.957497 16926 +ofcalifornia 1 1 6.957497 6.957497 16927 +avarieti 1 1 6.957497 6.957497 16928 +leather 1 1 6.957497 6.957497 16929 +motorcycl 1 1 6.957497 6.957497 16930 +jacket 1 1 6.957497 6.957497 16931 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html new file mode 100644 index 00000000..50f393fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^people^faculty^zahorjan.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +engin 1 297 1.098612 1.098612 20 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +includ 1 208 1.609438 1.609438 42 +applic 3 170 1.791759 5.375277 56 +parallel 2 169 1.791759 3.583518 60 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +support 3 132 1.945910 5.837730 83 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +construct 1 139 1.945910 1.945910 82 +schedul 2 119 2.079442 4.158884 85 +provid 1 121 2.079442 2.079442 94 +topic 1 114 2.197225 2.197225 110 +code 1 108 2.197225 2.197225 116 +techniqu 1 99 2.302585 2.302585 138 +real 1 93 2.397895 2.397895 144 +chang 1 82 2.484907 2.484907 163 +activ 1 84 2.484907 2.484907 182 +ieee 1 86 2.484907 2.484907 190 +interfac 1 79 2.564949 2.564949 209 +involv 1 71 2.639057 2.639057 227 +receiv 2 66 2.708050 5.416100 244 +polici 2 64 2.772589 5.545178 279 +written 1 63 2.772589 2.772589 278 +allow 1 53 2.944439 2.944439 333 +video 1 44 3.135494 3.135494 405 +continu 1 39 3.258097 3.258097 448 +survei 1 35 3.401197 3.401197 513 +john 1 33 3.433987 3.433987 532 +board 1 33 3.433987 3.433987 528 +titl 1 31 3.496508 3.496508 556 +focu 1 30 3.555348 3.555348 571 +platform 1 29 3.583519 3.583519 591 +load 1 28 3.610918 3.610918 601 +intend 1 28 3.610918 3.610918 599 +primari 1 25 3.737670 3.737670 669 +mobil 1 23 3.806662 3.806662 730 +sequenti 1 22 3.850148 3.850148 745 +runtim 2 19 4.007333 8.014666 858 +young 1 16 4.174387 4.174387 991 +audio 1 14 4.317488 4.317488 1094 +easili 1 14 4.317488 4.317488 1077 +editori 1 9 4.753590 4.753590 1611 +presidenti 1 8 4.875197 4.875197 1737 +supportfor 1 7 5.010635 5.010635 1854 +thegoal 1 6 5.164786 5.164786 2033 +zahorjan 2 3 5.857933 11.715866 3383 +formobil 1 3 5.857933 5.857933 3261 +parallelsystem 1 2 6.263398 6.263398 5746 +exhibit 1 2 6.263398 6.263398 5529 +frombrown 1 1 6.957497 6.957497 16932 +oftoronto 1 1 6.957497 6.957497 16933 +investigatoraward 1 1 6.957497 6.957497 16934 +mediaappl 1 1 6.957497 6.957497 16935 +torespond 1 1 6.957497 6.957497 16936 +parallelizationof 1 1 6.957497 6.957497 16937 +bothcontrol 1 1 6.957497 6.957497 16938 +transactionson 1 1 6.957497 6.957497 16939 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^metip^metip.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^metip^metip.html new file mode 100644 index 00000000..dcd3e5ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^metip^metip.html @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +program 5 374 0.693147 3.465735 7 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +project 8 340 1.098612 8.788896 18 +student 7 343 1.098612 7.690284 19 +us 4 329 1.098612 4.394448 16 +current 4 284 1.098612 4.394448 21 +last 1 314 1.098612 1.098612 14 +softwar 3 220 1.386294 4.158882 30 +link 2 247 1.386294 2.772588 24 +washington 2 236 1.386294 2.772588 32 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +list 2 201 1.609438 3.218876 39 +group 1 183 1.609438 1.609438 36 +modifi 1 178 1.609438 1.609438 35 +develop 2 174 1.791759 3.583518 53 +applic 2 170 1.791759 3.583518 56 +base 1 165 1.791759 1.791759 50 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +process 5 142 1.945910 9.729550 72 +click 3 142 1.945910 5.837730 78 +relat 2 139 1.945910 3.891820 68 +like 1 132 1.945910 1.945910 81 +support 1 132 1.945910 1.945910 83 +number 2 130 2.079442 4.158884 97 +studi 1 120 2.079442 2.079442 91 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +mathemat 4 108 2.197225 8.788900 123 +pleas 2 113 2.197225 4.394450 114 +person 1 111 2.197225 2.197225 117 +version 1 113 2.197225 2.197225 122 +teach 1 108 2.197225 2.197225 112 +part 2 98 2.302585 4.605170 129 +peopl 2 96 2.302585 4.605170 132 +user 1 104 2.302585 2.302585 137 +technic 1 100 2.302585 2.302585 140 +imag 9 91 2.397895 21.581055 161 +learn 6 86 2.484907 14.909442 170 +activ 4 84 2.484907 9.939628 182 +educ 2 86 2.484907 4.969814 191 +help 1 83 2.484907 2.484907 175 +environ 1 84 2.484907 2.484907 177 +thing 1 84 2.484907 2.484907 189 +exampl 1 77 2.564949 2.564949 195 +know 1 80 2.564949 2.564949 198 +free 3 73 2.639057 7.917171 224 +materi 2 75 2.639057 5.278114 221 +meet 1 72 2.639057 2.639057 229 +involv 1 71 2.639057 2.639057 227 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +tuesdai 1 73 2.639057 2.639057 219 +order 3 69 2.708050 8.124150 249 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +window 1 68 2.708050 2.708050 242 +test 1 66 2.708050 2.708050 252 +integr 1 67 2.708050 2.708050 245 +would 1 67 2.708050 2.708050 251 +experi 3 64 2.772589 8.317767 283 +copi 3 63 2.772589 8.317767 284 +collect 1 65 2.772589 2.772589 268 +creat 1 63 2.772589 2.772589 277 +foundat 1 62 2.772589 2.772589 286 +plai 1 60 2.833213 2.833213 307 +variou 1 56 2.890372 2.890372 317 +explor 1 58 2.890372 2.890372 324 +direct 1 57 2.890372 2.890372 316 +allow 2 53 2.944439 5.888878 333 +februari 1 54 2.944439 2.944439 328 +digit 2 52 2.995732 5.991464 348 +particular 1 51 2.995732 2.995732 352 +approach 1 48 3.044522 3.044522 366 +visual 1 48 3.044522 3.044522 372 +done 1 47 3.091042 3.091042 381 +effect 1 46 3.091042 3.091042 385 +discuss 2 45 3.135494 6.270988 399 +keep 1 44 3.135494 3.135494 409 +describ 1 45 3.135494 3.135494 400 +offer 1 43 3.178054 3.178054 414 +programm 1 39 3.258097 3.258097 445 +littl 1 39 3.258097 3.258097 454 +open 1 38 3.295837 3.295837 469 +microsoft 1 38 3.295837 3.295837 468 +close 1 38 3.295837 3.295837 465 +seminar 1 38 3.295837 3.295837 470 +winter 1 36 3.367296 3.367296 500 +curriculum 1 33 3.433987 3.433987 535 +transform 2 32 3.465736 6.931472 542 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +hard 1 30 3.555348 3.555348 563 +common 1 30 3.555348 3.555348 574 +particip 1 29 3.583519 3.583519 589 +intend 1 28 3.610918 3.610918 599 +framework 1 28 3.610918 3.610918 606 +manipul 1 27 3.637586 3.637586 624 +rather 1 26 3.688879 3.688879 642 +experiment 1 26 3.688879 3.688879 645 +todai 3 25 3.737670 11.213010 672 +seri 1 24 3.761200 3.761200 708 +demonstr 1 24 3.761200 3.761200 694 +togeth 1 23 3.806662 3.806662 714 +director 1 22 3.850148 3.850148 767 +disk 1 22 3.850148 3.850148 747 +instal 1 22 3.850148 3.850148 754 +encourag 2 18 4.060443 8.120886 880 +record 1 18 4.060443 4.060443 890 +lisp 1 18 4.060443 4.060443 897 +steven 1 17 4.110874 4.110874 953 +macintosh 1 17 4.110874 4.110874 920 +choic 1 16 4.174387 4.174387 979 +took 1 16 4.174387 4.174387 1010 +role 1 14 4.317488 4.317488 1101 +primarili 1 13 4.382027 4.382027 1185 +forth 1 13 4.382027 4.382027 1186 +essenti 1 13 4.382027 4.382027 1137 +calcul 2 12 4.465908 8.931816 1268 +neat 1 12 4.465908 4.465908 1263 +appl 1 11 4.553877 4.553877 1303 +tanimoto 2 10 4.653960 9.307920 1429 +subset 1 10 4.653960 4.653960 1425 +thecomput 1 10 4.653960 4.653960 1408 +end 1 9 4.753590 4.753590 1567 +successfulli 1 7 5.010635 5.010635 1869 +classroom 1 6 5.164786 5.164786 2006 +pentium 1 6 5.164786 5.164786 2077 +put 1 6 5.164786 5.164786 2017 +volunt 1 5 5.347108 5.347108 2307 +own 1 5 5.347108 5.347108 2531 +pixel 2 4 5.568345 11.136690 2831 +emphas 1 4 5.568345 5.568345 2672 +exploratori 1 4 5.568345 5.568345 3073 +prospect 1 4 5.568345 5.568345 3013 +witha 1 4 5.568345 5.568345 2617 +bricker 1 4 5.568345 5.568345 3050 +metip 6 3 5.857933 35.147598 3937 +teacher 1 3 5.857933 5.857933 3892 +alsoavail 1 3 5.857933 5.857933 3887 +newapproach 1 2 6.263398 6.263398 6047 +pursuit 1 2 6.263398 6.263398 6048 +portrai 1 2 6.263398 6.263398 5386 +xform 2 1 6.957497 13.914994 16940 +theseobject 1 1 6.957497 6.957497 16941 +applicationsdesign 1 1 6.957497 6.957497 16942 +enrich 1 1 6.957497 6.957497 16943 +astandard 1 1 6.957497 6.957497 16944 +withthes 1 1 6.957497 6.957497 16945 +catalyz 1 1 6.957497 6.957497 16946 +bylead 1 1 6.957497 6.957497 16947 +theconcept 1 1 6.957497 6.957497 16948 +toexplor 1 1 6.957497 6.957497 16949 +warper 1 1 6.957497 6.957497 16950 +srun 1 1 6.957497 6.957497 16951 +mathematicsteach 1 1 6.957497 6.957497 16952 +transcriptproject 1 1 6.957497 6.957497 16953 +willfacilit 1 1 6.957497 6.957497 16954 +academicinform 1 1 6.957497 6.957497 16955 +floppi 1 1 6.957497 6.957497 16956 +themetip 1 1 6.957497 6.957497 16957 +ofmultiplay 1 1 6.957497 6.957497 16958 +tointegr 1 1 6.957497 6.957497 16959 +itsxform 1 1 6.957497 6.957497 16960 +somethingfun 1 1 6.957497 6.957497 16961 +beenput 1 1 6.957497 6.957497 16962 +fundamentalattract 1 1 6.957497 6.957497 16963 +digitalimag 1 1 6.957497 6.957497 16964 +discussteach 1 1 6.957497 6.957497 16965 +undergr 1 1 6.957497 6.957497 16966 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html new file mode 100644 index 00000000..6fb80a1f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^mobicomp^mobile.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +washington 2 236 1.386294 2.772588 32 +paper 2 205 1.609438 3.218876 38 +updat 1 191 1.609438 1.609438 41 +applic 1 170 1.791759 1.791759 56 +manag 1 114 2.197225 2.197225 125 +environ 1 84 2.484907 2.484907 177 +resourc 1 81 2.484907 2.484907 172 +prof 1 64 2.772589 2.772589 273 +overview 1 56 2.890372 2.890372 323 +describ 1 45 3.135494 3.135494 400 +transact 1 39 3.258097 3.258097 438 +brian 1 38 3.295837 3.295837 466 +field 1 37 3.332205 3.332205 482 +survei 1 35 3.401197 3.401197 513 +john 1 33 3.433987 3.433987 532 +graph 1 30 3.555348 3.555348 576 +challeng 1 26 3.688879 3.688879 653 +task 1 25 3.737670 3.737670 678 +fundament 1 25 3.737670 3.737670 661 +mobil 5 23 3.806662 19.033310 730 +variabl 1 23 3.806662 3.806662 715 +methodolog 1 23 3.806662 3.806662 733 +infrastructur 2 12 4.465908 8.931816 1234 +hank 1 12 4.465908 4.465908 1253 +gaetano 1 6 5.164786 5.164786 2068 +wireless 1 4 5.568345 5.568345 2693 +disconnect 1 4 5.568345 5.568345 2664 +mobisa 1 3 5.857933 5.857933 3927 +mobilecomput 1 3 5.857933 5.857933 3629 +zahorjan 1 3 5.857933 5.857933 3383 +ubiquit 1 2 6.263398 6.263398 6049 +computingresearch 1 2 6.263398 6.263398 5957 +cope 1 2 6.263398 6.263398 6050 +forman 1 2 6.263398 6.263398 5904 +washingtonher 1 1 6.957497 6.957497 16967 +handheld 1 1 6.957497 6.957497 16968 +operationdistribut 1 1 6.957497 6.957497 16969 +systemcontact 1 1 6.957497 6.957497 16970 +bershadprof 1 1 6.957497 6.957497 16971 +borriellomarc 1 1 6.957497 6.957497 16972 +fiuczynskigeorg 1 1 6.957497 6.957497 16973 +formanprof 1 1 6.957497 6.957497 16974 +levygeoff 1 1 6.957497 6.957497 16975 +voelkerterri 1 1 6.957497 6.957497 16976 +watsonprof 1 1 6.957497 6.957497 16977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html new file mode 100644 index 00000000..e99bc794 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^cecil^cecil^www^cecil-home.html @@ -0,0 +1,139 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +research 3 431 0.693147 2.079441 10 +system 2 443 0.693147 1.386294 6 +inform 2 412 0.693147 1.386294 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +project 10 340 1.098612 10.986120 18 +current 2 284 1.098612 2.197224 21 +languag 6 227 1.386294 8.317764 26 +also 2 259 1.386294 2.772588 28 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +washington 1 236 1.386294 1.386294 32 +class 3 199 1.609438 4.828314 37 +list 3 201 1.609438 4.828314 39 +paper 2 205 1.609438 3.218876 38 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +implement 3 152 1.791759 5.375277 52 +avail 3 169 1.791759 5.375277 48 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +object 5 138 1.945910 9.729550 79 +support 3 132 1.945910 5.837730 83 +like 3 132 1.945910 5.837730 81 +model 1 145 1.945910 1.945910 69 +assign 1 135 1.945910 1.945910 66 +relat 1 139 1.945910 1.945910 68 +high 2 130 2.079442 4.158884 101 +document 2 121 2.079442 4.158884 89 +compil 1 122 2.079442 2.079442 96 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +postscript 1 131 2.079442 2.079442 90 +intern 2 108 2.197225 4.394450 128 +structur 1 106 2.197225 2.197225 105 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +version 1 113 2.197225 2.197225 122 +peopl 2 96 2.302585 4.605170 132 +technic 1 100 2.302585 2.302585 140 +level 2 87 2.484907 4.969814 180 +member 2 84 2.484907 4.969814 165 +orient 5 80 2.564949 12.824745 205 +optim 2 79 2.564949 5.129898 197 +messag 2 76 2.564949 5.129898 212 +issu 1 78 2.564949 2.564949 211 +method 1 80 2.564949 2.564949 213 +dynam 1 76 2.564949 2.564949 194 +sourc 1 77 2.564949 2.564949 201 +java 1 70 2.708050 2.708050 248 +receiv 1 66 2.708050 2.708050 244 +goal 1 66 2.708050 2.708050 250 +august 1 66 2.708050 2.708050 257 +written 2 63 2.772589 5.545178 278 +guid 1 63 2.772589 2.772589 267 +type 2 61 2.833213 5.666426 296 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +overview 2 56 2.890372 5.780744 323 +special 1 56 2.890372 2.890372 320 +detail 1 57 2.890372 2.890372 321 +direct 1 57 2.890372 2.890372 316 +sampl 2 53 2.944439 5.888878 339 +extens 1 53 2.944439 2.944439 340 +run 1 51 2.995732 2.995732 347 +much 1 52 2.995732 2.995732 349 +standard 1 48 3.044522 3.044522 365 +pointer 1 48 3.044522 3.044522 368 +describ 1 45 3.135494 3.135494 400 +mechan 1 43 3.178054 3.178054 416 +past 1 42 3.218876 3.218876 428 +form 1 39 3.258097 3.258097 443 +prototyp 1 38 3.295837 3.295837 463 +procedur 1 36 3.367296 3.367296 488 +either 1 35 3.401197 3.401197 506 +obtain 1 33 3.433987 3.433987 534 +profil 2 30 3.555348 7.110696 581 +releas 2 28 3.610918 7.221836 616 +intend 1 28 3.610918 3.610918 599 +static 3 27 3.637586 10.912758 619 +request 1 26 3.688879 3.688879 635 +initi 1 23 3.806662 3.806662 717 +emphasi 1 22 3.850148 3.850148 755 +finish 1 22 3.850148 3.850148 748 +flexibl 1 21 3.912023 3.912023 792 +qualiti 1 20 3.951244 3.951244 832 +entir 1 20 3.951244 3.951244 811 +predict 1 19 4.007333 4.007333 855 +hybrid 1 15 4.248495 4.248495 1057 +conduct 1 14 4.317488 4.317488 1065 +split 1 14 4.317488 4.317488 1078 +bodi 1 13 4.382027 4.382027 1178 +infrastructur 1 12 4.465908 4.465908 1234 +target 1 12 4.465908 4.465908 1282 +solari 1 12 4.465908 4.465908 1238 +modul 1 10 4.653960 4.653960 1434 +cecil 10 9 4.753590 47.535900 1547 +elimin 2 9 4.753590 9.507180 1558 +subscrib 2 9 4.753590 9.507180 1541 +modula 1 9 4.753590 4.753590 1613 +analys 2 8 4.875197 9.750394 1666 +pure 1 8 4.875197 4.875197 1776 +closur 1 8 4.875197 4.875197 1643 +parti 1 8 4.875197 4.875197 1676 +dead 1 7 5.010635 5.010635 1840 +sparc 1 7 5.010635 5.010635 1860 +freeli 1 6 5.164786 5.164786 2014 +beta 1 6 5.164786 5.164786 1993 +vortex 6 5 5.347108 32.082648 2362 +ofinterest 1 5 5.347108 5.347108 2323 +suno 2 4 5.568345 11.136690 2790 +inlin 1 4 5.568345 5.568345 2964 +tosupport 1 3 5.857933 5.857933 3613 +inherit 1 3 5.857933 5.857933 3122 +forobject 1 3 5.857933 5.857933 3965 +uwcs 1 3 5.857933 5.857933 3977 +intraprocedur 2 2 6.263398 12.526796 5934 +orientedlanguag 1 2 6.263398 6.263398 4079 +acollect 1 2 6.263398 6.263398 5039 +vortexcompil 1 2 6.263398 6.263398 5932 +projectuw 1 1 6.957497 6.957497 16978 +projectwelcom 1 1 6.957497 6.957497 16979 +rapidconstruct 1 1 6.957497 6.957497 16980 +incorporatesmulti 1 1 6.957497 6.957497 16981 +basedencapsul 1 1 6.957497 6.957497 16982 +allowsstat 1 1 6.957497 6.957497 16983 +pureobject 1 1 6.957497 6.957497 16984 +incorporateshigh 1 1 6.957497 6.957497 16985 +hierachyanalysi 1 1 6.957497 6.957497 16986 +guidedselect 1 1 6.957497 6.957497 16987 +commonsubexpress 1 1 6.957497 6.957497 16988 +currentlyavail 1 1 6.957497 6.957497 16989 +thebeta 1 1 6.957497 6.957497 16990 +projectslast 1 1 6.957497 6.957497 16991 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html new file mode 100644 index 00000000..25a7b218 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^grail2^www^index.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 2 297 1.098612 2.197224 20 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +washington 3 236 1.386294 4.158882 32 +softwar 1 220 1.386294 1.386294 30 +public 1 202 1.609438 1.609438 43 +data 1 170 1.791759 1.791759 49 +seattl 1 120 2.079442 2.079442 103 +peopl 1 96 2.302585 2.302585 132 +imag 3 91 2.397895 7.193685 161 +graphic 2 90 2.397895 4.795790 147 +comment 1 93 2.397895 2.397895 146 +laboratori 1 63 2.772589 2.772589 292 +polici 1 64 2.772589 2.772589 279 +juli 1 60 2.833213 2.833213 305 +local 1 55 2.944439 2.944439 334 +cool 1 49 3.044522 3.044522 374 +disk 1 22 3.850148 3.850148 747 +theunivers 1 21 3.912023 3.912023 797 +usag 1 6 5.164786 5.164786 2209 +these 1 5 5.347108 5.347108 2482 +grail 3 3 5.857933 17.573799 3356 +neighborhood 1 3 5.857933 5.857933 3242 +laboratorywelcom 1 2 6.263398 6.263398 5439 +mtwong 1 1 6.957497 6.957497 16992 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html new file mode 100644 index 00000000..b0e7acc2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chaos^www^chaos.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 5 431 0.693147 3.465735 10 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +project 4 340 1.098612 4.394448 18 +engin 1 297 1.098612 1.098612 20 +washington 4 236 1.386294 5.545176 32 +group 2 183 1.609438 3.218876 36 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +algorithm 3 162 1.791759 5.375277 57 +avail 2 169 1.791759 3.583518 48 +network 2 168 1.791759 3.583518 61 +implement 1 152 1.791759 1.791759 52 +parallel 1 169 1.791759 1.791759 60 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +seattl 2 120 2.079442 4.158884 103 +report 2 131 2.079442 4.158884 92 +technic 2 100 2.302585 4.605170 140 +peopl 1 96 2.302585 2.302585 132 +present 2 91 2.397895 4.795790 145 +commun 1 95 2.397895 2.397895 157 +proceed 1 93 2.397895 2.397895 152 +build 1 85 2.484907 2.484907 184 +come 1 78 2.564949 2.564949 202 +workshop 1 71 2.639057 2.639057 239 +simul 5 66 2.708050 13.540250 255 +test 1 66 2.708050 2.708050 252 +result 3 65 2.772589 8.317767 281 +hardwar 1 51 2.995732 2.995732 350 +friend 1 48 3.044522 3.044522 376 +standard 1 48 3.044522 3.044522 365 +better 1 45 3.135494 3.135494 401 +discuss 1 45 3.135494 3.135494 399 +describ 1 45 3.135494 3.135494 400 +power 1 30 3.555348 3.555348 573 +abl 1 30 3.555348 3.555348 566 +built 1 29 3.583519 3.583519 592 +held 1 28 3.610918 3.610918 600 +mine 1 26 3.688879 3.688879 654 +sort 1 22 3.850148 3.850148 738 +rout 8 21 3.912023 31.296184 793 +chip 2 21 3.912023 7.824046 770 +nice 1 20 3.951244 3.951244 809 +repositori 1 17 4.110874 4.110874 932 +interconnect 1 17 4.110874 4.110874 937 +web 1 12 4.465908 4.465908 1249 +chao 5 8 4.875197 24.375985 1753 +router 3 8 4.875197 14.625591 1772 +dylan 1 8 4.875197 4.875197 1625 +univeristi 1 8 4.875197 4.875197 1754 +guidelin 1 7 5.010635 5.010635 1832 +chaotic 4 5 5.347108 21.388432 2566 +chaoticrout 1 4 5.568345 5.568345 3063 +micron 2 3 5.857933 11.715866 3341 +cmo 1 3 5.857933 5.857933 3992 +redesign 1 3 5.857933 5.857933 3540 +thathav 1 3 5.857933 5.857933 3735 +papersand 1 2 6.263398 6.263398 4867 +pcrcw 3 1 6.957497 20.872491 16993 +peopleal 1 1 6.957497 6.957497 16994 +allsort 1 1 6.957497 6.957497 16995 +graphicalfront 1 1 6.957497 6.957497 16996 +presentationof 1 1 6.957497 6.957497 16997 +upwith 1 1 6.957497 6.957497 16998 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ new file mode 100644 index 00000000..02f5e6c9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^chinook^www^ @@ -0,0 +1,202 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +system 3 443 0.693147 2.079441 6 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +time 4 293 1.098612 4.394448 17 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +design 6 213 1.386294 8.317764 25 +softwar 5 220 1.386294 6.931470 30 +gener 3 220 1.386294 4.158882 27 +graduat 2 215 1.386294 2.772588 31 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +washington 1 236 1.386294 1.386294 32 +includ 3 208 1.609438 4.828314 42 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +develop 1 174 1.791759 1.791759 53 +avail 1 169 1.791759 1.791759 48 +recent 1 167 1.791759 1.791759 58 +architectur 3 139 1.945910 5.837730 77 +perform 1 143 1.945910 1.945910 74 +first 1 140 1.945910 1.945910 71 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +schedul 1 119 2.079442 2.079442 85 +version 3 113 2.197225 6.591675 122 +make 2 111 2.197225 4.394450 120 +code 2 108 2.197225 4.394450 116 +topic 1 114 2.197225 2.197225 110 +user 1 104 2.302585 2.302585 137 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +real 2 93 2.397895 4.795790 144 +follow 1 92 2.397895 2.397895 143 +level 2 87 2.484907 4.969814 180 +activ 2 84 2.484907 4.969814 182 +larg 1 82 2.484907 2.484907 168 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +build 1 85 2.484907 2.484907 184 +school 1 84 2.484907 2.484907 188 +june 2 79 2.564949 5.129898 214 +complet 1 77 2.564949 2.564949 208 +issu 1 78 2.564949 2.564949 211 +want 1 79 2.564949 2.564949 199 +interfac 1 79 2.564949 2.564949 209 +nation 2 74 2.639057 5.278114 240 +name 1 72 2.639057 2.639057 220 +effici 1 73 2.639057 2.639057 233 +meet 1 72 2.639057 2.639057 229 +simul 2 66 2.708050 5.416100 255 +main 1 67 2.708050 2.708050 256 +evalu 1 64 2.772589 2.772589 266 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +improv 1 62 2.772589 2.772589 289 +foundat 1 62 2.772589 2.772589 286 +automat 1 61 2.833213 2.833213 306 +detail 2 57 2.890372 5.780744 321 +summer 1 56 2.890372 2.890372 311 +processor 3 54 2.944439 8.833317 335 +hardwar 4 51 2.995732 11.982928 350 +maintain 1 51 2.995732 2.995732 342 +even 1 45 3.135494 3.135494 393 +autom 1 41 3.218876 3.218876 434 +map 2 39 3.258097 6.516194 452 +connect 2 37 3.332205 6.664410 485 +singl 1 34 3.401197 3.401197 510 +concurr 1 34 3.401197 3.401197 501 +compon 2 30 3.555348 7.110696 570 +robert 1 30 3.555348 3.555348 567 +becom 1 28 3.610918 3.610918 603 +american 1 27 3.637586 3.637586 634 +rather 3 26 3.688879 11.066637 642 +constraint 2 26 3.688879 7.377758 636 +enabl 2 26 3.688879 7.377758 655 +accur 1 25 3.737670 3.737670 680 +sometim 1 24 3.761200 3.761200 696 +demonstr 1 24 3.761200 3.761200 694 +input 1 23 3.806662 3.806662 727 +togeth 1 23 3.806662 3.806662 714 +instead 1 22 3.850148 3.850148 756 +try 1 22 3.850148 3.850148 764 +sequenti 1 22 3.850148 3.850148 745 +util 1 21 3.912023 3.912023 774 +output 1 21 3.912023 3.912023 788 +synthesi 6 20 3.951244 23.707464 834 +kernel 1 20 3.951244 3.951244 825 +wind 2 18 4.060443 8.120886 908 +behavior 1 18 4.060443 4.060443 881 +partit 2 16 4.174387 8.348774 984 +earli 1 16 4.174387 4.174387 968 +advantag 1 16 4.174387 4.174387 987 +diego 1 16 4.174387 4.174387 992 +devic 1 16 4.174387 4.174387 1002 +universityof 1 15 4.248495 4.248495 1061 +embed 3 14 4.317488 12.952464 1102 +believ 1 13 4.382027 4.382027 1187 +incorpor 1 13 4.382027 4.382027 1163 +target 1 12 4.465908 4.465908 1282 +grant 1 12 4.465908 4.465908 1216 +fill 1 11 4.553877 4.553877 1349 +cycl 1 11 4.553877 4.553877 1335 +fix 1 11 4.553877 4.553877 1327 +itali 1 11 4.553877 4.553877 1378 +fellowship 3 10 4.653960 13.961880 1460 +mountain 2 10 4.653960 9.307920 1456 +forc 1 10 4.653960 4.653960 1384 +reli 1 10 4.653960 4.653960 1411 +pacif 1 8 4.875197 4.875197 1674 +character 1 8 4.875197 4.875197 1767 +driver 1 8 4.875197 4.875197 1657 +maxim 1 7 5.010635 5.010635 1944 +chinook 8 6 5.164786 41.318288 2229 +averag 1 6 5.164786 5.164786 2098 +contract 1 6 5.164786 5.164786 1985 +blow 1 5 5.347108 5.347108 2407 +east 1 5 5.347108 5.347108 2472 +synthes 1 5 5.347108 5.347108 2451 +ross 1 5 5.347108 5.347108 2243 +ortega 1 5 5.347108 5.347108 2559 +rocki 2 4 5.568345 11.136690 3048 +chou 2 4 5.568345 11.136690 3033 +ti 1 4 5.568345 5.568345 3005 +shelf 1 4 5.568345 5.568345 2621 +harri 1 4 5.568345 5.568345 3034 +warm 2 3 5.857933 11.715866 3904 +retarget 2 3 5.857933 11.715866 3994 +domin 1 3 5.857933 5.857933 3995 +moredetail 1 3 5.857933 5.857933 3854 +shortli 1 3 5.857933 5.857933 3375 +nato 1 3 5.857933 5.857933 3587 +salmon 2 2 6.263398 12.526796 4802 +rare 1 2 6.263398 6.263398 4184 +toolfor 1 2 6.263398 6.263398 6031 +neededto 1 2 6.263398 6.263398 5379 +ratherthan 1 2 6.263398 6.263398 6046 +differentarchitectur 1 2 6.263398 6.263398 6051 +verilog 1 2 6.263398 6.263398 4441 +softwareprogram 1 2 6.263398 6.263398 4889 +moreeffici 1 2 6.263398 6.263398 4209 +macduff 1 2 6.263398 6.263398 5923 +hauck 1 2 6.263398 6.263398 5920 +shinook 1 1 6.957497 6.957497 16999 +oncorhynchu 1 1 6.957497 6.957497 17000 +tshawytscha 1 1 6.957497 6.957497 17001 +amer 1 1 6.957497 6.957497 17002 +tribe 1 1 6.957497 6.957497 17003 +southerli 1 1 6.957497 6.957497 17004 +sled 1 1 6.957497 6.957497 17005 +doga 1 1 6.957497 6.957497 17006 +cadtool 1 1 6.957497 6.957497 17007 +reactivesystem 1 1 6.957497 6.957497 17008 +descriptionto 1 1 6.957497 6.957497 17009 +designdecis 1 1 6.957497 6.957497 17010 +reiterateaft 1 1 6.957497 6.957497 17011 +willnot 1 1 6.957497 6.957497 17012 +designerto 1 1 6.957497 6.957497 17013 +legacycod 1 1 6.957497 6.957497 17014 +currentlyw 1 1 6.957497 6.957497 17015 +interprocessorcommun 1 1 6.957497 6.957497 17016 +assumesmanu 1 1 6.957497 6.957497 17017 +intricateand 1 1 6.957497 6.957497 17018 +asicarchitectur 1 1 6.957497 6.957497 17019 +onoff 1 1 6.957497 6.957497 17020 +discourag 1 1 6.957497 6.957497 17021 +innovemb 1 1 6.957497 6.957497 17022 +shownat 1 1 6.957497 6.957497 17023 +mainfeatur 1 1 6.957497 6.957497 17024 +peripheraldevic 1 1 6.957497 6.957497 17025 +andsynthes 1 1 6.957497 6.957497 17026 +hardwarenetlist 1 1 6.957497 6.957497 17027 +interfacingproblem 1 1 6.957497 6.957497 17028 +timingconstraint 1 1 6.957497 6.957497 17029 +swcodedesign 1 1 6.957497 6.957497 17030 +tremezzo 1 1 6.957497 6.957497 17031 +severalmor 1 1 6.957497 6.957497 17032 +chinookersfacultygaetano 1 1 6.957497 6.957497 17033 +borriellogradu 1 1 6.957497 6.957497 17034 +ortegaken 1 1 6.957497 6.957497 17035 +hinesian 1 1 6.957497 6.957497 17036 +selizabeth 1 1 6.957497 6.957497 17037 +walkupscott 1 1 6.957497 6.957497 17038 +henrik 1 1 6.957497 6.957497 17039 +hulgaardstafflarri 1 1 6.957497 6.957497 17040 +mcmurchielist 1 1 6.957497 6.957497 17041 +paperschinook 1 1 6.957497 6.957497 17042 +sponsorsarpa 1 1 6.957497 6.957497 17043 +walkup 1 1 6.957497 6.957497 17044 +patricia 1 1 6.957497 6.957497 17045 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ new file mode 100644 index 00000000..fc9cf635 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^emerald^ @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +work 3 380 0.693147 2.079441 9 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +design 2 213 1.386294 2.772588 25 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +develop 2 174 1.791759 3.583518 53 +architectur 6 139 1.945910 11.675460 77 +construct 1 139 1.945910 1.945910 82 +tool 5 117 2.079442 10.397210 93 +provid 2 121 2.079442 4.158884 94 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +document 1 121 2.079442 2.079442 89 +compil 1 122 2.079442 2.079442 96 +intern 2 108 2.197225 4.394450 128 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +person 1 111 2.197225 2.197225 117 +need 2 98 2.302585 4.605170 135 +technic 1 100 2.302585 2.302585 140 +proceed 2 93 2.397895 4.795790 152 +mani 1 92 2.397895 2.397895 150 +environ 1 84 2.484907 2.484907 177 +contain 1 81 2.484907 2.484907 174 +appear 1 78 2.564949 2.564949 210 +logic 2 71 2.639057 5.278114 230 +symposium 2 72 2.639057 5.278114 238 +would 1 67 2.708050 2.708050 251 +evalu 1 64 2.772589 2.772589 266 +copi 1 63 2.772589 2.772589 284 +februari 2 54 2.944439 5.888878 328 +allow 1 53 2.944439 2.944439 333 +maintain 1 51 2.995732 2.995732 342 +right 1 48 3.044522 3.044522 363 +without 1 50 3.044522 3.044522 370 +featur 1 46 3.091042 3.091042 386 +offer 1 43 3.178054 3.178054 414 +term 1 43 3.178054 3.178054 411 +third 1 43 3.178054 3.178054 412 +fast 1 42 3.218876 3.218876 429 +map 2 39 3.258097 6.516194 452 +author 2 39 3.258097 6.516194 450 +programm 2 39 3.258097 6.516194 445 +prototyp 1 38 3.295837 3.295837 463 +field 2 37 3.332205 6.664410 482 +mean 1 37 3.332205 3.332205 477 +copyright 3 36 3.367296 10.101888 495 +global 1 34 3.401197 3.401197 520 +given 1 32 3.465736 3.465736 538 +produc 1 30 3.555348 3.555348 572 +power 1 30 3.555348 3.555348 573 +specifi 1 30 3.555348 3.555348 568 +propos 1 28 3.610918 3.610918 602 +arrai 1 27 3.637586 3.637586 627 +constraint 1 26 3.688879 3.688879 636 +reliabl 1 25 3.737670 3.737670 674 +accur 1 25 3.737670 3.737670 680 +frame 1 24 3.761200 3.761200 684 +rout 3 21 3.912023 11.736069 793 +basi 1 20 3.951244 3.951244 828 +definit 1 19 4.007333 4.007333 864 +partit 1 16 4.174387 4.174387 984 +commerci 1 16 4.174387 4.174387 1005 +fourth 1 16 4.174387 4.174387 999 +driven 3 15 4.248495 12.745485 1048 +carl 2 15 4.248495 8.496990 1024 +contribut 1 15 4.248495 4.248495 1021 +larri 2 13 4.382027 8.764054 1142 +unfortun 1 13 4.382027 4.382027 1170 +block 1 13 4.382027 4.382027 1183 +fpga 6 10 4.653960 27.923760 1433 +placement 2 10 4.653960 9.307920 1420 +face 1 9 4.753590 4.753590 1501 +router 2 8 4.875197 9.750394 1772 +satisfi 1 8 4.875197 4.875197 1694 +heart 1 8 4.875197 4.875197 1729 +metric 1 7 5.010635 5.010635 1831 +gate 2 6 5.164786 10.329572 2182 +phase 1 6 5.164786 5.164786 1977 +quickli 1 6 5.164786 5.164786 2000 +ensur 1 6 5.164786 5.164786 2012 +invok 1 6 5.164786 5.164786 2079 +darren 2 5 5.347108 10.694216 2565 +variat 1 5 5.347108 5.347108 2248 +understood 1 5 5.347108 5.347108 2364 +mcmurchi 3 4 5.568345 16.705035 2757 +ebel 2 4 5.568345 11.136690 2756 +permiss 1 4 5.568345 5.568345 2642 +emerald 8 3 5.857933 46.863464 3979 +cronquist 2 3 5.857933 11.715866 3942 +moreov 1 3 5.857933 5.857933 3200 +byth 1 3 5.857933 5.857933 3874 +performanceevalu 1 2 6.263398 6.263398 6052 +thoroughli 1 2 6.263398 6.263398 4801 +parameter 1 2 6.263398 6.263398 5540 +dissemin 1 2 6.263398 6.263398 5080 +adher 1 2 6.263398 6.263398 6025 +sigda 1 2 6.263398 6.263398 5493 +pathfind 1 2 6.263398 6.263398 6053 +negoti 1 2 6.263398 6.263398 6054 +basedperform 1 2 6.263398 6.263398 6055 +projectid 1 1 6.957497 6.957497 17046 +makeus 1 1 6.957497 6.957497 17047 +quickproduct 1 1 6.957497 6.957497 17048 +isoften 1 1 6.957497 6.957497 17049 +postpon 1 1 6.957497 6.957497 17050 +beenfrozen 1 1 6.957497 6.957497 17051 +havedesign 1 1 6.957497 6.957497 17052 +quickdevelop 1 1 6.957497 6.957497 17053 +basicfeatur 1 1 6.957497 6.957497 17054 +synthesisand 1 1 6.957497 6.957497 17055 +anddetail 1 1 6.957497 6.957497 17056 +aneffici 1 1 6.957497 6.957497 17057 +blockarchitectur 1 1 6.957497 6.957497 17058 +tailorplac 1 1 6.957497 6.957497 17059 +schematicspecif 1 1 6.957497 6.957497 17060 +capturedand 1 1 6.957497 6.957497 17061 +ofscholarli 1 1 6.957497 6.957497 17062 +andal 1 1 6.957497 6.957497 17063 +therein 1 1 6.957497 6.957497 17064 +copyrighthold 1 1 6.957497 6.957497 17065 +notwithstand 1 1 6.957497 6.957497 17066 +hereelectron 1 1 6.957497 6.957497 17067 +thisinform 1 1 6.957497 6.957497 17068 +eachauthor 1 1 6.957497 6.957497 17069 +repost 1 1 6.957497 6.957497 17070 +theexplicit 1 1 6.957497 6.957497 17071 +holder 1 1 6.957497 6.957497 17072 +emeraldlarri 1 1 6.957497 6.957497 17073 +arraysaid 1 1 6.957497 6.957497 17074 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html new file mode 100644 index 00000000..0e61c6ac --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^lis^www^index.html @@ -0,0 +1,134 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +system 9 443 0.693147 6.238323 6 +research 4 431 0.693147 2.772588 10 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +project 4 340 1.098612 4.394448 18 +current 3 284 1.098612 3.295836 21 +engin 2 297 1.098612 2.197224 20 +time 1 293 1.098612 1.098612 17 +design 4 213 1.386294 5.545176 25 +washington 2 236 1.386294 2.772588 32 +gener 2 220 1.386294 2.772588 27 +softwar 1 220 1.386294 1.386294 30 +develop 3 174 1.791759 5.375277 53 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +architectur 7 139 1.945910 13.621370 77 +perform 4 143 1.945910 7.783640 74 +first 1 140 1.945910 1.945910 71 +assign 1 135 1.945910 1.945910 66 +seattl 1 120 2.079442 2.079442 103 +tool 1 117 2.079442 2.079442 93 +high 1 130 2.079442 2.079442 101 +specif 3 106 2.197225 6.591675 106 +well 2 109 2.197225 4.394450 121 +sinc 1 90 2.397895 2.397895 159 +level 3 87 2.484907 7.454721 180 +larg 1 82 2.484907 2.484907 168 +educ 1 86 2.484907 2.484907 191 +wide 1 84 2.484907 2.484907 185 +complet 1 77 2.564949 2.564949 208 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +effici 1 73 2.639057 2.639057 233 +integr 4 67 2.708050 10.832200 245 +simul 1 66 2.708050 2.708050 255 +improv 4 62 2.772589 11.090356 289 +laboratori 3 63 2.772589 8.317767 292 +descript 1 64 2.772589 2.772589 271 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +variou 1 56 2.890372 2.890372 317 +direct 1 57 2.890372 2.890372 316 +overview 1 56 2.890372 2.890372 323 +hardwar 1 51 2.995732 2.995732 350 +digit 1 52 2.995732 2.995732 348 +adapt 1 46 3.091042 3.091042 387 +late 1 40 3.258097 3.258097 439 +map 1 39 3.258097 3.258097 452 +prototyp 2 38 3.295837 6.591674 463 +cost 2 37 3.332205 6.664410 480 +multi 2 36 3.367296 6.734592 493 +survei 1 35 3.401197 3.401197 513 +board 1 33 3.433987 3.433987 528 +toler 1 33 3.433987 3.433987 533 +focu 1 30 3.555348 3.555348 571 +synchron 1 29 3.583519 3.583519 588 +scale 1 28 3.610918 3.610918 613 +compar 1 26 3.688879 3.688879 648 +todai 1 25 3.737670 3.737670 672 +methodolog 1 23 3.806662 3.806662 733 +varieti 1 22 3.850148 3.850148 740 +reduc 1 22 3.850148 3.850148 759 +self 1 22 3.850148 3.850148 761 +vlsi 2 21 3.912023 7.824046 795 +rout 2 21 3.912023 7.824046 793 +chip 1 21 3.912023 3.912023 770 +synthesi 2 20 3.951244 7.902488 834 +verif 1 20 3.951244 3.951244 826 +separ 1 19 4.007333 4.007333 844 +feedback 1 19 4.007333 4.007333 854 +aid 1 18 4.060443 4.060443 904 +event 1 18 4.060443 4.060443 896 +commerci 1 16 4.174387 4.174387 1005 +partit 1 16 4.174387 4.174387 984 +latenc 1 16 4.174387 4.174387 993 +embed 4 14 4.317488 17.269952 1102 +topolog 1 14 4.317488 4.317488 1089 +circuit 9 13 4.382027 39.438243 1131 +asynchron 5 12 4.465908 22.329540 1229 +tune 2 12 4.465908 8.931816 1227 +clock 2 11 4.553877 9.107754 1320 +valid 1 11 4.553877 4.553877 1299 +arpa 1 11 4.553877 4.553877 1369 +fpga 8 10 4.653960 37.231680 1433 +rapid 3 10 4.653960 13.961880 1453 +placement 1 10 4.653960 4.653960 1420 +paragraph 1 10 4.653960 4.653960 1449 +sensit 1 8 4.875197 4.875197 1726 +accomplish 1 8 4.875197 4.875197 1755 +northwest 3 7 5.010635 15.031905 1973 +densiti 1 7 5.010635 5.010635 1927 +metric 1 7 5.010635 5.010635 1831 +chinook 1 6 5.164786 5.164786 2229 +layout 1 6 5.164786 5.164786 2183 +sytem 1 4 5.568345 5.568345 3015 +triptych 1 4 5.568345 5.568345 3061 +toolset 1 4 5.568345 5.568345 3014 +chaoticrout 1 4 5.568345 5.568345 3063 +tester 1 4 5.568345 5.568345 2754 +emerald 1 3 5.857933 5.857933 3979 +systemsth 1 3 5.857933 5.857933 3835 +mactest 1 3 5.857933 5.857933 3972 +cmo 1 3 5.857933 5.857933 3992 +montag 2 2 6.263398 12.526796 5921 +retim 2 2 6.263398 12.526796 6008 +usath 1 2 6.263398 6.263398 6056 +engag 1 2 6.263398 6.263398 4937 +springbok 1 2 6.263398 6.263398 5922 +latch 1 2 6.263398 6.263398 6034 +skew 1 2 6.263398 6.263398 6057 +gemini 1 2 6.263398 6.263398 5975 +voltag 1 2 6.263398 6.263398 5475 +verificationtim 1 1 6.957497 6.957497 17075 +prototypingtriptych 1 1 6.957497 6.957497 17076 +toolscan 1 1 6.957497 6.957497 17077 +fpgaarchitectur 1 1 6.957497 6.957497 17078 +incorporatedinto 1 1 6.957497 6.957497 17079 +circuitsretim 1 1 6.957497 6.957497 17080 +uselevel 1 1 6.957497 6.957497 17081 +andincreas 1 1 6.957497 6.957497 17082 +synchronouscircuit 1 1 6.957497 6.957497 17083 +contraint 1 1 6.957497 6.957497 17084 +routerth 1 1 6.957497 6.957497 17085 +systemsself 1 1 6.957497 6.957497 17086 +kehlprevi 1 1 6.957497 6.957497 17087 +reportsarpa 1 1 6.957497 6.957497 17088 +bluebook 1 1 6.957497 6.957497 17089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ new file mode 100644 index 00000000..b345ffd4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^safety^www^ @@ -0,0 +1,62 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 7 443 0.693147 4.852029 6 +work 1 380 0.693147 0.693147 9 +us 2 329 1.098612 2.197224 16 +engin 2 297 1.098612 2.197224 20 +project 1 340 1.098612 1.098612 18 +softwar 4 220 1.386294 5.545176 30 +washington 3 236 1.386294 4.158882 32 +link 1 247 1.386294 1.386294 24 +develop 1 174 1.791759 1.791759 53 +support 1 132 1.945910 1.945910 83 +analysi 2 124 2.079442 4.158884 98 +tool 1 117 2.079442 2.079442 93 +specif 1 106 2.197225 2.197225 106 +techniqu 3 99 2.302585 6.907755 138 +contain 1 81 2.484907 2.484907 174 +build 1 85 2.484907 2.484907 184 +issu 1 78 2.564949 2.564949 211 +appli 1 71 2.639057 2.639057 226 +involv 1 71 2.639057 2.639057 227 +test 1 66 2.708050 2.708050 252 +goal 1 66 2.708050 2.708050 250 +foundat 3 62 2.772589 8.317767 286 +import 1 65 2.772589 2.772589 282 +plai 1 60 2.833213 2.833213 307 +space 1 57 2.890372 2.890372 310 +digit 1 52 2.995732 2.995732 348 +review 1 42 3.218876 3.218876 425 +must 1 40 3.258097 3.258097 442 +theoret 1 39 3.258097 3.258097 446 +prototyp 1 38 3.295837 3.295837 463 +exist 1 30 3.555348 3.555348 569 +built 1 29 3.583519 3.583519 592 +pass 1 28 3.610918 3.610918 611 +although 1 25 3.737670 3.737670 667 +methodolog 3 23 3.806662 11.419986 733 +safeti 9 20 3.951244 35.561196 817 +medic 1 17 4.110874 4.110874 958 +critic 2 16 4.174387 8.348774 982 +upon 1 16 4.174387 4.174387 978 +role 1 14 4.317488 4.317488 1101 +nanci 1 12 4.465908 4.465908 1256 +summar 1 11 4.553877 4.553877 1295 +valid 1 11 4.553877 4.553877 1299 +equip 1 10 4.653960 4.653960 1459 +leveson 1 9 4.753590 4.753590 1540 +consequ 1 6 5.164786 5.164786 1989 +nuclear 1 5 5.347108 5.347108 2576 +chemic 1 5 5.347108 5.347108 2552 +plant 1 5 5.347108 5.347108 2497 +decad 1 5 5.347108 5.347108 2455 +increasingli 1 4 5.568345 5.568345 2766 +aircraft 1 4 5.568345 5.568345 2872 +rigor 1 4 5.568345 5.568345 3030 +lai 1 3 5.857933 5.857933 3694 +safewar 1 2 6.263398 6.263398 5959 +reactor 1 1 6.957497 6.957497 17090 +defenc 1 1 6.957497 6.957497 17091 +malfunct 1 1 6.957497 6.957497 17092 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html new file mode 100644 index 00000000..fa8205a1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^softbots^www^softbots.html @@ -0,0 +1,170 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 3 443 0.693147 2.079441 6 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +softwar 3 220 1.386294 4.158882 30 +washington 3 236 1.386294 4.158882 32 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +develop 2 174 1.791759 3.583518 53 +base 2 165 1.791759 3.583518 50 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +implement 1 152 1.791759 1.791759 52 +support 1 132 1.945910 1.945910 83 +high 2 130 2.079442 4.158884 101 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +world 2 115 2.197225 4.394450 126 +assist 1 112 2.197225 2.197225 113 +check 1 115 2.197225 2.197225 118 +specif 1 106 2.197225 2.197225 106 +user 4 104 2.302585 9.210340 137 +access 2 102 2.302585 4.605170 136 +take 1 97 2.302585 2.302585 134 +technic 1 100 2.302585 2.302585 140 +advanc 1 99 2.302585 2.302585 130 +graphic 2 90 2.397895 4.795790 147 +search 2 95 2.397895 4.795790 155 +real 1 93 2.397895 2.397895 144 +internet 6 83 2.484907 14.909442 186 +control 3 82 2.484907 7.454721 164 +build 2 85 2.484907 4.969814 184 +level 2 87 2.484907 4.969814 180 +wide 2 84 2.484907 4.969814 185 +resourc 2 81 2.484907 4.969814 172 +learn 1 86 2.484907 2.484907 170 +info 1 85 2.484907 2.484907 176 +interfac 3 79 2.564949 7.694847 209 +decemb 2 80 2.564949 5.129898 215 +state 1 76 2.564949 2.564949 207 +want 1 79 2.564949 2.564949 199 +dynam 1 76 2.564949 2.564949 194 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +optim 1 79 2.564949 2.564949 197 +intellig 2 72 2.639057 5.278114 225 +servic 1 72 2.639057 2.639057 236 +goal 3 66 2.708050 8.124150 250 +plan 3 65 2.772589 8.317767 272 +interact 1 62 2.772589 2.772589 270 +experi 1 64 2.772589 2.772589 283 +back 2 60 2.833213 5.666426 297 +juli 1 60 2.833213 2.833213 305 +unix 1 58 2.890372 2.890372 308 +space 1 57 2.890372 2.890372 310 +browser 1 56 2.890372 2.890372 313 +found 3 53 2.944439 8.833317 337 +allow 1 53 2.944439 2.944439 333 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +maintain 1 51 2.995732 2.995732 342 +without 1 50 3.044522 3.044522 370 +execut 1 45 3.135494 3.135494 404 +protocol 1 45 3.135494 3.135494 407 +multipl 1 39 3.258097 3.258097 453 +field 1 37 3.332205 3.332205 482 +robot 2 36 3.367296 6.734592 497 +procedur 1 36 3.367296 3.367296 488 +multi 1 36 3.367296 3.367296 493 +articl 1 33 3.433987 3.433987 530 +human 1 32 3.465736 3.465736 546 +extend 1 32 3.465736 3.465736 539 +collabor 1 32 3.465736 3.465736 543 +taken 1 31 3.496508 3.496508 555 +rang 1 30 3.555348 3.555348 565 +option 1 30 3.555348 3.555348 575 +specifi 1 30 3.555348 3.555348 568 +domain 1 30 3.555348 3.555348 564 +determin 1 27 3.637586 3.637586 630 +enabl 2 26 3.688879 7.377758 655 +challeng 1 26 3.688879 3.688879 653 +compar 1 26 3.688879 3.688879 648 +rule 1 26 3.688879 3.688879 638 +magazin 2 24 3.761200 7.522400 704 +mike 1 24 3.761200 3.761200 703 +methodolog 1 23 3.806662 3.806662 733 +util 1 21 3.912023 3.912023 774 +alumni 1 21 3.912023 3.912023 807 +agent 2 18 4.060443 8.120886 910 +accept 1 18 4.060443 4.060443 879 +debug 1 17 4.110874 4.110874 944 +indic 1 15 4.248495 4.248495 1013 +achiev 1 14 4.317488 4.317488 1088 +easili 1 14 4.317488 4.317488 1077 +dave 1 14 4.317488 4.317488 1098 +daniel 1 12 4.465908 4.465908 1233 +tour 1 11 4.553877 4.553877 1307 +motiv 1 11 4.553877 4.553877 1346 +princip 1 10 4.653960 4.653960 1397 +metacrawl 1 10 4.653960 4.653960 1455 +weld 1 9 4.753590 4.753590 1538 +autonom 1 8 4.875197 4.875197 1749 +claim 1 8 4.875197 4.875197 1664 +accomplish 1 8 4.875197 4.875197 1755 +gather 1 8 4.875197 4.875197 1719 +softbot 16 7 5.010635 80.170160 1974 +golden 2 7 5.010635 10.021270 1962 +intellectu 1 7 5.010635 5.010635 1847 +planner 1 7 5.010635 5.010635 1797 +etzioni 3 6 5.164786 15.494358 2135 +oren 2 6 5.164786 10.329572 2134 +moder 1 6 5.164786 5.164786 2112 +brook 1 6 5.164786 5.164786 2152 +versu 1 6 5.164786 5.164786 2052 +keith 2 5 5.347108 10.694216 2528 +shell 1 5 5.347108 5.347108 2353 +cacm 1 5 5.347108 5.347108 2388 +innov 1 4 5.568345 5.568345 2933 +substrat 1 4 5.568345 5.568345 2857 +disambigu 1 4 5.568345 5.568345 2899 +repli 1 4 5.568345 5.568345 2689 +toth 1 4 5.568345 5.568345 2595 +reactiv 2 3 5.857933 11.715866 3575 +kwok 2 3 5.857933 11.715866 3941 +sujai 1 3 5.857933 5.857933 3960 +parekh 1 3 5.857933 5.857933 3961 +hacker 1 3 5.857933 5.857933 3996 +finalist 1 2 6.263398 6.263398 5890 +discoveraward 1 2 6.263398 6.263398 5891 +learningtechniqu 1 2 6.263398 6.263398 5028 +christianson 1 2 6.263398 6.263398 5849 +negoti 1 2 6.263398 6.263398 6054 +goan 1 2 6.263398 6.263398 5896 +ingram 1 2 6.263398 6.263398 5847 +perkowitz 1 2 6.263398 6.263398 5970 +softbotinternet 1 1 6.957497 6.957497 17093 +softbotth 1 1 6.957497 6.957497 17094 +softwareenviron 1 1 6.957497 6.957497 17095 +pragmaticallyconveni 1 1 6.957497 6.957497 17096 +acustomiz 1 1 6.957497 6.957497 17097 +internetaccess 1 1 6.957497 6.957497 17098 +generatesand 1 1 6.957497 6.957497 17099 +itsexperi 1 1 6.957497 6.957497 17100 +requestand 1 1 6.957497 6.957497 17101 +satisfyit 1 1 6.957497 6.957497 17102 +interactwith 1 1 6.957497 6.957497 17103 +sgraphic 1 1 6.957497 6.957497 17104 +tosearch 1 1 6.957497 6.957497 17105 +sophisticatedprun 1 1 6.957497 6.957497 17106 +cartoonrepresent 1 1 6.957497 6.957497 17107 +blanchard 1 1 6.957497 6.957497 17108 +ofcolumn 1 1 6.957497 6.957497 17109 +xiiplann 1 1 6.957497 6.957497 17110 +ilalearn 1 1 6.957497 6.957497 17111 +ying 1 1 6.957497 6.957497 17112 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ new file mode 100644 index 00000000..11620b58 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^spin^www^ @@ -0,0 +1,214 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +system 17 443 0.693147 11.783499 6 +program 2 374 0.693147 1.386294 7 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +project 10 340 1.098612 10.986120 18 +us 4 329 1.098612 4.394448 16 +time 3 293 1.098612 3.295836 17 +languag 5 227 1.386294 6.931470 26 +gener 3 220 1.386294 4.158882 27 +washington 2 236 1.386294 2.772588 32 +design 2 213 1.386294 2.772588 25 +link 1 247 1.386294 1.386294 24 +oper 10 180 1.609438 16.094380 34 +paper 6 205 1.609438 9.656628 38 +applic 7 170 1.791759 12.542313 56 +implement 4 152 1.791759 7.167036 52 +network 3 168 1.791759 5.375277 61 +data 2 170 1.791759 3.583518 49 +address 2 170 1.791759 3.583518 62 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +perform 5 143 1.945910 9.729550 74 +support 3 132 1.945910 5.837730 83 +architectur 1 139 1.945910 1.945910 77 +relat 1 139 1.945910 1.945910 68 +report 3 131 2.079442 6.238326 92 +compil 2 122 2.079442 4.158884 96 +machin 1 129 2.079442 2.079442 95 +provid 1 121 2.079442 2.079442 94 +confer 1 126 2.079442 2.079442 100 +high 1 130 2.079442 2.079442 101 +document 1 121 2.079442 2.079442 89 +code 8 108 2.197225 17.577800 116 +intern 2 108 2.197225 4.394450 128 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +manag 1 114 2.197225 2.197225 125 +assist 1 112 2.197225 2.197225 113 +take 5 97 2.302585 11.512925 134 +peopl 2 96 2.302585 4.605170 132 +call 2 91 2.397895 4.795790 153 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +control 2 82 2.484907 4.969814 164 +member 2 84 2.484907 4.969814 165 +resourc 1 81 2.484907 2.484907 172 +build 1 85 2.484907 2.484907 184 +dynam 7 76 2.564949 17.954643 194 +interfac 4 79 2.564949 10.259796 209 +appear 3 78 2.564949 7.694847 210 +server 1 76 2.564949 2.564949 204 +exampl 1 77 2.564949 2.564949 195 +master 1 76 2.564949 2.564949 216 +servic 3 72 2.639057 7.917171 236 +effici 1 73 2.639057 2.639057 233 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +integr 2 67 2.708050 5.416100 245 +order 1 69 2.708050 2.708050 249 +degre 1 69 2.708050 2.708050 259 +collect 2 65 2.772589 5.545178 268 +result 2 65 2.772589 5.545178 281 +creat 2 63 2.772589 5.545178 277 +copi 1 63 2.772589 2.772589 284 +written 1 63 2.772589 2.772589 278 +function 1 62 2.772589 2.772589 275 +experi 1 64 2.772589 2.772589 283 +improv 1 62 2.772589 2.772589 289 +unix 2 58 2.890372 5.780744 308 +direct 1 57 2.890372 2.890372 316 +space 1 57 2.890372 2.890372 310 +special 1 56 2.890372 2.890372 320 +point 1 58 2.890372 2.890372 319 +overview 1 56 2.890372 2.890372 323 +extens 15 53 2.944439 44.166585 340 +allow 2 53 2.944439 5.888878 333 +talk 1 53 2.944439 2.944439 336 +undergradu 1 54 2.944439 2.944439 338 +run 3 51 2.995732 8.987196 347 +maintain 1 51 2.995732 2.995732 342 +basic 2 50 3.044522 6.089044 360 +friend 1 48 3.044522 3.044522 376 +pointer 1 48 3.044522 3.044522 368 +adapt 1 46 3.091042 3.091042 387 +could 1 46 3.091042 3.091042 383 +describ 5 45 3.135494 15.677470 400 +execut 2 45 3.135494 6.270988 404 +video 1 44 3.135494 3.135494 405 +anoth 1 45 3.135494 3.135494 408 +protocol 1 45 3.135494 3.135494 407 +mechan 1 43 3.178054 3.178054 416 +show 1 43 3.178054 3.178054 417 +join 1 39 3.258097 3.258097 457 +realli 1 40 3.258097 3.258097 444 +credit 2 38 3.295837 6.591674 460 +industri 1 38 3.295837 3.295837 464 +brian 1 38 3.295837 3.295837 466 +workstat 1 37 3.332205 3.332205 479 +purpos 1 37 3.332205 3.332205 481 +connect 1 37 3.332205 3.332205 485 +procedur 1 36 3.367296 3.367296 488 +winter 1 36 3.367296 3.367296 500 +extend 1 32 3.465736 3.465736 539 +fault 1 32 3.465736 3.465736 547 +posit 1 31 3.496508 3.496508 552 +synchron 1 29 3.583519 3.583519 588 +load 2 28 3.610918 7.221836 601 +pass 1 28 3.610918 3.610918 611 +manipul 1 27 3.637586 3.637586 624 +rather 1 26 3.688879 3.688879 642 +effort 1 26 3.688879 3.688879 652 +handl 1 24 3.761200 3.761200 685 +thread 2 23 3.806662 7.613324 722 +almost 1 22 3.850148 3.850148 742 +properti 1 22 3.850148 3.850148 749 +deal 1 22 3.850148 3.850148 736 +flexibl 1 21 3.912023 3.912023 792 +latest 1 21 3.912023 3.912023 785 +fund 1 21 3.912023 3.912023 805 +kernel 10 20 3.951244 39.512440 825 +longer 1 20 3.951244 3.951244 816 +safeti 1 20 3.951244 3.951244 817 +facil 1 20 3.951244 3.951244 814 +benchmark 2 19 4.007333 8.014666 859 +runtim 1 19 4.007333 4.007333 858 +bershad 2 18 4.060443 8.120886 902 +less 1 18 4.060443 4.060443 892 +statu 1 18 4.060443 4.060443 885 +encourag 1 18 4.060443 4.060443 880 +regular 1 17 4.110874 4.110874 929 +critic 1 16 4.174387 4.174387 982 +capabl 1 15 4.248495 4.248495 1016 +piec 1 15 4.248495 4.248495 1020 +overhead 1 15 4.248495 4.248495 1035 +spin 10 14 4.317488 43.174880 1121 +happi 1 14 4.317488 4.317488 1079 +decid 1 14 4.317488 4.317488 1075 +sai 2 13 4.382027 8.764054 1175 +pretti 1 13 4.382027 4.382027 1191 +wait 1 13 4.382027 4.382027 1168 +safe 2 12 4.465908 8.931816 1274 +usenix 1 12 4.465908 4.465908 1240 +alpha 2 11 4.553877 9.107754 1348 +arbitrari 1 11 4.553877 4.553877 1359 +abil 1 11 4.553877 4.553877 1341 +arpa 1 11 4.553877 4.553877 1369 +sosp 1 10 4.653960 4.653960 1416 +modula 5 9 4.753590 23.767950 1613 +inter 1 9 4.753590 4.753590 1530 +osdi 1 9 4.753590 4.753590 1534 +clear 1 9 4.753590 4.753590 1488 +isol 1 8 4.875197 4.875197 1663 +crash 1 8 4.875197 4.875197 1616 +cross 1 8 4.875197 4.875197 1703 +mach 1 8 4.875197 4.875197 1669 +core 1 7 5.010635 5.010635 1809 +prevent 1 7 5.010635 5.010635 1827 +bottom 1 7 5.010635 5.010635 1906 +quick 1 6 5.164786 5.164786 2184 +recov 1 6 5.164786 5.164786 2235 +trail 1 6 5.164786 5.164786 2071 +academia 1 6 5.164786 5.164786 2036 +bind 1 5 5.347108 5.347108 2250 +distinct 1 5 5.347108 5.347108 2319 +adopt 1 5 5.347108 5.347108 2467 +termin 2 4 5.568345 11.136690 2852 +andimplement 1 4 5.568345 5.568345 3029 +fork 1 4 5.568345 5.568345 2801 +gotten 1 4 5.568345 5.568345 2628 +stillmaintain 1 3 5.857933 5.857933 3964 +providesa 1 3 5.857933 5.857933 3884 +thesear 1 3 5.857933 5.857933 3456 +forappl 1 3 5.857933 5.857933 3929 +linker 1 3 5.857933 5.857933 3157 +namespac 1 3 5.857933 5.857933 3957 +arrow 1 3 5.857933 5.857933 3520 +microsecond 5 2 6.263398 31.316990 5435 +shortcom 2 2 6.263398 12.526796 5978 +wella 1 2 6.263398 6.263398 4289 +linkabl 1 2 6.263398 6.263398 5979 +barb 1 2 6.263398 6.263398 6058 +qualif 1 2 6.263398 6.263398 6059 +mascot 1 2 6.263398 6.263398 6060 +systemspin 1 1 6.957497 6.957497 17113 +thatsupport 1 1 6.957497 6.957497 17114 +atruntim 1 1 6.957497 6.957497 17115 +accesshardwar 1 1 6.957497 6.957497 17116 +nooverhead 1 1 6.957497 6.957497 17117 +byrefer 1 1 6.957497 6.957497 17118 +systemservic 1 1 6.957497 6.957497 17119 +allextens 1 1 6.957497 6.957497 17120 +typesaf 1 1 6.957497 6.957497 17121 +oftypesafeti 1 1 6.957497 6.957497 17122 +attemptingto 1 1 6.957497 6.957497 17123 +writeboth 1 1 6.957497 6.957497 17124 +machinerun 1 1 6.957497 6.957497 17125 +withlow 1 1 6.957497 6.957497 17126 +executeit 1 1 6.957497 6.957497 17127 +protectedprocedur 1 1 6.957497 6.957497 17128 +overethernet 1 1 6.957497 6.957497 17129 +oldadapt 1 1 6.957497 6.957497 17130 +operationsund 1 1 6.957497 6.957497 17131 +samehardwar 1 1 6.957497 6.957497 17132 +saveyourself 1 1 6.957497 6.957497 17133 +invoc 1 1 6.957497 6.957497 17134 +andsimpl 1 1 6.957497 6.957497 17135 +interposit 1 1 6.957497 6.957497 17136 +raship 1 1 6.957497 6.957497 17137 +ourmascot 1 1 6.957497 6.957497 17138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ new file mode 100644 index 00000000..c3aad827 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^unisw^DynComp^www^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +program 2 374 0.693147 1.386294 7 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +time 5 293 1.098612 5.493060 17 +us 3 329 1.098612 3.295836 16 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +gener 2 220 1.386294 2.772588 27 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +washington 1 236 1.386294 1.386294 32 +paper 2 205 1.609438 3.218876 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +relat 1 139 1.945910 1.945910 68 +perform 1 143 1.945910 1.945910 74 +compil 14 122 2.079442 29.112188 96 +machin 2 129 2.079442 4.158884 95 +high 1 130 2.079442 2.079442 101 +code 5 108 2.197225 10.986125 116 +part 1 98 2.302585 2.302585 129 +section 1 94 2.397895 2.397895 149 +member 1 84 2.484907 2.484907 165 +activ 1 84 2.484907 2.484907 182 +start 1 83 2.484907 2.484907 173 +build 1 85 2.484907 2.484907 184 +second 1 81 2.484907 2.484907 166 +dynam 12 76 2.564949 30.779388 194 +optim 3 79 2.564949 7.694847 197 +exampl 1 77 2.564949 2.564949 195 +effici 1 73 2.639057 2.639057 233 +appli 1 71 2.639057 2.639057 226 +august 1 66 2.708050 2.708050 257 +copi 1 63 2.772589 2.772589 284 +automat 1 61 2.833213 2.833213 306 +simpl 1 60 2.833213 2.833213 298 +explor 1 58 2.890372 2.890372 324 +detail 1 57 2.890372 2.890372 321 +approach 1 48 3.044522 3.044522 366 +execut 1 45 3.135494 3.135494 404 +describ 1 45 3.135494 3.135494 400 +howev 2 41 3.218876 6.437752 422 +fast 1 42 3.218876 3.218876 429 +programm 1 39 3.258097 3.258097 445 +prototyp 1 38 3.295837 3.295837 463 +cost 1 37 3.332205 3.332205 480 +purpos 1 37 3.332205 3.332205 481 +soon 1 36 3.367296 3.367296 494 +produc 2 30 3.555348 7.110696 572 +rang 1 30 3.555348 3.555348 565 +releas 1 28 3.610918 3.610918 616 +static 1 27 3.637586 3.637586 619 +enabl 1 26 3.688879 3.688879 655 +bound 1 26 3.688879 3.688879 659 +valu 3 25 3.737670 11.213010 665 +interpret 1 24 3.761200 3.761200 686 +variabl 1 23 3.806662 3.806662 715 +initi 1 23 3.806662 3.806662 717 +identifi 1 22 3.850148 3.850148 760 +annot 1 21 3.912023 3.912023 775 +kernel 1 20 3.951244 3.951244 825 +region 1 19 4.007333 4.007333 875 +spin 1 14 4.317488 4.317488 1121 +remov 1 12 4.465908 4.465908 1225 +target 1 12 4.465908 4.465908 1282 +grant 1 12 4.465908 4.465908 1216 +branch 1 11 4.553877 4.553877 1318 +loop 1 11 4.553877 4.553877 1310 +elimin 1 9 4.753590 4.753590 1558 +pair 1 9 4.753590 4.753590 1503 +analys 1 8 4.875197 4.875197 1666 +pldi 1 8 4.875197 4.875197 1704 +dispatch 1 7 5.010635 5.010635 1791 +constant 4 5 5.347108 21.388432 2251 +templat 1 5 5.347108 5.347108 2311 +dataflow 1 5 5.347108 5.347108 2390 +willb 1 5 5.347108 5.347108 2277 +spinproject 1 5 5.347108 5.347108 2570 +fold 1 4 5.568345 5.568345 2615 +fulli 1 4 5.568345 5.568345 2986 +theprogram 1 4 5.568345 5.568345 2686 +patch 1 4 5.568345 5.568345 2710 +imper 1 4 5.568345 5.568345 3067 +eventu 1 4 5.568345 5.568345 3074 +wewil 1 4 5.568345 5.568345 2688 +projectth 1 3 5.857933 5.857933 3344 +propag 1 3 5.857933 5.857933 3997 +dynamiccompil 1 3 5.857933 5.857933 3926 +optimizingcompil 1 2 6.263398 6.263398 4456 +projectmor 1 1 6.957497 6.957497 17139 +projectsuw 1 1 6.957497 6.957497 17140 +webdynam 1 1 6.957497 6.957497 17141 +ofinvari 1 1 6.957497 6.957497 17142 +theserun 1 1 6.957497 6.957497 17143 +memoryload 1 1 6.957497 6.957497 17144 +theydetermin 1 1 6.957497 6.957497 17145 +unrol 1 1 6.957497 6.957497 17146 +performancebenefit 1 1 6.957497 6.957497 17147 +offsetbi 1 1 6.957497 6.957497 17148 +strive 1 1 6.957497 6.957497 17149 +qualitydynam 1 1 6.957497 6.957497 17150 +thetempl 1 1 6.957497 6.957497 17151 +initialexperi 1 1 6.957497 6.957497 17152 +producedspeedup 1 1 6.957497 6.957497 17153 +dynamicallycompil 1 1 6.957497 6.957497 17154 +spinev 1 1 6.957497 6.957497 17155 +otherposs 1 1 6.957497 6.957497 17156 +invirtu 1 1 6.957497 6.957497 17157 +systemi 1 1 6.957497 6.957497 17158 +arenow 1 1 6.957497 6.957497 17159 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^weird^www b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^weird^www new file mode 100644 index 00000000..2d6075fa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^weird^www @@ -0,0 +1,5 @@ +term, tf, in documents count, idf, tfidf, wordid +document 1 121 2.079442 2.079442 89 +move 2 47 3.091042 6.182084 382 +permanentlymov 1 12 4.465908 4.465908 1250 +permanentlyth 1 12 4.465908 4.465908 1251 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ new file mode 100644 index 00000000..a91e632b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^projects^zpl^ @@ -0,0 +1,94 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 11 374 0.693147 7.624617 7 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +project 5 340 1.098612 5.493060 18 +engin 2 297 1.098612 2.197224 20 +washington 3 236 1.386294 4.158882 32 +languag 3 227 1.386294 4.158882 26 +paper 2 205 1.609438 3.218876 38 +modifi 1 178 1.609438 1.609438 35 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +base 2 165 1.791759 3.583518 50 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +compil 3 122 2.079442 6.238326 96 +machin 2 129 2.079442 4.158884 95 +seattl 1 120 2.079442 2.079442 103 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +info 2 85 2.484907 4.969814 176 +level 2 87 2.484907 4.969814 180 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +member 1 84 2.484907 2.484907 165 +help 1 83 2.484907 2.484907 175 +write 1 72 2.639057 2.639057 222 +line 1 75 2.639057 2.639057 231 +would 1 67 2.708050 2.708050 251 +written 2 63 2.772589 5.545178 278 +descript 1 64 2.772589 2.772589 271 +direct 3 57 2.890372 8.671116 316 +overview 2 56 2.890372 5.780744 323 +special 1 56 2.890372 2.890372 320 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +detail 1 57 2.890372 2.890372 321 +scientif 2 53 2.944439 5.888878 341 +sampl 1 53 2.944439 2.944439 339 +without 2 50 3.044522 6.089044 370 +right 1 48 3.044522 3.044522 363 +fast 1 42 3.218876 3.218876 429 +futur 1 41 3.218876 3.218876 427 +error 1 40 3.258097 3.258097 449 +small 1 39 3.258097 3.258097 447 +programm 1 39 3.258097 3.258097 445 +manual 1 35 3.401197 3.401197 504 +concept 2 32 3.465736 6.931472 537 +independ 1 32 3.465736 3.465736 548 +scientist 1 31 3.496508 3.496508 560 +autumn 1 31 3.496508 3.496508 558 +arrai 2 27 3.637586 7.275172 627 +higher 1 24 3.761200 3.761200 690 +flow 1 24 3.761200 3.761200 700 +sequenti 1 22 3.850148 3.850148 745 +minut 1 20 3.951244 3.951244 810 +region 1 19 4.007333 4.007333 875 +previous 1 17 4.110874 4.110874 923 +modif 1 17 4.110874 4.110874 913 +fortran 1 15 4.248495 4.248495 1027 +easili 2 14 4.317488 8.634976 1077 +necessari 1 13 4.382027 4.382027 1147 +walk 2 12 4.465908 8.931816 1281 +loop 1 11 4.553877 4.553877 1310 +typic 1 11 4.553877 4.553877 1360 +suitabl 1 9 4.753590 4.753590 1486 +elimin 1 9 4.753590 4.753590 1558 +ideal 1 8 4.875197 4.875197 1630 +understood 1 5 5.347108 5.347108 2364 +enrol 1 4 5.568345 5.568345 2613 +tediou 1 3 5.857933 5.857933 3731 +shorter 1 3 5.857933 5.857933 3998 +conclus 1 3 5.857933 5.857933 3367 +horizon 1 3 5.857933 5.857933 3746 +border 2 2 6.263398 12.526796 4980 +prone 1 2 6.263398 6.263398 5178 +shouldconsid 1 2 6.263398 6.263398 6061 +acknowledg 1 2 6.263398 6.263398 6062 +eduzpl 1 1 6.957497 6.957497 17160 +recompil 1 1 6.957497 6.957497 17161 +shatter 1 1 6.957497 6.957497 17162 +yourmachin 1 1 6.957497 6.957497 17163 +zpthi 1 1 6.957497 6.957497 17164 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^smt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^smt^ new file mode 100644 index 00000000..63704492 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.washington.edu^research^smt^ @@ -0,0 +1,150 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +work 1 380 0.693147 0.693147 9 +student 3 343 1.098612 3.295836 19 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +public 2 202 1.609438 3.218876 43 +includ 1 208 1.609438 1.609438 42 +list 1 201 1.609438 1.609438 39 +parallel 6 169 1.791759 10.750554 60 +avail 2 169 1.791759 3.583518 48 +implement 1 152 1.791759 1.791759 52 +architectur 4 139 1.945910 7.783640 77 +problem 2 147 1.945910 3.891820 75 +perform 2 143 1.945910 3.891820 74 +first 1 140 1.945910 1.945910 71 +postscript 4 131 2.079442 8.317768 90 +compil 3 122 2.079442 6.238326 96 +high 2 130 2.079442 4.158884 101 +studi 1 120 2.079442 2.079442 91 +intern 2 108 2.197225 4.394450 128 +check 1 115 2.197225 2.197225 118 +techniqu 2 99 2.302585 4.605170 138 +memori 1 101 2.302585 2.302585 139 +proceed 3 93 2.397895 7.193685 152 +level 4 87 2.484907 9.939628 180 +resourc 2 81 2.484907 4.969814 172 +wide 1 84 2.484907 2.484907 185 +activ 1 84 2.484907 2.484907 182 +issu 5 78 2.564949 12.824745 211 +dynam 1 76 2.564949 2.564949 194 +june 1 79 2.564949 2.564949 214 +symposium 2 72 2.639057 5.278114 238 +workshop 1 71 2.639057 2.639057 239 +differ 1 66 2.708050 2.708050 253 +abstract 3 62 2.772589 8.317767 276 +function 1 62 2.772589 2.772589 275 +januari 1 62 2.772589 2.772589 264 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +processor 10 54 2.944439 29.444390 335 +instruct 7 53 2.944439 20.611073 332 +extens 1 53 2.944439 2.944439 340 +maintain 2 51 2.995732 5.991464 342 +hardwar 1 51 2.995732 2.995732 350 +investig 1 51 2.995732 2.995732 353 +digit 1 52 2.995732 2.995732 348 +still 1 50 3.044522 3.044522 362 +effect 1 46 3.091042 3.091042 385 +execut 1 45 3.135494 3.135494 404 +long 1 43 3.178054 3.178054 413 +combin 1 42 3.218876 3.218876 421 +futur 1 41 3.218876 3.218876 427 +fast 1 42 3.218876 3.218876 429 +multipl 3 39 3.258097 9.774291 453 +annual 2 40 3.258097 6.516194 458 +submit 1 39 3.258097 3.258097 440 +singl 3 34 3.401197 10.203591 510 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +collabor 1 32 3.465736 3.465736 543 +limit 1 29 3.583519 3.583519 585 +though 1 27 3.637586 3.637586 622 +enabl 2 26 3.688879 7.377758 655 +todai 1 25 3.737670 3.737670 672 +thread 3 23 3.806662 11.419986 722 +util 2 21 3.912023 7.824046 774 +unit 1 21 3.912023 3.912023 779 +corpor 1 21 3.912023 3.912023 802 +chip 1 21 3.912023 3.912023 770 +increas 1 20 3.951244 3.951244 829 +exploit 1 20 3.951244 3.951244 836 +speed 1 18 4.060443 4.060443 911 +minim 1 18 4.060443 4.060443 887 +ultim 1 17 4.110874 4.110874 943 +stanford 1 17 4.110874 4.110874 955 +latenc 2 16 4.174387 8.348774 993 +modern 2 16 4.174387 8.348774 966 +permit 1 16 4.174387 4.174387 962 +choic 1 16 4.174387 4.174387 979 +susan 1 15 4.248495 4.248495 1050 +levi 4 14 4.317488 17.269952 1093 +shown 1 14 4.317488 4.317488 1080 +conduct 1 14 4.317488 4.317488 1065 +dean 1 14 4.317488 4.317488 1104 +convert 1 13 4.382027 4.382027 1122 +amount 1 12 4.465908 4.465908 1208 +hank 1 12 4.465908 4.465908 1253 +philadelphia 1 12 4.465908 4.465908 1244 +multithread 11 11 4.553877 50.092647 1315 +cycl 5 11 4.553877 22.769385 1335 +itali 1 11 4.553877 4.553877 1378 +equip 1 10 4.653960 4.653960 1459 +santa 1 10 4.653960 4.653960 1441 +face 2 9 4.753590 9.507180 1501 +significantli 1 9 4.753590 4.753590 1508 +egger 5 8 4.875197 24.375985 1695 +jack 2 8 4.875197 9.750394 1780 +gain 1 8 4.875197 4.875197 1730 +joel 1 8 4.875197 4.875197 1698 +microprocessor 1 7 5.010635 5.010635 1808 +maxim 1 7 5.010635 5.010635 1944 +simultan 9 6 5.164786 46.483074 2155 +tullsen 4 6 5.164786 20.659144 2081 +superscalar 3 6 5.164786 15.494358 2082 +multiprogram 1 6 5.164786 5.164786 2010 +rebecca 1 6 5.164786 5.164786 2174 +crucial 1 5 5.347108 5.347108 2384 +compet 1 5 5.347108 5.347108 2462 +fetch 1 5 5.347108 5.347108 2567 +hide 1 4 5.568345 5.568345 2996 +throughput 1 4 5.568345 5.568345 2993 +emer 3 3 5.857933 17.573799 3969 +stamm 3 3 5.857933 17.573799 3970 +allevi 1 3 5.857933 5.857933 3643 +interchang 1 3 5.857933 5.857933 3893 +peoplefaculti 1 3 5.857933 5.857933 3981 +affair 1 3 5.857933 5.857933 3916 +andd 2 2 6.263398 12.526796 4346 +suif 1 2 6.263398 6.263398 5944 +lojlo 1 2 6.263398 6.263398 5943 +pagesimultan 1 1 6.957497 6.957497 17165 +projectoverviewpeoplepubl 1 1 6.957497 6.957497 17166 +overviewth 1 1 6.957497 6.957497 17167 +interleav 1 1 6.957497 6.957497 17168 +differentthread 1 1 6.957497 6.957497 17169 +issuefeatur 1 1 6.957497 6.957497 17170 +abilityof 1 1 6.957497 6.957497 17171 +contextsar 1 1 6.957497 6.957497 17172 +exploitthread 1 1 6.957497 6.957497 17173 +formsof 1 1 6.957497 6.957497 17174 +havedemonstr 1 1 6.957497 6.957497 17175 +improvesprocessor 1 1 6.957497 6.957497 17176 +parallelworkload 1 1 6.957497 6.957497 17177 +achievedin 1 1 6.957497 6.957497 17178 +ordersuperscalar 1 1 6.957497 6.957497 17179 +synchronizationtechniqu 1 1 6.957497 6.957497 17180 +otherarchitectur 1 1 6.957497 6.957497 17181 +levygradu 1 1 6.957497 6.957497 17182 +tullsenindustri 1 1 6.957497 6.957497 17183 +andh 1 1 6.957497 6.957497 17184 +margherita 1 1 6.957497 6.957497 17185 +ligur 1 1 6.957497 6.957497 17186 +doon 1 1 6.957497 6.957497 17187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu new file mode 100644 index 00000000..1485955c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 8 640 0.000000 0.000000 4 +comput 7 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +depart 5 457 0.693147 3.465735 12 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +fall 1 181 1.609438 1.609438 40 +class 1 199 1.609438 1.609438 37 +develop 1 174 1.791759 1.791759 53 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +report 2 131 2.079442 4.158884 92 +dayton 1 119 2.079442 2.079442 104 +peopl 1 96 2.302585 2.302585 132 +technic 1 100 2.302585 2.302585 140 +question 1 91 2.397895 2.397895 141 +member 1 84 2.484907 2.484907 165 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +receiv 1 66 2.708050 2.708050 244 +organ 1 65 2.772589 2.772589 265 +faculti 3 56 2.890372 8.671116 325 +three 2 54 2.944439 5.888878 330 +undergradu 1 54 2.944439 2.944439 338 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +frequent 1 49 3.044522 3.044522 367 +answer 1 45 3.135494 3.135494 391 +offer 1 43 3.178054 3.178054 414 +futur 1 41 3.218876 3.218876 427 +form 1 39 3.258097 3.258097 443 +annual 1 40 3.258097 3.258097 458 +streetmadison 1 38 3.295837 3.295837 474 +award 5 34 3.401197 17.005985 523 +statist 2 35 3.401197 6.802394 521 +dissert 1 32 3.465736 3.465736 549 +scientist 1 31 3.496508 3.496508 560 +ask 1 28 3.610918 3.610918 597 +consist 1 26 3.688879 3.688879 651 +doctor 1 24 3.761200 3.761200 709 +departmentunivers 1 24 3.761200 3.761200 711 +alumni 1 21 3.912023 3.912023 807 +util 1 21 3.912023 3.912023 774 +voic 1 21 3.912023 3.912023 806 +excel 1 19 4.007333 4.007333 868 +young 1 16 4.174387 4.174387 991 +women 1 16 4.174387 4.174387 1004 +countri 1 15 4.248495 4.248495 1059 +rank 1 14 4.317488 4.317488 1063 +packard 1 10 4.653960 4.653960 1444 +fellowship 1 10 4.653960 4.653960 1460 +presidenti 1 8 4.875197 4.875197 1737 +pagecomput 1 7 5.010635 5.010635 1900 +timet 1 3 5.857933 5.857933 3471 +guidebook 2 2 6.263398 12.526796 4643 +departmentabout 1 1 6.957497 6.957497 17188 +departmentour 1 1 6.957497 6.957497 17189 +fourteen 1 1 6.957497 6.957497 17190 +incent 1 1 6.957497 6.957497 17191 +colophon 1 1 6.957497 6.957497 17192 +infocomput 1 1 6.957497 6.957497 17193 +madisona 1 1 6.957497 6.957497 17194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html new file mode 100644 index 00000000..071a1507 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^exploration.html @@ -0,0 +1,115 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +base 2 165 1.791759 3.583518 50 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +object 4 138 1.945910 7.783640 79 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +first 1 140 1.945910 1.945910 71 +provid 1 121 2.079442 2.079442 94 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +imag 1 91 2.397895 2.397895 161 +select 1 91 2.397895 2.397895 154 +control 6 82 2.484907 14.909442 164 +activ 1 84 2.484907 2.484907 182 +requir 1 81 2.484907 2.484907 167 +second 1 81 2.484907 2.484907 166 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +order 1 69 2.708050 2.708050 249 +view 1 70 2.708050 2.708050 254 +complex 1 64 2.772589 2.772589 269 +simpl 2 60 2.833213 5.666426 298 +point 5 58 2.890372 14.451860 319 +explor 4 58 2.890372 11.561488 324 +special 1 56 2.890372 2.890372 320 +direct 1 57 2.890372 2.890372 316 +local 3 55 2.944439 8.833317 334 +suggest 1 53 2.944439 2.944439 331 +maintain 1 51 2.995732 2.995732 342 +approach 2 48 3.044522 6.089044 366 +show 2 43 3.178054 6.356108 417 +combin 1 42 3.218876 3.218876 421 +correct 2 38 3.295837 6.591674 462 +slide 1 38 3.295837 3.295837 467 +purpos 2 37 3.332205 6.664410 481 +connect 1 37 3.332205 3.332205 485 +global 3 34 3.401197 10.203591 520 +either 1 35 3.401197 3.401197 506 +posit 1 31 3.496508 3.496508 552 +exist 1 30 3.555348 3.555348 569 +consid 2 29 3.583519 7.167038 590 +focus 1 29 3.583519 3.583519 584 +task 8 25 3.737670 29.901360 678 +strategi 1 25 3.737670 3.737670 682 +motion 1 24 3.761200 3.761200 699 +other 1 24 3.761200 3.761200 697 +reach 1 24 3.761200 3.761200 688 +frame 1 24 3.761200 3.761200 684 +decis 1 23 3.806662 3.806662 728 +mobil 1 23 3.806662 3.806662 730 +lead 1 23 3.806662 3.806662 718 +defin 1 22 3.850148 3.850148 746 +navig 2 21 3.912023 7.824046 796 +avoid 1 21 3.912023 3.912023 799 +region 1 19 4.007333 4.007333 875 +geometr 1 19 4.007333 4.007333 852 +behavior 4 18 4.060443 16.241772 881 +minim 1 18 4.060443 4.060443 887 +attempt 1 17 4.110874 4.110874 917 +scene 3 14 4.317488 12.952464 1114 +achiev 3 14 4.317488 12.952464 1088 +chuck 1 14 4.317488 4.317488 1108 +consider 1 14 4.317488 4.317488 1076 +deriv 1 13 4.382027 4.382027 1145 +emploi 1 12 4.465908 4.465908 1284 +shape 1 12 4.465908 4.465908 1245 +abil 2 11 4.553877 9.107754 1341 +arbitrari 1 11 4.553877 4.553877 1359 +princip 1 10 4.653960 4.653960 1397 +relationship 1 10 4.653960 4.653960 1383 +observ 6 9 4.753590 28.521540 1578 +surfac 3 9 4.753590 14.260770 1574 +recoveri 2 9 4.753590 9.507180 1474 +dyer 1 9 4.753590 4.753590 1573 +formul 1 8 4.875197 4.875197 1733 +maxim 1 7 5.010635 5.010635 1944 +smooth 1 7 5.010635 5.010635 1855 +viewpoint 3 6 5.164786 15.494358 2116 +reconstruct 2 6 5.164786 10.329572 2170 +recov 1 6 5.164786 5.164786 2235 +provabl 3 5 5.347108 16.041324 2558 +align 1 4 5.568345 5.568345 2863 +visibl 1 4 5.568345 5.568345 2994 +simplifi 1 4 5.568345 5.568345 3066 +kyro 1 2 6.263398 6.263398 6063 +kutulako 1 2 6.263398 6.263398 6064 +descriptionof 1 2 6.263398 6.263398 5513 +thequalit 1 2 6.263398 6.263398 5622 +smoothli 3 1 6.957497 20.872491 17195 +simpleobserv 1 1 6.957497 6.957497 17196 +propertieseasi 1 1 6.957497 6.957497 17197 +fixat 1 1 6.957497 6.957497 17198 +toperform 1 1 6.957497 6.957497 17199 +obstacl 1 1 6.957497 6.957497 17200 +ourwork 1 1 6.957497 6.957497 17201 +pointof 1 1 6.957497 6.957497 17202 +makesimpl 1 1 6.957497 6.957497 17203 +geometryof 1 1 6.957497 6.957497 17204 +thesurfac 1 1 6.957497 6.957497 17205 +generalobserv 1 1 6.957497 6.957497 17206 +objectthan 1 1 6.957497 6.957497 17207 +beexploit 1 1 6.957497 6.957497 17208 +anddeterminist 1 1 6.957497 6.957497 17209 +localshap 1 1 6.957497 6.957497 17210 +qualitativestrategi 1 1 6.957497 6.957497 17211 +viewingdirect 1 1 6.957497 6.957497 17212 +selectedpoint 1 1 6.957497 6.957497 17213 +observationso 1 1 6.957497 6.957497 17214 +observationand 1 1 6.957497 6.957497 17215 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html new file mode 100644 index 00000000..d06af089 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^snakes.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +us 1 329 1.098612 1.098612 16 +gener 1 220 1.386294 1.386294 27 +softwar 1 220 1.386294 1.386294 30 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +distribut 1 162 1.791759 1.791759 51 +avail 1 169 1.791759 1.791759 48 +model 8 145 1.945910 15.567280 69 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +studi 1 120 2.079442 2.079442 91 +final 1 116 2.197225 2.197225 108 +need 1 98 2.302585 2.302585 135 +imag 2 91 2.397895 4.795790 161 +contain 1 81 2.484907 2.484907 174 +activ 1 84 2.484907 2.484907 182 +level 1 87 2.484907 2.484907 180 +optim 1 79 2.564949 2.564949 197 +integr 2 67 2.708050 5.416100 245 +practic 1 70 2.708050 2.708050 246 +function 1 62 2.772589 2.772589 275 +automat 1 61 2.833213 2.833213 306 +special 1 56 2.890372 2.890372 320 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +case 2 51 2.995732 5.991464 351 +approach 1 48 3.044522 3.044522 366 +principl 1 48 3.044522 3.044522 357 +visual 1 48 3.044522 3.044522 372 +combin 1 42 3.218876 3.218876 421 +small 1 39 3.258097 3.258097 447 +field 1 37 3.332205 3.332205 482 +random 1 34 3.401197 3.401197 511 +global 1 34 3.401197 3.401197 520 +transform 1 32 3.465736 3.465736 542 +consid 2 29 3.583519 7.167038 590 +turn 2 29 3.583519 7.167038 586 +framework 1 28 3.610918 3.610918 606 +determin 1 27 3.637586 3.637586 630 +detect 5 26 3.688879 18.444395 646 +experiment 1 26 3.688879 3.688879 645 +valu 1 25 3.737670 3.737670 665 +task 1 25 3.737670 3.737670 678 +initi 1 23 3.806662 3.806662 717 +recognit 1 23 3.806662 3.806662 723 +region 1 19 4.007333 4.007333 875 +along 1 18 4.060443 4.060443 878 +minim 1 18 4.060443 4.060443 887 +lower 1 18 4.060443 4.060443 886 +regular 2 17 4.110874 8.221748 929 +estim 1 17 4.110874 4.110874 930 +conduct 1 14 4.317488 4.317488 1065 +deriv 2 13 4.382027 8.764054 1145 +directli 1 13 4.382027 4.382027 1141 +arbitrari 2 11 4.553877 9.107754 1359 +valid 1 11 4.553877 4.553877 1299 +classif 3 9 4.753590 14.260770 1586 +classifi 1 9 4.753590 4.753590 1537 +equival 1 9 4.753590 4.753590 1496 +extract 5 8 4.875197 24.375985 1728 +formul 3 8 4.875197 14.625591 1733 +invari 1 8 4.875197 4.875197 1748 +furthermor 1 6 5.164786 5.164786 2141 +snake 2 5 5.347108 10.694216 2281 +yield 2 5 5.347108 10.694216 2458 +chin 1 5 5.347108 5.347108 2408 +stabl 1 5 5.347108 5.347108 2309 +markov 1 5 5.347108 5.347108 2280 +contour 9 4 5.568345 50.115105 2812 +subsequ 1 4 5.568345 5.568345 2665 +bayesian 1 4 5.568345 5.568345 2671 +rigor 1 4 5.568345 5.568345 3030 +energi 2 3 5.857933 11.715866 3950 +implicitli 1 3 5.857933 5.857933 3620 +hough 1 3 5.857933 5.857933 3527 +influenc 1 3 5.857933 5.857933 3349 +deform 5 2 6.263398 31.316990 6065 +criterion 1 2 6.263398 6.263398 5885 +pearson 1 2 6.263398 6.263398 5245 +summat 1 2 6.263398 6.263398 5325 +peak 1 2 6.263398 6.263398 5553 +confirm 1 2 6.263398 6.263398 4101 +noisi 2 1 6.957497 13.914994 17216 +fung 1 1 6.957497 6.957497 17217 +roland 1 1 6.957497 6.957497 17218 +ofact 1 1 6.957497 6.957497 17219 +minimax 1 1 6.957497 6.957497 17220 +wherebi 1 1 6.957497 6.957497 17221 +anduniqu 1 1 6.957497 6.957497 17222 +priordistribut 1 1 6.957497 6.957497 17223 +exert 1 1 6.957497 6.957497 17224 +posterior 1 1 6.957497 6.957497 17225 +withpattern 1 1 6.957497 6.957497 17226 +nearman 1 1 6.957497 6.957497 17227 +lemma 1 1 6.957497 6.957497 17228 +classificationtest 1 1 6.957497 6.957497 17229 +margin 1 1 6.957497 6.957497 17230 +gsnake 1 1 6.957497 6.957497 17231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html new file mode 100644 index 00000000..7feedca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^computer-vision^projects^visad.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +system 6 443 0.693147 4.158882 6 +program 6 374 0.693147 4.158882 7 +inform 2 412 0.693147 1.386294 8 +time 5 293 1.098612 5.493060 17 +us 3 329 1.098612 3.295836 16 +languag 5 227 1.386294 6.931470 26 +design 2 213 1.386294 2.772588 25 +link 1 247 1.386294 1.386294 24 +class 2 199 1.609438 3.218876 37 +data 30 170 1.791759 53.752770 49 +base 9 165 1.791759 16.125831 50 +implement 3 152 1.791759 5.375277 52 +develop 3 174 1.791759 5.375277 53 +algorithm 1 162 1.791759 1.791759 57 +object 22 138 1.945910 42.810020 79 +model 6 145 1.945910 11.675460 69 +relat 3 139 1.945910 5.837730 68 +process 2 142 1.945910 3.891820 72 +construct 1 139 1.945910 1.945910 82 +number 2 130 2.079442 4.158884 97 +provid 1 121 2.079442 2.079442 94 +mathemat 4 108 2.197225 8.788900 123 +place 1 106 2.197225 2.197225 124 +specif 1 106 2.197225 2.197225 106 +take 1 97 2.302585 2.302585 134 +user 1 104 2.302585 2.302585 137 +call 4 91 2.397895 9.591580 153 +graphic 2 90 2.397895 4.795790 147 +follow 1 92 2.397895 2.397895 143 +real 1 93 2.397895 2.397895 144 +commun 1 95 2.397895 2.397895 157 +contain 4 81 2.484907 9.939628 174 +control 2 82 2.484907 4.969814 164 +thing 1 84 2.484907 2.484907 189 +help 1 83 2.484907 2.484907 175 +complet 1 77 2.564949 2.564949 208 +exampl 1 77 2.564949 2.564949 195 +interfac 1 79 2.564949 2.564949 209 +orient 1 80 2.564949 2.564949 205 +order 3 69 2.708050 8.124150 249 +function 6 62 2.772589 16.635534 275 +foundat 2 62 2.772589 5.545178 286 +abstract 2 62 2.772589 5.545178 276 +guid 1 63 2.772589 2.772589 267 +experi 1 64 2.772589 2.772589 283 +creat 1 63 2.772589 2.772589 277 +complex 1 64 2.772589 2.772589 269 +type 5 61 2.833213 14.166065 296 +special 1 56 2.890372 2.890372 320 +scientif 2 53 2.944439 5.888878 341 +allow 1 53 2.944439 2.944439 333 +particular 4 51 2.995732 11.982928 352 +visual 9 48 3.044522 27.400698 372 +principl 1 48 3.044522 3.044522 357 +possibl 1 47 3.091042 3.091042 378 +natur 1 44 3.135494 3.135494 406 +anoth 1 45 3.135494 3.135494 408 +show 2 43 3.178054 6.356108 417 +howev 1 41 3.218876 3.218876 422 +map 3 39 3.258097 9.774291 452 +brian 1 38 3.295837 3.295837 466 +paul 1 38 3.295837 3.295837 471 +prototyp 1 38 3.295837 3.295837 463 +close 1 38 3.295837 3.295837 465 +purpos 3 37 3.332205 9.996615 481 +tree 1 36 3.367296 3.367296 492 +approxim 4 35 3.401197 13.604788 509 +idea 1 32 3.465736 3.465736 545 +given 1 32 3.465736 3.465736 538 +express 1 32 3.465736 3.465736 540 +scientist 2 31 3.496508 6.993016 560 +anim 1 31 3.496508 3.496508 557 +specifi 2 30 3.555348 7.110696 568 +domain 1 30 3.555348 3.555348 564 +graph 1 30 3.555348 3.555348 576 +built 1 29 3.583519 3.583519 592 +arrai 3 27 3.637586 10.912758 627 +quit 1 27 3.637586 3.637586 633 +repres 1 26 3.688879 3.688879 656 +altern 1 26 3.688879 3.688879 641 +fundament 1 25 3.737670 3.737670 661 +frame 1 24 3.761200 3.761200 684 +interpret 1 24 3.761200 3.761200 686 +seri 1 24 3.761200 3.761200 708 +flow 1 24 3.761200 3.761200 700 +displai 21 23 3.806662 79.939902 712 +variabl 2 23 3.806662 7.613324 715 +sequenc 1 23 3.806662 3.806662 734 +size 1 23 3.806662 3.806662 713 +defin 13 22 3.850148 50.051924 746 +color 2 22 3.850148 7.700296 762 +thu 3 21 3.912023 11.736069 773 +fact 2 21 3.912023 7.824046 780 +assum 1 19 4.007333 4.007333 845 +appropri 2 18 4.060443 8.120886 883 +along 2 18 4.060443 8.120886 878 +render 1 17 4.110874 4.110874 947 +condit 3 16 4.174387 12.523161 975 +upon 1 16 4.174387 4.174387 978 +alreadi 1 16 4.174387 4.174387 963 +precis 3 15 4.248495 12.745485 1023 +finit 4 14 4.317488 17.269952 1106 +chuck 1 14 4.317488 4.317488 1108 +context 1 13 4.382027 4.382027 1153 +recurs 1 13 4.382027 4.382027 1127 +amount 2 12 4.465908 8.931816 1208 +primit 4 11 4.553877 18.215508 1317 +bill 1 11 4.553877 4.553877 1297 +sens 1 11 4.553877 4.553877 1305 +volum 1 11 4.553877 4.553877 1347 +relationship 1 10 4.653960 4.653960 1383 +dyer 1 9 4.753590 4.753590 1573 +assumpt 1 9 4.753590 4.753590 1514 +ideal 2 8 4.875197 9.750394 1630 +satisfi 2 8 4.875197 9.750394 1694 +therefor 1 7 5.010635 5.010635 1822 +fromth 1 7 5.010635 5.010635 1802 +pipelin 1 7 5.010635 5.010635 1830 +analyt 1 7 5.010635 5.010635 1913 +consequ 1 6 5.164786 5.164786 1989 +tupl 1 5 5.347108 5.347108 2244 +steer 1 5 5.347108 5.347108 2328 +infinit 2 4 5.568345 11.136690 2596 +pixel 2 4 5.568345 11.136690 2831 +wherea 1 4 5.568345 5.568345 2597 +encod 1 4 5.568345 5.568345 2929 +rigor 1 4 5.568345 5.568345 3030 +fora 1 4 5.568345 5.568345 2697 +lattic 13 3 5.857933 76.153129 3721 +interfacefor 1 3 5.857933 5.857933 3534 +scalar 6 2 6.263398 37.580388 4815 +temperatur 4 2 6.263398 25.053592 5985 +ofdata 2 2 6.263398 12.526796 6038 +hibbard 1 2 6.263398 6.263398 6066 +theidea 1 2 6.263398 6.263398 5428 +themathemat 1 2 6.263398 6.263398 4421 +isomorph 1 2 6.263398 6.263398 5976 +scientificdata 1 2 6.263398 6.263398 6067 +radianc 1 2 6.263398 6.263398 6068 +ofcours 1 2 6.263398 6.263398 4064 +axi 1 2 6.263398 6.263398 6069 +remark 1 2 6.263398 6.263398 4124 +wedo 1 2 6.263398 6.263398 5772 +datatyp 1 2 6.263398 6.263398 4129 +expressivenesscondit 4 1 6.957497 27.829988 17232 +voxel 3 1 6.957497 20.872491 17233 +calleda 1 1 6.957497 6.957497 17234 +adha 1 1 6.957497 6.957497 17235 +objectsrepres 1 1 6.957497 6.957497 17236 +objectsfrequ 1 1 6.957497 6.957497 17237 +functionswith 1 1 6.957497 6.957497 17238 +containfinit 1 1 6.957497 6.957497 17239 +chosenfrom 1 1 6.957497 6.957497 17240 +palett 1 1 6.957497 6.957497 17241 +numbersof 1 1 6.957497 6.957497 17242 +computationalmodel 1 1 6.957497 6.957497 17243 +informationcont 1 1 6.957497 6.957497 17244 +thatdisplai 1 1 6.957497 6.957497 17245 +onlythos 1 1 6.957497 6.957497 17246 +itimpl 1 1 6.957497 6.957497 17247 +satisfyingth 1 1 6.957497 6.957497 17248 +expressivenss 1 1 6.957497 6.957497 17249 +onhow 1 1 6.957497 6.957497 17250 +wecan 1 1 6.957497 6.957497 17251 +howprecis 1 1 6.957497 6.957497 17252 +voxelresolut 1 1 6.957497 6.957497 17253 +visualizationprocess 1 1 6.957497 6.957497 17254 +objectsto 1 1 6.957497 6.957497 17255 +theexpress 1 1 6.957497 6.957497 17256 +primitivevari 1 1 6.957497 6.957497 17257 +latitud 1 1 6.957497 6.957497 17258 +constructor 1 1 6.957497 6.957497 17259 +appropriatefor 1 1 6.957497 6.957497 17260 +containsth 1 1 6.957497 6.957497 17261 +canalso 1 1 6.957497 6.957497 17262 +displayi 1 1 6.957497 6.957497 17263 +graphicsprimit 1 1 6.957497 6.957497 17264 +locationand 1 1 6.957497 6.957497 17265 +animationsequ 1 1 6.957497 6.957497 17266 +thedisplai 1 1 6.957497 6.957497 17267 +isnatur 1 1 6.957497 6.957497 17268 +andtemperatur 1 1 6.957497 6.957497 17269 +calledvi 1 1 6.957497 6.957497 17270 +adthat 1 1 6.957497 6.957497 17271 +theircomput 1 1 6.957497 6.957497 17272 +theirprogram 1 1 6.957497 6.957497 17273 +thevi 1 1 6.957497 6.957497 17274 +vvof 1 1 6.957497 6.957497 17275 +thatsatisfi 1 1 6.957497 6.957497 17276 +implementationi 1 1 6.957497 6.957497 17277 +auser 1 1 6.957497 6.957497 17278 +abstractionof 1 1 6.957497 6.957497 17279 +ofmap 1 1 6.957497 6.957497 17280 +defineddata 1 1 6.957497 6.957497 17281 +ingener 1 1 6.957497 6.957497 17282 +usualapproach 1 1 6.957497 6.957497 17283 +bywrit 1 1 6.957497 6.957497 17284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^condor^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^condor^ new file mode 100644 index 00000000..7ae296c3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^condor^ @@ -0,0 +1,53 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 3 340 1.098612 3.295836 18 +engin 1 297 1.098612 1.098612 20 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +object 1 138 1.945910 1.945910 79 +support 1 132 1.945910 1.945910 83 +high 2 130 2.079442 4.158884 101 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +world 1 115 2.197225 2.197225 126 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +larg 1 82 2.484907 2.484907 168 +resourc 1 81 2.484907 2.484907 172 +environ 1 84 2.484907 2.484907 177 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +help 1 83 2.484907 2.484907 175 +goal 1 66 2.708050 2.708050 250 +polici 1 64 2.772589 2.772589 279 +collect 1 65 2.772589 2.772589 268 +guid 1 63 2.772589 2.772589 267 +scientist 1 31 3.496508 3.496508 560 +team 1 27 3.637586 3.637586 625 +challeng 1 26 3.688879 3.688879 653 +enabl 1 26 3.688879 3.688879 655 +increas 1 20 3.951244 3.951244 829 +edulast 1 17 4.110874 4.110874 927 +admin 1 9 4.753590 4.753590 1476 +pool 1 6 5.164786 5.164786 2225 +condor 7 5 5.347108 37.429756 2577 +own 1 5 5.347108 5.347108 2531 +throughput 3 4 5.568345 16.705035 2993 +deploi 1 3 5.857933 5.857933 3750 +evaluatemechan 1 1 6.957497 6.957497 17285 +technologicaland 1 1 6.957497 6.957497 17286 +sociolog 1 1 6.957497 6.957497 17287 +suggestionscondor 1 1 6.957497 6.957497 17288 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html new file mode 100644 index 00000000..ca5d980f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^condor^NEXT.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +peopl 1 96 2.302585 2.302585 132 +homepag 1 93 2.397895 2.397895 148 +septemb 1 65 2.772589 2.772589 274 +next 3 34 3.401197 10.203591 517 +miron 1 14 4.317488 4.317488 1110 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^coral^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^coral^ new file mode 100644 index 00000000..4a2aceb8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^coral^ @@ -0,0 +1,171 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 1 571 0.000000 0.000000 5 +system 4 443 0.693147 2.772588 6 +inform 4 412 0.693147 2.772588 8 +program 3 374 0.693147 2.079441 7 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +also 5 259 1.386294 6.931470 28 +languag 2 227 1.386294 2.772588 26 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +gener 1 220 1.386294 1.386294 27 +includ 7 208 1.609438 11.266066 42 +group 2 183 1.609438 3.218876 36 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +develop 2 174 1.791759 3.583518 53 +data 2 170 1.791759 3.583518 49 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +support 4 132 1.945910 7.783640 83 +relat 2 139 1.945910 3.891820 68 +file 2 132 1.945910 3.891820 70 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +provid 4 121 2.079442 8.317768 94 +databas 3 122 2.079442 6.238326 86 +high 1 130 2.079442 2.079442 101 +compil 1 122 2.079442 2.079442 96 +machin 1 129 2.079442 2.079442 95 +report 1 131 2.079442 2.079442 92 +version 3 113 2.197225 6.591675 122 +structur 1 106 2.197225 2.197225 105 +manag 1 114 2.197225 2.197225 125 +code 1 108 2.197225 2.197225 116 +send 1 114 2.197225 2.197225 109 +user 2 104 2.302585 4.605170 137 +memori 1 101 2.302585 2.302585 139 +octob 2 89 2.397895 4.795790 156 +sinc 1 90 2.397895 2.397895 159 +select 1 91 2.397895 2.397895 154 +question 1 91 2.397895 2.397895 141 +comment 1 93 2.397895 2.397895 146 +contain 2 81 2.484907 4.969814 174 +wide 2 84 2.484907 4.969814 185 +member 1 84 2.484907 2.484907 165 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +environ 1 84 2.484907 2.484907 177 +interfac 2 79 2.564949 5.129898 209 +optim 1 79 2.564949 2.564949 197 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +sourc 1 77 2.564949 2.564949 201 +effici 1 73 2.639057 2.639057 233 +addit 1 74 2.639057 2.639057 228 +degre 1 69 2.708050 2.708050 259 +main 1 67 2.708050 2.708050 256 +evalu 2 64 2.772589 5.545178 266 +complex 1 64 2.772589 2.772589 269 +organ 1 65 2.772589 2.772589 265 +collect 1 65 2.772589 2.772589 268 +interact 1 62 2.772589 2.772589 270 +guid 1 63 2.772589 2.772589 267 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +variou 1 56 2.890372 2.890372 317 +sever 1 56 2.890372 2.890372 322 +overview 1 56 2.890372 2.890372 323 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +instruct 1 53 2.944439 2.944439 332 +investig 1 51 2.995732 2.995732 353 +made 1 44 3.135494 3.135494 398 +term 1 43 3.178054 3.178054 411 +combin 2 42 3.218876 6.437752 421 +announc 2 40 3.258097 6.516194 441 +programm 1 39 3.258097 3.258097 445 +transact 1 39 3.258097 3.258097 438 +submit 1 39 3.258097 3.258097 440 +manual 1 35 3.401197 3.401197 504 +queri 2 33 3.433987 6.867974 524 +extend 1 32 3.465736 3.465736 539 +rang 2 30 3.555348 7.110696 565 +domain 1 30 3.555348 3.555348 564 +releas 6 28 3.610918 21.665508 616 +linux 2 27 3.637586 7.275172 631 +rule 1 26 3.688879 3.688879 638 +enhanc 1 26 3.688879 3.688879 644 +relev 1 26 3.688879 3.688879 637 +comp 1 26 3.688879 3.688879 650 +strategi 2 25 3.737670 7.475340 682 +seri 1 24 3.761200 3.761200 708 +variabl 1 23 3.806662 3.806662 715 +instal 3 22 3.850148 11.550444 754 +disk 2 22 3.850148 7.700296 747 +among 1 21 3.912023 3.912023 781 +newsgroup 1 21 3.912023 3.912023 783 +binari 7 20 3.951244 27.658708 823 +edulast 1 17 4.110874 4.110874 927 +choos 1 16 4.174387 4.174387 964 +permit 1 16 4.174387 4.174387 962 +choic 1 16 4.174387 4.174387 979 +atth 1 15 4.248495 4.248495 1019 +indic 1 15 4.248495 4.248495 1013 +forth 1 13 4.382027 4.382027 1186 +misc 1 13 4.382027 4.382027 1124 +solari 2 12 4.465908 8.931816 1238 +robust 1 12 4.465908 4.465908 1271 +deduct 1 12 4.465908 4.465908 1236 +stai 1 12 4.465908 4.465908 1215 +primit 1 11 4.553877 4.553877 1317 +modul 2 10 4.653960 9.307920 1434 +resid 2 10 4.653960 9.307920 1461 +underli 1 10 4.653960 4.653960 1410 +rich 1 10 4.653960 4.653960 1396 +declar 4 9 4.753590 19.014360 1526 +desir 1 9 4.753590 4.753590 1542 +readm 1 8 4.875197 4.875197 1699 +canb 1 7 5.010635 5.010635 1846 +aggreg 1 6 5.164786 5.164786 2219 +coral 21 5 5.347108 112.289268 2538 +augment 1 5 5.347108 5.347108 2350 +tupl 1 5 5.347108 5.347108 2244 +quantifi 1 5 5.347108 5.347108 2525 +lang 1 5 5.347108 5.347108 2294 +imper 2 4 5.568345 11.136690 3067 +delet 1 4 5.568345 5.568345 2691 +suno 1 4 5.568345 5.568345 2790 +claus 1 3 5.857933 5.857933 3733 +hpux 1 3 5.857933 5.857933 3780 +grab 2 2 6.263398 12.526796 5723 +objectiveoverviewreleas 1 2 6.263398 6.263398 6070 +informationse 1 2 6.263398 6.263398 6071 +horn 1 2 6.263398 6.263398 6072 +negat 1 2 6.263398 6.263398 6073 +andautomat 1 2 6.263398 6.263398 5413 +reciev 1 2 6.263398 6.263398 5600 +nobin 2 1 6.957497 13.914994 17289 +projectcor 1 1 6.957497 6.957497 17290 +projectdocu 1 1 6.957497 6.957497 17291 +coralpeopl 1 1 6.957497 6.957497 17292 +coraloth 1 1 6.957497 6.957497 17293 +madisonobject 1 1 6.957497 6.957497 17294 +efficientdeduct 1 1 6.957497 6.957497 17295 +coralsystem 1 1 6.957497 6.957497 17296 +durationof 1 1 6.957497 6.957497 17297 +declaritiveand 1 1 6.957497 6.957497 17298 +supportsgener 1 1 6.957497 6.957497 17299 +coralimplement 1 1 6.957497 6.957497 17300 +modulein 1 1 6.957497 6.957497 17301 +insertand 1 1 6.957497 6.957497 17302 +canprogram 1 1 6.957497 6.957497 17303 +withcor 1 1 6.957497 6.957497 17304 +allowingc 1 1 6.957497 6.957497 17305 +coralimplemen 1 1 6.957497 6.957497 17306 +theexodusstorag 1 1 6.957497 6.957497 17307 +manang 1 1 6.957497 6.957497 17308 +aclient 1 1 6.957497 6.957497 17309 +requiringy 1 1 6.957497 6.957497 17310 +announcemnt 1 1 6.957497 6.957497 17311 +listwhich 1 1 6.957497 6.957497 17312 +shawn 1 1 6.957497 6.957497 17313 +flisakowski 1 1 6.957497 6.957497 17314 +flisakow 1 1 6.957497 6.957497 17315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^cpnet^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^cpnet^ new file mode 100644 index 00000000..6c1fb6f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^cpnet^ @@ -0,0 +1,233 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +system 7 443 0.693147 4.852029 6 +research 5 431 0.693147 3.465735 10 +inform 3 412 0.693147 2.079441 8 +interest 3 384 0.693147 2.079441 11 +program 1 374 0.693147 0.693147 7 +us 6 329 1.098612 6.591672 16 +engin 1 297 1.098612 1.098612 20 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +link 4 247 1.386294 5.545176 24 +also 3 259 1.386294 4.158882 28 +softwar 2 220 1.386294 2.772588 30 +languag 2 227 1.386294 2.772588 26 +wisc 2 242 1.386294 2.772588 33 +list 4 201 1.609438 6.437752 39 +paper 4 205 1.609438 6.437752 38 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +applic 8 170 1.791759 14.334072 56 +algorithm 7 162 1.791759 12.542313 57 +implement 3 152 1.791759 5.375277 52 +avail 2 169 1.791759 3.583518 48 +base 2 165 1.791759 3.583518 50 +contact 1 153 1.791759 1.791759 59 +data 1 170 1.791759 1.791759 49 +problem 15 147 1.945910 29.188650 75 +file 3 132 1.945910 5.837730 70 +model 3 145 1.945910 5.837730 69 +construct 2 139 1.945910 3.891820 82 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +document 2 121 2.079442 4.158884 89 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +well 2 109 2.197225 4.394450 121 +theori 2 111 2.197225 4.394450 127 +look 2 107 2.197225 4.394450 115 +mathemat 1 108 2.197225 2.197225 123 +access 3 102 2.302585 6.907755 136 +techniqu 1 99 2.302585 2.302585 138 +search 2 95 2.397895 4.795790 155 +center 1 88 2.397895 2.397895 158 +commun 1 95 2.397895 2.397895 157 +mani 1 92 2.397895 2.397895 150 +octob 1 89 2.397895 2.397895 156 +help 2 83 2.484907 4.969814 175 +solut 2 82 2.484907 4.969814 162 +librari 1 87 2.484907 2.484907 181 +requir 1 81 2.484907 2.484907 167 +method 2 80 2.564949 5.129898 213 +optim 1 79 2.564949 2.564949 197 +sourc 1 77 2.564949 2.564949 201 +interfac 1 79 2.564949 2.564949 209 +appli 2 71 2.639057 5.278114 226 +nation 2 74 2.639057 5.278114 240 +solv 1 73 2.639057 2.639057 234 +function 2 62 2.772589 5.545178 275 +result 1 65 2.772589 2.772589 281 +collect 1 65 2.772589 2.772589 268 +evalu 1 64 2.772589 2.772589 266 +laboratori 1 63 2.772589 2.772589 292 +content 1 59 2.833213 2.833213 302 +point 6 58 2.890372 17.342232 319 +sever 3 56 2.890372 8.671116 322 +detail 2 57 2.890372 5.780744 321 +direct 2 57 2.890372 5.780744 316 +major 1 56 2.890372 2.890372 315 +overview 1 56 2.890372 2.890372 323 +three 1 54 2.944439 2.944439 330 +allow 1 53 2.944439 2.944439 333 +extens 1 53 2.944439 2.944439 340 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +case 1 51 2.995732 2.995732 351 +run 1 51 2.995732 2.995732 347 +give 3 50 3.044522 9.133566 359 +basic 1 50 3.044522 3.044522 360 +pointer 1 48 3.044522 3.044522 368 +without 1 50 3.044522 3.044522 370 +archiv 1 49 3.044522 3.044522 364 +possibl 1 47 3.091042 3.091042 378 +directori 2 45 3.135494 6.270988 396 +describ 1 45 3.135494 3.135494 400 +keep 1 44 3.135494 3.135494 409 +linear 2 41 3.218876 6.437752 431 +small 1 39 3.258097 3.258097 447 +form 1 39 3.258097 3.258097 443 +origin 1 38 3.295837 3.295837 472 +download 1 36 3.367296 3.367296 489 +approxim 4 35 3.401197 13.604788 509 +michael 2 35 3.401197 6.802394 514 +survei 1 35 3.401197 3.401197 513 +everi 1 34 3.401197 3.401197 519 +within 1 33 3.433987 3.433987 525 +given 2 32 3.465736 6.931472 538 +taken 1 31 3.496508 3.496508 555 +exist 1 30 3.555348 3.555348 569 +option 1 30 3.555348 3.555348 575 +steve 1 29 3.583519 3.583519 594 +becom 1 28 3.610918 3.610918 603 +measur 1 28 3.610918 3.610918 609 +determin 1 27 3.637586 3.637586 630 +relev 2 26 3.688879 7.377758 637 +consist 2 26 3.688879 7.377758 651 +subject 1 26 3.688879 3.688879 647 +compar 1 26 3.688879 3.688879 648 +strategi 2 25 3.737670 7.475340 682 +known 1 24 3.761200 3.761200 702 +equat 4 23 3.806662 15.226648 724 +sequenc 2 23 3.806662 7.613324 734 +serv 1 22 3.850148 3.850148 758 +almost 1 22 3.850148 3.850148 742 +path 6 21 3.912023 23.472138 778 +similar 1 21 3.912023 3.912023 771 +avoid 1 21 3.912023 3.912023 799 +entir 1 20 3.951244 3.951244 811 +along 1 18 4.060443 4.060443 878 +spars 1 16 4.174387 4.174387 989 +matlab 2 14 4.317488 8.634976 1081 +role 1 14 4.317488 4.317488 1101 +nonlinear 1 14 4.317488 4.317488 1107 +easili 1 14 4.317488 4.317488 1077 +econom 3 13 4.382027 13.146081 1184 +cannot 2 13 4.382027 8.764054 1144 +step 2 13 4.382027 8.764054 1138 +directli 1 13 4.382027 4.382027 1141 +forth 1 13 4.382027 4.382027 1186 +deriv 1 13 4.382027 4.382027 1145 +emploi 2 12 4.465908 8.931816 1284 +iter 2 12 4.465908 8.931816 1206 +evolv 1 12 4.465908 4.465908 1223 +regard 1 11 4.553877 4.553877 1309 +underli 1 10 4.653960 4.653960 1410 +establish 1 9 4.753590 4.753590 1532 +routin 1 9 4.753590 4.753590 1549 +mile 4 8 4.875197 19.500788 1743 +ferri 3 8 4.875197 14.625591 1715 +formul 1 8 4.875197 4.875197 1733 +solver 7 7 5.010635 35.074445 1911 +newton 7 7 5.010635 35.074445 1824 +smooth 5 7 5.010635 25.053175 1855 +secondari 1 7 5.010635 5.010635 1884 +converg 1 7 5.010635 5.010635 1844 +zero 1 7 5.010635 5.010635 1896 +divers 1 6 5.164786 5.164786 2232 +mix 1 6 5.164786 5.164786 2200 +freeli 1 6 5.164786 5.164786 2014 +subsystem 1 6 5.164786 5.164786 2015 +interior 2 5 5.347108 10.694216 2439 +decad 1 5 5.347108 5.347108 2455 +complementari 1 5 5.347108 5.347108 2523 +pivot 1 5 5.347108 5.347108 2426 +merit 1 5 5.347108 5.347108 2466 +argonn 1 5 5.347108 5.347108 2461 +monograph 1 4 5.568345 5.568345 2860 +areavail 1 4 5.568345 5.568345 2810 +colorado 1 4 5.568345 5.568345 2938 +algorithmsand 1 4 5.568345 5.568345 2680 +trick 1 4 5.568345 5.568345 2967 +complementar 15 3 5.857933 87.868995 3999 +toolbox 3 3 5.857933 17.573799 3112 +neta 1 3 5.857933 5.857933 3789 +forthes 1 3 5.857933 5.857933 3199 +andm 1 3 5.857933 5.857933 3901 +violat 1 3 5.857933 5.857933 3211 +engineeringand 1 3 5.857933 5.857933 3779 +preprocessor 1 3 5.857933 5.857933 3844 +energi 1 3 5.857933 5.857933 3950 +gam 10 2 6.263398 62.633980 4758 +edufor 1 2 6.263398 6.263398 5831 +lemk 1 2 6.263398 6.263398 5693 +similarto 1 2 6.263398 6.263398 6074 +anapproxim 1 2 6.263398 6.263398 5045 +norm 1 2 6.263398 6.263398 5643 +perturb 1 2 6.263398 6.263398 6075 +leadto 1 2 6.263398 6.263398 5350 +residu 1 2 6.263398 6.263398 4741 +thecurr 1 2 6.263398 6.263398 5862 +equilibrium 1 2 6.263398 6.263398 4259 +thegam 1 2 6.263398 6.263398 5430 +pointmethod 1 2 6.263398 6.263398 4835 +nonsmooth 5 1 6.957497 34.787485 17316 +mcplib 2 1 6.957497 13.914994 17317 +functionevalu 2 1 6.957497 13.914994 17318 +subproblem 2 1 6.957497 13.914994 17319 +uponreformul 2 1 6.957497 13.914994 17320 +fruitfuldisciplin 1 1 6.957497 6.957497 17321 +incomplementar 1 1 6.957497 6.957497 17322 +meetingsof 1 1 6.957497 6.957497 17323 +forcomplementar 1 1 6.957497 6.957497 17324 +researcherssoftwar 1 1 6.957497 6.957497 17325 +problemdescript 1 1 6.957497 6.957497 17326 +frommatlab 1 1 6.957497 6.957497 17327 +jacobian 1 1 6.957497 6.957497 17328 +specificvers 1 1 6.957497 6.957497 17329 +hook 1 1 6.957497 6.957497 17330 +rutherford 1 1 6.957497 6.957497 17331 +classicaljosephi 1 1 6.957497 6.957497 17332 +linearizedsubproblem 1 1 6.957497 6.957497 17333 +defineth 1 1 6.957497 6.957497 17334 +dampedlinesearch 1 1 6.957497 6.957497 17335 +infeas 1 1 6.957497 6.957497 17336 +restartprocedur 1 1 6.957497 6.957497 17337 +totermin 1 1 6.957497 6.957497 17338 +rescal 1 1 6.957497 6.957497 17339 +equilibr 1 1 6.957497 6.957497 17340 +elementsappear 1 1 6.957497 6.957497 17341 +mcpor 1 1 6.957497 6.957497 17342 +anonsmooth 1 1 6.957497 6.957497 17343 +reformul 1 1 6.957497 6.957497 17344 +algorithmconsist 1 1 6.957497 6.957497 17345 +pathto 1 1 6.957497 6.957497 17346 +aposs 1 1 6.957497 6.957497 17347 +thepath 1 1 6.957497 6.957497 17348 +partiallycomput 1 1 6.957497 6.957497 17349 +relinear 1 1 6.957497 6.957497 17350 +anonmonoton 1 1 6.957497 6.957497 17351 +watchdog 1 1 6.957497 6.957497 17352 +minima 1 1 6.957497 6.957497 17353 +robustnessimprov 1 1 6.957497 6.957497 17354 +proxim 1 1 6.957497 6.957497 17355 +qpcomp 1 1 6.957497 6.957497 17356 +ishandl 1 1 6.957497 6.957497 17357 +thenapproxim 1 1 6.957497 6.957497 17358 +theaccuraci 1 1 6.957497 6.957497 17359 +mpsge 1 1 6.957497 6.957497 17360 +thatallow 1 1 6.957497 6.957497 17361 +nemsth 1 1 6.957497 6.957497 17362 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^exodus^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^exodus^ new file mode 100644 index 00000000..4783351f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^exodus^ @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +project 3 340 1.098612 3.295836 18 +wisc 4 242 1.386294 5.545176 33 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +avail 1 169 1.791759 1.791759 48 +contact 1 153 1.791759 1.791759 59 +object 1 138 1.945910 1.945910 79 +construct 1 139 1.945910 1.945910 82 +support 1 132 1.945910 1.945910 83 +relat 1 139 1.945910 1.945910 68 +databas 1 122 2.079442 2.079442 86 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +manag 2 114 2.197225 4.394450 125 +user 2 104 2.302585 4.605170 137 +need 1 98 2.302585 2.302585 135 +requir 1 81 2.484907 2.484907 167 +orient 1 80 2.564949 2.564949 205 +april 1 77 2.564949 2.564949 196 +david 1 71 2.639057 2.639057 232 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +still 1 50 3.044522 3.044522 362 +michael 1 35 3.401197 3.401197 514 +storag 3 31 3.496508 10.489524 553 +mike 1 24 3.761200 3.761200 703 +prepar 1 20 3.951244 3.951244 824 +benchmark 2 19 4.007333 8.014666 859 +minim 1 18 4.060443 4.060443 887 +carei 1 8 4.875197 4.875197 1781 +licens 1 5 5.347108 5.347108 2520 +exodu 5 4 5.568345 27.841725 3075 +zwill 2 4 5.568345 11.136690 3076 +successor 1 3 5.857933 5.857933 3576 +theexodu 1 2 6.263398 6.263398 6076 +persistentprogram 1 2 6.263398 6.263398 5997 +pageexodu 1 1 6.957497 6.957497 17363 +toolkitnot 1 1 6.957497 6.957497 17364 +succed 1 1 6.957497 6.957497 17365 +theshor 1 1 6.957497 6.957497 17366 +eduprincip 1 1 6.957497 6.957497 17367 +dewittse 1 1 6.957497 6.957497 17368 +exodusshor 1 1 6.957497 6.957497 17369 +exoduslatest 1 1 6.957497 6.957497 17370 +compilercontribut 1 1 6.957497 6.957497 17371 +managera 1 1 6.957497 6.957497 17372 +exodus_al 1 1 6.957497 6.957497 17373 +oodbsdat 1 1 6.957497 6.957497 17374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^paradise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^paradise^ new file mode 100644 index 00000000..1fc91a7c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^paradise^ @@ -0,0 +1,197 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +inform 4 412 0.693147 2.772588 8 +system 3 443 0.693147 2.079441 6 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +us 4 329 1.098612 4.394448 16 +project 2 340 1.098612 2.197224 18 +current 1 284 1.098612 1.098612 21 +also 3 259 1.386294 4.158882 28 +design 2 213 1.386294 2.772588 25 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +data 7 170 1.791759 12.542313 49 +parallel 3 169 1.791759 5.375277 60 +applic 2 170 1.791759 3.583518 56 +contact 2 153 1.791759 3.583518 59 +madison 2 165 1.791759 3.583518 55 +implement 2 152 1.791759 3.583518 52 +base 1 165 1.791759 1.791759 50 +wisconsin 1 169 1.791759 1.791759 54 +object 6 138 1.945910 11.675460 79 +support 4 132 1.945910 7.783640 83 +relat 2 139 1.945910 3.891820 68 +model 2 145 1.945910 3.891820 69 +area 2 144 1.945910 3.891820 80 +problem 1 147 1.945910 1.945910 75 +click 1 142 1.945910 1.945910 78 +architectur 1 139 1.945910 1.945910 77 +databas 6 122 2.079442 12.476652 86 +provid 4 121 2.079442 8.317768 94 +document 1 121 2.079442 2.079442 89 +report 1 131 2.079442 2.079442 92 +technolog 1 131 2.079442 2.079442 102 +dayton 1 119 2.079442 2.079442 104 +manag 2 114 2.197225 4.394450 125 +assist 1 112 2.197225 2.197225 113 +version 1 113 2.197225 2.197225 122 +peopl 1 96 2.302585 2.302585 132 +advanc 1 99 2.302585 2.302585 130 +user 1 104 2.302585 2.302585 137 +access 1 102 2.302585 2.302585 136 +graphic 3 90 2.397895 7.193685 147 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +help 1 83 2.484907 2.484907 175 +west 1 83 2.484907 2.484907 192 +server 5 76 2.564949 12.824745 204 +interfac 3 79 2.564949 7.694847 209 +issu 2 78 2.564949 5.129898 211 +method 2 80 2.564949 5.129898 213 +optim 1 79 2.564949 2.564949 197 +orient 1 80 2.564949 2.564949 205 +exampl 1 77 2.564949 2.564949 195 +come 1 78 2.564949 2.564949 202 +addit 1 74 2.639057 2.639057 228 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +david 1 71 2.639057 2.639057 232 +order 1 69 2.708050 2.708050 249 +creat 3 63 2.772589 8.317767 277 +result 2 65 2.772589 5.545178 281 +complex 1 64 2.772589 2.772589 269 +prof 1 64 2.772589 2.772589 273 +type 3 61 2.833213 8.499639 296 +content 1 59 2.833213 2.833213 302 +back 1 60 2.833213 2.833213 297 +point 1 58 2.890372 2.890372 319 +sever 1 56 2.890372 2.890372 322 +sampl 3 53 2.944439 8.833317 339 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +tabl 1 51 2.995732 2.995732 346 +set 2 50 3.044522 6.089044 361 +could 1 46 3.091042 3.091042 383 +execut 2 45 3.135494 6.270988 404 +video 1 44 3.135494 3.135494 405 +examin 1 42 3.218876 3.218876 424 +multipl 1 39 3.258097 3.258097 453 +streetmadison 1 38 3.295837 3.295837 474 +connect 1 37 3.332205 3.332205 485 +especi 1 36 3.367296 3.367296 496 +either 2 35 3.401197 6.802394 506 +queri 15 33 3.433987 51.509805 524 +extend 1 32 3.465736 3.465736 539 +ad 1 32 3.465736 3.465736 544 +built 1 29 3.583519 3.583519 592 +hope 1 28 3.610918 3.610918 610 +manipul 2 27 3.637586 7.275172 624 +client 5 25 3.737670 18.688350 679 +store 2 24 3.761200 7.522400 693 +scalabl 1 24 3.761200 3.761200 705 +handl 1 24 3.761200 3.761200 685 +displai 4 23 3.806662 15.226648 712 +brows 3 23 3.806662 11.419986 726 +size 1 23 3.806662 3.806662 713 +thread 1 23 3.806662 3.806662 722 +defin 1 22 3.850148 3.850148 746 +mpeg 1 20 3.951244 3.951244 831 +benchmark 1 19 4.007333 4.007333 859 +layer 3 17 4.110874 12.332622 926 +spatial 4 16 4.174387 16.697548 988 +massiv 1 15 4.248495 4.248495 1026 +indic 1 15 4.248495 4.248495 1013 +attribut 4 14 4.317488 17.269952 1092 +front 3 13 4.382027 13.146081 1154 +script 1 13 4.382027 4.382027 1171 +menu 1 13 4.382027 4.382027 1156 +composit 1 13 4.382027 4.382027 1150 +context 1 13 4.382027 4.382027 1153 +calcul 1 12 4.465908 4.465908 1268 +emploi 1 12 4.465908 4.465908 1284 +shore 2 11 4.553877 9.107754 1377 +string 1 11 4.553877 4.553877 1340 +persist 1 11 4.553877 4.553877 1367 +abil 1 11 4.553877 4.553877 1341 +subset 2 10 4.653960 9.307920 1425 +vldb 1 10 4.653960 4.653960 1470 +underli 1 10 4.653960 4.653960 1410 +correspond 1 10 4.653960 4.653960 1382 +custom 1 10 4.653960 4.653960 1414 +label 1 10 4.653960 4.653960 1423 +compos 1 9 4.753590 4.753590 1527 +paradis 23 8 4.875197 112.129531 1782 +polygon 3 8 4.875197 14.625591 1723 +databasesystem 1 8 4.875197 4.875197 1617 +sensit 1 8 4.875197 4.875197 1726 +insert 1 8 4.875197 4.875197 1687 +successfulli 1 7 5.010635 5.010635 1869 +geograph 2 6 5.164786 10.329572 2236 +drop 2 6 5.164786 10.329572 2008 +band 1 6 5.164786 5.164786 2198 +invok 1 6 5.164786 5.164786 2079 +syntax 1 6 5.164786 5.164786 2030 +ship 2 5 5.347108 10.694216 2534 +aim 1 5 5.347108 5.347108 2477 +tupl 1 5 5.347108 5.347108 2244 +madisoncomput 1 5 5.347108 5.347108 2391 +andevalu 1 4 5.568345 5.568345 2706 +zoom 1 4 5.568345 5.568345 2961 +insur 1 4 5.568345 5.568345 2939 +providesa 1 3 5.857933 5.857933 3884 +informationse 1 2 6.263398 6.263398 6071 +serverobject 1 2 6.263398 6.263398 6077 +raster 1 2 6.263398 6.263398 6078 +polylin 1 2 6.263398 6.263398 6079 +sketch 1 2 6.263398 6.263398 5946 +extent 1 2 6.263398 6.263398 6080 +paid 1 2 6.263398 6.263398 6081 +biswadeep 1 2 6.263398 6.263398 4805 +projectparadis 1 1 6.957497 6.957497 17375 +frontend 1 1 6.957497 6.957497 17376 +sequoia 1 1 6.957497 6.957497 17377 +iscap 1 1 6.957497 6.957497 17378 +applyingobject 1 1 6.957497 6.957497 17379 +ofstor 1 1 6.957497 6.957497 17380 +tosignificantli 1 1 6.957497 6.957497 17381 +thatcan 1 1 6.957497 6.957497 17382 +andsupport 1 1 6.957497 6.957497 17383 +paradiseprovid 1 1 6.957497 6.957497 17384 +gisappl 1 1 6.957497 6.957497 17385 +asinteg 1 1 6.957497 6.957497 17386 +circl 1 1 6.957497 6.957497 17387 +spatialattribut 1 1 6.957497 6.957497 17388 +foroverlap 1 1 6.957497 6.957497 17389 +selectingcolor 1 1 6.957497 6.957497 17390 +withad 1 1 6.957497 6.957497 17391 +issueimplicit 1 1 6.957497 6.957497 17392 +arubb 1 1 6.957497 6.957497 17393 +querycompos 1 1 6.957497 6.957497 17394 +databaseschema 1 1 6.957497 6.957497 17395 +beview 1 1 6.957497 6.957497 17396 +bedisplai 1 1 6.957497 6.957497 17397 +sqlwe 1 1 6.957497 6.957497 17398 +extendedset 1 1 6.957497 6.957497 17399 +byus 1 1 6.957497 6.957497 17400 +standarddatabas 1 1 6.957497 6.957497 17401 +anddrop 1 1 6.957497 6.957497 17402 +paradiseserv 1 1 6.957497 6.957497 17403 +theresult 1 1 6.957497 6.957497 17404 +ismulti 1 1 6.957497 6.957497 17405 +sameserv 1 1 6.957497 6.957497 17406 +carefulattent 1 1 6.957497 6.957497 17407 +processqueri 1 1 6.957497 6.957497 17408 +largevolum 1 1 6.957497 6.957497 17409 +frontendeurop 1 1 6.957497 6.957497 17410 +pressher 1 1 6.957497 6.957497 17411 +projectattn 1 1 6.957497 6.957497 17412 +dewittunivers 1 1 6.957497 6.957497 17413 +edumor 1 1 6.957497 6.957497 17414 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^scout^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^scout^ new file mode 100644 index 00000000..a3540306 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^scout^ @@ -0,0 +1,82 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +student 3 343 1.098612 3.295836 19 +project 3 340 1.098612 3.295836 18 +us 2 329 1.098612 2.197224 16 +updat 1 191 1.609438 1.609438 41 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +network 1 168 1.791759 1.791759 61 +support 1 132 1.945910 1.945910 83 +report 3 131 2.079442 6.238326 92 +provid 2 121 2.079442 4.158884 94 +tool 1 117 2.079442 2.079442 93 +welcom 1 122 2.079442 2.079442 99 +intern 4 108 2.197225 8.788900 128 +look 1 107 2.197225 2.197225 115 +site 1 106 2.197225 2.197225 119 +text 1 98 2.302585 2.302585 133 +commun 2 95 2.397895 4.795790 157 +comment 1 93 2.397895 2.397895 146 +internet 6 83 2.484907 14.909442 186 +resourc 2 81 2.484907 4.969814 172 +educ 1 86 2.484907 2.484907 191 +know 1 80 2.564949 2.564949 198 +servic 4 72 2.639057 10.556228 236 +onlin 1 75 2.639057 2.639057 223 +goal 1 66 2.708050 2.708050 250 +organ 1 65 2.772589 2.772589 265 +best 3 59 2.833213 8.499639 299 +locat 1 59 2.833213 2.833213 303 +sever 1 56 2.890372 2.890372 322 +suggest 2 53 2.944439 5.888878 331 +three 1 54 2.944439 2.944439 330 +week 1 52 2.995732 2.995732 343 +format 1 48 3.044522 3.044522 356 +effect 1 46 3.091042 3.091042 385 +show 1 43 3.178054 3.178054 417 +offer 1 43 3.178054 3.178054 414 +howev 1 41 3.218876 3.218876 422 +announc 1 40 3.258097 3.258097 441 +primari 1 25 3.737670 3.737670 669 +daili 1 24 3.761200 3.761200 706 +annot 1 21 3.912023 3.912023 775 +theunivers 1 21 3.912023 3.912023 797 +longer 1 20 3.951244 3.951244 816 +entir 1 20 3.951244 3.951244 811 +toolkit 1 20 3.951244 3.951244 835 +universityof 1 15 4.248495 4.248495 1061 +everyon 1 13 4.382027 4.382027 1148 +summar 1 11 4.553877 4.553877 1295 +discov 1 9 4.753590 4.753590 1562 +hundr 1 9 4.753590 4.753590 1528 +filter 1 8 4.875197 4.875197 1641 +scout 7 7 5.010635 35.074445 1903 +happen 1 7 5.010635 5.010635 1790 +valuabl 1 5 5.347108 5.347108 2256 +newli 1 3 5.857933 5.857933 3786 +useth 1 3 5.857933 5.857933 3110 +thescout 1 2 6.263398 6.263398 6082 +homepagego 1 1 6.957497 6.957497 17415 +versionnewslett 1 1 6.957497 6.957497 17416 +newand 1 1 6.957497 6.957497 17417 +toolsinternet 1 1 6.957497 6.957497 17418 +effectiveinternet 1 1 6.957497 6.957497 17419 +availablea 1 1 6.957497 6.957497 17420 +studentssurf 1 1 6.957497 6.957497 17421 +smarter 1 1 6.957497 6.957497 17422 +canchoos 1 1 6.957497 6.957497 17423 +annoucementseach 1 1 6.957497 6.957497 17424 +networktool 1 1 6.957497 6.957497 17425 +vefound 1 1 6.957497 6.957497 17426 +byeduc 1 1 6.957497 6.957497 17427 +encouragefeedback 1 1 6.957497 6.957497 17428 +ournewest 1 1 6.957497 6.957497 17429 +feedbackscout 1 1 6.957497 6.957497 17430 +servicesfor 1 1 6.957497 6.957497 17431 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^shore^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^shore^ new file mode 100644 index 00000000..5af9af14 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^shore^ @@ -0,0 +1,482 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +system 21 443 0.693147 14.556087 6 +inform 5 412 0.693147 3.465735 8 +research 4 431 0.693147 2.772588 10 +program 4 374 0.693147 2.772588 7 +interest 3 384 0.693147 2.079441 11 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +project 8 340 1.098612 8.788896 18 +us 4 329 1.098612 4.394448 16 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +mail 10 238 1.386294 13.862940 22 +languag 9 227 1.386294 12.476646 26 +wisc 8 242 1.386294 11.090352 33 +softwar 6 220 1.386294 8.317764 30 +design 5 213 1.386294 6.931470 25 +link 3 247 1.386294 4.158882 24 +also 2 259 1.386294 2.772588 28 +list 18 201 1.609438 28.969884 39 +includ 3 208 1.609438 4.828314 42 +public 2 202 1.609438 3.218876 43 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +data 17 170 1.791759 30.459903 49 +applic 12 170 1.791759 21.501108 56 +develop 3 174 1.791759 5.375277 53 +madison 2 165 1.791759 3.583518 55 +base 2 165 1.791759 3.583518 50 +avail 2 169 1.791759 3.583518 48 +read 2 154 1.791759 3.583518 47 +implement 1 152 1.791759 1.791759 52 +wisconsin 1 169 1.791759 1.791759 54 +network 1 168 1.791759 1.791759 61 +parallel 1 169 1.791759 1.791759 60 +contact 1 153 1.791759 1.791759 59 +object 36 138 1.945910 70.052760 79 +file 16 132 1.945910 31.134560 70 +support 7 132 1.945910 13.621370 83 +model 4 145 1.945910 7.783640 69 +first 4 140 1.945910 7.783640 71 +relat 3 139 1.945910 5.837730 68 +like 3 132 1.945910 5.837730 81 +process 3 142 1.945910 5.837730 72 +architectur 2 139 1.945910 3.891820 77 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +hall 1 146 1.945910 1.945910 65 +provid 6 121 2.079442 12.476652 94 +document 3 121 2.079442 6.238326 89 +technolog 3 131 2.079442 6.238326 102 +databas 2 122 2.079442 4.158884 86 +high 1 130 2.079442 2.079442 101 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +compil 1 122 2.079442 2.079442 96 +report 1 131 2.079442 2.079442 92 +version 5 113 2.197225 10.986125 122 +manag 3 114 2.197225 6.591675 125 +structur 3 106 2.197225 6.591675 105 +make 3 111 2.197225 6.591675 120 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +world 1 115 2.197225 2.197225 126 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +access 7 102 2.302585 16.118095 136 +text 3 98 2.302585 6.907755 133 +user 3 104 2.302585 6.907755 137 +need 2 98 2.302585 4.605170 135 +question 2 91 2.397895 4.795790 141 +mani 2 92 2.397895 4.795790 150 +section 1 94 2.397895 2.397895 149 +commun 1 95 2.397895 2.397895 157 +sinc 1 90 2.397895 2.397895 159 +pictur 1 89 2.397895 2.397895 160 +comment 1 93 2.397895 2.397895 146 +environ 3 84 2.484907 7.454721 177 +larg 3 82 2.484907 7.454721 168 +chang 3 82 2.484907 7.454721 163 +second 2 81 2.484907 4.969814 166 +build 2 85 2.484907 4.969814 184 +contain 2 81 2.484907 4.969814 174 +help 2 83 2.484907 4.969814 175 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +requir 1 81 2.484907 2.484907 167 +start 1 83 2.484907 2.484907 173 +server 6 76 2.564949 15.389694 204 +messag 5 76 2.564949 12.824745 212 +interfac 3 79 2.564949 7.694847 209 +orient 3 80 2.564949 7.694847 205 +sourc 2 77 2.564949 5.129898 201 +exampl 2 77 2.564949 5.129898 195 +refer 1 78 2.564949 2.564949 203 +want 1 79 2.564949 2.564949 199 +name 5 72 2.639057 13.195285 220 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +write 1 72 2.639057 2.639057 222 +goal 3 66 2.708050 8.124150 250 +differ 2 66 2.708050 5.416100 253 +degre 1 69 2.708050 2.708050 259 +order 1 69 2.708050 2.708050 249 +august 1 66 2.708050 2.708050 257 +receiv 1 66 2.708050 2.708050 244 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +improv 1 62 2.772589 2.772589 289 +copi 1 63 2.772589 2.772589 284 +type 11 61 2.833213 31.165343 296 +content 4 59 2.833213 11.332852 302 +share 2 59 2.833213 5.666426 304 +simpl 1 60 2.833213 2.833213 298 +plai 1 60 2.833213 2.833213 307 +unix 12 58 2.890372 34.684464 308 +space 4 57 2.890372 11.561488 310 +major 3 56 2.890372 8.671116 315 +overview 1 56 2.890372 2.890372 323 +detail 1 57 2.890372 2.890372 321 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +point 1 58 2.890372 2.890372 319 +processor 5 54 2.944439 14.722195 335 +three 1 54 2.944439 2.944439 330 +extens 1 53 2.944439 2.944439 340 +found 1 53 2.944439 2.944439 337 +hardwar 6 51 2.995732 17.974392 350 +much 2 52 2.995732 5.991464 349 +digit 2 52 2.995732 5.991464 348 +run 1 51 2.995732 2.995732 347 +tabl 1 51 2.995732 2.995732 346 +date 1 51 2.995732 2.995732 344 +basic 3 50 3.044522 9.133566 360 +set 2 50 3.044522 6.089044 361 +standard 2 48 3.044522 6.089044 365 +give 1 50 3.044522 3.044522 359 +without 1 50 3.044522 3.044522 370 +archiv 1 49 3.044522 3.044522 364 +featur 3 46 3.091042 9.273126 386 +could 1 46 3.091042 3.091042 383 +possibl 1 47 3.091042 3.091042 378 +get 1 46 3.091042 3.091042 380 +describ 2 45 3.135494 6.270988 400 +natur 1 44 3.135494 3.135494 406 +video 1 44 3.135494 3.135494 405 +anoth 1 45 3.135494 3.135494 408 +term 3 43 3.178054 9.534162 411 +mechan 2 43 3.178054 6.356108 416 +futur 2 41 3.218876 6.437752 427 +howev 1 41 3.218876 3.218876 422 +editor 1 41 3.218876 3.218876 433 +multipl 2 39 3.258097 6.516194 453 +submit 1 39 3.258097 3.258097 440 +must 1 40 3.258097 3.258097 442 +close 2 38 3.295837 6.591674 465 +open 1 38 3.295837 3.295837 469 +field 2 37 3.332205 6.664410 482 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +feel 1 37 3.332205 3.332205 483 +purpos 1 37 3.332205 3.332205 481 +multi 1 36 3.367296 3.367296 493 +tree 1 36 3.367296 3.367296 492 +singl 3 34 3.401197 10.203591 510 +either 2 35 3.401197 6.802394 506 +everi 1 34 3.401197 3.401197 519 +approxim 1 35 3.401197 3.401197 509 +post 1 35 3.401197 3.401197 505 +return 1 34 3.401197 3.401197 502 +concurr 1 34 3.401197 3.401197 501 +go 1 33 3.433987 3.433987 529 +product 1 33 3.433987 3.433987 527 +queri 1 33 3.433987 3.433987 524 +obtain 1 33 3.433987 3.433987 534 +ad 1 32 3.465736 3.465736 544 +kind 1 32 3.465736 3.465736 541 +photo 1 31 3.496508 3.496508 561 +someth 1 31 3.496508 3.496508 554 +exist 4 30 3.555348 14.221392 569 +abl 2 30 3.555348 7.110696 566 +focu 2 30 3.555348 7.110696 571 +option 1 30 3.555348 3.555348 575 +built 1 29 3.583519 3.583519 592 +turn 1 29 3.583519 3.583519 586 +depend 1 29 3.583519 3.583519 583 +particip 1 29 3.583519 3.583519 589 +releas 9 28 3.610918 32.498262 616 +framework 2 28 3.610918 7.221836 606 +intend 1 28 3.610918 3.610918 599 +becom 1 28 3.610918 3.610918 603 +propos 1 28 3.610918 3.610918 602 +hope 1 28 3.610918 3.610918 610 +retriev 2 27 3.637586 7.275172 621 +mind 1 27 3.637586 3.637586 632 +manipul 1 27 3.637586 3.637586 624 +quit 1 27 3.637586 3.637586 633 +symbol 1 27 3.637586 3.637586 620 +linux 1 27 3.637586 3.637586 631 +team 1 27 3.637586 3.637586 625 +effort 2 26 3.688879 7.377758 652 +request 2 26 3.688879 7.377758 635 +rather 2 26 3.688879 7.377758 642 +enhanc 1 26 3.688879 3.688879 644 +enabl 1 26 3.688879 3.688879 655 +subject 1 26 3.688879 3.688879 647 +client 2 25 3.737670 7.475340 679 +wai 1 25 3.737670 3.737670 662 +valu 1 25 3.737670 3.737670 665 +task 1 25 3.737670 3.737670 678 +scalabl 6 24 3.761200 22.567200 705 +store 3 24 3.761200 11.283600 693 +reach 1 24 3.761200 3.761200 688 +defin 4 22 3.850148 15.400592 746 +serv 1 22 3.850148 3.850148 758 +varieti 1 22 3.850148 3.850148 740 +almost 1 22 3.850148 3.850148 742 +deal 1 22 3.850148 3.850148 736 +emphasi 1 22 3.850148 3.850148 755 +sent 1 22 3.850148 3.850148 763 +programminglanguag 2 21 3.912023 7.824046 782 +fund 1 21 3.912023 3.912023 805 +flexibl 1 21 3.912023 3.912023 792 +latest 1 21 3.912023 3.912023 785 +thu 1 21 3.912023 3.912023 773 +similar 1 21 3.912023 3.912023 771 +binari 2 20 3.951244 7.902488 823 +entir 2 20 3.951244 7.902488 811 +fine 1 20 3.951244 3.951244 822 +benchmark 2 19 4.007333 8.014666 859 +media 1 19 4.007333 4.007333 861 +definit 1 19 4.007333 4.007333 864 +separ 1 19 4.007333 4.007333 844 +concentr 2 18 4.060443 8.120886 906 +along 1 18 4.060443 4.060443 878 +regist 2 17 4.110874 8.221748 938 +weekli 2 17 4.110874 8.221748 919 +repositori 1 17 4.110874 4.110874 932 +expand 1 17 4.110874 4.110874 928 +whether 1 17 4.110874 4.110874 918 +stop 1 17 4.110874 4.110874 942 +ultim 1 17 4.110874 4.110874 943 +attempt 1 17 4.110874 4.110874 917 +sept 1 17 4.110874 4.110874 952 +anyon 1 17 4.110874 4.110874 916 +commerci 3 16 4.174387 12.523161 1005 +intel 1 16 4.174387 4.174387 1000 +alreadi 1 16 4.174387 4.174387 963 +portion 1 16 4.174387 4.174387 971 +sign 1 16 4.174387 4.174387 970 +transit 2 15 4.248495 8.496990 1046 +capabl 1 15 4.248495 4.248495 1016 +hierarch 1 15 4.248495 4.248495 1018 +hybrid 1 15 4.248495 4.248495 1057 +piec 1 15 4.248495 4.248495 1020 +stream 1 15 4.248495 4.248495 1015 +charact 1 15 4.248495 4.248495 1028 +heterogen 2 14 4.317488 8.634976 1090 +anonym 2 14 4.317488 8.634976 1100 +attribut 2 14 4.317488 8.634976 1092 +role 1 14 4.317488 4.317488 1101 +shown 1 14 4.317488 4.317488 1080 +decid 1 14 4.317488 4.317488 1075 +directli 2 13 4.382027 8.764054 1141 +nasa 1 13 4.382027 4.382027 1188 +earlier 1 13 4.382027 4.382027 1140 +cannot 1 13 4.382027 4.382027 1144 +individu 1 13 4.382027 4.382027 1126 +convert 1 13 4.382027 4.382027 1122 +uniqu 3 12 4.465908 13.397724 1228 +target 1 12 4.465908 4.465908 1282 +safe 1 12 4.465908 4.465908 1274 +solari 1 12 4.465908 4.465908 1238 +nanci 1 12 4.465908 4.465908 1256 +shore 46 11 4.553877 209.478342 1377 +persist 11 11 4.553877 50.092647 1367 +string 2 11 4.553877 9.107754 1340 +arpa 1 11 4.553877 4.553877 1369 +distinguish 1 11 4.553877 4.553877 1357 +fix 1 11 4.553877 4.553877 1327 +facilit 1 10 4.653960 4.653960 1412 +equal 1 10 4.653960 4.653960 1424 +consortium 1 10 4.653960 4.653960 1467 +length 1 10 4.653960 4.653960 1400 +subscrib 9 9 4.753590 42.782310 1541 +inter 2 9 4.753590 9.507180 1530 +rel 1 9 4.753590 4.753590 1487 +mention 1 9 4.753590 4.753590 1569 +familiar 1 9 4.753590 4.753590 1485 +contrast 1 8 4.875197 4.875197 1637 +root 1 8 4.875197 4.875197 1650 +realiz 1 8 4.875197 4.875197 1739 +cross 1 8 4.875197 4.875197 1703 +port 1 8 4.875197 4.875197 1766 +parti 1 8 4.875197 4.875197 1676 +digest 3 7 5.010635 15.031905 1864 +supportfor 1 7 5.010635 5.010635 1854 +sparc 1 7 5.010635 5.010635 1860 +henc 1 7 5.010635 5.010635 1805 +beta 5 6 5.164786 25.823930 1993 +byte 4 6 5.164786 20.659144 2108 +geograph 3 6 5.164786 15.494358 2236 +furthermor 1 6 5.164786 5.164786 2141 +conveni 1 6 5.164786 5.164786 2088 +pool 1 6 5.164786 5.164786 2225 +feasibl 1 6 5.164786 5.164786 2157 +gzip 1 6 5.164786 5.164786 2117 +moder 1 6 5.164786 5.164786 2112 +notifi 1 6 5.164786 5.164786 2106 +compat 4 5 5.347108 21.388432 2485 +eas 2 5 5.347108 10.694216 2267 +default 2 5 5.347108 10.694216 2335 +anda 1 5 5.347108 5.347108 2416 +greater 1 5 5.347108 5.347108 2258 +began 1 5 5.347108 5.347108 2498 +remain 1 5 5.347108 5.347108 2278 +peer 2 4 5.568345 11.136690 2742 +repli 2 4 5.568345 11.136690 2689 +andevalu 1 4 5.568345 5.568345 2706 +satellit 1 4 5.568345 5.568345 3077 +eventu 1 4 5.568345 5.568345 3074 +symmetr 1 4 5.568345 5.568345 2908 +exodu 1 4 5.568345 5.568345 3075 +customiz 1 4 5.568345 5.568345 2966 +simplifi 1 4 5.568345 5.568345 3066 +bulk 2 3 5.857933 11.715866 4000 +oodb 2 3 5.857933 11.715866 3954 +subscript 2 3 5.857933 11.715866 3469 +predecessor 1 3 5.857933 5.857933 3585 +briefli 1 3 5.857933 5.857933 3459 +sector 1 3 5.857933 5.857933 3766 +paragon 1 3 5.857933 5.857933 3359 +serverarchitectur 1 3 5.857933 5.857933 3736 +gigabyt 1 3 5.857933 5.857933 3548 +embodi 1 3 5.857933 5.857933 3236 +intra 1 3 5.857933 5.857933 3243 +reachabl 1 3 5.857933 5.857933 4001 +eduand 1 3 5.857933 5.857933 3452 +membership 1 3 5.857933 5.857933 3751 +oodbm 3 2 6.263398 18.790194 6083 +objectiveoverviewreleas 1 2 6.263398 6.263398 6070 +serverobject 1 2 6.263398 6.263398 6077 +provis 1 2 6.263398 6.263398 4683 +neutral 1 2 6.263398 6.263398 5760 +mount 1 2 6.263398 6.263398 5995 +eduthi 1 2 6.263398 6.263398 5382 +junk 1 2 6.263398 6.263398 5701 +mailbox 1 2 6.263398 6.263398 6084 +sender 1 2 6.263398 6.263398 5064 +shore_al 8 1 6.957497 55.659976 17432 +odmg 5 1 6.957497 34.787485 17433 +listproc 4 1 6.957497 27.829988 17434 +shore_support 3 1 6.957497 20.872491 17435 +informationsystem 2 1 6.957497 13.914994 17436 +ashor 2 1 6.957497 13.914994 17437 +vendor 2 1 6.957497 13.914994 17438 +flatten 2 1 6.957497 13.914994 17439 +legaci 2 1 6.957497 13.914994 17440 +clutter 2 1 6.957497 13.914994 17441 +pageshor 1 1 6.957497 6.957497 17442 +repositorydocu 1 1 6.957497 6.957497 17443 +informationmail 1 1 6.957497 6.957497 17444 +listsse 1 1 6.957497 6.957497 17445 +shorepeopl 1 1 6.957497 6.957497 17446 +shorelatest 1 1 6.957497 6.957497 17447 +arpaparadis 1 1 6.957497 6.957497 17448 +shoreexodu 1 1 6.957497 6.957497 17449 +shoreoo 1 1 6.957497 6.957497 17450 +oodbsshor 1 1 6.957497 6.957497 17451 +albumuw 1 1 6.957497 6.957497 17452 +widevarieti 1 1 6.957497 6.957497 17453 +cadsystem 1 1 6.957497 6.957497 17454 +usedexodusstorag 1 1 6.957497 6.957497 17455 +ofwai 1 1 6.957497 6.957497 17456 +thisinterfac 1 1 6.957497 6.957497 17457 +theunix 1 1 6.957497 6.957497 17458 +viand 1 1 6.957497 6.957497 17459 +withoutmodif 1 1 6.957497 6.957497 17460 +shoreobject 1 1 6.957497 6.957497 17461 +inheritingcharacterist 1 1 6.957497 6.957497 17462 +fromfil 1 1 6.957497 6.957497 17463 +ofshor 1 1 6.957497 6.957497 17464 +scalabilitysupport 1 1 6.957497 6.957497 17465 +heterogeneitysupport 1 1 6.957497 6.957497 17466 +applicationswhen 1 1 6.957497 6.957497 17467 +uniqueamong 1 1 6.957497 6.957497 17468 +languageheterogen 1 1 6.957497 6.957497 17469 +persistentstorag 1 1 6.957497 6.957497 17470 +basicallycompat 1 1 6.957497 6.957497 17471 +betransf 1 1 6.957497 6.957497 17472 +architectureshor 1 1 6.957497 6.957497 17473 +distributedarchitectur 1 1 6.957497 6.957497 17474 +disksattach 1 1 6.957497 6.957497 17475 +architectureus 1 1 6.957497 6.957497 17476 +typicallyus 1 1 6.957497 6.957497 17477 +notionof 1 1 6.957497 6.957497 17478 +runsin 1 1 6.957497 6.957497 17479 +forus 1 1 6.957497 6.957497 17480 +theparadis 1 1 6.957497 6.957497 17481 +seosdi 1 1 6.957497 6.957497 17482 +aimport 1 1 6.957497 6.957497 17483 +endeavor 1 1 6.957497 6.957497 17484 +certainlydepend 1 1 6.957497 6.957497 17485 +transmitobject 1 1 6.957497 6.957497 17486 +whilecurr 1 1 6.957497 6.957497 17487 +orientedtoward 1 1 6.957497 6.957497 17488 +terabyt 1 1 6.957497 6.957497 17489 +libraryar 1 1 6.957497 6.957497 17490 +heterogeneityobject 1 1 6.957497 6.957497 17491 +neutraltyp 1 1 6.957497 6.957497 17492 +databasefeatur 1 1 6.957497 6.957497 17493 +ofsupport 1 1 6.957497 6.957497 17494 +feasibleto 1 1 6.957497 6.957497 17495 +wasrec 1 1 6.957497 6.957497 17496 +onprovid 1 1 6.957497 6.957497 17497 +withina 1 1 6.957497 6.957497 17498 +applicationsa 1 1 6.957497 6.957497 17499 +currentlyus 1 1 6.957497 6.957497 17500 +untyp 1 1 6.957497 6.957497 17501 +structuredobject 1 1 6.957497 6.957497 17502 +displac 1 1 6.957497 6.957497 17503 +orientedfil 1 1 6.957497 6.957497 17504 +standpoint 1 1 6.957497 6.957497 17505 +manypersist 1 1 6.957497 6.957497 17506 +indirectli 1 1 6.957497 6.957497 17507 +usersa 1 1 6.957497 6.957497 17508 +individualpersist 1 1 6.957497 6.957497 17509 +oflarg 1 1 6.957497 6.957497 17510 +unnam 1 1 6.957497 6.957497 17511 +involvessever 1 1 6.957497 6.957497 17512 +includingdirectori 1 1 6.957497 6.957497 17513 +unixappl 1 1 6.957497 6.957497 17514 +fromtradit 1 1 6.957497 6.957497 17515 +standardunix 1 1 6.957497 6.957497 17516 +mkdir 1 1 6.957497 6.957497 17517 +chdir 1 1 6.957497 6.957497 17518 +callsposs 1 1 6.957497 6.957497 17519 +onevari 1 1 6.957497 6.957497 17520 +asb 1 1 6.957497 6.957497 17521 +objectthrough 1 1 6.957497 6.957497 17522 +counterpart 1 1 6.957497 6.957497 17523 +callswil 1 1 6.957497 6.957497 17524 +thatwish 1 1 6.957497 6.957497 17525 +datacontain 1 1 6.957497 6.957497 17526 +bothnew 1 1 6.957497 6.957497 17527 +componentof 1 1 6.957497 6.957497 17528 +morestructur 1 1 6.957497 6.957497 17529 +rleas 1 1 6.957497 6.957497 17530 +completeimplement 1 1 6.957497 6.957497 17531 +tosolari 1 1 6.957497 6.957497 17532 +andpentium 1 1 6.957497 6.957497 17533 +atftp 1 1 6.957497 6.957497 17534 +liststher 1 1 6.957497 6.957497 17535 +usebi 1 1 6.957497 6.957497 17536 +madisonc 1 1 6.957497 6.957497 17537 +unmoder 1 1 6.957497 6.957497 17538 +unlikelyev 1 1 6.957497 6.957497 17539 +isalreadi 1 1 6.957497 6.957497 17540 +belowfor 1 1 6.957497 6.957497 17541 +sentwhen 1 1 6.957497 6.957497 17542 +beingpost 1 1 6.957497 6.957497 17543 +yourrepli 1 1 6.957497 6.957497 17544 +maysubscrib 1 1 6.957497 6.957497 17545 +existenceof 1 1 6.957497 6.957497 17546 +whenit 1 1 6.957497 6.957497 17547 +yoursubscript 1 1 6.957497 6.957497 17548 +conceal 1 1 6.957497 6.957497 17549 +subscriberscannot 1 1 6.957497 6.957497 17550 +specialmessag 1 1 6.957497 6.957497 17551 +sendthi 1 1 6.957497 6.957497 17552 +unsubscrib 1 1 6.957497 6.957497 17553 +messageshould 1 1 6.957497 6.957497 17554 +helplast 1 1 6.957497 6.957497 17555 +nhall 1 1 6.957497 6.957497 17556 +footnot 1 1 6.957497 6.957497 17557 +odlshor 1 1 6.957497 6.957497 17558 +modelidl 1 1 6.957497 6.957497 17559 +odlar 1 1 6.957497 6.957497 17560 +stabilizesw 1 1 6.957497 6.957497 17561 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html new file mode 100644 index 00000000..90c172e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~agupta^agupta.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +madison 3 165 1.791759 5.375277 55 +wisconsin 1 169 1.791759 1.791759 54 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +find 1 111 2.197225 2.197225 111 +stuff 1 87 2.484907 2.484907 171 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +sport 1 25 3.737670 3.737670 683 +indian 1 22 3.850148 3.850148 769 +gupta 2 12 4.465908 8.931816 1241 +avenu 1 12 4.465908 4.465908 1277 +newspap 1 12 4.465908 4.465908 1280 +whereabout 1 4 5.568345 5.568345 3078 +abhinav 2 3 5.857933 11.715866 3428 +agupta 1 3 5.857933 5.857933 3429 +kendal 1 2 6.263398 6.263398 6085 +residenceoffic 1 1 6.957497 6.957497 17562 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~alain^alain.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~alain^alain.html new file mode 100644 index 00000000..5482c76b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~alain^alain.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +click 1 142 1.945910 1.945910 78 +pictur 1 89 2.397895 2.397895 160 +live 1 40 3.258097 3.258097 451 +ever 1 19 4.007333 4.007333 872 +larger 1 7 5.010635 5.010635 1875 +largest 1 7 5.010635 5.010635 1858 +alain 3 2 6.263398 18.790194 6086 +pagealain 1 1 6.957497 6.957497 17563 +carnivor 1 1 6.957497 6.957497 17564 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~allex^allex.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~allex^allex.html new file mode 100644 index 00000000..e95bf546 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~allex^allex.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 6 571 0.000000 0.000000 5 +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +program 1 374 0.693147 0.693147 7 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +group 2 183 1.609438 3.218876 36 +wisconsin 7 169 1.791759 12.542313 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +relat 1 139 1.945910 1.945910 68 +machin 2 129 2.079442 4.158884 95 +dayton 1 119 2.079442 2.079442 104 +learn 2 86 2.484907 4.969814 170 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +state 1 76 2.564949 2.564949 207 +intellig 4 72 2.639057 10.556228 225 +artifici 1 63 2.772589 2.772589 280 +advisor 1 51 2.995732 2.995732 355 +streetmadison 1 38 3.295837 3.295837 474 +neural 1 30 3.555348 3.555348 578 +departmentunivers 1 24 3.761200 3.761200 711 +sequenc 1 23 3.806662 3.806662 734 +biologi 5 15 4.248495 21.242475 1049 +train 1 14 4.317488 4.317488 1066 +edutelephon 1 10 4.653960 4.653960 1473 +purdu 1 10 4.653960 4.653960 1466 +molecular 3 7 5.010635 15.031905 1887 +jude 1 6 5.164786 5.164786 2123 +fold 1 4 5.568345 5.568345 2615 +allex 3 2 6.263398 18.790194 6087 +ismb 3 2 6.263398 18.790194 5834 +carolyn 2 2 6.263398 12.526796 6088 +studentbiotechnolog 1 1 6.957497 6.957497 17565 +traineecomput 1 1 6.957497 6.957497 17566 +shavlikinterest 1 1 6.957497 6.957497 17567 +protein 1 1 6.957497 6.957497 17568 +networkseduc 1 1 6.957497 6.957497 17569 +madisonb 1 1 6.957497 6.957497 17570 +universityb 1 1 6.957497 6.957497 17571 +mankato 1 1 6.957497 6.957497 17572 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~amir^amir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~amir^amir.html new file mode 100644 index 00000000..4dfdf4be --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~amir^amir.html @@ -0,0 +1,183 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +program 4 374 0.693147 2.772588 7 +interest 3 384 0.693147 2.079441 11 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +us 2 329 1.098612 2.197224 16 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +cornel 1 215 1.386294 1.386294 23 +group 2 183 1.609438 3.218876 36 +updat 1 191 1.609438 1.609438 41 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +implement 1 152 1.791759 1.791759 52 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +parallel 1 169 1.791759 1.791759 60 +algorithm 1 162 1.791759 1.791759 57 +read 1 154 1.791759 1.791759 47 +like 2 132 1.945910 3.891820 81 +file 1 132 1.945910 1.945910 70 +problem 1 147 1.945910 1.945910 75 +area 1 144 1.945910 1.945910 80 +model 1 145 1.945910 1.945910 69 +perform 1 143 1.945910 1.945910 74 +architectur 1 139 1.945910 1.945910 77 +compil 2 122 2.079442 4.158884 96 +analysi 2 124 2.079442 4.158884 98 +topic 1 114 2.197225 2.197225 110 +look 1 107 2.197225 2.197225 115 +find 1 111 2.197225 2.197225 111 +theori 1 111 2.197225 2.197225 127 +advanc 1 99 2.302585 2.302585 130 +peopl 1 96 2.302585 2.302585 132 +associ 1 93 2.397895 2.397895 151 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +novemb 1 81 2.484907 2.484907 179 +thing 1 84 2.484907 2.484907 189 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +resum 1 79 2.564949 2.564949 217 +method 1 80 2.564949 2.564949 213 +master 1 76 2.564949 2.564949 216 +optim 1 79 2.564949 2.564949 197 +good 1 77 2.564949 2.564949 200 +know 1 80 2.564949 2.564949 198 +want 1 79 2.564949 2.564949 199 +solv 1 73 2.639057 2.639057 234 +write 1 72 2.639057 2.639057 222 +david 1 71 2.639057 2.639057 232 +degre 2 69 2.708050 5.416100 259 +practic 1 70 2.708050 2.708050 246 +copi 1 63 2.772589 2.772589 284 +polici 1 64 2.772589 2.772589 279 +evalu 1 64 2.772589 2.772589 266 +new 1 64 2.772589 2.772589 262 +think 2 57 2.890372 5.780744 314 +point 1 58 2.890372 2.890372 319 +index 1 56 2.890372 2.890372 309 +three 1 54 2.944439 2.944439 330 +talk 1 53 2.944439 2.944439 336 +week 1 52 2.995732 2.995732 343 +advisor 1 51 2.995732 2.995732 355 +much 1 52 2.995732 2.995732 349 +friend 4 48 3.044522 12.178088 376 +set 1 50 3.044522 3.044522 361 +physic 2 47 3.091042 6.182084 377 +get 1 46 3.091042 3.091042 380 +featur 1 46 3.091042 3.091042 386 +better 1 45 3.135494 3.135494 401 +favorit 1 44 3.135494 3.135494 410 +show 1 43 3.178054 3.178054 417 +linear 1 41 3.218876 3.218876 431 +live 1 40 3.258097 3.258097 451 +author 1 39 3.258097 3.258097 450 +seminar 2 38 3.295837 6.591674 470 +return 1 34 3.401197 3.401197 502 +go 2 33 3.433987 6.867974 529 +articl 1 33 3.433987 3.433987 530 +depend 1 29 3.583519 3.583519 583 +enhanc 1 26 3.688879 3.688879 644 +never 1 25 3.737670 3.737670 671 +magazin 1 24 3.761200 3.761200 704 +watch 1 21 3.912023 3.912023 789 +love 1 21 3.912023 3.912023 804 +leav 1 21 3.912023 3.912023 772 +minut 1 20 3.951244 3.951244 810 +five 1 19 4.007333 4.007333 841 +beauti 2 18 4.060443 8.120886 912 +regist 1 17 4.110874 4.110874 938 +side 1 15 4.248495 4.248495 1022 +anywai 1 15 4.248495 4.248495 1047 +score 1 15 4.248495 4.248495 1017 +went 1 12 4.465908 4.465908 1279 +multiscalar 2 8 4.875197 9.750394 1783 +partner 1 8 4.875197 4.875197 1648 +parti 1 8 4.875197 4.875197 1676 +vallei 1 7 5.010635 5.010635 1959 +shot 1 7 5.010635 5.010635 1898 +yale 2 6 5.164786 10.329572 2003 +truth 1 6 5.164786 5.164786 2179 +sohi 1 6 5.164786 5.164786 2237 +cat 1 6 5.164786 5.164786 2194 +presid 1 6 5.164786 5.164786 2196 +promis 1 6 5.164786 5.164786 2037 +guri 1 5 5.347108 5.347108 2578 +girlfriend 1 5 5.347108 5.347108 2579 +everybodi 1 5 5.347108 5.347108 2517 +gui 1 5 5.347108 5.347108 2573 +kid 1 5 5.347108 5.347108 2516 +arch 2 4 5.568345 11.136690 2995 +metal 1 4 5.568345 5.568345 3079 +soul 1 4 5.568345 5.568345 2907 +drew 1 4 5.568345 5.568345 2980 +amir 5 3 5.857933 29.289665 3850 +super 3 3 5.857933 17.573799 3918 +preprocessor 1 3 5.857933 5.857933 3844 +detector 1 3 5.857933 5.857933 3745 +allevi 1 3 5.857933 5.857933 3643 +recip 1 3 5.857933 5.857933 3668 +terri 1 3 5.857933 5.857933 3264 +carbon 1 3 5.857933 5.857933 3804 +vagu 1 3 5.857933 5.857933 3393 +roth 2 2 6.263398 12.526796 6089 +out 2 2 6.263398 12.526796 6090 +barb 2 2 6.263398 12.526796 6058 +delphi 1 2 6.263398 6.263398 4192 +airport 1 2 6.263398 6.263398 5962 +curli 1 2 6.263398 6.263398 5691 +fri 1 2 6.263398 6.263398 5844 +charli 1 2 6.263398 6.263398 5905 +regress 1 2 6.263398 6.263398 4501 +weird 1 2 6.263398 6.263398 5503 +subba 1 2 6.263398 6.263398 6091 +officem 1 2 6.263398 6.263398 6092 +wierd 1 2 6.263398 6.263398 6093 +marci 2 1 6.957497 13.914994 17573 +maven 1 1 6.957497 6.957497 17574 +erin 1 1 6.957497 6.957497 17575 +occasionali 1 1 6.957497 6.957497 17576 +cvte 1 1 6.957497 6.957497 17577 +deleg 1 1 6.957497 6.957497 17578 +existencei 1 1 6.957497 6.957497 17579 +nail 1 1 6.957497 6.957497 17580 +lafollett 1 1 6.957497 6.957497 17581 +meantim 1 1 6.957497 6.957497 17582 +wacki 1 1 6.957497 6.957497 17583 +eggplant 1 1 6.957497 6.957497 17584 +daddi 1 1 6.957497 6.957497 17585 +titanium 1 1 6.957497 6.957497 17586 +screw 1 1 6.957497 6.957497 17587 +desi 1 1 6.957497 6.957497 17588 +relaford 1 1 6.957497 6.957497 17589 +mulholland 1 1 6.957497 6.957497 17590 +oxygen 1 1 6.957497 6.957497 17591 +dioxid 1 1 6.957497 6.957497 17592 +whack 1 1 6.957497 6.957497 17593 +scaryarea 1 1 6.957497 6.957497 17594 +rabid 1 1 6.957497 6.957497 17595 +interestth 1 1 6.957497 6.957497 17596 +hmmm 1 1 6.957497 6.957497 17597 +handyinformatik 1 1 6.957497 6.957497 17598 +madcat 1 1 6.957497 6.957497 17599 +sportslin 1 1 6.957497 6.957497 17600 +philli 1 1 6.957497 6.957497 17601 +ickyth 1 1 6.957497 6.957497 17602 +kemin 1 1 6.957497 6.957497 17603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html new file mode 100644 index 00000000..5024bb9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ammons^ammons.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~amos^amos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~amos^amos.html new file mode 100644 index 00000000..34dbff20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~amos^amos.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 3 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +inform 2 412 0.693147 1.386294 8 +research 2 431 0.693147 1.386294 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +us 2 329 1.098612 2.197224 16 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +group 3 183 1.609438 4.828314 36 +list 2 201 1.609438 3.218876 39 +public 2 202 1.609438 3.218876 43 +includ 2 208 1.609438 3.218876 42 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +data 1 170 1.791759 1.791759 49 +read 1 154 1.791759 1.791759 47 +avail 1 169 1.791759 1.791759 48 +click 3 142 1.945910 5.837730 78 +file 3 132 1.945910 5.837730 70 +area 1 144 1.945910 1.945910 80 +analysi 3 124 2.079442 6.238326 98 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +provid 1 121 2.079442 2.079442 94 +theori 4 111 2.197225 8.788900 127 +version 3 113 2.197225 6.591675 122 +site 3 106 2.197225 6.591675 119 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +pleas 1 113 2.197225 2.197225 114 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +present 3 91 2.397895 7.193685 145 +homepag 2 93 2.397895 4.795790 148 +search 1 95 2.397895 2.397895 155 +associ 1 93 2.397895 2.397895 151 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +comment 1 93 2.397895 2.397895 146 +activ 3 84 2.484907 7.454721 182 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +want 1 79 2.564949 2.564949 199 +line 2 75 2.639057 5.278114 231 +summari 1 73 2.639057 2.639057 237 +view 2 70 2.708050 5.416100 254 +order 1 69 2.708050 2.708050 249 +main 1 67 2.708050 2.708050 256 +goal 1 66 2.708050 2.708050 250 +function 1 62 2.772589 2.772589 275 +copi 1 63 2.772589 2.772589 284 +handout 1 64 2.772589 2.772589 263 +abstract 1 62 2.772589 2.772589 276 +variou 2 56 2.890372 5.780744 317 +space 1 57 2.890372 2.890372 310 +unix 1 58 2.890372 2.890372 308 +found 2 53 2.944439 5.888878 337 +tabl 1 51 2.995732 2.995732 346 +maintain 1 51 2.995732 2.995732 342 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +netscap 1 44 3.135494 3.135494 395 +futur 1 41 3.218876 3.218876 427 +vita 2 38 3.295837 6.591674 473 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +download 3 36 3.367296 10.101888 489 +short 1 36 3.367296 3.367296 499 +approxim 4 35 3.401197 13.604788 509 +word 1 34 3.401197 3.401197 508 +articl 1 33 3.433987 3.433987 530 +enhanc 1 26 3.688879 3.688879 644 +mine 1 26 3.688879 3.688879 654 +wish 2 24 3.761200 7.522400 692 +compress 2 23 3.806662 7.613324 719 +togeth 1 23 3.806662 3.806662 714 +miscellan 1 23 3.806662 3.806662 731 +recommend 1 22 3.850148 3.850148 737 +basi 1 20 3.951244 3.951244 828 +item 1 19 4.007333 4.007333 856 +offici 1 18 4.060443 4.060443 894 +otherwis 1 17 4.110874 4.110874 922 +choos 1 16 4.174387 4.174387 964 +carl 1 15 4.248495 4.248495 1024 +anonym 1 14 4.317488 4.317488 1100 +invari 1 8 4.875197 4.875197 1748 +univeristi 1 8 4.875197 4.875197 1754 +none 1 7 5.010635 5.010635 1811 +spline 1 6 5.164786 5.164786 2007 +clickher 2 5 5.347108 10.694216 2428 +shift 1 5 5.347108 5.347108 2357 +wavelet 1 4 5.568345 5.568345 2874 +usa 1 4 5.568345 5.568345 3080 +thin 1 3 5.857933 5.857933 3488 +shen 1 3 5.857933 5.857933 3370 +uncompress 1 3 5.857933 5.857933 3177 +boor 1 3 5.857933 5.857933 3482 +ofwisconsin 1 3 5.857933 5.857933 4002 +amo 4 2 6.263398 25.053592 6094 +professordepart 1 2 6.263398 6.263398 5624 +deposit 1 2 6.263398 6.263398 6095 +mailbox 1 2 6.263398 6.263398 6084 +boxsplin 1 1 6.957497 6.957497 17604 +radial 1 1 6.957497 6.957497 17605 +toscatt 1 1 6.957497 6.957497 17606 +multiquadr 1 1 6.957497 6.957497 17607 +plate 1 1 6.957497 6.957497 17608 +splinesthi 1 1 6.957497 6.957497 17609 +linksat 1 1 6.957497 6.957497 17610 +paperaffin 1 1 6.957497 6.957497 17611 +operatorof 1 1 6.957497 6.957497 17612 +zuowei 1 1 6.957497 6.957497 17613 +fromher 1 1 6.957497 6.957497 17614 +directlyfrom 1 1 6.957497 6.957497 17615 +accounther 1 1 6.957497 6.957497 17616 +articlesof 1 1 6.957497 6.957497 17617 +containspostscript 1 1 6.957497 6.957497 17618 +theapproxim 1 1 6.957497 6.957497 17619 +filesconcern 1 1 6.957497 6.957497 17620 +andpubl 1 1 6.957497 6.957497 17621 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html new file mode 100644 index 00000000..eb09bad9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~andyt^andyt.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +andi 1 4 5.568345 5.568345 3081 +pageandi 1 2 6.263398 6.263398 6096 +therber 1 1 6.957497 6.957497 17622 +therberoffic 1 1 6.957497 6.957497 17623 +sphone 1 1 6.957497 6.957497 17624 +andyt 1 1 6.957497 6.957497 17625 +eduzooresumebookmarksapplet 1 1 6.957497 6.957497 17626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html new file mode 100644 index 00000000..81bd5872 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~arvind^arvind.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +world 1 115 2.197225 2.197225 126 +present 1 91 2.397895 2.397895 145 +collect 1 65 2.772589 2.772589 268 +finger 1 52 2.995732 2.995732 354 +log 1 19 4.007333 4.007333 857 +classic 1 14 4.317488 4.317488 1084 +fascin 1 3 5.857933 5.857933 3948 +arvind 3 1 6.957497 20.872491 17627 +ranganathan 2 1 6.957497 13.914994 17628 +workplac 2 1 6.957497 13.914994 17629 +ranga 1 1 6.957497 6.957497 17630 +erstwhil 1 1 6.957497 6.957497 17631 +indiaworld 1 1 6.957497 6.957497 17632 +escher 1 1 6.957497 6.957497 17633 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html new file mode 100644 index 00000000..26240daa --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ashisht^ashisht.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +educ 1 86 2.484907 2.484907 191 +come 1 78 2.564949 2.564949 202 +visit 1 63 2.772589 2.772589 288 +undergradu 1 54 2.944439 2.944439 338 +india 1 32 3.465736 3.465736 550 +altern 1 26 3.688879 3.688879 641 +worth 1 11 4.553877 4.553877 1294 +ashish 2 5 5.347108 10.694216 2473 +delhi 1 5 5.347108 5.347108 2530 +whereabout 1 4 5.568345 5.568345 3078 +indianinstitut 1 3 5.857933 5.857933 4003 +fantast 1 3 5.857933 5.857933 3966 +hadmi 1 2 6.263398 6.263398 6097 +canfing 1 2 6.263398 6.263398 6098 +thusoo 1 1 6.957497 6.957497 17634 +iitd 1 1 6.957497 6.957497 17635 +ashisht 1 1 6.957497 6.957497 17636 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html new file mode 100644 index 00000000..fcbac7fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ashraf^ashraf.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +dayton 1 119 2.079442 2.079442 104 +section 2 94 2.397895 4.795790 149 +grade 2 90 2.397895 4.795790 142 +west 1 83 2.484907 2.484907 192 +info 1 85 2.484907 2.484907 176 +june 1 79 2.564949 2.564949 214 +view 2 70 2.708050 5.416100 254 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +juli 1 60 2.833213 2.833213 305 +finger 1 52 2.995732 2.995732 354 +usaphon 1 9 4.753590 4.753590 1600 +ashraf 3 3 5.857933 17.573799 3421 +aboulnaga 2 3 5.857933 11.715866 3426 +edueduc 1 3 5.857933 5.857933 4004 +egypt 2 2 6.263398 12.526796 4856 +desautel 1 2 6.263398 6.263398 4791 +alexandria 4 1 6.957497 27.829988 17637 +pageashraf 1 1 6.957497 6.957497 17638 +aboulnagacomput 1 1 6.957497 6.957497 17639 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html new file mode 100644 index 00000000..66664bb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~atkinson^atkinson.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +gener 1 220 1.386294 1.386294 27 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +seattl 1 120 2.079442 2.079442 103 +back 1 60 2.833213 2.833213 297 +appoint 1 49 3.044522 3.044522 358 +basketbal 1 12 4.465908 4.465908 1289 +tuth 1 9 4.753590 4.753590 1519 +phil 2 5 5.347108 10.694216 2419 +educurr 1 5 5.347108 5.347108 2504 +win 1 3 5.857933 5.857933 3593 +atkinson 3 2 6.263398 18.790194 4722 +ncaa 1 2 6.263398 6.263398 5908 +infooffic 2 1 6.957497 13.914994 17640 +pageucla 1 1 6.957497 6.957497 17641 +bannon 1 1 6.957497 6.957497 17642 +championship 1 1 6.957497 6.957497 17643 +researchsailinghors 1 1 6.957497 6.957497 17644 +ridingscuba 1 1 6.957497 6.957497 17645 +divingc 1 1 6.957497 6.957497 17646 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bach^bach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bach^bach.html new file mode 100644 index 00000000..843ea2f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bach^bach.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +interest 4 384 0.693147 2.772588 11 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +us 2 329 1.098612 2.197224 16 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +algorithm 4 162 1.791759 7.167036 57 +recent 2 167 1.791759 3.583518 58 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +problem 3 147 1.945910 5.837730 75 +model 2 145 1.945910 3.891820 69 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +number 7 130 2.079442 14.556094 97 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +theori 4 111 2.197225 8.788900 127 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +code 1 108 2.197225 2.197225 116 +find 1 111 2.197225 2.197225 111 +question 2 91 2.397895 4.795790 141 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +proceed 1 93 2.397895 2.397895 152 +larg 2 82 2.484907 4.969814 168 +info 1 85 2.484907 2.484907 176 +exampl 2 77 2.564949 5.129898 195 +complet 1 77 2.564949 2.564949 208 +effici 2 73 2.639057 5.278114 233 +summari 1 73 2.639057 2.639057 237 +appli 1 71 2.639057 2.639057 226 +test 2 66 2.708050 5.416100 252 +practic 1 70 2.708050 2.708050 246 +complex 2 64 2.772589 5.545178 269 +improv 1 62 2.772589 2.772589 289 +creat 1 63 2.772589 2.772589 277 +simpl 1 60 2.833213 2.833213 298 +juli 1 60 2.833213 2.833213 305 +direct 1 57 2.890372 2.890372 316 +digit 1 52 2.995732 2.995732 348 +telephon 1 50 3.044522 3.044522 373 +without 1 50 3.044522 3.044522 370 +california 1 46 3.091042 3.091042 388 +algebra 1 45 3.135494 3.135494 394 +answer 1 45 3.135494 3.135494 391 +math 1 44 3.135494 3.135494 402 +examin 1 42 3.218876 3.218876 424 +press 1 42 3.218876 3.218876 419 +theoret 2 39 3.258097 6.516194 446 +probabl 1 40 3.258097 3.258097 455 +small 1 39 3.258097 3.258097 447 +annual 1 40 3.258097 3.258097 458 +error 1 40 3.258097 3.258097 449 +vita 1 38 3.295837 3.295837 473 +random 1 34 3.401197 3.401197 511 +least 1 35 3.401197 3.401197 516 +approxim 1 35 3.401197 3.401197 509 +product 1 33 3.433987 3.433987 527 +curriculum 1 33 3.433987 3.433987 535 +given 1 32 3.465736 3.465736 538 +secur 1 30 3.555348 3.555348 577 +usual 1 28 3.610918 3.610918 608 +proc 2 26 3.688879 7.377758 649 +berkelei 1 26 3.688879 3.688879 657 +lead 1 23 3.806662 3.806662 718 +among 1 21 3.912023 3.912023 781 +similar 1 21 3.912023 3.912023 771 +eric 2 19 4.007333 8.014666 870 +prove 1 19 4.007333 4.007333 848 +automata 1 13 4.382027 4.382027 1135 +conf 1 13 4.382027 4.382027 1181 +string 1 11 4.553877 4.553877 1340 +probabilist 1 11 4.553877 4.553877 1343 +volum 1 11 4.553877 4.553877 1347 +cryptographi 1 9 4.753590 4.753590 1512 +transmiss 1 9 4.753590 4.753590 1588 +assumpt 1 9 4.753590 4.753590 1514 +eduto 1 7 5.010635 5.010635 1956 +bach 4 4 5.568345 22.273380 2708 +wit 2 3 5.857933 11.715866 4005 +euler 1 3 5.857933 5.857933 3174 +canadian 1 3 5.857933 5.857933 3508 +condon 1 3 5.857933 5.857933 3309 +prime 2 2 6.263398 12.526796 6099 +designand 1 2 6.263398 6.263398 6100 +functionof 1 2 6.263398 6.263398 5415 +algebraicalgorithm 1 1 6.957497 6.957497 17647 +solvealgebra 1 1 6.957497 6.957497 17648 +onetel 1 1 6.957497 6.957497 17649 +possiblefactor 1 1 6.957497 6.957497 17650 +intrins 1 1 6.957497 6.957497 17651 +forreli 1 1 6.957497 6.957497 17652 +iscomposit 1 1 6.957497 6.957497 17653 +auxiliarynumb 1 1 6.957497 6.957497 17654 +witnessbi 1 1 6.957497 6.957497 17655 +followingnatur 1 1 6.957497 6.957497 17656 +accurateheurist 1 1 6.957497 6.957497 17657 +allowsthi 1 1 6.957497 6.957497 17658 +cnta 1 1 6.957497 6.957497 17659 +glaser 1 1 6.957497 6.957497 17660 +tanguai 1 1 6.957497 6.957497 17661 +shallit 1 1 6.957497 6.957497 17662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bart^bart.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bart^bart.html new file mode 100644 index 00000000..3fa56d5e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bart^bart.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +project 2 340 1.098612 2.197224 18 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +softwar 1 220 1.386294 1.386294 30 +oper 2 180 1.609438 3.218876 34 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +parallel 2 169 1.791759 3.583518 60 +distribut 2 162 1.791759 3.583518 51 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +perform 1 143 1.945910 1.945910 74 +tool 2 117 2.079442 4.158884 93 +dayton 1 119 2.079442 2.079442 104 +introduct 1 126 2.079442 2.079442 87 +spring 1 131 2.079442 2.079442 88 +advanc 1 99 2.302585 2.302585 130 +technic 1 100 2.302585 2.302585 140 +follow 1 92 2.397895 2.397895 143 +center 1 88 2.397895 2.397895 158 +west 1 83 2.484907 2.484907 192 +thing 1 84 2.484907 2.484907 189 +internet 1 83 2.484907 2.484907 186 +symposium 1 72 2.639057 2.639057 238 +undergradu 1 54 2.944439 2.944439 338 +streetmadison 1 38 3.295837 3.295837 474 +seminar 1 38 3.295837 3.295837 470 +random 1 34 3.401197 3.401197 511 +departmentunivers 1 24 3.761200 3.761200 711 +honor 1 23 3.806662 3.806662 729 +famili 1 23 3.806662 3.806662 735 +director 1 22 3.850148 3.850148 767 +offici 1 18 4.060443 4.060443 894 +miller 2 17 4.110874 8.221748 949 +convent 1 14 4.317488 4.317488 1072 +bart 2 9 4.753590 9.507180 1559 +paradyn 1 9 4.753590 4.753590 1614 +frank 1 9 4.753590 4.753590 1568 +lloyd 1 6 5.164786 5.164786 2103 +advisori 1 6 5.164786 5.164786 2148 +barton 1 5 5.347108 5.347108 2371 +professorcomput 1 3 5.857933 5.857933 3714 +usath 1 2 6.263398 6.263398 6056 +wright 1 2 6.263398 6.263398 5177 +fuzz 1 1 6.957497 6.957497 17663 +testingteach 1 1 6.957497 6.957497 17664 +graduatesprofession 1 1 6.957497 6.957497 17665 +monona 1 1 6.957497 6.957497 17666 +terrac 1 1 6.957497 6.957497 17667 +groupperson 1 1 6.957497 6.957497 17668 +photosbart 1 1 6.957497 6.957497 17669 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ben^ben.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ben^ben.html new file mode 100644 index 00000000..3327975b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ben^ben.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +wisc 1 242 1.386294 1.386294 33 +madison 2 165 1.791759 3.583518 55 +hour 2 165 1.791759 3.583518 46 +wisconsin 1 169 1.791759 1.791759 54 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +look 1 107 2.197225 2.197225 115 +west 1 83 2.484907 2.484907 192 +internet 1 83 2.484907 2.484907 186 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +street 1 63 2.772589 2.772589 293 +wednesdai 1 64 2.772589 2.772589 261 +browser 1 56 2.890372 2.890372 313 +tabl 1 51 2.995732 2.995732 346 +fridai 1 44 3.135494 3.135494 390 +word 1 34 3.401197 3.401197 508 +someth 1 31 3.496508 3.496508 554 +ultim 1 17 4.110874 4.110874 943 +miron 1 14 4.317488 4.317488 1110 +readabl 1 12 4.465908 4.465908 1258 +benjamin 1 11 4.553877 4.553877 1296 +teitelbaum 1 6 5.164786 5.164786 2102 +garbag 1 6 5.164786 5.164786 1986 +hyper 1 5 5.347108 5.347108 2435 +usaben 1 1 6.957497 6.957497 17670 +edursumquinc 1 1 6.957497 6.957497 17671 +gamezillion 1 1 6.957497 6.957497 17672 +bookmarksspr 1 1 6.957497 6.957497 17673 +dbseminar 1 1 6.957497 6.957497 17674 +osseminar 1 1 6.957497 6.957497 17675 +condormeet 1 1 6.957497 6.957497 17676 +plseminar 1 1 6.957497 6.957497 17677 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html new file mode 100644 index 00000000..b9d6d8b9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bestor^bestor.html @@ -0,0 +1,207 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 15 775 0.000000 0.000000 2 +scienc 7 640 0.000000 0.000000 4 +home 6 672 0.000000 0.000000 1 +univers 5 571 0.000000 0.000000 5 +page 4 705 0.000000 0.000000 3 +program 11 374 0.693147 7.624617 7 +research 4 431 0.693147 2.772588 10 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 10 340 1.098612 10.986120 18 +student 5 343 1.098612 5.493060 19 +us 4 329 1.098612 4.394448 16 +cours 4 273 1.098612 4.394448 15 +engin 4 297 1.098612 4.394448 20 +current 1 284 1.098612 1.098612 21 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +languag 5 227 1.386294 6.931470 26 +wisc 4 242 1.386294 5.545176 33 +mail 2 238 1.386294 2.772588 22 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +softwar 1 220 1.386294 1.386294 30 +group 3 183 1.609438 4.828314 36 +modifi 1 178 1.609438 1.609438 35 +madison 5 165 1.791759 8.958795 55 +wisconsin 3 169 1.791759 5.375277 54 +base 3 165 1.791759 5.375277 50 +algorithm 3 162 1.791759 5.375277 57 +data 1 170 1.791759 1.791759 49 +applic 1 170 1.791759 1.791759 56 +problem 9 147 1.945910 17.513190 75 +model 5 145 1.945910 9.729550 69 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +postscript 3 131 2.079442 6.238326 90 +machin 2 129 2.079442 4.158884 95 +high 2 130 2.079442 4.158884 101 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +spring 1 131 2.079442 2.079442 88 +introduct 1 126 2.079442 2.079442 87 +structur 5 106 2.197225 10.986125 105 +world 3 115 2.197225 6.591675 126 +teach 2 108 2.197225 4.394450 112 +mathemat 2 108 2.197225 4.394450 123 +assist 1 112 2.197225 2.197225 113 +make 1 111 2.197225 2.197225 120 +instructor 1 108 2.197225 2.197225 107 +techniqu 6 99 2.302585 13.815510 138 +advanc 2 99 2.302585 4.605170 130 +need 1 98 2.302585 2.302585 135 +section 6 94 2.397895 14.387370 149 +imag 4 91 2.397895 9.591580 161 +call 1 91 2.397895 2.397895 153 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +search 1 95 2.397895 2.397895 155 +wide 2 84 2.484907 4.969814 185 +requir 2 81 2.484907 4.969814 167 +environ 2 84 2.484907 4.969814 177 +school 2 84 2.484907 4.969814 188 +west 1 83 2.484907 2.484907 192 +librari 1 87 2.484907 2.484907 181 +learn 1 86 2.484907 2.484907 170 +start 1 83 2.484907 2.484907 173 +internet 1 83 2.484907 2.484907 186 +resum 1 79 2.564949 2.564949 217 +orient 1 80 2.564949 2.564949 205 +solv 5 73 2.639057 13.195285 234 +servic 1 72 2.639057 2.639057 236 +addit 1 74 2.639057 2.639057 228 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +materi 1 75 2.639057 2.639057 221 +write 1 72 2.639057 2.639057 222 +logic 1 71 2.639057 2.639057 230 +practic 1 70 2.708050 2.708050 246 +knowledg 1 67 2.708050 2.708050 243 +receiv 1 66 2.708050 2.708050 244 +experi 2 64 2.772589 5.545178 283 +street 1 63 2.772589 2.772589 293 +abstract 1 62 2.772589 2.772589 276 +import 1 65 2.772589 2.772589 282 +result 1 65 2.772589 2.772589 281 +prof 1 64 2.772589 2.772589 273 +virtual 1 62 2.772589 2.772589 285 +artifici 1 63 2.772589 2.772589 280 +copi 1 63 2.772589 2.772589 284 +januari 1 62 2.772589 2.772589 264 +type 1 61 2.833213 2.833213 296 +simpl 1 60 2.833213 2.833213 298 +colleg 1 61 2.833213 2.833213 300 +point 3 58 2.890372 8.671116 319 +explor 3 58 2.890372 8.671116 324 +major 2 56 2.890372 5.780744 315 +cover 2 55 2.944439 5.888878 329 +instruct 2 53 2.944439 5.888878 332 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +finger 1 52 2.995732 2.995732 354 +advisor 1 51 2.995732 2.995732 355 +telephon 2 50 3.044522 6.089044 373 +basic 2 50 3.044522 6.089044 360 +pointer 1 48 3.044522 3.044522 368 +done 1 47 3.091042 3.091042 381 +algebra 1 45 3.135494 3.135494 394 +vision 4 41 3.218876 12.875504 430 +examin 2 42 3.218876 6.437752 424 +http 1 41 3.218876 3.218876 420 +howev 1 41 3.218876 3.218876 422 +multipl 1 39 3.258097 3.258097 453 +error 1 40 3.258097 3.258097 449 +littl 1 39 3.258097 3.258097 454 +vita 1 38 3.295837 3.295837 473 +credit 1 38 3.295837 3.295837 460 +open 1 38 3.295837 3.295837 469 +robot 4 36 3.367296 13.469184 497 +procedur 1 36 3.367296 3.367296 488 +copyright 1 36 3.367296 3.367296 495 +concurr 2 34 3.401197 6.802394 501 +approxim 1 35 3.401197 3.401197 509 +least 1 35 3.401197 3.401197 516 +survei 1 35 3.401197 3.401197 513 +statist 1 35 3.401197 3.401197 521 +next 1 34 3.401197 3.401197 517 +taught 2 33 3.433987 6.867974 526 +curriculum 1 33 3.433987 3.433987 535 +within 1 33 3.433987 3.433987 525 +dissert 2 32 3.465736 6.931472 549 +transform 2 32 3.465736 6.931472 542 +given 1 32 3.465736 3.465736 538 +posit 4 31 3.496508 13.986032 552 +titl 1 31 3.496508 3.496508 556 +exist 1 30 3.555348 3.555348 569 +specifi 1 30 3.555348 3.555348 568 +intend 3 28 3.610918 10.832754 599 +administr 1 27 3.637586 3.637586 628 +determin 1 27 3.637586 3.637586 630 +enabl 1 26 3.688879 3.688879 655 +accur 1 25 3.737670 3.737670 680 +motion 2 24 3.761200 7.522400 699 +honor 1 23 3.806662 3.806662 729 +instead 1 22 3.850148 3.850148 756 +defin 1 22 3.850148 3.850148 746 +identifi 1 22 3.850148 3.850148 760 +navig 2 21 3.912023 7.824046 796 +prepar 2 20 3.951244 7.902488 824 +entir 2 20 3.951244 7.902488 811 +geometr 1 19 4.007333 4.007333 852 +assum 1 19 4.007333 4.007333 845 +lyco 1 19 4.007333 4.007333 871 +minim 1 18 4.060443 4.060443 887 +fortran 6 15 4.248495 25.490970 1027 +drive 1 15 4.248495 4.248495 1052 +club 1 15 4.248495 4.248495 1058 +scene 4 14 4.317488 17.269952 1114 +camera 1 14 4.317488 4.317488 1115 +primarili 2 13 4.382027 8.764054 1185 +essenti 1 13 4.382027 4.382027 1137 +charl 1 13 4.382027 4.382027 1149 +optic 1 12 4.465908 4.465908 1221 +realiti 1 12 4.465908 4.465908 1272 +pascal 1 12 4.465908 4.465908 1213 +pagewelcom 1 11 4.553877 4.553877 1344 +keyword 1 11 4.553877 4.553877 1356 +perspect 1 10 4.653960 4.653960 1437 +prior 1 10 4.653960 4.653960 1438 +observ 3 9 4.753590 14.260770 1578 +assumpt 2 9 4.753590 9.507180 1514 +minimum 1 9 4.753590 4.753590 1555 +occur 1 9 4.753590 4.753590 1572 +dyer 1 9 4.753590 4.753590 1573 +sensit 1 8 4.875197 4.875197 1726 +dimens 2 7 5.010635 10.021270 1930 +elementari 2 7 5.010635 10.021270 1825 +bestor 8 6 5.164786 41.318288 2099 +recov 1 6 5.164786 5.164786 2235 +restrict 1 6 5.164786 5.164786 2129 +constrain 1 6 5.164786 5.164786 2042 +gareth 3 5 5.347108 16.041324 2392 +rigid 2 5 5.347108 10.694216 2432 +unknown 1 5 5.347108 5.347108 2318 +consent 1 5 5.347108 5.347108 2389 +invers 4 4 5.568345 22.273380 2764 +observatori 1 4 5.568345 5.568345 3070 +projector 4 3 5.857933 23.431732 3409 +edueduc 1 3 5.857933 5.857933 4004 +coursework 1 3 5.857933 5.857933 3588 +duti 1 3 5.857933 5.857933 3317 +prereq 1 3 5.857933 5.857933 3178 +wiscinfo 1 3 5.857933 5.857933 3106 +hoofer 1 2 6.263398 6.263398 6101 +out 1 2 6.263398 6.263398 6090 +nextstep 1 2 6.263398 6.263398 6102 +zealand 3 1 6.957497 20.872491 17678 +massei 2 1 6.957497 13.914994 17679 +pagegareth 1 1 6.957497 6.957497 17680 +dpl 1 1 6.957497 6.957497 17681 +dacc 1 1 6.957497 6.957497 17682 +nois 1 1 6.957497 6.957497 17683 +tradition 1 1 6.957497 6.957497 17684 +intersect 1 1 6.957497 6.957497 17685 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html new file mode 100644 index 00000000..3795739b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~beyer^beyer.html @@ -0,0 +1,34 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +project 2 340 1.098612 2.197224 18 +cours 2 273 1.098612 2.197224 15 +student 1 343 1.098612 1.098612 19 +wisc 2 242 1.386294 2.772588 33 +graduat 2 215 1.386294 2.772588 31 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +area 1 144 1.945910 1.945910 80 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +west 1 83 2.484907 2.484907 192 +local 1 55 2.944439 2.944439 334 +undergradu 1 54 2.944439 2.944439 338 +advisor 1 51 2.995732 2.995732 355 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +ramakrishnan 1 16 4.174387 4.174387 972 +raghu 1 12 4.465908 4.465908 1212 +kevin 1 9 4.753590 4.753590 1482 +assistantdepart 1 8 4.875197 4.875197 1784 +coral 1 5 5.347108 5.347108 2538 +beyer 2 2 6.263398 12.526796 6103 +caution 1 2 6.263398 6.263398 4754 +pagekevin 1 1 6.957497 6.957497 17686 +beyerbey 1 1 6.957497 6.957497 17687 +researchresearch 1 1 6.957497 6.957497 17688 +coursesinstruct 1 1 6.957497 6.957497 17689 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html new file mode 100644 index 00000000..2555e4a3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bezenek^bezenek.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +offic 1 299 1.098612 1.098612 13 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +oper 1 180 1.609438 1.609438 34 +phone 2 175 1.791759 3.583518 45 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +advanc 1 99 2.302585 2.302585 130 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +associ 1 93 2.397895 2.397895 151 +west 1 83 2.484907 2.484907 192 +html 1 75 2.639057 2.639057 235 +window 2 68 2.708050 5.416100 242 +street 1 63 2.772589 2.772589 293 +back 1 60 2.833213 2.833213 297 +point 1 58 2.890372 2.890372 319 +past 1 42 3.218876 3.218876 428 +press 1 42 3.218876 3.218876 419 +cach 1 41 3.218876 3.218876 432 +microsoft 1 38 3.295837 3.295837 468 +express 1 32 3.465736 3.465736 540 +actual 1 28 3.610918 3.610918 604 +packag 1 28 3.610918 3.610918 614 +consist 1 26 3.688879 3.688879 651 +size 1 23 3.806662 3.806662 713 +miller 1 17 4.110874 4.110874 949 +todd 1 15 4.248495 4.248495 1051 +introduc 1 13 4.382027 4.382027 1139 +faster 1 11 4.553877 4.553877 1323 +bart 1 9 4.753590 4.753590 1559 +microprocessor 1 7 5.010635 5.010635 1808 +yeah 1 6 5.164786 5.164786 2195 +locomot 1 2 6.263398 6.263398 5807 +skew 1 2 6.263398 6.263398 6057 +bezenek 3 1 6.957497 20.872491 17690 +pith 2 1 6.957497 13.914994 17691 +toddm 1 1 6.957497 6.957497 17692 +cpu 1 1 6.957497 6.957497 17693 +_great 1 1 6.957497 6.957497 17694 +present_ 1 1 6.957497 6.957497 17695 +uregina 1 1 6.957497 6.957497 17696 +bayko 1 1 6.957497 6.957497 17697 +squeez 1 1 6.957497 6.957497 17698 +skateboard 1 1 6.957497 6.957497 17699 +helen 1 1 6.957497 6.957497 17700 +custer 1 1 6.957497 6.957497 17701 +_insid 1 1 6.957497 6.957497 17702 +pithi 1 1 6.957497 6.957497 17703 +abound 1 1 6.957497 6.957497 17704 +edubezenek 1 1 6.957497 6.957497 17705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html new file mode 100644 index 00000000..e561921d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bockrath^bockrath.html @@ -0,0 +1,55 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 1 443 0.693147 0.693147 6 +depart 1 457 0.693147 0.693147 12 +student 2 343 1.098612 2.197224 19 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +graduat 2 215 1.386294 2.772588 31 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +hour 1 165 1.791759 1.791759 46 +distribut 1 162 1.791759 1.791759 51 +click 1 142 1.945910 1.945910 78 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +schedul 1 119 2.079442 2.079442 85 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +make 1 111 2.197225 2.197225 120 +site 1 106 2.197225 2.197225 119 +section 4 94 2.397895 9.591580 149 +pictur 1 89 2.397895 2.397895 160 +grade 1 90 2.397895 2.397895 142 +comment 1 93 2.397895 2.397895 146 +info 2 85 2.484907 4.969814 176 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +mondai 1 77 2.564949 2.564949 206 +free 1 73 2.639057 2.639057 224 +onlin 1 75 2.639057 2.639057 223 +simul 1 66 2.708050 2.708050 255 +wednesdai 1 64 2.772589 2.772589 261 +back 1 60 2.833213 2.833213 297 +anoth 1 45 3.135494 3.135494 408 +review 1 42 3.218876 3.218876 425 +word 1 34 3.401197 3.401197 508 +held 1 28 3.610918 3.610918 600 +dai 1 22 3.850148 3.850148 753 +quiz 1 16 4.174387 4.174387 990 +anywai 1 15 4.248495 4.248495 1047 +speech 1 12 4.465908 4.465908 1222 +neat 1 12 4.465908 4.465908 1263 +averag 1 6 5.164786 5.164786 2098 +jpeg 1 6 5.164786 5.164786 2053 +condor 1 5 5.347108 5.347108 2577 +nathan 2 4 5.568345 11.136690 2794 +bockrath 4 3 5.857933 23.431732 3420 +viru 2 2 6.263398 12.526796 4782 +nate 1 2 6.263398 6.263398 5720 +macro 1 2 6.263398 6.263398 5686 +pageoth 1 2 6.263398 6.263398 6104 +pageback 1 1 6.957497 6.957497 17706 +oraclesend 1 1 6.957497 6.957497 17707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html new file mode 100644 index 00000000..0d255158 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~bolo^bolo.html @@ -0,0 +1,289 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +work 7 380 0.693147 4.852029 9 +system 6 443 0.693147 4.158882 6 +depart 2 457 0.693147 1.386294 12 +inform 1 412 0.693147 0.693147 8 +time 5 293 1.098612 5.493060 17 +engin 3 297 1.098612 3.295836 20 +last 1 314 1.098612 1.098612 14 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +softwar 3 220 1.386294 4.158882 30 +design 2 213 1.386294 2.772588 25 +also 2 259 1.386294 2.772588 28 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +develop 3 174 1.791759 5.375277 53 +implement 2 152 1.791759 3.583518 52 +parallel 2 169 1.791759 3.583518 60 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +year 4 148 1.945910 7.783640 84 +like 2 132 1.945910 3.891820 81 +object 2 138 1.945910 3.891820 79 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +databas 2 122 2.079442 4.158884 86 +provid 2 121 2.079442 4.158884 94 +number 1 130 2.079442 2.079442 97 +person 2 111 2.197225 4.394450 117 +place 2 106 2.197225 4.394450 124 +world 2 115 2.197225 4.394450 126 +site 1 106 2.197225 2.197225 119 +part 2 98 2.302585 4.605170 129 +take 2 97 2.302585 4.605170 134 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +associ 3 93 2.397895 7.193685 151 +call 2 91 2.397895 4.795790 153 +mani 2 92 2.397895 4.795790 150 +follow 2 92 2.397895 4.795790 143 +question 1 91 2.397895 2.397895 141 +real 1 93 2.397895 2.397895 144 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +select 1 91 2.397895 2.397895 154 +thing 6 84 2.484907 14.909442 189 +member 3 84 2.484907 7.454721 165 +internet 2 83 2.484907 4.969814 186 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +solut 1 82 2.484907 2.484907 162 +help 1 83 2.484907 2.484907 175 +school 1 84 2.484907 2.484907 188 +complet 1 77 2.564949 2.564949 208 +appear 1 78 2.564949 2.564949 210 +orient 1 80 2.564949 2.564949 205 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +line 2 75 2.639057 5.278114 231 +appli 1 71 2.639057 2.639057 226 +david 1 71 2.639057 2.639057 232 +addit 1 74 2.639057 2.639057 228 +free 1 73 2.639057 2.639057 224 +meet 1 72 2.639057 2.639057 229 +degre 1 69 2.708050 2.708050 259 +thursdai 1 70 2.708050 2.708050 241 +new 4 64 2.772589 11.090356 262 +creat 1 63 2.772589 2.772589 277 +organ 1 65 2.772589 2.772589 265 +visit 1 63 2.772589 2.772589 288 +import 1 65 2.772589 2.772589 282 +type 1 61 2.833213 2.833213 296 +plai 1 60 2.833213 2.833213 307 +locat 1 59 2.833213 2.833213 303 +unix 1 58 2.890372 2.890372 308 +local 1 55 2.944439 2.944439 334 +much 2 52 2.995732 5.991464 349 +case 1 51 2.995732 2.995732 351 +run 1 51 2.995732 2.995732 347 +right 1 48 3.044522 3.044522 363 +friend 1 48 3.044522 3.044522 376 +done 1 47 3.091042 3.091042 381 +howev 1 41 3.218876 3.218876 422 +realli 1 40 3.258097 3.258097 444 +societi 1 40 3.258097 3.258097 456 +prototyp 1 38 3.295837 3.295837 463 +hand 3 37 3.332205 9.996615 475 +game 1 36 3.367296 3.367296 498 +short 1 36 3.367296 3.367296 499 +everi 2 34 3.401197 6.802394 519 +word 1 34 3.401197 3.401197 508 +either 1 35 3.401197 3.401197 506 +print 1 34 3.401197 3.401197 503 +queri 1 33 3.433987 3.433987 524 +kind 1 32 3.465736 3.465736 541 +scientist 3 31 3.496508 10.489524 560 +often 1 31 3.496508 3.496508 551 +taken 1 31 3.496508 3.496508 555 +storag 1 31 3.496508 3.496508 553 +someth 1 31 3.496508 3.496508 554 +except 1 28 3.610918 3.610918 607 +quit 1 27 3.637586 3.637586 633 +administr 1 27 3.637586 3.637586 628 +though 1 27 3.637586 3.637586 622 +campu 1 27 3.637586 3.637586 623 +rather 2 26 3.688879 7.377758 642 +enjoi 1 26 3.688879 3.688879 660 +experiment 1 26 3.688879 3.688879 645 +although 2 25 3.737670 7.475340 667 +wai 2 25 3.737670 7.475340 662 +notic 1 25 3.737670 3.737670 675 +task 1 25 3.737670 3.737670 678 +store 2 24 3.761200 7.522400 693 +interpret 1 24 3.761200 3.761200 686 +consult 1 24 3.761200 3.761200 687 +alwai 1 24 3.761200 3.761200 691 +methodolog 1 23 3.806662 3.806662 733 +try 2 22 3.850148 7.700296 764 +defin 1 22 3.850148 3.850148 746 +william 1 22 3.850148 3.850148 765 +util 1 21 3.912023 3.912023 774 +voic 1 21 3.912023 3.912023 806 +tell 1 21 3.912023 3.912023 777 +wonder 2 20 3.951244 7.902488 815 +sure 1 20 3.951244 3.951244 813 +kernel 1 20 3.951244 3.951244 825 +longer 1 20 3.951244 3.951244 816 +entir 1 20 3.951244 3.951244 811 +els 2 19 4.007333 8.014666 843 +five 1 19 4.007333 4.007333 841 +histori 1 19 4.007333 4.007333 853 +along 2 18 4.060443 8.120886 878 +thoma 1 18 4.060443 4.060443 901 +seem 1 18 4.060443 4.060443 899 +whole 1 17 4.110874 4.110874 940 +anyth 1 16 4.174387 4.174387 998 +across 1 16 4.174387 4.174387 974 +enough 1 15 4.248495 4.248495 1040 +purchas 1 15 4.248495 4.248495 1030 +drive 1 15 4.248495 4.248495 1052 +goe 1 15 4.248495 4.248495 1044 +draw 1 14 4.317488 4.317488 1086 +comic 1 14 4.317488 4.317488 1103 +role 1 14 4.317488 4.317488 1101 +becam 1 14 4.317488 4.317488 1117 +care 2 13 4.382027 8.764054 1177 +everyon 1 13 4.382027 4.382027 1148 +forth 1 13 4.382027 4.382027 1186 +everyth 1 13 4.382027 4.382027 1169 +dewitt 2 12 4.465908 8.931816 1270 +reader 1 12 4.465908 4.465908 1246 +usenix 1 12 4.465908 4.465908 1240 +shore 2 11 4.553877 9.107754 1377 +road 1 11 4.553877 4.553877 1374 +lake 1 11 4.553877 4.553877 1373 +night 1 11 4.553877 4.553877 1319 +thecomput 1 10 4.653960 4.653960 1408 +drink 2 9 4.753590 9.507180 1607 +occur 1 9 4.753590 4.753590 1572 +departmentof 1 9 4.753590 4.753590 1539 +architect 1 8 4.875197 4.875197 1624 +job 1 8 4.875197 4.875197 1702 +paradis 1 8 4.875197 4.875197 1782 +port 1 8 4.875197 4.875197 1766 +burger 3 7 5.010635 15.031905 1889 +bore 2 7 5.010635 10.021270 1948 +usenet 2 7 5.010635 10.021270 1839 +throughout 1 7 5.010635 5.010635 1871 +parent 2 6 5.164786 10.329572 2204 +beer 2 6 5.164786 10.329572 2216 +sleep 2 6 5.164786 10.329572 2211 +relax 1 6 5.164786 5.164786 2120 +fiction 1 6 5.164786 5.164786 2217 +railroad 1 6 5.164786 5.164786 2161 +put 1 6 5.164786 5.164786 2017 +gate 1 6 5.164786 5.164786 2182 +famou 1 6 5.164786 5.164786 2185 +geograph 1 6 5.164786 5.164786 2236 +whatev 1 6 5.164786 5.164786 2097 +benefit 1 6 5.164786 5.164786 2213 +divers 1 6 5.164786 5.164786 2232 +semi 1 5 5.347108 5.347108 2510 +east 1 5 5.347108 5.347108 2472 +matur 1 5 5.347108 5.347108 2269 +advic 1 5 5.347108 5.347108 2509 +aircraft 2 4 5.568345 11.136690 2872 +moon 1 4 5.568345 5.568345 2991 +haven 1 4 5.568345 5.568345 3037 +hacker 2 3 5.857933 11.715866 3996 +tiger 2 3 5.857933 11.715866 3897 +roll 1 3 5.857933 5.857933 3723 +pai 1 3 5.857933 5.857933 3672 +tremend 1 3 5.857933 5.857933 3453 +insan 1 3 5.857933 5.857933 4006 +beat 1 3 5.857933 5.857933 3840 +gamma 1 3 5.857933 5.857933 3219 +workin 1 3 5.857933 5.857933 3938 +agre 1 3 5.857933 5.857933 4007 +owner 1 3 5.857933 5.857933 3531 +pilot 1 3 5.857933 5.857933 4008 +acquaint 1 3 5.857933 5.857933 3468 +weekend 1 3 5.857933 5.857933 3357 +timeoper 1 2 6.263398 6.263398 4363 +woodwork 1 2 6.263398 6.263398 5463 +brew 1 2 6.263398 6.263398 5988 +disagre 1 2 6.263398 6.263398 6105 +pursuit 1 2 6.263398 6.263398 6048 +fly 1 2 6.263398 6.263398 5937 +stripe 1 2 6.263398 6.263398 6106 +creatur 1 2 6.263398 6.263398 6107 +leap 1 2 6.263398 6.263398 5654 +tovisit 1 2 6.263398 6.263398 4686 +that 1 2 6.263398 6.263398 5111 +differentarchitectur 1 2 6.263398 6.263398 6051 +hord 1 2 6.263398 6.263398 5917 +slowli 1 2 6.263398 6.263398 5363 +pagelast 1 2 6.263398 6.263398 5793 +bolo 4 1 6.957497 27.829988 17708 +uwvax 3 1 6.957497 20.872491 17709 +josef 2 1 6.957497 13.914994 17710 +uucp 2 1 6.957497 13.914994 17711 +essen 2 1 6.957497 13.914994 17712 +hau 2 1 6.957497 13.914994 17713 +bolobologreet 1 1 6.957497 6.957497 17714 +christen 1 1 6.957497 6.957497 17715 +mebolo 1 1 6.957497 6.957497 17716 +bestexplan 1 1 6.957497 6.957497 17717 +bywhat 1 1 6.957497 6.957497 17718 +acomput 1 1 6.957497 6.957497 17719 +shudder 1 1 6.957497 6.957497 17720 +newoper 1 1 6.957497 6.957497 17721 +sameto 1 1 6.957497 6.957497 17722 +myroomm 1 1 6.957497 6.957497 17723 +sublim 1 1 6.957497 6.957497 17724 +thetig 1 1 6.957497 6.957497 17725 +blake 1 1 6.957497 6.957497 17726 +poemtyg 1 1 6.957497 6.957497 17727 +tyger 1 1 6.957497 6.957497 17728 +againin 1 1 6.957497 6.957497 17729 +ahous 1 1 6.957497 6.957497 17730 +isjosef 1 1 6.957497 6.957497 17731 +roadmonona 1 1 6.957497 6.957497 17732 +workwork 1 1 6.957497 6.957497 17733 +banana 1 1 6.957497 6.957497 17734 +grung 1 1 6.957497 6.957497 17735 +perhapssom 1 1 6.957497 6.957497 17736 +othermonth 1 1 6.957497 6.957497 17737 +intosubmiss 1 1 6.957497 6.957497 17738 +andstar 1 1 6.957497 6.957497 17739 +fordav 1 1 6.957497 6.957497 17740 +wiss 1 1 6.957497 6.957497 17741 +themadison 1 1 6.957497 6.957497 17742 +campusof 1 1 6.957497 6.957497 17743 +peninsula 1 1 6.957497 6.957497 17744 +technicalexpertis 1 1 6.957497 6.957497 17745 +newsystem 1 1 6.957497 6.957497 17746 +reviv 1 1 6.957497 6.957497 17747 +oddbal 1 1 6.957497 6.957497 17748 +tasksar 1 1 6.957497 6.957497 17749 +serverbut 1 1 6.957497 6.957497 17750 +mostlyempti 1 1 6.957497 6.957497 17751 +activitiesuwvaxi 1 1 6.957497 6.957497 17752 +svolunt 1 1 6.957497 6.957497 17753 +organizationsi 1 1 6.957497 6.957497 17754 +oftenhav 1 1 6.957497 6.957497 17755 +usersof 1 1 6.957497 6.957497 17756 +aopa 1 1 6.957497 6.957497 17757 +blitz 1 1 6.957497 6.957497 17758 +drinkingwhen 1 1 6.957497 6.957497 17759 +friendsand 1 1 6.957497 6.957497 17760 +loftili 1 1 6.957497 6.957497 17761 +labelledblitz 1 1 6.957497 6.957497 17762 +ofoctoberfest 1 1 6.957497 6.957497 17763 +chud 1 1 6.957497 6.957497 17764 +accumulateda 1 1 6.957497 6.957497 17765 +whatnotof 1 1 6.957497 6.957497 17766 +charad 1 1 6.957497 6.957497 17767 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~brad^brad.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~brad^brad.html new file mode 100644 index 00000000..52b59a47 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~brad^brad.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +page 8 705 0.000000 0.000000 3 +home 6 672 0.000000 0.000000 1 +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +us 1 329 1.098612 1.098612 16 +link 3 247 1.386294 4.158882 24 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +oper 2 180 1.609438 3.218876 34 +network 1 168 1.791759 1.791759 61 +model 1 145 1.945910 1.945910 69 +welcom 1 122 2.079442 2.079442 99 +check 1 115 2.197225 2.197225 118 +world 1 115 2.197225 2.197225 126 +advanc 1 99 2.302585 2.302585 130 +search 2 95 2.397895 4.795790 155 +homepag 1 93 2.397895 2.397895 148 +would 1 67 2.708050 2.708050 251 +much 1 52 2.995732 2.995732 349 +possibl 1 47 3.091042 3.091042 378 +probabl 1 40 3.258097 3.258097 455 +intro 1 17 4.110874 4.110874 915 +brad 2 12 4.465908 8.931816 1264 +bore 1 7 5.010635 5.010635 1948 +athlet 1 7 5.010635 5.010635 1933 +altavista 1 6 5.164786 5.164786 2222 +aim 1 5 5.347108 5.347108 2477 +jazz 1 5 5.347108 5.347108 2527 +thayer 1 3 5.857933 5.857933 3441 +thec 1 3 5.857933 5.857933 3132 +badger 1 3 5.857933 5.857933 3502 +packer 1 3 5.857933 5.857933 3728 +foolish 1 2 6.263398 6.263398 6108 +pepper 1 2 6.263398 6.263398 6013 +pagesom 1 2 6.263398 6.263398 6109 +adress 1 2 6.263398 6.263398 5168 +pageuw 1 2 6.263398 6.263398 6021 +neglect 1 1 6.957497 6.957497 17768 +seminaranywai 1 1 6.957497 6.957497 17769 +beaucoup 1 1 6.957497 6.957497 17770 +boir 1 1 6.957497 6.957497 17771 +enginefind 1 1 6.957497 6.957497 17772 +wideth 1 1 6.957497 6.957497 17773 +duan 1 1 6.957497 6.957497 17774 +mclaughlin 1 1 6.957497 6.957497 17775 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~breach^breach.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~breach^breach.html new file mode 100644 index 00000000..f6782a3b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~breach^breach.html @@ -0,0 +1,65 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +austin 1 168 1.791759 1.791759 63 +implement 1 152 1.791759 1.791759 52 +architectur 1 139 1.945910 1.945910 77 +file 1 132 1.945910 1.945910 70 +dayton 1 119 2.079442 2.079442 104 +intern 2 108 2.197225 4.394450 128 +access 1 102 2.302585 2.302585 136 +west 1 83 2.484907 2.484907 192 +symposium 2 72 2.639057 5.278114 238 +effici 1 73 2.639057 2.639057 233 +septemb 1 65 2.772589 2.772589 274 +pointer 1 48 3.044522 3.044522 368 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +arrai 1 27 3.637586 3.637586 627 +detect 1 26 3.688879 3.688879 646 +scott 4 18 4.060443 16.241772 884 +regist 1 17 4.110874 4.110874 938 +mellon 1 13 4.382027 4.382027 1179 +carnegi 1 12 4.465908 4.465908 1260 +multiscalar 1 8 4.875197 4.875197 1783 +sohi 1 6 5.164786 5.164786 2237 +microarchitectur 1 6 5.164786 5.164786 2238 +pagescott 1 4 5.568345 5.568345 2978 +breach 8 3 5.857933 46.863464 4009 +recreat 1 3 5.857933 5.857933 3990 +anatomi 1 3 5.857933 5.857933 4010 +vijaykumar 1 3 5.857933 5.857933 4011 +gurindar 2 2 6.263398 12.526796 6110 +usatel 1 2 6.263398 6.263398 6111 +educationph 1 2 6.263398 6.263398 6112 +interestscomput 1 2 6.263398 6.263398 6113 +addresseseducationresearch 1 1 6.957497 6.957497 17776 +associatesaddressesscott 1 1 6.957497 6.957497 17777 +breachdepart 1 1 6.957497 6.957497 17778 +advisorguri 1 1 6.957497 6.957497 17779 +sohiresearch 1 1 6.957497 6.957497 17780 +architecturemultiscalarpublicationsmultiscalar 1 1 6.957497 6.957497 17781 +processorsgurindar 1 1 6.957497 6.957497 17782 +vijaykumarnd 1 1 6.957497 6.957497 17783 +processorscott 1 1 6.957497 6.957497 17784 +sohith 1 1 6.957497 6.957497 17785 +errorstodd 1 1 6.957497 6.957497 17786 +sohiconfer 1 1 6.957497 6.957497 17787 +recreationwingsbeersquidtvassociatestodd 1 1 6.957497 6.957497 17788 +austindoug 1 1 6.957497 6.957497 17789 +burgerbabak 1 1 6.957497 6.957497 17790 +falsafialain 1 1 6.957497 6.957497 17791 +kagit 1 1 6.957497 6.957497 17792 +vijaykumarlast 1 1 6.957497 6.957497 17793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html new file mode 100644 index 00000000..4fcd35e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~burnett^burnett.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +fall 2 181 1.609438 3.218876 40 +provid 1 121 2.079442 2.079442 94 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +make 1 111 2.197225 2.197225 120 +sinc 2 90 2.397895 4.795790 159 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +name 2 72 2.639057 5.278114 220 +differ 1 66 2.708050 2.708050 253 +get 1 46 3.091042 3.091042 380 +societi 1 40 3.258097 3.258097 456 +hand 1 37 3.332205 3.332205 475 +bookmark 1 26 3.688879 3.688879 639 +notic 1 25 3.737670 3.737670 675 +brief 1 16 4.174387 4.174387 1001 +hobbi 1 16 4.174387 4.174387 1009 +zhang 1 16 4.174387 4.174387 980 +becam 1 14 4.317488 4.317488 1117 +unfortun 1 13 4.382027 4.382027 1170 +stai 1 12 4.465908 4.465908 1215 +said 1 9 4.753590 4.753590 1571 +poor 1 8 4.875197 4.875197 1736 +perhap 1 8 4.875197 4.875197 1693 +smooth 1 7 5.010635 5.010635 1855 +whenev 1 7 5.010635 5.010635 1883 +wouldn 1 7 5.010635 5.010635 1970 +smile 1 7 5.010635 5.010635 1807 +pool 1 6 5.164786 5.164786 2225 +suni 2 5 5.347108 10.694216 2452 +skin 1 4 5.568345 5.568345 2840 +temporarili 1 3 5.857933 5.857933 3692 +crack 1 3 5.857933 5.857933 3435 +roll 1 3 5.857933 5.857933 3723 +isaac 1 3 5.857933 5.857933 3855 +albani 2 2 6.263398 12.526796 4892 +tragic 1 2 6.263398 6.263398 6114 +theblack 1 2 6.263398 6.263398 5869 +eggleston 1 2 6.263398 6.263398 4581 +bleed 1 1 6.957497 6.957497 17794 +nontrivi 1 1 6.957497 6.957497 17795 +waysher 1 1 6.957497 6.957497 17796 +underst 1 1 6.957497 6.957497 17797 +unadorn 1 1 6.957497 6.957497 17798 +pizza 1 1 6.957497 6.957497 17799 +stinkin 1 1 6.957497 6.957497 17800 +myclass 1 1 6.957497 6.957497 17801 +hypersensit 1 1 6.957497 6.957497 17802 +rockjock 1 1 6.957497 6.957497 17803 +cretin 1 1 6.957497 6.957497 17804 +brood 1 1 6.957497 6.957497 17805 +glare 1 1 6.957497 6.957497 17806 +clenchesfist 1 1 6.957497 6.957497 17807 +knuckl 1 1 6.957497 6.957497 17808 +flightyfemm 1 1 6.957497 6.957497 17809 +razz 1 1 6.957497 6.957497 17810 +asskick 1 1 6.957497 6.957497 17811 +thirdgrad 1 1 6.957497 6.957497 17812 +hardbodi 1 1 6.957497 6.957497 17813 +leatherboi 1 1 6.957497 6.957497 17814 +leer 1 1 6.957497 6.957497 17815 +atm 1 1 6.957497 6.957497 17816 +todayi 1 1 6.957497 6.957497 17817 +giggl 1 1 6.957497 6.957497 17818 +aprostitut 1 1 6.957497 6.957497 17819 +bigotri 1 1 6.957497 6.957497 17820 +pedagodi 1 1 6.957497 6.957497 17821 +goat 1 1 6.957497 6.957497 17822 +refus 1 1 6.957497 6.957497 17823 +claw 1 1 6.957497 6.957497 17824 +sssuuuhhh 1 1 6.957497 6.957497 17825 +mmuuuhhhh 1 1 6.957497 6.957497 17826 +dddduuuuuhhhhh 1 1 6.957497 6.957497 17827 +mmmmuuuhhhh 1 1 6.957497 6.957497 17828 +maaaahhhjaaaaaahhhhh 1 1 6.957497 6.957497 17829 +fffuuuhhhhh 1 1 6.957497 6.957497 17830 +yyyyyyyuuuuuhhhhh 1 1 6.957497 6.957497 17831 +mmmmmuuuuuhhhhhmmmmuuuhhhhh 1 1 6.957497 6.957497 17832 +uuuhhh 1 1 6.957497 6.957497 17833 +uuummmm 1 1 6.957497 6.957497 17834 +uuuhhhh 1 1 6.957497 6.957497 17835 +wwwwwhhhhuuuuuhhhhh 1 1 6.957497 6.957497 17836 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cao^cao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cao^cao.html new file mode 100644 index 00000000..d3559385 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cao^cao.html @@ -0,0 +1,155 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +univers 4 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +system 13 443 0.693147 9.010911 6 +research 6 431 0.693147 4.158882 10 +interest 2 384 0.693147 1.386294 11 +depart 2 457 0.693147 1.386294 12 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +wisc 3 242 1.386294 4.158882 33 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +oper 4 180 1.609438 6.437752 34 +paper 1 205 1.609438 1.609438 38 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +applic 10 170 1.791759 17.917590 56 +parallel 6 169 1.791759 10.750554 60 +recent 3 167 1.791759 5.375277 58 +implement 2 152 1.791759 3.583518 52 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +algorithm 1 162 1.791759 1.791759 57 +file 8 132 1.945910 15.567280 70 +perform 6 143 1.945910 11.675460 74 +professor 1 137 1.945910 1.945910 76 +first 1 140 1.945910 1.945910 71 +architectur 1 139 1.945910 1.945910 77 +high 2 130 2.079442 4.158884 101 +report 2 131 2.079442 4.158884 92 +confer 2 126 2.079442 4.158884 100 +dayton 1 119 2.079442 2.079442 104 +spring 1 131 2.079442 2.079442 88 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +schedul 1 119 2.079442 2.079442 85 +assist 1 112 2.197225 2.197225 113 +topic 1 114 2.197225 2.197225 110 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +specif 1 106 2.197225 2.197225 106 +techniqu 2 99 2.302585 4.605170 138 +memori 1 101 2.302585 2.302585 139 +advanc 1 99 2.302585 2.302585 130 +access 1 102 2.302585 2.302585 136 +technic 1 100 2.302585 2.302585 140 +proceed 5 93 2.397895 11.989475 152 +present 1 91 2.397895 2.397895 145 +mani 1 92 2.397895 2.397895 150 +control 6 82 2.484907 14.909442 164 +educ 2 86 2.484907 4.969814 191 +resourc 2 81 2.484907 4.969814 172 +west 1 83 2.484907 2.484907 192 +novemb 1 81 2.484907 2.484907 179 +optim 1 79 2.564949 2.564949 197 +appear 1 78 2.564949 2.564949 210 +good 1 77 2.564949 2.564949 200 +summari 1 73 2.639057 2.639057 237 +symposium 1 72 2.639057 2.639057 238 +addit 1 74 2.639057 2.639057 228 +integr 4 67 2.708050 10.832200 245 +simul 1 66 2.708050 2.708050 255 +polici 3 64 2.772589 8.317767 279 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +thesi 1 57 2.890372 2.890372 327 +summer 1 56 2.890372 2.890372 311 +talk 1 53 2.944439 2.944439 336 +investig 2 51 2.995732 5.991464 353 +particular 1 51 2.995732 2.995732 352 +physic 2 47 3.091042 6.182084 377 +cach 9 41 3.218876 28.969884 432 +streetmadison 1 38 3.295837 3.295837 474 +slide 1 38 3.295837 3.295837 467 +prototyp 1 38 3.295837 3.295837 463 +sciencesunivers 1 37 3.332205 3.332205 486 +china 1 37 3.332205 3.332205 487 +respons 1 37 3.332205 3.332205 476 +tech 2 35 3.401197 6.802394 515 +global 1 34 3.401197 3.401197 520 +john 1 33 3.433987 3.433987 532 +extend 1 32 3.465736 3.465736 539 +storag 1 31 3.496508 3.496508 553 +focus 1 29 3.583519 3.583519 584 +trace 1 25 3.737670 3.737670 677 +strategi 1 25 3.737670 3.737670 682 +disk 3 22 3.850148 11.550444 747 +alloc 3 20 3.951244 11.853732 821 +kernel 2 20 3.951244 7.902488 825 +department 1 20 3.951244 3.951244 839 +beij 1 19 4.007333 4.007333 876 +princeton 5 15 4.248495 21.242475 1042 +eduphon 1 15 4.248495 4.248495 1060 +decid 1 14 4.317488 4.317488 1075 +karlin 3 13 4.382027 13.146081 1176 +sigmetr 2 13 4.382027 8.764054 1173 +tsinghua 1 13 4.382027 4.382027 1195 +anna 3 12 4.465908 13.397724 1292 +usenix 1 12 4.465908 4.465908 1240 +isca 1 11 4.553877 4.553877 1354 +osdi 2 9 4.753590 9.507180 1534 +significantli 1 9 4.753590 4.753590 1508 +uniprocessor 2 8 4.875197 9.750394 1696 +replac 2 8 4.875197 9.750394 1668 +prefetch 12 6 5.164786 61.977432 2039 +edward 4 6 5.164786 20.659144 2050 +sciencedepart 1 6 5.164786 5.164786 2172 +carefulli 1 6 5.164786 5.164786 2045 +chosen 1 6 5.164786 5.164786 1984 +toc 1 5 5.347108 5.347108 2562 +summarymi 1 5 5.347108 5.347108 2580 +havedevelop 1 4 5.568345 5.568345 2681 +felten 5 3 5.857933 29.289665 3925 +cachingtraci 1 3 5.857933 5.857933 3923 +kimbrel 1 3 5.857933 5.857933 3924 +shorter 1 3 5.857933 5.857933 3998 +raid 1 3 5.857933 5.857933 4012 +aggress 1 3 5.857933 5.857933 3240 +wilk 1 2 6.263398 6.263398 4548 +andpostscript 1 2 6.263398 6.263398 5696 +andparallel 1 2 6.263398 6.263398 6014 +princetonunivers 2 1 6.957497 13.914994 17837 +usacao 1 1 6.957497 6.957497 17838 +cachingacf 1 1 6.957497 6.957497 17839 +tracesrec 1 1 6.957497 6.957497 17840 +papersintegr 1 1 6.957497 6.957497 17841 +schedulingpei 1 1 6.957497 6.957497 17842 +strategiespei 1 1 6.957497 6.957497 17843 +peform 1 1 6.957497 6.957497 17844 +tickertaip 1 1 6.957497 6.957497 17845 +swee 1 1 6.957497 6.957497 17846 +boon 1 1 6.957497 6.957497 17847 +shivakumar 1 1 6.957497 6.957497 17848 +venkataraman 1 1 6.957497 6.957497 17849 +talksslid 1 1 6.957497 6.957497 17850 +improvefil 1 1 6.957497 6.957497 17851 +filecach 1 1 6.957497 6.957497 17852 +individualappl 1 1 6.957497 6.957497 17853 +useit 1 1 6.957497 6.957497 17854 +fairglob 1 1 6.957497 6.957497 17855 +cachereplac 1 1 6.957497 6.957497 17856 +implementationon 1 1 6.957497 6.957497 17857 +demonstratedthat 1 1 6.957497 6.957497 17858 +informationcan 1 1 6.957497 6.957497 17859 +amdevelop 1 1 6.957497 6.957497 17860 +diskarrai 1 1 6.957497 6.957497 17861 +managementproblem 1 1 6.957497 6.957497 17862 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~carey^carey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~carey^carey.html new file mode 100644 index 00000000..5714eb20 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~carey^carey.html @@ -0,0 +1,285 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 13 443 0.693147 9.010911 6 +work 6 380 0.693147 4.158882 9 +research 5 431 0.693147 3.465735 10 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +project 5 340 1.098612 5.493060 18 +time 3 293 1.098612 3.295836 17 +student 2 343 1.098612 2.197224 19 +current 1 284 1.098612 1.098612 21 +gener 2 220 1.386294 2.772588 27 +design 2 213 1.386294 2.772588 25 +languag 2 227 1.386294 2.772588 26 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +includ 1 208 1.609438 1.609438 42 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +data 7 170 1.791759 12.542313 49 +madison 3 165 1.791759 5.375277 55 +applic 3 170 1.791759 5.375277 56 +base 2 165 1.791759 3.583518 50 +recent 2 167 1.791759 3.583518 58 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +austin 1 168 1.791759 1.791759 63 +algorithm 1 162 1.791759 1.791759 57 +perform 5 143 1.945910 9.729550 74 +object 4 138 1.945910 7.783640 79 +area 3 144 1.945910 5.837730 80 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +hall 1 146 1.945910 1.945910 65 +model 1 145 1.945910 1.945910 69 +databas 7 122 2.079442 14.556094 86 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +technolog 1 131 2.079442 2.079442 102 +manag 5 114 2.197225 10.986125 125 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +techniqu 1 99 2.302585 2.302585 138 +user 1 104 2.302585 2.302585 137 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +memori 1 101 2.302585 2.302585 139 +center 2 88 2.397895 4.795790 158 +call 2 91 2.397895 4.795790 153 +octob 2 89 2.397895 4.795790 156 +real 2 93 2.397895 4.795790 144 +build 2 85 2.484907 4.969814 184 +west 1 83 2.484907 2.484907 192 +member 1 84 2.484907 2.484907 165 +environ 1 84 2.484907 2.484907 177 +academ 1 82 2.484907 2.484907 178 +ieee 1 86 2.484907 2.484907 190 +larg 1 82 2.484907 2.484907 168 +orient 2 80 2.564949 5.129898 205 +server 2 76 2.564949 5.129898 204 +come 1 78 2.564949 2.564949 202 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +involv 2 71 2.639057 5.278114 227 +workshop 2 71 2.639057 5.278114 239 +appli 1 71 2.639057 2.639057 226 +meet 1 72 2.639057 2.639057 229 +goal 2 66 2.708050 5.416100 250 +multimedia 2 68 2.708050 5.416100 258 +main 1 67 2.708050 2.708050 256 +experi 3 64 2.772589 8.317767 283 +evalu 2 64 2.772589 5.545178 266 +complex 2 64 2.772589 5.545178 269 +septemb 2 65 2.772589 5.545178 274 +street 1 63 2.772589 2.772589 293 +march 3 61 2.833213 8.499639 295 +share 2 59 2.833213 5.666426 304 +best 1 59 2.833213 2.833213 299 +locat 1 59 2.833213 2.833213 303 +content 1 59 2.833213 2.833213 302 +unix 1 58 2.890372 2.890372 308 +browser 1 56 2.890372 2.890372 313 +index 1 56 2.890372 2.890372 309 +publish 1 57 2.890372 2.890372 326 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +case 1 51 2.995732 2.995732 351 +move 1 47 3.091042 3.091042 382 +past 1 42 3.218876 3.218876 428 +autom 1 41 3.218876 3.218876 434 +cach 1 41 3.218876 3.218876 432 +transact 1 39 3.258097 3.258097 438 +live 1 40 3.258097 3.258097 451 +multipl 1 39 3.258097 3.258097 453 +join 1 39 3.258097 3.258097 457 +industri 1 38 3.295837 3.295837 464 +staff 1 36 3.367296 3.367296 490 +multi 1 36 3.367296 3.367296 493 +next 2 34 3.401197 6.802394 517 +queri 3 33 3.433987 10.301961 524 +extend 2 32 3.465736 6.931472 539 +storag 1 31 3.496508 3.496508 553 +specifi 1 30 3.555348 3.555348 568 +focus 1 29 3.583519 3.583519 584 +becom 1 28 3.610918 3.610918 603 +measur 1 28 3.610918 3.610918 609 +manipul 1 27 3.637586 3.637586 624 +though 1 27 3.637586 3.637586 622 +proc 11 26 3.688879 40.577669 649 +effort 3 26 3.688879 11.066637 652 +altern 2 26 3.688879 7.377758 641 +challeng 1 26 3.688879 3.688879 653 +spent 2 25 3.737670 7.475340 676 +toward 2 25 3.737670 7.475340 668 +primari 1 25 3.737670 3.737670 669 +accur 1 25 3.737670 3.737670 680 +client 1 25 3.737670 3.737670 679 +mike 1 24 3.761200 3.761200 703 +scalabl 1 24 3.761200 3.761200 705 +known 1 24 3.761200 3.761200 702 +initi 1 23 3.806662 3.806662 717 +william 2 22 3.850148 7.700296 765 +varieti 1 22 3.850148 3.850148 740 +leav 1 21 3.912023 3.912023 772 +fine 1 20 3.951244 3.951244 822 +sigmod 2 19 4.007333 8.014666 877 +benchmark 1 19 4.007333 4.007333 859 +thoma 2 18 4.060443 8.120886 901 +statu 1 18 4.060443 4.060443 885 +repositori 3 17 4.110874 12.332622 932 +white 1 17 4.110874 4.110874 951 +jose 1 16 4.174387 4.174387 976 +upon 1 16 4.174387 4.174387 978 +taiwan 1 16 4.174387 4.174387 1006 +brown 1 16 4.174387 4.174387 977 +cambridg 1 16 4.174387 4.174387 1008 +livni 2 15 4.248495 8.496990 1053 +hybrid 1 15 4.248495 4.248495 1057 +heterogen 3 14 4.317488 12.952464 1090 +conf 8 13 4.382027 35.056216 1181 +dbm 1 13 4.382027 4.382027 1136 +forth 1 13 4.382027 4.382027 1186 +signific 1 13 4.382027 4.382027 1125 +front 1 13 4.382027 4.382027 1154 +sigmetr 1 13 4.382027 4.382027 1173 +workload 2 12 4.465908 8.931816 1210 +dewitt 2 12 4.465908 8.931816 1270 +tune 1 12 4.465908 4.465908 1227 +franc 1 12 4.465908 4.465908 1276 +shore 3 11 4.553877 13.661631 1377 +persist 3 11 4.553877 13.661631 1367 +road 1 11 4.553877 4.553877 1374 +franklin 3 10 4.653960 13.961880 1436 +naughton 2 10 4.653960 9.307920 1450 +resid 1 10 4.653960 4.653960 1461 +grain 1 10 4.653960 4.653960 1448 +rel 1 9 4.753590 4.753590 1487 +vernon 1 9 4.753590 4.753590 1556 +lock 1 9 4.753590 4.753590 1551 +morgan 1 9 4.753590 4.753590 1484 +carei 2 8 4.875197 9.750394 1781 +replac 1 8 4.875197 4.875197 1668 +ride 1 8 4.875197 4.875197 1741 +solomon 1 8 4.875197 4.875197 1716 +hash 1 8 4.875197 4.875197 1618 +portland 1 7 5.010635 5.010635 1878 +eduresearch 1 6 5.164786 5.164786 2205 +whichi 1 6 5.164786 5.164786 2056 +academia 1 6 5.164786 5.164786 2036 +tobe 1 6 5.164786 5.164786 1995 +oopsla 1 6 5.164786 5.164786 2221 +srinivasan 1 6 5.164786 5.164786 2175 +patel 1 6 5.164786 5.164786 2154 +almaden 4 5 5.347108 21.388432 2511 +minneapoli 3 5 5.347108 16.041324 2480 +fraction 1 5 5.347108 5.347108 2259 +ifip 1 5 5.347108 5.347108 2459 +tsatalo 1 5 5.347108 5.347108 2581 +england 1 5 5.347108 5.347108 2557 +kaufmann 1 5 5.347108 5.347108 2254 +harri 1 4 5.568345 5.568345 3034 +exodu 1 4 5.568345 5.568345 3075 +sabbat 1 4 5.568345 5.568345 2824 +taipei 1 4 5.568345 5.568345 2926 +chile 1 4 5.568345 5.568345 3082 +mcauliff 1 4 5.568345 5.568345 3083 +zwill 1 4 5.568345 5.568345 3076 +nashvil 1 4 5.568345 5.568345 2867 +andp 1 4 5.568345 5.568345 2811 +twelv 2 3 5.857933 11.715866 3899 +codi 2 3 5.857933 11.715866 3940 +schwarz 2 3 5.857933 11.715866 3986 +andm 2 3 5.857933 11.715866 3901 +tradeoff 1 3 5.857933 5.857933 3387 +forobject 1 3 5.857933 5.857933 3965 +oodb 1 3 5.857933 5.857933 3954 +switzerland 1 3 5.857933 5.857933 3551 +santiago 1 3 5.857933 5.857933 4013 +schuh 1 3 5.857933 5.857933 4014 +pang 1 3 5.857933 5.857933 3509 +revisit 1 3 5.857933 5.857933 3915 +haa 2 2 6.263398 12.526796 6115 +niblack 2 2 6.263398 12.526796 4364 +oodbm 2 2 6.263398 12.526796 6083 +interestsdatabas 1 2 6.263398 6.263398 6116 +theexodu 1 2 6.263398 6.263398 6076 +aimedat 1 2 6.263398 6.263398 6117 +researchgroup 1 2 6.263398 6.263398 5588 +homogen 1 2 6.263398 6.263398 4821 +roth 1 2 6.263398 6.263398 6089 +lausann 1 2 6.263398 6.263398 4955 +and 1 2 6.263398 6.263398 5241 +dataengin 1 2 6.263398 6.263398 6118 +zaharioudaki 1 2 6.263398 6.263398 6119 +modelingof 1 2 6.263398 6.263398 5734 +indistribut 1 2 6.263398 6.263398 4257 +garlic 4 1 6.957497 27.829988 17863 +arya 2 1 6.957497 13.914994 17864 +fagin 2 1 6.957497 13.914994 17865 +flickner 2 1 6.957497 13.914994 17866 +petkov 2 1 6.957497 13.914994 17867 +wimmer 2 1 6.957497 13.914994 17868 +careymichael 1 1 6.957497 6.957497 17869 +careyprofessor 1 1 6.957497 6.957497 17870 +performanceand 1 1 6.957497 6.957497 17871 +topicsof 1 1 6.957497 6.957497 17872 +algorithmsrel 1 1 6.957497 6.957497 17873 +userdatabas 1 1 6.957497 6.957497 17874 +persistentobject 1 1 6.957497 6.957497 17875 +objectmanag 1 1 6.957497 6.957497 17876 +applicationssuch 1 1 6.957497 6.957497 17877 +greatyear 1 1 6.957497 6.957497 17878 +tackl 1 1 6.957497 6.957497 17879 +anddiffer 1 1 6.957497 6.957497 17880 +thesourc 1 1 6.957497 6.957497 17881 +projectther 1 1 6.957497 6.957497 17882 +multimediainform 1 1 6.957497 6.957497 17883 +objectdatabas 1 1 6.957497 6.957497 17884 +continuedto 1 1 6.957497 6.957497 17885 +aqueri 1 1 6.957497 6.957497 17886 +pesto 1 1 6.957497 6.957497 17887 +thegarl 1 1 6.957497 6.957497 17888 +kiernan 1 1 6.957497 6.957497 17889 +orientedprogram 1 1 6.957497 6.957497 17890 +tork 1 1 6.957497 6.957497 17891 +visualdatabas 1 1 6.957497 6.957497 17892 +garlicapproach 1 1 6.957497 6.957497 17893 +luniewski 1 1 6.957497 6.957497 17894 +withd 1 1 6.957497 6.957497 17895 +kant 1 1 6.957497 6.957497 17896 +onobject 1 1 6.957497 6.957497 17897 +mehta 1 1 6.957497 6.957497 17898 +thint 1 1 6.957497 6.957497 17899 +smrc 1 1 6.957497 6.957497 17900 +withb 1 1 6.957497 6.957497 17901 +reinwald 1 1 6.957497 6.957497 17902 +desslock 1 1 6.957497 6.957497 17903 +lehman 1 1 6.957497 6.957497 17904 +pirahesh 1 1 6.957497 6.957497 17905 +tarascon 1 1 6.957497 6.957497 17906 +provenc 1 1 6.957497 6.957497 17907 +sigmodint 1 1 6.957497 6.957497 17908 +managementof 1 1 6.957497 6.957497 17909 +multivers 1 1 6.957497 6.957497 17910 +bober 1 1 6.957497 6.957497 17911 +oszu 1 1 6.957497 6.957497 17912 +dayal 1 1 6.957497 6.957497 17913 +valduriez 1 1 6.957497 6.957497 17914 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html new file mode 100644 index 00000000..f519378e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cchin^cchin.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +wisconsin 4 169 1.791759 7.167036 54 +madison 2 165 1.791759 3.583518 55 +data 1 170 1.791759 1.791759 49 +hour 1 165 1.791759 1.791759 46 +assign 1 135 1.945910 1.945910 66 +dayton 1 119 2.079442 2.079442 104 +introduct 1 126 2.079442 2.079442 87 +structur 1 106 2.197225 2.197225 105 +west 1 83 2.484907 2.484907 192 +mondai 1 77 2.564949 2.564949 206 +tuesdai 1 73 2.639057 2.639057 219 +fridai 1 44 3.135494 3.135494 390 +streetmadison 1 38 3.295837 3.295837 474 +edutelephon 1 10 4.653960 4.653960 1473 +chin 3 5 5.347108 16.041324 2408 +tang 1 5 5.347108 5.347108 2409 +bldg 1 4 5.568345 5.568345 2983 +biochemistri 2 3 5.857933 11.715866 3513 +cchin 2 2 6.263398 12.526796 4691 +pagechin 1 1 6.957497 6.957497 17915 +tanggradu 1 1 6.957497 6.957497 17916 +ameduc 1 1 6.957497 6.957497 17917 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html new file mode 100644 index 00000000..aafa6d23 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~chandra^chandra.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 3 431 0.693147 2.079441 10 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +book 1 99 2.302585 2.302585 131 +real 1 93 2.397895 2.397895 144 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +internet 1 83 2.484907 2.484907 186 +come 1 78 2.564949 2.564949 202 +summari 1 73 2.639057 2.639057 237 +york 1 41 3.218876 3.218876 435 +movi 1 40 3.258097 3.258097 459 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +soon 2 36 3.367296 6.734592 494 +miscellan 1 23 3.806662 3.806662 731 +log 1 19 4.007333 4.007333 857 +usaphon 1 9 4.753590 4.753590 1600 +assistantdepart 1 8 4.875197 4.875197 1784 +cultur 1 7 5.010635 5.010635 1951 +chandra 3 6 5.164786 15.494358 2091 +altavista 1 6 5.164786 5.164786 2222 +satish 2 4 5.568345 11.136690 2833 +wodehous 1 2 6.263398 6.263398 4990 +italian 1 2 6.263398 6.263398 5993 +nostalgia 1 1 6.957497 6.957497 17918 +linksclick 1 1 6.957497 6.957497 17919 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html new file mode 100644 index 00000000..97c882b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~chilimbi^chilimbi.html @@ -0,0 +1,92 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +program 2 374 0.693147 1.386294 7 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +mail 1 238 1.386294 1.386294 22 +updat 1 191 1.609438 1.609438 41 +parallel 4 169 1.791759 7.167036 60 +wisconsin 3 169 1.791759 5.375277 54 +madison 2 165 1.791759 3.583518 55 +click 2 142 1.945910 3.891820 78 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +process 1 142 1.945910 1.945910 72 +compil 2 122 2.079442 4.158884 96 +tool 2 117 2.079442 4.158884 93 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +mathemat 1 108 2.197225 2.197225 123 +memori 3 101 2.302585 6.907755 139 +real 1 93 2.397895 2.397895 144 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +state 3 76 2.564949 7.694847 207 +messag 1 76 2.564949 2.564949 212 +appear 1 78 2.564949 2.564949 210 +decemb 1 80 2.564949 2.564949 215 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +integr 1 67 2.708050 2.708050 245 +august 1 66 2.708050 2.708050 257 +share 2 59 2.833213 5.666426 304 +automat 1 61 2.833213 2.833213 306 +suggest 1 53 2.944439 2.944439 331 +visual 2 48 3.044522 6.089044 372 +physic 1 47 3.091042 3.091042 377 +electron 1 47 3.091042 3.091042 379 +examin 2 42 3.218876 6.437752 424 +movi 1 40 3.258097 3.258097 459 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 2 35 3.401197 6.802394 507 +tech 1 35 3.401197 3.401197 515 +award 1 34 3.401197 3.401197 523 +curriculum 1 33 3.433987 3.433987 535 +pass 1 28 3.610918 3.610918 611 +enhanc 1 26 3.688879 3.688879 644 +supercomput 1 25 3.737670 3.737670 681 +honor 1 23 3.806662 3.806662 729 +miscellan 1 23 3.806662 3.806662 731 +indian 2 22 3.850148 7.700296 769 +annot 1 21 3.912023 3.912023 775 +wind 1 18 4.060443 4.060443 908 +thoma 1 18 4.060443 4.060443 901 +stephen 1 11 4.553877 4.553877 1342 +laru 3 9 4.753590 14.260770 1560 +ball 1 9 4.753590 4.753590 1608 +assistantdepart 1 8 4.875197 4.875197 1784 +insert 1 8 4.875197 4.875197 1687 +gold 1 8 4.875197 4.875197 1745 +bombai 1 7 5.010635 5.010635 1972 +dream 1 6 5.164786 5.164786 2165 +merit 3 5 5.347108 16.041324 2466 +icpp 1 5 5.347108 5.347108 2382 +chilimbi 6 3 5.857933 35.147598 4015 +trishul 3 3 5.857933 17.573799 4016 +usaadvisor 1 3 5.857933 5.857933 4017 +certif 1 3 5.857933 5.857933 3859 +medal 1 3 5.857933 5.857933 3912 +cico 1 2 6.263398 6.263398 6120 +eick 1 2 6.263398 6.263398 5971 +megradu 1 1 6.957497 6.957497 17920 +designresearch 1 1 6.957497 6.957497 17921 +tunneleduc 1 1 6.957497 6.957497 17922 +publicationscachi 1 1 6.957497 6.957497 17923 +stormwatch 1 1 6.957497 6.957497 17924 +protocolstrishul 1 1 6.957497 6.957497 17925 +olympiadpresid 1 1 6.957497 6.957497 17926 +examinationcertif 1 1 6.957497 6.957497 17927 +chemistrycertif 1 1 6.957497 6.957497 17928 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html new file mode 100644 index 00000000..f90e43e1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~csashi^csashi.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +updat 1 191 1.609438 1.609438 41 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +june 1 79 2.564949 2.564949 214 +septemb 1 65 2.772589 2.772589 274 +mark 1 44 3.135494 3.135494 403 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +hill 1 25 3.737670 3.737670 670 +indian 1 22 3.850148 3.850148 769 +madra 1 8 4.875197 4.875197 1770 +univeristi 1 8 4.875197 4.875197 1754 +studentdepart 1 5 5.347108 5.347108 2505 +usaadvisor 1 3 5.857933 5.857933 4017 +chandrasekaran 2 2 6.263398 12.526796 6121 +sashikanth 2 2 6.263398 12.526796 6122 +btech 1 2 6.263398 6.263398 6123 +csashi 1 1 6.957497 6.957497 17929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~curt^curt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~curt^curt.html new file mode 100644 index 00000000..c3147eff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~curt^curt.html @@ -0,0 +1,107 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +wisc 2 242 1.386294 2.772588 33 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +develop 1 174 1.791759 1.791759 53 +network 1 168 1.791759 1.791759 61 +applic 1 170 1.791759 1.791759 56 +implement 1 152 1.791759 1.791759 52 +relat 3 139 1.945910 5.837730 68 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +perform 1 143 1.945910 1.945910 74 +databas 5 122 2.079442 10.397210 86 +site 4 106 2.197225 8.788900 119 +intern 2 108 2.197225 4.394450 128 +manag 1 114 2.197225 2.197225 125 +world 1 115 2.197225 2.197225 126 +text 1 98 2.302585 2.302585 133 +search 1 95 2.397895 2.397895 155 +center 1 88 2.397895 2.397895 158 +librari 1 87 2.484907 2.484907 181 +internet 1 83 2.484907 2.484907 186 +free 1 73 2.639057 2.639057 224 +onlin 1 75 2.639057 2.639057 223 +nation 1 74 2.639057 2.639057 240 +java 1 70 2.708050 2.708050 248 +previou 1 62 2.772589 2.772589 290 +organ 1 65 2.772589 2.772589 265 +locat 1 59 2.833213 2.833213 303 +index 1 56 2.890372 2.890372 309 +standard 2 48 3.044522 6.089044 365 +life 1 50 3.044522 3.044522 375 +approach 1 48 3.044522 3.044522 366 +transact 1 39 3.258097 3.258097 438 +map 1 39 3.258097 3.258097 452 +prototyp 1 38 3.295837 3.295837 463 +microsoft 1 38 3.295837 3.295837 468 +open 1 38 3.295837 3.295837 469 +sciencesunivers 1 37 3.332205 3.332205 486 +global 1 34 3.401197 3.401197 520 +survei 1 35 3.401197 3.401197 513 +posit 1 31 3.496508 3.496508 552 +focu 1 30 3.555348 3.555348 571 +weather 1 28 3.610918 3.610918 618 +item 2 19 4.007333 8.014666 856 +white 1 17 4.110874 4.110874 951 +dilbert 1 16 4.174387 4.174387 996 +stock 1 16 4.174387 4.174387 1007 +track 1 15 4.248495 4.248495 1029 +doit 1 14 4.317488 4.317488 1111 +draft 1 14 4.317488 4.317488 1085 +land 1 12 4.465908 4.465908 1273 +shore 1 11 4.553877 4.553877 1377 +council 1 11 4.553877 4.553877 1364 +appl 1 11 4.553877 4.553877 1303 +market 1 11 4.553877 4.553877 1361 +consortium 1 10 4.653960 4.653960 1467 +govern 1 9 4.753590 4.753590 1581 +paradis 4 8 4.875197 19.500788 1782 +gopher 1 6 5.164786 5.164786 1982 +feder 1 5 5.347108 5.347108 2266 +wiscinfo 3 3 5.857933 17.573799 3106 +dienst 2 3 5.857933 11.715866 3640 +commerc 1 3 5.857933 5.857933 3209 +eosdi 2 2 6.263398 12.526796 6124 +nebraska 1 2 6.263398 6.263398 5574 +lincoln 1 2 6.263398 6.263398 5575 +metrowerk 1 2 6.263398 6.263398 4131 +worm 1 2 6.263398 6.263398 5775 +projectdepart 1 2 6.263398 6.263398 6125 +curt 3 1 6.957497 20.872491 17930 +ellmann 2 1 6.957497 13.914994 17931 +webgnat 1 1 6.957497 6.957497 17932 +defect 1 1 6.957497 6.957497 17933 +opengi 1 1 6.957497 6.957497 17934 +calmit 1 1 6.957497 6.957497 17935 +illustra 1 1 6.957497 6.957497 17936 +papersmiscellan 1 1 6.957497 6.957497 17937 +sitescampu 1 1 6.957497 6.957497 17938 +wyrm 1 1 6.957497 6.957497 17939 +hoard 1 1 6.957497 6.957497 17940 +wiscnet 1 1 6.957497 6.957497 17941 +netcorpor 1 1 6.957497 6.957497 17942 +paww 1 1 6.957497 6.957497 17943 +taligentsearch 1 1 6.957497 6.957497 17944 +savvi 1 1 6.957497 6.957497 17945 +webcrawl 1 1 6.957497 6.957497 17946 +winsock 1 1 6.957497 6.957497 17947 +geolog 1 1 6.957497 6.957497 17948 +gil 1 1 6.957497 6.957497 17949 +oakridg 1 1 6.957497 6.957497 17950 +datacurt 1 1 6.957497 6.957497 17951 +ellmanncurt 1 1 6.957497 6.957497 17952 +eduparadis 1 1 6.957497 6.957497 17953 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html new file mode 100644 index 00000000..cca65d3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~cychan^cychan.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +yong 2 4 5.568345 11.136690 2809 +chee 2 3 5.857933 11.715866 3480 +cychan 1 2 6.263398 6.263398 4737 +pagechan 1 1 6.957497 6.957497 17954 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html new file mode 100644 index 00000000..eb265d4d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dailey^dailey.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +engin 4 297 1.098612 4.394448 20 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +public 1 202 1.609438 1.609438 43 +modifi 1 178 1.609438 1.609438 35 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +address 1 170 1.791759 1.791759 62 +schedul 1 119 2.079442 2.079442 85 +dayton 1 119 2.079442 2.079442 104 +mathemat 1 108 2.197225 2.197225 123 +educ 1 86 2.484907 2.484907 191 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +colleg 1 61 2.833213 2.833213 300 +friend 1 48 3.044522 3.044522 376 +physic 2 47 3.091042 6.182084 377 +math 1 44 3.135494 3.135494 402 +mechan 1 43 3.178054 3.178054 416 +statist 1 35 3.401197 3.401197 521 +lewi 1 8 4.875197 4.875197 1700 +nuclear 1 5 5.347108 5.347108 2576 +clark 1 4 5.568345 5.568345 2705 +mace 1 2 6.263398 6.263398 4849 +astronaut 1 2 6.263398 6.263398 5748 +sara 3 1 6.957497 20.872491 17955 +bauman 1 1 6.957497 6.957497 17956 +dailei 1 1 6.957497 6.957497 17957 +baumandailei 1 1 6.957497 6.957497 17958 +edugradu 1 1 6.957497 6.957497 17959 +pagessend 1 1 6.957497 6.957497 17960 +daileytu 1 1 6.957497 6.957497 17961 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~david^david.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~david^david.html new file mode 100644 index 00000000..846e153c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~david^david.html @@ -0,0 +1,314 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 19 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 9 443 0.693147 6.238323 6 +research 6 431 0.693147 4.158882 10 +program 5 374 0.693147 3.465735 7 +interest 1 384 0.693147 0.693147 11 +time 5 293 1.098612 5.493060 17 +project 2 340 1.098612 2.197224 18 +current 2 284 1.098612 2.197224 21 +student 2 343 1.098612 2.197224 19 +us 2 329 1.098612 2.197224 16 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +design 4 213 1.386294 5.545176 25 +wisc 2 242 1.386294 2.772588 33 +graduat 2 215 1.386294 2.772588 31 +softwar 2 220 1.386294 2.772588 30 +also 2 259 1.386294 2.772588 28 +gener 1 220 1.386294 1.386294 27 +includ 2 208 1.609438 3.218876 42 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +parallel 6 169 1.791759 10.750554 60 +wisconsin 5 169 1.791759 8.958795 54 +network 5 168 1.791759 8.958795 61 +implement 4 152 1.791759 7.167036 52 +develop 4 174 1.791759 7.167036 53 +recent 2 167 1.791759 3.583518 58 +distribut 2 162 1.791759 3.583518 51 +data 2 170 1.791759 3.583518 49 +read 2 154 1.791759 3.583518 47 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +architectur 11 139 1.945910 21.405010 77 +perform 7 143 1.945910 13.621370 74 +support 3 132 1.945910 5.837730 83 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +area 1 144 1.945910 1.945910 80 +first 1 140 1.945910 1.945910 71 +tool 4 117 2.079442 8.317768 93 +machin 3 129 2.079442 6.238326 95 +introduct 2 126 2.079442 4.158884 87 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +confer 1 126 2.079442 2.079442 100 +studi 1 120 2.079442 2.079442 91 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +compil 1 122 2.079442 2.079442 96 +intern 5 108 2.197225 10.986125 128 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +version 1 113 2.197225 2.197225 122 +manag 1 114 2.197225 2.197225 125 +memori 18 101 2.302585 41.446530 139 +user 6 104 2.302585 13.815510 137 +techniqu 3 99 2.302585 6.907755 138 +access 3 102 2.302585 6.907755 136 +advanc 2 99 2.302585 4.605170 130 +take 1 97 2.302585 2.302585 134 +proceed 4 93 2.397895 9.591580 152 +associ 1 93 2.397895 2.397895 151 +select 1 91 2.397895 2.397895 154 +octob 1 89 2.397895 2.397895 156 +follow 1 92 2.397895 2.397895 143 +call 1 91 2.397895 2.397895 153 +ieee 5 86 2.484907 12.424535 190 +level 4 87 2.484907 9.939628 180 +control 4 82 2.484907 9.939628 164 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +activ 1 84 2.484907 2.484907 182 +novemb 1 81 2.484907 2.484907 179 +chang 1 82 2.484907 2.484907 163 +librari 1 87 2.484907 2.484907 181 +interfac 5 79 2.564949 12.824745 209 +refer 4 78 2.564949 10.259796 203 +messag 3 76 2.564949 7.694847 212 +state 2 76 2.564949 5.129898 207 +dynam 1 76 2.564949 2.564949 194 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +decemb 1 80 2.564949 2.564949 215 +method 1 80 2.564949 2.564949 213 +david 11 71 2.639057 29.029627 232 +symposium 5 72 2.639057 13.195285 238 +line 1 75 2.639057 2.639057 231 +summari 1 73 2.639057 2.639057 237 +effici 1 73 2.639057 2.639057 233 +write 1 72 2.639057 2.639057 222 +simul 9 66 2.708050 24.372450 255 +integr 2 67 2.708050 5.416100 245 +august 1 66 2.708050 2.708050 257 +main 1 67 2.708050 2.708050 256 +goal 1 66 2.708050 2.708050 250 +evalu 3 64 2.772589 8.317767 266 +abstract 3 62 2.772589 8.317767 276 +new 2 64 2.772589 5.545178 262 +organ 1 65 2.772589 2.772589 265 +virtual 1 62 2.772589 2.772589 285 +result 1 65 2.772589 2.772589 281 +function 1 62 2.772589 2.772589 275 +share 10 59 2.833213 28.332130 304 +juli 2 60 2.833213 5.666426 305 +type 1 61 2.833213 2.833213 296 +major 1 56 2.890372 2.890372 315 +explor 1 58 2.890372 2.890372 324 +special 1 56 2.890372 2.890372 320 +allow 2 53 2.944439 5.888878 333 +processor 2 54 2.944439 5.888878 335 +three 1 54 2.944439 2.944439 330 +found 1 53 2.944439 2.944439 337 +hardwar 5 51 2.995732 14.978660 350 +case 3 51 2.995732 8.987196 351 +run 1 51 2.995732 2.995732 347 +tabl 1 51 2.995732 2.995732 346 +investig 1 51 2.995732 2.995732 353 +frequent 1 49 3.044522 3.044522 367 +california 2 46 3.091042 6.182084 388 +effect 1 46 3.091042 3.091042 385 +understand 1 47 3.091042 3.091042 384 +mark 6 44 3.135494 18.812964 403 +protocol 4 45 3.135494 12.541976 407 +mechan 2 43 3.178054 6.356108 416 +cach 8 41 3.218876 25.751008 432 +fast 4 42 3.218876 12.875504 429 +combin 1 42 3.218876 3.218876 421 +programm 2 39 3.258097 6.516194 445 +transact 1 39 3.258097 3.258097 438 +electr 1 38 3.295837 3.295837 461 +streetmadison 1 38 3.295837 3.295837 474 +brian 1 38 3.295837 3.295837 466 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +origin 1 38 3.295837 3.295837 472 +sciencesunivers 1 37 3.332205 3.332205 486 +workstat 1 37 3.332205 3.332205 479 +cost 1 37 3.332205 3.332205 480 +especi 2 36 3.367296 6.734592 496 +multi 1 36 3.367296 3.367296 493 +procedur 1 36 3.367296 3.367296 488 +jame 6 35 3.401197 20.407182 507 +bibliographi 1 34 3.401197 3.401197 518 +fault 1 32 3.465736 3.465736 547 +exist 2 30 3.555348 7.110696 569 +specifi 2 30 3.555348 7.110696 568 +power 1 30 3.555348 3.555348 573 +robert 1 30 3.555348 3.555348 567 +profil 1 30 3.555348 3.555348 581 +common 1 30 3.555348 3.555348 574 +steve 1 29 3.583519 3.583519 594 +synchron 1 29 3.583519 3.583519 588 +depend 1 29 3.583519 3.583519 583 +platform 1 29 3.583519 3.583519 591 +multiprocessor 3 28 3.610918 10.832754 605 +pass 2 28 3.610918 7.221836 611 +propos 1 28 3.610918 3.610918 602 +manipul 1 27 3.637586 3.637586 624 +berkelei 2 26 3.688879 7.377758 657 +revis 1 26 3.688879 3.688879 640 +altern 1 26 3.688879 3.688879 641 +detect 1 26 3.688879 3.688879 646 +hill 5 25 3.737670 18.688350 670 +supercomput 2 25 3.737670 7.475340 681 +wai 1 25 3.737670 3.737670 662 +trace 1 25 3.737670 3.737670 677 +scalabl 1 24 3.761200 3.761200 705 +self 1 22 3.850148 3.850148 761 +reduc 1 22 3.850148 3.850148 759 +cooper 1 22 3.850148 3.850148 757 +vlsi 1 21 3.912023 3.912023 795 +annot 1 21 3.912023 3.912023 775 +fine 5 20 3.951244 19.756220 822 +department 1 20 3.951244 3.951244 839 +portabl 1 20 3.951244 3.951244 819 +exploit 1 20 3.951244 3.951244 836 +binari 1 20 3.951244 3.951244 823 +benchmark 1 19 4.007333 4.007333 859 +five 1 19 4.007333 4.007333 841 +wind 3 18 4.060443 12.181329 908 +steven 4 17 4.110874 16.443496 953 +interconnect 1 17 4.110874 4.110874 937 +asplo 1 17 4.110874 4.110874 948 +intel 1 16 4.174387 4.174387 1000 +advantag 1 16 4.174387 4.174387 987 +transfer 1 16 4.174387 4.174387 967 +upon 1 16 4.174387 4.174387 978 +overhead 2 15 4.248495 8.496990 1035 +eduphon 1 15 4.248495 4.248495 1060 +hybrid 1 15 4.248495 4.248495 1057 +remot 1 15 4.248495 4.248495 1041 +action 1 15 4.248495 4.248495 1038 +driven 1 15 4.248495 4.248495 1048 +coher 3 14 4.317488 12.952464 1109 +convent 1 14 4.317488 4.317488 1072 +block 3 13 4.382027 13.146081 1183 +tune 1 12 4.465908 4.465908 1227 +wood 15 11 4.553877 68.308155 1355 +isca 3 11 4.553877 13.661631 1354 +transpar 3 11 4.553877 13.661631 1325 +faster 1 11 4.553877 4.553877 1323 +grain 4 10 4.653960 18.615840 1448 +facilit 1 10 4.653960 4.653960 1412 +laru 6 9 4.753590 28.521540 1560 +tunnel 3 9 4.753590 14.260770 1615 +significantli 1 9 4.753590 4.753590 1508 +paradigm 2 8 4.875197 9.750394 1662 +secretari 1 8 4.875197 4.875197 1775 +uniprocessor 1 8 4.875197 4.875197 1696 +spec 1 8 4.875197 4.875197 1640 +burger 1 7 5.010635 5.010635 1889 +roger 1 7 5.010635 5.010635 1892 +hit 1 7 5.010635 5.010635 1965 +tag 1 7 5.010635 5.010635 1821 +instrument 1 7 5.010635 5.010635 1954 +duke 1 6 5.164786 5.164786 2231 +microsystem 1 6 5.164786 5.164786 2160 +ann 1 6 5.164786 5.164786 2065 +feasibl 1 6 5.164786 5.164786 2157 +byte 1 6 5.164786 5.164786 2108 +invok 1 6 5.164786 5.164786 2079 +lebeck 7 5 5.347108 37.429756 2582 +reinhardt 6 5 5.347108 32.082648 2583 +babak 4 5 5.347108 21.388432 2584 +falsafi 4 5 5.347108 21.388432 2585 +tempest 4 5 5.347108 21.388432 2548 +ioanni 2 5 5.347108 10.694216 2553 +rewrit 2 5 5.347108 10.694216 2367 +scienceand 1 5 5.347108 5.347108 2348 +mukherje 1 5 5.347108 5.347108 2586 +accuraci 1 5 5.347108 5.347108 2450 +dougla 1 5 5.347108 5.347108 2471 +toc 1 5 5.347108 5.347108 2562 +computerarchitectur 1 5 5.347108 5.347108 2290 +focuss 1 5 5.347108 5.347108 2271 +hypothet 1 5 5.347108 5.347108 2474 +lookup 1 5 5.347108 5.347108 2399 +sparcstat 1 5 5.347108 5.347108 2406 +alvin 4 4 5.568345 22.273380 3084 +wart 2 4 5.568345 11.136690 2987 +hyder 2 4 5.568345 11.136690 2772 +schoina 2 4 5.568345 11.136690 3085 +engineeringdepart 1 4 5.568345 5.568345 2917 +decoupl 1 4 5.568345 5.568345 2898 +talluri 1 4 5.568345 5.568345 2820 +rapidli 1 4 5.568345 5.568345 2850 +myresearch 1 4 5.568345 5.568345 2842 +fulli 1 4 5.568345 5.568345 2986 +pfile 2 3 5.857933 11.715866 3100 +fingerson 1 3 5.857933 5.857933 4018 +thea 1 3 5.857933 5.857933 4019 +sklenar 1 3 5.857933 5.857933 4020 +programmingc 1 3 5.857933 5.857933 3232 +madhusudhan 1 3 5.857933 5.857933 4021 +bulk 1 3 5.857933 5.857933 4000 +anddavid 5 2 6.263398 31.316990 6126 +architecturec 2 2 6.263398 12.526796 6127 +invalid 2 2 6.263398 12.526796 5476 +callaghan 1 2 6.263398 6.263398 6128 +virtualmemori 1 2 6.263398 6.263398 4305 +anal 1 2 6.263398 6.263398 4834 +null 1 2 6.263398 6.263398 4714 +typhoon 4 1 6.957497 27.829988 17962 +usadavid 1 1 6.957497 6.957497 17963 +toonenrec 1 1 6.957497 6.957497 17964 +rahmat 1 1 6.957497 6.957497 17965 +alvi 1 1 6.957497 6.957497 17966 +informix 1 1 6.957497 6.957497 17967 +memorysteven 1 1 6.957497 6.957497 17968 +communicationshubhendu 1 1 6.957497 6.957497 17969 +costrahmat 1 1 6.957497 6.957497 17970 +multiprocessorsalvin 1 1 6.957497 6.957497 17971 +simulationalvin 1 1 6.957497 6.957497 17972 +sigmetricsmai 1 1 6.957497 6.957497 17973 +thrust 1 1 6.957497 6.957497 17974 +hybridprogram 1 1 6.957497 6.957497 17975 +similaritesof 1 1 6.957497 6.957497 17976 +calledtempest 1 1 6.957497 6.957497 17977 +handler 1 1 6.957497 6.957497 17978 +suppliedmechan 1 1 6.957497 6.957497 17979 +tempestmechan 1 1 6.957497 6.957497 17980 +novelmechan 1 1 6.957497 6.957497 17981 +tagblock 1 1 6.957497 6.957497 17982 +theloc 1 1 6.957497 6.957497 17983 +hardwareplatform 1 1 6.957497 6.957497 17984 +revers 1 1 6.957497 6.957497 17985 +translationt 1 1 6.957497 6.957497 17986 +rtlb 1 1 6.957497 6.957497 17987 +grainaccess 1 1 6.957497 6.957497 17988 +thata 1 1 6.957497 6.957497 17989 +performscompar 1 1 6.957497 6.957497 17990 +memoryprogram 1 1 6.957497 6.957497 17991 +thatoptim 1 1 6.957497 6.957497 17992 +reducingsimul 1 1 6.957497 6.957497 17993 +tightli 1 1 6.957497 6.957497 17994 +byprovid 1 1 6.957497 6.957497 17995 +referenceinvok 1 1 6.957497 6.957497 17996 +andmemori 1 1 6.957497 6.957497 17997 +processedbi 1 1 6.957497 6.957497 17998 +functionfor 1 1 6.957497 6.957497 17999 +usingbinari 1 1 6.957497 6.957497 18000 +memoryrefer 1 1 6.957497 6.957497 18001 +tothre 1 1 6.957497 6.957497 18002 +thatcal 1 1 6.957497 6.957497 18003 +onlythre 1 1 6.957497 6.957497 18004 +slower 1 1 6.957497 6.957497 18005 +techniquesto 1 1 6.957497 6.957497 18006 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html new file mode 100644 index 00000000..c5d05f33 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dburger^dburger.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +tool 1 117 2.079442 2.079442 93 +stuff 1 87 2.484907 2.484907 171 +school 1 84 2.484907 2.484907 188 +maintain 1 51 2.995732 2.995732 342 +grad 1 20 3.951244 3.951244 837 +wind 1 18 4.060443 4.060443 908 +doug 2 9 4.753590 9.507180 1517 +burger 2 7 5.010635 10.021270 1889 +galileo 1 4 5.568345 5.568345 3086 +damn 1 2 6.263398 6.263398 6129 +pageprofession 1 1 6.957497 6.957497 18007 +summaryresum 1 1 6.957497 6.957497 18008 +cvtranscriptcours 1 1 6.957497 6.957497 18009 +projectsadvisoraffili 1 1 6.957497 6.957497 18010 +sciwisconsin 1 1 6.957497 6.957497 18011 +tunnelpag 1 1 6.957497 6.957497 18012 +architectureuw 1 1 6.957497 6.957497 18013 +architecturesimplescalar 1 1 6.957497 6.957497 18014 +setgenericasacmperson 1 1 6.957497 6.957497 18015 +meus 1 1 6.957497 6.957497 18016 +linksphoto 1 1 6.957497 6.957497 18017 +galleryrid 1 1 6.957497 6.957497 18018 +demonhunt 1 1 6.957497 6.957497 18019 +catsbewar 1 1 6.957497 6.957497 18020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html new file mode 100644 index 00000000..d3ddde94 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~deboor^deboor.html @@ -0,0 +1,173 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +inform 6 412 0.693147 4.158882 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +us 2 329 1.098612 2.197224 16 +last 1 314 1.098612 1.098612 14 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +also 2 259 1.386294 2.772588 28 +link 1 247 1.386294 1.386294 24 +fall 2 181 1.609438 3.218876 40 +list 2 201 1.609438 3.218876 39 +includ 1 208 1.609438 1.609438 42 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +recent 2 167 1.791759 3.583518 58 +read 2 154 1.791759 3.583518 47 +hour 1 165 1.791759 1.791759 46 +algorithm 1 162 1.791759 1.791759 57 +address 1 170 1.791759 1.791759 62 +avail 1 169 1.791759 1.791759 48 +click 4 142 1.945910 7.783640 78 +file 3 132 1.945910 5.837730 70 +professor 1 137 1.945910 1.945910 76 +analysi 2 124 2.079442 4.158884 98 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +provid 1 121 2.079442 2.079442 94 +tool 1 117 2.079442 2.079442 93 +introduct 1 126 2.079442 2.079442 87 +check 3 115 2.197225 6.591675 118 +look 2 107 2.197225 4.394450 115 +theori 2 111 2.197225 4.394450 127 +version 2 113 2.197225 4.394450 122 +find 2 111 2.197225 4.394450 111 +teach 1 108 2.197225 2.197225 112 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +mathemat 1 108 2.197225 2.197225 123 +access 1 102 2.302585 2.302585 136 +book 1 99 2.302585 2.302585 131 +peopl 1 96 2.302585 2.302585 132 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +mani 1 92 2.397895 2.397895 150 +search 1 95 2.397895 2.397895 155 +homepag 1 93 2.397895 2.397895 148 +center 1 88 2.397895 2.397895 158 +pictur 1 89 2.397895 2.397895 160 +journal 4 83 2.484907 9.939628 183 +chang 1 82 2.484907 2.484907 163 +west 1 83 2.484907 2.484907 192 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +contain 1 81 2.484907 2.484907 174 +html 1 75 2.639057 2.639057 235 +david 1 71 2.639057 2.639057 232 +practic 1 70 2.708050 2.708050 246 +written 1 63 2.772589 2.772589 278 +guid 1 63 2.772589 2.772589 267 +organ 1 65 2.772589 2.772589 265 +content 1 59 2.833213 2.833213 302 +variou 3 56 2.890372 8.671116 317 +publish 3 57 2.890372 8.671116 326 +much 1 52 2.995732 2.995732 349 +numer 2 49 3.044522 6.089044 369 +telephon 1 50 3.044522 3.044522 373 +approach 1 48 3.044522 3.044522 366 +even 1 45 3.135494 3.135494 393 +better 1 45 3.135494 3.135494 401 +math 1 44 3.135494 3.135494 402 +third 1 43 3.178054 3.178054 412 +edit 1 42 3.218876 3.218876 418 +press 1 42 3.218876 3.218876 419 +combin 1 42 3.218876 3.218876 421 +small 1 39 3.258097 3.258097 447 +author 1 39 3.258097 3.258097 450 +tutori 1 39 3.258097 3.258097 437 +paul 3 38 3.295837 9.887511 471 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +approxim 5 35 3.401197 17.005985 509 +print 1 34 3.401197 3.401197 503 +bibliographi 1 34 3.401197 3.401197 518 +next 1 34 3.401197 3.401197 517 +articl 1 33 3.433987 3.433987 530 +postal 1 30 3.555348 3.555348 580 +great 1 27 3.637586 3.637586 626 +thank 2 23 3.806662 7.613324 721 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +latest 1 21 3.912023 3.912023 785 +hous 1 21 3.912023 3.912023 801 +accept 1 18 4.060443 4.060443 879 +stand 1 18 4.060443 4.060443 891 +former 1 17 4.110874 4.110874 956 +seek 1 17 4.110874 4.110874 954 +carl 3 15 4.248495 12.745485 1024 +anonym 1 14 4.317488 4.317488 1100 +individu 1 13 4.382027 4.382027 1126 +errata 2 10 4.653960 9.307920 1403 +town 1 10 4.653960 4.653960 1458 +death 1 10 4.653960 4.653960 1457 +subset 1 10 4.653960 4.653960 1425 +latter 1 9 4.753590 4.753590 1522 +screen 1 9 4.753590 4.753590 1577 +unusu 1 9 4.753590 4.753590 1566 +end 1 9 4.753590 4.753590 1567 +driver 1 8 4.875197 4.875197 1657 +elementari 1 7 5.010635 5.010635 1825 +spline 2 6 5.164786 10.329572 2007 +usaoffic 1 6 5.164786 5.164786 2159 +button 1 5 5.347108 5.347108 2337 +door 1 5 5.347108 5.347108 2291 +areavail 1 4 5.568345 5.568345 2810 +allan 1 4 5.568345 5.568345 2849 +technion 1 4 5.568345 5.568345 2856 +boor 4 3 5.857933 23.431732 3482 +cont 1 3 5.857933 5.857933 3171 +shall 1 3 5.857933 5.857933 3891 +clickabl 2 2 6.263398 12.526796 4788 +deboor 1 2 6.263398 6.263398 4744 +thevari 1 2 6.263398 6.263398 6130 +forconstruct 1 2 6.263398 6.263398 5649 +amo 1 2 6.263398 6.263398 6094 +joi 1 2 6.263398 6.263398 5208 +hermit 1 2 6.263398 6.263398 4150 +soup 1 2 6.263398 6.263398 6131 +kitchen 1 2 6.263398 6.263398 6132 +occupi 1 2 6.263398 6.263398 5857 +ditto 2 1 6.957497 13.914994 18021 +nevai 2 1 6.957497 13.914994 18022 +pinku 2 1 6.957497 13.914994 18023 +mathematicsdepart 1 1 6.957497 6.957497 18024 +schoenberg 1 1 6.957497 6.957497 18025 +approx 1 1 6.957497 6.957497 18026 +theclick 1 1 6.957497 6.957497 18027 +ofapproxim 1 1 6.957497 6.957497 18028 +publishedpap 1 1 6.957497 6.957497 18029 +andmuch 1 1 6.957497 6.957497 18030 +foreast 1 1 6.957497 6.957497 18031 +theirtabl 1 1 6.957497 6.957497 18032 +singli 1 1 6.957497 6.957497 18033 +thishandi 1 1 6.957497 6.957497 18034 +alsoapproxim 1 1 6.957497 6.957497 18035 +slist 1 1 6.957497 6.957497 18036 +ila 1 1 6.957497 6.957497 18037 +seeviva_vi 1 1 6.957497 6.957497 18038 +alsoon 1 1 6.957497 6.957497 18039 +thehtml 1 1 6.957497 6.957497 18040 +primermight 1 1 6.957497 6.957497 18041 +ever_chang 1 1 6.957497 6.957497 18042 +griffeath 1 1 6.957497 6.957497 18043 +sprimordi 1 1 6.957497 6.957497 18044 +seeodd 1 1 6.957497 6.957497 18045 +techunix 1 1 6.957497 6.957497 18046 +nevaiif 1 1 6.957497 6.957497 18047 +makehi 1 1 6.957497 6.957497 18048 +outputavail 1 1 6.957497 6.957497 18049 +taki 1 1 6.957497 6.957497 18050 +souganid 1 1 6.957497 6.957497 18051 +andthaleia 1 1 6.957497 6.957497 18052 +zariphopoul 1 1 6.957497 6.957497 18053 +szego 1 1 6.957497 6.957497 18054 +bust 1 1 6.957497 6.957497 18055 +inscript 1 1 6.957497 6.957497 18056 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~devise^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~devise^ new file mode 100644 index 00000000..9fefd857 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~devise^ @@ -0,0 +1,176 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +time 2 293 1.098612 2.197224 17 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +link 3 247 1.386294 4.158882 24 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +group 1 183 1.609438 1.609438 36 +data 16 170 1.791759 28.668144 49 +base 2 165 1.791759 3.583518 50 +support 2 132 1.945910 3.891820 83 +construct 1 139 1.945910 1.945910 82 +file 1 132 1.945910 1.945910 70 +model 1 145 1.945910 1.945910 69 +relat 1 139 1.945910 1.945910 68 +architectur 1 139 1.945910 1.945910 77 +click 1 142 1.945910 1.945910 78 +confer 3 126 2.079442 6.238326 100 +analysi 2 124 2.079442 4.158884 98 +introduct 1 126 2.079442 2.079442 87 +intern 1 108 2.197225 2.197225 128 +version 1 113 2.197225 2.197225 122 +send 1 114 2.197225 2.197225 109 +user 2 104 2.302585 4.605170 137 +need 2 98 2.302585 4.605170 135 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +graphic 2 90 2.397895 4.795790 147 +proceed 2 93 2.397895 4.795790 152 +follow 1 92 2.397895 2.397895 143 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +mani 1 92 2.397895 2.397895 150 +sinc 1 90 2.397895 2.397895 159 +octob 1 89 2.397895 2.397895 156 +environ 3 84 2.484907 7.454721 177 +help 2 83 2.484907 4.969814 175 +level 1 87 2.484907 2.484907 180 +larg 1 82 2.484907 2.484907 168 +requir 1 81 2.484907 2.484907 167 +librari 1 87 2.484907 2.484907 181 +interfac 1 79 2.564949 2.564949 209 +exampl 1 77 2.564949 2.564949 195 +decemb 1 80 2.564949 2.564949 215 +dynam 1 76 2.564949 2.564949 194 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +window 1 68 2.708050 2.708050 242 +differ 1 66 2.708050 2.708050 253 +descript 1 64 2.772589 2.772589 271 +januari 1 62 2.772589 2.772589 264 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +explor 3 58 2.890372 8.671116 324 +direct 1 57 2.890372 2.890372 316 +detail 1 57 2.890372 2.890372 321 +februari 1 54 2.944439 2.944439 328 +date 1 51 2.995732 2.995732 344 +visual 7 48 3.044522 21.311654 372 +cool 1 49 3.044522 3.044522 374 +featur 1 46 3.091042 3.091042 386 +execut 1 45 3.135494 3.135494 404 +mechan 1 43 3.178054 3.178054 416 +map 1 39 3.258097 3.258097 452 +tree 1 36 3.367296 3.367296 492 +download 1 36 3.367296 3.367296 489 +michael 2 35 3.401197 6.802394 514 +next 1 34 3.401197 3.401197 517 +queri 4 33 3.433987 13.735948 524 +within 1 33 3.433987 3.433987 525 +platform 1 29 3.583519 3.583519 591 +releas 2 28 3.610918 7.221836 616 +repres 1 26 3.688879 3.688879 656 +compar 1 26 3.688879 3.688879 648 +handl 1 24 3.761200 3.761200 685 +input 3 23 3.806662 11.419986 727 +togeth 1 23 3.806662 3.806662 714 +famili 1 23 3.806662 3.806662 735 +sequenc 1 23 3.806662 3.806662 734 +variabl 1 23 3.806662 3.806662 715 +color 1 22 3.850148 3.850148 762 +flexibl 1 21 3.912023 3.912023 792 +viewer 1 21 3.912023 3.912023 787 +output 1 21 3.912023 3.912023 788 +chen 1 21 3.912023 3.912023 791 +comparison 1 19 4.007333 4.007333 863 +record 4 18 4.060443 16.241772 890 +appropri 1 18 4.060443 4.060443 883 +ramakrishnan 4 16 4.174387 16.697548 972 +livni 4 15 4.248495 16.993980 1053 +ascii 1 15 4.248495 4.248495 1032 +biologi 1 15 4.248495 4.248495 1049 +stream 1 15 4.248495 4.248495 1015 +miron 2 14 4.317488 8.634976 1110 +save 1 14 4.317488 4.317488 1099 +individu 1 13 4.382027 4.382027 1126 +raghu 4 12 4.465908 17.863632 1212 +shape 1 12 4.465908 4.465908 1245 +solari 1 12 4.465908 4.465908 1238 +distinguish 1 11 4.553877 4.553877 1357 +abil 1 11 4.553877 4.553877 1341 +string 1 11 4.553877 4.553877 1340 +devis 9 10 4.653960 41.885640 1451 +cheng 2 10 4.653960 9.307920 1381 +relationship 1 10 4.653960 4.653960 1383 +float 1 9 4.753590 4.753590 1504 +integ 1 8 4.875197 4.875197 1688 +inproceed 1 8 4.875197 4.875197 1670 +larger 1 7 5.010635 5.010635 1875 +dataset 1 7 5.010635 5.010635 1914 +spie 2 6 5.164786 10.329572 2119 +layout 1 6 5.164786 5.164786 2183 +quick 1 6 5.164786 5.164786 2184 +medicin 2 5 5.347108 10.694216 2448 +cell 1 5 5.347108 5.347108 2274 +complementari 1 5 5.347108 5.347108 2523 +kent 1 4 5.568345 5.568345 2744 +myllymaki 2 3 5.857933 11.715866 4022 +asid 1 3 5.857933 5.857933 3770 +comad 1 3 5.857933 5.857933 3737 +wenger 1 3 5.857933 5.857933 4023 +jussi 2 2 6.263398 12.526796 6133 +hotlin 2 2 6.263398 12.526796 5967 +oneset 1 2 6.263398 6.263398 6134 +viewsof 1 2 6.263398 6.263398 6135 +birch 1 2 6.263398 6.263398 6136 +andanalysi 1 2 6.263398 6.263398 4271 +workth 1 2 6.263398 6.263398 6137 +guangshun 1 2 6.263398 6.263398 6138 +pagedevis 1 1 6.957497 6.957497 18057 +visualizationt 1 1 6.957497 6.957497 18058 +featuresexamplesin 1 1 6.957497 6.957497 18059 +depthpublicationsrel 1 1 6.957497 6.957497 18060 +workreleasecontactsfeaturesthes 1 1 6.957497 6.957497 18061 +cancontrol 1 1 6.957497 6.957497 18062 +ax 1 1 6.957497 6.957497 18063 +cursor 1 1 6.957497 6.957497 18064 +examplescheck 1 1 6.957497 6.957497 18065 +validationmolecular 1 1 6.957497 6.957497 18066 +soil 1 1 6.957497 6.957497 18067 +clusteringfinanci 1 1 6.957497 6.957497 18068 +explorationfamili 1 1 6.957497 6.957497 18069 +climatedata 1 1 6.957497 6.957497 18070 +centergeograph 1 1 6.957497 6.957497 18071 +systemsoil 1 1 6.957497 6.957497 18072 +sciencefil 1 1 6.957497 6.957497 18073 +serverprogram 1 1 6.957497 6.957497 18074 +tracesclin 1 1 6.957497 6.957497 18075 +moreexampl 1 1 6.957497 6.957497 18076 +depthfor 1 1 6.957497 6.957497 18077 +visualizationvisu 1 1 6.957497 6.957497 18078 +interfaceperform 1 1 6.957497 6.957497 18079 +issuespublicationsmiron 1 1 6.957497 6.957497 18080 +dataexplor 1 1 6.957497 6.957497 18081 +praveenseshadri 1 1 6.957497 6.957497 18082 +sequencequeri 1 1 6.957497 6.957497 18083 +themanag 1 1 6.957497 6.957497 18084 +seqproject 1 1 6.957497 6.957497 18085 +queryrecord 1 1 6.957497 6.957497 18086 +bevisu 1 1 6.957497 6.957497 18087 +informationw 1 1 6.957497 6.957497 18088 +executablesfor 1 1 6.957497 6.957497 18089 +ld_library_path 1 1 6.957497 6.957497 18090 +rundevis 1 1 6.957497 6.957497 18091 +arestat 1 1 6.957497 6.957497 18092 +shareabl 1 1 6.957497 6.957497 18093 +contactsfor 1 1 6.957497 6.957497 18094 +contactmiron 1 1 6.957497 6.957497 18095 +usersupport 1 1 6.957497 6.957497 18096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html new file mode 100644 index 00000000..1971be6c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dewitt^dewitt.html @@ -0,0 +1,201 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 9 443 0.693147 6.238323 6 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +project 4 340 1.098612 4.394448 18 +current 2 284 1.098612 2.197224 21 +us 1 329 1.098612 1.098612 16 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +languag 1 227 1.386294 1.386294 26 +washington 1 236 1.386294 1.386294 32 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +data 9 170 1.791759 16.125831 49 +wisconsin 3 169 1.791759 5.375277 54 +parallel 2 169 1.791759 3.583518 60 +applic 2 170 1.791759 3.583518 56 +develop 2 174 1.791759 3.583518 53 +recent 2 167 1.791759 3.583518 58 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +object 9 138 1.945910 17.513190 79 +file 2 132 1.945910 3.891820 70 +relat 2 139 1.945910 3.891820 68 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +like 1 132 1.945910 1.945910 81 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +hall 1 146 1.945910 1.945910 65 +databas 2 122 2.079442 4.158884 86 +confer 2 126 2.079442 4.158884 100 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +report 1 131 2.079442 2.079442 92 +manag 2 114 2.197225 4.394450 125 +intern 1 108 2.197225 2.197225 128 +need 2 98 2.302585 4.605170 135 +text 2 98 2.302585 4.605170 133 +part 1 98 2.302585 2.302585 129 +proceed 2 93 2.397895 4.795790 152 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +present 1 91 2.397895 2.397895 145 +wide 3 84 2.484907 7.454721 185 +larg 3 82 2.484907 7.454721 168 +environ 1 84 2.484907 2.484907 177 +orient 2 80 2.564949 5.129898 205 +server 2 76 2.564949 5.129898 204 +interfac 1 79 2.564949 2.564949 209 +david 2 71 2.639057 5.278114 232 +summari 1 73 2.639057 2.639057 237 +name 1 72 2.639057 2.639057 220 +appli 1 71 2.639057 2.639057 226 +main 1 67 2.708050 2.708050 256 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +complex 2 64 2.772589 5.545178 269 +evalu 1 64 2.772589 2.772589 266 +prof 1 64 2.772589 2.772589 273 +creat 1 63 2.772589 2.772589 277 +januari 1 62 2.772589 2.772589 264 +type 1 61 2.833213 2.833213 296 +automat 1 61 2.833213 2.833213 306 +unix 5 58 2.890372 14.451860 308 +space 1 57 2.890372 2.890372 310 +talk 2 53 2.944439 5.888878 336 +sampl 1 53 2.944439 2.944439 339 +hardwar 3 51 2.995732 8.987196 350 +basic 2 50 3.044522 6.089044 360 +set 2 50 3.044522 6.089044 361 +telephon 1 50 3.044522 3.044522 373 +without 1 50 3.044522 3.044522 370 +anoth 1 45 3.135494 3.135494 408 +execut 1 45 3.135494 3.135494 404 +multipl 1 39 3.258097 3.258097 453 +must 1 40 3.258097 3.258097 442 +error 1 40 3.258097 3.258097 449 +field 2 37 3.332205 6.664410 482 +workstat 1 37 3.332205 3.332205 479 +connect 1 37 3.332205 3.332205 485 +either 1 35 3.401197 3.401197 506 +singl 1 34 3.401197 3.401197 510 +storag 1 31 3.496508 3.496508 553 +exist 1 30 3.555348 3.555348 569 +abl 1 30 3.555348 3.555348 566 +rang 1 30 3.555348 3.555348 565 +platform 1 29 3.583519 3.583519 591 +intend 1 28 3.610918 3.610918 599 +becom 1 28 3.610918 3.610918 603 +scale 1 28 3.610918 3.610918 613 +multiprocessor 1 28 3.610918 3.610918 605 +cluster 1 28 3.610918 3.610918 612 +manipul 1 27 3.637586 3.637586 624 +client 2 25 3.737670 7.475340 679 +wai 1 25 3.737670 3.737670 662 +store 3 24 3.761200 11.283600 693 +fellow 1 24 3.761200 3.761200 701 +serv 1 22 3.850148 3.850148 758 +varieti 1 22 3.850148 3.850148 740 +instead 1 22 3.850148 3.850148 756 +programminglanguag 1 21 3.912023 3.912023 782 +fund 1 21 3.912023 3.912023 805 +busi 1 21 3.912023 3.912023 784 +sigmod 2 19 4.007333 8.014666 877 +excel 1 19 4.007333 4.007333 868 +benchmark 1 19 4.007333 4.007333 859 +repositori 1 17 4.110874 4.110874 932 +expand 1 17 4.110874 4.110874 928 +modif 1 17 4.110874 4.110874 913 +attempt 1 17 4.110874 4.110874 917 +white 1 17 4.110874 4.110874 951 +match 2 16 4.174387 8.348774 965 +intel 1 16 4.174387 4.174387 1000 +transit 1 15 4.248495 4.248495 1046 +capabl 1 15 4.248495 4.248495 1016 +heterogen 1 14 4.317488 4.317488 1090 +signific 1 13 4.382027 4.382027 1125 +dewitt 4 12 4.465908 17.863632 1270 +target 3 12 4.465908 13.397724 1282 +emploi 2 12 4.465908 8.931816 1284 +fromindividu 1 12 4.465908 4.465908 1290 +shore 10 11 4.553877 45.538770 1377 +persist 2 11 4.553877 9.107754 1367 +michigan 1 11 4.553877 4.553877 1368 +arpa 1 11 4.553877 4.553877 1369 +naughton 3 10 4.653960 13.961880 1450 +facilit 1 10 4.653960 4.653960 1412 +franklin 1 10 4.653960 4.653960 1436 +vldb 1 10 4.653960 4.653960 1470 +invit 1 10 4.653960 4.653960 1428 +conferenceon 1 9 4.753590 4.753590 1595 +paradis 5 8 4.875197 24.375985 1782 +carei 2 8 4.875197 9.750394 1781 +solomon 1 8 4.875197 4.875197 1716 +databasesystem 1 8 4.875197 4.875197 1617 +hold 1 8 4.875197 4.875197 1645 +poor 1 8 4.875197 4.875197 1736 +polygon 1 8 4.875197 4.875197 1723 +sparc 1 7 5.010635 5.010635 1860 +geograph 4 6 5.164786 20.659144 2236 +patel 1 6 5.164786 5.164786 2154 +pub 1 6 5.164786 5.164786 2239 +compat 1 5 5.347108 5.347108 2485 +tsatalo 1 5 5.347108 5.347108 2581 +minneapoli 1 5 5.347108 5.347108 2480 +proceedingsof 1 5 5.347108 5.347108 2331 +satellit 1 4 5.568345 5.568345 3077 +exodu 1 4 5.568345 5.568345 3075 +mcauliff 1 4 5.568345 5.568345 3083 +zwill 1 4 5.568345 5.568345 3076 +chile 1 4 5.568345 5.568345 3082 +gamma 2 3 5.857933 11.715866 3219 +orth 1 3 5.857933 5.857933 3685 +paragon 1 3 5.857933 5.857933 3359 +summit 1 3 5.857933 5.857933 3684 +developeda 1 2 6.263398 6.263398 5205 +polylin 1 2 6.263398 6.263398 6079 +projecti 1 2 6.263398 6.263398 5963 +kabra 1 2 6.263398 6.263398 6139 +romn 1 1 6.957497 6.957497 18097 +databasebenchmark 1 1 6.957497 6.957497 18098 +objectiveof 1 1 6.957497 6.957497 18099 +objectsystem 1 1 6.957497 6.957497 18100 +applicationsinclud 1 1 6.957497 6.957497 18101 +capabilitiesof 1 1 6.957497 6.957497 18102 +typedobject 1 1 6.957497 6.957497 18103 +hierarchicalnam 1 1 6.957497 6.957497 18104 +interfaceto 1 1 6.957497 6.957497 18105 +toeas 1 1 6.957497 6.957497 18106 +systemenviron 1 1 6.957497 6.957497 18107 +ccwill 1 1 6.957497 6.957497 18108 +networksto 1 1 6.957497 6.957497 18109 +ajoint 1 1 6.957497 6.957497 18110 +relationaldatabas 1 1 6.957497 6.957497 18111 +thetask 1 1 6.957497 6.957497 18112 +formanag 1 1 6.957497 6.957497 18113 +modelingne 1 1 6.957497 6.957497 18114 +manipulatingmuch 1 1 6.957497 6.957497 18115 +muchbett 1 1 6.957497 6.957497 18116 +differencefrom 1 1 6.957497 6.957497 18117 +parallelismto 1 1 6.957497 6.957497 18118 +assatellit 1 1 6.957497 6.957497 18119 +withm 1 1 6.957497 6.957497 18120 +persistentappl 1 1 6.957497 6.957497 18121 +chuh 1 1 6.957497 6.957497 18122 +santiego 1 1 6.957497 6.957497 18123 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html new file mode 100644 index 00000000..262f8a9d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dyer^dyer.html @@ -0,0 +1,327 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 16 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +interest 2 384 0.693147 1.386294 11 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +work 1 380 0.693147 0.693147 9 +us 6 329 1.098612 6.591672 16 +time 2 293 1.098612 2.197224 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +wisc 2 242 1.386294 2.772588 33 +gener 2 220 1.386294 2.772588 27 +mail 1 238 1.386294 1.386294 22 +graduat 1 215 1.386294 1.386294 31 +public 2 202 1.609438 3.218876 43 +fall 2 181 1.609438 3.218876 40 +includ 1 208 1.609438 1.609438 42 +algorithm 7 162 1.791759 12.542313 57 +base 6 165 1.791759 10.750554 50 +data 5 170 1.791759 8.958795 49 +wisconsin 4 169 1.791759 7.167036 54 +develop 4 174 1.791759 7.167036 53 +recent 4 167 1.791759 7.167036 58 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +object 9 138 1.945910 17.513190 79 +model 8 145 1.945910 15.567280 69 +area 2 144 1.945910 3.891820 80 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +analysi 3 124 2.079442 6.238326 98 +spring 2 131 2.079442 4.158884 88 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +provid 1 121 2.079442 2.079442 94 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +make 1 111 2.197225 2.197225 120 +final 1 116 2.197225 2.197225 108 +structur 1 106 2.197225 2.197225 105 +user 4 104 2.302585 9.210340 137 +need 2 98 2.302585 4.605170 135 +take 1 97 2.302585 2.302585 134 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +imag 11 91 2.397895 26.376845 161 +real 4 93 2.397895 9.591580 144 +center 2 88 2.397895 4.795790 158 +call 1 91 2.397895 2.397895 153 +graphic 1 90 2.397895 2.397895 147 +control 11 82 2.484907 27.333977 164 +activ 3 84 2.484907 7.454721 182 +environ 3 84 2.484907 7.454721 177 +build 2 85 2.484907 4.969814 184 +ieee 2 86 2.484907 4.969814 190 +west 1 83 2.484907 2.484907 192 +start 1 83 2.484907 2.484907 173 +chang 1 82 2.484907 2.484907 163 +appear 5 78 2.564949 12.824745 210 +orient 2 80 2.564949 5.129898 205 +dynam 1 76 2.564949 2.564949 194 +refer 1 78 2.564949 2.564949 203 +complet 1 77 2.564949 2.564949 208 +workshop 3 71 2.639057 7.917171 239 +intellig 2 72 2.639057 5.278114 225 +view 10 70 2.708050 27.080500 254 +order 3 69 2.708050 8.124150 249 +goal 1 66 2.708050 2.708050 250 +main 1 67 2.708050 2.708050 256 +interact 5 62 2.772589 13.862945 270 +virtual 2 62 2.772589 5.545178 285 +import 2 65 2.772589 5.545178 282 +artifici 2 63 2.772589 5.545178 280 +experi 1 64 2.772589 2.772589 283 +result 1 65 2.772589 2.772589 281 +guid 1 63 2.772589 2.772589 267 +abstract 1 62 2.772589 2.772589 276 +descript 1 64 2.772589 2.772589 271 +simpl 1 60 2.833213 2.833213 298 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +explor 3 58 2.890372 8.671116 324 +point 2 58 2.890372 5.780744 319 +space 1 57 2.890372 2.890372 310 +three 5 54 2.944439 14.722195 330 +scientif 2 53 2.944439 5.888878 341 +finger 1 52 2.995732 2.995732 354 +investig 1 51 2.995732 2.995732 353 +visual 8 48 3.044522 24.356176 372 +without 2 50 3.044522 6.089044 370 +basic 1 50 3.044522 3.044522 360 +approach 1 48 3.044522 3.044522 366 +move 3 47 3.091042 9.273126 382 +understand 2 47 3.091042 6.182084 384 +could 1 46 3.091042 3.091042 383 +adapt 1 46 3.091042 3.091042 387 +possibl 1 47 3.091042 3.091042 378 +physic 1 47 3.091042 3.091042 377 +mark 2 44 3.135494 6.270988 403 +around 1 43 3.178054 3.178054 415 +vision 17 41 3.218876 54.720892 430 +combin 2 42 3.218876 6.437752 421 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +autom 1 41 3.218876 3.218876 434 +continu 2 39 3.258097 6.516194 448 +map 1 39 3.258097 3.258097 452 +societi 1 40 3.258097 3.258097 456 +paul 2 38 3.295837 6.591674 471 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +close 1 38 3.295837 3.295837 465 +correct 1 38 3.295837 3.295837 462 +brian 1 38 3.295837 3.295837 466 +purpos 4 37 3.332205 13.328820 481 +sciencesunivers 1 37 3.332205 3.332205 486 +connect 1 37 3.332205 3.332205 485 +procedur 1 36 3.367296 3.367296 488 +especi 1 36 3.367296 3.367296 496 +robot 1 36 3.367296 3.367296 497 +global 5 34 3.401197 17.005985 520 +represent 3 35 3.401197 10.203591 512 +either 1 35 3.401197 3.401197 506 +curriculum 1 33 3.433987 3.433987 535 +taught 1 33 3.433987 3.433987 526 +rang 1 30 3.555348 3.555348 565 +power 1 30 3.555348 3.555348 573 +chair 1 29 3.583519 3.583519 596 +steve 1 29 3.583519 3.583519 594 +progress 1 28 3.610918 3.610918 598 +measur 1 28 3.610918 3.610918 609 +proc 10 26 3.688879 36.888790 649 +detect 3 26 3.688879 11.066637 646 +bookmark 1 26 3.688879 3.688879 639 +toward 2 25 3.737670 7.475340 668 +task 1 25 3.737670 3.737670 678 +accur 1 25 3.737670 3.737670 680 +trace 1 25 3.737670 3.737670 677 +strategi 1 25 3.737670 3.737670 682 +motion 10 24 3.761200 37.612000 699 +pattern 3 24 3.761200 11.283600 689 +flow 1 24 3.761200 3.761200 700 +recognit 5 23 3.806662 19.033310 723 +displai 4 23 3.806662 15.226648 712 +input 2 23 3.806662 7.613324 727 +mobil 1 23 3.806662 3.806662 730 +sequenc 1 23 3.806662 3.806662 734 +defin 3 22 3.850148 11.550444 746 +period 2 22 3.850148 7.700296 743 +geometri 1 22 3.850148 3.850148 752 +path 2 21 3.912023 7.824046 778 +output 1 21 3.912023 3.912023 788 +navig 1 21 3.912023 3.912023 796 +viewer 1 21 3.912023 3.912023 787 +synthesi 2 20 3.951244 7.902488 834 +basi 1 20 3.951244 3.951244 828 +boston 1 19 4.007333 4.007333 862 +separ 1 19 4.007333 4.007333 844 +dimension 3 18 4.060443 12.181329 909 +behavior 3 18 4.060443 12.181329 881 +four 1 18 4.060443 4.060443 905 +differenti 1 17 4.110874 4.110874 921 +coupl 1 17 4.110874 4.110874 939 +precis 1 15 4.248495 4.248495 1023 +capabl 1 15 4.248495 4.248495 1016 +track 1 15 4.248495 4.248495 1029 +scene 7 14 4.317488 30.222416 1114 +camera 4 14 4.317488 17.269952 1115 +conf 5 13 4.382027 21.910135 1181 +charl 2 13 4.382027 8.764054 1149 +step 1 13 4.382027 4.382027 1138 +coordin 1 13 4.382027 4.382027 1182 +believ 1 13 4.382027 4.382027 1187 +shape 5 12 4.465908 22.329540 1245 +fix 1 11 4.553877 4.553877 1327 +valid 1 11 4.553877 4.553877 1299 +bill 1 11 4.553877 4.553877 1297 +correspond 2 10 4.653960 9.307920 1382 +edutelephon 1 10 4.653960 4.653960 1473 +custom 1 10 4.653960 4.653960 1414 +acquisit 1 10 4.653960 4.653960 1465 +earth 1 10 4.653960 4.653960 1463 +dyer 18 9 4.753590 85.564620 1573 +surfac 6 9 4.753590 28.521540 1574 +observ 3 9 4.753590 14.260770 1578 +leader 1 9 4.753590 4.753590 1576 +intermedi 1 9 4.753590 4.753590 1497 +recoveri 1 9 4.753590 4.753590 1474 +morgan 1 9 4.753590 4.753590 1484 +invari 3 8 4.875197 14.625591 1748 +autonom 1 8 4.875197 4.875197 1749 +siggraph 1 8 4.875197 4.875197 1773 +irregular 1 8 4.875197 4.875197 1768 +curv 1 8 4.875197 4.875197 1656 +edg 1 8 4.875197 4.875197 1647 +textur 1 8 4.875197 4.875197 1677 +seitz 9 7 5.010635 45.095715 1976 +morph 3 7 5.010635 15.031905 1937 +interpol 2 7 5.010635 10.021270 1823 +paramet 1 7 5.010635 5.010635 1796 +smooth 1 7 5.010635 5.010635 1855 +dimens 1 7 5.010635 5.010635 1930 +stereo 1 7 5.010635 5.010635 1818 +viewpoint 4 6 5.164786 20.659144 2116 +reconstruct 3 6 5.164786 15.494358 2170 +recov 2 6 5.164786 10.329572 2235 +maryland 1 6 5.164786 5.164786 2140 +kluwer 1 6 5.164786 5.164786 2143 +bestor 1 6 5.164786 5.164786 2099 +cyclic 3 5 5.347108 16.041324 2383 +unknown 2 5 5.347108 10.694216 2318 +provabl 2 5 5.347108 10.694216 2558 +affin 2 5 5.347108 10.694216 2378 +recogn 1 5 5.347108 5.347108 2302 +revolut 1 5 5.347108 5.347108 2315 +jain 1 5 5.347108 5.347108 2332 +adjust 1 5 5.347108 5.347108 2422 +rigid 1 5 5.347108 5.347108 2432 +gareth 1 5 5.347108 5.347108 2392 +connectionist 1 5 5.347108 5.347108 2430 +bradlei 1 5 5.347108 5.347108 2554 +contour 2 4 5.568345 11.136690 2812 +usa 1 4 5.568345 5.568345 3080 +cvpr 1 4 5.568345 5.568345 2761 +theus 1 4 5.568345 5.568345 2992 +simplifi 1 4 5.568345 5.568345 3066 +satellit 1 4 5.568345 5.568345 3077 +shah 1 4 5.568345 5.568345 2814 +lumelski 1 4 5.568345 5.568345 2837 +harri 1 4 5.568345 5.568345 3034 +asid 1 3 5.857933 5.857933 3770 +stationari 1 3 5.857933 5.857933 3861 +alamito 1 3 5.857933 5.857933 3558 +lattic 1 3 5.857933 5.857933 3721 +groupcours 1 3 5.857933 5.857933 3092 +macc 1 3 5.857933 5.857933 3414 +kutulako 6 2 6.263398 37.580388 6064 +hibbard 3 2 6.263398 18.790194 6066 +stewart 2 2 6.263398 12.526796 5739 +acquir 1 2 6.263398 6.263398 5557 +panoram 1 2 6.263398 6.263398 4755 +howto 1 2 6.263398 6.263398 5761 +arbitrarili 1 2 6.263398 6.263398 5791 +discrimin 1 2 6.263398 6.263398 6140 +festschrift 1 2 6.263398 6.263398 6141 +rosenfeld 1 2 6.263398 6.263398 4495 +articul 1 2 6.263398 6.263398 5799 +kyro 1 2 6.263398 6.263398 6063 +rochest 1 2 6.263398 6.263398 6142 +seal 3 1 6.957497 20.872491 18124 +allmen 2 1 6.957497 13.914994 18125 +kjell 2 1 6.957497 13.914994 18126 +pagecharl 1 1 6.957497 6.957497 18127 +dyerprofessordepart 1 1 6.957497 6.957497 18128 +infoph 1 1 6.957497 6.957497 18129 +visualizationgroup 1 1 6.957497 6.957497 18130 +groupprogram 1 1 6.957497 6.957497 18131 +synthesisth 1 1 6.957497 6.957497 18132 +controllingin 1 1 6.957497 6.957497 18133 +cameraof 1 1 6.957497 6.957497 18134 +videostream 1 1 6.957497 6.957497 18135 +whicha 1 1 6.957497 6.957497 18136 +througha 1 1 6.957497 6.957497 18137 +thesit 1 1 6.957497 6.957497 18138 +predetermin 1 1 6.957497 6.957497 18139 +researchquest 1 1 6.957497 6.957497 18140 +synthesizenew 1 1 6.957497 6.957497 18141 +reconstructiona 1 1 6.957497 6.957497 18142 +innovativetechniqu 1 1 6.957497 6.957497 18143 +callview 1 1 6.957497 6.957497 18144 +basisimag 1 1 6.957497 6.957497 18145 +explorationcomput 1 1 6.957497 6.957497 18146 +controllingcamera 1 1 6.957497 6.957497 18147 +purposefulli 1 1 6.957497 6.957497 18148 +theposit 1 1 6.957497 6.957497 18149 +adjustviewpoint 1 1 6.957497 6.957497 18150 +forsolv 1 1 6.957497 6.957497 18151 +findspecif 1 1 6.957497 6.957497 18152 +unknownshap 1 1 6.957497 6.957497 18153 +appearanceof 1 1 6.957497 6.957497 18154 +computationsrequir 1 1 6.957497 6.957497 18155 +andelimin 1 1 6.957497 6.957497 18156 +thecamera 1 1 6.957497 6.957497 18157 +towardsviewpoint 1 1 6.957497 6.957497 18158 +viewedobject 1 1 6.957497 6.957497 18159 +thisapproach 1 1 6.957497 6.957497 18160 +visualizationin 1 1 6.957497 6.957497 18161 +techniquescap 1 1 6.957497 6.957497 18162 +specificgraph 1 1 6.957497 6.957497 18163 +displayingarbitrari 1 1 6.957497 6.957497 18164 +commonfram 1 1 6.957497 6.957497 18165 +algorithmexecut 1 1 6.957497 6.957497 18166 +dataanalysi 1 1 6.957497 6.957497 18167 +forexperi 1 1 6.957497 6.957497 18168 +visualizingintermedi 1 1 6.957497 6.957497 18169 +forproblem 1 1 6.957497 6.957497 18170 +cloud 1 1 6.957497 6.957497 18171 +azriel 1 1 6.957497 6.957497 18172 +occlud 1 1 6.957497 6.957497 18173 +battaiola 1 1 6.957497 6.957497 18174 +santek 1 1 6.957497 6.957497 18175 +voidrot 1 1 6.957497 6.957497 18176 +martinez 1 1 6.957497 6.957497 18177 +liangyin 1 1 6.957497 6.957497 18178 +yuph 1 1 6.957497 6.957497 18179 +whibbard 1 1 6.957497 6.957497 18180 +onlattic 1 1 6.957497 6.957497 18181 +kiriako 1 1 6.957497 6.957497 18182 +ofobserv 1 1 6.957497 6.957497 18183 +iutech 1 1 6.957497 6.957497 18184 +spatiotempor 1 1 6.957497 6.957497 18185 +brent 1 1 6.957497 6.957497 18186 +dimensionalshap 1 1 6.957497 6.957497 18187 +plantinga 1 1 6.957497 6.957497 18188 +wheaton 1 1 6.957497 6.957497 18189 +representationfor 1 1 6.957497 6.957497 18190 +ccsua 1 1 6.957497 6.957497 18191 +ctstateu 1 1 6.957497 6.957497 18192 +measureslink 1 1 6.957497 6.957497 18193 +interestmi 1 1 6.957497 6.957497 18194 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html new file mode 100644 index 00000000..4e2f33ec --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~dzimm^dzimm.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +class 1 199 1.609438 1.609438 37 +welcom 2 122 2.079442 4.158884 99 +check 1 115 2.197225 2.197225 118 +teach 1 108 2.197225 2.197225 112 +section 1 94 2.397895 2.397895 149 +educ 1 86 2.484907 2.484907 191 +friend 1 48 3.044522 3.044522 376 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +curriculum 1 33 3.433987 3.433987 535 +task 1 25 3.737670 3.737670 678 +bring 1 10 4.653960 4.653960 1430 +appreci 1 5 5.347108 5.347108 2374 +patienc 1 2 6.263398 6.263398 5466 +machinew 1 1 6.957497 6.957497 18195 +arduou 1 1 6.957497 6.957497 18196 +vitaecheck 1 1 6.957497 6.957497 18197 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html new file mode 100644 index 00000000..1b3f6f2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ejhazen^ejhazen.html @@ -0,0 +1,166 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +time 3 293 1.098612 3.295836 17 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +email 3 220 1.386294 4.158882 29 +also 2 259 1.386294 2.772588 28 +link 2 247 1.386294 2.772588 24 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +support 2 132 1.945910 3.891820 83 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +dayton 1 119 2.079442 2.079442 104 +report 1 131 2.079442 2.079442 92 +machin 1 129 2.079442 2.079442 95 +check 4 115 2.197225 8.788900 118 +make 3 111 2.197225 6.591675 120 +intern 2 108 2.197225 4.394450 128 +assist 2 112 2.197225 4.394450 113 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +part 1 98 2.302585 2.302585 129 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +call 1 91 2.397895 2.397895 153 +question 1 91 2.397895 2.397895 141 +real 1 93 2.397895 2.397895 144 +sinc 1 90 2.397895 2.397895 159 +homepag 1 93 2.397895 2.397895 148 +thing 2 84 2.484907 4.969814 189 +west 1 83 2.484907 2.484907 192 +help 1 83 2.484907 2.484907 175 +requir 1 81 2.484907 2.484907 167 +complet 2 77 2.564949 5.129898 208 +want 1 79 2.564949 2.564949 199 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +servic 3 72 2.639057 7.917171 236 +name 1 72 2.639057 2.639057 220 +nation 1 74 2.639057 2.639057 240 +multimedia 2 68 2.708050 5.416100 258 +practic 1 70 2.708050 2.708050 246 +laboratori 1 63 2.772589 2.772589 292 +creat 1 63 2.772589 2.772589 277 +locat 1 59 2.833213 2.833213 303 +room 1 59 2.833213 2.833213 301 +unix 1 58 2.890372 2.890372 308 +instruct 1 53 2.944439 2.944439 332 +talk 1 53 2.944439 2.944439 336 +digit 2 52 2.995732 5.991464 348 +date 1 51 2.995732 2.995732 344 +cool 1 49 3.044522 3.044522 374 +standard 1 48 3.044522 3.044522 365 +archiv 1 49 3.044522 3.044522 364 +could 1 46 3.091042 3.091042 383 +video 3 44 3.135494 9.406482 405 +even 1 45 3.135494 3.135494 393 +answer 1 45 3.135494 3.135494 391 +discuss 1 45 3.135494 3.135494 399 +made 1 44 3.135494 3.135494 398 +around 1 43 3.178054 3.178054 415 +societi 1 40 3.258097 3.258097 456 +purpos 2 37 3.332205 6.664410 481 +curriculum 1 33 3.433987 3.433987 535 +anim 1 31 3.496508 3.496508 557 +domain 1 30 3.555348 3.555348 564 +secur 1 30 3.555348 3.555348 577 +full 1 28 3.610918 3.610918 615 +never 1 25 3.737670 3.737670 671 +spent 1 25 3.737670 3.737670 676 +magazin 1 24 3.761200 3.761200 704 +serv 1 22 3.850148 3.850148 758 +half 1 21 3.912023 3.912023 776 +among 1 21 3.912023 3.912023 781 +wonder 1 20 3.951244 3.951244 815 +eric 1 19 4.007333 4.007333 870 +anderson 1 19 4.007333 4.007333 860 +offici 1 18 4.060443 4.060443 894 +edulast 1 17 4.110874 4.110874 927 +explan 1 16 4.174387 4.174387 985 +biologi 3 15 4.248495 12.745485 1049 +susan 1 15 4.248495 4.248495 1050 +shown 1 14 4.317488 4.317488 1080 +wait 2 13 4.382027 8.764054 1168 +philosophi 1 13 4.382027 4.382027 1167 +neat 1 12 4.465908 4.465908 1263 +entertain 1 12 4.465908 4.465908 1286 +holidai 1 12 4.465908 4.465908 1224 +see 1 11 4.553877 4.553877 1337 +fix 1 11 4.553877 4.553877 1327 +leader 1 9 4.753590 4.753590 1576 +mainten 1 9 4.753590 4.753590 1543 +told 1 8 4.875197 4.875197 1658 +scout 6 7 5.010635 30.063810 1903 +molecular 3 7 5.010635 15.031905 1887 +explain 2 7 5.010635 10.021270 1816 +monei 1 7 5.010635 5.010635 1934 +philosoph 1 7 5.010635 5.010635 1904 +meant 1 6 5.164786 5.164786 2055 +lucki 1 6 5.164786 5.164786 2163 +mac 2 5 5.347108 10.694216 2292 +registr 1 5 5.347108 5.347108 2249 +commod 1 5 5.347108 5.347108 2415 +girlfriend 1 5 5.347108 5.347108 2579 +billi 1 5 5.347108 5.347108 2404 +couldn 1 4 5.568345 5.568345 2977 +green 1 4 5.568345 5.568345 2848 +pageer 1 3 5.857933 5.857933 3776 +ofwisconsin 1 3 5.857933 5.857933 4002 +pete 1 3 5.857933 5.857933 3865 +specialist 1 3 5.857933 5.857933 3319 +lauri 1 3 5.857933 5.857933 3867 +wit 1 3 5.857933 5.857933 4005 +popul 1 3 5.857933 5.857933 3235 +facstaff 1 3 5.857933 5.857933 3433 +hazen 1 2 6.263398 6.263398 6143 +calcari 1 2 6.263398 6.263398 6144 +devri 1 2 6.263398 6.263398 6145 +broken 1 2 6.263398 6.263398 5074 +mice 1 2 6.263398 6.263398 5069 +shameless 1 2 6.263398 6.263398 6146 +salon 1 2 6.263398 6.263398 5827 +nixon 1 2 6.263398 6.263398 5868 +hazennon 1 1 6.957497 6.957497 18198 +professorroom 1 1 6.957497 6.957497 18199 +fornet 1 1 6.957497 6.957497 18200 +elegantli 1 1 6.957497 6.957497 18201 +fearless 1 1 6.957497 6.957497 18202 +withtech 1 1 6.957497 6.957497 18203 +capitalist 1 1 6.957497 6.957497 18204 +pragmatist 1 1 6.957497 6.957497 18205 +metaphys 1 1 6.957497 6.957497 18206 +makethi 1 1 6.957497 6.957497 18207 +drosophila 1 1 6.957497 6.957497 18208 +geneticist 1 1 6.957497 6.957497 18209 +ezin 1 1 6.957497 6.957497 18210 +shockwav 1 1 6.957497 6.957497 18211 +kudon 1 1 6.957497 6.957497 18212 +quicktimevr 1 1 6.957497 6.957497 18213 +documentari 1 1 6.957497 6.957497 18214 +plight 1 1 6.957497 6.957497 18215 +bosnia 1 1 6.957497 6.957497 18216 +uproot 1 1 6.957497 6.957497 18217 +preslei 1 1 6.957497 6.957497 18218 +meetingsejhazen 1 1 6.957497 6.957497 18219 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html new file mode 100644 index 00000000..29389e50 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~eliassi^eliassi.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +offic 1 299 1.098612 1.098612 13 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +illinoi 1 7 5.010635 5.010635 1941 +tina 1 3 5.857933 5.857933 3744 +urbana 1 3 5.857933 5.857933 3879 +eliassi 3 2 6.263398 18.790194 6147 +champaign 1 2 6.263398 6.263398 5671 +pagetina 1 1 6.957497 6.957497 18220 +bldgphone 1 1 6.957497 6.957497 18221 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html new file mode 100644 index 00000000..0348a464 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~eneuman^eneuman.html @@ -0,0 +1 @@ +term, tf, in documents count, idf, tfidf, wordid diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html new file mode 100644 index 00000000..645aebee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ericro^ericro.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +madison 5 165 1.791759 8.958795 55 +wisconsin 3 169 1.791759 5.375277 54 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +parallel 1 169 1.791759 1.791759 60 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +professor 1 137 1.945910 1.945910 76 +assign 1 135 1.945910 1.945910 66 +high 3 130 2.079442 6.238326 101 +dayton 1 119 2.079442 2.079442 104 +report 1 131 2.079442 2.079442 92 +intern 2 108 2.197225 4.394450 128 +topic 1 114 2.197225 2.197225 110 +technic 1 100 2.302585 2.302585 140 +proceed 2 93 2.397895 4.795790 152 +west 1 83 2.484907 2.484907 192 +level 1 87 2.484907 2.484907 180 +appear 2 78 2.564949 5.129898 210 +decemb 2 80 2.564949 5.129898 215 +april 1 77 2.564949 2.564949 196 +resum 1 79 2.564949 2.564949 217 +symposium 2 72 2.639057 5.278114 238 +street 1 63 2.772589 2.772589 293 +instruct 4 53 2.944439 11.777756 332 +advisor 1 51 2.995732 2.995732 355 +approach 2 48 3.044522 6.089044 366 +anoth 1 45 3.135494 3.135494 408 +mechan 1 43 3.178054 3.178054 416 +cach 2 41 3.218876 6.437752 432 +annual 2 40 3.258097 6.516194 458 +electr 1 38 3.295837 3.295837 461 +jame 3 35 3.401197 10.203591 507 +steve 2 29 3.583519 7.167038 594 +trace 2 25 3.737670 7.475340 677 +smith 3 20 3.951244 11.853732 820 +eric 4 19 4.007333 16.029332 870 +predict 2 19 4.007333 8.014666 855 +latenc 2 16 4.174387 8.348774 993 +condit 1 16 4.174387 4.174387 975 +drive 1 15 4.248495 4.248495 1052 +johnson 1 13 4.382027 4.382027 1162 +bandwidth 3 11 4.553877 13.661631 1365 +branch 3 11 4.553877 13.661631 1318 +multiscalar 1 8 4.875197 4.875197 1783 +erik 1 8 4.875197 4.875197 1701 +microarchitectur 2 6 5.164786 10.329572 2238 +fetch 3 5 5.347108 16.041324 2567 +kestrel 1 4 5.568345 5.568345 2990 +confid 2 3 5.857933 11.715866 3691 +bennett 2 3 5.857933 11.715866 4024 +cold 1 3 5.857933 5.857933 3637 +rotenberg 3 1 6.957497 20.872491 18222 +passsth 1 1 6.957497 6.957497 18223 +budweisth 1 1 6.957497 6.957497 18224 +ericro 1 1 6.957497 6.957497 18225 +smithresearch 1 1 6.957497 6.957497 18226 +mispredict 1 1 6.957497 6.957497 18227 +tolerancepubl 1 1 6.957497 6.957497 18228 +jacobsen 1 1 6.957497 6.957497 18229 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html new file mode 100644 index 00000000..414dc92d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~falsafi^falsafi.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +parallel 2 169 1.791759 3.583518 60 +read 1 154 1.791759 1.791759 47 +phone 1 175 1.791759 1.791759 45 +like 2 132 1.945910 3.891820 81 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +peopl 1 96 2.302585 2.302585 132 +west 1 83 2.484907 2.484907 192 +school 1 84 2.484907 2.484907 188 +june 2 79 2.564949 5.129898 214 +decemb 1 80 2.564949 2.564949 215 +would 4 67 2.708050 10.832200 251 +evalu 1 64 2.772589 2.772589 266 +streetmadison 1 38 3.295837 3.295837 474 +electr 1 38 3.295837 3.295837 461 +sciencesunivers 1 37 3.332205 3.332205 486 +next 1 34 3.401197 3.401197 517 +idea 1 32 3.465736 3.465736 545 +measur 1 28 3.610918 3.610918 609 +american 1 27 3.637586 3.637586 634 +rather 4 26 3.688879 14.755516 642 +miscellan 1 23 3.806662 3.806662 731 +listen 1 18 4.060443 4.060443 907 +drink 1 9 4.753590 4.753590 1607 +french 1 9 4.753590 4.753590 1511 +assistantdepart 1 8 4.875197 4.875197 1784 +fail 1 8 4.875197 4.875197 1655 +convers 1 8 4.875197 4.875197 1673 +architect 1 8 4.875197 4.875197 1624 +partner 1 8 4.875197 4.875197 1648 +hack 1 7 5.010635 5.010635 1950 +babak 3 5 5.347108 16.041324 2584 +falsafi 3 5 5.347108 16.041324 2585 +suni 2 5 5.347108 10.694216 2452 +queen 1 4 5.568345 5.568345 2919 +buffalo 2 2 6.263398 12.526796 4947 +usatel 1 2 6.263398 6.263398 6111 +shubu 1 2 6.263398 6.263398 6148 +crime 1 2 6.263398 6.263398 5972 +mentorcultresearch 1 1 6.957497 6.957497 18230 +modelseduc 1 1 6.957497 6.957497 18231 +morf 1 1 6.957497 6.957497 18232 +dionosi 1 1 6.957497 6.957497 18233 +hillari 1 1 6.957497 6.957497 18234 +profan 1 1 6.957497 6.957497 18235 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html new file mode 100644 index 00000000..6b7b7368 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ferris^ferris.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +program 2 374 0.693147 1.386294 7 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +engin 1 297 1.098612 1.098612 20 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +link 1 247 1.386294 1.386294 24 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +applic 3 170 1.791759 5.375277 56 +parallel 3 169 1.791759 5.375277 60 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +base 1 165 1.791759 1.791759 50 +contact 1 153 1.791759 1.791759 59 +avail 1 169 1.791759 1.791759 48 +problem 4 147 1.945910 7.783640 75 +model 2 145 1.945910 3.891820 69 +professor 1 137 1.945910 1.945910 76 +process 1 142 1.945910 1.945910 72 +architectur 1 139 1.945910 1.945910 77 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +machin 1 129 2.079442 2.079442 95 +mathemat 3 108 2.197225 6.591675 123 +structur 3 106 2.197225 6.591675 105 +theori 1 111 2.197225 2.197225 127 +look 1 107 2.197225 2.197225 115 +techniqu 1 99 2.302585 2.302585 138 +associ 1 93 2.397895 2.397895 151 +center 1 88 2.397895 2.397895 158 +larg 2 82 2.484907 4.969814 168 +member 1 84 2.484907 2.484907 165 +west 1 83 2.484907 2.484907 192 +optim 3 79 2.564949 7.694847 197 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +street 1 63 2.772589 2.772589 293 +januari 1 62 2.772589 2.772589 264 +extens 1 53 2.944439 2.944439 340 +investig 2 51 2.995732 5.991464 353 +particular 1 51 2.995732 2.995732 352 +telephon 1 50 3.044522 3.044522 373 +numer 1 49 3.044522 3.044522 369 +effect 2 46 3.091042 6.182084 385 +electron 1 47 3.091042 3.091042 379 +linear 1 41 3.218876 3.218876 431 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +purpos 1 37 3.332205 3.332205 481 +michael 2 35 3.401197 6.802394 514 +within 1 33 3.433987 3.433987 525 +graph 1 30 3.555348 3.555348 576 +consid 1 29 3.583519 3.583519 590 +scale 2 28 3.610918 7.221836 613 +framework 1 28 3.610918 3.610918 606 +determin 1 27 3.637586 3.637586 630 +relev 1 26 3.688879 3.688879 637 +emphasi 2 22 3.850148 7.700296 755 +properti 1 22 3.850148 3.850148 749 +identifi 1 22 3.850148 3.850148 760 +path 1 21 3.912023 3.912023 778 +exploit 1 20 3.951244 3.951244 836 +mostli 1 19 4.007333 4.007333 869 +cambridg 1 16 4.174387 4.174387 1008 +pagec 1 15 4.248495 4.248495 1011 +nonlinear 2 14 4.317488 8.634976 1107 +consider 1 14 4.317488 4.317488 1076 +econom 2 13 4.382027 8.764054 1184 +directli 1 13 4.382027 4.382027 1141 +robust 1 12 4.465908 4.465908 1271 +success 1 10 4.653960 4.653960 1390 +traffic 1 10 4.653960 4.653960 1421 +underli 1 10 4.653960 4.653960 1410 +ferri 3 8 4.875197 14.625591 1715 +pivot 1 5 5.347108 5.347108 2426 +chemic 1 5 5.347108 5.347108 2552 +condor 1 5 5.347108 5.347108 2577 +complementar 2 3 5.857933 11.715866 3999 +engineeringand 1 3 5.857933 5.857933 3779 +congest 1 3 5.857933 5.857933 3993 +followingtechniqu 1 2 6.263398 6.263398 5514 +equilibria 1 2 6.263398 6.263398 4760 +taxat 1 2 6.263398 6.263398 4524 +toll 1 2 6.263398 6.263398 6149 +arealso 1 2 6.263398 6.263398 5650 +beinginvestig 1 2 6.263398 6.263398 5745 +variationalinequ 1 1 6.957497 6.957497 18236 +toproblem 1 1 6.957497 6.957497 18237 +andinterfac 1 1 6.957497 6.957497 18238 +beingconsid 1 1 6.957497 6.957497 18239 +oncarbon 1 1 6.957497 6.957497 18240 +emiss 1 1 6.957497 6.957497 18241 +solvingproblem 1 1 6.957497 6.957497 18242 +partitioningtechniqu 1 1 6.957497 6.957497 18243 +forexploit 1 1 6.957497 6.957497 18244 +underlyingmodel 1 1 6.957497 6.957497 18245 +cpnet 1 1 6.957497 6.957497 18246 +prgram 1 1 6.957497 6.957497 18247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~finton^finton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~finton^finton.html new file mode 100644 index 00000000..c733dd2d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~finton^finton.html @@ -0,0 +1,255 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +system 4 443 0.693147 2.772588 6 +work 2 380 0.693147 1.386294 9 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +current 4 284 1.098612 4.394448 21 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 3 242 1.386294 4.158882 33 +also 3 259 1.386294 4.158882 28 +mail 1 238 1.386294 1.386294 22 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +modifi 1 178 1.609438 1.609438 35 +madison 5 165 1.791759 8.958795 55 +wisconsin 4 169 1.791759 7.167036 54 +develop 2 174 1.791759 3.583518 53 +base 2 165 1.791759 3.583518 50 +avail 2 169 1.791759 3.583518 48 +problem 2 147 1.945910 3.891820 75 +year 1 148 1.945910 1.945910 84 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +note 1 142 1.945910 1.945910 67 +perform 1 143 1.945910 1.945910 74 +file 1 132 1.945910 1.945910 70 +dayton 1 119 2.079442 2.079442 104 +welcom 1 122 2.079442 2.079442 99 +introduct 1 126 2.079442 2.079442 87 +machin 1 129 2.079442 2.079442 95 +number 1 130 2.079442 2.079442 97 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +world 3 115 2.197225 6.591675 126 +make 2 111 2.197225 4.394450 120 +send 1 114 2.197225 2.197225 109 +site 1 106 2.197225 2.197225 119 +check 1 115 2.197225 2.197225 118 +place 1 106 2.197225 2.197225 124 +need 3 98 2.302585 6.907755 135 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +comment 2 93 2.397895 4.795790 146 +present 1 91 2.397895 2.397895 145 +pictur 1 89 2.397895 2.397895 160 +homepag 1 93 2.397895 2.397895 148 +octob 1 89 2.397895 2.397895 156 +learn 7 86 2.484907 17.394349 170 +environ 3 84 2.484907 7.454721 177 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +librari 1 87 2.484907 2.484907 181 +control 1 82 2.484907 2.484907 164 +chang 1 82 2.484907 2.484907 163 +build 1 85 2.484907 2.484907 184 +start 1 83 2.484907 2.484907 173 +activ 1 84 2.484907 2.484907 182 +wide 1 84 2.484907 2.484907 185 +state 1 76 2.564949 2.564949 207 +master 1 76 2.564949 2.564949 216 +good 1 77 2.564949 2.564949 200 +optim 1 79 2.564949 2.564949 197 +intellig 3 72 2.639057 7.917171 225 +david 2 71 2.639057 5.278114 232 +free 1 73 2.639057 2.639057 224 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +goal 3 66 2.708050 8.124150 250 +degre 1 69 2.708050 2.708050 259 +order 1 69 2.708050 2.708050 249 +receiv 1 66 2.708050 2.708050 244 +knowledg 1 67 2.708050 2.708050 243 +artifici 3 63 2.772589 8.317767 280 +import 3 65 2.772589 8.317767 282 +street 1 63 2.772589 2.772589 293 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +copi 1 63 2.772589 2.772589 284 +function 1 62 2.772589 2.772589 275 +visit 1 63 2.772589 2.772589 288 +virtual 1 62 2.772589 2.772589 285 +plai 1 60 2.833213 2.833213 307 +browser 3 56 2.890372 8.671116 313 +explor 2 58 2.890372 5.780744 324 +thesi 1 57 2.890372 2.890372 327 +direct 1 57 2.890372 2.890372 316 +point 1 58 2.890372 2.890372 319 +think 1 57 2.890372 2.890372 314 +allow 1 53 2.944439 2.944439 333 +advisor 1 51 2.995732 2.995732 355 +finger 1 52 2.995732 2.995732 354 +case 1 51 2.995732 2.995732 351 +investig 1 51 2.995732 2.995732 353 +date 1 51 2.995732 2.995732 344 +understand 2 47 3.091042 6.182084 384 +adapt 1 46 3.091042 3.091042 387 +featur 1 46 3.091042 3.091042 386 +math 1 44 3.135494 3.135494 402 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +netscap 1 44 3.135494 3.135494 395 +favorit 1 44 3.135494 3.135494 410 +show 2 43 3.178054 6.356108 417 +term 1 43 3.178054 3.178054 411 +music 1 42 3.218876 3.218876 436 +combin 1 42 3.218876 3.218876 421 +late 1 40 3.258097 3.258097 439 +littl 1 39 3.258097 3.258097 454 +form 1 39 3.258097 3.258097 443 +realli 1 40 3.258097 3.258097 444 +feel 1 37 3.332205 3.332205 483 +represent 1 35 3.401197 3.401197 512 +next 1 34 3.401197 3.401197 517 +word 1 34 3.401197 3.401197 508 +dissert 1 32 3.465736 3.465736 549 +kind 1 32 3.465736 3.465736 541 +express 1 32 3.465736 3.465736 540 +independ 1 32 3.465736 3.465736 548 +posit 1 31 3.496508 3.496508 552 +specifi 1 30 3.555348 3.555348 568 +platform 1 29 3.583519 3.583519 591 +measur 1 28 3.610918 3.610918 609 +hope 1 28 3.610918 3.610918 610 +actual 1 28 3.610918 3.610918 604 +releas 1 28 3.610918 3.610918 616 +relev 1 26 3.688879 3.688879 637 +bookmark 1 26 3.688879 3.688879 639 +wai 1 25 3.737670 3.737670 662 +task 1 25 3.737670 3.737670 678 +daili 2 24 3.761200 7.522400 706 +reach 1 24 3.761200 3.761200 688 +input 3 23 3.806662 11.419986 727 +head 2 23 3.806662 7.613324 732 +instead 2 22 3.850148 7.700296 756 +output 2 21 3.912023 7.824046 788 +tell 1 21 3.912023 3.912023 777 +grad 1 20 3.951244 3.951244 837 +exploit 1 20 3.951244 3.951244 836 +basi 1 20 3.951244 3.951244 828 +left 1 19 4.007333 4.007333 851 +feedback 1 19 4.007333 4.007333 854 +citi 1 19 4.007333 4.007333 874 +listen 1 18 4.060443 4.060443 907 +whether 1 17 4.110874 4.110874 918 +dilbert 1 16 4.174387 4.174387 996 +action 3 15 4.248495 12.745485 1038 +contribut 1 15 4.248495 4.248495 1021 +balanc 1 14 4.317488 4.317488 1112 +weak 1 13 4.382027 4.382027 1159 +hotlist 1 13 4.382027 4.382027 1199 +nasa 1 13 4.382027 4.382027 1188 +employ 1 12 4.465908 4.465908 1291 +minor 1 12 4.465908 4.465908 1237 +michigan 2 11 4.553877 9.107754 1368 +smart 1 11 4.553877 4.553877 1352 +abil 1 11 4.553877 4.553877 1341 +sens 1 11 4.553877 4.553877 1305 +bill 1 11 4.553877 4.553877 1297 +rapid 1 10 4.653960 4.653960 1453 +traffic 1 10 4.653960 4.653960 1421 +fellowship 1 10 4.653960 4.653960 1460 +true 1 10 4.653960 4.653960 1422 +volleybal 1 9 4.753590 4.753590 1598 +pair 1 9 4.753590 4.753590 1503 +editori 1 9 4.753590 4.753590 1611 +star 3 8 4.875197 14.625591 1717 +grew 1 8 4.875197 4.875197 1742 +gain 1 8 4.875197 4.875197 1730 +irregular 1 8 4.875197 4.875197 1768 +on 1 8 4.875197 4.875197 1628 +extract 1 8 4.875197 4.875197 1728 +opinion 1 8 4.875197 4.875197 1708 +tourist 1 8 4.875197 4.875197 1710 +earn 1 7 5.010635 5.010635 1788 +notion 1 7 5.010635 5.010635 1947 +piano 1 6 5.164786 5.164786 2201 +benefit 1 6 5.164786 5.164786 2213 +variant 1 6 5.164786 5.164786 2043 +gate 1 6 5.164786 5.164786 2182 +sponsor 1 6 5.164786 5.164786 2133 +grand 1 5 5.347108 5.347108 2425 +race 1 5 5.347108 5.347108 2417 +treat 1 5 5.347108 5.347108 2521 +focuss 1 5 5.347108 5.347108 2271 +blow 1 5 5.347108 5.347108 2407 +reinforc 3 4 5.568345 16.705035 2674 +thumb 2 4 5.568345 11.136690 2816 +sorri 1 4 5.568345 5.568345 3059 +trek 3 3 5.857933 17.573799 4025 +trumpet 1 3 5.857933 5.857933 3946 +arm 1 3 5.857933 5.857933 3697 +neg 1 3 5.857933 5.857933 3451 +teacher 1 3 5.857933 5.857933 3892 +thesystem 1 3 5.857933 5.857933 3881 +interv 1 3 5.857933 5.857933 3253 +wit 1 3 5.857933 5.857933 4005 +bibl 1 3 5.857933 5.857933 3143 +glenn 1 3 5.857933 5.857933 3869 +gould 1 3 5.857933 5.857933 3559 +scienceher 1 2 6.263398 6.263398 5912 +essenc 1 2 6.263398 6.263398 6150 +agood 1 2 6.263398 6.263398 5380 +pagefor 1 2 6.263398 6.263398 6151 +nextstep 1 2 6.263398 6.263398 6102 +foral 1 2 6.263398 6.263398 4290 +isthmu 1 2 6.263398 6.263398 6152 +pagesom 1 2 6.263398 6.263398 6109 +finton 4 1 6.957497 27.829988 18248 +openstep 2 1 6.957497 13.914994 18249 +nerdin 1 1 6.957497 6.957497 18250 +intelligenceher 1 1 6.957497 6.957497 18251 +softwarefor 1 1 6.957497 6.957497 18252 +trusti 1 1 6.957497 6.957497 18253 +nextstationor 1 1 6.957497 6.957497 18254 +enjoyplai 1 1 6.957497 6.957497 18255 +longhair 1 1 6.957497 6.957497 18256 +intervarsityfolk 1 1 6.957497 6.957497 18257 +supersoak 1 1 6.957497 6.957497 18258 +accountto 1 1 6.957497 6.957497 18259 +intelligenti 1 1 6.957497 6.957497 18260 +intelligencei 1 1 6.957497 6.957497 18261 +actappropri 1 1 6.957497 6.957497 18262 +todistinguish 1 1 6.957497 6.957497 18263 +orimport 1 1 6.957497 6.957497 18264 +basedfeatur 1 1 6.957497 6.957497 18265 +learningprocess 1 1 6.957497 6.957497 18266 +intelligentadapt 1 1 6.957497 6.957497 18267 +whichwil 1 1 6.957497 6.957497 18268 +hotlistthi 1 1 6.957497 6.957497 18269 +omniweb 1 1 6.957497 6.957497 18270 +eleg 1 1 6.957497 6.957497 18271 +omniwebi 1 1 6.957497 6.957497 18272 +responseto 1 1 6.957497 6.957497 18273 +jehovah 1 1 6.957497 6.957497 18274 +deiti 1 1 6.957497 6.957497 18275 +christwisconsin 1 1 6.957497 6.957497 18276 +intervars 1 1 6.957497 6.957497 18277 +weatherin 1 1 6.957497 6.957497 18278 +nebula 1 1 6.957497 6.957497 18279 +crosssearch 1 1 6.957497 6.957497 18280 +farsid 1 1 6.957497 6.957497 18281 +voyagerent 1 1 6.957497 6.957497 18282 +zoneroam 1 1 6.957497 6.957497 18283 +stereogram 1 1 6.957497 6.957497 18284 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html new file mode 100644 index 00000000..b88b2227 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~fischer^fischer.html @@ -0,0 +1,243 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 13 374 0.693147 9.010911 7 +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +time 3 293 1.098612 3.295836 17 +us 3 329 1.098612 3.295836 16 +cours 2 273 1.098612 2.197224 15 +student 1 343 1.098612 1.098612 19 +languag 5 227 1.386294 6.931470 26 +gener 4 220 1.386294 5.545176 27 +design 2 213 1.386294 2.772588 25 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +cornel 1 215 1.386294 1.386294 23 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +implement 3 152 1.791759 5.375277 52 +wisconsin 2 169 1.791759 3.583518 54 +recent 2 167 1.791759 3.583518 58 +parallel 2 169 1.791759 3.583518 60 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +model 3 145 1.945910 5.837730 69 +architectur 2 139 1.945910 3.891820 77 +compil 6 122 2.079442 12.476652 96 +schedul 3 119 2.079442 6.238326 85 +analysi 3 124 2.079442 6.238326 98 +dayton 1 119 2.079442 2.079442 104 +spring 1 131 2.079442 2.079442 88 +provid 1 121 2.079442 2.079442 94 +studi 1 120 2.079442 2.079442 91 +confer 1 126 2.079442 2.079442 100 +code 5 108 2.197225 10.986125 116 +check 3 115 2.197225 6.591675 118 +specif 2 106 2.197225 4.394450 106 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +techniqu 2 99 2.302585 4.605170 138 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +commun 1 95 2.397895 2.397895 157 +environ 2 84 2.484907 4.969814 177 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +novemb 1 81 2.484907 2.484907 179 +issu 2 78 2.564949 5.129898 211 +optim 2 79 2.564949 5.129898 197 +appear 2 78 2.564949 5.129898 210 +june 2 79 2.564949 5.129898 214 +complet 2 77 2.564949 5.129898 208 +decemb 2 80 2.564949 5.129898 215 +messag 1 76 2.564949 2.564949 212 +know 1 80 2.564949 2.564949 198 +effici 2 73 2.639057 5.278114 233 +free 2 73 2.639057 5.278114 224 +involv 1 71 2.639057 2.639057 227 +symposium 1 72 2.639057 2.639057 238 +logic 1 71 2.639057 2.639057 230 +august 5 66 2.708050 13.540250 257 +practic 2 70 2.708050 5.416100 246 +integr 1 67 2.708050 2.708050 245 +januari 3 62 2.772589 8.317767 264 +evalu 2 64 2.772589 5.545178 266 +import 1 65 2.772589 2.772589 282 +experi 1 64 2.772589 2.772589 283 +septemb 1 65 2.772589 2.772589 274 +best 2 59 2.833213 5.666426 299 +juli 2 60 2.833213 5.666426 305 +share 1 59 2.833213 2.833213 304 +automat 1 61 2.833213 2.833213 306 +semest 1 58 2.890372 2.890372 312 +publish 1 57 2.890372 2.890372 326 +undergradu 1 54 2.944439 2.944439 338 +processor 1 54 2.944439 2.944439 335 +instruct 1 53 2.944439 2.944439 332 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +approach 3 48 3.044522 9.133566 366 +pointer 3 48 3.044522 9.133566 368 +telephon 1 50 3.044522 3.044522 373 +principl 1 48 3.044522 3.044522 357 +right 1 48 3.044522 3.044522 363 +effect 1 46 3.091042 3.091042 385 +possibl 1 47 3.091042 3.091042 378 +execut 2 45 3.135494 6.270988 404 +anoth 1 45 3.135494 3.135494 408 +better 1 45 3.135494 3.135494 401 +edit 1 42 3.218876 3.218876 418 +editor 1 41 3.218876 3.218876 433 +error 4 40 3.258097 13.032388 449 +transact 3 39 3.258097 9.774291 438 +must 1 40 3.258097 3.258097 442 +littl 1 39 3.258097 3.258097 454 +author 1 39 3.258097 3.258097 450 +correct 2 38 3.295837 6.591674 462 +cost 5 37 3.332205 16.661025 480 +workstat 1 37 3.332205 3.332205 479 +procedur 2 36 3.367296 6.734592 488 +especi 1 36 3.367296 3.367296 496 +soon 1 36 3.367296 3.367296 494 +short 1 36 3.367296 3.367296 499 +least 2 35 3.401197 6.802394 516 +global 1 34 3.401197 3.401197 520 +concurr 1 34 3.401197 3.401197 501 +john 1 33 3.433987 3.433987 532 +express 1 32 3.465736 3.465736 540 +extend 1 32 3.465736 3.465736 539 +richard 1 31 3.496508 3.496508 559 +focu 1 30 3.555348 3.555348 571 +graph 1 30 3.555348 3.555348 576 +common 1 30 3.555348 3.555348 574 +rang 1 30 3.555348 3.555348 565 +semant 1 29 3.583519 3.583519 587 +steve 1 29 3.583519 3.583519 594 +multiprocessor 1 28 3.610918 3.610918 605 +load 1 28 3.610918 3.610918 601 +framework 1 28 3.610918 3.610918 606 +arrai 3 27 3.637586 10.912758 627 +revis 1 26 3.688879 3.688879 640 +wai 1 25 3.737670 3.737670 662 +store 1 24 3.761200 3.761200 693 +flow 1 24 3.761200 3.761200 700 +william 2 22 3.850148 7.700296 765 +color 1 22 3.850148 3.850148 762 +almost 1 22 3.850148 3.850148 742 +avoid 2 21 3.912023 7.824046 799 +among 1 21 3.912023 3.912023 781 +programminglanguag 1 21 3.912023 3.912023 782 +alloc 8 20 3.951244 31.609952 821 +exploit 1 20 3.951244 3.951244 836 +supervis 1 20 3.951244 3.951244 840 +facil 1 20 3.951244 3.951244 814 +definit 1 19 4.007333 4.007333 864 +seem 1 18 4.060443 4.060443 899 +regist 8 17 4.110874 32.886992 938 +steven 3 17 4.110874 12.332622 953 +monitor 2 17 4.110874 8.221748 941 +ultim 1 17 4.110874 4.110874 943 +analyz 1 17 4.110874 4.110874 925 +anyon 1 17 4.110874 4.110874 916 +modern 1 16 4.174387 4.174387 966 +easi 1 16 4.174387 4.174387 969 +todd 3 15 4.248495 12.745485 1051 +mayb 1 15 4.248495 4.248495 1014 +indic 1 15 4.248495 4.248495 1013 +driven 1 15 4.248495 4.248495 1048 +attribut 7 14 4.317488 30.222416 1092 +polynomi 1 14 4.317488 4.317488 1069 +demand 1 14 4.317488 4.317488 1073 +split 1 14 4.317488 4.317488 1078 +charl 6 13 4.382027 26.292162 1149 +context 5 13 4.382027 21.910135 1153 +sigplan 2 13 4.382027 8.764054 1190 +care 1 13 4.382027 4.382027 1177 +johnson 1 13 4.382027 4.382027 1162 +bruce 1 12 4.465908 4.465908 1226 +benjamin 1 11 4.553877 4.553877 1296 +stephen 1 11 4.553877 4.553877 1342 +transpar 1 11 4.553877 4.553877 1325 +arithmet 1 10 4.653960 4.653960 1388 +routin 1 9 4.753590 4.753590 1549 +minimum 1 9 4.753590 4.753590 1555 +cum 1 8 4.875197 4.875197 1619 +sensit 1 8 4.875197 4.875197 1726 +fischer 8 7 5.010635 40.085080 1893 +delai 2 7 5.010635 10.021270 1848 +pipelin 1 7 5.010635 5.010635 1830 +appar 1 7 5.010635 5.010635 1958 +zero 1 7 5.010635 5.010635 1896 +bookstor 1 7 5.010635 5.010635 1837 +grammar 3 6 5.164786 15.494358 2058 +benefit 1 6 5.164786 5.164786 2213 +mistak 1 6 5.164786 5.164786 2110 +sigact 1 6 5.164786 5.164786 2212 +unnecessari 2 5 5.347108 10.694216 2506 +explicitli 1 5 5.347108 5.347108 2308 +quantifi 1 5 5.347108 5.347108 2525 +attract 1 5 5.347108 5.347108 2356 +craft 1 5 5.347108 5.347108 2412 +leblanc 1 5 5.347108 5.347108 2377 +pars 1 5 5.347108 5.347108 2321 +interprocedur 3 4 5.568345 16.705035 2771 +vital 1 4 5.568345 5.568345 2733 +popl 1 4 5.568345 5.568345 3068 +gregori 1 4 5.568345 5.568345 2928 +teachingc 2 3 5.857933 11.715866 3614 +domin 1 3 5.857933 5.857933 3995 +likelihood 1 3 5.857933 5.857933 3172 +topla 1 3 5.857933 5.857933 3563 +retarget 1 3 5.857933 5.857933 3994 +syntact 3 2 6.263398 18.790194 5552 +insoftwar 1 2 6.263398 6.263398 4932 +everywher 1 2 6.263398 6.263398 5690 +educationph 1 2 6.263398 6.263398 6112 +milton 1 2 6.263398 6.263398 6153 +bernard 1 2 6.263398 6.263398 5894 +dion 1 2 6.263398 6.263398 5856 +venkatesh 1 2 6.263398 6.263398 6154 +nbsp 15 1 6.957497 104.362455 18285 +kurland 4 1 6.957497 27.829988 18286 +proebst 3 1 6.957497 20.872491 18287 +harish 2 1 6.957497 13.914994 18288 +patil 2 1 6.957497 13.914994 18289 +nbspcharl 1 1 6.957497 6.957497 18290 +nbspprofessor 1 1 6.957497 6.957497 18291 +nbspunivers 1 1 6.957497 6.957497 18292 +enormouscap 1 1 6.957497 6.957497 18293 +haveinvestig 1 1 6.957497 6.957497 18294 +registerresid 1 1 6.957497 6.957497 18295 +loadsand 1 1 6.957497 6.957497 18296 +theprocedur 1 1 6.957497 6.957497 18297 +studiedinterprocedur 1 1 6.957497 6.957497 18298 +modelsthat 1 1 6.957497 6.957497 18299 +optimallyalloc 1 1 6.957497 6.957497 18300 +toautomat 1 1 6.957497 6.957497 18301 +orno 1 1 6.957497 6.957497 18302 +slowdown 1 1 6.957497 6.957497 18303 +inacm 1 1 6.957497 6.957497 18304 +activitiesa 1 1 6.957497 6.957497 18305 +cytronand 1 1 6.957497 6.957497 18306 +studentsdonn 1 1 6.957497 6.957497 18307 +rowland 1 1 6.957497 6.957497 18308 +skedzielewski 1 1 6.957497 6.957497 18309 +reevalu 1 1 6.957497 6.957497 18310 +corrector 1 1 6.957497 6.957497 18311 +sensitivepars 1 1 6.957497 6.957497 18312 +mahadevan 1 1 6.957497 6.957497 18313 +ganapathi 1 1 6.957497 6.957497 18314 +vimal 1 1 6.957497 6.957497 18315 +begwami 1 1 6.957497 6.957497 18316 +maunei 1 1 6.957497 6.957497 18317 +anil 1 1 6.957497 6.957497 18318 +winsborough 1 1 6.957497 6.957497 18319 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~galileo^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~galileo^ new file mode 100644 index 00000000..24c5c5d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~galileo^ @@ -0,0 +1,260 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 15 775 0.000000 0.000000 2 +univers 13 571 0.000000 0.000000 5 +scienc 12 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 12 457 0.693147 8.317764 12 +system 4 443 0.693147 2.772588 6 +research 3 431 0.693147 2.079441 10 +program 1 374 0.693147 0.693147 7 +project 6 340 1.098612 6.591672 18 +current 4 284 1.098612 4.394448 21 +us 2 329 1.098612 2.197224 16 +student 1 343 1.098612 1.098612 19 +also 5 259 1.386294 6.931470 28 +design 4 213 1.386294 5.545176 25 +link 2 247 1.386294 2.772588 24 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +includ 3 208 1.609438 4.828314 42 +public 2 202 1.609438 3.218876 43 +list 2 201 1.609438 3.218876 39 +group 1 183 1.609438 1.609438 36 +paper 1 205 1.609438 1.609438 38 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 14 169 1.791759 25.084626 54 +madison 13 165 1.791759 23.292867 55 +base 5 165 1.791759 8.958795 50 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +applic 1 170 1.791759 1.791759 56 +architectur 7 139 1.945910 13.621370 77 +perform 3 143 1.945910 5.837730 74 +relat 3 139 1.945910 5.837730 68 +support 3 132 1.945910 5.837730 83 +process 2 142 1.945910 3.891820 72 +model 2 145 1.945910 3.891820 69 +area 1 144 1.945910 1.945910 80 +report 11 131 2.079442 22.873862 92 +studi 3 120 2.079442 6.238326 91 +confer 3 126 2.079442 6.238326 100 +high 2 130 2.079442 4.158884 101 +analysi 2 124 2.079442 4.158884 98 +intern 5 108 2.197225 10.986125 128 +specif 4 106 2.197225 8.788900 106 +place 1 106 2.197225 2.197225 124 +topic 1 114 2.197225 2.197225 110 +memori 14 101 2.302585 32.236190 139 +technic 11 100 2.302585 25.328435 140 +proceed 3 93 2.397895 7.193685 152 +follow 2 92 2.397895 4.795790 143 +octob 1 89 2.397895 2.397895 156 +larg 3 82 2.484907 7.454721 168 +novemb 2 81 2.484907 4.969814 179 +ieee 1 86 2.484907 2.484907 190 +second 1 81 2.484907 2.484907 166 +appear 3 78 2.564949 7.694847 210 +interfac 2 79 2.564949 5.129898 209 +complet 1 77 2.564949 2.564949 208 +dynam 1 76 2.564949 2.564949 194 +optim 1 79 2.564949 2.564949 197 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +effici 2 73 2.639057 5.278114 233 +symposium 1 72 2.639057 2.639057 238 +involv 1 71 2.639057 2.639057 227 +workshop 1 71 2.639057 2.639057 239 +integr 5 67 2.708050 13.540250 245 +main 3 67 2.708050 8.124150 256 +differ 2 66 2.708050 5.416100 253 +simul 1 66 2.708050 2.708050 255 +januari 1 62 2.772589 2.772589 264 +share 10 59 2.833213 28.332130 304 +juli 3 60 2.833213 8.499639 305 +march 2 61 2.833213 5.666426 295 +best 1 59 2.833213 2.833213 299 +point 3 58 2.890372 8.671116 319 +variou 1 56 2.890372 2.890372 317 +explor 1 58 2.890372 2.890372 324 +faculti 1 56 2.890372 2.890372 325 +processor 4 54 2.944439 11.777756 335 +februari 2 54 2.944439 5.888878 328 +instruct 1 53 2.944439 2.944439 332 +extens 1 53 2.944439 2.944439 340 +hardwar 3 51 2.995732 8.987196 350 +standard 4 48 3.044522 12.178088 365 +possibl 1 47 3.091042 3.091042 378 +effect 1 46 3.091042 3.091042 385 +execut 2 45 3.135494 6.270988 404 +protocol 2 45 3.135494 6.270988 407 +mechan 2 43 3.178054 6.356108 416 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +third 1 43 3.178054 3.178054 412 +cach 3 41 3.218876 9.656628 432 +futur 2 41 3.218876 6.437752 427 +examin 1 42 3.218876 3.218876 424 +multipl 1 39 3.258097 3.258097 453 +close 1 38 3.295837 3.295837 465 +purpos 1 37 3.332205 3.332205 481 +cost 1 37 3.332205 3.332205 480 +jame 9 35 3.401197 30.610773 507 +least 1 35 3.401197 3.401197 516 +extend 2 32 3.465736 6.931472 539 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +specifi 1 30 3.555348 3.555348 568 +synchron 6 29 3.583519 21.501114 588 +focus 2 29 3.583519 7.167038 584 +limit 1 29 3.583519 3.583519 585 +platform 1 29 3.583519 3.583519 591 +particip 1 29 3.583519 3.583519 589 +scale 2 28 3.610918 7.221836 613 +multiprocessor 1 28 3.610918 3.610918 605 +cluster 1 28 3.610918 3.610918 612 +repres 1 26 3.688879 3.688879 656 +consist 1 26 3.688879 3.688879 651 +bound 1 26 3.688879 3.688879 659 +todai 1 25 3.737670 3.737670 672 +wai 1 25 3.737670 3.737670 662 +supercomput 1 25 3.737670 3.737670 681 +scalabl 5 24 3.761200 18.806000 705 +pattern 1 24 3.761200 3.761200 689 +hierarchi 1 22 3.850148 3.850148 744 +chip 6 21 3.912023 23.472138 770 +alumni 1 21 3.912023 3.912023 807 +increas 1 20 3.951244 3.951244 829 +exploit 1 20 3.951244 3.951244 836 +separ 1 19 4.007333 4.007333 844 +along 1 18 4.060443 4.060443 878 +element 1 18 4.060443 4.060443 895 +minim 1 18 4.060443 4.060443 887 +wind 1 18 4.060443 4.060443 908 +scott 1 18 4.060443 4.060443 884 +lower 1 18 4.060443 4.060443 886 +interconnect 3 17 4.110874 12.332622 937 +layer 3 17 4.110874 12.332622 926 +latenc 3 16 4.174387 12.523161 993 +capabl 2 15 4.248495 8.496990 1016 +massiv 1 15 4.248495 4.248495 1026 +overhead 1 15 4.248495 4.248495 1035 +hierarch 1 15 4.248495 4.248495 1018 +coher 4 14 4.317488 17.269952 1109 +conduct 1 14 4.317488 4.317488 1065 +topolog 1 14 4.317488 4.317488 1089 +johnson 3 13 4.382027 13.146081 1162 +central 1 13 4.382027 4.382027 1160 +individu 1 13 4.382027 4.382027 1126 +optic 2 12 4.465908 8.931816 1221 +mari 2 12 4.465908 8.931816 1266 +grow 1 12 4.465908 4.465908 1209 +bandwidth 4 11 4.553877 18.215508 1365 +primit 2 11 4.553877 9.107754 1317 +evolut 1 11 4.553877 4.553877 1314 +impact 1 11 4.553877 4.553877 1334 +extrem 1 11 4.553877 4.553877 1330 +penalti 1 10 4.653960 4.653960 1405 +modul 1 10 4.653960 4.653960 1434 +resid 1 10 4.653960 4.653960 1461 +label 1 10 4.653960 4.653960 1423 +queue 1 10 4.653960 4.653960 1386 +vernon 2 9 4.753590 9.507180 1556 +doug 2 9 4.753590 9.507180 1517 +elimin 1 9 4.753590 4.753590 1558 +lock 1 9 4.753590 4.753590 1551 +transport 3 8 4.875197 14.625591 1672 +univeristi 1 8 4.875197 4.875197 1754 +evan 1 8 4.875197 4.875197 1633 +goodman 12 7 5.010635 60.127620 1891 +burger 7 7 5.010635 35.074445 1889 +merg 1 7 5.010635 5.010635 1862 +migrat 1 7 5.010635 5.010635 1851 +microprocessor 1 7 5.010635 5.010635 1808 +serial 1 7 5.010635 5.010635 1975 +philip 2 6 5.164786 10.329572 2005 +onto 1 6 5.164786 5.164786 2089 +diagram 1 5 5.347108 5.347108 2346 +quantifi 1 5 5.347108 5.347108 2525 +galileo 3 4 5.568345 16.705035 3086 +medium 1 4 5.568345 5.568345 2834 +eventu 1 4 5.568345 5.568345 3074 +stefano 3 3 5.857933 17.573799 3372 +kaxira 3 3 5.857933 17.573799 3373 +arrow 2 3 5.857933 11.715866 3520 +bank 1 3 5.857933 5.857933 3920 +aswel 1 3 5.857933 5.857933 3286 +fresh 1 3 5.857933 5.857933 3706 +stein 1 3 5.857933 5.857933 3646 +alain 3 2 6.263398 18.790194 6086 +iram 2 2 6.263398 12.526796 4520 +datascalar 2 2 6.263398 12.526796 4518 +wisconsint 1 2 6.263398 6.263398 6155 +groupat 1 2 6.263398 6.263398 5677 +emphasison 1 2 6.263398 6.263398 4157 +extent 1 2 6.263398 6.263398 6080 +dram 1 2 6.263398 6.263398 4173 +spsd 1 2 6.263398 6.263398 4519 +declin 1 2 6.263398 6.263398 5385 +logarithm 1 2 6.263398 6.263398 5322 +multiprocessorsa 1 2 6.263398 6.263398 5455 +gjess 1 2 6.263398 6.263398 6156 +woest 3 1 6.957497 20.872491 18320 +nagi 2 1 6.957497 13.914994 18321 +contentsgalileoproject 1 1 6.957497 6.957497 18322 +descriptionpublicationsrel 1 1 6.957497 6.957497 18323 +projectssci 1 1 6.957497 6.957497 18324 +wisconsinproject 1 1 6.957497 6.957497 18325 +descriptionpublicationsproject 1 1 6.957497 6.957497 18326 +membersgalileo 1 1 6.957497 6.957497 18327 +wisconsingalileo 1 1 6.957497 6.957497 18328 +therelationship 1 1 6.957497 6.957497 18329 +futuresystem 1 1 6.957497 6.957497 18330 +issuabl 1 1 6.957497 6.957497 18331 +orlimit 1 1 6.957497 6.957497 18332 +capacityon 1 1 6.957497 6.957497 18333 +sizabl 1 1 6.957497 6.957497 18334 +fractionof 1 1 6.957497 6.957497 18335 +mopin 1 1 6.957497 6.957497 18336 +ofprocessor 1 1 6.957497 6.957497 18337 +eventuallyobvi 1 1 6.957497 6.957497 18338 +andlimit 1 1 6.957497 6.957497 18339 +systemsperform 1 1 6.957497 6.957497 18340 +theprocessor 1 1 6.957497 6.957497 18341 +spectrumcach 1 1 6.957497 6.957497 18342 +systemsdesign 1 1 6.957497 6.957497 18343 +systemprogram 1 1 6.957497 6.957497 18344 +bottlenecksdoug 1 1 6.957497 6.957497 18345 +modeldoug 1 1 6.957497 6.957497 18346 +microprocessorsdoug 1 1 6.957497 6.957497 18347 +microprocessorsdougla 1 1 6.957497 6.957497 18348 +berkeleyppram 1 1 6.957497 6.957497 18349 +kyushu 1 1 6.957497 6.957497 18350 +japansci 1 1 6.957497 6.957497 18351 +wisconsinour 1 1 6.957497 6.957497 18352 +coherentshar 1 1 6.957497 6.957497 18353 +coherentinterfac 1 1 6.957497 6.957497 18354 +qolb 1 1 6.957497 6.957497 18355 +pairwis 1 1 6.957497 6.957497 18356 +definitionfor 1 1 6.957497 6.957497 18357 +betweenprocess 1 1 6.957497 6.957497 18358 +structureseffici 1 1 6.957497 6.957497 18359 +extensionsaggress 1 1 6.957497 6.957497 18360 +multiprocessorswisconsin 1 1 6.957497 6.957497 18361 +tunneldougla 1 1 6.957497 6.957497 18362 +scijam 1 1 6.957497 6.957497 18363 +memoryross 1 1 6.957497 6.957497 18364 +aboulenein 1 1 6.957497 6.957497 18365 +ringsross 1 1 6.957497 6.957497 18366 +ringsteven 1 1 6.957497 6.957497 18367 +coherenceross 1 1 6.957497 6.957497 18368 +multiprocessorsphilip 1 1 6.957497 6.957497 18369 +multiprocessorjam 1 1 6.957497 6.957497 18370 +abouleneinross 1 1 6.957497 6.957497 18371 +johnsonstev 1 1 6.957497 6.957497 18372 +scottlast 1 1 6.957497 6.957497 18373 +dburger 1 1 6.957497 6.957497 18374 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~geery^geery.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~geery^geery.html new file mode 100644 index 00000000..43827397 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~geery^geery.html @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +wisc 1 242 1.386294 1.386294 33 +madison 2 165 1.791759 3.583518 55 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +structur 1 106 2.197225 2.197225 105 +peopl 1 96 2.302585 2.302585 132 +grade 1 90 2.397895 2.397895 142 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +paul 1 38 3.295837 3.295837 471 +post 1 35 3.401197 3.401197 505 +martin 1 21 3.912023 3.912023 794 +andrew 2 19 4.007333 8.014666 849 +jean 1 10 4.653960 4.653960 1440 +regent 1 5 5.347108 5.347108 2551 +geeri 3 3 5.857933 17.573799 3422 +albert 1 2 6.263398 6.263398 5987 +friedrich 1 2 6.263398 6.263398 5175 +madisonin 1 1 6.957497 6.957497 18375 +compsci 1 1 6.957497 6.957497 18376 +pontif 1 1 6.957497 6.957497 18377 +jacqu 1 1 6.957497 6.957497 18378 +derrida 1 1 6.957497 6.957497 18379 +heidegg 1 1 6.957497 6.957497 18380 +camu 1 1 6.957497 6.957497 18381 +sartr 1 1 6.957497 6.957497 18382 +nietzsch 1 1 6.957497 6.957497 18383 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gid^gid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gid^gid.html new file mode 100644 index 00000000..0312a553 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gid^gid.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +time 4 293 1.098612 4.394448 17 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +also 1 259 1.386294 1.386294 28 +paper 1 205 1.609438 1.609438 38 +class 1 199 1.609438 1.609438 37 +fall 1 181 1.609438 1.609438 40 +read 2 154 1.791759 3.583518 47 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +avail 1 169 1.791759 1.791759 48 +note 1 142 1.945910 1.945910 67 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +machin 2 129 2.079442 4.158884 95 +report 1 131 2.079442 2.079442 92 +pleas 3 113 2.197225 6.591675 114 +find 2 111 2.197225 4.394450 111 +send 2 114 2.197225 4.394450 109 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +look 1 107 2.197225 2.197225 115 +well 1 109 2.197225 2.197225 121 +book 1 99 2.302585 2.302585 131 +homepag 2 93 2.397895 4.795790 148 +follow 2 92 2.397895 4.795790 143 +imag 1 91 2.397895 2.397895 161 +pictur 1 89 2.397895 2.397895 160 +help 1 83 2.484907 2.484907 175 +thing 1 84 2.484907 2.484907 189 +orient 1 80 2.564949 2.564949 205 +main 1 67 2.708050 2.708050 256 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +share 1 59 2.833213 2.833213 304 +back 1 60 2.833213 2.833213 297 +index 2 56 2.890372 5.780744 309 +unix 2 58 2.890372 5.780744 308 +sever 1 56 2.890372 2.890372 322 +think 1 57 2.890372 2.890372 314 +sampl 2 53 2.944439 5.888878 339 +case 1 51 2.995732 2.995732 351 +right 1 48 3.044522 3.044522 363 +move 1 47 3.091042 3.091042 382 +netscap 2 44 3.135494 6.270988 395 +directori 1 45 3.135494 3.135494 396 +might 1 41 3.218876 3.218876 426 +fast 1 42 3.218876 3.218876 429 +continu 1 39 3.258097 3.258097 448 +movi 1 40 3.258097 3.258097 459 +small 1 39 3.258097 3.258097 447 +cost 1 37 3.332205 3.332205 480 +either 1 35 3.401197 3.401197 506 +given 1 32 3.465736 3.465736 538 +someth 1 31 3.496508 3.496508 554 +consid 1 29 3.583519 3.583519 590 +usual 1 28 3.610918 3.610918 608 +load 1 28 3.610918 3.610918 601 +actual 1 28 3.610918 3.610918 604 +great 1 27 3.637586 3.637586 626 +bookmark 1 26 3.688879 3.688879 639 +sometim 1 24 3.761200 3.761200 696 +higher 1 24 3.761200 3.761200 690 +thank 1 23 3.806662 3.806662 721 +sent 1 22 3.850148 3.850148 763 +grad 1 20 3.951244 3.951244 837 +wrote 1 20 3.951244 3.951244 830 +mostli 1 19 4.007333 4.007333 869 +exercis 1 19 4.007333 4.007333 842 +stop 1 17 4.110874 4.110874 942 +mayb 2 15 4.248495 8.496990 1014 +purchas 1 15 4.248495 4.248495 1030 +floor 1 14 4.317488 4.317488 1070 +wait 1 13 4.382027 4.382027 1168 +walk 1 12 4.465908 4.465908 1281 +outsid 1 12 4.465908 4.465908 1219 +grow 1 12 4.465908 4.465908 1209 +noth 1 11 4.553877 4.553877 1328 +denni 1 11 4.553877 4.553877 1321 +calvin 1 9 4.753590 4.753590 1518 +claim 1 8 4.875197 4.875197 1664 +unifi 1 8 4.875197 4.875197 1774 +reload 1 8 4.875197 4.875197 1682 +told 1 8 4.875197 4.875197 1658 +accord 1 7 5.010635 5.010635 1826 +none 1 7 5.010635 5.010635 1811 +monei 1 7 5.010635 5.010635 1934 +christian 1 7 5.010635 5.010635 1949 +huge 1 6 5.164786 5.164786 1991 +handbook 1 6 5.164786 5.164786 2061 +gui 1 5 5.347108 5.347108 2573 +feet 1 5 5.347108 5.347108 2492 +anti 1 5 5.347108 5.347108 2434 +cheap 1 4 5.568345 5.568345 2751 +prospect 1 4 5.568345 5.568345 3013 +shelf 1 4 5.568345 5.568345 2621 +fork 1 4 5.568345 5.568345 2801 +kill 1 4 5.568345 5.568345 3000 +suppos 1 4 5.568345 5.568345 3002 +suffic 1 4 5.568345 5.568345 2869 +glass 2 3 5.857933 11.715866 3759 +dutch 1 3 5.857933 5.857933 3592 +influenc 1 3 5.857933 5.857933 3349 +cash 1 3 5.857933 5.857933 3355 +dabbl 1 3 5.857933 5.857933 3971 +forward 1 3 5.857933 5.857933 3784 +deposit 1 2 6.263398 6.263398 6095 +cooler 1 2 6.263398 6.263398 6023 +suspect 1 2 6.263398 6.263398 5187 +nearest 1 2 6.263398 6.263398 4922 +roommat 1 2 6.263398 6.263398 6157 +withno 1 2 6.263398 6.263398 5370 +eventhough 1 2 6.263398 6.263398 6158 +anyhow 1 2 6.263398 6.263398 5188 +killer 1 2 6.263398 6.263398 6159 +programmingin 1 2 6.263398 6.263398 4135 +ritchi 1 2 6.263398 6.263398 4306 +creator 1 2 6.263398 6.263398 5998 +gideon 2 1 6.957497 13.914994 18384 +tweak 2 1 6.957497 13.914994 18385 +toonion 1 1 6.957497 6.957497 18386 +seethi 1 1 6.957497 6.957497 18387 +blockbust 1 1 6.957497 6.957497 18388 +predica 1 1 6.957497 6.957497 18389 +dismal 1 1 6.957497 6.957497 18390 +donationto 1 1 6.957497 6.957497 18391 +defrai 1 1 6.957497 6.957497 18392 +orderscan 1 1 6.957497 6.957497 18393 +monro 1 1 6.957497 6.957497 18394 +usathank 1 1 6.957497 6.957497 18395 +unread 1 1 6.957497 6.957497 18396 +achil 1 1 6.957497 6.957497 18397 +cstechreport 1 1 6.957497 6.957497 18398 +otherstuff 1 1 6.957497 6.957497 18399 +averagewil 1 1 6.957497 6.957497 18400 +doofu 1 1 6.957497 6.957497 18401 +zippi 1 1 6.957497 6.957497 18402 +pinheadha 1 1 6.957497 6.957497 18403 +justtri 1 1 6.957497 6.957497 18404 +mozilla 1 1 6.957497 6.957497 18405 +buttonher 1 1 6.957497 6.957497 18406 +somethingin 1 1 6.957497 6.957497 18407 +hater 1 1 6.957497 6.957497 18408 +mailand 1 1 6.957497 6.957497 18409 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~glew^glew.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~glew^glew.html new file mode 100644 index 00000000..afb2df2f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~glew^glew.html @@ -0,0 +1,407 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 12 775 0.000000 0.000000 2 +page 5 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +univers 3 571 0.000000 0.000000 5 +system 5 443 0.693147 3.465735 6 +work 3 380 0.693147 2.079441 9 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +us 5 329 1.098612 5.493060 16 +time 4 293 1.098612 4.394448 17 +last 1 314 1.098612 1.098612 14 +email 5 220 1.386294 6.931470 29 +also 4 259 1.386294 5.545176 28 +wisc 2 242 1.386294 2.772588 33 +softwar 2 220 1.386294 2.772588 30 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +public 3 202 1.609438 4.828314 43 +group 2 183 1.609438 3.218876 36 +paper 2 205 1.609438 3.218876 38 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +wisconsin 4 169 1.791759 7.167036 54 +read 2 154 1.791759 3.583518 47 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +phone 1 175 1.791759 1.791759 45 +algorithm 1 162 1.791759 1.791759 57 +first 7 140 1.945910 13.621370 71 +architectur 4 139 1.945910 7.783640 77 +like 3 132 1.945910 5.837730 81 +file 1 132 1.945910 1.945910 70 +assign 1 135 1.945910 1.945910 66 +schedul 5 119 2.079442 10.397210 85 +tool 2 117 2.079442 4.158884 93 +high 1 130 2.079442 2.079442 101 +databas 1 122 2.079442 2.079442 86 +number 1 130 2.079442 2.079442 97 +manag 4 114 2.197225 8.788900 125 +code 3 108 2.197225 6.591675 116 +version 3 113 2.197225 6.591675 122 +person 3 111 2.197225 6.591675 117 +specif 1 106 2.197225 2.197225 106 +well 1 109 2.197225 2.197225 121 +place 1 106 2.197225 2.197225 124 +make 1 111 2.197225 2.197225 120 +check 1 115 2.197225 2.197225 118 +send 1 114 2.197225 2.197225 109 +topic 1 114 2.197225 2.197225 110 +peopl 3 96 2.302585 6.907755 132 +access 2 102 2.302585 4.605170 136 +user 2 104 2.302585 4.605170 137 +book 1 99 2.302585 2.302585 131 +techniqu 1 99 2.302585 2.302585 138 +advanc 1 99 2.302585 2.302585 130 +imag 2 91 2.397895 4.795790 161 +sinc 2 90 2.397895 4.795790 159 +real 2 93 2.397895 4.795790 144 +mani 1 92 2.397895 2.397895 150 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +thing 3 84 2.484907 7.454721 189 +info 2 85 2.484907 4.969814 176 +stuff 2 87 2.484907 4.969814 171 +start 2 83 2.484907 4.969814 173 +learn 2 86 2.484907 4.969814 170 +control 2 82 2.484907 4.969814 164 +requir 2 81 2.484907 4.969814 167 +level 1 87 2.484907 2.484907 180 +educ 1 86 2.484907 2.484907 191 +internet 1 83 2.484907 2.484907 186 +build 1 85 2.484907 2.484907 184 +chang 1 82 2.484907 2.484907 163 +resum 2 79 2.564949 5.129898 217 +dynam 1 76 2.564949 2.564949 194 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +meet 13 72 2.639057 34.307741 229 +html 3 75 2.639057 7.917171 235 +involv 1 71 2.639057 2.639057 227 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +workshop 1 71 2.639057 2.639057 239 +appli 1 71 2.639057 2.639057 226 +effici 1 73 2.639057 2.639057 233 +test 2 66 2.708050 5.416100 252 +integr 1 67 2.708050 2.708050 245 +view 1 70 2.708050 2.708050 254 +creat 3 63 2.772589 8.317767 277 +organ 2 65 2.772589 5.545178 265 +previou 1 62 2.772589 2.772589 290 +plan 1 65 2.772589 2.772589 272 +best 1 59 2.833213 2.833213 299 +unix 5 58 2.890372 14.451860 308 +summer 1 56 2.890372 2.890372 311 +think 1 57 2.890372 2.890372 314 +publish 1 57 2.890372 2.890372 326 +space 1 57 2.890372 2.890372 310 +sever 1 56 2.890372 2.890372 322 +variou 1 56 2.890372 2.890372 317 +major 1 56 2.890372 2.890372 315 +suggest 2 53 2.944439 5.888878 331 +instruct 1 53 2.944439 2.944439 332 +allow 1 53 2.944439 2.944439 333 +case 1 51 2.995732 2.995732 351 +week 1 52 2.995732 2.995732 343 +approach 4 48 3.044522 12.178088 366 +frequent 1 49 3.044522 3.044522 367 +without 1 50 3.044522 3.044522 370 +right 1 48 3.044522 3.044522 363 +possibl 3 47 3.091042 9.273126 378 +get 2 46 3.091042 6.182084 380 +adapt 1 46 3.091042 3.091042 387 +effect 1 46 3.091042 3.091042 385 +could 1 46 3.091042 3.091042 383 +featur 1 46 3.091042 3.091042 386 +keep 2 44 3.135494 6.270988 409 +even 2 45 3.135494 6.270988 393 +favorit 1 44 3.135494 3.135494 410 +textbook 1 44 3.135494 3.135494 397 +describ 1 45 3.135494 3.135494 400 +mark 1 44 3.135494 3.135494 403 +futur 2 41 3.218876 6.437752 427 +http 1 41 3.218876 3.218876 420 +edit 1 42 3.218876 3.218876 418 +compani 1 41 3.218876 3.218876 423 +linear 1 41 3.218876 3.218876 431 +past 1 42 3.218876 3.218876 428 +author 2 39 3.258097 6.516194 450 +form 1 39 3.258097 3.258097 443 +multipl 1 39 3.258097 3.258097 453 +programm 1 39 3.258097 3.258097 445 +must 1 40 3.258097 3.258097 442 +open 1 38 3.295837 3.295837 469 +brian 1 38 3.295837 3.295837 466 +credit 1 38 3.295837 3.295837 460 +microsoft 1 38 3.295837 3.295837 468 +connect 1 37 3.332205 3.332205 485 +workstat 1 37 3.332205 3.332205 479 +mean 1 37 3.332205 3.332205 477 +download 2 36 3.367296 6.734592 489 +tree 1 36 3.367296 3.367296 492 +least 3 35 3.401197 10.203591 516 +concurr 2 34 3.401197 6.802394 501 +print 1 34 3.401197 3.401197 503 +singl 1 34 3.401197 3.401197 510 +manual 1 35 3.401197 3.401197 504 +someth 2 31 3.496508 6.993016 554 +often 1 31 3.496508 3.496508 551 +domain 1 30 3.555348 3.555348 564 +synchron 4 29 3.583519 14.334076 588 +quot 1 29 3.583519 3.583519 582 +actual 1 28 3.610918 3.610918 604 +usual 1 28 3.610918 3.610918 608 +static 1 27 3.637586 3.637586 619 +quit 1 27 3.637586 3.637586 633 +team 1 27 3.637586 3.637586 625 +rather 1 26 3.688879 3.688879 642 +enhanc 1 26 3.688879 3.688879 644 +although 3 25 3.737670 11.213010 667 +wai 1 25 3.737670 3.737670 662 +never 1 25 3.737670 3.737670 671 +alwai 2 24 3.761200 7.522400 691 +higher 1 24 3.761200 3.761200 690 +known 1 24 3.761200 3.761200 702 +mike 1 24 3.761200 3.761200 703 +miscellan 2 23 3.806662 7.613324 731 +self 2 22 3.850148 7.700296 761 +william 1 22 3.850148 3.850148 765 +sort 1 22 3.850148 3.850148 738 +identifi 1 22 3.850148 3.850148 760 +sent 1 22 3.850148 3.850148 763 +busi 2 21 3.912023 7.824046 784 +chip 1 21 3.912023 3.912023 770 +love 1 21 3.912023 3.912023 804 +divis 1 21 3.912023 3.912023 803 +similar 1 21 3.912023 3.912023 771 +voic 1 21 3.912023 3.912023 806 +watch 1 21 3.912023 3.912023 789 +tell 1 21 3.912023 3.912023 777 +avoid 1 21 3.912023 3.912023 799 +minut 1 20 3.951244 3.951244 810 +kernel 1 20 3.951244 3.951244 825 +portabl 1 20 3.951244 3.951244 819 +ever 1 19 4.007333 4.007333 872 +miss 1 19 4.007333 4.007333 866 +less 1 18 4.060443 4.060443 892 +element 1 18 4.060443 4.060443 895 +seem 1 18 4.060443 4.060443 899 +record 1 18 4.060443 4.060443 890 +aid 1 18 4.060443 4.060443 904 +thought 2 17 4.110874 8.221748 945 +anyon 1 17 4.110874 4.110874 916 +weekli 1 17 4.110874 4.110874 919 +intel 6 16 4.174387 25.046322 1000 +alreadi 1 16 4.174387 4.174387 963 +critic 1 16 4.174387 4.174387 982 +advantag 1 16 4.174387 4.174387 987 +configur 3 15 4.248495 12.745485 1012 +enough 2 15 4.248495 8.496990 1040 +piec 1 15 4.248495 4.248495 1020 +save 1 14 4.317488 4.317488 1099 +manner 1 14 4.317488 4.317488 1074 +becam 1 14 4.317488 4.317488 1117 +cannot 2 13 4.382027 8.764054 1144 +sai 1 13 4.382027 4.382027 1175 +care 1 13 4.382027 4.382027 1177 +central 1 13 4.382027 4.382027 1160 +everyth 1 13 4.382027 4.382027 1169 +weak 1 13 4.382027 4.382027 1159 +touch 2 12 4.465908 8.931816 1288 +assembl 1 12 4.465908 4.465908 1207 +usenix 1 12 4.465908 4.465908 1240 +went 1 12 4.465908 4.465908 1279 +overal 1 12 4.465908 4.465908 1254 +see 1 11 4.553877 4.553877 1337 +mapl 1 11 4.553877 4.553877 1376 +wood 1 11 4.553877 4.553877 1355 +fix 1 11 4.553877 4.553877 1327 +card 1 10 4.653960 4.653960 1435 +relationship 1 10 4.653960 4.653960 1383 +mainli 1 10 4.653960 4.653960 1432 +invit 1 10 4.653960 4.653960 1428 +bring 1 10 4.653960 4.653960 1430 +lock 3 9 4.753590 14.260770 1551 +trust 2 9 4.753590 9.507180 1583 +motorola 1 9 4.753590 4.753590 1546 +entitl 1 9 4.753590 4.753590 1490 +patterson 1 9 4.753590 4.753590 1554 +charg 1 9 4.753590 4.753590 1582 +login 1 9 4.753590 4.753590 1550 +prefer 1 9 4.753590 4.753590 1491 +admin 1 9 4.753590 4.753590 1476 +calendar 9 8 4.875197 43.876773 1649 +architect 2 8 4.875197 9.750394 1624 +claim 1 8 4.875197 4.875197 1664 +poor 1 8 4.875197 4.875197 1736 +perhap 1 8 4.875197 4.875197 1693 +driver 1 8 4.875197 4.875197 1657 +accomplish 1 8 4.875197 4.875197 1755 +mass 1 8 4.875197 4.875197 1732 +isol 1 8 4.875197 4.875197 1663 +realiz 1 8 4.875197 4.875197 1739 +montreal 3 7 5.010635 15.031905 1961 +serial 1 7 5.010635 5.010635 1975 +suffici 1 7 5.010635 5.010635 1897 +therefor 1 7 5.010635 5.010635 1822 +header 1 7 5.010635 5.010635 1787 +arrang 4 6 5.164786 20.659144 2023 +commit 2 6 5.164786 10.329572 2233 +sohi 1 6 5.164786 5.164786 2237 +versu 1 6 5.164786 5.164786 2052 +snow 1 6 5.164786 5.164786 2031 +sleep 1 6 5.164786 5.164786 2211 +microarchitectur 1 6 5.164786 5.164786 2238 +pentium 1 6 5.164786 5.164786 2077 +quickli 1 6 5.164786 5.164786 2000 +proce 1 6 5.164786 5.164786 2114 +relax 1 6 5.164786 5.164786 2120 +strip 1 6 5.164786 5.164786 2203 +apolog 1 6 5.164786 5.164786 2046 +optimist 3 5 5.347108 16.041324 2501 +pagethi 1 5 5.347108 5.347108 2336 +ship 1 5 5.347108 5.347108 2534 +default 1 5 5.347108 5.347108 2335 +patent 1 5 5.347108 5.347108 2574 +chapel 1 5 5.347108 5.347108 2457 +keeper 1 5 5.347108 5.347108 2569 +knew 1 5 5.347108 5.347108 2445 +exchang 1 5 5.347108 5.347108 2310 +adopt 1 5 5.347108 5.347108 2467 +hennessi 1 5 5.347108 5.347108 2289 +appreci 1 5 5.347108 5.347108 2374 +hate 1 5 5.347108 5.347108 2529 +recogn 1 5 5.347108 5.347108 2302 +truli 1 5 5.347108 5.347108 2476 +notabl 1 5 5.347108 5.347108 2276 +camp 1 5 5.347108 5.347108 2545 +andi 8 4 5.568345 44.546760 3081 +filesystem 2 4 5.568345 11.136690 2587 +unless 1 4 5.568345 5.568345 2607 +scribe 1 4 5.568345 5.568345 2631 +church 1 4 5.568345 5.568345 3011 +aspir 1 4 5.568345 5.568345 3019 +wear 1 4 5.568345 5.568345 2785 +trick 1 4 5.568345 5.568345 2967 +wander 1 4 5.568345 5.568345 2896 +wherea 1 4 5.568345 5.568345 2597 +cshrc 1 4 5.568345 5.568345 2759 +disconnect 1 4 5.568345 5.568345 2664 +pilot 4 3 5.857933 23.431732 4008 +warm 2 3 5.857933 11.715866 3904 +hacker 2 3 5.857933 11.715866 3996 +gould 2 3 5.857933 11.715866 3559 +urgent 2 3 5.857933 11.715866 3316 +fame 1 3 5.857933 5.857933 3793 +coin 1 3 5.857933 5.857933 3799 +harm 1 3 5.857933 5.857933 3515 +advertis 1 3 5.857933 5.857933 3788 +redesign 1 3 5.857933 5.857933 3540 +wilkinson 1 3 5.857933 5.857933 3579 +berlin 1 3 5.857933 5.857933 3263 +fascin 1 3 5.857933 5.857933 3948 +glew 7 2 6.263398 43.843786 4162 +pageandi 1 2 6.263398 6.263398 6096 +pope 1 2 6.263398 6.263398 5506 +parson 1 2 6.263398 6.263398 4528 +king 1 2 6.263398 6.263398 5737 +strand 1 2 6.263398 6.263398 5880 +chop 1 2 6.263398 6.263398 6160 +beard 1 2 6.263398 6.263398 6161 +constantli 1 2 6.263398 6.263398 4181 +verg 1 2 6.263398 6.263398 5488 +disagre 1 2 6.263398 6.263398 6105 +defunct 1 2 6.263398 6.263398 6162 +startup 1 2 6.263398 6.263398 4676 +clone 1 2 6.263398 6.263398 5833 +aitken 1 2 6.263398 6.263398 4941 +ubiquit 1 2 6.263398 6.263398 6049 +intervent 1 2 6.263398 6.263398 6163 +bother 1 2 6.263398 6.263398 6164 +advoc 4 1 6.957497 27.829988 18410 +beef 3 1 6.957497 20.872491 18411 +krazi 2 1 6.957497 13.914994 18412 +wannab 2 1 6.957497 13.914994 18413 +softwareto 2 1 6.957497 13.914994 18414 +teresa 2 1 6.957497 13.914994 18415 +largelyform 1 1 6.957497 6.957497 18416 +snippet 1 1 6.957497 6.957497 18417 +stylishor 1 1 6.957497 6.957497 18418 +summarycontact 1 1 6.957497 6.957497 18419 +addressescalendar 1 1 6.957497 6.957497 18420 +taker 1 1 6.957497 6.957497 18421 +priest 1 1 6.957497 6.957497 18422 +boyn 1 1 6.957497 6.957497 18423 +frost 1 1 6.957497 6.957497 18424 +almighti 1 1 6.957497 6.957497 18425 +dollar 1 1 6.957497 6.957497 18426 +bellow 1 1 6.957497 6.957497 18427 +ranter 1 1 6.957497 6.957497 18428 +preacher 1 1 6.957497 6.957497 18429 +beecher 1 1 6.957497 6.957497 18430 +harbour 1 1 6.957497 6.957497 18431 +deplor 1 1 6.957497 6.957497 18432 +churchmen 1 1 6.957497 6.957497 18433 +notori 1 1 6.957497 6.957497 18434 +atheist 1 1 6.957497 6.957497 18435 +chariti 1 1 6.957497 6.957497 18436 +sailor 1 1 6.957497 6.957497 18437 +chord 1 1 6.957497 6.957497 18438 +firewood 1 1 6.957497 6.957497 18439 +meal 1 1 6.957497 6.957497 18440 +manifesto 1 1 6.957497 6.957497 18441 +handbil 1 1 6.957497 6.957497 18442 +hackeralthough 1 1 6.957497 6.957497 18443 +formerlyhad 1 1 6.957497 6.957497 18444 +fake 1 1 6.957497 6.957497 18445 +andstil 1 1 6.957497 6.957497 18446 +wistfulli 1 1 6.957497 6.957497 18447 +suspend 1 1 6.957497 6.957497 18448 +bald 1 1 6.957497 6.957497 18449 +architectureonc 1 1 6.957497 6.957497 18450 +architecturei 1 1 6.957497 6.957497 18451 +grabbag 1 1 6.957497 6.957497 18452 +antidot 1 1 6.957497 6.957497 18453 +afford 1 1 6.957497 6.957497 18454 +diskspac 1 1 6.957497 6.957497 18455 +provideror 1 1 6.957497 6.957497 18456 +architectureon 1 1 6.957497 6.957497 18457 +datasheet 1 1 6.957497 6.957497 18458 +netscapebookmarksstockscod 1 1 6.957497 6.957497 18459 +standardsroi 1 1 6.957497 6.957497 18460 +standardsi 1 1 6.957497 6.957497 18461 +enfopris 1 1 6.957497 6.957497 18462 +writingto 1 1 6.957497 6.957497 18463 +longstand 1 1 6.957497 6.957497 18464 +configurationmanag 1 1 6.957497 6.957497 18465 +scc 1 1 6.957497 6.957497 18466 +box 1 1 6.957497 6.957497 18467 +hardlink 1 1 6.957497 6.957497 18468 +deprec 1 1 6.957497 6.957497 18469 +livelock 1 1 6.957497 6.957497 18470 +insist 1 1 6.957497 6.957497 18471 +checkinsso 1 1 6.957497 6.957497 18472 +approachin 1 1 6.957497 6.957497 18473 +fetterman 1 1 6.957497 6.957497 18474 +deserv 1 1 6.957497 6.957497 18475 +wisconsinhow 1 1 6.957497 6.957497 18476 +programat 1 1 6.957497 6.957497 18477 +cmtool 1 1 6.957497 6.957497 18478 +ical 1 1 6.957497 6.957497 18479 +anyof 1 1 6.957497 6.957497 18480 +manuallyadd 1 1 6.957497 6.957497 18481 +intelat 1 1 6.957497 6.957497 18482 +devout 1 1 6.957497 6.957497 18483 +ontim 1 1 6.957497 6.957497 18484 +meetingswith 1 1 6.957497 6.957497 18485 +reserveth 1 1 6.957497 6.957497 18486 +blindli 1 1 6.957497 6.957497 18487 +proposeif 1 1 6.957497 6.957497 18488 +overallschedul 1 1 6.957497 6.957497 18489 +secretariesand 1 1 6.957497 6.957497 18490 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html new file mode 100644 index 00000000..b7413f43 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~goodman^goodman.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +good 1 77 2.564949 2.564949 200 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 1 35 3.401197 3.401197 507 +lot 1 18 4.060443 4.060443 889 +goodman 3 7 5.010635 15.031905 1891 +sciencesdepart 1 6 5.164786 5.164786 2020 +galileo 1 4 5.568345 5.568345 3086 +usaresearch 1 1 6.957497 6.957497 18491 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~greg^greg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~greg^greg.html new file mode 100644 index 00000000..9bd4e483 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~greg^greg.html @@ -0,0 +1,339 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 15 775 0.000000 0.000000 2 +home 8 672 0.000000 0.000000 1 +page 7 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 6 412 0.693147 4.158882 8 +system 3 443 0.693147 2.079441 6 +program 1 374 0.693147 0.693147 7 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +project 3 340 1.098612 3.295836 18 +cours 3 273 1.098612 3.295836 15 +offic 2 299 1.098612 2.197224 13 +engin 2 297 1.098612 2.197224 20 +link 7 247 1.386294 9.704058 24 +softwar 4 220 1.386294 5.545176 30 +wisc 3 242 1.386294 4.158882 33 +washington 2 236 1.386294 2.772588 32 +languag 2 227 1.386294 2.772588 26 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +fall 3 181 1.609438 4.828314 40 +group 3 183 1.609438 4.828314 36 +list 2 201 1.609438 3.218876 39 +includ 2 208 1.609438 3.218876 42 +class 1 199 1.609438 1.609438 37 +paper 1 205 1.609438 1.609438 38 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +data 2 170 1.791759 3.583518 49 +base 2 165 1.791759 3.583518 50 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +architectur 7 139 1.945910 13.621370 77 +relat 4 139 1.945910 7.783640 68 +process 3 142 1.945910 5.837730 72 +lectur 2 135 1.945910 3.891820 73 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +object 1 138 1.945910 1.945910 79 +databas 16 122 2.079442 33.271072 86 +spring 2 131 2.079442 4.158884 88 +introduct 2 126 2.079442 4.158884 87 +machin 2 129 2.079442 4.158884 95 +tool 2 117 2.079442 4.158884 93 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +site 3 106 2.197225 6.591675 119 +manag 2 114 2.197225 4.394450 125 +instructor 1 108 2.197225 2.197225 107 +topic 1 114 2.197225 2.197225 110 +find 1 111 2.197225 2.197225 111 +version 1 113 2.197225 2.197225 122 +world 1 115 2.197225 2.197225 126 +check 1 115 2.197225 2.197225 118 +text 3 98 2.302585 6.907755 133 +imag 26 91 2.397895 62.345270 161 +graphic 3 90 2.397895 7.193685 147 +section 2 94 2.397895 4.795790 149 +select 1 91 2.397895 2.397895 154 +pictur 1 89 2.397895 2.397895 160 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +learn 3 86 2.484907 7.454721 170 +librari 3 87 2.484907 7.454721 181 +info 3 85 2.484907 7.454721 176 +stuff 3 87 2.484907 7.454721 171 +internet 2 83 2.484907 4.969814 186 +requir 1 81 2.484907 2.484907 167 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +academ 1 82 2.484907 2.484907 178 +refer 2 78 2.564949 5.129898 203 +state 2 76 2.564949 5.129898 207 +april 1 77 2.564949 2.564949 196 +homework 1 79 2.564949 2.564949 193 +html 2 75 2.639057 5.278114 235 +line 2 75 2.639057 5.278114 231 +onlin 1 75 2.639057 2.639057 223 +servic 1 72 2.639057 2.639057 236 +simul 2 66 2.708050 5.416100 255 +view 1 70 2.708050 2.708050 254 +window 1 68 2.708050 2.708050 242 +multimedia 1 68 2.708050 2.708050 258 +dept 1 64 2.772589 2.772589 291 +guid 1 63 2.772589 2.772589 267 +written 1 63 2.772589 2.772589 278 +experi 1 64 2.772589 2.772589 283 +new 1 64 2.772589 2.772589 262 +back 1 60 2.833213 2.833213 297 +automat 1 61 2.833213 2.833213 306 +index 4 56 2.890372 11.561488 309 +unix 1 58 2.890372 2.890372 308 +browser 1 56 2.890372 2.890372 313 +hardwar 1 51 2.995732 2.995732 350 +cool 3 49 3.044522 9.133566 374 +archiv 2 49 3.044522 6.089044 364 +format 1 48 3.044522 3.044522 356 +standard 1 48 3.044522 3.044522 365 +numer 1 49 3.044522 3.044522 369 +give 1 50 3.044522 3.044522 359 +fridai 1 44 3.135494 3.135494 390 +algebra 1 45 3.135494 3.135494 394 +directori 1 45 3.135494 3.135494 396 +video 1 44 3.135494 3.135494 405 +math 1 44 3.135494 3.135494 402 +natur 1 44 3.135494 3.135494 406 +mark 1 44 3.135494 3.135494 403 +vision 3 41 3.218876 9.656628 430 +linear 1 41 3.218876 3.218876 431 +might 1 41 3.218876 3.218876 426 +compani 1 41 3.218876 3.218876 423 +http 1 41 3.218876 3.218876 420 +theoret 1 39 3.258097 3.258097 446 +electr 1 38 3.295837 3.295837 461 +robot 2 36 3.367296 6.734592 497 +everi 1 34 3.401197 3.401197 519 +tech 1 35 3.401197 3.401197 515 +survei 1 35 3.401197 3.401197 513 +eduoffic 1 33 3.433987 3.433987 531 +board 1 33 3.433987 3.433987 528 +idea 2 32 3.465736 6.931472 545 +independ 1 32 3.465736 3.465736 548 +storag 1 31 3.496508 3.496508 553 +rang 2 30 3.555348 7.110696 565 +specifi 1 30 3.555348 3.555348 568 +univ 2 28 3.610918 7.221836 617 +packag 1 28 3.610918 3.610918 614 +retriev 8 27 3.637586 29.100688 621 +altern 1 26 3.688879 3.688879 641 +challeng 1 26 3.688879 3.688879 653 +trace 2 25 3.737670 7.475340 677 +todai 1 25 3.737670 3.737670 672 +client 1 25 3.737670 3.737670 679 +greg 6 24 3.761200 22.567200 695 +yahoo 1 24 3.761200 3.761200 707 +recognit 3 23 3.806662 11.419986 723 +thank 1 23 3.806662 3.806662 721 +geometri 2 22 3.850148 7.700296 752 +fund 2 21 3.912023 7.824046 805 +util 1 21 3.912023 3.912023 774 +hous 1 21 3.912023 3.912023 801 +nice 3 20 3.951244 11.853732 809 +portabl 1 20 3.951244 3.951244 819 +toolkit 1 20 3.951244 3.951244 835 +histori 2 19 4.007333 8.014666 853 +lyco 1 19 4.007333 4.007333 871 +feedback 1 19 4.007333 4.007333 854 +thoma 2 18 4.060443 8.120886 901 +lot 1 18 4.060443 4.060443 889 +stanford 1 17 4.110874 4.110874 955 +medic 1 17 4.110874 4.110874 958 +germani 1 17 4.110874 4.110874 946 +repositori 1 17 4.110874 4.110874 932 +white 1 17 4.110874 4.110874 951 +english 2 15 4.248495 8.496990 1033 +overhead 1 15 4.248495 4.248495 1035 +todd 1 15 4.248495 4.248495 1051 +charact 1 15 4.248495 4.248495 1028 +massiv 1 15 4.248495 4.248495 1026 +draft 1 14 4.317488 4.317488 1085 +pretti 2 13 4.382027 8.764054 1191 +suit 1 13 4.382027 4.382027 1129 +resolut 1 13 4.382027 4.382027 1172 +misc 1 13 4.382027 4.382027 1124 +charl 1 13 4.382027 4.382027 1149 +shape 2 12 4.465908 8.931816 1245 +optic 1 12 4.465908 4.465908 1221 +string 2 11 4.553877 9.107754 1340 +excit 1 11 4.553877 4.553877 1329 +night 1 11 4.553877 4.553877 1319 +worth 1 11 4.553877 4.553877 1294 +perl 1 11 4.553877 4.553877 1332 +literatur 1 11 4.553877 4.553877 1300 +metacrawl 1 10 4.653960 4.653960 1455 +card 1 10 4.653960 4.653960 1435 +enter 1 10 4.653960 4.653960 1454 +custom 1 10 4.653960 4.653960 1414 +utah 1 9 4.753590 4.753590 1585 +patterson 1 9 4.753590 4.753590 1554 +linguist 1 9 4.753590 4.753590 1593 +editori 1 9 4.753590 4.753590 1611 +japan 3 8 4.875197 14.625591 1762 +textur 1 8 4.875197 4.875197 1677 +dictionari 1 8 4.875197 4.875197 1642 +jack 1 8 4.875197 4.875197 1780 +attach 2 7 5.010635 10.021270 1785 +shade 2 7 5.010635 10.021270 1881 +usenet 1 7 5.010635 5.010635 1839 +shot 1 7 5.010635 5.010635 1898 +mirror 4 6 5.164786 20.659144 2028 +sharp 2 6 5.164786 10.329572 2100 +invest 2 6 5.164786 10.329572 2153 +altavista 1 6 5.164786 5.164786 2222 +infoseek 1 6 5.164786 5.164786 2188 +postcard 1 6 5.164786 5.164786 2181 +textual 1 6 5.164786 5.164786 1979 +apolog 1 6 5.164786 5.164786 2046 +garbag 1 6 5.164786 5.164786 1986 +price 1 6 5.164786 5.164786 1999 +hyper 2 5 5.347108 10.694216 2435 +appt 1 5 5.347108 5.347108 2312 +multiresolut 1 5 5.347108 5.347108 2423 +medicin 1 5 5.347108 5.347108 2448 +hennessi 1 5 5.347108 5.347108 2289 +travers 1 5 5.347108 5.347108 2363 +row 1 5 5.347108 5.347108 2330 +japanes 4 4 5.568345 22.273380 2934 +planet 2 4 5.568345 11.136690 2912 +zoom 1 4 5.568345 5.568345 2961 +moon 1 4 5.568345 5.568345 2991 +diagnosi 1 4 5.568345 5.568345 3027 +nist 1 4 5.568345 5.568345 2973 +wander 1 4 5.568345 5.568345 2896 +freewar 2 3 5.857933 11.715866 3504 +motif 1 3 5.857933 5.857933 3752 +sharewar 1 3 5.857933 5.857933 3503 +atmospher 1 3 5.857933 5.857933 3388 +rack 1 3 5.857933 5.857933 3176 +lockhe 1 3 5.857933 5.857933 3863 +georgia 1 3 5.857933 5.857933 3834 +trec 1 3 5.857933 5.857933 3547 +gigabyt 1 3 5.857933 5.857933 3548 +spider 1 3 5.857933 5.857933 3605 +latin 1 3 5.857933 5.857933 3741 +citizen 1 3 5.857933 5.857933 3238 +belong 1 3 5.857933 5.857933 3797 +tracer 2 2 6.263398 12.526796 5913 +pagegreg 1 2 6.263398 6.263398 5906 +pagenam 1 2 6.263398 6.263398 6165 +sharpemail 1 2 6.263398 6.263398 4766 +dejanew 1 2 6.263398 6.263398 5602 +harmoni 1 2 6.263398 6.263398 5235 +solar 1 2 6.263398 6.263398 5003 +comet 1 2 6.263398 6.263398 5785 +catalogu 1 2 6.263398 6.263398 6166 +gothic 1 2 6.263398 6.263398 5787 +soup 1 2 6.263398 6.263398 6131 +kitchen 1 2 6.263398 6.263398 6132 +awesom 1 2 6.263398 6.263398 6167 +diagon 1 2 6.263398 6.263398 4974 +atla 1 2 6.263398 6.263398 5996 +raster 1 2 6.263398 6.263398 6078 +radianc 1 2 6.263398 6.263398 6068 +radios 1 2 6.263398 6.263398 4504 +mexico 1 2 6.263398 6.263398 6044 +monash 1 2 6.263398 6.263398 4460 +strictli 1 2 6.263398 6.263398 5726 +pointcast 1 2 6.263398 6.263398 5377 +portfolio 1 2 6.263398 6.263398 4408 +offens 1 2 6.263398 6.263398 6168 +brill 1 2 6.263398 6.263398 4137 +sharpgreg 1 2 6.263398 6.263398 4767 +pic 7 1 6.957497 48.702479 18492 +wyom 2 1 6.957497 13.914994 18493 +satelit 2 1 6.957497 13.914994 18494 +handwrit 2 1 6.957497 13.914994 18495 +schwab 2 1 6.957497 13.914994 18496 +tgif 1 1 6.957497 6.957497 18497 +notesclass 1 1 6.957497 6.957497 18498 +aboutsearch 1 1 6.957497 6.957497 18499 +ohioc 1 1 6.957497 6.957497 18500 +cygnu 1 1 6.957497 6.957497 18501 +mumit 1 1 6.957497 6.957497 18502 +newbi 1 1 6.957497 6.957497 18503 +guideplatform 1 1 6.957497 6.957497 18504 +kit 1 1 6.957497 6.957497 18505 +amulet 1 1 6.957497 6.957497 18506 +dclap 1 1 6.957497 6.957497 18507 +wxwindow 1 1 6.957497 6.957497 18508 +yacl 1 1 6.957497 6.957497 18509 +projectclass 1 1 6.957497 6.957497 18510 +projectmisc 1 1 6.957497 6.957497 18511 +cygwin 1 1 6.957497 6.957497 18512 +gimp 1 1 6.957497 6.957497 18513 +harmonai 1 1 6.957497 6.957497 18514 +vasc 1 1 6.957497 6.957497 18515 +jaida 1 1 6.957497 6.957497 18516 +seamless 1 1 6.957497 6.957497 18517 +meteor 1 1 6.957497 6.957497 18518 +antarctica 1 1 6.957497 6.957497 18519 +niae 1 1 6.957497 6.957497 18520 +vistex 1 1 6.957497 6.957497 18521 +databaseartifici 1 1 6.957497 6.957497 18522 +primoridi 1 1 6.957497 6.957497 18523 +dermatolog 1 1 6.957497 6.957497 18524 +erlang 1 1 6.957497 6.957497 18525 +orthopaed 1 1 6.957497 6.957497 18526 +ecvnet 1 1 6.957497 6.957497 18527 +nici 1 1 6.957497 6.957497 18528 +groupimag 1 1 6.957497 6.957497 18529 +raytrac 1 1 6.957497 6.957497 18530 +rayshad 1 1 6.957497 6.957497 18531 +avalon 1 1 6.957497 6.957497 18532 +grimstead 1 1 6.957497 6.957497 18533 +dsite 1 1 6.957497 6.957497 18534 +intergraph 1 1 6.957497 6.957497 18535 +glint 1 1 6.957497 6.957497 18536 +chipset 1 1 6.957497 6.957497 18537 +nvidia 1 1 6.957497 6.957497 18538 +chipsetcomput 1 1 6.957497 6.957497 18539 +geometeri 1 1 6.957497 6.957497 18540 +geometrylispuseless 1 1 6.957497 6.957497 18541 +pagescomput 1 1 6.957497 6.957497 18542 +superdlx 1 1 6.957497 6.957497 18543 +parl 1 1 6.957497 6.957497 18544 +washingt 1 1 6.957497 6.957497 18545 +groupjapanes 1 1 6.957497 6.957497 18546 +unvers 1 1 6.957497 6.957497 18547 +infowav 1 1 6.957497 6.957497 18548 +edict 1 1 6.957497 6.957497 18549 +shodouka 1 1 6.957497 6.957497 18550 +asiasoftinform 1 1 6.957497 6.957497 18551 +retrev 1 1 6.957497 6.957497 18552 +peregrin 1 1 6.957497 6.957497 18553 +infomin 1 1 6.957497 6.957497 18554 +other_sw 1 1 6.957497 6.957497 18555 +info_retriev 1 1 6.957497 6.957497 18556 +jedi 1 1 6.957497 6.957497 18557 +hartlib 1 1 6.957497 6.957497 18558 +stemmer 1 1 6.957497 6.957497 18559 +twainhumor 1 1 6.957497 6.957497 18560 +threw 1 1 6.957497 6.957497 18561 +investorweb 1 1 6.957497 6.957497 18562 +networth 1 1 6.957497 6.957497 18563 +fundscap 1 1 6.957497 6.957497 18564 +stockmastermutu 1 1 6.957497 6.957497 18565 +brokerag 1 1 6.957497 6.957497 18566 +fidel 1 1 6.957497 6.957497 18567 +vanguard 1 1 6.957497 6.957497 18568 +gabelli 1 1 6.957497 6.957497 18569 +mutualsmisc 1 1 6.957497 6.957497 18570 +psnuplast 1 1 6.957497 6.957497 18571 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html new file mode 100644 index 00000000..d7e668de --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gsri^gsri.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +sridhar 1 2 6.263398 6.263398 4807 +gopalsridhar 1 1 6.957497 6.957497 18572 +gopalgsri 1 1 6.957497 6.957497 18573 +edubon 1 1 6.957497 6.957497 18574 +marrow 1 1 6.957497 6.957497 18575 +pageresumest 1 1 6.957497 6.957497 18576 +pagecalvin 1 1 6.957497 6.957497 18577 +hobbesbookmark 1 1 6.957497 6.957497 18578 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html new file mode 100644 index 00000000..b41546d0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~guangshu^guangshu.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 4 571 0.000000 0.000000 5 +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +email 2 220 1.386294 2.772588 29 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +class 1 199 1.609438 1.609438 37 +madison 4 165 1.791759 7.167036 55 +wisconsin 2 169 1.791759 3.583518 54 +data 2 170 1.791759 3.583518 49 +phone 1 175 1.791759 1.791759 45 +relat 2 139 1.945910 3.891820 68 +number 2 130 2.079442 4.158884 97 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +analysi 1 124 2.079442 2.079442 98 +send 2 114 2.197225 4.394450 109 +manag 1 114 2.197225 2.197225 125 +sinc 3 90 2.397895 7.193685 159 +grade 1 90 2.397895 2.397895 142 +educ 1 86 2.484907 2.484907 191 +environ 1 84 2.484907 2.484907 177 +stuff 1 87 2.484907 2.484907 171 +state 1 76 2.564949 2.564949 207 +june 1 79 2.564949 2.564949 214 +plan 1 65 2.772589 2.772589 272 +explor 1 58 2.890372 2.890372 324 +visitor 3 49 3.044522 9.133566 371 +visual 1 48 3.044522 3.044522 372 +california 1 46 3.091042 3.091042 388 +physic 1 47 3.091042 3.091042 377 +around 1 43 3.178054 3.178054 415 +chines 1 29 3.583519 3.583519 595 +weather 1 28 3.610918 3.610918 618 +famili 1 23 3.806662 3.806662 735 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +raghu 1 12 4.465908 4.465908 1212 +career 1 12 4.465908 4.465908 1287 +devis 1 10 4.653960 4.653960 1451 +angel 1 8 4.875197 4.875197 1779 +advis 1 6 5.164786 5.164786 2173 +forecast 1 6 5.164786 5.164786 2171 +peke 1 5 5.347108 5.347108 2539 +medicin 1 5 5.347108 5.347108 2448 +miscellani 1 3 5.857933 5.857933 3976 +guangshun 2 2 6.263398 12.526796 6138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html new file mode 100644 index 00000000..ce63107c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~gviswana^gviswana.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 2 431 0.693147 1.386294 10 +us 1 329 1.098612 1.098612 16 +design 2 213 1.386294 2.772588 25 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +list 1 201 1.609438 1.609438 39 +public 1 202 1.609438 1.609438 43 +data 3 170 1.791759 5.375277 49 +implement 2 152 1.791759 3.583518 52 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +code 1 108 2.197225 2.197225 116 +west 1 83 2.484907 2.484907 192 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +summari 1 73 2.639057 2.639057 237 +thesi 2 57 2.890372 5.780744 327 +detail 1 57 2.890372 2.890372 321 +local 1 55 2.944439 2.944439 334 +investig 1 51 2.995732 2.995732 353 +execut 1 45 3.135494 3.135494 404 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +hand 1 37 3.332205 3.332205 475 +focus 1 29 3.583519 3.583519 584 +target 1 12 4.465908 4.465908 1282 +laru 1 9 4.753590 4.753590 1560 +studentdepart 1 5 5.347108 5.347108 2505 +parallelprogram 1 5 5.347108 5.347108 2379 +parallellanguag 2 3 5.857933 11.715866 4026 +usaadvisor 1 3 5.857933 5.857933 4017 +guhan 2 2 6.263398 12.526796 6169 +viswanathan 2 2 6.263398 12.526796 6170 +amor 1 2 6.263398 6.263398 5486 +gviswana 1 1 6.957497 6.957497 18579 +parallelappl 1 1 6.957497 6.957497 18580 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~harit^harit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~harit^harit.html new file mode 100644 index 00000000..f34eed6f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~harit^harit.html @@ -0,0 +1,74 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +univers 3 571 0.000000 0.000000 5 +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +student 2 343 1.098612 2.197224 19 +cours 2 273 1.098612 2.197224 15 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +link 2 247 1.386294 2.772588 24 +mail 2 238 1.386294 2.772588 22 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +address 1 170 1.791759 1.791759 62 +architectur 2 139 1.945910 3.891820 77 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +year 1 148 1.945910 1.945910 84 +databas 1 122 2.079442 2.079442 86 +spring 1 131 2.079442 2.079442 88 +manag 1 114 2.197225 2.197225 125 +world 1 115 2.197225 2.197225 126 +take 2 97 2.302585 4.605170 134 +advanc 2 99 2.302585 4.605170 130 +access 2 102 2.302585 4.605170 136 +sinc 2 90 2.397895 4.795790 159 +thing 1 84 2.484907 2.484907 189 +stuff 1 87 2.484907 2.484907 171 +meet 1 72 2.639057 2.639057 229 +line 1 75 2.639057 2.639057 231 +would 1 67 2.708050 2.708050 251 +prof 3 64 2.772589 8.317767 273 +colleg 1 61 2.833213 2.833213 300 +undergradu 1 54 2.944439 2.944439 338 +electron 1 47 3.091042 3.091042 379 +mark 1 44 3.135494 3.135494 403 +jame 1 35 3.401197 3.401197 507 +india 1 32 3.465736 3.465736 550 +photo 1 31 3.496508 3.496508 561 +hill 1 25 3.737670 3.737670 670 +sport 1 25 3.737670 3.737670 683 +indian 1 22 3.850148 3.850148 769 +sept 2 17 4.110874 8.221748 952 +ramakrishnan 1 16 4.174387 4.174387 972 +photograph 1 15 4.248495 4.248495 1056 +draw 1 14 4.317488 4.317488 1086 +warn 1 14 4.317488 4.317488 1068 +raghu 1 12 4.465908 4.465908 1212 +newspap 1 12 4.465908 4.465908 1280 +classmat 1 9 4.753590 4.753590 1516 +folk 1 9 4.753590 4.753590 1597 +counter 2 8 4.875197 9.750394 1765 +goodman 1 7 5.010635 5.010635 1891 +courtesi 1 7 5.010635 5.010635 1953 +famou 1 6 5.164786 5.164786 2185 +mirza 1 3 5.857933 5.857933 3989 +osmania 1 2 6.263398 6.263398 5573 +hyderabad 1 2 6.263398 6.263398 5570 +sastri 1 2 6.263398 6.263398 6171 +roommat 1 2 6.263398 6.263398 6157 +saeed 1 2 6.263398 6.263398 6172 +dust 1 2 6.263398 6.263398 5551 +harit 3 1 6.957497 20.872491 18581 +mvsr 1 1 6.957497 6.957497 18582 +murthi 1 1 6.957497 6.957497 18583 +zubber 1 1 6.957497 6.957497 18584 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html new file mode 100644 index 00000000..301e4691 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hasti^hasti.html @@ -0,0 +1,44 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +madison 4 165 1.791759 7.167036 55 +wisconsin 3 169 1.791759 5.375277 54 +first 1 140 1.945910 1.945910 71 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +mathemat 2 108 2.197225 4.394450 123 +java 2 70 2.708050 5.416100 248 +dept 1 64 2.772589 2.772589 291 +septemb 1 65 2.772589 2.772589 274 +colleg 1 61 2.833213 2.833213 300 +telephon 1 50 3.044522 3.044522 373 +seminar 1 38 3.295837 3.295837 470 +departmentunivers 1 24 3.761200 3.761200 711 +applet 1 20 3.951244 3.951244 827 +basketbal 1 12 4.465908 4.465908 1289 +edutelephon 1 10 4.653960 4.653960 1473 +engr 1 10 4.653960 4.653960 1427 +volleybal 1 9 4.753590 4.753590 1598 +softbal 1 9 4.753590 4.753590 1594 +rebecca 2 6 5.164786 10.329572 2174 +noland 1 5 5.347108 5.347108 2420 +assistantcomput 1 3 5.857933 5.857933 4027 +hasti 3 2 6.263398 18.790194 6173 +carleton 1 2 6.263398 6.263398 5381 +linkag 1 2 6.263398 6.263398 5139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html new file mode 100644 index 00000000..ce83d3c8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hcl^hcl.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +program 2 374 0.693147 1.386294 7 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +offic 3 299 1.098612 3.295836 13 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +madison 3 165 1.791759 5.375277 55 +phone 3 175 1.791759 5.375277 45 +wisconsin 2 169 1.791759 3.583518 54 +address 1 170 1.791759 1.791759 62 +hour 1 165 1.791759 1.791759 46 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +databas 1 122 2.079442 2.079442 86 +person 2 111 2.197225 4.394450 117 +final 1 116 2.197225 2.197225 108 +manag 1 114 2.197225 2.197225 125 +mathemat 1 108 2.197225 2.197225 123 +advanc 1 99 2.302585 2.302585 130 +section 1 94 2.397895 2.397895 149 +imag 1 91 2.397895 2.397895 161 +stuff 3 87 2.484907 7.454721 171 +academ 2 82 2.484907 4.969814 178 +info 1 85 2.484907 2.484907 176 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +internet 1 83 2.484907 2.484907 186 +state 2 76 2.564949 5.129898 207 +want 1 79 2.564949 2.564949 199 +good 1 77 2.564949 2.564949 200 +differ 1 66 2.708050 2.708050 253 +dept 2 64 2.772589 5.545178 291 +new 1 64 2.772589 2.772589 262 +talk 1 53 2.944439 2.944439 336 +life 1 50 3.044522 3.044522 375 +right 1 48 3.044522 3.044522 363 +get 1 46 3.091042 3.091042 380 +expect 1 37 3.332205 3.332205 484 +download 1 36 3.367296 3.367296 489 +semant 1 29 3.583519 3.583519 587 +hope 1 28 3.610918 3.610918 610 +retriev 1 27 3.637586 3.637586 621 +enjoi 1 26 3.688879 3.688879 660 +tell 1 21 3.912023 3.912023 777 +stand 1 18 4.060443 4.060443 891 +stop 1 17 4.110874 4.110874 942 +seek 1 17 4.110874 4.110874 954 +thought 1 17 4.110874 4.110874 945 +alan 1 13 4.382027 4.382027 1146 +philosophi 1 13 4.382027 4.382027 1167 +brother 1 13 4.382027 4.382027 1189 +neat 2 12 4.465908 8.931816 1263 +count 1 12 4.465908 4.465908 1239 +minor 1 12 4.465908 4.465908 1237 +linguist 1 9 4.753590 4.753590 1593 +bart 1 9 4.753590 4.753590 1559 +lane 3 8 4.875197 14.625591 1720 +rais 1 8 4.875197 4.875197 1711 +jack 1 8 4.875197 4.875197 1780 +reload 1 8 4.875197 4.875197 1682 +on 1 8 4.875197 4.875197 1628 +marri 1 7 5.010635 5.010635 1946 +accord 1 7 5.010635 5.010635 1826 +creation 1 6 5.164786 5.164786 2069 +handi 1 6 5.164786 5.164786 2111 +advic 1 5 5.347108 5.347108 2509 +formerli 1 5 5.347108 5.347108 2397 +truli 1 5 5.347108 5.347108 2476 +plant 1 5 5.347108 5.347108 2497 +ling 1 4 5.568345 5.568345 3045 +cyber 1 4 5.568345 5.568345 2909 +luck 1 3 5.857933 5.857933 3201 +audit 1 3 5.857933 5.857933 3391 +northeast 1 3 5.857933 5.857933 3922 +chad 2 2 6.263398 12.526796 4768 +biggest 1 2 6.263398 6.263398 4790 +arthur 1 2 6.263398 6.263398 5767 +deep 1 2 6.263398 6.263398 5528 +claud 2 1 6.957497 13.914994 18585 +welcomethank 1 1 6.957497 6.957497 18586 +bestbet 1 1 6.957497 6.957497 18587 +onmai 1 1 6.957497 6.957497 18588 +nichol 1 1 6.957497 6.957497 18589 +discours 1 1 6.957497 6.957497 18590 +barwis 1 1 6.957497 6.957497 18591 +epigram 1 1 6.957497 6.957497 18592 +perli 1 1 6.957497 6.957497 18593 +laud 1 1 6.957497 6.957497 18594 +truman 1 1 6.957497 6.957497 18595 +missouri 1 1 6.957497 6.957497 18596 +poop 1 1 6.957497 6.957497 18597 +unabash 1 1 6.957497 6.957497 18598 +psychot 1 1 6.957497 6.957497 18599 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hert^hert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hert^hert.html new file mode 100644 index 00000000..14f06ebd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hert^hert.html @@ -0,0 +1,104 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +page 3 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +interest 4 384 0.693147 2.772588 11 +research 3 431 0.693147 2.079441 10 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +current 1 284 1.098612 1.098612 21 +link 3 247 1.386294 4.158882 24 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +design 1 213 1.386294 1.386294 25 +public 2 202 1.609438 3.218876 43 +paper 2 205 1.609438 3.218876 38 +algorithm 4 162 1.791759 7.167036 57 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +develop 1 174 1.791759 1.791759 53 +applic 1 170 1.791759 1.791759 56 +confer 3 126 2.079442 6.238326 100 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +intern 3 108 2.197225 6.591675 128 +version 2 113 2.197225 4.394450 122 +assist 1 112 2.197225 2.197225 113 +book 2 99 2.302585 4.605170 131 +technic 1 100 2.302585 2.302585 140 +graphic 1 90 2.397895 2.397895 147 +select 1 91 2.397895 2.397895 154 +proceed 1 93 2.397895 2.397895 152 +journal 2 83 2.484907 4.969814 183 +librari 2 87 2.484907 4.969814 181 +environ 1 84 2.484907 2.484907 177 +ieee 1 86 2.484907 2.484907 190 +educ 1 86 2.484907 2.484907 191 +appear 3 78 2.564949 7.694847 210 +issu 1 78 2.564949 2.564949 211 +refer 1 78 2.564949 2.564949 203 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +line 1 75 2.639057 2.639057 231 +august 2 66 2.708050 5.416100 257 +simul 1 66 2.708050 2.708050 255 +plan 5 65 2.772589 13.862945 272 +abstract 1 62 2.772589 2.772589 276 +laboratori 1 63 2.772589 2.772589 292 +juli 1 60 2.833213 2.833213 305 +publish 1 57 2.890372 2.890372 326 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +move 1 47 3.091042 3.091042 382 +autom 1 41 3.218876 3.218876 434 +multipl 3 39 3.258097 9.774291 453 +vita 1 38 3.295837 3.295837 473 +robot 12 36 3.367296 40.407552 497 +curriculum 1 33 3.433987 3.433987 535 +extend 1 32 3.465736 3.465736 539 +anim 1 31 3.496508 3.496508 557 +common 1 30 3.555348 3.555348 574 +travel 1 30 3.555348 3.555348 579 +proc 2 26 3.688879 7.377758 649 +experiment 1 26 3.688879 3.688879 645 +motion 5 24 3.761200 18.806000 699 +geometri 4 22 3.850148 15.400592 752 +rout 1 21 3.912023 3.912023 793 +unit 1 21 3.912023 3.912023 779 +basi 1 20 3.951244 3.951244 828 +geometr 1 19 4.007333 4.007333 852 +susan 8 15 4.248495 33.987960 1050 +configur 1 15 4.248495 4.248495 1012 +vladimir 6 11 4.553877 27.323262 1324 +arbitrari 1 11 4.553877 4.553877 1359 +cook 1 10 4.653960 4.653960 1464 +congress 1 9 4.753590 4.753590 1592 +autonom 3 8 4.875197 14.625591 1749 +curv 2 8 4.875197 9.750394 1656 +plane 1 6 5.164786 5.164786 2187 +bind 1 5 5.347108 5.347108 2250 +tiwari 1 5 5.347108 5.347108 2385 +lumelski 6 4 5.568345 33.410070 2837 +ti 1 4 5.568345 5.568345 3005 +underwat 1 4 5.568345 5.568345 2838 +shelf 1 4 5.568345 5.568345 2621 +canadian 1 3 5.857933 5.857933 3508 +planar 1 3 5.857933 5.857933 3647 +hert 9 2 6.263398 56.370582 4848 +tether 4 2 6.263398 25.053592 4844 +deform 1 2 6.263398 6.263398 6065 +terrain 1 2 6.263398 6.263398 6174 +epicuri 1 2 6.263398 6.263398 5105 +veggi 1 2 6.263398 6.263398 5426 +alogirthm 1 1 6.957497 6.957497 18600 +sanjai 1 1 6.957497 6.957497 18601 +reznik 1 1 6.957497 6.957497 18602 +samantha 1 1 6.957497 6.957497 18603 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hog^hog.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hog^hog.html new file mode 100644 index 00000000..f0698fe1 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hog^hog.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +offic 3 299 1.098612 3.295836 13 +student 2 343 1.098612 2.197224 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +mail 3 238 1.386294 4.158882 22 +graduat 2 215 1.386294 2.772588 31 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +madison 4 165 1.791759 7.167036 55 +wisconsin 2 169 1.791759 3.583518 54 +hour 2 165 1.791759 3.583518 46 +dayton 1 119 2.079442 2.079442 104 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +visit 1 63 2.772589 2.772589 288 +experi 1 64 2.772589 2.772589 283 +finger 1 52 2.995732 2.995732 354 +maintain 1 51 2.995732 2.995732 342 +appoint 1 49 3.044522 3.044522 358 +departmentunivers 1 24 3.761200 3.761200 711 +serv 1 22 3.850148 3.850148 758 +countri 1 15 4.248495 4.248495 1059 +wife 1 13 4.382027 4.382027 1196 +sai 1 13 4.382027 4.382027 1175 +tue 1 11 4.553877 4.553877 1308 +edutelephon 1 10 4.653960 4.653960 1473 +counter 1 8 4.875197 4.875197 1765 +studentcomput 1 7 5.010635 5.010635 1963 +none 1 7 5.010635 5.010635 1811 +corp 1 6 5.164786 5.164786 2139 +peac 1 3 5.857933 5.857933 3769 +kirk 3 2 6.263398 18.790194 6175 +hogenson 3 1 6.957497 20.872491 18604 +myschedul 1 1 6.957497 6.957497 18605 +workout 1 1 6.957497 6.957497 18606 +tryto 1 1 6.957497 6.957497 18607 +ghana 1 1 6.957497 6.957497 18608 +usernam 1 1 6.957497 6.957497 18609 +pnhp 1 1 6.957497 6.957497 18610 +eilun 1 1 6.957497 6.957497 18611 +accessedtim 1 1 6.957497 6.957497 18612 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~horn^horn.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~horn^horn.html new file mode 100644 index 00000000..e6a495cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~horn^horn.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +madison 1 165 1.791759 1.791759 55 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +linear 1 41 3.218876 3.218876 431 +road 1 11 4.553877 4.553877 1374 +jeffrei 2 9 4.753590 9.507180 1612 +wise 1 3 5.857933 5.857933 3631 +horn 3 2 6.263398 18.790194 6072 +swanton 1 1 6.957497 6.957497 18613 +familyemploymenteducationresearchgenealog 1 1 6.957497 6.957497 18614 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html new file mode 100644 index 00000000..4594df15 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~horwitz^horwitz.html @@ -0,0 +1,149 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 15 374 0.693147 10.397205 7 +work 5 380 0.693147 3.465735 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +languag 5 227 1.386294 6.931470 26 +softwar 3 220 1.386294 4.158882 30 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +cornel 1 215 1.386294 1.386294 23 +design 1 213 1.386294 1.386294 25 +also 1 259 1.386294 1.386294 28 +gener 1 220 1.386294 1.386294 27 +washington 1 236 1.386294 1.386294 32 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +algorithm 3 162 1.791759 5.375277 57 +base 2 165 1.791759 3.583518 50 +develop 2 174 1.791759 3.583518 53 +implement 2 152 1.791759 3.583518 52 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +problem 4 147 1.945910 7.783640 75 +like 1 132 1.945910 1.945910 81 +analysi 7 124 2.079442 14.556094 98 +confer 5 126 2.079442 10.397210 100 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +version 2 113 2.197225 4.394450 122 +intern 2 108 2.197225 4.394450 128 +structur 1 106 2.197225 2.197225 105 +specif 1 106 2.197225 2.197225 106 +teach 1 108 2.197225 2.197225 112 +proceed 4 93 2.397895 9.591580 152 +call 2 91 2.397895 4.795790 153 +octob 1 89 2.397895 2.397895 156 +west 1 83 2.484907 2.484907 192 +help 1 83 2.484907 2.484907 175 +chang 1 82 2.484907 2.484907 163 +larg 1 82 2.484907 2.484907 168 +second 1 81 2.484907 2.484907 166 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +symposium 4 72 2.639057 10.556228 238 +effici 3 73 2.639057 7.917171 233 +involv 2 71 2.639057 5.278114 227 +appli 1 71 2.639057 2.639057 226 +differ 2 66 2.708050 5.416100 253 +would 1 67 2.708050 2.708050 251 +practic 1 70 2.708050 2.708050 246 +test 1 66 2.708050 2.708050 252 +januari 3 62 2.772589 8.317767 264 +previou 1 62 2.772589 2.772589 290 +foundat 1 62 2.772589 2.772589 286 +point 1 58 2.890372 2.890372 319 +telephon 1 50 3.044522 3.044522 373 +understand 2 47 3.091042 6.182084 384 +combin 1 42 3.218876 3.218876 421 +fast 1 42 3.218876 3.218876 429 +programm 1 39 3.258097 3.258097 445 +streetmadison 1 38 3.295837 3.295837 474 +represent 1 35 3.401197 3.401197 512 +graph 4 30 3.555348 14.221392 576 +exist 1 30 3.555348 3.555348 569 +produc 1 30 3.555348 3.555348 572 +depend 3 29 3.583519 10.750557 583 +semant 2 29 3.583519 7.167038 587 +propos 1 28 3.610918 3.610918 602 +accur 1 25 3.737670 3.737670 680 +departmentunivers 1 24 3.761200 3.761200 711 +flow 1 24 3.761200 3.761200 700 +identifi 1 22 3.850148 3.850148 760 +record 3 18 4.060443 12.181329 890 +thoma 1 18 4.060443 4.060443 901 +modif 1 17 4.110874 4.110874 913 +white 1 17 4.110874 4.110874 951 +fourth 1 16 4.174387 4.174387 999 +precis 3 15 4.248495 12.745485 1023 +susan 1 15 4.248495 4.248495 1050 +piec 1 15 4.248495 4.248495 1020 +demand 1 14 4.317488 4.317488 1073 +francisco 1 14 4.317488 4.317488 1095 +individu 1 13 4.382027 4.382027 1126 +joint 1 13 4.382027 4.382027 1130 +sigplan 1 13 4.382027 4.382027 1190 +franc 1 12 4.465908 4.465908 1276 +mainli 2 10 4.653960 9.307920 1432 +guarante 1 10 4.653960 4.653960 1391 +conferenceon 1 9 4.753590 4.753590 1595 +plain 1 9 4.753590 4.753590 1495 +secretari 1 8 4.875197 4.875197 1775 +shapiro 1 8 4.875197 4.875197 1686 +implementationof 1 7 5.010635 5.010635 1813 +necessarili 1 7 5.010635 5.010635 1899 +sixth 1 7 5.010635 5.010635 1917 +textual 2 6 5.164786 10.329572 1979 +pari 1 6 5.164786 5.164786 2158 +softwareengin 1 6 5.164786 5.164786 2162 +increment 1 6 5.164786 5.164786 2206 +horwitz 8 5 5.347108 42.776864 2411 +dataflow 5 5 5.347108 26.735540 2390 +twenti 2 5 5.347108 10.694216 2540 +summarymi 1 5 5.347108 5.347108 2580 +australia 1 5 5.347108 5.347108 2478 +rep 5 4 5.568345 27.841725 3087 +interprocedur 4 4 5.568345 22.273380 2771 +slice 2 4 5.568345 11.136690 2622 +usa 1 4 5.568345 5.568345 3080 +theprogram 1 4 5.568345 5.568345 2686 +insensit 1 4 5.568345 5.568345 2716 +sigsoft 1 4 5.568345 5.568345 3036 +melbourn 1 4 5.568345 5.568345 3035 +principlesof 2 3 5.857933 11.715866 3145 +onprincipl 1 3 5.857933 5.857933 3701 +theoryand 1 3 5.857933 5.857933 3350 +denmark 1 3 5.857933 5.857933 3676 +reachabl 1 3 5.857933 5.857933 4001 +twentieth 1 3 5.857933 5.857933 3760 +fourteenth 1 3 5.857933 5.857933 3615 +sagiv 4 2 6.263398 25.053592 6176 +differenc 1 2 6.263398 6.263398 6177 +interproceduraldataflow 1 2 6.263398 6.263398 6178 +mooli 1 2 6.263398 6.263398 6179 +aarhu 1 2 6.263398 6.263398 6180 +charleston 1 2 6.263398 6.263398 6181 +aprogram 1 2 6.263398 6.263398 4943 +languagedesign 1 2 6.263398 6.263398 6182 +horwitzsusan 1 1 6.957497 6.957497 18615 +horwitzprofessorcomput 1 1 6.957497 6.957497 18616 +environmentsprogram 1 1 6.957497 6.957497 18617 +mergingstat 1 1 6.957497 6.957497 18618 +programsinterprocedur 1 1 6.957497 6.957497 18619 +analysisresearch 1 1 6.957497 6.957497 18620 +affectedbi 1 1 6.957497 6.957497 18621 +betweentwo 1 1 6.957497 6.957497 18622 +retest 1 1 6.957497 6.957497 18623 +certainsemant 1 1 6.957497 6.957497 18624 +concentratedeith 1 1 6.957497 6.957497 18625 +newalgorithm 1 1 6.957497 6.957497 18626 +publicationsm 1 1 6.957497 6.957497 18627 +constantpropag 1 1 6.957497 6.957497 18628 +bate 1 1 6.957497 6.957497 18629 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html new file mode 100644 index 00000000..e7ec5273 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~hummert^hummert.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +offic 1 299 1.098612 1.098612 13 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +click 1 142 1.945910 1.945910 78 +postscript 1 131 2.079442 2.079442 90 +version 1 113 2.197225 2.197225 122 +pictur 1 89 2.397895 2.397895 160 +resum 1 79 2.564949 2.564949 217 +hummert 1 3 5.857933 5.857933 3416 +pagenam 1 2 6.263398 6.263398 6165 +sidnei 1 2 6.263398 6.263398 4587 +edua 1 2 6.263398 6.263398 5764 +pagesid 1 1 6.957497 6.957497 18630 +hummertoffic 1 1 6.957497 6.957497 18631 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html new file mode 100644 index 00000000..b0b6f30c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~iigor^iigor.html @@ -0,0 +1,47 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +offic 1 299 1.098612 1.098612 13 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +link 1 247 1.386294 1.386294 24 +wisconsin 2 169 1.791759 3.583518 54 +phone 2 175 1.791759 3.583518 45 +construct 2 139 1.945910 3.891820 82 +like 1 132 1.945910 1.945910 81 +dayton 1 119 2.079442 2.079442 104 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +particular 1 51 2.995732 2.995732 352 +math 1 44 3.135494 3.135494 402 +vision 1 41 3.218876 3.218876 430 +streetmadison 1 38 3.295837 3.295837 474 +feel 1 37 3.332205 3.332205 483 +robot 1 36 3.367296 3.367296 497 +actual 1 28 3.610918 3.610918 604 +departmentunivers 1 24 3.761200 3.761200 711 +ever 1 19 4.007333 4.007333 872 +alreadi 1 16 4.174387 4.174387 963 +speak 1 12 4.465908 4.465908 1283 +undergrad 1 9 4.753590 4.753590 1589 +disclaim 1 4 5.568345 5.568345 2847 +alien 2 3 5.857933 11.715866 3930 +slave 1 3 5.857933 5.857933 3959 +igor 1 2 6.263398 6.263398 6183 +ivanisev 1 2 6.263398 6.263398 6184 +newest 1 2 6.263398 6.263398 5518 +needless 1 2 6.263398 6.263398 4694 +drake 1 2 6.263398 6.263398 5668 +pageigorivanisev 1 1 6.957497 6.957497 18632 +generalgradu 1 1 6.957497 6.957497 18633 +departmentwa 1 1 6.957497 6.957497 18634 +departmentaddress 1 1 6.957497 6.957497 18635 +iigor 1 1 6.957497 6.957497 18636 +eduiigor 1 1 6.957497 6.957497 18637 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html new file mode 100644 index 00000000..072bcbd8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ireland^ireland.html @@ -0,0 +1,3 @@ +term, tf, in documents count, idf, tfidf, wordid +wisc 1 242 1.386294 1.386294 33 +pageireland 1 1 6.957497 6.957497 18638 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~iss^userid.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~iss^userid.html new file mode 100644 index 00000000..f4b39025 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~iss^userid.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +email 1 220 1.386294 1.386294 29 +class 1 199 1.609438 1.609438 37 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +section 2 94 2.397895 4.795790 149 +comment 1 93 2.397895 2.397895 146 +meet 3 72 2.639057 7.917171 229 +tuesdai 2 73 2.639057 5.278114 219 +thursdai 1 70 2.708050 2.708050 241 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +profil 1 30 3.555348 3.555348 581 +peterson 1 7 5.010635 5.010635 1850 +sharenow 4 3 5.857933 23.431732 3439 +jone 1 3 5.857933 5.857933 3703 +recreat 1 3 5.857933 5.857933 3990 +madisonmadison 1 1 6.957497 6.957497 18639 +sciencestelephon 1 1 6.957497 6.957497 18640 +pmsection 1 1 6.957497 6.957497 18641 +pmboth 1 1 6.957497 6.957497 18642 +sciencesc 1 1 6.957497 6.957497 18643 +announcementshandoutsmoth 1 1 6.957497 6.957497 18644 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html new file mode 100644 index 00000000..d0015640 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jbasney^jbasney.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +avail 1 169 1.791759 1.791759 48 +area 1 144 1.945910 1.945910 80 +dayton 1 119 2.079442 2.079442 104 +resum 1 79 2.564949 2.564949 217 +onlin 1 75 2.639057 2.639057 223 +receiv 1 66 2.708050 2.708050 244 +prof 1 64 2.772589 2.772589 273 +previou 1 62 2.772589 2.772589 290 +colleg 1 61 2.833213 2.833213 300 +eduoffic 1 33 3.433987 3.433987 531 +departmentunivers 1 24 3.761200 3.761200 711 +livni 1 15 4.248495 4.248495 1053 +english 1 15 4.248495 4.248495 1033 +miron 1 14 4.317488 4.317488 1110 +condor 1 5 5.347108 5.347108 2577 +webpag 1 4 5.568345 5.568345 2660 +assistantcomput 1 3 5.857933 5.857933 4027 +statisticsoffic 1 2 6.263398 6.263398 4810 +andnetwork 1 2 6.263398 6.263398 5751 +basnei 1 2 6.263398 6.263398 4804 +basneyjim 1 1 6.957497 6.957497 18645 +basneygradu 1 1 6.957497 6.957497 18646 +jbasnei 1 1 6.957497 6.957497 18647 +directionof 1 1 6.957497 6.957497 18648 +fromoberlin 1 1 6.957497 6.957497 18649 +oberlin 1 1 6.957497 6.957497 18650 +codefrom 1 1 6.957497 6.957497 18651 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html new file mode 100644 index 00000000..c4358f85 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jerel^jerel.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +work 3 380 0.693147 2.079441 9 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +also 2 259 1.386294 2.772588 28 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +support 1 132 1.945910 1.945910 83 +click 1 142 1.945910 1.945910 78 +first 1 140 1.945910 1.945910 71 +like 1 132 1.945910 1.945910 81 +databas 2 122 2.079442 4.158884 86 +assist 1 112 2.197225 2.197225 113 +stuff 1 87 2.484907 2.484907 171 +know 1 80 2.564949 2.564949 198 +plai 2 60 2.833213 5.666426 307 +special 1 56 2.890372 2.890372 320 +cover 1 55 2.944439 2.944439 329 +sampl 1 53 2.944439 2.944439 339 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +favorit 1 44 3.135494 3.135494 410 +show 1 43 3.178054 3.178054 417 +around 1 43 3.178054 3.178054 415 +error 1 40 3.258097 3.258097 449 +electr 1 38 3.295837 3.295837 461 +origin 1 38 3.295837 3.295837 472 +respons 1 37 3.332205 3.332205 476 +soon 1 36 3.367296 3.367296 494 +abl 1 30 3.555348 3.555348 566 +wai 1 25 3.737670 3.737670 662 +instal 1 22 3.850148 3.850148 754 +watch 1 21 3.912023 3.912023 789 +mostli 1 19 4.007333 4.007333 869 +record 1 18 4.060443 4.060443 890 +stand 1 18 4.060443 4.060443 891 +train 1 14 4.317488 4.317488 1066 +classic 1 14 4.317488 4.317488 1084 +believ 1 13 4.382027 4.382027 1187 +mainli 1 10 4.653960 4.653960 1432 +didn 1 9 4.753590 4.753590 1563 +guitar 1 8 4.875197 4.875197 1758 +hit 1 7 5.010635 5.010635 1965 +seen 1 6 5.164786 5.164786 2202 +golf 1 6 5.164786 5.164786 2178 +pool 1 6 5.164786 5.164786 2225 +yeah 1 6 5.164786 5.164786 2195 +backup 1 4 5.568345 5.568345 2645 +metal 1 4 5.568345 5.568345 3079 +hourli 1 3 5.857933 5.857933 3734 +thrash 1 3 5.857933 5.857933 3400 +evil 1 3 5.857933 5.857933 3943 +mackai 3 2 6.263398 18.790194 5762 +fulltim 1 2 6.263398 6.263398 5170 +sybas 1 2 6.263398 6.263398 4723 +ingr 1 2 6.263398 6.263398 4097 +ey 1 2 6.263398 6.263398 5068 +metallica 1 2 6.263398 6.263398 4991 +raquetbal 1 2 6.263398 6.263398 6012 +towel 1 2 6.263398 6.263398 4793 +jerel 3 1 6.957497 20.872491 18652 +pagejerel 1 1 6.957497 6.957497 18653 +specialti 1 1 6.957497 6.957497 18654 +violin 1 1 6.957497 6.957497 18655 +baroqu 1 1 6.957497 6.957497 18656 +shock 1 1 6.957497 6.957497 18657 +funni 1 1 6.957497 6.957497 18658 +abba 1 1 6.957497 6.957497 18659 +shoot 1 1 6.957497 6.957497 18660 +jerellast 1 1 6.957497 6.957497 18661 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html new file mode 100644 index 00000000..6ef1426e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jglarson^jglarson.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +construct 1 139 1.945910 1.945910 82 +homepag 1 93 2.397895 2.397895 148 +johan 1 2 6.263398 6.263398 4900 +larson 1 1 6.957497 6.957497 18662 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html new file mode 100644 index 00000000..73ee17f5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jherro^jherro.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +time 2 293 1.098612 2.197224 17 +engin 2 297 1.098612 2.197224 20 +link 6 247 1.386294 8.317764 24 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +class 1 199 1.609438 1.609438 37 +year 3 148 1.945910 5.837730 84 +note 1 142 1.945910 1.945910 67 +like 1 132 1.945910 1.945910 81 +click 1 142 1.945910 1.945910 78 +number 2 130 2.079442 4.158884 97 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +take 1 97 2.302585 2.302585 134 +memori 1 101 2.302585 2.302585 139 +pictur 5 89 2.397895 11.989475 160 +imag 2 91 2.397895 4.795790 161 +homepag 2 93 2.397895 4.795790 148 +search 2 95 2.397895 4.795790 155 +mani 1 92 2.397895 2.397895 150 +follow 1 92 2.397895 2.397895 143 +select 1 91 2.397895 2.397895 154 +stuff 2 87 2.484907 4.969814 171 +thing 1 84 2.484907 2.484907 189 +start 1 83 2.484907 2.484907 173 +contain 1 81 2.484907 2.484907 174 +good 1 77 2.564949 2.564949 200 +want 1 79 2.564949 2.564949 199 +name 1 72 2.639057 2.639057 220 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +would 2 67 2.708050 5.416100 251 +java 1 70 2.708050 2.708050 248 +order 1 69 2.708050 2.708050 249 +date 1 51 2.995732 2.995732 344 +cool 5 49 3.044522 15.222610 374 +friend 2 48 3.044522 6.089044 376 +format 1 48 3.044522 3.044522 356 +archiv 1 49 3.044522 3.044522 364 +made 1 44 3.135494 3.135494 398 +directori 1 45 3.135494 3.135494 396 +realli 2 40 3.258097 6.516194 444 +societi 1 40 3.258097 3.258097 456 +join 1 39 3.258097 3.258097 457 +movi 1 40 3.258097 3.258097 459 +go 1 33 3.433987 3.433987 529 +anim 1 31 3.496508 3.496508 557 +someth 1 31 3.496508 3.496508 554 +exist 1 30 3.555348 3.555348 569 +though 1 27 3.637586 3.637586 622 +great 1 27 3.637586 3.637586 626 +mine 1 26 3.688879 3.688879 654 +enjoi 1 26 3.688879 3.688879 660 +rule 1 26 3.688879 3.688879 638 +todai 1 25 3.737670 3.737670 672 +frame 4 24 3.761200 15.044800 684 +seri 1 24 3.761200 3.761200 708 +head 1 23 3.806662 3.806662 732 +sequenc 1 23 3.806662 3.806662 734 +almost 1 22 3.850148 3.850148 742 +hierarchi 1 22 3.850148 3.850148 744 +half 1 21 3.912023 3.912023 776 +grad 1 20 3.951244 3.951244 837 +mpeg 1 20 3.951244 3.951244 831 +els 1 19 4.007333 4.007333 843 +club 1 15 4.248495 4.248495 1058 +happi 1 14 4.317488 4.317488 1079 +becam 1 14 4.317488 4.317488 1117 +trip 1 14 4.317488 4.317488 1113 +pretti 1 13 4.382027 4.382027 1191 +neat 1 12 4.465908 4.465908 1263 +realiti 1 12 4.465908 4.465908 1272 +death 1 10 4.653960 4.653960 1457 +float 1 9 4.753590 4.753590 1504 +explicit 1 9 4.753590 4.753590 1525 +said 1 9 4.753590 4.753590 1571 +sound 1 9 4.753590 4.753590 1605 +jack 1 8 4.875197 4.875197 1780 +relax 1 6 5.164786 5.164786 2120 +jpeg 1 6 5.164786 5.164786 2053 +apolog 1 6 5.164786 5.164786 2046 +benefit 1 6 5.164786 5.164786 2213 +default 2 5 5.347108 10.694216 2335 +girlfriend 1 5 5.347108 5.347108 2579 +frog 1 5 5.347108 5.347108 2479 +chaotic 1 5 5.347108 5.347108 2566 +semi 1 5 5.347108 5.347108 2510 +paus 2 4 5.568345 11.136690 2965 +notr 2 4 5.568345 11.136690 2880 +dame 2 4 5.568345 11.136690 2881 +relief 1 4 5.568345 5.568345 2784 +afraid 1 4 5.568345 5.568345 3053 +repeat 1 4 5.568345 5.568345 2798 +vital 1 4 5.568345 5.568345 2733 +bear 1 4 5.568345 5.568345 2651 +exploratori 1 4 5.568345 5.568345 3073 +exclus 1 4 5.568345 5.568345 2947 +jherro 1 3 5.857933 5.857933 3427 +let 1 3 5.857933 5.857933 3790 +tortur 1 3 5.857933 5.857933 3634 +nicknam 1 3 5.857933 5.857933 3716 +lame 1 3 5.857933 5.857933 3717 +haiku 1 3 5.857933 5.857933 3811 +bout 1 3 5.857933 5.857933 3670 +cano 2 2 6.263398 12.526796 5207 +apictur 1 2 6.263398 6.263398 5024 +kermit 1 2 6.263398 6.263398 4742 +intervent 1 2 6.263398 6.263398 6163 +shack 1 2 6.263398 6.263398 5369 +roomat 2 1 6.957497 13.914994 18663 +censorship 1 1 6.957497 6.957497 18664 +disembody 1 1 6.957497 6.957497 18665 +millisecond 1 1 6.957497 6.957497 18666 +overriden 1 1 6.957497 6.957497 18667 +aquir 1 1 6.957497 6.957497 18668 +skellington 1 1 6.957497 6.957497 18669 +thath 1 1 6.957497 6.957497 18670 +forgotten 1 1 6.957497 6.957497 18671 +cult 1 1 6.957497 6.957497 18672 +hippothi 1 1 6.957497 6.957497 18673 +matriarch 1 1 6.957497 6.957497 18674 +yahooooooooooooo 1 1 6.957497 6.957497 18675 +bazillion 1 1 6.957497 6.957497 18676 +muppet 1 1 6.957497 6.957497 18677 +rachel 1 1 6.957497 6.957497 18678 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html new file mode 100644 index 00000000..b11c3ac2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jiebing^jiebing.html @@ -0,0 +1,131 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +system 3 443 0.693147 2.079441 6 +inform 2 412 0.693147 1.386294 8 +interest 2 384 0.693147 1.386294 11 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +gener 2 220 1.386294 2.772588 27 +wisc 2 242 1.386294 2.772588 33 +public 3 202 1.609438 4.828314 43 +updat 1 191 1.609438 1.609438 41 +madison 3 165 1.791759 5.375277 55 +data 3 170 1.791759 5.375277 49 +wisconsin 2 169 1.791759 3.583518 54 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +object 2 138 1.945910 3.891820 79 +process 2 142 1.945910 3.891820 72 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +perform 1 143 1.945910 1.945910 74 +click 1 142 1.945910 1.945910 78 +databas 3 122 2.079442 6.238326 86 +technolog 2 131 2.079442 4.158884 102 +confer 2 126 2.079442 4.158884 100 +dayton 1 119 2.079442 2.079442 104 +studi 1 120 2.079442 2.079442 91 +manag 2 114 2.197225 4.394450 125 +version 1 113 2.197225 2.197225 122 +find 1 111 2.197225 2.197225 111 +octob 2 89 2.397895 4.795790 156 +proceed 2 93 2.397895 4.795790 152 +pictur 2 89 2.397895 4.795790 160 +imag 1 91 2.397895 2.397895 161 +educ 1 86 2.484907 2.484907 191 +west 1 83 2.484907 2.484907 192 +larg 1 82 2.484907 2.484907 168 +ieee 1 86 2.484907 2.484907 190 +appear 2 78 2.564949 5.129898 210 +server 2 76 2.564949 5.129898 204 +orient 1 80 2.564949 2.564949 205 +david 1 71 2.639057 2.639057 232 +effici 1 73 2.639057 2.639057 233 +workshop 1 71 2.639057 2.639057 239 +septemb 2 65 2.772589 5.545178 274 +prof 1 64 2.772589 2.772589 273 +evalu 1 64 2.772589 2.772589 266 +juli 1 60 2.833213 2.833213 305 +index 1 56 2.890372 2.890372 309 +februari 2 54 2.944439 5.888878 328 +advisor 2 51 2.995732 5.991464 355 +pointer 2 48 3.044522 6.089044 368 +approach 1 48 3.044522 3.044522 366 +set 1 50 3.044522 3.044522 361 +execut 1 45 3.135494 3.135494 404 +submit 2 39 3.258097 6.516194 440 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +tree 1 36 3.367296 3.367296 492 +queri 3 33 3.433987 10.301961 524 +storag 4 31 3.496508 13.986032 553 +full 1 28 3.610918 3.610918 615 +constraint 2 26 3.688879 7.377758 636 +client 2 25 3.737670 7.475340 679 +store 1 24 3.761200 3.761200 693 +size 2 23 3.806662 7.613324 713 +tenni 2 20 3.951244 7.902488 838 +sigmod 1 19 4.007333 4.007333 877 +white 1 17 4.110874 4.110874 951 +hobbi 1 16 4.174387 4.174387 1009 +spatial 1 16 4.174387 4.174387 988 +ramakrishnan 1 16 4.174387 4.174387 972 +dbm 3 13 4.382027 13.146081 1136 +nasa 1 13 4.382027 4.382027 1188 +dewitt 4 12 4.465908 17.863632 1270 +impact 1 11 4.553877 4.553877 1334 +naughton 2 10 4.653960 9.307920 1450 +resid 1 10 4.653960 4.653960 1461 +conferenceon 1 9 4.753590 4.753590 1595 +volleybal 1 9 4.753590 4.753590 1598 +paradis 3 8 4.875197 14.625591 1782 +assistantdepart 1 8 4.875197 4.875197 1784 +mass 1 8 4.875197 4.875197 1732 +burger 1 7 5.010635 5.010635 1889 +patel 2 6 5.164786 10.329572 2154 +geograph 1 6 5.164786 5.164786 2236 +goldstein 1 6 5.164786 5.164786 2168 +tuft 1 5 5.347108 5.347108 2575 +houston 1 5 5.347108 5.347108 2460 +water 1 5 5.347108 5.347108 2535 +ramasami 1 4 5.568345 5.568345 3088 +batch 1 4 5.568345 5.568345 2700 +tape 1 4 5.568345 5.568345 2959 +satellit 1 4 5.568345 5.568345 3077 +chile 1 4 5.568345 5.568345 3082 +yong 1 4 5.568345 5.568345 2809 +raft 1 4 5.568345 5.568345 3060 +tertiari 2 3 5.857933 11.715866 3193 +informationresearch 1 3 5.857933 5.857933 3675 +edueduc 1 3 5.857933 5.857933 4004 +shorter 1 3 5.857933 5.857933 3998 +santiago 1 3 5.857933 5.857933 4013 +kabra 2 2 6.263398 12.526796 6139 +dewittresearch 1 2 6.263398 6.263398 6185 +shaft 1 2 6.263398 6.263398 6186 +eosdi 1 2 6.263398 6.263398 6124 +bing 3 1 6.957497 20.872491 18679 +jieb 2 1 6.957497 13.914994 18680 +systemsresearch 1 1 6.957497 6.957497 18681 +shorepublicationsbuild 1 1 6.957497 6.957497 18682 +scaleabl 1 1 6.957497 6.957497 18683 +implment 1 1 6.957497 6.957497 18684 +lueder 1 1 6.957497 6.957497 18685 +ellman 1 1 6.957497 6.957497 18686 +kupsch 1 1 6.957497 6.957497 18687 +prong 1 1 6.957497 6.957497 18688 +tile 1 1 6.957497 6.957497 18689 +goddard 1 1 6.957497 6.957497 18690 +reclam 1 1 6.957497 6.957497 18691 +reorgan 1 1 6.957497 6.957497 18692 +serverpersist 1 1 6.957497 6.957497 18693 +grouphobbi 1 1 6.957497 6.957497 18694 +volleyballweb 1 1 6.957497 6.957497 18695 +whitewat 1 1 6.957497 6.957497 18696 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html new file mode 100644 index 00000000..49c51910 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jignesh^jignesh.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +public 2 202 1.609438 3.218876 43 +paper 2 205 1.609438 3.218876 38 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +base 1 165 1.791759 1.791759 50 +algorithm 1 162 1.791759 1.791759 57 +relat 2 139 1.945910 3.891820 68 +object 1 138 1.945910 1.945910 79 +model 1 145 1.945910 1.945910 69 +databas 2 122 2.079442 4.158884 86 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +street 1 63 2.772589 2.772589 293 +virtual 1 62 2.772589 2.772589 285 +publish 3 57 2.890372 8.671116 326 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +join 2 39 3.258097 6.516194 457 +bookmark 1 26 3.688879 3.688879 639 +client 1 25 3.737670 3.737670 679 +accur 1 25 3.737670 3.737670 680 +miscellan 1 23 3.806662 3.806662 731 +sigmod 1 19 4.007333 4.007333 877 +partit 1 16 4.174387 4.174387 984 +spatial 1 16 4.174387 4.174387 988 +hybrid 1 15 4.248495 4.248495 1057 +sigmetr 1 13 4.382027 4.382027 1173 +dewitt 1 12 4.465908 4.465908 1270 +vldb 1 10 4.653960 4.653960 1470 +paradis 3 8 4.875197 14.625591 1782 +hash 1 8 4.875197 4.875197 1618 +tourist 1 8 4.875197 4.875197 1710 +merg 1 7 5.010635 5.010635 1862 +patel 1 6 5.164786 5.164786 2154 +inlin 1 4 5.568345 5.568345 2964 +skate 1 4 5.568345 5.568345 3046 +jignesh 3 1 6.957497 20.872491 18697 +madhuri 1 1 6.957497 6.957497 18698 +kashmir 1 1 6.957497 6.957497 18699 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~joev^joev.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~joev^joev.html new file mode 100644 index 00000000..ce6eb867 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~joev^joev.html @@ -0,0 +1,11 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +click 1 142 1.945910 1.945910 78 +peopl 1 96 2.302585 2.302585 132 +netscap 1 44 3.135494 3.135494 395 +download 1 36 3.367296 3.367296 489 +georg 1 16 4.174387 4.174387 994 +warn 1 14 4.317488 4.317488 1068 +pretti 1 13 4.382027 4.382027 1191 +varghes 1 3 5.857933 5.857933 3442 +lame 1 3 5.857933 5.857933 3717 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html new file mode 100644 index 00000000..07926ca2 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~johannes^johannes.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +system 3 443 0.693147 2.079441 6 +interest 2 384 0.693147 1.386294 11 +inform 2 412 0.693147 1.386294 8 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +time 3 293 1.098612 3.295836 17 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +washington 1 236 1.386294 1.386294 32 +public 2 202 1.609438 3.218876 43 +utexa 1 189 1.609438 1.609438 44 +madison 4 165 1.791759 7.167036 55 +wisconsin 2 169 1.791759 3.583518 54 +data 1 170 1.791759 1.791759 49 +contact 1 153 1.791759 1.791759 59 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +area 2 144 1.945910 3.891820 80 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +intern 2 108 2.197225 4.394450 128 +version 2 113 2.197225 4.394450 122 +real 2 93 2.397895 4.795790 144 +proceed 1 93 2.397895 2.397895 152 +resourc 2 81 2.484907 4.969814 172 +ieee 2 86 2.484907 4.969814 190 +west 1 83 2.484907 2.484907 192 +decemb 1 80 2.564949 2.564949 215 +appear 1 78 2.564949 2.564949 210 +april 1 77 2.564949 2.564949 196 +symposium 1 72 2.639057 2.639057 238 +street 1 63 2.772589 2.772589 293 +share 2 59 2.833213 5.666426 304 +room 1 59 2.833213 2.833213 301 +februari 1 54 2.944439 2.944439 328 +multipl 1 39 3.258097 3.258097 453 +mine 1 26 3.688879 3.688879 654 +task 1 25 3.737670 3.737670 678 +period 1 22 3.850148 3.850148 743 +alloc 1 20 3.951244 3.951244 821 +expand 1 17 4.110874 4.110874 928 +universityof 1 15 4.248495 4.248495 1061 +inproceed 1 8 4.875197 4.875197 1670 +sciencesdepart 1 6 5.164786 5.164786 2020 +height 1 4 5.568345 5.568345 2890 +johann 3 3 5.857933 17.573799 3758 +plaxton 2 3 5.857933 11.715866 3886 +ofwisconsin 1 3 5.857933 5.857933 4002 +proport 1 3 5.857933 5.857933 3293 +baruah 2 2 6.263398 12.526796 5753 +studentat 1 2 6.263398 6.263398 5877 +databasemanag 1 2 6.263398 6.263398 4089 +underprofessor 1 2 6.263398 6.263398 6045 +linkscontact 1 2 6.263398 6.263398 5708 +eagl 1 2 6.263398 6.263398 5731 +jeffai 1 2 6.263398 6.263398 4357 +technicalreport 1 2 6.263398 6.263398 5615 +gehrk 4 1 6.957497 27.829988 18700 +homepagejohann 1 1 6.957497 6.957497 18701 +gehrkewelcom 1 1 6.957497 6.957497 18702 +raghuramakrishnan 1 1 6.957497 6.957497 18703 +stoica 1 1 6.957497 6.957497 18704 +abdel 1 1 6.957497 6.957497 18705 +wahab 1 1 6.957497 6.957497 18706 +algorithmfor 1 1 6.957497 6.957497 18707 +anexpand 1 1 6.957497 6.957497 18708 +fastschedul 1 1 6.957497 6.957497 18709 +processingsymposium 1 1 6.957497 6.957497 18710 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html new file mode 100644 index 00000000..02efc5cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jonb^jonb.html @@ -0,0 +1,72 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +student 2 343 1.098612 2.197224 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +list 2 201 1.609438 3.218876 39 +modifi 1 178 1.609438 1.609438 35 +madison 4 165 1.791759 7.167036 55 +wisconsin 1 169 1.791759 1.791759 54 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +model 1 145 1.945910 1.945910 69 +click 1 142 1.945910 1.945910 78 +construct 1 139 1.945910 1.945910 82 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +databas 1 122 2.079442 2.079442 86 +compil 1 122 2.079442 2.079442 96 +site 2 106 2.197225 4.394450 119 +look 1 107 2.197225 2.197225 115 +check 1 115 2.197225 2.197225 118 +manag 1 114 2.197225 2.197225 125 +question 1 91 2.397895 2.397895 141 +section 1 94 2.397895 2.397895 149 +thing 1 84 2.484907 2.484907 189 +good 1 77 2.564949 2.564949 200 +visit 1 63 2.772589 2.772589 288 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +maintain 1 51 2.995732 2.995732 342 +frequent 1 49 3.044522 3.044522 367 +keep 1 44 3.135494 3.135494 409 +industri 1 38 3.295837 3.295837 464 +often 1 31 3.496508 3.496508 551 +ask 1 28 3.610918 3.610918 597 +releas 1 28 3.610918 3.610918 616 +great 1 27 3.637586 3.637586 626 +mind 1 27 3.637586 3.637586 632 +latest 1 21 3.912023 3.912023 785 +dilbert 1 16 4.174387 4.174387 996 +appl 2 11 4.553877 9.107754 1303 +pagewelcom 1 11 4.553877 4.553877 1344 +spot 1 7 5.010635 5.010635 1894 +bodner 1 5 5.347108 5.347108 2401 +alma 1 3 5.857933 5.857933 3963 +herei 1 2 6.263398 6.263398 6187 +mater 1 2 6.263398 6.263398 5930 +nando 1 2 6.263398 6.263398 5458 +numb 1 2 6.263398 6.263398 5505 +soap 1 2 6.263398 6.263398 4511 +jonb 1 2 6.263398 6.263398 4771 +mound 1 2 6.263398 6.263398 4773 +powerbook 1 1 6.957497 6.957497 18711 +amass 1 1 6.957497 6.957497 18712 +catagori 1 1 6.957497 6.957497 18713 +needsth 1 1 6.957497 6.957497 18714 +coverageth 1 1 6.957497 6.957497 18715 +operaish 1 1 6.957497 6.957497 18716 +drivelziffnet 1 1 6.957497 6.957497 18717 +newsc 1 1 6.957497 6.957497 18718 +classworktodai 1 1 6.957497 6.957497 18719 +chucklejon 1 1 6.957497 6.957497 18720 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jq^jq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jq^jq.html new file mode 100644 index 00000000..449ae0f8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jq^jq.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 4 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +comput 3 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 5 457 0.693147 3.465735 12 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +student 2 343 1.098612 2.197224 19 +engin 1 297 1.098612 1.098612 20 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +madison 3 165 1.791759 5.375277 55 +network 2 168 1.791759 3.583518 61 +wisconsin 1 169 1.791759 1.791759 54 +data 1 170 1.791759 1.791759 49 +address 1 170 1.791759 1.791759 62 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +process 1 142 1.945910 1.945910 72 +spring 2 131 2.079442 4.158884 88 +welcom 1 122 2.079442 2.079442 99 +databas 1 122 2.079442 2.079442 86 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +structur 1 106 2.197225 2.197225 105 +place 1 106 2.197225 2.197225 124 +site 1 106 2.197225 2.197225 119 +advanc 2 99 2.302585 4.605170 130 +technic 1 100 2.302585 2.302585 140 +imag 1 91 2.397895 2.397895 161 +commun 1 95 2.397895 2.397895 157 +sinc 1 90 2.397895 2.397895 159 +institut 1 84 2.484907 2.484907 187 +activ 1 84 2.484907 2.484907 182 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +refer 1 78 2.564949 2.564949 203 +degre 1 69 2.708050 2.708050 259 +street 1 63 2.772589 2.772589 293 +finger 1 52 2.995732 2.995732 354 +visitor 1 49 3.044522 3.044522 371 +could 1 46 3.091042 3.091042 383 +autom 1 41 3.218876 3.218876 434 +china 2 37 3.332205 6.664410 487 +chines 2 29 3.583519 7.167038 595 +pattern 1 24 3.761200 3.761200 689 +recognit 1 23 3.806662 3.806662 723 +beij 1 19 4.007333 4.007333 876 +stanford 1 17 4.110874 4.110874 955 +medic 1 17 4.110874 4.110874 958 +came 1 13 4.382027 4.382027 1197 +academi 2 8 4.875197 9.750394 1735 +signal 1 7 5.010635 5.010635 1910 +provinc 1 4 5.568345 5.568345 3009 +biomed 1 4 5.568345 5.568345 2905 +hometown 1 3 5.857933 5.857933 3817 +nanj 2 2 6.263398 12.526796 5728 +jiangsu 1 2 6.263398 6.263398 5586 +southeast 1 2 6.263398 6.263398 6188 +frombeij 1 1 6.957497 6.957497 18721 +capitol 1 1 6.957497 6.957497 18722 +specil 1 1 6.957497 6.957497 18723 +chinacurr 1 1 6.957497 6.957497 18724 +tele 1 1 6.957497 6.957497 18725 +stuffjava 1 1 6.957497 6.957497 18726 +placeshor 1 1 6.957497 6.957497 18727 +tutorialchina 1 1 6.957497 6.957497 18728 +affairchina 1 1 6.957497 6.957497 18729 +democracybeij 1 1 6.957497 6.957497 18730 +groupstanford 1 1 6.957497 6.957497 18731 +informaticsmit 1 1 6.957497 6.957497 18732 +processingjob 1 1 6.957497 6.957497 18733 +newsyou 1 1 6.957497 6.957497 18734 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html new file mode 100644 index 00000000..19b580bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jshabel^jshabel.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +engin 2 297 1.098612 2.197224 20 +student 1 343 1.098612 1.098612 19 +graduat 2 215 1.386294 2.772588 31 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +hour 1 165 1.791759 1.791759 46 +architectur 1 139 1.945910 1.945910 77 +year 1 148 1.945910 1.945910 84 +high 2 130 2.079442 4.158884 101 +welcom 1 122 2.079442 2.079442 99 +schedul 1 119 2.079442 2.079442 85 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +school 2 84 2.484907 4.969814 188 +second 1 81 2.484907 2.484907 166 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +state 1 76 2.564949 2.564949 207 +view 2 70 2.708050 5.416100 254 +receiv 1 66 2.708050 2.708050 244 +window 1 68 2.708050 2.708050 242 +new 4 64 2.772589 11.090356 262 +plan 1 65 2.772589 2.772589 272 +appoint 1 49 3.044522 3.044522 358 +favorit 2 44 3.135494 6.270988 410 +music 1 42 3.218876 3.218876 436 +join 2 39 3.258097 6.516194 457 +electr 1 38 3.295837 3.295837 461 +print 1 34 3.401197 3.401197 503 +team 1 27 3.637586 3.637586 625 +jeff 2 25 3.737670 7.475340 673 +background 1 25 3.737670 3.737670 664 +sport 1 25 3.737670 3.737670 683 +miscellan 1 23 3.806662 3.806662 731 +emphasi 1 22 3.850148 3.850148 755 +deal 1 22 3.850148 3.850148 736 +hous 2 21 3.912023 7.824046 801 +thur 1 19 4.007333 4.007333 847 +statu 1 18 4.060443 4.060443 885 +jose 6 16 4.174387 25.046322 976 +diego 1 16 4.174387 4.174387 992 +club 1 15 4.248495 4.248495 1058 +francisco 1 14 4.317488 4.317488 1095 +basketbal 1 12 4.465908 4.465908 1289 +song 1 11 4.553877 4.553877 1380 +tue 1 11 4.553877 4.553877 1308 +town 1 10 4.653960 4.653960 1458 +vista 1 10 4.653960 4.653960 1452 +theme 1 8 4.875197 4.875197 1707 +hockei 1 8 4.875197 4.875197 1760 +golden 1 7 5.010635 5.010635 1962 +footbal 1 7 5.010635 5.010635 1912 +tip 1 7 5.010635 5.010635 1863 +cupertino 2 2 6.263398 12.526796 4956 +columbia 2 2 6.263398 12.526796 5900 +mercuri 4 1 6.957497 27.829988 18735 +andnando 3 1 6.957497 20.872491 18736 +shabel 2 1 6.957497 13.914994 18737 +pagech 1 1 6.957497 6.957497 18738 +wisconsinch 1 1 6.957497 6.957497 18739 +informationmajor 1 1 6.957497 6.957497 18740 +monta 1 1 6.957497 6.957497 18741 +warrior 1 1 6.957497 6.957497 18742 +shark 1 1 6.957497 6.957497 18743 +oakland 1 1 6.957497 6.957497 18744 +newsmus 1 1 6.957497 6.957497 18745 +jshabel 1 1 6.957497 6.957497 18746 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html new file mode 100644 index 00000000..a86ed39f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jussi^jussi.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +home 4 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +depart 4 457 0.693147 2.772588 12 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +engin 2 297 1.098612 2.197224 20 +us 1 329 1.098612 1.098612 16 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +wisc 2 242 1.386294 2.772588 33 +link 2 247 1.386294 2.772588 24 +email 1 220 1.386294 1.386294 29 +includ 2 208 1.609438 3.218876 42 +class 2 199 1.609438 3.218876 37 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +data 9 170 1.791759 16.125831 49 +wisconsin 3 169 1.791759 5.375277 54 +recent 2 167 1.791759 3.583518 58 +parallel 2 169 1.791759 3.583518 60 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +perform 5 143 1.945910 9.729550 74 +relat 2 139 1.945910 3.891820 68 +problem 1 147 1.945910 1.945910 75 +model 1 145 1.945910 1.945910 69 +report 8 131 2.079442 16.635536 92 +technolog 5 131 2.079442 10.397210 102 +confer 5 126 2.079442 10.397210 100 +analysi 2 124 2.079442 4.158884 98 +dayton 1 119 2.079442 2.079442 104 +studi 1 120 2.079442 2.079442 91 +structur 3 106 2.197225 6.591675 105 +intern 2 108 2.197225 4.394450 128 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +manag 1 114 2.197225 2.197225 125 +technic 2 100 2.302585 4.605170 140 +memori 1 101 2.302585 2.302585 139 +access 1 102 2.302585 2.302585 136 +need 1 98 2.302585 2.302585 135 +proceed 5 93 2.397895 11.989475 152 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +search 1 95 2.397895 2.397895 155 +larg 5 82 2.484907 12.424535 168 +west 1 83 2.484907 2.484907 192 +environ 1 84 2.484907 2.484907 177 +master 2 76 2.564949 5.129898 216 +server 1 76 2.564949 2.564949 204 +appli 2 71 2.639057 5.278114 226 +solv 1 73 2.639057 2.639057 234 +involv 1 71 2.639057 2.639057 227 +workshop 1 71 2.639057 2.639057 239 +integr 2 67 2.708050 5.416100 245 +august 1 66 2.708050 2.708050 257 +organ 2 65 2.772589 5.545178 265 +street 1 63 2.772589 2.772589 293 +prof 1 64 2.772589 2.772589 273 +improv 1 62 2.772589 2.772589 289 +complex 1 64 2.772589 2.772589 269 +evalu 1 64 2.772589 2.772589 266 +januari 1 62 2.772589 2.772589 264 +experi 1 64 2.772589 2.772589 283 +explor 3 58 2.890372 8.671116 324 +thesi 1 57 2.890372 2.890372 327 +overview 1 56 2.890372 2.890372 323 +scientif 1 53 2.944439 2.944439 341 +februari 1 54 2.944439 2.944439 328 +found 1 53 2.944439 2.944439 337 +digit 2 52 2.995732 5.991464 348 +advisor 1 51 2.995732 2.995732 355 +visual 3 48 3.044522 9.133566 372 +set 2 50 3.044522 6.089044 361 +telephon 1 50 3.044522 3.044522 373 +frequent 1 49 3.044522 3.044522 367 +adapt 1 46 3.091042 3.091042 387 +discuss 1 45 3.135494 3.135494 399 +third 1 43 3.178054 3.178054 412 +linear 1 41 3.218876 3.218876 431 +join 4 39 3.258097 13.032388 457 +submit 3 39 3.258097 9.774291 440 +societi 1 40 3.258097 3.258097 456 +industri 1 38 3.295837 3.295837 464 +concurr 1 34 3.401197 3.401197 501 +queri 1 33 3.433987 3.433987 524 +storag 1 31 3.496508 3.496508 553 +synchron 2 29 3.583519 7.167038 588 +limit 1 29 3.583519 3.583519 585 +measur 1 28 3.610918 3.610918 609 +arrai 1 27 3.637586 3.637586 627 +client 1 25 3.737670 3.737670 679 +jeff 1 25 3.737670 3.737670 673 +disk 8 22 3.850148 30.801184 747 +chen 1 21 3.912023 3.912023 791 +media 1 19 4.007333 4.007333 861 +sigmod 1 19 4.007333 4.007333 877 +miller 1 17 4.110874 4.110874 949 +ramakrishnan 2 16 4.174387 8.348774 972 +livni 7 15 4.248495 29.739465 1053 +miron 4 14 4.317488 17.269952 1110 +dbm 1 13 4.382027 4.382027 1136 +buffer 2 12 4.465908 8.931816 1211 +characterist 1 12 4.465908 4.465908 1257 +daniel 1 12 4.465908 4.465908 1233 +raghu 1 12 4.465908 4.465908 1212 +optic 1 12 4.465908 4.465908 1221 +volum 1 11 4.553877 4.553877 1347 +alpha 1 11 4.553877 4.553877 1348 +resid 1 10 4.653960 4.653960 1461 +unifi 1 8 4.875197 4.875197 1774 +dataset 2 7 5.010635 10.021270 1914 +refere 1 7 5.010635 5.010635 1895 +eduresearch 1 6 5.164786 5.164786 2205 +divers 1 6 5.164786 5.164786 2232 +spie 1 6 5.164786 5.164786 2119 +quantum 1 6 5.164786 5.164786 2214 +almaden 1 5 5.347108 5.347108 2511 +tape 7 4 5.568345 38.978415 2959 +theintern 2 4 5.568345 11.136690 2981 +metadata 1 4 5.568345 5.568345 2945 +karen 1 4 5.568345 5.568345 2796 +ford 1 4 5.568345 5.568345 2636 +filesystem 1 4 5.568345 5.568345 2587 +tertiari 4 3 5.857933 23.431732 3193 +myllymaki 3 3 5.857933 17.573799 4022 +alsoavail 1 3 5.857933 5.857933 3887 +wenger 1 3 5.857933 5.857933 4023 +schwarz 1 3 5.857933 5.857933 3986 +trishul 1 3 5.857933 5.857933 4016 +chilimbi 1 3 5.857933 5.857933 4015 +raid 1 3 5.857933 5.857933 4012 +jussi 4 2 6.263398 25.053592 6133 +andvisu 1 2 6.263398 6.263398 6189 +karavan 1 2 6.263398 6.263398 6190 +andtool 1 2 6.263398 6.263398 5126 +beyer 1 2 6.263398 6.263398 6103 +lawand 1 2 6.263398 6.263398 6191 +dataengin 1 2 6.263398 6.263398 6118 +helsinki 1 2 6.263398 6.263398 5702 +storageto 2 1 6.957497 13.914994 18747 +andtap 2 1 6.957497 13.914994 18748 +yoav 2 1 6.957497 13.914994 18749 +weiss 2 1 6.957497 13.914994 18750 +scsi 2 1 6.957497 13.914994 18751 +myllymakijussi 1 1 6.957497 6.957497 18752 +summaryi 1 1 6.957497 6.957497 18753 +onadvanc 1 1 6.957497 6.957497 18754 +mcurrent 1 1 6.957497 6.957497 18755 +deviseproject 1 1 6.957497 6.957497 18756 +mironlivni 1 1 6.957497 6.957497 18757 +joinsof 1 1 6.957497 6.957497 18758 +listbelow 1 1 6.957497 6.957497 18759 +andfunct 1 1 6.957497 6.957497 18760 +datavisu 1 1 6.957497 6.957497 18761 +managementissu 1 1 6.957497 6.957497 18762 +publicationseffici 1 1 6.957497 6.957497 18763 +programperform 1 1 6.957497 6.957497 18764 +bartonp 1 1 6.957497 6.957497 18765 +tertiarystorag 1 1 6.957497 6.957497 18766 +withmiron 1 1 6.957497 6.957497 18767 +acmsigmetr 1 1 6.957497 6.957497 18768 +publicationdevis 1 1 6.957497 6.957497 18769 +donjerkov 1 1 6.957497 6.957497 18770 +andmiron 1 1 6.957497 6.957497 18771 +publicationsdisk 1 1 6.957497 6.957497 18772 +tapeaccess 1 1 6.957497 6.957497 18773 +degreeproject 1 1 6.957497 6.957497 18774 +networkarchitectur 1 1 6.957497 6.957497 18775 +finnish 1 1 6.957497 6.957497 18776 +documentsimplement 1 1 6.957497 6.957497 18777 +treealgorithm 1 1 6.957497 6.957497 18778 +productsoverview 1 1 6.957497 6.957497 18779 +supplier 1 1 6.957497 6.957497 18780 +productssom 1 1 6.957497 6.957497 18781 +adaptec 1 1 6.957497 6.957497 18782 +workstationsandpcsandtechn 1 1 6.957497 6.957497 18783 +journaland 1 1 6.957497 6.957497 18784 +whitepap 1 1 6.957497 6.957497 18785 +researchandcyberjourn 1 1 6.957497 6.957497 18786 +tapeanddlt 1 1 6.957497 6.957497 18787 +faqandwhitepap 1 1 6.957497 6.957497 18788 +solarisandsparcstationsandtechn 1 1 6.957497 6.957497 18789 +faqandstorag 1 1 6.957497 6.957497 18790 +faqand 1 1 6.957497 6.957497 18791 +otherusenet 1 1 6.957497 6.957497 18792 +faqsmani 1 1 6.957497 6.957497 18793 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html new file mode 100644 index 00000000..1d5696e9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~jyothi^jyothi.html @@ -0,0 +1,15 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +construct 1 139 1.945910 1.945910 82 +homepag 1 93 2.397895 2.397895 148 +grade 1 90 2.397895 2.397895 142 +info 1 85 2.484907 2.484907 176 +other 1 24 3.761200 3.761200 697 +sorri 1 4 5.568345 5.568345 3059 +jyothi 1 3 5.857933 5.857933 3423 +jyothithi 1 1 6.957497 6.957497 18794 +dissappoint 1 1 6.957497 6.957497 18795 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html new file mode 100644 index 00000000..74158327 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~karavan^karavan.html @@ -0,0 +1,89 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +class 1 199 1.609438 1.609438 37 +madison 4 165 1.791759 7.167036 55 +parallel 2 169 1.791759 3.583518 60 +wisconsin 1 169 1.791759 1.791759 54 +perform 2 143 1.945910 3.891820 74 +support 2 132 1.945910 3.891820 83 +process 1 142 1.945910 1.945910 72 +high 3 130 2.079442 6.238326 101 +tool 1 117 2.079442 2.079442 93 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +site 1 106 2.197225 2.197225 119 +need 1 98 2.302585 2.302585 135 +school 4 84 2.484907 9.939628 188 +learn 1 86 2.484907 2.484907 170 +west 1 83 2.484907 2.484907 192 +environ 1 84 2.484907 2.484907 177 +thing 1 84 2.484907 2.484907 189 +know 1 80 2.564949 2.564949 198 +servic 1 72 2.639057 2.639057 236 +free 1 73 2.639057 2.639057 224 +street 1 63 2.772589 2.772589 293 +undergradu 1 54 2.944439 2.944439 338 +cool 1 49 3.044522 3.044522 374 +life 1 50 3.044522 3.044522 375 +could 1 46 3.091042 3.091042 383 +autom 1 41 3.218876 3.218876 434 +alumni 1 21 3.912023 3.912023 807 +miss 1 19 4.007333 4.007333 866 +thoma 1 18 4.060443 4.060443 901 +women 1 16 4.174387 4.174387 1004 +anyth 1 16 4.174387 4.174387 998 +save 1 14 4.317488 4.317488 1099 +tune 1 12 4.465908 4.465908 1227 +safe 1 12 4.465908 4.465908 1274 +paradyn 1 9 4.753590 4.753590 1614 +tutor 1 9 4.753590 4.753590 1552 +port 1 8 4.875197 4.875197 1766 +pursu 1 7 5.010635 5.010635 1902 +constitut 1 6 5.164786 5.164786 2026 +ship 2 5 5.347108 10.694216 2534 +salt 1 5 5.347108 5.347108 2413 +water 1 5 5.347108 5.347108 2535 +sail 1 5 5.347108 5.347108 2571 +karen 2 4 5.568345 11.136690 2796 +frontier 1 3 5.857933 5.857933 3771 +counti 1 3 5.857933 5.857933 3682 +karavan 1 2 6.263398 6.263398 6190 +wic 1 2 6.263398 6.263398 4673 +dane 1 2 6.263398 6.263398 5534 +pagefor 1 2 6.263398 6.263398 6151 +lover 1 2 6.263398 6.263398 6192 +tear 1 2 6.263398 6.263398 5076 +karavaniceveryth 1 1 6.957497 6.957497 18796 +karavanicresearch 1 1 6.957497 6.957497 18797 +databasesask 1 1 6.957497 6.957497 18798 +studentstrio 1 1 6.957497 6.957497 18799 +safer 1 1 6.957497 6.957497 18800 +chocol 1 1 6.957497 6.957497 18801 +onlystuyves 1 1 6.957497 6.957497 18802 +associationstuyves 1 1 6.957497 6.957497 18803 +legisl 1 1 6.957497 6.957497 18804 +internetth 1 1 6.957497 6.957497 18805 +cure 1 1 6.957497 6.957497 18806 +sweat 1 1 6.957497 6.957497 18807 +isak 1 1 6.957497 6.957497 18808 +dinesen 1 1 6.957497 6.957497 18809 +admir 1 1 6.957497 6.957497 18810 +grace 1 1 6.957497 6.957497 18811 +hopper 1 1 6.957497 6.957497 18812 +pioneer 1 1 6.957497 6.957497 18813 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html new file mode 100644 index 00000000..d8386d0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kaxiras^kaxiras.html @@ -0,0 +1,110 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +work 1 380 0.693147 0.693147 9 +system 1 443 0.693147 0.693147 6 +us 2 329 1.098612 2.197224 16 +design 2 213 1.386294 2.772588 25 +softwar 2 220 1.386294 2.772588 30 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +wisconsin 3 169 1.791759 5.375277 54 +parallel 3 169 1.791759 5.375277 60 +recent 2 167 1.791759 3.583518 58 +base 2 165 1.791759 3.583518 50 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +implement 1 152 1.791759 1.791759 52 +perform 3 143 1.945910 5.837730 74 +architectur 2 139 1.945910 3.891820 77 +process 1 142 1.945910 1.945910 72 +high 2 130 2.079442 4.158884 101 +confer 1 126 2.079442 2.079442 100 +report 1 131 2.079442 2.079442 92 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +intern 4 108 2.197225 8.788900 128 +assist 1 112 2.197225 2.197225 113 +specif 1 106 2.197225 2.197225 106 +memori 3 101 2.302585 6.907755 139 +technic 1 100 2.302585 2.302585 140 +proceed 2 93 2.397895 4.795790 152 +ieee 1 86 2.484907 2.484907 190 +wide 1 84 2.484907 2.484907 185 +appear 2 78 2.564949 5.129898 210 +interfac 1 79 2.564949 2.564949 209 +april 1 77 2.564949 2.564949 196 +summari 2 73 2.639057 5.278114 237 +workshop 2 71 2.639057 5.278114 239 +david 1 71 2.639057 2.639057 232 +symposium 1 72 2.639057 2.639057 238 +august 1 66 2.708050 2.708050 257 +simul 1 66 2.708050 2.708050 255 +dept 1 64 2.772589 2.772589 291 +share 2 59 2.833213 5.666426 304 +march 1 61 2.833213 2.833213 295 +juli 1 60 2.833213 2.833213 305 +extens 8 53 2.944439 23.555512 340 +processor 2 54 2.944439 5.888878 335 +hardwar 1 51 2.995732 2.995732 350 +standard 1 48 3.044522 3.044522 365 +protocol 1 45 3.135494 3.135494 407 +cach 3 41 3.218876 9.656628 432 +editor 1 41 3.218876 3.218876 433 +examin 1 42 3.218876 3.218876 424 +autom 1 41 3.218876 3.218876 434 +prototyp 1 38 3.295837 3.295837 463 +cost 2 37 3.332205 6.664410 480 +jame 5 35 3.401197 17.005985 507 +option 1 30 3.555348 3.555348 575 +multiprocessor 1 28 3.610918 3.610918 605 +aspect 1 25 3.737670 3.737670 663 +supercomput 1 25 3.737670 3.737670 681 +scalabl 1 24 3.761200 3.761200 705 +methodolog 1 23 3.806662 3.806662 733 +synthesi 2 20 3.951244 7.902488 834 +north 1 19 4.007333 4.007333 873 +wind 1 18 4.060443 4.060443 908 +monitor 1 17 4.110874 4.110874 941 +hierarch 3 15 4.248495 12.745485 1018 +coher 3 14 4.317488 12.952464 1109 +prolog 2 13 4.382027 8.764054 1155 +introduc 1 13 4.382027 4.382027 1139 +tunnel 1 9 4.753590 4.753590 1615 +depth 1 8 4.875197 4.875197 1636 +upcom 1 8 4.875197 4.875197 1685 +goodman 3 7 5.010635 15.031905 1891 +dedic 1 7 5.010635 5.010635 1843 +greec 1 6 5.164786 5.164786 2208 +holland 1 5 5.347108 5.347108 2490 +publicationsresearch 1 4 5.568345 5.568345 2876 +galileo 1 4 5.568345 5.568345 3086 +microprogram 1 4 5.568345 5.568345 2604 +stefano 7 3 5.857933 41.005531 3372 +kaxira 6 3 5.857933 35.147598 3373 +stein 1 3 5.857933 5.857933 3646 +multiprocess 1 2 6.263398 6.263398 5142 +gjess 1 2 6.263398 6.263398 6156 +kiloprocessor 4 1 6.957497 27.829988 18814 +glow 3 1 6.957497 20.872491 18815 +papakonstantin 3 1 6.957497 20.872491 18816 +tsanaka 2 1 6.957497 13.914994 18817 +sciresearch 1 1 6.957497 6.957497 18818 +collaborationwith 1 1 6.957497 6.957497 18819 +incolabor 1 1 6.957497 6.957497 18820 +goodmanto 1 1 6.957497 6.957497 18821 +kaxirasto 1 1 6.957497 6.957497 18822 +goodmannd 1 1 6.957497 6.957497 18823 +goodmanst 1 1 6.957497 6.957497 18824 +kaxirasunivers 1 1 6.957497 6.957497 18825 +stafylopati 1 1 6.957497 6.957497 18826 +kaxirasinform 1 1 6.957497 6.957497 18827 +pekmestzi 1 1 6.957497 6.957497 18828 +kaxirasp 1 1 6.957497 6.957497 18829 +kaxirasmicroprocess 1 1 6.957497 6.957497 18830 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html new file mode 100644 index 00000000..e829a9ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~keeper^keeper.html @@ -0,0 +1,252 @@ +term, tf, in documents count, idf, tfidf, wordid +page 7 705 0.000000 0.000000 3 +comput 4 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +inform 5 412 0.693147 3.465735 8 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +us 3 329 1.098612 3.295836 16 +student 2 343 1.098612 2.197224 19 +project 2 340 1.098612 2.197224 18 +graduat 2 215 1.386294 2.772588 31 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +list 2 201 1.609438 3.218876 39 +includ 1 208 1.609438 1.609438 42 +updat 1 191 1.609438 1.609438 41 +contact 2 153 1.791759 3.583518 59 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +distribut 1 162 1.791759 1.791759 51 +like 3 132 1.945910 5.837730 81 +note 2 142 1.945910 3.891820 67 +year 2 148 1.945910 3.891820 84 +support 1 132 1.945910 1.945910 83 +object 1 138 1.945910 1.945910 79 +databas 3 122 2.079442 6.238326 86 +number 2 130 2.079442 4.158884 97 +machin 1 129 2.079442 2.079442 95 +postscript 1 131 2.079442 2.079442 90 +document 1 121 2.079442 2.079442 89 +provid 1 121 2.079442 2.079442 94 +world 3 115 2.197225 6.591675 126 +make 3 111 2.197225 6.591675 120 +look 2 107 2.197225 4.394450 115 +structur 1 106 2.197225 2.197225 105 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +person 1 111 2.197225 2.197225 117 +peopl 2 96 2.302585 4.605170 132 +user 2 104 2.302585 4.605170 137 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +section 1 94 2.397895 2.397895 149 +call 1 91 2.397895 2.397895 153 +start 2 83 2.484907 4.969814 173 +contain 1 81 2.484907 2.484907 174 +level 1 87 2.484907 2.484907 180 +institut 1 84 2.484907 2.484907 187 +thing 1 84 2.484907 2.484907 189 +want 2 79 2.564949 5.129898 199 +know 1 80 2.564949 2.564949 198 +interfac 1 79 2.564949 2.564949 209 +come 1 78 2.564949 2.564949 202 +refer 1 78 2.564949 2.564949 203 +sourc 1 77 2.564949 2.564949 201 +write 2 72 2.639057 5.278114 222 +free 1 73 2.639057 2.639057 224 +html 1 75 2.639057 2.639057 235 +would 3 67 2.708050 8.124150 251 +degre 1 69 2.708050 2.708050 259 +java 1 70 2.708050 2.708050 248 +result 1 65 2.772589 2.772589 281 +locat 1 59 2.833213 2.833213 303 +index 3 56 2.890372 8.671116 309 +publish 1 57 2.890372 2.890372 326 +variou 1 56 2.890372 2.890372 317 +think 1 57 2.890372 2.890372 314 +talk 3 53 2.944439 8.833317 336 +allow 2 53 2.944439 5.888878 333 +week 1 52 2.995732 2.995732 343 +date 1 51 2.995732 2.995732 344 +much 1 52 2.995732 2.995732 349 +standard 1 48 3.044522 3.044522 365 +visual 1 48 3.044522 3.044522 372 +give 1 50 3.044522 3.044522 359 +archiv 1 49 3.044522 3.044522 364 +pointer 1 48 3.044522 3.044522 368 +possibl 1 47 3.091042 3.091042 378 +done 1 47 3.091042 3.091042 381 +electron 1 47 3.091042 3.091042 379 +made 2 44 3.135494 6.270988 398 +directori 1 45 3.135494 3.135494 396 +better 1 45 3.135494 3.135494 401 +third 1 43 3.178054 3.178054 412 +long 1 43 3.178054 3.178054 413 +might 1 41 3.218876 3.218876 426 +probabl 4 40 3.258097 13.032388 455 +littl 1 39 3.258097 3.258097 454 +realli 1 40 3.258097 3.258097 444 +programm 1 39 3.258097 3.258097 445 +form 1 39 3.258097 3.258097 443 +small 1 39 3.258097 3.258097 447 +slide 4 38 3.295837 13.183348 467 +especi 1 36 3.367296 3.367296 496 +short 1 36 3.367296 3.367296 499 +soon 1 36 3.367296 3.367296 494 +print 1 34 3.401197 3.401197 503 +taught 1 33 3.433987 3.433987 526 +idea 1 32 3.465736 3.465736 545 +often 1 31 3.496508 3.496508 551 +someth 1 31 3.496508 3.496508 554 +quot 3 29 3.583519 10.750557 582 +pass 1 28 3.610918 3.610918 611 +becom 1 28 3.610918 3.610918 603 +rather 1 26 3.688879 3.688879 642 +relev 1 26 3.688879 3.688879 637 +never 1 25 3.737670 3.737670 671 +hill 1 25 3.737670 3.737670 670 +reliabl 1 25 3.737670 3.737670 674 +notic 1 25 3.737670 3.737670 675 +wish 1 24 3.761200 3.761200 692 +consult 1 24 3.761200 3.761200 687 +displai 1 23 3.806662 3.806662 712 +proof 1 23 3.806662 3.806662 720 +varieti 1 22 3.850148 3.850148 740 +hierarchi 1 22 3.850148 3.850148 744 +properti 1 22 3.850148 3.850148 749 +leav 2 21 3.912023 7.824046 772 +love 1 21 3.912023 3.912023 804 +break 1 20 3.951244 3.951244 812 +ever 2 19 4.007333 8.014666 872 +anderson 1 19 4.007333 4.007333 860 +steven 2 17 4.110874 8.221748 953 +thought 1 17 4.110874 4.110874 945 +anyon 1 17 4.110874 4.110874 916 +anyth 1 16 4.174387 4.174387 998 +doesn 1 15 4.248495 4.248495 1055 +indic 1 15 4.248495 4.248495 1013 +piec 1 15 4.248495 4.248495 1020 +side 1 15 4.248495 4.248495 1022 +anywai 1 15 4.248495 4.248495 1047 +rate 1 15 4.248495 4.248495 1037 +atth 1 15 4.248495 4.248495 1019 +psycholog 1 15 4.248495 4.248495 1054 +convent 1 14 4.317488 4.317488 1072 +attribut 1 14 4.317488 4.317488 1092 +everyth 2 13 4.382027 8.764054 1169 +care 1 13 4.382027 4.382027 1177 +front 1 13 4.382027 4.382027 1154 +unfortun 1 13 4.382027 4.382027 1170 +translat 1 13 4.382027 4.382027 1164 +insid 2 12 4.465908 8.931816 1262 +prelim 1 12 4.465908 4.465908 1201 +skill 1 12 4.465908 4.465908 1205 +uniqu 1 12 4.465908 4.465908 1228 +primit 1 11 4.553877 4.553877 1317 +perman 1 11 4.553877 4.553877 1372 +success 1 10 4.653960 4.653960 1390 +enter 1 10 4.653960 4.653960 1454 +subset 1 10 4.653960 4.653960 1425 +light 1 9 4.753590 4.753590 1533 +end 1 9 4.753590 4.753590 1567 +discov 1 9 4.753590 4.753590 1562 +clear 1 9 4.753590 4.753590 1488 +angel 1 8 4.875197 4.875197 1779 +unifi 1 8 4.875197 4.875197 1774 +gave 3 7 5.010635 15.031905 1922 +surpris 2 7 5.010635 10.021270 1828 +perfect 1 7 5.010635 5.010635 1921 +tag 1 7 5.010635 5.010635 1821 +intellectu 1 7 5.010635 5.010635 1847 +serial 1 7 5.010635 5.010635 1975 +adob 1 7 5.010635 5.010635 1873 +chanc 1 7 5.010635 5.010635 1960 +shot 1 7 5.010635 5.010635 1898 +markup 2 6 5.164786 10.329572 2059 +strang 1 6 5.164786 5.164786 2064 +meant 1 6 5.164786 5.164786 2055 +vari 1 6 5.164786 5.164786 2001 +impress 1 6 5.164786 5.164786 2096 +ifyou 1 6 5.164786 5.164786 1992 +creation 1 6 5.164786 5.164786 2069 +somewher 1 6 5.164786 5.164786 2176 +keeper 2 5 5.347108 10.694216 2569 +stupid 1 5 5.347108 5.347108 2489 +junior 1 5 5.347108 5.347108 2519 +explicitli 1 5 5.347108 5.347108 2308 +hate 1 5 5.347108 5.347108 2529 +bean 1 4 5.568345 5.568345 2968 +hell 1 4 5.568345 5.568345 2885 +suppos 1 4 5.568345 5.568345 3002 +cheap 1 4 5.568345 5.568345 2751 +aliv 2 3 5.857933 11.715866 3864 +heaven 1 3 5.857933 5.857933 3589 +lauri 1 3 5.857933 5.857933 3867 +wasn 1 3 5.857933 5.857933 3800 +argu 1 3 5.857933 5.857933 3698 +rsum 1 3 5.857933 5.857933 3939 +outof 1 3 5.857933 5.857933 3296 +aren 1 3 5.857933 5.857933 3512 +easier 1 3 5.857933 5.857933 3470 +caltech 6 2 6.263398 37.580388 5223 +ironi 1 2 6.263398 6.263398 5986 +sarcasm 1 2 6.263398 6.263398 5871 +offens 1 2 6.263398 6.263398 6168 +miracl 1 2 6.263398 6.263398 5710 +convinc 1 2 6.263398 6.263398 6019 +defunct 1 2 6.263398 6.263398 6162 +personnel 1 2 6.263398 6.263398 4381 +danger 1 2 6.263398 6.263398 5725 +informationag 1 2 6.263398 6.263398 5446 +bui 1 2 6.263398 6.263398 4486 +ofread 1 2 6.263398 6.263398 4417 +possibleto 1 2 6.263398 6.263398 4942 +hedgehog 2 1 6.957497 13.914994 18831 +pager 2 1 6.957497 13.914994 18832 +foughtthei 1 1 6.957497 6.957497 18833 +bitmap 1 1 6.957497 6.957497 18834 +theblind 1 1 6.957497 6.957497 18835 +whateverbrows 1 1 6.957497 6.957497 18836 +literari 1 1 6.957497 6.957497 18837 +satir 1 1 6.957497 6.957497 18838 +butnoth 1 1 6.957497 6.957497 18839 +herein 1 1 6.957497 6.957497 18840 +areoffend 1 1 6.957497 6.957497 18841 +firsttwo 1 1 6.957497 6.957497 18842 +addup 1 1 6.957497 6.957497 18843 +fizzl 1 1 6.957497 6.957497 18844 +areobtain 1 1 6.957497 6.957497 18845 +creatingkiosk 1 1 6.957497 6.957497 18846 +thosewho 1 1 6.957497 6.957497 18847 +mybe 1 1 6.957497 6.957497 18848 +thoughtson 1 1 6.957497 6.957497 18849 +wantto 1 1 6.957497 6.957497 18850 +todo 1 1 6.957497 6.957497 18851 +sporad 1 1 6.957497 6.957497 18852 +danenet 1 1 6.957497 6.957497 18853 +dilhr 1 1 6.957497 6.957497 18854 +jobnet 1 1 6.957497 6.957497 18855 +photonet 1 1 6.957497 6.957497 18856 +databaseus 1 1 6.957497 6.957497 18857 +freez 1 1 6.957497 6.957497 18858 +fought 1 1 6.957497 6.957497 18859 +sfuai 1 1 6.957497 6.957497 18860 +assigna 1 1 6.957497 6.957497 18861 +contextu 1 1 6.957497 6.957497 18862 +distil 1 1 6.957497 6.957497 18863 +rsuminto 1 1 6.957497 6.957497 18864 +pinch 1 1 6.957497 6.957497 18865 +certaintruth 1 1 6.957497 6.957497 18866 +eventuallypick 1 1 6.957497 6.957497 18867 +mull 1 1 6.957497 6.957497 18868 +accessibleto 1 1 6.957497 6.957497 18869 +tough 1 1 6.957497 6.957497 18870 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html new file mode 100644 index 00000000..2303f5dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~krisna^krisna.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +last 1 314 1.098612 1.098612 14 +languag 1 227 1.386294 1.386294 26 +modifi 1 178 1.609438 1.609438 35 +read 1 154 1.791759 1.791759 47 +data 1 170 1.791759 1.791759 49 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +perform 2 143 1.945910 3.891820 74 +welcom 1 122 2.079442 2.079442 99 +postscript 1 131 2.079442 2.079442 90 +search 1 95 2.397895 2.397895 155 +music 1 42 3.218876 3.218876 436 +indian 1 22 3.850148 3.850148 769 +classic 1 14 4.317488 4.317488 1084 +gzip 1 6 5.164786 5.164786 2117 +steer 1 5 5.347108 5.347108 2328 +krishna 1 3 5.857933 5.857933 3495 +kunchithapadamkrishna 1 1 6.957497 6.957497 18871 +kunchithapadamgreet 1 1 6.957497 6.957497 18872 +miscellaneouspubl 1 1 6.957497 6.957497 18873 +toolsresum 1 1 6.957497 6.957497 18874 +bykk 1 1 6.957497 6.957497 18875 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html new file mode 100644 index 00000000..8359ab72 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kristint^kristint.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 2 431 0.693147 1.386294 10 +inform 2 412 0.693147 1.386294 8 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +wisc 2 242 1.386294 2.772588 33 +modifi 1 178 1.609438 1.609438 35 +madison 3 165 1.791759 5.375277 55 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +west 1 83 2.484907 2.484907 192 +server 1 76 2.564949 2.564949 204 +david 1 71 2.639057 2.639057 232 +street 1 63 2.772589 2.772589 293 +sigmod 1 19 4.007333 4.007333 877 +dbm 1 13 4.382027 4.382027 1136 +dewitt 1 12 4.465908 4.465908 1270 +tuft 4 5 5.347108 21.388432 2575 +kristin 2 4 5.568345 11.136690 3089 +miscellani 1 3 5.857933 5.857933 3976 +pagekristin 1 1 6.957497 6.957497 18876 +eduadvisor 1 1 6.957497 6.957497 18877 +serveruw 1 1 6.957497 6.957497 18878 +groupacm 1 1 6.957497 6.957497 18879 +pageeo 1 1 6.957497 6.957497 18880 +officelast 1 1 6.957497 6.957497 18881 +tuftekristin 1 1 6.957497 6.957497 18882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~krung^krung.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~krung^krung.html new file mode 100644 index 00000000..54b4b796 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~krung^krung.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +work 2 380 0.693147 1.386294 9 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +cours 2 273 1.098612 2.197224 15 +project 1 340 1.098612 1.098612 18 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +madison 2 165 1.791759 3.583518 55 +wisconsin 2 169 1.791759 3.583518 54 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +person 2 111 2.197225 4.394450 117 +topic 1 114 2.197225 2.197225 110 +mathemat 1 108 2.197225 2.197225 123 +follow 2 92 2.397895 4.795790 143 +homepag 1 93 2.397895 2.397895 148 +novemb 1 81 2.484907 2.484907 179 +librari 1 87 2.484907 2.484907 181 +good 1 77 2.564949 2.564949 200 +import 1 65 2.772589 2.772589 282 +life 1 50 3.044522 3.044522 375 +electron 1 47 3.091042 3.091042 379 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +compani 1 41 3.218876 3.218876 423 +short 1 36 3.367296 3.367296 499 +whole 1 17 4.110874 4.110874 940 +hobbi 1 16 4.174387 4.174387 1009 +uniqu 1 12 4.465908 4.465908 1228 +opinion 1 8 4.875197 4.875197 1708 +pursu 1 7 5.010635 5.010635 1902 +entiti 1 3 5.857933 5.857933 3096 +krung 3 1 6.957497 20.872491 18883 +homepageupd 1 1 6.957497 6.957497 18884 +underconstructioni 1 1 6.957497 6.957497 18885 +serf 1 1 6.957497 6.957497 18886 +cometh 1 1 6.957497 6.957497 18887 +linkedth 1 1 6.957497 6.957497 18888 +sinapiromsaran 1 1 6.957497 6.957497 18889 +emailkrung 1 1 6.957497 6.957497 18890 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html new file mode 100644 index 00000000..49cc43ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~kunen^kunen.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +program 3 374 0.693147 2.079441 7 +interest 2 384 0.693147 1.386294 11 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +group 2 183 1.609438 3.218876 36 +public 1 202 1.609438 1.609438 43 +fall 1 181 1.609438 1.609438 40 +wisconsin 3 169 1.791759 5.375277 54 +applic 3 170 1.791759 5.375277 56 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +like 1 132 1.945910 1.945910 81 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +file 1 132 1.945910 1.945910 70 +construct 1 139 1.945910 1.945910 82 +note 1 142 1.945910 1.945910 67 +report 2 131 2.079442 4.158884 92 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +studi 1 120 2.079442 2.079442 91 +postscript 1 131 2.079442 2.079442 90 +spring 1 131 2.079442 2.079442 88 +mathemat 7 108 2.197225 15.380575 123 +theori 5 111 2.197225 10.986125 127 +specif 1 106 2.197225 2.197225 106 +topic 1 114 2.197225 2.197225 110 +structur 1 106 2.197225 2.197225 105 +technic 2 100 2.302585 4.605170 140 +book 1 99 2.302585 2.302585 131 +mani 1 92 2.397895 2.397895 150 +question 1 91 2.397895 2.397895 141 +select 1 91 2.397895 2.397895 154 +follow 1 92 2.397895 2.397895 143 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +chang 1 82 2.484907 2.484907 163 +appear 5 78 2.564949 12.824745 210 +complet 1 77 2.564949 2.564949 208 +logic 6 71 2.639057 15.834342 230 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +intellig 1 72 2.639057 2.639057 225 +abstract 1 62 2.772589 2.772589 276 +function 1 62 2.772589 2.772589 275 +result 1 65 2.772589 2.772589 281 +foundat 1 62 2.772589 2.772589 286 +artifici 1 63 2.772589 2.772589 280 +reason 5 57 2.890372 14.451860 318 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +space 1 57 2.890372 2.890372 310 +local 1 55 2.944439 2.944439 334 +extens 1 53 2.944439 2.944439 340 +right 1 48 3.044522 3.044522 363 +basic 1 50 3.044522 3.044522 360 +algebra 2 45 3.135494 6.270988 394 +math 2 44 3.135494 6.270988 402 +answer 1 45 3.135494 3.135494 391 +autom 7 41 3.218876 22.532132 434 +review 2 42 3.218876 6.437752 425 +press 1 42 3.218876 3.218876 419 +theoret 1 39 3.258097 3.258097 446 +close 1 38 3.295837 3.295837 465 +singl 2 34 3.401197 6.802394 510 +taught 1 33 3.433987 3.433987 526 +independ 1 32 3.465736 3.465736 548 +semant 3 29 3.583519 10.750557 587 +consid 1 29 3.583519 3.583519 590 +turn 1 29 3.583519 3.583519 586 +measur 3 28 3.610918 10.832754 609 +american 1 27 3.637586 3.637586 634 +comp 1 26 3.688879 3.688879 650 +theorem 2 21 3.912023 7.824046 786 +prove 1 19 4.007333 4.007333 848 +geometr 1 19 4.007333 4.007333 852 +failur 1 18 4.060443 4.060443 898 +stanford 1 17 4.110874 4.110874 955 +moor 1 17 4.110874 4.110874 936 +style 1 15 4.248495 4.248495 1036 +topolog 2 14 4.317488 8.634976 1089 +draft 1 14 4.317488 4.317488 1085 +prolog 1 13 4.382027 4.382027 1155 +resolut 1 13 4.382027 4.382027 1172 +weak 1 13 4.382027 4.382027 1159 +deduct 3 12 4.465908 13.397724 1236 +kenneth 1 12 4.465908 4.465908 1265 +loop 2 11 4.553877 9.107754 1310 +typic 1 11 4.553877 4.553877 1360 +fix 1 11 4.553877 4.553877 1327 +edutelephon 1 10 4.653960 4.653960 1473 +besid 1 8 4.875197 4.875197 1681 +compact 1 7 5.010635 5.010635 1907 +boyer 1 6 5.164786 5.164786 2013 +rough 1 6 5.164786 5.164786 2107 +infer 1 6 5.164786 5.164786 2040 +shortest 1 5 5.347108 5.347108 2424 +constant 1 5 5.347108 5.347108 2251 +kunen 17 3 5.857933 99.584861 3500 +mathematica 2 3 5.857933 11.715866 3303 +preprint 2 3 5.857933 11.715866 3481 +axiomat 1 3 5.857933 5.857933 3288 +monthli 1 3 5.857933 5.857933 3910 +expon 2 2 6.263398 12.526796 5323 +negat 1 2 6.263398 6.263398 6073 +mill 1 2 6.263398 6.263398 6193 +liter 1 2 6.263398 6.263398 4689 +law 1 2 6.263398 6.263398 4896 +hart 4 1 6.957497 27.829988 18891 +axiom 3 1 6.957497 20.872491 18892 +fundamenta 2 1 6.957497 13.914994 18893 +quasigroup 2 1 6.957497 13.914994 18894 +professormath 1 1 6.957497 6.957497 18895 +resolutionto 1 1 6.957497 6.957497 18896 +likeprolog 1 1 6.957497 6.957497 18897 +prologus 1 1 6.957497 6.957497 18898 +incompat 1 1 6.957497 6.957497 18899 +betweenleast 1 1 6.957497 6.957497 18900 +backtrack 1 1 6.957497 6.957497 18901 +thissubject 1 1 6.957497 6.957497 18902 +usualaxiom 1 1 6.957497 6.957497 18903 +ramsei 1 1 6.957497 6.957497 18904 +corson 1 1 6.957497 6.957497 18905 +moufang 1 1 6.957497 6.957497 18906 +conjugaci 1 1 6.957497 6.957497 18907 +moschovaki 1 1 6.957497 6.957497 18908 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~larus^larus.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~larus^larus.html new file mode 100644 index 00000000..79740dcc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~larus^larus.html @@ -0,0 +1,246 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 18 374 0.693147 12.476646 7 +research 9 431 0.693147 6.238323 10 +system 7 443 0.693147 4.852029 6 +interest 3 384 0.693147 2.079441 11 +project 4 340 1.098612 4.394448 18 +cours 2 273 1.098612 2.197224 15 +offic 1 299 1.098612 1.098612 13 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +languag 11 227 1.386294 15.249234 26 +softwar 6 220 1.386294 8.317764 30 +wisc 3 242 1.386294 4.158882 33 +design 2 213 1.386294 2.772588 25 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +oper 3 180 1.609438 4.828314 34 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +parallel 12 169 1.791759 21.501108 60 +wisconsin 4 169 1.791759 7.167036 54 +data 4 170 1.791759 7.167036 49 +implement 3 152 1.791759 5.375277 52 +develop 3 174 1.791759 5.375277 53 +recent 2 167 1.791759 3.583518 58 +applic 2 170 1.791759 3.583518 56 +distribut 2 162 1.791759 3.583518 51 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +support 7 132 1.945910 13.621370 83 +architectur 5 139 1.945910 9.729550 77 +first 3 140 1.945910 5.837730 71 +perform 2 143 1.945910 3.891820 74 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +compil 8 122 2.079442 16.635536 96 +confer 5 126 2.079442 10.397210 100 +tool 5 117 2.079442 10.397210 93 +machin 3 129 2.079442 6.238326 95 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +spring 1 131 2.079442 2.079442 88 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +intern 5 108 2.197225 10.986125 128 +specif 1 106 2.197225 2.197225 106 +memori 9 101 2.302585 20.723265 139 +user 2 104 2.302585 4.605170 137 +access 1 102 2.302585 2.302585 136 +techniqu 1 99 2.302585 2.302585 138 +part 1 98 2.302585 2.302585 129 +octob 3 89 2.397895 7.193685 156 +associ 1 93 2.397895 2.397895 151 +control 4 82 2.484907 9.939628 164 +novemb 3 81 2.484907 7.454721 179 +educ 2 86 2.484907 4.969814 191 +larg 2 82 2.484907 4.969814 168 +level 2 87 2.484907 4.969814 180 +west 1 83 2.484907 2.484907 192 +librari 1 87 2.484907 2.484907 181 +ieee 1 86 2.484907 2.484907 190 +help 1 83 2.484907 2.484907 175 +appear 2 78 2.564949 5.129898 210 +decemb 1 80 2.564949 2.564949 215 +june 1 79 2.564949 2.564949 214 +messag 1 76 2.564949 2.564949 212 +dynam 1 76 2.564949 2.564949 194 +david 6 71 2.639057 15.834342 232 +effici 2 73 2.639057 5.278114 233 +symposium 2 72 2.639057 5.278114 238 +summari 1 73 2.639057 2.639057 237 +write 1 72 2.639057 2.639057 222 +workshop 1 71 2.639057 2.639057 239 +august 5 66 2.708050 13.540250 257 +java 1 70 2.708050 2.708050 248 +new 1 64 2.772589 2.772589 262 +septemb 1 65 2.772589 2.772589 274 +evalu 1 64 2.772589 2.772589 266 +improv 1 62 2.772589 2.772589 289 +share 6 59 2.833213 16.999278 304 +juli 2 60 2.833213 5.666426 305 +colleg 2 61 2.833213 5.666426 300 +march 1 61 2.833213 2.833213 295 +automat 1 61 2.833213 2.833213 306 +instruct 1 53 2.944439 2.944439 332 +februari 1 54 2.944439 2.944439 328 +hardwar 2 51 2.995732 5.991464 350 +particular 1 51 2.995732 2.995732 352 +principl 1 48 3.044522 3.044522 357 +frequent 1 49 3.044522 3.044522 367 +california 2 46 3.091042 6.182084 388 +understand 1 47 3.091042 3.091042 384 +possibl 1 47 3.091042 3.091042 378 +mark 5 44 3.135494 15.677470 403 +execut 4 45 3.135494 12.541976 404 +protocol 2 45 3.135494 6.270988 407 +better 1 45 3.135494 3.135494 401 +edit 4 42 3.218876 12.875504 418 +press 1 42 3.218876 3.218876 419 +annual 1 40 3.258097 3.258097 458 +transact 1 39 3.258097 3.258097 438 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 17 35 3.401197 57.820349 507 +bibliographi 1 34 3.401197 3.401197 518 +within 1 33 3.433987 3.433987 525 +independ 1 32 3.465736 3.465736 548 +richard 4 31 3.496508 13.986032 559 +profil 4 30 3.555348 14.221392 581 +power 1 30 3.555348 3.555348 573 +depend 1 29 3.583519 3.583519 583 +focus 1 29 3.583519 3.583519 584 +pass 1 28 3.610918 3.610918 611 +multiprocessor 1 28 3.610918 3.610918 605 +symbol 1 27 3.637586 3.637586 620 +static 1 27 3.637586 3.637586 619 +berkelei 2 26 3.688879 7.377758 657 +revis 1 26 3.688879 3.688879 640 +hill 5 25 3.737670 18.688350 670 +trace 1 25 3.737670 3.737670 677 +supercomput 1 25 3.737670 3.737670 681 +spent 1 25 3.737670 3.737670 676 +lab 2 24 3.761200 7.522400 698 +flow 2 24 3.761200 7.522400 700 +scalabl 1 24 3.761200 3.761200 705 +demonstr 1 24 3.761200 3.761200 694 +cooper 1 22 3.850148 3.850148 757 +path 2 21 3.912023 7.824046 778 +annot 2 21 3.912023 7.824046 775 +programminglanguag 1 21 3.912023 3.912023 782 +department 1 20 3.951244 3.951244 839 +portabl 1 20 3.951244 3.951244 819 +fine 1 20 3.951244 3.951244 822 +exploit 1 20 3.951244 3.951244 836 +eric 2 19 4.007333 8.014666 870 +wind 3 18 4.060443 12.181329 908 +thoma 2 18 4.060443 8.120886 901 +steven 3 17 4.110874 12.332622 953 +asplo 3 17 4.110874 12.332622 948 +micro 2 15 4.248495 8.496990 1031 +eduphon 1 15 4.248495 4.248495 1060 +hybrid 1 15 4.248495 4.248495 1057 +coher 1 14 4.317488 4.317488 1109 +sigplan 3 13 4.382027 13.146081 1190 +employ 4 12 4.465908 17.863632 1291 +brad 3 12 4.465908 13.397724 1264 +wood 6 11 4.553877 27.323262 1355 +branch 1 11 4.553877 4.553877 1318 +grain 3 10 4.653960 13.961880 1448 +facilit 1 10 4.653960 4.653960 1412 +laru 20 9 4.753590 95.071800 1560 +tunnel 3 9 4.753590 14.260770 1615 +ball 3 9 4.753590 14.260770 1608 +wilson 1 9 4.753590 4.753590 1536 +routin 1 9 4.753590 4.753590 1549 +pldi 2 8 4.875197 9.750394 1704 +secretari 1 8 4.875197 4.875197 1775 +upcom 1 8 4.875197 4.875197 1685 +irregular 1 8 4.875197 4.875197 1768 +joel 1 8 4.875197 4.875197 1698 +insert 1 8 4.875197 4.875197 1687 +sixth 3 7 5.010635 15.031905 1917 +roger 2 7 5.010635 10.021270 1892 +harvard 1 7 5.010635 5.010635 1926 +fifth 1 7 5.010635 5.010635 1931 +chandra 2 6 5.164786 10.329572 2091 +ann 2 6 5.164786 10.329572 2065 +bell 2 6 5.164786 10.329572 2224 +sciencedepart 1 6 5.164786 5.164786 2172 +microarchitectur 1 6 5.164786 5.164786 2238 +unpublish 1 6 5.164786 5.164786 2226 +lebeck 3 5 5.347108 16.041324 2582 +reinhardt 3 5 5.347108 16.041324 2583 +forprogram 3 5 5.347108 16.041324 2361 +babak 2 5 5.347108 10.694216 2584 +falsafi 2 5 5.347108 10.694216 2585 +ioanni 2 5 5.347108 10.694216 2553 +mukherje 1 5 5.347108 5.347108 2586 +tempest 1 5 5.347108 5.347108 2548 +icpp 1 5 5.347108 5.347108 2382 +toc 1 5 5.347108 5.347108 2562 +summarymi 1 5 5.347108 5.347108 2580 +computerarchitectur 1 5 5.347108 5.347108 2290 +alvin 3 4 5.568345 16.705035 3084 +satish 2 4 5.568345 11.136690 2833 +schoina 2 4 5.568345 11.136690 3085 +languagesand 1 4 5.568345 5.568345 3071 +gregori 1 4 5.568345 5.568345 2928 +sharma 1 4 5.568345 5.568345 2752 +ppopp 1 4 5.568345 5.568345 2774 +substrat 1 4 5.568345 5.568345 2857 +compcon 1 4 5.568345 5.568345 2958 +markhil 1 4 5.568345 5.568345 2819 +manuscript 1 4 5.568345 5.568345 2750 +wart 1 4 5.568345 5.568345 2987 +talluri 1 4 5.568345 5.568345 2820 +oracl 1 4 5.568345 5.568345 2823 +andi 1 4 5.568345 5.568345 3081 +fingerson 1 3 5.857933 5.857933 4018 +thea 1 3 5.857933 5.857933 4019 +sklenar 1 3 5.857933 5.857933 4020 +wcsss 1 3 5.857933 5.857933 3956 +shubhendu 1 3 5.857933 5.857933 4028 +saltz 1 3 5.857933 5.857933 3385 +frequenc 1 3 5.857933 5.857933 3206 +trishul 1 3 5.857933 5.857933 4016 +chilimbi 1 3 5.857933 5.857933 4015 +madhusudhan 1 3 5.857933 5.857933 4021 +parallelmachin 1 3 5.857933 5.857933 3693 +moredetail 1 3 5.857933 5.857933 3854 +guhan 3 2 6.263398 18.790194 6169 +viswanathan 3 2 6.263398 18.790194 6170 +schnarr 2 2 6.263398 12.526796 6194 +lorenz 2 2 6.263398 12.526796 4846 +shamik 1 2 6.263398 6.263398 6195 +cico 1 2 6.263398 6.263398 6120 +thewisconsin 1 2 6.263398 6.263398 6196 +usalaru 1 1 6.957497 6.957497 18909 +structuresc 1 1 6.957497 6.957497 18910 +spim 1 1 6.957497 6.957497 18911 +wartsrec 1 1 6.957497 6.957497 18912 +paperseffici 1 1 6.957497 6.957497 18913 +teapot 1 1 6.957497 6.957497 18914 +andjam 1 1 6.957497 6.957497 18915 +annerog 1 1 6.957497 6.957497 18916 +practiceof 1 1 6.957497 6.957497 18917 +languagesdesign 1 1 6.957497 6.957497 18918 +youfeng 1 1 6.957497 6.957497 18919 +jameslaru 1 1 6.957497 6.957497 18920 +cachier 1 1 6.957497 6.957497 18921 +graduatesbrad 1 1 6.957497 6.957497 18922 +vassar 1 1 6.957497 6.957497 18923 +languagesfirst 1 1 6.957497 6.957497 18924 +huelsbergen 1 1 6.957497 6.957497 18925 +tball 1 1 6.957497 6.957497 18926 +havehelp 1 1 6.957497 6.957497 18927 +coherencepolici 1 1 6.957497 6.957497 18928 +programmersunderstand 1 1 6.957497 6.957497 18929 +hasidentifi 1 1 6.957497 6.957497 18930 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html new file mode 100644 index 00000000..e0f9165c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~leavy^leavy.html @@ -0,0 +1,12 @@ +term, tf, in documents count, idf, tfidf, wordid +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +tuesdai 1 73 2.639057 2.639057 219 +eduoffic 1 33 3.433987 3.433987 531 +nick 1 13 4.382027 4.382027 1180 +leavi 1 3 5.857933 5.857933 3438 +pagenick 1 1 6.957497 6.957497 18931 +pageoffic 1 1 6.957497 6.957497 18932 +wednessdai 1 1 6.957497 6.957497 18933 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html new file mode 100644 index 00000000..6acd35d9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lederman^lederman.html @@ -0,0 +1,78 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +research 3 431 0.693147 2.079441 10 +interest 2 384 0.693147 1.386294 11 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +project 1 340 1.098612 1.098612 18 +current 1 284 1.098612 1.098612 21 +offic 1 299 1.098612 1.098612 13 +also 2 259 1.386294 2.772588 28 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +includ 1 208 1.609438 1.609438 42 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +parallel 1 169 1.791759 1.791759 60 +recent 1 167 1.791759 1.791759 58 +avail 1 169 1.791759 1.791759 48 +phone 1 175 1.791759 1.791759 45 +file 2 132 1.945910 3.891820 70 +relat 1 139 1.945910 1.945910 68 +area 1 144 1.945910 1.945910 80 +document 1 121 2.079442 2.079442 89 +postscript 1 131 2.079442 2.079442 90 +dayton 1 119 2.079442 2.079442 104 +look 1 107 2.197225 2.197225 115 +pleas 1 113 2.197225 2.197225 114 +book 2 99 2.302585 4.605170 131 +member 1 84 2.484907 2.484907 165 +build 1 85 2.484907 2.484907 184 +complet 2 77 2.564949 5.129898 208 +sourc 2 77 2.564949 5.129898 201 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +html 1 75 2.639057 2.639057 235 +order 1 69 2.708050 2.708050 249 +would 1 67 2.708050 2.708050 251 +dept 1 64 2.772589 2.772589 291 +sever 1 56 2.890372 2.890372 322 +publish 1 57 2.890372 2.890372 326 +finger 1 52 2.995732 2.995732 354 +algebra 1 45 3.135494 3.135494 394 +keep 1 44 3.135494 3.135494 409 +linear 1 41 3.218876 3.218876 431 +press 1 42 3.218876 3.218876 419 +editor 1 41 3.218876 3.218876 433 +http 1 41 3.218876 3.218876 420 +origin 1 38 3.295837 3.295837 472 +committe 1 34 3.401197 3.401197 522 +statist 1 35 3.401197 3.401197 521 +univ 2 28 3.610918 7.221836 617 +intend 1 28 3.610918 3.610918 599 +mind 1 27 3.637586 3.637586 632 +other 1 24 3.761200 3.761200 697 +compress 1 23 3.806662 3.806662 719 +wind 1 18 4.060443 4.060443 908 +steven 3 17 4.110874 12.332622 953 +draft 1 14 4.317488 4.317488 1085 +individu 1 13 4.382027 4.382027 1126 +tunnel 1 9 4.753590 4.753590 1615 +isbn 1 7 5.010635 5.010635 1901 +forum 3 6 5.164786 15.494358 2027 +ongo 1 6 5.164786 5.164786 2215 +heavili 1 3 5.857933 5.857933 3572 +andit 1 3 5.857933 5.857933 3328 +thewisconsin 1 2 6.263398 6.263398 6196 +lederman 6 1 6.957497 41.744982 18934 +huss 3 1 6.957497 20.872491 18935 +mpistandard 2 1 6.957497 13.914994 18936 +iscov 1 1 6.957497 6.957497 18937 +prismproject 1 1 6.957497 6.957497 18938 +invol 1 1 6.957497 6.957497 18939 +ongoingwork 1 1 6.957497 6.957497 18940 +compressedtar 1 1 6.957497 6.957497 18941 +desper 1 1 6.957497 6.957497 18942 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html new file mode 100644 index 00000000..627ba73f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lhl^lhl.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +project 2 340 1.098612 2.197224 18 +time 2 293 1.098612 2.197224 17 +design 3 213 1.386294 4.158882 25 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +network 6 168 1.791759 10.750554 61 +wisconsin 2 169 1.791759 3.583518 54 +implement 2 152 1.791759 3.583518 52 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +high 2 130 2.079442 4.158884 101 +confer 2 126 2.079442 4.158884 100 +dayton 1 119 2.079442 2.079442 104 +proceed 3 93 2.397895 7.193685 152 +control 3 82 2.484907 7.454721 164 +second 1 81 2.484907 2.484907 166 +ieee 1 86 2.484907 2.484907 190 +dynam 2 76 2.564949 5.129898 194 +april 1 77 2.564949 2.564949 196 +summari 1 73 2.639057 2.639057 237 +nation 1 74 2.639057 2.639057 240 +window 2 68 2.708050 5.416100 242 +august 1 66 2.708050 2.708050 257 +virtual 3 62 2.772589 8.317767 285 +sampl 1 53 2.944439 2.944439 339 +tabl 1 51 2.995732 2.995732 346 +telephon 1 50 3.044522 3.044522 373 +protocol 2 45 3.135494 6.270988 407 +fast 1 42 3.218876 3.218876 429 +combin 1 42 3.218876 3.218876 421 +close 1 38 3.295837 3.295837 465 +open 1 38 3.295837 3.295837 469 +focus 1 29 3.583519 3.583519 584 +feedback 1 19 4.007333 4.007333 854 +speed 2 18 4.060443 8.120886 911 +rate 1 15 4.248495 4.248495 1037 +francisco 1 14 4.317488 4.317488 1095 +circuit 1 13 4.382027 4.382027 1131 +loop 2 11 4.553877 9.107754 1310 +clock 1 11 4.553877 4.553877 1320 +purdu 1 10 4.653960 4.653960 1466 +packet 1 10 4.653960 4.653960 1415 +lawrenc 2 7 5.010635 10.021270 1908 +conferenc 1 7 5.010635 5.010635 1857 +mukherje 2 5 5.347108 10.694216 2586 +testb 1 5 5.347108 5.347108 2456 +admiss 2 4 5.568345 11.136690 2704 +darpa 1 4 5.568345 5.568345 2944 +phenomena 1 4 5.568345 5.568345 2962 +landweb 2 3 5.857933 11.715866 3402 +congest 2 3 5.857933 11.715866 3993 +infocom 2 3 5.857933 11.715866 3283 +atmospher 1 3 5.857933 5.857933 3388 +baltimor 1 3 5.857933 5.857933 3809 +theieee 1 2 6.263398 6.263398 6043 +florenc 1 2 6.263398 6.263398 4950 +faber 2 1 6.957497 13.914994 18943 +electronicmail 1 1 6.957497 6.957497 18944 +participatingin 1 1 6.957497 6.957497 18945 +gigabit 1 1 6.957497 6.957497 18946 +involvesth 1 1 6.957497 6.957497 18947 +atgigabit 1 1 6.957497 6.957497 18948 +onissu 1 1 6.957497 6.957497 18949 +visualizationof 1 1 6.957497 6.957497 18950 +establishmentmethod 1 1 6.957497 6.957497 18951 +olsen 1 1 6.957497 6.957497 18952 +witht 1 1 6.957497 6.957497 18953 +sigcommconfer 1 1 6.957497 6.957497 18954 +coursesconnect 1 1 6.957497 6.957497 18955 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html new file mode 100644 index 00000000..7479bdd6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lloyd^lloyd.html @@ -0,0 +1,50 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +work 1 380 0.693147 0.693147 9 +us 1 329 1.098612 1.098612 16 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +languag 2 227 1.386294 2.772588 26 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +madison 2 165 1.791759 3.583518 55 +address 1 170 1.791759 1.791759 62 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +lectur 1 135 1.945910 1.945910 73 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +person 1 111 2.197225 2.197225 117 +section 1 94 2.397895 2.397895 149 +west 1 83 2.484907 2.484907 192 +servic 1 72 2.639057 2.639057 236 +intellig 1 72 2.639057 2.639057 225 +thursdai 1 70 2.708050 2.708050 241 +street 1 63 2.772589 2.772589 293 +wednesdai 1 64 2.772589 2.772589 261 +artifici 1 63 2.772589 2.772589 280 +variou 1 56 2.890372 2.890372 317 +appoint 1 49 3.044522 3.044522 358 +archiv 1 49 3.044522 3.044522 364 +natur 1 44 3.135494 3.135494 406 +respons 1 37 3.332205 3.332205 476 +comp 2 26 3.688879 7.377758 650 +women 1 16 4.174387 4.174387 1004 +cognit 1 16 4.174387 4.174387 986 +career 1 12 4.465908 4.465908 1287 +linguist 3 9 4.753590 14.260770 1593 +utah 2 9 4.753590 9.507180 1585 +lloyd 3 6 5.164786 15.494358 2103 +chemistri 1 5 5.347108 5.347108 2405 +shannon 2 1 6.957497 13.914994 18956 +xsoft 1 1 6.957497 6.957497 18957 +lexdemo 1 1 6.957497 6.957497 18958 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html new file mode 100644 index 00000000..0dbf8d48 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lukas^lukas.html @@ -0,0 +1,194 @@ +term, tf, in documents count, idf, tfidf, wordid +page 4 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +cours 1 273 1.098612 1.098612 15 +current 1 284 1.098612 1.098612 21 +also 2 259 1.386294 2.772588 28 +mail 2 238 1.386294 2.772588 22 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +languag 1 227 1.386294 1.386294 26 +fall 2 181 1.609438 3.218876 40 +list 2 201 1.609438 3.218876 39 +includ 2 208 1.609438 3.218876 42 +phone 1 175 1.791759 1.791759 45 +wisconsin 1 169 1.791759 1.791759 54 +address 1 170 1.791759 1.791759 62 +click 2 142 1.945910 3.891820 78 +like 2 132 1.945910 3.891820 81 +number 1 130 2.079442 2.079442 97 +technolog 1 131 2.079442 2.079442 102 +check 2 115 2.197225 4.394450 118 +site 1 106 2.197225 2.197225 119 +well 1 109 2.197225 2.197225 121 +pleas 1 113 2.197225 2.197225 114 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +search 2 95 2.397895 4.795790 155 +real 1 93 2.397895 2.397895 144 +pictur 1 89 2.397895 2.397895 160 +thing 2 84 2.484907 4.969814 189 +know 2 80 2.564949 5.129898 198 +good 1 77 2.564949 2.564949 200 +meet 2 72 2.639057 5.278114 229 +free 1 73 2.639057 2.639057 224 +name 1 72 2.639057 2.639057 220 +appli 1 71 2.639057 2.639057 226 +html 1 75 2.639057 2.639057 235 +java 1 70 2.708050 2.708050 248 +interact 2 62 2.772589 5.545178 270 +virtual 1 62 2.772589 2.772589 285 +organ 1 65 2.772589 2.772589 265 +automat 1 61 2.833213 2.833213 306 +semest 1 58 2.890372 2.890372 312 +space 1 57 2.890372 2.890372 310 +advisor 1 51 2.995732 2.995732 355 +maintain 1 51 2.995732 2.995732 342 +case 1 51 2.995732 2.995732 351 +much 1 52 2.995732 2.995732 349 +cool 3 49 3.044522 9.133566 374 +friend 1 48 3.044522 3.044522 376 +electron 1 47 3.091042 3.091042 379 +favorit 1 44 3.135494 3.135494 410 +realli 2 40 3.258097 6.516194 444 +submit 1 39 3.258097 3.258097 440 +seminar 1 38 3.295837 3.295837 470 +feel 2 37 3.332205 6.664410 483 +go 1 33 3.433987 3.433987 529 +taught 1 33 3.433987 3.433987 526 +someth 1 31 3.496508 3.496508 554 +option 4 30 3.555348 14.221392 575 +becom 1 28 3.610918 3.610918 603 +concern 1 25 3.737670 3.737670 666 +higher 1 24 3.761200 3.761200 690 +longer 1 20 3.951244 3.951244 816 +figur 1 18 4.060443 4.060443 903 +women 2 16 4.174387 8.348774 1004 +stock 1 16 4.174387 4.174387 1007 +todd 1 15 4.248495 4.248495 1051 +anywai 1 15 4.248495 4.248495 1047 +mayb 1 15 4.248495 4.248495 1014 +save 1 14 4.317488 4.317488 1099 +edui 1 13 4.382027 4.382027 1193 +coordin 1 13 4.382027 4.382027 1182 +wait 1 13 4.382027 4.382027 1168 +wife 1 13 4.382027 4.382027 1196 +entertain 2 12 4.465908 8.931816 1286 +basketbal 1 12 4.465908 4.465908 1289 +readi 1 12 4.465908 4.465908 1242 +food 1 12 4.465908 4.465908 1285 +fill 3 11 4.553877 13.661631 1349 +market 1 11 4.553877 4.553877 1361 +keyword 1 11 4.553877 4.553877 1356 +candid 1 9 4.753590 4.753590 1606 +jump 1 9 4.753590 4.753590 1603 +simpli 1 8 4.875197 4.875197 1626 +reload 1 8 4.875197 4.875197 1682 +appar 1 7 5.010635 5.010635 1958 +iowa 1 7 5.010635 5.010635 1971 +polit 1 6 5.164786 5.164786 2115 +legal 1 6 5.164786 5.164786 2094 +troubl 1 6 5.164786 5.164786 2002 +christoph 1 5 5.347108 5.347108 2512 +czar 1 5 5.347108 5.347108 2503 +tuft 1 5 5.347108 5.347108 2575 +amus 1 5 5.347108 5.347108 2366 +sing 1 5 5.347108 5.347108 2499 +gui 1 5 5.347108 5.347108 2573 +girlfriend 1 5 5.347108 5.347108 2579 +festiv 1 4 5.568345 5.568345 2952 +turnidg 1 4 5.568345 5.568345 2829 +superhighwai 1 4 5.568345 5.568345 2943 +chees 1 4 5.568345 5.568345 3090 +rival 1 3 5.857933 5.857933 3583 +tiger 1 3 5.857933 5.857933 3897 +wealth 1 3 5.857933 5.857933 3353 +traci 1 3 5.857933 5.857933 3984 +child 1 3 5.857933 5.857933 3542 +tast 1 3 5.857933 5.857933 3666 +kick 1 3 5.857933 5.857933 3962 +alien 1 3 5.857933 5.857933 3930 +laugh 1 3 5.857933 5.857933 3659 +defeat 1 2 6.263398 6.263398 5401 +kirk 1 2 6.263398 6.263398 6175 +killer 1 2 6.263398 6.263398 6159 +tragic 1 2 6.263398 6.263398 6114 +junki 1 2 6.263398 6.263398 5457 +outer 1 2 6.263398 6.263398 4464 +gross 1 2 6.263398 6.263398 5989 +nake 1 2 6.263398 6.263398 6197 +offspr 1 2 6.263398 6.263398 5699 +bogu 1 2 6.263398 6.263398 5471 +wierd 1 2 6.263398 6.263398 6093 +luka 2 1 6.957497 13.914994 18959 +lone 2 1 6.957497 13.914994 18960 +checkbox 2 1 6.957497 13.914994 18961 +pagechristoph 1 1 6.957497 6.957497 18962 +lukasrelev 1 1 6.957497 6.957497 18963 +mspl 1 1 6.957497 6.957497 18964 +workshipi 1 1 6.957497 6.957497 18965 +quest 1 1 6.957497 6.957497 18966 +sunivers 1 1 6.957497 6.957497 18967 +prisonerthi 1 1 6.957497 6.957497 18968 +quoteserv 1 1 6.957497 6.957497 18969 +fabul 1 1 6.957497 6.957497 18970 +pagebet 1 1 6.957497 6.957497 18971 +identitycaptain 1 1 6.957497 6.957497 18972 +throughamaz 1 1 6.957497 6.957497 18973 +withtri 1 1 6.957497 6.957497 18974 +teri 1 1 6.957497 6.957497 18975 +incred 1 1 6.957497 6.957497 18976 +catthi 1 1 6.957497 6.957497 18977 +buttmunchextrem 1 1 6.957497 6.957497 18978 +dudemichael 1 1 6.957497 6.957497 18979 +nesmith 1 1 6.957497 6.957497 18980 +fanfoolmyth 1 1 6.957497 6.957497 18981 +figurewick 1 1 6.957497 6.957497 18982 +playervalu 1 1 6.957497 6.957497 18983 +studentment 1 1 6.957497 6.957497 18984 +defectivea 1 1 6.957497 6.957497 18985 +wkrp 1 1 6.957497 6.957497 18986 +cincinatti 1 1 6.957497 6.957497 18987 +figuregeek 1 1 6.957497 6.957497 18988 +tradesgonzo 1 1 6.957497 6.957497 18989 +admirernetscap 1 1 6.957497 6.957497 18990 +pornpersonifi 1 1 6.957497 6.957497 18991 +condom 1 1 6.957497 6.957497 18992 +stretch 1 1 6.957497 6.957497 18993 +blowflam 1 1 6.957497 6.957497 18994 +testicl 1 1 6.957497 6.957497 18995 +goodpoetri 1 1 6.957497 6.957497 18996 +guruhogwildthi 1 1 6.957497 6.957497 18997 +assman 1 1 6.957497 6.957497 18998 +manbig 1 1 6.957497 6.957497 18999 +dudeuh 1 1 6.957497 6.957497 19000 +ohprofession 1 1 6.957497 6.957497 19001 +muff 1 1 6.957497 6.957497 19002 +diverregress 1 1 6.957497 6.957497 19003 +lifeformherald 1 1 6.957497 6.957497 19004 +invas 1 1 6.957497 6.957497 19005 +forcechri 1 1 6.957497 6.957497 19006 +formsalienherpetophiletodd 1 1 6.957497 6.957497 19007 +hatth 1 1 6.957497 6.957497 19008 +mancreepi 1 1 6.957497 6.957497 19009 +headsmal 1 1 6.957497 6.957497 19010 +planetdr 1 1 6.957497 6.957497 19011 +companioneast 1 1 6.957497 6.957497 19012 +bunnycyberweenietcl 1 1 6.957497 6.957497 19013 +hellbeast 1 1 6.957497 6.957497 19014 +drug 1 1 6.957497 6.957497 19015 +cosmo 1 1 6.957497 6.957497 19016 +irrit 1 1 6.957497 6.957497 19017 +scatolog 1 1 6.957497 6.957497 19018 +pervert 1 1 6.957497 6.957497 19019 +etymolog 1 1 6.957497 6.957497 19020 +phat 1 1 6.957497 6.957497 19021 +gnarli 1 1 6.957497 6.957497 19022 +cybermuffin 1 1 6.957497 6.957497 19023 +erotica 1 1 6.957497 6.957497 19024 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html new file mode 100644 index 00000000..716bc4ff --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~lzheng^lzheng.html @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +univers 6 571 0.000000 0.000000 5 +scienc 5 640 0.000000 0.000000 4 +home 4 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +mail 2 238 1.386294 2.772588 22 +graduat 2 215 1.386294 2.772588 31 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +madison 2 165 1.791759 3.583518 55 +phone 1 175 1.791759 1.791759 45 +click 2 142 1.945910 3.891820 78 +architectur 1 139 1.945910 1.945910 77 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +look 2 107 2.197225 4.394450 115 +version 1 113 2.197225 2.197225 122 +assist 1 112 2.197225 2.197225 113 +place 1 106 2.197225 2.197225 124 +take 2 97 2.302585 4.605170 134 +text 1 98 2.302585 2.302585 133 +pictur 1 89 2.397895 2.397895 160 +homepag 1 93 2.397895 2.397895 148 +sinc 1 90 2.397895 2.397895 159 +educ 1 86 2.484907 2.484907 191 +school 1 84 2.484907 2.484907 188 +want 3 79 2.564949 7.694847 199 +know 2 80 2.564949 5.129898 198 +resum 1 79 2.564949 2.564949 217 +dept 3 64 2.772589 8.317767 291 +januari 1 62 2.772589 2.772589 264 +best 2 59 2.833213 5.666426 299 +march 1 61 2.833213 2.833213 295 +suggest 1 53 2.944439 2.944439 331 +visitor 1 49 3.044522 3.044522 371 +could 1 46 3.091042 3.091042 383 +compani 1 41 3.218876 3.218876 423 +china 1 37 3.332205 3.332205 487 +thank 1 23 3.806662 3.806662 721 +self 1 22 3.850148 3.850148 761 +alumni 2 21 3.912023 7.824046 807 +miller 1 17 4.110874 4.110874 949 +side 1 15 4.248495 4.248495 1022 +promot 1 12 4.465908 4.465908 1235 +surf 1 11 4.553877 4.553877 1301 +america 1 11 4.553877 4.553877 1370 +paradyn 2 9 4.753590 9.507180 1614 +charg 1 9 4.753590 4.753590 1582 +port 1 8 4.875197 4.875197 1766 +hack 1 7 5.010635 5.010635 1950 +iowa 1 7 5.010635 5.010635 1971 +onto 1 6 5.164786 5.164786 2089 +barton 1 5 5.347108 5.347108 2371 +girlfriend 1 5 5.347108 5.347108 2579 +ignor 1 5 5.347108 5.347108 2288 +ling 2 4 5.568345 11.136690 3045 +hpux 1 3 5.857933 5.857933 3780 +temporarili 1 3 5.857933 5.857933 3692 +wuhan 3 2 6.263398 18.790194 5589 +sheboygan 1 2 6.263398 6.263398 6198 +shameless 1 2 6.263398 6.263398 6146 +chinaand 1 2 6.263398 6.263398 5151 +officem 1 2 6.263398 6.263398 6092 +marcelo 1 2 6.263398 6.263398 6199 +infom 1 2 6.263398 6.263398 5425 +bother 1 2 6.263398 6.263398 6164 +zheng 2 1 6.957497 13.914994 19025 +lzheng 1 1 6.957497 6.957497 19026 +boss 1 1 6.957497 6.957497 19027 +prese 1 1 6.957497 6.957497 19028 +winsconsin 1 1 6.957497 6.957497 19029 +goncalv 1 1 6.957497 6.957497 19030 +hereif 1 1 6.957497 6.957497 19031 +sthe 1 1 6.957497 6.957497 19032 +schoolssend 1 1 6.957497 6.957497 19033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html new file mode 100644 index 00000000..6027078c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~manuvir^manuvir.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +cours 1 273 1.098612 1.098612 15 +email 1 220 1.386294 1.386294 29 +gener 1 220 1.386294 1.386294 27 +wisc 1 242 1.386294 1.386294 33 +like 2 132 1.945910 3.891820 81 +look 2 107 2.197225 4.394450 115 +final 1 116 2.197225 2.197225 108 +need 1 98 2.302585 2.302585 135 +start 1 83 2.484907 2.484907 173 +know 3 80 2.564949 7.694847 198 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +plai 1 60 2.833213 2.833213 307 +advisor 1 51 2.995732 2.995732 355 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +around 1 43 3.178054 3.178054 415 +photo 1 31 3.496508 3.496508 561 +turn 1 29 3.583519 3.583519 586 +team 1 27 3.637586 3.637586 625 +dai 1 22 3.850148 3.850148 753 +later 1 15 4.248495 4.248495 1043 +america 1 11 4.553877 4.553877 1370 +hello 1 10 4.653960 4.653960 1407 +sundai 1 10 4.653960 4.653960 1387 +said 1 9 4.753590 4.753590 1571 +round 1 8 4.875197 4.875197 1769 +monei 1 7 5.010635 5.010635 1934 +golf 2 6 5.164786 10.329572 2178 +leagu 1 4 5.568345 5.568345 3040 +passion 1 3 5.857933 5.857933 3633 +manuvir 2 1 6.957497 13.914994 19034 +pagemanuvir 1 1 6.957497 6.957497 19035 +dasnow 1 1 6.957497 6.957497 19036 +andwhat 1 1 6.957497 6.957497 19037 +feelfre 1 1 6.957497 6.957497 19038 +somethingsend 1 1 6.957497 6.957497 19039 +anact 1 1 6.957497 6.957497 19040 +manuvirwhat 1 1 6.957497 6.957497 19041 +thisto 1 1 6.957497 6.957497 19042 +theorigin 1 1 6.957497 6.957497 19043 +consin 1 1 6.957497 6.957497 19044 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html new file mode 100644 index 00000000..690aa18a --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~markhill^markhill.html @@ -0,0 +1,395 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 40 775 0.000000 0.000000 2 +page 12 705 0.000000 0.000000 3 +scienc 7 640 0.000000 0.000000 4 +home 5 672 0.000000 0.000000 1 +univers 5 571 0.000000 0.000000 5 +system 16 443 0.693147 11.090352 6 +research 10 431 0.693147 6.931470 10 +inform 4 412 0.693147 2.772588 8 +program 4 374 0.693147 2.772588 7 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +project 5 340 1.098612 5.493060 18 +current 4 284 1.098612 4.394448 21 +engin 4 297 1.098612 4.394448 20 +offic 3 299 1.098612 3.295836 13 +us 2 329 1.098612 2.197224 16 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +wisc 3 242 1.386294 4.158882 33 +languag 3 227 1.386294 4.158882 26 +design 3 213 1.386294 4.158882 25 +softwar 3 220 1.386294 4.158882 30 +email 3 220 1.386294 4.158882 29 +oper 4 180 1.609438 6.437752 34 +group 2 183 1.609438 3.218876 36 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +fall 1 181 1.609438 1.609438 40 +wisconsin 15 169 1.791759 26.876385 54 +parallel 12 169 1.791759 21.501108 60 +address 5 170 1.791759 8.958795 62 +implement 5 152 1.791759 8.958795 52 +recent 4 167 1.791759 7.167036 58 +distribut 4 162 1.791759 7.167036 51 +data 3 170 1.791759 5.375277 49 +hour 2 165 1.791759 3.583518 46 +base 2 165 1.791759 3.583518 50 +applic 2 170 1.791759 3.583518 56 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +algorithm 1 162 1.791759 1.791759 57 +architectur 17 139 1.945910 33.080470 77 +perform 9 143 1.945910 17.513190 74 +support 5 132 1.945910 9.729550 83 +model 4 145 1.945910 7.783640 69 +like 3 132 1.945910 5.837730 81 +first 3 140 1.945910 5.837730 71 +professor 2 137 1.945910 3.891820 76 +process 2 142 1.945910 3.891820 72 +click 1 142 1.945910 1.945910 78 +confer 5 126 2.079442 10.397210 100 +machin 3 129 2.079442 6.238326 95 +tool 2 117 2.079442 4.158884 93 +high 2 130 2.079442 4.158884 101 +analysi 2 124 2.079442 4.158884 98 +compil 2 122 2.079442 4.158884 96 +dayton 1 119 2.079442 2.079442 104 +introduct 1 126 2.079442 2.079442 87 +technolog 1 131 2.079442 2.079442 102 +report 1 131 2.079442 2.079442 92 +intern 9 108 2.197225 19.775025 128 +teach 1 108 2.197225 2.197225 112 +world 1 115 2.197225 2.197225 126 +topic 1 114 2.197225 2.197225 110 +final 1 116 2.197225 2.197225 108 +specif 1 106 2.197225 2.197225 106 +assist 1 112 2.197225 2.197225 113 +memori 16 101 2.302585 36.841360 139 +advanc 3 99 2.302585 6.907755 130 +part 1 98 2.302585 2.302585 129 +user 1 104 2.302585 2.302585 137 +techniqu 1 99 2.302585 2.302585 138 +associ 2 93 2.397895 4.795790 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +search 1 95 2.397895 2.397895 155 +ieee 5 86 2.484907 12.424535 190 +level 3 87 2.484907 7.454721 180 +novemb 3 81 2.484907 7.454721 179 +larg 2 82 2.484907 4.969814 168 +requir 2 81 2.484907 4.969814 167 +educ 1 86 2.484907 2.484907 191 +wide 1 84 2.484907 2.484907 185 +stuff 1 87 2.484907 2.484907 171 +west 1 83 2.484907 2.484907 192 +librari 1 87 2.484907 2.484907 181 +chang 1 82 2.484907 2.484907 163 +journal 1 83 2.484907 2.484907 183 +help 1 83 2.484907 2.484907 175 +june 7 79 2.564949 17.954643 214 +interfac 3 79 2.564949 7.694847 209 +mondai 1 77 2.564949 2.564949 206 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +complet 1 77 2.564949 2.564949 208 +decemb 1 80 2.564949 2.564949 215 +david 13 71 2.639057 34.307741 232 +symposium 7 72 2.639057 18.473399 238 +onlin 1 75 2.639057 2.639057 223 +name 1 72 2.639057 2.639057 220 +workshop 1 71 2.639057 2.639057 239 +effici 1 73 2.639057 2.639057 233 +solv 1 73 2.639057 2.639057 234 +august 5 66 2.708050 13.540250 257 +simul 3 66 2.708050 8.124150 255 +java 1 70 2.708050 2.708050 248 +differ 1 66 2.708050 2.708050 253 +order 1 69 2.708050 2.708050 249 +wednesdai 1 64 2.772589 2.772589 261 +organ 1 65 2.772589 2.772589 265 +import 1 65 2.772589 2.772589 282 +prof 1 64 2.772589 2.772589 273 +improv 1 62 2.772589 2.772589 289 +evalu 1 64 2.772589 2.772589 266 +new 1 64 2.772589 2.772589 262 +virtual 1 62 2.772589 2.772589 285 +abstract 1 62 2.772589 2.772589 276 +share 9 59 2.833213 25.498917 304 +juli 5 60 2.833213 14.166065 305 +content 2 59 2.833213 5.666426 302 +march 1 61 2.833213 2.833213 295 +space 3 57 2.890372 8.671116 310 +think 1 57 2.890372 2.890372 314 +index 1 56 2.890372 2.890372 309 +sampl 2 53 2.944439 5.888878 339 +talk 1 53 2.944439 2.944439 336 +februari 1 54 2.944439 2.944439 328 +hardwar 7 51 2.995732 20.970124 350 +tabl 4 51 2.995732 11.982928 346 +much 1 52 2.995732 2.995732 349 +investig 1 51 2.995732 2.995732 353 +give 1 50 3.044522 3.044522 359 +appoint 1 49 3.044522 3.044522 358 +frequent 1 49 3.044522 3.044522 367 +california 2 46 3.091042 6.182084 388 +effect 1 46 3.091042 3.091042 385 +mark 35 44 3.135494 109.742290 403 +protocol 2 45 3.135494 6.270988 407 +execut 1 45 3.135494 3.135494 404 +directori 1 45 3.135494 3.135494 396 +show 1 43 3.178054 3.178054 417 +mechan 1 43 3.178054 3.178054 416 +cach 7 41 3.218876 22.532132 432 +futur 1 41 3.218876 3.218876 427 +combin 1 42 3.218876 3.218876 421 +transact 5 39 3.258097 16.290485 438 +programm 1 39 3.258097 3.258097 445 +streetmadison 1 38 3.295837 3.295837 474 +industri 1 38 3.295837 3.295837 464 +prototyp 1 38 3.295837 3.295837 463 +expect 2 37 3.332205 6.664410 484 +workstat 2 37 3.332205 6.664410 479 +sciencesunivers 1 37 3.332205 3.332205 486 +cost 1 37 3.332205 3.332205 480 +formal 1 37 3.332205 3.332205 478 +multi 2 36 3.367296 6.734592 493 +jame 10 35 3.401197 34.011970 507 +bibliographi 1 34 3.401197 3.401197 518 +award 1 34 3.401197 3.401197 523 +toler 1 33 3.433987 3.433987 533 +john 1 33 3.433987 3.433987 532 +fault 1 32 3.465736 3.465736 547 +extend 1 32 3.465736 3.465736 539 +richard 2 31 3.496508 6.993016 559 +often 1 31 3.496508 3.496508 551 +option 1 30 3.555348 3.555348 575 +robert 1 30 3.555348 3.555348 567 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +cluster 2 28 3.610918 7.221836 612 +multiprocessor 2 28 3.610918 7.221836 605 +pass 1 28 3.610918 3.610918 611 +propos 1 28 3.610918 3.610918 602 +scale 1 28 3.610918 3.610918 613 +determin 1 27 3.637586 3.637586 630 +consist 3 26 3.688879 11.066637 651 +berkelei 2 26 3.688879 7.377758 657 +experiment 2 26 3.688879 7.377758 645 +enabl 1 26 3.688879 3.688879 655 +revis 1 26 3.688879 3.688879 640 +detect 1 26 3.688879 3.688879 646 +hill 36 25 3.737670 134.556120 670 +supercomput 3 25 3.737670 11.213010 681 +trace 2 25 3.737670 7.475340 677 +scalabl 1 24 3.761200 3.761200 705 +higher 1 24 3.761200 3.761200 690 +proof 1 23 3.806662 3.806662 720 +size 1 23 3.806662 3.806662 713 +highli 1 23 3.806662 3.806662 725 +cooper 2 22 3.850148 7.700296 757 +sequenti 1 22 3.850148 3.850148 745 +hierarchi 1 22 3.850148 3.850148 744 +annot 1 21 3.912023 3.912023 775 +department 1 20 3.951244 3.951244 839 +fine 1 20 3.951244 3.951244 822 +smith 1 20 3.951244 3.951244 820 +scheme 1 20 3.951244 3.951244 818 +benchmark 2 19 4.007333 8.014666 859 +comparison 2 19 4.007333 8.014666 863 +miss 1 19 4.007333 4.007333 866 +definit 1 19 4.007333 4.007333 864 +wind 6 18 4.060443 24.362658 908 +partial 1 18 4.060443 4.060443 900 +less 1 18 4.060443 4.060443 892 +four 1 18 4.060443 4.060443 905 +steven 4 17 4.110874 16.443496 953 +seek 1 17 4.110874 4.110874 954 +asplo 1 17 4.110874 4.110874 948 +miller 1 17 4.110874 4.110874 949 +estim 1 17 4.110874 4.110874 930 +transfer 1 16 4.174387 4.174387 967 +young 1 16 4.174387 4.174387 991 +eduphon 1 15 4.248495 4.248495 1060 +massiv 1 15 4.248495 4.248495 1026 +hybrid 1 15 4.248495 4.248495 1057 +micro 1 15 4.248495 4.248495 1031 +coher 2 14 4.317488 8.634976 1109 +manner 1 14 4.317488 4.317488 1074 +convent 1 14 4.317488 4.317488 1072 +rank 1 14 4.317488 4.317488 1063 +sigmetr 3 13 4.382027 13.146081 1173 +suit 2 13 4.382027 8.764054 1129 +translat 2 13 4.382027 8.764054 1164 +weak 2 13 4.382027 8.764054 1159 +alan 1 13 4.382027 4.382027 1146 +employ 3 12 4.465908 13.397724 1291 +target 1 12 4.465908 4.465908 1282 +buffer 1 12 4.465908 4.465908 1211 +gupta 1 12 4.465908 4.465908 1241 +mari 1 12 4.465908 4.465908 1266 +wood 13 11 4.553877 59.200401 1355 +isca 6 11 4.553877 27.323262 1354 +rice 2 11 4.553877 9.107754 1336 +michigan 1 11 4.553877 4.553877 1368 +node 1 11 4.553877 4.553877 1326 +transpar 1 11 4.553877 4.553877 1325 +keyword 1 11 4.553877 4.553877 1356 +catalog 1 10 4.653960 4.653960 1431 +grain 1 10 4.653960 4.653960 1448 +princip 1 10 4.653960 4.653960 1397 +sosp 1 10 4.653960 4.653960 1416 +placement 1 10 4.653960 4.653960 1420 +stack 1 10 4.653960 4.653960 1389 +laru 9 9 4.753590 42.782310 1560 +tunnel 6 9 4.753590 28.521540 1615 +patterson 2 9 4.753590 9.507180 1554 +sound 1 9 4.753590 4.753590 1605 +frank 1 9 4.753590 4.753590 1568 +jeffrei 1 9 4.753590 4.753590 1612 +kong 1 9 4.753590 4.753590 1602 +vernon 1 9 4.753590 4.753590 1556 +spec 2 8 4.875197 9.750394 1640 +lewi 2 8 4.875197 9.750394 1700 +secretari 1 8 4.875197 4.875197 1775 +uniprocessor 1 8 4.875197 4.875197 1696 +quantit 1 8 4.875197 4.875197 1654 +presidenti 1 8 4.875197 4.875197 1737 +irregular 1 8 4.875197 4.875197 1768 +joel 1 8 4.875197 4.875197 1698 +unifi 1 8 4.875197 4.875197 1774 +roger 2 7 5.010635 10.021270 1892 +secondari 1 7 5.010635 5.010635 1884 +ann 2 6 5.164786 10.329572 2065 +consensu 1 6 5.164786 5.164786 2080 +unpublish 1 6 5.164786 5.164786 2226 +chandra 1 6 5.164786 5.164786 2091 +microsystem 1 6 5.164786 5.164786 2160 +mukherje 4 5 5.347108 21.388432 2586 +lebeck 4 5 5.347108 21.388432 2582 +reinhardt 4 5 5.347108 21.388432 2583 +babak 3 5 5.347108 16.041324 2584 +falsafi 3 5 5.347108 16.041324 2585 +tempest 2 5 5.347108 10.694216 2548 +engineeringat 1 5 5.347108 5.347108 2561 +advic 1 5 5.347108 5.347108 2509 +educurr 1 5 5.347108 5.347108 2504 +summarymi 1 5 5.347108 5.347108 2580 +middl 1 5 5.347108 5.347108 2372 +hypothet 1 5 5.347108 5.347108 2474 +optimist 1 5 5.347108 5.347108 2501 +chemic 1 5 5.347108 5.347108 2552 +ioanni 1 5 5.347108 5.347108 2553 +forprogram 1 5 5.347108 5.347108 2361 +toc 1 5 5.347108 5.347108 2562 +andrea 1 5 5.347108 5.347108 2375 +hennessi 1 5 5.347108 5.347108 2289 +race 1 5 5.347108 5.347108 2417 +barton 1 5 5.347108 5.347108 2371 +talluri 5 4 5.568345 27.841725 2820 +alvin 4 4 5.568345 22.273380 3084 +markhil 2 4 5.568345 11.136690 2819 +wart 2 4 5.568345 11.136690 2987 +crai 2 4 5.568345 11.136690 3012 +emphas 1 4 5.568345 5.568345 2672 +languagesand 1 4 5.568345 5.568345 3071 +align 1 4 5.568345 5.568345 2863 +manuscript 1 4 5.568345 5.568345 2750 +sabbat 1 4 5.568345 5.568345 2824 +sharma 1 4 5.568345 5.568345 2752 +ppopp 1 4 5.568345 5.568345 2774 +schoina 1 4 5.568345 5.568345 3085 +medium 1 4 5.568345 5.568345 2834 +implic 1 4 5.568345 5.568345 2696 +satish 1 4 5.568345 5.568345 2833 +anoop 1 4 5.568345 5.568345 2770 +ratio 1 4 5.568345 5.568345 2942 +shubhendu 4 3 5.857933 23.431732 4028 +madhusudhan 4 3 5.857933 23.431732 4021 +superpag 2 3 5.857933 11.715866 3978 +megabyt 2 3 5.857933 11.715866 3732 +fingerson 1 3 5.857933 5.857933 4018 +thea 1 3 5.857933 5.857933 4019 +sklenar 1 3 5.857933 5.857933 4020 +programmingc 1 3 5.857933 5.857933 3232 +saltz 1 3 5.857933 5.857933 3385 +surpass 1 3 5.857933 5.857933 3247 +tradeoff 1 3 5.857933 5.857933 3387 +adv 8 2 6.263398 50.107184 4540 +andelectr 1 2 6.263398 6.263398 6200 +wisconsint 1 2 6.263398 6.263398 6155 +teachingfal 1 2 6.263398 6.263398 5532 +ifal 1 2 6.263398 6.263398 4776 +architecturec 1 2 6.263398 6.263398 6127 +sustain 1 2 6.263398 6.263398 6201 +mywork 1 2 6.263398 6.263398 5800 +projectwith 1 2 6.263398 6.263398 4986 +uniformli 1 2 6.263398 6.263398 6202 +todevelop 1 2 6.263398 6.263398 5448 +aredevelop 1 2 6.263398 6.263398 4930 +similarto 1 2 6.263398 6.263398 6074 +aeronaut 1 2 6.263398 6.263398 5958 +anddavid 1 2 6.263398 6.263398 6126 +sashikanth 1 2 6.263398 6.263398 6122 +chandrasekaran 1 2 6.263398 6.263398 6121 +shamik 1 2 6.263398 6.263398 6195 +memorymultiprocessor 1 2 6.263398 6.263398 4529 +dionisio 1 2 6.263398 6.263398 6203 +pnevmatikato 1 2 6.263398 6.263398 6204 +subbarao 1 2 6.263398 6.263398 6205 +shing 1 2 6.263398 6.263398 5146 +sarita 8 1 6.957497 55.659976 19045 +kessler 4 1 6.957497 27.829988 19046 +subblock 3 1 6.957497 20.872491 19047 +sampler 2 1 6.957497 13.914994 19048 +madhu 2 1 6.957497 13.914994 19049 +tlb 2 1 6.957497 13.914994 19050 +pagemark 1 1 6.957497 6.957497 19051 +andsummari 1 1 6.957497 6.957497 19052 +graduateslink 1 1 6.957497 6.957497 19053 +oralpresent 1 1 6.957497 6.957497 19054 +forcach 1 1 6.957497 6.957497 19055 +usamarkhil 1 1 6.957497 6.957497 19056 +icatalog 1 1 6.957497 6.957497 19057 +teachc 1 1 6.957497 6.957497 19058 +iieduc 1 1 6.957497 6.957497 19059 +evaluationresearch 1 1 6.957497 6.957497 19060 +multiprocessorsand 1 1 6.957497 6.957497 19061 +evaluationtechniqu 1 1 6.957497 6.957497 19062 +windtunnel 1 1 6.957497 6.957497 19063 +manystud 1 1 6.957497 6.957497 19064 +computerswil 1 1 6.957497 6.957497 19065 +levelparallel 1 1 6.957497 6.957497 19066 +inwhich 1 1 6.957497 6.957497 19067 +recentlypropos 1 1 6.957497 6.957497 19068 +aclust 1 1 6.957497 6.957497 19069 +toolsto 1 1 6.957497 6.957497 19070 +cull 1 1 6.957497 6.957497 19071 +designairplan 1 1 6.957497 6.957497 19072 +talluritarget 1 1 6.957497 6.957497 19073 +lookasid 1 1 6.957497 6.957497 19074 +superpagesand 1 1 6.957497 6.957497 19075 +asplosandsosppap 1 1 6.957497 6.957497 19076 +papersth 1 1 6.957497 6.957497 19077 +bidirect 1 1 6.957497 6.957497 19078 +pad 1 1 6.957497 6.957497 19079 +yousef 1 1 6.957497 6.957497 19080 +khalidi 1 1 6.957497 6.957497 19081 +microstructur 1 1 6.957497 6.957497 19082 +electrostat 1 1 6.957497 6.957497 19083 +traenkl 1 1 6.957497 6.957497 19084 +sangta 1 1 6.957497 6.957497 19085 +tpd 1 1 6.957497 6.957497 19086 +farid 1 1 6.957497 6.957497 19087 +pour 1 1 6.957497 6.957497 19088 +palacharla 1 1 6.957497 6.957497 19089 +kourosh 1 1 6.957497 6.957497 19090 +gharachorloo 1 1 6.957497 6.957497 19091 +netzer 1 1 6.957497 6.957497 19092 +vikram 1 1 6.957497 6.957497 19093 +kessleracm 1 1 6.957497 6.957497 19094 +graduatesmadhusudhan 1 1 6.957497 6.957497 19095 +updatedw 1 1 6.957497 6.957497 19096 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~markos^markos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~markos^markos.html new file mode 100644 index 00000000..2d34f6f0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~markos^markos.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +usaphon 1 9 4.753590 4.753590 1600 +assistantdepart 1 8 4.875197 4.875197 1784 +zaharioudaki 1 2 6.263398 6.263398 6119 +marko 3 1 6.957497 20.872491 19097 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html new file mode 100644 index 00000000..a6d7aeda --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mbirk^mbirk.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +list 1 201 1.609438 1.609438 39 +section 2 94 2.397895 4.795790 149 +michael 2 35 3.401197 6.802394 514 +birk 2 4 5.568345 11.136690 2791 +mbirk 1 3 5.857933 5.857933 3501 +alltraxx 1 1 6.957497 6.957497 19098 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html new file mode 100644 index 00000000..8cfacf19 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mcauliff^mcauliff.html @@ -0,0 +1,56 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +implement 1 152 1.791759 1.791759 52 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +data 1 170 1.791759 1.791759 49 +object 1 138 1.945910 1.945910 79 +hall 1 146 1.945910 1.945910 65 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +manag 1 114 2.197225 2.197225 125 +techniqu 1 99 2.302585 2.302585 138 +proceed 2 93 2.397895 4.795790 152 +ieee 1 86 2.484907 2.484907 190 +orient 1 80 2.564949 2.564949 205 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +simul 1 66 2.708050 2.708050 255 +march 1 61 2.833213 2.833213 295 +space 1 57 2.890372 2.890372 310 +pointer 1 48 3.044522 3.044522 368 +mark 4 44 3.135494 12.541976 403 +michael 1 35 3.401197 3.401197 514 +proc 1 26 3.688879 3.688879 649 +sigmod 2 19 4.007333 8.014666 877 +white 1 17 4.110874 4.110874 951 +dewitt 1 12 4.465908 4.465908 1270 +persist 1 11 4.553877 4.553877 1367 +franklin 1 10 4.653960 4.653960 1436 +naughton 1 10 4.653960 4.653960 1450 +solomon 3 8 4.875197 14.625591 1716 +carei 2 8 4.875197 9.750394 1781 +tsatalo 1 5 5.347108 5.347108 2581 +mcauliff 6 4 5.568345 33.410070 3083 +marvin 2 4 5.568345 11.136690 2806 +zwill 1 4 5.568345 5.568345 3076 +schuh 1 3 5.857933 5.857933 4014 +swizzl 1 3 5.857933 5.857933 3883 +shoringup 1 1 6.957497 6.957497 19099 +atrac 1 1 6.957497 6.957497 19100 +towardseffect 1 1 6.957497 6.957497 19101 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mds^mds.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mds^mds.html new file mode 100644 index 00000000..79a39f88 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mds^mds.html @@ -0,0 +1,117 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +work 1 380 0.693147 0.693147 9 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 4 242 1.386294 5.545176 33 +languag 3 227 1.386294 4.158882 26 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +paper 1 205 1.609438 1.609438 38 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +read 1 154 1.791759 1.791759 47 +address 1 170 1.791759 1.791759 62 +file 1 132 1.945910 1.945910 70 +introduct 1 126 2.079442 2.079442 87 +high 1 130 2.079442 2.079442 101 +schedul 1 119 2.079442 2.079442 85 +analysi 1 124 2.079442 2.079442 98 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +peopl 1 96 2.302585 2.302585 132 +school 1 84 2.484907 2.484907 188 +level 1 87 2.484907 2.484907 180 +appear 1 78 2.564949 2.564949 210 +know 1 80 2.564949 2.564949 198 +html 2 75 2.639057 5.278114 235 +symposium 1 72 2.639057 2.639057 238 +meet 1 72 2.639057 2.639057 229 +dept 1 64 2.772589 2.772589 291 +automat 1 61 2.833213 2.833213 306 +think 2 57 2.890372 5.780744 314 +point 1 58 2.890372 2.890372 319 +variou 1 56 2.890372 2.890372 317 +talk 1 53 2.944439 2.944439 336 +much 1 52 2.995732 2.995732 349 +finger 1 52 2.995732 2.995732 354 +pointer 2 48 3.044522 6.089044 368 +cool 1 49 3.044522 3.044522 374 +principl 1 48 3.044522 3.044522 357 +term 1 43 3.178054 3.178054 411 +fast 2 42 3.218876 6.437752 429 +movi 1 40 3.258097 3.258097 459 +littl 1 39 3.258097 3.258097 454 +submit 1 39 3.258097 3.258097 440 +realli 1 40 3.258097 3.258097 444 +paul 1 38 3.295837 3.295837 471 +abl 1 30 3.555348 3.555348 566 +hope 1 28 3.610918 3.610918 610 +never 1 25 3.737670 3.737670 671 +accur 1 25 3.737670 3.737670 680 +flow 1 24 3.761200 3.761200 700 +togeth 1 23 3.806662 3.806662 714 +try 1 22 3.850148 3.850148 764 +hous 2 21 3.912023 7.824046 801 +watch 1 21 3.912023 3.912023 789 +wrote 1 20 3.951244 3.951244 830 +mostli 1 19 4.007333 4.007333 869 +miss 1 19 4.007333 4.007333 866 +lot 1 18 4.060443 4.060443 889 +previous 1 17 4.110874 4.110874 923 +brown 1 16 4.174387 4.174387 977 +todd 1 15 4.248495 4.248495 1051 +doesn 1 15 4.248495 4.248495 1055 +susan 1 15 4.248495 4.248495 1050 +believ 1 13 4.382027 4.382027 1187 +recurs 1 13 4.382027 4.382027 1127 +step 1 13 4.382027 4.382027 1138 +emac 1 13 4.382027 4.382027 1143 +menu 1 13 4.382027 4.382027 1156 +jonathan 1 13 4.382027 4.382027 1174 +submiss 1 11 4.553877 4.553877 1298 +mode 1 9 4.753590 4.753590 1492 +marc 6 8 4.875197 29.251182 1680 +shapiro 5 8 4.875197 24.375985 1686 +analys 1 8 4.875197 4.875197 1666 +pldi 1 8 4.875197 4.875197 1704 +chan 1 7 5.010635 5.010635 1876 +elementari 1 7 5.010635 5.010635 1825 +interrupt 1 7 5.010635 5.010635 1793 +tag 1 7 5.010635 5.010635 1821 +lawrenc 1 7 5.010635 5.010635 1908 +recov 1 6 5.164786 5.164786 2235 +goldstein 1 6 5.164786 5.164786 2168 +elain 1 5 5.347108 5.347108 2496 +hyper 1 5 5.347108 5.347108 2435 +horwitz 1 5 5.347108 5.347108 2411 +fear 1 4 5.568345 5.568345 2911 +backward 1 4 5.568345 5.568345 2638 +popl 1 4 5.568345 5.568345 3068 +insensit 1 4 5.568345 5.568345 2716 +hoar 1 3 5.857933 5.857933 3875 +obsess 1 2 6.263398 6.263398 5924 +disappear 1 2 6.263398 6.263398 4748 +accid 1 2 6.263398 6.263398 5961 +softwarei 1 2 6.263398 6.263398 4960 +tautolog 3 1 6.957497 20.872491 19102 +fond 1 1 6.957497 6.957497 19103 +repuls 1 1 6.957497 6.957497 19104 +ponder 1 1 6.957497 6.957497 19105 +jacki 1 1 6.957497 6.957497 19106 +dimasi 1 1 6.957497 6.957497 19107 +twisti 1 1 6.957497 6.957497 19108 +amanda 1 1 6.957497 6.957497 19109 +peet 1 1 6.957497 6.957497 19110 +retreather 1 1 6.957497 6.957497 19111 +thepul 1 1 6.957497 6.957497 19112 +cobbl 1 1 6.957497 6.957497 19113 +nowinclud 1 1 6.957497 6.957497 19114 +shapiroand 1 1 6.957497 6.957497 19115 +marion 1 1 6.957497 6.957497 19116 +ferguson 1 1 6.957497 6.957497 19117 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html new file mode 100644 index 00000000..3f94e474 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mellen^mellen.html @@ -0,0 +1,23 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +system 1 443 0.693147 0.693147 6 +offic 3 299 1.098612 3.295836 13 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +modifi 1 178 1.609438 1.609438 35 +oper 1 180 1.609438 1.609438 34 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +introduct 1 126 2.079442 2.079442 87 +build 1 85 2.484907 2.484907 184 +august 1 66 2.708050 2.708050 257 +appoint 1 49 3.044522 3.044522 358 +mellen 2 2 6.263398 12.526796 4708 +mellencamp 1 2 6.263398 6.263398 4707 +pagerob 1 1 6.957497 6.957497 19118 +minimalist 1 1 6.957497 6.957497 19119 +taship 1 1 6.957497 6.957497 19120 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~melski^melski.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~melski^melski.html new file mode 100644 index 00000000..4af91b80 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~melski^melski.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +last 1 314 1.098612 1.098612 14 +also 3 259 1.386294 4.158882 28 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +recent 1 167 1.791759 1.791759 58 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +person 2 111 2.197225 4.394450 117 +teach 1 108 2.197225 2.197225 112 +make 1 111 2.197225 2.197225 120 +need 1 98 2.302585 2.302585 135 +section 1 94 2.397895 2.397895 149 +info 1 85 2.484907 2.484907 176 +west 1 83 2.484907 2.484907 192 +second 1 81 2.484907 2.484907 166 +want 1 79 2.564949 2.564949 199 +david 2 71 2.639057 5.278114 232 +plan 1 65 2.772589 2.772589 272 +back 1 60 2.833213 2.833213 297 +semest 1 58 2.890372 2.890372 312 +major 1 56 2.890372 2.890372 315 +direct 1 57 2.890372 2.890372 316 +still 1 50 3.044522 3.044522 362 +give 1 50 3.044522 3.044522 359 +even 1 45 3.135494 3.135494 393 +map 1 39 3.258097 3.258097 452 +often 1 31 3.496508 3.496508 551 +great 1 27 3.637586 3.637586 626 +determin 1 27 3.637586 3.637586 630 +spent 1 25 3.737670 3.737670 676 +eric 1 19 4.007333 4.007333 870 +miss 1 19 4.007333 4.007333 866 +coupl 1 17 4.110874 4.110874 939 +brother 1 13 4.382027 4.382027 1189 +sister 1 9 4.753590 4.753590 1524 +undergrad 1 9 4.753590 4.753590 1589 +soccer 1 8 4.875197 4.875197 1752 +chanc 1 7 5.010635 5.010635 1960 +chess 1 5 5.347108 5.347108 2486 +rewrit 1 5 5.347108 5.347108 2367 +rep 1 4 5.568345 5.568345 3087 +somedai 2 3 5.857933 11.715866 3919 +michel 1 3 5.857933 5.857933 3791 +distract 1 3 5.857933 5.857933 3945 +melski 3 2 6.263398 18.790194 4780 +pagedavid 1 2 6.263398 6.263398 5114 +mill 1 2 6.263398 6.263398 6193 +awesom 1 2 6.263398 6.263398 6167 +russia 1 2 6.263398 6.263398 5756 +hasti 1 2 6.263398 6.263398 6173 +steal 1 2 6.263398 6.263398 5485 +russian 2 1 6.957497 13.914994 19121 +melskicurr 1 1 6.957497 6.957497 19122 +statisticsmadison 1 1 6.957497 6.957497 19123 +permen 1 1 6.957497 6.957497 19124 +ivesmarshfield 1 1 6.957497 6.957497 19125 +kasei 1 1 6.957497 6.957497 19126 +myexact 1 1 6.957497 6.957497 19127 +studiesher 1 1 6.957497 6.957497 19128 +semesterof 1 1 6.957497 6.957497 19129 +beenbik 1 1 6.957497 6.957497 19130 +numerousbook 1 1 6.957497 6.957497 19131 +tomapquest 1 1 6.957497 6.957497 19132 +alot 1 1 6.957497 6.957497 19133 +marshfield 1 1 6.957497 6.957497 19134 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~milo^milo.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~milo^milo.html new file mode 100644 index 00000000..3c9bb2dc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~milo^milo.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 5 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +interest 6 384 0.693147 4.158882 11 +program 3 374 0.693147 2.079441 7 +system 3 443 0.693147 2.079441 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 2 343 1.098612 2.197224 19 +offic 2 299 1.098612 2.197224 13 +us 1 329 1.098612 1.098612 16 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +wisc 3 242 1.386294 4.158882 33 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +design 1 213 1.386294 1.386294 25 +mail 1 238 1.386294 1.386294 22 +email 1 220 1.386294 1.386294 29 +oper 2 180 1.609438 3.218876 34 +class 1 199 1.609438 1.609438 37 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +develop 1 174 1.791759 1.791759 53 +parallel 1 169 1.791759 1.791759 60 +applic 1 170 1.791759 1.791759 56 +architectur 3 139 1.945910 5.837730 77 +year 3 148 1.945910 5.837730 84 +perform 2 143 1.945910 3.891820 74 +construct 1 139 1.945910 1.945910 82 +first 1 140 1.945910 1.945910 71 +compil 3 122 2.079442 6.238326 96 +technolog 2 131 2.079442 4.158884 102 +dayton 1 119 2.079442 2.079442 104 +confer 1 126 2.079442 2.079442 100 +postscript 1 131 2.079442 2.079442 90 +teach 1 108 2.197225 2.197225 112 +specif 1 106 2.197225 2.197225 106 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +world 1 115 2.197225 2.197225 126 +advanc 2 99 2.302585 4.605170 130 +techniqu 1 99 2.302585 2.302585 138 +mani 3 92 2.397895 7.193685 150 +section 1 94 2.397895 2.397895 149 +present 1 91 2.397895 2.397895 145 +imag 1 91 2.397895 2.397895 161 +sinc 1 90 2.397895 2.397895 159 +associ 1 93 2.397895 2.397895 151 +west 1 83 2.484907 2.484907 192 +thing 1 84 2.484907 2.484907 189 +ieee 1 86 2.484907 2.484907 190 +school 1 84 2.484907 2.484907 188 +resourc 1 81 2.484907 2.484907 172 +educ 1 86 2.484907 2.484907 191 +build 1 85 2.484907 2.484907 184 +optim 1 79 2.564949 2.564949 197 +know 1 80 2.564949 2.564949 198 +dynam 1 76 2.564949 2.564949 194 +tuesdai 1 73 2.639057 2.639057 219 +addit 1 74 2.639057 2.639057 228 +nation 1 74 2.639057 2.639057 240 +symposium 1 72 2.639057 2.639057 238 +java 4 70 2.708050 10.832200 248 +thursdai 1 70 2.708050 2.708050 241 +view 1 70 2.708050 2.708050 254 +receiv 1 66 2.708050 2.708050 244 +evalu 2 64 2.772589 5.545178 266 +laboratori 1 63 2.772589 2.772589 292 +organ 1 65 2.772589 2.772589 265 +colleg 3 61 2.833213 8.499639 300 +plai 3 60 2.833213 8.499639 307 +best 1 59 2.833213 2.833213 299 +simpl 1 60 2.833213 2.833213 298 +direct 2 57 2.890372 5.780744 316 +explor 2 58 2.890372 5.780744 324 +summer 1 56 2.890372 2.890372 311 +thesi 1 57 2.890372 2.890372 327 +space 1 57 2.890372 2.890372 310 +found 1 53 2.944439 2.944439 337 +scientif 1 53 2.944439 2.944439 341 +hardwar 1 51 2.995732 2.995732 350 +profession 1 51 2.995732 2.995732 345 +standard 1 48 3.044522 3.044522 365 +right 1 48 3.044522 3.044522 363 +mark 2 44 3.135494 6.270988 403 +algebra 1 45 3.135494 3.135494 394 +even 1 45 3.135494 3.135494 393 +show 1 43 3.178054 3.178054 417 +fast 2 42 3.218876 6.437752 429 +compani 1 41 3.218876 3.218876 423 +combin 1 42 3.218876 3.218876 421 +live 2 40 3.258097 6.516194 451 +streetmadison 1 38 3.295837 3.295837 474 +open 1 38 3.295837 3.295837 469 +game 3 36 3.367296 10.101888 498 +multi 1 36 3.367296 3.367296 493 +next 2 34 3.401197 6.802394 517 +jame 1 35 3.401197 3.401197 507 +eduoffic 1 33 3.433987 3.433987 531 +quot 2 29 3.583519 7.167038 582 +team 1 27 3.637586 3.637586 625 +detect 2 26 3.688879 7.377758 646 +rule 2 26 3.688879 7.377758 638 +challeng 1 26 3.688879 3.688879 653 +hill 2 25 3.737670 7.475340 670 +departmentunivers 1 24 3.761200 3.761200 711 +mobil 1 23 3.806662 3.806662 730 +honor 1 23 3.806662 3.806662 729 +head 1 23 3.806662 3.806662 732 +serv 1 22 3.850148 3.850148 758 +martin 6 21 3.912023 23.472138 794 +programminglanguag 1 21 3.912023 3.912023 782 +divis 1 21 3.912023 3.912023 803 +wrote 1 20 3.951244 3.951244 830 +wonder 1 20 3.951244 3.951244 815 +element 1 18 4.060443 4.060443 895 +ultim 3 17 4.110874 12.332622 943 +medic 1 17 4.110874 4.110874 958 +senior 1 14 4.317488 4.317488 1120 +train 1 14 4.317488 4.317488 1066 +charl 2 13 4.382027 8.764054 1149 +everyon 2 13 4.382027 8.764054 1148 +land 2 12 4.465908 8.931816 1273 +promot 1 12 4.465908 4.465908 1235 +basketbal 1 12 4.465908 4.465908 1289 +player 5 11 4.553877 22.769385 1371 +transmiss 1 9 4.753590 4.753590 1588 +discov 1 9 4.753590 4.753590 1562 +babylon 1 8 4.875197 4.875197 1731 +footbal 3 7 5.010635 15.031905 1912 +fischer 1 7 5.010635 5.010635 1893 +interestsi 1 7 5.010635 5.010635 1969 +paramet 1 7 5.010635 5.010635 1796 +dedic 1 7 5.010635 5.010635 1843 +ethic 1 7 5.010635 5.010635 1786 +trade 1 7 5.010635 5.010635 1815 +advis 2 6 5.164786 10.329572 2173 +reconstruct 1 6 5.164786 5.164786 2170 +determinist 1 6 5.164786 5.164786 2034 +pace 1 6 5.164786 5.164786 2011 +minnesota 2 5 5.347108 10.694216 2469 +argonn 1 5 5.347108 5.347108 2461 +nuclear 1 5 5.347108 5.347108 2576 +frisbe 1 5 5.347108 5.347108 2560 +publicationsresearch 2 4 5.568345 11.136690 2876 +chees 1 4 5.568345 5.568345 3090 +fink 3 3 5.857933 17.573799 3425 +assistantcomput 1 3 5.857933 5.857933 4027 +usaemail 1 3 5.857933 5.857933 3722 +sit 1 3 5.857933 5.857933 3953 +informationtechnolog 1 3 5.857933 5.857933 3836 +interchang 1 3 5.857933 5.857933 3893 +myfavorit 1 3 5.857933 5.857933 3852 +armi 1 3 5.857933 5.857933 3562 +milo 6 2 6.263398 37.580388 4781 +spectroscopi 1 2 6.263398 6.263398 6206 +meth 1 2 6.263398 6.263398 5872 +eventhough 1 2 6.263398 6.263398 6158 +conquer 1 2 6.263398 6.263398 5112 +combat 1 2 6.263398 6.263398 5473 +fight 1 2 6.263398 6.263398 5209 +monster 1 2 6.263398 6.263398 6207 +gustavu 3 1 6.957497 20.872491 19135 +adolphu 3 1 6.957497 20.872491 19136 +atlanti 3 1 6.957497 20.872491 19137 +humm 2 1 6.957497 13.914994 19138 +micklich 2 1 6.957497 13.914994 19139 +illicitsubst 2 1 6.957497 13.914994 19140 +neutron 2 1 6.957497 13.914994 19141 +hailperin 2 1 6.957497 13.914994 19142 +pagemilo 1 1 6.957497 6.957497 19143 +byappointmentba 1 1 6.957497 6.957497 19144 +larusteach 1 1 6.957497 6.957497 19145 +beinfluenc 1 1 6.957497 6.957497 19146 +yule 1 1 6.957497 6.957497 19147 +sagalovski 1 1 6.957497 6.957497 19148 +nucl 1 1 6.957497 6.957497 19149 +inst 1 1 6.957497 6.957497 19150 +languageflex 1 1 6.957497 6.957497 19151 +anintern 1 1 6.957497 6.957497 19152 +toadvanc 1 1 6.957497 6.957497 19153 +fosteringth 1 1 6.957497 6.957497 19154 +highestprofession 1 1 6.957497 6.957497 19155 +bignfl 1 1 6.957497 6.957497 19156 +vike 1 1 6.957497 6.957497 19157 +colon 1 1 6.957497 6.957497 19158 +imho 1 1 6.957497 6.957497 19159 +mythic 1 1 6.957497 6.957497 19160 +engaug 1 1 6.957497 6.957497 19161 +wizard 1 1 6.957497 6.957497 19162 +underworld 1 1 6.957497 6.957497 19163 +ofsocc 1 1 6.957497 6.957497 19164 +afrisbe 1 1 6.957497 6.957497 19165 +quarterback 1 1 6.957497 6.957497 19166 +ultimatein 1 1 6.957497 6.957497 19167 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~minos^minos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~minos^minos.html new file mode 100644 index 00000000..e6c2c917 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~minos^minos.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 7 775 0.000000 0.000000 2 +home 6 672 0.000000 0.000000 1 +page 6 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +engin 1 297 1.098612 1.098612 20 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +public 2 202 1.609438 3.218876 43 +paper 2 205 1.609438 3.218876 38 +updat 1 191 1.609438 1.609438 41 +madison 4 165 1.791759 7.167036 55 +parallel 3 169 1.791759 5.375277 60 +wisconsin 2 169 1.791759 3.583518 54 +phone 1 175 1.791759 1.791759 45 +algorithm 1 162 1.791759 1.791759 57 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +construct 1 139 1.945910 1.945910 82 +databas 4 122 2.079442 8.317768 86 +schedul 3 119 2.079442 6.238326 85 +postscript 2 131 2.079442 4.158884 90 +report 2 131 2.079442 4.158884 92 +dayton 1 119 2.079442 2.079442 104 +confer 1 126 2.079442 2.079442 100 +technolog 1 131 2.079442 2.079442 102 +assist 1 112 2.197225 2.197225 113 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +technic 2 100 2.302585 4.605170 140 +proceed 1 93 2.397895 2.397895 152 +octob 1 89 2.397895 2.397895 156 +real 1 93 2.397895 2.397895 144 +center 1 88 2.397895 2.397895 158 +resourc 3 81 2.484907 7.454721 172 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +optim 2 79 2.564949 5.129898 197 +decemb 2 80 2.564949 5.129898 215 +june 2 79 2.564949 5.129898 214 +issu 1 78 2.564949 2.564949 211 +resum 1 79 2.564949 2.564949 217 +server 1 76 2.564949 2.564949 204 +free 1 73 2.639057 2.639057 224 +logic 1 71 2.639057 2.639057 230 +multimedia 2 68 2.708050 5.416100 258 +view 1 70 2.708050 2.708050 254 +dept 2 64 2.772589 5.545178 291 +complex 1 64 2.772589 2.772589 269 +abstract 1 62 2.772589 2.772589 276 +juli 1 60 2.833213 2.833213 305 +februari 1 54 2.944439 2.944439 328 +advisor 1 51 2.995732 2.995732 355 +pointer 1 48 3.044522 3.044522 368 +effect 1 46 3.091042 3.091042 385 +continu 1 39 3.258097 3.258097 448 +submit 1 39 3.258097 3.258097 440 +societi 1 40 3.258097 3.258097 456 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +feel 1 37 3.332205 3.332205 483 +multi 1 36 3.367296 3.367296 493 +survei 1 35 3.401197 3.401197 513 +michael 1 35 3.401197 3.401197 514 +queri 3 33 3.433987 10.301961 524 +enhanc 1 26 3.688879 3.688879 644 +sequenti 1 22 3.850148 3.850148 745 +sigmod 2 19 4.007333 8.014666 877 +media 1 19 4.007333 4.007333 861 +dimension 1 18 4.060443 4.060443 909 +stat 1 17 4.110874 4.110874 924 +canada 1 13 4.382027 4.382027 1158 +dbm 1 13 4.382027 4.382027 1136 +probabilist 1 11 4.553877 4.553877 1343 +vldb 1 10 4.653960 4.653960 1470 +candid 1 9 4.753590 4.753590 1606 +yanni 3 8 4.875197 14.625591 1713 +ioannidi 2 8 4.875197 9.750394 1714 +watson 1 8 4.875197 4.875197 1691 +refere 1 7 5.010635 5.010635 1895 +montreal 1 7 5.010635 5.010635 1961 +usaoffic 1 6 5.164786 5.164786 2159 +silberschatz 1 6 5.164786 5.164786 1978 +peek 1 6 5.164786 5.164786 2169 +almaden 1 5 5.347108 5.347108 2511 +informat 1 3 5.857933 5.857933 3839 +mino 6 2 6.263398 37.580388 6208 +garofalaki 4 2 6.263398 25.053592 6209 +patra 2 2 6.263398 12.526796 5537 +ozden 1 2 6.263398 6.263398 5749 +reasearch 1 2 6.263398 6.263398 5538 +hellen 1 2 6.263398 6.263398 6210 +garofalakismino 1 1 6.957497 6.957497 19168 +eduphd 1 1 6.957497 6.957497 19169 +workresearch 1 1 6.957497 6.957497 19170 +theoryeduc 1 1 6.957497 6.957497 19171 +banu 1 1 6.957497 6.957497 19172 +ioannidismor 1 1 6.957497 6.957497 19173 +centerdr 1 1 6.957497 6.957497 19174 +bibliograpi 1 1 6.957497 6.957497 19175 +perpetu 1 1 6.957497 6.957497 19176 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html new file mode 100644 index 00000000..64e8aedc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mjrg^mjrg.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +madison 3 165 1.791759 5.375277 55 +phone 2 175 1.791759 3.583518 45 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +associ 1 93 2.397895 2.397895 151 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +sciencesunivers 1 37 3.332205 3.332205 486 +paradyn 1 9 4.753590 4.753590 1614 +marcelo 2 2 6.263398 12.526796 6199 +sheboygan 1 2 6.263398 6.263398 6198 +gonalv 2 1 6.957497 13.914994 19177 +mjrg 1 1 6.957497 6.957497 19178 +addresswork 1 1 6.957497 6.957497 19179 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html new file mode 100644 index 00000000..d1350e52 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~morgan^morgan.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +interest 1 384 0.693147 0.693147 11 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +group 1 183 1.609438 1.609438 36 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +network 1 168 1.791759 1.791759 61 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +imag 1 91 2.397895 2.397895 161 +west 1 83 2.484907 2.484907 192 +advisor 1 51 2.995732 2.995732 355 +telephon 1 50 3.044522 3.044522 373 +video 1 44 3.135494 3.135494 405 +vision 1 41 3.218876 3.218876 430 +brian 2 38 3.295837 6.591674 466 +streetmadison 1 38 3.295837 3.295837 474 +compress 1 23 3.806662 3.806662 719 +chuck 1 14 4.317488 4.317488 1108 +bandwidth 1 11 4.553877 4.553877 1365 +morgan 2 9 4.753590 9.507180 1484 +conferenc 2 7 5.010635 10.021270 1857 +studentcomput 1 7 5.010635 5.010635 1963 +morgangradu 1 1 6.957497 6.957497 19180 +dyerresearch 1 1 6.957497 6.957497 19181 +interestsvirtu 1 1 6.957497 6.957497 19182 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html new file mode 100644 index 00000000..d3172318 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~moshovos^moshovos.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +work 3 380 0.693147 2.079441 9 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +gener 1 220 1.386294 1.386294 27 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +wisconsin 3 169 1.791759 5.375277 54 +data 2 170 1.791759 3.583518 49 +address 1 170 1.791759 1.791759 62 +parallel 1 169 1.791759 1.791759 60 +implement 1 152 1.791759 1.791759 52 +algorithm 1 162 1.791759 1.791759 57 +network 1 168 1.791759 1.791759 61 +support 3 132 1.945910 5.837730 83 +architectur 2 139 1.945910 3.891820 77 +click 1 142 1.945910 1.945910 78 +like 1 132 1.945910 1.945910 81 +postscript 2 131 2.079442 4.158884 90 +report 1 131 2.079442 2.079442 92 +compil 1 122 2.079442 2.079442 96 +spring 1 131 2.079442 2.079442 88 +assist 1 112 2.197225 2.197225 113 +send 1 114 2.197225 2.197225 109 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +mani 2 92 2.397895 4.795790 150 +sinc 1 90 2.397895 2.397895 159 +level 1 87 2.484907 2.484907 180 +chang 1 82 2.484907 2.484907 163 +want 1 79 2.564949 2.564949 199 +write 1 72 2.639057 2.639057 222 +meet 1 72 2.639057 2.639057 229 +nation 1 74 2.639057 2.639057 240 +free 1 73 2.639057 2.639057 224 +degre 1 69 2.708050 2.708050 259 +descript 1 64 2.772589 2.772589 271 +visit 1 63 2.772589 2.772589 288 +copi 1 63 2.772589 2.772589 284 +processor 3 54 2.944439 8.833317 335 +sampl 1 53 2.944439 2.944439 339 +talk 1 53 2.944439 2.944439 336 +instruct 1 53 2.944439 2.944439 332 +found 1 53 2.944439 2.944439 337 +local 1 55 2.944439 2.944439 334 +advisor 1 51 2.995732 2.995732 355 +numer 1 49 3.044522 3.044522 369 +futur 1 41 3.218876 3.218876 427 +york 1 41 3.218876 3.218876 435 +howev 1 41 3.218876 3.218876 422 +editor 1 41 3.218876 3.218876 433 +edit 1 42 3.218876 3.218876 418 +slide 1 38 3.295837 3.295837 467 +sciencesunivers 1 37 3.332205 3.332205 486 +download 2 36 3.367296 6.734592 489 +short 1 36 3.367296 3.367296 499 +obtain 1 33 3.433987 3.433987 534 +depend 1 29 3.583519 3.583519 583 +load 1 28 3.610918 3.610918 601 +bookmark 1 26 3.688879 3.688879 639 +compress 1 23 3.806662 3.806662 719 +instal 1 22 3.850148 3.850148 754 +leav 1 21 3.912023 3.912023 772 +vlsi 1 21 3.912023 3.912023 795 +sure 1 20 3.951244 3.951244 813 +excel 1 19 4.007333 4.007333 868 +transfer 1 16 4.174387 4.174387 967 +balanc 1 14 4.317488 4.317488 1112 +brother 1 13 4.382027 4.382027 1189 +wife 1 13 4.382027 4.382027 1196 +resid 1 10 4.653960 4.653960 1461 +poetri 1 9 4.753590 4.753590 1596 +herefor 1 9 4.753590 4.753590 1483 +multiscalar 2 8 4.875197 9.750394 1783 +dictionari 1 8 4.875197 4.875197 1642 +earn 1 7 5.010635 5.010635 1788 +pipelin 1 7 5.010635 5.010635 1830 +greec 2 6 5.164786 10.329572 2208 +peek 1 6 5.164786 5.164786 2169 +andrea 2 5 5.347108 10.694216 2375 +guri 1 5 5.347108 5.347108 2578 +hyper 1 5 5.347108 5.347108 2435 +kestrel 1 4 5.568345 5.568345 2990 +decoupl 1 4 5.568345 5.568345 2898 +mess 1 4 5.568345 5.568345 2886 +specul 2 3 5.857933 11.715866 3951 +crete 2 3 5.857933 11.715866 3773 +greek 2 3 5.857933 11.715866 3595 +uncompress 1 3 5.857933 5.857933 3177 +moshovo 1 2 6.263398 6.263398 6211 +madisonadvisor 1 2 6.263398 6.263398 6212 +instituteof 1 2 6.263398 6.263398 5507 +architecturethat 1 2 6.263398 6.263398 5876 +hellen 1 2 6.263398 6.263398 6210 +font 1 2 6.263398 6.263398 5845 +moshovosresearch 1 1 6.957497 6.957497 19183 +sohigroup 1 1 6.957497 6.957497 19184 +notese 1 1 6.957497 6.957497 19185 +aroundw 1 1 6.957497 6.957497 19186 +clickheremi 1 1 6.957497 6.957497 19187 +explot 1 1 6.957497 6.957497 19188 +thecour 1 1 6.957497 6.957497 19189 +theopportun 1 1 6.957497 6.957497 19190 +kateveni 1 1 6.957497 6.957497 19191 +viha 1 1 6.957497 6.957497 19192 +resouc 1 1 6.957497 6.957497 19193 +atwww 1 1 6.957497 6.957497 19194 +devil 1 1 6.957497 6.957497 19195 +fraud 1 1 6.957497 6.957497 19196 +centerusenet 1 1 6.957497 6.957497 19197 +afax 1 1 6.957497 6.957497 19198 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~moy^moy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~moy^moy.html new file mode 100644 index 00000000..5bce26cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~moy^moy.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +updat 3 191 1.609438 4.828314 41 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +contact 2 153 1.791759 3.583518 59 +octob 1 89 2.397895 2.397895 156 +chang 2 82 2.484907 4.969814 163 +second 1 81 2.484907 2.484907 166 +level 1 87 2.484907 2.484907 180 +main 1 67 2.708050 2.708050 256 +back 1 60 2.833213 2.833213 297 +sever 1 56 2.890372 2.890372 322 +variou 1 56 2.890372 2.890372 317 +index 1 56 2.890372 2.890372 309 +friend 1 48 3.044522 3.044522 376 +better 1 45 3.135494 3.135494 401 +keep 1 44 3.135494 3.135494 409 +favorit 1 44 3.135494 3.135494 410 +background 1 25 3.737670 3.737670 664 +navig 1 21 3.912023 3.912023 796 +brief 1 16 4.174387 4.174387 1001 +minor 1 12 4.465908 4.465908 1237 +black 1 10 4.653960 4.653960 1418 +prefer 1 9 4.753590 4.753590 1491 +contrast 1 8 4.875197 4.875197 1637 +older 1 5 5.347108 5.347108 2387 +toni 2 3 5.857933 11.715866 3415 +herear 1 2 6.263398 6.263398 5947 +pagewhat 1 1 6.957497 6.957497 19199 +newoctob 1 1 6.957497 6.957497 19200 +inmadison 1 1 6.957497 6.957497 19201 +informationlast 1 1 6.957497 6.957497 19202 +educopyright 1 1 6.957497 6.957497 19203 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html new file mode 100644 index 00000000..a33cd337 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mreames^mreames.html @@ -0,0 +1,152 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +interest 3 384 0.693147 2.079441 11 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +student 2 343 1.098612 2.197224 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +design 3 213 1.386294 4.158882 25 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +gener 1 220 1.386294 1.386294 27 +updat 1 191 1.609438 1.609438 41 +modifi 1 178 1.609438 1.609438 35 +wisconsin 4 169 1.791759 7.167036 54 +madison 2 165 1.791759 3.583518 55 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +year 2 148 1.945910 3.891820 84 +model 1 145 1.945910 1.945910 69 +databas 3 122 2.079442 6.238326 86 +compil 2 122 2.079442 4.158884 96 +machin 1 129 2.079442 2.079442 95 +dayton 1 119 2.079442 2.079442 104 +spring 1 131 2.079442 2.079442 88 +studi 1 120 2.079442 2.079442 91 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +mathemat 1 108 2.197225 2.197225 123 +well 1 109 2.197225 2.197225 121 +section 1 94 2.397895 2.397895 149 +exam 2 86 2.484907 4.969814 169 +thing 2 84 2.484907 4.969814 189 +educ 1 86 2.484907 2.484907 191 +learn 1 86 2.484907 2.484907 170 +stuff 1 87 2.484907 2.484907 171 +want 2 79 2.564949 5.129898 199 +resum 1 79 2.564949 2.564949 217 +know 1 80 2.564949 2.564949 198 +logic 1 71 2.639057 2.639057 230 +html 1 75 2.639057 2.639057 235 +free 1 73 2.639057 2.639057 224 +line 1 75 2.639057 2.639057 231 +dept 2 64 2.772589 5.545178 291 +previou 1 62 2.772589 2.772589 290 +plai 1 60 2.833213 2.833213 307 +reason 1 57 2.890372 2.890372 318 +summer 1 56 2.890372 2.890372 311 +talk 1 53 2.944439 2.944439 336 +februari 1 54 2.944439 2.944439 328 +finger 1 52 2.995732 2.995732 354 +particular 1 51 2.995732 2.995732 352 +digit 1 52 2.995732 2.995732 348 +telephon 2 50 3.044522 6.089044 373 +life 1 50 3.044522 3.044522 375 +even 2 45 3.135494 6.270988 393 +better 1 45 3.135494 3.135494 401 +third 1 43 3.178054 3.178054 412 +around 1 43 3.178054 3.178054 415 +might 2 41 3.218876 6.437752 426 +examin 1 42 3.218876 3.218876 424 +realli 1 40 3.258097 3.258097 444 +probabl 1 40 3.258097 3.258097 455 +feel 1 37 3.332205 3.332205 483 +product 1 33 3.433987 3.433987 527 +often 1 31 3.496508 3.496508 551 +usual 1 28 3.610918 3.610918 608 +weather 1 28 3.610918 3.610918 618 +campu 1 27 3.637586 3.637586 623 +enjoi 1 26 3.688879 3.688879 660 +notic 1 25 3.737670 3.737670 675 +departmentunivers 1 24 3.761200 3.761200 711 +sometim 1 24 3.761200 3.761200 696 +alwai 1 24 3.761200 3.761200 691 +wish 1 24 3.761200 3.761200 692 +head 1 23 3.806662 3.806662 732 +hierarchi 1 22 3.850148 3.850148 744 +martin 3 21 3.912023 11.736069 794 +exploit 1 20 3.951244 3.951244 836 +exercis 1 19 4.007333 4.007333 842 +concentr 1 18 4.060443 4.060443 906 +ultim 1 17 4.110874 4.110874 943 +doesn 1 15 4.248495 4.248495 1055 +qual 1 15 4.248495 4.248495 1062 +senior 1 14 4.317488 4.317488 1120 +role 1 14 4.317488 4.317488 1101 +affili 1 13 4.382027 4.382027 1194 +basketbal 1 12 4.465908 4.465908 1289 +mountain 1 10 4.653960 4.653960 1456 +bike 1 10 4.653960 4.653960 1468 +mention 1 9 4.753590 4.753590 1569 +qualifi 1 8 4.875197 4.875197 1721 +heart 1 8 4.875197 4.875197 1729 +besid 1 8 4.875197 4.875197 1681 +round 1 8 4.875197 4.875197 1769 +relax 1 6 5.164786 5.164786 2120 +squash 1 6 5.164786 5.164786 2223 +adjust 1 5 5.347108 5.347108 2422 +crucial 1 5 5.347108 5.347108 2384 +frisbe 1 5 5.347108 5.347108 2560 +gone 2 4 5.568345 11.136690 3072 +afraid 1 4 5.568345 5.568345 3053 +poorli 1 4 5.568345 5.568345 2781 +wesleyan 1 3 5.857933 5.857933 3988 +coke 5 2 6.263398 31.316990 5935 +ream 3 2 6.263398 18.790194 4783 +mream 2 2 6.263398 12.526796 4784 +terrain 1 2 6.263398 6.263398 6174 +logicprogram 1 2 6.263398 6.263398 4262 +interestsin 1 2 6.263398 6.263398 6213 +unif 1 2 6.263398 6.263398 5910 +mighti 1 2 6.263398 6.263398 4863 +tomi 1 2 6.263398 6.263398 5846 +mental 1 2 6.263398 6.263398 5802 +yeargradu 1 2 6.263398 6.263398 6015 +poobah 3 1 6.957497 20.872491 19204 +edufal 1 1 6.957497 6.957497 19205 +scheduleresearch 1 1 6.957497 6.957497 19206 +tin 1 1 6.957497 6.957497 19207 +orientedenviron 1 1 6.957497 6.957497 19208 +postscriptand 1 1 6.957497 6.957497 19209 +faint 1 1 6.957497 6.957497 19210 +alink 1 1 6.957497 6.957497 19211 +honorsthesi 1 1 6.957497 6.957497 19212 +poobahlook 1 1 6.957497 6.957497 19213 +dear 1 1 6.957497 6.957497 19214 +tosomeon 1 1 6.957497 6.957497 19215 +youshould 1 1 6.957497 6.957497 19216 +elton 1 1 6.957497 6.957497 19217 +imaginethat 1 1 6.957497 6.957497 19218 +aforement 1 1 6.957497 6.957497 19219 +poobahship 1 1 6.957497 6.957497 19220 +ill 1 1 6.957497 6.957497 19221 +afew 1 1 6.957497 6.957497 19222 +indatabas 1 1 6.957497 6.957497 19223 +inearli 1 1 6.957497 6.957497 19224 +andinfrequ 1 1 6.957497 6.957497 19225 +rapidlyrid 1 1 6.957497 6.957497 19226 +chilliest 1 1 6.957497 6.957497 19227 +helmet 1 1 6.957497 6.957497 19228 +mynot 1 1 6.957497 6.957497 19229 +ilik 1 1 6.957497 6.957497 19230 +librarylast 1 1 6.957497 6.957497 19231 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mscalar^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mscalar^ new file mode 100644 index 00000000..a6269179 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~mscalar^ @@ -0,0 +1,35 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +project 3 340 1.098612 3.295836 18 +last 1 314 1.098612 1.098612 14 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 3 169 1.791759 5.375277 54 +avail 1 169 1.791759 1.791759 48 +architectur 2 139 1.945910 3.891820 77 +relat 1 139 1.945910 1.945910 68 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +peopl 1 96 2.302585 2.302585 132 +user 1 104 2.302585 2.302585 137 +wide 1 84 2.484907 2.484907 185 +sourc 1 77 2.564949 2.564949 201 +talk 1 53 2.944439 2.944439 336 +local 1 55 2.944439 2.944439 334 +februari 1 54 2.944439 2.944439 328 +given 1 32 3.465736 3.465736 538 +fund 1 21 3.912023 3.912023 805 +multiscalar 3 8 4.875197 14.625591 1783 +sohi 2 6 5.164786 10.329572 2237 +departmentat 1 5 5.347108 5.347108 2513 +guri 1 5 5.347108 5.347108 2578 +contributor 1 2 6.263398 6.263398 6214 +pagewisconsin 1 1 6.957497 6.957497 19232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html new file mode 100644 index 00000000..1fac8709 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~msteele^msteele.html @@ -0,0 +1,135 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +depart 4 457 0.693147 2.772588 12 +system 3 443 0.693147 2.079441 6 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +mail 3 238 1.386294 4.158882 22 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +also 1 259 1.386294 1.386294 28 +oper 3 180 1.609438 4.828314 34 +class 2 199 1.609438 3.218876 37 +list 2 201 1.609438 3.218876 39 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +note 1 142 1.945910 1.945910 67 +year 1 148 1.945910 1.945910 84 +perform 1 143 1.945910 1.945910 74 +introduct 2 126 2.079442 4.158884 87 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +world 1 115 2.197225 2.197225 126 +technic 1 100 2.302585 2.302585 140 +question 2 91 2.397895 4.795790 141 +center 1 88 2.397895 2.397895 158 +section 1 94 2.397895 2.397895 149 +pictur 1 89 2.397895 2.397895 160 +build 1 85 2.484907 2.484907 184 +school 1 84 2.484907 2.484907 188 +start 1 83 2.484907 2.484907 173 +member 1 84 2.484907 2.484907 165 +wide 1 84 2.484907 2.484907 185 +come 3 78 2.564949 7.694847 202 +april 1 77 2.564949 2.564949 196 +know 1 80 2.564949 2.564949 198 +intellig 1 72 2.639057 2.639057 225 +receiv 1 66 2.708050 2.708050 244 +degre 1 69 2.708050 2.708050 259 +test 1 66 2.708050 2.708050 252 +artifici 1 63 2.772589 2.772589 280 +back 1 60 2.833213 2.833213 297 +semest 1 58 2.890372 2.890372 312 +undergradu 1 54 2.944439 2.944439 338 +friend 2 48 3.044522 6.089044 376 +frequent 2 49 3.044522 6.089044 367 +right 1 48 3.044522 3.044522 363 +still 1 50 3.044522 3.044522 362 +favorit 2 44 3.135494 6.270988 410 +algebra 1 45 3.135494 3.135494 394 +around 2 43 3.178054 6.356108 415 +vision 1 41 3.218876 3.218876 430 +hand 1 37 3.332205 3.332205 475 +michael 1 35 3.401197 3.401197 514 +eduoffic 1 33 3.433987 3.433987 531 +photo 1 31 3.496508 3.496508 561 +ask 2 28 3.610918 7.221836 597 +univ 1 28 3.610918 3.610918 617 +hope 1 28 3.610918 3.610918 610 +administr 1 27 3.637586 3.637586 628 +comp 1 26 3.688879 3.688879 650 +sport 1 25 3.737670 3.737670 683 +mike 1 24 3.761200 3.761200 703 +sometim 1 24 3.761200 3.761200 696 +theunivers 1 21 3.912023 3.912023 797 +lower 1 18 4.060443 4.060443 886 +stat 1 17 4.110874 4.110874 924 +bachelor 1 17 4.110874 4.110874 957 +chuck 1 14 4.317488 4.317488 1108 +dave 1 14 4.317488 4.317488 1098 +sai 1 13 4.382027 4.382027 1175 +suit 1 13 4.382027 4.382027 1129 +land 1 12 4.465908 4.465908 1273 +mari 1 12 4.465908 4.465908 1266 +touch 1 12 4.465908 4.465908 1288 +host 1 11 4.553877 4.553877 1306 +folk 1 9 4.753590 4.753590 1597 +joel 1 8 4.875197 4.875197 1698 +usenet 2 7 5.010635 10.021270 1839 +corner 1 7 5.010635 5.010635 1909 +maryland 3 6 5.164786 15.494358 2140 +gzip 1 6 5.164786 5.164786 2117 +billi 1 5 5.347108 5.347108 2404 +madisoncomput 1 5 5.347108 5.347108 2391 +steel 3 4 5.568345 16.705035 2818 +chees 1 4 5.568345 5.568345 3090 +kill 1 4 5.568345 5.568345 3000 +sit 1 3 5.857933 5.857933 3953 +stamp 1 3 5.857933 5.857933 3678 +artificialintellig 1 3 5.857933 5.857933 3608 +narrow 1 3 5.857933 5.857933 3807 +predat 1 3 5.857933 5.857933 3135 +forgot 1 2 6.263398 6.263398 4769 +linksmi 1 2 6.263398 6.263398 6215 +barri 1 2 6.263398 6.263398 5149 +eduunivers 1 2 6.263398 6.263398 6216 +homepagemik 1 1 6.957497 6.957497 19233 +homepagemsteel 1 1 6.957497 6.957497 19234 +struggl 1 1 6.957497 6.957497 19235 +sometimearound 1 1 6.957497 6.957497 19236 +motto 1 1 6.957497 6.957497 19237 +freezein 1 1 6.957497 6.957497 19238 +graduateinstructor 1 1 6.957497 6.957497 19239 +scomput 1 1 6.957497 6.957497 19240 +publicationsgrindston 1 1 6.957497 6.957497 19241 +jefferyk 1 1 6.957497 6.957497 19242 +hollingsworth 1 1 6.957497 6.957497 19243 +reportc 1 1 6.957497 6.957497 19244 +postscriptfil 1 1 6.957497 6.957497 19245 +semesterc 1 1 6.957497 6.957497 19246 +vernonc 1 1 6.957497 6.957497 19247 +dyermi 1 1 6.957497 6.957497 19248 +pagesinform 1 1 6.957497 6.957497 19249 +gettingin 1 1 6.957497 6.957497 19250 +marylandwhom 1 1 6.957497 6.957497 19251 +teamssom 1 1 6.957497 6.957497 19252 +listth 1 1 6.957497 6.957497 19253 +listi 1 1 6.957497 6.957497 19254 +thefruit 1 1 6.957497 6.957497 19255 +ofmaryland 1 1 6.957497 6.957497 19256 +insidejok 1 1 6.957497 6.957497 19257 +andnow 1 1 6.957497 6.957497 19258 +someinfrar 1 1 6.957497 6.957497 19259 +looklik 1 1 6.957497 6.957497 19260 +infrar 1 1 6.957497 6.957497 19261 +memik 1 1 6.957497 6.957497 19262 +steelemsteel 1 1 6.957497 6.957497 19263 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html new file mode 100644 index 00000000..9eda7052 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~myuin^myuin.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +section 1 94 2.397895 2.397895 149 +want 1 79 2.564949 2.564949 199 +visit 2 63 2.772589 5.545178 288 +colleg 1 61 2.833213 2.833213 300 +might 1 41 3.218876 3.218876 426 +mayb 1 15 4.248495 4.248495 1014 +maryland 1 6 5.164786 5.164786 2140 +park 1 6 5.164786 5.164786 2218 +maria 1 4 5.568345 5.568345 2954 +pagemaria 1 1 6.957497 6.957497 19264 +pagehow 1 1 6.957497 6.957497 19265 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~naim^naim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~naim^naim.html new file mode 100644 index 00000000..efcc941f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~naim^naim.html @@ -0,0 +1,80 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +work 2 380 0.693147 1.386294 9 +research 2 431 0.693147 1.386294 10 +program 1 374 0.693147 0.693147 7 +project 1 340 1.098612 1.098612 18 +time 1 293 1.098612 1.098612 17 +also 1 259 1.386294 1.386294 28 +wisc 1 242 1.386294 1.386294 33 +parallel 1 169 1.791759 1.791759 60 +read 1 154 1.791759 1.791759 47 +like 3 132 1.945910 5.837730 81 +area 1 144 1.945910 1.945910 80 +perform 1 143 1.945910 1.945910 74 +year 1 148 1.945910 1.945910 84 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +check 1 115 2.197225 2.197225 118 +part 1 98 2.302585 2.302585 129 +peopl 1 96 2.302585 2.302585 132 +book 1 99 2.302585 2.302585 131 +pictur 1 89 2.397895 2.397895 160 +member 1 84 2.484907 2.484907 165 +good 1 77 2.564949 2.564949 200 +main 1 67 2.708050 2.708050 256 +locat 1 59 2.833213 2.833213 303 +plai 1 60 2.833213 2.833213 307 +finger 1 52 2.995732 2.995732 354 +visual 1 48 3.044522 3.044522 372 +around 1 43 3.178054 3.178054 415 +music 1 42 3.218876 3.218876 436 +howev 1 41 3.218876 3.218876 422 +staff 1 36 3.367296 3.367296 490 +known 1 24 3.761200 3.761200 702 +finish 1 22 3.850148 3.850148 748 +born 1 21 3.912023 3.912023 798 +fact 1 21 3.912023 3.912023 780 +watch 1 21 3.912023 3.912023 789 +citi 1 19 4.007333 4.007333 874 +spend 1 19 4.007333 4.007333 850 +beauti 1 18 4.060443 4.060443 912 +stori 1 14 4.317488 4.317488 1087 +central 1 13 4.382027 4.382027 1160 +cook 1 10 4.653960 4.653960 1464 +paradyn 1 9 4.753590 4.753590 1614 +undergrad 1 9 4.753590 4.753590 1589 +guitar 2 8 4.875197 9.750394 1758 +simon 1 8 4.875197 4.875197 1697 +capit 1 7 5.010635 5.010635 1957 +apart 1 7 5.010635 5.010635 1936 +antonio 1 6 5.164786 5.164786 2186 +england 1 5 5.347108 5.347108 2557 +million 1 5 5.347108 5.347108 2495 +western 1 4 5.568345 5.568345 3062 +basebal 1 4 5.568345 5.568345 2969 +myph 1 3 5.857933 5.857933 3880 +popul 1 3 5.857933 5.857933 3235 +pleaseclick 1 2 6.263398 6.263398 5432 +venezuela 5 1 6.957497 34.787485 19266 +barquisimeto 3 1 6.957497 20.872491 19267 +naim 2 1 6.957497 13.914994 19268 +oscar 1 1 6.957497 6.957497 19269 +bienvenido 1 1 6.957497 6.957497 19270 +southampton 1 1 6.957497 6.957497 19271 +universidad 1 1 6.957497 6.957497 19272 +bolivar 1 1 6.957497 6.957497 19273 +caraca 1 1 6.957497 6.957497 19274 +barquisimetoi 1 1 6.957497 6.957497 19275 +ofabout 1 1 6.957497 6.957497 19276 +playclass 1 1 6.957497 6.957497 19277 +excellentmaestro 1 1 6.957497 6.957497 19278 +rodrigo 1 1 6.957497 6.957497 19279 +riera 1 1 6.957497 6.957497 19280 +lauro 1 1 6.957497 6.957497 19281 +sherlock 1 1 6.957497 6.957497 19282 +holm 1 1 6.957497 6.957497 19283 +beati 1 1 6.957497 6.957497 19284 +mundo 1 1 6.957497 6.957497 19285 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html new file mode 100644 index 00000000..6bd06f78 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~natassa^natassa.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +find 1 111 2.197225 2.197225 111 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +pictur 1 89 2.397895 2.397895 160 +want 1 79 2.564949 2.564949 199 +street 1 63 2.772589 2.772589 293 +import 1 65 2.772589 2.772589 282 +guid 1 63 2.772589 2.772589 267 +visit 1 63 2.772589 2.772589 288 +realli 2 40 3.258097 6.516194 444 +notic 1 25 3.737670 3.737670 675 +nice 1 20 3.951244 3.951244 809 +georg 1 16 4.174387 4.174387 994 +worth 1 11 4.553877 4.553877 1294 +alex 1 6 5.164786 5.164786 2130 +greek 1 3 5.857933 5.857933 3595 +decent 1 2 6.263398 6.263398 5542 +rochest 1 2 6.263398 6.263398 6142 +anastassia 2 1 6.957497 13.914994 19286 +ailamaki 2 1 6.957497 13.914994 19287 +islandsar 1 1 6.957497 6.957497 19288 +natassa 1 1 6.957497 6.957497 19289 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html new file mode 100644 index 00000000..d1471dce --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~naughton^naughton.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +system 4 443 0.693147 2.772588 6 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +current 2 284 1.098612 2.197224 21 +wisc 1 242 1.386294 1.386294 33 +includ 2 208 1.609438 3.218876 42 +public 2 202 1.609438 3.218876 43 +parallel 3 169 1.791759 5.375277 60 +algorithm 3 162 1.791759 5.375277 57 +data 2 170 1.791759 3.583518 49 +develop 1 174 1.791759 1.791759 53 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +object 4 138 1.945910 7.783640 79 +relat 3 139 1.945910 5.837730 68 +perform 2 143 1.945910 3.891820 74 +area 1 144 1.945910 1.945910 80 +databas 5 122 2.079442 10.397210 86 +analysi 2 124 2.079442 4.158884 98 +confer 2 126 2.079442 4.158884 100 +intern 2 108 2.197225 4.394450 128 +techniqu 2 99 2.302585 4.605170 138 +proceed 1 93 2.397895 2.397895 152 +larg 2 82 2.484907 4.969814 168 +david 1 71 2.639057 2.639057 232 +goal 1 66 2.708050 2.708050 250 +main 1 67 2.708050 2.708050 256 +improv 1 62 2.772589 2.772589 289 +three 1 54 2.944439 2.944439 330 +submit 1 39 3.258097 3.258097 440 +multi 1 36 3.367296 3.367296 493 +michael 1 35 3.401197 3.401197 514 +storag 2 31 3.496508 6.993016 553 +arrai 1 27 3.637586 3.637586 627 +toward 1 25 3.737670 3.737670 668 +hierarchi 1 22 3.850148 3.850148 744 +prepar 1 20 3.951244 3.951244 824 +benchmark 2 19 4.007333 8.014666 859 +dimension 2 18 4.060443 8.120886 909 +estim 1 17 4.110874 4.110874 930 +spatial 1 16 4.174387 4.174387 988 +ramakrishnan 1 16 4.174387 4.174387 972 +indic 1 15 4.248495 4.248495 1013 +dbm 2 13 4.382027 8.764054 1136 +overal 1 12 4.465908 4.465908 1254 +workload 1 12 4.465908 4.465908 1210 +gupta 1 12 4.465908 4.465908 1241 +raghu 1 12 4.465908 4.465908 1212 +dewitt 1 12 4.465908 4.465908 1270 +jeffrei 1 9 4.753590 4.753590 1612 +presenc 1 8 4.875197 4.875197 1671 +carei 1 8 4.875197 4.875197 1781 +bombai 2 7 5.010635 10.021270 1972 +prasad 2 6 5.164786 10.329572 2126 +eduresearch 1 6 5.164786 5.164786 2205 +aggreg 1 6 5.164786 5.164786 2219 +deshpand 2 5 5.347108 10.694216 2431 +eas 1 5 5.347108 5.347108 2267 +ofinterest 1 5 5.347108 5.347108 2323 +ashish 1 5 5.347108 5.347108 2473 +tuft 1 5 5.347108 5.347108 2575 +multidimension 2 4 5.568345 11.136690 3091 +cube 1 4 5.568345 5.568345 2940 +amit 1 4 5.568345 5.568345 2972 +ramasami 1 4 5.568345 5.568345 3088 +shah 1 4 5.568345 5.568345 2814 +zhao 1 4 5.568345 5.568345 2699 +kristin 1 4 5.568345 5.568345 3089 +mumbai 2 3 5.857933 11.715866 4029 +surpass 1 3 5.857933 5.857933 3247 +shukla 1 3 5.857933 5.857933 4030 +karthikeyan 1 3 5.857933 5.857933 4031 +johann 1 3 5.857933 5.857933 3758 +moham 1 3 5.857933 5.857933 3848 +asgarian 1 3 5.857933 5.857933 3447 +andprocess 1 2 6.263398 6.263398 4925 +agarw 1 2 6.263398 6.263398 5352 +rakesh 1 2 6.263398 6.263398 6017 +agraw 1 2 6.263398 6.263398 4536 +molap 1 2 6.263398 6.263398 6217 +naughtonjeffrei 1 1 6.957497 6.957497 19290 +naughtonnaughton 1 1 6.957497 6.957497 19291 +interestsolap 1 1 6.957497 6.957497 19292 +relationaldbm 1 1 6.957497 6.957497 19293 +ofdatabas 1 1 6.957497 6.957497 19294 +inperform 1 1 6.957497 6.957497 19295 +ofmulti 1 1 6.957497 6.957497 19296 +computingth 1 1 6.957497 6.957497 19297 +valuedattribut 1 1 6.957497 6.957497 19298 +withsameet 1 1 6.957497 6.957497 19299 +sunita 1 1 6.957497 6.957497 19300 +sarawagi 1 1 6.957497 6.957497 19301 +thend 1 1 6.957497 6.957497 19302 +aggregatesin 1 1 6.957497 6.957497 19303 +bucki 1 1 6.957497 6.957497 19304 +gerhk 1 1 6.957497 6.957497 19305 +dhaval 1 1 6.957497 6.957497 19306 +withyihong 1 1 6.957497 6.957497 19307 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~navin^navin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~navin^navin.html new file mode 100644 index 00000000..c1f70749 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~navin^navin.html @@ -0,0 +1,40 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +address 1 170 1.791759 1.791759 62 +area 1 144 1.945910 1.945910 80 +databas 1 122 2.079442 2.079442 86 +look 1 107 2.197225 2.197225 115 +stuff 1 87 2.484907 2.484907 171 +thing 1 84 2.484907 2.484907 189 +optim 1 79 2.564949 2.564949 197 +david 1 71 2.639057 2.639057 232 +plan 1 65 2.772589 2.772589 272 +explor 1 58 2.890372 2.890372 324 +archiv 1 49 3.044522 3.044522 364 +could 1 46 3.091042 3.091042 383 +better 1 45 3.135494 3.135494 401 +queri 1 33 3.433987 3.433987 524 +bookmark 1 26 3.688879 3.688879 639 +indian 1 22 3.850148 3.850148 769 +among 1 21 3.912023 3.912023 781 +noth 1 11 4.553877 4.553877 1328 +song 1 11 4.553877 4.553877 1380 +paradis 1 8 4.875197 4.875197 1782 +customiz 1 4 5.568345 5.568345 2966 +hindi 1 3 5.857933 5.857933 3753 +navin 2 2 6.263398 12.526796 5351 +madisonadvisor 1 2 6.263398 6.263398 6212 +dewittresearch 1 2 6.263398 6.263398 6185 +kabranavin 1 1 6.957497 6.957497 19308 +kabragradu 1 1 6.957497 6.957497 19309 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html new file mode 100644 index 00000000..52d78898 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~newhall^newhall.html @@ -0,0 +1,29 @@ +term, tf, in documents count, idf, tfidf, wordid +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +group 1 183 1.609438 1.609438 36 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +perform 2 143 1.945910 3.891820 74 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +analysi 1 124 2.079442 2.079442 98 +pictur 1 89 2.397895 2.397895 160 +chang 1 82 2.484907 2.484907 163 +java 1 70 2.708050 2.708050 248 +august 1 66 2.708050 2.708050 257 +telephon 1 50 3.044522 3.044522 373 +scalabl 1 24 3.761200 3.761200 705 +predict 1 19 4.007333 4.007333 855 +bart 1 9 4.753590 4.753590 1559 +newhal 2 1 6.957497 13.914994 19310 +newhalltia 1 1 6.957497 6.957497 19311 +paradynadvisor 1 1 6.957497 6.957497 19312 +millermummi 1 1 6.957497 6.957497 19313 +guanajuato 1 1 6.957497 6.957497 19314 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html new file mode 100644 index 00000000..a68e63cc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~nhall^nhall.html @@ -0,0 +1,17 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +project 1 340 1.098612 1.098612 18 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +object 1 138 1.945910 1.945910 79 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +scalabl 1 24 3.761200 3.761200 705 +repositori 1 17 4.110874 4.110874 932 +heterogen 1 14 4.317488 4.317488 1090 +nanci 1 12 4.465908 4.465908 1256 +shore 1 11 4.553877 4.553877 1377 +hallcomput 1 1 6.957497 6.957497 19315 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html new file mode 100644 index 00000000..97593098 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^olvi.html @@ -0,0 +1,208 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 10 775 0.000000 0.000000 2 +scienc 6 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +program 24 374 0.693147 16.635528 7 +research 3 431 0.693147 2.079441 10 +system 3 443 0.693147 2.079441 6 +inform 3 412 0.693147 2.079441 8 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +current 2 284 1.098612 2.197224 21 +student 1 343 1.098612 1.098612 19 +wisc 2 242 1.386294 2.772588 33 +gener 2 220 1.386294 2.772588 27 +email 1 220 1.386294 1.386294 29 +softwar 1 220 1.386294 1.386294 30 +group 3 183 1.609438 4.828314 36 +public 2 202 1.609438 3.218876 43 +paper 2 205 1.609438 3.218876 38 +class 1 199 1.609438 1.609438 37 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +wisconsin 3 169 1.791759 5.375277 54 +parallel 3 169 1.791759 5.375277 60 +applic 2 170 1.791759 3.583518 56 +madison 1 165 1.791759 1.791759 55 +distribut 1 162 1.791759 1.791759 51 +algorithm 1 162 1.791759 1.791759 57 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +problem 7 147 1.945910 13.621370 75 +process 2 142 1.945910 3.891820 72 +professor 1 137 1.945910 1.945910 76 +report 22 131 2.079442 45.747724 92 +machin 5 129 2.079442 10.397210 95 +dayton 1 119 2.079442 2.079442 104 +mathemat 28 108 2.197225 61.522300 123 +theori 1 111 2.197225 2.197225 127 +topic 1 114 2.197225 2.197225 110 +well 1 109 2.197225 2.197225 121 +specif 1 106 2.197225 2.197225 106 +technic 20 100 2.302585 46.051700 140 +advanc 2 99 2.302585 4.605170 130 +techniqu 1 99 2.302585 2.302585 138 +octob 3 89 2.397895 7.193685 156 +proceed 2 93 2.397895 4.795790 152 +center 1 88 2.397895 2.397895 158 +mani 1 92 2.397895 2.397895 150 +real 1 93 2.397895 2.397895 144 +select 1 91 2.397895 2.397895 154 +learn 5 86 2.484907 12.424535 170 +novemb 5 81 2.484907 12.424535 179 +journal 3 83 2.484907 7.454721 183 +member 1 84 2.484907 2.484907 165 +optim 9 79 2.564949 23.084541 197 +decemb 4 80 2.564949 10.259796 215 +method 3 80 2.564949 7.694847 213 +april 2 77 2.564949 5.129898 196 +appear 2 78 2.564949 5.129898 210 +june 2 79 2.564949 5.129898 214 +solv 2 73 2.639057 5.278114 234 +summari 1 73 2.639057 2.639057 237 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +august 7 66 2.708050 18.956350 257 +view 3 70 2.708050 8.124150 254 +street 5 63 2.772589 13.862945 293 +function 2 62 2.772589 5.545178 275 +januari 2 62 2.772589 5.545178 264 +septemb 2 65 2.772589 5.545178 274 +result 1 65 2.772589 2.772589 281 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +new 1 64 2.772589 2.772589 262 +juli 6 60 2.833213 16.999278 305 +publish 1 57 2.890372 2.890372 326 +februari 2 54 2.944439 5.888878 328 +telephon 1 50 3.044522 3.044522 373 +life 1 50 3.044522 3.044522 375 +featur 1 46 3.091042 3.091042 386 +california 1 46 3.091042 3.091042 388 +linear 5 41 3.218876 16.094380 431 +editor 2 41 3.218876 6.437752 433 +press 1 42 3.218876 3.218876 419 +submit 3 39 3.258097 9.774291 440 +error 1 40 3.258097 3.258097 449 +paul 1 38 3.295837 3.295837 471 +download 1 36 3.367296 3.367296 489 +global 2 34 3.401197 6.802394 520 +bibliographi 1 34 3.401197 3.401197 518 +john 1 33 3.433987 3.433987 532 +toler 1 33 3.433987 3.433987 533 +neural 3 30 3.555348 10.666044 578 +rang 1 30 3.555348 3.555348 565 +cluster 1 28 3.610918 3.610918 612 +revis 8 26 3.688879 29.511032 640 +bound 2 26 3.688879 7.377758 659 +constraint 2 26 3.688879 7.377758 636 +aspect 2 25 3.737670 7.475340 663 +accur 1 25 3.737670 3.737670 680 +proof 1 23 3.806662 3.806662 720 +variabl 1 23 3.806662 3.806662 715 +equat 1 23 3.806662 3.806662 724 +verlag 1 22 3.850148 3.850148 751 +period 1 22 3.850148 3.850148 743 +chen 3 21 3.912023 11.736069 791 +siam 1 21 3.912023 3.912023 800 +predict 1 19 4.007333 4.007333 855 +eric 1 19 4.007333 4.007333 870 +separ 1 19 4.007333 4.007333 844 +minim 5 18 4.060443 20.302215 887 +differenti 1 17 4.110874 4.110874 921 +germani 1 17 4.110874 4.110874 946 +hybrid 1 15 4.248495 4.248495 1057 +nonlinear 3 14 4.317488 12.952464 1107 +train 1 14 4.317488 4.317488 1066 +francisco 1 14 4.317488 4.317488 1095 +nick 2 13 4.382027 8.764054 1180 +context 1 13 4.382027 4.382027 1153 +individu 1 13 4.382027 4.382027 1126 +broad 1 11 4.553877 4.553877 1302 +rich 1 10 4.653960 4.653960 1396 +strongli 1 10 4.653960 4.653960 1406 +penalti 1 10 4.653960 4.653960 1405 +mangasarian 20 9 4.753590 95.071800 1570 +pose 1 9 4.753590 4.753590 1535 +morgan 1 9 4.753590 4.753590 1484 +converg 3 7 5.010635 15.031905 1844 +smooth 3 7 5.010635 15.031905 1855 +harvard 1 7 5.010635 5.010635 1926 +fischer 1 7 5.010635 5.010635 1893 +serial 1 7 5.010635 5.010635 1975 +olvi 4 6 5.164786 20.659144 2109 +inequ 2 6 5.164786 10.329572 2113 +constrain 1 6 5.164786 5.164786 2042 +strong 1 6 5.164786 5.164786 2029 +mix 1 6 5.164786 5.164786 2200 +determinist 1 6 5.164786 5.164786 2034 +bradlei 3 5 5.347108 16.041324 2554 +variat 1 5 5.347108 5.347108 2248 +kaufmann 1 5 5.347108 5.347108 2254 +convex 2 4 5.568345 11.136690 2807 +concav 2 4 5.568345 11.136690 2808 +diagnosi 1 4 5.568345 5.568345 3027 +nonmonoton 1 4 5.568345 5.568345 3023 +net 1 4 5.568345 5.568345 2741 +complementar 6 3 5.857933 35.147598 3999 +cancer 3 3 5.857933 17.573799 4032 +breast 2 3 5.857933 11.715866 4033 +backpropag 2 3 5.857933 11.715866 3507 +neumann 1 3 5.857933 5.857933 3720 +programsand 1 3 5.857933 5.857933 3111 +programmingtechniqu 1 3 5.857933 5.857933 3113 +diagnos 1 3 5.857933 5.857933 3968 +diagnost 1 3 5.857933 5.857933 3833 +baltimor 1 3 5.857933 5.857933 3809 +chronolog 1 3 5.857933 5.857933 4034 +wolberg 2 2 6.263398 12.526796 6218 +perturb 2 2 6.263398 12.526796 6075 +interestsin 1 2 6.263398 6.263398 6213 +spectrum 1 2 6.263398 6.263398 5405 +applicationsto 1 2 6.263398 6.263398 4254 +computer 1 2 6.263398 6.263398 6219 +linearli 1 2 6.263398 6.263398 6220 +qualif 1 2 6.263398 6.263398 6059 +prognost 1 2 6.263398 6.263398 6221 +polyhedr 1 2 6.263398 6.263398 5412 +festschrift 1 2 6.263398 6.263398 6141 +klau 1 2 6.263398 6.263398 4999 +internationalsymposium 1 2 6.263398 6.263398 6032 +plenum 1 2 6.263398 6.263398 6036 +prognosi 1 2 6.263398 6.263398 6222 +chunhui 3 1 6.957497 20.872491 19316 +misclassif 2 1 6.957497 13.914994 19317 +solodov 2 1 6.957497 13.914994 19318 +effectivecomputation 1 1 6.957497 6.957497 19319 +encompassestheoret 1 1 6.957497 6.957497 19320 +parallelgradi 1 1 6.957497 6.957497 19321 +problemsa 1 1 6.957497 6.957497 19322 +animport 1 1 6.957497 6.957497 19323 +ahighli 1 1 6.957497 6.957497 19324 +useat 1 1 6.957497 6.957497 19325 +hospit 1 1 6.957497 6.957497 19326 +solodova 1 1 6.957497 6.957497 19327 +descent 1 1 6.957497 6.957497 19328 +monotonecomplementar 1 1 6.957497 6.957497 19329 +jong 1 1 6.957497 6.957497 19330 +pangexact 1 1 6.957497 6.957497 19331 +programswith 1 1 6.957497 6.957497 19332 +mangasarianmathemat 1 1 6.957497 6.957497 19333 +miningmathemat 1 1 6.957497 6.957497 19334 +mangasarianerror 1 1 6.957497 6.957497 19335 +nondifferenti 1 1 6.957497 6.957497 19336 +slater 1 1 6.957497 6.957497 19337 +ritter 1 1 6.957497 6.957497 19338 +riedmuel 1 1 6.957497 6.957497 19339 +schaeffler 1 1 6.957497 6.957497 19340 +physica 1 1 6.957497 6.957497 19341 +siag 1 1 6.957497 6.957497 19342 +bilinear 1 1 6.957497 6.957497 19343 +cowan 1 1 6.957497 6.957497 19344 +tesauro 1 1 6.957497 6.957497 19345 +alspector 1 1 6.957497 6.957497 19346 +inequalitiesand 1 1 6.957497 6.957497 19347 +vianonmonoton 1 1 6.957497 6.957497 19348 +minimn 1 1 6.957497 6.957497 19349 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html new file mode 100644 index 00000000..6802bcb0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^cancer.html @@ -0,0 +1,422 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 13 775 0.000000 0.000000 2 +page 9 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +program 9 374 0.693147 6.238323 7 +research 8 431 0.693147 5.545176 10 +work 4 380 0.693147 2.772588 9 +depart 3 457 0.693147 2.079441 12 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +us 10 329 1.098612 10.986120 16 +time 7 293 1.098612 7.690284 17 +student 1 343 1.098612 1.098612 19 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +link 3 247 1.386294 4.158882 24 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +softwar 1 220 1.386294 1.386294 30 +email 1 220 1.386294 1.386294 29 +also 1 259 1.386294 1.386294 28 +washington 1 236 1.386294 1.386294 32 +paper 2 205 1.609438 3.218876 38 +group 2 183 1.609438 3.218876 36 +includ 1 208 1.609438 1.609438 42 +oper 1 180 1.609438 1.609438 34 +base 4 165 1.791759 7.167036 50 +data 4 170 1.791759 7.167036 49 +wisconsin 3 169 1.791759 5.375277 54 +madison 3 165 1.791759 5.375277 55 +algorithm 2 162 1.791759 3.583518 57 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +relat 4 139 1.945910 7.783640 68 +construct 3 139 1.945910 5.837730 82 +year 3 148 1.945910 5.837730 84 +process 2 142 1.945910 3.891820 72 +problem 2 147 1.945910 3.891820 75 +perform 1 143 1.945910 1.945910 74 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +first 1 140 1.945910 1.945910 71 +model 1 145 1.945910 1.945910 69 +machin 6 129 2.079442 12.476652 95 +analysi 4 124 2.079442 8.317768 98 +provid 3 121 2.079442 6.238326 94 +report 3 131 2.079442 6.238326 92 +postscript 2 131 2.079442 4.158884 90 +confer 2 126 2.079442 4.158884 100 +studi 1 120 2.079442 2.079442 91 +high 1 130 2.079442 2.079442 101 +pleas 2 113 2.197225 4.394450 114 +mathemat 2 108 2.197225 4.394450 123 +intern 2 108 2.197225 4.394450 128 +theori 1 111 2.197225 2.197225 127 +user 2 104 2.302585 4.605170 137 +technic 2 100 2.302585 4.605170 140 +take 1 97 2.302585 2.302585 134 +need 1 98 2.302585 2.302585 135 +text 1 98 2.302585 2.302585 133 +imag 9 91 2.397895 21.581055 161 +proceed 2 93 2.397895 4.795790 152 +section 1 94 2.397895 2.397895 149 +follow 1 92 2.397895 2.397895 143 +grade 1 90 2.397895 2.397895 142 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +center 1 88 2.397895 2.397895 158 +learn 7 86 2.484907 17.394349 170 +journal 2 83 2.484907 4.969814 183 +second 1 81 2.484907 2.484907 166 +solut 1 82 2.484907 2.484907 162 +larg 1 82 2.484907 2.484907 168 +librari 1 87 2.484907 2.484907 181 +institut 1 84 2.484907 2.484907 187 +april 3 77 2.564949 7.694847 196 +method 2 80 2.564949 5.129898 213 +good 1 77 2.564949 2.564949 200 +sourc 1 77 2.564949 2.564949 201 +exampl 1 77 2.564949 2.564949 195 +optim 1 79 2.564949 2.564949 197 +june 1 79 2.564949 2.564949 214 +free 4 73 2.639057 10.556228 224 +appli 2 71 2.639057 5.278114 226 +addit 1 74 2.639057 2.639057 228 +materi 1 75 2.639057 2.639057 221 +workshop 1 71 2.639057 2.639057 239 +nation 1 74 2.639057 2.639057 240 +august 2 66 2.708050 5.416100 257 +goal 1 66 2.708050 2.708050 250 +abstract 9 62 2.772589 24.953301 276 +street 8 63 2.772589 22.180712 293 +result 6 65 2.772589 16.635534 281 +new 6 64 2.772589 16.635534 262 +januari 3 62 2.772589 8.317767 264 +copi 1 63 2.772589 2.772589 284 +prof 1 64 2.772589 2.772589 273 +interact 1 62 2.772589 2.772589 270 +previou 1 62 2.772589 2.772589 290 +function 1 62 2.772589 2.772589 275 +collect 1 65 2.772589 2.772589 268 +improv 1 62 2.772589 2.772589 289 +septemb 1 65 2.772589 2.772589 274 +march 5 61 2.833213 14.166065 295 +content 1 59 2.833213 2.833213 302 +simpl 1 60 2.833213 2.833213 298 +automat 1 61 2.833213 2.833213 306 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +variou 2 56 2.890372 5.780744 317 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +point 1 58 2.890372 2.890372 319 +detail 1 57 2.890372 2.890372 321 +sampl 6 53 2.944439 17.666634 339 +local 2 55 2.944439 5.888878 334 +allow 2 53 2.944439 5.888878 333 +found 1 53 2.944439 2.944439 337 +suggest 1 53 2.944439 2.944439 331 +case 7 51 2.995732 20.970124 351 +tabl 1 51 2.995732 2.995732 346 +digit 1 52 2.995732 2.995732 348 +date 1 51 2.995732 2.995732 344 +particular 1 51 2.995732 2.995732 352 +approach 2 48 3.044522 6.089044 366 +visual 1 48 3.044522 3.044522 372 +pointer 1 48 3.044522 3.044522 368 +right 1 48 3.044522 3.044522 363 +format 1 48 3.044522 3.044522 356 +numer 1 49 3.044522 3.044522 369 +without 1 50 3.044522 3.044522 370 +friend 1 48 3.044522 3.044522 376 +featur 5 46 3.091042 15.455210 386 +could 2 46 3.091042 6.182084 383 +describ 1 45 3.135494 3.135494 400 +better 1 45 3.135494 3.135494 401 +netscap 1 44 3.135494 3.135494 395 +term 2 43 3.178054 6.356108 411 +long 1 43 3.178054 3.178054 413 +linear 5 41 3.218876 16.094380 431 +press 2 42 3.218876 6.437752 419 +vision 1 41 3.218876 3.218876 430 +examin 1 42 3.218876 3.218876 424 +probabl 6 40 3.258097 19.548582 455 +small 2 39 3.258097 6.516194 447 +submit 2 39 3.258097 6.516194 440 +societi 1 40 3.258097 3.258097 456 +author 1 39 3.258097 3.258097 450 +slide 4 38 3.295837 13.183348 467 +seminar 1 38 3.295837 3.295837 470 +mean 2 37 3.332205 6.664410 477 +ofth 1 36 3.367296 3.367296 491 +procedur 1 36 3.367296 3.367296 488 +download 1 36 3.367296 3.367296 489 +approxim 5 35 3.401197 17.005985 509 +bibliographi 1 34 3.401197 3.401197 518 +singl 1 34 3.401197 3.401197 510 +return 1 34 3.401197 3.401197 502 +print 1 34 3.401197 3.401197 503 +tech 1 35 3.401197 3.401197 515 +board 1 33 3.433987 3.433987 528 +obtain 1 33 3.433987 3.433987 534 +collabor 2 32 3.465736 6.931472 543 +human 2 32 3.465736 6.931472 546 +taken 1 31 3.496508 3.496508 555 +often 1 31 3.496508 3.496508 551 +power 1 30 3.555348 3.555348 573 +consid 1 29 3.583519 3.583519 590 +releas 1 28 3.610918 3.610918 616 +measur 1 28 3.610918 3.610918 609 +actual 1 28 3.610918 3.610918 604 +scale 1 28 3.610918 3.610918 613 +progress 1 28 3.610918 3.610918 598 +american 2 27 3.637586 7.275172 634 +team 1 27 3.637586 3.637586 625 +repres 2 26 3.688879 7.377758 656 +consist 1 26 3.688879 3.688879 651 +compar 1 26 3.688879 3.688879 648 +detect 1 26 3.688879 3.688879 646 +valu 5 25 3.737670 18.688350 665 +accur 1 25 3.737670 3.737670 680 +todai 1 25 3.737670 3.737670 672 +known 3 24 3.761200 11.283600 702 +pattern 2 24 3.761200 7.522400 689 +interpret 2 24 3.761200 7.522400 686 +recognit 2 23 3.806662 7.613324 723 +size 1 23 3.806662 3.806662 713 +togeth 1 23 3.806662 3.806662 714 +william 1 22 3.850148 3.850148 765 +identifi 1 22 3.850148 3.850148 760 +siam 2 21 3.912023 7.824046 800 +theunivers 1 21 3.912023 3.912023 797 +util 1 21 3.912023 3.912023 774 +similar 1 21 3.912023 3.912023 771 +viewer 1 21 3.912023 3.912023 787 +fine 2 20 3.951244 7.902488 822 +minut 1 20 3.951244 3.951244 810 +predict 8 19 4.007333 32.058664 855 +separ 3 19 4.007333 12.021999 844 +five 1 19 4.007333 4.007333 841 +comparison 1 19 4.007333 4.007333 863 +aid 2 18 4.060443 8.120886 904 +behavior 1 18 4.060443 4.060443 881 +statu 1 18 4.060443 4.060443 885 +medic 5 17 4.110874 20.554370 958 +differenti 2 17 4.110874 8.221748 921 +segment 2 17 4.110874 8.221748 931 +analyz 1 17 4.110874 4.110874 925 +seek 1 17 4.110874 4.110874 954 +portion 1 16 4.174387 4.174387 971 +capabl 1 15 4.248495 4.248495 1016 +indic 1 15 4.248495 4.248495 1013 +precis 1 15 4.248495 4.248495 1023 +ascii 1 15 4.248495 4.248495 1032 +train 2 14 4.317488 8.634976 1066 +shown 2 14 4.317488 8.634976 1080 +camera 1 14 4.317488 4.317488 1115 +draw 1 14 4.317488 4.317488 1086 +individu 2 13 4.382027 8.764054 1126 +deriv 2 13 4.382027 8.764054 1145 +nick 1 13 4.382027 4.382027 1180 +incorpor 1 13 4.382027 4.382027 1163 +characterist 2 12 4.465908 8.931816 1257 +scan 1 12 4.465908 4.465908 1243 +shape 1 12 4.465908 4.465908 1245 +remov 1 12 4.465908 4.465908 1225 +philadelphia 1 12 4.465908 4.465908 1244 +extrem 3 11 4.553877 13.661631 1330 +node 2 11 4.553877 9.107754 1326 +eight 1 11 4.553877 4.553877 1331 +distinguish 1 11 4.553877 4.553877 1357 +induct 1 11 4.553877 4.553877 1304 +total 1 10 4.653960 4.653960 1398 +subset 1 10 4.653960 4.653960 1425 +black 1 10 4.653960 4.653960 1418 +equal 1 10 4.653960 4.653960 1424 +tradit 1 10 4.653960 4.653960 1404 +perspect 1 10 4.653960 4.653960 1437 +mangasarian 11 9 4.753590 52.289490 1570 +surfac 2 9 4.753590 9.507180 1574 +factor 2 9 4.753590 9.507180 1544 +desir 1 9 4.753590 4.753590 1542 +exact 1 9 4.753590 4.753590 1509 +classifi 1 9 4.753590 4.753590 1537 +hundr 1 9 4.753590 4.753590 1528 +correctli 1 9 4.753590 4.753590 1478 +russel 1 9 4.753590 4.753590 1507 +morgan 1 9 4.753590 4.753590 1484 +curv 3 8 4.875197 14.625591 1656 +mass 2 8 4.875197 9.750394 1732 +isol 2 8 4.875197 9.750394 1663 +textur 2 8 4.875197 9.750394 1677 +quantit 2 8 4.875197 9.750394 1654 +grew 1 8 4.875197 4.875197 1742 +judg 1 8 4.875197 4.875197 1644 +aaai 1 8 4.875197 4.875197 1750 +replac 1 8 4.875197 4.875197 1668 +angel 1 8 4.875197 4.875197 1779 +boundari 2 7 5.010635 10.021270 1929 +analyt 2 7 5.010635 10.021270 1913 +hunt 2 7 5.010635 10.021270 1798 +converg 1 7 5.010635 5.010635 1844 +smooth 1 7 5.010635 5.010635 1855 +densiti 1 7 5.010635 5.010635 1927 +ruth 1 7 5.010635 5.010635 1870 +chronicl 1 7 5.010635 5.010635 1952 +capit 1 7 5.010635 5.010635 1957 +nine 2 6 5.164786 10.329572 2047 +plane 2 6 5.164786 10.329572 2187 +olvi 1 6 5.164786 5.164786 2109 +onto 1 6 5.164786 5.164786 2089 +versu 1 6 5.164786 5.164786 2052 +averag 1 6 5.164786 5.164786 2098 +nuclear 4 5 5.347108 21.388432 2576 +medicin 2 5 5.347108 10.694216 2448 +began 1 5 5.347108 5.347108 2498 +highlight 1 5 5.347108 5.347108 2340 +cell 1 5 5.347108 5.347108 2274 +snake 1 5 5.347108 5.347108 2281 +accuraci 1 5 5.347108 5.347108 2450 +shift 1 5 5.347108 5.347108 2357 +kaufmann 1 5 5.347108 5.347108 2254 +houston 1 5 5.347108 5.347108 2460 +diagnosi 14 4 5.568345 77.956830 3027 +surviv 5 4 5.568345 27.841725 2734 +aspir 3 4 5.568345 16.705035 3019 +popular 2 4 5.568345 11.136690 2802 +biomed 2 4 5.568345 11.136690 2905 +writer 1 4 5.568345 5.568345 2783 +sole 1 4 5.568345 5.568345 2592 +assess 1 4 5.568345 5.568345 2724 +kristin 1 4 5.568345 5.568345 3089 +prospect 1 4 5.568345 5.568345 3013 +cancer 17 3 5.857933 99.584861 4032 +breast 13 3 5.857933 76.153129 4033 +recurr 5 3 5.857933 29.289665 3740 +diagnos 4 3 5.857933 23.431732 3968 +diseas 4 3 5.857933 23.431732 3635 +surgeri 3 3 5.857933 17.573799 3975 +citat 2 3 5.857933 11.715866 3617 +bennett 1 3 5.857933 5.857933 4024 +microscop 1 3 5.857933 5.857933 4035 +confid 1 3 5.857933 5.857933 3691 +pain 1 3 5.857933 5.857933 3460 +chronolog 1 3 5.857933 5.857933 4034 +man 1 3 5.857933 5.857933 3417 +detroit 1 3 5.857933 5.857933 3565 +paulb 1 3 5.857933 5.857933 4036 +wolberg 11 2 6.263398 68.897378 6218 +prognosi 8 2 6.263398 50.107184 6222 +patient 6 2 6.263398 37.580388 6223 +benign 4 2 6.263398 25.053592 4893 +prognost 4 2 6.263398 25.053592 6221 +plot 2 2 6.263398 12.526796 4236 +milwauke 2 2 6.263398 12.526796 5797 +rudi 1 2 6.263398 6.263398 5487 +multisurfac 1 2 6.263398 6.263398 6224 +ofthi 1 2 6.263398 6.263398 5836 +grabber 1 2 6.263398 6.263398 5521 +nucleu 1 2 6.263398 6.263398 4302 +thenorm 1 2 6.263398 6.263398 4412 +ofvari 1 2 6.263398 6.263398 4582 +twelfth 1 2 6.263398 6.263398 5035 +icml 1 2 6.263398 6.263398 5669 +prime 1 2 6.263398 6.263398 6099 +computer 1 2 6.263398 6.263398 6219 +cope 1 2 6.263398 6.263398 6050 +nevada 1 2 6.263398 6.263398 4875 +malign 7 1 6.957497 48.702479 19350 +nuclei 4 1 6.957497 27.829988 19351 +cytolog 4 1 6.957497 27.829988 19352 +biopsi 4 1 6.957497 27.829988 19353 +oncolog 3 1 6.957497 20.872491 19354 +needl 3 1 6.957497 20.872491 19355 +xcyt 3 1 6.957497 20.872491 19356 +setiono 2 1 6.957497 13.914994 19357 +ofeach 2 1 6.957497 13.914994 19358 +ofdiseas 2 1 6.957497 13.914994 19359 +lymph 2 1 6.957497 13.914994 19360 +histolog 2 1 6.957497 13.914994 19361 +heisei 2 1 6.957497 13.914994 19362 +prognosismachin 1 1 6.957497 6.957497 19363 +prognosisthi 1 1 6.957497 6.957497 19364 +learningapproach 1 1 6.957497 6.957497 19365 +ofbreast 1 1 6.957497 6.957497 19366 +betweenprof 1 1 6.957497 6.957497 19367 +anddr 1 1 6.957497 6.957497 19368 +wolbergof 1 1 6.957497 6.957497 19369 +thepress 1 1 6.957497 6.957497 19370 +inmarch 1 1 6.957497 6.957497 19371 +linksdiagnosisthi 1 1 6.957497 6.957497 19372 +diagnosebreast 1 1 6.957497 6.957497 19373 +heidentifi 1 1 6.957497 6.957497 19374 +consideredrelev 1 1 6.957497 6.957497 19375 +andtwo 1 1 6.957497 6.957497 19376 +aclassifi 1 1 6.957497 6.957497 19377 +thatsuccessfulli 1 1 6.957497 6.957497 19378 +iswel 1 1 6.957497 6.957497 19379 +streetto 1 1 6.957497 6.957497 19380 +adigit 1 1 6.957497 6.957497 19381 +consolid 1 1 6.957497 6.957497 19382 +clinicalpractic 1 1 6.957497 6.957497 19383 +thenmount 1 1 6.957497 6.957497 19384 +stain 1 1 6.957497 6.957497 19385 +cellularnuclei 1 1 6.957497 6.957497 19386 +arewel 1 1 6.957497 6.957497 19387 +afram 1 1 6.957497 6.957497 19388 +mous 1 1 6.957497 6.957497 19389 +showingxcyt 1 1 6.957497 6.957497 19390 +thisfas 1 1 6.957497 6.957497 19391 +standarderror 1 1 6.957497 6.957497 19392 +wasconstruct 1 1 6.957497 6.957497 19393 +thisclassifi 1 1 6.957497 6.957497 19394 +threeof 1 1 6.957497 6.957497 19395 +bayesiancomput 1 1 6.957497 6.957497 19396 +thesedens 1 1 6.957497 6.957497 19397 +consecut 1 1 6.957497 6.957497 19398 +newpati 1 1 6.957497 6.957497 19399 +didxcyt 1 1 6.957497 6.957497 19400 +suspici 1 1 6.957497 6.957497 19401 +estimatedprob 1 1 6.957497 6.957497 19402 +goodtest 1 1 6.957497 6.957497 19403 +petsegment 1 1 6.957497 6.957497 19404 +inthes 1 1 6.957497 6.957497 19405 +prognosisth 1 1 6.957497 6.957497 19406 +haveapproach 1 1 6.957497 6.957497 19407 +inputfeatur 1 1 6.957497 6.957497 19408 +atim 1 1 6.957497 6.957497 19409 +censor 1 1 6.957497 6.957497 19410 +linearprogram 1 1 6.957497 6.957497 19411 +fornew 1 1 6.957497 6.957497 19412 +caseswith 1 1 6.957497 6.957497 19413 +anindividu 1 1 6.957497 6.957497 19414 +intoxcyt 1 1 6.957497 6.957497 19415 +ourorigin 1 1 6.957497 6.957497 19416 +thereforeha 1 1 6.957497 6.957497 19417 +freeafter 1 1 6.957497 6.957497 19418 +xcytgiv 1 1 6.957497 6.957497 19419 +tumors 1 1 6.957497 6.957497 19420 +corrobor 1 1 6.957497 6.957497 19421 +axillari 1 1 6.957497 6.957497 19422 +bibliographylink 1 1 6.957497 6.957497 19423 +notlink 1 1 6.957497 6.957497 19424 +patholog 1 1 6.957497 6.957497 19425 +priediti 1 1 6.957497 6.957497 19426 +teagu 1 1 6.957497 6.957497 19427 +indetermin 1 1 6.957497 6.957497 19428 +imit 1 1 6.957497 6.957497 19429 +sentinel 1 1 6.957497 6.957497 19430 +marilynn 1 1 6.957497 6.957497 19431 +marchion 1 1 6.957497 6.957497 19432 +sorel 1 1 6.957497 6.957497 19433 +surgic 1 1 6.957497 6.957497 19434 +column 1 1 6.957497 6.957497 19435 +schooloth 1 1 6.957497 6.957497 19436 +oncolink 1 1 6.957497 6.957497 19437 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html new file mode 100644 index 00000000..2cabac76 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~olvi^uwmp^pat_sep.html @@ -0,0 +1,161 @@ +term, tf, in documents count, idf, tfidf, wordid +page 6 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 10 374 0.693147 6.931470 7 +work 1 380 0.693147 0.693147 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +inform 1 412 0.693147 0.693147 8 +us 4 329 1.098612 4.394448 16 +last 1 314 1.098612 1.098612 14 +also 4 259 1.386294 5.545176 28 +gener 2 220 1.386294 2.772588 27 +softwar 1 220 1.386294 1.386294 30 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +modifi 1 178 1.609438 1.609438 35 +parallel 5 169 1.791759 8.958795 60 +implement 4 152 1.791759 7.167036 52 +network 3 168 1.791759 5.375277 61 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +assign 3 135 1.945910 5.837730 66 +construct 2 139 1.945910 3.891820 82 +problem 1 147 1.945910 1.945910 75 +process 1 142 1.945910 1.945910 72 +file 1 132 1.945910 1.945910 70 +confer 1 126 2.079442 2.079442 100 +mathemat 4 108 2.197225 8.788900 123 +well 1 109 2.197225 2.197225 121 +theori 1 111 2.197225 2.197225 127 +follow 3 92 2.397895 7.193685 143 +section 1 94 2.397895 2.397895 149 +proceed 1 93 2.397895 2.397895 152 +contain 4 81 2.484907 9.939628 174 +learn 1 86 2.484907 2.484907 170 +requir 1 81 2.484907 2.484907 167 +ieee 1 86 2.484907 2.484907 190 +novemb 1 81 2.484907 2.484907 179 +journal 1 83 2.484907 2.484907 183 +method 8 80 2.564949 20.519592 213 +optim 4 79 2.564949 10.259796 197 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +solv 1 73 2.639057 2.639057 234 +appli 1 71 2.639057 2.639057 226 +intellig 1 72 2.639057 2.639057 225 +view 2 70 2.708050 5.416100 254 +goal 1 66 2.708050 2.708050 250 +artifici 2 63 2.772589 5.545178 280 +street 1 63 2.772589 2.772589 293 +descript 1 64 2.772589 2.772589 271 +best 1 59 2.833213 2.833213 299 +point 16 58 2.890372 46.245952 319 +space 3 57 2.890372 8.671116 310 +found 1 53 2.944439 2.944439 337 +particular 1 51 2.995732 2.995732 352 +much 1 52 2.995732 2.995732 349 +set 7 50 3.044522 21.311654 361 +approach 2 48 3.044522 6.089044 366 +numer 1 49 3.044522 3.044522 369 +could 1 46 3.091042 3.091042 383 +describ 2 45 3.135494 6.270988 400 +anoth 1 45 3.135494 3.135494 408 +better 1 45 3.135494 3.135494 401 +long 1 43 3.178054 3.178054 413 +linear 10 41 3.218876 32.188760 431 +howev 1 41 3.218876 3.218876 422 +error 1 40 3.258097 3.258097 449 +transact 1 39 3.258097 3.258097 438 +societi 1 40 3.258097 3.258097 456 +paul 2 38 3.295837 6.591674 471 +close 1 38 3.295837 3.295837 465 +tree 4 36 3.367296 13.469184 492 +procedur 1 36 3.367296 3.367296 488 +bibliographi 1 34 3.401197 3.401197 518 +concept 1 32 3.465736 3.465736 537 +neural 3 30 3.555348 10.666044 578 +packag 2 28 3.610918 7.221836 614 +determin 1 27 3.637586 3.637586 630 +pattern 5 24 3.761200 18.806000 689 +reach 1 24 3.761200 3.761200 688 +togeth 2 23 3.806662 7.613324 714 +sequenc 2 23 3.806662 7.613324 734 +decis 2 23 3.806662 7.613324 728 +avoid 1 21 3.912023 3.912023 799 +separ 14 19 4.007333 56.102662 844 +region 6 19 4.007333 24.043998 875 +mostli 2 19 4.007333 8.014666 869 +histori 1 19 4.007333 4.007333 853 +dimension 5 18 4.060443 20.302215 909 +minim 2 18 4.060443 8.120886 887 +stop 1 17 4.110874 4.110874 942 +otherwis 1 17 4.110874 4.110874 922 +layer 1 17 4.110874 4.110874 926 +brief 1 16 4.174387 4.174387 1001 +choos 1 16 4.174387 4.174387 964 +advantag 1 16 4.174387 4.174387 987 +cognit 1 16 4.174387 4.174387 986 +side 2 15 4.248495 8.496990 1022 +nonlinear 2 14 4.317488 8.634976 1107 +finit 2 14 4.317488 8.634976 1106 +split 2 14 4.317488 8.634976 1078 +matlab 2 14 4.317488 8.634976 1081 +polynomi 1 14 4.317488 4.317488 1069 +shown 1 14 4.317488 4.317488 1080 +train 1 14 4.317488 4.317488 1066 +difficulti 1 13 4.382027 4.382027 1132 +nick 1 13 4.382027 4.382027 1180 +robust 1 12 4.465908 4.465908 1271 +node 3 11 4.553877 13.661631 1326 +branch 1 11 4.553877 4.553877 1318 +faster 1 11 4.553877 4.553877 1323 +surfac 4 9 4.753590 19.014360 1574 +mangasarian 4 9 4.753590 19.014360 1570 +distanc 1 9 4.753590 4.753590 1500 +formul 1 8 4.875197 4.875197 1733 +paramet 2 7 5.010635 10.021270 1796 +whenev 1 7 5.010635 5.010635 1883 +plane 13 6 5.164786 67.142218 2187 +variant 1 6 5.164786 5.164786 2043 +averag 1 6 5.164786 5.164786 2098 +hidden 1 6 5.164786 5.164786 1987 +proce 1 6 5.164786 5.164786 2114 +li 2 5 5.347108 10.694216 2500 +bradlei 2 5 5.347108 10.694216 2554 +disjoint 2 4 5.568345 11.136690 2709 +repeat 1 4 5.568345 5.568345 2798 +kristin 1 4 5.568345 5.568345 3089 +bennett 3 3 5.857933 17.573799 4024 +todetermin 1 3 5.857933 5.857933 3182 +similarli 1 3 5.857933 5.857933 3241 +backpropag 1 3 5.857933 5.857933 3507 +chronolog 1 3 5.857933 5.857933 4034 +paulb 1 3 5.857933 5.857933 4036 +multisurfac 4 2 6.263398 25.053592 6224 +linearli 3 2 6.263398 18.790194 6220 +euclidean 3 2 6.263398 18.790194 5198 +quadrat 1 2 6.263398 6.263398 4497 +oneset 1 2 6.263398 6.263398 6134 +cart 1 2 6.263398 6.263398 5874 +mino 1 2 6.263398 6.263398 6208 +midwest 1 2 6.263398 6.263398 6225 +discrimin 1 2 6.263398 6.263398 6140 +misclassifi 3 1 6.957497 20.872491 19438 +euclideanspac 2 1 6.957497 13.914994 19439 +programmingpattern 1 1 6.957497 6.957497 19440 +programmingthi 1 1 6.957497 6.957497 19441 +outlinemathemat 1 1 6.957497 6.957497 19442 +failon 1 1 6.957497 6.957497 19443 +discard 1 1 6.957497 6.957497 19444 +eachnod 1 1 6.957497 6.957497 19445 +thesam 1 1 6.957497 6.957497 19446 +astrain 1 1 6.957497 6.957497 19447 +traditionallearn 1 1 6.957497 6.957497 19448 +inthat 1 1 6.957497 6.957497 19449 +insepar 1 1 6.957497 6.957497 19450 +orsa 1 1 6.957497 6.957497 19451 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~paradyn^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~paradyn^ new file mode 100644 index 00000000..a504aea9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~paradyn^ @@ -0,0 +1,85 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +program 2 374 0.693147 1.386294 7 +inform 1 412 0.693147 0.693147 8 +project 7 340 1.098612 7.690284 18 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +email 1 220 1.386294 1.386294 29 +paper 1 205 1.609438 1.609438 38 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +parallel 4 169 1.791759 7.167036 60 +recent 1 167 1.791759 1.791759 58 +develop 1 174 1.791759 1.791759 53 +distribut 1 162 1.791759 1.791759 51 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +perform 3 143 1.945910 5.837730 74 +relat 1 139 1.945910 1.945910 68 +tool 5 117 2.079442 10.397210 93 +report 1 131 2.079442 2.079442 92 +high 1 130 2.079442 2.079442 101 +dayton 1 119 2.079442 2.079442 104 +version 1 113 2.197225 2.197225 122 +technic 1 100 2.302585 2.302585 140 +access 1 102 2.302585 2.302585 136 +present 2 91 2.397895 4.795790 145 +contain 1 81 2.484907 2.484907 174 +build 1 85 2.484907 2.484907 184 +level 1 87 2.484907 2.484907 180 +west 1 83 2.484907 2.484907 192 +meet 3 72 2.639057 7.917171 229 +symposium 1 72 2.639057 2.639057 238 +copi 1 63 2.772589 2.772589 284 +explor 1 58 2.890372 2.890372 324 +tabl 1 51 2.995732 2.995732 346 +made 2 44 3.135494 6.270988 398 +describ 1 45 3.135494 3.135494 400 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +staff 1 36 3.367296 3.367296 490 +common 1 30 3.555348 3.555348 574 +releas 1 28 3.610918 3.610918 616 +symbol 1 27 3.637586 3.637586 620 +effort 1 26 3.688879 3.688879 652 +scalabl 1 24 3.761200 3.761200 705 +hypertext 1 19 4.007333 4.007333 865 +statu 1 18 4.060443 4.060443 885 +sigmetr 1 13 4.382027 4.382027 1173 +arpa 4 11 4.553877 18.215508 1369 +paradyn 8 9 4.753590 38.028720 1614 +routin 1 9 4.753590 4.753590 1549 +bart 1 9 4.753590 4.753590 1559 +poster 1 7 5.010635 5.010635 1814 +antonio 1 6 5.164786 5.164786 2186 +restaur 1 6 5.164786 5.164786 2230 +temporari 1 6 5.164786 5.164786 2090 +panel 1 5 5.347108 5.347108 2463 +elsewher 1 5 5.347108 5.347108 2444 +super 3 3 5.857933 17.573799 3918 +insan 1 3 5.857933 5.857933 4006 +parallellanguag 1 3 5.857933 5.857933 4026 +informationthi 1 2 6.263398 6.263398 5477 +ofreleas 1 2 6.263398 6.263398 4860 +newapproach 1 2 6.263398 6.263398 6047 +blizzard 1 2 6.263398 6.263398 6226 +projectdepart 1 2 6.263398 6.263398 6125 +edufax 1 2 6.263398 6.263398 5479 +csto 3 1 6.957497 20.872491 19452 +presentationthi 2 1 6.957497 13.914994 19453 +goalsth 1 1 6.957497 6.957497 19454 +manualsstatu 1 1 6.957497 6.957497 19455 +reporta 1 1 6.957497 6.957497 19456 +inflorida 1 1 6.957497 6.957497 19457 +tocompil 1 1 6.957497 6.957497 19458 +postera 1 1 6.957497 6.957497 19459 +spdt 1 1 6.957497 6.957497 19460 +toolsyou 1 1 6.957497 6.957497 19461 +placehold 1 1 6.957497 6.957497 19462 +informationparadyn 1 1 6.957497 6.957497 19463 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~parker^parker.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~parker^parker.html new file mode 100644 index 00000000..fa9f4084 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~parker^parker.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +mail 1 238 1.386294 1.386294 22 +modifi 1 178 1.609438 1.609438 35 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +area 1 144 1.945910 1.945910 80 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +send 1 114 2.197225 2.197225 109 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +numer 1 49 3.044522 3.044522 369 +math 1 44 3.135494 3.135494 402 +steven 2 17 4.110874 8.221748 953 +employ 1 12 4.465908 4.465908 1291 +depth 1 8 4.875197 4.875197 1636 +parker 3 1 6.957497 20.872491 19464 +prism 2 1 6.957497 13.914994 19465 +projectfal 1 1 6.957497 6.957497 19466 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html new file mode 100644 index 00000000..7c510f3f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~paulb^paulb.html @@ -0,0 +1,91 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 4 571 0.000000 0.000000 5 +page 3 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +program 6 374 0.693147 4.158882 7 +depart 3 457 0.693147 2.079441 12 +work 2 380 0.693147 1.386294 9 +inform 2 412 0.693147 1.386294 8 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +us 2 329 1.098612 2.197224 16 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +paper 1 205 1.609438 1.609438 38 +modifi 1 178 1.609438 1.609438 35 +wisconsin 7 169 1.791759 12.542313 54 +madison 3 165 1.791759 5.375277 55 +area 1 144 1.945910 1.945910 80 +professor 1 137 1.945910 1.945910 76 +file 1 132 1.945910 1.945910 70 +click 1 142 1.945910 1.945910 78 +process 1 142 1.945910 1.945910 72 +postscript 2 131 2.079442 4.158884 90 +report 2 131 2.079442 4.158884 92 +mathemat 4 108 2.197225 8.788900 123 +specif 1 106 2.197225 2.197225 106 +pleas 1 113 2.197225 2.197225 114 +site 1 106 2.197225 2.197225 119 +technic 2 100 2.302585 4.605170 140 +techniqu 1 99 2.302585 2.302585 138 +text 1 98 2.302585 2.302585 133 +select 1 91 2.397895 2.397895 154 +present 1 91 2.397895 2.397895 145 +learn 1 86 2.484907 2.484907 170 +journal 1 83 2.484907 2.484907 183 +decemb 1 80 2.564949 2.564949 215 +summari 1 73 2.639057 2.639057 237 +street 4 63 2.772589 11.090356 293 +abstract 3 62 2.772589 8.317767 276 +guid 1 63 2.772589 2.772589 267 +march 1 61 2.833213 2.833213 295 +advisor 1 51 2.995732 2.995732 355 +format 1 48 3.044522 3.044522 356 +done 1 47 3.091042 3.091042 381 +featur 1 46 3.091042 3.091042 386 +netscap 1 44 3.135494 3.135494 395 +linear 1 41 3.218876 3.218876 431 +submit 1 39 3.258097 3.258097 440 +paul 3 38 3.295837 9.887511 471 +download 1 36 3.367296 3.367296 489 +print 1 34 3.401197 3.401197 503 +eduoffic 1 33 3.433987 3.433987 531 +neural 1 30 3.555348 3.555348 578 +cluster 1 28 3.610918 3.610918 612 +revis 1 26 3.688879 3.688879 640 +store 1 24 3.761200 3.761200 693 +viewer 1 21 3.912023 3.912023 787 +minim 1 18 4.060443 4.060443 887 +accept 1 18 4.060443 4.060443 879 +ascii 1 15 4.248495 4.248495 1032 +nonlinear 1 14 4.317488 4.317488 1107 +nick 2 13 4.382027 8.764054 1180 +nasa 1 13 4.382027 4.382027 1188 +induct 1 11 4.553877 4.553877 1304 +mangasarian 2 9 4.753590 9.507180 1570 +dead 1 7 5.010635 5.010635 1840 +fish 2 6 5.164786 10.329572 2207 +bradlei 3 5 5.347108 16.041324 2554 +shift 1 5 5.347108 5.347108 2357 +frog 1 5 5.347108 5.347108 2479 +concav 1 4 5.568345 5.568345 2808 +paulb 2 3 5.857933 11.715866 4036 +csphone 1 3 5.857933 5.857933 3394 +backcountri 1 3 5.857933 5.857933 3686 +espnet 1 2 6.263398 6.263398 5634 +bradleygradu 1 1 6.957497 6.957497 19467 +mangasarianinterestsmathemat 1 1 6.957497 6.957497 19468 +programmingmachin 1 1 6.957497 6.957497 19469 +learningfli 1 1 6.957497 6.957497 19470 +currentlyb 1 1 6.957497 6.957497 19471 +madisonmathemat 1 1 6.957497 6.957497 19472 +thiswork 1 1 6.957497 6.957497 19473 +olvimangasarian 1 1 6.957497 6.957497 19474 +publicationsal 1 1 6.957497 6.957497 19475 +picksthes 1 1 6.957497 6.957497 19476 +grate 1 1 6.957497 6.957497 19477 +timesfax 1 1 6.957497 6.957497 19478 +uroullett 1 1 6.957497 6.957497 19479 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html new file mode 100644 index 00000000..bded39ed --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pdevries^pdevries.html @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +also 1 259 1.386294 1.386294 28 +group 1 183 1.609438 1.609438 36 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +develop 6 174 1.791759 10.750554 53 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +recent 1 167 1.791759 1.791759 58 +year 1 148 1.945910 1.945910 84 +relat 1 139 1.945910 1.945910 68 +tool 2 117 2.079442 4.158884 93 +technolog 1 131 2.079442 2.079442 102 +studi 1 120 2.079442 2.079442 91 +intern 2 108 2.197225 4.394450 128 +site 2 106 2.197225 4.394450 119 +make 1 111 2.197225 2.197225 120 +look 1 107 2.197225 2.197225 115 +technic 1 100 2.302585 2.302585 140 +techniqu 1 99 2.302585 2.302585 138 +book 1 99 2.302585 2.302585 131 +imag 2 91 2.397895 4.795790 161 +present 1 91 2.397895 2.397895 145 +internet 4 83 2.484907 9.939628 186 +resourc 2 81 2.484907 4.969814 172 +journal 2 83 2.484907 4.969814 183 +thing 1 84 2.484907 2.484907 189 +help 1 83 2.484907 2.484907 175 +info 1 85 2.484907 2.484907 176 +appear 1 78 2.564949 2.564949 210 +issu 1 78 2.564949 2.564949 211 +resum 1 79 2.564949 2.564949 217 +decemb 1 80 2.564949 2.564949 215 +write 1 72 2.639057 2.639057 222 +servic 1 72 2.639057 2.639057 236 +involv 1 71 2.639057 2.639057 227 +tuesdai 1 73 2.639057 2.639057 219 +integr 1 67 2.708050 2.708050 245 +receiv 1 66 2.708050 2.708050 244 +prof 2 64 2.772589 5.545178 273 +laboratori 1 63 2.772589 2.772589 292 +creat 1 63 2.772589 2.772589 277 +guid 1 63 2.772589 2.772589 267 +experi 1 64 2.772589 2.772589 283 +room 1 59 2.833213 2.833213 301 +overview 1 56 2.890372 2.890372 323 +think 1 57 2.890372 2.890372 314 +major 1 56 2.890372 2.890372 315 +cover 1 55 2.944439 2.944439 329 +basic 1 50 3.044522 3.044522 360 +cool 1 49 3.044522 3.044522 374 +standard 1 48 3.044522 3.044522 365 +join 1 39 3.258097 3.258097 457 +multipl 1 39 3.258097 3.258097 453 +mean 1 37 3.332205 3.332205 477 +articl 2 33 3.433987 6.867974 530 +photo 1 31 3.496508 3.496508 561 +great 1 27 3.637586 3.637586 626 +team 1 27 3.637586 3.637586 625 +rather 1 26 3.688879 3.688879 642 +although 1 25 3.737670 3.737670 667 +lab 1 24 3.761200 3.761200 698 +inth 1 22 3.850148 3.850148 741 +tell 1 21 3.912023 3.912023 777 +toolkit 1 20 3.951244 3.951244 835 +eric 1 19 4.007333 4.007333 870 +excel 1 19 4.007333 4.007333 868 +dimension 1 18 4.060443 4.060443 909 +anywai 1 15 4.248495 4.248495 1047 +webmast 1 15 4.248495 4.248495 1045 +biologi 1 15 4.248495 4.248495 1049 +everyth 2 13 4.382027 8.764054 1169 +rest 1 12 4.465908 4.465908 1259 +guest 1 12 4.465908 4.465908 1220 +peter 1 11 4.553877 4.553877 1316 +sens 1 11 4.553877 4.553877 1305 +eight 1 11 4.553877 4.553877 1331 +label 1 10 4.653960 4.653960 1423 +star 1 8 4.875197 4.875197 1717 +scout 5 7 5.010635 25.053175 1903 +fortun 2 7 5.010635 10.021270 1872 +molecular 1 7 5.010635 5.010635 1887 +isthat 1 4 5.568345 5.568345 2723 +biomed 1 4 5.568345 5.568345 2905 +specialist 2 3 5.857933 11.715866 3319 +microscop 2 3 5.857933 11.715866 4035 +pete 1 3 5.857933 5.857933 3865 +devri 2 2 6.263398 12.526796 6145 +foolish 1 2 6.263398 6.263398 6108 +hazen 1 2 6.263398 6.263398 6143 +nearli 1 2 6.263398 6.263398 5608 +magellan 1 2 6.263398 6.263398 5825 +isdescrib 1 2 6.263398 6.263398 5444 +molecularbiolog 2 1 6.957497 13.914994 19480 +embryo 2 1 6.957497 13.914994 19481 +westdayton 1 1 6.957497 6.957497 19482 +pdevri 1 1 6.957497 6.957497 19483 +andthen 1 1 6.957497 6.957497 19484 +iread 1 1 6.957497 6.957497 19485 +topai 1 1 6.957497 6.957497 19486 +alsoprovid 1 1 6.957497 6.957497 19487 +folksat 1 1 6.957497 6.957497 19488 +microscopi 1 1 6.957497 6.957497 19489 +seancarrol 1 1 6.957497 6.957497 19490 +confoc 1 1 6.957497 6.957497 19491 +lotof 1 1 6.957497 6.957497 19492 +johnwhit 1 1 6.957497 6.957497 19493 +imrstaff 1 1 6.957497 6.957497 19494 +augustnd 1 1 6.957497 6.957497 19495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html new file mode 100644 index 00000000..3a8f2ea0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~plakal^plakal.html @@ -0,0 +1,100 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 5 775 0.000000 0.000000 2 +page 4 705 0.000000 0.000000 3 +scienc 4 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +univers 2 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +program 1 374 0.693147 0.693147 7 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +graduat 3 215 1.386294 4.158882 31 +link 2 247 1.386294 2.772588 24 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +class 2 199 1.609438 3.218876 37 +updat 1 191 1.609438 1.609438 41 +madison 5 165 1.791759 8.958795 55 +wisconsin 2 169 1.791759 3.583518 54 +contact 1 153 1.791759 1.791759 59 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +support 1 132 1.945910 1.945910 83 +architectur 1 139 1.945910 1.945910 77 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +check 1 115 2.197225 2.197225 118 +book 1 99 2.302585 2.302585 131 +need 1 98 2.302585 2.302585 135 +access 1 102 2.302585 2.302585 136 +stuff 2 87 2.484907 4.969814 171 +school 1 84 2.484907 2.484907 188 +state 1 76 2.564949 2.564949 207 +dept 1 64 2.772589 2.772589 291 +visit 1 63 2.772589 2.772589 288 +street 1 63 2.772589 2.772589 293 +major 1 56 2.890372 2.890372 315 +variou 1 56 2.890372 2.890372 317 +tabl 1 51 2.995732 2.995732 346 +friend 2 48 3.044522 6.089044 376 +life 1 50 3.044522 3.044522 375 +featur 1 46 3.091042 3.091042 386 +music 1 42 3.218876 3.218876 436 +movi 1 40 3.258097 3.258097 459 +origin 1 38 3.295837 3.295837 472 +industri 1 38 3.295837 3.295837 464 +either 1 35 3.401197 3.401197 506 +everi 1 34 3.401197 3.401197 519 +india 1 32 3.465736 3.465736 550 +though 1 27 3.637586 3.637586 622 +altern 1 26 3.688879 3.688879 641 +bookmark 1 26 3.688879 3.688879 639 +log 1 19 4.007333 4.007333 857 +north 1 19 4.007333 4.007333 873 +countri 1 15 4.248495 4.248495 1059 +galleri 1 13 4.382027 4.382027 1192 +stai 1 12 4.465908 4.465908 1215 +avenu 1 12 4.465908 4.465908 1277 +undergrad 1 9 4.753590 4.753590 1589 +kanpur 3 8 4.875197 14.625591 1744 +hack 1 7 5.010635 5.010635 1950 +gatewai 1 7 5.010635 5.010635 1942 +corner 1 7 5.010635 5.010635 1909 +rock 1 6 5.164786 5.164786 2164 +blue 1 6 5.164786 5.164786 2227 +chat 1 6 5.164786 5.164786 2128 +metal 1 4 5.568345 5.568345 3079 +randal 1 4 5.568345 5.568345 2776 +venkat 1 4 5.568345 5.568345 2702 +slave 2 3 5.857933 11.715866 3959 +kerala 1 3 5.857933 5.857933 3749 +assistantship 1 3 5.857933 5.857933 3660 +acad 1 3 5.857933 5.857933 3847 +icon 1 3 5.857933 5.857933 3362 +plakal 3 2 6.263398 18.790194 5568 +blah 1 2 6.263398 6.263398 5695 +geeki 1 2 6.263398 6.263398 5823 +iitk 1 2 6.263398 6.263398 6227 +snap 1 2 6.263398 6.263398 4962 +nerd 1 2 6.263398 6.263398 5231 +acknowledg 1 2 6.263398 6.263398 6062 +nifti 1 2 6.263398 6.263398 5504 +igor 1 2 6.263398 6.263398 6183 +ivanisev 1 2 6.263398 6.263398 6184 +calcutta 2 1 6.957497 13.914994 19496 +bosco 2 1 6.957497 13.914994 19497 +yumpe 1 1 6.957497 6.957497 19498 +manoj 1 1 6.957497 6.957497 19499 +universityofwisconsin 1 1 6.957497 6.957497 19500 +salesian 1 1 6.957497 6.957497 19501 +stare 1 1 6.957497 6.957497 19502 +barrel 1 1 6.957497 6.957497 19503 +nerdi 1 1 6.957497 6.957497 19504 +seealso 1 1 6.957497 6.957497 19505 +pinup 1 1 6.957497 6.957497 19506 +suresh 1 1 6.957497 6.957497 19507 +wisecrack 1 1 6.957497 6.957497 19508 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html new file mode 100644 index 00000000..db879482 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pmd^pmd.html @@ -0,0 +1,71 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +system 1 443 0.693147 0.693147 6 +research 1 431 0.693147 0.693147 10 +time 2 293 1.098612 2.197224 17 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +madison 4 165 1.791759 7.167036 55 +wisconsin 2 169 1.791759 3.583518 54 +address 2 170 1.791759 3.583518 62 +area 1 144 1.945910 1.945910 80 +databas 2 122 2.079442 4.158884 86 +dayton 1 119 2.079442 2.079442 104 +schedul 1 119 2.079442 2.079442 85 +introduct 1 126 2.079442 2.079442 87 +theori 3 111 2.197225 6.591675 127 +world 1 115 2.197225 2.197225 126 +find 1 111 2.197225 2.197225 111 +sinc 1 90 2.397895 2.397895 159 +build 1 85 2.484907 2.484907 184 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +want 1 79 2.564949 2.564949 199 +meet 1 72 2.639057 2.639057 229 +java 1 70 2.708050 2.708050 248 +prof 1 64 2.772589 2.772589 273 +creat 1 63 2.772589 2.772589 277 +explor 1 58 2.890372 2.890372 324 +finger 1 52 2.995732 2.995732 354 +music 2 42 3.218876 6.437752 436 +download 1 36 3.367296 3.367296 489 +random 1 34 3.401197 3.401197 511 +india 1 32 3.465736 3.465736 550 +storag 1 31 3.496508 3.496508 553 +packag 1 28 3.610918 3.610918 614 +bookmark 1 26 3.688879 3.688879 639 +jeff 1 25 3.737670 3.737670 673 +hierarchi 1 22 3.850148 3.850148 744 +spend 1 19 4.007333 4.007333 850 +estim 1 17 4.110874 4.110874 930 +dilbert 1 16 4.174387 4.174387 996 +princeton 1 15 4.248495 4.248495 1042 +econom 1 13 4.382027 4.382027 1184 +vldb 2 10 4.653960 9.307920 1470 +naughton 1 10 4.653960 4.653960 1450 +presenc 1 8 4.875197 4.875197 1671 +prasad 2 6 5.164786 10.329572 2126 +invest 1 6 5.164786 5.164786 2153 +aggreg 1 6 5.164786 5.164786 2219 +deshpand 1 5 5.347108 5.347108 2431 +multidimension 1 4 5.568345 5.568345 3091 +meanwhil 1 3 5.857933 5.857933 3129 +manageri 1 2 6.263398 6.263398 5135 +constuct 1 1 6.957497 6.957497 19509 +depar 1 1 6.957497 6.957497 19510 +multidimensionalaggreg 1 1 6.957497 6.957497 19511 +timex 1 1 6.957497 6.957497 19512 +comix 1 1 6.957497 6.957497 19513 +hakuna 1 1 6.957497 6.957497 19514 +matata 1 1 6.957497 6.957497 19515 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html new file mode 100644 index 00000000..1a7fcca8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~poosala^poosala.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 3 412 0.693147 2.079441 8 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +wisc 1 242 1.386294 1.386294 33 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +databas 1 122 2.079442 2.079442 86 +assist 1 112 2.197225 2.197225 113 +west 1 83 2.484907 2.484907 192 +help 1 83 2.484907 2.484907 175 +educ 1 86 2.484907 2.484907 191 +resum 1 79 2.564949 2.564949 217 +summari 1 73 2.639057 2.639057 237 +html 1 75 2.639057 2.639057 235 +street 1 63 2.772589 2.772589 293 +prof 1 64 2.772589 2.772589 273 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +advisor 1 51 2.995732 2.995732 355 +basic 1 50 3.044522 3.044522 360 +india 1 32 3.465736 3.465736 550 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +asha 1 3 5.857933 5.857933 4037 +poosala 2 2 6.263398 12.526796 6228 +vishi 1 1 6.957497 6.957497 19516 +viswanath 1 1 6.957497 6.957497 19517 +reseach 1 1 6.957497 6.957497 19518 +voluntari 1 1 6.957497 6.957497 19519 +interestsuw 1 1 6.957497 6.957497 19520 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html new file mode 100644 index 00000000..73960e17 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~praveen^projects^seq.html @@ -0,0 +1,354 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 4 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +system 10 443 0.693147 6.931470 6 +inform 2 412 0.693147 1.386294 8 +work 2 380 0.693147 1.386294 9 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +project 8 340 1.098612 8.788896 18 +us 7 329 1.098612 7.690284 16 +time 6 293 1.098612 6.591672 17 +last 2 314 1.098612 2.197224 14 +current 1 284 1.098612 1.098612 21 +engin 1 297 1.098612 1.098612 20 +languag 6 227 1.386294 8.317764 26 +wisc 4 242 1.386294 5.545176 33 +also 3 259 1.386294 4.158882 28 +gener 2 220 1.386294 2.772588 27 +design 1 213 1.386294 1.386294 25 +oper 13 180 1.609438 20.922694 34 +includ 4 208 1.609438 6.437752 42 +group 3 183 1.609438 4.828314 36 +paper 2 205 1.609438 3.218876 38 +modifi 1 178 1.609438 1.609438 35 +data 23 170 1.791759 41.210457 49 +implement 4 152 1.791759 7.167036 52 +recent 3 167 1.791759 5.375277 58 +madison 3 165 1.791759 5.375277 55 +base 2 165 1.791759 3.583518 50 +applic 1 170 1.791759 1.791759 56 +avail 1 169 1.791759 1.791759 48 +algorithm 1 162 1.791759 1.791759 57 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +relat 13 139 1.945910 25.296830 68 +model 11 145 1.945910 21.405010 69 +support 5 132 1.945910 9.729550 83 +process 5 142 1.945910 9.729550 72 +object 4 138 1.945910 7.783640 79 +click 4 142 1.945910 7.783640 78 +like 3 132 1.945910 5.837730 81 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +databas 13 122 2.079442 27.032746 86 +confer 3 126 2.079442 6.238326 100 +document 2 121 2.079442 4.158884 89 +postscript 2 131 2.079442 4.158884 90 +provid 2 121 2.079442 4.158884 94 +number 1 130 2.079442 2.079442 97 +analysi 1 124 2.079442 2.079442 98 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +manag 6 114 2.197225 13.183350 125 +version 2 113 2.197225 4.394450 122 +theori 1 111 2.197225 2.197225 127 +find 1 111 2.197225 2.197225 111 +check 1 115 2.197225 2.197225 118 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +intern 1 108 2.197225 2.197225 128 +techniqu 4 99 2.302585 9.210340 138 +need 3 98 2.302585 6.907755 135 +user 2 104 2.302585 4.605170 137 +memori 1 101 2.302585 2.302585 139 +part 1 98 2.302585 2.302585 129 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +peopl 1 96 2.302585 2.302585 132 +mani 3 92 2.397895 7.193685 150 +proceed 3 93 2.397895 7.193685 152 +call 2 91 2.397895 4.795790 153 +present 1 91 2.397895 2.397895 145 +commun 1 95 2.397895 2.397895 157 +associ 1 93 2.397895 2.397895 151 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +requir 4 81 2.484907 9.939628 167 +larg 1 82 2.484907 2.484907 168 +build 1 85 2.484907 2.484907 184 +contain 1 81 2.484907 2.484907 174 +ieee 1 86 2.484907 2.484907 190 +optim 10 79 2.564949 25.649490 197 +orient 3 80 2.564949 7.694847 205 +server 3 76 2.564949 7.694847 204 +exampl 2 77 2.564949 5.129898 195 +issu 1 78 2.564949 2.564949 211 +decemb 1 80 2.564949 2.564949 215 +effici 6 73 2.639057 15.834342 233 +involv 3 71 2.639057 7.917171 227 +order 7 69 2.708050 18.956350 249 +view 3 70 2.708050 8.124150 254 +would 2 67 2.708050 5.416100 251 +goal 1 66 2.708050 2.708050 250 +integr 1 67 2.708050 2.708050 245 +knowledg 1 67 2.708050 2.708050 243 +practic 1 70 2.708050 2.708050 246 +evalu 5 64 2.772589 13.862945 266 +import 4 65 2.772589 11.090356 282 +complex 4 64 2.772589 11.090356 269 +plan 3 65 2.772589 8.317767 272 +collect 2 65 2.772589 5.545178 268 +previou 2 62 2.772589 5.545178 290 +result 2 65 2.772589 5.545178 281 +abstract 1 62 2.772589 2.772589 276 +descript 1 64 2.772589 2.772589 271 +function 1 62 2.772589 2.772589 275 +street 1 63 2.772589 2.772589 293 +content 1 59 2.833213 2.833213 302 +type 1 61 2.833213 2.833213 296 +march 1 61 2.833213 2.833213 295 +variou 5 56 2.890372 14.451860 317 +detail 2 57 2.890372 5.780744 321 +publish 2 57 2.890372 5.780744 326 +explor 2 58 2.890372 5.780744 324 +sever 1 56 2.890372 2.890372 322 +allow 3 53 2.944439 8.833317 333 +extens 3 53 2.944439 8.833317 340 +case 1 51 2.995732 2.995732 351 +set 2 50 3.044522 6.089044 361 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +could 2 46 3.091042 6.182084 383 +featur 1 46 3.091042 3.091042 386 +possibl 1 47 3.091042 3.091042 378 +answer 2 45 3.135494 6.270988 391 +natur 1 44 3.135494 3.135494 406 +algebra 1 45 3.135494 3.135494 394 +describ 1 45 3.135494 3.135494 400 +execut 1 45 3.135494 3.135494 404 +even 1 45 3.135494 3.135494 393 +combin 1 42 3.218876 3.218876 421 +howev 1 41 3.218876 3.218876 422 +cach 1 41 3.218876 3.218876 432 +examin 1 42 3.218876 3.218876 424 +form 2 39 3.258097 6.516194 443 +join 2 39 3.258097 6.516194 457 +map 2 39 3.258097 6.516194 452 +probabl 2 40 3.258097 6.516194 455 +theoret 1 39 3.258097 3.258097 446 +littl 1 39 3.258097 3.258097 454 +multipl 1 39 3.258097 3.258097 453 +submit 1 39 3.258097 3.258097 440 +mean 1 37 3.332205 3.332205 477 +cost 1 37 3.332205 3.332205 480 +multi 1 36 3.367296 3.367296 493 +next 3 34 3.401197 10.203591 517 +singl 3 34 3.401197 10.203591 510 +either 1 35 3.401197 3.401197 506 +michael 1 35 3.401197 3.401197 514 +queri 38 33 3.433987 130.491506 524 +express 5 32 3.465736 17.328680 540 +kind 4 32 3.465736 13.862944 541 +idea 1 32 3.465736 3.465736 545 +given 1 32 3.465736 3.465736 538 +extend 1 32 3.465736 3.465736 539 +transform 1 32 3.465736 3.465736 542 +posit 6 31 3.496508 20.979048 552 +storag 4 31 3.496508 13.986032 553 +scientist 1 31 3.496508 3.496508 560 +domain 3 30 3.555348 10.666044 564 +exist 3 30 3.555348 10.666044 569 +specifi 1 30 3.555348 3.555348 568 +built 3 29 3.583519 10.750557 592 +consid 1 29 3.583519 3.583519 590 +propos 3 28 3.610918 10.832754 602 +weather 1 28 3.610918 3.610918 618 +ask 1 28 3.610918 3.610918 597 +scale 1 28 3.610918 3.610918 613 +except 1 28 3.610918 3.610918 607 +framework 1 28 3.610918 3.610918 606 +client 2 25 3.737670 7.475340 679 +valu 1 25 3.737670 3.737670 665 +wai 1 25 3.737670 3.737670 662 +strategi 1 25 3.737670 3.737670 682 +demonstr 2 24 3.761200 7.522400 694 +store 2 24 3.761200 7.522400 693 +daili 1 24 3.761200 3.761200 706 +sequenc 41 23 3.806662 156.073142 734 +input 1 23 3.806662 3.806662 727 +thread 1 23 3.806662 3.806662 722 +defin 3 22 3.850148 11.550444 746 +sequenti 2 22 3.850148 7.700296 745 +sort 2 22 3.850148 7.700296 738 +serv 1 22 3.850148 3.850148 758 +identifi 1 22 3.850148 3.850148 760 +disk 1 22 3.850148 3.850148 747 +deal 1 22 3.850148 3.850148 736 +instead 1 22 3.850148 3.850148 756 +similar 4 21 3.912023 15.648092 771 +util 1 21 3.912023 3.912023 774 +sigmod 1 19 4.007333 4.007333 877 +record 11 18 4.060443 44.664873 890 +statu 1 18 4.060443 4.060443 885 +event 1 18 4.060443 4.060443 896 +account 1 18 4.060443 4.060443 882 +expand 2 17 4.110874 8.221748 928 +medic 1 17 4.110874 4.110874 958 +monitor 1 17 4.110874 4.110874 941 +weekli 1 17 4.110874 4.110874 919 +estim 1 17 4.110874 4.110874 930 +ramakrishnan 5 16 4.174387 20.871935 972 +advantag 2 16 4.174387 8.348774 987 +easi 1 16 4.174387 4.174387 969 +livni 5 15 4.248495 21.242475 1053 +indic 1 15 4.248495 4.248495 1013 +stream 1 15 4.248495 4.248495 1015 +miron 5 14 4.317488 21.587440 1110 +manner 2 14 4.317488 8.634976 1074 +embed 2 14 4.317488 8.634976 1102 +convent 1 14 4.317488 4.317488 1072 +econom 1 13 4.382027 4.382027 1184 +social 1 13 4.382027 4.382027 1123 +opportun 1 13 4.382027 4.382027 1161 +composit 1 13 4.382027 4.382027 1150 +step 1 13 4.382027 4.382027 1138 +front 1 13 4.382027 4.382027 1154 +raghu 4 12 4.465908 17.863632 1212 +scan 3 12 4.465908 13.397724 1243 +buffer 2 12 4.465908 8.931816 1211 +insid 2 12 4.465908 8.931816 1262 +amount 1 12 4.465908 4.465908 1208 +uniqu 1 12 4.465908 4.465908 1228 +shore 3 11 4.553877 13.661631 1377 +regard 1 11 4.553877 4.553877 1309 +motiv 1 11 4.553877 4.553877 1346 +instanc 1 11 4.553877 4.553877 1322 +devis 2 10 4.653960 9.307920 1451 +relationship 1 10 4.653960 4.653960 1383 +reli 1 10 4.653960 4.653960 1411 +subset 1 10 4.653960 4.653960 1425 +vldb 1 10 4.653960 4.653960 1470 +cheng 1 10 4.653960 4.653960 1381 +declar 2 9 4.753590 9.507180 1526 +tempor 2 9 4.753590 9.507180 1584 +strength 2 9 4.753590 9.507180 1494 +compos 1 9 4.753590 4.753590 1527 +vice 1 9 4.753590 4.753590 1604 +lock 1 9 4.753590 4.753590 1551 +respect 1 9 4.753590 4.753590 1545 +meta 1 9 4.753590 4.753590 1505 +intermedi 1 9 4.753590 4.753590 1497 +observ 1 9 4.753590 4.753590 1578 +mode 1 9 4.753590 4.753590 1492 +pose 1 9 4.753590 4.753590 1535 +seshadri 6 7 5.010635 30.063810 1803 +pageth 1 7 5.010635 5.010635 1939 +notion 1 7 5.010635 5.010635 1947 +merg 1 7 5.010635 5.010635 1862 +whenev 1 7 5.010635 5.010635 1883 +therefor 1 7 5.010635 5.010635 1822 +supportfor 1 7 5.010635 5.010635 1854 +praveen 6 6 5.164786 30.988716 1996 +nest 4 6 5.164786 20.659144 2151 +feasibl 2 6 5.164786 10.329572 2157 +financi 1 6 5.164786 5.164786 2197 +histor 1 6 5.164786 5.164786 2085 +consequ 1 6 5.164786 5.164786 1989 +temporari 1 6 5.164786 5.164786 2090 +greater 2 5 5.347108 10.694216 2258 +treat 1 5 5.347108 5.347108 2521 +correl 1 5 5.347108 5.347108 2279 +dual 1 5 5.347108 5.347108 2522 +distinct 1 5 5.347108 5.347108 2319 +overlap 1 5 5.347108 5.347108 2368 +complementari 1 5 5.347108 5.347108 2523 +educomput 1 5 5.347108 5.347108 2524 +zoom 3 4 5.568345 16.705035 2961 +phenomena 2 4 5.568345 11.136690 2962 +flavor 2 4 5.568345 11.136690 2625 +richter 1 4 5.568345 5.568345 2957 +collaps 2 3 5.857933 11.715866 3729 +inadequ 1 3 5.857933 5.857933 3730 +tediou 1 3 5.857933 5.857933 3731 +ineffici 1 3 5.857933 5.857933 3457 +megabyt 1 3 5.857933 5.857933 3732 +claus 1 3 5.857933 5.857933 3733 +offset 1 3 5.857933 5.857933 3467 +hourli 1 3 5.857933 5.857933 3734 +thathav 1 3 5.857933 5.857933 3735 +serverarchitectur 1 3 5.857933 5.857933 3736 +comad 1 3 5.857933 5.857933 3737 +informationfor 1 3 5.857933 5.857933 3738 +sequin 7 2 6.263398 43.843786 5250 +earthquak 4 2 6.263398 25.053592 5251 +volcano 3 2 6.263398 18.790194 5252 +meteorolog 2 2 6.263398 12.526796 5253 +aredescrib 2 2 6.263398 12.526796 5254 +objectivescurr 1 2 6.263398 6.263398 5255 +statusmotiv 1 2 6.263398 6.263398 5256 +exampleseq 1 2 6.263398 6.263398 5257 +languageoptim 1 2 6.263398 6.263398 5258 +techniquesseq 1 2 6.263398 6.263398 5259 +developmentpublicationsrel 1 2 6.263398 6.263398 5260 +workcontact 1 2 6.263398 6.263398 5261 +informationproject 1 2 6.263398 6.263398 5262 +processingof 1 2 6.263398 6.263398 5263 +theseappl 1 2 6.263398 6.263398 5264 +metereolog 1 2 6.263398 6.263398 5265 +andbiolog 1 2 6.263398 6.263398 5266 +semanticstak 1 2 6.263398 6.263398 5267 +evaluationintegr 1 2 6.263398 6.263398 5268 +canstor 1 2 6.263398 6.263398 5269 +sequencesthes 1 2 6.263398 6.263398 5270 +themost 1 2 6.263398 6.263398 5271 +statusth 1 2 6.263398 6.263398 5272 +algebraicqueri 1 2 6.263398 6.263398 5273 +analogousto 1 2 6.263398 6.263398 5274 +candeclar 1 2 6.263398 6.263398 5275 +likesql 1 2 6.263398 6.263398 5276 +versa 1 2 6.263398 6.263398 5277 +querya 1 2 6.263398 6.263398 5278 +occurr 1 2 6.263398 6.263398 5279 +erupt 1 2 6.263398 6.263398 5280 +didth 1 2 6.263398 6.263398 5281 +groupbi 1 2 6.263398 6.263398 5282 +subqueri 1 2 6.263398 6.263398 5283 +aggregatefunct 1 2 6.263398 6.263398 5284 +sequencesord 1 2 6.263398 6.263398 5285 +modelth 1 2 6.263398 6.263398 5286 +gist 1 2 6.263398 6.263398 5287 +ordereddomain 1 2 6.263398 6.263398 5288 +andposit 1 2 6.263398 6.263398 5289 +recordsmap 1 2 6.263398 6.263398 5290 +rise 1 2 6.263398 6.263398 5291 +relationaloper 1 2 6.263398 6.263398 5292 +andaggreg 1 2 6.263398 6.263398 5293 +researchersin 1 2 6.263398 6.263398 5294 +movingaggreg 1 2 6.263398 6.263398 5295 +worldsitu 1 2 6.263398 6.263398 5296 +extensionof 1 2 6.263398 6.263398 5297 +ofseq 1 2 6.263398 6.263398 5298 +languagew 1 2 6.263398 6.263398 5299 +usingwhich 1 2 6.263398 6.263398 5300 +languagei 1 2 6.263398 6.263398 5301 +queriesa 1 2 6.263398 6.263398 5302 +techniquesw 1 2 6.263398 6.263398 5303 +developmentth 1 2 6.263398 6.263398 5304 +viaa 1 2 6.263398 6.263398 5305 +ontop 1 2 6.263398 6.263398 5306 +languageswhich 1 2 6.263398 6.263398 5307 +arbitrarylevel 1 2 6.263398 6.263398 5308 +viceversa 1 2 6.263398 6.263398 5309 +detailson 1 2 6.263398 6.263398 5310 +publicationssequ 1 2 6.263398 6.263398 5311 +datapraveen 1 2 6.263398 6.263398 5312 +systempraveen 1 2 6.263398 6.263398 5313 +queriesraghu 1 2 6.263398 6.263398 5314 +workthedevis 1 2 6.263398 6.263398 5315 +visualizationenviron 1 2 6.263398 6.263398 5316 +servercontact 1 2 6.263398 6.263398 5317 +eduraghu 1 2 6.263398 6.263398 5318 +edumiron 1 2 6.263398 6.263398 5319 +seshadripraveen 1 2 6.263398 6.263398 5320 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~prock^prock.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~prock^prock.html new file mode 100644 index 00000000..4fdcb8ee --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~prock^prock.html @@ -0,0 +1,21 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +wisc 1 242 1.386294 1.386294 33 +class 2 199 1.609438 3.218876 37 +hour 1 165 1.791759 1.791759 46 +madison 1 165 1.791759 1.791759 55 +person 1 111 2.197225 2.197225 117 +school 2 84 2.484907 4.969814 188 +resum 1 79 2.564949 2.564949 217 +bookmark 1 26 3.688879 3.688879 639 +andrew 2 19 4.007333 8.014666 849 +histori 1 19 4.007333 4.007333 853 +vista 2 10 4.653960 9.307920 1452 +alta 2 4 5.568345 11.136690 3039 +prock 2 2 6.263398 12.526796 4786 +clemen 1 1 6.957497 6.957497 19521 +hockert 1 1 6.957497 6.957497 19522 +prockoffic 1 1 6.957497 6.957497 19523 +doonesburi 1 1 6.957497 6.957497 19524 +trot 1 1 6.957497 6.957497 19525 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html new file mode 100644 index 00000000..65546b39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^condon.html @@ -0,0 +1,122 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +system 6 443 0.693147 4.158882 6 +interest 3 384 0.693147 2.079441 11 +work 3 380 0.693147 2.079441 9 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +us 3 329 1.098612 3.295836 16 +current 1 284 1.098612 1.098612 21 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +washington 1 236 1.386294 1.386294 32 +also 1 259 1.386294 1.386294 28 +design 1 213 1.386294 1.386294 25 +public 1 202 1.609438 1.609438 43 +parallel 4 169 1.791759 7.167036 60 +algorithm 3 162 1.791759 5.375277 57 +recent 2 167 1.791759 3.583518 58 +develop 2 174 1.791759 3.583518 53 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +problem 4 147 1.945910 7.783640 75 +model 2 145 1.945910 3.891820 69 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +theori 6 111 2.197225 13.183350 127 +well 1 109 2.197225 2.197225 121 +associ 1 93 2.397895 2.397895 151 +commun 1 95 2.397895 2.397895 157 +octob 1 89 2.397895 2.397895 156 +journal 2 83 2.484907 4.969814 183 +solut 1 82 2.484907 2.484907 162 +exampl 1 77 2.564949 2.564949 195 +complet 1 77 2.564949 2.564949 208 +state 1 76 2.564949 2.564949 207 +summari 1 73 2.639057 2.639057 237 +solv 1 73 2.639057 2.639057 234 +symposium 1 72 2.639057 2.639057 238 +goal 1 66 2.708050 2.708050 250 +practic 1 70 2.708050 2.708050 246 +interact 3 62 2.772589 8.317767 270 +complex 2 64 2.772589 5.545178 269 +result 2 65 2.772589 5.545178 281 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +sever 1 56 2.890372 2.890372 322 +sampl 1 53 2.944439 2.944439 339 +februari 1 54 2.944439 2.944439 328 +much 1 52 2.995732 2.995732 349 +telephon 1 50 3.044522 3.044522 373 +understand 1 47 3.091042 3.091042 384 +long 1 43 3.178054 3.178054 413 +combin 1 42 3.218876 3.218876 421 +annual 1 40 3.258097 3.258097 458 +error 1 40 3.258097 3.258097 449 +game 1 36 3.367296 3.367296 498 +tree 1 36 3.367296 3.367296 492 +random 2 34 3.401197 6.802394 511 +approxim 2 35 3.401197 6.802394 509 +posit 1 31 3.496508 3.496508 552 +hard 2 30 3.555348 7.110696 563 +graph 2 30 3.555348 7.110696 576 +turn 1 29 3.583519 3.583519 586 +synchron 1 29 3.583519 3.583519 588 +progress 1 28 3.610918 3.610918 598 +bound 1 26 3.688879 3.688879 659 +although 1 25 3.737670 3.737670 667 +strategi 1 25 3.737670 3.737670 682 +proof 3 23 3.806662 11.419986 720 +identifi 1 22 3.850148 3.850148 760 +prove 1 19 4.007333 4.007333 848 +easi 1 16 4.174387 4.174387 969 +novel 1 15 4.248495 4.248495 1039 +polynomi 1 14 4.317488 4.317488 1069 +finit 1 14 4.317488 4.317488 1106 +automata 2 13 4.382027 8.764054 1135 +minimum 1 9 4.753590 4.753590 1555 +span 1 8 4.875197 4.875197 1751 +prover 1 8 4.875197 4.875197 1653 +round 1 8 4.875197 4.875197 1769 +aris 1 7 5.010635 5.010635 1924 +eduto 1 7 5.010635 5.010635 1956 +ann 2 6 5.164786 10.329572 2065 +ladner 1 6 5.164786 5.164786 2062 +pub 1 6 5.164786 5.164786 2239 +proceedingsof 1 5 5.347108 5.347108 2331 +provabl 1 5 5.347108 5.347108 2558 +surprisingli 1 4 5.568345 5.568345 2609 +expens 1 4 5.568345 5.568345 2678 +condon 3 3 5.857933 17.573799 3309 +neg 1 3 5.857933 5.857933 3451 +theoryand 1 3 5.857933 5.857933 3350 +nondeterminist 1 3 5.857933 5.857933 3560 +wigderson 1 2 6.263398 6.263398 6035 +complexityclass 1 1 6.957497 6.957497 19526 +interactiveproof 1 1 6.957497 6.957497 19527 +nondetermin 1 1 6.957497 6.957497 19528 +suchmodel 1 1 6.957497 6.957497 19529 +proven 1 1 6.957497 6.957497 19530 +classicproblem 1 1 6.957497 6.957497 19531 +theoryof 1 1 6.957497 6.957497 19532 +computationalproblem 1 1 6.957497 6.957497 19533 +whichhard 1 1 6.957497 6.957497 19534 +recentresult 1 1 6.957497 6.957497 19535 +modelsof 1 1 6.957497 6.957497 19536 +approximabilityresult 1 1 6.957497 6.957497 19537 +developingboth 1 1 6.957497 6.957497 19538 +hardcombinatori 1 1 6.957497 6.957497 19539 +forsort 1 1 6.957497 6.957497 19540 +costscan 1 1 6.957497 6.957497 19541 +probabilisticst 1 1 6.957497 6.957497 19542 +hellerstein 1 1 6.957497 6.957497 19543 +pottl 1 1 6.957497 6.957497 19544 +pspace 1 1 6.957497 6.957497 19545 +caiand 1 1 6.957497 6.957497 19546 +lipton 1 1 6.957497 6.957497 19547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html new file mode 100644 index 00000000..1f5d1e09 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^joseph.html @@ -0,0 +1,138 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 8 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 5 431 0.693147 3.465735 10 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +time 2 293 1.098612 2.197224 17 +last 1 314 1.098612 1.098612 14 +project 1 340 1.098612 1.098612 18 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +design 1 213 1.386294 1.386294 25 +class 4 199 1.609438 6.437752 37 +public 1 202 1.609438 1.609438 43 +algorithm 3 162 1.791759 5.375277 57 +recent 2 167 1.791759 3.583518 58 +develop 2 174 1.791759 3.583518 53 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +area 2 144 1.945910 3.891820 80 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +year 1 148 1.945910 1.945910 84 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +dayton 1 119 2.079442 2.079442 104 +studi 1 120 2.079442 2.079442 91 +analysi 1 124 2.079442 2.079442 98 +report 1 131 2.079442 2.079442 92 +structur 5 106 2.197225 10.986125 105 +mathemat 2 108 2.197225 4.394450 123 +theori 1 111 2.197225 2.197225 127 +specif 1 106 2.197225 2.197225 106 +techniqu 2 99 2.302585 4.605170 138 +question 2 91 2.397895 4.795790 141 +proceed 2 93 2.397895 4.795790 152 +associ 1 93 2.397895 2.397895 151 +octob 1 89 2.397895 2.397895 156 +larg 1 82 2.484907 2.484907 168 +method 2 80 2.564949 5.129898 213 +dynam 1 76 2.564949 2.564949 194 +appli 1 71 2.639057 2.639057 226 +logic 1 71 2.639057 2.639057 230 +summari 1 73 2.639057 2.639057 237 +addit 1 74 2.639057 2.639057 228 +degre 1 69 2.708050 2.708050 259 +complex 4 64 2.772589 11.090356 269 +creat 1 63 2.772589 2.772589 277 +type 1 61 2.833213 2.833213 296 +automat 1 61 2.833213 2.833213 306 +sampl 1 53 2.944439 2.944439 339 +local 1 55 2.944439 2.944439 334 +set 2 50 3.044522 6.089044 361 +telephon 1 50 3.044522 3.044522 373 +theoret 1 39 3.258097 3.258097 446 +littl 1 39 3.258097 3.258097 454 +error 1 40 3.258097 3.258097 449 +formal 1 37 3.332205 3.332205 478 +global 1 34 3.401197 3.401197 520 +obtain 1 33 3.433987 3.433987 534 +graph 1 30 3.555348 3.555348 576 +scale 1 28 3.610918 3.610918 613 +great 1 27 3.637586 3.637586 626 +effort 1 26 3.688879 3.688879 652 +detect 1 26 3.688879 3.688879 646 +concern 2 25 3.737670 7.475340 666 +known 1 24 3.761200 3.761200 702 +sequenc 4 23 3.806662 15.226648 734 +proof 2 23 3.806662 7.613324 720 +properti 3 22 3.850148 11.550444 749 +geometri 2 22 3.850148 7.700296 752 +deal 1 22 3.850148 3.850148 736 +inth 1 22 3.850148 3.850148 741 +springer 1 22 3.850148 3.850148 750 +verlag 1 22 3.850148 3.850148 751 +similar 2 21 3.912023 7.824046 771 +fact 1 21 3.912023 3.912023 780 +util 1 21 3.912023 3.912023 774 +whether 1 17 4.110874 4.110874 918 +young 1 16 4.174387 4.174387 991 +spars 1 16 4.174387 4.174387 989 +fourth 1 16 4.174387 4.174387 999 +biologi 2 15 4.248495 8.496990 1049 +decid 1 14 4.317488 4.317488 1075 +polynomi 1 14 4.317488 4.317488 1069 +anonym 1 14 4.317488 4.317488 1100 +incomput 1 14 4.317488 4.317488 1096 +necessari 1 13 4.382027 4.382027 1147 +primarili 1 13 4.382027 4.382027 1185 +discret 1 13 4.382027 4.382027 1165 +assembl 1 12 4.465908 4.465908 1207 +weight 1 12 4.465908 4.465908 1204 +purdu 1 10 4.653960 4.653960 1466 +rapid 1 10 4.653960 4.653960 1453 +eduto 1 7 5.010635 5.010635 1956 +biolog 1 6 5.164786 5.164786 2147 +determinist 1 6 5.164786 5.164786 2034 +pub 1 6 5.164786 5.164786 2239 +joseph 3 5 5.347108 16.041324 2327 +twenti 1 5 5.347108 5.347108 2540 +despit 1 5 5.347108 5.347108 2317 +tiwari 1 5 5.347108 5.347108 2385 +gone 1 4 5.568345 5.568345 3072 +resolv 1 4 5.568345 5.568345 2675 +algorithmsfor 1 4 5.568345 5.568345 2748 +genom 2 3 5.857933 11.715866 3546 +collaps 1 3 5.857933 5.857933 3729 +ninth 1 3 5.857933 5.857933 3616 +soar 1 3 5.857933 5.857933 3506 +adequaci 1 2 6.263398 6.263398 6229 +fragment 1 2 6.263398 6.263398 6000 +homolog 1 2 6.263398 6.263398 5441 +analysisof 1 2 6.263398 6.263398 4277 +deborah 2 1 6.957497 13.914994 19548 +studyingth 1 1 6.957497 6.957497 19549 +andnondeterminist 1 1 6.957497 6.957497 19550 +stillknow 1 1 6.957497 6.957497 19551 +computerscientist 1 1 6.957497 6.957497 19552 +techniquesfor 1 1 6.957497 6.957497 19553 +investigatesth 1 1 6.957497 6.957497 19554 +exploresin 1 1 6.957497 6.957497 19555 +resolveproblem 1 1 6.957497 6.957497 19556 +theseinclud 1 1 6.957497 6.957497 19557 +handlingrepetit 1 1 6.957497 6.957497 19558 +graphtheoret 1 1 6.957497 6.957497 19559 +subexponenti 1 1 6.957497 6.957497 19560 +pruim 1 1 6.957497 6.957497 19561 +theoryconfer 1 1 6.957497 6.957497 19562 +spanner 1 1 6.957497 6.957497 19563 +althof 1 1 6.957497 6.957497 19564 +dobkin 1 1 6.957497 6.957497 19565 +meidanisand 1 1 6.957497 6.957497 19566 +scandinavianworkshop 1 1 6.957497 6.957497 19567 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html new file mode 100644 index 00000000..26b334d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^miron.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +research 2 431 0.693147 1.386294 10 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +design 1 213 1.386294 1.386294 25 +gener 1 220 1.386294 1.386294 27 +languag 1 227 1.386294 1.386294 26 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +algorithm 2 162 1.791759 3.583518 57 +data 2 170 1.791759 3.583518 49 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +perform 3 143 1.945910 5.837730 74 +process 3 142 1.945910 5.837730 72 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +confer 2 126 2.079442 4.158884 100 +dayton 1 119 2.079442 2.079442 104 +analysi 1 124 2.079442 2.079442 98 +schedul 1 119 2.079442 2.079442 85 +studi 1 120 2.079442 2.079442 91 +report 1 131 2.079442 2.079442 92 +manag 4 114 2.197225 8.788900 125 +well 1 109 2.197225 2.197225 121 +techniqu 1 99 2.302585 2.302585 138 +access 1 102 2.302585 2.302585 136 +proceed 2 93 2.397895 4.795790 152 +sinc 1 90 2.397895 2.397895 159 +graphic 1 90 2.397895 2.397895 147 +octob 1 89 2.397895 2.397895 156 +resourc 3 81 2.484907 7.454721 172 +institut 1 84 2.484907 2.484907 187 +journal 1 83 2.484907 2.484907 183 +interfac 1 79 2.564949 2.564949 209 +summari 1 73 2.639057 2.639057 237 +involv 1 71 2.639057 2.639057 227 +intellig 1 72 2.639057 2.639057 225 +simul 5 66 2.708050 13.540250 255 +differ 1 66 2.708050 2.708050 253 +polici 4 64 2.772589 11.090356 279 +laboratori 2 63 2.772589 5.545178 292 +foundat 1 62 2.772589 2.772589 286 +creat 1 63 2.772589 2.772589 277 +type 1 61 2.833213 2.833213 296 +juli 1 60 2.833213 2.833213 305 +automat 1 61 2.833213 2.833213 306 +major 1 56 2.890372 2.890372 315 +sampl 1 53 2.944439 2.944439 339 +telephon 1 50 3.044522 3.044522 373 +visual 1 48 3.044522 3.044522 372 +join 1 39 3.258097 3.258097 457 +error 1 40 3.258097 3.258097 449 +purpos 1 37 3.332205 3.332205 481 +queri 1 33 3.433987 3.433987 524 +consid 1 29 3.583519 3.583519 590 +synchron 1 29 3.583519 3.583519 588 +sequenc 1 23 3.806662 3.806662 734 +displai 1 23 3.806662 3.806662 712 +disk 2 22 3.850148 7.700296 747 +emphasi 1 22 3.850148 3.850148 755 +properti 1 22 3.850148 3.850148 749 +util 1 21 3.912023 3.912023 774 +sigmod 1 19 4.007333 4.007333 877 +event 2 18 4.060443 8.120886 896 +ramakrishnan 1 16 4.174387 4.174387 972 +livni 2 15 4.248495 8.496990 1053 +novel 1 15 4.248495 4.248495 1039 +miron 3 14 4.317488 12.952464 1110 +discret 2 13 4.382027 8.764054 1165 +sigmetr 1 13 4.382027 4.382027 1173 +emploi 1 12 4.465908 4.465908 1284 +israel 1 11 4.553877 4.553877 1366 +ioannidi 1 8 4.875197 4.875197 1714 +eduto 1 7 5.010635 5.010635 1956 +schema 1 6 5.164786 5.164786 1988 +pub 1 6 5.164786 5.164786 2239 +tape 2 4 5.568345 11.136690 2959 +weizmann 1 4 5.568345 5.568345 2858 +evaluationof 1 3 5.857933 5.857933 3192 +interplai 1 3 5.857933 5.857933 3726 +myllymaki 1 3 5.857933 5.857933 4022 +metaphor 1 3 5.857933 5.857933 4038 +rehovot 1 2 6.263398 6.263398 4891 +developmentof 1 2 6.263398 6.263398 6041 +managementsystem 1 2 6.263398 6.263398 4365 +researchinvolv 1 2 6.263398 6.263398 5556 +asreal 1 1 6.957497 6.957497 19568 +specialemphasi 1 1 6.957497 6.957497 19569 +systemand 1 1 6.957497 6.957497 19570 +performancestudi 1 1 6.957497 6.957497 19571 +modelingand 1 1 6.957497 6.957497 19572 +implementinga 1 1 6.957497 6.957497 19573 +visualizationtool 1 1 6.957497 6.957497 19574 +sashadri 1 1 6.957497 6.957497 19575 +haberand 1 1 6.957497 6.957497 19576 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html new file mode 100644 index 00000000..fedb4c37 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^parter.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +work 2 380 0.693147 1.386294 9 +system 2 443 0.693147 1.386294 6 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +also 1 259 1.386294 1.386294 28 +class 1 199 1.609438 1.609438 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +distribut 2 162 1.791759 3.583518 51 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +recent 1 167 1.791759 1.791759 58 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +analysi 2 124 2.079442 4.158884 98 +dayton 1 119 2.079442 2.079442 104 +studi 1 120 2.079442 2.079442 91 +number 1 130 2.079442 2.079442 97 +report 1 131 2.079442 2.079442 92 +mathemat 1 108 2.197225 2.197225 123 +part 1 98 2.302585 2.302585 129 +real 1 93 2.397895 2.397895 144 +octob 1 89 2.397895 2.397895 156 +journal 2 83 2.484907 4.969814 183 +second 1 81 2.484907 2.484907 166 +method 6 80 2.564949 15.389694 213 +appear 1 78 2.564949 2.564949 210 +summari 1 73 2.639057 2.639057 237 +order 1 69 2.708050 2.708050 249 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +major 1 56 2.890372 2.890372 315 +sever 1 56 2.890372 2.890372 322 +sampl 1 53 2.944439 2.944439 339 +numer 3 49 3.044522 9.133566 369 +telephon 1 50 3.044522 3.044522 373 +without 1 50 3.044522 3.044522 370 +effect 2 46 3.091042 6.182084 385 +york 1 41 3.218876 3.218876 435 +small 1 39 3.258097 3.258097 447 +error 1 40 3.258097 3.258097 449 +hand 1 37 3.332205 3.332205 475 +procedur 1 36 3.367296 3.367296 488 +concept 1 32 3.465736 3.465736 537 +posit 2 31 3.496508 6.993016 552 +valu 2 25 3.737670 7.475340 665 +equat 4 23 3.806662 15.226648 724 +emphasi 1 22 3.850148 3.850148 755 +siam 2 21 3.912023 7.824046 800 +thu 1 21 3.912023 3.912023 773 +definit 2 19 4.007333 8.014666 864 +partial 2 18 4.060443 8.120886 900 +differenti 3 17 4.110874 12.332622 921 +attempt 1 17 4.110874 4.110874 917 +estim 1 17 4.110874 4.110874 930 +condit 3 16 4.174387 12.523161 975 +spars 1 16 4.174387 4.174387 989 +discret 2 13 4.382027 8.764054 1165 +boundari 1 7 5.010635 5.010635 1929 +eduto 1 7 5.010635 5.010635 1956 +mix 1 6 5.164786 5.164786 2200 +pub 1 6 5.164786 5.164786 2239 +pivot 1 5 5.347108 5.347108 2426 +symmetr 1 4 5.568345 5.568345 2908 +ellipt 3 3 5.857933 17.573799 3774 +singular 2 3 5.857933 11.715866 3366 +preserv 1 3 5.857933 5.857933 3628 +thesystem 1 3 5.857933 5.857933 3881 +encount 1 3 5.857933 5.857933 3128 +attack 1 3 5.857933 5.857933 3168 +parter 3 2 6.263398 18.790194 4075 +solutionof 1 2 6.263398 6.263398 5056 +linearalgebra 1 2 6.263398 6.263398 4833 +scientificcomput 1 2 6.263398 6.263398 4145 +precondit 4 1 6.957497 27.829988 19577 +seymour 2 1 6.957497 13.914994 19578 +indefinit 2 1 6.957497 13.914994 19579 +classicalit 1 1 6.957497 6.957497 19580 +multigrid 1 1 6.957497 6.957497 19581 +effectivelywhen 1 1 6.957497 6.957497 19582 +bemad 1 1 6.957497 6.957497 19583 +operatori 1 1 6.957497 6.957497 19584 +casedirect 1 1 6.957497 6.957497 19585 +challengingproblem 1 1 6.957497 6.957497 19586 +nowinvolv 1 1 6.957497 6.957497 19587 +specialmultigrid 1 1 6.957497 6.957497 19588 +chebyshev 1 1 6.957497 6.957497 19589 +collact 1 1 6.957497 6.957497 19590 +ellipticparti 1 1 6.957497 6.957497 19591 +journalon 1 1 6.957497 6.957497 19592 +numbersand 1 1 6.957497 6.957497 19593 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html new file mode 100644 index 00000000..0e8f6171 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~pubs^faculty-info^vernon.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +system 6 443 0.693147 4.158882 6 +interest 2 384 0.693147 1.386294 11 +research 2 431 0.693147 1.386294 10 +depart 1 457 0.693147 0.693147 12 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +current 1 284 1.098612 1.098612 21 +project 1 340 1.098612 1.098612 18 +email 2 220 1.386294 2.772588 29 +wisc 2 242 1.386294 2.772588 33 +also 2 259 1.386294 2.772588 28 +design 1 213 1.386294 1.386294 25 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +parallel 8 169 1.791759 14.334072 60 +applic 3 170 1.791759 5.375277 56 +recent 2 167 1.791759 3.583518 58 +network 2 168 1.791759 3.583518 61 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +distribut 1 162 1.791759 1.791759 51 +develop 1 174 1.791759 1.791759 53 +algorithm 1 162 1.791759 1.791759 57 +perform 3 143 1.945910 5.837730 74 +model 3 145 1.945910 5.837730 69 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +analysi 4 124 2.079442 8.317768 98 +schedul 2 119 2.079442 4.158884 85 +confer 2 126 2.079442 4.158884 100 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +report 1 131 2.079442 2.079442 92 +techniqu 4 99 2.302585 9.210340 138 +proceed 3 93 2.397895 7.193685 152 +call 1 91 2.397895 2.397895 153 +octob 1 89 2.397895 2.397895 156 +ieee 1 86 2.484907 2.484907 190 +issu 3 78 2.564949 7.694847 211 +june 2 79 2.564949 5.129898 214 +server 1 76 2.564949 2.564949 204 +complet 1 77 2.564949 2.564949 208 +summari 1 73 2.639057 2.639057 237 +servic 1 72 2.639057 2.639057 236 +solv 1 73 2.639057 2.639057 234 +effici 1 73 2.639057 2.639057 233 +multimedia 1 68 2.708050 2.708050 258 +august 1 66 2.708050 2.708050 257 +polici 3 64 2.772589 8.317767 279 +creat 1 63 2.772589 2.772589 277 +automat 1 61 2.833213 2.833213 306 +processor 3 54 2.944439 8.833317 335 +sampl 1 53 2.944439 2.944439 339 +telephon 1 50 3.044522 3.044522 373 +california 1 46 3.091042 3.091042 388 +featur 1 46 3.091042 3.091042 386 +join 1 39 3.258097 3.258097 457 +error 1 40 3.258097 3.258097 449 +industri 1 38 3.295837 3.295837 464 +workstat 1 37 3.332205 3.332205 479 +approxim 1 35 3.401197 3.401197 509 +synchron 1 29 3.583519 3.583519 588 +limit 1 29 3.583519 3.583519 585 +repres 2 26 3.688879 7.377758 656 +valu 1 25 3.737670 3.737670 665 +accur 1 25 3.737670 3.737670 680 +togeth 1 23 3.806662 3.806662 714 +emphasi 1 22 3.850148 3.850148 755 +alloc 1 20 3.951244 3.951244 821 +precis 1 15 4.248495 4.248495 1023 +hybrid 1 15 4.248495 4.248495 1057 +sigmetr 1 13 4.382027 4.382027 1173 +mari 2 12 4.465908 8.931816 1266 +workload 1 12 4.465908 4.465908 1210 +characterist 1 12 4.465908 4.465908 1257 +operatingsystem 1 10 4.653960 4.653960 1401 +custom 1 10 4.653960 4.653960 1414 +vernon 3 9 4.753590 14.260770 1556 +angel 1 8 4.875197 4.875197 1779 +character 1 8 4.875197 4.875197 1767 +reus 1 8 4.875197 4.875197 1661 +hash 1 8 4.875197 4.875197 1618 +carei 1 8 4.875197 4.875197 1781 +analyt 1 7 5.010635 5.010635 1913 +prioriti 1 7 5.010635 5.010635 1792 +interpol 1 7 5.010635 5.010635 1823 +chiang 1 7 5.010635 5.010635 1853 +eduto 1 7 5.010635 5.010635 1956 +pub 1 6 5.164786 5.164786 2239 +yield 1 5 5.347108 5.347108 2458 +fair 1 5 5.347108 5.347108 2333 +infocom 1 3 5.857933 5.857933 3283 +paralleland 1 2 6.263398 6.263398 5805 +petri 1 2 6.263398 6.263398 4414 +intuit 1 2 6.263398 6.263398 4921 +performanceanalysi 1 2 6.263398 6.263398 5629 +schedulingpolici 1 2 6.263398 6.263398 5879 +memorymanag 1 2 6.263398 6.263398 4158 +preemption 1 2 6.263398 6.263398 6230 +mansharamani 1 2 6.263398 6.263398 6231 +applicationto 1 1 6.957497 6.957497 19594 +techniquesi 1 1 6.957497 6.957497 19595 +colleaguesinclud 1 1 6.957497 6.957497 19596 +customizedmean 1 1 6.957497 6.957497 19597 +gtpn 1 1 6.957497 6.957497 19598 +systemfeatur 1 1 6.957497 6.957497 19599 +equationsthat 1 1 6.957497 6.957497 19600 +butcan 1 1 6.957497 6.957497 19601 +proposedth 1 1 6.957497 6.957497 19602 +approximationsfor 1 1 6.957497 6.957497 19603 +techniquemai 1 1 6.957497 6.957497 19604 +broader 1 1 6.957497 6.957497 19605 +performanceparallel 1 1 6.957497 6.957497 19606 +dqdb 1 1 6.957497 6.957497 19607 +slot 1 1 6.957497 6.957497 19608 +brewster 1 1 6.957497 6.957497 19609 +pateland 1 1 6.957497 6.957497 19610 +forrun 1 1 6.957497 6.957497 19611 +with 1 1 6.957497 6.957497 19612 +sigmetricsconfer 1 1 6.957497 6.957497 19613 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~qw^qw.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~qw^qw.html new file mode 100644 index 00000000..f989bbf4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~qw^qw.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +welcom 1 122 2.079442 2.079442 99 +wang 2 21 3.912023 7.824046 790 +edulast 1 17 4.110874 4.110874 927 +qinqin 2 1 6.957497 13.914994 19614 +pageqw 1 1 6.957497 6.957497 19615 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html new file mode 100644 index 00000000..32b58f0d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~raghu^raghu.html @@ -0,0 +1,199 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 8 431 0.693147 5.545176 10 +system 6 443 0.693147 4.158882 6 +interest 5 384 0.693147 3.465735 11 +work 5 380 0.693147 3.465735 9 +depart 3 457 0.693147 2.079441 12 +program 3 374 0.693147 2.079441 7 +project 9 340 1.098612 9.887508 18 +us 4 329 1.098612 4.394448 16 +cours 2 273 1.098612 2.197224 15 +time 1 293 1.098612 1.098612 17 +languag 4 227 1.386294 5.545176 26 +graduat 3 215 1.386294 4.158882 31 +design 2 213 1.386294 2.772588 25 +gener 2 220 1.386294 2.772588 27 +cornel 2 215 1.386294 2.772588 23 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +data 17 170 1.791759 30.459903 49 +develop 5 174 1.791759 8.958795 53 +base 4 165 1.791759 7.167036 50 +madison 2 165 1.791759 3.583518 55 +algorithm 2 162 1.791759 3.583518 57 +wisconsin 1 169 1.791759 1.791759 54 +texa 1 160 1.791759 1.791759 64 +austin 1 168 1.791759 1.791759 63 +recent 1 167 1.791759 1.791759 58 +first 4 140 1.945910 7.783640 71 +relat 3 139 1.945910 5.837730 68 +support 2 132 1.945910 3.891820 83 +professor 1 137 1.945910 1.945910 76 +area 1 144 1.945910 1.945910 80 +object 1 138 1.945910 1.945910 79 +databas 9 122 2.079442 18.714978 86 +analysi 2 124 2.079442 4.158884 98 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +teach 2 108 2.197225 4.394450 112 +manag 2 114 2.197225 4.394450 125 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +text 3 98 2.302585 6.907755 133 +access 3 102 2.302585 6.907755 136 +techniqu 2 99 2.302585 4.605170 138 +part 2 98 2.302585 4.605170 129 +take 1 97 2.302585 2.302585 134 +imag 5 91 2.397895 11.989475 161 +call 2 91 2.397895 4.795790 153 +associ 1 93 2.397895 2.397895 151 +educ 3 86 2.484907 7.454721 191 +activ 2 84 2.484907 4.969814 182 +second 2 81 2.484907 4.969814 166 +level 2 87 2.484907 4.969814 180 +larg 2 82 2.484907 4.969814 168 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +school 1 84 2.484907 2.484907 188 +sourc 2 77 2.564949 5.129898 201 +optim 2 79 2.564949 5.129898 197 +orient 1 80 2.564949 2.564949 205 +logic 3 71 2.639057 7.917171 230 +appli 2 71 2.639057 5.278114 226 +effici 1 73 2.639057 2.639057 233 +involv 1 71 2.639057 2.639057 227 +august 4 66 2.708050 10.832200 257 +integr 3 67 2.708050 8.124150 245 +practic 1 70 2.708050 2.708050 246 +goal 1 66 2.708050 2.708050 250 +prof 3 64 2.772589 8.317767 273 +evalu 3 64 2.772589 8.317767 266 +street 1 63 2.772589 2.772589 293 +result 1 65 2.772589 2.772589 281 +complex 1 64 2.772589 2.772589 269 +import 1 65 2.772589 2.772589 282 +collect 1 65 2.772589 2.772589 268 +content 3 59 2.833213 8.499639 302 +sever 2 56 2.890372 5.780744 322 +explor 2 58 2.890372 5.780744 324 +publish 1 57 2.890372 2.890372 326 +index 1 56 2.890372 2.890372 309 +cover 1 55 2.944439 2.944439 329 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +set 3 50 3.044522 9.133566 361 +visual 2 48 3.044522 6.089044 372 +long 1 43 3.178054 3.178054 413 +term 1 43 3.178054 3.178054 411 +multipl 1 39 3.258097 3.258097 453 +continu 1 39 3.258097 3.258097 448 +sciencesunivers 1 37 3.332205 3.332205 486 +formal 1 37 3.332205 3.332205 478 +next 4 34 3.401197 13.604788 517 +tech 1 35 3.401197 3.401197 515 +queri 8 33 3.433987 27.471896 524 +extend 1 32 3.465736 3.465736 539 +independ 1 32 3.465736 3.465736 548 +express 1 32 3.465736 3.465736 540 +given 1 32 3.465736 3.465736 538 +abl 1 30 3.555348 3.555348 566 +focu 1 30 3.555348 3.555348 571 +power 1 30 3.555348 3.555348 573 +specifi 1 30 3.555348 3.555348 568 +rang 1 30 3.555348 3.555348 565 +focus 1 29 3.583519 3.583519 584 +cluster 1 28 3.610918 3.610918 612 +retriev 1 27 3.637586 3.637586 621 +constraint 2 26 3.688879 7.377758 636 +mine 1 26 3.688879 3.688879 654 +hill 4 25 3.737670 14.950680 670 +aspect 1 25 3.737670 3.737670 663 +lab 3 24 3.761200 11.283600 698 +pattern 1 24 3.761200 3.761200 689 +sequenc 4 23 3.806662 15.226648 734 +deal 2 22 3.850148 7.700296 736 +indian 1 22 3.850148 3.850148 769 +identifi 1 22 3.850148 3.850148 760 +toolkit 1 20 3.951244 3.951244 835 +definit 1 19 4.007333 4.007333 864 +stand 1 18 4.060443 4.060443 891 +ramakrishnan 2 16 4.174387 8.348774 972 +advantag 1 16 4.174387 4.174387 987 +upon 1 16 4.174387 4.174387 978 +livni 3 15 4.248495 12.745485 1053 +transit 1 15 4.248495 4.248495 1046 +heterogen 2 14 4.317488 8.634976 1090 +easili 1 14 4.317488 4.317488 1077 +joint 3 13 4.382027 13.146081 1130 +dbm 2 13 4.382027 8.764054 1136 +recurs 1 13 4.382027 4.382027 1127 +employ 4 12 4.465908 17.863632 1291 +raghu 3 12 4.465908 13.397724 1212 +grow 1 12 4.465908 4.465908 1209 +deduct 1 12 4.465908 4.465908 1236 +broad 1 11 4.553877 4.553877 1302 +usaphon 1 9 4.753590 4.753590 1600 +madra 1 8 4.875197 4.875197 1770 +ioannidi 1 8 4.875197 4.875197 1714 +closur 1 8 4.875197 4.875197 1643 +bottom 2 7 5.010635 10.021270 1906 +dataset 1 7 5.010635 5.010635 1914 +seshadri 1 7 5.010635 5.010635 1803 +bell 3 6 5.164786 15.494358 2224 +ongo 1 6 5.164786 5.164786 2215 +praveen 1 6 5.164786 5.164786 1996 +coral 2 5 5.347108 10.694216 2538 +mcgraw 1 5 5.347108 5.347108 2262 +minibas 1 4 5.568345 5.568345 2608 +exploratori 1 4 5.568345 5.568345 3073 +ofinform 1 4 5.568345 5.568345 2707 +successor 1 3 5.857933 5.857933 3576 +sudarshan 1 3 5.857933 5.857933 3885 +murrai 3 2 6.263398 18.790194 5647 +aimedat 1 2 6.263398 6.263398 6117 +srivastava 1 2 6.263398 6.263398 5395 +minibaseand 1 1 6.957497 6.957497 19616 +coralth 1 1 6.957497 6.957497 19617 +undergraduateand 1 1 6.957497 6.957497 19618 +inconjunct 1 1 6.957497 6.957497 19619 +coursesthat 1 1 6.957497 6.957497 19620 +deductiona 1 1 6.957497 6.957497 19621 +diversifi 1 1 6.957497 6.957497 19622 +increasinglyimport 1 1 6.957497 6.957497 19623 +dispers 1 1 6.957497 6.957497 19624 +rodin 1 1 6.957497 6.957497 19625 +severalissu 1 1 6.957497 6.957497 19626 +forsemant 1 1 6.957497 6.957497 19627 +serviceand 1 1 6.957497 6.957497 19628 +networkedclust 1 1 6.957497 6.957497 19629 +explorationfrom 1 1 6.957497 6.957497 19630 +assequ 1 1 6.957497 6.957497 19631 +seqsystem 1 1 6.957497 6.957497 19632 +optimizationissu 1 1 6.957497 6.957497 19633 +identifyingtrend 1 1 6.957497 6.957497 19634 +fromlarg 1 1 6.957497 6.957497 19635 +implementingan 1 1 6.957497 6.957497 19636 +customizea 1 1 6.957497 6.957497 19637 +specializedinform 1 1 6.957497 6.957497 19638 +indexedand 1 1 6.957497 6.957497 19639 +andmin 1 1 6.957497 6.957497 19640 +birchfor 1 1 6.957497 6.957497 19641 +devisea 1 1 6.957497 6.957497 19642 +databasequeri 1 1 6.957497 6.957497 19643 +featuressuch 1 1 6.957497 6.957497 19644 +ofarithmet 1 1 6.957497 6.957497 19645 +morecompactli 1 1 6.957497 6.957497 19646 +coraldeduct 1 1 6.957497 6.957497 19647 +fixpointevalu 1 1 6.957497 6.957497 19648 +efficientacross 1 1 6.957497 6.957497 19649 +sudarsha 1 1 6.957497 6.957497 19650 +divesh 1 1 6.957497 6.957497 19651 +managementfirst 1 1 6.957497 6.957497 19652 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html new file mode 100644 index 00000000..06bc3c87 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~rahul^rahul.html @@ -0,0 +1,164 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 2 384 0.693147 1.386294 11 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +wisc 1 242 1.386294 1.386294 33 +fall 3 181 1.609438 4.828314 40 +madison 4 165 1.791759 7.167036 55 +contact 2 153 1.791759 3.583518 59 +wisconsin 1 169 1.791759 1.791759 54 +read 1 154 1.791759 1.791759 47 +year 1 148 1.945910 1.945910 84 +area 1 144 1.945910 1.945910 80 +like 1 132 1.945910 1.945910 81 +file 1 132 1.945910 1.945910 70 +welcom 1 122 2.079442 2.079442 99 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +final 2 116 2.197225 4.394450 108 +place 2 106 2.197225 4.394450 124 +pleas 1 113 2.197225 2.197225 114 +check 1 115 2.197225 2.197225 118 +person 1 111 2.197225 2.197225 117 +well 1 109 2.197225 2.197225 121 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +take 1 97 2.302585 2.302585 134 +call 1 91 2.397895 2.397895 153 +internet 1 83 2.484907 2.484907 186 +stuff 1 87 2.484907 2.484907 171 +learn 1 86 2.484907 2.484907 170 +know 3 80 2.564949 7.694847 198 +master 1 76 2.564949 2.564949 216 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +state 1 76 2.564949 2.564949 207 +good 1 77 2.564949 2.564949 200 +come 1 78 2.564949 2.564949 202 +degre 1 69 2.708050 2.708050 259 +collect 1 65 2.772589 2.772589 268 +plai 1 60 2.833213 2.833213 307 +semest 2 58 2.890372 5.780744 312 +think 2 57 2.890372 5.780744 314 +summer 2 56 2.890372 5.780744 311 +much 1 52 2.995732 2.995732 349 +california 2 46 3.091042 6.182084 388 +get 1 46 3.091042 3.091042 380 +could 1 46 3.091042 3.091042 383 +natur 1 44 3.135494 3.135494 406 +long 2 43 3.178054 6.356108 413 +show 2 43 3.178054 6.356108 417 +third 1 43 3.178054 3.178054 412 +music 1 42 3.218876 3.218876 436 +littl 2 39 3.258097 6.516194 454 +small 1 39 3.258097 3.258097 447 +live 1 40 3.258097 3.258097 451 +movi 1 40 3.258097 3.258097 459 +form 1 39 3.258097 3.258097 443 +winter 1 36 3.367296 3.367296 500 +go 1 33 3.433987 3.433987 529 +india 3 32 3.465736 10.397208 550 +kind 2 32 3.465736 6.931472 541 +travel 1 30 3.555348 3.555348 579 +hope 1 28 3.610918 3.610918 610 +great 2 27 3.637586 7.275172 626 +though 1 27 3.637586 3.637586 622 +enjoi 1 26 3.688879 3.688879 660 +sport 1 25 3.737670 3.737670 683 +concern 1 25 3.737670 3.737670 666 +magazin 1 24 3.761200 3.761200 704 +wish 1 24 3.761200 3.761200 692 +famili 1 23 3.806662 3.806662 735 +almost 1 22 3.850148 3.850148 742 +try 1 22 3.850148 3.850148 764 +love 3 21 3.912023 11.736069 804 +born 1 21 3.912023 3.912023 798 +watch 1 21 3.912023 3.912023 789 +nice 1 20 3.951244 3.951244 809 +tenni 1 20 3.951244 3.951244 838 +citi 1 19 4.007333 4.007333 874 +offici 2 18 4.060443 8.120886 894 +figur 1 18 4.060443 4.060443 903 +bachelor 1 17 4.110874 4.110874 957 +normal 1 16 4.174387 4.174387 995 +jose 1 16 4.174387 4.174387 976 +enough 1 15 4.248495 4.248495 1040 +rate 1 15 4.248495 4.248495 1037 +anywai 1 15 4.248495 4.248495 1047 +novel 1 15 4.248495 4.248495 1039 +came 2 13 4.382027 8.764054 1197 +cannot 1 13 4.382027 4.382027 1144 +sai 1 13 4.382027 4.382027 1175 +philosophi 1 13 4.382027 4.382027 1167 +stai 1 12 4.465908 4.465908 1215 +employ 1 12 4.465908 4.465908 1291 +walk 1 12 4.465908 4.465908 1281 +rest 1 12 4.465908 4.465908 1259 +surf 1 11 4.553877 4.553877 1301 +town 1 10 4.653960 4.653960 1458 +guess 1 10 4.653960 4.653960 1443 +sister 1 9 4.753590 4.753590 1524 +prefer 1 9 4.753590 4.753590 1491 +swim 1 9 4.753590 4.753590 1599 +kanpur 2 8 4.875197 9.750394 1744 +star 1 8 4.875197 4.875197 1717 +bridg 1 8 4.875197 4.875197 1764 +job 1 8 4.875197 4.875197 1702 +fortun 1 7 5.010635 5.010635 1872 +monei 1 7 5.010635 5.010635 1934 +cricket 1 7 5.010635 5.010635 1945 +slightli 1 7 5.010635 5.010635 1795 +parent 1 6 5.164786 5.164786 2204 +whatev 1 6 5.164786 5.164786 2097 +hike 1 6 5.164786 5.164786 2234 +televis 1 6 5.164786 5.164786 2118 +almaden 1 5 5.347108 5.347108 2511 +cyber 1 4 5.568345 5.568345 2909 +gone 1 4 5.568345 5.568345 3072 +compris 1 4 5.568345 5.568345 2862 +shouldn 1 4 5.568345 5.568345 2606 +suppos 1 4 5.568345 5.568345 3002 +skate 1 4 5.568345 5.568345 3046 +rahul 2 3 5.857933 11.715866 3464 +indianinstitut 1 3 5.857933 5.857933 4003 +romanc 1 3 5.857933 5.857933 3632 +trek 1 3 5.857933 5.857933 4025 +win 1 3 5.857933 5.857933 3593 +comedi 2 2 6.263398 12.526796 5822 +surfer 1 2 6.263398 6.263398 4982 +centr 1 2 6.263398 6.263398 4222 +northern 1 2 6.263398 6.263398 5861 +lover 1 2 6.263398 6.263398 6192 +paid 1 2 6.263398 6.263398 6081 +livabl 2 1 6.957497 13.914994 19653 +kapoorhello 1 1 6.957497 6.957497 19654 +schedulemydepartmentmyuniversityiitkanpuriitkclass 1 1 6.957497 6.957497 19655 +relatedlink 1 1 6.957497 6.957497 19656 +menow 1 1 6.957497 6.957497 19657 +andrais 1 1 6.957497 6.957497 19658 +elder 1 1 6.957497 6.957497 19659 +moneymagazin 1 1 6.957497 6.957497 19660 +editormust 1 1 6.957497 6.957497 19661 +greenland 1 1 6.957497 6.957497 19662 +complain 1 1 6.957497 6.957497 19663 +isawesom 1 1 6.957497 6.957497 19664 +regret 1 1 6.957497 6.957497 19665 +genr 1 1 6.957497 6.957497 19666 +gymnast 1 1 6.957497 6.957497 19667 +cloudi 1 1 6.957497 6.957497 19668 +breezi 1 1 6.957497 6.957497 19669 +youget 1 1 6.957497 6.957497 19670 +musicstuffmovi 1 1 6.957497 6.957497 19671 +televisioninternettravelotherbookmark 1 1 6.957497 6.957497 19672 +meget 1 1 6.957497 6.957497 19673 +guestbookrahul 1 1 6.957497 6.957497 19674 +eduh 1 1 6.957497 6.957497 19675 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~raman^raman.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~raman^raman.html new file mode 100644 index 00000000..05d2d009 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~raman^raman.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 2 457 0.693147 1.386294 12 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +work 1 380 0.693147 0.693147 9 +current 3 284 1.098612 3.295836 21 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +graduat 1 215 1.386294 1.386294 31 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +distribut 1 162 1.791759 1.791759 51 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +architectur 1 139 1.945910 1.945910 77 +perform 1 143 1.945910 1.945910 74 +model 1 145 1.945910 1.945910 69 +dayton 1 119 2.079442 2.079442 104 +postscript 1 131 2.079442 2.079442 90 +mathemat 1 108 2.197225 2.197225 123 +person 1 111 2.197225 2.197225 117 +specif 1 106 2.197225 2.197225 106 +part 1 98 2.302585 2.302585 129 +west 1 83 2.484907 2.484907 192 +member 1 84 2.484907 2.484907 165 +thing 1 84 2.484907 2.484907 189 +integr 1 67 2.708050 2.708050 245 +street 1 63 2.772589 2.772589 293 +evalu 1 64 2.772589 2.772589 266 +major 1 56 2.890372 2.890372 315 +telephon 1 50 3.044522 3.044522 373 +music 1 42 3.218876 3.218876 436 +error 1 40 3.258097 3.258097 449 +vita 1 38 3.295837 3.295837 473 +committe 1 34 3.401197 3.401197 522 +curriculum 1 33 3.433987 3.433987 535 +team 1 27 3.637586 3.637586 625 +bookmark 1 26 3.688879 3.688879 639 +subject 1 26 3.688879 3.688879 647 +livni 1 15 4.248495 4.248495 1053 +minor 1 12 4.465908 4.465908 1237 +pascal 1 12 4.465908 4.465908 1213 +earth 1 10 4.653960 4.653960 1463 +chao 1 8 4.875197 4.875197 1753 +judg 1 8 4.875197 4.875197 1644 +uncertainti 1 7 5.010635 5.010635 1882 +truth 1 6 5.164786 5.164786 2179 +ohio 1 5 5.347108 5.347108 2447 +condor 1 5 5.347108 5.347108 2577 +raman 6 4 5.568345 33.410070 2827 +rajesh 4 3 5.857933 23.431732 3511 +off 1 3 5.857933 5.857933 3170 +wesleyan 1 3 5.857933 5.857933 3988 +saluja 1 3 5.857933 5.857933 3104 +novelti 1 2 6.263398 6.263398 5765 +monster 1 2 6.263398 6.263398 6207 +prodigi 1 2 6.263398 6.263398 5670 +old 1 1 6.957497 6.957497 19676 +homm 1 1 6.957497 6.957497 19677 +winsonsin 1 1 6.957497 6.957497 19678 +chimera 1 1 6.957497 6.957497 19679 +contradict 1 1 6.957497 6.957497 19680 +feebleworm 1 1 6.957497 6.957497 19681 +depositari 1 1 6.957497 6.957497 19682 +cloaca 1 1 6.957497 6.957497 19683 +theglori 1 1 6.957497 6.957497 19684 +shame 1 1 6.957497 6.957497 19685 +blais 1 1 6.957497 6.957497 19686 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html new file mode 100644 index 00000000..8a12c2a7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ramasamy^ramasamy.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +wisconsin 2 169 1.791759 3.583518 54 +parallel 1 169 1.791759 1.791759 60 +avail 1 169 1.791759 1.791759 48 +address 1 170 1.791759 1.791759 62 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +databas 3 122 2.079442 6.238326 86 +confer 1 126 2.079442 2.079442 100 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +intern 1 108 2.197225 2.197225 128 +person 1 111 2.197225 2.197225 117 +comment 1 93 2.397895 2.397895 146 +larg 1 82 2.484907 2.484907 168 +journal 1 83 2.484907 2.484907 183 +david 1 71 2.639057 2.639057 232 +street 1 63 2.772589 2.772589 293 +improv 1 62 2.772589 2.772589 289 +think 1 57 2.890372 2.890372 314 +electron 1 47 3.091042 3.091042 379 +might 1 41 3.218876 3.218876 426 +eduoffic 1 33 3.433987 3.433987 531 +altern 1 26 3.688879 3.688879 641 +scalabl 1 24 3.761200 3.761200 705 +hierarchi 1 22 3.850148 3.850148 744 +tell 1 21 3.912023 3.912023 777 +mostli 1 19 4.007333 4.007333 869 +estim 1 17 4.110874 4.110874 930 +dewitt 1 12 4.465908 4.465908 1270 +naughton 3 10 4.653960 13.961880 1450 +jeffrei 3 9 4.753590 14.260770 1612 +wall 1 9 4.753590 4.753590 1553 +paradis 1 8 4.875197 4.875197 1782 +presenc 1 8 4.875197 4.875197 1671 +bombai 1 7 5.010635 5.010635 1972 +aggreg 1 6 5.164786 5.164786 2219 +prasad 1 6 5.164786 5.164786 2126 +deshpand 1 5 5.347108 5.347108 2431 +multidimension 1 4 5.568345 5.568345 3091 +amit 1 4 5.568345 5.568345 2972 +ramasami 1 4 5.568345 5.568345 3088 +shukla 1 3 5.857933 5.857933 4030 +karthikeyan 1 3 5.857933 5.857933 4031 +mumbai 1 3 5.857933 5.857933 4029 +karthik 1 1 6.957497 6.957497 19687 +pagekarthikeyan 1 1 6.957497 6.957497 19688 +ramasamyabouti 1 1 6.957497 6.957497 19689 +projectshack 1 1 6.957497 6.957497 19690 +connectivityparadis 1 1 6.957497 6.957497 19691 +pthread 1 1 6.957497 6.957497 19692 +wrapperspublicationsstorag 1 1 6.957497 6.957497 19693 +presentationsweb 1 1 6.957497 6.957497 19694 +picturearchitectur 1 1 6.957497 6.957497 19695 +serversphoto 1 1 6.957497 6.957497 19696 +albumencount 1 1 6.957497 6.957497 19697 +leafperson 1 1 6.957497 6.957497 19698 +inforesum 1 1 6.957497 6.957497 19699 +financemonei 1 1 6.957497 6.957497 19700 +interestshack 1 1 6.957497 6.957497 19701 +photographycontact 1 1 6.957497 6.957497 19702 +informationstreet 1 1 6.957497 6.957497 19703 +addresskarthik 1 1 6.957497 6.957497 19704 +suggestionspleas 1 1 6.957497 6.957497 19705 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html new file mode 100644 index 00000000..75e3c6bd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ratliff^ratliff.html @@ -0,0 +1,22 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +student 1 343 1.098612 1.098612 19 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +phone 1 175 1.791759 1.791759 45 +hour 1 165 1.791759 1.791759 46 +construct 1 139 1.945910 1.945910 82 +place 1 106 2.197225 2.197225 124 +visit 1 63 2.772589 2.772589 288 +space 1 57 2.890372 2.890372 310 +edulast 1 17 4.110874 4.110874 927 +stai 1 12 4.465908 4.465908 1215 +tune 1 12 4.465908 4.465908 1227 +login 1 9 4.753590 4.753590 1550 +kelli 3 4 5.568345 16.705035 2793 +ratliffoffic 1 1 6.957497 6.957497 19706 +genealog 1 1 6.957497 6.957497 19707 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html new file mode 100644 index 00000000..ce106c96 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ratnakar^ratnakar.html @@ -0,0 +1,46 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +research 2 431 0.693147 1.386294 10 +depart 2 457 0.693147 1.386294 12 +interest 1 384 0.693147 0.693147 11 +wisc 1 242 1.386294 1.386294 33 +public 1 202 1.609438 1.609438 43 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +avail 2 169 1.791759 3.583518 48 +base 1 165 1.791759 1.791759 50 +area 1 144 1.945910 1.945910 80 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +assist 1 112 2.197225 2.197225 113 +imag 2 91 2.397895 4.795790 161 +control 1 82 2.484907 2.484907 164 +thing 1 84 2.484907 2.484907 189 +west 1 83 2.484907 2.484907 192 +main 1 67 2.708050 2.708050 256 +street 1 63 2.772589 2.772589 293 +digit 1 52 2.995732 2.995732 348 +advisor 1 51 2.995732 2.995732 355 +video 1 44 3.135494 3.135494 405 +product 1 33 3.433987 3.433987 527 +compress 5 23 3.806662 19.033310 719 +qualiti 1 20 3.951244 3.951244 832 +vector 1 16 4.174387 4.174387 961 +livni 1 15 4.248495 4.248495 1053 +miron 1 14 4.317488 4.317488 1110 +mode 1 9 4.753590 4.753590 1492 +invok 2 6 5.164786 10.329572 2079 +reveal 1 4 5.568345 5.568345 2647 +fractal 1 3 5.857933 5.857933 3475 +quantiz 1 2 6.263398 6.263398 5692 +ratnakar 3 1 6.957497 20.872491 19708 +viresh 2 1 6.957497 13.914994 19709 +lossi 1 1 6.957497 6.957497 19710 +qclicauthor 1 1 6.957497 6.957497 19711 +qclic 1 1 6.957497 6.957497 19712 +qclicbrows 1 1 6.957497 6.957497 19713 +rever 1 1 6.957497 6.957497 19714 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html new file mode 100644 index 00000000..068ea8cb --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~rcarl^rcarl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +offic 4 299 1.098612 4.394448 13 +current 1 284 1.098612 1.098612 21 +cours 1 273 1.098612 1.098612 15 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +address 3 170 1.791759 5.375277 62 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +distribut 1 162 1.791759 1.791759 51 +architectur 1 139 1.945910 1.945910 77 +number 3 130 2.079442 6.238326 97 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +introduct 1 126 2.079442 2.079442 87 +person 1 111 2.197225 2.197225 117 +check 1 115 2.197225 2.197225 118 +advanc 1 99 2.302585 2.302585 130 +need 1 98 2.302585 2.302585 135 +mani 1 92 2.397895 2.397895 150 +graphic 1 90 2.397895 2.397895 147 +west 1 83 2.484907 2.484907 192 +mondai 2 77 2.564949 5.129898 206 +tuesdai 1 73 2.639057 2.639057 219 +thursdai 1 70 2.708050 2.708050 241 +order 1 69 2.708050 2.708050 249 +wednesdai 2 64 2.772589 5.545178 261 +street 1 63 2.772589 2.772589 293 +best 1 59 2.833213 2.833213 299 +publish 1 57 2.890372 2.890372 326 +profession 1 51 2.995732 2.995732 345 +without 1 50 3.044522 3.044522 370 +math 1 44 3.135494 3.135494 402 +fridai 1 44 3.135494 3.135494 390 +richard 2 31 3.496508 6.993016 559 +load 1 28 3.610918 3.610918 601 +fellow 1 24 3.761200 3.761200 701 +emphasi 1 22 3.850148 3.850148 755 +thur 1 19 4.007333 4.007333 847 +ever 1 19 4.007333 4.007333 872 +whole 1 17 4.110874 4.110874 940 +brother 2 13 4.382027 8.764054 1189 +sundai 1 10 4.653960 4.653960 1387 +desktop 1 10 4.653960 4.653960 1445 +prevent 1 7 5.010635 5.010635 1827 +saturdai 1 7 5.010635 5.010635 1794 +shade 1 7 5.010635 5.010635 1881 +polit 1 6 5.164786 5.164786 2115 +artist 1 6 5.164786 5.164786 2127 +seriou 1 5 5.347108 5.347108 2252 +haven 3 4 5.568345 16.705035 3037 +underwat 1 4 5.568345 5.568345 2838 +fire 1 4 5.568345 5.568345 3001 +beard 1 2 6.263398 6.263398 6161 +grave 1 2 6.263398 6.263398 5968 +doom 1 2 6.263398 6.263398 5848 +goofi 1 2 6.263398 6.263398 4074 +omin 3 1 6.957497 20.872491 19715 +monasteriu 2 1 6.957497 13.914994 19716 +doominu 2 1 6.957497 13.914994 19717 +rcarl 1 1 6.957497 6.957497 19718 +subsurfac 1 1 6.957497 6.957497 19719 +depositori 1 1 6.957497 6.957497 19720 +dig 1 1 6.957497 6.957497 19721 +solitari 1 1 6.957497 6.957497 19722 +innebri 1 1 6.957497 6.957497 19723 +vampir 1 1 6.957497 6.957497 19724 +nostalg 1 1 6.957497 6.957497 19725 +funki 1 1 6.957497 6.957497 19726 +monk 1 1 6.957497 6.957497 19727 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~reps^reps.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~reps^reps.html new file mode 100644 index 00000000..1ffd3753 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~reps^reps.html @@ -0,0 +1,559 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 48 775 0.000000 0.000000 2 +scienc 31 640 0.000000 0.000000 4 +univers 18 571 0.000000 0.000000 5 +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +program 91 374 0.693147 63.076377 7 +depart 17 457 0.693147 11.783499 12 +system 15 443 0.693147 10.397205 6 +research 6 431 0.693147 4.158882 10 +inform 3 412 0.693147 2.079441 8 +interest 2 384 0.693147 1.386294 11 +work 2 380 0.693147 1.386294 9 +engin 17 297 1.098612 18.676404 20 +us 9 329 1.098612 9.887508 16 +project 4 340 1.098612 4.394448 18 +current 4 284 1.098612 4.394448 21 +time 2 293 1.098612 2.197224 17 +student 1 343 1.098612 1.098612 19 +languag 33 227 1.386294 45.747702 26 +softwar 31 220 1.386294 42.975114 30 +also 17 259 1.386294 23.566998 28 +gener 12 220 1.386294 16.635528 27 +cornel 6 215 1.386294 8.317764 23 +washington 4 236 1.386294 5.545176 32 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +paper 11 205 1.609438 17.703818 38 +public 6 202 1.609438 9.656628 43 +updat 4 191 1.609438 6.437752 41 +list 2 201 1.609438 3.218876 39 +oper 2 180 1.609438 3.218876 34 +includ 1 208 1.609438 1.609438 42 +class 1 199 1.609438 1.609438 37 +wisconsin 20 169 1.791759 35.835180 54 +madison 15 165 1.791759 26.876385 55 +algorithm 15 162 1.791759 26.876385 57 +base 13 165 1.791759 23.292867 50 +develop 9 174 1.791759 16.125831 53 +applic 6 170 1.791759 10.750554 56 +implement 5 152 1.791759 8.958795 52 +parallel 3 169 1.791759 5.375277 60 +distribut 2 162 1.791759 3.583518 51 +recent 1 167 1.791759 1.791759 58 +data 1 170 1.791759 1.791759 49 +note 17 142 1.945910 33.080470 67 +problem 14 147 1.945910 27.242740 75 +click 11 142 1.945910 21.405010 78 +lectur 9 135 1.945910 17.513190 73 +support 3 132 1.945910 5.837730 83 +process 3 142 1.945910 5.837730 72 +professor 2 137 1.945910 3.891820 76 +object 1 138 1.945910 1.945910 79 +architectur 1 139 1.945910 1.945910 77 +first 1 140 1.945910 1.945910 71 +analysi 28 124 2.079442 58.224376 98 +confer 17 126 2.079442 35.350514 100 +tool 4 117 2.079442 8.317768 93 +technolog 2 131 2.079442 4.158884 102 +databas 2 122 2.079442 4.158884 86 +report 2 131 2.079442 4.158884 92 +dayton 1 119 2.079442 2.079442 104 +provid 1 121 2.079442 2.079442 94 +spring 1 131 2.079442 2.079442 88 +number 1 130 2.079442 2.079442 97 +version 7 113 2.197225 15.380575 122 +intern 7 108 2.197225 15.380575 128 +world 2 115 2.197225 4.394450 126 +theori 2 111 2.197225 4.394450 127 +specif 1 106 2.197225 2.197225 106 +make 1 111 2.197225 2.197225 120 +find 1 111 2.197225 2.197225 111 +manag 1 114 2.197225 2.197225 125 +check 1 115 2.197225 2.197225 118 +site 1 106 2.197225 2.197225 119 +access 10 102 2.302585 23.025850 136 +book 4 99 2.302585 9.210340 131 +text 2 98 2.302585 4.605170 133 +proceed 24 93 2.397895 57.549480 152 +associ 4 93 2.397895 9.591580 151 +octob 4 89 2.397895 9.591580 156 +center 3 88 2.397895 7.193685 158 +commun 2 95 2.397895 4.795790 157 +call 1 91 2.397895 2.397895 153 +present 1 91 2.397895 2.397895 145 +environ 12 84 2.484907 29.818884 177 +ieee 9 86 2.484907 22.364163 190 +novemb 6 81 2.484907 14.909442 179 +journal 5 83 2.484907 12.424535 183 +second 5 81 2.484907 12.424535 166 +larg 2 82 2.484907 4.969814 168 +solut 2 82 2.484907 4.969814 162 +west 1 83 2.484907 2.484907 192 +member 1 84 2.484907 2.484907 165 +academ 1 82 2.484907 2.484907 178 +chang 1 82 2.484907 2.484907 163 +control 1 82 2.484907 2.484907 164 +institut 1 84 2.484907 2.484907 187 +june 9 79 2.564949 23.084541 214 +april 7 77 2.564949 17.954643 196 +appear 5 78 2.564949 12.824745 210 +decemb 5 80 2.564949 12.824745 215 +refer 3 78 2.564949 7.694847 203 +dynam 3 76 2.564949 7.694847 194 +method 1 80 2.564949 2.564949 213 +complet 1 77 2.564949 2.564949 208 +optim 1 79 2.564949 2.564949 197 +symposium 14 72 2.639057 36.946798 238 +solv 6 73 2.639057 15.834342 234 +logic 3 71 2.639057 7.917171 230 +david 2 71 2.639057 5.278114 232 +workshop 2 71 2.639057 5.278114 239 +summari 1 73 2.639057 2.639057 237 +line 1 75 2.639057 2.639057 231 +effici 1 73 2.639057 2.639057 233 +nation 1 74 2.639057 2.639057 240 +integr 16 67 2.708050 43.328800 245 +august 5 66 2.708050 13.540250 257 +test 1 66 2.708050 2.708050 252 +order 1 69 2.708050 2.708050 249 +practic 1 70 2.708050 2.708050 246 +differ 1 66 2.708050 2.708050 253 +januari 11 62 2.772589 30.498479 264 +evalu 7 64 2.772589 19.408123 266 +foundat 5 62 2.772589 13.862945 286 +septemb 3 65 2.772589 8.317767 274 +complex 2 64 2.772589 5.545178 269 +interact 2 62 2.772589 5.545178 270 +creat 1 63 2.772589 2.772589 277 +previou 1 62 2.772589 2.772589 290 +abstract 1 62 2.772589 2.772589 276 +juli 13 60 2.833213 36.831769 305 +march 3 61 2.833213 8.499639 295 +content 1 59 2.833213 2.833213 302 +thesi 6 57 2.890372 17.342232 327 +publish 4 57 2.890372 11.561488 326 +direct 4 57 2.890372 11.561488 316 +special 3 56 2.890372 8.671116 320 +index 2 56 2.890372 5.780744 309 +explor 1 58 2.890372 2.890372 324 +overview 1 56 2.890372 2.890372 323 +space 1 57 2.890372 2.890372 310 +scientif 2 53 2.944439 5.888878 341 +allow 1 53 2.944439 2.944439 333 +particular 2 51 2.995732 5.991464 352 +maintain 1 51 2.995732 2.995732 342 +principl 8 48 3.044522 24.356176 357 +pointer 3 48 3.044522 9.133566 368 +visitor 2 49 3.044522 6.089044 371 +telephon 1 50 3.044522 3.044522 373 +set 1 50 3.044522 3.044522 361 +california 2 46 3.091042 6.182084 388 +understand 1 47 3.091042 3.091042 384 +algebra 4 45 3.135494 12.541976 394 +describ 1 45 3.135494 3.135494 400 +natur 1 44 3.135494 3.135494 406 +third 4 43 3.178054 12.712216 412 +show 1 43 3.178054 3.178054 417 +york 22 41 3.218876 70.815272 435 +editor 8 41 3.218876 25.751008 433 +combin 2 42 3.218876 6.437752 421 +edit 1 42 3.218876 3.218876 418 +press 1 42 3.218876 3.218876 419 +compani 1 41 3.218876 3.218876 423 +transact 9 39 3.258097 29.322873 438 +societi 6 40 3.258097 19.548582 456 +submit 3 39 3.258097 9.774291 440 +theoret 2 39 3.258097 6.516194 446 +tutori 2 39 3.258097 6.516194 437 +small 1 39 3.258097 3.258097 447 +probabl 1 40 3.258097 3.258097 455 +seminar 3 38 3.295837 9.887511 470 +streetmadison 1 38 3.295837 3.295837 474 +vita 1 38 3.295837 3.295837 473 +origin 1 38 3.295837 3.295837 472 +correct 1 38 3.295837 3.295837 462 +prototyp 1 38 3.295837 3.295837 463 +connect 1 37 3.332205 3.332205 485 +mean 1 37 3.332205 3.332205 477 +china 1 37 3.332205 3.332205 487 +procedur 4 36 3.367296 13.469184 488 +tree 2 36 3.367296 6.734592 492 +multi 2 36 3.367296 6.734592 493 +manual 5 35 3.401197 17.005985 504 +post 3 35 3.401197 10.203591 505 +tech 3 35 3.401197 10.203591 515 +represent 2 35 3.401197 6.802394 512 +either 1 35 3.401197 3.401197 506 +bibliographi 1 34 3.401197 3.401197 518 +curriculum 1 33 3.433987 3.433987 535 +obtain 1 33 3.433987 3.433987 534 +dissert 3 32 3.465736 10.397208 549 +transform 2 32 3.465736 6.931472 542 +kind 1 32 3.465736 3.465736 541 +chapter 1 32 3.465736 3.465736 536 +graph 16 30 3.555348 56.885568 576 +power 1 30 3.555348 3.555348 573 +robert 1 30 3.555348 3.555348 567 +semant 13 29 3.583519 46.585747 587 +depend 11 29 3.583519 39.418709 583 +chines 1 29 3.583519 3.583519 595 +releas 4 28 3.610918 14.443672 616 +univ 2 28 3.610918 7.221836 617 +manipul 6 27 3.637586 21.825516 624 +static 1 27 3.637586 3.637586 619 +consist 1 26 3.688879 3.688879 651 +subject 1 26 3.688879 3.688879 647 +repres 1 26 3.688879 3.688879 656 +bound 1 26 3.688879 3.688879 659 +notic 5 25 3.737670 18.688350 675 +hill 2 25 3.737670 7.475340 670 +valu 1 25 3.737670 3.737670 665 +fundament 1 25 3.737670 3.737670 661 +doctor 2 24 3.761200 7.522400 709 +handl 2 24 3.761200 7.522400 685 +departmentunivers 1 24 3.761200 3.761200 711 +demonstr 1 24 3.761200 3.761200 694 +methodolog 3 23 3.806662 11.419986 733 +variabl 2 23 3.806662 7.613324 715 +miscellan 1 23 3.806662 3.806662 731 +proof 1 23 3.806662 3.806662 720 +springer 11 22 3.850148 42.351628 750 +verlag 11 22 3.850148 42.351628 751 +properti 2 22 3.850148 7.700296 749 +serv 1 22 3.850148 3.850148 758 +sequenti 1 22 3.850148 3.850148 745 +identifi 1 22 3.850148 3.850148 760 +path 2 21 3.912023 7.824046 778 +latest 2 21 3.912023 7.824046 785 +corpor 1 21 3.912023 3.912023 802 +programminglanguag 1 21 3.912023 3.912023 782 +theorem 1 21 3.912023 3.912023 786 +basi 1 20 3.951244 3.951244 828 +citi 2 19 4.007333 8.014666 874 +beij 1 19 4.007333 4.007333 876 +comparison 1 19 4.007333 4.007333 863 +boston 1 19 4.007333 4.007333 862 +north 1 19 4.007333 4.007333 873 +record 10 18 4.060443 40.604430 890 +thoma 2 18 4.060443 8.120886 901 +element 2 18 4.060443 8.120886 895 +partial 1 18 4.060443 4.060443 900 +speed 1 18 4.060443 4.060443 911 +germani 6 17 4.110874 24.665244 946 +modif 4 17 4.110874 16.443496 913 +debug 1 17 4.110874 4.110874 944 +fourth 2 16 4.174387 8.348774 999 +diego 2 16 4.174387 8.348774 992 +cambridg 1 16 4.174387 4.174387 1008 +letter 1 16 4.174387 4.174387 981 +ramakrishnan 1 16 4.174387 4.174387 972 +taiwan 1 16 4.174387 4.174387 1006 +precis 5 15 4.248495 21.242475 1023 +transit 1 15 4.248495 4.248495 1046 +princeton 1 15 4.248495 4.248495 1042 +configur 1 15 4.248495 4.248495 1012 +remot 1 15 4.248495 4.248495 1041 +reprint 8 14 4.317488 34.539904 1097 +attribut 7 14 4.317488 30.222416 1092 +demand 5 14 4.317488 21.587440 1073 +francisco 3 14 4.317488 12.952464 1095 +polynomi 1 14 4.317488 4.317488 1069 +sigplan 13 13 4.382027 56.966351 1190 +directli 1 13 4.382027 4.382027 1141 +carri 1 13 4.382027 4.382027 1152 +context 1 13 4.382027 4.382027 1153 +shape 8 12 4.465908 35.727264 1245 +pascal 1 12 4.465908 4.465908 1213 +nanci 1 12 4.465908 4.465908 1256 +franc 1 12 4.465908 4.465908 1276 +onth 1 12 4.465908 4.465908 1218 +scan 1 12 4.465908 4.465908 1243 +instanc 2 11 4.553877 9.107754 1322 +magic 1 11 4.553877 4.553877 1358 +impact 1 11 4.553877 4.553877 1334 +lake 1 11 4.553877 4.553877 1373 +israel 1 11 4.553877 4.553877 1366 +decomposit 1 10 4.653960 4.653960 1439 +underli 1 10 4.653960 4.653960 1410 +invit 1 10 4.653960 4.653960 1428 +respect 1 9 4.753590 4.753590 1545 +mainten 1 9 4.753590 4.753590 1543 +establish 1 9 4.753590 4.753590 1532 +conferenceon 1 9 4.753590 4.753590 1595 +utah 1 9 4.753590 4.753590 1585 +herefor 1 9 4.753590 4.753590 1483 +ball 1 9 4.753590 4.753590 1608 +equival 1 9 4.753590 4.753590 1496 +yang 8 8 4.875197 39.001576 1652 +colloquium 4 8 4.875197 19.500788 1734 +reus 2 8 4.875197 9.750394 1661 +european 2 8 4.875197 9.750394 1763 +secretari 1 8 4.875197 4.875197 1775 +pldi 1 8 4.875197 4.875197 1704 +competit 1 8 4.875197 4.875197 1635 +irvin 1 8 4.875197 4.875197 1660 +illustr 1 8 4.875197 4.875197 1679 +analys 1 8 4.875197 4.875197 1666 +merg 7 7 5.010635 35.074445 1862 +portland 4 7 5.010635 20.042540 1878 +fifth 2 7 5.010635 10.021270 1931 +iowa 2 7 5.010635 10.021270 1971 +bottom 1 7 5.010635 5.010635 1906 +prioriti 1 7 5.010635 5.010635 1792 +pittsburgh 1 7 5.010635 5.010635 1938 +digest 1 7 5.010635 5.010635 1864 +increment 14 6 5.164786 72.307004 2206 +teitelbaum 12 6 5.164786 61.977432 2102 +symposiumon 6 6 5.164786 30.988716 2054 +grammar 5 6 5.164786 25.823930 2058 +syntax 3 6 5.164786 15.494358 2030 +affect 1 6 5.164786 5.164786 2044 +textual 1 6 5.164786 5.164786 1979 +kluwer 1 6 5.164786 5.164786 2143 +variant 1 6 5.164786 5.164786 2043 +german 1 6 5.164786 5.164786 2190 +unpublish 1 6 5.164786 5.164786 2226 +carolina 1 6 5.164786 5.164786 2142 +horwitz 28 5 5.347108 149.719024 2411 +synthes 12 5 5.347108 64.165296 2451 +dataflow 9 5 5.347108 48.123972 2390 +licens 3 5 5.347108 16.041324 2520 +twenti 3 5 5.347108 16.041324 2540 +aim 2 5 5.347108 10.694216 2477 +summarymi 1 5 5.347108 5.347108 2580 +cacm 1 5 5.347108 5.347108 2388 +shortest 1 5 5.347108 5.347108 2424 +australia 1 5 5.347108 5.347108 2478 +singapor 1 5 5.347108 5.347108 2487 +mcgraw 1 5 5.347108 5.347108 2262 +bind 1 5 5.347108 5.347108 2250 +orlean 1 5 5.347108 5.347108 2550 +interfer 1 5 5.347108 5.347108 2494 +forprogram 1 5 5.347108 5.347108 2361 +salt 1 5 5.347108 5.347108 2413 +patent 1 5 5.347108 5.347108 2574 +chapel 1 5 5.347108 5.347108 2457 +rep 77 4 5.568345 428.762565 3087 +slice 30 4 5.568345 167.050350 2622 +interprocedur 17 4 5.568345 94.661865 2771 +popl 17 4 5.568345 94.661865 3068 +sigsoft 13 4 5.568345 72.388485 3036 +dagstuhl 12 4 5.568345 66.820140 2871 +compcon 3 4 5.568345 16.705035 2958 +ofprogram 3 4 5.568345 16.705035 2624 +ics 2 4 5.568345 11.136690 2779 +petersburg 2 4 5.568345 11.136690 2989 +jolla 2 4 5.568345 11.136690 2988 +bricker 2 4 5.568345 11.136690 3050 +usa 1 4 5.568345 5.568345 3080 +exhaust 1 4 5.568345 5.568345 2825 +melbourn 1 4 5.568345 5.568345 3035 +turnidg 1 4 5.568345 5.568345 2829 +imper 1 4 5.568345 5.568345 3067 +scotland 1 4 5.568345 5.568345 3049 +topla 11 3 5.857933 64.437263 3563 +reachabl 6 3 5.857933 35.147598 4001 +alamito 4 3 5.857933 23.431732 3558 +categor 3 3 5.857933 17.573799 3765 +schloss 3 3 5.857933 17.573799 3727 +denmark 3 3 5.857933 17.573799 3676 +amast 2 3 5.857933 11.715866 3955 +spaa 2 3 5.857933 11.715866 3906 +propag 2 3 5.857933 11.715866 3997 +accommod 2 3 5.857933 11.715866 3337 +fifteenth 2 3 5.857933 11.715866 3868 +principlesof 2 3 5.857933 11.715866 3145 +twentieth 2 3 5.857933 11.715866 3760 +thedevelop 1 3 5.857933 5.857933 3903 +meaning 1 3 5.857933 5.857933 3458 +nearbi 1 3 5.857933 5.857933 3291 +retarget 1 3 5.857933 5.857933 3994 +fourteenth 1 3 5.857933 5.857933 3615 +domin 1 3 5.857933 5.857933 3995 +preserv 1 3 5.857933 5.857933 3628 +jone 1 3 5.857933 5.857933 3703 +atlanta 1 3 5.857933 5.857933 3778 +onprincipl 1 3 5.857933 5.857933 3701 +ninth 1 3 5.857933 5.857933 3616 +sagiv 11 2 6.263398 68.897378 6176 +acta 7 2 6.263398 43.843786 5124 +differenc 5 2 6.263398 31.316990 6177 +chop 3 2 6.263398 18.790194 6160 +informatica 3 2 6.263398 18.790194 5125 +destruct 3 2 6.263398 18.790194 6232 +copenhagen 3 2 6.263398 18.790194 5145 +alia 2 2 6.263398 12.526796 5383 +charleston 2 2 6.263398 12.526796 6181 +thevari 1 2 6.263398 6.263398 6130 +contigu 1 2 6.263398 6.263398 6001 +worker 1 2 6.263398 6.263398 4841 +andbuild 1 2 6.263398 6.263398 6028 +clickherefor 1 2 6.263398 6.263398 5344 +interproceduraldataflow 1 2 6.263398 6.263398 6178 +unrestrict 1 2 6.263398 6.263398 4879 +arnold 1 2 6.263398 6.263398 4705 +wasserman 1 2 6.263398 6.263398 5331 +aarhu 1 2 6.263398 6.263398 6180 +moss 1 2 6.263398 6.263398 5820 +fritzson 1 2 6.263398 6.263398 4546 +andarchitectur 1 2 6.263398 6.263398 5755 +languagedesign 1 2 6.263398 6.263398 6182 +spain 1 2 6.263398 6.263398 5522 +adequaci 1 2 6.263398 6.263398 6229 +thirteenth 1 2 6.263398 6.263398 5733 +eleventh 1 2 6.263398 6.263398 5031 +eighth 1 2 6.263398 6.263398 5750 +leeuwen 1 2 6.263398 6.263398 5543 +doc 1 2 6.263398 6.263398 5022 +mooli 1 2 6.263398 6.263398 6179 +tung 1 2 6.263398 6.263398 4709 +binklei 12 1 6.957497 83.489964 19728 +ramalingam 10 1 6.957497 69.574970 19729 +prin 6 1 6.957497 41.744982 19730 +idfa 4 1 6.957497 27.829988 19731 +interf 4 1 6.957497 27.829988 19732 +wilhelm 4 1 6.957497 27.829988 19733 +tosem 3 1 6.957497 20.872491 19734 +pfeiffer 3 1 6.957497 20.872491 19735 +demer 3 1 6.957497 20.872491 19736 +fromacm 3 1 6.957497 20.872491 19737 +berzin 3 1 6.957497 20.872491 19738 +sigsoftsymposium 3 1 6.957497 20.872491 19739 +wadern 3 1 6.957497 20.872491 19740 +rosai 3 1 6.957497 20.872491 19741 +fseb 2 1 6.957497 13.914994 19742 +thesiswuu 2 1 6.957497 13.914994 19743 +esop 2 1 6.957497 13.914994 19744 +poplb 2 1 6.957497 13.914994 19745 +pepma 2 1 6.957497 13.914994 19746 +fsea 2 1 6.957497 13.914994 19747 +diku 2 1 6.957497 13.914994 19748 +fase 2 1 6.957497 13.914994 19749 +pepmb 2 1 6.957497 13.914994 19750 +lape 2 1 6.957497 13.914994 19751 +psde 2 1 6.957497 13.914994 19752 +toconst 2 1 6.957497 13.914994 19753 +paradigmsfor 2 1 6.957497 13.914994 19754 +brighton 2 1 6.957497 13.914994 19755 +abramski 2 1 6.957497 13.914994 19756 +maibaum 2 1 6.957497 13.914994 19757 +wherefor 2 1 6.957497 13.914994 19758 +sigoa 2 1 6.957497 13.914994 19759 +pepm 2 1 6.957497 13.914994 19760 +onparti 2 1 6.957497 13.914994 19761 +ibfi 2 1 6.957497 13.914994 19762 +repsprofessorcomput 1 1 6.957497 6.957497 19763 +thehom 1 1 6.957497 6.957497 19764 +createtool 1 1 6.957497 6.957497 19765 +manipulationoper 1 1 6.957497 6.957497 19766 +slicingcan 1 1 6.957497 6.957497 19767 +elementss 1 1 6.957497 6.957497 19768 +thatmight 1 1 6.957497 6.957497 19769 +findsemant 1 1 6.957497 6.957497 19770 +thedecomposit 1 1 6.957497 6.957497 19771 +solvingmani 1 1 6.957497 6.957497 19772 +applicationsin 1 1 6.957497 6.957497 19773 +atimprov 1 1 6.957497 6.957497 19774 +relatedoper 1 1 6.957497 6.957497 19775 +slicer 1 1 6.957497 6.957497 19776 +unexpect 1 1 6.957497 6.957497 19777 +betweeninterprocedur 1 1 6.957497 6.957497 19778 +oninterprocedur 1 1 6.957497 6.957497 19779 +transformingthem 1 1 6.957497 6.957497 19780 +timebi 1 1 6.957497 6.957497 19781 +probleminst 1 1 6.957497 6.957497 19782 +publicationsprogram 1 1 6.957497 6.957497 19783 +slicing_pat 1 1 6.957497 6.957497 19784 +thesismerg 1 1 6.957497 6.957497 19785 +iwscm 1 1 6.957497 6.957497 19786 +popla 1 1 6.957497 6.957497 19787 +iwsvcc 1 1 6.957497 6.957497 19788 +ccpsd 1 1 6.957497 6.957497 19789 +npfo_submiss 1 1 6.957497 6.957497 19790 +ccipl 1 1 6.957497 6.957497 19791 +prog_integration_system 1 1 6.957497 6.957497 19792 +prog_integration_manu 1 1 6.957497 6.957497 19793 +subsetof 1 1 6.957497 6.957497 19794 +clickingher 1 1 6.957497 6.957497 19795 +andexpect 1 1 6.957497 6.957497 19796 +anddifferenc 1 1 6.957497 6.957497 19797 +thesesdavid 1 1 6.957497 6.957497 19798 +thesisphil 1 1 6.957497 6.957497 19799 +thesisinterprocedur 1 1 6.957497 6.957497 19800 +analysisdemand 1 1 6.957497 6.957497 19801 +tcs_ide_pap 1 1 6.957497 6.957497 19802 +ptime 1 1 6.957497 6.957497 19803 +acta_pap 1 1 6.957497 6.957497 19804 +pfeiffer_thesi 1 1 6.957497 6.957497 19805 +jalg_pap 1 1 6.957497 6.957497 19806 +popl_not 1 1 6.957497 6.957497 19807 +publicationsbooksrep 1 1 6.957497 6.957497 19808 +constructinglanguag 1 1 6.957497 6.957497 19809 +publicationssagiv 1 1 6.957497 6.957497 19810 +j_alg 1 1 6.957497 6.957497 19811 +preservingtransform 1 1 6.957497 6.957497 19812 +grammarswith 1 1 6.957497 6.957497 19813 +movement 1 1 6.957497 6.957497 19814 +sublinear 1 1 6.957497 6.957497 19815 +papershorwitz 1 1 6.957497 6.957497 19816 +ganzing 1 1 6.957497 6.957497 19817 +chaptersrep 1 1 6.957497 6.957497 19818 +bohner 1 1 6.957497 6.957497 19819 +fromproceed 1 1 6.957497 6.957497 19820 +ichikawa 1 1 6.957497 6.957497 19821 +tsubotani 1 1 6.957497 6.957497 19822 +barstow 1 1 6.957497 6.957497 19823 +sandewal 1 1 6.957497 6.957497 19824 +shrobe 1 1 6.957497 6.957497 19825 +publicationssiff 1 1 6.957497 6.957497 19826 +danvi 1 1 6.957497 6.957497 19827 +glueck 1 1 6.957497 6.957497 19828 +thiemann 1 1 6.957497 6.957497 19829 +hentenryck 1 1 6.957497 6.957497 19830 +formalapproach 1 1 6.957497 6.957497 19831 +nielsen 1 1 6.957497 6.957497 19832 +schwartzbach 1 1 6.957497 6.957497 19833 +tapsoft 1 1 6.957497 6.957497 19834 +compilerconstruct 1 1 6.957497 6.957497 19835 +edinburgh 1 1 6.957497 6.957497 19836 +reducibleflowgraph 1 1 6.957497 6.957497 19837 +velen 1 1 6.957497 6.957497 19838 +onalgebra 1 1 6.957497 6.957497 19839 +softwareconfigur 1 1 6.957497 6.957497 19840 +issuesin 1 1 6.957497 6.957497 19841 +barcelona 1 1 6.957497 6.957497 19842 +diaz 1 1 6.957497 6.957497 19843 +oreja 1 1 6.957497 6.957497 19844 +versionand 1 1 6.957497 6.957497 19845 +grassau 1 1 6.957497 6.957497 19846 +bericht 1 1 6.957497 6.957497 19847 +winkler 1 1 6.957497 6.957497 19848 +teubner 1 1 6.957497 6.957497 19849 +stuttgart 1 1 6.957497 6.957497 19850 +marceau 1 1 6.957497 6.957497 19851 +engineeringsymposium 1 1 6.957497 6.957497 19852 +alpern 1 1 6.957497 6.957497 19853 +albuquerqu 1 1 6.957497 6.957497 19854 +tosyntax 1 1 6.957497 6.957497 19855 +williamsburg 1 1 6.957497 6.957497 19856 +softwarerep 1 1 6.957497 6.957497 19857 +patentsrep 1 1 6.957497 6.957497 19858 +pend 1 1 6.957497 6.957497 19859 +submissionsrep 1 1 6.957497 6.957497 19860 +reportsrep 1 1 6.957497 6.957497 19861 +mehlhorn 1 1 6.957497 6.957497 19862 +datalogisk 1 1 6.957497 6.957497 19863 +psramalingam 1 1 6.957497 6.957497 19864 +klint 1 1 6.957497 6.957497 19865 +snelt 1 1 6.957497 6.957497 19866 +extendedabstract 1 1 6.957497 6.957497 19867 +reconstitut 1 1 6.957497 6.957497 19868 +studentsvisitor 1 1 6.957497 6.957497 19869 +jiazhen 1 1 6.957497 6.957497 19870 +paig 1 1 6.957497 6.957497 19871 +chiao 1 1 6.957497 6.957497 19872 +studentsramalingam 1 1 6.957497 6.957497 19873 +programintegr 1 1 6.957497 6.957497 19874 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html new file mode 100644 index 00000000..22629566 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~saeed^saeed.html @@ -0,0 +1,70 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +time 2 293 1.098612 2.197224 17 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +madison 4 165 1.791759 7.167036 55 +contact 3 153 1.791759 5.375277 59 +wisconsin 2 169 1.791759 3.583518 54 +read 1 154 1.791759 1.791759 47 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +dayton 1 119 2.079442 2.079442 104 +send 1 114 2.197225 2.197225 109 +check 1 115 2.197225 2.197225 118 +peopl 1 96 2.302585 2.302585 132 +access 1 102 2.302585 2.302585 136 +pictur 1 89 2.397895 2.397895 160 +sinc 1 90 2.397895 2.397895 159 +institut 1 84 2.484907 2.484907 187 +window 3 68 2.708050 8.124150 242 +function 2 62 2.772589 5.545178 275 +street 1 63 2.772589 2.772589 293 +copi 1 63 2.772589 2.772589 284 +best 1 59 2.833213 2.833213 299 +suggest 1 53 2.944439 2.944439 331 +date 1 51 2.995732 2.995732 344 +right 2 48 3.044522 6.089044 363 +friend 1 48 3.044522 3.044522 376 +tech 1 35 3.401197 3.401197 515 +india 1 32 3.465736 3.465736 550 +univ 2 28 3.610918 7.221836 617 +comp 1 26 3.688879 3.688879 650 +indian 2 22 3.850148 7.700296 769 +love 2 21 3.912023 7.824046 804 +reserv 1 20 3.951244 3.951244 808 +spend 1 19 4.007333 4.007333 850 +beauti 2 18 4.060443 8.120886 912 +statu 1 18 4.060443 4.060443 885 +speed 1 18 4.060443 4.060443 911 +seem 1 18 4.060443 4.060443 899 +comic 1 14 4.317488 4.317488 1103 +song 1 11 4.553877 4.553877 1380 +calvin 1 9 4.753590 4.753590 1518 +kanpur 1 8 4.875197 4.875197 1744 +film 1 8 4.875197 4.875197 1761 +apart 1 7 5.010635 5.010635 1936 +settimeout 1 5 5.347108 5.347108 2536 +guestbook 1 5 5.347108 5.347108 2475 +randal 1 4 5.568345 5.568345 2776 +mirza 1 3 5.857933 5.857933 3989 +hero 1 3 5.857933 5.857933 3711 +saeed 4 2 6.263398 25.053592 6172 +statusclock 2 1 6.957497 13.914994 19875 +pagespe 1 1 6.957497 6.957497 19876 +clearid 1 1 6.957497 6.957497 19877 +cleartimeout 1 1 6.957497 6.957497 19878 +lucknow 1 1 6.957497 6.957497 19879 +listn 1 1 6.957497 6.957497 19880 +netsurf 1 1 6.957497 6.957497 19881 +wismad 1 1 6.957497 6.957497 19882 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sally^sally.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sally^sally.html new file mode 100644 index 00000000..c8bea350 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sally^sally.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +languag 1 227 1.386294 1.386294 26 +oper 1 180 1.609438 1.609438 34 +fall 1 181 1.609438 1.609438 40 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +lectur 1 135 1.945910 1.945910 73 +dayton 1 119 2.079442 2.079442 104 +real 1 93 2.397895 2.397895 144 +chang 1 82 2.484907 2.484907 163 +septemb 1 65 2.772589 2.772589 274 +taught 1 33 3.433987 3.433987 526 +comp 1 26 3.688879 3.688879 650 +edutelephon 1 10 4.653960 4.653960 1473 +desktop 1 10 4.653960 4.653960 1445 +peterson 2 7 5.010635 10.021270 1850 +salli 4 3 5.857933 23.431732 3432 +goodwin 1 1 6.957497 6.957497 19883 +lecturercomput 1 1 6.957497 6.957497 19884 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~samit^samit.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~samit^samit.html new file mode 100644 index 00000000..825835b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~samit^samit.html @@ -0,0 +1,130 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 4 640 0.000000 0.000000 4 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +comput 3 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +interest 3 384 0.693147 2.079441 11 +research 2 431 0.693147 1.386294 10 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +system 1 443 0.693147 0.693147 6 +offic 1 299 1.098612 1.098612 13 +engin 1 297 1.098612 1.098612 20 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +softwar 1 220 1.386294 1.386294 30 +also 1 259 1.386294 1.386294 28 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +madison 5 165 1.791759 8.958795 55 +wisconsin 2 169 1.791759 3.583518 54 +data 2 170 1.791759 3.583518 49 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +base 1 165 1.791759 1.791759 50 +applic 1 170 1.791759 1.791759 56 +process 3 142 1.945910 5.837730 72 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +support 1 132 1.945910 1.945910 83 +databas 7 122 2.079442 14.556094 86 +confer 3 126 2.079442 6.238326 100 +technolog 2 131 2.079442 4.158884 102 +postscript 1 131 2.079442 2.079442 90 +seattl 1 120 2.079442 2.079442 103 +introduct 1 126 2.079442 2.079442 87 +intern 2 108 2.197225 4.394450 128 +manag 1 114 2.197225 2.197225 125 +look 1 107 2.197225 2.197225 115 +person 1 111 2.197225 2.197225 117 +present 1 91 2.397895 2.397895 145 +educ 2 86 2.484907 4.969814 191 +larg 2 82 2.484907 4.969814 168 +institut 1 84 2.484907 2.484907 187 +activ 1 84 2.484907 2.484907 182 +help 1 83 2.484907 2.484907 175 +server 2 76 2.564949 5.129898 204 +master 1 76 2.564949 2.564949 216 +sourc 1 77 2.564949 2.564949 201 +onlin 2 75 2.639057 5.278114 223 +logic 1 71 2.639057 2.639057 230 +name 1 72 2.639057 2.639057 220 +nation 1 74 2.639057 2.639057 240 +multimedia 1 68 2.708050 2.708050 258 +dept 1 64 2.772589 2.772589 291 +evalu 1 64 2.772589 2.772589 266 +run 1 51 2.995732 2.995732 347 +date 1 51 2.995732 2.995732 344 +digit 1 52 2.995732 2.995732 348 +pointer 1 48 3.044522 3.044522 368 +archiv 1 49 3.044522 3.044522 364 +protocol 2 45 3.135494 6.270988 407 +york 1 41 3.218876 3.218876 435 +transact 2 39 3.258097 6.516194 438 +slide 1 38 3.295837 3.295837 467 +industri 1 38 3.295837 3.295837 464 +bibliographi 1 34 3.401197 3.401197 518 +queri 1 33 3.433987 3.433987 524 +articl 1 33 3.433987 3.433987 530 +idea 1 32 3.465736 3.465736 545 +storag 1 31 3.496508 3.496508 553 +profil 1 30 3.555348 3.555348 581 +toward 1 25 3.737670 3.737670 668 +jeff 1 25 3.737670 3.737670 673 +reach 1 24 3.761200 3.761200 688 +initi 1 23 3.806662 3.806662 717 +sort 2 22 3.850148 7.700296 738 +indian 1 22 3.850148 3.850148 769 +hierarchi 1 22 3.850148 3.850148 744 +cooper 1 22 3.850148 3.850148 757 +boston 1 19 4.007333 4.007333 862 +sigmod 1 19 4.007333 4.007333 877 +bachelor 1 17 4.110874 4.110874 957 +estim 1 17 4.110874 4.110874 930 +georg 1 16 4.174387 4.174387 994 +princeton 1 15 4.248495 4.248495 1042 +massiv 1 15 4.248495 4.248495 1026 +warn 1 14 4.317488 4.317488 1068 +infrastructur 1 12 4.465908 4.465908 1234 +council 1 11 4.553877 4.553877 1364 +naughton 2 10 4.653960 9.307920 1450 +vldb 2 10 4.653960 9.307920 1470 +consortium 1 10 4.653960 4.653960 1467 +jeffrei 1 9 4.753590 4.753590 1612 +utah 1 9 4.753590 4.753590 1585 +madra 1 8 4.875197 4.875197 1770 +presenc 1 8 4.875197 4.875197 1671 +competit 1 8 4.875197 4.875197 1635 +spec 1 8 4.875197 4.875197 1640 +analyt 1 7 5.010635 5.010635 1913 +bombai 1 7 5.010635 5.010635 1972 +aggreg 1 6 5.164786 5.164786 2219 +prasad 1 6 5.164786 5.164786 2126 +chicago 1 6 5.164786 5.164786 2149 +deshpand 1 5 5.347108 5.347108 2431 +amit 3 4 5.568345 16.705035 2972 +snail 1 4 5.568345 5.568345 2916 +multidimension 1 4 5.568345 5.568345 3091 +ramasami 1 4 5.568345 5.568345 3088 +shukla 1 3 5.857933 5.857933 4030 +karthikeyan 1 3 5.857933 5.857933 4031 +mumbai 1 3 5.857933 5.857933 4029 +pilot 1 3 5.857933 5.857933 4008 +children 1 3 5.857933 5.857933 3767 +asha 1 3 5.857933 5.857933 4037 +marathon 6 2 6.263398 37.580388 5592 +olap 2 2 6.263398 12.526796 6233 +endow 1 2 6.263398 6.263398 6234 +guidanc 1 1 6.957497 6.957497 19885 +trier 1 1 6.957497 6.957497 19886 +mdd 1 1 6.957497 6.957497 19887 +niiip 1 1 6.957497 6.957497 19888 +transcoop 1 1 6.957497 6.957497 19889 +needi 1 1 6.957497 6.957497 19890 +pageand 1 1 6.957497 6.957497 19891 +bookmarksar 1 1 6.957497 6.957497 19892 +garfield 1 1 6.957497 6.957497 19893 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html new file mode 100644 index 00000000..69eeea44 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sashwin^sashwin.html @@ -0,0 +1,39 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +student 1 343 1.098612 1.098612 19 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +contact 1 153 1.791759 1.791759 59 +construct 1 139 1.945910 1.945910 82 +like 1 132 1.945910 1.945910 81 +technolog 1 131 2.079442 2.079442 102 +place 1 106 2.197225 2.197225 124 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +educ 1 86 2.484907 2.484907 191 +come 1 78 2.564949 2.564949 202 +name 1 72 2.639057 2.639057 220 +visit 1 63 2.772589 2.772589 288 +undergradu 1 54 2.944439 2.944439 338 +india 1 32 3.465736 3.465736 550 +altern 1 26 3.688879 3.688879 641 +bookmark 1 26 3.688879 3.688879 639 +worth 1 11 4.553877 4.553877 1294 +bombai 1 7 5.010635 5.010635 1972 +whereabout 1 4 5.568345 5.568345 3078 +indianinstitut 1 3 5.857933 5.857933 4003 +fantast 1 3 5.857933 5.857933 3966 +hadmi 1 2 6.263398 6.263398 6097 +canfing 1 2 6.263398 6.263398 6098 +ashwin 2 1 6.957497 13.914994 19894 +iitb 1 1 6.957497 6.957497 19895 +meto 1 1 6.957497 6.957497 19896 +sashwin 1 1 6.957497 6.957497 19897 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html new file mode 100644 index 00000000..9f4d0093 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sastry^sastry.html @@ -0,0 +1,167 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +time 3 293 1.098612 3.295836 17 +engin 1 297 1.098612 1.098612 20 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +also 5 259 1.386294 6.931470 28 +graduat 2 215 1.386294 2.772588 31 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +updat 1 191 1.609438 1.609438 41 +read 3 154 1.791759 5.375277 47 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +like 9 132 1.945910 17.513190 81 +year 3 148 1.945910 5.837730 84 +architectur 1 139 1.945910 1.945910 77 +technolog 1 131 2.079442 2.079442 102 +spring 1 131 2.079442 2.079442 88 +compil 1 122 2.079442 2.079442 96 +provid 1 121 2.079442 2.079442 94 +place 2 106 2.197225 4.394450 124 +version 1 113 2.197225 2.197225 122 +site 1 106 2.197225 2.197225 119 +make 1 111 2.197225 2.197225 120 +send 1 114 2.197225 2.197225 109 +mani 1 92 2.397895 2.397895 150 +homepag 1 93 2.397895 2.397895 148 +comment 1 93 2.397895 2.397895 146 +school 1 84 2.484907 2.484907 188 +institut 1 84 2.484907 2.484907 187 +contain 1 81 2.484907 2.484907 174 +academ 1 82 2.484907 2.484907 178 +come 2 78 2.564949 5.129898 202 +know 1 80 2.564949 2.564949 198 +solv 1 73 2.639057 2.639057 234 +would 1 67 2.708050 2.708050 251 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +back 2 60 2.833213 5.666426 297 +plai 1 60 2.833213 2.833213 307 +undergradu 1 54 2.944439 2.944439 338 +talk 1 53 2.944439 2.944439 336 +suggest 1 53 2.944439 2.944439 331 +much 4 52 2.995732 11.982928 349 +maintain 1 51 2.995732 2.995732 342 +date 1 51 2.995732 2.995732 344 +friend 2 48 3.044522 6.089044 376 +better 1 45 3.135494 3.135494 401 +long 2 43 3.178054 6.356108 413 +compani 2 41 3.218876 6.437752 423 +music 1 42 3.218876 3.218876 436 +past 1 42 3.218876 3.218876 428 +must 1 40 3.258097 3.258097 442 +realli 1 40 3.258097 3.258097 444 +author 1 39 3.258097 3.258097 450 +field 2 37 3.332205 6.664410 482 +mean 1 37 3.332205 3.332205 477 +india 1 32 3.465736 3.465736 550 +kind 1 32 3.465736 3.465736 541 +photo 4 31 3.496508 13.986032 561 +someth 3 31 3.496508 10.489524 554 +hard 1 30 3.555348 3.555348 563 +built 1 29 3.583519 3.583519 592 +consid 1 29 3.583519 3.583519 590 +hope 1 28 3.610918 3.610918 610 +mine 2 26 3.688879 7.377758 654 +bookmark 1 26 3.688879 3.688879 639 +enjoi 1 26 3.688879 3.688879 660 +rather 1 26 3.688879 3.688879 642 +sport 1 25 3.737670 3.737670 683 +indian 1 22 3.850148 3.850148 769 +inth 1 22 3.850148 3.850148 741 +watch 2 21 3.912023 7.824046 789 +wonder 1 20 3.951244 3.951244 815 +tenni 1 20 3.951244 3.951244 838 +beauti 1 18 4.060443 4.060443 912 +listen 1 18 4.060443 4.060443 907 +anyth 2 16 4.174387 8.348774 998 +across 1 16 4.174387 4.174387 974 +hobbi 1 16 4.174387 4.174387 1009 +photograph 1 15 4.248495 4.248495 1056 +goe 1 15 4.248495 4.248495 1044 +near 1 14 4.317488 4.317488 1091 +unfortun 1 13 4.382027 4.382027 1170 +scan 1 12 4.465908 4.465908 1243 +reader 1 12 4.465908 4.465908 1246 +awai 2 10 4.653960 9.307920 1447 +town 1 10 4.653960 4.653960 1458 +interestsmi 1 10 4.653960 4.653960 1462 +earth 1 10 4.653960 4.653960 1463 +hint 1 10 4.653960 4.653960 1419 +ball 1 9 4.753590 4.753590 1608 +jeffrei 1 9 4.753590 4.753590 1612 +pick 1 9 4.753590 4.753590 1498 +kanpur 2 8 4.875197 9.750394 1744 +pagei 1 8 4.875197 4.875197 1683 +empir 1 8 4.875197 4.875197 1722 +bridg 1 8 4.875197 4.875197 1764 +cricket 3 7 5.010635 15.031905 1945 +river 1 6 5.164786 5.164786 2220 +rock 1 6 5.164786 5.164786 2164 +whatev 1 6 5.164786 5.164786 2097 +neither 1 6 5.164786 5.164786 1990 +fiction 1 6 5.164786 5.164786 2217 +tri 1 6 5.164786 5.164786 2166 +album 3 4 5.568345 16.705035 2888 +gokul 1 4 5.568345 5.568345 2668 +thati 1 4 5.568345 5.568345 2616 +metal 1 4 5.568345 5.568345 3079 +fantasi 1 4 5.568345 5.568345 3055 +devot 1 4 5.568345 5.568345 2711 +dont 2 3 5.857933 11.715866 3473 +pleasant 2 3 5.857933 11.715866 3825 +seinfeld 1 3 5.857933 5.857933 3958 +romanc 1 3 5.857933 5.857933 3632 +iitk 4 2 6.263398 25.053592 6227 +sastri 3 2 6.263398 18.790194 6171 +mugshot 1 2 6.263398 6.263398 4984 +karnataka 1 2 6.263398 6.263398 5106 +whati 1 2 6.263398 6.263398 6027 +horror 1 2 6.263398 6.263398 5075 +eduunivers 1 2 6.263398 6.263398 6216 +subramanya 2 1 6.957497 13.914994 19898 +hospet 2 1 6.957497 13.914994 19899 +tungabhadra 2 1 6.957497 13.914994 19900 +favourit 2 1 6.957497 13.914994 19901 +hampi 1 1 6.957497 6.957497 19902 +ruin 1 1 6.957497 6.957497 19903 +vijayanagara 1 1 6.957497 6.957497 19904 +fewphotograph 1 1 6.957497 6.957497 19905 +classmatesat 1 1 6.957497 6.957497 19906 +presentcurr 1 1 6.957497 6.957497 19907 +registeredfor 1 1 6.957497 6.957497 19908 +playphatta 1 1 6.957497 6.957497 19909 +champ 1 1 6.957497 6.957497 19910 +entertainmentin 1 1 6.957497 6.957497 19911 +donot 1 1 6.957497 6.957497 19912 +sshow 1 1 6.957497 6.957497 19913 +voraci 1 1 6.957497 6.957497 19914 +unsuccesfulli 1 1 6.957497 6.957497 19915 +grip 1 1 6.957497 6.957497 19916 +ifposs 1 1 6.957497 6.957497 19917 +archer 1 1 6.957497 6.957497 19918 +jane 1 1 6.957497 6.957497 19919 +austen 1 1 6.957497 6.957497 19920 +pride 1 1 6.957497 6.957497 19921 +prejudic 1 1 6.957497 6.957497 19922 +ramesh 1 1 6.957497 6.957497 19923 +mahadeven 1 1 6.957497 6.957497 19924 +sarticl 1 1 6.957497 6.957497 19925 +wonderfulgam 1 1 6.957497 6.957497 19926 +itagain 1 1 6.957497 6.957497 19927 +crossword 1 1 6.957497 6.957497 19928 +cryptic 1 1 6.957497 6.957497 19929 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~scal^scal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~scal^scal.html new file mode 100644 index 00000000..c15f168f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~scal^scal.html @@ -0,0 +1,220 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 5 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +comput 2 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +inform 6 412 0.693147 4.158882 8 +research 5 431 0.693147 3.465735 10 +depart 2 457 0.693147 1.386294 12 +work 2 380 0.693147 1.386294 9 +interest 1 384 0.693147 0.693147 11 +project 6 340 1.098612 6.591672 18 +time 3 293 1.098612 3.295836 17 +wisc 2 242 1.386294 2.772588 33 +email 2 220 1.386294 2.772588 29 +also 2 259 1.386294 2.772588 28 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +includ 4 208 1.609438 6.437752 42 +public 2 202 1.609438 3.218876 43 +group 1 183 1.609438 1.609438 36 +list 1 201 1.609438 1.609438 39 +madison 5 165 1.791759 8.958795 55 +wisconsin 4 169 1.791759 7.167036 54 +read 2 154 1.791759 3.583518 47 +network 2 168 1.791759 3.583518 61 +base 2 165 1.791759 3.583518 50 +develop 1 174 1.791759 1.791759 53 +address 1 170 1.791759 1.791759 62 +support 3 132 1.945910 5.837730 83 +year 3 148 1.945910 5.837730 84 +first 1 140 1.945910 1.945910 71 +area 1 144 1.945910 1.945910 80 +report 6 131 2.079442 12.476652 92 +provid 2 121 2.079442 4.158884 94 +tool 2 117 2.079442 4.158884 93 +studi 1 120 2.079442 2.079442 91 +dayton 1 119 2.079442 2.079442 104 +intern 3 108 2.197225 6.591675 128 +manag 2 114 2.197225 4.394450 125 +specif 2 106 2.197225 4.394450 106 +assist 1 112 2.197225 2.197225 113 +topic 1 114 2.197225 2.197225 110 +site 1 106 2.197225 2.197225 119 +find 1 111 2.197225 2.197225 111 +send 1 114 2.197225 2.197225 109 +user 2 104 2.302585 4.605170 137 +peopl 2 96 2.302585 4.605170 132 +need 1 98 2.302585 2.302585 135 +part 1 98 2.302585 2.302585 129 +commun 2 95 2.397895 4.795790 157 +present 1 91 2.397895 2.397895 145 +select 1 91 2.397895 2.397895 154 +sinc 1 90 2.397895 2.397895 159 +internet 9 83 2.484907 22.364163 186 +educ 2 86 2.484907 4.969814 191 +resourc 2 81 2.484907 4.969814 172 +help 1 83 2.484907 2.484907 175 +wide 1 84 2.484907 2.484907 185 +level 1 87 2.484907 2.484907 180 +come 1 78 2.564949 2.564949 202 +issu 1 78 2.564949 2.564949 211 +june 1 79 2.564949 2.564949 214 +orient 1 80 2.564949 2.564949 205 +servic 11 72 2.639057 29.029627 236 +nation 4 74 2.639057 10.556228 240 +addit 2 74 2.639057 5.278114 228 +involv 1 71 2.639057 2.639057 227 +appli 1 71 2.639057 2.639057 226 +write 1 72 2.639057 2.639057 222 +free 1 73 2.639057 2.639057 224 +goal 2 66 2.708050 5.416100 250 +receiv 2 66 2.708050 5.416100 244 +organ 2 65 2.772589 5.545178 265 +import 1 65 2.772589 2.772589 282 +written 1 63 2.772589 2.772589 278 +result 1 65 2.772589 2.772589 281 +plan 1 65 2.772589 2.772589 272 +visit 1 63 2.772589 2.772589 288 +descript 1 64 2.772589 2.772589 271 +street 1 63 2.772589 2.772589 293 +best 1 59 2.833213 2.833213 299 +special 1 56 2.890372 2.890372 320 +three 1 54 2.944439 2.944439 330 +undergradu 1 54 2.944439 2.944439 338 +sampl 1 53 2.944439 2.944439 339 +week 2 52 2.995732 5.991464 343 +profession 1 51 2.995732 2.995732 345 +format 1 48 3.044522 3.044522 356 +telephon 1 50 3.044522 3.044522 373 +effect 1 46 3.091042 3.091042 385 +natur 1 44 3.135494 3.135494 406 +third 1 43 3.178054 3.178054 412 +edit 1 42 3.218876 3.218876 418 +futur 1 41 3.218876 3.218876 427 +join 3 39 3.258097 9.774291 457 +multipl 1 39 3.258097 3.258097 453 +continu 1 39 3.258097 3.258097 448 +open 3 38 3.295837 9.887511 469 +seminar 1 38 3.295837 3.295837 470 +feel 1 37 3.332205 3.332205 483 +staff 4 36 3.367296 13.469184 490 +everi 1 34 3.401197 3.401197 519 +award 1 34 3.401197 3.401197 523 +kind 1 32 3.465736 3.465736 541 +collabor 1 32 3.465736 3.465736 543 +posit 3 31 3.496508 10.489524 552 +titl 1 31 3.496508 3.496508 556 +produc 1 30 3.555348 3.555348 572 +focus 1 29 3.583519 3.583519 584 +depend 1 29 3.583519 3.583519 583 +propos 1 28 3.610918 3.610918 602 +retriev 1 27 3.637586 3.637586 621 +background 1 25 3.737670 3.737670 664 +spent 1 25 3.737670 3.737670 676 +departmentunivers 2 24 3.761200 7.522400 711 +higher 1 24 3.761200 3.761200 690 +seri 1 24 3.761200 3.761200 708 +director 1 22 3.850148 3.850148 767 +cooper 1 22 3.850148 3.850148 757 +newsgroup 1 21 3.912023 3.912023 783 +divis 1 21 3.912023 3.912023 803 +toolkit 2 20 3.951244 7.902488 835 +wrote 1 20 3.951244 3.951244 830 +item 1 19 4.007333 4.007333 856 +expand 1 17 4.110874 4.110874 928 +diego 2 16 4.174387 8.348774 992 +susan 3 15 4.248495 12.745485 1050 +later 1 15 4.248495 4.248495 1043 +becam 1 14 4.317488 4.317488 1117 +speak 1 12 4.465908 4.465908 1283 +onth 1 12 4.465908 4.465908 1218 +branch 1 11 4.553877 4.553877 1318 +thecomput 1 10 4.653960 4.653960 1408 +hundr 1 9 4.753590 4.753590 1528 +discov 1 9 4.753590 4.753590 1562 +respect 1 9 4.753590 4.753590 1545 +filter 1 8 4.875197 4.875197 1641 +elect 1 8 4.875197 4.875197 1771 +jack 1 8 4.875197 4.875197 1780 +potenti 1 8 4.875197 4.875197 1690 +gather 1 8 4.875197 4.875197 1719 +scout 18 7 5.010635 90.191430 1903 +usabl 1 7 5.010635 5.010635 1810 +happen 1 7 5.010635 5.010635 1790 +discoveri 1 7 5.010635 5.010635 1915 +edumi 1 6 5.164786 5.164786 2132 +approv 1 6 5.164786 5.164786 2078 +matthew 1 6 5.164786 5.164786 2193 +ifyou 1 6 5.164786 5.164786 1992 +kid 2 5 5.347108 10.694216 2516 +merit 2 5 5.347108 10.694216 2466 +devot 2 4 5.568345 11.136690 2711 +newslett 1 4 5.568345 5.568345 2873 +termin 1 4 5.568345 5.568345 2852 +chose 1 4 5.568345 5.568345 2629 +hire 1 4 5.568345 5.568345 2976 +agreement 2 3 5.857933 11.715866 3207 +newli 1 3 5.857933 5.857933 3786 +orth 1 3 5.857933 5.857933 3685 +moreinform 1 3 5.857933 5.857933 3307 +audienc 1 3 5.857933 5.857933 3180 +aproject 1 3 5.857933 5.857933 3142 +expans 1 3 5.857933 5.857933 3755 +disciplin 1 3 5.857933 5.857933 3392 +sciencefound 2 2 6.263398 12.526796 5150 +calcari 1 2 6.263398 6.263398 6144 +thehigh 1 2 6.263398 6.263398 4095 +thousand 1 2 6.263398 6.263398 5949 +arbor 1 2 6.263398 6.263398 6235 +backbon 1 2 6.263398 6.263398 5623 +thescout 1 2 6.263398 6.263398 6082 +andeduc 2 1 6.957497 13.914994 19930 +reloc 2 1 6.957497 13.914994 19931 +speciallibrarian 2 1 6.957497 13.914994 19932 +systemadministr 2 1 6.957497 13.914994 19933 +calcarimanag 1 1 6.957497 6.957497 19934 +servicescomput 1 1 6.957497 6.957497 19935 +madisonsc 1 1 6.957497 6.957497 19936 +scoutservic 1 1 6.957497 6.957497 19937 +internicand 1 1 6.957497 6.957497 19938 +bestresourc 1 1 6.957497 6.957497 19939 +soonth 1 1 6.957497 6.957497 19940 +sprout 1 1 6.957497 6.957497 19941 +andthousand 1 1 6.957497 6.957497 19942 +annotatedlist 1 1 6.957497 6.957497 19943 +itemsinclud 1 1 6.957497 6.957497 19944 +happeningspost 1 1 6.957497 6.957497 19945 +weekdai 1 1 6.957497 6.957497 19946 +wheni 1 1 6.957497 6.957497 19947 +thensfnet 1 1 6.957497 6.957497 19948 +informationservic 1 1 6.957497 6.957497 19949 +tonat 1 1 6.957497 6.957497 19950 +internetand 1 1 6.957497 6.957497 19951 +seminarseri 1 1 6.957497 6.957497 19952 +internetend 1 1 6.957497 6.957497 19953 +forcerfnet 1 1 6.957497 6.957497 19954 +internicproject 1 1 6.957497 6.957497 19955 +theport 1 1 6.957497 6.957497 19956 +workof 1 1 6.957497 6.957497 19957 +andrequest 1 1 6.957497 6.957497 19958 +heartilyagre 1 1 6.957497 6.957497 19959 +servicesat 1 1 6.957497 6.957497 19960 +solock 1 1 6.957497 6.957497 19961 +theaddit 1 1 6.957497 6.957497 19962 +livesei 1 1 6.957497 6.957497 19963 +asscout 1 1 6.957497 6.957497 19964 +researcharea 1 1 6.957497 6.957497 19965 +campus 1 1 6.957497 6.957497 19966 +includenetwork 1 1 6.957497 6.957497 19967 +nidr 1 1 6.957497 6.957497 19968 +anddisciplin 1 1 6.957497 6.957497 19969 +willincludecomput 1 1 6.957497 6.957497 19970 +ofour 1 1 6.957497 6.957497 19971 +theonlin 1 1 6.957497 6.957497 19972 +librarian 1 1 6.957497 6.957497 19973 +aresum 1 1 6.957497 6.957497 19974 +contactm 1 1 6.957497 6.957497 19975 +calcariinternet 1 1 6.957497 6.957497 19976 +scal 1 1 6.957497 6.957497 19977 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html new file mode 100644 index 00000000..528c09fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schandra^schandra.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +depart 1 457 0.693147 0.693147 12 +student 1 343 1.098612 1.098612 19 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +time 1 293 1.098612 1.098612 17 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +madison 2 165 1.791759 3.583518 55 +welcom 1 122 2.079442 2.079442 99 +high 1 130 2.079442 2.079442 101 +technolog 1 131 2.079442 2.079442 102 +dayton 1 119 2.079442 2.079442 104 +person 1 111 2.197225 2.197225 117 +find 1 111 2.197225 2.197225 111 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +present 1 91 2.397895 2.397895 145 +sinc 1 90 2.397895 2.397895 159 +school 2 84 2.484907 4.969814 188 +educ 2 86 2.484907 4.969814 191 +institut 1 84 2.484907 2.484907 187 +stuff 1 87 2.484907 2.484907 171 +june 1 79 2.564949 2.564949 214 +state 1 76 2.564949 2.564949 207 +dept 1 64 2.772589 2.772589 291 +major 1 56 2.890372 2.890372 315 +undergradu 1 54 2.944439 2.944439 338 +finger 1 52 2.995732 2.995732 354 +past 1 42 3.218876 3.218876 428 +higher 1 24 3.761200 3.761200 690 +born 1 21 3.912023 3.912023 798 +happi 1 14 4.317488 4.317488 1079 +avenu 1 12 4.465908 4.465908 1277 +resid 1 10 4.653960 4.653960 1461 +secondari 1 7 5.010635 5.010635 1884 +southern 1 6 5.164786 5.164786 2191 +whereabout 1 4 5.568345 5.568345 3078 +worri 1 3 5.857933 5.857933 3130 +coimbator 2 2 6.263398 12.526796 5130 +theindian 1 2 6.263398 6.263398 5795 +kharagpur 1 2 6.263398 6.263398 6236 +kendal 1 2 6.263398 6.263398 6085 +chandrasekar 3 1 6.957497 20.872491 19978 +tamilnadu 1 1 6.957497 6.957497 19979 +inindia 1 1 6.957497 6.957497 19980 +officedept 1 1 6.957497 6.957497 19981 +sivasankaran 1 1 6.957497 6.957497 19982 +schandra 1 1 6.957497 6.957497 19983 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html new file mode 100644 index 00000000..8cc23559 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schnarr^schnarr.html @@ -0,0 +1,32 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +wisc 2 242 1.386294 2.772588 33 +languag 1 227 1.386294 1.386294 26 +link 1 247 1.386294 1.386294 24 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +architectur 1 139 1.945910 1.945910 77 +dayton 1 119 2.079442 2.079442 104 +west 1 83 2.484907 2.484907 192 +descript 1 64 2.772589 2.772589 271 +advisor 1 51 2.995732 2.995732 355 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +eric 1 19 4.007333 4.007333 870 +wind 1 18 4.060443 4.060443 908 +club 1 15 4.248495 4.248495 1058 +usaphon 1 9 4.753590 4.753590 1600 +tunnel 1 9 4.753590 4.753590 1615 +assistantdepart 1 8 4.875197 4.875197 1784 +hockei 1 8 4.875197 4.875197 1760 +byte 1 6 5.164786 5.164786 2108 +pageer 1 3 5.857933 5.857933 3776 +schnarr 4 2 6.263398 25.053592 6194 +dragon 1 2 6.263398 6.263398 4176 +larusresearch 1 1 6.957497 6.957497 19984 +languagesfunct 1 1 6.957497 6.957497 19985 +designinterest 1 1 6.957497 6.957497 19986 +sacm 1 1 6.957497 6.957497 19987 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html new file mode 100644 index 00000000..e522480f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~schoinas^schoinas.html @@ -0,0 +1,67 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +system 1 443 0.693147 0.693147 6 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +parallel 1 169 1.791759 1.791759 60 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +architectur 1 139 1.945910 1.945910 77 +support 1 132 1.945910 1.945910 83 +dayton 1 119 2.079442 2.079442 104 +confer 1 126 2.079442 2.079442 100 +intern 1 108 2.197225 2.197225 128 +specif 1 106 2.197225 2.197225 106 +memori 2 101 2.302585 4.605170 139 +access 1 102 2.302585 2.302585 136 +user 1 104 2.302585 2.302585 137 +west 1 83 2.484907 2.484907 192 +control 1 82 2.484907 2.484907 164 +level 1 87 2.484907 2.484907 180 +educ 1 86 2.484907 2.484907 191 +david 2 71 2.639057 5.278114 232 +share 2 59 2.833213 5.666426 304 +juli 1 60 2.833213 2.833213 305 +advisor 1 51 2.995732 2.995732 355 +mark 2 44 3.135494 6.270988 403 +protocol 1 45 3.135494 3.135494 407 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +jame 2 35 3.401197 6.802394 507 +hill 2 25 3.737670 7.475340 670 +supercomput 1 25 3.737670 3.737670 681 +programminglanguag 1 21 3.912023 3.912023 782 +fine 1 20 3.951244 3.951244 822 +steven 2 17 4.110874 8.221748 953 +asplo 1 17 4.110874 4.110874 948 +wood 2 11 4.553877 9.107754 1355 +grain 1 10 4.653960 4.653960 1448 +cook 1 10 4.653960 4.653960 1464 +laru 2 9 4.753590 9.507180 1560 +yanni 2 8 4.875197 9.750394 1713 +assistantdepart 1 8 4.875197 4.875197 1784 +sixth 1 7 5.010635 5.010635 1917 +roger 1 7 5.010635 5.010635 1892 +ann 1 6 5.164786 5.164786 2065 +ioanni 2 5 5.347108 10.694216 2553 +babak 2 5 5.347108 10.694216 2584 +falsafi 2 5 5.347108 10.694216 2585 +lebeck 2 5 5.347108 10.694216 2582 +reinhardt 2 5 5.347108 10.694216 2583 +schoina 5 4 5.568345 27.841725 3085 +alvin 2 4 5.568345 11.136690 3084 +crete 2 3 5.857933 11.715866 3773 +iraklio 2 1 6.957497 13.914994 19988 +systemspubl 1 1 6.957497 6.957497 19989 +cretan 1 1 6.957497 6.957497 19990 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html new file mode 100644 index 00000000..cbc70777 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seavey^seavey.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +interest 1 384 0.693147 0.693147 11 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +includ 1 208 1.609438 1.609438 42 +version 1 113 2.197225 2.197225 122 +degre 1 69 2.708050 2.708050 259 +differ 1 66 2.708050 2.708050 253 +special 1 56 2.890372 2.890372 320 +finger 2 52 2.995732 5.991464 354 +give 1 50 3.044522 3.044522 359 +could 1 46 3.091042 3.091042 383 +india 2 32 3.465736 6.931472 550 +mine 1 26 3.688879 3.688879 654 +wish 1 24 3.761200 3.761200 692 +instead 1 22 3.850148 3.850148 756 +grad 1 20 3.951244 3.951244 837 +account 1 18 4.060443 4.060443 882 +regist 1 17 4.110874 4.110874 938 +biologi 1 15 4.248495 4.248495 1049 +classic 1 14 4.317488 4.317488 1084 +danc 2 12 4.465908 8.931816 1278 +switch 1 8 4.875197 4.875197 1718 +keeper 1 5 5.347108 5.347108 2569 +keyboard 1 4 5.568345 5.568345 2970 +asian 1 3 5.857933 5.857933 3598 +southeast 2 2 6.263398 12.526796 6188 +asia 2 2 6.263398 12.526796 5952 +hairbal 1 2 6.263398 6.263398 6237 +beverli 2 1 6.957497 13.914994 19991 +seavei 2 1 6.957497 13.914994 19992 +ramayana 2 1 6.957497 13.914994 19993 +drama 1 1 6.957497 6.957497 19994 +ramakien 1 1 6.957497 6.957497 19995 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sec^sec.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sec^sec.html new file mode 100644 index 00000000..2d41cee3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sec^sec.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 6 672 0.000000 0.000000 1 +page 5 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +mail 1 238 1.386294 1.386294 22 +link 1 247 1.386294 1.386294 24 +also 1 259 1.386294 1.386294 28 +list 1 201 1.609438 1.609438 39 +updat 1 191 1.609438 1.609438 41 +madison 3 165 1.791759 5.375277 55 +address 1 170 1.791759 1.791759 62 +wisconsin 1 169 1.791759 1.791759 54 +welcom 2 122 2.079442 4.158884 99 +dayton 1 119 2.079442 2.079442 104 +databas 1 122 2.079442 2.079442 86 +world 2 115 2.197225 4.394450 126 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +book 1 99 2.302585 2.302585 131 +school 2 84 2.484907 4.969814 188 +educ 1 86 2.484907 2.484907 191 +want 1 79 2.564949 2.564949 199 +addit 1 74 2.639057 2.639057 228 +guid 1 63 2.772589 2.772589 267 +septemb 1 65 2.772589 2.772589 274 +back 1 60 2.833213 2.833213 297 +game 1 36 3.367296 3.367296 498 +eduoffic 1 33 3.433987 3.433987 531 +enjoi 1 26 3.688879 3.688879 660 +fact 1 21 3.912023 3.912023 780 +scott 1 18 4.060443 4.060443 884 +rate 1 15 4.248495 4.248495 1037 +english 1 15 4.248495 4.248495 1033 +hopefulli 1 14 4.317488 4.317488 1071 +franc 1 12 4.465908 4.465908 1276 +ball 1 9 4.753590 4.753590 1608 +drink 1 9 4.753590 4.753590 1607 +lock 1 9 4.753590 4.753590 1551 +poetri 1 9 4.753590 4.753590 1596 +absolut 1 8 4.875197 4.875197 1646 +dictionari 1 8 4.875197 4.875197 1642 +largest 1 7 5.010635 5.010635 1858 +seen 1 6 5.164786 5.164786 2202 +beer 1 6 5.164786 5.164786 2216 +soda 1 6 5.164786 5.164786 2189 +constitut 1 6 5.164786 5.164786 2026 +pagescott 1 4 5.568345 5.568345 2978 +chees 1 4 5.568345 5.568345 3090 +add 1 3 5.857933 5.857933 3131 +uwisc 2 2 6.263398 12.526796 4738 +caffein 1 2 6.263398 6.263398 5936 +thesauru 1 2 6.263398 6.263398 6238 +colvil 2 1 6.957497 13.914994 19996 +pagein 1 1 6.957497 6.957497 19997 +pickingand 1 1 6.957497 6.957497 19998 +artsi 1 1 6.957497 6.957497 19999 +roget 1 1 6.957497 6.957497 20000 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html new file mode 100644 index 00000000..9867541e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^animation.html @@ -0,0 +1,60 @@ +term, tf, in documents count, idf, tfidf, wordid +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +project 1 340 1.098612 1.098612 18 +includ 1 208 1.609438 1.609438 42 +applic 1 170 1.791759 1.791759 56 +problem 2 147 1.945910 3.891820 75 +model 2 145 1.945910 3.891820 69 +perform 1 143 1.945910 1.945910 74 +object 1 138 1.945910 1.945910 79 +analysi 2 124 2.079442 4.158884 98 +high 1 130 2.079442 2.079442 101 +imag 2 91 2.397895 4.795790 161 +graphic 2 90 2.397895 4.795790 147 +control 1 82 2.484907 2.484907 164 +learn 1 86 2.484907 2.484907 170 +level 1 87 2.484907 2.484907 180 +knowledg 1 67 2.708050 2.708050 243 +goal 1 66 2.708050 2.708050 250 +virtual 2 62 2.772589 5.545178 285 +robot 1 36 3.367296 3.367296 497 +anim 4 31 3.496508 13.986032 557 +steve 2 29 3.583519 7.167038 594 +task 1 25 3.737670 3.737670 678 +motion 5 24 3.761200 18.806000 699 +store 1 24 3.761200 3.761200 693 +input 1 23 3.806662 3.806662 727 +sequenc 1 23 3.806662 3.806662 734 +period 1 22 3.850148 3.850148 743 +modern 1 16 4.174387 4.174387 966 +devic 1 16 4.174387 4.174387 1002 +charact 1 15 4.248495 4.248495 1028 +track 1 15 4.248495 4.248495 1029 +chuck 1 14 4.317488 4.317488 1108 +directli 1 13 4.382027 4.382027 1141 +realiti 1 12 4.465908 4.465908 1272 +walk 1 12 4.465908 4.465908 1281 +motiv 1 11 4.553877 4.553877 1346 +realist 1 8 4.875197 4.875197 1665 +root 1 8 4.875197 4.875197 1650 +seitz 2 7 5.010635 10.021270 1976 +smile 1 7 5.010635 5.010635 1807 +infer 1 6 5.164786 5.164786 2040 +writeup 1 5 5.347108 5.347108 2352 +rigid 1 5 5.347108 5.347108 2432 +tocomput 1 3 5.857933 5.857933 3162 +endow 1 2 6.263398 6.263398 6234 +cue 1 2 6.263398 6.263398 5391 +anabstract 1 2 6.263398 6.263398 5491 +dyerour 1 1 6.957497 6.957497 20001 +teachinga 1 1 6.957497 6.957497 20002 +hasit 1 1 6.957497 6.957497 20003 +cartoon 1 1 6.957497 6.957497 20004 +teleconferenc 1 1 6.957497 6.957497 20005 +performa 1 1 6.957497 6.957497 20006 +repertoir 1 1 6.957497 6.957497 20007 +beinvok 1 1 6.957497 6.957497 20008 +cu 1 1 6.957497 6.957497 20009 +levelev 1 1 6.957497 6.957497 20010 +nonrigid 1 1 6.957497 6.957497 20011 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html new file mode 100644 index 00000000..e5f9752b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^interp.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +work 3 380 0.693147 2.079441 9 +us 2 329 1.098612 2.197224 16 +last 1 314 1.098612 1.098612 14 +recent 1 167 1.791759 1.791759 58 +first 1 140 1.945910 1.945910 71 +process 1 142 1.945910 1.945910 72 +click 1 142 1.945910 1.945910 78 +provid 1 121 2.079442 2.079442 94 +techniqu 3 99 2.302585 6.907755 138 +user 1 104 2.302585 2.302585 137 +imag 9 91 2.397895 21.581055 161 +center 1 88 2.397895 2.397895 158 +chang 1 82 2.484907 2.484907 163 +june 1 79 2.564949 2.564949 214 +workshop 1 71 2.639057 2.639057 239 +view 9 70 2.708050 24.372450 254 +differ 1 66 2.708050 2.708050 253 +creat 1 63 2.772589 2.772589 277 +interact 1 62 2.772589 2.772589 270 +guid 1 63 2.772589 2.772589 267 +three 1 54 2.944439 2.944439 330 +investig 1 51 2.995732 2.995732 353 +particular 1 51 2.995732 2.995732 352 +right 1 48 3.044522 3.044522 363 +visual 1 48 3.044522 3.044522 372 +physic 2 47 3.091042 6.182084 377 +describ 2 45 3.135494 6.270988 400 +answer 1 45 3.135494 3.135494 391 +show 1 43 3.178054 3.178054 417 +autom 2 41 3.218876 6.437752 434 +theoret 1 39 3.258097 3.258097 446 +movi 1 40 3.258097 3.258097 459 +origin 3 38 3.295837 9.887511 472 +correct 1 38 3.295837 3.295837 462 +procedur 1 36 3.367296 3.367296 488 +represent 1 35 3.401197 3.401197 512 +produc 2 30 3.555348 7.110696 572 +steve 2 29 3.583519 7.167038 594 +consid 1 29 3.583519 3.583519 590 +enjoi 1 26 3.688879 3.688879 660 +proc 1 26 3.688879 3.688879 649 +although 1 25 3.737670 3.737670 667 +known 1 24 3.761200 3.761200 702 +sequenc 1 23 3.806662 3.806662 734 +synthesi 3 20 3.951244 11.853732 834 +basi 1 20 3.951244 3.951244 828 +mpeg 1 20 3.951244 3.951244 831 +geometr 1 19 4.007333 4.007333 852 +left 1 19 4.007333 4.007333 851 +scene 5 14 4.317488 21.587440 1114 +chuck 1 14 4.317488 4.317488 1108 +shown 1 14 4.317488 4.317488 1080 +valid 2 11 4.553877 9.107754 1299 +devis 1 10 4.653960 4.653960 1451 +reli 1 10 4.653960 4.653960 1411 +certain 1 10 4.653960 4.653960 1393 +correspond 1 10 4.653960 4.653960 1382 +intermedi 1 9 4.753590 4.753590 1497 +establish 1 9 4.753590 4.753590 1532 +assumpt 1 9 4.753590 4.753590 1514 +pair 1 9 4.753590 4.753590 1503 +dyer 1 9 4.753590 4.753590 1573 +satisfi 1 8 4.875197 4.875197 1694 +interpol 9 7 5.010635 45.095715 1823 +seitz 3 7 5.010635 15.031905 1976 +morph 3 7 5.010635 15.031905 1937 +stereo 1 7 5.010635 5.010635 1818 +theproject 1 6 5.164786 5.164786 1981 +provabl 1 5 5.347108 5.347108 2558 +surprisingli 1 4 5.568345 5.568345 2609 +visibl 1 4 5.568345 5.568345 2994 +todetermin 1 3 5.857933 5.857933 3182 +widespread 1 2 6.263398 6.263398 4911 +viewsof 1 2 6.263398 6.263398 6135 +undergo 1 2 6.263398 6.263398 4253 +dyerw 1 1 6.957497 6.957497 20012 +graphicscommun 1 1 6.957497 6.957497 20013 +techniquescurr 1 1 6.957497 6.957497 20014 +validityha 1 1 6.957497 6.957497 20015 +ofthat 1 1 6.957497 6.957497 20016 +simplerectif 1 1 6.957497 6.957497 20017 +therectifi 1 1 6.957497 6.957497 20018 +theinterpol 1 1 6.957497 6.957497 20019 +computedinterpol 1 1 6.957497 6.957497 20020 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html new file mode 100644 index 00000000..84944e71 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^interp^vmorph.html @@ -0,0 +1,95 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 1 775 0.000000 0.000000 2 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +us 2 329 1.098612 2.197224 16 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +paper 1 205 1.609438 1.609438 38 +base 1 165 1.791759 1.791759 50 +click 3 142 1.945910 5.837730 78 +object 2 138 1.945910 3.891820 79 +relat 1 139 1.945910 1.945910 68 +high 1 130 2.079442 2.079442 101 +well 1 109 2.197225 2.197225 121 +techniqu 3 99 2.302585 6.907755 138 +imag 7 91 2.397895 16.785265 161 +call 1 91 2.397895 2.397895 153 +chang 2 82 2.484907 4.969814 163 +requir 1 81 2.484907 2.484907 167 +wide 1 84 2.484907 2.484907 185 +appear 2 78 2.564949 5.129898 210 +view 8 70 2.708050 21.664400 254 +differ 3 66 2.708050 8.124150 253 +knowledg 1 67 2.708050 2.708050 243 +virtual 1 62 2.772589 2.772589 285 +septemb 1 65 2.772589 2.772589 274 +simpl 2 60 2.833213 5.666426 298 +extens 1 53 2.944439 2.944439 340 +investig 1 51 2.995732 2.995732 353 +basic 1 50 3.044522 3.044522 360 +principl 1 48 3.044522 3.044522 357 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +move 1 47 3.091042 3.091042 382 +howev 1 41 3.218876 3.218876 422 +movi 6 40 3.258097 19.548582 459 +correct 1 38 3.295837 3.295837 462 +represent 1 35 3.401197 3.401197 512 +manual 1 35 3.401197 3.401197 504 +transform 1 32 3.465736 3.465736 542 +often 1 31 3.496508 3.496508 551 +taken 1 31 3.496508 3.496508 555 +produc 2 30 3.555348 7.110696 572 +steve 2 29 3.583519 7.167038 594 +toward 1 25 3.737670 3.737670 668 +frame 2 24 3.761200 7.522400 684 +handl 1 24 3.761200 3.761200 685 +varieti 1 22 3.850148 3.850148 740 +color 1 22 3.850148 3.850148 762 +mpeg 6 20 3.951244 23.707464 831 +render 1 17 4.110874 4.110874 947 +transit 1 15 4.248495 4.248495 1046 +photograph 1 15 4.248495 4.248495 1056 +reflect 1 15 4.248495 4.248495 1034 +scene 3 14 4.317488 12.952464 1114 +chuck 2 14 4.317488 8.634976 1108 +camera 2 14 4.317488 8.634976 1115 +draw 1 14 4.317488 4.317488 1086 +resolut 3 13 4.382027 13.146081 1172 +introduc 1 13 4.382027 4.382027 1139 +shape 2 12 4.465908 8.931816 1245 +abil 1 11 4.553877 4.553877 1341 +dyer 2 9 4.753590 9.507180 1573 +pose 2 9 4.753590 9.507180 1535 +correctli 1 9 4.753590 4.753590 1478 +face 1 9 4.753590 4.753590 1501 +siggraph 1 8 4.875197 4.875197 1773 +morph 11 7 5.010635 55.116985 1937 +interpol 4 7 5.010635 20.042540 1823 +seitz 2 7 5.010635 10.021270 1976 +viewpoint 3 6 5.164786 15.494358 2116 +difficult 1 6 5.164786 5.164786 2035 +simultan 1 6 5.164786 5.164786 2155 +jude 1 6 5.164786 5.164786 2123 +synthes 1 5 5.347108 5.347108 2451 +facial 1 5 5.347108 5.347108 2438 +shavlik 1 5 5.347108 5.347108 2429 +illus 1 4 5.568345 5.568345 2603 +mona 2 2 6.263398 12.526796 5786 +lisa 2 2 6.263398 12.526796 5427 +icpr 1 1 6.957497 6.957497 20021 +compel 1 1 6.957497 6.957497 20022 +betweenimag 1 1 6.957497 6.957497 20023 +causeunnatur 1 1 6.957497 6.957497 20024 +distort 1 1 6.957497 6.957497 20025 +projectivegeometri 1 1 6.957497 6.957497 20026 +morphingthat 1 1 6.957497 6.957497 20027 +prewarp 1 1 6.957497 6.957497 20028 +imagesprior 1 1 6.957497 6.957497 20029 +postwarp 1 1 6.957497 6.957497 20030 +appliedto 1 1 6.957497 6.957497 20031 +structureafford 1 1 6.957497 6.957497 20032 +imagetransform 1 1 6.957497 6.957497 20033 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html new file mode 100644 index 00000000..b3d7190c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^pmotion.html @@ -0,0 +1,128 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 4 412 0.693147 2.772588 8 +us 2 329 1.098612 2.197224 16 +gener 1 220 1.386294 1.386294 27 +also 1 259 1.386294 1.386294 28 +paper 2 205 1.609438 3.218876 38 +object 2 138 1.945910 3.891820 79 +click 1 142 1.945910 1.945910 78 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +analysi 1 124 2.079442 2.079442 98 +provid 1 121 2.079442 2.079442 94 +person 1 111 2.197225 2.197225 117 +make 1 111 2.197225 2.197225 120 +theori 1 111 2.197225 2.197225 127 +imag 9 91 2.397895 21.581055 161 +real 2 93 2.397895 4.795790 144 +call 1 91 2.397895 2.397895 153 +refer 4 78 2.564949 10.259796 203 +line 1 75 2.639057 2.639057 231 +addit 1 74 2.639057 2.639057 228 +view 1 70 2.708050 2.708050 254 +previou 1 62 2.772589 2.772589 290 +import 1 65 2.772589 2.772589 282 +locat 1 59 2.833213 2.833213 303 +sever 1 56 2.890372 2.890372 322 +allow 1 53 2.944439 2.944439 333 +run 1 51 2.995732 2.995732 347 +life 1 50 3.044522 3.044522 375 +approach 1 48 3.044522 3.044522 366 +without 1 50 3.044522 3.044522 370 +move 2 47 3.091042 6.182084 382 +could 1 46 3.091042 3.091042 383 +physic 1 47 3.091042 3.091042 377 +featur 1 46 3.091042 3.091042 386 +even 1 45 3.135494 3.135494 393 +describ 1 45 3.135494 3.135494 400 +natur 1 44 3.135494 3.135494 406 +show 1 43 3.178054 3.178054 417 +error 1 40 3.258097 3.258097 449 +mean 1 37 3.332205 3.332205 477 +next 1 34 3.401197 3.401197 517 +represent 1 35 3.401197 3.401197 512 +singl 1 34 3.401197 3.401197 510 +human 1 32 3.465736 3.465736 546 +steve 1 29 3.583519 3.583519 594 +determin 3 27 3.637586 10.912758 630 +enhanc 2 26 3.688879 7.377758 644 +trace 6 25 3.737670 22.426020 677 +reliabl 1 25 3.737670 3.737670 674 +motion 8 24 3.761200 30.089600 699 +frame 3 24 3.761200 11.283600 684 +sequenc 2 23 3.806662 7.613324 734 +period 16 22 3.850148 61.602368 743 +defin 1 22 3.850148 3.850148 746 +identifi 1 22 3.850148 3.850148 760 +attempt 1 17 4.110874 4.110874 917 +medic 1 17 4.110874 4.110874 958 +spatial 1 16 4.174387 4.174387 988 +scene 2 14 4.317488 8.634976 1114 +chuck 1 14 4.317488 4.317488 1108 +camera 1 14 4.317488 4.317488 1115 +composit 3 13 4.382027 13.146081 1150 +whose 1 13 4.382027 4.382027 1166 +deriv 1 13 4.382027 4.382027 1145 +walk 1 12 4.465908 4.465908 1281 +cycl 4 11 4.553877 18.215508 1335 +instanc 1 11 4.553877 4.553877 1322 +moment 1 11 4.553877 4.553877 1379 +correspond 3 10 4.653960 13.961880 1382 +tempor 3 9 4.753590 14.260770 1584 +surfac 1 9 4.753590 4.753590 1574 +explicit 1 9 4.753590 4.753590 1525 +pure 2 8 4.875197 9.750394 1776 +invari 2 8 4.875197 9.750394 1748 +film 1 8 4.875197 4.875197 1761 +irregular 1 8 4.875197 4.875197 1768 +heart 1 8 4.875197 4.875197 1729 +seitz 1 7 5.010635 5.010635 1976 +compact 1 7 5.010635 5.010635 1907 +canb 1 7 5.010635 5.010635 1846 +bottom 1 7 5.010635 5.010635 1906 +appar 1 7 5.010635 5.010635 1958 +recov 2 6 5.164786 10.329572 2235 +furthermor 1 6 5.164786 5.164786 2141 +cyclic 3 5 5.347108 16.041324 2383 +skip 1 5 5.347108 5.347108 2402 +variat 1 5 5.347108 5.347108 2248 +affin 1 5 5.347108 5.347108 2378 +clickher 1 5 5.347108 5.347108 2428 +havedevelop 1 4 5.568345 5.568345 2681 +repeat 1 4 5.568345 5.568345 2798 +tend 1 4 5.568345 5.568345 3041 +visibl 1 4 5.568345 5.568345 2994 +fashion 1 3 5.857933 5.857933 3699 +unlik 1 2 6.263398 6.263398 5063 +slow 1 2 6.263398 6.263398 5341 +perfectli 1 2 6.263398 6.263398 5569 +poscript 2 1 6.957497 13.914994 20034 +turntabl 2 1 6.957497 13.914994 20035 +dyermani 1 1 6.957497 6.957497 20036 +locomotori 1 1 6.957497 6.957497 20037 +shuffl 1 1 6.957497 6.957497 20038 +areperiod 1 1 6.957497 6.957497 20039 +beenproduc 1 1 6.957497 6.957497 20040 +ourapproach 1 1 6.957497 6.957497 20041 +tracethi 1 1 6.957497 6.957497 20042 +imagesequ 1 1 6.957497 6.957497 20043 +phonograph 1 1 6.957497 6.957497 20044 +ramp 1 1 6.957497 6.957497 20045 +timewher 1 1 6.957497 6.957497 20046 +momentarili 1 1 6.957497 6.957497 20047 +shownsuperimpos 1 1 6.957497 6.957497 20048 +variesslightli 1 1 6.957497 6.957497 20049 +changesin 1 1 6.957497 6.957497 20050 +motionsthat 1 1 6.957497 6.957497 20051 +evolutionof 1 1 6.957497 6.957497 20052 +quantiti 1 1 6.957497 6.957497 20053 +asposit 1 1 6.957497 6.957497 20054 +veloc 1 1 6.957497 6.957497 20055 +delimit 1 1 6.957497 6.957497 20056 +correspondencesacross 1 1 6.957497 6.957497 20057 +parsinga 1 1 6.957497 6.957497 20058 +tracecan 1 1 6.957497 6.957497 20059 +fromdiffer 1 1 6.957497 6.957497 20060 +recoveredfrom 1 1 6.957497 6.957497 20061 +angiograph 1 1 6.957497 6.957497 20062 +additionalstructur 1 1 6.957497 6.957497 20063 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html new file mode 100644 index 00000000..f0f5a550 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~seitz^seitz.html @@ -0,0 +1,48 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 1 384 0.693147 0.693147 11 +research 1 431 0.693147 0.693147 10 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +base 1 165 1.791759 1.791759 50 +recent 1 167 1.791759 1.791759 58 +wisconsin 1 169 1.791759 1.791759 54 +click 2 142 1.945910 3.891820 78 +area 1 144 1.945910 1.945910 80 +analysi 2 124 2.079442 4.158884 98 +machin 1 129 2.079442 2.079442 95 +look 1 107 2.197225 2.197225 115 +imag 3 91 2.397895 7.193685 161 +graphic 1 90 2.397895 2.397895 147 +stuff 1 87 2.484907 2.484907 171 +chang 1 82 2.484907 2.484907 163 +exampl 1 77 2.564949 2.564949 195 +view 2 70 2.708050 5.416100 254 +juli 1 60 2.833213 2.833213 305 +frequent 1 49 3.044522 3.044522 367 +math 1 44 3.135494 3.135494 402 +show 1 43 3.178054 3.178054 417 +vision 2 41 3.218876 6.437752 430 +cach 1 41 3.218876 3.218876 432 +movi 2 40 3.258097 6.516194 459 +steve 2 29 3.583519 7.167038 594 +berkelei 1 26 3.688879 3.688879 657 +motion 2 24 3.761200 7.522400 699 +mpeg 2 20 3.951244 7.902488 831 +synthesi 1 20 3.951244 3.951244 834 +left 1 19 4.007333 4.007333 851 +render 1 17 4.110874 4.110874 947 +seitz 4 7 5.010635 20.042540 1976 +morph 1 7 5.010635 5.010635 1937 +interpol 1 7 5.010635 5.010635 1823 +cyclic 1 5 5.347108 5.347108 2383 +closer 1 2 6.263398 6.263398 6024 +surreal 1 1 6.957497 6.957497 20064 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html new file mode 100644 index 00000000..fcbdc667 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~shavlik^uwml.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +page 5 705 0.000000 0.000000 3 +home 4 672 0.000000 0.000000 1 +comput 3 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 2 431 0.693147 1.386294 10 +inform 1 412 0.693147 0.693147 8 +program 1 374 0.693147 0.693147 7 +cours 2 273 1.098612 2.197224 15 +current 2 284 1.098612 2.197224 21 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +graduat 5 215 1.386294 6.931470 31 +link 4 247 1.386294 5.545176 24 +also 3 259 1.386294 4.158882 28 +wisc 2 242 1.386294 2.772588 33 +group 8 183 1.609438 12.875504 36 +paper 4 205 1.609438 6.437752 38 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +modifi 1 178 1.609438 1.609438 35 +madison 13 165 1.791759 23.292867 55 +recent 4 167 1.791759 7.167036 58 +read 3 154 1.791759 5.375277 47 +wisconsin 2 169 1.791759 3.583518 54 +avail 1 169 1.791759 1.791759 48 +file 1 132 1.945910 1.945910 70 +machin 6 129 2.079442 12.476652 95 +schedul 5 119 2.079442 10.397210 85 +databas 1 122 2.079442 2.079442 86 +confer 1 126 2.079442 2.079442 100 +theori 1 111 2.197225 2.197225 127 +mathemat 1 108 2.197225 2.197225 123 +intern 1 108 2.197225 2.197225 128 +access 2 102 2.302585 4.605170 136 +center 1 88 2.397895 2.397895 158 +select 1 91 2.397895 2.397895 154 +learn 7 86 2.484907 17.394349 170 +contain 3 81 2.484907 7.454721 174 +member 2 84 2.484907 4.969814 165 +librari 2 87 2.484907 4.969814 181 +school 1 84 2.484907 2.484907 188 +journal 1 83 2.484907 2.484907 183 +david 1 71 2.639057 2.639057 232 +line 1 75 2.639057 2.639057 231 +workshop 1 71 2.639057 2.639057 239 +august 2 66 2.708050 5.416100 257 +dept 2 64 2.772589 5.545178 291 +prof 1 64 2.772589 2.772589 273 +abstract 1 62 2.772589 2.772589 276 +content 2 59 2.833213 5.666426 302 +local 4 55 2.944439 11.777756 334 +tabl 2 51 2.995732 5.991464 346 +archiv 4 49 3.044522 12.178088 364 +mark 1 44 3.135494 3.135494 403 +describ 1 45 3.135494 3.135494 400 +directori 1 45 3.135494 3.135494 396 +math 1 44 3.135494 3.135494 402 +vision 1 41 3.218876 3.218876 430 +seminar 3 38 3.295837 9.887511 470 +robot 1 36 3.367296 3.367296 497 +richard 1 31 3.496508 3.496508 559 +domain 2 30 3.555348 7.110696 564 +held 1 28 3.610918 3.610918 600 +relev 3 26 3.688879 11.066637 637 +proc 1 26 3.688879 3.688879 649 +mostli 1 19 4.007333 4.007333 869 +agent 2 18 4.060443 8.120886 910 +ascii 1 15 4.248495 4.248495 1032 +biologi 1 15 4.248495 4.248495 1049 +doit 1 14 4.317488 4.317488 1111 +readabl 1 12 4.465908 4.465908 1258 +wendt 1 10 4.653960 4.653960 1446 +kevin 1 9 4.753590 4.753590 1482 +mangasarian 1 9 4.753590 4.753590 1570 +dataset 2 7 5.010635 10.021270 1914 +jude 2 6 5.164786 10.329572 2123 +extern 1 6 5.164786 5.164786 2105 +olvi 1 6 5.164786 5.164786 2109 +gopher 1 6 5.164786 5.164786 1982 +shavlik 3 5 5.347108 16.041324 2429 +bodner 1 5 5.347108 5.347108 2401 +testb 1 5 5.347108 5.347108 2456 +tina 1 3 5.857933 5.857933 3744 +breast 1 3 5.857933 5.857933 4033 +cancer 1 3 5.857933 5.857933 4032 +carolyn 1 2 6.263398 6.263398 6088 +allex 1 2 6.263398 6.263398 6087 +eliassi 1 2 6.263398 6.263398 6147 +mlrg 8 1 6.957497 55.659976 20065 +thememb 1 1 6.957497 6.957497 20066 +jonathon 1 1 6.957497 6.957497 20067 +cherkauer 1 1 6.957497 6.957497 20068 +craven 1 1 6.957497 6.957497 20069 +maclin 1 1 6.957497 6.957497 20070 +opitz 1 1 6.957497 6.957497 20071 +papersvisit 1 1 6.957497 6.957497 20072 +recentabstractsi 1 1 6.957497 6.957497 20073 +theoriesy 1 1 6.957497 6.957497 20074 +severalml 1 1 6.957497 6.957497 20075 +sgroup 1 1 6.957497 6.957497 20076 +neurosci 1 1 6.957497 6.957497 20077 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html new file mode 100644 index 00000000..cb27b17e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~shubu^shubu.html @@ -0,0 +1,97 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +research 3 431 0.693147 2.079441 10 +interest 2 384 0.693147 1.386294 11 +project 1 340 1.098612 1.098612 18 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +public 2 202 1.609438 3.218876 43 +wisconsin 5 169 1.791759 8.958795 54 +madison 4 165 1.791759 7.167036 55 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +applic 1 170 1.791759 1.791759 56 +parallel 1 169 1.791759 1.791759 60 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +spring 1 131 2.079442 2.079442 88 +technolog 1 131 2.079442 2.079442 102 +assist 1 112 2.197225 2.197225 113 +world 1 115 2.197225 2.197225 126 +person 1 111 2.197225 2.197225 117 +memori 2 101 2.302585 4.605170 139 +west 1 83 2.484907 2.484907 192 +wide 1 84 2.484907 2.484907 185 +institut 1 84 2.484907 2.484907 187 +interfac 1 79 2.564949 2.564949 209 +summari 1 73 2.639057 2.639057 237 +simul 2 66 2.708050 5.416100 255 +street 1 63 2.772589 2.772589 293 +copi 1 63 2.772589 2.772589 284 +share 2 59 2.833213 5.666426 304 +space 1 57 2.890372 2.890372 310 +advisor 1 51 2.995732 2.995732 355 +right 1 48 3.044522 3.044522 363 +protocol 2 45 3.135494 6.270988 407 +mark 1 44 3.135494 3.135494 403 +mechan 1 43 3.178054 3.178054 416 +cach 2 41 3.218876 6.437752 432 +submit 1 39 3.258097 3.258097 440 +tutori 1 39 3.258097 3.258097 437 +expect 1 37 3.332205 3.332205 484 +workstat 1 37 3.332205 3.332205 479 +copyright 1 36 3.367296 3.367296 495 +random 1 34 3.401197 3.401197 511 +tech 1 35 3.401197 3.401197 515 +articl 1 33 3.433987 3.433987 530 +india 1 32 3.465736 3.465736 550 +dissert 1 32 3.465736 3.465736 549 +steve 1 29 3.583519 3.583519 594 +progress 1 28 3.610918 3.610918 598 +team 1 27 3.637586 3.637586 625 +hill 1 25 3.737670 3.737670 670 +departmentunivers 1 24 3.761200 3.761200 711 +indian 1 22 3.850148 3.850148 769 +cooper 1 22 3.850148 3.850148 757 +reserv 1 20 3.951244 3.951244 808 +wind 2 18 4.060443 8.120886 908 +hobbi 1 16 4.174387 4.174387 1009 +month 1 15 4.248495 4.248495 1025 +coher 2 14 4.317488 8.634976 1109 +danc 1 12 4.465908 4.465908 1278 +isca 3 11 4.553877 13.661631 1354 +correspond 1 10 4.653960 4.653960 1382 +queue 1 10 4.653960 4.653960 1386 +custom 1 10 4.653960 4.653960 1414 +tunnel 2 9 4.753590 9.507180 1615 +usaphon 1 9 4.753590 4.753590 1600 +jump 1 9 4.753590 4.753590 1603 +architect 2 8 4.875197 9.750394 1624 +kanpur 1 8 4.875197 4.875197 1744 +irregular 1 8 4.875197 4.875197 1768 +morph 1 7 5.010635 5.010635 1937 +courtesi 1 7 5.010635 5.010635 1953 +seitz 1 7 5.010635 5.010635 1976 +mukherje 4 5 5.347108 21.388432 2586 +button 1 5 5.347108 5.347108 2337 +commod 1 5 5.347108 5.347108 2415 +ppopp 2 4 5.568345 11.136690 2774 +shubhendu 1 3 5.857933 5.857933 4028 +badger 1 3 5.857933 5.857933 3502 +ballroom 1 3 5.857933 5.857933 3983 +shubu 4 2 6.263398 25.053592 6148 +fiance 1 2 6.263398 6.263398 5497 +nephew 1 2 6.263398 6.263398 5332 +dionisio 1 2 6.263398 6.263398 6203 +grai 1 2 6.263398 6.263398 4098 +mimi 1 1 6.957497 6.957497 20078 +avirup 1 1 6.957497 6.957497 20079 +linkseducationph 1 1 6.957497 6.957497 20080 +cachabl 1 1 6.957497 6.957497 20081 +dirsw 1 1 6.957497 6.957497 20082 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~siff^siff.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~siff^siff.html new file mode 100644 index 00000000..a28372ef --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~siff^siff.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +time 1 293 1.098612 1.098612 17 +fall 1 181 1.609438 1.609438 40 +book 1 99 2.302585 2.302585 131 +academ 1 82 2.484907 2.484907 178 +resourc 1 81 2.484907 2.484907 172 +new 1 64 2.772589 2.772589 262 +run 1 51 2.995732 2.995732 347 +movi 1 40 3.258097 3.258097 459 +seminar 1 38 3.295837 3.295837 470 +michael 2 35 3.401197 6.802394 514 +wai 1 25 3.737670 3.737670 662 +sport 1 25 3.737670 3.737670 683 +wonder 1 20 3.951244 3.951244 815 +club 1 15 4.248495 4.248495 1058 +philosophi 1 13 4.382027 4.382027 1167 +televis 1 6 5.164786 5.164786 2118 +wast 1 5 5.347108 5.347108 2537 +humor 1 5 5.347108 5.347108 2533 +midwest 1 2 6.263398 6.263398 6225 +siff 2 1 6.957497 13.914994 20083 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html new file mode 100644 index 00000000..d97c13f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~skrentny^skrentny.html @@ -0,0 +1,25 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +wisc 2 242 1.386294 2.772588 33 +link 1 247 1.386294 1.386294 24 +updat 1 191 1.609438 1.609438 41 +wisconsin 3 169 1.791759 5.375277 54 +madison 2 165 1.791759 3.583518 55 +relat 1 139 1.945910 1.945910 68 +dayton 1 119 2.079442 2.079442 104 +februari 1 54 2.944439 2.944439 328 +departmentunivers 1 24 3.761200 3.761200 711 +edutelephon 1 10 4.653960 4.653960 1473 +studentcomput 1 7 5.010635 5.010635 1963 +skrentni 3 6 5.164786 15.494358 2104 +lecturerc 1 1 6.957497 6.957497 20084 +coordinatorgradu 1 1 6.957497 6.957497 20085 +sciencesemail 1 1 6.957497 6.957497 20086 +groupskrentni 1 1 6.957497 6.957497 20087 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~so^so.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~so^so.html new file mode 100644 index 00000000..5a91a504 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~so^so.html @@ -0,0 +1,54 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +scienc 3 640 0.000000 0.000000 4 +comput 3 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +oper 1 180 1.609438 1.609438 34 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 5 169 1.791759 8.958795 54 +madison 4 165 1.791759 7.167036 55 +recent 1 167 1.791759 1.791759 58 +relat 1 139 1.945910 1.945910 68 +professor 1 137 1.945910 1.945910 76 +dayton 1 119 2.079442 2.079442 104 +report 1 131 2.079442 2.079442 92 +studi 1 120 2.079442 2.079442 91 +technic 1 100 2.302585 2.302585 140 +select 1 91 2.397895 2.397895 154 +commun 1 95 2.397895 2.397895 157 +help 2 83 2.484907 4.969814 175 +activ 1 84 2.484907 2.484907 182 +april 1 77 2.564949 2.564949 196 +intellig 2 72 2.639057 5.278114 225 +knowledg 2 67 2.708050 5.416100 243 +interact 1 62 2.772589 2.772589 270 +unix 3 58 2.890372 8.671116 308 +advisor 1 51 2.995732 2.995732 355 +represent 2 35 3.401197 6.802394 512 +human 1 32 3.465736 3.465736 546 +toward 1 25 3.737670 3.737670 668 +reliabl 1 25 3.737670 3.737670 674 +departmentunivers 1 24 3.761200 3.761200 711 +util 2 21 3.912023 7.824046 774 +miller 1 17 4.110874 4.110874 949 +step 1 13 4.382027 4.382027 1138 +larri 1 13 4.382027 4.382027 1142 +edutelephon 1 10 4.653960 4.653960 1473 +purdu 1 10 4.653960 4.653960 1466 +empir 1 8 4.875197 4.875197 1722 +studentcomput 1 7 5.010635 5.010635 1963 +bryan 2 5 5.347108 10.694216 2421 +travi 2 3 5.857933 11.715866 3985 +fredriksen 1 1 6.957497 6.957497 20139 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~so^travis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~so^travis.html new file mode 100644 index 00000000..3c7547f6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~so^travis.html @@ -0,0 +1,129 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +system 7 443 0.693147 4.852029 6 +interest 2 384 0.693147 1.386294 11 +research 2 431 0.693147 1.386294 10 +inform 2 412 0.693147 1.386294 8 +work 1 380 0.693147 0.693147 9 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +current 1 284 1.098612 1.098612 21 +student 1 343 1.098612 1.098612 19 +last 1 314 1.098612 1.098612 14 +wisc 4 242 1.386294 5.545176 33 +mail 1 238 1.386294 1.386294 22 +design 1 213 1.386294 1.386294 25 +softwar 1 220 1.386294 1.386294 30 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +group 1 183 1.609438 1.609438 36 +data 3 170 1.791759 5.375277 49 +recent 2 167 1.791759 3.583518 58 +develop 2 174 1.791759 3.583518 53 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +implement 1 152 1.791759 1.791759 52 +support 2 132 1.945910 3.891820 83 +model 1 145 1.945910 1.945910 69 +databas 3 122 2.079442 6.238326 86 +dayton 1 119 2.079442 2.079442 104 +high 1 130 2.079442 2.079442 101 +introduct 1 126 2.079442 2.079442 87 +technolog 1 131 2.079442 2.079442 102 +analysi 1 124 2.079442 2.079442 98 +manag 1 114 2.197225 2.197225 125 +center 1 88 2.397895 2.397895 158 +associ 1 93 2.397895 2.397895 151 +control 2 82 2.484907 4.969814 164 +larg 2 82 2.484907 4.969814 168 +west 2 83 2.484907 4.969814 192 +level 1 87 2.484907 2.484907 180 +activ 1 84 2.484907 2.484907 182 +journal 1 83 2.484907 2.484907 183 +chang 1 82 2.484907 2.484907 163 +issu 1 78 2.564949 2.564949 211 +appear 1 78 2.564949 2.564949 210 +june 1 79 2.564949 2.564949 214 +intellig 7 72 2.639057 18.473399 225 +summari 1 73 2.639057 2.639057 237 +logic 1 71 2.639057 2.639057 230 +involv 1 71 2.639057 2.639057 227 +integr 2 67 2.708050 5.416100 245 +knowledg 1 67 2.708050 2.708050 243 +test 1 66 2.708050 2.708050 252 +artifici 5 63 2.772589 13.862945 280 +complex 1 64 2.772589 2.772589 269 +foundat 1 62 2.772589 2.772589 286 +abstract 1 62 2.772589 2.772589 276 +dept 1 64 2.772589 2.772589 291 +automat 1 61 2.833213 2.833213 306 +special 1 56 2.890372 2.890372 320 +sever 1 56 2.890372 2.890372 322 +visual 1 48 3.044522 3.044522 372 +approach 1 48 3.044522 3.044522 366 +understand 1 47 3.091042 3.091042 384 +could 1 46 3.091042 3.091042 383 +around 1 43 3.178054 3.178054 415 +examin 1 42 3.218876 3.218876 424 +form 1 39 3.258097 3.258097 443 +map 1 39 3.258097 3.258097 452 +societi 1 40 3.258097 3.258097 456 +formal 1 37 3.332205 3.332205 478 +procedur 2 36 3.367296 6.734592 488 +represent 1 35 3.401197 3.401197 512 +singl 1 34 3.401197 3.401197 510 +focus 1 29 3.583519 3.583519 584 +altern 2 26 3.688879 7.377758 641 +enhanc 1 26 3.688879 3.688879 644 +magazin 2 24 3.761200 7.522400 704 +departmentunivers 1 24 3.761200 3.761200 711 +pattern 1 24 3.761200 3.761200 689 +displai 1 23 3.806662 3.806662 712 +expert 4 20 3.951244 15.804976 833 +basi 1 20 3.951244 3.951244 828 +aid 1 18 4.060443 4.060443 904 +scott 1 18 4.060443 4.060443 884 +failur 1 18 4.060443 4.060443 898 +heterogen 1 14 4.317488 4.317488 1090 +chuck 1 14 4.317488 4.317488 1108 +larri 2 13 4.382027 8.764054 1142 +social 2 13 4.382027 8.764054 1123 +incorpor 1 13 4.382027 4.382027 1163 +deduct 2 12 4.465908 8.931816 1236 +edutelephon 1 10 4.653960 4.653960 1473 +angel 1 8 4.875197 4.875197 1779 +databasesystem 1 8 4.875197 4.875197 1617 +attent 1 8 4.875197 4.875197 1651 +philosoph 1 7 5.010635 5.010635 1904 +geograph 1 6 5.164786 5.164786 2236 +augment 1 5 5.347108 5.347108 2350 +bryan 1 5 5.347108 5.347108 2421 +implic 1 4 5.568345 5.568345 2696 +andi 1 4 5.568345 5.568345 3081 +travi 2 3 5.857933 11.715866 3985 +metaphor 2 3 5.857933 11.715866 4038 +landscap 1 3 5.857933 5.857933 3525 +waysthat 1 2 6.263398 6.263398 5445 +andwith 1 2 6.263398 6.263398 5051 +derek 1 2 6.263398 6.263398 4537 +travisprofessorcomput 1 1 6.957497 6.957497 20140 +californa 1 1 6.957497 6.957497 20141 +ofartifici 1 1 6.957497 6.957497 20142 +automaticdeduct 1 1 6.957497 6.957497 20143 +contruct 1 1 6.957497 6.957497 20144 +informationcontain 1 1 6.957497 6.957497 20145 +beingdevot 1 1 6.957497 6.957497 20146 +visualiz 1 1 6.957497 6.957497 20147 +organiz 1 1 6.957497 6.957497 20148 +suppositionsunderli 1 1 6.957497 6.957497 20149 +ohar 1 1 6.957497 6.957497 20150 +swanson 1 1 6.957497 6.957497 20151 +whitsitt 1 1 6.957497 6.957497 20152 +zahn 1 1 6.957497 6.957497 20153 +oravec 1 1 6.957497 6.957497 20154 +reflex 1 1 6.957497 6.957497 20155 +falsework 1 1 6.957497 6.957497 20156 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html new file mode 100644 index 00000000..439fb45c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sodani^sodani.html @@ -0,0 +1,64 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +page 2 705 0.000000 0.000000 3 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +depart 3 457 0.693147 2.079441 12 +interest 2 384 0.693147 1.386294 11 +program 1 374 0.693147 0.693147 7 +project 2 340 1.098612 2.197224 18 +student 1 343 1.098612 1.098612 19 +cours 1 273 1.098612 1.098612 15 +time 1 293 1.098612 1.098612 17 +link 3 247 1.386294 4.158882 24 +wisc 1 242 1.386294 1.386294 33 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +relat 2 139 1.945910 3.891820 68 +architectur 1 139 1.945910 1.945910 77 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +compil 1 122 2.079442 2.079442 96 +look 1 107 2.197225 2.197225 115 +world 1 115 2.197225 2.197225 126 +access 1 102 2.302585 2.302585 136 +center 1 88 2.397895 2.397895 158 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +west 1 83 2.484907 2.484907 192 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +academ 1 82 2.484907 2.484907 178 +info 1 85 2.484907 2.484907 176 +meet 1 72 2.639057 2.639057 229 +onlin 1 75 2.639057 2.639057 223 +free 1 73 2.639057 2.639057 224 +java 1 70 2.708050 2.708050 248 +street 1 63 2.772589 2.772589 293 +new 1 64 2.772589 2.772589 262 +juli 1 60 2.833213 2.833213 305 +directori 1 45 3.135494 3.135494 396 +edit 1 42 3.218876 3.218876 418 +download 1 36 3.367296 3.367296 489 +tech 1 35 3.401197 3.401197 515 +random 1 34 3.401197 3.401197 511 +india 3 32 3.465736 10.397208 550 +packag 2 28 3.610918 7.221836 614 +indian 1 22 3.850148 3.850148 769 +rank 1 14 4.317488 4.317488 1063 +multiscalar 1 8 4.875197 4.875197 1783 +cricket 1 7 5.010635 5.010635 1945 +kestrel 1 4 5.568345 5.568345 2990 +batch 1 4 5.568345 5.568345 2700 +will 1 4 5.568345 5.568345 2782 +avinash 2 3 5.857933 11.715866 3510 +mate 1 3 5.857933 5.857933 3127 +hindu 1 3 5.857933 5.857933 3590 +sodani 3 2 6.263398 18.790194 4803 +kharagpur 1 2 6.263398 6.263398 6236 +toll 1 2 6.263398 6.263398 6149 +hon 1 1 6.957497 6.957497 20088 +kgpite 1 1 6.957497 6.957497 20089 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html new file mode 100644 index 00000000..eb105c26 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sohi^sohi.html @@ -0,0 +1,253 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 15 775 0.000000 0.000000 2 +scienc 7 640 0.000000 0.000000 4 +univers 6 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +research 6 431 0.693147 4.158882 10 +program 6 374 0.693147 4.158882 7 +depart 5 457 0.693147 3.465735 12 +system 4 443 0.693147 2.772588 6 +interest 2 384 0.693147 1.386294 11 +current 6 284 1.098612 6.591672 21 +engin 4 297 1.098612 4.394448 20 +student 2 343 1.098612 2.197224 19 +offic 1 299 1.098612 1.098612 13 +last 1 314 1.098612 1.098612 14 +gener 4 220 1.386294 5.545176 27 +design 3 213 1.386294 4.158882 25 +wisc 2 242 1.386294 2.772588 33 +graduat 2 215 1.386294 2.772588 31 +also 1 259 1.386294 1.386294 28 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +group 2 183 1.609438 3.218876 36 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +austin 10 168 1.791759 17.917590 63 +parallel 7 169 1.791759 12.542313 60 +recent 5 167 1.791759 8.958795 58 +wisconsin 5 169 1.791759 8.958795 54 +madison 5 165 1.791759 8.958795 55 +address 3 170 1.791759 5.375277 62 +avail 2 169 1.791759 3.583518 48 +applic 1 170 1.791759 1.791759 56 +develop 1 174 1.791759 1.791759 53 +data 1 170 1.791759 1.791759 49 +network 1 168 1.791759 1.791759 61 +distribut 1 162 1.791759 1.791759 51 +implement 1 152 1.791759 1.791759 52 +base 1 165 1.791759 1.791759 50 +architectur 9 139 1.945910 17.513190 77 +perform 3 143 1.945910 5.837730 74 +file 3 132 1.945910 5.837730 70 +process 2 142 1.945910 3.891820 72 +professor 1 137 1.945910 1.945910 76 +model 1 145 1.945910 1.945910 69 +support 1 132 1.945910 1.945910 83 +report 4 131 2.079442 8.317768 92 +compil 2 122 2.079442 4.158884 96 +studi 2 120 2.079442 4.158884 91 +postscript 2 131 2.079442 4.158884 90 +analysi 2 124 2.079442 4.158884 98 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +confer 1 126 2.079442 2.079442 100 +intern 10 108 2.197225 21.972250 128 +place 1 106 2.197225 2.197225 124 +memori 6 101 2.302585 13.815510 139 +technic 4 100 2.302585 9.210340 140 +access 3 102 2.302585 6.907755 136 +associ 1 93 2.397895 2.397895 151 +proceed 1 93 2.397895 2.397895 152 +level 3 87 2.484907 7.454721 180 +novemb 3 81 2.484907 7.454721 179 +ieee 3 86 2.484907 7.454721 190 +educ 2 86 2.484907 4.969814 191 +west 1 83 2.484907 2.484907 192 +institut 1 84 2.484907 2.484907 187 +resourc 1 81 2.484907 2.484907 172 +control 1 82 2.484907 2.484907 164 +decemb 5 80 2.564949 12.824745 215 +dynam 5 76 2.564949 12.824745 194 +issu 2 78 2.564949 5.129898 211 +appear 2 78 2.564949 5.129898 210 +april 2 77 2.564949 5.129898 196 +symposium 11 72 2.639057 29.029627 238 +summari 2 73 2.639057 5.278114 237 +effici 2 73 2.639057 5.278114 233 +goal 1 66 2.708050 2.708050 250 +order 1 69 2.708050 2.708050 249 +simul 1 66 2.708050 2.708050 255 +window 1 68 2.708050 2.708050 242 +import 1 65 2.772589 2.772589 282 +evalu 1 64 2.772589 2.772589 266 +januari 1 62 2.772589 2.772589 264 +septemb 1 65 2.772589 2.772589 274 +share 1 59 2.833213 2.833213 304 +juli 1 60 2.833213 2.833213 305 +sever 2 56 2.890372 5.780744 322 +detail 1 57 2.890372 2.890372 321 +processor 9 54 2.944439 26.499951 335 +instruct 6 53 2.944439 17.666634 332 +talk 2 53 2.944439 5.888878 336 +investig 2 51 2.995732 5.991464 353 +hardwar 2 51 2.995732 5.991464 350 +pointer 2 48 3.044522 6.089044 368 +numer 1 49 3.044522 3.044522 369 +set 1 50 3.044522 3.044522 361 +electron 1 47 3.091042 3.091042 379 +possibl 1 47 3.091042 3.091042 378 +understand 1 47 3.091042 3.091042 384 +could 1 46 3.091042 3.091042 383 +execut 5 45 3.135494 15.677470 404 +mechan 2 43 3.178054 6.356108 416 +futur 1 41 3.218876 3.218876 427 +cach 1 41 3.218876 3.218876 432 +fast 1 42 3.218876 3.218876 429 +combin 1 42 3.218876 3.218876 421 +press 1 42 3.218876 3.218876 419 +annual 5 40 3.258097 16.290485 458 +transact 2 39 3.258097 6.516194 438 +continu 1 39 3.258097 3.258097 448 +multipl 1 39 3.258097 3.258097 453 +error 1 40 3.258097 3.258097 449 +electr 2 38 3.295837 6.591674 461 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +given 2 32 3.465736 6.931472 538 +india 1 32 3.465736 3.465736 550 +concept 1 32 3.465736 3.465736 537 +exist 1 30 3.555348 3.555348 569 +focus 1 29 3.583519 3.583519 584 +depend 1 29 3.583519 3.583519 583 +multiprocessor 3 28 3.610918 10.832754 605 +load 3 28 3.610918 10.832754 601 +held 1 28 3.610918 3.610918 600 +arrai 2 27 3.637586 7.275172 627 +detect 2 26 3.688879 7.377758 646 +challeng 1 26 3.688879 3.688879 653 +effort 1 26 3.688879 3.688879 652 +request 1 26 3.688879 3.688879 635 +flow 1 24 3.761200 3.761200 700 +compress 2 23 3.806662 7.613324 719 +reduc 2 22 3.850148 7.700296 759 +sequenti 1 22 3.850148 3.850148 745 +hierarchi 1 22 3.850148 3.850148 744 +chip 1 21 3.912023 3.912023 770 +exploit 3 20 3.951244 11.853732 836 +fine 3 20 3.951244 11.853732 822 +department 1 20 3.951244 3.951244 839 +smith 1 20 3.951244 3.951244 820 +predict 3 19 4.007333 12.021999 855 +scott 1 18 4.060443 4.060443 884 +regist 2 17 4.110874 8.221748 938 +interconnect 1 17 4.110874 4.110874 937 +expand 1 17 4.110874 4.110874 928 +micro 4 15 4.248495 16.993980 1031 +eduphon 1 15 4.248495 4.248495 1060 +novel 1 15 4.248495 4.248495 1039 +todd 1 15 4.248495 4.248495 1051 +achiev 1 14 4.317488 4.317488 1088 +split 1 14 4.317488 4.317488 1078 +translat 1 13 4.382027 4.382027 1164 +sigplan 1 13 4.382027 4.382027 1190 +incorpor 1 13 4.382027 4.382027 1163 +prolog 1 13 4.382027 4.382027 1155 +target 1 12 4.465908 4.465908 1282 +calcul 1 12 4.465908 4.465908 1268 +cycl 3 11 4.553877 13.661631 1335 +branch 2 11 4.553877 9.107754 1318 +bandwidth 1 11 4.553877 4.553877 1365 +arbitrari 1 11 4.553877 4.553877 1359 +franklin 5 10 4.653960 23.269800 1436 +grain 3 10 4.653960 13.961880 1448 +traffic 1 10 4.653960 4.653960 1421 +inter 1 9 4.753590 4.753590 1530 +multiscalar 8 8 4.875197 39.001576 1783 +character 3 8 4.875197 14.625591 1767 +paradigm 2 8 4.875197 9.750394 1662 +elect 1 8 4.875197 4.875197 1771 +uniprocessor 1 8 4.875197 4.875197 1696 +potenti 1 8 4.875197 4.875197 1690 +watson 1 8 4.875197 4.875197 1691 +illinoi 2 7 5.010635 10.021270 1941 +zero 2 7 5.010635 10.021270 1896 +goodman 1 7 5.010635 5.010635 1891 +serial 1 7 5.010635 5.010635 1975 +friedman 1 7 5.010635 5.010635 1886 +chiang 1 7 5.010635 5.010635 1853 +sohi 22 6 5.164786 113.625292 2237 +microarchitectur 6 6 5.164786 30.988716 2238 +risc 1 6 5.164786 5.164786 2016 +superscalar 1 6 5.164786 5.164786 2082 +handbook 1 6 5.164786 5.164786 2061 +guri 1 5 5.347108 5.347108 2578 +andrea 1 5 5.347108 5.347108 2375 +lebeck 1 5 5.347108 5.347108 2582 +highest 1 4 5.568345 5.568345 2950 +resolv 1 4 5.568345 5.568345 2675 +height 1 4 5.568345 5.568345 2890 +appendix 1 4 5.568345 5.568345 2739 +crai 1 4 5.568345 5.568345 3012 +breach 5 3 5.857933 29.289665 4009 +vijaykumar 3 3 5.857933 17.573799 4011 +urbana 2 3 5.857933 11.715866 3879 +ordinari 2 3 5.857933 11.715866 3233 +streamlin 2 3 5.857933 11.715866 3573 +bulk 1 3 5.857933 5.857933 4000 +thedevelop 1 3 5.857933 5.857933 3903 +reorder 1 3 5.857933 5.857933 3952 +anatomi 1 3 5.857933 5.857933 4010 +chow 1 3 5.857933 5.857933 3281 +pnevmatikato 5 2 6.263398 31.316990 6204 +guard 3 2 6.263398 18.790194 5738 +gurindar 1 2 6.263398 6.263398 6110 +andelectr 1 2 6.263398 6.263398 6200 +birla 1 2 6.263398 6.263398 6239 +pilani 1 2 6.263398 6.263398 6240 +plenti 1 2 6.263398 6.263398 5465 +sustain 1 2 6.263398 6.263398 6201 +needto 1 2 6.263398 6.263398 4927 +andhow 1 2 6.263398 6.263398 5933 +expend 1 2 6.263398 6.263398 5451 +moshovo 1 2 6.263398 6.263398 6211 +inrd 1 2 6.263398 6.263398 4531 +tetra 1 2 6.263398 6.263398 5196 +framemak 2 1 6.957497 13.914994 20090 +graduatesaddress 1 1 6.957497 6.957497 20091 +usasohi 1 1 6.957497 6.957497 20092 +thehighest 1 1 6.957497 6.957497 20093 +circa 1 1 6.957497 6.957497 20094 +transistor 1 1 6.957497 6.957497 20095 +availableon 1 1 6.957497 6.957497 20096 +getth 1 1 6.957497 6.957497 20097 +ofov 1 1 6.957497 6.957497 20098 +thenatur 1 1 6.957497 6.957497 20099 +numericappl 1 1 6.957497 6.957497 20100 +andcarri 1 1 6.957497 6.957497 20101 +assessth 1 1 6.957497 6.957497 20102 +vijaykumarrec 1 1 6.957497 6.957497 20103 +talkswil 1 1 6.957497 6.957497 20104 +researchcent 1 1 6.957497 6.957497 20105 +yorktown 1 1 6.957497 6.957497 20106 +publicationshigh 1 1 6.957497 6.957497 20107 +ofdetail 1 1 6.957497 6.957497 20108 +resultsi 1 1 6.957497 6.957497 20109 +latencyt 1 1 6.957497 6.957497 20110 +processorsj 1 1 6.957497 6.957497 20111 +referencesm 1 1 6.957497 6.957497 20112 +communicationin 1 1 6.957497 6.957497 20113 +errorst 1 1 6.957497 6.957497 20114 +processorsd 1 1 6.957497 6.957497 20115 +knapsack 1 1 6.957497 6.957497 20116 +componentt 1 1 6.957497 6.957497 20117 +processorst 1 1 6.957497 6.957497 20118 +gradstodd 1 1 6.957497 6.957497 20119 +latencydionisio 1 1 6.957497 6.957497 20120 +setsmanoj 1 1 6.957497 6.957497 20121 +architecturemark 1 1 6.957497 6.957497 20122 +executionsriram 1 1 6.957497 6.957497 20123 +vajapeyam 1 1 6.957497 6.957497 20124 +processormen 1 1 6.957497 6.957497 20125 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html new file mode 100644 index 00000000..3655f0ab --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~solomon^solomon.html @@ -0,0 +1,119 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 3 374 0.693147 2.079441 7 +depart 2 457 0.693147 1.386294 12 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +us 1 329 1.098612 1.098612 16 +softwar 3 220 1.386294 4.158882 30 +languag 2 227 1.386294 2.772588 26 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +oper 1 180 1.609438 1.609438 34 +updat 1 191 1.609438 1.609438 41 +data 3 170 1.791759 5.375277 49 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +develop 2 174 1.791759 3.583518 53 +avail 2 169 1.791759 3.583518 48 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +network 1 168 1.791759 1.791759 61 +implement 1 152 1.791759 1.791759 52 +recent 1 167 1.791759 1.791759 58 +applic 1 170 1.791759 1.791759 56 +object 2 138 1.945910 3.891820 79 +professor 1 137 1.945910 1.945910 76 +support 1 132 1.945910 1.945910 83 +hall 1 146 1.945910 1.945910 65 +lectur 1 135 1.945910 1.945910 73 +note 1 142 1.945910 1.945910 67 +report 3 131 2.079442 6.238326 92 +databas 2 122 2.079442 4.158884 86 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +spring 1 131 2.079442 2.079442 88 +manag 4 114 2.197225 8.788900 125 +version 2 113 2.197225 4.394450 122 +theori 1 111 2.197225 2.197225 127 +intern 1 108 2.197225 2.197225 128 +graphic 1 90 2.397895 2.397895 147 +environ 2 84 2.484907 4.969814 177 +west 1 83 2.484907 2.484907 192 +larg 1 82 2.484907 2.484907 168 +june 2 79 2.564949 5.129898 214 +appear 2 78 2.564949 5.129898 210 +orient 1 80 2.564949 2.564949 205 +april 1 77 2.564949 2.564949 196 +interfac 1 79 2.564949 2.564949 209 +effici 1 73 2.639057 2.639057 233 +free 1 73 2.639057 2.639057 224 +david 1 71 2.639057 2.639057 232 +workshop 1 71 2.639057 2.639057 239 +logic 1 71 2.639057 2.639057 230 +java 1 70 2.708050 2.708050 248 +street 1 63 2.772589 2.772589 293 +septemb 1 65 2.772589 2.772589 274 +room 1 59 2.833213 2.833213 301 +space 1 57 2.890372 2.890372 310 +overview 1 56 2.890372 2.890372 323 +point 1 58 2.890372 2.890372 319 +effect 1 46 3.091042 3.091042 385 +physic 1 47 3.091042 3.091042 377 +mark 2 44 3.135494 6.270988 403 +paul 2 38 3.295837 6.591674 471 +michael 3 35 3.401197 10.203591 514 +independ 1 32 3.465736 3.465736 548 +chair 2 29 3.583519 7.167038 596 +built 1 29 3.583519 3.583519 592 +univ 1 28 3.610918 3.610918 617 +proc 3 26 3.688879 11.066637 649 +todai 1 25 3.737670 3.737670 672 +inth 1 22 3.850148 3.850148 741 +sigmod 2 19 4.007333 8.014666 877 +adam 2 17 4.110874 8.221748 934 +former 1 17 4.110874 4.110874 956 +white 1 17 4.110874 4.110874 951 +fourth 1 16 4.174387 4.174387 999 +dilbert 1 16 4.174387 4.174387 996 +configur 1 15 4.248495 4.248495 1012 +conf 3 13 4.382027 13.146081 1181 +dewitt 1 12 4.465908 4.465908 1270 +nanci 1 12 4.465908 4.465908 1256 +daniel 1 12 4.465908 4.465908 1233 +persist 2 11 4.553877 9.107754 1367 +shore 2 11 4.553877 9.107754 1377 +vldb 1 10 4.653960 4.653960 1470 +franklin 1 10 4.653960 4.653960 1436 +naughton 1 10 4.653960 4.653960 1450 +jeffrei 1 9 4.753590 4.753590 1612 +solomon 8 8 4.875197 39.001576 1716 +carei 2 8 4.875197 9.750394 1781 +ioannidi 1 8 4.875197 4.875197 1714 +goodman 1 7 5.010635 5.010635 1891 +tsatalo 2 5 5.347108 10.694216 2581 +marvin 3 4 5.568345 16.705035 2806 +mcauliff 2 4 5.568345 11.136690 3083 +schuh 1 3 5.857933 5.857933 4014 +gmap 1 2 6.263398 6.263398 6241 +versatil 1 2 6.263398 6.263398 6242 +seth 1 2 6.263398 6.263398 4998 +andmarvin 3 1 6.957497 20.872491 20126 +astech 3 1 6.957497 20.872491 20127 +odyssea 2 1 6.957497 13.914994 20128 +publicationstoward 1 1 6.957497 6.957497 20129 +abstractpostscriptth 1 1 6.957497 6.957497 20130 +andyanni 1 1 6.957497 6.957497 20131 +abstractpostscriptexpand 1 1 6.957497 6.957497 20132 +journalv 1 1 6.957497 6.957497 20133 +abstractpostscriptshor 1 1 6.957497 6.957497 20134 +andmichael 1 1 6.957497 6.957497 20135 +zwillingavail 1 1 6.957497 6.957497 20136 +capitl 1 1 6.957497 6.957497 20137 +photoalbum 1 1 6.957497 6.957497 20138 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html new file mode 100644 index 00000000..e0f83205 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sowmya^sowmya.html @@ -0,0 +1,7 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +wisc 1 242 1.386294 1.386294 33 +welcom 1 122 2.079442 2.079442 99 +sowmya 3 4 5.568345 16.705035 2670 +subramanian 1 2 6.263398 6.263398 5666 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html new file mode 100644 index 00000000..45d303b8 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~ssl^ssl.html @@ -0,0 +1,79 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +inform 1 412 0.693147 0.693147 8 +student 1 343 1.098612 1.098612 19 +us 1 329 1.098612 1.098612 16 +time 1 293 1.098612 1.098612 17 +link 2 247 1.386294 2.772588 24 +wisc 2 242 1.386294 2.772588 33 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +modifi 1 178 1.609438 1.609438 35 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +read 1 154 1.791759 1.791759 47 +relat 1 139 1.945910 1.945910 68 +first 1 140 1.945910 1.945910 71 +welcom 1 122 2.079442 2.079442 99 +person 1 111 2.197225 2.197225 117 +place 1 106 2.197225 2.197225 124 +send 1 114 2.197225 2.197225 109 +book 1 99 2.302585 2.302585 131 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +follow 1 92 2.397895 2.397895 143 +sinc 1 90 2.397895 2.397895 159 +comment 1 93 2.397895 2.397895 146 +stuff 2 87 2.484907 4.969814 171 +info 1 85 2.484907 2.484907 176 +second 1 81 2.484907 2.484907 166 +resum 1 79 2.564949 2.564949 217 +want 1 79 2.564949 2.564949 199 +june 1 79 2.564949 2.564949 214 +html 1 75 2.639057 2.639057 235 +meet 1 72 2.639057 2.639057 229 +java 1 70 2.708050 2.708050 248 +juli 1 60 2.833213 2.833213 305 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +finger 1 52 2.995732 2.995732 354 +cool 1 49 3.044522 3.044522 374 +music 1 42 3.218876 3.218876 436 +movi 1 40 3.258097 3.258097 459 +decis 1 23 3.806662 3.806662 728 +love 1 21 3.912023 3.912023 804 +women 1 16 4.174387 4.174387 1004 +ascii 1 15 4.248495 4.248495 1032 +philadelphia 1 12 4.465908 4.465908 1244 +guest 1 12 4.465908 4.465908 1220 +pagei 1 8 4.875197 4.875197 1683 +judg 1 8 4.875197 4.875197 1644 +constitut 1 6 5.164786 5.164786 2026 +sail 1 5 5.347108 5.347108 2571 +panel 1 5 5.347108 5.347108 2463 +vote 1 4 5.568345 5.568345 2953 +lawand 2 2 6.263398 12.526796 6191 +stuffa 1 2 6.263398 6.263398 5999 +resours 1 2 6.263398 6.263398 5211 +serverth 1 2 6.263398 6.263398 4448 +hoofer 1 2 6.263398 6.263398 6101 +shilpa 3 1 6.957497 20.872491 20157 +pastfor 1 1 6.957497 6.957497 20158 +schoolher 1 1 6.957497 6.957497 20159 +syster 1 1 6.957497 6.957497 20160 +madisonsurf 1 1 6.957497 6.957497 20161 +madisonst 1 1 6.957497 6.957497 20162 +clubowl 1 1 6.957497 6.957497 20163 +signatur 1 1 6.957497 6.957497 20164 +lovesnowi 1 1 6.957497 6.957497 20165 +linksher 1 1 6.957497 6.957497 20166 +iswher 1 1 6.957497 6.957497 20167 +tossl 1 1 6.957497 6.957497 20168 +shilpal 1 1 6.957497 6.957497 20169 +thru 1 1 6.957497 6.957497 20170 +formlast 1 1 6.957497 6.957497 20171 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html new file mode 100644 index 00000000..6c4f5cde --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~stenglei^stenglei.html @@ -0,0 +1,33 @@ +term, tf, in documents count, idf, tfidf, wordid +home 5 672 0.000000 0.000000 1 +comput 4 775 0.000000 0.000000 2 +scienc 4 640 0.000000 0.000000 4 +page 3 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 2 457 0.693147 1.386294 12 +student 1 343 1.098612 1.098612 19 +offic 1 299 1.098612 1.098612 13 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +phone 1 175 1.791759 1.791759 45 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +compil 1 122 2.079442 2.079442 96 +teach 1 108 2.197225 2.197225 112 +take 1 97 2.302585 2.302585 134 +section 2 94 2.397895 4.795790 149 +west 1 83 2.484907 2.484907 192 +street 1 63 2.772589 2.772589 293 +sport 1 25 3.737670 3.737670 683 +pageth 1 7 5.010635 5.010635 1939 +jeremi 2 5 5.347108 10.694216 2360 +simpson 1 2 6.263398 6.263398 5994 +stenglein 2 1 6.957497 13.914994 20172 +stenglei 1 1 6.957497 6.957497 20173 +pageespn 1 1 6.957497 6.957497 20174 +hotwir 1 1 6.957497 6.957497 20175 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~stever^stever.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~stever^stever.html new file mode 100644 index 00000000..eabe54d4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~stever^stever.html @@ -0,0 +1,58 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 2 457 0.693147 1.386294 12 +work 1 380 0.693147 0.693147 9 +interest 1 384 0.693147 0.693147 11 +student 1 343 1.098612 1.098612 19 +project 1 340 1.098612 1.098612 18 +graduat 1 215 1.386294 1.386294 31 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +updat 1 191 1.609438 1.609438 41 +wisconsin 2 169 1.791759 3.583518 54 +phone 1 175 1.791759 1.791759 45 +madison 1 165 1.791759 1.791759 55 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +well 1 109 2.197225 2.197225 121 +find 1 111 2.197225 2.197225 111 +west 1 83 2.484907 2.484907 192 +june 1 79 2.564949 2.564949 214 +david 1 71 2.639057 2.639057 232 +free 1 73 2.639057 2.639057 224 +summari 1 73 2.639057 2.639057 237 +plan 1 65 2.772589 2.772589 272 +januari 1 62 2.772589 2.772589 264 +faculti 1 56 2.890372 2.890372 325 +advisor 1 51 2.995732 2.995732 355 +finger 1 52 2.995732 2.995732 354 +mark 1 44 3.135494 3.135494 403 +join 1 39 3.258097 3.258097 457 +streetmadison 1 38 3.295837 3.295837 474 +feel 1 37 3.332205 3.332205 483 +sciencesunivers 1 37 3.332205 3.332205 486 +ofth 1 36 3.367296 3.367296 491 +often 1 31 3.496508 3.496508 551 +steve 1 29 3.583519 3.583519 594 +although 1 25 3.737670 3.737670 667 +hill 1 25 3.737670 3.737670 670 +finish 1 22 3.850148 3.850148 748 +tell 1 21 3.912023 3.912023 777 +wind 1 18 4.060443 4.060443 908 +steven 1 17 4.110874 4.110874 953 +wood 1 11 4.553877 4.553877 1355 +michigan 1 11 4.553877 4.553877 1368 +laru 1 9 4.753590 4.753590 1560 +reinhardt 2 5 5.347108 10.694216 2583 +computerarchitectur 1 5 5.347108 5.347108 2290 +publicationsresearch 1 4 5.568345 5.568345 2876 +eec 1 2 6.263398 6.263398 5981 +tunnelgroup 1 1 6.957497 6.957497 20176 +andjim 1 1 6.957497 6.957497 20177 +mewhat 1 1 6.957497 6.957497 20178 +stever 1 1 6.957497 6.957497 20179 +usalast 1 1 6.957497 6.957497 20180 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~strik^strik.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~strik^strik.html new file mode 100644 index 00000000..02e1c604 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~strik^strik.html @@ -0,0 +1,59 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +home 3 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +inform 2 412 0.693147 1.386294 8 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +depart 1 457 0.693147 0.693147 12 +us 1 329 1.098612 1.098612 16 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +wisconsin 2 169 1.791759 3.583518 54 +madison 1 165 1.791759 1.791759 55 +professor 1 137 1.945910 1.945910 76 +problem 1 147 1.945910 1.945910 75 +assign 1 135 1.945910 1.945910 66 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +analysi 2 124 2.079442 4.158884 98 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +search 1 95 2.397895 2.397895 155 +west 1 83 2.484907 2.484907 192 +exam 1 86 2.484907 2.484907 169 +stuff 1 87 2.484907 2.484907 171 +solv 1 73 2.639057 2.639057 234 +nation 1 74 2.639057 2.639057 240 +januari 1 62 2.772589 2.772589 264 +foundat 1 62 2.772589 2.772589 286 +best 1 59 2.833213 2.833213 299 +point 1 58 2.890372 2.890372 319 +talk 1 53 2.944439 2.944439 336 +numer 2 49 3.044522 6.089044 369 +telephon 1 50 3.044522 3.044522 373 +show 1 43 3.178054 3.178054 417 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +field 1 37 3.332205 3.332205 482 +john 3 33 3.433987 10.301961 532 +begin 1 23 3.806662 3.806662 716 +rate 1 15 4.248495 4.248495 1037 +qualifi 1 8 4.875197 4.875197 1721 +chicago 2 6 5.164786 10.329572 2149 +fluid 1 5 5.347108 5.347108 2440 +kid 1 5 5.347108 5.347108 2516 +nathan 2 4 5.568345 11.136690 2794 +radio 1 4 5.568345 5.568345 3025 +car 1 4 5.568345 5.568345 2931 +drew 1 4 5.568345 5.568345 2980 +museum 1 3 5.857933 5.857933 3933 +pageoth 1 2 6.263398 6.263398 6104 +strikwerda 2 1 6.957497 13.914994 20181 +strikwerdadepart 1 1 6.957497 6.957497 20182 +strik 1 1 6.957497 6.957497 20183 +dynamicsmyoffici 1 1 6.957497 6.957497 20184 +tribun 1 1 6.957497 6.957497 20185 +footballmi 1 1 6.957497 6.957497 20186 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html new file mode 100644 index 00000000..62a70ce9 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~subbarao^subbarao.html @@ -0,0 +1,31 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +interest 2 384 0.693147 1.386294 11 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +thing 1 84 2.484907 2.484907 189 +david 1 71 2.639057 2.639057 232 +evalu 1 64 2.772589 2.772589 266 +processor 1 54 2.944439 2.944439 335 +execut 1 45 3.135494 3.135494 404 +show 1 43 3.178054 3.178054 417 +cach 1 41 3.218876 3.218876 432 +late 1 40 3.258097 3.258097 439 +enjoi 1 26 3.688879 3.688879 660 +cambridg 1 16 4.174387 4.174387 1008 +stream 1 15 4.248495 4.248495 1015 +buffer 1 12 4.465908 4.465908 1211 +calvin 1 9 4.753590 4.753590 1518 +replac 1 8 4.875197 4.875197 1668 +integ 1 8 4.875197 4.875197 1688 +secondari 1 7 5.010635 5.010635 1884 +histor 1 6 5.164786 5.164786 2085 +superscalar 1 6 5.164786 5.164786 2082 +hobb 1 4 5.568345 5.568345 2893 +decoupl 1 4 5.568345 5.568345 2898 +letterman 1 3 5.857933 5.857933 3931 +seinfeld 1 3 5.857933 5.857933 3958 +subba 1 2 6.263398 6.263398 6091 +subbarao 1 2 6.263398 6.263398 6205 +prooocessor 1 1 6.957497 6.957497 20187 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html new file mode 100644 index 00000000..1b456535 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~suhui^suhui.html @@ -0,0 +1,84 @@ +term, tf, in documents count, idf, tfidf, wordid +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +engin 1 297 1.098612 1.098612 20 +last 1 314 1.098612 1.098612 14 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +fall 1 181 1.609438 1.609438 40 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +parallel 3 169 1.791759 5.375277 60 +wisconsin 1 169 1.791759 1.791759 54 +hour 1 165 1.791759 1.791759 46 +applic 1 170 1.791759 1.791759 56 +base 1 165 1.791759 1.791759 50 +network 1 168 1.791759 1.791759 61 +construct 1 139 1.945910 1.945910 82 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +relat 1 139 1.945910 1.945910 68 +schedul 2 119 2.079442 4.158884 85 +send 1 114 2.197225 2.197225 109 +world 1 115 2.197225 2.197225 126 +search 1 95 2.397895 2.397895 155 +resourc 1 81 2.484907 2.484907 172 +wide 1 84 2.484907 2.484907 185 +librari 1 87 2.484907 2.484907 181 +complet 1 77 2.564949 2.564949 208 +dynam 1 76 2.564949 2.564949 194 +april 1 77 2.564949 2.564949 196 +sourc 1 77 2.564949 2.564949 201 +workshop 1 71 2.639057 2.639057 239 +servic 1 72 2.639057 2.639057 236 +polici 1 64 2.772589 2.772589 279 +virtual 1 62 2.772589 2.772589 285 +new 1 64 2.772589 2.772589 262 +guid 1 63 2.772589 2.772589 267 +processor 2 54 2.944439 5.888878 335 +still 1 50 3.044522 3.044522 362 +bibliographi 1 34 3.401197 3.401197 518 +limit 1 29 3.583519 3.583519 585 +measur 1 28 3.610918 3.610918 609 +static 1 27 3.637586 3.637586 619 +proc 1 26 3.688879 3.688879 649 +subject 1 26 3.688879 3.688879 647 +strategi 1 25 3.737670 3.737670 682 +yahoo 1 24 3.761200 3.761200 707 +alloc 1 20 3.951244 3.951244 821 +thur 1 19 4.007333 4.007333 847 +taiwan 3 16 4.174387 12.523161 1006 +sigmetr 1 13 4.382027 4.382027 1173 +conf 1 13 4.382027 4.382027 1181 +mari 2 12 4.465908 8.931816 1266 +characterist 1 12 4.465908 4.465908 1257 +shop 1 10 4.653960 4.653960 1469 +vernon 2 9 4.753590 9.507180 1556 +job 1 8 4.875197 4.875197 1702 +calendar 1 8 4.875197 4.875197 1649 +chiang 2 7 5.010635 10.021270 1853 +quantum 1 6 5.164786 5.164786 2214 +academia 1 6 5.164786 5.164786 2036 +ta 1 4 5.568345 5.568345 3058 +nashvil 1 4 5.568345 5.568345 2867 +sinanet 1 4 5.568345 5.568345 2883 +suhui 1 3 5.857933 5.857933 3430 +educlick 1 3 5.857933 5.857933 3612 +rajesh 1 3 5.857933 5.857933 3511 +conjunct 1 3 5.857933 5.857933 3743 +ipp 1 3 5.857933 5.857933 3381 +sinica 1 3 5.857933 5.857933 3819 +preemption 1 2 6.263398 6.263398 6230 +mansharamani 1 2 6.263398 6.263398 6231 +catalogu 1 2 6.263398 6.263398 6166 +magzin 1 2 6.263398 6.263398 5614 +madisonoffic 1 1 6.957497 6.957497 20188 +stelephon 1 1 6.957497 6.957497 20189 +emailoffic 1 1 6.957497 6.957497 20190 +seednet 1 1 6.957497 6.957497 20191 +vistor 1 1 6.957497 6.957497 20192 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html new file mode 100644 index 00000000..4874e036 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~sundaram^sundaram.html @@ -0,0 +1,193 @@ +term, tf, in documents count, idf, tfidf, wordid +page 8 705 0.000000 0.000000 3 +scienc 3 640 0.000000 0.000000 4 +comput 2 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +interest 3 384 0.693147 2.079441 11 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +current 3 284 1.098612 3.295836 21 +project 1 340 1.098612 1.098612 18 +link 10 247 1.386294 13.862940 24 +also 1 259 1.386294 1.386294 28 +mail 1 238 1.386294 1.386294 22 +class 2 199 1.609438 3.218876 37 +includ 1 208 1.609438 1.609438 42 +public 1 202 1.609438 1.609438 43 +list 1 201 1.609438 1.609438 39 +madison 5 165 1.791759 8.958795 55 +recent 1 167 1.791759 1.791759 58 +hall 1 146 1.945910 1.945910 65 +construct 1 139 1.945910 1.945910 82 +relat 1 139 1.945910 1.945910 68 +note 1 142 1.945910 1.945910 67 +provid 1 121 2.079442 2.079442 94 +site 7 106 2.197225 15.380575 119 +specif 2 106 2.197225 4.394450 106 +manag 1 114 2.197225 2.197225 125 +assist 1 112 2.197225 2.197225 113 +theori 1 111 2.197225 2.197225 127 +take 2 97 2.302585 4.605170 134 +technic 1 100 2.302585 2.302585 140 +homepag 1 93 2.397895 2.397895 148 +call 1 91 2.397895 2.397895 153 +pictur 1 89 2.397895 2.397895 160 +select 1 91 2.397895 2.397895 154 +follow 1 92 2.397895 2.397895 143 +info 3 85 2.484907 7.454721 176 +institut 1 84 2.484907 2.484907 187 +contain 1 81 2.484907 2.484907 174 +sourc 1 77 2.564949 2.564949 201 +david 2 71 2.639057 5.278114 232 +onlin 2 75 2.639057 5.278114 223 +nation 2 74 2.639057 5.278114 240 +addit 1 74 2.639057 2.639057 228 +servic 1 72 2.639057 2.639057 236 +view 2 70 2.708050 5.416100 254 +new 5 64 2.772589 13.862945 262 +artifici 1 63 2.772589 2.772589 280 +laboratori 1 63 2.772589 2.772589 292 +organ 1 65 2.772589 2.772589 265 +foundat 1 62 2.772589 2.772589 286 +written 1 63 2.772589 2.772589 278 +back 1 60 2.833213 2.833213 297 +variou 3 56 2.890372 8.671116 317 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +publish 1 57 2.890372 2.890372 326 +scientif 2 53 2.944439 5.888878 341 +local 1 55 2.944439 2.944439 334 +tabl 1 51 2.995732 2.995732 346 +life 1 50 3.044522 3.044522 375 +friend 1 48 3.044522 3.044522 376 +numer 1 49 3.044522 3.044522 369 +physic 1 47 3.091042 3.091042 377 +math 1 44 3.135494 3.135494 402 +natur 1 44 3.135494 3.135494 406 +show 1 43 3.178054 3.178054 417 +edit 1 42 3.218876 3.218876 418 +review 1 42 3.218876 3.218876 425 +realli 1 40 3.258097 3.258097 444 +late 1 40 3.258097 3.258097 439 +movi 1 40 3.258097 3.258097 459 +hand 1 37 3.332205 3.332205 475 +print 1 34 3.401197 3.401197 503 +articl 3 33 3.433987 10.301961 530 +obtain 1 33 3.433987 3.433987 534 +within 1 33 3.433987 3.433987 525 +india 1 32 3.465736 3.465736 550 +scientist 1 31 3.496508 3.496508 560 +steve 1 29 3.583519 3.583519 594 +weather 4 28 3.610918 14.443672 618 +american 1 27 3.637586 3.637586 634 +great 1 27 3.637586 3.637586 626 +william 1 22 3.850148 3.850148 765 +tell 1 21 3.912023 3.912023 777 +ever 1 19 4.007333 4.007333 872 +hypertext 1 19 4.007333 4.007333 865 +histori 1 19 4.007333 4.007333 853 +thoma 1 18 4.060443 4.060443 901 +regist 1 17 4.110874 4.110874 938 +weekli 1 17 4.110874 4.110874 919 +upon 1 16 4.174387 4.174387 978 +choos 1 16 4.174387 4.174387 964 +brief 1 16 4.174387 4.174387 1001 +dilbert 1 16 4.174387 4.174387 996 +advantag 1 16 4.174387 4.174387 987 +piec 1 15 4.248495 4.248495 1020 +floor 1 14 4.317488 4.317488 1070 +camera 1 14 4.317488 4.317488 1115 +care 1 13 4.382027 4.382027 1177 +wait 1 13 4.382027 4.382027 1168 +reader 1 12 4.465908 4.465908 1246 +outsid 1 12 4.465908 4.465908 1219 +newspap 1 12 4.465908 4.465908 1280 +smart 1 11 4.553877 4.553877 1352 +see 1 11 4.553877 4.553877 1337 +fill 1 11 4.553877 4.553877 1349 +santa 1 10 4.653960 4.653960 1441 +jump 1 9 4.753590 4.753590 1603 +joke 1 8 4.875197 4.875197 1620 +philosoph 2 7 5.010635 10.021270 1904 +channel 2 7 5.010635 10.021270 1836 +dedic 1 7 5.010635 5.010635 1843 +centuri 1 7 5.010635 5.010635 1935 +smaller 1 7 5.010635 5.010635 1874 +tri 1 6 5.164786 5.164786 2166 +strip 1 6 5.164786 5.164786 2203 +somewher 1 6 5.164786 5.164786 2176 +artist 1 6 5.164786 5.164786 2127 +feet 2 5 5.347108 10.694216 2492 +optimist 1 5 5.347108 5.347108 2501 +push 1 4 5.568345 5.568345 2635 +climb 1 4 5.568345 5.568345 2936 +surviv 1 4 5.568345 5.568345 2734 +writer 1 4 5.568345 5.568345 2783 +observatori 1 4 5.568345 5.568345 3070 +ultra 1 4 5.568345 5.568345 2889 +sundaram 2 3 5.857933 11.715866 3463 +astronomi 2 3 5.857933 11.715866 3974 +blind 1 3 5.857933 5.857933 3662 +scream 1 3 5.857933 5.857933 3609 +knee 1 3 5.857933 5.857933 3980 +woman 1 3 5.857933 5.857933 3539 +lunch 1 3 5.857933 5.857933 3369 +beat 1 3 5.857933 5.857933 3840 +packer 1 3 5.857933 5.857933 3728 +dozen 1 3 5.857933 5.857933 3905 +hindu 1 3 5.857933 5.857933 3590 +stukel 2 2 6.263398 12.526796 4698 +patient 1 2 6.263398 6.263398 6223 +spurt 1 2 6.263398 6.263398 5464 +crawl 1 2 6.263398 6.263398 5561 +damn 1 2 6.263398 6.263398 6129 +nake 1 2 6.263398 6.263398 6197 +cloth 1 2 6.263398 6.263398 5884 +conscious 1 2 6.263398 6.263398 5954 +destruct 1 2 6.263398 6.263398 6232 +phoenix 1 2 6.263398 6.263398 4552 +reward 1 2 6.263398 6.263398 5402 +disinform 1 2 6.263398 6.263398 5494 +trendi 1 2 6.263398 6.263398 4717 +browbeck 2 1 6.957497 13.914994 20193 +effronteri 1 1 6.957497 6.957497 20194 +femor 1 1 6.957497 6.957497 20195 +arteri 1 1 6.957497 6.957497 20196 +blood 1 1 6.957497 6.957497 20197 +anesthetist 1 1 6.957497 6.957497 20198 +groin 1 1 6.957497 6.957497 20199 +hamstr 1 1 6.957497 6.957497 20200 +scalpel 1 1 6.957497 6.957497 20201 +stab 1 1 6.957497 6.957497 20202 +leg 1 1 6.957497 6.957497 20203 +voilet 1 1 6.957497 6.957497 20204 +baboon 1 1 6.957497 6.957497 20205 +wig 1 1 6.957497 6.957497 20206 +pois 1 1 6.957497 6.957497 20207 +stomp 1 1 6.957497 6.957497 20208 +cop 1 1 6.957497 6.957497 20209 +rush 1 1 6.957497 6.957497 20210 +burrough 1 1 6.957497 6.957497 20211 +catapult 1 1 6.957497 6.957497 20212 +mann 1 1 6.957497 6.957497 20213 +wearabl 1 1 6.957497 6.957497 20214 +tierra 1 1 6.957497 6.957497 20215 +arcosanti 1 1 6.957497 6.957497 20216 +arcolog 1 1 6.957497 6.957497 20217 +krishnamurti 1 1 6.957497 6.957497 20218 +harass 1 1 6.957497 6.957497 20219 +factoid 1 1 6.957497 6.957497 20220 +astound 1 1 6.957497 6.957497 20221 +onion 1 1 6.957497 6.957497 20222 +washburn 1 1 6.957497 6.957497 20223 +len 1 1 6.957497 6.957497 20224 +insignific 1 1 6.957497 6.957497 20225 +webweath 1 1 6.957497 6.957497 20226 +timothi 1 1 6.957497 6.957497 20227 +leari 1 1 6.957497 6.957497 20228 +noam 1 1 6.957497 6.957497 20229 +chomski 1 1 6.957497 6.957497 20230 +conspiraci 1 1 6.957497 6.957497 20231 +buri 1 1 6.957497 6.957497 20232 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html new file mode 100644 index 00000000..d8c33b83 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~swanderb^swanderb.html @@ -0,0 +1,10 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +offic 2 299 1.098612 2.197224 13 +hour 1 165 1.791759 1.791759 46 +think 1 57 2.890372 2.890372 314 +mark 1 44 3.135494 3.135494 403 +brian 1 38 3.295837 3.295837 466 +bookmark 1 26 3.688879 3.688879 639 +pagebrian 1 4 5.568345 5.568345 3054 +swander 2 3 5.857933 11.715866 3440 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html new file mode 100644 index 00000000..fb2c95c5 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tamches^tamches.html @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +system 4 443 0.693147 2.772588 6 +research 1 431 0.693147 0.693147 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +offic 1 299 1.098612 1.098612 13 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +link 1 247 1.386294 1.386294 24 +oper 3 180 1.609438 4.828314 34 +paper 3 205 1.609438 4.828314 38 +fall 2 181 1.609438 3.218876 40 +parallel 2 169 1.791759 3.583518 60 +distribut 2 162 1.791759 3.583518 51 +network 2 168 1.791759 3.583518 61 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +perform 4 143 1.945910 7.783640 74 +file 2 132 1.945910 3.891820 70 +area 1 144 1.945910 1.945910 80 +architectur 1 139 1.945910 1.945910 77 +object 1 138 1.945910 1.945910 79 +spring 2 131 2.079442 4.158884 88 +dayton 1 119 2.079442 2.079442 104 +tool 1 117 2.079442 2.079442 93 +postscript 1 131 2.079442 2.079442 90 +analysi 1 124 2.079442 2.079442 98 +version 2 113 2.197225 4.394450 122 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +structur 1 106 2.197225 2.197225 105 +memori 2 101 2.302585 4.605170 139 +peopl 1 96 2.302585 2.302585 132 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +search 1 95 2.397895 2.397895 155 +octob 1 89 2.397895 2.397895 156 +west 1 83 2.484907 2.484907 192 +stuff 1 87 2.484907 2.484907 171 +level 1 87 2.484907 2.484907 180 +resourc 1 81 2.484907 2.484907 172 +april 1 77 2.564949 2.564949 196 +free 1 73 2.639057 2.639057 224 +differ 2 66 2.708050 5.416100 253 +street 1 63 2.772589 2.772589 293 +organ 1 65 2.772589 2.772589 265 +improv 1 62 2.772589 2.772589 289 +share 2 59 2.833213 5.666426 304 +colleg 1 61 2.833213 2.833213 300 +thesi 1 57 2.890372 2.890372 327 +talk 3 53 2.944439 8.833317 336 +instruct 1 53 2.944439 2.944439 332 +cool 1 49 3.044522 3.044522 374 +favorit 1 44 3.135494 3.135494 410 +music 1 42 3.218876 3.218876 436 +concurr 1 34 3.401197 3.401197 501 +multiprocessor 1 28 3.610918 3.610918 605 +enhanc 1 26 3.688879 3.688879 644 +supercomput 1 25 3.737670 3.737670 681 +highli 1 23 3.806662 3.806662 725 +els 1 19 4.007333 4.007333 843 +interconnect 1 17 4.110874 4.110874 937 +countri 1 15 4.248495 4.248495 1059 +fortran 1 15 4.248495 4.248495 1027 +block 1 13 4.382027 4.382027 1183 +wait 1 13 4.382027 4.382027 1168 +asynchron 1 12 4.465908 4.465908 1229 +typic 1 11 4.553877 4.553877 1360 +pose 1 9 4.753590 4.753590 1535 +paradyn 1 9 4.753590 4.753590 1614 +joke 1 8 4.875197 4.875197 1620 +attach 1 7 5.010635 5.010635 1785 +park 1 6 5.164786 5.164786 2218 +risc 1 6 5.164786 5.164786 2016 +matur 1 5 5.347108 5.347108 2269 +hate 1 5 5.347108 5.347108 2529 +sorri 1 4 5.568345 5.568345 3059 +eventu 1 4 5.568345 5.568345 3074 +cleaner 1 3 5.857933 5.857933 3775 +raid 1 3 5.857933 5.857933 4012 +snowboard 4 2 6.263398 25.053592 5084 +angri 1 2 6.263398 6.263398 5088 +greet 1 2 6.263398 6.263398 5903 +dude 1 2 6.263398 6.263398 4977 +callaghan 1 2 6.263398 6.263398 6128 +stripe 1 2 6.263398 6.263398 6106 +tamch 3 1 6.957497 20.872491 20233 +ariel 2 1 6.957497 13.914994 20234 +municip 2 1 6.957497 13.914994 20235 +bond 2 1 6.957497 13.914994 20236 +tamchesari 1 1 6.957497 6.957497 20237 +assistantemail 1 1 6.957497 6.957497 20238 +posei 1 1 6.957497 6.957497 20239 +sresearch 1 1 6.957497 6.957497 20240 +toolsstatu 1 1 6.957497 6.957497 20241 +toolsparallel 1 1 6.957497 6.957497 20242 +systemsbluesth 1 1 6.957497 6.957497 20243 +simpsonsseinfeldskiingskinetkeyston 1 1 6.957497 6.957497 20244 +vacum 1 1 6.957497 6.957497 20245 +dirt 1 1 6.957497 6.957497 20246 +whoa 1 1 6.957497 6.957497 20247 +incom 1 1 6.957497 6.957497 20248 +yahooespncpu 1 1 6.957497 6.957497 20249 +infoskinetoth 1 1 6.957497 6.957497 20250 +exokernel 1 1 6.957497 6.957497 20251 +zebra 1 1 6.957497 6.957497 20252 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tick^tick.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tick^tick.html new file mode 100644 index 00000000..4499b360 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tick^tick.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 3 705 0.000000 0.000000 3 +work 2 380 0.693147 1.386294 9 +program 1 374 0.693147 0.693147 7 +last 1 314 1.098612 1.098612 14 +link 1 247 1.386294 1.386294 24 +wisc 1 242 1.386294 1.386294 33 +class 1 199 1.609438 1.609438 37 +like 2 132 1.945910 3.891820 81 +click 2 142 1.945910 3.891820 78 +welcom 1 122 2.079442 2.079442 99 +high 1 130 2.079442 2.079442 101 +look 3 107 2.197225 6.591675 115 +make 1 111 2.197225 2.197225 120 +well 1 109 2.197225 2.197225 121 +person 1 111 2.197225 2.197225 117 +take 2 97 2.302585 4.605170 134 +pictur 5 89 2.397895 11.989475 160 +school 1 84 2.484907 2.484907 188 +academ 1 82 2.484907 2.484907 178 +good 2 77 2.564949 5.129898 200 +know 1 80 2.564949 2.564949 198 +resum 1 79 2.564949 2.564949 217 +name 1 72 2.639057 2.639057 220 +would 1 67 2.708050 2.708050 251 +copi 1 63 2.772589 2.772589 284 +dept 1 64 2.772589 2.772589 291 +import 1 65 2.772589 2.772589 282 +juli 1 60 2.833213 2.833213 305 +think 1 57 2.890372 2.890372 314 +found 1 53 2.944439 2.944439 337 +still 1 50 3.044522 3.044522 362 +basic 1 50 3.044522 3.044522 360 +favorit 1 44 3.135494 3.135494 410 +show 3 43 3.178054 9.534162 417 +music 2 42 3.218876 6.437752 436 +movi 1 40 3.258097 3.258097 459 +least 1 35 3.401197 3.401197 516 +human 1 32 3.465736 3.465736 546 +turn 2 29 3.583519 7.167038 586 +jeff 1 25 3.737670 3.737670 673 +never 1 25 3.737670 3.737670 671 +wish 1 24 3.761200 3.761200 692 +miscellan 1 23 3.806662 3.806662 731 +voic 1 21 3.912023 3.912023 806 +newsgroup 1 21 3.912023 3.912023 783 +separ 1 19 4.007333 4.007333 844 +andrew 1 19 4.007333 4.007333 849 +coupl 1 17 4.110874 4.110874 939 +monitor 1 17 4.110874 4.110874 941 +seek 1 17 4.110874 4.110874 954 +took 1 16 4.174387 4.174387 1010 +sign 1 16 4.174387 4.174387 970 +choos 1 16 4.174387 4.174387 964 +hobbi 1 16 4.174387 4.174387 1009 +doesn 1 15 4.248495 4.248495 1055 +club 1 15 4.248495 4.248495 1058 +someon 2 13 4.382027 8.764054 1128 +song 2 11 4.553877 9.107754 1380 +night 1 11 4.553877 4.553877 1319 +sound 2 9 4.753590 9.507180 1605 +theme 1 8 4.875197 4.875197 1707 +henc 1 7 5.010635 5.010635 1805 +pace 1 6 5.164786 5.164786 2011 +plu 1 6 5.164786 5.164786 2004 +lampert 2 5 5.347108 10.694216 2398 +babi 2 5 5.347108 10.694216 2493 +tick 2 4 5.568345 11.136690 2975 +heard 1 4 5.568345 5.568345 2895 +fire 1 4 5.568345 5.568345 3001 +nota 1 3 5.857933 5.857933 3785 +crow 1 3 5.857933 5.857933 3845 +straight 1 3 5.857933 5.857933 3655 +forward 1 3 5.857933 5.857933 3784 +yearbook 1 2 6.263398 6.263398 6243 +cute 1 2 6.263398 6.263398 5108 +befound 1 2 6.263398 6.263398 5964 +sick 1 2 6.263398 6.263398 5773 +mstk 1 2 6.263398 6.263398 5501 +pagejeff 1 1 6.957497 6.957497 20253 +ricardo 1 1 6.957497 6.957497 20254 +montalban 1 1 6.957497 6.957497 20255 +foron 1 1 6.957497 6.957497 20256 +incrimin 1 1 6.957497 6.957497 20257 +aconvict 1 1 6.957497 6.957497 20258 +lasttim 1 1 6.957497 6.957497 20259 +threaten 1 1 6.957497 6.957497 20260 +intoa 1 1 6.957497 6.957497 20261 +dispens 1 1 6.957497 6.957497 20262 +anautograph 1 1 6.957497 6.957497 20263 +pictureappear 1 1 6.957497 6.957497 20264 +weasel 1 1 6.957497 6.957497 20265 +factswho 1 1 6.957497 6.957497 20266 +relatedwhat 1 1 6.957497 6.957497 20267 +entertainmentbook 1 1 6.957497 6.957497 20268 +subjectsfriendsno 1 1 6.957497 6.957497 20269 +organizationsgroup 1 1 6.957497 6.957497 20270 +inmi 1 1 6.957497 6.957497 20271 +linksugh 1 1 6.957497 6.957497 20272 +servo 1 1 6.957497 6.957497 20273 +eclect 1 1 6.957497 6.957497 20274 +paraphenaliai 1 1 6.957497 6.957497 20275 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html new file mode 100644 index 00000000..003b5023 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tmunson^tmunson.html @@ -0,0 +1,19 @@ +term, tf, in documents count, idf, tfidf, wordid +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +inform 1 412 0.693147 0.693147 8 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +area 1 144 1.945910 1.945910 80 +teach 1 108 2.197225 2.197225 112 +mathemat 1 108 2.197225 2.197225 123 +section 1 94 2.397895 2.397895 149 +sinc 1 90 2.397895 2.397895 159 +contain 1 81 2.484907 2.484907 174 +todd 1 15 4.248495 4.248495 1051 +wealth 1 3 5.857933 5.857933 3353 +plug 1 2 6.263398 6.263398 5167 +tmunson 1 2 6.263398 6.263398 4809 +mathematicalprogram 2 1 6.957497 13.914994 20276 +homepagetodd 1 1 6.957497 6.957497 20277 +homepagein 1 1 6.957497 6.957497 20278 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html new file mode 100644 index 00000000..9d6b8c55 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~toonen^toonen.html @@ -0,0 +1,36 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +offic 1 299 1.098612 1.098612 13 +modifi 1 178 1.609438 1.609438 35 +wisconsin 1 169 1.791759 1.791759 54 +dayton 1 119 2.079442 2.079442 104 +thing 1 84 2.484907 2.484907 189 +life 1 50 3.044522 3.044522 375 +brian 1 38 3.295837 3.295837 466 +streetmadison 1 38 3.295837 3.295837 474 +departmentunivers 1 24 3.761200 3.761200 711 +accept 1 18 4.060443 4.060443 879 +stand 1 18 4.060443 4.060443 891 +chief 1 7 5.010635 5.010635 1829 +ground 1 7 5.010635 5.010635 1955 +pagebrian 1 4 5.568345 5.568345 3054 +bear 1 4 5.568345 5.568345 2651 +civil 1 3 5.857933 5.857933 3908 +medit 1 2 6.263398 6.263398 5777 +creatur 1 2 6.263398 6.263398 6107 +essenc 1 2 6.263398 6.263398 6150 +toonen 1 1 6.957497 6.957497 20279 +cswhatev 1 1 6.957497 6.957497 20280 +seattleth 1 1 6.957497 6.957497 20281 +tipi 1 1 6.957497 6.957497 20282 +itsmean 1 1 6.957497 6.957497 20283 +kinship 1 1 6.957497 6.957497 20284 +acknowledgingun 1 1 6.957497 6.957497 20285 +infus 1 1 6.957497 6.957497 20286 +thetru 1 1 6.957497 6.957497 20287 +luther 1 1 6.957497 6.957497 20288 +oglala 1 1 6.957497 6.957497 20289 +siouxlast 1 1 6.957497 6.957497 20290 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html new file mode 100644 index 00000000..ea7e2ef0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~tsiolis^tsiolis.html @@ -0,0 +1,16 @@ +term, tf, in documents count, idf, tfidf, wordid +page 3 705 0.000000 0.000000 3 +home 2 672 0.000000 0.000000 1 +read 1 154 1.791759 1.791759 47 +site 1 106 2.197225 2.197225 119 +version 1 113 2.197225 2.197225 122 +browser 1 56 2.890372 2.890372 313 +netscap 2 44 3.135494 6.270988 395 +option 1 30 3.555348 3.555348 575 +enhanc 1 26 3.688879 3.688879 644 +latest 1 21 3.912023 3.912023 785 +thano 1 3 5.857933 5.857933 3424 +tsioli 1 3 5.857933 5.857933 3418 +shouldconsid 1 2 6.263398 6.263398 6061 +upgrad 1 1 6.957497 6.957497 20291 +ifthat 1 1 6.957497 6.957497 20292 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html new file mode 100644 index 00000000..bb06ed39 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~turnidge^turnidge.html @@ -0,0 +1,57 @@ +term, tf, in documents count, idf, tfidf, wordid +scienc 1 640 0.000000 0.000000 4 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +modifi 1 178 1.609438 1.609438 35 +madison 4 165 1.791759 7.167036 55 +wisconsin 2 169 1.791759 3.583518 54 +year 1 148 1.945910 1.945910 84 +click 1 142 1.945910 1.945910 78 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +section 1 94 2.397895 2.397895 149 +locat 1 59 2.833213 2.833213 303 +case 1 51 2.995732 2.995732 351 +keep 1 44 3.135494 3.135494 409 +long 1 43 3.178054 3.178054 413 +compani 1 41 3.218876 3.218876 423 +live 1 40 3.258097 3.258097 451 +origin 1 38 3.295837 3.295837 472 +computersci 1 30 3.555348 3.555348 562 +departmentunivers 1 24 3.761200 3.761200 711 +left 1 19 4.007333 4.007333 851 +todd 3 15 4.248495 12.745485 1051 +enough 1 15 4.248495 4.248495 1040 +awai 1 10 4.653960 4.653960 1447 +babylon 1 8 4.875197 4.875197 1731 +hold 1 8 4.875197 4.875197 1645 +judg 1 8 4.875197 4.875197 1644 +sciencesat 1 7 5.010635 5.010635 1968 +ohio 2 5 5.347108 10.694216 2447 +dougla 1 5 5.347108 5.347108 2471 +amus 1 5 5.347108 5.347108 2366 +turnidg 3 4 5.568345 16.705035 2829 +rep 1 4 5.568345 5.568345 3087 +western 1 4 5.568345 5.568345 3062 +kent 1 4 5.568345 5.568345 2744 +evid 1 4 5.568345 5.568345 2768 +shortcut 1 3 5.857933 5.857933 3932 +axi 2 2 6.263398 12.526796 6069 +milton 1 2 6.263398 6.263398 6153 +professorthoma 1 2 6.263398 6.263398 5053 +turnidgeschoolcomput 1 1 6.957497 6.957497 20293 +homemuppet 1 1 6.957497 6.957497 20294 +eyesightright 1 1 6.957497 6.957497 20295 +studyingprogram 1 1 6.957497 6.957497 20296 +mathematicsand 1 1 6.957497 6.957497 20297 +reserveunivers 1 1 6.957497 6.957497 20298 +cleveland 1 1 6.957497 6.957497 20299 +myfamili 1 1 6.957497 6.957497 20300 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~twang^twang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~twang^twang.html new file mode 100644 index 00000000..716475fc --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~twang^twang.html @@ -0,0 +1,38 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +last 1 314 1.098612 1.098612 14 +graduat 1 215 1.386294 1.386294 31 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +updat 1 191 1.609438 1.609438 41 +madison 2 165 1.791759 3.583518 55 +phone 2 175 1.791759 3.583518 45 +contact 1 153 1.791759 1.791759 59 +wisconsin 1 169 1.791759 1.791759 54 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +number 2 130 2.079442 4.158884 97 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +teach 1 108 2.197225 2.197225 112 +assist 1 112 2.197225 2.197225 113 +sinc 3 90 2.397895 7.193685 159 +june 1 79 2.564949 2.564949 214 +street 1 63 2.772589 2.772589 293 +visit 1 63 2.772589 2.772589 288 +finger 1 52 2.995732 2.995732 354 +visitor 2 49 3.044522 6.089044 371 +wang 3 21 3.912023 11.736069 790 +heavi 1 7 5.010635 5.010635 1841 +bldg 1 4 5.568345 5.568345 2983 +taxiao 3 2 6.263398 18.790194 4806 +twang 1 2 6.263398 6.263398 5730 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~uri^uri.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~uri^uri.html new file mode 100644 index 00000000..570f88d6 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~uri^uri.html @@ -0,0 +1,13 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +wisc 1 242 1.386294 1.386294 33 +start 1 83 2.484907 2.484907 173 +meet 2 72 2.639057 5.278114 229 +window 1 68 2.708050 2.708050 242 +microsoft 1 38 3.295837 3.295837 468 +trek 2 3 5.857933 11.715866 4025 +shaft 2 2 6.263398 12.526796 6186 +pageuri 1 1 6.957497 6.957497 20301 +pageemail 1 1 6.957497 6.957497 20302 +eduinterest 1 1 6.957497 6.957497 20303 +diversionsstart 1 1 6.957497 6.957497 20304 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html new file mode 100644 index 00000000..8869586b --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~vganti^vganti.html @@ -0,0 +1,61 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +research 1 431 0.693147 0.693147 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +student 2 343 1.098612 2.197224 19 +time 2 293 1.098612 2.197224 17 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +graduat 2 215 1.386294 2.772588 31 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +madison 4 165 1.791759 7.167036 55 +wisconsin 1 169 1.791759 1.791759 54 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +click 1 142 1.945910 1.945910 78 +databas 2 122 2.079442 4.158884 86 +dayton 1 119 2.079442 2.079442 104 +look 1 107 2.197225 2.197225 115 +homepag 2 93 2.397895 4.795790 148 +real 2 93 2.397895 4.795790 144 +present 1 91 2.397895 2.397895 145 +info 1 85 2.484907 2.484907 176 +educ 1 86 2.484907 2.484907 191 +know 1 80 2.564949 2.564949 198 +want 1 79 2.564949 2.564949 199 +onlin 1 75 2.639057 2.639057 223 +januari 1 62 2.772589 2.772589 264 +undergradu 1 54 2.944439 2.944439 338 +basic 1 50 3.044522 3.044522 360 +past 1 42 3.218876 3.218876 428 +india 3 32 3.465736 10.397208 550 +hope 1 28 3.610918 3.610918 610 +sometim 1 24 3.761200 3.761200 696 +earlier 1 13 4.382027 4.382027 1140 +usaphon 1 9 4.753590 4.753590 1600 +madra 2 8 4.875197 9.750394 1770 +nativ 1 6 5.164786 5.164786 2192 +asha 3 3 5.857933 17.573799 4037 +venkatesh 1 2 6.263398 6.263398 6154 +andhra 1 2 6.263398 6.263398 5571 +pradesh 1 2 6.263398 6.263398 5572 +yearbook 1 2 6.263398 6.263398 6243 +till 1 2 6.263398 6.263398 5850 +btech 1 2 6.263398 6.263398 6123 +genesi 1 2 6.263398 6.263398 6011 +ganti 2 1 6.957497 13.914994 20305 +godav 2 1 6.957497 13.914994 20306 +pagevenkatesh 1 1 6.957497 6.957497 20307 +vganti 1 1 6.957497 6.957497 20308 +studentoffic 1 1 6.957497 6.957497 20309 +kakinada 1 1 6.957497 6.957497 20310 +hostel 1 1 6.957497 6.957497 20311 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html new file mode 100644 index 00000000..a969a2f3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~vijay^vijay.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +depart 2 457 0.693147 1.386294 12 +research 1 431 0.693147 0.693147 10 +work 1 380 0.693147 0.693147 9 +project 2 340 1.098612 2.197224 18 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +design 1 213 1.386294 1.386294 25 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +contact 1 153 1.791759 1.791759 59 +address 1 170 1.791759 1.791759 62 +phone 1 175 1.791759 1.791759 45 +distribut 1 162 1.791759 1.791759 51 +data 1 170 1.791759 1.791759 49 +architectur 3 139 1.945910 5.837730 77 +file 2 132 1.945910 3.891820 70 +compil 3 122 2.079442 6.238326 96 +dayton 1 119 2.079442 2.079442 104 +technolog 1 131 2.079442 2.079442 102 +schedul 1 119 2.079442 2.079442 85 +intern 3 108 2.197225 6.591675 128 +person 1 111 2.197225 2.197225 117 +memori 1 101 2.302585 2.302585 139 +commun 4 95 2.397895 9.591580 157 +educ 1 86 2.484907 2.484907 191 +institut 1 84 2.484907 2.484907 187 +symposium 3 72 2.639057 7.917171 238 +august 1 66 2.708050 2.708050 257 +street 1 63 2.772589 2.772589 293 +processor 3 54 2.944439 8.833317 335 +undergradu 1 54 2.944439 2.944439 338 +profession 1 51 2.995732 2.995732 345 +advisor 1 51 2.995732 2.995732 355 +annual 2 40 3.258097 6.516194 458 +submit 1 39 3.258097 3.258097 440 +go 1 33 3.433987 3.433987 529 +india 1 32 3.465736 3.465736 550 +dissert 1 32 3.465736 3.465736 549 +depend 1 29 3.583519 3.583519 583 +strategi 1 25 3.737670 3.737670 682 +doctor 1 24 3.761200 3.761200 709 +predict 1 19 4.007333 4.007333 855 +regist 6 17 4.110874 24.665244 938 +micro 2 15 4.248495 8.496990 1031 +side 1 15 4.248495 4.248495 1022 +affili 1 13 4.382027 4.382027 1194 +multiscalar 7 8 4.875197 34.126379 1783 +sohi 5 6 5.164786 25.823930 2237 +microarchitectur 2 6 5.164786 10.329572 2238 +guri 1 5 5.347108 5.347108 2578 +vijai 3 4 5.568345 16.705035 2960 +vijaykumar 5 3 5.857933 29.289665 4011 +breach 3 3 5.857933 17.573799 4009 +anatomi 1 3 5.857933 5.857933 4010 +birla 1 2 6.263398 6.263398 6239 +pilani 1 2 6.263398 6.263398 6240 +architecturet 1 1 6.957497 6.957497 20312 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html new file mode 100644 index 00000000..8f08680e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~watrous^watrous.html @@ -0,0 +1,75 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +inform 2 412 0.693147 1.386294 8 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +link 3 247 1.386294 4.158882 24 +wisc 1 242 1.386294 1.386294 33 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +algorithm 1 162 1.791759 1.791759 57 +dayton 1 119 2.079442 2.079442 104 +number 1 130 2.079442 2.079442 97 +confer 1 126 2.079442 2.079442 100 +theori 2 111 2.197225 4.394450 127 +site 1 106 2.197225 2.197225 119 +associ 1 93 2.397895 2.397895 151 +refer 1 78 2.564949 2.564949 203 +laboratori 2 63 2.772589 5.545178 292 +foundat 1 62 2.772589 2.772589 286 +telephon 1 50 3.044522 3.044522 373 +archiv 1 49 3.044522 3.044522 364 +physic 1 47 3.091042 3.091042 377 +theoret 1 39 3.258097 3.258097 446 +streetmadison 1 38 3.295837 3.295837 474 +bibliographi 3 34 3.401197 10.203591 518 +approxim 1 35 3.401197 3.401197 509 +random 1 34 3.401197 3.401197 511 +john 3 33 3.433987 10.301961 532 +proc 1 26 3.688879 3.688879 649 +departmentunivers 1 24 3.761200 3.761200 711 +daili 1 24 3.761200 3.761200 706 +theorem 1 21 3.912023 3.912023 786 +hypertext 1 19 4.007333 4.007333 865 +dimension 1 18 4.060443 4.060443 909 +element 1 18 4.060443 4.060443 895 +stanford 1 17 4.110874 4.110874 955 +fourth 1 16 4.174387 4.174387 999 +polynomi 1 14 4.317488 4.317488 1069 +automata 1 13 4.382027 4.382027 1135 +montreal 1 7 5.010635 5.010635 1961 +quantum 6 6 5.164786 30.988716 2214 +oxford 1 6 5.164786 5.164786 2121 +forecast 1 6 5.164786 5.164786 2171 +cellular 1 5 5.347108 5.347108 2433 +symp 1 5 5.347108 5.347108 2376 +particl 1 5 5.347108 5.347108 2436 +beam 1 5 5.347108 5.347108 2344 +ucla 1 5 5.347108 5.347108 2502 +webster 1 5 5.347108 5.347108 2468 +preprint 1 3 5.857933 5.857933 3481 +quotat 1 3 5.857933 5.857933 3121 +pagejohn 1 2 6.263398 6.263398 5603 +thesauru 1 2 6.263398 6.263398 6238 +isthmu 1 2 6.263398 6.263398 6152 +watrou 5 1 6.957497 34.787485 20313 +artin 1 1 6.957497 6.957497 20314 +whapl 1 1 6.957497 6.957497 20315 +canadiannumb 1 1 6.957497 6.957497 20316 +assort 1 1 6.957497 6.957497 20317 +lanl 1 1 6.957497 6.957497 20318 +hypatia 1 1 6.957497 6.957497 20319 +stylehypertext 1 1 6.957497 6.957497 20320 +interfaceroget 1 1 6.957497 6.957497 20321 +parasol 1 1 6.957497 6.957497 20322 +recordsplayst 1 1 6.957497 6.957497 20323 +linksweath 1 1 6.957497 6.957497 20324 +madisonth 1 1 6.957497 6.957497 20325 +pagemathemat 1 1 6.957497 6.957497 20326 +servermathematician 1 1 6.957497 6.957497 20327 +biographiesgeek 1 1 6.957497 6.957497 20328 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html new file mode 100644 index 00000000..f9d39dfe --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~weiruc^weiruc.html @@ -0,0 +1,109 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 5 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +depart 1 457 0.693147 0.693147 12 +last 1 314 1.098612 1.098612 14 +email 1 220 1.386294 1.386294 29 +languag 1 227 1.386294 1.386294 26 +softwar 1 220 1.386294 1.386294 30 +updat 1 191 1.609438 1.609438 41 +madison 1 165 1.791759 1.791759 55 +send 3 114 2.197225 6.591675 109 +world 2 115 2.197225 4.394450 126 +find 1 111 2.197225 2.197225 111 +peopl 1 96 2.302585 2.302585 132 +part 1 98 2.302585 2.302585 129 +call 1 91 2.397895 2.397895 153 +associ 1 93 2.397895 2.397895 151 +pictur 1 89 2.397895 2.397895 160 +david 1 71 2.639057 2.639057 232 +test 1 66 2.708050 2.708050 252 +guid 1 63 2.772589 2.772589 267 +plai 2 60 2.833213 5.666426 307 +think 1 57 2.890372 2.890372 314 +three 1 54 2.944439 2.944439 330 +suggest 1 53 2.944439 2.944439 331 +date 3 51 2.995732 8.987196 344 +friend 1 48 3.044522 3.044522 376 +possibl 1 47 3.091042 3.091042 378 +physic 1 47 3.091042 3.091042 377 +favorit 1 44 3.135494 3.135494 410 +show 2 43 3.178054 6.356108 417 +around 1 43 3.178054 3.178054 415 +review 1 42 3.218876 3.218876 425 +movi 2 40 3.258097 6.516194 459 +must 1 40 3.258097 3.258097 442 +littl 1 39 3.258097 3.258097 454 +late 1 40 3.258097 3.258097 439 +correct 1 38 3.295837 3.295837 462 +game 1 36 3.367296 3.367296 498 +least 1 35 3.401197 3.401197 516 +richard 1 31 3.496508 3.496508 559 +steve 1 29 3.583519 3.583519 594 +weather 1 28 3.610918 3.610918 618 +great 1 27 3.637586 3.637586 626 +sometim 1 24 3.761200 3.761200 696 +seri 1 24 3.761200 3.761200 708 +wish 1 24 3.761200 3.761200 692 +deal 1 22 3.850148 3.850148 736 +reduc 1 22 3.850148 3.850148 759 +alumni 1 21 3.912023 3.912023 807 +increas 1 20 3.951244 3.951244 829 +longer 1 20 3.951244 3.951244 816 +miss 1 19 4.007333 4.007333 866 +english 1 15 4.248495 4.248495 1033 +manner 1 14 4.317488 4.317488 1074 +someon 1 13 4.382027 4.382027 1128 +entertain 5 12 4.465908 22.329540 1286 +food 3 12 4.465908 13.397724 1285 +amount 1 12 4.465908 4.465908 1208 +player 1 11 4.553877 4.553877 1371 +recit 2 9 4.753590 9.507180 1475 +french 1 9 4.753590 4.753590 1511 +hockei 1 8 4.875197 4.875197 1760 +affect 3 6 5.164786 15.494358 2044 +commit 1 6 5.164786 5.164786 2233 +moder 1 6 5.164786 5.164786 2112 +postcard 1 6 5.164786 5.164786 2181 +forecast 1 6 5.164786 5.164786 2171 +ship 2 5 5.347108 10.694216 2534 +speaker 1 5 5.347108 5.347108 2370 +feet 1 5 5.347108 5.347108 2492 +circumst 1 5 5.347108 5.347108 2283 +peke 1 5 5.347108 5.347108 2539 +cell 1 5 5.347108 5.347108 2274 +lesson 1 5 5.347108 5.347108 2568 +insan 1 3 5.857933 5.857933 4006 +omit 1 3 5.857933 5.857933 3466 +letterman 1 3 5.857933 5.857933 3931 +truck 1 2 6.263398 6.263398 5713 +proportion 1 2 6.263398 6.263398 4091 +behaviour 1 2 6.263398 6.263398 4724 +studio 1 2 6.263398 6.263398 5838 +francais 1 2 6.263398 6.263398 6020 +weiru 1 1 6.957497 6.957497 20329 +eiru 1 1 6.957497 6.957497 20330 +ppppleas 1 1 6.957497 6.957497 20331 +asylum 1 1 6.957497 6.957497 20332 +verbal 1 1 6.957497 6.957497 20333 +cargo 1 1 6.957497 6.957497 20334 +havenos 1 1 6.957497 6.957497 20335 +smell 1 1 6.957497 6.957497 20336 +leder 1 1 6.957497 6.957497 20337 +beoffer 1 1 6.957497 6.957497 20338 +customari 1 1 6.957497 6.957497 20339 +begina 1 1 6.957497 6.957497 20340 +amountof 1 1 6.957497 6.957497 20341 +merest 1 1 6.957497 6.957497 20342 +ofaffect 1 1 6.957497 6.957497 20343 +excruciatingli 1 1 6.957497 6.957497 20344 +atmadison 1 1 6.957497 6.957497 20345 +grei 1 1 6.957497 6.957497 20346 +francai 1 1 6.957497 6.957497 20347 +dictionnairefrancai 1 1 6.957497 6.957497 20348 +anglai 1 1 6.957497 6.957497 20349 +dictionnair 1 1 6.957497 6.957497 20350 +relatif 1 1 6.957497 6.957497 20351 +lafrancophoni 1 1 6.957497 6.957497 20352 +degrammair 1 1 6.957497 6.957497 20353 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html new file mode 100644 index 00000000..955ae752 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~weiz^weiz.html @@ -0,0 +1,83 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +scienc 3 640 0.000000 0.000000 4 +univers 3 571 0.000000 0.000000 5 +home 2 672 0.000000 0.000000 1 +page 2 705 0.000000 0.000000 3 +depart 3 457 0.693147 2.079441 12 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +student 2 343 1.098612 2.197224 19 +us 2 329 1.098612 2.197224 16 +engin 1 297 1.098612 1.098612 20 +softwar 2 220 1.386294 2.772588 30 +graduat 1 215 1.386294 1.386294 31 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +includ 2 208 1.609438 3.218876 42 +oper 1 180 1.609438 1.609438 34 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +develop 1 174 1.791759 1.791759 53 +first 1 140 1.945910 1.945910 71 +year 1 148 1.945910 1.945910 84 +technolog 2 131 2.079442 4.158884 102 +welcom 1 122 2.079442 2.079442 99 +provid 1 121 2.079442 2.079442 94 +manag 3 114 2.197225 6.591675 125 +peopl 2 96 2.302585 4.605170 132 +memori 2 101 2.302585 4.605170 139 +center 1 88 2.397895 2.397895 158 +educ 1 86 2.484907 2.484907 191 +level 1 87 2.484907 2.484907 180 +second 1 81 2.484907 2.484907 166 +state 1 76 2.564949 2.564949 207 +differ 1 66 2.708050 2.708050 253 +written 1 63 2.772589 2.772589 278 +best 1 59 2.833213 2.833213 299 +variou 1 56 2.890372 2.890372 317 +tabl 1 51 2.995732 2.995732 346 +without 1 50 3.044522 3.044522 370 +california 1 46 3.091042 3.091042 388 +china 1 37 3.332205 3.332205 487 +game 1 36 3.367296 3.367296 498 +within 1 33 3.433987 3.433987 525 +platform 2 29 3.583519 7.167038 591 +quot 1 29 3.583519 3.583519 582 +framework 1 28 3.610918 3.610918 606 +mine 1 26 3.688879 3.688879 654 +corpor 2 21 3.912023 7.824046 802 +tenni 1 20 3.951244 3.951244 838 +expert 1 20 3.951244 3.951244 833 +beij 1 19 4.007333 4.007333 876 +ultim 1 17 4.110874 4.110874 943 +jose 2 16 4.174387 8.348774 976 +zhang 1 16 4.174387 4.174387 980 +tsinghua 1 13 4.382027 4.382027 1195 +nasa 1 13 4.382027 4.382027 1188 +solari 1 12 4.465908 4.465908 1238 +card 1 10 4.653960 4.653960 1435 +cheat 1 10 4.653960 4.653960 1395 +puzzl 1 5 5.347108 5.347108 2507 +republ 2 4 5.568345 11.136690 3032 +shanghai 1 4 5.568345 5.568345 2925 +hometown 1 3 5.857933 5.857933 3817 +am 1 3 5.857933 5.857933 3386 +weiz 1 2 6.263398 6.263398 4693 +eduwork 1 2 6.263398 6.263398 5813 +windowsnt 1 2 6.263398 6.263398 5440 +tandem 1 2 6.263398 6.263398 5027 +chinaemail 1 1 6.957497 6.957497 20354 +experiencecontractor 1 1 6.957497 6.957497 20355 +tuxedo 1 1 6.957497 6.957497 20356 +pathwai 1 1 6.957497 6.957497 20357 +sherpa 1 1 6.957497 6.957497 20358 +hobbiesma 1 1 6.957497 6.957497 20359 +jiangbridg 1 1 6.957497 6.957497 20360 +pingpong 1 1 6.957497 6.957497 20361 +joggingth 1 1 6.957497 6.957497 20362 +challengesolv 1 1 6.957497 6.957497 20363 +sweeper 1 1 6.957497 6.957497 20364 +dayth 1 1 6.957497 6.957497 20365 +ackowledgementthi 1 1 6.957497 6.957497 20366 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html new file mode 100644 index 00000000..e3e52be0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~wenger^wenger.html @@ -0,0 +1,63 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +page 4 705 0.000000 0.000000 3 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +comput 1 775 0.000000 0.000000 2 +work 2 380 0.693147 1.386294 9 +inform 1 412 0.693147 0.693147 8 +research 1 431 0.693147 0.693147 10 +project 2 340 1.098612 2.197224 18 +email 1 220 1.386294 1.386294 29 +wisc 1 242 1.386294 1.386294 33 +softwar 1 220 1.386294 1.386294 30 +updat 1 191 1.609438 1.609438 41 +madison 3 165 1.791759 5.375277 55 +wisconsin 2 169 1.791759 3.583518 54 +data 2 170 1.791759 3.583518 49 +note 1 142 1.945910 1.945910 67 +construct 1 139 1.945910 1.945910 82 +welcom 1 122 2.079442 2.079442 99 +dayton 1 119 2.079442 2.079442 104 +provid 1 121 2.079442 2.079442 94 +need 1 98 2.302585 2.302585 135 +peopl 1 96 2.302585 2.302585 132 +pictur 1 89 2.397895 2.397895 160 +west 1 83 2.484907 2.484907 192 +come 1 78 2.564949 2.564949 202 +good 1 77 2.564949 2.564949 200 +main 1 67 2.708050 2.708050 256 +explor 1 58 2.890372 2.890372 324 +still 1 50 3.044522 3.044522 362 +telephon 1 50 3.044522 3.044522 373 +streetmadison 1 38 3.295837 3.295837 474 +cluster 1 28 3.610918 3.610918 612 +departmentunivers 1 24 3.761200 3.761200 711 +definit 1 19 4.007333 4.007333 864 +miron 1 14 4.317488 4.317488 1110 +dbm 1 13 4.382027 4.382027 1136 +scan 1 12 4.465908 4.465908 1243 +devis 1 10 4.653960 4.653960 1451 +yanni 1 8 4.875197 4.875197 1713 +ioannidi 1 8 4.875197 4.875197 1714 +wouldn 1 7 5.010635 5.010635 1970 +kent 3 4 5.568345 16.705035 2744 +edufing 1 4 5.568345 5.568345 2713 +wenger 3 3 5.857933 17.573799 4023 +agre 1 3 5.857933 5.857933 4007 +groupuw 1 3 5.857933 5.857933 3934 +preparedfor 1 2 6.263398 6.263398 5886 +workth 1 2 6.263398 6.263398 6137 +andvisu 1 2 6.263398 6.263398 6189 +pothol 1 1 6.957497 6.957497 20367 +wengerassoci 1 1 6.957497 6.957497 20368 +researchercomput 1 1 6.957497 6.957497 20369 +arecod 1 1 6.957497 6.957497 20370 +anddevis 1 1 6.957497 6.957497 20371 +acronym 1 1 6.957497 6.957497 20372 +importantpart 1 1 6.957497 6.957497 20373 +visualizationproduc 1 1 6.957497 6.957497 20374 +livnyraghu 1 1 6.957497 6.957497 20375 +ramakrishnanmor 1 1 6.957497 6.957497 20376 +pagewiscinfo 1 1 6.957497 6.957497 20377 +personallinksimageslast 1 1 6.957497 6.957497 20378 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~wwt^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~wwt^ new file mode 100644 index 00000000..f7a7d1b3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~wwt^ @@ -0,0 +1,118 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +page 2 705 0.000000 0.000000 3 +home 1 672 0.000000 0.000000 1 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +program 1 374 0.693147 0.693147 7 +work 1 380 0.693147 0.693147 9 +inform 1 412 0.693147 0.693147 8 +project 5 340 1.098612 5.493060 18 +last 1 314 1.098612 1.098612 14 +softwar 2 220 1.386294 2.772588 30 +languag 1 227 1.386294 1.386294 26 +wisc 1 242 1.386294 1.386294 33 +paper 1 205 1.609438 1.609438 38 +group 1 183 1.609438 1.609438 36 +updat 1 191 1.609438 1.609438 41 +wisconsin 7 169 1.791759 12.542313 54 +implement 2 152 1.791759 3.583518 52 +parallel 1 169 1.791759 1.791759 60 +address 1 170 1.791759 1.791759 62 +data 1 170 1.791759 1.791759 49 +develop 1 174 1.791759 1.791759 53 +recent 1 167 1.791759 1.791759 58 +like 2 132 1.945910 3.891820 81 +architectur 2 139 1.945910 3.891820 77 +support 1 132 1.945910 1.945910 83 +first 1 140 1.945910 1.945910 71 +perform 1 143 1.945910 1.945910 74 +relat 1 139 1.945910 1.945910 68 +compil 2 122 2.079442 4.158884 96 +high 1 130 2.079442 2.079442 101 +provid 1 121 2.079442 2.079442 94 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +world 1 115 2.197225 2.197225 126 +memori 3 101 2.302585 6.907755 139 +technic 1 100 2.302585 2.302585 140 +call 1 91 2.397895 2.397895 153 +level 2 87 2.484907 4.969814 180 +librari 1 87 2.484907 2.484907 181 +novemb 1 81 2.484907 2.484907 179 +wide 1 84 2.484907 2.484907 185 +interfac 3 79 2.564949 7.694847 209 +refer 1 78 2.564949 2.564949 203 +messag 1 76 2.564949 2.564949 212 +complet 1 77 2.564949 2.564949 208 +sourc 1 77 2.564949 2.564949 201 +logic 1 71 2.639057 2.639057 230 +name 1 72 2.639057 2.639057 220 +share 3 59 2.833213 8.499639 304 +juli 1 60 2.833213 2.833213 305 +space 1 57 2.890372 2.890372 310 +think 1 57 2.890372 2.890372 314 +overview 1 56 2.890372 2.890372 323 +extens 1 53 2.944439 2.944439 340 +allow 1 53 2.944439 2.944439 333 +talk 1 53 2.944439 2.944439 336 +hardwar 3 51 2.995732 8.987196 350 +week 1 52 2.995732 2.995732 343 +approach 1 48 3.044522 3.044522 366 +adapt 1 46 3.091042 3.091042 387 +mark 1 44 3.135494 3.135494 403 +mechan 1 43 3.178054 3.178054 416 +futur 1 41 3.218876 3.218876 427 +combin 1 42 3.218876 3.218876 421 +programm 1 39 3.258097 3.258097 445 +slide 3 38 3.295837 9.887511 467 +origin 1 38 3.295837 3.295837 472 +bibliographi 1 34 3.401197 3.401197 518 +articl 1 33 3.433987 3.433987 530 +built 1 29 3.583519 3.583519 592 +platform 1 29 3.583519 3.583519 591 +propos 1 28 3.610918 3.610918 602 +pass 1 28 3.610918 3.610918 611 +cluster 1 28 3.610918 3.610918 612 +hill 1 25 3.737670 3.737670 670 +annot 1 21 3.912023 3.912023 775 +fund 1 21 3.912023 3.912023 805 +wind 3 18 4.060443 12.181329 908 +four 1 18 4.060443 4.060443 905 +seek 1 17 4.110874 4.110874 954 +massiv 1 15 4.248495 4.248495 1026 +hybrid 1 15 4.248495 4.248495 1057 +node 1 11 4.553877 4.553877 1326 +fpga 1 10 4.653960 4.653960 1433 +tunnel 3 9 4.753590 14.260770 1615 +paradyn 2 9 4.753590 9.507180 1614 +consensu 1 6 5.164786 5.164786 2080 +tempest 4 5 5.347108 21.388432 2548 +middl 1 5 5.347108 5.347108 2372 +hypothet 1 5 5.347108 5.347108 2474 +departmentat 1 5 5.347108 5.347108 2513 +aboutth 1 4 5.568345 5.568345 2720 +ofworkst 1 4 5.568345 5.568345 2679 +markhil 1 4 5.568345 5.568345 2819 +parallellanguag 1 3 5.857933 5.857933 4026 +evolutionari 1 3 5.857933 5.857933 3898 +uniformli 1 2 6.263398 6.263398 6202 +havebeen 1 2 6.263398 6.263398 5830 +snoop 1 2 6.263398 6.263398 5364 +contributor 1 2 6.263398 6.263398 6214 +projectmost 1 1 6.957497 6.957497 20379 +fromworkst 1 1 6.957497 6.957497 20380 +whichprocess 1 1 6.957497 6.957497 20381 +abovesystem 1 1 6.957497 6.957497 20382 +wascoop 1 1 6.957497 6.957497 20383 +toconvent 1 1 6.957497 6.957497 20384 +revolutionari 1 1 6.957497 6.957497 20385 +andprogram 1 1 6.957497 6.957497 20386 +transparentshar 1 1 6.957497 6.957497 20387 +developingimplement 1 1 6.957497 6.957497 20388 +wisconsincow 1 1 6.957497 6.957497 20389 +cowus 1 1 6.957497 6.957497 20390 +sram 1 1 6.957497 6.957497 20391 +collaboratingwith 1 1 6.957497 6.957497 20392 +overviewand 1 1 6.957497 6.957497 20393 +pageor 1 1 6.957497 6.957497 20394 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html new file mode 100644 index 00000000..b51b4105 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~xuelin^xuelin.html @@ -0,0 +1,86 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +program 1 374 0.693147 0.693147 7 +time 1 293 1.098612 1.098612 17 +base 1 165 1.791759 1.791759 50 +develop 1 174 1.791759 1.791759 53 +first 3 140 1.945910 5.837730 71 +like 1 132 1.945910 1.945910 81 +make 2 111 2.197225 4.394450 120 +person 1 111 2.197225 2.197225 117 +pictur 1 89 2.397895 2.397895 160 +imag 1 91 2.397895 2.397895 161 +thing 1 84 2.484907 2.484907 189 +appear 1 78 2.564949 2.564949 210 +creat 1 63 2.772589 2.772589 277 +back 1 60 2.833213 2.833213 297 +finger 2 52 2.995732 5.991464 354 +give 1 50 3.044522 3.044522 359 +featur 1 46 3.091042 3.091042 386 +could 1 46 3.091042 3.091042 383 +obtain 1 33 3.433987 3.433987 534 +product 1 33 3.433987 3.433987 527 +human 1 32 3.465736 3.465736 546 +anim 1 31 3.496508 3.496508 557 +pass 1 28 3.610918 3.610918 611 +team 1 27 3.637586 3.637586 625 +seri 1 24 3.761200 3.761200 708 +wish 1 24 3.761200 3.761200 692 +instead 1 22 3.850148 3.850148 756 +spend 1 19 4.007333 4.007333 850 +statu 1 18 4.060443 4.060443 885 +seem 1 18 4.060443 4.060443 899 +account 1 18 4.060443 4.060443 882 +whether 1 17 4.110874 4.110874 918 +across 1 16 4.174387 4.174387 974 +charact 1 15 4.248495 4.248495 1028 +save 1 14 4.317488 4.317488 1099 +shown 1 14 4.317488 4.317488 1080 +comic 1 14 4.317488 4.317488 1103 +charl 1 13 4.382027 4.382027 1149 +newspap 1 12 4.465908 4.465908 1280 +remov 1 12 4.465908 4.465908 1225 +magic 1 11 4.553877 4.553877 1358 +pick 1 9 4.753590 4.753590 1498 +didn 1 9 4.753590 4.753590 1563 +hundr 1 9 4.753590 4.753590 1528 +star 2 8 4.875197 9.750394 1717 +film 1 8 4.875197 4.875197 1761 +successfulli 1 7 5.010635 5.010635 1869 +televis 3 6 5.164786 15.494358 2118 +famou 1 6 5.164786 5.164786 2185 +strip 1 6 5.164786 5.164786 2203 +put 1 6 5.164786 5.164786 2017 +chat 1 6 5.164786 5.164786 2128 +keeper 1 5 5.347108 5.347108 2569 +trick 2 4 5.568345 11.136690 2967 +accompani 1 4 5.568345 5.568345 2666 +transmit 1 4 5.568345 5.568345 2835 +somehow 1 4 5.568345 5.568345 2974 +keyboard 1 4 5.568345 5.568345 2970 +rival 1 3 5.857933 5.857933 3583 +agre 1 3 5.857933 5.857933 4007 +advertis 1 3 5.857933 5.857933 3788 +felix 2 2 6.263398 12.526796 5103 +princ 1 2 6.263398 6.263398 4813 +wale 1 2 6.263398 6.263398 4827 +mascot 1 2 6.263398 6.263398 6060 +ear 1 2 6.263398 6.263398 5071 +hairbal 1 2 6.263398 6.263398 6237 +xuelin 1 1 6.957497 6.957497 20395 +otto 1 1 6.957497 6.957497 20396 +messmer 1 1 6.957497 6.957497 20397 +whichwa 1 1 6.957497 6.957497 20398 +chaplin 1 1 6.957497 6.957497 20399 +keaton 1 1 6.957497 6.957497 20400 +polo 1 1 6.957497 6.957497 20401 +lindbergh 1 1 6.957497 6.957497 20402 +theatlant 1 1 6.957497 6.957497 20403 +oneev 1 1 6.957497 6.957497 20404 +teeth 1 1 6.957497 6.957497 20405 +whisker 1 1 6.957497 6.957497 20406 +tail 1 1 6.957497 6.957497 20407 +sui 1 1 6.957497 6.957497 20408 +vritabl 1 1 6.957497 6.957497 20409 +partout 1 1 6.957497 6.957497 20410 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html new file mode 100644 index 00000000..91ee93d3 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yannis^yannis.html @@ -0,0 +1,330 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 4 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +system 13 443 0.693147 9.010911 6 +inform 5 412 0.693147 3.465735 8 +research 2 431 0.693147 1.386294 10 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +current 2 284 1.098612 2.197224 21 +engin 2 297 1.098612 2.197224 20 +time 1 293 1.098612 1.098612 17 +us 1 329 1.098612 1.098612 16 +also 2 259 1.386294 2.772588 28 +gener 2 220 1.386294 2.772588 27 +wisc 1 242 1.386294 1.386294 33 +design 1 213 1.386294 1.386294 25 +languag 1 227 1.386294 1.386294 26 +data 5 170 1.791759 8.958795 49 +parallel 3 169 1.791759 5.375277 60 +algorithm 2 162 1.791759 3.583518 57 +distribut 2 162 1.791759 3.583518 51 +develop 2 174 1.791759 3.583518 53 +base 2 165 1.791759 3.583518 50 +applic 1 170 1.791759 1.791759 56 +address 1 170 1.791759 1.791759 62 +recent 1 167 1.791759 1.791759 58 +implement 1 152 1.791759 1.791759 52 +problem 4 147 1.945910 7.783640 75 +support 2 132 1.945910 3.891820 83 +object 2 138 1.945910 3.891820 79 +area 1 144 1.945910 1.945910 80 +process 1 142 1.945910 1.945910 72 +model 1 145 1.945910 1.945910 69 +file 1 132 1.945910 1.945910 70 +databas 12 122 2.079442 24.953304 86 +confer 8 126 2.079442 16.635536 100 +schedul 3 119 2.079442 6.238326 85 +studi 3 120 2.079442 6.238326 91 +tool 3 117 2.079442 6.238326 93 +number 2 130 2.079442 4.158884 97 +technolog 1 131 2.079442 2.079442 102 +provid 1 121 2.079442 2.079442 94 +intern 13 108 2.197225 28.563925 128 +manag 3 114 2.197225 6.591675 125 +well 2 109 2.197225 4.394450 121 +theori 2 111 2.197225 4.394450 127 +find 1 111 2.197225 2.197225 111 +look 1 107 2.197225 2.197225 115 +user 3 104 2.302585 6.907755 137 +need 2 98 2.302585 4.605170 135 +access 1 102 2.302585 2.302585 136 +take 1 97 2.302585 2.302585 134 +part 1 98 2.302585 2.302585 129 +mani 1 92 2.397895 2.397895 150 +associ 1 93 2.397895 2.397895 151 +imag 1 91 2.397895 2.397895 161 +solut 2 82 2.484907 4.969814 162 +environ 2 84 2.484907 4.969814 177 +ieee 2 86 2.484907 4.969814 190 +larg 1 82 2.484907 2.484907 168 +help 1 83 2.484907 2.484907 175 +contain 1 81 2.484907 2.484907 174 +journal 1 83 2.484907 2.484907 183 +resourc 1 81 2.484907 2.484907 172 +optim 8 79 2.564949 20.519592 197 +issu 5 78 2.564949 12.824745 211 +interfac 3 79 2.564949 7.694847 209 +decemb 3 80 2.564949 7.694847 215 +dynam 2 76 2.564949 5.129898 194 +orient 2 80 2.564949 5.129898 205 +june 1 79 2.564949 2.564949 214 +april 1 77 2.564949 2.564949 196 +symposium 2 72 2.639057 5.278114 238 +workshop 2 71 2.639057 5.278114 239 +involv 1 71 2.639057 2.639057 227 +intellig 1 72 2.639057 2.639057 225 +integr 4 67 2.708050 10.832200 245 +simul 3 66 2.708050 8.124150 255 +august 3 66 2.708050 8.124150 257 +multimedia 2 68 2.708050 5.416100 258 +practic 2 70 2.708050 5.416100 246 +differ 1 66 2.708050 2.708050 253 +knowledg 1 67 2.708050 2.708050 243 +septemb 8 65 2.772589 22.180712 274 +complex 6 64 2.772589 16.635534 269 +result 3 65 2.772589 8.317767 281 +evalu 1 64 2.772589 2.772589 266 +plan 1 65 2.772589 2.772589 272 +experi 1 64 2.772589 2.772589 283 +guid 1 63 2.772589 2.772589 267 +foundat 1 62 2.772589 2.772589 286 +januari 1 62 2.772589 2.772589 264 +improv 1 62 2.772589 2.772589 289 +march 2 61 2.833213 5.666426 295 +juli 1 60 2.833213 2.833213 305 +special 3 56 2.890372 8.671116 320 +sever 2 56 2.890372 5.780744 322 +variou 1 56 2.890372 2.890372 317 +major 1 56 2.890372 2.890372 315 +index 1 56 2.890372 2.890372 309 +scientif 3 53 2.944439 8.833317 341 +februari 1 54 2.944439 2.944439 328 +investig 3 51 2.995732 8.987196 353 +much 1 52 2.995732 2.995732 349 +maintain 1 51 2.995732 2.995732 342 +particular 1 51 2.995732 2.995732 352 +case 1 51 2.995732 2.995732 351 +visual 10 48 3.044522 30.445220 372 +life 1 50 3.044522 3.044522 375 +right 1 48 3.044522 3.044522 363 +format 1 48 3.044522 3.044522 356 +set 1 50 3.044522 3.044522 361 +natur 1 44 3.135494 3.135494 406 +futur 2 41 3.218876 6.437752 427 +edit 1 42 3.218876 3.218876 418 +york 1 41 3.218876 3.218876 435 +transact 4 39 3.258097 13.032388 438 +error 3 40 3.258097 9.774291 449 +join 2 39 3.258097 6.516194 457 +must 1 40 3.258097 3.258097 442 +multipl 1 39 3.258097 3.258097 453 +cost 1 37 3.332205 3.332205 480 +expect 1 37 3.332205 3.332205 484 +especi 1 36 3.367296 3.367296 496 +multi 1 36 3.367296 3.367296 493 +survei 2 35 3.401197 6.802394 513 +random 1 34 3.401197 3.401197 511 +approxim 1 35 3.401197 3.401197 509 +statist 1 35 3.401197 3.401197 521 +queri 13 33 3.433987 44.641831 524 +india 2 32 3.465736 6.931472 550 +independ 1 32 3.465736 3.465736 548 +express 1 32 3.465736 3.465736 540 +scientist 2 31 3.496508 6.993016 560 +compon 1 30 3.555348 3.555348 570 +power 1 30 3.555348 3.555348 573 +graph 1 30 3.555348 3.555348 576 +rang 1 30 3.555348 3.555348 565 +focus 2 29 3.583519 7.167038 584 +limit 1 29 3.583519 3.583519 585 +ask 1 28 3.610918 3.610918 597 +load 1 28 3.610918 3.610918 601 +framework 1 28 3.610918 3.610918 606 +proc 16 26 3.688879 59.022064 649 +altern 3 26 3.688879 11.066637 641 +challeng 1 26 3.688879 3.688879 653 +repres 1 26 3.688879 3.688879 656 +effort 1 26 3.688879 3.688879 652 +valu 2 25 3.737670 7.475340 665 +primari 1 25 3.737670 3.737670 669 +although 1 25 3.737670 3.737670 667 +higher 1 24 3.761200 3.761200 690 +size 3 23 3.806662 11.419986 713 +sequenc 1 23 3.806662 3.806662 734 +displai 1 23 3.806662 3.806662 712 +identifi 3 22 3.850148 11.550444 760 +try 1 22 3.850148 3.850148 764 +properti 1 22 3.850148 3.850148 749 +thu 1 21 3.912023 3.912023 773 +among 1 21 3.912023 3.912023 781 +path 1 21 3.912023 3.912023 778 +flexibl 1 21 3.912023 3.912023 792 +sigmod 4 19 4.007333 16.029332 877 +boston 1 19 4.007333 4.007333 862 +concentr 2 18 4.060443 8.120886 906 +record 1 18 4.060443 4.060443 890 +dimension 1 18 4.060443 4.060443 909 +estim 4 17 4.110874 16.443496 930 +miller 3 17 4.110874 12.332622 949 +attempt 1 17 4.110874 4.110874 917 +former 1 17 4.110874 4.110874 956 +ramakrishnan 5 16 4.174387 20.871935 972 +advantag 1 16 4.174387 4.174387 987 +jose 1 16 4.174387 4.174387 976 +cambridg 1 16 4.174387 4.174387 1008 +livni 6 15 4.248495 25.490970 1053 +transit 1 15 4.248495 4.248495 1046 +heterogen 2 14 4.317488 8.634976 1090 +balanc 2 14 4.317488 8.634976 1112 +attribut 1 14 4.317488 4.317488 1092 +primarili 3 13 4.382027 13.146081 1185 +translat 3 13 4.382027 13.146081 1164 +canada 2 13 4.382027 8.764054 1158 +readi 1 12 4.465908 4.465908 1242 +gupta 1 12 4.465908 4.465908 1241 +extrem 1 11 4.553877 4.553877 1330 +cycl 1 11 4.553877 4.553877 1335 +itali 1 11 4.553877 4.553877 1378 +desktop 2 10 4.653960 9.307920 1445 +tradit 1 10 4.653960 4.653960 1404 +genet 1 10 4.653960 4.653960 1409 +vldb 1 10 4.653960 4.653960 1470 +pose 1 9 4.753590 4.753590 1535 +significantli 1 9 4.753590 4.753590 1508 +mode 1 9 4.753590 4.753590 1492 +latter 1 9 4.753590 4.753590 1522 +conferenceon 1 9 4.753590 4.753590 1595 +incomplet 1 9 4.753590 4.753590 1575 +ioannidi 27 8 4.875197 131.630319 1714 +yanni 2 8 4.875197 9.750394 1713 +bridg 2 8 4.875197 9.750394 1764 +closur 1 8 4.875197 4.875197 1643 +solomon 1 8 4.875197 4.875197 1716 +unifi 1 8 4.875197 4.875197 1774 +databasesystem 1 8 4.875197 4.875197 1617 +aris 2 7 5.010635 10.021270 1924 +bombai 2 7 5.010635 10.021270 1972 +montreal 2 7 5.010635 10.021270 1961 +paramet 1 7 5.010635 5.010635 1796 +parametr 1 7 5.010635 5.010635 1819 +throughout 1 7 5.010635 5.010635 1871 +sweden 1 7 5.010635 5.010635 1885 +predic 1 7 5.010635 5.010635 1806 +serial 1 7 5.010635 5.010635 1975 +schema 6 6 5.164786 30.988716 1988 +eduresearch 1 6 5.164786 5.164786 2205 +divers 1 6 5.164786 5.164786 2232 +greec 1 6 5.164786 5.164786 2208 +tsatalo 2 5 5.347108 10.694216 2581 +travers 1 5 5.347108 5.347108 2363 +frog 1 5 5.347108 5.347108 2479 +desk 1 5 5.347108 5.347108 2297 +minneapoli 1 5 5.347108 5.347108 2480 +england 1 5 5.347108 5.347108 2557 +ireland 2 4 5.568345 11.136690 2853 +algorithmsfor 1 4 5.568345 5.568345 2748 +multimediasystem 1 4 5.568345 5.568345 2701 +forparallel 1 4 5.568345 5.568345 2703 +customiz 1 4 5.568345 5.568345 2966 +chile 1 4 5.568345 5.568345 3082 +histogram 6 3 5.857933 35.147598 3490 +propag 2 3 5.857933 11.715866 3997 +disciplin 2 3 5.857933 11.715866 3392 +metaphor 2 3 5.857933 11.715866 4038 +inadequ 1 3 5.857933 5.857933 3730 +andsemant 1 3 5.857933 5.857933 3246 +microscop 1 3 5.857933 5.857933 4035 +publicationsi 1 3 5.857933 5.857933 3827 +conjunct 1 3 5.857933 5.857933 3743 +stockholm 1 3 5.857933 5.857933 3715 +zurich 1 3 5.857933 5.857933 3550 +switzerland 1 3 5.857933 5.857933 3551 +santiago 1 3 5.857933 5.857933 4013 +poosala 4 2 6.263398 25.053592 6228 +queryoptim 2 2 6.263398 12.526796 4057 +garofalaki 2 2 6.263398 12.526796 6209 +dublin 2 2 6.263398 12.526796 4883 +interestsdatabas 1 2 6.263398 6.263398 6116 +andinform 1 2 6.263398 6.263398 5550 +scientificdata 1 2 6.263398 6.263398 6067 +algorithmsa 1 2 6.263398 6.263398 4487 +anneal 1 2 6.263398 6.263398 4136 +basedperform 1 2 6.263398 6.263398 6055 +spectroscopi 1 2 6.263398 6.263398 6206 +anniversari 1 2 6.263398 6.263398 4945 +performanceevalu 1 2 6.263398 6.263398 6052 +bermuda 1 2 6.263398 6.263398 5907 +turtl 1 2 6.263398 6.263398 4235 +haa 1 2 6.263398 6.263398 6115 +gmap 1 2 6.263398 6.263398 6241 +versatil 1 2 6.263398 6.263398 6242 +haber 4 1 6.957497 27.829988 20411 +vldbconfer 4 1 6.957497 27.829988 20412 +tod 3 1 6.957497 20.872491 20413 +ofheterogen 2 1 6.957497 13.914994 20414 +ondatabas 2 1 6.957497 13.914994 20415 +opossum 2 1 6.957497 13.914994 20416 +ioannidisyanni 1 1 6.957497 6.957497 20417 +toqueri 1 1 6.957497 6.957497 20418 +thanin 1 1 6.957497 6.957497 20419 +highera 1 1 6.957497 6.957497 20420 +tooptim 1 1 6.957497 6.957497 20421 +querywil 1 1 6.957497 6.957497 20422 +optimum 1 1 6.957497 6.957497 20423 +viabl 1 1 6.957497 6.957497 20424 +propertiesof 1 1 6.957497 6.957497 20425 +especiallythos 1 1 6.957497 6.957497 20426 +alsopart 1 1 6.957497 6.957497 20427 +appropriateinform 1 1 6.957497 6.957497 20428 +thepropag 1 1 6.957497 6.957497 20429 +ofoptim 1 1 6.957497 6.957497 20430 +inrel 1 1 6.957497 6.957497 20431 +manyexperi 1 1 6.957497 6.957497 20432 +aspectsthat 1 1 6.957497 6.957497 20433 +managementenviron 1 1 6.957497 6.957497 20434 +theirexperiment 1 1 6.957497 6.957497 20435 +arefor 1 1 6.957497 6.957497 20436 +scientistsso 1 1 6.957497 6.957497 20437 +facilitatetransl 1 1 6.957497 6.957497 20438 +experimentalscientif 1 1 6.957497 6.957497 20439 +specificproject 1 1 6.957497 6.957497 20440 +plantgrowth 1 1 6.957497 6.957497 20441 +issueon 1 1 6.957497 6.957497 20442 +beyondrel 1 1 6.957497 6.957497 20443 +forschema 1 1 6.957497 6.957497 20444 +tsangari 1 1 6.957497 6.957497 20445 +tkde 1 1 6.957497 6.957497 20446 +christodoulaki 1 1 6.957497 6.957497 20447 +limitingworst 1 1 6.957497 6.957497 20448 +winger 1 1 6.957497 6.957497 20449 +algorithmsbas 1 1 6.957497 6.957497 20450 +databaseestim 1 1 6.957497 6.957497 20451 +ponnekanti 1 1 6.957497 6.957497 20452 +experimentmanag 1 1 6.957497 6.957497 20453 +itsappl 1 1 6.957497 6.957497 20454 +anjur 1 1 6.957497 6.957497 20455 +bridgesbetween 1 1 6.957497 6.957497 20456 +shekita 1 1 6.957497 6.957497 20457 +forselect 1 1 6.957497 6.957497 20458 +internationalacm 1 1 6.957497 6.957497 20459 +layoutat 1 1 6.957497 6.957497 20460 +granular 1 1 6.957497 6.957497 20461 +advancedvisu 1 1 6.957497 6.957497 20462 +gubbio 1 1 6.957497 6.957497 20463 +managementthrough 1 1 6.957497 6.957497 20464 +practicalityfor 1 1 6.957497 6.957497 20465 +sigmodconfer 1 1 6.957497 6.957497 20466 +forphys 1 1 6.957497 6.957497 20467 +dexa 1 1 6.957497 6.957497 20468 +athen 1 1 6.957497 6.957497 20469 +lashkari 1 1 6.957497 6.957497 20470 +theirdisambigu 1 1 6.957497 6.957497 20471 +schemavisu 1 1 6.957497 6.957497 20472 +edbt 1 1 6.957497 6.957497 20473 +internationalvldb 1 1 6.957497 6.957497 20474 +capacityin 1 1 6.957497 6.957497 20475 +wiener 1 1 6.957497 6.957497 20476 +moos 1 1 6.957497 6.957497 20477 +withdata 1 1 6.957497 6.957497 20478 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yin^yin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yin^yin.html new file mode 100644 index 00000000..eb414f0c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yin^yin.html @@ -0,0 +1,4 @@ +term, tf, in documents count, idf, tfidf, wordid +homepag 1 93 2.397895 2.397895 148 +kevin 1 9 4.753590 4.753590 1482 +zhongbin 1 1 6.957497 6.957497 20496 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html new file mode 100644 index 00000000..8fcde93d --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~yinong^yinong.html @@ -0,0 +1,77 @@ +term, tf, in documents count, idf, tfidf, wordid +home 4 672 0.000000 0.000000 1 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +work 1 380 0.693147 0.693147 9 +cours 2 273 1.098612 2.197224 15 +time 2 293 1.098612 2.197224 17 +last 2 314 1.098612 2.197224 14 +offic 1 299 1.098612 1.098612 13 +modifi 1 178 1.609438 1.609438 35 +madison 2 165 1.791759 3.583518 55 +address 1 170 1.791759 1.791759 62 +read 1 154 1.791759 1.791759 47 +support 1 132 1.945910 1.945910 83 +welcom 1 122 2.079442 2.079442 99 +seattl 1 120 2.079442 2.079442 103 +number 1 130 2.079442 2.079442 97 +person 2 111 2.197225 4.394450 117 +pleas 1 113 2.197225 2.197225 114 +look 1 107 2.197225 2.197225 115 +send 1 114 2.197225 2.197225 109 +take 1 97 2.302585 2.302585 134 +access 1 102 2.302585 2.302585 136 +homepag 1 93 2.397895 2.397895 148 +pictur 1 89 2.397895 2.397895 160 +mani 1 92 2.397895 2.397895 150 +comment 1 93 2.397895 2.397895 146 +info 1 85 2.484907 2.484907 176 +good 2 77 2.564949 5.129898 200 +collect 1 65 2.772589 2.772589 268 +organ 1 65 2.772589 2.772589 265 +give 1 50 3.044522 3.044522 359 +telephon 1 50 3.044522 3.044522 373 +visitor 1 49 3.044522 3.044522 371 +press 1 42 3.218876 3.218876 419 +littl 1 39 3.258097 3.258097 454 +especi 1 36 3.367296 3.367296 496 +everi 1 34 3.401197 3.401197 519 +articl 1 33 3.433987 3.433987 530 +travel 1 30 3.555348 3.555348 579 +chines 1 29 3.583519 3.583519 595 +univ 1 28 3.610918 3.610918 617 +comp 1 26 3.688879 3.688879 650 +client 1 25 3.737670 3.737670 679 +sometim 1 24 3.761200 3.761200 696 +alumni 1 21 3.912023 3.912023 807 +wrote 1 20 3.951244 3.951244 830 +stat 1 17 4.110874 4.110874 924 +took 1 16 4.174387 4.174387 1010 +month 1 15 4.248495 4.248495 1025 +trip 1 14 4.317488 4.317488 1113 +employ 1 12 4.465908 4.465908 1291 +classmat 1 9 4.753590 4.753590 1516 +chicago 1 6 5.164786 5.164786 2149 +bldg 1 4 5.568345 5.568345 2983 +amaz 1 4 5.568345 5.568345 2600 +usathi 1 2 6.263398 6.263398 5951 +diari 1 2 6.263398 6.263398 4740 +linksmi 1 2 6.263398 6.263398 6215 +oversea 1 2 6.263398 6.263398 5781 +yinng 1 1 6.957497 6.957497 20479 +pageindexofyinongwei 1 1 6.957497 6.957497 20480 +spagehi 1 1 6.957497 6.957497 20481 +alsolink 1 1 6.957497 6.957497 20482 +inforesumehobbiestravel 1 1 6.957497 6.957497 20483 +pointersr 1 1 6.957497 6.957497 20484 +computingmacin 1 1 6.957497 6.957497 20485 +learningpattern 1 1 6.957497 6.957497 20486 +recognitioncomputatin 1 1 6.957497 6.957497 20487 +geometrydatabasevisionacadem 1 1 6.957497 6.957497 20488 +diarythi 1 1 6.957497 6.957497 20489 +septemberoctobernovemberdecemberjanuaryfebruarymarchaprilrel 1 1 6.957497 6.957497 20490 +beida 1 1 6.957497 6.957497 20491 +classmatespek 1 1 6.957497 6.957497 20492 +ciumi 1 1 6.957497 6.957497 20493 +bookmarkcom 1 1 6.957497 6.957497 20494 +yinong 1 1 6.957497 6.957497 20495 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html new file mode 100644 index 00000000..96c9303e --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zeiden^zeiden.html @@ -0,0 +1,45 @@ +term, tf, in documents count, idf, tfidf, wordid +home 2 672 0.000000 0.000000 1 +inform 1 412 0.693147 0.693147 8 +offic 1 299 1.098612 1.098612 13 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +updat 1 191 1.609438 1.609438 41 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +call 2 91 2.397895 4.795790 153 +room 1 59 2.833213 2.833213 301 +basic 1 50 3.044522 3.044522 360 +give 1 50 3.044522 3.044522 359 +california 1 46 3.091042 3.091042 388 +around 1 43 3.178054 3.178054 415 +strategi 1 25 3.737670 3.737670 682 +voic 1 21 3.912023 3.912023 806 +hous 1 21 3.912023 3.912023 801 +beauti 1 18 4.060443 4.060443 912 +drive 1 15 4.248495 4.248495 1052 +food 2 12 4.465908 8.931816 1285 +poor 2 8 4.875197 9.750394 1736 +matthew 1 6 5.164786 5.164786 2193 +parent 1 6 5.164786 5.164786 2204 +observatori 1 4 5.568345 5.568345 3070 +matt 1 3 5.857933 5.857933 3792 +beach 1 3 5.857933 5.857933 3782 +convuls 2 1 6.957497 13.914994 20497 +sera 2 1 6.957497 13.914994 20498 +pageuntil 1 1 6.957497 6.957497 20499 +zeidenbergcent 1 1 6.957497 6.957497 20500 +gilson 1 1 6.957497 6.957497 20501 +zeiden 1 1 6.957497 6.957497 20502 +eduzeidenb 1 1 6.957497 6.957497 20503 +eduwhen 1 1 6.957497 6.957497 20504 +coho 1 1 6.957497 6.957497 20505 +huntington 1 1 6.957497 6.957497 20506 +breton 1 1 6.957497 6.957497 20507 +nadja 1 1 6.957497 6.957497 20508 +beaut 1 1 6.957497 6.957497 20509 +saint 1 1 6.957497 6.957497 20510 +whyth 1 1 6.957497 6.957497 20511 +communist 1 1 6.957497 6.957497 20512 +helder 1 1 6.957497 6.957497 20513 +camara 1 1 6.957497 6.957497 20514 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html new file mode 100644 index 00000000..9d55caf7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhang^zhang.html @@ -0,0 +1,162 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +home 2 672 0.000000 0.000000 1 +scienc 2 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +research 6 431 0.693147 4.158882 10 +interest 2 384 0.693147 1.386294 11 +system 2 443 0.693147 1.386294 6 +inform 1 412 0.693147 0.693147 8 +depart 1 457 0.693147 0.693147 12 +us 2 329 1.098612 2.197224 16 +student 1 343 1.098612 1.098612 19 +time 1 293 1.098612 1.098612 17 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +design 2 213 1.386294 2.772588 25 +gener 1 220 1.386294 1.386294 27 +mail 1 238 1.386294 1.386294 22 +wisc 1 242 1.386294 1.386294 33 +link 1 247 1.386294 1.386294 24 +public 1 202 1.609438 1.609438 43 +updat 1 191 1.609438 1.609438 41 +data 10 170 1.791759 17.917590 49 +algorithm 3 162 1.791759 5.375277 57 +applic 3 170 1.791759 5.375277 56 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +recent 2 167 1.791759 3.583518 58 +develop 1 174 1.791759 1.791759 53 +implement 1 152 1.791759 1.791759 52 +distribut 1 162 1.791759 1.791759 51 +databas 6 122 2.079442 12.476652 86 +analysi 4 124 2.079442 8.317768 98 +confer 2 126 2.079442 4.158884 100 +report 1 131 2.079442 2.079442 92 +technolog 1 131 2.079442 2.079442 102 +document 1 121 2.079442 2.079442 89 +topic 1 114 2.197225 2.197225 110 +manag 1 114 2.197225 2.197225 125 +find 1 111 2.197225 2.197225 111 +technic 2 100 2.302585 4.605170 140 +need 1 98 2.302585 2.302585 135 +techniqu 1 99 2.302585 2.302585 138 +memori 1 101 2.302585 2.302585 139 +mani 1 92 2.397895 2.397895 150 +imag 1 91 2.397895 2.397895 161 +select 1 91 2.397895 2.397895 154 +larg 8 82 2.484907 19.879256 168 +journal 3 83 2.484907 7.454721 183 +educ 2 86 2.484907 4.969814 191 +resourc 1 81 2.484907 2.484907 172 +method 4 80 2.564949 10.259796 213 +june 3 79 2.564949 7.694847 214 +issu 1 78 2.564949 2.564949 211 +effici 4 73 2.639057 10.556228 233 +intellig 3 72 2.639057 7.917171 225 +symposium 2 72 2.639057 5.278114 238 +appli 1 71 2.639057 2.639057 226 +workshop 1 71 2.639057 2.639057 239 +free 1 73 2.639057 2.639057 224 +integr 1 67 2.708050 2.708050 245 +practic 1 70 2.708050 2.708050 246 +knowledg 1 67 2.708050 2.708050 243 +artifici 3 63 2.772589 8.317767 280 +prof 2 64 2.772589 5.545178 273 +plan 2 65 2.772589 5.545178 272 +dept 1 64 2.772589 2.772589 291 +function 1 62 2.772589 2.772589 275 +import 1 65 2.772589 2.772589 282 +interact 1 62 2.772589 2.772589 270 +organ 1 65 2.772589 2.772589 265 +room 1 59 2.833213 2.833213 301 +juli 1 60 2.833213 2.833213 305 +major 1 56 2.890372 2.890372 315 +thesi 1 57 2.890372 2.890372 327 +space 1 57 2.890372 2.890372 310 +run 1 51 2.995732 2.995732 347 +profession 1 51 2.995732 2.995732 345 +telephon 2 50 3.044522 6.089044 373 +fast 1 42 3.218876 3.218876 429 +submit 1 39 3.258097 3.258097 440 +probabl 1 40 3.258097 3.258097 455 +purpos 1 37 3.332205 3.332205 481 +china 1 37 3.332205 3.332205 487 +robot 5 36 3.367296 16.836480 497 +multi 2 36 3.367296 6.734592 493 +statist 1 35 3.401197 3.401197 521 +eduoffic 1 33 3.433987 3.433987 531 +given 1 32 3.465736 3.465736 538 +domain 1 30 3.555348 3.555348 564 +limit 1 29 3.583519 3.583519 585 +cluster 5 28 3.610918 18.054590 612 +univ 1 28 3.610918 3.610918 617 +manipul 1 27 3.637586 3.637586 624 +mine 5 26 3.688879 18.444395 654 +proc 5 26 3.688879 18.444395 649 +relev 1 26 3.688879 3.688879 637 +accur 1 25 3.737670 3.737670 680 +pattern 2 24 3.761200 7.522400 689 +motion 2 24 3.761200 7.522400 699 +compress 1 23 3.806662 3.806662 719 +recognit 1 23 3.806662 3.806662 723 +mobil 1 23 3.806662 3.806662 730 +identifi 1 22 3.850148 3.850148 760 +cooper 1 22 3.850148 3.850148 757 +divis 1 21 3.912023 3.912023 803 +path 1 21 3.912023 3.912023 778 +kernel 1 20 3.951244 3.951244 825 +sigmod 2 19 4.007333 8.014666 877 +beij 2 19 4.007333 8.014666 876 +region 1 19 4.007333 4.007333 875 +concentr 1 18 4.060443 4.060443 906 +dimension 1 18 4.060443 4.060443 909 +estim 2 17 4.110874 8.221748 930 +zhang 20 16 4.174387 83.487740 980 +ramakrishnan 5 16 4.174387 20.871935 972 +spars 1 16 4.174387 4.174387 989 +young 1 16 4.174387 4.174387 991 +livni 5 15 4.248495 21.242475 1053 +configur 1 15 4.248495 4.248495 1012 +miron 5 14 4.317488 21.587440 1110 +topolog 2 14 4.317488 8.634976 1089 +finit 1 14 4.317488 4.317488 1106 +joint 3 13 4.382027 13.146081 1130 +canada 2 13 4.382027 8.764054 1158 +conf 1 13 4.382027 4.382027 1181 +raghu 5 12 4.465908 22.329540 1212 +grow 1 12 4.465908 4.465908 1209 +amount 1 12 4.465908 4.465908 1208 +overal 1 12 4.465908 4.465908 1254 +branch 1 11 4.553877 4.553877 1318 +discov 1 9 4.753590 4.753590 1562 +classif 1 9 4.753590 4.753590 1586 +manufactur 1 8 4.875197 4.875197 1634 +dataset 4 7 5.010635 20.042540 1914 +densiti 4 7 5.010635 20.042540 1927 +discoveri 1 7 5.010635 5.010635 1915 +trend 1 7 5.010635 5.010635 1842 +dimens 1 7 5.010635 5.010635 1930 +reduct 1 7 5.010635 5.010635 1877 +financi 1 6 5.164786 5.164786 2197 +invest 1 6 5.164786 5.164786 2153 +ling 3 4 5.568345 16.705035 3045 +exploratori 1 4 5.568345 5.568345 3073 +ijcai 1 4 5.568345 5.568345 2901 +tian 10 3 5.857933 58.579330 3680 +birch 4 2 6.263398 25.053592 6136 +ortool 1 2 6.263398 6.263398 4169 +andmanufactur 1 2 6.263398 6.263398 6244 +collis 1 2 6.263398 6.263398 5956 +jianwei 2 1 6.957497 13.914994 20515 +assistantadvisor 1 1 6.957497 6.957497 20516 +compilerminor 1 1 6.957497 6.957497 20517 +bankingoffic 1 1 6.957497 6.957497 20518 +intereststher 1 1 6.957497 6.957497 20519 +territori 1 1 6.957497 6.957497 20520 +densityanalysi 1 1 6.957497 6.957497 20521 +crowd 1 1 6.957497 6.957497 20522 +dataclassif 1 1 6.957497 6.957497 20523 +knowledgediscoveri 1 1 6.957497 6.957497 20524 +dimensionreduct 1 1 6.957497 6.957497 20525 +findpath 1 1 6.957497 6.957497 20526 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html new file mode 100644 index 00000000..5164c6bf --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhao^zhao.html @@ -0,0 +1,73 @@ +term, tf, in documents count, idf, tfidf, wordid +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +univers 1 571 0.000000 0.000000 5 +research 3 431 0.693147 2.079441 10 +depart 1 457 0.693147 0.693147 12 +interest 1 384 0.693147 0.693147 11 +engin 1 297 1.098612 1.098612 20 +wisc 1 242 1.386294 1.386294 33 +fall 1 181 1.609438 1.609438 40 +group 1 183 1.609438 1.609438 36 +data 3 170 1.791759 5.375277 49 +madison 2 165 1.791759 3.583518 55 +wisconsin 1 169 1.791759 1.791759 54 +parallel 1 169 1.791759 1.791759 60 +relat 2 139 1.945910 3.891820 68 +object 1 138 1.945910 1.945910 79 +process 1 142 1.945910 1.945910 72 +dayton 1 119 2.079442 2.079442 104 +site 3 106 2.197225 6.591675 119 +assist 1 112 2.197225 2.197225 113 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +west 1 83 2.484907 2.484907 192 +server 3 76 2.564949 7.694847 204 +line 1 75 2.639057 2.639057 231 +new 2 64 2.772589 5.545178 262 +prof 1 64 2.772589 2.772589 273 +detail 1 57 2.890372 2.890372 321 +streetmadison 1 38 3.295837 3.295837 474 +sciencesunivers 1 37 3.332205 3.332205 486 +graph 1 30 3.555348 3.555348 576 +chines 1 29 3.583519 3.583519 595 +mine 2 26 3.688879 7.377758 654 +jeff 1 25 3.737670 3.737670 673 +todai 1 25 3.737670 3.737670 672 +daili 2 24 3.761200 7.522400 706 +yahoo 1 24 3.761200 3.761200 707 +benchmark 1 19 4.007333 4.007333 859 +north 1 19 4.007333 4.007333 873 +sigmod 1 19 4.007333 4.007333 877 +lyco 1 19 4.007333 4.007333 871 +stock 1 16 4.174387 4.174387 1007 +taiwan 1 16 4.174387 4.174387 1006 +club 1 15 4.248495 4.248495 1058 +dbm 2 13 4.382027 8.764054 1136 +excit 1 11 4.553877 4.553877 1329 +surf 1 11 4.553877 4.553877 1301 +naughton 1 10 4.653960 4.653960 1450 +analyt 1 7 5.010635 5.010635 1913 +monei 1 7 5.010635 5.010635 1934 +financi 2 6 5.164786 10.329572 2197 +advis 1 6 5.164786 5.164786 2173 +carolina 1 6 5.164786 5.164786 2142 +maryland 1 6 5.164786 5.164786 2140 +chapel 1 5 5.347108 5.347108 2457 +zhao 2 4 5.568345 11.136690 2699 +ters 1 3 5.857933 5.857933 3297 +pathfind 2 2 6.263398 12.526796 6053 +olap 1 2 6.263398 6.263398 6233 +arbor 1 2 6.263398 6.263398 6235 +molap 1 2 6.263398 6.263398 6217 +yihong 2 1 6.957497 13.914994 20527 +educationb 1 1 6.957497 6.957497 20528 +hillm 1 1 6.957497 6.957497 20529 +wiscosin 1 1 6.957497 6.957497 20530 +datamin 1 1 6.957497 6.957497 20531 +microstrategi 1 1 6.957497 6.957497 20532 +rolap 1 1 6.957497 6.957497 20533 +lombard 1 1 6.957497 6.957497 20534 +kiwi 1 1 6.957497 6.957497 20535 +pgmo 1 1 6.957497 6.957497 20536 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html new file mode 100644 index 00000000..28d3703c --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhe^zhe.html @@ -0,0 +1,20 @@ +term, tf, in documents count, idf, tfidf, wordid +home 3 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +inform 1 412 0.693147 0.693147 8 +offic 2 299 1.098612 2.197224 13 +student 1 343 1.098612 1.098612 19 +wisc 2 242 1.386294 2.772588 33 +email 1 220 1.386294 1.386294 29 +address 3 170 1.791759 5.375277 62 +madison 2 165 1.791759 3.583518 55 +phone 2 175 1.791759 3.583518 45 +construct 1 139 1.945910 1.945910 82 +dayton 1 119 2.079442 2.079442 104 +homepag 1 93 2.397895 2.397895 148 +street 1 63 2.772589 2.772589 293 +still 1 50 3.044522 3.044522 362 +offer 1 43 3.178054 3.178054 414 +wang 1 21 3.912023 3.912023 790 +johnson 1 13 4.382027 4.382027 1162 +zhewang 1 1 6.957497 6.957497 20537 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html new file mode 100644 index 00000000..72c540b0 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zhichen^zhichen.html @@ -0,0 +1,103 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +system 3 443 0.693147 2.079441 6 +research 2 431 0.693147 1.386294 10 +interest 2 384 0.693147 1.386294 11 +program 2 374 0.693147 1.386294 7 +depart 1 457 0.693147 0.693147 12 +work 1 380 0.693147 0.693147 9 +offic 1 299 1.098612 1.098612 13 +project 1 340 1.098612 1.098612 18 +languag 4 227 1.386294 5.545176 26 +softwar 2 220 1.386294 2.772588 30 +link 1 247 1.386294 1.386294 24 +paper 1 205 1.609438 1.609438 38 +public 1 202 1.609438 1.609438 43 +oper 1 180 1.609438 1.609438 34 +parallel 3 169 1.791759 5.375277 60 +distribut 3 162 1.791759 5.375277 51 +recent 2 167 1.791759 3.583518 58 +madison 1 165 1.791759 1.791759 55 +phone 1 175 1.791759 1.791759 45 +network 1 168 1.791759 1.791759 61 +texa 1 160 1.791759 1.791759 64 +develop 1 174 1.791759 1.791759 53 +perform 5 143 1.945910 9.729550 74 +area 3 144 1.945910 5.837730 80 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +object 1 138 1.945910 1.945910 79 +click 1 142 1.945910 1.945910 78 +studi 3 120 2.079442 6.238326 91 +confer 2 126 2.079442 4.158884 100 +high 2 130 2.079442 4.158884 101 +compil 2 122 2.079442 4.158884 96 +dayton 1 119 2.079442 2.079442 104 +machin 1 129 2.079442 2.079442 95 +tool 1 117 2.079442 2.079442 93 +technolog 1 131 2.079442 2.079442 102 +postscript 1 131 2.079442 2.079442 90 +version 2 113 2.197225 4.394450 122 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +place 1 106 2.197225 2.197225 124 +techniqu 2 99 2.302585 4.605170 138 +memori 1 101 2.302585 2.302585 139 +environ 2 84 2.484907 4.969814 177 +issu 1 78 2.564949 2.564949 211 +orient 1 80 2.564949 2.564949 205 +nation 1 74 2.639057 2.639057 240 +html 1 75 2.639057 2.639057 235 +simul 1 66 2.708050 2.708050 255 +java 1 70 2.708050 2.708050 248 +evalu 1 64 2.772589 2.772589 266 +laboratori 1 63 2.772589 2.772589 292 +juli 1 60 2.833213 2.833213 305 +share 1 59 2.833213 2.833213 304 +think 1 57 2.890372 2.890372 314 +publish 1 57 2.890372 2.890372 326 +sever 1 56 2.890372 2.890372 322 +advisor 1 51 2.995732 2.995732 355 +friend 1 48 3.044522 3.044522 376 +press 1 42 3.218876 3.218876 419 +combin 1 42 3.218876 3.218876 421 +workstat 1 37 3.332205 3.332205 479 +field 1 37 3.332205 3.332205 482 +china 1 37 3.332205 3.332205 487 +jame 1 35 3.401197 3.401197 507 +award 1 34 3.401197 3.401197 523 +particip 1 29 3.583519 3.583519 589 +cluster 1 28 3.610918 3.610918 612 +detect 1 26 3.688879 3.688879 646 +supercomput 1 25 3.737670 3.737670 681 +benchmark 1 19 4.007333 4.007333 859 +predict 1 19 4.007333 4.007333 855 +asplo 1 17 4.110874 4.110874 948 +novel 1 15 4.248495 4.248495 1039 +paradyn 1 9 4.753590 4.753590 1614 +tunnel 1 9 4.753590 4.753590 1615 +andth 1 9 4.753590 4.753590 1481 +antonio 1 6 5.164786 5.164786 2186 +barton 1 5 5.347108 5.347108 2371 +ofparallel 1 5 5.347108 5.347108 2380 +departmentat 1 5 5.347108 5.347108 2513 +anddistribut 1 4 5.568345 5.568345 3031 +bottleneck 1 4 5.568345 5.568345 2769 +fudan 2 3 5.857933 11.715866 3707 +blizzard 1 2 6.263398 6.263398 6226 +levelprogram 1 2 6.263398 6.263398 5452 +zhichen 2 1 6.957497 13.914994 20538 +larusprofessor 1 1 6.957497 6.957497 20539 +millerawardbest 1 1 6.957497 6.957497 20540 +eliminateperform 1 1 6.957497 6.957497 20541 +toolwith 1 1 6.957497 6.957497 20542 +wisconsinwind 1 1 6.957497 6.957497 20543 +interestprogram 1 1 6.957497 6.957497 20544 +andimcrement 1 1 6.957497 6.957497 20545 +programjourn 1 1 6.957497 6.957497 20546 +researchchines 1 1 6.957497 6.957497 20547 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zj^zj.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zj^zj.html new file mode 100644 index 00000000..52e3a211 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zj^zj.html @@ -0,0 +1,26 @@ +term, tf, in documents count, idf, tfidf, wordid +univers 2 571 0.000000 0.000000 5 +home 1 672 0.000000 0.000000 1 +page 1 705 0.000000 0.000000 3 +comput 1 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +depart 1 457 0.693147 0.693147 12 +wisconsin 2 169 1.791759 3.583518 54 +madison 2 165 1.791759 3.583518 55 +dayton 1 119 2.079442 2.079442 104 +pictur 1 89 2.397895 2.397895 160 +west 1 83 2.484907 2.484907 192 +name 1 72 2.639057 2.639057 220 +street 1 63 2.772589 2.772589 293 +taken 1 31 3.496508 3.496508 555 +chen 1 21 3.912023 3.912023 791 +wang 1 21 3.912023 3.912023 790 +zhang 2 16 4.174387 8.348774 980 +tsinghua 1 13 4.382027 4.382027 1195 +hello 1 10 4.653960 4.653960 1407 +invit 1 10 4.653960 4.653960 1428 +restaur 1 6 5.164786 5.164786 2230 +theth 1 5 5.347108 5.347108 2325 +tong 1 3 5.857933 5.857933 3258 +supper 1 1 6.957497 6.957497 20548 +weihai 1 1 6.957497 6.957497 20549 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html new file mode 100644 index 00000000..a3eb75c7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.cs.wisc.edu^~zmudzin^zmudzin.html @@ -0,0 +1,14 @@ +term, tf, in documents count, idf, tfidf, wordid +inform 2 412 0.693147 1.386294 8 +student 1 343 1.098612 1.098612 19 +number 1 130 2.079442 2.079442 97 +pictur 1 89 2.397895 2.397895 160 +visitor 1 49 3.044522 3.044522 371 +thank 1 23 3.806662 3.806662 721 +stop 1 17 4.110874 4.110874 942 +poland 1 3 5.857933 5.857933 3665 +inc 1 2 6.263398 6.263398 5914 +krzysztof 1 1 6.957497 6.957497 20550 +zmudzinskikrzysztof 1 1 6.957497 6.957497 20551 +zmudzinskispin 1 1 6.957497 6.957497 20552 +pole 1 1 6.957497 6.957497 20553 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html new file mode 100644 index 00000000..372f8b99 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^ece^faculty^saluja_kewal.html @@ -0,0 +1,132 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 3 775 0.000000 0.000000 2 +scienc 1 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +research 3 431 0.693147 2.079441 10 +system 3 443 0.693147 2.079441 6 +interest 1 384 0.693147 0.693147 11 +work 1 380 0.693147 0.693147 9 +program 1 374 0.693147 0.693147 7 +engin 3 297 1.098612 3.295836 20 +us 1 329 1.098612 1.098612 16 +last 1 314 1.098612 1.098612 14 +design 6 213 1.386294 8.317764 25 +gener 3 220 1.386294 4.158882 27 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +oper 2 180 1.609438 3.218876 34 +modifi 1 178 1.609438 1.609438 35 +data 2 170 1.791759 3.583518 49 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +algorithm 1 162 1.791759 1.791759 57 +implement 1 152 1.791759 1.791759 52 +address 1 170 1.791759 1.791759 62 +perform 4 143 1.945910 7.783640 74 +area 3 144 1.945910 5.837730 80 +hall 1 146 1.945910 1.945910 65 +architectur 1 139 1.945910 1.945910 77 +model 1 145 1.945910 1.945910 69 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +analysi 1 124 2.079442 2.079442 98 +tool 1 117 2.079442 2.079442 93 +number 1 130 2.079442 2.079442 97 +make 2 111 2.197225 4.394450 120 +techniqu 1 99 2.302585 2.302585 138 +center 1 88 2.397895 2.397895 158 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +educ 1 86 2.484907 2.484907 191 +help 1 83 2.484907 2.484907 175 +involv 1 71 2.639057 2.639057 227 +effici 1 73 2.639057 2.639057 233 +logic 1 71 2.639057 2.639057 230 +servic 1 72 2.639057 2.639057 236 +test 9 66 2.708050 24.372450 252 +integr 1 67 2.708050 2.708050 245 +simul 1 66 2.708050 2.708050 255 +goal 1 66 2.708050 2.708050 250 +view 1 70 2.708050 2.708050 254 +laboratori 2 63 2.772589 5.545178 292 +dept 1 64 2.772589 2.772589 291 +colleg 2 61 2.833213 5.666426 300 +best 1 59 2.833213 2.833213 299 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +extens 1 53 2.944439 2.944439 340 +investig 3 51 2.995732 8.987196 353 +digit 2 52 2.995732 5.991464 348 +hardwar 1 51 2.995732 2.995732 350 +much 1 52 2.995732 2.995732 349 +tabl 1 51 2.995732 2.995732 346 +anoth 1 45 3.135494 3.135494 408 +fridai 1 44 3.135494 3.135494 390 +combin 1 42 3.218876 3.218876 421 +littl 2 39 3.258097 6.516194 454 +programm 1 39 3.258097 3.258097 445 +continu 1 39 3.258097 3.258097 448 +credit 1 38 3.295837 3.295837 460 +toler 1 33 3.433987 3.433987 533 +fault 2 32 3.465736 6.931472 547 +profil 1 30 3.555348 3.555348 581 +built 4 29 3.583519 14.334076 592 +arrai 1 27 3.637586 3.637586 627 +enhanc 1 26 3.688879 3.688879 644 +reliabl 1 25 3.737670 3.737670 674 +wai 1 25 3.737670 3.737670 662 +compress 2 23 3.806662 7.613324 719 +self 4 22 3.850148 15.400592 761 +vlsi 3 21 3.912023 11.736069 795 +hous 1 21 3.912023 3.912023 801 +facil 1 20 3.951244 3.951244 814 +concentr 1 18 4.060443 4.060443 906 +engineeringunivers 1 17 4.110874 4.110874 959 +modif 1 17 4.110874 4.110874 913 +monitor 1 17 4.110874 4.110874 941 +normal 1 16 4.174387 4.174387 995 +photograph 1 15 4.248495 4.248495 1056 +webmast 1 15 4.248495 4.248495 1045 +circuit 4 13 4.382027 17.528108 1131 +carri 1 13 4.382027 4.382027 1152 +station 1 13 4.382027 4.382027 1157 +engr 2 10 4.653960 9.307920 1427 +penalti 1 10 4.653960 4.653960 1405 +iowa 1 7 5.010635 5.010635 1971 +compact 1 7 5.010635 5.010635 1907 +asystem 1 4 5.568345 5.568345 2612 +termin 1 4 5.568345 5.568345 2852 +fountain 1 4 5.568345 5.568345 3069 +eduupd 1 4 5.568345 5.568345 3056 +saluja 2 3 5.857933 11.715866 3104 +eduportrait 1 3 5.857933 5.857933 4039 +fabric 1 3 5.857933 5.857933 3607 +consortia 1 3 5.857933 5.857933 4040 +cdtthi 1 3 5.857933 5.857933 4041 +testabl 3 2 6.263398 18.790194 5606 +kewal 2 2 6.263398 12.526796 4072 +drivemadison 1 2 6.263398 6.263398 6245 +andsequenti 1 2 6.263398 6.263398 4532 +salujaprofessor 1 1 6.957497 6.957497 20554 +jpgdepartmentselectr 1 1 6.957497 6.957497 20555 +engineeringcomput 1 1 6.957497 6.957497 20556 +interestsdesign 1 1 6.957497 6.957497 20557 +testableand 1 1 6.957497 6.957497 20558 +thisarea 1 1 6.957497 6.957497 20559 +theresearch 1 1 6.957497 6.957497 20560 +testgener 1 1 6.957497 6.957497 20561 +inself 1 1 6.957497 6.957497 20562 +andfault 1 1 6.957497 6.957497 20563 +methodsapplic 1 1 6.957497 6.957497 20564 +testenviron 1 1 6.957497 6.957497 20565 +regularstructur 1 1 6.957497 6.957497 20566 +ram 1 1 6.957497 6.957497 20567 +areinvestig 1 1 6.957497 6.957497 20568 +inhardwar 1 1 6.957497 6.957497 20569 +projectw 1 1 6.957497 6.957497 20570 +thatth 1 1 6.957497 6.957497 20571 +noimpact 1 1 6.957497 6.957497 20572 +digitalsystem 1 1 6.957497 6.957497 20573 +withcolor 1 1 6.957497 6.957497 20574 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html new file mode 100644 index 00000000..e35ed8e4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^duffie_neil.html @@ -0,0 +1,136 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 6 775 0.000000 0.000000 2 +univers 3 571 0.000000 0.000000 5 +page 1 705 0.000000 0.000000 3 +system 14 443 0.693147 9.704058 6 +research 4 431 0.693147 2.772588 10 +work 2 380 0.693147 1.386294 9 +engin 4 297 1.098612 4.394448 20 +time 1 293 1.098612 1.098612 17 +cours 1 273 1.098612 1.098612 15 +last 1 314 1.098612 1.098612 14 +mail 2 238 1.386294 2.772588 22 +wisc 2 242 1.386294 2.772588 33 +oper 1 180 1.609438 1.609438 34 +modifi 1 178 1.609438 1.609438 35 +wisconsin 5 169 1.791759 8.958795 54 +develop 3 174 1.791759 5.375277 53 +distribut 2 162 1.791759 3.583518 51 +madison 1 165 1.791759 1.791759 55 +data 1 170 1.791759 1.791759 49 +base 1 165 1.791759 1.791759 50 +address 1 170 1.791759 1.791759 62 +perform 3 143 1.945910 5.837730 74 +professor 1 137 1.945910 1.945910 76 +architectur 1 139 1.945910 1.945910 77 +construct 1 139 1.945910 1.945910 82 +process 1 142 1.945910 1.945910 72 +support 1 132 1.945910 1.945910 83 +machin 2 129 2.079442 4.158884 95 +high 1 130 2.079442 2.079442 101 +studi 1 120 2.079442 2.079442 91 +schedul 1 119 2.079442 2.079442 85 +theori 1 111 2.197225 2.197225 127 +well 1 109 2.197225 2.197225 121 +teach 1 108 2.197225 2.197225 112 +advanc 1 99 2.302585 2.302585 130 +center 4 88 2.397895 9.591580 158 +real 1 93 2.397895 2.397895 144 +associ 1 93 2.397895 2.397895 151 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +control 8 82 2.484907 19.879256 164 +build 1 85 2.484907 2.484907 184 +larg 1 82 2.484907 2.484907 168 +help 1 83 2.484907 2.484907 175 +optim 1 79 2.564949 2.564949 197 +method 1 80 2.564949 2.564949 213 +involv 1 71 2.639057 2.639057 227 +materi 1 75 2.639057 2.639057 221 +servic 1 72 2.639057 2.639057 236 +tuesdai 1 73 2.639057 2.639057 219 +integr 2 67 2.708050 5.416100 245 +test 1 66 2.708050 2.708050 252 +view 1 70 2.708050 2.708050 254 +guid 1 63 2.772589 2.772589 267 +complex 1 64 2.772589 2.772589 269 +evalu 1 64 2.772589 2.772589 266 +dept 1 64 2.772589 2.772589 291 +colleg 2 61 2.833213 5.666426 300 +automat 1 61 2.833213 2.833213 306 +best 1 59 2.833213 2.833213 299 +space 2 57 2.890372 5.780744 310 +sever 1 56 2.890372 2.890372 322 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +tabl 1 51 2.995732 2.995732 346 +autom 5 41 3.218876 16.094380 434 +author 1 39 3.258097 3.258097 450 +close 1 38 3.295837 3.295837 465 +industri 1 38 3.295837 3.295837 464 +credit 1 38 3.295837 3.295837 460 +cost 1 37 3.332205 3.332205 480 +robot 2 36 3.367296 6.734592 497 +product 2 33 3.433987 6.867974 527 +toler 1 33 3.433987 3.433987 533 +human 2 32 3.465736 6.931472 546 +fault 1 32 3.465736 3.465736 547 +profil 1 30 3.555348 3.555348 581 +hope 1 28 3.610918 3.610918 610 +scale 1 28 3.610918 3.610918 613 +experiment 2 26 3.688879 7.377758 645 +handl 1 24 3.761200 3.761200 685 +highli 1 23 3.806662 3.806662 725 +self 1 22 3.850148 3.850148 761 +finish 1 22 3.850148 3.850148 748 +reduc 1 22 3.850148 3.850148 759 +properti 1 22 3.850148 3.850148 749 +director 1 22 3.850148 3.850148 767 +flexibl 1 21 3.912023 3.912023 792 +fund 1 21 3.912023 3.912023 805 +increas 1 20 3.951244 3.951244 829 +feedback 2 19 4.007333 8.014666 854 +engineeringunivers 1 17 4.110874 4.110874 959 +precis 1 15 4.248495 4.248495 1023 +hierarch 1 15 4.248495 4.248495 1018 +photograph 1 15 4.248495 4.248495 1056 +webmast 1 15 4.248495 4.248495 1045 +incorpor 1 13 4.382027 4.382027 1163 +nasa 1 13 4.382027 4.382027 1188 +carri 1 13 4.382027 4.382027 1152 +engr 3 10 4.653960 13.961880 1427 +weld 1 9 4.753590 4.753590 1538 +factor 1 9 4.753590 4.753590 1544 +manufactur 7 8 4.875197 34.126379 1634 +sensor 2 7 5.010635 10.021270 1920 +explain 1 7 5.010635 5.010635 1816 +actuat 1 5 5.347108 5.347108 2442 +neil 2 4 5.568345 11.136690 2841 +fulli 1 4 5.568345 5.568345 2986 +emphas 1 4 5.568345 5.568345 2672 +fountain 1 4 5.568345 5.568345 3069 +eduupd 1 4 5.568345 5.568345 3056 +eduportrait 1 3 5.857933 5.857933 4039 +aerospac 1 3 5.857933 5.857933 3555 +consortia 1 3 5.857933 5.857933 4040 +cdtthi 1 3 5.857933 5.857933 4041 +duffi 7 2 6.263398 43.843786 4845 +telerobot 2 2 6.263398 12.526796 4847 +drivemadison 1 2 6.263398 6.263398 6245 +departmentsmechan 1 1 6.957497 6.957497 20575 +engineeringeducationb 1 1 6.957497 6.957497 20576 +madisonm 1 1 6.957497 6.957497 20577 +madisonphd 1 1 6.957497 6.957497 20578 +madisonresearch 1 1 6.957497 6.957497 20579 +interestsrobot 1 1 6.957497 6.957497 20580 +micromechanismscent 1 1 6.957497 6.957497 20581 +consortiamanufactur 1 1 6.957497 6.957497 20582 +programwisconsin 1 1 6.957497 6.957497 20583 +roboticsprofessor 1 1 6.957497 6.957497 20584 +inspect 1 1 6.957497 6.957497 20585 +mold 1 1 6.957497 6.957497 20586 +rework 1 1 6.957497 6.957497 20587 +agricultur 1 1 6.957497 6.957497 20588 +tactil 1 1 6.957497 6.957497 20589 +sensori 1 1 6.957497 6.957497 20590 +fatigu 1 1 6.957497 6.957497 20591 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html new file mode 100644 index 00000000..a74edbb7 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.engr.wisc.edu^me^faculty^shapiro_vadim.html @@ -0,0 +1,204 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 9 775 0.000000 0.000000 2 +univers 2 571 0.000000 0.000000 5 +scienc 1 640 0.000000 0.000000 4 +page 1 705 0.000000 0.000000 3 +system 4 443 0.693147 2.772588 6 +research 4 431 0.693147 2.772588 10 +interest 1 384 0.693147 0.693147 11 +inform 1 412 0.693147 0.693147 8 +engin 10 297 1.098612 10.986120 20 +us 2 329 1.098612 2.197224 16 +current 2 284 1.098612 2.197224 21 +project 1 340 1.098612 1.098612 18 +last 1 314 1.098612 1.098612 14 +design 10 213 1.386294 13.862940 25 +wisc 3 242 1.386294 4.158882 33 +mail 2 238 1.386294 2.772588 22 +cornel 2 215 1.386294 2.772588 23 +graduat 1 215 1.386294 1.386294 31 +gener 1 220 1.386294 1.386294 27 +public 1 202 1.609438 1.609438 43 +includ 1 208 1.609438 1.609438 42 +modifi 1 178 1.609438 1.609438 35 +algorithm 2 162 1.791759 3.583518 57 +develop 2 174 1.791759 3.583518 53 +wisconsin 1 169 1.791759 1.791759 54 +madison 1 165 1.791759 1.791759 55 +applic 1 170 1.791759 1.791759 56 +recent 1 167 1.791759 1.791759 58 +address 1 170 1.791759 1.791759 62 +model 14 145 1.945910 27.242740 69 +professor 2 137 1.945910 3.891820 76 +support 2 132 1.945910 3.891820 83 +process 2 142 1.945910 3.891820 72 +analysi 3 124 2.079442 6.238326 98 +studi 1 120 2.079442 2.079442 91 +tool 1 117 2.079442 2.079442 93 +specif 3 106 2.197225 6.591675 106 +assist 1 112 2.197225 2.197225 113 +intern 1 108 2.197225 2.197225 128 +structur 1 106 2.197225 2.197225 105 +make 1 111 2.197225 2.197225 120 +part 4 98 2.302585 9.210340 129 +techniqu 1 99 2.302585 2.302585 138 +need 1 98 2.302585 2.302585 135 +center 2 88 2.397895 4.795790 158 +select 1 91 2.397895 2.397895 154 +real 1 93 2.397895 2.397895 144 +graphic 1 90 2.397895 2.397895 147 +commun 1 95 2.397895 2.397895 157 +present 1 91 2.397895 2.397895 145 +call 1 91 2.397895 2.397895 153 +search 1 95 2.397895 2.397895 155 +comment 1 93 2.397895 2.397895 146 +journal 1 83 2.484907 2.484907 183 +activ 1 84 2.484907 2.484907 182 +contain 1 81 2.484907 2.484907 174 +help 1 83 2.484907 2.484907 175 +april 2 77 2.564949 5.129898 196 +appli 1 71 2.639057 2.639057 226 +servic 1 72 2.639057 2.639057 236 +simul 2 66 2.708050 5.416100 255 +practic 1 70 2.708050 2.708050 246 +thursdai 1 70 2.708050 2.708050 241 +view 1 70 2.708050 2.708050 254 +function 2 62 2.772589 5.545178 275 +foundat 1 62 2.772589 2.772589 286 +januari 1 62 2.772589 2.772589 264 +creat 1 63 2.772589 2.772589 277 +dept 1 64 2.772589 2.772589 291 +colleg 2 61 2.833213 5.666426 300 +simpl 1 60 2.833213 2.833213 298 +best 1 59 2.833213 2.833213 299 +space 1 57 2.890372 2.890372 310 +major 1 56 2.890372 2.890372 315 +index 1 56 2.890372 2.890372 309 +browser 1 56 2.890372 2.890372 313 +suggest 1 53 2.944439 2.944439 331 +investig 2 51 2.995732 5.991464 353 +maintain 1 51 2.995732 2.995732 342 +tabl 1 51 2.995732 2.995732 346 +physic 8 47 3.091042 24.728336 377 +california 1 46 3.091042 3.091042 388 +possibl 1 47 3.091042 3.091042 378 +algebra 1 45 3.135494 3.135494 394 +mechan 10 43 3.178054 31.780540 416 +term 1 43 3.178054 3.178054 411 +http 1 41 3.218876 3.218876 420 +york 1 41 3.218876 3.218876 435 +autom 1 41 3.218876 3.218876 434 +form 2 39 3.258097 6.516194 443 +transact 1 39 3.258097 3.258097 438 +industri 1 38 3.295837 3.295837 464 +credit 1 38 3.295837 3.295837 460 +formal 2 37 3.332205 6.664410 478 +represent 4 35 3.401197 13.604788 512 +award 2 34 3.401197 6.802394 523 +product 1 33 3.433987 3.433987 527 +collabor 1 32 3.465736 3.465736 543 +focu 1 30 3.555348 3.555348 571 +profil 1 30 3.555348 3.555348 581 +manipul 1 27 3.637586 3.637586 624 +repres 1 26 3.688879 3.688879 656 +consist 1 26 3.688879 3.688879 651 +effort 1 26 3.688879 3.688879 652 +reliabl 1 25 3.737670 3.737670 674 +todai 1 25 3.737670 3.737670 672 +aspect 1 25 3.737670 3.737670 663 +fellow 1 24 3.761200 3.761200 701 +famili 1 23 3.806662 3.806662 735 +geometri 2 22 3.850148 7.700296 752 +deal 1 22 3.850148 3.850148 736 +thu 1 21 3.912023 3.912023 773 +basi 1 20 3.951244 3.951244 828 +geometr 6 19 4.007333 24.043998 852 +separ 1 19 4.007333 4.007333 844 +behavior 4 18 4.060443 16.241772 881 +aid 3 18 4.060443 12.181329 904 +engineeringunivers 1 17 4.110874 4.110874 959 +analyz 1 17 4.110874 4.110874 925 +seek 1 17 4.110874 4.110874 954 +novel 1 15 4.248495 4.248495 1039 +photograph 1 15 4.248495 4.248495 1056 +webmast 1 15 4.248495 4.248495 1045 +topolog 1 14 4.317488 4.317488 1089 +convert 1 13 4.382027 4.382027 1122 +cannot 1 13 4.382027 4.382027 1144 +discret 1 13 4.382027 4.382027 1165 +career 1 12 4.465908 4.465908 1287 +captur 1 12 4.465908 4.465908 1232 +abil 1 11 4.553877 4.553877 1341 +engr 2 10 4.653960 9.307920 1427 +decomposit 1 10 4.653960 4.653960 1439 +relationship 1 10 4.653960 4.653960 1383 +facilit 1 10 4.653960 4.653960 1412 +mainten 1 9 4.753590 4.753590 1543 +establish 1 9 4.753590 4.753590 1532 +shapiro 3 8 4.875197 14.625591 1686 +manufactur 3 8 4.875197 14.625591 1634 +combinatori 2 8 4.875197 9.750394 1629 +competit 2 8 4.875197 9.750394 1635 +convers 1 8 4.875197 4.875197 1673 +boundari 1 7 5.010635 5.010635 1929 +appar 1 7 5.010635 5.010635 1958 +ongo 1 6 5.164786 5.164786 2215 +lack 1 6 5.164786 5.164786 1994 +solid 2 5 5.347108 10.694216 2255 +rigid 1 5 5.347108 5.347108 2432 +chain 2 4 5.568345 11.136690 2712 +phenomena 1 4 5.568345 5.568345 2962 +languagesand 1 4 5.568345 5.568345 3071 +fountain 1 4 5.568345 5.568345 3069 +eduupd 1 4 5.568345 5.568345 3056 +systemat 2 3 5.857933 11.715866 3781 +eduportrait 1 3 5.857933 5.857933 4039 +motor 1 3 5.857933 5.857933 3909 +fabric 1 3 5.857933 5.857933 3607 +consortia 1 3 5.857933 5.857933 4040 +cdtthi 1 3 5.857933 5.857933 4041 +artifact 2 2 6.263398 12.526796 5346 +avenuemadison 1 2 6.263398 6.263398 4842 +interestscomput 1 2 6.263398 6.263398 6113 +palmer 1 2 6.263398 6.263398 5453 +methodsand 1 2 6.263398 6.263398 5779 +amajor 1 2 6.263398 6.263398 5343 +designand 1 2 6.263398 6.263398 6100 +andmanufactur 1 2 6.263398 6.263398 6244 +tomanufactur 1 2 6.263398 6.263398 6016 +ofnew 1 2 6.263398 6.263398 5881 +vadim 2 1 6.957497 13.914994 20592 +vshapiro 1 1 6.957497 6.957497 20593 +jpgurl 1 1 6.957497 6.957497 20594 +departmentscomput 1 1 6.957497 6.957497 20595 +sciencemechan 1 1 6.957497 6.957497 20596 +engineeringeducationba 1 1 6.957497 6.957497 20597 +universitym 1 1 6.957497 6.957497 20598 +angelesm 1 1 6.957497 6.957497 20599 +universityphd 1 1 6.957497 6.957497 20600 +univeristyresearch 1 1 6.957497 6.957497 20601 +automationcent 1 1 6.957497 6.957497 20602 +consortiamathemat 1 1 6.957497 6.957497 20603 +programmanufactur 1 1 6.957497 6.957497 20604 +programspati 1 1 6.957497 6.957497 20605 +laboratoryselect 1 1 6.957497 6.957497 20606 +honorsn 1 1 6.957497 6.957497 20607 +vossler 1 1 6.957497 6.957497 20608 +betweengeometri 1 1 6.957497 6.957497 20609 +bemodel 1 1 6.957497 6.957497 20610 +manufacturedbas 1 1 6.957497 6.957497 20611 +ofdistinct 1 1 6.957497 6.957497 20612 +technologicalbarri 1 1 6.957497 6.957497 20613 +undermin 1 1 6.957497 6.957497 20614 +commercialgeometr 1 1 6.957497 6.957497 20615 +eliminatingambigu 1 1 6.957497 6.957497 20616 +ofparametr 1 1 6.957497 6.957497 20617 +bedescrib 1 1 6.957497 6.957497 20618 +interactingprimit 1 1 6.957497 6.957497 20619 +roadblock 1 1 6.957497 6.957497 20620 +withtheoret 1 1 6.957497 6.957497 20621 +smoothintegr 1 1 6.957497 6.957497 20622 +thedesir 1 1 6.957497 6.957497 20623 +tounifi 1 1 6.957497 6.957497 20624 +theseand 1 1 6.957497 6.957497 20625 +physicalobject 1 1 6.957497 6.957497 20626 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ new file mode 100644 index 00000000..4ac150fd --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.ma.utexas.edu^users^bshults^ATP^ @@ -0,0 +1,108 @@ +term, tf, in documents count, idf, tfidf, wordid +comput 2 775 0.000000 0.000000 2 +scienc 2 640 0.000000 0.000000 4 +univers 1 571 0.000000 0.000000 5 +depart 1 457 0.693147 0.693147 12 +system 1 443 0.693147 0.693147 6 +inform 1 412 0.693147 0.693147 8 +student 2 343 1.098612 2.197224 19 +current 1 284 1.098612 1.098612 21 +us 1 329 1.098612 1.098612 16 +also 1 259 1.386294 1.386294 28 +list 4 201 1.609438 6.437752 39 +group 1 183 1.609438 1.609438 36 +texa 1 160 1.791759 1.791759 64 +avail 1 169 1.791759 1.791759 48 +develop 1 174 1.791759 1.791759 53 +contact 1 153 1.791759 1.791759 59 +first 2 140 1.945910 3.891820 71 +problem 1 147 1.945910 1.945910 75 +relat 1 139 1.945910 1.945910 68 +report 2 131 2.079442 4.158884 92 +technolog 1 131 2.079442 2.079442 102 +mathemat 3 108 2.197225 6.591675 123 +site 1 106 2.197225 2.197225 119 +theori 1 111 2.197225 2.197225 127 +part 1 98 2.302585 2.302585 129 +present 1 91 2.397895 2.397895 145 +method 2 80 2.564949 5.129898 213 +want 1 79 2.564949 2.564949 199 +logic 2 71 2.639057 5.278114 230 +appli 1 71 2.639057 2.639057 226 +order 2 69 2.708050 5.416100 249 +knowledg 1 67 2.708050 2.708050 243 +previou 1 62 2.772589 2.772589 290 +improv 1 62 2.772589 2.772589 289 +descript 1 64 2.772589 2.772589 271 +index 1 56 2.890372 2.890372 309 +faculti 1 56 2.890372 2.890372 325 +variou 1 56 2.890372 2.890372 317 +visitor 1 49 3.044522 3.044522 371 +electron 1 47 3.091042 3.091042 379 +done 1 47 3.091042 3.091042 381 +natur 1 44 3.135494 3.135494 406 +autom 2 41 3.218876 6.437752 434 +past 1 42 3.218876 3.218876 428 +continu 1 39 3.258097 3.258097 448 +late 1 40 3.258097 3.258097 439 +tech 3 35 3.401197 10.203591 515 +ad 1 32 3.465736 3.465736 544 +produc 1 30 3.555348 3.555348 572 +computersci 1 30 3.555348 3.555348 562 +robert 1 30 3.555348 3.555348 567 +profil 1 30 3.555348 3.555348 581 +intend 1 28 3.610918 3.610918 599 +higher 1 24 3.761200 3.761200 690 +seri 1 24 3.761200 3.761200 708 +other 1 24 3.761200 3.761200 697 +proof 2 23 3.806662 7.613324 720 +geometri 1 22 3.850148 3.850148 752 +theorem 4 21 3.912023 15.648092 786 +prove 4 19 4.007333 16.029332 848 +feedback 1 19 4.007333 4.007333 854 +primarili 1 13 4.382027 4.382027 1185 +deduct 1 12 4.465908 4.465908 1236 +benjamin 1 11 4.553877 4.553877 1296 +incomplet 4 9 4.753590 19.014360 1575 +ataustin 1 9 4.753590 4.753590 1610 +prover 6 8 4.875197 29.251182 1653 +boyer 1 6 5.164786 5.164786 2013 +inequ 1 6 5.164786 5.164786 2113 +groupth 2 5 5.347108 10.694216 2549 +bledso 2 4 5.568345 11.136690 2999 +systemsand 1 4 5.568345 5.568345 2804 +chou 1 4 5.568345 5.568345 3033 +analog 1 4 5.568345 5.568345 2875 +feng 1 3 5.857933 5.857933 3300 +woodi 2 2 6.263398 12.526796 5459 +hine 2 2 6.263398 12.526796 4475 +intent 1 2 6.263398 6.263398 5768 +herei 1 2 6.263398 6.263398 6187 +hein 2 1 6.957497 13.914994 20627 +borel 2 1 6.957497 13.914994 20628 +groupautom 1 1 6.957497 6.957497 20629 +techreport 1 1 6.957497 6.957497 20630 +reportseri 1 1 6.957497 6.957497 20631 +grouplarri 1 1 6.957497 6.957497 20632 +hinesmarti 1 1 6.957497 6.957497 20633 +mayberrybenjamin 1 1 6.957497 6.957497 20634 +shultsalumniprevi 1 1 6.957497 6.957497 20635 +robertboyerj 1 1 6.957497 6.957497 20636 +strother 1 1 6.957497 6.957497 20637 +moorethi 1 1 6.957497 6.957497 20638 +collaboratorswhat 1 1 6.957497 6.957497 20639 +implyth 1 1 6.957497 6.957497 20640 +proverstrivelarri 1 1 6.957497 6.957497 20641 +struvelarri 1 1 6.957497 6.957497 20642 +proverand 1 1 6.957497 6.957497 20643 +theretoinclud 1 1 6.957497 6.957497 20644 +mcphee 1 1 6.957497 6.957497 20645 +theoryimplement 1 1 6.957497 6.957497 20646 +theoremprecondit 1 1 6.957497 6.957497 20647 +proverbledso 1 1 6.957497 6.957497 20648 +theoremnqthmboy 1 1 6.957497 6.957497 20649 +andmoor 1 1 6.957497 6.957497 20650 +clinc 1 1 6.957497 6.957497 20651 +iprshult 1 1 6.957497 6.957497 20652 +relatedlinksdo 1 1 6.957497 6.957497 20653 +shult 1 1 6.957497 6.957497 20654 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.tc.cornell.edu^~anne b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.tc.cornell.edu^~anne new file mode 100644 index 00000000..77b1f484 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.tc.cornell.edu^~anne @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 2 121 2.079442 4.158884 89 +move 1 47 3.091042 3.091042 382 +perman 1 11 4.553877 4.553877 1372 +moveddocu 1 2 6.263398 6.263398 6246 +movedthi 1 2 6.263398 6.263398 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.tc.cornell.edu^~bruce b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.tc.cornell.edu^~bruce new file mode 100644 index 00000000..77b1f484 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/data_sanitized/train/tfidf/tfidf_raw/nc/http_^^www.tc.cornell.edu^~bruce @@ -0,0 +1,6 @@ +term, tf, in documents count, idf, tfidf, wordid +document 2 121 2.079442 4.158884 89 +move 1 47 3.091042 3.091042 382 +perman 1 11 4.553877 4.553877 1372 +moveddocu 1 2 6.263398 6.263398 6246 +movedthi 1 2 6.263398 6.263398 6247 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/tfidf/tfidf_raw/1000.txt b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/tfidf/tfidf_raw/1000.txt new file mode 100644 index 00000000..09a0ff04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/tfidf/tfidf_raw/1000.txt @@ -0,0 +1,1051 @@ ++1 1:0.000000 6:0.693147 7:0.693147 13:3.295836 14:1.098612 22:4.158882 23:4.158882 24:1.386294 34:3.218876 35:1.609438 45:5.375277 46:5.375277 65:5.837730 66:3.891820 67:1.945910 105:2.197225 ++1 1:0.000000 2:0.000000 3:0.000000 6:2.079441 7:0.693147 8:0.693147 14:2.197224 15:1.098612 16:1.098612 34:8.047190 36:1.609438 85:2.079442 106:2.197225 141:2.397895 163:4.969814 195:2.564949 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 8:5.545176 6:3.465735 9:1.386294 10:0.693147 15:8.788896 17:2.197224 13:2.197224 14:2.197224 23:6.931470 25:5.545176 22:2.772588 26:1.386294 ++1 3:0.000000 1:0.000000 8:2.079441 7:0.693147 13:2.197224 14:1.098612 27:1.386294 36:1.609438 46:3.583518 70:11.675460 73:1.945910 67:1.945910 66:1.945910 74:1.945910 71:1.945910 90:6.238326 ++1 3:0.000000 6:2.772588 9:2.079441 8:2.079441 11:0.693147 18:6.591672 15:4.394448 19:2.197224 16:1.098612 25:2.772588 28:1.386294 51:7.167036 52:1.791759 46:1.791759 53:1.791759 91:2.079442 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 8:0.693147 12:0.693147 15:5.493060 20:2.197224 18:2.197224 14:1.098612 24:2.772588 29:2.772588 25:1.386294 40:4.828314 36:1.609438 35:1.609438 ++1 3:0.000000 15:2.197224 13:1.098612 14:1.098612 27:1.386294 24:1.386294 29:1.386294 23:1.386294 40:1.609438 37:1.609438 41:1.609438 46:1.791759 78:1.945910 85:2.079442 114:2.197225 142:4.795790 ++1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 7:0.693147 12:0.693147 9:0.693147 15:3.295836 23:1.386294 24:1.386294 40:1.609438 42:1.609438 37:1.609438 67:1.945910 70:1.945910 105:2.197225 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 7:7.624617 9:3.465735 6:1.386294 12:0.693147 15:19.775016 17:6.591672 16:4.394448 19:3.295836 21:2.197224 13:1.098612 26:6.931470 27:5.545176 ++1 3:0.000000 1:0.000000 2:0.000000 8:2.772588 6:0.693147 15:9.887508 18:2.197224 16:1.098612 29:2.772588 26:1.386294 25:1.386294 28:1.386294 23:1.386294 42:3.218876 37:1.609438 49:1.791759 ++1 3:0.000000 1:0.000000 8:2.079441 7:0.693147 13:2.197224 14:1.098612 27:1.386294 36:1.609438 46:3.583518 70:11.675460 73:1.945910 67:1.945910 66:1.945910 74:1.945910 71:1.945910 90:6.238326 ++1 1:0.000000 6:0.693147 7:0.693147 13:3.295836 14:1.098612 22:4.158882 23:4.158882 24:1.386294 34:3.218876 35:1.609438 45:5.375277 46:5.375277 66:11.675460 65:5.837730 67:1.945910 105:2.197225 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:3.465735 12:1.386294 8:1.386294 6:0.693147 15:4.394448 16:2.197224 17:2.197224 23:1.386294 28:1.386294 37:3.218876 34:3.218876 ++1 1:0.000000 2:0.000000 8:0.693147 12:0.693147 15:3.295836 13:2.197224 40:3.218876 48:1.791759 46:1.791759 47:1.791759 67:5.837730 78:1.945910 73:1.945910 91:2.079442 114:2.197225 169:2.484907 ++1 3:0.000000 2:0.000000 1:0.000000 6:13.169793 9:4.158882 10:3.465735 7:2.772588 8:2.079441 18:28.563912 15:7.690284 16:6.591672 19:4.394448 13:4.394448 17:2.197224 20:2.197224 30:6.931470 ++1 2:0.000000 4:0.000000 7:7.624617 8:2.772588 9:1.386294 11:0.693147 10:0.693147 6:0.693147 15:6.591672 19:5.493060 13:2.197224 16:1.098612 18:1.098612 26:15.249234 23:4.158882 24:2.772588 ++1 3:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 15:3.295836 19:1.098612 18:1.098612 16:1.098612 23:2.772588 24:1.386294 22:1.386294 40:1.609438 109:2.197225 136:2.302585 141:2.397895 ++1 3:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 15:3.295836 18:2.197224 19:1.098612 16:1.098612 23:2.772588 24:1.386294 22:1.386294 40:1.609438 85:2.079442 108:2.197225 109:2.197225 ++1 14:9.887508 17:2.197224 15:1.098612 25:2.772588 35:14.484942 37:1.609438 38:1.609438 57:3.583518 67:3.891820 73:1.945910 98:4.158884 107:2.197225 120:2.197225 133:4.605170 148:2.397895 162:19.879256 ++1 1:0.000000 2:0.000000 6:6.238323 7:2.772588 15:6.591672 14:1.098612 16:1.098612 18:1.098612 26:4.158882 27:1.386294 25:1.386294 34:3.218876 60:16.125831 57:3.583518 46:1.791759 53:1.791759 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 7:1.386294 6:0.693147 8:0.693147 15:4.394448 16:4.394448 26:2.772588 28:2.772588 25:1.386294 29:1.386294 23:1.386294 24:1.386294 42:1.609438 ++1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 8:0.693147 15:3.295836 22:2.772588 23:1.386294 40:1.609438 47:1.791759 48:1.791759 99:2.079442 108:4.394450 116:2.197225 ++1 1:0.000000 4:0.000000 5:0.000000 12:0.693147 15:4.394448 13:4.394448 20:1.098612 23:6.931470 30:1.386294 40:1.609438 35:1.609438 46:7.167036 67:3.891820 76:1.945910 73:1.945910 66:1.945910 ++1 1:0.000000 3:0.000000 2:0.000000 8:0.693147 6:0.693147 7:0.693147 18:3.295836 15:1.098612 25:1.386294 66:9.729550 73:1.945910 87:4.158884 90:2.079442 193:2.564949 230:2.639057 267:2.772589 ++1 1:0.000000 3:0.000000 7:5.545176 8:1.386294 15:3.295836 13:1.098612 14:1.098612 23:1.386294 41:1.609438 46:1.791759 73:1.945910 88:6.238326 99:2.079442 118:2.197225 107:2.197225 112:2.197225 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:9.010911 12:2.079441 6:1.386294 11:0.693147 13:7.690284 15:5.493060 14:2.197224 16:1.098612 17:1.098612 18:1.098612 24:12.476646 ++1 1:0.000000 2:0.000000 4:0.000000 15:3.295836 17:1.098612 14:1.098612 27:1.386294 35:1.609438 105:2.197225 124:2.197225 120:2.197225 149:2.397895 176:2.484907 163:2.484907 169:2.484907 221:2.639057 ++1 3:0.000000 7:1.386294 8:1.386294 10:0.693147 6:0.693147 13:4.394448 18:1.098612 26:8.317764 29:4.158882 24:2.772588 23:2.772588 37:3.218876 52:5.375277 45:3.583518 46:3.583518 48:3.583518 ++1 6:0.693147 19:1.098612 27:1.386294 37:1.609438 51:3.583518 47:1.791759 90:6.238326 108:2.197225 193:5.129898 246:2.708050 323:2.890372 424:6.437752 518:3.401197 775:7.824046 824:3.951244 1147:4.382027 ++1 3:0.000000 1:0.000000 2:0.000000 9:1.386294 6:0.693147 7:0.693147 8:0.693147 18:3.295836 15:1.098612 21:1.098612 14:1.098612 20:1.098612 16:1.098612 28:1.386294 36:1.609438 38:1.609438 ++1 2:0.000000 1:0.000000 3:0.000000 15:2.197224 17:1.098612 18:1.098612 37:1.609438 50:1.791759 73:5.837730 75:3.891820 67:3.891820 69:1.945910 79:1.945910 95:4.158884 87:2.079442 107:2.197225 ++1 1:0.000000 4:0.000000 5:0.000000 3:0.000000 8:2.079441 12:1.386294 6:0.693147 18:4.394448 15:3.295836 16:1.098612 23:2.772588 26:1.386294 39:1.609438 38:1.609438 47:1.791759 48:1.791759 ++1 1:0.000000 3:0.000000 2:0.000000 15:2.197224 24:1.386294 37:6.437752 41:1.609438 66:3.891820 71:3.891820 131:2.302585 135:2.302585 149:4.795790 143:2.397895 176:2.484907 169:2.484907 218:5.278114 ++1 15:1.098612 23:1.386294 40:3.218876 57:8.958795 52:1.791759 73:44.755930 76:1.945910 111:6.591675 162:2.484907 194:2.564949 324:2.890372 492:6.734592 576:3.555348 786:3.912023 933:4.110874 1643:4.875197 ++1 2:0.000000 3:0.000000 9:2.079441 6:2.079441 8:0.693147 7:0.693147 15:6.591672 13:4.394448 17:3.295836 19:2.197224 16:1.098612 23:2.772588 30:1.386294 25:1.386294 37:14.484942 42:3.218876 ++1 1:0.000000 3:0.000000 2:0.000000 6:7.624617 7:2.772588 8:0.693147 15:8.788896 13:2.197224 28:5.545176 23:2.772588 25:1.386294 37:16.094380 34:8.047190 36:3.218876 42:1.609438 46:3.583518 ++1 7:0.693147 13:2.197224 15:1.098612 23:2.772588 37:1.609438 46:3.583518 73:62.269120 67:42.810020 87:2.079442 107:2.197225 112:2.197225 113:2.197225 130:2.302585 156:45.560005 179:39.758512 162:22.364163 ++1 3:0.000000 7:0.693147 13:2.197224 26:1.386294 22:1.386294 23:1.386294 40:11.266066 46:3.583518 67:7.783640 66:1.945910 112:2.197225 113:2.197225 114:2.197225 130:2.302585 146:2.397895 141:2.397895 ++1 3:0.000000 2:0.000000 4:0.000000 7:1.386294 11:0.693147 12:0.693147 14:1.098612 22:1.386294 44:4.828314 40:1.609438 37:1.609438 41:1.609438 63:1.791759 66:1.945910 78:1.945910 98:4.158884 ++1 3:0.000000 2:0.000000 6:3.465735 7:1.386294 15:1.098612 42:1.609438 37:1.609438 50:1.791759 48:1.791759 52:1.791759 83:1.945910 71:1.945910 66:1.945910 72:1.945910 93:8.317768 97:4.158884 ++1 2:0.000000 3:0.000000 6:2.079441 10:1.386294 9:1.386294 18:8.788896 15:3.295836 21:1.098612 14:1.098612 25:4.158882 38:1.609438 37:1.609438 60:3.583518 61:3.583518 46:3.583518 47:1.791759 ++1 6:2.772588 11:0.693147 10:0.693147 8:0.693147 18:2.197224 16:1.098612 21:1.098612 27:2.772588 28:2.772588 25:1.386294 34:8.047190 37:8.047190 38:3.218876 39:3.218876 62:3.583518 58:1.791759 ++1 3:0.000000 2:0.000000 5:0.000000 1:0.000000 7:24.953292 6:1.386294 9:0.693147 14:12.084732 13:8.788896 15:7.690284 17:3.295836 16:3.295836 19:1.098612 28:5.545176 29:2.772588 31:1.386294 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 7:4.158882 8:1.386294 11:0.693147 13:5.493060 16:3.295836 14:2.197224 17:2.197224 15:1.098612 24:8.317764 28:2.772588 29:1.386294 27:1.386294 ++1 3:0.000000 2:0.000000 1:0.000000 8:1.386294 15:4.394448 16:2.197224 19:2.197224 27:2.772588 41:4.828314 43:1.609438 66:13.621370 84:1.945910 81:1.945910 67:1.945910 70:1.945910 88:2.079442 ++1 3:0.000000 2:0.000000 1:0.000000 8:2.079441 7:0.693147 9:0.693147 15:4.394448 16:2.197224 27:2.772588 40:1.609438 43:1.609438 66:9.729550 84:1.945910 81:1.945910 70:1.945910 95:6.238326 ++1 3:0.000000 13:2.197224 14:2.197224 29:4.158882 44:4.828314 35:1.609438 46:3.583518 75:3.891820 76:1.945910 65:1.945910 81:1.945910 99:2.079442 123:2.197225 109:2.197225 146:2.397895 163:2.484907 ++1 2:0.000000 3:0.000000 1:0.000000 6:4.158882 7:1.386294 8:0.693147 11:0.693147 9:0.693147 13:3.295836 17:3.295836 16:3.295836 15:2.197224 19:2.197224 14:2.197224 24:4.158882 25:2.772588 ++1 3:0.000000 1:0.000000 7:0.693147 21:1.098612 15:1.098612 17:1.098612 13:1.098612 24:1.386294 37:8.047190 40:4.828314 48:3.583518 47:3.583518 63:1.791759 67:3.891820 99:2.079442 96:2.079442 ++1 1:0.000000 2:0.000000 3:0.000000 5:0.000000 7:4.852029 8:2.772588 11:0.693147 6:0.693147 15:14.281956 19:3.295836 16:2.197224 21:1.098612 25:6.931470 24:2.772588 26:1.386294 30:1.386294 ++1 2:0.000000 7:3.465735 8:0.693147 13:2.197224 17:1.098612 26:4.158882 29:1.386294 27:1.386294 44:3.218876 38:1.609438 35:1.609438 60:5.375277 45:1.791759 46:1.791759 66:5.837730 73:1.945910 ++1 1:0.000000 3:0.000000 8:1.386294 6:0.693147 18:1.098612 22:2.772588 34:1.609438 37:1.609438 36:1.609438 47:1.791759 66:1.945910 109:4.394450 263:2.772589 273:2.772589 783:3.912023 7094:6.957497 ++1 2:0.000000 6:0.693147 39:1.609438 40:1.609438 51:3.583518 47:1.791759 110:2.197225 7095:6.957497 7096:6.957497 7097:6.957497 7098:6.957497 7099:6.957497 7100:6.957497 7101:6.957497 7102:6.957497 7103:6.957497 ++1 18:1.098612 39:1.609438 86:2.079442 221:2.639057 411:3.178054 654:7.377758 941:8.221748 1233:4.465908 7104:6.957497 7105:6.957497 7106:6.957497 7107:6.957497 7108:6.957497 ++1 8:0.693147 12:0.693147 15:3.295836 14:1.098612 41:1.609438 65:1.945910 84:1.945910 70:1.945910 66:1.945910 87:2.079442 116:4.394450 107:2.197225 225:2.639057 247:2.708050 280:2.772589 287:2.772589 ++1 3:0.000000 7:1.386294 8:0.693147 12:0.693147 15:3.295836 70:1.945910 91:2.079442 116:4.394450 107:2.197225 113:2.197225 176:2.484907 193:10.259796 225:2.639057 252:8.124150 247:2.708050 280:2.772589 ++1 2:0.000000 8:0.693147 7:0.693147 12:0.693147 15:3.295836 18:2.197224 27:1.386294 38:1.609438 50:3.583518 61:1.791759 70:1.945910 95:4.158884 87:2.079442 88:2.079442 116:4.394450 107:2.197225 ++1 2:0.000000 4:0.000000 7:4.158882 9:0.693147 15:2.197224 26:5.545176 30:1.386294 39:1.609438 52:1.791759 49:1.791759 66:3.891820 84:1.945910 70:1.945910 91:4.158884 87:2.079442 101:2.079442 ++1 7:2.079441 15:3.295836 19:2.197224 25:1.386294 27:1.386294 70:3.891820 82:1.945910 96:4.158884 91:4.158884 134:2.302585 169:2.484907 204:2.564949 222:2.639057 272:2.772589 271:2.772589 311:5.780744 ++1 2:0.000000 16:1.098612 15:1.098612 26:1.386294 42:1.609438 72:1.945910 70:1.945910 91:6.238326 95:2.079442 110:2.197225 155:2.397895 154:2.397895 167:2.484907 177:2.484907 169:2.484907 225:7.917171 ++1 7:3.465735 6:0.693147 10:0.693147 15:1.098612 19:1.098612 27:1.386294 26:1.386294 38:1.609438 37:1.609438 47:1.791759 73:3.891820 71:1.945910 75:1.945910 66:1.945910 96:2.079442 106:2.197225 ++1 2:0.000000 4:0.000000 6:9.010911 7:9.010911 8:2.079441 11:1.386294 10:0.693147 9:0.693147 16:7.690284 15:5.493060 21:3.295836 20:3.295836 13:2.197224 17:2.197224 19:2.197224 14:1.098612 ++1 7:2.772588 8:0.693147 13:4.394448 17:1.098612 14:1.098612 29:2.772588 27:1.386294 44:4.828314 34:3.218876 35:1.609438 45:3.583518 46:3.583518 52:1.791759 75:23.350920 66:7.783640 73:3.891820 ++1 13:2.197224 16:1.098612 44:4.828314 37:3.218876 40:1.609438 38:1.609438 61:5.375277 57:1.791759 56:1.791759 77:1.945910 67:1.945910 73:1.945910 97:2.079442 90:2.079442 107:2.197225 108:2.197225 ++1 4:0.000000 3:0.000000 2:0.000000 10:0.693147 13:2.197224 19:2.197224 15:1.098612 28:1.386294 24:1.386294 27:1.386294 44:4.828314 40:1.609438 38:1.609438 37:1.609438 39:1.609438 47:5.375277 ++1 3:0.000000 2:0.000000 7:4.158882 9:1.386294 11:0.693147 15:4.394448 19:4.394448 17:1.098612 25:1.386294 24:1.386294 28:1.386294 27:1.386294 37:4.828314 44:3.218876 41:3.218876 36:3.218876 ++1 6:1.386294 7:1.386294 18:4.394448 19:2.197224 17:2.197224 16:1.098612 29:1.386294 40:1.609438 49:14.334072 52:3.583518 47:1.791759 70:7.783640 75:5.837730 76:1.945910 71:1.945910 86:4.158884 ++1 3:0.000000 8:1.386294 15:2.197224 40:1.609438 37:1.609438 44:1.609438 88:2.079442 165:2.484907 228:5.278114 342:2.995732 687:3.761200 2692:5.568345 7418:6.957497 7419:6.957497 ++1 3:0.000000 2:0.000000 4:0.000000 6:29.112174 8:1.386294 10:1.386294 17:9.887508 15:7.690284 18:2.197224 14:1.098612 19:1.098612 21:1.098612 20:1.098612 16:1.098612 25:6.931470 27:2.772588 ++1 3:0.000000 2:0.000000 6:13.169793 10:2.772588 8:0.693147 7:0.693147 15:8.788896 17:5.493060 13:3.295836 18:3.295836 19:2.197224 21:1.098612 25:15.249234 22:2.772588 32:2.772588 27:1.386294 ++1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 6:5.545176 8:2.079441 10:0.693147 15:8.788896 17:3.295836 13:2.197224 18:2.197224 21:1.098612 20:1.098612 25:6.931470 27:2.772588 22:1.386294 ++1 1:0.000000 9:4.158882 17:6.591672 15:3.295836 14:2.197224 16:1.098612 41:4.828314 40:3.218876 57:3.583518 49:1.791759 62:1.791759 75:7.783640 71:1.945910 67:1.945910 97:2.079442 108:8.788900 ++1 3:0.000000 1:0.000000 5:0.000000 6:6.238323 7:4.158882 9:0.693147 16:7.690284 15:6.591672 26:5.545176 28:4.158882 27:1.386294 37:12.875504 42:3.218876 43:3.218876 39:1.609438 52:14.334072 ++1 2:0.000000 7:0.693147 8:0.693147 15:3.295836 13:1.098612 17:1.098612 28:1.386294 44:8.047190 37:6.437752 40:3.218876 36:1.609438 63:3.583518 59:3.583518 46:1.791759 64:1.791759 70:17.513190 ++1 2:0.000000 7:2.079441 6:0.693147 13:3.295836 29:2.772588 30:1.386294 44:4.828314 40:1.609438 37:1.609438 36:1.609438 46:5.375277 59:3.583518 66:5.837730 70:3.891820 77:1.945910 99:2.079442 ++1 8:0.693147 18:4.394448 13:2.197224 14:1.098612 27:1.386294 37:4.828314 36:4.828314 44:3.218876 38:1.609438 43:1.609438 35:1.609438 46:3.583518 47:3.583518 61:1.791759 52:1.791759 64:1.791759 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 5:0.000000 8:3.465735 7:1.386294 12:0.693147 20:4.394448 15:1.098612 14:1.098612 16:1.098612 22:1.386294 24:1.386294 26:1.386294 32:1.386294 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 7:2.079441 8:2.079441 9:0.693147 12:0.693147 20:4.394448 15:3.295836 14:2.197224 13:1.098612 17:1.098612 16:1.098612 32:5.545176 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 5:0.000000 7:2.079441 9:0.693147 8:0.693147 12:0.693147 15:3.295836 20:3.295836 13:1.098612 17:1.098612 16:1.098612 14:1.098612 32:2.772588 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 13:3.295836 15:1.098612 20:1.098612 32:5.545176 37:1.609438 40:1.609438 35:1.609438 46:3.583518 45:1.791759 66:15.567280 88:4.158884 90:2.079442 ++1 1:0.000000 3:0.000000 14:2.197224 15:2.197224 21:1.098612 16:1.098612 32:2.772588 29:1.386294 26:1.386294 27:1.386294 41:3.218876 42:1.609438 47:1.791759 69:1.945910 82:1.945910 75:1.945910 ++1 3:0.000000 1:0.000000 14:2.197224 22:4.158882 32:2.772588 29:1.386294 39:4.828314 40:3.218876 37:3.218876 41:3.218876 42:1.609438 73:5.837730 69:3.891820 87:2.079442 99:2.079442 118:4.394450 ++1 14:2.197224 15:1.098612 22:1.386294 32:1.386294 41:3.218876 49:1.791759 57:1.791759 88:4.158884 105:2.197225 118:2.197225 212:2.564949 290:2.772589 308:2.890372 367:3.044522 389:3.091042 500:6.734592 ++1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 8:0.693147 13:1.098612 18:1.098612 20:1.098612 14:1.098612 32:2.772588 37:3.218876 41:1.609438 49:1.791759 46:1.791759 47:1.791759 78:1.945910 ++1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 7:2.079441 12:1.386294 10:0.693147 21:1.098612 20:1.098612 26:4.158882 32:2.772588 88:4.158884 94:2.079442 201:2.564949 389:3.091042 387:3.091042 ++1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 7:2.079441 12:1.386294 10:0.693147 14:17.577792 16:5.493060 13:2.197224 15:2.197224 20:1.098612 32:5.545176 26:4.158882 22:2.772588 41:25.751008 ++1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 7:1.386294 9:0.693147 8:0.693147 20:4.394448 21:1.098612 14:1.098612 32:5.545176 24:1.386294 22:1.386294 41:1.609438 48:1.791759 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 6:0.693147 11:0.693147 12:0.693147 14:3.295836 16:2.197224 13:2.197224 15:1.098612 17:1.098612 20:1.098612 25:5.545176 ++1 2:0.000000 8:2.079441 9:0.693147 13:2.197224 18:2.197224 15:1.098612 17:1.098612 19:1.098612 16:1.098612 32:1.386294 26:1.386294 41:1.609438 49:3.583518 46:3.583518 57:1.791759 66:7.783640 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 7:1.386294 8:0.693147 12:0.693147 11:0.693147 20:3.295836 15:2.197224 14:1.098612 32:4.158882 37:1.609438 41:1.609438 49:1.791759 68:1.945910 ++1 7:2.079441 15:1.098612 17:1.098612 29:2.772588 32:2.772588 28:1.386294 49:3.583518 46:3.583518 70:13.621370 66:7.783640 67:1.945910 79:1.945910 69:1.945910 91:2.079442 105:2.197225 124:2.197225 ++1 8:1.386294 14:1.098612 32:1.386294 37:3.218876 35:1.609438 66:1.945910 96:2.079442 89:2.079442 126:2.197225 185:2.484907 174:2.484907 223:2.639057 229:2.639057 367:3.044522 409:3.135494 441:3.258097 ++1 1:0.000000 6:4.852029 9:4.158882 7:2.772588 18:19.775016 20:9.887508 19:5.493060 15:2.197224 13:1.098612 14:1.098612 17:1.098612 21:1.098612 16:1.098612 30:23.566998 25:12.476646 28:2.772588 ++1 1:0.000000 2:0.000000 4:0.000000 6:2.079441 11:0.693147 8:0.693147 15:3.295836 17:2.197224 13:2.197224 20:2.197224 18:1.098612 30:6.931470 22:4.158882 32:4.158882 25:1.386294 28:1.386294 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:8.317764 6:4.158882 9:2.079441 12:1.386294 11:0.693147 15:8.788896 18:7.690284 19:6.591672 13:3.295836 16:3.295836 20:1.098612 ++1 2:0.000000 7:4.852029 8:2.772588 13:2.197224 18:2.197224 15:1.098612 16:1.098612 27:1.386294 32:1.386294 46:5.375277 66:15.567280 71:3.891820 76:1.945910 67:1.945910 87:8.317768 90:6.238326 ++1 3:0.000000 1:0.000000 6:0.693147 14:2.197224 17:1.098612 29:1.386294 27:1.386294 32:1.386294 41:3.218876 39:1.609438 42:1.609438 57:1.791759 47:1.791759 70:3.891820 66:1.945910 83:1.945910 ++1 3:0.000000 2:0.000000 1:0.000000 8:0.693147 6:0.693147 17:2.197224 13:2.197224 14:1.098612 15:1.098612 32:4.158882 27:2.772588 22:2.772588 37:3.218876 41:1.609438 39:1.609438 42:1.609438 ++1 3:0.000000 1:0.000000 6:0.693147 11:0.693147 13:3.295836 16:1.098612 15:1.098612 24:4.158882 32:1.386294 46:3.583518 66:1.945910 78:1.945910 86:6.238326 87:4.158884 107:2.197225 125:2.197225 ++1 3:0.000000 1:0.000000 6:0.693147 8:0.693147 18:6.591672 15:3.295836 13:2.197224 32:4.158882 22:1.386294 34:1.609438 37:1.609438 41:1.609438 46:3.583518 48:3.583518 73:7.783640 71:1.945910 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 8:0.693147 7:0.693147 15:2.197224 20:2.197224 19:1.098612 13:1.098612 16:1.098612 22:1.386294 32:1.386294 37:1.609438 75:1.945910 78:1.945910 ++1 2:0.000000 1:0.000000 3:0.000000 13:5.493060 19:2.197224 15:1.098612 21:1.098612 16:1.098612 29:1.386294 26:1.386294 27:1.386294 32:1.386294 37:3.218876 42:1.609438 46:7.167036 47:1.791759 ++1 2:0.000000 1:0.000000 3:0.000000 13:2.197224 15:1.098612 21:1.098612 16:1.098612 32:2.772588 29:1.386294 26:1.386294 27:1.386294 37:1.609438 42:1.609438 46:3.583518 45:1.791759 47:1.791759 ++1 1:0.000000 3:0.000000 32:1.386294 40:3.218876 130:2.302585 337:2.944439 348:2.995732 658:3.688879 4436:6.263398 4437:6.263398 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 12:1.386294 9:0.693147 13:2.197224 20:2.197224 15:1.098612 17:1.098612 14:1.098612 19:1.098612 18:1.098612 32:2.772588 ++1 2:0.000000 3:0.000000 7:2.772588 8:0.693147 6:0.693147 13:2.197224 25:9.704058 22:2.772588 27:1.386294 26:1.386294 32:1.386294 40:3.218876 37:1.609438 47:17.917590 62:5.375277 46:3.583518 ++1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 7:2.772588 6:2.079441 12:0.693147 9:0.693147 11:0.693147 15:8.788896 16:5.493060 18:5.493060 13:2.197224 20:1.098612 21:1.098612 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 12:1.386294 6:0.693147 20:2.197224 15:1.098612 17:1.098612 14:1.098612 13:1.098612 32:2.772588 41:1.609438 39:1.609438 ++1 1:0.000000 8:1.386294 15:2.197224 18:2.197224 22:1.386294 32:1.386294 75:1.945910 73:1.945910 89:2.079442 85:2.079442 109:2.197225 108:2.197225 174:2.484907 175:2.484907 263:2.772589 295:2.833213 ++1 3:0.000000 1:0.000000 7:1.386294 10:1.386294 8:0.693147 13:2.197224 14:2.197224 15:1.098612 17:1.098612 26:5.545176 22:1.386294 32:1.386294 39:4.828314 37:1.609438 42:1.609438 48:19.709349 ++1 1:0.000000 3:0.000000 7:2.079441 8:1.386294 10:1.386294 15:2.197224 13:2.197224 17:1.098612 18:1.098612 26:5.545176 22:1.386294 32:1.386294 39:3.218876 37:1.609438 48:8.958795 52:3.583518 ++1 1:0.000000 30:1.386294 32:1.386294 66:9.729550 88:2.079442 263:2.772589 339:2.944439 658:3.688879 1479:4.753590 3345:5.857933 8036:6.957497 8037:6.957497 8038:6.957497 ++1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 7:2.772588 10:1.386294 13:2.197224 16:1.098612 18:1.098612 26:8.317764 22:5.545176 32:4.158882 39:8.047190 46:3.583518 66:1.945910 79:1.945910 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 8:0.693147 11:0.693147 9:0.693147 18:3.295836 13:2.197224 17:2.197224 15:2.197224 16:1.098612 32:5.545176 25:2.772588 27:1.386294 37:6.437752 39:1.609438 46:7.167036 57:3.583518 50:1.791759 ++1 2:0.000000 8:0.693147 7:0.693147 9:0.693147 10:0.693147 11:0.693147 15:13.183344 13:2.197224 16:1.098612 17:1.098612 18:1.098612 19:1.098612 27:2.772588 22:1.386294 25:1.386294 28:1.386294 ++1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 21:1.098612 20:1.098612 14:1.098612 32:1.386294 41:1.609438 94:2.079442 146:2.397895 201:2.564949 269:2.772589 382:3.091042 ++1 1:0.000000 3:0.000000 32:1.386294 40:1.609438 90:6.238326 99:2.079442 107:2.197225 126:2.197225 108:2.197225 185:2.484907 169:2.484907 471:3.295837 499:3.367296 990:8.348774 1064:4.317488 1247:4.465908 ++1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 5:0.000000 12:1.386294 6:1.386294 15:4.394448 14:2.197224 17:1.098612 20:1.098612 22:2.772588 32:2.772588 27:1.386294 40:3.218876 37:3.218876 ++1 2:0.000000 6:2.079441 8:0.693147 9:0.693147 15:2.197224 17:1.098612 20:1.098612 16:1.098612 27:1.386294 28:1.386294 30:1.386294 38:3.218876 57:3.583518 52:1.791759 71:3.891820 72:1.945910 ++1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 6:0.693147 8:0.693147 12:0.693147 13:1.098612 32:1.386294 61:1.791759 48:1.791759 65:1.945910 69:1.945910 100:2.079442 110:2.197225 162:2.484907 ++1 3:0.000000 1:0.000000 2:0.000000 6:1.386294 8:1.386294 13:2.197224 15:2.197224 21:1.098612 32:2.772588 41:1.609438 46:3.583518 56:1.791759 77:3.891820 73:1.945910 67:1.945910 75:1.945910 ++1 1:0.000000 3:0.000000 8:2.079441 6:0.693147 13:2.197224 15:2.197224 17:1.098612 22:2.772588 32:1.386294 34:1.609438 37:1.609438 46:3.583518 66:3.891820 75:1.945910 71:1.945910 88:2.079442 ++1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 8:4.158882 7:1.386294 12:0.693147 20:3.295836 14:2.197224 18:2.197224 15:1.098612 16:1.098612 22:1.386294 26:1.386294 37:3.218876 48:3.583518 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 12:1.386294 6:0.693147 10:0.693147 9:0.693147 13:3.295836 20:2.197224 15:1.098612 17:1.098612 19:1.098612 25:2.772588 ++1 2:0.000000 4:0.000000 15:1.098612 28:1.386294 42:1.609438 37:1.609438 46:5.375277 62:1.791759 94:2.079442 87:2.079442 110:2.197225 141:4.795790 154:2.397895 155:2.397895 184:2.484907 212:2.564949 ++1 4:0.000000 11:0.693147 6:0.693147 8:0.693147 10:0.693147 13:2.197224 15:1.098612 14:1.098612 32:4.158882 22:2.772588 29:1.386294 37:3.218876 39:3.218876 38:3.218876 41:1.609438 47:8.958795 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:4.158882 11:0.693147 15:9.887508 20:3.295836 19:2.197224 17:1.098612 18:1.098612 21:1.098612 13:1.098612 14:1.098612 32:2.772588 ++1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 8:1.386294 7:0.693147 6:0.693147 18:4.394448 15:3.295836 17:2.197224 16:1.098612 19:1.098612 14:1.098612 26:2.772588 30:1.386294 28:1.386294 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 3:0.000000 1:0.000000 6:0.693147 14:2.197224 15:1.098612 17:1.098612 27:1.386294 32:1.386294 41:3.218876 57:1.791759 70:3.891820 73:1.945910 67:1.945910 83:1.945910 85:6.238326 90:4.158884 ++1 2:0.000000 7:2.079441 6:0.693147 14:1.098612 38:3.218876 35:1.609438 49:1.791759 111:2.197225 147:4.795790 213:10.259796 196:10.259796 197:7.694847 195:5.129898 234:2.639057 253:2.708050 369:3.044522 ++1 5:0.000000 2:0.000000 15:1.098612 14:1.098612 32:2.772588 31:1.386294 43:1.609438 41:1.609438 50:1.791759 72:3.891820 68:1.945910 88:2.079442 102:2.079442 123:4.394450 110:4.394450 119:2.197225 ++1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 8:1.386294 17:2.197224 19:2.197224 15:1.098612 16:1.098612 14:1.098612 32:1.386294 38:16.094380 43:1.609438 36:1.609438 41:1.609438 47:14.334072 ++1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 11:0.693147 21:2.197224 15:1.098612 19:1.098612 17:1.098612 14:1.098612 32:2.772588 27:1.386294 28:1.386294 38:4.828314 43:1.609438 36:1.609438 ++1 2:0.000000 5:0.000000 4:0.000000 9:0.693147 10:0.693147 12:0.693147 17:1.098612 15:1.098612 22:4.158882 24:1.386294 29:1.386294 32:1.386294 38:17.703818 39:3.218876 47:10.750554 48:3.583518 ++1 1:0.000000 3:0.000000 2:0.000000 15:1.098612 31:1.386294 32:1.386294 88:2.079442 270:2.772589 414:3.178054 546:3.465736 658:3.688879 645:3.688879 798:3.912023 ++1 6:0.693147 7:0.693147 17:2.197224 27:2.772588 22:2.772588 29:1.386294 32:1.386294 39:1.609438 42:1.609438 49:3.583518 56:1.791759 51:1.791759 96:8.317768 98:2.079442 95:2.079442 99:2.079442 ++1 3:0.000000 2:0.000000 6:9.704058 9:0.693147 7:0.693147 16:2.197224 17:1.098612 28:1.386294 34:3.218876 38:1.609438 60:21.501108 51:10.750554 50:7.167036 61:1.791759 56:1.791759 49:1.791759 ++1 2:0.000000 1:0.000000 3:0.000000 7:7.624617 6:0.693147 8:0.693147 16:1.098612 14:1.098612 22:2.772588 27:2.772588 38:1.609438 37:1.609438 39:1.609438 35:1.609438 60:25.084626 49:19.709349 ++1 6:2.079441 22:2.772588 39:3.218876 88:2.079442 109:2.197225 135:2.302585 212:2.564949 231:2.639057 317:2.890372 311:2.890372 343:2.995732 470:6.591674 500:3.367296 558:6.993016 635:3.688879 963:4.174387 ++1 3:0.000000 6:2.772588 38:3.218876 34:3.218876 47:3.583518 52:1.791759 61:1.791759 50:1.791759 70:5.837730 74:3.891820 77:1.945910 108:2.197225 114:2.197225 125:2.197225 139:4.605170 136:2.302585 ++1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 7:5.545176 11:0.693147 19:3.295836 16:3.295836 20:1.098612 14:1.098612 22:6.931470 26:4.158882 28:1.386294 25:1.386294 24:1.386294 37:8.047190 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 6:0.693147 11:0.693147 12:0.693147 14:4.394448 16:2.197224 15:2.197224 13:2.197224 17:1.098612 20:1.098612 25:5.545176 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 2:0.000000 1:0.000000 3:0.000000 8:0.693147 15:2.197224 17:1.098612 37:1.609438 48:1.791759 107:2.197225 145:2.397895 186:2.484907 183:2.484907 240:2.639057 260:2.708050 427:3.218876 456:6.516194 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 2:0.000000 4:0.000000 15:1.098612 28:1.386294 42:1.609438 37:1.609438 46:5.375277 62:1.791759 94:2.079442 87:2.079442 110:2.197225 141:4.795790 154:2.397895 155:2.397895 184:2.484907 212:2.564949 ++1 3:0.000000 1:0.000000 8:3.465735 7:1.386294 11:0.693147 9:0.693147 16:3.295836 21:1.098612 22:5.545176 24:2.772588 27:1.386294 25:1.386294 32:1.386294 39:3.218876 37:3.218876 41:1.609438 ++1 1:0.000000 3:0.000000 6:1.386294 7:0.693147 8:0.693147 21:1.098612 18:1.098612 19:1.098612 15:1.098612 30:1.386294 40:1.609438 95:2.079442 87:2.079442 91:2.079442 105:2.197225 106:2.197225 ++1 2:0.000000 3:0.000000 6:0.693147 8:0.693147 15:1.098612 25:4.158882 40:1.609438 62:1.791759 77:1.945910 87:2.079442 105:4.394450 125:2.197225 139:4.605170 164:2.484907 176:2.484907 335:2.944439 ++1 2:0.000000 1:0.000000 3:0.000000 8:0.693147 15:1.098612 25:1.386294 40:1.609438 60:1.791759 61:1.791759 49:1.791759 77:3.891820 72:1.945910 101:2.079442 130:4.605170 138:2.302585 320:2.890372 ++1 1:0.000000 3:0.000000 2:0.000000 8:0.693147 15:1.098612 40:1.609438 60:3.583518 57:1.791759 61:1.791759 49:1.791759 77:1.945910 95:4.158884 107:2.197225 130:2.302585 176:2.484907 320:2.890372 ++1 3:0.000000 2:0.000000 5:0.000000 4:0.000000 7:4.158882 6:3.465735 9:2.079441 16:7.690284 15:4.394448 13:3.295836 14:3.295836 17:1.098612 19:1.098612 33:4.158882 30:1.386294 37:16.094380 ++1 4:0.000000 2:0.000000 6:0.693147 13:1.098612 17:1.098612 33:1.386294 37:1.609438 35:1.609438 54:1.791759 46:1.791759 66:1.945910 73:1.945910 77:1.945910 74:1.945910 70:1.945910 88:4.158884 ++1 5:0.000000 4:0.000000 3:0.000000 6:4.158882 8:0.693147 9:0.693147 18:4.394448 15:3.295836 21:2.197224 16:1.098612 25:2.772588 31:1.386294 28:1.386294 38:19.313256 37:12.875504 34:6.437752 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 7:6.931470 11:1.386294 9:0.693147 12:0.693147 15:5.493060 19:3.295836 20:3.295836 16:3.295836 13:1.098612 17:1.098612 14:1.098612 26:2.772588 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 7:13.862940 11:1.386294 9:1.386294 8:0.693147 10:0.693147 12:0.693147 13:5.493060 15:4.394448 16:4.394448 20:2.197224 19:2.197224 17:2.197224 ++1 3:0.000000 1:0.000000 7:2.772588 8:0.693147 13:2.197224 15:1.098612 33:2.772588 26:1.386294 27:1.386294 40:3.218876 41:3.218876 37:1.609438 39:1.609438 35:1.609438 45:1.791759 46:1.791759 ++1 2:0.000000 4:0.000000 6:5.545176 10:2.079441 18:10.986120 15:3.295836 16:3.295836 21:1.098612 20:1.098612 25:1.386294 38:11.266066 34:4.828314 37:3.218876 39:1.609438 52:7.167036 47:5.375277 ++1 2:0.000000 4:0.000000 7:1.386294 15:3.295836 19:2.197224 20:1.098612 25:1.386294 40:3.218876 39:1.609438 35:1.609438 73:29.188650 75:1.945910 71:1.945910 87:2.079442 135:2.302585 149:2.397895 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 13:37.352808 16:1.098612 33:16.635528 29:15.249234 24:2.772588 22:1.386294 30:1.386294 39:3.218876 40:1.609438 35:1.609438 46:21.501108 45:21.501108 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 13:37.352808 16:1.098612 33:16.635528 29:15.249234 24:2.772588 22:1.386294 30:1.386294 39:3.218876 40:1.609438 35:1.609438 46:21.501108 45:21.501108 ++1 2:0.000000 4:0.000000 6:2.772588 7:2.079441 8:0.693147 9:0.693147 16:6.591672 15:2.197224 17:1.098612 20:1.098612 22:2.772588 33:2.772588 27:2.772588 25:1.386294 26:1.386294 37:4.828314 ++1 2:0.000000 4:0.000000 6:2.772588 7:2.079441 8:0.693147 9:0.693147 16:6.591672 15:2.197224 17:1.098612 20:1.098612 22:2.772588 33:2.772588 27:2.772588 25:1.386294 26:1.386294 37:4.828314 ++1 1:0.000000 4:0.000000 3:0.000000 8:1.386294 7:0.693147 12:0.693147 6:0.693147 15:2.197224 13:1.098612 16:1.098612 14:1.098612 26:2.772588 29:2.772588 30:1.386294 33:1.386294 40:1.609438 ++1 1:0.000000 3:0.000000 8:0.693147 7:0.693147 9:0.693147 15:1.098612 19:1.098612 26:2.772588 57:1.791759 82:1.945910 75:1.945910 101:2.079442 123:4.394450 107:2.197225 130:4.605170 138:2.302585 ++1 1:0.000000 4:0.000000 3:0.000000 8:1.386294 7:0.693147 12:0.693147 6:0.693147 15:2.197224 13:1.098612 16:1.098612 14:1.098612 26:2.772588 29:2.772588 30:1.386294 33:1.386294 40:1.609438 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 8:2.772588 9:0.693147 12:0.693147 16:1.098612 13:1.098612 42:6.437752 40:1.609438 46:1.791759 47:1.791759 66:5.837730 75:1.945910 70:1.945910 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 8:2.772588 9:0.693147 12:0.693147 16:1.098612 13:1.098612 42:6.437752 40:1.609438 46:1.791759 47:1.791759 66:5.837730 75:1.945910 70:1.945910 ++1 1:0.000000 3:0.000000 7:5.545176 8:0.693147 6:0.693147 13:7.690284 15:2.197224 17:1.098612 14:1.098612 22:6.931470 33:6.931470 37:20.922694 40:12.875504 41:1.609438 46:12.542313 45:8.958795 ++1 1:0.000000 3:0.000000 7:6.238323 8:0.693147 6:0.693147 13:7.690284 15:2.197224 17:1.098612 14:1.098612 22:6.931470 33:6.931470 37:20.922694 40:12.875504 41:1.609438 46:12.542313 45:8.958795 ++1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 8:2.079441 7:2.079441 9:0.693147 13:6.591672 15:1.098612 14:1.098612 17:1.098612 33:5.545176 27:1.386294 24:1.386294 39:6.437752 41:1.609438 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 5:0.000000 7:26.339586 8:5.545176 9:2.079441 11:0.693147 12:0.693147 6:0.693147 15:13.183344 17:7.690284 13:6.591672 19:4.394448 16:3.295836 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 7:13.862940 8:4.158882 15:12.084732 16:5.493060 13:3.295836 19:1.098612 33:8.317764 28:8.317764 26:6.931470 29:2.772588 22:2.772588 27:1.386294 ++1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 8:2.079441 7:2.079441 9:0.693147 13:6.591672 15:1.098612 14:1.098612 17:1.098612 33:5.545176 27:1.386294 24:1.386294 39:6.437752 41:1.609438 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 8:0.693147 13:4.394448 29:2.772588 33:2.772588 26:1.386294 22:1.386294 40:1.609438 42:1.609438 39:1.609438 46:3.583518 55:3.583518 ++1 3:0.000000 1:0.000000 7:2.079441 8:1.386294 13:4.394448 15:4.394448 20:2.197224 17:2.197224 18:1.098612 28:2.772588 22:2.772588 33:2.772588 24:1.386294 40:3.218876 41:1.609438 46:3.583518 ++1 7:2.079441 8:1.386294 11:0.693147 13:4.394448 15:2.197224 16:1.098612 22:2.772588 33:2.772588 26:1.386294 27:1.386294 29:1.386294 24:1.386294 40:1.609438 42:1.609438 46:3.583518 47:1.791759 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:7.624617 6:5.545176 9:1.386294 12:1.386294 8:1.386294 18:25.268076 15:6.591672 17:4.394448 16:4.394448 19:3.295836 13:2.197224 ++1 1:0.000000 3:0.000000 6:2.772588 13:1.098612 18:1.098612 29:1.386294 22:1.386294 33:1.386294 40:1.609438 34:1.609438 37:1.609438 62:3.583518 51:3.583518 46:1.791759 47:1.791759 52:1.791759 ++1 1:0.000000 3:0.000000 8:2.079441 18:5.493060 13:4.394448 15:3.295836 17:2.197224 22:2.772588 33:2.772588 40:6.437752 37:1.609438 42:1.609438 46:7.167036 45:3.583518 66:21.405010 73:1.945910 ++1 1:0.000000 3:0.000000 6:1.386294 8:1.386294 13:6.591672 14:6.591672 17:2.197224 16:1.098612 22:4.158882 25:2.772588 26:1.386294 41:4.828314 37:3.218876 39:1.609438 35:1.609438 46:7.167036 ++1 3:0.000000 1:0.000000 8:1.386294 17:2.197224 21:1.098612 13:1.098612 27:1.386294 37:1.609438 73:13.621370 66:3.891820 82:1.945910 99:2.079442 124:4.394450 107:2.197225 114:2.197225 135:2.302585 ++1 8:0.693147 15:4.394448 13:3.295836 22:5.545176 33:4.158882 45:5.375277 46:5.375277 57:3.583518 87:4.158884 112:4.394450 113:4.394450 107:2.197225 131:2.302585 169:2.484907 162:2.484907 193:15.389694 ++1 2:0.000000 7:2.772588 8:0.693147 15:6.591672 18:5.493060 17:2.197224 20:1.098612 29:6.931470 33:6.931470 22:4.158882 30:1.386294 25:1.386294 40:8.047190 37:1.609438 61:14.334072 46:5.375277 ++1 2:0.000000 1:0.000000 3:0.000000 18:2.197224 17:1.098612 13:1.098612 15:1.098612 33:1.386294 40:3.218876 46:1.791759 67:3.891820 107:2.197225 124:2.197225 129:4.605170 162:4.969814 193:10.259796 ++1 3:0.000000 1:0.000000 7:4.158882 8:0.693147 15:5.493060 13:4.394448 16:1.098612 18:1.098612 33:5.545176 22:4.158882 27:2.772588 28:1.386294 40:1.609438 39:1.609438 37:1.609438 41:1.609438 ++1 3:0.000000 1:0.000000 7:4.158882 8:0.693147 15:5.493060 13:4.394448 16:1.098612 18:1.098612 33:5.545176 22:4.158882 27:2.772588 28:1.386294 40:1.609438 39:1.609438 37:1.609438 41:1.609438 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:0.693147 8:0.693147 13:2.197224 15:1.098612 33:4.158882 30:2.772588 22:2.772588 43:1.609438 46:5.375277 45:3.583518 48:1.791759 74:1.945910 ++1 2:0.000000 4:0.000000 8:1.386294 11:0.693147 19:3.295836 15:2.197224 14:1.098612 29:2.772588 25:1.386294 33:1.386294 37:3.218876 35:1.609438 54:1.791759 47:1.791759 73:1.945910 68:1.945910 ++1 3:0.000000 2:0.000000 1:0.000000 8:0.693147 11:0.693147 6:0.693147 19:3.295836 14:2.197224 15:2.197224 13:1.098612 16:1.098612 17:1.098612 21:1.098612 28:4.158882 29:2.772588 37:6.437752 ++1 3:0.000000 1:0.000000 8:0.693147 12:0.693147 14:1.098612 21:1.098612 17:1.098612 13:1.098612 15:1.098612 29:2.772588 33:1.386294 37:3.218876 40:1.609438 48:1.791759 58:1.791759 46:1.791759 ++1 3:0.000000 8:0.693147 15:2.197224 14:1.098612 67:1.945910 88:2.079442 127:2.197225 122:2.197225 163:2.484907 174:2.484907 402:3.135494 509:6.802394 8937:6.957497 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 7:4.158882 8:2.079441 13:2.197224 15:1.098612 14:1.098612 33:2.772588 29:1.386294 30:1.386294 34:1.609438 35:1.609438 59:1.791759 45:1.791759 ++1 2:0.000000 5:0.000000 3:0.000000 1:0.000000 4:0.000000 7:2.772588 6:2.079441 8:1.386294 11:1.386294 16:10.986120 18:8.788896 15:6.591672 19:6.591672 20:2.197224 17:1.098612 29:6.931470 ++1 1:0.000000 3:0.000000 7:2.079441 13:2.197224 17:1.098612 15:1.098612 14:1.098612 33:2.772588 26:1.386294 27:1.386294 40:3.218876 39:1.609438 35:1.609438 45:1.791759 46:1.791759 50:1.791759 ++1 3:0.000000 2:0.000000 7:3.465735 8:0.693147 6:0.693147 15:3.295836 14:1.098612 27:1.386294 31:1.386294 40:3.218876 35:1.609438 54:1.791759 61:1.791759 57:1.791759 88:4.158884 107:2.197225 ++1 3:0.000000 2:0.000000 1:0.000000 7:2.079441 8:1.386294 15:5.493060 13:4.394448 16:2.197224 18:1.098612 17:1.098612 14:1.098612 22:4.158882 33:4.158882 27:2.772588 37:6.437752 39:1.609438 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 8:1.386294 7:0.693147 15:5.493060 13:4.394448 18:1.098612 17:1.098612 16:1.098612 14:1.098612 22:4.158882 33:4.158882 28:1.386294 27:1.386294 ++1 7:1.386294 8:0.693147 11:0.693147 13:6.591672 15:2.197224 16:1.098612 22:4.158882 33:4.158882 27:1.386294 24:1.386294 40:1.609438 37:1.609438 46:5.375277 47:1.791759 73:3.891820 82:1.945910 ++1 1:0.000000 7:0.693147 13:2.197224 15:1.098612 33:5.545176 26:1.386294 40:3.218876 39:1.609438 35:1.609438 45:1.791759 46:1.791759 47:1.791759 79:1.945910 107:2.197225 114:2.197225 149:4.795790 ++1 1:0.000000 3:0.000000 7:6.238323 8:2.772588 9:1.386294 16:2.197224 18:2.197224 17:1.098612 28:4.158882 24:2.772588 26:1.386294 33:1.386294 22:1.386294 37:11.266066 40:3.218876 43:3.218876 ++1 7:2.079441 8:1.386294 11:0.693147 13:4.394448 15:2.197224 16:1.098612 22:2.772588 33:2.772588 26:1.386294 27:1.386294 29:1.386294 24:1.386294 40:1.609438 42:1.609438 46:3.583518 47:1.791759 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 7:2.079441 8:2.079441 13:3.295836 15:1.098612 29:1.386294 33:1.386294 30:1.386294 34:1.609438 45:3.583518 59:1.791759 46:1.791759 79:1.945910 ++1 2:0.000000 4:0.000000 13:3.295836 14:1.098612 22:2.772588 33:2.772588 35:1.609438 46:3.583518 45:1.791759 48:1.791759 55:1.791759 78:5.837730 65:1.945910 70:1.945910 97:2.079442 114:2.197225 ++1 3:0.000000 7:6.931470 8:2.772588 15:5.493060 17:3.295836 16:3.295836 14:2.197224 29:2.772588 33:2.772588 26:2.772588 25:1.386294 28:1.386294 37:1.609438 43:1.609438 40:1.609438 48:7.167036 ++1 2:0.000000 8:1.386294 18:2.197224 15:1.098612 16:1.098612 33:2.772588 29:1.386294 38:6.437752 37:3.218876 40:1.609438 36:1.609438 60:5.375277 46:3.583518 54:1.791759 66:13.621370 77:9.729550 ++1 1:0.000000 3:0.000000 2:0.000000 7:2.079441 8:0.693147 16:6.591672 13:4.394448 17:1.098612 33:2.772588 29:1.386294 24:1.386294 37:8.047190 34:3.218876 45:1.791759 46:1.791759 49:1.791759 ++1 1:0.000000 2:0.000000 3:0.000000 7:7.624617 8:0.693147 16:6.591672 13:4.394448 14:1.098612 19:1.098612 17:1.098612 33:2.772588 29:1.386294 24:1.386294 37:11.266066 34:3.218876 40:1.609438 ++1 3:0.000000 2:0.000000 4:0.000000 7:0.693147 9:0.693147 13:2.197224 16:2.197224 14:1.098612 29:5.545176 24:2.772588 33:2.772588 30:1.386294 27:1.386294 39:1.609438 35:1.609438 45:1.791759 ++1 3:0.000000 1:0.000000 7:9.704058 8:1.386294 16:3.295836 17:2.197224 13:2.197224 21:2.197224 18:1.098612 26:4.158882 33:4.158882 27:2.772588 29:1.386294 24:1.386294 37:12.875504 40:3.218876 ++1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 7:7.624617 8:2.079441 9:0.693147 13:2.197224 15:1.098612 19:1.098612 14:1.098612 29:2.772588 33:2.772588 26:1.386294 22:1.386294 37:4.828314 ++1 3:0.000000 1:0.000000 7:4.158882 9:0.693147 17:1.098612 21:1.098612 16:1.098612 15:1.098612 33:2.772588 22:2.772588 26:1.386294 27:1.386294 40:3.218876 42:1.609438 37:1.609438 39:1.609438 ++1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 8:1.386294 7:0.693147 9:0.693147 13:4.394448 17:1.098612 15:1.098612 33:4.158882 29:4.158882 24:2.772588 27:2.772588 26:1.386294 22:1.386294 ++1 3:0.000000 2:0.000000 1:0.000000 8:1.386294 11:1.386294 7:0.693147 21:1.098612 16:1.098612 30:4.158882 22:2.772588 24:2.772588 28:1.386294 48:1.791759 70:1.945910 119:6.591675 126:4.394450 ++1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 7:5.545176 8:2.079441 9:0.693147 19:1.098612 15:1.098612 33:1.386294 30:1.386294 34:1.609438 46:1.791759 81:1.945910 73:1.945910 ++1 2:0.000000 3:0.000000 8:0.693147 6:0.693147 7:0.693147 15:3.295836 16:1.098612 14:1.098612 33:11.090352 26:2.772588 24:2.772588 27:1.386294 31:1.386294 40:6.437752 36:6.437752 42:3.218876 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 7:1.386294 8:0.693147 15:5.493060 16:4.394448 13:3.295836 18:1.098612 19:1.098612 14:1.098612 33:15.249234 24:5.545176 27:1.386294 29:1.386294 ++1 2:0.000000 4:0.000000 6:2.772588 8:1.386294 10:1.386294 18:17.577792 15:5.493060 17:2.197224 16:2.197224 13:1.098612 14:1.098612 19:1.098612 21:1.098612 33:2.772588 25:2.772588 29:1.386294 ++1 17:1.098612 148:4.795790 1853:5.010635 9217:6.957497 ++1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 7:2.079441 9:0.693147 11:0.693147 12:0.693147 16:3.295836 17:1.098612 13:1.098612 14:1.098612 22:4.158882 28:2.772588 27:1.386294 26:1.386294 ++1 3:0.000000 2:0.000000 4:0.000000 8:0.693147 7:0.693147 19:1.098612 13:1.098612 16:1.098612 33:2.772588 27:1.386294 45:1.791759 46:1.791759 75:1.945910 67:1.945910 94:2.079442 89:2.079442 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 8:3.465735 7:1.386294 11:0.693147 6:0.693147 15:3.295836 17:1.098612 16:1.098612 14:1.098612 27:2.772588 22:1.386294 29:1.386294 33:1.386294 ++1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 7:2.772588 12:0.693147 13:6.591672 17:1.098612 15:1.098612 14:1.098612 29:4.158882 33:2.772588 26:1.386294 27:1.386294 42:1.609438 38:1.609438 ++1 2:0.000000 8:1.386294 6:0.693147 13:6.591672 18:5.493060 15:2.197224 20:2.197224 16:1.098612 33:2.772588 29:1.386294 22:1.386294 30:1.386294 25:1.386294 38:6.437752 39:1.609438 47:8.958795 ++1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 7:1.386294 6:1.386294 15:9.887508 18:6.591672 19:2.197224 14:2.197224 16:1.098612 21:1.098612 23:11.090352 28:2.772588 27:1.386294 ++1 2:0.000000 3:0.000000 4:0.000000 21:3.295836 18:3.295836 19:2.197224 16:1.098612 14:1.098612 23:5.545176 30:1.386294 28:1.386294 25:1.386294 42:1.609438 36:1.609438 35:1.609438 50:1.791759 ++1 2:0.000000 6:1.386294 10:0.693147 9:0.693147 19:2.197224 16:2.197224 21:1.098612 23:1.386294 25:1.386294 40:3.218876 36:3.218876 38:1.609438 47:1.791759 52:1.791759 50:1.791759 51:1.791759 +-1 2:0.000000 10:1.386294 11:0.693147 9:0.693147 12:0.693147 19:1.098612 21:1.098612 23:1.386294 40:1.609438 56:1.791759 65:1.945910 80:1.945910 75:1.945910 92:2.079442 123:2.197225 158:2.397895 +-1 3:0.000000 5:0.000000 2:0.000000 10:2.079441 11:1.386294 12:0.693147 6:0.693147 9:0.693147 19:1.098612 23:2.772588 31:1.386294 24:1.386294 38:1.609438 34:1.609438 56:1.791759 84:3.891820 +-1 2:0.000000 4:0.000000 10:0.693147 11:0.693147 23:2.772588 29:1.386294 39:3.218876 61:3.583518 51:1.791759 47:1.791759 49:1.791759 74:1.945910 68:1.945910 92:10.397210 89:4.158884 106:2.197225 +-1 1:0.000000 3:0.000000 10:1.386294 9:0.693147 8:0.693147 25:2.772588 23:1.386294 43:1.609438 70:1.945910 72:1.945910 92:2.079442 100:2.079442 128:2.197225 119:2.197225 109:2.197225 140:4.605170 +-1 2:0.000000 4:0.000000 5:0.000000 11:2.079441 9:2.079441 6:1.386294 8:1.386294 10:0.693147 16:6.591672 18:3.295836 15:3.295836 19:3.295836 17:1.098612 20:1.098612 25:4.158882 27:4.158882 +-1 1:0.000000 3:0.000000 117:2.197225 155:7.193685 248:2.708050 273:2.772589 281:2.772589 363:3.044522 508:3.401197 637:3.688879 695:7.522400 808:3.951244 1012:4.248495 1455:9.307920 1701:9.750394 2134:10.329572 +-1 5:0.000000 3:0.000000 2:0.000000 1:0.000000 10:2.079441 6:1.386294 7:0.693147 20:2.197224 18:2.197224 15:1.098612 24:2.772588 33:1.386294 36:3.218876 43:1.609438 39:1.609438 55:8.958795 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 6:0.693147 7:0.693147 20:5.493060 18:1.098612 24:2.772588 31:1.386294 54:1.791759 123:4.394450 158:2.397895 190:7.454721 187:2.484907 291:11.090356 +-1 2:0.000000 5:0.000000 4:0.000000 6:6.931470 12:0.693147 11:0.693147 9:0.693147 18:2.197224 17:2.197224 20:1.098612 23:4.158882 25:4.158882 29:1.386294 30:1.386294 27:1.386294 36:4.828314 +-1 2:0.000000 3:0.000000 6:6.238323 10:0.693147 18:5.493060 16:2.197224 17:1.098612 30:4.158882 25:2.772588 27:2.772588 26:1.386294 24:1.386294 36:1.609438 37:1.609438 40:1.609438 42:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 12:6.931470 10:2.772588 8:1.386294 7:0.693147 18:2.197224 15:2.197224 20:1.098612 19:1.098612 23:8.317764 27:4.158882 28:2.772588 24:1.386294 +-1 3:0.000000 1:0.000000 4:0.000000 5:0.000000 9:2.079441 12:0.693147 11:0.693147 10:0.693147 6:0.693147 21:2.197224 20:1.098612 18:1.098612 19:1.098612 15:1.098612 14:1.098612 23:2.772588 +-1 5:0.000000 2:0.000000 1:0.000000 4:0.000000 3:0.000000 8:7.624617 10:3.465735 6:3.465735 11:0.693147 9:0.693147 16:5.493060 21:1.098612 20:1.098612 15:1.098612 14:1.098612 23:5.545176 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 11:2.079441 10:1.386294 20:3.295836 16:2.197224 21:1.098612 14:1.098612 23:4.158882 27:4.158882 28:1.386294 42:3.218876 39:1.609438 35:1.609438 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 12:3.465735 6:2.772588 10:2.079441 7:2.079441 9:0.693147 16:4.394448 20:3.295836 21:1.098612 14:1.098612 23:4.158882 30:2.772588 27:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 3:0.000000 6:6.238323 9:3.465735 7:2.772588 12:2.079441 10:0.693147 8:0.693147 16:4.394448 17:2.197224 19:2.197224 20:2.197224 14:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:3.465735 7:1.386294 20:2.197224 21:1.098612 14:1.098612 23:4.158882 27:2.772588 42:1.609438 35:1.609438 53:3.583518 57:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.079441 6:2.079441 12:0.693147 7:0.693147 20:5.493060 17:3.295836 14:1.098612 23:4.158882 28:1.386294 37:6.437752 35:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:4.852029 8:2.772588 12:1.386294 6:0.693147 20:3.295836 14:1.098612 25:2.772588 23:2.772588 35:1.609438 57:1.791759 53:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:9.704058 9:3.465735 10:2.772588 6:1.386294 8:1.386294 11:0.693147 12:0.693147 16:3.295836 18:1.098612 17:1.098612 14:1.098612 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 6:11.783499 7:8.317764 10:4.158882 8:3.465735 9:2.079441 17:9.887508 18:2.197224 13:1.098612 14:1.098612 30:6.931470 32:5.545176 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 9:0.693147 12:0.693147 14:2.197224 21:1.098612 23:2.772588 28:1.386294 40:1.609438 35:1.609438 53:3.583518 56:3.583518 57:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 12:1.386294 9:0.693147 11:0.693147 7:0.693147 15:3.295836 19:1.098612 20:1.098612 14:1.098612 23:4.158882 35:1.609438 53:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 10:16.635528 6:4.158882 11:0.693147 8:0.693147 7:0.693147 9:0.693147 16:5.493060 13:3.295836 15:3.295836 21:2.197224 18:1.098612 30:5.545176 28:5.545176 +-1 2:0.000000 4:0.000000 5:0.000000 6:9.010911 7:4.852029 16:9.887508 21:1.098612 20:1.098612 19:1.098612 30:8.317764 25:6.931470 26:4.158882 23:2.772588 28:2.772588 27:1.386294 24:1.386294 +-1 5:0.000000 4:0.000000 19:1.098612 14:1.098612 23:2.772588 24:1.386294 22:1.386294 42:1.609438 41:1.609438 65:1.945910 71:1.945910 84:1.945910 98:2.079442 111:2.197225 143:2.397895 146:2.397895 +-1 3:0.000000 5:0.000000 1:0.000000 2:0.000000 4:0.000000 12:0.693147 6:0.693147 19:1.098612 23:5.545176 51:1.791759 84:1.945910 118:2.197225 166:2.484907 234:2.639057 294:2.772589 331:2.944439 +-1 1:0.000000 3:0.000000 5:0.000000 13:1.098612 18:1.098612 23:2.772588 45:1.791759 65:1.945910 119:2.197225 118:2.197225 167:2.484907 171:2.484907 218:2.639057 262:5.545178 294:2.772589 380:3.091042 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 6:2.079441 12:1.386294 11:0.693147 7:0.693147 8:0.693147 18:2.197224 19:1.098612 20:1.098612 17:1.098612 23:4.158882 22:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 12:0.693147 20:1.098612 19:1.098612 18:1.098612 23:1.386294 62:1.791759 82:1.945910 248:2.708050 294:2.772589 373:3.044522 362:3.044522 1503:4.753590 9810:13.914994 +-1 5:0.000000 1:0.000000 2:0.000000 4:0.000000 3:0.000000 6:1.386294 7:0.693147 10:0.693147 11:0.693147 9:0.693147 20:1.098612 19:1.098612 21:1.098612 23:9.704058 24:1.386294 22:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:1.386294 12:0.693147 16:3.295836 20:2.197224 18:2.197224 15:2.197224 30:2.772588 23:1.386294 22:1.386294 27:1.386294 42:3.218876 +-1 2:0.000000 4:0.000000 5:0.000000 10:0.693147 6:0.693147 15:2.197224 18:2.197224 17:1.098612 23:2.772588 24:1.386294 36:1.609438 53:1.791759 56:1.791759 47:1.791759 74:1.945910 79:1.945910 +-1 2:0.000000 26:1.386294 86:2.079442 2162:5.164786 2473:5.347108 4969:6.263398 9892:6.957497 9893:6.957497 9894:6.957497 9895:6.957497 9896:6.957497 9897:6.957497 9898:6.957497 9899:6.957497 9900:6.957497 +-1 3:0.000000 1:0.000000 7:1.386294 11:0.693147 19:2.197224 23:4.158882 31:2.772588 26:1.386294 45:3.583518 65:1.945910 117:4.394450 160:2.397895 204:2.564949 218:2.639057 294:2.772589 267:2.772589 +-1 1:0.000000 2:0.000000 3:0.000000 6:4.158882 12:0.693147 11:0.693147 15:1.098612 20:1.098612 18:1.098612 16:1.098612 17:1.098612 27:2.772588 22:1.386294 30:1.386294 23:1.386294 24:1.386294 +-1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 9:2.772588 10:2.079441 7:2.079441 11:1.386294 6:0.693147 8:0.693147 15:3.295836 18:3.295836 19:2.197224 20:2.197224 17:2.197224 16:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 9:2.772588 7:1.386294 12:0.693147 19:2.197224 16:2.197224 13:1.098612 17:1.098612 14:1.098612 18:1.098612 23:6.931470 28:4.158882 31:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:0.693147 8:0.693147 6:0.693147 19:1.098612 14:1.098612 23:6.931470 27:1.386294 35:1.609438 60:1.791759 47:1.791759 61:1.791759 80:3.891820 +-1 1:0.000000 2:0.000000 6:0.693147 7:0.693147 8:0.693147 26:1.386294 23:1.386294 61:1.791759 294:2.772589 435:3.218876 1936:5.010635 5005:6.263398 4969:6.263398 4952:6.263398 10036:6.957497 10037:6.957497 +-1 1:0.000000 3:0.000000 8:4.158882 6:2.079441 9:1.386294 11:0.693147 10:0.693147 7:0.693147 12:0.693147 16:4.394448 19:2.197224 23:12.476646 31:1.386294 22:1.386294 38:11.266066 36:4.828314 +-1 5:0.000000 3:0.000000 2:0.000000 4:0.000000 1:0.000000 10:4.158882 6:4.158882 9:2.772588 8:2.079441 11:1.386294 12:0.693147 16:6.591672 18:4.394448 21:1.098612 26:13.862940 23:4.158882 +-1 5:0.000000 10:1.386294 9:1.386294 11:0.693147 7:0.693147 12:0.693147 16:2.197224 20:1.098612 27:4.158882 23:2.772588 30:2.772588 56:3.583518 57:1.791759 53:1.791759 78:1.945910 75:1.945910 +-1 3:0.000000 2:0.000000 1:0.000000 5:0.000000 4:0.000000 11:1.386294 6:1.386294 10:0.693147 7:0.693147 19:2.197224 23:6.931470 25:1.386294 36:1.609438 35:1.609438 61:3.583518 45:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:1.386294 8:1.386294 9:0.693147 7:0.693147 20:2.197224 19:1.098612 15:1.098612 31:2.772588 23:2.772588 26:1.386294 30:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 10:4.158882 11:1.386294 7:1.386294 12:0.693147 18:2.197224 19:2.197224 21:1.098612 23:6.931470 28:1.386294 25:1.386294 30:1.386294 24:1.386294 36:3.218876 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 7:4.852029 11:2.079441 10:1.386294 9:1.386294 6:1.386294 18:2.197224 26:9.704058 23:5.545176 28:5.545176 25:1.386294 52:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.772588 9:0.693147 11:0.693147 7:0.693147 18:2.197224 21:1.098612 16:1.098612 23:9.704058 30:2.772588 28:1.386294 22:1.386294 43:1.609438 +-1 3:0.000000 2:0.000000 1:0.000000 5:0.000000 10:2.772588 8:2.079441 9:0.693147 6:0.693147 21:3.295836 16:3.295836 25:4.158882 23:2.772588 28:2.772588 29:1.386294 27:1.386294 36:24.141570 +-1 1:0.000000 5:0.000000 18:3.295836 69:3.891820 99:2.079442 88:2.079442 120:2.197225 124:2.197225 126:2.197225 135:2.302585 134:2.302585 141:2.397895 150:2.397895 217:2.564949 251:2.708050 296:2.833213 +-1 3:0.000000 1:0.000000 8:2.079441 12:0.693147 20:2.197224 18:1.098612 15:1.098612 23:4.158882 22:1.386294 43:1.609438 39:1.609438 61:3.583518 93:6.238326 89:2.079442 92:2.079442 119:2.197225 +-1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 5:0.000000 10:4.158882 6:1.386294 12:0.693147 9:0.693147 8:0.693147 18:4.394448 21:1.098612 23:6.931470 29:1.386294 36:1.609438 42:1.609438 +-1 1:0.000000 5:0.000000 2:0.000000 4:0.000000 3:0.000000 11:0.693147 19:1.098612 23:4.158882 31:1.386294 62:1.791759 65:1.945910 80:1.945910 98:4.158884 124:2.197225 111:2.197225 200:2.564949 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 6:0.693147 12:0.693147 20:2.197224 15:1.098612 21:1.098612 19:1.098612 14:1.098612 16:1.098612 23:4.158882 30:1.386294 28:1.386294 34:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 10:1.386294 9:1.386294 6:1.386294 11:1.386294 7:1.386294 16:3.295836 15:2.197224 17:1.098612 14:1.098612 23:2.772588 28:2.772588 36:1.609438 +-1 2:0.000000 9:2.079441 10:0.693147 6:0.693147 19:1.098612 16:1.098612 25:2.772588 23:1.386294 31:1.386294 27:1.386294 28:1.386294 30:1.386294 37:1.609438 42:1.609438 53:1.791759 50:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 9:0.693147 12:0.693147 10:0.693147 23:2.772588 22:1.386294 28:1.386294 68:1.945910 82:1.945910 99:2.079442 117:2.197225 159:2.397895 171:2.484907 +-1 2:0.000000 4:0.000000 10:4.158882 12:0.693147 11:0.693147 7:0.693147 6:0.693147 21:3.295836 17:1.098612 23:2.772588 29:1.386294 25:1.386294 38:4.828314 34:3.218876 43:3.218876 42:1.609438 +-1 4:0.000000 1:0.000000 3:0.000000 5:0.000000 19:1.098612 31:1.386294 23:1.386294 388:3.091042 2275:5.347108 3287:11.715866 3651:5.857933 10458:6.957497 10459:6.957497 10460:6.957497 +-1 126:2.197225 5103:6.263398 +-1 5:0.000000 2:0.000000 3:0.000000 1:0.000000 7:1.386294 10:0.693147 6:0.693147 19:1.098612 16:1.098612 14:1.098612 23:4.158882 24:2.772588 26:2.772588 38:1.609438 37:1.609438 41:1.609438 +-1 3:0.000000 10:0.693147 8:0.693147 13:1.098612 14:1.098612 23:4.158882 25:1.386294 22:1.386294 41:1.609438 45:1.791759 151:2.397895 187:2.484907 289:2.772589 379:3.091042 506:3.401197 505:3.401197 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 7:0.693147 9:0.693147 20:2.197224 21:2.197224 18:1.098612 23:5.545176 26:4.158882 30:2.772588 29:1.386294 36:1.609438 51:1.791759 53:1.791759 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 6:2.079441 11:0.693147 7:0.693147 20:4.394448 15:3.295836 18:1.098612 17:1.098612 23:4.158882 31:2.772588 30:1.386294 24:1.386294 34:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 6:4.158882 11:2.079441 9:1.386294 7:1.386294 8:0.693147 20:3.295836 19:1.098612 18:1.098612 23:4.158882 42:1.609438 51:1.791759 +-1 1:0.000000 3:0.000000 288:2.772589 382:3.091042 420:3.218876 657:3.688879 2495:5.347108 10503:6.957497 10504:6.957497 +-1 2:0.000000 4:0.000000 1:0.000000 7:4.158882 12:2.079441 6:2.079441 9:2.079441 11:1.386294 8:1.386294 10:1.386294 16:4.394448 17:2.197224 20:1.098612 21:1.098612 23:6.931470 26:2.772588 +-1 2:0.000000 3:0.000000 19:1.098612 23:2.772588 29:1.386294 24:1.386294 71:1.945910 170:2.484907 235:2.639057 343:2.995732 540:3.465736 768:3.850148 1469:4.653960 1948:5.010635 2130:5.164786 10580:13.914994 +-1 2:0.000000 5:0.000000 4:0.000000 7:0.693147 12:0.693147 10:0.693147 13:1.098612 19:1.098612 21:1.098612 18:1.098612 14:1.098612 23:4.158882 28:1.386294 24:1.386294 35:1.609438 84:1.945910 +-1 2:0.000000 1:0.000000 4:0.000000 12:1.386294 9:1.386294 10:0.693147 11:0.693147 7:0.693147 15:1.098612 14:1.098612 28:2.772588 23:1.386294 26:1.386294 38:3.218876 40:3.218876 39:1.609438 +-1 1:0.000000 3:0.000000 23:1.386294 82:1.945910 10598:13.914994 10599:13.914994 10600:6.957497 10601:6.957497 10602:6.957497 10603:6.957497 +-1 5:0.000000 6:2.079441 11:0.693147 13:1.098612 14:1.098612 23:2.772588 40:1.609438 41:1.609438 51:3.583518 53:1.791759 112:2.197225 157:4.795790 179:2.484907 218:2.639057 294:2.772589 308:2.890372 +-1 1:0.000000 3:0.000000 289:2.772589 2909:5.568345 5127:6.263398 10604:13.914994 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 12:1.386294 11:0.693147 6:0.693147 9:0.693147 19:1.098612 16:1.098612 23:8.317764 24:1.386294 65:1.945910 76:1.945910 72:1.945910 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 19:2.197224 14:1.098612 23:5.545176 31:1.386294 35:1.609438 84:1.945910 202:2.564949 218:2.639057 244:2.708050 259:2.708050 325:2.890372 338:2.944439 +-1 3:0.000000 1:0.000000 2:0.000000 6:1.386294 21:1.098612 17:1.098612 14:1.098612 22:1.386294 23:1.386294 37:1.609438 35:1.609438 62:3.583518 51:3.583518 82:1.945910 99:2.079442 88:2.079442 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 6:2.079441 12:0.693147 7:0.693147 20:3.295836 15:1.098612 23:1.386294 30:1.386294 61:1.791759 82:1.945910 86:2.079442 101:2.079442 130:2.302585 +-1 2:0.000000 4:0.000000 5:0.000000 6:3.465735 8:1.386294 12:0.693147 10:0.693147 11:0.693147 18:4.394448 20:2.197224 21:1.098612 19:1.098612 15:1.098612 14:1.098612 23:2.772588 30:1.386294 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 6:2.079441 10:0.693147 12:0.693147 11:0.693147 7:0.693147 8:0.693147 9:0.693147 16:3.295836 20:2.197224 21:1.098612 23:2.772588 30:2.772588 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 4:0.000000 12:1.386294 10:1.386294 7:0.693147 6:0.693147 20:1.098612 18:1.098612 14:1.098612 23:5.545176 25:5.545176 30:2.772588 22:1.386294 +-1 1:0.000000 3:0.000000 5140:6.263398 +-1 1:0.000000 3:0.000000 5:0.000000 11:0.693147 18:1.098612 23:2.772588 45:1.791759 65:1.945910 119:2.197225 218:2.639057 294:2.772589 1372:4.553877 2159:5.164786 3149:5.857933 5113:6.263398 10646:6.957497 +-1 2:0.000000 4:0.000000 12:0.693147 20:1.098612 19:1.098612 23:1.386294 62:1.791759 216:2.564949 294:2.772589 373:3.044522 1277:4.465908 1376:4.553877 10652:20.872491 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 7:9.010911 10:6.238323 6:3.465735 11:2.079441 8:0.693147 12:0.693147 16:3.295836 13:2.197224 17:2.197224 18:2.197224 26:22.180704 +-1 21:1.098612 29:1.386294 23:1.386294 62:3.583518 217:2.564949 248:2.708050 294:2.772589 1006:4.174387 1277:4.465908 1376:4.553877 1372:4.553877 2926:5.568345 5146:6.263398 10674:13.914994 10675:6.957497 +-1 18:1.098612 23:1.386294 148:2.397895 3445:11.715866 10676:6.957497 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 12:2.079441 10:0.693147 11:0.693147 7:0.693147 19:3.295836 13:1.098612 20:1.098612 17:1.098612 23:6.931470 31:5.545176 29:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 11:0.693147 21:1.098612 20:1.098612 19:1.098612 18:1.098612 23:1.386294 27:1.386294 72:1.945910 95:2.079442 112:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 9:1.386294 6:1.386294 11:0.693147 8:0.693147 10:0.693147 20:2.197224 21:1.098612 19:1.098612 14:1.098612 30:4.158882 23:2.772588 31:1.386294 41:1.609438 +-1 1:0.000000 2:0.000000 3:0.000000 6:2.079441 11:1.386294 12:0.693147 18:7.690284 16:2.197224 21:2.197224 20:1.098612 17:1.098612 22:1.386294 23:1.386294 25:1.386294 34:1.609438 51:3.583518 +-1 1:0.000000 2:0.000000 12:0.693147 6:0.693147 20:2.197224 18:2.197224 23:2.772588 31:1.386294 22:1.386294 51:1.791759 78:1.945910 188:2.484907 216:2.564949 255:2.708050 473:3.295837 535:3.433987 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 4:0.000000 6:0.693147 21:2.197224 23:2.772588 29:1.386294 36:1.609438 64:3.583518 99:2.079442 109:2.197225 189:2.484907 208:2.564949 217:2.564949 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:1.386294 8:0.693147 11:0.693147 20:3.295836 15:2.197224 21:1.098612 18:1.098612 17:1.098612 14:1.098612 22:4.158882 23:1.386294 +-1 1:0.000000 23:1.386294 448:3.258097 1956:5.010635 5094:6.263398 10805:6.957497 10806:6.957497 10807:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:0.693147 6:0.693147 20:3.295836 13:2.197224 21:2.197224 19:1.098612 18:1.098612 16:1.098612 23:5.545176 25:2.772588 31:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 7:2.079441 12:1.386294 11:1.386294 10:1.386294 9:1.386294 19:1.098612 21:1.098612 18:1.098612 23:5.545176 26:4.158882 31:1.386294 30:1.386294 +-1 1:0.000000 3:0.000000 9:0.693147 16:1.098612 17:1.098612 14:1.098612 23:1.386294 37:1.609438 41:1.609438 82:1.945910 78:1.945910 81:1.945910 99:2.079442 136:2.302585 159:2.397895 189:2.484907 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 11:1.386294 9:1.386294 8:0.693147 10:0.693147 19:1.098612 21:1.098612 18:1.098612 17:1.098612 13:1.098612 23:8.317764 31:1.386294 29:1.386294 +-1 2:0.000000 5:0.000000 11:5.545176 9:2.079441 10:2.079441 6:1.386294 12:0.693147 8:0.693147 18:3.295836 20:3.295836 16:2.197224 17:2.197224 19:1.098612 14:1.098612 23:6.931470 27:2.772588 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 6:4.852029 9:2.772588 12:0.693147 11:0.693147 8:0.693147 20:3.295836 15:2.197224 18:2.197224 16:1.098612 14:1.098612 23:4.158882 30:2.772588 +-1 9:0.693147 17:1.098612 28:1.386294 47:1.791759 81:1.945910 101:2.079442 159:2.397895 158:2.397895 189:4.969814 188:2.484907 185:2.484907 222:5.278114 223:2.639057 278:2.772589 297:2.833213 326:11.561488 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 6:2.772588 9:1.386294 12:1.386294 11:0.693147 10:0.693147 19:1.098612 18:1.098612 14:1.098612 23:8.317764 28:1.386294 27:1.386294 +-1 2:0.000000 7:2.079441 18:1.098612 26:1.386294 39:1.609438 79:1.945910 93:2.079442 94:2.079442 137:2.302585 147:2.397895 170:2.484907 177:2.484907 163:2.484907 205:2.564949 194:2.564949 248:10.832200 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.772588 7:2.079441 8:2.079441 11:0.693147 9:0.693147 6:0.693147 18:3.295836 17:1.098612 23:8.317764 24:1.386294 36:3.218876 42:1.609438 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 9:0.693147 12:0.693147 8:0.693147 11:0.693147 21:1.098612 20:1.098612 19:1.098612 15:1.098612 23:4.158882 82:1.945910 66:1.945910 134:2.302585 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 10:2.079441 11:1.386294 7:0.693147 9:0.693147 23:4.158882 26:1.386294 42:1.609438 38:1.609438 43:1.609438 57:3.583518 48:1.791759 76:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 12:0.693147 7:0.693147 8:0.693147 13:1.098612 14:1.098612 23:4.158882 29:1.386294 24:1.386294 35:1.609438 45:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:0.693147 6:0.693147 20:1.098612 15:1.098612 23:2.772588 62:1.791759 47:1.791759 81:1.945910 82:1.945910 99:2.079442 124:2.197225 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 5:0.000000 10:4.852029 9:2.772588 12:0.693147 11:0.693147 18:2.197224 17:2.197224 21:1.098612 16:1.098612 23:9.704058 28:4.158882 25:2.772588 +-1 2:0.000000 3:0.000000 1:0.000000 6:2.079441 12:1.386294 10:1.386294 7:0.693147 8:0.693147 18:2.197224 15:1.098612 23:4.158882 26:1.386294 24:1.386294 31:1.386294 40:1.609438 38:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:2.079441 8:0.693147 10:0.693147 7:0.693147 11:0.693147 12:0.693147 18:4.394448 19:1.098612 16:1.098612 20:1.098612 24:5.545176 +-1 3:0.000000 1:0.000000 11:0.693147 14:1.098612 41:1.609438 47:1.791759 115:4.394450 118:2.197225 134:4.605170 189:2.484907 214:2.564949 196:2.564949 268:2.772589 274:2.772589 348:2.995732 405:3.135494 +-1 6:0.693147 13:3.295836 15:1.098612 22:1.386294 23:1.386294 34:1.609438 46:1.791759 65:1.945910 134:2.302585 220:2.639057 218:2.639057 2240:5.347108 +-1 4:0.000000 2:0.000000 12:1.386294 11:0.693147 9:0.693147 19:2.197224 23:5.545176 28:1.386294 34:3.218876 42:1.609438 38:1.609438 58:5.375277 56:1.791759 84:1.945910 68:1.945910 98:2.079442 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 6:4.158882 12:0.693147 11:0.693147 19:1.098612 18:1.098612 20:1.098612 23:4.158882 30:2.772588 26:2.772588 22:1.386294 37:3.218876 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 6:1.386294 12:0.693147 9:0.693147 10:0.693147 11:0.693147 20:2.197224 19:1.098612 18:1.098612 16:1.098612 23:4.158882 24:1.386294 +-1 2:0.000000 14:1.098612 23:1.386294 41:1.609438 218:2.639057 305:2.833213 710:3.761200 768:3.850148 3641:5.857933 4087:6.263398 11242:6.957497 11243:6.957497 11244:6.957497 +-1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 10:0.693147 6:0.693147 9:0.693147 8:0.693147 18:3.295836 20:1.098612 19:1.098612 23:2.772588 25:1.386294 29:1.386294 45:1.791759 51:1.791759 +-1 4:0.000000 6:0.693147 10:0.693147 21:1.098612 18:1.098612 29:1.386294 23:1.386294 43:1.609438 40:1.609438 57:5.375277 49:3.583518 58:1.791759 105:4.394450 127:2.197225 148:2.397895 194:5.129898 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:2.079441 6:0.693147 8:0.693147 9:0.693147 18:3.295836 20:2.197224 23:4.158882 24:1.386294 38:4.828314 37:1.609438 34:1.609438 +-1 2:0.000000 8:0.693147 14:2.197224 23:2.772588 24:1.386294 39:3.218876 41:1.609438 35:1.609438 43:1.609438 53:1.791759 84:1.945910 99:2.079442 117:4.394450 111:2.197225 114:2.197225 115:2.197225 +-1 1:0.000000 3:0.000000 5:0.000000 4:0.000000 2:0.000000 11:1.386294 6:1.386294 7:0.693147 20:2.197224 19:1.098612 15:1.098612 18:1.098612 24:5.545176 23:4.158882 31:1.386294 26:1.386294 +-1 3:0.000000 5:0.000000 14:2.197224 19:1.098612 15:1.098612 16:1.098612 24:5.545176 22:1.386294 23:1.386294 39:3.218876 37:1.609438 58:1.791759 81:3.891820 78:3.891820 82:1.945910 68:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 10:0.693147 6:0.693147 14:1.098612 40:1.609438 82:1.945910 88:2.079442 147:2.397895 258:2.708050 312:5.780744 385:3.091042 398:3.135494 557:3.496508 556:3.496508 +-1 2:0.000000 31:1.386294 29:1.386294 23:1.386294 45:1.791759 78:1.945910 65:1.945910 117:2.197225 148:2.397895 161:2.397895 176:2.484907 217:2.564949 203:2.564949 218:2.639057 531:3.433987 561:3.496508 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 9:1.386294 10:1.386294 7:1.386294 12:0.693147 6:0.693147 18:5.493060 19:3.295836 21:1.098612 20:1.098612 30:5.545176 27:5.545176 23:5.545176 +-1 2:0.000000 5:0.000000 6:2.079441 10:1.386294 7:0.693147 9:0.693147 23:5.545176 25:1.386294 38:1.609438 36:1.609438 60:8.958795 52:1.791759 62:1.791759 49:1.791759 69:5.837730 83:3.891820 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:0.693147 9:0.693147 23:2.772588 43:1.609438 65:1.945910 73:1.945910 97:2.079442 112:2.197225 159:2.397895 218:2.639057 248:2.708050 +-1 1:0.000000 3:0.000000 9:0.693147 18:1.098612 22:1.386294 99:2.079442 115:4.394450 126:2.197225 109:2.197225 124:2.197225 134:2.302585 144:2.397895 170:2.484907 248:2.708050 288:2.772589 278:2.772589 +-1 2:0.000000 4:0.000000 7:2.772588 9:2.079441 10:1.386294 11:0.693147 6:0.693147 8:0.693147 12:0.693147 17:4.394448 21:2.197224 16:1.098612 25:6.931470 26:5.545176 23:4.158882 27:2.772588 +-1 3:0.000000 2:0.000000 5:0.000000 4:0.000000 9:2.772588 7:2.772588 12:0.693147 6:0.693147 20:4.394448 19:3.295836 15:1.098612 21:1.098612 17:1.098612 18:1.098612 16:1.098612 23:9.704058 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 9:0.693147 19:2.197224 13:1.098612 23:4.158882 39:1.609438 47:3.583518 62:1.791759 58:1.791759 65:1.945910 84:1.945910 117:2.197225 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:2.079441 9:1.386294 10:1.386294 12:0.693147 7:0.693147 20:1.098612 19:1.098612 18:1.098612 23:6.931470 28:1.386294 27:1.386294 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 6:0.693147 18:2.197224 13:1.098612 17:1.098612 23:1.386294 40:1.609438 43:1.609438 49:3.583518 76:1.945910 65:1.945910 +-1 4:0.000000 1:0.000000 5:0.000000 6:6.931470 8:1.386294 9:1.386294 12:1.386294 10:0.693147 18:8.788896 16:7.690284 17:6.591672 14:2.197224 21:1.098612 20:1.098612 26:8.317764 33:5.545176 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 7:0.693147 8:0.693147 19:2.197224 15:1.098612 13:1.098612 23:2.772588 31:1.386294 26:1.386294 25:1.386294 22:1.386294 57:1.791759 59:1.791759 +-1 2:0.000000 3:0.000000 6:2.079441 11:0.693147 7:0.693147 16:8.788896 14:1.098612 27:5.545176 28:1.386294 36:1.609438 34:1.609438 35:1.609438 50:5.375277 48:3.583518 47:1.791759 53:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 12:0.693147 23:2.772588 27:1.386294 30:1.386294 60:3.583518 72:3.891820 100:6.238326 102:2.079442 91:2.079442 118:4.394450 127:4.394450 128:2.197225 +-1 2:0.000000 3:0.000000 1:0.000000 11:2.079441 7:2.079441 10:0.693147 8:0.693147 9:0.693147 21:2.197224 16:2.197224 15:2.197224 20:1.098612 19:1.098612 17:1.098612 28:4.158882 23:1.386294 +-1 1:0.000000 3:0.000000 11524:6.957497 11525:6.957497 +-1 2:0.000000 4:0.000000 3:0.000000 12:1.386294 9:0.693147 10:0.693147 15:1.098612 20:1.098612 19:1.098612 23:2.772588 31:1.386294 29:1.386294 40:3.218876 38:1.609438 58:1.791759 62:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 12:2.079441 9:1.386294 6:1.386294 18:2.197224 21:1.098612 16:1.098612 23:2.772588 28:1.386294 36:1.609438 39:1.609438 43:1.609438 51:5.375277 52:3.583518 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 12:0.693147 23:4.158882 38:1.609438 62:1.791759 58:1.791759 65:1.945910 69:1.945910 92:2.079442 122:2.197225 151:2.397895 +-1 2:0.000000 6:5.545176 10:1.386294 12:0.693147 9:0.693147 8:0.693147 18:1.098612 16:1.098612 25:2.772588 23:1.386294 24:1.386294 36:4.828314 34:1.609438 61:3.583518 51:1.791759 56:1.791759 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 6:3.465735 8:1.386294 10:1.386294 11:0.693147 16:1.098612 23:5.545176 43:1.609438 51:7.167036 57:1.791759 78:5.837730 72:1.945910 92:6.238326 +-1 5:0.000000 2:0.000000 3:0.000000 4:0.000000 7:2.772588 10:1.386294 6:1.386294 11:0.693147 20:2.197224 15:1.098612 23:6.931470 25:4.158882 30:2.772588 26:2.772588 29:1.386294 43:4.828314 +-1 2:0.000000 4:0.000000 5:0.000000 6:7.624617 10:2.079441 9:2.079441 11:1.386294 8:0.693147 17:1.098612 19:1.098612 16:1.098612 42:1.609438 43:1.609438 51:12.542313 57:7.167036 52:5.375277 +-1 2:0.000000 1:0.000000 3:0.000000 6:3.465735 11:2.079441 9:1.386294 8:0.693147 7:0.693147 18:5.493060 20:3.295836 21:2.197224 15:2.197224 14:1.098612 23:4.158882 25:2.772588 35:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 6:3.465735 12:0.693147 8:0.693147 18:4.394448 20:3.295836 19:1.098612 15:1.098612 14:1.098612 23:2.772588 30:1.386294 34:4.828314 41:1.609438 51:1.791759 +-1 2:0.000000 4:0.000000 6:2.079441 9:0.693147 10:0.693147 7:0.693147 16:7.690284 19:2.197224 13:1.098612 21:1.098612 26:6.931470 23:2.772588 24:2.772588 30:2.772588 28:2.772588 27:2.772588 +-1 3:0.000000 2:0.000000 1:0.000000 5:0.000000 4:0.000000 8:0.693147 6:0.693147 11:0.693147 9:0.693147 18:3.295836 20:1.098612 19:1.098612 14:1.098612 17:1.098612 23:4.158882 31:1.386294 +-1 1:0.000000 4:0.000000 2:0.000000 3:0.000000 10:2.772588 8:2.772588 6:2.079441 11:0.693147 12:0.693147 16:5.493060 19:3.295836 21:2.197224 14:1.098612 20:1.098612 23:4.158882 27:2.772588 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 9:2.079441 12:1.386294 14:2.197224 21:1.098612 23:5.545176 28:2.772588 30:1.386294 24:1.386294 38:3.218876 34:1.609438 43:1.609438 +-1 3:0.000000 1:0.000000 5:0.000000 12:1.386294 9:0.693147 20:2.197224 19:1.098612 23:5.545176 54:1.791759 55:1.791759 82:1.945910 124:2.197225 159:2.397895 273:2.772589 461:3.295837 691:3.761200 +-1 3:0.000000 2:0.000000 4:0.000000 9:2.772588 16:4.394448 14:3.295836 17:2.197224 15:1.098612 13:1.098612 19:1.098612 28:5.545176 24:2.772588 23:2.772588 40:1.609438 38:1.609438 41:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 8:0.693147 23:1.386294 134:2.302585 374:3.044522 409:3.135494 413:3.178054 494:3.367296 889:4.060443 1037:4.248495 2473:5.347108 2889:5.568345 5070:6.263398 11854:13.914994 +-1 16:1.098612 251:2.708050 313:2.890372 401:3.135494 443:3.258097 884:4.060443 1016:4.248495 4886:6.263398 11864:6.957497 11865:6.957497 11866:6.957497 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 23:2.772588 65:1.945910 127:2.197225 130:2.302585 158:2.397895 187:2.484907 471:3.295837 818:3.951244 943:4.110874 1194:4.382027 +-1 1:0.000000 3:0.000000 41:1.609438 257:2.708050 382:3.091042 420:3.218876 884:4.060443 956:8.221748 2057:5.164786 2866:16.705035 2978:5.568345 11870:6.957497 11871:6.957497 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:2.079441 6:1.386294 12:0.693147 19:1.098612 23:4.158882 31:1.386294 25:1.386294 28:1.386294 45:3.583518 59:1.791759 74:3.891820 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 19:1.098612 18:1.098612 23:2.772588 135:2.302585 145:2.397895 217:2.564949 248:2.708050 254:2.708050 294:2.772589 313:2.890372 375:3.044522 +-1 2:0.000000 1:0.000000 9:1.386294 7:0.693147 17:1.098612 23:1.386294 28:1.386294 30:1.386294 47:3.583518 71:5.837730 101:2.079442 128:2.197225 121:2.197225 126:2.197225 117:2.197225 120:2.197225 +-1 2:0.000000 5:0.000000 4:0.000000 8:2.079441 10:1.386294 11:1.386294 9:1.386294 6:0.693147 19:2.197224 16:2.197224 21:1.098612 23:4.158882 24:1.386294 28:1.386294 27:1.386294 31:1.386294 +-1 2:0.000000 6:3.465735 10:1.386294 7:0.693147 8:0.693147 18:4.394448 20:2.197224 16:1.098612 23:1.386294 25:1.386294 30:1.386294 34:4.828314 40:1.609438 49:1.791759 61:1.791759 69:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 19:1.098612 17:1.098612 23:4.158882 26:1.386294 35:1.609438 45:1.791759 72:1.945910 97:2.079442 258:2.708050 454:3.258097 459:3.258097 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 8:0.693147 20:1.098612 18:1.098612 16:1.098612 22:1.386294 23:1.386294 37:3.218876 117:2.197225 109:2.197225 141:2.397895 178:2.484907 1344:4.553877 +-1 1:0.000000 2:0.000000 5:0.000000 4:0.000000 3:0.000000 7:2.079441 11:0.693147 6:0.693147 15:3.295836 18:2.197224 17:1.098612 20:1.098612 21:1.098612 14:1.098612 28:2.772588 23:1.386294 +-1 102:2.079442 382:3.091042 1057:4.248495 3713:5.857933 12002:6.957497 12003:6.957497 +-1 4:0.000000 2:0.000000 6:2.079441 7:0.693147 17:6.591672 23:2.772588 29:1.386294 39:1.609438 40:1.609438 41:1.609438 53:1.791759 83:1.945910 68:1.945910 69:1.945910 98:10.397210 130:2.302585 +-1 2:0.000000 4:0.000000 5:0.000000 11:1.386294 12:0.693147 10:0.693147 6:0.693147 7:0.693147 19:1.098612 18:1.098612 14:1.098612 29:1.386294 23:1.386294 35:1.609438 51:1.791759 125:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 12:0.693147 11:0.693147 14:1.098612 23:2.772588 26:1.386294 41:1.609438 50:1.791759 76:1.945910 96:2.079442 177:2.484907 433:3.218876 542:3.465736 +-1 2:0.000000 5:0.000000 7:4.158882 6:2.079441 9:0.693147 16:2.197224 18:1.098612 17:1.098612 26:4.158882 29:1.386294 23:1.386294 27:1.386294 24:1.386294 40:6.437752 38:4.828314 42:1.609438 +-1 4:0.000000 5:0.000000 3:0.000000 8:0.693147 9:0.693147 19:1.098612 21:1.098612 17:1.098612 23:1.386294 24:1.386294 27:1.386294 59:1.791759 81:1.945910 67:1.945910 114:2.197225 144:2.397895 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 4:0.000000 12:0.693147 18:1.098612 16:1.098612 27:6.931470 30:4.158882 23:2.772588 42:1.609438 39:1.609438 68:1.945910 69:1.945910 158:2.397895 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 11:1.386294 12:0.693147 10:0.693147 9:0.693147 17:1.098612 16:1.098612 18:1.098612 27:5.545176 23:2.772588 29:2.772588 30:2.772588 +-1 3:0.000000 83:1.945910 135:2.302585 148:2.397895 313:2.890372 395:3.135494 684:3.761200 690:3.761200 2736:5.568345 4341:6.263398 +-1 3:0.000000 16:1.098612 47:1.791759 313:2.890372 344:2.995732 395:3.135494 489:3.367296 721:3.806662 5414:6.263398 5232:6.263398 +-1 1:0.000000 20:1.098612 23:1.386294 86:2.079442 130:2.302585 173:2.484907 189:2.484907 186:2.484907 258:2.708050 359:3.044522 812:3.951244 1011:8.496990 3776:5.857933 12100:27.829988 12101:6.957497 +-1 3:0.000000 1:0.000000 17:4.394448 16:3.295836 21:1.098612 22:1.386294 23:1.386294 24:1.386294 39:1.609438 56:1.791759 59:1.791759 82:1.945910 70:1.945910 81:1.945910 67:1.945910 83:1.945910 +-1 1:0.000000 2:0.000000 4:0.000000 9:2.772588 12:0.693147 10:0.693147 11:0.693147 19:1.098612 18:1.098612 23:1.386294 31:1.386294 40:1.609438 36:1.609438 60:1.791759 76:7.783640 81:3.891820 +-1 1:0.000000 5:0.000000 19:1.098612 13:1.098612 14:1.098612 23:2.772588 31:1.386294 41:1.609438 62:3.583518 65:1.945910 257:2.708050 294:5.545178 1376:4.553877 1579:4.753590 2960:5.568345 5249:6.263398 +-1 2:0.000000 5:0.000000 1:0.000000 4:0.000000 12:2.079441 11:1.386294 9:0.693147 6:0.693147 17:2.197224 21:1.098612 19:1.098612 14:1.098612 23:2.772588 36:1.609438 41:1.609438 57:1.791759 +-1 3:0.000000 119:2.197225 148:2.397895 242:2.708050 459:3.258097 1280:4.465908 4088:6.263398 12135:6.957497 12136:6.957497 12137:6.957497 12138:6.957497 12139:6.957497 12140:6.957497 12141:6.957497 12142:6.957497 12143:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:1.386294 7:0.693147 11:0.693147 18:2.197224 31:2.772588 23:2.772588 30:1.386294 29:1.386294 34:1.609438 38:1.609438 62:1.791759 51:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 10:2.772588 6:2.079441 9:0.693147 8:0.693147 18:2.197224 20:2.197224 19:1.098612 23:2.772588 30:2.772588 25:1.386294 28:1.386294 31:1.386294 34:1.609438 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 8:0.693147 21:2.197224 15:2.197224 13:1.098612 20:1.098612 19:1.098612 18:1.098612 23:1.386294 30:1.386294 40:1.609438 +-1 11:0.693147 110:2.197225 +-1 2:0.000000 4:0.000000 3:0.000000 7:8.317764 6:1.386294 8:0.693147 18:1.098612 20:1.098612 14:1.098612 26:2.772588 27:1.386294 30:1.386294 23:1.386294 43:1.609438 41:1.609438 50:7.167036 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 7:18.021822 6:4.852029 12:4.158882 10:3.465735 8:1.386294 9:0.693147 11:0.693147 20:2.197224 13:2.197224 16:1.098612 21:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 19:1.098612 23:5.545176 29:1.386294 31:1.386294 24:1.386294 35:1.609438 65:1.945910 218:2.639057 223:2.639057 294:2.772589 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 3:0.000000 9:1.386294 11:0.693147 20:2.197224 16:2.197224 18:1.098612 14:1.098612 23:4.158882 29:2.772588 31:1.386294 24:1.386294 35:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 4:0.000000 9:0.693147 6:0.693147 7:0.693147 17:3.295836 19:2.197224 21:1.098612 18:1.098612 16:1.098612 20:1.098612 23:4.158882 31:2.772588 +-1 1:0.000000 7:1.386294 6:0.693147 21:2.197224 14:1.098612 23:4.158882 26:1.386294 30:1.386294 34:1.609438 62:1.791759 87:4.158884 96:4.158884 95:2.079442 90:2.079442 99:2.079442 127:2.197225 +-1 3:0.000000 2:0.000000 5:0.000000 8:1.386294 11:0.693147 6:0.693147 7:0.693147 16:5.493060 18:2.197224 21:1.098612 25:6.931470 23:4.158882 30:4.158882 27:2.772588 28:1.386294 26:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 8:1.386294 9:0.693147 10:0.693147 7:0.693147 18:2.197224 21:1.098612 17:1.098612 16:1.098612 23:12.476646 25:1.386294 27:1.386294 38:9.656628 +-1 2:0.000000 4:0.000000 5:0.000000 6:6.931470 7:2.079441 9:2.079441 10:0.693147 16:2.197224 18:2.197224 14:1.098612 17:1.098612 23:8.317764 28:2.772588 30:1.386294 27:1.386294 36:1.609438 +-1 2:0.000000 8:3.465735 10:2.772588 9:1.386294 21:1.098612 19:1.098612 18:1.098612 23:2.772588 27:1.386294 31:1.386294 36:4.828314 40:1.609438 49:1.791759 48:1.791759 82:3.891820 80:1.945910 +-1 2:0.000000 8:1.386294 10:0.693147 16:1.098612 18:1.098612 23:2.772588 36:4.828314 56:7.167036 61:5.375277 53:3.583518 49:1.791759 60:1.791759 51:1.791759 59:1.791759 74:5.837730 72:3.891820 +-1 3:0.000000 18:3.295836 23:2.772588 24:2.772588 25:1.386294 43:1.609438 37:1.609438 68:1.945910 67:1.945910 87:2.079442 89:2.079442 127:6.591675 137:2.302585 150:2.397895 175:2.484907 256:2.708050 +-1 2:0.000000 1:0.000000 3:0.000000 10:2.772588 6:2.079441 7:1.386294 18:4.394448 21:1.098612 16:1.098612 20:1.098612 26:5.545176 23:4.158882 27:2.772588 30:2.772588 25:1.386294 42:1.609438 +-1 2:0.000000 4:0.000000 7:9.010911 8:2.079441 6:0.693147 9:0.693147 16:3.295836 28:4.158882 30:2.772588 25:1.386294 23:1.386294 27:1.386294 24:1.386294 42:9.656628 34:1.609438 60:7.167036 +-1 3:0.000000 8:3.465735 7:0.693147 23:2.772588 43:1.609438 59:1.791759 116:6.591675 139:2.302585 154:2.397895 152:2.397895 182:2.484907 163:2.484907 179:2.484907 201:7.694847 304:2.833213 616:10.832754 +-1 3:0.000000 23:1.386294 166:2.484907 235:2.639057 313:2.890372 382:3.091042 420:3.218876 2335:5.347108 12564:6.957497 +-1 10:1.386294 18:1.098612 23:1.386294 30:1.386294 36:1.609438 38:1.609438 53:1.791759 132:2.302585 258:5.416100 316:2.890372 2465:5.347108 3580:5.857933 4547:6.263398 12565:6.957497 12566:6.957497 +-1 1:0.000000 2:0.000000 3:0.000000 8:1.386294 12:1.386294 10:0.693147 7:0.693147 15:1.098612 19:1.098612 24:1.386294 43:3.218876 36:1.609438 37:1.609438 44:1.609438 68:1.945910 117:2.197225 +-1 1:0.000000 3:0.000000 7:0.693147 12:0.693147 19:1.098612 17:1.098612 29:2.772588 34:1.609438 44:1.609438 38:1.609438 63:1.791759 46:1.791759 48:1.791759 45:1.791759 47:1.791759 78:1.945910 +-1 5:0.000000 4:0.000000 2:0.000000 11:0.693147 12:0.693147 26:1.386294 22:1.386294 44:1.609438 64:7.167036 63:5.375277 95:2.079442 106:2.197225 170:2.484907 300:2.833213 388:3.091042 1060:4.248495 +-1 5:0.000000 2:0.000000 4:0.000000 7:1.386294 12:0.693147 6:0.693147 19:1.098612 21:1.098612 24:1.386294 30:1.386294 44:1.609438 50:3.583518 64:1.791759 61:1.791759 62:1.791759 48:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 7:2.079441 9:1.386294 6:1.386294 12:0.693147 10:0.693147 13:2.197224 16:1.098612 36:1.609438 44:1.609438 60:5.375277 63:5.375277 64:1.791759 50:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 11:3.465735 9:0.693147 10:0.693147 12:0.693147 13:2.197224 19:1.098612 17:1.098612 24:1.386294 30:1.386294 29:1.386294 36:4.828314 +-1 1:0.000000 8:0.693147 13:2.197224 15:1.098612 29:1.386294 44:1.609438 46:1.791759 96:2.079442 206:2.564949 261:2.772589 2792:22.273380 3509:11.715866 3524:5.857933 5442:12.526796 12621:6.957497 +-1 2:0.000000 5:0.000000 4:0.000000 12:1.386294 20:1.098612 31:1.386294 29:1.386294 40:3.218876 44:1.609438 63:3.583518 64:1.791759 59:1.791759 65:1.945910 73:1.945910 91:2.079442 148:2.397895 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 13:2.197224 44:1.609438 63:3.583518 59:1.791759 114:2.197225 256:2.708050 380:3.091042 569:3.555348 702:3.761200 1198:4.382027 1472:4.653960 1769:4.875197 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 9:1.386294 10:1.386294 12:0.693147 11:0.693147 6:0.693147 8:0.693147 20:2.197224 21:1.098612 18:1.098612 22:2.772588 28:1.386294 40:1.609438 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:0.693147 12:0.693147 13:1.098612 38:1.609438 44:1.609438 62:3.583518 63:3.583518 61:1.791759 59:1.791759 64:1.791759 69:3.891820 72:1.945910 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:1.386294 18:1.098612 64:1.791759 61:1.791759 77:1.945910 99:2.079442 97:2.079442 87:2.079442 86:2.079442 123:2.197225 125:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 7:4.852029 9:1.386294 8:1.386294 10:0.693147 6:0.693147 21:1.098612 18:1.098612 15:1.098612 16:1.098612 14:1.098612 31:1.386294 29:1.386294 +-1 4:0.000000 5:0.000000 2:0.000000 3:0.000000 1:0.000000 9:0.693147 12:0.693147 17:2.197224 18:1.098612 15:1.098612 19:1.098612 22:1.386294 28:1.386294 30:1.386294 44:1.609438 43:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 7:11.090352 9:1.386294 6:1.386294 10:0.693147 20:2.197224 26:8.317764 27:2.772588 28:1.386294 25:1.386294 30:1.386294 38:6.437752 42:1.609438 44:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 11:2.079441 12:1.386294 8:1.386294 10:0.693147 21:1.098612 20:1.098612 24:2.772588 25:1.386294 22:1.386294 36:8.047190 43:1.609438 +-1 1:0.000000 3:0.000000 9:2.079441 6:0.693147 10:0.693147 12:0.693147 11:0.693147 18:2.197224 21:1.098612 14:1.098612 30:1.386294 22:1.386294 24:1.386294 44:3.218876 37:1.609438 41:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 10:0.693147 7:0.693147 8:0.693147 17:2.197224 13:2.197224 22:2.772588 26:1.386294 29:1.386294 24:1.386294 44:3.218876 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 9:0.693147 19:1.098612 21:1.098612 13:1.098612 14:1.098612 44:3.218876 37:1.609438 40:1.609438 41:1.609438 64:1.791759 63:1.791759 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 13:2.197224 19:1.098612 39:1.609438 42:1.609438 44:1.609438 63:7.167036 59:1.791759 71:1.945910 121:2.197225 148:2.397895 256:2.708050 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 6:1.386294 7:1.386294 19:1.098612 21:1.098612 30:1.386294 31:1.386294 44:1.609438 42:1.609438 63:3.583518 64:1.791759 47:1.791759 65:1.945910 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 9:0.693147 8:0.693147 7:0.693147 12:0.693147 19:1.098612 17:1.098612 21:1.098612 16:1.098612 15:1.098612 31:1.386294 25:1.386294 24:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 8:2.079441 10:1.386294 6:1.386294 12:0.693147 11:0.693147 9:0.693147 14:1.098612 20:1.098612 21:1.098612 19:1.098612 26:1.386294 25:1.386294 41:1.609438 +-1 3:0.000000 6:0.693147 8:0.693147 17:1.098612 16:1.098612 14:1.098612 35:1.609438 44:1.609438 60:1.791759 50:1.791759 111:2.197225 118:2.197225 257:2.708050 567:3.555348 858:4.007333 1315:4.553877 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 11:0.693147 13:1.098612 44:1.609438 64:1.791759 101:2.079442 114:2.197225 119:2.197225 188:2.484907 272:2.772589 269:2.772589 327:2.890372 446:3.258097 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 10:1.386294 18:3.295836 19:2.197224 13:2.197224 24:2.772588 31:1.386294 29:1.386294 44:3.218876 35:1.609438 63:7.167036 64:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 11:1.386294 12:0.693147 8:0.693147 16:1.098612 13:1.098612 39:1.609438 38:1.609438 44:1.609438 62:5.375277 53:3.583518 63:3.583518 50:1.791759 +-1 3:0.000000 82:1.945910 78:1.945910 330:2.944439 841:4.007333 905:4.060443 1331:4.553877 1561:4.753590 2047:5.164786 3824:5.857933 12911:6.957497 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 5:0.000000 12:1.386294 11:0.693147 19:1.098612 13:1.098612 22:1.386294 24:1.386294 44:1.609438 41:1.609438 63:10.750554 62:3.583518 45:1.791759 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 3:0.000000 7:5.545176 6:2.772588 10:1.386294 12:0.693147 16:2.197224 14:2.197224 21:1.098612 18:1.098612 22:8.317764 30:4.158882 26:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 9:0.693147 12:0.693147 19:1.098612 21:1.098612 13:1.098612 22:2.772588 29:1.386294 44:3.218876 36:1.609438 43:1.609438 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 6:2.772588 7:2.079441 11:0.693147 17:4.394448 13:2.197224 26:4.158882 27:1.386294 36:4.828314 39:1.609438 43:1.609438 44:1.609438 52:1.791759 +-1 6:2.079441 10:1.386294 9:1.386294 17:8.788896 18:1.098612 21:1.098612 28:1.386294 25:1.386294 36:3.218876 38:1.609438 50:1.791759 84:1.945910 80:1.945910 69:1.945910 93:2.079442 98:2.079442 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 10:3.465735 6:2.079441 8:2.079441 11:1.386294 12:0.693147 21:1.098612 16:1.098612 13:1.098612 18:1.098612 28:2.772588 26:1.386294 +-1 1:0.000000 3:0.000000 9:0.693147 44:1.609438 83:1.945910 99:2.079442 188:2.484907 199:2.564949 313:2.890372 376:3.044522 395:3.135494 684:3.761200 735:3.806662 785:3.912023 899:4.060443 1055:4.248495 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:0.693147 11:0.693147 10:0.693147 9:0.693147 20:1.098612 13:1.098612 30:1.386294 24:1.386294 29:1.386294 34:3.218876 43:1.609438 39:1.609438 +-1 1:0.000000 3:0.000000 10:0.693147 9:0.693147 11:0.693147 6:0.693147 19:1.098612 16:1.098612 17:1.098612 13:1.098612 43:1.609438 44:1.609438 51:3.583518 47:1.791759 60:1.791759 61:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 12:1.386294 6:1.386294 8:0.693147 9:0.693147 19:1.098612 17:1.098612 29:1.386294 24:1.386294 44:1.609438 63:8.958795 64:5.375277 62:5.375277 +-1 4:0.000000 3:0.000000 1:0.000000 2:0.000000 11:0.693147 8:0.693147 16:2.197224 24:2.772588 27:1.386294 41:1.609438 44:1.609438 82:1.945910 72:1.945910 78:1.945910 94:4.158884 99:2.079442 +-1 1517:4.753590 13023:6.957497 13024:6.957497 13025:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 10:1.386294 8:0.693147 19:1.098612 16:1.098612 13:1.098612 24:2.772588 32:1.386294 29:1.386294 44:1.609438 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 9:0.693147 19:3.295836 20:2.197224 31:1.386294 28:1.386294 30:1.386294 22:1.386294 24:1.386294 44:1.609438 63:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:1.386294 12:0.693147 13:1.098612 15:1.098612 17:1.098612 16:1.098612 14:1.098612 29:1.386294 28:1.386294 27:1.386294 43:3.218876 44:1.609438 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:3.465735 6:0.693147 9:0.693147 25:1.386294 38:3.218876 42:1.609438 39:1.609438 44:1.609438 51:7.167036 64:5.375277 63:5.375277 +-1 5:0.000000 2:0.000000 4:0.000000 10:2.772588 7:2.079441 6:0.693147 11:0.693147 8:0.693147 12:0.693147 19:2.197224 21:2.197224 18:2.197224 13:2.197224 30:5.545176 27:4.158882 26:4.158882 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 9:0.693147 7:0.693147 6:0.693147 13:2.197224 42:1.609438 38:1.609438 39:1.609438 44:1.609438 63:5.375277 60:3.583518 49:1.791759 59:1.791759 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 13:2.197224 17:1.098612 38:1.609438 44:1.609438 63:3.583518 59:1.791759 71:1.945910 120:2.197225 189:2.484907 202:2.564949 198:2.564949 229:2.639057 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 10:1.386294 11:0.693147 9:0.693147 19:1.098612 31:1.386294 34:1.609438 43:1.609438 44:1.609438 63:3.583518 64:1.791759 61:1.791759 51:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 17:3.295836 22:1.386294 44:1.609438 64:3.583518 63:3.583518 45:1.791759 51:1.791759 58:1.791759 76:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:4.852029 9:2.772588 6:2.079441 8:1.386294 10:0.693147 18:1.098612 16:1.098612 14:1.098612 22:4.158882 28:1.386294 26:1.386294 +-1 5:0.000000 3:0.000000 2:0.000000 4:0.000000 1:0.000000 12:1.386294 11:0.693147 9:0.693147 7:0.693147 20:1.098612 19:1.098612 17:1.098612 15:1.098612 16:1.098612 14:1.098612 31:1.386294 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 13:2.197224 39:1.609438 42:1.609438 44:1.609438 63:3.583518 59:1.791759 71:3.891820 121:2.197225 256:2.708050 408:6.270988 412:3.178054 473:3.295837 +-1 1:0.000000 3:0.000000 2:0.000000 12:0.693147 8:0.693147 19:1.098612 31:1.386294 64:1.791759 63:1.791759 99:2.079442 117:2.197225 111:2.197225 303:2.833213 420:3.218876 688:3.761200 721:3.806662 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 11:1.386294 7:0.693147 8:0.693147 19:1.098612 20:1.098612 22:1.386294 44:3.218876 63:5.375277 64:3.583518 80:1.945910 95:2.079442 110:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 9:0.693147 12:0.693147 6:0.693147 7:0.693147 11:0.693147 10:0.693147 19:2.197224 13:2.197224 21:1.098612 26:1.386294 44:1.609438 63:3.583518 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 8:2.079441 10:1.386294 11:0.693147 6:0.693147 12:0.693147 16:2.197224 13:1.098612 28:1.386294 29:1.386294 44:3.218876 42:1.609438 39:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 10:2.079441 11:2.079441 9:0.693147 21:1.098612 22:1.386294 37:3.218876 44:3.218876 63:3.583518 60:1.791759 59:1.791759 64:1.791759 86:2.079442 97:2.079442 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 12:1.386294 8:0.693147 11:0.693147 6:0.693147 9:0.693147 20:2.197224 15:1.098612 21:1.098612 19:1.098612 22:1.386294 25:1.386294 36:3.218876 +-1 3:0.000000 10:0.693147 14:1.098612 27:2.772588 30:1.386294 44:1.609438 41:1.609438 62:1.791759 63:1.791759 58:1.791759 70:3.891820 82:1.945910 89:2.079442 111:2.197225 173:2.484907 196:2.564949 +-1 1:0.000000 3:0.000000 10:0.693147 19:1.098612 21:1.098612 31:1.386294 44:1.609438 59:1.791759 63:1.791759 45:1.791759 84:1.945910 102:2.079442 187:2.484907 362:3.044522 550:3.465736 593:3.583519 +-1 3:0.000000 1:0.000000 14:1.098612 44:1.609438 41:1.609438 59:1.791759 63:1.791759 78:3.891820 97:2.079442 109:4.394450 108:2.197225 126:2.197225 146:7.193685 175:2.484907 179:2.484907 200:2.564949 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 11:1.386294 7:0.693147 12:0.693147 17:2.197224 20:1.098612 15:1.098612 14:1.098612 28:5.545176 39:1.609438 44:1.609438 41:1.609438 +-1 2:0.000000 3:0.000000 5:0.000000 9:1.386294 12:0.693147 8:0.693147 14:1.098612 27:1.386294 30:1.386294 44:4.828314 43:1.609438 39:1.609438 35:1.609438 64:3.583518 52:3.583518 61:3.583518 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 28:1.386294 44:1.609438 37:1.609438 64:1.791759 63:1.791759 49:1.791759 66:3.891820 81:1.945910 70:1.945910 92:4.158884 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 6:0.693147 19:2.197224 31:1.386294 44:3.218876 34:1.609438 63:3.583518 64:1.791759 84:1.945910 188:2.484907 187:2.484907 338:2.944439 535:3.433987 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 8:0.693147 27:1.386294 30:1.386294 44:1.609438 64:3.583518 63:3.583518 59:1.791759 84:1.945910 99:2.079442 124:2.197225 216:2.564949 220:2.639057 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 18:1.098612 26:1.386294 44:1.609438 64:1.791759 63:1.791759 51:1.791759 86:2.079442 125:4.394450 166:2.484907 411:3.178054 524:3.433987 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 12:1.386294 10:0.693147 11:0.693147 8:0.693147 21:1.098612 19:1.098612 20:1.098612 25:4.158882 22:1.386294 44:1.609438 35:1.609438 64:3.583518 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 19:1.098612 20:1.098612 30:1.386294 29:1.386294 44:1.609438 64:5.375277 63:5.375277 82:1.945910 71:1.945910 84:1.945910 78:1.945910 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 5:0.000000 10:0.693147 11:0.693147 12:0.693147 19:1.098612 20:1.098612 13:1.098612 29:1.386294 22:1.386294 44:3.218876 40:3.218876 43:1.609438 +-1 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:4.158882 11:2.079441 6:1.386294 7:1.386294 10:0.693147 21:6.591672 16:3.295836 13:2.197224 17:1.098612 19:1.098612 30:5.545176 28:4.158882 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 12:2.079441 10:1.386294 8:1.386294 7:0.693147 19:1.098612 14:1.098612 25:1.386294 26:1.386294 22:1.386294 29:1.386294 44:3.218876 +-1 4:0.000000 2:0.000000 1:0.000000 5:0.000000 10:1.386294 8:0.693147 7:0.693147 13:2.197224 19:1.098612 28:1.386294 38:1.609438 44:1.609438 63:12.542313 48:5.375277 64:1.791759 59:1.791759 +-1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 12:1.386294 17:3.295836 19:1.098612 21:1.098612 15:1.098612 13:1.098612 14:1.098612 31:2.772588 26:1.386294 22:1.386294 29:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 8:2.772588 12:0.693147 21:1.098612 19:1.098612 20:1.098612 18:1.098612 16:1.098612 14:1.098612 22:1.386294 31:1.386294 30:1.386294 44:4.828314 41:1.609438 +-1 3:0.000000 1:0.000000 9:0.693147 19:1.098612 14:1.098612 28:1.386294 29:1.386294 31:1.386294 42:1.609438 44:1.609438 35:1.609438 131:2.302585 148:2.397895 186:2.484907 287:5.545178 297:5.666426 +-1 5:0.000000 1:0.000000 2:0.000000 4:0.000000 10:1.386294 6:0.693147 24:1.386294 22:1.386294 34:3.218876 38:1.609438 64:3.583518 63:1.791759 377:3.091042 473:3.295837 532:3.433987 1692:4.875197 +-1 2:0.000000 5:0.000000 4:0.000000 10:4.852029 8:1.386294 12:0.693147 11:0.693147 16:2.197224 20:1.098612 14:1.098612 28:2.772588 29:1.386294 22:1.386294 24:1.386294 44:3.218876 37:1.609438 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 4:0.000000 6:2.079441 12:0.693147 8:0.693147 15:2.197224 19:1.098612 18:1.098612 16:1.098612 14:1.098612 31:1.386294 29:1.386294 26:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 8:0.693147 12:0.693147 29:1.386294 22:1.386294 44:3.218876 63:5.375277 62:3.583518 59:1.791759 64:1.791759 45:1.791759 84:1.945910 173:2.484907 +-1 2:0.000000 4:0.000000 5:0.000000 6:0.693147 10:0.693147 12:0.693147 20:1.098612 14:1.098612 27:1.386294 35:1.609438 44:1.609438 64:3.583518 63:1.791759 102:2.079442 119:2.197225 158:2.397895 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 12:1.386294 10:0.693147 19:2.197224 24:1.386294 26:1.386294 44:1.609438 63:7.167036 64:3.583518 59:1.791759 71:1.945910 84:1.945910 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 11:1.386294 8:1.386294 12:0.693147 10:0.693147 7:0.693147 19:1.098612 13:1.098612 40:1.609438 44:1.609438 63:3.583518 45:3.583518 59:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 12:2.079441 9:0.693147 6:0.693147 14:1.098612 19:1.098612 44:4.828314 43:3.218876 41:1.609438 64:7.167036 63:7.167036 65:1.945910 94:2.079442 +-1 2:0.000000 5:0.000000 6:2.079441 11:1.386294 10:0.693147 16:1.098612 30:2.772588 53:3.583518 57:3.583518 64:1.791759 63:1.791759 52:1.791759 60:1.791759 58:1.791759 56:1.791759 80:3.891820 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 8:1.386294 11:0.693147 19:1.098612 21:1.098612 16:1.098612 28:1.386294 30:1.386294 24:1.386294 29:1.386294 44:1.609438 60:5.375277 +-1 1:0.000000 3:0.000000 17:1.098612 99:2.079442 159:2.397895 295:2.833213 2667:11.136690 3215:11.715866 13468:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.772588 11:0.693147 8:0.693147 12:0.693147 19:1.098612 30:1.386294 22:1.386294 29:1.386294 38:1.609438 42:1.609438 40:1.609438 44:1.609438 +-1 2:0.000000 4:0.000000 3:0.000000 10:0.693147 8:0.693147 13:1.098612 29:2.772588 22:1.386294 28:1.386294 44:3.218876 40:1.609438 64:3.583518 63:3.583518 61:3.583518 76:1.945910 65:1.945910 +-1 4:0.000000 2:0.000000 5:0.000000 10:6.238323 7:1.386294 12:0.693147 18:2.197224 21:1.098612 25:1.386294 38:1.609438 61:10.750554 63:3.583518 64:3.583518 53:1.791759 52:1.791759 62:1.791759 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 22:1.386294 44:1.609438 41:1.609438 64:1.791759 63:1.791759 156:2.397895 491:3.367296 567:3.555348 927:4.110874 1087:4.317488 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 10:0.693147 20:1.098612 15:1.098612 14:1.098612 41:1.609438 44:1.609438 64:3.583518 63:1.791759 59:1.791759 62:1.791759 47:1.791759 99:2.079442 +-1 4:0.000000 1:0.000000 2:0.000000 5:0.000000 3:0.000000 12:2.079441 8:1.386294 6:0.693147 10:0.693147 11:0.693147 19:1.098612 14:1.098612 22:1.386294 44:1.609438 41:1.609438 63:5.375277 +-1 5:0.000000 6:2.079441 10:1.386294 12:0.693147 30:5.545176 35:1.609438 44:1.609438 60:1.791759 63:1.791759 79:1.945910 74:1.945910 101:2.079442 215:2.564949 256:2.708050 292:8.317767 353:2.995732 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 7:2.079441 8:0.693147 12:0.693147 18:1.098612 13:1.098612 26:4.158882 29:1.386294 44:1.609438 60:8.958795 52:3.583518 62:3.583518 49:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 6:2.079441 12:1.386294 10:1.386294 11:0.693147 9:0.693147 8:0.693147 21:2.197224 17:2.197224 14:2.197224 19:1.098612 13:1.098612 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 13:1.098612 15:1.098612 14:1.098612 23:2.772588 22:1.386294 44:3.218876 40:3.218876 34:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 9:1.386294 12:0.693147 6:0.693147 8:0.693147 21:3.295836 19:1.098612 14:1.098612 24:2.772588 31:1.386294 26:1.386294 22:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 7:1.386294 9:0.693147 12:0.693147 20:7.690284 13:2.197224 17:1.098612 21:1.098612 18:1.098612 16:1.098612 30:12.476646 24:2.772588 40:3.218876 +-1 1:0.000000 3:0.000000 4:0.000000 2:0.000000 13:2.197224 24:1.386294 44:3.218876 36:1.609438 63:5.375277 64:5.375277 65:1.945910 127:2.197225 119:2.197225 176:2.484907 287:2.772589 291:2.772589 +-1 1:0.000000 10:0.693147 9:0.693147 6:0.693147 21:1.098612 13:1.098612 26:1.386294 29:1.386294 44:1.609438 52:1.791759 59:1.791759 63:1.791759 287:2.772589 298:2.833213 406:3.135494 559:3.496508 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:6.238323 8:2.079441 12:1.386294 11:1.386294 10:0.693147 17:2.197224 13:1.098612 25:1.386294 24:1.386294 29:1.386294 44:3.218876 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 8:2.079441 12:2.079441 10:2.079441 6:0.693147 9:0.693147 13:2.197224 17:1.098612 29:1.386294 28:1.386294 25:1.386294 31:1.386294 +-1 1:0.000000 3:0.000000 11:0.693147 14:1.098612 28:1.386294 35:1.609438 44:1.609438 78:1.945910 288:2.772589 319:2.890372 376:3.044522 403:3.135494 13589:6.957497 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 11:1.386294 10:0.693147 9:0.693147 7:0.693147 6:0.693147 8:0.693147 12:0.693147 17:2.197224 21:1.098612 13:1.098612 14:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 8:0.693147 19:1.098612 13:1.098612 29:1.386294 24:1.386294 44:1.609438 63:10.750554 64:7.167036 62:3.583518 59:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:0.693147 7:0.693147 13:2.197224 19:1.098612 38:1.609438 44:1.609438 63:7.167036 48:3.583518 64:1.791759 59:1.791759 90:2.079442 +-1 5:0.000000 2:0.000000 4:0.000000 10:1.386294 11:0.693147 7:0.693147 8:0.693147 12:0.693147 21:1.098612 16:1.098612 13:1.098612 26:1.386294 29:1.386294 44:1.609438 64:3.583518 62:3.583518 +-1 2:0.000000 4:0.000000 6:4.158882 10:3.465735 12:0.693147 7:0.693147 16:6.591672 18:3.295836 21:1.098612 27:5.545176 26:2.772588 44:14.484942 36:4.828314 42:1.609438 43:1.609438 50:26.876385 +-1 3:0.000000 1:0.000000 4:0.000000 10:1.386294 11:0.693147 7:0.693147 19:4.394448 21:3.295836 26:1.386294 24:1.386294 44:1.609438 38:1.609438 36:1.609438 57:3.583518 51:1.791759 60:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 8:1.386294 10:1.386294 12:0.693147 11:0.693147 6:0.693147 9:0.693147 21:1.098612 18:1.098612 25:2.772588 27:1.386294 36:3.218876 60:3.583518 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 10:6.931470 6:6.931470 7:4.158882 11:1.386294 8:1.386294 12:0.693147 19:2.197224 26:8.317764 30:2.772588 31:1.386294 32:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 8:2.772588 7:2.079441 10:1.386294 12:1.386294 11:0.693147 20:1.098612 21:1.098612 15:1.098612 13:1.098612 26:1.386294 29:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 6:1.386294 10:1.386294 8:1.386294 11:0.693147 19:1.098612 13:1.098612 16:1.098612 24:6.931470 29:1.386294 42:1.609438 +-1 5:0.000000 2:0.000000 4:0.000000 10:0.693147 12:0.693147 20:2.197224 19:1.098612 13:1.098612 22:1.386294 44:4.828314 41:1.609438 63:5.375277 64:3.583518 191:2.484907 206:2.564949 215:2.564949 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 8:0.693147 10:0.693147 11:0.693147 17:1.098612 13:1.098612 14:1.098612 22:1.386294 44:1.609438 41:1.609438 63:3.583518 64:1.791759 +-1 3:0.000000 1:0.000000 9:0.693147 25:1.386294 83:1.945910 67:1.945910 114:2.197225 254:8.124150 313:5.780744 370:3.044522 395:9.406482 409:3.135494 489:3.367296 506:3.401197 632:3.637586 664:3.737670 +-1 1:0.000000 2:0.000000 3:0.000000 5:0.000000 12:0.693147 19:1.098612 31:1.386294 22:1.386294 44:1.609438 64:3.583518 63:1.791759 486:3.332205 857:4.007333 1506:4.753590 2172:5.164786 4377:6.263398 +-1 1:0.000000 3:0.000000 11:0.693147 19:1.098612 63:1.791759 59:1.791759 71:1.945910 84:1.945910 80:1.945910 78:1.945910 97:2.079442 115:4.394450 160:2.397895 217:2.564949 198:2.564949 371:3.044522 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 10:1.386294 8:0.693147 11:0.693147 21:2.197224 26:6.931470 36:6.437752 38:4.828314 40:1.609438 42:1.609438 44:1.609438 41:1.609438 64:3.583518 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.079441 12:0.693147 8:0.693147 19:1.098612 26:1.386294 31:1.386294 30:1.386294 27:1.386294 36:3.218876 42:1.609438 43:1.609438 +-1 4:0.000000 2:0.000000 5:0.000000 7:2.772588 10:0.693147 6:0.693147 16:2.197224 13:2.197224 30:1.386294 37:1.609438 44:1.609438 64:5.375277 63:3.583518 75:3.891820 83:1.945910 76:1.945910 +-1 3:0.000000 1:0.000000 9:0.693147 24:1.386294 29:1.386294 44:1.609438 99:2.079442 114:2.197225 150:2.397895 160:2.397895 186:2.484907 202:2.564949 199:2.564949 217:2.564949 224:2.639057 257:2.708050 +-1 2:0.000000 4:0.000000 12:0.693147 44:1.609438 64:1.791759 86:2.079442 797:3.912023 3835:5.857933 13811:13.914994 13812:6.957497 13813:6.957497 13814:6.957497 13815:6.957497 +-1 3:0.000000 1:0.000000 6:3.465735 10:2.772588 11:2.079441 7:1.386294 9:0.693147 17:5.493060 16:3.295836 15:2.197224 28:5.545176 26:4.158882 27:2.772588 31:1.386294 38:9.656628 36:4.828314 +-1 5:0.000000 12:1.386294 44:1.609438 63:1.791759 45:1.791759 567:3.555348 13846:6.957497 13847:6.957497 13848:6.957497 13849:6.957497 13850:6.957497 +-1 2:0.000000 4:0.000000 9:1.386294 7:0.693147 12:0.693147 16:2.197224 21:1.098612 26:5.545176 30:4.158882 28:2.772588 25:1.386294 37:3.218876 44:1.609438 48:8.958795 52:3.583518 63:3.583518 +-1 3:0.000000 2:0.000000 5:0.000000 4:0.000000 1:0.000000 10:2.079441 6:1.386294 11:1.386294 9:0.693147 12:0.693147 17:1.098612 16:1.098612 19:1.098612 20:1.098612 30:2.772588 27:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 10:1.386294 11:0.693147 12:0.693147 21:1.098612 13:1.098612 15:1.098612 14:1.098612 29:1.386294 24:1.386294 44:1.609438 36:1.609438 41:1.609438 64:5.375277 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:1.386294 12:1.386294 10:0.693147 8:0.693147 29:1.386294 28:1.386294 44:4.828314 43:1.609438 34:1.609438 36:1.609438 63:7.167036 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 12:2.079441 11:2.079441 10:1.386294 9:0.693147 21:3.295836 20:1.098612 13:1.098612 28:4.158882 30:2.772588 31:1.386294 27:1.386294 22:1.386294 +-1 3:0.000000 6:0.693147 9:0.693147 16:1.098612 22:1.386294 44:1.609438 70:1.945910 99:2.079442 111:4.394450 120:2.197225 114:2.197225 109:2.197225 171:2.484907 163:2.484907 198:7.694847 199:2.564949 +-1 2:0.000000 5:0.000000 12:0.693147 8:0.693147 13:1.098612 14:1.098612 44:3.218876 35:1.609438 64:3.583518 65:3.891820 92:2.079442 215:2.564949 287:2.772589 458:3.258097 580:3.555348 695:7.522400 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:2.079441 10:1.386294 16:1.098612 29:1.386294 43:3.218876 36:1.609438 44:1.609438 50:5.375277 53:1.791759 58:1.791759 62:1.791759 76:1.945910 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 7:5.545176 9:2.772588 12:0.693147 10:0.693147 6:0.693147 21:2.197224 17:1.098612 16:1.098612 31:8.317764 25:2.772588 24:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:2.079441 6:1.386294 12:0.693147 15:1.098612 14:1.098612 29:2.772588 22:1.386294 44:3.218876 35:1.609438 64:5.375277 63:5.375277 45:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:3.465735 6:0.693147 12:0.693147 19:1.098612 30:2.772588 31:1.386294 28:1.386294 38:8.047190 36:3.218876 44:1.609438 42:1.609438 56:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 7:2.079441 12:0.693147 8:0.693147 17:1.098612 21:1.098612 15:1.098612 14:1.098612 26:1.386294 44:1.609438 63:3.583518 64:1.791759 +-1 4:0.000000 2:0.000000 5:0.000000 6:1.386294 9:0.693147 12:0.693147 14:2.197224 13:1.098612 27:2.772588 26:1.386294 30:1.386294 28:1.386294 38:3.218876 44:3.218876 39:1.609438 35:1.609438 +-1 3:0.000000 2:0.000000 4:0.000000 11:1.386294 12:0.693147 10:0.693147 6:0.693147 7:0.693147 17:3.295836 21:2.197224 20:1.098612 14:1.098612 30:1.386294 42:1.609438 39:1.609438 43:1.609438 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 10:2.079441 9:1.386294 8:0.693147 12:0.693147 15:5.493060 21:1.098612 13:1.098612 31:5.545176 28:1.386294 29:1.386294 40:6.437752 +-1 5703:6.263398 14025:6.957497 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 12:1.386294 10:0.693147 11:0.693147 8:0.693147 19:1.098612 16:1.098612 13:1.098612 31:1.386294 24:1.386294 29:1.386294 44:3.218876 36:1.609438 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 12:1.386294 15:1.098612 18:1.098612 17:1.098612 14:1.098612 31:2.772588 40:1.609438 34:1.609438 44:1.609438 35:1.609438 63:7.167036 64:1.791759 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 11:0.693147 10:0.693147 19:1.098612 44:1.609438 41:1.609438 63:5.375277 64:3.583518 80:1.945910 82:1.945910 95:2.079442 112:2.197225 +-1 16:1.098612 83:1.945910 78:1.945910 89:2.079442 122:2.197225 148:2.397895 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 684:11.283600 796:3.912023 1337:4.553877 2555:5.347108 14072:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 8:0.693147 7:0.693147 18:3.295836 21:2.197224 16:1.098612 19:1.098612 22:1.386294 31:1.386294 30:1.386294 43:4.828314 44:3.218876 37:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 17:2.197224 19:1.098612 20:1.098612 64:1.791759 63:1.791759 59:1.791759 78:1.945910 82:1.945910 91:2.079442 97:2.079442 216:2.564949 207:2.564949 198:2.564949 +-1 2:0.000000 3:0.000000 5:0.000000 4:0.000000 1:0.000000 12:2.079441 10:1.386294 6:0.693147 18:2.197224 13:1.098612 14:1.098612 26:1.386294 24:1.386294 44:3.218876 34:1.609438 43:1.609438 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 8:1.386294 12:1.386294 13:1.098612 17:1.098612 22:2.772588 29:1.386294 26:1.386294 44:1.609438 38:1.609438 37:1.609438 62:5.375277 63:5.375277 +-1 3:0.000000 1:0.000000 2:0.000000 6:5.545176 10:1.386294 7:0.693147 9:0.693147 8:0.693147 17:1.098612 18:1.098612 16:1.098612 14:1.098612 30:18.021822 27:11.090352 25:4.158882 31:1.386294 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 8:2.079441 9:2.079441 10:2.079441 11:1.386294 12:1.386294 6:0.693147 17:2.197224 21:1.098612 14:1.098612 26:1.386294 25:1.386294 29:1.386294 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 12:1.386294 8:0.693147 13:1.098612 29:1.386294 28:1.386294 44:4.828314 36:1.609438 39:1.609438 43:1.609438 63:7.167036 64:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 9:0.693147 21:1.098612 19:1.098612 18:1.098612 40:1.609438 44:1.609438 63:5.375277 64:1.791759 61:1.791759 59:1.791759 99:2.079442 118:2.197225 +-1 5:0.000000 2:0.000000 3:0.000000 4:0.000000 1:0.000000 11:1.386294 10:0.693147 12:0.693147 7:0.693147 13:1.098612 22:1.386294 44:1.609438 63:7.167036 64:5.375277 61:3.583518 57:3.583518 +-1 2:0.000000 4:0.000000 11:1.386294 7:1.386294 10:1.386294 12:0.693147 6:0.693147 19:2.197224 18:2.197224 31:1.386294 30:1.386294 27:1.386294 44:1.609438 63:3.583518 56:1.791759 64:1.791759 +-1 11:0.693147 24:4.158882 29:2.772588 40:1.609438 44:1.609438 49:1.791759 58:1.791759 81:3.891820 82:1.945910 78:1.945910 67:1.945910 114:4.394450 119:2.197225 117:2.197225 109:2.197225 135:2.302585 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:0.693147 10:0.693147 8:0.693147 9:0.693147 12:0.693147 29:2.772588 22:1.386294 44:3.218876 36:1.609438 43:1.609438 64:3.583518 63:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 10:1.386294 11:1.386294 8:0.693147 12:0.693147 13:1.098612 28:1.386294 25:1.386294 29:1.386294 39:1.609438 38:1.609438 44:1.609438 62:5.375277 64:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 10:0.693147 6:0.693147 9:0.693147 8:0.693147 25:1.386294 29:1.386294 22:1.386294 44:1.609438 64:7.167036 63:5.375277 52:1.791759 +-1 16:1.098612 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 653:3.688879 684:11.283600 796:3.912023 1337:4.553877 2555:5.347108 3897:5.857933 +-1 1:0.000000 5:0.000000 3:0.000000 2:0.000000 4:0.000000 12:1.386294 9:1.386294 6:1.386294 10:1.386294 7:0.693147 8:0.693147 20:2.197224 17:1.098612 21:1.098612 31:1.386294 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 8:2.079441 12:2.079441 10:2.079441 9:0.693147 13:2.197224 16:1.098612 21:1.098612 29:1.386294 31:1.386294 25:1.386294 36:1.609438 +-1 4:0.000000 5:0.000000 1:0.000000 3:0.000000 2:0.000000 12:0.693147 19:2.197224 31:1.386294 29:1.386294 37:1.609438 44:1.609438 63:5.375277 64:3.583518 51:1.791759 59:1.791759 62:1.791759 +-1 16:1.098612 83:1.945910 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 684:11.283600 790:3.912023 796:3.912023 1337:4.553877 2555:5.347108 +-1 3:0.000000 1:0.000000 82:1.945910 99:2.079442 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 9:1.386294 10:0.693147 11:0.693147 29:1.386294 44:1.609438 43:1.609438 63:5.375277 64:1.791759 119:2.197225 109:2.197225 158:2.397895 146:2.397895 +-1 3:0.000000 2:0.000000 10:2.079441 6:2.079441 11:1.386294 9:1.386294 8:1.386294 18:2.197224 19:1.098612 21:1.098612 16:1.098612 13:1.098612 14:1.098612 17:1.098612 29:1.386294 38:3.218876 +-1 3:0.000000 2:0.000000 5:0.000000 1:0.000000 4:0.000000 6:4.158882 9:2.079441 7:1.386294 12:0.693147 15:4.394448 18:4.394448 17:3.295836 21:1.098612 14:1.098612 25:1.386294 40:6.437752 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 11:1.386294 9:0.693147 10:0.693147 19:2.197224 20:1.098612 17:1.098612 14:1.098612 26:4.158882 31:1.386294 36:3.218876 41:1.609438 64:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 10:1.386294 6:0.693147 12:0.693147 17:1.098612 13:1.098612 30:2.772588 27:1.386294 36:1.609438 44:1.609438 64:5.375277 63:5.375277 85:4.158884 +-1 5:0.000000 2:0.000000 10:1.386294 6:0.693147 76:1.945910 100:4.158884 128:8.788900 123:6.591675 118:2.197225 121:2.197225 151:2.397895 183:2.484907 170:2.484907 195:2.564949 225:7.917171 236:5.278114 +-1 2:0.000000 4:0.000000 5:0.000000 7:2.079441 10:1.386294 11:0.693147 6:0.693147 20:4.394448 21:1.098612 30:5.545176 32:1.386294 28:1.386294 27:1.386294 34:3.218876 60:12.542313 58:1.791759 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 6:0.693147 11:0.693147 8:0.693147 34:1.609438 39:1.609438 51:3.583518 64:1.791759 63:1.791759 56:1.791759 60:1.791759 49:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:4.852029 11:0.693147 7:0.693147 8:0.693147 17:1.098612 25:1.386294 34:1.609438 39:1.609438 51:7.167036 61:1.791759 58:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 11:1.386294 6:0.693147 8:0.693147 16:1.098612 30:6.931470 36:1.609438 39:1.609438 57:1.791759 58:1.791759 49:1.791759 76:3.891820 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:1.386294 7:0.693147 10:0.693147 8:0.693147 39:1.609438 57:1.791759 80:1.945910 123:6.591675 139:2.302585 138:2.302585 165:4.969814 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:0.693147 11:0.693147 8:0.693147 18:1.098612 30:1.386294 25:1.386294 43:1.609438 39:1.609438 61:3.583518 64:1.791759 63:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 8:1.386294 7:0.693147 11:0.693147 6:0.693147 19:1.098612 39:1.609438 80:5.837730 72:1.945910 100:6.238326 138:2.302585 191:4.969814 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 6:2.772588 9:1.386294 10:0.693147 8:0.693147 20:2.197224 25:1.386294 39:1.609438 61:5.375277 60:3.583518 58:3.583518 56:3.583518 53:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:0.693147 8:0.693147 21:1.098612 25:1.386294 34:1.609438 39:1.609438 56:1.791759 77:3.891820 76:1.945910 80:1.945910 69:1.945910 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:12.476646 7:1.386294 10:1.386294 9:0.693147 8:0.693147 17:21.972240 20:5.493060 21:1.098612 13:1.098612 30:6.931470 25:4.158882 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:3.465735 11:1.386294 10:0.693147 9:0.693147 8:0.693147 20:2.197224 16:1.098612 17:1.098612 18:1.098612 26:2.772588 39:1.609438 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:6.931470 8:1.386294 10:1.386294 7:0.693147 11:0.693147 17:1.098612 20:1.098612 27:1.386294 38:3.218876 34:1.609438 39:1.609438 60:3.583518 +-1 2:0.000000 4:0.000000 39:1.609438 76:1.945910 179:2.484907 297:2.833213 325:2.890372 567:7.110696 581:3.555348 1217:4.465908 1967:5.010635 2544:5.347108 2999:5.568345 5460:12.526796 14429:6.957497 14430:6.957497 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 12:0.693147 29:1.386294 44:3.218876 63:3.583518 62:1.791759 64:1.791759 65:3.891820 78:1.945910 109:2.197225 111:2.197225 287:2.772589 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:8.317764 10:2.772588 7:2.079441 9:0.693147 8:0.693147 12:0.693147 20:1.098612 15:1.098612 25:1.386294 32:1.386294 29:1.386294 22:1.386294 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 11:0.693147 12:0.693147 9:0.693147 17:1.098612 15:1.098612 19:1.098612 31:1.386294 29:1.386294 41:1.609438 39:1.609438 63:3.583518 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 10:1.386294 11:0.693147 8:0.693147 12:0.693147 13:1.098612 25:4.158882 29:1.386294 42:1.609438 38:1.609438 39:1.609438 43:1.609438 44:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 10:3.465735 11:2.079441 7:0.693147 17:2.197224 21:1.098612 19:1.098612 16:1.098612 18:1.098612 22:12.476646 25:5.545176 28:2.772588 44:17.703818 39:14.484942 +-1 5:0.000000 1:0.000000 4:0.000000 2:0.000000 10:1.386294 8:0.693147 12:0.693147 13:2.197224 16:1.098612 19:1.098612 38:4.828314 44:1.609438 63:1.791759 47:1.791759 58:1.791759 45:1.791759 +-1 1:0.000000 13:2.197224 29:1.386294 44:1.609438 45:3.583518 2175:10.329572 5658:12.526796 +-1 2:0.000000 4:0.000000 11:1.386294 12:0.693147 19:1.098612 38:1.609438 44:1.609438 63:3.583518 61:1.791759 59:1.791759 75:1.945910 77:1.945910 110:2.197225 127:2.197225 139:2.302585 155:2.397895 +-1 3:0.000000 1:0.000000 117:2.197225 204:2.564949 303:2.833213 1108:4.317488 4839:6.263398 14534:6.957497 14535:6.957497 14536:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 8:0.693147 14:1.098612 40:1.609438 64:3.583518 63:3.583518 48:1.791759 76:3.891820 73:1.945910 84:1.945910 123:4.394450 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.079441 9:0.693147 6:0.693147 8:0.693147 19:1.098612 21:1.098612 31:2.772588 26:1.386294 39:1.609438 38:1.609438 44:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 19:1.098612 13:1.098612 16:1.098612 29:1.386294 22:1.386294 24:1.386294 44:4.828314 63:8.958795 64:3.583518 45:1.791759 62:1.791759 78:1.945910 +-1 2:0.000000 3:0.000000 5:0.000000 4:0.000000 1:0.000000 12:2.079441 10:2.079441 7:2.079441 8:0.693147 6:0.693147 13:1.098612 26:2.772588 29:1.386294 25:1.386294 36:3.218876 44:1.609438 +-1 17:2.197224 16:1.098612 28:5.545176 37:1.609438 44:1.609438 68:1.945910 91:2.079442 101:2.079442 97:2.079442 112:2.197225 134:2.302585 132:2.302585 135:2.302585 148:2.397895 188:2.484907 189:2.484907 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 10:5.545176 11:5.545176 9:4.158882 6:2.079441 8:1.386294 12:0.693147 20:2.197224 16:1.098612 13:1.098612 29:2.772588 30:1.386294 +-1 44:1.609438 62:3.583518 45:3.583518 59:1.791759 63:1.791759 192:2.484907 4376:6.263398 14608:6.957497 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 4:0.000000 9:1.386294 11:1.386294 12:0.693147 7:0.693147 19:2.197224 17:2.197224 20:1.098612 31:1.386294 24:1.386294 38:1.609438 36:1.609438 +-1 1:0.000000 62:1.791759 3505:5.857933 14617:6.957497 +-1 16:1.098612 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 653:3.688879 684:11.283600 796:3.912023 1337:4.553877 1652:9.750394 2555:5.347108 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 9:0.693147 11:0.693147 28:1.386294 44:1.609438 63:5.375277 59:1.791759 64:1.791759 71:1.945910 81:1.945910 99:2.079442 +-1 16:1.098612 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 653:3.688879 684:11.283600 796:3.912023 1337:4.553877 2555:5.347108 5599:6.263398 +-1 1:0.000000 3:0.000000 16:1.098612 78:1.945910 114:2.197225 313:2.890372 448:3.258097 1778:4.875197 3653:5.857933 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 7:1.386294 21:1.098612 14:1.098612 31:1.386294 44:1.609438 35:1.609438 63:3.583518 64:1.791759 61:1.791759 59:1.791759 84:1.945910 82:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 6:4.158882 10:2.772588 18:1.098612 20:1.098612 30:1.386294 28:1.386294 42:1.609438 60:8.958795 56:5.375277 50:1.791759 53:1.791759 58:1.791759 57:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 10:3.465735 12:2.079441 9:1.386294 8:1.386294 11:0.693147 6:0.693147 19:1.098612 21:1.098612 13:1.098612 14:1.098612 28:1.386294 +-1 4:0.000000 2:0.000000 5:0.000000 3:0.000000 12:0.693147 6:0.693147 9:0.693147 7:0.693147 17:1.098612 15:1.098612 19:1.098612 30:1.386294 24:1.386294 41:1.609438 63:5.375277 64:3.583518 +-1 14:1.098612 44:1.609438 43:1.609438 35:1.609438 97:2.079442 159:2.397895 371:3.044522 1684:4.875197 2713:5.568345 3300:11.715866 14708:6.957497 +-1 5:0.000000 2:0.000000 3:0.000000 1:0.000000 4:0.000000 7:0.693147 12:0.693147 21:1.098612 17:1.098612 14:1.098612 40:3.218876 44:1.609438 35:1.609438 63:5.375277 64:1.791759 59:1.791759 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 12:1.386294 9:1.386294 6:0.693147 8:0.693147 13:2.197224 19:1.098612 21:1.098612 17:1.098612 14:1.098612 31:1.386294 22:1.386294 +-1 5:0.000000 2:0.000000 1:0.000000 3:0.000000 12:1.386294 19:2.197224 14:1.098612 31:1.386294 35:1.609438 44:1.609438 64:5.375277 63:5.375277 81:1.945910 82:1.945910 89:8.317768 99:2.079442 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 7:1.386294 8:1.386294 10:1.386294 19:3.295836 15:1.098612 32:4.158882 31:1.386294 24:1.386294 56:1.791759 48:1.791759 78:1.945910 102:2.079442 +-1 1:0.000000 4:0.000000 6:0.693147 32:1.386294 24:1.386294 34:1.609438 49:1.791759 48:1.791759 72:1.945910 70:1.945910 90:4.158884 100:2.079442 106:2.197225 105:2.197225 152:2.397895 161:2.397895 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 6:0.693147 10:0.693147 9:0.693147 19:2.197224 20:1.098612 18:1.098612 32:4.158882 30:4.158882 31:1.386294 40:1.609438 37:1.609438 58:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 6:4.158882 12:1.386294 10:1.386294 11:0.693147 7:0.693147 20:2.197224 18:2.197224 16:1.098612 14:1.098612 32:4.158882 25:2.772588 +-1 2:0.000000 4:0.000000 10:4.158882 6:1.386294 19:1.098612 18:1.098612 14:1.098612 27:1.386294 31:1.386294 29:1.386294 32:1.386294 42:1.609438 43:1.609438 35:1.609438 50:5.375277 49:3.583518 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:1.386294 8:1.386294 6:0.693147 10:0.693147 18:2.197224 24:5.545176 31:4.158882 32:2.772588 25:1.386294 37:1.609438 39:1.609438 +-1 2:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 19:1.098612 20:1.098612 15:1.098612 30:4.158882 25:2.772588 31:1.386294 32:1.386294 42:1.609438 43:1.609438 57:5.375277 58:1.791759 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.079441 9:1.386294 11:0.693147 12:0.693147 18:3.295836 20:1.098612 32:4.158882 31:1.386294 42:1.609438 38:1.609438 57:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 11:4.158882 10:2.772588 7:0.693147 12:0.693147 8:0.693147 14:1.098612 15:1.098612 13:1.098612 31:1.386294 28:1.386294 26:1.386294 27:1.386294 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 6:1.386294 12:0.693147 14:2.197224 18:2.197224 17:1.098612 13:1.098612 16:1.098612 32:4.158882 27:1.386294 30:1.386294 28:1.386294 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 11:0.693147 9:0.693147 13:2.197224 21:2.197224 16:1.098612 17:1.098612 22:1.386294 32:1.386294 42:1.609438 45:1.791759 84:1.945910 70:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 6:0.693147 18:1.098612 32:4.158882 38:1.609438 58:3.583518 51:1.791759 76:1.945910 123:2.197225 127:2.197225 151:4.795790 244:5.416100 269:2.772589 +-1 11:1.386294 6:0.693147 17:1.098612 28:1.386294 34:1.609438 53:1.791759 69:1.945910 83:1.945910 74:1.945910 100:4.158884 116:4.394450 125:2.197225 110:2.197225 144:2.397895 176:2.484907 171:2.484907 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 6:17.328675 8:2.079441 9:1.386294 12:0.693147 10:0.693147 7:0.693147 18:5.493060 16:5.493060 20:1.098612 30:6.931470 25:4.158882 +-1 3:0.000000 1:0.000000 4:0.000000 2:0.000000 5:0.000000 9:1.386294 12:0.693147 10:0.693147 21:2.197224 18:2.197224 13:1.098612 20:1.098612 17:1.098612 32:4.158882 24:2.772588 28:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 7:2.079441 10:2.079441 9:1.386294 11:0.693147 6:0.693147 20:2.197224 21:1.098612 19:1.098612 15:1.098612 26:6.931470 +-1 1:0.000000 9:0.693147 13:1.098612 32:1.386294 62:1.791759 81:1.945910 189:7.454721 455:3.258097 460:3.295837 491:3.367296 544:3.465736 703:3.761200 892:4.060443 1177:4.382027 1264:8.931816 1425:4.653960 +-1 2:0.000000 1:0.000000 9:1.386294 12:0.693147 11:0.693147 16:2.197224 19:1.098612 21:1.098612 32:2.772588 28:2.772588 31:1.386294 22:1.386294 27:1.386294 40:3.218876 38:1.609438 59:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 11:2.079441 10:1.386294 12:0.693147 9:0.693147 18:4.394448 16:3.295836 15:3.295836 20:1.098612 21:1.098612 14:1.098612 32:2.772588 35:1.609438 53:1.791759 +-1 2:0.000000 4:0.000000 32:2.772588 934:4.110874 2351:16.041324 15045:6.957497 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 9:1.386294 11:0.693147 12:0.693147 21:2.197224 20:1.098612 32:2.772588 35:3.218876 38:1.609438 69:3.891820 67:1.945910 103:2.079442 +-1 2:0.000000 3:0.000000 19:1.098612 21:1.098612 16:1.098612 14:1.098612 32:2.772588 28:1.386294 29:1.386294 40:3.218876 39:1.609438 42:1.609438 41:1.609438 53:1.791759 71:1.945910 68:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:2.079441 12:1.386294 11:0.693147 10:0.693147 20:2.197224 32:2.772588 31:1.386294 24:1.386294 40:1.609438 58:1.791759 53:1.791759 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 19:1.098612 17:1.098612 21:1.098612 14:1.098612 32:4.158882 31:1.386294 35:1.609438 37:1.609438 41:1.609438 62:3.583518 103:2.079442 +-1 1:0.000000 3:0.000000 9:1.386294 19:1.098612 14:1.098612 32:1.386294 31:1.386294 35:1.609438 59:1.791759 71:1.945910 99:4.158884 132:2.302585 134:2.302585 159:2.397895 189:2.484907 248:2.708050 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 8:1.386294 12:0.693147 21:2.197224 20:1.098612 14:1.098612 32:2.772588 41:1.609438 103:2.079442 99:2.079442 90:2.079442 148:2.397895 +-1 2:0.000000 5:0.000000 4:0.000000 9:2.079441 11:1.386294 8:1.386294 12:0.693147 10:0.693147 21:2.197224 20:1.098612 17:1.098612 19:1.098612 16:1.098612 13:1.098612 28:4.158882 32:2.772588 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 20:1.098612 14:1.098612 32:2.772588 35:1.609438 103:2.079442 113:2.197225 115:2.197225 134:4.605170 +-1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 1:0.000000 6:3.465735 10:1.386294 9:1.386294 7:0.693147 21:3.295836 17:2.197224 16:1.098612 20:1.098612 14:1.098612 32:4.158882 26:2.772588 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:1.386294 10:1.386294 7:0.693147 12:0.693147 11:0.693147 18:3.295836 19:2.197224 20:2.197224 17:1.098612 15:1.098612 32:2.772588 31:2.772588 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 9:1.386294 12:0.693147 8:0.693147 11:0.693147 19:1.098612 20:1.098612 17:1.098612 32:2.772588 24:2.772588 22:1.386294 43:1.609438 35:1.609438 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 9:1.386294 11:1.386294 10:0.693147 19:2.197224 20:1.098612 13:1.098612 16:1.098612 32:5.545176 31:2.772588 29:1.386294 +-1 1:0.000000 3:0.000000 4:0.000000 32:1.386294 103:2.079442 119:2.197225 186:2.484907 217:2.564949 219:2.639057 248:2.708050 274:2.772589 307:2.833213 410:3.135494 558:3.496508 794:7.824046 959:4.110874 +-1 2:0.000000 3:0.000000 5:0.000000 1:0.000000 4:0.000000 11:0.693147 6:0.693147 9:0.693147 12:0.693147 18:1.098612 20:1.098612 13:1.098612 32:5.545176 27:1.386294 38:3.218876 58:1.791759 +-1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 8:2.079441 10:1.386294 12:1.386294 9:0.693147 17:2.197224 16:1.098612 19:1.098612 14:1.098612 31:1.386294 32:1.386294 39:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:6.931470 9:2.079441 10:1.386294 12:0.693147 8:0.693147 18:2.197224 21:2.197224 16:2.197224 20:1.098612 15:1.098612 32:2.772588 27:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 9:0.693147 7:0.693147 19:2.197224 20:1.098612 18:1.098612 13:1.098612 14:1.098612 31:2.772588 32:2.772588 26:1.386294 35:1.609438 +-1 3:0.000000 32:1.386294 3379:5.857933 15271:6.957497 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 9:1.386294 11:0.693147 8:0.693147 17:2.197224 21:2.197224 13:1.098612 18:1.098612 16:1.098612 32:2.772588 22:2.772588 31:1.386294 +-1 1:0.000000 5:0.000000 2:0.000000 10:2.772588 12:0.693147 11:0.693147 9:0.693147 7:0.693147 8:0.693147 18:4.394448 21:2.197224 17:2.197224 20:1.098612 13:1.098612 14:1.098612 32:4.158882 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 6:8.317764 9:2.079441 12:0.693147 10:0.693147 16:3.295836 21:2.197224 17:2.197224 20:1.098612 19:1.098612 18:1.098612 32:6.931470 24:4.158882 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 9:2.079441 12:0.693147 11:0.693147 18:4.394448 20:1.098612 19:1.098612 14:1.098612 32:4.158882 31:1.386294 28:1.386294 24:1.386294 +-1 1:0.000000 5:0.000000 2:0.000000 3:0.000000 4:0.000000 8:5.545176 7:2.079441 10:1.386294 16:2.197224 19:2.197224 20:1.098612 13:1.098612 21:1.098612 32:4.158882 30:2.772588 22:1.386294 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 9:0.693147 8:0.693147 10:0.693147 16:1.098612 28:1.386294 24:1.386294 35:1.609438 51:1.791759 48:1.791759 125:2.197225 139:2.302585 305:2.833213 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 21:1.098612 32:4.158882 22:1.386294 65:1.945910 82:1.945910 103:2.079442 160:2.397895 260:2.708050 301:2.833213 502:3.401197 806:3.912023 +-1 2:0.000000 4:0.000000 12:0.693147 9:0.693147 10:0.693147 11:0.693147 15:1.098612 32:4.158882 27:1.386294 38:1.609438 43:1.609438 47:1.791759 57:1.791759 60:1.791759 45:1.791759 65:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:3.465735 11:2.772588 12:0.693147 7:0.693147 6:0.693147 21:2.197224 13:1.098612 18:1.098612 17:1.098612 32:2.772588 30:2.772588 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 11:0.693147 30:1.386294 22:1.386294 27:1.386294 43:1.609438 60:1.791759 96:2.079442 160:2.397895 181:2.484907 163:2.484907 197:2.564949 224:2.639057 +-1 3:0.000000 1:0.000000 10:1.386294 8:0.693147 9:0.693147 15:1.098612 24:1.386294 32:1.386294 70:1.945910 93:2.079442 159:2.397895 163:2.484907 203:2.564949 225:2.639057 280:2.772589 375:3.044522 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 1:0.000000 3:0.000000 5:0.000000 10:0.693147 11:0.693147 32:1.386294 36:1.609438 62:1.791759 61:1.791759 57:1.791759 70:1.945910 103:2.079442 96:2.079442 147:2.397895 165:2.484907 231:2.639057 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:0.693147 9:0.693147 6:0.693147 12:0.693147 20:1.098612 22:1.386294 32:1.386294 24:1.386294 76:1.945910 69:1.945910 103:2.079442 138:2.302585 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:2.079441 11:1.386294 10:0.693147 12:0.693147 7:0.693147 20:3.295836 17:2.197224 15:2.197224 14:1.098612 19:1.098612 32:6.931470 +-1 3:0.000000 4:0.000000 2:0.000000 5:0.000000 1:0.000000 7:2.079441 11:2.079441 9:2.079441 12:0.693147 10:0.693147 16:4.394448 17:2.197224 18:2.197224 21:1.098612 20:1.098612 14:1.098612 +-1 1:0.000000 12:0.693147 32:1.386294 43:1.609438 41:1.609438 68:1.945910 96:2.079442 156:2.397895 193:2.564949 466:6.591674 562:3.555348 1113:4.317488 1216:13.397724 1447:4.653960 1943:5.010635 3054:5.568345 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 12:0.693147 10:0.693147 7:0.693147 9:0.693147 16:3.295836 13:2.197224 20:1.098612 17:1.098612 21:1.098612 32:2.772588 26:1.386294 25:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 6:3.465735 9:1.386294 12:0.693147 11:0.693147 21:3.295836 20:1.098612 19:1.098612 18:1.098612 25:5.545176 32:4.158882 31:2.772588 28:1.386294 27:1.386294 +-1 1:0.000000 3:0.000000 1482:4.753590 15527:6.957497 +-1 2:0.000000 4:0.000000 11:0.693147 17:2.197224 18:1.098612 20:1.098612 28:2.772588 32:1.386294 124:4.394450 129:2.302585 161:2.397895 171:4.969814 163:2.484907 215:2.564949 216:2.564949 220:2.639057 +-1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 6:2.772588 10:1.386294 12:0.693147 11:0.693147 9:0.693147 7:0.693147 13:1.098612 20:1.098612 21:1.098612 16:1.098612 32:4.158882 26:2.772588 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 12:0.693147 20:1.098612 13:1.098612 16:1.098612 32:2.772588 35:1.609438 65:1.945910 260:2.708050 241:2.708050 367:3.044522 618:7.221836 1836:5.010635 +-1 2:0.000000 5:0.000000 4:0.000000 11:0.693147 9:0.693147 7:0.693147 21:2.197224 18:2.197224 19:1.098612 17:1.098612 32:2.772588 30:2.772588 31:1.386294 27:1.386294 42:1.609438 84:1.945910 +-1 1:0.000000 5:0.000000 2:0.000000 4:0.000000 3:0.000000 12:0.693147 10:0.693147 13:2.197224 19:1.098612 20:1.098612 15:1.098612 14:1.098612 32:5.545176 22:1.386294 43:1.609438 41:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 10:4.158882 7:4.158882 9:2.079441 6:2.079441 11:2.079441 12:1.386294 13:2.197224 20:1.098612 18:1.098612 16:1.098612 21:1.098612 26:11.090352 28:5.545176 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 6:3.465735 10:1.386294 11:0.693147 8:0.693147 18:2.197224 16:2.197224 13:1.098612 21:1.098612 20:1.098612 30:16.635528 26:4.158882 32:2.772588 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 12:0.693147 7:0.693147 9:0.693147 11:0.693147 21:2.197224 20:1.098612 32:4.158882 26:1.386294 25:1.386294 27:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 10:1.386294 12:0.693147 11:0.693147 8:0.693147 18:2.197224 17:1.098612 16:1.098612 32:2.772588 24:1.386294 22:1.386294 68:1.945910 83:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 6:0.693147 12:0.693147 9:0.693147 21:2.197224 32:1.386294 50:1.791759 61:1.791759 58:1.791759 84:1.945910 89:2.079442 94:2.079442 117:2.197225 +-1 1:0.000000 3:0.000000 382:3.091042 5842:12.526796 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 11:1.386294 10:0.693147 24:1.386294 32:1.386294 58:1.791759 119:2.197225 147:4.795790 161:2.397895 152:2.397895 144:2.397895 178:4.969814 188:2.484907 +-1 2:0.000000 4:0.000000 5:0.000000 20:1.098612 32:2.772588 41:1.609438 65:1.945910 103:2.079442 148:2.397895 260:2.708050 257:2.708050 1174:8.764054 3532:5.857933 5898:12.526796 15697:6.957497 15698:6.957497 +-1 1:0.000000 3:0.000000 1580:4.753590 15699:6.957497 15700:6.957497 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 9:3.465735 10:1.386294 12:1.386294 11:0.693147 18:2.197224 20:2.197224 19:1.098612 21:1.098612 14:1.098612 32:4.158882 31:1.386294 27:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:0.693147 12:0.693147 20:1.098612 32:4.158882 76:1.945910 103:2.079442 1292:4.465908 15731:6.957497 15732:6.957497 15733:6.957497 15734:6.957497 +-1 3:0.000000 1:0.000000 29:1.386294 297:2.833213 15736:6.957497 15737:6.957497 15738:6.957497 15739:6.957497 15740:6.957497 15741:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:0.693147 12:0.693147 11:0.693147 9:0.693147 7:0.693147 19:1.098612 20:1.098612 30:5.545176 32:4.158882 31:2.772588 28:1.386294 25:1.386294 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 28:1.386294 26:1.386294 39:1.609438 81:1.945910 103:2.079442 171:2.484907 208:2.564949 355:2.995732 376:3.044522 +-1 5:0.000000 3:0.000000 4:0.000000 1:0.000000 10:1.386294 7:1.386294 12:0.693147 8:0.693147 20:3.295836 21:1.098612 16:1.098612 14:1.098612 30:12.476646 32:6.931470 31:1.386294 41:1.609438 +-1 5:0.000000 1:0.000000 2:0.000000 4:0.000000 3:0.000000 9:1.386294 12:0.693147 6:0.693147 10:0.693147 20:2.197224 17:2.197224 21:1.098612 32:2.772588 28:1.386294 38:1.609438 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 7:0.693147 20:1.098612 13:1.098612 32:4.158882 22:1.386294 43:1.609438 40:1.609438 45:1.791759 61:1.791759 49:1.791759 65:1.945910 69:1.945910 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 9:2.079441 6:2.079441 10:0.693147 7:0.693147 12:0.693147 20:1.098612 13:1.098612 21:1.098612 16:1.098612 32:5.545176 25:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 6:5.545176 10:2.772588 8:2.079441 9:1.386294 20:4.394448 18:3.295836 19:2.197224 21:2.197224 17:1.098612 30:13.862940 25:5.545176 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 6:11.783499 10:2.079441 7:1.386294 11:0.693147 9:0.693147 19:3.295836 21:1.098612 18:1.098612 20:1.098612 16:1.098612 26:5.545176 32:4.158882 +-1 3:0.000000 5:0.000000 6:2.079441 7:1.386294 8:1.386294 17:2.197224 18:1.098612 21:1.098612 19:1.098612 31:1.386294 22:1.386294 34:4.828314 39:3.218876 38:1.609438 62:8.958795 49:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 7:2.772588 12:0.693147 6:0.693147 11:0.693147 20:3.295836 19:1.098612 26:5.545176 32:4.158882 28:2.772588 25:1.386294 30:1.386294 22:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 9:0.693147 20:1.098612 19:1.098612 32:2.772588 31:1.386294 28:1.386294 84:1.945910 81:1.945910 103:2.079442 99:2.079442 +-1 3:0.000000 10:0.693147 32:1.386294 80:1.945910 278:2.772589 436:3.218876 703:11.283600 1044:4.248495 1777:4.875197 2036:5.164786 5970:12.526796 15905:6.957497 15906:6.957497 15907:6.957497 15908:6.957497 15909:6.957497 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 12:2.079441 7:0.693147 14:1.098612 19:1.098612 18:1.098612 32:6.931470 29:2.772588 31:1.386294 24:1.386294 39:1.609438 45:3.583518 59:1.791759 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 7:3.465735 9:2.772588 6:2.079441 12:0.693147 8:0.693147 20:2.197224 21:2.197224 18:1.098612 17:1.098612 13:1.098612 30:6.931470 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 9:2.079441 6:1.386294 11:0.693147 12:0.693147 18:1.098612 17:1.098612 20:1.098612 32:2.772588 27:1.386294 26:1.386294 25:1.386294 +-1 3:0.000000 2:0.000000 1:0.000000 8:2.079441 10:0.693147 9:0.693147 11:0.693147 18:3.295836 21:3.295836 16:3.295836 17:2.197224 14:2.197224 19:1.098612 24:5.545176 22:2.772588 25:2.772588 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:7.624617 9:2.079441 12:1.386294 20:3.295836 16:2.197224 19:1.098612 17:1.098612 32:8.317764 30:4.158882 25:2.772588 26:2.772588 +-1 2:0.000000 4:0.000000 14:1.098612 32:1.386294 35:1.609438 57:3.583518 53:1.791759 60:1.791759 69:3.891820 74:1.945910 95:2.079442 144:2.397895 175:2.484907 167:2.484907 213:2.564949 246:2.708050 +-1 2:0.000000 5:0.000000 4:0.000000 9:1.386294 12:0.693147 10:0.693147 11:0.693147 19:1.098612 17:1.098612 32:4.158882 31:1.386294 84:1.945910 75:1.945910 86:2.079442 94:2.079442 134:2.302585 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 5:0.000000 7:2.079441 12:1.386294 10:1.386294 11:0.693147 9:0.693147 19:2.197224 31:2.772588 32:2.772588 24:1.386294 42:4.828314 36:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 11:2.079441 10:0.693147 6:0.693147 9:0.693147 12:0.693147 21:2.197224 17:2.197224 15:1.098612 14:1.098612 24:6.931470 32:2.772588 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 103:2.079442 117:2.197225 2210:10.329572 3065:11.136690 3675:5.857933 16068:6.957497 16069:6.957497 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 6:0.693147 20:2.197224 13:2.197224 15:1.098612 14:1.098612 32:2.772588 22:1.386294 25:1.386294 24:1.386294 45:1.791759 82:1.945910 +-1 4:0.000000 3:0.000000 10:0.693147 9:0.693147 8:0.693147 21:1.098612 17:1.098612 22:1.386294 38:1.609438 48:5.375277 59:1.791759 80:1.945910 81:1.945910 102:4.158884 94:2.079442 97:2.079442 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 4:0.000000 8:3.465735 10:2.079441 9:1.386294 12:0.693147 11:0.693147 16:3.295836 18:2.197224 20:1.098612 13:1.098612 21:1.098612 17:1.098612 +-1 4:0.000000 5:0.000000 2:0.000000 1:0.000000 11:0.693147 8:0.693147 12:0.693147 13:2.197224 17:2.197224 20:1.098612 19:1.098612 14:1.098612 32:2.772588 31:2.772588 25:1.386294 24:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 9:1.386294 12:0.693147 10:0.693147 6:0.693147 11:0.693147 18:2.197224 20:1.098612 17:1.098612 13:1.098612 14:1.098612 32:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:1.386294 11:0.693147 17:1.098612 32:2.772588 28:2.772588 27:1.386294 24:1.386294 22:1.386294 37:1.609438 39:1.609438 62:1.791759 49:1.791759 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.772588 6:2.772588 11:1.386294 9:0.693147 12:0.693147 19:1.098612 21:1.098612 17:1.098612 18:1.098612 20:1.098612 32:5.545176 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 9:1.386294 10:0.693147 19:3.295836 16:3.295836 17:2.197224 20:2.197224 21:1.098612 18:1.098612 31:2.772588 32:1.386294 28:1.386294 +-1 2:0.000000 4:0.000000 9:0.693147 6:0.693147 32:1.386294 38:1.609438 34:1.609438 52:1.791759 81:3.891820 67:3.891820 92:2.079442 91:2.079442 90:2.079442 125:2.197225 115:2.197225 139:2.302585 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 103:2.079442 452:3.258097 860:8.014666 1189:4.382027 1870:10.021270 16200:6.957497 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:4.158882 103:2.079442 3332:17.573799 3333:5.857933 5945:6.263398 +-1 4:0.000000 5:0.000000 1:0.000000 2:0.000000 3:0.000000 10:1.386294 7:1.386294 8:1.386294 12:0.693147 11:0.693147 20:1.098612 32:2.772588 26:1.386294 24:1.386294 41:1.609438 50:3.583518 +-1 99:2.079442 363:3.044522 567:3.555348 732:3.806662 808:3.951244 841:4.007333 1056:4.248495 1192:4.382027 2540:5.347108 16230:6.957497 16231:6.957497 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 10:1.386294 12:0.693147 6:0.693147 20:1.098612 13:1.098612 16:1.098612 32:2.772588 28:1.386294 38:3.218876 34:1.609438 43:1.609438 +-1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 1:0.000000 6:6.931470 10:2.079441 9:1.386294 8:0.693147 18:6.591672 16:4.394448 21:2.197224 20:1.098612 19:1.098612 32:13.862940 28:1.386294 +-1 3:0.000000 8:0.693147 18:4.394448 14:2.197224 16:1.098612 19:1.098612 32:12.476646 26:8.317764 41:1.609438 52:3.583518 48:3.583518 56:1.791759 47:1.791759 74:11.675460 77:5.837730 84:1.945910 +-1 4:0.000000 2:0.000000 1:0.000000 3:0.000000 5:0.000000 6:2.079441 9:1.386294 7:1.386294 10:0.693147 11:0.693147 20:1.098612 21:1.098612 19:1.098612 32:4.158882 30:4.158882 28:2.772588 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 103:2.079442 188:2.484907 171:2.484907 260:2.708050 376:3.044522 703:7.522400 997:4.174387 2159:5.164786 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 6:15.249234 9:1.386294 12:0.693147 11:0.693147 17:3.295836 19:2.197224 20:1.098612 18:1.098612 32:4.158882 30:4.158882 25:2.772588 26:1.386294 +-1 2:0.000000 4:0.000000 14:1.098612 32:2.772588 232:2.639057 305:2.833213 640:3.688879 1705:9.750394 2765:11.136690 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:0.693147 20:1.098612 32:1.386294 43:1.609438 103:2.079442 90:2.079442 117:2.197225 160:2.397895 186:2.484907 323:2.890372 401:3.135494 +-1 5:0.000000 3:0.000000 4:0.000000 1:0.000000 2:0.000000 9:1.386294 8:1.386294 11:1.386294 7:0.693147 18:8.788896 21:2.197224 17:1.098612 32:5.545176 30:2.772588 29:1.386294 45:3.583518 +-1 3:0.000000 2:0.000000 9:1.386294 11:0.693147 18:2.197224 19:1.098612 21:1.098612 24:1.386294 43:1.609438 58:1.791759 59:1.791759 84:1.945910 78:1.945910 85:2.079442 111:2.197225 115:2.197225 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 10:1.386294 9:0.693147 19:1.098612 20:1.098612 14:1.098612 32:2.772588 29:1.386294 43:1.609438 35:1.609438 103:2.079442 237:2.639057 273:2.772589 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 12:0.693147 9:0.693147 20:1.098612 18:1.098612 21:1.098612 14:1.098612 32:4.158882 31:4.158882 28:2.772588 24:1.386294 35:1.609438 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:0.693147 10:0.693147 11:0.693147 8:0.693147 12:0.693147 21:3.295836 20:2.197224 32:4.158882 31:1.386294 25:1.386294 43:1.609438 +-1 4:0.000000 1:0.000000 3:0.000000 2:0.000000 5:0.000000 6:1.386294 11:1.386294 9:0.693147 12:0.693147 10:0.693147 18:2.197224 20:1.098612 32:2.772588 25:1.386294 23:1.386294 37:1.609438 +-1 1:0.000000 3:0.000000 13:1.098612 32:1.386294 24:1.386294 62:1.791759 95:2.079442 132:2.302585 178:2.484907 220:2.639057 289:2.772589 300:2.833213 376:3.044522 423:3.218876 451:3.258097 573:3.555348 +-1 1:0.000000 5:0.000000 3:0.000000 2:0.000000 4:0.000000 9:1.386294 10:0.693147 11:0.693147 7:0.693147 12:0.693147 18:1.098612 17:1.098612 16:1.098612 21:1.098612 14:1.098612 20:1.098612 +-1 2:0.000000 4:0.000000 6:3.465735 12:0.693147 9:0.693147 10:0.693147 11:0.693147 21:1.098612 16:1.098612 17:1.098612 32:1.386294 42:1.609438 34:1.609438 51:1.791759 61:1.791759 58:1.791759 +-1 78:1.945910 129:2.302585 148:2.397895 235:2.639057 313:2.890372 365:3.044522 393:3.135494 684:7.522400 1601:4.753590 2914:5.568345 3680:5.857933 16438:6.957497 16439:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 6:9.704058 9:5.545176 12:0.693147 10:0.693147 16:4.394448 17:2.197224 20:1.098612 19:1.098612 18:1.098612 30:5.545176 32:2.772588 28:2.772588 31:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:1.386294 11:1.386294 9:1.386294 6:1.386294 10:0.693147 7:0.693147 21:2.197224 19:1.098612 14:1.098612 28:4.158882 32:2.772588 42:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:4.158882 45:1.791759 58:1.791759 73:1.945910 67:1.945910 103:2.079442 94:2.079442 158:2.397895 150:2.397895 161:2.397895 184:2.484907 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 6:1.386294 12:0.693147 8:0.693147 20:1.098612 17:1.098612 32:4.158882 25:1.386294 39:1.609438 60:5.375277 57:3.583518 50:1.791759 84:1.945910 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 6:1.386294 9:1.386294 12:0.693147 10:0.693147 11:0.693147 17:4.394448 20:1.098612 21:1.098612 15:1.098612 18:1.098612 14:1.098612 +-1 1:0.000000 3:0.000000 8:0.693147 10:0.693147 11:0.693147 489:3.367296 518:3.401197 1009:4.174387 1104:8.634976 2081:10.329572 5625:6.263398 4971:6.263398 +-1 5:0.000000 2:0.000000 4:0.000000 9:4.852029 7:2.079441 10:1.386294 12:0.693147 6:0.693147 17:5.493060 19:3.295836 20:2.197224 16:1.098612 18:1.098612 14:1.098612 30:6.931470 32:5.545176 +-1 1:0.000000 3:0.000000 6:0.693147 18:2.197224 16:1.098612 24:5.545176 32:2.772588 28:2.772588 26:1.386294 25:1.386294 38:1.609438 49:3.583518 52:1.791759 58:1.791759 79:1.945910 68:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 9:1.386294 12:0.693147 10:0.693147 6:0.693147 8:0.693147 11:0.693147 20:3.295836 19:1.098612 14:1.098612 32:11.090352 34:1.609438 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 1:0.000000 9:4.158882 11:3.465735 6:3.465735 12:1.386294 10:1.386294 8:0.693147 21:3.295836 14:3.295836 18:2.197224 19:1.098612 16:1.098612 +-1 5:0.000000 2:0.000000 6:0.693147 19:1.098612 21:1.098612 32:4.158882 31:1.386294 25:1.386294 110:2.197225 115:2.197225 153:2.397895 216:2.564949 242:5.416100 327:5.780744 730:3.806662 799:3.912023 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 6:1.386294 12:0.693147 9:0.693147 11:0.693147 20:1.098612 21:1.098612 32:4.158882 28:1.386294 39:4.828314 38:1.609438 74:1.945910 103:2.079442 +-1 1:0.000000 17:2.197224 32:1.386294 765:3.850148 850:4.007333 1499:4.753590 1683:4.875197 1876:10.021270 2177:5.164786 2885:5.568345 3589:5.857933 3338:5.857933 16606:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 10:2.079441 8:1.386294 11:1.386294 6:0.693147 7:0.693147 9:0.693147 13:3.295836 21:2.197224 20:1.098612 16:1.098612 32:2.772588 30:2.772588 +-1 3:0.000000 9:0.693147 21:1.098612 32:1.386294 31:1.386294 216:2.564949 617:3.610918 1585:4.753590 5864:18.790194 5865:12.526796 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 9:0.693147 10:0.693147 11:0.693147 12:0.693147 20:3.295836 18:1.098612 13:1.098612 32:4.158882 34:1.609438 43:1.609438 60:1.791759 45:1.791759 +-1 1:0.000000 4:0.000000 3:0.000000 5:0.000000 10:1.386294 9:0.693147 11:0.693147 6:0.693147 21:2.197224 20:1.098612 19:1.098612 13:1.098612 18:1.098612 32:4.158882 31:1.386294 24:1.386294 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 11:1.386294 6:1.386294 12:0.693147 9:0.693147 10:0.693147 20:4.394448 13:1.098612 19:1.098612 32:4.158882 31:1.386294 30:1.386294 38:3.218876 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:0.693147 6:0.693147 18:2.197224 19:1.098612 20:1.098612 21:1.098612 17:1.098612 16:1.098612 32:4.158882 24:4.158882 31:1.386294 34:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 11:1.386294 9:0.693147 13:1.098612 19:1.098612 20:1.098612 21:1.098612 14:1.098612 32:4.158882 31:1.386294 24:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 10:2.079441 6:2.079441 11:0.693147 7:0.693147 9:0.693147 20:2.197224 18:2.197224 21:1.098612 19:1.098612 15:1.098612 28:5.545176 +-1 2:0.000000 4:0.000000 6:2.079441 10:0.693147 11:0.693147 7:0.693147 8:0.693147 18:2.197224 21:1.098612 14:1.098612 26:4.158882 32:2.772588 25:1.386294 28:1.386294 31:1.386294 22:1.386294 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 11:1.386294 6:1.386294 10:0.693147 18:4.394448 20:1.098612 13:1.098612 21:1.098612 16:1.098612 19:1.098612 32:4.158882 22:1.386294 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 8:1.386294 14:1.098612 24:2.772588 25:1.386294 32:1.386294 36:1.609438 41:1.609438 103:6.238326 85:4.158884 88:2.079442 100:2.079442 204:2.564949 +-1 5:0.000000 2:0.000000 4:0.000000 6:1.386294 10:0.693147 11:0.693147 7:0.693147 9:0.693147 20:2.197224 17:1.098612 21:1.098612 32:1.386294 31:1.386294 25:1.386294 36:3.218876 56:1.791759 +-1 4:0.000000 5:0.000000 2:0.000000 10:3.465735 7:1.386294 20:1.098612 32:2.772588 34:3.218876 37:1.609438 43:1.609438 57:3.583518 73:7.783640 75:7.783640 76:5.837730 82:1.945910 91:2.079442 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 8:4.852029 10:3.465735 7:3.465735 6:2.772588 12:1.386294 11:0.693147 9:0.693147 20:3.295836 19:2.197224 18:1.098612 13:1.098612 +-1 5:0.000000 4:0.000000 2:0.000000 6:3.465735 10:2.079441 11:0.693147 9:0.693147 20:3.295836 17:2.197224 21:1.098612 18:1.098612 19:1.098612 30:2.772588 31:1.386294 32:1.386294 23:1.386294 +-1 5:0.000000 4:0.000000 2:0.000000 10:2.079441 7:2.079441 6:2.079441 18:2.197224 19:1.098612 32:1.386294 25:1.386294 60:10.750554 76:7.783640 77:3.891820 80:1.945910 71:1.945910 123:2.197225 +-1 2:0.000000 5:0.000000 6:4.158882 9:0.693147 10:0.693147 11:0.693147 8:0.693147 20:2.197224 13:1.098612 21:1.098612 18:1.098612 25:4.158882 61:5.375277 60:5.375277 53:3.583518 57:1.791759 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 10:2.079441 7:2.079441 12:0.693147 11:0.693147 9:0.693147 21:2.197224 20:1.098612 16:1.098612 18:1.098612 19:1.098612 26:4.158882 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 7:2.079441 12:1.386294 8:1.386294 11:0.693147 6:0.693147 9:0.693147 21:2.197224 20:1.098612 17:1.098612 19:1.098612 28:4.158882 32:1.386294 +-1 2:0.000000 5:0.000000 10:1.386294 6:1.386294 7:1.386294 11:0.693147 21:1.098612 17:1.098612 20:1.098612 31:1.386294 26:1.386294 30:1.386294 42:1.609438 56:5.375277 60:3.583518 49:1.791759 +-1 2:0.000000 4:0.000000 7:3.465735 9:1.386294 12:0.693147 11:0.693147 18:8.788896 19:7.690284 16:4.394448 21:4.394448 14:1.098612 30:4.158882 24:2.772588 32:2.772588 25:1.386294 31:1.386294 +-1 2:0.000000 5:0.000000 6:1.386294 10:0.693147 8:0.693147 7:0.693147 18:1.098612 14:1.098612 32:2.772588 38:3.218876 41:1.609438 56:1.791759 125:2.197225 177:2.484907 172:2.484907 273:2.772589 +-1 3:0.000000 1:0.000000 2:0.000000 10:2.079441 6:1.386294 8:1.386294 7:0.693147 11:0.693147 18:10.986120 21:2.197224 26:8.317764 28:2.772588 25:1.386294 30:1.386294 22:1.386294 32:1.386294 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:1.386294 8:0.693147 10:0.693147 11:0.693147 20:2.197224 15:1.098612 18:1.098612 32:4.158882 30:1.386294 43:1.609438 49:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:3.465735 12:0.693147 9:0.693147 18:4.394448 20:1.098612 32:5.545176 36:3.218876 38:1.609438 42:1.609438 39:1.609438 57:5.375277 +-1 4:0.000000 2:0.000000 6:2.079441 9:1.386294 8:0.693147 12:0.693147 17:4.394448 18:2.197224 21:1.098612 19:1.098612 20:1.098612 14:1.098612 25:8.317764 30:6.931470 27:4.158882 31:2.772588 +-1 3:0.000000 9:2.079441 6:1.386294 10:0.693147 17:2.197224 16:1.098612 25:2.772588 43:1.609438 42:1.609438 38:1.609438 53:3.583518 77:11.675460 82:1.945910 93:10.397210 94:4.158884 98:2.079442 +-1 2:0.000000 4:0.000000 5:0.000000 6:6.238323 10:2.772588 12:1.386294 9:0.693147 18:4.394448 21:3.295836 20:2.197224 17:1.098612 25:5.545176 32:2.772588 27:2.772588 30:1.386294 53:5.375277 +-1 5:0.000000 2:0.000000 4:0.000000 6:4.852029 9:0.693147 16:2.197224 20:2.197224 18:1.098612 30:5.545176 32:4.158882 24:1.386294 53:1.791759 83:1.945910 98:4.158884 93:2.079442 106:2.197225 +-1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 6:2.079441 8:2.079441 10:1.386294 9:0.693147 18:2.197224 16:1.098612 21:1.098612 30:4.158882 32:4.158882 26:1.386294 28:1.386294 25:1.386294 +-1 3:0.000000 6:11.783499 7:1.386294 8:1.386294 11:0.693147 18:10.986120 16:4.394448 17:3.295836 26:6.931470 27:4.158882 32:2.772588 25:2.772588 24:1.386294 34:16.094380 38:9.656628 56:12.542313 +-1 2:0.000000 7:1.386294 6:1.386294 8:0.693147 9:0.693147 17:5.493060 16:3.295836 18:2.197224 19:1.098612 14:1.098612 27:2.772588 26:1.386294 28:1.386294 25:1.386294 32:1.386294 38:3.218876 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 2:0.000000 4:0.000000 5:0.000000 7:7.624617 12:0.693147 8:0.693147 9:0.693147 18:5.493060 20:2.197224 32:4.158882 26:4.158882 38:3.218876 35:1.609438 36:1.609438 39:1.609438 50:3.583518 +-1 2:0.000000 3:0.000000 1:0.000000 10:1.386294 9:0.693147 19:3.295836 16:1.098612 21:1.098612 18:1.098612 28:1.386294 32:1.386294 43:3.218876 42:1.609438 39:1.609438 60:10.750554 48:3.583518 +-1 4:0.000000 2:0.000000 1:0.000000 12:3.465735 8:1.386294 10:0.693147 6:0.693147 20:1.098612 18:1.098612 15:1.098612 16:1.098612 33:2.772588 31:1.386294 40:1.609438 37:1.609438 53:1.791759 +-1 2:0.000000 11:0.693147 8:0.693147 16:1.098612 28:1.386294 50:3.583518 58:1.791759 53:1.791759 79:7.783640 74:1.945910 72:1.945910 71:1.945910 94:2.079442 120:2.197225 121:2.197225 161:2.397895 +-1 16:1.098612 27:1.386294 30:1.386294 53:1.791759 50:1.791759 56:1.791759 51:1.791759 48:1.791759 69:15.567280 75:1.945910 72:1.945910 91:2.079442 108:2.197225 135:2.302585 161:4.795790 174:2.484907 +-1 2:0.000000 6:4.158882 7:4.158882 8:1.386294 17:5.493060 16:3.295836 26:6.931470 25:2.772588 24:1.386294 37:3.218876 49:53.752770 50:16.125831 52:5.375277 53:5.375277 57:1.791759 79:42.810020 +-1 2:0.000000 3:0.000000 5:0.000000 1:0.000000 10:0.693147 6:0.693147 18:3.295836 20:1.098612 30:1.386294 22:1.386294 33:1.386294 39:1.609438 35:1.609438 53:1.791759 52:1.791759 51:1.791759 +-1 18:1.098612 14:1.098612 33:1.386294 35:1.609438 132:2.302585 148:2.397895 274:2.772589 517:10.203591 1110:4.317488 +-1 5:0.000000 6:2.772588 8:2.772588 7:2.079441 9:0.693147 10:0.693147 18:2.197224 16:1.098612 21:1.098612 28:6.931470 26:2.772588 22:2.772588 33:2.772588 27:1.386294 42:11.266066 36:3.218876 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 6:4.852029 10:3.465735 8:2.079441 11:2.079441 7:0.693147 16:6.591672 20:1.098612 18:1.098612 14:1.098612 24:5.545176 28:4.158882 30:2.772588 +-1 1:0.000000 6:0.693147 8:0.693147 18:3.295836 33:5.545176 26:1.386294 28:1.386294 30:1.386294 22:1.386294 43:1.609438 39:1.609438 48:1.791759 59:1.791759 79:1.945910 82:1.945910 83:1.945910 +-1 4:0.000000 8:2.772588 6:2.079441 12:1.386294 9:0.693147 10:0.693147 16:4.394448 18:2.197224 21:1.098612 28:4.158882 25:2.772588 33:2.772588 29:1.386294 43:1.609438 38:1.609438 36:1.609438 +-1 2:0.000000 4:0.000000 8:1.386294 10:0.693147 12:0.693147 19:3.295836 18:3.295836 16:2.197224 41:1.609438 43:1.609438 42:1.609438 54:3.583518 55:3.583518 61:1.791759 83:1.945910 92:6.238326 +-1 1:0.000000 6:14.556087 8:3.465735 10:2.772588 7:2.772588 11:2.079441 12:1.386294 9:0.693147 18:8.788896 16:4.394448 17:2.197224 21:1.098612 22:13.862940 26:12.476646 33:11.090352 30:8.317764 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 12:1.386294 11:0.693147 19:1.098612 33:2.772588 31:1.386294 24:1.386294 55:5.375277 54:1.791759 59:1.791759 82:1.945910 99:2.079442 +-1 1:0.000000 14:1.098612 33:1.386294 35:1.609438 78:1.945910 160:2.397895 451:3.258097 872:4.007333 1875:5.010635 1858:5.010635 6086:18.790194 17563:6.957497 17564:6.957497 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:2.079441 7:0.693147 12:0.693147 10:0.693147 31:1.386294 22:1.386294 33:1.386294 24:1.386294 36:3.218876 54:12.542313 55:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 7:2.772588 11:2.079441 10:0.693147 9:0.693147 12:0.693147 16:2.197224 18:1.098612 19:1.098612 20:1.098612 14:1.098612 33:2.772588 +-1 3:0.000000 82:1.945910 +-1 3:0.000000 1:0.000000 2:0.000000 8:1.386294 10:1.386294 6:0.693147 9:0.693147 11:0.693147 16:2.197224 20:1.098612 19:1.098612 33:2.772588 22:1.386294 29:1.386294 28:1.386294 27:1.386294 +-1 1:0.000000 29:1.386294 33:1.386294 3081:5.568345 6096:6.263398 17622:6.957497 17623:6.957497 17624:6.957497 17625:6.957497 17626:6.957497 +-1 2:0.000000 4:0.000000 33:1.386294 38:1.609438 126:2.197225 145:2.397895 268:2.772589 354:2.995732 857:4.007333 1084:4.317488 3948:5.857933 17627:20.872491 17628:13.914994 17629:13.914994 17630:6.957497 17631:6.957497 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 19:1.098612 31:1.386294 29:1.386294 33:1.386294 54:1.791759 55:1.791759 59:1.791759 81:1.945910 102:2.079442 124:2.197225 +-1 4:0.000000 5:0.000000 1:0.000000 2:0.000000 3:0.000000 12:0.693147 13:1.098612 14:1.098612 22:1.386294 33:1.386294 35:1.609438 55:3.583518 54:1.791759 46:1.791759 104:2.079442 149:4.795790 +-1 1:0.000000 3:0.000000 27:1.386294 29:1.386294 33:1.386294 45:1.791759 46:1.791759 103:2.079442 297:2.833213 358:3.044522 1289:4.465908 1519:4.753590 2419:10.694216 2504:5.347108 3593:5.857933 4722:18.790194 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 11:2.772588 12:0.693147 10:0.693147 8:0.693147 9:0.693147 16:2.197224 29:2.772588 33:2.772588 27:1.386294 28:1.386294 43:1.609438 +-1 1:0.000000 3:0.000000 4:0.000000 6:2.079441 10:0.693147 12:0.693147 18:2.197224 14:1.098612 33:2.772588 30:1.386294 34:3.218876 39:1.609438 40:1.609438 35:1.609438 60:3.583518 51:3.583518 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 13:2.197224 33:1.386294 55:3.583518 46:3.583518 54:1.791759 83:1.945910 81:1.945910 78:1.945910 104:2.079442 85:2.079442 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 3:0.000000 7:7.624617 10:2.772588 12:1.386294 11:1.386294 6:0.693147 9:0.693147 8:0.693147 18:10.986120 19:5.493060 16:4.394448 15:4.394448 +-1 1:0.000000 2:0.000000 9:0.693147 10:0.693147 11:0.693147 8:0.693147 18:2.197224 15:2.197224 19:1.098612 33:2.772588 31:2.772588 54:1.791759 55:1.791759 80:1.945910 104:2.079442 86:2.079442 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 6:0.693147 8:0.693147 12:0.693147 21:1.098612 15:1.098612 13:1.098612 33:2.772588 22:1.386294 34:1.609438 45:3.583518 54:1.791759 +-1 1:0.000000 3:0.000000 6:0.693147 12:0.693147 19:2.197224 13:1.098612 18:1.098612 31:2.772588 33:2.772588 29:1.386294 46:1.791759 51:1.791759 78:1.945910 69:1.945910 83:1.945910 85:2.079442 +-1 4:0.000000 2:0.000000 1:0.000000 3:0.000000 5:0.000000 9:4.852029 6:4.158882 12:1.386294 8:0.693147 17:5.493060 20:3.295836 14:1.098612 16:1.098612 21:1.098612 18:1.098612 30:4.158882 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 6:2.079441 11:0.693147 16:1.098612 24:4.158882 22:1.386294 29:1.386294 34:3.218876 61:1.791759 69:1.945910 99:2.079442 118:2.197225 126:2.197225 +-1 2:0.000000 5:0.000000 1:0.000000 4:0.000000 11:0.693147 7:0.693147 20:1.098612 33:2.772588 26:1.386294 25:1.386294 43:1.609438 41:1.609438 54:3.583518 55:3.583518 63:1.791759 52:1.791759 +-1 3:0.000000 40:3.218876 94:2.079442 85:2.079442 88:2.079442 120:2.197225 159:4.795790 149:2.397895 153:2.397895 220:5.278114 253:2.708050 380:3.091042 456:3.258097 475:3.332205 639:3.688879 675:3.737670 +-1 3:0.000000 5:0.000000 2:0.000000 1:0.000000 6:9.010911 10:4.158882 11:1.386294 12:1.386294 15:2.197224 13:1.098612 18:1.098612 16:1.098612 21:1.098612 14:1.098612 33:4.158882 24:1.386294 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 6:9.010911 9:4.158882 10:3.465735 11:1.386294 12:0.693147 8:0.693147 7:0.693147 18:5.493060 17:3.295836 19:2.197224 21:1.098612 27:2.772588 +-1 5:0.000000 1:0.000000 12:0.693147 13:2.197224 19:1.098612 21:1.098612 33:2.772588 22:1.386294 54:7.167036 55:3.583518 49:1.791759 46:1.791759 66:1.945910 104:2.079442 87:2.079442 105:2.197225 +-1 1:0.000000 3:0.000000 2:0.000000 10:2.079441 11:0.693147 17:1.098612 33:1.386294 26:1.386294 43:1.609438 54:1.791759 55:1.791759 104:2.079442 86:2.079442 131:2.302585 144:2.397895 192:2.484907 +-1 2:0.000000 3:0.000000 1:0.000000 5:0.000000 10:2.079441 7:1.386294 11:0.693147 6:0.693147 18:1.098612 14:1.098612 33:2.772588 26:1.386294 22:1.386294 41:1.609438 60:7.167036 54:5.375277 +-1 1:0.000000 3:0.000000 2:0.000000 12:0.693147 18:1.098612 14:1.098612 33:1.386294 31:1.386294 41:1.609438 54:3.583518 55:1.791759 104:2.079442 102:2.079442 192:2.484907 191:2.484907 187:2.484907 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 8:1.386294 12:0.693147 6:0.693147 10:0.693147 18:1.098612 21:1.098612 33:2.772588 36:1.609438 39:1.609438 55:5.375277 54:3.583518 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 19:1.098612 13:1.098612 31:1.386294 29:1.386294 33:1.386294 55:3.583518 54:1.791759 45:1.791759 82:1.945910 104:2.079442 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:1.386294 7:0.693147 10:0.693147 9:0.693147 20:4.394448 21:1.098612 13:1.098612 14:1.098612 33:1.386294 24:1.386294 22:1.386294 +-1 2:0.000000 5:0.000000 3:0.000000 1:0.000000 6:6.238323 10:4.158882 7:3.465735 11:0.693147 17:5.493060 18:2.197224 21:2.197224 19:2.197224 16:2.197224 13:1.098612 15:1.098612 14:1.098612 +-1 1:0.000000 2:0.000000 3:0.000000 8:0.693147 10:0.693147 18:1.098612 93:2.079442 171:2.484907 188:2.484907 342:2.995732 837:3.951244 908:4.060443 1517:9.507180 1889:10.021270 3086:5.568345 6129:6.263398 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 8:4.158882 9:0.693147 7:0.693147 11:0.693147 16:2.197224 14:1.098612 19:1.098612 15:1.098612 29:2.772588 33:2.772588 28:2.772588 24:1.386294 +-1 4:0.000000 1:0.000000 3:0.000000 8:1.386294 10:0.693147 17:2.197224 16:1.098612 21:1.098612 18:1.098612 24:4.158882 25:1.386294 22:1.386294 36:1.609438 49:28.668144 50:3.583518 83:3.891820 +-1 5:0.000000 3:0.000000 1:0.000000 2:0.000000 4:0.000000 6:6.238323 8:2.079441 10:1.386294 12:0.693147 11:0.693147 7:0.693147 18:4.394448 21:2.197224 16:1.098612 29:2.772588 33:2.772588 +-1 2:0.000000 1:0.000000 5:0.000000 4:0.000000 11:1.386294 10:1.386294 6:1.386294 9:0.693147 16:6.591672 17:2.197224 21:1.098612 19:1.098612 33:2.772588 27:2.772588 22:1.386294 31:1.386294 +-1 1:0.000000 3:0.000000 37:1.609438 99:4.158884 118:2.197225 112:2.197225 149:2.397895 191:2.484907 376:3.044522 401:3.135494 413:3.178054 535:3.433987 678:3.737670 1430:4.653960 2374:5.347108 5466:6.263398 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 12:1.386294 9:0.693147 11:0.693147 7:0.693147 8:0.693147 17:3.295836 21:1.098612 18:1.098612 19:1.098612 16:1.098612 29:4.158882 +-1 5:0.000000 1:0.000000 13:1.098612 33:1.386294 54:1.791759 55:1.791759 1941:5.010635 3744:5.857933 3879:5.857933 6147:18.790194 5671:6.263398 18220:6.957497 18221:6.957497 +-1 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 12:1.386294 10:0.693147 20:1.098612 13:1.098612 18:1.098612 22:1.386294 33:1.386294 55:8.958795 54:5.375277 62:1.791759 45:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 10:0.693147 9:0.693147 11:0.693147 6:0.693147 7:0.693147 20:1.098612 16:1.098612 14:1.098612 33:2.772588 29:1.386294 27:1.386294 +-1 4:0.000000 1:0.000000 3:0.000000 2:0.000000 5:0.000000 7:1.386294 6:1.386294 12:0.693147 11:0.693147 10:0.693147 20:1.098612 16:1.098612 29:1.386294 33:1.386294 26:1.386294 25:1.386294 +-1 3:0.000000 2:0.000000 5:0.000000 1:0.000000 4:0.000000 6:2.772588 9:1.386294 8:1.386294 12:0.693147 10:0.693147 7:0.693147 21:4.394448 19:1.098612 13:1.098612 18:1.098612 16:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:9.010911 6:2.079441 10:1.386294 11:1.386294 12:0.693147 17:3.295836 16:3.295836 15:2.197224 19:1.098612 26:6.931470 27:5.545176 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 12:8.317764 6:2.772588 10:2.079441 7:0.693147 18:6.591672 21:4.394448 16:2.197224 19:1.098612 28:6.931470 25:5.545176 24:2.772588 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 9:0.693147 11:0.693147 21:1.098612 33:1.386294 55:3.583518 104:2.079442 85:2.079442 105:2.197225 132:2.302585 142:2.397895 192:2.484907 293:2.772589 +-1 7:0.693147 9:0.693147 17:4.394448 19:1.098612 18:1.098612 14:1.098612 28:1.386294 38:1.609438 37:1.609438 40:1.609438 47:3.583518 58:1.791759 62:1.791759 55:1.791759 48:1.791759 67:1.945910 +-1 2:0.000000 3:0.000000 1:0.000000 5:0.000000 6:3.465735 9:2.079441 10:1.386294 11:1.386294 12:0.693147 7:0.693147 16:5.493060 17:4.394448 14:1.098612 29:6.931470 28:5.545176 33:2.772588 +-1 2:0.000000 1:0.000000 3:0.000000 11:0.693147 21:1.098612 18:1.098612 14:1.098612 33:1.386294 41:1.609438 54:3.583518 55:1.791759 76:1.945910 104:2.079442 192:2.484907 171:2.484907 200:2.564949 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 8:4.158882 6:2.079441 7:0.693147 10:0.693147 12:0.693147 18:3.295836 15:3.295836 13:2.197224 20:2.197224 24:9.704058 30:5.545176 +-1 33:1.386294 54:1.791759 4807:6.263398 18572:6.957497 18573:6.957497 18574:6.957497 18575:6.957497 18576:6.957497 18577:6.957497 18578:6.957497 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 12:1.386294 11:1.386294 10:0.693147 6:0.693147 18:2.197224 19:1.098612 13:1.098612 29:2.772588 31:1.386294 24:1.386294 37:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 10:1.386294 16:1.098612 25:2.772588 33:1.386294 31:1.386294 24:1.386294 39:1.609438 43:1.609438 49:5.375277 52:3.583518 54:1.791759 55:1.791759 50:1.791759 +-1 3:0.000000 5:0.000000 2:0.000000 1:0.000000 4:0.000000 12:0.693147 6:0.693147 11:0.693147 19:2.197224 15:2.197224 17:2.197224 20:1.098612 24:2.772588 22:2.772588 31:1.386294 33:1.386294 +-1 4:0.000000 5:0.000000 1:0.000000 3:0.000000 2:0.000000 10:0.693147 11:0.693147 7:0.693147 19:1.098612 13:1.098612 14:1.098612 31:1.386294 22:1.386294 33:1.386294 26:1.386294 40:1.609438 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 8:2.079441 10:1.386294 7:1.386294 6:0.693147 11:0.693147 13:3.295836 15:1.098612 16:1.098612 14:1.098612 22:1.386294 33:1.386294 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 11:2.772588 10:2.079441 6:1.386294 12:0.693147 9:0.693147 7:0.693147 21:1.098612 24:4.158882 33:1.386294 30:1.386294 25:1.386294 +-1 4:0.000000 3:0.000000 12:0.693147 13:3.295836 19:2.197224 17:1.098612 14:1.098612 22:4.158882 31:2.772588 33:1.386294 28:1.386294 36:1.609438 41:1.609438 55:7.167036 54:3.583518 46:3.583518 +-1 29:1.386294 33:1.386294 55:1.791759 54:1.791759 45:1.791759 431:3.218876 1374:4.553877 1612:9.507180 3631:5.857933 6072:18.790194 18613:6.957497 18614:6.957497 +-1 4:0.000000 5:0.000000 7:10.397205 9:3.465735 12:0.693147 10:0.693147 11:0.693147 20:2.197224 16:1.098612 26:6.931470 30:4.158882 22:1.386294 33:1.386294 23:1.386294 25:1.386294 28:1.386294 +-1 3:0.000000 13:1.098612 29:1.386294 33:1.386294 45:1.791759 78:1.945910 90:2.079442 122:2.197225 160:2.397895 217:2.564949 3416:5.857933 6165:6.263398 4587:6.263398 5764:6.263398 18630:6.957497 18631:6.957497 +-1 1:0.000000 5:0.000000 3:0.000000 2:0.000000 4:0.000000 9:0.693147 10:0.693147 11:0.693147 18:1.098612 13:1.098612 22:2.772588 33:2.772588 24:1.386294 54:3.583518 45:3.583518 82:3.891820 +-1 33:1.386294 18638:6.957497 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 12:0.693147 13:2.197224 14:1.098612 33:1.386294 29:1.386294 37:1.609438 35:1.609438 54:1.791759 46:1.791759 112:2.197225 113:2.197225 +-1 4:0.000000 2:0.000000 10:1.386294 11:0.693147 6:0.693147 9:0.693147 19:1.098612 21:1.098612 18:1.098612 14:1.098612 29:1.386294 33:1.386294 34:1.609438 35:1.609438 55:3.583518 54:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 9:2.079441 6:1.386294 10:0.693147 12:0.693147 19:1.098612 17:1.098612 28:2.772588 30:1.386294 33:1.386294 34:1.609438 42:1.609438 35:1.609438 +-1 1:0.000000 3:0.000000 82:1.945910 148:2.397895 4900:6.263398 18662:6.957497 +-1 3:0.000000 1:0.000000 8:0.693147 11:0.693147 9:0.693147 17:2.197224 20:2.197224 24:8.317764 22:1.386294 33:1.386294 30:1.386294 37:1.609438 84:5.837730 67:1.945910 81:1.945910 78:1.945910 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 10:2.079441 6:2.079441 8:1.386294 11:1.386294 18:2.197224 16:1.098612 20:1.098612 14:1.098612 27:2.772588 33:2.772588 43:4.828314 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 6:1.386294 12:0.693147 11:0.693147 9:0.693147 21:1.098612 18:1.098612 33:1.386294 43:3.218876 38:3.218876 55:3.583518 +-1 3:0.000000 78:1.945910 132:2.302585 395:3.135494 489:3.367296 994:4.174387 1068:4.317488 1191:4.382027 3442:5.857933 3717:5.857933 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 1:0.000000 6:2.079441 11:1.386294 8:1.386294 12:1.386294 9:0.693147 17:3.295836 13:1.098612 31:1.386294 29:1.386294 32:1.386294 43:3.218876 +-1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 4:0.000000 12:0.693147 6:0.693147 19:2.197224 16:1.098612 17:1.098612 14:1.098612 31:1.386294 28:1.386294 33:1.386294 39:3.218876 35:1.609438 +-1 4:0.000000 1:0.000000 2:0.000000 5:0.000000 3:0.000000 12:3.465735 6:0.693147 9:0.693147 8:0.693147 11:0.693147 19:2.197224 20:1.098612 15:1.098612 21:1.098612 13:1.098612 31:1.386294 +-1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 12:0.693147 13:2.197224 20:2.197224 19:1.098612 31:2.772588 24:1.386294 28:1.386294 22:1.386294 33:1.386294 40:1.609438 46:1.791759 77:1.945910 +-1 2:0.000000 5:0.000000 1:0.000000 4:0.000000 12:2.772588 10:1.386294 11:0.693147 9:0.693147 6:0.693147 20:2.197224 16:1.098612 18:1.098612 21:1.098612 33:2.772588 24:2.772588 29:1.386294 +-1 3:0.000000 19:1.098612 15:1.098612 29:1.386294 33:1.386294 82:1.945910 148:2.397895 142:2.397895 176:2.484907 697:3.761200 3059:5.568345 3423:5.857933 18794:6.957497 18795:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 7:0.693147 8:0.693147 18:1.098612 21:1.098612 19:1.098612 33:1.386294 43:1.609438 42:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 10:1.386294 11:1.386294 9:0.693147 6:0.693147 16:2.197224 25:2.772588 30:2.772588 33:1.386294 28:1.386294 43:1.609438 54:5.375277 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 8:3.465735 12:0.693147 10:0.693147 6:0.693147 9:0.693147 16:3.295836 19:2.197224 18:2.197224 31:2.772588 28:1.386294 22:1.386294 33:1.386294 +-1 3:0.000000 11:0.693147 14:1.098612 26:1.386294 35:1.609438 47:1.791759 49:1.791759 51:1.791759 59:1.791759 74:3.891820 99:2.079442 90:2.079442 155:2.397895 436:3.218876 769:3.850148 1084:4.317488 +-1 1:0.000000 4:0.000000 2:0.000000 5:0.000000 10:1.386294 8:1.386294 12:0.693147 18:1.098612 33:2.772588 35:1.609438 55:5.375277 54:1.791759 104:2.079442 113:2.197225 192:2.484907 204:2.564949 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 8:1.386294 9:1.386294 12:1.386294 10:0.693147 7:0.693147 6:0.693147 15:2.197224 18:1.098612 24:1.386294 33:1.386294 55:3.583518 54:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 7:2.079441 11:1.386294 10:1.386294 9:1.386294 15:1.098612 14:1.098612 33:2.772588 22:1.386294 26:1.386294 24:1.386294 36:3.218876 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 7:12.476646 10:6.238323 6:4.852029 11:2.079441 18:4.394448 15:2.197224 13:1.098612 16:1.098612 17:1.098612 21:1.098612 19:1.098612 14:1.098612 +-1 29:1.386294 33:1.386294 45:1.791759 46:1.791759 219:2.639057 531:3.433987 1180:4.382027 3438:5.857933 18931:6.957497 18932:6.957497 18933:6.957497 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 10:2.079441 11:1.386294 8:1.386294 9:0.693147 18:1.098612 21:1.098612 13:1.098612 28:2.772588 33:2.772588 22:1.386294 42:1.609438 55:5.375277 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 10:1.386294 12:0.693147 11:0.693147 7:0.693147 9:0.693147 18:2.197224 17:2.197224 25:4.158882 29:1.386294 33:1.386294 27:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 9:0.693147 16:1.098612 13:1.098612 15:1.098612 20:1.098612 26:2.772588 33:1.386294 24:1.386294 40:1.609438 55:3.583518 +-1 3:0.000000 1:0.000000 5:0.000000 8:0.693147 7:0.693147 11:0.693147 13:1.098612 15:1.098612 21:1.098612 28:2.772588 22:2.772588 29:1.386294 33:1.386294 26:1.386294 40:3.218876 39:3.218876 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:1.386294 12:0.693147 11:0.693147 13:1.098612 21:1.098612 14:1.098612 22:2.772588 31:2.772588 33:1.386294 28:1.386294 36:1.609438 +-1 1:0.000000 8:0.693147 15:1.098612 29:1.386294 27:1.386294 33:1.386294 81:3.891820 115:4.394450 108:2.197225 135:2.302585 173:2.484907 198:7.694847 202:2.564949 220:2.639057 307:2.833213 355:2.995732 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 6:11.090352 10:6.931470 8:2.772588 7:2.772588 11:1.386294 9:1.386294 12:0.693147 18:5.493060 21:4.394448 20:4.394448 13:3.295836 +-1 3:0.000000 1:0.000000 2:0.000000 10:0.693147 33:2.772588 22:1.386294 54:1.791759 55:1.791759 67:1.945910 82:1.945910 104:2.079442 192:2.484907 474:3.295837 486:3.332205 1600:4.753590 1784:4.875197 +-1 1:0.000000 3:0.000000 7:0.693147 18:1.098612 26:1.386294 24:1.386294 33:1.386294 39:1.609438 149:4.795790 514:6.802394 2791:11.136690 3501:5.857933 19098:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 20:1.098612 33:1.386294 25:1.386294 43:1.609438 55:3.583518 54:1.791759 52:1.791759 56:1.791759 50:1.791759 +-1 3:0.000000 1:0.000000 7:1.386294 9:0.693147 21:1.098612 19:1.098612 14:1.098612 33:5.545176 26:4.158882 25:1.386294 27:1.386294 22:1.386294 38:1.609438 39:1.609438 35:1.609438 55:3.583518 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 6:0.693147 13:3.295836 14:1.098612 33:2.772588 29:1.386294 35:1.609438 34:1.609438 45:1.791759 46:1.791759 87:2.079442 184:2.484907 257:2.708050 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 9:1.386294 12:0.693147 7:0.693147 11:0.693147 21:1.098612 14:1.098612 28:4.158882 26:1.386294 24:1.386294 33:1.386294 40:1.609438 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:4.158882 7:2.079441 6:2.079441 10:0.693147 12:0.693147 8:0.693147 19:2.197224 13:2.197224 16:1.098612 20:1.098612 21:1.098612 33:4.158882 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 10:2.079441 11:1.386294 6:1.386294 12:0.693147 7:0.693147 20:1.098612 17:1.098612 14:1.098612 33:1.386294 43:3.218876 38:3.218876 +-1 2:0.000000 1:0.000000 4:0.000000 12:1.386294 10:0.693147 18:1.098612 33:1.386294 55:5.375277 45:3.583518 54:1.791759 104:2.079442 151:2.397895 192:2.484907 293:2.772589 486:3.332205 1614:4.753590 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 12:0.693147 6:0.693147 11:0.693147 22:1.386294 33:1.386294 24:1.386294 36:1.609438 54:3.583518 55:1.791759 61:1.791759 68:1.945910 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 9:2.079441 11:1.386294 12:0.693147 8:0.693147 21:1.098612 19:1.098612 27:1.386294 31:1.386294 30:1.386294 24:1.386294 40:1.609438 +-1 3:0.000000 1:0.000000 8:0.693147 11:0.693147 24:1.386294 33:1.386294 41:4.828314 39:1.609438 35:1.609438 59:3.583518 156:2.397895 163:4.969814 166:2.484907 180:2.484907 256:2.708050 297:2.833213 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 11:2.079441 7:0.693147 9:0.693147 12:0.693147 19:2.197224 13:1.098612 21:1.098612 25:4.158882 33:2.772588 31:1.386294 28:1.386294 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 8:1.386294 11:0.693147 18:3.295836 14:1.098612 30:1.386294 33:1.386294 38:1.609438 36:1.609438 41:1.609438 54:5.375277 48:1.791759 77:3.891820 +-1 2:0.000000 4:0.000000 5:0.000000 12:2.772588 6:2.079441 10:0.693147 11:0.693147 7:0.693147 17:1.098612 19:1.098612 13:1.098612 22:4.158882 33:2.772588 31:1.386294 26:1.386294 28:1.386294 +-1 1:0.000000 5:0.000000 3:0.000000 54:1.791759 55:1.791759 149:2.397895 199:2.564949 288:5.545178 300:2.833213 426:3.218876 1014:4.248495 2140:5.164786 2218:5.164786 2954:5.568345 19264:6.957497 19265:6.957497 +-1 1:0.000000 3:0.000000 5:0.000000 9:1.386294 10:1.386294 7:0.693147 18:1.098612 17:1.098612 28:1.386294 33:1.386294 60:1.791759 47:1.791759 81:5.837730 80:1.945910 74:1.945910 84:1.945910 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 19:1.098612 17:1.098612 31:1.386294 24:1.386294 22:1.386294 33:1.386294 55:3.583518 54:1.791759 45:1.791759 99:2.079442 +-1 2:0.000000 6:2.772588 10:0.693147 8:0.693147 21:2.197224 33:1.386294 42:3.218876 43:3.218876 60:5.375277 57:5.375277 49:3.583518 53:1.791759 50:1.791759 58:1.791759 79:7.783640 68:5.837730 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 19:1.098612 18:1.098612 33:1.386294 42:1.609438 43:1.609438 54:1.791759 62:1.791759 80:1.945910 86:2.079442 115:2.197225 +-1 10:1.386294 11:0.693147 6:0.693147 19:1.098612 14:1.098612 33:1.386294 31:1.386294 36:1.609438 55:1.791759 60:1.791759 51:1.791759 74:3.891820 104:2.079442 93:2.079442 98:2.079442 160:2.397895 +-1 4:0.000000 5:0.000000 12:0.693147 18:1.098612 55:3.583518 54:1.791759 79:1.945910 104:2.079442 192:2.484907 293:2.772589 705:3.761200 932:4.110874 1090:4.317488 1256:4.465908 1377:4.553877 19315:6.957497 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 7:16.635528 10:2.079441 6:2.079441 8:2.079441 12:0.693147 11:0.693147 21:2.197224 19:1.098612 33:2.772588 27:2.772588 29:1.386294 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 7:6.238323 10:5.545176 9:2.772588 12:2.079441 6:1.386294 8:0.693147 16:10.986120 17:7.690284 19:1.098612 21:1.098612 18:1.098612 24:4.158882 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:6.931470 9:0.693147 12:0.693147 10:0.693147 8:0.693147 16:4.394448 14:1.098612 28:5.545176 27:2.772588 30:1.386294 33:1.386294 34:1.609438 +-1 3:0.000000 2:0.000000 1:0.000000 7:1.386294 8:0.693147 18:7.690284 16:1.098612 14:1.098612 33:2.772588 27:1.386294 28:1.386294 29:1.386294 38:1.609438 42:1.609438 35:1.609438 60:7.167036 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 19:1.098612 13:1.098612 14:1.098612 31:1.386294 33:1.386294 24:1.386294 22:1.386294 35:1.609438 54:3.583518 55:3.583518 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 7:4.158882 12:2.079441 9:1.386294 8:1.386294 11:0.693147 6:0.693147 16:2.197224 19:1.098612 14:1.098612 33:2.772588 38:1.609438 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:1.386294 6:0.693147 14:1.098612 18:1.098612 33:1.386294 28:1.386294 36:1.609438 43:1.609438 41:1.609438 53:10.750554 55:1.791759 47:1.791759 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 12:0.693147 7:0.693147 11:0.693147 20:1.098612 21:1.098612 19:1.098612 14:1.098612 31:4.158882 24:2.772588 33:2.772588 26:1.386294 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 11:1.386294 6:0.693147 10:0.693147 17:2.197224 19:1.098612 13:1.098612 21:1.098612 15:1.098612 18:1.098612 31:1.386294 24:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:2.079441 10:1.386294 12:0.693147 33:1.386294 55:3.583518 54:1.791759 68:1.945910 104:2.079442 90:2.079442 86:2.079442 113:2.197225 +-1 4:0.000000 1:0.000000 5:0.000000 6:6.931470 8:1.386294 9:1.386294 12:1.386294 10:0.693147 18:8.788896 16:7.690284 17:6.591672 14:2.197224 21:1.098612 20:1.098612 26:8.317764 33:5.545176 +-1 1:0.000000 3:0.000000 33:1.386294 37:3.218876 46:1.791759 55:1.791759 117:2.197225 188:4.969814 217:2.564949 639:3.688879 849:8.014666 853:4.007333 1452:9.307920 3039:11.136690 4786:12.526796 19521:6.957497 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 6:4.158882 11:2.079441 9:2.079441 12:0.693147 10:0.693147 16:3.295836 21:1.098612 29:2.772588 33:2.772588 32:1.386294 28:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 10:3.465735 11:1.386294 12:0.693147 9:0.693147 17:2.197224 14:1.098612 18:1.098612 29:2.772588 33:2.772588 25:1.386294 37:6.437752 +-1 3:0.000000 4:0.000000 1:0.000000 2:0.000000 5:0.000000 11:1.386294 10:1.386294 6:1.386294 12:0.693147 8:0.693147 17:1.098612 21:1.098612 29:2.772588 33:2.772588 25:1.386294 27:1.386294 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 10:1.386294 9:1.386294 6:1.386294 12:0.693147 11:0.693147 17:1.098612 18:1.098612 29:2.772588 33:2.772588 28:1.386294 37:1.609438 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 6:4.158882 11:1.386294 10:1.386294 12:0.693147 20:1.098612 19:1.098612 17:1.098612 21:1.098612 18:1.098612 29:2.772588 33:2.772588 +-1 1:0.000000 3:0.000000 33:1.386294 35:1.609438 99:2.079442 790:7.824046 927:4.110874 19614:13.914994 19615:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:5.545176 6:4.158882 11:3.465735 9:3.465735 12:2.079441 7:2.079441 18:9.887508 16:4.394448 15:2.197224 17:1.098612 26:5.545176 +-1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 11:1.386294 12:0.693147 9:0.693147 10:0.693147 19:1.098612 15:1.098612 17:1.098612 33:1.386294 40:4.828314 55:7.167036 59:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 12:1.386294 6:1.386294 8:0.693147 9:0.693147 21:3.295836 19:1.098612 15:1.098612 18:1.098612 14:1.098612 33:2.772588 29:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 10:0.693147 11:0.693147 6:0.693147 9:0.693147 19:1.098612 18:1.098612 22:1.386294 33:1.386294 34:1.609438 54:3.583518 +-1 3:0.000000 1:0.000000 8:0.693147 11:0.693147 13:1.098612 19:1.098612 29:1.386294 33:1.386294 45:1.791759 46:1.791759 82:1.945910 124:2.197225 288:2.772589 310:2.890372 927:4.110874 1215:4.465908 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 10:1.386294 12:1.386294 11:0.693147 33:1.386294 43:1.609438 55:5.375277 54:3.583518 48:3.583518 50:1.791759 80:1.945910 78:1.945910 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 12:0.693147 6:0.693147 13:4.394448 21:1.098612 15:1.098612 20:1.098612 33:1.386294 34:1.609438 62:5.375277 55:3.583518 54:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 7:63.076377 12:11.783499 6:10.397205 10:4.158882 8:2.079441 11:1.386294 9:1.386294 20:18.676404 16:9.887508 18:4.394448 21:4.394448 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 12:2.079441 17:2.197224 20:1.098612 19:1.098612 13:1.098612 14:1.098612 31:1.386294 29:1.386294 33:1.386294 41:1.609438 55:7.167036 59:5.375277 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 11:0.693147 6:0.693147 7:0.693147 17:1.098612 15:1.098612 16:1.098612 14:1.098612 33:2.772588 22:1.386294 26:1.386294 34:1.609438 +-1 4:0.000000 1:0.000000 3:0.000000 2:0.000000 5:0.000000 11:2.079441 10:1.386294 8:1.386294 9:0.693147 7:0.693147 6:0.693147 13:1.098612 20:1.098612 29:1.386294 33:1.386294 22:1.386294 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 19:1.098612 31:1.386294 29:1.386294 33:1.386294 54:1.791759 55:1.791759 59:1.791759 82:1.945910 81:1.945910 102:2.079442 +-1 4:0.000000 1:0.000000 2:0.000000 5:0.000000 3:0.000000 12:1.386294 11:1.386294 8:0.693147 7:0.693147 17:3.295836 20:1.098612 19:1.098612 15:1.098612 16:1.098612 14:1.098612 28:6.931470 +-1 4:0.000000 3:0.000000 2:0.000000 1:0.000000 5:0.000000 8:4.158882 10:3.465735 12:1.386294 9:1.386294 11:0.693147 18:6.591672 17:3.295836 33:2.772588 29:2.772588 28:2.772588 31:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 12:0.693147 19:1.098612 20:1.098612 14:1.098612 17:1.098612 31:1.386294 33:1.386294 41:1.609438 55:3.583518 99:2.079442 101:2.079442 102:2.079442 +-1 1:0.000000 2:0.000000 10:0.693147 11:0.693147 33:2.772588 26:1.386294 24:1.386294 54:3.583518 55:1.791759 77:1.945910 104:2.079442 192:2.484907 271:2.772589 355:2.995732 474:3.295837 486:3.332205 +-1 3:0.000000 5:0.000000 1:0.000000 2:0.000000 10:1.386294 11:0.693147 6:0.693147 14:1.098612 33:1.386294 34:1.609438 41:1.609438 54:1.791759 55:1.791759 60:1.791759 51:1.791759 56:1.791759 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 11:0.693147 21:1.098612 19:1.098612 42:1.609438 122:2.197225 259:2.708050 253:2.708050 320:2.890372 354:5.991464 359:3.044522 383:3.091042 550:6.931472 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 11:0.693147 13:1.098612 14:1.098612 33:2.772588 22:1.386294 24:1.386294 28:1.386294 39:1.609438 41:1.609438 55:5.375277 62:1.791759 +-1 10:0.693147 11:0.693147 18:1.098612 42:1.609438 56:1.791759 75:3.891820 69:3.891820 74:1.945910 79:1.945910 98:4.158884 101:2.079442 161:4.795790 147:4.795790 164:2.484907 170:2.484907 180:2.484907 +-1 2:0.000000 9:2.079441 16:2.197224 14:1.098612 58:1.791759 71:1.945910 72:1.945910 78:1.945910 94:2.079442 138:6.907755 137:2.302585 161:21.581055 158:2.397895 163:2.484907 214:2.564949 239:2.639057 +-1 2:0.000000 9:0.693147 11:0.693147 16:2.197224 18:1.098612 14:1.098612 27:1.386294 43:1.609438 38:1.609438 50:1.791759 78:5.837730 79:3.891820 68:1.945910 101:2.079442 121:2.197225 138:6.907755 +-1 8:2.772588 16:2.197224 27:1.386294 28:1.386294 38:3.218876 79:3.891820 78:1.945910 68:1.945910 67:1.945910 98:2.079442 94:2.079442 117:2.197225 120:2.197225 127:2.197225 161:21.581055 144:4.795790 +-1 2:0.000000 1:0.000000 3:0.000000 11:0.693147 10:0.693147 19:1.098612 18:1.098612 14:1.098612 33:1.386294 31:1.386294 24:1.386294 43:1.609438 36:1.609438 50:1.791759 58:1.791759 54:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 4:0.000000 10:1.386294 8:0.693147 7:0.693147 15:2.197224 21:2.197224 16:1.098612 14:1.098612 31:6.931470 24:5.545176 28:4.158882 33:2.772588 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 10:2.079441 11:1.386294 18:1.098612 33:1.386294 31:1.386294 25:1.386294 30:1.386294 43:3.218876 54:8.958795 55:7.167036 61:1.791759 +-1 1:0.000000 3:0.000000 10:0.693147 11:0.693147 8:0.693147 17:1.098612 40:1.609438 131:2.302585 178:2.484907 172:2.484907 262:2.772589 347:2.995732 459:3.258097 470:3.295837 514:6.802394 662:3.737670 +-1 5:0.000000 1:0.000000 3:0.000000 4:0.000000 2:0.000000 12:0.693147 13:1.098612 14:1.098612 33:2.772588 24:1.386294 41:1.609438 54:5.375277 55:3.583518 68:1.945910 104:2.079442 328:2.944439 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 12:2.079441 11:1.386294 7:0.693147 18:2.197224 19:1.098612 15:1.098612 17:1.098612 24:4.158882 33:1.386294 31:1.386294 26:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:4.158882 7:4.158882 12:3.465735 6:2.772588 11:1.386294 21:6.591672 20:4.394448 19:2.197224 13:1.098612 14:1.098612 27:5.545176 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 7:2.079441 12:1.386294 11:1.386294 6:1.386294 10:0.693147 18:2.197224 16:1.098612 30:4.158882 26:2.772588 33:1.386294 25:1.386294 +-1 1:0.000000 3:0.000000 33:1.386294 99:2.079442 2670:16.705035 5666:6.263398 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 6:2.079441 11:0.693147 12:0.693147 13:1.098612 14:1.098612 31:1.386294 22:1.386294 33:1.386294 24:1.386294 34:1.609438 43:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 6:4.852029 11:1.386294 10:1.386294 8:1.386294 9:0.693147 18:2.197224 16:1.098612 21:1.098612 19:1.098612 14:1.098612 33:5.545176 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:0.693147 8:0.693147 19:1.098612 16:1.098612 17:1.098612 24:2.772588 33:2.772588 31:1.386294 29:1.386294 35:1.609438 54:3.583518 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 5:0.000000 12:1.386294 19:1.098612 13:1.098612 31:1.386294 22:1.386294 33:1.386294 27:1.386294 24:1.386294 55:3.583518 54:1.791759 45:1.791759 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 12:1.386294 9:0.693147 11:0.693147 19:1.098612 18:1.098612 31:1.386294 29:1.386294 33:1.386294 40:1.609438 41:1.609438 54:3.583518 45:1.791759 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 8:1.386294 10:0.693147 11:0.693147 12:0.693147 16:1.098612 29:1.386294 33:1.386294 40:1.609438 54:3.583518 55:1.791759 76:1.945910 75:1.945910 +-1 1:0.000000 3:0.000000 11:1.386294 33:1.386294 38:1.609438 189:2.484907 232:2.639057 266:2.772589 335:2.944439 404:3.135494 417:3.178054 432:3.218876 439:3.258097 660:3.688879 1008:4.174387 1015:4.248495 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 12:0.693147 6:0.693147 20:1.098612 14:1.098612 22:1.386294 33:1.386294 24:1.386294 40:1.609438 43:1.609438 41:1.609438 60:5.375277 54:1.791759 +-1 3:0.000000 4:0.000000 2:0.000000 5:0.000000 1:0.000000 11:2.079441 8:0.693147 10:0.693147 12:0.693147 21:3.295836 18:1.098612 24:13.862940 28:1.386294 22:1.386294 37:3.218876 42:1.609438 +-1 1:0.000000 13:2.197224 46:1.791759 314:2.890372 403:3.135494 466:3.295837 639:3.688879 3054:5.568345 3440:11.715866 +-1 2:0.000000 4:0.000000 6:2.772588 10:0.693147 12:0.693147 11:0.693147 13:1.098612 33:1.386294 27:1.386294 24:1.386294 34:4.828314 38:4.828314 40:3.218876 60:3.583518 51:3.583518 61:3.583518 +-1 1:0.000000 3:0.000000 9:1.386294 7:0.693147 14:1.098612 24:1.386294 33:1.386294 37:1.609438 81:3.891820 78:3.891820 99:2.079442 101:2.079442 115:6.591675 120:2.197225 121:2.197225 117:2.197225 +-1 3:0.000000 7:0.693147 8:0.693147 33:1.386294 40:1.609438 80:1.945910 112:2.197225 123:2.197225 149:2.397895 159:2.397895 174:2.484907 1051:4.248495 3353:5.857933 5167:6.263398 4809:6.263398 20276:13.914994 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 13:1.098612 35:1.609438 54:1.791759 104:2.079442 189:2.484907 375:3.044522 466:3.295837 474:3.295837 711:3.761200 879:4.060443 891:4.060443 1829:5.010635 +-1 3:0.000000 1:0.000000 47:1.791759 119:2.197225 122:2.197225 313:2.890372 395:6.270988 575:3.555348 644:3.688879 785:3.912023 3424:5.857933 3418:5.857933 6061:6.263398 20291:6.957497 20292:6.957497 +-1 4:0.000000 2:0.000000 5:0.000000 12:0.693147 9:0.693147 19:1.098612 17:1.098612 14:1.098612 31:1.386294 26:1.386294 33:1.386294 35:1.609438 55:7.167036 54:3.583518 84:1.945910 78:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:0.693147 12:0.693147 13:2.197224 19:1.098612 17:1.098612 14:1.098612 31:1.386294 22:1.386294 33:1.386294 41:1.609438 55:3.583518 +-1 1:0.000000 33:1.386294 173:2.484907 229:5.278114 242:2.708050 468:3.295837 4025:11.715866 6186:12.526796 20301:6.957497 20302:6.957497 20303:6.957497 20304:6.957497 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 9:0.693147 19:2.197224 17:2.197224 18:1.098612 14:1.098612 31:2.772588 33:1.386294 40:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 10:0.693147 9:0.693147 18:2.197224 33:2.772588 29:1.386294 25:1.386294 55:5.375277 54:3.583518 59:1.791759 62:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 8:1.386294 17:1.098612 18:1.098612 24:4.158882 33:1.386294 27:1.386294 43:1.609438 54:1.791759 55:1.791759 57:1.791759 104:2.079442 97:2.079442 +-1 5:0.000000 1:0.000000 3:0.000000 12:0.693147 14:1.098612 29:1.386294 26:1.386294 30:1.386294 41:1.609438 55:1.791759 109:6.591675 126:4.394450 111:2.197225 132:2.302585 129:2.302585 153:2.397895 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 6:1.386294 8:0.693147 10:0.693147 19:2.197224 16:2.197224 20:1.098612 30:2.772588 31:1.386294 33:1.386294 25:1.386294 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:1.386294 8:0.693147 10:0.693147 18:2.197224 29:1.386294 33:1.386294 30:1.386294 41:1.609438 55:5.375277 54:3.583518 49:3.583518 +-1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 5:0.000000 7:0.693147 9:0.693147 8:0.693147 18:5.493060 14:1.098612 30:2.772588 26:1.386294 33:1.386294 38:1.609438 36:1.609438 41:1.609438 +-1 1:0.000000 3:0.000000 7:0.693147 17:1.098612 50:1.791759 53:1.791759 71:5.837730 81:1.945910 120:4.394450 117:2.197225 160:2.397895 161:2.397895 189:2.484907 210:2.564949 277:2.772589 297:2.833213 +-1 2:0.000000 5:0.000000 6:9.010911 8:3.465735 10:1.386294 11:0.693147 9:0.693147 7:0.693147 21:2.197224 20:2.197224 17:1.098612 16:1.098612 28:2.772588 27:2.772588 33:1.386294 25:1.386294 +-1 1:0.000000 5:0.000000 3:0.000000 9:0.693147 15:2.197224 17:2.197224 14:2.197224 13:1.098612 35:1.609438 55:3.583518 62:1.791759 47:1.791759 83:1.945910 99:2.079442 103:2.079442 97:2.079442 +-1 148:2.397895 1482:4.753590 20496:6.957497 +-1 1:0.000000 8:0.693147 13:1.098612 33:2.772588 29:1.386294 41:1.609438 55:3.583518 54:1.791759 153:4.795790 301:2.833213 360:3.044522 359:3.044522 388:3.091042 415:3.178054 682:3.737670 806:3.912023 +-1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 10:4.158882 11:1.386294 6:1.386294 8:0.693147 12:0.693147 16:2.197224 19:1.098612 17:1.098612 18:1.098612 14:1.098612 25:2.772588 27:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 10:2.079441 12:0.693147 11:0.693147 20:1.098612 33:1.386294 40:1.609438 36:1.609438 49:5.375277 55:3.583518 54:1.791759 60:1.791759 68:3.891820 +-1 1:0.000000 3:0.000000 8:0.693147 13:2.197224 19:1.098612 33:2.772588 29:1.386294 62:5.375277 55:3.583518 45:3.583518 82:1.945910 104:2.079442 148:2.397895 293:2.772589 362:3.044522 414:3.178054 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:2.079441 10:1.386294 11:1.386294 7:1.386294 12:0.693147 9:0.693147 13:1.098612 18:1.098612 26:5.545176 30:2.772588 24:1.386294 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 12:0.693147 54:3.583518 55:3.583518 104:2.079442 160:2.397895 192:2.484907 220:2.639057 293:2.772589 555:3.496508 791:3.912023 790:3.912023 +-1 8:1.386294 19:1.098612 97:2.079442 160:2.397895 371:3.044522 721:3.806662 942:4.110874 3665:5.857933 5914:6.263398 20550:6.957497 20551:6.957497 20552:6.957497 20553:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.079441 6:2.079441 11:0.693147 9:0.693147 7:0.693147 20:3.295836 16:1.098612 14:1.098612 25:8.317764 27:4.158882 22:2.772588 33:2.772588 +-1 2:0.000000 5:0.000000 3:0.000000 6:9.704058 10:2.772588 9:1.386294 20:4.394448 17:1.098612 15:1.098612 14:1.098612 22:2.772588 33:2.772588 34:1.609438 35:1.609438 54:8.958795 53:5.375277 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 6:2.772588 10:2.772588 11:0.693147 8:0.693147 20:10.986120 16:2.197224 21:2.197224 18:1.098612 14:1.098612 25:13.862940 33:4.158882 22:2.772588 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 6:0.693147 8:0.693147 19:2.197224 21:1.098612 16:1.098612 28:1.386294 39:6.437752 36:1.609438 64:1.791759 48:1.791759 53:1.791759 59:1.791759 +-1 89:4.158884 382:3.091042 1372:4.553877 6246:6.263398 6247:6.263398 +-1 89:4.158884 382:3.091042 1372:4.553877 6246:6.263398 6247:6.263398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/tfidf/tfidf_raw/15.txt b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/tfidf/tfidf_raw/15.txt new file mode 100644 index 00000000..09a0ff04 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/tfidf/tfidf_raw/15.txt @@ -0,0 +1,1051 @@ ++1 1:0.000000 6:0.693147 7:0.693147 13:3.295836 14:1.098612 22:4.158882 23:4.158882 24:1.386294 34:3.218876 35:1.609438 45:5.375277 46:5.375277 65:5.837730 66:3.891820 67:1.945910 105:2.197225 ++1 1:0.000000 2:0.000000 3:0.000000 6:2.079441 7:0.693147 8:0.693147 14:2.197224 15:1.098612 16:1.098612 34:8.047190 36:1.609438 85:2.079442 106:2.197225 141:2.397895 163:4.969814 195:2.564949 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 8:5.545176 6:3.465735 9:1.386294 10:0.693147 15:8.788896 17:2.197224 13:2.197224 14:2.197224 23:6.931470 25:5.545176 22:2.772588 26:1.386294 ++1 3:0.000000 1:0.000000 8:2.079441 7:0.693147 13:2.197224 14:1.098612 27:1.386294 36:1.609438 46:3.583518 70:11.675460 73:1.945910 67:1.945910 66:1.945910 74:1.945910 71:1.945910 90:6.238326 ++1 3:0.000000 6:2.772588 9:2.079441 8:2.079441 11:0.693147 18:6.591672 15:4.394448 19:2.197224 16:1.098612 25:2.772588 28:1.386294 51:7.167036 52:1.791759 46:1.791759 53:1.791759 91:2.079442 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 8:0.693147 12:0.693147 15:5.493060 20:2.197224 18:2.197224 14:1.098612 24:2.772588 29:2.772588 25:1.386294 40:4.828314 36:1.609438 35:1.609438 ++1 3:0.000000 15:2.197224 13:1.098612 14:1.098612 27:1.386294 24:1.386294 29:1.386294 23:1.386294 40:1.609438 37:1.609438 41:1.609438 46:1.791759 78:1.945910 85:2.079442 114:2.197225 142:4.795790 ++1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 7:0.693147 12:0.693147 9:0.693147 15:3.295836 23:1.386294 24:1.386294 40:1.609438 42:1.609438 37:1.609438 67:1.945910 70:1.945910 105:2.197225 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 7:7.624617 9:3.465735 6:1.386294 12:0.693147 15:19.775016 17:6.591672 16:4.394448 19:3.295836 21:2.197224 13:1.098612 26:6.931470 27:5.545176 ++1 3:0.000000 1:0.000000 2:0.000000 8:2.772588 6:0.693147 15:9.887508 18:2.197224 16:1.098612 29:2.772588 26:1.386294 25:1.386294 28:1.386294 23:1.386294 42:3.218876 37:1.609438 49:1.791759 ++1 3:0.000000 1:0.000000 8:2.079441 7:0.693147 13:2.197224 14:1.098612 27:1.386294 36:1.609438 46:3.583518 70:11.675460 73:1.945910 67:1.945910 66:1.945910 74:1.945910 71:1.945910 90:6.238326 ++1 1:0.000000 6:0.693147 7:0.693147 13:3.295836 14:1.098612 22:4.158882 23:4.158882 24:1.386294 34:3.218876 35:1.609438 45:5.375277 46:5.375277 66:11.675460 65:5.837730 67:1.945910 105:2.197225 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:3.465735 12:1.386294 8:1.386294 6:0.693147 15:4.394448 16:2.197224 17:2.197224 23:1.386294 28:1.386294 37:3.218876 34:3.218876 ++1 1:0.000000 2:0.000000 8:0.693147 12:0.693147 15:3.295836 13:2.197224 40:3.218876 48:1.791759 46:1.791759 47:1.791759 67:5.837730 78:1.945910 73:1.945910 91:2.079442 114:2.197225 169:2.484907 ++1 3:0.000000 2:0.000000 1:0.000000 6:13.169793 9:4.158882 10:3.465735 7:2.772588 8:2.079441 18:28.563912 15:7.690284 16:6.591672 19:4.394448 13:4.394448 17:2.197224 20:2.197224 30:6.931470 ++1 2:0.000000 4:0.000000 7:7.624617 8:2.772588 9:1.386294 11:0.693147 10:0.693147 6:0.693147 15:6.591672 19:5.493060 13:2.197224 16:1.098612 18:1.098612 26:15.249234 23:4.158882 24:2.772588 ++1 3:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 15:3.295836 19:1.098612 18:1.098612 16:1.098612 23:2.772588 24:1.386294 22:1.386294 40:1.609438 109:2.197225 136:2.302585 141:2.397895 ++1 3:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 15:3.295836 18:2.197224 19:1.098612 16:1.098612 23:2.772588 24:1.386294 22:1.386294 40:1.609438 85:2.079442 108:2.197225 109:2.197225 ++1 14:9.887508 17:2.197224 15:1.098612 25:2.772588 35:14.484942 37:1.609438 38:1.609438 57:3.583518 67:3.891820 73:1.945910 98:4.158884 107:2.197225 120:2.197225 133:4.605170 148:2.397895 162:19.879256 ++1 1:0.000000 2:0.000000 6:6.238323 7:2.772588 15:6.591672 14:1.098612 16:1.098612 18:1.098612 26:4.158882 27:1.386294 25:1.386294 34:3.218876 60:16.125831 57:3.583518 46:1.791759 53:1.791759 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 7:1.386294 6:0.693147 8:0.693147 15:4.394448 16:4.394448 26:2.772588 28:2.772588 25:1.386294 29:1.386294 23:1.386294 24:1.386294 42:1.609438 ++1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 8:0.693147 15:3.295836 22:2.772588 23:1.386294 40:1.609438 47:1.791759 48:1.791759 99:2.079442 108:4.394450 116:2.197225 ++1 1:0.000000 4:0.000000 5:0.000000 12:0.693147 15:4.394448 13:4.394448 20:1.098612 23:6.931470 30:1.386294 40:1.609438 35:1.609438 46:7.167036 67:3.891820 76:1.945910 73:1.945910 66:1.945910 ++1 1:0.000000 3:0.000000 2:0.000000 8:0.693147 6:0.693147 7:0.693147 18:3.295836 15:1.098612 25:1.386294 66:9.729550 73:1.945910 87:4.158884 90:2.079442 193:2.564949 230:2.639057 267:2.772589 ++1 1:0.000000 3:0.000000 7:5.545176 8:1.386294 15:3.295836 13:1.098612 14:1.098612 23:1.386294 41:1.609438 46:1.791759 73:1.945910 88:6.238326 99:2.079442 118:2.197225 107:2.197225 112:2.197225 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:9.010911 12:2.079441 6:1.386294 11:0.693147 13:7.690284 15:5.493060 14:2.197224 16:1.098612 17:1.098612 18:1.098612 24:12.476646 ++1 1:0.000000 2:0.000000 4:0.000000 15:3.295836 17:1.098612 14:1.098612 27:1.386294 35:1.609438 105:2.197225 124:2.197225 120:2.197225 149:2.397895 176:2.484907 163:2.484907 169:2.484907 221:2.639057 ++1 3:0.000000 7:1.386294 8:1.386294 10:0.693147 6:0.693147 13:4.394448 18:1.098612 26:8.317764 29:4.158882 24:2.772588 23:2.772588 37:3.218876 52:5.375277 45:3.583518 46:3.583518 48:3.583518 ++1 6:0.693147 19:1.098612 27:1.386294 37:1.609438 51:3.583518 47:1.791759 90:6.238326 108:2.197225 193:5.129898 246:2.708050 323:2.890372 424:6.437752 518:3.401197 775:7.824046 824:3.951244 1147:4.382027 ++1 3:0.000000 1:0.000000 2:0.000000 9:1.386294 6:0.693147 7:0.693147 8:0.693147 18:3.295836 15:1.098612 21:1.098612 14:1.098612 20:1.098612 16:1.098612 28:1.386294 36:1.609438 38:1.609438 ++1 2:0.000000 1:0.000000 3:0.000000 15:2.197224 17:1.098612 18:1.098612 37:1.609438 50:1.791759 73:5.837730 75:3.891820 67:3.891820 69:1.945910 79:1.945910 95:4.158884 87:2.079442 107:2.197225 ++1 1:0.000000 4:0.000000 5:0.000000 3:0.000000 8:2.079441 12:1.386294 6:0.693147 18:4.394448 15:3.295836 16:1.098612 23:2.772588 26:1.386294 39:1.609438 38:1.609438 47:1.791759 48:1.791759 ++1 1:0.000000 3:0.000000 2:0.000000 15:2.197224 24:1.386294 37:6.437752 41:1.609438 66:3.891820 71:3.891820 131:2.302585 135:2.302585 149:4.795790 143:2.397895 176:2.484907 169:2.484907 218:5.278114 ++1 15:1.098612 23:1.386294 40:3.218876 57:8.958795 52:1.791759 73:44.755930 76:1.945910 111:6.591675 162:2.484907 194:2.564949 324:2.890372 492:6.734592 576:3.555348 786:3.912023 933:4.110874 1643:4.875197 ++1 2:0.000000 3:0.000000 9:2.079441 6:2.079441 8:0.693147 7:0.693147 15:6.591672 13:4.394448 17:3.295836 19:2.197224 16:1.098612 23:2.772588 30:1.386294 25:1.386294 37:14.484942 42:3.218876 ++1 1:0.000000 3:0.000000 2:0.000000 6:7.624617 7:2.772588 8:0.693147 15:8.788896 13:2.197224 28:5.545176 23:2.772588 25:1.386294 37:16.094380 34:8.047190 36:3.218876 42:1.609438 46:3.583518 ++1 7:0.693147 13:2.197224 15:1.098612 23:2.772588 37:1.609438 46:3.583518 73:62.269120 67:42.810020 87:2.079442 107:2.197225 112:2.197225 113:2.197225 130:2.302585 156:45.560005 179:39.758512 162:22.364163 ++1 3:0.000000 7:0.693147 13:2.197224 26:1.386294 22:1.386294 23:1.386294 40:11.266066 46:3.583518 67:7.783640 66:1.945910 112:2.197225 113:2.197225 114:2.197225 130:2.302585 146:2.397895 141:2.397895 ++1 3:0.000000 2:0.000000 4:0.000000 7:1.386294 11:0.693147 12:0.693147 14:1.098612 22:1.386294 44:4.828314 40:1.609438 37:1.609438 41:1.609438 63:1.791759 66:1.945910 78:1.945910 98:4.158884 ++1 3:0.000000 2:0.000000 6:3.465735 7:1.386294 15:1.098612 42:1.609438 37:1.609438 50:1.791759 48:1.791759 52:1.791759 83:1.945910 71:1.945910 66:1.945910 72:1.945910 93:8.317768 97:4.158884 ++1 2:0.000000 3:0.000000 6:2.079441 10:1.386294 9:1.386294 18:8.788896 15:3.295836 21:1.098612 14:1.098612 25:4.158882 38:1.609438 37:1.609438 60:3.583518 61:3.583518 46:3.583518 47:1.791759 ++1 6:2.772588 11:0.693147 10:0.693147 8:0.693147 18:2.197224 16:1.098612 21:1.098612 27:2.772588 28:2.772588 25:1.386294 34:8.047190 37:8.047190 38:3.218876 39:3.218876 62:3.583518 58:1.791759 ++1 3:0.000000 2:0.000000 5:0.000000 1:0.000000 7:24.953292 6:1.386294 9:0.693147 14:12.084732 13:8.788896 15:7.690284 17:3.295836 16:3.295836 19:1.098612 28:5.545176 29:2.772588 31:1.386294 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 7:4.158882 8:1.386294 11:0.693147 13:5.493060 16:3.295836 14:2.197224 17:2.197224 15:1.098612 24:8.317764 28:2.772588 29:1.386294 27:1.386294 ++1 3:0.000000 2:0.000000 1:0.000000 8:1.386294 15:4.394448 16:2.197224 19:2.197224 27:2.772588 41:4.828314 43:1.609438 66:13.621370 84:1.945910 81:1.945910 67:1.945910 70:1.945910 88:2.079442 ++1 3:0.000000 2:0.000000 1:0.000000 8:2.079441 7:0.693147 9:0.693147 15:4.394448 16:2.197224 27:2.772588 40:1.609438 43:1.609438 66:9.729550 84:1.945910 81:1.945910 70:1.945910 95:6.238326 ++1 3:0.000000 13:2.197224 14:2.197224 29:4.158882 44:4.828314 35:1.609438 46:3.583518 75:3.891820 76:1.945910 65:1.945910 81:1.945910 99:2.079442 123:2.197225 109:2.197225 146:2.397895 163:2.484907 ++1 2:0.000000 3:0.000000 1:0.000000 6:4.158882 7:1.386294 8:0.693147 11:0.693147 9:0.693147 13:3.295836 17:3.295836 16:3.295836 15:2.197224 19:2.197224 14:2.197224 24:4.158882 25:2.772588 ++1 3:0.000000 1:0.000000 7:0.693147 21:1.098612 15:1.098612 17:1.098612 13:1.098612 24:1.386294 37:8.047190 40:4.828314 48:3.583518 47:3.583518 63:1.791759 67:3.891820 99:2.079442 96:2.079442 ++1 1:0.000000 2:0.000000 3:0.000000 5:0.000000 7:4.852029 8:2.772588 11:0.693147 6:0.693147 15:14.281956 19:3.295836 16:2.197224 21:1.098612 25:6.931470 24:2.772588 26:1.386294 30:1.386294 ++1 2:0.000000 7:3.465735 8:0.693147 13:2.197224 17:1.098612 26:4.158882 29:1.386294 27:1.386294 44:3.218876 38:1.609438 35:1.609438 60:5.375277 45:1.791759 46:1.791759 66:5.837730 73:1.945910 ++1 1:0.000000 3:0.000000 8:1.386294 6:0.693147 18:1.098612 22:2.772588 34:1.609438 37:1.609438 36:1.609438 47:1.791759 66:1.945910 109:4.394450 263:2.772589 273:2.772589 783:3.912023 7094:6.957497 ++1 2:0.000000 6:0.693147 39:1.609438 40:1.609438 51:3.583518 47:1.791759 110:2.197225 7095:6.957497 7096:6.957497 7097:6.957497 7098:6.957497 7099:6.957497 7100:6.957497 7101:6.957497 7102:6.957497 7103:6.957497 ++1 18:1.098612 39:1.609438 86:2.079442 221:2.639057 411:3.178054 654:7.377758 941:8.221748 1233:4.465908 7104:6.957497 7105:6.957497 7106:6.957497 7107:6.957497 7108:6.957497 ++1 8:0.693147 12:0.693147 15:3.295836 14:1.098612 41:1.609438 65:1.945910 84:1.945910 70:1.945910 66:1.945910 87:2.079442 116:4.394450 107:2.197225 225:2.639057 247:2.708050 280:2.772589 287:2.772589 ++1 3:0.000000 7:1.386294 8:0.693147 12:0.693147 15:3.295836 70:1.945910 91:2.079442 116:4.394450 107:2.197225 113:2.197225 176:2.484907 193:10.259796 225:2.639057 252:8.124150 247:2.708050 280:2.772589 ++1 2:0.000000 8:0.693147 7:0.693147 12:0.693147 15:3.295836 18:2.197224 27:1.386294 38:1.609438 50:3.583518 61:1.791759 70:1.945910 95:4.158884 87:2.079442 88:2.079442 116:4.394450 107:2.197225 ++1 2:0.000000 4:0.000000 7:4.158882 9:0.693147 15:2.197224 26:5.545176 30:1.386294 39:1.609438 52:1.791759 49:1.791759 66:3.891820 84:1.945910 70:1.945910 91:4.158884 87:2.079442 101:2.079442 ++1 7:2.079441 15:3.295836 19:2.197224 25:1.386294 27:1.386294 70:3.891820 82:1.945910 96:4.158884 91:4.158884 134:2.302585 169:2.484907 204:2.564949 222:2.639057 272:2.772589 271:2.772589 311:5.780744 ++1 2:0.000000 16:1.098612 15:1.098612 26:1.386294 42:1.609438 72:1.945910 70:1.945910 91:6.238326 95:2.079442 110:2.197225 155:2.397895 154:2.397895 167:2.484907 177:2.484907 169:2.484907 225:7.917171 ++1 7:3.465735 6:0.693147 10:0.693147 15:1.098612 19:1.098612 27:1.386294 26:1.386294 38:1.609438 37:1.609438 47:1.791759 73:3.891820 71:1.945910 75:1.945910 66:1.945910 96:2.079442 106:2.197225 ++1 2:0.000000 4:0.000000 6:9.010911 7:9.010911 8:2.079441 11:1.386294 10:0.693147 9:0.693147 16:7.690284 15:5.493060 21:3.295836 20:3.295836 13:2.197224 17:2.197224 19:2.197224 14:1.098612 ++1 7:2.772588 8:0.693147 13:4.394448 17:1.098612 14:1.098612 29:2.772588 27:1.386294 44:4.828314 34:3.218876 35:1.609438 45:3.583518 46:3.583518 52:1.791759 75:23.350920 66:7.783640 73:3.891820 ++1 13:2.197224 16:1.098612 44:4.828314 37:3.218876 40:1.609438 38:1.609438 61:5.375277 57:1.791759 56:1.791759 77:1.945910 67:1.945910 73:1.945910 97:2.079442 90:2.079442 107:2.197225 108:2.197225 ++1 4:0.000000 3:0.000000 2:0.000000 10:0.693147 13:2.197224 19:2.197224 15:1.098612 28:1.386294 24:1.386294 27:1.386294 44:4.828314 40:1.609438 38:1.609438 37:1.609438 39:1.609438 47:5.375277 ++1 3:0.000000 2:0.000000 7:4.158882 9:1.386294 11:0.693147 15:4.394448 19:4.394448 17:1.098612 25:1.386294 24:1.386294 28:1.386294 27:1.386294 37:4.828314 44:3.218876 41:3.218876 36:3.218876 ++1 6:1.386294 7:1.386294 18:4.394448 19:2.197224 17:2.197224 16:1.098612 29:1.386294 40:1.609438 49:14.334072 52:3.583518 47:1.791759 70:7.783640 75:5.837730 76:1.945910 71:1.945910 86:4.158884 ++1 3:0.000000 8:1.386294 15:2.197224 40:1.609438 37:1.609438 44:1.609438 88:2.079442 165:2.484907 228:5.278114 342:2.995732 687:3.761200 2692:5.568345 7418:6.957497 7419:6.957497 ++1 3:0.000000 2:0.000000 4:0.000000 6:29.112174 8:1.386294 10:1.386294 17:9.887508 15:7.690284 18:2.197224 14:1.098612 19:1.098612 21:1.098612 20:1.098612 16:1.098612 25:6.931470 27:2.772588 ++1 3:0.000000 2:0.000000 6:13.169793 10:2.772588 8:0.693147 7:0.693147 15:8.788896 17:5.493060 13:3.295836 18:3.295836 19:2.197224 21:1.098612 25:15.249234 22:2.772588 32:2.772588 27:1.386294 ++1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 6:5.545176 8:2.079441 10:0.693147 15:8.788896 17:3.295836 13:2.197224 18:2.197224 21:1.098612 20:1.098612 25:6.931470 27:2.772588 22:1.386294 ++1 1:0.000000 9:4.158882 17:6.591672 15:3.295836 14:2.197224 16:1.098612 41:4.828314 40:3.218876 57:3.583518 49:1.791759 62:1.791759 75:7.783640 71:1.945910 67:1.945910 97:2.079442 108:8.788900 ++1 3:0.000000 1:0.000000 5:0.000000 6:6.238323 7:4.158882 9:0.693147 16:7.690284 15:6.591672 26:5.545176 28:4.158882 27:1.386294 37:12.875504 42:3.218876 43:3.218876 39:1.609438 52:14.334072 ++1 2:0.000000 7:0.693147 8:0.693147 15:3.295836 13:1.098612 17:1.098612 28:1.386294 44:8.047190 37:6.437752 40:3.218876 36:1.609438 63:3.583518 59:3.583518 46:1.791759 64:1.791759 70:17.513190 ++1 2:0.000000 7:2.079441 6:0.693147 13:3.295836 29:2.772588 30:1.386294 44:4.828314 40:1.609438 37:1.609438 36:1.609438 46:5.375277 59:3.583518 66:5.837730 70:3.891820 77:1.945910 99:2.079442 ++1 8:0.693147 18:4.394448 13:2.197224 14:1.098612 27:1.386294 37:4.828314 36:4.828314 44:3.218876 38:1.609438 43:1.609438 35:1.609438 46:3.583518 47:3.583518 61:1.791759 52:1.791759 64:1.791759 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 5:0.000000 8:3.465735 7:1.386294 12:0.693147 20:4.394448 15:1.098612 14:1.098612 16:1.098612 22:1.386294 24:1.386294 26:1.386294 32:1.386294 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 7:2.079441 8:2.079441 9:0.693147 12:0.693147 20:4.394448 15:3.295836 14:2.197224 13:1.098612 17:1.098612 16:1.098612 32:5.545176 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 5:0.000000 7:2.079441 9:0.693147 8:0.693147 12:0.693147 15:3.295836 20:3.295836 13:1.098612 17:1.098612 16:1.098612 14:1.098612 32:2.772588 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 13:3.295836 15:1.098612 20:1.098612 32:5.545176 37:1.609438 40:1.609438 35:1.609438 46:3.583518 45:1.791759 66:15.567280 88:4.158884 90:2.079442 ++1 1:0.000000 3:0.000000 14:2.197224 15:2.197224 21:1.098612 16:1.098612 32:2.772588 29:1.386294 26:1.386294 27:1.386294 41:3.218876 42:1.609438 47:1.791759 69:1.945910 82:1.945910 75:1.945910 ++1 3:0.000000 1:0.000000 14:2.197224 22:4.158882 32:2.772588 29:1.386294 39:4.828314 40:3.218876 37:3.218876 41:3.218876 42:1.609438 73:5.837730 69:3.891820 87:2.079442 99:2.079442 118:4.394450 ++1 14:2.197224 15:1.098612 22:1.386294 32:1.386294 41:3.218876 49:1.791759 57:1.791759 88:4.158884 105:2.197225 118:2.197225 212:2.564949 290:2.772589 308:2.890372 367:3.044522 389:3.091042 500:6.734592 ++1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 8:0.693147 13:1.098612 18:1.098612 20:1.098612 14:1.098612 32:2.772588 37:3.218876 41:1.609438 49:1.791759 46:1.791759 47:1.791759 78:1.945910 ++1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 7:2.079441 12:1.386294 10:0.693147 21:1.098612 20:1.098612 26:4.158882 32:2.772588 88:4.158884 94:2.079442 201:2.564949 389:3.091042 387:3.091042 ++1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 7:2.079441 12:1.386294 10:0.693147 14:17.577792 16:5.493060 13:2.197224 15:2.197224 20:1.098612 32:5.545176 26:4.158882 22:2.772588 41:25.751008 ++1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 7:1.386294 9:0.693147 8:0.693147 20:4.394448 21:1.098612 14:1.098612 32:5.545176 24:1.386294 22:1.386294 41:1.609438 48:1.791759 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 6:0.693147 11:0.693147 12:0.693147 14:3.295836 16:2.197224 13:2.197224 15:1.098612 17:1.098612 20:1.098612 25:5.545176 ++1 2:0.000000 8:2.079441 9:0.693147 13:2.197224 18:2.197224 15:1.098612 17:1.098612 19:1.098612 16:1.098612 32:1.386294 26:1.386294 41:1.609438 49:3.583518 46:3.583518 57:1.791759 66:7.783640 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 7:1.386294 8:0.693147 12:0.693147 11:0.693147 20:3.295836 15:2.197224 14:1.098612 32:4.158882 37:1.609438 41:1.609438 49:1.791759 68:1.945910 ++1 7:2.079441 15:1.098612 17:1.098612 29:2.772588 32:2.772588 28:1.386294 49:3.583518 46:3.583518 70:13.621370 66:7.783640 67:1.945910 79:1.945910 69:1.945910 91:2.079442 105:2.197225 124:2.197225 ++1 8:1.386294 14:1.098612 32:1.386294 37:3.218876 35:1.609438 66:1.945910 96:2.079442 89:2.079442 126:2.197225 185:2.484907 174:2.484907 223:2.639057 229:2.639057 367:3.044522 409:3.135494 441:3.258097 ++1 1:0.000000 6:4.852029 9:4.158882 7:2.772588 18:19.775016 20:9.887508 19:5.493060 15:2.197224 13:1.098612 14:1.098612 17:1.098612 21:1.098612 16:1.098612 30:23.566998 25:12.476646 28:2.772588 ++1 1:0.000000 2:0.000000 4:0.000000 6:2.079441 11:0.693147 8:0.693147 15:3.295836 17:2.197224 13:2.197224 20:2.197224 18:1.098612 30:6.931470 22:4.158882 32:4.158882 25:1.386294 28:1.386294 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:8.317764 6:4.158882 9:2.079441 12:1.386294 11:0.693147 15:8.788896 18:7.690284 19:6.591672 13:3.295836 16:3.295836 20:1.098612 ++1 2:0.000000 7:4.852029 8:2.772588 13:2.197224 18:2.197224 15:1.098612 16:1.098612 27:1.386294 32:1.386294 46:5.375277 66:15.567280 71:3.891820 76:1.945910 67:1.945910 87:8.317768 90:6.238326 ++1 3:0.000000 1:0.000000 6:0.693147 14:2.197224 17:1.098612 29:1.386294 27:1.386294 32:1.386294 41:3.218876 39:1.609438 42:1.609438 57:1.791759 47:1.791759 70:3.891820 66:1.945910 83:1.945910 ++1 3:0.000000 2:0.000000 1:0.000000 8:0.693147 6:0.693147 17:2.197224 13:2.197224 14:1.098612 15:1.098612 32:4.158882 27:2.772588 22:2.772588 37:3.218876 41:1.609438 39:1.609438 42:1.609438 ++1 3:0.000000 1:0.000000 6:0.693147 11:0.693147 13:3.295836 16:1.098612 15:1.098612 24:4.158882 32:1.386294 46:3.583518 66:1.945910 78:1.945910 86:6.238326 87:4.158884 107:2.197225 125:2.197225 ++1 3:0.000000 1:0.000000 6:0.693147 8:0.693147 18:6.591672 15:3.295836 13:2.197224 32:4.158882 22:1.386294 34:1.609438 37:1.609438 41:1.609438 46:3.583518 48:3.583518 73:7.783640 71:1.945910 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 8:0.693147 7:0.693147 15:2.197224 20:2.197224 19:1.098612 13:1.098612 16:1.098612 22:1.386294 32:1.386294 37:1.609438 75:1.945910 78:1.945910 ++1 2:0.000000 1:0.000000 3:0.000000 13:5.493060 19:2.197224 15:1.098612 21:1.098612 16:1.098612 29:1.386294 26:1.386294 27:1.386294 32:1.386294 37:3.218876 42:1.609438 46:7.167036 47:1.791759 ++1 2:0.000000 1:0.000000 3:0.000000 13:2.197224 15:1.098612 21:1.098612 16:1.098612 32:2.772588 29:1.386294 26:1.386294 27:1.386294 37:1.609438 42:1.609438 46:3.583518 45:1.791759 47:1.791759 ++1 1:0.000000 3:0.000000 32:1.386294 40:3.218876 130:2.302585 337:2.944439 348:2.995732 658:3.688879 4436:6.263398 4437:6.263398 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 12:1.386294 9:0.693147 13:2.197224 20:2.197224 15:1.098612 17:1.098612 14:1.098612 19:1.098612 18:1.098612 32:2.772588 ++1 2:0.000000 3:0.000000 7:2.772588 8:0.693147 6:0.693147 13:2.197224 25:9.704058 22:2.772588 27:1.386294 26:1.386294 32:1.386294 40:3.218876 37:1.609438 47:17.917590 62:5.375277 46:3.583518 ++1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 7:2.772588 6:2.079441 12:0.693147 9:0.693147 11:0.693147 15:8.788896 16:5.493060 18:5.493060 13:2.197224 20:1.098612 21:1.098612 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 12:1.386294 6:0.693147 20:2.197224 15:1.098612 17:1.098612 14:1.098612 13:1.098612 32:2.772588 41:1.609438 39:1.609438 ++1 1:0.000000 8:1.386294 15:2.197224 18:2.197224 22:1.386294 32:1.386294 75:1.945910 73:1.945910 89:2.079442 85:2.079442 109:2.197225 108:2.197225 174:2.484907 175:2.484907 263:2.772589 295:2.833213 ++1 3:0.000000 1:0.000000 7:1.386294 10:1.386294 8:0.693147 13:2.197224 14:2.197224 15:1.098612 17:1.098612 26:5.545176 22:1.386294 32:1.386294 39:4.828314 37:1.609438 42:1.609438 48:19.709349 ++1 1:0.000000 3:0.000000 7:2.079441 8:1.386294 10:1.386294 15:2.197224 13:2.197224 17:1.098612 18:1.098612 26:5.545176 22:1.386294 32:1.386294 39:3.218876 37:1.609438 48:8.958795 52:3.583518 ++1 1:0.000000 30:1.386294 32:1.386294 66:9.729550 88:2.079442 263:2.772589 339:2.944439 658:3.688879 1479:4.753590 3345:5.857933 8036:6.957497 8037:6.957497 8038:6.957497 ++1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 7:2.772588 10:1.386294 13:2.197224 16:1.098612 18:1.098612 26:8.317764 22:5.545176 32:4.158882 39:8.047190 46:3.583518 66:1.945910 79:1.945910 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 8:0.693147 11:0.693147 9:0.693147 18:3.295836 13:2.197224 17:2.197224 15:2.197224 16:1.098612 32:5.545176 25:2.772588 27:1.386294 37:6.437752 39:1.609438 46:7.167036 57:3.583518 50:1.791759 ++1 2:0.000000 8:0.693147 7:0.693147 9:0.693147 10:0.693147 11:0.693147 15:13.183344 13:2.197224 16:1.098612 17:1.098612 18:1.098612 19:1.098612 27:2.772588 22:1.386294 25:1.386294 28:1.386294 ++1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 21:1.098612 20:1.098612 14:1.098612 32:1.386294 41:1.609438 94:2.079442 146:2.397895 201:2.564949 269:2.772589 382:3.091042 ++1 1:0.000000 3:0.000000 32:1.386294 40:1.609438 90:6.238326 99:2.079442 107:2.197225 126:2.197225 108:2.197225 185:2.484907 169:2.484907 471:3.295837 499:3.367296 990:8.348774 1064:4.317488 1247:4.465908 ++1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 5:0.000000 12:1.386294 6:1.386294 15:4.394448 14:2.197224 17:1.098612 20:1.098612 22:2.772588 32:2.772588 27:1.386294 40:3.218876 37:3.218876 ++1 2:0.000000 6:2.079441 8:0.693147 9:0.693147 15:2.197224 17:1.098612 20:1.098612 16:1.098612 27:1.386294 28:1.386294 30:1.386294 38:3.218876 57:3.583518 52:1.791759 71:3.891820 72:1.945910 ++1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 6:0.693147 8:0.693147 12:0.693147 13:1.098612 32:1.386294 61:1.791759 48:1.791759 65:1.945910 69:1.945910 100:2.079442 110:2.197225 162:2.484907 ++1 3:0.000000 1:0.000000 2:0.000000 6:1.386294 8:1.386294 13:2.197224 15:2.197224 21:1.098612 32:2.772588 41:1.609438 46:3.583518 56:1.791759 77:3.891820 73:1.945910 67:1.945910 75:1.945910 ++1 1:0.000000 3:0.000000 8:2.079441 6:0.693147 13:2.197224 15:2.197224 17:1.098612 22:2.772588 32:1.386294 34:1.609438 37:1.609438 46:3.583518 66:3.891820 75:1.945910 71:1.945910 88:2.079442 ++1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 8:4.158882 7:1.386294 12:0.693147 20:3.295836 14:2.197224 18:2.197224 15:1.098612 16:1.098612 22:1.386294 26:1.386294 37:3.218876 48:3.583518 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 12:1.386294 6:0.693147 10:0.693147 9:0.693147 13:3.295836 20:2.197224 15:1.098612 17:1.098612 19:1.098612 25:2.772588 ++1 2:0.000000 4:0.000000 15:1.098612 28:1.386294 42:1.609438 37:1.609438 46:5.375277 62:1.791759 94:2.079442 87:2.079442 110:2.197225 141:4.795790 154:2.397895 155:2.397895 184:2.484907 212:2.564949 ++1 4:0.000000 11:0.693147 6:0.693147 8:0.693147 10:0.693147 13:2.197224 15:1.098612 14:1.098612 32:4.158882 22:2.772588 29:1.386294 37:3.218876 39:3.218876 38:3.218876 41:1.609438 47:8.958795 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:4.158882 11:0.693147 15:9.887508 20:3.295836 19:2.197224 17:1.098612 18:1.098612 21:1.098612 13:1.098612 14:1.098612 32:2.772588 ++1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 8:1.386294 7:0.693147 6:0.693147 18:4.394448 15:3.295836 17:2.197224 16:1.098612 19:1.098612 14:1.098612 26:2.772588 30:1.386294 28:1.386294 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 3:0.000000 1:0.000000 6:0.693147 14:2.197224 15:1.098612 17:1.098612 27:1.386294 32:1.386294 41:3.218876 57:1.791759 70:3.891820 73:1.945910 67:1.945910 83:1.945910 85:6.238326 90:4.158884 ++1 2:0.000000 7:2.079441 6:0.693147 14:1.098612 38:3.218876 35:1.609438 49:1.791759 111:2.197225 147:4.795790 213:10.259796 196:10.259796 197:7.694847 195:5.129898 234:2.639057 253:2.708050 369:3.044522 ++1 5:0.000000 2:0.000000 15:1.098612 14:1.098612 32:2.772588 31:1.386294 43:1.609438 41:1.609438 50:1.791759 72:3.891820 68:1.945910 88:2.079442 102:2.079442 123:4.394450 110:4.394450 119:2.197225 ++1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 8:1.386294 17:2.197224 19:2.197224 15:1.098612 16:1.098612 14:1.098612 32:1.386294 38:16.094380 43:1.609438 36:1.609438 41:1.609438 47:14.334072 ++1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 11:0.693147 21:2.197224 15:1.098612 19:1.098612 17:1.098612 14:1.098612 32:2.772588 27:1.386294 28:1.386294 38:4.828314 43:1.609438 36:1.609438 ++1 2:0.000000 5:0.000000 4:0.000000 9:0.693147 10:0.693147 12:0.693147 17:1.098612 15:1.098612 22:4.158882 24:1.386294 29:1.386294 32:1.386294 38:17.703818 39:3.218876 47:10.750554 48:3.583518 ++1 1:0.000000 3:0.000000 2:0.000000 15:1.098612 31:1.386294 32:1.386294 88:2.079442 270:2.772589 414:3.178054 546:3.465736 658:3.688879 645:3.688879 798:3.912023 ++1 6:0.693147 7:0.693147 17:2.197224 27:2.772588 22:2.772588 29:1.386294 32:1.386294 39:1.609438 42:1.609438 49:3.583518 56:1.791759 51:1.791759 96:8.317768 98:2.079442 95:2.079442 99:2.079442 ++1 3:0.000000 2:0.000000 6:9.704058 9:0.693147 7:0.693147 16:2.197224 17:1.098612 28:1.386294 34:3.218876 38:1.609438 60:21.501108 51:10.750554 50:7.167036 61:1.791759 56:1.791759 49:1.791759 ++1 2:0.000000 1:0.000000 3:0.000000 7:7.624617 6:0.693147 8:0.693147 16:1.098612 14:1.098612 22:2.772588 27:2.772588 38:1.609438 37:1.609438 39:1.609438 35:1.609438 60:25.084626 49:19.709349 ++1 6:2.079441 22:2.772588 39:3.218876 88:2.079442 109:2.197225 135:2.302585 212:2.564949 231:2.639057 317:2.890372 311:2.890372 343:2.995732 470:6.591674 500:3.367296 558:6.993016 635:3.688879 963:4.174387 ++1 3:0.000000 6:2.772588 38:3.218876 34:3.218876 47:3.583518 52:1.791759 61:1.791759 50:1.791759 70:5.837730 74:3.891820 77:1.945910 108:2.197225 114:2.197225 125:2.197225 139:4.605170 136:2.302585 ++1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 7:5.545176 11:0.693147 19:3.295836 16:3.295836 20:1.098612 14:1.098612 22:6.931470 26:4.158882 28:1.386294 25:1.386294 24:1.386294 37:8.047190 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 6:0.693147 11:0.693147 12:0.693147 14:4.394448 16:2.197224 15:2.197224 13:2.197224 17:1.098612 20:1.098612 25:5.545176 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 2:0.000000 1:0.000000 3:0.000000 8:0.693147 15:2.197224 17:1.098612 37:1.609438 48:1.791759 107:2.197225 145:2.397895 186:2.484907 183:2.484907 240:2.639057 260:2.708050 427:3.218876 456:6.516194 ++1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 ++1 2:0.000000 4:0.000000 15:1.098612 28:1.386294 42:1.609438 37:1.609438 46:5.375277 62:1.791759 94:2.079442 87:2.079442 110:2.197225 141:4.795790 154:2.397895 155:2.397895 184:2.484907 212:2.564949 ++1 3:0.000000 1:0.000000 8:3.465735 7:1.386294 11:0.693147 9:0.693147 16:3.295836 21:1.098612 22:5.545176 24:2.772588 27:1.386294 25:1.386294 32:1.386294 39:3.218876 37:3.218876 41:1.609438 ++1 1:0.000000 3:0.000000 6:1.386294 7:0.693147 8:0.693147 21:1.098612 18:1.098612 19:1.098612 15:1.098612 30:1.386294 40:1.609438 95:2.079442 87:2.079442 91:2.079442 105:2.197225 106:2.197225 ++1 2:0.000000 3:0.000000 6:0.693147 8:0.693147 15:1.098612 25:4.158882 40:1.609438 62:1.791759 77:1.945910 87:2.079442 105:4.394450 125:2.197225 139:4.605170 164:2.484907 176:2.484907 335:2.944439 ++1 2:0.000000 1:0.000000 3:0.000000 8:0.693147 15:1.098612 25:1.386294 40:1.609438 60:1.791759 61:1.791759 49:1.791759 77:3.891820 72:1.945910 101:2.079442 130:4.605170 138:2.302585 320:2.890372 ++1 1:0.000000 3:0.000000 2:0.000000 8:0.693147 15:1.098612 40:1.609438 60:3.583518 57:1.791759 61:1.791759 49:1.791759 77:1.945910 95:4.158884 107:2.197225 130:2.302585 176:2.484907 320:2.890372 ++1 3:0.000000 2:0.000000 5:0.000000 4:0.000000 7:4.158882 6:3.465735 9:2.079441 16:7.690284 15:4.394448 13:3.295836 14:3.295836 17:1.098612 19:1.098612 33:4.158882 30:1.386294 37:16.094380 ++1 4:0.000000 2:0.000000 6:0.693147 13:1.098612 17:1.098612 33:1.386294 37:1.609438 35:1.609438 54:1.791759 46:1.791759 66:1.945910 73:1.945910 77:1.945910 74:1.945910 70:1.945910 88:4.158884 ++1 5:0.000000 4:0.000000 3:0.000000 6:4.158882 8:0.693147 9:0.693147 18:4.394448 15:3.295836 21:2.197224 16:1.098612 25:2.772588 31:1.386294 28:1.386294 38:19.313256 37:12.875504 34:6.437752 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 7:6.931470 11:1.386294 9:0.693147 12:0.693147 15:5.493060 19:3.295836 20:3.295836 16:3.295836 13:1.098612 17:1.098612 14:1.098612 26:2.772588 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 7:13.862940 11:1.386294 9:1.386294 8:0.693147 10:0.693147 12:0.693147 13:5.493060 15:4.394448 16:4.394448 20:2.197224 19:2.197224 17:2.197224 ++1 3:0.000000 1:0.000000 7:2.772588 8:0.693147 13:2.197224 15:1.098612 33:2.772588 26:1.386294 27:1.386294 40:3.218876 41:3.218876 37:1.609438 39:1.609438 35:1.609438 45:1.791759 46:1.791759 ++1 2:0.000000 4:0.000000 6:5.545176 10:2.079441 18:10.986120 15:3.295836 16:3.295836 21:1.098612 20:1.098612 25:1.386294 38:11.266066 34:4.828314 37:3.218876 39:1.609438 52:7.167036 47:5.375277 ++1 2:0.000000 4:0.000000 7:1.386294 15:3.295836 19:2.197224 20:1.098612 25:1.386294 40:3.218876 39:1.609438 35:1.609438 73:29.188650 75:1.945910 71:1.945910 87:2.079442 135:2.302585 149:2.397895 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 13:37.352808 16:1.098612 33:16.635528 29:15.249234 24:2.772588 22:1.386294 30:1.386294 39:3.218876 40:1.609438 35:1.609438 46:21.501108 45:21.501108 ++1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 13:37.352808 16:1.098612 33:16.635528 29:15.249234 24:2.772588 22:1.386294 30:1.386294 39:3.218876 40:1.609438 35:1.609438 46:21.501108 45:21.501108 ++1 2:0.000000 4:0.000000 6:2.772588 7:2.079441 8:0.693147 9:0.693147 16:6.591672 15:2.197224 17:1.098612 20:1.098612 22:2.772588 33:2.772588 27:2.772588 25:1.386294 26:1.386294 37:4.828314 ++1 2:0.000000 4:0.000000 6:2.772588 7:2.079441 8:0.693147 9:0.693147 16:6.591672 15:2.197224 17:1.098612 20:1.098612 22:2.772588 33:2.772588 27:2.772588 25:1.386294 26:1.386294 37:4.828314 ++1 1:0.000000 4:0.000000 3:0.000000 8:1.386294 7:0.693147 12:0.693147 6:0.693147 15:2.197224 13:1.098612 16:1.098612 14:1.098612 26:2.772588 29:2.772588 30:1.386294 33:1.386294 40:1.609438 ++1 1:0.000000 3:0.000000 8:0.693147 7:0.693147 9:0.693147 15:1.098612 19:1.098612 26:2.772588 57:1.791759 82:1.945910 75:1.945910 101:2.079442 123:4.394450 107:2.197225 130:4.605170 138:2.302585 ++1 1:0.000000 4:0.000000 3:0.000000 8:1.386294 7:0.693147 12:0.693147 6:0.693147 15:2.197224 13:1.098612 16:1.098612 14:1.098612 26:2.772588 29:2.772588 30:1.386294 33:1.386294 40:1.609438 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 8:2.772588 9:0.693147 12:0.693147 16:1.098612 13:1.098612 42:6.437752 40:1.609438 46:1.791759 47:1.791759 66:5.837730 75:1.945910 70:1.945910 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 8:2.772588 9:0.693147 12:0.693147 16:1.098612 13:1.098612 42:6.437752 40:1.609438 46:1.791759 47:1.791759 66:5.837730 75:1.945910 70:1.945910 ++1 1:0.000000 3:0.000000 7:5.545176 8:0.693147 6:0.693147 13:7.690284 15:2.197224 17:1.098612 14:1.098612 22:6.931470 33:6.931470 37:20.922694 40:12.875504 41:1.609438 46:12.542313 45:8.958795 ++1 1:0.000000 3:0.000000 7:6.238323 8:0.693147 6:0.693147 13:7.690284 15:2.197224 17:1.098612 14:1.098612 22:6.931470 33:6.931470 37:20.922694 40:12.875504 41:1.609438 46:12.542313 45:8.958795 ++1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 8:2.079441 7:2.079441 9:0.693147 13:6.591672 15:1.098612 14:1.098612 17:1.098612 33:5.545176 27:1.386294 24:1.386294 39:6.437752 41:1.609438 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 5:0.000000 7:26.339586 8:5.545176 9:2.079441 11:0.693147 12:0.693147 6:0.693147 15:13.183344 17:7.690284 13:6.591672 19:4.394448 16:3.295836 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 7:13.862940 8:4.158882 15:12.084732 16:5.493060 13:3.295836 19:1.098612 33:8.317764 28:8.317764 26:6.931470 29:2.772588 22:2.772588 27:1.386294 ++1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 8:2.079441 7:2.079441 9:0.693147 13:6.591672 15:1.098612 14:1.098612 17:1.098612 33:5.545176 27:1.386294 24:1.386294 39:6.437752 41:1.609438 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 8:0.693147 13:4.394448 29:2.772588 33:2.772588 26:1.386294 22:1.386294 40:1.609438 42:1.609438 39:1.609438 46:3.583518 55:3.583518 ++1 3:0.000000 1:0.000000 7:2.079441 8:1.386294 13:4.394448 15:4.394448 20:2.197224 17:2.197224 18:1.098612 28:2.772588 22:2.772588 33:2.772588 24:1.386294 40:3.218876 41:1.609438 46:3.583518 ++1 7:2.079441 8:1.386294 11:0.693147 13:4.394448 15:2.197224 16:1.098612 22:2.772588 33:2.772588 26:1.386294 27:1.386294 29:1.386294 24:1.386294 40:1.609438 42:1.609438 46:3.583518 47:1.791759 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:7.624617 6:5.545176 9:1.386294 12:1.386294 8:1.386294 18:25.268076 15:6.591672 17:4.394448 16:4.394448 19:3.295836 13:2.197224 ++1 1:0.000000 3:0.000000 6:2.772588 13:1.098612 18:1.098612 29:1.386294 22:1.386294 33:1.386294 40:1.609438 34:1.609438 37:1.609438 62:3.583518 51:3.583518 46:1.791759 47:1.791759 52:1.791759 ++1 1:0.000000 3:0.000000 8:2.079441 18:5.493060 13:4.394448 15:3.295836 17:2.197224 22:2.772588 33:2.772588 40:6.437752 37:1.609438 42:1.609438 46:7.167036 45:3.583518 66:21.405010 73:1.945910 ++1 1:0.000000 3:0.000000 6:1.386294 8:1.386294 13:6.591672 14:6.591672 17:2.197224 16:1.098612 22:4.158882 25:2.772588 26:1.386294 41:4.828314 37:3.218876 39:1.609438 35:1.609438 46:7.167036 ++1 3:0.000000 1:0.000000 8:1.386294 17:2.197224 21:1.098612 13:1.098612 27:1.386294 37:1.609438 73:13.621370 66:3.891820 82:1.945910 99:2.079442 124:4.394450 107:2.197225 114:2.197225 135:2.302585 ++1 8:0.693147 15:4.394448 13:3.295836 22:5.545176 33:4.158882 45:5.375277 46:5.375277 57:3.583518 87:4.158884 112:4.394450 113:4.394450 107:2.197225 131:2.302585 169:2.484907 162:2.484907 193:15.389694 ++1 2:0.000000 7:2.772588 8:0.693147 15:6.591672 18:5.493060 17:2.197224 20:1.098612 29:6.931470 33:6.931470 22:4.158882 30:1.386294 25:1.386294 40:8.047190 37:1.609438 61:14.334072 46:5.375277 ++1 2:0.000000 1:0.000000 3:0.000000 18:2.197224 17:1.098612 13:1.098612 15:1.098612 33:1.386294 40:3.218876 46:1.791759 67:3.891820 107:2.197225 124:2.197225 129:4.605170 162:4.969814 193:10.259796 ++1 3:0.000000 1:0.000000 7:4.158882 8:0.693147 15:5.493060 13:4.394448 16:1.098612 18:1.098612 33:5.545176 22:4.158882 27:2.772588 28:1.386294 40:1.609438 39:1.609438 37:1.609438 41:1.609438 ++1 3:0.000000 1:0.000000 7:4.158882 8:0.693147 15:5.493060 13:4.394448 16:1.098612 18:1.098612 33:5.545176 22:4.158882 27:2.772588 28:1.386294 40:1.609438 39:1.609438 37:1.609438 41:1.609438 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:0.693147 8:0.693147 13:2.197224 15:1.098612 33:4.158882 30:2.772588 22:2.772588 43:1.609438 46:5.375277 45:3.583518 48:1.791759 74:1.945910 ++1 2:0.000000 4:0.000000 8:1.386294 11:0.693147 19:3.295836 15:2.197224 14:1.098612 29:2.772588 25:1.386294 33:1.386294 37:3.218876 35:1.609438 54:1.791759 47:1.791759 73:1.945910 68:1.945910 ++1 3:0.000000 2:0.000000 1:0.000000 8:0.693147 11:0.693147 6:0.693147 19:3.295836 14:2.197224 15:2.197224 13:1.098612 16:1.098612 17:1.098612 21:1.098612 28:4.158882 29:2.772588 37:6.437752 ++1 3:0.000000 1:0.000000 8:0.693147 12:0.693147 14:1.098612 21:1.098612 17:1.098612 13:1.098612 15:1.098612 29:2.772588 33:1.386294 37:3.218876 40:1.609438 48:1.791759 58:1.791759 46:1.791759 ++1 3:0.000000 8:0.693147 15:2.197224 14:1.098612 67:1.945910 88:2.079442 127:2.197225 122:2.197225 163:2.484907 174:2.484907 402:3.135494 509:6.802394 8937:6.957497 ++1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 7:4.158882 8:2.079441 13:2.197224 15:1.098612 14:1.098612 33:2.772588 29:1.386294 30:1.386294 34:1.609438 35:1.609438 59:1.791759 45:1.791759 ++1 2:0.000000 5:0.000000 3:0.000000 1:0.000000 4:0.000000 7:2.772588 6:2.079441 8:1.386294 11:1.386294 16:10.986120 18:8.788896 15:6.591672 19:6.591672 20:2.197224 17:1.098612 29:6.931470 ++1 1:0.000000 3:0.000000 7:2.079441 13:2.197224 17:1.098612 15:1.098612 14:1.098612 33:2.772588 26:1.386294 27:1.386294 40:3.218876 39:1.609438 35:1.609438 45:1.791759 46:1.791759 50:1.791759 ++1 3:0.000000 2:0.000000 7:3.465735 8:0.693147 6:0.693147 15:3.295836 14:1.098612 27:1.386294 31:1.386294 40:3.218876 35:1.609438 54:1.791759 61:1.791759 57:1.791759 88:4.158884 107:2.197225 ++1 3:0.000000 2:0.000000 1:0.000000 7:2.079441 8:1.386294 15:5.493060 13:4.394448 16:2.197224 18:1.098612 17:1.098612 14:1.098612 22:4.158882 33:4.158882 27:2.772588 37:6.437752 39:1.609438 ++1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 8:1.386294 7:0.693147 15:5.493060 13:4.394448 18:1.098612 17:1.098612 16:1.098612 14:1.098612 22:4.158882 33:4.158882 28:1.386294 27:1.386294 ++1 7:1.386294 8:0.693147 11:0.693147 13:6.591672 15:2.197224 16:1.098612 22:4.158882 33:4.158882 27:1.386294 24:1.386294 40:1.609438 37:1.609438 46:5.375277 47:1.791759 73:3.891820 82:1.945910 ++1 1:0.000000 7:0.693147 13:2.197224 15:1.098612 33:5.545176 26:1.386294 40:3.218876 39:1.609438 35:1.609438 45:1.791759 46:1.791759 47:1.791759 79:1.945910 107:2.197225 114:2.197225 149:4.795790 ++1 1:0.000000 3:0.000000 7:6.238323 8:2.772588 9:1.386294 16:2.197224 18:2.197224 17:1.098612 28:4.158882 24:2.772588 26:1.386294 33:1.386294 22:1.386294 37:11.266066 40:3.218876 43:3.218876 ++1 7:2.079441 8:1.386294 11:0.693147 13:4.394448 15:2.197224 16:1.098612 22:2.772588 33:2.772588 26:1.386294 27:1.386294 29:1.386294 24:1.386294 40:1.609438 42:1.609438 46:3.583518 47:1.791759 ++1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 7:2.079441 8:2.079441 13:3.295836 15:1.098612 29:1.386294 33:1.386294 30:1.386294 34:1.609438 45:3.583518 59:1.791759 46:1.791759 79:1.945910 ++1 2:0.000000 4:0.000000 13:3.295836 14:1.098612 22:2.772588 33:2.772588 35:1.609438 46:3.583518 45:1.791759 48:1.791759 55:1.791759 78:5.837730 65:1.945910 70:1.945910 97:2.079442 114:2.197225 ++1 3:0.000000 7:6.931470 8:2.772588 15:5.493060 17:3.295836 16:3.295836 14:2.197224 29:2.772588 33:2.772588 26:2.772588 25:1.386294 28:1.386294 37:1.609438 43:1.609438 40:1.609438 48:7.167036 ++1 2:0.000000 8:1.386294 18:2.197224 15:1.098612 16:1.098612 33:2.772588 29:1.386294 38:6.437752 37:3.218876 40:1.609438 36:1.609438 60:5.375277 46:3.583518 54:1.791759 66:13.621370 77:9.729550 ++1 1:0.000000 3:0.000000 2:0.000000 7:2.079441 8:0.693147 16:6.591672 13:4.394448 17:1.098612 33:2.772588 29:1.386294 24:1.386294 37:8.047190 34:3.218876 45:1.791759 46:1.791759 49:1.791759 ++1 1:0.000000 2:0.000000 3:0.000000 7:7.624617 8:0.693147 16:6.591672 13:4.394448 14:1.098612 19:1.098612 17:1.098612 33:2.772588 29:1.386294 24:1.386294 37:11.266066 34:3.218876 40:1.609438 ++1 3:0.000000 2:0.000000 4:0.000000 7:0.693147 9:0.693147 13:2.197224 16:2.197224 14:1.098612 29:5.545176 24:2.772588 33:2.772588 30:1.386294 27:1.386294 39:1.609438 35:1.609438 45:1.791759 ++1 3:0.000000 1:0.000000 7:9.704058 8:1.386294 16:3.295836 17:2.197224 13:2.197224 21:2.197224 18:1.098612 26:4.158882 33:4.158882 27:2.772588 29:1.386294 24:1.386294 37:12.875504 40:3.218876 ++1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 7:7.624617 8:2.079441 9:0.693147 13:2.197224 15:1.098612 19:1.098612 14:1.098612 29:2.772588 33:2.772588 26:1.386294 22:1.386294 37:4.828314 ++1 3:0.000000 1:0.000000 7:4.158882 9:0.693147 17:1.098612 21:1.098612 16:1.098612 15:1.098612 33:2.772588 22:2.772588 26:1.386294 27:1.386294 40:3.218876 42:1.609438 37:1.609438 39:1.609438 ++1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 8:1.386294 7:0.693147 9:0.693147 13:4.394448 17:1.098612 15:1.098612 33:4.158882 29:4.158882 24:2.772588 27:2.772588 26:1.386294 22:1.386294 ++1 3:0.000000 2:0.000000 1:0.000000 8:1.386294 11:1.386294 7:0.693147 21:1.098612 16:1.098612 30:4.158882 22:2.772588 24:2.772588 28:1.386294 48:1.791759 70:1.945910 119:6.591675 126:4.394450 ++1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 7:5.545176 8:2.079441 9:0.693147 19:1.098612 15:1.098612 33:1.386294 30:1.386294 34:1.609438 46:1.791759 81:1.945910 73:1.945910 ++1 2:0.000000 3:0.000000 8:0.693147 6:0.693147 7:0.693147 15:3.295836 16:1.098612 14:1.098612 33:11.090352 26:2.772588 24:2.772588 27:1.386294 31:1.386294 40:6.437752 36:6.437752 42:3.218876 ++1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 7:1.386294 8:0.693147 15:5.493060 16:4.394448 13:3.295836 18:1.098612 19:1.098612 14:1.098612 33:15.249234 24:5.545176 27:1.386294 29:1.386294 ++1 2:0.000000 4:0.000000 6:2.772588 8:1.386294 10:1.386294 18:17.577792 15:5.493060 17:2.197224 16:2.197224 13:1.098612 14:1.098612 19:1.098612 21:1.098612 33:2.772588 25:2.772588 29:1.386294 ++1 17:1.098612 148:4.795790 1853:5.010635 9217:6.957497 ++1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 7:2.079441 9:0.693147 11:0.693147 12:0.693147 16:3.295836 17:1.098612 13:1.098612 14:1.098612 22:4.158882 28:2.772588 27:1.386294 26:1.386294 ++1 3:0.000000 2:0.000000 4:0.000000 8:0.693147 7:0.693147 19:1.098612 13:1.098612 16:1.098612 33:2.772588 27:1.386294 45:1.791759 46:1.791759 75:1.945910 67:1.945910 94:2.079442 89:2.079442 ++1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 8:3.465735 7:1.386294 11:0.693147 6:0.693147 15:3.295836 17:1.098612 16:1.098612 14:1.098612 27:2.772588 22:1.386294 29:1.386294 33:1.386294 ++1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 7:2.772588 12:0.693147 13:6.591672 17:1.098612 15:1.098612 14:1.098612 29:4.158882 33:2.772588 26:1.386294 27:1.386294 42:1.609438 38:1.609438 ++1 2:0.000000 8:1.386294 6:0.693147 13:6.591672 18:5.493060 15:2.197224 20:2.197224 16:1.098612 33:2.772588 29:1.386294 22:1.386294 30:1.386294 25:1.386294 38:6.437752 39:1.609438 47:8.958795 ++1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 7:1.386294 6:1.386294 15:9.887508 18:6.591672 19:2.197224 14:2.197224 16:1.098612 21:1.098612 23:11.090352 28:2.772588 27:1.386294 ++1 2:0.000000 3:0.000000 4:0.000000 21:3.295836 18:3.295836 19:2.197224 16:1.098612 14:1.098612 23:5.545176 30:1.386294 28:1.386294 25:1.386294 42:1.609438 36:1.609438 35:1.609438 50:1.791759 ++1 2:0.000000 6:1.386294 10:0.693147 9:0.693147 19:2.197224 16:2.197224 21:1.098612 23:1.386294 25:1.386294 40:3.218876 36:3.218876 38:1.609438 47:1.791759 52:1.791759 50:1.791759 51:1.791759 +-1 2:0.000000 10:1.386294 11:0.693147 9:0.693147 12:0.693147 19:1.098612 21:1.098612 23:1.386294 40:1.609438 56:1.791759 65:1.945910 80:1.945910 75:1.945910 92:2.079442 123:2.197225 158:2.397895 +-1 3:0.000000 5:0.000000 2:0.000000 10:2.079441 11:1.386294 12:0.693147 6:0.693147 9:0.693147 19:1.098612 23:2.772588 31:1.386294 24:1.386294 38:1.609438 34:1.609438 56:1.791759 84:3.891820 +-1 2:0.000000 4:0.000000 10:0.693147 11:0.693147 23:2.772588 29:1.386294 39:3.218876 61:3.583518 51:1.791759 47:1.791759 49:1.791759 74:1.945910 68:1.945910 92:10.397210 89:4.158884 106:2.197225 +-1 1:0.000000 3:0.000000 10:1.386294 9:0.693147 8:0.693147 25:2.772588 23:1.386294 43:1.609438 70:1.945910 72:1.945910 92:2.079442 100:2.079442 128:2.197225 119:2.197225 109:2.197225 140:4.605170 +-1 2:0.000000 4:0.000000 5:0.000000 11:2.079441 9:2.079441 6:1.386294 8:1.386294 10:0.693147 16:6.591672 18:3.295836 15:3.295836 19:3.295836 17:1.098612 20:1.098612 25:4.158882 27:4.158882 +-1 1:0.000000 3:0.000000 117:2.197225 155:7.193685 248:2.708050 273:2.772589 281:2.772589 363:3.044522 508:3.401197 637:3.688879 695:7.522400 808:3.951244 1012:4.248495 1455:9.307920 1701:9.750394 2134:10.329572 +-1 5:0.000000 3:0.000000 2:0.000000 1:0.000000 10:2.079441 6:1.386294 7:0.693147 20:2.197224 18:2.197224 15:1.098612 24:2.772588 33:1.386294 36:3.218876 43:1.609438 39:1.609438 55:8.958795 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 6:0.693147 7:0.693147 20:5.493060 18:1.098612 24:2.772588 31:1.386294 54:1.791759 123:4.394450 158:2.397895 190:7.454721 187:2.484907 291:11.090356 +-1 2:0.000000 5:0.000000 4:0.000000 6:6.931470 12:0.693147 11:0.693147 9:0.693147 18:2.197224 17:2.197224 20:1.098612 23:4.158882 25:4.158882 29:1.386294 30:1.386294 27:1.386294 36:4.828314 +-1 2:0.000000 3:0.000000 6:6.238323 10:0.693147 18:5.493060 16:2.197224 17:1.098612 30:4.158882 25:2.772588 27:2.772588 26:1.386294 24:1.386294 36:1.609438 37:1.609438 40:1.609438 42:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 12:6.931470 10:2.772588 8:1.386294 7:0.693147 18:2.197224 15:2.197224 20:1.098612 19:1.098612 23:8.317764 27:4.158882 28:2.772588 24:1.386294 +-1 3:0.000000 1:0.000000 4:0.000000 5:0.000000 9:2.079441 12:0.693147 11:0.693147 10:0.693147 6:0.693147 21:2.197224 20:1.098612 18:1.098612 19:1.098612 15:1.098612 14:1.098612 23:2.772588 +-1 5:0.000000 2:0.000000 1:0.000000 4:0.000000 3:0.000000 8:7.624617 10:3.465735 6:3.465735 11:0.693147 9:0.693147 16:5.493060 21:1.098612 20:1.098612 15:1.098612 14:1.098612 23:5.545176 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 11:2.079441 10:1.386294 20:3.295836 16:2.197224 21:1.098612 14:1.098612 23:4.158882 27:4.158882 28:1.386294 42:3.218876 39:1.609438 35:1.609438 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 12:3.465735 6:2.772588 10:2.079441 7:2.079441 9:0.693147 16:4.394448 20:3.295836 21:1.098612 14:1.098612 23:4.158882 30:2.772588 27:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 3:0.000000 6:6.238323 9:3.465735 7:2.772588 12:2.079441 10:0.693147 8:0.693147 16:4.394448 17:2.197224 19:2.197224 20:2.197224 14:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:3.465735 7:1.386294 20:2.197224 21:1.098612 14:1.098612 23:4.158882 27:2.772588 42:1.609438 35:1.609438 53:3.583518 57:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.079441 6:2.079441 12:0.693147 7:0.693147 20:5.493060 17:3.295836 14:1.098612 23:4.158882 28:1.386294 37:6.437752 35:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:4.852029 8:2.772588 12:1.386294 6:0.693147 20:3.295836 14:1.098612 25:2.772588 23:2.772588 35:1.609438 57:1.791759 53:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:9.704058 9:3.465735 10:2.772588 6:1.386294 8:1.386294 11:0.693147 12:0.693147 16:3.295836 18:1.098612 17:1.098612 14:1.098612 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 6:11.783499 7:8.317764 10:4.158882 8:3.465735 9:2.079441 17:9.887508 18:2.197224 13:1.098612 14:1.098612 30:6.931470 32:5.545176 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 9:0.693147 12:0.693147 14:2.197224 21:1.098612 23:2.772588 28:1.386294 40:1.609438 35:1.609438 53:3.583518 56:3.583518 57:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 12:1.386294 9:0.693147 11:0.693147 7:0.693147 15:3.295836 19:1.098612 20:1.098612 14:1.098612 23:4.158882 35:1.609438 53:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 10:16.635528 6:4.158882 11:0.693147 8:0.693147 7:0.693147 9:0.693147 16:5.493060 13:3.295836 15:3.295836 21:2.197224 18:1.098612 30:5.545176 28:5.545176 +-1 2:0.000000 4:0.000000 5:0.000000 6:9.010911 7:4.852029 16:9.887508 21:1.098612 20:1.098612 19:1.098612 30:8.317764 25:6.931470 26:4.158882 23:2.772588 28:2.772588 27:1.386294 24:1.386294 +-1 5:0.000000 4:0.000000 19:1.098612 14:1.098612 23:2.772588 24:1.386294 22:1.386294 42:1.609438 41:1.609438 65:1.945910 71:1.945910 84:1.945910 98:2.079442 111:2.197225 143:2.397895 146:2.397895 +-1 3:0.000000 5:0.000000 1:0.000000 2:0.000000 4:0.000000 12:0.693147 6:0.693147 19:1.098612 23:5.545176 51:1.791759 84:1.945910 118:2.197225 166:2.484907 234:2.639057 294:2.772589 331:2.944439 +-1 1:0.000000 3:0.000000 5:0.000000 13:1.098612 18:1.098612 23:2.772588 45:1.791759 65:1.945910 119:2.197225 118:2.197225 167:2.484907 171:2.484907 218:2.639057 262:5.545178 294:2.772589 380:3.091042 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 6:2.079441 12:1.386294 11:0.693147 7:0.693147 8:0.693147 18:2.197224 19:1.098612 20:1.098612 17:1.098612 23:4.158882 22:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 12:0.693147 20:1.098612 19:1.098612 18:1.098612 23:1.386294 62:1.791759 82:1.945910 248:2.708050 294:2.772589 373:3.044522 362:3.044522 1503:4.753590 9810:13.914994 +-1 5:0.000000 1:0.000000 2:0.000000 4:0.000000 3:0.000000 6:1.386294 7:0.693147 10:0.693147 11:0.693147 9:0.693147 20:1.098612 19:1.098612 21:1.098612 23:9.704058 24:1.386294 22:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:1.386294 12:0.693147 16:3.295836 20:2.197224 18:2.197224 15:2.197224 30:2.772588 23:1.386294 22:1.386294 27:1.386294 42:3.218876 +-1 2:0.000000 4:0.000000 5:0.000000 10:0.693147 6:0.693147 15:2.197224 18:2.197224 17:1.098612 23:2.772588 24:1.386294 36:1.609438 53:1.791759 56:1.791759 47:1.791759 74:1.945910 79:1.945910 +-1 2:0.000000 26:1.386294 86:2.079442 2162:5.164786 2473:5.347108 4969:6.263398 9892:6.957497 9893:6.957497 9894:6.957497 9895:6.957497 9896:6.957497 9897:6.957497 9898:6.957497 9899:6.957497 9900:6.957497 +-1 3:0.000000 1:0.000000 7:1.386294 11:0.693147 19:2.197224 23:4.158882 31:2.772588 26:1.386294 45:3.583518 65:1.945910 117:4.394450 160:2.397895 204:2.564949 218:2.639057 294:2.772589 267:2.772589 +-1 1:0.000000 2:0.000000 3:0.000000 6:4.158882 12:0.693147 11:0.693147 15:1.098612 20:1.098612 18:1.098612 16:1.098612 17:1.098612 27:2.772588 22:1.386294 30:1.386294 23:1.386294 24:1.386294 +-1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 9:2.772588 10:2.079441 7:2.079441 11:1.386294 6:0.693147 8:0.693147 15:3.295836 18:3.295836 19:2.197224 20:2.197224 17:2.197224 16:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 9:2.772588 7:1.386294 12:0.693147 19:2.197224 16:2.197224 13:1.098612 17:1.098612 14:1.098612 18:1.098612 23:6.931470 28:4.158882 31:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:0.693147 8:0.693147 6:0.693147 19:1.098612 14:1.098612 23:6.931470 27:1.386294 35:1.609438 60:1.791759 47:1.791759 61:1.791759 80:3.891820 +-1 1:0.000000 2:0.000000 6:0.693147 7:0.693147 8:0.693147 26:1.386294 23:1.386294 61:1.791759 294:2.772589 435:3.218876 1936:5.010635 5005:6.263398 4969:6.263398 4952:6.263398 10036:6.957497 10037:6.957497 +-1 1:0.000000 3:0.000000 8:4.158882 6:2.079441 9:1.386294 11:0.693147 10:0.693147 7:0.693147 12:0.693147 16:4.394448 19:2.197224 23:12.476646 31:1.386294 22:1.386294 38:11.266066 36:4.828314 +-1 5:0.000000 3:0.000000 2:0.000000 4:0.000000 1:0.000000 10:4.158882 6:4.158882 9:2.772588 8:2.079441 11:1.386294 12:0.693147 16:6.591672 18:4.394448 21:1.098612 26:13.862940 23:4.158882 +-1 5:0.000000 10:1.386294 9:1.386294 11:0.693147 7:0.693147 12:0.693147 16:2.197224 20:1.098612 27:4.158882 23:2.772588 30:2.772588 56:3.583518 57:1.791759 53:1.791759 78:1.945910 75:1.945910 +-1 3:0.000000 2:0.000000 1:0.000000 5:0.000000 4:0.000000 11:1.386294 6:1.386294 10:0.693147 7:0.693147 19:2.197224 23:6.931470 25:1.386294 36:1.609438 35:1.609438 61:3.583518 45:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:1.386294 8:1.386294 9:0.693147 7:0.693147 20:2.197224 19:1.098612 15:1.098612 31:2.772588 23:2.772588 26:1.386294 30:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 10:4.158882 11:1.386294 7:1.386294 12:0.693147 18:2.197224 19:2.197224 21:1.098612 23:6.931470 28:1.386294 25:1.386294 30:1.386294 24:1.386294 36:3.218876 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 7:4.852029 11:2.079441 10:1.386294 9:1.386294 6:1.386294 18:2.197224 26:9.704058 23:5.545176 28:5.545176 25:1.386294 52:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.772588 9:0.693147 11:0.693147 7:0.693147 18:2.197224 21:1.098612 16:1.098612 23:9.704058 30:2.772588 28:1.386294 22:1.386294 43:1.609438 +-1 3:0.000000 2:0.000000 1:0.000000 5:0.000000 10:2.772588 8:2.079441 9:0.693147 6:0.693147 21:3.295836 16:3.295836 25:4.158882 23:2.772588 28:2.772588 29:1.386294 27:1.386294 36:24.141570 +-1 1:0.000000 5:0.000000 18:3.295836 69:3.891820 99:2.079442 88:2.079442 120:2.197225 124:2.197225 126:2.197225 135:2.302585 134:2.302585 141:2.397895 150:2.397895 217:2.564949 251:2.708050 296:2.833213 +-1 3:0.000000 1:0.000000 8:2.079441 12:0.693147 20:2.197224 18:1.098612 15:1.098612 23:4.158882 22:1.386294 43:1.609438 39:1.609438 61:3.583518 93:6.238326 89:2.079442 92:2.079442 119:2.197225 +-1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 5:0.000000 10:4.158882 6:1.386294 12:0.693147 9:0.693147 8:0.693147 18:4.394448 21:1.098612 23:6.931470 29:1.386294 36:1.609438 42:1.609438 +-1 1:0.000000 5:0.000000 2:0.000000 4:0.000000 3:0.000000 11:0.693147 19:1.098612 23:4.158882 31:1.386294 62:1.791759 65:1.945910 80:1.945910 98:4.158884 124:2.197225 111:2.197225 200:2.564949 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 6:0.693147 12:0.693147 20:2.197224 15:1.098612 21:1.098612 19:1.098612 14:1.098612 16:1.098612 23:4.158882 30:1.386294 28:1.386294 34:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 10:1.386294 9:1.386294 6:1.386294 11:1.386294 7:1.386294 16:3.295836 15:2.197224 17:1.098612 14:1.098612 23:2.772588 28:2.772588 36:1.609438 +-1 2:0.000000 9:2.079441 10:0.693147 6:0.693147 19:1.098612 16:1.098612 25:2.772588 23:1.386294 31:1.386294 27:1.386294 28:1.386294 30:1.386294 37:1.609438 42:1.609438 53:1.791759 50:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 9:0.693147 12:0.693147 10:0.693147 23:2.772588 22:1.386294 28:1.386294 68:1.945910 82:1.945910 99:2.079442 117:2.197225 159:2.397895 171:2.484907 +-1 2:0.000000 4:0.000000 10:4.158882 12:0.693147 11:0.693147 7:0.693147 6:0.693147 21:3.295836 17:1.098612 23:2.772588 29:1.386294 25:1.386294 38:4.828314 34:3.218876 43:3.218876 42:1.609438 +-1 4:0.000000 1:0.000000 3:0.000000 5:0.000000 19:1.098612 31:1.386294 23:1.386294 388:3.091042 2275:5.347108 3287:11.715866 3651:5.857933 10458:6.957497 10459:6.957497 10460:6.957497 +-1 126:2.197225 5103:6.263398 +-1 5:0.000000 2:0.000000 3:0.000000 1:0.000000 7:1.386294 10:0.693147 6:0.693147 19:1.098612 16:1.098612 14:1.098612 23:4.158882 24:2.772588 26:2.772588 38:1.609438 37:1.609438 41:1.609438 +-1 3:0.000000 10:0.693147 8:0.693147 13:1.098612 14:1.098612 23:4.158882 25:1.386294 22:1.386294 41:1.609438 45:1.791759 151:2.397895 187:2.484907 289:2.772589 379:3.091042 506:3.401197 505:3.401197 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 7:0.693147 9:0.693147 20:2.197224 21:2.197224 18:1.098612 23:5.545176 26:4.158882 30:2.772588 29:1.386294 36:1.609438 51:1.791759 53:1.791759 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 6:2.079441 11:0.693147 7:0.693147 20:4.394448 15:3.295836 18:1.098612 17:1.098612 23:4.158882 31:2.772588 30:1.386294 24:1.386294 34:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 6:4.158882 11:2.079441 9:1.386294 7:1.386294 8:0.693147 20:3.295836 19:1.098612 18:1.098612 23:4.158882 42:1.609438 51:1.791759 +-1 1:0.000000 3:0.000000 288:2.772589 382:3.091042 420:3.218876 657:3.688879 2495:5.347108 10503:6.957497 10504:6.957497 +-1 2:0.000000 4:0.000000 1:0.000000 7:4.158882 12:2.079441 6:2.079441 9:2.079441 11:1.386294 8:1.386294 10:1.386294 16:4.394448 17:2.197224 20:1.098612 21:1.098612 23:6.931470 26:2.772588 +-1 2:0.000000 3:0.000000 19:1.098612 23:2.772588 29:1.386294 24:1.386294 71:1.945910 170:2.484907 235:2.639057 343:2.995732 540:3.465736 768:3.850148 1469:4.653960 1948:5.010635 2130:5.164786 10580:13.914994 +-1 2:0.000000 5:0.000000 4:0.000000 7:0.693147 12:0.693147 10:0.693147 13:1.098612 19:1.098612 21:1.098612 18:1.098612 14:1.098612 23:4.158882 28:1.386294 24:1.386294 35:1.609438 84:1.945910 +-1 2:0.000000 1:0.000000 4:0.000000 12:1.386294 9:1.386294 10:0.693147 11:0.693147 7:0.693147 15:1.098612 14:1.098612 28:2.772588 23:1.386294 26:1.386294 38:3.218876 40:3.218876 39:1.609438 +-1 1:0.000000 3:0.000000 23:1.386294 82:1.945910 10598:13.914994 10599:13.914994 10600:6.957497 10601:6.957497 10602:6.957497 10603:6.957497 +-1 5:0.000000 6:2.079441 11:0.693147 13:1.098612 14:1.098612 23:2.772588 40:1.609438 41:1.609438 51:3.583518 53:1.791759 112:2.197225 157:4.795790 179:2.484907 218:2.639057 294:2.772589 308:2.890372 +-1 1:0.000000 3:0.000000 289:2.772589 2909:5.568345 5127:6.263398 10604:13.914994 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 12:1.386294 11:0.693147 6:0.693147 9:0.693147 19:1.098612 16:1.098612 23:8.317764 24:1.386294 65:1.945910 76:1.945910 72:1.945910 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 19:2.197224 14:1.098612 23:5.545176 31:1.386294 35:1.609438 84:1.945910 202:2.564949 218:2.639057 244:2.708050 259:2.708050 325:2.890372 338:2.944439 +-1 3:0.000000 1:0.000000 2:0.000000 6:1.386294 21:1.098612 17:1.098612 14:1.098612 22:1.386294 23:1.386294 37:1.609438 35:1.609438 62:3.583518 51:3.583518 82:1.945910 99:2.079442 88:2.079442 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 6:2.079441 12:0.693147 7:0.693147 20:3.295836 15:1.098612 23:1.386294 30:1.386294 61:1.791759 82:1.945910 86:2.079442 101:2.079442 130:2.302585 +-1 2:0.000000 4:0.000000 5:0.000000 6:3.465735 8:1.386294 12:0.693147 10:0.693147 11:0.693147 18:4.394448 20:2.197224 21:1.098612 19:1.098612 15:1.098612 14:1.098612 23:2.772588 30:1.386294 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 6:2.079441 10:0.693147 12:0.693147 11:0.693147 7:0.693147 8:0.693147 9:0.693147 16:3.295836 20:2.197224 21:1.098612 23:2.772588 30:2.772588 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 4:0.000000 12:1.386294 10:1.386294 7:0.693147 6:0.693147 20:1.098612 18:1.098612 14:1.098612 23:5.545176 25:5.545176 30:2.772588 22:1.386294 +-1 1:0.000000 3:0.000000 5140:6.263398 +-1 1:0.000000 3:0.000000 5:0.000000 11:0.693147 18:1.098612 23:2.772588 45:1.791759 65:1.945910 119:2.197225 218:2.639057 294:2.772589 1372:4.553877 2159:5.164786 3149:5.857933 5113:6.263398 10646:6.957497 +-1 2:0.000000 4:0.000000 12:0.693147 20:1.098612 19:1.098612 23:1.386294 62:1.791759 216:2.564949 294:2.772589 373:3.044522 1277:4.465908 1376:4.553877 10652:20.872491 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 7:9.010911 10:6.238323 6:3.465735 11:2.079441 8:0.693147 12:0.693147 16:3.295836 13:2.197224 17:2.197224 18:2.197224 26:22.180704 +-1 21:1.098612 29:1.386294 23:1.386294 62:3.583518 217:2.564949 248:2.708050 294:2.772589 1006:4.174387 1277:4.465908 1376:4.553877 1372:4.553877 2926:5.568345 5146:6.263398 10674:13.914994 10675:6.957497 +-1 18:1.098612 23:1.386294 148:2.397895 3445:11.715866 10676:6.957497 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 12:2.079441 10:0.693147 11:0.693147 7:0.693147 19:3.295836 13:1.098612 20:1.098612 17:1.098612 23:6.931470 31:5.545176 29:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 11:0.693147 21:1.098612 20:1.098612 19:1.098612 18:1.098612 23:1.386294 27:1.386294 72:1.945910 95:2.079442 112:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 9:1.386294 6:1.386294 11:0.693147 8:0.693147 10:0.693147 20:2.197224 21:1.098612 19:1.098612 14:1.098612 30:4.158882 23:2.772588 31:1.386294 41:1.609438 +-1 1:0.000000 2:0.000000 3:0.000000 6:2.079441 11:1.386294 12:0.693147 18:7.690284 16:2.197224 21:2.197224 20:1.098612 17:1.098612 22:1.386294 23:1.386294 25:1.386294 34:1.609438 51:3.583518 +-1 1:0.000000 2:0.000000 12:0.693147 6:0.693147 20:2.197224 18:2.197224 23:2.772588 31:1.386294 22:1.386294 51:1.791759 78:1.945910 188:2.484907 216:2.564949 255:2.708050 473:3.295837 535:3.433987 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 4:0.000000 6:0.693147 21:2.197224 23:2.772588 29:1.386294 36:1.609438 64:3.583518 99:2.079442 109:2.197225 189:2.484907 208:2.564949 217:2.564949 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:1.386294 8:0.693147 11:0.693147 20:3.295836 15:2.197224 21:1.098612 18:1.098612 17:1.098612 14:1.098612 22:4.158882 23:1.386294 +-1 1:0.000000 23:1.386294 448:3.258097 1956:5.010635 5094:6.263398 10805:6.957497 10806:6.957497 10807:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:0.693147 6:0.693147 20:3.295836 13:2.197224 21:2.197224 19:1.098612 18:1.098612 16:1.098612 23:5.545176 25:2.772588 31:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 7:2.079441 12:1.386294 11:1.386294 10:1.386294 9:1.386294 19:1.098612 21:1.098612 18:1.098612 23:5.545176 26:4.158882 31:1.386294 30:1.386294 +-1 1:0.000000 3:0.000000 9:0.693147 16:1.098612 17:1.098612 14:1.098612 23:1.386294 37:1.609438 41:1.609438 82:1.945910 78:1.945910 81:1.945910 99:2.079442 136:2.302585 159:2.397895 189:2.484907 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 11:1.386294 9:1.386294 8:0.693147 10:0.693147 19:1.098612 21:1.098612 18:1.098612 17:1.098612 13:1.098612 23:8.317764 31:1.386294 29:1.386294 +-1 2:0.000000 5:0.000000 11:5.545176 9:2.079441 10:2.079441 6:1.386294 12:0.693147 8:0.693147 18:3.295836 20:3.295836 16:2.197224 17:2.197224 19:1.098612 14:1.098612 23:6.931470 27:2.772588 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 6:4.852029 9:2.772588 12:0.693147 11:0.693147 8:0.693147 20:3.295836 15:2.197224 18:2.197224 16:1.098612 14:1.098612 23:4.158882 30:2.772588 +-1 9:0.693147 17:1.098612 28:1.386294 47:1.791759 81:1.945910 101:2.079442 159:2.397895 158:2.397895 189:4.969814 188:2.484907 185:2.484907 222:5.278114 223:2.639057 278:2.772589 297:2.833213 326:11.561488 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 6:2.772588 9:1.386294 12:1.386294 11:0.693147 10:0.693147 19:1.098612 18:1.098612 14:1.098612 23:8.317764 28:1.386294 27:1.386294 +-1 2:0.000000 7:2.079441 18:1.098612 26:1.386294 39:1.609438 79:1.945910 93:2.079442 94:2.079442 137:2.302585 147:2.397895 170:2.484907 177:2.484907 163:2.484907 205:2.564949 194:2.564949 248:10.832200 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.772588 7:2.079441 8:2.079441 11:0.693147 9:0.693147 6:0.693147 18:3.295836 17:1.098612 23:8.317764 24:1.386294 36:3.218876 42:1.609438 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 9:0.693147 12:0.693147 8:0.693147 11:0.693147 21:1.098612 20:1.098612 19:1.098612 15:1.098612 23:4.158882 82:1.945910 66:1.945910 134:2.302585 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 10:2.079441 11:1.386294 7:0.693147 9:0.693147 23:4.158882 26:1.386294 42:1.609438 38:1.609438 43:1.609438 57:3.583518 48:1.791759 76:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 12:0.693147 7:0.693147 8:0.693147 13:1.098612 14:1.098612 23:4.158882 29:1.386294 24:1.386294 35:1.609438 45:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:0.693147 6:0.693147 20:1.098612 15:1.098612 23:2.772588 62:1.791759 47:1.791759 81:1.945910 82:1.945910 99:2.079442 124:2.197225 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 5:0.000000 10:4.852029 9:2.772588 12:0.693147 11:0.693147 18:2.197224 17:2.197224 21:1.098612 16:1.098612 23:9.704058 28:4.158882 25:2.772588 +-1 2:0.000000 3:0.000000 1:0.000000 6:2.079441 12:1.386294 10:1.386294 7:0.693147 8:0.693147 18:2.197224 15:1.098612 23:4.158882 26:1.386294 24:1.386294 31:1.386294 40:1.609438 38:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:2.079441 8:0.693147 10:0.693147 7:0.693147 11:0.693147 12:0.693147 18:4.394448 19:1.098612 16:1.098612 20:1.098612 24:5.545176 +-1 3:0.000000 1:0.000000 11:0.693147 14:1.098612 41:1.609438 47:1.791759 115:4.394450 118:2.197225 134:4.605170 189:2.484907 214:2.564949 196:2.564949 268:2.772589 274:2.772589 348:2.995732 405:3.135494 +-1 6:0.693147 13:3.295836 15:1.098612 22:1.386294 23:1.386294 34:1.609438 46:1.791759 65:1.945910 134:2.302585 220:2.639057 218:2.639057 2240:5.347108 +-1 4:0.000000 2:0.000000 12:1.386294 11:0.693147 9:0.693147 19:2.197224 23:5.545176 28:1.386294 34:3.218876 42:1.609438 38:1.609438 58:5.375277 56:1.791759 84:1.945910 68:1.945910 98:2.079442 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 6:4.158882 12:0.693147 11:0.693147 19:1.098612 18:1.098612 20:1.098612 23:4.158882 30:2.772588 26:2.772588 22:1.386294 37:3.218876 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 6:1.386294 12:0.693147 9:0.693147 10:0.693147 11:0.693147 20:2.197224 19:1.098612 18:1.098612 16:1.098612 23:4.158882 24:1.386294 +-1 2:0.000000 14:1.098612 23:1.386294 41:1.609438 218:2.639057 305:2.833213 710:3.761200 768:3.850148 3641:5.857933 4087:6.263398 11242:6.957497 11243:6.957497 11244:6.957497 +-1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 10:0.693147 6:0.693147 9:0.693147 8:0.693147 18:3.295836 20:1.098612 19:1.098612 23:2.772588 25:1.386294 29:1.386294 45:1.791759 51:1.791759 +-1 4:0.000000 6:0.693147 10:0.693147 21:1.098612 18:1.098612 29:1.386294 23:1.386294 43:1.609438 40:1.609438 57:5.375277 49:3.583518 58:1.791759 105:4.394450 127:2.197225 148:2.397895 194:5.129898 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:2.079441 6:0.693147 8:0.693147 9:0.693147 18:3.295836 20:2.197224 23:4.158882 24:1.386294 38:4.828314 37:1.609438 34:1.609438 +-1 2:0.000000 8:0.693147 14:2.197224 23:2.772588 24:1.386294 39:3.218876 41:1.609438 35:1.609438 43:1.609438 53:1.791759 84:1.945910 99:2.079442 117:4.394450 111:2.197225 114:2.197225 115:2.197225 +-1 1:0.000000 3:0.000000 5:0.000000 4:0.000000 2:0.000000 11:1.386294 6:1.386294 7:0.693147 20:2.197224 19:1.098612 15:1.098612 18:1.098612 24:5.545176 23:4.158882 31:1.386294 26:1.386294 +-1 3:0.000000 5:0.000000 14:2.197224 19:1.098612 15:1.098612 16:1.098612 24:5.545176 22:1.386294 23:1.386294 39:3.218876 37:1.609438 58:1.791759 81:3.891820 78:3.891820 82:1.945910 68:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 10:0.693147 6:0.693147 14:1.098612 40:1.609438 82:1.945910 88:2.079442 147:2.397895 258:2.708050 312:5.780744 385:3.091042 398:3.135494 557:3.496508 556:3.496508 +-1 2:0.000000 31:1.386294 29:1.386294 23:1.386294 45:1.791759 78:1.945910 65:1.945910 117:2.197225 148:2.397895 161:2.397895 176:2.484907 217:2.564949 203:2.564949 218:2.639057 531:3.433987 561:3.496508 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 9:1.386294 10:1.386294 7:1.386294 12:0.693147 6:0.693147 18:5.493060 19:3.295836 21:1.098612 20:1.098612 30:5.545176 27:5.545176 23:5.545176 +-1 2:0.000000 5:0.000000 6:2.079441 10:1.386294 7:0.693147 9:0.693147 23:5.545176 25:1.386294 38:1.609438 36:1.609438 60:8.958795 52:1.791759 62:1.791759 49:1.791759 69:5.837730 83:3.891820 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:0.693147 9:0.693147 23:2.772588 43:1.609438 65:1.945910 73:1.945910 97:2.079442 112:2.197225 159:2.397895 218:2.639057 248:2.708050 +-1 1:0.000000 3:0.000000 9:0.693147 18:1.098612 22:1.386294 99:2.079442 115:4.394450 126:2.197225 109:2.197225 124:2.197225 134:2.302585 144:2.397895 170:2.484907 248:2.708050 288:2.772589 278:2.772589 +-1 2:0.000000 4:0.000000 7:2.772588 9:2.079441 10:1.386294 11:0.693147 6:0.693147 8:0.693147 12:0.693147 17:4.394448 21:2.197224 16:1.098612 25:6.931470 26:5.545176 23:4.158882 27:2.772588 +-1 3:0.000000 2:0.000000 5:0.000000 4:0.000000 9:2.772588 7:2.772588 12:0.693147 6:0.693147 20:4.394448 19:3.295836 15:1.098612 21:1.098612 17:1.098612 18:1.098612 16:1.098612 23:9.704058 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 9:0.693147 19:2.197224 13:1.098612 23:4.158882 39:1.609438 47:3.583518 62:1.791759 58:1.791759 65:1.945910 84:1.945910 117:2.197225 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:2.079441 9:1.386294 10:1.386294 12:0.693147 7:0.693147 20:1.098612 19:1.098612 18:1.098612 23:6.931470 28:1.386294 27:1.386294 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 6:0.693147 18:2.197224 13:1.098612 17:1.098612 23:1.386294 40:1.609438 43:1.609438 49:3.583518 76:1.945910 65:1.945910 +-1 4:0.000000 1:0.000000 5:0.000000 6:6.931470 8:1.386294 9:1.386294 12:1.386294 10:0.693147 18:8.788896 16:7.690284 17:6.591672 14:2.197224 21:1.098612 20:1.098612 26:8.317764 33:5.545176 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 7:0.693147 8:0.693147 19:2.197224 15:1.098612 13:1.098612 23:2.772588 31:1.386294 26:1.386294 25:1.386294 22:1.386294 57:1.791759 59:1.791759 +-1 2:0.000000 3:0.000000 6:2.079441 11:0.693147 7:0.693147 16:8.788896 14:1.098612 27:5.545176 28:1.386294 36:1.609438 34:1.609438 35:1.609438 50:5.375277 48:3.583518 47:1.791759 53:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 12:0.693147 23:2.772588 27:1.386294 30:1.386294 60:3.583518 72:3.891820 100:6.238326 102:2.079442 91:2.079442 118:4.394450 127:4.394450 128:2.197225 +-1 2:0.000000 3:0.000000 1:0.000000 11:2.079441 7:2.079441 10:0.693147 8:0.693147 9:0.693147 21:2.197224 16:2.197224 15:2.197224 20:1.098612 19:1.098612 17:1.098612 28:4.158882 23:1.386294 +-1 1:0.000000 3:0.000000 11524:6.957497 11525:6.957497 +-1 2:0.000000 4:0.000000 3:0.000000 12:1.386294 9:0.693147 10:0.693147 15:1.098612 20:1.098612 19:1.098612 23:2.772588 31:1.386294 29:1.386294 40:3.218876 38:1.609438 58:1.791759 62:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 12:2.079441 9:1.386294 6:1.386294 18:2.197224 21:1.098612 16:1.098612 23:2.772588 28:1.386294 36:1.609438 39:1.609438 43:1.609438 51:5.375277 52:3.583518 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 12:0.693147 23:4.158882 38:1.609438 62:1.791759 58:1.791759 65:1.945910 69:1.945910 92:2.079442 122:2.197225 151:2.397895 +-1 2:0.000000 6:5.545176 10:1.386294 12:0.693147 9:0.693147 8:0.693147 18:1.098612 16:1.098612 25:2.772588 23:1.386294 24:1.386294 36:4.828314 34:1.609438 61:3.583518 51:1.791759 56:1.791759 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 6:3.465735 8:1.386294 10:1.386294 11:0.693147 16:1.098612 23:5.545176 43:1.609438 51:7.167036 57:1.791759 78:5.837730 72:1.945910 92:6.238326 +-1 5:0.000000 2:0.000000 3:0.000000 4:0.000000 7:2.772588 10:1.386294 6:1.386294 11:0.693147 20:2.197224 15:1.098612 23:6.931470 25:4.158882 30:2.772588 26:2.772588 29:1.386294 43:4.828314 +-1 2:0.000000 4:0.000000 5:0.000000 6:7.624617 10:2.079441 9:2.079441 11:1.386294 8:0.693147 17:1.098612 19:1.098612 16:1.098612 42:1.609438 43:1.609438 51:12.542313 57:7.167036 52:5.375277 +-1 2:0.000000 1:0.000000 3:0.000000 6:3.465735 11:2.079441 9:1.386294 8:0.693147 7:0.693147 18:5.493060 20:3.295836 21:2.197224 15:2.197224 14:1.098612 23:4.158882 25:2.772588 35:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 6:3.465735 12:0.693147 8:0.693147 18:4.394448 20:3.295836 19:1.098612 15:1.098612 14:1.098612 23:2.772588 30:1.386294 34:4.828314 41:1.609438 51:1.791759 +-1 2:0.000000 4:0.000000 6:2.079441 9:0.693147 10:0.693147 7:0.693147 16:7.690284 19:2.197224 13:1.098612 21:1.098612 26:6.931470 23:2.772588 24:2.772588 30:2.772588 28:2.772588 27:2.772588 +-1 3:0.000000 2:0.000000 1:0.000000 5:0.000000 4:0.000000 8:0.693147 6:0.693147 11:0.693147 9:0.693147 18:3.295836 20:1.098612 19:1.098612 14:1.098612 17:1.098612 23:4.158882 31:1.386294 +-1 1:0.000000 4:0.000000 2:0.000000 3:0.000000 10:2.772588 8:2.772588 6:2.079441 11:0.693147 12:0.693147 16:5.493060 19:3.295836 21:2.197224 14:1.098612 20:1.098612 23:4.158882 27:2.772588 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 9:2.079441 12:1.386294 14:2.197224 21:1.098612 23:5.545176 28:2.772588 30:1.386294 24:1.386294 38:3.218876 34:1.609438 43:1.609438 +-1 3:0.000000 1:0.000000 5:0.000000 12:1.386294 9:0.693147 20:2.197224 19:1.098612 23:5.545176 54:1.791759 55:1.791759 82:1.945910 124:2.197225 159:2.397895 273:2.772589 461:3.295837 691:3.761200 +-1 3:0.000000 2:0.000000 4:0.000000 9:2.772588 16:4.394448 14:3.295836 17:2.197224 15:1.098612 13:1.098612 19:1.098612 28:5.545176 24:2.772588 23:2.772588 40:1.609438 38:1.609438 41:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 8:0.693147 23:1.386294 134:2.302585 374:3.044522 409:3.135494 413:3.178054 494:3.367296 889:4.060443 1037:4.248495 2473:5.347108 2889:5.568345 5070:6.263398 11854:13.914994 +-1 16:1.098612 251:2.708050 313:2.890372 401:3.135494 443:3.258097 884:4.060443 1016:4.248495 4886:6.263398 11864:6.957497 11865:6.957497 11866:6.957497 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 23:2.772588 65:1.945910 127:2.197225 130:2.302585 158:2.397895 187:2.484907 471:3.295837 818:3.951244 943:4.110874 1194:4.382027 +-1 1:0.000000 3:0.000000 41:1.609438 257:2.708050 382:3.091042 420:3.218876 884:4.060443 956:8.221748 2057:5.164786 2866:16.705035 2978:5.568345 11870:6.957497 11871:6.957497 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:2.079441 6:1.386294 12:0.693147 19:1.098612 23:4.158882 31:1.386294 25:1.386294 28:1.386294 45:3.583518 59:1.791759 74:3.891820 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 19:1.098612 18:1.098612 23:2.772588 135:2.302585 145:2.397895 217:2.564949 248:2.708050 254:2.708050 294:2.772589 313:2.890372 375:3.044522 +-1 2:0.000000 1:0.000000 9:1.386294 7:0.693147 17:1.098612 23:1.386294 28:1.386294 30:1.386294 47:3.583518 71:5.837730 101:2.079442 128:2.197225 121:2.197225 126:2.197225 117:2.197225 120:2.197225 +-1 2:0.000000 5:0.000000 4:0.000000 8:2.079441 10:1.386294 11:1.386294 9:1.386294 6:0.693147 19:2.197224 16:2.197224 21:1.098612 23:4.158882 24:1.386294 28:1.386294 27:1.386294 31:1.386294 +-1 2:0.000000 6:3.465735 10:1.386294 7:0.693147 8:0.693147 18:4.394448 20:2.197224 16:1.098612 23:1.386294 25:1.386294 30:1.386294 34:4.828314 40:1.609438 49:1.791759 61:1.791759 69:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 19:1.098612 17:1.098612 23:4.158882 26:1.386294 35:1.609438 45:1.791759 72:1.945910 97:2.079442 258:2.708050 454:3.258097 459:3.258097 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 8:0.693147 20:1.098612 18:1.098612 16:1.098612 22:1.386294 23:1.386294 37:3.218876 117:2.197225 109:2.197225 141:2.397895 178:2.484907 1344:4.553877 +-1 1:0.000000 2:0.000000 5:0.000000 4:0.000000 3:0.000000 7:2.079441 11:0.693147 6:0.693147 15:3.295836 18:2.197224 17:1.098612 20:1.098612 21:1.098612 14:1.098612 28:2.772588 23:1.386294 +-1 102:2.079442 382:3.091042 1057:4.248495 3713:5.857933 12002:6.957497 12003:6.957497 +-1 4:0.000000 2:0.000000 6:2.079441 7:0.693147 17:6.591672 23:2.772588 29:1.386294 39:1.609438 40:1.609438 41:1.609438 53:1.791759 83:1.945910 68:1.945910 69:1.945910 98:10.397210 130:2.302585 +-1 2:0.000000 4:0.000000 5:0.000000 11:1.386294 12:0.693147 10:0.693147 6:0.693147 7:0.693147 19:1.098612 18:1.098612 14:1.098612 29:1.386294 23:1.386294 35:1.609438 51:1.791759 125:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 12:0.693147 11:0.693147 14:1.098612 23:2.772588 26:1.386294 41:1.609438 50:1.791759 76:1.945910 96:2.079442 177:2.484907 433:3.218876 542:3.465736 +-1 2:0.000000 5:0.000000 7:4.158882 6:2.079441 9:0.693147 16:2.197224 18:1.098612 17:1.098612 26:4.158882 29:1.386294 23:1.386294 27:1.386294 24:1.386294 40:6.437752 38:4.828314 42:1.609438 +-1 4:0.000000 5:0.000000 3:0.000000 8:0.693147 9:0.693147 19:1.098612 21:1.098612 17:1.098612 23:1.386294 24:1.386294 27:1.386294 59:1.791759 81:1.945910 67:1.945910 114:2.197225 144:2.397895 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 4:0.000000 12:0.693147 18:1.098612 16:1.098612 27:6.931470 30:4.158882 23:2.772588 42:1.609438 39:1.609438 68:1.945910 69:1.945910 158:2.397895 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 11:1.386294 12:0.693147 10:0.693147 9:0.693147 17:1.098612 16:1.098612 18:1.098612 27:5.545176 23:2.772588 29:2.772588 30:2.772588 +-1 3:0.000000 83:1.945910 135:2.302585 148:2.397895 313:2.890372 395:3.135494 684:3.761200 690:3.761200 2736:5.568345 4341:6.263398 +-1 3:0.000000 16:1.098612 47:1.791759 313:2.890372 344:2.995732 395:3.135494 489:3.367296 721:3.806662 5414:6.263398 5232:6.263398 +-1 1:0.000000 20:1.098612 23:1.386294 86:2.079442 130:2.302585 173:2.484907 189:2.484907 186:2.484907 258:2.708050 359:3.044522 812:3.951244 1011:8.496990 3776:5.857933 12100:27.829988 12101:6.957497 +-1 3:0.000000 1:0.000000 17:4.394448 16:3.295836 21:1.098612 22:1.386294 23:1.386294 24:1.386294 39:1.609438 56:1.791759 59:1.791759 82:1.945910 70:1.945910 81:1.945910 67:1.945910 83:1.945910 +-1 1:0.000000 2:0.000000 4:0.000000 9:2.772588 12:0.693147 10:0.693147 11:0.693147 19:1.098612 18:1.098612 23:1.386294 31:1.386294 40:1.609438 36:1.609438 60:1.791759 76:7.783640 81:3.891820 +-1 1:0.000000 5:0.000000 19:1.098612 13:1.098612 14:1.098612 23:2.772588 31:1.386294 41:1.609438 62:3.583518 65:1.945910 257:2.708050 294:5.545178 1376:4.553877 1579:4.753590 2960:5.568345 5249:6.263398 +-1 2:0.000000 5:0.000000 1:0.000000 4:0.000000 12:2.079441 11:1.386294 9:0.693147 6:0.693147 17:2.197224 21:1.098612 19:1.098612 14:1.098612 23:2.772588 36:1.609438 41:1.609438 57:1.791759 +-1 3:0.000000 119:2.197225 148:2.397895 242:2.708050 459:3.258097 1280:4.465908 4088:6.263398 12135:6.957497 12136:6.957497 12137:6.957497 12138:6.957497 12139:6.957497 12140:6.957497 12141:6.957497 12142:6.957497 12143:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:1.386294 7:0.693147 11:0.693147 18:2.197224 31:2.772588 23:2.772588 30:1.386294 29:1.386294 34:1.609438 38:1.609438 62:1.791759 51:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 10:2.772588 6:2.079441 9:0.693147 8:0.693147 18:2.197224 20:2.197224 19:1.098612 23:2.772588 30:2.772588 25:1.386294 28:1.386294 31:1.386294 34:1.609438 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 8:0.693147 21:2.197224 15:2.197224 13:1.098612 20:1.098612 19:1.098612 18:1.098612 23:1.386294 30:1.386294 40:1.609438 +-1 11:0.693147 110:2.197225 +-1 2:0.000000 4:0.000000 3:0.000000 7:8.317764 6:1.386294 8:0.693147 18:1.098612 20:1.098612 14:1.098612 26:2.772588 27:1.386294 30:1.386294 23:1.386294 43:1.609438 41:1.609438 50:7.167036 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 7:18.021822 6:4.852029 12:4.158882 10:3.465735 8:1.386294 9:0.693147 11:0.693147 20:2.197224 13:2.197224 16:1.098612 21:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 19:1.098612 23:5.545176 29:1.386294 31:1.386294 24:1.386294 35:1.609438 65:1.945910 218:2.639057 223:2.639057 294:2.772589 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 3:0.000000 9:1.386294 11:0.693147 20:2.197224 16:2.197224 18:1.098612 14:1.098612 23:4.158882 29:2.772588 31:1.386294 24:1.386294 35:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 4:0.000000 9:0.693147 6:0.693147 7:0.693147 17:3.295836 19:2.197224 21:1.098612 18:1.098612 16:1.098612 20:1.098612 23:4.158882 31:2.772588 +-1 1:0.000000 7:1.386294 6:0.693147 21:2.197224 14:1.098612 23:4.158882 26:1.386294 30:1.386294 34:1.609438 62:1.791759 87:4.158884 96:4.158884 95:2.079442 90:2.079442 99:2.079442 127:2.197225 +-1 3:0.000000 2:0.000000 5:0.000000 8:1.386294 11:0.693147 6:0.693147 7:0.693147 16:5.493060 18:2.197224 21:1.098612 25:6.931470 23:4.158882 30:4.158882 27:2.772588 28:1.386294 26:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 8:1.386294 9:0.693147 10:0.693147 7:0.693147 18:2.197224 21:1.098612 17:1.098612 16:1.098612 23:12.476646 25:1.386294 27:1.386294 38:9.656628 +-1 2:0.000000 4:0.000000 5:0.000000 6:6.931470 7:2.079441 9:2.079441 10:0.693147 16:2.197224 18:2.197224 14:1.098612 17:1.098612 23:8.317764 28:2.772588 30:1.386294 27:1.386294 36:1.609438 +-1 2:0.000000 8:3.465735 10:2.772588 9:1.386294 21:1.098612 19:1.098612 18:1.098612 23:2.772588 27:1.386294 31:1.386294 36:4.828314 40:1.609438 49:1.791759 48:1.791759 82:3.891820 80:1.945910 +-1 2:0.000000 8:1.386294 10:0.693147 16:1.098612 18:1.098612 23:2.772588 36:4.828314 56:7.167036 61:5.375277 53:3.583518 49:1.791759 60:1.791759 51:1.791759 59:1.791759 74:5.837730 72:3.891820 +-1 3:0.000000 18:3.295836 23:2.772588 24:2.772588 25:1.386294 43:1.609438 37:1.609438 68:1.945910 67:1.945910 87:2.079442 89:2.079442 127:6.591675 137:2.302585 150:2.397895 175:2.484907 256:2.708050 +-1 2:0.000000 1:0.000000 3:0.000000 10:2.772588 6:2.079441 7:1.386294 18:4.394448 21:1.098612 16:1.098612 20:1.098612 26:5.545176 23:4.158882 27:2.772588 30:2.772588 25:1.386294 42:1.609438 +-1 2:0.000000 4:0.000000 7:9.010911 8:2.079441 6:0.693147 9:0.693147 16:3.295836 28:4.158882 30:2.772588 25:1.386294 23:1.386294 27:1.386294 24:1.386294 42:9.656628 34:1.609438 60:7.167036 +-1 3:0.000000 8:3.465735 7:0.693147 23:2.772588 43:1.609438 59:1.791759 116:6.591675 139:2.302585 154:2.397895 152:2.397895 182:2.484907 163:2.484907 179:2.484907 201:7.694847 304:2.833213 616:10.832754 +-1 3:0.000000 23:1.386294 166:2.484907 235:2.639057 313:2.890372 382:3.091042 420:3.218876 2335:5.347108 12564:6.957497 +-1 10:1.386294 18:1.098612 23:1.386294 30:1.386294 36:1.609438 38:1.609438 53:1.791759 132:2.302585 258:5.416100 316:2.890372 2465:5.347108 3580:5.857933 4547:6.263398 12565:6.957497 12566:6.957497 +-1 1:0.000000 2:0.000000 3:0.000000 8:1.386294 12:1.386294 10:0.693147 7:0.693147 15:1.098612 19:1.098612 24:1.386294 43:3.218876 36:1.609438 37:1.609438 44:1.609438 68:1.945910 117:2.197225 +-1 1:0.000000 3:0.000000 7:0.693147 12:0.693147 19:1.098612 17:1.098612 29:2.772588 34:1.609438 44:1.609438 38:1.609438 63:1.791759 46:1.791759 48:1.791759 45:1.791759 47:1.791759 78:1.945910 +-1 5:0.000000 4:0.000000 2:0.000000 11:0.693147 12:0.693147 26:1.386294 22:1.386294 44:1.609438 64:7.167036 63:5.375277 95:2.079442 106:2.197225 170:2.484907 300:2.833213 388:3.091042 1060:4.248495 +-1 5:0.000000 2:0.000000 4:0.000000 7:1.386294 12:0.693147 6:0.693147 19:1.098612 21:1.098612 24:1.386294 30:1.386294 44:1.609438 50:3.583518 64:1.791759 61:1.791759 62:1.791759 48:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 7:2.079441 9:1.386294 6:1.386294 12:0.693147 10:0.693147 13:2.197224 16:1.098612 36:1.609438 44:1.609438 60:5.375277 63:5.375277 64:1.791759 50:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 11:3.465735 9:0.693147 10:0.693147 12:0.693147 13:2.197224 19:1.098612 17:1.098612 24:1.386294 30:1.386294 29:1.386294 36:4.828314 +-1 1:0.000000 8:0.693147 13:2.197224 15:1.098612 29:1.386294 44:1.609438 46:1.791759 96:2.079442 206:2.564949 261:2.772589 2792:22.273380 3509:11.715866 3524:5.857933 5442:12.526796 12621:6.957497 +-1 2:0.000000 5:0.000000 4:0.000000 12:1.386294 20:1.098612 31:1.386294 29:1.386294 40:3.218876 44:1.609438 63:3.583518 64:1.791759 59:1.791759 65:1.945910 73:1.945910 91:2.079442 148:2.397895 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 13:2.197224 44:1.609438 63:3.583518 59:1.791759 114:2.197225 256:2.708050 380:3.091042 569:3.555348 702:3.761200 1198:4.382027 1472:4.653960 1769:4.875197 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 9:1.386294 10:1.386294 12:0.693147 11:0.693147 6:0.693147 8:0.693147 20:2.197224 21:1.098612 18:1.098612 22:2.772588 28:1.386294 40:1.609438 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:0.693147 12:0.693147 13:1.098612 38:1.609438 44:1.609438 62:3.583518 63:3.583518 61:1.791759 59:1.791759 64:1.791759 69:3.891820 72:1.945910 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 6:1.386294 18:1.098612 64:1.791759 61:1.791759 77:1.945910 99:2.079442 97:2.079442 87:2.079442 86:2.079442 123:2.197225 125:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 7:4.852029 9:1.386294 8:1.386294 10:0.693147 6:0.693147 21:1.098612 18:1.098612 15:1.098612 16:1.098612 14:1.098612 31:1.386294 29:1.386294 +-1 4:0.000000 5:0.000000 2:0.000000 3:0.000000 1:0.000000 9:0.693147 12:0.693147 17:2.197224 18:1.098612 15:1.098612 19:1.098612 22:1.386294 28:1.386294 30:1.386294 44:1.609438 43:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 7:11.090352 9:1.386294 6:1.386294 10:0.693147 20:2.197224 26:8.317764 27:2.772588 28:1.386294 25:1.386294 30:1.386294 38:6.437752 42:1.609438 44:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 11:2.079441 12:1.386294 8:1.386294 10:0.693147 21:1.098612 20:1.098612 24:2.772588 25:1.386294 22:1.386294 36:8.047190 43:1.609438 +-1 1:0.000000 3:0.000000 9:2.079441 6:0.693147 10:0.693147 12:0.693147 11:0.693147 18:2.197224 21:1.098612 14:1.098612 30:1.386294 22:1.386294 24:1.386294 44:3.218876 37:1.609438 41:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 10:0.693147 7:0.693147 8:0.693147 17:2.197224 13:2.197224 22:2.772588 26:1.386294 29:1.386294 24:1.386294 44:3.218876 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 9:0.693147 19:1.098612 21:1.098612 13:1.098612 14:1.098612 44:3.218876 37:1.609438 40:1.609438 41:1.609438 64:1.791759 63:1.791759 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 13:2.197224 19:1.098612 39:1.609438 42:1.609438 44:1.609438 63:7.167036 59:1.791759 71:1.945910 121:2.197225 148:2.397895 256:2.708050 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 6:1.386294 7:1.386294 19:1.098612 21:1.098612 30:1.386294 31:1.386294 44:1.609438 42:1.609438 63:3.583518 64:1.791759 47:1.791759 65:1.945910 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 9:0.693147 8:0.693147 7:0.693147 12:0.693147 19:1.098612 17:1.098612 21:1.098612 16:1.098612 15:1.098612 31:1.386294 25:1.386294 24:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 8:2.079441 10:1.386294 6:1.386294 12:0.693147 11:0.693147 9:0.693147 14:1.098612 20:1.098612 21:1.098612 19:1.098612 26:1.386294 25:1.386294 41:1.609438 +-1 3:0.000000 6:0.693147 8:0.693147 17:1.098612 16:1.098612 14:1.098612 35:1.609438 44:1.609438 60:1.791759 50:1.791759 111:2.197225 118:2.197225 257:2.708050 567:3.555348 858:4.007333 1315:4.553877 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 11:0.693147 13:1.098612 44:1.609438 64:1.791759 101:2.079442 114:2.197225 119:2.197225 188:2.484907 272:2.772589 269:2.772589 327:2.890372 446:3.258097 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 10:1.386294 18:3.295836 19:2.197224 13:2.197224 24:2.772588 31:1.386294 29:1.386294 44:3.218876 35:1.609438 63:7.167036 64:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 11:1.386294 12:0.693147 8:0.693147 16:1.098612 13:1.098612 39:1.609438 38:1.609438 44:1.609438 62:5.375277 53:3.583518 63:3.583518 50:1.791759 +-1 3:0.000000 82:1.945910 78:1.945910 330:2.944439 841:4.007333 905:4.060443 1331:4.553877 1561:4.753590 2047:5.164786 3824:5.857933 12911:6.957497 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 5:0.000000 12:1.386294 11:0.693147 19:1.098612 13:1.098612 22:1.386294 24:1.386294 44:1.609438 41:1.609438 63:10.750554 62:3.583518 45:1.791759 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 3:0.000000 7:5.545176 6:2.772588 10:1.386294 12:0.693147 16:2.197224 14:2.197224 21:1.098612 18:1.098612 22:8.317764 30:4.158882 26:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:1.386294 9:0.693147 12:0.693147 19:1.098612 21:1.098612 13:1.098612 22:2.772588 29:1.386294 44:3.218876 36:1.609438 43:1.609438 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 6:2.772588 7:2.079441 11:0.693147 17:4.394448 13:2.197224 26:4.158882 27:1.386294 36:4.828314 39:1.609438 43:1.609438 44:1.609438 52:1.791759 +-1 6:2.079441 10:1.386294 9:1.386294 17:8.788896 18:1.098612 21:1.098612 28:1.386294 25:1.386294 36:3.218876 38:1.609438 50:1.791759 84:1.945910 80:1.945910 69:1.945910 93:2.079442 98:2.079442 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 10:3.465735 6:2.079441 8:2.079441 11:1.386294 12:0.693147 21:1.098612 16:1.098612 13:1.098612 18:1.098612 28:2.772588 26:1.386294 +-1 1:0.000000 3:0.000000 9:0.693147 44:1.609438 83:1.945910 99:2.079442 188:2.484907 199:2.564949 313:2.890372 376:3.044522 395:3.135494 684:3.761200 735:3.806662 785:3.912023 899:4.060443 1055:4.248495 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:0.693147 11:0.693147 10:0.693147 9:0.693147 20:1.098612 13:1.098612 30:1.386294 24:1.386294 29:1.386294 34:3.218876 43:1.609438 39:1.609438 +-1 1:0.000000 3:0.000000 10:0.693147 9:0.693147 11:0.693147 6:0.693147 19:1.098612 16:1.098612 17:1.098612 13:1.098612 43:1.609438 44:1.609438 51:3.583518 47:1.791759 60:1.791759 61:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 12:1.386294 6:1.386294 8:0.693147 9:0.693147 19:1.098612 17:1.098612 29:1.386294 24:1.386294 44:1.609438 63:8.958795 64:5.375277 62:5.375277 +-1 4:0.000000 3:0.000000 1:0.000000 2:0.000000 11:0.693147 8:0.693147 16:2.197224 24:2.772588 27:1.386294 41:1.609438 44:1.609438 82:1.945910 72:1.945910 78:1.945910 94:4.158884 99:2.079442 +-1 1517:4.753590 13023:6.957497 13024:6.957497 13025:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 10:1.386294 8:0.693147 19:1.098612 16:1.098612 13:1.098612 24:2.772588 32:1.386294 29:1.386294 44:1.609438 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 9:0.693147 19:3.295836 20:2.197224 31:1.386294 28:1.386294 30:1.386294 22:1.386294 24:1.386294 44:1.609438 63:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:1.386294 12:0.693147 13:1.098612 15:1.098612 17:1.098612 16:1.098612 14:1.098612 29:1.386294 28:1.386294 27:1.386294 43:3.218876 44:1.609438 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:3.465735 6:0.693147 9:0.693147 25:1.386294 38:3.218876 42:1.609438 39:1.609438 44:1.609438 51:7.167036 64:5.375277 63:5.375277 +-1 5:0.000000 2:0.000000 4:0.000000 10:2.772588 7:2.079441 6:0.693147 11:0.693147 8:0.693147 12:0.693147 19:2.197224 21:2.197224 18:2.197224 13:2.197224 30:5.545176 27:4.158882 26:4.158882 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 9:0.693147 7:0.693147 6:0.693147 13:2.197224 42:1.609438 38:1.609438 39:1.609438 44:1.609438 63:5.375277 60:3.583518 49:1.791759 59:1.791759 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 13:2.197224 17:1.098612 38:1.609438 44:1.609438 63:3.583518 59:1.791759 71:1.945910 120:2.197225 189:2.484907 202:2.564949 198:2.564949 229:2.639057 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 10:1.386294 11:0.693147 9:0.693147 19:1.098612 31:1.386294 34:1.609438 43:1.609438 44:1.609438 63:3.583518 64:1.791759 61:1.791759 51:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 17:3.295836 22:1.386294 44:1.609438 64:3.583518 63:3.583518 45:1.791759 51:1.791759 58:1.791759 76:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:4.852029 9:2.772588 6:2.079441 8:1.386294 10:0.693147 18:1.098612 16:1.098612 14:1.098612 22:4.158882 28:1.386294 26:1.386294 +-1 5:0.000000 3:0.000000 2:0.000000 4:0.000000 1:0.000000 12:1.386294 11:0.693147 9:0.693147 7:0.693147 20:1.098612 19:1.098612 17:1.098612 15:1.098612 16:1.098612 14:1.098612 31:1.386294 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 13:2.197224 39:1.609438 42:1.609438 44:1.609438 63:3.583518 59:1.791759 71:3.891820 121:2.197225 256:2.708050 408:6.270988 412:3.178054 473:3.295837 +-1 1:0.000000 3:0.000000 2:0.000000 12:0.693147 8:0.693147 19:1.098612 31:1.386294 64:1.791759 63:1.791759 99:2.079442 117:2.197225 111:2.197225 303:2.833213 420:3.218876 688:3.761200 721:3.806662 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 11:1.386294 7:0.693147 8:0.693147 19:1.098612 20:1.098612 22:1.386294 44:3.218876 63:5.375277 64:3.583518 80:1.945910 95:2.079442 110:2.197225 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 9:0.693147 12:0.693147 6:0.693147 7:0.693147 11:0.693147 10:0.693147 19:2.197224 13:2.197224 21:1.098612 26:1.386294 44:1.609438 63:3.583518 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 8:2.079441 10:1.386294 11:0.693147 6:0.693147 12:0.693147 16:2.197224 13:1.098612 28:1.386294 29:1.386294 44:3.218876 42:1.609438 39:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 10:2.079441 11:2.079441 9:0.693147 21:1.098612 22:1.386294 37:3.218876 44:3.218876 63:3.583518 60:1.791759 59:1.791759 64:1.791759 86:2.079442 97:2.079442 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 12:1.386294 8:0.693147 11:0.693147 6:0.693147 9:0.693147 20:2.197224 15:1.098612 21:1.098612 19:1.098612 22:1.386294 25:1.386294 36:3.218876 +-1 3:0.000000 10:0.693147 14:1.098612 27:2.772588 30:1.386294 44:1.609438 41:1.609438 62:1.791759 63:1.791759 58:1.791759 70:3.891820 82:1.945910 89:2.079442 111:2.197225 173:2.484907 196:2.564949 +-1 1:0.000000 3:0.000000 10:0.693147 19:1.098612 21:1.098612 31:1.386294 44:1.609438 59:1.791759 63:1.791759 45:1.791759 84:1.945910 102:2.079442 187:2.484907 362:3.044522 550:3.465736 593:3.583519 +-1 3:0.000000 1:0.000000 14:1.098612 44:1.609438 41:1.609438 59:1.791759 63:1.791759 78:3.891820 97:2.079442 109:4.394450 108:2.197225 126:2.197225 146:7.193685 175:2.484907 179:2.484907 200:2.564949 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 11:1.386294 7:0.693147 12:0.693147 17:2.197224 20:1.098612 15:1.098612 14:1.098612 28:5.545176 39:1.609438 44:1.609438 41:1.609438 +-1 2:0.000000 3:0.000000 5:0.000000 9:1.386294 12:0.693147 8:0.693147 14:1.098612 27:1.386294 30:1.386294 44:4.828314 43:1.609438 39:1.609438 35:1.609438 64:3.583518 52:3.583518 61:3.583518 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 28:1.386294 44:1.609438 37:1.609438 64:1.791759 63:1.791759 49:1.791759 66:3.891820 81:1.945910 70:1.945910 92:4.158884 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 6:0.693147 19:2.197224 31:1.386294 44:3.218876 34:1.609438 63:3.583518 64:1.791759 84:1.945910 188:2.484907 187:2.484907 338:2.944439 535:3.433987 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 8:0.693147 27:1.386294 30:1.386294 44:1.609438 64:3.583518 63:3.583518 59:1.791759 84:1.945910 99:2.079442 124:2.197225 216:2.564949 220:2.639057 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 18:1.098612 26:1.386294 44:1.609438 64:1.791759 63:1.791759 51:1.791759 86:2.079442 125:4.394450 166:2.484907 411:3.178054 524:3.433987 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 12:1.386294 10:0.693147 11:0.693147 8:0.693147 21:1.098612 19:1.098612 20:1.098612 25:4.158882 22:1.386294 44:1.609438 35:1.609438 64:3.583518 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 19:1.098612 20:1.098612 30:1.386294 29:1.386294 44:1.609438 64:5.375277 63:5.375277 82:1.945910 71:1.945910 84:1.945910 78:1.945910 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 5:0.000000 10:0.693147 11:0.693147 12:0.693147 19:1.098612 20:1.098612 13:1.098612 29:1.386294 22:1.386294 44:3.218876 40:3.218876 43:1.609438 +-1 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:4.158882 11:2.079441 6:1.386294 7:1.386294 10:0.693147 21:6.591672 16:3.295836 13:2.197224 17:1.098612 19:1.098612 30:5.545176 28:4.158882 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 12:2.079441 10:1.386294 8:1.386294 7:0.693147 19:1.098612 14:1.098612 25:1.386294 26:1.386294 22:1.386294 29:1.386294 44:3.218876 +-1 4:0.000000 2:0.000000 1:0.000000 5:0.000000 10:1.386294 8:0.693147 7:0.693147 13:2.197224 19:1.098612 28:1.386294 38:1.609438 44:1.609438 63:12.542313 48:5.375277 64:1.791759 59:1.791759 +-1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 12:1.386294 17:3.295836 19:1.098612 21:1.098612 15:1.098612 13:1.098612 14:1.098612 31:2.772588 26:1.386294 22:1.386294 29:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 8:2.772588 12:0.693147 21:1.098612 19:1.098612 20:1.098612 18:1.098612 16:1.098612 14:1.098612 22:1.386294 31:1.386294 30:1.386294 44:4.828314 41:1.609438 +-1 3:0.000000 1:0.000000 9:0.693147 19:1.098612 14:1.098612 28:1.386294 29:1.386294 31:1.386294 42:1.609438 44:1.609438 35:1.609438 131:2.302585 148:2.397895 186:2.484907 287:5.545178 297:5.666426 +-1 5:0.000000 1:0.000000 2:0.000000 4:0.000000 10:1.386294 6:0.693147 24:1.386294 22:1.386294 34:3.218876 38:1.609438 64:3.583518 63:1.791759 377:3.091042 473:3.295837 532:3.433987 1692:4.875197 +-1 2:0.000000 5:0.000000 4:0.000000 10:4.852029 8:1.386294 12:0.693147 11:0.693147 16:2.197224 20:1.098612 14:1.098612 28:2.772588 29:1.386294 22:1.386294 24:1.386294 44:3.218876 37:1.609438 +-1 2:0.000000 1:0.000000 5:0.000000 3:0.000000 4:0.000000 6:2.079441 12:0.693147 8:0.693147 15:2.197224 19:1.098612 18:1.098612 16:1.098612 14:1.098612 31:1.386294 29:1.386294 26:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 8:0.693147 12:0.693147 29:1.386294 22:1.386294 44:3.218876 63:5.375277 62:3.583518 59:1.791759 64:1.791759 45:1.791759 84:1.945910 173:2.484907 +-1 2:0.000000 4:0.000000 5:0.000000 6:0.693147 10:0.693147 12:0.693147 20:1.098612 14:1.098612 27:1.386294 35:1.609438 44:1.609438 64:3.583518 63:1.791759 102:2.079442 119:2.197225 158:2.397895 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 12:1.386294 10:0.693147 19:2.197224 24:1.386294 26:1.386294 44:1.609438 63:7.167036 64:3.583518 59:1.791759 71:1.945910 84:1.945910 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 11:1.386294 8:1.386294 12:0.693147 10:0.693147 7:0.693147 19:1.098612 13:1.098612 40:1.609438 44:1.609438 63:3.583518 45:3.583518 59:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 12:2.079441 9:0.693147 6:0.693147 14:1.098612 19:1.098612 44:4.828314 43:3.218876 41:1.609438 64:7.167036 63:7.167036 65:1.945910 94:2.079442 +-1 2:0.000000 5:0.000000 6:2.079441 11:1.386294 10:0.693147 16:1.098612 30:2.772588 53:3.583518 57:3.583518 64:1.791759 63:1.791759 52:1.791759 60:1.791759 58:1.791759 56:1.791759 80:3.891820 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 8:1.386294 11:0.693147 19:1.098612 21:1.098612 16:1.098612 28:1.386294 30:1.386294 24:1.386294 29:1.386294 44:1.609438 60:5.375277 +-1 1:0.000000 3:0.000000 17:1.098612 99:2.079442 159:2.397895 295:2.833213 2667:11.136690 3215:11.715866 13468:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.772588 11:0.693147 8:0.693147 12:0.693147 19:1.098612 30:1.386294 22:1.386294 29:1.386294 38:1.609438 42:1.609438 40:1.609438 44:1.609438 +-1 2:0.000000 4:0.000000 3:0.000000 10:0.693147 8:0.693147 13:1.098612 29:2.772588 22:1.386294 28:1.386294 44:3.218876 40:1.609438 64:3.583518 63:3.583518 61:3.583518 76:1.945910 65:1.945910 +-1 4:0.000000 2:0.000000 5:0.000000 10:6.238323 7:1.386294 12:0.693147 18:2.197224 21:1.098612 25:1.386294 38:1.609438 61:10.750554 63:3.583518 64:3.583518 53:1.791759 52:1.791759 62:1.791759 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 22:1.386294 44:1.609438 41:1.609438 64:1.791759 63:1.791759 156:2.397895 491:3.367296 567:3.555348 927:4.110874 1087:4.317488 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 10:0.693147 20:1.098612 15:1.098612 14:1.098612 41:1.609438 44:1.609438 64:3.583518 63:1.791759 59:1.791759 62:1.791759 47:1.791759 99:2.079442 +-1 4:0.000000 1:0.000000 2:0.000000 5:0.000000 3:0.000000 12:2.079441 8:1.386294 6:0.693147 10:0.693147 11:0.693147 19:1.098612 14:1.098612 22:1.386294 44:1.609438 41:1.609438 63:5.375277 +-1 5:0.000000 6:2.079441 10:1.386294 12:0.693147 30:5.545176 35:1.609438 44:1.609438 60:1.791759 63:1.791759 79:1.945910 74:1.945910 101:2.079442 215:2.564949 256:2.708050 292:8.317767 353:2.995732 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 7:2.079441 8:0.693147 12:0.693147 18:1.098612 13:1.098612 26:4.158882 29:1.386294 44:1.609438 60:8.958795 52:3.583518 62:3.583518 49:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 6:2.079441 12:1.386294 10:1.386294 11:0.693147 9:0.693147 8:0.693147 21:2.197224 17:2.197224 14:2.197224 19:1.098612 13:1.098612 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 13:1.098612 15:1.098612 14:1.098612 23:2.772588 22:1.386294 44:3.218876 40:3.218876 34:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 9:1.386294 12:0.693147 6:0.693147 8:0.693147 21:3.295836 19:1.098612 14:1.098612 24:2.772588 31:1.386294 26:1.386294 22:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 7:1.386294 9:0.693147 12:0.693147 20:7.690284 13:2.197224 17:1.098612 21:1.098612 18:1.098612 16:1.098612 30:12.476646 24:2.772588 40:3.218876 +-1 1:0.000000 3:0.000000 4:0.000000 2:0.000000 13:2.197224 24:1.386294 44:3.218876 36:1.609438 63:5.375277 64:5.375277 65:1.945910 127:2.197225 119:2.197225 176:2.484907 287:2.772589 291:2.772589 +-1 1:0.000000 10:0.693147 9:0.693147 6:0.693147 21:1.098612 13:1.098612 26:1.386294 29:1.386294 44:1.609438 52:1.791759 59:1.791759 63:1.791759 287:2.772589 298:2.833213 406:3.135494 559:3.496508 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:6.238323 8:2.079441 12:1.386294 11:1.386294 10:0.693147 17:2.197224 13:1.098612 25:1.386294 24:1.386294 29:1.386294 44:3.218876 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 8:2.079441 12:2.079441 10:2.079441 6:0.693147 9:0.693147 13:2.197224 17:1.098612 29:1.386294 28:1.386294 25:1.386294 31:1.386294 +-1 1:0.000000 3:0.000000 11:0.693147 14:1.098612 28:1.386294 35:1.609438 44:1.609438 78:1.945910 288:2.772589 319:2.890372 376:3.044522 403:3.135494 13589:6.957497 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 11:1.386294 10:0.693147 9:0.693147 7:0.693147 6:0.693147 8:0.693147 12:0.693147 17:2.197224 21:1.098612 13:1.098612 14:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 8:0.693147 19:1.098612 13:1.098612 29:1.386294 24:1.386294 44:1.609438 63:10.750554 64:7.167036 62:3.583518 59:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:0.693147 7:0.693147 13:2.197224 19:1.098612 38:1.609438 44:1.609438 63:7.167036 48:3.583518 64:1.791759 59:1.791759 90:2.079442 +-1 5:0.000000 2:0.000000 4:0.000000 10:1.386294 11:0.693147 7:0.693147 8:0.693147 12:0.693147 21:1.098612 16:1.098612 13:1.098612 26:1.386294 29:1.386294 44:1.609438 64:3.583518 62:3.583518 +-1 2:0.000000 4:0.000000 6:4.158882 10:3.465735 12:0.693147 7:0.693147 16:6.591672 18:3.295836 21:1.098612 27:5.545176 26:2.772588 44:14.484942 36:4.828314 42:1.609438 43:1.609438 50:26.876385 +-1 3:0.000000 1:0.000000 4:0.000000 10:1.386294 11:0.693147 7:0.693147 19:4.394448 21:3.295836 26:1.386294 24:1.386294 44:1.609438 38:1.609438 36:1.609438 57:3.583518 51:1.791759 60:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 8:1.386294 10:1.386294 12:0.693147 11:0.693147 6:0.693147 9:0.693147 21:1.098612 18:1.098612 25:2.772588 27:1.386294 36:3.218876 60:3.583518 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 10:6.931470 6:6.931470 7:4.158882 11:1.386294 8:1.386294 12:0.693147 19:2.197224 26:8.317764 30:2.772588 31:1.386294 32:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 8:2.772588 7:2.079441 10:1.386294 12:1.386294 11:0.693147 20:1.098612 21:1.098612 15:1.098612 13:1.098612 26:1.386294 29:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 6:1.386294 10:1.386294 8:1.386294 11:0.693147 19:1.098612 13:1.098612 16:1.098612 24:6.931470 29:1.386294 42:1.609438 +-1 5:0.000000 2:0.000000 4:0.000000 10:0.693147 12:0.693147 20:2.197224 19:1.098612 13:1.098612 22:1.386294 44:4.828314 41:1.609438 63:5.375277 64:3.583518 191:2.484907 206:2.564949 215:2.564949 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 8:0.693147 10:0.693147 11:0.693147 17:1.098612 13:1.098612 14:1.098612 22:1.386294 44:1.609438 41:1.609438 63:3.583518 64:1.791759 +-1 3:0.000000 1:0.000000 9:0.693147 25:1.386294 83:1.945910 67:1.945910 114:2.197225 254:8.124150 313:5.780744 370:3.044522 395:9.406482 409:3.135494 489:3.367296 506:3.401197 632:3.637586 664:3.737670 +-1 1:0.000000 2:0.000000 3:0.000000 5:0.000000 12:0.693147 19:1.098612 31:1.386294 22:1.386294 44:1.609438 64:3.583518 63:1.791759 486:3.332205 857:4.007333 1506:4.753590 2172:5.164786 4377:6.263398 +-1 1:0.000000 3:0.000000 11:0.693147 19:1.098612 63:1.791759 59:1.791759 71:1.945910 84:1.945910 80:1.945910 78:1.945910 97:2.079442 115:4.394450 160:2.397895 217:2.564949 198:2.564949 371:3.044522 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 10:1.386294 8:0.693147 11:0.693147 21:2.197224 26:6.931470 36:6.437752 38:4.828314 40:1.609438 42:1.609438 44:1.609438 41:1.609438 64:3.583518 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.079441 12:0.693147 8:0.693147 19:1.098612 26:1.386294 31:1.386294 30:1.386294 27:1.386294 36:3.218876 42:1.609438 43:1.609438 +-1 4:0.000000 2:0.000000 5:0.000000 7:2.772588 10:0.693147 6:0.693147 16:2.197224 13:2.197224 30:1.386294 37:1.609438 44:1.609438 64:5.375277 63:3.583518 75:3.891820 83:1.945910 76:1.945910 +-1 3:0.000000 1:0.000000 9:0.693147 24:1.386294 29:1.386294 44:1.609438 99:2.079442 114:2.197225 150:2.397895 160:2.397895 186:2.484907 202:2.564949 199:2.564949 217:2.564949 224:2.639057 257:2.708050 +-1 2:0.000000 4:0.000000 12:0.693147 44:1.609438 64:1.791759 86:2.079442 797:3.912023 3835:5.857933 13811:13.914994 13812:6.957497 13813:6.957497 13814:6.957497 13815:6.957497 +-1 3:0.000000 1:0.000000 6:3.465735 10:2.772588 11:2.079441 7:1.386294 9:0.693147 17:5.493060 16:3.295836 15:2.197224 28:5.545176 26:4.158882 27:2.772588 31:1.386294 38:9.656628 36:4.828314 +-1 5:0.000000 12:1.386294 44:1.609438 63:1.791759 45:1.791759 567:3.555348 13846:6.957497 13847:6.957497 13848:6.957497 13849:6.957497 13850:6.957497 +-1 2:0.000000 4:0.000000 9:1.386294 7:0.693147 12:0.693147 16:2.197224 21:1.098612 26:5.545176 30:4.158882 28:2.772588 25:1.386294 37:3.218876 44:1.609438 48:8.958795 52:3.583518 63:3.583518 +-1 3:0.000000 2:0.000000 5:0.000000 4:0.000000 1:0.000000 10:2.079441 6:1.386294 11:1.386294 9:0.693147 12:0.693147 17:1.098612 16:1.098612 19:1.098612 20:1.098612 30:2.772588 27:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 10:1.386294 11:0.693147 12:0.693147 21:1.098612 13:1.098612 15:1.098612 14:1.098612 29:1.386294 24:1.386294 44:1.609438 36:1.609438 41:1.609438 64:5.375277 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:1.386294 12:1.386294 10:0.693147 8:0.693147 29:1.386294 28:1.386294 44:4.828314 43:1.609438 34:1.609438 36:1.609438 63:7.167036 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 12:2.079441 11:2.079441 10:1.386294 9:0.693147 21:3.295836 20:1.098612 13:1.098612 28:4.158882 30:2.772588 31:1.386294 27:1.386294 22:1.386294 +-1 3:0.000000 6:0.693147 9:0.693147 16:1.098612 22:1.386294 44:1.609438 70:1.945910 99:2.079442 111:4.394450 120:2.197225 114:2.197225 109:2.197225 171:2.484907 163:2.484907 198:7.694847 199:2.564949 +-1 2:0.000000 5:0.000000 12:0.693147 8:0.693147 13:1.098612 14:1.098612 44:3.218876 35:1.609438 64:3.583518 65:3.891820 92:2.079442 215:2.564949 287:2.772589 458:3.258097 580:3.555348 695:7.522400 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:2.079441 10:1.386294 16:1.098612 29:1.386294 43:3.218876 36:1.609438 44:1.609438 50:5.375277 53:1.791759 58:1.791759 62:1.791759 76:1.945910 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 7:5.545176 9:2.772588 12:0.693147 10:0.693147 6:0.693147 21:2.197224 17:1.098612 16:1.098612 31:8.317764 25:2.772588 24:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:2.079441 6:1.386294 12:0.693147 15:1.098612 14:1.098612 29:2.772588 22:1.386294 44:3.218876 35:1.609438 64:5.375277 63:5.375277 45:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:3.465735 6:0.693147 12:0.693147 19:1.098612 30:2.772588 31:1.386294 28:1.386294 38:8.047190 36:3.218876 44:1.609438 42:1.609438 56:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 7:2.079441 12:0.693147 8:0.693147 17:1.098612 21:1.098612 15:1.098612 14:1.098612 26:1.386294 44:1.609438 63:3.583518 64:1.791759 +-1 4:0.000000 2:0.000000 5:0.000000 6:1.386294 9:0.693147 12:0.693147 14:2.197224 13:1.098612 27:2.772588 26:1.386294 30:1.386294 28:1.386294 38:3.218876 44:3.218876 39:1.609438 35:1.609438 +-1 3:0.000000 2:0.000000 4:0.000000 11:1.386294 12:0.693147 10:0.693147 6:0.693147 7:0.693147 17:3.295836 21:2.197224 20:1.098612 14:1.098612 30:1.386294 42:1.609438 39:1.609438 43:1.609438 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 10:2.079441 9:1.386294 8:0.693147 12:0.693147 15:5.493060 21:1.098612 13:1.098612 31:5.545176 28:1.386294 29:1.386294 40:6.437752 +-1 5703:6.263398 14025:6.957497 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 12:1.386294 10:0.693147 11:0.693147 8:0.693147 19:1.098612 16:1.098612 13:1.098612 31:1.386294 24:1.386294 29:1.386294 44:3.218876 36:1.609438 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 12:1.386294 15:1.098612 18:1.098612 17:1.098612 14:1.098612 31:2.772588 40:1.609438 34:1.609438 44:1.609438 35:1.609438 63:7.167036 64:1.791759 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 11:0.693147 10:0.693147 19:1.098612 44:1.609438 41:1.609438 63:5.375277 64:3.583518 80:1.945910 82:1.945910 95:2.079442 112:2.197225 +-1 16:1.098612 83:1.945910 78:1.945910 89:2.079442 122:2.197225 148:2.397895 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 684:11.283600 796:3.912023 1337:4.553877 2555:5.347108 14072:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 8:0.693147 7:0.693147 18:3.295836 21:2.197224 16:1.098612 19:1.098612 22:1.386294 31:1.386294 30:1.386294 43:4.828314 44:3.218876 37:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 17:2.197224 19:1.098612 20:1.098612 64:1.791759 63:1.791759 59:1.791759 78:1.945910 82:1.945910 91:2.079442 97:2.079442 216:2.564949 207:2.564949 198:2.564949 +-1 2:0.000000 3:0.000000 5:0.000000 4:0.000000 1:0.000000 12:2.079441 10:1.386294 6:0.693147 18:2.197224 13:1.098612 14:1.098612 26:1.386294 24:1.386294 44:3.218876 34:1.609438 43:1.609438 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 8:1.386294 12:1.386294 13:1.098612 17:1.098612 22:2.772588 29:1.386294 26:1.386294 44:1.609438 38:1.609438 37:1.609438 62:5.375277 63:5.375277 +-1 3:0.000000 1:0.000000 2:0.000000 6:5.545176 10:1.386294 7:0.693147 9:0.693147 8:0.693147 17:1.098612 18:1.098612 16:1.098612 14:1.098612 30:18.021822 27:11.090352 25:4.158882 31:1.386294 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 8:2.079441 9:2.079441 10:2.079441 11:1.386294 12:1.386294 6:0.693147 17:2.197224 21:1.098612 14:1.098612 26:1.386294 25:1.386294 29:1.386294 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 12:1.386294 8:0.693147 13:1.098612 29:1.386294 28:1.386294 44:4.828314 36:1.609438 39:1.609438 43:1.609438 63:7.167036 64:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 9:0.693147 21:1.098612 19:1.098612 18:1.098612 40:1.609438 44:1.609438 63:5.375277 64:1.791759 61:1.791759 59:1.791759 99:2.079442 118:2.197225 +-1 5:0.000000 2:0.000000 3:0.000000 4:0.000000 1:0.000000 11:1.386294 10:0.693147 12:0.693147 7:0.693147 13:1.098612 22:1.386294 44:1.609438 63:7.167036 64:5.375277 61:3.583518 57:3.583518 +-1 2:0.000000 4:0.000000 11:1.386294 7:1.386294 10:1.386294 12:0.693147 6:0.693147 19:2.197224 18:2.197224 31:1.386294 30:1.386294 27:1.386294 44:1.609438 63:3.583518 56:1.791759 64:1.791759 +-1 11:0.693147 24:4.158882 29:2.772588 40:1.609438 44:1.609438 49:1.791759 58:1.791759 81:3.891820 82:1.945910 78:1.945910 67:1.945910 114:4.394450 119:2.197225 117:2.197225 109:2.197225 135:2.302585 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:0.693147 10:0.693147 8:0.693147 9:0.693147 12:0.693147 29:2.772588 22:1.386294 44:3.218876 36:1.609438 43:1.609438 64:3.583518 63:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 10:1.386294 11:1.386294 8:0.693147 12:0.693147 13:1.098612 28:1.386294 25:1.386294 29:1.386294 39:1.609438 38:1.609438 44:1.609438 62:5.375277 64:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 10:0.693147 6:0.693147 9:0.693147 8:0.693147 25:1.386294 29:1.386294 22:1.386294 44:1.609438 64:7.167036 63:5.375277 52:1.791759 +-1 16:1.098612 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 653:3.688879 684:11.283600 796:3.912023 1337:4.553877 2555:5.347108 3897:5.857933 +-1 1:0.000000 5:0.000000 3:0.000000 2:0.000000 4:0.000000 12:1.386294 9:1.386294 6:1.386294 10:1.386294 7:0.693147 8:0.693147 20:2.197224 17:1.098612 21:1.098612 31:1.386294 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 8:2.079441 12:2.079441 10:2.079441 9:0.693147 13:2.197224 16:1.098612 21:1.098612 29:1.386294 31:1.386294 25:1.386294 36:1.609438 +-1 4:0.000000 5:0.000000 1:0.000000 3:0.000000 2:0.000000 12:0.693147 19:2.197224 31:1.386294 29:1.386294 37:1.609438 44:1.609438 63:5.375277 64:3.583518 51:1.791759 59:1.791759 62:1.791759 +-1 16:1.098612 83:1.945910 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 684:11.283600 790:3.912023 796:3.912023 1337:4.553877 2555:5.347108 +-1 3:0.000000 1:0.000000 82:1.945910 99:2.079442 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 9:1.386294 10:0.693147 11:0.693147 29:1.386294 44:1.609438 43:1.609438 63:5.375277 64:1.791759 119:2.197225 109:2.197225 158:2.397895 146:2.397895 +-1 3:0.000000 2:0.000000 10:2.079441 6:2.079441 11:1.386294 9:1.386294 8:1.386294 18:2.197224 19:1.098612 21:1.098612 16:1.098612 13:1.098612 14:1.098612 17:1.098612 29:1.386294 38:3.218876 +-1 3:0.000000 2:0.000000 5:0.000000 1:0.000000 4:0.000000 6:4.158882 9:2.079441 7:1.386294 12:0.693147 15:4.394448 18:4.394448 17:3.295836 21:1.098612 14:1.098612 25:1.386294 40:6.437752 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 11:1.386294 9:0.693147 10:0.693147 19:2.197224 20:1.098612 17:1.098612 14:1.098612 26:4.158882 31:1.386294 36:3.218876 41:1.609438 64:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 10:1.386294 6:0.693147 12:0.693147 17:1.098612 13:1.098612 30:2.772588 27:1.386294 36:1.609438 44:1.609438 64:5.375277 63:5.375277 85:4.158884 +-1 5:0.000000 2:0.000000 10:1.386294 6:0.693147 76:1.945910 100:4.158884 128:8.788900 123:6.591675 118:2.197225 121:2.197225 151:2.397895 183:2.484907 170:2.484907 195:2.564949 225:7.917171 236:5.278114 +-1 2:0.000000 4:0.000000 5:0.000000 7:2.079441 10:1.386294 11:0.693147 6:0.693147 20:4.394448 21:1.098612 30:5.545176 32:1.386294 28:1.386294 27:1.386294 34:3.218876 60:12.542313 58:1.791759 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 6:0.693147 11:0.693147 8:0.693147 34:1.609438 39:1.609438 51:3.583518 64:1.791759 63:1.791759 56:1.791759 60:1.791759 49:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:4.852029 11:0.693147 7:0.693147 8:0.693147 17:1.098612 25:1.386294 34:1.609438 39:1.609438 51:7.167036 61:1.791759 58:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 11:1.386294 6:0.693147 8:0.693147 16:1.098612 30:6.931470 36:1.609438 39:1.609438 57:1.791759 58:1.791759 49:1.791759 76:3.891820 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:1.386294 7:0.693147 10:0.693147 8:0.693147 39:1.609438 57:1.791759 80:1.945910 123:6.591675 139:2.302585 138:2.302585 165:4.969814 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:0.693147 11:0.693147 8:0.693147 18:1.098612 30:1.386294 25:1.386294 43:1.609438 39:1.609438 61:3.583518 64:1.791759 63:1.791759 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 8:1.386294 7:0.693147 11:0.693147 6:0.693147 19:1.098612 39:1.609438 80:5.837730 72:1.945910 100:6.238326 138:2.302585 191:4.969814 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 6:2.772588 9:1.386294 10:0.693147 8:0.693147 20:2.197224 25:1.386294 39:1.609438 61:5.375277 60:3.583518 58:3.583518 56:3.583518 53:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:0.693147 8:0.693147 21:1.098612 25:1.386294 34:1.609438 39:1.609438 56:1.791759 77:3.891820 76:1.945910 80:1.945910 69:1.945910 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:12.476646 7:1.386294 10:1.386294 9:0.693147 8:0.693147 17:21.972240 20:5.493060 21:1.098612 13:1.098612 30:6.931470 25:4.158882 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:3.465735 11:1.386294 10:0.693147 9:0.693147 8:0.693147 20:2.197224 16:1.098612 17:1.098612 18:1.098612 26:2.772588 39:1.609438 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:6.931470 8:1.386294 10:1.386294 7:0.693147 11:0.693147 17:1.098612 20:1.098612 27:1.386294 38:3.218876 34:1.609438 39:1.609438 60:3.583518 +-1 2:0.000000 4:0.000000 39:1.609438 76:1.945910 179:2.484907 297:2.833213 325:2.890372 567:7.110696 581:3.555348 1217:4.465908 1967:5.010635 2544:5.347108 2999:5.568345 5460:12.526796 14429:6.957497 14430:6.957497 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 12:0.693147 29:1.386294 44:3.218876 63:3.583518 62:1.791759 64:1.791759 65:3.891820 78:1.945910 109:2.197225 111:2.197225 287:2.772589 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:8.317764 10:2.772588 7:2.079441 9:0.693147 8:0.693147 12:0.693147 20:1.098612 15:1.098612 25:1.386294 32:1.386294 29:1.386294 22:1.386294 +-1 1:0.000000 3:0.000000 5:0.000000 2:0.000000 4:0.000000 11:0.693147 12:0.693147 9:0.693147 17:1.098612 15:1.098612 19:1.098612 31:1.386294 29:1.386294 41:1.609438 39:1.609438 63:3.583518 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 10:1.386294 11:0.693147 8:0.693147 12:0.693147 13:1.098612 25:4.158882 29:1.386294 42:1.609438 38:1.609438 39:1.609438 43:1.609438 44:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 10:3.465735 11:2.079441 7:0.693147 17:2.197224 21:1.098612 19:1.098612 16:1.098612 18:1.098612 22:12.476646 25:5.545176 28:2.772588 44:17.703818 39:14.484942 +-1 5:0.000000 1:0.000000 4:0.000000 2:0.000000 10:1.386294 8:0.693147 12:0.693147 13:2.197224 16:1.098612 19:1.098612 38:4.828314 44:1.609438 63:1.791759 47:1.791759 58:1.791759 45:1.791759 +-1 1:0.000000 13:2.197224 29:1.386294 44:1.609438 45:3.583518 2175:10.329572 5658:12.526796 +-1 2:0.000000 4:0.000000 11:1.386294 12:0.693147 19:1.098612 38:1.609438 44:1.609438 63:3.583518 61:1.791759 59:1.791759 75:1.945910 77:1.945910 110:2.197225 127:2.197225 139:2.302585 155:2.397895 +-1 3:0.000000 1:0.000000 117:2.197225 204:2.564949 303:2.833213 1108:4.317488 4839:6.263398 14534:6.957497 14535:6.957497 14536:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 8:0.693147 14:1.098612 40:1.609438 64:3.583518 63:3.583518 48:1.791759 76:3.891820 73:1.945910 84:1.945910 123:4.394450 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.079441 9:0.693147 6:0.693147 8:0.693147 19:1.098612 21:1.098612 31:2.772588 26:1.386294 39:1.609438 38:1.609438 44:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 7:1.386294 19:1.098612 13:1.098612 16:1.098612 29:1.386294 22:1.386294 24:1.386294 44:4.828314 63:8.958795 64:3.583518 45:1.791759 62:1.791759 78:1.945910 +-1 2:0.000000 3:0.000000 5:0.000000 4:0.000000 1:0.000000 12:2.079441 10:2.079441 7:2.079441 8:0.693147 6:0.693147 13:1.098612 26:2.772588 29:1.386294 25:1.386294 36:3.218876 44:1.609438 +-1 17:2.197224 16:1.098612 28:5.545176 37:1.609438 44:1.609438 68:1.945910 91:2.079442 101:2.079442 97:2.079442 112:2.197225 134:2.302585 132:2.302585 135:2.302585 148:2.397895 188:2.484907 189:2.484907 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 10:5.545176 11:5.545176 9:4.158882 6:2.079441 8:1.386294 12:0.693147 20:2.197224 16:1.098612 13:1.098612 29:2.772588 30:1.386294 +-1 44:1.609438 62:3.583518 45:3.583518 59:1.791759 63:1.791759 192:2.484907 4376:6.263398 14608:6.957497 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 4:0.000000 9:1.386294 11:1.386294 12:0.693147 7:0.693147 19:2.197224 17:2.197224 20:1.098612 31:1.386294 24:1.386294 38:1.609438 36:1.609438 +-1 1:0.000000 62:1.791759 3505:5.857933 14617:6.957497 +-1 16:1.098612 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 653:3.688879 684:11.283600 796:3.912023 1337:4.553877 1652:9.750394 2555:5.347108 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 9:0.693147 11:0.693147 28:1.386294 44:1.609438 63:5.375277 59:1.791759 64:1.791759 71:1.945910 81:1.945910 99:2.079442 +-1 16:1.098612 78:1.945910 89:2.079442 122:2.197225 212:2.564949 313:2.890372 383:3.091042 395:3.135494 489:3.367296 653:3.688879 684:11.283600 796:3.912023 1337:4.553877 2555:5.347108 5599:6.263398 +-1 1:0.000000 3:0.000000 16:1.098612 78:1.945910 114:2.197225 313:2.890372 448:3.258097 1778:4.875197 3653:5.857933 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 7:1.386294 21:1.098612 14:1.098612 31:1.386294 44:1.609438 35:1.609438 63:3.583518 64:1.791759 61:1.791759 59:1.791759 84:1.945910 82:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 6:4.158882 10:2.772588 18:1.098612 20:1.098612 30:1.386294 28:1.386294 42:1.609438 60:8.958795 56:5.375277 50:1.791759 53:1.791759 58:1.791759 57:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 10:3.465735 12:2.079441 9:1.386294 8:1.386294 11:0.693147 6:0.693147 19:1.098612 21:1.098612 13:1.098612 14:1.098612 28:1.386294 +-1 4:0.000000 2:0.000000 5:0.000000 3:0.000000 12:0.693147 6:0.693147 9:0.693147 7:0.693147 17:1.098612 15:1.098612 19:1.098612 30:1.386294 24:1.386294 41:1.609438 63:5.375277 64:3.583518 +-1 14:1.098612 44:1.609438 43:1.609438 35:1.609438 97:2.079442 159:2.397895 371:3.044522 1684:4.875197 2713:5.568345 3300:11.715866 14708:6.957497 +-1 5:0.000000 2:0.000000 3:0.000000 1:0.000000 4:0.000000 7:0.693147 12:0.693147 21:1.098612 17:1.098612 14:1.098612 40:3.218876 44:1.609438 35:1.609438 63:5.375277 64:1.791759 59:1.791759 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 12:1.386294 9:1.386294 6:0.693147 8:0.693147 13:2.197224 19:1.098612 21:1.098612 17:1.098612 14:1.098612 31:1.386294 22:1.386294 +-1 5:0.000000 2:0.000000 1:0.000000 3:0.000000 12:1.386294 19:2.197224 14:1.098612 31:1.386294 35:1.609438 44:1.609438 64:5.375277 63:5.375277 81:1.945910 82:1.945910 89:8.317768 99:2.079442 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 7:1.386294 8:1.386294 10:1.386294 19:3.295836 15:1.098612 32:4.158882 31:1.386294 24:1.386294 56:1.791759 48:1.791759 78:1.945910 102:2.079442 +-1 1:0.000000 4:0.000000 6:0.693147 32:1.386294 24:1.386294 34:1.609438 49:1.791759 48:1.791759 72:1.945910 70:1.945910 90:4.158884 100:2.079442 106:2.197225 105:2.197225 152:2.397895 161:2.397895 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 6:0.693147 10:0.693147 9:0.693147 19:2.197224 20:1.098612 18:1.098612 32:4.158882 30:4.158882 31:1.386294 40:1.609438 37:1.609438 58:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 6:4.158882 12:1.386294 10:1.386294 11:0.693147 7:0.693147 20:2.197224 18:2.197224 16:1.098612 14:1.098612 32:4.158882 25:2.772588 +-1 2:0.000000 4:0.000000 10:4.158882 6:1.386294 19:1.098612 18:1.098612 14:1.098612 27:1.386294 31:1.386294 29:1.386294 32:1.386294 42:1.609438 43:1.609438 35:1.609438 50:5.375277 49:3.583518 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 7:1.386294 8:1.386294 6:0.693147 10:0.693147 18:2.197224 24:5.545176 31:4.158882 32:2.772588 25:1.386294 37:1.609438 39:1.609438 +-1 2:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 19:1.098612 20:1.098612 15:1.098612 30:4.158882 25:2.772588 31:1.386294 32:1.386294 42:1.609438 43:1.609438 57:5.375277 58:1.791759 +-1 4:0.000000 2:0.000000 5:0.000000 1:0.000000 3:0.000000 10:2.079441 9:1.386294 11:0.693147 12:0.693147 18:3.295836 20:1.098612 32:4.158882 31:1.386294 42:1.609438 38:1.609438 57:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 11:4.158882 10:2.772588 7:0.693147 12:0.693147 8:0.693147 14:1.098612 15:1.098612 13:1.098612 31:1.386294 28:1.386294 26:1.386294 27:1.386294 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 6:1.386294 12:0.693147 14:2.197224 18:2.197224 17:1.098612 13:1.098612 16:1.098612 32:4.158882 27:1.386294 30:1.386294 28:1.386294 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 11:0.693147 9:0.693147 13:2.197224 21:2.197224 16:1.098612 17:1.098612 22:1.386294 32:1.386294 42:1.609438 45:1.791759 84:1.945910 70:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 6:0.693147 18:1.098612 32:4.158882 38:1.609438 58:3.583518 51:1.791759 76:1.945910 123:2.197225 127:2.197225 151:4.795790 244:5.416100 269:2.772589 +-1 11:1.386294 6:0.693147 17:1.098612 28:1.386294 34:1.609438 53:1.791759 69:1.945910 83:1.945910 74:1.945910 100:4.158884 116:4.394450 125:2.197225 110:2.197225 144:2.397895 176:2.484907 171:2.484907 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 6:17.328675 8:2.079441 9:1.386294 12:0.693147 10:0.693147 7:0.693147 18:5.493060 16:5.493060 20:1.098612 30:6.931470 25:4.158882 +-1 3:0.000000 1:0.000000 4:0.000000 2:0.000000 5:0.000000 9:1.386294 12:0.693147 10:0.693147 21:2.197224 18:2.197224 13:1.098612 20:1.098612 17:1.098612 32:4.158882 24:2.772588 28:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 7:2.079441 10:2.079441 9:1.386294 11:0.693147 6:0.693147 20:2.197224 21:1.098612 19:1.098612 15:1.098612 26:6.931470 +-1 1:0.000000 9:0.693147 13:1.098612 32:1.386294 62:1.791759 81:1.945910 189:7.454721 455:3.258097 460:3.295837 491:3.367296 544:3.465736 703:3.761200 892:4.060443 1177:4.382027 1264:8.931816 1425:4.653960 +-1 2:0.000000 1:0.000000 9:1.386294 12:0.693147 11:0.693147 16:2.197224 19:1.098612 21:1.098612 32:2.772588 28:2.772588 31:1.386294 22:1.386294 27:1.386294 40:3.218876 38:1.609438 59:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 11:2.079441 10:1.386294 12:0.693147 9:0.693147 18:4.394448 16:3.295836 15:3.295836 20:1.098612 21:1.098612 14:1.098612 32:2.772588 35:1.609438 53:1.791759 +-1 2:0.000000 4:0.000000 32:2.772588 934:4.110874 2351:16.041324 15045:6.957497 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 9:1.386294 11:0.693147 12:0.693147 21:2.197224 20:1.098612 32:2.772588 35:3.218876 38:1.609438 69:3.891820 67:1.945910 103:2.079442 +-1 2:0.000000 3:0.000000 19:1.098612 21:1.098612 16:1.098612 14:1.098612 32:2.772588 28:1.386294 29:1.386294 40:3.218876 39:1.609438 42:1.609438 41:1.609438 53:1.791759 71:1.945910 68:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:2.079441 12:1.386294 11:0.693147 10:0.693147 20:2.197224 32:2.772588 31:1.386294 24:1.386294 40:1.609438 58:1.791759 53:1.791759 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 19:1.098612 17:1.098612 21:1.098612 14:1.098612 32:4.158882 31:1.386294 35:1.609438 37:1.609438 41:1.609438 62:3.583518 103:2.079442 +-1 1:0.000000 3:0.000000 9:1.386294 19:1.098612 14:1.098612 32:1.386294 31:1.386294 35:1.609438 59:1.791759 71:1.945910 99:4.158884 132:2.302585 134:2.302585 159:2.397895 189:2.484907 248:2.708050 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 8:1.386294 12:0.693147 21:2.197224 20:1.098612 14:1.098612 32:2.772588 41:1.609438 103:2.079442 99:2.079442 90:2.079442 148:2.397895 +-1 2:0.000000 5:0.000000 4:0.000000 9:2.079441 11:1.386294 8:1.386294 12:0.693147 10:0.693147 21:2.197224 20:1.098612 17:1.098612 19:1.098612 16:1.098612 13:1.098612 28:4.158882 32:2.772588 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 20:1.098612 14:1.098612 32:2.772588 35:1.609438 103:2.079442 113:2.197225 115:2.197225 134:4.605170 +-1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 1:0.000000 6:3.465735 10:1.386294 9:1.386294 7:0.693147 21:3.295836 17:2.197224 16:1.098612 20:1.098612 14:1.098612 32:4.158882 26:2.772588 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:1.386294 10:1.386294 7:0.693147 12:0.693147 11:0.693147 18:3.295836 19:2.197224 20:2.197224 17:1.098612 15:1.098612 32:2.772588 31:2.772588 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 9:1.386294 12:0.693147 8:0.693147 11:0.693147 19:1.098612 20:1.098612 17:1.098612 32:2.772588 24:2.772588 22:1.386294 43:1.609438 35:1.609438 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 12:1.386294 9:1.386294 11:1.386294 10:0.693147 19:2.197224 20:1.098612 13:1.098612 16:1.098612 32:5.545176 31:2.772588 29:1.386294 +-1 1:0.000000 3:0.000000 4:0.000000 32:1.386294 103:2.079442 119:2.197225 186:2.484907 217:2.564949 219:2.639057 248:2.708050 274:2.772589 307:2.833213 410:3.135494 558:3.496508 794:7.824046 959:4.110874 +-1 2:0.000000 3:0.000000 5:0.000000 1:0.000000 4:0.000000 11:0.693147 6:0.693147 9:0.693147 12:0.693147 18:1.098612 20:1.098612 13:1.098612 32:5.545176 27:1.386294 38:3.218876 58:1.791759 +-1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 8:2.079441 10:1.386294 12:1.386294 9:0.693147 17:2.197224 16:1.098612 19:1.098612 14:1.098612 31:1.386294 32:1.386294 39:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 6:6.931470 9:2.079441 10:1.386294 12:0.693147 8:0.693147 18:2.197224 21:2.197224 16:2.197224 20:1.098612 15:1.098612 32:2.772588 27:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 9:0.693147 7:0.693147 19:2.197224 20:1.098612 18:1.098612 13:1.098612 14:1.098612 31:2.772588 32:2.772588 26:1.386294 35:1.609438 +-1 3:0.000000 32:1.386294 3379:5.857933 15271:6.957497 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 9:1.386294 11:0.693147 8:0.693147 17:2.197224 21:2.197224 13:1.098612 18:1.098612 16:1.098612 32:2.772588 22:2.772588 31:1.386294 +-1 1:0.000000 5:0.000000 2:0.000000 10:2.772588 12:0.693147 11:0.693147 9:0.693147 7:0.693147 8:0.693147 18:4.394448 21:2.197224 17:2.197224 20:1.098612 13:1.098612 14:1.098612 32:4.158882 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 6:8.317764 9:2.079441 12:0.693147 10:0.693147 16:3.295836 21:2.197224 17:2.197224 20:1.098612 19:1.098612 18:1.098612 32:6.931470 24:4.158882 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 9:2.079441 12:0.693147 11:0.693147 18:4.394448 20:1.098612 19:1.098612 14:1.098612 32:4.158882 31:1.386294 28:1.386294 24:1.386294 +-1 1:0.000000 5:0.000000 2:0.000000 3:0.000000 4:0.000000 8:5.545176 7:2.079441 10:1.386294 16:2.197224 19:2.197224 20:1.098612 13:1.098612 21:1.098612 32:4.158882 30:2.772588 22:1.386294 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 9:0.693147 8:0.693147 10:0.693147 16:1.098612 28:1.386294 24:1.386294 35:1.609438 51:1.791759 48:1.791759 125:2.197225 139:2.302585 305:2.833213 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 21:1.098612 32:4.158882 22:1.386294 65:1.945910 82:1.945910 103:2.079442 160:2.397895 260:2.708050 301:2.833213 502:3.401197 806:3.912023 +-1 2:0.000000 4:0.000000 12:0.693147 9:0.693147 10:0.693147 11:0.693147 15:1.098612 32:4.158882 27:1.386294 38:1.609438 43:1.609438 47:1.791759 57:1.791759 60:1.791759 45:1.791759 65:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:3.465735 11:2.772588 12:0.693147 7:0.693147 6:0.693147 21:2.197224 13:1.098612 18:1.098612 17:1.098612 32:2.772588 30:2.772588 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 11:0.693147 30:1.386294 22:1.386294 27:1.386294 43:1.609438 60:1.791759 96:2.079442 160:2.397895 181:2.484907 163:2.484907 197:2.564949 224:2.639057 +-1 3:0.000000 1:0.000000 10:1.386294 8:0.693147 9:0.693147 15:1.098612 24:1.386294 32:1.386294 70:1.945910 93:2.079442 159:2.397895 163:2.484907 203:2.564949 225:2.639057 280:2.772589 375:3.044522 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 1:0.000000 3:0.000000 5:0.000000 10:0.693147 11:0.693147 32:1.386294 36:1.609438 62:1.791759 61:1.791759 57:1.791759 70:1.945910 103:2.079442 96:2.079442 147:2.397895 165:2.484907 231:2.639057 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:0.693147 9:0.693147 6:0.693147 12:0.693147 20:1.098612 22:1.386294 32:1.386294 24:1.386294 76:1.945910 69:1.945910 103:2.079442 138:2.302585 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:2.079441 11:1.386294 10:0.693147 12:0.693147 7:0.693147 20:3.295836 17:2.197224 15:2.197224 14:1.098612 19:1.098612 32:6.931470 +-1 3:0.000000 4:0.000000 2:0.000000 5:0.000000 1:0.000000 7:2.079441 11:2.079441 9:2.079441 12:0.693147 10:0.693147 16:4.394448 17:2.197224 18:2.197224 21:1.098612 20:1.098612 14:1.098612 +-1 1:0.000000 12:0.693147 32:1.386294 43:1.609438 41:1.609438 68:1.945910 96:2.079442 156:2.397895 193:2.564949 466:6.591674 562:3.555348 1113:4.317488 1216:13.397724 1447:4.653960 1943:5.010635 3054:5.568345 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 12:0.693147 10:0.693147 7:0.693147 9:0.693147 16:3.295836 13:2.197224 20:1.098612 17:1.098612 21:1.098612 32:2.772588 26:1.386294 25:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 6:3.465735 9:1.386294 12:0.693147 11:0.693147 21:3.295836 20:1.098612 19:1.098612 18:1.098612 25:5.545176 32:4.158882 31:2.772588 28:1.386294 27:1.386294 +-1 1:0.000000 3:0.000000 1482:4.753590 15527:6.957497 +-1 2:0.000000 4:0.000000 11:0.693147 17:2.197224 18:1.098612 20:1.098612 28:2.772588 32:1.386294 124:4.394450 129:2.302585 161:2.397895 171:4.969814 163:2.484907 215:2.564949 216:2.564949 220:2.639057 +-1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 6:2.772588 10:1.386294 12:0.693147 11:0.693147 9:0.693147 7:0.693147 13:1.098612 20:1.098612 21:1.098612 16:1.098612 32:4.158882 26:2.772588 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 12:0.693147 20:1.098612 13:1.098612 16:1.098612 32:2.772588 35:1.609438 65:1.945910 260:2.708050 241:2.708050 367:3.044522 618:7.221836 1836:5.010635 +-1 2:0.000000 5:0.000000 4:0.000000 11:0.693147 9:0.693147 7:0.693147 21:2.197224 18:2.197224 19:1.098612 17:1.098612 32:2.772588 30:2.772588 31:1.386294 27:1.386294 42:1.609438 84:1.945910 +-1 1:0.000000 5:0.000000 2:0.000000 4:0.000000 3:0.000000 12:0.693147 10:0.693147 13:2.197224 19:1.098612 20:1.098612 15:1.098612 14:1.098612 32:5.545176 22:1.386294 43:1.609438 41:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 10:4.158882 7:4.158882 9:2.079441 6:2.079441 11:2.079441 12:1.386294 13:2.197224 20:1.098612 18:1.098612 16:1.098612 21:1.098612 26:11.090352 28:5.545176 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 6:3.465735 10:1.386294 11:0.693147 8:0.693147 18:2.197224 16:2.197224 13:1.098612 21:1.098612 20:1.098612 30:16.635528 26:4.158882 32:2.772588 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 12:0.693147 7:0.693147 9:0.693147 11:0.693147 21:2.197224 20:1.098612 32:4.158882 26:1.386294 25:1.386294 27:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 10:1.386294 12:0.693147 11:0.693147 8:0.693147 18:2.197224 17:1.098612 16:1.098612 32:2.772588 24:1.386294 22:1.386294 68:1.945910 83:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 6:0.693147 12:0.693147 9:0.693147 21:2.197224 32:1.386294 50:1.791759 61:1.791759 58:1.791759 84:1.945910 89:2.079442 94:2.079442 117:2.197225 +-1 1:0.000000 3:0.000000 382:3.091042 5842:12.526796 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 11:1.386294 10:0.693147 24:1.386294 32:1.386294 58:1.791759 119:2.197225 147:4.795790 161:2.397895 152:2.397895 144:2.397895 178:4.969814 188:2.484907 +-1 2:0.000000 4:0.000000 5:0.000000 20:1.098612 32:2.772588 41:1.609438 65:1.945910 103:2.079442 148:2.397895 260:2.708050 257:2.708050 1174:8.764054 3532:5.857933 5898:12.526796 15697:6.957497 15698:6.957497 +-1 1:0.000000 3:0.000000 1580:4.753590 15699:6.957497 15700:6.957497 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 9:3.465735 10:1.386294 12:1.386294 11:0.693147 18:2.197224 20:2.197224 19:1.098612 21:1.098612 14:1.098612 32:4.158882 31:1.386294 27:1.386294 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:0.693147 12:0.693147 20:1.098612 32:4.158882 76:1.945910 103:2.079442 1292:4.465908 15731:6.957497 15732:6.957497 15733:6.957497 15734:6.957497 +-1 3:0.000000 1:0.000000 29:1.386294 297:2.833213 15736:6.957497 15737:6.957497 15738:6.957497 15739:6.957497 15740:6.957497 15741:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:0.693147 12:0.693147 11:0.693147 9:0.693147 7:0.693147 19:1.098612 20:1.098612 30:5.545176 32:4.158882 31:2.772588 28:1.386294 25:1.386294 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 28:1.386294 26:1.386294 39:1.609438 81:1.945910 103:2.079442 171:2.484907 208:2.564949 355:2.995732 376:3.044522 +-1 5:0.000000 3:0.000000 4:0.000000 1:0.000000 10:1.386294 7:1.386294 12:0.693147 8:0.693147 20:3.295836 21:1.098612 16:1.098612 14:1.098612 30:12.476646 32:6.931470 31:1.386294 41:1.609438 +-1 5:0.000000 1:0.000000 2:0.000000 4:0.000000 3:0.000000 9:1.386294 12:0.693147 6:0.693147 10:0.693147 20:2.197224 17:2.197224 21:1.098612 32:2.772588 28:1.386294 38:1.609438 36:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 7:0.693147 20:1.098612 13:1.098612 32:4.158882 22:1.386294 43:1.609438 40:1.609438 45:1.791759 61:1.791759 49:1.791759 65:1.945910 69:1.945910 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 9:2.079441 6:2.079441 10:0.693147 7:0.693147 12:0.693147 20:1.098612 13:1.098612 21:1.098612 16:1.098612 32:5.545176 25:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 6:5.545176 10:2.772588 8:2.079441 9:1.386294 20:4.394448 18:3.295836 19:2.197224 21:2.197224 17:1.098612 30:13.862940 25:5.545176 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 6:11.783499 10:2.079441 7:1.386294 11:0.693147 9:0.693147 19:3.295836 21:1.098612 18:1.098612 20:1.098612 16:1.098612 26:5.545176 32:4.158882 +-1 3:0.000000 5:0.000000 6:2.079441 7:1.386294 8:1.386294 17:2.197224 18:1.098612 21:1.098612 19:1.098612 31:1.386294 22:1.386294 34:4.828314 39:3.218876 38:1.609438 62:8.958795 49:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 7:2.772588 12:0.693147 6:0.693147 11:0.693147 20:3.295836 19:1.098612 26:5.545176 32:4.158882 28:2.772588 25:1.386294 30:1.386294 22:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 9:0.693147 20:1.098612 19:1.098612 32:2.772588 31:1.386294 28:1.386294 84:1.945910 81:1.945910 103:2.079442 99:2.079442 +-1 3:0.000000 10:0.693147 32:1.386294 80:1.945910 278:2.772589 436:3.218876 703:11.283600 1044:4.248495 1777:4.875197 2036:5.164786 5970:12.526796 15905:6.957497 15906:6.957497 15907:6.957497 15908:6.957497 15909:6.957497 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 12:2.079441 7:0.693147 14:1.098612 19:1.098612 18:1.098612 32:6.931470 29:2.772588 31:1.386294 24:1.386294 39:1.609438 45:3.583518 59:1.791759 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 3:0.000000 7:3.465735 9:2.772588 6:2.079441 12:0.693147 8:0.693147 20:2.197224 21:2.197224 18:1.098612 17:1.098612 13:1.098612 30:6.931470 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 9:2.079441 6:1.386294 11:0.693147 12:0.693147 18:1.098612 17:1.098612 20:1.098612 32:2.772588 27:1.386294 26:1.386294 25:1.386294 +-1 3:0.000000 2:0.000000 1:0.000000 8:2.079441 10:0.693147 9:0.693147 11:0.693147 18:3.295836 21:3.295836 16:3.295836 17:2.197224 14:2.197224 19:1.098612 24:5.545176 22:2.772588 25:2.772588 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:7.624617 9:2.079441 12:1.386294 20:3.295836 16:2.197224 19:1.098612 17:1.098612 32:8.317764 30:4.158882 25:2.772588 26:2.772588 +-1 2:0.000000 4:0.000000 14:1.098612 32:1.386294 35:1.609438 57:3.583518 53:1.791759 60:1.791759 69:3.891820 74:1.945910 95:2.079442 144:2.397895 175:2.484907 167:2.484907 213:2.564949 246:2.708050 +-1 2:0.000000 5:0.000000 4:0.000000 9:1.386294 12:0.693147 10:0.693147 11:0.693147 19:1.098612 17:1.098612 32:4.158882 31:1.386294 84:1.945910 75:1.945910 86:2.079442 94:2.079442 134:2.302585 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 5:0.000000 7:2.079441 12:1.386294 10:1.386294 11:0.693147 9:0.693147 19:2.197224 31:2.772588 32:2.772588 24:1.386294 42:4.828314 36:1.609438 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 11:2.079441 10:0.693147 6:0.693147 9:0.693147 12:0.693147 21:2.197224 17:2.197224 15:1.098612 14:1.098612 24:6.931470 32:2.772588 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 103:2.079442 117:2.197225 2210:10.329572 3065:11.136690 3675:5.857933 16068:6.957497 16069:6.957497 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 6:0.693147 20:2.197224 13:2.197224 15:1.098612 14:1.098612 32:2.772588 22:1.386294 25:1.386294 24:1.386294 45:1.791759 82:1.945910 +-1 4:0.000000 3:0.000000 10:0.693147 9:0.693147 8:0.693147 21:1.098612 17:1.098612 22:1.386294 38:1.609438 48:5.375277 59:1.791759 80:1.945910 81:1.945910 102:4.158884 94:2.079442 97:2.079442 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 4:0.000000 8:3.465735 10:2.079441 9:1.386294 12:0.693147 11:0.693147 16:3.295836 18:2.197224 20:1.098612 13:1.098612 21:1.098612 17:1.098612 +-1 4:0.000000 5:0.000000 2:0.000000 1:0.000000 11:0.693147 8:0.693147 12:0.693147 13:2.197224 17:2.197224 20:1.098612 19:1.098612 14:1.098612 32:2.772588 31:2.772588 25:1.386294 24:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 9:1.386294 12:0.693147 10:0.693147 6:0.693147 11:0.693147 18:2.197224 20:1.098612 17:1.098612 13:1.098612 14:1.098612 32:2.772588 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:1.386294 11:0.693147 17:1.098612 32:2.772588 28:2.772588 27:1.386294 24:1.386294 22:1.386294 37:1.609438 39:1.609438 62:1.791759 49:1.791759 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.772588 6:2.772588 11:1.386294 9:0.693147 12:0.693147 19:1.098612 21:1.098612 17:1.098612 18:1.098612 20:1.098612 32:5.545176 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 9:1.386294 10:0.693147 19:3.295836 16:3.295836 17:2.197224 20:2.197224 21:1.098612 18:1.098612 31:2.772588 32:1.386294 28:1.386294 +-1 2:0.000000 4:0.000000 9:0.693147 6:0.693147 32:1.386294 38:1.609438 34:1.609438 52:1.791759 81:3.891820 67:3.891820 92:2.079442 91:2.079442 90:2.079442 125:2.197225 115:2.197225 139:2.302585 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 103:2.079442 452:3.258097 860:8.014666 1189:4.382027 1870:10.021270 16200:6.957497 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:4.158882 103:2.079442 3332:17.573799 3333:5.857933 5945:6.263398 +-1 4:0.000000 5:0.000000 1:0.000000 2:0.000000 3:0.000000 10:1.386294 7:1.386294 8:1.386294 12:0.693147 11:0.693147 20:1.098612 32:2.772588 26:1.386294 24:1.386294 41:1.609438 50:3.583518 +-1 99:2.079442 363:3.044522 567:3.555348 732:3.806662 808:3.951244 841:4.007333 1056:4.248495 1192:4.382027 2540:5.347108 16230:6.957497 16231:6.957497 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 10:1.386294 12:0.693147 6:0.693147 20:1.098612 13:1.098612 16:1.098612 32:2.772588 28:1.386294 38:3.218876 34:1.609438 43:1.609438 +-1 3:0.000000 5:0.000000 2:0.000000 4:0.000000 1:0.000000 6:6.931470 10:2.079441 9:1.386294 8:0.693147 18:6.591672 16:4.394448 21:2.197224 20:1.098612 19:1.098612 32:13.862940 28:1.386294 +-1 3:0.000000 8:0.693147 18:4.394448 14:2.197224 16:1.098612 19:1.098612 32:12.476646 26:8.317764 41:1.609438 52:3.583518 48:3.583518 56:1.791759 47:1.791759 74:11.675460 77:5.837730 84:1.945910 +-1 4:0.000000 2:0.000000 1:0.000000 3:0.000000 5:0.000000 6:2.079441 9:1.386294 7:1.386294 10:0.693147 11:0.693147 20:1.098612 21:1.098612 19:1.098612 32:4.158882 30:4.158882 28:2.772588 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:2.772588 103:2.079442 188:2.484907 171:2.484907 260:2.708050 376:3.044522 703:7.522400 997:4.174387 2159:5.164786 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 6:15.249234 9:1.386294 12:0.693147 11:0.693147 17:3.295836 19:2.197224 20:1.098612 18:1.098612 32:4.158882 30:4.158882 25:2.772588 26:1.386294 +-1 2:0.000000 4:0.000000 14:1.098612 32:2.772588 232:2.639057 305:2.833213 640:3.688879 1705:9.750394 2765:11.136690 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:0.693147 20:1.098612 32:1.386294 43:1.609438 103:2.079442 90:2.079442 117:2.197225 160:2.397895 186:2.484907 323:2.890372 401:3.135494 +-1 5:0.000000 3:0.000000 4:0.000000 1:0.000000 2:0.000000 9:1.386294 8:1.386294 11:1.386294 7:0.693147 18:8.788896 21:2.197224 17:1.098612 32:5.545176 30:2.772588 29:1.386294 45:3.583518 +-1 3:0.000000 2:0.000000 9:1.386294 11:0.693147 18:2.197224 19:1.098612 21:1.098612 24:1.386294 43:1.609438 58:1.791759 59:1.791759 84:1.945910 78:1.945910 85:2.079442 111:2.197225 115:2.197225 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 2:0.000000 4:0.000000 5:0.000000 12:1.386294 10:1.386294 9:0.693147 19:1.098612 20:1.098612 14:1.098612 32:2.772588 29:1.386294 43:1.609438 35:1.609438 103:2.079442 237:2.639057 273:2.772589 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 12:0.693147 9:0.693147 20:1.098612 18:1.098612 21:1.098612 14:1.098612 32:4.158882 31:4.158882 28:2.772588 24:1.386294 35:1.609438 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:0.693147 10:0.693147 11:0.693147 8:0.693147 12:0.693147 21:3.295836 20:2.197224 32:4.158882 31:1.386294 25:1.386294 43:1.609438 +-1 4:0.000000 1:0.000000 3:0.000000 2:0.000000 5:0.000000 6:1.386294 11:1.386294 9:0.693147 12:0.693147 10:0.693147 18:2.197224 20:1.098612 32:2.772588 25:1.386294 23:1.386294 37:1.609438 +-1 1:0.000000 3:0.000000 13:1.098612 32:1.386294 24:1.386294 62:1.791759 95:2.079442 132:2.302585 178:2.484907 220:2.639057 289:2.772589 300:2.833213 376:3.044522 423:3.218876 451:3.258097 573:3.555348 +-1 1:0.000000 5:0.000000 3:0.000000 2:0.000000 4:0.000000 9:1.386294 10:0.693147 11:0.693147 7:0.693147 12:0.693147 18:1.098612 17:1.098612 16:1.098612 21:1.098612 14:1.098612 20:1.098612 +-1 2:0.000000 4:0.000000 6:3.465735 12:0.693147 9:0.693147 10:0.693147 11:0.693147 21:1.098612 16:1.098612 17:1.098612 32:1.386294 42:1.609438 34:1.609438 51:1.791759 61:1.791759 58:1.791759 +-1 78:1.945910 129:2.302585 148:2.397895 235:2.639057 313:2.890372 365:3.044522 393:3.135494 684:7.522400 1601:4.753590 2914:5.568345 3680:5.857933 16438:6.957497 16439:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 6:9.704058 9:5.545176 12:0.693147 10:0.693147 16:4.394448 17:2.197224 20:1.098612 19:1.098612 18:1.098612 30:5.545176 32:2.772588 28:2.772588 31:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 8:1.386294 11:1.386294 9:1.386294 6:1.386294 10:0.693147 7:0.693147 21:2.197224 19:1.098612 14:1.098612 28:4.158882 32:2.772588 42:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 20:1.098612 32:4.158882 45:1.791759 58:1.791759 73:1.945910 67:1.945910 103:2.079442 94:2.079442 158:2.397895 150:2.397895 161:2.397895 184:2.484907 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 6:1.386294 12:0.693147 8:0.693147 20:1.098612 17:1.098612 32:4.158882 25:1.386294 39:1.609438 60:5.375277 57:3.583518 50:1.791759 84:1.945910 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 6:1.386294 9:1.386294 12:0.693147 10:0.693147 11:0.693147 17:4.394448 20:1.098612 21:1.098612 15:1.098612 18:1.098612 14:1.098612 +-1 1:0.000000 3:0.000000 8:0.693147 10:0.693147 11:0.693147 489:3.367296 518:3.401197 1009:4.174387 1104:8.634976 2081:10.329572 5625:6.263398 4971:6.263398 +-1 5:0.000000 2:0.000000 4:0.000000 9:4.852029 7:2.079441 10:1.386294 12:0.693147 6:0.693147 17:5.493060 19:3.295836 20:2.197224 16:1.098612 18:1.098612 14:1.098612 30:6.931470 32:5.545176 +-1 1:0.000000 3:0.000000 6:0.693147 18:2.197224 16:1.098612 24:5.545176 32:2.772588 28:2.772588 26:1.386294 25:1.386294 38:1.609438 49:3.583518 52:1.791759 58:1.791759 79:1.945910 68:1.945910 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 9:1.386294 12:0.693147 10:0.693147 6:0.693147 8:0.693147 11:0.693147 20:3.295836 19:1.098612 14:1.098612 32:11.090352 34:1.609438 +-1 5:0.000000 2:0.000000 4:0.000000 3:0.000000 1:0.000000 9:4.158882 11:3.465735 6:3.465735 12:1.386294 10:1.386294 8:0.693147 21:3.295836 14:3.295836 18:2.197224 19:1.098612 16:1.098612 +-1 5:0.000000 2:0.000000 6:0.693147 19:1.098612 21:1.098612 32:4.158882 31:1.386294 25:1.386294 110:2.197225 115:2.197225 153:2.397895 216:2.564949 242:5.416100 327:5.780744 730:3.806662 799:3.912023 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 6:1.386294 12:0.693147 9:0.693147 11:0.693147 20:1.098612 21:1.098612 32:4.158882 28:1.386294 39:4.828314 38:1.609438 74:1.945910 103:2.079442 +-1 1:0.000000 17:2.197224 32:1.386294 765:3.850148 850:4.007333 1499:4.753590 1683:4.875197 1876:10.021270 2177:5.164786 2885:5.568345 3589:5.857933 3338:5.857933 16606:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 10:2.079441 8:1.386294 11:1.386294 6:0.693147 7:0.693147 9:0.693147 13:3.295836 21:2.197224 20:1.098612 16:1.098612 32:2.772588 30:2.772588 +-1 3:0.000000 9:0.693147 21:1.098612 32:1.386294 31:1.386294 216:2.564949 617:3.610918 1585:4.753590 5864:18.790194 5865:12.526796 +-1 2:0.000000 4:0.000000 5:0.000000 6:1.386294 9:0.693147 10:0.693147 11:0.693147 12:0.693147 20:3.295836 18:1.098612 13:1.098612 32:4.158882 34:1.609438 43:1.609438 60:1.791759 45:1.791759 +-1 1:0.000000 4:0.000000 3:0.000000 5:0.000000 10:1.386294 9:0.693147 11:0.693147 6:0.693147 21:2.197224 20:1.098612 19:1.098612 13:1.098612 18:1.098612 32:4.158882 31:1.386294 24:1.386294 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 11:1.386294 6:1.386294 12:0.693147 9:0.693147 10:0.693147 20:4.394448 13:1.098612 19:1.098612 32:4.158882 31:1.386294 30:1.386294 38:3.218876 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:0.693147 6:0.693147 18:2.197224 19:1.098612 20:1.098612 21:1.098612 17:1.098612 16:1.098612 32:4.158882 24:4.158882 31:1.386294 34:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 11:1.386294 9:0.693147 13:1.098612 19:1.098612 20:1.098612 21:1.098612 14:1.098612 32:4.158882 31:1.386294 24:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 10:2.079441 6:2.079441 11:0.693147 7:0.693147 9:0.693147 20:2.197224 18:2.197224 21:1.098612 19:1.098612 15:1.098612 28:5.545176 +-1 2:0.000000 4:0.000000 6:2.079441 10:0.693147 11:0.693147 7:0.693147 8:0.693147 18:2.197224 21:1.098612 14:1.098612 26:4.158882 32:2.772588 25:1.386294 28:1.386294 31:1.386294 22:1.386294 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 11:1.386294 6:1.386294 10:0.693147 18:4.394448 20:1.098612 13:1.098612 21:1.098612 16:1.098612 19:1.098612 32:4.158882 22:1.386294 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 8:1.386294 14:1.098612 24:2.772588 25:1.386294 32:1.386294 36:1.609438 41:1.609438 103:6.238326 85:4.158884 88:2.079442 100:2.079442 204:2.564949 +-1 5:0.000000 2:0.000000 4:0.000000 6:1.386294 10:0.693147 11:0.693147 7:0.693147 9:0.693147 20:2.197224 17:1.098612 21:1.098612 32:1.386294 31:1.386294 25:1.386294 36:3.218876 56:1.791759 +-1 4:0.000000 5:0.000000 2:0.000000 10:3.465735 7:1.386294 20:1.098612 32:2.772588 34:3.218876 37:1.609438 43:1.609438 57:3.583518 73:7.783640 75:7.783640 76:5.837730 82:1.945910 91:2.079442 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 8:4.852029 10:3.465735 7:3.465735 6:2.772588 12:1.386294 11:0.693147 9:0.693147 20:3.295836 19:2.197224 18:1.098612 13:1.098612 +-1 5:0.000000 4:0.000000 2:0.000000 6:3.465735 10:2.079441 11:0.693147 9:0.693147 20:3.295836 17:2.197224 21:1.098612 18:1.098612 19:1.098612 30:2.772588 31:1.386294 32:1.386294 23:1.386294 +-1 5:0.000000 4:0.000000 2:0.000000 10:2.079441 7:2.079441 6:2.079441 18:2.197224 19:1.098612 32:1.386294 25:1.386294 60:10.750554 76:7.783640 77:3.891820 80:1.945910 71:1.945910 123:2.197225 +-1 2:0.000000 5:0.000000 6:4.158882 9:0.693147 10:0.693147 11:0.693147 8:0.693147 20:2.197224 13:1.098612 21:1.098612 18:1.098612 25:4.158882 61:5.375277 60:5.375277 53:3.583518 57:1.791759 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 10:2.079441 7:2.079441 12:0.693147 11:0.693147 9:0.693147 21:2.197224 20:1.098612 16:1.098612 18:1.098612 19:1.098612 26:4.158882 +-1 2:0.000000 4:0.000000 5:0.000000 10:2.079441 7:2.079441 12:1.386294 8:1.386294 11:0.693147 6:0.693147 9:0.693147 21:2.197224 20:1.098612 17:1.098612 19:1.098612 28:4.158882 32:1.386294 +-1 2:0.000000 5:0.000000 10:1.386294 6:1.386294 7:1.386294 11:0.693147 21:1.098612 17:1.098612 20:1.098612 31:1.386294 26:1.386294 30:1.386294 42:1.609438 56:5.375277 60:3.583518 49:1.791759 +-1 2:0.000000 4:0.000000 7:3.465735 9:1.386294 12:0.693147 11:0.693147 18:8.788896 19:7.690284 16:4.394448 21:4.394448 14:1.098612 30:4.158882 24:2.772588 32:2.772588 25:1.386294 31:1.386294 +-1 2:0.000000 5:0.000000 6:1.386294 10:0.693147 8:0.693147 7:0.693147 18:1.098612 14:1.098612 32:2.772588 38:3.218876 41:1.609438 56:1.791759 125:2.197225 177:2.484907 172:2.484907 273:2.772589 +-1 3:0.000000 1:0.000000 2:0.000000 10:2.079441 6:1.386294 8:1.386294 7:0.693147 11:0.693147 18:10.986120 21:2.197224 26:8.317764 28:2.772588 25:1.386294 30:1.386294 22:1.386294 32:1.386294 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:1.386294 8:0.693147 10:0.693147 11:0.693147 20:2.197224 15:1.098612 18:1.098612 32:4.158882 30:1.386294 43:1.609438 49:1.791759 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:3.465735 12:0.693147 9:0.693147 18:4.394448 20:1.098612 32:5.545176 36:3.218876 38:1.609438 42:1.609438 39:1.609438 57:5.375277 +-1 4:0.000000 2:0.000000 6:2.079441 9:1.386294 8:0.693147 12:0.693147 17:4.394448 18:2.197224 21:1.098612 19:1.098612 20:1.098612 14:1.098612 25:8.317764 30:6.931470 27:4.158882 31:2.772588 +-1 3:0.000000 9:2.079441 6:1.386294 10:0.693147 17:2.197224 16:1.098612 25:2.772588 43:1.609438 42:1.609438 38:1.609438 53:3.583518 77:11.675460 82:1.945910 93:10.397210 94:4.158884 98:2.079442 +-1 2:0.000000 4:0.000000 5:0.000000 6:6.238323 10:2.772588 12:1.386294 9:0.693147 18:4.394448 21:3.295836 20:2.197224 17:1.098612 25:5.545176 32:2.772588 27:2.772588 30:1.386294 53:5.375277 +-1 5:0.000000 2:0.000000 4:0.000000 6:4.852029 9:0.693147 16:2.197224 20:2.197224 18:1.098612 30:5.545176 32:4.158882 24:1.386294 53:1.791759 83:1.945910 98:4.158884 93:2.079442 106:2.197225 +-1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 6:2.079441 8:2.079441 10:1.386294 9:0.693147 18:2.197224 16:1.098612 21:1.098612 30:4.158882 32:4.158882 26:1.386294 28:1.386294 25:1.386294 +-1 3:0.000000 6:11.783499 7:1.386294 8:1.386294 11:0.693147 18:10.986120 16:4.394448 17:3.295836 26:6.931470 27:4.158882 32:2.772588 25:2.772588 24:1.386294 34:16.094380 38:9.656628 56:12.542313 +-1 2:0.000000 7:1.386294 6:1.386294 8:0.693147 9:0.693147 17:5.493060 16:3.295836 18:2.197224 19:1.098612 14:1.098612 27:2.772588 26:1.386294 28:1.386294 25:1.386294 32:1.386294 38:3.218876 +-1 89:2.079442 382:6.182084 1250:4.465908 1251:4.465908 +-1 2:0.000000 4:0.000000 5:0.000000 7:7.624617 12:0.693147 8:0.693147 9:0.693147 18:5.493060 20:2.197224 32:4.158882 26:4.158882 38:3.218876 35:1.609438 36:1.609438 39:1.609438 50:3.583518 +-1 2:0.000000 3:0.000000 1:0.000000 10:1.386294 9:0.693147 19:3.295836 16:1.098612 21:1.098612 18:1.098612 28:1.386294 32:1.386294 43:3.218876 42:1.609438 39:1.609438 60:10.750554 48:3.583518 +-1 4:0.000000 2:0.000000 1:0.000000 12:3.465735 8:1.386294 10:0.693147 6:0.693147 20:1.098612 18:1.098612 15:1.098612 16:1.098612 33:2.772588 31:1.386294 40:1.609438 37:1.609438 53:1.791759 +-1 2:0.000000 11:0.693147 8:0.693147 16:1.098612 28:1.386294 50:3.583518 58:1.791759 53:1.791759 79:7.783640 74:1.945910 72:1.945910 71:1.945910 94:2.079442 120:2.197225 121:2.197225 161:2.397895 +-1 16:1.098612 27:1.386294 30:1.386294 53:1.791759 50:1.791759 56:1.791759 51:1.791759 48:1.791759 69:15.567280 75:1.945910 72:1.945910 91:2.079442 108:2.197225 135:2.302585 161:4.795790 174:2.484907 +-1 2:0.000000 6:4.158882 7:4.158882 8:1.386294 17:5.493060 16:3.295836 26:6.931470 25:2.772588 24:1.386294 37:3.218876 49:53.752770 50:16.125831 52:5.375277 53:5.375277 57:1.791759 79:42.810020 +-1 2:0.000000 3:0.000000 5:0.000000 1:0.000000 10:0.693147 6:0.693147 18:3.295836 20:1.098612 30:1.386294 22:1.386294 33:1.386294 39:1.609438 35:1.609438 53:1.791759 52:1.791759 51:1.791759 +-1 18:1.098612 14:1.098612 33:1.386294 35:1.609438 132:2.302585 148:2.397895 274:2.772589 517:10.203591 1110:4.317488 +-1 5:0.000000 6:2.772588 8:2.772588 7:2.079441 9:0.693147 10:0.693147 18:2.197224 16:1.098612 21:1.098612 28:6.931470 26:2.772588 22:2.772588 33:2.772588 27:1.386294 42:11.266066 36:3.218876 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 6:4.852029 10:3.465735 8:2.079441 11:2.079441 7:0.693147 16:6.591672 20:1.098612 18:1.098612 14:1.098612 24:5.545176 28:4.158882 30:2.772588 +-1 1:0.000000 6:0.693147 8:0.693147 18:3.295836 33:5.545176 26:1.386294 28:1.386294 30:1.386294 22:1.386294 43:1.609438 39:1.609438 48:1.791759 59:1.791759 79:1.945910 82:1.945910 83:1.945910 +-1 4:0.000000 8:2.772588 6:2.079441 12:1.386294 9:0.693147 10:0.693147 16:4.394448 18:2.197224 21:1.098612 28:4.158882 25:2.772588 33:2.772588 29:1.386294 43:1.609438 38:1.609438 36:1.609438 +-1 2:0.000000 4:0.000000 8:1.386294 10:0.693147 12:0.693147 19:3.295836 18:3.295836 16:2.197224 41:1.609438 43:1.609438 42:1.609438 54:3.583518 55:3.583518 61:1.791759 83:1.945910 92:6.238326 +-1 1:0.000000 6:14.556087 8:3.465735 10:2.772588 7:2.772588 11:2.079441 12:1.386294 9:0.693147 18:8.788896 16:4.394448 17:2.197224 21:1.098612 22:13.862940 26:12.476646 33:11.090352 30:8.317764 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 12:1.386294 11:0.693147 19:1.098612 33:2.772588 31:1.386294 24:1.386294 55:5.375277 54:1.791759 59:1.791759 82:1.945910 99:2.079442 +-1 1:0.000000 14:1.098612 33:1.386294 35:1.609438 78:1.945910 160:2.397895 451:3.258097 872:4.007333 1875:5.010635 1858:5.010635 6086:18.790194 17563:6.957497 17564:6.957497 +-1 5:0.000000 2:0.000000 4:0.000000 1:0.000000 3:0.000000 6:2.079441 7:0.693147 12:0.693147 10:0.693147 31:1.386294 22:1.386294 33:1.386294 24:1.386294 36:3.218876 54:12.542313 55:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 7:2.772588 11:2.079441 10:0.693147 9:0.693147 12:0.693147 16:2.197224 18:1.098612 19:1.098612 20:1.098612 14:1.098612 33:2.772588 +-1 3:0.000000 82:1.945910 +-1 3:0.000000 1:0.000000 2:0.000000 8:1.386294 10:1.386294 6:0.693147 9:0.693147 11:0.693147 16:2.197224 20:1.098612 19:1.098612 33:2.772588 22:1.386294 29:1.386294 28:1.386294 27:1.386294 +-1 1:0.000000 29:1.386294 33:1.386294 3081:5.568345 6096:6.263398 17622:6.957497 17623:6.957497 17624:6.957497 17625:6.957497 17626:6.957497 +-1 2:0.000000 4:0.000000 33:1.386294 38:1.609438 126:2.197225 145:2.397895 268:2.772589 354:2.995732 857:4.007333 1084:4.317488 3948:5.857933 17627:20.872491 17628:13.914994 17629:13.914994 17630:6.957497 17631:6.957497 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 19:1.098612 31:1.386294 29:1.386294 33:1.386294 54:1.791759 55:1.791759 59:1.791759 81:1.945910 102:2.079442 124:2.197225 +-1 4:0.000000 5:0.000000 1:0.000000 2:0.000000 3:0.000000 12:0.693147 13:1.098612 14:1.098612 22:1.386294 33:1.386294 35:1.609438 55:3.583518 54:1.791759 46:1.791759 104:2.079442 149:4.795790 +-1 1:0.000000 3:0.000000 27:1.386294 29:1.386294 33:1.386294 45:1.791759 46:1.791759 103:2.079442 297:2.833213 358:3.044522 1289:4.465908 1519:4.753590 2419:10.694216 2504:5.347108 3593:5.857933 4722:18.790194 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 11:2.772588 12:0.693147 10:0.693147 8:0.693147 9:0.693147 16:2.197224 29:2.772588 33:2.772588 27:1.386294 28:1.386294 43:1.609438 +-1 1:0.000000 3:0.000000 4:0.000000 6:2.079441 10:0.693147 12:0.693147 18:2.197224 14:1.098612 33:2.772588 30:1.386294 34:3.218876 39:1.609438 40:1.609438 35:1.609438 60:3.583518 51:3.583518 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 13:2.197224 33:1.386294 55:3.583518 46:3.583518 54:1.791759 83:1.945910 81:1.945910 78:1.945910 104:2.079442 85:2.079442 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 3:0.000000 7:7.624617 10:2.772588 12:1.386294 11:1.386294 6:0.693147 9:0.693147 8:0.693147 18:10.986120 19:5.493060 16:4.394448 15:4.394448 +-1 1:0.000000 2:0.000000 9:0.693147 10:0.693147 11:0.693147 8:0.693147 18:2.197224 15:2.197224 19:1.098612 33:2.772588 31:2.772588 54:1.791759 55:1.791759 80:1.945910 104:2.079442 86:2.079442 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 6:0.693147 8:0.693147 12:0.693147 21:1.098612 15:1.098612 13:1.098612 33:2.772588 22:1.386294 34:1.609438 45:3.583518 54:1.791759 +-1 1:0.000000 3:0.000000 6:0.693147 12:0.693147 19:2.197224 13:1.098612 18:1.098612 31:2.772588 33:2.772588 29:1.386294 46:1.791759 51:1.791759 78:1.945910 69:1.945910 83:1.945910 85:2.079442 +-1 4:0.000000 2:0.000000 1:0.000000 3:0.000000 5:0.000000 9:4.852029 6:4.158882 12:1.386294 8:0.693147 17:5.493060 20:3.295836 14:1.098612 16:1.098612 21:1.098612 18:1.098612 30:4.158882 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 6:2.079441 11:0.693147 16:1.098612 24:4.158882 22:1.386294 29:1.386294 34:3.218876 61:1.791759 69:1.945910 99:2.079442 118:2.197225 126:2.197225 +-1 2:0.000000 5:0.000000 1:0.000000 4:0.000000 11:0.693147 7:0.693147 20:1.098612 33:2.772588 26:1.386294 25:1.386294 43:1.609438 41:1.609438 54:3.583518 55:3.583518 63:1.791759 52:1.791759 +-1 3:0.000000 40:3.218876 94:2.079442 85:2.079442 88:2.079442 120:2.197225 159:4.795790 149:2.397895 153:2.397895 220:5.278114 253:2.708050 380:3.091042 456:3.258097 475:3.332205 639:3.688879 675:3.737670 +-1 3:0.000000 5:0.000000 2:0.000000 1:0.000000 6:9.010911 10:4.158882 11:1.386294 12:1.386294 15:2.197224 13:1.098612 18:1.098612 16:1.098612 21:1.098612 14:1.098612 33:4.158882 24:1.386294 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 6:9.010911 9:4.158882 10:3.465735 11:1.386294 12:0.693147 8:0.693147 7:0.693147 18:5.493060 17:3.295836 19:2.197224 21:1.098612 27:2.772588 +-1 5:0.000000 1:0.000000 12:0.693147 13:2.197224 19:1.098612 21:1.098612 33:2.772588 22:1.386294 54:7.167036 55:3.583518 49:1.791759 46:1.791759 66:1.945910 104:2.079442 87:2.079442 105:2.197225 +-1 1:0.000000 3:0.000000 2:0.000000 10:2.079441 11:0.693147 17:1.098612 33:1.386294 26:1.386294 43:1.609438 54:1.791759 55:1.791759 104:2.079442 86:2.079442 131:2.302585 144:2.397895 192:2.484907 +-1 2:0.000000 3:0.000000 1:0.000000 5:0.000000 10:2.079441 7:1.386294 11:0.693147 6:0.693147 18:1.098612 14:1.098612 33:2.772588 26:1.386294 22:1.386294 41:1.609438 60:7.167036 54:5.375277 +-1 1:0.000000 3:0.000000 2:0.000000 12:0.693147 18:1.098612 14:1.098612 33:1.386294 31:1.386294 41:1.609438 54:3.583518 55:1.791759 104:2.079442 102:2.079442 192:2.484907 191:2.484907 187:2.484907 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 8:1.386294 12:0.693147 6:0.693147 10:0.693147 18:1.098612 21:1.098612 33:2.772588 36:1.609438 39:1.609438 55:5.375277 54:3.583518 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 19:1.098612 13:1.098612 31:1.386294 29:1.386294 33:1.386294 55:3.583518 54:1.791759 45:1.791759 82:1.945910 104:2.079442 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:1.386294 7:0.693147 10:0.693147 9:0.693147 20:4.394448 21:1.098612 13:1.098612 14:1.098612 33:1.386294 24:1.386294 22:1.386294 +-1 2:0.000000 5:0.000000 3:0.000000 1:0.000000 6:6.238323 10:4.158882 7:3.465735 11:0.693147 17:5.493060 18:2.197224 21:2.197224 19:2.197224 16:2.197224 13:1.098612 15:1.098612 14:1.098612 +-1 1:0.000000 2:0.000000 3:0.000000 8:0.693147 10:0.693147 18:1.098612 93:2.079442 171:2.484907 188:2.484907 342:2.995732 837:3.951244 908:4.060443 1517:9.507180 1889:10.021270 3086:5.568345 6129:6.263398 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 8:4.158882 9:0.693147 7:0.693147 11:0.693147 16:2.197224 14:1.098612 19:1.098612 15:1.098612 29:2.772588 33:2.772588 28:2.772588 24:1.386294 +-1 4:0.000000 1:0.000000 3:0.000000 8:1.386294 10:0.693147 17:2.197224 16:1.098612 21:1.098612 18:1.098612 24:4.158882 25:1.386294 22:1.386294 36:1.609438 49:28.668144 50:3.583518 83:3.891820 +-1 5:0.000000 3:0.000000 1:0.000000 2:0.000000 4:0.000000 6:6.238323 8:2.079441 10:1.386294 12:0.693147 11:0.693147 7:0.693147 18:4.394448 21:2.197224 16:1.098612 29:2.772588 33:2.772588 +-1 2:0.000000 1:0.000000 5:0.000000 4:0.000000 11:1.386294 10:1.386294 6:1.386294 9:0.693147 16:6.591672 17:2.197224 21:1.098612 19:1.098612 33:2.772588 27:2.772588 22:1.386294 31:1.386294 +-1 1:0.000000 3:0.000000 37:1.609438 99:4.158884 118:2.197225 112:2.197225 149:2.397895 191:2.484907 376:3.044522 401:3.135494 413:3.178054 535:3.433987 678:3.737670 1430:4.653960 2374:5.347108 5466:6.263398 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 12:1.386294 9:0.693147 11:0.693147 7:0.693147 8:0.693147 17:3.295836 21:1.098612 18:1.098612 19:1.098612 16:1.098612 29:4.158882 +-1 5:0.000000 1:0.000000 13:1.098612 33:1.386294 54:1.791759 55:1.791759 1941:5.010635 3744:5.857933 3879:5.857933 6147:18.790194 5671:6.263398 18220:6.957497 18221:6.957497 +-1 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 12:1.386294 10:0.693147 20:1.098612 13:1.098612 18:1.098612 22:1.386294 33:1.386294 55:8.958795 54:5.375277 62:1.791759 45:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 10:0.693147 9:0.693147 11:0.693147 6:0.693147 7:0.693147 20:1.098612 16:1.098612 14:1.098612 33:2.772588 29:1.386294 27:1.386294 +-1 4:0.000000 1:0.000000 3:0.000000 2:0.000000 5:0.000000 7:1.386294 6:1.386294 12:0.693147 11:0.693147 10:0.693147 20:1.098612 16:1.098612 29:1.386294 33:1.386294 26:1.386294 25:1.386294 +-1 3:0.000000 2:0.000000 5:0.000000 1:0.000000 4:0.000000 6:2.772588 9:1.386294 8:1.386294 12:0.693147 10:0.693147 7:0.693147 21:4.394448 19:1.098612 13:1.098612 18:1.098612 16:1.098612 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 7:9.010911 6:2.079441 10:1.386294 11:1.386294 12:0.693147 17:3.295836 16:3.295836 15:2.197224 19:1.098612 26:6.931470 27:5.545176 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 12:8.317764 6:2.772588 10:2.079441 7:0.693147 18:6.591672 21:4.394448 16:2.197224 19:1.098612 28:6.931470 25:5.545176 24:2.772588 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 9:0.693147 11:0.693147 21:1.098612 33:1.386294 55:3.583518 104:2.079442 85:2.079442 105:2.197225 132:2.302585 142:2.397895 192:2.484907 293:2.772589 +-1 7:0.693147 9:0.693147 17:4.394448 19:1.098612 18:1.098612 14:1.098612 28:1.386294 38:1.609438 37:1.609438 40:1.609438 47:3.583518 58:1.791759 62:1.791759 55:1.791759 48:1.791759 67:1.945910 +-1 2:0.000000 3:0.000000 1:0.000000 5:0.000000 6:3.465735 9:2.079441 10:1.386294 11:1.386294 12:0.693147 7:0.693147 16:5.493060 17:4.394448 14:1.098612 29:6.931470 28:5.545176 33:2.772588 +-1 2:0.000000 1:0.000000 3:0.000000 11:0.693147 21:1.098612 18:1.098612 14:1.098612 33:1.386294 41:1.609438 54:3.583518 55:1.791759 76:1.945910 104:2.079442 192:2.484907 171:2.484907 200:2.564949 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 8:4.158882 6:2.079441 7:0.693147 10:0.693147 12:0.693147 18:3.295836 15:3.295836 13:2.197224 20:2.197224 24:9.704058 30:5.545176 +-1 33:1.386294 54:1.791759 4807:6.263398 18572:6.957497 18573:6.957497 18574:6.957497 18575:6.957497 18576:6.957497 18577:6.957497 18578:6.957497 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 12:1.386294 11:1.386294 10:0.693147 6:0.693147 18:2.197224 19:1.098612 13:1.098612 29:2.772588 31:1.386294 24:1.386294 37:1.609438 +-1 1:0.000000 3:0.000000 2:0.000000 10:1.386294 16:1.098612 25:2.772588 33:1.386294 31:1.386294 24:1.386294 39:1.609438 43:1.609438 49:5.375277 52:3.583518 54:1.791759 55:1.791759 50:1.791759 +-1 3:0.000000 5:0.000000 2:0.000000 1:0.000000 4:0.000000 12:0.693147 6:0.693147 11:0.693147 19:2.197224 15:2.197224 17:2.197224 20:1.098612 24:2.772588 22:2.772588 31:1.386294 33:1.386294 +-1 4:0.000000 5:0.000000 1:0.000000 3:0.000000 2:0.000000 10:0.693147 11:0.693147 7:0.693147 19:1.098612 13:1.098612 14:1.098612 31:1.386294 22:1.386294 33:1.386294 26:1.386294 40:1.609438 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 8:2.079441 10:1.386294 7:1.386294 6:0.693147 11:0.693147 13:3.295836 15:1.098612 16:1.098612 14:1.098612 22:1.386294 33:1.386294 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 11:2.772588 10:2.079441 6:1.386294 12:0.693147 9:0.693147 7:0.693147 21:1.098612 24:4.158882 33:1.386294 30:1.386294 25:1.386294 +-1 4:0.000000 3:0.000000 12:0.693147 13:3.295836 19:2.197224 17:1.098612 14:1.098612 22:4.158882 31:2.772588 33:1.386294 28:1.386294 36:1.609438 41:1.609438 55:7.167036 54:3.583518 46:3.583518 +-1 29:1.386294 33:1.386294 55:1.791759 54:1.791759 45:1.791759 431:3.218876 1374:4.553877 1612:9.507180 3631:5.857933 6072:18.790194 18613:6.957497 18614:6.957497 +-1 4:0.000000 5:0.000000 7:10.397205 9:3.465735 12:0.693147 10:0.693147 11:0.693147 20:2.197224 16:1.098612 26:6.931470 30:4.158882 22:1.386294 33:1.386294 23:1.386294 25:1.386294 28:1.386294 +-1 3:0.000000 13:1.098612 29:1.386294 33:1.386294 45:1.791759 78:1.945910 90:2.079442 122:2.197225 160:2.397895 217:2.564949 3416:5.857933 6165:6.263398 4587:6.263398 5764:6.263398 18630:6.957497 18631:6.957497 +-1 1:0.000000 5:0.000000 3:0.000000 2:0.000000 4:0.000000 9:0.693147 10:0.693147 11:0.693147 18:1.098612 13:1.098612 22:2.772588 33:2.772588 24:1.386294 54:3.583518 45:3.583518 82:3.891820 +-1 33:1.386294 18638:6.957497 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 12:0.693147 13:2.197224 14:1.098612 33:1.386294 29:1.386294 37:1.609438 35:1.609438 54:1.791759 46:1.791759 112:2.197225 113:2.197225 +-1 4:0.000000 2:0.000000 10:1.386294 11:0.693147 6:0.693147 9:0.693147 19:1.098612 21:1.098612 18:1.098612 14:1.098612 29:1.386294 33:1.386294 34:1.609438 35:1.609438 55:3.583518 54:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 5:0.000000 9:2.079441 6:1.386294 10:0.693147 12:0.693147 19:1.098612 17:1.098612 28:2.772588 30:1.386294 33:1.386294 34:1.609438 42:1.609438 35:1.609438 +-1 1:0.000000 3:0.000000 82:1.945910 148:2.397895 4900:6.263398 18662:6.957497 +-1 3:0.000000 1:0.000000 8:0.693147 11:0.693147 9:0.693147 17:2.197224 20:2.197224 24:8.317764 22:1.386294 33:1.386294 30:1.386294 37:1.609438 84:5.837730 67:1.945910 81:1.945910 78:1.945910 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 10:2.079441 6:2.079441 8:1.386294 11:1.386294 18:2.197224 16:1.098612 20:1.098612 14:1.098612 27:2.772588 33:2.772588 43:4.828314 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 10:1.386294 6:1.386294 12:0.693147 11:0.693147 9:0.693147 21:1.098612 18:1.098612 33:1.386294 43:3.218876 38:3.218876 55:3.583518 +-1 3:0.000000 78:1.945910 132:2.302585 395:3.135494 489:3.367296 994:4.174387 1068:4.317488 1191:4.382027 3442:5.857933 3717:5.857933 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 1:0.000000 6:2.079441 11:1.386294 8:1.386294 12:1.386294 9:0.693147 17:3.295836 13:1.098612 31:1.386294 29:1.386294 32:1.386294 43:3.218876 +-1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 4:0.000000 12:0.693147 6:0.693147 19:2.197224 16:1.098612 17:1.098612 14:1.098612 31:1.386294 28:1.386294 33:1.386294 39:3.218876 35:1.609438 +-1 4:0.000000 1:0.000000 2:0.000000 5:0.000000 3:0.000000 12:3.465735 6:0.693147 9:0.693147 8:0.693147 11:0.693147 19:2.197224 20:1.098612 15:1.098612 21:1.098612 13:1.098612 31:1.386294 +-1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 12:0.693147 13:2.197224 20:2.197224 19:1.098612 31:2.772588 24:1.386294 28:1.386294 22:1.386294 33:1.386294 40:1.609438 46:1.791759 77:1.945910 +-1 2:0.000000 5:0.000000 1:0.000000 4:0.000000 12:2.772588 10:1.386294 11:0.693147 9:0.693147 6:0.693147 20:2.197224 16:1.098612 18:1.098612 21:1.098612 33:2.772588 24:2.772588 29:1.386294 +-1 3:0.000000 19:1.098612 15:1.098612 29:1.386294 33:1.386294 82:1.945910 148:2.397895 142:2.397895 176:2.484907 697:3.761200 3059:5.568345 3423:5.857933 18794:6.957497 18795:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 7:0.693147 8:0.693147 18:1.098612 21:1.098612 19:1.098612 33:1.386294 43:1.609438 42:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 10:1.386294 11:1.386294 9:0.693147 6:0.693147 16:2.197224 25:2.772588 30:2.772588 33:1.386294 28:1.386294 43:1.609438 54:5.375277 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 8:3.465735 12:0.693147 10:0.693147 6:0.693147 9:0.693147 16:3.295836 19:2.197224 18:2.197224 31:2.772588 28:1.386294 22:1.386294 33:1.386294 +-1 3:0.000000 11:0.693147 14:1.098612 26:1.386294 35:1.609438 47:1.791759 49:1.791759 51:1.791759 59:1.791759 74:3.891820 99:2.079442 90:2.079442 155:2.397895 436:3.218876 769:3.850148 1084:4.317488 +-1 1:0.000000 4:0.000000 2:0.000000 5:0.000000 10:1.386294 8:1.386294 12:0.693147 18:1.098612 33:2.772588 35:1.609438 55:5.375277 54:1.791759 104:2.079442 113:2.197225 192:2.484907 204:2.564949 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 8:1.386294 9:1.386294 12:1.386294 10:0.693147 7:0.693147 6:0.693147 15:2.197224 18:1.098612 24:1.386294 33:1.386294 55:3.583518 54:3.583518 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 7:2.079441 11:1.386294 10:1.386294 9:1.386294 15:1.098612 14:1.098612 33:2.772588 22:1.386294 26:1.386294 24:1.386294 36:3.218876 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 7:12.476646 10:6.238323 6:4.852029 11:2.079441 18:4.394448 15:2.197224 13:1.098612 16:1.098612 17:1.098612 21:1.098612 19:1.098612 14:1.098612 +-1 29:1.386294 33:1.386294 45:1.791759 46:1.791759 219:2.639057 531:3.433987 1180:4.382027 3438:5.857933 18931:6.957497 18932:6.957497 18933:6.957497 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 10:2.079441 11:1.386294 8:1.386294 9:0.693147 18:1.098612 21:1.098612 13:1.098612 28:2.772588 33:2.772588 22:1.386294 42:1.609438 55:5.375277 +-1 2:0.000000 5:0.000000 1:0.000000 3:0.000000 4:0.000000 10:1.386294 12:0.693147 11:0.693147 7:0.693147 9:0.693147 18:2.197224 17:2.197224 25:4.158882 29:1.386294 33:1.386294 27:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 9:0.693147 16:1.098612 13:1.098612 15:1.098612 20:1.098612 26:2.772588 33:1.386294 24:1.386294 40:1.609438 55:3.583518 +-1 3:0.000000 1:0.000000 5:0.000000 8:0.693147 7:0.693147 11:0.693147 13:1.098612 15:1.098612 21:1.098612 28:2.772588 22:2.772588 29:1.386294 33:1.386294 26:1.386294 40:3.218876 39:3.218876 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 10:1.386294 12:0.693147 11:0.693147 13:1.098612 21:1.098612 14:1.098612 22:2.772588 31:2.772588 33:1.386294 28:1.386294 36:1.609438 +-1 1:0.000000 8:0.693147 15:1.098612 29:1.386294 27:1.386294 33:1.386294 81:3.891820 115:4.394450 108:2.197225 135:2.302585 173:2.484907 198:7.694847 202:2.564949 220:2.639057 307:2.833213 355:2.995732 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 6:11.090352 10:6.931470 8:2.772588 7:2.772588 11:1.386294 9:1.386294 12:0.693147 18:5.493060 21:4.394448 20:4.394448 13:3.295836 +-1 3:0.000000 1:0.000000 2:0.000000 10:0.693147 33:2.772588 22:1.386294 54:1.791759 55:1.791759 67:1.945910 82:1.945910 104:2.079442 192:2.484907 474:3.295837 486:3.332205 1600:4.753590 1784:4.875197 +-1 1:0.000000 3:0.000000 7:0.693147 18:1.098612 26:1.386294 24:1.386294 33:1.386294 39:1.609438 149:4.795790 514:6.802394 2791:11.136690 3501:5.857933 19098:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 6:0.693147 20:1.098612 33:1.386294 25:1.386294 43:1.609438 55:3.583518 54:1.791759 52:1.791759 56:1.791759 50:1.791759 +-1 3:0.000000 1:0.000000 7:1.386294 9:0.693147 21:1.098612 19:1.098612 14:1.098612 33:5.545176 26:4.158882 25:1.386294 27:1.386294 22:1.386294 38:1.609438 39:1.609438 35:1.609438 55:3.583518 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 6:0.693147 13:3.295836 14:1.098612 33:2.772588 29:1.386294 35:1.609438 34:1.609438 45:1.791759 46:1.791759 87:2.079442 184:2.484907 257:2.708050 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 9:1.386294 12:0.693147 7:0.693147 11:0.693147 21:1.098612 14:1.098612 28:4.158882 26:1.386294 24:1.386294 33:1.386294 40:1.609438 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 11:4.158882 7:2.079441 6:2.079441 10:0.693147 12:0.693147 8:0.693147 19:2.197224 13:2.197224 16:1.098612 20:1.098612 21:1.098612 33:4.158882 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 10:2.079441 11:1.386294 6:1.386294 12:0.693147 7:0.693147 20:1.098612 17:1.098612 14:1.098612 33:1.386294 43:3.218876 38:3.218876 +-1 2:0.000000 1:0.000000 4:0.000000 12:1.386294 10:0.693147 18:1.098612 33:1.386294 55:5.375277 45:3.583518 54:1.791759 104:2.079442 151:2.397895 192:2.484907 293:2.772589 486:3.332205 1614:4.753590 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 12:0.693147 6:0.693147 11:0.693147 22:1.386294 33:1.386294 24:1.386294 36:1.609438 54:3.583518 55:1.791759 61:1.791759 68:1.945910 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 9:2.079441 11:1.386294 12:0.693147 8:0.693147 21:1.098612 19:1.098612 27:1.386294 31:1.386294 30:1.386294 24:1.386294 40:1.609438 +-1 3:0.000000 1:0.000000 8:0.693147 11:0.693147 24:1.386294 33:1.386294 41:4.828314 39:1.609438 35:1.609438 59:3.583518 156:2.397895 163:4.969814 166:2.484907 180:2.484907 256:2.708050 297:2.833213 +-1 3:0.000000 2:0.000000 4:0.000000 1:0.000000 5:0.000000 11:2.079441 7:0.693147 9:0.693147 12:0.693147 19:2.197224 13:1.098612 21:1.098612 25:4.158882 33:2.772588 31:1.386294 28:1.386294 +-1 2:0.000000 1:0.000000 4:0.000000 5:0.000000 8:1.386294 11:0.693147 18:3.295836 14:1.098612 30:1.386294 33:1.386294 38:1.609438 36:1.609438 41:1.609438 54:5.375277 48:1.791759 77:3.891820 +-1 2:0.000000 4:0.000000 5:0.000000 12:2.772588 6:2.079441 10:0.693147 11:0.693147 7:0.693147 17:1.098612 19:1.098612 13:1.098612 22:4.158882 33:2.772588 31:1.386294 26:1.386294 28:1.386294 +-1 1:0.000000 5:0.000000 3:0.000000 54:1.791759 55:1.791759 149:2.397895 199:2.564949 288:5.545178 300:2.833213 426:3.218876 1014:4.248495 2140:5.164786 2218:5.164786 2954:5.568345 19264:6.957497 19265:6.957497 +-1 1:0.000000 3:0.000000 5:0.000000 9:1.386294 10:1.386294 7:0.693147 18:1.098612 17:1.098612 28:1.386294 33:1.386294 60:1.791759 47:1.791759 81:5.837730 80:1.945910 74:1.945910 84:1.945910 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 3:0.000000 12:0.693147 19:1.098612 17:1.098612 31:1.386294 24:1.386294 22:1.386294 33:1.386294 55:3.583518 54:1.791759 45:1.791759 99:2.079442 +-1 2:0.000000 6:2.772588 10:0.693147 8:0.693147 21:2.197224 33:1.386294 42:3.218876 43:3.218876 60:5.375277 57:5.375277 49:3.583518 53:1.791759 50:1.791759 58:1.791759 79:7.783640 68:5.837730 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 19:1.098612 18:1.098612 33:1.386294 42:1.609438 43:1.609438 54:1.791759 62:1.791759 80:1.945910 86:2.079442 115:2.197225 +-1 10:1.386294 11:0.693147 6:0.693147 19:1.098612 14:1.098612 33:1.386294 31:1.386294 36:1.609438 55:1.791759 60:1.791759 51:1.791759 74:3.891820 104:2.079442 93:2.079442 98:2.079442 160:2.397895 +-1 4:0.000000 5:0.000000 12:0.693147 18:1.098612 55:3.583518 54:1.791759 79:1.945910 104:2.079442 192:2.484907 293:2.772589 705:3.761200 932:4.110874 1090:4.317488 1256:4.465908 1377:4.553877 19315:6.957497 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 7:16.635528 10:2.079441 6:2.079441 8:2.079441 12:0.693147 11:0.693147 21:2.197224 19:1.098612 33:2.772588 27:2.772588 29:1.386294 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 7:6.238323 10:5.545176 9:2.772588 12:2.079441 6:1.386294 8:0.693147 16:10.986120 17:7.690284 19:1.098612 21:1.098612 18:1.098612 24:4.158882 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 7:6.931470 9:0.693147 12:0.693147 10:0.693147 8:0.693147 16:4.394448 14:1.098612 28:5.545176 27:2.772588 30:1.386294 33:1.386294 34:1.609438 +-1 3:0.000000 2:0.000000 1:0.000000 7:1.386294 8:0.693147 18:7.690284 16:1.098612 14:1.098612 33:2.772588 27:1.386294 28:1.386294 29:1.386294 38:1.609438 42:1.609438 35:1.609438 60:7.167036 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 12:0.693147 19:1.098612 13:1.098612 14:1.098612 31:1.386294 33:1.386294 24:1.386294 22:1.386294 35:1.609438 54:3.583518 55:3.583518 +-1 2:0.000000 5:0.000000 3:0.000000 4:0.000000 1:0.000000 7:4.158882 12:2.079441 9:1.386294 8:1.386294 11:0.693147 6:0.693147 16:2.197224 19:1.098612 14:1.098612 33:2.772588 38:1.609438 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 9:1.386294 6:0.693147 14:1.098612 18:1.098612 33:1.386294 28:1.386294 36:1.609438 43:1.609438 41:1.609438 53:10.750554 55:1.791759 47:1.791759 +-1 2:0.000000 3:0.000000 4:0.000000 1:0.000000 5:0.000000 12:0.693147 7:0.693147 11:0.693147 20:1.098612 21:1.098612 19:1.098612 14:1.098612 31:4.158882 24:2.772588 33:2.772588 26:1.386294 +-1 3:0.000000 2:0.000000 1:0.000000 4:0.000000 5:0.000000 11:1.386294 6:0.693147 10:0.693147 17:2.197224 19:1.098612 13:1.098612 21:1.098612 15:1.098612 18:1.098612 31:1.386294 24:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:2.079441 10:1.386294 12:0.693147 33:1.386294 55:3.583518 54:1.791759 68:1.945910 104:2.079442 90:2.079442 86:2.079442 113:2.197225 +-1 4:0.000000 1:0.000000 5:0.000000 6:6.931470 8:1.386294 9:1.386294 12:1.386294 10:0.693147 18:8.788896 16:7.690284 17:6.591672 14:2.197224 21:1.098612 20:1.098612 26:8.317764 33:5.545176 +-1 1:0.000000 3:0.000000 33:1.386294 37:3.218876 46:1.791759 55:1.791759 117:2.197225 188:4.969814 217:2.564949 639:3.688879 849:8.014666 853:4.007333 1452:9.307920 3039:11.136690 4786:12.526796 19521:6.957497 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 6:4.158882 11:2.079441 9:2.079441 12:0.693147 10:0.693147 16:3.295836 21:1.098612 29:2.772588 33:2.772588 32:1.386294 28:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 10:3.465735 11:1.386294 12:0.693147 9:0.693147 17:2.197224 14:1.098612 18:1.098612 29:2.772588 33:2.772588 25:1.386294 37:6.437752 +-1 3:0.000000 4:0.000000 1:0.000000 2:0.000000 5:0.000000 11:1.386294 10:1.386294 6:1.386294 12:0.693147 8:0.693147 17:1.098612 21:1.098612 29:2.772588 33:2.772588 25:1.386294 27:1.386294 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 10:1.386294 9:1.386294 6:1.386294 12:0.693147 11:0.693147 17:1.098612 18:1.098612 29:2.772588 33:2.772588 28:1.386294 37:1.609438 +-1 2:0.000000 3:0.000000 4:0.000000 5:0.000000 1:0.000000 6:4.158882 11:1.386294 10:1.386294 12:0.693147 20:1.098612 19:1.098612 17:1.098612 21:1.098612 18:1.098612 29:2.772588 33:2.772588 +-1 1:0.000000 3:0.000000 33:1.386294 35:1.609438 99:2.079442 790:7.824046 927:4.110874 19614:13.914994 19615:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:5.545176 6:4.158882 11:3.465735 9:3.465735 12:2.079441 7:2.079441 18:9.887508 16:4.394448 15:2.197224 17:1.098612 26:5.545176 +-1 1:0.000000 2:0.000000 3:0.000000 4:0.000000 5:0.000000 11:1.386294 12:0.693147 9:0.693147 10:0.693147 19:1.098612 15:1.098612 17:1.098612 33:1.386294 40:4.828314 55:7.167036 59:3.583518 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 12:1.386294 6:1.386294 8:0.693147 9:0.693147 21:3.295836 19:1.098612 15:1.098612 18:1.098612 14:1.098612 33:2.772588 29:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 10:0.693147 11:0.693147 6:0.693147 9:0.693147 19:1.098612 18:1.098612 22:1.386294 33:1.386294 34:1.609438 54:3.583518 +-1 3:0.000000 1:0.000000 8:0.693147 11:0.693147 13:1.098612 19:1.098612 29:1.386294 33:1.386294 45:1.791759 46:1.791759 82:1.945910 124:2.197225 288:2.772589 310:2.890372 927:4.110874 1215:4.465908 +-1 3:0.000000 2:0.000000 4:0.000000 5:0.000000 1:0.000000 10:1.386294 12:1.386294 11:0.693147 33:1.386294 43:1.609438 55:5.375277 54:3.583518 48:3.583518 50:1.791759 80:1.945910 78:1.945910 +-1 2:0.000000 5:0.000000 4:0.000000 1:0.000000 3:0.000000 12:0.693147 6:0.693147 13:4.394448 21:1.098612 15:1.098612 20:1.098612 33:1.386294 34:1.609438 62:5.375277 55:3.583518 54:1.791759 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 1:0.000000 7:63.076377 12:11.783499 6:10.397205 10:4.158882 8:2.079441 11:1.386294 9:1.386294 20:18.676404 16:9.887508 18:4.394448 21:4.394448 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 12:2.079441 17:2.197224 20:1.098612 19:1.098612 13:1.098612 14:1.098612 31:1.386294 29:1.386294 33:1.386294 41:1.609438 55:7.167036 59:5.375277 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 11:0.693147 6:0.693147 7:0.693147 17:1.098612 15:1.098612 16:1.098612 14:1.098612 33:2.772588 22:1.386294 26:1.386294 34:1.609438 +-1 4:0.000000 1:0.000000 3:0.000000 2:0.000000 5:0.000000 11:2.079441 10:1.386294 8:1.386294 9:0.693147 7:0.693147 6:0.693147 13:1.098612 20:1.098612 29:1.386294 33:1.386294 22:1.386294 +-1 3:0.000000 1:0.000000 2:0.000000 4:0.000000 5:0.000000 12:1.386294 19:1.098612 31:1.386294 29:1.386294 33:1.386294 54:1.791759 55:1.791759 59:1.791759 82:1.945910 81:1.945910 102:2.079442 +-1 4:0.000000 1:0.000000 2:0.000000 5:0.000000 3:0.000000 12:1.386294 11:1.386294 8:0.693147 7:0.693147 17:3.295836 20:1.098612 19:1.098612 15:1.098612 16:1.098612 14:1.098612 28:6.931470 +-1 4:0.000000 3:0.000000 2:0.000000 1:0.000000 5:0.000000 8:4.158882 10:3.465735 12:1.386294 9:1.386294 11:0.693147 18:6.591672 17:3.295836 33:2.772588 29:2.772588 28:2.772588 31:1.386294 +-1 2:0.000000 4:0.000000 3:0.000000 1:0.000000 12:0.693147 19:1.098612 20:1.098612 14:1.098612 17:1.098612 31:1.386294 33:1.386294 41:1.609438 55:3.583518 99:2.079442 101:2.079442 102:2.079442 +-1 1:0.000000 2:0.000000 10:0.693147 11:0.693147 33:2.772588 26:1.386294 24:1.386294 54:3.583518 55:1.791759 77:1.945910 104:2.079442 192:2.484907 271:2.772589 355:2.995732 474:3.295837 486:3.332205 +-1 3:0.000000 5:0.000000 1:0.000000 2:0.000000 10:1.386294 11:0.693147 6:0.693147 14:1.098612 33:1.386294 34:1.609438 41:1.609438 54:1.791759 55:1.791759 60:1.791759 51:1.791759 56:1.791759 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 11:0.693147 21:1.098612 19:1.098612 42:1.609438 122:2.197225 259:2.708050 253:2.708050 320:2.890372 354:5.991464 359:3.044522 383:3.091042 550:6.931472 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 11:0.693147 13:1.098612 14:1.098612 33:2.772588 22:1.386294 24:1.386294 28:1.386294 39:1.609438 41:1.609438 55:5.375277 62:1.791759 +-1 10:0.693147 11:0.693147 18:1.098612 42:1.609438 56:1.791759 75:3.891820 69:3.891820 74:1.945910 79:1.945910 98:4.158884 101:2.079442 161:4.795790 147:4.795790 164:2.484907 170:2.484907 180:2.484907 +-1 2:0.000000 9:2.079441 16:2.197224 14:1.098612 58:1.791759 71:1.945910 72:1.945910 78:1.945910 94:2.079442 138:6.907755 137:2.302585 161:21.581055 158:2.397895 163:2.484907 214:2.564949 239:2.639057 +-1 2:0.000000 9:0.693147 11:0.693147 16:2.197224 18:1.098612 14:1.098612 27:1.386294 43:1.609438 38:1.609438 50:1.791759 78:5.837730 79:3.891820 68:1.945910 101:2.079442 121:2.197225 138:6.907755 +-1 8:2.772588 16:2.197224 27:1.386294 28:1.386294 38:3.218876 79:3.891820 78:1.945910 68:1.945910 67:1.945910 98:2.079442 94:2.079442 117:2.197225 120:2.197225 127:2.197225 161:21.581055 144:4.795790 +-1 2:0.000000 1:0.000000 3:0.000000 11:0.693147 10:0.693147 19:1.098612 18:1.098612 14:1.098612 33:1.386294 31:1.386294 24:1.386294 43:1.609438 36:1.609438 50:1.791759 58:1.791759 54:1.791759 +-1 3:0.000000 1:0.000000 2:0.000000 5:0.000000 4:0.000000 10:1.386294 8:0.693147 7:0.693147 15:2.197224 21:2.197224 16:1.098612 14:1.098612 31:6.931470 24:5.545176 28:4.158882 33:2.772588 +-1 2:0.000000 1:0.000000 3:0.000000 5:0.000000 4:0.000000 10:2.079441 11:1.386294 18:1.098612 33:1.386294 31:1.386294 25:1.386294 30:1.386294 43:3.218876 54:8.958795 55:7.167036 61:1.791759 +-1 1:0.000000 3:0.000000 10:0.693147 11:0.693147 8:0.693147 17:1.098612 40:1.609438 131:2.302585 178:2.484907 172:2.484907 262:2.772589 347:2.995732 459:3.258097 470:3.295837 514:6.802394 662:3.737670 +-1 5:0.000000 1:0.000000 3:0.000000 4:0.000000 2:0.000000 12:0.693147 13:1.098612 14:1.098612 33:2.772588 24:1.386294 41:1.609438 54:5.375277 55:3.583518 68:1.945910 104:2.079442 328:2.944439 +-1 2:0.000000 4:0.000000 3:0.000000 5:0.000000 1:0.000000 12:2.079441 11:1.386294 7:0.693147 18:2.197224 19:1.098612 15:1.098612 17:1.098612 24:4.158882 33:1.386294 31:1.386294 26:1.386294 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 10:4.158882 7:4.158882 12:3.465735 6:2.772588 11:1.386294 21:6.591672 20:4.394448 19:2.197224 13:1.098612 14:1.098612 27:5.545176 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 7:2.079441 12:1.386294 11:1.386294 6:1.386294 10:0.693147 18:2.197224 16:1.098612 30:4.158882 26:2.772588 33:1.386294 25:1.386294 +-1 1:0.000000 3:0.000000 33:1.386294 99:2.079442 2670:16.705035 5666:6.263398 +-1 5:0.000000 4:0.000000 2:0.000000 1:0.000000 3:0.000000 6:2.079441 11:0.693147 12:0.693147 13:1.098612 14:1.098612 31:1.386294 22:1.386294 33:1.386294 24:1.386294 34:1.609438 43:1.609438 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 5:0.000000 6:4.852029 11:1.386294 10:1.386294 8:1.386294 9:0.693147 18:2.197224 16:1.098612 21:1.098612 19:1.098612 14:1.098612 33:5.545176 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 5:0.000000 12:0.693147 8:0.693147 19:1.098612 16:1.098612 17:1.098612 24:2.772588 33:2.772588 31:1.386294 29:1.386294 35:1.609438 54:3.583518 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 5:0.000000 12:1.386294 19:1.098612 13:1.098612 31:1.386294 22:1.386294 33:1.386294 27:1.386294 24:1.386294 55:3.583518 54:1.791759 45:1.791759 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 12:1.386294 9:0.693147 11:0.693147 19:1.098612 18:1.098612 31:1.386294 29:1.386294 33:1.386294 40:1.609438 41:1.609438 54:3.583518 45:1.791759 +-1 2:0.000000 1:0.000000 3:0.000000 4:0.000000 8:1.386294 10:0.693147 11:0.693147 12:0.693147 16:1.098612 29:1.386294 33:1.386294 40:1.609438 54:3.583518 55:1.791759 76:1.945910 75:1.945910 +-1 1:0.000000 3:0.000000 11:1.386294 33:1.386294 38:1.609438 189:2.484907 232:2.639057 266:2.772589 335:2.944439 404:3.135494 417:3.178054 432:3.218876 439:3.258097 660:3.688879 1008:4.174387 1015:4.248495 +-1 3:0.000000 1:0.000000 5:0.000000 2:0.000000 12:0.693147 6:0.693147 20:1.098612 14:1.098612 22:1.386294 33:1.386294 24:1.386294 40:1.609438 43:1.609438 41:1.609438 60:5.375277 54:1.791759 +-1 3:0.000000 4:0.000000 2:0.000000 5:0.000000 1:0.000000 11:2.079441 8:0.693147 10:0.693147 12:0.693147 21:3.295836 18:1.098612 24:13.862940 28:1.386294 22:1.386294 37:3.218876 42:1.609438 +-1 1:0.000000 13:2.197224 46:1.791759 314:2.890372 403:3.135494 466:3.295837 639:3.688879 3054:5.568345 3440:11.715866 +-1 2:0.000000 4:0.000000 6:2.772588 10:0.693147 12:0.693147 11:0.693147 13:1.098612 33:1.386294 27:1.386294 24:1.386294 34:4.828314 38:4.828314 40:3.218876 60:3.583518 51:3.583518 61:3.583518 +-1 1:0.000000 3:0.000000 9:1.386294 7:0.693147 14:1.098612 24:1.386294 33:1.386294 37:1.609438 81:3.891820 78:3.891820 99:2.079442 101:2.079442 115:6.591675 120:2.197225 121:2.197225 117:2.197225 +-1 3:0.000000 7:0.693147 8:0.693147 33:1.386294 40:1.609438 80:1.945910 112:2.197225 123:2.197225 149:2.397895 159:2.397895 174:2.484907 1051:4.248495 3353:5.857933 5167:6.263398 4809:6.263398 20276:13.914994 +-1 1:0.000000 2:0.000000 4:0.000000 5:0.000000 13:1.098612 35:1.609438 54:1.791759 104:2.079442 189:2.484907 375:3.044522 466:3.295837 474:3.295837 711:3.761200 879:4.060443 891:4.060443 1829:5.010635 +-1 3:0.000000 1:0.000000 47:1.791759 119:2.197225 122:2.197225 313:2.890372 395:6.270988 575:3.555348 644:3.688879 785:3.912023 3424:5.857933 3418:5.857933 6061:6.263398 20291:6.957497 20292:6.957497 +-1 4:0.000000 2:0.000000 5:0.000000 12:0.693147 9:0.693147 19:1.098612 17:1.098612 14:1.098612 31:1.386294 26:1.386294 33:1.386294 35:1.609438 55:7.167036 54:3.583518 84:1.945910 78:1.945910 +-1 1:0.000000 3:0.000000 2:0.000000 4:0.000000 5:0.000000 8:0.693147 12:0.693147 13:2.197224 19:1.098612 17:1.098612 14:1.098612 31:1.386294 22:1.386294 33:1.386294 41:1.609438 55:3.583518 +-1 1:0.000000 33:1.386294 173:2.484907 229:5.278114 242:2.708050 468:3.295837 4025:11.715866 6186:12.526796 20301:6.957497 20302:6.957497 20303:6.957497 20304:6.957497 +-1 1:0.000000 2:0.000000 4:0.000000 3:0.000000 5:0.000000 12:0.693147 10:0.693147 11:0.693147 9:0.693147 19:2.197224 17:2.197224 18:1.098612 14:1.098612 31:2.772588 33:1.386294 40:1.609438 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:1.386294 10:0.693147 9:0.693147 18:2.197224 33:2.772588 29:1.386294 25:1.386294 55:5.375277 54:3.583518 59:1.791759 62:1.791759 +-1 2:0.000000 4:0.000000 1:0.000000 3:0.000000 8:1.386294 17:1.098612 18:1.098612 24:4.158882 33:1.386294 27:1.386294 43:1.609438 54:1.791759 55:1.791759 57:1.791759 104:2.079442 97:2.079442 +-1 5:0.000000 1:0.000000 3:0.000000 12:0.693147 14:1.098612 29:1.386294 26:1.386294 30:1.386294 41:1.609438 55:1.791759 109:6.591675 126:4.394450 111:2.197225 132:2.302585 129:2.302585 153:2.397895 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 12:2.079441 6:1.386294 8:0.693147 10:0.693147 19:2.197224 16:2.197224 20:1.098612 30:2.772588 31:1.386294 33:1.386294 25:1.386294 +-1 1:0.000000 3:0.000000 4:0.000000 5:0.000000 2:0.000000 9:1.386294 8:0.693147 10:0.693147 18:2.197224 29:1.386294 33:1.386294 30:1.386294 41:1.609438 55:5.375277 54:3.583518 49:3.583518 +-1 2:0.000000 3:0.000000 1:0.000000 4:0.000000 5:0.000000 7:0.693147 9:0.693147 8:0.693147 18:5.493060 14:1.098612 30:2.772588 26:1.386294 33:1.386294 38:1.609438 36:1.609438 41:1.609438 +-1 1:0.000000 3:0.000000 7:0.693147 17:1.098612 50:1.791759 53:1.791759 71:5.837730 81:1.945910 120:4.394450 117:2.197225 160:2.397895 161:2.397895 189:2.484907 210:2.564949 277:2.772589 297:2.833213 +-1 2:0.000000 5:0.000000 6:9.010911 8:3.465735 10:1.386294 11:0.693147 9:0.693147 7:0.693147 21:2.197224 20:2.197224 17:1.098612 16:1.098612 28:2.772588 27:2.772588 33:1.386294 25:1.386294 +-1 1:0.000000 5:0.000000 3:0.000000 9:0.693147 15:2.197224 17:2.197224 14:2.197224 13:1.098612 35:1.609438 55:3.583518 62:1.791759 47:1.791759 83:1.945910 99:2.079442 103:2.079442 97:2.079442 +-1 148:2.397895 1482:4.753590 20496:6.957497 +-1 1:0.000000 8:0.693147 13:1.098612 33:2.772588 29:1.386294 41:1.609438 55:3.583518 54:1.791759 153:4.795790 301:2.833213 360:3.044522 359:3.044522 388:3.091042 415:3.178054 682:3.737670 806:3.912023 +-1 2:0.000000 1:0.000000 4:0.000000 3:0.000000 10:4.158882 11:1.386294 6:1.386294 8:0.693147 12:0.693147 16:2.197224 19:1.098612 17:1.098612 18:1.098612 14:1.098612 25:2.772588 27:1.386294 +-1 1:0.000000 3:0.000000 2:0.000000 5:0.000000 10:2.079441 12:0.693147 11:0.693147 20:1.098612 33:1.386294 40:1.609438 36:1.609438 49:5.375277 55:3.583518 54:1.791759 60:1.791759 68:3.891820 +-1 1:0.000000 3:0.000000 8:0.693147 13:2.197224 19:1.098612 33:2.772588 29:1.386294 62:5.375277 55:3.583518 45:3.583518 82:1.945910 104:2.079442 148:2.397895 293:2.772589 362:3.044522 414:3.178054 +-1 2:0.000000 4:0.000000 5:0.000000 1:0.000000 3:0.000000 6:2.079441 10:1.386294 11:1.386294 7:1.386294 12:0.693147 9:0.693147 13:1.098612 18:1.098612 26:5.545176 30:2.772588 24:1.386294 +-1 5:0.000000 1:0.000000 3:0.000000 2:0.000000 4:0.000000 12:0.693147 54:3.583518 55:3.583518 104:2.079442 160:2.397895 192:2.484907 220:2.639057 293:2.772589 555:3.496508 791:3.912023 790:3.912023 +-1 8:1.386294 19:1.098612 97:2.079442 160:2.397895 371:3.044522 721:3.806662 942:4.110874 3665:5.857933 5914:6.263398 20550:6.957497 20551:6.957497 20552:6.957497 20553:6.957497 +-1 2:0.000000 4:0.000000 5:0.000000 3:0.000000 10:2.079441 6:2.079441 11:0.693147 9:0.693147 7:0.693147 20:3.295836 16:1.098612 14:1.098612 25:8.317764 27:4.158882 22:2.772588 33:2.772588 +-1 2:0.000000 5:0.000000 3:0.000000 6:9.704058 10:2.772588 9:1.386294 20:4.394448 17:1.098612 15:1.098612 14:1.098612 22:2.772588 33:2.772588 34:1.609438 35:1.609438 54:8.958795 53:5.375277 +-1 2:0.000000 5:0.000000 4:0.000000 3:0.000000 6:2.772588 10:2.772588 11:0.693147 8:0.693147 20:10.986120 16:2.197224 21:2.197224 18:1.098612 14:1.098612 25:13.862940 33:4.158882 22:2.772588 +-1 2:0.000000 4:0.000000 5:0.000000 12:0.693147 6:0.693147 8:0.693147 19:2.197224 21:1.098612 16:1.098612 28:1.386294 39:6.437752 36:1.609438 64:1.791759 48:1.791759 53:1.791759 59:1.791759 +-1 89:4.158884 382:3.091042 1372:4.553877 6246:6.263398 6247:6.263398 +-1 89:4.158884 382:3.091042 1372:4.553877 6246:6.263398 6247:6.263398 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/wordlist/1000.txt b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/wordlist/1000.txt new file mode 100644 index 00000000..c7a4795f --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/wordlist/1000.txt @@ -0,0 +1,1000 @@ +1 0 0 +2 1 0 +3 0 0 +4 0 0 +5 0 0 +6 1 0 +7 0 0 +8 0 0 +9 1 0 +10 1 0 +11 0 0 +12 0 0 +13 0 0 +14 0 0 +15 0 0 +16 1 0 +17 0 0 +18 0 0 +19 1 0 +20 0 0 +21 1 0 +22 0 0 +23 1 0 +24 0 0 +25 1 0 +26 0 0 +27 0 0 +28 0 0 +29 0 0 +30 0 0 +31 0 0 +32 0 0 +33 0 0 +34 0 0 +35 0 0 +36 1 0 +37 0 0 +38 1 0 +39 0 0 +40 1 0 +41 0 0 +42 0 0 +43 0 0 +44 0 0 +45 0 0 +46 0 0 +47 1 0 +48 0 0 +49 0 0 +50 1 0 +51 1 0 +52 1 0 +53 0 0 +54 0 0 +55 0 0 +56 0 0 +57 0 0 +58 0 0 +59 0 0 +60 0 0 +61 0 0 +62 0 0 +63 0 0 +64 0 0 +65 0 0 +66 0 0 +67 0 0 +68 0 0 +69 1 0 +70 0 0 +71 0 0 +72 0 0 +73 1 0 +74 0 0 +75 0 0 +76 0 0 +77 0 0 +78 0 0 +79 0 0 +80 0 0 +81 0 0 +82 0 0 +83 0 0 +84 0 0 +85 0 0 +86 0 0 +87 0 0 +88 0 0 +89 1 1 +90 0 0 +91 0 0 +92 0 0 +93 0 0 +94 0 0 +95 0 0 +96 0 0 +97 0 0 +98 0 0 +99 0 0 +100 0 0 +101 0 0 +102 0 0 +103 0 0 +104 0 0 +105 0 0 +106 0 0 +107 0 0 +108 0 0 +109 0 0 +110 1 0 +111 0 0 +112 0 0 +113 0 0 +114 0 0 +115 0 0 +116 1 0 +117 0 0 +118 0 0 +119 0 0 +120 0 0 +121 0 0 +122 0 0 +123 0 0 +124 0 0 +125 0 0 +126 0 0 +127 0 0 +128 0 0 +129 0 0 +130 0 0 +131 0 0 +132 0 0 +133 0 0 +134 0 0 +135 0 0 +136 0 0 +137 0 0 +138 0 0 +139 0 0 +140 0 0 +141 0 0 +142 0 0 +143 0 0 +144 0 0 +145 0 0 +146 0 0 +147 1 0 +148 0 0 +149 0 0 +150 0 0 +151 0 0 +152 0 0 +153 0 0 +154 0 0 +155 0 0 +156 0 0 +157 0 0 +158 0 0 +159 0 0 +160 0 0 +161 1 0 +162 0 0 +163 0 0 +164 0 0 +165 0 0 +166 0 0 +167 0 0 +168 0 0 +169 0 0 +170 0 0 +171 0 0 +172 0 0 +173 0 0 +174 0 0 +175 0 0 +176 0 0 +177 0 0 +178 0 0 +179 0 0 +180 0 0 +181 0 0 +182 0 0 +183 0 0 +184 0 0 +185 0 0 +186 0 0 +187 0 0 +188 0 0 +189 0 0 +190 0 0 +191 0 0 +192 0 0 +193 0 0 +194 0 0 +195 0 0 +196 0 0 +197 0 0 +198 0 0 +199 0 0 +200 0 0 +201 0 0 +202 0 0 +203 0 0 +204 0 0 +205 0 0 +206 0 0 +207 0 0 +208 0 0 +209 0 0 +210 0 0 +211 0 0 +212 0 0 +213 0 0 +214 0 0 +215 0 0 +216 0 0 +217 0 0 +218 0 0 +219 0 0 +220 0 0 +221 0 0 +222 0 0 +223 0 0 +224 0 0 +225 0 0 +226 0 0 +227 0 0 +228 0 0 +229 0 0 +230 0 0 +231 0 0 +232 0 0 +233 0 0 +234 0 0 +235 0 0 +236 0 0 +237 0 0 +238 0 0 +239 0 0 +240 0 0 +241 0 0 +242 1 0 +243 0 0 +244 0 0 +245 0 0 +246 0 0 +247 0 0 +248 0 0 +249 0 0 +250 0 0 +251 0 0 +252 0 0 +253 0 0 +254 0 0 +255 0 0 +256 0 0 +257 0 0 +258 0 0 +259 0 0 +260 0 0 +261 0 0 +262 0 0 +263 0 0 +264 0 0 +265 0 0 +266 0 0 +267 0 0 +268 0 0 +269 0 0 +270 0 0 +271 0 0 +272 0 0 +273 0 0 +274 0 0 +275 0 0 +276 0 0 +277 0 0 +278 0 0 +279 0 0 +280 0 0 +281 0 0 +282 0 0 +283 0 0 +284 0 0 +285 0 0 +286 0 0 +287 0 0 +288 0 0 +289 0 0 +290 0 0 +291 0 0 +292 0 0 +293 0 0 +294 0 0 +295 0 0 +296 0 0 +297 0 0 +298 0 0 +299 0 0 +300 0 0 +301 0 0 +302 1 0 +303 0 0 +304 0 0 +305 0 0 +306 0 0 +307 0 0 +308 0 0 +309 0 0 +310 0 0 +311 0 0 +312 0 0 +313 0 0 +314 0 0 +315 0 0 +316 0 0 +317 0 0 +318 0 0 +319 0 0 +320 0 0 +321 0 0 +322 0 0 +323 0 0 +324 0 0 +325 0 0 +326 0 0 +327 0 0 +328 0 0 +329 0 0 +330 0 0 +331 0 0 +332 0 0 +333 0 0 +334 0 0 +335 0 0 +336 0 0 +337 0 0 +338 0 0 +339 1 0 +340 0 0 +341 0 0 +342 0 0 +343 0 0 +344 0 0 +345 0 0 +346 0 0 +347 0 0 +348 0 0 +349 0 0 +350 0 0 +351 0 0 +352 0 0 +353 0 0 +354 0 0 +355 0 0 +356 0 0 +357 0 0 +358 0 0 +359 0 0 +360 0 0 +361 0 0 +362 0 0 +363 0 0 +364 0 0 +365 0 0 +366 0 0 +367 0 0 +368 0 0 +369 0 0 +370 0 0 +371 0 0 +372 1 0 +373 0 0 +374 0 0 +375 0 0 +376 0 0 +377 0 0 +378 0 0 +379 0 0 +380 0 0 +381 0 0 +382 0 1 +383 0 0 +384 0 0 +385 0 0 +386 0 0 +387 0 0 +388 0 0 +389 0 0 +390 0 0 +391 0 0 +392 0 0 +393 0 0 +394 0 0 +395 0 0 +396 0 0 +397 0 0 +398 0 0 +399 0 0 +400 0 0 +401 0 0 +402 0 0 +403 0 0 +404 0 0 +405 1 0 +406 0 0 +407 0 0 +408 0 0 +409 0 0 +410 0 0 +411 0 0 +412 0 0 +413 0 0 +414 0 0 +415 0 0 +416 0 0 +417 0 0 +418 0 0 +419 0 0 +420 0 0 +421 0 0 +422 0 0 +423 0 0 +424 0 0 +425 0 0 +426 0 0 +427 0 0 +428 0 0 +429 0 0 +430 0 0 +431 0 0 +432 0 0 +433 0 0 +434 0 0 +435 0 0 +436 0 0 +437 0 0 +438 0 0 +439 0 0 +440 0 0 +441 0 0 +442 0 0 +443 0 0 +444 0 0 +445 0 0 +446 0 0 +447 0 0 +448 0 0 +449 0 0 +450 0 0 +451 0 0 +452 0 0 +453 0 0 +454 0 0 +455 0 0 +456 0 0 +457 0 0 +458 0 0 +459 0 0 +460 0 0 +461 0 0 +462 0 0 +463 0 0 +464 0 0 +465 0 0 +466 0 0 +467 0 0 +468 0 0 +469 0 0 +470 0 0 +471 0 0 +472 0 0 +473 0 0 +474 0 0 +475 0 0 +476 0 0 +477 0 0 +478 0 0 +479 0 0 +480 0 0 +481 0 0 +482 0 0 +483 0 0 +484 0 0 +485 0 0 +486 0 0 +487 0 0 +488 0 0 +489 0 0 +490 0 0 +491 0 0 +492 0 0 +493 0 0 +494 0 0 +495 0 0 +496 0 0 +497 0 0 +498 0 0 +499 0 0 +500 0 0 +501 0 0 +502 0 0 +503 0 0 +504 0 0 +505 0 0 +506 0 0 +507 0 0 +508 0 0 +509 0 0 +510 0 0 +511 0 0 +512 0 0 +513 0 0 +514 1 0 +515 0 0 +516 0 0 +517 0 0 +518 0 0 +519 0 0 +520 0 0 +521 0 0 +522 0 0 +523 0 0 +524 0 0 +525 0 0 +526 0 0 +527 0 0 +528 0 0 +529 0 0 +530 0 0 +531 0 0 +532 0 0 +533 0 0 +534 0 0 +535 0 0 +536 0 0 +537 0 0 +538 0 0 +539 0 0 +540 1 0 +541 0 0 +542 0 0 +543 0 0 +544 0 0 +545 0 0 +546 1 0 +547 0 0 +548 0 0 +549 0 0 +550 0 0 +551 0 0 +552 0 0 +553 0 0 +554 0 0 +555 0 0 +556 0 0 +557 0 0 +558 0 0 +559 0 0 +560 0 0 +561 0 0 +562 0 0 +563 0 0 +564 0 0 +565 0 0 +566 0 0 +567 0 0 +568 0 0 +569 0 0 +570 0 0 +571 0 0 +572 0 0 +573 0 0 +574 0 0 +575 0 0 +576 0 0 +577 0 0 +578 0 0 +579 0 0 +580 0 0 +581 0 0 +582 0 0 +583 0 0 +584 0 0 +585 0 0 +586 0 0 +587 0 0 +588 0 0 +589 0 0 +590 0 0 +591 0 0 +592 0 0 +593 0 0 +594 0 0 +595 0 0 +596 0 0 +597 0 0 +598 0 0 +599 0 0 +600 0 0 +601 0 0 +602 0 0 +603 0 0 +604 0 0 +605 0 0 +606 0 0 +607 0 0 +608 0 0 +609 0 0 +610 0 0 +611 0 0 +612 0 0 +613 0 0 +614 0 0 +615 0 0 +616 0 0 +617 0 0 +618 0 0 +619 0 0 +620 0 0 +621 1 0 +622 0 0 +623 0 0 +624 0 0 +625 0 0 +626 0 0 +627 0 0 +628 0 0 +629 0 0 +630 0 0 +631 0 0 +632 0 0 +633 0 0 +634 0 0 +635 0 0 +636 0 0 +637 0 0 +638 0 0 +639 0 0 +640 0 0 +641 0 0 +642 0 0 +643 0 0 +644 0 0 +645 0 0 +646 0 0 +647 0 0 +648 0 0 +649 0 0 +650 0 0 +651 0 0 +652 0 0 +653 0 0 +654 0 0 +655 0 0 +656 0 0 +657 0 0 +658 0 0 +659 0 0 +660 0 0 +661 0 0 +662 0 0 +663 0 0 +664 0 0 +665 0 0 +666 0 0 +667 0 0 +668 0 0 +669 0 0 +670 0 0 +671 0 0 +672 0 0 +673 0 0 +674 0 0 +675 0 0 +676 0 0 +677 0 0 +678 0 0 +679 0 0 +680 0 0 +681 0 0 +682 0 0 +683 0 0 +684 0 0 +685 0 0 +686 0 0 +687 0 0 +688 0 0 +689 0 0 +690 0 0 +691 0 0 +692 0 0 +693 0 0 +694 0 0 +695 0 0 +696 0 0 +697 0 0 +698 0 0 +699 0 0 +700 0 0 +701 0 0 +702 0 0 +703 0 0 +704 0 0 +705 0 0 +706 0 0 +707 0 0 +708 0 0 +709 0 0 +710 0 0 +711 0 0 +712 0 0 +713 0 0 +714 0 0 +715 0 0 +716 0 0 +717 0 0 +718 0 0 +719 0 0 +720 0 0 +721 0 0 +722 0 0 +723 0 0 +724 0 0 +725 0 0 +726 0 0 +727 0 0 +728 0 0 +729 0 0 +730 0 0 +731 0 0 +732 0 0 +733 0 0 +734 0 0 +735 0 0 +736 0 0 +737 0 0 +738 0 0 +739 0 0 +740 0 0 +741 0 0 +742 0 0 +743 0 0 +744 0 0 +745 0 0 +746 0 0 +747 0 0 +748 0 0 +749 0 0 +750 0 0 +751 0 0 +752 0 0 +753 0 0 +754 0 0 +755 0 0 +756 0 0 +757 0 0 +758 0 0 +759 0 0 +760 0 0 +761 0 0 +762 0 0 +763 0 0 +764 0 0 +765 0 0 +766 0 0 +767 0 0 +768 0 0 +769 0 0 +770 0 0 +771 0 0 +772 0 0 +773 0 0 +774 0 0 +775 0 0 +776 0 0 +777 0 0 +778 0 0 +779 0 0 +780 0 0 +781 0 0 +782 0 0 +783 0 0 +784 0 0 +785 0 0 +786 0 0 +787 0 0 +788 0 0 +789 0 0 +790 0 0 +791 0 0 +792 0 0 +793 0 0 +794 0 0 +795 0 0 +796 0 0 +797 0 0 +798 0 0 +799 0 0 +800 0 0 +801 0 0 +802 0 0 +803 0 0 +804 0 0 +805 0 0 +806 0 0 +807 0 0 +808 0 0 +809 0 0 +810 0 0 +811 0 0 +812 0 0 +813 0 0 +814 0 0 +815 0 0 +816 0 0 +817 0 0 +818 0 0 +819 0 0 +820 0 0 +821 0 0 +822 0 0 +823 0 0 +824 0 0 +825 0 0 +826 0 0 +827 0 0 +828 0 0 +829 0 0 +830 0 0 +831 0 0 +832 0 0 +833 0 0 +834 0 0 +835 0 0 +836 0 0 +837 0 0 +838 0 0 +839 0 0 +840 0 0 +841 0 0 +842 0 0 +843 0 0 +844 0 0 +845 0 0 +846 0 0 +847 0 0 +848 0 0 +849 0 0 +850 0 0 +851 0 0 +852 0 0 +853 0 0 +854 0 0 +855 0 0 +856 0 0 +857 0 0 +858 0 0 +859 0 0 +860 0 0 +861 0 0 +862 0 0 +863 0 0 +864 0 0 +865 0 0 +866 0 0 +867 0 0 +868 0 0 +869 0 0 +870 0 0 +871 0 0 +872 0 0 +873 0 0 +874 0 0 +875 0 0 +876 0 0 +877 0 0 +878 0 0 +879 0 0 +880 0 0 +881 0 0 +882 0 0 +883 1 0 +884 0 0 +885 0 0 +886 0 0 +887 0 0 +888 0 0 +889 0 0 +890 0 0 +891 0 0 +892 0 0 +893 0 0 +894 0 0 +895 0 0 +896 0 0 +897 0 0 +898 0 0 +899 0 0 +900 0 0 +901 0 0 +902 0 0 +903 0 0 +904 0 0 +905 0 0 +906 0 0 +907 0 0 +908 0 0 +909 0 0 +910 0 0 +911 0 0 +912 0 0 +913 0 0 +914 0 0 +915 0 0 +916 0 0 +917 0 0 +918 0 0 +919 0 0 +920 0 0 +921 0 0 +922 0 0 +923 0 0 +924 0 0 +925 0 0 +926 0 0 +927 0 0 +928 0 0 +929 0 0 +930 0 0 +931 0 0 +932 0 0 +933 0 0 +934 0 0 +935 0 0 +936 0 0 +937 0 0 +938 0 0 +939 0 0 +940 0 0 +941 0 0 +942 0 0 +943 0 0 +944 0 0 +945 0 0 +946 0 0 +947 0 0 +948 0 0 +949 0 0 +950 0 0 +951 0 0 +952 0 0 +953 0 0 +954 0 0 +955 0 0 +956 0 0 +957 0 0 +958 0 0 +959 0 0 +960 0 0 +961 0 0 +962 0 0 +963 0 0 +964 0 0 +965 0 0 +966 0 0 +967 0 0 +968 0 0 +969 0 0 +970 0 0 +971 0 0 +972 0 0 +973 0 0 +974 0 0 +975 0 0 +976 0 0 +977 0 0 +978 0 0 +979 0 0 +980 0 0 +981 0 0 +982 0 0 +983 0 0 +984 0 0 +985 0 0 +986 0 0 +987 0 0 +988 1 0 +989 0 0 +990 0 0 +991 0 0 +992 0 0 +993 0 0 +994 0 0 +995 0 0 +996 0 0 +997 0 0 +998 0 0 +999 0 0 +1000 0 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/wordlist/15.txt b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/wordlist/15.txt new file mode 100644 index 00000000..19166889 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/data/selector/wordlist/15.txt @@ -0,0 +1,1001 @@ +1 0 0 +2 1 0 +3 0 0 +4 0 0 +5 0 0 +6 1 0 +7 0 0 +8 0 0 +9 1 0 +10 1 0 +11 0 0 +12 0 0 +13 0 0 +14 0 0 +15 0 0 +16 1 0 +17 0 0 +18 0 0 +19 1 0 +20 0 0 +21 1 0 +22 0 0 +23 1 0 +24 0 0 +25 1 0 +26 0 0 +27 0 0 +28 0 0 +29 0 0 +30 0 0 +31 0 0 +32 0 0 +33 0 0 +34 0 0 +35 0 0 +36 1 0 +37 0 0 +38 1 0 +39 0 0 +40 1 0 +41 0 0 +42 0 0 +43 0 0 +44 0 0 +45 0 0 +46 0 0 +47 1 0 +48 0 0 +49 0 0 +50 1 0 +51 1 0 +52 1 0 +53 0 0 +54 0 0 +55 0 0 +56 0 0 +57 0 0 +58 0 0 +59 0 0 +60 0 0 +61 0 0 +62 0 0 +63 0 0 +64 0 0 +65 0 0 +66 0 0 +67 0 0 +68 0 0 +69 1 0 +70 0 0 +71 0 0 +72 0 0 +73 1 0 +74 0 0 +75 0 0 +76 0 0 +77 0 0 +78 0 0 +79 0 0 +80 0 0 +81 0 0 +82 0 0 +83 0 0 +84 0 0 +85 0 0 +86 0 0 +87 0 0 +88 0 0 +89 1 1 +90 0 0 +91 0 0 +92 0 0 +93 0 0 +94 0 0 +95 0 0 +96 0 0 +97 0 0 +98 0 0 +99 0 0 +100 0 0 +101 0 0 +102 0 0 +103 0 0 +104 0 0 +105 0 0 +106 0 0 +107 0 0 +108 0 0 +109 0 0 +110 1 0 +111 0 0 +112 0 0 +113 0 0 +114 0 0 +115 0 0 +116 1 0 +117 0 0 +118 0 0 +119 0 0 +120 0 0 +121 0 0 +122 0 0 +123 0 0 +124 0 0 +125 0 0 +126 0 0 +127 0 0 +128 0 0 +129 0 0 +130 0 0 +131 0 0 +132 0 0 +133 0 0 +134 0 0 +135 0 0 +136 0 0 +137 0 0 +138 0 0 +139 0 0 +140 0 0 +141 0 0 +142 0 0 +143 0 0 +144 0 0 +145 0 0 +146 0 0 +147 1 0 +148 0 0 +149 0 0 +150 0 0 +151 0 0 +152 0 0 +153 0 0 +154 0 0 +155 0 0 +156 0 0 +157 0 0 +158 0 0 +159 0 0 +160 0 0 +161 1 0 +162 0 0 +163 0 0 +164 0 0 +165 0 0 +166 0 0 +167 0 0 +168 0 0 +169 0 0 +170 0 0 +171 0 0 +172 0 0 +173 0 0 +174 0 0 +175 0 0 +176 0 0 +177 0 0 +178 0 0 +179 0 0 +180 0 0 +181 0 0 +182 0 0 +183 0 0 +184 0 0 +185 0 0 +186 0 0 +187 0 0 +188 0 0 +189 0 0 +190 0 0 +191 0 0 +192 0 0 +193 0 0 +194 0 0 +195 0 0 +196 0 0 +197 0 0 +198 0 0 +199 0 0 +200 0 0 +201 0 0 +202 0 0 +203 0 0 +204 0 0 +205 0 0 +206 0 0 +207 0 0 +208 0 0 +209 0 0 +210 0 0 +211 0 0 +212 0 0 +213 0 0 +214 0 0 +215 0 0 +216 0 0 +217 0 0 +218 0 0 +219 0 0 +220 0 0 +221 0 0 +222 0 0 +223 0 0 +224 0 0 +225 0 0 +226 0 0 +227 0 0 +228 0 0 +229 0 0 +230 0 0 +231 0 0 +232 0 0 +233 0 0 +234 0 0 +235 0 0 +236 0 0 +237 0 0 +238 0 0 +239 0 0 +240 0 0 +241 0 0 +242 1 0 +243 0 0 +244 0 0 +245 0 0 +246 0 0 +247 0 0 +248 0 0 +249 0 0 +250 0 0 +251 0 0 +252 0 0 +253 0 0 +254 0 0 +255 0 0 +256 0 0 +257 0 0 +258 0 0 +259 0 0 +260 0 0 +261 0 0 +262 0 0 +263 0 0 +264 0 0 +265 0 0 +266 0 0 +267 0 0 +268 0 0 +269 0 0 +270 0 0 +271 0 0 +272 0 0 +273 0 0 +274 0 0 +275 0 0 +276 0 0 +277 0 0 +278 0 0 +279 0 0 +280 0 0 +281 0 0 +282 0 0 +283 0 0 +284 0 0 +285 0 0 +286 0 0 +287 0 0 +288 0 0 +289 0 0 +290 0 0 +291 0 0 +292 0 0 +293 0 0 +294 0 0 +295 0 0 +296 0 0 +297 0 0 +298 0 0 +299 0 0 +300 0 0 +301 0 0 +302 1 0 +303 0 0 +304 0 0 +305 0 0 +306 0 0 +307 0 0 +308 0 0 +309 0 0 +310 0 0 +311 0 0 +312 0 0 +313 0 0 +314 0 0 +315 0 0 +316 0 0 +317 0 0 +318 0 0 +319 0 0 +320 0 0 +321 0 0 +322 0 0 +323 0 0 +324 0 0 +325 0 0 +326 0 0 +327 0 0 +328 0 0 +329 0 0 +330 0 0 +331 0 0 +332 0 0 +333 0 0 +334 0 0 +335 0 0 +336 0 0 +337 0 0 +338 0 0 +339 1 0 +340 0 0 +341 0 0 +342 0 0 +343 0 0 +344 0 0 +345 0 0 +346 0 0 +347 0 0 +348 0 0 +349 0 0 +350 0 0 +351 0 0 +352 0 0 +353 0 0 +354 0 0 +355 0 0 +356 0 0 +357 0 0 +358 0 0 +359 0 0 +360 0 0 +361 0 0 +362 0 0 +363 0 0 +364 0 0 +365 0 0 +366 0 0 +367 0 0 +368 0 0 +369 0 0 +370 0 0 +371 0 0 +372 1 0 +373 0 0 +374 0 0 +375 0 0 +376 0 0 +377 0 0 +378 0 0 +379 0 0 +380 0 0 +381 0 0 +382 0 1 +383 0 0 +384 0 0 +385 0 0 +386 0 0 +387 0 0 +388 0 0 +389 0 0 +390 0 0 +391 0 0 +392 0 0 +393 0 0 +394 0 0 +395 0 0 +396 0 0 +397 0 0 +398 0 0 +399 0 0 +400 0 0 +401 0 0 +402 0 0 +403 0 0 +404 0 0 +405 1 0 +406 0 0 +407 0 0 +408 0 0 +409 0 0 +410 0 0 +411 0 0 +412 0 0 +413 0 0 +414 0 0 +415 0 0 +416 0 0 +417 0 0 +418 0 0 +419 0 0 +420 0 0 +421 0 0 +422 0 0 +423 0 0 +424 0 0 +425 0 0 +426 0 0 +427 0 0 +428 0 0 +429 0 0 +430 0 0 +431 0 0 +432 0 0 +433 0 0 +434 0 0 +435 0 0 +436 0 0 +437 0 0 +438 0 0 +439 0 0 +440 0 0 +441 0 0 +442 0 0 +443 0 0 +444 0 0 +445 0 0 +446 0 0 +447 0 0 +448 0 0 +449 0 0 +450 0 0 +451 0 0 +452 0 0 +453 0 0 +454 0 0 +455 0 0 +456 0 0 +457 0 0 +458 0 0 +459 0 0 +460 0 0 +461 0 0 +462 0 0 +463 0 0 +464 0 0 +465 0 0 +466 0 0 +467 0 0 +468 0 0 +469 0 0 +470 0 0 +471 0 0 +472 0 0 +473 0 0 +474 0 0 +475 0 0 +476 0 0 +477 0 0 +478 0 0 +479 0 0 +480 0 0 +481 0 0 +482 0 0 +483 0 0 +484 0 0 +485 0 0 +486 0 0 +487 0 0 +488 0 0 +489 0 0 +490 0 0 +491 0 0 +492 0 0 +493 0 0 +494 0 0 +495 0 0 +496 0 0 +497 0 0 +498 0 0 +499 0 0 +500 0 0 +501 0 0 +502 0 0 +503 0 0 +504 0 0 +505 0 0 +506 0 0 +507 0 0 +508 0 0 +509 0 0 +510 0 0 +511 0 0 +512 0 0 +513 0 0 +514 1 0 +515 0 0 +516 0 0 +517 0 0 +518 0 0 +519 0 0 +520 0 0 +521 0 0 +522 0 0 +523 0 0 +524 0 0 +525 0 0 +526 0 0 +527 0 0 +528 0 0 +529 0 0 +530 0 0 +531 0 0 +532 0 0 +533 0 0 +534 0 0 +535 0 0 +536 0 0 +537 0 0 +538 0 0 +539 0 0 +540 1 0 +541 0 0 +542 0 0 +543 0 0 +544 0 0 +545 0 0 +546 1 0 +547 0 0 +548 0 0 +549 0 0 +550 0 0 +551 0 0 +552 0 0 +553 0 0 +554 0 0 +555 0 0 +556 0 0 +557 0 0 +558 0 0 +559 0 0 +560 0 0 +561 0 0 +562 0 0 +563 0 0 +564 0 0 +565 0 0 +566 0 0 +567 0 0 +568 0 0 +569 0 0 +570 0 0 +571 0 0 +572 0 0 +573 0 0 +574 0 0 +575 0 0 +576 0 0 +577 0 0 +578 0 0 +579 0 0 +580 0 0 +581 0 0 +582 0 0 +583 0 0 +584 0 0 +585 0 0 +586 0 0 +587 0 0 +588 0 0 +589 0 0 +590 0 0 +591 0 0 +592 0 0 +593 0 0 +594 0 0 +595 0 0 +596 0 0 +597 0 0 +598 0 0 +599 0 0 +600 0 0 +601 0 0 +602 0 0 +603 0 0 +604 0 0 +605 0 0 +606 0 0 +607 0 0 +608 0 0 +609 0 0 +610 0 0 +611 0 0 +612 0 0 +613 0 0 +614 0 0 +615 0 0 +616 0 0 +617 0 0 +618 0 0 +619 0 0 +620 0 0 +621 1 0 +622 0 0 +623 0 0 +624 0 0 +625 0 0 +626 0 0 +627 0 0 +628 0 0 +629 0 0 +630 0 0 +631 0 0 +632 0 0 +633 0 0 +634 0 0 +635 0 0 +636 0 0 +637 0 0 +638 0 0 +639 0 0 +640 0 0 +641 0 0 +642 0 0 +643 0 0 +644 0 0 +645 0 0 +646 0 0 +647 0 0 +648 0 0 +649 0 0 +650 0 0 +651 0 0 +652 0 0 +653 0 0 +654 0 0 +655 0 0 +656 0 0 +657 0 0 +658 0 0 +659 0 0 +660 0 0 +661 0 0 +662 0 0 +663 0 0 +664 0 0 +665 0 0 +666 0 0 +667 0 0 +668 0 0 +669 0 0 +670 0 0 +671 0 0 +672 0 0 +673 0 0 +674 0 0 +675 0 0 +676 0 0 +677 0 0 +678 0 0 +679 0 0 +680 0 0 +681 0 0 +682 0 0 +683 0 0 +684 0 0 +685 0 0 +686 0 0 +687 0 0 +688 0 0 +689 0 0 +690 0 0 +691 0 0 +692 0 0 +693 0 0 +694 0 0 +695 0 0 +696 0 0 +697 0 0 +698 0 0 +699 0 0 +700 0 0 +701 0 0 +702 0 0 +703 0 0 +704 0 0 +705 0 0 +706 0 0 +707 0 0 +708 0 0 +709 0 0 +710 0 0 +711 0 0 +712 0 0 +713 0 0 +714 0 0 +715 0 0 +716 0 0 +717 0 0 +718 0 0 +719 0 0 +720 0 0 +721 0 0 +722 0 0 +723 0 0 +724 0 0 +725 0 0 +726 0 0 +727 0 0 +728 0 0 +729 0 0 +730 0 0 +731 0 0 +732 0 0 +733 0 0 +734 0 0 +735 0 0 +736 0 0 +737 0 0 +738 0 0 +739 0 0 +740 0 0 +741 0 0 +742 0 0 +743 0 0 +744 0 0 +745 0 0 +746 0 0 +747 0 0 +748 0 0 +749 0 0 +750 0 0 +751 0 0 +752 0 0 +753 0 0 +754 0 0 +755 0 0 +756 0 0 +757 0 0 +758 0 0 +759 0 0 +760 0 0 +761 0 0 +762 0 0 +763 0 0 +764 0 0 +765 0 0 +766 0 0 +767 0 0 +768 0 0 +769 0 0 +770 0 0 +771 0 0 +772 0 0 +773 0 0 +774 0 0 +775 0 0 +776 0 0 +777 0 0 +778 0 0 +779 0 0 +780 0 0 +781 0 0 +782 0 0 +783 0 0 +784 0 0 +785 0 0 +786 0 0 +787 0 0 +788 0 0 +789 0 0 +790 0 0 +791 0 0 +792 0 0 +793 0 0 +794 0 0 +795 0 0 +796 0 0 +797 0 0 +798 0 0 +799 0 0 +800 0 0 +801 0 0 +802 0 0 +803 0 0 +804 0 0 +805 0 0 +806 0 0 +807 0 0 +808 0 0 +809 0 0 +810 0 0 +811 0 0 +812 0 0 +813 0 0 +814 0 0 +815 0 0 +816 0 0 +817 0 0 +818 0 0 +819 0 0 +820 0 0 +821 0 0 +822 0 0 +823 0 0 +824 0 0 +825 0 0 +826 0 0 +827 0 0 +828 0 0 +829 0 0 +830 0 0 +831 0 0 +832 0 0 +833 0 0 +834 0 0 +835 0 0 +836 0 0 +837 0 0 +838 0 0 +839 0 0 +840 0 0 +841 0 0 +842 0 0 +843 0 0 +844 0 0 +845 0 0 +846 0 0 +847 0 0 +848 0 0 +849 0 0 +850 0 0 +851 0 0 +852 0 0 +853 0 0 +854 0 0 +855 0 0 +856 0 0 +857 0 0 +858 0 0 +859 0 0 +860 0 0 +861 0 0 +862 0 0 +863 0 0 +864 0 0 +865 0 0 +866 0 0 +867 0 0 +868 0 0 +869 0 0 +870 0 0 +871 0 0 +872 0 0 +873 0 0 +874 0 0 +875 0 0 +876 0 0 +877 0 0 +878 0 0 +879 0 0 +880 0 0 +881 0 0 +882 0 0 +883 1 0 +884 0 0 +885 0 0 +886 0 0 +887 0 0 +888 0 0 +889 0 0 +890 0 0 +891 0 0 +892 0 0 +893 0 0 +894 0 0 +895 0 0 +896 0 0 +897 0 0 +898 0 0 +899 0 0 +900 0 0 +901 0 0 +902 0 0 +903 0 0 +904 0 0 +905 0 0 +906 0 0 +907 0 0 +908 0 0 +909 0 0 +910 0 0 +911 0 0 +912 0 0 +913 0 0 +914 0 0 +915 0 0 +916 0 0 +917 0 0 +918 0 0 +919 0 0 +920 0 0 +921 0 0 +922 0 0 +923 0 0 +924 0 0 +925 0 0 +926 0 0 +927 0 0 +928 0 0 +929 0 0 +930 0 0 +931 0 0 +932 0 0 +933 0 0 +934 0 0 +935 0 0 +936 0 0 +937 0 0 +938 0 0 +939 0 0 +940 0 0 +941 0 0 +942 0 0 +943 0 0 +944 0 0 +945 0 0 +946 0 0 +947 0 0 +948 0 0 +949 0 0 +950 0 0 +951 0 0 +952 0 0 +953 0 0 +954 0 0 +955 0 0 +956 0 0 +957 0 0 +958 0 0 +959 0 0 +960 0 0 +961 0 0 +962 0 0 +963 0 0 +964 0 0 +965 0 0 +966 0 0 +967 0 0 +968 0 0 +969 0 0 +970 0 0 +971 0 0 +972 0 0 +973 0 0 +974 0 0 +975 0 0 +976 0 0 +977 0 0 +978 0 0 +979 0 0 +980 0 0 +981 0 0 +982 0 0 +983 0 0 +984 0 0 +985 0 0 +986 0 0 +987 0 0 +988 1 0 +989 0 0 +990 0 0 +991 0 0 +992 0 0 +993 0 0 +994 0 0 +995 0 0 +996 0 0 +997 0 0 +998 0 0 +999 0 0 +1000 0 0 +1001 0 0 diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/PorterStemmer.py b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/PorterStemmer.py new file mode 100644 index 00000000..405a5f98 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/PorterStemmer.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python + +"""Porter Stemming Algorithm +This is the Porter stemming algorithm, ported to Python from the +version coded up in ANSI C by the author. It may be be regarded +as canonical, in that it follows the algorithm presented in + +Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, +no. 3, pp 130-137, + +only differing from it at the points maked --DEPARTURE-- below. + +See also http://www.tartarus.org/~martin/PorterStemmer + +The algorithm as described in the paper could be exactly replicated +by adjusting the points of DEPARTURE, but this is barely necessary, +because (a) the points of DEPARTURE are definitely improvements, and +(b) no encoding of the Porter stemmer I have seen is anything like +as exact as this version, even with the points of DEPARTURE! + +Vivake Gupta (v@nano.com) + +Release 1: January 2001 + +Further adjustments by Santiago Bruno (bananabruno@gmail.com) +to allow word input not restricted to one word per line, leading +to: + +release 2: July 2008 +""" + +import sys + +class PorterStemmer: + + def __init__(self): + """The main part of the stemming algorithm starts here. + b is a buffer holding a word to be stemmed. The letters are in b[k0], + b[k0+1] ... ending at b[k]. In fact k0 = 0 in this demo program. k is + readjusted downwards as the stemming progresses. Zero termination is + not in fact used in the algorithm. + + Note that only lower case sequences are stemmed. Forcing to lower case + should be done before stem(...) is called. + """ + + self.b = "" # buffer for word to be stemmed + self.k = 0 + self.k0 = 0 + self.j = 0 # j is a general offset into the string + + def cons(self, i): + """cons(i) is TRUE <=> b[i] is a consonant.""" + if self.b[i] == 'a' or self.b[i] == 'e' or self.b[i] == 'i' or self.b[i] == 'o' or self.b[i] == 'u': + return 0 + if self.b[i] == 'y': + if i == self.k0: + return 1 + else: + return (not self.cons(i - 1)) + return 1 + + def m(self): + """m() measures the number of consonant sequences between k0 and j. + if c is a consonant sequence and v a vowel sequence, and <..> + indicates arbitrary presence, + + gives 0 + vc gives 1 + vcvc gives 2 + vcvcvc gives 3 + .... + """ + n = 0 + i = self.k0 + while 1: + if i > self.j: + return n + if not self.cons(i): + break + i = i + 1 + i = i + 1 + while 1: + while 1: + if i > self.j: + return n + if self.cons(i): + break + i = i + 1 + i = i + 1 + n = n + 1 + while 1: + if i > self.j: + return n + if not self.cons(i): + break + i = i + 1 + i = i + 1 + + def vowelinstem(self): + """vowelinstem() is TRUE <=> k0,...j contains a vowel""" + for i in range(self.k0, self.j + 1): + if not self.cons(i): + return 1 + return 0 + + def doublec(self, j): + """doublec(j) is TRUE <=> j,(j-1) contain a double consonant.""" + if j < (self.k0 + 1): + return 0 + if (self.b[j] != self.b[j-1]): + return 0 + return self.cons(j) + + def cvc(self, i): + """cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant + and also if the second c is not w,x or y. this is used when trying to + restore an e at the end of a short e.g. + + cav(e), lov(e), hop(e), crim(e), but + snow, box, tray. + """ + if i < (self.k0 + 2) or not self.cons(i) or self.cons(i-1) or not self.cons(i-2): + return 0 + ch = self.b[i] + if ch == 'w' or ch == 'x' or ch == 'y': + return 0 + return 1 + + def ends(self, s): + """ends(s) is TRUE <=> k0,...k ends with the string s.""" + length = len(s) + if s[length - 1] != self.b[self.k]: # tiny speed-up + return 0 + if length > (self.k - self.k0 + 1): + return 0 + if self.b[self.k-length+1:self.k+1] != s: + return 0 + self.j = self.k - length + return 1 + + def setto(self, s): + """setto(s) sets (j+1),...k to the characters in the string s, readjusting k.""" + length = len(s) + self.b = self.b[:self.j+1] + s + self.b[self.j+length+1:] + self.k = self.j + length + + def r(self, s): + """r(s) is used further down.""" + if self.m() > 0: + self.setto(s) + + def step1ab(self): + """step1ab() gets rid of plurals and -ed or -ing. e.g. + + caresses -> caress + ponies -> poni + ties -> ti + caress -> caress + cats -> cat + + feed -> feed + agreed -> agree + disabled -> disable + + matting -> mat + mating -> mate + meeting -> meet + milling -> mill + messing -> mess + + meetings -> meet + """ + if self.b[self.k] == 's': + if self.ends("sses"): + self.k = self.k - 2 + elif self.ends("ies"): + self.setto("i") + elif self.b[self.k - 1] != 's': + self.k = self.k - 1 + if self.ends("eed"): + if self.m() > 0: + self.k = self.k - 1 + elif (self.ends("ed") or self.ends("ing")) and self.vowelinstem(): + self.k = self.j + if self.ends("at"): self.setto("ate") + elif self.ends("bl"): self.setto("ble") + elif self.ends("iz"): self.setto("ize") + elif self.doublec(self.k): + self.k = self.k - 1 + ch = self.b[self.k] + if ch == 'l' or ch == 's' or ch == 'z': + self.k = self.k + 1 + elif (self.m() == 1 and self.cvc(self.k)): + self.setto("e") + + def step1c(self): + """step1c() turns terminal y to i when there is another vowel in the stem.""" + if (self.ends("y") and self.vowelinstem()): + self.b = self.b[:self.k] + 'i' + self.b[self.k+1:] + + def step2(self): + """step2() maps double suffices to single ones. + so -ization ( = -ize plus -ation) maps to -ize etc. note that the + string before the suffix must give m() > 0. + """ + if self.b[self.k - 1] == 'a': + if self.ends("ational"): self.r("ate") + elif self.ends("tional"): self.r("tion") + elif self.b[self.k - 1] == 'c': + if self.ends("enci"): self.r("ence") + elif self.ends("anci"): self.r("ance") + elif self.b[self.k - 1] == 'e': + if self.ends("izer"): self.r("ize") + elif self.b[self.k - 1] == 'l': + if self.ends("bli"): self.r("ble") # --DEPARTURE-- + # To match the published algorithm, replace this phrase with + # if self.ends("abli"): self.r("able") + elif self.ends("alli"): self.r("al") + elif self.ends("entli"): self.r("ent") + elif self.ends("eli"): self.r("e") + elif self.ends("ousli"): self.r("ous") + elif self.b[self.k - 1] == 'o': + if self.ends("ization"): self.r("ize") + elif self.ends("ation"): self.r("ate") + elif self.ends("ator"): self.r("ate") + elif self.b[self.k - 1] == 's': + if self.ends("alism"): self.r("al") + elif self.ends("iveness"): self.r("ive") + elif self.ends("fulness"): self.r("ful") + elif self.ends("ousness"): self.r("ous") + elif self.b[self.k - 1] == 't': + if self.ends("aliti"): self.r("al") + elif self.ends("iviti"): self.r("ive") + elif self.ends("biliti"): self.r("ble") + elif self.b[self.k - 1] == 'g': # --DEPARTURE-- + if self.ends("logi"): self.r("log") + # To match the published algorithm, delete this phrase + + def step3(self): + """step3() dels with -ic-, -full, -ness etc. similar strategy to step2.""" + if self.b[self.k] == 'e': + if self.ends("icate"): self.r("ic") + elif self.ends("ative"): self.r("") + elif self.ends("alize"): self.r("al") + elif self.b[self.k] == 'i': + if self.ends("iciti"): self.r("ic") + elif self.b[self.k] == 'l': + if self.ends("ical"): self.r("ic") + elif self.ends("ful"): self.r("") + elif self.b[self.k] == 's': + if self.ends("ness"): self.r("") + + def step4(self): + """step4() takes off -ant, -ence etc., in context vcvc.""" + if self.b[self.k - 1] == 'a': + if self.ends("al"): pass + else: return + elif self.b[self.k - 1] == 'c': + if self.ends("ance"): pass + elif self.ends("ence"): pass + else: return + elif self.b[self.k - 1] == 'e': + if self.ends("er"): pass + else: return + elif self.b[self.k - 1] == 'i': + if self.ends("ic"): pass + else: return + elif self.b[self.k - 1] == 'l': + if self.ends("able"): pass + elif self.ends("ible"): pass + else: return + elif self.b[self.k - 1] == 'n': + if self.ends("ant"): pass + elif self.ends("ement"): pass + elif self.ends("ment"): pass + elif self.ends("ent"): pass + else: return + elif self.b[self.k - 1] == 'o': + if self.ends("ion") and (self.b[self.j] == 's' or self.b[self.j] == 't'): pass + elif self.ends("ou"): pass + # takes care of -ous + else: return + elif self.b[self.k - 1] == 's': + if self.ends("ism"): pass + else: return + elif self.b[self.k - 1] == 't': + if self.ends("ate"): pass + elif self.ends("iti"): pass + else: return + elif self.b[self.k - 1] == 'u': + if self.ends("ous"): pass + else: return + elif self.b[self.k - 1] == 'v': + if self.ends("ive"): pass + else: return + elif self.b[self.k - 1] == 'z': + if self.ends("ize"): pass + else: return + else: + return + if self.m() > 1: + self.k = self.j + + def step5(self): + """step5() removes a final -e if m() > 1, and changes -ll to -l if + m() > 1. + """ + self.j = self.k + if self.b[self.k] == 'e': + a = self.m() + if a > 1 or (a == 1 and not self.cvc(self.k-1)): + self.k = self.k - 1 + if self.b[self.k] == 'l' and self.doublec(self.k) and self.m() > 1: + self.k = self.k -1 + + def stem(self, p, i, j): + """In stem(p,i,j), p is a char pointer, and the string to be stemmed + is from p[i] to p[j] inclusive. Typically i is zero and j is the + offset to the last character of a string, (p[j+1] == '\0'). The + stemmer adjusts the characters p[i] ... p[j] and returns the new + end-point of the string, k. Stemming never increases word length, so + i <= k <= j. To turn the stemmer into a module, declare 'stem' as + extern, and delete the remainder of this file. + """ + # copy the parameters into statics + self.b = p + self.k = j + self.k0 = i + if self.k <= self.k0 + 1: + return self.b # --DEPARTURE-- + + # With this line, strings of length 1 or 2 don't go through the + # stemming process, although no mention is made of this in the + # published algorithm. Remove the line to match the published + # algorithm. + + self.step1ab() + self.step1c() + self.step2() + self.step3() + self.step4() + self.step5() + return self.b[self.k0:self.k+1] \ No newline at end of file diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/PorterStemmer.pyc b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/PorterStemmer.pyc new file mode 100644 index 00000000..0747ccf9 Binary files /dev/null and b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/PorterStemmer.pyc differ diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/extractor.py b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/extractor.py index 5579e630..4f9d79c5 100644 --- a/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/extractor.py +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/extractor.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- import os import re +import operator +import math +from PorterStemmer import PorterStemmer trainings_data = os.path.join(os.path.dirname(__file__),"../data/data_training/") normal_data = os.path.join(os.path.dirname(__file__),"../data/data/") @@ -21,6 +24,7 @@ def getFile(file): return content_file.read() def docToWordList(file,outputfile): + print('wordlist: '+file) tmp = re.sub("[\n\r]", "", getFile(file)) # remove all scripts tmp = re.sub("<\s*script.*?>.+?<\s*\/script.*?>", "", tmp) # remove all scripts tmp = re.sub("<\s*style.*?>.+?<\s*\/style.*?>", "", tmp) # remove all styles @@ -33,7 +37,10 @@ def docToWordList(file,outputfile): words = [x+'\n' for x in words] # add newline after every word nf = open(outputfile,'w+') + nf.seek(0) nf.writelines(words); + nf.truncate() + nf.close() def docsToWordList(infolder,outfolder): for file in getFileList(infolder+'c/'): @@ -43,23 +50,288 @@ def docsToWordList(infolder,outfolder): def isStopWord(word,stopwords): for line in open(stopwords,'r').readlines(): - if line == word: + if line.strip() == word: return True return False def filterStopW(file,outputfile,stopwords): + print('stopwords: '+file) filtered = [] for line in open(file,'r').readlines(): - if not isStopWord(line,stopwords): - filtered.append(line) + if not isStopWord(line.strip(),stopwords): + filtered.append(line.strip()) nf = open(outputfile,'w+') - nf.writelines(filtered) + nf.seek(0) + for fil in filtered: + nf.write("%s\n" % (fil)) + nf.truncate() + nf.close() def filterStopwords(infolder,outfolder,stopwords): for file in getFileList(infolder+'c/'): filterStopW(infolder+'c/'+file,outfolder+'c/'+file,stopwords) for file in getFileList(infolder+'nc/'): filterStopW(infolder+'nc/'+file,outfolder+'nc/'+file,stopwords) + +def stemm(file,outputfile): + print('stemming: '+file) + stemmed = [] + p = PorterStemmer() + for line in open(file,'r').readlines(): + stemmed += [p.stem(line.strip(), 0,len(line.strip())-1)] + nf = open(outputfile,'w+') + nf.seek(0) + for stem in stemmed: + nf.write("%s\n" % (stem)) + nf.truncate() + nf.close() + +def stemming(infolder,outfolder): + for file in getFileList(infolder+'c/'): + stemm(infolder+'c/'+file,outfolder+'c/'+file) + for file in getFileList(infolder+'nc/'): + stemm(infolder+'nc/'+file,outfolder+'nc/'+file) + +def tf_raw(file,outputfile): + print('tf_raw: '+file) + tfed = [] + for line in open(file,'r').readlines(): + b = True + for tf in tfed: + if tf[0] == line.strip(): + tf[1] += 1 + b = False + break + if b: + tfed.append([line.strip(),1]) + + tfed.sort(key=operator.itemgetter(1),reverse=True) + + nf = open(outputfile,'w+') + nf.seek(0) + for tf in tfed: + nf.write("%s %d\n" % (tf[0], tf[1])) + nf.truncate() + nf.close() + +def tf_bool(file,outputfile): + print('tf_bool: '+file) + tfed = [] + for line in open(file,'r').readlines(): + b = True + for tf in tfed: + if tf[0] == line.strip(): + b = False + break + if b: + tfed.append([line.strip(),1]) + + + nf = open(outputfile,'w+') + nf.seek(0) + for tf in tfed: + nf.write("%s %d\n" % (tf[0], tf[1])) + nf.truncate() + nf.close() + +def tf_log(file,outputfile): + print('tf_log: '+file) + tfed = [] + for line in open(file,'r').readlines(): + b = True + for tf in tfed: + if tf[0] == line.strip(): + tf[1] += 1 + b = False + break + if b: + tfed.append([line.strip(),1]) + + tfed.sort(key=operator.itemgetter(1),reverse=True) + + nf = open(outputfile,'w+') + nf.seek(0) + for tf in tfed: + nf.write("%s %d\n" % (tf[0], math.log(tf[1]+1))) + nf.truncate() + nf.close() + +def tf_aug(file,outputfile): + print('tf_aug: '+file) + tfed = [] + for line in open(file,'r').readlines(): + b = True + for tf in tfed: + if tf[0] == line.strip(): + tf[1] += 1 + b = False + break + if b: + tfed.append([line.strip(),1]) + + tfed.sort(key=operator.itemgetter(1),reverse=True) + + nf = open(outputfile,'w+') + nf.seek(0) + for tf in tfed: + maxwordcount = tfed[0][1] + nf.write("%s %d\n" % (tf[0], 0.5 + ((0.5*tf[1])/maxwordcount))) + nf.truncate() + nf.close() + +def tf(infolder,outfolder): + for file in getFileList(infolder+'c/'): + tf_raw(infolder+'c/'+file,outfolder+'tf_raw/'+'c/'+file) + for file in getFileList(infolder+'nc/'): + tf_raw(infolder+'nc/'+file,outfolder+'tf_raw/'+'nc/'+file) + for file in getFileList(infolder+'c/'): + tf_bool(infolder+'c/'+file,outfolder+'tf_bool/'+'c/'+file) + for file in getFileList(infolder+'nc/'): + tf_bool(infolder+'nc/'+file,outfolder+'tf_bool/'+'nc/'+file) + for file in getFileList(infolder+'c/'): + tf_log(infolder+'c/'+file,outfolder+'tf_log/'+'c/'+file) + for file in getFileList(infolder+'nc/'): + tf_log(infolder+'nc/'+file,outfolder+'tf_log/'+'nc/'+file) + for file in getFileList(infolder+'c/'): + tf_aug(infolder+'c/'+file,outfolder+'tf_aug/'+'c/'+file) + for file in getFileList(infolder+'nc/'): + tf_aug(infolder+'nc/'+file,outfolder+'tf_aug/'+'nc/'+file) + + +def idf_readterms(file): + print('idf: '+file) + terms = [] + for line in open(file,'r').readlines(): + terms.append(line.split()[0]) + + return terms + +def idf(infolder,outfile): + allterms = [] + all_cterms = [] + all_ncterms = [] + documentcount = 0 + for file in getFileList(infolder+'c/'): + all_cterms += idf_readterms(infolder+'c/'+file) + allterms += all_cterms + documentcount += 1 + for file in getFileList(infolder+'nc/'): + all_ncterms += idf_readterms(infolder+'nc/'+file) + allterms += all_ncterms + documentcount += 1 + + print("idf: start wordoccurence in document") + idf_terms = [] + for term in allterms: + b = True + for term2 in idf_terms: + if term2[0] == term: + term2[1] += 1 + b = False + break + if b: + idf_terms.append([term,1,0,0,0,0]) + + print("idf: start wordoccurence in all_cterms") + cterms = [] + for term in all_cterms: + b = True + for term2 in cterms: + if term2[0] == term: + term2[1] += 1 + b = False + break + if b: + cterms.append([term,1]) + + print("idf: start wordoccurence in all_ncterms") + ncterms = [] + for term in all_ncterms: + b = True + for term2 in ncterms: + if term2[0] == term: + term2[1] += 1 + b = False + break + if b: + ncterms.append([term,1]) + + print("idf: start calc idf & words in c & words in nc") + for term in idf_terms: + term[2] = math.log( (documentcount/term[1])) + for c in cterms: + if c[0] == term[0]: + term[3] = c[1] + for nc in ncterms: + if nc[0] == term[0]: + term[4] = nc[1] + + print("idf: start sort") + idf_terms.sort(key=operator.itemgetter(2)) + + print("idf: start wordid") + wordid = 1 + for term in idf_terms: + term[5] = wordid + wordid += 1 + + print("idf: start writefile") + nf = open(outfile,'w+') + nf.seek(0) + nf.write('term, in documents count, idf, in c count, in nc count, wordid; documentcount = '+str(documentcount)+'\n') + for term in idf_terms: + nf.write("%s %d %f %d %d %d\n" % (term[0], term[1], term[2], term[3],term[4],term[5])) + nf.truncate() + nf.close() + +def tfidffile(file,idffile,outputfile): + print('tfidf: '+file) + idf = [] + idff = open(idffile,'r') + idff.readline() + for line in idff.readlines(): + idf.append(line.split()) + idff.close() + + terms = [] + for line in open(file,'r').readlines(): + terms.append(line.split()) + + tfidf = [] + for t in terms: + for f in idf: + if t[0] == f[0]: + tfidf += [[t[0],t[1],f[1],f[2],int(t[1]) * float(f[2]),f[5]]] + break; + + tfidf.sort(key=operator.itemgetter(3)) + + nf = open(outputfile,'w+') + nf.seek(0) + nf.write('term, tf, in documents count, idf, tfidf, wordid'+'\n') + for term in tfidf: + nf.write("%s %s %s %s %f %s\n" % (term[0], term[1], term[2], term[3], term[4], term[5])) + nf.truncate() + nf.close() + + +def tfidf(infolder, idffile, outfolder): + for file in getFileList(infolder+'tf_raw/'+'c/'): + tfidffile(infolder+'tf_raw/'+'c/'+file,idffile,outfolder+'tfidf_raw/'+'c/'+file) + for file in getFileList(infolder+'tf_raw/'+'nc/'): + tfidffile(infolder+'tf_raw/'+'nc/'+file,idffile,outfolder+'tfidf_raw/'+'nc/'+file) + for file in getFileList(infolder+'tf_bool/'+'c/'): + tfidffile(infolder+'tf_bool/'+'c/'+file,idffile,outfolder+'tfidf_bool/'+'c/'+file) + for file in getFileList(infolder+'tf_bool/'+'nc/'): + tfidffile(infolder+'tf_bool/'+'nc/'+file,idffile,outfolder+'tfidf_bool/'+'nc/'+file) + for file in getFileList(infolder+'tf_log/'+'c/'): + tfidffile(infolder+'tf_log/'+'c/'+file,idffile,outfolder+'tfidf_log/'+'c/'+file) + for file in getFileList(infolder+'tf_log/'+'nc/'): + tfidffile(infolder+'tf_log/'+'nc/'+file,idffile,outfolder+'tfidf_log/'+'nc/'+file) + for file in getFileList(infolder+'tf_aug/'+'c/'): + tfidffile(infolder+'tf_aug/'+'c/'+file,idffile,outfolder+'tfidf_aug/'+'c/'+file) + for file in getFileList(infolder+'tf_aug/'+'nc/'): + tfidffile(infolder+'tf_aug/'+'nc/'+file,idffile,outfolder+'tfidf_aug/'+'nc/'+file) #MAIN if __name__ == "__main__": @@ -69,19 +341,14 @@ if __name__ == "__main__": #Stoppwort-Filterung und Stemming filterStopwords(output_training+'wl/',output_training+'sw/',stopwords) - filterStopwords(output_data+'wl/',output_training+'sw/',stopwords) + filterStopwords(output_data+'wl/',output_data+'sw/',stopwords) - stemming(output_training) - stemming(output_data) + stemming(output_training+'sw/',output_training+'ps/') + stemming(output_data+'sw/',output_data+'ps/') #Wort-Liste zu TF-IDF-Vektor - tfidf(output_training) - tfidf(output_data) + tf(output_training+'ps/',output_training+'tf/') + idf(output_training+'tf/tf_raw/',output_training+'idf/idf.txt') + tfidf(output_training+'tf/',output_training+'idf/idf.txt',output_training+'tfidf/') - #Einfache Feature-Selection - filternwords(output_training) - filternwords(output_data) - - #Sparse-Repräsentation - sparse(output_training) - sparse(output_data) \ No newline at end of file + print('done') \ No newline at end of file diff --git a/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/selector.py b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/selector.py new file mode 100644 index 00000000..52f418f4 --- /dev/null +++ b/ss2013/1_Web Mining/Uebungen/3_Uebung/extractor/selector.py @@ -0,0 +1,72 @@ + +import os + +n_words=1000 +n = 15 +tf_algo = 'raw' #raw, bool, log, aug http://en.wikipedia.org/wiki/Tf%E2%80%93idf + +input_training = os.path.join(os.path.dirname(__file__),"../data/data_sanitized/train/") +input_data = os.path.join(os.path.dirname(__file__),"../data/data_sanitized/data/") + +output = os.path.join(os.path.dirname(__file__),"../data/selector/") + +def getFileList(dir): + dirList = os.listdir(dir) + #dirList.sort() + return dirList; + +def wordlist(idffile,outputfile): + idf = [] + idff = open(idffile,'r') + idff.readline() + for line in idff.readlines(): + idf.append(line.split()) + idff.close() + + nf = open(outputfile,'w+') + nf.seek(0) + ncount = 0 + for w in idf: + nf.write("%s %s %s\n" % (w[5], w[3], w[4])) + ncount += 1 + if ncount >= n_words: + break + nf.truncate() + nf.close() + +def tfidffile(file): + print('tfidf: '+file) + terms = [] + f = open(file,'r') + f.readline() + for line in f.readlines(): + terms.append(line.split()) + + out = "" + ncount = 0 + for t in terms: + out += t[5] +':'+t[4]+' ' + ncount += 1 + if ncount > n: + break + + return out + +def tfidf(infolder, outfile): + out = "" + for file in getFileList(infolder+'c/'): + out += '+1 '+tfidffile(infolder+'c/'+file)+'\n' + for file in getFileList(infolder+'nc/'): + out += '-1 '+tfidffile(infolder+'nc/'+file)+'\n' + nf = open(outfile,'w+') + + nf.seek(0) + nf.write(out) + nf.truncate() + nf.close() + + +if __name__ == "__main__": + wordlist(input_training+'idf/idf.txt',output+'wordlist/'+str(n_words)+'.txt') + tfidf(input_training+'tfidf/tfidf_'+tf_algo+'/',output+'tfidf/tfidf_'+tf_algo+'/'+str(n)+'.txt') + print('done') \ No newline at end of file